diff --git a/scripts/variantstore/wdl/GvsExtractAvroFilesForHail.wdl b/scripts/variantstore/wdl/GvsExtractAvroFilesForHail.wdl
index 0780b8ba853..20b9b20819c 100644
--- a/scripts/variantstore/wdl/GvsExtractAvroFilesForHail.wdl
+++ b/scripts/variantstore/wdl/GvsExtractAvroFilesForHail.wdl
@@ -95,7 +95,8 @@ workflow GvsExtractAvroFilesForHail {
 
     call Utils.IsUsingCompressedReferences {
         input:
-            project_id = project_id,
+            query_project_id = project_id,
+            dest_project_id = project_id,
             dataset_name = dataset_name,
             ref_table_timestamp = RefTableDatetimeCheck.last_modified_timestamp,
             cloud_sdk_docker = effective_cloud_sdk_docker,
@@ -302,7 +303,7 @@ task ExtractFromSuperpartitionedTables {
             EXPORT DATA OPTIONS(
             uri='${avro_prefix}/vets/vet_${str_table_index}/vet_${str_table_index}_*.avro', format='AVRO', compression='SNAPPY') AS
             SELECT location, v.sample_id, ref, REPLACE(alt,',<NON_REF>','') alt, call_GT as GT, call_AD as AD, call_GQ as GQ, cast(SPLIT(call_pl,',')[OFFSET(0)] as int64) as RGQ,
-            call_PS as PS
+            SAFE_CAST(call_PS AS INT64) AS PS
             FROM \`~{project_id}.~{dataset_name}.vet_${str_table_index}\` v
             INNER JOIN \`~{project_id}.~{dataset_name}.sample_info\` s ON s.sample_id = v.sample_id
             WHERE withdrawn IS NULL AND
diff --git a/scripts/variantstore/wdl/GvsPrepareRangesCallset.wdl b/scripts/variantstore/wdl/GvsPrepareRangesCallset.wdl
index a111634a935..d99b76e0106 100644
--- a/scripts/variantstore/wdl/GvsPrepareRangesCallset.wdl
+++ b/scripts/variantstore/wdl/GvsPrepareRangesCallset.wdl
@@ -56,7 +56,8 @@ workflow GvsPrepareCallset {
 
     call Utils.IsUsingCompressedReferences {
         input:
-            project_id = query_project,
+            query_project_id = query_project,
+            dest_project_id = destination_project,
             dataset_name = dataset_name,
             ref_table_timestamp = RefTableDatetimeCheck.last_modified_timestamp,
             cloud_sdk_docker = effective_cloud_sdk_docker,
diff --git a/scripts/variantstore/wdl/GvsUtils.wdl b/scripts/variantstore/wdl/GvsUtils.wdl
index 7a2cbf9ece1..9c7d6247a2d 100644
--- a/scripts/variantstore/wdl/GvsUtils.wdl
+++ b/scripts/variantstore/wdl/GvsUtils.wdl
@@ -929,7 +929,8 @@ task IsVETS {
 
 task IsUsingCompressedReferences {
     input {
-        String project_id
+        String query_project_id
+        String dest_project_id
         String dataset_name
         String ref_table_timestamp
         String cloud_sdk_docker
@@ -940,11 +941,11 @@
         set -o errexit -o nounset -o pipefail -o xtrace
 
         # bq query --max_rows check: ok one row
-        bq --apilog=false query --project_id=~{project_id} --format=csv --use_legacy_sql=false '
+        bq --apilog=false query --project_id=~{query_project_id} --format=csv --use_legacy_sql=false '
             SELECT
                 column_name
             FROM
-                `~{dataset_name}.INFORMATION_SCHEMA.COLUMNS`
+                `~{dest_project_id}.~{dataset_name}.INFORMATION_SCHEMA.COLUMNS`
             WHERE table_name = "ref_ranges_001" AND
                 (column_name = "location" OR column_name = "packed_ref_data")
         ' | sed 1d > column_name.txt
diff --git a/scripts/variantstore/wdl/extract/create_ranges_cohort_extract_data_table.py b/scripts/variantstore/wdl/extract/create_ranges_cohort_extract_data_table.py
index fbbe6fafadf..8363d5abdc7 100644
--- a/scripts/variantstore/wdl/extract/create_ranges_cohort_extract_data_table.py
+++ b/scripts/variantstore/wdl/extract/create_ranges_cohort_extract_data_table.py
@@ -254,7 +254,7 @@ def populate_final_extract_table_with_vet(fq_ranges_dataset, fq_destination_tabl
 
     # split file into files with x lines and then run
     def get_ref_subselect(fq_vet_table, samples, id):
         sample_stanza = ','.join([str(s) for s in samples])
-        sql = f" q_{id} AS (SELECT location, sample_id, ref, alt, call_GT, call_GQ, call_AD, AS_QUALapprox, QUALapprox, CALL_PL, CALL_PGT, CALL_PID, CALL_PS FROM \n" \
+        sql = f" q_{id} AS (SELECT location, sample_id, ref, alt, call_GT, call_GQ, call_AD, AS_QUALapprox, QUALapprox, CALL_PL, CALL_PGT, CALL_PID, SAFE_CAST(CALL_PS AS INT64) AS CALL_PS FROM \n" \
              f" `{fq_vet_table}` WHERE sample_id IN ({sample_stanza})), "
         return sql
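
Reviewer note, not part of the diff above: the switch to SAFE_CAST matters because BigQuery's plain CAST aborts the whole export on the first value it cannot parse, whereas SAFE_CAST yields NULL and lets the query finish. A minimal standalone sketch in BigQuery Standard SQL; the literal values are invented for illustration, assuming call_PS is a STRING column that can carry a non-numeric marker such as ".":

    -- Hypothetical inputs: a numeric phase set and a missing ('.') one.
    SELECT
      SAFE_CAST('12345' AS INT64) AS numeric_ps,  -- 12345
      SAFE_CAST('.' AS INT64)     AS missing_ps   -- NULL (plain CAST would error here)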