From 58ee11fc4b1d06145ad9678fe801df73502efb77 Mon Sep 17 00:00:00 2001 From: Yonghao Yu Date: Tue, 29 Oct 2024 16:08:58 -0400 Subject: [PATCH 1/5] pass dest project id --- scripts/variantstore/wdl/GvsExtractAvroFilesForHail.wdl | 3 ++- scripts/variantstore/wdl/GvsPrepareRangesCallset.wdl | 3 ++- scripts/variantstore/wdl/GvsUtils.wdl | 5 +++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/scripts/variantstore/wdl/GvsExtractAvroFilesForHail.wdl b/scripts/variantstore/wdl/GvsExtractAvroFilesForHail.wdl index bd71aa608c8..0e499aaace7 100644 --- a/scripts/variantstore/wdl/GvsExtractAvroFilesForHail.wdl +++ b/scripts/variantstore/wdl/GvsExtractAvroFilesForHail.wdl @@ -78,7 +78,8 @@ workflow GvsExtractAvroFilesForHail { call Utils.IsUsingCompressedReferences { input: - project_id = project_id, + query_project_id = project_id, + dest_project_id = project_id, dataset_name = dataset_name, cloud_sdk_docker = effective_cloud_sdk_docker, } diff --git a/scripts/variantstore/wdl/GvsPrepareRangesCallset.wdl b/scripts/variantstore/wdl/GvsPrepareRangesCallset.wdl index 7e25984700b..623c7a58d8f 100644 --- a/scripts/variantstore/wdl/GvsPrepareRangesCallset.wdl +++ b/scripts/variantstore/wdl/GvsPrepareRangesCallset.wdl @@ -47,7 +47,8 @@ workflow GvsPrepareCallset { call Utils.IsUsingCompressedReferences { input: - project_id = query_project, + query_project_id = query_project, + dest_project_id = destination_project, dataset_name = dataset_name, cloud_sdk_docker = effective_cloud_sdk_docker, } diff --git a/scripts/variantstore/wdl/GvsUtils.wdl b/scripts/variantstore/wdl/GvsUtils.wdl index 43c716c68d0..804e096e2e1 100644 --- a/scripts/variantstore/wdl/GvsUtils.wdl +++ b/scripts/variantstore/wdl/GvsUtils.wdl @@ -798,7 +798,8 @@ task IsVQSRLite { task IsUsingCompressedReferences { input { - String project_id + String query_project_id + String dest_project_id String dataset_name String cloud_sdk_docker } @@ -811,7 +812,7 @@ task IsUsingCompressedReferences { SELECT 
column_name FROM - `~{dataset_name}.INFORMATION_SCHEMA.COLUMNS` + `~{project_id}.~{dataset_name}.INFORMATION_SCHEMA.COLUMNS` WHERE table_name = "ref_ranges_001" AND (column_name = "location" OR column_name = "packed_ref_data") ' | sed 1d > column_name.txt From 44b70f3bd18c5bb305ce12f5cdf504ca81c3f41b Mon Sep 17 00:00:00 2001 From: Yonghao Yu Date: Tue, 29 Oct 2024 16:12:24 -0400 Subject: [PATCH 2/5] Update GvsUtils.wdl --- scripts/variantstore/wdl/GvsUtils.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/variantstore/wdl/GvsUtils.wdl b/scripts/variantstore/wdl/GvsUtils.wdl index 804e096e2e1..a737dfb5943 100644 --- a/scripts/variantstore/wdl/GvsUtils.wdl +++ b/scripts/variantstore/wdl/GvsUtils.wdl @@ -812,7 +812,7 @@ task IsUsingCompressedReferences { SELECT column_name FROM - `~{project_id}.~{dataset_name}.INFORMATION_SCHEMA.COLUMNS` + `~{dest_project_id}.~{dataset_name}.INFORMATION_SCHEMA.COLUMNS` WHERE table_name = "ref_ranges_001" AND (column_name = "location" OR column_name = "packed_ref_data") ' | sed 1d > column_name.txt From 82d7cca38e1c80d56da2aa2b7d974e844f32865d Mon Sep 17 00:00:00 2001 From: Yonghao Yu Date: Wed, 30 Oct 2024 09:04:02 -0400 Subject: [PATCH 3/5] query_project_id --- scripts/variantstore/wdl/GvsUtils.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/variantstore/wdl/GvsUtils.wdl b/scripts/variantstore/wdl/GvsUtils.wdl index a737dfb5943..f8d73b0396a 100644 --- a/scripts/variantstore/wdl/GvsUtils.wdl +++ b/scripts/variantstore/wdl/GvsUtils.wdl @@ -808,7 +808,7 @@ task IsUsingCompressedReferences { PS4='\D{+%F %T} \w $ ' set -o errexit -o nounset -o pipefail -o xtrace - bq --apilog=false query --project_id=~{project_id} --format=csv --use_legacy_sql=false ' + bq --apilog=false query --project_id=~{query_project_id} --format=csv --use_legacy_sql=false ' SELECT column_name FROM From d2f79de3c3a2eee84c4fb505fec5ec930d0df2cd Mon Sep 17 00:00:00 2001 From: Yonghao Yu Date: Wed, 30 Oct 2024 
11:54:57 -0400 Subject: [PATCH 4/5] test1 --- scripts/variantstore/wdl/GvsExtractAvroFilesForHail.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/variantstore/wdl/GvsExtractAvroFilesForHail.wdl b/scripts/variantstore/wdl/GvsExtractAvroFilesForHail.wdl index ab64dccd003..20b9b20819c 100644 --- a/scripts/variantstore/wdl/GvsExtractAvroFilesForHail.wdl +++ b/scripts/variantstore/wdl/GvsExtractAvroFilesForHail.wdl @@ -303,7 +303,7 @@ task ExtractFromSuperpartitionedTables { EXPORT DATA OPTIONS( uri='${avro_prefix}/vets/vet_${str_table_index}/vet_${str_table_index}_*.avro', format='AVRO', compression='SNAPPY') AS SELECT location, v.sample_id, ref, REPLACE(alt,',','') alt, call_GT as GT, call_AD as AD, call_GQ as GQ, cast(SPLIT(call_pl,',')[OFFSET(0)] as int64) as RGQ, - call_PS as PS + SAFE_CAST(call_PS AS INT64) AS PS FROM \`~{project_id}.~{dataset_name}.vet_${str_table_index}\` v INNER JOIN \`~{project_id}.~{dataset_name}.sample_info\` s ON s.sample_id = v.sample_id WHERE withdrawn IS NULL AND From be92207f6f2ba4cc302c9edb4dd9d4dba1a99ff1 Mon Sep 17 00:00:00 2001 From: Yonghao Yu Date: Wed, 30 Oct 2024 13:34:00 -0400 Subject: [PATCH 5/5] change --- .../wdl/extract/create_ranges_cohort_extract_data_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/variantstore/wdl/extract/create_ranges_cohort_extract_data_table.py b/scripts/variantstore/wdl/extract/create_ranges_cohort_extract_data_table.py index fbbe6fafadf..8363d5abdc7 100644 --- a/scripts/variantstore/wdl/extract/create_ranges_cohort_extract_data_table.py +++ b/scripts/variantstore/wdl/extract/create_ranges_cohort_extract_data_table.py @@ -254,7 +254,7 @@ def populate_final_extract_table_with_vet(fq_ranges_dataset, fq_destination_tabl # split file into files with x lines and then run def get_ref_subselect(fq_vet_table, samples, id): sample_stanza = ','.join([str(s) for s in samples]) - sql = f" q_{id} AS (SELECT location, sample_id, ref, alt, call_GT, 
call_GQ, call_AD, AS_QUALapprox, QUALapprox, CALL_PL, CALL_PGT, CALL_PID, CALL_PS FROM \n" \ + sql = f" q_{id} AS (SELECT location, sample_id, ref, alt, call_GT, call_GQ, call_AD, AS_QUALapprox, QUALapprox, CALL_PL, CALL_PGT, CALL_PID, SAFE_CAST(CALL_PS AS INT64) AS CALL_PS FROM \n" \ f" `{fq_vet_table}` WHERE sample_id IN ({sample_stanza})), " return sql