Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Yyu test 1 #9026

Draft
wants to merge 6 commits into
base: ah_var_store
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions scripts/variantstore/wdl/GvsExtractAvroFilesForHail.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ workflow GvsExtractAvroFilesForHail {

call Utils.IsUsingCompressedReferences {
input:
project_id = project_id,
query_project_id = project_id,
dest_project_id = project_id,
dataset_name = dataset_name,
ref_table_timestamp = RefTableDatetimeCheck.last_modified_timestamp,
cloud_sdk_docker = effective_cloud_sdk_docker,
Expand Down Expand Up @@ -302,7 +303,7 @@ task ExtractFromSuperpartitionedTables {
EXPORT DATA OPTIONS(
uri='${avro_prefix}/vets/vet_${str_table_index}/vet_${str_table_index}_*.avro', format='AVRO', compression='SNAPPY') AS
SELECT location, v.sample_id, ref, REPLACE(alt,',<NON_REF>','') alt, call_GT as GT, call_AD as AD, call_GQ as GQ, cast(SPLIT(call_pl,',')[OFFSET(0)] as int64) as RGQ,
call_PS as PS
SAFE_CAST(call_PS AS INT64) AS PS
FROM \`~{project_id}.~{dataset_name}.vet_${str_table_index}\` v
INNER JOIN \`~{project_id}.~{dataset_name}.sample_info\` s ON s.sample_id = v.sample_id
WHERE withdrawn IS NULL AND
Expand Down
3 changes: 2 additions & 1 deletion scripts/variantstore/wdl/GvsPrepareRangesCallset.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ workflow GvsPrepareCallset {

call Utils.IsUsingCompressedReferences {
input:
project_id = query_project,
query_project_id = query_project,
dest_project_id = destination_project,
dataset_name = dataset_name,
ref_table_timestamp = RefTableDatetimeCheck.last_modified_timestamp,
cloud_sdk_docker = effective_cloud_sdk_docker,
Expand Down
7 changes: 4 additions & 3 deletions scripts/variantstore/wdl/GvsUtils.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -929,7 +929,8 @@ task IsVETS {

task IsUsingCompressedReferences {
input {
String project_id
String query_project_id
String dest_project_id
String dataset_name
String ref_table_timestamp
String cloud_sdk_docker
Expand All @@ -940,11 +941,11 @@ task IsUsingCompressedReferences {
set -o errexit -o nounset -o pipefail -o xtrace

# bq query --max_rows check: ok one row
bq --apilog=false query --project_id=~{project_id} --format=csv --use_legacy_sql=false '
bq --apilog=false query --project_id=~{query_project_id} --format=csv --use_legacy_sql=false '
SELECT
column_name
FROM
`~{dataset_name}.INFORMATION_SCHEMA.COLUMNS`
`~{dest_project_id}.~{dataset_name}.INFORMATION_SCHEMA.COLUMNS`
WHERE
table_name = "ref_ranges_001"
AND (column_name = "location" OR column_name = "packed_ref_data") ' | sed 1d > column_name.txt
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def populate_final_extract_table_with_vet(fq_ranges_dataset, fq_destination_tabl
# split file into files with x lines and then run
def get_ref_subselect(fq_vet_table, samples, id):
sample_stanza = ','.join([str(s) for s in samples])
sql = f" q_{id} AS (SELECT location, sample_id, ref, alt, call_GT, call_GQ, call_AD, AS_QUALapprox, QUALapprox, CALL_PL, CALL_PGT, CALL_PID, CALL_PS FROM \n" \
    sql = f" q_{id} AS (SELECT location, sample_id, ref, alt, call_GT, call_GQ, call_AD, AS_QUALapprox, QUALapprox, CALL_PL, CALL_PGT, CALL_PID, SAFE_CAST(CALL_PS AS INT64) AS CALL_PS FROM \n" \
f" `{fq_vet_table}` WHERE sample_id IN ({sample_stanza})), "
return sql

Expand Down
Loading