-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
merge with develop, resolve conflicts
- Loading branch information
Showing
13 changed files
with
588 additions
and
101 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ process run_quac_upload_report_error { | |
debug true | ||
|
||
input: | ||
val previous | ||
val cohort | ||
|
||
output: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
FROM python:3.8 | ||
FROM python:3.11 | ||
|
||
WORKDIR /root/scripts | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
synapseclient[pandas] == 2.7.2 | ||
synapseclient[pandas] == 4.6.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,230 @@ | ||
from unittest.mock import MagicMock, create_autospec, patch | ||
|
||
import numpy as np | ||
import pandas as pd | ||
import pytest | ||
import synapseclient | ||
from synapseclient import Schema, Table | ||
from table_updates import utilities | ||
|
||
|
||
@pytest.fixture(scope="session")
def syn():
    """Return an autospecced mock of ``synapseclient.Synapse`` shared by the session."""
    mock_client = create_autospec(synapseclient.Synapse)
    return mock_client
|
||
|
||
@pytest.fixture(scope="session")
def table_schema():
    """Return a Synapse ``Schema`` named ``test_table`` with a STRING and an INTEGER column."""
    return synapseclient.table.Schema(
        name="test_table",
        parent="syn123",
        column_names=["col1", "col2"],
        column_types=["STRING", "INTEGER"],
    )
|
||
|
||
@pytest.mark.parametrize(
    "query_return_df,select,query,expected_df",
    [
        pytest.param(
            pd.DataFrame({"col1": ["value1", "value2"]}),
            "col1",
            "SELECT col1 from syn123456",
            pd.DataFrame({"col1": ["value1", "value2"]}),
            id="selected_single_column",
        ),
        pytest.param(
            pd.DataFrame({"col1": ["value1", "value2"], "col2": [1, 2]}),
            "col1,col2",
            "SELECT col1,col2 from syn123456",
            pd.DataFrame({"col1": ["value1", "value2"], "col2": [1, 2]}),
            id="selected_multiple_column",
        ),
        pytest.param(
            pd.DataFrame({"col1": ["NA", "value1", "None"], "col2": [1, 2, 3]}),
            "*",
            "SELECT * from syn123456",
            pd.DataFrame({"col1": [np.nan, "value1", "None"], "col2": [1, 2, 3]}),
            id="pull_table_with_na_values_all_columns",
        ),
        pytest.param(
            pd.DataFrame(columns=["col1", "col2"]),
            "*",
            "SELECT * from syn123456",
            pd.DataFrame(columns=["col1", "col2"]),
            id="pull_empty_table_all_columns",
        ),
    ],
)
def test_download_synapse_table_default_condition(
    syn, table_schema, query_return_df, select, query, expected_df
):
    """Check the query and frame produced when only ``select`` is supplied.

    ``syn.tableQuery`` is replaced by a mock that returns ``query_return_df``
    wrapped in a ``Table``. The test then verifies that exactly one query,
    equal to ``query``, was issued and that the returned frame equals
    ``expected_df``.
    """
    mocked_table = Table(table_schema, query_return_df)
    syn.tableQuery = MagicMock(return_value=mocked_table)

    downloaded = utilities.download_synapse_table(syn, "syn123456", select)

    # The query text must be built from the select clause alone.
    syn.tableQuery.assert_called_once_with(query)
    pd.testing.assert_frame_equal(downloaded, expected_df)
|
||
|
||
@pytest.mark.parametrize(
    "query_return_df,condition,query,expected_df",
    [
        pytest.param(
            pd.DataFrame({"col1": ["value1"], "col2": [1]}),
            "col1 = 'value1'",
            "SELECT * from syn123456 WHERE col1 = 'value1'",
            pd.DataFrame({"col1": ["value1"], "col2": [1]}),
            id="selected_row_all_columns",
        ),
        pytest.param(
            pd.DataFrame({"col1": ["NA", "value1", "None"], "col2": [1, 1, 1]}),
            "col2 = 1",
            "SELECT * from syn123456 WHERE col2 = 1",
            pd.DataFrame({"col1": [np.nan, "value1", "None"], "col2": [1, 1, 1]}),
            id="pull_table_with_na_values_all_columns",
        ),
    ],
)
def test_download_synapse_table_with_condition(
    syn, table_schema, query_return_df, condition, query, expected_df
):
    """Check the query and frame produced when only ``condition`` is supplied.

    The mocked ``syn.tableQuery`` returns ``query_return_df`` wrapped in a
    ``Table``. The test asserts a single call with ``query`` and that the
    returned frame equals ``expected_df``.
    """
    mocked_table = Table(table_schema, query_return_df)
    syn.tableQuery = MagicMock(return_value=mocked_table)

    downloaded = utilities.download_synapse_table(
        syn, "syn123456", condition=condition
    )

    # The condition must be appended as a WHERE clause to a SELECT * query.
    syn.tableQuery.assert_called_once_with(query)
    pd.testing.assert_frame_equal(downloaded, expected_df)
|
||
|
||
@pytest.mark.parametrize(
    "query_return_df,select,condition,query,expected_df",
    [
        pytest.param(
            pd.DataFrame({"col1": ["value1"], "col2": [1]}),
            "col1",
            "col1 = 'value1'",
            "SELECT col1 from syn123456 WHERE col1 = 'value1'",
            pd.DataFrame({"col1": ["value1"], "col2": [1]}),
            id="selected_one_columns_with_condition",
        ),
        pytest.param(
            pd.DataFrame({"col1": ["value1"], "col2": [1]}),
            "col1,col2",
            "col1 = 'value1'",
            "SELECT col1,col2 from syn123456 WHERE col1 = 'value1'",
            pd.DataFrame({"col1": ["value1"], "col2": [1]}),
            id="select_multiple_columns_with_condition",
        ),
    ],
)
def test_download_synapse_table_with_select_and_condition(
    syn, table_schema, query_return_df, select, condition, query, expected_df
):
    """Check the query and frame produced when ``select`` and ``condition`` are both given.

    The mocked ``syn.tableQuery`` always hands back ``query_return_df``
    wrapped in a ``Table``, whatever query text it receives. The test asserts
    a single call with ``query`` and that the returned frame equals
    ``expected_df``.
    """
    mocked_table = Table(table_schema, query_return_df)
    syn.tableQuery = MagicMock(return_value=mocked_table)

    downloaded = utilities.download_synapse_table(
        syn, "syn123456", select=select, condition=condition
    )

    # Both the select list and the WHERE clause must appear in the query.
    syn.tableQuery.assert_called_once_with(query)
    pd.testing.assert_frame_equal(downloaded, expected_df)
|
||
|
||
def test_download_empty_synapse_table_with_condition(
    syn,
    table_schema,
):
    """Check that an empty query result with a condition yields an empty frame.

    The mocked ``syn.tableQuery`` returns a ``Table`` built from a frame that
    has columns ``col1`` and ``col2`` but no rows.
    """
    empty_result = pd.DataFrame(columns=["col1", "col2"])
    syn.tableQuery = MagicMock(return_value=Table(table_schema, empty_result))

    downloaded = utilities.download_synapse_table(
        syn, "syn123456", condition="col2 = 1"
    )

    expected_query = "SELECT * from syn123456 WHERE col2 = 1"
    syn.tableQuery.assert_called_once_with(expected_query)
    # Compare against a freshly built empty frame with the same columns.
    expected_frame = pd.DataFrame(columns=["col1", "col2"])
    pd.testing.assert_frame_equal(downloaded, expected_frame)
|
||
|
||
@pytest.mark.parametrize(
    "input_df,cols,expected_df",
    [
        pytest.param(
            pd.DataFrame({"col1": ["\\abc", "def"], "col2": ["abc", "def\\"]}),
            ["col1", "col2"],
            pd.DataFrame({"col1": ["abc", "def"], "col2": ["abc", "def"]}),
            id="multiple_columns_with_backslash",
        ),
        pytest.param(
            pd.DataFrame({"col1": ["abc", "def"], "col2": ["abc", "def\\"]}),
            ["col2"],
            pd.DataFrame({"col1": ["abc", "def"], "col2": ["abc", "def"]}),
            id="one_column_with_backslash",
        ),
        pytest.param(
            pd.DataFrame({"col1": ["abc", "def"], "col2": ["abc", "def\\"]}),
            ["col1"],
            pd.DataFrame({"col1": ["abc", "def"], "col2": ["abc", "def\\"]}),
            id="one_column_with_backslash_but_not_selected",
        ),
        pytest.param(
            pd.DataFrame({"col1": ["abc", "def"], "col2": ["abc", "def"]}),
            ["col1", "col2"],
            pd.DataFrame({"col1": ["abc", "def"], "col2": ["abc", "def"]}),
            id="none_column_with_backslash",
        ),
        pytest.param(
            pd.DataFrame(
                {
                    "col1": ["\\abc", "de\\f", "ghi\\"],
                    "col2": ["abc(\\hh)", "def,\\,hh", "ghi, ,\\hh"],
                }
            ),
            ["col1", "col2"],
            pd.DataFrame(
                {
                    "col1": ["abc", "def", "ghi"],
                    "col2": ["abc(hh)", "def,,hh", "ghi, ,hh"],
                }
            ),
            id="backslashes_in_multiple_places",
        ),
        pytest.param(
            pd.DataFrame(
                {
                    "col1": [1, "de\\f", "ghi\\", np.nan],
                    "col2": ["abc(\\hh)", "def,\\,hh", "ghi, ,\\hh", 2],
                }
            ),
            ["col1", "col2"],
            pd.DataFrame(
                {
                    "col1": [1, "def", "ghi", np.nan],
                    "col2": ["abc(hh)", "def,,hh", "ghi, ,hh", 2],
                }
            ),
            id="various_column_types",
        ),
    ],
)
def test_remove_backslash(input_df, cols, expected_df):
    """Check that ``remove_backslash`` strips backslashes only from the listed columns.

    The cases cover backslashes at the start, middle and end of values,
    columns left out of ``cols``, and columns mixing strings with numbers
    and NaN.
    """
    cleaned = utilities.remove_backslash(input_df, cols)
    pd.testing.assert_frame_equal(cleaned, expected_df)
|
||
|
||
@pytest.mark.parametrize(
    "input_df,cols",
    [
        (pd.DataFrame({"col1": ["\\abc", "def"], "col2": ["abc", "def\\"]}), ["col3"]),
    ],
)
def test_remove_backslsh_fail(input_df, cols):
    """Check that ``remove_backslash`` raises ``ValueError`` for an unknown column.

    ``cols`` names a column (``col3``) that is absent from ``input_df``.
    """
    import re

    # ``match`` is applied as a regular expression, so the message is escaped
    # to make its "." characters match literally rather than any character.
    expected_message = re.escape(
        "Invalid column list. Not all columns are in the dataframe."
    )
    with pytest.raises(ValueError, match=expected_message):
        utilities.remove_backslash(input_df, cols)
Oops, something went wrong.