From 50e1b865e8e0887fbb0d62234c1fda0c8514612f Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Fri, 9 Feb 2024 13:55:02 -0800 Subject: [PATCH 1/5] Add accession to dropped strains where possible --- phylogenetic/config/dropped_strains.txt | 82 ++++++++++++------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/phylogenetic/config/dropped_strains.txt b/phylogenetic/config/dropped_strains.txt index 1118ed58..b15eb91e 100644 --- a/phylogenetic/config/dropped_strains.txt +++ b/phylogenetic/config/dropped_strains.txt @@ -26,44 +26,44 @@ DENV2/TRINIDAD_AND_TOBAGO/NA/1953 DENV4/MALAYSIA/P215/1975 DENV4/MALAYSIA/P514/1975 DENV4/MALAYSIA/P731120/1973 -D2Sab2015 # miscategorized -QML22 # miscategorized -DAK_Ar_A1247 # sylvatic -Dak_Ar_2039 # sylvatic -Dak_Ar_578 # sylvatic -DAK_Ar_510 # sylvatic -PM33974 # sylvatic -Dak_Ar_A2022 # sylvatic -Dak_Ar_141069 # sylvatic -Dak_Ar_141070 # sylvatic -Dak_Ar_D75505 # sylvatic -Dak_HD_10674 # sylvatic -Dak_Ar_D20761 # sylvatic -IBH11664 # sylvatic -IBH11208 # sylvatic -IBH11234 # sylvatic -P8_1407 # sylvatic -P75_514 # sylvatic -P73_1120 # sylvatic -P75_215 # sylvatic -DKD811 # sylvatic -ZS01/01 # metadata issue -Vero # cell line -MS13002673 # too divergent -MS11011405 # too divergent -V43257 # too divergent -KDC0574A2_06/02/2011 # too divergent -00178/03 # too divergent -00759/12 # too divergent -00988/11 # too divergent -01113/10 # too divergent -01224/04 # too divergent -01231/10 # too divergent -01488/09 # too divergent -01542/04 # too divergent -dev1 # too divergent -DKE_121 # too divergent -SENDAK_HD_10674 # sylvatic -DENV2_1_DAK_HD_76395 # sylvatic -DENV3/PUERTORICO/1963/PRS_228762_AC27 # too divergent -PR_6 # too divergent +KY923048 # D2Sab2015 # miscategorized +KX274130 # QML22 # miscategorized +EF105383 # DAK_Ar_A1247 # sylvatic +EF105382 # Dak_Ar_2039 # sylvatic +EF105380 # Dak_Ar_578 # sylvatic +EF105381 # DAK_Ar_510 # sylvatic +EF105378 # PM33974 # sylvatic +EF105386 # Dak_Ar_A2022 # sylvatic 
+EF105389 # Dak_Ar_141069 # sylvatic +EF105390 # Dak_Ar_141070 # sylvatic +EF457904 # Dak_Ar_D75505 # sylvatic +EF105384 # Dak_HD_10674 # sylvatic +EF105385 # Dak_Ar_D20761 # sylvatic +EF105388 # IBH11664 # sylvatic +EF105387 # IBH11208 # sylvatic +EU003591 # IBH11234 # sylvatic +EF105379 # P8_1407 # sylvatic +JF262779 # P75_514 # sylvatic +JF262780 # P73_1120 # sylvatic +EF457906 # P75_215 # sylvatic +FJ467493 # DKD811 # sylvatic +EF051521 # ZS01/01 # metadata issue +MT929160 # Vero # cell line +MH048676 # MS13002673 # too divergent +MH048674 # MS11011405 # too divergent +MT597439 # V43257 # too divergent +MN448607 # KDC0574A2_06/02/2011 # too divergent +ON046268 # 00178/03 # too divergent +ON046278 # 00759/12 # too divergent +ON046276 # 00988/11 # too divergent +ON046273 # 01113/10 # too divergent +ON046270 # 01224/04 # too divergent +ON046274 # 01231/10 # too divergent +ON046272 # 01488/09 # too divergent +ON046271 # 01542/04 # too divergent +MZ284953 # dev1 # too divergent +MZ215848 # DKE_121 # too divergent +MW946564 # SENDAK_HD_10674 # sylvatic +OK605757 # DENV2_1_DAK_HD_76395 # sylvatic +MW945427 # DENV3/PUERTORICO/1963/PRS_228762_AC27 # too divergent +OM258630 # PR_6 # too divergent From 20819527239362603c26a8c8f165d52ef9674536 Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Fri, 9 Feb 2024 14:04:46 -0800 Subject: [PATCH 2/5] Cleanup duplicates --- phylogenetic/config/dropped_strains.txt | 54 +++++++++---------------- 1 file changed, 18 insertions(+), 36 deletions(-) diff --git a/phylogenetic/config/dropped_strains.txt b/phylogenetic/config/dropped_strains.txt index b15eb91e..2a599754 100644 --- a/phylogenetic/config/dropped_strains.txt +++ b/phylogenetic/config/dropped_strains.txt @@ -4,49 +4,31 @@ DENV1/MALAYSIA/P1244/1972 DENV1/VIETNAM/BIDV3990/2008 DENV1/VIETNAM/BIDV992/2006 DENV2/AUSTRALIA/QML22/2015 -DENV2/BURKINA_FASO/DAKAR2039/1980 -DENV2/BURKINA_FASO/DAKARA2022/1980 -DENV2/COTE_D_IVOIRE/DAKAR510/1980 -DENV2/COTE_D_IVOIRE/DAKAR578/1980 
-DENV2/COTE_D_IVOIRE/DAKARA1247/1980 -DENV2/GUINEA/PM33974/1981 DENV2/HAITI/DENGUEVIRUS2HOMOSAPIENS1/2016 -DENV2/MALAYSIA/DKD811/2008 -DENV2/MALAYSIA/P81407/1970 DENV2/MALAYSIA/SAB/2015 -DENV2/NIGERIA/IBH11208/1966 -DENV2/NIGERIA/IBH11234/1966 -DENV2/NIGERIA/IBH11664/1966 DENV2/SENEGAL/0674/1970 -DENV2/SENEGAL/DAKAR0761/1974 -DENV2/SENEGAL/DAKAR141069/1999 -DENV2/SENEGAL/DAKAR141070/1999 -DENV2/SENEGAL/DAKARD75505/1999 DENV2/TRINIDAD_AND_TOBAGO/NA/1953 -DENV4/MALAYSIA/P215/1975 -DENV4/MALAYSIA/P514/1975 -DENV4/MALAYSIA/P731120/1973 KY923048 # D2Sab2015 # miscategorized KX274130 # QML22 # miscategorized -EF105383 # DAK_Ar_A1247 # sylvatic -EF105382 # Dak_Ar_2039 # sylvatic -EF105380 # Dak_Ar_578 # sylvatic -EF105381 # DAK_Ar_510 # sylvatic -EF105378 # PM33974 # sylvatic -EF105386 # Dak_Ar_A2022 # sylvatic -EF105389 # Dak_Ar_141069 # sylvatic -EF105390 # Dak_Ar_141070 # sylvatic -EF457904 # Dak_Ar_D75505 # sylvatic +EF105383 # DAK_Ar_A1247 # sylvatic # DENV2/COTE_D_IVOIRE/DAKARA1247/1980 +EF105382 # Dak_Ar_2039 # sylvatic # DENV2/BURKINA_FASO/DAKAR2039/1980 +EF105380 # Dak_Ar_578 # sylvatic # DENV2/COTE_D_IVOIRE/DAKAR578/1980 +EF105381 # DAK_Ar_510 # sylvatic # DENV2/COTE_D_IVOIRE/DAKAR510/1980 +EF105378 # PM33974 # sylvatic # DENV2/GUINEA/PM33974/1981 +EF105386 # Dak_Ar_A2022 # sylvatic # DENV2/BURKINA_FASO/DAKARA2022/1980 +EF105389 # Dak_Ar_141069 # sylvatic # DENV2/SENEGAL/DAKAR141069/1999 +EF105390 # Dak_Ar_141070 # sylvatic # DENV2/SENEGAL/DAKAR141070/1999 +EF457904 # Dak_Ar_D75505 # sylvatic # DENV2/SENEGAL/DAKARD75505/1999 EF105384 # Dak_HD_10674 # sylvatic -EF105385 # Dak_Ar_D20761 # sylvatic -EF105388 # IBH11664 # sylvatic -EF105387 # IBH11208 # sylvatic -EU003591 # IBH11234 # sylvatic -EF105379 # P8_1407 # sylvatic -JF262779 # P75_514 # sylvatic -JF262780 # P73_1120 # sylvatic -EF457906 # P75_215 # sylvatic -FJ467493 # DKD811 # sylvatic +EF105385 # Dak_Ar_D20761 # sylvatic # DENV2/SENEGAL/DAKAR0761/1974 +EF105388 # IBH11664 # sylvatic # 
DENV2/NIGERIA/IBH11664/1966 +EF105387 # IBH11208 # sylvatic # DENV2/NIGERIA/IBH11208/1966 +EU003591 # IBH11234 # sylvatic # DENV2/NIGERIA/IBH11234/1966 +EF105379 # P8_1407 # sylvatic # DENV2/MALAYSIA/P81407/1970 +JF262779 # P75_514 # sylvatic # DENV4/MALAYSIA/P514/1975 +JF262780 # P73_1120 # sylvatic # DENV4/MALAYSIA/P731120/1973 +EF457906 # P75_215 # sylvatic # DENV4/MALAYSIA/P215/1975 +FJ467493 # DKD811 # sylvatic # DENV2/MALAYSIA/DKD811/2008 EF051521 # ZS01/01 # metadata issue MT929160 # Vero # cell line MH048676 # MS13002673 # too divergent From 751e623e44c2c62bd4ab6c122cb421e16ca2987c Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Fri, 9 Feb 2024 15:03:44 -0800 Subject: [PATCH 3/5] From a search of metadata and academic papers --- phylogenetic/config/dropped_strains.txt | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/phylogenetic/config/dropped_strains.txt b/phylogenetic/config/dropped_strains.txt index 2a599754..34676ab8 100644 --- a/phylogenetic/config/dropped_strains.txt +++ b/phylogenetic/config/dropped_strains.txt @@ -1,15 +1,13 @@ -DENV/SPAIN/EEB17/2009 -DENV1/FRANCE/00475/2008 -DENV1/MALAYSIA/P1244/1972 -DENV1/VIETNAM/BIDV3990/2008 -DENV1/VIETNAM/BIDV992/2006 -DENV2/AUSTRALIA/QML22/2015 -DENV2/HAITI/DENGUEVIRUS2HOMOSAPIENS1/2016 -DENV2/MALAYSIA/SAB/2015 -DENV2/SENEGAL/0674/1970 -DENV2/TRINIDAD_AND_TOBAGO/NA/1953 -KY923048 # D2Sab2015 # miscategorized -KX274130 # QML22 # miscategorized +JF260983 # DENV/SPAIN/EEB17/2009 # sylvatic according to https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3149010/ +HE795086 # DENV1/FRANCE/00475/2008 # 2008/00475 +EF457905 # DENV1/MALAYSIA/P1244/1972 # P72-1244 +GU131762 # DENV1/VIETNAM/BIDV3990/2008 # DENV-1/VN/BID-V3990/2008 +EU482536 # DENV1/VIETNAM/BIDV992/2006 # DENV-1/VN/BID-V992/2006 +KX702403 # DENV2/HAITI/DENGUEVIRUS2HOMOSAPIENS1/2016 +MW946564 # DENV2/SENEGAL/0674/1970 # SENDAK-HD-10674 +DENV2/TRINIDAD_AND_TOBAGO/NA/1953 # Perhaps from 
https://pubmed.ncbi.nlm.nih.gov/13351628/, but did not search far for the accession +KY923048 # D2Sab2015 # miscategorized # DENV2/MALAYSIA/SAB/2015 +KX274130 # QML22 # miscategorized # DENV2/AUSTRALIA/QML22/2015 EF105383 # DAK_Ar_A1247 # sylvatic # DENV2/COTE_D_IVOIRE/DAKARA1247/1980 EF105382 # Dak_Ar_2039 # sylvatic # DENV2/BURKINA_FASO/DAKAR2039/1980 EF105380 # Dak_Ar_578 # sylvatic # DENV2/COTE_D_IVOIRE/DAKAR578/1980 From be23c3696ce758cf91b81a5a86d3b57be2d3594f Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Tue, 13 Feb 2024 06:58:45 -0500 Subject: [PATCH 4/5] Document format of exclude list --- phylogenetic/config/dropped_strains.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/phylogenetic/config/dropped_strains.txt b/phylogenetic/config/dropped_strains.txt index 34676ab8..1940fe11 100644 --- a/phylogenetic/config/dropped_strains.txt +++ b/phylogenetic/config/dropped_strains.txt @@ -1,3 +1,4 @@ +# Format: [# ] JF260983 # DENV/SPAIN/EEB17/2009 # sylvatic according to https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3149010/ HE795086 # DENV1/FRANCE/00475/2008 # 2008/00475 EF457905 # DENV1/MALAYSIA/P1244/1972 # P72-1244 From ce4c91fd37513391a0a80d778daf9628cad5b56e Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Tue, 13 Feb 2024 07:02:53 -0500 Subject: [PATCH 5/5] Rename dropped_strains.txt to exclude.txt Rename the file dropped_strains.txt to exclude.txt to better reflect its purpose since it lists accession numbers instead of strain names. This file is a list of sequences to exclude from analysis and gets passed to `augur filter --exclude`. 
--- phylogenetic/config/config_dengue.yaml | 2 +- phylogenetic/config/{dropped_strains.txt => exclude.txt} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename phylogenetic/config/{dropped_strains.txt => exclude.txt} (100%) diff --git a/phylogenetic/config/config_dengue.yaml b/phylogenetic/config/config_dengue.yaml index b9588622..3824415b 100644 --- a/phylogenetic/config/config_dengue.yaml +++ b/phylogenetic/config/config_dengue.yaml @@ -2,7 +2,7 @@ strain_id_field: "accession" display_strain_field: "strain" filter: - exclude: "config/dropped_strains.txt" + exclude: "config/exclude.txt" group_by: "year region" min_length: 5000 sequences_per_group: diff --git a/phylogenetic/config/dropped_strains.txt b/phylogenetic/config/exclude.txt similarity index 100% rename from phylogenetic/config/dropped_strains.txt rename to phylogenetic/config/exclude.txt