From 215c0c90a241ae168d90f20965e6e32697f14e1c Mon Sep 17 00:00:00 2001 From: Nils Hoffmann <3309580+nilshoffmann@users.noreply.github.com> Date: Fri, 12 Apr 2024 14:39:54 +0200 Subject: [PATCH] Squashed 'src/cppgoslin/' changes from b14796dc..58837eae 58837eae Merge pull request #16 from lifs-tools/alpine-linux f5cb5f7f Updated imports be05b436 Added explicit imports for cstdint and vector 6449f0d8 Merge commit '3892beb34cf318d5e878b565b5f4e802c87858c5' cb6fcc8e adding more mediator support 3892beb3 adding more trivial mediator names 9329952d Merge commit '48ebc1d373919475f619a0f93852db4efd1e950f' 48ebc1d3 updating grammars e4cdf628 added lyso galsph glcsph 9ad782e2 Merge commit '37fa28110479dc9e74f8589cc381bebd9e27d01b' 37fa2811 fixed typo in trivial list 8b841231 Merge commit '8c58c13573c8993b48d9f59857b33bbec7cabe88' 8c58c135 adding more HDoHE support fab15338 more prostaglandins support 3af50808 Merge commit 'cdf1bbf5c3f9495e1bd6a6b7cdf6e4a8246b70f9' ce56b4b3 adding even more support for oxylipins cdf1bbf5 adding more support for oxylipins 8e2231d4 added triH 1a161caa Merge commit '93058c0c53af43484158666f7667e412533900dc' 93058c0c updated tri hydro information fd0bd7f6 bug fixing on recursive function a54eb4ed added ARA c66a6549 Merge commit '3759dba1db3f35ecbd676122d2dd6a0cb5142f95' 615bd4b0 updated prostaglandins 3759dba1 updated trivial list e8a8ce3f Merge commit 'b9cd9277ccb82b64f1bb43b77db6f39468e7f606' b9cd9277 added more support for oxylipins 649656c6 Merge commit '9946c54034cd8ffea235abfc28b79822690d5795' 9946c540 adding more oxolipins bd1d4ca4 updated unit tests 09932b8e Merge commit '9437c99f889d4bf246ca7d1e20857ccbb36a3603' 9437c99f updated trivial mediator table 61bdbb5e updated trivial mediators abcbd041 Merge commit 'd2ba69537010f6720d3ec2426e96c8ed71d69f67' d2ba6953 added more trivial mediator dbs f9d048bd added trivial mediator double bond information ca7df904 Merge commit 'a3af90548efdd6383114521c9e2b7a3cebf68f01' a3af9054 adding trivial mediator db information git-subtree-dir: src/cppgoslin git-subtree-split: 58837eaeeb4e2283f7b7409a647d02a81fa67889 --- .../parser/LipidMapsParserEventHandler.h | 2 + src/cppgoslin/cppgoslin/parser/Parser.h | 2 +- .../cppgoslin/parser/ParserClasses.h | 1 + src/cppgoslin/cppgoslin/parser/Parser_impl.h | 3 + src/cppgoslin/data/goslin/Goslin.g4 | 2 +- src/cppgoslin/data/goslin/LipidMaps.g4 | 10 +-- src/cppgoslin/data/goslin/lipid-list.csv | 2 + .../data/goslin/trivial_mediators.csv | 5 ++ .../src/parser/GoslinParserEventHandler.cpp | 2 +- .../parser/LipidMapsParserEventHandler.cpp | 78 +++++++++++++------ 10 files changed, 76 insertions(+), 31 deletions(-) diff --git a/src/cppgoslin/cppgoslin/parser/LipidMapsParserEventHandler.h b/src/cppgoslin/cppgoslin/parser/LipidMapsParserEventHandler.h index 4926f34..204d5fd 100644 --- a/src/cppgoslin/cppgoslin/parser/LipidMapsParserEventHandler.h +++ b/src/cppgoslin/cppgoslin/parser/LipidMapsParserEventHandler.h @@ -114,6 +114,8 @@ class LipidMapsParserEventHandler : public LipidBaseParserEventHandler { void add_carbon_pre_len(TreeNode *node); void set_hydro_pre_num(TreeNode *node); void new_sphinga_pure(TreeNode *node); + void c_type(TreeNode *node); + void new_sph(TreeNode *node); }; diff --git a/src/cppgoslin/cppgoslin/parser/Parser.h b/src/cppgoslin/cppgoslin/parser/Parser.h index 76043e6..5055ced 100644 --- a/src/cppgoslin/cppgoslin/parser/Parser.h +++ b/src/cppgoslin/cppgoslin/parser/Parser.h @@ -33,7 +33,7 @@ SOFTWARE. #include "cppgoslin/parser/ParserClasses.h" #include "cppgoslin/domain/Element.h" #include "cppgoslin/parser/KnownGrammars.h" -#include +#include #include #include #include diff --git a/src/cppgoslin/cppgoslin/parser/ParserClasses.h b/src/cppgoslin/cppgoslin/parser/ParserClasses.h index ca10a9a..19c6e72 100644 --- a/src/cppgoslin/cppgoslin/parser/ParserClasses.h +++ b/src/cppgoslin/cppgoslin/parser/ParserClasses.h @@ -28,6 +28,7 @@ SOFTWARE. #include "cppgoslin/domain/StringFunctions.h" +#include #include #include #include diff --git a/src/cppgoslin/cppgoslin/parser/Parser_impl.h b/src/cppgoslin/cppgoslin/parser/Parser_impl.h index 15672b6..86c7738 100644 --- a/src/cppgoslin/cppgoslin/parser/Parser_impl.h +++ b/src/cppgoslin/cppgoslin/parser/Parser_impl.h @@ -21,6 +21,9 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include +#include +#include template const uint32_t Parser::SHIFT = 32; diff --git a/src/cppgoslin/data/goslin/Goslin.g4 b/src/cppgoslin/data/goslin/Goslin.g4 index 50fe560..f842f40 100644 --- a/src/cppgoslin/data/goslin/Goslin.g4 +++ b/src/cppgoslin/data/goslin/Goslin.g4 @@ -182,7 +182,7 @@ mediator_functions : mediator_mono_functions | mediator_di_functions | mediator_ mediator_mono_functions: 'H' | 'Oxo' | 'oxo' | 'OXO' | 'keto' | 'Hp' | 'HP' | 'NO2' | 'K' | 'k' | 'hydroxy' | 'd' | 'deoxy' | 'beta' | 'iso'; mediator_di_functions: 'E' | 'Ep' | 'EP' | 'DH' | 'DiH' | 'diH' | 'dihydro' | 'dh'; mediator_tri_functions : 'TriH' | 'triH' | 'trihydroxy'; -mediator_full_function : mediator_position_group mediator_func_group_separator mediator_mono_functions | mediator_di_pos mediator_func_group_separator mediator_di_functions | mediator_tri_pos mediator_func_group_separator mediator_tri_functions | mediator_position_group mediator_mono_functions; +mediator_full_function : mediator_position_group mediator_func_group_separator mediator_mono_functions | mediator_di_pos mediator_func_group_separator mediator_di_functions | mediator_tri_pos mediator_func_group_separator mediator_tri_functions | mediator_position_group mediator_mono_functions | mediator_position_group mediator_mono_functions | mediator_di_pos mediator_di_functions | mediator_tri_pos mediator_tri_functions | mediator_position_group mediator_mono_functions; mediator_db_function : mediator_double_bond_positions mediator_func_group_separator mediator_mono_functions; mediator_double_bond_positions_full : '(' mediator_double_bond_positions ')'; mediator_double_bond_positions : mediator_double_bond_position | mediator_double_bond_positions ',' mediator_double_bond_position; diff --git a/src/cppgoslin/data/goslin/LipidMaps.g4 b/src/cppgoslin/data/goslin/LipidMaps.g4 index b6bc79d..6318765 100644 --- a/src/cppgoslin/data/goslin/LipidMaps.g4 +++ b/src/cppgoslin/data/goslin/LipidMaps.g4 @@ -122,8 +122,8 @@ hg_threepl: 'SLBPA' | 'PS-NAc' | 'NAPE'; /* sphingolipid rules */ sl: lsl | dsl; -lsl: hg_lslc round_open_bracket lcb round_close_bracket | hg_lslc lcb | sphinga; -dsl: hg_dslc dsl_species | hg_dslc dsl_subspecies; +lsl: hg_lslc round_open_bracket lcb round_close_bracket | hg_lslc lcb | sphinga | hg_lsl_pure; +dsl: hg_dslc dsl_species | hg_dslc dsl_subspecies | hg_dslc c_type; dsl_species: round_open_bracket lcb round_close_bracket | lcb; dsl_subspecies: round_open_bracket lcb_fa_sorted round_close_bracket | lcb_fa_sorted; @@ -140,6 +140,7 @@ sphinga_C_lcb : 'C' sphinga_lcb_len; sphinga_lcb_len : number; sphinga_bracket_lcb : round_open_bracket lcb round_close_bracket | lcb; +c_type : 'C' carbon | 'C' carbon carbon_db_separator db; hg_dslc: hg_dsl_global | hg_dsl_global headgroup_separator; @@ -167,9 +168,8 @@ greek : 'alpha' | 'beta' | 'α' | 'β'; hg_lslc: hg_lsl | hg_lsl headgroup_separator; -hg_lsl: 'SPH' | 'Sph' | 'S1P' | 'HexSph' | 'SPC' | 'SPH-P' | 'LysoSM' | 'SIP'; - - +hg_lsl_pure : hg_lsl; +hg_lsl: 'SPH' | 'Sph' | 'S1P' | 'HexSph' | 'SPC' | 'SPH-P' | 'LysoSM' | 'SIP' | 'GlcSph' | 'GalSph'; /* polyketides */ pk : pk_hg pk_fa; diff --git a/src/cppgoslin/data/goslin/lipid-list.csv b/src/cppgoslin/data/goslin/lipid-list.csv index 3759bf9..e73bdef 100644 --- a/src/cppgoslin/data/goslin/lipid-list.csv +++ b/src/cppgoslin/data/goslin/lipid-list.csv @@ -181,6 +181,8 @@ LCL,GP,Glycerophosphoglycerophosphoglycerols [GP12],4,3,Lyso;Cardio,C9H18O13P2,M LDMPE,GP,Lysodimethylphosphatidylethanolamine,2,1,Lyso,,,,,,,,, Lex-GM1,SP,Globoside,2,2,,C57H93N3O42,,,,,,,, LHexCer,SP,Hexosylsphingosine,1,1,,C6H12O5,HexSph,,,,,,, +LGalCer,SP,Hexosylsphingosine,1,1,,C6H12O5,GalSph,,,,,,, +LGlcCer,SP,Hexosylsphingosine,1,1,,C6H12O5,GlcSph,,,,,,, LHex2Cer,SP,Neutral glycosphingolipids [SP05],1,1,,C12H22O10,LDHCer,LDHCER,,,,,, LHex3Cer,SP,Neutral glycosphingolipids [SP05],1,1,,C18H32O15,LTHCer,LTHCER,,,,,, Linoleic acid,FA,Unsaturated fatty acids [FA0103],0,0,,C18H32O2,,,,,,,, diff --git a/src/cppgoslin/data/goslin/trivial_mediators.csv b/src/cppgoslin/data/goslin/trivial_mediators.csv index 7a0906b..dba5f67 100644 --- a/src/cppgoslin/data/goslin/trivial_mediators.csv +++ b/src/cppgoslin/data/goslin/trivial_mediators.csv @@ -171,3 +171,8 @@ 16-HDoHE 4,7,10,13,17,19 17-HDoHE 4,7,10,13,15,19 20-HDoHE 4,7,10,13,16,18 +7(8)-EpDPE 4,10,13,16,19 +10(11)-EpDPE 4,7,13,16,19 +13(14)-EpDPE 4,7,10,16,19 +16(17)-EpDPE 4,7,10,13,19 +19(20)-EpDPE 4,7,10,13,16 diff --git a/src/cppgoslin/src/parser/GoslinParserEventHandler.cpp b/src/cppgoslin/src/parser/GoslinParserEventHandler.cpp index dd440cf..a953aa7 100644 --- a/src/cppgoslin/src/parser/GoslinParserEventHandler.cpp +++ b/src/cppgoslin/src/parser/GoslinParserEventHandler.cpp @@ -28,7 +28,7 @@ SOFTWARE. #define reg(x, y) BaseParserEventHandler::registered_events->insert({x, bind(&GoslinParserEventHandler::y, this, placeholders::_1)}) -const map GoslinParserEventHandler::mediator_FA{{"H", 17}, {"O", 18}, {"E", 20}, {"Do", 22}}; +const map GoslinParserEventHandler::mediator_FA{{"H", 17}, {"O", 18}, {"E", 20}, {"Do", 22}, {"D", 22}}; const map GoslinParserEventHandler::mediator_DB{{"M", 1}, {"D", 2}, {"Tr", 3}, {"T", 4}, {"P", 5}, {"H", 6}}; diff --git a/src/cppgoslin/src/parser/LipidMapsParserEventHandler.cpp b/src/cppgoslin/src/parser/LipidMapsParserEventHandler.cpp index b96a7da..040ceab 100644 --- a/src/cppgoslin/src/parser/LipidMapsParserEventHandler.cpp +++ b/src/cppgoslin/src/parser/LipidMapsParserEventHandler.cpp @@ -68,6 +68,8 @@ LipidMapsParserEventHandler::LipidMapsParserEventHandler() : LipidBaseParserEven reg("lcb_post_event", clean_lcb); reg("fa_pre_event", new_fa); reg("fa_post_event", append_fa); + reg("c_type_pre_event", c_type); + reg("c_type_post_event", append_fa); reg("glyco_struct_pre_event", add_glyco); reg("db_single_position_pre_event", set_isomeric_level); reg("db_single_position_post_event", add_db_position); @@ -95,6 +97,7 @@ LipidMapsParserEventHandler::LipidMapsParserEventHandler() : LipidBaseParserEven reg("isotope_element_pre_event", set_heavy_element); reg("isotope_number_pre_event", set_heavy_number); reg("sphinga_pre_event", new_sphinga); + reg("hg_lsl_pure_pre_event", new_sph); reg("sphinga_phospho_pre_event", add_phospho); reg("sphinga_suffix_pre_event", sphinga_db_set); reg("sphinga_lcb_len_pre_event", add_carbon_pre_len); @@ -166,41 +169,70 @@ void LipidMapsParserEventHandler::add_additional_modifier(TreeNode* node){ void LipidMapsParserEventHandler::add_carbon_pre_len(TreeNode* node){ - lcb_carbon_pre_set = node->get_int(); + lcb_carbon_pre_set = node->get_int(); } void LipidMapsParserEventHandler::sphinga_db_set(TreeNode* node){ - sphinga_suffix = node->get_text(); - - if (sphinga_suffix == "anine") lcb_db_pre_set = 0; - else if (sphinga_suffix == "osine") lcb_db_pre_set = 1; - else if (sphinga_suffix == "adienine") lcb_db_pre_set = 2; + sphinga_suffix = node->get_text(); + + if (sphinga_suffix == "anine") lcb_db_pre_set = 0; + else if (sphinga_suffix == "osine") lcb_db_pre_set = 1; + else if (sphinga_suffix == "adienine") lcb_db_pre_set = 2; } - - - - + + + + void LipidMapsParserEventHandler::new_sphinga(TreeNode* node){ - head_group = "SPB"; + head_group = "SPB"; } - - - -void LipidMapsParserEventHandler::new_sphinga_pure(TreeNode* node){ + + +void LipidMapsParserEventHandler::c_type(TreeNode* node){ + if (head_group == "Cer"){ + new_sphinga_pure(node); + } + else { sphinga_pure = true; lcb_hydro_pre_set.push_back(KnownFunctionalGroups::get_functional_group("OH")); + lcb_hydro_pre_set[0]->position = 3; + new_lcb(node); + } + clean_lcb(node); + new_fa(node); +} + + +void LipidMapsParserEventHandler::new_sph(TreeNode* node){ + if (to_lower(head_group) == "sph"){ + new_sphinga_pure(node); + } + else { + sphinga_pure = true; lcb_hydro_pre_set.push_back(KnownFunctionalGroups::get_functional_group("OH")); - lcb_hydro_pre_set[0]->position = 1; - lcb_hydro_pre_set[1]->position = 3; + lcb_hydro_pre_set[0]->position = 3; new_lcb(node); + } + clean_lcb(node); } - - - + + + +void LipidMapsParserEventHandler::new_sphinga_pure(TreeNode* node){ + sphinga_pure = true; + lcb_hydro_pre_set.push_back(KnownFunctionalGroups::get_functional_group("OH")); + lcb_hydro_pre_set.push_back(KnownFunctionalGroups::get_functional_group("OH")); + lcb_hydro_pre_set[0]->position = 1; + lcb_hydro_pre_set[1]->position = 3; + new_lcb(node); +} + + + void LipidMapsParserEventHandler::set_hydro_pre_num(TreeNode* node){ - lcb_hydro_pre_set.push_back(KnownFunctionalGroups::get_functional_group("OH")); - lcb_hydro_pre_set.back()->position = 4; - sphinga_prefix = node->get_text(); + lcb_hydro_pre_set.push_back(KnownFunctionalGroups::get_functional_group("OH")); + lcb_hydro_pre_set.back()->position = 4; + sphinga_prefix = node->get_text(); }