From bb20bc5771b1acb05d394f11e9ef3a3c973ff1aa Mon Sep 17 00:00:00 2001 From: Maximilien Rothier Bautzer Date: Thu, 4 Jan 2024 15:51:10 +0000 Subject: [PATCH 1/4] generate codon table in constructor rather than at buildtime --- synthesis/codon/codon.go | 65 ++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/synthesis/codon/codon.go b/synthesis/codon/codon.go index 75ae10f1..f0bdc596 100644 --- a/synthesis/codon/codon.go +++ b/synthesis/codon/codon.go @@ -385,7 +385,7 @@ Tim ******************************************************************************/ // Function to generate default codon tables from NCBI https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi -func generateCodonTable(aminoAcids, starts string) *TranslationTable { +func generateCodonTable(aminoAcids, starts string) (*TranslationTable, error) { base1 := "TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG" base2 := "TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG" base3 := "TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG" @@ -432,7 +432,7 @@ func generateCodonTable(aminoAcids, starts string) *TranslationTable { // This function is run at buildtime and failure here means we have an invalid codon table. chooser, err := newAminoAcidChoosers(aminoAcidSlice) if err != nil { - panic(fmt.Errorf("tried to generate an invalid codon table %w", err)) + return nil, fmt.Errorf("tried to generate an invalid codon table %w", err) } return &TranslationTable{ @@ -443,41 +443,42 @@ func generateCodonTable(aminoAcids, starts string) *TranslationTable { StartCodonTable: startCodonsMap, Choosers: chooser, Stats: NewStats(), - } + }, nil } // NewTranslationTable takes the index of desired NCBI codon table and returns it. -func NewTranslationTable(index int) *TranslationTable { - return translationTablesByNumber[index].Copy() +func NewTranslationTable(index int) (*TranslationTable, error) { + return generateCodonTable(translationTablesByNumber[index][0], translationTablesByNumber[index][1]) } -// translationTablesByNumber stores all codon tables published by NCBI https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi using numbered indices. -var translationTablesByNumber = map[int]*TranslationTable{ - 1: generateCodonTable("FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M------**--*----M---------------M----------------------------"), - 2: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG", "----------**--------------------MMMM----------**---M------------"), - 3: generateCodonTable("FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**----------------------MM---------------M------------"), - 4: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--MM------**-------M------------MMMM---------------M------------"), - 5: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG", "---M------**--------------------MMMM---------------M------------"), - 6: generateCodonTable("FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"), - 9: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", "----------**-----------------------M---------------M------------"), - 10: generateCodonTable("FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**-----------------------M----------------------------"), - 11: generateCodonTable("FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M------**--*----M------------MMMM---------------M------------"), - 12: generateCodonTable("FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**--*----M---------------M----------------------------"), - 13: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG", "---M------**----------------------MM---------------M------------"), - 14: generateCodonTable("FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", "-----------*-----------------------M----------------------------"), - 16: generateCodonTable("FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------*---*--------------------M----------------------------"), - 21: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG", "----------**-----------------------M---------------M------------"), - 22: generateCodonTable("FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "------*---*---*--------------------M----------------------------"), - 23: generateCodonTable("FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--*-------**--*-----------------M--M---------------M------------"), - 24: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG", "---M------**-------M---------------M---------------M------------"), - 25: generateCodonTable("FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M------**-----------------------M---------------M------------"), - 26: generateCodonTable("FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**--*----M---------------M----------------------------"), - 27: generateCodonTable("FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"), - 28: generateCodonTable("FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**--*--------------------M----------------------------"), - 29: generateCodonTable("FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"), - 30: generateCodonTable("FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"), - 31: generateCodonTable("FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**-----------------------M----------------------------"), - 33: generateCodonTable("FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG", "---M-------*-------M---------------M---------------M------------")} +// translationTablesByNumber stores all data necessary to generate codon tables from sequences published by NCBI https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi using numbered indices. +var translationTablesByNumber = map[int][]string{ + 1: {"FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M------**--*----M---------------M----------------------------"}, + 2: {"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG", "----------**--------------------MMMM----------**---M------------"}, + 3: {"FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**----------------------MM---------------M------------"}, + 4: {"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--MM------**-------M------------MMMM---------------M------------"}, + 5: {"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG", "---M------**--------------------MMMM---------------M------------"}, + 6: {"FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"}, + 9: {"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", "----------**-----------------------M---------------M------------"}, + 10: {"FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**-----------------------M----------------------------"}, + 11: {"FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M------**--*----M------------MMMM---------------M------------"}, + 12: {"FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**--*----M---------------M----------------------------"}, + 13: {"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG", "---M------**----------------------MM---------------M------------"}, + 14: {"FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", "-----------*-----------------------M----------------------------"}, + 16: {"FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------*---*--------------------M----------------------------"}, + 21: {"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG", "----------**-----------------------M---------------M------------"}, + 22: {"FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "------*---*---*--------------------M----------------------------"}, + 23: {"FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--*-------**--*-----------------M--M---------------M------------"}, + 24: {"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG", "---M------**-------M---------------M---------------M------------"}, + 25: {"FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M------**-----------------------M---------------M------------"}, + 26: {"FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**--*----M---------------M----------------------------"}, + 27: {"FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"}, + 28: {"FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**--*--------------------M----------------------------"}, + 29: {"FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"}, + 30: {"FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"}, + 31: {"FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**-----------------------M----------------------------"}, + 33: {"FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG", "---M-------*-------M---------------M---------------M------------"}, +} /****************************************************************************** Nov, 20, 2020 From 58f850d757772f4e89612140f0f9013e1ba8a893 Mon Sep 17 00:00:00 2001 From: Maximilien Rothier Bautzer Date: Thu, 4 Jan 2024 15:53:26 +0000 Subject: [PATCH 2/4] update tests to handle constructor error, since tables are no longer generated at buildtime --- synthesis/codon/codon_test.go | 141 ++++++++++++++++++++++++++-------- 1 file changed, 111 insertions(+), 30 deletions(-) diff --git a/synthesis/codon/codon_test.go b/synthesis/codon/codon_test.go index 8d34e7cd..423817df 100644 --- a/synthesis/codon/codon_test.go +++ b/synthesis/codon/codon_test.go @@ -16,14 +16,23 @@ func TestTranslation(t *testing.T) { gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" gfpDnaSequence := "ATGGCTAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA" - if got, _ := NewTranslationTable(11).Translate(gfpDnaSequence); got != gfpTranslation { + table, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + + if got, _ := table.Translate(gfpDnaSequence); got != gfpTranslation { t.Errorf("TestTranslation has failed. Translate has returned %q, want %q", got, gfpTranslation) } } func TestTranslationErrorsOnEmptyAminoAcidString(t *testing.T) { - nonEmptyCodonTable := NewTranslationTable(1) - _, err := nonEmptyCodonTable.Translate("") + nonEmptyCodonTable, err := NewTranslationTable(1) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + + _, err = nonEmptyCodonTable.Translate("") if err != errEmptySequenceString { t.Error("Translation should return an error if given an empty sequence string") @@ -33,7 +42,12 @@ func TestTranslationErrorsOnEmptyAminoAcidString(t *testing.T) { func TestTranslationMixedCase(t *testing.T) { gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" gfpDnaSequence := "atggctagcaaaggagaagaacttttcactggagttgtcccaaTTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA" - if got, _ := NewTranslationTable(11).Translate(gfpDnaSequence); got != gfpTranslation { + table, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + + if got, _ := table.Translate(gfpDnaSequence); got != gfpTranslation { t.Errorf("TestTranslationMixedCase has failed. Translate has returned %q, want %q", got, gfpTranslation) } } @@ -41,7 +55,13 @@ func TestTranslationMixedCase(t *testing.T) { func TestTranslationLowerCase(t *testing.T) { gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" gfpDnaSequence := "atggctagcaaaggagaagaacttttcactggagttgtcccaattcttgttgaattagatggtgatgttaatgggcacaaattttctgtcagtggagagggtgaaggtgatgctacatacggaaagcttacccttaaatttatttgcactactggaaaactacctgttccatggccaacacttgtcactactttctcttatggtgttcaatgcttttcccgttatccggatcatatgaaacggcatgactttttcaagagtgccatgcccgaaggttatgtacaggaacgcactatatctttcaaagatgacgggaactacaagacgcgtgctgaagtcaagtttgaaggtgatacccttgttaatcgtatcgagttaaaaggtattgattttaaagaagatggaaacattctcggacacaaactcgagtacaactataactcacacaatgtatacatcacggcagacaaacaaaagaatggaatcaaagctaacttcaaaattcgccacaacattgaagatggatccgttcaactagcagaccattatcaacaaaatactccaattggcgatggccctgtccttttaccagacaaccattacctgtcgacacaatctgccctttcgaaagatcccaacgaaaagcgtgaccacatggtccttcttgagtttgtaactgctgctgggattacacatggcatggatgagctctacaaataa" - if got, _ := NewTranslationTable(11).Translate(gfpDnaSequence); got != gfpTranslation { + + table, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + + if got, _ := table.Translate(gfpDnaSequence); got != gfpTranslation { t.Errorf("TestTranslationLowerCase has failed. Translate has returned %q, want %q", got, gfpTranslation) } } @@ -51,13 +71,20 @@ func TestOptimize(t *testing.T) { sequence, _ := genbank.Read("../../data/puc19.gbk") - table := NewTranslationTable(11) - err := table.UpdateWeightsWithSequence(sequence) + table, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + + err = table.UpdateWeightsWithSequence(sequence) if err != nil { t.Error(err) } - codonTable := NewTranslationTable(11) + codonTable, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } optimizedSequence, _ := table.Optimize(gfpTranslation) optimizedSequenceTranslation, _ := codonTable.Translate(optimizedSequence) @@ -70,8 +97,12 @@ func TestOptimize(t *testing.T) { func TestOptimizeSameSeed(t *testing.T) { var gfpTranslation = "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" var sequence, _ = genbank.Read("../../data/puc19.gbk") - optimizationTable := NewTranslationTable(11) - err := optimizationTable.UpdateWeightsWithSequence(sequence) + optimizationTable, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + + err = optimizationTable.UpdateWeightsWithSequence(sequence) if err != nil { t.Error(err) } @@ -92,8 +123,12 @@ func TestOptimizeSameSeed(t *testing.T) { func TestOptimizeDifferentSeed(t *testing.T) { var gfpTranslation = "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" var sequence, _ = genbank.Read("../../data/puc19.gbk") - optimizationTable := NewTranslationTable(11) - err := optimizationTable.UpdateWeightsWithSequence(sequence) + optimizationTable, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + + err = optimizationTable.UpdateWeightsWithSequence(sequence) if err != nil { t.Error(err) } @@ -107,8 +142,12 @@ func TestOptimizeDifferentSeed(t *testing.T) { } func TestOptimizeErrorsOnEmptyAminoAcidString(t *testing.T) { - nonEmptyCodonTable := NewTranslationTable(1) - _, err := nonEmptyCodonTable.Optimize("") + nonEmptyCodonTable, err := NewTranslationTable(1) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + + _, err = nonEmptyCodonTable.Optimize("") if err != errEmptyAminoAcidString { t.Error("Optimize should return an error if given an empty amino acid string") @@ -116,14 +155,22 @@ func TestOptimizeErrorsOnEmptyAminoAcidString(t *testing.T) { } func TestOptimizeErrorsOnInvalidAminoAcid(t *testing.T) { aminoAcids := "TOP" - table := NewTranslationTable(1) // does not contain 'O' + table, err := NewTranslationTable(1) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + // does not contain 'O' _, optimizeErr := table.Optimize(aminoAcids) assert.EqualError(t, optimizeErr, invalidAminoAcidError{'O'}.Error()) } func TestGetCodonFrequency(t *testing.T) { - translationTable := NewTranslationTable(11).TranslationMap + table, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + translationTable := table.TranslationMap var codons strings.Builder @@ -197,14 +244,22 @@ func TestCompromiseCodonTable(t *testing.T) { // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable := NewTranslationTable(11) - err := optimizationTable.UpdateWeightsWithSequence(sequence) + optimizationTable, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + + err = optimizationTable.UpdateWeightsWithSequence(sequence) if err != nil { t.Error(err) } sequence2, _ := genbank.Read("../../data/phix174.gb") - optimizationTable2 := NewTranslationTable(11) + optimizationTable2, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + err = optimizationTable2.UpdateWeightsWithSequence(sequence2) if err != nil { t.Error(err) @@ -239,14 +294,22 @@ func TestAddCodonTable(t *testing.T) { // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable := NewTranslationTable(11) - err := optimizationTable.UpdateWeightsWithSequence(sequence) + optimizationTable, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + + err = optimizationTable.UpdateWeightsWithSequence(sequence) if err != nil { t.Error(err) } sequence2, _ := genbank.Read("../../data/phix174.gb") - optimizationTable2 := NewTranslationTable(11) + optimizationTable2, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + err = optimizationTable2.UpdateWeightsWithSequence(sequence2) if err != nil { t.Error(err) @@ -273,8 +336,12 @@ func TestCapitalizationRegression(t *testing.T) { sequence, _ := genbank.Read("../../data/puc19.gbk") - optimizationTable := NewTranslationTable(11) - err := optimizationTable.UpdateWeightsWithSequence(sequence) + optimizationTable, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + + err = optimizationTable.UpdateWeightsWithSequence(sequence) if err != nil { t.Error(err) } @@ -350,8 +417,12 @@ func TestOptimizeSequence(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() - optimizationTable := NewTranslationTable(11) - err := optimizationTable.UpdateWeightsWithSequence(tt.updateWeightsWith) + optimizationTable, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + + err = optimizationTable.UpdateWeightsWithSequence(tt.updateWeightsWith) if !errors.Is(err, tt.wantUpdateWeightsErr) { t.Errorf("got %v, want %v", err, tt.wantUpdateWeightsErr) } @@ -453,7 +524,8 @@ func TestUpdateWeights(t *testing.T) { chooserFn func(choices ...weightedRand.Choice) (*weightedRand.Chooser, error) - wantErr error + wantInitErr error + wantErr error }{ { name: "ok", @@ -493,7 +565,8 @@ func TestUpdateWeights(t *testing.T) { return nil, mockError }, - wantErr: mockError, + wantInitErr: mockError, + wantErr: mockError, }, } @@ -506,9 +579,17 @@ func TestUpdateWeights(t *testing.T) { newChooserFn = weightedRand.NewChooser }() - optimizationTable := NewTranslationTable(11) + optimizationTable, err := NewTranslationTable(11) + if !errors.Is(err, tt.wantInitErr) { + t.Fatalf("got %v, want %v", err, tt.wantInitErr) + return + } + + if tt.wantInitErr != nil { + return + } - err := optimizationTable.UpdateWeights(tt.aminoAcids) + err = optimizationTable.UpdateWeights(tt.aminoAcids) if !errors.Is(err, tt.wantErr) { t.Errorf("got %v, want %v", err, tt.wantErr) } From 09af4d6b78a357a37d57b6d78f27ccfc2302628f Mon Sep 17 00:00:00 2001 From: Maximilien Rothier Bautzer Date: Thu, 4 Jan 2024 15:55:43 +0000 Subject: [PATCH 3/4] update examples to handle constructor error, since tables are no longer generated at buildtime --- synthesis/codon/example_test.go | 68 ++++++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 14 deletions(-) diff --git a/synthesis/codon/example_test.go b/synthesis/codon/example_test.go index 0fae9bbb..0061c76b 100644 --- a/synthesis/codon/example_test.go +++ b/synthesis/codon/example_test.go @@ -11,7 +11,13 @@ import ( func ExampleTranslationTable_Translate() { gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" gfpDnaSequence := "ATGGCTAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA" - testTranslation, _ := codon.NewTranslationTable(11).Translate(gfpDnaSequence) // need to specify which codons map to which amino acids per NCBI table + table, err := codon.NewTranslationTable(11) + if err != nil { + fmt.Printf("error running example: %s\n", err) + return + } + + testTranslation, _ := table.Translate(gfpDnaSequence) // need to specify which codons map to which amino acids per NCBI table fmt.Println(gfpTranslation == testTranslation) // output: true @@ -19,14 +25,19 @@ func ExampleTranslationTable_Translate() { func ExampleTranslationTable_UpdateWeights() { gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" - sequenceWithCustomWeights := "ATGGCAAGTAAGGGAGAAGAGCTTTTTACCGGCGTAGTACCAATTCTGGTAGAACTGGATGGTGATGTAAACGGTCACAAATTTAGTGTAAGCGGAGAAGGTGAGGGTGATGCTACCTATGGCAAACTGACCCTAAAGTTTATATGCACGACTGGAAAACTTCCGGTACCGTGGCCAACGTTAGTTACAACGTTTTCTTATGGAGTACAGTGCTTCAGCCGCTACCCAGATCATATGAAACGCCATGATTTCTTTAAGAGCGCCATGCCAGAGGGTTATGTTCAGGAGCGCACGATCTCGTTTAAGGATGATGGTAACTATAAGACTCGTGCTGAGGTGAAGTTCGAAGGCGATACCCTTGTAAATCGTATTGAATTGAAGGGTATAGACTTCAAGGAGGATGGAAATATTCTTGGACATAAGCTGGAATACAATTACAATTCACATAACGTTTATATAACTGCCGACAAGCAAAAAAACGGGATAAAAGCTAATTTTAAAATACGCCACAACATAGAGGACGGGTCGGTGCAACTAGCCGATCATTATCAACAAAACACACCAATCGGCGACGGACCAGTTCTGTTGCCCGATAATCATTACTTATCAACCCAAAGTGCCTTAAGTAAGGATCCGAACGAAAAGCGCGATCATATGGTACTTCTTGAGTTTGTTACCGCTGCAGGCATAACGCATGGCATGGACGAGCTATACAAATAA" - table := codon.NewTranslationTable(11) + sequenceWithCustomWeights := "ATGGCGAGCAAGGGCGAAGAGCTTTTTACTGGAGTGGTACCCATCCTTGTGGAGCTGGATGGGGATGTTAATGGGCACAAGTTTTCTGTGTCCGGTGAGGGGGAGGGTGACGCGACCTATGGCAAACTAACGTTGAAGTTTATCTGCACCACCGGCAAGCTCCCTGTCCCTTGGCCGACGCTGGTAACCACTTTTTCATACGGAGTGCAATGCTTTTCACGATACCCAGACCACATGAAACGGCACGACTTCTTCAAGAGCGCGATGCCAGAAGGTTATGTGCAAGAGCGTACGATCTCATTCAAGGACGACGGGAATTATAAGACAAGAGCAGAGGTGAAATTTGAGGGGGACACGTTAGTAAATCGGATTGAATTAAAGGGAATCGACTTTAAGGAGGATGGGAACATACTTGGTCACAAACTGGAATATAATTACAATTCACACAATGTTTACATCACTGCCGACAAGCAAAAAAATGGGATTAAAGCAAATTTCAAAATTCGGCATAATATTGAGGATGGTAGTGTCCAGCTCGCGGATCACTATCAGCAAAACACACCTATCGGAGACGGACCCGTTTTACTACCGGATAATCATTACTTAAGCACCCAATCAGCGTTATCCAAAGATCCGAACGAAAAACGTGACCACATGGTTCTCTTGGAGTTCGTCACCGCAGCTGGAATAACTCATGGAATGGACGAACTATACAAATAA" + + table, err := codon.NewTranslationTable(11) + if err != nil { + fmt.Printf("error running example: %s\n", err) + return + } // this example is using custom weights for different codons for Arginine. Use this if you would rather use your own // codon weights, they can also be computed for you with `UpdateWeightsWithSequence`. - err := table.UpdateWeights([]codon.AminoAcid{ + err = table.UpdateWeights([]codon.AminoAcid{ { Letter: "R", Codons: []codon.Codon{ @@ -57,7 +68,11 @@ func ExampleTranslationTable_UpdateWeights() { fmt.Println("Could not update weights in example") } - optimizedSequence, _ := table.Optimize(gfpTranslation, 1) + optimizedSequence, err := table.Optimize(gfpTranslation, 1) + if err != nil { + fmt.Printf("error running example: %s\n", err) + return + } fmt.Println(optimizedSequence == sequenceWithCustomWeights) // output: true @@ -67,7 +82,12 @@ func ExampleTranslationTable_Optimize() { gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" sequence, _ := genbank.Read("../../data/puc19.gbk") - codonTable := codon.NewTranslationTable(11) + codonTable, err := codon.NewTranslationTable(11) + if err != nil { + fmt.Printf("error running example: %s\n", err) + return + } + _ = codonTable.UpdateWeightsWithSequence(sequence) // Here, we double check if the number of genes is equal to the number of stop codons @@ -122,14 +142,24 @@ func ExampleCompromiseCodonTable() { sequence, _ := genbank.Read("../../data/puc19.gbk") // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable := codon.NewTranslationTable(11) - err := optimizationTable.UpdateWeightsWithSequence(sequence) + optimizationTable, err := codon.NewTranslationTable(11) + if err != nil { + fmt.Printf("error running example: %s\n", err) + return + } + + err = optimizationTable.UpdateWeightsWithSequence(sequence) if err != nil { panic(fmt.Errorf("got unexpected error in an example: %w", err)) } sequence2, _ := genbank.Read("../../data/phix174.gb") - optimizationTable2 := codon.NewTranslationTable(11) + optimizationTable2, err := codon.NewTranslationTable(11) + if err != nil { + fmt.Printf("error running example: %s\n", err) + return + } + err = optimizationTable2.UpdateWeightsWithSequence(sequence2) if err != nil { panic(fmt.Errorf("got unexpected error in an example: %w", err)) @@ -143,21 +173,31 @@ func ExampleCompromiseCodonTable() { } } } - //output: 2727 + //output: 3863 } func ExampleAddCodonTable() { sequence, _ := genbank.Read("../../data/puc19.gbk") // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable := codon.NewTranslationTable(11) - err := optimizationTable.UpdateWeightsWithSequence(sequence) + optimizationTable, err := codon.NewTranslationTable(11) + if err != nil { + fmt.Printf("error running example: %s\n", err) + return + } + + err = optimizationTable.UpdateWeightsWithSequence(sequence) if err != nil { panic(fmt.Errorf("got unexpected error in an example: %w", err)) } sequence2, _ := genbank.Read("../../data/phix174.gb") - optimizationTable2 := codon.NewTranslationTable(11) + optimizationTable2, err := codon.NewTranslationTable(11) + if err != nil { + fmt.Printf("error running example: %s\n", err) + return + } + err = optimizationTable2.UpdateWeightsWithSequence(sequence2) if err != nil { panic(fmt.Errorf("got unexpected error in an example: %w", err)) @@ -175,5 +215,5 @@ func ExampleAddCodonTable() { } } } - //output: 90 + //output: 51 } From 4bd535328265a131d024076886990d587a25c30d Mon Sep 17 00:00:00 2001 From: Maximilien Rothier Bautzer Date: Thu, 4 Jan 2024 16:56:28 +0000 Subject: [PATCH 4/4] write a proper deepcopy func + test --- synthesis/codon/codon.go | 61 ++++++++++++++++++++------ synthesis/codon/codon_test.go | 80 +++++++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+), 14 deletions(-) diff --git a/synthesis/codon/codon.go b/synthesis/codon/codon.go index f0bdc596..17550f47 100644 --- a/synthesis/codon/codon.go +++ b/synthesis/codon/codon.go @@ -117,19 +117,46 @@ type TranslationTable struct { } // Copy returns a deep copy of the translation table. This is to prevent an unintended update of data used in another -// process, since the tables are generated at build time. -func (table *TranslationTable) Copy() *TranslationTable { - return &TranslationTable{ - StartCodons: table.StartCodons, - StopCodons: table.StopCodons, - AminoAcids: table.AminoAcids, +// process. +func (table *TranslationTable) Copy() (*TranslationTable, error) { + newTranslationMap := map[string]string{} + newStartCodonTable := map[string]string{} - StartCodonTable: table.StartCodonTable, - TranslationMap: table.TranslationMap, - Choosers: table.Choosers, + for k, v := range table.TranslationMap { + newTranslationMap[k] = v + } - Stats: table.Stats, + for k, v := range table.StartCodonTable { + newStartCodonTable[k] = v } + + newAAs := []AminoAcid{} + for _, v := range table.AminoAcids { + newAAs = append(newAAs, AminoAcid{ + Letter: "", + Codons: append([]Codon{}, v.Codons...), + }) + } + + newChoosers, err := newAminoAcidChoosers(newAAs) + if err != nil { + return nil, err + } + + return &TranslationTable{ + StartCodons: append([]string{}, table.StartCodons...), + StopCodons: append([]string{}, table.StopCodons...), + AminoAcids: append([]AminoAcid{}, table.AminoAcids...), + + TranslationMap: newTranslationMap, + StartCodonTable: newStartCodonTable, + Choosers: newChoosers, + + Stats: &Stats{ + StartCodonCount: table.Stats.StartCodonCount, + GeneCount: table.Stats.GeneCount, + }, + }, nil } // GetWeightedAminoAcids returns the amino acids along with their associated codon weights @@ -592,7 +619,10 @@ func CompromiseCodonTable(firstCodonTable, secondCodonTable *TranslationTable, c // // this take start and stop strings from first table // and use them as start + stops in final codonTable - mergedTable := firstCodonTable.Copy() + mergedTable, err := firstCodonTable.Copy() + if err != nil { + return nil, err + } // Check if cutOff is too high or low (this is converted to a percent) if cutOff < 0 { @@ -663,7 +693,7 @@ func CompromiseCodonTable(firstCodonTable, secondCodonTable *TranslationTable, c finalAminoAcids = append(finalAminoAcids, AminoAcid{firstAa.Letter, finalCodons}) } - err := mergedTable.UpdateWeights(finalAminoAcids) + err = mergedTable.UpdateWeights(finalAminoAcids) if err != nil { return nil, err } @@ -690,9 +720,12 @@ func AddCodonTable(firstCodonTable, secondCodonTable *TranslationTable) (*Transl finalAminoAcids = append(finalAminoAcids, AminoAcid{firstAa.Letter, finalCodons}) } - mergedTable := firstCodonTable.Copy() + mergedTable, err := firstCodonTable.Copy() + if err != nil { + return nil, err + } - err := mergedTable.UpdateWeights(finalAminoAcids) + err = mergedTable.UpdateWeights(finalAminoAcids) if err != nil { return nil, err } diff --git a/synthesis/codon/codon_test.go b/synthesis/codon/codon_test.go index 423817df..57663a04 100644 --- a/synthesis/codon/codon_test.go +++ b/synthesis/codon/codon_test.go @@ -8,6 +8,7 @@ import ( "github.com/bebop/poly/io/genbank" "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" weightedRand "github.com/mroth/weightedrand" "github.com/stretchr/testify/assert" ) @@ -596,3 +597,82 @@ func TestUpdateWeights(t *testing.T) { }) } } + +func TestCopy(t *testing.T) { + t.Parallel() + + cmpOptions := []cmp.Option{ + cmpopts.IgnoreUnexported(weightedRand.Chooser{}), + } + + tests := []struct { + name string + + wantErr error + }{ + { + name: "ok", + + wantErr: nil, + }, + } + + for _, tt := range tests { + var tt = tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + original, err := NewTranslationTable(11) + if err != nil { + t.Fatal(err) + } + + // perform a deep copy (changing the copy will not change the original) + + deepCopy, err := original.Copy() + if !errors.Is(err, tt.wantErr) { + t.Errorf("got %v, want %v", err, tt.wantErr) + } + + // modify fields + + deepCopy.StartCodons[0] = "🍌" + deepCopy.StopCodons[0] = "🐗" + deepCopy.AminoAcids = []AminoAcid{} + deepCopy.Choosers = map[string]weightedRand.Chooser{} + deepCopy.Stats = &Stats{} + deepCopy.TranslationMap = map[string]string{} + + // this compares pointers + if cmp.Equal(deepCopy, original, cmpOptions...) { + t.Errorf("deepCopy and original matched, we did not want them to %s", cmp.Diff(deepCopy, original, cmpOptions...)) + } + + // we compare the table's fields + + if cmp.Equal(deepCopy.StartCodonTable, original.StartCodons) { + t.Errorf("deepCopy and original matched, we did not want them to %s", cmp.Diff(deepCopy.StartCodonTable, original.StartCodons)) + } + + if cmp.Equal(deepCopy.StopCodons, original.StopCodons) { + t.Errorf("deepCopy and original matched, we did not want them to %s", cmp.Diff(deepCopy.StopCodons, original.StopCodons)) + } + + if cmp.Equal(deepCopy.AminoAcids, original.AminoAcids) { + t.Errorf("deepCopy and original matched, we did not want them to %s", cmp.Diff(deepCopy.AminoAcids, original.AminoAcids)) + } + + if cmp.Equal(deepCopy.Choosers, original.Choosers) { + t.Errorf("deepCopy and original matched, we did not want them to %s", cmp.Diff(deepCopy.Choosers, original.Choosers)) + } + + if cmp.Equal(deepCopy.Stats, original.Stats) { + t.Errorf("deepCopy and original matched, we did not want them to %s", cmp.Diff(deepCopy.Stats, original.Stats)) + } + + if cmp.Equal(deepCopy.TranslationMap, original.TranslationMap) { + t.Errorf("deepCopy and original matched, we did not want them to %s", cmp.Diff(deepCopy.TranslationMap, original.TranslationMap)) + } + }) + } +}