diff --git a/synthesis/codon/codon.go b/synthesis/codon/codon.go index 75ae10f1..17550f47 100644 --- a/synthesis/codon/codon.go +++ b/synthesis/codon/codon.go @@ -117,19 +117,46 @@ type TranslationTable struct { } // Copy returns a deep copy of the translation table. This is to prevent an unintended update of data used in another -// process, since the tables are generated at build time. -func (table *TranslationTable) Copy() *TranslationTable { - return &TranslationTable{ - StartCodons: table.StartCodons, - StopCodons: table.StopCodons, - AminoAcids: table.AminoAcids, +// process. Every slice and map, including each amino acid's codon list, is duplicated and the choosers are rebuilt from the copied amino acids; an error is returned if the choosers cannot be rebuilt. +func (table *TranslationTable) Copy() (*TranslationTable, error) { + newTranslationMap := map[string]string{} + newStartCodonTable := map[string]string{} - StartCodonTable: table.StartCodonTable, - TranslationMap: table.TranslationMap, - Choosers: table.Choosers, + for k, v := range table.TranslationMap { + newTranslationMap[k] = v + } - Stats: table.Stats, + for k, v := range table.StartCodonTable { + newStartCodonTable[k] = v } + + newAAs := []AminoAcid{} + for _, v := range table.AminoAcids { + newAAs = append(newAAs, AminoAcid{ + Letter: v.Letter, + Codons: append([]Codon{}, v.Codons...), + }) + } + + newChoosers, err := newAminoAcidChoosers(newAAs) + if err != nil { + return nil, err + } + + return &TranslationTable{ + StartCodons: append([]string{}, table.StartCodons...), + StopCodons: append([]string{}, table.StopCodons...), + AminoAcids: newAAs, + + TranslationMap: newTranslationMap, + StartCodonTable: newStartCodonTable, + Choosers: newChoosers, + + Stats: &Stats{ + StartCodonCount: table.Stats.StartCodonCount, + GeneCount: table.Stats.GeneCount, + }, + }, nil } // GetWeightedAminoAcids returns the amino acids along with their associated codon weights @@ -385,7 +412,7 @@ Tim ******************************************************************************/ // Function to generate default codon tables from NCBI 
https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi -func generateCodonTable(aminoAcids, starts string) *TranslationTable { +func generateCodonTable(aminoAcids, starts string) (*TranslationTable, error) { base1 := "TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG" base2 := "TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG" base3 := "TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG" @@ -432,7 +459,7 @@ func generateCodonTable(aminoAcids, starts string) *TranslationTable { // This function is run at buildtime and failure here means we have an invalid codon table. chooser, err := newAminoAcidChoosers(aminoAcidSlice) if err != nil { - panic(fmt.Errorf("tried to generate an invalid codon table %w", err)) + return nil, fmt.Errorf("tried to generate an invalid codon table %w", err) } return &TranslationTable{ @@ -443,41 +470,42 @@ func generateCodonTable(aminoAcids, starts string) *TranslationTable { StartCodonTable: startCodonsMap, Choosers: chooser, Stats: NewStats(), - } + }, nil } // NewTranslationTable takes the index of desired NCBI codon table and returns it. -func NewTranslationTable(index int) *TranslationTable { - return translationTablesByNumber[index].Copy() +func NewTranslationTable(index int) (*TranslationTable, error) { + return generateCodonTable(translationTablesByNumber[index][0], translationTablesByNumber[index][1]) } -// translationTablesByNumber stores all codon tables published by NCBI https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi using numbered indices. 
-var translationTablesByNumber = map[int]*TranslationTable{ - 1: generateCodonTable("FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M------**--*----M---------------M----------------------------"), - 2: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG", "----------**--------------------MMMM----------**---M------------"), - 3: generateCodonTable("FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**----------------------MM---------------M------------"), - 4: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--MM------**-------M------------MMMM---------------M------------"), - 5: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG", "---M------**--------------------MMMM---------------M------------"), - 6: generateCodonTable("FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"), - 9: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", "----------**-----------------------M---------------M------------"), - 10: generateCodonTable("FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**-----------------------M----------------------------"), - 11: generateCodonTable("FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M------**--*----M------------MMMM---------------M------------"), - 12: generateCodonTable("FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**--*----M---------------M----------------------------"), - 13: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG", "---M------**----------------------MM---------------M------------"), - 14: generateCodonTable("FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", "-----------*-----------------------M----------------------------"), - 
16: generateCodonTable("FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------*---*--------------------M----------------------------"), - 21: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG", "----------**-----------------------M---------------M------------"), - 22: generateCodonTable("FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "------*---*---*--------------------M----------------------------"), - 23: generateCodonTable("FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--*-------**--*-----------------M--M---------------M------------"), - 24: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG", "---M------**-------M---------------M---------------M------------"), - 25: generateCodonTable("FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M------**-----------------------M---------------M------------"), - 26: generateCodonTable("FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**--*----M---------------M----------------------------"), - 27: generateCodonTable("FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"), - 28: generateCodonTable("FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**--*--------------------M----------------------------"), - 29: generateCodonTable("FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"), - 30: generateCodonTable("FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"), - 31: generateCodonTable("FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**-----------------------M----------------------------"), - 33: 
generateCodonTable("FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG", "---M-------*-------M---------------M---------------M------------")} +// translationTablesByNumber stores all data necessary to generate codon tables from sequences published by NCBI https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi using numbered indices. +var translationTablesByNumber = map[int][]string{ + 1: {"FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M------**--*----M---------------M----------------------------"}, + 2: {"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG", "----------**--------------------MMMM----------**---M------------"}, + 3: {"FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**----------------------MM---------------M------------"}, + 4: {"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--MM------**-------M------------MMMM---------------M------------"}, + 5: {"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG", "---M------**--------------------MMMM---------------M------------"}, + 6: {"FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"}, + 9: {"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", "----------**-----------------------M---------------M------------"}, + 10: {"FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**-----------------------M----------------------------"}, + 11: {"FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M------**--*----M------------MMMM---------------M------------"}, + 12: {"FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**--*----M---------------M----------------------------"}, + 13: {"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG", "---M------**----------------------MM---------------M------------"}, + 14: 
{"FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", "-----------*-----------------------M----------------------------"}, + 16: {"FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------*---*--------------------M----------------------------"}, + 21: {"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG", "----------**-----------------------M---------------M------------"}, + 22: {"FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "------*---*---*--------------------M----------------------------"}, + 23: {"FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--*-------**--*-----------------M--M---------------M------------"}, + 24: {"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG", "---M------**-------M---------------M---------------M------------"}, + 25: {"FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M------**-----------------------M---------------M------------"}, + 26: {"FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**--*----M---------------M----------------------------"}, + 27: {"FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"}, + 28: {"FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**--*--------------------M----------------------------"}, + 29: {"FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"}, + 30: {"FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"}, + 31: {"FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**-----------------------M----------------------------"}, + 33: {"FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG", 
"---M-------*-------M---------------M---------------M------------"}, +} /****************************************************************************** Nov, 20, 2020 @@ -591,7 +619,10 @@ func CompromiseCodonTable(firstCodonTable, secondCodonTable *TranslationTable, c // // this take start and stop strings from first table // and use them as start + stops in final codonTable - mergedTable := firstCodonTable.Copy() + mergedTable, err := firstCodonTable.Copy() + if err != nil { + return nil, err + } // Check if cutOff is too high or low (this is converted to a percent) if cutOff < 0 { @@ -662,7 +693,7 @@ func CompromiseCodonTable(firstCodonTable, secondCodonTable *TranslationTable, c finalAminoAcids = append(finalAminoAcids, AminoAcid{firstAa.Letter, finalCodons}) } - err := mergedTable.UpdateWeights(finalAminoAcids) + err = mergedTable.UpdateWeights(finalAminoAcids) if err != nil { return nil, err } @@ -689,9 +720,12 @@ func AddCodonTable(firstCodonTable, secondCodonTable *TranslationTable) (*Transl finalAminoAcids = append(finalAminoAcids, AminoAcid{firstAa.Letter, finalCodons}) } - mergedTable := firstCodonTable.Copy() + mergedTable, err := firstCodonTable.Copy() + if err != nil { + return nil, err + } - err := mergedTable.UpdateWeights(finalAminoAcids) + err = mergedTable.UpdateWeights(finalAminoAcids) if err != nil { return nil, err } diff --git a/synthesis/codon/codon_test.go b/synthesis/codon/codon_test.go index 8d34e7cd..57663a04 100644 --- a/synthesis/codon/codon_test.go +++ b/synthesis/codon/codon_test.go @@ -8,6 +8,7 @@ import ( "github.com/bebop/poly/io/genbank" "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" weightedRand "github.com/mroth/weightedrand" "github.com/stretchr/testify/assert" ) @@ -16,14 +17,23 @@ func TestTranslation(t *testing.T) { gfpTranslation := 
"MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" gfpDnaSequence := "ATGGCTAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA" - if got, _ := NewTranslationTable(11).Translate(gfpDnaSequence); got != gfpTranslation { + table, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + + if got, _ := table.Translate(gfpDnaSequence); got != gfpTranslation { t.Errorf("TestTranslation has failed. 
Translate has returned %q, want %q", got, gfpTranslation) } } func TestTranslationErrorsOnEmptyAminoAcidString(t *testing.T) { - nonEmptyCodonTable := NewTranslationTable(1) - _, err := nonEmptyCodonTable.Translate("") + nonEmptyCodonTable, err := NewTranslationTable(1) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + + _, err = nonEmptyCodonTable.Translate("") if err != errEmptySequenceString { t.Error("Translation should return an error if given an empty sequence string") @@ -33,7 +43,12 @@ func TestTranslationErrorsOnEmptyAminoAcidString(t *testing.T) { func TestTranslationMixedCase(t *testing.T) { gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" gfpDnaSequence := "atggctagcaaaggagaagaacttttcactggagttgtcccaaTTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA" - if got, _ := NewTranslationTable(11).Translate(gfpDnaSequence); got != gfpTranslation { + table, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + + if got, _ := table.Translate(gfpDnaSequence); got != gfpTranslation { t.Errorf("TestTranslationMixedCase has failed. 
Translate has returned %q, want %q", got, gfpTranslation) } } @@ -41,7 +56,13 @@ func TestTranslationMixedCase(t *testing.T) { func TestTranslationLowerCase(t *testing.T) { gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" gfpDnaSequence := "atggctagcaaaggagaagaacttttcactggagttgtcccaattcttgttgaattagatggtgatgttaatgggcacaaattttctgtcagtggagagggtgaaggtgatgctacatacggaaagcttacccttaaatttatttgcactactggaaaactacctgttccatggccaacacttgtcactactttctcttatggtgttcaatgcttttcccgttatccggatcatatgaaacggcatgactttttcaagagtgccatgcccgaaggttatgtacaggaacgcactatatctttcaaagatgacgggaactacaagacgcgtgctgaagtcaagtttgaaggtgatacccttgttaatcgtatcgagttaaaaggtattgattttaaagaagatggaaacattctcggacacaaactcgagtacaactataactcacacaatgtatacatcacggcagacaaacaaaagaatggaatcaaagctaacttcaaaattcgccacaacattgaagatggatccgttcaactagcagaccattatcaacaaaatactccaattggcgatggccctgtccttttaccagacaaccattacctgtcgacacaatctgccctttcgaaagatcccaacgaaaagcgtgaccacatggtccttcttgagtttgtaactgctgctgggattacacatggcatggatgagctctacaaataa" - if got, _ := NewTranslationTable(11).Translate(gfpDnaSequence); got != gfpTranslation { + + table, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + + if got, _ := table.Translate(gfpDnaSequence); got != gfpTranslation { t.Errorf("TestTranslationLowerCase has failed. 
Translate has returned %q, want %q", got, gfpTranslation) } } @@ -51,13 +72,20 @@ func TestOptimize(t *testing.T) { sequence, _ := genbank.Read("../../data/puc19.gbk") - table := NewTranslationTable(11) - err := table.UpdateWeightsWithSequence(sequence) + table, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + + err = table.UpdateWeightsWithSequence(sequence) if err != nil { t.Error(err) } - codonTable := NewTranslationTable(11) + codonTable, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } optimizedSequence, _ := table.Optimize(gfpTranslation) optimizedSequenceTranslation, _ := codonTable.Translate(optimizedSequence) @@ -70,8 +98,12 @@ func TestOptimize(t *testing.T) { func TestOptimizeSameSeed(t *testing.T) { var gfpTranslation = "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" var sequence, _ = genbank.Read("../../data/puc19.gbk") - optimizationTable := NewTranslationTable(11) - err := optimizationTable.UpdateWeightsWithSequence(sequence) + optimizationTable, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + + err = optimizationTable.UpdateWeightsWithSequence(sequence) if err != nil { t.Error(err) } @@ -92,8 +124,12 @@ func TestOptimizeSameSeed(t *testing.T) { func TestOptimizeDifferentSeed(t *testing.T) { var gfpTranslation = "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" var sequence, _ = genbank.Read("../../data/puc19.gbk") - optimizationTable := NewTranslationTable(11) - err := 
optimizationTable.UpdateWeightsWithSequence(sequence) + optimizationTable, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + + err = optimizationTable.UpdateWeightsWithSequence(sequence) if err != nil { t.Error(err) } @@ -107,8 +143,12 @@ func TestOptimizeDifferentSeed(t *testing.T) { } func TestOptimizeErrorsOnEmptyAminoAcidString(t *testing.T) { - nonEmptyCodonTable := NewTranslationTable(1) - _, err := nonEmptyCodonTable.Optimize("") + nonEmptyCodonTable, err := NewTranslationTable(1) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + + _, err = nonEmptyCodonTable.Optimize("") if err != errEmptyAminoAcidString { t.Error("Optimize should return an error if given an empty amino acid string") @@ -116,14 +156,22 @@ func TestOptimizeErrorsOnEmptyAminoAcidString(t *testing.T) { } func TestOptimizeErrorsOnInvalidAminoAcid(t *testing.T) { aminoAcids := "TOP" - table := NewTranslationTable(1) // does not contain 'O' + table, err := NewTranslationTable(1) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + // does not contain 'O' _, optimizeErr := table.Optimize(aminoAcids) assert.EqualError(t, optimizeErr, invalidAminoAcidError{'O'}.Error()) } func TestGetCodonFrequency(t *testing.T) { - translationTable := NewTranslationTable(11).TranslationMap + table, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + translationTable := table.TranslationMap var codons strings.Builder @@ -197,14 +245,22 @@ func TestCompromiseCodonTable(t *testing.T) { // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable := NewTranslationTable(11) - err := optimizationTable.UpdateWeightsWithSequence(sequence) + optimizationTable, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + + err = 
optimizationTable.UpdateWeightsWithSequence(sequence) if err != nil { t.Error(err) } sequence2, _ := genbank.Read("../../data/phix174.gb") - optimizationTable2 := NewTranslationTable(11) + optimizationTable2, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + err = optimizationTable2.UpdateWeightsWithSequence(sequence2) if err != nil { t.Error(err) @@ -239,14 +295,22 @@ func TestAddCodonTable(t *testing.T) { // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable := NewTranslationTable(11) - err := optimizationTable.UpdateWeightsWithSequence(sequence) + optimizationTable, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + + err = optimizationTable.UpdateWeightsWithSequence(sequence) if err != nil { t.Error(err) } sequence2, _ := genbank.Read("../../data/phix174.gb") - optimizationTable2 := NewTranslationTable(11) + optimizationTable2, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + err = optimizationTable2.UpdateWeightsWithSequence(sequence2) if err != nil { t.Error(err) @@ -273,8 +337,12 @@ func TestCapitalizationRegression(t *testing.T) { sequence, _ := genbank.Read("../../data/puc19.gbk") - optimizationTable := NewTranslationTable(11) - err := optimizationTable.UpdateWeightsWithSequence(sequence) + optimizationTable, err := NewTranslationTable(11) + if err != nil { + t.Fatalf("failed to initialise codon table: %s", err) + } + + err = optimizationTable.UpdateWeightsWithSequence(sequence) if err != nil { t.Error(err) } @@ -350,8 +418,12 @@ func TestOptimizeSequence(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() - optimizationTable := NewTranslationTable(11) - err := optimizationTable.UpdateWeightsWithSequence(tt.updateWeightsWith) + optimizationTable, err := NewTranslationTable(11) + if err != nil { + 
t.Fatalf("failed to initialise codon table: %s", err) + } + + err = optimizationTable.UpdateWeightsWithSequence(tt.updateWeightsWith) if !errors.Is(err, tt.wantUpdateWeightsErr) { t.Errorf("got %v, want %v", err, tt.wantUpdateWeightsErr) } @@ -453,7 +525,8 @@ func TestUpdateWeights(t *testing.T) { chooserFn func(choices ...weightedRand.Choice) (*weightedRand.Chooser, error) - wantErr error + wantInitErr error + wantErr error }{ { name: "ok", @@ -493,7 +566,8 @@ func TestUpdateWeights(t *testing.T) { return nil, mockError }, - wantErr: mockError, + wantInitErr: mockError, + wantErr: mockError, }, } @@ -506,12 +580,99 @@ func TestUpdateWeights(t *testing.T) { newChooserFn = weightedRand.NewChooser }() - optimizationTable := NewTranslationTable(11) + optimizationTable, err := NewTranslationTable(11) + if !errors.Is(err, tt.wantInitErr) { + t.Fatalf("got %v, want %v", err, tt.wantInitErr) + return + } + + if tt.wantInitErr != nil { + return + } + + err = optimizationTable.UpdateWeights(tt.aminoAcids) + if !errors.Is(err, tt.wantErr) { + t.Errorf("got %v, want %v", err, tt.wantErr) + } + }) + } +} + +func TestCopy(t *testing.T) { + t.Parallel() + + cmpOptions := []cmp.Option{ + cmpopts.IgnoreUnexported(weightedRand.Chooser{}), + } + + tests := []struct { + name string + + wantErr error + }{ + { + name: "ok", + + wantErr: nil, + }, + } + + for _, tt := range tests { + var tt = tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() - err := optimizationTable.UpdateWeights(tt.aminoAcids) + original, err := NewTranslationTable(11) + if err != nil { + t.Fatal(err) + } + + // perform a deep copy (changing the copy will not change the original) + + deepCopy, err := original.Copy() if !errors.Is(err, tt.wantErr) { t.Errorf("got %v, want %v", err, tt.wantErr) } + + // modify fields + + deepCopy.StartCodons[0] = "🍌" + deepCopy.StopCodons[0] = "🐗" + deepCopy.AminoAcids = []AminoAcid{} + deepCopy.Choosers = map[string]weightedRand.Chooser{} + deepCopy.Stats = &Stats{} + 
deepCopy.TranslationMap = map[string]string{} + + // cmp.Equal dereferences both pointers and compares the tables they point to + if cmp.Equal(deepCopy, original, cmpOptions...) { + t.Errorf("deepCopy and original matched, we did not want them to %s", cmp.Diff(deepCopy, original, cmpOptions...)) + } + + // we compare the table's fields + + if cmp.Equal(deepCopy.StartCodons, original.StartCodons) { + t.Errorf("deepCopy and original matched, we did not want them to %s", cmp.Diff(deepCopy.StartCodons, original.StartCodons)) + } + + if cmp.Equal(deepCopy.StopCodons, original.StopCodons) { + t.Errorf("deepCopy and original matched, we did not want them to %s", cmp.Diff(deepCopy.StopCodons, original.StopCodons)) + } + + if cmp.Equal(deepCopy.AminoAcids, original.AminoAcids) { + t.Errorf("deepCopy and original matched, we did not want them to %s", cmp.Diff(deepCopy.AminoAcids, original.AminoAcids)) + } + + if cmp.Equal(deepCopy.Choosers, original.Choosers) { + t.Errorf("deepCopy and original matched, we did not want them to %s", cmp.Diff(deepCopy.Choosers, original.Choosers)) + } + + if cmp.Equal(deepCopy.Stats, original.Stats) { + t.Errorf("deepCopy and original matched, we did not want them to %s", cmp.Diff(deepCopy.Stats, original.Stats)) + } + + if cmp.Equal(deepCopy.TranslationMap, original.TranslationMap) { + t.Errorf("deepCopy and original matched, we did not want them to %s", cmp.Diff(deepCopy.TranslationMap, original.TranslationMap)) + } }) } } diff --git a/synthesis/codon/example_test.go b/synthesis/codon/example_test.go index 0fae9bbb..0061c76b 100644 --- a/synthesis/codon/example_test.go +++ b/synthesis/codon/example_test.go @@ -11,7 +11,13 @@ import ( func ExampleTranslationTable_Translate() { gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" gfpDnaSequence := 
"ATGGCTAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA" - testTranslation, _ := codon.NewTranslationTable(11).Translate(gfpDnaSequence) // need to specify which codons map to which amino acids per NCBI table + table, err := codon.NewTranslationTable(11) + if err != nil { + fmt.Printf("error running example: %s\n", err) + return + } + + testTranslation, _ := table.Translate(gfpDnaSequence) // need to specify which codons map to which amino acids per NCBI table fmt.Println(gfpTranslation == testTranslation) // output: true @@ -19,14 +25,19 @@ func ExampleTranslationTable_Translate() { func ExampleTranslationTable_UpdateWeights() { gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" - sequenceWithCustomWeights := 
"ATGGCAAGTAAGGGAGAAGAGCTTTTTACCGGCGTAGTACCAATTCTGGTAGAACTGGATGGTGATGTAAACGGTCACAAATTTAGTGTAAGCGGAGAAGGTGAGGGTGATGCTACCTATGGCAAACTGACCCTAAAGTTTATATGCACGACTGGAAAACTTCCGGTACCGTGGCCAACGTTAGTTACAACGTTTTCTTATGGAGTACAGTGCTTCAGCCGCTACCCAGATCATATGAAACGCCATGATTTCTTTAAGAGCGCCATGCCAGAGGGTTATGTTCAGGAGCGCACGATCTCGTTTAAGGATGATGGTAACTATAAGACTCGTGCTGAGGTGAAGTTCGAAGGCGATACCCTTGTAAATCGTATTGAATTGAAGGGTATAGACTTCAAGGAGGATGGAAATATTCTTGGACATAAGCTGGAATACAATTACAATTCACATAACGTTTATATAACTGCCGACAAGCAAAAAAACGGGATAAAAGCTAATTTTAAAATACGCCACAACATAGAGGACGGGTCGGTGCAACTAGCCGATCATTATCAACAAAACACACCAATCGGCGACGGACCAGTTCTGTTGCCCGATAATCATTACTTATCAACCCAAAGTGCCTTAAGTAAGGATCCGAACGAAAAGCGCGATCATATGGTACTTCTTGAGTTTGTTACCGCTGCAGGCATAACGCATGGCATGGACGAGCTATACAAATAA" - table := codon.NewTranslationTable(11) + sequenceWithCustomWeights := "ATGGCGAGCAAGGGCGAAGAGCTTTTTACTGGAGTGGTACCCATCCTTGTGGAGCTGGATGGGGATGTTAATGGGCACAAGTTTTCTGTGTCCGGTGAGGGGGAGGGTGACGCGACCTATGGCAAACTAACGTTGAAGTTTATCTGCACCACCGGCAAGCTCCCTGTCCCTTGGCCGACGCTGGTAACCACTTTTTCATACGGAGTGCAATGCTTTTCACGATACCCAGACCACATGAAACGGCACGACTTCTTCAAGAGCGCGATGCCAGAAGGTTATGTGCAAGAGCGTACGATCTCATTCAAGGACGACGGGAATTATAAGACAAGAGCAGAGGTGAAATTTGAGGGGGACACGTTAGTAAATCGGATTGAATTAAAGGGAATCGACTTTAAGGAGGATGGGAACATACTTGGTCACAAACTGGAATATAATTACAATTCACACAATGTTTACATCACTGCCGACAAGCAAAAAAATGGGATTAAAGCAAATTTCAAAATTCGGCATAATATTGAGGATGGTAGTGTCCAGCTCGCGGATCACTATCAGCAAAACACACCTATCGGAGACGGACCCGTTTTACTACCGGATAATCATTACTTAAGCACCCAATCAGCGTTATCCAAAGATCCGAACGAAAAACGTGACCACATGGTTCTCTTGGAGTTCGTCACCGCAGCTGGAATAACTCATGGAATGGACGAACTATACAAATAA" + + table, err := codon.NewTranslationTable(11) + if err != nil { + fmt.Printf("error running example: %s\n", err) + return + } // this example is using custom weights for different codons for Arginine. Use this if you would rather use your own // codon weights, they can also be computed for you with `UpdateWeightsWithSequence`. 
- err := table.UpdateWeights([]codon.AminoAcid{ + err = table.UpdateWeights([]codon.AminoAcid{ { Letter: "R", Codons: []codon.Codon{ @@ -57,7 +68,11 @@ func ExampleTranslationTable_UpdateWeights() { fmt.Println("Could not update weights in example") } - optimizedSequence, _ := table.Optimize(gfpTranslation, 1) + optimizedSequence, err := table.Optimize(gfpTranslation, 1) + if err != nil { + fmt.Printf("error running example: %s\n", err) + return + } fmt.Println(optimizedSequence == sequenceWithCustomWeights) // output: true @@ -67,7 +82,12 @@ func ExampleTranslationTable_Optimize() { gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" sequence, _ := genbank.Read("../../data/puc19.gbk") - codonTable := codon.NewTranslationTable(11) + codonTable, err := codon.NewTranslationTable(11) + if err != nil { + fmt.Printf("error running example: %s\n", err) + return + } + _ = codonTable.UpdateWeightsWithSequence(sequence) // Here, we double check if the number of genes is equal to the number of stop codons @@ -122,14 +142,24 @@ func ExampleCompromiseCodonTable() { sequence, _ := genbank.Read("../../data/puc19.gbk") // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable := codon.NewTranslationTable(11) - err := optimizationTable.UpdateWeightsWithSequence(sequence) + optimizationTable, err := codon.NewTranslationTable(11) + if err != nil { + fmt.Printf("error running example: %s\n", err) + return + } + + err = optimizationTable.UpdateWeightsWithSequence(sequence) if err != nil { panic(fmt.Errorf("got unexpected error in an example: %w", err)) } sequence2, _ := genbank.Read("../../data/phix174.gb") - optimizationTable2 := codon.NewTranslationTable(11) + optimizationTable2, err := codon.NewTranslationTable(11) + 
if err != nil { + fmt.Printf("error running example: %s\n", err) + return + } + err = optimizationTable2.UpdateWeightsWithSequence(sequence2) if err != nil { panic(fmt.Errorf("got unexpected error in an example: %w", err)) @@ -143,21 +173,31 @@ func ExampleCompromiseCodonTable() { } } } - //output: 2727 + //output: 3863 } func ExampleAddCodonTable() { sequence, _ := genbank.Read("../../data/puc19.gbk") // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable := codon.NewTranslationTable(11) - err := optimizationTable.UpdateWeightsWithSequence(sequence) + optimizationTable, err := codon.NewTranslationTable(11) + if err != nil { + fmt.Printf("error running example: %s\n", err) + return + } + + err = optimizationTable.UpdateWeightsWithSequence(sequence) if err != nil { panic(fmt.Errorf("got unexpected error in an example: %w", err)) } sequence2, _ := genbank.Read("../../data/phix174.gb") - optimizationTable2 := codon.NewTranslationTable(11) + optimizationTable2, err := codon.NewTranslationTable(11) + if err != nil { + fmt.Printf("error running example: %s\n", err) + return + } + err = optimizationTable2.UpdateWeightsWithSequence(sequence2) if err != nil { panic(fmt.Errorf("got unexpected error in an example: %w", err)) @@ -175,5 +215,5 @@ func ExampleAddCodonTable() { } } } - //output: 90 + //output: 51 }