From f00faebeb9d1173ae78c9d13c07b34b0359527f4 Mon Sep 17 00:00:00 2001 From: dariober Date: Tue, 26 Nov 2024 16:51:07 +0000 Subject: [PATCH 01/10] Start adding test and possibly some fixes for #481 Conversion to GFF3Feature includes source and score and sets ID and Parent attributes. However, export to GFF from UI fails. --- packages/apollo-cli/README.md | 2 +- .../src/export/export.service.ts | 4 +- .../src/GFF3/annotationFeatureToGFF3.test.ts | 34 ++++++ .../src/GFF3/annotationFeatureToGFF3.ts | 112 ++++++++++++++++++ .../src/GFF3/gff3ToAnnotationFeature.test.ts | 4 +- packages/apollo-shared/src/GFF3/index.ts | 1 + packages/apollo-shared/src/util.ts | 102 ---------------- packages/apollo-shared/test_data/gene.json | 69 +++++++++++ .../src/BackendDrivers/DesktopFileDriver.ts | 4 +- .../src/components/DownloadGFF3.tsx | 4 +- 10 files changed, 226 insertions(+), 110 deletions(-) create mode 100644 packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.test.ts create mode 100644 packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.ts create mode 100644 packages/apollo-shared/test_data/gene.json diff --git a/packages/apollo-cli/README.md b/packages/apollo-cli/README.md index 7e1b98051..f0d70be83 100644 --- a/packages/apollo-cli/README.md +++ b/packages/apollo-cli/README.md @@ -16,7 +16,7 @@ $ npm install -g @apollo-annotation/cli $ apollo COMMAND running command... $ apollo (--version) -@apollo-annotation/cli/0.1.21 linux-x64 node-v20.17.0 +@apollo-annotation/cli/0.1.21 linux-x64 node-v20.13.0 $ apollo --help [COMMAND] USAGE $ apollo COMMAND diff --git a/packages/apollo-collaboration-server/src/export/export.service.ts b/packages/apollo-collaboration-server/src/export/export.service.ts index eb8072c88..bff83ef45 100644 --- a/packages/apollo-collaboration-server/src/export/export.service.ts +++ b/packages/apollo-collaboration-server/src/export/export.service.ts @@ -25,7 +25,7 @@ import { RefSeqDocument, } from '@apollo-annotation/schemas' import { - makeGFF3Feature, + annotationFeatureToGFF3, splitStringIntoChunks, } from '@apollo-annotation/shared' import gff from '@gmod/gff' @@ -179,7 +179,7 @@ export class ExportService { const refSeqNames = Object.fromEntries( refSeqs.map((refSeq) => [refSeq._id, refSeq.name]), ) - const gff3Feature = makeGFF3Feature( + const gff3Feature = annotationFeatureToGFF3( flattened as unknown as AnnotationFeatureSnapshot, undefined, refSeqNames, diff --git a/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.test.ts b/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.test.ts new file mode 100644 index 000000000..50769a82a --- /dev/null +++ b/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.test.ts @@ -0,0 +1,34 @@ +/* eslint-disable @typescript-eslint/no-floating-promises */ + +import { describe, it } from 'node:test' +import { assert } from 'chai' +import { readAnnotationFeatureSnapshot } from './gff3ToAnnotationFeature.test' +import { annotationFeatureToGFF3 } from './annotationFeatureToGFF3' + +describe('annotationFeatureToGFF3', () => { + it('Convert one gene', () => { + const annotationFeature = readAnnotationFeatureSnapshot( + 'test_data/gene.json', + ) + const [gff3Feature] = annotationFeatureToGFF3(annotationFeature) + + assert.deepEqual(gff3Feature.type, 'gene') + assert.deepEqual(gff3Feature.start, 1000) + assert.deepEqual(gff3Feature.end, 9000) + assert.deepEqual(gff3Feature.strand, '+') + assert.deepEqual(gff3Feature.score, 123) + assert.deepEqual(gff3Feature.source, 'test_data') + assert.deepEqual(gff3Feature.attributes?.Name, ['EDEN']) + assert.deepEqual(gff3Feature.attributes?.testid, ['t003']) + assert.deepEqual(gff3Feature.attributes?.ID, ['gene10001']) + + const [children] = gff3Feature.child_features + const [mrna] = children + assert.deepEqual(mrna.type, 'mRNA') + assert.deepEqual(mrna.attributes?.Parent, ['gene10001']) + + // Sanity check the annotationFeature does have a score, etc. + // assert.deepEqual(annotationFeature.attributes?.gff_score, ['123']) + // assert.deepEqual(annotationFeature.attributes?.gff_source, ['test_data']) + }) +}) diff --git a/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.ts b/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.ts new file mode 100644 index 000000000..9caad6488 --- /dev/null +++ b/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.ts @@ -0,0 +1,112 @@ +/* eslint-disable @typescript-eslint/no-unsafe-assignment */ + +import { AnnotationFeatureSnapshot } from '@apollo-annotation/mst' +import { GFF3Feature } from '@gmod/gff' + +export function annotationFeatureToGFF3( + feature: AnnotationFeatureSnapshot, + parentId?: string, + refSeqNames?: Record, +): GFF3Feature { + const locations = [{ start: feature.min, end: feature.max }] + // const locations = feature.discontinuousLocations?.length + // ? feature.discontinuousLocations + // : [{ start: feature.start, end: feature.end, phase: feature.phase }] + const attributes: Record = JSON.parse( + JSON.stringify(feature.attributes), + ) + const ontologyTerms: string[] = [] + const source = feature.attributes?.gff_source?.[0] ?? null + delete attributes.gff_source + if (parentId) { + attributes.Parent = [parentId] + } + if (attributes.gff_id) { + attributes.ID = attributes.gff_id + delete attributes.gff_id + } + if (attributes.gff_name) { + attributes.Name = attributes.gff_name + delete attributes.gff_name + } + if (attributes.gff_alias) { + attributes.Alias = attributes.gff_alias + delete attributes.gff_alias + } + if (attributes.gff_target) { + attributes.Target = attributes.gff_target + delete attributes.gff_target + } + if (attributes.gff_gap) { + attributes.Gap = attributes.gff_gap + delete attributes.gff_gap + } + if (attributes.gff_derives_from) { + attributes.Derives_from = attributes.gff_derives_from + delete attributes.gff_derives_from + } + if (attributes.gff_note) { + attributes.Note = attributes.gff_note + delete attributes.gff_note + } + if (attributes.gff_dbxref) { + attributes.Dbxref = attributes.gff_dbxref + delete attributes.gff_dbxref + } + if (attributes.gff_is_circular) { + attributes.Is_circular = attributes.gff_is_circular + delete attributes.gff_is_circular + } + if (attributes.gff_ontology_term) { + ontologyTerms.push(...attributes.gff_ontology_term) + delete attributes.gff_ontology_term + } + if (attributes['Gene Ontology']) { + ontologyTerms.push(...attributes['Gene Ontology']) + delete attributes['Gene Ontology'] + } + if (attributes['Sequence Ontology']) { + ontologyTerms.push(...attributes['Sequence Ontology']) + delete attributes['Sequence Ontology'] + } + if (ontologyTerms.length > 0) { + attributes.Ontology_term = ontologyTerms + } + + const gff_score = feature.attributes?.gff_score + let score = null + if (gff_score) { + if (gff_score.length == 1) { + score = Number(gff_score[0]) + } else { + throw new Error('Unexpected score') + } + } + delete attributes.gff_score + + return locations.map((location) => ({ + start: location.start + 1, + end: location.end, + seq_id: refSeqNames ? refSeqNames[feature.refSeq] ?? null : feature.refSeq, + source, + type: feature.type, + score, + strand: feature.strand ? (feature.strand === 1 ? '+' : '-') : null, + phase: null, + // phase: + // location.phase === 0 + // ? '0' + // : location.phase === 1 + // ? '1' + // : location.phase === 2 + // ? '2' + // : null, + attributes: Object.keys(attributes).length > 0 ? attributes : null, + derived_features: [], + child_features: feature.children + ? Object.values(feature.children).map((child) => + annotationFeatureToGFF3(child, attributes.ID?.[0], refSeqNames), + ) + : [], + })) +} diff --git a/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts b/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts index b6abc8f0a..64ed0c7ed 100644 --- a/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts +++ b/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts @@ -120,7 +120,9 @@ function readFeatureFile(fn: string): GFF3Feature[] { return inGff } -function readAnnotationFeatureSnapshot(fn: string): AnnotationFeatureSnapshot { +export function readAnnotationFeatureSnapshot( + fn: string, +): AnnotationFeatureSnapshot { const lines = readFileSync(fn).toString() return JSON.parse(lines) as AnnotationFeatureSnapshot } diff --git a/packages/apollo-shared/src/GFF3/index.ts b/packages/apollo-shared/src/GFF3/index.ts index fd9ae5fb0..45cec9a92 100644 --- a/packages/apollo-shared/src/GFF3/index.ts +++ b/packages/apollo-shared/src/GFF3/index.ts @@ -1,2 +1,3 @@ +export * from './annotationFeatureToGFF3' export * from './gffReservedKeys' export * from './gff3ToAnnotationFeature' diff --git a/packages/apollo-shared/src/util.ts b/packages/apollo-shared/src/util.ts index 36dda6c67..331a40a13 100644 --- a/packages/apollo-shared/src/util.ts +++ b/packages/apollo-shared/src/util.ts @@ -1,105 +1,3 @@ -/* eslint-disable @typescript-eslint/no-unsafe-assignment */ - -import { AnnotationFeatureSnapshot } from '@apollo-annotation/mst' -import { GFF3Feature } from '@gmod/gff' - -export function makeGFF3Feature( - feature: AnnotationFeatureSnapshot, - parentId?: string, - refSeqNames?: Record, -): GFF3Feature { - const locations = [{ start: feature.min, end: feature.max }] - // const locations = feature.discontinuousLocations?.length - // ? feature.discontinuousLocations - // : [{ start: feature.start, end: feature.end, phase: feature.phase }] - const attributes: Record = JSON.parse( - JSON.stringify(feature.attributes), - ) - const ontologyTerms: string[] = [] - const source = feature.attributes?.source?.[0] ?? null - delete attributes.source - if (parentId) { - attributes.Parent = [parentId] - } - if (attributes._id) { - attributes.ID = attributes._id - delete attributes._id - } - if (attributes.gff_name) { - attributes.Name = attributes.gff_name - delete attributes.gff_name - } - if (attributes.gff_alias) { - attributes.Alias = attributes.gff_alias - delete attributes.gff_alias - } - if (attributes.gff_target) { - attributes.Target = attributes.gff_target - delete attributes.gff_target - } - if (attributes.gff_gap) { - attributes.Gap = attributes.gff_gap - delete attributes.gff_gap - } - if (attributes.gff_derives_from) { - attributes.Derives_from = attributes.gff_derives_from - delete attributes.gff_derives_from - } - if (attributes.gff_note) { - attributes.Note = attributes.gff_note - delete attributes.gff_note - } - if (attributes.gff_dbxref) { - attributes.Dbxref = attributes.gff_dbxref - delete attributes.gff_dbxref - } - if (attributes.gff_is_circular) { - attributes.Is_circular = attributes.gff_is_circular - delete attributes.gff_is_circular - } - if (attributes.gff_ontology_term) { - ontologyTerms.push(...attributes.gff_ontology_term) - delete attributes.gff_ontology_term - } - if (attributes['Gene Ontology']) { - ontologyTerms.push(...attributes['Gene Ontology']) - delete attributes['Gene Ontology'] - } - if (attributes['Sequence Ontology']) { - ontologyTerms.push(...attributes['Sequence Ontology']) - delete attributes['Sequence Ontology'] - } - if (ontologyTerms.length > 0) { - attributes.Ontology_term = ontologyTerms - } - return locations.map((location) => ({ - start: location.start + 1, - end: location.end, - seq_id: refSeqNames ? refSeqNames[feature.refSeq] ?? null : feature.refSeq, - source, - type: feature.type, - score: null, - // score: feature.score ?? null, - strand: feature.strand ? (feature.strand === 1 ? '+' : '-') : null, - phase: null, - // phase: - // location.phase === 0 - // ? '0' - // : location.phase === 1 - // ? '1' - // : location.phase === 2 - // ? '2' - // : null, - attributes: Object.keys(attributes).length > 0 ? attributes : null, - derived_features: [], - child_features: feature.children - ? Object.values(feature.children).map((child) => - makeGFF3Feature(child, attributes.ID?.[0], refSeqNames), - ) - : [], - })) -} - export function splitStringIntoChunks( input: string, chunkSize: number, diff --git a/packages/apollo-shared/test_data/gene.json b/packages/apollo-shared/test_data/gene.json new file mode 100644 index 000000000..a7806d835 --- /dev/null +++ b/packages/apollo-shared/test_data/gene.json @@ -0,0 +1,69 @@ +{ + "_id": "66d70e4ccc30b55b65e5f619", + "refSeq": "chr1", + "type": "gene", + "min": 999, + "max": 9000, + "strand": 1, + "attributes": { + "gff_id": ["gene10001"], + "gff_name": ["EDEN"], + "gff_score": ["123"], + "gff_source": ["test_data"], + "testid": ["t003"] + }, + "children": { + "66d70e4ccc30b55b65e5f618": { + "_id": "66d70e4ccc30b55b65e5f618", + "refSeq": "chr1", + "type": "mRNA", + "min": 1049, + "max": 9000, + "strand": 1, + "children": { + "66d70e4ccc30b55b65e5f615": { + "_id": "66d70e4ccc30b55b65e5f615", + "refSeq": "chr1", + "type": "exon", + "min": 1049, + "max": 1500, + "strand": 1, + "attributes": { + "gff_id": ["exon10001"], + "testid": ["t007"] + } + }, + "66d70e4ccc30b55b65e5f616": { + "_id": "66d70e4ccc30b55b65e5f616", + "refSeq": "chr1", + "type": "exon", + "min": 4999, + "max": 5500, + "strand": 1, + "attributes": { + "gff_id": ["exon10004"], + "testid": ["t010"] + } + }, + "66d70e4ccc30b55b65e5f617": { + "_id": "66d70e4ccc30b55b65e5f617", + "refSeq": "chr1", + "type": "CDS", + "min": 1200, + "max": 5000, + "strand": 1, + "attributes": { + "gff_id": ["cds10001"], + "gff_name": ["edenprotein.1"], + "testid": ["t012", "t013", "t014", "t015"] + } + } + }, + "attributes": { + "gff_id": ["mRNA10001"], + "gff_name": ["EDEN.1"], + "testid": ["t004", "t001", "t004"] + } + } + } +} diff --git a/packages/jbrowse-plugin-apollo/src/BackendDrivers/DesktopFileDriver.ts b/packages/jbrowse-plugin-apollo/src/BackendDrivers/DesktopFileDriver.ts index 157e92a6a..fe21ccc90 100644 --- a/packages/jbrowse-plugin-apollo/src/BackendDrivers/DesktopFileDriver.ts +++ b/packages/jbrowse-plugin-apollo/src/BackendDrivers/DesktopFileDriver.ts @@ -10,9 +10,9 @@ import { } from '@apollo-annotation/mst' import { ValidationResultSet, - makeGFF3Feature, splitStringIntoChunks, } from '@apollo-annotation/shared' +import { annotationFeatureToGFF3 } from '@apollo-annotation/shared/src/GFF3/annotationFeatureToGFF3' import gff, { GFF3Item } from '@gmod/gff' import { getConf } from '@jbrowse/core/configuration' import { Region, getSession } from '@jbrowse/core/util' @@ -147,7 +147,7 @@ export class DesktopFileDriver extends BackendDriver { for (const [, refSeq] of clientAssembly.refSeqs) { const { features } = refSeq for (const [, feature] of features) { - gff3Items.push(makeGFF3Feature(getSnapshot(feature))) + gff3Items.push(annotationFeatureToGFF3(getSnapshot(feature))) } } for (const [, refSeq] of clientAssembly.refSeqs) { diff --git a/packages/jbrowse-plugin-apollo/src/components/DownloadGFF3.tsx b/packages/jbrowse-plugin-apollo/src/components/DownloadGFF3.tsx index 20fcd421e..6172ed258 100644 --- a/packages/jbrowse-plugin-apollo/src/components/DownloadGFF3.tsx +++ b/packages/jbrowse-plugin-apollo/src/components/DownloadGFF3.tsx @@ -4,7 +4,6 @@ /* eslint-disable @typescript-eslint/no-unnecessary-condition */ /* eslint-disable @typescript-eslint/no-misused-promises */ import { ApolloAssembly } from '@apollo-annotation/mst' -import { makeGFF3Feature } from '@apollo-annotation/shared' import gff, { GFF3Item } from '@gmod/gff' import { Assembly } from '@jbrowse/core/assemblyManager/assembly' import { getConf } from '@jbrowse/core/configuration' @@ -29,6 +28,7 @@ import { import { ApolloSessionModel } from '../session' import { createFetchErrorMessage } from '../util' import { Dialog } from './Dialog' +import { annotationFeatureToGFF3 } from '@apollo-annotation/shared/src/GFF3/annotationFeatureToGFF3' interface DownloadGFF3Props { session: ApolloSessionModel @@ -153,7 +153,7 @@ export function DownloadGFF3({ handleClose, session }: DownloadGFF3Props) { continue } for (const [, feature] of features) { - gff3Items.push(makeGFF3Feature(getSnapshot(feature))) + gff3Items.push(annotationFeatureToGFF3(getSnapshot(feature))) } } for (const sequenceFeature of sequenceFeatures) { From 0a10d980be40e56c9590d0654e882bc555ebd95e Mon Sep 17 00:00:00 2001 From: dariober Date: Wed, 27 Nov 2024 10:13:13 +0000 Subject: [PATCH 02/10] Fix imports --- .../src/BackendDrivers/DesktopFileDriver.ts | 2 +- packages/jbrowse-plugin-apollo/src/components/DownloadGFF3.tsx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/jbrowse-plugin-apollo/src/BackendDrivers/DesktopFileDriver.ts b/packages/jbrowse-plugin-apollo/src/BackendDrivers/DesktopFileDriver.ts index fe21ccc90..01db56b99 100644 --- a/packages/jbrowse-plugin-apollo/src/BackendDrivers/DesktopFileDriver.ts +++ b/packages/jbrowse-plugin-apollo/src/BackendDrivers/DesktopFileDriver.ts @@ -9,10 +9,10 @@ import { CheckResultSnapshot, } from '@apollo-annotation/mst' import { + annotationFeatureToGFF3, ValidationResultSet, splitStringIntoChunks, } from '@apollo-annotation/shared' -import { annotationFeatureToGFF3 } from '@apollo-annotation/shared/src/GFF3/annotationFeatureToGFF3' import gff, { GFF3Item } from '@gmod/gff' import { getConf } from '@jbrowse/core/configuration' import { Region, getSession } from '@jbrowse/core/util' diff --git a/packages/jbrowse-plugin-apollo/src/components/DownloadGFF3.tsx b/packages/jbrowse-plugin-apollo/src/components/DownloadGFF3.tsx index 6172ed258..ed01d70dd 100644 --- a/packages/jbrowse-plugin-apollo/src/components/DownloadGFF3.tsx +++ b/packages/jbrowse-plugin-apollo/src/components/DownloadGFF3.tsx @@ -28,7 +28,7 @@ import { import { ApolloSessionModel } from '../session' import { createFetchErrorMessage } from '../util' import { Dialog } from './Dialog' -import { annotationFeatureToGFF3 } from '@apollo-annotation/shared/src/GFF3/annotationFeatureToGFF3' +import { annotationFeatureToGFF3 } from '@apollo-annotation/shared' interface DownloadGFF3Props { session: ApolloSessionModel From 781e33014785713f7898bff3cb542596eac1297f Mon Sep 17 00:00:00 2001 From: dariober Date: Wed, 27 Nov 2024 14:03:18 +0000 Subject: [PATCH 03/10] Test expanded --- .../src/GFF3/annotationFeatureToGFF3.test.ts | 49 ++++++++++++++++--- packages/apollo-shared/test_data/gene.json | 22 ++++++--- 2 files changed, 58 insertions(+), 13 deletions(-) diff --git a/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.test.ts b/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.test.ts index 50769a82a..018837c48 100644 --- a/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.test.ts +++ b/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.test.ts @@ -1,34 +1,69 @@ +/* eslint-disable prefer-destructuring */ /* eslint-disable @typescript-eslint/no-floating-promises */ - import { describe, it } from 'node:test' import { assert } from 'chai' import { readAnnotationFeatureSnapshot } from './gff3ToAnnotationFeature.test' import { annotationFeatureToGFF3 } from './annotationFeatureToGFF3' describe('annotationFeatureToGFF3', () => { - it('Convert one gene', () => { + it('Convert one gene test fields', () => { const annotationFeature = readAnnotationFeatureSnapshot( 'test_data/gene.json', ) const [gff3Feature] = annotationFeatureToGFF3(annotationFeature) + assert.deepEqual(gff3Feature.seq_id, 'chr1') assert.deepEqual(gff3Feature.type, 'gene') assert.deepEqual(gff3Feature.start, 1000) assert.deepEqual(gff3Feature.end, 9000) assert.deepEqual(gff3Feature.strand, '+') assert.deepEqual(gff3Feature.score, 123) assert.deepEqual(gff3Feature.source, 'test_data') + }) + it.skip('Convert one gene test phase', () => { + const annotationFeature = readAnnotationFeatureSnapshot( + 'test_data/gene.json', + ) + const [gff3Feature] = annotationFeatureToGFF3(annotationFeature) + const cds = gff3Feature.child_features[0][0].child_features[2][0] + assert.deepEqual(cds.start, 1201) + assert.deepEqual(cds.phase, '0') + }) + it('Convert one gene test attributes', () => { + const annotationFeature = readAnnotationFeatureSnapshot( + 'test_data/gene.json', + ) + const [gff3Feature] = annotationFeatureToGFF3(annotationFeature) + assert.deepEqual(gff3Feature.attributes?.Name, ['EDEN']) - assert.deepEqual(gff3Feature.attributes?.testid, ['t003']) + assert.deepEqual(gff3Feature.attributes?.testid, ['t001', 't003']) assert.deepEqual(gff3Feature.attributes?.ID, ['gene10001']) - + assert.deepEqual(gff3Feature.attributes?.Ontology_term, [ + 'GO1234', + 'GO4567', + 'SO1234', + ]) + assert.deepEqual(gff3Feature.attributes?.Alias, ['myalias']) + assert.deepEqual(gff3Feature.attributes?.Target, ['mytarget']) + assert.deepEqual(gff3Feature.attributes?.Gap, ['mygap']) + assert.deepEqual(gff3Feature.attributes?.Derives_from, ['myderives']) + assert.deepEqual(gff3Feature.attributes?.Note, ['mynote']) + assert.deepEqual(gff3Feature.attributes?.Dbxref, ['mydbxref']) + assert.deepEqual(gff3Feature.attributes?.Is_circular, ['true']) + }) + it('Convert one gene test children', () => { + const annotationFeature = readAnnotationFeatureSnapshot( + 'test_data/gene.json', + ) + const [gff3Feature] = annotationFeatureToGFF3(annotationFeature) const [children] = gff3Feature.child_features const [mrna] = children assert.deepEqual(mrna.type, 'mRNA') assert.deepEqual(mrna.attributes?.Parent, ['gene10001']) - // Sanity check the annotationFeature does have a score, etc. - // assert.deepEqual(annotationFeature.attributes?.gff_score, ['123']) - // assert.deepEqual(annotationFeature.attributes?.gff_source, ['test_data']) + const [cds] = mrna.child_features[2] + assert.deepEqual(cds.type, 'CDS') + assert.deepEqual(cds.attributes?.ID, ['cds10001']) + assert.deepEqual(cds.attributes?.Parent, ['mRNA10001']) }) }) diff --git a/packages/apollo-shared/test_data/gene.json b/packages/apollo-shared/test_data/gene.json index a7806d835..572efcce7 100644 --- a/packages/apollo-shared/test_data/gene.json +++ b/packages/apollo-shared/test_data/gene.json @@ -10,7 +10,17 @@ "gff_name": ["EDEN"], "gff_score": ["123"], "gff_source": ["test_data"], - "testid": ["t003"] + "testid": ["t001", "t003"], + "gff_ontology_term": ["GO1234"], + "Gene Ontology": ["GO4567"], + "Sequence Ontology": ["SO1234"], + "gff_alias": ["myalias"], + "gff_target": ["mytarget"], + "gff_gap": ["mygap"], + "gff_derives_from": ["myderives"], + "gff_note": ["mynote"], + "gff_dbxref": ["mydbxref"], + "gff_is_circular": ["true"] }, "children": { "66d70e4ccc30b55b65e5f618": { @@ -20,6 +30,11 @@ "min": 1049, "max": 9000, "strand": 1, + "attributes": { + "gff_id": ["mRNA10001"], + "gff_name": ["EDEN.1"], + "testid": ["t004", "t001", "t004"] + }, "children": { "66d70e4ccc30b55b65e5f615": { "_id": "66d70e4ccc30b55b65e5f615", @@ -58,11 +73,6 @@ "testid": ["t012", "t013", "t014", "t015"] } } - }, - "attributes": { - "gff_id": ["mRNA10001"], - "gff_name": ["EDEN.1"], - "testid": ["t004", "t001", "t004"] } } } From c887747d29114fb93ecc51bc5fc543628395db3a Mon Sep 17 00:00:00 2001 From: dariober Date: Mon, 2 Dec 2024 14:08:33 +0000 Subject: [PATCH 04/10] Process CDSs, tests extended --- .../src/GFF3/annotationFeatureToGFF3.test.ts | 117 ++++++++++- .../src/GFF3/annotationFeatureToGFF3.ts | 197 +++++++++++++++--- packages/apollo-shared/test_data/gene.json | 40 +++- 3 files changed, 319 insertions(+), 35 deletions(-) diff --git a/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.test.ts b/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.test.ts index 018837c48..bd7fd3a57 100644 --- a/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.test.ts +++ b/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.test.ts @@ -4,9 +4,10 @@ import { describe, it } from 'node:test' import { assert } from 'chai' import { readAnnotationFeatureSnapshot } from './gff3ToAnnotationFeature.test' import { annotationFeatureToGFF3 } from './annotationFeatureToGFF3' +import { AnnotationFeatureSnapshot } from '@apollo-annotation/mst' describe('annotationFeatureToGFF3', () => { - it('Convert one gene test fields', () => { + it('Test mandatory columns', () => { const annotationFeature = readAnnotationFeatureSnapshot( 'test_data/gene.json', ) @@ -20,14 +21,74 @@ describe('annotationFeatureToGFF3', () => { assert.deepEqual(gff3Feature.score, 123) assert.deepEqual(gff3Feature.source, 'test_data') }) - it.skip('Convert one gene test phase', () => { - const annotationFeature = readAnnotationFeatureSnapshot( - 'test_data/gene.json', - ) + it('Feature with no children and no gff_id has no ID attribute', () => { + const annotationFeature = JSON.parse(`{ + "_id": "66d70e4ccc30b55b65e5f619", + "refSeq": "chr1", + "type": "gene", + "min": 999, + "max": 9000, + "strand": 1, + "attributes": {} + }`) as AnnotationFeatureSnapshot const [gff3Feature] = annotationFeatureToGFF3(annotationFeature) - const cds = gff3Feature.child_features[0][0].child_features[2][0] - assert.deepEqual(cds.start, 1201) - assert.deepEqual(cds.phase, '0') + assert.isUndefined(gff3Feature.attributes?.ID) + }) + it('Feature with children and no gff_id has internal _id as ID', () => { + const annotationFeature = JSON.parse(`{ + "_id": "66d70e4ccc30b55b65e5f619", + "refSeq": "chr1", + "type": "gene", + "min": 999, + "max": 9000, + "strand": 1, + "attributes": {}, + "children": { + "66d70e4ccc30b55b65e5f618": { + "_id": "66d70e4ccc30b55b65e5f618", + "refSeq": "chr1", + "type": "gene_segment", + "min": 1049, + "max": 9000, + "strand": 1, + "attributes": {} + } + } + }`) as AnnotationFeatureSnapshot + const [gff3Feature] = annotationFeatureToGFF3(annotationFeature) + assert.deepEqual(gff3Feature.attributes?.ID, ['66d70e4ccc30b55b65e5f619']) + }) + it('Convert multiple scores', () => { + const annotationFeature = JSON.parse(`{ + "_id": "66d70e4ccc30b55b65e5f619", + "refSeq": "chr1", + "type": "gene", + "min": 999, + "max": 9000, + "strand": 1, + "attributes": { + "gff_id": ["gene10001"], + "gff_score": ["123", "345"] + } + }`) as AnnotationFeatureSnapshot + const [gff3Feature] = annotationFeatureToGFF3(annotationFeature) + assert.deepEqual(gff3Feature.score, 123) + }) + it('Convert invalid score', () => { + const annotationFeature = JSON.parse(`{ + "_id": "66d70e4ccc30b55b65e5f619", + "refSeq": "chr1", + "type": "gene", + "min": 999, + "max": 9000, + "strand": 1, + "attributes": { + "gff_id": ["gene10001"], + "gff_score": ["xyz"] + } + }`) as AnnotationFeatureSnapshot + const [gff3Feature] = annotationFeatureToGFF3(annotationFeature) + assert.deepEqual(gff3Feature.score, null) }) it('Convert one gene test attributes', () => { const annotationFeature = readAnnotationFeatureSnapshot( @@ -66,4 +127,44 @@ describe('annotationFeatureToGFF3', () => { assert.deepEqual(cds.attributes?.ID, ['cds10001']) assert.deepEqual(cds.attributes?.Parent, ['mRNA10001']) }) + it('Convert CDSs', () => { + const annotationFeature = readAnnotationFeatureSnapshot( + 'test_data/gene.json', + ) + const [gff3Feature] = annotationFeatureToGFF3(annotationFeature) + const [children] = gff3Feature.child_features + const [mrna] = children + const cds10001 = mrna.child_features.filter((child) => { + const id = child[0].attributes?.ID + return id !== undefined && id[0] === 'cds10001' + }) + assert.deepEqual(cds10001.length, 2) + + const cds1_1 = cds10001[0][0] + assert.deepEqual(cds1_1.attributes?.ID, ['cds10001']) + assert.deepEqual(cds1_1.start, 1201) + assert.deepEqual(cds1_1.end, 1500) + assert.deepEqual(cds1_1.phase, '0') + + const cds1_2 = cds10001[1][0] + assert.deepEqual(cds1_2.attributes?.ID, ['cds10001']) + assert.deepEqual(cds1_2.start, 5000) + assert.deepEqual(cds1_2.end, 5100) + assert.deepEqual(cds1_2.phase, '0') + + assert.deepEqual(cds1_1.child_features[0][0].attributes?.ID, [ + 'cds_region10001', + ]) + assert.deepEqual(cds1_1.child_features[0][0].start, 1351) + assert.deepEqual(cds1_1.child_features[0][0].end, 1400) + assert.deepEqual(cds1_1.child_features[0][0].phase, null) + + const cds10004 = mrna.child_features.filter((child) => { + const id = child[0].attributes?.ID + return id !== undefined && id[0] === 'cds10004' + }) + assert.deepEqual(cds10004.length, 2) + const cds4_1 = cds10004[0][0] + assert.deepEqual(cds4_1.attributes?.ID, ['cds10004']) + }) }) diff --git a/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.ts b/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.ts index 9caad6488..dfd4d340f 100644 --- a/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.ts +++ b/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.ts @@ -1,22 +1,24 @@ /* eslint-disable @typescript-eslint/no-unsafe-assignment */ -import { AnnotationFeatureSnapshot } from '@apollo-annotation/mst' +import { + AnnotationFeatureSnapshot, + TranscriptPartLocation, + TranscriptPartNonCoding, +} from '@apollo-annotation/mst' import { GFF3Feature } from '@gmod/gff' +import { intersection2 } from '@jbrowse/core/util' export function annotationFeatureToGFF3( feature: AnnotationFeatureSnapshot, parentId?: string, refSeqNames?: Record, ): GFF3Feature { - const locations = [{ start: feature.min, end: feature.max }] - // const locations = feature.discontinuousLocations?.length - // ? feature.discontinuousLocations - // : [{ start: feature.start, end: feature.end, phase: feature.phase }] const attributes: Record = JSON.parse( JSON.stringify(feature.attributes), ) const ontologyTerms: string[] = [] const source = feature.attributes?.gff_source?.[0] ?? null + delete attributes.gff_source if (parentId) { attributes.Parent = [parentId] @@ -24,6 +26,8 @@ export function annotationFeatureToGFF3( if (attributes.gff_id) { attributes.ID = attributes.gff_id delete attributes.gff_id + } else if (feature.children) { + attributes.ID = [feature._id] } if (attributes.gff_name) { attributes.Name = attributes.gff_name @@ -74,39 +78,180 @@ export function annotationFeatureToGFF3( } const gff_score = feature.attributes?.gff_score - let score = null - if (gff_score) { - if (gff_score.length == 1) { + let score: number | null = null + if (gff_score && gff_score.length > 0) { + if (gff_score[0]) { score = Number(gff_score[0]) - } else { - throw new Error('Unexpected score') + if (Number.isNaN(score)) { + score = null + } } + delete attributes.gff_score } - delete attributes.gff_score + + const locations = [{ start: feature.min, end: feature.max }] return locations.map((location) => ({ - start: location.start + 1, - end: location.end, + start: Number(location.start) + 1, + end: Number(location.end), seq_id: refSeqNames ? refSeqNames[feature.refSeq] ?? null : feature.refSeq, source, type: feature.type, score, strand: feature.strand ? (feature.strand === 1 ? '+' : '-') : null, phase: null, - // phase: - // location.phase === 0 - // ? '0' - // : location.phase === 1 - // ? '1' - // : location.phase === 2 - // ? '2' - // : null, attributes: Object.keys(attributes).length > 0 ? attributes : null, derived_features: [], - child_features: feature.children - ? Object.values(feature.children).map((child) => - annotationFeatureToGFF3(child, attributes.ID?.[0], refSeqNames), - ) - : [], + child_features: prepareChildFeatures( + feature, + attributes.ID?.[0], + refSeqNames, + ), })) } + +function prepareChildFeatures( + feature: AnnotationFeatureSnapshot, + parentID?: string, + refSeqNames?: Record, +): GFF3Feature[] { + if (!feature.children) { + return [] + } + if (feature.type === 'mRNA') { + const child_features: GFF3Feature[] = [] + const cdsLocations = getCdsLocations(feature) + let cds_idx = 0 + for (const child of Object.values(feature.children)) { + const gffChild = annotationFeatureToGFF3(child, parentID, refSeqNames) + if (child.type === 'CDS') { + for (const loc of cdsLocations[cds_idx]) { + const gffCds = JSON.parse(JSON.stringify(gffChild)) as GFF3Feature + if (gffCds.length != 1) { + // Do we need this check? + throw new Error( + `Unexpected CDS: ${JSON.stringify(gffCds, null, 2)}`, + ) + } + gffCds[0].start = loc.min + 1 + gffCds[0].end = loc.max + gffCds[0].phase = loc.phase.toString() + gffCds[0].type = loc.type // Do we need this? + child_features.push(gffCds) + } + cds_idx++ + } else { + child_features.push(gffChild) + } + } + return child_features + } + return Object.values(feature.children).map((child) => + annotationFeatureToGFF3(child, parentID, refSeqNames), + ) +} + +interface TranscriptPartCoding extends TranscriptPartLocation { + type: 'CDS' + phase: 0 | 1 | 2 +} +type TranscriptPart = TranscriptPartCoding | TranscriptPartNonCoding +type TranscriptParts = TranscriptPart[] + +function getTranscriptParts( + feature: AnnotationFeatureSnapshot, +): TranscriptParts[] { + if (feature.type !== 'mRNA') { + throw new Error( + 'Only features of type "mRNA" or equivalent can calculate CDS locations', + ) + } + if (!feature.children) { + throw new Error('no CDS or exons in mRNA') + } + // In AnnotationFeatureModel we have `children.values()` + const children = Object.values(feature.children) + const cdsChildren = children.filter((child) => child.type === 'CDS') + if (cdsChildren.length === 0) { + throw new Error('no CDS in mRNA') + } + const transcriptParts: TranscriptParts[] = [] + for (const cds of cdsChildren) { + const { max: cdsMax, min: cdsMin } = cds + const parts: TranscriptParts = [] + let hasIntersected = false + const exonLocations: TranscriptPartLocation[] = [] + for (const child of children) { + if (child.type === 'exon') { + exonLocations.push({ min: child.min, max: child.max }) + } + } + exonLocations.sort(({ min: a }, { min: b }) => a - b) + for (const child of exonLocations) { + const lastPart = parts.at(-1) + if (lastPart) { + parts.push({ min: lastPart.max, max: child.min, type: 'intron' }) + } + const [start, end] = intersection2(cdsMin, cdsMax, child.min, child.max) + let utrType: 'fivePrimeUTR' | 'threePrimeUTR' + if (hasIntersected) { + utrType = feature.strand === 1 ? 'threePrimeUTR' : 'fivePrimeUTR' + } else { + utrType = feature.strand === 1 ? 'fivePrimeUTR' : 'threePrimeUTR' + } + if (start !== undefined && end !== undefined) { + hasIntersected = true + if (start === child.min && end === child.max) { + parts.push({ min: start, max: end, phase: 0, type: 'CDS' }) + } else if (start === child.min) { + parts.push( + { min: start, max: end, phase: 0, type: 'CDS' }, + { min: end, max: child.max, type: utrType }, + ) + } else if (end === child.max) { + parts.push( + { min: child.min, max: start, type: utrType }, + { min: start, max: end, phase: 0, type: 'CDS' }, + ) + } else { + parts.push( + { min: child.min, max: start, type: utrType }, + { min: start, max: end, phase: 0, type: 'CDS' }, + { + min: end, + max: child.max, + type: + utrType === 'fivePrimeUTR' ? 'threePrimeUTR' : 'fivePrimeUTR', + }, + ) + } + } else { + parts.push({ min: child.min, max: child.max, type: utrType }) + } + } + parts.sort(({ min: a }, { min: b }) => a - b) + if (feature.strand === -1) { + parts.reverse() + } + let nextPhase: 0 | 1 | 2 = 0 + const phasedParts = parts.map((loc) => { + if (loc.type !== 'CDS') { + return loc + } + const phase = nextPhase + nextPhase = ((3 - ((loc.max - loc.min - phase + 3) % 3)) % 3) as 0 | 1 | 2 + return { ...loc, phase } + }) + transcriptParts.push(phasedParts) + } + return transcriptParts +} + +function getCdsLocations( + feature: AnnotationFeatureSnapshot, +): TranscriptPartCoding[][] { + const transcriptParts = getTranscriptParts(feature) + return transcriptParts.map((transcript) => + transcript.filter((transcriptPart) => transcriptPart.type === 'CDS'), + ) +} diff --git a/packages/apollo-shared/test_data/gene.json b/packages/apollo-shared/test_data/gene.json index 572efcce7..3f5d27590 100644 --- a/packages/apollo-shared/test_data/gene.json +++ b/packages/apollo-shared/test_data/gene.json @@ -58,6 +58,19 @@ "attributes": { "gff_id": ["exon10004"], "testid": ["t010"] + }, + "children": { + "xyz": { + "_id": "xyz", + "refSeq": "chr1", + "type": "exon_region", + "min": 5300, + "max": 5400, + "strand": 1, + "attributes": { + "gff_id": ["exon_region10001"] + } + } } }, "66d70e4ccc30b55b65e5f617": { @@ -65,12 +78,37 @@ "refSeq": "chr1", "type": "CDS", "min": 1200, - "max": 5000, + "max": 5100, "strand": 1, "attributes": { "gff_id": ["cds10001"], "gff_name": ["edenprotein.1"], "testid": ["t012", "t013", "t014", "t015"] + }, + "children": { + "abc": { + "id": "abc", + "refSeq": "chr1", + "type": "CDS_region", + "min": "1350", + "max": "1400", + "strand": 1, + "attributes": { + "gff_id": ["cds_region10001"] + } + } + } + }, + "66e049f17b9cedae9ad89106": { + "_id": "66e049f17b9cedae9ad89106", + "refSeq": "chr1", + "type": "CDS", + "min": 1300, + "max": 5200, + "strand": 1, + "attributes": { + "gff_id": ["cds10004"], + "gff_name": ["edenprotein.4"] } } } From 3d18e6c0d525ba9f6d8cf22ace8a5ff1188b23c9 Mon Sep 17 00:00:00 2001 From: dariober Date: Mon, 9 Dec 2024 17:28:01 +0000 Subject: [PATCH 05/10] Temp commit: Need to fix handling of missing refSeq and proper testing --- .../.development.env | 4 +- .../src/GFF3/gff3ToAnnotationFeature.test.ts | 8 + .../src/GFF3/gff3ToAnnotationFeature.ts | 112 +++ .../test_data/cds_without_exon.gff | 914 ++++++++++++++++++ 4 files changed, 1036 insertions(+), 2 deletions(-) create mode 100644 packages/apollo-shared/test_data/cds_without_exon.gff diff --git a/packages/apollo-collaboration-server/.development.env b/packages/apollo-collaboration-server/.development.env index 4a7461026..5fcf32278 100644 --- a/packages/apollo-collaboration-server/.development.env +++ b/packages/apollo-collaboration-server/.development.env @@ -32,10 +32,10 @@ SESSION_SECRET=g9fGaRuw06T7hs960Tm7KYyfcFaYEIaG9jfFnVEQ4QyFXmq7 ############################################################################## # Google client id and secret. -GOOGLE_CLIENT_ID=1054515969695-3hpfg1gd0ld3sgj135kfgikolu86vv30.apps.googleusercontent.com +GOOGLE_CLIENT_ID=1000521104117-bhd8r4v11cc053g0b80ui00ss9s5fitv.apps.googleusercontent.com # Alternatively, can be a path to a file with the client ID # GOOGLE_CLIENT_ID_FILE=/run/secrets/google-client-id -GOOGLE_CLIENT_SECRET=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 +GOOGLE_CLIENT_SECRET=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 # Alternatively, can be a path to a file with the client secret # GOOGLE_CLIENT_SECRET_FILE=/run/secrets/google-client-secret diff --git a/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts b/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts index 64ed0c7ed..e794889fc 100644 --- a/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts +++ b/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts @@ -197,6 +197,14 @@ describe('gff3ToAnnotationFeature examples', () => { }) }) +describe('CDS without exons', () => { + it('Convert mRNA with CDS but without exon', () => { + const [gffFeature] = readFeatureFile('test_data/cds_without_exon.gff') + const actual = gff3ToAnnotationFeature(gffFeature) + assert.deepEqual(JSON.stringify(actual), '') + }) +}) + describe('gff3ToAnnotationFeature', () => { for (const testCase of testCases) { const [description, featureLine, convertedFeature] = testCase diff --git a/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.ts b/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.ts index cd4c368d9..dbdc79c28 100644 --- a/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.ts +++ b/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.ts @@ -154,8 +154,19 @@ function convertChildren( const { child_features: childFeatures } = firstFeature const cdsFeatures: GFF3Feature[] = [] + const exonFeatures: GFF3Feature[] = [] + const utrFeatures: GFF3Feature[] = [] for (const childFeature of childFeatures) { const [firstChildFeatureLocation] = childFeature + if (firstChildFeatureLocation.type === 'exon') { + exonFeatures.push(childFeature) + } + if ( + firstChildFeatureLocation.type === 'three_prime_UTR' || + firstChildFeatureLocation.type === 'five_prime_UTR' + ) { + utrFeatures.push(childFeature) + } if ( firstChildFeatureLocation.type === 'three_prime_UTR' || firstChildFeatureLocation.type === 'five_prime_UTR' || @@ -174,16 +185,117 @@ function convertChildren( } const processedCDS = cdsFeatures.length > 0 ? processCDS(cdsFeatures, refSeq, featureIds) : [] + for (const cds of processedCDS) { convertedChildren[cds._id] = cds } + const missingExons = inferMissingExons( + cdsFeatures, + exonFeatures, + utrFeatures, + refSeq, + ) + for (const exon of missingExons) { + convertedChildren[exon._id] = exon + } + if (Object.keys(convertedChildren).length > 0) { return convertedChildren } return } +function inferMissingExons( + cdsFeatures: GFF3Feature[], + existingExons: GFF3Feature[], + utrFeatures: GFF3Feature[], + refSeq?: string, +): AnnotationFeatureSnapshot[] { + if (!refSeq) { + return [] + // throw new Error('refSeq is missing') + } + const missingExons: AnnotationFeatureSnapshot[] = [] + for (const protein of cdsFeatures) { + for (const cds of protein) { + let exonFound = false + for (const x of existingExons) { + if (x.length != 1) { + throw new Error('Unexpected number fo exons') + } + const [exon] = x + if ( + exon.start && + exon.end && + cds.start && + cds.end && + exon.start <= cds.start && + exon.end >= cds.end + ) { + exonFound = true + break + } + } + if (!exonFound) { + if (!cds.start || !cds.end) { + throw new Error('Invalid CDS feature') + } + const newExon: AnnotationFeatureSnapshot = { + _id: new ObjectID().toHexString(), + refSeq, + type: 'exon', + min: cds.start - 1, + max: cds.end, + strand: cds.strand === '+' ? 1 : cds.strand === '-' ? -1 : undefined, + } + for (const utr of utrFeatures) { + if (utr.length != 1 || !utr[0].start || !utr[0].end) { + throw new Error('Too many UTRs') + } + // If the new exon is adjacent to a UTR, merge the UTR + if (utr[0].end === newExon.min) { + newExon.min = utr[0].start - 1 + break + } + if (newExon.max + 1 === utr[0].start) { + newExon.max = utr[0].end + break + } + } + missingExons.push(newExon) + } + } + } + const mergedExons = mergeAnnotationFeatures(missingExons) + return mergedExons +} + +function mergeAnnotationFeatures( + features: AnnotationFeatureSnapshot[], +): AnnotationFeatureSnapshot[] { + if (features.length === 0) { + return [] + } + features.sort((a, b) => a.min - b.min) + + const res = [] + res.push(features[0]) + + for (let i = 1; i < features.length; i++) { + const last = res.at(-1) + const curr = features[i] + + // If current interval overlaps with the last merged interval, merge them + if (last && curr.min <= last.max) { + last.max = Math.max(last.max, curr.max) + } else { + res.push(curr) + } + } + return res +} + /** * If a GFF3 file has CDS features that either (1) don't have an ID or (2) have * different IDs for each CDS, we have to do a bit of guessing about how they diff --git a/packages/apollo-shared/test_data/cds_without_exon.gff b/packages/apollo-shared/test_data/cds_without_exon.gff new file mode 100644 index 000000000..60471a656 --- /dev/null +++ b/packages/apollo-shared/test_data/cds_without_exon.gff @@ -0,0 +1,914 @@ +##gff-version 3 +##sequence-region 53cb6b9b4f4ddef1ad47f943 1050 9000 +ctgA example gene 1050 9000 . + . ID=eden +ctgA example mRNA 1050 9000 . + . ID=eden.1;Parent=eden +ctgA example five_prime_UTR 1050 1210 . + 0 ID=five1;Parent=eden.1 +ctgA example exon 1211 1510 . + 0 ID=exon2;Parent=eden.1 +ctgA example CDS 1211 1510 . + 0 ID=cds2;Parent=eden.1 +ctgA example CDS 1611 1710 . + 0 ID=cds2;Parent=eden.1 +ctgA example three_prime_UTR 1711 1800 . + 0 ID=three1;Parent=eden.1 +ctgA example exon 1201 1500 . + 0 ID=exon1;Parent=eden.1 +ctgA example CDS 1601 1700 . + 0 ID=cds1;Parent=eden.1 +ctgA example CDS 1201 1500 . + 0 ID=cds1;Parent=eden.1 +ctgA example TF_binding_site 1050 1100 . + . Parent=eden +##FASTA +>ctgA +cattgttgcggagttgaacaACGGCATTAGGAACACTTCCGTCTCtcacttttatacgat +tatgattggttctttagccttggtttagattggtagtagtagcggcgctaatgctacctg +aattgagaactcgagcgggggctaggcaaattctgattcagcctgacttctcttggaacc +ctgcccataaatcaaagggttagtgcggccaaaacgttggacaacggtattagaagacca +acctgaccaccaaaccgtcaattaaccggtatcttctcggaaacggcggttctctcctag +atagcgatctgtggtctcaccatgcaatttaaacaggtgagtaaagattgctacaaatac +gagactagctgtcaccagatgctgttcatctgttggctccttggtcgctccgttgtaccc +aggctactttgaaagagcgcagaatacttagacggtatcgatcatggtagcatagcattc +tgataacatgtatggagttcgaacatccgtctggggccggacggtccgtttgaggttggt +tgatctgggtgatagtcagcaagatagacgttagataacaaattaaaggattttacctta +gattgcgactagtacaacggtacatcggtgattcgcgctctactagatcacgctatgggt +accataaacaaacggtggaccttctcaagctggttgacgcctcagcaacataggcttcct +cctccacgcatctcagcataaaaggcttataaactgcttctttgtgccagagcaactcaa +ttaagcccttggtaccgtgggcacgcattctgtcacggtgaccaactgttcatcctgaat +cgccgaatgggactatttggtacaggaatcaagcggatggcactactgcagcttatttac +gacggtattcttaaagtttttaagacaatgtatttcatgggtagttcggtttgttttatt +gctacacaggctcttgtagacgacctacttagcactacggccgagcgcaataacccccgg +aaagcacttgctactgggaggcgggtttatccatcggcaataggggttatcagtactacc +aagaagattgtgaagatattaacagcattgaaaaaagttcggactgggcatgaaacgtgt +gtcagagttagagtccttgagggactgaatgggtttgtcccaggcccaagcttgaggtgg +atgtcacctcgggtactgcctctattacagaggtatcttaatggcgcatccagccttgtg +gctgggtctacgtacgcgtgggcaccatacgtatgttggcaggaaaggtcaatcatgctt +gtttcctcgtcgcagaaacgttcacactattggctcgcgggatcgaacgggcctgattat +ttttccagctcctgcgttcctatcacgccaactgtcgctaataaaatgttatatagagat +aacccattgctatgcaaggatggagaaaccgcttcacaacaccctagaattacttcagca +ctaacatctaagataccgggaaaaccgtaggtgccacttggttttgagggcaatgcctct +tgcactggcgattcgtggagtaaccttgctaccgatttccaccttttctaggtatgatta +catgcgatcgccattgtcagtcgtcgtacgaaatccaggaaggaattcgaatacatgacc +gaaagctatggcatcataagcgtggctctttactaaggacacgtgtagtcggctgatttc +gcgcagaacttcgctcaccggacagtgactgctgtccgaacttgggggcagcgtagattc +tagctagagaccgcagcgaaaatgccactcgctaggtggctatggacgtccagctatagg +cccccacattcgtatgtatactccggtaattgagtctacctttcgaaagatcaaagttca +caaaccttctctcacgttaacatagatgctcgcacatagctagtccggtgataaagcggc +cgtgcatgcgagtatttagcgcacggaacagatattaggctgcgaaataccctcatatgc +tacagcgcaggtaacagacggttgatctccccgtagaattccctcaaggccggatcgttc +tcatgagtagcctttgtctgacatttcctctcaatagattgcagactcctgtgtcgtggg +aaaacttcgcggaatcgctgtcgttacataaatctaaacagtgcagtccaagctatttac +taccacgtatcataatgaacatctctttgtgaaatacgccggtacgccgagagatcgccc +acattgcgtgtcaccaagacccagtgtctattgggcgcgctggttatagtatgcggaggc +acccgatgtgctgctaatcactacaaactcgacaccaagaggcgaccgcgtgcgggaggg +ctagaacgcgagagccacgtcaacgtggtggtaccgctcctgatgatacgagcactgtgc +tggcaccgctgccctatgtccagctaacaggtgtcacataatatcgcatctatagttgag +tatcttatcgctagcgttacttgtgagttctcgggtagaatcgtcgtagcattctagtcc +ttagcgtaagcaacgatatcgccgatattataggggcaataggtctcacccaccacgggt +gtatatttaacgccctaagtagttaacggagactattaaacaattatgcacgtgaaagat +tagggtactgcacacgtgtcacgttgacgatgtaaacaccataaaggttgttctgtgatg +cgatgcccatacctgcggtgcaacgtcctaaacttatgcggatcaccttctaattcagcc +gacgttgagaacgccagctatactctgtgagtacaaggtgaaagccccagatcaaaatat +tggagtcttgtccgtagctcttgggtggagagtgtgaccacgtttatcccttaatactga +gactcttctcgttccacgccccggacattcgcgatacagcctagtttgataggtggctac +aagggctctatccgaatagcacaaaactcagtcctagtgcgtataggtcgtgctctgcgg +ttatacgctccacagaagaaagtacccactgcagtttgtcggtgggacttagcaaaacct +gtaaccaaaaccgcacatggactctgttttggtggcgtggagtgtgcccgccgcctttat +gctgggtcaaaatactatatgatttgttgtggtcgggcgcgacattggctcttatggttt +ataactattattcggtgcacagaaccgacttatgccccgattttgcacctcccgagagaa +atggtactagtcattgtctcggggttttacaactcaaggataatctgccagcggtacccc +gaaaaatcccatcccgaacggctggtacctatgctggagaccgaaatggagccagtgcta +tgcaaaccctcgctctttctatttggcccgaccagctgcgagagtcattcgacatacatt +gcgatggctggttcatctctctgcttcggattccgatcctgctgcctatggccacaacac +gaaagagatcttggatacgtcctcaccatagtccaacggacaaatatggaggtgtacgcg +cacggatacgagtacgacgggccgcccaatgctggtccttcgtgttatactgtttacttg +cctccggtaagagtgtacagcttgtacccacgcggtggaggaccttacgcgtcgtccgca +ggtgtggagatttcgctacctgttgcattggggcctcgccttacgtttttttcgacggag +gcccgacccgccaggccagaccctcatcattgggatttttactgccttggacggcagatt +ctgatgctggtaacgcgcttggcacacaccctggtcgtaattacgtactatcacctctat +tttaccaggggcgggccccgcctcaacgatcggtagtctcgttagaacagttgcttatac +tccgttagcactcacaactcggagaatgagcttacacagtccgtaatagtctccacagct +tcccctggaaggttcgtacaggggccctggggtctaatgtgctaccgcctaacttcgtta +gtgtaaggtctcgtgtgcacctccaataacgctgcatttttgtatagaactcctttatag +gcctgactcttcggaacagcccactaattgacgtgcacgatgctgcatatacccttctca +atgacgcataccggtgactaaatcctgtggctggttgacactttgcataacccaccacga +aaacgcggtttgcacgtggttggaagcgagtaccggttaagggaagttacgtaaacccaa +catagtgagcgatgcttaaaacactgcatgcgaacaacgaccggaacgagagcctaatag +aaccaaatggataggtggttacgaaatccgcgcacaagtgtcgtgccactctcgtaaaga +atgattgcttgcttatctatgagaaacctaataggcacgcacgtgtcgccagggaaaaac +caccatccgacaagacgctgaaaatcgagatagatgcagcccgcgcgtactttaagcacg +atagccatcttgagcgcgacgattcagcttacggagattacttctcaaagcggacacctc +gacgctgaagctctctatcgatgcacttgcatgtccatctacggcttgagcgaaccatgc +ttaacgcttggatgacgttgttagtcggattcgatcgtaccatactgtcctatccatcat +aacttcccgaattaaaactcattttctaccctcatttgtttcattggcgcatatgagcgc +cccatgggcgctgatcgacacaggcttgtagcgcacggctcgtcttgcagtagataacca +tagtctttacgtcgcatatattaatatatcctcagacttccactgcggtattttggatcg +tggcgtctgggacagtcacactctttaacgggagcgcgttcaatgtctgagtccatccgt +gcagttcggtgcgattttattcttccctgcatcttccggattccttcttcttgtcggaga +attcgccgctcgatcaaactcatcattggtacttattgaccgtttcatgctattacaccg +gtacactgatgataacgctgcggttttatcccccccaatacgcgcacacacgcttctttc +ggtgcgtcctgtcgtacccctattgcgcatcatggcactccagccgagcaatgctttgga +caggtgtaaccaagctggatctagcgtgggttcgacacagctcggttcgtataaccacac +gcatgaactgcgagtgcctcgccccagcgccgctgtctacttgctgtttagaggaagaag +caacagcttgcacccaatccgcatatctgcatataggtggccgttcctccgtggcgcgcc +gctacgttgtagcgcacggatcaaaagcgctgtgttactatacactggttcggagacggt +tcatgacgagcgcgctatatgtcggcatctgcgccccatgagcggccgctgtccggcggc +acgaataatatagtgcaagaaaaaccgaagactacggttatatatgatggaacggccctc +acagcattctaacaggtttgacaaacttaataaatgatgggccgcgcctgctgtgaatcc +cggacggtttggccggaaatacctaggcagtctttggaaaagcttttcctagatcaccat +atcgttgtcagtggccaaggtttcgttaactctcggcgtacccagtatcggcgcaatagg +cctttgatcaacccttggaagattagtcgatcgtaacttcctacatcccggtgaaaggct +ttaattctaagtcttcgccacaaacgcttcaggagcgtcgagttctatcactttcgaggt +actgccacttactatacaccggtacacttgttaagcaagtgtttgcggatgtggttaaat +tttgatggcagatttctgggtgttttagctatagctgtatcctcgaggtcgtcgtttaac +cgctcctgttgctagatcatatgtatcgttgttcgagacgctagtagtcgctcctaccga +atcttacaggttcgatctctgctcggcttttgccgcgggccggtgcttgtgctataatta +taatgataagggcagtggccgcacaactgcagattactgacacttgagtgagaactaagc +cttgaagcatagtgttgaatgttgtagaaaagtatactgtgacaaaaacagggcctggtg +tcaagtgtcctcagtgatctggatatcatcacgccttgttagcaggatttacccgcatag +taatggccggactttatattgccctgctgcgctagctagtactgcgggggctctcttccc +cctattgatattccgggcagaatgcgcgggtagtcagcattcatgtcaggcttctatcaa +cgtctcattcacccttggagtgtgacctacgtgttagaggcaatgtagcccgagagcccg +ttcaaagacaaactcccgaattaaacagacaccggttatgggagtgtgagtagtgacttc +cgaccagtgtttggtttcagcctgtcggttaacctcgcagggctaggagaatgagctgct +agtaggtgattaccgaagtctccccagaaagggaacagtcttatagagtagagaatgtca +aatagcgttattgagttctgtctactgcactaccaagagcatgcccaccagagatgcgtc +gcagtcgtagcgtagacgtcgtaatgacccaggtggcgttcggcctattcgcgtcggacg +cgcgcgctactacaacgaagggttctgaaagtgcatgttcacactgaaattctaagtgtt +agtaaacaaacgcgtgtattccagggtcgtgtgtgacattatggctgtctgcgcccctca +atgatcatcaagacgttcaattgtatgttaacgtagatatcaggtttagttatccgtata +ctttttacgccgcgcgcttggaacagattctcctaacagccctcgcggtttcaaaaagaa +ccaaagtctataccatccttgttcccaattctgcctggttgcggagaaaagaccgcctcc +atacgtacccgactcggtattggtaagtggggaagcagtcgaacgcatatttcttggtta +tatcacaggccacgttctatatcggaagtggccggattacgatttgacgttctatccccg +agagcgcattcttgtttgttactactaccacgcgggcgcttcttattcccagaccagagg +gaatgtgcggaagctttttccacagattggcggaactccagcgtgcttagtgcaggcgga +gaacgtccgttcagtggtgcgtgctttatttttcaatctgacccgacctgcgctcaaggg +ttgcaagttgttgtgcgcccgagtaataggcgactcgtcgcaatgggtctggtacattgc +attttcatcggtacggcgcttcataaagtgcggcaaatttctcaccccagtactcatgtt +tatagggtatcaggacccgaagcttctctgttccaaagaaatgtactgtttggctcccct +gtccatattggaggtagatcacttgtggattatgcaatgaatgaatgaaagtttggtgct +ccccaccggggcgctctcaaagagagtgagctaaatttgaacatttaaattgctattcca +acccggagtcctgaccggaacagtaatgaaacttcaaccatgccggacagactagaagaa +gggaaagttgcttgtatatgggagtaaaaatgatgttggtgccgtaatggtgccggaagt +gactatagagcatgtcgtgacgcaccggtaggcagtgctataatcgtatgtccttcaggc +gccgccggacctacgaagctgaaattagacaccggcacactagccccgtcagcgacggtg +cgcggccgctgccctgcagcgaatggggctaacacgcataaaacgcccgcataaccactc +gagctacgggaattcactcaggctgttgcttcgacgtgtagtctcattacataatcataa +tacctccaagaccaacggctgctcatgactctcttaccttgttagggacatttcggcact +agggaagagctgaggactttgaaaacgtcgataaaaccatcgcgggaactagctgcgtta +gaactccatattttacgggtcgcaagcttgaggtcctgtcccggcagctgcaagtgctac +ggcaggaggggatctacctaacgtgcagtaacgagcccctgcccgtaatgaggcgtactc +gtctctaatcgtcagtaagttactatgtccgaggacgcctctacgagttgaactctggct +aggcccacctgtccgcgccctgctcgggtaccccatctgcttatccaacttcacctcgcc +ttacggaatctctggttgccagtcatccgatggtcattaagcagcgtggtacatcgtagc +caatacttcagggcgccagccatattcccagccaagcggctgcataattacagcgcctgg +cacgactaatcgatcccacaagcctggtagatgacccttagccctaaagcgccctctgac +ctatctgcacgtgatacttgattatttgtaatgagcggacagggtagatgactaatatac +agggtcgtctaggtttgcacaatgcagacatcatccgcgcaaggcccggacggctgtact +cacacagctagctccatctccctcagcagcactaagattcccacgtgaccagggcgacgg +gcctcagccaaacgtatccttgatatctacttaagtcaaggttgactccgaaccctatgg +gtcggtgccgttaacagggagtctatatctcggcgttccattgcttgtttcaaactcctg +ctataaggtgaaagcgctggagggcatagtttatgcccaaagttgcgcgtagatccgtcg +ggatatgtgctataataaggactgctcgaggtaggcggtaacggctcccgccttcagtag +gcgcggaactcgaatcggagttacaggacttgaccgagtcatatccaagtttatgttacc +cgatcggatccggatgctcgtatctgcgacgaggtcggaagacggacgaaatacgattca +accgcgcgaaccattagcatctaacctttagcctcaatgggtgttaacgtggtgggctca +ctcggcgtactctttgtgcaactattccgtatgaacaacagtcaagttgcgacatgatgc +tcttacgtgattcccacagtttcccacctcaggatgctttctttagctaaacccaatagt +tatggcggcaccttcagactcccacgaggacggctatgacgttgctaaaccattcgccgc +caaaggctagcgctaacgagttctgtgagtttgttccgggccagatctccaggatggtcg +caacaacgcagtaccggttttatactggtgaccctctacctgttattaagttacagcgtt +gtcctacgtacatggtcgtgagtactcgcgtagtcaaacgccaggactagcagcgagaat +tattgtcgcgatacttactacaatacttacccgatattgacgtgcagggttgaaagagat +ggacagttgaatatctattttgacggaatcctcaaaactccctccacctcaggtaacggc +cctgtccgggaccgcattcttgcatatattggttcccagagcgtatctaagttagtctct +tgaccgttcaccgactctagggcgactcgttatcgccctccgaaacgatgctttcgttac +ctcaatgatgacaggctgtaacgtaagtgatcccaatctcactcgtgccttgtccaccgt +tccgtgaagacgaagcaatacgcggaatacgtggcttcgtaatattttgacgatatgggg +ctgggacgctcaagacttccatgacaaacaaagtgaagagcaactgcatccctcatcatg +atcactattaccagagtagcgatggataacgctaatttggtcagggcagctatcgcatcc +cgcaggtgtaggcggagactttttcttttgttgcgagttgacaggtaatctcacggtata +agcacggttatttacgcaagcgacgtccctgggagaatccgcccacgtaggaccccataa +tccataaatactgcggtcgaaaccttcatatcgtgacagaaccgctttctagggatgcgg +tccccgcattcagagttctactttggccagcgtgagacttaacaactccacttacgcggt +acattgaagtcgttcagtccagtgtgacctgtgtaccgaataacgtgtagaccagcgcgt +ctacgacttagcgcggctccactccaaagcaccttttgggactttccaacgagcctgttg +gccgttaagcggtatttcacaaatagatcaccctagtgtcggtaaccgactaccctattg +ggatcatcgtgagctcgaaacactagaggcggaccaacggatgacatttgattcggctct +acagagcttgtcgccagagaaaaactgtggcaatctacgctcgcggggaattgactttag +cggcccctagacaggtgtgggacactagtctagattcacgtcctacacgacataacagca +ccttcctggccagcccagaaatagtacctggacgacatccagccttccgacgccataatg +tgagccgtagcgcccacgacgatcaacgaggagaaatttacaaaggctgtgtgaatgcta +cgtcgtctaccattgctcatcgaaacgaacgcaacgcacagcatacaacgtttaccatgc +cggagcgggatcctcaagtacagaaaacagaggtctaaacatgatccgaacaaatcggta +ggtttacacagctacctcgtccattggcgtactgcatcgatcgtgcttactacggtcatg +ccggcccgcgatgcacgtacgaaggaataccctgtctgcccccgcgcgagttacgctgtc +tcgcacataccgagcactgtcgttcgaagctaaactatgagcccagccgagctccttatg +gccgcaacgctggtgcggccagctgataaattccacagtacacgatcctcgtgtaagatc +tcgggcatagtaagtcatttcacatggttaggagagatagaatacatggttctggtagct +caaccaggatttgtggaacccttggcccttggtgagtgctacaataaaattctccgtatg +ggacaaccaaagggtgctggatgtgacttcccggcccaggttagatgtccatatcattca +tacattgcccgaccgacccaatgcctaaatcagaggcgccttagctagttcttgtagtgt +gccacgtccggccacgcagacacgaccctcggcgagtgatcaccattaccggattggcat +cgaagtctttttctgggaagttagccagtttggtgtgcggtgcttagaatcttattccca +gtcaaacgcccctgggacgaattgctaaccctagttgccacgccggaaccatcttcggga +gagtagacaaatccgagttagatatgttagcgtcttcgtgagtctgaaatgtatcacttc +accgcagaatacgcgaatgtctgtttgccctggactgacggaattggcttaaaagccgac +tagagcattttggtacggttcctatccgcgatgtaattacctatctaggttatcgctaga +cgaatagcgagtacagtgtagcaggccctttgttagcaagttgctctaaacagttgtcaa +aacgtaggcacaatagtgcgattcttctaaatccgggaagctcatggcgctgggcagaaa +tatcacatacgggaataatcaacctccattttggttcgttttactcgatgagtgcccctt +gcttgagacgagcgttctgagttgatggcatgtcgaaaggtttacgcggtgagtagagca +ctttacccctacagatcggaatcctcgaggaggacagttggaacttcacattaacctttg +ttcgattgcatgaaggttgtgttctgggagtaggctcccaaggtagcggttcatgctggg +ggcagccctaagtttgtattatgtgagtttgcgtctgaaactacatttagcatgaggaac +gtaagctttctggagggatcttctaaagccaggtatcgcccgctacgatgccggagccgg +tggttcacagctacctgtgctcaaaggcttaaggctaatcatagcaacagtgcgaaagga +cgtctttcagatttcgaaaggtgctgacacaacaagggtcagggcggtcctacccttctg +attccctacctggttttttagcaagggtcaaggctaggcttatactcccgaacgctttaa +acactatcccacccctgacggggggaagttgcgcgttaagtataagaataagatttaaca +gtacactttaggttcctcttccgcgagccgtcatacagcaccgagcgccgttgaaacgcg +attaacgcgtattgtcgtgcgaaaaaaaaacgctcgccagcatattggagtgtcgacttg +aaatattgaacaacaccgcatatcaaggacgaatagtagggcttcactacctccacctga +cggccctaggacttatactcgaaaaagaccttccatcacgatgtcccttaccggcgagag +ggctatatacgcatgaatagcagatcttgccgtcgctgagtgtcacccagggttgctcca +gaaaggagatagggcggagagccatcgacagcagctctcgtctaggtggtagcagctaag +gagtcgtgtcgtcgcgccgagttggaacattatcgatgtacatcaatgcagtaatgatgc +tgatagactcgggagtttcctcaaacccagagttacgagaagacgcaggtctatcagtta +gaaggagtcagtattggcctttgaaagatcttatgctcatgcccaatcgtagttaaacgc +gaatcgggaaggccaatctggcggttttgacccccggactcttaagacgtccaatgtggc +tagacataagtaacgaattactctataccgaggggcgggaaccggccaatttattacgag +agcacgagagccttgtagcggccagcacactatcctcgagtccctctatcctgagacgta +gatatacatatacgcctagagagaatagccgtctaggcttccgtcgccctctccgtcgtt +cgcgtgaaccgtaagtcttccgcattcccttcctcaagcgcgttggtgtgagagtggtat +tgaggcccagtcttataacgcatatacttgtgcactctattacttaccatgggaaccaat +ggcactctcgaatcatgctcacagctgagcaacggtgctgctcaccaattacatatgagt +cgtggtttagcgttggagcggaagatgaatttccatctgttcgcgcgcatcactaaccaa +tatacggttatcccagcgctatctagttctgaccgggttggtagcgaacccttttgcaag +ccggcttagtggatgtgaagtgggagtgataacttaagccgccacgttcgggggggactc +gtttatattggtgctggaatacgaacggcgtgattcgtagtcgccctaatcgggcgcgac +aacacatgtagtactgtcgaggcggtttaaacccacagtaggtactctatcagcagaatt +atgctagaagtttcacaacaactttccgcatgaggctcagcggcagcgtcgcactcccaa +tggccagtgccggtagcgatgtttggtggaattagttccttcggaatacgaaccggattt +aaagagcctcggagaacctaaacgaatccgtacgcatcttgcccaaggtgctgagccttg +tcgcctttctcagttccacctacattaatgcaatgcgttcgaagctctgaccgcaaacag +gaatcaagttcagacagagtgcaagagtttcgcaataattgggaacgacccacttgatat +aggtgcttttagagatgtgtgtacgaccgtccttcgagcatacctacgggttacaattgc +tccggtaagtcaaggcacatagaaaacatagccaactgagagtgtatacaagattacctc +atgtagactgaaatacacacatcgctttaagctctcaaccgatgtagaacagattttggg +cggcgttgacagcgtgcccgctcaccggtttgctcccttctcaccaaataaccatgagac +gactttggtgactggactgccagatgacgggctacaaccgttttggttccgaattcgctc +taactcaactaacatcatactatatgcgccaggatattctcgcggttggacccccctgcc +aattcgggttaaaaccactccccccatgtagggagctgccgcaattacaatatcgacgat +cccagatggacgctcacaaaatatcagtcctttcacgatccgctcatatagacggatgaa +gggactgaggctgttagatagtgacgtcgagcatggcgtagacgagcgcaaccgggtcga +ggcccgcattaccgtgacacccagttgaaaggatttacactgcttcattcgatatttacc +actttgtatgaggagctcaacctaagtcaacacggaccatcatacaggtcgccagtaatg +agaaggctgctgtgccatggagaagcgctgctacagcacacaacgaacatcttgcaatgt +gaaggagggtgctcttttgggatgagcctacggggatgtgtatccctgccctgtaggcag +ttgggacttagcgcgactatctagataactaaggcgccagccgcggctgtttgccgaagt +cgtgctgatgctgtacaacgaagggcgagcgtgttaacatgctacacgttgacctagact +agtccaagtctgaaagtcccaatttaggtcgggtagtacagtcctcggttccagtcccat +gttgtgccgacaaggacaagcgatcatcaaatcgactgaaattgaatcagctacctcaga +ccacattcagctctcggtaacatgggaggcttgtggttgcaccgtaaaagggggatagcc +catccatcctgtaaacctacaatcgcgcgtagcttaatacgctcacattagacattcgat +cgagagacctggtttcaagagccttcccttttgctttagtgggccaaatcgcaaccctgc +tcccctcccttacgccttatacacttcagtgcaaattcatgcgttcagcgaacaactgga +cttctgttgtacgtagtccacgggggcttattcattatagaaagccccctactgtcaccg +ttatatggttcacacatgagctgatcacctagagagtcgtcatgcacattcgcctaacaa +ggacatatgagtaaccgggaggggatatcttcgatttgcagcaccaatcgacgttgtact +ggtctattgtcggttaggtccgattatccgaccggcaatgaggcaagcccatctattcag +gaaacttaggcagttccctgtgctggcccgacgtcgatgagttaagtctatacaggccgg +ccgcgagtagttaacgagaccaacatagaactatcatactagccggcaatgatcaatagg +gtcttagtgccactgtccttcgagccctcgcctaatttagcgcgaccggtttcctattgg +cctgtggggttgcgggcgcgtccgctttaagaatggtccttaacacctacccggagatcc +attgcatagcacactctccccattagcctagacacgtcgtcgcccgtcgactttggctgg +aatttaatcgccggggatatcgaactttcacgccctttaacgacgaggaaacctaccgtc +gcctggctggtaaatgggtgccttacggggactcacgatgctgtgaaccgcccgccagtc +tctggggctcccaaaatccaggttggaattacggacctccgccggtactacgcattacgg +ggtggaaagtcctaagataggtgaatgaaagggcttcgctaaaccagtaagtcattaaca +ggacatcggcgtcacgtctcgcgggtttacacggcgcacaaatcctattcccatgataaa +caccttatgccaatccacatccctcgctgcctaataaaattgtcacacctgcgctactga +ctaacgtttacgcaatgagagatgaattccgacacccacgcttgttgcaagcacagccgt +atgggttctcgggtaaggtaacacgaggcacactccggcgcggccacttcggcccctccc +tgacgatgaccctcattggtcaagcatcagtcgaatgattcgtaacaaggagcaaccgac +tcagtagagagggtgaatctcacgcgctagcctgaggaccgcctaagtgcttgctgtgcg +tcgcggcagtcgcgaggtgcgggctaaagtaaataattactggtcttacctaattaaagt +tggtggttagggacaacgttaccactaccagggtgcgcccctgaattcgggtatcggact +atccagggcgccttttgcggccttaaatacctttttaatcacgctggagctagaaggcca +tcgggatagacggggttctaggtactcgaaaaaacaggcctaataactttattgcgcttg +gactaacctatgtacaacacgttgtagcatactatggaatgttactgacctacacggata +gcatttgttcgggagcgtcattgacctcccagaccctatgccgatctgcaccattcagtg +accatccacaagtctctaggtttagccaaaggtgacaggtcaagcacttgcgcatgtcca +ctagtcgattatgcaacgtctctgaacagttagcacttatctcccgccgtgtgactcacg +cagtgaacttacctatagcatacgcctactattgaatgttcgttgttctaacacagcgct +aatacctcactggcagaggtgcgcacgctcctagtatgggagggagtcaggtcagagtgt +atgagactgatttttattcccgataggggttcagtgaaatcgacctctcaaaagagaggc +gcagaattcgctgataagctctgctacgatcgctaaggcacgataagcagggcggtgaag +gttggagcaagataatatcacccggacgcgggtcctgtcgcaacgagcggccgtgagacc +tggtgcgcaccgtgtctcgatcggccgaatatcggaacccactgcgtgactcaaatgata +tttttgctatctgtgggggatttatgtcccggtagaactctgcttctagcagaggataaa +ctttacaaggacggcgaatatggtgtcagagcgccctaatgatcccgtgctattccgcgg +gccaccggccgttaatggacttcgggttagaaggatgggttattcatcttccacagaaac +gcccagccgcaatcgtgggtttactcgaccgcggttatctgcctacttagctgcattcct +aaaacaggattaaaaaggccgcgagggttgcgaacctatggttgagaacagagtcataga +gtcagatgcgcagggacggcacggatccacatggcagttaactaatattaataccccctt +agcctcgtatatgggcgtgtgcagtcttgtccactcgcggacgcgtatccgagcgattct +gtcttccaacgctattcgttaccactttggcacctctttgctaagcaggatgagaacatc +tcactcactacgagccttgactttcagcacgcggtacgagggcactgggcttctcgtcct +ttgctagatgtaggtgccttcccgccctcatgacgatgtcacgtctatcggtttattaag +gtcggggaccattacgagattaccccgcgaccttcgtccaaatgggatcacggcaacgtt +ccacgaggggcgcgttgcctgagccactcgggtatccccttccgatatccgcagaagtct +agctaggagcaccgccaagcattacccgattaagcaaagcctactcatcgcatacacggt +tcttggggtttgcgtcagatagcaacttccggcggcagtagagatgatattttatccgat +tgagcttgcccagcgcaccctcgggccccgtggctcttctcttaaatgtgccctgcacga +tgttagcggttcctactcctccccgagccctatgggtggacagactcgccctctgaggct +gcgtgttcaactggttccaagtccggcgcctgtggatctacgacgcgaccagcaacatta +taccaatgtctaggcttagctcgaaaactagtaagccttagaactagggtcgtagcttct +tctaaatgaagggcagcgtcatagccatccacgttagcgcttactcaatcgcctgtcggc +tgtcatgttaccgctgccgtaagttcgtgaacataaaatacaacacattttaggtttaac +aaggattgtttaccgccacgtactggtgccggtagtgaaacgaaaaacctcagcatcggg +aggaggagaacggaatccctaccaatcttatctctacttaaagaacaaagcgcgcggtac +atggcgcagacgagaatgaatccccgaacgggccgagttagactcctcaacctgtcgaaa +agttgcgatccgctagatcgtagagtacggacttaaaaaagaaaagaaggtctctacccg +agcggtgagcccaaccagttccaggttgctgccggattcttccacacggcggtgccatat +ggacgaccccgtactttgggcattctaggctacctactgcgaacgcctcgcgttcatgcg +caaagttttctctagataggcgcgctttggtaagcatagtaataggttcttcagcgtcta +aagcccgcccattaggcctggcgactctgctttagtggtaaagagatagcatcgatgtac +atcgatggagagagtattttcacgaaagtgcgcgccgtacactatttatcggcatgcgcg +ctaaactggagggtctaataaccagatcacgcaacgaggtcccatgtacggttcgcattg +cgaaagtatgggatactagaggttttatcgggcctcgcttgaggtctgtctgggactggc +gcacatcgctcgtcgcccagtcgatagcggggtgattaaataagaaatatgttagtgccc +ggatttggaaccaaccagtcccggtagcggtacaaaaagcctttcctgctagttctatgc +ttaaagcgtactcctgttacaatccgtaggcaacctgaagaggcaacctggttttccttt +atttcgactttgtttgccatttccaatgtgatactgtgcggcgaccttaatgctttttgg +taaaaccataccgagatccagcgcacgcgacattcagaccggtcccggtttggcggtcaa +cctcgtactctgcactgttcagctagaggggtctcctatcccgaggtaccggtcgtctaa +cgggtggttacaggggctggtcctactctaccaacagttaaggagggcaacaagttcgat +ggggcatcgtataacttgaatgccattggacaaataataattgctccacgtcccaaacct +caactgaccttgtacgcataggtgccacctgcacgcaaccgagctcgttcaagtgtcctt +cttagctggcgccgagtgaacctgcatctgagagtctacccaacgatctagtatcataat +gtctgtagccagcgactatgacacttagagagccgcctaagaaagtgtttgcggctcctg +ccccggtgcgcttgttcggtgggaggtcgttatatagaaccatggggatataatgaatgg +taacatatctgccacgttgacaagccgctattatgaattcagggttggatactattatgg +cccttcacggtcacttgtaggacggccccaaacaggattagtaaagtcggtggtctaagt +gtaagcgataggcaaactgcggttattcgcgtagaccgcttgatgatgtaaaataacagt +tttcatgtctttgcaattaccgcgtgtagtctgaggacttaaggccccttctcggatagt +gaaatagttcaggtccggctgttgacaggtgtcctgaccggaacgctttacagctcctga +ggagcgtagcgccctacgtccttcctgatccagccgcacccatagctagtaaacaagttg +actgcgagacacaaagtcaagggcctgggcaagcgcgggccggactgcttgtcgaatatt +gaagcgtactcgatccagatctgcgccattgagttaaaaggactatacgcctgtctccag +ccgattgcagaaaggacagatcctaatagaagaactggggctccattctctcaccgacac +cctcctattcctatcccactggccccattgctggtaacccaaacctctcacgacgattca +gcgctgcgattaaagagtggagtaaggttaaaatcggttggggcgagtagtgtttgatgc +aagatccattgattatcgccagtgggatggcggcaaaggtaccatacctcgagcttattt +gggtgcctcactcctcggggaaccatatagacttgcacttggcaaaatatggtcgggttc +aaccgaccttttatgcctggaccctctgctgagggcctcttgatcaaaatgcttgcgttg +cttataagttgccgcttgatgtccaggattattattccagcgtggaacgggaaatgatag +cttatctgttactcagggcgcctgaattagggttgccattacataaaggaacgacaaggt +gagcactgggagggcgcattcttataaccgggcatatgcgcggaacaccagcgttggatg +gcctacctccgcggaggtcggggtcgaaggcccccgtttcctggaacaataacactgtcc +cacttggtttgtaaagaacctccactcgtcatgacacggcctagcggtgttatcggcgac +gataatcatgggtctttacaagtgaatcaggcttaccaggcggaccaggaggaaacactc +cacgagtgagacctggatccggggggatggcatacatcccatacgctcactcagcatgca +cgttcctagcacatgctggaatcttgcaagctggacagagtgtattgtggcccttgagct +atacttctaacgccagatccatgtttgcaaacatagctccagtggcctattcgacggttt +agcctctgcaccatgttatggctcttataggtgttgggtcgacagtgggtgactagacgt +aaagagagtatgccgtggttgaaccgagacctaaaaagctcaagatcacgacggatatct +ttaacggataccagcaatcggctaagaccagtgtctactactcgtgatcggcaccggccg +cccgttatctggtgtagtagtcagccccgcatcacccgtgcctatgcgagccttgactct +atcagtctaagagcacacgtttagacctgagccaagcgtaggtcactatcgagtcaacct +taaccctagttgattaatgctcttgacgtgtatgattacaccgaccgcgaataaactctc +acgcgcgtgacggtttcctcgctagaagaagccatatactgtacacactgctgtgaccga +caagacttacctttggtttacgtacaggtggacagttgtttctcccctcgggtcgatagc +gtttggtcgtgattccaataagtgccagatgtcggaacaacatgtttccataaagacgcg +tgacatgatatggtttgctatgaagccactacgaaaatgccagtgcttctctttatattt +tgcagatgtggaagaagtgcatccattatagtccttttacgtacctggattctccgccag +cactgccactactttcttactatccgttggggggggacgctcaacagagcgcggactcag +gaatcagataacgaccggaacggggcatatgcccagcattcgttgttgtaaagcgattga +gtttgggctgctaaggggccggcagttcgttccgggatacccgcaaaatttatgcctttg +tccggtgattctaggtttagctcgatccacgaatacgacaatacaaggtttgcctcctct +ttcgtctatcacgcaagcttagagttgctaaaatttcgtagtccgtgcgttaaagctccc +tacatttctaagctgtacgacaccacactatcttgcaacaagagtcactctatttctcat +atccgaatgaatatcgtcctgtacacgggtggcaatctcggcaagatggccgagatgtgt +agcactgtcacgttcaaaagctcaaacttctgcccactgccagccctaacagaatctgga +gacatcataccagcgctgtcccatgtggcggagagttgcttcaatagaacgctatcaccg +tgggatcacatacgtcactactccgtgtgacactatttattattcctttacctccaatac +gcccgtccttgcatcgcattttgtagtaagacatcagacgacgttactgccacggaacca +taaaacgtgctgagcccacgtaaacgtcctatcggtgaggtccgctcatcgtacgactct +gaaatttggaaactaccagtttgttggcggtgaatatccaacctgtcggcatacacgctc +acgcgtatggttggtacatgcgaaaaactacccaagcaccgggcgcgcgagacactatga +gtttagtgatgatttacggagcacgttttttgaactcaacagaccgattgaatccttcaa +acagggttactcgttcgtgacaaccgattacagcattctgaacgtggtacgtgcacatag +cttggagagttgcgcgaatctctttcgtaccgtattctacctgatcgctagctttccggg +gtaacgacatcggcaatgatgagtagcagccagtatccatactggacaggtactccatat +aaaactcccgcttcaacacgcgagatttgaaccgcacagtgattgtgctgagtcctagtc +accatcccgatcttgtacacgggtaggggtgtaacgtgggcagatgatgcttacccttcc +gctggcttgtaaaggtgccccgcttcacttgatgagctgcgtcgacaagaccacccaacc +cgagccacgtggttccgacgactcaatgatttccagtatggtccaatcgagcaccttcca +ctcatgaaatttgagcactgttggggatcgagtatgtgttttgcagtgggtctgtagcat +ggaaagatgtaccctaacccggctctggcattatcaacggtggattgggtggttaaacag +cttcccttcggagacttaatggactaaaacgaatgaatcagacgccgagaacgagaactc +accatctggcccaggagccatatttttgattggtaaatcgctcgcatagtgcccgatacg +atgcgtacttgagcgtaaacggcgcagcctctaatcataggtgattgctaagtctacggt +catccccagtggctgacgaggaattaaagacctatttccgtacttgttgcgcgccttcaa +ttatccagcgtataccccgatggctactgctacatgtctaaagtgactagccagaaaaac +ctaaccctcgcccacgaggccttgatcatctcaatcatagagtaatgttcattgaattgc +acggctcttggtgtgcacattgagacaatttctaaaccgacaagtttaatggccgagctc +ctcctgcgtccagctggaccttcatgcaggcatgaaggtccatatatattgtcctcccat +agcccgccgaccgggtctgactcaactgtgttttcgctatcccaggctagcacttctatt +ctttgttacgtccagtcatagtgttactatagggtaattttagtcatagtagacggccgc +tttttcgtatggcccgagaccgtccaccggctacccaattaagtcacatccggatcttgg +gtctagatattcctatcgaaaatagtctcgccgcctcactgcgtagttcagggggcgtca +cacttgttcgcggcttttcctcatgggatctttacccgatggttgatgcaataaatgtct +acaccggactggcgtgtccgagacgactttatacacgtgtgacgagtagatcagatcgta +cgaatggtctgtctcacctatcccagtgggaggatggaaaacactcctgcctaccgggtc +gaattatttacgcgtgttacaatatgtaatttagaaaaagggattgctggtcgatgcgtc +tccaagggattttttatctaaaagcatccttttgggtgtactctgatcgcacgtcgcaga +cagcagtgggttttgacgcagtccgtaggcccacagactcgtttgttgtttattaatccc +aggggagcgttgaagccacacctattctgtagctgtttgaaaggtagctagcccggatat +tactcaaggtgactcccttcagaatcacacgtcgctggagtcgccacagggtggcatata +cgagtgatagagcaccttactttcgaggtagcggtacattagtgcaacgatgaacccact +atagtcttagtgatttcatgttttacttacgcgaaaacgtggggttttgtcaacacgtat +acgttgaatgcacatgcctcatcctaaactgatgcactgccacaagtctgaaagagcgac +agtctgcaacatagcggaaggttacgcccaagccagtggtgatcccccataagcttggag +ggactccccttagcgttggatgtctttgccccagcggcctcggtgtacgggttctccacc +ccactatggtttggaactatgaagaggtacggcaacctacccgaggcaccaaatcgtgaa +cctacgcctatatatacggatagcagggtatccattcttaccatgagctcgtaaaccact +ccgctgaattcgatgggctttggcgcacatcaccgtttctatcacagatctgtcaacgga +atctaacgctatttactcggcgcacacagatcggaaaacccaactgtggcgcgggacgga +ctccaggaatcgttacgcgttatcaccttcggctaagtcttgacgacattagagctatat +ggtattaatagtagctgtacatcaaatgataaaattatctgaattataagtgatagcgcc +cacataatgacacacacgttttagatagttagtagacgctcgagactttgcgagcaagaa +tggactgttaaccttaggggcgggttcccgcttacaaaagctgattcgctcctagagatc +tataagcgaagttcaacccctaattaccattgcataagaccgggagaacctgtgacacct +gttcccatgggattagcgaaggattgttgacttattgccgagtgacctagtattgattaa +aaatgcgacgtgaagtaaccaggccttcgatgggcgtctccgaatgggccagtaacaacc +atatacaaactacaagttaaacccgaaaccgtggctaccatgctcatctagtcgggttcc +cgatggaacattccacggtgagcggccttggatgtcggaccaccatttggcaggacaacc +gtcacgattcgcaacagcggttaaagcaagatggacgccaataatgatattcgaaagtgg +ggttaaaactggccgccgcttgatcggtcctacttagtagcgtctcagatgctagaacca +ttccgagtgcaatgtgtgctagcaacagtggaatcggtctataacggtttcatgatccct +tctccggccatccattgagaacgattctacattaatctgttaagcgaccgccaatcactg +attagctccagaaaacctaagattaccgtatataagaatcaacctgctaaagtaggaagg +cgcaggttgccgatgaacagatcaagttcatacaggagctcttaatccaccgtacccaag +acctagtcaagcgggatgtcgttaacaactttgtacagacatttgggagcattgcaccac +agactccaatgaataacgcatccgctaggaccggatatagactgaagaaagtatagtcag +ctgcctcctaaaggaggcagtagtaggagccaacctaacgtcgtggatacgcattactcg +gtagcgtggtaaacacacgaacccgatctcagaattttagcggcgagaagttccataaat +catatttctacttcgtgctccggcttcatctgatggtggtcattattctccatatgcatc +cagtagagcttgcgcttcacgcaaagttccaggatgctcacatatgtccatacggcgcta +ctacatttactcgaacacttggaagcccgccgtcgggtgtatgactagcctttgactgag +actcagcctcgcaattgtatgtaactagcgtatcccggcaaagtttataggtccatatta +acaacagatgggagtagagcagcagcttttattcggactggcacctcatcgctttcgctg +tctgcgggatcaccggtctctcttgaacgtgttagagcctgagggatctgagggcacaca +cggcacttgtatccggaaaacctatgtatagagcctcggttgccagtactagttcgtggt +ccatagcgcctgaggggattaaacgcctccggaaacacgcgagttgttcttgtgcacgta +aaaatcacttgatgttatcgtgttcggccccagtcgctgtctcacttcaaaccagcgccg +tatccattctatgcaattacgattatacttttgacccgatctacgaatcagttacccatg +aatgcgacctgcgataacatacagtgcgacctccagtcctgttcgattaaatcgacagag +ggtcccgaacgcatccaagcggcttgggtcccgctggcaccgctgggaagtaggtagaga +cgtattgaaggaagcgtgtacagattggtgattacggtgctcttataggaccagactcgg +gcgattctacggggtcgtccacactatggacccgagcatcgaatcagtatgtcaatctga +agggactaatcggggagacgctggactccgtcaggagtgaccagaagcagggatgcaaag +acatcccggacgtggatagactacaaatcggacactcagatacatacctctagaaaatac +tattccccgtttctattccgttccgcggccttttacgtttagcttggagcaaactatata +gccgaggagggtactacaacgcgacgcacttatatcccaatgaaagaataaaagctggta +agacaacgctagccactctacctgtgttgcactcgcgatattagggagacaggttacaat +cagctgacacatgactagctgacccggagcactcaattctgcccatcggggatctgatca +aattcgtcgtcgcattggacggcttccctgcgtagcgaaggaatggttaccgtacgcctc +ccaacacacggcggtcaaaatagtcttcagcaacagtaacacaaatctcttcctagtact +cttggtacacggccctagagttaattgtgtgcccccccatactctctacatgtgataaca +gcgacccactaggttgccaaacagagtcggaagtcctatcgagtccagggcgctgtaggg +cctacagactctccgatgtcctgtgttcttcgtaattgcaatattttcgggacccttctg +ctgttgtcagattcagtctgttccatcttctggaaatgttgaggaactcgaaaacgctaa +ctccctgatatttgtggccttacatcaaccgtcctcaggtgaagcccaatgctaaaggag +tgaaccagacgtgcaggaagaagctgttgaccctttaaagctctagttcttgtcaggtgt +tccgactcgctggcgagagtatgatccagtaagcggcggacctgcgaccacatgatgagg +tccctacaaaatcctatgctccctgcgaattacaactcacagagaacagcctaggctttc +ttagttattgatgcacattctactgacgaacgcagcattcgaactaaaccattggtaatg +taattgtgacacgtgggaatctatttaaagctgcaagaactccaccacgtgttcatccac +atcggtctctgtggaatggtccaggaccgtcccaatagggggaattgcgagacccaacta +atcgagtgattgaacatgggagcaattcccgaatagaaacttgcaacgcgcagtactacg +acgatggtagcaataacgacgcgctacttcagctcatgggtctaaattagggcgaacgat +tgcacctaatctgctggcttctctagattgtagatccacagggccaattaacagtgcaaa +gaatagcgtcatatgattagtttgaaaataatatacatgaaaatcgagcacccgcatcaa +taagctacgagagtctttggagagtgccaatacacctagcacatgctgtgcttatgttat +gaaaattcatacttgactaacgttagccaccagccgatggcgctgtcacaacgaccctgg +gttaccgtttagttctctaagaagggtcactctaccagcggggttaaatataccggccga +ctgtctcgagagtgggttgagataactcaatttggatcagcattaagtctagagcgattc +tcgcggagcgatctatgcgcactgacttactcttggaatgtcactcgcggcttacatgcc +tccttgttccgcggattgaatattttatgcggtagtcatgactttaatcgtttctacaga +aaagaccgtttgaaatggcagaggaaacaaccctgctggatcctccctaagtcacatccg +gacggacagattctacttaacctccaataaattgagaaaaatgcaaaaggatgccaatac +ctatagtacattttacgtttcccgtgtggttcgtgccaacccctggacggtggatgtccc +cggtgggtttttggaccgggcgaaattattggccaacccggaaacccaccgagagcctaa +gacgtgcatgatagttataagtttaatgggagccttaattgcaaccgatgggggcgggta +tttgtctcctacaccgacactatggttgttatttgcggcttcactcaactacaatctggt +ctgtagagagtaaccccgtattatcttcccttgcgccctgggtgcgttagcggaagtatg +gggattaaggggcgtgaacaatgcttctaagagcccggcgctaacggacggtgtcacaga +gtgtctaggcagagtccccactgtggaatacgcatgcgtagcgaaccgcgcgagtcagta +ggaacacattgggagcgattgttcctcctatcagccggcctagcaagcatgacgtcaaac +cttagtgagcagcccaagaaataacgctcggactgcaacagatctaagcgggaaatatat +cgatagcgaggaagccgagaggactaagcagagagacgaagaggtgagccggagtgattt +agccccagatggcaggtatagcttccgttcacaacgaattgaacagaaagtccggcgccc +ctggaatttcgaaacgtcaccaggttggccgtaaggttggtaactcaacaccctaagact +agcggtcttgcgtcatgcaagcggggcgtgtagcctgcacagacataatacggtagggag +gttaggcgtggggcgacacagtaagatctgatgaatccatgtacgttgcctccccgtcgt +acgcagttgatacttcgcatcatacttagcagaaagcatcatcgcgacctctcgtacaat +ggcaaaaaggagggcttcgtgcacggggtagagtcttccgcactctaatgaatcaacagt +cgaggttcatggccacaaatgtaacgacgcgatatgactcgttatgtctaggtcccactg +cggatctccttagaagcgaattgtataaatctcatagccccccccttagcaagtggtacg +tagcccacacgatttacgcagagtggggtaagacctcccccacgacttggggaacgcgcg +ctcaccggatagctactgccactgtagagccactagcgcatagtgtatccatagtgatgt +acgtagccaagcggggagtccttgtcttatgactagcgccatggggctatcaactgagaa +cggatgcacaaacggtccggccattcgcacagtccgccggtcactaatgaccggtctgca +tggggctgcaacgatcgcccaaggattggaaattgttaataagagtccagtggacaattt +agtaacgcgacgcgcatctctccccgcctaacaaggctcatttttagcttgacggcgcgt +ctcacgttgccgggctcagctcgagctcacgtgccgcgcggggttacctcaggtttgagg +aatgtgtcttgaacggttcgtaccggcttagcgtcgggtgctcctcagggttcccagcaa +cattcttaccgcaatccaatactgagggcgctaccaatctaacgtatcactgacccagta +aaactcggtattacgggggcgatatatgtgcttgcctcaacatagaactaccgccttgac +gttagaacgttaagttagagcacctgcccatctggattgcggcgataaaatcgattattg +tcacctggatgaggaatatggcggacaaacgatcgtgtggacctgcgccgcaaccgtggg +atgccgcagcagaatatctacagagcgtttgcaggagggccacgcggtatccacgtaagt +tcgcgcaatagcagcatcatctcgttcggggttgctgctctcagactgtcgatacgtccg +aataccagcaacccctcgaggctttgtaatatttacggagtcctaagggatttggtgccc +caagtatagttttgccgaagggggggcacagtgccgtgactacgattgggctattgggca +acccgaccccgtgcgcacacgtttgatctcagacagagggcaaaaagacgaagccacgag +ctcttcgaaatcaatctacatctatgctgggataagttctggtccacacctagatccgag +aatggaccgtggataacgagcaccgttacctttgaggcggcagcacttttaacaccgtaa +aagtaactctatagttgtcagcctttaaagattgcgttcccaatacggtacgcaccgtct +actaccgtcaacacaccgtagcttaggtccccccgcggcagcgcttccatcgtggagggc +tgtcccccatcctccgtagaactgcgtttaccggtctggggagactccctaaagaaacac +cacaggtgaggtggacgggaatcaagttaacgcattcacgtaattcactcctgttcactt +acataagagaagttctggttcgcgccttaagtgccatccatgaggcatacgatgcgatgg +ggaccacgcctgtagactagagaaacaccatcaagttgataaggagtgaaaattagtata +ggtctggcccgctcgtctgttgatagggcctttaaatgtaataggcctccgctctgaacg +agtccgtacttatctgttggatagtaagaacacggttctcagtccgcaaggtagtatatg +actacgcgtcgatggacctaaagatacgtttggcatcgtcctgagcagcttatgaaaatt +gctgcgcgtttcaggtggaagaatctgtgttattagtgcggggacggtagtttcgtaggg +aattatgggtagtacagatctatggacgggaggtgactgtcagcagactgtgcgcaggcg +ctggtggtataatcgctgggaccacgtactggcaagacgcatgcagcctggtgacatcca +caccgccctcgctcagataggacggacgtgcggcaataccctatctggctcctggacacg +gaaaaacagcagtttaaaaaaaagtgggttttgtgcattatcactttgtccgttcgacag +tttattgcacctcccagcatgctagtaaccccttgctctgatttgagatttattatcgta +acacggagtcgacgtgaagtttgatctgcggagaataggccgagaagcccctacgtaccc +tatgattcccgccatgtgcggtgtaaaattaaccactacttatggatttcatccaaatta +actgacgcattatatatacctcgagtgtgtaagccggccctcgcggtaccaaatttcggg +ctgcatacctagaagtgtatgtcatctaaacctaaggatcgggtagtgacgtgtaactaa +cacgtgggacaatgtctgtcctgagctacgtcttaagaagtatttagtcctacgtactca +ggcatcgatactccccgatgttgggtgcagtggggcgtccgggcggctaagatagtccag +ccactccttatgtgaatttatcaaaccgggaagaccagtgcataggccacgacctacgta +gcacctccgtacaaccttgctccagggcacgtctaaatgtcgtactgggttcggactcca +tgcctggcgactagcctggcggtccacatattttacaaatccgagacaacaaagcacttc +caaagtcgcgtggaccttaaggaaggcaacacaggggacatttagccactctaaagaccc +gcatactccagggcatacacgtaacgcgttctcttcactacggagtccgcgagcgaacga +ctaagggcgagggccgtagatctccttctcgtaaatataatcaaggttgccagcgcttat +cggtgccaccgatccgaagagcgtaaggacgttacagtgtaggatacatggtttgcgccc +gtccatacataatcgaccatcaaatcgaaagcgcgtatgcagtacctcgtactgcccaaa +ggacagagcgcattacgtcaccccgagcgtagcttaccatgttaagaccgtgctcatggt +ttccacccgtaagatgagcacagattgcttgctctaggtaacatagtaataagagaacat +tcatagacgttggaccacggttgaaagaccgcctcttaactactgaaaaacaatatttgt +ccttagtaaccaagatacattaatcaatcctccgctaatgaagttggtaggcaagccttc +agtagcgtaccgtggatttgtgtttactgcatttacaccgcggctgggtacgaagcggtg +tcgggcacgcacacgcgttcgatactgtaggtttcgttggggttgcttgttgccgctaac +gttcgggtcgcttacttctgcaggcttgattactgagttcattcgatcggttcccaactt +cggactattgcgccatcatcaatgtgggaaatcatttttaggaatacacctcgtacatgc +atatgaagtctgcgtgcaacagactttccatgatgcgagctgggcttcccgtcctgcggg +atgcaattggcccagtgcgcgaaggctgggctgattacggacatatctgctgtcctgaca +atcgatggtgtagagcgtaacaatcattccaatttttcgacacgggccagggctttaggc +cactacgcacctgggttaacactcggcgacccgcctaacactgggtggacattcccgagt +tataacgacacctcattttcaacaaccacaaaaaggcataggcaaccgaactaacctttt +gtttagttacttctggtatgttcatagtgtagggcaggtgatcggttaaacgcttgtacg +gaccccattctgtagcgcttttctgaacgataacactacgttgggagatacttgttgcgc +agtcgtgggttaaaagcgagccgacgataaactgcaataaggaaagcggacctgtcacct +cccgcgggtaaatctactcgctcgtttagaggccgtaaagcgacataacggtgtccagta +caagcccaactggtttagagaattcttctcctactacattcaccgtccgtatttcggcgc +gccctacggtattcgtggtgttctgagcatacatgggatggactatcacccctgcgcgta +aggactagcagttgtaagtcacgttaaggttgcgtaggcaactgttggagtgcgtccata +cttttcagcaccccgaatttgtacgcccaagttgtatcgtcgtgctgggacgagactcag +cctttaccctaagcgtttaggaaagcgatgttttaccgcagacgcatatgagagaacgtt +gcaagatcgatctttctcgcgcacgttctgtagctagacctgcagctaatctaactgccg +aactgttaggatgttttctctcctagacgcagggaccagttgtaagtaagggtttctcat +cgctaggctagggtgatggtaggtgataccaatggagcggcgcgttaaaagggttgtaat +ctcggtgatatgtgcaccatagattgttccgttattttccttggtcatttagatacgaga +taggggcgccccagtgccacgaggtccatcttatgctgttgcgtagctagagccctacaa +ctttctaagtaaaacctccatcgagtgcgcttggagaatttcatgtaccgtggcggtacc +ttacttgatgccgcgacccctatctaattaaagaccgcttcccgatgggcgaccttaatc +ttgctgcagtccaagttaacttcctctacgcccgatgacaaatttagccggagtaagttg +ttaacctagcctaatttgtaccgaattttgggttgtgagaagaccatttctacccgacga +tcatgggttcctctcaactatacctagtgtcggttaatgatttatggcaaacaatccccc +aatgtagcacccatatgggcgcaattatgtgaacttcaacactaactattggtttagtta +gtgatatttagcgcaaataagaggcatgggctgcgaaaagattgtgtttccgcaatgcta +gggaacctgacggcatacccgagatgatagttaggtgagcccgaatcatgcttgattaac +gtaacgtctatcactacccgtgggcttttgcgattgcgatgcgttactcaaggcatactc +aatggggctggtaataccgatatgggcctgtgtcttagcagcgacgtgtcgataatcgaa +gtacaatacgccagtctacgttagggctttgctaacacatcacccacagttgaagcatca +cgcacagccctcattaggttgtcgtcaacctggaactcctagcaggctgggcagtatggc +atccttcacgaacaatcttaagagacgcacttgacacaggttgcgacagtcaggatgggg +agcttctgaaaatctctacataaggtggcctcgtgatatcgtattaactcggagagcggt +tggactaatgagtgagtccaatcagtgatcggtttttaatgccgggaccacggcttgcgg +ctgcgcgatctatttccctcgcgttacacaaggggtaggcataagcaacaagcccaaaaa +aatcctcacgcaggcgtcaggtacagactttttccctcagcaacgcaaggtacacgtgta +gtcgaatgtggtcataaccagcagctgtcttgcgcaagggcggtgctacacgcatgtttc +actccacagtgacatgatgcccgctcttcggttgttctatgcagcgcgctggtcgcacct +taagcccagttcaatccatgcaattcaccgttgcagcactagatcatataccgcatactt +cgtcagagcacaaccagatcacttacaacgaccgcaagctaagcatccctatcgtgcgta +tgtgtcgaggcgcaagtactatggtcggatgaggaggtgtgccctcaggacacacagtat +cgctcctcattcgacgacgcaagtcgtctagttctatgataggcgaagtgagtactgcgg +atcactacgcccaagacttggagtgacgacgccggctactagcgatggcagttcagaatg +gtgccttccaagggacactcggtcctaatcaggcacacgcgtctcaaagccactactcat +tcatgagctcgctggcgtggcagtggtcttatgcggctagcgaccagaatttctactagc +cttaggcgtacaagcgttgggcaggtcgcgagtaacctctacactctatcatggaccaca +cccatccatgttttaaacacagaggtcttaagaatagcgatggtacggttgagaacgtgg +gagttttttgctctacagcaagcacgttgcagtgaggagagctttaatcctaaatcataa +gtgatactggtgccgagcatctttcggacgaatatgaagacccaacgctaatgtgcctgt +tcgtcggcacattctcttgggacacccgctacccacatgtatgtgaattgggcccgtctt +gattgtgtcgtagatgccaacggttcaatcagccccgacgacgagaagcatttctacgtc +tccagccttgcaacggtttctagagtcttgctgttcgtaaattgaagaatctactgtgca +gactttatcgacccacgatacgctaatccgagcgcgactcggtctcggaaaattatccga +gggagctcgttcaatgcggcggaggctctgagtgaagtttaaggctgagatagtcacacg +cattcgggtccctacaccttgtttcgaatgtccaacgcgaactctgggcaacatcgcgag +actttgaaagatgagggggctgtgacagggcctttgtaataggggtactttaagggcggc +acaatgtgatacctatgtattcctatggtctgggtcagttatttaaattgaccggacaag +aatctgactcgtagactgctgtatagatcacgattagagtttggcaatggttctgaaagt +gatcatgtctaacgtaccggatgacactagtaataactgcagcagcgtgacgatgtaggg +gacttactctgtttacgggtctcgtctgccataaacatcctgctccggcagcggtcaaag +acgtcctattatcgcactcgggcagaaactccagtcatggtgaggcacaccaacagtgtg +gggtcggttcgtggttaccaagccaccatttttcggcccaaactcatcttgaatagtcgt +cctcattcatttgatcgatcgtcattacaaggttcataaatgcccagcgccgcgtctccg +gccagatggaagagctacgtgcaggcggtaagcaattgacgctatagccctatatacgta +tgtgggactcaaggcgacatgctataccgactggtatcgaggcgaacgacaattgctatg +ggtcatcacctcatatttagattcagcccctggtggcgtcgctttgaccaactagttaag +gttgaggagttcgctctcaaacctgataacgagctaggccatgtaccaaccttggctgtg +caccgacgagctgaaggaccgagctgctaggaggtcgcagtatcgattgctctttcagcc +ttctaaaggtgcgacagaaacttgaacctctcggtactctaacactaggggtacgagtgg +ataactcgattacattagtcctggtacaaaagtcctccttccgggcgccttaagctgctt +taaagctatgtccgcttacgagaaaggggcgcacggtccgatctacttacagactgtcaa +aacggtgtagtcggaatgggctcgaacgacgctaatagatacacgcgcattggcatggta +cttcaccaatactatatctttaaagaagggcggcgagcctatttacccacttcgattagg +ggataacacaggtgattgagttttctggaaccgtcatctaatagccgaagcagtccacat +tacagtccattgcatttaaccactaggaatcgcgccaatcttgcgcttatgttgtttagg +aggggccacgctagatcttgcctatcctgagttatatgagggacgatcctacatgtatcc +tcaaagttcgcgctcatccttagattctaggagatggattactaagtgtgtgtccatagt +ttaacgcaacacataatgttcgtgtcgtgatgcgtatccttggtcccgacctactactct +agacctatacgatagcgttcaaagctccaccatcgatgcaacgtggtcgtaccagtactt +atgaaaccttttttcgcagttcgaaccgcctggaaagaagcatacgcaatatcgtgtgat +cagcacgcagggtatcagttaaagagcccaagacttagtctatcgagcagaagaccgatt +agggtatacaccatgtcagaatcgcctccagactagtggctgatatcgtacgttacacga +ggtcgctcttgtcgagtgctcgaacgatctatcccctagatcacgcgtcccagtcggagt +atcccgcctcggatcgaaatgacggctaaaaggtgtaatcacgcgcgtaattcctgcaag +ctctctgagttctgctcgctggttcgaatgagcctgcaggcagtacctacaattcctgct +cggaggacctgttgcgagggatgcccccatacactgctgagggtaagtagctgaagtata +aaggcggcaatcttctgacaaacacccttcttccccttggttaaccagacaagctggatt +tatcacccctcccggggacactgccctgttttatatcgcggacttgcatcagtcgcagta +gtccgcgtagatgtggggacccctcccaccttacatggcaacgcgtgatggagtagtatc +tcgcgtcagtgagcgagttcgcatatttgtagggcgtcataacaaatgtaaatgctagta +agtgaaagatgtcaacatgcgggctcggttttgcgctccaggtatatcctgcatctgatg +aacttttcaatgaccgtgtctggcgcagggctgtcgttttgcaggaggggatgcccagcg +catctcgagaatcatcggctaatcaacgcgaaaaagttaatcttctatcagtcagcgggc +tataactccttgaccaacagaggatcgcaggtatttgtactgtggagaaacaatcgtaag +atggccggccatgaggcaaggattgtaatataccggggtggttatcgaagaagtaaagta +aggcctaccttctggggagcaatggggttctcagacgctaactcttcgttcacctacaac +acgattggacttagagagggacctacggacccgaaattcacataggggggttccccgata +agccaaggcaggatagagtctgaatccatggacttccatcaaacccctgtggtggttccg +ttcaagtcagggagtatcacgactcacttactgagtcacgtcctcaatatgactgaagtc +gataagacgaaggcttaatttacggaacccccgtttagtaatcaagaaacgcattggcgc +gggtctaacgtaagctcttcacaccgccgtctacgaaccagttcccggcgttgcatggtg +atcgccgggttcccttggagggagtaccttgtgccctgcccgtatggtcgttcctaaata +gcgaccaagtattagttgactgttgcgacttaaaatccaacccataagcatatatcgtgc +acccagggactggagggcctcagccccttctttcgaaacttaaaccgtcttcttatgggt +gaaatgtaagtttctacgtctggaataaacggttcgggccaccatgaagcgtctcgtgtg +atagagattcgcaatgagtctgggaagtagaatcacaccaagttagaaaaattctccaac +tcagctaccgcgacccatccgttggggtaacgcaagggtattagccgctcagaggccaaa +gattagagaaacgcgacgcctgcggctgtcttgtgactactccgaccatgccgtcagctg +gagaccctacggtgggtagttgggtagagcgccgatagctaattctcgagtggaactgcg +gaatgtgtcatatgaggtagcgatattgggctatcacaagtacgtgagcctgtgaaaacg +gacgactcactagcaagtaagcacggccctggcgtggcggcggtacgcgtacaaacctcc +actgcttggggaataaggtgcgaacaccgtgatccattgaggcgtggaggcatagggccg +gagtgttcccgtaccaattacgagctaagaccgcctgtccagccctggtgattacgtagt +aagctcggtacaagcctgtaattagcttccgggtggcggacattcgttcatattgccagt +cacggcagaggccgcgctagctaggccagccccgatgaaacacgtgctagttttctatcg +tgcctaaggatcgatggctatcgcggcattgttaacttatcaagcagccgaatcggcgat +agtggacacaacatgaacacgctaagctctgagtgccagactctaccgcggcgaatccta +ggccctgctggggctcccccacagagcagtaatggtcttctgcggccgcgtgcattcaat +ccgcgctggtcaatgcgttccatcgtgccttaagcggggcagtgtgataagacaattttg +ggggtttcatggagatatcttcagaaacctcgcgaggtgcgtttagataccacacgcgcg +gcggtcttttcacacaattcagtccgcggccggtaacctaactttgcttatcaatacata +atcctcccaaatgtacggtttcgaaggagatacctctcccctgttagactctaggttttg +cagtgctacctagagatcacaccagcattaaaccaaggtatcagcatcgtaacatgggga +ggcacgcgcttggcggtaagtacccgccacgattactctgtttcatagcgccaacgcccg +tatagtgtgcttttaacgtcctcagaaatcaccgactgatcagtatggcgacatcggagg +gaggttggcgaaatgttgcttttatagatcgttaaaacctatcgttggcgcgtatcgcct +ccatagaacttcttactcacctagtctgatcccgggactggtcgaaatggagacgacgta +gcttgagatctggatcgatcgactgatatcacacttaaatatgatcagattttactacgg +tcgatctccatatatctatcccagcacttgatacctcctgtccagccgactgcttggacc +ctagcacgcgatattagagggcgcgttgtagctcaaagtaaccagtgggacgacccgagt +gaggttaccgctggcacgtaagggtgacatgactcccccagaaacactgaatttgtctag +cagtacatcataatctgagaacatttagcgaagacgagaacaagctgtctcaatggacgt +aggacgagaggaatgtgacggttataatacgcgacctctatgtgtctattaaatccctac +gggccgtagcgagcaagctcgggatctttagtagcggatcgggtcttctacatggcgcac +caggtttgatatttcgagctagagttgctctaaagaaacgctcttgcttgggaaagtgac +ctggggtacgacgaagacacgggcctggttgcataccgtctcaccttcattcatcgagcg +cgatagcgtagcattatccagcacgggaggcacctggcatgggcgtgaaatacaatccga +agagcagcatgaaaaaaaaatgagcggtcgtgacccgagggtgcgtccctccgaacgttg +ttcgttgccaaggtaagggacccacgccaagcatggttagggaagcgtgaccgacacaac +atagaggacaatctcaatatgccttctgggtatcacctgggtgtgtagcagtgctactgt +tacaagtcgcgccatctgcgacgtcgttcgtccctattccctcctaaaggccccgactct +gaaaaaacctacaatcctagacacgttgtggcccagcacaacgcgtctgctatacagcaa +gtcattcgtcgaatggggcaacacgtataagaaggccgtgattccaagggcacatggcgg +gtctctcaatgatctacgacgtgacgcaaacggggaaaggctggaccgttgtccggtagg +cgatccatcataaaaggcgtctggttctgtggttattgtgtacacgtttgaaatatctga +ctgtagaacgatggaggtctctacgtagctctgcacgattccctcgggctcacgttcatg +gcaaggcgacatcacatgaacagggttgggtaacagccatctctgaagcattcccccagc +tcgcacggctcctttacccgctactacgtcatgcactggctagagacccaccaactattg +tgcaagtaaagaccataaaattttggagctgtagcgaaagcgagttgctgcgatggttag +accaacattgggtcaaactattctaatccggctaggctcattaagcatcgcagattgccg +atactaatgctaacgagcgatgaatgggcctcttcgagacgcgtccgtacctgcgaatat +caaccatgaaccctaatctggaaagtttgatgccgctgtgatagccactcctgtgtcgcg +acggcaacgtaatgtacgtgcttggtttgagctctaaacataccttcgttagagttggtc +agaggaggatattgtcatagcgctacaaccagcgccaaaggatagctccgtggctggtcg +gacgcgctagaatctaggcagaacacgtgcggcttcttgcactctgacgctcacgcattg +gactttggcggtcatctggttctcataagtccgaatcacagtagataaagatccgcgtga +taagatatgcctacgtacttaagtagcaccctctgttcttggcgatcctatcattgcggg +ttctatacatatgttgatccaaccatcggccgccgggccgtcgtaggatctaatggccaa +cacatcaatcaaatcactcgcgaggacgcggaggtcccgtccataggtgtgtatgttggt +acgcagcacctaatctggtctggtcaagcaactggcttgttccagtatataatctcagat +agactgccctgaaagtagacaatgataaatatgtaagagcggggaaatcccatcgtcgac +tgtgttgcggacgattagcttgcaaagagaggtgtgagcggctgtcttaaattatggact +cctcgcttccgtcgaacgtctgactagagaatctcttggaggtacacacgaacgtaggtt +cctcacagtatgtggcatggacaccggacaggtcacttgtatttatccattgaacgtgat +atagtgtgtcgagtcgttatacaatacatactccaagcgtactccctttgggtaacgggt +ttgggtacatattaccatggtgtggaccgatttgtaaccctataagcgttccgggtactt +taccatcgcggatgcgatcgcggatctacctatgtcagcgacatactgcccgtccaaaag +tgattcagtatgtgttaatcagcgtattgcatcatcagtcattgagagtggaccgttctc +gaagagcaagattcctgaccctttccactgggccatccagtacacccaattatatgcaga +cgaaaagctccatacatatataatacctcttcacaccgtgtgcagagaccaacttgatcg +atattgtactcactcgtacggaataattagaagggcccgactgtcgagaccgttagcatt +tgcattatcgcggtgagcgtatagaaacaattataggctttgctgcgatcgcagtgtctg +ggacagagtctcctagacctagatcagcttaaaacaatgattgtaggtaatgtgcgttgc +cgtcgggccacacctttcagttcggttacaccgactcagaattatcttctatcgggaccc +tcaaactgataatggggtcaaatggtggtggtcccgtctttctgcagtgctttgtgagca +gaatagctctcgagcgctccagtacgcctgagtatggctgcgcccaccaattggcgccgt +taatgcgtggcgacacaagagaaccagtccacataagttatgcccttaagaagatcaccg +ggtttgcgccatcctgattcaggtaacggacgtccgtgtacacgaaccatactaatgagt +tctacatcgctccaacgcgtcttgaaccctatcgacaattacaaatgcaaagtattagtt +gggtcagtgcgtagcttatccaagggctgcagtaagtcgtccccatgacagacggacggc +ataagggccaaaaattccgccgcattagctgttaataatgccatccagaaccggacagtc +tcgatatataacttacggctgcgaactggcccttatgaattacatccacgtgctagttaa +caggacggatcgcccggactaattgccgatcggccgtcccattctccgtacgatgtactg +aacccaggcgtgtgaatgggtatgtcaacgtttacccatcgaccctgcctatagacctga +gtttcattccagttgagcgtagaatgggatgagcagctttgcttgaggtgcgatattcgg +gagacctggtcagcaggattgatggttttatagtgctggattccctgctgaaattctctc +ttctgccgcatgtgaagaaggccttgcttcccttccacatgattgtaagatttcctaagg +cctcccagctgcgtggaactgtgagtcaattaaacctctttcctttataaattacccagt +ctcaggtatttccttatcgtggtatgagaacggattaatacagttgtgtaagccagctcc +ttataataaatctctctccctctctctgcctctcatctatgtatgtatgtatctgtcatc +tatctcctataagttgtttcttttgagaactctgactaacatagacattaaataaacaat +ttgtatactgaataattatataactactgcttgggggtgtggttgtgttgtgtggtgagg +gggtgtagaaatgtctaggggttgatctagcctaacaccttccagatgaccctaggttct +ggcctttccccaaagagtcctggtctactcagtcatctcccacaatttggccatcttctt +gggtgctcaggacaataaatgacccagccagatcctctagagtcgacctgcaggcatgca +agcttggcgtaatcatggtcatagctgtttgctgtgtgaaattgttatgcgctcacattg +cacacaacatacgagccggaagcatacagtgtagagcctggggtggcgtatgagtgagct +aactcacattaatttgcgtttgcgctcactgccgcttttcagtcggggaaactgtcgtga +cagctgcatttaatgatcggtcatcgcgcggggagatgcggttttgcgtattgtgccgca +ggtggttatttctctcaacagtgagacgggcacagctgattgccctcaccggctgggctg +agagagttgcagcaagcgtacacgcggggtttgcccagcagggcgaaattcctgttgtga +tgtggattccgaatcgggcaaatgccctataaatcaaagcatagtccgagatagggtttg +agtgtgttcagttttgggacagaggtcactataaagaacgttgactgcacgtcaagggcg +agaaacgtcatcaggcgattgcactacgtgacatcaccaatcagttttttgggtcaggtg +cgtgagacttatcggaactaaaggagcccgttttagcttcggggaaacgcgaagtgtcgg +aagaaggaaaacgaagagcgggtatcgcggaatgtacgtgctgagcgaaaccaacgcgcg +ttacggtcaagcgtgtatgtgttttggaggtaagtttccctgataaagcggaacaggggc +tatgtgtttacgggggtcgcatggagtacaagggggcaggacaagatttgcgcgacatgg +ttttttttaaattgcgggagatggttggctgtcccgatgagattagaacctgcttttgtt +ttagtttcggttcggtcagttcggactttggattggttttgttgggtatttaagaagggt +gtttgtgggatgtttttttttgggtcgagcatcactggactatgtgtgtcctgcgtatcg +tggcggggcctcgaacttagaggtaggacgattccgatgcaacaaccttatgctttactc +caagcgatggtgaggcaaccttgtacagaagtgcaaccgccattgtggctgggtcggccc +aaggaacggatattctcctgggtactgacaacccaggtctactgggttctatcgacggta +cagtcaagcctcgagtttactgtctcccacttaagtccgctctgggcattaatgagtctg +tggggtatcaagggtcacccaacgaagtagctagtcccgttgtacatttcgcgcgttacc +tagggagcacggtgagctacgagggtgtagctgcacacagcacggtatataggaacgcaa +acatagtagtatgtccctatgcataacagggagaagtgactctccgacggtcatttatcc +cagactctcctcgctccctactaattgttctatatacaatcggagtaaattggtacctct +ctgacaacataggggggttgaagtttatgccacgcagtctgttacacttttgccggggaa +ttgtgtagaccatgactcaaggtggtggtgcgagttactctactatctactgcttctcat +tacacatctcttggcttacaggccccagttatcagttacgaggctgccttaagagaaggc +caacattcatgatcagatccccgcacacgggatgatagtctgagatctgccggacgctcg +tctgctacaagtgtgggttgagcatagggtaagcctaagggctcgtaatggtacttcgcg +atgtttattcgatatgagctacaaggtataacgttgccttcaccatttcggctcacgact +cttacctcgcctcaccgctagacaccctgaccaaccgttagggtactgacaaccgatcag +gcgactcctaaaggacctctcagcctcccagacatgaggtccgctcgcatgctgttcagc +gagtccatatcaggaacagcaacgagacaaggcgagagcaacactagacttggagacata +ccaccacatctgtttactcttgattcaggaagacctgtggtaactgggcgacgcgtgcgt +ttatccgtcttccaatctccacagaatctgctccgtcagagggatgtatgactaacgcac +tgattttttgttcagtaaccgggcgcggattacggggcgtattccaggggtacattcaac +gggaccccaaggaaaagttcggggtatttaaggaggcggtccaactctcggacttattgg +gcgacgtaatgataagcacacctctgctcatacagtgcacgaagaccggcattgttcaaa +cacagccgtccattccgcgactattgtcctgtgtacaggtgcctcgggtcccaaatccag +cttatacccaaaactctcccggacttttttcgtccttgcaggaggaaaggacctgactcc +acgtttattaggggatcgttgggcccggacaatcgtgtattcaacataataaatgggttc +tagtttaaagccggaggtgacggtggattatggactagaggtaaacatttccggcgtagg +tcagaaagaatatagaccctaaatttacgcaaaacgtgcacacgattgtaggtggagtcg +tattaggaactcagggcgggtggaacgagtgtgttttaggcttaccatgaaccgtcgcgc +ggaaaatgatgacagaatggacggtgcttttttgcggggccaacaaaactcgcttatgtt +cgccattggatgtaagtaccgctatactacgcagcgaccccttacgacattgttgatttc +caatgctttcatggaggtttgtgagcggtcgctaggaatcttcctccagcgagtagccag +ctggccactagttagcaaccagagttcatatgagctccagtaacacgaaggtacataagg +aacgcaagcaagttcgggactacatagaacttggggtgcgtctaagccggggatcttaat +agacaagaggccatcgtatgggtcaaatttcatgtttcgtcagatgccctatccgtggac +acctaaatcaagcatcatatcctaatgctatttgctctcgctttggtatgattcatatgc +agccctgtccgtaaaatgacgggccagaggatacctgtcactacctatttcagattacca +ctgttcgaaaccttgctctgaaatcgatcttcgtagagtgttcgttaatcgccgcgtgaa +ttatataatcccccaaggagtacccttcgtcggtgcagtctacgtaactttcccaggtta +gcccccatctaatcgacgtacgtgagggggctacgggagatcgcacaagccgaagcattt +ctgctctgtttcgttacaactggaatcctcatcacgcagcgctaccggatgccatcccta +gtcatccccgcgagtttaaccagtcgagatggctcgaaccgtgtttcgaaggctggctat +tgaaccagttataaagcaggggcgactgtccgtcggacttcgatataaactatcacaaat +ttggcataagtatatacagccaagaaggctcagataggcacacgcacccaaatagtgacc +gcctacgttcaggttgcccggcaacgaagcgcgcttgctccattaatccactatcagagg +taacgaccctctttcaccgtagctgcccccatgtcggggcaacagtttcccaccgataac +gtgttctgtctagtacgtgaaacggattggctaacccctgtcagcctaattagatgacat +cctcgtacctctagagaaggtgaggagaccgctggaacataagtaaacagtttaccaaac +aatgatcatacatataacagaaaggagagatgcccggtggggtcgaagatacagctgtgg +ttgatatctgcaacggccaggctgtcatgaagatcacgcgcacgaagctatttatgaaag +atacccatgccggggcggcagcggtacctagtcctaaataccgacacgtagcaacgcgtc +ccctgaggcccaccctagtcaagggaacttaaaatatacgctgactttgtctaccaatca +tcttcgattaacaccggcatactaagtctaccctggacccagcaatgagaaaaacttttc +cgaatgatcattgtgagtttacagagattaggaacaccaaaaaaatttacgtacgtccgg +ttacccctgtgtgacccttggccacaaaccgtaaggtgcagcatgggcgttgagtccagg +gtttgtgacaccagacatgaataccctgtggcgggttgttagcatgttattcggatccga +tttcagagggtccgttgataccgggcagtacaagacaccccacaccactgaggtggtacg +tccgccagtctgctccctacgttatgcaaaattccttccccggcgttcttgcctgtacta +ctcctaaagggccgtgaaatccgacgttgctgctccgtctcgggacacccacacaacagc +tacggaaaatcgtgaggatgcacaagggtcttgcattgttgatctatacagctcttcgca +cggtgtgctactgttttccacaatgggcaaggggcgtgtaggtcttatgtaggagtaccc +cgtgatctggctataccttgctatctattccaccaaaatagccggggtcttatagtttta +ttccggatctatcaagtgacaagtccatggggtaaacggtaagtttcgtacgctgggcgg +tgatccccgcttataaacgagcaaaccgccaaagcaaccattgccgggcttccagctagt +aaatgttgggtgtcaatcgtgcatgtgactacttacccacaaagggacgcttgaagcgcc +tgaactcgtcacgtcatgcctagctcccggttgaaacacgaagacgcgtgaacctatctt +tgcttactattcgcctcctttaagaggcttcttgatgtggctctgcgacatggacagtag +tagttgtacctgggtatgttagtgaaattacaagtacctcaaaaacgaattacgtgtata +gagattatgtcactccgtcac +>ctgB +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT +ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATG + + + From 6cf24fd1e84a1de725711badfa15a34f735ea1eb Mon Sep 17 00:00:00 2001 From: dariober Date: Tue, 10 Dec 2024 15:47:27 +0000 Subject: [PATCH 06/10] Add test - need to test with spliced UTRs --- .../src/GFF3/gff3ToAnnotationFeature.test.ts | 5 +- .../src/GFF3/gff3ToAnnotationFeature.ts | 47 ++++++------ .../test_data/cds_without_exon.gff | 1 - .../test_data/cds_without_exon.json | 74 +++++++++++++++++++ 4 files changed, 104 insertions(+), 23 deletions(-) create mode 100644 packages/apollo-shared/test_data/cds_without_exon.json diff --git a/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts b/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts index e794889fc..67f799d24 100644 --- a/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts +++ b/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts @@ -201,7 +201,10 @@ describe('CDS without exons', () => { it('Convert mRNA with CDS but without exon', () => { const [gffFeature] = readFeatureFile('test_data/cds_without_exon.gff') const actual = gff3ToAnnotationFeature(gffFeature) - assert.deepEqual(JSON.stringify(actual), '') + const expected = readAnnotationFeatureSnapshot( + 'test_data/cds_without_exon.json', + ) + compareFeatures(actual, expected) }) }) diff --git a/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.ts b/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.ts index dbdc79c28..1d8a6b1bb 100644 --- a/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.ts +++ b/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.ts @@ -183,21 +183,23 @@ function convertChildren( convertedChildren[child._id] = child } } - const processedCDS = - cdsFeatures.length > 0 ? processCDS(cdsFeatures, refSeq, featureIds) : [] - for (const cds of processedCDS) { - convertedChildren[cds._id] = cds - } + if (cdsFeatures.length > 0) { + const processedCDS = processCDS(cdsFeatures, refSeq, featureIds) - const missingExons = inferMissingExons( - cdsFeatures, - exonFeatures, - utrFeatures, - refSeq, - ) - for (const exon of missingExons) { - convertedChildren[exon._id] = exon + for (const cds of processedCDS) { + convertedChildren[cds._id] = cds + } + + const missingExons = inferMissingExons( + cdsFeatures, + exonFeatures, + utrFeatures, + processedCDS[0].refSeq, + ) + for (const exon of missingExons) { + convertedChildren[exon._id] = exon + } } if (Object.keys(convertedChildren).length > 0) { @@ -210,15 +212,12 @@ function inferMissingExons( cdsFeatures: GFF3Feature[], existingExons: GFF3Feature[], utrFeatures: GFF3Feature[], - refSeq?: string, + refSeq: string, ): AnnotationFeatureSnapshot[] { - if (!refSeq) { - return [] - // throw new Error('refSeq is missing') - } const missingExons: AnnotationFeatureSnapshot[] = [] for (const protein of cdsFeatures) { for (const cds of protein) { + // For CDS check if there is an exon containing it. If not, create an exon with same coords as the CDS. let exonFound = false for (const x of existingExons) { if (x.length != 1) { @@ -241,19 +240,25 @@ function inferMissingExons( if (!cds.start || !cds.end) { throw new Error('Invalid CDS feature') } + let strand: 1 | -1 | undefined = undefined + if (cds.strand === '+') { + strand = 1 + } else if (cds.strand === '-') { + strand = -1 + } const newExon: AnnotationFeatureSnapshot = { _id: new ObjectID().toHexString(), refSeq, type: 'exon', min: cds.start - 1, max: cds.end, - strand: cds.strand === '+' ? 1 : cds.strand === '-' ? -1 : undefined, + strand, } for (const utr of utrFeatures) { + // If the new exon is adjacent to a UTR, merge the UTR if (utr.length != 1 || !utr[0].start || !utr[0].end) { - throw new Error('Too many UTRs') + throw new Error('Too many UTRs or invalid UTR') } - // If the new exon is adjacent to a UTR, merge the UTR if (utr[0].end === newExon.min) { newExon.min = utr[0].start - 1 break diff --git a/packages/apollo-shared/test_data/cds_without_exon.gff b/packages/apollo-shared/test_data/cds_without_exon.gff index 60471a656..f41e2db67 100644 --- a/packages/apollo-shared/test_data/cds_without_exon.gff +++ b/packages/apollo-shared/test_data/cds_without_exon.gff @@ -3,7 +3,6 @@ ctgA example gene 1050 9000 . + . ID=eden ctgA example mRNA 1050 9000 . + . ID=eden.1;Parent=eden ctgA example five_prime_UTR 1050 1210 . + 0 ID=five1;Parent=eden.1 -ctgA example exon 1211 1510 . + 0 ID=exon2;Parent=eden.1 ctgA example CDS 1211 1510 . + 0 ID=cds2;Parent=eden.1 ctgA example CDS 1611 1710 . + 0 ID=cds2;Parent=eden.1 ctgA example three_prime_UTR 1711 1800 . + 0 ID=three1;Parent=eden.1 diff --git a/packages/apollo-shared/test_data/cds_without_exon.json b/packages/apollo-shared/test_data/cds_without_exon.json new file mode 100644 index 000000000..849544ef8 --- /dev/null +++ b/packages/apollo-shared/test_data/cds_without_exon.json @@ -0,0 +1,74 @@ +{ + "_id": "67581b7d5890a8eb1bedab6e", + "refSeq": "ctgA", + "type": "gene", + "min": 1049, + "max": 9000, + "strand": 1, + "children": { + "67581b7d5890a8eb1bedab6c": { + "_id": "67581b7d5890a8eb1bedab6c", + "refSeq": "ctgA", + "type": "mRNA", + "min": 1049, + "max": 9000, + "strand": 1, + "children": { + "67581b7d5890a8eb1bedab66": { + "_id": "67581b7d5890a8eb1bedab66", + "refSeq": "ctgA", + "type": "exon", + "min": 1200, + "max": 1500, + "strand": 1, + "attributes": { "gff_source": ["example"], "gff_id": ["exon1"] } + }, + "67581b7d5890a8eb1bedab67": { + "_id": "67581b7d5890a8eb1bedab67", + "refSeq": "ctgA", + "type": "CDS", + "min": 1210, + "max": 1710, + "strand": 1, + "attributes": { "gff_source": ["example"], "gff_id": ["cds2"] } + }, + "67581b7d5890a8eb1bedab68": { + "_id": "67581b7d5890a8eb1bedab68", + "refSeq": "ctgA", + "type": "CDS", + "min": 1200, + "max": 1700, + "strand": 1, + "attributes": { "gff_source": ["example"], "gff_id": ["cds1"] } + }, + "67581b7d5890a8eb1bedab69": { + "_id": "67581b7d5890a8eb1bedab69", + "refSeq": "ctgA", + "type": "exon", + "min": 1049, + "max": 1510, + "strand": 1 + }, + "67581b7d5890a8eb1bedab6b": { + "_id": "67581b7d5890a8eb1bedab6b", + "refSeq": "ctgA", + "type": "exon", + "min": 1600, + "max": 1800, + "strand": 1 + } + }, + "attributes": { "gff_source": ["example"], "gff_id": ["eden.1"] } + }, + "67581b7d5890a8eb1bedab6d": { + "_id": "67581b7d5890a8eb1bedab6d", + "refSeq": "ctgA", + "type": "TF_binding_site", + "min": 1049, + "max": 1100, + "strand": 1, + "attributes": { "gff_source": ["example"] } + } + }, + "attributes": { "gff_source": ["example"], "gff_id": ["eden"] } +} From d481ef364495a9593363adba1f2773e1cf63d401 Mon Sep 17 00:00:00 2001 From: dariober Date: Thu, 12 Dec 2024 12:27:00 +0000 Subject: [PATCH 07/10] Fix header and incorrect phase (not used by Apollo anyway) --- .../apollo-shared/test_data/gene_representations.gff3 | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/packages/apollo-shared/test_data/gene_representations.gff3 b/packages/apollo-shared/test_data/gene_representations.gff3 index b9aff4134..06f6c275b 100644 --- a/packages/apollo-shared/test_data/gene_representations.gff3 +++ b/packages/apollo-shared/test_data/gene_representations.gff3 @@ -1,4 +1,5 @@ ##gff-version 3 +##sequence-region chr1 1000 39000 # example 1 chr1 . gene 1000 9000 . + . ID=gene10001;Name=EDEN chr1 . TF_binding_site 1000 1012 . + . ID=tfbs10001;Parent=gene10001 @@ -76,10 +77,10 @@ chr1 . CDS 21201 21500 . + 0 ID=cds30005;Parent=mRNA30002;Name=edenprotein.2 chr1 . CDS 25000 25500 . + 0 ID=cds30006;Parent=mRNA30002;Name=edenprotein.2 chr1 . CDS 27000 27600 . + 0 ID=cds30007;Parent=mRNA30002;Name=edenprotein.2 chr1 . CDS 23301 23902 . + 0 ID=cds30008;Parent=mRNA30003;Name=edenprotein.3 -chr1 . CDS 25000 25500 . + 1 ID=cds30009;Parent=mRNA30003;Name=edenprotein.3 -chr1 . CDS 27000 27600 . + 1 ID=cds30010;Parent=mRNA30003;Name=edenprotein.3 -chr1 . CDS 23391 23902 . + 0 ID=cds30011;Parent=mRNA30003;Name=edenprotein.4 -chr1 . CDS 25000 25500 . + 1 ID=cds30012;Parent=mRNA30003;Name=edenprotein.4 +chr1 . CDS 25000 25500 . + 2 ID=cds30009;Parent=mRNA30003;Name=edenprotein.3 +chr1 . CDS 27000 27600 . + 2 ID=cds30010;Parent=mRNA30003;Name=edenprotein.3 +chr1 . CDS 23391 23902 . + 1 ID=cds30011;Parent=mRNA30003;Name=edenprotein.4 +chr1 . CDS 25000 25500 . + 2 ID=cds30012;Parent=mRNA30003;Name=edenprotein.4 chr1 . CDS 27000 27600 . + 1 ID=cds30013;Parent=mRNA30003;Name=edenprotein.4 # example 4 chr1 . gene 31000 39000 . + . ID=gene40001;Name=EDEN From 488216aaa44742f1a9cd7f5036c24726e08e41fe Mon Sep 17 00:00:00 2001 From: dariober Date: Thu, 12 Dec 2024 15:17:54 +0000 Subject: [PATCH 08/10] Handle spliced UTRs --- .../src/GFF3/gff3ToAnnotationFeature.test.ts | 20 + .../src/GFF3/gff3ToAnnotationFeature.ts | 78 +- .../test_data/cds_without_exon.gff | 901 ------------------ .../cds_without_exon_spliced_utr.gff | 14 + .../cds_without_exon_spliced_utr.json | 73 ++ .../onecds_without_exon_spliced_utr.gff | 6 + .../onecds_without_exon_spliced_utr.json | 55 ++ 7 files changed, 232 insertions(+), 915 deletions(-) create mode 100644 packages/apollo-shared/test_data/cds_without_exon_spliced_utr.gff create mode 100644 packages/apollo-shared/test_data/cds_without_exon_spliced_utr.json create mode 100644 packages/apollo-shared/test_data/onecds_without_exon_spliced_utr.gff create mode 100644 packages/apollo-shared/test_data/onecds_without_exon_spliced_utr.json diff --git a/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts b/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts index 67f799d24..6c5e77c12 100644 --- a/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts +++ b/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts @@ -206,6 +206,26 @@ describe('CDS without exons', () => { ) compareFeatures(actual, expected) }) + it('Convert mRNA with CDS but without exon and spliced UTR', () => { + const [gffFeature] = readFeatureFile( + 'test_data/cds_without_exon_spliced_utr.gff', + ) + const actual = gff3ToAnnotationFeature(gffFeature) + const expected = readAnnotationFeatureSnapshot( + 'test_data/cds_without_exon_spliced_utr.json', + ) + compareFeatures(actual, expected) + }) + it('Convert mRNA with one CDS, without exons non-adjacent UTR', () => { + const [gffFeature] = readFeatureFile( + 'test_data/onecds_without_exon_spliced_utr.gff', + ) + const actual = gff3ToAnnotationFeature(gffFeature) + const expected = readAnnotationFeatureSnapshot( + 'test_data/onecds_without_exon_spliced_utr.json', + ) + compareFeatures(actual, expected) + }) }) describe('gff3ToAnnotationFeature', () => { diff --git a/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.ts b/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.ts index 1d8a6b1bb..e7c2052b3 100644 --- a/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.ts +++ b/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.ts @@ -214,14 +214,48 @@ function inferMissingExons( utrFeatures: GFF3Feature[], refSeq: string, ): AnnotationFeatureSnapshot[] { + // Convert utrFeatures from GFF3Feature to AnnotationFeatureSnapshot + const utrExons: AnnotationFeatureSnapshot[] = [] + for (const utrs of utrFeatures) { + for (const utr of utrs) { + if (!utr.start || !utr.end) { + throw new Error( + `UTR has undefined start and/or end\n: ${JSON.stringify(utr, null, 2)}`, + ) + } + let strand: 1 | -1 | undefined = undefined + if (utr.strand === '+') { + strand = 1 + } else if (utr.strand === '-') { + strand = -1 + } + utrExons.push({ + _id: new ObjectID().toHexString(), + refSeq, + type: 'exon', + min: utr.start - 1, + max: utr.end, + strand, + }) + } + } + utrExons.sort((a, b) => a.min - b.min) + const missingExons: AnnotationFeatureSnapshot[] = [] for (const protein of cdsFeatures) { - for (const cds of protein) { + protein.sort((a, b) => { + if (!a.start || !b.start) { + throw new Error('CDS has undefined start') + } + return a.start - b.start + }) + for (let cdsIdx = 0; cdsIdx < protein.length; cdsIdx++) { + const cds = protein[cdsIdx] // For CDS check if there is an exon containing it. If not, create an exon with same coords as the CDS. let exonFound = false for (const x of existingExons) { if (x.length != 1) { - throw new Error('Unexpected number fo exons') + throw new Error('Unexpected number of exons') } const [exon] = x if ( @@ -238,7 +272,9 @@ function inferMissingExons( } if (!exonFound) { if (!cds.start || !cds.end) { - throw new Error('Invalid CDS feature') + throw new Error( + `CDS has undefined start and/or end: ${JSON.stringify(cds, null, 2)}`, + ) } let strand: 1 | -1 | undefined = undefined if (cds.strand === '+') { @@ -254,18 +290,32 @@ function inferMissingExons( max: cds.end, strand, } - for (const utr of utrFeatures) { - // If the new exon is adjacent to a UTR, merge the UTR - if (utr.length != 1 || !utr[0].start || !utr[0].end) { - throw new Error('Too many UTRs or invalid UTR') + if (cdsIdx === 0) { + // If this CDS is the leftmost (or the only CDS in this protein), check if we need to add UTRs before it + for (const utr of utrExons) { + if (utr.max > newExon.min) { + break + } + if (utr.max === newExon.min) { + // UTR ends where exon begins: Extend the exon to include this UTR + newExon.min = utr.min + } else { + missingExons.push(utr) + } } - if (utr[0].end === newExon.min) { - newExon.min = utr[0].start - 1 - break - } - if (newExon.max + 1 === utr[0].start) { - newExon.max = utr[0].end - break + } + if (cdsIdx === protein.length - 1) { + // If this CDS is the rightmost (or the only CDS in this protein), check if we need to add UTRs after it + for (const utr of utrExons) { + if (utr.min < newExon.max) { + continue + } + if (utr.min === newExon.max) { + // UTR begins where exon end: Extend the exon to include this UTR + newExon.max = utr.max + } else { + missingExons.push(utr) + } } } missingExons.push(newExon) diff --git a/packages/apollo-shared/test_data/cds_without_exon.gff b/packages/apollo-shared/test_data/cds_without_exon.gff index f41e2db67..49a26b0b0 100644 --- a/packages/apollo-shared/test_data/cds_without_exon.gff +++ b/packages/apollo-shared/test_data/cds_without_exon.gff @@ -10,904 +10,3 @@ ctgA example exon 1201 1500 . + 0 ID=exon1;Parent=eden.1 ctgA example CDS 1601 1700 . + 0 ID=cds1;Parent=eden.1 ctgA example CDS 1201 1500 . + 0 ID=cds1;Parent=eden.1 ctgA example TF_binding_site 1050 1100 . + . Parent=eden -##FASTA ->ctgA -cattgttgcggagttgaacaACGGCATTAGGAACACTTCCGTCTCtcacttttatacgat -tatgattggttctttagccttggtttagattggtagtagtagcggcgctaatgctacctg -aattgagaactcgagcgggggctaggcaaattctgattcagcctgacttctcttggaacc -ctgcccataaatcaaagggttagtgcggccaaaacgttggacaacggtattagaagacca -acctgaccaccaaaccgtcaattaaccggtatcttctcggaaacggcggttctctcctag -atagcgatctgtggtctcaccatgcaatttaaacaggtgagtaaagattgctacaaatac -gagactagctgtcaccagatgctgttcatctgttggctccttggtcgctccgttgtaccc -aggctactttgaaagagcgcagaatacttagacggtatcgatcatggtagcatagcattc -tgataacatgtatggagttcgaacatccgtctggggccggacggtccgtttgaggttggt -tgatctgggtgatagtcagcaagatagacgttagataacaaattaaaggattttacctta -gattgcgactagtacaacggtacatcggtgattcgcgctctactagatcacgctatgggt -accataaacaaacggtggaccttctcaagctggttgacgcctcagcaacataggcttcct -cctccacgcatctcagcataaaaggcttataaactgcttctttgtgccagagcaactcaa -ttaagcccttggtaccgtgggcacgcattctgtcacggtgaccaactgttcatcctgaat -cgccgaatgggactatttggtacaggaatcaagcggatggcactactgcagcttatttac -gacggtattcttaaagtttttaagacaatgtatttcatgggtagttcggtttgttttatt -gctacacaggctcttgtagacgacctacttagcactacggccgagcgcaataacccccgg -aaagcacttgctactgggaggcgggtttatccatcggcaataggggttatcagtactacc -aagaagattgtgaagatattaacagcattgaaaaaagttcggactgggcatgaaacgtgt -gtcagagttagagtccttgagggactgaatgggtttgtcccaggcccaagcttgaggtgg -atgtcacctcgggtactgcctctattacagaggtatcttaatggcgcatccagccttgtg -gctgggtctacgtacgcgtgggcaccatacgtatgttggcaggaaaggtcaatcatgctt -gtttcctcgtcgcagaaacgttcacactattggctcgcgggatcgaacgggcctgattat -ttttccagctcctgcgttcctatcacgccaactgtcgctaataaaatgttatatagagat -aacccattgctatgcaaggatggagaaaccgcttcacaacaccctagaattacttcagca -ctaacatctaagataccgggaaaaccgtaggtgccacttggttttgagggcaatgcctct -tgcactggcgattcgtggagtaaccttgctaccgatttccaccttttctaggtatgatta -catgcgatcgccattgtcagtcgtcgtacgaaatccaggaaggaattcgaatacatgacc -gaaagctatggcatcataagcgtggctctttactaaggacacgtgtagtcggctgatttc -gcgcagaacttcgctcaccggacagtgactgctgtccgaacttgggggcagcgtagattc -tagctagagaccgcagcgaaaatgccactcgctaggtggctatggacgtccagctatagg -cccccacattcgtatgtatactccggtaattgagtctacctttcgaaagatcaaagttca -caaaccttctctcacgttaacatagatgctcgcacatagctagtccggtgataaagcggc -cgtgcatgcgagtatttagcgcacggaacagatattaggctgcgaaataccctcatatgc -tacagcgcaggtaacagacggttgatctccccgtagaattccctcaaggccggatcgttc -tcatgagtagcctttgtctgacatttcctctcaatagattgcagactcctgtgtcgtggg -aaaacttcgcggaatcgctgtcgttacataaatctaaacagtgcagtccaagctatttac -taccacgtatcataatgaacatctctttgtgaaatacgccggtacgccgagagatcgccc -acattgcgtgtcaccaagacccagtgtctattgggcgcgctggttatagtatgcggaggc -acccgatgtgctgctaatcactacaaactcgacaccaagaggcgaccgcgtgcgggaggg -ctagaacgcgagagccacgtcaacgtggtggtaccgctcctgatgatacgagcactgtgc -tggcaccgctgccctatgtccagctaacaggtgtcacataatatcgcatctatagttgag -tatcttatcgctagcgttacttgtgagttctcgggtagaatcgtcgtagcattctagtcc -ttagcgtaagcaacgatatcgccgatattataggggcaataggtctcacccaccacgggt -gtatatttaacgccctaagtagttaacggagactattaaacaattatgcacgtgaaagat -tagggtactgcacacgtgtcacgttgacgatgtaaacaccataaaggttgttctgtgatg -cgatgcccatacctgcggtgcaacgtcctaaacttatgcggatcaccttctaattcagcc -gacgttgagaacgccagctatactctgtgagtacaaggtgaaagccccagatcaaaatat -tggagtcttgtccgtagctcttgggtggagagtgtgaccacgtttatcccttaatactga -gactcttctcgttccacgccccggacattcgcgatacagcctagtttgataggtggctac -aagggctctatccgaatagcacaaaactcagtcctagtgcgtataggtcgtgctctgcgg -ttatacgctccacagaagaaagtacccactgcagtttgtcggtgggacttagcaaaacct -gtaaccaaaaccgcacatggactctgttttggtggcgtggagtgtgcccgccgcctttat -gctgggtcaaaatactatatgatttgttgtggtcgggcgcgacattggctcttatggttt -ataactattattcggtgcacagaaccgacttatgccccgattttgcacctcccgagagaa -atggtactagtcattgtctcggggttttacaactcaaggataatctgccagcggtacccc -gaaaaatcccatcccgaacggctggtacctatgctggagaccgaaatggagccagtgcta -tgcaaaccctcgctctttctatttggcccgaccagctgcgagagtcattcgacatacatt -gcgatggctggttcatctctctgcttcggattccgatcctgctgcctatggccacaacac -gaaagagatcttggatacgtcctcaccatagtccaacggacaaatatggaggtgtacgcg -cacggatacgagtacgacgggccgcccaatgctggtccttcgtgttatactgtttacttg -cctccggtaagagtgtacagcttgtacccacgcggtggaggaccttacgcgtcgtccgca -ggtgtggagatttcgctacctgttgcattggggcctcgccttacgtttttttcgacggag -gcccgacccgccaggccagaccctcatcattgggatttttactgccttggacggcagatt -ctgatgctggtaacgcgcttggcacacaccctggtcgtaattacgtactatcacctctat -tttaccaggggcgggccccgcctcaacgatcggtagtctcgttagaacagttgcttatac -tccgttagcactcacaactcggagaatgagcttacacagtccgtaatagtctccacagct -tcccctggaaggttcgtacaggggccctggggtctaatgtgctaccgcctaacttcgtta -gtgtaaggtctcgtgtgcacctccaataacgctgcatttttgtatagaactcctttatag -gcctgactcttcggaacagcccactaattgacgtgcacgatgctgcatatacccttctca -atgacgcataccggtgactaaatcctgtggctggttgacactttgcataacccaccacga -aaacgcggtttgcacgtggttggaagcgagtaccggttaagggaagttacgtaaacccaa -catagtgagcgatgcttaaaacactgcatgcgaacaacgaccggaacgagagcctaatag -aaccaaatggataggtggttacgaaatccgcgcacaagtgtcgtgccactctcgtaaaga -atgattgcttgcttatctatgagaaacctaataggcacgcacgtgtcgccagggaaaaac -caccatccgacaagacgctgaaaatcgagatagatgcagcccgcgcgtactttaagcacg -atagccatcttgagcgcgacgattcagcttacggagattacttctcaaagcggacacctc -gacgctgaagctctctatcgatgcacttgcatgtccatctacggcttgagcgaaccatgc -ttaacgcttggatgacgttgttagtcggattcgatcgtaccatactgtcctatccatcat -aacttcccgaattaaaactcattttctaccctcatttgtttcattggcgcatatgagcgc -cccatgggcgctgatcgacacaggcttgtagcgcacggctcgtcttgcagtagataacca -tagtctttacgtcgcatatattaatatatcctcagacttccactgcggtattttggatcg -tggcgtctgggacagtcacactctttaacgggagcgcgttcaatgtctgagtccatccgt -gcagttcggtgcgattttattcttccctgcatcttccggattccttcttcttgtcggaga -attcgccgctcgatcaaactcatcattggtacttattgaccgtttcatgctattacaccg -gtacactgatgataacgctgcggttttatcccccccaatacgcgcacacacgcttctttc -ggtgcgtcctgtcgtacccctattgcgcatcatggcactccagccgagcaatgctttgga -caggtgtaaccaagctggatctagcgtgggttcgacacagctcggttcgtataaccacac -gcatgaactgcgagtgcctcgccccagcgccgctgtctacttgctgtttagaggaagaag -caacagcttgcacccaatccgcatatctgcatataggtggccgttcctccgtggcgcgcc -gctacgttgtagcgcacggatcaaaagcgctgtgttactatacactggttcggagacggt -tcatgacgagcgcgctatatgtcggcatctgcgccccatgagcggccgctgtccggcggc -acgaataatatagtgcaagaaaaaccgaagactacggttatatatgatggaacggccctc -acagcattctaacaggtttgacaaacttaataaatgatgggccgcgcctgctgtgaatcc -cggacggtttggccggaaatacctaggcagtctttggaaaagcttttcctagatcaccat -atcgttgtcagtggccaaggtttcgttaactctcggcgtacccagtatcggcgcaatagg -cctttgatcaacccttggaagattagtcgatcgtaacttcctacatcccggtgaaaggct -ttaattctaagtcttcgccacaaacgcttcaggagcgtcgagttctatcactttcgaggt -actgccacttactatacaccggtacacttgttaagcaagtgtttgcggatgtggttaaat -tttgatggcagatttctgggtgttttagctatagctgtatcctcgaggtcgtcgtttaac -cgctcctgttgctagatcatatgtatcgttgttcgagacgctagtagtcgctcctaccga -atcttacaggttcgatctctgctcggcttttgccgcgggccggtgcttgtgctataatta -taatgataagggcagtggccgcacaactgcagattactgacacttgagtgagaactaagc -cttgaagcatagtgttgaatgttgtagaaaagtatactgtgacaaaaacagggcctggtg -tcaagtgtcctcagtgatctggatatcatcacgccttgttagcaggatttacccgcatag -taatggccggactttatattgccctgctgcgctagctagtactgcgggggctctcttccc -cctattgatattccgggcagaatgcgcgggtagtcagcattcatgtcaggcttctatcaa -cgtctcattcacccttggagtgtgacctacgtgttagaggcaatgtagcccgagagcccg -ttcaaagacaaactcccgaattaaacagacaccggttatgggagtgtgagtagtgacttc -cgaccagtgtttggtttcagcctgtcggttaacctcgcagggctaggagaatgagctgct -agtaggtgattaccgaagtctccccagaaagggaacagtcttatagagtagagaatgtca -aatagcgttattgagttctgtctactgcactaccaagagcatgcccaccagagatgcgtc -gcagtcgtagcgtagacgtcgtaatgacccaggtggcgttcggcctattcgcgtcggacg -cgcgcgctactacaacgaagggttctgaaagtgcatgttcacactgaaattctaagtgtt -agtaaacaaacgcgtgtattccagggtcgtgtgtgacattatggctgtctgcgcccctca -atgatcatcaagacgttcaattgtatgttaacgtagatatcaggtttagttatccgtata -ctttttacgccgcgcgcttggaacagattctcctaacagccctcgcggtttcaaaaagaa -ccaaagtctataccatccttgttcccaattctgcctggttgcggagaaaagaccgcctcc -atacgtacccgactcggtattggtaagtggggaagcagtcgaacgcatatttcttggtta -tatcacaggccacgttctatatcggaagtggccggattacgatttgacgttctatccccg -agagcgcattcttgtttgttactactaccacgcgggcgcttcttattcccagaccagagg -gaatgtgcggaagctttttccacagattggcggaactccagcgtgcttagtgcaggcgga -gaacgtccgttcagtggtgcgtgctttatttttcaatctgacccgacctgcgctcaaggg -ttgcaagttgttgtgcgcccgagtaataggcgactcgtcgcaatgggtctggtacattgc -attttcatcggtacggcgcttcataaagtgcggcaaatttctcaccccagtactcatgtt -tatagggtatcaggacccgaagcttctctgttccaaagaaatgtactgtttggctcccct -gtccatattggaggtagatcacttgtggattatgcaatgaatgaatgaaagtttggtgct -ccccaccggggcgctctcaaagagagtgagctaaatttgaacatttaaattgctattcca -acccggagtcctgaccggaacagtaatgaaacttcaaccatgccggacagactagaagaa -gggaaagttgcttgtatatgggagtaaaaatgatgttggtgccgtaatggtgccggaagt -gactatagagcatgtcgtgacgcaccggtaggcagtgctataatcgtatgtccttcaggc -gccgccggacctacgaagctgaaattagacaccggcacactagccccgtcagcgacggtg -cgcggccgctgccctgcagcgaatggggctaacacgcataaaacgcccgcataaccactc -gagctacgggaattcactcaggctgttgcttcgacgtgtagtctcattacataatcataa -tacctccaagaccaacggctgctcatgactctcttaccttgttagggacatttcggcact -agggaagagctgaggactttgaaaacgtcgataaaaccatcgcgggaactagctgcgtta -gaactccatattttacgggtcgcaagcttgaggtcctgtcccggcagctgcaagtgctac -ggcaggaggggatctacctaacgtgcagtaacgagcccctgcccgtaatgaggcgtactc -gtctctaatcgtcagtaagttactatgtccgaggacgcctctacgagttgaactctggct -aggcccacctgtccgcgccctgctcgggtaccccatctgcttatccaacttcacctcgcc -ttacggaatctctggttgccagtcatccgatggtcattaagcagcgtggtacatcgtagc -caatacttcagggcgccagccatattcccagccaagcggctgcataattacagcgcctgg -cacgactaatcgatcccacaagcctggtagatgacccttagccctaaagcgccctctgac -ctatctgcacgtgatacttgattatttgtaatgagcggacagggtagatgactaatatac -agggtcgtctaggtttgcacaatgcagacatcatccgcgcaaggcccggacggctgtact -cacacagctagctccatctccctcagcagcactaagattcccacgtgaccagggcgacgg -gcctcagccaaacgtatccttgatatctacttaagtcaaggttgactccgaaccctatgg -gtcggtgccgttaacagggagtctatatctcggcgttccattgcttgtttcaaactcctg -ctataaggtgaaagcgctggagggcatagtttatgcccaaagttgcgcgtagatccgtcg -ggatatgtgctataataaggactgctcgaggtaggcggtaacggctcccgccttcagtag -gcgcggaactcgaatcggagttacaggacttgaccgagtcatatccaagtttatgttacc -cgatcggatccggatgctcgtatctgcgacgaggtcggaagacggacgaaatacgattca -accgcgcgaaccattagcatctaacctttagcctcaatgggtgttaacgtggtgggctca -ctcggcgtactctttgtgcaactattccgtatgaacaacagtcaagttgcgacatgatgc -tcttacgtgattcccacagtttcccacctcaggatgctttctttagctaaacccaatagt -tatggcggcaccttcagactcccacgaggacggctatgacgttgctaaaccattcgccgc -caaaggctagcgctaacgagttctgtgagtttgttccgggccagatctccaggatggtcg -caacaacgcagtaccggttttatactggtgaccctctacctgttattaagttacagcgtt -gtcctacgtacatggtcgtgagtactcgcgtagtcaaacgccaggactagcagcgagaat -tattgtcgcgatacttactacaatacttacccgatattgacgtgcagggttgaaagagat -ggacagttgaatatctattttgacggaatcctcaaaactccctccacctcaggtaacggc -cctgtccgggaccgcattcttgcatatattggttcccagagcgtatctaagttagtctct -tgaccgttcaccgactctagggcgactcgttatcgccctccgaaacgatgctttcgttac -ctcaatgatgacaggctgtaacgtaagtgatcccaatctcactcgtgccttgtccaccgt -tccgtgaagacgaagcaatacgcggaatacgtggcttcgtaatattttgacgatatgggg -ctgggacgctcaagacttccatgacaaacaaagtgaagagcaactgcatccctcatcatg -atcactattaccagagtagcgatggataacgctaatttggtcagggcagctatcgcatcc -cgcaggtgtaggcggagactttttcttttgttgcgagttgacaggtaatctcacggtata -agcacggttatttacgcaagcgacgtccctgggagaatccgcccacgtaggaccccataa -tccataaatactgcggtcgaaaccttcatatcgtgacagaaccgctttctagggatgcgg -tccccgcattcagagttctactttggccagcgtgagacttaacaactccacttacgcggt -acattgaagtcgttcagtccagtgtgacctgtgtaccgaataacgtgtagaccagcgcgt -ctacgacttagcgcggctccactccaaagcaccttttgggactttccaacgagcctgttg -gccgttaagcggtatttcacaaatagatcaccctagtgtcggtaaccgactaccctattg -ggatcatcgtgagctcgaaacactagaggcggaccaacggatgacatttgattcggctct -acagagcttgtcgccagagaaaaactgtggcaatctacgctcgcggggaattgactttag -cggcccctagacaggtgtgggacactagtctagattcacgtcctacacgacataacagca -ccttcctggccagcccagaaatagtacctggacgacatccagccttccgacgccataatg -tgagccgtagcgcccacgacgatcaacgaggagaaatttacaaaggctgtgtgaatgcta -cgtcgtctaccattgctcatcgaaacgaacgcaacgcacagcatacaacgtttaccatgc -cggagcgggatcctcaagtacagaaaacagaggtctaaacatgatccgaacaaatcggta -ggtttacacagctacctcgtccattggcgtactgcatcgatcgtgcttactacggtcatg -ccggcccgcgatgcacgtacgaaggaataccctgtctgcccccgcgcgagttacgctgtc -tcgcacataccgagcactgtcgttcgaagctaaactatgagcccagccgagctccttatg -gccgcaacgctggtgcggccagctgataaattccacagtacacgatcctcgtgtaagatc -tcgggcatagtaagtcatttcacatggttaggagagatagaatacatggttctggtagct -caaccaggatttgtggaacccttggcccttggtgagtgctacaataaaattctccgtatg -ggacaaccaaagggtgctggatgtgacttcccggcccaggttagatgtccatatcattca -tacattgcccgaccgacccaatgcctaaatcagaggcgccttagctagttcttgtagtgt -gccacgtccggccacgcagacacgaccctcggcgagtgatcaccattaccggattggcat -cgaagtctttttctgggaagttagccagtttggtgtgcggtgcttagaatcttattccca -gtcaaacgcccctgggacgaattgctaaccctagttgccacgccggaaccatcttcggga -gagtagacaaatccgagttagatatgttagcgtcttcgtgagtctgaaatgtatcacttc -accgcagaatacgcgaatgtctgtttgccctggactgacggaattggcttaaaagccgac -tagagcattttggtacggttcctatccgcgatgtaattacctatctaggttatcgctaga -cgaatagcgagtacagtgtagcaggccctttgttagcaagttgctctaaacagttgtcaa -aacgtaggcacaatagtgcgattcttctaaatccgggaagctcatggcgctgggcagaaa -tatcacatacgggaataatcaacctccattttggttcgttttactcgatgagtgcccctt -gcttgagacgagcgttctgagttgatggcatgtcgaaaggtttacgcggtgagtagagca -ctttacccctacagatcggaatcctcgaggaggacagttggaacttcacattaacctttg -ttcgattgcatgaaggttgtgttctgggagtaggctcccaaggtagcggttcatgctggg -ggcagccctaagtttgtattatgtgagtttgcgtctgaaactacatttagcatgaggaac -gtaagctttctggagggatcttctaaagccaggtatcgcccgctacgatgccggagccgg -tggttcacagctacctgtgctcaaaggcttaaggctaatcatagcaacagtgcgaaagga -cgtctttcagatttcgaaaggtgctgacacaacaagggtcagggcggtcctacccttctg -attccctacctggttttttagcaagggtcaaggctaggcttatactcccgaacgctttaa -acactatcccacccctgacggggggaagttgcgcgttaagtataagaataagatttaaca -gtacactttaggttcctcttccgcgagccgtcatacagcaccgagcgccgttgaaacgcg -attaacgcgtattgtcgtgcgaaaaaaaaacgctcgccagcatattggagtgtcgacttg -aaatattgaacaacaccgcatatcaaggacgaatagtagggcttcactacctccacctga -cggccctaggacttatactcgaaaaagaccttccatcacgatgtcccttaccggcgagag -ggctatatacgcatgaatagcagatcttgccgtcgctgagtgtcacccagggttgctcca -gaaaggagatagggcggagagccatcgacagcagctctcgtctaggtggtagcagctaag -gagtcgtgtcgtcgcgccgagttggaacattatcgatgtacatcaatgcagtaatgatgc -tgatagactcgggagtttcctcaaacccagagttacgagaagacgcaggtctatcagtta -gaaggagtcagtattggcctttgaaagatcttatgctcatgcccaatcgtagttaaacgc -gaatcgggaaggccaatctggcggttttgacccccggactcttaagacgtccaatgtggc -tagacataagtaacgaattactctataccgaggggcgggaaccggccaatttattacgag -agcacgagagccttgtagcggccagcacactatcctcgagtccctctatcctgagacgta -gatatacatatacgcctagagagaatagccgtctaggcttccgtcgccctctccgtcgtt -cgcgtgaaccgtaagtcttccgcattcccttcctcaagcgcgttggtgtgagagtggtat -tgaggcccagtcttataacgcatatacttgtgcactctattacttaccatgggaaccaat -ggcactctcgaatcatgctcacagctgagcaacggtgctgctcaccaattacatatgagt -cgtggtttagcgttggagcggaagatgaatttccatctgttcgcgcgcatcactaaccaa -tatacggttatcccagcgctatctagttctgaccgggttggtagcgaacccttttgcaag -ccggcttagtggatgtgaagtgggagtgataacttaagccgccacgttcgggggggactc -gtttatattggtgctggaatacgaacggcgtgattcgtagtcgccctaatcgggcgcgac -aacacatgtagtactgtcgaggcggtttaaacccacagtaggtactctatcagcagaatt -atgctagaagtttcacaacaactttccgcatgaggctcagcggcagcgtcgcactcccaa -tggccagtgccggtagcgatgtttggtggaattagttccttcggaatacgaaccggattt -aaagagcctcggagaacctaaacgaatccgtacgcatcttgcccaaggtgctgagccttg -tcgcctttctcagttccacctacattaatgcaatgcgttcgaagctctgaccgcaaacag -gaatcaagttcagacagagtgcaagagtttcgcaataattgggaacgacccacttgatat -aggtgcttttagagatgtgtgtacgaccgtccttcgagcatacctacgggttacaattgc -tccggtaagtcaaggcacatagaaaacatagccaactgagagtgtatacaagattacctc -atgtagactgaaatacacacatcgctttaagctctcaaccgatgtagaacagattttggg -cggcgttgacagcgtgcccgctcaccggtttgctcccttctcaccaaataaccatgagac -gactttggtgactggactgccagatgacgggctacaaccgttttggttccgaattcgctc -taactcaactaacatcatactatatgcgccaggatattctcgcggttggacccccctgcc -aattcgggttaaaaccactccccccatgtagggagctgccgcaattacaatatcgacgat -cccagatggacgctcacaaaatatcagtcctttcacgatccgctcatatagacggatgaa -gggactgaggctgttagatagtgacgtcgagcatggcgtagacgagcgcaaccgggtcga -ggcccgcattaccgtgacacccagttgaaaggatttacactgcttcattcgatatttacc -actttgtatgaggagctcaacctaagtcaacacggaccatcatacaggtcgccagtaatg -agaaggctgctgtgccatggagaagcgctgctacagcacacaacgaacatcttgcaatgt -gaaggagggtgctcttttgggatgagcctacggggatgtgtatccctgccctgtaggcag -ttgggacttagcgcgactatctagataactaaggcgccagccgcggctgtttgccgaagt -cgtgctgatgctgtacaacgaagggcgagcgtgttaacatgctacacgttgacctagact -agtccaagtctgaaagtcccaatttaggtcgggtagtacagtcctcggttccagtcccat -gttgtgccgacaaggacaagcgatcatcaaatcgactgaaattgaatcagctacctcaga -ccacattcagctctcggtaacatgggaggcttgtggttgcaccgtaaaagggggatagcc -catccatcctgtaaacctacaatcgcgcgtagcttaatacgctcacattagacattcgat -cgagagacctggtttcaagagccttcccttttgctttagtgggccaaatcgcaaccctgc -tcccctcccttacgccttatacacttcagtgcaaattcatgcgttcagcgaacaactgga -cttctgttgtacgtagtccacgggggcttattcattatagaaagccccctactgtcaccg -ttatatggttcacacatgagctgatcacctagagagtcgtcatgcacattcgcctaacaa -ggacatatgagtaaccgggaggggatatcttcgatttgcagcaccaatcgacgttgtact -ggtctattgtcggttaggtccgattatccgaccggcaatgaggcaagcccatctattcag -gaaacttaggcagttccctgtgctggcccgacgtcgatgagttaagtctatacaggccgg -ccgcgagtagttaacgagaccaacatagaactatcatactagccggcaatgatcaatagg -gtcttagtgccactgtccttcgagccctcgcctaatttagcgcgaccggtttcctattgg -cctgtggggttgcgggcgcgtccgctttaagaatggtccttaacacctacccggagatcc -attgcatagcacactctccccattagcctagacacgtcgtcgcccgtcgactttggctgg -aatttaatcgccggggatatcgaactttcacgccctttaacgacgaggaaacctaccgtc -gcctggctggtaaatgggtgccttacggggactcacgatgctgtgaaccgcccgccagtc -tctggggctcccaaaatccaggttggaattacggacctccgccggtactacgcattacgg -ggtggaaagtcctaagataggtgaatgaaagggcttcgctaaaccagtaagtcattaaca -ggacatcggcgtcacgtctcgcgggtttacacggcgcacaaatcctattcccatgataaa -caccttatgccaatccacatccctcgctgcctaataaaattgtcacacctgcgctactga -ctaacgtttacgcaatgagagatgaattccgacacccacgcttgttgcaagcacagccgt -atgggttctcgggtaaggtaacacgaggcacactccggcgcggccacttcggcccctccc -tgacgatgaccctcattggtcaagcatcagtcgaatgattcgtaacaaggagcaaccgac -tcagtagagagggtgaatctcacgcgctagcctgaggaccgcctaagtgcttgctgtgcg -tcgcggcagtcgcgaggtgcgggctaaagtaaataattactggtcttacctaattaaagt -tggtggttagggacaacgttaccactaccagggtgcgcccctgaattcgggtatcggact -atccagggcgccttttgcggccttaaatacctttttaatcacgctggagctagaaggcca -tcgggatagacggggttctaggtactcgaaaaaacaggcctaataactttattgcgcttg -gactaacctatgtacaacacgttgtagcatactatggaatgttactgacctacacggata -gcatttgttcgggagcgtcattgacctcccagaccctatgccgatctgcaccattcagtg -accatccacaagtctctaggtttagccaaaggtgacaggtcaagcacttgcgcatgtcca -ctagtcgattatgcaacgtctctgaacagttagcacttatctcccgccgtgtgactcacg -cagtgaacttacctatagcatacgcctactattgaatgttcgttgttctaacacagcgct -aatacctcactggcagaggtgcgcacgctcctagtatgggagggagtcaggtcagagtgt -atgagactgatttttattcccgataggggttcagtgaaatcgacctctcaaaagagaggc -gcagaattcgctgataagctctgctacgatcgctaaggcacgataagcagggcggtgaag -gttggagcaagataatatcacccggacgcgggtcctgtcgcaacgagcggccgtgagacc -tggtgcgcaccgtgtctcgatcggccgaatatcggaacccactgcgtgactcaaatgata -tttttgctatctgtgggggatttatgtcccggtagaactctgcttctagcagaggataaa -ctttacaaggacggcgaatatggtgtcagagcgccctaatgatcccgtgctattccgcgg -gccaccggccgttaatggacttcgggttagaaggatgggttattcatcttccacagaaac -gcccagccgcaatcgtgggtttactcgaccgcggttatctgcctacttagctgcattcct -aaaacaggattaaaaaggccgcgagggttgcgaacctatggttgagaacagagtcataga -gtcagatgcgcagggacggcacggatccacatggcagttaactaatattaataccccctt -agcctcgtatatgggcgtgtgcagtcttgtccactcgcggacgcgtatccgagcgattct -gtcttccaacgctattcgttaccactttggcacctctttgctaagcaggatgagaacatc -tcactcactacgagccttgactttcagcacgcggtacgagggcactgggcttctcgtcct -ttgctagatgtaggtgccttcccgccctcatgacgatgtcacgtctatcggtttattaag -gtcggggaccattacgagattaccccgcgaccttcgtccaaatgggatcacggcaacgtt -ccacgaggggcgcgttgcctgagccactcgggtatccccttccgatatccgcagaagtct -agctaggagcaccgccaagcattacccgattaagcaaagcctactcatcgcatacacggt -tcttggggtttgcgtcagatagcaacttccggcggcagtagagatgatattttatccgat -tgagcttgcccagcgcaccctcgggccccgtggctcttctcttaaatgtgccctgcacga -tgttagcggttcctactcctccccgagccctatgggtggacagactcgccctctgaggct -gcgtgttcaactggttccaagtccggcgcctgtggatctacgacgcgaccagcaacatta -taccaatgtctaggcttagctcgaaaactagtaagccttagaactagggtcgtagcttct -tctaaatgaagggcagcgtcatagccatccacgttagcgcttactcaatcgcctgtcggc -tgtcatgttaccgctgccgtaagttcgtgaacataaaatacaacacattttaggtttaac -aaggattgtttaccgccacgtactggtgccggtagtgaaacgaaaaacctcagcatcggg -aggaggagaacggaatccctaccaatcttatctctacttaaagaacaaagcgcgcggtac -atggcgcagacgagaatgaatccccgaacgggccgagttagactcctcaacctgtcgaaa -agttgcgatccgctagatcgtagagtacggacttaaaaaagaaaagaaggtctctacccg -agcggtgagcccaaccagttccaggttgctgccggattcttccacacggcggtgccatat -ggacgaccccgtactttgggcattctaggctacctactgcgaacgcctcgcgttcatgcg -caaagttttctctagataggcgcgctttggtaagcatagtaataggttcttcagcgtcta -aagcccgcccattaggcctggcgactctgctttagtggtaaagagatagcatcgatgtac -atcgatggagagagtattttcacgaaagtgcgcgccgtacactatttatcggcatgcgcg -ctaaactggagggtctaataaccagatcacgcaacgaggtcccatgtacggttcgcattg -cgaaagtatgggatactagaggttttatcgggcctcgcttgaggtctgtctgggactggc -gcacatcgctcgtcgcccagtcgatagcggggtgattaaataagaaatatgttagtgccc -ggatttggaaccaaccagtcccggtagcggtacaaaaagcctttcctgctagttctatgc -ttaaagcgtactcctgttacaatccgtaggcaacctgaagaggcaacctggttttccttt -atttcgactttgtttgccatttccaatgtgatactgtgcggcgaccttaatgctttttgg -taaaaccataccgagatccagcgcacgcgacattcagaccggtcccggtttggcggtcaa -cctcgtactctgcactgttcagctagaggggtctcctatcccgaggtaccggtcgtctaa -cgggtggttacaggggctggtcctactctaccaacagttaaggagggcaacaagttcgat -ggggcatcgtataacttgaatgccattggacaaataataattgctccacgtcccaaacct -caactgaccttgtacgcataggtgccacctgcacgcaaccgagctcgttcaagtgtcctt -cttagctggcgccgagtgaacctgcatctgagagtctacccaacgatctagtatcataat -gtctgtagccagcgactatgacacttagagagccgcctaagaaagtgtttgcggctcctg -ccccggtgcgcttgttcggtgggaggtcgttatatagaaccatggggatataatgaatgg -taacatatctgccacgttgacaagccgctattatgaattcagggttggatactattatgg -cccttcacggtcacttgtaggacggccccaaacaggattagtaaagtcggtggtctaagt -gtaagcgataggcaaactgcggttattcgcgtagaccgcttgatgatgtaaaataacagt -tttcatgtctttgcaattaccgcgtgtagtctgaggacttaaggccccttctcggatagt -gaaatagttcaggtccggctgttgacaggtgtcctgaccggaacgctttacagctcctga -ggagcgtagcgccctacgtccttcctgatccagccgcacccatagctagtaaacaagttg -actgcgagacacaaagtcaagggcctgggcaagcgcgggccggactgcttgtcgaatatt -gaagcgtactcgatccagatctgcgccattgagttaaaaggactatacgcctgtctccag -ccgattgcagaaaggacagatcctaatagaagaactggggctccattctctcaccgacac -cctcctattcctatcccactggccccattgctggtaacccaaacctctcacgacgattca -gcgctgcgattaaagagtggagtaaggttaaaatcggttggggcgagtagtgtttgatgc -aagatccattgattatcgccagtgggatggcggcaaaggtaccatacctcgagcttattt -gggtgcctcactcctcggggaaccatatagacttgcacttggcaaaatatggtcgggttc -aaccgaccttttatgcctggaccctctgctgagggcctcttgatcaaaatgcttgcgttg -cttataagttgccgcttgatgtccaggattattattccagcgtggaacgggaaatgatag -cttatctgttactcagggcgcctgaattagggttgccattacataaaggaacgacaaggt -gagcactgggagggcgcattcttataaccgggcatatgcgcggaacaccagcgttggatg -gcctacctccgcggaggtcggggtcgaaggcccccgtttcctggaacaataacactgtcc -cacttggtttgtaaagaacctccactcgtcatgacacggcctagcggtgttatcggcgac -gataatcatgggtctttacaagtgaatcaggcttaccaggcggaccaggaggaaacactc -cacgagtgagacctggatccggggggatggcatacatcccatacgctcactcagcatgca -cgttcctagcacatgctggaatcttgcaagctggacagagtgtattgtggcccttgagct -atacttctaacgccagatccatgtttgcaaacatagctccagtggcctattcgacggttt -agcctctgcaccatgttatggctcttataggtgttgggtcgacagtgggtgactagacgt -aaagagagtatgccgtggttgaaccgagacctaaaaagctcaagatcacgacggatatct -ttaacggataccagcaatcggctaagaccagtgtctactactcgtgatcggcaccggccg -cccgttatctggtgtagtagtcagccccgcatcacccgtgcctatgcgagccttgactct -atcagtctaagagcacacgtttagacctgagccaagcgtaggtcactatcgagtcaacct -taaccctagttgattaatgctcttgacgtgtatgattacaccgaccgcgaataaactctc -acgcgcgtgacggtttcctcgctagaagaagccatatactgtacacactgctgtgaccga -caagacttacctttggtttacgtacaggtggacagttgtttctcccctcgggtcgatagc -gtttggtcgtgattccaataagtgccagatgtcggaacaacatgtttccataaagacgcg -tgacatgatatggtttgctatgaagccactacgaaaatgccagtgcttctctttatattt -tgcagatgtggaagaagtgcatccattatagtccttttacgtacctggattctccgccag -cactgccactactttcttactatccgttggggggggacgctcaacagagcgcggactcag -gaatcagataacgaccggaacggggcatatgcccagcattcgttgttgtaaagcgattga -gtttgggctgctaaggggccggcagttcgttccgggatacccgcaaaatttatgcctttg -tccggtgattctaggtttagctcgatccacgaatacgacaatacaaggtttgcctcctct -ttcgtctatcacgcaagcttagagttgctaaaatttcgtagtccgtgcgttaaagctccc -tacatttctaagctgtacgacaccacactatcttgcaacaagagtcactctatttctcat -atccgaatgaatatcgtcctgtacacgggtggcaatctcggcaagatggccgagatgtgt -agcactgtcacgttcaaaagctcaaacttctgcccactgccagccctaacagaatctgga -gacatcataccagcgctgtcccatgtggcggagagttgcttcaatagaacgctatcaccg -tgggatcacatacgtcactactccgtgtgacactatttattattcctttacctccaatac -gcccgtccttgcatcgcattttgtagtaagacatcagacgacgttactgccacggaacca -taaaacgtgctgagcccacgtaaacgtcctatcggtgaggtccgctcatcgtacgactct -gaaatttggaaactaccagtttgttggcggtgaatatccaacctgtcggcatacacgctc -acgcgtatggttggtacatgcgaaaaactacccaagcaccgggcgcgcgagacactatga -gtttagtgatgatttacggagcacgttttttgaactcaacagaccgattgaatccttcaa -acagggttactcgttcgtgacaaccgattacagcattctgaacgtggtacgtgcacatag -cttggagagttgcgcgaatctctttcgtaccgtattctacctgatcgctagctttccggg -gtaacgacatcggcaatgatgagtagcagccagtatccatactggacaggtactccatat -aaaactcccgcttcaacacgcgagatttgaaccgcacagtgattgtgctgagtcctagtc -accatcccgatcttgtacacgggtaggggtgtaacgtgggcagatgatgcttacccttcc -gctggcttgtaaaggtgccccgcttcacttgatgagctgcgtcgacaagaccacccaacc -cgagccacgtggttccgacgactcaatgatttccagtatggtccaatcgagcaccttcca -ctcatgaaatttgagcactgttggggatcgagtatgtgttttgcagtgggtctgtagcat -ggaaagatgtaccctaacccggctctggcattatcaacggtggattgggtggttaaacag -cttcccttcggagacttaatggactaaaacgaatgaatcagacgccgagaacgagaactc -accatctggcccaggagccatatttttgattggtaaatcgctcgcatagtgcccgatacg -atgcgtacttgagcgtaaacggcgcagcctctaatcataggtgattgctaagtctacggt -catccccagtggctgacgaggaattaaagacctatttccgtacttgttgcgcgccttcaa -ttatccagcgtataccccgatggctactgctacatgtctaaagtgactagccagaaaaac -ctaaccctcgcccacgaggccttgatcatctcaatcatagagtaatgttcattgaattgc -acggctcttggtgtgcacattgagacaatttctaaaccgacaagtttaatggccgagctc -ctcctgcgtccagctggaccttcatgcaggcatgaaggtccatatatattgtcctcccat -agcccgccgaccgggtctgactcaactgtgttttcgctatcccaggctagcacttctatt -ctttgttacgtccagtcatagtgttactatagggtaattttagtcatagtagacggccgc -tttttcgtatggcccgagaccgtccaccggctacccaattaagtcacatccggatcttgg -gtctagatattcctatcgaaaatagtctcgccgcctcactgcgtagttcagggggcgtca -cacttgttcgcggcttttcctcatgggatctttacccgatggttgatgcaataaatgtct -acaccggactggcgtgtccgagacgactttatacacgtgtgacgagtagatcagatcgta -cgaatggtctgtctcacctatcccagtgggaggatggaaaacactcctgcctaccgggtc -gaattatttacgcgtgttacaatatgtaatttagaaaaagggattgctggtcgatgcgtc -tccaagggattttttatctaaaagcatccttttgggtgtactctgatcgcacgtcgcaga -cagcagtgggttttgacgcagtccgtaggcccacagactcgtttgttgtttattaatccc -aggggagcgttgaagccacacctattctgtagctgtttgaaaggtagctagcccggatat -tactcaaggtgactcccttcagaatcacacgtcgctggagtcgccacagggtggcatata -cgagtgatagagcaccttactttcgaggtagcggtacattagtgcaacgatgaacccact -atagtcttagtgatttcatgttttacttacgcgaaaacgtggggttttgtcaacacgtat -acgttgaatgcacatgcctcatcctaaactgatgcactgccacaagtctgaaagagcgac -agtctgcaacatagcggaaggttacgcccaagccagtggtgatcccccataagcttggag -ggactccccttagcgttggatgtctttgccccagcggcctcggtgtacgggttctccacc -ccactatggtttggaactatgaagaggtacggcaacctacccgaggcaccaaatcgtgaa -cctacgcctatatatacggatagcagggtatccattcttaccatgagctcgtaaaccact -ccgctgaattcgatgggctttggcgcacatcaccgtttctatcacagatctgtcaacgga -atctaacgctatttactcggcgcacacagatcggaaaacccaactgtggcgcgggacgga -ctccaggaatcgttacgcgttatcaccttcggctaagtcttgacgacattagagctatat -ggtattaatagtagctgtacatcaaatgataaaattatctgaattataagtgatagcgcc -cacataatgacacacacgttttagatagttagtagacgctcgagactttgcgagcaagaa -tggactgttaaccttaggggcgggttcccgcttacaaaagctgattcgctcctagagatc -tataagcgaagttcaacccctaattaccattgcataagaccgggagaacctgtgacacct -gttcccatgggattagcgaaggattgttgacttattgccgagtgacctagtattgattaa -aaatgcgacgtgaagtaaccaggccttcgatgggcgtctccgaatgggccagtaacaacc -atatacaaactacaagttaaacccgaaaccgtggctaccatgctcatctagtcgggttcc -cgatggaacattccacggtgagcggccttggatgtcggaccaccatttggcaggacaacc -gtcacgattcgcaacagcggttaaagcaagatggacgccaataatgatattcgaaagtgg -ggttaaaactggccgccgcttgatcggtcctacttagtagcgtctcagatgctagaacca -ttccgagtgcaatgtgtgctagcaacagtggaatcggtctataacggtttcatgatccct -tctccggccatccattgagaacgattctacattaatctgttaagcgaccgccaatcactg -attagctccagaaaacctaagattaccgtatataagaatcaacctgctaaagtaggaagg -cgcaggttgccgatgaacagatcaagttcatacaggagctcttaatccaccgtacccaag -acctagtcaagcgggatgtcgttaacaactttgtacagacatttgggagcattgcaccac -agactccaatgaataacgcatccgctaggaccggatatagactgaagaaagtatagtcag -ctgcctcctaaaggaggcagtagtaggagccaacctaacgtcgtggatacgcattactcg -gtagcgtggtaaacacacgaacccgatctcagaattttagcggcgagaagttccataaat -catatttctacttcgtgctccggcttcatctgatggtggtcattattctccatatgcatc -cagtagagcttgcgcttcacgcaaagttccaggatgctcacatatgtccatacggcgcta -ctacatttactcgaacacttggaagcccgccgtcgggtgtatgactagcctttgactgag -actcagcctcgcaattgtatgtaactagcgtatcccggcaaagtttataggtccatatta -acaacagatgggagtagagcagcagcttttattcggactggcacctcatcgctttcgctg -tctgcgggatcaccggtctctcttgaacgtgttagagcctgagggatctgagggcacaca -cggcacttgtatccggaaaacctatgtatagagcctcggttgccagtactagttcgtggt -ccatagcgcctgaggggattaaacgcctccggaaacacgcgagttgttcttgtgcacgta -aaaatcacttgatgttatcgtgttcggccccagtcgctgtctcacttcaaaccagcgccg -tatccattctatgcaattacgattatacttttgacccgatctacgaatcagttacccatg -aatgcgacctgcgataacatacagtgcgacctccagtcctgttcgattaaatcgacagag -ggtcccgaacgcatccaagcggcttgggtcccgctggcaccgctgggaagtaggtagaga -cgtattgaaggaagcgtgtacagattggtgattacggtgctcttataggaccagactcgg -gcgattctacggggtcgtccacactatggacccgagcatcgaatcagtatgtcaatctga -agggactaatcggggagacgctggactccgtcaggagtgaccagaagcagggatgcaaag -acatcccggacgtggatagactacaaatcggacactcagatacatacctctagaaaatac -tattccccgtttctattccgttccgcggccttttacgtttagcttggagcaaactatata -gccgaggagggtactacaacgcgacgcacttatatcccaatgaaagaataaaagctggta -agacaacgctagccactctacctgtgttgcactcgcgatattagggagacaggttacaat -cagctgacacatgactagctgacccggagcactcaattctgcccatcggggatctgatca -aattcgtcgtcgcattggacggcttccctgcgtagcgaaggaatggttaccgtacgcctc -ccaacacacggcggtcaaaatagtcttcagcaacagtaacacaaatctcttcctagtact -cttggtacacggccctagagttaattgtgtgcccccccatactctctacatgtgataaca -gcgacccactaggttgccaaacagagtcggaagtcctatcgagtccagggcgctgtaggg -cctacagactctccgatgtcctgtgttcttcgtaattgcaatattttcgggacccttctg -ctgttgtcagattcagtctgttccatcttctggaaatgttgaggaactcgaaaacgctaa -ctccctgatatttgtggccttacatcaaccgtcctcaggtgaagcccaatgctaaaggag -tgaaccagacgtgcaggaagaagctgttgaccctttaaagctctagttcttgtcaggtgt -tccgactcgctggcgagagtatgatccagtaagcggcggacctgcgaccacatgatgagg -tccctacaaaatcctatgctccctgcgaattacaactcacagagaacagcctaggctttc -ttagttattgatgcacattctactgacgaacgcagcattcgaactaaaccattggtaatg -taattgtgacacgtgggaatctatttaaagctgcaagaactccaccacgtgttcatccac -atcggtctctgtggaatggtccaggaccgtcccaatagggggaattgcgagacccaacta -atcgagtgattgaacatgggagcaattcccgaatagaaacttgcaacgcgcagtactacg -acgatggtagcaataacgacgcgctacttcagctcatgggtctaaattagggcgaacgat -tgcacctaatctgctggcttctctagattgtagatccacagggccaattaacagtgcaaa -gaatagcgtcatatgattagtttgaaaataatatacatgaaaatcgagcacccgcatcaa -taagctacgagagtctttggagagtgccaatacacctagcacatgctgtgcttatgttat -gaaaattcatacttgactaacgttagccaccagccgatggcgctgtcacaacgaccctgg -gttaccgtttagttctctaagaagggtcactctaccagcggggttaaatataccggccga -ctgtctcgagagtgggttgagataactcaatttggatcagcattaagtctagagcgattc -tcgcggagcgatctatgcgcactgacttactcttggaatgtcactcgcggcttacatgcc -tccttgttccgcggattgaatattttatgcggtagtcatgactttaatcgtttctacaga -aaagaccgtttgaaatggcagaggaaacaaccctgctggatcctccctaagtcacatccg -gacggacagattctacttaacctccaataaattgagaaaaatgcaaaaggatgccaatac -ctatagtacattttacgtttcccgtgtggttcgtgccaacccctggacggtggatgtccc -cggtgggtttttggaccgggcgaaattattggccaacccggaaacccaccgagagcctaa -gacgtgcatgatagttataagtttaatgggagccttaattgcaaccgatgggggcgggta -tttgtctcctacaccgacactatggttgttatttgcggcttcactcaactacaatctggt -ctgtagagagtaaccccgtattatcttcccttgcgccctgggtgcgttagcggaagtatg -gggattaaggggcgtgaacaatgcttctaagagcccggcgctaacggacggtgtcacaga -gtgtctaggcagagtccccactgtggaatacgcatgcgtagcgaaccgcgcgagtcagta -ggaacacattgggagcgattgttcctcctatcagccggcctagcaagcatgacgtcaaac -cttagtgagcagcccaagaaataacgctcggactgcaacagatctaagcgggaaatatat -cgatagcgaggaagccgagaggactaagcagagagacgaagaggtgagccggagtgattt -agccccagatggcaggtatagcttccgttcacaacgaattgaacagaaagtccggcgccc -ctggaatttcgaaacgtcaccaggttggccgtaaggttggtaactcaacaccctaagact -agcggtcttgcgtcatgcaagcggggcgtgtagcctgcacagacataatacggtagggag -gttaggcgtggggcgacacagtaagatctgatgaatccatgtacgttgcctccccgtcgt -acgcagttgatacttcgcatcatacttagcagaaagcatcatcgcgacctctcgtacaat -ggcaaaaaggagggcttcgtgcacggggtagagtcttccgcactctaatgaatcaacagt -cgaggttcatggccacaaatgtaacgacgcgatatgactcgttatgtctaggtcccactg -cggatctccttagaagcgaattgtataaatctcatagccccccccttagcaagtggtacg -tagcccacacgatttacgcagagtggggtaagacctcccccacgacttggggaacgcgcg -ctcaccggatagctactgccactgtagagccactagcgcatagtgtatccatagtgatgt -acgtagccaagcggggagtccttgtcttatgactagcgccatggggctatcaactgagaa -cggatgcacaaacggtccggccattcgcacagtccgccggtcactaatgaccggtctgca -tggggctgcaacgatcgcccaaggattggaaattgttaataagagtccagtggacaattt -agtaacgcgacgcgcatctctccccgcctaacaaggctcatttttagcttgacggcgcgt -ctcacgttgccgggctcagctcgagctcacgtgccgcgcggggttacctcaggtttgagg -aatgtgtcttgaacggttcgtaccggcttagcgtcgggtgctcctcagggttcccagcaa -cattcttaccgcaatccaatactgagggcgctaccaatctaacgtatcactgacccagta -aaactcggtattacgggggcgatatatgtgcttgcctcaacatagaactaccgccttgac -gttagaacgttaagttagagcacctgcccatctggattgcggcgataaaatcgattattg -tcacctggatgaggaatatggcggacaaacgatcgtgtggacctgcgccgcaaccgtggg -atgccgcagcagaatatctacagagcgtttgcaggagggccacgcggtatccacgtaagt -tcgcgcaatagcagcatcatctcgttcggggttgctgctctcagactgtcgatacgtccg -aataccagcaacccctcgaggctttgtaatatttacggagtcctaagggatttggtgccc -caagtatagttttgccgaagggggggcacagtgccgtgactacgattgggctattgggca -acccgaccccgtgcgcacacgtttgatctcagacagagggcaaaaagacgaagccacgag -ctcttcgaaatcaatctacatctatgctgggataagttctggtccacacctagatccgag -aatggaccgtggataacgagcaccgttacctttgaggcggcagcacttttaacaccgtaa -aagtaactctatagttgtcagcctttaaagattgcgttcccaatacggtacgcaccgtct -actaccgtcaacacaccgtagcttaggtccccccgcggcagcgcttccatcgtggagggc -tgtcccccatcctccgtagaactgcgtttaccggtctggggagactccctaaagaaacac -cacaggtgaggtggacgggaatcaagttaacgcattcacgtaattcactcctgttcactt -acataagagaagttctggttcgcgccttaagtgccatccatgaggcatacgatgcgatgg -ggaccacgcctgtagactagagaaacaccatcaagttgataaggagtgaaaattagtata -ggtctggcccgctcgtctgttgatagggcctttaaatgtaataggcctccgctctgaacg -agtccgtacttatctgttggatagtaagaacacggttctcagtccgcaaggtagtatatg -actacgcgtcgatggacctaaagatacgtttggcatcgtcctgagcagcttatgaaaatt -gctgcgcgtttcaggtggaagaatctgtgttattagtgcggggacggtagtttcgtaggg -aattatgggtagtacagatctatggacgggaggtgactgtcagcagactgtgcgcaggcg -ctggtggtataatcgctgggaccacgtactggcaagacgcatgcagcctggtgacatcca -caccgccctcgctcagataggacggacgtgcggcaataccctatctggctcctggacacg -gaaaaacagcagtttaaaaaaaagtgggttttgtgcattatcactttgtccgttcgacag -tttattgcacctcccagcatgctagtaaccccttgctctgatttgagatttattatcgta -acacggagtcgacgtgaagtttgatctgcggagaataggccgagaagcccctacgtaccc -tatgattcccgccatgtgcggtgtaaaattaaccactacttatggatttcatccaaatta -actgacgcattatatatacctcgagtgtgtaagccggccctcgcggtaccaaatttcggg -ctgcatacctagaagtgtatgtcatctaaacctaaggatcgggtagtgacgtgtaactaa -cacgtgggacaatgtctgtcctgagctacgtcttaagaagtatttagtcctacgtactca -ggcatcgatactccccgatgttgggtgcagtggggcgtccgggcggctaagatagtccag -ccactccttatgtgaatttatcaaaccgggaagaccagtgcataggccacgacctacgta -gcacctccgtacaaccttgctccagggcacgtctaaatgtcgtactgggttcggactcca -tgcctggcgactagcctggcggtccacatattttacaaatccgagacaacaaagcacttc -caaagtcgcgtggaccttaaggaaggcaacacaggggacatttagccactctaaagaccc -gcatactccagggcatacacgtaacgcgttctcttcactacggagtccgcgagcgaacga -ctaagggcgagggccgtagatctccttctcgtaaatataatcaaggttgccagcgcttat -cggtgccaccgatccgaagagcgtaaggacgttacagtgtaggatacatggtttgcgccc -gtccatacataatcgaccatcaaatcgaaagcgcgtatgcagtacctcgtactgcccaaa -ggacagagcgcattacgtcaccccgagcgtagcttaccatgttaagaccgtgctcatggt -ttccacccgtaagatgagcacagattgcttgctctaggtaacatagtaataagagaacat -tcatagacgttggaccacggttgaaagaccgcctcttaactactgaaaaacaatatttgt -ccttagtaaccaagatacattaatcaatcctccgctaatgaagttggtaggcaagccttc -agtagcgtaccgtggatttgtgtttactgcatttacaccgcggctgggtacgaagcggtg -tcgggcacgcacacgcgttcgatactgtaggtttcgttggggttgcttgttgccgctaac -gttcgggtcgcttacttctgcaggcttgattactgagttcattcgatcggttcccaactt -cggactattgcgccatcatcaatgtgggaaatcatttttaggaatacacctcgtacatgc -atatgaagtctgcgtgcaacagactttccatgatgcgagctgggcttcccgtcctgcggg -atgcaattggcccagtgcgcgaaggctgggctgattacggacatatctgctgtcctgaca -atcgatggtgtagagcgtaacaatcattccaatttttcgacacgggccagggctttaggc -cactacgcacctgggttaacactcggcgacccgcctaacactgggtggacattcccgagt -tataacgacacctcattttcaacaaccacaaaaaggcataggcaaccgaactaacctttt -gtttagttacttctggtatgttcatagtgtagggcaggtgatcggttaaacgcttgtacg -gaccccattctgtagcgcttttctgaacgataacactacgttgggagatacttgttgcgc -agtcgtgggttaaaagcgagccgacgataaactgcaataaggaaagcggacctgtcacct -cccgcgggtaaatctactcgctcgtttagaggccgtaaagcgacataacggtgtccagta -caagcccaactggtttagagaattcttctcctactacattcaccgtccgtatttcggcgc -gccctacggtattcgtggtgttctgagcatacatgggatggactatcacccctgcgcgta -aggactagcagttgtaagtcacgttaaggttgcgtaggcaactgttggagtgcgtccata -cttttcagcaccccgaatttgtacgcccaagttgtatcgtcgtgctgggacgagactcag -cctttaccctaagcgtttaggaaagcgatgttttaccgcagacgcatatgagagaacgtt -gcaagatcgatctttctcgcgcacgttctgtagctagacctgcagctaatctaactgccg -aactgttaggatgttttctctcctagacgcagggaccagttgtaagtaagggtttctcat -cgctaggctagggtgatggtaggtgataccaatggagcggcgcgttaaaagggttgtaat -ctcggtgatatgtgcaccatagattgttccgttattttccttggtcatttagatacgaga -taggggcgccccagtgccacgaggtccatcttatgctgttgcgtagctagagccctacaa -ctttctaagtaaaacctccatcgagtgcgcttggagaatttcatgtaccgtggcggtacc -ttacttgatgccgcgacccctatctaattaaagaccgcttcccgatgggcgaccttaatc -ttgctgcagtccaagttaacttcctctacgcccgatgacaaatttagccggagtaagttg -ttaacctagcctaatttgtaccgaattttgggttgtgagaagaccatttctacccgacga -tcatgggttcctctcaactatacctagtgtcggttaatgatttatggcaaacaatccccc -aatgtagcacccatatgggcgcaattatgtgaacttcaacactaactattggtttagtta -gtgatatttagcgcaaataagaggcatgggctgcgaaaagattgtgtttccgcaatgcta -gggaacctgacggcatacccgagatgatagttaggtgagcccgaatcatgcttgattaac -gtaacgtctatcactacccgtgggcttttgcgattgcgatgcgttactcaaggcatactc -aatggggctggtaataccgatatgggcctgtgtcttagcagcgacgtgtcgataatcgaa -gtacaatacgccagtctacgttagggctttgctaacacatcacccacagttgaagcatca -cgcacagccctcattaggttgtcgtcaacctggaactcctagcaggctgggcagtatggc -atccttcacgaacaatcttaagagacgcacttgacacaggttgcgacagtcaggatgggg -agcttctgaaaatctctacataaggtggcctcgtgatatcgtattaactcggagagcggt -tggactaatgagtgagtccaatcagtgatcggtttttaatgccgggaccacggcttgcgg -ctgcgcgatctatttccctcgcgttacacaaggggtaggcataagcaacaagcccaaaaa -aatcctcacgcaggcgtcaggtacagactttttccctcagcaacgcaaggtacacgtgta -gtcgaatgtggtcataaccagcagctgtcttgcgcaagggcggtgctacacgcatgtttc -actccacagtgacatgatgcccgctcttcggttgttctatgcagcgcgctggtcgcacct -taagcccagttcaatccatgcaattcaccgttgcagcactagatcatataccgcatactt -cgtcagagcacaaccagatcacttacaacgaccgcaagctaagcatccctatcgtgcgta -tgtgtcgaggcgcaagtactatggtcggatgaggaggtgtgccctcaggacacacagtat -cgctcctcattcgacgacgcaagtcgtctagttctatgataggcgaagtgagtactgcgg -atcactacgcccaagacttggagtgacgacgccggctactagcgatggcagttcagaatg -gtgccttccaagggacactcggtcctaatcaggcacacgcgtctcaaagccactactcat -tcatgagctcgctggcgtggcagtggtcttatgcggctagcgaccagaatttctactagc -cttaggcgtacaagcgttgggcaggtcgcgagtaacctctacactctatcatggaccaca -cccatccatgttttaaacacagaggtcttaagaatagcgatggtacggttgagaacgtgg -gagttttttgctctacagcaagcacgttgcagtgaggagagctttaatcctaaatcataa -gtgatactggtgccgagcatctttcggacgaatatgaagacccaacgctaatgtgcctgt -tcgtcggcacattctcttgggacacccgctacccacatgtatgtgaattgggcccgtctt -gattgtgtcgtagatgccaacggttcaatcagccccgacgacgagaagcatttctacgtc -tccagccttgcaacggtttctagagtcttgctgttcgtaaattgaagaatctactgtgca -gactttatcgacccacgatacgctaatccgagcgcgactcggtctcggaaaattatccga -gggagctcgttcaatgcggcggaggctctgagtgaagtttaaggctgagatagtcacacg -cattcgggtccctacaccttgtttcgaatgtccaacgcgaactctgggcaacatcgcgag -actttgaaagatgagggggctgtgacagggcctttgtaataggggtactttaagggcggc -acaatgtgatacctatgtattcctatggtctgggtcagttatttaaattgaccggacaag -aatctgactcgtagactgctgtatagatcacgattagagtttggcaatggttctgaaagt -gatcatgtctaacgtaccggatgacactagtaataactgcagcagcgtgacgatgtaggg -gacttactctgtttacgggtctcgtctgccataaacatcctgctccggcagcggtcaaag -acgtcctattatcgcactcgggcagaaactccagtcatggtgaggcacaccaacagtgtg -gggtcggttcgtggttaccaagccaccatttttcggcccaaactcatcttgaatagtcgt -cctcattcatttgatcgatcgtcattacaaggttcataaatgcccagcgccgcgtctccg -gccagatggaagagctacgtgcaggcggtaagcaattgacgctatagccctatatacgta -tgtgggactcaaggcgacatgctataccgactggtatcgaggcgaacgacaattgctatg -ggtcatcacctcatatttagattcagcccctggtggcgtcgctttgaccaactagttaag -gttgaggagttcgctctcaaacctgataacgagctaggccatgtaccaaccttggctgtg -caccgacgagctgaaggaccgagctgctaggaggtcgcagtatcgattgctctttcagcc -ttctaaaggtgcgacagaaacttgaacctctcggtactctaacactaggggtacgagtgg -ataactcgattacattagtcctggtacaaaagtcctccttccgggcgccttaagctgctt -taaagctatgtccgcttacgagaaaggggcgcacggtccgatctacttacagactgtcaa -aacggtgtagtcggaatgggctcgaacgacgctaatagatacacgcgcattggcatggta -cttcaccaatactatatctttaaagaagggcggcgagcctatttacccacttcgattagg -ggataacacaggtgattgagttttctggaaccgtcatctaatagccgaagcagtccacat -tacagtccattgcatttaaccactaggaatcgcgccaatcttgcgcttatgttgtttagg -aggggccacgctagatcttgcctatcctgagttatatgagggacgatcctacatgtatcc -tcaaagttcgcgctcatccttagattctaggagatggattactaagtgtgtgtccatagt -ttaacgcaacacataatgttcgtgtcgtgatgcgtatccttggtcccgacctactactct -agacctatacgatagcgttcaaagctccaccatcgatgcaacgtggtcgtaccagtactt -atgaaaccttttttcgcagttcgaaccgcctggaaagaagcatacgcaatatcgtgtgat -cagcacgcagggtatcagttaaagagcccaagacttagtctatcgagcagaagaccgatt -agggtatacaccatgtcagaatcgcctccagactagtggctgatatcgtacgttacacga -ggtcgctcttgtcgagtgctcgaacgatctatcccctagatcacgcgtcccagtcggagt -atcccgcctcggatcgaaatgacggctaaaaggtgtaatcacgcgcgtaattcctgcaag -ctctctgagttctgctcgctggttcgaatgagcctgcaggcagtacctacaattcctgct -cggaggacctgttgcgagggatgcccccatacactgctgagggtaagtagctgaagtata -aaggcggcaatcttctgacaaacacccttcttccccttggttaaccagacaagctggatt -tatcacccctcccggggacactgccctgttttatatcgcggacttgcatcagtcgcagta -gtccgcgtagatgtggggacccctcccaccttacatggcaacgcgtgatggagtagtatc -tcgcgtcagtgagcgagttcgcatatttgtagggcgtcataacaaatgtaaatgctagta -agtgaaagatgtcaacatgcgggctcggttttgcgctccaggtatatcctgcatctgatg -aacttttcaatgaccgtgtctggcgcagggctgtcgttttgcaggaggggatgcccagcg -catctcgagaatcatcggctaatcaacgcgaaaaagttaatcttctatcagtcagcgggc -tataactccttgaccaacagaggatcgcaggtatttgtactgtggagaaacaatcgtaag -atggccggccatgaggcaaggattgtaatataccggggtggttatcgaagaagtaaagta -aggcctaccttctggggagcaatggggttctcagacgctaactcttcgttcacctacaac -acgattggacttagagagggacctacggacccgaaattcacataggggggttccccgata -agccaaggcaggatagagtctgaatccatggacttccatcaaacccctgtggtggttccg -ttcaagtcagggagtatcacgactcacttactgagtcacgtcctcaatatgactgaagtc -gataagacgaaggcttaatttacggaacccccgtttagtaatcaagaaacgcattggcgc -gggtctaacgtaagctcttcacaccgccgtctacgaaccagttcccggcgttgcatggtg -atcgccgggttcccttggagggagtaccttgtgccctgcccgtatggtcgttcctaaata -gcgaccaagtattagttgactgttgcgacttaaaatccaacccataagcatatatcgtgc -acccagggactggagggcctcagccccttctttcgaaacttaaaccgtcttcttatgggt -gaaatgtaagtttctacgtctggaataaacggttcgggccaccatgaagcgtctcgtgtg -atagagattcgcaatgagtctgggaagtagaatcacaccaagttagaaaaattctccaac -tcagctaccgcgacccatccgttggggtaacgcaagggtattagccgctcagaggccaaa -gattagagaaacgcgacgcctgcggctgtcttgtgactactccgaccatgccgtcagctg -gagaccctacggtgggtagttgggtagagcgccgatagctaattctcgagtggaactgcg -gaatgtgtcatatgaggtagcgatattgggctatcacaagtacgtgagcctgtgaaaacg -gacgactcactagcaagtaagcacggccctggcgtggcggcggtacgcgtacaaacctcc -actgcttggggaataaggtgcgaacaccgtgatccattgaggcgtggaggcatagggccg -gagtgttcccgtaccaattacgagctaagaccgcctgtccagccctggtgattacgtagt -aagctcggtacaagcctgtaattagcttccgggtggcggacattcgttcatattgccagt -cacggcagaggccgcgctagctaggccagccccgatgaaacacgtgctagttttctatcg -tgcctaaggatcgatggctatcgcggcattgttaacttatcaagcagccgaatcggcgat -agtggacacaacatgaacacgctaagctctgagtgccagactctaccgcggcgaatccta -ggccctgctggggctcccccacagagcagtaatggtcttctgcggccgcgtgcattcaat -ccgcgctggtcaatgcgttccatcgtgccttaagcggggcagtgtgataagacaattttg -ggggtttcatggagatatcttcagaaacctcgcgaggtgcgtttagataccacacgcgcg -gcggtcttttcacacaattcagtccgcggccggtaacctaactttgcttatcaatacata -atcctcccaaatgtacggtttcgaaggagatacctctcccctgttagactctaggttttg -cagtgctacctagagatcacaccagcattaaaccaaggtatcagcatcgtaacatgggga -ggcacgcgcttggcggtaagtacccgccacgattactctgtttcatagcgccaacgcccg -tatagtgtgcttttaacgtcctcagaaatcaccgactgatcagtatggcgacatcggagg -gaggttggcgaaatgttgcttttatagatcgttaaaacctatcgttggcgcgtatcgcct -ccatagaacttcttactcacctagtctgatcccgggactggtcgaaatggagacgacgta -gcttgagatctggatcgatcgactgatatcacacttaaatatgatcagattttactacgg -tcgatctccatatatctatcccagcacttgatacctcctgtccagccgactgcttggacc -ctagcacgcgatattagagggcgcgttgtagctcaaagtaaccagtgggacgacccgagt -gaggttaccgctggcacgtaagggtgacatgactcccccagaaacactgaatttgtctag -cagtacatcataatctgagaacatttagcgaagacgagaacaagctgtctcaatggacgt -aggacgagaggaatgtgacggttataatacgcgacctctatgtgtctattaaatccctac -gggccgtagcgagcaagctcgggatctttagtagcggatcgggtcttctacatggcgcac -caggtttgatatttcgagctagagttgctctaaagaaacgctcttgcttgggaaagtgac -ctggggtacgacgaagacacgggcctggttgcataccgtctcaccttcattcatcgagcg -cgatagcgtagcattatccagcacgggaggcacctggcatgggcgtgaaatacaatccga -agagcagcatgaaaaaaaaatgagcggtcgtgacccgagggtgcgtccctccgaacgttg -ttcgttgccaaggtaagggacccacgccaagcatggttagggaagcgtgaccgacacaac -atagaggacaatctcaatatgccttctgggtatcacctgggtgtgtagcagtgctactgt -tacaagtcgcgccatctgcgacgtcgttcgtccctattccctcctaaaggccccgactct -gaaaaaacctacaatcctagacacgttgtggcccagcacaacgcgtctgctatacagcaa -gtcattcgtcgaatggggcaacacgtataagaaggccgtgattccaagggcacatggcgg -gtctctcaatgatctacgacgtgacgcaaacggggaaaggctggaccgttgtccggtagg -cgatccatcataaaaggcgtctggttctgtggttattgtgtacacgtttgaaatatctga -ctgtagaacgatggaggtctctacgtagctctgcacgattccctcgggctcacgttcatg -gcaaggcgacatcacatgaacagggttgggtaacagccatctctgaagcattcccccagc -tcgcacggctcctttacccgctactacgtcatgcactggctagagacccaccaactattg -tgcaagtaaagaccataaaattttggagctgtagcgaaagcgagttgctgcgatggttag -accaacattgggtcaaactattctaatccggctaggctcattaagcatcgcagattgccg -atactaatgctaacgagcgatgaatgggcctcttcgagacgcgtccgtacctgcgaatat -caaccatgaaccctaatctggaaagtttgatgccgctgtgatagccactcctgtgtcgcg -acggcaacgtaatgtacgtgcttggtttgagctctaaacataccttcgttagagttggtc -agaggaggatattgtcatagcgctacaaccagcgccaaaggatagctccgtggctggtcg -gacgcgctagaatctaggcagaacacgtgcggcttcttgcactctgacgctcacgcattg -gactttggcggtcatctggttctcataagtccgaatcacagtagataaagatccgcgtga -taagatatgcctacgtacttaagtagcaccctctgttcttggcgatcctatcattgcggg -ttctatacatatgttgatccaaccatcggccgccgggccgtcgtaggatctaatggccaa -cacatcaatcaaatcactcgcgaggacgcggaggtcccgtccataggtgtgtatgttggt -acgcagcacctaatctggtctggtcaagcaactggcttgttccagtatataatctcagat -agactgccctgaaagtagacaatgataaatatgtaagagcggggaaatcccatcgtcgac -tgtgttgcggacgattagcttgcaaagagaggtgtgagcggctgtcttaaattatggact -cctcgcttccgtcgaacgtctgactagagaatctcttggaggtacacacgaacgtaggtt -cctcacagtatgtggcatggacaccggacaggtcacttgtatttatccattgaacgtgat -atagtgtgtcgagtcgttatacaatacatactccaagcgtactccctttgggtaacgggt -ttgggtacatattaccatggtgtggaccgatttgtaaccctataagcgttccgggtactt -taccatcgcggatgcgatcgcggatctacctatgtcagcgacatactgcccgtccaaaag -tgattcagtatgtgttaatcagcgtattgcatcatcagtcattgagagtggaccgttctc -gaagagcaagattcctgaccctttccactgggccatccagtacacccaattatatgcaga -cgaaaagctccatacatatataatacctcttcacaccgtgtgcagagaccaacttgatcg -atattgtactcactcgtacggaataattagaagggcccgactgtcgagaccgttagcatt -tgcattatcgcggtgagcgtatagaaacaattataggctttgctgcgatcgcagtgtctg -ggacagagtctcctagacctagatcagcttaaaacaatgattgtaggtaatgtgcgttgc -cgtcgggccacacctttcagttcggttacaccgactcagaattatcttctatcgggaccc -tcaaactgataatggggtcaaatggtggtggtcccgtctttctgcagtgctttgtgagca -gaatagctctcgagcgctccagtacgcctgagtatggctgcgcccaccaattggcgccgt -taatgcgtggcgacacaagagaaccagtccacataagttatgcccttaagaagatcaccg -ggtttgcgccatcctgattcaggtaacggacgtccgtgtacacgaaccatactaatgagt -tctacatcgctccaacgcgtcttgaaccctatcgacaattacaaatgcaaagtattagtt -gggtcagtgcgtagcttatccaagggctgcagtaagtcgtccccatgacagacggacggc -ataagggccaaaaattccgccgcattagctgttaataatgccatccagaaccggacagtc -tcgatatataacttacggctgcgaactggcccttatgaattacatccacgtgctagttaa -caggacggatcgcccggactaattgccgatcggccgtcccattctccgtacgatgtactg -aacccaggcgtgtgaatgggtatgtcaacgtttacccatcgaccctgcctatagacctga -gtttcattccagttgagcgtagaatgggatgagcagctttgcttgaggtgcgatattcgg -gagacctggtcagcaggattgatggttttatagtgctggattccctgctgaaattctctc -ttctgccgcatgtgaagaaggccttgcttcccttccacatgattgtaagatttcctaagg -cctcccagctgcgtggaactgtgagtcaattaaacctctttcctttataaattacccagt -ctcaggtatttccttatcgtggtatgagaacggattaatacagttgtgtaagccagctcc -ttataataaatctctctccctctctctgcctctcatctatgtatgtatgtatctgtcatc -tatctcctataagttgtttcttttgagaactctgactaacatagacattaaataaacaat -ttgtatactgaataattatataactactgcttgggggtgtggttgtgttgtgtggtgagg -gggtgtagaaatgtctaggggttgatctagcctaacaccttccagatgaccctaggttct -ggcctttccccaaagagtcctggtctactcagtcatctcccacaatttggccatcttctt -gggtgctcaggacaataaatgacccagccagatcctctagagtcgacctgcaggcatgca -agcttggcgtaatcatggtcatagctgtttgctgtgtgaaattgttatgcgctcacattg -cacacaacatacgagccggaagcatacagtgtagagcctggggtggcgtatgagtgagct -aactcacattaatttgcgtttgcgctcactgccgcttttcagtcggggaaactgtcgtga -cagctgcatttaatgatcggtcatcgcgcggggagatgcggttttgcgtattgtgccgca -ggtggttatttctctcaacagtgagacgggcacagctgattgccctcaccggctgggctg -agagagttgcagcaagcgtacacgcggggtttgcccagcagggcgaaattcctgttgtga -tgtggattccgaatcgggcaaatgccctataaatcaaagcatagtccgagatagggtttg -agtgtgttcagttttgggacagaggtcactataaagaacgttgactgcacgtcaagggcg -agaaacgtcatcaggcgattgcactacgtgacatcaccaatcagttttttgggtcaggtg -cgtgagacttatcggaactaaaggagcccgttttagcttcggggaaacgcgaagtgtcgg -aagaaggaaaacgaagagcgggtatcgcggaatgtacgtgctgagcgaaaccaacgcgcg -ttacggtcaagcgtgtatgtgttttggaggtaagtttccctgataaagcggaacaggggc -tatgtgtttacgggggtcgcatggagtacaagggggcaggacaagatttgcgcgacatgg -ttttttttaaattgcgggagatggttggctgtcccgatgagattagaacctgcttttgtt -ttagtttcggttcggtcagttcggactttggattggttttgttgggtatttaagaagggt -gtttgtgggatgtttttttttgggtcgagcatcactggactatgtgtgtcctgcgtatcg -tggcggggcctcgaacttagaggtaggacgattccgatgcaacaaccttatgctttactc -caagcgatggtgaggcaaccttgtacagaagtgcaaccgccattgtggctgggtcggccc -aaggaacggatattctcctgggtactgacaacccaggtctactgggttctatcgacggta -cagtcaagcctcgagtttactgtctcccacttaagtccgctctgggcattaatgagtctg -tggggtatcaagggtcacccaacgaagtagctagtcccgttgtacatttcgcgcgttacc -tagggagcacggtgagctacgagggtgtagctgcacacagcacggtatataggaacgcaa -acatagtagtatgtccctatgcataacagggagaagtgactctccgacggtcatttatcc -cagactctcctcgctccctactaattgttctatatacaatcggagtaaattggtacctct -ctgacaacataggggggttgaagtttatgccacgcagtctgttacacttttgccggggaa -ttgtgtagaccatgactcaaggtggtggtgcgagttactctactatctactgcttctcat -tacacatctcttggcttacaggccccagttatcagttacgaggctgccttaagagaaggc -caacattcatgatcagatccccgcacacgggatgatagtctgagatctgccggacgctcg -tctgctacaagtgtgggttgagcatagggtaagcctaagggctcgtaatggtacttcgcg -atgtttattcgatatgagctacaaggtataacgttgccttcaccatttcggctcacgact -cttacctcgcctcaccgctagacaccctgaccaaccgttagggtactgacaaccgatcag -gcgactcctaaaggacctctcagcctcccagacatgaggtccgctcgcatgctgttcagc -gagtccatatcaggaacagcaacgagacaaggcgagagcaacactagacttggagacata -ccaccacatctgtttactcttgattcaggaagacctgtggtaactgggcgacgcgtgcgt -ttatccgtcttccaatctccacagaatctgctccgtcagagggatgtatgactaacgcac -tgattttttgttcagtaaccgggcgcggattacggggcgtattccaggggtacattcaac -gggaccccaaggaaaagttcggggtatttaaggaggcggtccaactctcggacttattgg -gcgacgtaatgataagcacacctctgctcatacagtgcacgaagaccggcattgttcaaa -cacagccgtccattccgcgactattgtcctgtgtacaggtgcctcgggtcccaaatccag -cttatacccaaaactctcccggacttttttcgtccttgcaggaggaaaggacctgactcc -acgtttattaggggatcgttgggcccggacaatcgtgtattcaacataataaatgggttc -tagtttaaagccggaggtgacggtggattatggactagaggtaaacatttccggcgtagg -tcagaaagaatatagaccctaaatttacgcaaaacgtgcacacgattgtaggtggagtcg -tattaggaactcagggcgggtggaacgagtgtgttttaggcttaccatgaaccgtcgcgc -ggaaaatgatgacagaatggacggtgcttttttgcggggccaacaaaactcgcttatgtt -cgccattggatgtaagtaccgctatactacgcagcgaccccttacgacattgttgatttc -caatgctttcatggaggtttgtgagcggtcgctaggaatcttcctccagcgagtagccag -ctggccactagttagcaaccagagttcatatgagctccagtaacacgaaggtacataagg -aacgcaagcaagttcgggactacatagaacttggggtgcgtctaagccggggatcttaat -agacaagaggccatcgtatgggtcaaatttcatgtttcgtcagatgccctatccgtggac -acctaaatcaagcatcatatcctaatgctatttgctctcgctttggtatgattcatatgc -agccctgtccgtaaaatgacgggccagaggatacctgtcactacctatttcagattacca -ctgttcgaaaccttgctctgaaatcgatcttcgtagagtgttcgttaatcgccgcgtgaa -ttatataatcccccaaggagtacccttcgtcggtgcagtctacgtaactttcccaggtta -gcccccatctaatcgacgtacgtgagggggctacgggagatcgcacaagccgaagcattt -ctgctctgtttcgttacaactggaatcctcatcacgcagcgctaccggatgccatcccta -gtcatccccgcgagtttaaccagtcgagatggctcgaaccgtgtttcgaaggctggctat -tgaaccagttataaagcaggggcgactgtccgtcggacttcgatataaactatcacaaat -ttggcataagtatatacagccaagaaggctcagataggcacacgcacccaaatagtgacc -gcctacgttcaggttgcccggcaacgaagcgcgcttgctccattaatccactatcagagg -taacgaccctctttcaccgtagctgcccccatgtcggggcaacagtttcccaccgataac -gtgttctgtctagtacgtgaaacggattggctaacccctgtcagcctaattagatgacat -cctcgtacctctagagaaggtgaggagaccgctggaacataagtaaacagtttaccaaac -aatgatcatacatataacagaaaggagagatgcccggtggggtcgaagatacagctgtgg -ttgatatctgcaacggccaggctgtcatgaagatcacgcgcacgaagctatttatgaaag -atacccatgccggggcggcagcggtacctagtcctaaataccgacacgtagcaacgcgtc -ccctgaggcccaccctagtcaagggaacttaaaatatacgctgactttgtctaccaatca -tcttcgattaacaccggcatactaagtctaccctggacccagcaatgagaaaaacttttc -cgaatgatcattgtgagtttacagagattaggaacaccaaaaaaatttacgtacgtccgg -ttacccctgtgtgacccttggccacaaaccgtaaggtgcagcatgggcgttgagtccagg -gtttgtgacaccagacatgaataccctgtggcgggttgttagcatgttattcggatccga -tttcagagggtccgttgataccgggcagtacaagacaccccacaccactgaggtggtacg -tccgccagtctgctccctacgttatgcaaaattccttccccggcgttcttgcctgtacta -ctcctaaagggccgtgaaatccgacgttgctgctccgtctcgggacacccacacaacagc -tacggaaaatcgtgaggatgcacaagggtcttgcattgttgatctatacagctcttcgca -cggtgtgctactgttttccacaatgggcaaggggcgtgtaggtcttatgtaggagtaccc -cgtgatctggctataccttgctatctattccaccaaaatagccggggtcttatagtttta -ttccggatctatcaagtgacaagtccatggggtaaacggtaagtttcgtacgctgggcgg -tgatccccgcttataaacgagcaaaccgccaaagcaaccattgccgggcttccagctagt -aaatgttgggtgtcaatcgtgcatgtgactacttacccacaaagggacgcttgaagcgcc -tgaactcgtcacgtcatgcctagctcccggttgaaacacgaagacgcgtgaacctatctt -tgcttactattcgcctcctttaagaggcttcttgatgtggctctgcgacatggacagtag -tagttgtacctgggtatgttagtgaaattacaagtacctcaaaaacgaattacgtgtata -gagattatgtcactccgtcac ->ctgB -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGAT -ACATGCTAGCTACGTGCATGCTCGACATGCATCATCAGCCTGATGCTGATACATGCTAGCTACGTGCATGCTCGACATG - - - diff --git a/packages/apollo-shared/test_data/cds_without_exon_spliced_utr.gff b/packages/apollo-shared/test_data/cds_without_exon_spliced_utr.gff new file mode 100644 index 000000000..245069001 --- /dev/null +++ b/packages/apollo-shared/test_data/cds_without_exon_spliced_utr.gff @@ -0,0 +1,14 @@ +##gff-version 3 +chr1 . gene 1000 9000 . + . ID=gene00001 +#chr1 . mRNA 1300 9000 . + . ID=mRNA00001;Parent=gene00001 +#chr1 . five_prime_UTR 3000 3300 . + . Parent=mRNA00001 +#chr1 . CDS 5000 5500 . + 1 ID=cds00001;Parent=mRNA00001 +#chr1 . three_prime_UTR 7601 8000 . + . Parent=mRNA00001 +chr1 . mRNA 1300 9000 . + . ID=mRNA00003;Parent=gene00001 +chr1 . five_prime_UTR 1300 1500 . + . Parent=mRNA00003 +chr1 . five_prime_UTR 3000 3300 . + . Parent=mRNA00003 +chr1 . CDS 3301 3902 . + 0 ID=cds00003;Parent=mRNA00003 +chr1 . CDS 5000 5500 . + 1 ID=cds00003;Parent=mRNA00003 +chr1 . CDS 7000 7600 . + 2 ID=cds00003;Parent=mRNA00003 +chr1 . three_prime_UTR 7601 8000 . + . Parent=mRNA00003 +chr1 . three_prime_UTR 8501 8900 . + . Parent=mRNA00003 \ No newline at end of file diff --git a/packages/apollo-shared/test_data/cds_without_exon_spliced_utr.json b/packages/apollo-shared/test_data/cds_without_exon_spliced_utr.json new file mode 100644 index 000000000..ceb7b7edb --- /dev/null +++ b/packages/apollo-shared/test_data/cds_without_exon_spliced_utr.json @@ -0,0 +1,73 @@ +{ + "_id": "675ad4e6a5abb3a5087c0652", + "refSeq": "chr1", + "type": "gene", + "min": 999, + "max": 9000, + "strand": 1, + "children": { + "675ad4e6a5abb3a5087c0651": { + "_id": "675ad4e6a5abb3a5087c0651", + "refSeq": "chr1", + "type": "mRNA", + "min": 1299, + "max": 9000, + "strand": 1, + "children": { + "675ad4e6a5abb3a5087c0649": { + "_id": "675ad4e6a5abb3a5087c0649", + "refSeq": "chr1", + "type": "CDS", + "min": 3300, + "max": 7600, + "strand": 1, + "attributes": { + "gff_id": ["cds00003"] + } + }, + "675ad4e6a5abb3a5087c064a": { + "_id": "675ad4e6a5abb3a5087c064a", + "refSeq": "chr1", + "type": "exon", + "min": 1299, + "max": 1500, + "strand": 1 + }, + "675ad4e6a5abb3a5087c064e": { + "_id": "675ad4e6a5abb3a5087c064e", + "refSeq": "chr1", + "type": "exon", + "min": 2999, + "max": 3902, + "strand": 1 + }, + "675ad4e6a5abb3a5087c064f": { + "_id": "675ad4e6a5abb3a5087c064f", + "refSeq": "chr1", + "type": "exon", + "min": 4999, + "max": 5500, + "strand": 1 + }, + "675ad4e6a5abb3a5087c0650": { + "_id": "675ad4e6a5abb3a5087c0650", + "refSeq": "chr1", + "type": "exon", + "min": 6999, + "max": 8000, + "strand": 1 + }, + "675ad4e6a5abb3a5087c064d": { + "_id": "675ad4e6a5abb3a5087c064d", + "refSeq": "chr1", + "type": "exon", + "min": 8500, + "max": 8900, + "strand": 1 + } + }, + "attributes": { "gff_id": ["mRNA00003"] } + } + }, + "attributes": { "gff_id": ["gene00001"] } +} diff --git a/packages/apollo-shared/test_data/onecds_without_exon_spliced_utr.gff b/packages/apollo-shared/test_data/onecds_without_exon_spliced_utr.gff new file mode 100644 index 000000000..7d469cc05 --- /dev/null +++ b/packages/apollo-shared/test_data/onecds_without_exon_spliced_utr.gff @@ -0,0 +1,6 @@ +##gff-version 3 +chr1 . gene 1000 9000 . + . ID=gene00001 +chr1 . mRNA 1300 9000 . + . ID=mRNA00001;Parent=gene00001 +chr1 . five_prime_UTR 3000 3300 . + . Parent=mRNA00001 +chr1 . CDS 5000 5500 . + 1 ID=cds00001;Parent=mRNA00001 +chr1 . three_prime_UTR 7601 8000 . + . Parent=mRNA00001 diff --git a/packages/apollo-shared/test_data/onecds_without_exon_spliced_utr.json b/packages/apollo-shared/test_data/onecds_without_exon_spliced_utr.json new file mode 100644 index 000000000..505692dde --- /dev/null +++ b/packages/apollo-shared/test_data/onecds_without_exon_spliced_utr.json @@ -0,0 +1,55 @@ +{ + "_id": "675af35f5758c90ab1d55838", + "refSeq": "chr1", + "type": "gene", + "min": 999, + "max": 9000, + "strand": 1, + "children": { + "675af35f5758c90ab1d55837": { + "_id": "675af35f5758c90ab1d55837", + "refSeq": "chr1", + "type": "mRNA", + "min": 1299, + "max": 9000, + "strand": 1, + "children": { + "675af35f5758c90ab1d55833": { + "_id": "675af35f5758c90ab1d55833", + "refSeq": "chr1", + "type": "CDS", + "min": 4999, + "max": 5500, + "strand": 1, + "attributes": { "gff_id": ["cds00001"] } + }, + "675af35f5758c90ab1d55834": { + "_id": "675af35f5758c90ab1d55834", + "refSeq": "chr1", + "type": "exon", + "min": 2999, + "max": 3300, + "strand": 1 + }, + "675af35f5758c90ab1d55836": { + "_id": "675af35f5758c90ab1d55836", + "refSeq": "chr1", + "type": "exon", + "min": 4999, + "max": 5500, + "strand": 1 + }, + "675af35f5758c90ab1d55835": { + "_id": "675af35f5758c90ab1d55835", + "refSeq": "chr1", + "type": "exon", + "min": 7600, + "max": 8000, + "strand": 1 + } + }, + "attributes": { "gff_id": ["mRNA00001"] } + } + }, + "attributes": { "gff_id": ["gene00001"] } +} From 00d018c1a3783322edf84923234c5dd43c577e30 Mon Sep 17 00:00:00 2001 From: dariober Date: Wed, 22 Jan 2025 14:58:07 +0000 Subject: [PATCH 09/10] Fix tests --- packages/jbrowse-plugin-apollo/cypress/e2e/addAssembly.cy.ts | 2 +- packages/jbrowse-plugin-apollo/cypress/e2e/editFeature.cy.ts | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/jbrowse-plugin-apollo/cypress/e2e/addAssembly.cy.ts b/packages/jbrowse-plugin-apollo/cypress/e2e/addAssembly.cy.ts index 8de0ac0ab..058e53236 100644 --- a/packages/jbrowse-plugin-apollo/cypress/e2e/addAssembly.cy.ts +++ b/packages/jbrowse-plugin-apollo/cypress/e2e/addAssembly.cy.ts @@ -15,7 +15,7 @@ describe('Add Assembly', () => { cy.intercept('/changes').as('changes') cy.contains('Submit').click() - cy.contains('is being added', { timeout: 10_000 }) + cy.contains('added successfully', { timeout: 10_000 }) cy.wait('@changes').its('response.statusCode').should('match', /2../) }) diff --git a/packages/jbrowse-plugin-apollo/cypress/e2e/editFeature.cy.ts b/packages/jbrowse-plugin-apollo/cypress/e2e/editFeature.cy.ts index 1db38ee35..b7b7cda70 100644 --- a/packages/jbrowse-plugin-apollo/cypress/e2e/editFeature.cy.ts +++ b/packages/jbrowse-plugin-apollo/cypress/e2e/editFeature.cy.ts @@ -197,7 +197,7 @@ describe('Different ways of editing features', () => { ) }) - it('Can add child feature via table editor', () => { + it.only('Can add child feature via table editor', () => { cy.addAssemblyFromGff('onegene.fasta.gff3', 'test_data/onegene.fasta.gff3') cy.selectAssemblyToView('onegene.fasta.gff3') cy.searchFeatures('gx1', 1) @@ -239,7 +239,7 @@ describe('Different ways of editing features', () => { cy.reload() // Ideally, you shouldn't need to reload to see the change? cy.get('tbody', { timeout: 60_000 }).within(() => { cy.get('input[value="start_codon"]').should('have.length', 1) - cy.get('input[value="1"]').should('have.length', 4) + cy.get('input[value="1"]').should('have.length', 5) cy.get('input[value="3"]').should('have.length', 1) }) }) From cb6d69171abf00c7b7e270ff9d6bf1e6f3d1b535 Mon Sep 17 00:00:00 2001 From: Garrett Stevens Date: Wed, 22 Jan 2025 21:54:45 +0000 Subject: [PATCH 10/10] Re-enable skipped tests --- .../apollo-shared/test_data/cds_without_exon_spliced_utr.gff | 2 +- packages/jbrowse-plugin-apollo/cypress/e2e/editFeature.cy.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/apollo-shared/test_data/cds_without_exon_spliced_utr.gff b/packages/apollo-shared/test_data/cds_without_exon_spliced_utr.gff index 245069001..4e29b0854 100644 --- a/packages/apollo-shared/test_data/cds_without_exon_spliced_utr.gff +++ b/packages/apollo-shared/test_data/cds_without_exon_spliced_utr.gff @@ -11,4 +11,4 @@ chr1 . CDS 3301 3902 . + 0 ID=cds00003;Parent=mRNA00003 chr1 . CDS 5000 5500 . + 1 ID=cds00003;Parent=mRNA00003 chr1 . CDS 7000 7600 . + 2 ID=cds00003;Parent=mRNA00003 chr1 . three_prime_UTR 7601 8000 . + . Parent=mRNA00003 -chr1 . three_prime_UTR 8501 8900 . + . Parent=mRNA00003 \ No newline at end of file +chr1 . three_prime_UTR 8501 8900 . + . Parent=mRNA00003 diff --git a/packages/jbrowse-plugin-apollo/cypress/e2e/editFeature.cy.ts b/packages/jbrowse-plugin-apollo/cypress/e2e/editFeature.cy.ts index b7b7cda70..e8ea491f0 100644 --- a/packages/jbrowse-plugin-apollo/cypress/e2e/editFeature.cy.ts +++ b/packages/jbrowse-plugin-apollo/cypress/e2e/editFeature.cy.ts @@ -197,7 +197,7 @@ describe('Different ways of editing features', () => { ) }) - it.only('Can add child feature via table editor', () => { + it('Can add child feature via table editor', () => { cy.addAssemblyFromGff('onegene.fasta.gff3', 'test_data/onegene.fasta.gff3') cy.selectAssemblyToView('onegene.fasta.gff3') cy.searchFeatures('gx1', 1)