From b12e0e2c9c7312e6bdcc482a0f82c258fedfe6d6 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Mon, 11 Sep 2023 10:00:40 -0400 Subject: [PATCH] build: update pydantic to v2 and remove `GeneNormalizer` class (#189) - Update to pydantic v2 - Remove GeneNormalizer class (#187) - Temporarily remove `get_mapped_mane_data` since we do not have a Chromosome Location in VRS 2.0-alpha yet. Will be added back in #194 . I still left gene-normalizer dependency + instance variable in CoolSeqTool since it will be added back --- Pipfile | 4 +- cool_seq_tool/app.py | 37 +- cool_seq_tool/data_sources/__init__.py | 1 - cool_seq_tool/data_sources/gene_normalizer.py | 49 --- cool_seq_tool/data_sources/mane_transcript.py | 253 ++++++------ cool_seq_tool/routers/default.py | 4 +- cool_seq_tool/routers/mane.py | 102 +++-- cool_seq_tool/schemas.py | 381 +++++++----------- cool_seq_tool/version.py | 2 +- setup.cfg | 4 +- tests/unit/test_cool_seq_tool.py | 22 +- tests/unit/test_mane_transcript.py | 13 +- 12 files changed, 365 insertions(+), 507 deletions(-) delete mode 100644 cool_seq_tool/data_sources/gene_normalizer.py diff --git a/Pipfile b/Pipfile index 2295afdb..978ea813 100644 --- a/Pipfile +++ b/Pipfile @@ -14,8 +14,8 @@ hgvs = "*" pydantic = "*" fastapi = "*" uvicorn = "*" -gene-normalizer = ">=0.1.34, != 0.2.0, != 0.2.1, != 0.2.2, != 0.2.3, != 0.2.4, != 0.2.5, != 0.2.6, != 0.2.7, != 0.2.8" -"ga4gh.vrs" = "*" +gene-normalizer = "~=0.3.0.dev0" +"ga4gh.vrs" = "~=2.0.0.dev0" [dev-packages] cool_seq_tool = {editable = true, path = "."} diff --git a/cool_seq_tool/app.py b/cool_seq_tool/app.py index b8360028..f54d011d 100644 --- a/cool_seq_tool/app.py +++ b/cool_seq_tool/app.py @@ -6,6 +6,7 @@ from biocommons.seqrepo import SeqRepo from gene.query import QueryHandler as GeneQueryHandler +from gene.database import create_db from cool_seq_tool.data_sources.alignment_mapper import AlignmentMapper from cool_seq_tool.data_sources.uta_database import UTA_DB_URL @@ -14,7 +15,7 @@ from cool_seq_tool.schemas import Assembly, GenomicData, TranscriptExonData, \ ResidueMode, GenomicDataResponse, ServiceMeta, TranscriptExonDataResponse from cool_seq_tool.data_sources import MANETranscript, MANETranscriptMappings, \ - SeqRepoAccess, TranscriptMappings, UTADatabase, GeneNormalizer + SeqRepoAccess, TranscriptMappings, UTADatabase from cool_seq_tool.version import __version__ @@ -34,25 +35,19 @@ def __init__( lrg_refseqgene_path: Path = LRG_REFSEQGENE_PATH, mane_data_path: Path = MANE_SUMMARY_PATH, db_url: str = UTA_DB_URL, gene_query_handler: Optional[GeneQueryHandler] = None, - gene_db_url: str = "", gene_db_region: str = "us-east-2", sr: Optional[SeqRepo] = None ) -> None: """Initialize CoolSeqTool class - :param Path transcript_file_path: The path to transcript_mapping.tsv - :param Path lrg_refseqgene_path: The path to LRG_RefSeqGene - :param Path mane_data_path: Path to RefSeq MANE summary data - :param str db_url: PostgreSQL connection URL + :param transcript_file_path: The path to transcript_mapping.tsv + :param lrg_refseqgene_path: The path to LRG_RefSeqGene + :param mane_data_path: Path to RefSeq MANE summary data + :param db_url: PostgreSQL connection URL Format: `driver://user:password@host/database/schema` - :param Optional[GeneQueryHandler] gene_query_handler: Gene normalizer query - handler instance. If this is provided, will use a current instance. If this - is not provided, will create a new instance. - :param str gene_db_url: URL to gene normalizer dynamodb. Only used when - `gene_query_handler` is `None`. - :param str gene_db_region: AWS region for gene normalizer db. Only used when - `gene_query_handler` is `None`. - :param Optional[SeqRepo] sr: SeqRepo instance. If this is not provided, will - create a new instance. + :param gene_query_handler: Gene normalizer query handler instance. If this is + provided, will use a current instance. If this is not provided, will create + a new instance. + :param sr: SeqRepo instance. If this is not provided, will create a new instance """ if not sr: sr = SeqRepo(root_dir=SEQREPO_ROOT_DIR) @@ -63,14 +58,14 @@ def __init__( self.mane_transcript_mappings = MANETranscriptMappings( mane_data_path=mane_data_path) self.uta_db = UTADatabase(db_url=db_url) - gene_normalizer = GeneNormalizer(gene_query_handler, gene_db_url, - gene_db_region) - self.gene_query_handler = gene_normalizer.query_handler + if not gene_query_handler: + gene_query_handler = GeneQueryHandler(create_db()) + self.gene_query_handler = gene_query_handler self.alignment_mapper = AlignmentMapper( self.seqrepo_access, self.transcript_mappings, self.uta_db) self.mane_transcript = MANETranscript( self.seqrepo_access, self.transcript_mappings, - self.mane_transcript_mappings, self.uta_db, gene_normalizer) + self.mane_transcript_mappings, self.uta_db) @staticmethod def service_meta() -> ServiceMeta: @@ -242,7 +237,7 @@ async def genomic_to_transcript_exon_coordinates( residue_mode=ResidueMode.INTER_RESIDUE ) if start_data.transcript_exon_data: - start_data = start_data.transcript_exon_data.dict() + start_data = start_data.transcript_exon_data.model_dump() else: return self._return_warnings(resp, start_data.warnings[0]) else: @@ -257,7 +252,7 @@ async def genomic_to_transcript_exon_coordinates( residue_mode=ResidueMode.INTER_RESIDUE ) if end_data.transcript_exon_data: - end_data = end_data.transcript_exon_data.dict() + end_data = end_data.transcript_exon_data.model_dump() else: return self._return_warnings(resp, end_data.warnings[0]) else: diff --git a/cool_seq_tool/data_sources/__init__.py b/cool_seq_tool/data_sources/__init__.py index 02f01e4b..10a8c6e7 100644 --- a/cool_seq_tool/data_sources/__init__.py +++ b/cool_seq_tool/data_sources/__init__.py @@ -3,6 +3,5 @@ from .mane_transcript_mappings import MANETranscriptMappings from .transcript_mappings import TranscriptMappings from .uta_database import UTADatabase -from .gene_normalizer import GeneNormalizer from .mane_transcript import MANETranscript from .alignment_mapper import AlignmentMapper diff --git a/cool_seq_tool/data_sources/gene_normalizer.py b/cool_seq_tool/data_sources/gene_normalizer.py deleted file mode 100644 index 3b761f4a..00000000 --- a/cool_seq_tool/data_sources/gene_normalizer.py +++ /dev/null @@ -1,49 +0,0 @@ -"""Module for accessing Gene Normalizer""" -import logging -from typing import Dict, Optional - -from gene.database.dynamodb import DynamoDbDatabase -from gene.query import QueryHandler -from gene.schemas import SourceName - - -logger = logging.getLogger("cool_seq_tool") - - -class GeneNormalizer: - """Gene Normalizer class for getting gene data""" - - def __init__( - self, query_handler: Optional[QueryHandler] = None, db_url: str = "", - db_region: str = "us-east-2" - ) -> None: - """Initialize gene normalizer class - - :param QueryHandler query_handler: Gene normalizer query handler instance. - If this is provided, will use a current instance. If this is not provided, - will create a new instance. - :param str db_url: URL to gene normalizer dynamodb. Only used when - `query_handler` is `None`. - :param str db_region: AWS region for gene normalizer db. Only used when - `query_handler` is `None`. - """ - if query_handler: - self.query_handler = query_handler - else: - ddb = DynamoDbDatabase(db_url=db_url, region_name=db_region) - self.query_handler = QueryHandler(ddb) - - def get_hgnc_data(self, gene: str) -> Dict: - """Return HGNC data for a given gene - - :param str gene: Gene query - :return: HGNC data - """ - hgnc_data = dict() - gene_resp = self.query_handler.normalize_unmerged(gene) - hgnc_matches = gene_resp.source_matches.get(SourceName.HGNC) - if hgnc_matches and hgnc_matches.records: - hgnc_data = hgnc_matches.records[0].dict() - else: - logger.warning(f"Unable to get HGNC symbol for {gene}") - return hgnc_data diff --git a/cool_seq_tool/data_sources/mane_transcript.py b/cool_seq_tool/data_sources/mane_transcript.py index b4d7f292..c9a80b08 100644 --- a/cool_seq_tool/data_sources/mane_transcript.py +++ b/cool_seq_tool/data_sources/mane_transcript.py @@ -13,10 +13,11 @@ import pandas as pd -from cool_seq_tool.schemas import AnnotationLayer, Assembly, MappedManeData, \ - ResidueMode, TranscriptPriorityLabel +from cool_seq_tool.schemas import ( + AnnotationLayer, Assembly, ResidueMode, TranscriptPriorityLabel +) from cool_seq_tool.data_sources import SeqRepoAccess, TranscriptMappings, \ - MANETranscriptMappings, UTADatabase, GeneNormalizer + MANETranscriptMappings, UTADatabase from cool_seq_tool.data_sources.residue_mode import get_inter_residue_pos @@ -35,24 +36,22 @@ class MANETranscript: def __init__(self, seqrepo_access: SeqRepoAccess, transcript_mappings: TranscriptMappings, mane_transcript_mappings: MANETranscriptMappings, - uta_db: UTADatabase, - gene_normalizer: GeneNormalizer) -> None: + uta_db: UTADatabase) -> None: """Initialize the MANETranscript class. - :param SeqRepoAccess seqrepo_access: Access to seqrepo queries - :param TranscriptMappings transcript_mappings: Access to transcript - accession mappings and conversions - :param MANETranscriptMappings mane_transcript_mappings: Access to - MANE Transcript accession mapping data - :param UTADatabase uta_db: UTADatabase instance to give access to query - UTA database - :param GeneNormalizer gene_normalizer: Access to Gene Normalizer + :param seqrepo_access: Access to seqrepo queries + :param transcript_mappings: Access to transcript accession mappings and + conversions + :param mane_transcript_mappings: Access to MANE Transcript accession mapping + data + :param uta_db: UTADatabase instance to give access to query UTA database """ self.seqrepo_access = seqrepo_access self.transcript_mappings = transcript_mappings self.mane_transcript_mappings = mane_transcript_mappings self.uta_db = uta_db - self.gene_normalizer = gene_normalizer + # Adding back in issue-194 + # self.gene_query_handler = gene_query_handler @staticmethod def _get_reading_frame(pos: int) -> int: @@ -867,107 +866,125 @@ async def g_to_mane_c( ensembl_c_ac=current_mane_data["Ensembl_nuc"], alt_ac=grch38["ac"] if grch38 else None) - async def get_mapped_mane_data( - self, gene: str, assembly: Assembly, genomic_position: int, - residue_mode: ResidueMode = ResidueMode.INTER_RESIDUE - ) -> Optional[MappedManeData]: - """Get MANE data for gene, assembly, and position. If GRCh37 assembly is given, - will return mapped MANE data. - - :param str gene: Gene symbol or identifier - :param Assembly assembly: Assembly for the provided genomic position - :param int genomic_position: Position on the genomic reference sequence to find - MANE data for - :param ResidueMode residue_mode: Starting residue mode for `start_pos` - and `end_pos`. Will always return coordinates in inter-residue - :return: Mapped MANE or Longest Compatible Remaining data if found/compatible. - MANETranscriptError will be raised if unable to get required data for - retrieving mapped MANE data. - """ - hgnc_gene_data = self.gene_normalizer.get_hgnc_data(gene) - if not hgnc_gene_data: - raise MANETranscriptError(f"Unable to get HGNC data for gene: {gene}") - - gene = hgnc_gene_data["symbol"] - - mane_data = self.mane_transcript_mappings.get_gene_mane_data(gene) - if not mane_data: - raise MANETranscriptError(f"Unable to get MANE data for gene: {gene}") - - mane_data_len = len(mane_data) - - alt_ac = None - if hgnc_gene_data["locations"]: - chr = hgnc_gene_data["locations"][0].get("chr") or "" - alt_acs, _ = self.seqrepo_access.translate_identifier( - f"{assembly.value}:{chr}", "refseq" - ) - if alt_acs: - alt_ac = alt_acs[0].split(":")[1] - else: - raise MANETranscriptError(f"Unable to translate identifier for: " - f"{assembly}:{chr}") - - inter_residue_pos, _ = get_inter_residue_pos(genomic_position, residue_mode) - g_pos = inter_residue_pos[0] - - mane_transcripts = set() - for i in range(mane_data_len): - index = mane_data_len - i - 1 - current_mane_data = mane_data[index] - mane_transcripts |= set((current_mane_data["RefSeq_nuc"], - current_mane_data["Ensembl_nuc"])) - mane_c_ac = current_mane_data["RefSeq_nuc"] - - ac_query = mane_c_ac.split(".")[0] - tx_exon_aln_v_data = await self.uta_db.get_tx_exon_aln_v_data( - ac_query, g_pos, g_pos, alt_ac, False, True) - - if not tx_exon_aln_v_data: - continue - else: - len_of_aligned_data = len(tx_exon_aln_v_data) - if len_of_aligned_data == 1: - tx_exon_aln_v_data = tx_exon_aln_v_data[0] - else: - logger.debug(f"Found {len_of_aligned_data} records for aligned " - f"mapped MANE data for {ac_query}, {g_pos}, {alt_ac}") - - # Try checking for MANE match - filter_data = list(filter(lambda x: x[1] == mane_c_ac, - tx_exon_aln_v_data)) - if filter_data: - tx_exon_aln_v_data = filter_data[0] - else: - # Try checking for older versions of MANE - filter_data = list(filter(lambda x: x[1].startswith( - mane_c_ac.split(".")[0]), tx_exon_aln_v_data)) - if filter_data: - filter_data.sort(key=lambda x: x[1], reverse=True) - tx_exon_aln_v_data = filter_data[0] - return MappedManeData( - gene=gene, - refseq=current_mane_data["RefSeq_nuc"], - ensembl=current_mane_data["Ensembl_nuc"], - strand="-" if tx_exon_aln_v_data[7] == -1 else "+", - status="_".join(current_mane_data["MANE_status"].split()).lower(), - alt_ac=alt_ac, - assembly=assembly.value - ) - - lcr_data = await self.get_longest_compatible_transcript( - gene, g_pos, g_pos, AnnotationLayer.GENOMIC, - residue_mode=ResidueMode.INTER_RESIDUE, mane_transcripts=mane_transcripts, - alt_ac=alt_ac) - if lcr_data: - return MappedManeData( - gene=gene, - refseq=lcr_data["refseq"], - ensembl=lcr_data["ensembl"], - strand=lcr_data["strand"], - status=lcr_data["status"], - alt_ac=alt_ac, - assembly=assembly.value - ) - - return None + # Will be added once Chromosome Locations are added back to VRS 2.0-alpha + # def _get_hgnc_data(self, gene: str) -> Dict: + # """Return HGNC data for a given gene + + # :param gene: Gene query + # :return: HGNC data + # """ + # hgnc_data = {} + # gene_resp = self.gene_query_handler.normalize_unmerged(gene) + # hgnc_matches = gene_resp.source_matches.get(SourceName.HGNC) + # if hgnc_matches and hgnc_matches.records: + # hgnc_data = hgnc_matches.records[0].dict() + # else: + # logger.warning(f"Unable to get HGNC symbol for {gene}") + # return hgnc_data + + # async def get_mapped_mane_data( + # self, gene: str, assembly: Assembly, genomic_position: int, + # residue_mode: ResidueMode = ResidueMode.INTER_RESIDUE + # ) -> Optional[MappedManeData]: + # """Get MANE data for gene, assembly, and position. If GRCh37 assembly is given, # noqa: E501 + # will return mapped MANE data. + + # :param str gene: Gene symbol or identifier + # :param Assembly assembly: Assembly for the provided genomic position + # :param int genomic_position: Position on the genomic reference sequence to find # noqa: E501 + # MANE data for + # :param ResidueMode residue_mode: Starting residue mode for `start_pos` + # and `end_pos`. Will always return coordinates in inter-residue + # :return: Mapped MANE or Longest Compatible Remaining data if found/compatible. + # MANETranscriptError will be raised if unable to get required data for + # retrieving mapped MANE data. + # """ + # hgnc_gene_data = self._get_hgnc_data(gene) + # if not hgnc_gene_data: + # raise MANETranscriptError(f"Unable to get HGNC data for gene: {gene}") + + # gene = hgnc_gene_data["symbol"] + + # mane_data = self.mane_transcript_mappings.get_gene_mane_data(gene) + # if not mane_data: + # raise MANETranscriptError(f"Unable to get MANE data for gene: {gene}") + + # mane_data_len = len(mane_data) + + # alt_ac = None + # if hgnc_gene_data["locations"]: + # chr = hgnc_gene_data["locations"][0].get("chr") or "" + # alt_acs, _ = self.seqrepo_access.translate_identifier( + # f"{assembly.value}:{chr}", "refseq" + # ) + # if alt_acs: + # alt_ac = alt_acs[0].split(":")[1] + # else: + # raise MANETranscriptError(f"Unable to translate identifier for: " + # f"{assembly}:{chr}") + # else: + # raise MANETranscriptError("Unable to get HGNC gene location data") + + # inter_residue_pos, _ = get_inter_residue_pos(genomic_position, residue_mode) + # g_pos = inter_residue_pos[0] + + # mane_transcripts = set() + # for i in range(mane_data_len): + # index = mane_data_len - i - 1 + # current_mane_data = mane_data[index] + # mane_transcripts |= set((current_mane_data["RefSeq_nuc"], + # current_mane_data["Ensembl_nuc"])) + # mane_c_ac = current_mane_data["RefSeq_nuc"] + + # ac_query = mane_c_ac.split(".")[0] + # tx_exon_aln_v_data = await self.uta_db.get_tx_exon_aln_v_data( + # ac_query, g_pos, g_pos, alt_ac, False, True) + + # if not tx_exon_aln_v_data: + # continue + # else: + # len_of_aligned_data = len(tx_exon_aln_v_data) + # if len_of_aligned_data == 1: + # tx_exon_aln_v_data = tx_exon_aln_v_data[0] + # else: + # logger.debug(f"Found {len_of_aligned_data} records for aligned " + # f"mapped MANE data for {ac_query}, {g_pos}, {alt_ac}") # noqa: E501 + + # # Try checking for MANE match + # filter_data = list(filter(lambda x: x[1] == mane_c_ac, + # tx_exon_aln_v_data)) + # if filter_data: + # tx_exon_aln_v_data = filter_data[0] + # else: + # # Try checking for older versions of MANE + # filter_data = list(filter(lambda x: x[1].startswith( + # mane_c_ac.split(".")[0]), tx_exon_aln_v_data)) + # if filter_data: + # filter_data.sort(key=lambda x: x[1], reverse=True) + # tx_exon_aln_v_data = filter_data[0] + # return MappedManeData( + # gene=gene, + # refseq=current_mane_data["RefSeq_nuc"], + # ensembl=current_mane_data["Ensembl_nuc"], + # strand="-" if tx_exon_aln_v_data[7] == -1 else "+", + # status="_".join(current_mane_data["MANE_status"].split()).lower(), + # alt_ac=alt_ac, + # assembly=assembly.value + # ) + + # lcr_data = await self.get_longest_compatible_transcript( + # gene, g_pos, g_pos, AnnotationLayer.GENOMIC, + # residue_mode=ResidueMode.INTER_RESIDUE, mane_transcripts=mane_transcripts, + # alt_ac=alt_ac) + # if lcr_data: + # return MappedManeData( + # gene=gene, + # refseq=lcr_data["refseq"], + # ensembl=lcr_data["ensembl"], + # strand=lcr_data["strand"], + # status=lcr_data["status"], + # alt_ac=alt_ac, + # assembly=assembly.value + # ) + + # return None diff --git a/cool_seq_tool/routers/default.py b/cool_seq_tool/routers/default.py index c44d510a..63d63ab3 100644 --- a/cool_seq_tool/routers/default.py +++ b/cool_seq_tool/routers/default.py @@ -37,7 +37,7 @@ async def genomic_to_transcript_exon_coordinates( Returns: GenomicDataResponse with data and warnings """ - request_body = request_body.dict() + request_body = request_body.model_dump() response = GenomicDataResponse( genomic_data=None, warnings=list(), service_meta=cool_seq_tool.service_meta()) @@ -68,7 +68,7 @@ async def transcript_to_genomic_coordinates( Returns: GenomicDataResponse with data and warnings """ - request_body = request_body.dict() + request_body = request_body.model_dump() response = GenomicDataResponse( genomic_data=None, warnings=list(), service_meta=cool_seq_tool.service_meta()) diff --git a/cool_seq_tool/routers/mane.py b/cool_seq_tool/routers/mane.py index 366d06b7..7149b8fd 100644 --- a/cool_seq_tool/routers/mane.py +++ b/cool_seq_tool/routers/mane.py @@ -1,15 +1,13 @@ """Module containing routes related to MANE data""" import logging -from typing import List, Optional +from typing import Optional from fastapi import APIRouter from fastapi import Query from cool_seq_tool.routers import cool_seq_tool, SERVICE_NAME, RESP_DESCR, \ UNHANDLED_EXCEPTION_MSG, Tags -from cool_seq_tool.data_sources.mane_transcript import MANETranscriptError -from cool_seq_tool.schemas import AnnotationLayer, Assembly, ManeDataService, \ - MappedManeDataService, ResidueMode +from cool_seq_tool.schemas import AnnotationLayer, ManeDataService, ResidueMode logger = logging.getLogger("cool_seq_tool") @@ -80,51 +78,51 @@ async def get_mane_data( ) -@router.get( - "/get_mapped_mane_data", - summary="Retrieve MANE Transcript mapped to a given assembly", - response_description=RESP_DESCR, - description="Return mapped MANE Transcript data to a given assembly", - response_model=MappedManeDataService, - tags=[Tags.MANE_TRANSCRIPT] -) -async def get_mapped_mane_data( - gene: str = Query(..., description="HGNC Symbol or Identifier"), - assembly: Assembly = Query(..., description="Genomic assembly to use"), - genomic_position: int = Query(..., description="Genomic position associated to the given gene and assembly"), # noqa: E501 - residue_mode: ResidueMode = Query(ResidueMode.INTER_RESIDUE, - description="Residue mode for `genomic_position`") -) -> MappedManeDataService: - """Get MANE data for gene, assembly, and position. If GRCh37 assembly is given, - will return mapped MANE data. - - :param str gene: HGNC symbol or identifier - :param Assembly assembly: Assembly for the provided genomic position - :param int genomic_position: Position on the genomic reference sequence to find - MANE data for - :param ResidueMode residue_mode: Starting residue mode for `start_pos` - and `end_pos`. Will always return coordinates in inter-residue - :return: Mapped MANE or Longest Compatible Remaining data - """ - warnings: List = list() - mapped_mane_data = None - try: - mapped_mane_data = await cool_seq_tool.mane_transcript.get_mapped_mane_data( - gene, assembly, genomic_position, residue_mode) - if not mapped_mane_data: - warnings.append(f"Unable to find mapped data for gene {gene} at position " - f"{genomic_position} ({residue_mode} coordinates) on " - f"assembly {assembly}") - except MANETranscriptError as e: - e = str(e) - logger.exception(e) - warnings.append(e) - except Exception as e: - logger.exception(f"get_mapped_mane_data unhandled exception {e}") - warnings.append(UNHANDLED_EXCEPTION_MSG) - - return MappedManeDataService( - mapped_mane_data=mapped_mane_data, - warnings=warnings, - service_meta=cool_seq_tool.service_meta() - ) +# @router.get( +# "/get_mapped_mane_data", +# summary="Retrieve MANE Transcript mapped to a given assembly", +# response_description=RESP_DESCR, +# description="Return mapped MANE Transcript data to a given assembly", +# response_model=MappedManeDataService, +# tags=[Tags.MANE_TRANSCRIPT] +# ) +# async def get_mapped_mane_data( +# gene: str = Query(..., description="HGNC Symbol or Identifier"), +# assembly: Assembly = Query(..., description="Genomic assembly to use"), +# genomic_position: int = Query(..., description="Genomic position associated to the given gene and assembly"), # noqa: E501 +# residue_mode: ResidueMode = Query(ResidueMode.INTER_RESIDUE, +# description="Residue mode for `genomic_position`") # noqa: E501 +# ) -> MappedManeDataService: +# """Get MANE data for gene, assembly, and position. If GRCh37 assembly is given, +# will return mapped MANE data. + +# :param str gene: HGNC symbol or identifier +# :param Assembly assembly: Assembly for the provided genomic position +# :param int genomic_position: Position on the genomic reference sequence to find +# MANE data for +# :param ResidueMode residue_mode: Starting residue mode for `start_pos` +# and `end_pos`. Will always return coordinates in inter-residue +# :return: Mapped MANE or Longest Compatible Remaining data +# """ +# warnings: List = list() +# mapped_mane_data = None +# try: +# mapped_mane_data = await cool_seq_tool.mane_transcript.get_mapped_mane_data( +# gene, assembly, genomic_position, residue_mode) +# if not mapped_mane_data: +# warnings.append(f"Unable to find mapped data for gene {gene} at position " +# f"{genomic_position} ({residue_mode} coordinates) on " +# f"assembly {assembly}") +# except MANETranscriptError as e: +# e = str(e) +# logger.exception(e) +# warnings.append(e) +# except Exception as e: +# logger.exception(f"get_mapped_mane_data unhandled exception {e}") +# warnings.append(UNHANDLED_EXCEPTION_MSG) + +# return MappedManeDataService( +# mapped_mane_data=mapped_mane_data, +# warnings=warnings, +# service_meta=cool_seq_tool.service_meta() +# ) diff --git a/cool_seq_tool/schemas.py b/cool_seq_tool/schemas.py index c3d5ea73..745ef4ea 100644 --- a/cool_seq_tool/schemas.py +++ b/cool_seq_tool/schemas.py @@ -2,11 +2,16 @@ from datetime import datetime from enum import Enum import re -from typing import Literal, Optional, List, Tuple, Union, Dict, Any, Type +from typing import Literal, Optional, List, Tuple, Union -from pydantic import BaseModel, root_validator, validator -from pydantic.main import Extra -from pydantic.types import StrictStr, StrictInt +from pydantic import ( + BaseModel, + model_validator, + field_validator, + StrictStr, + StrictInt, + ConfigDict, +) from cool_seq_tool.version import __version__ @@ -14,9 +19,9 @@ class AnnotationLayer(str, Enum): """Create enum for supported annotation layers""" - PROTEIN = "p" - CDNA = "c" - GENOMIC = "g" + PROTEIN: Literal["p"] = "p" + CDNA: Literal["c"] = "c" + GENOMIC: Literal["g"] = "g" class Strand(str, Enum): @@ -48,14 +53,9 @@ class ResidueMode(str, Enum): INTER_RESIDUE = "inter-residue" -class BaseModelForbidExtra(BaseModel): +class BaseModelForbidExtra(BaseModel, extra="forbid"): """Base Pydantic model class with extra values forbidden.""" - class Config: - """Class configs.""" - - extra = Extra.forbid - class GenomicRequestBody(BaseModelForbidExtra): """Define constraints for genomic to transcript exon coordinates request body""" @@ -68,34 +68,27 @@ class GenomicRequestBody(BaseModelForbidExtra): gene: Optional[StrictStr] = None residue_mode: ResidueMode = ResidueMode.RESIDUE - @root_validator(pre=False) + @model_validator(mode="after") def check_start_and_end(cls, values): """Check that at least one of {`start`, `end`} is set""" msg = "Must provide either `start` or `end`" - start, end = values.get("start"), values.get("end") + start, end = values.start, values.end assert start or end, msg return values - class Config(BaseModelForbidExtra.Config): - """Configure model.""" - - @staticmethod - def schema_extra(schema: Dict[str, Any], - model: Type["GenomicRequestBody"]) -> None: - """Configure OpenAPI schema.""" - if "title" in schema.keys(): - schema.pop("title", None) - for prop in schema.get("properties", {}).values(): - prop.pop("title", None) - schema["example"] = { + model_config = ConfigDict( + json_schema_extra={ + "example": { "chromosome": "NC_000001.11", "start": 154192135, "end": None, "strand": -1, "transcript": "NM_152263.3", "gene": "TPM3", - "residue_mode": "residue" + "residue_mode": "residue", } + } + ) class TranscriptRequestBody(BaseModelForbidExtra): @@ -108,26 +101,17 @@ class TranscriptRequestBody(BaseModelForbidExtra): exon_end: Optional[StrictInt] = None exon_end_offset: Optional[StrictInt] = 0 - @root_validator(pre=False) + @model_validator(mode="after") def check_exon_start_and_exon_end(cls, values): """Check that at least one of {`exon_start`, `exon_end`} is set""" msg = "Must provide either `exon_start` or `exon_end`" - exon_start, exon_end = values.get("exon_start"), values.get("exon_end") + exon_start, exon_end = values.exon_start, values.exon_end assert exon_start or exon_end, msg return values - class Config(BaseModelForbidExtra.Config): - """Configure model.""" - - @staticmethod - def schema_extra(schema: Dict[str, Any], - model: Type["TranscriptRequestBody"]) -> None: - """Configure OpenAPI schema.""" - if "title" in schema.keys(): - schema.pop("title", None) - for prop in schema.get("properties", {}).values(): - prop.pop("title", None) - schema["example"] = { + model_config = ConfigDict( + json_schema_extra={ + "example": { "gene": "TPM3", "transcript": "NM_152263.3", "exon_start": 1, @@ -135,6 +119,8 @@ def schema_extra(schema: Dict[str, Any], "exon_end": None, "exon_end_offset": None, } + } + ) class TranscriptExonData(BaseModelForbidExtra): @@ -148,26 +134,19 @@ class TranscriptExonData(BaseModelForbidExtra): chr: StrictStr strand: StrictInt - class Config(BaseModelForbidExtra.Config): - """Configure model.""" - - @staticmethod - def schema_extra(schema: Dict[str, Any], - model: Type["TranscriptExonData"]) -> None: - """Configure OpenAPI schema.""" - if "title" in schema.keys(): - schema.pop("title", None) - for prop in schema.get("properties", {}).values(): - prop.pop("title", None) - schema["example"] = { + model_config = ConfigDict( + json_schema_extra={ + "example": { "chr": "NC_000001.11", "gene": "TPM3", "pos": 154192135, "exon": 1, "exon_offset": 0, "transcript": "NM_152263.3", - "strand": -1 + "strand": -1, } + } + ) class GenomicData(BaseModelForbidExtra): @@ -184,7 +163,7 @@ class GenomicData(BaseModelForbidExtra): transcript: StrictStr strand: StrictInt - @root_validator(pre=True) + @model_validator(mode="after") def check_start_end(cls, values): """ Check that at least one of {`start`, `end`} is set. @@ -192,35 +171,26 @@ def check_start_end(cls, values): If not set, set corresponding offset to `None` """ msg = "Missing values for `start` or `end`" - start = values.get("start") - end = values.get("end") + start = values.start + end = values.end assert start or end, msg if start: msg = "Missing value `exon_start`" - assert values.get("exon_start"), msg + assert values.exon_start, msg else: - values["exon_start_offset"] = None + values.exon_start_offset = None if end: msg = "Missing value `exon_end`" - assert values.get("exon_end"), msg + assert values.exon_end, msg else: - values["exon_end_offset"] = None + values.exon_end_offset = None return values - class Config(BaseModelForbidExtra.Config): - """Configure model.""" - - @staticmethod - def schema_extra(schema: Dict[str, Any], - model: Type["GenomicData"]) -> None: - """Configure OpenAPI schema.""" - if "title" in schema.keys(): - schema.pop("title", None) - for prop in schema.get("properties", {}).values(): - prop.pop("title", None) - schema["example"] = { + model_config = ConfigDict( + json_schema_extra={ + "example": { "gene": "TPM3", "chr": "NC_000001.11", "start": 154192135, @@ -230,8 +200,10 @@ def schema_extra(schema: Dict[str, Any], "exon_start_offset": 0, "exon_end_offset": None, "transcript": "NM_152263.3", - "strand": -1 + "strand": -1, } + } + ) class ServiceMeta(BaseModelForbidExtra): @@ -240,9 +212,11 @@ class ServiceMeta(BaseModelForbidExtra): name: Literal["cool_seq_tool"] = "cool_seq_tool" version: StrictStr response_datetime: datetime - url: Literal["https://github.com/GenomicMedLab/cool-seq-tool"] = "https://github.com/GenomicMedLab/cool-seq-tool" # noqa: E501 + url: Literal[ + "https://github.com/GenomicMedLab/cool-seq-tool" + ] = "https://github.com/GenomicMedLab/cool-seq-tool" # noqa: E501 - @validator("version") + @field_validator("version") def validate_version(cls, v): """Check version matches semantic versioning regex pattern. https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string @@ -251,23 +225,16 @@ def validate_version(cls, v): assert bool(re.match(version_regex, v)) return v - class Config(BaseModelForbidExtra.Config): - """Configure model.""" - - @staticmethod - def schema_extra(schema: Dict[str, Any], - model: Type["ServiceMeta"]) -> None: - """Configure OpenAPI schema.""" - if "title" in schema.keys(): - schema.pop("title", None) - for prop in schema.get("properties", {}).values(): - prop.pop("title", None) - schema["example"] = { + model_config = ConfigDict( + json_schema_extra={ + "example": { "name": "cool_seq_tool", "version": __version__, "response_datetime": datetime.now(), - "url": "https://github.com/GenomicMedLab/cool-seq-tool" + "url": "https://github.com/GenomicMedLab/cool-seq-tool", } + } + ) class TranscriptExonDataResponse(BaseModelForbidExtra): @@ -277,18 +244,9 @@ class TranscriptExonDataResponse(BaseModelForbidExtra): warnings: List[StrictStr] = [] service_meta: ServiceMeta - class Config(BaseModelForbidExtra.Config): - """Configure model.""" - - @staticmethod - def schema_extra(schema: Dict[str, Any], - model: Type["TranscriptExonDataResponse"]) -> None: - """Configure OpenAPI schema.""" - if "title" in schema.keys(): - schema.pop("title", None) - for prop in schema.get("properties", {}).values(): - prop.pop("title", None) - schema["example"] = { + model_config = ConfigDict( + json_schema_extra={ + "example": { "transcript_exon_data": { "chr": "NC_000001.11", "gene": "TPM3", @@ -296,16 +254,18 @@ def schema_extra(schema: Dict[str, Any], "exon": 1, "exon_offset": 0, "transcript": "NM_152263.3", - "strand": -1 + "strand": -1, }, - "warnings": list(), + "warnings": [], "service_meta": { "name": "cool_seq_tool", "version": __version__, "response_datetime": datetime.now(), - "url": "https://github.com/GenomicMedLab/cool-seq-tool" - } + "url": "https://github.com/GenomicMedLab/cool-seq-tool", + }, } + } + ) class GenomicDataResponse(BaseModelForbidExtra): @@ -315,18 +275,9 @@ class GenomicDataResponse(BaseModelForbidExtra): warnings: List[StrictStr] = [] service_meta: ServiceMeta - class Config(BaseModelForbidExtra.Config): - """Configure model.""" - - @staticmethod - def schema_extra(schema: Dict[str, Any], - model: Type["GenomicDataResponse"]) -> None: - """Configure OpenAPI schema.""" - if "title" in schema.keys(): - schema.pop("title", None) - for prop in schema.get("properties", {}).values(): - prop.pop("title", None) - schema["example"] = { + model_config = ConfigDict( + json_schema_extra={ + "example": { "genomic_data": { "gene": "TPM3", "chr": "NC_000001.11", @@ -337,16 +288,18 @@ def schema_extra(schema: Dict[str, Any], "exon_start_offset": 0, "exon_end_offset": None, "transcript": "NM_152263.3", - "strand": -1 + "strand": -1, }, - "warnings": list(), + "warnings": [], "service_meta": { "name": "cool_seq_tool", "version": __version__, "response_datetime": datetime.now(), - "url": "https://github.com/GenomicMedLab/cool-seq-tool" - } + "url": "https://github.com/GenomicMedLab/cool-seq-tool", + }, } + } + ) class MappedManeData(BaseModel): @@ -360,26 +313,19 @@ class MappedManeData(BaseModel): alt_ac: StrictStr assembly: Assembly - class Config(BaseModelForbidExtra.Config): - """Configure model.""" - - @staticmethod - def schema_extra(schema: Dict[str, Any], - model: Type["MappedManeData"]) -> None: - """Configure OpenAPI schema.""" - if "title" in schema.keys(): - schema.pop("title", None) - for prop in schema.get("properties", {}).values(): - prop.pop("title", None) - schema["example"] = { + model_config = ConfigDict( + json_schema_extra={ + "example": { "gene": "BRAF", "refseq": "NM_001374258.1", "ensembl": "ENST00000644969.2", "strand": "-", "status": "mane_plus_clinical", "alt_ac": "NC_000007.13", - "assembly": "GRCh37" + "assembly": "GRCh37", } + } + ) class MappedManeDataService(BaseModelForbidExtra): @@ -389,18 +335,9 @@ class MappedManeDataService(BaseModelForbidExtra): warnings: List[StrictStr] = [] service_meta: ServiceMeta - class Config(BaseModelForbidExtra.Config): - """Configure model.""" - - @staticmethod - def schema_extra(schema: Dict[str, Any], - model: Type["MappedManeDataService"]) -> None: - """Configure OpenAPI schema.""" - if "title" in schema.keys(): - schema.pop("title", None) - for prop in schema.get("properties", {}).values(): - prop.pop("title", None) - schema["example"] = { + model_config = ConfigDict( + json_schema_extra={ + "example": { "mapped_mane_data": { "gene": "BRAF", "refseq": "NM_001374258.1", @@ -408,16 +345,18 @@ def schema_extra(schema: Dict[str, Any], "strand": "-", "status": "mane_plus_clinical", "alt_ac": "NC_000007.13", - "assembly": "GRCh37" + "assembly": "GRCh37", }, - "warnings": list(), + "warnings": [], "service_meta": { "name": "cool_seq_tool", "version": __version__, "response_datetime": datetime.now(), - "url": "https://github.com/GenomicMedLab/cool-seq-tool" - } + "url": "https://github.com/GenomicMedLab/cool-seq-tool", + }, } + } + ) class ManeData(BaseModel): @@ -430,25 +369,18 @@ class ManeData(BaseModel): strand: Strand status: TranscriptPriorityLabel - class Config(BaseModelForbidExtra.Config): - """Configure model.""" - - @staticmethod - def schema_extra(schema: Dict[str, Any], - model: Type["ManeData"]) -> None: - """Configure OpenAPI schema.""" - if "title" in schema.keys(): - schema.pop("title", None) - for prop in schema.get("properties", {}).values(): - prop.pop("title", None) - schema["example"] = { + model_config = ConfigDict( + json_schema_extra={ + "example": { "gene": "BRAF", "refseq": "NP_004324.2", "ensembl": "ENSP00000493543.1", "pos": (598, 598), "strand": "-", - "status": "mane_select" + "status": "mane_select", } + } + ) class ManeDataService(BaseModelForbidExtra): @@ -458,34 +390,27 @@ class ManeDataService(BaseModelForbidExtra): warnings: List[StrictStr] = [] service_meta: ServiceMeta - class Config(BaseModelForbidExtra.Config): - """Configure model.""" - - @staticmethod - def schema_extra(schema: Dict[str, Any], - model: Type["ManeDataService"]) -> None: - """Configure OpenAPI schema.""" - if "title" in schema.keys(): - schema.pop("title", None) - for prop in schema.get("properties", {}).values(): - prop.pop("title", None) - schema["example"] = { + model_config = ConfigDict( + json_schema_extra={ + "example": { "mane_data": { "gene": "BRAF", "refseq": "NP_004324.2", "ensembl": "ENSP00000493543.1", "pos": (598, 598), "strand": "-", - "status": "mane_select" + "status": "mane_select", }, - "warnings": list(), + "warnings": [], "service_meta": { "name": "cool_seq_tool", "version": __version__, "response_datetime": datetime.now(), - "url": "https://github.com/GenomicMedLab/cool-seq-tool" - } + "url": "https://github.com/GenomicMedLab/cool-seq-tool", + }, } + } + ) # ALIGNMENT MAPPER SERVICE SCHEMAS @@ -498,26 +423,19 @@ class CdnaRepresentation(BaseModelForbidExtra): c_start_pos: str c_end_pos: str cds_start: int - residue_mode = ResidueMode.INTER_RESIDUE.value - - class Config(BaseModelForbidExtra.Config): - """Configure model.""" - - @staticmethod - def schema_extra(schema: Dict[str, Any], - model: Type["CdnaRepresentation"]) -> None: - """Configure OpenAPI schema.""" - if "title" in schema.keys(): - schema.pop("title", None) - for prop in schema.get("properties", {}).values(): - prop.pop("title", None) - schema["example"] = { + residue_mode: Literal[ResidueMode.INTER_RESIDUE] = ResidueMode.INTER_RESIDUE.value + + model_config = ConfigDict( + json_schema_extra={ + "example": { "c_ac": "NM_004333.6", "c_start_pos": 1797, "c_end_pos": 1800, "cds_start": 226, - "residue_mode": "inter-residue" + "residue_mode": "inter-residue", } + } + ) class ToCdnaService(BaseModelForbidExtra): @@ -527,33 +445,26 @@ class ToCdnaService(BaseModelForbidExtra): warnings: List[StrictStr] = [] service_meta: ServiceMeta - class Config(BaseModelForbidExtra.Config): - """Configure model.""" - - @staticmethod - def schema_extra(schema: Dict[str, Any], - model: Type["ToCdnaService"]) -> None: - """Configure OpenAPI schema.""" - if "title" in schema.keys(): - schema.pop("title", None) - for prop in schema.get("properties", {}).values(): - prop.pop("title", None) - schema["example"] = { + model_config = ConfigDict( + json_schema_extra={ + "example": { "c_data": { "c_ac": "NM_004333.6", "c_start_pos": 1797, "c_end_pos": 1800, "cds_start": 226, - "residue_mode": "inter-residue" + "residue_mode": "inter-residue", }, - "warnings": list(), + "warnings": [], "service_meta": { "name": "cool_seq_tool", "version": __version__, "response_datetime": datetime.now(), - "url": "https://github.com/GenomicMedLab/cool-seq-tool" - } + "url": "https://github.com/GenomicMedLab/cool-seq-tool", + }, } + } + ) class GenomicRepresentation(BaseModelForbidExtra): @@ -562,25 +473,18 @@ class GenomicRepresentation(BaseModelForbidExtra): g_ac: str g_start_pos: int g_end_pos: int - residue_mode = ResidueMode.INTER_RESIDUE.value - - class Config(BaseModelForbidExtra.Config): - """Configure model.""" - - @staticmethod - def schema_extra(schema: Dict[str, Any], - model: Type["CdnaRepresentation"]) -> None: - """Configure OpenAPI schema.""" - if "title" in schema.keys(): - schema.pop("title", None) - for prop in schema.get("properties", {}).values(): - prop.pop("title", None) - schema["example"] = { + residue_mode: Literal[ResidueMode.INTER_RESIDUE] = ResidueMode.INTER_RESIDUE.value + + model_config = ConfigDict( + json_schema_extra={ + "example": { "g_ac": "NC_000007.13", "g_start_pos": 140453134, "g_end_pos": 140453137, - "residue_mode": "inter-residue" + "residue_mode": "inter-residue", } + } + ) class ToGenomicService(BaseModelForbidExtra): @@ -590,29 +494,22 @@ class ToGenomicService(BaseModelForbidExtra): warnings: List[StrictStr] = [] service_meta: ServiceMeta - class Config(BaseModelForbidExtra.Config): - """Configure model.""" - - @staticmethod - def schema_extra(schema: Dict[str, Any], - model: Type["ToGenomicService"]) -> None: - """Configure OpenAPI schema.""" - if "title" in schema.keys(): - schema.pop("title", None) - for prop in schema.get("properties", {}).values(): - prop.pop("title", None) - schema["example"] = { + model_config = ConfigDict( + json_schema_extra={ + "example": { "g_data": { "g_ac": "NC_000007.13", "g_start_pos": 140453134, "g_end_pos": 140453137, - "residue_mode": "inter-residue" + "residue_mode": "inter-residue", }, - "warnings": list(), + "warnings": [], "service_meta": { "name": "cool_seq_tool", "version": __version__, "response_datetime": datetime.now(), - "url": "https://github.com/GenomicMedLab/cool-seq-tool" - } + "url": "https://github.com/GenomicMedLab/cool-seq-tool", + }, } + } + ) diff --git a/cool_seq_tool/version.py b/cool_seq_tool/version.py index 44de9d69..71852dda 100644 --- a/cool_seq_tool/version.py +++ b/cool_seq_tool/version.py @@ -1 +1 @@ -__version__ = "0.1.14-dev1" +__version__ = "0.2.0-dev0" diff --git a/setup.cfg b/setup.cfg index 55ca5061..cf814d65 100644 --- a/setup.cfg +++ b/setup.cfg @@ -23,8 +23,8 @@ install_requires = pydantic uvicorn fastapi - gene-normalizer >=0.1.34, != 0.2.0, != 0.2.1, != 0.2.2, != 0.2.3, != 0.2.4, != 0.2.5, != 0.2.6, != 0.2.7, != 0.2.8 - ga4gh.vrs + gene-normalizer ~= 0.3.0.dev0 + ga4gh.vrs ~= 2.0.0.dev0 [options.package_data] cool_seq_tool = diff --git a/tests/unit/test_cool_seq_tool.py b/tests/unit/test_cool_seq_tool.py index 5164e173..cbc05846 100644 --- a/tests/unit/test_cool_seq_tool.py +++ b/tests/unit/test_cool_seq_tool.py @@ -352,7 +352,7 @@ async def test_tpm3(test_cool_seq_tool, tpm3_exon1_exon8, g_to_t_resp = \ await test_cool_seq_tool.genomic_to_transcript_exon_coordinates(**inputs) genomic_data_assertion_checks(g_to_t_resp, tpm3_exon1_exon8) - t_to_g_resp = await test_cool_seq_tool.transcript_to_genomic_coordinates(**g_to_t_resp.genomic_data.dict()) # noqa: E501 + t_to_g_resp = await test_cool_seq_tool.transcript_to_genomic_coordinates(**g_to_t_resp.genomic_data.model_dump()) # noqa: E501 genomic_data_assertion_checks(t_to_g_resp, tpm3_exon1_exon8_t_to_g) inputs["residue_mode"] = "INTER-RESIDUE" @@ -361,7 +361,7 @@ async def test_tpm3(test_cool_seq_tool, tpm3_exon1_exon8, g_to_t_resp = \ await test_cool_seq_tool.genomic_to_transcript_exon_coordinates(**inputs) genomic_data_assertion_checks(g_to_t_resp, tpm3_exon1_exon8_t_to_g) - t_to_g_resp = await test_cool_seq_tool.transcript_to_genomic_coordinates(**g_to_t_resp.genomic_data.dict()) # noqa: E501 + t_to_g_resp = await test_cool_seq_tool.transcript_to_genomic_coordinates(**g_to_t_resp.genomic_data.model_dump()) # noqa: E501 genomic_data_assertion_checks(t_to_g_resp, tpm3_exon1_exon8_t_to_g) # No strand @@ -372,7 +372,7 @@ async def test_tpm3(test_cool_seq_tool, tpm3_exon1_exon8, g_to_t_resp = \ await test_cool_seq_tool.genomic_to_transcript_exon_coordinates(**inputs) genomic_data_assertion_checks(g_to_t_resp, tpm3_exon1_exon8) - t_to_g_resp = await test_cool_seq_tool.transcript_to_genomic_coordinates(**g_to_t_resp.genomic_data.dict()) # noqa: E501 + t_to_g_resp = await test_cool_seq_tool.transcript_to_genomic_coordinates(**g_to_t_resp.genomic_data.model_dump()) # noqa: E501 genomic_data_assertion_checks(t_to_g_resp, tpm3_exon1_exon8_t_to_g) # Offset, no strand @@ -384,7 +384,7 @@ async def test_tpm3(test_cool_seq_tool, tpm3_exon1_exon8, g_to_t_resp = \ await test_cool_seq_tool.genomic_to_transcript_exon_coordinates(**inputs) genomic_data_assertion_checks(g_to_t_resp, tpm3_exon1_exon8_offset) - t_to_g_resp = await test_cool_seq_tool.transcript_to_genomic_coordinates(**g_to_t_resp.genomic_data.dict()) # noqa: E501 + t_to_g_resp = await test_cool_seq_tool.transcript_to_genomic_coordinates(**g_to_t_resp.genomic_data.model_dump()) # noqa: E501 genomic_data_assertion_checks(t_to_g_resp, tpm3_exon1_exon8_offset_t_to_g) # Offset, strand @@ -392,7 +392,7 @@ async def test_tpm3(test_cool_seq_tool, tpm3_exon1_exon8, g_to_t_resp = \ await test_cool_seq_tool.genomic_to_transcript_exon_coordinates(**inputs) genomic_data_assertion_checks(g_to_t_resp, tpm3_exon1_exon8_offset) - t_to_g_resp = await test_cool_seq_tool.transcript_to_genomic_coordinates(**g_to_t_resp.genomic_data.dict()) # noqa: E501 + t_to_g_resp = await test_cool_seq_tool.transcript_to_genomic_coordinates(**g_to_t_resp.genomic_data.model_dump()) # noqa: E501 genomic_data_assertion_checks(t_to_g_resp, tpm3_exon1_exon8_offset_t_to_g) # Test only setting start @@ -409,7 +409,7 @@ async def test_tpm3(test_cool_seq_tool, tpm3_exon1_exon8, g_to_t_resp = \ await test_cool_seq_tool.genomic_to_transcript_exon_coordinates(**inputs) genomic_data_assertion_checks(g_to_t_resp, tpm3_exon1_g) - t_to_g_resp = await test_cool_seq_tool.transcript_to_genomic_coordinates(**g_to_t_resp.genomic_data.dict()) # noqa: E501 + t_to_g_resp = await test_cool_seq_tool.transcript_to_genomic_coordinates(**g_to_t_resp.genomic_data.model_dump()) # noqa: E501 genomic_data_assertion_checks(t_to_g_resp, tpm3_exon1_exon8_t_to_g) # Test only setting end @@ -425,7 +425,7 @@ async def test_tpm3(test_cool_seq_tool, tpm3_exon1_exon8, g_to_t_resp = \ await test_cool_seq_tool.genomic_to_transcript_exon_coordinates(**inputs) genomic_data_assertion_checks(g_to_t_resp, tpm3_exon8_g) - t_to_g_resp = await test_cool_seq_tool.transcript_to_genomic_coordinates(**g_to_t_resp.genomic_data.dict()) # noqa: E501 + t_to_g_resp = await test_cool_seq_tool.transcript_to_genomic_coordinates(**g_to_t_resp.genomic_data.model_dump()) # noqa: E501 genomic_data_assertion_checks(t_to_g_resp, tpm3_exon1_exon8_t_to_g) @@ -453,7 +453,7 @@ async def test_braf(test_cool_seq_tool, mane_braf): mane_braf_t_to_g = copy.deepcopy(mane_braf) t_to_g_resp = \ - await test_cool_seq_tool.transcript_to_genomic_coordinates(**g_to_t_resp.genomic_data.dict()) # noqa: E501 + await test_cool_seq_tool.transcript_to_genomic_coordinates(**g_to_t_resp.genomic_data.model_dump()) # noqa: E501 mane_braf_t_to_g.start = 140808062 genomic_data_assertion_checks(t_to_g_resp, mane_braf_t_to_g) @@ -475,7 +475,7 @@ async def test_wee1(test_cool_seq_tool, wee1_exon2_exon11, mane_wee1_exon2_exon1 g_to_t_resp = \ await test_cool_seq_tool.genomic_to_transcript_exon_coordinates(**inputs) genomic_data_assertion_checks(g_to_t_resp, wee1_exon2_exon11) - t_to_g_resp = await test_cool_seq_tool.transcript_to_genomic_coordinates(**g_to_t_resp.genomic_data.dict()) # noqa: E501 + t_to_g_resp = await test_cool_seq_tool.transcript_to_genomic_coordinates(**g_to_t_resp.genomic_data.model_dump()) # noqa: E501 genomic_data_assertion_checks(t_to_g_resp, wee1_exon2_exon11_t_to_g) inputs["gene"] = "wee1" @@ -483,7 +483,7 @@ async def test_wee1(test_cool_seq_tool, wee1_exon2_exon11, mane_wee1_exon2_exon1 g_to_t_resp = \ await test_cool_seq_tool.genomic_to_transcript_exon_coordinates(**inputs) genomic_data_assertion_checks(g_to_t_resp, wee1_exon2_exon11) - t_to_g_resp = await test_cool_seq_tool.transcript_to_genomic_coordinates(**g_to_t_resp.genomic_data.dict()) # noqa: E501 + t_to_g_resp = await test_cool_seq_tool.transcript_to_genomic_coordinates(**g_to_t_resp.genomic_data.model_dump()) # noqa: E501 genomic_data_assertion_checks(t_to_g_resp, wee1_exon2_exon11_t_to_g) # MANE @@ -493,7 +493,7 @@ async def test_wee1(test_cool_seq_tool, wee1_exon2_exon11, mane_wee1_exon2_exon1 g_to_t_resp = \ await test_cool_seq_tool.genomic_to_transcript_exon_coordinates(**inputs) genomic_data_assertion_checks(g_to_t_resp, mane_wee1_exon2_exon11) - t_to_g_resp = await test_cool_seq_tool.transcript_to_genomic_coordinates(**g_to_t_resp.genomic_data.dict()) # noqa: E501 + t_to_g_resp = await test_cool_seq_tool.transcript_to_genomic_coordinates(**g_to_t_resp.genomic_data.model_dump()) # noqa: E501 genomic_data_assertion_checks(t_to_g_resp, mane_wee1_exon2_exon11_t_to_g) diff --git a/tests/unit/test_mane_transcript.py b/tests/unit/test_mane_transcript.py index 85e1939d..1dd822e8 100644 --- a/tests/unit/test_mane_transcript.py +++ b/tests/unit/test_mane_transcript.py @@ -6,7 +6,7 @@ import pandas as pd from cool_seq_tool.data_sources import MANETranscript, MANETranscriptMappings, \ - SeqRepoAccess, TranscriptMappings, UTADatabase, GeneNormalizer + SeqRepoAccess, TranscriptMappings, UTADatabase from cool_seq_tool.data_sources.mane_transcript import MANETranscriptError from cool_seq_tool.schemas import AnnotationLayer, Assembly, ResidueMode @@ -15,7 +15,7 @@ def test_mane_transcript(test_seqrepo_access): """Build mane transcript test fixture.""" return MANETranscript(test_seqrepo_access, TranscriptMappings(), - MANETranscriptMappings(), UTADatabase(), GeneNormalizer()) + MANETranscriptMappings(), UTADatabase()) @pytest.fixture(scope="module") @@ -567,12 +567,13 @@ async def test_g_to_mane_c(test_mane_transcript, egfr_l858r_mane_c, } +@pytest.mark.skipif(True, reason="chromosome locations not supported in 2.0-alpha") @pytest.mark.asyncio async def test_get_mapped_mane_data(test_mane_transcript): """Test that get_mapped_mane_data works correctly""" resp = await test_mane_transcript.get_mapped_mane_data( "braf", Assembly.GRCH38, 140785808, ResidueMode.INTER_RESIDUE) - assert resp.dict() == { + assert resp.model_dump() == { "gene": "BRAF", "refseq": "NM_001374258.1", "ensembl": "ENST00000644969.2", @@ -584,7 +585,7 @@ async def test_get_mapped_mane_data(test_mane_transcript): resp = await test_mane_transcript.get_mapped_mane_data( "Braf", Assembly.GRCH37, 140485608, ResidueMode.INTER_RESIDUE) - assert resp.dict() == { + assert resp.model_dump() == { "gene": "BRAF", "refseq": "NM_001374258.1", "ensembl": "ENST00000644969.2", @@ -596,7 +597,7 @@ async def test_get_mapped_mane_data(test_mane_transcript): resp = await test_mane_transcript.get_mapped_mane_data( "BRAF", Assembly.GRCH38, 140783157, ResidueMode.INTER_RESIDUE) - assert resp.dict() == { + assert resp.model_dump() == { "gene": "BRAF", "refseq": "NM_004333.6", "ensembl": "ENST00000646891.2", @@ -608,7 +609,7 @@ async def test_get_mapped_mane_data(test_mane_transcript): resp = await test_mane_transcript.get_mapped_mane_data( "BRAF", Assembly.GRCH37, 140482958, ResidueMode.RESIDUE) - assert resp.dict() == { + assert resp.model_dump() == { "gene": "BRAF", "refseq": "NM_004333.6", "ensembl": "ENST00000646891.2",