diff --git a/applications/visualizer/backend/ingestion/validator.py b/applications/visualizer/backend/ingestion/validator.py
index 7b8ed2de..9a59afe5 100644
--- a/applications/visualizer/backend/ingestion/validator.py
+++ b/applications/visualizer/backend/ingestion/validator.py
@@ -1,17 +1,19 @@
 from __future__ import annotations
 
-from enum import IntEnum
-from typing import Any, Dict, List, Literal, Optional, Tuple
+from enum import IntEnum, StrEnum
+from typing import Dict, List, Literal, Optional, Tuple
 
-from pydantic import BaseModel, Field
-from pydantic.alias_generators import to_camel
+from pydantic import BaseModel, Field, RootModel, model_validator
 
 
 class Data(BaseModel):
     neurons: List[Neuron]
     datasets: List[Dataset]
-    connections: Dict[str, Connection]
-    annotations: Dict[str, Annotation]
+    connections: Dict[str, List[Connection]]
+    annotations: Dict[
+        Literal["head", "complete"],  # TODO: should 'tail' be included
+        Annotation
+    ]
 
 
 class Neuron(BaseModel):
@@ -24,6 +26,12 @@ class Neuron(BaseModel):
     typ: str  # type of the neuron
 
 
+class DatasetType(StrEnum):
+    COMPLETE = "complete"
+    HEAD = "head"
+    TAIL = "tail"
+
+
 class Axe(BaseModel):
     face: str
     axisIndex: int
@@ -33,7 +41,7 @@ class Axe(BaseModel):
 class Dataset(BaseModel):
     id: str
     name: str
-    type: Literal["complete", "head", "tail"]
+    type: DatasetType
     time: int  # TODO: should be gte than 0?
     visualTime: float  # TODO: should be gte than 0?
     description: str
@@ -67,11 +75,32 @@ class Connection(BaseModel):
     )
     typ: ConnectionType  # the type of connection ("electrical" or "chemical")
 
-
-class Annotation(BaseModel):
-    increase: List[
-        Tuple[  # the type of annotation
-            str,  # pre, the ID/name of a neuron from "neurons.json"
-            str,  # post, the ID/name of the other neuron from "neurons.json" that is part of the couple
-        ]
-    ]
+    @model_validator(mode="after")
+    def check_same_size_elements(self):
+        """Ensure ids, post_tid, pre_tid and syn all have the same length.
+
+        A ValueError is raised (pydantic reports it as a ValidationError)
+        rather than using assert, which is stripped under "python -O".
+        """
+        length = len(self.ids)
+        if any(
+            len(values) != length for values in (self.post_tid, self.pre_tid, self.syn)
+        ):
+            raise ValueError(
+                "ids, post_tid, pre_tid and syn must have the same number of elements"
+            )
+        return self
+
+
+class Annotation(RootModel):
+    """Map each annotation type to its list of (pre, post) neuron name pairs."""
+
+    root: Dict[
+        Literal["increase", "variable", "postembryonic", "decrease", "stable"],
+        List[
+            Tuple[  # the type of annotation
+                str,  # pre, the ID/name of a neuron from "neurons.json"
+                str,  # post, the ID/name of the other neuron from "neurons.json" that is part of the couple
+            ]
+        ],
+    ] = {}
diff --git a/applications/visualizer/backend/tests/ingestion/test_validator.py b/applications/visualizer/backend/tests/ingestion/test_validator.py
index c9a5fa21..cbea9138 100644
--- a/applications/visualizer/backend/tests/ingestion/test_validator.py
+++ b/applications/visualizer/backend/tests/ingestion/test_validator.py
@@ -1,20 +1,29 @@
+import json
 from typing import Any, Dict, List, NamedTuple
 
 import pytest
 from pydantic import ValidationError
 
-from ingestion.validator import Axe, Dataset, Neuron
+from ingestion.validator import (
+    Annotation,
+    Axe,
+    Connection,
+    ConnectionType,
+    Dataset,
+    DatasetType,
+    Neuron,
+)
 
 JSON = Dict[str, Any]  # just for type checking; otherwise does nothing
 
 
-class NeuronValidTc(NamedTuple):
+class NeuronTc(NamedTuple):
     data: JSON
     expected: Neuron
 
 
-valid_neurons_tc: List[NeuronValidTc] = [
-    NeuronValidTc(
+valid_neurons_tc: List[NeuronTc] = [
+    NeuronTc(
         data={
             "inhead": 1,
             "name": "ADAL",
@@ -38,15 +47,11 @@ class NeuronValidTc(NamedTuple):
 
 
 @pytest.mark.parametrize("data, expected", valid_neurons_tc)
-def test__valida_neuron(data: JSON, expected: Neuron):
+def test__valid_neuron(data: JSON, expected: Neuron):
     neuron = Neuron.model_validate(data)
     assert neuron == expected
 
 
-class NeuronInvalidTc(NamedTuple):
-    data: JSON
-
-
 invalid_neurons_tc: List[JSON] = [
     {
         "inhead": 2,  # not valid bool interpretation
@@ -84,13 +89,13 @@ def test__invalid_neuron(data: JSON):
         Neuron.model_validate(data)
 
 
-class DatasetValidTc(NamedTuple):
+class DatasetTc(NamedTuple):
     data: JSON
     expected: Dataset
 
 
-valid_datasets_tc: List[DatasetValidTc] = [
-    DatasetValidTc(
+valid_datasets_tc: List[DatasetTc] = [
+    DatasetTc(
         data={
             "id": "white_1986_jse",
             "name": "White et al., 1986, JSE (adult)",
@@ -102,13 +107,13 @@ class DatasetValidTc(NamedTuple):
         expected=Dataset(
             id="white_1986_jse",
             name="White et al., 1986, JSE (adult)",
-            type="tail",
+            type=DatasetType.TAIL,
             time=60,
             visualTime=50,
             description="Adult legacy tail with pre-anal ganglion",
         ),
     ),
-    DatasetValidTc(
+    DatasetTc(
         data={
             "id": "witvliet_2020_1",
             "name": "Witvliet et al., 2020, Dataset 1 (L1)",
@@ -125,7 +130,7 @@ class DatasetValidTc(NamedTuple):
         expected=Dataset(
             id="witvliet_2020_1",
             name="Witvliet et al., 2020, Dataset 1 (L1)",
-            type="head",
+            type=DatasetType.HEAD,
             time=0,
             visualTime=0.5,
             description="~0 hours after birth",
@@ -145,10 +150,6 @@ def test__valid_dataset(data: JSON, expected: Dataset):
     assert dataset == expected
 
 
-class DatasetInvalidTc(NamedTuple):
-    data: JSON
-
-
 invalid_datasets_tc: List[JSON] = [
     {
         "id": "white_1986_jse",
@@ -165,3 +166,135 @@ class DatasetInvalidTc(NamedTuple):
 def test__invalid_dataset(data: JSON):
     with pytest.raises(ValidationError):
         Dataset.model_validate(data)
+
+
+class ConnectionTc(NamedTuple):
+    data: JSON
+    expected: Connection
+
+
+valid_connections_tc: List[ConnectionTc] = [
+    ConnectionTc(
+        data={
+            "ids": [9583833],
+            "post": "ADAR",
+            "post_tid": [9576727],
+            "pre": "ADAL",
+            "pre_tid": [9577831],
+            "syn": [1],
+            "typ": 2,
+        },
+        expected=Connection(
+            ids=[9583833],
+            post="ADAR",
+            post_tid=[9576727],
+            pre="ADAL",
+            pre_tid=[9577831],
+            syn=[1],
+            typ=ConnectionType.CHEMICAL,
+        ),
+    )
+]
+
+
+@pytest.mark.parametrize("data, expected", valid_connections_tc)
+def test__valid_connection(data: JSON, expected: Connection):
+    conn = Connection.model_validate(data)
+    assert conn == expected
+
+
+invalid_connections_tc: List[JSON] = [
+    {
+        "ids": [9583833],
+        "post": "ADAR",
+        "post_tid": [9576727],
+        "pre": "ADAL",
+        "pre_tid": [9577831],
+        "syn": [1],
+        "typ": 1,  # invalid connection type
+    },
+    {
+        "ids": [9583833, 9583834],  # not same length
+        "post": "ADAR",
+        "post_tid": [9576727],
+        "pre": "ADAL",
+        "pre_tid": [9577831],
+        "syn": [1],
+        "typ": 2,
+    },
+    {
+        "ids": [9583833],
+        "post": "ADAR",
+        "post_tid": [9576727, 9583834, 9583834],  # not same length
+        "pre": "ADAL",
+        "pre_tid": [9577831],
+        "syn": [1],
+        "typ": 2,
+    },
+]
+
+
+@pytest.mark.parametrize("data", invalid_connections_tc)
+def test__invalid_connection(data: JSON):
+    with pytest.raises(ValidationError):
+        Connection.model_validate(data)
+
+
+class AnnotationTc(NamedTuple):
+    data: JSON
+    expected: Annotation
+
+
+valid_annotations_tc: List[AnnotationTc] = [
+    AnnotationTc(
+        data={
+            "increase": [
+                ["ADAL", "RIPL"],
+                ["ADAR", "RIPR"],
+                ["ADEL", "AVKR"],
+            ]
+        },
+        expected=Annotation(
+            root={
+                "increase": [
+                    ("ADAL", "RIPL"),
+                    ("ADAR", "RIPR"),
+                    ("ADEL", "AVKR"),
+                ]
+            }
+        ),
+    ),
+    AnnotationTc(
+        data=json.loads("{}"),  # empty annotation seems to be valid
+        expected=Annotation(),
+    ),
+    AnnotationTc(  # multiple annotation types
+        data={"increase": [["ADAL", "RIPL"]], "postembryonic": [["ADAL", "RIPL"]]},
+        expected=Annotation(
+            root={"increase": [("ADAL", "RIPL")], "postembryonic": [("ADAL", "RIPL")]}
+        ),
+    ),
+]
+
+
+@pytest.mark.parametrize("data, expected", valid_annotations_tc)
+def test__valid_annotation(data: JSON, expected: Annotation):
+    annotation = Annotation.model_validate(data)
+    assert annotation == expected
+
+
+invalid_annotations_tc: List[JSON] = [
+    {"inexistent": [["ADAL", "RIPL"]]},  # inexistent is not an annotation type
+    {
+        "increase": [
+            ["ADAL", "RIPL", "CEPDL"],  # not a tuple of only pre and post
+            ["ADAR", "RIPR"],
+        ]
+    },
+]
+
+
+@pytest.mark.parametrize("data", invalid_annotations_tc)
+def test__invalid_annotation(data: JSON):
+    with pytest.raises(ValidationError):
+        Annotation.model_validate(data)
diff --git a/format-ingestion.md b/format-ingestion.md
index 2a6d7e35..5c691f87 100644
--- a/format-ingestion.md
+++ b/format-ingestion.md
@@ -89,7 +89,7 @@ The schema is the following:
     "pre": string, // the name of a neuron as defined in "neurons.json"
     "pre_tid": [ ... ], // a list of int where each int represents the ID of a pre synapse for a dedicated pre neuron
     "syn": [ ... ], // a list of int where each int represents the weight of a post or pre synapses (indice matches the neuron in pre/post_tid)
-    "typ": int // the type of connection ("electrical" or "chemical")
+    "typ": int // the type of connection ("electrical" (0) or "chemical" (2))
 }
 ```
 