Skip to content

Commit

Permalink
CELE-46 ingestion structures schema validation
Browse files Browse the repository at this point in the history
  • Loading branch information
dvcorreia committed Aug 16, 2024
1 parent 41399df commit f92aca3
Show file tree
Hide file tree
Showing 3 changed files with 190 additions and 35 deletions.
52 changes: 37 additions & 15 deletions applications/visualizer/backend/ingestion/validator.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
from __future__ import annotations

from enum import IntEnum
from typing import Any, Dict, List, Literal, Optional, Tuple
from enum import IntEnum, StrEnum
from typing import Dict, List, Literal, Optional, Tuple

from pydantic import BaseModel, Field
from pydantic.alias_generators import to_camel
from pydantic import BaseModel, Field, RootModel, model_validator


class Data(BaseModel):
    # Top-level container validated by pydantic: neurons, datasets,
    # connections and annotations.
    #
    # `connections` and `annotations` used to be declared twice in this
    # class body; only the last declaration of a name takes effect, so the
    # dead earlier ones were dropped and the effective types kept.
    neurons: List[Neuron]
    datasets: List[Dataset]
    # every key maps to a list of connections
    connections: Dict[str, List[Connection]]
    annotations: Dict[
        Literal["head", "complete"],  # TODO: should 'tail' be included
        Annotation,
    ]


class Neuron(BaseModel):
Expand All @@ -24,6 +26,12 @@ class Neuron(BaseModel):
typ: str # type of the neuron


class DatasetType(StrEnum):
COMPLETE = "complete"
HEAD = "head"
TAIL = "tail"


class Axe(BaseModel):
face: str
axisIndex: int
Expand All @@ -33,7 +41,7 @@ class Axe(BaseModel):
class Dataset(BaseModel):
id: str
name: str
type: Literal["complete", "head", "tail"]
type: DatasetType
time: int # TODO: should be gte than 0?
visualTime: float # TODO: should be gte than 0?
description: str
Expand Down Expand Up @@ -67,11 +75,25 @@ class Connection(BaseModel):
)
typ: ConnectionType # the type of connection ("electrical" or "chemical")


class Annotation(BaseModel):
increase: List[
Tuple[ # the type of annotation
str, # pre, the ID/name of a neuron from "neurons.json"
str, # post, the ID/name of the other neuron from "neurons.json" that is part of the couple
]
]
@model_validator(mode="after")
def check_same_size_elements(self):
    """Ensure ``post_tid``, ``pre_tid`` and ``syn`` are as long as ``ids``.

    Returns:
        The instance itself, unchanged.

    Raises:
        ValueError: if any of ``post_tid``, ``pre_tid`` or ``syn`` has a
            different number of elements than ``ids``.
    """
    expected = len(self.ids)
    # Raise explicitly instead of using ``assert``: assert statements are
    # removed when Python runs with -O, which would silently disable this
    # check. pydantic reports a ValueError raised in a validator as a
    # ValidationError, the same as it did for the AssertionError.
    if any(
        len(values) != expected
        for values in (self.post_tid, self.pre_tid, self.syn)
    ):
        raise ValueError(
            "ids, post_tid, pre_tid and syn must have the same number of elements"
        )
    return self


class Annotation(RootModel):
    # Root value: maps the type of annotation to a list of (pre, post) pairs.
    #   pre  - the ID/name of a neuron from "neurons.json"
    #   post - the ID/name of the other neuron from "neurons.json" that is
    #          part of the couple
    # The root defaults to an empty mapping.
    root: Dict[
        Literal["increase", "variable", "postembryonic", "decrease", "stable"],
        List[Tuple[str, str]],
    ] = {}
171 changes: 152 additions & 19 deletions applications/visualizer/backend/tests/ingestion/test_validator.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,29 @@
import json
from typing import Any, Dict, List, NamedTuple

import pytest
from pydantic import ValidationError

from ingestion.validator import Axe, Dataset, Neuron
from ingestion.validator import (
Annotation,
Axe,
Connection,
ConnectionType,
Dataset,
DatasetType,
Neuron,
)

JSON = Dict[str, Any] # just for type checking; otherwise does nothing


class NeuronValidTc(NamedTuple):
class NeuronTc(NamedTuple):
data: JSON
expected: Neuron


valid_neurons_tc: List[NeuronValidTc] = [
NeuronValidTc(
valid_neurons_tc: List[NeuronTc] = [
NeuronTc(
data={
"inhead": 1,
"name": "ADAL",
Expand All @@ -38,15 +47,11 @@ class NeuronValidTc(NamedTuple):


@pytest.mark.parametrize("data, expected", valid_neurons_tc)
def test__valida_neuron(data: JSON, expected: Neuron):
def test__valid_neuron(data: JSON, expected: Neuron):
neuron = Neuron.model_validate(data)
assert neuron == expected


class NeuronInvalidTc(NamedTuple):
data: JSON


invalid_neurons_tc: List[JSON] = [
{
"inhead": 2, # not valid bool interpretation
Expand Down Expand Up @@ -84,13 +89,13 @@ def test__invalid_neuron(data: JSON):
Neuron.model_validate(data)


class DatasetValidTc(NamedTuple):
class DatasetTc(NamedTuple):
data: JSON
expected: Dataset


valid_datasets_tc: List[DatasetValidTc] = [
DatasetValidTc(
valid_datasets_tc: List[DatasetTc] = [
DatasetTc(
data={
"id": "white_1986_jse",
"name": "White et al., 1986, JSE (adult)",
Expand All @@ -102,13 +107,13 @@ class DatasetValidTc(NamedTuple):
expected=Dataset(
id="white_1986_jse",
name="White et al., 1986, JSE (adult)",
type="tail",
type=DatasetType.TAIL,
time=60,
visualTime=50,
description="Adult legacy tail with pre-anal ganglion",
),
),
DatasetValidTc(
DatasetTc(
data={
"id": "witvliet_2020_1",
"name": "Witvliet et al., 2020, Dataset 1 (L1)",
Expand All @@ -125,7 +130,7 @@ class DatasetValidTc(NamedTuple):
expected=Dataset(
id="witvliet_2020_1",
name="Witvliet et al., 2020, Dataset 1 (L1)",
type="head",
type=DatasetType.HEAD,
time=0,
visualTime=0.5,
description="~0 hours after birth",
Expand All @@ -145,10 +150,6 @@ def test__valid_dataset(data: JSON, expected: Dataset):
assert dataset == expected


class DatasetInvalidTc(NamedTuple):
data: JSON


invalid_datasets_tc: List[JSON] = [
{
"id": "white_1986_jse",
Expand All @@ -165,3 +166,135 @@ class DatasetInvalidTc(NamedTuple):
def test__invalid_dataset(data: JSON):
    """Validating the given payload as a Dataset must raise ValidationError."""
    with pytest.raises(ValidationError):
        Dataset.model_validate(data)


class ConnectionTc(NamedTuple):
    """Test case: a raw connection payload and the Connection expected from it."""

    data: JSON
    expected: Connection


# Connection payloads paired with the Connection instance each one is
# expected to validate to.
valid_connections_tc: List[ConnectionTc] = [
ConnectionTc(
data={
"ids": [9583833],
"post": "ADAR",
"post_tid": [9576727],
"pre": "ADAL",
"pre_tid": [9577831],
"syn": [1],
# raw integer; the expected model below carries ConnectionType.CHEMICAL
"typ": 2,
},
expected=Connection(
ids=[9583833],
post="ADAR",
post_tid=[9576727],
pre="ADAL",
pre_tid=[9577831],
syn=[1],
typ=ConnectionType.CHEMICAL,
),
)
]


@pytest.mark.parametrize("data, expected", valid_connections_tc)
def test__valid_connection(data: JSON, expected: Connection):
    """A well-formed payload validates into the expected Connection."""
    assert Connection.model_validate(data) == expected


# Connection payloads that validation must reject. The length-mismatch cases
# cover every list that is compared against `ids` (post_tid, pre_tid, syn),
# plus `ids` itself being the odd one out.
invalid_connections_tc: List[JSON] = [
    {
        "ids": [9583833],
        "post": "ADAR",
        "post_tid": [9576727],
        "pre": "ADAL",
        "pre_tid": [9577831],
        "syn": [1],
        "typ": 1,  # invalid connection type
    },
    {
        "ids": [9583833, 9583834],  # not same length
        "post": "ADAR",
        "post_tid": [9576727],
        "pre": "ADAL",
        "pre_tid": [9577831],
        "syn": [1],
        "typ": 2,
    },
    {
        "ids": [9583833],
        "post": "ADAR",
        "post_tid": [9576727, 9583834, 9583834],  # not same length
        "pre": "ADAL",
        "pre_tid": [9577831],
        "syn": [1],
        "typ": 2,
    },
    {
        "ids": [9583833],
        "post": "ADAR",
        "post_tid": [9576727],
        "pre": "ADAL",
        "pre_tid": [9577831, 9577832],  # not same length
        "syn": [1],
        "typ": 2,
    },
    {
        "ids": [9583833],
        "post": "ADAR",
        "post_tid": [9576727],
        "pre": "ADAL",
        "pre_tid": [9577831],
        "syn": [1, 1],  # not same length
        "typ": 2,
    },
]


@pytest.mark.parametrize("data", invalid_connections_tc)
def test__invalid_connection(data: JSON):
    """Validating a malformed payload as a Connection must raise ValidationError."""
    with pytest.raises(ValidationError):
        Connection.model_validate(data)


class AnnotationTc(NamedTuple):
    """Test case: a raw annotation payload and the Annotation expected from it."""

    data: JSON
    expected: Annotation


# Annotation payloads paired with the Annotation instance each one is
# expected to validate to.
valid_annotations_tc: List[AnnotationTc] = [
# single annotation type; pairs are lists in the payload and tuples in the
# expected model
AnnotationTc(
data={
"increase": [
["ADAL", "RIPL"],
["ADAR", "RIPR"],
["ADEL", "AVKR"],
]
},
expected=Annotation(
root={
"increase": [
("ADAL", "RIPL"),
("ADAR", "RIPR"),
("ADEL", "AVKR"),
]
}
),
),
AnnotationTc(
data=json.loads("{}"), # empty annotation seems to be valid
expected=Annotation(),
),
AnnotationTc( # multiple annotation types
data={"increase": [["ADAL", "RIPL"]], "postembryonic": [["ADAL", "RIPL"]]},
expected=Annotation(
root={"increase": [("ADAL", "RIPL")], "postembryonic": [("ADAL", "RIPL")]}
),
),
]


@pytest.mark.parametrize("data, expected", valid_annotations_tc)
def test__valid_annotation(data: JSON, expected: Annotation):
    """A well-formed payload validates into the expected Annotation."""
    assert Annotation.model_validate(data) == expected


# Annotation payloads that validation is expected to reject: an unknown
# annotation type, and a pair with more than two elements.
invalid_annotations_tc: List[JSON] = [
{"inexistent": [["ADAL", "RIPL"]]}, # inexistent is not an annotation type
{
"increase": [
["ADAL", "RIPL", "CEPDL"], # not a tuple of only pre and post
["ADAR", "RIPR"],
]
},
]


@pytest.mark.parametrize("data", invalid_annotations_tc)
def test__invalid_annotation(data: JSON):
    """Validating a malformed payload as an Annotation must raise ValidationError."""
    with pytest.raises(ValidationError):
        Annotation.model_validate(data)
2 changes: 1 addition & 1 deletion format-ingestion.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ The schema is the following:
"pre": string, // the name of a neuron as defined in "neurons.json"
"pre_tid": [ ... ], // a list of int where each int represents the ID of a pre synapse for a dedicated pre neuron
"syn": [ ... ], // a list of int where each int represents the weight of a post or pre synapses (indice matches the neuron in pre/post_tid)
"typ": int // the type of connection ("electrical" or "chemical")
"typ": int // the type of connection ("electrical" (0) or "chemical" (2))
}
```

Expand Down

0 comments on commit f92aca3

Please sign in to comment.