Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding FHIR CodeSystem importer #137

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ xmltodict = "^0.13.0"
click-default-group = "^1.2.4"
linkml-runtime = "^1.7.2"
duckdb = "^0.10.1"
duckdb-engine = "^0.11.2"


[tool.poetry.dev-dependencies]
Expand Down
5 changes: 3 additions & 2 deletions schema_automator/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def import_dosdps(dpfiles, output, **args):
@schema_name_option
def import_sql(db, output, **args):
"""
Imports a schema by introspecting a relational database
Imports a schema by introspecting a relational database.

See :ref:`importers` for more on the importers framework
"""
Expand Down Expand Up @@ -297,13 +297,14 @@ def import_htmltable(url, output, class_name, schema_name, columns,
@click.option('--enum-threshold', default=0.1, help='if the number of distinct values / rows is less than this, do not make an enum')
@click.option('--omit-null/--no-omit-null', default=False, help="if true, ignore null values")
@click.option('--inlined-map', multiple=True, help="SLOT_NAME.KEY pairs indicating which slots are inlined as dict")
@click.option('--index-slot', help="slot to inject for lists of objects")
@click.option('--depluralize/--no-depluralized',
default=True,
show_default=True,
help="Auto-depluralize class names to singular form")
def generalize_json(input, output, schema_name, depluralize: bool, format, omit_null, inlined_map, **kwargs):
"""
Generalizes from a JSON file to a schema
Generalizes from a JSON (or YAML) file to a schema

See :ref:`generalizers` for more on the generalization framework

Expand Down
11 changes: 8 additions & 3 deletions schema_automator/generalizers/csv_data_generalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,17 +264,22 @@ def convert_multiple(self, files: List[str], **kwargs) -> SchemaDefinition:
self.inject_foreign_keys(sv, fks)
return sv.schema

def convert(self, file: str, **kwargs) -> SchemaDefinition:
def convert(self, file: str, delimiter=None, **kwargs) -> SchemaDefinition:
"""
Converts a single TSV file to a single-class schema

:param file:
:param kwargs:
:return:
"""
if delimiter is None:
if file.endswith(".csv"):
delimiter = ","
else:
delimiter = self.column_separator
with open(file, newline='', encoding='utf-8') as tsv_file:
header = [h.strip() for h in tsv_file.readline().split('\t')]
rr = csv.DictReader(tsv_file, fieldnames=header, delimiter=self.column_separator, skipinitialspace=False)
header = [h.strip() for h in tsv_file.readline().split(delimiter)]
rr = csv.DictReader(tsv_file, fieldnames=header, delimiter=delimiter, skipinitialspace=False)
return self.convert_dicts([r for r in rr], **kwargs)

def convert_from_dataframe(self, df: pd.DataFrame, **kwargs) -> SchemaDefinition:
Expand Down
9 changes: 8 additions & 1 deletion schema_automator/generalizers/json_instance_generalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,14 @@ class JsonDataGeneralizer(Generalizer):


def convert(self, input: Union[str, Dict], format: str = 'json',
index_slot: str = None,
container_class_name='Container',
**kwargs) -> SchemaDefinition:
"""
Generalizes from a JSON file

:param input:
:param format:
:param format: json or yaml; use yaml_multi for multiple documents
:param container_class_name:
:param kwargs:
:return:
Expand All @@ -62,13 +63,19 @@ def convert(self, input: Union[str, Dict], format: str = 'json',
obj = json.load(stream)
elif format == 'yaml':
obj = yaml.safe_load(stream)
elif format == 'yaml_multi':
obj = list(yaml.safe_load_all(stream))
elif format == 'toml':
obj_str = "".join(stream.readlines())
toml_obj = tomlkit.parse(obj_str)
json_str = json.dumps(toml_obj)
obj = json.loads(json_str)
else:
raise Exception(f'bad format {format}')
if isinstance(obj, list):
if index_slot is None:
index_slot = 'members'
obj = {index_slot: obj}
rows_by_table = defaultdict(list)
self.rows_by_table = rows_by_table
self._convert_obj(obj, table=container_class_name)
Expand Down
56 changes: 56 additions & 0 deletions schema_automator/importers/fhir_codesystem_import_engine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import json
from typing import Any, Dict, List

from linkml_runtime.linkml_model import SchemaDefinition, EnumDefinition, PermissibleValue

from schema_automator.importers.import_engine import ImportEngine

class FHIRCodeSystemImportEngine(ImportEngine):
    """
    Imports a FHIR CodeSystem resource (JSON text) as a LinkML schema.

    All concept codes in the CodeSystem — including nested child concepts,
    which FHIR uses to express hierarchy — become permissible values of a
    single enum named ``CodeSystemEnum``.
    """

    def load(self, input: str) -> SchemaDefinition:
        """
        Parse a FHIR CodeSystem JSON string into a SchemaDefinition.

        :param input: JSON text of a FHIR CodeSystem resource
        :return: schema containing one enum, ``CodeSystemEnum``
        """
        data = json.loads(input)

        # Fall back to generic identifiers when the resource omits name/url
        schema = SchemaDefinition(
            name=data.get('name', 'FHIRCodeSystem'),
            id=data.get('url', 'http://example.org/FHIRCodeSystem'),
        )

        code_system_enum = EnumDefinition(
            name='CodeSystemEnum',
            description=data.get('description', 'A FHIR CodeSystem resource'),
        )

        # 'concept' holds the top-level concepts; each of these may carry a
        # nested 'concept' list of children (handled recursively below).
        if 'concept' in data:
            code_system_enum.permissible_values = self._process_concepts(data['concept'])

        schema.enums = {
            'CodeSystemEnum': code_system_enum
        }

        return schema

    def _process_concepts(self, concepts: List[Dict[str, Any]]) -> Dict[str, PermissibleValue]:
        """
        Flatten a (possibly nested) list of FHIR concepts into permissible values.

        :param concepts: list of FHIR concept dicts (each may nest children
            under its own 'concept' key)
        :return: mapping of concept code -> PermissibleValue
        """
        permissible_values: Dict[str, PermissibleValue] = {}

        for concept in concepts:
            code = concept['code']
            pv = PermissibleValue(
                text=code,
                title=concept.get('display', None),
                description=concept.get('definition', None),
            )

            # FHIR encodes hierarchy/status as concept properties
            for prop in concept.get('property', []):
                if prop['code'] == 'subsumedBy':
                    pv.is_a = prop['valueCode']
                if prop['code'] == 'status':
                    pv.status = prop['valueCode']

            permissible_values[code] = pv

            # Recurse into child concepts so hierarchical CodeSystems
            # (e.g. v3-RoleCode) contribute all of their codes, not just
            # the top level.
            if 'concept' in concept:
                permissible_values.update(self._process_concepts(concept['concept']))

        return permissible_values

25 changes: 25 additions & 0 deletions tests/test_importers/test_fhir_codesystem_importer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import os

from linkml.generators import PythonGenerator
from linkml_runtime import SchemaView

from schema_automator.importers.fhir_codesystem_import_engine import FHIRCodeSystemImportEngine
from schema_automator.utils import write_schema
from tests import INPUT_DIR, OUTPUT_DIR

INPUT_JSON = os.path.join(INPUT_DIR, "CodeSystem-v3-RoleCode.json")
OUT = os.path.join(OUTPUT_DIR, "CodeSystem-v3-RoleCode.linkml.yaml")


def test_fhir_code_system_import():
    """Round-trip a FHIR CodeSystem fixture through the importer."""
    with open(INPUT_JSON, "r", encoding="utf-8") as fh:
        raw_json = fh.read()

    engine = FHIRCodeSystemImportEngine()
    schema = engine.load(raw_json)
    assert schema
    write_schema(schema, OUT)

    # The emitted schema must be valid enough to generate Python from
    # and to load into a SchemaView.
    generated = PythonGenerator(OUT).serialize()
    assert generated
    _sv = SchemaView(schema)
Loading