Skip to content

Commit

Permalink
Create scivision projects catalog and add first entry for testing pur…
Browse files Browse the repository at this point in the history
…poses (#475)

* Add para

* move docs for python package in navbar

* add pypi link

* rename to scivision

* summary md that can be imported in 3 locations, README, docs and web

* summary updated

* move summary into frontend

* move AboutText to separate file

* use react-showdown to render markdown

* move mdsummary into about.js

* add GitHub to navbar

* add ➕ emoji  to navbar

* simplify headers

* make README consistent with scivision web

* move support table below installation

* make readme header consistent with readthedocs

* make readthedocs intro consistent with README and webapp (bad links)

* add header and image to make webapp consistent with readthedocs and GH

* reduce column width

* consistent/tidy descriptions

* minor differences between the 3 landing pages

* improve readthedocs landing page links

* add user guide link

* add support table to readthedocs and make install consistent with README

* move install instructions out of README

* add first attempt at a projects.json

* add authors and contributors

* add a header field

* use plural for fields that are likely plural

* create CatalogProjectEntry

* create CatalogProjects

* add _coerce_projects_catalog

* fix error msg

* add projects to PandasCatalog

* whitespace and missing var

* add missing comma

* fix json

* stop mistakenly using TaskEnum

* duplicate models code for projects

* add links to thumbnails

* import projects json

* fix plural

* add projects pages

* move model-grid position in App.js

* add project-grid

* add links

* fix model-grid

* move for clarity

* remove not needed

* remove frontend changes

* make the project entry about the connections workshop rather than a tutorial

* move header above desc

* add description to project catalog

* merge catalog changes from react branch

* remove unused import

* flake8
  • Loading branch information
edwardchalstrey1 authored Jan 17, 2023
1 parent 04625ee commit 3a9f5f4
Show file tree
Hide file tree
Showing 2 changed files with 160 additions and 2 deletions.
131 changes: 129 additions & 2 deletions scivision/catalog/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,105 @@ def name_unique_key(cls, entries):
return entries


def get_models():
    """Return the ``name`` of every entry in the packaged models catalog.

    The catalog is read from ``data/models.json`` (package data located
    relative to this module) and parsed as a ``CatalogModels``.
    """
    raw_catalog = pkgutil.get_data(__name__, "data/models.json")
    catalog = CatalogModels.parse_raw(raw_catalog)
    return [entry["name"] for entry in catalog.entries]


# Build an Enum whose member names and values are both the model names
# returned by get_models(), evaluated once when this module is imported.
modelEnumStrings = ((model_name, model_name) for model_name in get_models())
ModelEnum = Enum('ModelEnum', modelEnumStrings)


def get_datasources():
    """Return the ``name`` of every entry in the packaged datasources catalog.

    The catalog is read from ``data/datasources.json`` (package data located
    relative to this module) and parsed as a ``CatalogDatasources``.
    """
    raw_catalog = pkgutil.get_data(__name__, "data/datasources.json")
    catalog = CatalogDatasources.parse_raw(raw_catalog)
    return [entry["name"] for entry in catalog.entries]


# Build an Enum whose member names and values are both the datasource names
# returned by get_datasources(), evaluated once when this module is imported.
datasourceEnumStrings = ((source_name, source_name) for source_name in get_datasources())
DataEnum = Enum('DataEnum', datasourceEnumStrings)


class CatalogProjectEntry(BaseModel, extra="forbid", title="A project catalog entry"):
    # One entry of the project catalog. Unknown keys are rejected
    # (extra="forbid").
    #
    # The sequence-valued fields (models, datasources, tasks, institution
    # and tags) are Tuples (rather than Lists) so that they are immutable -
    # Tuple is being used as an immutable sequence here. This means that
    # these fields are hashable, which can be more convenient when included
    # in a dataframe (e.g. unique()). Could consider using a Frozenset for
    # these fields instead, since duplicates and ordering should not be
    # significant.
    name: str = Field(
        ...,
        title="Name",
        description="Short, unique name for the project (one or two words, "
        "under 20 characters recommended)",
    )
    header: str = Field(
        ...,
        title="Header",
        description="Header that will display at the top of the project page",
    )
    description: Optional[str] = Field(
        None,
        title="Description",
        description="Short description of the project (that will appear when hovering on the project thumbnail)",
    )
    # The default is None, so the annotation is Optional[str] (as for
    # `description` above) rather than a bare `str`.
    page: Optional[str] = Field(
        None,
        title="Page",
        description="Markdown formatted content for the project page",
    )
    # ModelEnum / DataEnum restrict these to names present in the model and
    # datasource catalogs.
    models: Tuple[ModelEnum, ...] = Field(
        (),
        title="Models",
        description="Which models from the scivision catalog are used in the project?",
    )
    datasources: Tuple[DataEnum, ...] = Field(
        (),
        title="Datasources",
        description="Which datasources from the scivision catalog are used in the project?",
    )
    tasks: Tuple[TaskEnum, ...] = Field(
        (),
        title="Tasks",
        description="Which task (or tasks) do the CV models used in the project perform?",
    )
    institution: Tuple[str, ...] = Field(
        (),
        title="Institution(s)",
        description="A list of institutions that produced or are associated with "
        "the project (one per item)",
    )
    # No default is given, so `tags` must be supplied for every entry.
    tags: Tuple[str, ...]

    def __getitem__(self, item):
        """Allow dict-style access to fields, e.g. ``entry["name"]``."""
        return getattr(self, item)


class CatalogProjects(BaseModel, extra="forbid"):
    # A named collection of project entries. Unknown keys are rejected
    # (extra="forbid").
    catalog_type: str = "scivision project catalog"
    name: str
    # Tuple: see comment on CatalogProjectEntry
    entries: Tuple[CatalogProjectEntry, ...]

    @validator("entries")
    def name_unique_key(cls, entries):
        """Check that no two entries share the same ``name``.

        Returns the entries unchanged, or raises ``ValueError`` listing the
        names that occur more than once.
        """
        occurrences = Counter(entry['name'] for entry in entries)
        dups = [name for name, seen in occurrences.items() if seen > 1]
        if not dups:
            return entries
        raise ValueError(f"The 'name' field in the project catalog should be unique (duplicates: {dups})")


def _coerce_datasources_catalog(
datasources: Union[CatalogDatasources, os.PathLike, None]
) -> CatalogDatasources:
Expand Down Expand Up @@ -213,7 +312,25 @@ def _coerce_models_catalog(
models_raw = pkgutil.get_data(__name__, "data/models.json")
return CatalogModels.parse_raw(models_raw)
else:
raise TypeError("Cannot load datasource from unsupported type")
raise TypeError("Cannot load model from unsupported type")


def _coerce_projects_catalog(
    projects: Union[CatalogProjects, str, bytes, os.PathLike, None]
) -> CatalogProjects:
    """Return a CatalogProjects determined from the argument.

    Args:
        projects: One of
            * a ``CatalogProjects`` instance, which is returned unchanged;
            * a filesystem path (``str``, ``bytes`` or ``os.PathLike``) to a
              JSON catalog file, which is read and parsed;
            * ``None``, in which case the ``data/projects.json`` packaged
              with this module is loaded.

    Returns:
        The resulting ``CatalogProjects``.

    Raises:
        TypeError: if ``projects`` is none of the supported types.
    """
    if isinstance(projects, CatalogProjects):
        return projects

    if projects is None:
        projects_raw = pkgutil.get_data(__name__, "data/projects.json")
        return CatalogProjects.parse_raw(projects_raw)

    if isinstance(projects, (bytes, str, os.PathLike)):
        # pathlib.Path() raises TypeError when given bytes, so normalise any
        # path-like value to str first; str input passes through unchanged.
        projects_raw = Path(os.fsdecode(projects)).read_text()
        return CatalogProjects.parse_raw(projects_raw)

    raise TypeError("Cannot load project from unsupported type")


class QueryResult(ABC):
Expand All @@ -234,7 +351,7 @@ def to_dataframe(self) -> pd.DataFrame:


class PandasCatalog:
def __init__(self, datasources=None, models=None):
def __init__(self, datasources=None, models=None, projects=None):
super().__init__()

if isinstance(datasources, pd.DataFrame):
Expand All @@ -251,6 +368,12 @@ def __init__(self, datasources=None, models=None):
models_cat = _coerce_models_catalog(models)
self._models = pd.DataFrame([ent.dict() for ent in models_cat.entries])

if isinstance(projects, pd.DataFrame):
self._projects = projects
else:
projects_cat = _coerce_projects_catalog(projects)
self._projects = pd.DataFrame([ent.dict() for ent in projects_cat.entries])

@property
def models(self) -> PandasQueryResult:
return PandasQueryResult(self._models)
Expand All @@ -259,6 +382,10 @@ def models(self) -> PandasQueryResult:
def datasources(self) -> PandasQueryResult:
return PandasQueryResult(self._datasources)

@property
def projects(self) -> PandasQueryResult:
return PandasQueryResult(self._projects)

def _compatible_models(self, datasource) -> PandasQueryResult:
models_compatible_format = self._models[
self._models.format == datasource["format"]
Expand Down
31 changes: 31 additions & 0 deletions scivision/catalog/data/projects.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"catalog_type": "scivision project catalog",
"name": "default",
"entries": [
{
"name": "sept22-connections-workshop",
"header": "Turing Connections Workshop",
"description": "Classification challenge for datasets of Flower and Butterfly images from Kaggle",
"page": "PhD candidates attended a workshop hosted by The Alan Turing Institute's Research Engineering Group (REG) to get some hands-on experience working on scivision as a piece of research software. The task involved adding pre-trained models and datasources to the scivision catalog. Successful submissions include two classification models, for flowers and butterflies, and related datasets.\n#### Notebooks:\n- [Butterflies](https://github.com/scivision-gallery/connections-workshop-examples/blob/main/ButterflyClassification.ipynb)\n- [Flowers](https://github.com/scivision-gallery/connections-workshop-examples/blob/main/FlowerClassification.ipynb)\n\nTo learn more about the REG at Turing, click [here](https://www.turing.ac.uk/research/research-engineering)",
"tasks": [
"classification"
],
"models": [
"butterfly-classification-model",
"flower-classification-model"
],
"datasources": [
"flowers",
"butterflies"
],
"institution": [
"alan-turing-institute"
],
"tags": [
"tutorial",
"butterflies",
"flowers"
]
}
]
}

0 comments on commit 3a9f5f4

Please sign in to comment.