Skip to content

Commit

Permalink
Handle data license (#903)
Browse files Browse the repository at this point in the history
* Add license injection to materials

* Add data license to materials builder

* Add license handling to entries, thermo and oxi

* Have summary pull builder meta from materials

* Fix oxi states builder

* Fix stress field in validation task doc

* Fix task prop ref

* Fix chemenv tests
  • Loading branch information
Jason Munro authored Nov 22, 2023
1 parent 0318745 commit 728e858
Show file tree
Hide file tree
Showing 10 changed files with 57 additions and 9 deletions.
9 changes: 7 additions & 2 deletions emmet-builders/emmet/builders/materials/corrected_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,8 @@ def get_entries(self, chemsys: str) -> List[Dict]:

materials_docs = list(
self.materials.query(
criteria=new_q, properties=["material_id", "entries", "deprecated"]
criteria=new_q,
properties=["material_id", "entries", "deprecated", "builder_meta"],
)
)

Expand All @@ -253,7 +254,10 @@ def get_entries(self, chemsys: str) -> List[Dict]:
oxi_states_data = {
d["material_id"]: d.get("average_oxidation_states", {})
for d in self.oxidation_states.query(
properties=["material_id", "average_oxidation_states"],
properties=[
"material_id",
"average_oxidation_states",
],
criteria={
"material_id": {"$in": material_ids},
"state": "successful",
Expand All @@ -272,6 +276,7 @@ def get_entries(self, chemsys: str) -> List[Dict]:
entry_dict["data"]["oxidation_states"] = oxi_states_data.get(
entry_dict["data"]["material_id"], {}
)
entry_dict["data"]["license"] = doc["builder_meta"].get("license")
entry_dict["data"]["run_type"] = r_type
elsyms = sorted(set([el for el in entry_dict["composition"]]))
self._entries_cache["-".join(elsyms)].append(entry_dict)
Expand Down
8 changes: 6 additions & 2 deletions emmet-builders/emmet/builders/materials/oxidation_states.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def __init__(
super().__init__(
source=materials,
target=oxidation_states,
projection=["structure", "deprecated"],
projection=["structure", "deprecated", "builder_meta"],
query=query,
**kwargs,
)
Expand All @@ -42,9 +42,13 @@ def unary_function(self, item):
structure = Structure.from_dict(item["structure"])
mpid = item["material_id"]
deprecated = item["deprecated"]
builder_meta = item["builder_meta"]

oxi_doc = OxidationStateDoc.from_structure(
structure=structure, material_id=mpid, deprecated=deprecated
structure=structure,
material_id=mpid,
deprecated=deprecated,
builder_meta=builder_meta,
)
doc = jsanitize(oxi_doc.model_dump(), allow_bson=True)

Expand Down
4 changes: 4 additions & 0 deletions emmet-builders/emmet/builders/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ class EmmetBuildSettings(EmmetSettings):
[], description="Tags for calculations to deprecate"
)

NON_COMMERCIAL_TAGS: List[str] = Field(
[], description="Tags for which to add BY-NC as license data in builder_meta"
)

VASP_ALLOWED_VASP_TYPES: List[VaspTaskType] = Field(
[t.value for t in VaspTaskType],
description="Allowed task_types to build materials from",
Expand Down
12 changes: 11 additions & 1 deletion emmet-builders/emmet/builders/vasp/materials.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,17 +239,27 @@ def process_item(self, items: List[Dict]) -> List[Dict]:
grouped_tasks = self.filter_and_group_tasks(tasks, task_transformations)
materials = []
for group in grouped_tasks:
commercial_license = True
for task_doc in group:
if set(task_doc.tags).intersection(
set(self.settings.NON_COMMERCIAL_TAGS)
):
commercial_license = False
break
try:
materials.append(
MaterialsDoc.from_tasks(
group,
structure_quality_scores=self.settings.VASP_STRUCTURE_QUALITY_SCORES,
use_statics=self.settings.VASP_USE_STATICS,
commercial_license=commercial_license,
)
)
except Exception as e:
failed_ids = list({t_.task_id for t_ in group})
doc = MaterialsDoc.construct_deprecated_material(group)
doc = MaterialsDoc.construct_deprecated_material(
group, commercial_license
)
doc.warnings.append(str(e))
materials.append(doc)
self.logger.warn(
Expand Down
3 changes: 3 additions & 0 deletions emmet-builders/tests/test_chemenv.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from emmet.core.base import EmmetMeta
import pytest
from maggma.stores import JSONStore, MemoryStore

Expand All @@ -14,11 +15,13 @@ def fake_materials(test_dir):
materials_store.connect()

for doc in entries.query():
builder_meta = EmmetMeta(license="BY-C").model_dump()
materials_store.update(
{
"material_id": doc["entry_id"],
"structure": doc["structure"],
"deprecated": False,
"builder_meta": builder_meta,
}
)
return materials_store
Expand Down
5 changes: 4 additions & 1 deletion emmet-builders/tests/test_oxidation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest
from maggma.stores import JSONStore, MemoryStore

from emmet.core.base import EmmetMeta
from emmet.builders.materials.oxidation_states import OxidationStatesBuilder


Expand All @@ -13,12 +14,14 @@ def fake_materials(test_dir):
materials_store.connect()

for doc in entries.query():
builder_meta = EmmetMeta(license="BY-C").model_dump()
materials_store.update(
{
"material_id": doc["entry_id"],
"structure": doc["structure"],
"deprecated": False,
}
"builder_meta": builder_meta,
},
)
return materials_store

Expand Down
1 change: 1 addition & 0 deletions emmet-core/emmet/core/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,7 @@ def from_docs(cls, material_id: MPID, **docs: Dict[str, Dict]):
"structure",
"deprecated",
"task_ids",
"builder_meta",
],
HasProps.thermo.value: [
"uncorrected_energy_per_atom",
Expand Down
4 changes: 4 additions & 0 deletions emmet-core/emmet/core/thermo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from collections import defaultdict
from typing import Dict, List, Optional, Union
from datetime import datetime
from emmet.core.base import EmmetMeta
from emmet.core.utils import ValueEnum

from pydantic import BaseModel, Field
Expand Down Expand Up @@ -178,6 +179,8 @@ def _energy_eval(entry: ComputedStructureEntry):

(decomp, ehull) = pd.get_decomp_and_e_above_hull(blessed_entry)

builder_meta = EmmetMeta(license=blessed_entry.data.get("license"))

d = {
"thermo_id": "{}_{}".format(material_id, str(thermo_type)),
"material_id": material_id,
Expand All @@ -189,6 +192,7 @@ def _energy_eval(entry: ComputedStructureEntry):
"formation_energy_per_atom": pd.get_form_energy_per_atom(blessed_entry),
"energy_above_hull": ehull,
"is_stable": blessed_entry in pd.stable_entries,
"builder_meta": builder_meta.model_dump(),
}

# Uncomment to make last_updated line up with materials.
Expand Down
16 changes: 15 additions & 1 deletion emmet-core/emmet/core/vasp/material.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" Core definition of a Materials Document """
from typing import Dict, List, Mapping, Optional
from emmet.core.base import EmmetMeta

from pydantic import Field, BaseModel
from pymatgen.analysis.structure_analyzer import SpacegroupAnalyzer
Expand Down Expand Up @@ -55,6 +56,7 @@ def from_tasks(
str, int
] = SETTINGS.VASP_STRUCTURE_QUALITY_SCORES,
use_statics: bool = SETTINGS.VASP_USE_STATICS,
commercial_license: bool = True,
) -> "MaterialsDoc":
"""
Converts a group of tasks into one material
Expand All @@ -63,6 +65,7 @@ def from_tasks(
task_group: List of task documents
structure_quality_scores: quality scores for various calculation types
use_statics: Use statics to define a material
commercial_license: Whether the data should be licensed with BY-C (otherwise BY-NC).
"""
if len(task_group) == 0:
raise Exception("Must have more than one task in the group.")
Expand Down Expand Up @@ -196,6 +199,9 @@ def _entry_eval(task: TaskDocument):
"Individual material entry must contain at least one GGA or GGA+U calculation"
)

# Builder meta and license
builder_meta = EmmetMeta(license="BY-C" if commercial_license else "BY-NC")

return cls.from_structure(
structure=structure,
material_id=material_id,
Expand All @@ -210,17 +216,21 @@ def _entry_eval(task: TaskDocument):
deprecated_tasks=deprecated_tasks,
origins=origins,
entries=entries,
builder_meta=builder_meta,
)

@classmethod
def construct_deprecated_material(
cls, task_group: List[TaskDocument]
cls,
task_group: List[TaskDocument],
commercial_license: bool = True,
) -> "MaterialsDoc":
"""
Converts a group of tasks into a deprecated material
Args:
task_group: List of task documents
commercial_license: Whether the data should be licensed with BY-C (otherwise BY-NC).
"""
if len(task_group) == 0:
raise Exception("Must have more than one task in the group.")
Expand All @@ -246,6 +256,9 @@ def construct_deprecated_material(
# Deprecated
deprecated = True

# Builder meta and license
builder_meta = EmmetMeta(license="BY-C" if commercial_license else "BY-NC")

return cls.from_structure(
structure=structure,
material_id=material_id,
Expand All @@ -257,4 +270,5 @@ def construct_deprecated_material(
task_types=task_types,
deprecated=deprecated,
deprecated_tasks=deprecated_tasks,
builder_meta=builder_meta,
)
4 changes: 2 additions & 2 deletions emmet-core/emmet/core/vasp/task_valid.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,10 @@ class OutputSummary(BaseModel):
bandgap: Optional[float] = Field(
None, description="The DFT bandgap for the last calculation"
)
forces: List[Vector3D] = Field(
forces: Optional[List[Vector3D]] = Field(
[], description="Forces on atoms from the last calculation"
)
stress: Matrix3D = Field(
stress: Optional[Matrix3D] = Field(
[], description="Stress on the unitcell from the last calculation"
)

Expand Down

0 comments on commit 728e858

Please sign in to comment.