-
Notifications
You must be signed in to change notification settings - Fork 43
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' of github.com:lmmentel/mendeleev
- Loading branch information
Showing
6 changed files
with
349 additions
and
0 deletions.
There are no files selected for viewing
31 changes: 31 additions & 0 deletions
31
alembic/versions/68c046f6983b_add_scattering_factors_table.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
"""add scattering factors table | ||
Revision ID: 68c046f6983b | ||
Revises: 7d745d77a7c1 | ||
Create Date: 2024-08-15 20:21:40.641912 | ||
""" | ||
|
||
# revision identifiers, used by Alembic. | ||
revision = '68c046f6983b' | ||
down_revision = '7d745d77a7c1' | ||
branch_labels = None | ||
depends_on = None | ||
|
||
from alembic import op | ||
import sqlalchemy as sa | ||
|
||
|
||
def upgrade(): | ||
op.create_table( | ||
"scattering_factors", | ||
sa.Column("id", sa.Integer, primary_key=True), | ||
sa.Column("atomic_number", sa.Integer), | ||
sa.Column("energy", sa.Float), | ||
sa.Column("f1", sa.Float), | ||
sa.Column("f2", sa.Float), | ||
) | ||
|
||
|
||
def downgrade(): | ||
op.drop_table("scattering_factors") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,258 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "1e1ca7ca-264b-429e-b7a9-ee31bd1e6e2e", | ||
"metadata": {}, | ||
"source": [ | ||
"## Aromic scattering factors\n", | ||
"\n", | ||
"source: https://henke.lbl.gov/optical_constants/asf.html" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "eb146023-9199-4927-b15a-53aedc19902a", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from pathlib import Path\n", | ||
"import pandas as pd\n", | ||
"from mendeleev import element\n", | ||
"from mendeleev.db import get_session\n", | ||
"from mendeleev.models import ScatteringFactor, PropertyMetadata, ValueOrigin" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "8e3f5828-3b08-4947-bcf7-6ceefd97c8dd", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"root = Path(\"../data/atomic-scattering-factors/\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "6a7ca7c6-317e-488e-a566-f6cea3954531", | ||
"metadata": {}, | ||
"source": [ | ||
"## Preprocess the files\n", | ||
"\n", | ||
"- some rows have traling tabs \"\\t\" that break parsing\n", | ||
"- a few file start with a comment or non standard header - manually correct" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "2f4a618d-364f-4e1b-9dc2-f24a54ac13cf", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"proc = root.joinpath(\"preprocessed\")\n", | ||
"proc.mkdir(exist_ok=True)\n", | ||
"\n", | ||
"for file in root.glob(\"*.nff\"):\n", | ||
"\n", | ||
" with file.open(\"r\") as fo:\n", | ||
" lines = fo.readlines()\n", | ||
" \n", | ||
" with proc.joinpath(file.name).open(\"w\") as fo:\n", | ||
" for line in lines:\n", | ||
" fo.write(line.strip() + \"\\n\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "810be02e-d7bd-4d51-947f-549f3e270a1d", | ||
"metadata": {}, | ||
"source": [ | ||
"## Load the data into the db" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "cbb5401d-7c1b-476e-aae0-d8381d3dad53", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"session = get_session(read_only=False)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "27b8c7ec-7943-446c-ab63-17d3f418cfe5", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"for file in proc.glob(\"*.nff\"):\n", | ||
" e = element(file.stem.capitalize())\n", | ||
" print(file.stem.capitalize(), e.atomic_number)\n", | ||
" df = pd.read_csv(file, sep=\"\\t\", header=0, engine=\"python\", comment=\"#\")\n", | ||
" print(df.columns)\n", | ||
" for index, row in df.iterrows():\n", | ||
" scattering_factor = ScatteringFactor(\n", | ||
" atomic_number=e.atomic_number,\n", | ||
" energy=row['E(eV)'],\n", | ||
" f1=row['f1'],\n", | ||
" f2=row['f2']\n", | ||
" )\n", | ||
" session.add(scattering_factor)\n", | ||
" session.commit()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "89a898e9-e2ae-4eef-88c3-dab163a5dc7d", | ||
"metadata": {}, | ||
"source": [ | ||
"## Test if the numbers of rows in the Scattering Factors table are the same as rows in the DF" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "7c041915-a112-4880-832d-998ed2ad1cea", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"for file in proc.glob(\"*.nff\"):\n", | ||
" e = element(file.stem.capitalize())\n", | ||
" print(file.stem.capitalize(), e.atomic_number)\n", | ||
" df = pd.read_csv(file, sep=\"\\t\", header=0, engine=\"python\", comment=\"#\")\n", | ||
" db_rows = session.query(ScatteringFactor).filter_by(atomic_number=e.atomic_number).count()\n", | ||
" \n", | ||
" if db_rows == df.shape[0]:\n", | ||
" print(\"OK\")\n", | ||
" else:\n", | ||
" print(f\"ERROR {db_rows=} {df.shape[0]=}\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "45576c65-5042-42a2-8890-48c34ecae2af", | ||
"metadata": {}, | ||
"source": [ | ||
"## Create PropertyMetdata records" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "6a08f783-a63f-4ffe-b49c-1c26c21c7d04", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"z = PropertyMetadata(\n", | ||
" annotations=None,\n", | ||
" attribute_name=\"atomic_number\",\n", | ||
" category=\"electric and optical properties\",\n", | ||
" citation_keys=\"atomic_scattering_factors,henke1993xray\",\n", | ||
" table_name=\"scattering_factors\",\n", | ||
" class_name=\"ScatteringFactor\",\n", | ||
" column_name=\"atomic_number\",\n", | ||
" description=\"Atomic number\",\n", | ||
" unit=None,\n", | ||
" value_origin=ValueOrigin.STORED,\n", | ||
")\n", | ||
"session.add(z)\n", | ||
"session.commit()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "dfb62a0f-65e6-4bd7-b459-87a8e252d130", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"energy = PropertyMetadata(\n", | ||
" annotations=\"specific data references available at cited data source\",\n", | ||
" attribute_name=\"energy\",\n", | ||
" category=\"electric and optical properties\",\n", | ||
" citation_keys=\"atomic_scattering_factors,henke1993xray\",\n", | ||
" table_name=\"scattering_factors\",\n", | ||
" class_name=\"ScatteringFactor\",\n", | ||
" column_name=\"energy\",\n", | ||
" description=\"Energy of the incident photon\",\n", | ||
" unit=\"eV\",\n", | ||
" value_origin=ValueOrigin.STORED,\n", | ||
")\n", | ||
"session.add(energy)\n", | ||
"session.commit()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "af7a8a3a-cbca-4f62-a961-0d34ad729ce3", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"f1 = PropertyMetadata(\n", | ||
" annotations=\"specific data references available at cited data source\",\n", | ||
" attribute_name=\"f1\",\n", | ||
" category=\"electric and optical properties\",\n", | ||
" citation_keys=\"atomic_scattering_factors,henke1993xray\",\n", | ||
" table_name=\"scattering_factors\",\n", | ||
" class_name=\"ScatteringFactor\",\n", | ||
" column_name=\"f1\",\n", | ||
" description=\"Scattering factor f1\",\n", | ||
" unit=None,\n", | ||
" value_origin=ValueOrigin.STORED,\n", | ||
")\n", | ||
"session.add(f1)\n", | ||
"session.commit()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "ab8d29db-2bf8-4ea3-bf33-5ee81b55f3c8", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"f2 = PropertyMetadata(\n", | ||
" annotations=\"specific data references available at cited data source\",\n", | ||
" attribute_name=\"f2\",\n", | ||
" category=\"electric and optical properties\",\n", | ||
" citation_keys=\"atomic_scattering_factors,henke1993xray\",\n", | ||
" table_name=\"scattering_factors\",\n", | ||
" class_name=\"ScatteringFactor\",\n", | ||
" column_name=\"f2\",\n", | ||
" description=\"Scattering factor f2\",\n", | ||
" unit=None,\n", | ||
" value_origin=ValueOrigin.STORED,\n", | ||
")\n", | ||
"session.add(f2)\n", | ||
"session.commit()" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.12.0" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |