[DOC] Update data documentation (#175)

* add docs for ionic radii * add utilities for rendering doc tables * update docs for colors * update property docs * update format for code block * add isotope decay modes * add screening constant docs * more docs updates * update docstring * bump year * add notebook rendering docs
lmmentel · Aug 18, 2024 · 3f97bae · 3f97bae
1 parent 679e59a
commit 3f97bae
Show file tree

Hide file tree

Showing 8 changed files with 407 additions and 39 deletions.
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -117,7 +117,7 @@
 
 # General information about the project.
 project = "mendeleev"
-copyright = "2021, Lukasz Mentel"
+copyright = "2024, Lukasz Mentel"
 
 # Link to GitHub repo for github_issues extension
 issues_github_path = "lmmentel/mendeleev"

diff --git a/docs/source/data.rst b/docs/source/data.rst
diff --git a/docs/source/data_access.rst b/docs/source/data_access.rst
@@ -83,9 +83,9 @@ The command will export all the tables from the database to a set of files in th
 
 To use this functionality you'll need to clone the mendeleev repository and install the package in development mode. Here's how you can do it:
 
-```bash
-gh clone lmmentel/mendeleev
-cd mendeleev
-poetry install
-poetry run inv export
-```
+.. code-block:: bash
+
+   gh repo clone lmmentel/mendeleev
+   cd mendeleev
+   poetry install
+   poetry run inv export
diff --git a/docs/source/references.bib b/docs/source/references.bib
@@ -874,3 +874,10 @@ @article{henke1993xray
   month     = {July},
   publisher = {Elsevier}
 }
+@misc{ionization_energies,
+  author       = {A.~Kramida and {Yu.~Ralchenko} and J.~Reader and {and NIST ASD Team}},
+  title        = {NIST Atomic Spectra Database Ionization Energies Data},
+  howpublished = {{NIST Atomic Spectra Database (ver. 5.11), [Online]. Available: {\tt{https://physics.nist.gov/asd}} [2024, August 17]. National Institute of Standards and Technology, Gaithersburg, MD.}},
+  year         = {2023},
+  note         = {Accessed: 2024-08-17}
+}
diff --git a/mendeleev/elements.db b/mendeleev/elements.db
diff --git a/mendeleev/models.py b/mendeleev/models.py
@@ -1191,8 +1191,8 @@ class ScatteringFactor(Base):
     Args:
         atomic_number (int): Atomic number
         energy (float): Energy in eV
-        f1 (float): Energy in eV
-        f1 (float): Energy in eV
+        f1 (float): Scattering factor f1
+        f2 (float): Scattering factor f2
     """
 
     __tablename__ = "scattering_factors"

diff --git a/mendeleev/utils.py b/mendeleev/utils.py
@@ -1,6 +1,8 @@
 from typing import Union, Tuple
 import math
 
+import pandas as pd
+
 
 def coeffs(a: int, b: int = 2) -> Tuple[int, int]:
     """
@@ -43,3 +45,75 @@ def n_effective(n: int, source: str = "slater") -> Union[float, None]:
         raise ValueError(
             f"source '{source}' not found, available sources are: {', '.join(numbers.keys())}"
         )
+
+
def render_rst_table(df: pd.DataFrame) -> str:
    """
    Convert a pandas DataFrame to a reStructuredText grid table.

    Every column label and cell is converted with ``str`` exactly once,
    column by column, and the same strings are used both to size the
    columns and to render the rows. Reading the cells column-wise (rather
    than through ``DataFrame.iterrows``) keeps each column's own dtype, so
    an integer next to a float column is rendered as ``1`` and not ``1.0``,
    and the computed widths always match the rendered text.

    Args:
        df (pd.DataFrame): The DataFrame to convert. Column labels do not
            need to be strings.

    Returns:
        str: The DataFrame as a reStructuredText grid table: a header row
        delimited with ``=`` followed by one ``-`` delimited row per
        DataFrame row, in the DataFrame's order.
    """

    def format_row(cells, widths):
        # Left-align every cell and pad it to its column width, with one
        # space of margin on each side.
        return (
            "|"
            + "|".join(f" {cell.ljust(width)} " for cell, width in zip(cells, widths))
            + "|"
        )

    # Stringify labels and values up front; positional access keeps this
    # working for non-string and duplicated column labels.
    headers = [str(label) for label in df.columns]
    columns = [
        [str(value) for value in df.iloc[:, position].tolist()]
        for position in range(len(headers))
    ]

    # Each column is as wide as its longest entry, header included.
    col_lengths = [
        max([len(header), *(len(cell) for cell in cells)])
        for header, cells in zip(headers, columns)
    ]

    # Horizontal rules: "-" between data rows, "=" below the header.
    hline = "+" + "+".join("-" * (length + 2) for length in col_lengths) + "+"
    header_hline = "+" + "+".join("=" * (length + 2) for length in col_lengths) + "+"

    lines = [hline, format_row(headers, col_lengths), header_hline]
    for row_index in range(len(df)):
        row_cells = [cells[row_index] for cells in columns]
        lines.extend((format_row(row_cells, col_lengths), hline))
    return "\n".join(lines)
+
+
def apply_rst_format(df: pd.DataFrame) -> pd.DataFrame:
    """
    Prepare a dataframe for printing by introducing ReST specific formatting.

    The input frame is left untouched; all changes are made on a copy, so
    the function can be called repeatedly on the same frame or on a
    filtered slice of another frame.

    Args:
        df (pd.DataFrame): Frame with at least the columns
            ``attribute_name``, ``description``, ``annotations``,
            ``citation_keys`` and ``value_origin``.

    Returns:
        pd.DataFrame: A new frame where

        * ``citation_keys`` is wrapped in a ``:cite:`` role,
        * rows with a non-null ``annotations`` get a ``footnote_mark``
          (``[#f_<attribute_name>]``) that is also appended to the
          ``description`` as a footnote reference,
        * ``attribute_name`` is wrapped in double backticks,
        * ``value_origin`` is lower-cased,
        * column names are turned into display headers (underscores
          replaced with spaces, first letter capitalized),
        * missing values are replaced with empty strings.
    """
    # Work on a copy: the steps below rewrite values, add a column and
    # rename the columns, none of which should leak back to the caller.
    df = df.copy()

    # convert the key to cite directive
    df["citation_keys"] = ":cite:`" + df["citation_keys"] + "`"

    # identify and add footnote marks; this must happen before
    # attribute_name is wrapped in backticks below
    mask = df["annotations"].notnull()
    df.loc[mask, "footnote_mark"] = "[#f_" + df.loc[mask, "attribute_name"] + "]"
    df.loc[mask, "description"] = (
        df.loc[mask, "description"] + " (" + df.loc[mask, "footnote_mark"] + "_)"
    )

    # wrap attributes into inline literals
    df["attribute_name"] = "``" + df["attribute_name"] + "``"
    df["value_origin"] = df["value_origin"].str.lower()

    # capitalize column names which will be table headers
    df.columns = [c.replace("_", " ").capitalize() for c in df.columns]
    return df.fillna("")
diff --git a/notebooks/render-docs-from-metadata.ipynb b/notebooks/render-docs-from-metadata.ipynb
@@ -0,0 +1,228 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dfc08dfa-d54f-47a9-9983-6259839b8313",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from mendeleev.models import ValueOrigin, PropertyMetadata\n",
+    "from mendeleev.db import get_session, get_engine\n",
+    "from mendeleev.fetch import fetch_table\n",
+    "from sqlalchemy import select, distinct"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bfcf8a73-ecfe-454b-8cd8-ef5690463ca3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from mendeleev.utils import render_rst_table, apply_rst_format"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d188e6dd-f67d-473b-9bb2-22897794019b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def render_doc_table(class_name: str) -> str:\n",
+    "    \"\"\"\n",
+    "    Fetch attributes for a specific class and render a table for documentation.\n",
+    "    \"\"\"\n",
+    "    df = fetch_table(\"propertymetadata\")\n",
+    "    df = df.loc[df[\"class_name\"] == class_name]\n",
+    "    df = apply_rst_format(df)\n",
+    "    \n",
+    "    cols = [\n",
+    "        'Attribute name',\n",
+    "        'Description',\n",
+    "        'Unit',\n",
+    "        'Value origin',\n",
+    "        'Citation keys'\n",
+    "    ]\n",
+    "    # display version of the column names\n",
+    "    return render_rst_table(df[cols].sort_values(\"Attribute name\"))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "923addd1-f2c6-4cfd-9d6f-6dd2727b292c",
+   "metadata": {},
+   "source": [
+    "## Elements"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "140f15ed-bbab-4376-9374-17871698c87f",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "table = render_doc_table(\"Element\")\n",
+    "print(table)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8c68e88e-e30c-440d-b8ce-981273fe6e7c",
+   "metadata": {},
+   "source": [
+    "## IonicRadius"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5d26ad06-af78-499a-abf6-32e4eff18ef3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "table = render_doc_table(\"IonicRadius\")\n",
+    "print(table)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "230f1375-8476-4644-b41f-1deed0a29dc4",
+   "metadata": {},
+   "source": [
+    "## Isotopes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b605fec2-d336-4662-9819-837cc8a766f3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "table = render_doc_table(\"Isotope\")\n",
+    "print(table)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ad2138e2-4439-4348-81b5-fa00919ca05c",
+   "metadata": {},
+   "source": [
+    "## Isotope Decay Modes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7b529265-14d6-4306-b6d7-8c7f32a4f6df",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "table = render_doc_table(\"IsotopeDecayMode\")\n",
+    "print(table)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4fbe1d2c-26c2-4c27-94c6-4ae6b66c4685",
+   "metadata": {},
+   "source": [
+    "## Atomic Scattering Factors"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "81a79ab6-b0bb-474f-aba7-a7dae85b9ced",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "table = render_doc_table(\"ScatteringFactor\")\n",
+    "print(table)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2c866a3a-5800-4826-920e-c6cbcc8d504f",
+   "metadata": {},
+   "source": [
+    "## Ionization Energies"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "11f567eb-271b-42bc-af49-2d524ab6ec5c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "table = render_doc_table(\"IonizationEnergy\")\n",
+    "print(table)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fcc7db46-8ef9-482f-8202-9f2e1138de97",
+   "metadata": {},
+   "source": [
+    "## Screening Constants"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ede28c4c-cb74-4670-8bfb-09b583785d42",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "table = render_doc_table(\"ScreeningConstant\")\n",
+    "print(table)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f50ff7ef-e18b-47cf-aa08-3e3c3343818f",
+   "metadata": {},
+   "source": [
+    "## Oxidation States"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8601497d-df35-4759-bf78-5228d965f733",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "table = render_doc_table(\"OxidationState\")\n",
+    "print(table)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}