Skip to content

Commit

Permalink
[DOC] Update data documentation (#175)
Browse files Browse the repository at this point in the history
* add docs for ionic radii

* add utilities for rendering doc tables

* update docs for colors

* update property docs

* update format for code block

* add isotope decay modes

* add screening constant docs

* more docs updates

* update docstring

* bump year

* add notebook rendering docs
  • Loading branch information
lmmentel authored Aug 18, 2024
1 parent 679e59a commit 3f97bae
Show file tree
Hide file tree
Showing 8 changed files with 407 additions and 39 deletions.
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@

# General information about the project.
project = "mendeleev"
copyright = "2021, Lukasz Mentel"
copyright = "2024, Lukasz Mentel"

# Link to GitHub repo for github_issues extension
issues_github_path = "lmmentel/mendeleev"
Expand Down
119 changes: 89 additions & 30 deletions docs/source/data.rst

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions docs/source/data_access.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,9 @@ The command will export all the tables from the database to a set of files in th

In order to use this functionality you'll need to clone the mendeleev repository and install the package in the development mode. Here's how you can do it:

```bash
gh clone lmmentel/mendeleev
cd mendeleev
poetry install
poetry run inv export
```
.. code-block:: bash

   gh repo clone lmmentel/mendeleev
   cd mendeleev
   poetry install
   poetry run inv export
7 changes: 7 additions & 0 deletions docs/source/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -874,3 +874,10 @@ @article{henke1993xray
month = {July},
publisher = {Elsevier}
}
@misc{ionization_energies,
author = {A.~Kramida and {Yu.~Ralchenko} and J.~Reader and {and NIST ASD Team}},
title = {NIST Atomic Spectra Database Ionization Energies Data},
howpublished = {{NIST Atomic Spectra Database (ver. 5.11), [Online]. Available: {\tt{https://physics.nist.gov/asd}} [2024, August 17]. National Institute of Standards and Technology, Gaithersburg, MD.}},
year = {2023},
note = {Accessed: 2024-08-17}
}
Binary file modified mendeleev/elements.db
Binary file not shown.
4 changes: 2 additions & 2 deletions mendeleev/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1191,8 +1191,8 @@ class ScatteringFactor(Base):
Args:
atomic_number (int): Atomic number
energy (float): Energy in eV
f1 (float): Energy in eV
f1 (float): Energy in eV
f1 (float): Scattering factor f1
f2 (float): Scattering factor f2
"""

__tablename__ = "scattering_factors"
Expand Down
74 changes: 74 additions & 0 deletions mendeleev/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from typing import Union, Tuple
import math

import pandas as pd


def coeffs(a: int, b: int = 2) -> Tuple[int, int]:
"""
Expand Down Expand Up @@ -43,3 +45,75 @@ def n_effective(n: int, source: str = "slater") -> Union[float, None]:
raise ValueError(
f"source '{source}' not found, available sources are: {', '.join(numbers.keys())}"
)


def render_rst_table(df: pd.DataFrame) -> str:
    """
    Convert a pandas DataFrame to a reStructuredText grid table.

    Every header and cell is converted with ``str`` exactly once, and the
    same strings are used both to compute the column widths and to render
    the rows, so the borders always line up. The DataFrame index is not
    rendered.

    Args:
        df (pd.DataFrame): The DataFrame to convert.

    Returns:
        str: The DataFrame as a reStructuredText grid table. The header row
            is separated from the data rows by a line of ``=`` and every
            data row is followed by a line of ``-``.
    """
    headers = [str(col) for col in df.columns]

    # Stringify column by column (positionally) instead of row by row with
    # ``iterrows``: ``iterrows`` upcasts mixed numeric dtypes (an int ``1``
    # becomes ``1.0``), which made cells wider than the computed column width.
    columns = [
        [str(value) for value in df.iloc[:, position].tolist()]
        for position in range(len(headers))
    ]

    # Width of a column is its longest entry, header included
    widths = [
        max(len(text) for text in [header, *cells])
        for header, cells in zip(headers, columns)
    ]

    def _rule(char: str) -> str:
        # Horizontal border; +2 accounts for the single space of padding
        # on each side of a cell.
        return "+" + "+".join(char * (width + 2) for width in widths) + "+"

    def _line(cells) -> str:
        # One table row with every cell left-aligned and padded to width
        return (
            "|"
            + "|".join(f" {cell.ljust(width)} " for cell, width in zip(cells, widths))
            + "|"
        )

    hline = _rule("-")
    lines = [hline, _line(headers), _rule("=")]
    for row_number in range(len(df)):
        lines.append(_line([cells[row_number] for cells in columns]))
        lines.append(hline)
    return "\n".join(lines)


def apply_rst_format(df: pd.DataFrame) -> pd.DataFrame:
    """
    Prepare a dataframe for printing by introducing ReST specific formatting.

    The input must contain the columns ``citation_keys``, ``annotations``,
    ``attribute_name``, ``description`` and ``value_origin``. The input frame
    is left untouched; all changes are made on a copy.

    Args:
        df (pd.DataFrame): The DataFrame with the metadata to format.

    Returns:
        pd.DataFrame: A new DataFrame in which

            - ``citation_keys`` are wrapped in the ``:cite:`` role,
            - rows with non-null ``annotations`` get a ``footnote_mark``
              column value of the form ``[#f_<attribute_name>]`` and a
              reference to it appended to ``description``,
            - ``attribute_name`` is wrapped in double backticks,
            - ``value_origin`` is lower-cased,
            - column names have underscores replaced with spaces and are
              capitalized,
            - missing values are replaced with empty strings.
    """
    # Work on a copy: the original implementation renamed the columns of and
    # wrote into the caller's frame, so a second call on the same frame
    # failed and frames obtained by slicing triggered chained-assignment
    # warnings.
    df = df.copy()

    # convert the key to cite directive
    df["citation_keys"] = ":cite:`" + df["citation_keys"] + "`"

    # identify and add footnote marks, only for rows that carry annotations
    mask = df["annotations"].notnull()
    df.loc[mask, "footnote_mark"] = "[#f_" + df.loc[mask, "attribute_name"] + "]"
    df.loc[mask, "description"] = (
        df.loc[mask, "description"] + " (" + df.loc[mask, "footnote_mark"] + "_)"
    )

    # wrap attributes into inline literals; this has to happen after the
    # footnote marks were built from the bare attribute names
    df["attribute_name"] = "``" + df["attribute_name"] + "``"
    df["value_origin"] = df["value_origin"].str.lower()

    # capitalize column names which will be table headers
    df.columns = [c.replace("_", " ").capitalize() for c in df.columns]
    return df.fillna("")
228 changes: 228 additions & 0 deletions notebooks/render-docs-from-metadata.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "dfc08dfa-d54f-47a9-9983-6259839b8313",
"metadata": {},
"outputs": [],
"source": [
"from mendeleev.models import ValueOrigin, PropertyMetadata\n",
"from mendeleev.db import get_session, get_engine\n",
"from mendeleev.fetch import fetch_table\n",
"from sqlalchemy import select, distinct"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bfcf8a73-ecfe-454b-8cd8-ef5690463ca3",
"metadata": {},
"outputs": [],
"source": [
"from mendeleev.utils import render_rst_table, apply_rst_format"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d188e6dd-f67d-473b-9bb2-22897794019b",
"metadata": {},
"outputs": [],
"source": [
"def render_doc_table(class_name: str) -> str:\n",
" \"\"\"\n",
" Fetch attributes for a specific class and render a table for documentation.\n",
" \"\"\"\n",
" df = fetch_table(\"propertymetadata\")\n",
" df = df.loc[df[\"class_name\"] == class_name]\n",
" df = apply_rst_format(df)\n",
" \n",
" cols = [\n",
" 'Attribute name',\n",
" 'Description',\n",
" 'Unit',\n",
" 'Value origin',\n",
" 'Citation keys'\n",
" ]\n",
" # display version of the column names\n",
" return render_rst_table(df[cols].sort_values(\"Attribute name\"))"
]
},
{
"cell_type": "markdown",
"id": "923addd1-f2c6-4cfd-9d6f-6dd2727b292c",
"metadata": {},
"source": [
"## Elements"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "140f15ed-bbab-4376-9374-17871698c87f",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"table = render_doc_table(\"Element\")\n",
"print(table)"
]
},
{
"cell_type": "markdown",
"id": "8c68e88e-e30c-440d-b8ce-981273fe6e7c",
"metadata": {},
"source": [
"## IonicRadius"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5d26ad06-af78-499a-abf6-32e4eff18ef3",
"metadata": {},
"outputs": [],
"source": [
"table = render_doc_table(\"IonicRadius\")\n",
"print(table)"
]
},
{
"cell_type": "markdown",
"id": "230f1375-8476-4644-b41f-1deed0a29dc4",
"metadata": {},
"source": [
"## Isotopes"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b605fec2-d336-4662-9819-837cc8a766f3",
"metadata": {},
"outputs": [],
"source": [
"table = render_doc_table(\"Isotope\")\n",
"print(table)"
]
},
{
"cell_type": "markdown",
"id": "ad2138e2-4439-4348-81b5-fa00919ca05c",
"metadata": {},
"source": [
"## Isotope Decay Modes"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7b529265-14d6-4306-b6d7-8c7f32a4f6df",
"metadata": {},
"outputs": [],
"source": [
"table = render_doc_table(\"IsotopeDecayMode\")\n",
"print(table)"
]
},
{
"cell_type": "markdown",
"id": "4fbe1d2c-26c2-4c27-94c6-4ae6b66c4685",
"metadata": {},
"source": [
"## Atomic Scattering Factors"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "81a79ab6-b0bb-474f-aba7-a7dae85b9ced",
"metadata": {},
"outputs": [],
"source": [
"table = render_doc_table(\"ScatteringFactor\")\n",
"print(table)"
]
},
{
"cell_type": "markdown",
"id": "2c866a3a-5800-4826-920e-c6cbcc8d504f",
"metadata": {},
"source": [
"## Ionization Energies"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "11f567eb-271b-42bc-af49-2d524ab6ec5c",
"metadata": {},
"outputs": [],
"source": [
"table = render_doc_table(\"IonizationEnergy\")\n",
"print(table)"
]
},
{
"cell_type": "markdown",
"id": "fcc7db46-8ef9-482f-8202-9f2e1138de97",
"metadata": {},
"source": [
"## Screening Constants"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ede28c4c-cb74-4670-8bfb-09b583785d42",
"metadata": {},
"outputs": [],
"source": [
"table = render_doc_table(\"ScreeningConstant\")\n",
"print(table)"
]
},
{
"cell_type": "markdown",
"id": "f50ff7ef-e18b-47cf-aa08-3e3c3343818f",
"metadata": {},
"source": [
"## Oxidation States"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8601497d-df35-4759-bf78-5228d965f733",
"metadata": {},
"outputs": [],
"source": [
"table = render_doc_table(\"OxidationState\")\n",
"print(table)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit 3f97bae

Please sign in to comment.