# Diff header of this file, preserved from the original collapsed line:
#   diff --git a/.github/workflows/ci-erd-diagrams.yml b/.github/workflows/ci-erd-diagrams.yml
#   new file mode 100644
#   index 000000000..f7b8bb5ba
#   --- /dev/null
#   +++ b/.github/workflows/ci-erd-diagrams.yml
#   @@ -0,0 +1,43 @@

# Update database ERD diagrams so that they remain up to date with the application
name: Update Database ERD Diagrams

on:
  pull_request:
    # Path filters are evaluated relative to the repository root, so the
    # Makefile that defines `create-erds` must be referenced as api/Makefile.
    paths:
      - api/src/db/models/**
      - api/bin/create_erds.py
      - api/Makefile
      - .github/workflows/ci-erd-diagrams.yml

defaults:
  run:
    working-directory: ./api

# Only trigger one update of the ERD diagrams at a time on the branch.
# If new commits are pushed to the branch, cancel in progress runs and start
# a new one.
# The group is prefixed with the workflow name so that this workflow does not
# cancel (or get cancelled by) other workflows that group on the branch name.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref }}
  cancel-in-progress: true


jobs:
  update-erd-diagrams:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          # Checkout the feature branch associated with the pull request
          ref: ${{ github.head_ref }}

      - name: Update ERD diagrams
        run: make create-erds

      - name: Push changes
        run: |
          git config user.name nava-platform-bot
          git config user.email platform-admins@navapbc.com
          git add --all
          # Commit and push only when the diagrams actually changed (otherwise no-op)
          if ! git diff-index --quiet HEAD; then
            git commit -m "Update database ERD diagrams"
            git push
          fi

# ---------------------------------------------------------------------------
# Remainder of the original collapsed line: diff fragments that belong to
# other files in this change set, preserved as-is.
#
# diff --git a/api/Dockerfile b/api/Dockerfile
# index 865ea8e0a..513944933 100644
# --- a/api/Dockerfile
# +++ b/api/Dockerfile
# @@ -41,6 +41,13 @@ RUN : "${RUN_USER:?RUN_USER and RUN_UID need to be set and non-empty.}" && \
#  FROM base AS dev
#  ARG RUN_USER
# +
# +# In between ARG RUN_USER and USER ${RUN_USER}, the user is still root
# +# If there is anything that needs to be ran as root, this is the spot
# +
# +# Install graphviz which is used to generate ERD diagrams
# +RUN apt-get update && apt-get install --no-install-recommends --yes graphviz
# +
#  USER ${RUN_USER}
#  WORKDIR /api
# diff --git a/api/Makefile b/api/Makefile
# index 00abc6ecd..0a83cf894 100644
# --- a/api/Makefile
# +++ b/api/Makefile
# @@ -164,6 +164,10 @@
db-migrate-heads: ## Show migrations marked as a head db-seed-local: $(PY_RUN_CMD) db-seed-local +create-erds: # Create ERD diagrams for our DB schema + $(PY_RUN_CMD) create-erds + mv bin/*.png ../documentation/api/database/erds + ################################################## # Testing ################################################## @@ -190,22 +194,22 @@ test-coverage-report: ## Open HTML test coverage report ################################################## format: ## Format files - $(PY_RUN_CMD) isort --atomic src tests - $(PY_RUN_CMD) black src tests + $(PY_RUN_CMD) isort --atomic src tests bin + $(PY_RUN_CMD) black src tests bin format-check: ## Check file formatting - $(PY_RUN_CMD) isort --atomic --check-only src tests - $(PY_RUN_CMD) black --check src tests + $(PY_RUN_CMD) isort --atomic --check-only src tests bin + $(PY_RUN_CMD) black --check src tests bin lint: lint-py ## Lint lint-py: lint-flake lint-mypy lint-flake: - $(PY_RUN_CMD) flake8 --format=$(FLAKE8_FORMAT) src tests + $(PY_RUN_CMD) flake8 --format=$(FLAKE8_FORMAT) src tests bin lint-mypy: - $(PY_RUN_CMD) mypy --show-error-codes $(MYPY_FLAGS) src $(MYPY_POSTPROC) + $(PY_RUN_CMD) mypy --show-error-codes $(MYPY_FLAGS) src bin $(MYPY_POSTPROC) lint-security: # https://bandit.readthedocs.io/en/latest/index.html $(PY_RUN_CMD) bandit -c pyproject.toml -r . 
# Diff fragments preserved from the original collapsed line:
#   (tail of the api/Makefile lint-security recipe) --number 3 --skip B101 -ll -x ./.venv
#   diff --git a/api/bin/__init__.py b/api/bin/__init__.py
#   new file mode 100644
#   index 000000000..e69de29bb
#   diff --git a/api/bin/create_erds.py b/api/bin/create_erds.py
#   new file mode 100755
#   index 000000000..f292201e0
#   --- /dev/null
#   +++ b/api/bin/create_erds.py
#   @@ -0,0 +1,59 @@

# Generate database schema diagrams from our SQLAlchemy models
import codecs
import logging
import os
import pathlib
from typing import Any

import pydot
import sadisplay

import src.logging
from src.db.models import opportunity_models

logger = logging.getLogger(__name__)

# Construct the path to the folder this file is within.
# This gets an absolute path so that where you run the script from won't matter;
# it always resolves to the folder containing this script. The generated .png
# files are written here (the `create-erds` make target moves them afterwards).
ERD_FOLDER = pathlib.Path(__file__).parent.resolve()

# If we want to generate separate files for more specific groups, we can set that up here
ALL_MODULES = [opportunity_models]

# Encoding used for the intermediate .dot file, for both writing and reading it back
DOT_FILE_ENCODING = "utf8"


def create_erds(modules: Any, file_name: str) -> None:
    """Render one ERD diagram covering everything defined in ``modules``.

    Every attribute of every module is handed to ``sadisplay.describe``. The
    resulting description is written to a temporary ``<file_name>.dot`` file in
    ERD_FOLDER, which is then converted into ``<file_name>.png`` in the same folder.

    Args:
        modules: iterable of modules whose attributes should be described.
        file_name: base name (without extension) of the generated files.

    Raises:
        Whatever ``sadisplay`` / ``pydot`` raise when describing, parsing or
        rendering fails; the temporary .dot file is removed in that case too.
    """
    logger.info("Generating ERD diagrams for %s", file_name)

    items = [getattr(module, attr) for module in modules for attr in dir(module)]

    description = sadisplay.describe(
        items,
        show_methods=True,
        show_properties=True,
        show_indexes=True,
    )

    dot_file_name = ERD_FOLDER / f"{file_name}.dot"
    png_file_path = ERD_FOLDER / f"{file_name}.png"

    try:
        # We create a temporary .dot file which we then convert to a png
        with codecs.open(str(dot_file_name), "w", encoding=DOT_FILE_ENCODING) as f:
            f.write(sadisplay.dot(description))

        # Read the file back with the encoding it was written with, rather than
        # the platform default, so non-ASCII names survive the round trip.
        (graph,) = pydot.graph_from_dot_file(str(dot_file_name), encoding=DOT_FILE_ENCODING)

        logger.info("Creating ERD diagram at %s", png_file_path)
        graph.write_png(str(png_file_path))
    finally:
        # Remove the temporary .dot file even when parsing/rendering failed,
        # so a broken run does not leave stray files behind.
        try:
            os.remove(dot_file_name)
        except FileNotFoundError:
            # The file was never created (writing it failed); nothing to clean up.
            pass


def main() -> None:
    """Entry point: generate the "full-schema" diagram for ALL_MODULES."""
    with src.logging.init(__package__):
        logger.info("Generating ERD diagrams")

        create_erds(ALL_MODULES, "full-schema")

# Diff header preserved from the original collapsed line (continues on the next line):
#   diff --git a/api/poetry.lock b/api/poetry.lock
index 080b18eb0..6fb1639d9 100644 --- a/api/poetry.lock +++ b/api/poetry.lock @@ -1511,6 +1511,20 @@ files = [ pydantic = ">=2.0.1" python-dotenv = ">=0.21.0" +[[package]] +name = "pydot" +version = "1.4.2" +description = "Python interface to Graphviz's Dot" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "pydot-1.4.2-py2.py3-none-any.whl", hash = "sha256:66c98190c65b8d2e2382a441b4c0edfdb4f4c025ef9cb9874de478fb0793a451"}, + {file = "pydot-1.4.2.tar.gz", hash = "sha256:248081a39bcb56784deb018977e428605c1c758f10897a339fce1dd728ff007d"}, +] + +[package.dependencies] +pyparsing = ">=2.1.4" + [[package]] name = "pyflakes" version = "3.1.0" @@ -1536,6 +1550,20 @@ files = [ [package.extras] plugins = ["importlib-metadata"] +[[package]] +name = "pyparsing" +version = "3.1.1" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" +optional = false +python-versions = ">=3.6.8" +files = [ + {file = "pyparsing-3.1.1-py3-none-any.whl", hash = "sha256:32c7c0b711493c72ff18a981d24f28aaf9c1fb7ed5e9667c9e84e3db623bdbfb"}, + {file = "pyparsing-3.1.1.tar.gz", hash = "sha256:ede28a1a32462f5a9705e07aea48001a08f7cf81a021585011deba701581a0db"}, +] + +[package.extras] +diagrams = ["jinja2", "railroad-diagrams"] + [[package]] name = "pytest" version = "7.4.2" @@ -1752,6 +1780,20 @@ botocore = ">=1.12.36,<2.0a.0" [package.extras] crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] +[[package]] +name = "sadisplay" +version = "0.4.9" +description = "SqlAlchemy schema display script" +optional = false +python-versions = "*" +files = [ + {file = "sadisplay-0.4.9-py2.py3-none-any.whl", hash = "sha256:bf456f582b8f5da19fedef7a9afe969b49231d79724710bc7d35c9439f44c2fc"}, + {file = "sadisplay-0.4.9.tar.gz", hash = "sha256:af67160f89123886ab42b247262862bfcde0a3c236229ecdd59de0a1e8e35d96"}, +] + +[package.dependencies] +SQLAlchemy = ">=0.5" + [[package]] name = "setuptools" version = "68.2.2" @@ -2109,4 +2151,4 @@ 
files = [ [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "a3440ea0134e772b0a01f888be1b12e3e881fdc34577dd22a0eb9743e6f8cccb" +content-hash = "1725ab8db18eae6b340e1f4835868d7a866c93b8f5dd67882e7d5a0ee7b0d3ae" diff --git a/api/pyproject.toml b/api/pyproject.toml index c9d93f285..ef4ca36ac 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -40,6 +40,8 @@ pytest-watch = "^4.2.0" pytest-lazy-fixture = "^0.6.3" types-pyyaml = "^6.0.12.11" setuptools = "^68.2.2" +pydot = "1.4.2" +sadisplay = "0.4.9" [build-system] requires = ["poetry-core>=1.0.0"] @@ -50,6 +52,7 @@ db-migrate = "src.db.migrations.run:up" db-migrate-down = "src.db.migrations.run:down" db-migrate-down-all = "src.db.migrations.run:downall" db-seed-local = "tests.lib.seed_local_db:seed_local_db" +create-erds = "bin.create_erds:main" [tool.black] line-length = 100 diff --git a/documentation/api/database/erds/README.md b/documentation/api/database/erds/README.md new file mode 100644 index 000000000..8e5f0c3f1 --- /dev/null +++ b/documentation/api/database/erds/README.md @@ -0,0 +1,17 @@ +# Overview +This folder contains ERD diagrams representing the schema of our Postgres database. + +Diagrams can be manually generated by running `make create-erds` from the api folder. + +# Dependencies +If running outside of Docker, you must install `graphviz` (`brew install graphviz`) for this to work. Inside Docker, it is installed automatically as part of the Dockerfile. + +# Caveats +The diagrams generated are based on our SQLAlchemy models, and not the database itself, so there are a few differences. + +* Fields that we name differently in code will have a different name than in the database +* The table names use the class name +* Property fields are SQLAlchemy only and generally represent relationships (i.e. 
values fetched via a foreign key `join`) + +# Files +![Postgres ERD](full-schema.png) \ No newline at end of file diff --git a/documentation/api/database/erds/full-schema.png b/documentation/api/database/erds/full-schema.png new file mode 100644 index 000000000..02528cd69 Binary files /dev/null and b/documentation/api/database/erds/full-schema.png differ