Merge branch 'main' into chore/docs-and-types-cleanup

ghandic · Jan 30, 2024 · 3d369b6 · 3d369b6
2 parents d0acb05 + 0598f0e
commit 3d369b6
Show file tree

Hide file tree

Showing 11 changed files with 116 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,6 @@
 # Application
 *.json
+!examples/**/*.json
 !.vscode/settings.json
 !jsf/tests/data/*.json
 TODO.md

diff --git a/examples/flatfile/BUILD b/examples/flatfile/BUILD
@@ -0,0 +1,5 @@
+# Python sources in this directory; the example needs CPython 3.10 or newer
+# because main.py uses the ``match`` statement.
+python_sources(interpreter_constraints=["CPython>=3.10,<4"])
+
+# Third-party requirements target for the example.
+# NOTE(review): presumably reads the sibling requirements.txt (the Pants
+# default source for python_requirements) -- confirm.
+python_requirements(
+    name="reqs",
+)
diff --git a/examples/flatfile/README.md b/examples/flatfile/README.md
@@ -0,0 +1,17 @@
+# Flat file example
+
+## Dependencies
+
+* Typer
+* pandas
+* openpyxl
+* PyArrow
+* jsonlines
+
+## Usage
+
+Run `main.py` from this example folder:
+
+```bash
+python main.py --schema schema.json --records 5 --output-format parquet --output output.parquet
+```
diff --git a/examples/flatfile/main.py b/examples/flatfile/main.py
@@ -0,0 +1,68 @@
+import json
+from enum import Enum
+from pathlib import Path
+
+import jsonlines
+import pandas as pd
+import typer
+from typing_extensions import Annotated
+
+from jsf import JSF
+
+
+class OutputFormat(str, Enum):
+    """File formats the example can write the generated records to.
+
+    Used as the type of the ``output_format`` CLI option; ``main`` has one
+    ``match`` case per member.
+    """
+
+    # Each member's value is the literal string spelled out on the right.
+    csv = "csv"
+    excel = "excel"
+    parquet = "parquet"
+    json = "json"
+    jsonl = "jsonl"
+
+
+def _generate_records(faker: JSF, count: int) -> list:
+    """Return exactly ``count`` fake records as a list."""
+    # NOTE(review): jsf documents that ``generate(n)`` returns a bare object
+    # instead of a one-element list when n == 1 -- confirm against the jsf
+    # version in use. Generating one record per call keeps the result a list
+    # for every count, which is what all the writers in ``main`` expect.
+    return [faker.generate() for _ in range(count)]
+
+
+def _titled_frame(records: list, titles: dict) -> pd.DataFrame:
+    """Build a DataFrame from ``records`` with columns renamed via ``titles``."""
+    # Renaming by key (instead of passing a positional ``header=`` list to
+    # ``to_excel``) keeps every title attached to its own property even when
+    # the generated columns are missing or ordered differently from the schema.
+    return pd.DataFrame.from_records(records).rename(columns=titles)
+
+
+def main(
+    schema: Annotated[
+        Path,
+        typer.Option(
+            exists=True,
+            file_okay=True,
+            dir_okay=False,
+            writable=False,
+            readable=True,
+            resolve_path=True,
+            help="Path to the JSON schema used to produce the fake data.",
+        ),
+    ],
+    records: Annotated[int, typer.Option(min=0, help="Number of records you wish to produce.")],
+    output_format: Annotated[OutputFormat, typer.Option(help="Fake data output format.")],
+    output: Annotated[Path, typer.Option(help="Output file path")],
+) -> None:
+    """Generate fake records from a JSON schema and write them to a flat file."""
+    # NOTE: Typer shows the docstring above as the command's --help text, so
+    # keep it short and user-facing; implementation notes belong in comments.
+    #
+    # Raises NotImplementedError if ``output_format`` has no writer below.
+    faker = JSF.from_json(schema)
+    fake_data = _generate_records(faker, records)
+    match output_format:
+        case OutputFormat.csv:
+            pd.DataFrame.from_records(fake_data).to_csv(output, index=False)
+        case OutputFormat.excel:
+            # The workbook gets two sheets, each with its own batch of records.
+            more_fake_data = _generate_records(faker, records)
+            # Column headings use each property's "title", falling back to
+            # the property name when no (or an empty) title is given.
+            titles = {
+                key: spec.get("title") or key
+                for key, spec in faker.root_schema.get("properties", {}).items()
+            }
+            with pd.ExcelWriter(output) as excel_writer:
+                _titled_frame(fake_data, titles).to_excel(
+                    excel_writer, sheet_name="Fake Data", index=False
+                )
+                _titled_frame(more_fake_data, titles).to_excel(
+                    excel_writer, sheet_name="More Fake Data", index=False
+                )
+        case OutputFormat.json:
+            # Explicit encoding so the file does not depend on the platform
+            # default.
+            with open(output, "w", encoding="utf-8") as f:
+                json.dump(fake_data, f)
+        case OutputFormat.jsonl:
+            with jsonlines.open(output, mode="w") as writer:
+                writer.write_all(fake_data)
+        case OutputFormat.parquet:
+            pd.DataFrame.from_records(fake_data).to_parquet(output, index=False)
+        case _:
+            raise NotImplementedError("Unable to produce in this file format yet")
+
+
+# Only start the command-line interface when this file is executed as a
+# script; Typer derives the options from the annotated parameters of ``main``.
+if __name__ == "__main__":
+    typer.run(main)
diff --git a/examples/flatfile/output.csv b/examples/flatfile/output.csv
@@ -0,0 +1,6 @@
+name,email
+Jessica Bennett,[email protected]
+Christine Sanchez,[email protected]
+Emily Ayala,[email protected]
+Julia Dickerson,[email protected]
+Justin Miller,[email protected]
diff --git a/examples/flatfile/output.json b/examples/flatfile/output.json
@@ -0,0 +1 @@
+[{"name": "Melissa Jackson", "email": "[email protected]"}, {"name": "Miss Tina Morales", "email": "[email protected]"}, {"name": "Jake Vazquez", "email": "[email protected]"}, {"name": "Terri Taylor", "email": "[email protected]"}, {"name": "Kayla Williams", "email": "[email protected]"}]
diff --git a/examples/flatfile/output.jsonl b/examples/flatfile/output.jsonl
@@ -0,0 +1,5 @@
+{"name": "Judith Lara", "email": "[email protected]"}
+{"name": "Joseph Warren", "email": "[email protected]"}
+{"name": "Marilyn Thompson", "email": "[email protected]"}
+{"name": "Teresa Brown", "email": "[email protected]"}
+{"name": "Derek Singleton", "email": "[email protected]"}
diff --git a/examples/flatfile/output.parquet b/examples/flatfile/output.parquet
diff --git a/examples/flatfile/output.xlsx b/examples/flatfile/output.xlsx
diff --git a/examples/flatfile/requirements.txt b/examples/flatfile/requirements.txt
@@ -0,0 +1,5 @@
+jsf
+typer
+pandas
+openpyxl
+PyArrow
+jsonlines
diff --git a/examples/flatfile/schema.json b/examples/flatfile/schema.json
@@ -0,0 +1,8 @@
+{
+    "type": "object",
+    "properties": {
+        "name": {"type": "string", "$provider": "faker.name", "title": "Full Name"},
+        "email": {"type": "string", "$provider": "faker.email", "title": "Email"}
+    },
+    "required": ["name", "email"]
+}