Skip to content

Commit

Permalink
Merge branch 'main' into chore/docs-and-types-cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
ghandic authored Jan 30, 2024
2 parents d0acb05 + 0598f0e commit 3d369b6
Show file tree
Hide file tree
Showing 11 changed files with 116 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Application
*.json
!examples/**/*.json
!.vscode/settings.json
!jsf/tests/data/*.json
TODO.md
Expand Down
5 changes: 5 additions & 0 deletions examples/flatfile/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Pants targets for the flat-file example.
# The sources are pinned to CPython 3.10+ (and below 4); main.py in this
# directory uses the `match` statement, which needs Python 3.10 or newer.
python_sources(interpreter_constraints=["CPython>=3.10,<4"])

# Third-party requirements target, addressable by the name "reqs".
# NOTE(review): presumably this picks up the sibling requirements.txt by
# default - confirm against the Pants `python_requirements` documentation.
python_requirements(
name="reqs",
)
17 changes: 17 additions & 0 deletions examples/flatfile/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Flat file example

## Dependencies

* Typer
* pandas
* openpyxl
* PyArrow
* jsonlines

## Usage

Run `main.py` from this example folder:

```bash
python main.py --schema schema.json --records 5 --output-format parquet --output output.parquet
```
68 changes: 68 additions & 0 deletions examples/flatfile/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import json
from enum import Enum
from pathlib import Path

import jsonlines
import pandas as pd
import typer
from typing_extensions import Annotated

from jsf import JSF


class OutputFormat(str, Enum):
    """File formats that the generated fake records can be written in.

    The ``str`` mix-in makes each member compare equal to its plain string
    value, so a member can be built directly from text such as ``"csv"``.
    """

    # Tabular / columnar formats.
    csv = "csv"
    excel = "excel"
    parquet = "parquet"

    # JSON-based formats.
    json = "json"
    jsonl = "jsonl"


def main(
schema: Annotated[
Path,
typer.Option(
exists=True,
file_okay=True,
dir_okay=False,
writable=False,
readable=True,
resolve_path=True,
help="Path to the JSON schema used to produce the fake data.",
),
],
records: Annotated[int, typer.Option(min=0, help="Number of records you wish to produce.")],
output_format: Annotated[OutputFormat, typer.Option(help="Fake data output format.")],
output: Annotated[Path, typer.Option(help="Output file path")],
):
faker = JSF.from_json(schema)
fake_data = faker.generate(records)
match output_format:
case OutputFormat.csv:
pd.DataFrame.from_records(fake_data).to_csv(output, index=False)
case OutputFormat.excel:
more_fake_data = faker.generate(records)
custom_header = [
v.get("title") or k for k, v in faker.root_schema["properties"].items()
]
with pd.ExcelWriter(output) as excel_writer:
pd.DataFrame.from_records(fake_data).to_excel(
excel_writer, sheet_name="Fake Data", index=False, header=custom_header
)
pd.DataFrame.from_records(more_fake_data).to_excel(
excel_writer, sheet_name="More Fake Data", index=False, header=custom_header
)
case OutputFormat.json:
with open(output, "w") as f:
json.dump(fake_data, f)
case OutputFormat.jsonl:
with jsonlines.open(output, mode="w") as writer:
writer.write_all(fake_data)
case OutputFormat.parquet:
pd.DataFrame.from_records(fake_data).to_parquet(output, index=False)
case _:
raise NotImplementedError("Unable to produce in this file format yet")


# Script entry point: hand `main` to Typer, which runs it as the command.
if __name__ == "__main__":
    typer.run(main)
6 changes: 6 additions & 0 deletions examples/flatfile/output.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name,email
Jessica Bennett,[email protected]
Christine Sanchez,[email protected]
Emily Ayala,[email protected]
Julia Dickerson,[email protected]
Justin Miller,[email protected]
1 change: 1 addition & 0 deletions examples/flatfile/output.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"name": "Melissa Jackson", "email": "[email protected]"}, {"name": "Miss Tina Morales", "email": "[email protected]"}, {"name": "Jake Vazquez", "email": "[email protected]"}, {"name": "Terri Taylor", "email": "[email protected]"}, {"name": "Kayla Williams", "email": "[email protected]"}]
5 changes: 5 additions & 0 deletions examples/flatfile/output.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{"name": "Judith Lara", "email": "[email protected]"}
{"name": "Joseph Warren", "email": "[email protected]"}
{"name": "Marilyn Thompson", "email": "[email protected]"}
{"name": "Teresa Brown", "email": "[email protected]"}
{"name": "Derek Singleton", "email": "[email protected]"}
Binary file added examples/flatfile/output.parquet
Binary file not shown.
Binary file added examples/flatfile/output.xlsx
Binary file not shown.
5 changes: 5 additions & 0 deletions examples/flatfile/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
jsf
typer
pandas
openpyxl
PyArrow
jsonlines
8 changes: 8 additions & 0 deletions examples/flatfile/schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"type": "object",
"properties": {
"name": {"type": "string", "$provider": "faker.name", "title": "Full Name"},
"email": {"type": "string", "$provider": "faker.email", "title": "Email"}
},
"required": ["name", "email"]
}

0 comments on commit 3d369b6

Please sign in to comment.