-
Notifications
You must be signed in to change notification settings - Fork 35
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into chore/docs-and-types-cleanup
- Loading branch information
Showing
11 changed files
with
116 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
# Application | ||
*.json | ||
!examples/**/*.json | ||
!.vscode/settings.json | ||
!jsf/tests/data/*.json | ||
TODO.md | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Pants targets for the flat-file example.
# The sources are pinned to CPython 3.10+ (main.py uses `match`, a 3.10 feature).
python_sources(interpreter_constraints=["CPython>=3.10,<4"])

# NOTE(review): presumably generates targets from the sibling requirements.txt
# (the Pants default source) — confirm against the Pants version in use.
python_requirements(
    name="reqs",
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Flat file example | ||
|
||
## Dependencies | ||
|
||
* Typer | ||
* pandas | ||
* openpyxl | ||
* PyArrow | ||
* jsonlines | ||
|
||
## Usage | ||
|
||
Run `main.py` from this example folder: | ||
|
||
```bash | ||
python main.py --schema schema.json --records 5 --output-format parquet --output output.parquet | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
import json | ||
from enum import Enum | ||
from pathlib import Path | ||
|
||
import jsonlines | ||
import pandas as pd | ||
import typer | ||
from typing_extensions import Annotated | ||
|
||
from jsf import JSF | ||
|
||
|
||
class OutputFormat(str, Enum):
    """File formats the example can write.

    Mixing in ``str`` makes each member compare equal to its plain string
    value, so the names below double as the accepted ``--output-format``
    values.
    """

    csv = "csv"
    excel = "excel"
    parquet = "parquet"
    json = "json"
    jsonl = "jsonl"
|
||
|
||
def main( | ||
schema: Annotated[ | ||
Path, | ||
typer.Option( | ||
exists=True, | ||
file_okay=True, | ||
dir_okay=False, | ||
writable=False, | ||
readable=True, | ||
resolve_path=True, | ||
help="Path to the JSON schema used to produce the fake data.", | ||
), | ||
], | ||
records: Annotated[int, typer.Option(min=0, help="Number of records you wish to produce.")], | ||
output_format: Annotated[OutputFormat, typer.Option(help="Fake data output format.")], | ||
output: Annotated[Path, typer.Option(help="Output file path")], | ||
): | ||
faker = JSF.from_json(schema) | ||
fake_data = faker.generate(records) | ||
match output_format: | ||
case OutputFormat.csv: | ||
pd.DataFrame.from_records(fake_data).to_csv(output, index=False) | ||
case OutputFormat.excel: | ||
more_fake_data = faker.generate(records) | ||
custom_header = [ | ||
v.get("title") or k for k, v in faker.root_schema["properties"].items() | ||
] | ||
with pd.ExcelWriter(output) as excel_writer: | ||
pd.DataFrame.from_records(fake_data).to_excel( | ||
excel_writer, sheet_name="Fake Data", index=False, header=custom_header | ||
) | ||
pd.DataFrame.from_records(more_fake_data).to_excel( | ||
excel_writer, sheet_name="More Fake Data", index=False, header=custom_header | ||
) | ||
case OutputFormat.json: | ||
with open(output, "w") as f: | ||
json.dump(fake_data, f) | ||
case OutputFormat.jsonl: | ||
with jsonlines.open(output, mode="w") as writer: | ||
writer.write_all(fake_data) | ||
case OutputFormat.parquet: | ||
pd.DataFrame.from_records(fake_data).to_parquet(output, index=False) | ||
case _: | ||
raise NotImplementedError("Unable to produce in this file format yet") | ||
|
||
|
||
if __name__ == "__main__":
    # Hand main to Typer, which runs it as the command-line entry point.
    typer.run(main)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
name,email | ||
Jessica Bennett,[email protected] | ||
Christine Sanchez,[email protected] | ||
Emily Ayala,[email protected] | ||
Julia Dickerson,[email protected] | ||
Justin Miller,[email protected] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
[{"name": "Melissa Jackson", "email": "[email protected]"}, {"name": "Miss Tina Morales", "email": "[email protected]"}, {"name": "Jake Vazquez", "email": "[email protected]"}, {"name": "Terri Taylor", "email": "[email protected]"}, {"name": "Kayla Williams", "email": "[email protected]"}] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{"name": "Judith Lara", "email": "[email protected]"} | ||
{"name": "Joseph Warren", "email": "[email protected]"} | ||
{"name": "Marilyn Thompson", "email": "[email protected]"} | ||
{"name": "Teresa Brown", "email": "[email protected]"} | ||
{"name": "Derek Singleton", "email": "[email protected]"} |
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
jsf | ||
pandas | ||
openpyxl | ||
PyArrow | ||
jsonlines |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
{ | ||
"type": "object", | ||
"properties": { | ||
"name": {"type": "string", "$provider": "faker.name", "title": "Full Name"}, | ||
"email": {"type": "string", "$provider": "faker.email", "title": "Email"} | ||
}, | ||
"required": ["name", "email"] | ||
} |