Prepare 0.16.3. Add section for docx creation.

barseghyanartur · Jun 29, 2023 · 04dc099 · 04dc099
1 parent d55d0ca
commit 04dc099
Show file tree

Hide file tree

Showing 13 changed files with 174 additions and 401 deletions.
diff --git a/.secrets.baseline b/.secrets.baseline
@@ -118,7 +118,7 @@
         "filename": "README.rst",
         "hashed_secret": "077d5a0e0f8bb517307a6e92a73b0a9aa959233c",
         "is_verified": true,
-        "line_number": 459
+        "line_number": 461
       }
     ],
     "examples/django_example/project/settings/base.py": [
@@ -176,5 +176,5 @@
       }
     ]
   },
-  "generated_at": "2023-06-29T22:03:52Z"
+  "generated_at": "2023-06-29T23:37:17Z"
 }
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -17,9 +17,11 @@ are used for versioning (schema follows below):
 
 0.16.3
 ------
-2023-06-29
+2023-06-30
 
-- Documentation improvements.
+- Documentation improvements. Added a dedicated section for creating PDF files.
+  Added a dedicated section for creating DOCX files.
+- Added ``add_paragraph`` and ``add_page_break`` to the ``DOCX`` contrib module.
 
 0.16.2
 ------

diff --git a/README.rst b/README.rst
@@ -33,7 +33,8 @@ faker-file
 .. _Read the Docs: http://faker-file.readthedocs.io/
 .. _Quick start: https://faker-file.readthedocs.io/en/latest/quick_start.html
 .. _Recipes: https://faker-file.readthedocs.io/en/latest/recipes.html
-.. _Creating PDFs: https://faker-file.readthedocs.io/en/latest/creating_pdfs.html
+.. _Creating PDF: https://faker-file.readthedocs.io/en/latest/creating_pdfs.html
+.. _Creating DOCX: https://faker-file.readthedocs.io/en/latest/creating_docx.html
 .. _CLI: https://faker-file.readthedocs.io/en/latest/cli.html
 .. _Contributor guidelines: https://faker-file.readthedocs.io/en/latest/contributor_guidelines.html
 
@@ -121,7 +122,8 @@ Documentation
 - Documentation is available on `Read the Docs`_.
 - For bootstrapping check the `Quick start`_.
 - For various ready to use code examples see the `Recipes`_.
-- For tips on PDF creation see `Creating PDFs`_.
+- For tips on ``PDF`` creation see `Creating PDF`_.
+- For tips on ``DOCX`` creation see `Creating DOCX`_.
 - For CLI options see the `CLI`_.
 - For guidelines on contributing check the `Contributor guidelines`_.
 

diff --git a/docs/creating_docx.rst b/docs/creating_docx.rst
@@ -0,0 +1,91 @@
+Creating DOCX
+=============
+
+Building DOCX
+-------------
+See the following fully functional snippet for generating DOCX.
+
+.. code-block:: python
+
+    # Imports
+    from faker import Faker
+    from faker_file.providers.docx_file import DocxFileProvider
+
+    FAKER = Faker() # Initialize Faker
+    FAKER.add_provider(DocxFileProvider)  # Register DocxFileProvider
+
+    # Generate DOCX file
+    docx_file = FAKER.docx_file()
+
+The generated DOCX will have 10,000 characters of text, which is about 5 pages.
+
+If you want DOCX with more pages, you could either:
+
+- Increase the value of ``max_nb_chars`` accordingly.
+- Set value of ``wrap_chars_after`` to 80 characters to force longer pages.
+- Insert manual page breaks and other content.
+
+See the example below for ``max_nb_chars`` tweak:
+
+.. code-block:: python
+
+    # Generate DOCX file of 20,000 characters
+    docx_file = FAKER.docx_file(max_nb_chars=20_000)
+
+See the example below for ``wrap_chars_after`` tweak:
+
+.. code-block:: python
+
+    # Generate DOCX file, wrapping each line after 80 characters
+    docx_file = FAKER.docx_file(wrap_chars_after=80)
+
+As mentioned above, it's possible to diversify the generated content with
+images, paragraphs, tables, manual page breaks and pretty much everything that
+is supported by the DOCX format specification, although currently only images,
+paragraphs, tables and manual page breaks are supported out of the box. In
+order to customise the blocks a DOCX file is built from, the ``DynamicTemplate``
+class is used. See the example below for usage:
+
+.. code-block:: python
+
+    # Additional imports
+    from faker_file.base import DynamicTemplate
+    from faker_file.contrib.docx_file import (
+        add_page_break,
+        add_paragraph,
+        add_picture,
+        add_table,
+    )
+
+    # Create a DOCX file with paragraph, picture, table and manual page breaks
+    # in between the mentioned elements. The ``DynamicTemplate`` simply
+    # accepts a list of callables (such as ``add_paragraph``,
+    # ``add_page_break``) and dictionary to be later on fed to the callables
+    # as keyword arguments for customising the default values.
+    docx_file = FAKER.docx_file(
+        content=DynamicTemplate(
+            [
+                (add_paragraph, {}),  # Add paragraph
+                (add_page_break, {}),  # Add page break
+                (add_picture, {}),  # Add picture
+                (add_page_break, {}),  # Add page break
+                (add_table, {}),  # Add table
+                (add_page_break, {}),  # Add page break
+            ]
+        )
+    )
+
+    # You could make the list as long as you like or simply multiply for
+    # easier repetition as follows:
+    docx_file = FAKER.docx_file(
+        content=DynamicTemplate(
+            [
+                (add_paragraph, {}),  # Add paragraph
+                (add_page_break, {}),  # Add page break
+                (add_picture, {}),  # Add picture
+                (add_page_break, {}),  # Add page break
+                (add_table, {}),  # Add table
+                (add_page_break, {}),  # Add page break
+            ] * 100  # Will repeat your config 100 times
+        )
+    )
diff --git a/docs/creating_pdfs.rst b/docs/creating_pdfs.rst
@@ -1,29 +1,29 @@
-Creating PDFs
-=============
+Creating PDF
+============
 .. External references
 
 .. _wkhtmltopdf: https://wkhtmltopdf.org/
 .. _pdfkit: https://pypi.org/project/pdfkit/
 .. _reportlab: https://pypi.org/project/reportlab/
 
-PDF is certainly one of the most complicated formats out there. And certainly
-one of the formats most of the developers will be having trouble with, as
-there are many versions and dialects. That makes it almost challenging to
-create one way of making PDFs. That's why, creation of PDF files have been
-delegated to flexible abstraction layer - PDF generators. If you don't like
-how PDFs are generated, you can create your own layer, using your favourite
-library.
+PDF is certainly one of the most complicated formats out there. And
+certainly one of the formats most of the developers will be having trouble
+with, as there are many versions and dialects. That makes it quite challenging
+to create one way of making PDF files. That's why the creation of PDF
+files has been delegated to a flexible abstraction layer - PDF generators.
+If you don't like how PDF files are generated, you can create your own
+layer, using your favourite library.
 
 Currently, there are two PDF generators:
 
 - ``PdfkitPdfGenerator`` (default), built on top of the `pdfkit`_
   and `wkhtmltopdf`_.
 - ``ReportlabPdfGenerator``, built on top of the famous `reportlab`_.
 
-Building PDFs using `pdfkit`_
------------------------------
+Building PDF using `pdfkit`_
+----------------------------
 While `pdfkit`_ generator is heavier and has `wkhtmltopdf`_ as a system
-dependency, it's produces better quality PDFs and has no issues with fonts
+dependency, it produces better quality PDF files and has no issues with fonts
 or unicode characters.
 
 See the following full functional snippet for generating PDF using `pdfkit`_.
@@ -64,7 +64,7 @@ See the example below for ``wrap_chars_after`` tweak:
 
 .. code-block:: python
 
-    # Generate PDF file of 20,000 characters
+    # Generate PDF file, wrapping each line after 80 characters
     pdf_file = FAKER.pdf_file(
         pdf_generator_cls=PdfkitPdfGenerator, wrap_chars_after=80
     )
@@ -125,8 +125,8 @@ class is used. See the example below for usage examples:
 Building PDFs using `reportlab`_
 --------------------------------
 While `reportlab`_ generator is much lighter than the `pdfkit`_ and does not
-have system dependencies, but might produce PDS with questionable encoding
-when generating unicode text.
+have system dependencies, it might produce PDF files with questionable
+encoding when generating unicode text.
 
 See the following full functional snippet for generating PDF using `reportlab`_.
 
@@ -143,8 +143,8 @@ See the following full functional snippet for generating PDF using `reportlab`_.
     pdf_file = FAKER.pdf_file(pdf_generator_cls=ReportlabPdfGenerator)
 
 All examples shown for `pdfkit`_ apply for `reportlab`_ generator, however
-when building PDFs from blocks (paragraphs, images, tables and page breaks),
-the imports shall be adjusted:
+when building PDF files from blocks (paragraphs, images, tables and page
+breaks), the imports shall be adjusted:
 
 As mentioned above, it's possible to diversify the generated context with
 images, paragraphs, tables, manual text break and pretty much everything that

diff --git a/docs/documentation.rst b/docs/documentation.rst
@@ -12,6 +12,7 @@ Contents:
    quick_start
    recipes
    creating_pdfs
+   creating_docx
    cli
    security
    code_of_conduct

diff --git a/setup.py b/setup.py
@@ -2,7 +2,7 @@
 
 from setuptools import find_packages, setup
 
-version = "0.16.2"
+version = "0.16.3"
 
 try:
     readme = open(os.path.join(os.path.dirname(__file__), "README.rst")).read()

diff --git a/src/faker_file/__init__.py b/src/faker_file/__init__.py
@@ -1,5 +1,5 @@
 __title__ = "faker_file"
-__version__ = "0.16.2"
+__version__ = "0.16.3"
 __author__ = "Artur Barseghyan <[email protected]>"
 __copyright__ = "2022-2023 Artur Barseghyan"
 __license__ = "MIT"
diff --git a/src/faker_file/contrib/docx_file.py b/src/faker_file/contrib/docx_file.py
@@ -1,9 +1,13 @@
 from io import BytesIO
 
+from ..base import DEFAULT_FORMAT_FUNC
+
 __author__ = "Artur Barseghyan <[email protected]>"
 __copyright__ = "2022-2023 Artur Barseghyan"
 __license__ = "MIT"
 __all__ = (
+    "add_page_break",
+    "add_paragraph",
     "add_picture",
     "add_table",
 )
@@ -47,3 +51,32 @@ def add_picture(provider, document, data, counter, **kwargs):
     #     jpeg_file.data["content"]
     # )
     # data["content"] += "\r\n" + jpeg_file.data["content"]
+
+
+def add_page_break(provider, document, data, counter, **kwargs):
+    """Callable responsible for page break generation.
+
+    Calls ``document.add_page_break()`` and nothing else; ``provider``,
+    ``data``, ``counter`` and ``kwargs`` are accepted but not used here.
+    """
+    # Insert a page break
+    document.add_page_break()
+
+
+def add_paragraph(provider, document, data, counter, **kwargs):
+    """Callable responsible for the paragraph generation.
+
+    Generates text via ``provider._generate_text_content``, appends it to
+    ``document`` as a paragraph and records it in ``data``.
+
+    Recognised keyword arguments (all forwarded to the text generator):
+
+    - ``content``: defaults to ``None``.
+    - ``max_nb_chars``: defaults to ``5_000``.
+    - ``wrap_chars_after``: defaults to ``None``.
+    - ``format_func``: defaults to ``DEFAULT_FORMAT_FUNC``.
+    """
+    content = kwargs.get("content", None)
+    # Read the limit from its own key. Looking it up under "content" would
+    # ignore a caller-supplied ``max_nb_chars`` and pass any given content
+    # on as the character limit.
+    max_nb_chars = kwargs.get("max_nb_chars", 5_000)
+    wrap_chars_after = kwargs.get("wrap_chars_after", None)
+    format_func = kwargs.get("format_func", DEFAULT_FORMAT_FUNC)
+
+    _content = provider._generate_text_content(
+        max_nb_chars=max_nb_chars,
+        wrap_chars_after=wrap_chars_after,
+        content=content,
+        format_func=format_func,
+    )
+    document.add_paragraph(_content)
+
+    # Meta-data: keep the generated text per ``counter`` under
+    # ``content_modifiers`` and append it to the accumulated ``content``.
+    data.setdefault("content_modifiers", {})
+    data["content_modifiers"].setdefault("add_paragraph", {})
+    data["content_modifiers"]["add_paragraph"].setdefault(counter, [])
+    data["content_modifiers"]["add_paragraph"][counter].append(_content)
+    data["content"] += "\r\n" + _content
diff --git a/src/faker_file/contrib/pdf_file/pdfkit_snippets.py b/src/faker_file/contrib/pdf_file/pdfkit_snippets.py
@@ -96,15 +96,12 @@ def add_paragraph(
     wrap_chars_after = kwargs.get("wrap_chars_after", None)
     format_func = kwargs.get("format_func", DEFAULT_FORMAT_FUNC)
 
-    if content:
-        _content = provider._generate_text_content(
-            max_nb_chars=max_nb_chars,
-            wrap_chars_after=wrap_chars_after,
-            content=content,
-            format_func=format_func,
-        )
-    else:
-        _content = provider.generator.text(max_nb_chars=5_000)
+    _content = provider._generate_text_content(
+        max_nb_chars=max_nb_chars,
+        wrap_chars_after=wrap_chars_after,
+        content=content,
+        format_func=format_func,
+    )
 
     paragraph_html = f"<div><p>{_content}</p></div>"
     document += "\r\n" + paragraph_html

diff --git a/src/faker_file/contrib/pdf_file/reportlab_snippets.py b/src/faker_file/contrib/pdf_file/reportlab_snippets.py
@@ -129,15 +129,12 @@ def add_paragraph(
     wrap_chars_after = kwargs.get("wrap_chars_after", None)
     format_func = kwargs.get("format_func", DEFAULT_FORMAT_FUNC)
 
-    if content:
-        _content = provider._generate_text_content(
-            max_nb_chars=max_nb_chars,
-            wrap_chars_after=wrap_chars_after,
-            content=content,
-            format_func=format_func,
-        )
-    else:
-        _content = provider.generator.text(max_nb_chars=5_000)
+    _content = provider._generate_text_content(
+        max_nb_chars=max_nb_chars,
+        wrap_chars_after=wrap_chars_after,
+        content=content,
+        format_func=format_func,
+    )
 
     # Insert a paragraph
     styles = getSampleStyleSheet()