Skip to content

Commit

Permalink
Always remove PDF metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
MWedl committed Nov 13, 2024
1 parent 6f27c7a commit 7810dd0
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 4 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
* Allow cancelling PDF rendering requests
* Enforce PDF rendering timeout in self-hosted installations (default: 5 min)
* Show PDF render timing information
* Always remove PDF metadata
* Add button to download preview PDF
* Fix error while updating user fields via REST API
* Update HTB designs to improve table rendering performance
* Add button to download preview PDF


## v2024.81 - 2024-10-25
Expand Down
14 changes: 13 additions & 1 deletion api/src/reportcreator_api/tasks/rendering/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,17 @@ def decode_data(stdout):
)


@sync_to_async()
def remove_metadata(pdf_data: bytes) -> bytes:
    """Return a re-serialized copy of ``pdf_data`` with its metadata removed.

    Removes the ``Metadata`` entry of the document root (when present) and the
    document's ``docinfo``, then saves the document to an in-memory buffer.

    The function is wrapped with ``sync_to_async``, so callers await it.

    Args:
        pdf_data: Raw bytes of the PDF to process.

    Returns:
        The bytes of the saved PDF.
    """
    with Pdf.open(BytesIO(pdf_data)) as pdf:
        # Not every PDF carries a /Metadata entry; deleting a missing key
        # would raise and make the caller discard the processed PDF.
        if '/Metadata' in pdf.Root:
            del pdf.Root.Metadata
        del pdf.docinfo

        # Serialize while the document is still open.
        out_data = BytesIO()
        pdf.save(filename_or_stream=out_data)
    return out_data.getvalue()


@log_timing(log_start=True)
async def compress_pdf(pdf_data: bytes) -> RenderStageResult:
out = RenderStageResult()
Expand All @@ -90,7 +101,7 @@ async def compress_pdf(pdf_data: bytes) -> RenderStageResult:
raise Exception(f'Ghostscript failed with exit code {proc.returncode}')

pdfout.seek(0)
out.pdf = pdfout.read()
out.pdf = await remove_metadata(pdfout.read())
except Exception:
logging.exception('Error while compressing PDF (ghostscript)')
out.pdf = pdf_data
Expand Down Expand Up @@ -164,5 +175,6 @@ async def render_pdf_impl(
pdf_data=out.pdf,
password=password,
)

return out

Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ def weasyprint_capture_logs(msg, *args, **kwargs):
))

def weasyprint_strip_pdf_metadata(doc, pdf):
    """Remove all entries from the PDF's info mapping.

    Args:
        doc: Unused by this function.
        pdf: Object whose ``info`` mapping is emptied in place.
    """
    # Clearing the whole mapping also drops 'Producer'; deleting that key
    # separately first would raise KeyError when it is absent.
    pdf.info.clear()

# Capture weasyprint logs and provide as messages
with mock.patch.object(WEASYPRINT_LOGGER, 'error', new=weasyprint_capture_logs, spec=True), \
Expand Down

0 comments on commit 7810dd0

Please sign in to comment.