Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for file stream #28

Merged
merged 1 commit into from
Apr 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pdf2john"
version = "0.1.10"
version = "0.2.0"
description = "A modern refactoring of the legacy pdf2john library"
authors = ["Benjamin Dornel <[email protected]>"]
license = "MIT"
Expand Down
23 changes: 18 additions & 5 deletions src/pdf2john/pdf2john.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import argparse
import logging
import sys
from io import BytesIO
from typing import Optional

from pyhanko.pdf_utils.misc import PdfReadError
from pyhanko.pdf_utils.reader import PdfFileReader
Expand Down Expand Up @@ -50,11 +52,22 @@ class PdfHashExtractor:
- `revision`: Revision of the standard security handler
"""

def __init__(self, file_name: str, strict: bool = False):
self.file_name = file_name

with open(file_name, "rb") as doc:
self.pdf = PdfFileReader(doc, strict=strict)
def __init__(
self,
file_name: Optional[str] = None,
file_bytes: Optional[str] = None,
strict: bool = False,
):
if not any([file_name, file_bytes]):
raise RuntimeError("Either file name or file stream must be passed")

if file_bytes:
stream = BytesIO(file_bytes)
else:
stream = open(file_name, "rb")

with stream:
self.pdf = PdfFileReader(stream, strict=strict)
self.encrypt_dict = self.pdf.encrypt_dict

if not self.encrypt_dict:
Expand Down
14 changes: 12 additions & 2 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,18 @@ def test_main_unencrypted(unencrypted_pdf_path, caplog):


def test_parse_unencrypted_should_not_return_encrypt_dict(unencrypted_pdf_path):
    """Check that an extractor built from `unencrypted_pdf_path` has a falsy `encrypt_dict`."""
    # Build the extractor once: constructing it a second time under another
    # name and repeating the same assertion adds no coverage.
    pdf = PdfHashExtractor(unencrypted_pdf_path)
    assert not pdf.encrypt_dict


def test_can_read_from_byte_stream():
    """Build the extractor from in-memory PDF bytes and check its parsed fields."""
    # Read the whole sample document up front so that only raw bytes, not a
    # path or an open file handle, are handed to the extractor.
    with open("tests/pdf/pypdf/r6-owner-password.pdf", "rb") as handle:
        raw_pdf = handle.read()

    extractor = PdfHashExtractor(file_bytes=raw_pdf)

    # Values expected for this sample file.
    assert extractor.algorithm == 5
    assert extractor.length == 256
    assert extractor.permissions == -4
    assert extractor.revision == 6


def test_invalid_pdf():
Expand Down
Loading