Skip to content

Commit

Permalink
feat: add debug option to cli
Browse files Browse the repository at this point in the history
  • Loading branch information
benjamin-awd committed Oct 23, 2023
1 parent bb6ef7c commit f1898ac
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 18 deletions.
14 changes: 13 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ Run pdf2john.py with a PDF of choice
python3 pdf2john.py tests/pdf/pypdf/r6-user-password.pdf
```

which will output a hash like:
```text
$pdf$5*6*256*-4*1*16*fce2fe96b7
```

To pass the hash to john:
```bash
python3 pdf2john.py tests/pdf/pypdf/r6-empty-password.pdf >> .hash
Expand All @@ -47,7 +52,14 @@ john --show --format=PDF .hash
- Responsibility for PDF parsing and handling has been delegated to [pyHanko](https://github.com/MatthiasValvekens/pyHanko) (a cryptography-focused fork of PyPDF2)
- CI/CD workflow that tests pdf2john against PDFs ranging from Security Handler Revision 2 -> 6
- Removal of legacy Python 2.x support
- An optional debugging flag that shows the encryption dictionary of a PDF
- Readability improvements (a general attempt to be more Pythonic)

## Troubleshooting
To access the encryption dictionary:
```bash
python3 pdf2john.py tests/pdf/pypdf/r6-user-password.pdf --debug
```

## Acknowledgement
This repository was based on the original pdf2john.py code by [Shane Quigley](https://github.com/ShaneQful)
This repository was based on the original pdf2john.py by [Shane Quigley](https://github.com/ShaneQful)
47 changes: 30 additions & 17 deletions pdf2john.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#!/usr/bin/env python

import argparse
import logging
import os
import sys

from pyhanko.pdf_utils.misc import PdfReadError
from pyhanko.pdf_utils.reader import PdfFileReader

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -44,15 +44,15 @@ def __init__(self, file_name):

with open(file_name, "rb") as doc:
self.pdf = PdfFileReader(doc, strict=False)
encrypt_dict = self.pdf._get_encryption_params()
self.encrypt_dict = self.pdf._get_encryption_params()

if not encrypt_dict:
if not self.encrypt_dict:
raise RuntimeError("File not encrypted")

self.algorithm: int = encrypt_dict.get("/V")
self.length: int = encrypt_dict.get("/Length", 40)
self.permissions: int = encrypt_dict["/P"]
self.revision: int = encrypt_dict["/R"]
self.algorithm: int = self.encrypt_dict.get("/V")
self.length: int = self.encrypt_dict.get("/Length", 40)
self.permissions: int = self.encrypt_dict["/P"]
self.revision: int = self.encrypt_dict["/R"]

@property
def document_id(self) -> bytes:
Expand Down Expand Up @@ -102,15 +102,28 @@ def get_passwords(self) -> str:


if __name__ == "__main__":
    # Command-line entry point: print one John-the-Ripper hash per PDF given,
    # optionally followed by that PDF's encryption dictionary (--debug).
    parser = argparse.ArgumentParser(description="PDF Hash Extractor")
    parser.add_argument(
        "pdf_files", nargs="+", help="PDF file(s) to extract information from"
    )
    parser.add_argument(
        "-d", "--debug", action="store_true", help="Print the encryption dictionary"
    )
    args = parser.parse_args()

    for filename in args.pdf_files:
        # Only construction and parsing are expected to raise, so keep the
        # try body limited to them. A failure is logged and the loop moves on,
        # so one bad file does not abort the remaining ones.
        try:
            extractor = PdfHashExtractor(filename)
            pdf_hash = extractor.parse()
        except PdfReadError as error:
            # Malformed/unreadable PDF: include the traceback for diagnosis.
            logger.error("%s : %s", filename, error, exc_info=True)
            continue
        except RuntimeError as error:
            # Raised by PdfHashExtractor for e.g. "File not encrypted".
            logger.error("%s : %s", filename, error)
            continue

        print(pdf_hash)

        if args.debug:
            if extractor.encrypt_dict:
                print("Encryption Dictionary:")
                for key, value in extractor.encrypt_dict.items():
                    print(f"{key}: {value}")
            else:
                print("No encryption dictionary found in the PDF.")
15 changes: 15 additions & 0 deletions tests/debug_output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
$pdf$5*6*256*-4*1*16*fce2fe96b7e142b4a0576f61e2e9c441*48*aef6c4bf5e8a0f3bb1adef2b8ac2367d1ce95ecc1ddc3243ce49786086a023aa310aa9f7d1d103f837e4d4f738ac913d*48*eabf37f3f1f1b208f7c8ddfad3b817c689889ecbadd30f4581382cfbf79806304fb438e9ca227a023138a38eadcf82f3*32*37afcbfcbb32d4e1bca1eb10165693a1633ebb742c00045177a284ba22196937*32*3459a644d5f4c4f7cee562b754b30df48d598e1911ea513ef29bb3928593caf3
Encryption Dictionary:
/CF: {'/StdCF': {'/AuthEvent': '/DocOpen', '/CFM': '/AESV3', '/Length': 32}}
/Filter: /Standard
/Length: 256
/O: b'\xea\xbf7\xf3\xf1\xf1\xb2\x08\xf7\xc8\xdd\xfa\xd3\xb8\x17\xc6\x89\x88\x9e\xcb\xad\xd3\x0fE\x818,\xfb\xf7\x98\x060O\xb48\xe9\xca"z\x0218\xa3\x8e\xad\xcf\x82\xf3'
/OE: b'7\xaf\xcb\xfc\xbb2\xd4\xe1\xbc\xa1\xeb\x10\x16V\x93\xa1c>\xbbt,\x00\x04Qw\xa2\x84\xba"\x19i7'
/P: -4
/Perms: b'\x00\xe2p@\xe8I\xe5\xc8\xcd^\xb1\x8c\xba\x99\xceI'
/R: 6
/StmF: /StdCF
/StrF: /StdCF
/U: b'\xae\xf6\xc4\xbf^\x8a\x0f;\xb1\xad\xef+\x8a\xc26}\x1c\xe9^\xcc\x1d\xdc2C\xceIx`\x86\xa0#\xaa1\n\xa9\xf7\xd1\xd1\x03\xf87\xe4\xd4\xf78\xac\x91='
/UE: b'4Y\xa6D\xd5\xf4\xc4\xf7\xce\xe5b\xb7T\xb3\r\xf4\x8dY\x8e\x19\x11\xeaQ>\xf2\x9b\xb3\x92\x85\x93\xca\xf3'
/V: 5
49 changes: 49 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import subprocess


def run_cli(args, input_text=None):
    """Run ``pdf2john.py`` in a child interpreter and capture its output.

    Args:
        args: Command-line arguments placed after the script name.
        input_text: Optional text sent to the child's stdin.

    Returns:
        A ``(process, stdout, stderr)`` tuple: the finished ``Popen`` object
        (``returncode`` is populated) and the captured stdout/stderr as text.
    """
    # Imported locally so this helper does not depend on a module-level import.
    import sys

    # Use the interpreter that is running the tests so the child sees the same
    # environment (virtualenv, installed packages). A bare "python3" may point
    # at a different installation or be missing from PATH altogether.
    interpreter = sys.executable or "python3"

    process = subprocess.Popen(
        [interpreter, "pdf2john.py", *args],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        stdin=subprocess.PIPE,
        text=True,
    )
    # communicate() feeds stdin, drains both pipes and waits for exit.
    stdout, stderr = process.communicate(input_text)
    return process, stdout, stderr


def test_cli_extraction():
    """A single PDF argument produces exactly one hash line and exit code 0."""
    result, output, _ = run_cli(["tests/pdf/r6-test-bad-password.pdf"])

    expected_hash = "$pdf$5*6*256*-4*1*16*fce2fe96b7e142b4a0576f61e2e9c441*48*aef6c4bf5e8a0f3bb1adef2b8ac2367d1ce95ecc1ddc3243ce49786086a023aa310aa9f7d1d103f837e4d4f738ac913d*48*eabf37f3f1f1b208f7c8ddfad3b817c689889ecbadd30f4581382cfbf79806304fb438e9ca227a023138a38eadcf82f3*32*37afcbfcbb32d4e1bca1eb10165693a1633ebb742c00045177a284ba22196937*32*3459a644d5f4c4f7cee562b754b30df48d598e1911ea513ef29bb3928593caf3"

    # A zero exit status means the CLI ran to completion without crashing.
    assert result.returncode == 0
    # The hash is the only thing printed, terminated by a single newline.
    assert output == f"{expected_hash}\n"


def test_handle_multiple_files():
    """Several PDF arguments produce one hash line each, in argument order."""
    # Maps each input PDF to the hash the CLI should print for it; insertion
    # order doubles as the expected output order.
    hash_by_pdf = {
        "tests/pdf/r6-test-bad-password.pdf": "$pdf$5*6*256*-4*1*16*fce2fe96b7e142b4a0576f61e2e9c441*48*aef6c4bf5e8a0f3bb1adef2b8ac2367d1ce95ecc1ddc3243ce49786086a023aa310aa9f7d1d103f837e4d4f738ac913d*48*eabf37f3f1f1b208f7c8ddfad3b817c689889ecbadd30f4581382cfbf79806304fb438e9ca227a023138a38eadcf82f3*32*37afcbfcbb32d4e1bca1eb10165693a1633ebb742c00045177a284ba22196937*32*3459a644d5f4c4f7cee562b754b30df48d598e1911ea513ef29bb3928593caf3",
        "tests/pdf/pypdf/r6-user-password.pdf": "$pdf$5*6*256*-4*1*16*fce2fe96b7e142b4a0576f61e2e9c441*48*ae700c793d687882958ce411fed797a6364e182dc4cb8c3819d347b9d577c3526d9fd5c2b9fe54cfed6539accc53ac28*48*57c08f4d2b7e02a2eb6deabf903267643bd971f5201ed5e1865311001c05d012e7a9dea18e3de1aa35675d0069944da1*32*ec81dd84bd5492acdcd5f82cd3093427d2db0eace3789f0f39c65ebea10253dd*32*557c4e6be3cc2bd0166a1b1ab3b0e09d6f0e3f5d17d305925c7055116ac14c6c",
    }
    result, output, _ = run_cli(list(hash_by_pdf))

    # A zero exit status means the CLI ran to completion without crashing.
    assert result.returncode == 0

    # Only the first two lines matter; the split leaves a trailing empty entry
    # after the final newline.
    first_two_lines = output.split("\n")[:2]
    assert first_two_lines == list(hash_by_pdf.values())


def test_cli_debug():
    """``--debug`` output matches the recorded fixture byte for byte."""
    result, output, _ = run_cli(["tests/pdf/r6-test-bad-password.pdf", "--debug"])
    assert result.returncode == 0

    # The fixture holds the hash line followed by the encryption dictionary.
    with open("tests/debug_output.txt", "r") as fixture:
        expected_output = fixture.read()
        assert output == expected_output

0 comments on commit f1898ac

Please sign in to comment.