Skip to content

Commit

Permalink
feat: make pdf2john cli executable
Browse files Browse the repository at this point in the history
  • Loading branch information
benjamin-awd committed Nov 3, 2023
1 parent 39d32dd commit 047b2cc
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 17 deletions.
19 changes: 7 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,27 +22,22 @@ Create a virtual environment and install Python dependencies
```bash
pyenv virtualenv 3.11.4 pdf2john
pyenv shell pdf2john
poetry install
```

Or, if you don't want to use a virtual environment, simply run:
```bash
pip install -r requirements.txt
poetry install --only main
```

## Usage
Run pdf2john.py with a PDF of choice
```bash
./pdf2john.py example.pdf
```

To pass the hash to john:
```bash
./pdf2john.py example.pdf >> .hash
pdf2john example.pdf >> .hash
john .hash
john --show --format=PDF .hash
```

If you don't want to install pdf2john, you can run it as a standalone script:
```bash
./src/pdf2john/pdf2john.py example.pdf >> .hash
```

## Features
- Responsibility for PDF parsing and handling has been delegated to [pyHanko](https://github.com/MatthiasValvekens/pyHanko) (a cryptography-focused fork of PyPDF2)
- CI/CD workflow that tests pdf2john against PDFs ranging from Security Handler Revision 2 through 6
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,6 @@ profile = "black"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.poetry.scripts]
pdf2john = "pdf2john.pdf2john:main"
3 changes: 3 additions & 0 deletions src/pdf2john/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .pdf2john import PdfHashExtractor

__all__ = ["PdfHashExtractor"]
6 changes: 5 additions & 1 deletion pdf2john.py → src/pdf2john/pdf2john.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def get_passwords(self) -> str:
return "*".join(passwords)


if __name__ == "__main__":
def main() -> None:
parser = argparse.ArgumentParser(description="PDF Hash Extractor")
parser.add_argument(
"pdf_files", nargs="+", help="PDF file(s) to extract information from"
Expand All @@ -137,3 +137,7 @@ def get_passwords(self) -> str:

except PdfReadError as error:
logger.error("%s : %s", filename, error, exc_info=True)


if __name__ == "__main__":
main()
11 changes: 7 additions & 4 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import subprocess

from pytest import mark

def run_cli(args, input_text=None):

def run_cli(args: list, cmd: str = "pdf2john", input_text=None):
process = subprocess.Popen(
["python3", "pdf2john.py"] + args,
[cmd] + args,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
stdin=subprocess.PIPE,
Expand All @@ -13,9 +15,10 @@ def run_cli(args, input_text=None):
return process, stdout, stderr


def test_cli_extraction():
@mark.parametrize("cmd", ["pdf2john", "./src/pdf2john/pdf2john.py"])
def test_cli_extraction(cmd):
pdf_file = "tests/pdf/r6-test-bad-password.pdf"
process, stdout, _ = run_cli([pdf_file])
process, stdout, _ = run_cli(cmd=cmd, args=[pdf_file])

expected = "$pdf$5*6*256*-4*1*16*fce2fe96b7e142b4a0576f61e2e9c441*48*aef6c4bf5e8a0f3bb1adef2b8ac2367d1ce95ecc1ddc3243ce49786086a023aa310aa9f7d1d103f837e4d4f738ac913d*48*eabf37f3f1f1b208f7c8ddfad3b817c689889ecbadd30f4581382cfbf79806304fb438e9ca227a023138a38eadcf82f3*32*37afcbfcbb32d4e1bca1eb10165693a1633ebb742c00045177a284ba22196937*32*3459a644d5f4c4f7cee562b754b30df48d598e1911ea513ef29bb3928593caf3"
assert process.returncode == 0
Expand Down

0 comments on commit 047b2cc

Please sign in to comment.