diff --git a/README.md b/README.md index 4a6cf5a..69a1604 100644 --- a/README.md +++ b/README.md @@ -22,27 +22,22 @@ Create a virtual environment and install Python dependencies ```bash pyenv virtualenv 3.11.4 pdf2john pyenv shell pdf2john -poetry install -``` - -or if you don't want to use a virtual environment, simply -```bash -pip install -r requirements.txt +poetry install --only main ``` ## Usage -Run pdf2john.py with a PDF of choice -```bash -./pdf2john.py example.pdf -``` - To pass the hash to john: ```bash -./pdf2john.py example.pdf >> .hash +pdf2john example.pdf >> .hash john .hash john --show --format=PDF .hash ``` +If you don't want to install pdf2john, you can run it as a standalone script: +```bash +./src/pdf2john/pdf2john.py example.pdf >> .hash +``` + ## Features - Responsibility for PDF parsing and handling has been delegated to [pyHanko](https://github.com/MatthiasValvekens/pyHanko) (a crytography focused fork of PyPDF2) - CICD workflow that tests pdf2john against PDFs ranging from Security Handler Revision 2 -> 6 diff --git a/pyproject.toml b/pyproject.toml index dd4f924..e2c5aba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,16 @@ [tool.poetry] name = "pdf2john" -version = "0.1.5" -description = "" +version = "0.1.6" +description = "A modern refactoring of the legacy pdf2john library" authors = ["Benjamin Dornel "] license = "MIT" +repository = "https://github.com/benjamin-awd/pdf2john" readme = "README.md" +classifiers = [ + "Development Status :: 3 - Alpha", + "Programming Language :: Python :: 3.11", + "Topic :: Security" +] [tool.poetry.dependencies] python = "^3.11" @@ -31,3 +37,6 @@ profile = "black" [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" + +[tool.poetry.scripts] +pdf2john = "pdf2john.pdf2john:main" diff --git a/src/pdf2john/__init__.py b/src/pdf2john/__init__.py new file mode 100644 index 0000000..b887dda --- /dev/null +++ b/src/pdf2john/__init__.py @@ -0,0 +1,3 @@ +from .pdf2john import PdfHashExtractor + +__all__ = ["PdfHashExtractor"] diff --git a/pdf2john.py b/src/pdf2john/pdf2john.py similarity index 99% rename from pdf2john.py rename to src/pdf2john/pdf2john.py index 9f26546..2c2681f 100755 --- a/pdf2john.py +++ b/src/pdf2john/pdf2john.py @@ -111,7 +111,7 @@ def get_passwords(self) -> str: return "*".join(passwords) -if __name__ == "__main__": +def main() -> None: parser = argparse.ArgumentParser(description="PDF Hash Extractor") parser.add_argument( "pdf_files", nargs="+", help="PDF file(s) to extract information from" @@ -137,3 +137,7 @@ def get_passwords(self) -> str: except PdfReadError as error: logger.error("%s : %s", filename, error, exc_info=True) + + +if __name__ == "__main__": + main() diff --git a/tests/test_cli.py b/tests/test_cli.py index da42867..e3c33cd 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,9 +1,11 @@ import subprocess +from pytest import mark -def run_cli(args, input_text=None): + +def run_cli(args: list, cmd: str = "pdf2john", input_text=None): process = subprocess.Popen( - ["python3", "pdf2john.py"] + args, + [cmd] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, @@ -13,9 +15,10 @@ def run_cli(args, input_text=None): return process, stdout, stderr -def test_cli_extraction(): +@mark.parametrize("cmd", ["pdf2john", "./src/pdf2john/pdf2john.py"]) +def test_cli_extraction(cmd): pdf_file = "tests/pdf/r6-test-bad-password.pdf" - process, stdout, _ = run_cli([pdf_file]) + process, stdout, _ = run_cli(cmd=cmd, args=[pdf_file]) expected = "$pdf$5*6*256*-4*1*16*fce2fe96b7e142b4a0576f61e2e9c441*48*aef6c4bf5e8a0f3bb1adef2b8ac2367d1ce95ecc1ddc3243ce49786086a023aa310aa9f7d1d103f837e4d4f738ac913d*48*eabf37f3f1f1b208f7c8ddfad3b817c689889ecbadd30f4581382cfbf79806304fb438e9ca227a023138a38eadcf82f3*32*37afcbfcbb32d4e1bca1eb10165693a1633ebb742c00045177a284ba22196937*32*3459a644d5f4c4f7cee562b754b30df48d598e1911ea513ef29bb3928593caf3" assert process.returncode == 0