Skip to content

Commit

Permalink
Add ISO/TS 32003 support
Browse files Browse the repository at this point in the history
  • Loading branch information
MatthiasValvekens committed Apr 27, 2024
1 parent a5dcf64 commit bf75c16
Show file tree
Hide file tree
Showing 5 changed files with 305 additions and 15 deletions.
23 changes: 18 additions & 5 deletions pyhanko/pdf_utils/crypt/api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import enum
from dataclasses import dataclass
from typing import Callable, Dict, List, Optional, Set, Tuple, Type
from typing import Callable, Dict, Iterable, List, Optional, Set, Tuple, Type

from pyhanko.pdf_utils import generic, misc
from pyhanko.pdf_utils.crypt.cred_ser import SerialisableCredential
Expand Down Expand Up @@ -74,6 +74,7 @@ class SecurityHandlerVersion(misc.VersionEnum):
RC4_LONGER_KEYS = 2
RC4_OR_AES128 = 4
AES256 = 5
AES_GCM = 6

OTHER = None
"""
Expand Down Expand Up @@ -471,11 +472,17 @@ def get_min_pdf_version(self) -> Optional[Tuple[int, int]]:
return None

def get_extensions(self) -> List[DeveloperExtension]:
exts = []
if self.pdf_mac_enabled:
from .pdfmac import ISO32004

return [ISO32004]
return []
exts.append(ISO32004)

for cf in self.crypt_filter_config.filters():
cf_exts = cf.get_extensions()
if cf_exts is not None:
exts.extend(cf_exts)
return exts


class CryptFilter:
Expand Down Expand Up @@ -525,6 +532,12 @@ def method(self) -> generic.NameObject:
"""
raise NotImplementedError

def get_extensions(self) -> Optional[List[DeveloperExtension]]:
    """
    Report the developer extensions that this crypt filter relies on.

    The base implementation declares none; subclasses that depend on a
    developer extension override this method.

    :return:
        A list of developer extensions, or ``None`` if there are none.
    """
    return None

@property
def keylen(self) -> int:
"""
Expand Down Expand Up @@ -624,7 +637,7 @@ def derive_object_key(self, idnum, generation) -> bytes:
:return:
The local key to use for this object.
"""
raise NotImplementedError
return self.shared_key

def set_embedded_only(self):
self._embedded_only = True
Expand Down Expand Up @@ -781,7 +794,7 @@ def __contains__(self, item):
or item in self._crypt_filters
)

def filters(self):
def filters(self) -> Iterable['CryptFilter']:
"""Enumerate all crypt filters in this configuration."""
return self._crypt_filters.values()

Expand Down
90 changes: 89 additions & 1 deletion pyhanko/pdf_utils/crypt/filter_mixins.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,32 @@
import abc
import secrets
import struct
from typing import List, Optional

from pyhanko.pdf_utils import generic
import cryptography.exceptions
from cryptography.hazmat.primitives.ciphers.aead import AESGCM

from pyhanko.pdf_utils import generic, misc
from pyhanko.pdf_utils.crypt._util import (
aes_cbc_decrypt,
aes_cbc_encrypt,
rc4_encrypt,
)
from pyhanko.pdf_utils.crypt.api import CryptFilter, SecurityHandlerVersion

from ..extensions import DeveloperExtension, DevExtensionMultivalued
from ._legacy import legacy_derive_object_key

# Developer extension declaration for ISO/TS 32003; AES-GCM crypt filters
# report it through their get_extensions() method.
ISO32003 = DeveloperExtension(
    prefix_name=generic.pdf_name('/ISO_'),
    base_version=generic.pdf_name('/2.0'),
    extension_level=32003,
    extension_revision=':2023',
    url='https://www.iso.org/standard/45876.html',
    compare_by_level=False,
    multivalued=DevExtensionMultivalued.ALWAYS,
)


class RC4CryptFilterMixin(CryptFilter, abc.ABC):
"""
Expand Down Expand Up @@ -157,3 +173,75 @@ def derive_object_key(self, idnum, generation) -> bytes:
return legacy_derive_object_key(
self.shared_key, idnum, generation, use_aes=True
)


class AESGCMCryptFilterMixin(CryptFilter, abc.ABC):
    """
    Mixin for AES GCM-based crypt filters (ISO 32003).

    Encrypted payloads consist of a 12-byte nonce, followed by the AES-GCM
    ciphertext, followed by the 16-byte authentication tag.
    """

    method = generic.NameObject('/AESV4')

    def __init__(self: 'AESGCMCryptFilterMixin', **kwargs):
        super().__init__(**kwargs)
        # Per-instance message counter; it feeds the deterministic part of
        # every nonce and is incremented before each encryption.
        self.__counter: int = 0

    @property
    def keylen(self) -> int:
        """Key length in bytes; always 32 for this crypt filter."""
        return 32

    def _get_nonce(self) -> bytes:
        """
        Produce a fresh 12-byte nonce for a single encryption operation.

        :return:
            4 random bytes followed by the 8-byte big-endian value of the
            (incremented) per-instance counter.
        """
        # nonce is 12 bytes, we use 8 for the counter and 4 random ones
        # (mostly because there's no convenient way to do a 12-byte counter
        # with struct.pack)
        # Crypt filter instances are not shared between documents, so this
        # should be plenty unique enough.
        random_part = secrets.token_bytes(4)
        self.__counter += 1
        counter_part = struct.pack('>Q', self.__counter)
        return random_part + counter_part

    def encrypt(self, key, plaintext: bytes, params=None) -> bytes:
        """
        Encrypt data using AES-GCM.

        :param key:
            The key to use.
        :param plaintext:
            The plaintext to be encrypted.
        :param params:
            Ignored.
        :return:
            The resulting ciphertext and tag, prepended with a 12-byte nonce.
        """
        nonce = self._get_nonce()
        ciphertext = AESGCM(key).encrypt(
            nonce=nonce, data=plaintext, associated_data=None
        )
        return nonce + ciphertext

    def decrypt(self, key, ciphertext: bytes, params=None) -> bytes:
        """
        Decrypt data using AES-GCM.

        :param key:
            The key to use.
        :param ciphertext:
            The ciphertext to be decrypted, prepended with a 12-byte
            initialisation vector, and suffixed with the 16-byte
            authentication tag.
        :param params:
            Ignored.
        :return:
            The resulting plaintext.
        :raises misc.PdfReadError:
            If the input is too short to contain a nonce and a tag, or if
            the authentication tag does not validate.
        """
        # A well-formed payload holds at least the 12-byte nonce and the
        # 16-byte tag. Reject truncated input up front so that malformed
        # files are reported as PDF read errors instead of leaking an
        # unrelated exception (e.g. a ValueError about the nonce length)
        # from the cipher layer.
        if len(ciphertext) < 12 + 16:
            raise misc.PdfReadError(
                "AES-GCM ciphertext is too short to contain nonce and tag"
            )
        nonce, data = ciphertext[:12], ciphertext[12:]
        try:
            plaintext = AESGCM(key).decrypt(
                nonce=nonce, data=data, associated_data=None
            )
        except cryptography.exceptions.InvalidTag as e:
            # Keep the original exception attached for debugging purposes.
            raise misc.PdfReadError("Invalid GCM tag") from e
        return plaintext

    def get_extensions(self) -> Optional[List[DeveloperExtension]]:
        """
        Declare the developer extension required by this crypt filter.

        :return:
            A single-element list containing the ISO 32003 extension.
        """
        return [ISO32003]
43 changes: 39 additions & 4 deletions pyhanko/pdf_utils/crypt/pubkey.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,11 @@
build_crypt_filter,
)
from .cred_ser import SerialisableCredential, SerialisedCredential
from .filter_mixins import AESCryptFilterMixin, RC4CryptFilterMixin
from .filter_mixins import (
AESCryptFilterMixin,
AESGCMCryptFilterMixin,
RC4CryptFilterMixin,
)
from .permissions import PubKeyPermissions

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -229,6 +233,14 @@ class PubKeyAESCryptFilter(PubKeyCryptFilter, AESCryptFilterMixin):
pass


class PubKeyAESGCMCryptFilter(PubKeyCryptFilter, AESGCMCryptFilterMixin):
    """
    Crypt filter for public key security handlers that encrypts with AES-GCM.

    All behaviour is inherited from the two base classes.
    """


class PubKeyRC4CryptFilter(PubKeyCryptFilter, RC4CryptFilterMixin):
"""
RC4 crypt filter for public key security handlers.
Expand Down Expand Up @@ -289,9 +301,18 @@ def _pubkey_aes_config(keylen, recipients=None, encrypt_metadata=True):
)


"""
Type alias for a callable that produces a crypt filter from a dictionary.
"""
def _pubkey_gcm_config(recipients=None, encrypt_metadata=True):
    """
    Build a crypt filter configuration for AES-GCM public-key encryption.

    The configuration contains a single :class:`PubKeyAESGCMCryptFilter`,
    registered under the default crypt filter name and used for both
    streams and strings.

    :param recipients:
        Passed through to the crypt filter.
    :param encrypt_metadata:
        Passed through to the crypt filter.
    :return:
        A :class:`CryptFilterConfiguration` instance.
    """
    gcm_filter = PubKeyAESGCMCryptFilter(
        acts_as_default=True,
        recipients=recipients,
        encrypt_metadata=encrypt_metadata,
    )
    return CryptFilterConfiguration(
        {DEFAULT_CRYPT_FILTER: gcm_filter},
        default_stream_filter=DEFAULT_CRYPT_FILTER,
        default_string_filter=DEFAULT_CRYPT_FILTER,
    )


@enum.unique
Expand Down Expand Up @@ -1168,6 +1189,12 @@ def _build_aes256_pubkey_cf(cfdict, acts_as_default):
)


def _build_aesgcm_pubkey_cf(cfdict, acts_as_default):
    """
    Instantiate an AES-GCM public-key crypt filter from a crypt filter
    dictionary.

    :param cfdict:
        The crypt filter dictionary to read the generic settings from.
    :param acts_as_default:
        Forwarded to the crypt filter constructor.
    :return:
        A :class:`PubKeyAESGCMCryptFilter` instance.
    """
    cf_kwargs = _read_generic_pubkey_cf_info(cfdict)
    return PubKeyAESGCMCryptFilter(
        acts_as_default=acts_as_default, **cf_kwargs
    )


@SecurityHandler.register
class PubKeySecurityHandler(SecurityHandler):
"""
Expand All @@ -1181,6 +1208,7 @@ class PubKeySecurityHandler(SecurityHandler):
generic.NameObject('/V2'): _build_legacy_pubkey_cf,
generic.NameObject('/AESV2'): _build_aes128_pubkey_cf,
generic.NameObject('/AESV3'): _build_aes256_pubkey_cf,
generic.NameObject('/AESV4'): _build_aesgcm_pubkey_cf,
generic.NameObject('/Identity'): lambda _, __: IdentityCryptFilter(),
}

Expand Down Expand Up @@ -1312,9 +1340,16 @@ def __init__(
encrypt_metadata=encrypt_metadata,
recipients=recipient_objs,
)
elif version == SecurityHandlerVersion.AES_GCM:
crypt_filter_config = _pubkey_gcm_config(
recipients=recipient_objs, encrypt_metadata=encrypt_metadata
)
elif version >= SecurityHandlerVersion.AES256:
# there's a reasonable default config that we can fall back to
# here
# NOTE: we _don't_ use GCM by default. With the way PDF
# encryption works, the authentication guarantees are not
# worth much anyhow (need ISO 32004-style solution).
crypt_filter_config = _pubkey_aes_config(
keylen=32,
encrypt_metadata=encrypt_metadata,
Expand Down
47 changes: 44 additions & 3 deletions pyhanko/pdf_utils/crypt/standard.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@
SecurityHandlerVersion,
)
from .cred_ser import SerialisableCredential, SerialisedCredential
from .filter_mixins import AESCryptFilterMixin, RC4CryptFilterMixin
from .filter_mixins import (
AESCryptFilterMixin,
AESGCMCryptFilterMixin,
RC4CryptFilterMixin,
)
from .permissions import StandardPermissions


Expand Down Expand Up @@ -124,6 +128,7 @@ class StandardSecuritySettingsRevision(misc.VersionEnum):
RC4_EXTENDED = 3
RC4_OR_AES128 = 4
AES256 = 6
AES_GCM = 7
OTHER = None
"""
Placeholder value for custom security handlers.
Expand Down Expand Up @@ -203,6 +208,14 @@ class StandardAESCryptFilter(StandardCryptFilter, AESCryptFilterMixin):
pass


class StandardAESGCMCryptFilter(StandardCryptFilter, AESGCMCryptFilterMixin):
    """
    Crypt filter for the standard security handler that encrypts with
    AES-GCM.

    All behaviour is inherited from the two base classes.
    """


class StandardRC4CryptFilter(StandardCryptFilter, RC4CryptFilterMixin):
"""
RC4 crypt filter for the standard security handler.
Expand Down Expand Up @@ -230,6 +243,14 @@ def _std_aes_config(keylen):
)


def _std_gcm_config():
    """
    Build a crypt filter configuration for the standard security handler
    in which a single AES-GCM crypt filter handles both streams and strings.

    :return:
        A :class:`CryptFilterConfiguration` instance.
    """
    gcm_filter = StandardAESGCMCryptFilter()
    return CryptFilterConfiguration(
        {STD_CF: gcm_filter},
        default_stream_filter=STD_CF,
        default_string_filter=STD_CF,
    )


def _build_legacy_standard_crypt_filter(
cfdict: generic.DictionaryObject, _acts_as_default
):
Expand Down Expand Up @@ -258,6 +279,7 @@ class StandardSecurityHandler(SecurityHandler):
generic.NameObject('/AESV3'): lambda _, __: StandardAESCryptFilter(
keylen=32
),
generic.NameObject('/AESV4'): lambda _, __: StandardAESGCMCryptFilter(),
generic.NameObject('/Identity'): lambda _, __: IdentityCryptFilter(),
}

Expand Down Expand Up @@ -396,6 +418,7 @@ def build_from_pw(
perms: StandardPermissions = StandardPermissions.allow_everything(),
encrypt_metadata=True,
pdf_mac: bool = True,
use_gcm: bool = True,
**kwargs,
):
"""
Expand All @@ -416,6 +439,15 @@ def build_from_pw(
as well.
:param pdf_mac:
Include an ISO 32004 MAC.
:param use_gcm:
Use AES-GCM (ISO 32003) to encrypt strings and streams.
.. danger::
Due to the way PDF encryption works, the authentication
guarantees of AES-GCM only apply to the content of individual
strings and streams. The PDF file structure itself is not
authenticated. Document-level integrity protection is provided
by the ``pdf_mac=True`` option.
:return:
A :class:`StandardSecurityHandler` instance.
"""
Expand Down Expand Up @@ -473,9 +505,16 @@ def build_from_pw(
else:
kdf_salt = None

if use_gcm:
version = SecurityHandlerVersion.AES_GCM
revision = StandardSecuritySettingsRevision.AES_GCM
else:
version = SecurityHandlerVersion.AES256
revision = StandardSecuritySettingsRevision.AES256

sh = cls(
version=SecurityHandlerVersion.AES256,
revision=StandardSecuritySettingsRevision.AES256,
version=version,
revision=revision,
legacy_keylen=32,
perm_flags=perms,
odata=o_entry,
Expand Down Expand Up @@ -530,6 +569,8 @@ def __init__(
crypt_filter_config = _std_rc4_config(5)
elif version == SecurityHandlerVersion.RC4_LONGER_KEYS:
crypt_filter_config = _std_rc4_config(legacy_keylen)
elif version == SecurityHandlerVersion.AES_GCM:
crypt_filter_config = _std_gcm_config()
elif (
version >= SecurityHandlerVersion.AES256
and crypt_filter_config is None
Expand Down
Loading

0 comments on commit bf75c16

Please sign in to comment.