Skip to content

Commit

Permalink
cli: Remove hand-rolled WHEEL parsing/mutation code
Browse files Browse the repository at this point in the history
The PyPA Binary Distribution Format spec says the WHEEL metadata file is
"in the same basic key: value format" as the METADATA file.  METADATA in
turn is governed by the Core Metadata Specifications, which say: "In the
absence of a precise definition, the practical standard is set by what the
standard library email.parser module can parse using the compat32 policy."

wheel.bdist_wheel accordingly uses email.generator.BytesGenerator to
generate WHEEL, but the CLI `pack` and `tags` commands opt to
hand-implement WHEEL parsing and mutation.  Their mutation functions,
set_tags() and set_build_number(), append any new headers to the existing
WHEEL file.  Since WHEEL tends to have a trailing blank line (separating
the headers from the nonexistent body), the new headers end up in the
"body" and are ignored by any tool that parses WHEEL with email.parser.

In addition, both functions assume that WHEEL uses CRLF line endings,
while bdist_wheel (and email.generator with the compat32 policy)
specifically always writes LF line endings.  This turns out okay for
set_tags(), which rewrites the whole file, but set_build_number() ends up
appending a CRLF-terminated line to a file with LF-terminated lines.

Fix all this by removing the hand-written parsing/mutation functions in
favor of email.parser and email.generator.
  • Loading branch information
bgilbert committed Nov 25, 2023
1 parent e2a8623 commit 2c0a583
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 81 deletions.
4 changes: 4 additions & 0 deletions docs/news.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ Release Notes

**UNRELEASED**

- Fixed ``wheel pack`` and ``wheel tags`` writing updated ``WHEEL`` fields after a
blank line, causing other tools to ignore them
- Fixed ``wheel pack`` and ``wheel tags`` writing ``WHEEL`` with CRLF line endings or
a mix of CRLF and LF
- Allow removing build tag with ``wheel tags --build ""``
- Fixed ``wheel pack --build-number ""`` not removing build tag from ``WHEEL``
(above changes by Benjamin Gilbert)
Expand Down
59 changes: 10 additions & 49 deletions src/wheel/cli/pack.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
from __future__ import annotations

import email.policy
import os.path
import re
from email.generator import BytesGenerator
from email.parser import BytesParser

from wheel.cli import WheelError
from wheel.wheelfile import WheelFile

DIST_INFO_RE = re.compile(r"^(?P<namever>(?P<name>.+?)-(?P<ver>\d.*?))\.dist-info$")
BUILD_NUM_RE = re.compile(rb"Build: (\d\w*)\r?\n")


def pack(directory: str, dest_dir: str, build_number: str | None) -> None:
Expand Down Expand Up @@ -35,10 +37,11 @@ def pack(directory: str, dest_dir: str, build_number: str | None) -> None:
name_version = DIST_INFO_RE.match(dist_info_dir).group("namever")

# Read the tags and the existing build number from .dist-info/WHEEL
existing_build_number = None
wheel_file_path = os.path.join(directory, dist_info_dir, "WHEEL")
with open(wheel_file_path, "rb") as f:
tags, existing_build_number = read_tags(f.read())
info = BytesParser(policy=email.policy.compat32).parse(f)
tags: list[str] = info.get_all("Tag", [])
existing_build_number = info.get("Build")

if not tags:
raise WheelError(
Expand All @@ -49,17 +52,14 @@ def pack(directory: str, dest_dir: str, build_number: str | None) -> None:
# Set the wheel file name and add/replace/remove the Build tag in .dist-info/WHEEL
build_number = build_number if build_number is not None else existing_build_number
if build_number is not None:
del info["Build"]
if build_number:
info["Build"] = build_number
name_version += "-" + build_number

if build_number != existing_build_number:
with open(wheel_file_path, "rb+") as f:
wheel_file_content = f.read()
wheel_file_content = set_build_number(wheel_file_content, build_number)

f.seek(0)
f.truncate()
f.write(wheel_file_content)
with open(wheel_file_path, "wb") as f:
BytesGenerator(f, maxheaderlen=0).flatten(info)

# Reassemble the tags for the wheel file
tagline = compute_tagline(tags)
Expand All @@ -73,45 +73,6 @@ def pack(directory: str, dest_dir: str, build_number: str | None) -> None:
print("OK")


def read_tags(input_str: bytes) -> tuple[list[str], str | None]:
"""Read tags from a string.
:param input_str: A string containing one or more tags, separated by spaces
:return: A list of tags and a list of build tags
"""

tags = []
existing_build_number = None
for line in input_str.splitlines():
if line.startswith(b"Tag: "):
tags.append(line.split(b" ")[1].rstrip().decode("ascii"))
elif line.startswith(b"Build: "):
existing_build_number = line.split(b" ")[1].rstrip().decode("ascii")

return tags, existing_build_number


def set_build_number(wheel_file_content: bytes, build_number: str | None) -> bytes:
"""Compute a build tag and add/replace/remove as necessary.
:param wheel_file_content: The contents of .dist-info/WHEEL
:param build_number: The build tags present in .dist-info/WHEEL
:return: The (modified) contents of .dist-info/WHEEL
"""
replacement = (
("Build: %s\r\n" % build_number).encode("ascii") if build_number else b""
)

wheel_file_content, num_replaced = BUILD_NUM_RE.subn(
replacement, wheel_file_content
)

if not num_replaced:
wheel_file_content += replacement

return wheel_file_content


def compute_tagline(tags: list[str]) -> str:
"""Compute a tagline from a list of tags.
Expand Down
40 changes: 13 additions & 27 deletions src/wheel/cli/tags.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from __future__ import annotations

import email.policy
import itertools
import os
from collections.abc import Iterable
from email.parser import BytesParser

from ..wheelfile import WheelFile
from .pack import read_tags, set_build_number


def _compute_tags(original_tags: Iterable[str], new_tags: str | None) -> set[str]:
Expand Down Expand Up @@ -48,6 +49,7 @@ def tags(
assert f.filename, f"{f.filename} must be available"

wheel_info = f.read(f.dist_info_path + "/WHEEL")
info = BytesParser(policy=email.policy.compat32).parsebytes(wheel_info)

original_wheel_name = os.path.basename(f.filename)
namever = f.parsed_filename.group("namever")
Expand All @@ -56,7 +58,8 @@ def tags(
original_abi_tags = f.parsed_filename.group("abi").split(".")
original_plat_tags = f.parsed_filename.group("plat").split(".")

tags, existing_build_tag = read_tags(wheel_info)
tags: list[str] = info.get_all("Tag", [])
existing_build_tag = info.get("Build")

impls = {tag.split("-")[0] for tag in tags}
abivers = {tag.split("-")[1] for tag in tags}
Expand Down Expand Up @@ -103,12 +106,13 @@ def tags(
final_wheel_name = "-".join(final_tags) + ".whl"

if original_wheel_name != final_wheel_name:
tags = [
f"{a}-{b}-{c}"
for a, b, c in itertools.product(
final_python_tags, final_abi_tags, final_plat_tags
)
]
del info["Tag"], info["Build"]
for a, b, c in itertools.product(
final_python_tags, final_abi_tags, final_plat_tags
):
info["Tag"] = f"{a}-{b}-{c}"
if build:
info["Build"] = build

original_wheel_path = os.path.join(
os.path.dirname(f.filename), original_wheel_name
Expand All @@ -125,29 +129,11 @@ def tags(
if item.filename == f.dist_info_path + "/RECORD":
continue
if item.filename == f.dist_info_path + "/WHEEL":
content = fin.read(item)
content = set_tags(content, tags)
content = set_build_number(content, build)
fout.writestr(item, content)
fout.writestr(item, info.as_bytes())
else:
fout.writestr(item, fin.read(item))

if remove:
os.remove(original_wheel_path)

return final_wheel_name


def set_tags(in_string: bytes, tags: Iterable[str]) -> bytes:
"""Set the tags in the .dist-info/WHEEL file contents.
:param in_string: The string to modify.
:param tags: The tags to set.
"""

lines = [line for line in in_string.splitlines() if not line.startswith(b"Tag:")]
for tag in tags:
lines.append(b"Tag: " + tag.encode("ascii"))
in_string = b"\r\n".join(lines) + b"\r\n"

return in_string
10 changes: 5 additions & 5 deletions tests/cli/test_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ def test_python_tags(wheelpath):
with WheelFile(str(output_file)) as f:
output = f.read(f.dist_info_path + "/WHEEL")
assert (
output == b"Wheel-Version: 1.0\r\nGenerator: bdist_wheel (0.30.0)"
b"\r\nRoot-Is-Purelib: false\r\nTag: py3-none-any\r\n"
output == b"Wheel-Version: 1.0\nGenerator: bdist_wheel (0.30.0)"
b"\nRoot-Is-Purelib: false\nTag: py3-none-any\n\n"
)
output_file.unlink()

Expand Down Expand Up @@ -153,9 +153,9 @@ def test_multi_tags(wheelpath):
output = f.read(f.dist_info_path + "/WHEEL")
assert (
output
== b"Wheel-Version: 1.0\r\nGenerator: bdist_wheel (0.30.0)\r\nRoot-Is-Purelib:"
b" false\r\nTag: py2-none-linux_x86_64\r\nTag: py3-none-linux_x86_64\r\nTag:"
b" py4-none-linux_x86_64\r\nBuild: 1\r\n"
== b"Wheel-Version: 1.0\nGenerator: bdist_wheel (0.30.0)\nRoot-Is-Purelib:"
b" false\nTag: py2-none-linux_x86_64\nTag: py3-none-linux_x86_64\nTag:"
b" py4-none-linux_x86_64\nBuild: 1\n\n"
)
output_file.unlink()

Expand Down

0 comments on commit 2c0a583

Please sign in to comment.