Skip to content

Commit

Permalink
Introduce the --include-symbols option
Browse files Browse the repository at this point in the history
This option allows .pdb debug symbol files to be included in the wheel
to aid in debugging of the vendored DLLs.
  • Loading branch information
adang1345 committed Aug 8, 2023
1 parent 354137a commit 6ce9d8d
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 44 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ The path separator to use in the following options is `';'` on Windows and `':'`
- `--namespace-pkg`: namespace packages, specified in case-sensitive dot notation and delimited by the path separator. Normally, we patch or create `__init__.py` in each top-level package to add the vendored DLL location to the DLL search path at runtime. If you have a top-level namespace package that requires `__init__.py` to be absent or unmodified, then this technique can cause problems. This option tells `delvewheel` to use an alternate strategy that does not create or modify `__init__.py` at the root of the given namespace package(s). For example,
- `--namespace-pkg package1` declares `package1` as a namespace package.
- On Windows, `--namespace-pkg package1.package2;package3` declares `package1`, `package1\package2`, and `package3` as namespace packages.
- `--include-symbols`: include `.pdb` symbol files with the vendored DLLs. To be included, a symbol file must be in the same directory as its DLL and share the DLL's filename apart from the extension (matched case-insensitively), e.g. `example.dll` and `example.pdb`.

## Limitations

Expand Down
3 changes: 2 additions & 1 deletion delvewheel/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def main():
parser_repair.add_argument('-L', '--lib-sdir', default='.libs', type=_subdir_suffix, help='directory suffix in package to store vendored DLLs (default .libs)')
parser_repair.add_argument('--namespace-pkg', default='', metavar='PKGS', type=_namespace_pkgs, help=f'namespace package(s), {os.pathsep!r}-delimited')
parser_repair.add_argument('--no-diagnostic', action='store_true', help=argparse.SUPPRESS) # don't write diagnostic information to DELVEWHEEL metadata file
parser_repair.add_argument('--include-symbols', action='store_true', help='include .pdb symbol files with vendored DLLs')
parser_needed.add_argument('file', help='path to a DLL or PYD file')
parser_needed.add_argument('-v', action='count', default=0, help='verbosity')
args = parser.parse_args()
Expand All @@ -80,7 +81,7 @@ def main():
else: # args.command == 'repair'
no_mangles = set(dll_name.lower() for dll_name in args.no_mangle.split(os.pathsep) if dll_name)
namespace_pkgs = set(tuple(namespace_pkg.split('.')) for namespace_pkg in args.namespace_pkg.split(os.pathsep) if namespace_pkg)
wr.repair(args.target, no_mangles, args.no_mangle_all, args.strip, args.lib_sdir, args.no_diagnostic, namespace_pkgs)
wr.repair(args.target, no_mangles, args.no_mangle_all, args.strip, args.lib_sdir, args.no_diagnostic, namespace_pkgs, args.include_symbols)
else: # args.command == 'needed'
for dll_name in sorted(_dll_utils.get_direct_needed(args.file, args.v), key=str.lower):
print(dll_name)
Expand Down
86 changes: 57 additions & 29 deletions delvewheel/_dll_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,17 +209,25 @@ def translate_system32_to_sysnative(directory: str) -> str:
_translate_directory = _translate_directory()


def find_library(name: str, wheel_dirs: typing.Optional[typing.Iterable], arch: MachineType) -> typing.Optional[str]:
"""Given the name of a DLL, return the path to the DLL, or None if the DLL
cannot be found. DLL names are searched in a case-insensitive fashion. The
search goes in the following order and considers only the DLLs with the
given architecture.
1. If not None, the directories in wheel_dirs.
def find_library(name: str, wheel_dirs: typing.Optional[typing.Iterable], arch: MachineType, include_symbols: bool) -> typing.Optional[typing.Tuple[str, typing.Optional[str]]]:
"""Given the name of a DLL, return a tuple where
- the 1st element is the path to the DLL
- the 2nd element is
- if include_symbols is False, then None
- if include_symbols is True, then the path to the .pdb symbol file next
to the DLL if it exists, None otherwise. Excluding the file extension,
the name of the symbol file is assumed to be the same as the name of
the DLL.
If the DLL cannot be found, then return None. DLL and symbol file names are
searched in a case-insensitive fashion. The search goes in the following
order and considers only the DLLs with the architecture arch.
1. If not None, the directories in wheel_dirs. We never search for symbol
files in wheel_dirs.
2. The PATH environment variable, with any applicable adjustments due to
the Windows file system redirector. (If we are on a case-sensitive file
the Windows file system redirector. If we are on a case-sensitive file
system and a directory contains more than one DLL with the correct
architecture that differs by case only, then choose one arbitrarily.)"""
architecture that differs by case only, then choose one arbitrarily."""
name = name.lower()
if wheel_dirs is not None:
for wheel_dir in wheel_dirs:
Expand All @@ -231,18 +239,31 @@ def find_library(name: str, wheel_dirs: typing.Optional[typing.Iterable], arch:
if name == item.lower():
path = os.path.join(wheel_dir, item)
if os.path.isfile(path) and get_arch(path) == arch:
return path
return path, None
for directory in os.environ['PATH'].split(os.pathsep):
directory = _translate_directory(directory, arch)
try:
contents = os.listdir(directory)
except FileNotFoundError:
continue
dll_path = None
for item in contents:
if name == item.lower():
path = os.path.join(directory, item)
if os.path.isfile(path) and get_arch(path) == arch:
return path
dll_path = path
break
symbol_path = None
if dll_path and include_symbols:
symbol_name = os.path.splitext(name)[0] + '.pdb'
for item in contents:
if symbol_name == item.lower():
path = os.path.join(directory, item)
if os.path.isfile(path):
symbol_path = path
break
if dll_path:
return dll_path, symbol_path
return None


Expand Down Expand Up @@ -300,28 +321,33 @@ def get_all_needed(lib_path: str,
no_dlls: set,
wheel_dirs: typing.Optional[typing.Iterable],
on_error: str,
verbose: int) -> typing.Tuple[typing.Set[str], typing.Set[str], typing.Set[str]]:
"""Given the path to a shared library, return a 3-tuple of sets
(discovered, ignored, not_found).
discovered contains the original-case DLL paths of all direct and indirect
dependencies of that shared library that should be bundled into the wheel.
ignored contains the lowercased DLL names of all direct and indirect
dependencies of that shared library that will not be bundled into the wheel
because they are assumed to be on the target system.
If on_error is 'raise', FileNotFoundError is raised if a dependent library
cannot be found. If on_error is 'ignore', not_found contains the lowercased
DLL names of all dependent DLLs that cannot be found.
include_symbols: bool,
verbose: int) -> typing.Tuple[typing.Set[str], typing.Set[str], typing.Set[str], typing.Set[str]]:
"""Given the path to a shared library, return a 4-tuple of sets
(discovered, symbols, ignored, not_found).
- discovered contains the original-case DLL paths of all direct and
indirect dependencies of that shared library that should be bundled into
the wheel.
- symbols contains the original-case paths of any .pdb symbol files
corresponding to the DLLs in discovered.
- ignored contains the lowercased DLL names of all direct and indirect
dependencies of that shared library that will not be bundled into the
wheel because they are assumed to be on the target system.
- If on_error is 'raise', FileNotFoundError is raised if a dependent
library cannot be found. If on_error is 'ignore', not_found contains the
lowercased DLL names of all dependent DLLs that cannot be found.
no_dlls is a set of DLL names to force exclusion from the wheel. We do not
search for dependencies of these DLLs.
If wheel_dirs is not None, it is an iterable of directories in the wheel
where dependencies are searched first."""
where dependencies are searched first.
include_symbols specifies whether to search for .pdb symbol files"""
first_lib_path = lib_path.lower()
stack = [first_lib_path]
discovered = set()
symbols = set()
ignored = set()
not_found = set()
while stack:
Expand All @@ -344,17 +370,19 @@ def get_all_needed(lib_path: str,
not any(r.fullmatch(dll_name) for r in _dll_list.ignore_regexes) and \
dll_name not in no_dlls and \
(lib_name_lower not in _dll_list.ignore_dependency or dll_name not in _dll_list.ignore_dependency[lib_name_lower]):
dll_path = find_library(dll_name, wheel_dirs, lib_arch)
if dll_path:
stack.append(dll_path)
dll_info = find_library(dll_name, wheel_dirs, lib_arch, include_symbols)
if dll_info:
stack.append(dll_info[0])
if dll_info[1]:
symbols.add(dll_info[1])
elif on_error == 'raise':
raise FileNotFoundError(f'Unable to find library: {dll_name}')
else:
not_found.add(dll_name)
else:
ignored.add(dll_name)
discovered.remove(first_lib_path)
return discovered, ignored, not_found
return discovered, symbols, ignored, not_found


def _round_to_next(size: int, alignment: int) -> int:
Expand Down
38 changes: 24 additions & 14 deletions delvewheel/_wheel_repair.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,17 +562,17 @@ def show(self) -> None:
if filename.lower().endswith('.pyd'):
extension_module_path = os.path.join(root, filename)
extension_module_paths.append(extension_module_path)
discovered, ignored, not_found = _dll_utils.get_all_needed(extension_module_path, self._no_dlls, self._wheel_dirs, 'ignore', self._verbose)
discovered, _, ignored, not_found = _dll_utils.get_all_needed(extension_module_path, self._no_dlls, self._wheel_dirs, 'ignore', False, self._verbose)
dependency_paths |= discovered
ignored_dll_names |= ignored
not_found_dll_names |= not_found

# find extra dependencies specified with --add-dll
extra_dependency_paths = set()
for dll_name in self._add_dlls:
path = _dll_utils.find_library(dll_name, None, self._arch)
if path:
extra_dependency_paths.add(path)
dll_info = _dll_utils.find_library(dll_name, None, self._arch, False)
if dll_info:
extra_dependency_paths.add(dll_info[0])
else:
not_found_dll_names.add(dll_name)

Expand Down Expand Up @@ -619,7 +619,7 @@ def show(self) -> None:
if not_found_dll_names:
print('\nWarning: At least one dependent DLL needs to be copied into the wheel but was not found.')

def repair(self, target: str, no_mangles: set, no_mangle_all: bool, strip: bool, lib_sdir: str, no_diagnostic: bool, namespace_pkgs: typing.Set[typing.Tuple[str]]) -> None:
def repair(self, target: str, no_mangles: set, no_mangle_all: bool, strip: bool, lib_sdir: str, no_diagnostic: bool, namespace_pkgs: typing.Set[typing.Tuple[str]], include_symbols: bool) -> None:
"""Repair the wheel in a manner similar to auditwheel.
target is the target directory for storing the repaired wheel
no_mangles is a set of lowercase DLL names that will not be mangled
Expand All @@ -631,7 +631,9 @@ def repair(self, target: str, no_mangles: set, no_mangle_all: bool, strip: bool,
DELVEWHEEL metadata file
namespace_pkgs is a set of paths, relative to the wheel root,
corresponding to the namespace packages. Each path is represented
as a tuple of path components"""
as a tuple of path components
include_symbols is True if .pdb symbol files should be included with
the vendored DLLs"""
print(f'repairing {self._whl_path}')

# check whether wheel has already been repaired
Expand All @@ -643,6 +645,7 @@ def repair(self, target: str, no_mangles: set, no_mangle_all: bool, strip: bool,
# find dependencies
print('finding DLL dependencies')
dependency_paths = set()
symbol_paths = set()
ignored_dll_names = set()
extension_module_paths = []
has_top_level_ext_module = False
Expand All @@ -662,8 +665,9 @@ def repair(self, target: str, no_mangles: set, no_mangle_all: bool, strip: bool,
elif self._verbose >= 1:
print(f'analyzing package-level extension module {os.path.relpath(extension_module_path, self._extract_dir)}')
extension_module_paths.append(extension_module_path)
discovered, ignored = _dll_utils.get_all_needed(extension_module_path, self._no_dlls, self._wheel_dirs, 'raise', self._verbose)[:2]
discovered, symbols, ignored = _dll_utils.get_all_needed(extension_module_path, self._no_dlls, self._wheel_dirs, 'raise', include_symbols, self._verbose)[:3]
dependency_paths |= discovered
symbol_paths |= symbols
ignored_dll_names |= ignored

# if --ignore-in-wheel is specified, ignore DLLs that were found inside
Expand All @@ -686,9 +690,11 @@ def repair(self, target: str, no_mangles: set, no_mangle_all: bool, strip: bool,
for dll_name in self._add_dlls:
if dll_name in dependency_names_lower:
continue
path = _dll_utils.find_library(dll_name, None, self._arch)
if path:
extra_dependency_paths.add(path)
dll_info = _dll_utils.find_library(dll_name, None, self._arch, include_symbols)
if dll_info:
extra_dependency_paths.add(dll_info[0])
if dll_info[1]:
symbol_paths.add(dll_info[1])
else:
raise FileNotFoundError(f'{dll_name} not found')
if not dependency_paths and not extra_dependency_paths:
Expand Down Expand Up @@ -730,6 +736,10 @@ def repair(self, target: str, no_mangles: set, no_mangle_all: bool, strip: bool,
if self._verbose >= 1:
print(f'copying {dependency_path} -> {os.path.join(libs_dir, os.path.basename(dependency_path))}')
shutil.copy2(dependency_path, libs_dir)
for symbol_path in symbol_paths:
if self._verbose >= 1:
print(f'copying {symbol_path} -> {os.path.join(libs_dir, os.path.basename(symbol_path))}')
shutil.copy2(symbol_path, libs_dir)

# mangle library names
name_mangler = {} # dict from lowercased old name to new name
Expand Down Expand Up @@ -825,11 +835,11 @@ def repair(self, target: str, no_mangles: set, no_mangle_all: bool, strip: bool,
for dirname in dirnames:
dirname_relative = self._get_site_packages_relpath(dirname)
if dirname_relative not in seen_relative:
for lib_name in os.listdir(libs_dir):
lib_path = os.path.join(libs_dir, lib_name)
for filename in os.listdir(libs_dir):
filepath = os.path.join(libs_dir, filename)
if self._verbose >= 1:
print(f'copying {lib_path} -> {os.path.join(dirname, lib_name)}')
shutil.copy2(lib_path, dirname)
print(f'copying {filepath} -> {os.path.join(dirname, filename)}')
shutil.copy2(filepath, dirname)
seen_relative.add(dirname_relative)

if load_order_filename is not None:
Expand Down
17 changes: 17 additions & 0 deletions tests/run_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -996,6 +996,23 @@ def test_ignore_data(self):
check_call(['delvewheel', 'repair', 'simpleext/simpleext-0.0.1-0ignore-cp310-cp310-win_amd64.whl'])
self.assertFalse(os.path.exists('wheelhouse/simpleext-0.0.1-0ignore-cp310-cp310-win_amd64.whl'))

def test_include_symbols0(self):
    """Repair with --include-symbols and check that simpledll.pdb is in the wheel."""
    repair_cmd = [
        'delvewheel', 'repair',
        '--add-path', 'simpleext/x64',
        '--include-symbols',
        'simpleext/simpleext-0.0.1-cp310-cp310-win_amd64.whl',
    ]
    check_call(repair_cmd)
    with tempfile.TemporaryDirectory() as extract_dir:
        # Unpack the repaired wheel so its contents can be inspected on disk.
        with zipfile.ZipFile('wheelhouse/simpleext-0.0.1-cp310-cp310-win_amd64.whl') as repaired_whl:
            repaired_whl.extractall(extract_dir)
        pdb_path = os.path.join(extract_dir, 'simpleext-0.0.1.data/platlib/simpledll.pdb')
        self.assertTrue(os.path.exists(pdb_path))

def test_include_symbols1(self):
    """With --namespace-pkg ns, simpledll.pdb must exist in both
    simpleext.libs and ns after repairing with --include-symbols."""
    repair_cmd = [
        'delvewheel', 'repair',
        '--add-path', 'simpleext/x64',
        '--namespace-pkg', 'ns',
        '--include-symbols',
        'simpleext/simpleext-0.0.1-2namespace-cp310-cp310-win_amd64.whl',
    ]
    check_call(repair_cmd)
    expected_pdbs = ('simpleext.libs/simpledll.pdb', 'ns/simpledll.pdb')
    with tempfile.TemporaryDirectory() as extract_dir:
        # Unpack the repaired wheel so its contents can be inspected on disk.
        with zipfile.ZipFile('wheelhouse/simpleext-0.0.1-2namespace-cp310-cp310-win_amd64.whl') as repaired_whl:
            repaired_whl.extractall(extract_dir)
        for relative_pdb in expected_pdbs:
            self.assertTrue(os.path.exists(os.path.join(extract_dir, relative_pdb)))


class NeededTestCase(unittest.TestCase):
"""Tests for delvewheel needed"""
Expand Down
Binary file added tests/simpleext/x64/simpledll.pdb
Binary file not shown.

0 comments on commit 6ce9d8d

Please sign in to comment.