diff --git a/codebasin/__init__.py b/codebasin/__init__.py index c1bd884..909dc76 100644 --- a/codebasin/__init__.py +++ b/codebasin/__init__.py @@ -191,7 +191,7 @@ def from_file(cls, filename: str | os.PathLike[str]): ------- A CompilationDatbase corresponding to the provided JSON file. """ - with codebasin.util.safe_open_read_nofollow(filename, "r") as f: + with open(filename) as f: db = codebasin.util._load_json(f, schema_name="compiledb") return CompilationDatabase.from_json(db) diff --git a/codebasin/__main__.py b/codebasin/__main__.py index 32c2f78..9cb0d95 100755 --- a/codebasin/__main__.py +++ b/codebasin/__main__.py @@ -271,19 +271,19 @@ def main(): args.reports = ["all"] # Determine the root directory based on where codebasin is run. - rootdir = os.path.realpath(os.getcwd()) + rootdir = os.path.abspath(os.getcwd()) # Set up a default configuration object. configuration = {} # Load the analysis file if it exists. if args.analysis_file is not None: - path = os.path.realpath(args.analysis_file) + path = os.path.abspath(args.analysis_file) if os.path.exists(path): if not os.path.splitext(path)[1] == ".toml": raise RuntimeError(f"Analysis file {path} must end in .toml.") - with util.safe_open_read_nofollow(path, "rb") as f: + with open(path, "rb") as f: try: analysis_toml = util._load_toml(f, "analysis") except BaseException: diff --git a/codebasin/config.py b/codebasin/config.py index 047abcd..38c8c4a 100644 --- a/codebasin/config.py +++ b/codebasin/config.py @@ -82,7 +82,7 @@ def load_importcfg(): path = ".cbi/config" if os.path.exists(path): log.info(f"Found configuration file at {path}") - with util.safe_open_read_nofollow(path, "rb") as f: + with open(path, "rb") as f: try: _importcfg_toml = util._load_toml(f, "cbiconfig") for name, compiler in _importcfg_toml["compiler"].items(): @@ -324,7 +324,7 @@ def load_database(dbpath, rootdir): # Include paths may be specified relative to root include_paths = [ - os.path.realpath(os.path.join(rootdir, f)) for f in include_paths + os.path.abspath(os.path.join(rootdir, f)) for f in include_paths ] # Files may be specified: @@ -336,15 +336,15 @@ def load_database(dbpath, rootdir): if os.path.isabs(command.directory): filedir = command.directory else: - filedir = os.path.realpath( + filedir = os.path.abspath( rootdir, os.path.join(command.directory), ) if os.path.isabs(command.filename): - path = os.path.realpath(command.filename) + path = os.path.abspath(command.filename) else: - path = os.path.realpath(os.path.join(filedir, command.filename)) + path = os.path.abspath(os.path.join(filedir, command.filename)) # Compilation database may contain files that don't # exist without running make diff --git a/codebasin/file_parser.py b/codebasin/file_parser.py index a57f248..01727e6 100644 --- a/codebasin/file_parser.py +++ b/codebasin/file_parser.py @@ -8,7 +8,7 @@ import logging import os -from codebasin import preprocessor, util +from codebasin import preprocessor from codebasin.file_source import get_file_source log = logging.getLogger(__name__) @@ -91,7 +91,7 @@ class FileParser: """ def __init__(self, _filename): - self._filename = os.path.realpath(_filename) + self._filename = os.path.abspath(_filename) @staticmethod def handle_directive(out_tree, groups, logical_line): @@ -170,11 +170,7 @@ def parse_file(self, *, summarize_only=True, language=None): f"{filename} doesn't appear " + "to be a language this tool can process", ) - with util.safe_open_read_nofollow( - filename, - mode="r", - errors="replace", - ) as source_file: + with open(filename, errors="replace") as source_file: groups = { "code": LineGroup(), "directive": LineGroup(), diff --git a/codebasin/finder.py b/codebasin/finder.py index 86d59a3..436951a 100644 --- a/codebasin/finder.py +++ b/codebasin/finder.py @@ -32,12 +32,26 @@ def __init__(self, summarize_only): self.maps = {} self.langs = {} self.summarize_only = summarize_only + self._path_cache = {} + + def _get_realpath(self, path: str) -> str: + """ + Returns + ------- + str + Equivalent to os.path.realpath(path). + """ + if path not in self._path_cache: + real = os.path.realpath(path) + self._path_cache[path] = real + return self._path_cache[path] def insert_file(self, fn, language=None): """ Build a new tree for a source file, and create an association map for it. """ + fn = self._get_realpath(fn) if fn not in self.trees: parser = file_parser.FileParser(fn) self.trees[fn] = parser.parse_file( @@ -60,6 +74,7 @@ def get_tree(self, fn): """ Return the SourceTree associated with a filename """ + fn = self._get_realpath(fn) if fn not in self.trees: return None return self.trees[fn] @@ -68,6 +83,7 @@ def get_map(self, fn): """ Return the NodeAssociationMap associated with a filename """ + fn = self._get_realpath(fn) if fn not in self.maps: return None return self.maps[fn] diff --git a/codebasin/platform.py b/codebasin/platform.py index eba6ecd..c3b3bce 100644 --- a/codebasin/platform.py +++ b/codebasin/platform.py @@ -97,7 +97,7 @@ def find_include_file(self, filename, this_path, is_system_include=False): # Determine the path to the include file, if it exists for path in local_paths + self._include_paths: - test_path = os.path.realpath(os.path.join(path, filename)) + test_path = os.path.abspath(os.path.join(path, filename)) if os.path.isfile(test_path): include_file = test_path self.found_incl[filename] = include_file diff --git a/codebasin/preprocessor.py b/codebasin/preprocessor.py index 0a37ca2..03c189c 100644 --- a/codebasin/preprocessor.py +++ b/codebasin/preprocessor.py @@ -616,7 +616,7 @@ def __init__(self, _filename): def __compute_file_hash(self): chunk_size = 4096 hasher = hashlib.sha512() - with util.safe_open_read_nofollow(self.filename, "rb") as in_file: + with open(self.filename, "rb") as in_file: for chunk in iter(lambda: in_file.read(chunk_size), b""): hasher.update(chunk) diff --git a/codebasin/util.py b/codebasin/util.py index 0aebb6a..6082324 100644 --- a/codebasin/util.py +++ b/codebasin/util.py @@ -75,12 +75,6 @@ def safe_open_write_binary(fname): return os.fdopen(fpid, "wb") -def safe_open_read_nofollow(fname, *args, **kwargs): - """Open fname for reading, but don't follow links.""" - fpid = os.open(fname, os.O_RDONLY | os.O_NOFOLLOW) - return os.fdopen(fpid, *args, **kwargs) - - def valid_path(path): """Return true if the path passed in is valid""" valid = True diff --git a/tests/duplicates/cpu2 b/tests/duplicates/cpu2 new file mode 120000 index 0000000..840a958 --- /dev/null +++ b/tests/duplicates/cpu2 @@ -0,0 +1 @@ +cpu/ \ No newline at end of file diff --git a/tests/duplicates/test_duplicates.py b/tests/duplicates/test_duplicates.py index 459f29e..ee5a940 100644 --- a/tests/duplicates/test_duplicates.py +++ b/tests/duplicates/test_duplicates.py @@ -52,6 +52,40 @@ def test_duplicates(self): setmap = mapper.walk(state) self.assertDictEqual(setmap, expected_setmap, "Mismatch in setmap") + def test_symlinks(self): + """Check that symlinks do not count towards divergence.""" + + cpufile = str(self.rootdir / "cpu/foo.cpp") + cpu2file = str(self.rootdir / "cpu2/foo.cpp") + + codebase = CodeBase(self.rootdir, exclude_patterns=["gpu/"]) + + configuration = { + "cpu": [ + { + "file": cpufile, + "defines": [], + "include_paths": [], + "include_files": [], + }, + ], + "cpu2": [ + { + "file": cpu2file, + "defines": [], + "include_paths": [], + "include_files": [], + }, + ], + } + + expected_setmap = {frozenset(["cpu", "cpu2"]): 1} + + state = finder.find(self.rootdir, codebase, configuration) + mapper = PlatformMapper(codebase) + setmap = mapper.walk(state) + self.assertDictEqual(setmap, expected_setmap, "Mismatch in setmap") + if __name__ == "__main__": unittest.main()