Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove duplicate detection #79

Merged
merged 10 commits into from
Mar 19, 2024
69 changes: 25 additions & 44 deletions bin/codebasin
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,9 @@ def main():
metavar="<platform>",
action="append",
default=[],
help="Add the specified platform to the analysis. "
+ "May be a name or a path to a compilation database. "
help="Include the specified platform in the analysis. "
+ "May be specified multiple times. "
+ "If not specified, all known platforms will be included.",
+ "If not specified, all platforms will be included.",
)
# The analysis-file argument is optional while we support the -c option.
parser.add_argument(
Expand Down Expand Up @@ -190,33 +189,9 @@ def main():
"Cannot use --config (-c) with TOML analysis files.",
)

# Process the -p flag first to infer wider context.
filtered_platforms = []
additional_platforms = []
for p in args.platforms:
# If it's a path, it has to be a compilation database.
if os.path.exists(p):
if not os.path.splitext(p)[1] == ".json":
raise RuntimeError(f"Platform file {p} must end in .json.")
additional_platforms.append(p)
continue

# Otherwise, treat it as a name in the configuration file.
# Explain the logic above in cases that look suspiciously like paths.
if "/" in p or os.path.splitext(p)[1] == ".json":
logging.getLogger("codebasin").warning(
f"{p} doesn't exist, so will be treated as a name.",
)
filtered_platforms.append(p)

# A legacy config file is required if:
# - No additional platforms are specified; and
# - No TOML analysis file is specified
# If no file is specified, legacy behavior checks for config.yaml
config_file = args.config_file
config_required = (
len(additional_platforms) == 0 and args.analysis_file is None
)
if config_file is None and config_required:
if args.config_file is None and args.analysis_file is None:
warnings.warn(
"Implicitly defined configuration files are deprecated.",
DeprecationWarning,
Expand Down Expand Up @@ -251,15 +226,15 @@ def main():
config_file,
rootdir,
exclude_patterns=args.excludes,
filtered_platforms=filtered_platforms,
filtered_platforms=args.platforms,
)

# Load the analysis file if it exists.
if args.analysis_file is not None:
path = os.path.realpath(args.analysis_file)
if os.path.exists(path):
if not os.path.splitext(path)[1] == ".toml":
raise RuntimeError(f"Analysis file {p} must end in .toml.")
raise RuntimeError(f"Analysis file {path} must end in .toml.")

with util.safe_open_read_nofollow(path, "rb") as f:
try:
Expand All @@ -272,23 +247,23 @@ def main():
excludes = analysis_toml["codebase"]["exclude"]
codebase["exclude_patterns"] += excludes

for name in args.platforms:
if name not in analysis_toml["platform"].keys():
raise RuntimeError(
f"Platform {name} requested on the command line "
+ "does not exist in the configuration file.",
)

for name in analysis_toml["platform"].keys():
if filtered_platforms and name not in filtered_platforms:
if args.platforms and name not in args.platforms:
continue
if "commands" not in analysis_toml["platform"][name]:
raise ValueError(f"Missing 'commands' for platform {name}")
p = analysis_toml["platform"][name]["commands"]
db = config.load_database(p, rootdir)
codebase["platforms"].append(name)
configuration.update({name: db})

# Extend configuration with any additional platforms.
for p in additional_platforms:
name = os.path.splitext(os.path.basename(p))[0]
if name in codebase["platforms"]:
raise RuntimeError(f"Platform name {p} conflicts with {name}.")
db = config.load_database(p, rootdir)
configuration.update({name: db})

# Parse the source tree, and determine source line associations.
# The trees and associations are housed in state.
legacy_warnings = True if config_file else False
Expand Down Expand Up @@ -331,11 +306,17 @@ def main():

# Print clustering report
if report_enabled("clustering"):
if config_file is None:
platform_names = [p[0] for p in args.platforms]
output_prefix = "-".join(platform_names)
else:
# Legacy behavior: guess prefix from YAML filename
if config_file is not None:
output_prefix = os.path.realpath(guess_project_name(config_file))

# Modern behavior: append platforms to TOML filename
else:
basename = os.path.basename(args.analysis_file)
filename = os.path.splitext(basename)[0]
platform_names = [p for p in codebase["platforms"]]
output_prefix = "-".join([filename] + platform_names)

clustering_output_name = output_prefix + "-dendrogram.png"
clustering = report.clustering(clustering_output_name, setmap)
if clustering is not None:
Expand Down
6 changes: 6 additions & 0 deletions codebasin/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,12 @@ def load_database(dbpath, rootdir):
else:
log.warning("Couldn't find file %s -- ignoring it.", path)

if len(configuration) == 0:
log.warning(
f"No files found in compilation database at '{dbpath}'.\n"
+ "Ensure that 'directory' and 'file' are in the root directory.",
)

return configuration


Expand Down
2 changes: 1 addition & 1 deletion codebasin/finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def __init__(self, summarize_only):
self.langs = {}
self.summarize_only = summarize_only
self.fileinfo = collections.defaultdict(list)
self.merge_duplicates = True
self.merge_duplicates = False

def _map_filename(self, fn):
"""
Expand Down
2 changes: 2 additions & 0 deletions tests/build-dir/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright (C) 2021 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause
1 change: 1 addition & 0 deletions tests/build-dir/foo.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
void foo() { return; }
110 changes: 110 additions & 0 deletions tests/build-dir/test_build_dir.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause

import json
import logging
import tempfile
import unittest
from pathlib import Path

from codebasin import config, finder
from codebasin.walkers.platform_mapper import PlatformMapper


class TestBuildDirectories(unittest.TestCase):
    """
    Test ability to correctly handle out-of-tree builds.
    """

    def setUp(self):
        """
        Record the test root directory and enable the "codebasin" logger.
        """
        self.rootdir = str(Path(__file__).parent)

        # The logger is shared global state; re-enable it so warnings can be
        # captured, and restore whatever state it had once the test is done.
        logger = logging.getLogger("codebasin")
        self.addCleanup(setattr, logger, "disabled", logger.disabled)
        logger.disabled = False

    def _write_database(self, entries):
        """
        Write `entries` to a temporary compilation database as JSON.

        The file is written through its own handle (rather than being
        reopened by name) and is closed -- and therefore deleted -- when the
        test finishes.

        Returns the path to the temporary file.
        """
        # CBI only understands how to load compilation databases from file.
        # For now, create temporary files every time we test.
        tmp = tempfile.NamedTemporaryFile(mode="w")
        self.addCleanup(tmp.close)
        json.dump(entries, tmp)
        # Flush so the contents are visible to readers opening by name.
        tmp.flush()
        return tmp.name

    def test_absolute_paths(self):
        """
        Test databases whose "directory" is a build directory but whose
        "file" is an absolute path to a source file in the source tree.
        The same source compiled by two platforms should be counted once,
        and attributed to both platforms.
        """

        source = str(Path(__file__).parent.joinpath("foo.cpp"))

        # Each platform compiles the same source from its own build directory.
        build_dirs = {"one": "build1/", "two": "build2/"}

        configuration = {}
        for name, build_dir in build_dirs.items():
            directory = str(Path(__file__).parent.joinpath(build_dir))
            dbpath = self._write_database(
                [
                    {
                        "command": f"/usr/bin/c++ -o foo.cpp.o -c {source}",
                        "directory": f"{directory}",
                        "file": f"{source}",
                    },
                ],
            )
            configuration[name] = config.load_database(dbpath, self.rootdir)

        codebase = {
            "files": [source],
            "platforms": list(build_dirs),
            "exclude_files": set(),
            "exclude_patterns": [],
            "rootdir": self.rootdir,
        }

        expected_setmap = {frozenset(["one", "two"]): 1}

        state = finder.find(self.rootdir, codebase, configuration)
        mapper = PlatformMapper(codebase)
        setmap = mapper.walk(state)
        self.assertDictEqual(setmap, expected_setmap, "Mismatch in setmap")

    def test_empty_platform(self):
        """
        Check that we warn if all files from a platform are excluded.
        This may be a sign that the compilation database has incorrect paths.
        """

        source = str(Path(__file__).parent.joinpath("foo.cpp"))
        build = str(Path(__file__).parent.joinpath("build/"))

        # "file" is relative, so it is expected to resolve against the build
        # "directory", where no such file exists.
        dbpath = self._write_database(
            [
                {
                    "command": f"/usr/bin/c++ -o foo.cpp.o -c {source}",
                    "directory": f"{build}",
                    "file": "foo.cpp",
                },
            ],
        )

        with self.assertLogs("codebasin", level="WARNING") as log:
            config.load_database(dbpath, self.rootdir)

        # Use a substring test: str.find() returns -1 (truthy) when the text
        # is absent, so it cannot be used directly as a condition.
        expected = "No files found in compilation database"
        self.assertTrue(
            any(expected in msg for msg in log.output),
            f"Expected warning not found in: {log.output}",
        )


if __name__ == "__main__":
unittest.main()
10 changes: 0 additions & 10 deletions tests/duplicates/build1/bar.c

This file was deleted.

5 changes: 0 additions & 5 deletions tests/duplicates/build1/bar.h

This file was deleted.

10 changes: 0 additions & 10 deletions tests/duplicates/build1/baz.c

This file was deleted.

5 changes: 0 additions & 5 deletions tests/duplicates/build1/baz.h

This file was deleted.

10 changes: 0 additions & 10 deletions tests/duplicates/build1/foo.c

This file was deleted.

5 changes: 0 additions & 5 deletions tests/duplicates/build1/foo.h

This file was deleted.

10 changes: 0 additions & 10 deletions tests/duplicates/build2/bar.c

This file was deleted.

5 changes: 0 additions & 5 deletions tests/duplicates/build2/bar.h

This file was deleted.

10 changes: 0 additions & 10 deletions tests/duplicates/build2/baz.c

This file was deleted.

5 changes: 0 additions & 5 deletions tests/duplicates/build2/baz.h

This file was deleted.

10 changes: 0 additions & 10 deletions tests/duplicates/build2/foo.c

This file was deleted.

5 changes: 0 additions & 5 deletions tests/duplicates/build2/foo.h

This file was deleted.

1 change: 1 addition & 0 deletions tests/duplicates/cpu/foo.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
void foo() {}
11 changes: 0 additions & 11 deletions tests/duplicates/duplicates.yaml

This file was deleted.

1 change: 1 addition & 0 deletions tests/duplicates/gpu/foo.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
void foo() {}
11 changes: 0 additions & 11 deletions tests/duplicates/names.yaml

This file was deleted.

Loading
Loading