Skip to content

Commit

Permalink
refactor: simplify scancode conversion using new Resource functions
Browse files Browse the repository at this point in the history
  • Loading branch information
abraemer committed Jan 21, 2025
1 parent efa56f9 commit 1e1d053
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 137 deletions.
5 changes: 1 addition & 4 deletions src/opossum_lib/scancode/convert_scancode_to_opossum.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,7 @@ def convert_scancode_to_opossum(filename: str) -> OpossumFileContent:

scancode_data = load_scancode_json(filename)

filetree = scancode_to_file_tree(scancode_data)
resources = filetree.to_opossum_resources()
with open("debug.json", "w") as out:
out.write(resources[0].model_dump_json(indent=4, by_alias=True))
resources = [scancode_to_file_tree(scancode_data)]

scancode_header = extract_scancode_header(scancode_data, filename)
metadata = opossum_model.Metadata(
Expand Down
54 changes: 10 additions & 44 deletions src/opossum_lib/scancode/resource_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,59 +5,25 @@

from __future__ import annotations

from pydantic import BaseModel
from pathlib import Path

import opossum_lib.opossum_model as opossum_model
from opossum_lib.scancode.constants import SCANCODE_SOURCE_NAME
from opossum_lib.scancode.helpers import check_schema, path_segments
from opossum_lib.scancode.model import File, FileType, ScanCodeData


class ScanCodeFileTree(BaseModel):
    """Tree node holding one ScanCode file entry and its children by name."""

    file: File
    children: dict[str, ScanCodeFileTree] = {}

    def get_path(self, path: list[str]) -> ScanCodeFileTree:
        """Return the node reached by following ``path`` from this node.

        Segments that have no node yet get an unvalidated placeholder (built
        with ``model_construct``, so its ``file`` is not set). An empty
        ``path`` returns this node itself.
        """
        node = self
        for segment in path:
            if segment not in node.children:
                # Placeholder only; the caller is expected to assign `file`.
                node.children[segment] = ScanCodeFileTree.model_construct(None)  # type: ignore
            node = node.children[segment]
        return node

    def revalidate(self) -> None:
        """Run ``check_schema`` on this node, then on every descendant."""
        check_schema(self)
        for subtree in self.children.values():
            subtree.revalidate()

    def to_opossum_resources(
        self,
    ) -> list[opossum_model.Resource]:
        """Convert this subtree into a one-element list of opossum resources."""

        def build_resource(
            node: ScanCodeFileTree,
        ) -> opossum_model.Resource:
            # Evaluate the node's own fields before descending, so a failure
            # on this node surfaces before any failure in its children.
            scanned = node.file
            resource_path = scanned.path
            attributions = get_attribution_info(scanned)
            resource_type = convert_resource_type(scanned.type)
            converted_children = {
                name: build_resource(subtree)
                for name, subtree in node.children.items()
            }
            return opossum_model.Resource(
                path=resource_path,
                attributions=attributions,
                type=resource_type,
                children=converted_children,
            )

        return [build_resource(self)]


# NOTE(review): this span comes from a rendered diff without +/- markers, so
# the pre-commit and post-commit versions of scancode_to_file_tree are
# interleaved line by line. The "old"/"new" labels below are inferred from the
# commit title and the "10 additions & 44 deletions" summary; confirm against
# the repository before relying on them.
# old: signature returning the ScanCodeFileTree wrapper.
def scancode_to_file_tree(scancode_data: ScanCodeData) -> ScanCodeFileTree:
temp_root = ScanCodeFileTree.model_construct(file=None) # type: ignore
# new: signature returning an opossum_model.Resource directly.
def scancode_to_file_tree(scancode_data: ScanCodeData) -> opossum_model.Resource:
# Placeholder parent with an empty path; every scanned file is added below it.
temp_root = opossum_model.Resource(path=Path(""))
for file in scancode_data.files:
# old: look up (or create) the node for the file's path segments and attach
# the file to it.
segments = path_segments(file.path)
temp_root.get_path(segments).file = file
# new: build one Resource per scanned file and hand it to add_resource
# (presumably inserts it at its path; verify in opossum_model).
resource = opossum_model.Resource(
path=Path(file.path),
attributions=get_attribution_info(file),
type=convert_resource_type(file.type),
)
temp_root.add_resource(resource)

# The placeholder must end up with exactly one child, which is the real root.
assert len(temp_root.children) == 1
# old: unwrap the single child, run check_schema on it, then return it.
root = list(temp_root.children.values())[0]
check_schema(root)
return root
# new: return the single child without the check_schema call.
return list(temp_root.children.values())[0]


def get_attribution_info(file: File) -> list[opossum_model.OpossumPackage]:
Expand Down
10 changes: 10 additions & 0 deletions tests/test_scancode/model_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,16 @@
)


def _create_reference_scancode_files() -> list[File]:
    """Return the fixture entries: folder A, subfolder A/B and three files."""
    directory, regular_file = FileType.DIRECTORY, FileType.FILE
    layout = (
        ("A", directory),
        ("A/B", directory),
        ("A/file1", regular_file),
        ("A/file2.txt", regular_file),
        ("A/B/file3", regular_file),
    )
    return [_create_file(path, file_type) for path, file_type in layout]


def _create_file(
path: str,
type: FileType,
Expand Down
89 changes: 0 additions & 89 deletions tests/test_scancode/test_resource_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,83 +4,20 @@

from copy import deepcopy

import pytest
from pydantic import ValidationError

from opossum_lib.opossum_model import OpossumPackage, SourceInfo
from opossum_lib.scancode.constants import SCANCODE_SOURCE_NAME
from opossum_lib.scancode.model import (
Copyright,
File,
FileBasedLicenseDetection,
FileType,
Match,
ScanCodeData,
)
from opossum_lib.scancode.resource_tree import (
ScanCodeFileTree,
get_attribution_info,
scancode_to_file_tree,
)
from tests.test_scancode.model_helpers import _create_file


class TestRevalidate:
    """Check ScanCodeFileTree.revalidate on valid and invalid trees."""

    def test_successfully_revalidate_valid_file_tree(self) -> None:
        """A fully validated tree revalidates without raising."""
        scanned = _create_file("A", FileType.FILE)
        leaf_a = ScanCodeFileTree(file=scanned)
        leaf_c = ScanCodeFileTree(file=scanned)
        branch_b = ScanCodeFileTree(file=scanned, children={"C": leaf_c})
        tree = ScanCodeFileTree(
            file=scanned,
            children={"A": leaf_a, "B": branch_b},
        )

        tree.revalidate()

    def test_fail_to_revalidate_file_tree_invalid_at_toplevel(self) -> None:
        """A root built without a file is rejected."""
        scanned = _create_file("A", FileType.FILE)
        leaf_a = ScanCodeFileTree(file=scanned)
        leaf_c = ScanCodeFileTree(file=scanned)
        branch_b = ScanCodeFileTree(file=scanned, children={"C": leaf_c})
        # model_construct skips validation, which lets the root carry file=None.
        tree = ScanCodeFileTree.model_construct(
            children={"A": leaf_a, "B": branch_b},
            file=None,  # type: ignore
        )

        with pytest.raises(ValidationError):
            tree.revalidate()

    def test_fail_to_revalidate_file_tree_invalid_only_at_lower_level(self) -> None:
        """An invalid node nested below valid ancestors is still rejected."""
        scanned = _create_file("A", FileType.FILE)
        leaf_a = ScanCodeFileTree(file=scanned)
        # The only invalid node: built without validation and without a file.
        broken_leaf = ScanCodeFileTree.model_construct(None)  # type: ignore
        branch_b = ScanCodeFileTree(file=scanned, children={"C": broken_leaf})
        tree = ScanCodeFileTree(
            file=scanned,
            children={"A": leaf_a, "B": branch_b},
        )

        with pytest.raises(ValidationError):
            tree.revalidate()


def test_scancode_to_resource_tree_produces_expected_result() -> None:
    """The tree built from the reference files equals the hand-built reference."""
    scancode_data = ScanCodeData(
        headers=[],
        packages=[],
        dependencies=[],
        license_detections=[],
        files=_create_reference_scancode_files(),
    )

    actual_tree = scancode_to_file_tree(scancode_data)
    expected_tree = _create_reference_node_structure()

    assert actual_tree == expected_tree


def test_get_attribution_info_directory() -> None:
folder = _create_file("A", FileType.DIRECTORY)
assert get_attribution_info(folder) == []
Expand Down Expand Up @@ -159,29 +96,3 @@ def test_get_attribution_info_file_multiple() -> None:
attribution_confidence=50,
)
assert set(attributions) == {expected1, expected2}


def _create_reference_scancode_files() -> list[File]:
    """Return the fixture entries: folder A, subfolder A/B and three files."""
    directory_paths = ["A", "A/B"]
    file_paths = ["A/file1", "A/file2.txt", "A/B/file3"]
    # Directories come first, then files, matching the expected unpacking order.
    entries = [_create_file(path, FileType.DIRECTORY) for path in directory_paths]
    entries.extend(_create_file(path, FileType.FILE) for path in file_paths)
    return entries


def _create_reference_node_structure() -> ScanCodeFileTree:
    """Build by hand the tree expected for the reference ScanCode files."""
    folder, subfolder, file1, file2, file3 = _create_reference_scancode_files()

    # Subfolder "B" contains only file3.
    subfolder_node = ScanCodeFileTree(
        file=subfolder,
        children={"file3": ScanCodeFileTree(file=file3)},
    )

    # Top-level folder: the subfolder first, then its two direct files.
    top_level_children = {"B": subfolder_node}
    for name, scanned in (("file1", file1), ("file2.txt", file2)):
        top_level_children[name] = ScanCodeFileTree(file=scanned)

    return ScanCodeFileTree(file=folder, children=top_level_children)

0 comments on commit 1e1d053

Please sign in to comment.