Skip to content

Commit

Permalink
feat: change SPDX to the new internal opossum_model
Browse files Browse the repository at this point in the history
* minor changes to attribution_generation.py and helper_methods.py
* convert_to_opossum.py now uses opossum_model.Resource which simplifies the logic
* this caused some tests to change:
 - (minor) some tests require a conversion to opossum file format
 - the keys of resources_to_attributions used to have a trailing "/" for folders; this
   trailing "/" is no longer generated. This should have no impact on the validity of the .opossum file
  • Loading branch information
abraemer committed Jan 22, 2025
1 parent 54f5c61 commit 4f72940
Show file tree
Hide file tree
Showing 7 changed files with 88 additions and 158 deletions.
22 changes: 11 additions & 11 deletions src/opossum_lib/spdx/attribution_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from spdx_tools.spdx.writer.tagvalue.package_writer import write_package
from spdx_tools.spdx.writer.tagvalue.snippet_writer import write_snippet

from opossum_lib.opossum.opossum_file import OpossumPackage, SourceInfo
from opossum_lib.opossum_model import OpossumPackage, SourceInfo
from opossum_lib.spdx.constants import (
PURL,
SPDX_FILE_IDENTIFIER,
Expand All @@ -34,13 +34,13 @@ def create_package_attribution(package: Package) -> OpossumPackage:
source = SourceInfo(name=SPDX_PACKAGE_IDENTIFIER)
package_attribution = OpossumPackage(
source=source,
packageName=package.name,
package_name=package.name,
url=str(package.download_location),
packageVersion=package.version,
packagePURLAppendix=_get_purl(package),
package_version=package.version,
package_p_u_r_l_appendix=_get_purl(package),
copyright=str(package.copyright_text),
comment=package_data.getvalue(),
licenseName=str(package.license_concluded),
license_name=str(package.license_concluded),
)

return package_attribution
Expand All @@ -52,10 +52,10 @@ def create_file_attribution(file: File) -> OpossumPackage:
source = SourceInfo(name=SPDX_FILE_IDENTIFIER)
file_attribution = OpossumPackage(
source=source,
packageName=file.name.split("/")[-1],
package_name=file.name.split("/")[-1],
copyright=str(file.copyright_text),
comment=file_data.getvalue(),
licenseName=str(file.license_concluded),
license_name=str(file.license_concluded),
)
return file_attribution

Expand All @@ -66,10 +66,10 @@ def create_snippet_attribution(snippet: Snippet) -> OpossumPackage:
source = SourceInfo(name=SPDX_SNIPPET_IDENTIFIER)
snippet_attribution = OpossumPackage(
source=source,
packageName=snippet.name,
package_name=snippet.name,
copyright=str(snippet.copyright_text),
comment=snippet_data.getvalue(),
licenseName=str(snippet.license_concluded),
license_name=str(snippet.license_concluded),
)

return snippet_attribution
Expand All @@ -83,8 +83,8 @@ def create_document_attribution(
source = SourceInfo(name=creation_info.spdx_id)
document_attribution = OpossumPackage(
source=source,
packageName=creation_info.name,
licenseName=creation_info.data_license,
package_name=creation_info.name,
license_name=creation_info.data_license,
comment=creation_info_data.getvalue(),
)

Expand Down
120 changes: 47 additions & 73 deletions src/opossum_lib/spdx/convert_to_opossum.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
import logging
import sys
import uuid
from typing import Any

from networkx import DiGraph, shortest_path
from networkx import DiGraph
from spdx_tools.spdx.model.document import CreationInfo
from spdx_tools.spdx.model.document import Document as SpdxDocument
from spdx_tools.spdx.model.file import File
Expand All @@ -16,17 +17,15 @@
from spdx_tools.spdx.parser.parse_anything import parse_file
from spdx_tools.spdx.validation.document_validator import validate_full_spdx_document

from opossum_lib.opossum.opossum_file import (
from opossum_lib.opossum.opossum_file_content import OpossumFileContent
from opossum_lib.opossum_model import (
ExternalAttributionSource,
Metadata,
OpossumInformation,
Opossum,
OpossumPackage,
OpossumPackageIdentifier,
Resource,
ResourceType,
SourceInfo,
)
from opossum_lib.opossum.opossum_file_content import OpossumFileContent
from opossum_lib.spdx.attribution_generation import (
create_document_attribution,
create_file_attribution,
Expand All @@ -40,10 +39,10 @@
)
from opossum_lib.spdx.graph_generation import generate_graph_from_spdx
from opossum_lib.spdx.helper_methods import (
_create_file_path_from_graph_path,
_get_file_path,
_get_resource_type,
_get_source_for_graph_traversal,
_node_represents_a_spdx_element,
_replace_node_ids_with_labels_and_add_resource_type,
_weakly_connected_component_sub_graphs,
)
from opossum_lib.spdx.tree_generation import generate_tree_from_graph
Expand All @@ -69,15 +68,15 @@ def convert_spdx_to_opossum_information(filename: str) -> OpossumFileContent:
)
graph = generate_graph_from_spdx(document)
tree = generate_tree_from_graph(graph)
opossum_information = convert_tree_to_opossum_information(tree)
return OpossumFileContent(opossum_information)
return convert_tree_to_opossum(tree).to_opossum_file_format()


def convert_tree_to_opossum_information(tree: DiGraph) -> OpossumInformation:
def convert_tree_to_opossum(tree: DiGraph) -> Opossum:
metadata = create_metadata(tree)
resources = Resource(type=ResourceType.TOP_LEVEL)
resources_to_attributions: dict[str, list[str]] = dict()
external_attributions: dict[str, OpossumPackage] = dict()
resources = [] # Resource(type=ResourceType.TOP_LEVEL)
# resources_to_attributions: dict[str, list[str]] = dict()
# external_attributions: dict[str, OpossumPackage] = dict()
attribution_to_id: dict[OpossumPackage, str] = {}
attribution_breakpoints = []
external_attribution_sources = {
SPDX_FILE_IDENTIFIER: ExternalAttributionSource(
Expand All @@ -97,78 +96,53 @@ def convert_tree_to_opossum_information(tree: DiGraph) -> OpossumInformation:
raise RuntimeError(
"A tree should always have a node without incoming edge."
)
for node in connected_subgraph.nodes():
path: list[str] = shortest_path(connected_subgraph, source, node)
path_with_labels: list[tuple[str, ResourceType]] = (
_replace_node_ids_with_labels_and_add_resource_type(
path, connected_subgraph
)
)
resources = resources.add_path(path_with_labels)
file_path: str = _create_file_path_from_graph_path(path, connected_subgraph)
if _node_represents_a_spdx_element(connected_subgraph, node):
create_attribution_and_link_with_resource(
external_attributions,
resources_to_attributions,
file_path,
node,
connected_subgraph,
source_file_path = _get_file_path(connected_subgraph, source, source)
rootnode = Resource(path=source_file_path)
resources.append(rootnode)
for node_label in connected_subgraph.nodes():
node = connected_subgraph.nodes[node_label]
file_path: str = _get_file_path(connected_subgraph, source, node_label)
new_resource = Resource(path=file_path, type=_get_resource_type(node))
if _node_represents_a_spdx_element(connected_subgraph, node_label):
attribution = create_attribution(node)
attribution_to_id[attribution] = (
get_attribution_id(node["element"]) or node_label
)

new_resource.attributions.append(attribution)
else:
attribution_breakpoints.append(file_path)
attribution_breakpoints.append("/" + file_path)
rootnode.add_resource(new_resource)

opossum_information = OpossumInformation(
opossum_information = Opossum(
metadata=metadata,
resources=resources.convert_to_file_resource(),
external_attributions=external_attributions,
resources_to_attributions=resources_to_attributions,
attributionBreakpoints=attribution_breakpoints,
externalAttributionSources=external_attribution_sources,
resources=resources,
attribution_to_id=attribution_to_id,
attribution_breakpoints=attribution_breakpoints,
external_attribution_sources=external_attribution_sources,
)
return opossum_information


def create_attribution_and_link_with_resource(
external_attributions: dict[OpossumPackageIdentifier, OpossumPackage],
resources_to_attributions: dict[OpossumPackageIdentifier, list[str]],
file_path: str,
node: str,
tree: DiGraph,
) -> None:
node_element = tree.nodes[node]["element"]
def create_attribution(
node: dict[str, Any],
) -> OpossumPackage:
node_element = node["element"]
if isinstance(node_element, Package):
external_attributions[node_element.spdx_id] = create_package_attribution(
package=node_element
)
resources_to_attributions[file_path] = [
node_element.spdx_id,
]
return create_package_attribution(package=node_element)
elif isinstance(node_element, File):
external_attributions[node_element.spdx_id] = create_file_attribution(
node_element
)
resources_to_attributions[file_path] = [
node_element.spdx_id,
]
return create_file_attribution(node_element)
elif isinstance(node_element, Snippet):
external_attributions[node_element.spdx_id] = create_snippet_attribution(
node_element
)
resources_to_attributions[file_path] = [
node_element.spdx_id,
]
return create_snippet_attribution(node_element)
elif isinstance(node_element, CreationInfo):
external_attributions[node_element.spdx_id] = create_document_attribution(
node_element
)
resources_to_attributions[file_path] = [node_element.spdx_id]

return create_document_attribution(node_element)
else:
external_attributions[node] = OpossumPackage(
source=SourceInfo(name=tree.nodes[node]["label"])
)
resources_to_attributions[file_path] = [node]
return OpossumPackage(source=SourceInfo(name=node["label"]))


def get_attribution_id(element: Any) -> str | None:
if isinstance(element, Package | File | Snippet | CreationInfo):
return element.spdx_id
return None


def create_metadata(tree: DiGraph) -> Metadata:
Expand Down
40 changes: 10 additions & 30 deletions src/opossum_lib/spdx/helper_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
# SPDX-License-Identifier: Apache-2.0
from typing import Any

from networkx import DiGraph, weakly_connected_components
from networkx import DiGraph, shortest_path, weakly_connected_components
from spdx_tools.spdx.constants import DOCUMENT_SPDX_ID
from spdx_tools.spdx.model import File, Package, Snippet

from opossum_lib.opossum.opossum_file import ResourceType
from opossum_lib.opossum_model import ResourceType


def _get_source_for_graph_traversal(connected_subgraph: DiGraph) -> str | None:
Expand Down Expand Up @@ -41,38 +41,18 @@ def _weakly_connected_component_sub_graphs(graph: DiGraph) -> list[DiGraph]:
return connected_sub_graphs


def _get_file_path(graph: DiGraph, source: str, to: str) -> str:
path = shortest_path(graph, source, to)
return _create_file_path_from_graph_path(path, graph)


def _create_file_path_from_graph_path(path: list[str], graph: DiGraph) -> str:
base_path = "/" + "/".join(
[_replace_prefix(graph.nodes[node]["label"]) for node in path]
)
base_path = "/".join([_replace_prefix(graph.nodes[node]["label"]) for node in path])
if list(graph.successors(path[-1])):
base_path += "/"
return base_path


def _replace_node_ids_with_labels_and_add_resource_type(
path: list[str], graph: DiGraph
) -> list[tuple[str, ResourceType]]:
resulting_path = []
path_with_label_and_resource_type = [
(
_replace_prefix(graph.nodes[node]["label"]),
_get_resource_type(graph.nodes[node]),
)
for node in path
]
for element_or_path, resource_type in path_with_label_and_resource_type:
resulting_path.extend(
[
(element, resource_type)
for element in element_or_path.split("/")
if element
]
)

return resulting_path


def _replace_prefix(label: str) -> str:
"""
Some spdx element names start with "./" or "/", to avoid paths like "/./" or
Expand All @@ -85,11 +65,11 @@ def _replace_prefix(label: str) -> str:
return label


def _get_resource_type(node_attributes: dict[str, Any]) -> ResourceType:
def _get_resource_type(node_attributes: dict[str, Any]) -> ResourceType | None:
element = node_attributes.get("element")
if isinstance(element, Package):
return ResourceType.FOLDER
elif isinstance(element, Snippet | File):
return ResourceType.FILE
else:
return ResourceType.OTHER
return None
6 changes: 3 additions & 3 deletions tests/data/expected_opossum.json
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,10 @@
}
},
"resourcesToAttributions":{
"/SPDX Lite Document/":[
"/SPDX Lite Document":[
"SPDXRef-DOCUMENT"
],
"/SPDX Lite Document/DESCRIBES/Package A/":[
"/SPDX Lite Document/DESCRIBES/Package A":[
"SPDXRef-Package-A"
],
"/SPDX Lite Document/DESCRIBES/Package B":[
Expand All @@ -124,7 +124,7 @@
"/SPDX Lite Document/DESCRIBES/Package A/CONTAINS/File-C":[
"SPDXRef-File-C"
],
"/SPDX Lite Document/DESCRIBES/Package A/COPY_OF/Package C/":[
"/SPDX Lite Document/DESCRIBES/Package A/COPY_OF/Package C":[
"SPDXRef-Package-C"
],
"/SPDX Lite Document/DESCRIBES/Package A/COPY_OF/Package C/CONTAINS/File-B":[
Expand Down
8 changes: 4 additions & 4 deletions tests/test_spdx/test_attribution_creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_create_package_attribution() -> None:
write_package(package, package_data)
package_attribution = create_package_attribution(package)

assert package_attribution == OpossumPackage(
assert package_attribution.to_opossum_file_format() == OpossumPackage(
source=SourceInfo(name=SPDX_PACKAGE_IDENTIFIER),
comment=package_data.getvalue(),
packageName=package.name,
Expand All @@ -79,7 +79,7 @@ def test_create_file_attribution() -> None:
write_file(file, file_data)
file_attribution = create_file_attribution(file)

assert file_attribution == OpossumPackage(
assert file_attribution.to_opossum_file_format() == OpossumPackage(
source=SourceInfo(name=SPDX_FILE_IDENTIFIER),
comment=file_data.getvalue(),
packageName=file.name,
Expand All @@ -102,7 +102,7 @@ def test_create_snippet_attribution() -> None:
write_snippet(snippet, snippet_data)
snippet_attribution = create_snippet_attribution(snippet)

assert snippet_attribution == OpossumPackage(
assert snippet_attribution.to_opossum_file_format() == OpossumPackage(
source=SourceInfo(name=SPDX_SNIPPET_IDENTIFIER),
comment=snippet_data.getvalue(),
packageName=snippet.name,
Expand All @@ -124,7 +124,7 @@ def test_create_document_attribution() -> None:
write_creation_info(creation_info, creation_info_data)
document_attribution = create_document_attribution(creation_info)

assert document_attribution == OpossumPackage(
assert document_attribution.to_opossum_file_format() == OpossumPackage(
source=SourceInfo(name=DOCUMENT_SPDX_ID),
packageName=creation_info.name,
licenseName=creation_info.data_license,
Expand Down
Loading

0 comments on commit 4f72940

Please sign in to comment.