Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Solves #25 #26

Merged
merged 3 commits into from
Mar 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 64 additions & 11 deletions cag/framework/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@

from pyArango.theExceptions import DocumentNotFoundError, SimpleQueryError

from cag.graph_elements.nodes import GenericOOSNode
from cag.graph_elements.relations import GenericEdge
from cag.utils.config import Config, configuration

from cag.graph_elements.base_graph import *
from pyArango.collection import Document, Collection
from pyArango.collection import Document, Collection, Collection_metaclass
import re
from typing import Any, Optional
from typing import Any, Optional, Union

from cag import logger

Expand Down Expand Up @@ -39,7 +41,6 @@ def __init__(self, conf: Config = None):
self.graph_name = conf.graph
if self.database.hasGraph(self.graph_name):
self.graph: BaseGraph = self.database.graphs[self.graph_name]

else:
edge_def_arr = []
for ed in edges:
Expand All @@ -48,14 +49,24 @@ def __init__(self, conf: Config = None):
+ ed["from_collections"]
+ ed["to_collections"]
):
if not self.database.hasCollection(col):
self.database.createCollection(col)

if not self.database.hasCollection(
self.get_collection_name(col)
):
self.database.createCollection(
self.get_collection_name(col)
)
edge_def_arr.append(
EdgeDefinition(
ed["relation"],
fromCollections=ed["from_collections"],
toCollections=ed["to_collections"],
self.get_collection_name(ed["relation"]),
fromCollections=[
self.get_collection_name(col)
for col in ed["from_collections"]
],
toCollections=[
self.get_collection_name(col)
for col in ed["to_collections"]
],
)
)
if len(edge_def_arr) == 0:
Expand All @@ -75,12 +86,54 @@ def __init__(self, conf: Config = None):
# Setup graph structure
for ed in edges:
self.graph.update_graph_structure(
ed["relation"],
ed["from_collections"],
ed["to_collections"],
self.get_collection_name(ed["relation"]),
[
self.get_collection_name(col)
for col in ed["from_collections"]
],
[
self.get_collection_name(col)
for col in ed["to_collections"]
],
create_collections=True,
)

@staticmethod
def get_collection_name(
    collection: Union[str, Collection_metaclass]
) -> str:
    """Resolve an edge-definition entry to the name of its collection.

    Entries may be given either as a plain collection name or as a
    collection class; both forms are normalised to the string name.

    Args:
        collection: A collection name, or a class whose metaclass is
            ``Collection_metaclass``.

    Returns:
        The input itself when it is a ``str``. For a class, its ``_name``
        attribute when present, otherwise the class's ``__name__``.

    Raises:
        ValueError: If ``collection`` is neither a ``str`` nor an instance
            of ``Collection_metaclass``.
    """
    if isinstance(collection, str):
        # Backward compatibility: string-based edge definitions pass through.
        return collection
    if isinstance(collection, Collection_metaclass):
        # Prefer an explicitly declared _name; fall back to the class name.
        return getattr(collection, "_name", collection.__name__)
    # Adjacent string literals are concatenated verbatim, so the separator
    # between the two sentences has to be spelled out explicitly.
    raise ValueError(
        f"{collection} is of incompatible type {type(collection)}. "
        f"Make sure it's a str, GenericOOSNode or GenericEdge!"
    )

def get_document(
self,
collectionName: str,
Expand Down
104 changes: 104 additions & 0 deletions tests/test_graph_creator/test_improve_edge_definitions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Tests for https://github.com/DLR-SC/corpus-annotation-graph-builder/issues/25

import os

import pytest

from cag.framework import GraphCreatorBase
from cag.graph_elements.nodes import GenericOOSNode, Field
from cag.graph_elements.relations import GenericEdge
from cag.utils.config import Config
from tests.test_graph_creator import config_factory


class CollectionA(GenericOOSNode):
    """Node fixture referenced by class and by the string "CollectionA" in the edge definitions below."""

    _name = "CollectionA"
    # Two custom fields; GenericOOSNode's fields are unpacked last, so they
    # take precedence over any same-named key listed before them.
    _fields = {"value": Field(), "value2": Field(), **GenericOOSNode._fields}


class CollectionB(GenericOOSNode):
    """Node fixture used as the target collection of ``HasRelation``."""

    _name = "CollectionB"
    # One custom field plus everything declared on GenericOOSNode.
    _fields = {"value": Field(), **GenericOOSNode._fields}


class CollectionC(GenericOOSNode):
    """Node fixture used on both ends of the "HasAnotherRelation" edge definition."""

    _name = "CollectionC"
    # One custom field plus everything declared on GenericOOSNode.
    _fields = {"value": Field(), **GenericOOSNode._fields}


class HasRelation(GenericEdge):
    """Edge fixture passed to the edge definitions as a class (new style)."""

    # Reuses GenericEdge's field mapping unchanged (same dict object).
    _fields = GenericEdge._fields


class HasAnotherRelation(GenericEdge):
    """Edge fixture referred to by the string "HasAnotherRelation" in the edge definitions."""

    # Reuses GenericEdge's field mapping unchanged (same dict object).
    _fields = GenericEdge._fields


class HasAnotherAnotherRelation(GenericEdge):
    """Edge fixture referred to by the string "HasAnotherAnotherRelation" (old style)."""

    # Reuses GenericEdge's field mapping unchanged (same dict object).
    _fields = GenericEdge._fields


class SampleGraphCreator(GraphCreatorBase):
    """Graph creator fixture whose edge definitions mix classes and strings."""

    _name = "SampleGraphCreator"
    _description = "Graph based on the DLR elib corpus"

    # Testing mixed definitions (Class based and string based)
    _edge_definitions = [
        {  # new style: relation and collections are all given as classes
            "relation": HasRelation,
            "from_collections": [CollectionA],
            "to_collections": [CollectionB],
        },
        {  # mixed: relation given as a string, collections given as classes
            "relation": "HasAnotherRelation",
            "from_collections": [CollectionC],
            "to_collections": [CollectionC],
        },
        {  # old style: relation and collections are all given as strings
            "relation": "HasAnotherAnotherRelation",
            "from_collections": ["CollectionA"],
            "to_collections": ["CollectionA"],
        },
    ]

    def init_graph(self):
        """Do nothing; the tests insert nodes and edges directly."""
        pass


class TestGC25:
    """Checks that edge definitions accept classes as well as strings."""

    def test_arango_connection(self):
        """The connected database carries the name given in the config."""
        cfg = config_factory()
        assert cfg.arango_db.name == cfg.database

    def test_create_collection(self):
        """Collections from all definitions exist and accept nodes/edges."""
        cfg = config_factory()
        creator = SampleGraphCreator("", config_factory())

        def stored(collection_name, key):
            # Fetch the raw document straight from the database.
            return cfg.arango_db.collection(collection_name).get(key)

        # Every collection named in the checked edge definitions must exist.
        for expected_name in (
            "CollectionA",
            "CollectionB",
            "CollectionC",
            "HasRelation",
            "HasAnotherRelation",
        ):
            assert cfg.arango_db.has_collection(expected_name)

        source_node = creator.upsert_node(
            CollectionA._name, {"value": "val1", "_key": "v1"}
        )
        assert stored("CollectionA", "v1") == source_node.getStore()

        target_node = creator.upsert_node(
            CollectionB._name, {"value": "val2", "_key": "v2"}
        )
        assert stored("CollectionB", "v2") == target_node.getStore()

        # The expected edge key combines both endpoints' collection and key.
        relation = creator.upsert_edge("HasRelation", source_node, target_node)
        assert (
            stored("HasRelation", "CollectionA-v1-CollectionB-v2")
            == relation.getStore()
        )