Skip to content

Commit

Permalink
Merge pull request #122 from forslund/feature/drops
Browse files Browse the repository at this point in the history
Add support for dropping intents, entities and regexes
  • Loading branch information
forslund authored Apr 1, 2021
2 parents 658edab + 20f1aac commit 4260a85
Show file tree
Hide file tree
Showing 6 changed files with 350 additions and 1 deletion.
106 changes: 106 additions & 0 deletions adapt/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,72 @@ def register_intent_parser(self, intent_parser):
else:
raise ValueError("%s is not an intent parser" % str(intent_parser))

def drop_intent_parser(self, parser_names):
    """Drop one or more registered intent parsers by name.

    Arguments:
        parser_names (str or iterable): a single parser name, or any
            iterable of names (list, tuple, set, generator, ...)

    Returns:
        (bool) True if at least one parser was dropped else False
    """
    if isinstance(parser_names, str):
        # A bare string is one name, not an iterable of characters.
        names_to_drop = {parser_names}
    else:
        # Materialise the names once. A one-shot iterable (e.g. a
        # generator) would otherwise be exhausted by the first membership
        # test below and every later parser would be kept by mistake.
        names_to_drop = set(parser_names)

    remaining = [p for p in self.intent_parsers
                 if p.name not in names_to_drop]
    dropped_any = len(remaining) != len(self.intent_parsers)
    self.intent_parsers = remaining

    return dropped_any

def drop_entity(self, entity_type=None, match_func=None):
    """Drop all entities matching the given entity type or match function.

    Arguments:
        entity_type (str): entity name to match against
        match_func (callable): predicate called with the data stored in
            the trie; when given it replaces the entity_type comparison

    Returns:
        (bool) True if vocab was found and removed otherwise False.
    """
    def _has_entity_type(data):
        # The second element of the stored data is compared with the
        # requested entity type.
        return data and data[1] == entity_type

    matcher = match_func or _has_entity_type
    found = self.trie.scan(matcher)
    for key, data in found:
        self.trie.remove(key, data)

    return len(found) != 0

def drop_regex_entity(self, entity_type=None, match_func=None):
    """Remove registered regex entities.

    Arguments:
        entity_type (str): name of a regex match group; every registered
            expression that defines a group with this name is dropped
        match_func (callable): predicate called with each compiled regex,
            returning True for the ones to drop; when given it replaces
            the entity_type check

    Returns:
        (bool) True if a regex entity was found and removed otherwise
            False.
    """
    def default_match_func(regexp):
        return entity_type in regexp.groupindex

    match_func = match_func or default_match_func
    matches = [r for r in self.regular_expressions_entities
               if match_func(r)]
    dropped_patterns = {r.pattern for r in matches}

    # Update the list in place (slice assignment) instead of rebinding the
    # attribute, so any other holder of a reference to this same list sees
    # the removal too.
    # NOTE(review): presumably another component is constructed with this
    # list; that code is outside this view - confirm.
    self.regular_expressions_entities[:] = [
        r for r in self.regular_expressions_entities if r not in matches
    ]
    # Rebuild with the container's own type so a set stays a set and a
    # list stays a list; code relying on type-specific methods keeps
    # working after a drop.
    self._regex_strings = type(self._regex_strings)(
        s for s in self._regex_strings if s not in dropped_patterns
    )

    return len(matches) != 0


class DomainIntentDeterminationEngine(object):
"""
Expand Down Expand Up @@ -368,3 +434,43 @@ def register_intent_parser(self, intent_parser, domain=0):
self.register_domain(domain=domain)
self.domains[domain].register_intent_parser(
intent_parser=intent_parser)

def drop_intent_parser(self, parser_names, domain):
    """Drop intent parser(s) registered in a single domain.

    Arguments:
        parser_names (list, str): parser names to drop.
        domain (str): domain to drop from

    Returns:
        (bool) True if an intent parser was dropped else False.
    """
    # Delegate to the engine stored for the requested domain.
    domain_engine = self.domains[domain]
    return domain_engine.drop_intent_parser(parser_names)

def drop_entity(self, domain, entity_type=None, match_func=None):
    """Drop all entities matching the given entity type or match function.

    Arguments:
        domain (str): intent domain
        entity_type (str): entity name to match against
        match_func (callable): match function to find entities

    Returns:
        (bool) True if vocab was found and removed otherwise False.
    """
    # Delegate to the engine stored for the requested domain.
    domain_engine = self.domains[domain]
    return domain_engine.drop_entity(entity_type=entity_type,
                                     match_func=match_func)

def drop_regex_entity(self, domain, entity_type=None, match_func=None):
    """Remove a registered regex entity from a single domain.

    Arguments:
        domain (str): intent domain
        entity_type (str): entity name to match against
        match_func (callable): match function to find entities

    Returns:
        (bool) True if vocab was found and removed otherwise False.
    """
    # Delegate to the engine stored for the requested domain.
    domain_engine = self.domains[domain]
    return domain_engine.drop_regex_entity(entity_type=entity_type,
                                           match_func=match_func)
33 changes: 33 additions & 0 deletions adapt/tools/text/trie.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,3 +209,36 @@ def remove(self, iterable, data=None):
data: data to be paired with the key
"""
return self.root.remove(iterable, data=data)

def scan(self, match_func):
    """Walk the whole trie and collect node data accepted by match_func.

    Arguments:
        match_func (callable): predicate called with each data item
            stored on a node.

    Returns:
        (list) list of (key, data) tuples, where key is the string
            spelled out by the path from the root to the node holding
            the matching data.
    """
    found = []
    # Depth-first walk using an explicit stack of (node, key so far).
    pending = [(self.root, '')]
    while pending:
        node, prefix = pending.pop()

        # Collect the matching data stored on this node.
        found.extend(
            (prefix, item) for item in node.data if match_func(item)
        )

        # Push children in reverse so they are popped, and therefore
        # visited, in their normal iteration order.
        children = node.children
        pending.extend(
            (children[c], prefix + c) for c in reversed(list(children))
        )
    return found
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

setup(
name="adapt-parser",
version="0.3.7",
version="0.4.0",
author="Sean Fitzgerald",
author_email="[email protected]",
description=("A text-to-intent parsing framework."),
Expand Down
50 changes: 50 additions & 0 deletions test/DomainIntentEngineTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,3 +219,53 @@ def test_select_best_intent_enuse_enitities_dont_register_in_multiple_domains(se
intents = self.engine.determine_intent(utterance, 1)
for intent in intents:
self.assertNotEqual(intent['intent_type'], 'Parser2')

def test_drop_intent_from_domain(self):
    """Test that intent is dropped from the correct domain."""
    self.engine.register_domain('Domain1')
    self.engine.register_domain('Domain2')

    # Creating first intent domain
    parser1 = IntentBuilder("Parser1").require("Entity1").build()
    self.engine.register_intent_parser(parser1, domain='Domain1')
    self.engine.register_entity("tree", "Entity1", domain='Domain1')

    # Creating second intent domain
    parser2 = IntentBuilder("Parser2").require("Entity2").build()
    self.engine.register_intent_parser(parser2, domain="Domain2")
    self.engine.register_entity("house", "Entity2", domain="Domain2")

    # The drop must report success...
    self.assertTrue(self.engine.drop_intent_parser(
        domain="Domain2", parser_names=['Parser2']))
    # ...empty the targeted domain...
    self.assertEqual(len(self.engine.domains['Domain2'].intent_parsers), 0)
    # ...and leave the parser of the other domain in place.
    remaining = [p.name
                 for p in self.engine.domains['Domain1'].intent_parsers]
    self.assertEqual(remaining, ['Parser1'])

def test_drop_entity_from_domain(self):
    """Test that entity is dropped from domain."""
    self.engine.register_domain('Domain1')
    self.engine.register_domain('Domain2')

    # Creating first intent domain
    parser1 = IntentBuilder("Parser1").require("Entity1").build()
    self.engine.register_intent_parser(parser1, domain='Domain1')
    self.engine.register_entity("tree", "Entity1", domain='Domain1')

    # Creating second intent domain
    parser2 = IntentBuilder("Parser2").require("Entity2").build()
    self.engine.register_intent_parser(parser2, domain="Domain2")
    self.engine.register_entity("house", "Entity2", domain="Domain2")

    self.assertTrue(self.engine.drop_entity(domain="Domain2",
                                            entity_type='Entity2'))
    # Entity2 was only registered in Domain2, so there is nothing to drop
    # from Domain1 (mirrors the negative check in the regex entity test).
    self.assertFalse(self.engine.drop_entity(domain="Domain1",
                                             entity_type='Entity2'))

def testDropRegexEntity(self):
    """A regex entity is only dropped from the domain it was added to."""
    for domain_name in ("Domain1", "Domain2"):
        self.engine.register_domain(domain_name)

    dog_regex = r"the dog (?P<Dog>.*)"
    cat_regex = r"the cat (?P<Cat>.*)"
    self.engine.register_regex_entity(dog_regex, "Domain1")
    self.engine.register_regex_entity(cat_regex, "Domain2")

    # The Cat expression lives in Domain2 only.
    dropped_from_domain2 = self.engine.drop_regex_entity(
        domain='Domain2', entity_type='Cat')
    self.assertTrue(dropped_from_domain2)

    dropped_from_domain1 = self.engine.drop_regex_entity(
        domain='Domain1', entity_type='Cat')
    self.assertFalse(dropped_from_domain1)
107 changes: 107 additions & 0 deletions test/IntentEngineTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,110 @@ def testSelectBestIntent(self):
intent = next(self.engine.determine_intent(utterance))
assert intent
assert intent['intent_type'] == 'Parser2'

def testDropIntent(self):
    """After dropping Parser2 the same utterance resolves to Parser1."""
    single_entity_parser = (IntentBuilder("Parser1")
                            .require("Entity1")
                            .build())
    self.engine.register_intent_parser(single_entity_parser)
    self.engine.register_entity("tree", "Entity1")

    double_entity_parser = (IntentBuilder("Parser2")
                            .require("Entity1")
                            .require("Entity2")
                            .build())
    self.engine.register_intent_parser(double_entity_parser)
    self.engine.register_entity("house", "Entity2")

    utterance = "go to the tree house"

    # With both parsers registered, Parser2 is the first result.
    best = next(self.engine.determine_intent(utterance))
    assert best
    assert best['intent_type'] == 'Parser2'

    was_dropped = self.engine.drop_intent_parser('Parser2')
    assert was_dropped is True

    # With Parser2 gone, Parser1 is the first result.
    best = next(self.engine.determine_intent(utterance))
    assert best
    assert best['intent_type'] == 'Parser1'

def testDropEntity(self):
    """Dropping an entity type removes every word registered for it."""
    parser = IntentBuilder("Parser1").require("Entity1").build()
    self.engine.register_intent_parser(parser)
    for word in ("laboratory", "lab"):
        self.engine.register_entity(word, "Entity1")

    lab_utterance = "get out of my lab"
    laboratory_utterance = "get out of my laboratory"

    # Both spellings trigger the parser to begin with.
    for text in (lab_utterance, laboratory_utterance):
        intent = next(self.engine.determine_intent(text))
        assert intent
        assert intent['intent_type'] == 'Parser1'

    # Remove the entity, then re-register only "laboratory".
    self.engine.drop_entity(entity_type='Entity1')
    self.engine.register_entity("laboratory", "Entity1")

    # Sentence containing lab should not produce any results
    with self.assertRaises(StopIteration):
        intent = next(self.engine.determine_intent(lab_utterance))

    # But sentence with laboratory should
    intent = next(self.engine.determine_intent(laboratory_utterance))
    assert intent
    assert intent['intent_type'] == 'Parser1'

def testCustomDropEntity(self):
    """A custom match function can drop several entity types at once."""
    parser = (IntentBuilder("Parser1")
              .one_of("Entity1", "Entity2")
              .build())
    self.engine.register_intent_parser(parser)
    self.engine.register_entity("laboratory", "Entity1")
    self.engine.register_entity("lab", "Entity2")

    lab_utterance = "get out of my lab"
    laboratory_utterance = "get out of my laboratory"

    # Both words trigger the parser to begin with.
    for text in (lab_utterance, laboratory_utterance):
        intent = next(self.engine.determine_intent(text))
        assert intent
        assert intent['intent_type'] == 'Parser1'

    def is_test_entity(data):
        """Match data whose second element starts with 'Entity'."""
        return data[1].startswith('Entity')

    # Drop using the custom matcher, then re-register only "laboratory".
    self.engine.drop_entity(match_func=is_test_entity)
    self.engine.register_entity("laboratory", "Entity1")

    # Sentence containing lab should not produce any results
    with self.assertRaises(StopIteration):
        intent = next(self.engine.determine_intent(lab_utterance))

    # But sentence with laboratory should
    intent = next(self.engine.determine_intent(laboratory_utterance))
    assert intent

def testDropRegexEntity(self):
    """Dropping by entity type removes one regex and its pattern string."""
    for expression in (r"the dog (?P<Dog>.*)", r"the cat (?P<Cat>.*)"):
        self.engine.register_regex_entity(expression)

    engine = self.engine
    assert len(engine._regex_strings) == 2
    assert len(engine.regular_expressions_entities) == 2

    engine.drop_regex_entity(entity_type='Cat')

    # Re-read the attributes through self.engine after the drop.
    assert len(self.engine._regex_strings) == 1
    assert len(self.engine.regular_expressions_entities) == 1

def testCustomDropRegexEntity(self):
    """A custom matcher drops only the regexes it accepts."""
    expressions = (
        r"the dog (?P<SkillADog>.*)",
        r"the cat (?P<SkillACat>.*)",
        r"the mangy dog (?P<SkillBDog>.*)",
    )
    for expression in expressions:
        self.engine.register_regex_entity(expression)
    assert len(self.engine._regex_strings) == 3
    assert len(self.engine.regular_expressions_entities) == 3

    def belongs_to_skill_b(regexp):
        """Matcher for all match groups defined for SkillB"""
        return any(group.startswith('SkillB')
                   for group in regexp.groupindex.keys())

    self.engine.drop_regex_entity(match_func=belongs_to_skill_b)
    assert len(self.engine._regex_strings) == 2
    assert len(self.engine.regular_expressions_entities) == 2
53 changes: 53 additions & 0 deletions test/TrieTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,59 @@ def test_edit_distance_no_confidence(self):
results = list(trie.gather("of the big bang theory"))
assert len(results) == 0

def test_remove(self):
    """A removed key is no longer gathered; other keys still are."""
    trie = Trie(max_edit_distance=2)
    for key in ("1", "2"):
        trie.insert(key, "Number")
    trie.remove("2")

    matches_for_one = list(trie.gather("1"))
    matches_for_two = list(trie.gather("2"))
    assert len(matches_for_one) == 1  # One match found
    assert len(matches_for_two) == 0  # Zero matches since removed

def test_remove_multi_last(self):
    """Removing the data inserted last leaves the first data in place."""
    trie = Trie(max_edit_distance=2)
    for category in ("Muppets", "Frogs"):
        trie.insert("Kermit", category)

    before = list(trie.lookup("Kermit"))[0]
    assert 'Frogs' in before['data']
    assert 'Muppets' in before['data']

    trie.remove("Kermit", "Frogs")

    after = list(trie.gather("Kermit"))[0]
    assert after['data'] == {"Muppets"}  # Right data remains

def test_remove_multi_first(self):
    """Removing the data inserted first leaves the last data in place."""
    trie = Trie(max_edit_distance=2)
    for category in ("Muppets", "Frogs"):
        trie.insert("Kermit", category)

    before = list(trie.lookup("Kermit"))[0]
    assert 'Frogs' in before['data']
    assert 'Muppets' in before['data']

    trie.remove("Kermit", "Muppets")

    after = list(trie.lookup("Kermit"))[0]
    assert after['data'] == {"Frogs"}  # Right data remains

def test_scan(self):
    """scan() returns the keys whose data the match function accepts."""
    muppets = ("Kermit", "Gonzo", "Rowlf")

    trie = Trie(max_edit_distance=2)
    for name in muppets:
        trie.insert(name, "Muppets")
    trie.insert("Gobo", "Fraggles")

    def is_muppet(data):
        return data == "Muppets"

    results = trie.scan(is_muppet)
    assert len(results) == 3

    # The first element of every result is the key that was inserted.
    found_names = [pair[0] for pair in results]
    for name in muppets:
        assert name in found_names

def tearDown(self):
    """Nothing to clean up after a test."""

0 comments on commit 4260a85

Please sign in to comment.