From 53c7790167a18d59fef36c05ee875096b9198e3b Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 2 Jan 2016 21:31:47 +0200 Subject: [PATCH 1/4] named groups --- tests/verbal_expressions_test.py | 8 +++++++ verbalexpressions/verbal_expressions.py | 31 ++++++++++++++----------- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/tests/verbal_expressions_test.py b/tests/verbal_expressions_test.py index 5ed52d8..13ee0ab 100644 --- a/tests/verbal_expressions_test.py +++ b/tests/verbal_expressions_test.py @@ -135,3 +135,11 @@ def test_should_match_email_address(self): def test_should_match_url(self): self.exp = self.v.start_of_line().then('http').maybe('s').then('://').maybe('www.').word().then('.').word().maybe('/').end_of_line().regex() self.assertRegexpMatches('https://www.google.com/', self.exp, 'Not a valid email') + + def test_should_find_named_groups(self): + name = "Linus Torvalds" + self.exp = self.v.start_of_line().word(name='first_name').then(' ').word(name='last_name').end_of_line().regex() + match = self.exp.match(name) + self.assertIsNotNone(match) + self.assertTrue(match.group('first_name') == 'Linus') + self.assertTrue(match.group('last_name') == 'Torvalds') diff --git a/verbalexpressions/verbal_expressions.py b/verbalexpressions/verbal_expressions.py index 1450186..4d52d3f 100644 --- a/verbalexpressions/verbal_expressions.py +++ b/verbalexpressions/verbal_expressions.py @@ -2,11 +2,16 @@ def re_escape(fn): - def arg_escaped(this, *args): + def arg_escaped(this, *args, **kwargs): t = [isinstance(a, VerEx) and a.s or re.escape(str(a)) for a in args] - return fn(this, *t) + return fn(this, *t, **kwargs) return arg_escaped + +def group(val, name=None): + prefix = '?P<{0}>'.format(name) if name else '' + return '(' + prefix + val + ')' + class VerEx(object): ''' @@ -50,49 +55,49 @@ def source(self): # --------------------------------------------- - def anything(self): - return self.add('(.*)') + def anything(self, name=None): + return 
self.add(group('.*', name)) @re_escape def anything_but(self, value): - return self.add('([^' + value + ']*)') + return self.add(group('[^' + value + ']*')) def end_of_line(self): return self.add('$') @re_escape def maybe(self, value): - return self.add("(" + value + ")?") + return self.add(group(value) + "?") def start_of_line(self): return self.add('^') @re_escape - def find(self, value): - return self.add('(' + value + ')') + def find(self, value, name=None): + return self.add(group(value, name)) then = find # special characters and groups @re_escape def any(self, value): - return self.add("([" + value + "])") + return self.add(group("[" + value + "]")) any_of = any def line_break(self): - return self.add(r"(\n|(\r\n))") + return self.add(group(r"\n|(\r\n)")) br = line_break @re_escape def range(self, *args): from_tos = [args[i:i+2] for i in range(0, len(args), 2)] - return self.add("([" + ''.join(['-'.join(i) for i in from_tos]) + "])") + return self.add(group("[" + ''.join(['-'.join(i) for i in from_tos]) + "]")) def tab(self): return self.add(r'\t') - def word(self): - return self.add(r"(\w+)") + def word(self, name=None): + return self.add(group(r"\w+", name)) def OR(self, value=None): ''' `or` is a python keyword so we use `OR` instead. 
''' From 73d31976c70d0d0bcda5cf4c482b742cdca993e6 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 2 Jan 2016 21:57:22 +0200 Subject: [PATCH 2/4] number handling --- tests/verbal_expressions_test.py | 16 +++++++++++++--- verbalexpressions/verbal_expressions.py | 7 +++++-- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/tests/verbal_expressions_test.py b/tests/verbal_expressions_test.py index 13ee0ab..2d322d9 100644 --- a/tests/verbal_expressions_test.py +++ b/tests/verbal_expressions_test.py @@ -136,10 +136,20 @@ def test_should_match_url(self): self.exp = self.v.start_of_line().then('http').maybe('s').then('://').maybe('www.').word().then('.').word().maybe('/').end_of_line().regex() self.assertRegexpMatches('https://www.google.com/', self.exp, 'Not a valid email') - def test_should_find_named_groups(self): + def test_should_find_number(self): + self.exp = self.v.start_of_line().number().end_of_line().regex() + self.assertRegexpMatches('123', self.exp, 'Number not found') + + def test_word_should_find_named_groups(self): name = "Linus Torvalds" self.exp = self.v.start_of_line().word(name='first_name').then(' ').word(name='last_name').end_of_line().regex() match = self.exp.match(name) self.assertIsNotNone(match) - self.assertTrue(match.group('first_name') == 'Linus') - self.assertTrue(match.group('last_name') == 'Torvalds') + self.assertEquals(match.group('first_name'), 'Linus') + self.assertEquals(match.group('last_name'), 'Torvalds') + + def test_number_should_find_named_groups(self): + self.exp = self.v.start_of_line().number('number').end_of_line().regex() + match = self.exp.match('123') + self.assertIsNotNone(match, self.exp.pattern) + self.assertEquals(match.group('number'), '123') diff --git a/verbalexpressions/verbal_expressions.py b/verbalexpressions/verbal_expressions.py index 4d52d3f..db77ce9 100644 --- a/verbalexpressions/verbal_expressions.py +++ b/verbalexpressions/verbal_expressions.py @@ -73,8 +73,8 @@ def start_of_line(self): return 
self.add('^') @re_escape - def find(self, value, name=None): - return self.add(group(value, name)) + def find(self, value): + return self.add(group(value)) then = find # special characters and groups @@ -98,6 +98,9 @@ def tab(self): def word(self, name=None): return self.add(group(r"\w+", name)) + + def number(self, name=None): + return self.add(group(r"\d+", name)) def OR(self, value=None): ''' `or` is a python keyword so we use `OR` instead. ''' From 2bdb6f793a37adc87263f92d5cfdae447ccc8168 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 2 Jan 2016 22:02:54 +0200 Subject: [PATCH 3/4] trailing spaces cleanup --- verbalexpressions/verbal_expressions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/verbalexpressions/verbal_expressions.py b/verbalexpressions/verbal_expressions.py index db77ce9..d983105 100644 --- a/verbalexpressions/verbal_expressions.py +++ b/verbalexpressions/verbal_expressions.py @@ -73,7 +73,7 @@ def start_of_line(self): return self.add('^') @re_escape - def find(self, value): + def find(self, value): return self.add(group(value)) then = find From f7ded0d2a1adb11fb24d04fe7e2361b6646587d2 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 2 Jan 2016 22:15:11 +0200 Subject: [PATCH 4/4] Update README.md --- README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/README.md b/README.md index 305c567..40fda2e 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,21 @@ regexp = expression.compile() result_re = regexp.sub('duck', replace_me) print result_re ``` +### Using named groups +```python +name = "Linus Torvalds" +expression = (VerEx() + .start_of_line() + .word(name='first_name') + .then(' ') + .word(name='last_name') + .end_of_line() + .regex()) +match = expression.match(name) + +print(match.group('first_name')) # Linus +print(match.group('last_name')) # Torvalds +``` ### Shorthand for string replace ```python result = VerEx().find('red').replace('We have a red house', 'blue')