From dc0b05286002963d97a957faad19fe1a5f102de2 Mon Sep 17 00:00:00 2001 From: Maxim Kolchin Date: Sat, 4 Apr 2015 22:32:49 +0300 Subject: [PATCH] Extended test_Q1.10. Filed an issue #4 --- .../parsers/publication_parser.py | 2 +- .../tests/test_Q1.10/expected.output | 35 +++++++++++++++++++ .../CeurWsParser/tests/test_Q1.10/input.urls | 22 ++++++++++++ .../tests/test_Q1.10/query.sparql | 17 ++++++++- 4 files changed, 74 insertions(+), 2 deletions(-) create mode 100644 ceur-ws-crawler/CeurWsParser/tests/test_Q1.10/input.urls diff --git a/ceur-ws-crawler/CeurWsParser/parsers/publication_parser.py b/ceur-ws-crawler/CeurWsParser/parsers/publication_parser.py index 3b3c3da..e7a3853 100755 --- a/ceur-ws-crawler/CeurWsParser/parsers/publication_parser.py +++ b/ceur-ws-crawler/CeurWsParser/parsers/publication_parser.py @@ -39,7 +39,7 @@ def end_template(self): @staticmethod def is_invited(publication): - if rex.rex(publication['link'], r'.*(keynote|invite).*', re.I, default=None): + if rex.rex(publication['link'], r'.*(keynote|invite|talk|-inv-).*', re.I, default=None): return True else: return False diff --git a/ceur-ws-crawler/CeurWsParser/tests/test_Q1.10/expected.output b/ceur-ws-crawler/CeurWsParser/tests/test_Q1.10/expected.output index 3da89f2..e2ac5ec 100755 --- a/ceur-ws-crawler/CeurWsParser/tests/test_Q1.10/expected.output +++ b/ceur-ws-crawler/CeurWsParser/tests/test_Q1.10/expected.output @@ -3,8 +3,43 @@ ,"Mihai Codescu" ,"Christoph Lange" ,"George A. Vouros" +,"Giuseppe De Giacomo" +,"Michel Dumontier" +,"Ian Pratt-Hartmann" ,"Ethan Munson" +,"Peter Murray-Rust" +,"Hannes Mühleisen" +,"Christian Bizer" +,"Peter Mika" +,"Tim Potter" +,"Roberto Navigli" ,"Thomas Eiter" ,"Luciano Serafini" +,"Serge Abiteboul" +,"Piero Bonatti" +,"Alan Rector" +,"Marcelo Arenas" +,"Gert Smolka" +,"Heiner Stuckenschmidt" +,"Leonid Libkin" +,"Kavitha Srinivas" +,"Rajeev Goré" +,"Andrea Cali" +,"Georg Gottlob" +,"Michael Kifer" +,"Renee Miller" +,"Hector Levesque" +,"Alex Borgida" +,"John Mylopoulos" +,"Richard Hull" +,"Patrick Blackburn" +,"Andrei Voronkov" +,"Maurizio Lenzerini" +,"P. Devanbu" +,"C. Goble" +,"D. Harel" +,"Natasha Alechina" +,"Robert MacGregor" +,"Deborah McGuinness" ,"M. Lenzerini" ,"M. Scholl" diff --git a/ceur-ws-crawler/CeurWsParser/tests/test_Q1.10/input.urls b/ceur-ws-crawler/CeurWsParser/tests/test_Q1.10/input.urls new file mode 100644 index 0000000..0f18b3f --- /dev/null +++ b/ceur-ws-crawler/CeurWsParser/tests/test_Q1.10/input.urls @@ -0,0 +1,22 @@ +input_urls = ["http://ceur-ws.org/", +"http://ceur-ws.org/Vol-1085/", +"http://ceur-ws.org/Vol-1081/", +"http://ceur-ws.org/Vol-1014/", +"http://ceur-ws.org/Vol-1008/", +"http://ceur-ws.org/Vol-994/", +"http://ceur-ws.org/Vol-937/", +"http://ceur-ws.org/Vol-936/", +"http://ceur-ws.org/Vol-875/", +"http://ceur-ws.org/Vol-859/", +"http://ceur-ws.org/Vol-846/", +"http://ceur-ws.org/Vol-745/", +"http://ceur-ws.org/Vol-573/", +"http://ceur-ws.org/Vol-477/", +"http://ceur-ws.org/Vol-353/", +"http://ceur-ws.org/Vol-250/", +"http://ceur-ws.org/Vol-81/", +"http://ceur-ws.org/Vol-53/", +"http://ceur-ws.org/Vol-33/", +"http://ceur-ws.org/Vol-22/", +"http://ceur-ws.org/Vol-1/" +] diff --git a/ceur-ws-crawler/CeurWsParser/tests/test_Q1.10/query.sparql b/ceur-ws-crawler/CeurWsParser/tests/test_Q1.10/query.sparql index 1144808..3979fbd 100755 --- a/ceur-ws-crawler/CeurWsParser/tests/test_Q1.10/query.sparql +++ b/ceur-ws-crawler/CeurWsParser/tests/test_Q1.10/query.sparql @@ -2,12 +2,27 @@ SELECT ?proc ?author_name { VALUES ?proc { + + + + + + + + + + + + + + + } ?proc dcterms:hasPart ?pub . ?pub a swc:InvitedPaper . - ?author dc:creator ?pub ; + ?author foaf:made ?pub ; foaf:name ?author_name . }