From 9d408cade1a79d08aef1a7b8eb7601ca2bd5c53a Mon Sep 17 00:00:00 2001 From: Benjamin Schmidt Date: Mon, 10 Aug 2015 14:48:08 -0400 Subject: [PATCH 1/4] returning day-level resolution --- bookwormDB/MetaParser.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bookwormDB/MetaParser.py b/bookwormDB/MetaParser.py index 599f6d6..0a88030 100644 --- a/bookwormDB/MetaParser.py +++ b/bookwormDB/MetaParser.py @@ -3,6 +3,7 @@ import dateutil.parser import json import sys +import logging fields_to_derive = [] fields = [] @@ -174,8 +175,13 @@ def ParseJSONCatalog(target="default",source = "default"): time = int(inttime/7)*7 #Not starting on Sunday or anything funky like that. Actually, I don't know what we're starting on. Adding an integer here would fix that. line[k] = time + elif derive['resolution'] == 'day': + k = "%s_day" % field['field'] + dt = date(intent[0], intent[1], intent[2]) + inttime = DaysSinceZero(dt) + line[k] = inttime else: - sys.stderr.write('Resolution currently not supported.\n') + logging.warning('Resolution %s currently not supported.' %(derive['resolution'])) continue except ValueError: # One of out a million Times articles threw this with From 1bd318dffc836b1c96d1deda5643ad65ae238fd7 Mon Sep 17 00:00:00 2001 From: Benjamin Schmidt Date: Mon, 10 Aug 2015 14:48:32 -0400 Subject: [PATCH 2/4] seeking default directory --- bookwormDB/manager.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/bookwormDB/manager.py b/bookwormDB/manager.py index 6a0cd3d..35bfe77 100644 --- a/bookwormDB/manager.py +++ b/bookwormDB/manager.py @@ -26,6 +26,14 @@ class BookwormManager(object): def __init__(self,cnf_file="bookworm.cnf",database=None,user=None,password=None): # This will likely be changed if it isn't None. import ConfigParser + + self.basedir = None + for i in range(10): + basedir = "../"*i + if os.path.exists(basedir + ".bookworm"): + self.basedir = basedir + if self.basedir==None: + logging.debug("No bookworm directory found; proceeding on nonetheless.") self.dbname=database @@ -196,9 +204,9 @@ def extension(self,args): Creates (or updates) an extension """ - if not os.path.exists("extensions"): - os.makedirs("extensions") - my_extension = Extension(args) + if not os.path.exists(self.basedir + ".bookworm/extensions"): + os.makedirs(self.basedir + ".bookworm/extensions") + my_extension = Extension(args,basedir = self.basedir) my_extension.clone_or_pull() my_extension.make() @@ -408,9 +416,9 @@ class Extension(object): they are build using `make`. """ - def __init__(self,args): + def __init__(self,args,basedir="./"): self.args = args - self.dir = "extensions/" + re.sub(".*/","",self.args.url) + self.dir = basedir + ".bookworm/extensions/" + re.sub(".*/","",self.args.url) def clone_or_pull(self): if not os.path.exists(self.dir): From afac6691d6872a1c685057cfc134dd2cdbb39523 Mon Sep 17 00:00:00 2001 From: Benjamin Schmidt Date: Mon, 10 Aug 2015 14:49:07 -0400 Subject: [PATCH 3/4] tweaks to test suite --- test_bookworm/testAPI.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test_bookworm/testAPI.py b/test_bookworm/testAPI.py index 56c6c43..bf1aade 100644 --- a/test_bookworm/testAPI.py +++ b/test_bookworm/testAPI.py @@ -21,7 +21,7 @@ def test_server_connection(self): which isn't strictly necessary for a bookworm to be built. """ - def mtest_bookworm_creation(self): + def test_bookworm_creation(self): """ Creates a test bookworm. Removes any existing databases called "federalist_bookworm" """ @@ -110,7 +110,8 @@ class Dummy: field_descriptions = None # Test the guessing at field_descriptions while we're at it import os os.chdir("/tmp/federalist/federalist-bookworm-master") - manager.add_metadata(Dummy) + manager.add_metadata(Dummy) + self.assertTrue(1==1) def test_metadata_addition_can_be_retrieved(self): from bookwormDB.general_API import SQLAPIcall as SQLAPIcall @@ -121,13 +122,16 @@ def test_metadata_addition_can_be_retrieved(self): query = { "database":"federalist_bookworm", "search_limits":{}, - "counttype":"TextPercent", + "counttype":"TextCount", "groups":["oddness"], "method":"return_json" } m = json.loads(SQLAPIcall(query).execute()) + # Even or odd is one of two things. self.assertTrue(len(m)==2) + # Since the first paragraph is even, there should be more of those. + self.assertTrue(m['odd'][0]>=m['even'][0]) From 4450a57c7f5f52ec6c368d7f312ec23349b22cb9 Mon Sep 17 00:00:00 2001 From: Benjamin Schmidt Date: Wed, 19 Aug 2015 14:37:02 -0400 Subject: [PATCH 4/4] Allow trailing blank lines in textid files --- bookwormDB/CreateDatabase.py | 12 +++++++++--- bookwormDB/variableSet.py | 8 ++++++-- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/bookwormDB/CreateDatabase.py b/bookwormDB/CreateDatabase.py index 58c502d..9b7e77b 100755 --- a/bookwormDB/CreateDatabase.py +++ b/bookwormDB/CreateDatabase.py @@ -35,11 +35,17 @@ def text_id_dbm(): """ dbm = anydbm.open(".bookworm/texts/textids.dbm","c") for file in os.listdir(".bookworm/texts/textids"): - for line in open(".bookworm/texts/textids/" + file): + for line in open(".bookworm/texts/textids/" + file): line = line.rstrip("\n") splat = line.split("\t") - dbm[splat[1]] = splat[0] - + try: + dbm[splat[1]] = splat[0] + except IndexError: + if line=="": + # It's OK to have a blank line, let's say. + continue + else: + raise class DB: def __init__(self,dbname=None): config = ConfigParser.ConfigParser(allow_no_value=True) diff --git a/bookwormDB/variableSet.py b/bookwormDB/variableSet.py index 7b63306..ab26917 100644 --- a/bookwormDB/variableSet.py +++ b/bookwormDB/variableSet.py @@ -60,8 +60,12 @@ def __init__(self): for filelist in filelists: for line in open(".bookworm/texts/textids/%s" % filelist): parts = line.replace('\n', '').split("\t") - self[parts[1]] = int(parts[0]) - numbers.append(int(parts[0])) + if len(parts)==2: + # Allowing terminal newline. + self[parts[1]] = int(parts[0]) + numbers.append(int(parts[0])) + + self.new = open('.bookworm/texts/textids/new', 'a') self.max = max(numbers)