diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..d9c4529
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,12 @@
+sudo: required
+language: python
+
+python:
+  - "2.7"
+  - "3.6"
+  - "3.7-dev"  # 3.7 development branch
+
+install:
+
+script:
+  - "python setup.py install"
diff --git a/concurrentfloodscraper/pubsub.py b/concurrentfloodscraper/pubsub.py
index ade8488..aeff6ca 100644
--- a/concurrentfloodscraper/pubsub.py
+++ b/concurrentfloodscraper/pubsub.py
@@ -70,4 +70,4 @@ def pop(self, amount=1):
 
     # returns a string of some stats
     def print_info(self):
-        print('PubSub: pushed=%s, popped=%s, current_size=%s' % (self.pushed, self.popped, len(self.queue)))
+        print(('PubSub: pushed=%s, popped=%s, current_size=%s' % (self.pushed, self.popped, len(self.queue))))
diff --git a/concurrentfloodscraper/router.py b/concurrentfloodscraper/router.py
index 7a288e3..27087a0 100644
--- a/concurrentfloodscraper/router.py
+++ b/concurrentfloodscraper/router.py
@@ -15,7 +15,7 @@ def register(cls, regex):
 
     # return the scraper class responsible for a given url
     @staticmethod
     def route(url):
-        for regex, cls in RouteManager.paths.items():
+        for regex, cls in list(RouteManager.paths.items()):
             if regex.match(url):
                 return cls
diff --git a/concurrentfloodscraper/scraper.py b/concurrentfloodscraper/scraper.py
index 1d80d18..e3ec994 100644
--- a/concurrentfloodscraper/scraper.py
+++ b/concurrentfloodscraper/scraper.py
@@ -18,13 +18,13 @@ def __init__(self, url):
 
     # main function. returns new_urls. any data is the responsibility of subclasses
     def parse(self):
-        print('Parsing %s' % self.url)
+        print(('Parsing %s' % self.url))
 
         # get text
         try:
             text = self.load_page()
         except requests.exceptions.RequestException as e:
-            print('Error loading "%s". Error is %s' % (self.url, e))
+            print(('Error loading "%s". Error is %s' % (self.url, e)))
             return ['']  # no new urls
 
         # subclass does their stuff
@@ -32,7 +32,7 @@ def parse(self):
 
         # get new urls, and filter. return those to worker
         all_urls = self.parse_all_urls(text)
-        new_urls = list(filter(lambda x: self.url_filter_regex.match(x), all_urls))
+        new_urls = list([x for x in all_urls if self.url_filter_regex.match(x)])
         return new_urls
 
     # get html code from url