Skip to content

Commit

Permalink
Upgrade scrapy to version 2.2.0 (#47)
Browse files Browse the repository at this point in the history
* Remove custom context factory

Update scrapy to latest version

* Fix Changelog after update
  • Loading branch information
renehernandez authored Jul 7, 2020
1 parent 8e34402 commit 2e14dbe
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 27 deletions.
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ verify_ssl = true
name = "pypi"

[packages]
-Scrapy = "==1.7.*"
+Scrapy = "==2.2.0"
selenium = "==3.141.0"
pytest = "==5.4.3"
meilisearch = "==0.11.2"
Expand Down
74 changes: 65 additions & 9 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 0 additions & 3 deletions scraper/src/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from .custom_downloader_middleware import CustomDownloaderMiddleware
from .custom_dupefilter import CustomDupeFilter
from .config.browser_handler import BrowserHandler
-from .scrapy_patch import CustomContextFactory

try:
# disable boto (S3 download)
Expand Down Expand Up @@ -46,7 +45,6 @@ def run_config(config):

root_module = 'src.' if __name__ == '__main__' else 'scraper.src.'
DOWNLOADER_MIDDLEWARES_PATH = root_module + 'custom_downloader_middleware.' + CustomDownloaderMiddleware.__name__
-DOWNLOADER_CLIENTCONTEXTFACTORY = root_module + 'scrapy_patch.' + CustomContextFactory.__name__
DUPEFILTER_CLASS_PATH = root_module + 'custom_dupefilter.' + CustomDupeFilter.__name__

headers = {
Expand Down Expand Up @@ -78,7 +76,6 @@ def run_config(config):
'USER_AGENT': config.user_agent,
'DOWNLOADER_MIDDLEWARES': {DOWNLOADER_MIDDLEWARES_PATH: 900},
# Need to be > 600 to be after the redirectMiddleware
-'DOWNLOADER_CLIENTCONTEXTFACTORY': DOWNLOADER_CLIENTCONTEXTFACTORY,
'DUPEFILTER_USE_ANCHORS': config.use_anchors,
# Use our custom dupefilter in order to be scheme agnostic regarding link provided
'DUPEFILTER_CLASS': DUPEFILTER_CLASS_PATH,
Expand Down
14 changes: 0 additions & 14 deletions scraper/src/scrapy_patch.py

This file was deleted.

0 comments on commit 2e14dbe

Please sign in to comment.