-
Notifications
You must be signed in to change notification settings - Fork 1
/
packtfree.py
63 lines (52 loc) · 3.11 KB
/
packtfree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# -*- coding: utf-8 -*-
#
# packtfree_telegram_bot - Receive Packt Publishing Ltd. Free Learning updates in Telegram each day
# Copyright (c) 2016-2020 Emanuele Cipolla <[email protected]>
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
# the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
import html2text
import pickle
import os.path
# Page that get_book_info() opens in the browser to scrape the book details.
# This URL can (will?) change:
_FREE_LEARNING_PAGE = "https://packtpub.com/free-learning"
# File in which get_book_info() pickles its result dict and from which it
# reads the dict back on later calls. The path is relative, so it is resolved
# against the current working directory of the process.
_BOOK_INFO_PICKLE_FILE = 'book_info.pickle'
def _scrape_book_info(driver):
    """Build the book-info dict from the page currently loaded in *driver*.

    Three outcomes are possible:

    * the ``product__img`` and ``product__info`` elements are present: the
      title, image URL and description are extracted and ``error`` is False;
    * they are missing but a ``message`` element exists: its text becomes the
      description and ``error`` is False only when the element carries the
      ``warning`` CSS class;
    * neither is present: ``error`` is True with a fixed description.
    """
    try:
        product_img = driver.find_element_by_class_name("product__img")
        image_url = product_img.get_attribute('src')
        title = product_img.get_attribute('alt')
        product_info = driver.find_element_by_class_name("product__info")
        # Line breaks are stripped from the markup before html2text converts it.
        inner_html = product_info.get_attribute('innerHTML')
        description = html2text.html2text(inner_html.replace("\n", "").replace("\r", ""))
        return dict(error=False, title=title, image=image_url, description=description)
    except NoSuchElementException:
        pass  # no product on the page; look for a site message instead

    try:
        message = driver.find_element_by_class_name("message")
    except NoSuchElementException:
        return dict(error=True, title=None, image=None,
                    description='Unexpected changes in page structure')

    css_classes = message.get_attribute('class').split(' ')
    # Store the element's text, not the WebElement: the element is bound to the
    # browser session (which is closed by the caller) and the dict is pickled.
    return dict(error='warning' not in css_classes,
                title=None, image=None, description=message.text)


def get_book_info(force=False):
    """Return a dict describing the book shown on the free-learning page.

    The result has the keys ``error``, ``title``, ``image`` and
    ``description``. It is cached in ``_BOOK_INFO_PICKLE_FILE``; the page is
    only scraped (with headless Chrome) when that file does not exist or when
    *force* is true, and every scrape result -- including error results --
    overwrites the cache.

    :param force: when true, ignore any cached result and scrape again.
    :return: the book-info dict, freshly scraped or loaded from the cache.
    """
    if not force and os.path.isfile(_BOOK_INFO_PICKLE_FILE):
        # NOTE: pickle.load executes whatever the file describes. The cache is
        # written only by this function; never point it at an untrusted file.
        with open(_BOOK_INFO_PICKLE_FILE, 'rb') as handle:
            return pickle.load(handle)

    options = webdriver.ChromeOptions()
    options.add_argument('headless')
    options.add_argument('disable-gpu')
    driver = webdriver.Chrome(chrome_options=options)
    try:
        driver.get(_FREE_LEARNING_PAGE)
        driver.implicitly_wait(10)  # allow for sluggish connection
        book_info_dict = _scrape_book_info(driver)
    finally:
        # Always shut the browser down, even if loading or scraping raised.
        driver.quit()

    with open(_BOOK_INFO_PICKLE_FILE, 'wb') as handle:
        pickle.dump(book_info_dict, handle)
    return book_info_dict