-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
79 lines (67 loc) · 2.91 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from flask import Flask, render_template, request
import requests
from bs4 import BeautifulSoup as bs
from urllib.request import urlopen as uReq
import logging
from pymongo import MongoClient
# Send log records of level INFO and above to the "scrapper.log" file.
logging.basicConfig(
    level=logging.INFO,
    filename="scrapper.log",
)

# Flask application object that the route decorators in this module attach to.
app = Flask(__name__)
@app.route("/", methods=['GET'])
def homepage():
    """Serve the landing page.

    Returns:
        The rendered ``index.html`` template.
    """
    landing_page = render_template("index.html")
    return landing_page
# Upper bound (seconds) on each outbound HTTP call so a stalled remote site
# cannot block the request handler indefinitely.
_HTTP_TIMEOUT_SECONDS = 10


def _extract_review(commentbox, product):
    """Build one review record from a single review container tag.

    Each field is looked up independently; when a lookup fails the field
    falls back to a placeholder string so one malformed review never
    aborts the whole scrape or inherits a value from the previous review.

    Args:
        commentbox: A parsed review container (a tag matched by the
            ``_16PBlm`` class on the product page).
        product: The search string the review belongs to.

    Returns:
        A dict with the keys ``Product``, ``Name``, ``Rating``,
        ``CommentHead`` and ``Comment``.
    """
    try:
        name = commentbox.div.div.find_all('p', {'class': '_2sc7ZR _2V5EHH'})[0].text
    except Exception:
        name = 'No Name'
        logging.info("name")

    try:
        rating = commentbox.div.div.div.div.text
    except Exception:
        rating = 'No Rating'
        logging.info("rating")

    try:
        comment_head = commentbox.div.div.div.p.text
    except Exception:
        comment_head = 'No Comment Heading'
        logging.info(comment_head)

    try:
        comtag = commentbox.div.div.find_all('div', {'class': ''})
        cust_comment = comtag[0].div.text
    except Exception as e:
        cust_comment = 'No Comment'
        logging.info(e)

    return {
        "Product": product,
        "Name": name,
        "Rating": rating,
        "CommentHead": comment_head,
        "Comment": cust_comment,
    }


@app.route("/review", methods=['POST', 'GET'])
def index():
    """Scrape Flipkart reviews for the submitted product and show them.

    On POST, reads the ``content`` form field, searches Flipkart for it,
    follows the first product result, extracts every review container on
    the product page, stores the records in the local MongoDB collection
    ``review_scrap.review_scrap_data`` and renders ``result.html``.
    On GET, renders ``index.html``.

    Returns:
        The rendered template, or the plain string
        ``'Something went wrong.'`` if any step of the scrape fails.
    """
    # Local import keeps this block self-contained; only needed here.
    from urllib.parse import quote_plus

    if request.method != 'POST':
        return render_template('index.html')

    try:
        search_string = request.form['content'].replace(" ", "")
        # Escape the user-supplied text before embedding it in the query string.
        flipkart_url = "https://www.flipkart.com/search?q=" + quote_plus(search_string)

        # The context manager closes the connection even if read() raises.
        with uReq(flipkart_url, timeout=_HTTP_TIMEOUT_SECONDS) as u_client:
            flipkart_page = u_client.read()

        flipkart_html = bs(flipkart_page, "html.parser")
        bigboxes = flipkart_html.find_all("div", {"class": "_1AtVbE col-12-12"})
        # The first three matches are skipped; the fourth is treated as the
        # first product result (presumably the leading boxes are page
        # furniture rather than products -- confirm against the live markup).
        box = bigboxes[3]
        product_link = "https://www.flipkart.com" + box.div.div.div.a['href']

        prod_res = requests.get(product_link, timeout=_HTTP_TIMEOUT_SECONDS)
        prod_res.encoding = 'utf-8'
        prod_html = bs(prod_res.text, "html.parser")
        commentboxes = prod_html.find_all('div', {'class': "_16PBlm"})

        reviews = [_extract_review(commentbox, search_string) for commentbox in commentboxes]

        # Persist to the local MongoDB instance. insert_many rejects an empty
        # batch, so skip the database round-trip when nothing was scraped.
        if reviews:
            with MongoClient("mongodb://localhost:27017/") as client:
                review_collection = client['review_scrap']['review_scrap_data']
                review_collection.insert_many(reviews)

        # The last scraped entry is intentionally left out of the rendered
        # page, as before (presumably the final container is not a real
        # review -- TODO confirm).
        return render_template('result.html', reviews=reviews[:-1])
    except Exception as e:
        # Top-level boundary for the request: record the traceback and give
        # the user a generic message instead of a stack trace.
        logging.exception("Error occurred: %s", e)
        return 'Something went wrong.'
# Start the Flask server only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    # "0.0.0.0" makes the server listen on all network interfaces.
    listen_host = "0.0.0.0"
    app.run(host=listen_host)