-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlauncher_ds.py
159 lines (113 loc) · 3.47 KB
/
launcher_ds.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import asyncio
import atexit
import multiprocessing
import threading
import time
from datetime import datetime
from multiprocessing.pool import Pool
from smart.log import log
from smart.pipline import Piplines
from smart.runer import CrawStater
from smart.setting import gloable_setting_dict
from smart.signal import reminder
from smart.spider import Spider
from spiders.db.sanicdb import SanicDB
from spiders.govs import GovsSpider, ArticelItem
from spiders.image_spider import ImageSpider
from spiders.ipspider2 import IpSpider3, GovSpider, IpSpider, ApiSpider
from spiders.js.js_spider import JsSpider, Broswer
from spiders.json_spider import JsonSpider
from test import middleware2
# Pipeline registry for this launcher: the ``@piplinestest.pipline(n)`` decorators
# below attach their coroutine to it, and it is handed to the crawler via ``pipline=``.
piplinestest = Piplines()
@piplinestest.pipline(1)
async def do_pip(spider_ins, item):
    """Pipeline step registered with ``pipline(1)``.

    Prints ``item.results`` with a fixed prefix and hands the same item back.
    ``spider_ins`` is accepted but not used here.
    """
    message = f"我是item1111111 {item.results}"
    print(message)
    return item
@piplinestest.pipline(2)
async def pip2(spider_ins, item):
    """Pipeline step registered with ``pipline(2)``.

    Prints ``item.results`` with a fixed prefix and returns the item unchanged.
    ``spider_ins`` is accepted but not used here.
    """
    message = f"我是item2222222 {item.results}"
    print(message)
    return item
@piplinestest.pipline(3)
async def pip3(spider_ins, item):
    """Pipeline step registered with ``pipline(3)``.

    Prints ``item.results`` with a fixed prefix and returns the item unchanged.
    ``spider_ins`` is accepted but not used here.
    """
    message = f"我是item33333 {item.results}"
    print(message)
    return item
# Module-level SanicDB handle: used by ``to_mysql_db`` for inserts and closed by
# ``when_end`` at interpreter exit.
# NOTE(review): the positional arguments are presumably host, database, user and
# password -- confirm against SanicDB's signature; they are hard-coded here.
db = SanicDB(
    'localhost',
    'testdb',
    'root',
    'root',
    minsize=5,
    maxsize=55,
    connect_timeout=10,
)
@atexit.register
def when_end():
    """Exit hook (registered through ``atexit``) that closes the module-level ``db``.

    Does nothing when ``db`` is falsy.
    """
    # NOTE(review): if SanicDB.close is a coroutine function, calling it without
    # awaiting would not actually run it -- confirm against SanicDB.
    if not db:
        return
    db.close()
@piplinestest.pipline(3)
async def to_mysql_db(spider_ins, item):
    """Pipeline step that inserts ``ArticelItem`` results into the ``art`` table.

    Registered with ``pipline(3)``, the same argument ``pip3`` uses.
    Items that are falsy or not ``ArticelItem`` instances pass straight through.
    For matching items, ``item.results`` is handed to ``db.table_insert`` and
    the value it returns is printed. The item itself is always returned.
    ``spider_ins`` is accepted but not used here.
    """
    # Guard clause: anything that is not a truthy ArticelItem is not stored.
    if not (item and isinstance(item, ArticelItem)):
        return item

    print(f"我是item3 入库 {item.results}")
    inserted_id = await db.table_insert("art", item.results)
    print(f"last_id {inserted_id}")
    return item
def start1():
    """Run a single ``IpSpider`` through ``CrawStater.run_single``.

    Uses the module's ``middleware2`` and ``piplinestest`` as middleware and
    pipeline.
    """
    crawler = CrawStater()
    ip_spider = IpSpider()
    crawler.run_single(ip_spider, middlewire=middleware2, pipline=piplinestest)
@reminder.spider_start.connect
def rr(sender, **kwargs):
    """Receiver connected to ``reminder.spider_start``.

    Prints a marker line and returns the constant ``1222222``; ``sender`` and
    ``kwargs`` are ignored.
    """
    marker = "spider_start1"
    print(marker)
    return 1222222
@reminder.spider_start.connect
def gfgfgf(sender, **kwargs):
    """Second receiver connected to ``reminder.spider_start``.

    Prints a marker line and returns the constant ``33333333``; ``sender`` and
    ``kwargs`` are ignored.
    """
    marker = "spider_start2"
    print(marker)
    return 33333333
@reminder.spider_execption.connect
def asa(sender, **kwargs):
    """Receiver connected to ``reminder.spider_execption``; prints a marker line."""
    marker = "spider_execption"
    print(marker)
@reminder.spider_close.connect
def dfd(sender, **kwargs):
    """Receiver connected to ``reminder.spider_close``; prints a marker line."""
    marker = "spider_close"
    print(marker)
@reminder.engin_start.connect
def hhh(sender, **kwargs):
    """Receiver connected to ``reminder.engin_start``; prints a marker line."""
    marker = "engin_start"
    print(marker)
@reminder.engin_idle.connect
def ggg(sender, **kwargs):
    """Receiver connected to ``reminder.engin_idle``; prints a marker line."""
    marker = "engin_idle"
    print(marker)
@reminder.engin_close.connect
def gggggg(sender, **kwargs):
    """Receiver connected to ``reminder.engin_close``; prints a marker line."""
    marker = "engin_close"
    print(marker)
@reminder.request_dropped.connect
def rrr(sender, **kwargs):
    """Receiver connected to ``reminder.request_dropped``; prints a marker line.

    ``sender`` and ``kwargs`` are ignored.
    """
    # The marker names the signal this receiver is attached to, so its output
    # cannot be confused with the spider_start receivers.
    print("request_dropped")
@reminder.request_scheduled.connect
def dsdsds(sender, **kwargs):
    """Receiver connected to ``reminder.request_scheduled``; prints a marker line."""
    marker = "request_scheduled"
    print(marker)
@reminder.response_received.connect
def sasa(sender, **kwargs):
    """Receiver connected to ``reminder.response_received``; prints a marker line."""
    marker = "response_received"
    print(marker)
@reminder.response_downloaded.connect
def yyy(sender, **kwargs):
    """Receiver connected to ``reminder.response_downloaded``; prints a marker line."""
    marker = "response_downloaded"
    print(marker)
@reminder.item_dropped.connect
def xxx(sender, **kwargs):
    """Receiver connected to ``reminder.item_dropped``; prints a marker line.

    ``sender`` and ``kwargs`` are ignored.
    """
    # The marker names the signal this receiver is attached to, so its output
    # cannot be confused with the spider_start receivers.
    print("item_dropped")
def main():
    """Run one ``IpSpider`` through ``CrawStater.run_many``.

    Before running, updates ``gloable_setting_dict`` so the duplicate filter
    and scheduler container point at classes in ``spiders.distributed`` and
    ``is_single`` is 0. Uses the module's ``middleware2`` and ``piplinestest``
    as middleware and pipeline.
    """
    crawler = CrawStater()

    # NOTE(review): these three spiders are instantiated but never passed to
    # run_many. They are kept because their constructors may have side effects;
    # confirm and remove if they do not.
    govs_spider = GovsSpider()
    json_spider = JsonSpider()
    js_spider = JsSpider()

    distributed_settings = dict(
        duplicate_filter_class="spiders.distributed.AioRedisBaseDuplicateFilter",
        scheduler_container_class="spiders.distributed.AioRedisSchuler",
        is_single=0,
    )
    gloable_setting_dict.update(**distributed_settings)

    ip_spider = IpSpider()
    crawler.run_many([ip_spider], middlewire=middleware2, pipline=piplinestest)
if __name__ == '__main__':
    # Script entry point: run main() in four worker processes and print the
    # elapsed wall-clock time once all of them have finished.
    start = time.time()
    pool = multiprocessing.Pool(4)

    def _report_worker_error(exc):
        # apply_async stores a worker's exception on its AsyncResult; since the
        # results are never fetched, a failure would otherwise go unnoticed.
        print(f"worker failed: {exc!r}")

    for _ in range(4):
        pool.apply_async(main, error_callback=_report_worker_error)
    # For a single-process run, call main() directly instead of using the pool.
    pool.close()
    pool.join()
    print(f'结束 花费{time.time() - start}s')
# starter.run_many([spider])