-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlauncher.py
145 lines (101 loc) · 3.12 KB
/
launcher.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import asyncio
import atexit
import threading
import time
from multiprocessing.pool import Pool
from blinker import Signal
from smart.log import log
from smart.pipline import Piplines
from smart.runer import CrawStater
from smart.setting import gloable_setting_dict
from smart.signal import reminder
from smart.spider import Spider
from spiders.db.sanicdb import SanicDB
from spiders.govs import GovsSpider, ArticelItem
from spiders.image_spider import ImageSpider
from spiders.ipspider2 import IpSpider3, GovSpider, IpSpider, ApiSpider
from spiders.js.js_spider import JsSpider, Broswer
from spiders.json_spider import JsonSpider
from test import middleware2
# Pipeline registry for this launcher; the ``@piplinestest.pipline(n)``
# decorators below attach the async item-handling functions to it.
piplinestest = Piplines()
@piplinestest.pipline(1)
async def do_pip(spider_ins, item):
    """Demo pipeline step registered on ``piplinestest`` with argument 1.

    Sleeps for one second, prints the item's ``results`` and passes the
    item along unchanged.

    Args:
        spider_ins: Not used by this step.
        item: Object exposing a ``results`` attribute.

    Returns:
        The ``item`` that was passed in.
    """
    await asyncio.sleep(1)
    message = f"我是item1111111 {item.results}"
    print(message)
    return item
@piplinestest.pipline(2)
async def pip2(spider_ins, item):
    """Demo pipeline step registered on ``piplinestest`` with argument 2.

    Prints the item's ``results`` first, then sleeps for half a second
    before passing the item along unchanged.

    Args:
        spider_ins: Not used by this step.
        item: Object exposing a ``results`` attribute.

    Returns:
        The ``item`` that was passed in.
    """
    message = f"我是item2222222 {item.results}"
    print(message)
    await asyncio.sleep(0.5)
    return item
@piplinestest.pipline(3)
async def pip3(spider_ins, item):
    """Demo pipeline step registered on ``piplinestest`` with argument 3.

    Prints the item's ``results`` and passes the item along unchanged.

    Args:
        spider_ins: Not used by this step.
        item: Object exposing a ``results`` attribute.

    Returns:
        The ``item`` that was passed in.
    """
    message = f"我是item33333 {item.results}"
    print(message)
    return item
# Module-level database handle, created at import time and used by
# ``when_end`` (closing) and ``to_mysql_db`` (inserting).
# NOTE(review): connection values are hard-coded here; the four positional
# arguments are presumably host, database, user and password -- confirm
# against the SanicDB constructor signature.
db = SanicDB('localhost', 'testdb', 'root', 'root',
minsize=5, maxsize=55,
connect_timeout=10
)
@atexit.register
def when_end():
    """Exit hook: close the module-level ``db`` handle when it is truthy.

    Registered with ``atexit`` so it runs at normal interpreter shutdown.
    """
    # ``db`` is only read here, so no ``global`` declaration is required.
    if not db:
        return
    db.close()
@piplinestest.pipline(3)
async def to_mysql_db(spider_ins, item):
    """Pipeline step that stores ``ArticelItem`` results through ``db``.

    Items that are falsy or are not ``ArticelItem`` instances are passed
    through without touching the database.

    NOTE(review): this step is registered with the same argument (3) as
    ``pip3`` -- confirm that sharing the value is intended.

    Args:
        spider_ins: Not used by this step.
        item: Candidate item; only ``ArticelItem`` instances are inserted.

    Returns:
        The ``item`` that was passed in.
    """
    # Guard clause: nothing to persist for empty or foreign item types.
    if not item or not isinstance(item, ArticelItem):
        return item

    print(f"我是item3 入库 {item.results}")
    # Insert the item's results into the "art" table via the shared handle.
    last_id = await db.table_insert("art", item.results)
    print(f"last_id {last_id}")
    return item
def start1():
    """Run one ``IpSpider`` with ``middleware2`` and the ``piplinestest`` pipeline."""
    # The starter is constructed before the spider, as in the original.
    CrawStater().run_single(
        IpSpider(),
        middlewire=middleware2,
        pipline=piplinestest,
    )
@reminder.spider_start.connect
def test1(sender, **kwargs):
    """First receiver connected to ``reminder.spider_start``.

    Prints a marker line and returns a fixed integer.
    """
    marker = "spider_start1"
    print(marker)
    return 1222222
@reminder.spider_start.connect
def test221(sender, **kwargs):
    """Second receiver connected to ``reminder.spider_start``.

    Prints a marker line and returns a fixed integer.
    """
    marker = "spider_start2"
    print(marker)
    return 33333333
@reminder.spider_execption.connect
def test2(sender, **kwargs):
    """Receiver connected to ``reminder.spider_execption``; prints a marker line."""
    marker = "spider_execption"
    print(marker)
@reminder.spider_close.connect
def tes3t(sender, **kwargs):
    """Receiver connected to ``reminder.spider_close``; prints a marker line."""
    marker = "spider_close"
    print(marker)
@reminder.engin_start.connect
def test4(sender, **kwargs):
    """Receiver connected to ``reminder.engin_start``; prints a marker line."""
    marker = "engin_start"
    print(marker)
@reminder.engin_idle.connect
def test5(sender, **kwargs):
    """Receiver connected to ``reminder.engin_idle``; prints a marker line."""
    marker = "engin_idle"
    print(marker)
@reminder.engin_close.connect
def test6(sender, **kwargs):
    """Receiver connected to ``reminder.engin_close``; prints a marker line."""
    marker = "engin_close"
    print(marker)
@reminder.request_dropped.connect
def test7(sender, **kwargs):
    """Receiver connected to ``reminder.request_dropped``; prints a marker line.

    Args:
        sender: Object that emitted the signal; not used.
        **kwargs: Extra signal payload; not used.
    """
    # The marker previously read "spider_start" (copy-paste slip), which made
    # this event indistinguishable from the spider_start receivers' output.
    print("request_dropped")
@reminder.request_scheduled.connect
def test8(sender, **kwargs):
    """Receiver connected to ``reminder.request_scheduled``; prints a marker line."""
    marker = "request_scheduled"
    print(marker)
@reminder.response_received.connect
def test9(sender, **kwargs):
    """Receiver connected to ``reminder.response_received``; prints a marker line."""
    marker = "response_received"
    print(marker)
@reminder.response_downloaded.connect
def test10(sender, **kwargs):
    """Receiver connected to ``reminder.response_downloaded``; prints a marker line."""
    marker = "response_downloaded"
    print(marker)
@reminder.item_dropped.connect
def test11(sender, **kwargs):
    """Receiver connected to ``reminder.item_dropped``; prints a marker line.

    Args:
        sender: Object that emitted the signal; not used.
        **kwargs: Extra signal payload; not used.
    """
    # The marker previously read "spider_start" (copy-paste slip), which made
    # this event indistinguishable from the spider_start receivers' output.
    print("item_dropped")
if __name__ == "__main__":
    # Script entry point: build the starter and the spider instances, then
    # crawl with only ``spider`` using ``middleware2`` and ``piplinestest``.
    starter = CrawStater()

    # These instances are constructed but not passed to ``run_many`` below;
    # they are kept in the original order in case construction has side effects.
    spider1 = GovsSpider()
    spider2 = JsonSpider()
    js_spider = JsSpider()
    spider = IpSpider()
    spider22 = IpSpider()

    starter.run_many(
        [spider],
        middlewire=middleware2,
        pipline=piplinestest,
    )
    # Variant without middleware or pipeline:
    # starter.run_many([spider])