Content-Length: 309233 | pFad | http://github.com/cwjokaka/ok_ip_proxy_pool/commit/843d95212f856489baff4ba687930612eebc7a50

Revert "upd" · cwjokaka/ok_ip_proxy_pool@843d952 · GitHub
Skip to content

Commit

Permalink
Revert "upd"
Browse files Browse the repository at this point in the history
This reverts commit 532e147
  • Loading branch information
cwjokaka committed Sep 14, 2019
1 parent 532e147 commit 843d952
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 207 deletions.
22 changes: 5 additions & 17 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,15 @@
import asyncio
import typing
from asyncio import AbstractEventLoop

from src.database.memory_db import db_collection
from src.entity.proxy_entity import ProxyEntity
from src.spider.spiders import spider_collection
from setting import SPIDER_LIST, DB_CONFIG
from threading import Thread


def crawl(event_loop: AbstractEventLoop):
def crawl():
proxies = []
for spider_name in SPIDER_LIST:
proxies.extend(spider_collection[spider_name].crawl(event_loop))
proxies.extend(spider_collection[spider_name].crawl())
return proxies


Expand All @@ -22,21 +19,12 @@ def save(proxies: typing.List[ProxyEntity]):
db.set(f'{proxy.ip}:{proxy.port}', proxy)


def start_event_loop(loop):
def init_loop(_loop):
asyncio.set_event_loop(_loop)
_loop.run_forever()
loop_thread = Thread(target=init_loop, args=(loop,))
loop_thread.setDaemon(True)
loop_thread.start()


if __name__ == '__main__':
new_loop = asyncio.new_event_loop()
start_event_loop(new_loop)

if __name__ == '__main__':
proxies = crawl()
# 爬取
proxies = crawl(new_loop)
save(proxies)
# 持久化
# save(proxies)
print()
4 changes: 2 additions & 2 deletions setting.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@

SPIDER_LIST = [
'Spider66Ip',
# 'SpiderQuanWangIp',
# 'SpiderXiciIp'
'SpiderQuanWangIp',
'SpiderXiciIp'
]

# 爬虫请求头
Expand Down
20 changes: 7 additions & 13 deletions src/spider/abs_spider.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
import asyncio
from asyncio import AbstractEventLoop
from collections import Coroutine
from typing import List, Iterable
from typing import List

from src.entity.proxy_entity import ProxyEntity

Expand All @@ -11,17 +8,14 @@ class AbsSpider(object):
def __init__(self, name='unknown') -> None:
self._name = name

def crawl(self, event_loop: AbstractEventLoop):
def crawl(self):
print(f'{self._name}开始爬取...')
# self.do_crawl()
# print(type(self.do_crawl()))
# print(isinstance(self.do_crawl(), Coroutine))
self.do_crawl(event_loop)
# print(f'{self._name}爬取完毕!共:{len(res)}个代理')
res = self.do_crawl()
print(f'{self._name}爬取完毕!共:{len(res)}个代理')
# todo 持久化到数据库

# return ful.result()
return res

def do_crawl(self, event_loop) -> Iterable[ProxyEntity]:
raise NotImplementedError
def do_crawl(self) -> List[ProxyEntity]:
raise RuntimeError('do_crawl方法没有实现!')

Loading

0 comments on commit 843d952

Please sign in to comment.








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: http://github.com/cwjokaka/ok_ip_proxy_pool/commit/843d95212f856489baff4ba687930612eebc7a50

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy