Content-Length: 453165 | pFad | http://github.com/cwjokaka/ok_ip_proxy_pool/commit/290e01badb12cf2f5a6856de6a3984612baaf54c

15 first commit · cwjokaka/ok_ip_proxy_pool@290e01b · GitHub
Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
cwjokaka committed Sep 5, 2019
1 parent 1c21566 commit 290e01b
Show file tree
Hide file tree
Showing 15 changed files with 115 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,6 @@ venv.bak/

# mypy
.mypy_cache/

.idea
idea/
4 changes: 4 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from src.spider.spider_66_ip import Spider66Ip

def main():
    """Build the 66IP spider and run a single crawl."""
    spider = Spider66Ip()
    spider.crawl()


# Only crawl when executed as a script, not when imported.
if __name__ == '__main__':
    main()
5 changes: 5 additions & 0 deletions setting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Import from the defining module: the ``src.spider`` package __init__ does
# not re-export Spider66Ip, so ``from src.spider import Spider66Ip`` fails.
from src.spider.spider_66_ip import Spider66Ip

# Storage backend selector.
DB_TYPE = 'memory' # memory/redis

# Spider classes made available through this settings module.
SPIDER_LIST = [Spider66Ip]
Empty file added src/__init__.py
Empty file.
Empty file added src/database/__init__.py
Empty file.
10 changes: 10 additions & 0 deletions src/database/abs_database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
class AbsDatabase(object):
    """Base interface for key/value storage backends.

    Every method here only raises; concrete backends override ``put``,
    ``get`` and ``remove``. ``NotImplementedError`` is a subclass of
    ``RuntimeError``, so callers that catch ``RuntimeError`` still work.
    """

    def put(self, key, value):
        """Store ``value`` under ``key``.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError('该put方法未实现!')

    def get(self, key):
        """Return the value stored under ``key``.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError('该get方法未实现!')

    def remove(self, key):
        """Remove the entry stored under ``key``.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError('该remove方法未实现!')
18 changes: 18 additions & 0 deletions src/database/memory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from src.database.abs_database import AbsDatabase


class Memory(AbsDatabase):
    """Database backend implemented on top of process memory.

    Entries live in a plain dict owned by the instance.
    """

    def __init__(self) -> None:
        """Create an empty store."""
        self._box = dict()

    def put(self, key, value):
        """Store ``value`` under ``key``, replacing any existing entry."""
        self._box.update({key: value})

    def get(self, key):
        """Return the value stored under ``key``.

        Raises:
            KeyError: if ``key`` has not been stored.
        """
        stored = self._box[key]
        return stored

    def remove(self, key):
        """Drop ``key`` and return its value, or ``None`` if it was absent."""
        removed = self._box.pop(key, None)
        return removed
Empty file added src/entity/__init__.py
Empty file.
38 changes: 38 additions & 0 deletions src/entity/proxy_entity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
class ProxyEntity(object):
    """Record describing a single proxy, exposed through read-only properties.

    Args:
        ip: Proxy address.
        port: Proxy port.
        source: Defaults to ``''``. Presumably the site the proxy was
            collected from -- confirm against the spiders.
        type: Defaults to ``''``. Meaning not shown here (possibly the
            protocol or anonymity level) -- confirm with callers.
        check_count: Defaults to ``0``.
        region: Defaults to ``''``.
        last_check_time: Defaults to ``None``.
    """

    def __init__(self, ip, port, source='', type='', check_count=0, region='', last_check_time=None):
        self._ip = ip
        self._port = port
        self._source = source
        self._type = type
        self._check_count = check_count
        self._region = region
        self._last_check_time = last_check_time

    def __repr__(self):
        """Return a constructor-style representation for logs and debugging."""
        return (
            f'{self.__class__.__name__}('
            f'ip={self._ip!r}, port={self._port!r}, source={self._source!r}, '
            f'type={self._type!r}, check_count={self._check_count!r}, '
            f'region={self._region!r}, last_check_time={self._last_check_time!r})'
        )

    @property
    def ip(self):
        """The ``ip`` value given at construction."""
        return self._ip

    @property
    def port(self):
        """The ``port`` value given at construction."""
        return self._port

    @property
    def source(self):
        """The ``source`` value given at construction."""
        return self._source

    @property
    def type(self):
        """The ``type`` value given at construction."""
        return self._type

    @property
    def check_count(self):
        """The ``check_count`` value given at construction."""
        return self._check_count

    @property
    def region(self):
        """The ``region`` value given at construction."""
        return self._region

    @property
    def last_check_time(self):
        """The ``last_check_time`` value given at construction."""
        return self._last_check_time
Empty file added src/exception/__init__.py
Empty file.
4 changes: 4 additions & 0 deletions src/exception/ok_proxy_error.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
class OkProxyError(RuntimeError):
    """Project-specific runtime error that carries a message.

    Args:
        msg: Error description; kept on ``_msg`` and used as the exception
            argument.
    """

    def __init__(self, msg) -> None:
        # Forward the message to RuntimeError so ``args`` and ``str(exc)``
        # are populated even when ``msg`` is passed by keyword.
        super().__init__(msg)
        self._msg = msg
Empty file added src/spider/__init__.py
Empty file.
12 changes: 12 additions & 0 deletions src/spider/abs_spider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
class AbsSpider(object):
    """Base class for spiders.

    ``crawl`` is the entry point; it prints a start message, calls
    ``do_crawl`` and prints a finish message. Subclasses supply the actual
    work by overriding ``do_crawl``.

    Args:
        name: Label stored on ``_name``; defaults to ``'unknown'``.
    """

    def __init__(self, name='unknown') -> None:
        self._name = name

    def crawl(self):
        """Run ``do_crawl`` between the start and finish messages.

        The finish message is not printed if ``do_crawl`` raises.
        """
        print('开始爬取...')
        self.do_crawl()
        print('爬取完毕!')

    def do_crawl(self):
        """Perform the crawl; must be overridden by subclasses.

        Raises:
            NotImplementedError: always, in this base class. It subclasses
                ``RuntimeError``, so existing ``except RuntimeError``
                handlers still match.
        """
        raise NotImplementedError('do_crawl方法没有实现!')
21 changes: 21 additions & 0 deletions src/spider/spider_66_ip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import requests

from src.spider.abs_spider import AbsSpider
from bs4 import BeautifulSoup

class Spider66Ip(AbsSpider):
    """
    Spider for the 66IP proxy listing.
    http://www.66ip.cn/
    """

    def __init__(self) -> None:
        super().__init__('66IP代理爬虫')
        self._base_url = 'http://www.66ip.cn'

    def do_crawl(self):
        """Fetch listing page 1 and print the ``<tr>`` rows of its proxy table.

        A page that does not contain the expected table is reported and
        skipped instead of failing with ``AttributeError``.

        Raises:
            requests.exceptions.RequestException: on connection errors or
                when the server does not answer within the timeout.
        """
        for page in range(1, 2):
            # requests has no default timeout; without one a stalled server
            # would block the crawl forever.
            resp = requests.get(f'{self._base_url}/{page}.html', timeout=10)
            # Decode the body as gb2312 rather than the guessed encoding.
            resp.encoding = 'gb2312'
            soup = BeautifulSoup(resp.text, 'lxml')
            table = soup.find('table', attrs={'width': '100%', 'bordercolor': '#6699ff'})
            if table is None:
                # find() returns None when nothing matches, e.g. if the page
                # layout changed or an error page was served.
                print(f'第{page}页未找到代理表格, 已跳过')
                continue
            print(table.find_all('tr'))
Empty file added test/__init__.py
Empty file.

0 comments on commit 290e01b

Please sign in to comment.








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: http://github.com/cwjokaka/ok_ip_proxy_pool/commit/290e01badb12cf2f5a6856de6a3984612baaf54c

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy