# Scrape site: http://www.xici.net.co/nn
# Fetch free web proxies ("ip:port") and save them to a local text file.
import requests
from bs4 import BeautifulSoup
#import MySQLdb
#import MySQLdb.cursors
import sys
# Python 2 only: re-expose sys.setdefaultencoding so implicit str/unicode
# coercions on the scraped text default to UTF-8.  On Python 3, str is
# already Unicode and reload() is no longer a builtin, so executing these
# lines unconditionally raises NameError — guard them behind a version check.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 — `reload` is a builtin on Python 2 only
    sys.setdefaultencoding('utf8')
# Collected "ip:port" strings, one per table row on the listing page.
proxy_info = []
filename = "proxy.txt"
url = 'http://www.xici.net.co/nn'

# A timeout keeps the script from hanging forever if the site is unreachable
# (this particular proxy-list site has been intermittently dead for years).
page_code = requests.get(url, timeout=10).text

# Name the parser explicitly: without it bs4 warns and picks whatever parser
# happens to be installed, which can change parsing behavior between machines.
soup = BeautifulSoup(page_code, 'html.parser')

table_soup = soup.find('table')
if table_soup is None:
    # Fail with a clear message instead of an AttributeError on the next line.
    raise RuntimeError('proxy table not found in page: ' + url)

# Skip the header row; each remaining <tr> holds one proxy entry with the
# IP in the 3rd cell and the port in the 4th.
for tr in table_soup.find_all('tr')[1:]:
    td_list = tr.find_all('td')
    if len(td_list) < 4:
        # Spacer / malformed row — would raise IndexError below.
        continue
    ip = td_list[2].get_text(strip=True)
    port = td_list[3].get_text(strip=True)
    proxy_info.append(ip + ':' + port)

# 'with' guarantees the file is closed even if a write fails; an explicit
# encoding avoids depending on the platform default.
with open(filename, 'w', encoding='utf-8') as f:
    for entry in proxy_info:
        f.write(entry + '\n')