刚刚接触 Python，需要学习网页内容抓取，于是使用 BeautifulSoup 练习抓取了中国城市经纬度查询网站上所有城市的经纬度信息，并保存到 txt 文件。现将代码粘贴如下：
from bs4 import BeautifulSoup
import urllib2
import sys
# Python 2 only: site.py removes sys.setdefaultencoding during interpreter
# startup, so the module is reloaded to get the function back, and the
# default codec is then switched to UTF-8 so that implicit str/unicode
# conversions of non-ASCII text do not raise UnicodeError.
# NOTE(review): this changes behaviour for the whole process; explicit
# encode/decode at the I/O boundary is the safer long-term replacement.
reload(sys)
sys.setdefaultencoding("utf-8")
BASE_URL = "http://www.bjfewd.com/gnjw/"
INDEX_URL = BASE_URL + "ch_jwd.htm"
OUTPUT_PATH = "text.txt"

# Number of leading <td> cells on every linked page that are not written out.
# NOTE(review): presumably page header/navigation cells -- confirm against
# the live site before changing this value.
LEADING_CELLS_TO_SKIP = 10


def fetch(url):
    """Return the raw response body of *url*.

    The response object is closed explicitly because the urllib2 response
    is not a context manager.
    """
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def data_lines(cell_texts, skip=LEADING_CELLS_TO_SKIP):
    """Return one newline-terminated line per cell text after the first *skip*.

    Args:
        cell_texts: Sequence of cell strings in document order.
        skip: How many leading entries to drop.

    Returns:
        A list of strings, each ending in a newline; empty when
        *cell_texts* has *skip* entries or fewer.
    """
    return [text + u"\n" for text in cell_texts[skip:]]


def main():
    """Write the table-cell text of every page linked from the index page.

    Downloads INDEX_URL, follows each anchor's href relative to BASE_URL,
    and writes the text of each page's <td> cells (minus the leading
    LEADING_CELLS_TO_SKIP) to OUTPUT_PATH, one cell per line, as UTF-8.
    """
    import io  # local import: provides open() with an explicit encoding

    # NOTE(review): no parser is named, so bs4 picks whichever one is
    # installed; pin one once the output has been checked against it.
    index = BeautifulSoup(fetch(INDEX_URL))

    # io.open with an explicit encoding makes the output independent of the
    # process-wide default codec, and the with-block guarantees the file is
    # flushed and closed even if a download fails part-way through.
    with io.open(OUTPUT_PATH, "w", encoding="utf-8") as out:
        # href=True skips anchors without an href attribute, which would
        # otherwise make the URL concatenation fail on None.
        for anchor in index.find_all("a", href=True):
            page = BeautifulSoup(fetch(BASE_URL + anchor["href"]))
            # get_text() replaces .string, which is None for a cell with no
            # child or with several children and broke the concatenation.
            texts = [cell.get_text() for cell in page.find_all("td")]
            out.writelines(data_lines(texts))

    print("Done")


if __name__ == "__main__":
    main()