先上代码
#coding=utf-8
import requests
from bs4 import Tag
from bs4 import BeautifulSoup
def getHtml(url, timeout=10):
    """Download *url* with an HTTP GET and return the body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Pass
            ``None`` to wait indefinitely, which was the behaviour before
            this parameter existed.

    Returns:
        The response body decoded to text (``Response.text``). The HTTP
        status code is not checked, so an error page is returned as-is.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    # Without an explicit timeout, requests blocks forever on a stalled
    # server, which would hang the whole scrape.
    page = requests.get(url, timeout=timeout)
    return page.text
def getText(html, page=None):
    """Print the posts on one thread page whose author block contains "楼主".

    The page is parsed for ``div.atl-info`` elements (author blocks) and
    ``div.bbs-content`` elements (post bodies). The two lists are paired up
    by position, and the body of every pair whose author text contains
    "楼主" is printed with surrounding whitespace stripped.

    Args:
        html: HTML source of one page of the thread.
        page: Page number the HTML came from (pages start at 1 in the
            calling script). When omitted, the module-level variable ``i``
            is used if it exists, which is how the script's main loop has
            always supplied the page number; otherwise page 1 is assumed.

    Returns:
        None. Matching post bodies are written to stdout.
    """
    if page is None:
        # Backward compatibility: callers that do not pass ``page`` rely on
        # the main loop's global counter ``i``.
        page = globals().get('i', 1)

    soup = BeautifulSoup(html, 'html.parser')
    authors = [node.get_text() for node in soup.find_all('div', class_='atl-info')]
    bodies = [node.get_text() for node in soup.find_all('div', class_='bbs-content')]

    if page > 1:
        # NOTE(review): presumably pages after the first carry one author
        # block that has no matching body, so an empty placeholder keeps
        # the two lists aligned -- confirm against the live page layout.
        bodies = [""] + bodies

    # zip() stops at the shorter list, so a page with mismatched counts is
    # processed as far as the pairs go instead of raising IndexError.
    for author, body in zip(authors, bodies):
        if "楼主" in author:
            print(body.strip())
if __name__ == '__main__':
    # Scrape pages 1 through 5 of the thread, one request per page.
    url_template = "http://bbs.tianya.cn/post-feeling-4286798-%s.shtml"
    # The counter must remain a module-level name called ``i``: getText()
    # looks it up as a global to learn which page it is parsing.
    for i in range(1, 6):
        getText(getHtml(url_template % i))
刚学 Python 不到一个月,代码写得有点乱,以后再优化。