from bs4 import BeautifulSoup
import requests
import time
# Listing URL prefix; get_more_pages() appends the page number to it.
url = 'https://knewone.com/discover?page='
def get_page(url, data=None, timeout=10):
    """Fetch one listing page and print every product found on it.

    The page at ``url`` is downloaded and parsed with the ``lxml`` parser.
    Cover images (``a.cover-inner > img``) are paired positionally with the
    title anchors (``section.content > h4 > a``). For each pair a dict with
    the keys ``img``, ``title`` and ``link`` is printed.

    Args:
        url: Address of the page to download.
        data: When left as ``None`` the scraped records are printed. Any
            other value suppresses the printing; the page is still fetched
            and parsed.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds. Other request errors propagate unchanged.
    """
    # Without a timeout a single stalled connection would hang the crawl.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'lxml')
    images = soup.select('a.cover-inner > img')
    # Title and link live on the same anchor, so one query serves both.
    anchors = soup.select('section.content > h4 > a')

    if data is not None:
        return

    # zip() stops at the shorter list, so unmatched elements are dropped.
    for image, anchor in zip(images, anchors):
        record = {
            'img': image.get('src'),
            'title': anchor.get('title'),
            'link': anchor.get('href'),
        }
        print(record)
def get_more_pages(start, end):
    """Scrape a run of consecutive listing pages, pausing after each one.

    Page numbers go from ``start`` up to, but not including, ``end``. Each
    number is appended to the module-level ``url`` prefix and the resulting
    address is handed to ``get_page``.
    """
    for page_suffix in map(str, range(start, end)):
        page_url = url + page_suffix
        get_page(page_url)
        # Two-second pause after every page request.
        time.sleep(2)
# Run the crawl when the script executes: pages 1 through 9, because the
# end bound is passed to range() and is therefore exclusive.
get_more_pages(1,10)
# python爬虫(五)多页码
# 原文作者:luogan129
# 原文地址: https://blog.csdn.net/luogan129/article/details/71302486
# 本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
# 原文地址: https://blog.csdn.net/luogan129/article/details/71302486
# 本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。