Python + MongoDB 爬取 58 同城网站

__author__ = 'Lee'

from bs4 import BeautifulSoup
import requests
'''
用这个爬取58中二手的分栏

'''
start_url = 'http://bj.58.com/sale.shtml' # starting page for the crawl
# Site root used as the base when building absolute channel URLs from the
# href values found on the page.
url_host = 'http://bj.58.com'

def get_channel_urls(url):
    """Fetch a page and print/collect the channel links found in its sub-menu.

    The page at ``url`` is downloaded and parsed with the ``lxml`` parser.
    Every anchor matched by the CSS selector ``ul.ym-submnu > li > b > a``
    is resolved against the module-level ``url_host`` and printed, one URL
    per line.

    Args:
        url: Address of the page to fetch (e.g. the module's ``start_url``).

    Returns:
        A list of the absolute channel URLs, in document order. The same
        URLs are printed as a side effect.

    Raises:
        requests.RequestException: If the request times out, the connection
            fails, or the server responds with an HTTP error status.
    """
    # Local import keeps this block self-contained; stdlib only.
    from urllib.parse import urljoin

    # Without a timeout, requests can block forever on an unresponsive host.
    wb_data = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    wb_data.raise_for_status()

    soup = BeautifulSoup(wb_data.text, 'lxml')
    channel_urls = []
    for link in soup.select('ul.ym-submnu > li > b > a'):
        href = link.get('href')
        if not href:
            # Anchors without an href would make string concatenation fail.
            continue
        # urljoin handles both site-relative and already-absolute hrefs.
        page_url = urljoin(url_host, href)
        print(page_url)
        channel_urls.append(page_url)
    return channel_urls
    原文作者:宁静消失何如
    原文地址: https://www.jianshu.com/p/6055a792c1db
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞