获取历史天气网上城市历史气温数据

2019年6月16日 263次阅读来源: ccvin

使用 Python 2 自带的标准库（urllib2、re、csv）获取历史天气网（http://lishi.tianqi.com）上城市的历史气温数据。运行前需要设置三项：city（城市名的拼音，具体写法需要到历史天气网上查看）、years（需要获取的年份）以及 months（需要获取的月份）。

# -*- coding: utf-8 -*-
"""
@author: CC
"""

import re
import urllib2
import csv
import time

"""
设置需要爬取的地区，使用该地区的拼音
设置需要爬取的年、月
"""
# Scrape configuration.
# city:   region identifier, inserted verbatim into the URL path by getHtml
#         (the site uses the pinyin spelling of the city name).
# years:  years to fetch, as strings.
# months: months to fetch, as zero-padded two-character strings; each
#         year/month pair is concatenated to form the page name.
city = 'wuhan'
years = ['2017']
months = ['06', '07', '08', '09']


def getHtml(city, year, month, timeout=30):
    """Download one monthly history page and return its raw body.

    The page URL is ``http://lishi.tianqi.com/<city>/<year><month>.html``.

    Args:
        city: Region identifier used as the URL path segment.
        year: Year to fetch; converted with ``str()``.
        month: Month to fetch; converted with ``str()``, so pass a
            zero-padded value such as ``'06'`` if the site expects one.
        timeout: Seconds to wait on the connection before giving up.
            Without a timeout a stalled connection would block the
            script indefinitely.

    Returns:
        The response body exactly as returned by ``response.read()``.

    Raises:
        Errors raised by ``urllib2.urlopen`` (for example
        ``urllib2.URLError``) propagate to the caller.
    """
    url = 'http://lishi.tianqi.com/' + city + '/' + str(year) + str(month) + '.html'
    # Progress output; a single parenthesised argument prints identically
    # under the Python 2 print statement.
    print(url)
    request = urllib2.Request(url)
    # Send a browser-like User-Agent instead of the urllib2 default
    # (presumably the site rejects the default agent -- not verified here).
    request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36')
    response = urllib2.urlopen(request, timeout=timeout)
    try:
        return response.read()
    finally:
        # Always release the connection, even if read() fails.
        response.close()

def getTemp(html):
    """Extract (date, high, low) rows from a monthly history page.

    All whitespace is stripped from the page first, so the patterns below
    match tags such as ``<div class="tqtongji2">`` in their collapsed form
    (``<divclass="tqtongji2">``). Only plain ``<ul>`` elements inside that
    table are treated as data rows; a ``<ul>`` carrying attributes (for
    example a header row with a class) is not matched.

    Args:
        html: Page source as a string.

    Returns:
        A list of ``(date, high, low)`` tuples of strings, in page order.
        The date is the text of the link in the first cell, or the raw
        first cell when it contains no link. Returns an empty list when
        the page has no data table; rows with fewer than three cells are
        skipped instead of raising ``IndexError``.
    """
    # Collapse all whitespace so matching does not depend on page formatting.
    text = "".join(html.split())

    table_match = re.search(
        r'<divclass="tqtongji2">(.*?)</div><divstyle="clear:both">', text)
    if table_match is None:
        # No data table on this page (error page, layout change, empty body).
        return []

    # Compile once, outside the row loop.
    row_pattern = re.compile(r'<ul>(.*?)</ul>')
    cell_pattern = re.compile(r'<li>(.*?)</li>')
    date_pattern = re.compile(r'>(.*?)</a>')

    rows = []
    for row_html in row_pattern.findall(table_match.group(1)):
        cells = cell_pattern.findall(row_html)
        if len(cells) < 3:
            # Malformed or partial row: cannot provide date, high and low.
            continue
        date_match = date_pattern.search(cells[0])
        # Named `date` rather than `time` to avoid shadowing the time module.
        date = date_match.group(1) if date_match else cells[0]
        rows.append((date, cells[1], cells[2]))
    return rows


if __name__ == "__main__":
    # Script entry point: fetch every configured year/month page for `city`
    # and append its rows to "<city>.csv" beneath a single header row.
    with open(city + '.csv', 'wb+') as csv_file:
        out = csv.writer(csv_file)
        out.writerow(('time', 'high', 'low'))
        # Years form the outer loop and months the inner one.
        for year, month in ((y, m) for y in years for m in months):
            page = getHtml(city, year, month)
            out.writerows(getTemp(page))
            print(year + month + ' OK！')
            # Pause between requests so the site is not hit back-to-back.
            time.sleep(2)

    原文作者：ccvin
    原文地址: https://blog.csdn.net/ccvin/article/details/81587008
    本文转自网络文章，转载此文章仅为分享知识，如有侵权，请联系博主进行删除。