Code:
'''
Created on 2018年2月11日
python 3.6
@author: Livon
'''
import urllib.request
import re
STOCK_URL = 'https://gupiao.baidu.com/stock/sz002633.html'

# Patterns are raw strings so that "\s" reaches the regex engine as a regex
# escape instead of being treated as an (invalid) Python string escape.
MARKET_CAP_PATTERN = re.compile(r"<dl><dt>总市值</dt><dd>(.*?)亿</dd></dl>")
# "\s+" consumes the trailing whitespace, including line breaks, that sits
# between the captured text and the closing </span>.
STATE_PATTERN = re.compile(r'<span class="state f-up">(.*?)\s+</span>')


def fetch_html(url):
    """Download *url* and return the response body decoded as UTF-8 text.

    The response is used as a context manager so the connection is closed
    even if reading or decoding fails.

    Raises:
        urllib.error.URLError: if the request cannot be completed.
        UnicodeDecodeError: if the body is not valid UTF-8.
    """
    with urllib.request.urlopen(url) as response:
        return response.read().decode('utf8')


def extract_market_caps(html):
    """Return every total-market-cap figure found in *html*.

    Each item is the text captured between ``<dd>`` and the ``亿`` unit in a
    ``总市值`` entry, as a string. Returns an empty list when nothing matches.
    """
    return MARKET_CAP_PATTERN.findall(html)


def extract_dates(html):
    """Return the text of every ``state f-up`` span in *html*, spaces removed.

    Only spans whose text is followed by at least one whitespace character
    before ``</span>`` match. Returns an empty list when nothing matches.
    """
    # Plain str.replace is equivalent to re.sub(" ", "", text) for a literal
    # single-space pattern.
    return [text.replace(" ", "") for text in STATE_PATTERN.findall(html)]


def main():
    """Fetch the stock page and print its market cap and state/time lines."""
    print('url: ' + STOCK_URL)
    html = fetch_html(STOCK_URL)
    for market_cap in extract_market_caps(html):
        print('总市值:' + market_cap + ' 亿')
    for date in extract_dates(html):
        print('时间:' + date)


if __name__ == '__main__':
    main()
Code excerpts:
# Key lines excerpted from the script above, each annotated with the
# technique it demonstrates. They are illustrative and are not meant to be
# run in this order (`i` is the loop index in the full script).
html = html.decode('utf8') # decode the raw response bytes into text as UTF-8
dates = re.findall('<span class="state f-up">(.*?)\s+</span>',html) # \s+ consumes trailing whitespace, including line breaks, before </span>
print('时间:', end='' ) # end='' suppresses the trailing newline
date = re.sub( " ", "", dates[i] ) # regex substitution: remove every space character
Output:
url: https://gupiao.baidu.com/stock/sz002633.html
总市值:12.69 亿
时间:已休市 2018-02-09 15:00:03