Python 中常见的几种下载文件方法

  • 下载比较小的文件:

# 方法一:使用urllib库
# -*- coding:utf-8 -*-
import urllib
import time

url = 'http://mvideo.spriteapp.cn/video/2017/0414' \
      '/697de826-20b5-11e7-9c72-1866daeb0df1cut_wpcco.mp4'
print "downloading with urllib"
start = time.time()
urllib.urlretrieve(url, "video.mp4")
end = time.time()
print 'Finish in :', end - start

# 方法二:使用urllib2库
# -*- coding:utf-8 -*-
import urllib2
import time

url = 'http://mvideo.spriteapp.cn/video/2017/0414/' \
      '697de826-20b5-11e7-9c72-1866daeb0df1cut_wpcco.mp4'

print "downloading with urllib2"
start = time.time()
data = urllib2.urlopen(url).read()
with open('video.mp4', 'wb') as video:
    video.write(data)
end = time.time()
print 'Finish in :', end - start

# 方法三:使用requests库
# -*- coding:utf-8 -*-
import requests
import time

url = 'http://mvideo.spriteapp.cn/video/2017/0414/' \
      '697de826-20b5-11e7-9c72-1866daeb0df1cut_wpcco.mp4'

print "downloading with requests"
start = time.time()
r = requests.get(url)
with open('video.mp4', 'wb') as video:
    video.write(r.content)
end = time.time()
print 'Finish in :', end - start

  • 下载比较大的文件:

# 方法一:使用urllib2库
# -*- coding:utf-8 -*-
import urllib2
import time

url = 'http://mvideo.spriteapp.cn/video/2017/0414/' \
      '697de826-20b5-11e7-9c72-1866daeb0df1cut_wpcco.mp4'

r = urllib2.Request(url)
u = urllib2.urlopen(r)
start = time.time()
with open('video.mp4', 'w') as f:
    while True:
        tmp = u.read(1024)
        if not tmp:
            break
        f.write(tmp)
end = time.time()
print 'Finish in :', end - start
# 方法二:使用requests库
# -*- coding:utf-8 -*-
import requests
import time
url = 'http://mvideo.spriteapp.cn/video/2017/0414/' \
      '697de826-20b5-11e7-9c72-1866daeb0df1cut_wpcco.mp4'
# 当把get函数的stream参数设置成False时,
# 它会立即开始下载文件并放到内存中,如果文件过大,有可能导致内存不足。

# 当把get函数的stream参数设置成True时,它不会立即开始下载,
# 使用iter_content或iter_lines遍历内容或访问内容属性时才开始下载
r = requests.get(url, stream=True)
f = open("file_path", "wb")
start = time.time()
for chunk in r.iter_content(chunk_size=1024):
    if chunk:
        f.write(chunk)
        f.flush()
# iter_content:一块一块的遍历要下载的内容
# iter_lines:一行一行的遍历要下载的内容
# 这两个函数下载大文件可以防止占用过多的内存,因为每次只下载小部分数据
end = time.time()
print 'Finish in :', end - start

    原文作者:東飛
    原文地址: https://www.jianshu.com/p/521a8a37db9c
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞