PEXELS网站提供海量共享图片素材,图片质量高,可免费用于个人和商业用途。
网址:https://www.pexels.com/ 。该网站的搜索支持英文关键字,可考虑用有道智云API将中文关键字翻译成英文。
采用 requests 请求图片,并将响应内容以二进制方式写入文件来保存图片。
代码如下:
# 网页采用了动态加载,目前一次只抓取15张左右的图片。
import requests
import hashlib
import random
import json
from bs4 import BeautifulSoup
def translate(q):
    """Translate Chinese text to English through the Youdao Zhiyun API.

    Args:
        q: The Chinese text to translate.

    Returns:
        The first web-dictionary meaning when the response contains a
        non-empty ``web`` section; otherwise the first entry of the
        ``translation`` list.

    Raises:
        KeyError: If the response contains neither a web meaning nor a
            translation (for example when the service reports an error).
        requests.exceptions.RequestException: If the HTTP request fails
            or does not complete within the timeout.
    """
    appKey = 'XXXXX'  # application ID, issued after registering
    secretKey = 'XXXXX'  # application secret, issued after registering
    url = 'http://openapi.youdao.com/api'
    fromLang = 'zh-CHS'  # Chinese -> English
    toLang = 'EN'
    salt = random.randint(1, 10)

    # Request signature: MD5 over appKey + query + salt + secretKey.
    sign1 = appKey + q + str(salt) + secretKey
    sign = hashlib.md5(sign1.encode(encoding='utf-8')).hexdigest()

    # Let requests build the query string so that characters such as
    # '&', '#', '=' or '+' inside the text are percent-encoded instead of
    # being interpreted as URL syntax.
    params = {
        'q': q,
        'from': fromLang,
        'to': toLang,
        'salt': str(salt),
        'appKey': appKey,
        'sign': sign,
    }
    r = requests.get(url, params=params, timeout=10)
    json_data = json.loads(r.text)

    # Prefer the web-dictionary meaning, as before, but do not depend on
    # it: fall back to the plain translation when 'web' is missing.
    web = json_data.get('web')
    if web and web[0].get('value'):
        return web[0]['value'][0]

    translation = json_data.get('translation')
    if translation:
        return translation[0]

    raise KeyError(
        'no translation in Youdao response (errorCode=%s)'
        % json_data.get('errorCode')
    )
def get_image(url, save_dir="F://"):
    """Download the images listed on a Pexels search-results page.

    Fetches ``url``, collects the ``src`` attribute of every
    ``img.photo-item__img`` element in the returned HTML and saves each
    image into ``save_dir``. Only images present in the initially served
    HTML are found; anything the page loads dynamically afterwards is not.

    Args:
        url: Address of the page to scrape.
        save_dir: Directory the images are written to. Defaults to
            ``"F://"``, the location that used to be hard-coded.
    """
    import os  # local import: not part of the file's top-level imports

    headers = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3294.6 Safari/537.36'}
    r = requests.get(url, headers=headers, timeout=10)
    print(r.status_code)
    soup = BeautifulSoup(r.text, "lxml")

    # An <img> without a src attribute yields None; drop those so they
    # never reach requests.get().
    candidates = (info.get('src') for info in soup.select("img.photo-item__img"))
    download_list = [src for src in candidates if src]

    for item in download_list:
        res = requests.get(item, headers=headers, timeout=30)
        if not res.ok:
            # Do not store an HTTP error page under an image file name.
            print("skipped %s (HTTP %s)" % (item, res.status_code))
            continue

        # File name: last 10 characters of the URL without its query
        # string, trimmed of dashes. Keep only the part after any '/' so
        # the name can never point into a sub-directory.
        file_name = item.split("?")[0][-10:].strip("-").rsplit("/", 1)[-1]
        if not file_name:
            continue

        with open(os.path.join(save_dir, file_name), 'wb') as f:
            f.write(res.content)
        print(file_name)
if __name__ == "__main__":
    # Ask for a Chinese keyword and translate it to English before
    # building the search address.
    keyword = input("请输入您要搜索的中文关键词:")
    english = translate(keyword)
    print(english)
    # Scrape the search-results page for the translated keyword.
    get_image("https://www.pexels.com/search/" + english)
    print("Finished!")