阿里试用排序
抱歉,之前稀里糊涂把配置文件给 ignore 了,已修复,抱歉
前情提要
说来简直丢尽了钢铁直男的脸,没错,昨晚我正兴奋地做着外包的活(中国移动的小程序,自由职业,喂),11 点多了,女友突然脑子一抽:“你能不能帮我把这个玩意排序一下给我用啊,我好薅点羊毛,技术上能实现嘛?”
我比较无奈地看了看,阿里试用咩?什么鬼,哦哦哦,就这玩意啊,爬虫爬一下就是了。我是前端……
回道:“没问题啊,爬虫呗。”
她:“哇,多久能做出来啊?”
我:“我现在在忙诶,1-2小时吧。”
她:“行了,你别忙了,赶紧帮我弄一下出来!”
我看了看她的脸,屈辱地最小化了《微信开发者工具》……
页面展示
你如果觉得这也是广告,那真是太抬举我了。
爬虫搞起来
NodeJS 爬虫,百度一下,到处都是现成的代码,我也就不一一分析了,这里贴一段简书上的代码,来自 埃米莉Emily:
const express = require('express');
const superagent = require('superagent');
const cheerio = require('cheerio');

// Calling express() with no arguments returns an Express application
// instance; it is stored in `app`.
const app = express();

// GET / — download the V2EX front page, collect the text and href of every
// element matching `.item_title a`, and send the collected array back.
app.get('/', (req, res, next) => {
  superagent.get('https://www.v2ex.com/')
    .end((err, sres) => {
      // Standard error handling: pass the failure on to Express.
      if (err) {
        return next(err);
      }
      // sres.text holds the page's HTML. Passing it to cheerio.load yields
      // an object exposing a jQuery-style interface, conventionally named
      // `$`; everything after that is ordinary jQuery usage.
      const $ = cheerio.load(sres.text);
      const items = [];
      $('.item_title a').each((idx, element) => {
        const $element = $(element);
        items.push({
          title: $element.text(),
          href: $element.attr('href')
        });
      });
      res.send(items);
    });
});

app.listen(3000, function () {
  console.log('app is listening at port 3000');
});
嘛,express 用 NodeJS 的不可能不知道;superagent 理解成可以在 Node 里面发起对外请求的库即可;cheerio 嗯,Node 专用 jQuery。
首爬
把上面的请求地址换成:https://try.taobao.com/
,查看页面标签结构,找到想要的选择器:
.tb-try-wd-item-info > .detail
,用它替换上面的选择器 .item_title a
,走起:
……我不想展示结果,因为只有六个,而页面实际展示的是 10 个,找了半天,发现两个问题:
如上,第一个问题是爬到的 6 个是推荐位,喵的,不是下面的列表;
第二个,下面的列表是后面通过 POST 单独请求来的数据,怎么看都是某框架的 SSR 干的好事。
所以直接爬页面不行,得换策略。
模拟 POST
OK,既然是 POST,就好办了,直接把链接跟参数扒出来,然后用 superagent 模拟:
// First attempt: POST to the listing API with only a content-type header.
// `paylaod`, `next` and `resolve` are not defined in this snippet; they are
// expected to come from the enclosing scope (the identifier really is spelled
// `paylaod` here, so it must match whatever the surrounding code declares).
superagent
  .post(
    `https://try.taobao.com/api3/call?what=show&page=${paylaod.page}&pageSize&api=x%2Fsearch`
  )
  .set('content-type', 'application/x-www-form-urlencoded; charset=UTF-8')
  .end((error, reply) => {
    // Standard error handling.
    if (error) {
      return next(error)
    }
    // Hand back the `result` subtree of the parsed response body.
    const { result } = JSON.parse(reply.text)
    resolve(result)
  })
content-type 来自:
哼哼哼,你没猜错,失败了,如下:
想想也是必然的,怎么可能让你随便请求呢,然后该怎么做?研究?nonono,老夫上来就是一梭子,不就是 Content-Type 么!
// Second attempt: replay the request with the full set of headers a browser
// sends, so the API treats it like a normal in-page XHR.
// `paylaod`, `next` and `resolve` are not defined in this snippet; they are
// expected to come from the enclosing scope.
superagent
  .post(
    `https://try.taobao.com/api3/call?what=show&page=${paylaod.page}&pageSize&api=x%2Fsearch`
  )
  .set(
    'user-agent',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
  )
  // Fixed: the media type was misspelled as 'pplication/json'.
  .set('accept', 'application/json, text/javascript, */*; q=0.01')
  .set('accept-encoding', 'gzip, deflate, br')
  .set(
    'accept-language',
    'zh-CN,zh;q=0.9,en;q=0.8,la;q=0.7,zh-TW;q=0.6,da;q=0.5'
  )
  // Left disabled on purpose: with this header set the request timed out.
  // .set('content-length', '8')
  .set('content-type', 'application/x-www-form-urlencoded; charset=UTF-8')
  .set(
    'cookie',
    'your cookie' // placeholder — replace with a real cookie string
  )
  .set('origin', 'https://try.taobao.com')
  .set('referer', 'https://try.taobao.com')
  // NOTE(review): hard-coded token; presumably tied to the cookie above — confirm.
  .set('x-csrf-token', 'f0b8e7443eb7e')
  .set('x-requested-with', 'XMLHttpRequest')
  .end((err, sres) => {
    // Standard error handling.
    if (err) {
      return next(err)
    }
    // A non-JSON body (e.g. an HTML error page) would make JSON.parse throw
    // inside this callback; report it through the same error path instead.
    let result
    try {
      result = JSON.parse(sres.text).result
    } catch (parseError) {
      return next(parseError)
    }
    resolve(result)
  })
依据就是下面这个:
不就是请求头么,不就是 origin 么,不就是用户代理么,用个 HTTPS 我还拿你没办法了?
注意上面的 .set('content-length', '8')
,不知道哪里出了问题,加上这个就超时(推测是因为请求实际没有发送 body,却声明了 8 字节的长度,服务端一直在等剩下的数据)……
于是,数据就老实交代了吧:
{
"pages": {
"paging": {
"n": 2182,
"page": 1,
"pages": 219
},
"items": [
{
"shopUserId": "2450112357",
"title": "凯度高端款嵌入式蒸烤箱",
"status": 1,
"totalNum": 1,
"requestNum": 15530,
"acceptNum": 0,
"reportNum": 0,
"isApplied": false,
"shopName": "casdon凯度旗舰店",
"showId": "2561626",
"startTime": 1539619200000,
"endTime": 1540220400000,
"id": "34530215",
"type": 1,
"pic": "//img.alicdn.com/bao/uploaded/TB1ycS2eMDqK1RjSZSyXXaxEVXa.jpg",
"shopItemId": "559771706359",
"price": 13850
},
{
"shopUserId": "3189770892",
"title": "皇家美素佳儿老包装2段400g",
"status": 1,
"totalNum": 50,
"requestNum": 2079,
"acceptNum": 0,
"reportNum": 0,
"isApplied": false,
"shopName": "皇家美素佳儿旗舰店",
"showId": "2551240",
"startTime": 1539619200000,
"endTime": 1540220400000,
"id": "34396042",
"type": 1,
"pic": "//img.alicdn.com/bao/uploaded/TB1YrSZaVYqK1RjSZLeXXbXppXa.jpg",
"shopItemId": "547114874458",
"price": 189
},
{
"shopUserId": "1077716829",
"title": "关注商号优先审水暗码幻彩断绝",
"status": 1,
"totalNum": 10,
"requestNum": 6907,
"acceptNum": 0,
"reportNum": 0,
"isApplied": false,
"shopName": "水暗码旗舰店",
"showId": "2568391",
"startTime": 1539619200000,
"endTime": 1540220400000,
"id": "34784086",
"type": 1,
"pic": "//img.alicdn.com/bao/uploaded/TB16_4ChmzqK1RjSZPxXXc4tVXa.jpg",
"shopItemId": "559005882880",
"price": 599
},
{
"shopUserId": "725786863",
"title": "佳构皮草派克大衣",
"status": 1,
"totalNum": 1,
"requestNum": 11793,
"acceptNum": 0,
"reportNum": 0,
"isApplied": false,
"shopName": "美瑞蓓特",
"showId": "2557886",
"startTime": 1539619200000,
"endTime": 1540220400000,
"id": "34574078",
"type": 1,
"pic": "//img.alicdn.com/bao/uploaded/TB1zVLMdCrqK1RjSZK9XXXyypXa.jpg",
"shopItemId": "577418950477",
"price": 5980
},
{
"shopUserId": "3000840351",
"title": "保友智能新品Pofit电脑椅",
"status": 1,
"totalNum": 1,
"requestNum": 12895,
"acceptNum": 0,
"reportNum": 0,
"isApplied": false,
"shopName": "保友办公家具旗舰店",
"showId": "2557100",
"startTime": 1539619200000,
"endTime": 1540220400000,
"id": "34528042",
"type": 1,
"pic": "//img.alicdn.com/bao/uploaded/TB1bYZEg6TpK1RjSZKPXXa3UpXa.png",
"shopItemId": "577598687971",
"price": 5408
},
{
"shopUserId": "791732485",
"title": "TEK手持吸尘器A8",
"status": 1,
"totalNum": 1,
"requestNum": 17195,
"acceptNum": 0,
"reportNum": 0,
"isApplied": false,
"shopName": "泰怡凯旗舰店",
"showId": "2552265",
"startTime": 1539619200000,
"endTime": 1540220400000,
"id": "34444014",
"type": 1,
"pic": "//img.alicdn.com/bao/uploaded/TB1D6bWbhTpK1RjSZFGXXcHqFXa.jpg",
"shopItemId": "547653053965",
"price": 5199
},
{
"shopUserId": "3229583972",
"title": "椰富海南冷炸椰子油食用油1L",
"status": 1,
"totalNum": 20,
"requestNum": 4451,
"acceptNum": 0,
"reportNum": 0,
"isApplied": false,
"shopName": "椰富食物专营店",
"showId": "2561698",
"startTime": 1539619200000,
"endTime": 1540220400000,
"id": "34532250",
"type": 1,
"pic": "//img.alicdn.com/bao/uploaded/TB1VjLSePDpK1RjSZFrXXa78VXa.jpg",
"shopItemId": "578653506446",
"price": 256
},
{
"shopUserId": "855223948",
"title": "卡西欧立式家用电钢琴PX770",
"status": 1,
"totalNum": 1,
"requestNum": 16762,
"acceptNum": 0,
"reportNum": 0,
"isApplied": false,
"shopName": "世纪音缘乐器专营店",
"showId": "2551326",
"startTime": 1539619200000,
"endTime": 1540220400000,
"id": "34420041",
"type": 1,
"pic": "//img.alicdn.com/bao/uploaded/TB1CC6aa9zqK1RjSZFpXXakSXXa.jpg",
"shopItemId": "562405126383",
"price": 4838
},
{
"shopUserId": "4065939832",
"title": "关注宝贝送轻奢沙发床",
"status": 1,
"totalNum": 1,
"requestNum": 17436,
"acceptNum": 0,
"reportNum": 0,
"isApplied": false,
"shopName": "贝兮旗舰店",
"showId": "2559904",
"startTime": 1539619200000,
"endTime": 1540220400000,
"id": "34532170",
"type": 1,
"pic": "//img.alicdn.com/bao/uploaded/TB1AzxYegHqK1RjSZFPXXcwapXa.jpg",
"shopItemId": "577798067313",
"price": 4399
},
{
"shopUserId": "807974445",
"title": "森海塞尔CX6蓝牙耳机",
"status": 1,
"totalNum": 4,
"requestNum": 22557,
"acceptNum": 0,
"reportNum": 0,
"isApplied": false,
"shopName": "sennheiser旗舰店",
"showId": "2559701",
"startTime": 1539619200000,
"endTime": 1540220400000,
"id": "34532161",
"type": 1,
"pic": "//img.alicdn.com/bao/uploaded/TB1HET6d7voK1RjSZFwXXciCFXa.jpg",
"shopItemId": "564408956766",
"price": 999
}
]
}
}
细心的小伙伴应该看到了,我没有发送 form 给它,一样可以请求到需要的数据,page 挂在了 query 上……
展示部分
数据拿到,就简单了,其实就是靠这一个接口完成剩下的功能,没错,记住我是前端。
<!DOCTYPE html>
<!-- The page text is Chinese, so declare that for screen readers and translators. -->
<html lang="zh-CN">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <meta http-equiv="X-UA-Compatible" content="ie=edge">
  <title>tb try</title>
  <style>
    /* Applied by the script to rows whose win rate is above 5%. */
    .warning {
      color: red;
    }
    button {
      width: 100px;
      height: 44px;
      margin-right: 44px;
    }
    table {
      border: 1px solid #d8d8d8;
      border-collapse: collapse;
    }
    tr {
      border-bottom: 1px solid #d8d8d8;
    }
    /* Only the data rows open a link on click; the header row does not. */
    #results tr {
      cursor: pointer;
    }
    tr:last-child {
      border: 0;
    }
  </style>
</head>
<body>
<!-- Loads the next page of items; postPage is defined in the script below. -->
<button type="button" onclick="postPage()">下一页</button>
<!-- Filled in by the script with the current page number. -->
<span id="currentPage"></span>
<table>
  <!-- Column headings live in <thead> so they are exposed as table headers. -->
  <thead>
    <tr>
      <th>序号(倒序)</th>
      <th>几率</th>
      <th>名字</th>
    </tr>
  </thead>
  <!-- Data rows are rendered into this body by the script. -->
  <tbody id="results"></tbody>
</table>
<script>
// Page-level state and DOM handles shared by the functions in this script.
let currentPage = 0 // page number sent with the next request; set from the /total response and decremented after each loaded page
let allItems = [] // every item loaded so far, across all fetched pages
let currentTime = 0 // throttle: timestamp (ms) of the last accepted request
const xhr = new XMLHttpRequest() // single request object reused for every call
const loopInterval = 2 // throttle window, in seconds
const results = document.querySelector('#results') // <tbody> that receives the rendered rows
const currentPageText = document.querySelector('#currentPage') // element that shows the current page number
// Replaces the five HTML-significant characters so `value` can be embedded in
// markup as plain text.
const escapeHtml = value =>
  String(value).replace(
    /[&<>"']/g,
    ch => ({ '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch])
  )

/**
 * Re-renders the whole results table from `arr` and updates the page label.
 *
 * Side effects: stores the computed win rate (percent) on each item as
 * `item.rate`, overwrites `results.innerHTML` and `currentPageText.innerText`.
 *
 * @param {Array<{id: *, title: *, totalNum: number, requestNum: number}>} arr
 *   items to display, in display order
 */
const reFullTBody = arr => {
  let innerHtml = ''
  arr.forEach((item, i) => {
    item.rate = item.totalNum / item.requestNum * 100
    // The id ends up inside a single-quoted JS string within an HTML
    // attribute. encodeURIComponent leaves "'" untouched, so encode it by
    // hand; after that no character can break out of the string or attribute.
    const safeId = encodeURIComponent(String(item.id)).replace(/'/g, '%27')
    // Rows with a win rate above 5% are highlighted.
    const rowClass = item.rate > 5 ? ' class="warning"' : ''
    // The title comes from scraped data, so it is escaped before insertion.
    innerHtml += `
<tr${rowClass} onclick="window.open('https://try.taobao.com/item.htm?id=${safeId}')">
<td>${i + 1}</td>
<td>${item.rate.toFixed(3) + '%'}</td>
<td>${escapeHtml(item.title)}</td>
</tr>
`
  })
  currentPageText.innerText = `当前页:${currentPage}`
  results.innerHTML = innerHtml
}
/**
 * Requests page `currentPage` from POST /table, merges the returned items
 * into `allItems`, re-sorts everything by win rate (highest first), re-renders
 * the table and then decrements `currentPage`.
 *
 * Calls made less than `loopInterval` seconds after the previous accepted
 * call are rejected with an alert and send nothing.
 */
const postPage = () => {
  // Throttle: refuse the request while still inside the lock window.
  const newTime = new Date().getTime()
  const isTooSoon = newTime - currentTime < loopInterval * 1000
  if (isTooSoon) {
    alert(loopInterval + '秒内不要屡次点击哦。')
    return
  }
  currentTime = newTime
  xhr.onreadystatechange = function() {
    // Only act once the request has completed successfully.
    if (this.readyState !== 4 || this.status !== 200) {
      return
    }
    const res = JSON.parse(this.response)
    if (res.length < 1) {
      alert('本日完毕的已挑选完了')
      return
    }
    // The rate must exist before sorting. Previously it was only assigned
    // while rendering, so freshly fetched items compared as NaN and the new
    // page was never put in order.
    res.forEach(item => {
      item.rate = item.totalNum / item.requestNum * 100
    })
    allItems = [...allItems, ...res]
    allItems.sort((a, b) => b.rate - a.rate)
    reFullTBody(allItems)
    currentPage--
  }
  xhr.open('post', '/table')
  xhr.setRequestHeader("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
  // Send the request.
  xhr.send("page=" + currentPage)
}
// Bootstrap: ask the server for the total page count, start from that page,
// and immediately load it. postPage() installs its own handler afterwards.
xhr.onreadystatechange = function() {
  const finishedOk = this.readyState === 4 && this.status === 200
  if (!finishedOk) {
    return
  }
  const { pages } = JSON.parse(this.response)
  currentPage = pages
  postPage()
}
xhr.open('get', '/total')
xhr.send()
</script>
</body>
</html>
长这个样子:
我多人性化,可以点击跳转、几率超过 5% 红色展示、还告诉你当前所在页码、点太快还给你提示……
就是这么好用,喜欢的赶紧体验吧!
线上:点我体验
Github: Spider
觉得有用,不要吝啬 star 哦。