Node.js 的 querystring 模块、http 小爬虫、events 模块、fs 模块和 stream 模块

## querystring模块 1. 功能:是node.js中用来解析和格式化 URL 查询字符串(query string)的模块 2. 核心方法 – parse:将string->object – parse( str , arg1 , arg2) str: 你要处理的字符串 arg1: 键值对之间的分隔符,默认为 & arg2: 键与值之间的赋值符,默认为 = (解析时先按 arg1 把字符串切分成一组组键值对,再按 arg2 把每一组拆成 key: value) “`javascript var qs=require(‘querystring’); var str=’http://www.baidu.com/001?a=1&b=2#hash=20′; var obj=qs.parse(str,’?’,’&’); console.log(obj) “` – stringify:将object->string “`javascript qs.stringify(obj) “` – escape:将中文等特殊字符进行百分号编码(URL 编码) “`javascript var charStr=’http://www.baidu.com/001?city=杭州’; var url=require(‘url’); var charurl=url.parse(charStr).query; console.log(qs.escape(charurl)); “` – unescape:将经过百分号编码的字符解码还原 “`javascript qs.unescape(qs.escape(charurl)) “` ## http – 核心方法:get、request;典型应用:小爬虫 – http小爬虫: 使用数据请求一段内容,然后将这段内容做数据清洗,最后再通过后端服务器发送到前台页面 – 反爬虫:反数据请求,反内容,让数据清洗不好处理   “`javascript http小爬虫举例:请求网址:http://stu.1000phone.net/student.php/Index/index 1.进入node.js官网,找到http模块,引入http 2.使用http的get方法 http.get(url/options,callback) 3.定义一个options 4.通过使用get方法已经获得了数据请求,是一个网页 5.然后进行数据清洗,通过一个第三方包(工具:cheerio),去npmjs里面找 6.安装cheerio `$ npm i cheerio -S`(安装之前先用 `$ npm init -y` 生成package.json) 7.引入cheerio 8.发送给前台 var http = require(‘http’); var cheerio = require(‘cheerio’);

const options = { hostname: ‘stu.1000phone.net’, port: 80, path: ‘/student.php/Index/index’, method: ‘get’, headers: { //这里的数据是request header里的数据 ‘Host’: ‘ stu.1000phone.net’, ‘Connection’: ‘ keep-alive’, ‘Cache-Control’: ‘ max-age=0’, ‘Upgrade-Insecure-Requests’: ‘ 1’, ‘User-Agent’: ‘ Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/ 74.0.3729.131 Safari/537.36’, ‘Accept’: ‘ text/html,application/xhtml+xml, application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8, application/signed-exchange;v=b3’, ‘Referer’: ‘ http://stu.1000phone.net/student.php/ Index/moneyDetail’, ‘Accept-Encoding’: ‘ gzip, deflate’, ‘Accept-Language’: ‘ zh-CN,zh;q=0.9,en;q=0.8’, ‘Cookie’: ‘ PHPSESSID=ouvnaju74a3be6lcb6updgvb95; StuInfo=think%3A%7B%22StuId%22%3A%22133717%22%2C%22StuN umber%22%3A%22HZ190213065%22%2C%22IDcard%22%3A%22QFEDU_ tEubnTherf1jqmBcsdhx9KlB0Nk%252BSeEjdkl%252F9W44GGI%253 D%22%2C%22StuName%22%3A%22%25E8%25B5%25B5%25E8%258B%25B 1%25E5%25A7%25BF%22%2C%22Cid%22%3A%222237%22%7D’, ‘Content-Type’: ‘application/x-www-form-urlencoded’, } };

http.createServer(function (request, response) { response.writeHead( 200 , { ‘Content-type’: ‘text/html;charset=utf8’ }) var req = http.get(options, function (res) { res.setEncoding(‘utf8’); let rawData = ”; res.on(‘data’, (chunk) => { rawData += chunk; }); res.on(‘end’, () => { try { //console.log(rawData)//输出的是整个网页 var $ = cheerio.load(rawData); response.write($(‘.inline .user-title-label span’).text().toString()); response.end()

} catch (e) { console.error(e.message); } }) }).on(‘error’, (e) => { console.error(`problem with request: ${e.message}`) }) req.end()

}).listen(8002, ‘localhost’, function () { console.log(`服务器运行在:http://localhost:8002`) })

“`

## events模块 – 1.功能:是node.js中的事件模块 – 2.使用 “`javascript //创建event var Events=require(‘events’); //通过定义一个类继承Events的方式 class MyEvents extends Events {}; //再实例化这个类,得到一个对象,对象身上就会具备一些属性 var myEvents=new MyEvents(); //这个实例身上具备on和emit两个方法,on是事件的定义(声明),emit是事件的执行 //声明事件 myEvents.on(‘a’,()=>{ console.log(‘hello’) }) //触发事件 myEvents.emit(‘a’) “` ## fs – 概念:node.js中处理文件的模块 – 使用 1. 操作目录 “`javascript var fs=require(‘fs’) 增 fs.mkdir(‘./dist’,function( error ) { if( error ) throw error console.log( ‘目录创建成功’ ) }) “` “`javascript 改 fs.rename(‘./dist’,’./fs_dist’,function( error ) { if( error ) throw error console.log(‘ 目录名称修改成功 ‘) }) “` “`javascript 查,查目录里的文件 for( var i = 0 ; i < 10 ; i ++ ){ fs.writeFile(`./fs_dist/${i}.txt`,i,function( err ) { console.log( `第${i}个文件创建成功` ) }) }

fs.readdir(‘./fs_dist’,’utf-8′,function ( error,data ) { if( error ) throw error //console.log( data ) // 以文件名为元素构成的数组 for ( var i = 0 ; i < data.length; i ++ ){ fs.readFile( `./fs_dist/${data[i]}`,’utf8′,function( error , content ) { if( error ) throw error console.log( content ) }) } }) “` “`javascript 删 //fs.rmdir(path,callback) 这个方法只能删除空目录

fs.rmdir( ‘./fs_dist’, function ( error ) {

if( error ) throw error   console.log(‘目录删除成功’)

}) “` 2. 操作文件 “`javascript 增 writeFile(路径,内容 , 错误优先的回调) fs.writeFile(‘./dist/1.txt’,’hello yyb’,function( error ) { if( error ) throw error }) “` “`javascript 改 fs.appendFile(‘./dist/1.txt’,’\nhello 千锋~~~’,’utf8′,function( error ) { if( error ) throw error console.log(‘文件修改成功’) }) “` “`javascript 查 fs.readFile( ‘./dist/1.txt’,’utf8′,function( error, data ) { if ( error ) throw error // console.log( data.toString() ) // 二进制数据 console.log( data ) console.log(‘文件读成功了’) }) “` “`javascript 删 fs.unlink( ‘./dist/1.txt’, function( error ) { if( error ) throw error console.log( ‘文件删除成功’ ) }) “` ## stream 1. 概念:stream(流)可以减少内存消耗, 提高效率 2. 名词:pipe–>管道流 可读的流,可写的流 “`javascript 举例:压缩包的创建 var fs = require( ‘fs’ ) var zlib = require(‘zlib’) // 创建压缩包 var readeStream = fs.createReadStream( ‘./dist/1.txt’ ) var writeStream = fs.createWriteStream( ‘./dist/1.txt.gz’ ) var gzip = zlib.createGzip() // 空压缩包 readeStream .pipe( gzip ) .pipe( writeStream ) “`

    原文作者:我是小仙女呢~
    原文地址: https://www.cnblogs.com/zhaoyingzi/p/10871955.html
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞