/**
- 教程:https://blog.csdn.net/Qc1998/article/details/83154558
- cheerio使用:https://www.cnblogs.com/zjx2011/p/6554772.html
*/
var cheerio = require('cheerio');
var superagent = require('superagent');
var charset = require('superagent-charset');
superagent = charset(superagent)
// Fetch the https://www.88ys.cc/ home page and print the text of every
// element matched by the selector `.index-area ul li .name`.
superagent.get('https://www.88ys.cc/')
  .charset('utf8') // decode the response body as UTF-8 (superagent-charset)
  .end(function (err, res) {
    if (err) {
      console.log(err);
      // The callback's return value is not used, so just stop here.
      return;
    }
    const $ = cheerio.load(res.text);
    // The selection is already a cheerio object: iterate it directly instead
    // of re-wrapping it and leaking an undeclared `$element` global.
    $('.index-area ul li .name').each(function (i, elem) {
      // `this` is the current element, so this must stay a regular function.
      console.log($(this).text());
    });
  });
// Image-download version
/**
 * The following npm packages must be installed:
 * 1. $ npm install express --save
 * 2. $ npm install superagent --save
 * 3. $ npm install superagent-charset
 * 4. $ npm install cheerio
 */
// Step 1: set up the basic request listener.
// `express` is now declared; the original assigned to an undeclared name,
// which creates an implicit global and throws in strict mode.
var express = require('express');
var app = express();
// Step 2: HTTP fetching and DOM handling.
// These stay `var` because the same names are already declared with `var`
// earlier in this file; `const`/`let` would be a redeclaration error.
var superagent = require('superagent'); // HTTP client, comparable to Axios, Request or Fetch
var charset = require('superagent-charset'); // adds .charset() for decoding responses (utf8 here)
superagent = charset(superagent);
var cheerio = require('cheerio'); // server-side DOM API, similar to jQuery in the browser
// Step 3: file reading and writing.
const fs = require('fs');
/**
 * GET / — fetches an image.so.com search result page, reads the image list
 * from the JSON embedded in its `script#initData` tag, hands the list to
 * dowmImg() and streams progress lines back to the client as HTML.
 *
 * Every exit path ends the response: the status line and headers are sent
 * before the upstream request starts, so an unfinished response would leave
 * the client waiting indefinitely.
 */
app.get('/', function (request, response, next) {
  response.writeHead(200, {'Content-Type': 'text/html; charset=utf-8'});
  response.write("创建爬虫<br>");
  superagent.get("http://image.so.com/i?q=%E5%88%98%E4%BA%A6%E8%8F%B2&src=tab_www#/")
    .charset('utf8')
    .buffer(true)
    .end(function (err, res) {
      if (err) {
        console.log(err);
        // Headers are already sent, so report the failure in the body and close.
        response.end("获取数据失败<br>");
        return;
      }
      response.write("获得数据<br>");
      try {
        // Locate the embedded JSON payload and extract its `list` property.
        const $ = cheerio.load(res.text);
        const initData = JSON.parse($('script[id="initData"]').html());
        // A missing script tag yields null here, so `.list` throws and is
        // handled below instead of escaping the callback.
        dowmImg(initData.list, './imgs', response);
      } catch (processErr) {
        console.log(processErr);
        response.end("处理数据失败<br>");
        return;
      }
      // Close the response stream.
      response.end("操作完成<br>");
    });
});
// Bind the server to port 3000 and log a message from the listen callback.
app.listen(3000, () => console.log('app is listening at port 3000'));
/**
 * Escapes the characters that are significant in HTML text and in quoted
 * attribute values.
 * @param {*} value - value to embed; converted with String().
 * @returns {string} the escaped text
 */
function escapeHtml(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Starts a download for every image in the list and writes one HTML link
 * per started download to `response`.
 *
 * Each image is saved as `<filePath>/<index>.png`, where `index` is the
 * item's position in `imgList`. Items that are not objects, or whose `img`
 * is not a string containing "http", are skipped. Downloads are only
 * started here: the files are written asynchronously after this function
 * has returned.
 *
 * @param {Array<{title: *, img: string}>} [imgList=[]] - items to download;
 *   a non-array value makes the function return without doing anything.
 * @param {string} [filePath='./imgs'] - target directory; created when missing.
 * @param {{write: Function}} [response] - writable that receives the links;
 *   when omitted no links are written.
 * @returns {undefined}
 */
function dowmImg(imgList=[],filePath='./imgs',response){
  if (!Array.isArray(imgList)) return;
  try {
    // createWriteStream does not create missing directories; without this
    // every stream would fail with ENOENT.
    fs.mkdirSync(filePath, { recursive: true });
  } catch (err) {
    console.log(err);
    return;
  }
  imgList.forEach((item, index) => {
    const { title, img } = item || {};
    if (typeof img !== 'string' || img.indexOf("http") === -1) return;
    const fileStream = fs.createWriteStream(`${filePath}/${index}.png`);
    // An 'error' event without a listener would crash the process.
    fileStream.on('error', (err) => console.log(err));
    const download = superagent.get(img);
    // In pipe mode request failures are only reported through this event.
    download.on('error', (err) => console.log(err));
    download.pipe(fileStream);
    if (response) {
      // title and img come from the scraped page: escape them and quote the
      // attribute so they cannot inject markup.
      response.write(`<a href="${escapeHtml(img)}">${escapeHtml(title)}</a><br />`);
    }
  });
}