Browse Source

cms修改,爬虫吉林省事业单位招聘好使

nihao 5 years ago
parent
commit
7c80ad4b90

+ 3 - 3
app/controller/.column.js

@@ -4,14 +4,14 @@ module.exports = {
     "parameters": {
       "query": ["!site"],
     },
-    "requestBody": ["title", "type", "parent_id", "parent", "is_use"],
+    "requestBody": ["title", "type", "parent_id", "news_type","parent", "is_use"],
   },
   // 修改栏目
   "update": {
     "parameters": {
       "params": ["!id"],
     },
-    "requestBody": ["title", "type", "parent_id", "parent", "is_use"],
+    "requestBody": ["title", "type", "parent_id", "news_type","parent", "is_use"],
     // "options": {
     //   "projection": "+name",
     // },
@@ -59,7 +59,7 @@ module.exports = {
   // 前台加载栏目列表
   "list": {
     "parameters": {
-      "query": ["!site"],
+      "query": ["!site", "parent_id", "is_use"],
       "options": {
         "meta.state": 0, // 只显示未删除数据
       },

+ 78 - 0
app/controller/.img.js

@@ -0,0 +1,78 @@
+// Route metadata for the image endpoints; app/controller/img.js passes this object to CrudController.
+// NOTE(review): a leading "!" (e.g. "!site", "!id") presumably marks a required parameter — confirm against naf-framework-mongoose.
+module.exports = {
+  // Create an image
+  "create": {
+    "parameters": {
+      "query": ["!site"],
+    },
+    "requestBody": ["title", "type", "pic", "url", "is_use"],
+  },
+  // Update an image
+  "update": {
+    "parameters": {
+      "params": ["!id"],
+    },
+    "requestBody": ["title", "type", "pic", "url", "is_use"],
+    // "options": {
+    //   "projection": "+name",
+    // },
+  },
+  // Fetch the details of one image
+  "fetch": {
+    "parameters": {
+      "params": ["!id"],
+    },
+    // "options": {
+    //   "projection": "+content",
+    // },
+  },
+  // Delete an image
+  "delete": {
+    "parameters": {
+      "params": ["!id"],
+    },
+  },
+  // Restore an image
+  "restore": {
+    "parameters": {
+      "params": ["!id"],
+    },
+  },
+  // Admin side: query the image list
+  "query": {
+    "parameters": {
+      "query": ["!site", "type","is_use"],
+      "options": {
+        "meta.state": 0, // only show records that have not been deleted
+      },
+    },
+    "service": "query",
+    "options": {
+      "query": ["skip", "limit"],
+      "sort": ["meta.createdAt"],
+      "desc": true,
+      "count": true,
+      "projection": {
+        "attachment": 0 // NOTE(review): the img schema in app/model/img.js declares no "attachment" field — confirm this exclusion is needed
+      }
+    }
+  },
+  // Front end: load the image list (uses the same "query" service entry and options as the admin query above)
+  "list": {
+    "parameters": {
+      "query": ["!site", "type", "is_use"],
+      "options": {
+        "meta.state": 0, // only show records that have not been deleted
+      },
+    },
+    "service": "query",
+    "options": {
+      "query": ["skip", "limit"],
+      "sort": ["meta.createdAt"],
+      "desc": true,
+      "count": true,
+      "projection": {
+        "attachment": 0 // NOTE(review): the img schema in app/model/img.js declares no "attachment" field — confirm this exclusion is needed
+      }
+    }
+  },
+};

+ 1 - 1
app/controller/.menu.js

@@ -61,7 +61,7 @@ module.exports = {
   // 前台加载菜单列表
   "list": {
     "parameters": {
-      "query": ["!site"],
+      "query": ["!site", "is_use"],
       "options": {
         "meta.state": 0, // 只显示未删除数据
       },

+ 1 - 1
app/controller/.modules.js

@@ -40,7 +40,7 @@ module.exports = {
   // 后台查询模块列表
   "query": {
     "parameters": {
-      "query": ["!site"],
+      "query": ["!site", "is_use"],
       "options": {
         "meta.state": 0, // 只显示未删除数据
       },

+ 3 - 3
app/controller/.news.js

@@ -4,14 +4,14 @@ module.exports = {
     "parameters": {
       "query": ["!site"],
     },
-    "requestBody": ["title", "pic", "content", "type", "parent_id", "parent", "attachment", "is_use"],
+    "requestBody": ["title", "pic", "content", "type", "parent_id", "parent","publish_time", "attachment", "is_use"],
   },
   // 修改新闻信息
   "update": {
     "parameters": {
       "params": ["!id"],
     },
-    "requestBody": ["title", "pic", "content", "type", "parent_id", "parent","attachment", "is_use"],
+    "requestBody": ["title", "pic", "content", "type", "parent_id", "parent", "publish_time","attachment", "is_use"],
     "options": {
       "projection": "+content",
     },
@@ -59,7 +59,7 @@ module.exports = {
   // 前台加载新闻列表
   "list": {
     "parameters": {
-      "query": ["parent_id", "site"],
+      "query": ["parent_id", "site", "is_use"],
       "options": {
         "meta.state": 0, // 只显示未删除数据
       },

+ 1 - 1
app/controller/.newslist.js

@@ -40,7 +40,7 @@ module.exports = {
   // 后台查询信息列表
   "query": {
     "parameters": {
-      "query": ["!site"],
+      "query": ["!site", "is_use"],
       "options": {
         "meta.state": 0, // 只显示未删除数据
       },

+ 16 - 0
app/controller/img.js

@@ -0,0 +1,16 @@
+'use strict';
+
+const _ = require('lodash');
+const meta = require('./.img.js');
+const Controller = require('egg').Controller;
+const { CrudController } = require('naf-framework-mongoose/lib/controller');
+
+/**
+ * Controller for image records. It declares no actions of its own; the module
+ * exports whatever CrudController builds from this class and the metadata in ./.img.js.
+ */
+class ImgController extends Controller {
+  constructor(ctx) {
+    super(ctx);
+    // Point this.service at the img service exposed on the request context.
+    this.service = this.ctx.service.img;
+  }
+
+}
+
+// Export the value returned by CrudController for this class and the ./.img.js metadata.
+module.exports = CrudController(ImgController, meta);

+ 13 - 1
app/controller/verify.js

@@ -54,7 +54,19 @@ class VerifyController extends Controller {
     }
   }
   async creeper() {
-    await this.service.tool.creeper();
+    this.service.tool.creeper();
+    // const { ctx, app } = this;
+    // // 目标链接 吉林省人事考试网第一页
+    // const targetUrl = 'http://www.jlzkb.com/cms/root/ksbmList.vm?dir=L-iAg-ivleaKpeWQjS_kuovkuJrljZXkvY3mi5vogZjogIPor5U&page=1&rows=8';
+    // const columnTitle = '吉林省事业单位招聘';
+    // // 查看是否有这个栏目,没有则创建一个
+    // let column = await this.service.column.model.find({ news_type: '0', title: columnTitle });
+    // if (column.length === 0) {
+    //   column = await this.service.column.model.create({ site: '99991', news_type: '0', title: columnTitle, type: '', parent_id: '', parent: '', is_use: '' });
+    // }
+    // await this.service.tool.creeper(targetUrl, column);
+    // // console.log(column.length);
+    // ctx.body = { errcode: 1, errmsg: 'error', data: column };
   }
 }
 

+ 1 - 1
app/model/column.js

@@ -29,7 +29,7 @@ const SchemaDefine = {
   parent_id: { type: String, required: false, maxLength: 64 }, // 所属id
   parent: { type: String, required: false, maxLength: 100 }, // 所属名称
   is_use: { type: String, required: false, maxLength: 5 }, // 是否使用,0=>使用中;1=>已禁止
-
+  news_type: { type: String, default: 1, maxLength: 5 }, // 0抓取,1正常输入
   meta: {
     createdBy: String, // 创建用户
     updatedBy: String, // 修改用户

+ 31 - 0
app/model/img.js

@@ -0,0 +1,31 @@
+/**
+ * Image information model.
+ */
+'use strict';
+const Schema = require('mongoose').Schema;
+const metaPlugin = require('naf-framework-mongoose/lib/model/meta-plugin');
+
+// Field definitions for an image record
+const SchemaDefine = {
+  site: { type: String, required: true, maxLength: 64 }, // owning site
+  title: { type: String, required: false, maxLength: 100 }, // image title
+  type: { type: String, required: false, maxLength: 5 }, // image type
+  pic: { type: String, required: false, maxLength: 200 }, // image address
+  url: { type: String, required: false, maxLength: 200 }, // link
+  is_use: { type: String, required: false, maxLength: 5 }, // whether in use: 0 => in use; 1 => disabled
+  meta: {
+    createdBy: String, // user who created the record
+    updatedBy: String, // user who last modified the record
+  },
+  // remark: { type: String, maxLength: 500 }, // remark
+};
+const schema = new Schema(SchemaDefine, { 'multi-tenancy': true, toJSON: { virtuals: true } });
+// The meta plugin is applied before the meta.* indexes below are declared.
+schema.plugin(metaPlugin);
+schema.index({ site: 1 });
+schema.index({ 'meta.state': 1 });
+schema.index({ 'meta.createdAt': -1 });
+// NOTE(review): `top` is not declared in SchemaDefine above — this compound index may have been copied from another model; confirm it is wanted.
+schema.index({ 'meta.createdAt': -1, top: -1, 'meta.state': 1 });
+// Registers the schema as model 'ImgInfo' on the 'cms_img' collection using the app's mongoose instance.
+module.exports = app => {
+  const { mongoose } = app;
+  return mongoose.model('ImgInfo', schema, 'cms_img');
+};

+ 1 - 1
app/model/menu.js

@@ -11,7 +11,7 @@ const metaPlugin = require('naf-framework-mongoose/lib/model/meta-plugin');
 //   uri: { type: String, maxLength: 128 },
 // }, { _id: false });
 
-// 栏目信息
+// 菜单信息
 const SchemaDefine = {
   site: { type: String, required: true, maxLength: 64 }, // 归属站点
   title: { type: String, required: false, maxLength: 100 }, // 菜单名称

+ 1 - 0
app/model/news.js

@@ -20,6 +20,7 @@ const SchemaDefine = {
   type: { type: String, required: false, maxLength: 5 }, // 所属类型
   parent_id: { type: String, required: false, maxLength: 64 }, // 所属id
   parent: { type: String, required: false, maxLength: 100 }, // 所属名称
+  publish_time: String, // 发布时间
   attachment: [ Attachment ], // 附件
   is_use: { type: String, required: false, maxLength: 5 }, // 是否使用,0=>使用中;1=>已禁止
   meta: {

+ 12 - 0
app/router.js

@@ -23,6 +23,10 @@ module.exports = app => {
 
   router.get('/api/newslist/list', controller.newslist.list); // 新闻信息列表,隐藏删除信息,按照置顶和时间排序
   router.get('/api/newslist/fetch/:id', controller.newslist.fetch); // 获取新闻信息详情
+
+  router.get('/api/img/list', controller.img.list); // 图片信息列表,隐藏删除信息,按照置顶和时间排序
+  router.get('/api/img/fetch/:id', controller.img.fetch); // 获取图片信息详情
+
   // 管理接口
   // 【分站】新闻接口
   router.get('/adminapi/news/query', controller.news.query); // 查询新闻信息
@@ -78,6 +82,13 @@ module.exports = app => {
   router.post('/adminapi/site/update/:site', controller.site.update); // 修改分站信息
   router.post('/adminapi/site/delete/:site', controller.site.delete); // 删除分站信息
 
+  // 图片接口
+  router.get('/adminapi/img/query', controller.img.query); // 查询图片信息
+  router.get('/adminapi/img/fetch/:id', controller.img.fetch); // 获得图片详情
+  router.post('/adminapi/img/create', controller.img.create); // 发布图片信息
+  router.post('/adminapi/img/update/:id', controller.img.update); // 修改图片信息
+  router.post('/adminapi/img/delete/:id', controller.img.delete); // 删除图片信息
+
   // 暂定为验证码测试接口
   router.get('/adminapi/verify/verify', controller.verify.verify); // 生成验证码并保存
   router.post('/adminapi/verify/check_verify', controller.verify.check_verify); // 检测验证码
@@ -86,4 +97,5 @@ module.exports = app => {
 
 
   router.get('/adminapi/verify/creeper', controller.verify.creeper); // 测试爬虫
+  // http://127.0.0.1:8105/adminapi/verify/creeper
 };

+ 21 - 0
app/schedule/creeper.js

@@ -0,0 +1,21 @@
+'use strict';
+
+const Subscription = require('egg').Subscription;
+
+class Creeper extends Subscription {
+  // 通过 schedule 属性来设置定时任务的执行间隔等配置
+  static get schedule() {
+    return {
+      cron: '0 0 23 * * ?', // 每天晚上23点执行任务
+      // interval: '20s', // 20秒一次
+      type: 'all', // 指定所有的 worker 都需要执行
+    };
+  }
+
+  // subscribe 是真正定时任务执行时被运行的函数
+  async subscribe() {
+    await this.service.tool.creeper();
+  }
+}
+
+module.exports = Creeper;

+ 5 - 5
app/service/column.js

@@ -12,7 +12,7 @@ class ColumnService extends CrudService {
     this.model = this.ctx.model.Column;
   }
 
-  async create({ site }, { title, type, parent_id, parent, is_use }) {
+  async create({ site }, { title, type, parent_id, parent, is_use, news_type }) {
     // 检查数据
     assert(_.isString(site), 'site不能为空');
     assert(!title || _.isString(title), 'title必须为字符串');
@@ -20,14 +20,14 @@ class ColumnService extends CrudService {
     assert(!parent_id || _.isString(parent_id), 'parent_id必须为字符串');
     assert(!parent || _.isString(parent), 'parent必须为字符串');
     assert(!is_use || _.isString(is_use), 'is_use必须为字符串');
-
+    assert(!news_type || _.isString(news_type), 'news_type必须为字符串');
     // TODO: 检查用户信息
     const userid = this.ctx.userid;
     if (!_.isString(userid)) throw new BusinessError(ErrorCode.NOT_LOGIN);
 
     // TODO:保存数据
     const data = {
-      site, title, type, parent_id, parent, is_use,
+      site, title, type, parent_id, parent, is_use, news_type,
       meta: { createdBy: userid },
     };
     
@@ -37,14 +37,14 @@ class ColumnService extends CrudService {
 
   async update({ id }, payload) {
     // 检查数据
-    const { title, type, parent_id, parent, is_use } = payload;
+    const { title, type, parent_id, parent, is_use, news_type } = payload;
     assert(id, 'id不能为空');
     assert(!title || _.isString(title), 'title必须为字符串');
     assert(!type || _.isString(type), 'type必须为字符串');
     assert(!parent_id || _.isString(parent_id), 'parent_id必须为字符串');
     assert(!parent || _.isString(parent), 'parent必须为字符串');
     assert(!is_use || _.isString(is_use), 'is_use必须为字符串');
-
+    assert(!news_type || _.isString(news_type), 'news_type必须为字符串');
     // TODO: 检查用户信息
     const userid = this.ctx.userid;
     if (!_.isString(userid)) throw new BusinessError(ErrorCode.NOT_LOGIN);

+ 168 - 0
app/service/creeperjlsy.js

@@ -0,0 +1,168 @@
+/* eslint-disable strict */
+const svgCaptcha = require('svg-captcha');
+// const cheerio = require('cheerio');
+// const puppeteer = require('puppeteer');
+const charset = require('superagent-charset');
+const superagent = charset(require('superagent'));
+const cheerio = require('cheerio');
+const fs = require('fs');
+const { CrudService } = require('naf-framework-mongoose/lib/service');
+class CreeperjlsyService extends CrudService {
+
+  // 产生验证码
+  async captcha() {
+    const captcha = svgCaptcha.create({
+      size: 4, // 大小
+      fontSize: 50, // 字体大小
+      width: 100, // 宽
+      height: 40, // 高
+      bacground: '#cc9966', // 背景色
+    });
+    return captcha;
+  }
+
+  // 发送短信验证码
+  async sendmessage(ctx, app, randomstr) {
+    const message = '【吉林省就业中心】您的验证码为:' + randomstr + ',请在30分钟内完成输入,欢迎使用吉林省智慧就业企业服务平台。';
+    const data = '?Id=300&Name=wwqcgh&Psw=jljyzx-wwqcgh&Message=' + message + '&Phone=' + ctx.query.mobile + '&Timestamp=0';
+    // const data = '?Id=300&Name=wwqcgh&Psw=jljyzx-wwqcgh&Message=' + message + '&Phone=' + phone + '&Timestamp=0';
+    const path = ctx.app.config.messageDir + data;
+    const result = await app.curl(path, {
+      method: 'GET',
+      dataType: 'text/xml',
+    });
+    return result;
+  }
+
+
+  async creeper() {
+    // 目标链接 吉林省人事考试网第一页
+    const targetUrl = 'http://www.jlzkb.com/cms/root/ksbmList.vm?dir=L-iAg-ivleaKpeWQjS_kuovkuJrljZXkvY3mi5vogZjogIPor5U&page=1&rows=10';
+    const columnTitle = '吉林省事业单位招聘';
+    // 查看是否有这个栏目,没有则创建一个
+    let column = await this.service.column.model.find({ news_type: '0', title: columnTitle });
+    if (column.length === 0) {
+      column = await this.service.column.model.create({ site: '99991', news_type: '0', title: columnTitle, type: '', parent_id: '', parent: '', is_use: '' });
+    } else {
+      column = column[0];
+    }
+    await this.creeperCreate(targetUrl, column);
+  }
+
+
+  // 输入路径返回文本
+  async creeperCreate(targetUrl, column) {
+    // 目标链接 吉林省人事考试网第一页
+    // 用来暂时保存解析到的内容和图片地址数据
+    const hrefOld = targetUrl;
+    const hrefAdd = 'http://www.jlzkb.com/cms/root/';
+    const uri = 'http://www.jlzkb.com';
+    // const imgs = [];
+    // 创建附件文件夹(暂定直接跳到该网站下载)
+    // this.mkdir('./attachment');
+
+    // 发起请求
+    superagent.get(targetUrl).charset('utf-8').buffer(true)
+      .end((error, res) => {
+        if (error) { // 请求出错,打印错误,返回
+          console.log(error);
+          return;
+        }
+        // cheerio需要先load html
+        const $ = cheerio.load(res.text);
+        // 循环列表,获取标题、a标签路径、日期,然后根据a标签路径再次进行爬出内容、保存即可
+        $('#DivInfoList tr').each((index, element) => {
+        // 这些数据都是存放在news中的
+          const title = $(element).find('td a').attr('title');
+          const thisHref = $(element).find('td a').attr('href');
+          const time = $(element).find('td[width="12%"]').text();
+          const publishTime = time.substring(1, 11);
+          // 这里可以给时间做判断当前日期(如果需要的话可以做为判断条件)
+          const nowDate = new Date().toLocaleDateString();
+          if (publishTime !== nowDate) {
+            // 为undefined时,不需要进行下一步了
+            if (thisHref !== undefined) {
+              const thisAllHref = hrefAdd + thisHref;
+              // 请求内容
+              superagent.get(thisAllHref).charset('utf-8').buffer(true)
+                .end((error, res) => {
+                  if (error) { // 请求出错,打印错误,返回
+                    console.log(error);
+                    return;
+                  }
+                  const $ = cheerio.load(res.text);
+                  // 获取内容保存
+                  const content = $('#fontzoom').children('p').clone();
+                  content.find(':nth-child(n)').remove();
+                  const attachment = [];
+                  // 每页都查询是否有附件存在,如果有,下载到本地,保存即可
+                  $('#fontzoom p a').each((index, element) => {
+                    const thisHref = $(element).attr('href');
+                    if (thisHref.substring(0, 4) !== 'http') {
+                      const url = uri + thisHref;
+                      const fileName = $(element).text();
+                      // const filepath = this.downloadAttachment(url, fileName);
+                      const file = {
+                        name: fileName,
+                        uri: url,
+                      };
+                      attachment.push(file);
+                    }
+                  });
+                  const news = this.service.news.model.create({
+                    site: column.site,
+                    title,
+                    pic: '',
+                    content,
+                    type: '',
+                    parent_id: column.id,
+                    parent: column.title,
+                    publish_time: publishTime,
+                    attachment,
+                    is_use: '0'
+                  });
+                });
+            }
+          }
+        });
+        // 点击下一页
+        const href = $('#DivPageControl a').eq(2).attr('href');
+        const hrefNew = hrefAdd + href;
+        // 第一次路径与第二次路径比较,相同,就不需要调自己了
+        console.log(hrefNew);
+        console.log(hrefOld);
+        if (hrefNew !== hrefOld) {
+          // this.creeperCreate(hrefNew, column);
+        // over
+        }
+      });
+  }
+  // 创建目录
+
+  async mkdir(_path) {
+    if (fs.existsSync(_path)) {
+      console.log(`${_path}目录已存在`);
+    } else {
+      fs.mkdir(_path, error => {
+        if (error) {
+          return console.log(`创建${_path}目录失败`);
+        }
+        console.log(`创建${_path}目录成功`);
+      });
+    }
+  }
+  // -------------------------------------------------------这里应返回路径供给前台下载(图片存放问题一并解决)-----------------------------------------------------
+  // 下载爬到的附件
+  async downloadAttachment(thisHref, fileName) {
+    // 下载图片存放到指定目录
+    const stream = fs.createWriteStream('./attachment/' + fileName);
+    const req = superagent.get(thisHref); // 响应流
+    req.pipe(stream);
+    console.log(thisHref);
+    console.log(fileName);
+    return '这里返回路径保存即可';
+    // console.log('开始下载>>>>>>>>>>>>>>');
+  }
+
+}
+module.exports = CreeperjlsyService;

+ 84 - 0
app/service/img.js

@@ -0,0 +1,84 @@
+'use strict';
+
+const assert = require('assert');
+const _ = require('lodash');
+const { BusinessError, ErrorCode } = require('naf-core').Error;
+const { isNullOrUndefined, trimData } = require('naf-core').Util;
+const { CrudService } = require('naf-framework-mongoose/lib/service');
+
+class ImgService extends CrudService {
+  constructor(ctx) {
+    super(ctx);
+    this.model = this.ctx.model.Img;
+  }
+
+  async create({ site }, { title, type, pic, url, is_use}) {
+    // 检查数据
+    assert(_.isString(site), 'site不能为空');
+    assert(!title || _.isString(title), 'title必须为字符串');
+    assert(!type || _.isString(type), 'type必须为字符串');
+    assert(!pic || _.isString(pic), 'pic必须为字符串');
+    assert(!url || _.isString(url), 'url必须为字符串');
+    assert(!is_use || _.isString(is_use), 'is_use必须为字符串');
+    // TODO: 检查用户信息
+    const userid = this.ctx.userid;
+    if (!_.isString(userid)) throw new BusinessError(ErrorCode.NOT_LOGIN);
+
+    // TODO:保存数据
+    const data = {
+      site, title, type, pic, url, is_use,
+      meta: { createdBy: userid },
+    };
+    
+    const res = await this.model.create(data);
+    return res;
+  }
+
+  async update({ id }, payload) {
+    // 检查数据
+    const { title, type, pic, url, is_use} = payload;
+    assert(id, 'id不能为空');
+    assert(!title || _.isString(title), 'title必须为字符串');
+    assert(!type || _.isString(type), 'type必须为字符串');
+    assert(!pic || _.isString(pic), 'pic必须为字符串');
+    assert(!url || _.isString(url), 'url必须为字符串');
+    assert(!is_use || _.isString(is_use), 'is_use必须为字符串');
+    // TODO: 检查用户信息
+    const userid = this.ctx.userid;
+    if (!_.isString(userid)) throw new BusinessError(ErrorCode.NOT_LOGIN);
+
+    // TODO:检查数据是否存在
+    const doc = await this.model.findById(id).exec();
+    if (isNullOrUndefined(doc)) {
+      throw new BusinessError(ErrorCode.DATA_NOT_EXIST);
+    }
+
+    // TODO:保存数据
+    const data = trimData(payload);
+    await this.model.findByIdAndUpdate(doc.id, { ...data, 'meta.updatedBy': userid }, { new: true }).exec();
+    const res = this.model.findById(id, '+imgname').exec();
+    return res;
+  }
+
+  async status({ id, state }) {
+
+    // TODO: 检查数据状态
+    const doc = await this.model.findById(id).exec();
+    if (!doc) {
+      throw new BusinessError(ErrorCode.DATA_NOT_EXIST);
+    }
+
+    doc.meta.state = state;
+    return await doc.save();
+  }
+
+  async delete({ id }) {
+    return await this.status({ id, state: 1 });
+  }
+
+  async restore({ id }) {
+    return await this.status({ id, state: 0 });
+  }
+}
+
+module.exports = ImgService;

+ 5 - 4
app/service/news.js

@@ -12,7 +12,7 @@ class NewsService extends CrudService {
     this.model = this.ctx.model.News;
   }
 
-  async create({ site }, { title, pic, content, type, parent_id, parent, attachment, is_use }) {
+  async create({ site }, { title, pic, content, type, parent_id, parent, publish_time,attachment, is_use }) {
     // 检查数据
     assert(_.isString(site), 'site不能为空');
     assert(_.isString(title), 'title不能为空');
@@ -21,6 +21,7 @@ class NewsService extends CrudService {
     assert(!type || _.isString(type), 'type必须为字符串');
     assert(!parent_id || _.isString(parent_id), 'parent_id必须为字符串');
     assert(!parent || _.isString(parent), 'parent必须为字符串');
+    assert(!publish_time || _.isString(publish_time), 'publish_time必须为字符串');
     assert(!attachment || _.isArray(attachment), 'attachment必须为数组');
     assert(!is_use || _.isString(is_use), 'is_use必须为字符串');
 
@@ -30,7 +31,7 @@ class NewsService extends CrudService {
 
     // TODO:保存数据
     const data = {
-      site, title, pic, content, type, parent_id, parent, attachment, is_use,
+      site, title, pic, content, type, parent_id, parent, publish_time, attachment, is_use,
       meta: { createdBy: userid },
     };
 
@@ -40,7 +41,7 @@ class NewsService extends CrudService {
 
   async update({ id }, payload) {
     // 检查数据
-    const { title, pic, content, type, parent_id, parent, attachment, is_use } = payload;
+    const { title, pic, content, type, parent_id, parent, publish_time,attachment, is_use } = payload;
     assert(id, 'id不能为空');
     assert(!title || _.isString(title), 'title必须为字符串');
     assert(!pic || _.isString(pic), 'pic必须为字符串');
@@ -48,9 +49,9 @@ class NewsService extends CrudService {
     assert(!type || _.isString(type), 'type必须为字符串');
     assert(!parent_id || _.isString(parent_id), 'parent_id必须为字符串');
     assert(!parent || _.isString(parent), 'parent必须为字符串');
+    assert(!publish_time || _.isString(publish_time), 'publish_time必须为字符串');
     assert(!attachment || _.isArray(attachment), 'attachment必须为数组');
     assert(!is_use || _.isString(is_use), 'is_use必须为字符串');
-
     // TODO: 检查用户信息
     const userid = this.ctx.userid;
     if (!_.isString(userid)) throw new BusinessError(ErrorCode.NOT_LOGIN);

+ 128 - 24
app/service/tool.js

@@ -3,10 +3,12 @@ const Service = require('egg').Service;
 const svgCaptcha = require('svg-captcha');
 // const cheerio = require('cheerio');
 // const puppeteer = require('puppeteer');
-const request = require('superagent');
+const charset = require('superagent-charset');
+const superagent = charset(require('superagent'));
 const cheerio = require('cheerio');
 const fs = require('fs');
-class ToolsService extends Service {
+const { CrudService } = require('naf-framework-mongoose/lib/service');
+class ToolsService extends CrudService {
 
   // 产生验证码
   async captcha() {
@@ -32,34 +34,136 @@ class ToolsService extends Service {
     });
     return result;
   }
-  // 输入路径返回文本
+
+
   async creeper() {
     // 目标链接 吉林省人事考试网第一页
-    const targetUrl = 'http://www.jlzkb.com/cms/root/ksbmList.vm?dir=L-iAg-ivleaKpeWQjS_kuovkuJrljZXkvY3mi5vogZjogIPor5U&page=1&rows=8';
+    const targetUrl = 'http://www.jlzkb.com/cms/root/ksbmList.vm?dir=L-iAg-ivleaKpeWQjS_kuovkuJrljZXkvY3mi5vogZjogIPor5U&page=1&rows=10';
+    const columnTitle = '吉林省事业单位招聘';
+    // 查看是否有这个栏目,没有则创建一个
+    let column = await this.service.column.model.find({ news_type: '0', title: columnTitle });
+    if (column.length === 0) {
+      column = await this.service.column.model.create({ site: '99991', news_type: '0', title: columnTitle, type: '', parent_id: '', parent: '', is_use: '' });
+    } else {
+      column = column[0];
+    }
+    await this.creeperCreate(targetUrl, column);
+  }
+
+
+  // 输入路径返回文本
+  async creeperCreate(targetUrl, column) {
+    // 目标链接 吉林省人事考试网第一页
     // 用来暂时保存解析到的内容和图片地址数据
-    const content = '';
+    const hrefOld = targetUrl;
+    const hrefAdd = 'http://www.jlzkb.com/cms/root/';
+    const uri = 'http://www.jlzkb.com';
     // const imgs = [];
+    // 创建附件文件夹(暂定直接跳到该网站下载)
+    // this.mkdir('./attachment');
+
     // 发起请求
-    request.get(targetUrl).end((error, res) => {
-      if (error) { // 请求出错,打印错误,返回
-        console.log(error);
-        return;
-      }
-      // cheerio需要先load html
-      const $ = cheerio.load(res.text);
-      // 循环列表,获取标题、a标签路径、日期,然后根据a标签路径再次进行爬出内容、保存即可
-      $('#DivInfoList tr').each((index, element) => {
-        // const title = $(element).find('td a').attr('title');
-        // console.log(title);
-        // const uri = $(element).find('td a').attr('href');
-        // console.log(uri);
-        // const time = $(element).find('td[width="12%"]').text();
-        // console.log(time);
+    superagent.get(targetUrl).charset('utf-8').buffer(true)
+      .end((error, res) => {
+        if (error) { // 请求出错,打印错误,返回
+          console.log(error);
+          return;
+        }
+        // cheerio需要先load html
+        const $ = cheerio.load(res.text);
+        // 循环列表,获取标题、a标签路径、日期,然后根据a标签路径再次进行爬出内容、保存即可
+        $('#DivInfoList tr').each((index, element) => {
+        // 这些数据都是存放在news中的
+          const title = $(element).find('td a').attr('title');
+          const thisHref = $(element).find('td a').attr('href');
+          const time = $(element).find('td[width="12%"]').text();
+          const publishTime = time.substring(1, 11);
+          // 这里可以给时间做判断当前日期(如果需要的话可以做为判断条件)
+          const nowDate = new Date().toLocaleDateString();
+          if (publishTime !== nowDate) {
+            // 为undefined时,不需要进行下一步了
+            if (thisHref !== undefined) {
+              const thisAllHref = hrefAdd + thisHref;
+              // 请求内容
+              superagent.get(thisAllHref).charset('utf-8').buffer(true)
+                .end((error, res) => {
+                  if (error) { // 请求出错,打印错误,返回
+                    console.log(error);
+                    return;
+                  }
+                  const $ = cheerio.load(res.text);
+                  // 获取内容保存
+                  const content = $('#fontzoom').children('p').clone();
+                  content.find(':nth-child(n)').remove();
+                  const attachment = [];
+                  // 每页都查询是否有附件存在,如果有,下载到本地,保存即可
+                  $('#fontzoom p a').each((index, element) => {
+                    const thisHref = $(element).attr('href');
+                    if (thisHref.substring(0, 4) !== 'http') {
+                      const url = uri + thisHref;
+                      const fileName = $(element).text();
+                      // const filepath = this.downloadAttachment(url, fileName);
+                      const file = {
+                        name: fileName,
+                        uri: url,
+                      };
+                      attachment.push(file);
+                    }
+                  });
+                  const news = this.service.news.model.create({
+                    site: column.site,
+                    title,
+                    pic: '',
+                    content,
+                    type: '',
+                    parent_id: column.id,
+                    parent: column.title,
+                    publish_time: publishTime,
+                    attachment,
+                    is_use: '0'
+                  });
+                });
+            }
+          }
+        });
+        // 点击下一页
+        const href = $('#DivPageControl a').eq(2).attr('href');
+        const hrefNew = hrefAdd + href;
+        // 第一次路径与第二次路径比较,相同,就不需要调自己了
+        console.log(hrefNew);
+        console.log(hrefOld);
+        if (hrefNew !== hrefOld) {
+          // this.creeperCreate(hrefNew, column);
+        // over
+        }
       });
-      // 点击下一页
-      console.log($('#DivPageControl').find('a').attr('href'));
-    });
   }
-}
+  // 创建目录
+
+  async mkdir(_path) {
+    if (fs.existsSync(_path)) {
+      console.log(`${_path}目录已存在`);
+    } else {
+      fs.mkdir(_path, error => {
+        if (error) {
+          return console.log(`创建${_path}目录失败`);
+        }
+        console.log(`创建${_path}目录成功`);
+      });
+    }
+  }
+  // -------------------------------------------------------这里应返回路径供给前台下载(图片存放问题一并解决)-----------------------------------------------------
+  // 下载爬到的附件
+  async downloadAttachment(thisHref, fileName) {
+    // 下载图片存放到指定目录
+    const stream = fs.createWriteStream('./attachment/' + fileName);
+    const req = superagent.get(thisHref); // 响应流
+    req.pipe(stream);
+    console.log(thisHref);
+    console.log(fileName);
+    return '这里返回路径保存即可';
+    // console.log('开始下载>>>>>>>>>>>>>>');
+  }
 
+}
 module.exports = ToolsService;

+ 1 - 0
package.json

@@ -13,6 +13,7 @@
     "redis": "^2.8.0",
     "string-random": "^0.1.3",
     "superagent": "^5.1.0",
+    "superagent-charset": "^1.2.0",
     "svg-captcha": "^1.4.0"
   },
   "devDependencies": {