微信小程序实践_4显示新闻(2)

前言

上一小节说到,点击版面图片区域,可获取到映射的文章链接。本节将着重实现对文章html的解析,正则匹配走起~
通过对文章html结构的比对,调查发现,文章详情从class="text_c"的地方开始,因此解析文章的函数如下。

代码实现

在utils目录下新建articleExtract.js

/**
 * Return the first capture group of the first match of `pattern` in `text`.
 *
 * @param {string} text - String to search.
 * @param {RegExp} pattern - Non-global regular expression with one capture group.
 * @returns {string} The captured text, or "" when the pattern does not match
 *   or the group is empty.
 */
var captureFirst = function (text, pattern) {
  var found = text.match(pattern);
  return (found && found[1]) || "";
};

/**
 * Parse the HTML of one newspaper article page into a plain object.
 *
 * A new result object is built on every call, so results of earlier calls are
 * never overwritten by later ones. Missing parts of the page (no title block,
 * no sub title, no images, no body) yield empty strings / empty arrays instead
 * of throwing.
 *
 * @param {string} html - Raw HTML of the article page. Non-string values are
 *   treated as an empty page.
 * @param {*} newsHref - Link of the article; stored unchanged as `newshref`.
 * @param {*} pagenum - Number of the page the article is on; stored unchanged
 *   as `pagenum`.
 * @returns {{newshref: *, pagenum: *,
 *   titleObj: {title: string, sub: string, quote: string, unknown: string, source: string},
 *   imgArray: Array<{imgSrc: string, imgDesc: string}>,
 *   contentArray: Array<{text: string, strong: (string|undefined)}>}}
 */
var articleExtract = function (html, newsHref, pagenum) {
  var articleObj = {};
  articleObj["newshref"] = newsHref; // link of this article
  articleObj["pagenum"] = pagenum; // number of the page this article is on

  // Turn every <br>, <br/> and <br /> into a line break before matching.
  var page = (typeof html === "string" ? html : "").replace(/<br\s*\/?>/ig, "\r\n");

  /* Regular expressions */
  // Title block. The lazy match stops at the first </div> after class="text_c",
  // so the result is only used for the heading tags.
  var titleReg = /<div[^>]+class="text_c"[^>]*>[\s\S]+?<\/div>/i;
  // Source line.
  var sourceReg = /<div[^>]+class="lai"[^>]*>([\s\S]+?)<\/div>/i;
  // Image tables inside the article.
  var imgReg = /<table[^>]+class="pci_c"[^>]*>[\s\S]+?<\/table>/ig;
  // Article body.
  var contentReg = /<!--enpcontent--><p>[\s\S]*?<\/p><!--\/enpcontent-->/i;

  /* Match results (null when the part is absent) */
  var titleMatch = page.match(titleReg);
  var sourceMatch = page.match(sourceReg);
  var imgMatch = page.match(imgReg);
  var contentMatch = page.match(contentReg);

  /* Intermediate values */
  var titleHtml = titleMatch ? titleMatch[0] : "";
  var source = sourceMatch ? sourceMatch[1].replace(/\s+/g, '') : ""; // source and date
  var imgHtmlArray = imgMatch || ""; // HTML of every matched image table
  var contentHtml = contentMatch ? contentMatch[0] : "";

  /*********** Images ***********************/
  var imgArray = []; // image URL plus caption
  var i;
  for (i = 0; i < imgHtmlArray.length; i++) {
    var imgSrc = captureFirst(imgHtmlArray[i], /<img[^>]*?\ssrc="([^"]*)"/i);
    if (!imgSrc) {
      // A table without a usable <img src="..."> has nothing to display.
      continue;
    }
    imgArray.push({
      // Rewrite the page-relative prefix into an absolute URL.
      imgSrc: imgSrc.replace("../../../", 'http://paper.people.com.cn/rmrb/'),
      imgDesc: captureFirst(imgHtmlArray[i], /<p>([\s\S]*?)<\/p>/i)
    });
  }
  if (imgHtmlArray) {
    console.log("图片匹配", imgArray);
  }

  /************* Titles *********************/
  // Any of the headings may be missing; captureFirst then yields "".
  var h1 = captureFirst(titleHtml, /<h1>([\s\S]*?)<\/h1>/i); // main title
  var h2 = captureFirst(titleHtml, /<h2>([\s\S]*?)<\/h2>/i); // sub title
  var h3 = captureFirst(titleHtml, /<h3>([\s\S]*?)<\/h3>/i); // intro title
  var h4 = captureFirst(titleHtml, /<h4>([\s\S]*?)<\/h4>/i); // purpose unknown to the original author
  console.log("标题 ", h1);
  console.log("副标题 ", h2);
  console.log("引标题 ", h3);
  console.log("不知道是啥的h4 ", h4);
  console.log("来源", source);
  console.log("图片列表", imgHtmlArray);
  console.log("文章段落列表", contentHtml);

  /************* Body *********************/
  var contentArray = []; // one entry per paragraph
  // Some items (e.g. adverts that are a single picture) have no body, hence
  // the fallback to an empty list. [\s\S] is used instead of "." so that
  // paragraphs containing the line breaks inserted above are kept.
  var paragraphs = contentHtml.match(/<p>[\s\S]*?<\/p>/ig) || [];
  for (i = 0; i < paragraphs.length; i++) {
    var text = captureFirst(paragraphs[i], /<p>([\s\S]*?)<\/p>/i).replace(/(&nbsp;)+/g, '\t');
    var strongMatch = text.match(/<strong>([\s\S]*?)<\/strong>/i);
    if (strongMatch) {
      // Only the text inside the first <strong> is kept; the entry is flagged
      // so the view can style it.
      contentArray.push({ "text": strongMatch[1], "strong": "strong" });
    } else {
      contentArray.push({ "text": text });
    }
  }

  articleObj["titleObj"] = {
    title: h1,
    sub: h2,
    quote: h3,
    unknown: h4,
    source: source
  };
  articleObj["imgArray"] = imgArray;
  articleObj['contentArray'] = contentArray;

  return articleObj;
};

module.exports = articleExtract;

修改pages/article/article.js
添加articleExtract函数

// Application instance returned by getApp().
var app = getApp();
// Helpers used for the article URL:
// todayDateArray is taken from utils/util.js; articleExtract is the article
// HTML parser module in utils/articleExtract.js.
var todayDateArray = require('../../utils/util.js').todayDateArray;
var articleExtract = require('../../utils/articleExtract.js')
// Base URI string of the newspaper's HTML pages.
var baseUri = "http://paper.people.com.cn/rmrb/html"
// Variables used to assemble the URL (elided in this excerpt).
//...
Page({
  /**
   * Initial page data.
   */
  data: {
    articleObj:{}
  },

  //...
  onShow: function () {
   // ...
  },
  /**
   * Request an article page and store the parsed result in `data.articleObj`.
   *
   * Request failures, non-2xx responses, non-string bodies and parser
   * exceptions are logged with console.error and leave the page data
   * untouched.
   *
   * @param {string} url - Address of the article page to request.
   * @param {*} newsHref - Passed through to articleExtract.
   * @param {*} pagenum - Passed through to articleExtract.
   */
  getArticle: function (url, newsHref, pagenum) {
    var self = this;
    wx.request({
      url: url,
      success: function (res) {
        // `success` fires for any HTTP status, so check it before parsing.
        if (res.statusCode < 200 || res.statusCode >= 300 || typeof res.data !== 'string') {
          console.error("文章请求失败", url, res.statusCode);
          return;
        }
        var tmpArticleObj;
        try {
          // Parse the article HTML into title, images and body paragraphs.
          tmpArticleObj = articleExtract(res.data, newsHref, pagenum);
        } catch (err) {
          console.error("文章解析失败", url, err);
          return;
        }
        console.log("文章解析结果", tmpArticleObj )
        self.setData({
          articleObj: tmpArticleObj,
        });
      },
      fail: function (err) {
        console.error("文章请求失败", url, err);
      }
    });
  },
})

文章解析出来了,下一步就是把它显示出来

显示文章
在article.wxml中,把内容简单地显示出来

<!--pages/article/article.wxml-->
<!-- Bindings use the keys produced by utils/articleExtract.js:
     titleObj.quote / title / sub / unknown, and imgSrc / imgDesc for images. -->
<view class="page-contain">
  <view class="article-contain">
    <view class="article-header">
      <view wx:if="{{articleObj.titleObj.quote}}" class="header-introtitle">{{articleObj.titleObj.quote}}</view>
      <view wx:if="{{articleObj.titleObj.title}}" class="header-title">{{articleObj.titleObj.title}}</view>
      <view wx:if="{{articleObj.titleObj.sub}}" class="header-subTitle">{{articleObj.titleObj.sub}}</view>
      <!-- NOTE(review): `unknown` holds the <h4> text, presumably the author line; confirm against real pages. -->
      <view wx:if="{{articleObj.titleObj.unknown}}" class="header-authors">{{articleObj.titleObj.unknown}}</view>
    </view>
    <view class="article-attachment" wx:if="{{articleObj.imgArray}}">
      <view class="attachment-img" wx:for="{{articleObj.imgArray}}">
        <image src="{{item.imgSrc}}" mode='widthFix'></image>
        <view class="attachment-alt" wx:if="{{item.imgDesc}}">{{item.imgDesc}}</view>
      </view>
    </view>
    <view class="article-content" wx:if="{{articleObj.contentArray}}">
      <view class='content-p {{item.strong}}' wx:for="{{articleObj.contentArray}}">
        <text decode='true'>{{item.text}}</text>
      </view>
    </view>
  </view>
</view>

至此,文章详情算是显示出来了,下一步,继续完善

    原文作者:TAG_WW
    原文地址: https://segmentfault.com/a/1190000015352011
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞