vue学习—Convert HTML string to AST,如何将html字符串转换为ast数组结构

获取html字符串

首先在入口文件处,使用template属性或者el属性获取到需要解析的html字符串

template

1.html字符串,如

Vue.component('alert-box', {
  template: `
    <div class="demo-alert-box">
      <strong>Error!</strong>
      <slot></slot>
    </div>
  `
})

2.如果值以 # 开始,则它将被用作选择符,并使用匹配元素的 innerHTML 作为模板

el

类型:string | Element
通过el获取需要解析的模版

export function query (el: string | Element): Element {
  if (typeof el === 'string') {
    const selected = document.querySelector(el)
    if (!selected) {
      process.env.NODE_ENV !== 'production' && warn(
        'Cannot find element: ' + el
      )
      return document.createElement('div')
    }
    return selected
  } else {
    return el
  }
}

解析html字符串

通过while循环结合正则表达式逐步匹配拆解html字符串

匹配注释与html声明文件

        // Comment:
        // 添加在root元素下面的comment会被忽略
        if (comment.test(html)) {
          const commentEnd = html.indexOf('-->')

          if (commentEnd >= 0) {
            if (options.shouldKeepComment) {
              options.comment(html.substring(4, commentEnd), index, index + commentEnd + 3)
            }
            advance(commentEnd + 3)
            continue
          }
        }

        // http://en.wikipedia.org/wiki/Conditional_comment#Downlevel-revealed_conditional_comment
        // ie comment
        if (conditionalComment.test(html)) {
          const conditionalEnd = html.indexOf(']>')

          if (conditionalEnd >= 0) {
            advance(conditionalEnd + 2)
            continue
          }
        }

        // Doctype:
        // match 
        const doctypeMatch = html.match(doctype)
        if (doctypeMatch) {
          advance(doctypeMatch[0].length)
          continue
        }

匹配标签起始位置

匹配标签起始位置,startTagOpen匹配到,但是startTagClose匹配失败,那么失败前的html片段就会被抛弃。

const ncname = `[a-zA-Z_][\\-\\.0-9_a-zA-Z${unicodeLetters}]*`
const qnameCapture = `((?:${ncname}\\:)?${ncname})`
const startTagOpen = new RegExp(`^<${qnameCapture}`)

  /** 解析起始标签,使用正则匹配attrs,并将匹配到的正则数组放到attrs数组里面 */
  function parseStartTag () {
    // 标签名
    const start = html.match(startTagOpen)
    if (start) {
      const match = {
        tagName: start[1],
        attrs: [],
        start: index
      }
      advance(start[0].length)
      let end, attr
      // 解析attr
      while (!(end = html.match(startTagClose)) && (attr = html.match(dynamicArgAttribute) || html.match(attribute))) {
        attr.start = index
        advance(attr[0].length)
        attr.end = index
        match.attrs.push(attr)
      }
      if (end) {
        // 是否匹配到自闭合符号/,匹配到则设置标志属性unarySlash='/'
        match.unarySlash = end[1]
        advance(end[0].length)
        match.end = index
        return match
      }
    }
  }
  • 匹配函数首先匹配标签起始位置,匹配失败则返回进入下一步。
  • 匹配成功则创建match对象,tagName为匹配到的标签名。然后切割html字符串,进行下一步匹配。
  • 通过while循环,匹配起始标签上的attr,动态attr,将匹配到的正则放入attrs数组。
  • 最后匹配起始标签结束符const startTagClose = /^\s*(\/?)>/
  • 匹配成功,则根据是否匹配到自闭合标志/来给unarySlash属性赋值,最后返回match对象,进行下一步处理。

进一步处理macth对象

  /** 解析上一步获取的正则attrs,保存为{name, value}格式,
   * 并且将被浏览器转译的换行或特殊字符或者href里面的换行反转为相应符号,
   * 最后将tagname,attrs等传递给调用函数的start函数 */
  function handleStartTag (match) {
    const tagName = match.tagName
    const unarySlash = match.unarySlash

    if (expectHTML) {
      // 如标题标签,不应该被p标签包裹,如果父级标签是p,则提前闭合这个p标签
      if (lastTag === 'p' && isNonPhrasingTag(tagName)) {
        parseEndTag(lastTag)
      }
      // 如果是可以自闭合的标签,上个标签和现在的标签一样,则闭合上一个标签
      if (canBeLeftOpenTag(tagName) && lastTag === tagName) {
        parseEndTag(tagName)
      }
    }

    const unary = isUnaryTag(tagName) || !!unarySlash

    const l = match.attrs.length
    const attrs = new Array(l)
    for (let i = 0; i < l; i++) {
      const args = match.attrs[i]
      // 优先获取匹配到的第三个正则捕获
      const value = args[3] || args[4] || args[5] || ''
      const shouldDecodeNewlines = tagName === 'a' && args[1] === 'href'
        ? options.shouldDecodeNewlinesForHref
        : options.shouldDecodeNewlines
      attrs[i] = {
        name: args[1],
        value: decodeAttr(value, shouldDecodeNewlines)
      }
      if (process.env.NODE_ENV !== 'production' && options.outputSourceRange) {
        attrs[i].start = args.start + args[0].match(/^\s*/).length
        attrs[i].end = args.end
      }
    }

    // 非自闭合标签,存入stack数组
    if (!unary) {
      stack.push({ tag: tagName, lowerCasedTag: tagName.toLowerCase(), attrs: attrs, start: match.start, end: match.end })
      // 1.修改lastTag,保存堆中的最上层数组项
      lastTag = tagName
    }

    // 将【匹配到的元素返回给上一级解析
    if (options.start) {
      options.start(tagName, attrs, unary, match.start, match.end)
    }
  }

start函数解析标签起始对象

createASTElement

export function createASTElement (
  tag: string,
  attrs: Array<ASTAttr>,
  parent: ASTElement | void
): ASTElement {
  return {
    type: 1,
    tag,
    attrsList: attrs,
    attrsMap: makeAttrsMap(attrs),
    rawAttrsMap: {},
    parent,
    children: []
  }
}
start (tag, attrs, unary, start) {
  // check namespace.
  // inherit parent ns if there is one
  const ns = (currentParent && currentParent.ns) || platformGetTagNamespace(tag)

  // handle IE svg bug
  /* istanbul ignore if */
  if (isIE && ns === 'svg') {
    attrs = guardIESVGBug(attrs)
  }

  // 将传入的数据转化成ast对象 type=1
  let element: ASTElement = createASTElement(tag, attrs, currentParent)
  if (ns) {
    element.ns = ns
  }

  if (process.env.NODE_ENV !== 'production') {
    if (options.outputSourceRange) {
      element.start = start
      element.rawAttrsMap = element.attrsList.reduce((cumulated, attr) => {
        cumulated[attr.name] = attr
        return cumulated
      }, {})
    }
    attrs.forEach(attr => {
      if (invalidAttributeRE.test(attr.name)) {
        warn(
          `Invalid dynamic argument expression: attribute names cannot contain ` +
          `spaces, quotes, <, >, / or =.`,
          {
            start: attr.start + attr.name.indexOf(`[`),
            end: attr.start + attr.name.length
          }
        )
      }
    })
  }

  if (isForbiddenTag(element) && !isServerRendering()) {
    element.forbidden = true
    process.env.NODE_ENV !== 'production' && warn(
      'Templates should only be responsible for mapping the state to the ' +
      'UI. Avoid placing tags with side-effects in your templates, such as ' +
      `<${tag}>` + ', as they will not be parsed.',
      { start: element.start }
    )
  }

  // apply pre-transforms 
  // 提前解析 <input :type='type' v-model='input' />
  for (let i = 0; i < preTransforms.length; i++) {
    element = preTransforms[i](element, options) || element
  }

  // v-pre check
  if (!inVPre) {
    processPre(element)
    if (element.pre) {
      inVPre = true
    }
  }
  // pre tag
  if (platformIsPreTag(element.tag)) {
    inPre = true
  }

  // 如果是带有pre属性,跳过解析
  if (inVPre) {
    // el.attrslist => el.attrs
    processRawAttrs(element)
  } else if (!element.processed) {
    // structural directives
    // 解析v-for= “item in items”,生成element.for,element.alias,element.ite
    processFor(element)
    // 解析v-if,v-else-if,v-else;v-if
    processIf(element)
    // element.once v-once用于渲染一次组件
    processOnce(element)
  }

  // 第一个start tag 为root
  if (!root) {
    root = element
    if (process.env.NODE_ENV !== 'production') {
      // 不能使用slot,template,v-for在root上
      checkRootConstraints(root)
    }
  }

  // 非自闭合
  if (!unary) {
    // last <=> currentParent = element []
    currentParent = element
    stack.push(element)
  } else {
    closeElement(element)
  }
}

processIf

/** 如果解析到v-if,给element增加if对象,如果解析到else或者v-else-if,
 * 标记,等到标签闭合的时候做处理。
 */
function processIf (el) {
  const exp = getAndRemoveAttr(el, 'v-if')
  if (exp) {
    el.if = exp
    addIfCondition(el, {
      exp: exp,
      block: el
    })
  } else {
    if (getAndRemoveAttr(el, 'v-else') != null) {
      el.else = true
    }
    const elseif = getAndRemoveAttr(el, 'v-else-if')
    if (elseif) {
      el.elseif = elseif
    }
  }
}

解析纯文本

let text, rest, next
// 包含 < plain text的情况
if (textEnd >= 0) {
  rest = html.slice(textEnd) // 包含匹配到的<
  // 直到匹配到下一个结束、起始、注释、条件注释为止
  while (
    !endTag.test(rest) &&
    !startTagOpen.test(rest) &&
    !comment.test(rest) &&
    !conditionalComment.test(rest)
  ) {
    // < in plain text, be forgiving and treat it as text
    // 文本里面的<作为普通字符,匹配下一个<
    next = rest.indexOf('<', 1)
    if (next < 0) break
    textEnd += next
    rest = html.slice(textEnd)
  }
  // 获取匹配到的纯文本
  text = html.substring(0, textEnd)
}

if (textEnd < 0) {
  text = html
}

if (text) {
  advance(text.length)
}

// 处理纯文本
if (options.chars && text) {
  options.chars(text, index - text.length, index)
}

chars函数处理文本

chars (text: string, start: number, end: number) {
  // 父元素外面的text,或者template是纯text,都被被忽略
  if (!currentParent) {
    if (process.env.NODE_ENV !== 'production') {
      if (text === template) {
        warnOnce(
          'Component template requires a root element, rather than just text.',
          { start }
        )
      } else if ((text = text.trim())) {
        warnOnce(
          `text "${text}" outside root element will be ignored.`,
          { start }
        )
      }
    }
    return
  }
  // IE textarea placeholder bug
  /* istanbul ignore if */
  if (isIE &&
    currentParent.tag === 'textarea' &&
    currentParent.attrsMap.placeholder === text
  ) {
    return
  }
  const children = currentParent.children
  if (inPre || text.trim()) {
    text = isTextTag(currentParent) ? text : decodeHTMLCached(text) // 转译html里面的特殊字符
  } else if (!children.length) {
    // remove the whitespace-only node right after an opening tag
    text = ''
  } else if (whitespaceOption) {
    if (whitespaceOption === 'condense') {
      // in condense mode, remove the whitespace node if it contains
      // line break, otherwise condense to a single space
      text = lineBreakRE.test(text) ? '' : ' '
    } else {
      text = ' '
    }
  } else {
    text = preserveWhitespace ? ' ' : ''
  }
  if (text) {
    if (whitespaceOption === 'condense') {
      // condense consecutive whitespaces into single space
      text = text.replace(whitespaceRE, ' ')
    }
    let res
    let child: ?ASTNode
    if (!inVPre && text !== ' ' && (res = parseText(text, delimiters))) {
      // 含有动态文本文本
      child = {
        type: 2,
        expression: res.expression,
        tokens: res.tokens,
        text
      }
    } else if (text !== ' ' || !children.length || children[children.length - 1].text !== ' ') {
      // 填入text(不含动态文本),或者填入单一一个' '(有则不再填入
      child = {
        type: 3,
        text
      }
    }
    if (child) {
      if (process.env.NODE_ENV !== 'production' && options.outputSourceRange) {
        child.start = start
        child.end = end
      }
      // 将文本加入父元素的children
      children.push(child)
    }
  }
}

parseText

// preText?_s(_f(text))+lastText?
// 纯文本返回undefined
export function parseText (
  text: string,
  delimiters?: [string, string]
): TextParseResult | void {
  const tagRE = delimiters ? buildRegex(delimiters) : defaultTagRE
  if (!tagRE.test(text)) {
    return
  }
  const tokens = []
  const rawTokens = []
  let lastIndex = tagRE.lastIndex = 0
  let match, index, tokenValue
  while ((match = tagRE.exec(text))) {
    index = match.index
    // push text token
    if (index > lastIndex) {
      rawTokens.push(tokenValue = text.slice(lastIndex, index))
      tokens.push(JSON.stringify(tokenValue))
    }
    // tag token
    const exp = parseFilters(match[1].trim())
    tokens.push(`_s(${exp})`)
    rawTokens.push({ '@binding': exp })
    lastIndex = index + match[0].length
  }
  if (lastIndex < text.length) {
    rawTokens.push(tokenValue = text.slice(lastIndex))
    tokens.push(JSON.stringify(tokenValue))
  }
  return {
    expression: tokens.join('+'),
    tokens: rawTokens
  }
}

解析标签字符串

// End tag:
// stack为空匹配到的结尾标签,除了</br>, </p>都会被忽略
const endTagMatch = html.match(endTag)
if (endTagMatch) {
  const curIndex = index
  advance(endTagMatch[0].length)
  // 匹配到的正则,start,end
  parseEndTag(endTagMatch[1], curIndex, index)
  continue
}

parseEndTag

 function parseEndTag (tagName, start, end) {
    let pos, lowerCasedTagName
    if (start == null) start = index
    if (end == null) end = index

    // Find the closest opened tag of the same type
    if (tagName) {
      lowerCasedTagName = tagName.toLowerCase()
      for (pos = stack.length - 1; pos >= 0; pos--) {
        if (stack[pos].lowerCasedTag === lowerCasedTagName) {
          break
        }
      }
    } else {
      // If no tag name is provided, clean shop
      pos = 0
    }

    if (pos >= 0) {
      // Close all the open elements, up the stack,关闭匹配到的标签之后的所有未闭合标签
      for (let i = stack.length - 1; i >= pos; i--) {
        if (process.env.NODE_ENV !== 'production' &&
          (i > pos || !tagName) &&
          options.warn
        ) {
          options.warn(
            `tag <${stack[i].tag}> has no matching end tag.`,
            { start: stack[i].start }
          )
        }
        if (options.end) {
          options.end(stack[i].tag, start, end)
        }
      }

      // Remove the open elements from the stack
      stack.length = pos
      lastTag = pos && stack[pos - 1].tag
    } else if (lowerCasedTagName === 'br') {
      if (options.start) {
        options.start(tagName, [], true, start, end)
      }
    } else if (lowerCasedTagName === 'p') {
      // 左边自闭合,如果是p标签,辅助闭合
      if (options.start) {
        options.start(tagName, [], false, start, end)
      }
      if (options.end) {
        options.end(tagName, start, end)
      }
    }
  }

end

  end (tag, start, end) {
  const element = stack[stack.length - 1]
  // pop stack
  stack.length -= 1
  currentParent = stack[stack.length - 1]
  if (process.env.NODE_ENV !== 'production' && options.outputSourceRange) {
    element.end = end
  }
  closeElement(element)
}

closeElement

function closeElement (element) {
    // 清除空的子节点
    trimEndingWhitespace(element)
    if (!inVPre && !element.processed) {
      element = processElement(element, options)
    }
    // tree management 元素闭合之后,stack为空,且该元素不为root,则查看该元素是否有if或者else-if标签
    // 有,则给root增加if对象,无则抛弃
    if (!stack.length && element !== root) {
      // allow root elements with v-if, v-else-if and v-else
      if (root.if && (element.elseif || element.else)) {
        if (process.env.NODE_ENV !== 'production') {
          checkRootConstraints(element)
        }
        addIfCondition(root, {
          exp: element.elseif,
          block: element
        })
      } else if (process.env.NODE_ENV !== 'production') {
        warnOnce(
          `Component template should contain exactly one root element. ` +
          `If you are using v-if on multiple elements, ` +
          `use v-else-if to chain them instead.`,
          { start: element.start }
        )
      }
    }

    // 根元素下面的标签,闭合后,作为父元素的child插入
    if (currentParent && !element.forbidden) {
      if (element.elseif || element.else) {
        // 存在v-else-if,或者v-else,与同级v-if元素绑定
        processIfConditions(element, currentParent)
      } else {
        if (element.slotScope) {
          // scoped slot
          // keep it in the children list so that v-else(-if) conditions can
          // find it as the prev node.
          const name = element.slotTarget || '"default"';
          (currentParent.scopedSlots || (currentParent.scopedSlots = {}))[name] = element
        }
        currentParent.children.push(element)
        element.parent = currentParent
      }
    }

    // final children cleanup
    // filter out scoped slots
    element.children = element.children.filter(c => !(c: any).slotScope)
    // remove trailing whitespace node again
    trimEndingWhitespace(element)

    // check pre state
    if (element.pre) {
      inVPre = false
    }
    if (platformIsPreTag(element.tag)) {
      inPre = false
    }
    // apply post-transforms
    for (let i = 0; i < postTransforms.length; i++) {
      postTransforms[i](element, options)
    }
  }

最后解析出来的ast数组大致如下:

《vue学习—Convert HTML string to AST,如何将html字符串转换为ast数组结构》

    原文作者:吴霸霸
    原文地址: https://segmentfault.com/a/1190000018277868
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞