compiler-core 编译器内核

作者: copyLeft | 来源:发表于2020-12-28 22:51 被阅读0次

compiler-core 编译器内核
32_Linux内核链表剖析
Nginx安装和命令
关于STL与泛型编程学习感想二（博览网）
Pixel修改kernel内核调试
ubuntu nginx源码编译升级
挂载nfs文件系统问题及解决
6.828 操作系统 lab1: 阅读boot.S
qemu环境搭建
PHP8中设置开启JIT

parse.ts 解析器

将模板字符串解析为节点树（AST）

数据定义

// 主要方法以及配置


// Attribute value: either `undefined`, or an object carrying the value's
// `content`, an `isQuoted` flag and its `loc` (source location).
type AttributeValue =
  | undefined
  | {
      content: string
      isQuoted: boolean
      loc: SourceLocation
    }


// Matches the named character references (&gt; &lt; &amp; &apos; &quot;)
// that are decoded back into plain characters; the name is capture group 1.
const decodeRE = /&(gt|lt|amp|apos|quot);/g
// Lookup table: entity name (without `&` and `;`) -> decoded character.
const decodeMap: Record<string, string> = { gt: '>', lt: '<', amp: '&', apos: "'", quot: '"' }
// Default parser options. createParserContext copies these and lets the
// caller's options override individual keys.
export const defaultParserOptions: MergedParserOptions = {
  // Opening and closing delimiters of an interpolation
  delimiters: [`{{`, `}}`],
  // Default namespace resolver: always answers Namespaces.HTML
  getNamespace: () => Namespaces.HTML,
  // Default text-mode resolver: always answers TextModes.DATA
  getTextMode: () => TextModes.DATA,
  // Tag classification hooks (void tag / <pre> tag / custom element).
  // NOTE(review): `NO` is defined elsewhere; presumably it always returns false — confirm.
  isVoidTag: NO,
  isPreTag: NO,
  isCustomElement: NO,
  // Replaces &gt; &lt; &amp; &apos; &quot; with the characters they stand for
  decodeEntities(rawText: string): string {
    return rawText.replace(decodeRE, (_, entityName) => decodeMap[entityName])
  },
  // Error callback; per the original note, the default handler throws the error
  onError: defaultOnError,
  // NOTE(review): presumably controls whether comment nodes are kept — confirm
  comments: false
}
// Text parsing modes. The table lists, for each mode, whether elements and
// entities are recognised, what terminates the text, and where the mode applies.
export const enum TextModes {
  //              | Elements | Entities | End sign              | Inside of
  DATA = 0, //    | ✔        | ✔        | End tags of ancestors |
  RCDATA = 1, //  | ✘        | ✔        | End tag of the parent | <textarea>
  RAWTEXT = 2, // | ✘        | ✘        | End tag of the parent | <style>,<script>
  CDATA = 3,
  ATTRIBUTE_VALUE = 4
}
// State carried through a single parse run.
export interface ParserContext {
  // Merged parser options
  options: MergedParserOptions

  // The template as passed in (read-only)
  readonly originalSource: string
  // Working copy of the template; initialised to the same text as originalSource
  source: string

  // Cursor position: offset starts at 0, line and column start at 1
  offset: number
  line: number
  column: number

  // HTML <pre> tag, preserve whitespaces
  inPre: boolean
  // v-pre, do not process directives and interpolations
  inVPre: boolean
}
// Which kind of tag parseTag is asked to read.
const enum TagType {
  // Opening tag
  Start = 0,
  // Closing tag
  End = 1
}
// Lookup built by makeMap from the comma-separated names if, else, else-if, for, slot.
// NOTE(review): makeMap is defined elsewhere; presumably the result is a predicate
// telling whether a given key is one of the listed names — confirm.
const isSpecialTemplateDirective = /*#__PURE__*/ makeMap(
  `if,else,else-if,for,slot`
)
// Roughly: key => !!({ if: true, else: true, 'else-if': true, for: true, slot: true }[key])

方法

// 编译入口
baseParse(){}
// 创建编译上下文
createParserContext(){}
// 编译子标签
parseChildren(){}
// 加入节点队列，优化函数
pushNode(){}
// 生成CDATA节点: TextModes.CDATA
parseCDATA(){}
// 生成注释节点：NodeTypes.COMMENT
parseComment(){}
// 生成虚假注释节点:  NodeTypes.COMMENT
parseBogusComment(){} 
// 生成标签节点
parseTag(){}
// 生成属性列表
parseAttributes(){}
// 生成属性节点
parseAttribute(){}
// 提取属性值
parseAttributeValue(){}
// 生成动态数据节点 "{{ data }}"
parseInterpolation() {}
// 生成文本节点
parseText(){}
// 提取文本数据 "name}}" => { type: NodeType.TEXT, content: 'name', loc: SourceLocation }
parseTextData(){}
// 获取读头位置
getCursor(){}
// 生成选取对象
getSelection(){}
// 获取队列末尾项
last(){}
// 字符开头校验
startsWith(){}
// 读头前进
advanceBy(){}
// 跳过空白字符
advanceSpaces(){}
// 生成新位置信息对象
getNewPosition(){}
// 生成错误位置等信息，并抛出
emitError() {}
// 是否读取结束
isEnd(){}
// 是否以结束标签开头
startsWithEndTagOpen(){}

编译入口, 创建根节点

// Parser entry point: builds a parser context for `content`, parses its
// children in DATA mode with no ancestors, and wraps them in a root node.
export function baseParse(
  content: string,
  options: ParserOptions = {}
): RootNode {
  const context = createParserContext(content, options)
  // Cursor position before anything has been consumed
  const start = getCursor(context)
  // Children are parsed first: the selection below can only know the end
  // position once parseChildren has finished.
  const children = parseChildren(context, TextModes.DATA, [])
  // Location info for the root: { start, end, source }
  const loc = getSelection(context, start)
  return createRoot(children, loc)
}


// Builds the parser context for one parse run.
//
// content    - template source text; stored as both `originalSource` and `source`
// rawOptions - caller-supplied options, layered over defaultParserOptions
//
// Returns a context positioned at line 1, column 1, offset 0, with both
// `inPre` and `inVPre` switched off.
function createParserContext(
  content: string,
  rawOptions: ParserOptions
): ParserContext {
  const options = extend({}, defaultParserOptions)
  // Merge caller options over the defaults. Only a missing value
  // (null / undefined) falls back to the default: using `||` here would
  // silently discard explicit falsy settings such as `false`, `0` or ''.
  for (const key in rawOptions) {
    // @ts-ignore
    options[key] = rawOptions[key] == null ? defaultParserOptions[key] : rawOptions[key]
  }
  return {
    options,
    column: 1, // column number
    line: 1, // line number
    offset: 0, // offset into the template
    originalSource: content,
    source: content,
    inPre: false, // inside an HTML <pre> tag
    inVPre: false // inside a v-pre element
  }
}


// Creates the root node of the tree.
//
// children - child nodes placed under the root
// loc      - location info for the root; defaults to locStub
//
// Every other field starts out empty: [] for the lists, 0 for the counters,
// undefined for codegenNode.
export function createRoot(
  children: TemplateChildNode[],
  loc = locStub
): RootNode {
  return {
    type: NodeTypes.ROOT, // node type
    children, // child nodes
    helpers: [],
    components: [], // components
    directives: [], // directives
    hoists: [],
    imports: [], // imports (the original note mentions import paths)
    cached: 0, // NOTE(review): presumably a count of cached entries — confirm
    temps: 0, // NOTE(review): original note says "templates"; meaning unverified — confirm
    codegenNode: undefined,
    loc // location info
  }
}

节点编译

// Parses child nodes from the current position until isEnd() reports that
// this level is finished.
//
// context   - parser context (source text, cursor, options)
// mode      - text mode the children are parsed in
// ancestors - stack of enclosing elements; its last entry is the parent
//
// Returns the collected child nodes.
function parseChildren(
  context: ParserContext,
  mode: TextModes,
  ancestors: ElementNode[]
): TemplateChildNode[] {
  // The last entry of the ancestor stack is the parent of the nodes parsed here
  const parent = last(ancestors)
  // Namespace: inherited from the parent, HTML when there is no parent
  const ns = parent ? parent.ns : Namespaces.HTML
  // Collected child nodes
  const nodes: TemplateChildNode[] = []

  // Keep going until this level is finished
  while (!isEnd(context, mode, ancestors)) {
    __TEST__ && assert(context.source.length > 0)
    const s = context.source
    let node: TemplateChildNode | TemplateChildNode[] | undefined = undefined

    // Interpolations and tags are only recognised in DATA / RCDATA mode
    if (mode === TextModes.DATA || mode === TextModes.RCDATA) {
      if (!context.inVPre && startsWith(s, context.options.delimiters[0])) {
        // Not inside v-pre and the source starts with the opening delimiter ('{{')
        node = parseInterpolation(context, mode)
      } else if (mode === TextModes.DATA && s[0] === '<') {
        // Tag open: '<'
        // https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
        if (s.length === 1) {
          // A lone '<' at the end of the source
          emitError(context, ErrorCodes.EOF_BEFORE_TAG_NAME, 1)
        } else if (s[1] === '!') {
          // https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
          if (startsWith(s, '<!--')) {
            // Comment node
            node = parseComment(context)
          } else if (startsWith(s, '<!DOCTYPE')) {
            // DOCTYPE is handed to the bogus-comment parser
            node = parseBogusComment(context)
          } else if (startsWith(s, '<![CDATA[')) {
            // CDATA is only parsed outside the HTML namespace
            if (ns !== Namespaces.HTML) {
              node = parseCDATA(context, ancestors)
            } else {
              emitError(context, ErrorCodes.CDATA_IN_HTML_CONTENT)
              node = parseBogusComment(context)
            }
          } else {
            emitError(context, ErrorCodes.INCORRECTLY_OPENED_COMMENT)
            node = parseBogusComment(context)
          }
        } else if (s[1] === '/') {
          // https://html.spec.whatwg.org/multipage/parsing.html#end-tag-open-state
          if (s.length === 2) {
            // '</' at the end of the source
            emitError(context, ErrorCodes.EOF_BEFORE_TAG_NAME, 2)
          } else if (s[2] === '>') {
            // '</>': the tag name is missing; skip these three characters
            emitError(context, ErrorCodes.MISSING_END_TAG_NAME, 2)
            advanceBy(context, 3)
            continue
          } else if (/[a-z]/i.test(s[2])) {
            // '</tag' reached here is reported as an invalid end tag; it is
            // still parsed as an end tag and the result is discarded
            emitError(context, ErrorCodes.X_INVALID_END_TAG)
            parseTag(context, TagType.End, parent)
            continue
          } else {
            emitError(
              context,
              ErrorCodes.INVALID_FIRST_CHARACTER_OF_TAG_NAME,
              2
            )
            node = parseBogusComment(context)
          }
        } else if (/[a-z]/i.test(s[1])) {
          // Well-formed start of an element
          node = parseElement(context, ancestors)
        } else if (s[1] === '?') {
          // '<?'
          emitError(
            context,
            ErrorCodes.UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME,
            1
          )
          node = parseBogusComment(context)
        } else {
          // Any other character after '<': report it; no node is produced here,
          // so the text fallback below handles the input
          emitError(context, ErrorCodes.INVALID_FIRST_CHARACTER_OF_TAG_NAME, 1)
        }
      }
    }

    // Nothing was produced above: treat the input as text
    if (!node) {
      node = parseText(context, mode)
    }

    // Append the node (or every node of an array result) to the collection
    if (isArray(node)) {
      for (let i = 0; i < node.length; i++) {
        pushNode(nodes, node[i])
      }
    } else {
      pushNode(nodes, node)
    }
  }

  // Whitespace management for more efficient output
  // (same as v2 whitespace: 'condense')
  let removedWhitespace = false

  // Outside RAWTEXT mode, whitespace-only text nodes are replaced or removed
  if (mode !== TextModes.RAWTEXT) {
    // ... (whitespace handling omitted in this excerpt)
  }

  return removedWhitespace ? nodes.filter(Boolean) : nodes
}

// parseChildren 伪代码
while( !isEnd() // 是否遍历结束 ){
  // TextModes.DATA | TextModes.RCDATA
  // 如果为 DATA 或 RCDATA 类型，做标签解析
  if (mode === TextModes.DATA || mode === TextModes.RCDATA) {
    // 如果 非 v-pre '{{' 开头, 认为是动态插入值, 例如: {{ data }}
    if (!context.inVPre && startsWith(s, context.options.delimiters[0])) {
       node = parseInterpolation(context, mode)  
    }else if (mode === TextModes.DATA && s[0] === '<'){
      // DATA 类型 以 '<' 开头的标签
      // 以下为各种标签情况的判断
      // 1: '<' 报错： EOF_BEFORE_TAG_NAME
      // 2: '<!' 可能为文档标签类型, 向下查询判断
          // 2-1: '<!--'  注释节点
          // 2-2: '<!DOCTYPE'  html 文档
          // 2-3: '<![CDATA[' xml 文档
          // 2-4: 其他无法识别的节点,  将剔除问题内容，生成虚假的注释节点, parseBogusComment
      // 3: '</' 错误开始标签处理
          // 3-1: '</' 报错提示 EOF_BEFORE_TAG_NAME
          // 3-2: '</>' 剔除问题标签, 报错提示： MISSING_END_TAG_NAME
          // 3-3: '</tagName>'  报错提示: X_INVALID_END_TAG 尝试作为闭合标签解析 -> parseTag(context, TagType.End, parent)
      // 4: '<?' 报错提示: UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME
    }
  }
  // 如果以上未生成节点，将作为文本节点处理
  if (!node) {
    node = parseText(context, mode)
  }
  // 节点加入队列
  if (isArray(node)) {
    for (let i = 0; i < node.length; i++) {
      pushNode(nodes, node[i])
    }
  } else {
    // 这里对文本节点做了优化，如果上一节点也为文本节点，NodeTypes.TEXT，将合并两个节点
    pushNode(nodes, node)
  }
}
// 循环结束后: 空格节点的处理
let removedWhitespace = false
if (mode !== TextModes.RAWTEXT) { ... }
// 返回节点树
return removedWhitespace ? nodes.filter(Boolean) : nodes

compiler-core 编译器内核
parse.ts 转换器将字符模板转换为节点树数据定义方法编译入口, 创建根节点节点编译
32_Linux内核链表剖析
关键词： 0. 课程目标移植Linux内核链表，使其适用于非GNU编译器分析Linux内核中链表的基本实现 1...
Nginx安装和命令
1.安装环境用 uname -a 查看linux 内核是否高于2.6 gcc编译器GCC（GNU Comp...
关于STL与泛型编程学习感想二（博览网）
体系结构与内核分析本机使用了两个编译器 Visual C++ 6.0标准库位于：...\Program File...
Pixel修改kernel内核调试
下载内核源码准备环境需要Android源码下的交叉编译器，在Android系统源码下修改系统调用修改sysca...
ubuntu nginx源码编译升级
一、在安装之前，请确认已经安装以下依赖包： epoll，linux内核版本为2.6或者以上 gcc编译器，g++编...
挂载nfs文件系统问题及解决
在启动linux内核过程，挂载nfs文件系统时，出现下图所示错误。解决方法：将交叉编译器 “arm-corte...
6.828 操作系统 lab1: 阅读boot.S
补充知识补充1. AT&T汇编由于内核代码采用的gcc编译器使用AT&T的汇编格式，首先补充下关于AT&T汇编...
qemu环境搭建
1. 安装qemu 2. 安装交叉编译器 3.下载并编译gdb 4.下载并编译linux内核源码 5.下载并编译b...
PHP8中设置开启JIT
PHP8.0.0于2020年11月26日发布了PHP 8在PHP的内核中添加了JIT编译器，可以极大地提高性能。更...