javascript – Nearley语法在某些条件下多次识别相同的非终端符号

2023年1月28日 238次阅读

鉴于以下nearley代码：

   @builtin "whitespace.ne"

   @{%

   let numberedParams = {
       3: 45
   };

   const lexer = require("moo").compile({
       comment: /\(.*?\)/,
       expstart: /\[/,
       expend: /\]/,
       paramstart: '#',
       equals: "=",
       operator: /\*\*|\+|\-|\*|\/|OR|XOR|AND|MOD|EQ|NE|GT|GE|LT|LE/,
       function: ['ATAN','ABS','ACOS','ASIN','COS','EXP','FIX','FUP','ROUND','LN','SIN','SQRT','TAN','EXISTS'],
       linenumber_command: 'N',
       command: /[ABCDFGHIJKLMPQRSTUVWXYZ]/,
       float: /[0-9]*\.[0-9]+/,
       int: /[0-9]+/,
       ws: /[ \t]+/,
       EOL: {match: /(?:\r\n?|\n)/, lineBreaks: true }
   });

   // Converts from degrees to radians.
   Math.radians = function(degrees) {
     return degrees * Math.PI / 180;
   };

   // Converts from radians to degrees.
   Math.degrees = function(radians) {
     return radians * 180 / Math.PI;
   };

   function empty(d) {
       return null;
   }

   function logid(prefix) {
       return function (d) {
           console.log(prefix, d);
           return d;
       };
   }

   // Appends to list
   function append(d) {
       if (Array.isArray(d[0])) {
           return d[0].concat(d[2]);
       }
       return [d[0], d[2]];
   }

   function getparam(d) {
       return numberedParams[String(d[1])] | 0.0;
   }

   // Returns the result of one parsed line
   function processLine(d) {

       logid("processLine")(d);

       if (d[0] != null) {
           return [];
       }

       if (d[2] != null) {
           return [d[2]].concat(d[4]);
       }

       if (! Array.isArray(d[4])) {
           return [d[4]];
       }

       return d[4];
   }


   %}

   @lexer lexer

   line ->
       block_delete:? _ linenumber:? _ line_items EOL {% processLine %}

   block_delete ->
       "/" {% id %}

   linenumber ->
       %linenumber_command int_or_float {% function (d) { return {command: d[0].value, value: d[1]}; } %}

   line_items ->
       line_item {% id %}
       | line_items _ line_item {% append %}

   line_item ->
       comment {% id %}
       | parameter_setting {% id %}
       | word {% id %}

   word ->
       %command _ number {% function (d) { return {command: d[0].value, value: d[2]}; } %}

   parameter_start ->
       %paramstart {% id %}

   parameter_setting ->
       parameter_start parameter_index _ %equals _ number {% function (d) { return {command: d[0].value + d[1], value: d[5]}; }  %}

   comment ->
       %comment {% function (d) { return {command: 'COMMENT', value: d[0].value}; } %}

   gcode_expression ->
       "[" _ expression _ "]" {% (d) => d[2] %}

   expression ->
       logical_expression {% id %}

   logical_expression ->
       comparative_expression {% id %}
       | logical_expression _ "AND" _ comparative_expression {% (d) => d[0] && d[4] %}
       | logical_expression _ "OR" _ comparative_expression {% (d) => d[0] || d[4] %}
       | logical_expression _ "XOR" _ comparative_expression {% (d) => ((d[0] && !d[4]) || (!d[0] && d[4])) %}

   comparative_expression ->
       additive_expression {% id %}
       | comparative_expression _ "EQ" _ additive_expression {% (d) => d[0] == d[4] %}
       | comparative_expression _ "NE" _ additive_expression {% (d) => d[0] != d[4] %}
       | comparative_expression _ "GT" _ additive_expression {% (d) => d[0] > d[4] %}
       | comparative_expression _ "GE" _ additive_expression {% (d) => d[0] >= d[4] %}
       | comparative_expression _ "LT" _ additive_expression {% (d) => d[0] < d[4] %}
       | comparative_expression _ "LE" _ additive_expression {% (d) => d[0] <= d[4] %}

   additive_expression ->
       multiplicative_expression {% id %}
       | additive_expression _ "+"  _ multiplicative_expression {% (d) => d[0] + d[4] %}
       | additive_expression _ "-"  _ multiplicative_expression {% (d) => d[0] - d[4] %}

   multiplicative_expression ->
       power_expression {% id %}
       | multiplicative_expression _ "*"  _ power_expression {% (d) => d[0] * d[4] %}
       | multiplicative_expression _ "/"  _ power_expression {% (d) => d[0] / d[4] %}
       | multiplicative_expression _ "MOD"  _ power_expression {% (d) => d[0] % d[4] %}

   power_expression ->
       function_expression {% id %}
       | power_expression _ "**"  _ function_expression {% (d) => Math.pow(d[0], d[4]) %}

   function_expression ->
       number {% id %}
       | "ATAN" _ gcode_expression _ "/" _ gcode_expression {% (d) => Math.degrees(Math.atan(d[2], d[6])) %}
       | "ABS" _ gcode_expression {% (d) => Math.abs(d[2]) %}
       | "ACOS" _ gcode_expression {% (d) => Math.degrees(Math.acos(d[2])) %}
       | "ASIN" _ gcode_expression {% (d) => Math.degrees(Math.asin(d[2])) %}
       | "COS" _ gcode_expression {% (d) => Math.cos(Math.radians(d[2])) %}
       | "EXP" _ gcode_expression {% (d) => Math.exp(d[2]) %}
       | "FIX" _ gcode_expression {% (d) => Math.floor(d[2]) %}
       | "FUP" _ gcode_expression {% (d) => Math.ceil(d[2]) %}
       | "ROUND" _ gcode_expression {% (d) => Math.round(d[2]) %}
       | "LN" _ gcode_expression {% (d) => Math.log(d[2]) %}
       | "SIN" _ gcode_expression {% (d) => Math.sin(Math.radians(d[2])) %}
       | "SQRT" _ gcode_expression {% (d) => Math.sqrt(d[2]) %}
       | "TAN" _ gcode_expression {% (d) => Math.tan(d[2]) %}
       | "EXISTS" _ gcode_expression {% (d) => Math.cos(d[2]) %}

   number ->
       primary {% id %}
       | _ "-" number {% (d) => d[2] * -1 %}
       | _ "+" number {% (d) => d[2] * 1 %}

   primary ->
       int_or_float {% id %}
       | gcode_expression {% id %}
       | parameter_expression {% id %}

   parameter_expression ->
       parameter_start parameter_index {% getparam %}

   parameter_index ->
       int {% id %}
       | gcode_expression {% id %}
       | parameter_expression {% id %}

   int ->
       %int {% (d) => parseInt(d[0]) %}

   int_or_float ->
       int {% id %}
       | %float {% (d) => parseFloat(d[0]) %}

   EOL ->
       %EOL {% empty %}

当我运行nearley-test来测试我的编译解析器时,我得到以下结果：

命令：

printf "(Commentwithoutspaces)X[2**[2+5]]\n" | nearley-test gcode.js -q

结果：

processLine [ null,
  null,
  null,
  null,
  [ { command: 'COMMENT', value: '(Commentwithoutspaces)' },
    { command: 'X', value: 128 } ],
  null ]

到目前为止这么好,接下来的测试：

printf "(Comment with spaces)X[2**[2+5]]\n" | nearley-test gcode.js -q

结果：

processLine [ null, null, null, null, { command: 'X', value: 128 }, null ]
processLine [ null, null, null, null, { command: 'X', value: 128 }, null ]
processLine [ null,
  null,
  null,
  null,
  [ { command: 'COMMENT', value: '(Comment with spaces)' },
    { command: 'X', value: 128 } ],
  null ]

看起来它在没有注释的情况下两次识别X命令,然后一次用注释识别.只有当评论中有空格时才会发生这种情况,这让我很困惑……

现在,如果我使用包含行尾的空格的注释来测试它,我得到：

printf "X[2**[2+5]](Comment with spaces)\n" | nearley-test gcode.js -q

结果：

processLine [ null,
  null,
  null,
  null,
  [ { command: 'X', value: 128 },
    { command: 'COMMENT', value: '(Comment with spaces)' } ],
  null ]

所以似乎错误只发生在我用包含空格的注释开始这一行时,这很奇怪……

有谁知道我能做些什么来获得更一致的行为？我的意思是我可以做些什么来使解析器识别命令和每次出现的注释一次,而不管注释中的空格如何,并且无论注释是在线上的其他项之前还是之后？

最佳答案我发现,如果我从非终端命名行中删除了可选空格并将它们添加到“低级别”非终端,则它按预期工作.改变后的非终端现在看起来像这样：

line ->
    block_delete:? linenumber:? line_items EOL {% processLine %}

linenumber ->
    _ %linenumber_command int_or_float _ {% function (d) { return {command: d[1].value, value: d[2]}; } %}