Compiler/syntax analyzer: Difference between revisions
Content added Content deleted
Thundergnat (talk | contribs) m (Reverted edits by Jwells1213 (talk) to last revision by Chemoelectric) |
Thundergnat (talk | contribs) m (syntax highlighting fixup automation) |
||
Line 21: | Line 21: | ||
[https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form Extended Backus-Naur Form (EBNF)]: |
[https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form Extended Backus-Naur Form (EBNF)]: |
||
<syntaxhighlight lang="ebnf"> |
|||
<lang EBNF> |
|||
stmt_list = {stmt} ; |
stmt_list = {stmt} ; |
||
Line 47: | Line 47: | ||
| '(' expr ')' |
| '(' expr ')' |
||
| ('+' | '-' | '!') primary |
| ('+' | '-' | '!') primary |
||
;</ |
;</syntaxhighlight> |
||
The resulting AST should be formulated as a Binary Tree. |
The resulting AST should be formulated as a Binary Tree. |
||
Line 68: | Line 68: | ||
|- |
|- |
||
| style="vertical-align:top" | |
| style="vertical-align:top" | |
||
< |
<syntaxhighlight lang="c">count = 1; |
||
while (count < 10) { |
while (count < 10) { |
||
print("count is: ", count, "\n"); |
print("count is: ", count, "\n"); |
||
count = count + 1; |
count = count + 1; |
||
}</ |
}</syntaxhighlight> |
||
| style="vertical-align:top" | |
| style="vertical-align:top" | |
||
Line 280: | Line 280: | ||
[https://en.wikipedia.org/wiki/Recursive_descent_parser Recursive Descent] for statement parsing. The AST is also built: |
[https://en.wikipedia.org/wiki/Recursive_descent_parser Recursive Descent] for statement parsing. The AST is also built: |
||
< |
<syntaxhighlight lang="python">def expr(p) |
||
if tok is "(" |
if tok is "(" |
||
x = paren_expr() |
x = paren_expr() |
||
Line 364: | Line 364: | ||
t = make_node(Sequence, t, stmt()) |
t = make_node(Sequence, t, stmt()) |
||
until tok is end-of-file |
until tok is end-of-file |
||
return t</ |
return t</syntaxhighlight> |
||
;Once the AST is built, it should be output in a [[Flatten_a_list|flattened format.]] This can be as simple as the following: |
;Once the AST is built, it should be output in a [[Flatten_a_list|flattened format.]] This can be as simple as the following: |
||
< |
<syntaxhighlight lang="python">def prt_ast(t) |
||
if t == NULL |
if t == NULL |
||
print(";\n") |
print(";\n") |
||
Line 378: | Line 378: | ||
print("\n") |
print("\n") |
||
prt_ast(t.left) |
prt_ast(t.left) |
||
prt_ast(t.right)</ |
prt_ast(t.right)</syntaxhighlight> |
||
;If the AST is correctly built, loading it into a subsequent program should be as simple as: |
;If the AST is correctly built, loading it into a subsequent program should be as simple as: |
||
< |
<syntaxhighlight lang="python">def load_ast() |
||
line = readline() |
line = readline() |
||
# Each line has at least one token |
# Each line has at least one token |
||
Line 402: | Line 402: | ||
left = load_ast() |
left = load_ast() |
||
right = load_ast() |
right = load_ast() |
||
return make_node(node_type, left, right)</ |
return make_node(node_type, left, right)</syntaxhighlight> |
||
Finally, the AST can also be tested by running it against one of the AST Interpreter [[Compiler/AST_interpreter|solutions]]. |
Finally, the AST can also be tested by running it against one of the AST Interpreter [[Compiler/AST_interpreter|solutions]]. |
||
Line 415: | Line 415: | ||
|- |
|- |
||
| style="vertical-align:top" | |
| style="vertical-align:top" | |
||
< |
<syntaxhighlight lang="c">/* |
||
Simple prime number generator |
Simple prime number generator |
||
*/ |
*/ |
||
Line 434: | Line 434: | ||
} |
} |
||
} |
} |
||
print("Total primes found: ", count, "\n");</ |
print("Total primes found: ", count, "\n");</syntaxhighlight> |
||
| style="vertical-align:top" | |
| style="vertical-align:top" | |
||
Line 654: | Line 654: | ||
=={{header|ALGOL W}}== |
=={{header|ALGOL W}}== |
||
< |
<syntaxhighlight lang="algolw">begin % syntax analyser % |
||
% parse tree nodes % |
% parse tree nodes % |
||
record node( integer type |
record node( integer type |
||
Line 1,041: | Line 1,041: | ||
readToken; |
readToken; |
||
writeNode( parseStatementList( tEnd_of_input ) ) |
writeNode( parseStatementList( tEnd_of_input ) ) |
||
end.</ |
end.</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Output from parsing the Prime Numbers example program. |
Output from parsing the Prime Numbers example program. |
||
Line 1,144: | Line 1,144: | ||
=={{header|ATS}}== |
=={{header|ATS}}== |
||
< |
<syntaxhighlight lang="ats">(********************************************************************) |
||
(* Usage: parse [INPUTFILE [OUTPUTFILE]] |
(* Usage: parse [INPUTFILE [OUTPUTFILE]] |
||
If INPUTFILE or OUTPUTFILE is "-" or missing, then standard input |
If INPUTFILE or OUTPUTFILE is "-" or missing, then standard input |
||
Line 2,074: | Line 2,074: | ||
end |
end |
||
(********************************************************************)</ |
(********************************************************************)</syntaxhighlight> |
||
Line 2,177: | Line 2,177: | ||
=={{header|AWK}}== |
=={{header|AWK}}== |
||
Tested with gawk 4.1.1 and mawk 1.3.4. |
Tested with gawk 4.1.1 and mawk 1.3.4. |
||
<syntaxhighlight lang="awk"> |
|||
<lang AWK> |
|||
function Token_assign(tk, attr, attr_array, n, i) { |
function Token_assign(tk, attr, attr_array, n, i) { |
||
n=split(attr, attr_array) |
n=split(attr, attr_array) |
||
Line 2,473: | Line 2,473: | ||
prt_ast(t) |
prt_ast(t) |
||
} |
} |
||
</syntaxhighlight> |
|||
</lang> |
|||
{{out|case=count}} |
{{out|case=count}} |
||
Line 2,514: | Line 2,514: | ||
=={{header|C}}== |
=={{header|C}}== |
||
Tested with gcc 4.81 and later, compiles warning free with -Wall -Wextra |
Tested with gcc 4.81 and later, compiles warning free with -Wall -Wextra |
||
< |
<syntaxhighlight lang="c">#include <stdio.h> |
||
#include <stdlib.h> |
#include <stdlib.h> |
||
#include <string.h> |
#include <string.h> |
||
Line 2,854: | Line 2,854: | ||
init_io(&dest_fp, stdout, "wb", argc > 2 ? argv[2] : ""); |
init_io(&dest_fp, stdout, "wb", argc > 2 ? argv[2] : ""); |
||
prt_ast(parse()); |
prt_ast(parse()); |
||
}</ |
}</syntaxhighlight> |
||
{{out|case=prime numbers AST}} |
{{out|case=prime numbers AST}} |
||
Line 2,960: | Line 2,960: | ||
Code by Steve Williams. Tested with GnuCOBOL 2.2. |
Code by Steve Williams. Tested with GnuCOBOL 2.2. |
||
< |
<syntaxhighlight lang="cobol"> >>SOURCE FORMAT IS FREE |
||
identification division. |
identification division. |
||
*> this code is dedicated to the public domain |
*> this code is dedicated to the public domain |
||
Line 3,565: | Line 3,565: | ||
. |
. |
||
end program printast. |
end program printast. |
||
end program parser.</ |
end program parser.</syntaxhighlight> |
||
{{out|case=Primes}} |
{{out|case=Primes}} |
||
Line 3,673: | Line 3,673: | ||
< |
<syntaxhighlight lang="lisp">#!/bin/sh |
||
#|-*- mode:lisp -*-|# |
#|-*- mode:lisp -*-|# |
||
#| |
#| |
||
Line 4,084: | Line 4,084: | ||
(uiop:quit 0))) |
(uiop:quit 0))) |
||
;;; vim: set ft=lisp lisp:</ |
;;; vim: set ft=lisp lisp:</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 4,274: | Line 4,274: | ||
=={{header|Forth}}== |
=={{header|Forth}}== |
||
Tested with Gforth 0.7.3. |
Tested with Gforth 0.7.3. |
||
< |
<syntaxhighlight lang="forth">CREATE BUF 0 , \ single-character look-ahead buffer |
||
: PEEK BUF @ 0= IF KEY BUF ! THEN BUF @ ; |
: PEEK BUF @ 0= IF KEY BUF ! THEN BUF @ ; |
||
: GETC PEEK 0 BUF ! ; |
: GETC PEEK 0 BUF ! ; |
||
Line 4,445: | Line 4,445: | ||
: -EOI? TOKEN-TYPE End_of_input <> ; |
: -EOI? TOKEN-TYPE End_of_input <> ; |
||
: PARSE $NULL GETTOK BEGIN -EOI? WHILE STMT $SEQUENCE REPEAT ; |
: PARSE $NULL GETTOK BEGIN -EOI? WHILE STMT $SEQUENCE REPEAT ; |
||
PARSE .NODE</ |
PARSE .NODE</syntaxhighlight> |
||
{{out|case=Count AST}} |
{{out|case=Count AST}} |
||
Line 4,487: | Line 4,487: | ||
{{works with|gfortran|11.2.1}} |
{{works with|gfortran|11.2.1}} |
||
The following is Fortran 2008/2018 code with C preprocessing directives. If you call the program source ‘parse.F90’, with a capital ‘F’, then gfortran will know to run the C preprocessor. |
The following is Fortran 2008/2018 code with C preprocessing directives. If you call the program source ‘parse.F90’, with a capital ‘F’, then gfortran will know to run the C preprocessor. |
||
< |
<syntaxhighlight lang="fortran">!!! |
||
!!! An implementation of the Rosetta Code parser task: |
!!! An implementation of the Rosetta Code parser task: |
||
!!! https://rosettacode.org/wiki/Compiler/syntax_analyzer |
!!! https://rosettacode.org/wiki/Compiler/syntax_analyzer |
||
Line 6,033: | Line 6,033: | ||
end subroutine print_usage |
end subroutine print_usage |
||
end program parse</ |
end program parse</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 6,135: | Line 6,135: | ||
=={{header|Go}}== |
=={{header|Go}}== |
||
{{trans|C}} |
{{trans|C}} |
||
< |
<syntaxhighlight lang="go">package main |
||
import ( |
import ( |
||
Line 6,488: | Line 6,488: | ||
scanner = bufio.NewScanner(source) |
scanner = bufio.NewScanner(source) |
||
prtAst(parse()) |
prtAst(parse()) |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 6,597: | Line 6,597: | ||
<syntaxhighlight lang="icon"># |
|||
<lang Icon># |
|||
# The Rosetta Code Tiny-Language Parser, in Icon. |
# The Rosetta Code Tiny-Language Parser, in Icon. |
||
# |
# |
||
Line 6,954: | Line 6,954: | ||
} |
} |
||
return s |
return s |
||
end</ |
end</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 7,057: | Line 7,057: | ||
Implementation: |
Implementation: |
||
< |
<syntaxhighlight lang="j">require'format/printf' |
||
tkref=: tokenize 'End_of_input*/%+--<<=>>===!=!&&||print=print(if{else}while;,putc)a""0' |
tkref=: tokenize 'End_of_input*/%+--<<=>>===!=!&&||print=print(if{else}while;,putc)a""0' |
||
Line 7,214: | Line 7,214: | ||
end. |
end. |
||
}} |
}} |
||
</syntaxhighlight> |
|||
</lang> |
|||
Some quirks worth noting: |
Some quirks worth noting: |
||
Line 7,230: | Line 7,230: | ||
Task example: |
Task example: |
||
<syntaxhighlight lang="j"> |
|||
<lang J> |
|||
primes=: {{)n |
primes=: {{)n |
||
/* |
/* |
||
Line 7,350: | Line 7,350: | ||
String "\n" |
String "\n" |
||
; |
; |
||
</syntaxhighlight> |
|||
</lang> |
|||
=={{header|Java}}== |
=={{header|Java}}== |
||
Usage: java Parser infile [>outfile] |
Usage: java Parser infile [>outfile] |
||
{{trans|Python}} |
{{trans|Python}} |
||
< |
<syntaxhighlight lang="java"> |
||
import java.io.File; |
import java.io.File; |
||
import java.io.FileNotFoundException; |
import java.io.FileNotFoundException; |
||
Line 7,707: | Line 7,707: | ||
} |
} |
||
} |
} |
||
</syntaxhighlight> |
|||
</lang> |
|||
=={{header|Julia}}== |
=={{header|Julia}}== |
||
Julia tends to discourage large numbers of global variables, so this direct port from the Python reference implementation moves the globals into a function wrapper. |
Julia tends to discourage large numbers of global variables, so this direct port from the Python reference implementation moves the globals into a function wrapper. |
||
{{trans|Python}} |
{{trans|Python}} |
||
< |
<syntaxhighlight lang="julia">struct ASTnode |
||
nodetype::Int |
nodetype::Int |
||
left::Union{Nothing, ASTnode} |
left::Union{Nothing, ASTnode} |
||
Line 7,964: | Line 7,964: | ||
# syntaxanalyzer(length(ARGS) > 1 ? ARGS[1] : stdin) # for use as in the Python code |
# syntaxanalyzer(length(ARGS) > 1 ? ARGS[1] : stdin) # for use as in the Python code |
||
</syntaxhighlight> |
|||
</lang> |
|||
=={{header|M2000 Interpreter}}== |
=={{header|M2000 Interpreter}}== |
||
Line 7,972: | Line 7,972: | ||
<syntaxhighlight lang="m2000 interpreter"> |
|||
<lang M2000 Interpreter> |
|||
Module syntax_analyzer(b$){ |
Module syntax_analyzer(b$){ |
||
enum tokens { |
enum tokens { |
||
Line 8,345: | Line 8,345: | ||
43 1 End_of_Input |
43 1 End_of_Input |
||
} |
} |
||
</syntaxhighlight> |
|||
</lang> |
|||
{{out}} |
{{out}} |
||
Line 8,525: | Line 8,525: | ||
Using the third version of Nim lexer. |
Using the third version of Nim lexer. |
||
< |
<syntaxhighlight lang="nim">import ast_lexer |
||
type NodeKind* = enum |
type NodeKind* = enum |
||
Line 8,773: | Line 8,773: | ||
let code = if paramCount() < 1: stdin.readAll() else: paramStr(1).readFile() |
let code = if paramCount() < 1: stdin.readAll() else: paramStr(1).readFile() |
||
let tree = parse(code) |
let tree = parse(code) |
||
tree.printAst()</ |
tree.printAst()</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 8,882: | Line 8,882: | ||
< |
<syntaxhighlight lang="objecticon"># -*- ObjectIcon -*- |
||
# |
# |
||
# The Rosetta Code Tiny-Language Parser, in Object Icon. |
# The Rosetta Code Tiny-Language Parser, in Object Icon. |
||
Line 9,238: | Line 9,238: | ||
} |
} |
||
return s |
return s |
||
end</ |
end</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 9,342: | Line 9,342: | ||
=={{header|Perl}}== |
=={{header|Perl}}== |
||
Tested on perl v5.26.1 |
Tested on perl v5.26.1 |
||
< |
<syntaxhighlight lang="perl">#!/usr/bin/perl |
||
use strict; # parse.pl - inputs lex, outputs flattened ast |
use strict; # parse.pl - inputs lex, outputs flattened ast |
||
Line 9,415: | Line 9,415: | ||
$_[0] <= 0 && /$h Op_or \n/gcx ? "Or\n$ast" . expr(1) : |
$_[0] <= 0 && /$h Op_or \n/gcx ? "Or\n$ast" . expr(1) : |
||
return $ast while 1; |
return $ast while 1; |
||
}</ |
}</syntaxhighlight> |
||
{{out|case=Count AST}} |
{{out|case=Count AST}} |
||
Line 9,449: | Line 9,449: | ||
=={{header|Phix}}== |
=={{header|Phix}}== |
||
Reusing lex.e (and core.e) from the [[Compiler/lexical_analyzer#Phix|Lexical Analyzer task]], and again written as a reusable module. |
Reusing lex.e (and core.e) from the [[Compiler/lexical_analyzer#Phix|Lexical Analyzer task]], and again written as a reusable module. |
||
<!--< |
<!--<syntaxhighlight lang="phix">(phixonline)--> |
||
<span style="color: #000080;font-style:italic;">-- |
<span style="color: #000080;font-style:italic;">-- |
||
-- demo\rosetta\Compiler\parse.e |
-- demo\rosetta\Compiler\parse.e |
||
Line 9,602: | Line 9,602: | ||
<span style="color: #008080;">return</span> <span style="color: #000000;">t</span> |
<span style="color: #008080;">return</span> <span style="color: #000000;">t</span> |
||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
||
<!--</ |
<!--</syntaxhighlight>--> |
||
And a simple test driver for the specific task: |
And a simple test driver for the specific task: |
||
<!--< |
<!--<syntaxhighlight lang="phix">(phixonline)--> |
||
<span style="color: #000080;font-style:italic;">-- |
<span style="color: #000080;font-style:italic;">-- |
||
-- demo\rosetta\Compiler\parse.exw |
-- demo\rosetta\Compiler\parse.exw |
||
Line 9,663: | Line 9,663: | ||
--main({0,0,"primes.c"}) -- as Algol, C, Python (apart from spacing) |
--main({0,0,"primes.c"}) -- as Algol, C, Python (apart from spacing) |
||
--main({0,0,"count.c"}) -- as AWK ( "" )</span> |
--main({0,0,"count.c"}) -- as AWK ( "" )</span> |
||
<!--</ |
<!--</syntaxhighlight>--> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 9,672: | Line 9,672: | ||
=={{header|Python}}== |
=={{header|Python}}== |
||
Tested with Python 2.7 and 3.x |
Tested with Python 2.7 and 3.x |
||
< |
<syntaxhighlight lang="python">from __future__ import print_function |
||
import sys, shlex, operator |
import sys, shlex, operator |
||
Line 9,938: | Line 9,938: | ||
error(0, 0, "Can't open %s" % sys.argv[1]) |
error(0, 0, "Can't open %s" % sys.argv[1]) |
||
t = parse() |
t = parse() |
||
prt_ast(t)</ |
prt_ast(t)</syntaxhighlight> |
||
{{out|case=prime numbers AST}} |
{{out|case=prime numbers AST}} |
||
Line 10,058: | Line 10,058: | ||
< |
<syntaxhighlight lang="ratfor">###################################################################### |
||
# |
# |
||
# The Rosetta Code parser in Ratfor 77. |
# The Rosetta Code parser in Ratfor 77. |
||
Line 11,608: | Line 11,608: | ||
end |
end |
||
######################################################################</ |
######################################################################</syntaxhighlight> |
||
Line 11,723: | Line 11,723: | ||
The following code implements a configurable (from a symbol map provided as a parameter) Precedence Climbing parser for the output of the [http://rosettacode.org/wiki/Compiler/lexical_analyzer#Scala lexer]. The recursive descent language parser is closely based on the pseudo code given in the task description. |
The following code implements a configurable (from a symbol map provided as a parameter) Precedence Climbing parser for the output of the [http://rosettacode.org/wiki/Compiler/lexical_analyzer#Scala lexer]. The recursive descent language parser is closely based on the pseudo code given in the task description. |
||
< |
<syntaxhighlight lang="scala"> |
||
package xyz.hyperreal.rosettacodeCompiler |
package xyz.hyperreal.rosettacodeCompiler |
||
Line 11,933: | Line 11,933: | ||
} |
} |
||
</syntaxhighlight> |
|||
</lang> |
|||
=={{header|Scheme}}== |
=={{header|Scheme}}== |
||
Line 11,939: | Line 11,939: | ||
Code implements a recursive descent parser based on the given grammar. Tested against all programs in [[Compiler/Sample programs]]. |
Code implements a recursive descent parser based on the given grammar. Tested against all programs in [[Compiler/Sample programs]]. |
||
< |
<syntaxhighlight lang="scheme"> |
||
(import (scheme base) |
(import (scheme base) |
||
(scheme process-context) |
(scheme process-context) |
||
Line 12,146: | Line 12,146: | ||
(display-ast (parse-file (cadr (command-line)))) |
(display-ast (parse-file (cadr (command-line)))) |
||
(display "Error: provide program filename\n")) |
(display "Error: provide program filename\n")) |
||
</syntaxhighlight> |
|||
</lang> |
|||
=={{header|Wren}}== |
=={{header|Wren}}== |
||
Line 12,153: | Line 12,153: | ||
{{libheader|Wren-fmt}} |
{{libheader|Wren-fmt}} |
||
{{libheader|wren-ioutil}} |
{{libheader|wren-ioutil}} |
||
< |
<syntaxhighlight lang="ecmascript">import "/dynamic" for Enum, Struct, Tuple |
||
import "/fmt" for Fmt |
import "/fmt" for Fmt |
||
import "/ioutil" for FileUtil |
import "/ioutil" for FileUtil |
||
Line 12,469: | Line 12,469: | ||
lines = FileUtil.readLines("source.txt") |
lines = FileUtil.readLines("source.txt") |
||
lineCount = lines.count |
lineCount = lines.count |
||
prtAst.call(parse.call())</ |
prtAst.call(parse.call())</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 12,571: | Line 12,571: | ||
=={{header|Zig}}== |
=={{header|Zig}}== |
||
< |
<syntaxhighlight lang="zig"> |
||
const std = @import("std"); |
const std = @import("std"); |
||
Line 13,124: | Line 13,124: | ||
return result; |
return result; |
||
} |
} |
||
</syntaxhighlight> |
|||
</lang> |