Compiler/syntax analyzer: Difference between revisions

m
m (→‎{{header|Wren}}: Minor tidy)
 
(8 intermediate revisions by 4 users not shown)
Line 21:
[https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form Extended Backus-Naur Form (EBNF)]:
 
<syntaxhighlight lang="ebnf">
<lang EBNF>
stmt_list = {stmt} ;
 
Line 47:
| '(' expr ')'
| ('+' | '-' | '!') primary
;</langsyntaxhighlight>
 
The resulting AST should be formulated as a Binary Tree.
Line 68:
|-
| style="vertical-align:top" |
<langsyntaxhighlight lang="c">count = 1;
while (count < 10) {
print("count is: ", count, "\n");
count = count + 1;
}</langsyntaxhighlight>
 
| style="vertical-align:top" |
Line 280:
[https://en.wikipedia.org/wiki/Recursive_descent_parser Recursive Descent] for statement parsing. The AST is also built:
 
<langsyntaxhighlight lang="python">def expr(p)
if tok is "("
x = paren_expr()
Line 364:
t = make_node(Sequence, t, stmt())
until tok is end-of-file
return t</langsyntaxhighlight>
 
;Once the AST is built, it should be output in a [[Flatten_a_list|flattened format.]] This can be as simple as the following:
 
<langsyntaxhighlight lang="python">def prt_ast(t)
if t == NULL
print(";\n")
Line 378:
print("\n")
prt_ast(t.left)
prt_ast(t.right)</langsyntaxhighlight>
 
;If the AST is correctly built, loading it into a subsequent program should be as simple as:
 
<langsyntaxhighlight lang="python">def load_ast()
line = readline()
# Each line has at least one token
Line 402:
left = load_ast()
right = load_ast()
return make_node(node_type, left, right)</langsyntaxhighlight>
 
Finally, the AST can also be tested by running it against one of the AST Interpreter [[Compiler/AST_interpreter|solutions]].
Line 415:
|-
| style="vertical-align:top" |
<langsyntaxhighlight lang="c">/*
Simple prime number generator
*/
Line 434:
}
}
print("Total primes found: ", count, "\n");</langsyntaxhighlight>
 
| style="vertical-align:top" |
Line 654:
 
=={{header|ALGOL W}}==
<syntaxhighlight lang ="algolw">begin % syntax analyser %
begin % syntax analyser %
% parse tree nodes %
record node( integer type
Line 952 ⟶ 953:
while begin
if tkType = tString then begin
stmtNode := opNode( nSequence, stmtNode, opNode( nPrts, operandNode( nString, tkIntegerValue ), null ) );
, stmtNode
, opNode( nPrts, operandNode( nString, tkIntegerValue ), null )
);
readToken
end
Line 981 ⟶ 985:
reference(node) listNode;
listNode := null;
while tkType not = terminator and tkType not = tEnd_of_input do listNode := opNode( nSequence, listNode, parseStatement );
and tkType not = tEnd_of_input do listNode := opNode( nSequence, listNode, parseStatement );
listNode
end parseStatementList ;
 
% sets a node code and name %
nIdentifier := 1; ndName( nIdentifier ) := "Identifier"; nString := 2; ndName( nString ) := "String";
procedure setNode ( integer result nd; integer value ndCode; string(14) value name ) ;
nInteger := 3; ndName( nInteger ) := "Integer"; nSequence := 4; ndName( nSequence ) := "Sequence";
nIf begin nd := 5ndCode; ndName( nIf ndCode ) := "If"; nPrtc := 6; ndName( nPrtc ) :=name "Prtc"end;
 
nPrts := 7; ndName( nPrts ) := "Prts"; nPrti := 8; ndName( nPrti ) := "Prti";
setNode( nIdentifier, 1, "Identifier" ); setNode( nString, 2, "String" );
nWhile := 9; ndName( nWhile ) := "While"; nAssign := 10; ndName( nAssign ) := "Assign";
nNegate := 11; ndNamesetNode( nNegatenInteger, ) :=3, "NegateInteger"; ); nNotsetNode( nSequence, 4, "Sequence" := 12); ndNamesetNode( nNotnIf, 5, "If" ) := "Not";
nMultiplysetNode( nPrtc, := 13; ndName( nMultiply6, "Prtc" ) := "Multiply"); setNode( nPrts, nDivide :=7, "Prts" 14; ndName( nDivide ) := "Divide";
nMod := 15; ndNamesetNode( nModnPrti, ) :=8, "ModPrti"; ); nAddsetNode( nWhile, := 16; ndName( nAdd9, "While" ) := "Add";
nSubtractsetNode( nAssign, := 17; ndName( nSubtract10, "Assign" ) := "Subtract"; setNode( nNegate, nLess 11, "Negate" := 18); ndNamesetNode( nLess nNot, 12, "Not" ) := "Less";
nLessEqualsetNode( nMultiply, 13, :="Multiply" 19; ndName( nLessEqual ); setNode( nDivide, ) :=14, "LessEqualDivide" ; nGreater := 20); ndNamesetNode( nGreaternMod, ) :=15, "GreaterMod" );
nGreaterEqualsetNode( nAdd, := 21; ndName( nGreaterEqual ) :=16, "GreaterEqualAdd"; nEqual := 22; ndName( nEqual ); setNode( nSubtract, ) :=17, "EqualSubtract" );
nNotEqual := 23; ndNamesetNode( nNotEqualnLess, ) :=18, "NotEqualLess"; nAnd := 24); ndNamesetNode( nAnd nLessEqual, 19, "LessEqual" ) := "And";
nOr := 25; ndNamesetNode( nOr nGreater, 20, "Greater" ) := "Or";
setNode( nGreaterEqual, 21, "GreaterEqual" ); setNode( nEqual, 22, "Equal" );
setNode( nNotEqual, 23, "NotEqual" ); setNode( nAnd, 24, "And" ); setNode( nOr, 25, "Or" );
tOp_multiply := 1; tkName( tOp_multiply ) := "Op_multiply"; tkPrec( tOp_multiply ) := 5;
tOp_divide := 2; tkName( tOp_divide ) := "Op_divide"; tkPrec( tOp_divide ) := 5;
Line 1,033 ⟶ 1,040:
tkNode( tOp_multiply ) := nMultiply; tkNode( tOp_divide ) := nDivide; tkNode( tOp_mod ) := nMod;
tkNode( tOp_add ) := nAdd; tkNode( tOp_subtract ) := nSubtract; tkNode( tOp_less ) := nLess;
tkNode( tOp_lessequal ) := nLessEqual; tkNode( tOp_greater ) := nGreater; tkNode( tOp_greaterequal ) := nGreaterEqual;
tkNode( tOp_greaterequal ) := nGreaterEqual;
tkNode( tOp_equal ) := nEqual; tkNode( tOp_notequal ) := nNotEqual; tkNode( tOp_not ) := nNot;
tkNode( tOp_and ) := nAnd; tkNode( tOp_or ) := nOr;
Line 1,041 ⟶ 1,049:
readToken;
writeNode( parseStatementList( tEnd_of_input ) )
end.</lang>
</syntaxhighlight>
{{out}}
Output from parsing the Prime Numbers example program.
Line 1,144 ⟶ 1,153:
=={{header|ATS}}==
 
<langsyntaxhighlight ATSlang="ats">(********************************************************************)
(* Usage: parse [INPUTFILE [OUTPUTFILE]]
If INPUTFILE or OUTPUTFILE is "-" or missing, then standard input
Line 2,074 ⟶ 2,083:
end
 
(********************************************************************)</langsyntaxhighlight>
 
 
Line 2,177 ⟶ 2,186:
=={{header|AWK}}==
Tested with gawk 4.1.1 and mawk 1.3.4.
<syntaxhighlight lang="awk">
<lang AWK>
function Token_assign(tk, attr, attr_array, n, i) {
n=split(attr, attr_array)
Line 2,473 ⟶ 2,482:
prt_ast(t)
}
</syntaxhighlight>
</lang>
 
{{out|case=count}}
Line 2,514 ⟶ 2,523:
=={{header|C}}==
Tested with gcc 4.81 and later, compiles warning free with -Wall -Wextra
<langsyntaxhighlight Clang="c">#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Line 2,854 ⟶ 2,863:
init_io(&dest_fp, stdout, "wb", argc > 2 ? argv[2] : "");
prt_ast(parse());
}</langsyntaxhighlight>
 
{{out|case=prime numbers AST}}
Line 2,960 ⟶ 2,969:
Code by Steve Williams. Tested with GnuCOBOL 2.2.
 
<langsyntaxhighlight lang="cobol"> >>SOURCE FORMAT IS FREE
identification division.
*> this code is dedicated to the public domain
Line 3,565 ⟶ 3,574:
.
end program printast.
end program parser.</langsyntaxhighlight>
 
{{out|case=Primes}}
Line 3,673 ⟶ 3,682:
 
 
<langsyntaxhighlight lang="lisp">#!/bin/sh
#|-*- mode:lisp -*-|#
#|
Line 4,084 ⟶ 4,093:
(uiop:quit 0)))
 
;;; vim: set ft=lisp lisp:</langsyntaxhighlight>
 
{{out}}
Line 4,274 ⟶ 4,283:
=={{header|Forth}}==
Tested with Gforth 0.7.3.
<langsyntaxhighlight Forthlang="forth">CREATE BUF 0 , \ single-character look-ahead buffer
: PEEK BUF @ 0= IF KEY BUF ! THEN BUF @ ;
: GETC PEEK 0 BUF ! ;
Line 4,445 ⟶ 4,454:
: -EOI? TOKEN-TYPE End_of_input <> ;
: PARSE $NULL GETTOK BEGIN -EOI? WHILE STMT $SEQUENCE REPEAT ;
PARSE .NODE</langsyntaxhighlight>
 
{{out|case=Count AST}}
Line 4,487 ⟶ 4,496:
{{works with|gfortran|11.2.1}}
The following is Fortran 2008/2018 code with C preprocessing directives. If you call the program source ‘parse.F90’, with a capital ‘F’, then gfortran will know to run the C preprocessor.
<langsyntaxhighlight lang="fortran">!!!
!!! An implementation of the Rosetta Code parser task:
!!! https://rosettacode.org/wiki/Compiler/syntax_analyzer
Line 6,033 ⟶ 6,042:
end subroutine print_usage
end program parse</langsyntaxhighlight>
 
{{out}}
Line 6,135 ⟶ 6,144:
=={{header|Go}}==
{{trans|C}}
<langsyntaxhighlight lang="go">package main
 
import (
Line 6,488 ⟶ 6,497:
scanner = bufio.NewScanner(source)
prtAst(parse())
}</langsyntaxhighlight>
 
{{out}}
Line 6,597 ⟶ 6,606:
 
 
<syntaxhighlight lang="icon">#
<lang Icon>#
# The Rosetta Code Tiny-Language Parser, in Icon.
#
Line 6,954 ⟶ 6,963:
}
return s
end</langsyntaxhighlight>
 
{{out}}
Line 7,057 ⟶ 7,066:
Implementation:
 
<langsyntaxhighlight Jlang="j">require'format/printf'
tkref=: tokenize 'End_of_input*/%+--<<=>>===!=!&&||print=print(if{else}while;,putc)a""0'
Line 7,214 ⟶ 7,223:
end.
}}
</syntaxhighlight>
</lang>
 
Some quirks worth noting:
Line 7,230 ⟶ 7,239:
Task example:
 
<syntaxhighlight lang="j">
<lang J>
primes=: {{)n
/*
Line 7,350 ⟶ 7,359:
String "\n"
;
</syntaxhighlight>
</lang>
 
=={{header|Java}}==
Usage: java Parser infile [>outfile]
{{trans|Python}}
<langsyntaxhighlight lang="java">
import java.io.File;
import java.io.FileNotFoundException;
Line 7,707 ⟶ 7,716:
}
}
</syntaxhighlight>
</lang>
 
=={{header|Julia}}==
Julia tends to discourage large numbers of global variables, so this direct port from the Python reference implementation moves the globals into a function wrapper.
{{trans|Python}}
<langsyntaxhighlight lang="julia">struct ASTnode
nodetype::Int
left::Union{Nothing, ASTnode}
Line 7,964 ⟶ 7,973:
 
# syntaxanalyzer(length(ARGS) > 1 ? ARGS[1] : stdin) # for use as in the Python code
</syntaxhighlight>
</lang>
 
=={{header|M2000 Interpreter}}==
Line 7,972 ⟶ 7,981:
 
 
<syntaxhighlight lang="m2000 interpreter">
<lang M2000 Interpreter>
Module syntax_analyzer(b$){
enum tokens {
Line 8,345 ⟶ 8,354:
43 1 End_of_Input
}
</syntaxhighlight>
</lang>
 
{{out}}
Line 8,525 ⟶ 8,534:
Using the third version of Nim lexer.
 
<langsyntaxhighlight Nimlang="nim">import ast_lexer
 
type NodeKind* = enum
Line 8,773 ⟶ 8,782:
let code = if paramCount() < 1: stdin.readAll() else: paramStr(1).readFile()
let tree = parse(code)
tree.printAst()</langsyntaxhighlight>
 
{{out}}
Line 8,882 ⟶ 8,891:
 
 
<langsyntaxhighlight ObjectIconlang="objecticon"># -*- ObjectIcon -*-
#
# The Rosetta Code Tiny-Language Parser, in Object Icon.
Line 9,238 ⟶ 9,247:
}
return s
end</langsyntaxhighlight>
 
{{out}}
Line 9,342 ⟶ 9,351:
=={{header|Perl}}==
Tested on perl v5.26.1
<langsyntaxhighlight Perllang="perl">#!/usr/bin/perl
 
use strict; # parse.pl - inputs lex, outputs flattened ast
Line 9,415 ⟶ 9,424:
$_[0] <= 0 && /$h Op_or \n/gcx ? "Or\n$ast" . expr(1) :
return $ast while 1;
}</langsyntaxhighlight>
 
{{out|case=Count AST}}
Line 9,449 ⟶ 9,458:
=={{header|Phix}}==
Reusing lex.e (and core.e) from the [[Compiler/lexical_analyzer#Phix|Lexical Analyzer task]], and again written as a reusable module.
<!--<langsyntaxhighlight Phixlang="phix">(phixonline)-->
<span style="color: #000080;font-style:italic;">--
-- demo\rosetta\Compiler\parse.e
Line 9,602 ⟶ 9,611:
<span style="color: #008080;">return</span> <span style="color: #000000;">t</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<!--</langsyntaxhighlight>-->
And a simple test driver for the specific task:
<!--<langsyntaxhighlight Phixlang="phix">(phixonline)-->
<span style="color: #000080;font-style:italic;">--
-- demo\rosetta\Compiler\parse.exw
Line 9,663 ⟶ 9,672:
--main({0,0,"primes.c"}) -- as Algol, C, Python (apart from spacing)
--main({0,0,"count.c"}) -- as AWK ( "" )</span>
<!--</langsyntaxhighlight>-->
{{out}}
<pre>
Line 9,672 ⟶ 9,681:
=={{header|Python}}==
Tested with Python 2.7 and 3.x
<langsyntaxhighlight Pythonlang="python">from __future__ import print_function
import sys, shlex, operator
 
Line 9,938 ⟶ 9,947:
error(0, 0, "Can't open %s" % sys.argv[1])
t = parse()
prt_ast(t)</langsyntaxhighlight>
 
{{out|case=prime numbers AST}}
Line 10,042 ⟶ 10,051:
 
=={{header|RATFOR}}==
{{works with|ratfor77|1[https://sourceforge.0net/p/chemoelectric/ratfor77/ (public domain ratfor77 in C)1.0]}}
{{works with|gfortran|11.2.1}}
{{works with|f2c|20100827}}
 
 
FORTRAN 77 is ''not'' a non-recursive language, in the specific sense that it does not support recursive algorithms. LocalWhat variables,is inmissing standard-conformingis programs,simple: there is no way to specify that a value should go onto a call stack. Local variables were all treated by compilers more or less as what C programmers would call "static". Subprogram parameters were all passed by reference, rather than by value as in C.
 
''However'', it is perfectly possible to implement a recursive language ''in'' FORTRAN 77 and do the programming in ''that''.
Line 10,054 ⟶ 10,063:
 
Printing the abstract syntax tree is done with a quite ordinary non-recursive tree traversal written directly in Ratfor.
 
There is no paradox in the notion of doing recursive programming within a Ratfor program by the method described above. All the recursion is at a higher level of abstraction than the Ratfor programming itself. If you examine the Ratfor code ''as'' Ratfor code, there is not a single recursive call.
 
 
<langsyntaxhighlight lang="ratfor">######################################################################
#
# The Rosetta Code parser in Ratfor 77.
Line 11,229 ⟶ 11,240:
 
i = 1
while (code(i) /!= XLOC || code(i + 1) /!= loc)
i = i + 2
ipfind = i
Line 11,341 ⟶ 11,352:
{
i = dtpop (dstack, idstck)
if (i /!= 0)
ip = ipfind (code, code(ip + 1))
else
Line 11,606 ⟶ 11,617:
end
 
######################################################################</langsyntaxhighlight>
 
 
Line 11,721 ⟶ 11,732:
The following code implements a configurable (from a symbol map provided as a parameter) Precedence Climbing parser for the output of the [http://rosettacode.org/wiki/Compiler/lexical_analyzer#Scala lexer]. The recursive descent language parser is closely based on the pseudo code given in the task description.
 
<langsyntaxhighlight lang="scala">
package xyz.hyperreal.rosettacodeCompiler
 
Line 11,931 ⟶ 11,942:
 
}
</syntaxhighlight>
</lang>
 
=={{header|Scheme}}==
Line 11,937 ⟶ 11,948:
Code implements a recursive descent parser based on the given grammar. Tested against all programs in [[Compiler/Sample programs]].
 
<langsyntaxhighlight lang="scheme">
(import (scheme base)
(scheme process-context)
Line 12,144 ⟶ 12,155:
(display-ast (parse-file (cadr (command-line))))
(display "Error: provide program filename\n"))
</syntaxhighlight>
</lang>
 
=={{header|Wren}}==
Line 12,151 ⟶ 12,162:
{{libheader|Wren-fmt}}
{{libheader|wren-ioutil}}
<langsyntaxhighlight ecmascriptlang="wren">import "./dynamic" for Enum, Struct, Tuple
import "./fmt" for Fmt
import "./ioutil" for FileUtil
 
var tokens = [
Line 12,467 ⟶ 12,478:
lines = FileUtil.readLines("source.txt")
lineCount = lines.count
prtAst.call(parse.call())</langsyntaxhighlight>
 
{{out}}
Line 12,569 ⟶ 12,580:
 
=={{header|Zig}}==
<langsyntaxhighlight lang="zig">
const std = @import("std");
 
Line 13,122 ⟶ 13,133:
return result;
}
</syntaxhighlight>
</lang>
9,476

edits