Compiler/syntax analyzer: Difference between revisions

← Older edit

Compiler/syntax analyzer (view source)

Revision as of 16:15, 20 November 2023

1,145 bytes added , 5 months ago

m

→‎{{header|Wren}}: Minor tidy

PureFox

9,476

edits

Revision as of 23:47, 22 April 2022 (view source) Chemoelectric (talk \| contribs) m (→‎{{header\|RATFOR}}) ← Older edit		Latest revision as of 16:15, 20 November 2023 (view source) PureFox (talk \| contribs) m (→‎{{header\|Wren}}: Minor tidy)
(8 intermediate revisions by 4 users not shown)
Line 21: [https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form Extended Backus-Naur Form (EBNF)]: <syntaxhighlight lang="ebnf"> ~~<lang EBNF>~~ stmt_list = {stmt} ; Line 47: \| '(' expr ')' \| ('+' \| '-' \| '!') primary ;</~~lang~~syntaxhighlight> The resulting AST should be formulated as a Binary Tree. Line 68: \|- \| style="vertical-align:top" \| <~~lang~~syntaxhighlight lang="c">count = 1; while (count < 10) { print("count is: ", count, "\n"); count = count + 1; }</~~lang~~syntaxhighlight> \| style="vertical-align:top" \| Line 280: [https://en.wikipedia.org/wiki/Recursive_descent_parser Recursive Descent] for statement parsing. The AST is also built: <~~lang~~syntaxhighlight lang="python">def expr(p) if tok is "(" x = paren_expr() Line 364: t = make_node(Sequence, t, stmt()) until tok is end-of-file return t</~~lang~~syntaxhighlight> ;Once the AST is built, it should be output in a [[Flatten_a_list\|flattened format.]] This can be as simple as the following: <~~lang~~syntaxhighlight lang="python">def prt_ast(t) if t == NULL print(";\n") Line 378: print("\n") prt_ast(t.left) prt_ast(t.right)</~~lang~~syntaxhighlight> ;If the AST is correctly built, loading it into a subsequent program should be as simple as: <~~lang~~syntaxhighlight lang="python">def load_ast() line = readline() # Each line has at least one token Line 402: left = load_ast() right = load_ast() return make_node(node_type, left, right)</~~lang~~syntaxhighlight> Finally, the AST can also be tested by running it against one of the AST Interpreter [[Compiler/AST_interpreter\|solutions]]. Line 415: \|- \| style="vertical-align:top" \| <~~lang~~syntaxhighlight lang="c">/* Simple prime number generator / Line 434: } } print("Total primes found: ", count, "\n");</~~lang~~syntaxhighlight> \| style="vertical-align:top" \| Line 654: =={{header\|ALGOL W}}== <syntaxhighlight lang ="algolw">~~begin % syntax analyser %~~ begin % syntax analyser % % parse tree nodes % record node( integer type Line 952 ⟶ 953: while begin if tkType = tString then begin stmtNode := opNode( nSequence~~, stmtNode, opNode( nPrts, operandNode( nString, tkIntegerValue ), null ) );~~ , stmtNode , opNode( nPrts, operandNode( nString, tkIntegerValue ), null ) ); readToken end Line 981 ⟶ 985: reference(node) listNode; listNode := null; while tkType not = terminator ~~and tkType not = tEnd_of_input do listNode := opNode( nSequence, listNode, parseStatement );~~ and tkType not = tEnd_of_input do listNode := opNode( nSequence, listNode, parseStatement ); listNode end parseStatementList ; % sets a node code and name % ~~nIdentifier := 1; ndName( nIdentifier ) := "Identifier"; nString := 2; ndName( nString ) := "String";~~ procedure setNode ( integer result nd; integer value ndCode; string(14) value name ) ; ~~nInteger := 3; ndName( nInteger ) := "Integer"; nSequence := 4; ndName( nSequence ) := "Sequence";~~ ~~nIf~~ begin nd := 5ndCode; ndName( ~~nIf~~ ndCode ) := ~~"If"; nPrtc := 6; ndName( nPrtc ) :=~~name ~~"Prtc"~~end; ~~nPrts := 7; ndName( nPrts ) := "Prts"; nPrti := 8; ndName( nPrti ) := "Prti";~~ setNode( nIdentifier, 1, "Identifier" ); setNode( nString, 2, "String" ); ~~nWhile := 9; ndName( nWhile ) := "While"; nAssign := 10; ndName( nAssign ) := "Assign";~~ ~~nNegate := 11; ndName~~setNode( ~~nNegate~~nInteger, ~~) :=~~3, "~~Negate~~Integer"; ); ~~nNot~~setNode( nSequence, 4, "Sequence" ~~:= 12~~); ~~ndName~~setNode( ~~nNot~~nIf, 5, "If" ) ~~:= "Not"~~; ~~nMultiply~~setNode( nPrtc, := ~~13;~~ ~~ndName(~~ ~~nMultiply~~6, "Prtc" ) := ~~"Multiply"~~); setNode( nPrts, ~~nDivide~~ :=7, "Prts" ~~14;~~ ~~ndName(~~ ~~nDivide~~ ) ~~:= "Divide"~~; ~~nMod := 15; ndName~~setNode( ~~nMod~~nPrti, ~~) :=~~8, "~~Mod~~Prti"; ); ~~nAdd~~setNode( nWhile, ~~:= 16; ndName( nAdd~~9, "While" ) ~~:= "Add"~~; ~~nSubtract~~setNode( nAssign, ~~:= 17; ndName( nSubtract~~10, "Assign" ) ~~:= "Subtract"~~; setNode( nNegate, ~~nLess~~ 11, "Negate" ~~:= 18~~); ~~ndName~~setNode( ~~nLess~~ nNot, 12, "Not" ) ~~:= "Less"~~; ~~nLessEqual~~setNode( nMultiply, 13, :="Multiply" ~~19;~~ ~~ndName(~~ ~~nLessEqual~~ ); setNode( nDivide, ) :=14, "~~LessEqual~~Divide" ; ~~nGreater := 20~~); ~~ndName~~setNode( ~~nGreater~~nMod, ~~) :=~~15, "~~Greater~~Mod" ); ~~nGreaterEqual~~setNode( nAdd, := ~~21;~~ ~~ndName(~~ ~~nGreaterEqual~~ ~~) :=~~16, "~~GreaterEqual~~Add"; ~~nEqual~~ := ~~22;~~ ~~ndName(~~ ~~nEqual~~ ); setNode( nSubtract, ) :=17, "~~Equal~~Subtract" ); ~~nNotEqual := 23; ndName~~setNode( ~~nNotEqual~~nLess, ) :=18, "~~NotEqual~~Less"; ~~nAnd~~ ~~:= 24~~); ~~ndName~~setNode( ~~nAnd~~ nLessEqual, 19, "LessEqual" ) ~~:= "And"~~; ~~nOr := 25; ndName~~setNode( ~~nOr~~ nGreater, 20, "Greater" ) ~~:= "Or"~~; setNode( nGreaterEqual, 21, "GreaterEqual" ); setNode( nEqual, 22, "Equal" ); setNode( nNotEqual, 23, "NotEqual" ); setNode( nAnd, 24, "And" ); setNode( nOr, 25, "Or" ); tOp_multiply := 1; tkName( tOp_multiply ) := "Op_multiply"; tkPrec( tOp_multiply ) := 5; tOp_divide := 2; tkName( tOp_divide ) := "Op_divide"; tkPrec( tOp_divide ) := 5; Line 1,033 ⟶ 1,040: tkNode( tOp_multiply ) := nMultiply; tkNode( tOp_divide ) := nDivide; tkNode( tOp_mod ) := nMod; tkNode( tOp_add ) := nAdd; tkNode( tOp_subtract ) := nSubtract; tkNode( tOp_less ) := nLess; tkNode( tOp_lessequal ) := nLessEqual; tkNode( tOp_greater ) := nGreater~~; tkNode( tOp_greaterequal ) := nGreaterEqual~~; tkNode( tOp_greaterequal ) := nGreaterEqual; tkNode( tOp_equal ) := nEqual; tkNode( tOp_notequal ) := nNotEqual; tkNode( tOp_not ) := nNot; tkNode( tOp_and ) := nAnd; tkNode( tOp_or ) := nOr; Line 1,041 ⟶ 1,049: readToken; writeNode( parseStatementList( tEnd_of_input ) ) end.~~</lang>~~ </syntaxhighlight> {{out}} Output from parsing the Prime Numbers example program. Line 1,144 ⟶ 1,153: =={{header\|ATS}}== <~~lang~~syntaxhighlight ~~ATS~~lang="ats">(******************************************************************) ( Usage: parse [INPUTFILE [OUTPUTFILE]] If INPUTFILE or OUTPUTFILE is "-" or missing, then standard input Line 2,074 ⟶ 2,083: end (*******************************************************************)</~~lang~~syntaxhighlight> Line 2,177 ⟶ 2,186: =={{header\|AWK}}== Tested with gawk 4.1.1 and mawk 1.3.4. <syntaxhighlight lang="awk"> ~~<lang AWK>~~ function Token_assign(tk, attr, attr_array, n, i) { n=split(attr, attr_array) Line 2,473 ⟶ 2,482: prt_ast(t) } </syntaxhighlight> ~~</lang>~~ {{out\|case=count}} Line 2,514 ⟶ 2,523: =={{header\|C}}== Tested with gcc 4.81 and later, compiles warning free with -Wall -Wextra <~~lang~~syntaxhighlight Clang="c">#include <stdio.h> #include <stdlib.h> #include <string.h> Line 2,854 ⟶ 2,863: init_io(&dest_fp, stdout, "wb", argc > 2 ? argv[2] : ""); prt_ast(parse()); }</~~lang~~syntaxhighlight> {{out\|case=prime numbers AST}} Line 2,960 ⟶ 2,969: Code by Steve Williams. Tested with GnuCOBOL 2.2. <~~lang~~syntaxhighlight lang="cobol"> >>SOURCE FORMAT IS FREE identification division. > this code is dedicated to the public domain Line 3,565 ⟶ 3,574: . end program printast. end program parser.</~~lang~~syntaxhighlight> {{out\|case=Primes}} Line 3,673 ⟶ 3,682: <~~lang~~syntaxhighlight lang="lisp">#!/bin/sh #\|-- mode:lisp --\|# #\| Line 4,084 ⟶ 4,093: (uiop:quit 0))) ;;; vim: set ft=lisp lisp:</~~lang~~syntaxhighlight> {{out}} Line 4,274 ⟶ 4,283: =={{header\|Forth}}== Tested with Gforth 0.7.3. <~~lang~~syntaxhighlight ~~Forth~~lang="forth">CREATE BUF 0 , \ single-character look-ahead buffer : PEEK BUF @ 0= IF KEY BUF ! THEN BUF @ ; : GETC PEEK 0 BUF ! ; Line 4,445 ⟶ 4,454: : -EOI? TOKEN-TYPE End_of_input <> ; : PARSE $NULL GETTOK BEGIN -EOI? WHILE STMT $SEQUENCE REPEAT ; PARSE .NODE</~~lang~~syntaxhighlight> {{out\|case=Count AST}} Line 4,487 ⟶ 4,496: {{works with\|gfortran\|11.2.1}} The following is Fortran 2008/2018 code with C preprocessing directives. If you call the program source ‘parse.F90’, with a capital ‘F’, then gfortran will know to run the C preprocessor. <~~lang~~syntaxhighlight lang="fortran">!!! !!! An implementation of the Rosetta Code parser task: !!! https://rosettacode.org/wiki/Compiler/syntax_analyzer Line 6,033 ⟶ 6,042: end subroutine print_usage end program parse</~~lang~~syntaxhighlight> {{out}} Line 6,135 ⟶ 6,144: =={{header\|Go}}== {{trans\|C}} <~~lang~~syntaxhighlight lang="go">package main import ( Line 6,488 ⟶ 6,497: scanner = bufio.NewScanner(source) prtAst(parse()) }</~~lang~~syntaxhighlight> {{out}} Line 6,597 ⟶ 6,606: <syntaxhighlight lang="icon"># ~~<lang Icon>#~~ # The Rosetta Code Tiny-Language Parser, in Icon. # Line 6,954 ⟶ 6,963: } return s end</~~lang~~syntaxhighlight> {{out}} Line 7,057 ⟶ 7,066: Implementation: <~~lang~~syntaxhighlight Jlang="j">require'format/printf' tkref=: tokenize 'End_of_input/%+--<<=>>===!=!&&\|\|print=print(if{else}while;,putc)a""0' Line 7,214 ⟶ 7,223: end. }} </syntaxhighlight> ~~</lang>~~ Some quirks worth noting: Line 7,230 ⟶ 7,239: Task example: <syntaxhighlight lang="j"> ~~<lang J>~~ primes=: {{)n / Line 7,350 ⟶ 7,359: String "\n" ; </syntaxhighlight> ~~</lang>~~ =={{header\|Java}}== Usage: java Parser infile [>outfile] {{trans\|Python}} <~~lang~~syntaxhighlight lang="java"> import java.io.File; import java.io.FileNotFoundException; Line 7,707 ⟶ 7,716: } } </syntaxhighlight> ~~</lang>~~ =={{header\|Julia}}== Julia tends to discourage large numbers of global variables, so this direct port from the Python reference implementation moves the globals into a function wrapper. {{trans\|Python}} <~~lang~~syntaxhighlight lang="julia">struct ASTnode nodetype::Int left::Union{Nothing, ASTnode} Line 7,964 ⟶ 7,973: # syntaxanalyzer(length(ARGS) > 1 ? ARGS[1] : stdin) # for use as in the Python code </syntaxhighlight> ~~</lang>~~ =={{header\|M2000 Interpreter}}== Line 7,972 ⟶ 7,981: <syntaxhighlight lang="m2000 interpreter"> ~~<lang M2000 Interpreter>~~ Module syntax_analyzer(b$){ enum tokens { Line 8,345 ⟶ 8,354: 43 1 End_of_Input } </syntaxhighlight> ~~</lang>~~ {{out}} Line 8,525 ⟶ 8,534: Using the third version of Nim lexer. <~~lang~~syntaxhighlight ~~Nim~~lang="nim">import ast_lexer type NodeKind* = enum Line 8,773 ⟶ 8,782: let code = if paramCount() < 1: stdin.readAll() else: paramStr(1).readFile() let tree = parse(code) tree.printAst()</~~lang~~syntaxhighlight> {{out}} Line 8,882 ⟶ 8,891: <~~lang~~syntaxhighlight ~~ObjectIcon~~lang="objecticon"># -- ObjectIcon -- # # The Rosetta Code Tiny-Language Parser, in Object Icon. Line 9,238 ⟶ 9,247: } return s end</~~lang~~syntaxhighlight> {{out}} Line 9,342 ⟶ 9,351: =={{header\|Perl}}== Tested on perl v5.26.1 <~~lang~~syntaxhighlight ~~Perl~~lang="perl">#!/usr/bin/perl use strict; # parse.pl - inputs lex, outputs flattened ast Line 9,415 ⟶ 9,424: $_[0] <= 0 && /$h Op_or \n/gcx ? "Or\n$ast" . expr(1) : return $ast while 1; }</~~lang~~syntaxhighlight> {{out\|case=Count AST}} Line 9,449 ⟶ 9,458: =={{header\|Phix}}== Reusing lex.e (and core.e) from the [[Compiler/lexical_analyzer#Phix\|Lexical Analyzer task]], and again written as a reusable module. <!--<~~lang~~syntaxhighlight ~~Phix~~lang="phix">(phixonline)--> <span style="color: #000080;font-style:italic;">-- -- demo\rosetta\Compiler\parse.e Line 9,602 ⟶ 9,611: <span style="color: #008080;">return</span> <span style="color: #000000;">t</span> <span style="color: #008080;">end</span> <span style="color: #008080;">function</span> <!--</~~lang~~syntaxhighlight>--> And a simple test driver for the specific task: <!--<~~lang~~syntaxhighlight ~~Phix~~lang="phix">(phixonline)--> <span style="color: #000080;font-style:italic;">-- -- demo\rosetta\Compiler\parse.exw Line 9,663 ⟶ 9,672: --main({0,0,"primes.c"}) -- as Algol, C, Python (apart from spacing) --main({0,0,"count.c"}) -- as AWK ( "" )</span> <!--</~~lang~~syntaxhighlight>--> {{out}} <pre> Line 9,672 ⟶ 9,681: =={{header\|Python}}== Tested with Python 2.7 and 3.x <~~lang~~syntaxhighlight ~~Python~~lang="python">from __future__ import print_function import sys, shlex, operator Line 9,938 ⟶ 9,947: error(0, 0, "Can't open %s" % sys.argv[1]) t = parse() prt_ast(t)</~~lang~~syntaxhighlight> {{out\|case=prime numbers AST}} Line 10,042 ⟶ 10,051: =={{header\|RATFOR}}== {{works with\|ratfor77\|1[https://sourceforge.0net/p/chemoelectric/ratfor77/ (public domain ~~ratfor77 in C)~~1.0]}} {{works with\|gfortran\|11.2.1}} {{works with\|f2c\|20100827}} FORTRAN 77 is ~~''not''~~ a non-recursive language, in the specific sense that it does not support recursive algorithms. ~~Local~~What ~~variables,~~is inmissing ~~standard-conforming~~is ~~programs,~~simple: there is no way to specify that a value should go onto a call stack. Local variables were all treated by compilers more or less as what C programmers would call "static". Subprogram parameters were all passed by reference, rather than by value as in C. ''However'', it is perfectly possible to implement a recursive language ''in'' FORTRAN 77 and do the programming in ''that''. Line 10,054 ⟶ 10,063: Printing the abstract syntax tree is done with a quite ordinary non-recursive tree traversal written directly in Ratfor. There is no paradox in the notion of doing recursive programming within a Ratfor program by the method described above. All the recursion is at a higher level of abstraction than the Ratfor programming itself. If you examine the Ratfor code ''as'' Ratfor code, there is not a single recursive call. <~~lang~~syntaxhighlight lang="ratfor">###################################################################### # # The Rosetta Code parser in Ratfor 77. Line 11,229 ⟶ 11,240: i = 1 while (code(i) /!= XLOC \|\| code(i + 1) /!= loc) i = i + 2 ipfind = i Line 11,341 ⟶ 11,352: { i = dtpop (dstack, idstck) if (i /!= 0) ip = ipfind (code, code(ip + 1)) else Line 11,606 ⟶ 11,617: end ######################################################################</~~lang~~syntaxhighlight> Line 11,721 ⟶ 11,732: The following code implements a configurable (from a symbol map provided as a parameter) Precedence Climbing parser for the output of the [http://rosettacode.org/wiki/Compiler/lexical_analyzer#Scala lexer]. The recursive descent language parser is closely based on the pseudo code given in the task description. <~~lang~~syntaxhighlight lang="scala"> package xyz.hyperreal.rosettacodeCompiler Line 11,931 ⟶ 11,942: } </syntaxhighlight> ~~</lang>~~ =={{header\|Scheme}}== Line 11,937 ⟶ 11,948: Code implements a recursive descent parser based on the given grammar. Tested against all programs in [[Compiler/Sample programs]]. <~~lang~~syntaxhighlight lang="scheme"> (import (scheme base) (scheme process-context) Line 12,144 ⟶ 12,155: (display-ast (parse-file (cadr (command-line)))) (display "Error: provide program filename\n")) </syntaxhighlight> ~~</lang>~~ =={{header\|Wren}}== Line 12,151 ⟶ 12,162: {{libheader\|Wren-fmt}} {{libheader\|wren-ioutil}} <~~lang~~syntaxhighlight ~~ecmascript~~lang="wren">import "./dynamic" for Enum, Struct, Tuple import "./fmt" for Fmt import "./ioutil" for FileUtil var tokens = [ Line 12,467 ⟶ 12,478: lines = FileUtil.readLines("source.txt") lineCount = lines.count prtAst.call(parse.call())</~~lang~~syntaxhighlight> {{out}} Line 12,569 ⟶ 12,580: =={{header\|Zig}}== <~~lang~~syntaxhighlight lang="zig"> const std = @import("std"); Line 13,122 ⟶ 13,133: return result; } </syntaxhighlight> ~~</lang>~~