Compiler/syntax analyzer: Difference between revisions

m
(J)
m (→‎{{header|Wren}}: Minor tidy)
 
(29 intermediate revisions by 5 users not shown)
Line 21:
[https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form Extended Backus-Naur Form (EBNF)]:
 
<syntaxhighlight lang="ebnf">
<lang EBNF>
stmt_list = {stmt} ;
 
Line 47:
| '(' expr ')'
| ('+' | '-' | '!') primary
;</langsyntaxhighlight>
 
The resulting AST should be formulated as a Binary Tree.
Line 68:
|-
| style="vertical-align:top" |
<langsyntaxhighlight lang="c">count = 1;
while (count < 10) {
print("count is: ", count, "\n");
count = count + 1;
}</langsyntaxhighlight>
 
| style="vertical-align:top" |
Line 280:
[https://en.wikipedia.org/wiki/Recursive_descent_parser Recursive Descent] for statement parsing. The AST is also built:
 
<langsyntaxhighlight lang="python">def expr(p)
if tok is "("
x = paren_expr()
Line 364:
t = make_node(Sequence, t, stmt())
until tok is end-of-file
return t</langsyntaxhighlight>
 
;Once the AST is built, it should be output in a [[Flatten_a_list|flattened format.]] This can be as simple as the following:
 
<langsyntaxhighlight lang="python">def prt_ast(t)
if t == NULL
print(";\n")
Line 378:
print("\n")
prt_ast(t.left)
prt_ast(t.right)</langsyntaxhighlight>
 
;If the AST is correctly built, loading it into a subsequent program should be as simple as:
 
<langsyntaxhighlight lang="python">def load_ast()
line = readline()
# Each line has at least one token
Line 402:
left = load_ast()
right = load_ast()
return make_node(node_type, left, right)</langsyntaxhighlight>
 
Finally, the AST can also be tested by running it against one of the AST Interpreter [[Compiler/AST_interpreter|solutions]].
Line 415:
|-
| style="vertical-align:top" |
<langsyntaxhighlight lang="c">/*
Simple prime number generator
*/
Line 434:
}
}
print("Total primes found: ", count, "\n");</langsyntaxhighlight>
 
| style="vertical-align:top" |
Line 654:
 
=={{header|ALGOL W}}==
<syntaxhighlight lang ="algolw">begin % syntax analyser %
begin % syntax analyser %
% parse tree nodes %
record node( integer type
Line 952 ⟶ 953:
while begin
if tkType = tString then begin
stmtNode := opNode( nSequence, stmtNode, opNode( nPrts, operandNode( nString, tkIntegerValue ), null ) );
, stmtNode
, opNode( nPrts, operandNode( nString, tkIntegerValue ), null )
);
readToken
end
Line 981 ⟶ 985:
reference(node) listNode;
listNode := null;
while tkType not = terminator and tkType not = tEnd_of_input do listNode := opNode( nSequence, listNode, parseStatement );
and tkType not = tEnd_of_input do listNode := opNode( nSequence, listNode, parseStatement );
listNode
end parseStatementList ;
 
% sets a node code and name %
nIdentifier := 1; ndName( nIdentifier ) := "Identifier"; nString := 2; ndName( nString ) := "String";
procedure setNode ( integer result nd; integer value ndCode; string(14) value name ) ;
nInteger := 3; ndName( nInteger ) := "Integer"; nSequence := 4; ndName( nSequence ) := "Sequence";
nIf begin nd := 5ndCode; ndName( nIf ndCode ) := "If"; nPrtc := 6; ndName( nPrtc ) :=name "Prtc"end;
 
nPrts := 7; ndName( nPrts ) := "Prts"; nPrti := 8; ndName( nPrti ) := "Prti";
setNode( nIdentifier, 1, "Identifier" ); setNode( nString, 2, "String" );
nWhile := 9; ndName( nWhile ) := "While"; nAssign := 10; ndName( nAssign ) := "Assign";
nNegate := 11; ndNamesetNode( nNegatenInteger, ) :=3, "NegateInteger"; ); nNotsetNode( nSequence, 4, "Sequence" := 12); ndNamesetNode( nNotnIf, 5, "If" ) := "Not";
nMultiplysetNode( nPrtc, := 13; ndName( nMultiply6, "Prtc" ) := "Multiply"); setNode( nPrts, nDivide :=7, "Prts" 14; ndName( nDivide ) := "Divide";
nMod := 15; ndNamesetNode( nModnPrti, ) :=8, "ModPrti"; ); nAddsetNode( nWhile, := 16; ndName( nAdd9, "While" ) := "Add";
nSubtractsetNode( nAssign, := 17; ndName( nSubtract10, "Assign" ) := "Subtract"; setNode( nNegate, nLess 11, "Negate" := 18); ndNamesetNode( nLess nNot, 12, "Not" ) := "Less";
nLessEqualsetNode( nMultiply, 13, :="Multiply" 19; ndName( nLessEqual ); setNode( nDivide, ) :=14, "LessEqualDivide" ; nGreater := 20); ndNamesetNode( nGreaternMod, ) :=15, "GreaterMod" );
nGreaterEqualsetNode( nAdd, := 21; ndName( nGreaterEqual ) :=16, "GreaterEqualAdd"; nEqual := 22; ndName( nEqual ); setNode( nSubtract, ) :=17, "EqualSubtract" );
nNotEqual := 23; ndNamesetNode( nNotEqualnLess, ) :=18, "NotEqualLess"; nAnd := 24); ndNamesetNode( nAnd nLessEqual, 19, "LessEqual" ) := "And";
nOr := 25; ndNamesetNode( nOr nGreater, 20, "Greater" ) := "Or";
setNode( nGreaterEqual, 21, "GreaterEqual" ); setNode( nEqual, 22, "Equal" );
setNode( nNotEqual, 23, "NotEqual" ); setNode( nAnd, 24, "And" ); setNode( nOr, 25, "Or" );
tOp_multiply := 1; tkName( tOp_multiply ) := "Op_multiply"; tkPrec( tOp_multiply ) := 5;
tOp_divide := 2; tkName( tOp_divide ) := "Op_divide"; tkPrec( tOp_divide ) := 5;
Line 1,033 ⟶ 1,040:
tkNode( tOp_multiply ) := nMultiply; tkNode( tOp_divide ) := nDivide; tkNode( tOp_mod ) := nMod;
tkNode( tOp_add ) := nAdd; tkNode( tOp_subtract ) := nSubtract; tkNode( tOp_less ) := nLess;
tkNode( tOp_lessequal ) := nLessEqual; tkNode( tOp_greater ) := nGreater; tkNode( tOp_greaterequal ) := nGreaterEqual;
tkNode( tOp_greaterequal ) := nGreaterEqual;
tkNode( tOp_equal ) := nEqual; tkNode( tOp_notequal ) := nNotEqual; tkNode( tOp_not ) := nNot;
tkNode( tOp_and ) := nAnd; tkNode( tOp_or ) := nOr;
Line 1,041 ⟶ 1,049:
readToken;
writeNode( parseStatementList( tEnd_of_input ) )
end.</lang>
</syntaxhighlight>
{{out}}
Output from parsing the Prime Numbers example program.
Line 1,141 ⟶ 1,150:
;
</pre>
 
=={{header|ATS}}==
 
<syntaxhighlight lang="ats">(********************************************************************)
(* Usage: parse [INPUTFILE [OUTPUTFILE]]
If INPUTFILE or OUTPUTFILE is "-" or missing, then standard input
or standard output is used, respectively. *)
 
#include "share/atspre_staload.hats"
staload UN = "prelude/SATS/unsafe.sats"
 
#define NIL list_nil ()
#define :: list_cons
 
%{^
/* alloca(3) is needed for ATS exceptions. */
#include <alloca.h>
%}
 
(********************************************************************)
 
#define NUM_TOKENS 31
 
#define TOKEN_ELSE 0
#define TOKEN_IF 1
#define TOKEN_PRINT 2
#define TOKEN_PUTC 3
#define TOKEN_WHILE 4
#define TOKEN_MULTIPLY 5
#define TOKEN_DIVIDE 6
#define TOKEN_MOD 7
#define TOKEN_ADD 8
#define TOKEN_SUBTRACT 9
#define TOKEN_NEGATE 10
#define TOKEN_LESS 11
#define TOKEN_LESSEQUAL 12
#define TOKEN_GREATER 13
#define TOKEN_GREATEREQUAL 14
#define TOKEN_EQUAL 15
#define TOKEN_NOTEQUAL 16
#define TOKEN_NOT 17
#define TOKEN_ASSIGN 18
#define TOKEN_AND 19
#define TOKEN_OR 20
#define TOKEN_LEFTPAREN 21
#define TOKEN_RIGHTPAREN 22
#define TOKEN_LEFTBRACE 23
#define TOKEN_RIGHTBRACE 24
#define TOKEN_SEMICOLON 25
#define TOKEN_COMMA 26
#define TOKEN_IDENTIFIER 27
#define TOKEN_INTEGER 28
#define TOKEN_STRING 29
#define TOKEN_END_OF_INPUT 30
 
typedef token_t =
[i : int | TOKEN_ELSE <= i; i <= TOKEN_END_OF_INPUT]
int i
typedef tokentuple_t = (token_t, String, ullint, ullint)
 
fn
token_text (tok : token_t) : String =
case+ tok of
| TOKEN_ELSE => "else"
| TOKEN_IF => "if"
| TOKEN_PRINT => "print"
| TOKEN_PUTC => "putc"
| TOKEN_WHILE => "while"
| TOKEN_MULTIPLY => "*"
| TOKEN_DIVIDE => "/"
| TOKEN_MOD => "%"
| TOKEN_ADD => "+"
| TOKEN_SUBTRACT => "-"
| TOKEN_NEGATE => "-"
| TOKEN_LESS => "<"
| TOKEN_LESSEQUAL => "<="
| TOKEN_GREATER => ">"
| TOKEN_GREATEREQUAL => ">="
| TOKEN_EQUAL => "=="
| TOKEN_NOTEQUAL => "!="
| TOKEN_NOT => "!"
| TOKEN_ASSIGN => "="
| TOKEN_AND => "&&"
| TOKEN_OR => "||"
| TOKEN_LEFTPAREN => "("
| TOKEN_RIGHTPAREN => ")"
| TOKEN_LEFTBRACE => "{"
| TOKEN_RIGHTBRACE => "}"
| TOKEN_SEMICOLON => ";"
| TOKEN_COMMA => ","
| TOKEN_IDENTIFIER => "Ident"
| TOKEN_INTEGER => "Integer literal"
| TOKEN_STRING => "String literal"
| TOKEN_END_OF_INPUT => "EOI"
 
(********************************************************************)
(* A perfect hash for the lexical token names.
 
This hash was generated by GNU gperf and then translated to
reasonable ATS by hand. Note, though, that one could have embedded
the generated C code directly and used it. *)
 
#define MIN_WORD_LENGTH 5
#define MAX_WORD_LENGTH 15
#define MIN_HASH_VALUE 5
#define MAX_HASH_VALUE 64
#define HASH_TABLE_SIZE 65
 
local
extern castfn u : {n : nat | n < 256} int n -<> uint8 n
in
vtypedef asso_values_vt = @[[n : nat | n < 256] uint8 n][256]
 
var asso_values =
@[[n : nat | n < 256] uint8 n][256]
(u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
u 65, u 65, u 65, u 65, u 65, u 65, u 10, u 65, u 65, u 65,
u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
u 0, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 0, u 65, u 25,
u 5, u 5, u 0, u 15, u 65, u 0, u 65, u 65, u 10, u 65,
u 30, u 0, u 65, u 5, u 10, u 10, u 0, u 15, u 65, u 65,
u 65, u 5, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
u 65, u 65, u 65, u 65, u 65, u 65)
end
 
fn
get_asso_value {i : nat | i < 256}
(i : uint i) :<>
[n : nat | n < 256] uint n =
let
extern castfn u8ui : {n : nat} uint8 n -<> uint n
extern castfn mk_asso_values :<>
{p : addr} ptr p -<> (asso_values_vt @ p | ptr p)
 
val asso_values_tup = mk_asso_values (addr@ asso_values)
macdef asso_values = !(asso_values_tup.1)
val retval = asso_values[i]
val _ = $UN.castvwtp0{void} asso_values_tup
in
u8ui retval
end
 
fn
hash {n : int | MIN_WORD_LENGTH <= n; n <= MAX_WORD_LENGTH}
(str : string n,
len : size_t n) :<>
[key : nat] uint key =
let
extern castfn uc2ui : {n : nat} uchar n -<> uint n
 
val c1 = uc2ui (c2uc str[4])
val c2 = uc2ui (c2uc str[pred len])
in
sz2u len + get_asso_value c1 + get_asso_value c2
end
 
typedef wordlist_vt = @[(String, token_t)][HASH_TABLE_SIZE]
 
var wordlist =
@[(String, token_t)][HASH_TABLE_SIZE]
(("", 0), ("", 0), ("", 0), ("", 0), ("", 0),
("Comma", 26),
("Op_not", 17),
("", 0), ("", 0), ("", 0),
("Keyword_if", 1),
("Op_mod", 7),
("End_of_input", 30),
("Keyword_print", 2),
("Op_divide", 6),
("RightBrace", 24),
("Op_add", 8),
("Keyword_else", 0),
("Keyword_while", 4),
("Op_negate", 10),
("Identifier", 27),
("Op_notequal", 16),
("Op_less", 11),
("Op_equal", 15),
("LeftBrace", 23),
("Op_or", 20),
("Op_subtract", 9),
("Op_lessequal", 12),
("", 0), ("", 0),
("Op_greater", 13),
("Op_multiply", 5 ),
("Integer", 28),
("", 0), ("", 0),
("Op_greaterequal", 14),
("", 0),
("Keyword_putc", 3),
("", 0),
("LeftParen", 21),
("RightParen", 22),
("Op_and", 19),
("", 0), ("", 0), ("", 0), ("", 0), ("", 0), ("", 0), ("", 0),
("Op_assign", 18),
("", 0),
("String", 29),
("", 0), ("", 0), ("", 0), ("", 0), ("", 0), ("", 0), ("", 0),
("", 0), ("", 0), ("", 0), ("", 0), ("", 0),
("Semicolon", 25))
 
fn
get_wordlist_entry
{n : nat | n <= MAX_HASH_VALUE}
(key : uint n) :<> (String, token_t) =
let
extern castfn mk_wordlist_tup :<>
{p : addr} ptr p -<> (wordlist_vt @ p | ptr p)
 
val wordlist_tup = mk_wordlist_tup (addr@ wordlist)
macdef wordlist = !(wordlist_tup.1)
val retval = wordlist[key]
val _ = $UN.castvwtp0{void} wordlist_tup
in
retval
end
 
fn
string2token_t_opt
{n : int}
(str : string n) :<>
Option token_t =
let
val len = string_length str
in
if len < i2sz MIN_WORD_LENGTH then
None ()
else if i2sz MAX_WORD_LENGTH < len then
None ()
else
let
val key = hash (str, len)
in
if i2u MAX_HASH_VALUE < key then
None ()
else
let
val (s, tok) = get_wordlist_entry (key)
in
if str <> s then
None ()
else
Some tok
end
end
end
 
(********************************************************************)
 
exception bad_lex_integer of (String)
exception bad_lex_token_name of (String)
exception bad_string_literal of (String)
 
extern fun {}
skip_something$pred : char -<> bool
fn {}
skip_something {n : nat}
{i : nat | i <= n}
(s : string n,
n : size_t n,
i : size_t i) :<>
[j : nat | i <= j; j <= n]
size_t j =
let
fun
loop {k : nat | i <= k; k <= n} .<n - k>.
(k : size_t k) :<>
[j : nat | i <= j; j <= n]
size_t j =
if k = n then
k
else if ~(skip_something$pred<> s[k]) then
k
else
loop (succ k)
in
loop i
end
 
fn
skip_space {n : nat}
{i : nat | i <= n}
(s : string n,
n : size_t n,
i : size_t i) :<>
[j : nat | i <= j; j <= n]
size_t j =
let
implement skip_something$pred<> (c) = isspace c
in
skip_something (s, n, i)
end
 
fn
skip_nonspace {n : nat}
{i : nat | i <= n}
(s : string n,
n : size_t n,
i : size_t i) :<>
[j : nat | i <= j; j <= n]
size_t j =
let
implement skip_something$pred<> (c) = ~isspace c
in
skip_something (s, n, i)
end
 
fn
skip_nonquote {n : nat}
{i : nat | i <= n}
(s : string n,
n : size_t n,
i : size_t i) :<>
[j : nat | i <= j; j <= n]
size_t j =
let
implement skip_something$pred<> (c) = c <> '"'
in
skip_something (s, n, i)
end
 
fn
skip_string_literal
{n : nat}
{i : nat | i <= n}
(s : string n,
n : size_t n,
i : size_t i) :<>
[j : nat | i <= j; j <= n]
size_t j =
if i = n then
i
else if s[i] <> '"' then
i
else
let
val j = skip_nonquote (s, n, succ i)
in
if j = n then
i
else
succ j
end
 
fn
get_substr {n, i, j : nat | i <= j; j <= n}
(s : string n,
i : size_t i,
j : size_t j) :
[m : int | m == j - i] string m =
let
val s = string_make_substring (s, i, j - i)
in
strnptr2string s
end
 
fn
string2ullint
{n : nat}
(s : string n) : ullint =
let
val n = string_length s
in
if n = i2sz 0 then
$raise bad_lex_integer ("")
else
let
extern castfn u2ull : uint -<> ullint
 
fun
evaluate {k : nat | k <= n} .<n - k>.
(k : size_t k,
v : ullint) : ullint =
if k = n then
v
else if ~isdigit s[k] then
$raise bad_lex_integer (s)
else
let
val d = char2ui s[k] - char2ui '0'
in
evaluate (succ k, (10ULL * v) + u2ull d)
end
in
evaluate (i2sz 0, 0ULL)
end
end
 
fn
string2token {n : int}
(str : string n) : token_t =
case+ string2token_t_opt str of
| None () => $raise bad_lex_token_name (str)
| Some tok => tok
 
fn
read_lex_file (inpf : FILEref) : List0 tokentuple_t =
(* Convert the output of "lex" to a list of tokens. *)
(* This routine could stand to do more validation of the input. *)
let
fun
loop (lst : List0 tokentuple_t) : List0 tokentuple_t =
if fileref_is_eof inpf then
lst
else
let
val s = strptr2string (fileref_get_line_string inpf)
val n = string_length s
prval _ = lemma_g1uint_param n
 
val i0_line_no = skip_space (s, n, i2sz 0)
in
if i0_line_no = n then
(* Skip any blank lines, including end of file. *)
loop lst
else
let
val i1_line_no = skip_nonspace (s, n, i0_line_no)
val s_line_no = get_substr (s, i0_line_no, i1_line_no)
val line_no = string2ullint s_line_no
 
val i0_column_no = skip_space (s, n, i1_line_no)
val i1_column_no = skip_nonspace (s, n, i0_column_no)
val s_column_no = get_substr (s, i0_column_no,
i1_column_no)
val column_no = string2ullint s_column_no
 
val i0_tokname = skip_space (s, n, i1_column_no)
val i1_tokname = skip_nonspace (s, n, i0_tokname)
val tokname = get_substr (s, i0_tokname, i1_tokname)
val tok = string2token tokname
in
case+ tok of
| TOKEN_INTEGER =>
let
val i0 = skip_space (s, n, i1_tokname)
val i1 = skip_nonspace (s, n, i0)
val arg = get_substr (s, i0, i1)
val toktup = (tok, arg, line_no, column_no)
in
loop (toktup :: lst)
end
| TOKEN_IDENTIFIER =>
let
val i0 = skip_space (s, n, i1_tokname)
val i1 = skip_nonspace (s, n, i0)
val arg = get_substr (s, i0, i1)
val toktup = (tok, arg, line_no, column_no)
in
loop (toktup :: lst)
end
| TOKEN_STRING =>
let
val i0 = skip_space (s, n, i1_tokname)
val i1 = skip_string_literal (s, n, i0)
val arg = get_substr (s, i0, i1)
val toktup = (tok, arg, line_no, column_no)
in
loop (toktup :: lst)
end
| _ =>
let
val toktup = (tok, "", line_no, column_no)
in
loop (toktup :: lst)
end
end
end
in
list_vt2t (list_reverse (loop NIL))
end
 
(********************************************************************)
 
exception truncated_lexical of ()
exception unexpected_token of (tokentuple_t, token_t)
exception unexpected_primary of (tokentuple_t)
exception unterminated_statement_block of (ullint, ullint)
exception expected_a_statement of (tokentuple_t)
 
datatype node_t =
| node_t_nil of ()
| node_t_leaf of (String, String)
| node_t_cons of (String, node_t, node_t)
 
fn
right_assoc (tok : token_t) : bool =
(* None of the currently supported operators is right
associative. *)
false
 
fn
binary_op (tok : token_t) : bool =
case+ tok of
| TOKEN_ADD => true
| TOKEN_SUBTRACT => true
| TOKEN_MULTIPLY => true
| TOKEN_DIVIDE => true
| TOKEN_MOD => true
| TOKEN_LESS => true
| TOKEN_LESSEQUAL => true
| TOKEN_GREATER => true
| TOKEN_GREATEREQUAL => true
| TOKEN_EQUAL => true
| TOKEN_NOTEQUAL => true
| TOKEN_AND => true
| TOKEN_OR => true
| _ => false
 
fn
precedence (tok : token_t) : int =
case+ tok of
| TOKEN_MULTIPLY => 13
| TOKEN_DIVIDE => 13
| TOKEN_MOD => 13
| TOKEN_ADD => 12
| TOKEN_SUBTRACT => 12
| TOKEN_NEGATE => 14
| TOKEN_NOT => 14
| TOKEN_LESS => 10
| TOKEN_LESSEQUAL => 10
| TOKEN_GREATER => 10
| TOKEN_GREATEREQUAL => 10
| TOKEN_EQUAL => 9
| TOKEN_NOTEQUAL => 9
| TOKEN_AND => 5
| TOKEN_OR => 4
| _ => ~1
 
fn
opname (tok : token_t) : String =
case- tok of
| TOKEN_MULTIPLY => "Multiply"
| TOKEN_DIVIDE => "Divide"
| TOKEN_MOD => "Mod"
| TOKEN_ADD => "Add"
| TOKEN_SUBTRACT => "Subtract"
| TOKEN_NEGATE => "Negate"
| TOKEN_NOT => "Not"
| TOKEN_LESS => "Less"
| TOKEN_LESSEQUAL => "LessEqual"
| TOKEN_GREATER => "Greater"
| TOKEN_GREATEREQUAL => "GreaterEqual"
| TOKEN_EQUAL => "Equal"
| TOKEN_NOTEQUAL => "NotEqual"
| TOKEN_AND => "And"
| TOKEN_OR => "Or"
 
fn
parse (lex : List0 tokentuple_t) : node_t =
let
typedef toktups_t (n : int) = list (tokentuple_t, n)
typedef toktups_t = [n : nat] toktups_t n
 
fn
expect (expected : token_t,
lex : toktups_t) : toktups_t =
case+ lex of
| NIL => $raise truncated_lexical ()
| toktup :: tail =>
if toktup.0 = expected then
tail
else
$raise unexpected_token (toktup, expected)
 
fn
peek {n : int} (lex : toktups_t n) : [1 <= n] token_t =
case+ lex of
| NIL => $raise truncated_lexical ()
| (tok, _, _, _) :: _ => tok
 
fun
stmt (lex : toktups_t) : (node_t, toktups_t) =
case+ lex of
| NIL => $raise truncated_lexical ()
| (TOKEN_IF, _, _, _) :: lex =>
let
val (e, lex) = paren_expr lex
val (s, lex) = stmt lex
in
case+ lex of
| (TOKEN_ELSE, _, _, _) :: lex =>
let
val (t, lex) = stmt lex
in
(node_t_cons ("If", e, node_t_cons ("If", s, t)), lex)
end
| _ =>
let
(* There is no 'else' clause. *)
val t = node_t_nil ()
in
(node_t_cons ("If", e, node_t_cons ("If", s, t)), lex)
end
end
| (TOKEN_PUTC, _, _, _) :: lex =>
let
val (subtree, lex) = paren_expr lex
val subtree = node_t_cons ("Prtc", subtree, node_t_nil ())
val lex = expect (TOKEN_SEMICOLON, lex)
in
(subtree, lex)
end
| (TOKEN_PRINT, _, _, _) :: lex =>
let
val lex = expect (TOKEN_LEFTPAREN, lex)
fun
loop_over_args (subtree : node_t,
lex : toktups_t) : (node_t, toktups_t) =
case+ lex of
| (TOKEN_STRING, arg, _, _) ::
(TOKEN_COMMA, _, _, _) :: lex =>
let
val leaf = node_t_leaf ("String", arg)
val e = node_t_cons ("Prts", leaf, node_t_nil ())
in
loop_over_args
(node_t_cons ("Sequence", subtree, e), lex)
end
| (TOKEN_STRING, arg, _, _) :: lex =>
let
val lex = expect (TOKEN_RIGHTPAREN, lex)
val lex = expect (TOKEN_SEMICOLON, lex)
val leaf = node_t_leaf ("String", arg)
val e = node_t_cons ("Prts", leaf, node_t_nil ())
in
(node_t_cons ("Sequence", subtree, e), lex)
end
| _ :: _ =>
let
val (x, lex) = expr (0, lex)
val e = node_t_cons ("Prti", x, node_t_nil ())
val subtree = node_t_cons ("Sequence", subtree, e)
in
case+ peek lex of
| TOKEN_COMMA =>
let
val lex = expect (TOKEN_COMMA, lex)
in
loop_over_args (subtree, lex)
end
| _ =>
let
val lex = expect (TOKEN_RIGHTPAREN, lex)
val lex = expect (TOKEN_SEMICOLON, lex)
in
(subtree, lex)
end
end
| NIL => $raise truncated_lexical ()
in
loop_over_args (node_t_nil (), lex)
end
| (TOKEN_SEMICOLON, _, _, _) :: lex => (node_t_nil (), lex)
| (TOKEN_IDENTIFIER, arg, _, _) :: lex =>
let
val v = node_t_leaf ("Identifier", arg)
val lex = expect (TOKEN_ASSIGN, lex)
val (subtree, lex) = expr (0, lex)
val t = node_t_cons ("Assign", v, subtree)
val lex = expect (TOKEN_SEMICOLON, lex)
in
(t, lex)
end
| (TOKEN_WHILE, _, _, _) :: lex =>
let
val (e, lex) = paren_expr lex
val (t, lex) = stmt lex
in
(node_t_cons ("While", e, t), lex)
end
| (TOKEN_LEFTBRACE, _, _, _) :: lex =>
let
fun
loop_over_stmts (subtree : node_t,
lex : toktups_t) :
(node_t, toktups_t) =
case+ lex of
| (TOKEN_RIGHTBRACE, _, _, _) :: lex => (subtree, lex)
| (TOKEN_END_OF_INPUT, _, line_no, column_no) :: _ =>
$raise unterminated_statement_block (line_no, column_no)
| _ =>
let
val (e, lex) = stmt lex
in
loop_over_stmts
(node_t_cons ("Sequence", subtree, e), lex)
end
in
loop_over_stmts (node_t_nil (), lex)
end
| (TOKEN_END_OF_INPUT, _, _, _) :: lex => (node_t_nil (), lex)
| toktup :: _ => $raise expected_a_statement (toktup)
and
expr (prec : int,
lex : toktups_t) : (node_t, toktups_t) =
case+ lex of
| (TOKEN_LEFTPAREN, _, _, _) :: _ =>
(* '(' expr ')' *)
let
val (subtree, lex) = paren_expr lex
in
prec_climb (prec, subtree, lex)
end
| (TOKEN_ADD, _, _, _) :: lex =>
(* '+' expr *)
let
val (subtree, lex) = expr (precedence TOKEN_ADD, lex)
in
prec_climb (prec, subtree, lex)
end
| (TOKEN_SUBTRACT, _, _, _) :: lex =>
(* '-' expr *)
let
val (subtree, lex) = expr (precedence TOKEN_NEGATE, lex)
val subtree = node_t_cons ("Negate", subtree, node_t_nil ())
in
prec_climb (prec, subtree, lex)
end
| (TOKEN_NOT, _, _, _) :: lex =>
(* '!' expr *)
let
val (subtree, lex) = expr (precedence TOKEN_NOT, lex)
val subtree = node_t_cons ("Not", subtree, node_t_nil ())
in
prec_climb (prec, subtree, lex)
end
| (TOKEN_IDENTIFIER, arg, _, _) :: lex =>
let
val leaf = node_t_leaf ("Identifier", arg)
in
prec_climb (prec, leaf, lex)
end
| (TOKEN_INTEGER, arg, _, _) :: lex =>
let
val leaf = node_t_leaf ("Integer", arg)
in
prec_climb (prec, leaf, lex)
end
| toktup :: lex =>
$raise unexpected_primary (toktup)
| NIL =>
$raise truncated_lexical ()
and
prec_climb (prec : int,
subtree : node_t,
lex : toktups_t) : (node_t, toktups_t) =
case+ peek lex of
| tokval =>
if ~binary_op tokval then
(subtree, lex)
else if precedence tokval < prec then
(subtree, lex)
else
case+ lex of
| toktup :: lex =>
let
val q =
if right_assoc (toktup.0) then
precedence tokval
else
succ (precedence tokval)
 
val (e, lex) = expr (q, lex)
val subtree1 =
node_t_cons (opname (toktup.0), subtree, e)
in
prec_climb (prec, subtree1, lex)
end
and
paren_expr (lex : toktups_t) : (node_t, toktups_t) =
(* '(' expr ')' *)
let
val lex = expect (TOKEN_LEFTPAREN, lex)
val (subtree, lex) = expr (0, lex)
val lex = expect (TOKEN_RIGHTPAREN, lex)
in
(subtree, lex)
end
 
fun
main_loop (subtree : node_t,
lex : toktups_t) : node_t =
case+ peek lex of
| TOKEN_END_OF_INPUT => subtree
| _ =>
let
val (x, lex) = stmt lex
in
main_loop (node_t_cons ("Sequence", subtree, x), lex)
end
in
main_loop (node_t_nil (), lex)
end
 
fn
print_ast (outf : FILEref,
ast : node_t) : void =
let
fun
traverse (ast : node_t) : void =
case+ ast of
| node_t_nil () => fprintln! (outf, ";")
| node_t_leaf (str, arg) => fprintln! (outf, str, " ", arg)
| node_t_cons (str, left, right) =>
begin
fprintln! (outf, str);
traverse left;
traverse right
end
in
traverse ast
end
 
(********************************************************************)
 
fn
main_program (inpf : FILEref,
outf : FILEref) : int =
let
val toklst = read_lex_file inpf
val ast = parse toklst
val () = print_ast (outf, ast)
in
0
end
 
fn
error_start (line_no : ullint,
column_no : ullint) : void =
print! ("(", line_no, ", ", column_no, ") error: ")
 
implement
main (argc, argv) =
let
val inpfname =
if 2 <= argc then
$UN.cast{string} argv[1]
else
"-"
val outfname =
if 3 <= argc then
$UN.cast{string} argv[2]
else
"-"
in
try
let
val inpf =
if (inpfname : string) = "-" then
stdin_ref
else
fileref_open_exn (inpfname, file_mode_r)
 
val outf =
if (outfname : string) = "-" then
stdout_ref
else
fileref_open_exn (outfname, file_mode_w)
in
main_program (inpf, outf)
end
with
| ~ unexpected_primary @(tok, _, line_no, column_no) =>
begin
error_start (line_no, column_no);
println! ("Expecting a primary, found: ", token_text tok);
1
end
| ~ unexpected_token (@(tok, _, line_no, column_no), expected) =>
begin
error_start (line_no, column_no);
println! ("Expecting '", token_text expected,
"', found '", token_text tok, "'");
1
end
| ~ expected_a_statement @(tok, _, line_no, column_no) =>
begin
error_start (line_no, column_no);
println! ("expecting start of statement, found '",
token_text tok, "'");
1
end
| ~ unterminated_statement_block (line_no, column_no) =>
begin
error_start (line_no, column_no);
println! ("unterminated statement block");
1
end
| ~ truncated_lexical () =>
begin
println! ("truncated input token stream");
2
end
| ~ bad_lex_integer (s) =>
begin
println! ("bad integer literal in the token stream: '",
s, "'");
2
end
| ~ bad_string_literal (s) =>
begin
println! ("bad string literal in the token stream: '",
s, "'");
2
end
| ~ bad_lex_token_name (s) =>
begin
println! ("bad token name in the token stream: '",
s, "'");
2
end
end
 
(********************************************************************)</syntaxhighlight>
 
 
{{out}}
<pre>$ patscc -O2 -DATS_MEMALLOC_GCBDW -o parse parse-in-ATS.dats -latslib -lgc && ./lex compiler-tests/primes.t | ./parse
Sequence
Sequence
Sequence
Sequence
Sequence
;
Assign
Identifier count
Integer 1
Assign
Identifier n
Integer 1
Assign
Identifier limit
Integer 100
While
Less
Identifier n
Identifier limit
Sequence
Sequence
Sequence
Sequence
Sequence
;
Assign
Identifier k
Integer 3
Assign
Identifier p
Integer 1
Assign
Identifier n
Add
Identifier n
Integer 2
While
And
LessEqual
Multiply
Identifier k
Identifier k
Identifier n
Identifier p
Sequence
Sequence
;
Assign
Identifier p
NotEqual
Multiply
Divide
Identifier n
Identifier k
Identifier k
Identifier n
Assign
Identifier k
Add
Identifier k
Integer 2
If
Identifier p
If
Sequence
Sequence
;
Sequence
Sequence
;
Prti
Identifier n
;
Prts
String " is prime\n"
;
Assign
Identifier count
Add
Identifier count
Integer 1
;
Sequence
Sequence
Sequence
;
Prts
String "Total primes found: "
;
Prti
Identifier count
;
Prts
String "\n"
;</pre>
 
=={{header|AWK}}==
Tested with gawk 4.1.1 and mawk 1.3.4.
<syntaxhighlight lang="awk">
<lang AWK>
function Token_assign(tk, attr, attr_array, n, i) {
n=split(attr, attr_array)
Line 1,440 ⟶ 2,482:
prt_ast(t)
}
</syntaxhighlight>
</lang>
 
{{out|case=count}}
Line 1,481 ⟶ 2,523:
=={{header|C}}==
Tested with gcc 4.81 and later, compiles warning free with -Wall -Wextra
<langsyntaxhighlight Clang="c">#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Line 1,821 ⟶ 2,863:
init_io(&dest_fp, stdout, "wb", argc > 2 ? argv[2] : "");
prt_ast(parse());
}</langsyntaxhighlight>
 
{{out|case=prime numbers AST}}
Line 1,927 ⟶ 2,969:
Code by Steve Williams. Tested with GnuCOBOL 2.2.
 
<langsyntaxhighlight lang="cobol"> >>SOURCE FORMAT IS FREE
identification division.
*> this code is dedicated to the public domain
Line 2,532 ⟶ 3,574:
.
end program printast.
end program parser.</langsyntaxhighlight>
 
{{out|case=Primes}}
Line 2,631 ⟶ 3,673:
String "\n"
;</pre>
 
=={{header|Common Lisp}}==
{{works with|SBCL|2.2.3}}
{{works with|roswell|21.10.14.111}}
{{libheader|cl-ppcre}}
{{libheader|trivia}}
{{trans|Icon}}
 
 
<syntaxhighlight lang="lisp">#!/bin/sh
#|-*- mode:lisp -*-|#
#|
exec ros -Q -- $0 "$@"
|#
(progn ;;init forms
(ros:ensure-asdf)
#+quicklisp(ql:quickload '() :silent t))
 
(defpackage :ros.script.parse.3859374047
(:use :cl))
(in-package :ros.script.parse.3859374047)
 
;;;
;;; The Rosetta Code Tiny-Language Parser, in Common Lisp.
;;;
 
(require "cl-ppcre")
(require "trivia")
 
(defstruct tokstruc line-no column-no tok tokval)
 
(defconstant re-blank-line
(ppcre:create-scanner "^\\s*$"))
 
(defconstant re-token-1
(ppcre:create-scanner
"^\\s*(\\d+)\\s+(\\d+)\\s+(\\S+)\\s*$"))
 
(defconstant re-token-2
(ppcre:create-scanner
"^\\s*(\\d+)\\s+(\\d+)\\s+(\\S+)\\s+(\\S(.*\\S)?)\\s*$"))
 
(defun string-to-tok (s)
(trivia:match s
("Keyword_else" 'TOK-ELSE)
("Keyword_if" 'TOK-IF)
("Keyword_print" 'TOK-PRINT)
("Keyword_putc" 'TOK-PUTC)
("Keyword_while" 'TOK-WHILE)
("Op_multiply" 'TOK-MULTIPLY)
("Op_divide" 'TOK-DIVIDE)
("Op_mod" 'TOK-MOD)
("Op_add" 'TOK-ADD)
("Op_subtract" 'TOK-SUBTRACT)
("Op_negate" 'TOK-NEGATE)
("Op_less" 'TOK-LESS)
("Op_lessequal" 'TOK-LESSEQUAL)
("Op_greater" 'TOK-GREATER)
("Op_greaterequal" 'TOK-GREATEREQUAL)
("Op_equal" 'TOK-EQUAL)
("Op_notequal" 'TOK-NOTEQUAL)
("Op_not" 'TOK-NOT)
("Op_assign" 'TOK-ASSIGN)
("Op_and" 'TOK-AND)
("Op_or" 'TOK-OR)
("LeftParen" 'TOK-LEFTPAREN)
("RightParen" 'TOK-RIGHTPAREN)
("LeftBrace" 'TOK-LEFTBRACE)
("RightBrace" 'TOK-RIGHTBRACE)
("Semicolon" 'TOK-SEMICOLON)
("Comma" 'TOK-COMMA)
("Identifier" 'TOK-IDENTIFIER)
("Integer" 'TOK-INTEGER)
("String" 'TOK-STRING)
("End_of_input" 'TOK-END-OF-INPUT)
(_ (warn "unparseable token line")
(uiop:quit 1))))
 
(defun precedence (tok)
(case tok
(TOK-MULTIPLY 13)
(TOK-DIVIDE 13)
(TOK-MOD 13)
(TOK-ADD 12)
(TOK-SUBTRACT 12)
(TOK-NEGATE 14)
(TOK-NOT 14)
(TOK-LESS 10)
(TOK-LESSEQUAL 10)
(TOK-GREATER 10)
(TOK-GREATEREQUAL 10)
(TOK-EQUAL 9)
(TOK-NOTEQUAL 9)
(TOK-AND 5)
(TOK-OR 4)
(otherwise -1)))
 
(defun binary-p (tok)
(case tok
(TOK-ADD t)
(TOK-SUBTRACT t)
(TOK-MULTIPLY t)
(TOK-DIVIDE t)
(TOK-MOD t)
(TOK-LESS t)
(TOK-LESSEQUAL t)
(TOK-GREATER t)
(TOK-GREATEREQUAL t)
(TOK-EQUAL t)
(TOK-NOTEQUAL t)
(TOK-AND t)
(TOK-OR t)
(otherwise nil)))
 
(defun right-associative-p (tok)
(declare (ignorable tok))
nil) ; None of the current operators is right associative.
 
(defun tok-text (tok)
(ecase tok
(TOK-ELSE "else")
(TOK-IF "if")
(TOK-PRINT "print")
(TOK-PUTC "putc")
(TOK-WHILE "while")
(TOK-MULTIPLY "*")
(TOK-DIVIDE "/")
(TOK-MOD "%")
(TOK-ADD "+")
(TOK-SUBTRACT "-")
(TOK-NEGATE "-")
(TOK-LESS "<")
(TOK-LESSEQUAL "<=")
(TOK-GREATER ">")
(TOK-GREATEREQUAL ">=")
(TOK-EQUAL "==")
(TOK-NOTEQUAL "!=")
(TOK-NOT "!")
(TOK-ASSIGN "=")
(TOK-AND "&&")
(TOK-OR "((")
(TOK-LEFTPAREN "(")
(TOK-RIGHTPAREN ")")
(TOK-LEFTBRACE "{")
(TOK-RIGHTBRACE "}")
(TOK-SEMICOLON ";")
(TOK-COMMA ",")
(TOK-IDENTIFIER "Ident")
(TOK-INTEGER "Integer literal")
(TOK-STRING "String literal")
(TOK-END_OF_INPUT "EOI")))
 
(defun operator (tok)
(ecase tok
(TOK-MULTIPLY "Multiply")
(TOK-DIVIDE "Divide")
(TOK-MOD "Mod")
(TOK-ADD "Add")
(TOK-SUBTRACT "Subtract")
(TOK-NEGATE "Negate")
(TOK-NOT "Not")
(TOK-LESS "Less")
(TOK-LESSEQUAL "LessEqual")
(TOK-GREATER "Greater")
(TOK-GREATEREQUAL "GreaterEqual")
(TOK-EQUAL "Equal")
(TOK-NOTEQUAL "NotEqual")
(TOK-AND "And")
(TOK-OR "Or")))
 
(defun join (&rest args)
(apply #'concatenate 'string args))
 
(defun nxt (gettok)
(funcall gettok :nxt))
 
(defun curr (gettok)
(funcall gettok :curr))
 
(defun err (token msg)
(format t "(~A, ~A) error: ~A~%"
(tokstruc-line-no token)
(tokstruc-column-no token)
msg)
(uiop:quit 1))
 
(defun prt-ast (outf ast)
;;
;; For fun, let us do prt-ast *non*-recursively, with a stack and a
;; loop.
;;
(let ((stack `(,ast)))
(loop while stack
do (let ((x (car stack)))
(setf stack (cdr stack))
(cond ((not x) (format outf ";~%"))
((or (string= (car x) "Identifier")
(string= (car x) "Integer")
(string= (car x) "String"))
(format outf "~A ~A~%" (car x) (cadr x)))
(t (format outf "~A~%" (car x))
(setf stack (cons (caddr x) stack))
(setf stack (cons (cadr x) stack))))))))
 
(defun accept (gettok tok)
(if (eq (tokstruc-tok (curr gettok)) tok)
(nxt gettok)
nil))
 
(defun expect (gettok msg tok)
(let ((curr-tok (tokstruc-tok (curr gettok))))
(if (eq curr-tok tok)
(nxt gettok)
(err (curr gettok)
(join msg ": Expecting '"
(tok-text tok) "', found '"
(tok-text curr-tok) "'")))))
 
(defun parse (gettok)
(defun paren-expr (gettok)
(expect gettok "paren_expr" 'TOK-LEFTPAREN)
(let ((x (expr gettok 0)))
(expect gettok "paren_expr" 'TOK-RIGHTPAREN)
x))
 
(defun expr (gettok p)
(let* ((tok (curr gettok))
(x (case (tokstruc-tok tok)
(TOK-LEFTPAREN (paren-expr gettok))
(TOK-SUBTRACT
(nxt gettok)
(let ((y (expr gettok (precedence 'TOK-NEGATE))))
`("Negate" ,y ())))
(TOK-ADD
(nxt gettok)
(expr gettok (precedence 'TOK-NEGATE)))
(TOK-NOT
(nxt gettok)
(let ((y (expr gettok (precedence 'TOK-NOT))))
`("Not" ,y ())))
(TOK-IDENTIFIER
(let ((y `("Identifier" ,(tokstruc-tokval tok))))
(nxt gettok)
y))
(TOK-INTEGER
(let ((y `("Integer" ,(tokstruc-tokval tok))))
(nxt gettok)
y))
(otherwise
(err tok (join "Expecting a primary, found: "
(tok-text (tokstruc-tok tok))))))))
;;
;; Precedence climbing for binary operators.
;;
(loop for tok = (curr gettok)
for toktok = (tokstruc-tok tok)
while (and (binary-p toktok) (<= p (precedence toktok)))
do (progn (nxt gettok)
(let ((q (if (right-associative-p toktok)
(precedence toktok)
(1+ (precedence toktok)))))
(setf x `(,(operator toktok) ,x
,(expr gettok q))))))
x))
 
(defun stmt (gettok)
(cond ((accept gettok 'TOK-IF)
(let* ((e (paren-expr gettok))
(s (stmt gettok))
(x (if (accept gettok 'TOK-ELSE)
`("If" ,s ,(stmt gettok))
`("If" ,s ()))))
`("If" ,e ,x)))
 
((accept gettok 'TOK-PUTC)
(let ((x `("Prtc" ,(paren-expr gettok) ())))
(expect gettok "Putc" 'TOK-SEMICOLON)
x))
 
((accept gettok 'TOK-PRINT)
(expect gettok "Print" 'TOK-LEFTPAREN)
(let ((x '()))
(loop for tok = (curr gettok)
for toktok = (tokstruc-tok tok)
for e = (if (eq toktok 'TOK-STRING)
(let* ((tokval (tokstruc-tokval tok))
(leaf `("String" ,tokval))
(e `("Prts" ,leaf ())))
(nxt gettok)
e)
`("Prti" ,(expr gettok 0) ()))
do (setf x `("Sequence" ,x ,e))
while (accept gettok 'TOK-COMMA))
(expect gettok "Print" 'TOK-RIGHTPAREN)
(expect gettok "Print" 'TOK-SEMICOLON)
x))
 
((eq (tokstruc-tok (curr gettok)) 'TOK-SEMICOLON)
(nxt gettok))
 
((eq (tokstruc-tok (curr gettok)) 'TOK-IDENTIFIER)
(let ((v `("Identifier" ,(tokstruc-tokval (curr gettok)))))
(nxt gettok)
(expect gettok "assign" 'TOK-ASSIGN)
(let ((x `("Assign" ,v ,(expr gettok 0))))
(expect gettok "assign" 'TOK-SEMICOLON)
x)))
 
((accept gettok 'TOK-WHILE)
(let ((e (paren-expr gettok)))
`("While" ,e ,(stmt gettok))))
 
((accept gettok 'TOK-LEFTBRACE)
(let ((x '()))
(loop for tok = (curr gettok)
for toktok = (tokstruc-tok tok)
until (or (eq toktok 'TOK-RIGHTBRACE)
(eq toktok 'TOK-END-OF-INPUT))
do (setf x `("Sequence" ,x ,(stmt gettok))))
(expect gettok "Lbrace" 'TOK-RIGHTBRACE)
x))
 
((eq (tokstruc-tok (curr gettok)) 'TOK-END-OF-INPUT)
'())
 
(t (let* ((tok (curr gettok))
(toktok (tokstruc-tok tok)))
(err tok (join "expecting start of statement, found '"
(tok-text toktok) "'"))))))
 
;;
;; Parsing of the top-level statement sequence.
;;
(let ((x '()))
(nxt gettok)
(loop do (setf x `("Sequence" ,x ,(stmt gettok)))
until (eq (tokstruc-tok (curr gettok)) 'TOK-END-OF-INPUT))
x))
 
(defun string-to-tokstruc (s)
(let ((strings
(nth-value 1 (ppcre:scan-to-strings re-token-1 s))))
(if strings
(make-tokstruc :line-no (elt strings 0)
:column-no (elt strings 1)
:tok (string-to-tok (elt strings 2))
:tokval nil)
(let ((strings
(nth-value 1 (ppcre:scan-to-strings re-token-2 s))))
(if strings
(make-tokstruc :line-no (elt strings 0)
:column-no (elt strings 1)
:tok (string-to-tok (elt strings 2))
:tokval (elt strings 3))
(progn
(warn "unparseable token line")
(uiop:quit 1)))))))
 
(defun read-token-line (inpf)
(loop for line = (read-line inpf nil "End_of_input")
while (ppcre:scan re-blank-line line)
finally (return line)))
 
(defun open-inpf (inpf-filename)
(if (string= inpf-filename "-")
*standard-input*
(open inpf-filename :direction :input)))
 
(defun open-outf (outf-filename)
(if (string= outf-filename "-")
*standard-output*
(open outf-filename :direction :output
:if-exists :overwrite
:if-does-not-exist :create)))
 
(defun usage-error ()
(princ "Usage: parse [INPUTFILE [OUTPUTFILE]]" *standard-output*)
(terpri *standard-output*)
(princ "If either INPUTFILE or OUTPUTFILE is \"-\", the respective"
*standard-output*)
(princ " standard I/O is used." *standard-output*)
(terpri *standard-output*)
(uiop:quit 1))
 
(defun get-filenames (argv)
(trivia:match argv
((list) '("-" "-"))
((list inpf-filename) `(,inpf-filename "-"))
((list inpf-filename outf-filename) `(,inpf-filename
,outf-filename))
(_ (usage-error))))
 
(defun main (&rest argv)
(let* ((filenames (get-filenames argv))
(inpf-filename (car filenames))
(inpf (open-inpf inpf-filename))
(outf-filename (cadr filenames))
(outf (open-outf outf-filename)))
 
(let* ((current-token (list nil))
(gettok-curr (lambda () (elt current-token 0)))
(gettok-nxt (lambda ()
(let* ((s (read-token-line inpf))
(tok (string-to-tokstruc s)))
(setf (elt current-token 0) tok)
tok)))
(gettok (lambda (instruction)
(trivia:match instruction
(:curr (funcall gettok-curr))
(:nxt (funcall gettok-nxt)))))
(ast (parse gettok)))
(prt-ast outf ast))
 
(unless (string= inpf-filename "-")
(close inpf))
(unless (string= outf-filename "-")
(close outf))
 
(uiop:quit 0)))
 
;;; vim: set ft=lisp lisp:</syntaxhighlight>
 
{{out}}
<pre>$ ./parse.ros compiler-tests/primes.lex
Sequence
Sequence
Sequence
Sequence
Sequence
;
Assign
Identifier count
Integer 1
Assign
Identifier n
Integer 1
Assign
Identifier limit
Integer 100
While
Less
Identifier n
Identifier limit
Sequence
Sequence
Sequence
Sequence
Sequence
;
Assign
Identifier k
Integer 3
Assign
Identifier p
Integer 1
Assign
Identifier n
Add
Identifier n
Integer 2
While
And
LessEqual
Multiply
Identifier k
Identifier k
Identifier n
Identifier p
Sequence
Sequence
;
Assign
Identifier p
NotEqual
Multiply
Divide
Identifier n
Identifier k
Identifier k
Identifier n
Assign
Identifier k
Add
Identifier k
Integer 2
If
Identifier p
If
Sequence
Sequence
;
Sequence
Sequence
;
Prti
Identifier n
;
Prts
String " is prime\n"
;
Assign
Identifier count
Add
Identifier count
Integer 1
;
Sequence
Sequence
Sequence
;
Prts
String "Total primes found: "
;
Prti
Identifier count
;
Prts
String "\n"
;
quence
Sequence
Sequence
Sequence
;
Assign
Identifier x_x
Divide
Multiply
Identifier x
Identifier x
Integer 200
Assign
Identifier y_y
Divide
Multiply
Identifier y
Identifier y
Integer 200
If
Greater
Add
Identifier x_x
Identifier y_y
Integer 800
If
Sequence
Sequence
Sequence
;
Assign
Identifier the_char
Add
Integer 48
Identifier i
If
Greater
Identifier i
Integer 9
If
Sequence
;
Assign
Identifier the_char
Integer 64
;
Assign
Identifier i
Identifier max_iter
;
Assign
Identifier y
Add
Divide
Multiply
Identifier x
Identifier y
Integer 100
Identifier y0
Assign
Identifier x
Add
Subtract
Identifier x_x
Identifier y_y
Identifier x0
Assign
Identifier i
Add
Identifier i
Integer 1
Prtc
Identifier the_char
;
Assign
Identifier x0
Add
Identifier x0
Identifier x_step
Prtc
Integer 10
;
Assign
Identifier y0
Subtract
Identifier y0
Identifier y_step</pre>
 
 
=={{header|Forth}}==
Tested with Gforth 0.7.3.
<langsyntaxhighlight Forthlang="forth">CREATE BUF 0 , \ single-character look-ahead buffer
: PEEK BUF @ 0= IF KEY BUF ! THEN BUF @ ;
: GETC PEEK 0 BUF ! ;
Line 2,805 ⟶ 4,454:
: -EOI? TOKEN-TYPE End_of_input <> ;
: PARSE $NULL GETTOK BEGIN -EOI? WHILE STMT $SEQUENCE REPEAT ;
PARSE .NODE</langsyntaxhighlight>
 
{{out|case=Count AST}}
Line 2,844 ⟶ 4,493:
</b>
 
=={{header|Fortran}}==
{{works with|gfortran|11.2.1}}
The following is Fortran 2008/2018 code with C preprocessing directives. If you call the program source ‘parse.F90’, with a capital ‘F’, then gfortran will know to run the C preprocessor.
<syntaxhighlight lang="fortran">!!!
!!! An implementation of the Rosetta Code parser task:
!!! https://rosettacode.org/wiki/Compiler/syntax_analyzer
!!!
!!! The implementation is based on the published pseudocode.
!!!
 
module compiler_type_kinds
use, intrinsic :: iso_fortran_env, only: int32
use, intrinsic :: iso_fortran_env, only: int64
 
implicit none
private
 
! Synonyms.
integer, parameter, public :: size_kind = int64
integer, parameter, public :: length_kind = size_kind
integer, parameter, public :: nk = size_kind
 
! Synonyms for character capable of storing a Unicode code point.
integer, parameter, public :: unicode_char_kind = selected_char_kind ('ISO_10646')
integer, parameter, public :: ck = unicode_char_kind
 
! Synonyms for integers capable of storing a Unicode code point.
integer, parameter, public :: unicode_ichar_kind = int32
integer, parameter, public :: ick = unicode_ichar_kind
end module compiler_type_kinds
 
module string_buffers
use, intrinsic :: iso_fortran_env, only: error_unit
use, intrinsic :: iso_fortran_env, only: int64
use, non_intrinsic :: compiler_type_kinds, only: nk, ck, ick
 
implicit none
private
 
public :: strbuf_t
 
type :: strbuf_t
integer(kind = nk), private :: len = 0
!
! ‘chars’ is made public for efficient access to the individual
! characters.
!
character(1, kind = ck), allocatable, public :: chars(:)
contains
procedure, pass, private :: ensure_storage => strbuf_t_ensure_storage
procedure, pass :: to_unicode_full_string => strbuf_t_to_unicode_full_string
procedure, pass :: to_unicode_substring => strbuf_t_to_unicode_substring
procedure, pass :: length => strbuf_t_length
procedure, pass :: set => strbuf_t_set
procedure, pass :: append => strbuf_t_append
generic :: to_unicode => to_unicode_full_string
generic :: to_unicode => to_unicode_substring
generic :: assignment(=) => set
end type strbuf_t
 
contains
 
function strbuf_t_to_unicode_full_string (strbuf) result (s)
class(strbuf_t), intent(in) :: strbuf
character(:, kind = ck), allocatable :: s
 
!
! This does not actually ensure that the string is valid Unicode;
! any 31-bit ‘character’ is supported.
!
 
integer(kind = nk) :: i
 
allocate (character(len = strbuf%len, kind = ck) :: s)
do i = 1, strbuf%len
s(i:i) = strbuf%chars(i)
end do
end function strbuf_t_to_unicode_full_string
 
function strbuf_t_to_unicode_substring (strbuf, i, j) result (s)
!
! ‘Extreme’ values of i and j are allowed, as shortcuts for ‘from
! the beginning’, ‘up to the end’, or ‘empty substring’.
!
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk), intent(in) :: i, j
character(:, kind = ck), allocatable :: s
 
!
! This does not actually ensure that the string is valid Unicode;
! any 31-bit ‘character’ is supported.
!
 
integer(kind = nk) :: i1, j1
integer(kind = nk) :: n
integer(kind = nk) :: k
 
i1 = max (1_nk, i)
j1 = min (strbuf%len, j)
n = max (0_nk, (j1 - i1) + 1_nk)
 
allocate (character(n, kind = ck) :: s)
do k = 1, n
s(k:k) = strbuf%chars(i1 + (k - 1_nk))
end do
end function strbuf_t_to_unicode_substring
 
elemental function strbuf_t_length (strbuf) result (n)
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk) :: n
 
n = strbuf%len
end function strbuf_t_length
 
elemental function next_power_of_two (x) result (y)
integer(kind = nk), intent(in) :: x
integer(kind = nk) :: y
 
!
! It is assumed that no more than 64 bits are used.
!
! The branch-free algorithm is that of
! https://archive.is/nKxAc#RoundUpPowerOf2
!
! Fill in bits until one less than the desired power of two is
! reached, and then add one.
!
 
y = x - 1
y = ior (y, ishft (y, -1))
y = ior (y, ishft (y, -2))
y = ior (y, ishft (y, -4))
y = ior (y, ishft (y, -8))
y = ior (y, ishft (y, -16))
y = ior (y, ishft (y, -32))
y = y + 1
end function next_power_of_two
 
elemental function new_storage_size (length_needed) result (size)
integer(kind = nk), intent(in) :: length_needed
integer(kind = nk) :: size
 
! Increase storage by orders of magnitude.
 
if (2_nk**32 < length_needed) then
size = huge (1_nk)
else
size = next_power_of_two (length_needed)
end if
end function new_storage_size
 
subroutine strbuf_t_ensure_storage (strbuf, length_needed)
class(strbuf_t), intent(inout) :: strbuf
integer(kind = nk), intent(in) :: length_needed
 
integer(kind = nk) :: new_size
type(strbuf_t) :: new_strbuf
 
if (.not. allocated (strbuf%chars)) then
! Initialize a new strbuf%chars array.
new_size = new_storage_size (length_needed)
allocate (strbuf%chars(1:new_size))
else if (ubound (strbuf%chars, 1) < length_needed) then
! Allocate a new strbuf%chars array, larger than the current
! one, but containing the same characters.
new_size = new_storage_size (length_needed)
allocate (new_strbuf%chars(1:new_size))
new_strbuf%chars(1:strbuf%len) = strbuf%chars(1:strbuf%len)
call move_alloc (new_strbuf%chars, strbuf%chars)
end if
end subroutine strbuf_t_ensure_storage
 
subroutine strbuf_t_set (dst, src)
class(strbuf_t), intent(inout) :: dst
class(*), intent(in) :: src
 
integer(kind = nk) :: n
integer(kind = nk) :: i
 
select type (src)
type is (character(*, kind = ck))
n = len (src, kind = nk)
call dst%ensure_storage(n)
do i = 1, n
dst%chars(i) = src(i:i)
end do
dst%len = n
type is (character(*))
n = len (src, kind = nk)
call dst%ensure_storage(n)
do i = 1, n
dst%chars(i) = src(i:i)
end do
dst%len = n
class is (strbuf_t)
n = src%len
call dst%ensure_storage(n)
dst%chars(1:n) = src%chars(1:n)
dst%len = n
class default
error stop
end select
end subroutine strbuf_t_set
 
subroutine strbuf_t_append (dst, src)
class(strbuf_t), intent(inout) :: dst
class(*), intent(in) :: src
 
integer(kind = nk) :: n_dst, n_src, n
integer(kind = nk) :: i
 
select type (src)
type is (character(*, kind = ck))
n_dst = dst%len
n_src = len (src, kind = nk)
n = n_dst + n_src
call dst%ensure_storage(n)
do i = 1, n_src
dst%chars(n_dst + i) = src(i:i)
end do
dst%len = n
type is (character(*))
n_dst = dst%len
n_src = len (src, kind = nk)
n = n_dst + n_src
call dst%ensure_storage(n)
do i = 1, n_src
dst%chars(n_dst + i) = src(i:i)
end do
dst%len = n
class is (strbuf_t)
n_dst = dst%len
n_src = src%len
n = n_dst + n_src
call dst%ensure_storage(n)
dst%chars((n_dst + 1):n) = src%chars(1:n_src)
dst%len = n
class default
error stop
end select
end subroutine strbuf_t_append
 
end module string_buffers
 
module reading_one_line_from_a_stream
use, intrinsic :: iso_fortran_env, only: input_unit
use, intrinsic :: iso_fortran_env, only: error_unit
use, non_intrinsic :: compiler_type_kinds, only: nk, ck, ick
use, non_intrinsic :: string_buffers
 
implicit none
private
 
! get_line_from_stream: read an entire input line from a stream into
! a strbuf_t.
public :: get_line_from_stream
 
character(1, kind = ck), parameter :: linefeed_char = char (10, kind = ck)
 
! The following is correct for Unix and its relatives.
character(1, kind = ck), parameter :: newline_char = linefeed_char
 
contains
 
subroutine get_line_from_stream (unit_no, eof, no_newline, strbuf)
integer, intent(in) :: unit_no
logical, intent(out) :: eof ! End of file?
logical, intent(out) :: no_newline ! There is a line but it has no
! newline? (Thus eof also must
! be .true.)
class(strbuf_t), intent(inout) :: strbuf
 
character(1, kind = ck) :: ch
 
strbuf = ''
call get_ch (unit_no, eof, ch)
do while (.not. eof .and. ch /= newline_char)
call strbuf%append (ch)
call get_ch (unit_no, eof, ch)
end do
no_newline = eof .and. (strbuf%length() /= 0)
end subroutine get_line_from_stream
 
subroutine get_ch (unit_no, eof, ch)
!
! Read a single code point from the stream.
!
! Currently this procedure simply inputs ‘ASCII’ bytes rather than
! Unicode code points.
!
integer, intent(in) :: unit_no
logical, intent(out) :: eof
character(1, kind = ck), intent(out) :: ch
 
integer :: stat
character(1) :: c = '*'
 
eof = .false.
 
if (unit_no == input_unit) then
call get_input_unit_char (c, stat)
else
read (unit = unit_no, iostat = stat) c
end if
 
if (stat < 0) then
ch = ck_'*'
eof = .true.
else if (0 < stat) then
write (error_unit, '("Input error with status code ", I0)') stat
stop 1
else
ch = char (ichar (c, kind = ick), kind = ck)
end if
end subroutine get_ch
 
!!!
!!! If you tell gfortran you want -std=f2008 or -std=f2018, you likely
!!! will need to add also -fall-intrinsics or -U__GFORTRAN__
!!!
!!! The first way, you get the FGETC intrinsic. The latter way, you
!!! get the C interface code that uses getchar(3).
!!!
#ifdef __GFORTRAN__
 
subroutine get_input_unit_char (c, stat)
!
! The following works if you are using gfortran.
!
! (FGETC is considered a feature for backwards compatibility with
! g77. However, I know of no way to reconfigure input_unit as a
! Fortran 2003 stream, for use with ordinary ‘read’.)
!
character, intent(inout) :: c
integer, intent(out) :: stat
 
call fgetc (input_unit, c, stat)
end subroutine get_input_unit_char
 
#else
 
subroutine get_input_unit_char (c, stat)
!
! An alternative implementation of get_input_unit_char. This
! actually reads input from the C standard input, which might not
! be the same as input_unit.
!
use, intrinsic :: iso_c_binding, only: c_int
character, intent(inout) :: c
integer, intent(out) :: stat
 
interface
!
! Use getchar(3) to read characters from standard input. This
! assumes there is actually such a function available, and that
! getchar(3) does not exist solely as a macro. (One could write
! one’s own getchar() if necessary, of course.)
!
function getchar () result (c) bind (c, name = 'getchar')
use, intrinsic :: iso_c_binding, only: c_int
integer(kind = c_int) :: c
end function getchar
end interface
 
integer(kind = c_int) :: i_char
 
i_char = getchar ()
!
! The C standard requires that EOF have a negative value. If the
! value returned by getchar(3) is not EOF, then it will be
! representable as an unsigned char. Therefore, to check for end
! of file, one need only test whether i_char is negative.
!
if (i_char < 0) then
stat = -1
else
stat = 0
c = char (i_char)
end if
end subroutine get_input_unit_char
 
#endif
 
end module reading_one_line_from_a_stream
 
module lexer_token_facts
implicit none
private
 
integer, parameter, public :: tk_EOI = 0
integer, parameter, public :: tk_Mul = 1
integer, parameter, public :: tk_Div = 2
integer, parameter, public :: tk_Mod = 3
integer, parameter, public :: tk_Add = 4
integer, parameter, public :: tk_Sub = 5
integer, parameter, public :: tk_Negate = 6
integer, parameter, public :: tk_Not = 7
integer, parameter, public :: tk_Lss = 8
integer, parameter, public :: tk_Leq = 9
integer, parameter, public :: tk_Gtr = 10
integer, parameter, public :: tk_Geq = 11
integer, parameter, public :: tk_Eq = 12
integer, parameter, public :: tk_Neq = 13
integer, parameter, public :: tk_Assign = 14
integer, parameter, public :: tk_And = 15
integer, parameter, public :: tk_Or = 16
integer, parameter, public :: tk_If = 17
integer, parameter, public :: tk_Else = 18
integer, parameter, public :: tk_While = 19
integer, parameter, public :: tk_Print = 20
integer, parameter, public :: tk_Putc = 21
integer, parameter, public :: tk_Lparen = 22
integer, parameter, public :: tk_Rparen = 23
integer, parameter, public :: tk_Lbrace = 24
integer, parameter, public :: tk_Rbrace = 25
integer, parameter, public :: tk_Semi = 26
integer, parameter, public :: tk_Comma = 27
integer, parameter, public :: tk_Ident = 28
integer, parameter, public :: tk_Integer = 29
integer, parameter, public :: tk_String = 30
integer, parameter, public :: tk_Positive = 31
 
character(16), parameter, public :: lexer_token_string(0:31) = &
(/ "EOI ", &
& "* ", &
& "/ ", &
& "% ", &
& "+ ", &
& "- ", &
& "- ", &
& "! ", &
& "< ", &
& "<= ", &
& "> ", &
& ">= ", &
& "== ", &
& "!= ", &
& "= ", &
& "&& ", &
& "|| ", &
& "if ", &
& "else ", &
& "while ", &
& "print ", &
& "putc ", &
& "( ", &
& ") ", &
& "{ ", &
& "} ", &
& "; ", &
& ", ", &
& "Ident ", &
& "Integer literal ", &
& "String literal ", &
& "+ " /)
 
integer, parameter, public :: lexer_token_arity(0:31) = &
& (/ -1, & ! EOI
& 2, 2, 2, 2, 2, & ! * / % + -
& 1, 1, & ! negate !
& 2, 2, 2, 2, 2, 2, & ! < <= > >= == !=
& -1, & ! =
& 2, 2, & ! && ||
& -1, -1, -1, -1, -1, & !
& -1, -1, -1, -1, -1, & !
& -1, -1, -1, -1, & !
& 1 /) ! positive
 
integer, parameter, public :: lexer_token_precedence(0:31) = &
& (/ -1, & ! EOI
& 13, 13, 13, & ! * / %
& 12, 12, & ! + -
& 14, 14, & ! negate !
& 10, 10, 10, 10, & ! < <= > >=
& 9, 9, & ! == !=
& -1, & ! =
& 5, & ! &&
& 4, & ! ||
& -1, -1, -1, -1, -1, & !
& -1, -1, -1, -1, -1, & !
& -1, -1, -1, -1, & !
& 14 /) ! positive
 
integer, parameter, public :: left_associative = 0
integer, parameter, public :: right_associative = 1
 
! All current operators are left associative. (The values in the
! array for things that are not operators are unimportant.)
integer, parameter, public :: lexer_token_associativity(0:31) = left_associative
 
end module lexer_token_facts
 
module reading_of_lexer_tokens
use, intrinsic :: iso_fortran_env, only: error_unit
use, non_intrinsic :: compiler_type_kinds, only: nk, ck, ick
use, non_intrinsic :: string_buffers
use, non_intrinsic :: reading_one_line_from_a_stream
use, non_intrinsic :: lexer_token_facts
 
implicit none
private
 
public :: lexer_token_t
public :: get_lexer_token
 
character(1, kind = ck), parameter :: horizontal_tab_char = char (9, kind = ck)
character(1, kind = ck), parameter :: linefeed_char = char (10, kind = ck)
character(1, kind = ck), parameter :: vertical_tab_char = char (11, kind = ck)
character(1, kind = ck), parameter :: formfeed_char = char (12, kind = ck)
character(1, kind = ck), parameter :: carriage_return_char = char (13, kind = ck)
character(1, kind = ck), parameter :: space_char = ck_' '
 
type :: lexer_token_t
integer :: token_no = -(huge (1))
character(:, kind = ck), allocatable :: val
integer(nk) :: line_no = -(huge (1_nk))
integer(nk) :: column_no = -(huge (1_nk))
end type lexer_token_t
 
contains
 
subroutine get_lexer_token (unit_no, lex_line_no, eof, token)
!
! Lines that are empty or contain only whitespace are tolerated.
!
! Also tolerated are comment lines, whose first character is a
! '!'. It is convenient for debugging to be able to comment out
! lines.
!
! A last line be without a newline is *not* tolerated, unless it
! contains only whitespace.
!
! Letting there be some whitespace is partly for the sake of
! reading cut-and-paste from a browser display.
!
integer, intent(in) :: unit_no
integer(kind = nk), intent(inout) :: lex_line_no
logical, intent(out) :: eof
type(lexer_token_t), intent(out) :: token
 
type(strbuf_t) :: strbuf
logical :: no_newline
logical :: input_found
 
! Let a negative setting initialize the line number.
lex_line_no = max (0_nk, lex_line_no)
 
strbuf = ''
eof = .false.
input_found = .false.
do while (.not. eof .and. .not. input_found)
call get_line_from_stream (unit_no, eof, no_newline, strbuf)
if (eof) then
if (no_newline) then
lex_line_no = lex_line_no + 1
if (.not. strbuf_is_all_whitespace (strbuf)) then
call start_error_message (lex_line_no)
write (error_unit, '("lexer line ends without a newline")')
stop 1
end if
end if
else
lex_line_no = lex_line_no + 1
input_found = .true.
if (strbuf_is_all_whitespace (strbuf)) then
! A blank line.
input_found = .false.
else if (0 < strbuf%length()) then
if (strbuf%chars(1) == ck_'!') then
! A comment line.
input_found = .false.
end if
end if
end if
end do
 
token = lexer_token_t ()
if (.not. eof) then
token = strbuf_to_token (lex_line_no, strbuf)
end if
end subroutine get_lexer_token
 
function strbuf_to_token (lex_line_no, strbuf) result (token)
integer(kind = nk), intent(in) :: lex_line_no
class(strbuf_t), intent(in) :: strbuf
type(lexer_token_t) :: token
 
character(:, kind = ck), allocatable :: line_no
character(:, kind = ck), allocatable :: column_no
character(:, kind = ck), allocatable :: token_name
character(:, kind = ck), allocatable :: val_string
integer :: stat
integer(kind = nk) :: n
 
call split_line (lex_line_no, strbuf, line_no, column_no, token_name, val_string)
 
read (line_no, *, iostat = stat) token%line_no
if (stat /= 0) then
call start_error_message (lex_line_no)
write (error_unit, '("line number field is unreadable or too large")')
stop 1
end if
 
read (column_no, *, iostat = stat) token%column_no
if (stat /= 0) then
call start_error_message (lex_line_no)
write (error_unit, '("column number field is unreadable or too large")')
stop 1
end if
 
token%token_no = token_name_to_token_no (lex_line_no, token_name)
 
select case (token%token_no)
case (tk_Ident)
! I do no checking of identifier names.
allocate (token%val, source = val_string)
case (tk_Integer)
call check_is_all_digits (lex_line_no, val_string)
allocate (token%val, source = val_string)
case (tk_String)
n = len (val_string, kind = nk)
if (n < 2) then
call string_literal_missing_or_no_good
else if (val_string(1:1) /= ck_'"' .or. val_string(n:n) /= ck_'"') then
call string_literal_missing_or_no_good
else
allocate (token%val, source = val_string)
end if
case default
if (len (val_string, kind = nk) /= 0) then
call start_error_message (lex_line_no)
write (error_unit, '("token should not have a value")')
stop 1
end if
end select
 
contains
 
subroutine string_literal_missing_or_no_good
call start_error_message (lex_line_no)
write (error_unit, '("""String"" token requires a string literal")')
stop 1
end subroutine string_literal_missing_or_no_good
 
end function strbuf_to_token
 
subroutine split_line (lex_line_no, strbuf, line_no, column_no, token_name, val_string)
integer(kind = nk), intent(in) :: lex_line_no
class(strbuf_t), intent(in) :: strbuf
character(:, kind = ck), allocatable, intent(out) :: line_no
character(:, kind = ck), allocatable, intent(out) :: column_no
character(:, kind = ck), allocatable, intent(out) :: token_name
character(:, kind = ck), allocatable, intent(out) :: val_string
 
integer(kind = nk) :: i, j
 
i = skip_whitespace (strbuf, 1_nk)
j = skip_non_whitespace (strbuf, i)
line_no = strbuf%to_unicode(i, j - 1)
call check_is_all_digits (lex_line_no, line_no)
 
i = skip_whitespace (strbuf, j)
j = skip_non_whitespace (strbuf, i)
column_no = strbuf%to_unicode(i, j - 1)
call check_is_all_digits (lex_line_no, column_no)
 
i = skip_whitespace (strbuf, j)
j = skip_non_whitespace (strbuf, i)
token_name = strbuf%to_unicode(i, j - 1)
 
i = skip_whitespace (strbuf, j)
if (strbuf%length() < i) then
val_string = ck_''
else if (strbuf%chars(i) == ck_'"') then
j = skip_whitespace_backwards (strbuf, strbuf%length())
if (strbuf%chars(j) == ck_'"') then
val_string = strbuf%to_unicode(i, j)
else
call start_error_message (lex_line_no)
write (error_unit, '("string literal does not end in a double quote")')
stop 1
end if
else
j = skip_non_whitespace (strbuf, i)
val_string = strbuf%to_unicode(i, j - 1)
i = skip_whitespace (strbuf, j)
if (i <= strbuf%length()) then
call start_error_message (lex_line_no)
write (error_unit, '("token line contains unexpected text")')
stop 1
end if
end if
end subroutine split_line
 
function token_name_to_token_no (lex_line_no, token_name) result (token_no)
integer(kind = nk), intent(in) :: lex_line_no
character(*, kind = ck), intent(in) :: token_name
integer :: token_no
 
!!
!! This implementation is not optimized in any way, unless the
!! Fortran compiler can optimize the SELECT CASE.
!!
 
select case (token_name)
case (ck_"End_of_input")
token_no = tk_EOI
case (ck_"Op_multiply")
token_no = tk_Mul
case (ck_"Op_divide")
token_no = tk_Div
case (ck_"Op_mod")
token_no = tk_Mod
case (ck_"Op_add")
token_no = tk_Add
case (ck_"Op_subtract")
token_no = tk_Sub
case (ck_"Op_negate")
token_no = tk_Negate
case (ck_"Op_not")
token_no = tk_Not
case (ck_"Op_less")
token_no = tk_Lss
case (ck_"Op_lessequal ")
token_no = tk_Leq
case (ck_"Op_greater")
token_no = tk_Gtr
case (ck_"Op_greaterequal")
token_no = tk_Geq
case (ck_"Op_equal")
token_no = tk_Eq
case (ck_"Op_notequal")
token_no = tk_Neq
case (ck_"Op_assign")
token_no = tk_Assign
case (ck_"Op_and")
token_no = tk_And
case (ck_"Op_or")
token_no = tk_Or
case (ck_"Keyword_if")
token_no = tk_If
case (ck_"Keyword_else")
token_no = tk_Else
case (ck_"Keyword_while")
token_no = tk_While
case (ck_"Keyword_print")
token_no = tk_Print
case (ck_"Keyword_putc")
token_no = tk_Putc
case (ck_"LeftParen")
token_no = tk_Lparen
case (ck_"RightParen")
token_no = tk_Rparen
case (ck_"LeftBrace")
token_no = tk_Lbrace
case (ck_"RightBrace")
token_no = tk_Rbrace
case (ck_"Semicolon")
token_no = tk_Semi
case (ck_"Comma")
token_no = tk_Comma
case (ck_"Identifier")
token_no = tk_Ident
case (ck_"Integer")
token_no = tk_Integer
case (ck_"String")
token_no = tk_String
case default
call start_error_message (lex_line_no)
write (error_unit, '("unrecognized token name: ", A)') token_name
stop 1
end select
end function token_name_to_token_no
 
function skip_whitespace (strbuf, i) result (j)
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk), intent(in) :: i
integer(kind = nk) :: j
 
logical :: done
 
j = i
done = .false.
do while (.not. done)
if (at_end_of_line (strbuf, j)) then
done = .true.
else if (.not. isspace (strbuf%chars(j))) then
done = .true.
else
j = j + 1
end if
end do
end function skip_whitespace
 
function skip_non_whitespace (strbuf, i) result (j)
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk), intent(in) :: i
integer(kind = nk) :: j
 
logical :: done
 
j = i
done = .false.
do while (.not. done)
if (at_end_of_line (strbuf, j)) then
done = .true.
else if (isspace (strbuf%chars(j))) then
done = .true.
else
j = j + 1
end if
end do
end function skip_non_whitespace
 
function skip_whitespace_backwards (strbuf, i) result (j)
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk), intent(in) :: i
integer(kind = nk) :: j
 
logical :: done
 
j = i
done = .false.
do while (.not. done)
if (j == -1) then
done = .true.
else if (.not. isspace (strbuf%chars(j))) then
done = .true.
else
j = j - 1
end if
end do
end function skip_whitespace_backwards
 
function at_end_of_line (strbuf, i) result (bool)
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk), intent(in) :: i
logical :: bool
 
bool = (strbuf%length() < i)
end function at_end_of_line
 
elemental function strbuf_is_all_whitespace (strbuf) result (bool)
class(strbuf_t), intent(in) :: strbuf
logical :: bool
 
integer(kind = nk) :: n
integer(kind = nk) :: i
 
n = strbuf%length()
if (n == 0) then
bool = .true.
else
i = 1
bool = .true.
do while (bool .and. i /= n + 1)
bool = isspace (strbuf%chars(i))
i = i + 1
end do
end if
end function strbuf_is_all_whitespace
 
elemental function isspace (ch) result (bool)
character(1, kind = ck), intent(in) :: ch
logical :: bool
 
bool = (ch == horizontal_tab_char) .or. &
& (ch == linefeed_char) .or. &
& (ch == vertical_tab_char) .or. &
& (ch == formfeed_char) .or. &
& (ch == carriage_return_char) .or. &
& (ch == space_char)
end function isspace
 
elemental function isdigit (ch) result (bool)
character(1, kind = ck), intent(in) :: ch
logical :: bool
 
integer(kind = ick), parameter :: zero = ichar (ck_'0', kind = ick)
integer(kind = ick), parameter :: nine = ichar (ck_'9', kind = ick)
 
integer(kind = ick) :: i_ch
 
i_ch = ichar (ch, kind = ick)
bool = (zero <= i_ch .and. i_ch <= nine)
end function isdigit
 
subroutine check_is_all_digits (lex_line_no, str)
integer(kind = nk), intent(in) :: lex_line_no
character(*, kind = ck), intent(in) :: str
 
integer(kind = nk) :: n
integer(kind = nk) :: i
 
n = len (str, kind = nk)
if (n == 0_nk) then
call start_error_message (lex_line_no)
write (error_unit, '("a required field is missing")')
stop 1
else
do i = 1, n
if (.not. isdigit (str(i:i))) then
call start_error_message (lex_line_no)
write (error_unit, '("a numeric field contains a non-digit")')
stop 1
end if
end do
end if
end subroutine check_is_all_digits
 
subroutine start_error_message (lex_line_no)
integer(kind = nk), intent(in) :: lex_line_no
 
write (error_unit, '("Token stream error at line ", I0, ": ")', advance = 'no') &
& lex_line_no
end subroutine start_error_message
 
end module reading_of_lexer_tokens
 
module syntactic_analysis
use, intrinsic :: iso_fortran_env, only: input_unit
use, intrinsic :: iso_fortran_env, only: output_unit
use, intrinsic :: iso_fortran_env, only: error_unit
use, non_intrinsic :: compiler_type_kinds, only: nk, ck, ick
use, non_intrinsic :: string_buffers
use, non_intrinsic :: lexer_token_facts
use, non_intrinsic :: reading_of_lexer_tokens
 
implicit none
private
 
public :: ast_node_t
public :: ast_t
public :: parse_token_stream
public :: output_ast_flattened
 
integer, parameter, public :: tk_start_of_statement = -1
integer, parameter, public :: tk_primary = -2
 
integer, parameter :: node_Identifier = 1
integer, parameter :: node_String = 2
integer, parameter :: node_Integer = 3
integer, parameter :: node_Sequence = 4
integer, parameter :: node_If = 5
integer, parameter :: node_Prtc = 6
integer, parameter :: node_Prts = 7
integer, parameter :: node_Prti = 8
integer, parameter :: node_While = 9
integer, parameter :: node_Assign = 10
integer, parameter :: node_Negate = 11
integer, parameter :: node_Not = 12
integer, parameter :: node_Multiply = 13
integer, parameter :: node_Divide = 14
integer, parameter :: node_Mod = 15
integer, parameter :: node_Add = 16
integer, parameter :: node_Subtract = 17
integer, parameter :: node_Less = 18
integer, parameter :: node_LessEqual = 19
integer, parameter :: node_Greater = 20
integer, parameter :: node_GreaterEqual = 21
integer, parameter :: node_Equal = 22
integer, parameter :: node_NotEqual = 23
integer, parameter :: node_And = 24
integer, parameter :: node_Or = 25
 
character(16), parameter :: node_variety_string(1:25) = &
(/ "Identifier ", &
& "String ", &
& "Integer ", &
& "Sequence ", &
& "If ", &
& "Prtc ", &
& "Prts ", &
& "Prti ", &
& "While ", &
& "Assign ", &
& "Negate ", &
& "Not ", &
& "Multiply ", &
& "Divide ", &
& "Mod ", &
& "Add ", &
& "Subtract ", &
& "Less ", &
& "LessEqual ", &
& "Greater ", &
& "GreaterEqual ", &
& "Equal ", &
& "NotEqual ", &
& "And ", &
& "Or " /)
 
type :: ast_node_t
integer :: node_variety
character(:, kind = ck), allocatable :: val
type(ast_node_t), pointer :: left => null ()
type(ast_node_t), pointer :: right => null ()
contains
procedure, pass :: assign => ast_node_t_assign
generic :: assignment(=) => assign
final :: ast_node_t_finalize
end type ast_node_t
 
! ast_t phases.
integer, parameter :: building = 1
integer, parameter :: completed = 2
 
type :: ast_t
!
! This type is used to build the subtrees, as well as for the
! completed AST. The difference is in the setting of ‘phase’.
!
type(ast_node_t), pointer :: node => null ()
integer, private :: phase = building
contains
procedure, pass :: assign => ast_t_assign
generic :: assignment(=) => assign
final :: ast_t_finalize
end type ast_t
 
type(ast_t), parameter :: ast_nil = ast_t (null ())
 
contains
 
recursive subroutine ast_node_t_assign (node, other)
class(ast_node_t), intent(out) :: node
class(*), intent(in) :: other
 
select type (other)
class is (ast_node_t)
node%node_variety = other%node_variety
if (allocated (other%val)) allocate (node%val, source = other%val)
if (associated (other%left)) allocate (node%left, source = other%left)
if (associated (other%right)) allocate (node%right, source = other%right)
class default
! This branch should never be reached.
error stop
end select
end subroutine ast_node_t_assign
 
recursive subroutine ast_node_t_finalize (node)
type(ast_node_t), intent(inout) :: node
 
if (associated (node%left)) deallocate (node%left)
if (associated (node%right)) deallocate (node%right)
end subroutine ast_node_t_finalize
 
recursive subroutine ast_t_assign (ast, other)
class(ast_t), intent(out) :: ast
class(*), intent(in) :: other
 
select type (other)
class is (ast_t)
if (associated (other%node)) allocate (ast%node, source = other%node)
!
! Whether it is better to set phase to ‘building’ or to set it
! to ‘other%phase’ is unclear to me. Probably ‘building’ is the
! better choice. Which variable controls memory recovery is
! clear and unchanging, in that case: it is the original,
! ‘other’, that does.
!
ast%phase = building
class default
! This should not happen.
error stop
end select
end subroutine ast_t_assign
 
subroutine ast_t_finalize (ast)
type(ast_t), intent(inout) :: ast
 
!
! When we are building the tree, the tree’s nodes should not be
! deallocated when the ast_t variable temporarily holding them
! goes out of scope.
!
! However, once the AST is completed, we do want the memory
! recovered when the variable goes out of scope.
!
! (Elsewhere I have written a primitive garbage collector for
! Fortran programs, but in this case it would be a lot of overhead
! for little gain. In fact, we could reasonably just let the
! memory leak, in this program.
!
! Fortran runtimes *are* allowed by the standard to have garbage
! collectors built in. To my knowledge, at the time of this
! writing, only NAG Fortran has a garbage collector option.)
!
 
if (ast%phase == completed) then
if (associated (ast%node)) deallocate (ast%node)
end if
end subroutine ast_t_finalize
 
function parse_token_stream (unit_no) result (ast)
integer, intent(in) :: unit_no
type(ast_t) :: ast
 
integer(kind = nk) :: lex_line_no
type(ast_t) :: statement
type(lexer_token_t) :: token
 
lex_line_no = -1_nk
call get_token (unit_no, lex_line_no, token)
call parse_statement (unit_no, lex_line_no, token, statement)
ast = make_internal_node (node_Sequence, ast, statement)
do while (token%token_no /= tk_EOI)
call parse_statement (unit_no, lex_line_no, token, statement)
ast = make_internal_node (node_Sequence, ast, statement)
end do
ast%phase = completed
end function parse_token_stream
 
recursive subroutine parse_statement (unit_no, lex_line_no, token, ast)
integer, intent(in) :: unit_no
integer(kind = nk), intent(inout) :: lex_line_no
type(lexer_token_t), intent(inout) :: token
type(ast_t), intent(out) :: ast
 
ast = ast_nil
 
select case (token%token_no)
case (tk_If)
call parse_ifelse_construct
case (tk_Putc)
call parse_putc
case (tk_Print)
call parse_print
case (tk_Semi)
call get_token (unit_no, lex_line_no, token)
case (tk_Ident)
call parse_identifier
case (tk_While)
call parse_while_construct
case (tk_Lbrace)
call parse_lbrace_construct
case (tk_EOI)
continue
case default
call syntax_error_message ("", tk_start_of_statement, token)
stop 1
end select
 
contains
 
recursive subroutine parse_ifelse_construct
type(ast_t) :: predicate
type(ast_t) :: statement_for_predicate_true
type(ast_t) :: statement_for_predicate_false
 
call expect_token ("If", tk_If, token)
call get_token (unit_no, lex_line_no, token)
call parse_parenthesized_expression (unit_no, lex_line_no, token, predicate)
call parse_statement (unit_no, lex_line_no, token, statement_for_predicate_true)
if (token%token_no == tk_Else) then
call get_token (unit_no, lex_line_no, token)
call parse_statement (unit_no, lex_line_no, token, statement_for_predicate_false)
ast = make_internal_node (node_If, statement_for_predicate_true, &
& statement_for_predicate_false)
else
ast = make_internal_node (node_If, statement_for_predicate_true, ast_nil)
end if
ast = make_internal_node (node_If, predicate, ast)
end subroutine parse_ifelse_construct
 
recursive subroutine parse_putc
type(ast_t) :: arguments
 
call expect_token ("Putc", tk_Putc, token)
call get_token (unit_no, lex_line_no, token)
call parse_parenthesized_expression (unit_no, lex_line_no, token, arguments)
ast = make_internal_node (node_Prtc, arguments, ast_nil)
call expect_token ("Putc", tk_Semi, token)
call get_token (unit_no, lex_line_no, token)
end subroutine parse_putc
 
recursive subroutine parse_print
logical :: done
type(ast_t) :: arg
type(ast_t) :: printer
 
call expect_token ("Print", tk_Print, token)
call get_token (unit_no, lex_line_no, token)
call expect_token ("Print", tk_Lparen, token)
done = .false.
do while (.not. done)
call get_token (unit_no, lex_line_no, token)
select case (token%token_no)
case (tk_String)
arg = make_leaf_node (node_String, token%val)
printer = make_internal_node (node_Prts, arg, ast_nil)
call get_token (unit_no, lex_line_no, token)
case default
call parse_expression (unit_no, 0, lex_line_no, token, arg)
printer = make_internal_node (node_Prti, arg, ast_nil)
end select
ast = make_internal_node (node_Sequence, ast, printer)
done = (token%token_no /= tk_Comma)
end do
call expect_token ("Print", tk_Rparen, token)
call get_token (unit_no, lex_line_no, token)
call expect_token ("Print", tk_Semi, token)
call get_token (unit_no, lex_line_no, token)
end subroutine parse_print
 
recursive subroutine parse_identifier
type(ast_t) :: left_side
type(ast_t) :: right_side
 
left_side = make_leaf_node (node_Identifier, token%val)
call get_token (unit_no, lex_line_no, token)
call expect_token ("assign", tk_Assign, token)
call get_token (unit_no, lex_line_no, token)
call parse_expression (unit_no, 0, lex_line_no, token, right_side)
ast = make_internal_node (node_Assign, left_side, right_side)
call expect_token ("assign", tk_Semi, token)
call get_token (unit_no, lex_line_no, token)
end subroutine parse_identifier
 
recursive subroutine parse_while_construct
type(ast_t) :: predicate
type(ast_t) :: statement_to_be_repeated
 
call expect_token ("While", tk_While, token)
call get_token (unit_no, lex_line_no, token)
call parse_parenthesized_expression (unit_no, lex_line_no, token, predicate)
call parse_statement (unit_no, lex_line_no, token, statement_to_be_repeated)
ast = make_internal_node (node_While, predicate, statement_to_be_repeated)
end subroutine parse_while_construct
 
recursive subroutine parse_lbrace_construct
type(ast_t) :: statement
 
call expect_token ("Lbrace", tk_Lbrace, token)
call get_token (unit_no, lex_line_no, token)
do while (token%token_no /= tk_Rbrace .and. token%token_no /= tk_EOI)
call parse_statement (unit_no, lex_line_no, token, statement)
ast = make_internal_node (node_Sequence, ast, statement)
end do
call expect_token ("Lbrace", tk_Rbrace, token)
call get_token (unit_no, lex_line_no, token)
end subroutine parse_lbrace_construct
 
end subroutine parse_statement
 
recursive subroutine parse_expression (unit_no, p, lex_line_no, token, ast)
integer, intent(in) :: unit_no
integer, intent(in) :: p
integer(kind = nk), intent(inout) :: lex_line_no
type(lexer_token_t), intent(inout) :: token
type(ast_t), intent(out) :: ast
 
integer :: precedence
type(ast_t) :: expression
 
select case (token%token_no)
case (tk_Lparen)
call parse_parenthesized_expression (unit_no, lex_line_no, token, ast)
case (tk_Sub)
token%token_no = tk_Negate
precedence = lexer_token_precedence(token%token_no)
call get_token (unit_no, lex_line_no, token)
call parse_expression (unit_no, precedence, lex_line_no, token, expression)
ast = make_internal_node (node_Negate, expression, ast_nil)
case (tk_Add)
token%token_no = tk_Positive
precedence = lexer_token_precedence(token%token_no)
call get_token (unit_no, lex_line_no, token)
call parse_expression (unit_no, precedence, lex_line_no, token, expression)
ast = expression
case (tk_Not)
precedence = lexer_token_precedence(token%token_no)
call get_token (unit_no, lex_line_no, token)
call parse_expression (unit_no, precedence, lex_line_no, token, expression)
ast = make_internal_node (node_Not, expression, ast_nil)
case (tk_Ident)
ast = make_leaf_node (node_Identifier, token%val)
call get_token (unit_no, lex_line_no, token)
case (tk_Integer)
ast = make_leaf_node (node_Integer, token%val)
call get_token (unit_no, lex_line_no, token)
case default
call syntax_error_message ("", tk_primary, token)
stop 1
end select
 
do while (lexer_token_arity(token%token_no) == 2 .and. &
& p <= lexer_token_precedence(token%token_no))
block
type(ast_t) :: right_expression
integer :: q
integer :: node_variety
 
if (lexer_token_associativity(token%token_no) == right_associative) then
q = lexer_token_precedence(token%token_no)
else
q = lexer_token_precedence(token%token_no) + 1
end if
node_variety = binary_operator_node_variety (token%token_no)
call get_token (unit_no, lex_line_no, token)
call parse_expression (unit_no, q, lex_line_no, token, right_expression)
ast = make_internal_node (node_variety, ast, right_expression)
end block
end do
end subroutine parse_expression
 
recursive subroutine parse_parenthesized_expression (unit_no, lex_line_no, token, ast)
integer, intent(in) :: unit_no
integer(kind = nk), intent(inout) :: lex_line_no
type(lexer_token_t), intent(inout) :: token
type(ast_t), intent(out) :: ast
 
call expect_token ("paren_expr", tk_Lparen, token)
call get_token (unit_no, lex_line_no, token)
call parse_expression (unit_no, 0, lex_line_no, token, ast)
call expect_token ("paren_expr", tk_Rparen, token)
call get_token (unit_no, lex_line_no, token)
end subroutine parse_parenthesized_expression
 
elemental function binary_operator_node_variety (token_no) result (node_variety)
integer, intent(in) :: token_no
integer :: node_variety
 
select case (token_no)
case (tk_Mul)
node_variety = node_Multiply
case (tk_Div)
node_variety = node_Divide
case (tk_Mod)
node_variety = node_Mod
case (tk_Add)
node_variety = node_Add
case (tk_Sub)
node_variety = node_Subtract
case (tk_Lss)
node_variety = node_Less
case (tk_Leq)
node_variety = node_LessEqual
case (tk_Gtr)
node_variety = node_Greater
case (tk_Geq)
node_variety = node_GreaterEqual
case (tk_Eq)
node_variety = node_Equal
case (tk_Neq)
node_variety = node_NotEqual
case (tk_And)
node_variety = node_And
case (tk_Or)
node_variety = node_Or
case default
! This branch should never be reached.
error stop
end select
end function binary_operator_node_variety
 
function make_internal_node (node_variety, left, right) result (ast)
integer, intent(in) :: node_variety
class(ast_t), intent(in) :: left, right
type(ast_t) :: ast
 
type(ast_node_t), pointer :: node
 
allocate (node)
node%node_variety = node_variety
node%left => left%node
node%right => right%node
ast%node => node
end function make_internal_node
 
function make_leaf_node (node_variety, val) result (ast)
integer, intent(in) :: node_variety
character(*, kind = ck), intent(in) :: val
type(ast_t) :: ast
 
type(ast_node_t), pointer :: node
 
allocate (node)
node%node_variety = node_variety
node%val = val
ast%node => node
end function make_leaf_node
 
subroutine get_token (unit_no, lex_line_no, token)
integer, intent(in) :: unit_no
integer(kind = nk), intent(inout) :: lex_line_no
type(lexer_token_t), intent(out) :: token
 
logical :: eof
 
call get_lexer_token (unit_no, lex_line_no, eof, token)
if (eof) then
write (error_unit, '("Parser error: the stream of input tokens is incomplete")')
stop 1
end if
end subroutine get_token
 
subroutine expect_token (message, token_no, token)
character(*), intent(in) :: message
integer, intent (in) :: token_no
class(lexer_token_t), intent(in) :: token
 
if (token%token_no /= token_no) then
call syntax_error_message (message, token_no, token)
stop 1
end if
end subroutine expect_token
 
subroutine syntax_error_message (message, expected_token_no, token)
character(*), intent(in) :: message
integer, intent(in) :: expected_token_no
class(lexer_token_t), intent(in) :: token
 
! Write a message to an output unit dedicated to printing
! errors. The message could, of course, be more detailed than what
! we are doing here.
write (error_unit, '("Syntax error at ", I0, ".", I0)') &
& token%line_no, token%column_no
 
!
! For the sake of the exercise, also write, to output_unit, a
! message in the style of the C reference program.
!
write (output_unit, '("(", I0, ", ", I0, ") error: ")', advance = 'no') &
& token%line_no, token%column_no
select case (expected_token_no)
case (tk_start_of_statement)
write (output_unit, '("expecting start of statement, found ''", 1A, "''")') &
& trim (lexer_token_string(token%token_no))
case (tk_primary)
write (output_unit, '("Expecting a primary, found ''", 1A, "''")') &
& trim (lexer_token_string(token%token_no))
case default
write (output_unit, '(1A, ": Expecting ''", 1A, "'', found ''", 1A, "''")') &
& trim (message), trim (lexer_token_string(expected_token_no)), &
& trim (lexer_token_string(token%token_no))
end select
end subroutine syntax_error_message
 
subroutine output_ast_flattened (unit_no, ast)
integer, intent(in) :: unit_no
type(ast_t), intent(in) :: ast
 
call output_ast_node_flattened (unit_no, ast%node)
end subroutine output_ast_flattened
 
recursive subroutine output_ast_node_flattened (unit_no, node)
integer, intent(in) :: unit_no
type(ast_node_t), pointer, intent(in) :: node
 
if (.not. associated (node)) then
write (unit_no, '(";")')
else
if (allocated (node%val)) then
write (unit_no, '(1A16, 2X, 1A)') &
& node_variety_string(node%node_variety), node%val
else
write (unit_no, '(1A)') &
& trim (node_variety_string(node%node_variety))
call output_ast_node_flattened (unit_no, node%left)
call output_ast_node_flattened (unit_no, node%right)
end if
end if
end subroutine output_ast_node_flattened
 
end module syntactic_analysis
 
program parse
use, intrinsic :: iso_fortran_env, only: input_unit
use, intrinsic :: iso_fortran_env, only: output_unit
use, intrinsic :: iso_fortran_env, only: error_unit
use, non_intrinsic :: syntactic_analysis
 
implicit none
 
integer, parameter :: inp_unit_no = 100
integer, parameter :: outp_unit_no = 101
 
integer :: arg_count
character(200) :: arg
integer :: inp
integer :: outp
 
arg_count = command_argument_count ()
if (3 <= arg_count) then
call print_usage
else
if (arg_count == 0) then
inp = input_unit
outp = output_unit
else if (arg_count == 1) then
call get_command_argument (1, arg)
inp = open_for_input (trim (arg))
outp = output_unit
else if (arg_count == 2) then
call get_command_argument (1, arg)
inp = open_for_input (trim (arg))
call get_command_argument (2, arg)
outp = open_for_output (trim (arg))
end if
 
block
type(ast_t) :: ast
 
ast = parse_token_stream (inp)
call output_ast_flattened (outp, ast)
end block
end if
 
contains
 
function open_for_input (filename) result (unit_no)
character(*), intent(in) :: filename
integer :: unit_no
 
integer :: stat
 
open (unit = inp_unit_no, file = filename, status = 'old', &
& action = 'read', access = 'stream', form = 'unformatted', &
& iostat = stat)
if (stat /= 0) then
write (error_unit, '("Error: failed to open ", 1A, " for input")') filename
stop 1
end if
unit_no = inp_unit_no
end function open_for_input
 
function open_for_output (filename) result (unit_no)
character(*), intent(in) :: filename
integer :: unit_no
 
integer :: stat
 
open (unit = outp_unit_no, file = filename, action = 'write', iostat = stat)
if (stat /= 0) then
write (error_unit, '("Error: failed to open ", 1A, " for output")') filename
stop 1
end if
unit_no = outp_unit_no
end function open_for_output
 
subroutine print_usage
character(200) :: progname
 
call get_command_argument (0, progname)
write (output_unit, '("Usage: ", 1A, " [INPUT_FILE [OUTPUT_FILE]]")') &
& trim (progname)
end subroutine print_usage
end program parse</syntaxhighlight>
 
{{out}}
Prime numbers example:
<pre>Sequence
Sequence
Sequence
Sequence
Sequence
;
Assign
Identifier count
Integer 1
Assign
Identifier n
Integer 1
Assign
Identifier limit
Integer 100
While
Less
Identifier n
Identifier limit
Sequence
Sequence
Sequence
Sequence
Sequence
;
Assign
Identifier k
Integer 3
Assign
Identifier p
Integer 1
Assign
Identifier n
Add
Identifier n
Integer 2
While
And
LessEqual
Multiply
Identifier k
Identifier k
Identifier n
Identifier p
Sequence
Sequence
;
Assign
Identifier p
NotEqual
Multiply
Divide
Identifier n
Identifier k
Identifier k
Identifier n
Assign
Identifier k
Add
Identifier k
Integer 2
If
Identifier p
If
Sequence
Sequence
;
Sequence
Sequence
;
Prti
Identifier n
;
Prts
String " is prime\n"
;
Assign
Identifier count
Add
Identifier count
Integer 1
;
Sequence
Sequence
Sequence
;
Prts
String "Total primes found: "
;
Prti
Identifier count
;
Prts
String "\n"
;</pre>
 
=={{header|Go}}==
{{trans|C}}
<langsyntaxhighlight lang="go">package main
 
import (
Line 3,200 ⟶ 6,497:
scanner = bufio.NewScanner(source)
prtAst(parse())
}</langsyntaxhighlight>
 
{{out}}
Line 3,301 ⟶ 6,598:
;
</pre>
 
=={{header|Icon}}==
{{works with|Icon|9.5.20i}}
 
 
I use co-expressions in a way that could easily be done differently, but I prefer to use the co-expressions. (These can be sluggish or fast, depending on what sort of Icon you are running. In this case, the speed differences are of little concern.)
 
 
<syntaxhighlight lang="icon">#
# The Rosetta Code Tiny-Language Parser, in Icon.
#
# This implementation is based closely on the pseudocode and the C
# reference implementation.
#
 
# ximage from the IPL is useful for debugging. Use "xdump(x)" to
# pretty-print x.
#link ximage
 
record token_record (line_no, column_no, tok, tokval)
record token_getter (nxt, curr)
 
procedure main (args)
local inpf_name, outf_name
local inpf, outf
local nexttok, currtok, current_token, gettok
local ast
 
inpf_name := "-"
outf_name := "-"
if 1 <= *args then inpf_name := args[1]
if 2 <= *args then outf_name := args[2]
 
inpf :=
if inpf_name == "-" then
&input
else
(open(inpf_name, "r") |
stop("failed to open \"" || inpf_name || "\" for input"))
outf :=
if outf_name == "-" then
&output
else
(open(outf_name, "w") |
stop("failed to open \"" || outf_name || "\" for output"))
 
current_token := [&null]
nexttok := create generate_tokens(inpf, current_token)
currtok := create get_current_token (current_token)
gettok := token_getter(nexttok, currtok)
ast := parse(gettok)
prt_ast(outf, ast)
 
close(inpf)
close(outf)
end
 
procedure prt_ast (outf, ast)
if *ast = 0 then {
write(outf, ";")
} else {
writes(outf, ast[1])
if ast[1] == ("Identifier" | "Integer" | "String") then {
write(outf, " ", ast[2])
} else {
write(outf)
prt_ast(outf, ast[2])
prt_ast(outf, ast[3])
}
}
end
 
procedure generate_tokens (inpf, current_token)
local s
 
while s := read(inpf) do {
if trim(s) ~== "" then {
current_token[1] := string_to_token_record(s)
suspend current_token[1]
}
}
end
 
procedure get_current_token (current_token)
repeat (suspend current_token[1])
end
 
procedure string_to_token_record (s)
local line_no, column_no, tok, tokval
 
static spaces
 
initial {
spaces := ' \t\f\v\r\n'
}
 
trim(s) ? {
tab(many(spaces))
line_no := integer(tab(many(&digits)))
tab(many(spaces))
column_no := integer(tab(many(&digits)))
tab(many(spaces))
tok := tab(many(&letters ++ '_'))
tab(many(spaces))
tokval := tab(0)
}
return token_record(line_no, column_no, tok, tokval)
end
 
procedure parse (gettok)
local tok
local t
 
t := []
@gettok.nxt
tok := "Not End_of_input"
while tok ~== "End_of_input" do {
t := ["Sequence", t, stmt(gettok)]
tok := (@gettok.curr).tok
}
return t
end
 
procedure stmt (gettok)
local e, s, t, v
local tok
local done
 
t := []
if accept(gettok, "Keyword_if") then {
e := paren_expr(gettok)
s := stmt(gettok)
t := ["If", e, ["If", s,
if accept(gettok, "Keyword_else")
then stmt(gettok) else []]]
} else if accept(gettok, "Keyword_putc") then {
t := ["Prtc", paren_expr(gettok), []]
expect(gettok, "Putc", "Semicolon")
} else if accept(gettok, "Keyword_print") then {
expect(gettok, "Print", "LeftParen")
done := 0
while done = 0 do {
tok := @gettok.curr
if tok.tok == "String" then {
e := ["Prts", ["String", tok.tokval], []]
@gettok.nxt
} else {
e := ["Prti", expr(gettok, 0), []]
}
t := ["Sequence", t, e]
accept(gettok, "Comma") | (done := 1)
}
expect(gettok, "Print", "RightParen")
expect(gettok, "Print", "Semicolon")
} else if (@gettok.curr).tok == "Semicolon" then {
@gettok.nxt
} else if (@gettok.curr).tok == "Identifier" then {
v := ["Identifier", (@gettok.curr).tokval]
@gettok.nxt
expect(gettok, "assign", "Op_assign")
t := ["Assign", v, expr(gettok, 0)]
expect(gettok, "assign", "Semicolon")
} else if accept(gettok, "Keyword_while") then {
e := paren_expr(gettok)
t := ["While", e, stmt(gettok)]
} else if accept(gettok, "LeftBrace") then {
until (@gettok.curr).tok == ("RightBrace" | "End_of_input") do {
t := ["Sequence", t, stmt(gettok)]
}
expect(gettok, "Lbrace", "RightBrace")
} else if (@gettok.curr).tok ~== "End_of_input" then {
tok := @gettok.curr
error(tok, ("expecting start of statement, found '" ||
text(tok.tok) || "'"))
}
return t
end
 
procedure paren_expr (gettok)
local x
 
expect(gettok, "paren_expr", "LeftParen");
x := expr(gettok, 0);
expect(gettok, "paren_expr", "RightParen");
return x
end
 
procedure expr (gettok, p)
local tok, save_tok
local x, y
local q
 
tok := @gettok.curr
case tok.tok of {
"LeftParen" : {
x := paren_expr(gettok)
}
"Op_subtract" : {
@gettok.nxt
y := expr(gettok, precedence("Op_negate"))
x := ["Negate", y, []]
}
"Op_add" : {
@gettok.nxt
x := expr(gettok, precedence("Op_negate"))
}
"Op_not" : {
@gettok.nxt
y := expr(gettok, precedence("Op_not"))
x := ["Not", y, []]
}
"Identifier" : {
x := ["Identifier", tok.tokval]
@gettok.nxt
}
"Integer" : {
x := ["Integer", tok.tokval]
@gettok.nxt
}
default : {
error(tok, "Expecting a primary, found: " || text(tok.tok))
}
}
 
while (tok := @gettok.curr &
is_binary(tok.tok) &
p <= precedence(tok.tok)) do
{
save_tok := tok
@gettok.nxt
q := precedence(save_tok.tok)
if not is_right_associative(save_tok.tok) then q +:= 1
x := [operator(save_tok.tok), x, expr(gettok, q)]
}
 
return x
end
 
procedure accept (gettok, tok)
local nxt
 
if (@gettok.curr).tok == tok then nxt := @gettok.nxt else fail
return nxt
end
 
procedure expect (gettok, msg, tok)
if (@gettok.curr).tok ~== tok then {
error(@gettok.curr,
msg || ": Expecting '" || text(tok) || "', found '" ||
text((@gettok.curr).tok) || "'")
}
return @gettok.nxt
end
 
procedure error (token, msg)
write("(", token.line_no, ", ", token.column_no, ") error: ", msg)
exit(1)
end
 
procedure precedence (tok)
local p
 
case tok of {
"Op_multiply" : p := 13
"Op_divide" : p := 13
"Op_mod" : p := 13
"Op_add" : p := 12
"Op_subtract" : p := 12
"Op_negate" : p := 14
"Op_not" : p := 14
"Op_less" : p := 10
"Op_lessequal" : p := 10
"Op_greater" : p := 10
"Op_greaterequal" : p := 10
"Op_equal" : p := 9
"Op_notequal" : p := 9
"Op_and" : p := 5
"Op_or" : p := 4
default : p := -1
}
return p
end
 
procedure is_binary (tok)
return ("Op_add" |
"Op_subtract" |
"Op_multiply" |
"Op_divide" |
"Op_mod" |
"Op_less" |
"Op_lessequal" |
"Op_greater" |
"Op_greaterequal" |
"Op_equal" |
"Op_notequal" |
"Op_and" |
"Op_or") == tok
fail
end
 
procedure is_right_associative (tok)
# None of the current operators is right associative.
fail
end
 
procedure operator (tok)
local s
 
case tok of {
"Op_multiply" : s := "Multiply"
"Op_divide" : s := "Divide"
"Op_mod" : s := "Mod"
"Op_add" : s := "Add"
"Op_subtract" : s := "Subtract"
"Op_negate" : s := "Negate"
"Op_not" : s := "Not"
"Op_less" : s := "Less"
"Op_lessequal" : s := "LessEqual"
"Op_greater" : s := "Greater"
"Op_greaterequal" : s := "GreaterEqual"
"Op_equal" : s := "Equal"
"Op_notequal" : s := "NotEqual"
"Op_and" : s := "And"
"Op_or" : s := "Or"
}
return s
end
 
procedure text (tok)
local s
 
case tok of {
"Keyword_else" : s := "else"
"Keyword_if" : s := "if"
"Keyword_print" : s := "print"
"Keyword_putc" : s := "putc"
"Keyword_while" : s := "while"
"Op_multiply" : s := "*"
"Op_divide" : s := "/"
"Op_mod" : s := "%"
"Op_add" : s := "+"
"Op_subtract" : s := "-"
"Op_negate" : s := "-"
"Op_less" : s := "<"
"Op_lessequal" : s := "<="
"Op_greater" : s := ">"
"Op_greaterequal" : s := ">="
"Op_equal" : s := "=="
"Op_notequal" : s := "!="
"Op_not" : s := "!"
"Op_assign" : s := "="
"Op_and" : s := "&&"
"Op_or" : s := "||"
"LeftParen" : s := "("
"RightParen" : s := ")"
"LeftBrace" : s := "{"
"RightBrace" : s := "}"
"Semicolon" : s := ";"
"Comma" : s := ","
"Identifier" : s := "Ident"
"Integer" : s := "Integer literal"
"String" : s := "String literal"
"End_of_input" : s := "EOI"
}
return s
end</syntaxhighlight>
 
{{out}}
<pre>$ icont -s -u parse.icn && ./parse compiler-tests/primes.lex
Sequence
Sequence
Sequence
Sequence
Sequence
;
Assign
Identifier count
Integer 1
Assign
Identifier n
Integer 1
Assign
Identifier limit
Integer 100
While
Less
Identifier n
Identifier limit
Sequence
Sequence
Sequence
Sequence
Sequence
;
Assign
Identifier k
Integer 3
Assign
Identifier p
Integer 1
Assign
Identifier n
Add
Identifier n
Integer 2
While
And
LessEqual
Multiply
Identifier k
Identifier k
Identifier n
Identifier p
Sequence
Sequence
;
Assign
Identifier p
NotEqual
Multiply
Divide
Identifier n
Identifier k
Identifier k
Identifier n
Assign
Identifier k
Add
Identifier k
Integer 2
If
Identifier p
If
Sequence
Sequence
;
Sequence
Sequence
;
Prti
Identifier n
;
Prts
String " is prime\n"
;
Assign
Identifier count
Add
Identifier count
Integer 1
;
Sequence
Sequence
Sequence
;
Prts
String "Total primes found: "
;
Prti
Identifier count
;
Prts
String "\n"
;</pre>
 
=={{header|J}}==
Implementation:
 
<syntaxhighlight lang="j">require'format/printf'
<lang J>
require'format/printf'
tkref=: tokenize 'End_of_input*/%+--<<=>>===!=!&&||print=print(if{else}while;,putc)a""0'
 
tkref=: tokenize 'End_of_input*/%+--<<=>>===!=!&&||print=print(if{else}while;print,putc)a""0'
tkref,. (tknames)=: tknames=:;: {{)n
End_of_input Op_multiply Op_divide Op_mod Op_add Op_subtract Op_negate Op_less
Op_lessequal Op_greater Op_greaterequal Op_equal Op_notequal Op_not Op_and
Op_or Keyword_print Op_assign Keyword_print LeftParen Keyword_if LeftBrace Keyword_else RightBrace
Keyword_while Semicolon Keyword_print Comma Keyword_putc RightParen
Identifier String Integer
}}-.LF
 
tkref,.(tktyp)=:tktyp=:;: {{)n
tkEOI tkMul tkDiv tkMod tkAdd tkSub tkNeg tkLss tkLeq tkGtr tkGeq tkEql
tkNeq tkNot tkAnd tkOr tkPrint tkAssign tkPrint tkLpar tkIf tkLbra tkElse
tkRbra tkWhile tkSemi tkPrint tkComma tkPutc tkRpar tkId tkString tkInt
}}-.LF
 
tkV=: 2 (tkref i.tokenize '*/%+-<<=>>===!=&&||')} (#tktyp)#0
tkV=: 1 (1 0+tkref i.tokenize '-!')} tkV
tkPrec=: 13 13 13 12 12 10 10 10 10 9 9 5 4 (tkref i.tokenize'*/%+-<<=>>==!=&&||')} tkV<._1
tkPrec=: 14 (1 0+tkref i.tokenize'-!')} tkPrec
NB. proofread |:(<"1 tkV,.tkPrec),tktyp,tkref,:tknames
 
tkref,.(ndDisp)=: ndDisp=:;:{{)n
ndRef=: tkref
ndRef,.(ndDisp)=: ndDisp=:;:{{)n
Sequence Multiply Divide Mod Add Subtract Negate Less LessEqual Greater
GreaterEqual Equal NotEqual Not And Or Prts Assign Prti x If x x x While
x Prtc x PutcPrtc x Identifier String Integer
}}-.LF
NB. proofread |:ndReftkref,:ndDisp
 
tok_pfx=: {{
if.'tok_'-:4{. y do. y else.'tok_',y end.
}}L:0
 
tok_s=: tok_pfx ;:'ln col name value text type valence precedence display'
tok=: {{
do tok_s{::~tok_s i. boxopen tok_pfx y
:
x{::~tok_s i. boxopen tok_pfx y
}}
 
gettoken=: {{
'tok_ln tok_col'=: (0;ndx){::x
Line 3,355 ⟶ 7,098:
ind=. tknames i.<tok_name
tok_text=: ind{::tkref
tok_type=: ind{::tktyp
tok_valence=: ind{::tkV
tok_precedence=: ind{::tkPrec
ndx=:ndx+1
node_display=: ind{::ndDisp
".each tok_s
}}
 
NB. syntax analyzer
 
parse=: {{
ndx=: tok_ln=: tok_col=: 0
Line 3,369 ⟶ 7,109:
gettok''
t=.a:
whilst.-.(a:-:t)+.tok_typetok_name-:tkEOIEnd_of_input do.
t=. Sequence make_node t stmt''
end.
}}
 
stmt=:{{)v
t=. a:
select.tok_typetok_name
case.tkIfKeyword_if do.
s=. stmt e=. paren_expr gettok''
if.tkElseKeyword_else-:tok_typetok_name
do. S=. stmt gettok''
else. S=. a: end.
t=. If make_node e If make_node s S
case.tkPutcKeyword_putc do.
e=. paren_expr gettok''
t=. Prtc make_node e a:
PutcPrtc expect tkSemiSemicolon
case.tkPrintKeyword_print do.gettok''
'Print' expect tkLparLeftParen
while.do.
if.tkStringString-:tok_typetok_name
do. gettok e=. Prts make_node (String make_leaf tok_value) a:
else. e=. Prti make_node (expr 0) a: end.
t=. Sequence make_node t e
if.tkCommaComma-:tok_typetok_name
do.Comma expect tkCommaComma
else.break.end.
end.
'Print' expect tkRparRightParen
'Print' expect tkSemiSemicolon
case.tkSemiSemicolon do.gettok''
case.tkIdIdentifier do.
gettok v=. Identifier make_leaf tok_value
Assign expect tkAssignOp_assign
t=. Assign make_node v e=. expr 0
Assign expect tkSemiSemicolon
case.tkWhileKeyword_while do.
t=. While make_node e s=. stmt e=. paren_expr gettok''
case.tkLbraLeftBrace do.
'LBraceLeftBrace' expect tkLbraLeftBrace
while.-.(<tok_typetok_name) e. tkRbraRightBrace;tkEOIEnd_of_input do.
t=. Sequence make_node t stmt''
end.
'LbraceLeftBrace' expect tkRbraRightBrace
case.tkEOIEnd_of_input do.
case.do. error 'Expecting start of statement, found %s'sprintf<tok_text
end.
t
}}
 
paren_expr=: {{
'paren_expr' expect tkLparLeftParen
t=. expr 0
'paren_expr' expect tkRparRightParen
t
}}
 
not_prec=: tkPrec{~tknames i.<Op_not
expr=: {{
select.tok_typetok_name
case.tkLparLeftParen do.e=. paren_expr''
case.tkAddOp_add do.gettok''
e=. expr not_prec
case.tkSubOp_subtract do.gettok''
e=. Negate make_node (expr not_prec) a:
case.tkNotOp_not do.gettok''
e=. Not make_node (expr not_prec) a:
case.tkIdIdentifier do.
gettok e=. Identifier make_leaf tok_value
case.tkIntInteger do.
gettok e=. Integer make_leaf tok_value
case.do. error 'Expecting a primary, found %s'sprintf<tok_text
end.
while.(2=tok_valence)*y<:tok_precedence>:y do.
q=. 1+tok_precedence [ op=. tok_typenode_display NB. no right associative operators
gettok''
enode=. op make_node e expr q
e=. op make_node e node
end.
e
}}
 
expect=: {{
if.y-:tok_typetok_name do. gettok'' return.end.
error '%s: Expecting "%s", found "%s"'sprintf x;(tkref{::~tktyptknames i.<y);tok_text
}}
 
make_leaf=: {{
x;y
}}
 
make_node=: {{
m;n;<y
}}
 
error=: {{
echo 'Error: line %d, column %d: %s\n'sprintf tok_ln;tok_col;y throw.
}}
 
syntax=: {{
;(flatAST parse y),each LF
}}
 
flatAST=: {{
assert.*L.y
Line 3,481 ⟶ 7,223:
end.
}}
</syntaxhighlight>
</lang>
 
Some quirks worth noting:
 
(1) '+' appears in the productions for 'primary' and 'addition_expr' but has only one node type (because we do not represent its appearance in 'primary' with a node.
 
(2) '-' and 'print' do have two node types (which we sort out on the fly).
 
(3) In this implementation, we require a 1:1 mapping between the data structure representing token types and the data structure representing node types. This means two token entries for both - and print (the second instance of both gets ignored by the lexer).
 
(4) Because the data structure produced by the lexer is independent of any type system implementation, we can use the same type system for the lexer or a different type system for the lexer and either way works (as long as the implementations are consistent with the spec).
 
(5) In this context parallel constant arrays represent token and node types.
 
Task example:
 
<syntaxhighlight lang="j">
<lang J>
primes=: {{)n
/*
Line 3,526 ⟶ 7,280:
Integer 100
While
Less
tkLss
Identifier n
Identifier limit
Line 3,543 ⟶ 7,297:
Assign
Identifier n
Add
tkAdd
Identifier n
Integer 2
While
And
tkAnd
LessEqual
tkLeq
Multiply
tkMul
Identifier k
Identifier k
Line 3,559 ⟶ 7,313:
Assign
Identifier p
NotEqual
tkNeq
Divide
tkDiv
Identifier n
Multiply
tkMul
Identifier k
Identifier k
Line 3,568 ⟶ 7,322:
Assign
Identifier k
Add
tkAdd
Identifier k
Integer 2
Line 3,588 ⟶ 7,342:
Assign
Identifier count
Add
tkAdd
Identifier count
Integer 1
Line 3,605 ⟶ 7,359:
String "\n"
;
</syntaxhighlight>
</lang>
 
=={{header|Java}}==
Usage: java Parser infile [>outfile]
{{trans|Python}}
<langsyntaxhighlight lang="java">
import java.io.File;
import java.io.FileNotFoundException;
Line 3,962 ⟶ 7,716:
}
}
</syntaxhighlight>
</lang>
 
=={{header|Julia}}==
Julia tends to discourage large numbers of global variables, so this direct port from the Python reference implementation moves the globals into a function wrapper.
{{trans|Python}}
<langsyntaxhighlight lang="julia">struct ASTnode
nodetype::Int
left::Union{Nothing, ASTnode}
Line 4,219 ⟶ 7,973:
 
# syntaxanalyzer(length(ARGS) > 1 ? ARGS[1] : stdin) # for use as in the Python code
</syntaxhighlight>
</lang>
 
=={{header|M2000 Interpreter}}==
Line 4,227 ⟶ 7,981:
 
 
<syntaxhighlight lang="m2000 interpreter">
<lang M2000 Interpreter>
Module syntax_analyzer(b$){
enum tokens {
Line 4,600 ⟶ 8,354:
43 1 End_of_Input
}
</syntaxhighlight>
</lang>
 
{{out}}
Line 4,780 ⟶ 8,534:
Using the third version of Nim lexer.
 
<langsyntaxhighlight Nimlang="nim">import ast_lexer
 
type NodeKind* = enum
Line 5,028 ⟶ 8,782:
let code = if paramCount() < 1: stdin.readAll() else: paramStr(1).readFile()
let tree = parse(code)
tree.printAst()</langsyntaxhighlight>
 
{{out}}
Line 5,127 ⟶ 8,881:
String "\n"
;</pre>
 
=={{header|ObjectIcon}}==
{{trans|Icon}}
 
 
There are very few differences from the plain Icon implementation, although neither compiler can compile the other's implementation.
 
In Object Icon, the co-expressions should be fast.
 
 
<syntaxhighlight lang="objecticon"># -*- ObjectIcon -*-
#
# The Rosetta Code Tiny-Language Parser, in Object Icon.
#
# This implementation is based closely on the pseudocode and the C
# reference implementation.
#
 
import io
 
record token_record (line_no, column_no, tok, tokval)
record token_getter (nxt, curr)
 
procedure main (args)
local inpf_name, outf_name
local inpf, outf
local nexttok, currtok, current_token, gettok
local ast
 
inpf_name := "-"
outf_name := "-"
if 1 <= *args then inpf_name := args[1]
if 2 <= *args then outf_name := args[2]
 
inpf :=
if inpf_name == "-" then
FileStream.stdin
else
(FileStream(inpf_name, FileOpt.RDONLY) | stop(&why))
outf :=
if outf_name == "-" then
FileStream.stdout
else
(FileStream(outf_name, ior(FileOpt.WRONLY,
FileOpt.TRUNC,
FileOpt.CREAT)) | stop(&why))
current_token := [&null]
nexttok := create generate_tokens(inpf, current_token)
currtok := create get_current_token (current_token)
gettok := token_getter(nexttok, currtok)
ast := parse(gettok)
prt_ast(outf, ast)
 
close(inpf)
close(outf)
end
 
procedure prt_ast (outf, ast)
if *ast = 0 then {
write(outf, ";")
} else {
writes(outf, ast[1])
if ast[1] == ("Identifier" | "Integer" | "String") then {
write(outf, " ", ast[2])
} else {
write(outf)
prt_ast(outf, ast[2])
prt_ast(outf, ast[3])
}
}
end
 
procedure generate_tokens (inpf, current_token)
local s
 
while s := read(inpf) do {
if trim(s) ~== "" then {
current_token[1] := string_to_token_record(s)
suspend current_token[1]
}
}
end
 
procedure get_current_token (current_token)
repeat (suspend current_token[1])
end
 
procedure string_to_token_record (s)
local line_no, column_no, tok, tokval
 
static spaces
 
initial {
spaces := ' \t\f\v\r\n'
}
 
trim(s) ? {
tab(many(spaces))
line_no := integer(tab(many(&digits)))
tab(many(spaces))
column_no := integer(tab(many(&digits)))
tab(many(spaces))
tok := tab(many(&letters ++ '_'))
tab(many(spaces))
tokval := tab(0)
}
return token_record(line_no, column_no, tok, tokval)
end
 
procedure parse (gettok)
local tok
local t
 
t := []
@gettok.nxt
tok := "Not End_of_input"
while tok ~== "End_of_input" do {
t := ["Sequence", t, stmt(gettok)]
tok := (@gettok.curr).tok
}
return t
end
 
procedure stmt (gettok)
local e, s, t, v
local tok
local done
 
t := []
if accept(gettok, "Keyword_if") then {
e := paren_expr(gettok)
s := stmt(gettok)
t := ["If", e, ["If", s,
if accept(gettok, "Keyword_else")
then stmt(gettok) else []]]
} else if accept(gettok, "Keyword_putc") then {
t := ["Prtc", paren_expr(gettok), []]
expect(gettok, "Putc", "Semicolon")
} else if accept(gettok, "Keyword_print") then {
expect(gettok, "Print", "LeftParen")
done := &no
while /done do {
tok := @gettok.curr
if tok.tok == "String" then {
e := ["Prts", ["String", tok.tokval], []]
@gettok.nxt
} else {
e := ["Prti", expr(gettok, 0), []]
}
t := ["Sequence", t, e]
accept(gettok, "Comma") | (done := &yes)
}
expect(gettok, "Print", "RightParen")
expect(gettok, "Print", "Semicolon")
} else if (@gettok.curr).tok == "Semicolon" then {
@gettok.nxt
} else if (@gettok.curr).tok == "Identifier" then {
v := ["Identifier", (@gettok.curr).tokval]
@gettok.nxt
expect(gettok, "assign", "Op_assign")
t := ["Assign", v, expr(gettok, 0)]
expect(gettok, "assign", "Semicolon")
} else if accept(gettok, "Keyword_while") then {
e := paren_expr(gettok)
t := ["While", e, stmt(gettok)]
} else if accept(gettok, "LeftBrace") then {
until (@gettok.curr).tok == ("RightBrace" | "End_of_input") do {
t := ["Sequence", t, stmt(gettok)]
}
expect(gettok, "Lbrace", "RightBrace")
} else if (@gettok.curr).tok ~== "End_of_input" then {
tok := @gettok.curr
error(tok, ("expecting start of statement, found '" ||
tok_text(tok.tok) || "'"))
}
return t
end
 
procedure paren_expr (gettok)
local x
 
expect(gettok, "paren_expr", "LeftParen");
x := expr(gettok, 0);
expect(gettok, "paren_expr", "RightParen");
return x
end
 
procedure expr (gettok, p)
local tok, save_tok
local x, y
local q
 
tok := @gettok.curr
case tok.tok of {
"LeftParen" : {
x := paren_expr(gettok)
}
"Op_subtract" : {
@gettok.nxt
y := expr(gettok, precedence("Op_negate"))
x := ["Negate", y, []]
}
"Op_add" : {
@gettok.nxt
x := expr(gettok, precedence("Op_negate"))
}
"Op_not" : {
@gettok.nxt
y := expr(gettok, precedence("Op_not"))
x := ["Not", y, []]
}
"Identifier" : {
x := ["Identifier", tok.tokval]
@gettok.nxt
}
"Integer" : {
x := ["Integer", tok.tokval]
@gettok.nxt
}
default : {
error(tok, "Expecting a primary, found: " || tok_text(tok.tok))
}
}
 
while (tok := @gettok.curr &
is_binary(tok.tok) &
p <= precedence(tok.tok)) do
{
save_tok := tok
@gettok.nxt
q := precedence(save_tok.tok)
if not is_right_associative(save_tok.tok) then q +:= 1
x := [operator(save_tok.tok), x, expr(gettok, q)]
}
 
return x
end
 
procedure accept (gettok, tok)
local nxt
 
if (@gettok.curr).tok == tok then nxt := @gettok.nxt else fail
return nxt
end
 
procedure expect (gettok, msg, tok)
if (@gettok.curr).tok ~== tok then {
error(@gettok.curr,
msg || ": Expecting '" || tok_text(tok) || "', found '" ||
tok_text((@gettok.curr).tok) || "'")
}
return @gettok.nxt
end
 
procedure error (token, msg)
write("(", token.line_no, ", ", token.column_no, ") error: ", msg)
exit(1)
end
 
procedure precedence (tok)
local p
 
case tok of {
"Op_multiply" : p := 13
"Op_divide" : p := 13
"Op_mod" : p := 13
"Op_add" : p := 12
"Op_subtract" : p := 12
"Op_negate" : p := 14
"Op_not" : p := 14
"Op_less" : p := 10
"Op_lessequal" : p := 10
"Op_greater" : p := 10
"Op_greaterequal" : p := 10
"Op_equal" : p := 9
"Op_notequal" : p := 9
"Op_and" : p := 5
"Op_or" : p := 4
default : p := -1
}
return p
end
 
procedure is_binary (tok)
return ("Op_add" |
"Op_subtract" |
"Op_multiply" |
"Op_divide" |
"Op_mod" |
"Op_less" |
"Op_lessequal" |
"Op_greater" |
"Op_greaterequal" |
"Op_equal" |
"Op_notequal" |
"Op_and" |
"Op_or") == tok
fail
end
 
procedure is_right_associative (tok)
# None of the current operators is right associative.
fail
end
 
procedure operator (tok)
local s
 
case tok of {
"Op_multiply" : s := "Multiply"
"Op_divide" : s := "Divide"
"Op_mod" : s := "Mod"
"Op_add" : s := "Add"
"Op_subtract" : s := "Subtract"
"Op_negate" : s := "Negate"
"Op_not" : s := "Not"
"Op_less" : s := "Less"
"Op_lessequal" : s := "LessEqual"
"Op_greater" : s := "Greater"
"Op_greaterequal" : s := "GreaterEqual"
"Op_equal" : s := "Equal"
"Op_notequal" : s := "NotEqual"
"Op_and" : s := "And"
"Op_or" : s := "Or"
}
return s
end
 
procedure tok_text (tok)
local s
 
case tok of {
"Keyword_else" : s := "else"
"Keyword_if" : s := "if"
"Keyword_print" : s := "print"
"Keyword_putc" : s := "putc"
"Keyword_while" : s := "while"
"Op_multiply" : s := "*"
"Op_divide" : s := "/"
"Op_mod" : s := "%"
"Op_add" : s := "+"
"Op_subtract" : s := "-"
"Op_negate" : s := "-"
"Op_less" : s := "<"
"Op_lessequal" : s := "<="
"Op_greater" : s := ">"
"Op_greaterequal" : s := ">="
"Op_equal" : s := "=="
"Op_notequal" : s := "!="
"Op_not" : s := "!"
"Op_assign" : s := "="
"Op_and" : s := "&&"
"Op_or" : s := "||"
"LeftParen" : s := "("
"RightParen" : s := ")"
"LeftBrace" : s := "{"
"RightBrace" : s := "}"
"Semicolon" : s := ";"
"Comma" : s := ","
"Identifier" : s := "Ident"
"Integer" : s := "Integer literal"
"String" : s := "String literal"
"End_of_input" : s := "EOI"
}
return s
end</syntaxhighlight>
 
{{out}}
<pre>$ oit -s parse_in_OI.icn && ./parse_in_OI compiler-tests/primes.lex
Sequence
Sequence
Sequence
Sequence
Sequence
;
Assign
Identifier count
Integer 1
Assign
Identifier n
Integer 1
Assign
Identifier limit
Integer 100
While
Less
Identifier n
Identifier limit
Sequence
Sequence
Sequence
Sequence
Sequence
;
Assign
Identifier k
Integer 3
Assign
Identifier p
Integer 1
Assign
Identifier n
Add
Identifier n
Integer 2
While
And
LessEqual
Multiply
Identifier k
Identifier k
Identifier n
Identifier p
Sequence
Sequence
;
Assign
Identifier p
NotEqual
Multiply
Divide
Identifier n
Identifier k
Identifier k
Identifier n
Assign
Identifier k
Add
Identifier k
Integer 2
If
Identifier p
If
Sequence
Sequence
;
Sequence
Sequence
;
Prti
Identifier n
;
Prts
String " is prime\n"
;
Assign
Identifier count
Add
Identifier count
Integer 1
;
Sequence
Sequence
Sequence
;
Prts
String "Total primes found: "
;
Prti
Identifier count
;
Prts
String "\n"
;</pre>
 
 
 
=={{header|Perl}}==
Tested on perl v5.26.1
<langsyntaxhighlight Perllang="perl">#!/usr/bin/perl
 
use strict; # parse.pl - inputs lex, outputs flattened ast
Line 5,203 ⟶ 9,424:
$_[0] <= 0 && /$h Op_or \n/gcx ? "Or\n$ast" . expr(1) :
return $ast while 1;
}</langsyntaxhighlight>
 
{{out|case=Count AST}}
Line 5,237 ⟶ 9,458:
=={{header|Phix}}==
Reusing lex.e (and core.e) from the [[Compiler/lexical_analyzer#Phix|Lexical Analyzer task]], and again written as a reusable module.
<!--<langsyntaxhighlight Phixlang="phix">(phixonline)-->
<span style="color: #000080;font-style:italic;">--
-- demo\rosetta\Compiler\parse.e
Line 5,390 ⟶ 9,611:
<span style="color: #008080;">return</span> <span style="color: #000000;">t</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<!--</langsyntaxhighlight>-->
And a simple test driver for the specific task:
<!--<langsyntaxhighlight Phixlang="phix">(phixonline)-->
<span style="color: #000080;font-style:italic;">--
-- demo\rosetta\Compiler\parse.exw
Line 5,451 ⟶ 9,672:
--main({0,0,"primes.c"}) -- as Algol, C, Python (apart from spacing)
--main({0,0,"count.c"}) -- as AWK ( "" )</span>
<!--</langsyntaxhighlight>-->
{{out}}
<pre>
Line 5,460 ⟶ 9,681:
=={{header|Python}}==
Tested with Python 2.7 and 3.x
<langsyntaxhighlight Pythonlang="python">from __future__ import print_function
import sys, shlex, operator
 
Line 5,726 ⟶ 9,947:
error(0, 0, "Can't open %s" % sys.argv[1])
t = parse()
prt_ast(t)</langsyntaxhighlight>
 
{{out|case=prime numbers AST}}
Line 5,828 ⟶ 10,049:
</pre>
</b>
 
=={{header|RATFOR}}==
{{works with|ratfor77|[https://sourceforge.net/p/chemoelectric/ratfor77/ public domain 1.0]}}
{{works with|gfortran|11.2.1}}
{{works with|f2c|20100827}}
 
 
FORTRAN 77 is a non-recursive language, in the specific sense that it does not support recursive algorithms. What is missing is simple: there is no way to specify that a value should go onto a call stack. Local variables were all treated by compilers more or less as what C programmers would call "static". Subprogram parameters were all passed by reference, rather than by value as in C.
 
''However'', it is perfectly possible to implement a recursive language ''in'' FORTRAN 77 and do the programming in ''that''.
 
Which is what I do here. I have implemented the recursive algorithm of the parser pseudocode in a tiny, FORTH-like "language" specific for the task. The parser code, that is, is not written directly in Ratfor, but instead is written in a tiny "language" and interpreted by a Ratfor subroutine.
 
Printing the abstract syntax tree is done with a quite ordinary non-recursive tree traversal written directly in Ratfor.
 
There is no paradox in the notion of doing recursive programming within a Ratfor program by the method described above. All the recursion is at a higher level of abstraction than the Ratfor programming itself. If you examine the Ratfor code ''as'' Ratfor code, there is not a single recursive call.
 
 
<syntaxhighlight lang="ratfor">######################################################################
#
# The Rosetta Code parser in Ratfor 77.
#
#
# Ratfor 77 is a preprocessor for FORTRAN 77; therefore we do not have
# recursive calls available. For printing the flattened tree, I use an
# ordinary non-recursive implementation of the tree traversal. The
# mutually recursive parser itself is more difficult to handle; for
# that, I implement a tiny, FORTH-like token processor that supports
# recursive calls.
#
# How to deal with input is another problem. I use formatted input,
# treating each line as a (regrettably fixed length) array of type
# CHARACTER. It is a very simple method, and leaves the input in a
# form convenient for the necessary processing (given that the input
# is not formatted in columns).
#
#
# On a POSIX platform, the program can be compiled with f2c and run
# somewhat as follows:
#
# ratfor77 parse-in-ratfor.r > parse-in-ratfor.f
# f2c -C -Nc40 parse-in-ratfor.f
# cc parse-in-ratfor.c -lf2c
# ./a.out < compiler-tests/primes.lex
#
# With gfortran, a little differently:
#
# ratfor77 parse-in-ratfor.r > parse-in-ratfor.f
# gfortran -fcheck=all -std=legacy parse-in-ratfor.f
# ./a.out < compiler-tests/primes.lex
#
#
# I/O is strictly from default input and to default output, which, on
# POSIX systems, usually correspond respectively to standard input and
# standard output.
#
#---------------------------------------------------------------------
 
# Parameters that you can adjust.
 
define(LINESZ, 256) # Size of an input line.
define(STRNSZ, 4096) # Size of the string pool.
define(NODSSZ, 4096) # Size of the nodes pool.
define(DSTKSZ, 4096) # Size of the data stack.
define(PSTKSZ, 4096) # Size of the precedence stack.
define(XSTKSZ, 4096) # Size of the execution stack.
 
#---------------------------------------------------------------------
 
define(TOKSZ, 5) # Size of a lexical token, in integers.
define(ILN, 1) # Index for line number.
define(ICN, 2) # Index for column number.
define(ITK, 3) # Index for token number.
define(ITV, 4) # Index for the string pool index of the token value.
define(ITN, 5) # Index for the length of the token value.
 
define(NODESZ, 3)
define(NTAG, 1) # Index for the tag.
# For an internal node --
define(NLEFT, 2) # Index for the left node.
define(NRIGHT, 3) # Index for the right node.
# For a leaf node --
define(NITV, 2) # Index for the string pool index.
define(NITN, 3) # Length of the value.
 
define(NIL, -1) # Nil node.
 
define(TKELSE, 0)
define(TKIF, 1)
define(TKPRNT, 2)
define(TKPUTC, 3)
define(TKWHIL, 4)
define(TKMUL, 5)
define(TKDIV, 6)
define(TKMOD, 7)
define(TKADD, 8)
define(TKSUB, 9)
define(TKNEG, 10)
define(TKLT, 11)
define(TKLE, 12)
define(TKGT, 13)
define(TKGE, 14)
define(TKEQ, 15)
define(TKNE, 16)
define(TKNOT, 17)
define(TKASGN, 18)
define(TKAND, 19)
define(TKOR, 20)
define(TKLPAR, 21)
define(TKRPAR, 22)
define(TKLBRC, 23)
define(TKRBRC, 24)
define(TKSEMI, 25)
define(TKCMMA, 26)
define(TKID, 27)
define(TKINT, 28)
define(TKSTR, 29)
define(TKEOI, 30)
 
define(NDID, 0)
define(NDSTR, 1)
define(NDINT, 2)
define(NDSEQ, 3)
define(NDIF, 4)
define(NDPRTC, 5)
define(NDPRTS, 6)
define(NDPRTI, 7)
define(NDWHIL, 8)
define(NDASGN, 9)
define(NDNEG, 10)
define(NDNOT, 11)
define(NDMUL, 12)
define(NDDIV, 13)
define(NDMOD, 14)
define(NDADD, 15)
define(NDSUB, 16)
define(NDLT, 17)
define(NDLE, 18)
define(NDGT, 19)
define(NDGE, 20)
define(NDEQ, 21)
define(NDNE, 22)
define(NDAND, 23)
define(NDOR, 24)
 
subroutine string (src, isrc, nsrc, strngs, istrng, i, n)
 
# Store a string in the string pool.
 
implicit none
 
character src(*) # Source string.
integer isrc, nsrc # Index and length of the source substring.
character strngs(STRNSZ) # The string pool.
integer istrng # The string pool's next slot.
integer i, n # Index and length within the string pool.
 
integer j
 
if (STRNSZ < istrng + nsrc)
{
write (*, '(''string pool exhausted'')')
stop
}
for (j = 0; j < nsrc; j = j + 1)
strngs(istrng + j) = src(isrc + j)
i = istrng
n = nsrc
istrng = istrng + nsrc
end
 
subroutine astnod (node, nodes, inodes, i)
 
# Store a node in the nodes pool.
 
implicit none
 
integer node(NODESZ)
integer nodes(NODESZ, NODSSZ)
integer inodes
integer i
 
integer j
 
if (NODSSZ < inodes + 1)
{
write (*, '(''node pool exhausted'')')
stop
}
i = inodes
inodes = inodes + 1
for (j = 1; j <= NODESZ; j = j + 1)
nodes(j, i) = node(j)
end
 
function issp (c)
 
# Is a character a space character?
 
implicit none
 
character c
logical issp
 
integer ic
 
ic = ichar (c)
issp = (ic == 32 || (9 <= ic && ic <= 13))
end
 
function skipsp (str, i, imax)
 
# Skip past spaces in a string.
 
implicit none
 
character str(*)
integer i
integer imax
integer skipsp
 
logical issp
 
logical done
 
skipsp = i
done = .false.
while (!done)
{
if (imax <= skipsp)
done = .true.
else if (!issp (str(skipsp)))
done = .true.
else
skipsp = skipsp + 1
}
end
 
function skipns (str, i, imax)
 
# Skip past non-spaces in a string.
 
implicit none
 
character str(*)
integer i
integer imax
integer skipns
 
logical issp
 
logical done
 
skipns = i
done = .false.
while (!done)
{
if (imax <= skipns)
done = .true.
else if (issp (str(skipns)))
done = .true.
else
skipns = skipns + 1
}
end
 
function trimrt (str, n)
 
# Find the length of a string, if one ignores trailing spaces.
 
implicit none
 
character str(*)
integer n
integer trimrt
 
logical issp
 
logical done
 
trimrt = n
done = .false.
while (!done)
{
if (trimrt == 0)
done = .true.
else if (!issp (str(trimrt)))
done = .true.
else
trimrt = trimrt - 1
}
end
 
function mktok (str, i, n)
 
# Convert a substring to a token integer.
 
implicit none
 
character str(*)
integer i
integer n
integer mktok
 
character*16 tokstr(0:30)
character*16 test
integer j
logical done
 
data tokstr / 'Keyword_else ', _
'Keyword_if ', _
'Keyword_print ', _
'Keyword_putc ', _
'Keyword_while ', _
'Op_multiply ', _
'Op_divide ', _
'Op_mod ', _
'Op_add ', _
'Op_subtract ', _
'Op_negate ', _
'Op_less ', _
'Op_lessequal ', _
'Op_greater ', _
'Op_greaterequal ', _
'Op_equal ', _
'Op_notequal ', _
'Op_not ', _
'Op_assign ', _
'Op_and ', _
'Op_or ', _
'LeftParen ', _
'RightParen ', _
'LeftBrace ', _
'RightBrace ', _
'Semicolon ', _
'Comma ', _
'Identifier ', _
'Integer ', _
'String ', _
'End_of_input ' /
 
test = ' '
for (j = 0; j < n; j = j + 1)
test(j + 1 : j + 1) = str(i + j)
 
j = 0
done = .false.
while (!done)
{
if (TKEOI < j)
{
write (*, '(''unrecognized token'')')
stop
}
else if (test == tokstr(j))
done = .true.
else
j = j + 1
}
 
mktok = j
end
 
function mkint (str, i, n)
 
# Convert a unsigned integer substring to an integer.
 
implicit none
 
character str(*)
integer i
integer n
integer mkint
 
integer j
 
mkint = 0
for (j = 0; j < n; j = j + 1)
mkint = (10 * mkint) + (ichar (str(i + j)) - 48)
end
 
subroutine rdtok (strngs, istrng, blank, linno, colno, tokno, _
itkval, ntkval)
 
# Read a token from the input.
 
implicit none
 
character strngs(STRNSZ) # The string pool.
integer istrng # The string pool's next slot.
logical blank # Is the line blank?
integer linno # The line number.
integer colno # The column number.
integer tokno # The token number.
integer itkval, ntkval # Token value as a string.
 
integer skipsp, skipns, trimrt
integer mkint, mktok
 
character line(LINESZ)
character*20 fmt
integer n, i, j
 
# Read a line of text as an array of characters.
write (fmt, '(''('', I10, ''A1)'')') LINESZ
read (*, fmt) line
 
n = trimrt (line, LINESZ)
blank = (n == 0)
 
if (!blank)
{
i = skipsp (line, 1, n + 1)
j = skipns (line, i, n + 1)
linno = mkint (line, i, j - i)
 
i = skipsp (line, j, n + 1)
j = skipns (line, i, n + 1)
colno = mkint (line, i, j - i)
 
i = skipsp (line, j, n + 1)
j = skipns (line, i, n + 1)
tokno = mktok (line, i, j - i)
 
i = skipsp (line, j, n + 1)
j = n + 1
call string (line, i, j - i, strngs, istrng, itkval, ntkval)
}
end
 
subroutine gettok (strngs, istrng, tok)
 
# Get the next token.
 
implicit none
 
character strngs(STRNSZ) # The string pool.
integer istrng # The string pool's next slot.
integer tok(TOKSZ)
 
integer linno, colno, tokno, itkval, ntkval
logical blank
 
blank = .true.
while (blank)
call rdtok (strngs, istrng, blank, linno, colno, tokno, _
itkval, ntkval)
tok(ILN) = linno
tok(ICN) = colno
tok(ITK) = tokno
tok(ITV) = itkval
tok(ITN) = ntkval
end
 
function accept (strngs, istrng, curtok, tokno)
 
implicit none
 
character strngs(STRNSZ) # The string pool.
integer istrng # The string pool's next slot.
integer curtok(TOKSZ)
integer tokno
logical accept
 
accept = (curtok(ITK) == tokno)
if (accept)
call gettok (strngs, istrng, curtok)
end
 
subroutine expect (strngs, istrng, curtok, tokno)
 
implicit none
 
character strngs(STRNSZ) # The string pool.
integer istrng # The string pool's next slot.
integer curtok(TOKSZ)
integer tokno
 
logical accept
 
if (!accept (strngs, istrng, curtok, tokno))
{
# This is not the same message as printed by the reference C
# implementation. You can change that, if you wish.
write (*, 100) curtok(ILN), curtok(ICN)
100 format ('unexpected token at line ', I5, ', column ', I5)
stop
}
end
 
function prec (tokno)
 
# Precedence.
 
implicit none
 
integer tokno
integer prec
 
if (tokno == TKMUL || tokno == TKDIV || tokno == TKMOD)
prec = 13
else if (tokno == TKADD || tokno == TKSUB)
prec = 12
else if (tokno == TKNEG || tokno == TKNOT)
prec = 14
else if (tokno == TKLT || tokno == TKLE || _
tokno == TKGT || tokno == TKGE)
prec = 10
else if (tokno == TKEQ || tokno == TKNE)
prec = 9
else if (tokno == TKAND)
prec = 5
else if (tokno == TKOR)
prec = 4
else
prec = -1
end
 
function isbin (tokno)
 
# Is an operation binary?
 
implicit none
 
integer tokno
logical isbin
 
isbin = (tokno == TKADD || _
tokno == TKSUB || _
tokno == TKMUL || _
tokno == TKDIV || _
tokno == TKMOD || _
tokno == TKLT || _
tokno == TKLE || _
tokno == TKGT || _
tokno == TKGE || _
tokno == TKEQ || _
tokno == TKNE || _
tokno == TKAND || _
tokno == TKOR)
end
 
function rtassc (tokno)
 
# Is an operation right associative?
 
implicit none
 
integer tokno
logical rtassc
 
# None of the current operators is right associative.
rtassc = .false.
end
 
function opernt (tokno)
 
# Return the node tag for a binary operator.
 
implicit none
 
integer tokno
integer opernt
 
if (tokno == TKMUL)
opernt = NDMUL
else if (tokno == TKDIV)
opernt = NDDIV
else if (tokno == TKMOD)
opernt = NDMOD
else if (tokno == TKADD)
opernt = NDADD
else if (tokno == TKSUB)
opernt = NDSUB
else if (tokno == TKNEG)
opernt = NDNEG
else if (tokno == TKNOT)
opernt = NDNOT
else if (tokno == TKLT)
opernt = NDLT
else if (tokno == TKLE)
opernt = NDLE
else if (tokno == TKGT)
opernt = NDGT
else if (tokno == TKGE)
opernt = NDGE
else if (tokno == TKEQ)
opernt = NDEQ
else if (tokno == TKNE)
opernt = NDNE
else if (tokno == TKAND)
opernt = NDAND
else if (tokno == TKOR)
opernt = NDOR
else
{
write (*, '(''unrecognized binary operator'')')
stop
}
end
 
#---------------------------------------------------------------------
 
subroutine prtast (strngs, nodes, i, dstack)
 
# Print a tree in flattened format.
 
implicit none
 
character strngs(*)
integer nodes(NODESZ, NODSSZ)
integer i
integer dstack(DSTKSZ)
 
integer j
integer k
integer n
integer q, r
integer tag
 
character*80 fmt
 
dstack(1) = i
j = 2
while (j != 1)
{
j = j - 1
k = dstack(j)
if (k < 1)
write (*, '('';'')')
else
{
tag = nodes(NTAG, k)
if (tag == NDID)
{
n = nodes(NITN, k)
write (fmt, '(''("Identifier ", '', I5, ''A)'')') n
q = nodes(NITV, k)
write (*, fmt) (strngs(r), r = q, q + n - 1)
}
else if (tag == NDINT)
{
n = nodes(NITN, k)
write (fmt, '(''("Integer ", '', I5, ''A)'')') n
q = nodes(NITV, k)
write (*, fmt) (strngs(r), r = q, q + n - 1)
}
else if (tag == NDSTR)
{
n = nodes(NITN, k)
write (fmt, '(''("String ", '', I5, ''A)'')') n
q = nodes(NITV, k)
write (*, fmt) (strngs(r), r = q, q + n - 1)
}
else
{
if (tag == NDSEQ)
write (*, '(''Sequence'')')
else if (tag == NDIF)
write (*, '(''If'')')
else if (tag == NDPRTC)
write (*, '(''Prtc'')')
else if (tag == NDPRTS)
write (*, '(''Prts'')')
else if (tag == NDPRTI)
write (*, '(''Prti'')')
else if (tag == NDWHIL)
write (*, '(''While'')')
else if (tag == NDASGN)
write (*, '(''Assign'')')
else if (tag == NDNEG)
write (*, '(''Negate'')')
else if (tag == NDNOT)
write (*, '(''Not'')')
else if (tag == NDMUL)
write (*, '(''Multiply'')')
else if (tag == NDDIV)
write (*, '(''Divide'')')
else if (tag == NDMOD)
write (*, '(''Mod'')')
else if (tag == NDADD)
write (*, '(''Add'')')
else if (tag == NDSUB)
write (*, '(''Subtract'')')
else if (tag == NDLT)
write (*, '(''Less'')')
else if (tag == NDLE)
write (*, '(''LessEqual'')')
else if (tag == NDGT)
write (*, '(''Greater'')')
else if (tag == NDGE)
write (*, '(''GreaterEqual'')')
else if (tag == NDEQ)
write (*, '(''Equal'')')
else if (tag == NDNE)
write (*, '(''NotEqual'')')
else if (tag == NDAND)
write (*, '(''And'')')
else if (tag == NDOR)
write (*, '(''Or'')')
else
{
write (*, '(''unrecognized node type'')')
stop
}
if (DSTKSZ - 2 < n)
{
write (*, '(''node stack overflow'')')
stop
}
dstack(j) = nodes(NRIGHT, k)
dstack(j + 1) = nodes(NLEFT, k)
j = j + 2
}
}
}
end
 
#---------------------------------------------------------------------
 
# A tiny recursive language. Each instruction is two integers,
# although the second integer may be XPAD. XLOCs are named by
# integers.
 
define(XPAD, 0) # "Padding"
 
define(XLOC, 10) # "Jump or call location"
define(XJUMP, 20) # "Jump to a place"
define(XJUMPT, 30) # "Jump to a place, if true"
define(XJUMPF, 40) # "Jump to a place, if false"
define(XCALL, 50) # "Call a subprogram"
define(XRET, 60) # "Return from a subprogram"
 
define(XPUSH, 110) # "Push an immediate value"
define(XSWAP, 120) # "Swap top two stack entries"
 
define(XLT, 200) # "Less than?"
define(XADDI, 210) # "Add immediate."
 
define(XPPUSH, 610) # "Push top to precedence stack"
define(XPCOPY, 620) # "Copy top of prec stack to top"
define(XPDROP, 630) # "Drop top of precedence stack"
 
define(XGETTK, 710) # "Get the next token"
define(XTOKEQ, 720) # "Token equals the argument?"
define(XEXPCT, 730) # "Expect token"
define(XACCPT, 740) # "Accept token"
 
define(XTOK, 810) # "Push the token number"
define(XBINOP, 820) # "Is top a binary operator?"
define(XRASSC, 830) # "Is top a right associative op?"
define(XPREC, 840) # "Precedence of token no. on top"
define(XOPER, 850) # "Operator for token no. on top"
 
define(XINTND, 970) # "Make internal node"
define(XOPND, 975) # "Make internal node for operator"
define(XLEFND, 980) # "Make leaf node"
define(XNILND, 985) # "Make nil node"
 
define(XERROR, 1010) # "Error"
define(XRWARN, 1020) # "Unused right associative branch"
 
define(XPING, 2010) # Print a ping message (for debugging)
define(XPRTND, 2020) # Print node at stack top (for debugging)
define(XPRTTP, 2030) # Print stack top as integer (for debugging)
define(XPRTTK, 2040) # Print the current token (for debugging)
define(XPRTP, 2050) # Print the current precedence (for debugging)
define(XPRTST, 2060) # Print the whole data stack (for debugging)
 
# Call and jump locations in our program:
define(CSTMT, 1000) # stmt
define(STMT01, 1010)
define(STMT02, 1020)
define(STMT03, 1030)
define(STMT04, 1040)
define(STMT05, 1050)
define(STMT06, 1060)
define(STMT07, 1070)
define(STMT08, 1080)
define(STMT09, 1090)
define(STMT10, 1100)
define(STMT11, 1110)
define(STMT12, 1120)
define(STMT13, 1130)
define(STMT14, 1140)
define(STMT15, 1150)
define(CPEXPR, 2000) # paren_expr
define(CEXPR, 3000) # expr
define(EXPR01, 3010)
define(EXPR02, 3020)
define(EXPR03, 3030)
define(EXPR04, 3040)
define(EXPR05, 3050)
define(EXPR06, 3060)
define(EXPR10, 3100)
define(EXPR11, 3110)
define(EXPR12, 3120)
define(EXPR13, 3130)
define(PARS01, 4010) # parse
 
# Error numbers.
define(EXSTMT, 100) # "expecting start of statement"
define(EXPRIM, 200) # "expecting a primary"
 
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 
subroutine ld(code, i, instr1, instr2)
 
implicit none
 
integer code(*)
integer i
integer instr1, instr2
 
code(i) = instr1
code(i + 1) = instr2
i = i + 2
end
 
subroutine ldcode (code)
 
# Load the code that is in the recursive language. The array
# allocated to hold the code must be large enough, but we do not
# check.
 
implicit none
 
integer code(*)
integer i
 
i = 1
 
#--------------------------------------------------
 
# The main loop.
call ld (code, i, XNILND, XPAD) # Nil node for start of sequence.
call ld (code, i, XGETTK, XPAD)
call ld (code, i, XLOC, PARS01) # Top of loop
call ld (code, i, XCALL, CSTMT)
call ld (code, i, XINTND, NDSEQ)
call ld (code, i, XTOKEQ, TKEOI) # End_of_input
call ld (code, i, XJUMPF, PARS01) # Loop unless end of input.
call ld (code, i, XRET, XPAD)
 
#--------------------------------------------------
 
call ld (code, i, XLOC, CEXPR) # Start of "expr"
call ld (code, i, XPPUSH, XPAD) # Push the precedence argument.
 
call ld (code, i, XTOKEQ, TKLPAR) # LeftParen
call ld (code, i, XJUMPF, EXPR01)
 
# "( ... )"
call ld (code, i, XCALL, CPEXPR)
call ld (code, i, XJUMP, EXPR10)
 
call ld (code, i, XLOC, EXPR01)
 
call ld (code, i, XACCPT, TKSUB) # Op_subtract
call ld (code, i, XJUMPF, EXPR02)
 
# Unary minus
call ld (code, i, XPUSH, TKNEG)
call ld (code, i, XPREC, XPAD)
call ld (code, i, XCALL, CEXPR) # expr <--
call ld (code, i, XNILND, XPAD) # expr nil <--
call ld (code, i, XINTND, NDNEG)
call ld (code, i, XJUMP, EXPR10)
 
call ld (code, i, XLOC, EXPR02)
 
call ld (code, i, XACCPT, TKADD) # Op_add
call ld (code, i, XJUMPF, EXPR03)
 
# Unary plus
call ld (code, i, XPUSH, TKNEG)
call ld (code, i, XPREC, XPAD)
call ld (code, i, XCALL, CEXPR) # expr <--
call ld (code, i, XJUMP, EXPR10)
 
call ld (code, i, XLOC, EXPR03)
 
call ld (code, i, XACCPT, TKNOT) # Op_not
call ld (code, i, XJUMPF, EXPR04)
 
# "!"
call ld (code, i, XPUSH, TKNOT)
call ld (code, i, XPREC, XPAD)
call ld (code, i, XCALL, CEXPR) # expr <--
call ld (code, i, XNILND, XPAD) # expr nil <--
call ld (code, i, XINTND, NDNOT)
call ld (code, i, XJUMP, EXPR10)
 
call ld (code, i, XLOC, EXPR04)
 
call ld (code, i, XTOKEQ, TKID) # Identifier
call ld (code, i, XJUMPF, EXPR05)
 
# Identifier
call ld (code, i, XLEFND, NDID)
call ld (code, i, XGETTK, XPAD)
call ld (code, i, XJUMP, EXPR10)
 
call ld (code, i, XLOC, EXPR05)
 
call ld (code, i, XTOKEQ, TKINT) # Integer
call ld (code, i, XJUMPF, EXPR06)
 
# Integer.
call ld (code, i, XLEFND, NDINT)
call ld (code, i, XGETTK, XPAD)
call ld (code, i, XJUMP, EXPR10)
 
call ld (code, i, XLOC, EXPR06)
 
call ld (code, i, XERROR, EXPRIM)
 
call ld (code, i, XLOC, EXPR10) # Top of precedence climbing loop
 
call ld (code, i, XTOK, XPAD)
call ld (code, i, XBINOP, XPAD)
call ld (code, i, XJUMPF, EXPR11) # Exit loop, if not a binary op.
 
call ld (code, i, XTOK, XPAD)
call ld (code, i, XPREC, XPAD) # curtok_prec <--
call ld (code, i, XPCOPY, XPAD) # curtok_prec p <--
call ld (code, i, XLT, XPAD) # (curtok_prec < p)? <--
call ld (code, i, XJUMPT, EXPR11) # Exit loop if true.
 
call ld (code, i, XTOK, XPAD)
call ld (code, i, XOPER, XPAD) # x op <--
call ld (code, i, XSWAP, XPAD) # op x <--
 
call ld (code, i, XTOK, XPAD)
call ld (code, i, XRASSC, XPAD)
call ld (code, i, XJUMPT, EXPR12)
 
# Left associative.
call ld (code, i, XTOK, XPAD)
call ld (code, i, XPREC, XPAD)
call ld (code, i, XADDI, 1) # op x q:=(q + 1) <--
call ld (code, i, XJUMP, EXPR13)
 
call ld (code, i, XLOC, EXPR12)
 
# Right associative. (Currently an unused branch.)
call ld (code, i, XRWARN, XPAD) # Warn about unused branch.
call ld (code, i, XTOK, XPAD)
call ld (code, i, XPREC, XPAD) # op x q <--
 
call ld (code, i, XLOC, EXPR13)
 
call ld (code, i, XGETTK, XPAD)
call ld (code, i, XCALL, CEXPR) # op x expr(q) <--
call ld (code, i, XOPND, XPAD) # new_x <--
 
call ld (code, i, XJUMP, EXPR10) # Continue looping.
 
call ld (code, i, XLOC, EXPR11) # Loop exit.
 
call ld (code, i, XPDROP, XPAD) # Drop the precedence argument.
call ld (code, i, XRET, XPAD) # End of "expr"
 
#--------------------------------------------------
 
call ld (code, i, XLOC, CPEXPR) # Start of "paren_expr"
call ld (code, i, XEXPCT, TKLPAR)
call ld (code, i, XPUSH, 0)
call ld (code, i, XCALL, CEXPR)
call ld (code, i, XEXPCT, TKRPAR)
call ld (code, i, XRET, XPAD)
 
#--------------------------------------------------
 
call ld (code, i, XLOC, CSTMT) # Start of "stmt"
 
call ld (code, i, XACCPT, TKIF) # Keyword_if
call ld (code, i, XJUMPF, STMT01)
 
# "if (...) then ... else ..."
call ld (code, i, XCALL, CPEXPR) # Get the paren expr ("if (...)").
call ld (code, i, XCALL, CSTMT) # Get the "then" clause.
call ld (code, i, XACCPT, TKELSE) # Keyword_else
call ld (code, i, XJUMPF, STMT02)
call ld (code, i, XCALL, CSTMT) # Get the "else" clause.
call ld (code, i, XJUMP, STMT03)
call ld (code, i, XLOC, STMT02)
call ld (code, i, XNILND, XPAD) # The "else" statement is nil.
call ld (code, i, XLOC, STMT03)
call ld (code, i, XINTND, NDIF) # ("If" pred ("If" then else))
call ld (code, i, XINTND, NDIF)
call ld (code, i, XRET, XPAD)
 
call ld (code, i, XLOC, STMT01)
 
call ld (code, i, XACCPT, TKPUTC) # Keyword_putc
call ld (code, i, XJUMPF, STMT04)
 
# "putc (...);"
call ld (code, i, XCALL, CPEXPR) # Get the paren expr.
call ld (code, i, XNILND, XPAD)
call ld (code, i, XINTND, NDPRTC) # ("Prtc" expr nil)
call ld (code, i, XEXPCT, TKSEMI) # Expect ";"
call ld (code, i, XRET, XPAD)
 
call ld (code, i, XLOC, STMT04)
 
call ld (code, i, XACCPT, TKPRNT) # Keyword_print
call ld (code, i, XJUMPF, STMT05)
 
# "print(... , ... , ...);"
call ld (code, i, XEXPCT, TKLPAR) # Expect "("
call ld (code, i, XNILND, XPAD) # nil for start of sequence
call ld (code, i, XLOC, STMT08) # Top of loop
call ld (code, i, XTOKEQ, TKSTR)
call ld (code, i, XJUMPT, STMT06)
call ld (code, i, XPUSH, 0)
call ld (code, i, XCALL, CEXPR)
call ld (code, i, XNILND, XPAD)
call ld (code, i, XINTND, NDPRTI) # ("Prti" expr nil)
call ld (code, i, XJUMP, STMT07)
call ld (code, i, XLOC, STMT06)
call ld (code, i, XLEFND, NDSTR)
call ld (code, i, XNILND, XPAD)
call ld (code, i, XINTND, NDPRTS) # ("Prts" ("String" ...) nil)
call ld (code, i, XGETTK, XPAD)
call ld (code, i, XLOC, STMT07)
call ld (code, i, XINTND, NDSEQ) # ("Sequence" ... ...)
call ld (code, i, XACCPT, TKCMMA) # Comma
call ld (code, i, XJUMPT, STMT08) # Loop if comma.
call ld (code, i, XEXPCT, TKRPAR) # Expect ")"
call ld (code, i, XEXPCT, TKSEMI) # Expect ";"
call ld (code, i, XRET, XPAD)
 
call ld (code, i, XLOC, STMT05)
 
call ld (code, i, XACCPT, TKSEMI) # Semicolon
call ld (code, i, XJUMPF, STMT09)
 
# Accept a lone ";".
call ld (code, i, XRET, XPAD)
 
call ld (code, i, XLOC, STMT09)
 
call ld (code, i, XTOKEQ, TKID) # Identifier
call ld (code, i, XJUMPF, STMT10)
 
# "identifier = expr;"
call ld (code, i, XLEFND, NDID) # ("Identifier" ...)
call ld (code, i, XGETTK, XPAD)
call ld (code, i, XEXPCT, TKASGN)
call ld (code, i, XPUSH, 0)
call ld (code, i, XCALL, CEXPR)
call ld (code, i, XINTND, NDASGN) # ("Assign" ("Identifier" ...) expr)
call ld (code, i, XEXPCT, TKSEMI)
call ld (code, i, XRET, XPAD)
 
call ld (code, i, XLOC, STMT10)
 
call ld (code, i, XACCPT, TKWHIL) # While
call ld (code, i, XJUMPF, STMT11)
 
# "while (...) ..."
call ld (code, i, XCALL, CPEXPR)
call ld (code, i, XCALL, CSTMT)
call ld (code, i, XINTND, NDWHIL) # ("While" pred stmt)
call ld (code, i, XRET, XPAD)
 
call ld (code, i, XLOC, STMT11)
 
call ld (code, i, XACCPT, TKLBRC) # LeftBrace
call ld (code, i, XJUMPF, STMT12)
 
# "{ ... }"
call ld (code, i, XNILND, XPAD) # nil for start of sequence
call ld (code, i, XLOC, STMT13) # Top of loop
call ld (code, i, XTOKEQ, TKEOI)
call ld (code, i, XJUMPT, STMT14)
call ld (code, i, XTOKEQ, TKRBRC)
call ld (code, i, XJUMPT, STMT14)
call ld (code, i, XCALL, CSTMT)
call ld (code, i, XINTND, NDSEQ) # ("Sequence" ... ...)
call ld (code, i, XJUMP, STMT13) # Loop again.
call ld (code, i, XLOC, STMT14) # Loop exit
call ld (code, i, XEXPCT, TKRBRC) # Expect ";".
call ld (code, i, XRET, XPAD)
 
call ld (code, i, XLOC, STMT12)
 
call ld (code, i, XTOKEQ, TKEOI) # End_of_input
call ld (code, i, XJUMPF, STMT15)
 
call ld (code, i, XRET, XPAD)
 
call ld (code, i, XLOC, STMT15)
 
call ld (code, i, XERROR, EXSTMT) # "expecting start of stmt"
 
#--------------------------------------------------
 
end
 
subroutine dtpush (dstack, idstck, x)
 
# Push to the data stack.
 
implicit none
 
integer dstack(DSTKSZ)
integer idstck
integer x
 
if (DSTKSZ < idstck)
{
write (*, '(''node stack exhausted'')')
stop
}
dstack(idstck) = x
idstck = idstck + 1
end
 
function dtpop (dstack, idstck)
 
# Pop from the data stack.
 
implicit none
 
integer dstack(DSTKSZ)
integer idstck
integer dtpop
 
if (DSTKSZ < idstck)
{
write (*, '(''node stack exhausted'')')
stop
}
idstck = idstck - 1
dtpop = dstack(idstck)
end
 
subroutine ppush (pstack, ipstck, x)
 
# Push to the precedence stack.
 
implicit none
 
integer pstack(PSTKSZ)
integer ipstck
integer x
 
if (PSTKSZ < ipstck)
{
write (*, '(''precedence stack exhausted'')')
stop
}
pstack(ipstck) = x
ipstck = ipstck + 1
end
 
function ppop (pstack, ipstck)
 
# Pop from the precedence stack.
 
implicit none
 
integer pstack(PSTKSZ)
integer ipstck
integer ppop
 
if (PSTKSZ < ipstck)
{
write (*, '(''precedence stack exhausted'')')
stop
}
ipstck = ipstck - 1
ppop = pstack(ipstck)
end
 
function ipfind (code, loc)
 
# Find a location.
 
implicit none
 
integer code(*)
integer loc
integer ipfind
 
integer i
 
i = 1
while (code(i) != XLOC || code(i + 1) != loc)
i = i + 2
ipfind = i
end
 
subroutine ippush (xstack, ixstck, ip)
 
# Push the instruction pointer.
 
implicit none
 
integer xstack(XSTKSZ)
integer ixstck
integer ip
 
if (XSTKSZ < ixstck)
{
write (*, '(''recursive call stack exhausted'')')
stop
}
xstack(ixstck) = ip
ixstck = ixstck + 1
end
 
function ippop (xstack, ixstck)
 
# Pop an instruction pointer value.
 
implicit none
 
integer xstack(XSTKSZ)
integer ixstck
integer ippop
 
if (ixstck == 1)
{
write (*, '(''recursive call stack underflow'')')
stop
}
ixstck = ixstck - 1
ippop = xstack(ixstck)
end
 
function logl2i (u)
 
# Convert LOGICAL to INTEGER.
 
implicit none
 
logical u
integer logl2i
 
if (u)
logl2i = 1
else
logl2i = 0
end
 
subroutine recurs (strngs, istrng,
nodes, inodes, _
dstack, idstck, _
pstack, ipstck, _
xstack, ixstck, _
code, ip)
 
implicit none
 
character strngs(STRNSZ) # String pool.
integer istrng # String pool's next slot.
integer nodes(NODESZ, NODSSZ) # Node pool
integer inodes # Node pool's next slot.
integer dstack(DSTKSZ) # Data stack.
integer idstck # Data stack pointer.
integer pstack(PSTKSZ) # Precedence stack.
integer ipstck # Precedence stack pointer.
integer xstack(XSTKSZ) # Execution stack.
integer ixstck # Execution stack pointer.
integer code(*) # Code in the recursive language.
integer ip # Instruction pointer.
 
integer prec
integer opernt
integer logl2i
integer dtpop
integer ppop
integer ippop
integer ipfind
logical accept
logical isbin
logical rtassc
 
integer curtok(TOKSZ)
integer node(NODESZ)
integer curprc # Current precedence value.
integer i, j
logical done
 
curprc = 0
done = .false.
while (.not. done)
{
if (code(ip) == XLOC)
{
ip = ip + 2
}
else if (code(ip) == XJUMP)
{
ip = ipfind (code, code(ip + 1))
}
else if (code(ip) == XJUMPT)
{
i = dtpop (dstack, idstck)
if (i != 0)
ip = ipfind (code, code(ip + 1))
else
ip = ip + 2
}
else if (code(ip) == XJUMPF)
{
i = dtpop (dstack, idstck)
if (i == 0)
ip = ipfind (code, code(ip + 1))
else
ip = ip + 2
}
else if (code(ip) == XCALL)
{
call ippush (xstack, ixstck, ip + 2)
ip = ipfind (code, code(ip + 1))
}
else if (code(ip) == XRET)
{
if (ixstck == 1)
done = .true.
else
ip = ippop (xstack, ixstck)
}
else if (code(ip) == XINTND)
{
node(NRIGHT) = dtpop (dstack, idstck)
node(NLEFT) = dtpop (dstack, idstck)
node(NTAG) = code(ip + 1)
call astnod (node, nodes, inodes, i)
call dtpush (dstack, idstck, i)
ip = ip + 2
}
else if (code(ip) == XOPND)
{
node(NRIGHT) = dtpop (dstack, idstck)
node(NLEFT) = dtpop (dstack, idstck)
node(NTAG) = dtpop (dstack, idstck)
call astnod (node, nodes, inodes, i)
call dtpush (dstack, idstck, i)
ip = ip + 2
}
else if (code(ip) == XLEFND)
{
node(NITV) = curtok(ITV)
node(NITN) = curtok(ITN)
node(NTAG) = code(ip + 1)
call astnod (node, nodes, inodes, i)
call dtpush (dstack, idstck, i)
ip = ip + 2
}
else if (code(ip) == XNILND)
{
call dtpush (dstack, idstck, NIL)
ip = ip + 2
}
else if (code(ip) == XGETTK)
{
call gettok (strngs, istrng, curtok)
ip = ip + 2
}
else if (code(ip) == XTOKEQ)
{
i = logl2i (curtok(ITK) == code(ip + 1))
call dtpush (dstack, idstck, i)
ip = ip + 2
}
else if (code(ip) == XEXPCT)
{
call expect (strngs, istrng, curtok, code(ip + 1))
ip = ip + 2
}
else if (code(ip) == XACCPT)
{
i = logl2i (accept (strngs, istrng, curtok, code(ip + 1)))
call dtpush (dstack, idstck, i)
ip = ip + 2
}
else if (code(ip) == XSWAP)
{
i = dtpop (dstack, idstck)
j = dtpop (dstack, idstck)
call dtpush (dstack, idstck, i)
call dtpush (dstack, idstck, j)
ip = ip + 2
}
else if (code(ip) == XLT)
{
j = dtpop (dstack, idstck)
i = dtpop (dstack, idstck)
call dtpush (dstack, idstck, logl2i (i < j))
ip = ip + 2
}
else if (code(ip) == XADDI)
{
i = dtpop (dstack, idstck)
call dtpush (dstack, idstck, i + code(ip + 1))
ip = ip + 2
}
else if (code(ip) == XPPUSH)
{
i = dtpop (dstack, idstck)
call ppush (pstack, ipstck, i)
ip = ip + 2
}
else if (code(ip) == XPCOPY)
{
i = ppop (pstack, ipstck)
call ppush (pstack, ipstck, i)
call dtpush (dstack, idstck, i)
ip = ip + 2
}
else if (code(ip) == XPDROP)
{
i = ppop (pstack, ipstck)
ip = ip + 2
}
else if (code(ip) == XBINOP)
{
i = dtpop (dstack, idstck)
i = logl2i (isbin (i))
call dtpush (dstack, idstck, i)
ip = ip + 2
}
else if (code(ip) == XRASSC)
{
i = dtpop (dstack, idstck)
i = logl2i (rtassc (i))
call dtpush (dstack, idstck, i)
ip = ip + 2
}
else if (code(ip) == XPREC)
{
i = dtpop (dstack, idstck)
call dtpush (dstack, idstck, prec (i))
ip = ip + 2
}
else if (code(ip) == XOPER)
{
i = dtpop (dstack, idstck)
call dtpush (dstack, idstck, opernt (i))
ip = ip + 2
}
else if (code(ip) == XTOK)
{
call dtpush (dstack, idstck, curtok(ITK))
ip = ip + 2
}
else if (code(ip) == XPUSH)
{
call dtpush (dstack, idstck, code(ip + 1))
ip = ip + 2
}
else if (code(ip) == XERROR)
{
if (code(ip + 1) == EXSTMT)
{
write (*, 1000) curtok(ILN), curtok(ICN)
1000 format ('expected start of statement at line ', _
I5, ', column ', I5)
}
else if (code(ip + 1) == EXPRIM)
{
write (*, 1010) curtok(ILN), curtok(ICN)
1010 format ('expected a primary at line ', _
I5, ', column ', I5)
}
else
{
write (*, 2000) curtok(ILN), curtok(ICN)
2000 format ('syntax error at line ', _
I5, ', column ', I5)
}
stop
}
else if (code(ip) == XRWARN)
{
write (*, 3000)
3000 format ('executing supposedly unused ', _
'"right associative" operator branch')
ip = ip + 2
}
else if (code(ip) == XPING)
{
write (*, '(''ping'')')
ip = ip + 2
}
else if (code(ip) == XPRTND)
{
i = dtpop (dstack, idstck)
call dtpush (dstack, idstck, i)
call prtast (strngs, nodes, i, dstack)
ip = ip + 2
}
else if (code(ip) == XPRTTP)
{
i = dtpop (dstack, idstck)
call dtpush (dstack, idstck, i)
write (*, '(''top = '', I20)') i
ip = ip + 2
}
else if (code(ip) == XPRTTK)
{
write (*, '(''curtok ='', 5(1X, I5))') curtok
ip = ip + 2
}
else if (code(ip) == XPRTP)
{
write (*, '(''curprc = '', I2)') curprc
ip = ip + 2
}
else if (code(ip) == XPRTST)
{
write (*, '(''dstack ='', 100000(1X, I5))') _
(dstack(i), i = 1, idstck - 1)
ip = ip + 2
}
else
{
write (*, '(''illegal instruction'')')
stop
}
}
end
 
#---------------------------------------------------------------------
 
program parse
 
implicit none
 
character strngs(STRNSZ) # String pool.
integer istrng # String pool's next slot.
integer nodes(NODESZ, NODSSZ) # Node pool
integer inodes # Node pool's next slot.
integer dstack(DSTKSZ) # Node stack.
integer idstck # Node stack pointer.
integer pstack(PSTKSZ) # Precedence stack.
integer ipstck # Precedence stack pointer.
integer xstack(XSTKSZ) # Execution stack.
integer ixstck # Execution stack pointer.
integer code(1000) # Recursive code.
integer ip # Instruction pointer.
 
integer i
 
integer dtpop
 
istrng = 1
inodes = 1
idstck = 1
ipstck = 1
ixstck = 1
 
call ldcode (code)
ip = 1
 
call recurs (strngs, istrng, nodes, inodes, _
dstack, idstck, pstack, ipstck, _
xstack, ixstck, code, ip)
i = dtpop (dstack, idstck)
call prtast (strngs, nodes, i, dstack)
end
 
######################################################################</syntaxhighlight>
 
 
{{out}}
To compile and run with gfortran on a POSIX system:
 
<pre>$ ratfor77 parse-in-ratfor.r > parse-in-ratfor.f && gfortran -O2 -fcheck=all -std=legacy parse-in-ratfor.f && ./a.out < compiler-tests/primes.lex</pre>
 
To use f2c instead of gfortran:
 
<pre>ratfor77 parse-in-ratfor.r > parse-in-ratfor.f && f2c -C -Nc40 parse-in-ratfor.f && cc -O parse-in-ratfor.c -lf2c && ./a.out < compiler-tests/primes.lex</pre>
 
The output should be:
 
<pre>Sequence
Sequence
Sequence
Sequence
Sequence
;
Assign
Identifier count
Integer 1
Assign
Identifier n
Integer 1
Assign
Identifier limit
Integer 100
While
Less
Identifier n
Identifier limit
Sequence
Sequence
Sequence
Sequence
Sequence
;
Assign
Identifier k
Integer 3
Assign
Identifier p
Integer 1
Assign
Identifier n
Add
Identifier n
Integer 2
While
And
LessEqual
Multiply
Identifier k
Identifier k
Identifier n
Identifier p
Sequence
Sequence
;
Assign
Identifier p
NotEqual
Multiply
Divide
Identifier n
Identifier k
Identifier k
Identifier n
Assign
Identifier k
Add
Identifier k
Integer 2
If
Identifier p
If
Sequence
Sequence
;
Sequence
Sequence
;
Prti
Identifier n
;
Prts
String " is prime\n"
;
Assign
Identifier count
Add
Identifier count
Integer 1
;
Sequence
Sequence
Sequence
;
Prts
String "Total primes found: "
;
Prti
Identifier count
;
Prts
String "\n"
;</pre>
 
=={{header|Scala}}==
Line 5,834 ⟶ 11,732:
The following code implements a configurable (from a symbol map provided as a parameter) Precedence Climbing parser for the output of the [http://rosettacode.org/wiki/Compiler/lexical_analyzer#Scala lexer]. The recursive descent language parser is closely based on the pseudo code given in the task description.
 
<langsyntaxhighlight lang="scala">
package xyz.hyperreal.rosettacodeCompiler
 
Line 6,044 ⟶ 11,942:
 
}
</syntaxhighlight>
</lang>
 
=={{header|Scheme}}==
Line 6,050 ⟶ 11,948:
Code implements a recursive descent parser based on the given grammar. Tested against all programs in [[Compiler/Sample programs]].
 
<langsyntaxhighlight lang="scheme">
(import (scheme base)
(scheme process-context)
Line 6,257 ⟶ 12,155:
(display-ast (parse-file (cadr (command-line))))
(display "Error: provide program filename\n"))
</syntaxhighlight>
</lang>
 
=={{header|Wren}}==
Line 6,264 ⟶ 12,162:
{{libheader|Wren-fmt}}
{{libheader|wren-ioutil}}
<langsyntaxhighlight ecmascriptlang="wren">import "./dynamic" for Enum, Struct, Tuple
import "./fmt" for Fmt
import "./ioutil" for FileUtil
 
var tokens = [
Line 6,580 ⟶ 12,478:
lines = FileUtil.readLines("source.txt")
lineCount = lines.count
prtAst.call(parse.call())</langsyntaxhighlight>
 
{{out}}
Line 6,682 ⟶ 12,580:
 
=={{header|Zig}}==
<langsyntaxhighlight lang="zig">
const std = @import("std");
 
Line 7,235 ⟶ 13,133:
return result;
}
</syntaxhighlight>
</lang>
9,476

edits