Compiler/AST interpreter
An AST interpreter interprets an Abstract Syntax Tree (AST) produced by a Syntax Analyzer.
Take the AST output from the Syntax analyzer task, and interpret it as appropriate. Refer to the Syntax analyzer task for details of the AST.
- Loading the AST from the syntax analyzer is as simple as (pseudo code)
<lang python>def load_ast()
line = readline() # Each line has at least one token line_list = tokenize the line, respecting double quotes
text = line_list[0] # first token is always the node type
if text == ";" # a terminal node return NULL
node_type = text # could convert to internal form if desired
# A line with two tokens is a leaf node # Leaf nodes are: Identifier, Integer, String # The 2nd token is the value if len(line_list) > 1 return make_leaf(node_type, line_list[1])
left = load_ast() right = load_ast() return make_node(node_type, left, right)</lang>
- The interpreter algorithm is relatively simple
<lang python>interp(x)
if x == NULL return NULL elif x.node_type == Integer return x.value converted to an integer elif x.node_type == Ident return the current value of variable x.value elif x.node_type == String return x.value elif x.node_type == Assign globals[x.left.value] = interp(x.right) return NULL elif x.node_type is a binary operator return interp(x.left) operator interp(x.right) elif x.node_type is a unary operator, return return operator interp(x.left) elif x.node_type == If if (interp(x.left)) then interp(x.right.left) else interp(x.right.right) return NULL elif x.node_type == While while (interp(x.left)) do interp(x.right) return NULL elif x.node_type == Prtc print interp(x.left) as a character, no newline return NULL elif x.node_type == Prti print interp(x.left) as an integer, no newline return NULL elif x.node_type == Prts print interp(x.left) as a string, respecting newlines ("\n") return NULL elif x.node_type == Sequence interp(x.left) interp(x.right) return NULL else error("unknown node type")</lang>
Notes:
Because of the simple nature of our tiny language, Semantic analysis is not needed.
Your interpreter should use C like division semantics, for both division and modulus. For division of positive operands, only the non-fractional portion of the result should be returned. In other words, the result should be truncated towards 0.
This means, for instance, that 3 / 2 should result in 1.
For division when one of the operands is negative, the result should be truncated towards 0.
This means, for instance, that 3 / -2 should result in -1.
- Test program
prime.t | parse | interp |
---|---|
<lang c>/* Simple prime number generator */ count = 1; n = 1; limit = 100; while (n < limit) { k=3; p=1; n=n+2; while ((k*k<=n) && (p)) { p=n/k*k!=n; k=k+2; } if (p) { print(n, " is prime\n"); count = count + 1; } } print("Total primes found: ", count, "\n"); </lang> |
3 is prime 5 is prime 7 is prime 11 is prime 13 is prime 17 is prime 19 is prime 23 is prime 29 is prime 31 is prime 37 is prime 41 is prime 43 is prime 47 is prime 53 is prime 59 is prime 61 is prime 67 is prime 71 is prime 73 is prime 79 is prime 83 is prime 89 is prime 97 is prime 101 is prime Total primes found: 26 |
- Additional examples
Your solution should pass all the test cases above and the additional tests found Here.
The C and Python versions can be considered reference implementations.
- Related Tasks
ALGOL W
<lang algolw>begin % AST interpreter %
% parse tree nodes % record node( integer type ; reference(node) left, right ; integer iValue % nString/nIndentifier number or nInteger value % ); integer nIdentifier, nString, nInteger, nSequence, nIf, nPrtc, nPrts , nPrti, nWhile, nAssign, nNegate, nNot, nMultiply , nDivide, nMod, nAdd, nSubtract, nLess, nLessEqual , nGreater, nGreaterEqual, nEqual, nNotEqual, nAnd, nOr ; string(14) array ndName ( 1 :: 25 ); integer MAX_NODE_TYPE; % string literals and identifiers - uses a linked list - a hash table might be better... % string(1) array text ( 0 :: 4095 ); integer textNext, TEXT_MAX; record textElement ( integer start, length; reference(textElement) next ); reference(textElement) idList, stList; % memory - identifiers hold indexes to locations here % integer array data ( 1 :: 4096 );
% returns a new node with left and right branches % reference(node) procedure opNode ( integer value opType; reference(node) value opLeft, opRight ) ; begin node( opType, opLeft, opRight, 0 ) end opNode ;
% returns a new operand node % reference(node) procedure operandNode ( integer value opType, opValue ) ; begin node( opType, null, null, opValue ) end operandNode ;
% reports an error and stops % procedure rtError( string(80) value message ); begin integer errorPos; write( s_w := 0, "**** Runtime error " ); errorPos := 0; while errorPos < 80 and message( errorPos // 1 ) not = "." do begin writeon( s_w := 0, message( errorPos // 1 ) ); errorPos := errorPos + 1 end while_not_at_end_of_message ; writeon( s_w := 0, "." ); assert( false ) end rtError ;
% reads a node from standard input % reference(node) procedure readNode ; begin reference(node) resultNode;
% parses a string from line and stores it in a string in the text array % % - if it is not already present in the specified textElement list. % % returns the position of the string in the text array % integer procedure readString ( reference(textElement) value result txList; string(1) value terminator ) ; begin string(256) str; integer sLen, sPos, ePos; logical found; reference(textElement) txPos, txLastPos; % get the text of the string % str := " "; sLen := 0; str( sLen // 1 ) := line( lPos // 1 ); sLen := sLen + 1; lPos := lPos + 1; while lPos <= 255 and line( lPos // 1 ) not = terminator do begin str( sLen // 1 ) := line( lPos // 1 ); sLen := sLen + 1; lPos := lPos + 1 end while_more_string ; if lPos > 255 then rtError( "Unterminated String in node file." ); % attempt to find the text in the list of strings/identifiers % txLastPos := txPos := txList; found := false; ePos := 0; while not found and txPos not = null do begin ePos := ePos + 1; found := ( length(txPos) = sLen ); sPos := 0; while found and sPos < sLen do begin found := str( sPos // 1 ) = text( start(txPos) + sPos ); sPos := sPos + 1 end while_not_found ; txLastPos := txPos; if not found then txPos := next(txPos) end while_string_not_found ; if not found then begin % the string/identifier is not in the list - add it % ePos := ePos + 1; if txList = null then txList := textElement( textNext, sLen, null ) else next(txLastPos) := textElement( textNext, sLen, null ); if textNext + sLen > TEXT_MAX then rtError( "Text space exhausted." ) else begin for cPos := 0 until sLen - 1 do begin text( textNext ) := str( cPos // 1 ); textNext := textNext + 1 end for_cPos end end if_not_found ; ePos end readString ;
% gets an integer from the line - no checks for valid digits % integer procedure readInteger ; begin integer n; n := 0; while line( lPos // 1 ) not = " " do begin n := ( n * 10 ) + ( decode( line( lPos // 1 ) ) - decode( "0" ) ); lPos := lPos + 1 end while_not_end_of_integer ; n end readInteger ;
string(256) line; string(16) name; integer lPos, tPos, ndType; tPos := lPos := 0; readcard( line ); % get the node type name % while line( lPos // 1 ) = " " do lPos := lPos + 1; name := ""; while lPos < 256 and line( lPos // 1 ) not = " " do begin name( tPos // 1 ) := line( lPos // 1 ); lPos := lPos + 1; tPos := tPos + 1 end while_more_name ; % determine the node type % ndType := 1; resultNode := null; if name not = ";" then begin % not a null node % while ndType <= MAX_NODE_TYPE and name not = ndName( ndType ) do ndType := ndType + 1; if ndType > MAX_NODE_TYPE then rtError( "Malformed node." ); % handle the additional parameter for identifier/string/integer, or sub-nodes for operator nodes % if ndType = nInteger or ndType = nIdentifier or ndType = nString then begin while line( lPos // 1 ) = " " do lPos := lPos + 1; if ndType = nInteger then resultNode := operandNode( ndType, readInteger ) else if ndType = nIdentifier then resultNode := operandNode( ndType, readString( idList, " " ) ) else % ndType = nString % resultNode := operandNode( ndType, readString( stList, """" ) ) end else begin % operator node % reference(node) leftNode; leftNode := readNode; resultNode := opNode( ndType, leftNode, readNode ) end end if_non_null_node ; resultNode end readNode ;
% interprets the specified node and returns the value % integer procedure eval ( reference(node) value n ) ; begin integer v;
% prints a string from text, escape sequences are interpreted % procedure writeOnText( reference(textElement) value txHead; integer value txNumber ) ; begin reference(textElement) txPos; integer count; txPos := txHead; count := 1; while count < txNumber and txPos not = null do begin txPos := next(txPos); count := count + 1 end while_text_element_not_found ; if txPos = null then rtError( "INTERNAL ERROR: text not found." ) else begin % found the text - output it, handling escape sequences % integer cPos; cPos := 1; % start from 1 to skip over the leading " % while cPos < length(txPos) do begin string(1) ch; ch := text( start(txPos) + cPos ); if ch not = "\" then writeon( s_w := 0, ch ) else begin % escaped character % cPos := cPos + 1; if cPos > length(txPos) then rtError( "String terminates with ""\""." ) else begin ch := text( start(txPos) + cPos ); if ch = "n" then % newline % write() else writeon( s_w := 0, ch ) end end; cPos := cPos + 1 end while_not_end_of_string end end writeOnText ;
% returns 1 if val is true, 0 otherwise % integer procedure booleanResult ( logical value val ) ; begin if val then 1 else 0 end booleanResult ;
v := 0;
if n = null then v := 0 else if type(n) = nIdentifier then v := data( iValue(n) ) else if type(n) = nString then v := iValue(n) else if type(n) = nInteger then v := iValue(n) else if type(n) = nSequence then begin % sequence - evaluate and discard the left branch and return the right branch % v := eval( left(n) ); v := eval( right(n) ) end else if type(n) = nIf then % if-else % begin if eval( left(n) ) not = 0 then v := eval( left(right(n)) ) else v := eval( right(right(n)) ); v := 0 end else if type(n) = nPrtc then % print character % writeon( s_w := 0, code( eval( left(n) ) ) ) else if type(n) = nPrts then % print string % writeOnText( stList, eval( left(n) ) ) else if type(n) = nPrti then % print integer % writeon( s_w := 0, i_w := 1, eval( left(n) ) ) else if type(n) = nWhile then % while-loop % begin while eval( left(n) ) not = 0 do v := eval( right(n) ); v := 0 end else if type(n) = nAssign then % assignment % data( iValue(left(n)) ) := eval( right(n) ) else if type(n) = nNegate then % unary - % v := - eval( left(n) ) else if type(n) = nNot then % unary not % v := booleanResult( eval( left(n) ) = 0 ) else if type(n) = nMultiply then % multiply % v := eval( left(n) ) * eval( right(n) ) else if type(n) = nDivide then % division % begin integer lv, rv; lv := eval( left(n) ); rv := eval( right(n) ); if rv = 0 then rtError( "Division by 0." ) else v := lv div rv end else if type(n) = nMod then % modulo % begin integer lv, rv; lv := eval( left(n) ); rv := eval( right(n) ); if rv = 0 then rtError( "Right operand of % is 0." ) else v := lv rem rv end else if type(n) = nAdd then % addition % v := eval( left(n) ) + eval( right(n) ) else if type(n) = nSubtract then % subtraction % v := eval( left(n) ) - eval( right(n) ) else if type(n) = nLess then % less-than % v := booleanResult( eval( left(n) ) < eval( right(n) ) ) else if type(n) = nLessEqual then % less or equal % v := booleanResult( eval( left(n) ) <= eval( right(n) ) ) else if type(n) = nGreater then % greater-than % v := booleanResult( eval( left(n) ) > eval( right(n) ) ) else if type(n) = nGreaterEqual then % greater or eq % v := booleanResult( eval( left(n) ) >= eval( right(n) ) ) else if type(n) = nEqual then % test equal % v := booleanResult( eval( left(n) ) = eval( right(n) ) ) else if type(n) = nNotEqual then % not-equal % v := booleanResult( eval( left(n) ) not = eval( right(n) ) ) else if type(n) = nAnd then % boolean "and" % begin v := eval( left(n) ); if v not = 0 then v := eval( right(n) ) end else if type(n) = nOr then % boolean "or" % begin v := eval( left(n) ); if v = 0 then v := eval( right(n) ); end else % unknown node % begin rtError( "Unknown node type in eval." ) end; v end eval ;
nIdentifier := 1; ndName( nIdentifier ) := "Identifier"; nString := 2; ndName( nString ) := "String"; nInteger := 3; ndName( nInteger ) := "Integer"; nSequence := 4; ndName( nSequence ) := "Sequence"; nIf := 5; ndName( nIf ) := "If"; nPrtc := 6; ndName( nPrtc ) := "Prtc"; nPrts := 7; ndName( nPrts ) := "Prts"; nPrti := 8; ndName( nPrti ) := "Prti"; nWhile := 9; ndName( nWhile ) := "While"; nAssign := 10; ndName( nAssign ) := "Assign"; nNegate := 11; ndName( nNegate ) := "Negate"; nNot := 12; ndName( nNot ) := "Not"; nMultiply := 13; ndName( nMultiply ) := "Multiply"; nDivide := 14; ndName( nDivide ) := "Divide"; nMod := 15; ndName( nMod ) := "Mod"; nAdd := 16; ndName( nAdd ) := "Add"; nSubtract := 17; ndName( nSubtract ) := "Subtract"; nLess := 18; ndName( nLess ) := "Less"; nLessEqual := 19; ndName( nLessEqual ) := "LessEqual" ; nGreater := 20; ndName( nGreater ) := "Greater"; nGreaterEqual := 21; ndName( nGreaterEqual ) := "GreaterEqual"; nEqual := 22; ndName( nEqual ) := "Equal"; nNotEqual := 23; ndName( nNotEqual ) := "NotEqual"; nAnd := 24; ndName( nAnd ) := "And"; nOr := 25; ndName( nOr ) := "Or"; MAX_NODE_TYPE := 25; TEXT_MAX := 4095; textNext := 0; stList := idList := null;
% parse the output from the syntax analyser and intetrpret parse tree % eval( readNode )
end.</lang>
- Output:
3 is prime 5 is prime 7 is prime 11 is prime ... 83 is prime 89 is prime 97 is prime 101 is prime Total primes found: 26
C
Tested with gcc 4.81 and later, compiles warning free with -Wall -Wextra <lang C>#include <stdlib.h>
- include <stdio.h>
- include <string.h>
- include <stdarg.h>
- include <ctype.h>
- define da_dim(name, type) type *name = NULL; \
int _qy_ ## name ## _p = 0; \ int _qy_ ## name ## _max = 0
- define da_rewind(name) _qy_ ## name ## _p = 0
- define da_redim(name) do {if (_qy_ ## name ## _p >= _qy_ ## name ## _max) \
name = realloc(name, (_qy_ ## name ## _max += 32) * sizeof(name[0]));} while (0)
- define da_append(name, x) do {da_redim(name); name[_qy_ ## name ## _p++] = x;} while (0)
- define da_len(name) _qy_ ## name ## _p
- define da_add(name) do {da_redim(name); _qy_ ## name ## _p++;} while (0)
typedef enum {
nd_Ident, nd_String, nd_Integer, nd_Sequence, nd_If, nd_Prtc, nd_Prts, nd_Prti, nd_While, nd_Assign, nd_Negate, nd_Not, nd_Mul, nd_Div, nd_Mod, nd_Add, nd_Sub, nd_Lss, nd_Leq, nd_Gtr, nd_Geq, nd_Eql, nd_Neq, nd_And, nd_Or
} NodeType;
typedef struct Tree Tree; struct Tree {
NodeType node_type; Tree *left; Tree *right; int value;
};
// dependency: Ordered by NodeType, must remain in same order as NodeType enum
struct {
char *enum_text; NodeType node_type;
} atr[] = {
{"Identifier" , nd_Ident, }, {"String" , nd_String, }, {"Integer" , nd_Integer,}, {"Sequence" , nd_Sequence,}, {"If" , nd_If, }, {"Prtc" , nd_Prtc, }, {"Prts" , nd_Prts, }, {"Prti" , nd_Prti, }, {"While" , nd_While, }, {"Assign" , nd_Assign, }, {"Negate" , nd_Negate, }, {"Not" , nd_Not, }, {"Multiply" , nd_Mul, }, {"Divide" , nd_Div, }, {"Mod" , nd_Mod, }, {"Add" , nd_Add, }, {"Subtract" , nd_Sub, }, {"Less" , nd_Lss, }, {"LessEqual" , nd_Leq, }, {"Greater" , nd_Gtr, }, {"GreaterEqual", nd_Geq, }, {"Equal" , nd_Eql, }, {"NotEqual" , nd_Neq, }, {"And" , nd_And, }, {"Or" , nd_Or, },
};
FILE *source_fp; da_dim(string_pool, const char *); da_dim(global_names, const char *); da_dim(global_values, int);
void error(const char *fmt, ... ) {
va_list ap; char buf[1000];
va_start(ap, fmt); vsprintf(buf, fmt, ap); printf("error: %s\n", buf); exit(1);
}
Tree *make_node(NodeType node_type, Tree *left, Tree *right) {
Tree *t = calloc(sizeof(Tree), 1); t->node_type = node_type; t->left = left; t->right = right; return t;
}
Tree *make_leaf(NodeType node_type, int value) {
Tree *t = calloc(sizeof(Tree), 1); t->node_type = node_type; t->value = value; return t;
}
int interp(Tree *x) { /* interpret the parse tree */
if (!x) return 0; switch(x->node_type) { case nd_Integer: return x->value; case nd_Ident: return global_values[x->value]; case nd_String: return x->value;
case nd_Assign: return global_values[x->left->value] = interp(x->right); case nd_Add: return interp(x->left) + interp(x->right); case nd_Sub: return interp(x->left) - interp(x->right); case nd_Mul: return interp(x->left) * interp(x->right); case nd_Div: return interp(x->left) / interp(x->right); case nd_Mod: return interp(x->left) % interp(x->right); case nd_Lss: return interp(x->left) < interp(x->right); case nd_Gtr: return interp(x->left) > interp(x->right); case nd_Leq: return interp(x->left) <= interp(x->right); case nd_Eql: return interp(x->left) == interp(x->right); case nd_Neq: return interp(x->left) != interp(x->right); case nd_And: return interp(x->left) && interp(x->right); case nd_Negate: return -interp(x->left); case nd_Not: return !interp(x->left);
case nd_If: if (interp(x->left)) interp(x->right->left); else interp(x->right->right); return 0;
case nd_While: while (interp(x->left)) interp(x->right); return 0;
case nd_Prtc: printf("%c", interp(x->left)); return 0; case nd_Prti: printf("%d", interp(x->left)); return 0; case nd_Prts: printf("%s", string_pool[interp(x->left)]); return 0;
case nd_Sequence: interp(x->left); interp(x->right); return 0;
default: error("interp: unknown tree type %d\n", x->node_type); } return 0;
}
void init_in(const char fn[]) {
if (fn[0] == '\0') source_fp = stdin; else { source_fp = fopen(fn, "r"); if (source_fp == NULL) error("Can't open %s\n", fn); }
}
NodeType get_enum_value(const char name[]) {
for (size_t i = 0; i < sizeof(atr) / sizeof(atr[0]); i++) { if (strcmp(atr[i].enum_text, name) == 0) { return atr[i].node_type; } } error("Unknown token %s\n", name); return -1;
}
char *read_line(int *len) {
static char *text = NULL; static int textmax = 0;
for (*len = 0; ; (*len)++) { int ch = fgetc(source_fp); if (ch == EOF || ch == '\n') { if (*len == 0) return NULL; break; } if (*len + 1 >= textmax) { textmax = (textmax == 0 ? 128 : textmax * 2); text = realloc(text, textmax); } text[*len] = ch; } text[*len] = '\0'; return text;
}
char *rtrim(char *text, int *len) { // remove trailing spaces
for (; *len > 0 && isspace(text[*len - 1]); --(*len)) ;
text[*len] = '\0'; return text;
}
int fetch_string_offset(char *st) {
int len = strlen(st); st[len - 1] = '\0'; ++st; char *p, *q; p = q = st;
while ((*p++ = *q++) != '\0') { if (q[-1] == '\\') { if (q[0] == 'n') { p[-1] = '\n'; ++q; } else if (q[0] == '\\') { ++q; } } }
for (int i = 0; i < da_len(string_pool); ++i) { if (strcmp(st, string_pool[i]) == 0) { return i; } } da_add(string_pool); int n = da_len(string_pool) - 1; string_pool[n] = strdup(st); return da_len(string_pool) - 1;
}
int fetch_var_offset(const char *name) {
for (int i = 0; i < da_len(global_names); ++i) { if (strcmp(name, global_names[i]) == 0) return i; } da_add(global_names); int n = da_len(global_names) - 1; global_names[n] = strdup(name); da_append(global_values, 0); return n;
}
Tree *load_ast() {
int len; char *yytext = read_line(&len); yytext = rtrim(yytext, &len);
// get first token char *tok = strtok(yytext, " ");
if (tok[0] == ';') { return NULL; } NodeType node_type = get_enum_value(tok);
// if there is extra data, get it char *p = tok + strlen(tok); if (p != &yytext[len]) { int n; for (++p; isspace(*p); ++p) ; switch (node_type) { case nd_Ident: n = fetch_var_offset(p); break; case nd_Integer: n = strtol(p, NULL, 0); break; case nd_String: n = fetch_string_offset(p); break; default: error("Unknown node type: %s\n", p); } return make_leaf(node_type, n); }
Tree *left = load_ast(); Tree *right = load_ast(); return make_node(node_type, left, right);
}
int main(int argc, char *argv[]) {
init_in(argc > 1 ? argv[1] : "");
Tree *x = load_ast(); interp(x);
return 0;
}</lang>
- Output — prime numbers output from AST interpreter:
lex prime.t | parse | interp 3 is prime 5 is prime 7 is prime 11 is prime 13 is prime 17 is prime 19 is prime 23 is prime 29 is prime 31 is prime 37 is prime 41 is prime 43 is prime 47 is prime 53 is prime 59 is prime 61 is prime 67 is prime 71 is prime 73 is prime 79 is prime 83 is prime 89 is prime 97 is prime 101 is prime Total primes found: 26
Phix
Reusing parse.e from the Syntax Analyzer task <lang Phix>-- -- demo\rosetta\Compiler\interp.exw -- ================================
include parse.e
sequence vars = {},
vals = {}
function var_idx(sequence inode)
if inode[1]!=tk_Identifier then ?9/0 end if string ident = inode[2] integer n = find(ident,vars) if n=0 then vars = append(vars,ident) vals = append(vals,0) n = length(vars) end if return n
end function
function interp(object t)
if t!=NULL then integer ntype = t[1] object t2 = t[2], t3 = iff(length(t)=3?t[3]:0) switch ntype do case tk_Sequence: {} = interp(t2) {} = interp(t3) case tk_assign: vals[var_idx(t2)] = interp(t3) case tk_Identifier: return vals[var_idx(t)] case tk_Integer: return t2 case tk_String: return t2 case tk_lt: return interp(t2) < interp(t3) case tk_add: return interp(t2) + interp(t3) case tk_sub: return interp(t2) - interp(t3) case tk_while: while interp(t2) do {} = interp(t3) end while case tk_Prints: puts(1,interp(t2)) case tk_Printi: printf(1,"%d",interp(t2)) case tk_putc: printf(1,"%c",interp(t2)) case tk_and: return interp(t2) and interp(t3) case tk_or: return interp(t2) or interp(t3) case tk_le: return interp(t2) <= interp(t3) case tk_ge: return interp(t2) >= interp(t3) case tk_ne: return interp(t2) != interp(t3) case tk_gt: return interp(t2) > interp(t3) case tk_mul: return interp(t2) * interp(t3) case tk_div: return trunc(interp(t2)/interp(t3)) case tk_mod: return remainder(interp(t2),interp(t3)) case tk_if: {} = interp(t3[iff(interp(t2)?2:3)]) case tk_not: return not interp(t2) case tk_neg: return - interp(t2) else error("unknown node type") end switch end if return NULL
end function
procedure main(sequence cl)
open_files(cl) toks = lex() object t = parse() {} = interp(t) close_files()
end procedure
--main(command_line()) main({0,0,"primes.c"})</lang>
- Output:
3 is prime 5 is prime 7 is prime 11 is prime 13 is prime 17 is prime 19 is prime 23 is prime 29 is prime 31 is prime 37 is prime 41 is prime 43 is prime 47 is prime 53 is prime 59 is prime 61 is prime 67 is prime 71 is prime 73 is prime 79 is prime 83 is prime 89 is prime 97 is prime 101 is prime Total primes found: 26
Python
Tested with Python 2.7 and 3.x <lang Python>from __future__ import print_function import sys, shlex, operator
nd_Ident, nd_String, nd_Integer, nd_Sequence, nd_If, nd_Prtc, nd_Prts, nd_Prti, nd_While, \ nd_Assign, nd_Negate, nd_Not, nd_Mul, nd_Div, nd_Mod, nd_Add, nd_Sub, nd_Lss, nd_Leq, \ nd_Gtr, nd_Geq, nd_Eql, nd_Neq, nd_And, nd_Or = range(25)
all_syms = {
"Identifier" : nd_Ident, "String" : nd_String, "Integer" : nd_Integer, "Sequence" : nd_Sequence, "If" : nd_If, "Prtc" : nd_Prtc, "Prts" : nd_Prts, "Prti" : nd_Prti, "While" : nd_While, "Assign" : nd_Assign, "Negate" : nd_Negate, "Not" : nd_Not, "Multiply" : nd_Mul, "Divide" : nd_Div, "Mod" : nd_Mod, "Add" : nd_Add, "Subtract" : nd_Sub, "Less" : nd_Lss, "LessEqual" : nd_Leq, "Greater" : nd_Gtr, "GreaterEqual": nd_Geq, "Equal" : nd_Eql, "NotEqual" : nd_Neq, "And" : nd_And, "Or" : nd_Or}
input_file = None globals = {}
- show error and exit
def error(msg):
print("%s" % (msg)) exit(1)
class Node:
def __init__(self, node_type, left = None, right = None, value = None): self.node_type = node_type self.left = left self.right = right self.value = value
def make_node(oper, left, right = None):
return Node(oper, left, right)
def make_leaf(oper, n):
return Node(oper, value = n)
def fetch_var(var_name):
n = globals.get(var_name, None) if n == None: globals[var_name] = n = 0 return n
def interp(x):
global globals
if x == None: return None elif x.node_type == nd_Integer: return int(x.value) elif x.node_type == nd_Ident: return fetch_var(x.value) elif x.node_type == nd_String: return x.value
elif x.node_type == nd_Assign: globals[x.left.value] = interp(x.right) return None elif x.node_type == nd_Add: return interp(x.left) + interp(x.right) elif x.node_type == nd_Sub: return interp(x.left) - interp(x.right) elif x.node_type == nd_Mul: return interp(x.left) * interp(x.right) # use C like division semantics # another way: abs(x) / abs(y) * cmp(x, 0) * cmp(y, 0) elif x.node_type == nd_Div: return int(float(interp(x.left)) / interp(x.right)) elif x.node_type == nd_Mod: return int(float(interp(x.left)) % interp(x.right)) elif x.node_type == nd_Lss: return interp(x.left) < interp(x.right) elif x.node_type == nd_Gtr: return interp(x.left) > interp(x.right) elif x.node_type == nd_Leq: return interp(x.left) <= interp(x.right) elif x.node_type == nd_Geq: return interp(x.left) >= interp(x.right) elif x.node_type == nd_Eql: return interp(x.left) == interp(x.right) elif x.node_type == nd_Neq: return interp(x.left) != interp(x.right) elif x.node_type == nd_And: return interp(x.left) and interp(x.right) elif x.node_type == nd_Or: return interp(x.left) or interp(x.right) elif x.node_type == nd_Negate: return -interp(x.left) elif x.node_type == nd_Not: return not interp(x.left)
elif x.node_type == nd_If: if (interp(x.left)): interp(x.right.left) else: interp(x.right.right) return None
elif x.node_type == nd_While: while (interp(x.left)): interp(x.right) return None
elif x.node_type == nd_Prtc: print("%c" % (interp(x.left)), end=) return None
elif x.node_type == nd_Prti: print("%d" % (interp(x.left)), end=) return None
elif x.node_type == nd_Prts: print(interp(x.left), end=) return None
elif x.node_type == nd_Sequence: interp(x.left) interp(x.right) return None else: error("error in code generator - found %d, expecting operator" % (x.node_type))
def str_trans(srce):
dest = "" i = 0 srce = srce[1:-1] while i < len(srce): if srce[i] == '\\' and i + 1 < len(srce): if srce[i + 1] == 'n': dest += '\n' i += 2 elif srce[i + 1] == '\\': dest += '\\' i += 2 else: dest += srce[i] i += 1
return dest
def load_ast():
line = input_file.readline() line_list = shlex.split(line, False, False)
text = line_list[0]
value = None if len(line_list) > 1: value = line_list[1] if value.isdigit(): value = int(value)
if text == ";": return None node_type = all_syms[text] if value != None: if node_type == nd_String: value = str_trans(value)
return make_leaf(node_type, value) left = load_ast() right = load_ast() return make_node(node_type, left, right)
- main driver
input_file = sys.stdin if len(sys.argv) > 1:
try: input_file = open(sys.argv[1], "r", 4096) except IOError as e: error(0, 0, "Can't open %s" % sys.argv[1])
n = load_ast() interp(n)</lang>
- Output — prime numbers output from AST interpreter:
lex prime.t | parse | interp 3 is prime 5 is prime 7 is prime 11 is prime 13 is prime 17 is prime 19 is prime 23 is prime 29 is prime 31 is prime 37 is prime 41 is prime 43 is prime 47 is prime 53 is prime 59 is prime 61 is prime 67 is prime 71 is prime 73 is prime 79 is prime 83 is prime 89 is prime 97 is prime 101 is prime Total primes found: 26
Scheme
<lang scheme> (import (scheme base)
(scheme file) (scheme process-context) (scheme write) (only (srfi 13) string-delete string-index string-trim))
- Mappings from operation symbols to internal procedures.
- We define operations appropriate to virtual machine
- e.g. division must return an int, not a rational
- boolean values are treated as numbers
- 0 is false, other is true
(define *unary-ops*
(list (cons 'Negate (lambda (a) (- a))) (cons 'Not (lambda (a) (if (zero? a) 1 0)))))
(define *binary-ops*
(let ((number-comp (lambda (op) (lambda (a b) (if (op a b) 1 0))))) (list (cons 'Add +) (cons 'Subtract -) (cons 'Multiply *) (cons 'Divide (lambda (a b) (truncate (/ a b)))) ; int division (cons 'Mod modulo) (cons 'Less (number-comp <)) (cons 'Greater (number-comp >)) (cons 'LessEqual (number-comp <=)) (cons 'GreaterEqual (number-comp >=)) (cons 'Equal (lambda (a b) (if (= a b) 1 0))) (cons 'NotEqual (lambda (a b) (if (= a b) 0 1))) (cons 'And (lambda (a b) ; make "and" work on numbers (if (and (not (zero? a)) (not (zero? b))) 1 0))) (cons 'Or (lambda (a b) ; make "or" work on numbers (if (or (not (zero? a)) (not (zero? b))) 1 0))))))
- Read AST from given filename
- - return as an s-expression
(define (read-code filename)
(define (read-expr) (let ((line (string-trim (read-line)))) (if (string=? line ";") '() (let ((space (string-index line #\space))) (if space (list (string->symbol (string-trim (substring line 0 space))) (string-trim (substring line space (string-length line)))) (list (string->symbol line) (read-expr) (read-expr))))))) ; (with-input-from-file filename (lambda () (read-expr))))
- interpret AST provided as an s-expression
(define run-program
(let ((env '())) ; env is an association list for variable names (lambda (expr) (define (tidy-string str) (string-delete ; remove any quote marks #\" ; " (to appease Rosetta code's syntax highlighter) (list->string (let loop ((chars (string->list str))) ; replace newlines, obeying \\n (cond ((< (length chars) 2) ; finished list chars) ((and (>= (length chars) 3) ; preserve \\n (char=? #\\ (car chars)) (char=? #\\ (cadr chars)) (char=? #\n (cadr (cdr chars)))) (cons (car chars) (cons (cadr chars) (cons (cadr (cdr chars)) (loop (cdr (cdr (cdr chars)))))))) ((and (char=? #\\ (car chars)) ; replace \n with newline (char=? #\n (cadr chars))) (cons #\newline (loop (cdr (cdr chars))))) (else ; keep char and look further (cons (car chars) (loop (cdr chars))))))))) ; define some more meaningful names for fields (define left cadr) (define right (lambda (x) (cadr (cdr x)))) ; (if (null? expr) '() (case (car expr) ; interpret AST from the head node ((Integer) (string->number (left expr))) ((Identifier) (let ((val (assq (string->symbol (left expr)) env))) (if val (cdr val) (error "Variable not in environment")))) ((String) (left expr)) ((Assign) (set! env (cons (cons (string->symbol (left (left expr))) (run-program (right expr))) env))) ((Add Subtract Multiply Divide Mod Less Greater LessEqual GreaterEqual Equal NotEqual And Or) (let ((binop (assq (car expr) *binary-ops*))) (if binop ((cdr binop) (run-program (left expr)) (run-program (right expr))) (error "Could not find binary operator")))) ((Negate Not) (let ((unaryop (assq (car expr) *unary-ops*))) (if unaryop ((cdr unaryop) (run-program (left expr))) (error "Could not find unary operator")))) ((If) (if (not (zero? (run-program (left expr)))) ; 0 means false (run-program (left (right expr))) (run-program (right (right expr)))) '()) ((While) (let loop () (unless (zero? (run-program (left expr))) (run-program (right expr)) (loop))) '()) ((Prtc) (display (integer->char (run-program (left expr)))) '()) ((Prti) (display (run-program (left expr))) '()) ((Prts) (display (tidy-string (run-program (left expr)))) '()) ((Sequence) (run-program (left expr)) (run-program (right expr)) '()) (else (error "Unknown node type")))))))
- read AST from file and interpret, from filename passed on command line
(if (= 2 (length (command-line)))
(run-program (read-code (cadr (command-line)))) (display "Error: pass an ast filename\n"))
</lang>
- Output:
Output for primes program from above. Also tested on programs in Compiler/Sample programs.
3 is prime 5 is prime 7 is prime 11 is prime 13 is prime 17 is prime 19 is prime 23 is prime 29 is prime 31 is prime 37 is prime 41 is prime 43 is prime 47 is prime 53 is prime 59 is prime 61 is prime 67 is prime 71 is prime 73 is prime 79 is prime 83 is prime 89 is prime 97 is prime 101 is prime Total primes found: 26
zkl
<lang zkl>const{ var _n=-1; var[proxy]N=fcn{ _n+=1 }; } // enumerator const FETCH=N, STORE=N, PUSH=N, ADD=N, SUB=N, MUL=N, DIV=N, MOD=N,
LT=N, GT=N, LE=N, GE=N, EQ=N, NE=N, AND=N, OR=N, NEG=N, NOT=N, JMP=N, JZ=N, PRTC=N, PRTS=N, PRTI=N, HALT=N;
const nd_String=N, nd_Sequence=N, nd_If=N, nd_While=N; var [const]
all_syms=Dictionary( "Identifier" ,FETCH, "String" ,nd_String, "Integer" ,PUSH, "Sequence" ,nd_Sequence, "If" ,nd_If, "Prtc" ,PRTC, "Prts" ,PRTS, "Prti" ,PRTI, "While" ,nd_While, "Assign" ,STORE, "Negate" ,NEG, "Not" ,NOT, "Multiply" ,MUL, "Divide" ,DIV, "Mod" ,MOD, "Add" ,ADD, "Subtract" ,SUB, "Less" ,LT, "LessEqual" ,LE, "Greater" ,GT, "GreaterEqual",GE, "Equal" ,EQ, "NotEqual" ,NE, "And" ,AND, "Or" ,OR, "halt" ,HALT), bops=Dictionary(ADD,'+, SUB,'-, MUL,'*, DIV,'/, MOD,'%,
LT,'<, GT,'>, LE,'<=, GE,'>=, NE,'!=, EQ,'==, NE,'!=);
class Node{
fcn init(_node_type, _value, _left=Void, _right=Void){ var type=_node_type, left=_left, right=_right, value=_value; }
}
fcn runNode(node){
var vars=Dictionary(); // fcn local static var if(Void==node) return(); switch(node.type){ case(PUSH,nd_String){ return(node.value) } case(FETCH){ return(vars[node.value]) } case(STORE){ vars[node.left.value]=runNode(node.right); return(Void); } case(nd_If){ if(runNode(node.left)) runNode(node.right.left);
else runNode(node.right.right);
} case(nd_While) { while(runNode(node.left)){ runNode(node.right) } return(Void) } case(nd_Sequence){ runNode(node.left); runNode(node.right); return(Void) } case(PRTC) { print(runNode(node.left).toAsc()) } case(PRTI,PRTS) { print(runNode(node.left)) } case(NEG) { return(-runNode(node.left)) } case(NOT) { return(not runNode(node.left)) } case(AND) { return(runNode(node.left) and runNode(node.right)) } case(OR) { return(runNode(node.left) or runNode(node.right)) } else{
if(op:=bops.find(node.type)) return(op(runNode(node.left),runNode(node.right))); else throw(Exception.AssertionError( "Unknown node type: %d".fmt(node.type)))
} } Void
}</lang> <lang zkl>fcn load_ast(file){
line:=file.readln().strip(); // one or two tokens if(line[0]==";") return(Void); parts,type,value := line.split(),parts[0],parts[1,*].concat(" "); type=all_syms[type]; if(value){ try{ value=value.toInt() }catch{} if(type==nd_String) value=value[1,-1].replace("\\n","\n"); return(Node(type,value)); } left,right := load_ast(file),load_ast(file); Node(type,Void,left,right)
}</lang> <lang zkl>ast:=load_ast(File(vm.nthArg(0))); runNode(ast);</lang>
- Output:
$ zkl runAST.zkl primeAST.txt 3 is prime 5 is prime 7 is prime 11 is prime ... 89 is prime 97 is prime 101 is prime Total primes found: 26