Arithmetic evaluation
You are encouraged to solve this task according to the task description, using any language you may know.
Create a program which parses and evaluates arithmetic expressions. Requirements: an abstract-syntax tree (AST) for the expression must be created from parsing the input. The AST must be used in evaluation, also, so the input may not be directly evaluated (e.g. by calling eval or a similar language feature.) The expression will be a string or list of symbols like "(1+3)*7". The four symbols + - * / must be supported as binary relations with conventional precedence rules. Precedence-control parentheses must also be supported.
For those who don't remember, mathematical precedence is as follows:
- Parentheses
- Multiplication/Division (left to right)
- Addition/Subtraction (left to right)
Ada
The following implementation uses table-driven parsers provided by Simple components for Ada. A parser is controlled by the tables of prefix, infix and postfix operations. Between the operations it calls Get_Operand in order to recognize expression terms. The parser communicates with its back end using the primitive operations Call and Enclose. The former is used for operations, the latter is for brackets. For this example we generate the parsing tree from there. There are many other operations which are used for advanced parsing and optimization, here they are defined as trivially returning True or False.
A parsing tree node has one operation Evaluate in order to calculate the expression. The nodes are allocated in an arena implemented by a storage pool. The pools is organized as a stack, so that the whole tree is popped when no more needed. This is a standard technique in compiler construction.
The implementation provides an advanced error handling and skipping blanks and Ada comments (these are taken from the library). <lang ada> with Ada.Unchecked_Deallocation; with Parsers.String_Source; use Parsers.String_Source; with Parsers.Generic_Lexer.Ada_Blanks; with Parsers.Generic_Token.Segmented_Lexer; with Stack_Storage; with Tables.Names;
package Parsers.Simple is
type Operations is (Add, Sub, Mul, Div, Left_Bracket, Right_Bracket); type Priorities is mod 3; -- The levels of association
function "and" (Left, Right : Operations) return Boolean; function Is_Commutative (Left, Right : Operations) return Boolean; function Is_Inverse (Operation : Operations) return Boolean; function Group_Inverse (Operation : Operations) return Operations;
Tree_Pool : Stack_Storage.Pool (2048, 128); -- Arena for the tree -- Tree nodes type Node is abstract tagged limited null record; function Evaluate (Item : Node) return Integer is abstract; type Node_Ptr is access Node'Class; for Node_Ptr'Storage_Pool use Tree_Pool; procedure Free is new Standard.Ada.Unchecked_Deallocation (Node'Class, Node_Ptr); -- Stub of the arena type Mark is new Node with null record; overriding function Evaluate (Item : Mark) return Integer; -- Terminal nodes type Literal is new Node with record Location : Parsers.String_Source.Location; Value : Integer; end record; overriding function Evaluate (Item : Literal) return Integer; -- Non-terminal nodes type Argument_List is array (Positive range <>) of Node_Ptr; type Expression (Count : Positive) is new Node with record Operation : Operations; Location : Parsers.String_Source.Location; Operands : Argument_List (1..Count); end record; overriding function Evaluate (Item : Expression) return Integer;
package Tokens is -- The lexical tokens new Parsers.Generic_Token ( Operation_Type => Operations, Argument_Type => Node_Ptr, Priority_Type => Priorities, Sources => Code ); use Tokens;
procedure Check_Spelling (Name : String); function Check_Matched (Source : String; Pointer : Integer) return Boolean; package Token_Tables is new Tokens.Vocabulary.Names; -- The tables of prefix, infix and postfix operations Prefixes : aliased Token_Tables.Dictionary; Infixes : aliased Token_Tables.Dictionary; Postfixes : aliased Token_Tables.Dictionary;
package Lexers is new Tokens.Segmented_Lexer; -- Table driven lexers package Blank_Skipping_Lexers is -- Lexers that skip Ada blanks new Lexers.Token_Lexer.Implementation.Ada_Blanks (Lexers.Lexer);
type Simple_Expression is -- The lexer that uses our tables new Blank_Skipping_Lexers.Lexer ( Prefixes => Prefixes'Access, Infixes => Infixes'Access, Postfixes => Postfixes'Access ) with null record; overriding -- Evaluates an operator function Call ( Context : access Simple_Expression; Operation : Tokens.Operation_Token; List : Tokens.Arguments.Frame ) return Tokens.Argument_Token; overriding -- Evaluates an expression in brackets function Enclose ( Context : access Simple_Expression; Left : Tokens.Operation_Token; Right : Tokens.Operation_Token; List : Tokens.Arguments.Frame ) return Tokens.Argument_Token; overriding -- Recognizes an operand (float number) procedure Get_Operand ( Context : in out Simple_Expression; Code : in out Source; Argument : out Tokens.Argument_Token; Got_It : out Boolean );
end Parsers.Simple; </lang> Here is the implementation of the package. <lang ada> with Ada.Characters.Handling; use Ada.Characters.Handling; with Ada.Exceptions; use Ada.Exceptions; with Ada.IO_Exceptions; use Ada.IO_Exceptions; with Strings_Edit.Integers; use Strings_Edit.Integers;
package body Parsers.Simple is
function "and" (Left, Right : Operations) return Boolean is begin return True; end "and";
function Is_Commutative (Left, Right : Operations) return Boolean is begin return False; end Is_Commutative;
function Is_Inverse (Operation : Operations) return Boolean is begin return False; end Is_Inverse;
function Group_Inverse (Operation : Operations) return Operations is begin return Mul; end Group_Inverse;
procedure Check_Spelling (Name : String) is begin null; end Check_Spelling;
function Check_Matched (Source : String; Pointer : Integer) return Boolean is begin return ( not Is_Alphanumeric (Source (Pointer)) or else not Is_Alphanumeric (Source (Pointer - 1)) ); end Check_Matched;
function Call ( Context : access Simple_Expression; Operation : Tokens.Operation_Token; List : Tokens.Arguments.Frame ) return Tokens.Argument_Token is Result : Node_Ptr := new Expression (List'Length); begin declare This : Expression renames Expression (Result.all); begin This.Operation := Operation.Operation; This.Location := Operation.Location; for Argument in List'Range loop This.Operands (Integer (Argument)) := List (Argument).Value; end loop; end; return (Result, Operation.Location & Link (List)); end Call;
function Enclose ( Context : access Simple_Expression; Left : Tokens.Operation_Token; Right : Tokens.Operation_Token; List : Tokens.Arguments.Frame ) return Tokens.Argument_Token is Result : Node_Ptr := new Expression (List'Length); begin declare This : Expression renames Expression (Result.all); begin This.Operation := Left.Operation; This.Location := Left.Location & Right.Location; for Argument in List'Range loop This.Operands (Integer (Argument)) := List (Argument).Value; end loop; end; return (Result, Left.Location & Right.Location & Link (List)); end Enclose;
procedure Get_Operand ( Context : in out Simple_Expression; Code : in out Source; Argument : out Tokens.Argument_Token; Got_It : out Boolean ) is Line : String renames Get_Line (Code); Pointer : Integer := Get_Pointer (Code); Value : Integer; begin if Is_Decimal_Digit (Line (Pointer)) then Get (Line, Pointer, Value); Set_Pointer (Code, Pointer); Argument.Location := Link (Code); Argument.Value := new Literal; declare Result : Literal renames Literal (Argument.Value.all); begin Result.Value := Value; Result.Location := Argument.Location; end; Got_It := True; else Got_It := False; end if; exception when Constraint_Error => Raise_Exception ( Parsers.Syntax_Error'Identity, "Too large number at " & Image (Link (Code)) ); when Data_Error => Raise_Exception ( Parsers.Syntax_Error'Identity, "Malformed number at " & Image (Link (Code)) ); when End_Error => Got_It := False; end Get_Operand;
function Evaluate (Item : Mark) return Integer is begin return 0; end Evaluate;
function Evaluate (Item : Literal) return Integer is begin return Item.Value; end Evaluate;
function Evaluate (Item : Expression) return Integer is Argument : array (Item.Operands'Range) of Integer; begin for I in Argument'Range loop Argument (I) := Item.Operands (I).Evaluate; end loop; case Item.Operation is when Add => return Argument (1) + Argument (2); when Sub => return Argument (1) - Argument (2); when Mul => return Argument (1) * Argument (2); when Div => return Argument (1) / Argument (2); when others => return Argument (1); end case; exception when Constraint_Error => Raise_Exception ( Parsers.Syntax_Error'Identity, "Numeric error at " & Image (Item.Location) ); end Evaluate;
use type Tokens.Descriptors.Descriptor_Class; use Lexers.Lexical_Descriptors.Operation; use Lexers.Lexical_Arguments;
begin
Add_Operator (Infixes, "+", Add, 1, 1); Add_Operator (Infixes, "-", Sub, 1, 1); Add_Operator (Infixes, "*", Mul, 2, 2); Add_Operator (Infixes, "/", Div, 2, 2);
Add_Bracket (Prefixes, "(", Left_Bracket); Add_Bracket (Postfixes, ")", Right_Bracket);
end Parsers.Simple; </lang> The next is a little test. It reads a line from the keyboard and then evaluates it. The program stops when the input is empty: <lang ada> with Ada.Exceptions; use Ada.Exceptions; with Ada.Text_IO; use Ada.Text_IO; with Parsers.Simple; use Parsers.Simple; with Parsers.String_Source; use Parsers.String_Source; with Strings_Edit.Integers; use Strings_Edit.Integers; with Parsers.Generic_Source.Text_IO;
procedure Test_Simple_Parser is
use Lexers, Tokens;
package Text_IO is new Code.Text_IO; use Text_IO;
Parser : Simple_Expression; Result : Argument_Token; Stub : Node_Ptr;
begin
loop Put ("Expression:"); declare Line : aliased String := Get_Line; Code : Source (Line'Access); begin exit when Line'Length = 0; Stub := new Mark; -- Mark the tree stack begin Parse (Parser, Code, Result); Put_Line ( Image (Result.Location) & " = " & Image (Result.Value.Evaluate) ); exception when Error : Parsers.Syntax_Error => Put_Line ("Error : " & Exception_Message (Error)); end; Free (Stub); -- Release the stack end; end loop;
end Test_Simple_Parser; </lang> Sample exchange. When the expression is evaluated its range in the source string is indicated. Upon errors, the location of is shown as well:
Expression:(3 * 50) - (100 / 10) 1..21 = 140 Expression:1+ Error : Operand expected at 3 Expression:39999999999*9999999999+23 Error : Too large number at 1 Expression:5/0 Error : Numeric error at 2..2 Expression:
ALGOL 68
INT base=10; MODE FIXED = LONG REAL; # numbers in the format 9,999.999 # #IF build abstract syntax tree and then EVAL tree # MODE AST = UNION(NODE, FIXED); MODE NUM = REF AST; MODE NODE = STRUCT(NUM a, PROC (FIXED,FIXED)FIXED op, NUM b); OP EVAL = (NUM ast)FIXED:( CASE ast IN (FIXED num): num, (NODE fork): (op OF fork)(EVAL( a OF fork), EVAL (b OF fork)) ESAC ); OP + = (NUM a,b)NUM: ( HEAP AST := NODE(a, (FIXED a,b)FIXED:a+b, b) ); OP - = (NUM a,b)NUM: ( HEAP AST := NODE(a, (FIXED a,b)FIXED:a-b, b) ); OP * = (NUM a,b)NUM: ( HEAP AST := NODE(a, (FIXED a,b)FIXED:a*b, b) ); OP / = (NUM a,b)NUM: ( HEAP AST := NODE(a, (FIXED a,b)FIXED:a/b, b) ); OP **= (NUM a,b)NUM: ( HEAP AST := NODE(a, (FIXED a,b)FIXED:a**b, b) ); #ELSE simply use REAL arithmetic with no abstract syntax tree at all # CO MODE NUM = FIXED, AST = FIXED; OP EVAL = (FIXED num)FIXED: num; #FI# END CO MODE LEX = PROC (TOK)NUM; MODE MONADIC =PROC (NUM)NUM; MODE DIADIC = PROC (NUM,NUM)NUM; MODE TOK = CHAR; MODE ACTION = UNION(STACKACTION, LEX, MONADIC, DIADIC); MODE OPVAL = STRUCT(INT prio, ACTION action); MODE OPITEM = STRUCT(TOK token, OPVAL opval); [256]STACKITEM stack; MODE STACKITEM = STRUCT(NUM value, OPVAL op); MODE STACKACTION = PROC (REF STACKITEM)VOID; PROC begin = (REF STACKITEM top)VOID: prio OF op OF top -:= +10; PROC end = (REF STACKITEM top)VOID: prio OF op OF top -:= -10; OP ** = (COMPL a,b)COMPL: complex exp(complex ln(a)*b); [8]OPITEM op list :=( # OP PRIO ACTION # ("^", (8, (NUM a,b)NUM: a**b)), ("*", (7, (NUM a,b)NUM: a*b)), ("/", (7, (NUM a,b)NUM: a/b)), ("+", (6, (NUM a,b)NUM: a+b)), ("-", (6, (NUM a,b)NUM: a-b)), ("(",(+10, begin)), (")",(-10, end)), ("?", (9, LEX:SKIP)) ); PROC op dict = (TOK op)REF OPVAL:( # This can be unrolled to increase performance # REF OPITEM candidate; FOR i TO UPB op list WHILE candidate := op list[i]; # WHILE # op /= token OF candidate DO SKIP OD; opval OF candidate ); PROC build ast = (STRING expr)NUM:( INT top:=0; PROC compress ast stack = (INT prio, NUM in value)NUM:( NUM out value := in value; FOR loc FROM top BY -1 TO 1 WHILE REF STACKITEM stack top := stack[loc]; # WHILE # ( top >= LWB stack | prio <= prio OF op OF stack top | FALSE ) DO top := loc - 1; out value := CASE action OF op OF stack top IN (MONADIC op): op(value OF stack top), # not implemented # (DIADIC op): op(value OF stack top,out value) ESAC OD; out value ); NUM value := NIL; FIXED num value; INT decimal places; FOR i TO UPB expr DO TOK token = expr[i]; REF OPVAL this op := op dict(token); CASE action OF this op IN (STACKACTION action):( IF prio OF thisop = -10 THEN value := compress ast stack(0, value) FI; IF top >= LWB stack THEN action(stack[top]) FI ), (LEX):( # a crude lexer # SHORT INT digit = ABS token - ABS "0"; IF 0<= digit AND digit < base THEN IF NUM(value) IS NIL THEN # first digit # decimal places := 0; value := HEAP AST := num value := digit ELSE NUM(value) := num value := IF decimal places = 0 THEN num value * base + digit ELSE decimal places *:= base; num value + digit / decimal places FI FI ELIF token = "." THEN decimal places := 1 ELSE SKIP # and ignore spaces and any unrecognised characters # FI ), (MONADIC): SKIP, # not implemented # (DIADIC):( value := compress ast stack(prio OF this op, value); IF top=UPB stack THEN index error FI; stack[top+:=1]:=STACKITEM(value, this op); value:=NIL ) ESAC OD; compress ast stack(-max int, value) ); # TEST # printf(($" euler's number is about: "g(-long real width,long real width-2)l$, EVAL build ast("1+1+(1+(1+(1+(1+(1+(1+(1+(1+(1+(1+(1+(1+(1+1/15)/14)/13)/12)/11)/10)/9)/8)/7)/6)/5)/4)/3)/2"))); SKIP EXIT index error: printf(("Stack over flow"))
Output:
euler's number is about: 2.71828182845899446428546958
C
Compiler: gcc (version 4.3.2 20081105 (Red Hat 4.3.2-7))
This is a LL(1) recursive descent parser. Only performs integer division. There is a function for every non-terminal in the grammar, save add_op and mult_op, which were lumped into term_tail and factor_tail respectively.
<lang c>#include "stdlib.h"
- include "stdio.h"
- include "ctype.h"
unsigned int G_STRING_ITERATOR = 0;
/*
* expr := term term_tail * term_tail := add_op term term_tail | e * term := factor factor_tail * factor_tail := mult_op factor factor_tail | e * factor := ( expr ) | number * add_op := + | - * mult_op := * | / */
typedef union {
int terminal; struct expression* expr[2];
} Data;
typedef struct expression {
char op; Data data;
} Expr;
void parse_error(const char* string) {
unsigned int i; fprintf(stderr, "Unexpected symbol '%c' at position %u.\n\n", string[G_STRING_ITERATOR], G_STRING_ITERATOR); fprintf(stderr, "String: '%s'\n", string); fprintf(stderr, "Problem: "); for(i = 0; i < G_STRING_ITERATOR; ++i) { fprintf(stderr, " "); } fprintf(stderr, "^\n"); exit(1);
}
char consume_char(const char* string, char c) {
if(string[G_STRING_ITERATOR] != c) { parse_error(string); } ++G_STRING_ITERATOR; return c;
}
int consume_int(const char* string) {
int i;
if(!isdigit(string[G_STRING_ITERATOR])) { parse_error(string); }
i = atoi(string + G_STRING_ITERATOR); while(isdigit(string[G_STRING_ITERATOR])) { ++G_STRING_ITERATOR; } return i;
}
Expr* expression(const char* string);
Expr* factor(const char* string, Expr* expr) {
if(string[G_STRING_ITERATOR] == '(') { expr->op = consume_char(string, '('); expr->data.expr[0] = expression(string); consume_char(string, ')'); } else if(isdigit(string[G_STRING_ITERATOR])) { expr->op = 'd'; expr->data.terminal = consume_int(string); } return expr;
}
Expr* factor_tail(const char* string, Expr* expr) {
Expr* new_expr;
switch(string[G_STRING_ITERATOR]) { case '*': case '/': if(NULL == (new_expr = (Expr*)malloc(sizeof(Expr)))) { exit(1); } if(NULL == (new_expr->data.expr[1] = (Expr*)malloc(sizeof(Expr)))) { exit(1); } new_expr->op = consume_char(string, string[G_STRING_ITERATOR]); new_expr->data.expr[0] = expr;
new_expr->data.expr[1] = factor(string, new_expr->data.expr[1]); new_expr = factor_tail(string, new_expr); return new_expr; case '+': case '-': case ')': case 0: return expr; default: parse_error(string); }
}
Expr* term(const char* string, Expr* expr) {
if(string[G_STRING_ITERATOR] == '(' || isdigit(string[G_STRING_ITERATOR])) { expr = factor(string, expr); expr = factor_tail(string, expr); return expr; } else { parse_error(string); }
}
Expr* term_tail(const char* string, Expr* expr) {
Expr* new_expr;
switch(string[G_STRING_ITERATOR]) { case '+': case '-': if(NULL == (new_expr = (Expr*)malloc(sizeof(Expr)))) { exit(1); } if(NULL == (new_expr->data.expr[1] = (Expr*)malloc(sizeof(Expr)))) { exit(1); } new_expr->op = consume_char(string, string[G_STRING_ITERATOR]); new_expr->data.expr[0] = expr;
new_expr->data.expr[1] = term(string, new_expr->data.expr[1]); new_expr = term_tail(string, new_expr); return new_expr; case ')': case 0: return expr; default: parse_error(string); }
}
Expr* expression(const char* string) {
Expr* expr;
if(string[G_STRING_ITERATOR] == '(' || isdigit(string[G_STRING_ITERATOR])) { if(NULL == (expr = (Expr*)malloc(sizeof(Expr)))) { exit(1); }
expr = term(string, expr); expr = term_tail(string, expr); return expr; } else { parse_error(string); }
}
int evaluate(Expr* expr) {
int ret;
switch(expr->op) { case '(': ret = evaluate(expr->data.expr[0]); free(expr->data.expr[0]); break; case '*': ret = evaluate(expr->data.expr[0]) * evaluate(expr->data.expr[1]) ; free(expr->data.expr[0]); free(expr->data.expr[1]); break; case '/': ret = evaluate(expr->data.expr[0]) / evaluate(expr->data.expr[1]) ; free(expr->data.expr[0]); free(expr->data.expr[1]); break; case '+': ret = evaluate(expr->data.expr[0]) + evaluate(expr->data.expr[1]) ; free(expr->data.expr[0]); free(expr->data.expr[1]); break; case '-': ret = evaluate(expr->data.expr[0]) - evaluate(expr->data.expr[1]) ; free(expr->data.expr[0]); free(expr->data.expr[1]); break; case 'd': ret = expr->data.terminal; break; default: exit(1); } return ret;
}
int main(int argc, char** argv) {
Expr* expr = NULL;
if(argc > 1) { expr = expression(argv[1]); printf("%d\n", evaluate(expr)); free(expr); } return 0;
} </lang>
C++
1.8.4
<lang cpp> #include <boost/spirit.hpp>
#include <boost/spirit/tree/ast.hpp> #include <string> #include <cassert> #include <iostream> #include <istream> #include <ostream> using boost::spirit::rule; using boost::spirit::parser_tag; using boost::spirit::ch_p; using boost::spirit::real_p; using boost::spirit::tree_node; using boost::spirit::node_val_data; // The grammar struct parser: public boost::spirit::grammar<parser> { enum rule_ids { addsub_id, multdiv_id, value_id, real_id }; struct set_value { set_value(parser const& p): self(p) {} void operator()(tree_node<node_val_data<std::string::iterator, double> >& node, std::string::iterator begin, std::string::iterator end) const { node.value.value(self.tmp); } parser const& self; }; mutable double tmp; template<typename Scanner> struct definition { rule<Scanner, parser_tag<addsub_id> > addsub; rule<Scanner, parser_tag<multdiv_id> > multdiv; rule<Scanner, parser_tag<value_id> > value; rule<Scanner, parser_tag<real_id> > real; definition(parser const& self) { using namespace boost::spirit; addsub = multdiv >> *((root_node_d[ch_p('+')] | root_node_d[ch_p('-')]) >> multdiv); multdiv = value >> *((root_node_d[ch_p('*')] | root_node_d[ch_p('/')]) >> value); value = real | inner_node_d[('(' >> addsub >> ')')]; real = leaf_node_d[access_node_d[real_p[assign_a(self.tmp)]][set_value(self)]]; } rule<Scanner, parser_tag<addsub_id> > const& start() const { return addsub; } }; }; template<typename TreeIter> double evaluate(TreeIter const& i) { double op1, op2; switch (i->value.id().to_long()) { case parser::real_id: return i->value.value(); case parser::value_id: case parser::addsub_id: case parser::multdiv_id: op1 = evaluate(i->children.begin()); op2 = evaluate(i->children.begin()+1); switch(*i->value.begin()) { case '+': return op1 + op2; case '-': return op1 - op2; case '*': return op1 * op2; case '/': return op1 / op2; default: assert(!"Should not happen"); } default: assert(!"Should not happen"); } return 0; } // the read/eval/write loop int main() { parser eval; std::string line; while (std::cout << "Expression: " && std::getline(std::cin, line) && !line.empty()) { typedef boost::spirit::node_val_data_factory<double> factory_t; boost::spirit::tree_parse_info<std::string::iterator, factory_t> info = boost::spirit::ast_parse<factory_t>(line.begin(), line.end(), eval, boost::spirit::space_p); if (info.full) { std::cout << "Result: " << evaluate(info.trees.begin()) << std::endl; } else { std::cout << "Error in expression." << std::endl; } } };
</lang>
Common Lisp
The following code parses a string into a sequence of tokens. The sequence of tokens includes :lparen
and :rparen
indicating left and right parenthesis, respectively. That sequence of tokens is then transformed by replacing subsequences of the form :lparen ... :rparen
with a sublist containing the tokens between the :lparen
and :rparen
. The resulting tree is then simplified by replacing any subsequence of the form A x B y C …
with either (A x B) y C …
or A x (B y C)
depending on the relative precedence of x
and y
. This produces a syntax tree each of whose elements is either a node representing an integer, (:integer . n)
, a list containing a single expression, (exp), or an operation,
(e1 op e2)
. Evaluating such a syntax tree is then trivial. This implementation can read integers, and produce integral and rational values.
<lang lisp>(defun tokenize-stream (stream)
(labels ((whitespace-p (char)
(find char #(#\space #\newline #\return #\tab)))
(consume-whitespace ()
(loop while (whitespace-p (peek-char nil stream nil #\a))
do (read-char stream)))
(read-integer ()
(loop while (digit-char-p (peek-char nil stream nil #\space))
collect (read-char stream) into digits
finally (return (parse-integer (coerce digits 'string))))))
(consume-whitespace)
(let ((c (peek-char nil stream nil nil)))
(multiple-value-bind (token value)
(case c
((nil) :eof)
((#\() :lparen)
((#\)) :rparen)
((#\*) :multiply)
((#\/) :divide)
((#\+) :add)
((#\-) :subtract)
(otherwise
(unless (digit-char-p c)
(cerror "Skip it." "Unexpected character ~w." c)
(read-char stream)
(return-from tokenize-stream
(tokenize-stream stream)))
(values :integer (read-integer))))
(unless (find token #(:integer :eof))
(read-char stream))
(if (not (eql token :integer)) token
(cons token value))))))
(defun group-parentheses (tokens &optional (delimited nil))
(do ((new-tokens '()))
((endp tokens)
(when delimited
(cerror "Insert it." "Expected right parenthesis."))
(values (nreverse new-tokens) '()))
(let ((token (pop tokens)))
(case token
((:lparen)
(multiple-value-bind (group remaining-tokens)
(group-parentheses tokens t)
(setf new-tokens (cons group new-tokens)
tokens remaining-tokens)))
((:rparen)
(if (not delimited)
(cerror "Ignore it." "Unexpected right parenthesis.")
(return (values (nreverse new-tokens) tokens))))
(otherwise
(push token new-tokens))))))
(defun group-operations (expression)
(flet ((gop (exp) (group-operations exp)))
(if (eql (car expression) :integer) expression
(destructuring-bind (A &optional (x nil xp) B (y nil yp) C &rest others)
expression
(cond
((not xp) (gop A))
((not yp) (list (gop A) x (gop B)))
(t (let ((a (gop A)) (B (gop B)) (C (gop C)))
(if (and (find x #(:add :subtract))
(find y #(:multiply :divide)))
(gop (list* A x (list B y C) others))
(gop (list* (list A x B) y C others))))))))))
(defun evaluate-expression (expression)
(cond
((eql (car expression) :integer)
(cdr expression))
((endp (cdr expression))
(evaluate-expression (car expression)))
(t (destructuring-bind (e1 op e2) expression
(let ((v1 (evaluate-expression e1))
(v2 (evaluate-expression e2)))
(ecase op
(:add (+ v1 v2))
(:subtract (- v1 v2))
(:multiply (* v1 v2))
(:divide (/ v1 v2))))))))
(defun evaluate (string)
(with-input-from-string (in string)
(evaluate-expression
(group-operations
(group-parentheses
(loop for token = (tokenize-stream in)
until (eql :eof token)
collect token))))))</lang>
Examples
> (evaluate "1 - 5 * 2 / 20 + 1")
3/2
> (evaluate "(1 - 5) * 2 / (20 + 1)")
-8/21
> (evaluate "2 * (3 + ((5) / (7 - 11)))")
7/2
> (evaluate "(2 + 3) / (10 - 5)")
1
Examples of error handling
> (evaluate "(3 * 2) a - (1 + 2) / 4")
Error: Unexpected character a.
1 (continue) Skip it.
2 (abort) Return to level 0.
3 Return to top loop level 0.
Type :b for backtrace, :c <option number> to proceed, or :? for other options
: 1 > :c 1
21/4
> (evaluate "(3 * 2) - (1 + 2) / (4")
Error: Expected right parenthesis.
1 (continue) Insert it.
2 (abort) Return to level 0.
3 Return to top loop level 0.
Type :b for backtrace, :c <option number> to proceed, or :? for other options
: 1 > :c 1
21/4
D
Following the previous number-operator dual stacks approach, an AST is built while previous version is evaluating the expression value. After the AST tree is constructed, a visitor pattern is used to display the AST structure and calculate the value.
<lang d>//module evaluate ;
import std.stdio, std.string, std.ctype, std.conv ;
// simple stack template
void push(T)(inout T[] stk, T top) { stk ~= top ; }
T pop(T)(inout T[] stk, bool discard = true) {
T top ;
if (stk.length == 0) throw new Exception("Stack Empty") ;
top = stk[$-1] ;
if (discard) stk.length = stk.length - 1 ;
return top ;
}
alias int Type ;
enum { Num, OBkt, CBkt, Add, Sub, Mul, Div } ; // Type
string[] opChar = ["#","(",")","+","-","*","/"] ;
int[] opPrec = [0,-9,-9,1,1,2,2] ;
abstract class Visitor { void visit(XP e) ; }
class XP {
Type type ;
string str ;
int pos ; // optional, for dispalying AST struct.
XP LHS, RHS = null ;
this(string s = ")", int p = -1) {
str = s ; pos = p ;
type = Num ;
for(Type t = Div ; t > Num ; t--)
if(opChar[t] == s) type = t ;
}
int opCmp(XP rhs) { return opPrec[type] - opPrec[rhs.type] ; }
void accept(Visitor v) { v.visit(this) ; } ;
}
class AST {
XP root ;
XP[] num, opr ;
string xpr, token ;
int xpHead, xpTail ;
void joinXP(XP x) { x.RHS = num.pop() ; x.LHS = num.pop() ; num.push(x) ; }
string nextToken() {
while (xpHead < xpr.length && xpr[xpHead] == ' ')
xpHead++ ; // skip spc
xpTail = xpHead ;
if(xpHead < xpr.length) {
token = xpr[xpTail..xpTail+1] ;
switch(token) {
case "(",")","+","-","*","/": // valid non-number
xpTail++ ;
return token ;
default: // should be number
if(isdigit(token[0])) {
while(xpTail < xpr.length && isdigit(xpr[xpTail]))
xpTail++ ;
return xpr[xpHead..xpTail] ;
} // else may be error
} // end switch
}
if(xpTail < xpr.length)
throw new Exception("Invalid Char <" ~ xpr[xpTail] ~ ">") ;
return null ;
} // end nextToken
AST parse(string s) {
bool expectingOP ;
xpr = s ;
try {
xpHead = xpTail = 0 ;
num = opr = null ;
root = null ;
opr.push(new XP) ; // CBkt, prevent evaluate null OP precidence
while((token = nextToken) !is null) {
XP tokenXP = new XP(token, xpHead) ;
if(expectingOP) { // process OP-alike XP
switch(token) {
case ")":
while(opr.pop(false).type != OBkt)
joinXP(opr.pop()) ;
opr.pop() ;
expectingOP = true ; break ;
case "+","-","*","/":
while (tokenXP <= opr.pop(false))
joinXP(opr.pop()) ;
opr.push(tokenXP) ;
expectingOP = false ; break ;
default:
throw new Exception("Expecting Operator or ), not <" ~ token ~ ">") ;
}
} else { // process Num-alike XP
switch(token) {
case "+","-","*","/",")":
throw new Exception("Expecting Number or (, not <" ~ token ~ ">") ;
case "(":
opr.push(tokenXP) ;
expectingOP = false ; break ;
default: // number
num.push(tokenXP) ;
expectingOP = true ;
}
}
xpHead = xpTail ;
} // end while
while (opr.length > 1) // join pending Op
joinXP(opr.pop()) ;
}catch(Exception e) {
writefln("%s\n%s\n%s^", e.msg, xpr, repeat(" ", xpHead)) ;
root = null ;
return this ;
}
if(num.length != 1) { // should be one XP left
writefln("Parse Error...") ;
root = null ;
} else
root = num.pop() ;
return this ;
} // end Parse
} // end class AST
// for display AST fancy struct
void ins(inout char[][] s, string v, int p, int l) {
while(s.length < l + 1) s.length = s.length + 1 ;
while(s[l].length < p + v.length + 1) s[l] ~= " " ;
s[l][p..p +v.length] = v ;
}
class calcVis : Visitor {
int result, level = 0 ;
string Result = null ;
char[][] Tree = null ;
static void opCall(AST a) {
if (a && a.root) {
calcVis c = new calcVis ;
a.root.accept(c) ;
for(int i = 1; i < c.Tree.length ; i++) { // more fancy
bool flipflop = false ; char mk = '.' ;
for(int j = 0 ; j < c.Tree[i].length ; j++) {
while(j >= c.Tree[i-1].length) c.Tree[i-1] ~= " " ;
char c1 = c.Tree[i][j] ; char c2 = c.Tree[i-1][j] ;
if(flipflop && (c1 == ' ') && c2 == ' ')
c.Tree[i-1][j] = mk ;
if(c1 != mk && c1 != ' ' && (j == 0 || !isdigit(c.Tree[i][j-1])))
flipflop = !flipflop ;
}
}
foreach(t; c.Tree) writefln(t) ;
writefln("%s ==>\n%s = %s", a.xpr,c.Result,c.result) ;
} else
writefln("Evalute invalid or null Expression") ;
}
void visit(XP xp) {// calc. the value, display AST struct and eval order.
ins(Tree, xp.str, xp.pos, level) ;
level++ ;
if (xp.type == Num) {
Result ~= xp.str ;
result = toInt(xp.str) ;
} else {
Result ~= "(" ;
xp.LHS.accept(this) ;
int lhs = result ;
Result ~= opChar[xp.type] ;
xp.RHS.accept(this) ;
Result ~= ")" ;
switch(xp.type) {
case Add: result = lhs + result ; break ;
case Sub: result = lhs - result ; break ;
case Mul: result = lhs * result ; break ;
case Div: result = lhs / result ; break ;
default: throw new Exception("Invalid type") ;
}
} //
level-- ;
}
}
void main(string[] args) {
string expression = args.length > 1 ? join(args[1..$]," ") :
"1 + 2*(3 - 2*(3 - 2)*((2 - 4)*5 - 22/(7 + 2*(3 - 1)) - 1)) + 1" ; // should be 60
calcVis((new AST).parse(expression)) ;
}</lang>
E
While the task requirements specify not evaluating using the language's built-in eval, they don't say that you have to write your own parser...
<lang e>def eParser := <elang:syntax.makeEParser>
def LiteralExpr := <elang:evm.makeLiteralExpr>.asType()
def arithEvaluate(expr :String) {
def ast := eParser(expr)
def evalAST(ast) {
return switch (ast) {
match e`@a + @b` { evalAST(a) + evalAST(b) }
match e`@a - @b` { evalAST(a) - evalAST(b) }
match e`@a * @b` { evalAST(a) * evalAST(b) }
match e`@a / @b` { evalAST(a) / evalAST(b) }
match e`-@a` { -(evalAST(a)) }
match l :LiteralExpr { l.getValue() }
}
}
return evalAST(ast)
}</lang>
Parentheses are handled by the parser.
<lang e>? arithEvaluate("1 + 2")
- value: 3
? arithEvaluate("(1 + 2) * 10 / 100")
- value: 0.3
? arithEvaluate("(1 + 2 / 2) * (5 + 5)")
- value: 20.0</lang>
Haskell
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Expr
data Exp = Num Int
| Add Exp Exp
| Sub Exp Exp
| Mul Exp Exp
| Div Exp Exp
expr = buildExpressionParser table factor
table = [[op "*" (Mul) AssocLeft, op "/" (Div) AssocLeft]
,[op "+" (Add) AssocLeft, op "-" (Sub) AssocLeft]]
where op s f assoc = Infix (do string s; return f) assoc
factor = do char '(' ; x <- expr ; char ')'
return x
<|> do ds <- many1 digit
return $ Num (read ds)
evaluate (Num x) = fromIntegral x
evaluate (Add a b) = (evaluate a) + (evaluate b)
evaluate (Sub a b) = (evaluate a) - (evaluate b)
evaluate (Mul a b) = (evaluate a) * (evaluate b)
evaluate (Div a b) = (evaluate a) `div` (evaluate b)
solution exp = case parse expr [] exp of
Right expr -> evaluate expr
Left _ -> error "Did not parse"
J
parse=:parse_parser_
eval=:monad define
'gerund structure'=:y
gerund@.structure
)
coclass 'parser'
classify=: '$()*/+-'&(((>:@#@[ # 2:) #: 2 ^ i.)&;:)
rules=: ''
patterns=: ,"0 assert 1
addrule=: dyad define
rules=: rules,;:x
patterns=: patterns,+./@classify"1 y
)
'Term' addrule '$()', '0', '+-',: '0'
'Factor' addrule '$()+-', '0', '*/',: '0'
'Parens' addrule '(', '*/+-0', ')',: ')*/+-0$'
rules=: rules,;:'Move'
buildTree=: monad define
words=: ;:'$',y
queue=: classify '$',y
stack=: classify '$$$$'
tokens=: ]&.>i.#words
tree=: ''
while.(#queue)+.6<#stack do.
rule=: rules {~ i.&1 patterns (*./"1)@:(+./"1) .(*."1)4{.stack
rule`:6''
end.
'syntax' assert 1 0 1 1 1 1 -: {:"1 stack
gerund=: literal&.> (<,'%') (I. words=<,'/')} words
gerund;1{tree
)
literal=:monad define ::]
".'t=.',y
5!:1<'t'
)
Term=: Factor=: monad define
stack=: ({.stack),(classify '0'),4}.stack
tree=: ({.tree),(<1 2 3{tree),4}.tree
)
Parens=: monad define
stack=: (1{stack),3}.stack
tree=: (1{tree),3}.tree
)
Move=: monad define
'syntax' assert 0<#queue
stack=: ({:queue),stack
queue=: }:queue
tree=: ({:tokens),tree
tokens=: }:tokens
)
parse=:monad define
tmp=: conew 'parser'
r=: buildTree__tmp y
coerase tmp
r
)
example use:
eval parse '1+2*3/(4-5+6)'
2.2
You can also display the syntax tree, for example:
parse '1+1'
Pascal
Note: This code is completely standard pascal, checked with gpc --classic-pascal. It uses certain features of standard Pascal which are not implemented in all Pascal compilers (e.g. the code will not compile with Turbo/Borland Pascal or Free Pascal).
<lang pascal> program calculator(input, output);
type
NodeType = (binop, number, error);
pAstNode = ^tAstNode;
tAstNode = record
case typ: NodeType of
binop:
(
operation: char;
first, second: pAstNode;
);
number:
(value: integer);
error:
();
end;
function newBinOp(op: char; left: pAstNode): pAstNode;
var
node: pAstNode;
begin
new(node, binop);
node^.operation := op;
node^.first := left;
node^.second := nil;
newBinOp := node;
end;
procedure disposeTree(tree: pAstNode);
begin
if tree^.typ = binop
then
begin
if (tree^.first <> nil)
then
disposeTree(tree^.first);
if (tree^.second <> nil)
then
disposeTree(tree^.second)
end;
dispose(tree);
end;
procedure skipWhitespace(var f: text);
var
ch:char;
function isWhite: boolean;
begin
isWhite := false;
if not eoln(f)
then
if f^ = ' '
then
isWhite := true
end;
begin
while isWhite do
read(f, ch)
end;
function parseAddSub(var f: text): pAstNode; forward;
function parseMulDiv(var f: text): pAstNode; forward;
function parseValue(var f: text): pAstNode; forward;
function parseAddSub;
var
node1, node2: pAstNode;
continue: boolean;
begin
node1 := parseMulDiv(f);
if node1^.typ <> error
then
begin
continue := true;
while continue and not eoln(f) do
begin
skipWhitespace(f);
if f^ in ['+', '-']
then
begin
node1 := newBinop(f^, node1);
get(f);
node2 := parseMulDiv(f);
if (node2^.typ = error)
then
begin
disposeTree(node1);
node1 := node2;
continue := false
end
else
node1^.second := node2
end
else
continue := false
end;
end;
parseAddSub := node1;
end;
function parseMulDiv;
var
node1, node2: pAstNode;
continue: boolean;
begin
node1 := parseValue(f);
if node1^.typ <> error
then
begin
continue := true;
while continue and not eoln(f) do
begin
skipWhitespace(f);
if f^ in ['*', '/']
then
begin
node1 := newBinop(f^, node1);
get(f);
node2 := parseValue(f);
if (node2^.typ = error)
then
begin
disposeTree(node1);
node1 := node2;
continue := false
end
else
node1^.second := node2
end
else
continue := false
end;
end;
parseMulDiv := node1;
end;
function parseValue;
var
node: pAstNode;
value: integer;
neg: boolean;
begin
node := nil;
skipWhitespace(f);
if f^ = '('
then
begin
get(f);
node := parseAddSub(f);
if node^.typ <> error
then
begin
skipWhitespace(f);
if f^ = ')'
then
get(f)
else
begin
disposeTree(node);
new(node, error)
end
end
end
else if f^ in ['0' .. '9', '+', '-']
then
begin
neg := f^ = '-';
if f^ in ['+', '-']
then
get(f);
value := 0;
if f^ in ['0' .. '9']
then
begin
while f^ in ['0' .. '9'] do
begin
value := 10 * value + (ord(f^) - ord('0'));
get(f)
end;
new(node, number);
if (neg)
then
node^.value := -value
else
node^.value := value
end
end;
if node = nil
then
new(node, error);
parseValue := node
end;
function eval(ast: pAstNode): integer;
begin
with ast^ do
case typ of
number: eval := value;
binop:
case operation of
'+': eval := eval(first) + eval(second);
'-': eval := eval(first) - eval(second);
'*': eval := eval(first) * eval(second);
'/': eval := eval(first) div eval(second);
end;
error:
writeln('Oops! Program is buggy!')
end
end;
procedure ReadEvalPrintLoop;
var
ast: pAstNode;
begin
while not eof do
begin
ast := parseAddSub(input);
if (ast^.typ = error) or not eoln
then
writeln('Error in expression.')
else
writeln('Result: ', eval(ast));
readln;
disposeTree(ast)
end
end;
begin
ReadEvalPrintLoop
end.</lang>
Perl
<lang perl>sub ev
- Evaluates an arithmetic expression like "(1+3)*7" and returns
- its value.
{my $exp = shift;
# Delete all meaningless characters. (Scientific notation,
# infinity, and not-a-number aren't supported.)
$exp =~ tr {0-9.+-/*()} {}cd;
return ev_ast(astize($exp));}
{my $balanced_paren_regex;
$balanced_paren_regex = qr
{\( ( [^()]+ | (??{$balanced_paren_regex}) )+ \)}x;
# ??{ ... } interpolates lazily (only when necessary),
# permitting recursion to arbitrary depths.
sub astize
# Constructs an abstract syntax tree by recursively
# transforming textual arithmetic expressions into array
# references of the form [operator, left oprand, right oprand].
{my $exp = shift;
# If $exp is just a number, return it as-is.
$exp =~ /[^0-9.]/ or return $exp;
# If parentheses surround the entire expression, get rid of
# them.
$exp = substr($exp, 1, length($exp) - 2)
while $exp =~ /\A($balanced_paren_regex)\z/;
# Replace stuff in parentheses with placeholders.
my @paren_contents;
$exp =~ s {($balanced_paren_regex)}
{push(@paren_contents, $1);
"[p$#paren_contents]"}eg;
# Scan for operators in order of increasing precedence,
# preferring the rightmost.
$exp =~ m{(.+) ([+-]) (.+)}x or
$exp =~ m{(.+) ([*/]) (.+)}x or
# The expression must've been malformed somehow.
# (Note that unary minus isn't supported.)
die "Eh?: [$exp]\n";
my ($op, $lo, $ro) = ($2, $1, $3);
# Restore the parenthetical expressions.
s {\[p(\d+)\]} {($paren_contents[$1])}eg
foreach $lo, $ro;
# And recurse.
return [$op, astize($lo), astize($ro)];}}
{my %ops =
('+' => sub {$_[0] + $_[1]},
'-' => sub {$_[0] - $_[1]},
'*' => sub {$_[0] * $_[1]},
'/' => sub {$_[0] / $_[1]});
sub ev_ast
# Evaluates an abstract syntax tree of the form returned by
# &astize.
{my $ast = shift;
# If $ast is just a number, return it as-is.
ref $ast or return $ast;
# Otherwise, recurse.
my ($op, @operands) = @$ast;
$_ = ev_ast($_) foreach @operands;
return $ops{$op}->(@operands);}}</lang>
Pop11
/* Scanner routines */
/* Uncomment the following to parse data from standard input
vars itemrep;
incharitem(charin) -> itemrep;
*/
;;; Current symbol
vars sym;
define get_sym();
itemrep() -> sym;
enddefine;
define expect(x);
lvars x;
if x /= sym then
printf(x, 'Error, expected %p\n');
mishap(sym, 1, 'Example parser error');
endif;
get_sym();
enddefine;
lconstant res_list = [( ) + * ];
lconstant reserved = newproperty(
maplist(res_list, procedure(x); [^x ^(true)]; endprocedure),
20, false, "perm");
/*
Parser for arithmetic expressions
*/
/*
expr: term
| expr "+" term
| expr "-" term
;
*/
define do_expr() -> result;
lvars result = do_term(), op;
while sym = "+" or sym = "-" do
sym -> op;
get_sym();
[^op ^result ^(do_term())] -> result;
endwhile;
enddefine;
/*
term: factor
| term "*" factor
| term "/" factor
;
*/
define do_term() -> result;
lvars result = do_factor(), op;
while sym = "*" or sym = "/" do
sym -> op;
get_sym();
[^op ^result ^(do_factor())] -> result;
endwhile;
enddefine;
/*
factor: word
| constant
| "(" expr ")"
;
*/
define do_factor() -> result;
if sym = "(" then
get_sym();
do_expr() -> result;
expect(")");
elseif isinteger(sym) or isbiginteger(sym) then
sym -> result;
get_sym();
else
if reserved(sym) then
printf(sym, 'unexpected symbol %p\n');
mishap(sym, 1, 'Example parser syntax error');
endif;
sym -> result;
get_sym();
endif;
enddefine;
/* Expression evaluator, returns false on error (currently only
division by 0 */
define arith_eval(expr);
lvars op, arg1, arg2;
if not(expr) then
return(expr);
endif;
if isinteger(expr) or isbiginteger(expr) then
return(expr);
endif;
expr(1) -> op;
arith_eval(expr(2)) -> arg1;
arith_eval(expr(3)) -> arg2;
if not(arg1) or not(arg2) then
return(false);
endif;
if op = "+" then
return(arg1 + arg2);
elseif op = "-" then
return(arg1 - arg2);
elseif op = "*" then
return(arg1 * arg2);
elseif op = "/" then
if arg2 = 0 then
return(false);
else
return(arg1 div arg2);
endif;
else
printf('Internal error\n');
return(false);
endif;
enddefine;
/* Given list, create item repeater. Input list is stored in a
closure are traversed when new item is requested. */
define listitemrep(lst);
procedure();
lvars item;
if lst = [] then
termin;
else
front(lst) -> item;
back(lst) -> lst;
item;
endif;
endprocedure;
enddefine;
/* Initialise scanner */
listitemrep([(3 + 50) * 7 - 100 / 10]) -> itemrep;
get_sym();
;;; Test it
arith_eval(do_expr()) =>
Prolog
<lang prolog> % Lexer
numeric(X) :- 48 =< X, X =< 57.
not_numeric(X) :- 48 > X ; X > 57.
lex1([], []).
lex1([40|Xs], ['('|Ys]) :- lex1(Xs, Ys).
lex1([41|Xs], [')'|Ys]) :- lex1(Xs, Ys).
lex1([43|Xs], ['+'|Ys]) :- lex1(Xs, Ys).
lex1([45|Xs], ['-'|Ys]) :- lex1(Xs, Ys).
lex1([42|Xs], ['*'|Ys]) :- lex1(Xs, Ys).
lex1([47|Xs], ['/'|Ys]) :- lex1(Xs, Ys).
lex1([X|Xs], [N|Ys]) :- numeric(X), N is X - 48, lex1(Xs, Ys).
lex2([], []).
lex2([X], [X]).
lex2([Xa,Xb|Xs], [Xa|Ys]) :- atom(Xa), lex2([Xb|Xs], Ys).
lex2([Xa,Xb|Xs], [Xa|Ys]) :- number(Xa), atom(Xb), lex2([Xb|Xs], Ys).
lex2([Xa,Xb|Xs], [Y|Ys]) :- number(Xa), number(Xb), N is Xa * 10 + Xb, lex2([N|Xs], [Y|Ys]).
% Parser
oper(1, *, X, Y, X * Y). oper(1, /, X, Y, X / Y).
oper(2, +, X, Y, X + Y). oper(2, -, X, Y, X - Y).
num(D) --> [D], {number(D)}.
expr(0, Z) --> num(Z).
expr(0, Z) --> {Z = (X)}, ['('], expr(2, X), [')'].
expr(N, Z) --> {succ(N0, N)}, {oper(N, Op, X, Y, Z)}, expr(N0, X), [Op], expr(N, Y).
expr(N, Z) --> {succ(N0, N)}, expr(N0, Z).
parse(Tokens, Expr) :- expr(2, Expr, Tokens, []).
% Evaluator
evaluate(E, E) :- number(E).
evaluate(A + B, E) :- evaluate(A, Ae), evaluate(B, Be), E is Ae + Be.
evaluate(A - B, E) :- evaluate(A, Ae), evaluate(B, Be), E is Ae - Be.
evaluate(A * B, E) :- evaluate(A, Ae), evaluate(B, Be), E is Ae * Be.
evaluate(A / B, E) :- evaluate(A, Ae), evaluate(B, Be), E is Ae / Be.
% Solution
calculator(String, Value) :-
lex1(String, Tokens1),
lex2(Tokens1, Tokens2),
parse(Tokens2, Expression),
evaluate(Expression, Value).
% Example use
% calculator("(3+50)*7-9", X).</lang>
Python
There are python modules, such as Ply, which facilitate the implementation of parsers. This example, however, uses only standard Python with the parser having
two stacks, one for operators, one for operands. A subsequent example uses Python ast module to generate the AST.
<lang python>import operator
class AstNode(object):
def __init__( self, opr, left, right ):
self.opr = opr
self.l = left
self.r = right
def eval(self):
return self.opr(self.l.eval(), self.r.eval())
class LeafNode(object):
def __init__( self, valStrg ):
self.v = int(valStrg)
def eval(self):
return self.v
class Yaccer(object):
def __init__(self):
self.operstak = []
self.nodestak =[]
self.__dict__.update(self.state1)
def v1( self, valStrg ):
# Value String
self.nodestak.append( LeafNode(valStrg))
self.__dict__.update(self.state2)
#print 'push', valStrg
def o2( self, operchar ):
# Operator character or open paren in state1
def openParen(a,b):
return 0 # function should not be called
opDict= { '+': ( operator.add, 2, 2 ),
'-': (operator.sub, 2, 2 ),
'*': (operator.mul, 3, 3 ),
'/': (operator.div, 3, 3 ),
'^': ( pow, 4, 5 ), # right associative exponentiation for grins
'(': ( openParen, 0, 8 )
}
operPrecidence = opDict[operchar][2]
self.redeuce(operPrecidence)
self.operstak.append(opDict[operchar])
self.__dict__.update(self.state1)
# print 'pushop', operchar
def syntaxErr(self, char ):
# Open Parenthesis
print 'parse error - near operator "%s"' %char
def pc2( self,operchar ):
# Close Parenthesis
# reduce node until matching open paren found
self.redeuce( 1 )
if len(self.operstak)>0:
self.operstak.pop() # pop off open parenthesis
else:
print 'Error - no open parenthesis matches close parens.'
self.__dict__.update(self.state2)
def end(self):
self.redeuce(0)
return self.nodestak.pop()
def redeuce(self, precidence):
while len(self.operstak)>0:
tailOper = self.operstak[len(self.operstak)-1]
if tailOper[1] < precidence: break
tailOper = self.operstak.pop()
vrgt = self.nodestak.pop()
vlft= self.nodestak.pop()
self.nodestak.append( AstNode(tailOper[0], vlft, vrgt))
# print 'reduce'
state1 = { 'v': v1, 'o':syntaxErr, 'po':o2, 'pc':syntaxErr }
state2 = { 'v': syntaxErr, 'o':o2, 'po':syntaxErr, 'pc':pc2 }
def Lex( exprssn, p ):
bgn = None
cp = -1
for c in exprssn:
cp += 1
if c in '+-/*^()': # throw in exponentiation (^)for grins
if bgn is not None:
p.v(p, exprssn[bgn:cp])
bgn = None
if c=='(': p.po(p, c)
elif c==')':p.pc(p, c)
else: p.o(p, c)
elif c in ' \t':
if bgn is not None:
p.v(p, exprssn[bgn:cp])
bgn = None
elif c in '0123456789':
if bgn is None:
bgn = cp
else:
print 'Invalid character in expression'
if bgn is not None:
p.v(p, exprssn[bgn:cp])
bgn = None
if bgn is not None:
p.v(p, exprssn[bgn:cp+1])
bgn = None
return p.end()
expr = raw_input("Expression:")
astTree = Lex( expr, Yaccer())
print expr, '=',astTree.eval()</lang>
ast standard library module
Python comes with its own ast module as part of its standard libraries. The module compiles Python source into an AST tree that can in turn be compiled to bytecode then executed.
<lang python>>>> import ast
>>>
>>> expr="2 * (3 -1) + 2 * 5"
>>> node = ast.parse(expr, mode='eval')
>>> ast.dump(node)
'Expression(body=BinOp(left=BinOp(left=Num(n=2), op=Mult(), right=BinOp(left=Num(n=3), op=Sub(), right=Num(n=1))), op=Add(), right=BinOp(left=Num(n=2), op=Mult(), right=Num(n=5))))'
>>> code_object = compile(node, filename='<string>', mode='eval')
>>> eval(code_object)
14
>>> # lets modify the AST by changing the 5 to a 6
>>> node.body.right.right.n
5
>>> node.body.right.right.n = 6
>>> code_object = compile(node, filename='<string>', mode='eval')
>>> eval(code_object)
16</lang>
Tcl
The code below delivers the AST for an expression in a form that it can be immediately eval-led, using Tcl's prefix operators.
<lang Tcl>namespace import tcl::mathop::*
proc ast str {
# produce abstract syntax tree for an expression
regsub -all {[-+*/()]} $str { & } str ;# "tokenizer"
s $str
}
proc s {args} {
# parse "(a + b) * c + d" to "+ [* [+ a b] c] d"
if {[llength $args] == 1} {set args [lindex $args 0]}
if [regexp {[()]} $args] {
eval s [string map {( "\[s " ) \]} $args]
} elseif {"*" in $args} {
s [s_group $args *]
} elseif {"/" in $args} {
s [s_group $args /]
} elseif {"+" in $args} {
s [s_group $args +]
} elseif {"-" in $args} {
s [s_group $args -]
} else {
string map {\{ \[ \} \]} [join $args]
}
}
proc s_group {list op} {
# turn ".. a op b .." to ".. {op a b} .."
set pos [lsearch -exact $list $op]
set p_1 [- $pos 1]
set p1 [+ $pos 1]
lreplace $list $p_1 $p1 \
[list $op [lindex $list $p_1] [lindex $list $p1]]
}
- -- Test suite
foreach test [split {
ast 2-2
ast 1-2-3
ast (1-2)-3
ast 1-(2-3)
ast (1+2)*3
ast (1+2)/3-4*5
ast ((1+2)/3-4)*5
} \n] {
puts "$test ..... [eval $test] ..... [eval [eval $test]]"
}</lang>
Output:
ast 2-2 ..... - 2 2 ..... 0
ast 1-2-3 ..... - [- 1 2] 3 ..... -4
ast (1-2)-3 ..... - [- 1 2] 3 ..... -4
ast 1-(2-3) ..... - 1 [- 2 3] ..... 2
ast (1+2)*3 ..... * [+ 1 2] 3 ..... 9
ast (1+2)/3-4*5 ..... - [/ [+ 1 2] 3] [* 4 5] ..... -19
ast ((1+2)/3-4)*5 ..... * [- [/ [+ 1 2] 3] 4] 5 ..... -15
Ursala
with no error checking other than removal of spaces
<lang Ursala>
- import std
- import nat
- import flo
lex = ~=' '*~F+ rlc both -=digits # separate into tokens
parse = # build a tree
--<';'>; @iNX ~&l->rh ^/~< cases~&lhh\~&lhPNVrC {
'*/': ^|C/~&hNV associate '*/',
'+-': ^|C/~&hNV associate '*/+-',
');': @r ~&htitBPC+ associate '*/+-'}
associate "ops" = ~&tihdh2B-="ops"-> ~&thd2tth2hNCCVttt2C
traverse = *^ ~&v?\%ep ^H\~&vhthPX '+-*/'-$<plus,minus,times,div>@dh
evaluate = traverse+ parse+ lex</lang>
test program:
<lang Ursala>
- cast %eL
test = evaluate*t
-[
1+1
4/5
2-1
3*7
3+4+5
9-2-4
7/3/2
4+2*3
5*2-1
5-3*2
(1+1)*(2+3)
(2-4)/(3+5*(8-1))]-</lang>
output:
<
2.000000e+00,
8.000000e-01,
1.000000e+00,
2.100000e+01,
1.200000e+01,
3.000000e+00,
1.166667e+00,
1.000000e+01,
9.000000e+00,
-1.000000e+00,
1.000000e+01,
-5.263158e-02>