Arithmetic evaluation
You are encouraged to solve this task according to the task description, using any language you may know.
Create a program which parses and evaluates arithmetic expressions. Requirements: an abstract-syntax tree (AST) for the expression must be created from parsing the input. The AST must be used in evaluation, also, so the input may not be directly evaluated (e.g. by calling eval or a similar language feature.) The expression will be a string or list of symbols like "(1+3)*7". The four symbols + - * / must be supported as binary relations with conventional precedence rules. Precedence-control parentheses must also be supported.
For those who don't remember, mathematical precedence is as follows:
- Parentheses
- Multiplication/Division (left to right)
- Addition/Subtraction (left to right)
Ada
ALGOL 68
INT base=10; MODE FIXED = LONG REAL; # numbers in the format 9,999.999 # #IF build abstract syntax tree and then EVAL tree # MODE AST = UNION(NODE, FIXED); MODE NUM = REF AST; MODE NODE = STRUCT(NUM a, PROC (FIXED,FIXED)FIXED op, NUM b); OP EVAL = (NUM ast)FIXED:( CASE ast IN (FIXED num): num, (NODE fork): (op OF fork)(EVAL( a OF fork), EVAL (b OF fork)) ESAC ); OP + = (NUM a,b)NUM: ( HEAP AST := NODE(a, (FIXED a,b)FIXED:a+b, b) ); OP - = (NUM a,b)NUM: ( HEAP AST := NODE(a, (FIXED a,b)FIXED:a-b, b) ); OP * = (NUM a,b)NUM: ( HEAP AST := NODE(a, (FIXED a,b)FIXED:a*b, b) ); OP / = (NUM a,b)NUM: ( HEAP AST := NODE(a, (FIXED a,b)FIXED:a/b, b) ); OP **= (NUM a,b)NUM: ( HEAP AST := NODE(a, (FIXED a,b)FIXED:a**b, b) ); #ELSE simply use REAL arithmetic with no abstract syntax tree at all # CO MODE NUM = FIXED, AST = FIXED; OP EVAL = (FIXED num)FIXED: num; #FI# END CO MODE LEX = PROC (TOK)NUM; MODE MONADIC =PROC (NUM)NUM; MODE DIADIC = PROC (NUM,NUM)NUM; MODE TOK = CHAR; MODE ACTION = UNION(STACKACTION, LEX, MONADIC, DIADIC); MODE OPVAL = STRUCT(INT prio, ACTION action); MODE OPITEM = STRUCT(TOK token, OPVAL opval); [256]STACKITEM stack; MODE STACKITEM = STRUCT(NUM value, OPVAL op); MODE STACKACTION = PROC (REF STACKITEM)VOID; PROC begin = (REF STACKITEM top)VOID: prio OF op OF top -:= +10; PROC end = (REF STACKITEM top)VOID: prio OF op OF top -:= -10; OP ** = (COMPL a,b)COMPL: complex exp(complex ln(a)*b); [8]OPITEM op list :=( # OP PRIO ACTION # ("^", (8, (NUM a,b)NUM: a**b)), ("*", (7, (NUM a,b)NUM: a*b)), ("/", (7, (NUM a,b)NUM: a/b)), ("+", (6, (NUM a,b)NUM: a+b)), ("-", (6, (NUM a,b)NUM: a-b)), ("(",(+10, begin)), (")",(-10, end)), ("?", (9, LEX:SKIP)) ); PROC op dict = (TOK op)REF OPVAL:( # This can be unrolled to increase performance # REF OPITEM candidate; FOR i TO UPB op list WHILE candidate := op list[i]; # WHILE # op /= token OF candidate DO SKIP OD; opval OF candidate ); PROC build ast = (STRING expr)NUM:( INT top:=0; PROC compress ast stack = (INT prio, NUM in value)NUM:( NUM out value := in value; FOR loc FROM top BY -1 TO 1 WHILE REF STACKITEM stack top := stack[loc]; # WHILE # ( top >= LWB stack | prio <= prio OF op OF stack top | FALSE ) DO top := loc - 1; out value := CASE action OF op OF stack top IN (MONADIC op): op(value OF stack top), # not implemented # (DIADIC op): op(value OF stack top,out value) ESAC OD; out value ); NUM value := NIL; FIXED num value; INT decimal places; FOR i TO UPB expr DO TOK token = expr[i]; REF OPVAL this op := op dict(token); CASE action OF this op IN (STACKACTION action):( IF prio OF thisop = -10 THEN value := compress ast stack(0, value) FI; IF top >= LWB stack THEN action(stack[top]) FI ), (LEX):( # a crude lexer # SHORT INT digit = ABS token - ABS "0"; IF 0<= digit AND digit < base THEN IF NUM(value) IS NIL THEN # first digit # decimal places := 0; value := HEAP AST := num value := digit ELSE NUM(value) := num value := IF decimal places = 0 THEN num value * base + digit ELSE decimal places *:= base; num value + digit / decimal places FI FI ELIF token = "." THEN decimal places := 1 ELSE SKIP # and ignore spaces and any unrecognised characters # FI ), (MONADIC): SKIP, # not implemented # (DIADIC):( value := compress ast stack(prio OF this op, value); IF top=UPB stack THEN index error FI; stack[top+:=1]:=STACKITEM(value, this op); value:=NIL ) ESAC OD; compress ast stack(-max int, value) ); # TEST # printf(($" euler's number is about: "g(-long real width,long real width-2)l$, EVAL build ast("1+1+(1+(1+(1+(1+(1+(1+(1+(1+(1+(1+(1+(1+(1+1/15)/14)/13)/12)/11)/10)/9)/8)/7)/6)/5)/4)/3)/2"))); SKIP EXIT index error: printf(("Stack over flow"))
Output:
euler's number is about: 2.71828182845899446428546958
C
C++
1.8.4
<lang cpp> #include <boost/spirit.hpp>
#include <boost/spirit/tree/ast.hpp> #include <string> #include <cassert> #include <iostream> #include <istream> #include <ostream> using boost::spirit::rule; using boost::spirit::parser_tag; using boost::spirit::ch_p; using boost::spirit::real_p; using boost::spirit::tree_node; using boost::spirit::node_val_data; // The grammar struct parser: public boost::spirit::grammar<parser> { enum rule_ids { addsub_id, multdiv_id, value_id, real_id }; struct set_value { set_value(parser const& p): self(p) {} void operator()(tree_node<node_val_data<std::string::iterator, double> >& node, std::string::iterator begin, std::string::iterator end) const { node.value.value(self.tmp); } parser const& self; }; mutable double tmp; template<typename Scanner> struct definition { rule<Scanner, parser_tag<addsub_id> > addsub; rule<Scanner, parser_tag<multdiv_id> > multdiv; rule<Scanner, parser_tag<value_id> > value; rule<Scanner, parser_tag<real_id> > real; definition(parser const& self) { using namespace boost::spirit; addsub = multdiv >> *((root_node_d[ch_p('+')] | root_node_d[ch_p('-')]) >> multdiv); multdiv = value >> *((root_node_d[ch_p('*')] | root_node_d[ch_p('/')]) >> value); value = real | inner_node_d[('(' >> addsub >> ')')]; real = leaf_node_d[access_node_d[real_p[assign_a(self.tmp)]][set_value(self)]]; } rule<Scanner, parser_tag<addsub_id> > const& start() const { return addsub; } }; }; template<typename TreeIter> double evaluate(TreeIter const& i) { double op1, op2; switch (i->value.id().to_long()) { case parser::real_id: return i->value.value(); case parser::value_id: case parser::addsub_id: case parser::multdiv_id: op1 = evaluate(i->children.begin()); op2 = evaluate(i->children.begin()+1); switch(*i->value.begin()) { case '+': return op1 + op2; case '-': return op1 - op2; case '*': return op1 * op2; case '/': return op1 / op2; default: assert(!"Should not happen"); } default: assert(!"Should not happen"); } return 0; } // the read/eval/write loop int main() { parser eval; std::string line; while (std::cout << "Expression: " && std::getline(std::cin, line) && !line.empty()) { typedef boost::spirit::node_val_data_factory<double> factory_t; boost::spirit::tree_parse_info<std::string::iterator, factory_t> info = boost::spirit::ast_parse<factory_t>(line.begin(), line.end(), eval, boost::spirit::space_p); if (info.full) { std::cout << "Result: " << evaluate(info.trees.begin()) << std::endl; } else { std::cout << "Error in expression." << std::endl; } } };
</lang>
Common Lisp
The following code parses a string into a sequence of tokens. The sequence of tokens includes :lparen
and :rparen
indicating left and right parenthesis, respectively. That sequence of tokens is then transformed by replacing subsequences of the form :lparen ... :rparen
with a sublist containing the tokens between the :lparen
and :rparen
. The resulting tree is then simplified by replacing any subsequence of the form A x B y C …
with either (A x B) y C …
or A x (B y C)
depending on the relative precedence of x
and y
. This produces a syntax tree each of whose elements is either a node representing an integer, (:integer . n)
, a list containing a single expression, (exp), or an operation,
(e1 op e2)
. Evaluating such a syntax tree is then trivial. This implementation can read integers, and produce integral and rational values.
<lang lisp>(defun tokenize-stream (stream)
(labels ((whitespace-p (char)
(find char #(#\space #\newline #\return #\tab)))
(consume-whitespace ()
(loop while (whitespace-p (peek-char nil stream nil #\a))
do (read-char stream)))
(read-integer ()
(loop while (digit-char-p (peek-char nil stream nil #\space))
collect (read-char stream) into digits
finally (return (parse-integer (coerce digits 'string))))))
(consume-whitespace)
(let ((c (peek-char nil stream nil nil)))
(multiple-value-bind (token value)
(case c
((nil) :eof)
((#\() :lparen)
((#\)) :rparen)
((#\*) :multiply)
((#\/) :divide)
((#\+) :add)
((#\-) :subtract)
(otherwise
(unless (digit-char-p c)
(cerror "Skip it." "Unexpected character ~w." c)
(read-char stream)
(return-from tokenize-stream
(tokenize-stream stream)))
(values :integer (read-integer))))
(unless (find token #(:integer :eof))
(read-char stream))
(if (not (eql token :integer)) token
(cons token value))))))
(defun group-parentheses (tokens &optional (delimited nil))
(do ((new-tokens '()))
((endp tokens)
(when delimited
(cerror "Insert it." "Expected right parenthesis."))
(values (nreverse new-tokens) '()))
(let ((token (pop tokens)))
(case token
((:lparen)
(multiple-value-bind (group remaining-tokens)
(group-parentheses tokens t)
(setf new-tokens (cons group new-tokens)
tokens remaining-tokens)))
((:rparen)
(if (not delimited)
(cerror "Ignore it." "Unexpected right parenthesis.")
(return (values (nreverse new-tokens) tokens))))
(otherwise
(push token new-tokens))))))
(defun group-operations (expression)
(flet ((gop (exp) (group-operations exp)))
(if (eql (car expression) :integer) expression
(destructuring-bind (A &optional (x nil xp) B (y nil yp) C &rest others)
expression
(cond
((not xp) (gop A))
((not yp) (list (gop A) x (gop B)))
(t (let ((a (gop A)) (B (gop B)) (C (gop C)))
(if (and (find x #(:add :subtract))
(find y #(:multiply :divide)))
(gop (list* A x (list B y C) others))
(gop (list* (list A x B) y C others))))))))))
(defun evaluate-expression (expression)
(cond
((eql (car expression) :integer)
(cdr expression))
((endp (cdr expression))
(evaluate-expression (car expression)))
(t (destructuring-bind (e1 op e2) expression
(let ((v1 (evaluate-expression e1))
(v2 (evaluate-expression e2)))
(ecase op
(:add (+ v1 v2))
(:subtract (- v1 v2))
(:multiply (* v1 v2))
(:divide (/ v1 v2))))))))
(defun evaluate (string)
(with-input-from-string (in string)
(evaluate-expression
(group-operations
(group-parentheses
(loop for token = (tokenize-stream in)
until (eql :eof token)
collect token))))))</lang>
Examples
> (evaluate "1 - 5 * 2 / 20 + 1")
3/2
> (evaluate "(1 - 5) * 2 / (20 + 1)")
-8/21
> (evaluate "2 * (3 + ((5) / (7 - 11)))")
7/2
> (evaluate "(2 + 3) / (10 - 5)")
1
Examples of error handling
> (evaluate "(3 * 2) a - (1 + 2) / 4")
Error: Unexpected character a.
1 (continue) Skip it.
2 (abort) Return to level 0.
3 Return to top loop level 0.
Type :b for backtrace, :c <option number> to proceed, or :? for other options
: 1 > :c 1
21/4
> (evaluate "(3 * 2) - (1 + 2) / (4")
Error: Expected right parenthesis.
1 (continue) Insert it.
2 (abort) Return to level 0.
3 Return to top loop level 0.
Type :b for backtrace, :c <option number> to proceed, or :? for other options
: 1 > :c 1
21/4
D
Following the previous number-operator dual stacks approach, an AST is built while previous version is evaluating the expression value. After the AST tree is constructed, a visitor pattern is used to display the AST structure and calculate the value.
<lang d>//module evaluate ;
import std.stdio, std.string, std.ctype, std.conv ;
// simple stack template
void push(T)(inout T[] stk, T top) { stk ~= top ; }
T pop(T)(inout T[] stk, bool discard = true) {
T top ;
if (stk.length == 0) throw new Exception("Stack Empty") ;
top = stk[$-1] ;
if (discard) stk.length = stk.length - 1 ;
return top ;
}
alias int Type ;
enum { Num, OBkt, CBkt, Add, Sub, Mul, Div } ; // Type
string[] opChar = ["#","(",")","+","-","*","/"] ;
int[] opPrec = [0,-9,-9,1,1,2,2] ;
abstract class Visitor { void visit(XP e) ; }
class XP {
Type type ;
string str ;
int pos ; // optional, for dispalying AST struct.
XP LHS, RHS = null ;
this(string s = ")", int p = -1) {
str = s ; pos = p ;
type = Num ;
for(Type t = Div ; t > Num ; t--)
if(opChar[t] == s) type = t ;
}
int opCmp(XP rhs) { return opPrec[type] - opPrec[rhs.type] ; }
void accept(Visitor v) { v.visit(this) ; } ;
}
class AST {
XP root ;
XP[] num, opr ;
string xpr, token ;
int xpHead, xpTail ;
void joinXP(XP x) { x.RHS = num.pop() ; x.LHS = num.pop() ; num.push(x) ; }
string nextToken() {
while (xpHead < xpr.length && xpr[xpHead] == ' ')
xpHead++ ; // skip spc
xpTail = xpHead ;
if(xpHead < xpr.length) {
token = xpr[xpTail..xpTail+1] ;
switch(token) {
case "(",")","+","-","*","/": // valid non-number
xpTail++ ;
return token ;
default: // should be number
if(isdigit(token[0])) {
while(xpTail < xpr.length && isdigit(xpr[xpTail]))
xpTail++ ;
return xpr[xpHead..xpTail] ;
} // else may be error
} // end switch
}
if(xpTail < xpr.length)
throw new Exception("Invalid Char <" ~ xpr[xpTail] ~ ">") ;
return null ;
} // end nextToken
AST parse(string s) {
bool expectingOP ;
xpr = s ;
try {
xpHead = xpTail = 0 ;
num = opr = null ;
root = null ;
opr.push(new XP) ; // CBkt, prevent evaluate null OP precidence
while((token = nextToken) !is null) {
XP tokenXP = new XP(token, xpHead) ;
if(expectingOP) { // process OP-alike XP
switch(token) {
case ")":
while(opr.pop(false).type != OBkt)
joinXP(opr.pop()) ;
opr.pop() ;
expectingOP = true ; break ;
case "+","-","*","/":
while (tokenXP <= opr.pop(false))
joinXP(opr.pop()) ;
opr.push(tokenXP) ;
expectingOP = false ; break ;
default:
throw new Exception("Expecting Operator or ), not <" ~ token ~ ">") ;
}
} else { // process Num-alike XP
switch(token) {
case "+","-","*","/",")":
throw new Exception("Expecting Number or (, not <" ~ token ~ ">") ;
case "(":
opr.push(tokenXP) ;
expectingOP = false ; break ;
default: // number
num.push(tokenXP) ;
expectingOP = true ;
}
}
xpHead = xpTail ;
} // end while
while (opr.length > 1) // join pending Op
joinXP(opr.pop()) ;
}catch(Exception e) {
writefln("%s\n%s\n%s^", e.msg, xpr, repeat(" ", xpHead)) ;
root = null ;
return this ;
}
if(num.length != 1) { // should be one XP left
writefln("Parse Error...") ;
root = null ;
} else
root = num.pop() ;
return this ;
} // end Parse
} // end class AST
// for display AST fancy struct
void ins(inout char[][] s, string v, int p, int l) {
while(s.length < l + 1) s.length = s.length + 1 ;
while(s[l].length < p + v.length + 1) s[l] ~= " " ;
s[l][p..p +v.length] = v ;
}
class calcVis : Visitor {
int result, level = 0 ;
string Result = null ;
char[][] Tree = null ;
static void opCall(AST a) {
if (a && a.root) {
calcVis c = new calcVis ;
a.root.accept(c) ;
for(int i = 1; i < c.Tree.length ; i++) { // more fancy
bool flipflop = false ; char mk = '.' ;
for(int j = 0 ; j < c.Tree[i].length ; j++) {
while(j >= c.Tree[i-1].length) c.Tree[i-1] ~= " " ;
char c1 = c.Tree[i][j] ; char c2 = c.Tree[i-1][j] ;
if(flipflop && (c1 == ' ') && c2 == ' ')
c.Tree[i-1][j] = mk ;
if(c1 != mk && c1 != ' ' && (j == 0 || !isdigit(c.Tree[i][j-1])))
flipflop = !flipflop ;
}
}
foreach(t; c.Tree) writefln(t) ;
writefln("%s ==>\n%s = %s", a.xpr,c.Result,c.result) ;
} else
writefln("Evalute invalid or null Expression") ;
}
void visit(XP xp) {// calc. the value, display AST struct and eval order.
ins(Tree, xp.str, xp.pos, level) ;
level++ ;
if (xp.type == Num) {
Result ~= xp.str ;
result = toInt(xp.str) ;
} else {
Result ~= "(" ;
xp.LHS.accept(this) ;
int lhs = result ;
Result ~= opChar[xp.type] ;
xp.RHS.accept(this) ;
Result ~= ")" ;
switch(xp.type) {
case Add: result = lhs + result ; break ;
case Sub: result = lhs - result ; break ;
case Mul: result = lhs * result ; break ;
case Div: result = lhs / result ; break ;
default: throw new Exception("Invalid type") ;
}
} //
level-- ;
}
}
void main(string[] args) {
string expression = args.length > 1 ? join(args[1..$]," ") :
"1 + 2*(3 - 2*(3 - 2)*((2 - 4)*5 - 22/(7 + 2*(3 - 1)) - 1)) + 1" ; // should be 60
calcVis((new AST).parse(expression)) ;
}</lang>
E
While the task requirements specify not evaluating using the language's built-in eval, they don't say that you have to write your own parser...
<lang e>def eParser := <elang:syntax.makeEParser>
def LiteralExpr := <elang:evm.makeLiteralExpr>.asType()
def arithEvaluate(expr :String) {
def ast := eParser(expr)
def evalAST(ast) {
return switch (ast) {
match e`@a + @b` { evalAST(a) + evalAST(b) }
match e`@a - @b` { evalAST(a) - evalAST(b) }
match e`@a * @b` { evalAST(a) * evalAST(b) }
match e`@a / @b` { evalAST(a) / evalAST(b) }
match e`-@a` { -(evalAST(a)) }
match l :LiteralExpr { l.getValue() }
}
}
return evalAST(ast)
}</lang>
Parentheses are handled by the parser.
<lang e>? arithEvaluate("1 + 2")
- value: 3
? arithEvaluate("(1 + 2) * 10 / 100")
- value: 0.3
? arithEvaluate("(1 + 2 / 2) * (5 + 5)")
- value: 20.0</lang>
Haskell
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Expr
data Exp = Num Int
| Add Exp Exp
| Sub Exp Exp
| Mul Exp Exp
| Div Exp Exp
expr = buildExpressionParser table factor
table = [[op "*" (Mul) AssocLeft, op "/" (Div) AssocLeft]
,[op "+" (Add) AssocLeft, op "-" (Sub) AssocLeft]]
where op s f assoc = Infix (do string s; return f) assoc
factor = do char '(' ; x <- expr ; char ')'
return x
<|> do ds <- many1 digit
return $ Num (read ds)
evaluate (Num x) = fromIntegral x
evaluate (Add a b) = (evaluate a) + (evaluate b)
evaluate (Sub a b) = (evaluate a) - (evaluate b)
evaluate (Mul a b) = (evaluate a) * (evaluate b)
evaluate (Div a b) = (evaluate a) `div` (evaluate b)
solution exp = case parse expr [] exp of
Right expr -> evaluate expr
Left _ -> error "Did not parse"
J
<lang j>parse=:parse_parser_
eval=:monad define
'gerund structure'=:y
gerund@.structure
)
coclass 'parser'
classify=: '$()*/+-'&(((>:@#@[ # 2:) #: 2 ^ i.)&;:)
rules=:
patterns=: ,"0 assert 1
addrule=: dyad define
rules=: rules,;:x
patterns=: patterns,+./@classify"1 y
)
'Term' addrule '$()', '0', '+-',: '0'
'Factor' addrule '$()+-', '0', '*/',: '0'
'Parens' addrule '(', '*/+-0', ')',: ')*/+-0$'
rules=: rules,;:'Move'
buildTree=: monad define
words=: ;:'$',y
queue=: classify '$',y
stack=: classify '$$$$'
tokens=: ]&.>i.#words
tree=:
while.(#queue)+.6<#stack do.
rule=: rules {~ i.&1 patterns (*./"1)@:(+./"1) .(*."1)4{.stack
rule`:6
end.
'syntax' assert 1 0 1 1 1 1 -: {:"1 stack
gerund=: literal&.> (<,'%') (I. words=<,'/')} words
gerund;1{tree
)
literal=:monad define ::]
".'t=.',y
5!:1<'t'
)
Term=: Factor=: monad define
stack=: ({.stack),(classify '0'),4}.stack
tree=: ({.tree),(<1 2 3{tree),4}.tree
)
Parens=: monad define
stack=: (1{stack),3}.stack
tree=: (1{tree),3}.tree
)
Move=: monad define
'syntax' assert 0<#queue
stack=: ({:queue),stack
queue=: }:queue
tree=: ({:tokens),tree
tokens=: }:tokens
)
parse=:monad define
tmp=: conew 'parser'
r=: buildTree__tmp y
coerase tmp
r
)</lang>
example use:
eval parse '1+2*3/(4-5+6)'
2.2
You can also display the syntax tree, for example:
parse '1+1'
Pascal
Perl
<lang perl>sub ev
- Evaluates an arithmetic expression like "(1+3)*7" and returns
- its value.
{my $exp = shift;
# Delete all meaningless characters. (Scientific notation,
# infinity, and not-a-number aren't supported.)
$exp =~ tr {0-9.+-/*()} {}cd;
return ev_ast(astize($exp));}
{my $balanced_paren_regex;
$balanced_paren_regex = qr
{\( ( [^()]+ | (??{$balanced_paren_regex}) )+ \)}x;
# ??{ ... } interpolates lazily (only when necessary),
# permitting recursion to arbitrary depths.
sub astize
# Constructs an abstract syntax tree by recursively
# transforming textual arithmetic expressions into array
# references of the form [operator, left oprand, right oprand].
{my $exp = shift;
# If $exp is just a number, return it as-is.
$exp =~ /[^0-9.]/ or return $exp;
# If parentheses surround the entire expression, get rid of
# them.
$exp = substr($exp, 1, length($exp) - 2)
while $exp =~ /\A($balanced_paren_regex)\z/;
# Replace stuff in parentheses with placeholders.
my @paren_contents;
$exp =~ s {($balanced_paren_regex)}
{push(@paren_contents, $1);
"[p$#paren_contents]"}eg;
# Scan for operators in order of increasing precedence,
# preferring the rightmost.
$exp =~ m{(.+) ([+-]) (.+)}x or
$exp =~ m{(.+) ([*/]) (.+)}x or
# The expression must've been malformed somehow.
# (Note that unary minus isn't supported.)
die "Eh?: [$exp]\n";
my ($op, $lo, $ro) = ($2, $1, $3);
# Restore the parenthetical expressions.
s {\[p(\d+)\]} {($paren_contents[$1])}eg
foreach $lo, $ro;
# And recurse.
return [$op, astize($lo), astize($ro)];}}
{my %ops =
('+' => sub {$_[0] + $_[1]},
'-' => sub {$_[0] - $_[1]},
'*' => sub {$_[0] * $_[1]},
'/' => sub {$_[0] / $_[1]});
sub ev_ast
# Evaluates an abstract syntax tree of the form returned by
# &astize.
{my $ast = shift;
# If $ast is just a number, return it as-is.
ref $ast or return $ast;
# Otherwise, recurse.
my ($op, @operands) = @$ast;
$_ = ev_ast($_) foreach @operands;
return $ops{$op}->(@operands);}}</lang>
Pop11
/* Scanner routines */
/* Uncomment the following to parse data from standard input
vars itemrep;
incharitem(charin) -> itemrep;
*/
;;; Current symbol
vars sym;
define get_sym();
itemrep() -> sym;
enddefine;
define expect(x);
lvars x;
if x /= sym then
printf(x, 'Error, expected %p\n');
mishap(sym, 1, 'Example parser error');
endif;
get_sym();
enddefine;
lconstant res_list = [( ) + * ];
lconstant reserved = newproperty(
maplist(res_list, procedure(x); [^x ^(true)]; endprocedure),
20, false, "perm");
/*
Parser for arithmetic expressions
*/
/*
expr: term
| expr "+" term
| expr "-" term
;
*/
define do_expr() -> result;
lvars result = do_term(), op;
while sym = "+" or sym = "-" do
sym -> op;
get_sym();
[^op ^result ^(do_term())] -> result;
endwhile;
enddefine;
/*
term: factor
| term "*" factor
| term "/" factor
;
*/
define do_term() -> result;
lvars result = do_factor(), op;
while sym = "*" or sym = "/" do
sym -> op;
get_sym();
[^op ^result ^(do_factor())] -> result;
endwhile;
enddefine;
/*
factor: word
| constant
| "(" expr ")"
;
*/
define do_factor() -> result;
if sym = "(" then
get_sym();
do_expr() -> result;
expect(")");
elseif isinteger(sym) or isbiginteger(sym) then
sym -> result;
get_sym();
else
if reserved(sym) then
printf(sym, 'unexpected symbol %p\n');
mishap(sym, 1, 'Example parser syntax error');
endif;
sym -> result;
get_sym();
endif;
enddefine;
/* Expression evaluator, returns false on error (currently only
division by 0 */
define arith_eval(expr);
lvars op, arg1, arg2;
if not(expr) then
return(expr);
endif;
if isinteger(expr) or isbiginteger(expr) then
return(expr);
endif;
expr(1) -> op;
arith_eval(expr(2)) -> arg1;
arith_eval(expr(3)) -> arg2;
if not(arg1) or not(arg2) then
return(false);
endif;
if op = "+" then
return(arg1 + arg2);
elseif op = "-" then
return(arg1 - arg2);
elseif op = "*" then
return(arg1 * arg2);
elseif op = "/" then
if arg2 = 0 then
return(false);
else
return(arg1 div arg2);
endif;
else
printf('Internal error\n');
return(false);
endif;
enddefine;
/* Given list, create item repeater. Input list is stored in a
closure are traversed when new item is requested. */
define listitemrep(lst);
procedure();
lvars item;
if lst = [] then
termin;
else
front(lst) -> item;
back(lst) -> lst;
item;
endif;
endprocedure;
enddefine;
/* Initialise scanner */
listitemrep([(3 + 50) * 7 - 100 / 10]) -> itemrep;
get_sym();
;;; Test it
arith_eval(do_expr()) =>
Prolog
<lang prolog> % Lexer
numeric(X) :- 48 =< X, X =< 57.
not_numeric(X) :- 48 > X ; X > 57.
lex1([], []).
lex1([40|Xs], ['('|Ys]) :- lex1(Xs, Ys).
lex1([41|Xs], [')'|Ys]) :- lex1(Xs, Ys).
lex1([43|Xs], ['+'|Ys]) :- lex1(Xs, Ys).
lex1([45|Xs], ['-'|Ys]) :- lex1(Xs, Ys).
lex1([42|Xs], ['*'|Ys]) :- lex1(Xs, Ys).
lex1([47|Xs], ['/'|Ys]) :- lex1(Xs, Ys).
lex1([X|Xs], [N|Ys]) :- numeric(X), N is X - 48, lex1(Xs, Ys).
lex2([], []).
lex2([X], [X]).
lex2([Xa,Xb|Xs], [Xa|Ys]) :- atom(Xa), lex2([Xb|Xs], Ys).
lex2([Xa,Xb|Xs], [Xa|Ys]) :- number(Xa), atom(Xb), lex2([Xb|Xs], Ys).
lex2([Xa,Xb|Xs], [Y|Ys]) :- number(Xa), number(Xb), N is Xa * 10 + Xb, lex2([N|Xs], [Y|Ys]).
% Parser
oper(1, *, X, Y, X * Y). oper(1, /, X, Y, X / Y).
oper(2, +, X, Y, X + Y). oper(2, -, X, Y, X - Y).
num(D) --> [D], {number(D)}.
expr(0, Z) --> num(Z).
expr(0, Z) --> {Z = (X)}, ['('], expr(2, X), [')'].
expr(N, Z) --> {succ(N0, N)}, {oper(N, Op, X, Y, Z)}, expr(N0, X), [Op], expr(N, Y).
expr(N, Z) --> {succ(N0, N)}, expr(N0, Z).
parse(Tokens, Expr) :- expr(2, Expr, Tokens, []).
% Evaluator
evaluate(E, E) :- number(E).
evaluate(A + B, E) :- evaluate(A, Ae), evaluate(B, Be), E is Ae + Be.
evaluate(A - B, E) :- evaluate(A, Ae), evaluate(B, Be), E is Ae - Be.
evaluate(A * B, E) :- evaluate(A, Ae), evaluate(B, Be), E is Ae * Be.
evaluate(A / B, E) :- evaluate(A, Ae), evaluate(B, Be), E is Ae / Be.
% Solution
calculator(String, Value) :-
lex1(String, Tokens1),
lex2(Tokens1, Tokens2),
parse(Tokens2, Expression),
evaluate(Expression, Value).
% Example use
% calculator("(3+50)*7-9", X).</lang>
Python
There are python modules, such as Ply, which facilitate the implementation of parsers. This example, however, uses only standard Python with the parser having
two stacks, one for operators, one for operands. A subsequent example uses Python ast module to generate the AST.
<lang python>import operator
class AstNode(object):
def __init__( self, opr, left, right ):
self.opr = opr
self.l = left
self.r = right
def eval(self):
return self.opr(self.l.eval(), self.r.eval())
class LeafNode(object):
def __init__( self, valStrg ):
self.v = int(valStrg)
def eval(self):
return self.v
class Yaccer(object):
def __init__(self):
self.operstak = []
self.nodestak =[]
self.__dict__.update(self.state1)
def v1( self, valStrg ):
# Value String
self.nodestak.append( LeafNode(valStrg))
self.__dict__.update(self.state2)
#print 'push', valStrg
def o2( self, operchar ):
# Operator character or open paren in state1
def openParen(a,b):
return 0 # function should not be called
opDict= { '+': ( operator.add, 2, 2 ),
'-': (operator.sub, 2, 2 ),
'*': (operator.mul, 3, 3 ),
'/': (operator.div, 3, 3 ),
'^': ( pow, 4, 5 ), # right associative exponentiation for grins
'(': ( openParen, 0, 8 )
}
operPrecidence = opDict[operchar][2]
self.redeuce(operPrecidence)
self.operstak.append(opDict[operchar])
self.__dict__.update(self.state1)
# print 'pushop', operchar
def syntaxErr(self, char ):
# Open Parenthesis
print 'parse error - near operator "%s"' %char
def pc2( self,operchar ):
# Close Parenthesis
# reduce node until matching open paren found
self.redeuce( 1 )
if len(self.operstak)>0:
self.operstak.pop() # pop off open parenthesis
else:
print 'Error - no open parenthesis matches close parens.'
self.__dict__.update(self.state2)
def end(self):
self.redeuce(0)
return self.nodestak.pop()
def redeuce(self, precidence):
while len(self.operstak)>0:
tailOper = self.operstak[len(self.operstak)-1]
if tailOper[1] < precidence: break
tailOper = self.operstak.pop()
vrgt = self.nodestak.pop()
vlft= self.nodestak.pop()
self.nodestak.append( AstNode(tailOper[0], vlft, vrgt))
# print 'reduce'
state1 = { 'v': v1, 'o':syntaxErr, 'po':o2, 'pc':syntaxErr }
state2 = { 'v': syntaxErr, 'o':o2, 'po':syntaxErr, 'pc':pc2 }
def Lex( exprssn, p ):
bgn = None
cp = -1
for c in exprssn:
cp += 1
if c in '+-/*^()': # throw in exponentiation (^)for grins
if bgn is not None:
p.v(p, exprssn[bgn:cp])
bgn = None
if c=='(': p.po(p, c)
elif c==')':p.pc(p, c)
else: p.o(p, c)
elif c in ' \t':
if bgn is not None:
p.v(p, exprssn[bgn:cp])
bgn = None
elif c in '0123456789':
if bgn is None:
bgn = cp
else:
print 'Invalid character in expression'
if bgn is not None:
p.v(p, exprssn[bgn:cp])
bgn = None
if bgn is not None:
p.v(p, exprssn[bgn:cp+1])
bgn = None
return p.end()
expr = raw_input("Expression:")
astTree = Lex( expr, Yaccer())
print expr, '=',astTree.eval()</lang>
ast standard library module
Python comes with its own ast module as part of its standard libraries. The module compiles Python source into an AST tree that can in turn be compiled to bytecode then executed.
<lang python>>>> import ast
>>>
>>> expr="2 * (3 -1) + 2 * 5"
>>> node = ast.parse(expr, mode='eval')
>>> ast.dump(node)
'Expression(body=BinOp(left=BinOp(left=Num(n=2), op=Mult(), right=BinOp(left=Num(n=3), op=Sub(), right=Num(n=1))), op=Add(), right=BinOp(left=Num(n=2), op=Mult(), right=Num(n=5))))'
>>> code_object = compile(node, filename='<string>', mode='eval')
>>> eval(code_object)
14
>>> # lets modify the AST by changing the 5 to a 6
>>> node.body.right.right.n
5
>>> node.body.right.right.n = 6
>>> code_object = compile(node, filename='<string>', mode='eval')
>>> eval(code_object)
16</lang>
Tcl
The code below delivers the AST for an expression in a form that it can be immediately eval-led, using Tcl's prefix operators.
<lang Tcl>namespace import tcl::mathop::*
proc ast str {
# produce abstract syntax tree for an expression
regsub -all {[-+*/()]} $str { & } str ;# "tokenizer"
s $str
}
proc s {args} {
# parse "(a + b) * c + d" to "+ [* [+ a b] c] d"
if {[llength $args] == 1} {set args [lindex $args 0]}
if [regexp {[()]} $args] {
eval s [string map {( "\[s " ) \]} $args]
} elseif {"*" in $args} {
s [s_group $args *]
} elseif {"/" in $args} {
s [s_group $args /]
} elseif {"+" in $args} {
s [s_group $args +]
} elseif {"-" in $args} {
s [s_group $args -]
} else {
string map {\{ \[ \} \]} [join $args]
}
}
proc s_group {list op} {
# turn ".. a op b .." to ".. {op a b} .."
set pos [lsearch -exact $list $op]
set p_1 [- $pos 1]
set p1 [+ $pos 1]
lreplace $list $p_1 $p1 \
[list $op [lindex $list $p_1] [lindex $list $p1]]
}
- -- Test suite
foreach test [split {
ast 2-2
ast 1-2-3
ast (1-2)-3
ast 1-(2-3)
ast (1+2)*3
ast (1+2)/3-4*5
ast ((1+2)/3-4)*5
} \n] {
puts "$test ..... [eval $test] ..... [eval [eval $test]]"
}</lang>
Output:
ast 2-2 ..... - 2 2 ..... 0
ast 1-2-3 ..... - [- 1 2] 3 ..... -4
ast (1-2)-3 ..... - [- 1 2] 3 ..... -4
ast 1-(2-3) ..... - 1 [- 2 3] ..... 2
ast (1+2)*3 ..... * [+ 1 2] 3 ..... 9
ast (1+2)/3-4*5 ..... - [/ [+ 1 2] 3] [* 4 5] ..... -19
ast ((1+2)/3-4)*5 ..... * [- [/ [+ 1 2] 3] 4] 5 ..... -15
Ursala
with no error checking other than removal of spaces
<lang Ursala>
- import std
- import nat
- import flo
lex = ~=' '*~F+ rlc both -=digits # separate into tokens
parse = # build a tree
--<';'>; @iNX ~&l->rh ^/~< cases~&lhh\~&lhPNVrC {
'*/': ^|C/~&hNV associate '*/',
'+-': ^|C/~&hNV associate '*/+-',
');': @r ~&htitBPC+ associate '*/+-'}
associate "ops" = ~&tihdh2B-="ops"-> ~&thd2tth2hNCCVttt2C
traverse = *^ ~&v?\%ep ^H\~&vhthPX '+-*/'-$<plus,minus,times,div>@dh
evaluate = traverse+ parse+ lex</lang>
test program:
<lang Ursala>
- cast %eL
test = evaluate*t
-[
1+1
4/5
2-1
3*7
3+4+5
9-2-4
7/3/2
4+2*3
5*2-1
5-3*2
(1+1)*(2+3)
(2-4)/(3+5*(8-1))]-</lang>
output:
<
2.000000e+00,
8.000000e-01,
1.000000e+00,
2.100000e+01,
1.200000e+01,
3.000000e+00,
1.166667e+00,
1.000000e+01,
9.000000e+00,
-1.000000e+00,
1.000000e+01,
-5.263158e-02>