Recursive descent parser generator

Write a recursive descent parser generator that takes a description of a grammar as input and outputs the source code for a parser in the same language as the generator. (So a generator written in C++ would output C++ source code for the parser.) You can assume that all of the rules have been preprocessed into a form suitable for the construction of a recursive descent parser. Check the following links for more details.

Use the parser generator and a grammar file to build a parser that takes an arithmetic expression and turns it in to three address code. The resulting parser should take this (or something similar) as input:

(one + two) * three - four * five

And generate this (or something similar) as output:

_0001 = one + two
_0002 = _0001 * three
_0003 = four * five
_0004 = _0002 - _0003

C++

Works with: C++11

This program translates an annotated LL(1) grammar into a C++ lexer plus parser. Each rule is required to return a string of some kind and the return values of the already matched nonterminals (and matched text of terminals) can be accessed with $1, $2, etc. which are replaced by the appropriate string variable.

It can't handle newlines as part of the grammar, the error checking is fairly limited and the error reporting is basically non-existent, but the parser it generates (not shown below) is human readable.

#include <fstream>
#include <iostream>
#include <map>
#include <regex>
#include <set>
#include <sstream>
#include <string>
using namespace std;

map<string, string> terminals;
map<string, vector<vector<string>>> nonterminalRules;
map<string, set<string>> nonterminalFirst;
map<string, vector<string>> nonterminalCode;

int main(int argc, char **argv) {
  if (argc < 3) {
    cout << "Usage: <input file> <output file>" << endl;
    return 1;
  }

  ifstream inFile(argv[1]);
  ofstream outFile(argv[2]);

  regex blankLine(R"(^\s*$)");
  regex terminalPattern(R"((\w+)\s+(.+))");
  regex rulePattern(R"(^!!\s*(\w+)\s*->\s*((?:\w+\s*)*)$)");
  regex argPattern(R"(\$(\d+))");
  smatch results;

  // Read terminal patterns
  string line;
  while (true) {
    getline(inFile, line);

    // Terminals section ends with a blank line
    if (regex_match(line, blankLine))
      break;

    regex_match(line, results, terminalPattern);
    terminals[results[1]] = results[2];
  }

  outFile << "#include <iostream>" << endl
          << "#include <fstream>" << endl
          << "#include <string>" << endl
          << "#include <regex>" << endl
          << "using namespace std;" << endl
          << endl;

  // Generate the token processing functions
  outFile << "string input, nextToken, nextTokenValue;" << endl
          << "string prevToken, prevTokenValue;" << endl
          << endl
          << "void advanceToken() {" << endl
          << "	static smatch results;" << endl
          << endl
          << "	prevToken = nextToken;" << endl
          << "	prevTokenValue = nextTokenValue;" << endl
          << endl;

  for (auto i = terminals.begin(); i != terminals.end(); ++i) {
    string name = i->first + "_pattern";
    string pattern = i->second;

    outFile << "	static regex " << name << "(R\"(^\\s*(" << pattern << "))\");" << endl
            << "	if (regex_search(input, results, " << name << ", regex_constants::match_continuous)) {" << endl
            << "		nextToken = \"" << i->first << "\";" << endl
            << "		nextTokenValue = results[1];" << endl
            << "		input = regex_replace(input, " << name << ", \"\");" << endl
            << "		return;" << endl
            << "	}" << endl
            << endl;
  }

  outFile << "	static regex eof(R\"(\\s*)\");" << endl
          << "	if (regex_match(input, results, eof, regex_constants::match_continuous)) {" << endl
          << "		nextToken = \"\";" << endl
          << "		nextTokenValue = \"\";" << endl
          << "		return;" << endl
          << "	}" << endl
          << endl
          << "	throw \"Unknown token\";" << endl
          << "}" << endl
          << endl
          << "bool same(string symbol) {" << endl
          << "	if (symbol == nextToken) {" << endl
          << "		advanceToken();" << endl
          << "		return true;" << endl
          << "	}" << endl
          << "	return false;" << endl
          << "}" << endl
          << endl;

  // Copy the header code to the output
  while (true) {
    getline(inFile, line);

    // Copy lines until we reach the first rule
    if (regex_match(line, results, rulePattern))
      break;

    outFile << line << endl;
  }

  // Build the nonterminal table
  while (true) {
    // results already contains the last matched rule
    string name = results[1];
    stringstream ss(results[2]);

    string tempString;
    vector<string> tempVector;
    while (ss >> tempString)
      tempVector.push_back(tempString);
    nonterminalRules[name].push_back(tempVector);

    // Read code until another rule is found
    string code = "";
    while (true) {
      getline(inFile, line);

      if (!inFile || regex_match(line, results, rulePattern))
        break;

      // Replace $1 with results[1], etc.
      line = regex_replace(line, argPattern, "results[$1]");

      code += line + "\n";
    }
    nonterminalCode[name].push_back(code);

    // Stop when we reach the end of the file
    if (!inFile)
      break;
  }

  // Generate the first sets, inefficiently
  bool done = false;
  while (!done)
    for (auto i = nonterminalRules.begin(); i != nonterminalRules.end(); ++i) {
      string name = i->first;
      done = true;

      if (nonterminalFirst.find(i->first) == nonterminalFirst.end())
        nonterminalFirst[i->first] = set<string>();

      for (int j = 0; j < i->second.size(); ++j) {
        if (i->second[j].size() == 0)
          nonterminalFirst[i->first].insert("");
        else {
          string first = i->second[j][0];
          if (nonterminalFirst.find(first) != nonterminalFirst.end()) {
            for (auto k = nonterminalFirst[first].begin(); k != nonterminalFirst[first].end(); ++k) {
              if (nonterminalFirst[name].find(*k) == nonterminalFirst[name].end()) {
                nonterminalFirst[name].insert(*k);
                done = false;
              }
            }
          } else if (nonterminalFirst[name].find(first) == nonterminalFirst[name].end()) {
            nonterminalFirst[name].insert(first);
            done = false;
          }
        }
      }
    }

  // Generate function signatures for the nonterminals
  for (auto i = nonterminalRules.begin(); i != nonterminalRules.end(); ++i) {
    string name = i->first + "_rule";
    outFile << "string " << name << "();" << endl;
  }
  outFile << endl;

  // Generate the nonterminal functions
  for (auto i = nonterminalRules.begin(); i != nonterminalRules.end(); ++i) {
    string name = i->first + "_rule";
    outFile << "string " << name << "() {" << endl
            << "	vector<string> results;" << endl
            << "	results.push_back(\"\");" << endl
            << endl;

    // Check if this rule can match an empty string
    int epsilon = -1;
    for (int j = 0; epsilon == -1 && j < i->second.size(); ++j)
      if (i->second[j].size() == 0)
        epsilon = j;

    // Generate each production
    for (int j = 0; j < i->second.size(); ++j) {
      // Nothing to generate for an empty rule
      if (j == epsilon)
        continue;

      string token = i->second[j][0];
      if (terminals.find(token) != terminals.end())
        outFile << "	if (nextToken == \"" << i->second[j][0] << "\") {" << endl;
      else {
        outFile << "	if (";
        bool first = true;
        for (auto k = nonterminalFirst[token].begin(); k != nonterminalFirst[token].end(); ++k, first = false) {
          if (!first)
            outFile << " || ";
          outFile << "nextToken == \"" << (*k) << "\"";
        }
        outFile << ") {" << endl;
      }

      for (int k = 0; k < i->second[j].size(); ++k) {
        if (terminals.find(i->second[j][k]) != terminals.end()) {
          outFile << "		if(same(\"" << i->second[j][k] << "\"))" << endl
                  << "			results.push_back(prevTokenValue);" << endl
                  << "		else" << endl
                  << "			throw \"Syntax error - mismatched token\";" << endl;
        } else
          outFile << "		results.push_back(" << i->second[j][k] << "_rule());" << endl;
      }

      // Copy rule code to output
      outFile << nonterminalCode[i->first][j];

      outFile << "	}" << endl << endl;
    }

    if (epsilon == -1)
      outFile << "	throw \"Syntax error - unmatched token\";" << endl;
    else
      outFile << nonterminalCode[i->first][epsilon];

    outFile << "}" << endl << endl;
  }

  // Generate the main function
  outFile << "int main(int argc, char **argv) {" << endl
          << "	if(argc < 2) {" << endl
          << "		cout << \"Usage: <input file>\" << endl;" << endl
          << "		return 1;" << endl
          << "	}" << endl
          << endl
          << "	ifstream file(argv[1]);" << endl
          << "	string line;" << endl
          << "	input = \"\";" << endl
          << endl
          << "	while(true) {" << endl
          << "		getline(file, line);" << endl
          << "		if(!file) break;" << endl
          << "		input += line + \"\\n\";" << endl
          << "	}" << endl
          << endl
          << "	advanceToken();" << endl
          << endl
          << "	start_rule();" << endl
          << "}" << endl;
}

Example grammar:

plus	\+
minus	-
times	\*
div	/
open	\(
close	\)
num	[0-9]+
var	[a-z]+

string nextLabel() {
	static string label = "0000";
	for(int i = label.length() - 1; i >= 0; --i) {
		if(label[i] == '9')
			label[i] = '0';
		else {
			++label[i];
			break;
		}
	}
	return "_" + label;
}

!! start -> expr start2
if($2 == "")
	return $1;
else {
	string label = nextLabel();
	cout << label << " = " << $1 << " " << $2 << endl;
	return label;
}

!! start2 -> plus start
return "+ " + $2;

!! start2 -> minus start
return "- " + $2;

!! start2 -> 
return "";

!! expr -> term expr2
if($2 == "")
	return $1;
else {
	string label = nextLabel();
	cout << label << " = " << $1 << " " << $2 << endl;
	return label;
}

!! expr2 -> times expr
return "* " + $2;

!! expr2 -> div expr
return "/ " + $2;

!! expr2 ->
return "";

!! term -> var
return $1;

!! term -> num
return $1;

!! term -> open start close
return $2;

Example input to parser (filename passed through command line):

(one + two) * three + four * five

Output (to standard out):

_0001 = one + two
_0002 = _0001 * three
_0003 = four * five
_0004 = _0002 + _0003

Go

The standard library already contains a recursive descent parser for Go programs or expressions, written in Go itself, whose output is an abstract syntax tree (AST) representing such code.

I've therefore applied this parser to the expression designated by the task, which only involves binary expressions and identifiers, and written a separate routine to convert the resulting AST into three-address code.

package main

import (
    "fmt"
    "go/ast"
    "go/parser"
    "log"
)

func labelStr(label int) string {
    return fmt.Sprintf("_%04d", label)
}

type binexp struct {
    op, left, right string
    kind, index     int
}

func main() {
    x := "(one + two) * three - four * five"
    fmt.Println("Expression to parse: ", x)
    f, err := parser.ParseExpr(x)
    if err != nil {
        log.Fatal(err)
    }

    fmt.Println("\nThe abstract syntax tree for this expression:")
    ast.Print(nil, f)

    fmt.Println("\nThe corresponding three-address code:")
    var binexps []binexp
    // Inspect nodes in depth-first order.
    ast.Inspect(f, func(n ast.Node) bool {
        switch x := n.(type) {
        case *ast.BinaryExpr:
            sx, ok1 := x.X.(*ast.Ident)
            sy, ok2 := x.Y.(*ast.Ident)
            op := x.Op.String()
            if ok1 && ok2 {
                binexps = append(binexps, binexp{op, sx.Name, sy.Name, 3, 0})
            } else if !ok1 && ok2 {
                binexps = append(binexps, binexp{op, "<addr>", sy.Name, 2, 0})
            } else if ok1 && !ok2 {
                binexps = append(binexps, binexp{op, sx.Name, "<addr>", 1, 0})
            } else {
                binexps = append(binexps, binexp{op, "<addr>", "<addr>", 0, 0})
            }
        }
        return true
    })

    for i := 0; i < len(binexps); i++ {
        binexps[i].index = i
    }

    label, last := 0, -1
    var ops, args []binexp
    var labels []string
    for i, be := range binexps {
        if be.kind == 0 {
            ops = append(ops, be)
        }
        if be.kind != 3 {
            continue
        }
        label++
        ls := labelStr(label)
        fmt.Printf("    %s = %s %s %s\n", ls, be.left, be.op, be.right)
        for j := i - 1; j > last; j-- {
            be2 := binexps[j]
            if be2.kind == 2 {
                label++
                ls2 := labelStr(label)
                fmt.Printf("    %s = %s %s %s\n", ls2, ls, be2.op, be2.right)
                ls = ls2
                be = be2
            } else if be2.kind == 1 {
                label++
                ls2 := labelStr(label)
                fmt.Printf("    %s = %s %s %s\n", ls2, be2.left, be2.op, ls)
                ls = ls2
                be = be2
            }
        }
        args = append(args, be)
        labels = append(labels, ls)
        lea, leo := len(args), len(ops)
        for lea >= 2 {
            if i < len(binexps)-1 && args[lea-2].index <= ops[leo-1].index {
                break
            }
            label++
            ls2 := labelStr(label)
            fmt.Printf("    %s = %s %s %s\n", ls2, labels[lea-2], ops[leo-1].op, labels[lea-1])
            ops = ops[0 : leo-1]
            args = args[0 : lea-1]
            labels = labels[0 : lea-1]
            lea--
            leo--
            args[lea-1] = be
            labels[lea-1] = ls2
        }
        last = i
    }
}

Output:

Expression to parse:  (one + two) * three - four * five

The abstract syntax tree for this expression:
     0  *ast.BinaryExpr {
     1  .  X: *ast.BinaryExpr {
     2  .  .  X: *ast.ParenExpr {
     3  .  .  .  Lparen: 1
     4  .  .  .  X: *ast.BinaryExpr {
     5  .  .  .  .  X: *ast.Ident {
     6  .  .  .  .  .  NamePos: 2
     7  .  .  .  .  .  Name: "one"
     8  .  .  .  .  .  Obj: *ast.Object {
     9  .  .  .  .  .  .  Kind: bad
    10  .  .  .  .  .  .  Name: ""
    11  .  .  .  .  .  }
    12  .  .  .  .  }
    13  .  .  .  .  OpPos: 6
    14  .  .  .  .  Op: +
    15  .  .  .  .  Y: *ast.Ident {
    16  .  .  .  .  .  NamePos: 8
    17  .  .  .  .  .  Name: "two"
    18  .  .  .  .  .  Obj: *(obj @ 8)
    19  .  .  .  .  }
    20  .  .  .  }
    21  .  .  .  Rparen: 11
    22  .  .  }
    23  .  .  OpPos: 13
    24  .  .  Op: *
    25  .  .  Y: *ast.Ident {
    26  .  .  .  NamePos: 15
    27  .  .  .  Name: "three"
    28  .  .  .  Obj: *(obj @ 8)
    29  .  .  }
    30  .  }
    31  .  OpPos: 21
    32  .  Op: -
    33  .  Y: *ast.BinaryExpr {
    34  .  .  X: *ast.Ident {
    35  .  .  .  NamePos: 23
    36  .  .  .  Name: "four"
    37  .  .  .  Obj: *(obj @ 8)
    38  .  .  }
    39  .  .  OpPos: 28
    40  .  .  Op: *
    41  .  .  Y: *ast.Ident {
    42  .  .  .  NamePos: 30
    43  .  .  .  Name: "five"
    44  .  .  .  Obj: *(obj @ 8)
    45  .  .  }
    46  .  }
    47  }

The corresponding three-address code:
    _0001 = one + two
    _0002 = _0001 * three
    _0003 = four * five
    _0004 = _0002 - _0003

J

J's native recursive descent parser is adequate for this task, if we map names appropriately.

Implementation:

cocurrent 'base'

inlocale=: 4 :0 L:0
  x,'_',y,'_'
)

parse=: 3 :0
  sentence=. ;:y
  opinds=. (;:'+*-')i.sentence
  opfuns=. (;:'plus times minus') inlocale 'base'
  scratch=. cocreate''
  coinsert__scratch 'base'
  names=. ~.sentence#~_1<:nc sentence
  (names inlocale scratch)=: names
  r=. do__scratch ;:inv opinds}((#sentence)#"0 opfuns),sentence
  codestroy__scratch''
  r
)

term=: 1 :0
  2 :('m''',m,'''expr n')
)

expr=:1 :0
:
  r=. genname''
  emit r,'=:',x,m,y
  r
)

plus=: '+' expr
times=: '*' term
minus=: '-' expr

N=: 10000
genname=: 3 :0
  'z',}.":N=: N+1
)

emit=: smoutput

Task example:

   parse '(one + two) * three - four * five'
z0001=:four*five
z0002=:one+two
z0003=:z0002*three
z0004=:z0003-z0001
z0004

See also https://github.com/jsoftware/general_misc/blob/master/trace.ijs for a model of the underlying parser being employed here.

Julia

The Julia compiler's own parser is a recursive descent parser, and can be used directly here.

const one, two, three, four, five, six, seven, eight, nine = collect(1:9)

function testparser(s)
    cod = Meta.parse(s)
    println(Meta.lower(Main, cod))
end

testparser("(one + two) * three - four * five")

Output:

$(Expr(:thunk, CodeInfo(
    @ none within `top-level scope'
1 ─ %1 = one + two
│   %2 = %1 * three
│   %3 = four * five
│   %4 = %2 - %3
└──      return %4
)))

Perl

#!/usr/bin/perl

use strict; # https://rosettacode.org/wiki/Recursive_descent_parser_generator
use warnings;
use Path::Tiny;

my $h = qr/\G\s*/;
my $identifier = qr/$h([a-z]\w*)\b/i;
my $literal = qr/$h(['"])(.+?)\1/s;
my (%rules, %called, $usercode, %patches);
my $filename = './generatedparser.pl';

sub node { bless [ @_[1..$#_] ], $_[0] }
sub error { die "ERROR: ", s/\G\s*\K/<**ERROR @_**>/r, "\n" }
sub want { /$h\Q$_[1]\E/gc ? shift : error "missing '$_[1]' " }
sub addin { node $_[0] => ref $_[1] eq $_[0] ? @{$_[1]} : $_[1], pop }

local $_ = do { local $/; @ARGV ? <> : <DATA> }; # the EBNF
$usercode = s/^(#usercode.*)//ms ? $1 : "# no usercode included\n";;
$patches{PATCHUSER} = $usercode . "#end usercode\n"; # grammar support code
s/^\h*#.*\n//gm; # remove comment lines
$patches{PATCHGRAMMAR} = s/^(?:\h*\n)+//r =~ s/\n(?:\h*\n)+\z//r;
while( /$identifier\s*=/gc )              # the start of a rule
  {
  my $name = $1;
  $rules{startsymbol} //= node RULE => $name;
  my $tree = expr(0);
  $rules{$name} = $rules{$name} ? addin ALT => $rules{$name}, $tree : $tree;
  /$h[.;]/gc or error 'missing rule terminator, needs . or ;';
  }
/\G\s*\z/ or error "incomplete parse at ", substr $_, pos($_) // 0;
%rules or error "no rules found ";
delete @called{keys %rules};
%called and die "\nERROR: undefined rule(s) <@{[ keys %called]}>\n";

sub expr                     # precedence climbing parser for grammer rules
  {
  my $tree =
    /$h(NAME)\b/gc    ? node $1 :                       # internal name matcher
    /$identifier/gc   ? do { $called{$1}++; node RULE => $1 } :
    /$literal/gc      ? node LIT => $2 :
    /$h<(\w+)>/gc     ? node ACTION => $1 :
    /$h\[/gc          ? node OPTION => want expr(0), ']' :
    /$h\{/gc          ? node REPEAT => want expr(0), '}' :
    /$h\(/gc          ?                want expr(0), ')' :
    error 'Invalid expression';
  $tree =
    /\G\s+/gc                            ? $tree :
    $_[0] <= 1 && /\G(?=[[<{('"a-z])/gci ? addin SEQ => $tree, expr(2) :
    $_[0] <= 0 && /\G\|/gc               ? addin ALT => $tree, expr(1) :
    return $tree while 1;
  }

my $perlcode = "# generated code (put in Rule:: package)\n";
for my $rule ( sort keys %rules )
  {
  my $body = $rules{$rule}->code;
  $perlcode .= "\nsub Rule::$rule\n\t{\n\t$body\n\t}\n";
  }
$perlcode =~ s/^(?:\h*\n)+(?=\h*\})//gm;
$perlcode .= "\n# preceding code was generated for rules\n";
$patches{PATCHGENERATED} = $perlcode;
sub ALT::code
  {
  my $all = join " or\n\t", map $_->code, @{ $_[0] };
  "( # alt\n\t$all )"
  }
sub SEQ::code
  {
  my $all = join " and\n\t", map $_->code, @{ $_[0] };
  "( # seq\n\t$all )"
  }
sub REPEAT::code { "do { # repeat\n\t1 while @{[ $_[0][0]->code ]} ; 1 }" }
sub OPTION::code { "( # option\n\t@{[ $_[0][0]->code ]} or 1 )" }
sub RULE::code { "Rule::$_[0][0]()" }
sub ACTION::code { "( $_[0][0]() || 1 )" }
sub NAME::code { "( /\\G\$whitespace(\\w+)/gc and push \@stack, \$1 )" }
sub LIT::code { "( /\\G\$whitespace(\Q$_[0][0]\E)/gc and push \@stack, \$1 )" }

$_ = <<'END'; ##################################### template for generated code
#!/usr/bin/perl
use strict; # https://rosettacode.org/wiki/Recursive_descent_parser_generator
use warnings; # WARNING: this code is generated

my @stack;
my $whitespace = qr/\s*/;

my $grammar = <<'GRAMMAR'; # make grammar rules available to usercode
PATCHGRAMMAR
GRAMMAR

PATCHUSER
PATCHGENERATED
local $_ = shift // '(one + two) * three - four * five';
eval { begin() };                           # eval because it is optional
Rule::startsymbol();
eval { end() };                             # eval because it is optional
/\G\s*\z/ or die "ERROR: incomplete parse\n";
END

s/(PATCH\w+)/$patches{$1}/g; # insert grammar variable stuff
path( $filename )->spew( $_ );
chmod 0555, $filename; # readonly, executable
exec 'perl', $filename, @ARGV or die "exec failed $!";

__DATA__

expr = term { plus term <gen3addr> } .
term = factor { times factor <gen3addr> } .
factor = primary [ '^' factor <gen3addr> ] .
primary = '(' expr ')' <removeparens> | NAME .
plus = "+" | "-" .
times = "*" | "/" .

#usercode -- separator for included code for actions

my $temp = '0000';

sub begin { print "parsing: $_\n\n" }

sub gen3addr
  {
  @stack >= 3 or die "not enough on stack";
  my @three = splice @stack, -3, 3, my $t = '_' . ++$temp;
  print "$t = @three\n";
  }

sub removeparens
  {
  @stack >= 3 or die "not enough on stack";
  splice @stack, -3, 3, $stack[-2];
  }

Running the above with no arguments uses a default grammar that will solve the specified example. It produces the following perl script (and then runs it).

#!/usr/bin/perl
use strict; # https://rosettacode.org/wiki/Recursive_descent_parser_generator
use warnings; # WARNING: this code is generated

my @stack;
my $whitespace = qr/\s*/;

my $grammar = <<'GRAMMAR'; # make grammar rules available to usercode
expr = term { plus term <gen3addr> } .
term = factor { times factor <gen3addr> } .
factor = primary [ '^' factor <gen3addr> ] .
primary = '(' expr ')' <removeparens> | NAME .
plus = "+" | "-" .
times = "*" | "/" .
GRAMMAR

#usercode -- separator for included code for actions

my $temp = '0000';

sub begin { print "parsing: $_\n\n" }

sub gen3addr
  {
  @stack >= 3 or die "not enough on stack";
  my @three = splice @stack, -3, 3, my $t = '_' . ++$temp;
  print "$t = @three\n";
  }

sub removeparens
  {
  @stack >= 3 or die "not enough on stack";
  splice @stack, -3, 3, $stack[-2];
  }
#end usercode

# generated code (put in Rule:: package)

sub Rule::expr
  {
  ( # seq
  Rule::term() and
  do { # repeat
  1 while ( # seq
  Rule::plus() and
  Rule::term() and
  ( gen3addr() || 1 ) ) ; 1 } )
  }

sub Rule::factor
  {
  ( # seq
  Rule::primary() and
  ( # option
  ( # seq
  ( /\G$whitespace(\^)/gc and push @stack, $1 ) and
  Rule::factor() and
  ( gen3addr() || 1 ) ) or 1 ) )
  }

sub Rule::plus
  {
  ( # alt
  ( /\G$whitespace(\+)/gc and push @stack, $1 ) or
  ( /\G$whitespace(\-)/gc and push @stack, $1 ) )
  }

sub Rule::primary
  {
  ( # alt
  ( # seq
  ( /\G$whitespace(\()/gc and push @stack, $1 ) and
  Rule::expr() and
  ( /\G$whitespace(\))/gc and push @stack, $1 ) and
  ( removeparens() || 1 ) ) or
  ( /\G$whitespace(\w+)/gc and push @stack, $1 ) )
  }

sub Rule::startsymbol
  {
  Rule::expr()
  }

sub Rule::term
  {
  ( # seq
  Rule::factor() and
  do { # repeat
  1 while ( # seq
  Rule::times() and
  Rule::factor() and
  ( gen3addr() || 1 ) ) ; 1 } )
  }

sub Rule::times
  {
  ( # alt
  ( /\G$whitespace(\*)/gc and push @stack, $1 ) or
  ( /\G$whitespace(\/)/gc and push @stack, $1 ) )
  }

# preceding code was generated for rules

local $_ = shift // '(one + two) * three - four * five';
eval { begin() };                           # eval because it is optional
Rule::startsymbol();
eval { end() };                             # eval because it is optional
/\G\s*\z/ or die "ERROR: incomplete parse\n";

The above script can also be run stand-alone and produces the following output.

Output:

parsing: (one + two) * three - four * five

_0001 = one + two
_0002 = _0001 * three
_0003 = four * five
_0004 = _0002 - _0003

Different grammars and input can be specified on the command line.

recursivedescentparsergenerator.pl arithexpr.y '2 * 3 + 4 * 5'

and giving this file as "arithexpr.y"

# test arith expr

expr = term { '+' term <fadd> | '-' term <fsub> } .
term = factor { '*' factor <fmul> | '/' factor <fdiv> } .
factor = '(' expr ')' <noparen> | NAME .

#usercode

sub noparen { splice @stack, -3, 3, $stack[-2]; }
sub fadd { splice @stack, -3, 3, $stack[-3] + $stack[-1] }
sub fsub { splice @stack, -3, 3, $stack[-3] - $stack[-1] }
sub fmul { splice @stack, -3, 3, $stack[-3] * $stack[-1] }
sub fdiv { splice @stack, -3, 3, $stack[-3] / $stack[-1] }
sub begin { print "expr   = $_\n" }
sub end { print "answer = @{[pop @stack]}\n" }

will produce the following

Output:

expr   = 2 * 3 + 4 * 5
answer = 26

Phix

Technically the task is asking for code which generates something like the following, so I suppose it would actually meet the spec if it began with constant src = """ and ended with """ puts(1,src)... (and like several/most other entries on this page, this does not use a formal grammer)

--
-- demo\rosetta\RecursiveDescentParser.exw
-- =======================================
--
with javascript_semantics
string src
integer ch, sdx
sequence tok
 
procedure skip_spaces()
    while 1 do
        if sdx>length(src) then exit end if
        ch = src[sdx]
        if not find(ch,{' ','\t','\r','\n'}) then exit end if
        sdx += 1
    end while
end procedure
 
procedure next_token()
-- yeilds one of:
--  {"SYMBOL",ch} where ch is one of "()+-/*", or
--  {"IDENT",string}, or {"EOF"}
    skip_spaces()
    integer tokstart = sdx
    if sdx>length(src) then
        tok = {"EOF"}
    elsif find(ch,"()+-/*") then
        sdx += 1
        tok = {"SYMBOL",ch&""}
    elsif (ch>='a' and ch<='z')
       or (ch>='A' and ch<='Z') then
        while true do
            sdx += 1
            if sdx>length(src) then exit end if
            ch = src[sdx]
            if ch!='_' 
            and (ch<'a' or ch>'z')
            and (ch<'A' or ch>'Z')
            and (ch<'0' or ch>'9') then
                exit
            end if
        end while
        tok = {"IDENT",src[tokstart..sdx-1]}
    else
        ?9/0
    end if
end procedure
 
forward function sum_expr()
 
function primary()
    sequence res
    if tok[1]="IDENT" then
        res = tok
        next_token()
    elsif tok={"SYMBOL","("} then
        next_token()
        res = sum_expr()
        if tok!={"SYMBOL",")"} then ?9/0 end if
        next_token()
    else
        ?9/0
    end if
    return res
end function
 
function mul_expr()
    sequence res = primary()
    while true do
        if tok[1]!="SYMBOL" or not find(tok[2],{"*","/"}) then exit end if
        res = {tok,res,NULL}
        next_token()
        res[3] = primary()
    end while
    return res
end function
 
function sum_expr()
    sequence res = mul_expr()
    while true do
        if tok[1]!="SYMBOL" or not find(tok[2],{"+","-"}) then exit end if
        res = {tok,res,NULL}
        next_token()
        res[3] = mul_expr()
    end while
    return res
end function
 
integer nxt = 1
function show_ast(sequence ast) 
    if ast[1][1]="SYMBOL" then
        string op = ast[1][2],
               lhs = show_ast(ast[2]),
               rhs = show_ast(ast[3]),
               nid = sprintf("_%04d",nxt)
        printf(1,"%s = %s %s %s\n",{nid,lhs,op,rhs})
        nxt += 1
        return nid
    elsif ast[1]="IDENT" then
        return ast[2]
    end if
    ?9/0
end function    
 
procedure parse(string source)
    src = source
    sdx = 1
    next_token()
    sequence ast = sum_expr()
    if tok!={"EOF"} then ?9/0 end if
    pp(ast,{pp_Nest,4})
    {} = show_ast(ast)
end procedure
 
parse("(one + two) * three - four * five")
{} = wait_key()

Output:

{{`SYMBOL`,
  `-`},
 {{`SYMBOL`,
   `*`},
  {{`SYMBOL`,
    `+`},
   {`IDENT`,
    `one`},
   {`IDENT`,
    `two`}},
  {`IDENT`,
   `three`}},
 {{`SYMBOL`,
   `*`},
  {`IDENT`,
   `four`},
  {`IDENT`,
   `five`}}}
_0001 = one + two
_0002 = _0001 * three
_0003 = four * five
_0004 = _0002 - _0003

Raku

(formerly Perl 6) Instead of writing a full-blown generator, it is possible to solve the task partly by making use of the built-in optimizer and study the relevant AST output

raku --target=optimize -e 'no strict; say ($one + $two) * $three - $four * $five' | tail -11
                  - QAST::Op(callstatic &say) <sunk> :statement_id<2> say ($one + $two) * $three - $four * $five
                    - QAST::Op(callstatic &infix:<->) <wanted> -
                      - QAST::Op(callstatic &infix:<*>) <wanted> *
                        - QAST::Op(callstatic &infix:<+>) <wanted> :statement_id<3> +
                          - QAST::Var(local __lowered_lex_5) <wanted> $one
                          - QAST::Var(local __lowered_lex_4) <wanted> $two
                        - QAST::Var(local __lowered_lex_3) <wanted> $three
                      - QAST::Op(callstatic &infix:<*>) <wanted> *
                        - QAST::Var(local __lowered_lex_2) <wanted> $four
                        - QAST::Var(local __lowered_lex_1) <wanted> $five
            - QAST::WVal(Nil)

As you can see by examining the nested tree, the calculations are done as follows (expressed using a postfix notation)

one two + three * four five * -

Wren

Translation of: Phix

Library: Wren-str

Library: Wren-fmt

import "./str" for Char
import "./fmt" for Fmt

class RDP { 
    construct parse(source) {
        _src = source
        _sdx = 0
        _ch  = null
        _tok = null
        _nxt = 1
        nextToken()
        var ast = sumExpr()
        if (_tok[0] != "EOF") Fiber.abort("Something went wrong.")
        printAst(ast, 0)
        System.print()
        showAst(ast)
    }

    skipSpaces() {
        while (true) {
            if (_sdx >= _src.count) return
            _ch = _src[_sdx]
            if (!" \t\r\n".contains(_ch)) return
            _sdx = _sdx + 1
        }
    }

    // yields one of:
    //   ["SYMBOL", ch] where ch is one of "()+-/*", or
    //   ["IDENT", string] or ["EOF"]
    nextToken() {
        skipSpaces()
        var tokStart = _sdx
        if (_sdx >= _src.count) {
            _tok = ["EOF"]
        } else if ("()+-/*".contains(_ch)) {
            _sdx = _sdx + 1
            _tok = ["SYMBOL", _ch]
        } else if (Char.isAsciiLetter(_ch)) {
            while (true) {
                _sdx = _sdx + 1
                if (_sdx >= _src.count) break
                _ch = _src[_sdx]
                if (!Char.isAsciiAlphaNum(_ch) && _ch != "_") break
            }
            _tok = ["IDENT", _src[tokStart..._sdx]]
        } else {
            Fiber.abort("Invalid token '%(_ch)'.")
        }
    }

    primary() {
        var res = []
        if (_tok[0] == "IDENT") {
            res = _tok.toList
            nextToken()
        } else if (_tok[0] == "SYMBOL" && _tok[1] == "(") {
            nextToken()
            res = sumExpr()
            if (_tok[0] != "SYMBOL" || _tok[1] != ")") Fiber.abort("Unexpected token '%(_tok)'.")
            nextToken()
        } else {
            Fiber.abort("Unexpected token '%(_tok)'.")
        }
        return res
    }

    mulExpr() {
        var res = primary()
        while (true) {
            if (_tok[0] != "SYMBOL" || !"*/".contains(_tok[1])) break
            res = [_tok, res, null]
            nextToken()
            res[2] = primary()
        }
        return res
    }

    sumExpr() {
        var res = mulExpr()
        while (true) {
            if (_tok[0] != "SYMBOL" || !"+-".contains(_tok[1])) break
            res = [_tok, res, null]
            nextToken()
            res[2] = mulExpr()
        }
        return res
    }

    showAst(ast) {
        if (ast[0][0] == "SYMBOL") {
            var op = ast[0][1]
            var lhs = showAst(ast[1])
            var rhs = showAst(ast[2])
            var thiz = Fmt.swrite("_$04d", _nxt)
            Fmt.print("$s = $s $s $s", thiz, lhs, op, rhs)
            _nxt = _nxt + 1
            return thiz
        } else if (ast[0] == "IDENT") {
            return ast[1]
        }
        Fiber.abort("Something went wrong.")
    }

    printAst(ast, level) {
        for (e in ast) {
            var indent = "  " * level
            if (!(e is List)) {
                System.print(indent + e)
            } else {
                System.print(indent + "{")
                printAst(e, level+1)
                System.print(indent + "}")        
            }
        }
    }
}

RDP.parse("(one + two) * three - four * five")

Output:

{
  SYMBOL
  -
}
{
  {
    SYMBOL
    *
  }
  {
    {
      SYMBOL
      +
    }
    {
      IDENT
      one
    }
    {
      IDENT
      two
    }
  }
  {
    IDENT
    three
  }
}
{
  {
    SYMBOL
    *
  }
  {
    IDENT
    four
  }
  {
    IDENT
    five
  }
}

_0001 = one + two
_0002 = _0001 * three
_0003 = four * five
_0004 = _0002 - _0003