Recursive descent parser generator: Difference between revisions
m (Undoing delete of J draft - I've a simple fix...) |
m (→{{header|Wren}}: Changed to Wren S/H) |
||
(18 intermediate revisions by 10 users not shown) | |||
Line 1: | Line 1: | ||
{{draft task}} |
{{draft task}} |
||
Write a recursive descent parser generator that takes a description of a grammar as input and outputs the source code for a parser in the same language as the generator. (So a generator written in C++ would output C++ source code for the parser.) You can assume that all of the rules have been preprocessed into a form suitable for the construction of a recursive descent parser. |
Write a recursive descent parser generator that takes a description of a grammar as input and outputs the source code for a parser in the same language as the generator. (So a generator written in C++ would output C++ source code for the parser.) You can assume that all of the rules have been preprocessed into a form suitable for the construction of a recursive descent parser. Check the following links for more details. |
||
* http://www.cs.engr.uky.edu/~lewis/essays/compilers/rec-des.html |
|||
* http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm |
|||
Use the parser generator and a grammar file to build a parser that takes an arithmetic expression and turns it in to three address code. The resulting parser should take this (or something similar) as input: |
Use the parser generator and a grammar file to build a parser that takes an arithmetic expression and turns it in to three address code. The resulting parser should take this (or something similar) as input: |
||
Line 22: | Line 24: | ||
It can't handle newlines as part of the grammar, the error checking is fairly limited and the error reporting is basically non-existent, but the parser it generates (not shown below) is human readable. |
It can't handle newlines as part of the grammar, the error checking is fairly limited and the error reporting is basically non-existent, but the parser it generates (not shown below) is human readable. |
||
< |
<syntaxhighlight lang="cpp"> |
||
#include <fstream> |
|||
#include <iostream> |
#include <iostream> |
||
#include <fstream> |
|||
#include <string> |
|||
#include <sstream> |
|||
#include <map> |
#include <map> |
||
#include <regex> |
|||
#include <set> |
#include <set> |
||
#include < |
#include <sstream> |
||
#include <string> |
|||
using namespace std; |
using namespace std; |
||
Line 38: | Line 40: | ||
int main(int argc, char **argv) { |
int main(int argc, char **argv) { |
||
if (argc < 3) { |
|||
cout << "Usage: <input file> <output file>" << endl; |
|||
return 1; |
|||
} |
|||
ifstream inFile(argv[1]); |
|||
ofstream outFile(argv[2]); |
|||
regex blankLine(R"(^\s*$)"); |
|||
regex terminalPattern(R"((\w+)\s+(.+))"); |
|||
regex rulePattern(R"(^!!\s*(\w+)\s*->\s*((?:\w+\s*)*)$)"); |
|||
regex argPattern(R"(\$(\d+))"); |
|||
smatch results; |
|||
// Read terminal patterns |
|||
string line; |
|||
while (true) { |
|||
getline(inFile, line); |
|||
// Terminals section ends with a blank line |
|||
if (regex_match(line, blankLine)) |
|||
break; |
|||
regex_match(line, results, terminalPattern); |
|||
terminals[results[1]] = results[2]; |
|||
} |
|||
outFile << "#include <iostream>" << endl |
|||
<< "#include <fstream>" << endl |
|||
<< "#include <string>" << endl |
|||
<< "#include <regex>" << endl |
|||
<< "using namespace std;" << endl |
|||
<< endl; |
|||
// Generate the token processing functions |
|||
outFile << "string input, nextToken, nextTokenValue;" << endl |
|||
<< "string prevToken, prevTokenValue;" << endl |
|||
<< endl |
|||
<< "void advanceToken() {" << endl |
|||
<< " static smatch results;" << endl |
|||
<< endl |
|||
<< " prevToken = nextToken;" << endl |
|||
<< " prevTokenValue = nextTokenValue;" << endl |
|||
<< endl; |
|||
for (auto i = terminals.begin(); i != terminals.end(); ++i) { |
|||
outFile << "void advanceToken() {" << endl; |
|||
string name = i->first + "_pattern"; |
|||
outFile << " static smatch results;" << endl << endl; |
|||
string pattern = i->second; |
|||
outFile << " static regex " << name << "(R\"(^\\s*(" << pattern << "))\");" << endl |
|||
<< " if (regex_search(input, results, " << name << ", regex_constants::match_continuous)) {" << endl |
|||
outFile << " prevTokenValue = nextTokenValue;" << endl << endl; |
|||
<< " nextToken = \"" << i->first << "\";" << endl |
|||
<< " nextTokenValue = results[1];" << endl |
|||
<< " input = regex_replace(input, " << name << ", \"\");" << endl |
|||
<< " return;" << endl |
|||
<< " }" << endl |
|||
<< endl; |
|||
} |
|||
outFile << " static regex eof(R\"(\\s*)\");" << endl |
|||
for (auto i = terminals.begin(); i != terminals.end(); ++i) { |
|||
<< " if (regex_match(input, results, eof, regex_constants::match_continuous)) {" << endl |
|||
string name = i->first + "_pattern"; |
|||
<< " nextToken = \"\";" << endl |
|||
string pattern = i->second; |
|||
<< " nextTokenValue = \"\";" << endl |
|||
<< " return;" << endl |
|||
<< " }" << endl |
|||
<< endl |
|||
<< " throw \"Unknown token\";" << endl |
|||
<< "}" << endl |
|||
<< endl |
|||
<< "bool same(string symbol) {" << endl |
|||
<< " if (symbol == nextToken) {" << endl |
|||
<< " advanceToken();" << endl |
|||
<< " return true;" << endl |
|||
<< " }" << endl |
|||
<< " return false;" << endl |
|||
<< "}" << endl |
|||
<< endl; |
|||
// Copy the header code to the output |
|||
outFile << " static regex " << name << "(R\"(^\\s*(" << pattern << "))\");" << endl; |
|||
while (true) { |
|||
outFile << " if (regex_search(input, results, " << name << ", regex_constants::match_continuous)) {" << endl; |
|||
getline(inFile, line); |
|||
outFile << " nextToken = \"" << i->first << "\";" << endl; |
|||
outFile << " nextTokenValue = results[1];" << endl; |
|||
outFile << " input = regex_replace(input, " << name << ", \"\");" << endl; |
|||
outFile << " return;" << endl; |
|||
outFile << " }" << endl << endl; |
|||
} |
|||
// Copy lines until we reach the first rule |
|||
outFile << " static regex eof(R\"(\\s*)\");" << endl; |
|||
if (regex_match(line, results, rulePattern)) |
|||
break; |
|||
outFile << " nextToken = \"\";" << endl; |
|||
outFile << " nextTokenValue = \"\";" << endl; |
|||
outFile << " return;" << endl; |
|||
outFile << " }" << endl << endl; |
|||
outFile << line << endl; |
|||
} |
|||
outFile << "}" << endl << endl; |
|||
// Build the nonterminal table |
|||
outFile << "bool same(string symbol) {" << endl; |
|||
while (true) { |
|||
outFile << " if (symbol == nextToken) {" << endl; |
|||
// results already contains the last matched rule |
|||
outFile << " advanceToken();" << endl; |
|||
string name = results[1]; |
|||
outFile << " return true;" << endl; |
|||
stringstream ss(results[2]); |
|||
outFile << " }" << endl; |
|||
outFile << " return false;" << endl; |
|||
outFile << "}" << endl << endl; |
|||
string tempString; |
|||
// Copy the header code to the output |
|||
vector<string> tempVector; |
|||
while (true) { |
|||
while (ss >> tempString) |
|||
getline(inFile, line); |
|||
tempVector.push_back(tempString); |
|||
nonterminalRules[name].push_back(tempVector); |
|||
// Copy lines until we reach the first rule |
|||
if (regex_match(line, results, rulePattern)) |
|||
break; |
|||
// Read code until another rule is found |
|||
outFile << line << endl; |
|||
string code = ""; |
|||
} |
|||
while (true) { |
|||
getline(inFile, line); |
|||
if (!inFile || regex_match(line, results, rulePattern)) |
|||
// Build the nonterminal table |
|||
break; |
|||
while (true) { |
|||
// results already contains the last matched rule |
|||
string name = results[1]; |
|||
stringstream ss(results[2]); |
|||
// Replace $1 with results[1], etc. |
|||
string tempString; |
|||
line = regex_replace(line, argPattern, "results[$1]"); |
|||
vector<string> tempVector; |
|||
while (ss >> tempString) |
|||
tempVector.push_back(tempString); |
|||
nonterminalRules[name].push_back(tempVector); |
|||
code += line + "\n"; |
|||
// Read code until another rule is found |
|||
} |
|||
string code = ""; |
|||
nonterminalCode[name].push_back(code); |
|||
while (true) { |
|||
getline(inFile, line); |
|||
// Stop when we reach the end of the file |
|||
if (!inFile || regex_match(line, results, rulePattern)) |
|||
if (!inFile) |
|||
break; |
|||
break; |
|||
} |
|||
// Generate the first sets, inefficiently |
|||
// Replace $1 with results[1], etc. |
|||
bool done = false; |
|||
line = regex_replace(line, argPattern, "results[$1]"); |
|||
while (!done) |
|||
for (auto i = nonterminalRules.begin(); i != nonterminalRules.end(); ++i) { |
|||
string name = i->first; |
|||
done = true; |
|||
if (nonterminalFirst.find(i->first) == nonterminalFirst.end()) |
|||
code += line + "\n"; |
|||
nonterminalFirst[i->first] = set<string>(); |
|||
} |
|||
nonterminalCode[name].push_back(code); |
|||
for (int j = 0; j < i->second.size(); ++j) { |
|||
// Stop when we reach the end of the file |
|||
if (i->second[j].size() == 0) |
|||
if (!inFile) |
|||
nonterminalFirst[i->first].insert(""); |
|||
break; |
|||
else { |
|||
} |
|||
string first = i->second[j][0]; |
|||
if (nonterminalFirst.find(first) != nonterminalFirst.end()) { |
|||
for (auto k = nonterminalFirst[first].begin(); k != nonterminalFirst[first].end(); ++k) { |
|||
if (nonterminalFirst[name].find(*k) == nonterminalFirst[name].end()) { |
|||
nonterminalFirst[name].insert(*k); |
|||
done = false; |
|||
} |
|||
} |
|||
} else if (nonterminalFirst[name].find(first) == nonterminalFirst[name].end()) { |
|||
nonterminalFirst[name].insert(first); |
|||
done = false; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
// Generate function signatures for the nonterminals |
|||
for (auto i = nonterminalRules.begin(); i != nonterminalRules.end(); ++i) { |
|||
bool done = false; |
|||
string name = i->first + "_rule"; |
|||
while (!done) |
|||
outFile << "string " << name << "();" << endl; |
|||
for (auto i = nonterminalRules.begin(); i != nonterminalRules.end(); ++i) { |
|||
} |
|||
string name = i->first; |
|||
outFile << endl; |
|||
done = true; |
|||
// Generate the nonterminal functions |
|||
if (nonterminalFirst.find(i->first) == nonterminalFirst.end()) |
|||
for (auto i = nonterminalRules.begin(); i != nonterminalRules.end(); ++i) { |
|||
nonterminalFirst[i->first] = set<string>(); |
|||
string name = i->first + "_rule"; |
|||
outFile << "string " << name << "() {" << endl |
|||
<< " vector<string> results;" << endl |
|||
<< " results.push_back(\"\");" << endl |
|||
<< endl; |
|||
// Check if this rule can match an empty string |
|||
for (int j = 0; j < i->second.size(); ++j) { |
|||
int epsilon = -1; |
|||
if (i->second[j].size() == 0) |
|||
for (int j = 0; epsilon == -1 && j < i->second.size(); ++j) |
|||
nonterminalFirst[i->first].insert(""); |
|||
if (i->second[j].size() == 0) |
|||
else { |
|||
epsilon = j; |
|||
string first = i->second[j][0]; |
|||
if (nonterminalFirst.find(first) != nonterminalFirst.end()) { |
|||
for (auto k = nonterminalFirst[first].begin(); k != nonterminalFirst[first].end(); ++k) { |
|||
if (nonterminalFirst[name].find(*k) == nonterminalFirst[name].end()) { |
|||
nonterminalFirst[name].insert(*k); |
|||
done = false; |
|||
} |
|||
} |
|||
} else if (nonterminalFirst[name].find(first) == nonterminalFirst[name].end()) { |
|||
nonterminalFirst[name].insert(first); |
|||
done = false; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
// Generate each production |
|||
// Generate function signatures for the nonterminals |
|||
for (int j = 0; j < i->second.size(); ++j) { |
|||
// Nothing to generate for an empty rule |
|||
string name = i->first + "_rule"; |
|||
if (j == epsilon) |
|||
outFile << "string " << name << "();" << endl; |
|||
continue; |
|||
} |
|||
outFile << endl; |
|||
string token = i->second[j][0]; |
|||
// Generate the nonterminal functions |
|||
if (terminals.find(token) != terminals.end()) |
|||
outFile << " if (nextToken == \"" << i->second[j][0] << "\") {" << endl; |
|||
string name = i->first + "_rule"; |
|||
else { |
|||
outFile << "string " << name << "() {" << endl; |
|||
outFile << " if ("; |
|||
bool first = true; |
|||
outFile << " results.push_back(\"\");" << endl << endl; |
|||
for (auto k = nonterminalFirst[token].begin(); k != nonterminalFirst[token].end(); ++k, first = false) { |
|||
if (!first) |
|||
// Check if this rule can match an empty string |
|||
outFile << " || "; |
|||
int epsilon = -1; |
|||
outFile << "nextToken == \"" << (*k) << "\""; |
|||
for (int j = 0; epsilon == -1 && j < i->second.size(); ++j) |
|||
} |
|||
if (i->second[j].size() == 0) |
|||
outFile << ") {" << endl; |
|||
epsilon = j; |
|||
} |
|||
for (int k = 0; k < i->second[j].size(); ++k) { |
|||
// Generate each production |
|||
if (terminals.find(i->second[j][k]) != terminals.end()) { |
|||
outFile << " if(same(\"" << i->second[j][k] << "\"))" << endl |
|||
// Nothing to generate for an empty rule |
|||
<< " results.push_back(prevTokenValue);" << endl |
|||
if (j == epsilon) |
|||
<< " else" << endl |
|||
continue; |
|||
<< " throw \"Syntax error - mismatched token\";" << endl; |
|||
} else |
|||
string token = i->second[j][0]; |
|||
outFile << " results.push_back(" << i->second[j][k] << "_rule());" << endl; |
|||
if (terminals.find(token) != terminals.end()) |
|||
} |
|||
outFile << " if (nextToken == \"" << i->second[j][0] << "\") {" << endl; |
|||
else { |
|||
outFile << " if ("; |
|||
bool first = true; |
|||
for (auto k = nonterminalFirst[token].begin(); k != nonterminalFirst[token].end(); ++k, first = false) { |
|||
if (!first) |
|||
outFile << " || "; |
|||
outFile << "nextToken == \"" << (*k) << "\""; |
|||
} |
|||
outFile << ") {" << endl; |
|||
} |
|||
for (int k = 0; k < i->second[j].size(); ++k) { |
|||
if (terminals.find(i->second[j][k]) != terminals.end()) { |
|||
outFile << " if(same(\"" << i->second[j][k] << "\"))" << endl; |
|||
outFile << " results.push_back(prevTokenValue);" << endl; |
|||
outFile << " else" << endl; |
|||
outFile << " throw \"Syntax error - mismatched token\";" << endl; |
|||
} else |
|||
outFile << " results.push_back(" << i->second[j][k] << "_rule());" << endl; |
|||
} |
|||
// Copy rule code to output |
|||
outFile << nonterminalCode[i->first][j]; |
|||
outFile << " }" << endl << endl; |
|||
} |
|||
if (epsilon == -1) |
|||
outFile << " throw \"Syntax error - unmatched token\";" << endl; |
|||
else |
|||
outFile << nonterminalCode[i->first][epsilon]; |
|||
outFile << "}" << endl << endl; |
|||
} |
|||
// Copy rule code to output |
|||
// Generate the main function |
|||
outFile << nonterminalCode[i->first][j]; |
|||
outFile << "int main(int argc, char **argv) {" << endl; |
|||
outFile << " if(argc < 2) {" << endl; |
|||
outFile << " cout << \"Usage: <input file>\" << endl;" << endl; |
|||
outFile << " return 1;" << endl; |
|||
outFile << " }" << endl << endl; |
|||
outFile << " }" << endl << endl; |
|||
} |
|||
outFile << " string line;" << endl; |
|||
outFile << " input = \"\";" << endl << endl; |
|||
if (epsilon == -1) |
|||
outFile << " while(true) {" << endl; |
|||
outFile << " throw \"Syntax error - unmatched token\";" << endl; |
|||
else |
|||
outFile << " if(!file) break;" << endl; |
|||
outFile << nonterminalCode[i->first][epsilon]; |
|||
outFile << " input += line + \"\\n\";" << endl; |
|||
outFile << " }" << endl << endl; |
|||
outFile << "}" << endl << endl; |
|||
} |
|||
// Generate the main function |
|||
outFile << " start_rule();" << endl; |
|||
outFile << "int main(int argc, char **argv) {" << endl |
|||
<< " if(argc < 2) {" << endl |
|||
<< " cout << \"Usage: <input file>\" << endl;" << endl |
|||
<< " return 1;" << endl |
|||
<< " }" << endl |
|||
<< endl |
|||
<< " ifstream file(argv[1]);" << endl |
|||
<< " string line;" << endl |
|||
<< " input = \"\";" << endl |
|||
<< endl |
|||
<< " while(true) {" << endl |
|||
<< " getline(file, line);" << endl |
|||
<< " if(!file) break;" << endl |
|||
<< " input += line + \"\\n\";" << endl |
|||
<< " }" << endl |
|||
<< endl |
|||
<< " advanceToken();" << endl |
|||
<< endl |
|||
<< " start_rule();" << endl |
|||
<< "}" << endl; |
|||
} |
} |
||
</syntaxhighlight> |
|||
</lang> |
|||
Example grammar: |
Example grammar: |
||
Line 354: | Line 361: | ||
_0003 = four * five |
_0003 = four * five |
||
_0004 = _0002 + _0003 |
_0004 = _0002 + _0003 |
||
</pre> |
|||
=={{header|Go}}== |
|||
The standard library already contains a recursive descent parser for Go programs or expressions, written in Go itself, whose output is an abstract syntax tree (AST) representing such code. |
|||
I've therefore applied this parser to the expression designated by the task, which only involves binary expressions and identifiers, and written a separate routine to convert the resulting AST into three-address code. |
|||
<syntaxhighlight lang="go">package main |
|||
import ( |
|||
"fmt" |
|||
"go/ast" |
|||
"go/parser" |
|||
"log" |
|||
) |
|||
func labelStr(label int) string { |
|||
return fmt.Sprintf("_%04d", label) |
|||
} |
|||
type binexp struct { |
|||
op, left, right string |
|||
kind, index int |
|||
} |
|||
func main() { |
|||
x := "(one + two) * three - four * five" |
|||
fmt.Println("Expression to parse: ", x) |
|||
f, err := parser.ParseExpr(x) |
|||
if err != nil { |
|||
log.Fatal(err) |
|||
} |
|||
fmt.Println("\nThe abstract syntax tree for this expression:") |
|||
ast.Print(nil, f) |
|||
fmt.Println("\nThe corresponding three-address code:") |
|||
var binexps []binexp |
|||
// Inspect nodes in depth-first order. |
|||
ast.Inspect(f, func(n ast.Node) bool { |
|||
switch x := n.(type) { |
|||
case *ast.BinaryExpr: |
|||
sx, ok1 := x.X.(*ast.Ident) |
|||
sy, ok2 := x.Y.(*ast.Ident) |
|||
op := x.Op.String() |
|||
if ok1 && ok2 { |
|||
binexps = append(binexps, binexp{op, sx.Name, sy.Name, 3, 0}) |
|||
} else if !ok1 && ok2 { |
|||
binexps = append(binexps, binexp{op, "<addr>", sy.Name, 2, 0}) |
|||
} else if ok1 && !ok2 { |
|||
binexps = append(binexps, binexp{op, sx.Name, "<addr>", 1, 0}) |
|||
} else { |
|||
binexps = append(binexps, binexp{op, "<addr>", "<addr>", 0, 0}) |
|||
} |
|||
} |
|||
return true |
|||
}) |
|||
for i := 0; i < len(binexps); i++ { |
|||
binexps[i].index = i |
|||
} |
|||
label, last := 0, -1 |
|||
var ops, args []binexp |
|||
var labels []string |
|||
for i, be := range binexps { |
|||
if be.kind == 0 { |
|||
ops = append(ops, be) |
|||
} |
|||
if be.kind != 3 { |
|||
continue |
|||
} |
|||
label++ |
|||
ls := labelStr(label) |
|||
fmt.Printf(" %s = %s %s %s\n", ls, be.left, be.op, be.right) |
|||
for j := i - 1; j > last; j-- { |
|||
be2 := binexps[j] |
|||
if be2.kind == 2 { |
|||
label++ |
|||
ls2 := labelStr(label) |
|||
fmt.Printf(" %s = %s %s %s\n", ls2, ls, be2.op, be2.right) |
|||
ls = ls2 |
|||
be = be2 |
|||
} else if be2.kind == 1 { |
|||
label++ |
|||
ls2 := labelStr(label) |
|||
fmt.Printf(" %s = %s %s %s\n", ls2, be2.left, be2.op, ls) |
|||
ls = ls2 |
|||
be = be2 |
|||
} |
|||
} |
|||
args = append(args, be) |
|||
labels = append(labels, ls) |
|||
lea, leo := len(args), len(ops) |
|||
for lea >= 2 { |
|||
if i < len(binexps)-1 && args[lea-2].index <= ops[leo-1].index { |
|||
break |
|||
} |
|||
label++ |
|||
ls2 := labelStr(label) |
|||
fmt.Printf(" %s = %s %s %s\n", ls2, labels[lea-2], ops[leo-1].op, labels[lea-1]) |
|||
ops = ops[0 : leo-1] |
|||
args = args[0 : lea-1] |
|||
labels = labels[0 : lea-1] |
|||
lea-- |
|||
leo-- |
|||
args[lea-1] = be |
|||
labels[lea-1] = ls2 |
|||
} |
|||
last = i |
|||
} |
|||
}</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
Expression to parse: (one + two) * three - four * five |
|||
The abstract syntax tree for this expression: |
|||
0 *ast.BinaryExpr { |
|||
1 . X: *ast.BinaryExpr { |
|||
2 . . X: *ast.ParenExpr { |
|||
3 . . . Lparen: 1 |
|||
4 . . . X: *ast.BinaryExpr { |
|||
5 . . . . X: *ast.Ident { |
|||
6 . . . . . NamePos: 2 |
|||
7 . . . . . Name: "one" |
|||
8 . . . . . Obj: *ast.Object { |
|||
9 . . . . . . Kind: bad |
|||
10 . . . . . . Name: "" |
|||
11 . . . . . } |
|||
12 . . . . } |
|||
13 . . . . OpPos: 6 |
|||
14 . . . . Op: + |
|||
15 . . . . Y: *ast.Ident { |
|||
16 . . . . . NamePos: 8 |
|||
17 . . . . . Name: "two" |
|||
18 . . . . . Obj: *(obj @ 8) |
|||
19 . . . . } |
|||
20 . . . } |
|||
21 . . . Rparen: 11 |
|||
22 . . } |
|||
23 . . OpPos: 13 |
|||
24 . . Op: * |
|||
25 . . Y: *ast.Ident { |
|||
26 . . . NamePos: 15 |
|||
27 . . . Name: "three" |
|||
28 . . . Obj: *(obj @ 8) |
|||
29 . . } |
|||
30 . } |
|||
31 . OpPos: 21 |
|||
32 . Op: - |
|||
33 . Y: *ast.BinaryExpr { |
|||
34 . . X: *ast.Ident { |
|||
35 . . . NamePos: 23 |
|||
36 . . . Name: "four" |
|||
37 . . . Obj: *(obj @ 8) |
|||
38 . . } |
|||
39 . . OpPos: 28 |
|||
40 . . Op: * |
|||
41 . . Y: *ast.Ident { |
|||
42 . . . NamePos: 30 |
|||
43 . . . Name: "five" |
|||
44 . . . Obj: *(obj @ 8) |
|||
45 . . } |
|||
46 . } |
|||
47 } |
|||
The corresponding three-address code: |
|||
_0001 = one + two |
|||
_0002 = _0001 * three |
|||
_0003 = four * five |
|||
_0004 = _0002 - _0003 |
|||
</pre> |
</pre> |
||
Line 362: | Line 540: | ||
Implementation: |
Implementation: |
||
< |
<syntaxhighlight lang="j">cocurrent 'base' |
||
inlocale=: 4 :0 L:0 |
inlocale=: 4 :0 L:0 |
||
Line 376: | Line 554: | ||
names=. ~.sentence#~_1<:nc sentence |
names=. ~.sentence#~_1<:nc sentence |
||
(names inlocale scratch)=: names |
(names inlocale scratch)=: names |
||
do__scratch ;:inv opinds}((#sentence)#"0 opfuns),sentence |
r=. do__scratch ;:inv opinds}((#sentence)#"0 opfuns),sentence |
||
codestroy__scratch'' |
|||
r |
|||
) |
|||
term=: 1 :0 |
|||
2 :('m''',m,'''expr n') |
|||
) |
) |
||
expr=:1 :0 |
|||
: |
: |
||
r=. genname'' |
r=. genname'' |
||
Line 386: | Line 570: | ||
) |
) |
||
plus=: '+' |
plus=: '+' expr |
||
times=: '*' |
times=: '*' term |
||
minus=: '-' |
minus=: '-' expr |
||
N=: 10000 |
N=: 10000 |
||
Line 395: | Line 579: | ||
) |
) |
||
emit=: smoutput |
emit=: smoutput |
||
</syntaxhighlight> |
|||
Task example: |
Task example: |
||
< |
<syntaxhighlight lang="j"> parse '(one + two) * three - four * five' |
||
z0001=:four*five |
z0001=:four*five |
||
z0002=: |
z0002=:one+two |
||
z0003=: |
z0003=:z0002*three |
||
z0004=:z0003 |
z0004=:z0003-z0001 |
||
z0004</ |
z0004</syntaxhighlight> |
||
See also https://github.com/jsoftware/general_misc/blob/master/trace.ijs for a model of the underlying parser being employed here. |
|||
=={{header|Julia}}== |
|||
The Julia compiler's own parser is a recursive descent parser, and can be used directly here. |
|||
<syntaxhighlight lang="julia">const one, two, three, four, five, six, seven, eight, nine = collect(1:9) |
|||
function testparser(s) |
|||
cod = Meta.parse(s) |
|||
println(Meta.lower(Main, cod)) |
|||
end |
|||
testparser("(one + two) * three - four * five") |
|||
</syntaxhighlight>{{out}} |
|||
<pre> |
|||
$(Expr(:thunk, CodeInfo( |
|||
@ none within `top-level scope' |
|||
1 ─ %1 = one + two |
|||
│ %2 = %1 * three |
|||
│ %3 = four * five |
|||
│ %4 = %2 - %3 |
|||
└── return %4 |
|||
))) |
|||
</pre> |
|||
=={{header|Perl}}== |
|||
<syntaxhighlight lang="perl">#!/usr/bin/perl |
|||
use strict; # https://rosettacode.org/wiki/Recursive_descent_parser_generator |
|||
use warnings; |
|||
use Path::Tiny; |
|||
my $h = qr/\G\s*/; |
|||
my $identifier = qr/$h([a-z]\w*)\b/i; |
|||
my $literal = qr/$h(['"])(.+?)\1/s; |
|||
my (%rules, %called, $usercode, %patches); |
|||
my $filename = './generatedparser.pl'; |
|||
sub node { bless [ @_[1..$#_] ], $_[0] } |
|||
sub error { die "ERROR: ", s/\G\s*\K/<**ERROR @_**>/r, "\n" } |
|||
sub want { /$h\Q$_[1]\E/gc ? shift : error "missing '$_[1]' " } |
|||
sub addin { node $_[0] => ref $_[1] eq $_[0] ? @{$_[1]} : $_[1], pop } |
|||
local $_ = do { local $/; @ARGV ? <> : <DATA> }; # the EBNF |
|||
$usercode = s/^(#usercode.*)//ms ? $1 : "# no usercode included\n";; |
|||
$patches{PATCHUSER} = $usercode . "#end usercode\n"; # grammar support code |
|||
s/^\h*#.*\n//gm; # remove comment lines |
|||
$patches{PATCHGRAMMAR} = s/^(?:\h*\n)+//r =~ s/\n(?:\h*\n)+\z//r; |
|||
while( /$identifier\s*=/gc ) # the start of a rule |
|||
{ |
|||
my $name = $1; |
|||
$rules{startsymbol} //= node RULE => $name; |
|||
my $tree = expr(0); |
|||
$rules{$name} = $rules{$name} ? addin ALT => $rules{$name}, $tree : $tree; |
|||
/$h[.;]/gc or error 'missing rule terminator, needs . or ;'; |
|||
} |
|||
/\G\s*\z/ or error "incomplete parse at ", substr $_, pos($_) // 0; |
|||
%rules or error "no rules found "; |
|||
delete @called{keys %rules}; |
|||
%called and die "\nERROR: undefined rule(s) <@{[ keys %called]}>\n"; |
|||
sub expr # precedence climbing parser for grammer rules |
|||
{ |
|||
my $tree = |
|||
/$h(NAME)\b/gc ? node $1 : # internal name matcher |
|||
/$identifier/gc ? do { $called{$1}++; node RULE => $1 } : |
|||
/$literal/gc ? node LIT => $2 : |
|||
/$h<(\w+)>/gc ? node ACTION => $1 : |
|||
/$h\[/gc ? node OPTION => want expr(0), ']' : |
|||
/$h\{/gc ? node REPEAT => want expr(0), '}' : |
|||
/$h\(/gc ? want expr(0), ')' : |
|||
error 'Invalid expression'; |
|||
$tree = |
|||
/\G\s+/gc ? $tree : |
|||
$_[0] <= 1 && /\G(?=[[<{('"a-z])/gci ? addin SEQ => $tree, expr(2) : |
|||
$_[0] <= 0 && /\G\|/gc ? addin ALT => $tree, expr(1) : |
|||
return $tree while 1; |
|||
} |
|||
my $perlcode = "# generated code (put in Rule:: package)\n"; |
|||
for my $rule ( sort keys %rules ) |
|||
{ |
|||
my $body = $rules{$rule}->code; |
|||
$perlcode .= "\nsub Rule::$rule\n\t{\n\t$body\n\t}\n"; |
|||
} |
|||
$perlcode =~ s/^(?:\h*\n)+(?=\h*\})//gm; |
|||
$perlcode .= "\n# preceding code was generated for rules\n"; |
|||
$patches{PATCHGENERATED} = $perlcode; |
|||
sub ALT::code |
|||
{ |
|||
my $all = join " or\n\t", map $_->code, @{ $_[0] }; |
|||
"( # alt\n\t$all )" |
|||
} |
|||
sub SEQ::code |
|||
{ |
|||
my $all = join " and\n\t", map $_->code, @{ $_[0] }; |
|||
"( # seq\n\t$all )" |
|||
} |
|||
sub REPEAT::code { "do { # repeat\n\t1 while @{[ $_[0][0]->code ]} ; 1 }" } |
|||
sub OPTION::code { "( # option\n\t@{[ $_[0][0]->code ]} or 1 )" } |
|||
sub RULE::code { "Rule::$_[0][0]()" } |
|||
sub ACTION::code { "( $_[0][0]() || 1 )" } |
|||
sub NAME::code { "( /\\G\$whitespace(\\w+)/gc and push \@stack, \$1 )" } |
|||
sub LIT::code { "( /\\G\$whitespace(\Q$_[0][0]\E)/gc and push \@stack, \$1 )" } |
|||
$_ = <<'END'; ##################################### template for generated code |
|||
#!/usr/bin/perl |
|||
use strict; # https://rosettacode.org/wiki/Recursive_descent_parser_generator |
|||
use warnings; # WARNING: this code is generated |
|||
my @stack; |
|||
my $whitespace = qr/\s*/; |
|||
my $grammar = <<'GRAMMAR'; # make grammar rules available to usercode |
|||
PATCHGRAMMAR |
|||
GRAMMAR |
|||
PATCHUSER |
|||
PATCHGENERATED |
|||
local $_ = shift // '(one + two) * three - four * five'; |
|||
eval { begin() }; # eval because it is optional |
|||
Rule::startsymbol(); |
|||
eval { end() }; # eval because it is optional |
|||
/\G\s*\z/ or die "ERROR: incomplete parse\n"; |
|||
END |
|||
s/(PATCH\w+)/$patches{$1}/g; # insert grammar variable stuff |
|||
path( $filename )->spew( $_ ); |
|||
chmod 0555, $filename; # readonly, executable |
|||
exec 'perl', $filename, @ARGV or die "exec failed $!"; |
|||
__DATA__ |
|||
expr = term { plus term <gen3addr> } . |
|||
term = factor { times factor <gen3addr> } . |
|||
factor = primary [ '^' factor <gen3addr> ] . |
|||
primary = '(' expr ')' <removeparens> | NAME . |
|||
plus = "+" | "-" . |
|||
times = "*" | "/" . |
|||
#usercode -- separator for included code for actions |
|||
my $temp = '0000'; |
|||
sub begin { print "parsing: $_\n\n" } |
|||
sub gen3addr |
|||
{ |
|||
@stack >= 3 or die "not enough on stack"; |
|||
my @three = splice @stack, -3, 3, my $t = '_' . ++$temp; |
|||
print "$t = @three\n"; |
|||
} |
|||
sub removeparens |
|||
{ |
|||
@stack >= 3 or die "not enough on stack"; |
|||
splice @stack, -3, 3, $stack[-2]; |
|||
}</syntaxhighlight> |
|||
Running the above with no arguments uses a default grammar that will solve the specified example. It produces |
|||
the following perl script (and then runs it). |
|||
<syntaxhighlight lang="perl">#!/usr/bin/perl |
|||
use strict; # https://rosettacode.org/wiki/Recursive_descent_parser_generator |
|||
use warnings; # WARNING: this code is generated |
|||
my @stack; |
|||
my $whitespace = qr/\s*/; |
|||
my $grammar = <<'GRAMMAR'; # make grammar rules available to usercode |
|||
expr = term { plus term <gen3addr> } . |
|||
term = factor { times factor <gen3addr> } . |
|||
factor = primary [ '^' factor <gen3addr> ] . |
|||
primary = '(' expr ')' <removeparens> | NAME . |
|||
plus = "+" | "-" . |
|||
times = "*" | "/" . |
|||
GRAMMAR |
|||
#usercode -- separator for included code for actions |
|||
my $temp = '0000'; |
|||
sub begin { print "parsing: $_\n\n" } |
|||
sub gen3addr |
|||
{ |
|||
@stack >= 3 or die "not enough on stack"; |
|||
my @three = splice @stack, -3, 3, my $t = '_' . ++$temp; |
|||
print "$t = @three\n"; |
|||
} |
|||
sub removeparens |
|||
{ |
|||
@stack >= 3 or die "not enough on stack"; |
|||
splice @stack, -3, 3, $stack[-2]; |
|||
} |
|||
#end usercode |
|||
# generated code (put in Rule:: package) |
|||
sub Rule::expr |
|||
{ |
|||
( # seq |
|||
Rule::term() and |
|||
do { # repeat |
|||
1 while ( # seq |
|||
Rule::plus() and |
|||
Rule::term() and |
|||
( gen3addr() || 1 ) ) ; 1 } ) |
|||
} |
|||
sub Rule::factor |
|||
{ |
|||
( # seq |
|||
Rule::primary() and |
|||
( # option |
|||
( # seq |
|||
( /\G$whitespace(\^)/gc and push @stack, $1 ) and |
|||
Rule::factor() and |
|||
( gen3addr() || 1 ) ) or 1 ) ) |
|||
} |
|||
sub Rule::plus |
|||
{ |
|||
( # alt |
|||
( /\G$whitespace(\+)/gc and push @stack, $1 ) or |
|||
( /\G$whitespace(\-)/gc and push @stack, $1 ) ) |
|||
} |
|||
sub Rule::primary |
|||
{ |
|||
( # alt |
|||
( # seq |
|||
( /\G$whitespace(\()/gc and push @stack, $1 ) and |
|||
Rule::expr() and |
|||
( /\G$whitespace(\))/gc and push @stack, $1 ) and |
|||
( removeparens() || 1 ) ) or |
|||
( /\G$whitespace(\w+)/gc and push @stack, $1 ) ) |
|||
} |
|||
sub Rule::startsymbol |
|||
{ |
|||
Rule::expr() |
|||
} |
|||
sub Rule::term |
|||
{ |
|||
( # seq |
|||
Rule::factor() and |
|||
do { # repeat |
|||
1 while ( # seq |
|||
Rule::times() and |
|||
Rule::factor() and |
|||
( gen3addr() || 1 ) ) ; 1 } ) |
|||
} |
|||
sub Rule::times |
|||
{ |
|||
( # alt |
|||
( /\G$whitespace(\*)/gc and push @stack, $1 ) or |
|||
( /\G$whitespace(\/)/gc and push @stack, $1 ) ) |
|||
} |
|||
# preceding code was generated for rules |
|||
local $_ = shift // '(one + two) * three - four * five'; |
|||
eval { begin() }; # eval because it is optional |
|||
Rule::startsymbol(); |
|||
eval { end() }; # eval because it is optional |
|||
/\G\s*\z/ or die "ERROR: incomplete parse\n";</syntaxhighlight> |
|||
The above script can also be run stand-alone and produces the following output. |
|||
{{out}} |
|||
<pre> |
|||
parsing: (one + two) * three - four * five |
|||
_0001 = one + two |
|||
_0002 = _0001 * three |
|||
_0003 = four * five |
|||
_0004 = _0002 - _0003 |
|||
</pre> |
|||
Different grammars and input can be specified on the command line. |
|||
<pre> |
|||
recursivedescentparsergenerator.pl arithexpr.y '2 * 3 + 4 * 5' |
|||
</pre> |
|||
and giving this file as "arithexpr.y" |
|||
<syntaxhighlight lang="perl"># test arith expr |
|||
expr = term { '+' term <fadd> | '-' term <fsub> } . |
|||
term = factor { '*' factor <fmul> | '/' factor <fdiv> } . |
|||
factor = '(' expr ')' <noparen> | NAME . |
|||
#usercode |
|||
sub noparen { splice @stack, -3, 3, $stack[-2]; } |
|||
sub fadd { splice @stack, -3, 3, $stack[-3] + $stack[-1] } |
|||
sub fsub { splice @stack, -3, 3, $stack[-3] - $stack[-1] } |
|||
sub fmul { splice @stack, -3, 3, $stack[-3] * $stack[-1] } |
|||
sub fdiv { splice @stack, -3, 3, $stack[-3] / $stack[-1] } |
|||
sub begin { print "expr = $_\n" } |
|||
sub end { print "answer = @{[pop @stack]}\n" }</syntaxhighlight> |
|||
will produce the following |
|||
{{out}} |
|||
<pre> |
|||
expr = 2 * 3 + 4 * 5 |
|||
answer = 26 |
|||
</pre> |
|||
=={{header|Phix}}== |
|||
Technically the task is asking for code which generates something like the following, so I suppose |
|||
it would actually meet the spec if it began with <code>constant src = """</code> and ended with |
|||
<code>""" puts(1,src)</code>... (and like several/most other entries on this page, this does not use a formal grammer) |
|||
<!--<syntaxhighlight lang="phix">(phixonline)--> |
|||
<span style="color: #000080;font-style:italic;">-- |
|||
-- demo\rosetta\RecursiveDescentParser.exw |
|||
-- ======================================= |
|||
--</span> |
|||
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span> |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">src</span> |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">sdx</span> |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">tok</span> |
|||
<span style="color: #008080;">procedure</span> <span style="color: #000000;">skip_spaces</span><span style="color: #0000FF;">()</span> |
|||
<span style="color: #008080;">while</span> <span style="color: #000000;">1</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">sdx</span><span style="color: #0000FF;">></span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">src</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #000000;">ch</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">src</span><span style="color: #0000FF;">[</span><span style="color: #000000;">sdx</span><span style="color: #0000FF;">]</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">,{</span><span style="color: #008000;">' '</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'\t'</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'\r'</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'\n'</span><span style="color: #0000FF;">})</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span> |
|||
<span style="color: #008080;">procedure</span> <span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span> |
|||
<span style="color: #000080;font-style:italic;">-- yeilds one of: |
|||
-- {"SYMBOL",ch} where ch is one of "()+-/*", or |
|||
-- {"IDENT",string}, or {"EOF"}</span> |
|||
<span style="color: #000000;">skip_spaces</span><span style="color: #0000FF;">()</span> |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">tokstart</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">sdx</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">sdx</span><span style="color: #0000FF;">></span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">src</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #000000;">tok</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"EOF"</span><span style="color: #0000FF;">}</span> |
|||
<span style="color: #008080;">elsif</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"()+-/*"</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #000000;">tok</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"SYMBOL"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">&</span><span style="color: #008000;">""</span><span style="color: #0000FF;">}</span> |
|||
<span style="color: #008080;">elsif</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">>=</span><span style="color: #008000;">'a'</span> <span style="color: #008080;">and</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;"><=</span><span style="color: #008000;">'z'</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">or</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">>=</span><span style="color: #008000;">'A'</span> <span style="color: #008080;">and</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;"><=</span><span style="color: #008000;">'Z'</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">sdx</span><span style="color: #0000FF;">></span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">src</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #000000;">ch</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">src</span><span style="color: #0000FF;">[</span><span style="color: #000000;">sdx</span><span style="color: #0000FF;">]</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">!=</span><span style="color: #008000;">'_'</span> |
|||
<span style="color: #008080;">and</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;"><</span><span style="color: #008000;">'a'</span> <span style="color: #008080;">or</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">></span><span style="color: #008000;">'z'</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">and</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;"><</span><span style="color: #008000;">'A'</span> <span style="color: #008080;">or</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">></span><span style="color: #008000;">'Z'</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">and</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;"><</span><span style="color: #008000;">'0'</span> <span style="color: #008080;">or</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">></span><span style="color: #008000;">'9'</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #008080;">exit</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
<span style="color: #000000;">tok</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"IDENT"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">src</span><span style="color: #0000FF;">[</span><span style="color: #000000;">tokstart</span><span style="color: #0000FF;">..</span><span style="color: #000000;">sdx</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]}</span> |
|||
<span style="color: #008080;">else</span> |
|||
<span style="color: #0000FF;">?</span><span style="color: #000000;">9</span><span style="color: #0000FF;">/</span><span style="color: #000000;">0</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span> |
|||
<span style="color: #008080;">forward</span> <span style="color: #008080;">function</span> <span style="color: #000000;">sum_expr</span><span style="color: #0000FF;">()</span> |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">primary</span><span style="color: #0000FF;">()</span> |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">res</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">tok</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]=</span><span style="color: #008000;">"IDENT"</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tok</span> |
|||
<span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span> |
|||
<span style="color: #008080;">elsif</span> <span style="color: #000000;">tok</span><span style="color: #0000FF;">={</span><span style="color: #008000;">"SYMBOL"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"("</span><span style="color: #0000FF;">}</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span> |
|||
<span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">sum_expr</span><span style="color: #0000FF;">()</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">tok</span><span style="color: #0000FF;">!={</span><span style="color: #008000;">"SYMBOL"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">")"</span><span style="color: #0000FF;">}</span> <span style="color: #008080;">then</span> <span style="color: #0000FF;">?</span><span style="color: #000000;">9</span><span style="color: #0000FF;">/</span><span style="color: #000000;">0</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span> |
|||
<span style="color: #008080;">else</span> |
|||
<span style="color: #0000FF;">?</span><span style="color: #000000;">9</span><span style="color: #0000FF;">/</span><span style="color: #000000;">0</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #008080;">return</span> <span style="color: #000000;">res</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">mul_expr</span><span style="color: #0000FF;">()</span> |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">primary</span><span style="color: #0000FF;">()</span> |
|||
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">tok</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]!=</span><span style="color: #008000;">"SYMBOL"</span> <span style="color: #008080;">or</span> <span style="color: #008080;">not</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tok</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">],{</span><span style="color: #008000;">"*"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"/"</span><span style="color: #0000FF;">})</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">tok</span><span style="color: #0000FF;">,</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #004600;">NULL</span><span style="color: #0000FF;">}</span> |
|||
<span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span> |
|||
<span style="color: #000000;">res</span><span style="color: #0000FF;">[</span><span style="color: #000000;">3</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">primary</span><span style="color: #0000FF;">()</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
<span style="color: #008080;">return</span> <span style="color: #000000;">res</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">sum_expr</span><span style="color: #0000FF;">()</span> |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">mul_expr</span><span style="color: #0000FF;">()</span> |
|||
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">tok</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]!=</span><span style="color: #008000;">"SYMBOL"</span> <span style="color: #008080;">or</span> <span style="color: #008080;">not</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tok</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">],{</span><span style="color: #008000;">"+"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"-"</span><span style="color: #0000FF;">})</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">tok</span><span style="color: #0000FF;">,</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #004600;">NULL</span><span style="color: #0000FF;">}</span> |
|||
<span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span> |
|||
<span style="color: #000000;">res</span><span style="color: #0000FF;">[</span><span style="color: #000000;">3</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">mul_expr</span><span style="color: #0000FF;">()</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
<span style="color: #008080;">return</span> <span style="color: #000000;">res</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">nxt</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">show_ast</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">ast</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">ast</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]=</span><span style="color: #008000;">"SYMBOL"</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">op</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">ast</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">2</span><span style="color: #0000FF;">],</span> |
|||
<span style="color: #000000;">lhs</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">show_ast</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ast</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">]),</span> |
|||
<span style="color: #000000;">rhs</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">show_ast</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ast</span><span style="color: #0000FF;">[</span><span style="color: #000000;">3</span><span style="color: #0000FF;">]),</span> |
|||
<span style="color: #000000;">nid</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"_%04d"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">nxt</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%s = %s %s %s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">nid</span><span style="color: #0000FF;">,</span><span style="color: #000000;">lhs</span><span style="color: #0000FF;">,</span><span style="color: #000000;">op</span><span style="color: #0000FF;">,</span><span style="color: #000000;">rhs</span><span style="color: #0000FF;">})</span> |
|||
<span style="color: #000000;">nxt</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #008080;">return</span> <span style="color: #000000;">nid</span> |
|||
<span style="color: #008080;">elsif</span> <span style="color: #000000;">ast</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]=</span><span style="color: #008000;">"IDENT"</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #008080;">return</span> <span style="color: #000000;">ast</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">]</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #0000FF;">?</span><span style="color: #000000;">9</span><span style="color: #0000FF;">/</span><span style="color: #000000;">0</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
<span style="color: #008080;">procedure</span> <span style="color: #000000;">parse</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">source</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #000000;">src</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">source</span> |
|||
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #000000;">next_token</span><span style="color: #0000FF;">()</span> |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">ast</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">sum_expr</span><span style="color: #0000FF;">()</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">tok</span><span style="color: #0000FF;">!={</span><span style="color: #008000;">"EOF"</span><span style="color: #0000FF;">}</span> <span style="color: #008080;">then</span> <span style="color: #0000FF;">?</span><span style="color: #000000;">9</span><span style="color: #0000FF;">/</span><span style="color: #000000;">0</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #7060A8;">pp</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ast</span><span style="color: #0000FF;">,{</span><span style="color: #004600;">pp_Nest</span><span style="color: #0000FF;">,</span><span style="color: #000000;">4</span><span style="color: #0000FF;">})</span> |
|||
<span style="color: #0000FF;">{}</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">show_ast</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ast</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span> |
|||
<span style="color: #000000;">parse</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"(one + two) * three - four * five"</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #0000FF;">{}</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">wait_key</span><span style="color: #0000FF;">()</span> |
|||
<!--</syntaxhighlight>--> |
|||
{{out}} |
|||
<pre> |
|||
{{`SYMBOL`, |
|||
`-`}, |
|||
{{`SYMBOL`, |
|||
`*`}, |
|||
{{`SYMBOL`, |
|||
`+`}, |
|||
{`IDENT`, |
|||
`one`}, |
|||
{`IDENT`, |
|||
`two`}}, |
|||
{`IDENT`, |
|||
`three`}}, |
|||
{{`SYMBOL`, |
|||
`*`}, |
|||
{`IDENT`, |
|||
`four`}, |
|||
{`IDENT`, |
|||
`five`}}} |
|||
_0001 = one + two |
|||
_0002 = _0001 * three |
|||
_0003 = four * five |
|||
_0004 = _0002 - _0003 |
|||
</pre> |
|||
=={{header|Raku}}== |
|||
(formerly Perl 6) |
|||
Instead of writing a full-blown generator, it is possible to solve the task partly by making use of the built-in optimizer and study the relevant AST output |
|||
<pre> |
|||
raku --target=optimize -e 'no strict; say ($one + $two) * $three - $four * $five' | tail -11 |
|||
- QAST::Op(callstatic &say) <sunk> :statement_id<2> say ($one + $two) * $three - $four * $five |
|||
- QAST::Op(callstatic &infix:<->) <wanted> - |
|||
- QAST::Op(callstatic &infix:<*>) <wanted> * |
|||
- QAST::Op(callstatic &infix:<+>) <wanted> :statement_id<3> + |
|||
- QAST::Var(local __lowered_lex_5) <wanted> $one |
|||
- QAST::Var(local __lowered_lex_4) <wanted> $two |
|||
- QAST::Var(local __lowered_lex_3) <wanted> $three |
|||
- QAST::Op(callstatic &infix:<*>) <wanted> * |
|||
- QAST::Var(local __lowered_lex_2) <wanted> $four |
|||
- QAST::Var(local __lowered_lex_1) <wanted> $five |
|||
- QAST::WVal(Nil) |
|||
</pre> |
|||
As you can see by examining the nested tree, the calculations are done as follows (expressed using a postfix notation) |
|||
<pre> |
|||
one two + three * four five * - |
|||
</pre> |
|||
=={{header|Wren}}== |
|||
{{trans|Phix}} |
|||
{{libheader|Wren-str}} |
|||
{{libheader|Wren-fmt}} |
|||
<syntaxhighlight lang="wren">import "./str" for Char |
|||
import "./fmt" for Fmt |
|||
class RDP { |
|||
construct parse(source) { |
|||
_src = source |
|||
_sdx = 0 |
|||
_ch = null |
|||
_tok = null |
|||
_nxt = 1 |
|||
nextToken() |
|||
var ast = sumExpr() |
|||
if (_tok[0] != "EOF") Fiber.abort("Something went wrong.") |
|||
printAst(ast, 0) |
|||
System.print() |
|||
showAst(ast) |
|||
} |
|||
skipSpaces() { |
|||
while (true) { |
|||
if (_sdx >= _src.count) return |
|||
_ch = _src[_sdx] |
|||
if (!" \t\r\n".contains(_ch)) return |
|||
_sdx = _sdx + 1 |
|||
} |
|||
} |
|||
// yields one of: |
|||
// ["SYMBOL", ch] where ch is one of "()+-/*", or |
|||
// ["IDENT", string] or ["EOF"] |
|||
nextToken() { |
|||
skipSpaces() |
|||
var tokStart = _sdx |
|||
if (_sdx >= _src.count) { |
|||
_tok = ["EOF"] |
|||
} else if ("()+-/*".contains(_ch)) { |
|||
_sdx = _sdx + 1 |
|||
_tok = ["SYMBOL", _ch] |
|||
} else if (Char.isAsciiLetter(_ch)) { |
|||
while (true) { |
|||
_sdx = _sdx + 1 |
|||
if (_sdx >= _src.count) break |
|||
_ch = _src[_sdx] |
|||
if (!Char.isAsciiAlphaNum(_ch) && _ch != "_") break |
|||
} |
|||
_tok = ["IDENT", _src[tokStart..._sdx]] |
|||
} else { |
|||
Fiber.abort("Invalid token '%(_ch)'.") |
|||
} |
|||
} |
|||
primary() { |
|||
var res = [] |
|||
if (_tok[0] == "IDENT") { |
|||
res = _tok.toList |
|||
nextToken() |
|||
} else if (_tok[0] == "SYMBOL" && _tok[1] == "(") { |
|||
nextToken() |
|||
res = sumExpr() |
|||
if (_tok[0] != "SYMBOL" || _tok[1] != ")") Fiber.abort("Unexpected token '%(_tok)'.") |
|||
nextToken() |
|||
} else { |
|||
Fiber.abort("Unexpected token '%(_tok)'.") |
|||
} |
|||
return res |
|||
} |
|||
mulExpr() { |
|||
var res = primary() |
|||
while (true) { |
|||
if (_tok[0] != "SYMBOL" || !"*/".contains(_tok[1])) break |
|||
res = [_tok, res, null] |
|||
nextToken() |
|||
res[2] = primary() |
|||
} |
|||
return res |
|||
} |
|||
sumExpr() { |
|||
var res = mulExpr() |
|||
while (true) { |
|||
if (_tok[0] != "SYMBOL" || !"+-".contains(_tok[1])) break |
|||
res = [_tok, res, null] |
|||
nextToken() |
|||
res[2] = mulExpr() |
|||
} |
|||
return res |
|||
} |
|||
showAst(ast) { |
|||
if (ast[0][0] == "SYMBOL") { |
|||
var op = ast[0][1] |
|||
var lhs = showAst(ast[1]) |
|||
var rhs = showAst(ast[2]) |
|||
var thiz = Fmt.swrite("_$04d", _nxt) |
|||
Fmt.print("$s = $s $s $s", thiz, lhs, op, rhs) |
|||
_nxt = _nxt + 1 |
|||
return thiz |
|||
} else if (ast[0] == "IDENT") { |
|||
return ast[1] |
|||
} |
|||
Fiber.abort("Something went wrong.") |
|||
} |
|||
printAst(ast, level) { |
|||
for (e in ast) { |
|||
var indent = " " * level |
|||
if (!(e is List)) { |
|||
System.print(indent + e) |
|||
} else { |
|||
System.print(indent + "{") |
|||
printAst(e, level+1) |
|||
System.print(indent + "}") |
|||
} |
|||
} |
|||
} |
|||
} |
|||
RDP.parse("(one + two) * three - four * five")</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
{ |
|||
SYMBOL |
|||
- |
|||
} |
|||
{ |
|||
{ |
|||
SYMBOL |
|||
* |
|||
} |
|||
{ |
|||
{ |
|||
SYMBOL |
|||
+ |
|||
} |
|||
{ |
|||
IDENT |
|||
one |
|||
} |
|||
{ |
|||
IDENT |
|||
two |
|||
} |
|||
} |
|||
{ |
|||
IDENT |
|||
three |
|||
} |
|||
} |
|||
{ |
|||
{ |
|||
SYMBOL |
|||
* |
|||
} |
|||
{ |
|||
IDENT |
|||
four |
|||
} |
|||
{ |
|||
IDENT |
|||
five |
|||
} |
|||
} |
|||
_0001 = one + two |
|||
_0002 = _0001 * three |
|||
_0003 = four * five |
|||
_0004 = _0002 - _0003 |
|||
</pre> |
Latest revision as of 10:53, 2 February 2024
Write a recursive descent parser generator that takes a description of a grammar as input and outputs the source code for a parser in the same language as the generator. (So a generator written in C++ would output C++ source code for the parser.) You can assume that all of the rules have been preprocessed into a form suitable for the construction of a recursive descent parser. Check the following links for more details.
- http://www.cs.engr.uky.edu/~lewis/essays/compilers/rec-des.html
- http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm
Use the parser generator and a grammar file to build a parser that takes an arithmetic expression and turns it in to three address code. The resulting parser should take this (or something similar) as input:
(one + two) * three - four * five
And generate this (or something similar) as output:
_0001 = one + two _0002 = _0001 * three _0003 = four * five _0004 = _0002 - _0003
C++
This program translates an annotated LL(1) grammar into a C++ lexer plus parser. Each rule is required to return a string of some kind and the return values of the already matched nonterminals (and matched text of terminals) can be accessed with $1, $2, etc. which are replaced by the appropriate string variable.
It can't handle newlines as part of the grammar, the error checking is fairly limited and the error reporting is basically non-existent, but the parser it generates (not shown below) is human readable.
#include <fstream>
#include <iostream>
#include <map>
#include <regex>
#include <set>
#include <sstream>
#include <string>
using namespace std;
map<string, string> terminals;
map<string, vector<vector<string>>> nonterminalRules;
map<string, set<string>> nonterminalFirst;
map<string, vector<string>> nonterminalCode;
int main(int argc, char **argv) {
if (argc < 3) {
cout << "Usage: <input file> <output file>" << endl;
return 1;
}
ifstream inFile(argv[1]);
ofstream outFile(argv[2]);
regex blankLine(R"(^\s*$)");
regex terminalPattern(R"((\w+)\s+(.+))");
regex rulePattern(R"(^!!\s*(\w+)\s*->\s*((?:\w+\s*)*)$)");
regex argPattern(R"(\$(\d+))");
smatch results;
// Read terminal patterns
string line;
while (true) {
getline(inFile, line);
// Terminals section ends with a blank line
if (regex_match(line, blankLine))
break;
regex_match(line, results, terminalPattern);
terminals[results[1]] = results[2];
}
outFile << "#include <iostream>" << endl
<< "#include <fstream>" << endl
<< "#include <string>" << endl
<< "#include <regex>" << endl
<< "using namespace std;" << endl
<< endl;
// Generate the token processing functions
outFile << "string input, nextToken, nextTokenValue;" << endl
<< "string prevToken, prevTokenValue;" << endl
<< endl
<< "void advanceToken() {" << endl
<< " static smatch results;" << endl
<< endl
<< " prevToken = nextToken;" << endl
<< " prevTokenValue = nextTokenValue;" << endl
<< endl;
for (auto i = terminals.begin(); i != terminals.end(); ++i) {
string name = i->first + "_pattern";
string pattern = i->second;
outFile << " static regex " << name << "(R\"(^\\s*(" << pattern << "))\");" << endl
<< " if (regex_search(input, results, " << name << ", regex_constants::match_continuous)) {" << endl
<< " nextToken = \"" << i->first << "\";" << endl
<< " nextTokenValue = results[1];" << endl
<< " input = regex_replace(input, " << name << ", \"\");" << endl
<< " return;" << endl
<< " }" << endl
<< endl;
}
outFile << " static regex eof(R\"(\\s*)\");" << endl
<< " if (regex_match(input, results, eof, regex_constants::match_continuous)) {" << endl
<< " nextToken = \"\";" << endl
<< " nextTokenValue = \"\";" << endl
<< " return;" << endl
<< " }" << endl
<< endl
<< " throw \"Unknown token\";" << endl
<< "}" << endl
<< endl
<< "bool same(string symbol) {" << endl
<< " if (symbol == nextToken) {" << endl
<< " advanceToken();" << endl
<< " return true;" << endl
<< " }" << endl
<< " return false;" << endl
<< "}" << endl
<< endl;
// Copy the header code to the output
while (true) {
getline(inFile, line);
// Copy lines until we reach the first rule
if (regex_match(line, results, rulePattern))
break;
outFile << line << endl;
}
// Build the nonterminal table
while (true) {
// results already contains the last matched rule
string name = results[1];
stringstream ss(results[2]);
string tempString;
vector<string> tempVector;
while (ss >> tempString)
tempVector.push_back(tempString);
nonterminalRules[name].push_back(tempVector);
// Read code until another rule is found
string code = "";
while (true) {
getline(inFile, line);
if (!inFile || regex_match(line, results, rulePattern))
break;
// Replace $1 with results[1], etc.
line = regex_replace(line, argPattern, "results[$1]");
code += line + "\n";
}
nonterminalCode[name].push_back(code);
// Stop when we reach the end of the file
if (!inFile)
break;
}
// Generate the first sets, inefficiently
bool done = false;
while (!done)
for (auto i = nonterminalRules.begin(); i != nonterminalRules.end(); ++i) {
string name = i->first;
done = true;
if (nonterminalFirst.find(i->first) == nonterminalFirst.end())
nonterminalFirst[i->first] = set<string>();
for (int j = 0; j < i->second.size(); ++j) {
if (i->second[j].size() == 0)
nonterminalFirst[i->first].insert("");
else {
string first = i->second[j][0];
if (nonterminalFirst.find(first) != nonterminalFirst.end()) {
for (auto k = nonterminalFirst[first].begin(); k != nonterminalFirst[first].end(); ++k) {
if (nonterminalFirst[name].find(*k) == nonterminalFirst[name].end()) {
nonterminalFirst[name].insert(*k);
done = false;
}
}
} else if (nonterminalFirst[name].find(first) == nonterminalFirst[name].end()) {
nonterminalFirst[name].insert(first);
done = false;
}
}
}
}
// Generate function signatures for the nonterminals
for (auto i = nonterminalRules.begin(); i != nonterminalRules.end(); ++i) {
string name = i->first + "_rule";
outFile << "string " << name << "();" << endl;
}
outFile << endl;
// Generate the nonterminal functions
for (auto i = nonterminalRules.begin(); i != nonterminalRules.end(); ++i) {
string name = i->first + "_rule";
outFile << "string " << name << "() {" << endl
<< " vector<string> results;" << endl
<< " results.push_back(\"\");" << endl
<< endl;
// Check if this rule can match an empty string
int epsilon = -1;
for (int j = 0; epsilon == -1 && j < i->second.size(); ++j)
if (i->second[j].size() == 0)
epsilon = j;
// Generate each production
for (int j = 0; j < i->second.size(); ++j) {
// Nothing to generate for an empty rule
if (j == epsilon)
continue;
string token = i->second[j][0];
if (terminals.find(token) != terminals.end())
outFile << " if (nextToken == \"" << i->second[j][0] << "\") {" << endl;
else {
outFile << " if (";
bool first = true;
for (auto k = nonterminalFirst[token].begin(); k != nonterminalFirst[token].end(); ++k, first = false) {
if (!first)
outFile << " || ";
outFile << "nextToken == \"" << (*k) << "\"";
}
outFile << ") {" << endl;
}
for (int k = 0; k < i->second[j].size(); ++k) {
if (terminals.find(i->second[j][k]) != terminals.end()) {
outFile << " if(same(\"" << i->second[j][k] << "\"))" << endl
<< " results.push_back(prevTokenValue);" << endl
<< " else" << endl
<< " throw \"Syntax error - mismatched token\";" << endl;
} else
outFile << " results.push_back(" << i->second[j][k] << "_rule());" << endl;
}
// Copy rule code to output
outFile << nonterminalCode[i->first][j];
outFile << " }" << endl << endl;
}
if (epsilon == -1)
outFile << " throw \"Syntax error - unmatched token\";" << endl;
else
outFile << nonterminalCode[i->first][epsilon];
outFile << "}" << endl << endl;
}
// Generate the main function
outFile << "int main(int argc, char **argv) {" << endl
<< " if(argc < 2) {" << endl
<< " cout << \"Usage: <input file>\" << endl;" << endl
<< " return 1;" << endl
<< " }" << endl
<< endl
<< " ifstream file(argv[1]);" << endl
<< " string line;" << endl
<< " input = \"\";" << endl
<< endl
<< " while(true) {" << endl
<< " getline(file, line);" << endl
<< " if(!file) break;" << endl
<< " input += line + \"\\n\";" << endl
<< " }" << endl
<< endl
<< " advanceToken();" << endl
<< endl
<< " start_rule();" << endl
<< "}" << endl;
}
Example grammar:
plus \+ minus - times \* div / open \( close \) num [0-9]+ var [a-z]+ string nextLabel() { static string label = "0000"; for(int i = label.length() - 1; i >= 0; --i) { if(label[i] == '9') label[i] = '0'; else { ++label[i]; break; } } return "_" + label; } !! start -> expr start2 if($2 == "") return $1; else { string label = nextLabel(); cout << label << " = " << $1 << " " << $2 << endl; return label; } !! start2 -> plus start return "+ " + $2; !! start2 -> minus start return "- " + $2; !! start2 -> return ""; !! expr -> term expr2 if($2 == "") return $1; else { string label = nextLabel(); cout << label << " = " << $1 << " " << $2 << endl; return label; } !! expr2 -> times expr return "* " + $2; !! expr2 -> div expr return "/ " + $2; !! expr2 -> return ""; !! term -> var return $1; !! term -> num return $1; !! term -> open start close return $2;
Example input to parser (filename passed through command line):
(one + two) * three + four * five
Output (to standard out):
_0001 = one + two _0002 = _0001 * three _0003 = four * five _0004 = _0002 + _0003
Go
The standard library already contains a recursive descent parser for Go programs or expressions, written in Go itself, whose output is an abstract syntax tree (AST) representing such code.
I've therefore applied this parser to the expression designated by the task, which only involves binary expressions and identifiers, and written a separate routine to convert the resulting AST into three-address code.
package main
import (
"fmt"
"go/ast"
"go/parser"
"log"
)
func labelStr(label int) string {
return fmt.Sprintf("_%04d", label)
}
type binexp struct {
op, left, right string
kind, index int
}
func main() {
x := "(one + two) * three - four * five"
fmt.Println("Expression to parse: ", x)
f, err := parser.ParseExpr(x)
if err != nil {
log.Fatal(err)
}
fmt.Println("\nThe abstract syntax tree for this expression:")
ast.Print(nil, f)
fmt.Println("\nThe corresponding three-address code:")
var binexps []binexp
// Inspect nodes in depth-first order.
ast.Inspect(f, func(n ast.Node) bool {
switch x := n.(type) {
case *ast.BinaryExpr:
sx, ok1 := x.X.(*ast.Ident)
sy, ok2 := x.Y.(*ast.Ident)
op := x.Op.String()
if ok1 && ok2 {
binexps = append(binexps, binexp{op, sx.Name, sy.Name, 3, 0})
} else if !ok1 && ok2 {
binexps = append(binexps, binexp{op, "<addr>", sy.Name, 2, 0})
} else if ok1 && !ok2 {
binexps = append(binexps, binexp{op, sx.Name, "<addr>", 1, 0})
} else {
binexps = append(binexps, binexp{op, "<addr>", "<addr>", 0, 0})
}
}
return true
})
for i := 0; i < len(binexps); i++ {
binexps[i].index = i
}
label, last := 0, -1
var ops, args []binexp
var labels []string
for i, be := range binexps {
if be.kind == 0 {
ops = append(ops, be)
}
if be.kind != 3 {
continue
}
label++
ls := labelStr(label)
fmt.Printf(" %s = %s %s %s\n", ls, be.left, be.op, be.right)
for j := i - 1; j > last; j-- {
be2 := binexps[j]
if be2.kind == 2 {
label++
ls2 := labelStr(label)
fmt.Printf(" %s = %s %s %s\n", ls2, ls, be2.op, be2.right)
ls = ls2
be = be2
} else if be2.kind == 1 {
label++
ls2 := labelStr(label)
fmt.Printf(" %s = %s %s %s\n", ls2, be2.left, be2.op, ls)
ls = ls2
be = be2
}
}
args = append(args, be)
labels = append(labels, ls)
lea, leo := len(args), len(ops)
for lea >= 2 {
if i < len(binexps)-1 && args[lea-2].index <= ops[leo-1].index {
break
}
label++
ls2 := labelStr(label)
fmt.Printf(" %s = %s %s %s\n", ls2, labels[lea-2], ops[leo-1].op, labels[lea-1])
ops = ops[0 : leo-1]
args = args[0 : lea-1]
labels = labels[0 : lea-1]
lea--
leo--
args[lea-1] = be
labels[lea-1] = ls2
}
last = i
}
}
- Output:
Expression to parse: (one + two) * three - four * five The abstract syntax tree for this expression: 0 *ast.BinaryExpr { 1 . X: *ast.BinaryExpr { 2 . . X: *ast.ParenExpr { 3 . . . Lparen: 1 4 . . . X: *ast.BinaryExpr { 5 . . . . X: *ast.Ident { 6 . . . . . NamePos: 2 7 . . . . . Name: "one" 8 . . . . . Obj: *ast.Object { 9 . . . . . . Kind: bad 10 . . . . . . Name: "" 11 . . . . . } 12 . . . . } 13 . . . . OpPos: 6 14 . . . . Op: + 15 . . . . Y: *ast.Ident { 16 . . . . . NamePos: 8 17 . . . . . Name: "two" 18 . . . . . Obj: *(obj @ 8) 19 . . . . } 20 . . . } 21 . . . Rparen: 11 22 . . } 23 . . OpPos: 13 24 . . Op: * 25 . . Y: *ast.Ident { 26 . . . NamePos: 15 27 . . . Name: "three" 28 . . . Obj: *(obj @ 8) 29 . . } 30 . } 31 . OpPos: 21 32 . Op: - 33 . Y: *ast.BinaryExpr { 34 . . X: *ast.Ident { 35 . . . NamePos: 23 36 . . . Name: "four" 37 . . . Obj: *(obj @ 8) 38 . . } 39 . . OpPos: 28 40 . . Op: * 41 . . Y: *ast.Ident { 42 . . . NamePos: 30 43 . . . Name: "five" 44 . . . Obj: *(obj @ 8) 45 . . } 46 . } 47 } The corresponding three-address code: _0001 = one + two _0002 = _0001 * three _0003 = four * five _0004 = _0002 - _0003
J
J's native recursive descent parser is adequate for this task, if we map names appropriately.
Implementation:
cocurrent 'base'
inlocale=: 4 :0 L:0
x,'_',y,'_'
)
parse=: 3 :0
sentence=. ;:y
opinds=. (;:'+*-')i.sentence
opfuns=. (;:'plus times minus') inlocale 'base'
scratch=. cocreate''
coinsert__scratch 'base'
names=. ~.sentence#~_1<:nc sentence
(names inlocale scratch)=: names
r=. do__scratch ;:inv opinds}((#sentence)#"0 opfuns),sentence
codestroy__scratch''
r
)
term=: 1 :0
2 :('m''',m,'''expr n')
)
expr=:1 :0
:
r=. genname''
emit r,'=:',x,m,y
r
)
plus=: '+' expr
times=: '*' term
minus=: '-' expr
N=: 10000
genname=: 3 :0
'z',}.":N=: N+1
)
emit=: smoutput
Task example:
parse '(one + two) * three - four * five'
z0001=:four*five
z0002=:one+two
z0003=:z0002*three
z0004=:z0003-z0001
z0004
See also https://github.com/jsoftware/general_misc/blob/master/trace.ijs for a model of the underlying parser being employed here.
Julia
The Julia compiler's own parser is a recursive descent parser, and can be used directly here.
const one, two, three, four, five, six, seven, eight, nine = collect(1:9)
function testparser(s)
cod = Meta.parse(s)
println(Meta.lower(Main, cod))
end
testparser("(one + two) * three - four * five")
- Output:
$(Expr(:thunk, CodeInfo( @ none within `top-level scope' 1 ─ %1 = one + two │ %2 = %1 * three │ %3 = four * five │ %4 = %2 - %3 └── return %4 )))
Perl
#!/usr/bin/perl
use strict; # https://rosettacode.org/wiki/Recursive_descent_parser_generator
use warnings;
use Path::Tiny;
my $h = qr/\G\s*/;
my $identifier = qr/$h([a-z]\w*)\b/i;
my $literal = qr/$h(['"])(.+?)\1/s;
my (%rules, %called, $usercode, %patches);
my $filename = './generatedparser.pl';
sub node { bless [ @_[1..$#_] ], $_[0] }
sub error { die "ERROR: ", s/\G\s*\K/<**ERROR @_**>/r, "\n" }
sub want { /$h\Q$_[1]\E/gc ? shift : error "missing '$_[1]' " }
sub addin { node $_[0] => ref $_[1] eq $_[0] ? @{$_[1]} : $_[1], pop }
local $_ = do { local $/; @ARGV ? <> : <DATA> }; # the EBNF
$usercode = s/^(#usercode.*)//ms ? $1 : "# no usercode included\n";;
$patches{PATCHUSER} = $usercode . "#end usercode\n"; # grammar support code
s/^\h*#.*\n//gm; # remove comment lines
$patches{PATCHGRAMMAR} = s/^(?:\h*\n)+//r =~ s/\n(?:\h*\n)+\z//r;
while( /$identifier\s*=/gc ) # the start of a rule
{
my $name = $1;
$rules{startsymbol} //= node RULE => $name;
my $tree = expr(0);
$rules{$name} = $rules{$name} ? addin ALT => $rules{$name}, $tree : $tree;
/$h[.;]/gc or error 'missing rule terminator, needs . or ;';
}
/\G\s*\z/ or error "incomplete parse at ", substr $_, pos($_) // 0;
%rules or error "no rules found ";
delete @called{keys %rules};
%called and die "\nERROR: undefined rule(s) <@{[ keys %called]}>\n";
sub expr # precedence climbing parser for grammer rules
{
my $tree =
/$h(NAME)\b/gc ? node $1 : # internal name matcher
/$identifier/gc ? do { $called{$1}++; node RULE => $1 } :
/$literal/gc ? node LIT => $2 :
/$h<(\w+)>/gc ? node ACTION => $1 :
/$h\[/gc ? node OPTION => want expr(0), ']' :
/$h\{/gc ? node REPEAT => want expr(0), '}' :
/$h\(/gc ? want expr(0), ')' :
error 'Invalid expression';
$tree =
/\G\s+/gc ? $tree :
$_[0] <= 1 && /\G(?=[[<{('"a-z])/gci ? addin SEQ => $tree, expr(2) :
$_[0] <= 0 && /\G\|/gc ? addin ALT => $tree, expr(1) :
return $tree while 1;
}
my $perlcode = "# generated code (put in Rule:: package)\n";
for my $rule ( sort keys %rules )
{
my $body = $rules{$rule}->code;
$perlcode .= "\nsub Rule::$rule\n\t{\n\t$body\n\t}\n";
}
$perlcode =~ s/^(?:\h*\n)+(?=\h*\})//gm;
$perlcode .= "\n# preceding code was generated for rules\n";
$patches{PATCHGENERATED} = $perlcode;
sub ALT::code
{
my $all = join " or\n\t", map $_->code, @{ $_[0] };
"( # alt\n\t$all )"
}
sub SEQ::code
{
my $all = join " and\n\t", map $_->code, @{ $_[0] };
"( # seq\n\t$all )"
}
sub REPEAT::code { "do { # repeat\n\t1 while @{[ $_[0][0]->code ]} ; 1 }" }
sub OPTION::code { "( # option\n\t@{[ $_[0][0]->code ]} or 1 )" }
sub RULE::code { "Rule::$_[0][0]()" }
sub ACTION::code { "( $_[0][0]() || 1 )" }
sub NAME::code { "( /\\G\$whitespace(\\w+)/gc and push \@stack, \$1 )" }
sub LIT::code { "( /\\G\$whitespace(\Q$_[0][0]\E)/gc and push \@stack, \$1 )" }
$_ = <<'END'; ##################################### template for generated code
#!/usr/bin/perl
use strict; # https://rosettacode.org/wiki/Recursive_descent_parser_generator
use warnings; # WARNING: this code is generated
my @stack;
my $whitespace = qr/\s*/;
my $grammar = <<'GRAMMAR'; # make grammar rules available to usercode
PATCHGRAMMAR
GRAMMAR
PATCHUSER
PATCHGENERATED
local $_ = shift // '(one + two) * three - four * five';
eval { begin() }; # eval because it is optional
Rule::startsymbol();
eval { end() }; # eval because it is optional
/\G\s*\z/ or die "ERROR: incomplete parse\n";
END
s/(PATCH\w+)/$patches{$1}/g; # insert grammar variable stuff
path( $filename )->spew( $_ );
chmod 0555, $filename; # readonly, executable
exec 'perl', $filename, @ARGV or die "exec failed $!";
__DATA__
expr = term { plus term <gen3addr> } .
term = factor { times factor <gen3addr> } .
factor = primary [ '^' factor <gen3addr> ] .
primary = '(' expr ')' <removeparens> | NAME .
plus = "+" | "-" .
times = "*" | "/" .
#usercode -- separator for included code for actions
my $temp = '0000';
sub begin { print "parsing: $_\n\n" }
sub gen3addr
{
@stack >= 3 or die "not enough on stack";
my @three = splice @stack, -3, 3, my $t = '_' . ++$temp;
print "$t = @three\n";
}
sub removeparens
{
@stack >= 3 or die "not enough on stack";
splice @stack, -3, 3, $stack[-2];
}
Running the above with no arguments uses a default grammar that will solve the specified example. It produces the following perl script (and then runs it).
#!/usr/bin/perl
use strict; # https://rosettacode.org/wiki/Recursive_descent_parser_generator
use warnings; # WARNING: this code is generated
my @stack;
my $whitespace = qr/\s*/;
my $grammar = <<'GRAMMAR'; # make grammar rules available to usercode
expr = term { plus term <gen3addr> } .
term = factor { times factor <gen3addr> } .
factor = primary [ '^' factor <gen3addr> ] .
primary = '(' expr ')' <removeparens> | NAME .
plus = "+" | "-" .
times = "*" | "/" .
GRAMMAR
#usercode -- separator for included code for actions
my $temp = '0000';
sub begin { print "parsing: $_\n\n" }
sub gen3addr
{
@stack >= 3 or die "not enough on stack";
my @three = splice @stack, -3, 3, my $t = '_' . ++$temp;
print "$t = @three\n";
}
sub removeparens
{
@stack >= 3 or die "not enough on stack";
splice @stack, -3, 3, $stack[-2];
}
#end usercode
# generated code (put in Rule:: package)
sub Rule::expr
{
( # seq
Rule::term() and
do { # repeat
1 while ( # seq
Rule::plus() and
Rule::term() and
( gen3addr() || 1 ) ) ; 1 } )
}
sub Rule::factor
{
( # seq
Rule::primary() and
( # option
( # seq
( /\G$whitespace(\^)/gc and push @stack, $1 ) and
Rule::factor() and
( gen3addr() || 1 ) ) or 1 ) )
}
sub Rule::plus
{
( # alt
( /\G$whitespace(\+)/gc and push @stack, $1 ) or
( /\G$whitespace(\-)/gc and push @stack, $1 ) )
}
sub Rule::primary
{
( # alt
( # seq
( /\G$whitespace(\()/gc and push @stack, $1 ) and
Rule::expr() and
( /\G$whitespace(\))/gc and push @stack, $1 ) and
( removeparens() || 1 ) ) or
( /\G$whitespace(\w+)/gc and push @stack, $1 ) )
}
sub Rule::startsymbol
{
Rule::expr()
}
sub Rule::term
{
( # seq
Rule::factor() and
do { # repeat
1 while ( # seq
Rule::times() and
Rule::factor() and
( gen3addr() || 1 ) ) ; 1 } )
}
sub Rule::times
{
( # alt
( /\G$whitespace(\*)/gc and push @stack, $1 ) or
( /\G$whitespace(\/)/gc and push @stack, $1 ) )
}
# preceding code was generated for rules
local $_ = shift // '(one + two) * three - four * five';
eval { begin() }; # eval because it is optional
Rule::startsymbol();
eval { end() }; # eval because it is optional
/\G\s*\z/ or die "ERROR: incomplete parse\n";
The above script can also be run stand-alone and produces the following output.
- Output:
parsing: (one + two) * three - four * five _0001 = one + two _0002 = _0001 * three _0003 = four * five _0004 = _0002 - _0003
Different grammars and input can be specified on the command line.
recursivedescentparsergenerator.pl arithexpr.y '2 * 3 + 4 * 5'
and giving this file as "arithexpr.y"
# test arith expr
expr = term { '+' term <fadd> | '-' term <fsub> } .
term = factor { '*' factor <fmul> | '/' factor <fdiv> } .
factor = '(' expr ')' <noparen> | NAME .
#usercode
sub noparen { splice @stack, -3, 3, $stack[-2]; }
sub fadd { splice @stack, -3, 3, $stack[-3] + $stack[-1] }
sub fsub { splice @stack, -3, 3, $stack[-3] - $stack[-1] }
sub fmul { splice @stack, -3, 3, $stack[-3] * $stack[-1] }
sub fdiv { splice @stack, -3, 3, $stack[-3] / $stack[-1] }
sub begin { print "expr = $_\n" }
sub end { print "answer = @{[pop @stack]}\n" }
will produce the following
- Output:
expr = 2 * 3 + 4 * 5 answer = 26
Phix
Technically the task is asking for code which generates something like the following, so I suppose
it would actually meet the spec if it began with constant src = """
and ended with
""" puts(1,src)
... (and like several/most other entries on this page, this does not use a formal grammer)
-- -- demo\rosetta\RecursiveDescentParser.exw -- ======================================= -- with javascript_semantics string src integer ch, sdx sequence tok procedure skip_spaces() while 1 do if sdx>length(src) then exit end if ch = src[sdx] if not find(ch,{' ','\t','\r','\n'}) then exit end if sdx += 1 end while end procedure procedure next_token() -- yeilds one of: -- {"SYMBOL",ch} where ch is one of "()+-/*", or -- {"IDENT",string}, or {"EOF"} skip_spaces() integer tokstart = sdx if sdx>length(src) then tok = {"EOF"} elsif find(ch,"()+-/*") then sdx += 1 tok = {"SYMBOL",ch&""} elsif (ch>='a' and ch<='z') or (ch>='A' and ch<='Z') then while true do sdx += 1 if sdx>length(src) then exit end if ch = src[sdx] if ch!='_' and (ch<'a' or ch>'z') and (ch<'A' or ch>'Z') and (ch<'0' or ch>'9') then exit end if end while tok = {"IDENT",src[tokstart..sdx-1]} else ?9/0 end if end procedure forward function sum_expr() function primary() sequence res if tok[1]="IDENT" then res = tok next_token() elsif tok={"SYMBOL","("} then next_token() res = sum_expr() if tok!={"SYMBOL",")"} then ?9/0 end if next_token() else ?9/0 end if return res end function function mul_expr() sequence res = primary() while true do if tok[1]!="SYMBOL" or not find(tok[2],{"*","/"}) then exit end if res = {tok,res,NULL} next_token() res[3] = primary() end while return res end function function sum_expr() sequence res = mul_expr() while true do if tok[1]!="SYMBOL" or not find(tok[2],{"+","-"}) then exit end if res = {tok,res,NULL} next_token() res[3] = mul_expr() end while return res end function integer nxt = 1 function show_ast(sequence ast) if ast[1][1]="SYMBOL" then string op = ast[1][2], lhs = show_ast(ast[2]), rhs = show_ast(ast[3]), nid = sprintf("_%04d",nxt) printf(1,"%s = %s %s %s\n",{nid,lhs,op,rhs}) nxt += 1 return nid elsif ast[1]="IDENT" then return ast[2] end if ?9/0 end function procedure parse(string source) src = source sdx = 1 next_token() sequence ast = sum_expr() if tok!={"EOF"} then ?9/0 end if pp(ast,{pp_Nest,4}) {} = show_ast(ast) end procedure parse("(one + two) * three - four * five") {} = wait_key()
- Output:
{{`SYMBOL`, `-`}, {{`SYMBOL`, `*`}, {{`SYMBOL`, `+`}, {`IDENT`, `one`}, {`IDENT`, `two`}}, {`IDENT`, `three`}}, {{`SYMBOL`, `*`}, {`IDENT`, `four`}, {`IDENT`, `five`}}} _0001 = one + two _0002 = _0001 * three _0003 = four * five _0004 = _0002 - _0003
Raku
(formerly Perl 6) Instead of writing a full-blown generator, it is possible to solve the task partly by making use of the built-in optimizer and study the relevant AST output
raku --target=optimize -e 'no strict; say ($one + $two) * $three - $four * $five' | tail -11 - QAST::Op(callstatic &say) <sunk> :statement_id<2> say ($one + $two) * $three - $four * $five - QAST::Op(callstatic &infix:<->) <wanted> - - QAST::Op(callstatic &infix:<*>) <wanted> * - QAST::Op(callstatic &infix:<+>) <wanted> :statement_id<3> + - QAST::Var(local __lowered_lex_5) <wanted> $one - QAST::Var(local __lowered_lex_4) <wanted> $two - QAST::Var(local __lowered_lex_3) <wanted> $three - QAST::Op(callstatic &infix:<*>) <wanted> * - QAST::Var(local __lowered_lex_2) <wanted> $four - QAST::Var(local __lowered_lex_1) <wanted> $five - QAST::WVal(Nil)
As you can see by examining the nested tree, the calculations are done as follows (expressed using a postfix notation)
one two + three * four five * -
Wren
import "./str" for Char
import "./fmt" for Fmt
class RDP {
construct parse(source) {
_src = source
_sdx = 0
_ch = null
_tok = null
_nxt = 1
nextToken()
var ast = sumExpr()
if (_tok[0] != "EOF") Fiber.abort("Something went wrong.")
printAst(ast, 0)
System.print()
showAst(ast)
}
skipSpaces() {
while (true) {
if (_sdx >= _src.count) return
_ch = _src[_sdx]
if (!" \t\r\n".contains(_ch)) return
_sdx = _sdx + 1
}
}
// yields one of:
// ["SYMBOL", ch] where ch is one of "()+-/*", or
// ["IDENT", string] or ["EOF"]
nextToken() {
skipSpaces()
var tokStart = _sdx
if (_sdx >= _src.count) {
_tok = ["EOF"]
} else if ("()+-/*".contains(_ch)) {
_sdx = _sdx + 1
_tok = ["SYMBOL", _ch]
} else if (Char.isAsciiLetter(_ch)) {
while (true) {
_sdx = _sdx + 1
if (_sdx >= _src.count) break
_ch = _src[_sdx]
if (!Char.isAsciiAlphaNum(_ch) && _ch != "_") break
}
_tok = ["IDENT", _src[tokStart..._sdx]]
} else {
Fiber.abort("Invalid token '%(_ch)'.")
}
}
primary() {
var res = []
if (_tok[0] == "IDENT") {
res = _tok.toList
nextToken()
} else if (_tok[0] == "SYMBOL" && _tok[1] == "(") {
nextToken()
res = sumExpr()
if (_tok[0] != "SYMBOL" || _tok[1] != ")") Fiber.abort("Unexpected token '%(_tok)'.")
nextToken()
} else {
Fiber.abort("Unexpected token '%(_tok)'.")
}
return res
}
mulExpr() {
var res = primary()
while (true) {
if (_tok[0] != "SYMBOL" || !"*/".contains(_tok[1])) break
res = [_tok, res, null]
nextToken()
res[2] = primary()
}
return res
}
sumExpr() {
var res = mulExpr()
while (true) {
if (_tok[0] != "SYMBOL" || !"+-".contains(_tok[1])) break
res = [_tok, res, null]
nextToken()
res[2] = mulExpr()
}
return res
}
showAst(ast) {
if (ast[0][0] == "SYMBOL") {
var op = ast[0][1]
var lhs = showAst(ast[1])
var rhs = showAst(ast[2])
var thiz = Fmt.swrite("_$04d", _nxt)
Fmt.print("$s = $s $s $s", thiz, lhs, op, rhs)
_nxt = _nxt + 1
return thiz
} else if (ast[0] == "IDENT") {
return ast[1]
}
Fiber.abort("Something went wrong.")
}
printAst(ast, level) {
for (e in ast) {
var indent = " " * level
if (!(e is List)) {
System.print(indent + e)
} else {
System.print(indent + "{")
printAst(e, level+1)
System.print(indent + "}")
}
}
}
}
RDP.parse("(one + two) * three - four * five")
- Output:
{ SYMBOL - } { { SYMBOL * } { { SYMBOL + } { IDENT one } { IDENT two } } { IDENT three } } { { SYMBOL * } { IDENT four } { IDENT five } } _0001 = one + two _0002 = _0001 * three _0003 = four * five _0004 = _0002 - _0003