Parse EBNF: Difference between revisions

11,352 bytes added ,  2 years ago
Added Wren
m (→‎{{header|Raku}}: Fix code and comments: Perl 6 --> Raku)
(Added Wren)
Line 2,355:
PLUS {MINUS {MINUS 1 2} {MULT -3 4}} 5
MINUS {MINUS 1 2} {MULT -3 {PLUS 4 5}}
</pre>
 
=={{header|Wren}}==
{{trans|Phix}}
{{libheader|Wren-fmt}}
{{libheader|Wren-trait}}
{{libheader|Wren-str}}
{{libheader|Wren-seq}}
Translated via the Go entry.
<lang ecmascript>import "/fmt" for Conv, Fmt
import "/trait" for Stepped
import "/str" for Char
import "/seq" for Lst
 
var src = ""
var ch = ""
var sdx = 0
var token = null
var isSeq = false
var err = false
var idents = []
var ididx = []
var productions = []
var extras = []
var results = ["pass", "fail"]
 
var invalid = Fn.new { |msg|
err = true
System.print(msg)
sdx = src.count // set to EOF
return -1
}
 
var skipSpaces = Fn.new {
while (true) {
if (sdx >= src.count) return
ch = src[sdx]
if (!" \t\r\n".contains(ch)) return
sdx = sdx + 1
}
}
 
var getToken = Fn.new {
// Yields a single character token, one of {}()[]|=.;
// or {"terminal",string} or {"ident", string} or -1.
skipSpaces.call()
if (sdx >= src.count) {
token = -1
isSeq = false
return
}
var tokstart = sdx
if ("{}()[]|=.;".contains(ch)) {
sdx = sdx + 1
token = ch
isSeq = false
} else if (ch == "\"" || ch == "'") {
var closech = ch
for (tokend in Stepped.ascend(sdx + 1...src.count)) {
if (src[tokend] == closech) {
sdx = tokend + 1
token = ["terminal", src[tokstart+1...tokend]]
isSeq = true
return
}
}
token = invalid.call("no closing quote")
isSeq = false
} else if (Char.isAsciiLower(ch)) {
// To simplify things for the purposes of this task,
// identifiers are strictly a-z only, not A-Z or 1-9.
while (true) {
sdx = sdx + 1
if (sdx >= src.count) break
ch = src[sdx]
if (!Char.isAsciiLower(ch)) break
}
token = ["ident", src[tokstart...sdx]]
isSeq = true
} else {
token = invalid.call("invalid ebnf")
isSeq = false
}
}
 
var matchToken = Fn.new { |ch|
if (token != ch) {
token = invalid.call("invalid ebnf (%(ch) expected)")
isSeq = false
} else {
getToken.call()
}
}
 
var addIdent = Fn.new { |ident|
var k = -1
var i = 0
for (id in idents) {
if (id == ident) {
k = i
break
}
i = i + 1
}
if (k == -1) {
idents.add(ident)
k = idents.count - 1
ididx.add(-1)
}
return k
}
 
var expression // forward declaration
 
var factor = Fn.new {
var res
if (isSeq) {
if (token[0] == "ident") {
var idx = addIdent.call(token[1])
token.add(idx)
}
res = token
getToken.call()
} else if (token == "[") {
getToken.call()
res = ["optional", expression.call()]
matchToken.call("]")
} else if (token == "(") {
getToken.call()
res = expression.call()
matchToken.call(")")
} else if (token == "{") {
getToken.call()
res = ["repeat", expression.call()]
matchToken.call("}")
} else {
Fiber.abort("invalid token in factor() function")
}
if (res is Sequence && res.count == 1) return res[0]
return res
}
 
var term = Fn.new {
var res = [factor.call()]
var tokens = [-1, "|", ".", ";", ")", "]", "}"]
while (true) {
var outer = false
for (t in tokens) {
if (t == token) {
outer = true
break
}
}
if (outer) break
res.add(factor.call())
}
if (res.count == 1) return res[0]
return res
}
 
// declared earlier
expression = Fn.new {
var res = [term.call()]
if (token == "|") {
res = ["or", res[0]]
while (token == "|") {
getToken.call()
res.add(term.call())
}
}
if (res.count == 1) return res[0]
return res
}
 
var production = Fn.new {
// Returns a token or -1; the real result is left in 'productions' etc,
getToken.call()
if (token != "}") {
if (token == -1) return invalid.call("invalid ebnf (missing closing })")
if (!isSeq) return -1
if (token[0] != "ident") return -1
var ident = token[1]
var idx = addIdent.call(ident)
getToken.call()
matchToken.call("=")
if (token == -1) return -1
productions.add([ident, idx, expression.call()])
ididx[idx] = productions.count - 1
}
return token
}
 
// Adjusts Wren's normal printing of lists to look more like Phix output.
var pprint = Fn.new { |ob, header|
Fmt.print("\n$s:", header)
var quote // recursive closure
quote = Fn.new { |list|
for (i in 0...list.count) {
if (list[i] is String) {
list[i] = Fmt.swrite("$q", list[i])
} else if (list[i] is List) quote.call(list[i])
}
}
var obs
if (ob is String) {
obs = Fmt.swrite("$q", ob)
} else if (ob is List) {
obs = Lst.clone(ob)
quote.call(obs)
}
var pp = obs.toString
pp = pp.replace("[", "{")
pp = pp.replace("]", "}")
for (i in 0...idents.count) {
var xs = Fmt.swrite("'\\x$02d'", i)
pp = pp.replace(xs, i.toString)
}
System.print(pp)
}
 
var parse = Fn.new { |ebnf|
// Returns +1 if ok, -1 if bad.
System.print("parse:\n%(ebnf) ===>")
err = false
src = ebnf
sdx = 0
idents.clear()
ididx.clear()
productions.clear()
extras.clear()
getToken.call()
if (isSeq) {
token[0] = "title"
extras.add(token)
getToken.call()
}
if (token != "{") return invalid.call("invalid ebnf (missing opening {)")
while (true) {
token = production.call()
if (token == "}" || token == -1) break
}
getToken.call()
if (isSeq) {
token[0] = "comment"
extras.add(token)
getToken.call()
}
if (token != -1) return invalid.call("invalid ebnf (missing eof?)")
if (err) return -1
var k = -1
var i = 0
for (idx in ididx) {
if (idx == -1) {
k = i
break
}
i = i + 1
}
if (k != -1) return invalid.call("invalid ebnf (undefined:%(idents[k]))")
pprint.call(productions, "productions")
pprint.call(idents, "idents")
pprint.call(ididx, "ididx")
pprint.call(extras, "extras")
return 1
}
 
// The rules that applies() has to deal with are:
// {factors} - if rule[0] is not string,
// just apply one after the other recursively.
// {"terminal", "a1"} -- literal constants
// {"or", <e1>, <e2>, ...} -- (any) one of n
// {"repeat", <e1>} -- as per "{}" in ebnf
// {"optional", <e1>} -- as per "[]" in ebnf
// {"ident", <name>, idx} -- apply the sub-rule
var applies // recursive function
applies = Fn.new { |rule|
var wasSdx = sdx // in case of failure
var r1 = rule[0]
if (!(r1 is String)) {
for (i in 0...rule.count) {
if (!applies.call(rule[i])) {
sdx = wasSdx
return false
}
}
} else if (r1 == "terminal") {
skipSpaces.call()
var r2 = rule[1]
for (i in 0...r2.count) {
if (sdx >= src.count || src[sdx] != r2[i]) {
sdx = wasSdx
return false
}
sdx = sdx + 1
}
} else if (r1 == "or") {
for (i in Stepped.ascend(1...rule.count)) {
if (applies.call(rule[i])) return true
}
sdx = wasSdx
return false
} else if (r1 == "repeat") {
while (applies.call(rule[1])) {}
} else if (r1 == "optional") {
applies.call(rule[1])
} else if (r1 == "ident") {
var i = rule[2]
var ii = ididx[i]
if (!(applies.call(productions[ii][2]))) {
sdx = wasSdx
return false
}
} else {
Fiber.abort("invalid rule in applies() function")
}
return true
}
 
var checkValid = Fn.new { |test|
src = test
sdx = 0
var res = applies.call(productions[0][2])
skipSpaces.call()
if (sdx < src.count) res = false
Fmt.print("$q, $s", test, results[1-Conv.btoi(res)])
}
 
var ebnfs = [
""""a" {
a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z" """,
"""{
expr = term { plus term } .
term = factor { times factor } .
factor = number | '(' expr ')' .
 
plus = "+" | "-" .
times = "*" | "/" .
 
number = digit { digit } .
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
}""",
"a = \"1\"",
"""{ a = "1" ;""",
"""{ hello world = "1"; }""",
"""{ foo = bar . }"""
]
 
var tests = [
[
"a1a3a4a4a5a6",
"a1 a2a6",
"a1 a3 a4 a6",
"a1 a4 a5 a6",
"a1 a2 a4 a5 a5 a6",
"a1 a2 a4 a5 a6 a7",
"your ad here"
],
[
"2",
"2*3 + 4/23 - 7",
"(3 + 4) * 6-2+(4*(4))",
"-2",
"3 +",
"(4 + 3"
]
]
 
var i = 0
for (ebnf in ebnfs) {
if (parse.call(ebnf) == 1) {
System.print("\ntests:")
for (test in tests[i]) checkValid.call(test)
}
System.print()
i = i + 1
}</lang>
 
{{out}}
<pre>
parse:
"a" {
a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z" ===>
 
productions:
{{"a", 0, {{"terminal", "a1"}, {"or", {"terminal", "a2"}, {"terminal", "a3"}}, {"repeat", {"terminal", "a4"}}, {"optional", {"terminal", "a5"}}, {"terminal", "a6"}}}}
 
idents:
{"a"}
 
ididx:
{0}
 
extras:
{{"title", "a"}, {"comment", "z"}}
 
tests:
"a1a3a4a4a5a6", pass
"a1 a2a6", pass
"a1 a3 a4 a6", pass
"a1 a4 a5 a6", fail
"a1 a2 a4 a5 a5 a6", fail
"a1 a2 a4 a5 a6 a7", fail
"your ad here", fail
 
parse:
{
expr = term { plus term } .
term = factor { times factor } .
factor = number | '(' expr ')' .
 
plus = "+" | "-" .
times = "*" | "/" .
 
number = digit { digit } .
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
} ===>
 
productions:
{{"expr", 0, {{"ident", "term", 1}, {"repeat", {{"ident", "plus", 2}, {"ident", "term", 1}}}}}, {"term", 1, {{"ident", "factor", 3}, {"repeat", {{"ident", "times", 4}, {"ident", "factor", 3}}}}}, {"factor", 3, {"or", {"ident", "number", 5}, {{"terminal", "("}, {"ident", "expr", 0}, {"terminal", ")"}}}}, {"plus", 2, {"or", {"terminal", "+"}, {"terminal", "-"}}}, {"times", 4, {"or", {"terminal", "*"}, {"terminal", "/"}}}, {"number", 5, {{"ident", "digit", 6}, {"repeat", {"ident", "digit", 6}}}}, {"digit", 6, {"or", {"terminal", "0"}, {"terminal", "1"}, {"terminal", "2"}, {"terminal", "3"}, {"terminal", "4"}, {"terminal", "5"}, {"terminal", "6"}, {"terminal", "7"}, {"terminal", "8"}, {"terminal", "9"}}}}
 
idents:
{"expr", "term", "plus", "factor", "times", "number", "digit"}
 
ididx:
{0, 1, 3, 2, 4, 5, 6}
 
extras:
{}
 
tests:
"2", pass
"2*3 + 4/23 - 7", pass
"(3 + 4) * 6-2+(4*(4))", pass
"-2", fail
"3 +", fail
"(4 + 3", fail
 
parse:
a = "1" ===>
invalid ebnf (missing opening {)
 
parse:
{ a = "1" ; ===>
invalid ebnf (missing closing })
 
parse:
{ hello world = "1"; } ===>
invalid ebnf (= expected)
 
parse:
{ foo = bar . } ===>
invalid ebnf (undefined:bar)
</pre>
9,479

edits