Parse EBNF: Difference between revisions

64,340 bytes added ,  4 months ago
Added Algol 68
(Rename Perl 6 -> Raku, alphabetize, minor clean-up)
(Added Algol 68)
 
(10 intermediate revisions by 7 users not shown)
Line 1:
{{clarified-review}}{{draft task}}[[Category:Recursion]]
[[Category:Recursion]]
Write a program that can parse a grammar in Extended Backus–Naur Form and then parse something else according to the grammar. The program is only required to decide whether or not the something else belongs to the language described by the grammar, but for extra credit, it can output a syntax tree. See [[Parse EBNF/Tests|the tests]].
{{clarified-review}}
 
 
;Task:
Write a program that can parse a grammar in   Extended Backus–Naur Form   ('''EBNF'''),   and then parse something else according to the grammar.
 
The program is only required to decide whether or not the something else belongs to the language described by the grammar,  but for extra credit,   it can output a syntax tree.
 
See [[Parse EBNF/Tests|the tests]].
<br><br>
 
=={{header|ALGOL 68}}==
 
The source of the Algol 68 sample is somewhat largish, so is on a separate page on Rosetta Code here: [[Parse EBNF/ALGOL 68]].
{{out}}
<pre>
Valid EBNF: a/z: 'a' { a = 'a1' ( 'a2' | 'a3' ) { 'a4' } [ 'a5' ] 'a6' ; } 'z'
a: seq
literal "a1"
oneof
seq
literal "a2"
seq
literal "a3"
list
literal "a4"
opt
literal "a5"
literal "a6"
 
 
"a1a3a4a4a5a6" is valid according to a
 
"a1 a2a6" is valid according to a
 
"a1 a3 a4 a6" is valid according to a
 
**** Syntax error near "a1 a4 a5 a6"
"a1 a4 a5 a6" is not valid according to a
 
**** Syntax error near "a5 a5 a6"
"a1 a2 a4 a5 a5 a6" is not valid according to a
 
**** Unexpected text: "a7" at the end of source
"a1 a2 a4 a5 a6 a7" is not valid according to a
 
**** Syntax error near "your ad here"
"your ad here" is not valid according to a
 
 
Valid EBNF: Arithmetic expressions:
'Arithmetic expressions'
{
expr = term { plus term } .
term = factor { times factor } .
factor = number | '(' expr ')' .
plus = '+' | '-' .
times = '*' | '/' .
number = digit { digit } .
digit = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' .
}
expr: seq
rule term
list
rule plus
rule term
term: seq
rule factor
list
rule times
rule factor
factor: oneof
seq
rule number
seq
literal "("
rule expr
literal ")"
plus: oneof
seq
literal "+"
seq
literal "-"
times: oneof
seq
literal "*"
seq
literal "/"
number: seq
rule digit
list
rule digit
digit: oneof
seq
literal "0"
seq
literal "1"
seq
literal "2"
seq
literal "3"
seq
literal "4"
seq
literal "5"
seq
literal "6"
seq
literal "7"
seq
literal "8"
seq
literal "9"
 
 
"2" is valid according to Arithmetic expressions
 
"2*3 + 4/23 - 7" is valid according to Arithmetic expressions
 
"(3 + 4) * 6-2+(4*(4))" is valid according to Arithmetic expressions
 
**** Syntax error near "-2"
"-2" is not valid according to Arithmetic expressions
 
**** Unexpected text: "+" at the end of source
"3 +" is not valid according to Arithmetic expressions
 
**** Syntax error near " 3"
"(4 + 3" is not valid according to Arithmetic expressions
 
 
**** Expected "{", not "a"
Invalid EBNF: a = '1';
 
**** Expected "}", not "(eof)"
Invalid EBNF: { a = '1' ;
 
**** Expected "=", not "world"
Invalid EBNF: { hello world = '1'; }
 
**** Rule "bar" not defined
Invalid EBNF: { foo = bar . }
</pre>
 
=={{header|Go}}==
Line 6 ⟶ 149:
<br>
A more or less faithful translation except that indices are 0-based rather than 1-based and so 1 less than in the Phix results.
<langsyntaxhighlight lang="go">package main
 
import (
Line 410 ⟶ 553:
fmt.Println()
}
}</langsyntaxhighlight>
 
{{out}}
Line 493 ⟶ 636:
We use Parsec to generate Parsec.
 
<langsyntaxhighlight lang="haskell">import Control.Applicative
import Control.Monad
import Data.Maybe
Line 665 ⟶ 808:
lc c = char c <* ws
 
ws = many $ oneOf " \n\t"</langsyntaxhighlight>
 
=={{header|Julia}}==
 
Creates a Grammar struct from an EBNF grammar, which has a matching Regex for each production of the EBNF grammar.
 
Tested with Julia v1.7.2
 
<syntaxhighlight lang="julia">
struct Grammar
regex::Regex
rules::Dict{Symbol,Regex}
root::Regex
function Grammar(regex::Regex)
names = keys(match(regex, ""))
new(regex,
Dict{Symbol,Regex}(
Symbol(name) => Regex(
"$(regex.pattern)(?&$(name))",
regex.compile_options,
regex.match_options
)
for name in names if name isa String),
Regex(
"$(regex.pattern)^\\s*(?&$(first(names)))\\s*\$",
regex.compile_options,
regex.match_options))
end
end
 
Base.getindex(grammar::Grammar, key) = grammar.rules[key]
Base.occursin(grammar::Grammar, str::AbstractString) = occursin(grammar.root, str)
Base.show(io::IO, grammar::Grammar) = print(io, "Grammar(", grammar.regex, ")")
 
const ebnfgrammar = Grammar(r"""
(?(DEFINE) # EBNF syntax
(?<syntax>
(?&title)? \s*+ { \s*+ ( (?&production) \s*+ )* } \s*+ (?&comment)? )
(?<production>
(?&identifier) \s*+ = \s*+ (?&expression) \s*+ [.;] )
(?<expression>
(?&term) ( \s*+ \| \s*+ (?&term) )* )
(?<term>
(?&factor) ( \s*+ (?&factor) )* )
(?<factor>
(?&identifier)
| (?&literal)
| \[ \s*+ (?&expression) \s*+ \]
| \( \s*+ (?&expression) \s*+ \)
| { \s*+ (?&expression) \s*+ } )
(?<identifier>
[^'"\s=|(){}[\].;]+ )
(?<title>
(?&literal) )
(?<comment>
(?&literal) )
(?<literal>
' [^']+ '
| " [^"]+ " )
)"""x)
 
function syntax(grammar::Grammar, str::AbstractString)
if occursin(grammar[:syntax], str)
"""(?(DEFINE)$(join(production(grammar, eachmatch(grammar[:production], str)))))"""
else
throw(ErrorException("Invalid EBNF syntax."))
end
end
 
production(grammar::Grammar, iter) = (production(grammar, m.match) for m in iter)
function production(grammar::Grammar, str::AbstractString)
rstrip(c -> isspace(c) || c == '.' || c == ';', str)
id, expr = split(str, r"\s*=\s*", limit=2)
"(?<$(id)>" * join(
expression(grammar, eachmatch(grammar[:expression], expr)),
"\\s*+|\\s*+") * "\\s*+)"
end
 
expression(grammar::Grammar, iter) = (expression(grammar, m.match) for m in iter)
function expression(grammar::Grammar, str::AbstractString)
join(term(grammar, eachmatch(grammar[:term], str)), "\\s*+|\\s*+")
end
 
term(grammar::Grammar, iter) = (term(grammar, m.match) for m in iter)
function term(grammar::Grammar, str::AbstractString)
join(factor(grammar, eachmatch(grammar[:factor], str)), "\\s*+")
end
 
factor(grammar::Grammar, iter) = (factor(grammar, m.match) for m in iter)
function factor(grammar::Grammar, str::AbstractString)
str = strip(str)
if startswith(str, r"['\"]")
literal(grammar, str)
elseif startswith(str, r"[[({]")
"(\\s*+" * expression(grammar, str[begin+1:end-1]) * (
if startswith(str, '[')
"\\s*+)?"
elseif startswith(str, '{')
"\\s*+)*"
else
"\\s*+)"
end
)
else
"(?&$(str))"
end
end
 
literal(::Grammar, str::AbstractString) = "\\Q$(str[begin+1:end-1])\\E"
 
grammar(ebnf::AbstractString) = Grammar(Regex(syntax(ebnfgrammar, ebnf)))
 
using Test
 
oneliner = grammar("""
"a" {
a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"
""")
 
@testset "One liner" begin
@test occursin(oneliner, "a1a3a4a4a5a6")
@test occursin(oneliner, "a1 a2a6")
@test occursin(oneliner, "a1 a3 a4 a6")
@test !occursin(oneliner, "a1 a4 a5 a6")
@test !occursin(oneliner, "a1 a2 a4 a5 a5 a6")
@test !occursin(oneliner, "a1 a2 a4 a5 a6 a7")
@test !occursin(oneliner, "your ad here")
end
 
arithmetic = grammar("""
{
expr = term { plus term } .
term = factor { times factor } .
factor = number | '(' expr ')' .
 
plus = "+" | "-" .
times = "*" | "/" .
 
number = digit { digit } .
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
}
""")
 
@testset "Arithmetic expressions" begin
@test occursin(arithmetic, "2")
@test occursin(arithmetic, "2*3 + 4/23 - 7")
@test occursin(arithmetic, "(3 + 4) * 6-2+(4*(4))")
@test !occursin(arithmetic, "-2")
@test !occursin(arithmetic, "3 +")
@test !occursin(arithmetic, "(4 + 3")
end
 
@testset "Invalid EBNF" begin
@test_throws ErrorException grammar("""a = "1";""")
@test_throws ErrorException grammar("""{ a = "1";""")
@test_throws ErrorException grammar("""{ hello world = "1"; }""")
@test_throws ErrorException grammar("{ foo = bar . }")
end
 
</syntaxhighlight>
 
 
=={{header|Modula-2}}==
<langsyntaxhighlight lang="modula2">MODULE EBNF;
 
FROM ASCII IMPORT EOL;
Line 756 ⟶ 1,060:
Tabulate (T0);
Tabulate (T1);
END EBNF.</langsyntaxhighlight>
And the source for the EBNF scanner. I hope you like nested procedures.
<langsyntaxhighlight lang="modula2">IMPLEMENTATION MODULE EBNFScanner;
 
FROM ASCII IMPORT LF;
Line 925 ⟶ 1,229:
Ino := 0;
ch := ' '
END EBNFScanner.</langsyntaxhighlight>
 
=={{header|Perl}}==
<langsyntaxhighlight lang="perl">#!/usr/bin/perl
 
use strict; # http://www.rosettacode.org/wiki/Parse_EBNF
Line 1,102 ⟶ 1,406:
----------------------------------------------------------------------
{ foo = bar . } "undefined production check"
----------------------------------------------------------------------</langsyntaxhighlight>
{{out}}
<pre>
Line 1,216 ⟶ 1,520:
 
=={{header|Phix}}==
<!--<syntaxhighlight lang="phix">(phixonline)-->
<lang Phix>string src
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
integer ch, sdx
<span style="color: #004080;">string</span> <span style="color: #000000;">src</span>
object token
<span style="color: #004080;">integer</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">sdx</span>
 
<span style="color: #004080;">object</span> <span style="color: #000000;">token</span>
bool error = false
function invalid(string msg)
<span style="color: #004080;">bool</span> <span style="color: #000000;">error</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">false</span>
error = true
<span style="color: #008080;">function</span> <span style="color: #000000;">invalid</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">msg</span><span style="color: #0000FF;">)</span>
?msg
<span style="color: #000000;">error</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span>
sdx = length(src)+1 -- (set to eof)
<span style="color: #0000FF;">?</span><span style="color: #000000;">msg</span>
return -1
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">src</span><span style="color: #0000FF;">)+</span><span style="color: #000000;">1</span> <span style="color: #000080;font-style:italic;">-- (set to eof)</span>
end function
<span style="color: #008080;">return</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span>
 
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
procedure skip_spaces()
while 1 do
<span style="color: #008080;">procedure</span> <span style="color: #000000;">skip_spaces</span><span style="color: #0000FF;">()</span>
if sdx>length(src) then exit end if
<span style="color: #008080;">while</span> <span style="color: #000000;">1</span> <span style="color: #008080;">do</span>
ch = src[sdx]
<span style="color: #008080;">if</span> <span style="color: #000000;">sdx</span><span style="color: #0000FF;">></span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">src</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
if not find(ch," \t\r\n") then exit end if
<span style="color: #000000;">ch</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">src</span><span style="color: #0000FF;">[</span><span style="color: #000000;">sdx</span><span style="color: #0000FF;">]</span>
sdx += 1
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">,{</span><span style="color: #008000;">' '</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'\t'</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'\r'</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'\n'</span><span style="color: #0000FF;">})</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end while
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
end procedure
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
 
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
procedure get_token()
-- yeilds a single character token, one of {}()[]|=.;
<span style="color: #008080;">procedure</span> <span style="color: #000000;">get_token</span><span style="color: #0000FF;">()</span>
-- or {"terminal",string} or {"ident", string} or -1.
<span style="color: #000080;font-style:italic;">-- yeilds a single character token, one of {}()[]|=.;
skip_spaces()
-- or {"terminal",string} or {"ident", string} or -1.</span>
if sdx>length(src) then
<span style="color: #000000;">skip_spaces</span><span style="color: #0000FF;">()</span>
token = -1
<span style="color: #008080;">if</span> <span style="color: #000000;">sdx</span><span style="color: #0000FF;">></span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">src</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
return
<span style="color: #000000;">token</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span>
end if
<span style="color: #008080;">return</span>
integer tokstart = sdx
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
if find(ch,"{}()[]|=.;") then
<span style="color: #004080;">integer</span> <span style="color: #000000;">tokstart</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">sdx</span>
sdx += 1
<span style="color: #008080;">if</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"{}()[]|=.;"</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
token = ch
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
elsif ch='\"'
<span style="color: #000000;">token</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">ch</span>
or ch='\'' then
<span style="color: #008080;">elsif</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">=</span><span style="color: #008000;">'\"'</span>
integer closech = ch
<span style="color: #008080;">or</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">=</span><span style="color: #008000;"><nowiki>'\''</nowiki></span> <span style="color: #008080;">then</span>
for tokend=sdx+1 to length(src) do
<span style="color: #004080;">integer</span> <span style="color: #000000;">closech</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">ch</span>
if src[tokend] = closech then
<span style="color: #008080;">for</span> <span style="color: #000000;">tokend</span><span style="color: #0000FF;">=</span><span style="color: #000000;">sdx</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">src</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
sdx = tokend+1
<span style="color: #008080;">if</span> <span style="color: #000000;">src</span><span style="color: #0000FF;">[</span><span style="color: #000000;">tokend</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">closech</span> <span style="color: #008080;">then</span>
token = {"terminal",src[tokstart+1..tokend-1]}
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tokend</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span>
return
<span style="color: #000000;">token</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"terminal"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">src</span><span style="color: #0000FF;">[</span><span style="color: #000000;">tokstart</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">..</span><span style="color: #000000;">tokend</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]}</span>
end if
<span style="color: #008080;">return</span>
end for
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
token = invalid("no closing quote")
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
elsif ch>='a' and ch<='z' then
<span style="color: #000000;">token</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">invalid</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"no closing quote"</span><span style="color: #0000FF;">)</span>
-- to simplify things for the purposes of this task,
<span style="color: #008080;">elsif</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">>=</span><span style="color: #008000;">'a'</span> <span style="color: #008080;">and</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;"><=</span><span style="color: #008000;">'z'</span> <span style="color: #008080;">then</span>
-- identifiers are strictly a-z only, not A-Z or 1-9
<span style="color: #000080;font-style:italic;">-- to simplify things for the purposes of this task,
while 1 do
-- identifiers are sdxstrictly +=a-z only, not A-Z or 1-9</span>
<span style="color: #008080;">while</span> <span style="color: #000000;">1</span> <span style="color: #008080;">do</span>
if sdx>length(src) then exit end if
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
ch = src[sdx]
<span style="color: #008080;">if</span> <span style="color: #000000;">sdx</span><span style="color: #0000FF;">></span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">src</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
if ch<'a' or ch>'z' then exit end if
<span style="color: #000000;">ch</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">src</span><span style="color: #0000FF;">[</span><span style="color: #000000;">sdx</span><span style="color: #0000FF;">]</span>
end while
<span style="color: #008080;">if</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;"><</span><span style="color: #008000;">'a'</span> <span style="color: #008080;">or</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">></span><span style="color: #008000;">'z'</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
token = {"ident",src[tokstart..sdx-1]}
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
else
<span style="color: #000000;">token</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"ident"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">src</span><span style="color: #0000FF;">[</span><span style="color: #000000;">tokstart</span><span style="color: #0000FF;">..</span><span style="color: #000000;">sdx</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]}</span>
token = invalid("invalid ebnf")
<span style="color: #008080;">else</span>
end if
<span style="color: #000000;">token</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">invalid</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"invalid ebnf"</span><span style="color: #0000FF;">)</span>
end procedure
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
 
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
procedure match_token(integer ch)
if token!=ch then
<span style="color: #008080;">procedure</span> <span style="color: #000000;">match_token</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">)</span>
token = invalid(sprintf("invalid ebnf (%c expected)",ch))
<span style="color: #008080;">if</span> <span style="color: #000000;">token</span><span style="color: #0000FF;">!=</span><span style="color: #000000;">ch</span> <span style="color: #008080;">then</span>
else
<span style="color: #000000;">token</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">invalid</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"invalid ebnf (%c expected)"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">))</span>
get_token()
<span style="color: #008080;">else</span>
end if
<span style="color: #000000;">get_token</span><span style="color: #0000FF;">()</span>
end procedure
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
 
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
sequence idents = {},
ididx = {}
<span style="color: #004080;">sequence</span> <span style="color: #000000;">idents</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span>
 
<span style="color: #000000;">ididx</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span>
function add_ident(string ident)
integer k = find(ident,idents)
<span style="color: #008080;">function</span> <span style="color: #000000;">add_ident</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">ident</span><span style="color: #0000FF;">)</span>
if k=0 then
<span style="color: #004080;">integer</span> <span style="color: #000000;">k</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ident</span><span style="color: #0000FF;">,</span><span style="color: #000000;">idents</span><span style="color: #0000FF;">)</span>
idents = append(idents,ident)
<span style="color: #008080;">if</span> <span style="color: #000000;">k</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
k = length(idents)
<span style="color: #000000;">idents</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">idents</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ident</span><span style="color: #0000FF;">)</span>
ididx = append(ididx,0)
<span style="color: #000000;">k</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">idents</span><span style="color: #0000FF;">)</span>
end if
<span style="color: #000000;">ididx</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ididx</span><span style="color: #0000FF;">,</span><span style="color: #000000;">0</span><span style="color: #0000FF;">)</span>
return k
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end function
<span style="color: #008080;">return</span> <span style="color: #000000;">k</span>
 
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
forward function expression()
 
<span style="color: #008080;">forward</span> <span style="color: #008080;">function</span> <span style="color: #000000;">expression</span><span style="color: #0000FF;">()</span>
function factor()
object res
<span style="color: #008080;">function</span> <span style="color: #000000;">factor</span><span style="color: #0000FF;">()</span>
if sequence(token) then
<span style="color: #004080;">object</span> <span style="color: #000000;">res</span>
if token[1]="ident" then
<span style="color: #008080;">if</span> <span style="color: #004080;">sequence</span><span style="color: #0000FF;">(</span><span style="color: #000000;">token</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
token &= add_ident(token[2])
<span style="color: #008080;">if</span> <span style="color: #000000;">token</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]=</span><span style="color: #008000;">"ident"</span> <span style="color: #008080;">then</span>
end if
<span style="color: #000000;">token</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">add_ident</span><span style="color: #0000FF;">(</span><span style="color: #000000;">token</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">])</span>
res = token
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
get_token()
<span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">token</span>
elsif token='[' then
<span style="color: #000000;">get_token</span><span style="color: #0000FF;">()</span>
<span style="color: #008080;">elsif</span> <span style="color: #000000;">token</span><span style="color: #0000FF;">=</span><span style="color: #008000;">'['</span> <span style="color: #008080;">then</span>
res = {"optional",expression()}
<span style="color: #000000;">get_token</span><span style="color: #0000FF;">()</span>
match_token(']')
<span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"optional"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">expression</span><span style="color: #0000FF;">()}</span>
elsif token='(' then
<span style="color: #000000;">match_token</span><span style="color: #0000FF;">(</span><span style="color: #008000;">']'</span><span style="color: #0000FF;">)</span>
get_token()
<span style="color: #008080;">elsif</span> <span style="color: #000000;">token</span><span style="color: #0000FF;">=</span><span style="color: #008000;">'('</span> <span style="color: #008080;">then</span>
res = expression()
<span style="color: #000000;">get_token</span><span style="color: #0000FF;">()</span>
match_token(')')
<span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">expression</span><span style="color: #0000FF;">()</span>
elsif token='{' then
<span style="color: #000000;">match_token</span><span style="color: #0000FF;">(</span><span style="color: #008000;">')'</span><span style="color: #0000FF;">)</span>
get_token()
<span style="color: #008080;">elsif</span> <span style="color: #000000;">token</span><span style="color: #0000FF;">=</span><span style="color: #008000;">'{'</span> <span style="color: #008080;">then</span>
res = {"repeat",expression()}
<span style="color: #000000;">get_token</span><span style="color: #0000FF;">()</span>
match_token('}')
<span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"repeat"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">expression</span><span style="color: #0000FF;">()}</span>
else
<span style="color: #000000;">match_token</span><span style="color: #0000FF;">(</span><span style="color: #008000;">'}'</span><span style="color: #0000FF;">)</span>
?9/0 -- erm??
<span style="color: #008080;">else</span>
-- res = -1 -- ""
<span style="color: #0000FF;">?</span><span style="color: #000000;">9</span><span style="color: #0000FF;">/</span><span style="color: #000000;">0</span> <span style="color: #000080;font-style:italic;">-- erm??
end if
-- res = -1 -- ""</span>
return res
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end function
<span style="color: #008080;">return</span> <span style="color: #000000;">res</span>
 
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
function term()
-- sequence res = {"factors",factor()} -- (opted against this)
<span style="color: #008080;">function</span> <span style="color: #000000;">term</span><span style="color: #0000FF;">()</span>
sequence res = {factor()}
<span style="color: #000080;font-style:italic;">-- sequence res = {"factors",factor()} -- (opted against this)</span>
while not find(token,{-1,'|','.',';',')',']','}'}) do
<span style="color: #004080;">sequence</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">factor</span><span style="color: #0000FF;">()}</span>
res = append(res,factor())
<span style="color: #008080;">while</span> <span style="color: #008080;">not</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">token</span><span style="color: #0000FF;">,{-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'|'</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'.'</span><span style="color: #0000FF;">,</span><span style="color: #008000;">';'</span><span style="color: #0000FF;">,</span><span style="color: #008000;">')'</span><span style="color: #0000FF;">,</span><span style="color: #008000;">']'</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'}'</span><span style="color: #0000FF;">})</span> <span style="color: #008080;">do</span>
end while
<span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #000000;">factor</span><span style="color: #0000FF;">())</span>
if length(res)=1 then res = res[1] end if
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
return res
<span style="color: #008080;">if</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">)=</span><span style="color: #000000;">1</span> <span style="color: #008080;">then</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">res</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end function
<span style="color: #008080;">return</span> <span style="color: #000000;">res</span>
 
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
function expression()
object res = term()
<span style="color: #008080;">function</span> <span style="color: #000000;">expression</span><span style="color: #0000FF;">()</span>
if token='|' then
<span style="color: #004080;">object</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">term</span><span style="color: #0000FF;">()</span>
res = {"or",res}
<span style="color: #008080;">if</span> <span style="color: #000000;">token</span><span style="color: #0000FF;">=</span><span style="color: #008000;">'|'</span> <span style="color: #008080;">then</span>
while token='|' do
<span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"or"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">res</span><span style="color: #0000FF;">}</span>
get_token()
<span style="color: #008080;">while</span> <span style="color: #000000;">token</span><span style="color: #0000FF;">=</span><span style="color: #008000;">'|'</span> <span style="color: #008080;">do</span>
res = append(res,term())
<span style="color: #000000;">get_token</span><span style="color: #0000FF;">()</span>
end while
<span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #000000;">term</span><span style="color: #0000FF;">())</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
return res
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end function
<span style="color: #008080;">return</span> <span style="color: #000000;">res</span>
 
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
sequence productions = {}
 
<span style="color: #004080;">sequence</span> <span style="color: #000000;">productions</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span>
function production()
-- returns a token or -1; the real result is left in productions[] etc.
<span style="color: #008080;">function</span> <span style="color: #000000;">production</span><span style="color: #0000FF;">()</span>
get_token()
<span style="color: #000080;font-style:italic;">-- returns a token or -1; the real result is left in productions[] etc.</span>
if token!='}' then
<span style="color: #000000;">get_token</span><span style="color: #0000FF;">()</span>
if token=-1 then
<span style="color: #008080;">if</span> <span style="color: #000000;">token</span><span style="color: #0000FF;">!=</span><span style="color: #008000;">'}'</span> <span style="color: #008080;">then</span>
return invalid("invalid ebnf (missing closing })")
<span style="color: #008080;">if</span> <span style="color: #000000;">token</span><span style="color: #0000FF;">=-</span><span style="color: #000000;">1</span> <span style="color: #008080;">then</span>
end if
<span style="color: #008080;">return</span> <span style="color: #000000;">invalid</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"invalid ebnf (missing closing })"</span><span style="color: #0000FF;">)</span>
if not sequence(token)
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
or token[1]!="ident" then
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #004080;">sequence</span><span style="color: #0000FF;">(</span><span style="color: #000000;">token</span><span style="color: #0000FF;">)</span>
return -1
<span style="color: #008080;">or</span> <span style="color: #000000;">token</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]!=</span><span style="color: #008000;">"ident"</span> <span style="color: #008080;">then</span>
end if
<span style="color: #008080;">return</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span>
string ident = token[2]
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
integer idx = add_ident(ident)
<span style="color: #004080;">string</span> <span style="color: #000000;">ident</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">token</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">]</span>
get_token()
<span style="color: #004080;">integer</span> <span style="color: #000000;">idx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">add_ident</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ident</span><span style="color: #0000FF;">)</span>
match_token('=')
<span style="color: #000000;">get_token</span><span style="color: #0000FF;">()</span>
if token=-1 then
<span style="color: #000000;">match_token</span><span style="color: #0000FF;">(</span><span style="color: #008000;">'='</span><span style="color: #0000FF;">)</span>
return -1
<span style="color: #008080;">if</span> <span style="color: #000000;">token</span><span style="color: #0000FF;">=-</span><span style="color: #000000;">1</span> <span style="color: #008080;">then</span>
end if
<span style="color: #008080;">return</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span>
sequence res = expression()
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
productions = append(productions,{ident,idx,res})
<span style="color: #004080;">sequence</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">expression</span><span style="color: #0000FF;">()</span>
ididx[idx] = length(productions)
<span style="color: #000000;">productions</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">productions</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">ident</span><span style="color: #0000FF;">,</span><span style="color: #000000;">idx</span><span style="color: #0000FF;">,</span><span style="color: #000000;">res</span><span style="color: #0000FF;">})</span>
end if
<span style="color: #000000;">ididx</span><span style="color: #0000FF;">[</span><span style="color: #000000;">idx</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">productions</span><span style="color: #0000FF;">)</span>
return token
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end function
<span style="color: #008080;">return</span> <span style="color: #000000;">token</span>
 
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
sequence extras = {}
 
<span style="color: #004080;">sequence</span> <span style="color: #000000;">extras</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span>
function parse(string ebnf)
-- returns: +1 if ok, -1 if bad
<span style="color: #008080;">function</span> <span style="color: #000000;">parse</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">ebnf</span><span style="color: #0000FF;">)</span>
puts(1,"parse: "&ebnf&" ===>\n")
<span style="color: #000080;font-style:italic;">-- returns: +1 if ok, -1 if bad</span>
error = false
<span style="color: #7060A8;">puts</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"parse: "</span><span style="color: #0000FF;">&</span><span style="color: #000000;">ebnf</span><span style="color: #0000FF;">&</span><span style="color: #008000;">" ===&gt;\n"</span><span style="color: #0000FF;">)</span>
src = ebnf
<span style="color: #000000;">error</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">false</span>
sdx = 1
<span style="color: #000000;">src</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">ebnf</span>
idents = {}
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span>
ididx = {}
<span style="color: #000000;">idents</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span>
productions = {}
<span style="color: #000000;">ididx</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span>
extras = {}
<span style="color: #000000;">productions</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span>
get_token()
<span style="color: #000000;">extras</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span>
if sequence(token) then
<span style="color: #000000;">get_token</span><span style="color: #0000FF;">()</span>
token[1] = "title"
<span style="color: #008080;">if</span> <span style="color: #004080;">sequence</span><span style="color: #0000FF;">(</span><span style="color: #000000;">token</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
extras = append(extras,token)
<span style="color: #000000;">token</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"title"</span>
get_token()
<span style="color: #000000;">extras</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">extras</span><span style="color: #0000FF;">,</span><span style="color: #000000;">token</span><span style="color: #0000FF;">)</span>
end if
<span style="color: #000000;">get_token</span><span style="color: #0000FF;">()</span>
if token!='{' then
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
return invalid("invalid ebnf (missing opening {)")
<span style="color: #008080;">if</span> <span style="color: #000000;">token</span><span style="color: #0000FF;">!=</span><span style="color: #008000;">'{'</span> <span style="color: #008080;">then</span>
end if
<span style="color: #008080;">return</span> <span style="color: #000000;">invalid</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"invalid ebnf (missing opening {)"</span><span style="color: #0000FF;">)</span>
while 1 do
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
token = production()
<span style="color: #008080;">while</span> <span style="color: #000000;">1</span> <span style="color: #008080;">do</span>
if token='}' or token=-1 then exit end if
<span style="color: #000000;">token</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">production</span><span style="color: #0000FF;">()</span>
end while
<span style="color: #008080;">if</span> <span style="color: #000000;">token</span><span style="color: #0000FF;">=</span><span style="color: #008000;">'}'</span> <span style="color: #008080;">or</span> <span style="color: #000000;">token</span><span style="color: #0000FF;">=-</span><span style="color: #000000;">1</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
get_token()
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
if sequence(token) then
<span style="color: #000000;">get_token</span><span style="color: #0000FF;">()</span>
token[1] = "comment"
<span style="color: #008080;">if</span> <span style="color: #004080;">sequence</span><span style="color: #0000FF;">(</span><span style="color: #000000;">token</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
extras = append(extras,token)
<span style="color: #000000;">token</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"comment"</span>
get_token()
<span style="color: #000000;">extras</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">extras</span><span style="color: #0000FF;">,</span><span style="color: #000000;">token</span><span style="color: #0000FF;">)</span>
end if
<span style="color: #000000;">get_token</span><span style="color: #0000FF;">()</span>
if token!=-1 then
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
return invalid("invalid ebnf (missing eof?)")
<span style="color: #008080;">if</span> <span style="color: #000000;">token</span><span style="color: #0000FF;">!=-</span><span style="color: #000000;">1</span> <span style="color: #008080;">then</span>
end if
<span style="color: #008080;">return</span> <span style="color: #000000;">invalid</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"invalid ebnf (missing eof?)"</span><span style="color: #0000FF;">)</span>
if error then return -1 end if
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
integer k = find(0,ididx)
<span style="color: #008080;">if</span> <span style="color: #000000;">error</span> <span style="color: #008080;">then</span> <span style="color: #008080;">return</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
if k!=0 then
<span style="color: #004080;">integer</span> <span style="color: #000000;">k</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ididx</span><span style="color: #0000FF;">)</span>
return invalid("invalid ebnf (undefined:"&idents[k]&")")
<span style="color: #008080;">if</span> <span style="color: #000000;">k</span><span style="color: #0000FF;">!=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
end if
<span style="color: #008080;">return</span> <span style="color: #000000;">invalid</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"invalid ebnf (undefined:"</span><span style="color: #0000FF;">&</span><span style="color: #000000;">idents</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]&</span><span style="color: #008000;">")"</span><span style="color: #0000FF;">)</span>
ppOpt({pp_Pause,0})
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
pp(productions)
<span style="color: #7060A8;">ppOpt</span><span style="color: #0000FF;">({</span><span style="color: #004600;">pp_Pause</span><span style="color: #0000FF;">,</span><span style="color: #000000;">0</span><span style="color: #0000FF;">})</span>
-- pp(idents)
<span style="color: #7060A8;">pp</span><span style="color: #0000FF;">(</span><span style="color: #000000;">productions</span><span style="color: #0000FF;">)</span>
-- pp(ididx)
<span style="color: #000080;font-style:italic;">-- pp(idents)
-- pp(extras)
-- pp(ididx)
return 1
-- pp(extras)</span>
end function
<span style="color: #008080;">return</span> <span style="color: #000000;">1</span>
 
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
-- The rules that applies() has to deal with are:
-- {factors} - if rule[1] is not string, just apply one after the other,
<span style="color: #000080;font-style:italic;">-- The rules that applies() has to deal with are:
-- recursively. As per term() above, originally had "factors",
-- {factors} - if rule[1] is not string, just apply one after the other,
-- but getting rid of it made the syntax tree much clearer)
-- recursively. As per term() above, originally had "factors",
-- {"terminal", "a1"} -- literal constants
-- but getting rid of it made the syntax tree much clearer)
-- {"or", <e1>, <e2>, ...} -- (any) one of n
-- {"repeatterminal", <e1>"a1"} -- as per "{}" inliteral ebnfconstants
-- {"optionalor", <&lt;e1>} &gt;, &lt;e2&gt;, ...} -- as(any) per "[]"one inof ebnfn
-- {"identrepeat", <name>, idx&lt;e1&gt;} -- applyas theper sub-rule"{}" in ebnf
-- {"optional", &lt;e1&gt;} -- as per "[]" in ebnf
 
-- {"ident", &lt;name&gt;, idx} -- apply the sub-rule</span>
function applies(sequence rule)
integer was_sdx = sdx -- in case of failure
<span style="color: #008080;">function</span> <span style="color: #000000;">applies</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">rule</span><span style="color: #0000FF;">)</span>
object r1 = rule[1]
<span style="color: #004080;">integer</span> <span style="color: #000000;">was_sdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">sdx</span> <span style="color: #000080;font-style:italic;">-- in case of failure</span>
if not string(r1) then
<span style="color: #004080;">object</span> <span style="color: #000000;">r1</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">rule</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span>
for i=1 to length(rule) do
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #004080;">string</span><span style="color: #0000FF;">(</span><span style="color: #000000;">r1</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
if not applies(rule[i]) then
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">rule</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
sdx = was_sdx
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #000000;">applies</span><span style="color: #0000FF;">(</span><span style="color: #000000;">rule</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">])</span> <span style="color: #008080;">then</span>
return false
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">was_sdx</span>
end if
<span style="color: #008080;">return</span> <span style="color: #004600;">false</span>
end for
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
elsif r1="terminal" then
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
skip_spaces()
<span style="color: #008080;">elsif</span> <span style="color: #000000;">r1</span><span style="color: #0000FF;">=</span><span style="color: #008000;">"terminal"</span> <span style="color: #008080;">then</span>
for i=1 to length(rule[2]) do
<span style="color: #000000;">skip_spaces</span><span style="color: #0000FF;">()</span>
if sdx>length(src)
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">rule</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">])</span> <span style="color: #008080;">do</span>
or src[sdx]!=rule[2][i] then
<span style="color: #008080;">if</span> <span style="color: #000000;">sdx</span><span style="color: #0000FF;">></span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">src</span><span style="color: #0000FF;">)</span>
sdx = was_sdx
<span style="color: #008080;">or</span> <span style="color: #000000;">src</span><span style="color: #0000FF;">[</span><span style="color: #000000;">sdx</span><span style="color: #0000FF;">]!=</span><span style="color: #000000;">rule</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">][</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span>
return false
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">was_sdx</span>
end if
<span style="color: #008080;">return</span> <span style="color: #004600;">false</span>
sdx += 1
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end for
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
elsif r1="or" then
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
for i=2 to length(rule) do
<span style="color: #008080;">elsif</span> <span style="color: #000000;">r1</span><span style="color: #0000FF;">=</span><span style="color: #008000;">"or"</span> <span style="color: #008080;">then</span>
if applies(rule[i]) then
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">2</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">rule</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
return true
<span style="color: #008080;">if</span> <span style="color: #000000;">applies</span><span style="color: #0000FF;">(</span><span style="color: #000000;">rule</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">])</span> <span style="color: #008080;">then</span>
end if
<span style="color: #008080;">return</span> <span style="color: #004600;">true</span>
end for
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
sdx = was_sdx
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
return false
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">was_sdx</span>
elsif r1="repeat" then
<span style="color: #008080;">return</span> <span style="color: #004600;">false</span>
while applies(rule[2]) do
<span style="color: #008080;">elsif</span> <span style="color: #000000;">r1</span><span style="color: #0000FF;">=</span><span style="color: #008000;">"repeat"</span> <span style="color: #008080;">then</span>
end while
<span style="color: #008080;">while</span> <span style="color: #000000;">applies</span><span style="color: #0000FF;">(</span><span style="color: #000000;">rule</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">])</span> <span style="color: #008080;">do</span>
elsif r1="optional" then
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
if applies(rule[2]) then
<span style="color: #008080;">elsif</span> <span style="color: #000000;">r1</span><span style="color: #0000FF;">=</span><span style="color: #008000;">"optional"</span> <span style="color: #008080;">then</span>
end if
<span style="color: #008080;">if</span> <span style="color: #000000;">applies</span><span style="color: #0000FF;">(</span><span style="color: #000000;">rule</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">])</span> <span style="color: #008080;">then</span>
elsif r1="ident" then
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
integer i = rule[3],
<span style="color: #008080;">elsif</span> <span style="color: #000000;">r1</span><span style="color: #0000FF;">=</span><span style="color: #008000;">"ident"</span> <span style="color: #008080;">then</span>
ii = ididx[i]
<span style="color: #004080;">integer</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">rule</span><span style="color: #0000FF;">[</span><span style="color: #000000;">3</span><span style="color: #0000FF;">],</span>
if not applies(productions[ii][3]) then
<span style="color: #000000;">ii</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">ididx</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
sdx = was_sdx
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #000000;">applies</span><span style="color: #0000FF;">(</span><span style="color: #000000;">productions</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ii</span><span style="color: #0000FF;">][</span><span style="color: #000000;">3</span><span style="color: #0000FF;">])</span> <span style="color: #008080;">then</span>
return false
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">was_sdx</span>
end if
<span style="color: #008080;">return</span> <span style="color: #004600;">false</span>
else
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
?9/0
<span style="color: #008080;">else</span>
end if
<span style="color: #0000FF;">?</span><span style="color: #000000;">9</span><span style="color: #0000FF;">/</span><span style="color: #000000;">0</span>
return true
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end function
<span style="color: #008080;">return</span> <span style="color: #004600;">true</span>
 
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
procedure check_valid(string test)
src = test
<span style="color: #008080;">procedure</span> <span style="color: #000000;">check_valid</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">test</span><span style="color: #0000FF;">)</span>
sdx = 1
<span style="color: #000000;">src</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">test</span>
bool res = applies(productions[1][3])
<span style="color: #000000;">sdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span>
skip_spaces()
<span style="color: #004080;">bool</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">applies</span><span style="color: #0000FF;">(</span><span style="color: #000000;">productions</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">3</span><span style="color: #0000FF;">])</span>
if sdx<=length(src) then res = false end if
<span style="color: #000000;">skip_spaces</span><span style="color: #0000FF;">()</span>
?{test,{"pass","fail"}[2-res]}
<span style="color: #008080;">if</span> <span style="color: #000000;">sdx</span><span style="color: #0000FF;"><=</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">src</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">false</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end procedure
<span style="color: #0000FF;">?{</span><span style="color: #000000;">test</span><span style="color: #0000FF;">,{</span><span style="color: #008000;">"pass"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"fail"</span><span style="color: #0000FF;">}[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">-</span><span style="color: #000000;">res</span><span style="color: #0000FF;">]}</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
constant ebnf = {"""
"a" {
<span style="color: #008080;">constant</span> <span style="color: #000000;">ebnf</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"""
a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "za" """,{
a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
"""
} "z" """</span><span style="color: #0000FF;">,</span>
{
<span style="color: #008000;">"""
expr = term { plus term } .
{
term = factor { times factor } .
factor expr = numberterm |{ '('plus exprterm ')'} .
term = factor { times factor } .
plus factor = "+"number | "-"'(' expr ')' .
times = "*" | "/" .
plus = "+" | "-" .
number =times digit= {"*" digit| }"/" .
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
number = digit { digit } .
}""",
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
`a = "1"`,
}"""</span><span style="color: #0000FF;">,</span>
`{ a = "1" ;`,
<span style="color: #008000;">`a = "1"`</span><span style="color: #0000FF;">,</span>
`{ hello world = "1"; }`,
<span style="color: #008000;">`{ a = "1" ;`</span><span style="color: #0000FF;">,</span>
`{ foo = bar . }`
<span style="color: #008000;">`{ hello world = "1"; }`</span><span style="color: #0000FF;">,</span>
}
<span style="color: #008000;">`{ foo = bar . }`</span>
 
<span style="color: #0000FF;">}</span>
constant tests = { {"a1a3a4a4a5a6",
"a1 a2a6",
<span style="color: #008080;">constant</span> <span style="color: #000000;">tests</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"a1a3a4a4a5a6"</span><span style="color: #0000FF;">,</span>
"a1 a3 a4 a6",
<span style="color: #008000;">"a1 a4a2a6"</span><span a5style="color: a6#0000FF;">,</span>
<span style="color: #008000;">"a1 a2a3 a4 a5a6"</span><span a5style="color: a6#0000FF;">,</span>
<span style="color: #008000;">"a1 a2 a4 a5 a6"</span><span style="color: a7#0000FF;">,</span>
<span style="yourcolor: ad#008000;">"a1 a2 a4 a5 a5 a6"</span><span style="color: here#0000FF;"}>,</span>
<span style="color: #008000;">"a1 a2 a4 a5 a6 a7"</span><span style="color: #0000FF;">,</span>
{"2",
<span style="2*3color: +#008000;">"your 4ad here"</23span><span -style="color: 7#0000FF;">},</span>
<span style="color: #0000FF;">{</span><span style="color: #008000;">"2"</span><span style="color: #0000FF;">,</span>
"(3 + 4) * 6-2+(4*(4))",
<span style="color: #008000;">"-2*3 + 4/23 - 7"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"(3 + 4) * 6-2+(4*(4))"</span><span style="color: #0000FF;">,</span>
<span style="(4color: +#008000;">"-2"</span><span style="color: 3#0000FF;"}}>,</span>
<span style="color: #008000;">"3 +"</span><span style="color: #0000FF;">,</span>
 
<span style="color: #008000;">"(4 + 3"</span><span style="color: #0000FF;">}}</span>
for i=1 to length(ebnf) do
if parse(ebnf[i])=+1 then
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ebnf</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
?"tests:"
<span style="color: #008080;">if</span> <span style="color: #000000;">parse</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ebnf</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">])=+</span><span style="color: #000000;">1</span> <span style="color: #008080;">then</span>
for j=1 to length(tests[i]) do
<span style="color: #0000FF;">?</span><span style="color: #008000;">"tests:"</span>
check_valid(tests[i][j])
<span style="color: #008080;">for</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tests</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">])</span> <span style="color: #008080;">do</span>
end for
<span style="color: #000000;">check_valid</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tests</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">])</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
end for</lang>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<!--</syntaxhighlight>-->
In real use, I would be tempted to use numeric literals rather than string tags in such structures, but the latter certainly make things ten times easier to debug, plus I got an instantly legible syntax tree dump (the bit just after "===>" below) practically for free.
{{out}}
Line 1,597 ⟶ 1,904:
 
=={{header|PicoLisp}}==
<langsyntaxhighlight PicoLisplang="picolisp">(de EBNF
"expr : term ( ( PLUS | MINUS ) term )* ;"
"term : factor ( ( MULT | DIV ) factor )* ;"
Line 1,606 ⟶ 1,913:
(unless (and (match '(@S : @E ;) (str E)) (not (cdr @S)))
(quit "Invalid EBNF" E) )
(put (car @S) 'ebnf @E) ) )</langsyntaxhighlight>
<langsyntaxhighlight PicoLisplang="picolisp">(de matchEbnf (Pat)
(cond
((asoq Pat '((PLUS . +) (MINUS . -) (MULT . *) (DIV . /)))
Line 1,649 ⟶ 1,956:
(let *Lst (str Str "")
(catch NIL
(parseLst (get 'expr 'ebnf)) ) ) )</langsyntaxhighlight>
Output:
<pre>: (parseEbnf "1 + 2 * -3 / 7 - 3 * 4")
Line 1,658 ⟶ 1,965:
{{works with|Rakudo|2019.03.1}}
 
This parses the EBNF rule set using a perl 6Raku grammar, then if it parses as valid EBNF, constructs a grammar and parses the test strings with that. EBNF rule sets that are naively syntactically correct but missing rules will parse as valid but will give a runtime failure warning about missing methods.
It is implemented and exercised using the flavor of EBNF and test cases specified on the [[Parse EBNF/Tests|test page]].
 
<syntaxhighlight lang="raku" perl6line># A perl 6Raku grammar to parse EBNF
grammar EBNF {
rule TOP { ^ <title>? '{' [ <production> ]+ '}' <comment>? $ }
rule production { <name> '=' <expression> <[.;]> }
rule expression { <term> +% "|" }
rule term { <factor>+ }
rule factor { <group> | <repeat> | <optional> | <identifier> | <literal> }
rule group { '(' <expression> ')' }
rule repeat { '{' <expression> '}' }
rule optional { '[' <expression> ']' }
token identifier { <-[\|\(\)\{\}\[\]\.\;\"\'\s]>+ } #"
token literal { ["'" <-[']>+ "'" | '"' <-["]>+ '"'] } #"
token title { <literal> }
token comment { <literal> }
token name { <identifier> <?before \h* '='> }
}
 
Line 1,686 ⟶ 1,993:
">]+\$\}\n " ~ $<production>>>.ast ~ "\}"
}
method production($/) {
make 'token ' ~ $<name> ~ ' {' ~
$<expression>.ast ~ "}\n"
}
method expression($/) { make join '|', $<term>>>.ast }
method term($/) { make join '\h*', $<factor>>>.ast }
method factor($/) {
make $<literal> ?? $<literal> !!
$<group> ?? '[' ~ $<group>.ast ~ ']' !!
Line 1,699 ⟶ 2,006:
'<' ~ $<identifier> ~ '>'
}
method repeat($/) { make $<expression>.ast }
method optional($/) { make $<expression>.ast }
method group($/) { make $<expression>.ast }
}
 
Line 1,729 ⟶ 2,036:
term = factor { times factor } .
factor = number | '(' expr ')' .
 
plus = "+" | "-" .
times = "*" | "/" .
 
number = digit { digit } .
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
Line 1,794 ⟶ 2,101:
$fh.say( qq| "\\n"} |);
$fh.close;
say qqx/perl6raku $fn/;
say '*' x 79, "\n";
unlink $fn;
}</langsyntaxhighlight>
 
Output:
Line 2,068 ⟶ 2,375:
{{in progress|lang=Ruby|day=12|month=May|year=2011}}
{{incomplete|Ruby|The tokenizer is here, but the parser is very incomplete.}}
<langsyntaxhighlight lang="ruby">#--
# The tokenizer splits the input into Tokens like "identifier",
# ":", ")*" and so on. This design uses a StringScanner on each line of
Line 2,206 ⟶ 2,513:
parse
end
</syntaxhighlight>
</lang>
 
=={{header|Tcl}}==
Line 2,212 ⟶ 2,519:
 
Demonstration lexer and parser. Note that this parser supports parenthesized expressions, making the grammar recursive.
<langsyntaxhighlight lang="tcl">package require Tcl 8.6
 
# Utilities to make the coroutine easier to use
Line 2,346 ⟶ 2,653:
}
throw SYNTAX "\"$payload\" at position $index"
}</langsyntaxhighlight>
 
<langsyntaxhighlight lang="tcl"># Demonstration code
puts [parse "1 - 2 - -3 * 4 + 5"]
puts [parse "1 - 2 - -3 * (4 + 5)"]</langsyntaxhighlight>
Output:
<pre>
PLUS {MINUS {MINUS 1 2} {MULT -3 4}} 5
MINUS {MINUS 1 2} {MULT -3 {PLUS 4 5}}
</pre>
 
=={{header|Wren}}==
{{trans|Phix}}
{{libheader|Wren-fmt}}
{{libheader|Wren-iterate}}
{{libheader|Wren-str}}
{{libheader|Wren-seq}}
Translated via the Go entry.
<syntaxhighlight lang="wren">import "./fmt" for Conv, Fmt
import "./iterate" for Stepped
import "./str" for Char
import "./seq" for Lst
 
var src = ""
var ch = ""
var sdx = 0
var token = null
var isSeq = false
var err = false
var idents = []
var ididx = []
var productions = []
var extras = []
var results = ["pass", "fail"]
 
var invalid = Fn.new { |msg|
err = true
System.print(msg)
sdx = src.count // set to EOF
return -1
}
 
var skipSpaces = Fn.new {
while (true) {
if (sdx >= src.count) return
ch = src[sdx]
if (!" \t\r\n".contains(ch)) return
sdx = sdx + 1
}
}
 
var getToken = Fn.new {
// Yields a single character token, one of {}()[]|=.;
// or {"terminal",string} or {"ident", string} or -1.
skipSpaces.call()
if (sdx >= src.count) {
token = -1
isSeq = false
return
}
var tokstart = sdx
if ("{}()[]|=.;".contains(ch)) {
sdx = sdx + 1
token = ch
isSeq = false
} else if (ch == "\"" || ch == "'") {
var closech = ch
for (tokend in Stepped.ascend(sdx + 1...src.count)) {
if (src[tokend] == closech) {
sdx = tokend + 1
token = ["terminal", src[tokstart+1...tokend]]
isSeq = true
return
}
}
token = invalid.call("no closing quote")
isSeq = false
} else if (Char.isAsciiLower(ch)) {
// To simplify things for the purposes of this task,
// identifiers are strictly a-z only, not A-Z or 1-9.
while (true) {
sdx = sdx + 1
if (sdx >= src.count) break
ch = src[sdx]
if (!Char.isAsciiLower(ch)) break
}
token = ["ident", src[tokstart...sdx]]
isSeq = true
} else {
token = invalid.call("invalid ebnf")
isSeq = false
}
}
 
var matchToken = Fn.new { |ch|
if (token != ch) {
token = invalid.call("invalid ebnf (%(ch) expected)")
isSeq = false
} else {
getToken.call()
}
}
 
var addIdent = Fn.new { |ident|
var k = -1
var i = 0
for (id in idents) {
if (id == ident) {
k = i
break
}
i = i + 1
}
if (k == -1) {
idents.add(ident)
k = idents.count - 1
ididx.add(-1)
}
return k
}
 
var expression // forward declaration
 
var factor = Fn.new {
var res
if (isSeq) {
if (token[0] == "ident") {
var idx = addIdent.call(token[1])
token.add(idx)
}
res = token
getToken.call()
} else if (token == "[") {
getToken.call()
res = ["optional", expression.call()]
matchToken.call("]")
} else if (token == "(") {
getToken.call()
res = expression.call()
matchToken.call(")")
} else if (token == "{") {
getToken.call()
res = ["repeat", expression.call()]
matchToken.call("}")
} else {
Fiber.abort("invalid token in factor() function")
}
if (res is Sequence && res.count == 1) return res[0]
return res
}
 
var term = Fn.new {
var res = [factor.call()]
var tokens = [-1, "|", ".", ";", ")", "]", "}"]
while (true) {
var outer = false
for (t in tokens) {
if (t == token) {
outer = true
break
}
}
if (outer) break
res.add(factor.call())
}
if (res.count == 1) return res[0]
return res
}
 
// declared earlier
expression = Fn.new {
var res = [term.call()]
if (token == "|") {
res = ["or", res[0]]
while (token == "|") {
getToken.call()
res.add(term.call())
}
}
if (res.count == 1) return res[0]
return res
}
 
var production = Fn.new {
// Returns a token or -1; the real result is left in 'productions' etc,
getToken.call()
if (token != "}") {
if (token == -1) return invalid.call("invalid ebnf (missing closing })")
if (!isSeq) return -1
if (token[0] != "ident") return -1
var ident = token[1]
var idx = addIdent.call(ident)
getToken.call()
matchToken.call("=")
if (token == -1) return -1
productions.add([ident, idx, expression.call()])
ididx[idx] = productions.count - 1
}
return token
}
 
// Adjusts Wren's normal printing of lists to look more like Phix output.
var pprint = Fn.new { |ob, header|
Fmt.print("\n$s:", header)
var quote // recursive closure
quote = Fn.new { |list|
for (i in 0...list.count) {
if (list[i] is String) {
list[i] = Fmt.swrite("$q", list[i])
} else if (list[i] is List) quote.call(list[i])
}
}
var obs
if (ob is String) {
obs = Fmt.swrite("$q", ob)
} else if (ob is List) {
obs = Lst.clone(ob)
quote.call(obs)
}
var pp = obs.toString
pp = pp.replace("[", "{")
pp = pp.replace("]", "}")
for (i in 0...idents.count) {
var xs = Fmt.swrite("'\\x$02d'", i)
pp = pp.replace(xs, i.toString)
}
System.print(pp)
}
 
var parse = Fn.new { |ebnf|
// Returns +1 if ok, -1 if bad.
System.print("parse:\n%(ebnf) ===>")
err = false
src = ebnf
sdx = 0
idents.clear()
ididx.clear()
productions.clear()
extras.clear()
getToken.call()
if (isSeq) {
token[0] = "title"
extras.add(token)
getToken.call()
}
if (token != "{") return invalid.call("invalid ebnf (missing opening {)")
while (true) {
token = production.call()
if (token == "}" || token == -1) break
}
getToken.call()
if (isSeq) {
token[0] = "comment"
extras.add(token)
getToken.call()
}
if (token != -1) return invalid.call("invalid ebnf (missing eof?)")
if (err) return -1
var k = -1
var i = 0
for (idx in ididx) {
if (idx == -1) {
k = i
break
}
i = i + 1
}
if (k != -1) return invalid.call("invalid ebnf (undefined:%(idents[k]))")
pprint.call(productions, "productions")
pprint.call(idents, "idents")
pprint.call(ididx, "ididx")
pprint.call(extras, "extras")
return 1
}
 
// The rules that applies() has to deal with are:
// {factors} - if rule[0] is not string,
// just apply one after the other recursively.
// {"terminal", "a1"} -- literal constants
// {"or", <e1>, <e2>, ...} -- (any) one of n
// {"repeat", <e1>} -- as per "{}" in ebnf
// {"optional", <e1>} -- as per "[]" in ebnf
// {"ident", <name>, idx} -- apply the sub-rule
var applies // recursive function
applies = Fn.new { |rule|
var wasSdx = sdx // in case of failure
var r1 = rule[0]
if (!(r1 is String)) {
for (i in 0...rule.count) {
if (!applies.call(rule[i])) {
sdx = wasSdx
return false
}
}
} else if (r1 == "terminal") {
skipSpaces.call()
var r2 = rule[1]
for (i in 0...r2.count) {
if (sdx >= src.count || src[sdx] != r2[i]) {
sdx = wasSdx
return false
}
sdx = sdx + 1
}
} else if (r1 == "or") {
for (i in Stepped.ascend(1...rule.count)) {
if (applies.call(rule[i])) return true
}
sdx = wasSdx
return false
} else if (r1 == "repeat") {
while (applies.call(rule[1])) {}
} else if (r1 == "optional") {
applies.call(rule[1])
} else if (r1 == "ident") {
var i = rule[2]
var ii = ididx[i]
if (!(applies.call(productions[ii][2]))) {
sdx = wasSdx
return false
}
} else {
Fiber.abort("invalid rule in applies() function")
}
return true
}
 
var checkValid = Fn.new { |test|
src = test
sdx = 0
var res = applies.call(productions[0][2])
skipSpaces.call()
if (sdx < src.count) res = false
Fmt.print("$q, $s", test, results[1-Conv.btoi(res)])
}
 
var ebnfs = [
""""a" {
a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z" """,
"""{
expr = term { plus term } .
term = factor { times factor } .
factor = number | '(' expr ')' .
 
plus = "+" | "-" .
times = "*" | "/" .
 
number = digit { digit } .
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
}""",
"a = \"1\"",
"""{ a = "1" ;""",
"""{ hello world = "1"; }""",
"""{ foo = bar . }"""
]
 
var tests = [
[
"a1a3a4a4a5a6",
"a1 a2a6",
"a1 a3 a4 a6",
"a1 a4 a5 a6",
"a1 a2 a4 a5 a5 a6",
"a1 a2 a4 a5 a6 a7",
"your ad here"
],
[
"2",
"2*3 + 4/23 - 7",
"(3 + 4) * 6-2+(4*(4))",
"-2",
"3 +",
"(4 + 3"
]
]
 
var i = 0
for (ebnf in ebnfs) {
if (parse.call(ebnf) == 1) {
System.print("\ntests:")
for (test in tests[i]) checkValid.call(test)
}
System.print()
i = i + 1
}</syntaxhighlight>
 
{{out}}
<pre>
parse:
"a" {
a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z" ===>
 
productions:
{{"a", 0, {{"terminal", "a1"}, {"or", {"terminal", "a2"}, {"terminal", "a3"}}, {"repeat", {"terminal", "a4"}}, {"optional", {"terminal", "a5"}}, {"terminal", "a6"}}}}
 
idents:
{"a"}
 
ididx:
{0}
 
extras:
{{"title", "a"}, {"comment", "z"}}
 
tests:
"a1a3a4a4a5a6", pass
"a1 a2a6", pass
"a1 a3 a4 a6", pass
"a1 a4 a5 a6", fail
"a1 a2 a4 a5 a5 a6", fail
"a1 a2 a4 a5 a6 a7", fail
"your ad here", fail
 
parse:
{
expr = term { plus term } .
term = factor { times factor } .
factor = number | '(' expr ')' .
 
plus = "+" | "-" .
times = "*" | "/" .
 
number = digit { digit } .
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
} ===>
 
productions:
{{"expr", 0, {{"ident", "term", 1}, {"repeat", {{"ident", "plus", 2}, {"ident", "term", 1}}}}}, {"term", 1, {{"ident", "factor", 3}, {"repeat", {{"ident", "times", 4}, {"ident", "factor", 3}}}}}, {"factor", 3, {"or", {"ident", "number", 5}, {{"terminal", "("}, {"ident", "expr", 0}, {"terminal", ")"}}}}, {"plus", 2, {"or", {"terminal", "+"}, {"terminal", "-"}}}, {"times", 4, {"or", {"terminal", "*"}, {"terminal", "/"}}}, {"number", 5, {{"ident", "digit", 6}, {"repeat", {"ident", "digit", 6}}}}, {"digit", 6, {"or", {"terminal", "0"}, {"terminal", "1"}, {"terminal", "2"}, {"terminal", "3"}, {"terminal", "4"}, {"terminal", "5"}, {"terminal", "6"}, {"terminal", "7"}, {"terminal", "8"}, {"terminal", "9"}}}}
 
idents:
{"expr", "term", "plus", "factor", "times", "number", "digit"}
 
ididx:
{0, 1, 3, 2, 4, 5, 6}
 
extras:
{}
 
tests:
"2", pass
"2*3 + 4/23 - 7", pass
"(3 + 4) * 6-2+(4*(4))", pass
"-2", fail
"3 +", fail
"(4 + 3", fail
 
parse:
a = "1" ===>
invalid ebnf (missing opening {)
 
parse:
{ a = "1" ; ===>
invalid ebnf (missing closing })
 
parse:
{ hello world = "1"; } ===>
invalid ebnf (= expected)
 
parse:
{ foo = bar . } ===>
invalid ebnf (undefined:bar)
</pre>
3,038

edits