Parse EBNF: Difference between revisions

4,592 bytes added ,  11 years ago
→‎{{header|Perl 6}}: Updated to work with current specs (repetion operator changed). Also displays syntax tree now
(→‎{{header|Perl 6}}: Add Perl 6 example)
(→‎{{header|Perl 6}}: Updated to work with current specs (repetion operator changed). Also displays syntax tree now)
Line 440:
 
=={{header|Perl 6}}==
{{works with|Rakudo|20112012.0705}}
This is a fairly naive implementation of an EBNF parser. It works, but takes some shortcuts and implements a subset of EBNF. The biggest restriction is that identifiers can only contain alpha-numeric characters rather than anything but EBNF operators: |(){}[];,"' So identifiers like <?-&#@>, though technically correct, won't work.
 
This parses the EBNF rule set using a perl 6 grammar, then if it parses as valid EBNF, constructs a grammar and parses the test strings with that. EBNF rule sets that are naively syntactically correct but missing rules will parse as valid but will give a runtime failure warning about missing methods.
Line 447 ⟶ 446:
<lang perl6># A perl 6 grammar to parse EBNF
grammar EBNF {
rule TOP { ^^ <title>? '{' [ <rulesetproduction> ]+ '}' <comment>?$ $ }
rule rulesetproduction { <name> '=' <expression> <[.;]> }
rule expression { <term> **+% "|" }
rule term { <factor>+ }
rule factor { <group> | <repeat> | <optional> | <identifier> | <literal> }
Line 455 ⟶ 454:
rule repeat { '{' <expression> '}' }
rule optional { '[' <expression> ']' }
token identifier { <-[\w|\(\)\{\}\[\]\.\;\"\'\s]>+ } #'" Defeat confused syntax highlighting
token literal { ["'" <-[']>+ "'" | '"' <-["]>+ '"'] } #'" bogus comment to defeatDefeat confused syntax highlighterhighlighting
token title { <literal> }
token comment { <literal> }
token name { <identifier> <?before \h* '='> }
}
 
# And actions to build a EBNF parser
class EBNF::Actions {
method TOP($/) {
mysay @top"Syntax Tree:\n", $/; # Dump the syntax tree to STDOUT
my $grammar = $/;
$grammar.=subst(/<-[\{]>*\{\s*/, '');
for $grammar.split(/\n\h*\n/)[0] -> $f {
for $f.split(/\n\h*/) -> $g {
next if $g ~~ /^\W/;
@top.push: '<' ~ $g.split(' =')[0] ~ '>';
}
}
make 'grammar ' ~
($<title>.subst(/\W/, '', :g) || 'anonymousunnamed') ~
" \{\n rule TOP \{^[<" ~ (join '|', @top )$/<production>[0]<name> ~
">]+\$\}\n " ~ $<rulesetproduction>>>.ast ~ "\n\}"
}
method rulesetproduction($/) {
make 'ruletoken ' ~ $<name> ~ ' {' ~
$<expression>.ast ~ "\\h*}\n"
}
method expression($/) { make join '|', $<term>>>.ast }
method term($/) { make join '\h*', $<factor>>>.ast }
method factor($/) { make $<literal> ?? $<literal> !!
make $<groupliteral> ?? '[' ~ $<groupliteral>.ast ~ ']' !!
$<repeatgroup> ?? '[' ~ $<repeatgroup>.ast ~ ']*' !!
$<optionalrepeat> ?? '[' ~ $<optionalrepeat>.ast ~ '\\s*]?*' !!
$<optional> ?? '<[' ~ $<identifieroptional>.ast ~ '>]?' !!
'<' ~ $<identifier> ~ '>'
}
method repeat($/) { make $<expression>.ast }
Line 496 ⟶ 487:
}
 
# An array of test cases
# Now test as follows
my @tests = (
{
Line 509 ⟶ 500:
'a1 a3 a4 a6',
'a1 a4 a5 a6',
'a1 a2 a4 a4 a5 a6',
'a1 a2 a4 a5 a5 a6',
'a1 a2 a4 a5 a6 a7',
Line 539 ⟶ 531:
{
ebnf => q<a = "1";>,
teststrings => ['foofoobar']
},
{
ebnf => q<{ a = "1" ;>,
teststrings => ['foofoobar']
},
{
ebnf => q<{ hello world = "1"; }>,
teststrings => ['foofoobar']
},
{
ebnf => q<{ foo = bar . }>,
teststrings => ['foofoobar']
},
);
 
# Test the parser.
my $i = 1;
for @tests -> $test {
mysay $a'*' =x EBNF::Actions79;
unless EBNF.parse($test<ebnf>) {
say "TestingParsing EBNF grammar:\n";
say "{$test<ebnf>.subst(/^|(\n)^\h*/, -> $/ {$0}'', :g)}\n";
say "Invalid EBNF grammarsyntax. Can not be parsed.\n";
say '*' x 6079;
next;
}
my $p = EBNF.parse($test<ebnf>, :actions($a));
my $p = EBNF.parse($test<ebnf>, :actions(EBNF::Actions));
my $grammar = $p.ast;
$grammar ~~ m/^['grammar '](\w+)/;
my $title = $0.Str;
my $fn = 'EBNFtest'~$i++;
my $fh = open($fn, :w) or die "$!\n";
$fh.say( $grammar );
$fh.say( qqq|say "TestingParsing EBNF grammar '$title':\\n";| );
$fh.say( qq|say q<{$test<ebnf>.subst(/^|(\n)^\h*/,->$/{$0}'',:g)}>,"\n";| );
$fh.say( q|say "Parses as valid\nValid EBNFsyntax.\n\nTesting:\n";| );
$fh.say( q|sayCATCH { '-'die x$_ 60};| );
my $len = [max] $test<teststrings>.flat>>.chars;
for $test<teststrings>.flat -> $s {
$fh.say( qq|printf "%{$len}s is %svalid.\n", '{$s}',;\n| ~
qq|printf " - %s\\n", try ({$title}.parse('{$s}'))| ?? '' !! 'NOT ';|);~
qq| ?? 'valid.' !! 'NOT valid.';|
);
}
$fh.close;
say qqx/perl6 $fn/;
say '*' x 6079;
unlink $fn;
}
}</lang>
 
Output:
<pre>
*******************************************************************************
Testing EBNF grammar:
Syntax Tree:
q["a" {
a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"]
title => q["a"]
literal => q["a"]
production => q[a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
]
name => q[a]
identifier => q[a]
expression => q["a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ]
term => q["a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ]
factor => q["a1" ]
literal => q["a1"]
factor => q[( "a2" | "a3" ) ]
group => q[( "a2" | "a3" ) ]
expression => q["a2" | "a3" ]
term => q["a2" ]
factor => q["a2" ]
literal => q["a2"]
term => q[ "a3" ]
factor => q["a3" ]
literal => q["a3"]
factor => q[{ "a4" } ]
repeat => q[{ "a4" } ]
expression => q["a4" ]
term => q["a4" ]
factor => q["a4" ]
literal => q["a4"]
factor => q[[ "a5" ] ]
optional => q[[ "a5" ] ]
expression => q["a5" ]
term => q["a5" ]
factor => q["a5" ]
literal => q["a5"]
factor => q["a6" ]
literal => q["a6"]
comment => q["z"]
literal => q["z"]
 
Parsing EBNF grammar 'a':
 
"a" {
Line 595 ⟶ 634:
} "z"
 
Valid syntax.
Parses as valid EBNF.
------------------------------------------------------------
a1a3a4a4a5a6 is valid.
a1 a2a6 is valid.
a1 a3 a4 a6 is valid.
a1 a4 a5 a6 is NOT valid.
a1 a2 a4 a5 a5 a6 is NOT valid.
a1 a2 a4 a5 a6 a7 is NOT valid.
your ad here is NOT valid.
 
Testing:
************************************************************
 
Testing EBNF grammar:
a1a3a4a4a5a6 - valid.
a1 a2a6 - valid.
a1 a3 a4 a6 - valid.
a1 a4 a5 a6 - NOT valid.
a1 a2 a4 a4 a5 a6 - valid.
a1 a2 a4 a5 a5 a6 - NOT valid.
a1 a2 a4 a5 a6 a7 - NOT valid.
your ad here - NOT valid.
 
*******************************************************************************
*******************************************************************************
Syntax Tree:
q[{
expr = term { plus term } .
term = factor { times factor } .
factor = number | '(' expr ')' .
 
plus = "+" | "-" .
times = "*" | "/" .
 
number = digit { digit } .
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
}]
production => q[expr = term { plus term } .
]
name => q[expr]
identifier => q[expr]
expression => q[term { plus term } ]
term => q[term { plus term } ]
factor => q[term ]
identifier => q[term]
factor => q[{ plus term } ]
repeat => q[{ plus term } ]
expression => q[plus term ]
term => q[plus term ]
factor => q[plus ]
identifier => q[plus]
factor => q[term ]
identifier => q[term]
production => q[term = factor { times factor } .
]
name => q[term]
identifier => q[term]
expression => q[factor { times factor } ]
term => q[factor { times factor } ]
factor => q[factor ]
identifier => q[factor]
factor => q[{ times factor } ]
repeat => q[{ times factor } ]
expression => q[times factor ]
term => q[times factor ]
factor => q[times ]
identifier => q[times]
factor => q[factor ]
identifier => q[factor]
production => q[factor = number | '(' expr ')' .
 
]
name => q[factor]
identifier => q[factor]
expression => q[number | '(' expr ')' ]
term => q[number ]
factor => q[number ]
identifier => q[number]
term => q[ '(' expr ')' ]
factor => q['(' ]
literal => q['(']
factor => q[expr ]
identifier => q[expr]
factor => q[')' ]
literal => q[')']
production => q[plus = "+" | "-" .
]
name => q[plus]
identifier => q[plus]
expression => q["+" | "-" ]
term => q["+" ]
factor => q["+" ]
literal => q["+"]
term => q[ "-" ]
factor => q["-" ]
literal => q["-"]
production => q[times = "*" | "/" .
 
]
name => q[times]
identifier => q[times]
expression => q["*" | "/" ]
term => q["*" ]
factor => q["*" ]
literal => q["*"]
term => q[ "/" ]
factor => q["/" ]
literal => q["/"]
production => q[number = digit { digit } .
]
name => q[number]
identifier => q[number]
expression => q[digit { digit } ]
term => q[digit { digit } ]
factor => q[digit ]
identifier => q[digit]
factor => q[{ digit } ]
repeat => q[{ digit } ]
expression => q[digit ]
term => q[digit ]
factor => q[digit ]
identifier => q[digit]
production => q[digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
]
name => q[digit]
identifier => q[digit]
expression => q["0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ]
term => q["0" ]
factor => q["0" ]
literal => q["0"]
term => q[ "1" ]
factor => q["1" ]
literal => q["1"]
term => q[ "2" ]
factor => q["2" ]
literal => q["2"]
term => q[ "3" ]
factor => q["3" ]
literal => q["3"]
term => q[ "4" ]
factor => q["4" ]
literal => q["4"]
term => q[ "5" ]
factor => q["5" ]
literal => q["5"]
term => q[ "6" ]
factor => q["6" ]
literal => q["6"]
term => q[ "7" ]
factor => q["7" ]
literal => q["7"]
term => q[ "8" ]
factor => q["8" ]
literal => q["8"]
term => q[ "9" ]
factor => q["9" ]
literal => q["9"]
 
Parsing EBNF grammar 'unnamed':
 
{
Line 620 ⟶ 796:
}
 
Valid syntax.
Parses as valid EBNF.
------------------------------------------------------------
2 is valid.
2*3 + 4/23 - 7 is valid.
(3 + 4) * 6-2+(4*(4)) is valid.
-2 is NOT valid.
3 + is NOT valid.
(4 + 3 is NOT valid.
 
Testing:
************************************************************
 
Testing EBNF grammar:
2 - valid.
2*3 + 4/23 - 7 - valid.
(3 + 4) * 6-2+(4*(4)) - valid.
-2 - NOT valid.
3 + - NOT valid.
(4 + 3 - NOT valid.
 
*******************************************************************************
*******************************************************************************
Parsing EBNF grammar:
 
a = "1";
 
Invalid EBNF grammarsyntax. Can not be parsed.
 
************************************************************
*******************************************************************************
Testing EBNF grammar:
*******************************************************************************
Parsing EBNF grammar:
 
{ a = "1" ;
 
Invalid EBNF grammarsyntax. Can not be parsed.
 
************************************************************
*******************************************************************************
Testing EBNF grammar:
*******************************************************************************
Parsing EBNF grammar:
 
{ hello world = "1"; }
 
Invalid EBNF grammarsyntax. Can not be parsed.
 
************************************************************
*******************************************************************************
Testing EBNF grammar:
*******************************************************************************
Syntax Tree:
q[{ foo = bar . }]
production => q[foo = bar . ]
name => q[foo]
identifier => q[foo]
expression => q[bar ]
term => q[bar ]
factor => q[bar ]
identifier => q[bar]
 
Parsing EBNF grammar 'unnamed':
 
{ foo = bar . }
 
Valid syntax.
Parses as valid EBNF.
------------------------------------------------------------
Method 'bar' not found for invocant of class 'anonymous'
in 'anonymous::foo' at line 3:EBNFtest3
in 'anonymous::TOP' at line 2:EBNFtest3
in 'Grammar::parse' at line 6466:src/gen/core.pm
in main program body at line 12:EBNFtest3
 
Testing:
************************************************************
</pre>
 
foobar - No such method 'bar' for invocant of type 'unnamed'
 
*******************************************************************************
</pre>
 
=={{header|PicoLisp}}==
10,333

edits