Parse EBNF: Difference between revisions
Content added Content deleted
Thundergnat (talk | contribs) (→{{header|Perl 6}}: Add Perl 6 example) |
Thundergnat (talk | contribs) (→{{header|Perl 6}}: Updated to work with current specs (repetion operator changed). Also displays syntax tree now) |
||
Line 440: | Line 440: | ||
=={{header|Perl 6}}== |
=={{header|Perl 6}}== |
||
{{works with|Rakudo| |
{{works with|Rakudo|2012.05}} |
||
This is a fairly naive implementation of an EBNF parser. It works, but takes some shortcuts and implements a subset of EBNF. The biggest restriction is that identifiers can only contain alpha-numeric characters rather than anything but EBNF operators: |(){}[];,"' So identifiers like <?-&#@>, though technically correct, won't work. |
|||
This parses the EBNF rule set using a perl 6 grammar, then if it parses as valid EBNF, constructs a grammar and parses the test strings with that. EBNF rule sets that are naively syntactically correct but missing rules will parse as valid but will give a runtime failure warning about missing methods. |
This parses the EBNF rule set using a perl 6 grammar, then if it parses as valid EBNF, constructs a grammar and parses the test strings with that. EBNF rule sets that are naively syntactically correct but missing rules will parse as valid but will give a runtime failure warning about missing methods. |
||
Line 447: | Line 446: | ||
<lang perl6># A perl 6 grammar to parse EBNF |
<lang perl6># A perl 6 grammar to parse EBNF |
||
grammar EBNF { |
grammar EBNF { |
||
rule TOP { ^ |
rule TOP { ^ <title>? '{' [ <production> ]+ '}' <comment>? $ } |
||
rule |
rule production { <name> '=' <expression> <[.;]> } |
||
rule expression { <term> |
rule expression { <term> +% "|" } |
||
rule term { <factor>+ } |
rule term { <factor>+ } |
||
rule factor { <group> | <repeat> | <optional> | <identifier> | <literal> } |
rule factor { <group> | <repeat> | <optional> | <identifier> | <literal> } |
||
Line 455: | Line 454: | ||
rule repeat { '{' <expression> '}' } |
rule repeat { '{' <expression> '}' } |
||
rule optional { '[' <expression> ']' } |
rule optional { '[' <expression> ']' } |
||
token identifier { \ |
token identifier { <-[\|\(\)\{\}\[\]\.\;\"\'\s]>+ } #'" Defeat confused syntax highlighting |
||
token literal { "'" <-[']>+ "'" | '"' <-["]>+ '"' } #" |
token literal { ["'" <-[']>+ "'" | '"' <-["]>+ '"'] } #'" Defeat confused syntax highlighting |
||
token title { <literal> } |
token title { <literal> } |
||
token comment { <literal> } |
token comment { <literal> } |
||
token name { <identifier> } |
token name { <identifier> <?before \h* '='> } |
||
} |
} |
||
# And actions to build a EBNF parser |
|||
class EBNF::Actions { |
class EBNF::Actions { |
||
method TOP($/) { |
method TOP($/) { |
||
say "Syntax Tree:\n", $/; # Dump the syntax tree to STDOUT |
|||
my $grammar = $/; |
|||
$grammar.=subst(/<-[\{]>*\{\s*/, ''); |
|||
for $grammar.split(/\n\h*\n/)[0] -> $f { |
|||
for $f.split(/\n\h*/) -> $g { |
|||
next if $g ~~ /^\W/; |
|||
@top.push: '<' ~ $g.split(' =')[0] ~ '>'; |
|||
} |
|||
} |
|||
make 'grammar ' ~ |
make 'grammar ' ~ |
||
($<title>.subst(/\W/, '', :g) || ' |
($<title>.subst(/\W/, '', :g) || 'unnamed') ~ |
||
" \{\n rule TOP \{^[" ~ |
" \{\n rule TOP \{^[<" ~ $/<production>[0]<name> ~ |
||
"]+\$\}\n " ~ $< |
">]+\$\}\n " ~ $<production>>>.ast ~ "\}" |
||
} |
} |
||
method |
method production($/) { |
||
make ' |
make 'token ' ~ $<name> ~ ' {' ~ |
||
$<expression>.ast ~ " |
$<expression>.ast ~ "}\n" |
||
} |
} |
||
method expression($/) { make join '|', $<term>>>.ast } |
method expression($/) { make join '|', $<term>>>.ast } |
||
method term($/) { make join '\h*', $<factor>>>.ast } |
method term($/) { make join '\h*', $<factor>>>.ast } |
||
method factor($/) { |
method factor($/) { |
||
make $<literal> ?? $<literal> !! |
|||
$<group> ?? '[' ~ $<group>.ast ~ ']' !! |
|||
$<repeat> ?? '[' ~ $<repeat>.ast ~ '\\s*]*' !! |
|||
' |
$<optional> ?? '[' ~ $<optional>.ast ~ ']?' !! |
||
'<' ~ $<identifier> ~ '>' |
|||
} |
} |
||
method repeat($/) { make $<expression>.ast } |
method repeat($/) { make $<expression>.ast } |
||
Line 496: | Line 487: | ||
} |
} |
||
# An array of test cases |
|||
# Now test as follows |
|||
my @tests = ( |
my @tests = ( |
||
{ |
{ |
||
Line 509: | Line 500: | ||
'a1 a3 a4 a6', |
'a1 a3 a4 a6', |
||
'a1 a4 a5 a6', |
'a1 a4 a5 a6', |
||
'a1 a2 a4 a4 a5 a6', |
|||
'a1 a2 a4 a5 a5 a6', |
'a1 a2 a4 a5 a5 a6', |
||
'a1 a2 a4 a5 a6 a7', |
'a1 a2 a4 a5 a6 a7', |
||
Line 539: | Line 531: | ||
{ |
{ |
||
ebnf => q<a = "1";>, |
ebnf => q<a = "1";>, |
||
teststrings => [' |
teststrings => ['foobar'] |
||
}, |
}, |
||
{ |
{ |
||
ebnf => q<{ a = "1" ;>, |
ebnf => q<{ a = "1" ;>, |
||
teststrings => [' |
teststrings => ['foobar'] |
||
}, |
}, |
||
{ |
{ |
||
ebnf => q<{ hello world = "1"; }>, |
ebnf => q<{ hello world = "1"; }>, |
||
teststrings => [' |
teststrings => ['foobar'] |
||
}, |
}, |
||
{ |
{ |
||
ebnf => q<{ foo = bar . }>, |
ebnf => q<{ foo = bar . }>, |
||
teststrings => [' |
teststrings => ['foobar'] |
||
} |
} |
||
); |
); |
||
# Test the parser. |
|||
my $i = 1; |
my $i = 1; |
||
for @tests -> $test { |
for @tests -> $test { |
||
say '*' x 79; |
|||
unless EBNF.parse($test<ebnf>) { |
unless EBNF.parse($test<ebnf>) { |
||
say " |
say "Parsing EBNF grammar:\n"; |
||
say "{$test<ebnf>.subst(/^ |
say "{$test<ebnf>.subst(/^^\h*/,'',:g)}\n"; |
||
say "Invalid |
say "Invalid syntax. Can not be parsed.\n"; |
||
say '*' x |
say '*' x 79; |
||
next; |
next; |
||
} |
} |
||
my $p = EBNF.parse($test<ebnf>, :actions($a)); |
|||
my $p = EBNF.parse($test<ebnf>, :actions(EBNF::Actions)); |
|||
my $grammar = $p.ast; |
my $grammar = $p.ast; |
||
$grammar ~~ m/^ |
$grammar ~~ m/^'grammar '(\w+)/; |
||
my $title = $0 |
my $title = $0; |
||
my $fn = 'EBNFtest'~$i++; |
my $fn = 'EBNFtest'~$i++; |
||
my $fh = open($fn, :w) or die "$!\n"; |
my $fh = open($fn, :w) or die "$!\n"; |
||
$fh.say( $grammar); |
$fh.say( $grammar ); |
||
$fh.say( |
$fh.say( qq|say "Parsing EBNF grammar '$title':\\n";| ); |
||
$fh.say(qq|say q<{$test<ebnf>.subst(/^ |
$fh.say( qq|say q<{$test<ebnf>.subst(/^^\h*/,'',:g)}>;| ); |
||
$fh.say( q|say " |
$fh.say( q|say "\nValid syntax.\n\nTesting:\n";| ); |
||
$fh.say( q| |
$fh.say( q|CATCH { die $_ };| ); |
||
my $len = [max] $test<teststrings>.flat>>.chars; |
my $len = [max] $test<teststrings>.flat>>.chars; |
||
for $test<teststrings>.flat -> $s { |
for $test<teststrings>.flat -> $s { |
||
$fh.say( qq|printf "%{$len}s |
$fh.say( qq|printf "%{$len}s", '{$s}';\n| ~ |
||
qq|{$title}.parse('{$s}') |
qq|printf " - %s\\n", try ({$title}.parse('{$s}'))| ~ |
||
qq| ?? 'valid.' !! 'NOT valid.';| |
|||
); |
|||
} |
} |
||
$fh.close; |
$fh.close; |
||
say qqx/perl6 $fn/; |
say qqx/perl6 $fn/; |
||
say '*' x |
say '*' x 79; |
||
unlink $fn; |
unlink $fn; |
||
} |
|||
}</lang> |
}</lang> |
||
Output: |
Output: |
||
<pre> |
<pre> |
||
******************************************************************************* |
|||
Testing EBNF grammar: |
|||
Syntax Tree: |
|||
q["a" { |
|||
a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ; |
|||
} "z"] |
|||
title => q["a"] |
|||
literal => q["a"] |
|||
production => q[a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ; |
|||
] |
|||
name => q[a] |
|||
identifier => q[a] |
|||
expression => q["a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ] |
|||
term => q["a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ] |
|||
factor => q["a1" ] |
|||
literal => q["a1"] |
|||
factor => q[( "a2" | "a3" ) ] |
|||
group => q[( "a2" | "a3" ) ] |
|||
expression => q["a2" | "a3" ] |
|||
term => q["a2" ] |
|||
factor => q["a2" ] |
|||
literal => q["a2"] |
|||
term => q[ "a3" ] |
|||
factor => q["a3" ] |
|||
literal => q["a3"] |
|||
factor => q[{ "a4" } ] |
|||
repeat => q[{ "a4" } ] |
|||
expression => q["a4" ] |
|||
term => q["a4" ] |
|||
factor => q["a4" ] |
|||
literal => q["a4"] |
|||
factor => q[[ "a5" ] ] |
|||
optional => q[[ "a5" ] ] |
|||
expression => q["a5" ] |
|||
term => q["a5" ] |
|||
factor => q["a5" ] |
|||
literal => q["a5"] |
|||
factor => q["a6" ] |
|||
literal => q["a6"] |
|||
comment => q["z"] |
|||
literal => q["z"] |
|||
Parsing EBNF grammar 'a': |
|||
"a" { |
"a" { |
||
Line 595: | Line 634: | ||
} "z" |
} "z" |
||
Valid syntax. |
|||
Parses as valid EBNF. |
|||
------------------------------------------------------------ |
|||
a1a3a4a4a5a6 is valid. |
|||
a1 a2a6 is valid. |
|||
a1 a3 a4 a6 is valid. |
|||
a1 a4 a5 a6 is NOT valid. |
|||
a1 a2 a4 a5 a5 a6 is NOT valid. |
|||
a1 a2 a4 a5 a6 a7 is NOT valid. |
|||
your ad here is NOT valid. |
|||
Testing: |
|||
************************************************************ |
|||
Testing EBNF grammar: |
|||
a1a3a4a4a5a6 - valid. |
|||
a1 a2a6 - valid. |
|||
a1 a3 a4 a6 - valid. |
|||
a1 a4 a5 a6 - NOT valid. |
|||
a1 a2 a4 a4 a5 a6 - valid. |
|||
a1 a2 a4 a5 a5 a6 - NOT valid. |
|||
a1 a2 a4 a5 a6 a7 - NOT valid. |
|||
your ad here - NOT valid. |
|||
******************************************************************************* |
|||
******************************************************************************* |
|||
Syntax Tree: |
|||
q[{ |
|||
expr = term { plus term } . |
|||
term = factor { times factor } . |
|||
factor = number | '(' expr ')' . |
|||
plus = "+" | "-" . |
|||
times = "*" | "/" . |
|||
number = digit { digit } . |
|||
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" . |
|||
}] |
|||
production => q[expr = term { plus term } . |
|||
] |
|||
name => q[expr] |
|||
identifier => q[expr] |
|||
expression => q[term { plus term } ] |
|||
term => q[term { plus term } ] |
|||
factor => q[term ] |
|||
identifier => q[term] |
|||
factor => q[{ plus term } ] |
|||
repeat => q[{ plus term } ] |
|||
expression => q[plus term ] |
|||
term => q[plus term ] |
|||
factor => q[plus ] |
|||
identifier => q[plus] |
|||
factor => q[term ] |
|||
identifier => q[term] |
|||
production => q[term = factor { times factor } . |
|||
] |
|||
name => q[term] |
|||
identifier => q[term] |
|||
expression => q[factor { times factor } ] |
|||
term => q[factor { times factor } ] |
|||
factor => q[factor ] |
|||
identifier => q[factor] |
|||
factor => q[{ times factor } ] |
|||
repeat => q[{ times factor } ] |
|||
expression => q[times factor ] |
|||
term => q[times factor ] |
|||
factor => q[times ] |
|||
identifier => q[times] |
|||
factor => q[factor ] |
|||
identifier => q[factor] |
|||
production => q[factor = number | '(' expr ')' . |
|||
] |
|||
name => q[factor] |
|||
identifier => q[factor] |
|||
expression => q[number | '(' expr ')' ] |
|||
term => q[number ] |
|||
factor => q[number ] |
|||
identifier => q[number] |
|||
term => q[ '(' expr ')' ] |
|||
factor => q['(' ] |
|||
literal => q['('] |
|||
factor => q[expr ] |
|||
identifier => q[expr] |
|||
factor => q[')' ] |
|||
literal => q[')'] |
|||
production => q[plus = "+" | "-" . |
|||
] |
|||
name => q[plus] |
|||
identifier => q[plus] |
|||
expression => q["+" | "-" ] |
|||
term => q["+" ] |
|||
factor => q["+" ] |
|||
literal => q["+"] |
|||
term => q[ "-" ] |
|||
factor => q["-" ] |
|||
literal => q["-"] |
|||
production => q[times = "*" | "/" . |
|||
] |
|||
name => q[times] |
|||
identifier => q[times] |
|||
expression => q["*" | "/" ] |
|||
term => q["*" ] |
|||
factor => q["*" ] |
|||
literal => q["*"] |
|||
term => q[ "/" ] |
|||
factor => q["/" ] |
|||
literal => q["/"] |
|||
production => q[number = digit { digit } . |
|||
] |
|||
name => q[number] |
|||
identifier => q[number] |
|||
expression => q[digit { digit } ] |
|||
term => q[digit { digit } ] |
|||
factor => q[digit ] |
|||
identifier => q[digit] |
|||
factor => q[{ digit } ] |
|||
repeat => q[{ digit } ] |
|||
expression => q[digit ] |
|||
term => q[digit ] |
|||
factor => q[digit ] |
|||
identifier => q[digit] |
|||
production => q[digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" . |
|||
] |
|||
name => q[digit] |
|||
identifier => q[digit] |
|||
expression => q["0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ] |
|||
term => q["0" ] |
|||
factor => q["0" ] |
|||
literal => q["0"] |
|||
term => q[ "1" ] |
|||
factor => q["1" ] |
|||
literal => q["1"] |
|||
term => q[ "2" ] |
|||
factor => q["2" ] |
|||
literal => q["2"] |
|||
term => q[ "3" ] |
|||
factor => q["3" ] |
|||
literal => q["3"] |
|||
term => q[ "4" ] |
|||
factor => q["4" ] |
|||
literal => q["4"] |
|||
term => q[ "5" ] |
|||
factor => q["5" ] |
|||
literal => q["5"] |
|||
term => q[ "6" ] |
|||
factor => q["6" ] |
|||
literal => q["6"] |
|||
term => q[ "7" ] |
|||
factor => q["7" ] |
|||
literal => q["7"] |
|||
term => q[ "8" ] |
|||
factor => q["8" ] |
|||
literal => q["8"] |
|||
term => q[ "9" ] |
|||
factor => q["9" ] |
|||
literal => q["9"] |
|||
Parsing EBNF grammar 'unnamed': |
|||
{ |
{ |
||
Line 620: | Line 796: | ||
} |
} |
||
Valid syntax. |
|||
Parses as valid EBNF. |
|||
------------------------------------------------------------ |
|||
2 is valid. |
|||
2*3 + 4/23 - 7 is valid. |
|||
(3 + 4) * 6-2+(4*(4)) is valid. |
|||
-2 is NOT valid. |
|||
3 + is NOT valid. |
|||
(4 + 3 is NOT valid. |
|||
Testing: |
|||
************************************************************ |
|||
Testing EBNF grammar: |
|||
2 - valid. |
|||
2*3 + 4/23 - 7 - valid. |
|||
(3 + 4) * 6-2+(4*(4)) - valid. |
|||
-2 - NOT valid. |
|||
3 + - NOT valid. |
|||
(4 + 3 - NOT valid. |
|||
******************************************************************************* |
|||
******************************************************************************* |
|||
Parsing EBNF grammar: |
|||
a = "1"; |
a = "1"; |
||
Invalid |
Invalid syntax. Can not be parsed. |
||
************************************************************ |
|||
******************************************************************************* |
|||
Testing EBNF grammar: |
|||
******************************************************************************* |
|||
Parsing EBNF grammar: |
|||
{ a = "1" ; |
{ a = "1" ; |
||
Invalid |
Invalid syntax. Can not be parsed. |
||
************************************************************ |
|||
******************************************************************************* |
|||
Testing EBNF grammar: |
|||
******************************************************************************* |
|||
Parsing EBNF grammar: |
|||
{ hello world = "1"; } |
{ hello world = "1"; } |
||
Invalid |
Invalid syntax. Can not be parsed. |
||
************************************************************ |
|||
******************************************************************************* |
|||
Testing EBNF grammar: |
|||
******************************************************************************* |
|||
Syntax Tree: |
|||
q[{ foo = bar . }] |
|||
production => q[foo = bar . ] |
|||
name => q[foo] |
|||
identifier => q[foo] |
|||
expression => q[bar ] |
|||
term => q[bar ] |
|||
factor => q[bar ] |
|||
identifier => q[bar] |
|||
Parsing EBNF grammar 'unnamed': |
|||
{ foo = bar . } |
{ foo = bar . } |
||
Valid syntax. |
|||
Parses as valid EBNF. |
|||
------------------------------------------------------------ |
|||
Method 'bar' not found for invocant of class 'anonymous' |
|||
in 'anonymous::foo' at line 3:EBNFtest3 |
|||
in 'anonymous::TOP' at line 2:EBNFtest3 |
|||
in 'Grammar::parse' at line 6466:src/gen/core.pm |
|||
in main program body at line 12:EBNFtest3 |
|||
Testing: |
|||
************************************************************ |
|||
</pre> |
|||
foobar - No such method 'bar' for invocant of type 'unnamed' |
|||
******************************************************************************* |
|||
</pre> |
|||
=={{header|PicoLisp}}== |
=={{header|PicoLisp}}== |