Parse EBNF: Difference between revisions

Content added Content deleted
(→‎{{header|Perl 6}}: Add Perl 6 example)
(→‎{{header|Perl 6}}: Updated to work with current specs (repetion operator changed). Also displays syntax tree now)
Line 440: Line 440:


=={{header|Perl 6}}==
=={{header|Perl 6}}==
{{works with|Rakudo|2011.07}}
{{works with|Rakudo|2012.05}}
This is a fairly naive implementation of an EBNF parser. It works, but takes some shortcuts and implements a subset of EBNF. The biggest restriction is that identifiers can only contain alpha-numeric characters rather than anything but EBNF operators: |(){}[];,"' So identifiers like <?-&#@>, though technically correct, won't work.


This parses the EBNF rule set using a perl 6 grammar, then if it parses as valid EBNF, constructs a grammar and parses the test strings with that. EBNF rule sets that are naively syntactically correct but missing rules will parse as valid but will give a runtime failure warning about missing methods.
This parses the EBNF rule set using a perl 6 grammar, then if it parses as valid EBNF, constructs a grammar and parses the test strings with that. EBNF rule sets that are naively syntactically correct but missing rules will parse as valid but will give a runtime failure warning about missing methods.
Line 447: Line 446:
<lang perl6># A perl 6 grammar to parse EBNF
<lang perl6># A perl 6 grammar to parse EBNF
grammar EBNF {
grammar EBNF {
rule TOP { ^^<title>? '{' [ <ruleset> ]+ '}' <comment>?$$ }
rule TOP { ^ <title>? '{' [ <production> ]+ '}' <comment>? $ }
rule ruleset { <name> '=' <expression> <[.;]> }
rule production { <name> '=' <expression> <[.;]> }
rule expression { <term> ** "|" }
rule expression { <term> +% "|" }
rule term { <factor>+ }
rule term { <factor>+ }
rule factor { <group> | <repeat> | <optional> | <identifier> | <literal> }
rule factor { <group> | <repeat> | <optional> | <identifier> | <literal> }
Line 455: Line 454:
rule repeat { '{' <expression> '}' }
rule repeat { '{' <expression> '}' }
rule optional { '[' <expression> ']' }
rule optional { '[' <expression> ']' }
token identifier { \w+ }
token identifier { <-[\|\(\)\{\}\[\]\.\;\"\'\s]>+ } #'" Defeat confused syntax highlighting
token literal { "'" <-[']>+ "'" | '"' <-["]>+ '"' } #" bogus comment to defeat confused syntax highlighter
token literal { ["'" <-[']>+ "'" | '"' <-["]>+ '"'] } #'" Defeat confused syntax highlighting
token title { <literal> }
token title { <literal> }
token comment { <literal> }
token comment { <literal> }
token name { <identifier> }
token name { <identifier> <?before \h* '='> }
}
}


# And actions to build a EBNF parser
class EBNF::Actions {
class EBNF::Actions {
method TOP($/) {
method TOP($/) {
my @top;
say "Syntax Tree:\n", $/; # Dump the syntax tree to STDOUT
my $grammar = $/;
$grammar.=subst(/<-[\{]>*\{\s*/, '');
for $grammar.split(/\n\h*\n/)[0] -> $f {
for $f.split(/\n\h*/) -> $g {
next if $g ~~ /^\W/;
@top.push: '<' ~ $g.split(' =')[0] ~ '>';
}
}
make 'grammar ' ~
make 'grammar ' ~
($<title>.subst(/\W/, '', :g) || 'anonymous') ~
($<title>.subst(/\W/, '', :g) || 'unnamed') ~
" \{\n rule TOP \{^[" ~ (join '|', @top ) ~
" \{\n rule TOP \{^[<" ~ $/<production>[0]<name> ~
"]+\$\}\n " ~ $<ruleset>>>.ast ~ "\n\}"
">]+\$\}\n " ~ $<production>>>.ast ~ "\}"
}
}
method ruleset($/) {
method production($/) {
make 'rule ' ~ $<name> ~ ' {' ~
make 'token ' ~ $<name> ~ ' {' ~
$<expression>.ast ~ "\\h*}\n"
$<expression>.ast ~ "}\n"
}
}
method expression($/) { make join '|', $<term>>>.ast }
method expression($/) { make join '|', $<term>>>.ast }
method term($/) { make join '\h*', $<factor>>>.ast }
method term($/) { make join '\h*', $<factor>>>.ast }
method factor($/) { make $<literal> ?? $<literal> !!
method factor($/) {
$<group> ?? '[' ~ $<group>.ast ~ ']' !!
make $<literal> ?? $<literal> !!
$<repeat> ?? '[' ~ $<repeat>.ast ~ ']*' !!
$<group> ?? '[' ~ $<group>.ast ~ ']' !!
$<optional> ?? '[' ~ $<optional>.ast ~ ']?' !!
$<repeat> ?? '[' ~ $<repeat>.ast ~ '\\s*]*' !!
'<' ~ $<identifier> ~ '>'
$<optional> ?? '[' ~ $<optional>.ast ~ ']?' !!
'<' ~ $<identifier> ~ '>'
}
}
method repeat($/) { make $<expression>.ast }
method repeat($/) { make $<expression>.ast }
Line 496: Line 487:
}
}


# An array of test cases
# Now test as follows
my @tests = (
my @tests = (
{
{
Line 509: Line 500:
'a1 a3 a4 a6',
'a1 a3 a4 a6',
'a1 a4 a5 a6',
'a1 a4 a5 a6',
'a1 a2 a4 a4 a5 a6',
'a1 a2 a4 a5 a5 a6',
'a1 a2 a4 a5 a5 a6',
'a1 a2 a4 a5 a6 a7',
'a1 a2 a4 a5 a6 a7',
Line 539: Line 531:
{
{
ebnf => q<a = "1";>,
ebnf => q<a = "1";>,
teststrings => ['foo']
teststrings => ['foobar']
},
},
{
{
ebnf => q<{ a = "1" ;>,
ebnf => q<{ a = "1" ;>,
teststrings => ['foo']
teststrings => ['foobar']
},
},
{
{
ebnf => q<{ hello world = "1"; }>,
ebnf => q<{ hello world = "1"; }>,
teststrings => ['foo']
teststrings => ['foobar']
},
},
{
{
ebnf => q<{ foo = bar . }>,
ebnf => q<{ foo = bar . }>,
teststrings => ['foo']
teststrings => ['foobar']
},
}
);
);


# Test the parser.
my $i = 1;
my $i = 1;
for @tests -> $test {
for @tests -> $test {
my $a = EBNF::Actions;
say '*' x 79;
unless EBNF.parse($test<ebnf>) {
unless EBNF.parse($test<ebnf>) {
say "Testing EBNF grammar:\n";
say "Parsing EBNF grammar:\n";
say "{$test<ebnf>.subst(/^|(\n)\h*/, -> $/ {$0}, :g)}\n";
say "{$test<ebnf>.subst(/^^\h*/,'',:g)}\n";
say "Invalid EBNF grammar. Can not be parsed.";
say "Invalid syntax. Can not be parsed.\n";
say '*' x 60;
say '*' x 79;
next;
next;
}
}
my $p = EBNF.parse($test<ebnf>, :actions($a));
my $p = EBNF.parse($test<ebnf>, :actions(EBNF::Actions));
my $grammar = $p.ast;
my $grammar = $p.ast;
$grammar ~~ m/^['grammar '](\w+)/;
$grammar ~~ m/^'grammar '(\w+)/;
my $title = $0.Str;
my $title = $0;
my $fn = 'EBNFtest'~$i++;
my $fn = 'EBNFtest'~$i++;
my $fh = open($fn, :w) or die "$!\n";
my $fh = open($fn, :w) or die "$!\n";
$fh.say( $grammar);
$fh.say( $grammar );
$fh.say( q|say "Testing EBNF grammar:\n";| );
$fh.say( qq|say "Parsing EBNF grammar '$title':\\n";| );
$fh.say(qq|say q<{$test<ebnf>.subst(/^|(\n)\h*/,->$/{$0},:g)}>,"\n";|);
$fh.say( qq|say q<{$test<ebnf>.subst(/^^\h*/,'',:g)}>;| );
$fh.say( q|say "Parses as valid EBNF.";| );
$fh.say( q|say "\nValid syntax.\n\nTesting:\n";| );
$fh.say( q|say '-' x 60;| );
$fh.say( q|CATCH { die $_ };| );
my $len = [max] $test<teststrings>.flat>>.chars;
my $len = [max] $test<teststrings>.flat>>.chars;
for $test<teststrings>.flat -> $s {
for $test<teststrings>.flat -> $s {
$fh.say( qq|printf "%{$len}s is %svalid.\n", '{$s}',| ~
$fh.say( qq|printf "%{$len}s", '{$s}';\n| ~
qq|{$title}.parse('{$s}') ?? '' !! 'NOT ';|);
qq|printf " - %s\\n", try ({$title}.parse('{$s}'))| ~
qq| ?? 'valid.' !! 'NOT valid.';|
);
}
}
$fh.close;
$fh.close;
say qqx/perl6 $fn/;
say qqx/perl6 $fn/;
say '*' x 60;
say '*' x 79;
unlink $fn;
unlink $fn;
}
}</lang>
}</lang>


Output:
Output:
<pre>
<pre>
*******************************************************************************
Testing EBNF grammar:
Syntax Tree:
q["a" {
a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"]
title => q["a"]
literal => q["a"]
production => q[a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
]
name => q[a]
identifier => q[a]
expression => q["a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ]
term => q["a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ]
factor => q["a1" ]
literal => q["a1"]
factor => q[( "a2" | "a3" ) ]
group => q[( "a2" | "a3" ) ]
expression => q["a2" | "a3" ]
term => q["a2" ]
factor => q["a2" ]
literal => q["a2"]
term => q[ "a3" ]
factor => q["a3" ]
literal => q["a3"]
factor => q[{ "a4" } ]
repeat => q[{ "a4" } ]
expression => q["a4" ]
term => q["a4" ]
factor => q["a4" ]
literal => q["a4"]
factor => q[[ "a5" ] ]
optional => q[[ "a5" ] ]
expression => q["a5" ]
term => q["a5" ]
factor => q["a5" ]
literal => q["a5"]
factor => q["a6" ]
literal => q["a6"]
comment => q["z"]
literal => q["z"]

Parsing EBNF grammar 'a':


"a" {
"a" {
Line 595: Line 634:
} "z"
} "z"


Valid syntax.
Parses as valid EBNF.
------------------------------------------------------------
a1a3a4a4a5a6 is valid.
a1 a2a6 is valid.
a1 a3 a4 a6 is valid.
a1 a4 a5 a6 is NOT valid.
a1 a2 a4 a5 a5 a6 is NOT valid.
a1 a2 a4 a5 a6 a7 is NOT valid.
your ad here is NOT valid.


Testing:
************************************************************

Testing EBNF grammar:
a1a3a4a4a5a6 - valid.
a1 a2a6 - valid.
a1 a3 a4 a6 - valid.
a1 a4 a5 a6 - NOT valid.
a1 a2 a4 a4 a5 a6 - valid.
a1 a2 a4 a5 a5 a6 - NOT valid.
a1 a2 a4 a5 a6 a7 - NOT valid.
your ad here - NOT valid.

*******************************************************************************
*******************************************************************************
Syntax Tree:
q[{
expr = term { plus term } .
term = factor { times factor } .
factor = number | '(' expr ')' .

plus = "+" | "-" .
times = "*" | "/" .

number = digit { digit } .
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
}]
production => q[expr = term { plus term } .
]
name => q[expr]
identifier => q[expr]
expression => q[term { plus term } ]
term => q[term { plus term } ]
factor => q[term ]
identifier => q[term]
factor => q[{ plus term } ]
repeat => q[{ plus term } ]
expression => q[plus term ]
term => q[plus term ]
factor => q[plus ]
identifier => q[plus]
factor => q[term ]
identifier => q[term]
production => q[term = factor { times factor } .
]
name => q[term]
identifier => q[term]
expression => q[factor { times factor } ]
term => q[factor { times factor } ]
factor => q[factor ]
identifier => q[factor]
factor => q[{ times factor } ]
repeat => q[{ times factor } ]
expression => q[times factor ]
term => q[times factor ]
factor => q[times ]
identifier => q[times]
factor => q[factor ]
identifier => q[factor]
production => q[factor = number | '(' expr ')' .

]
name => q[factor]
identifier => q[factor]
expression => q[number | '(' expr ')' ]
term => q[number ]
factor => q[number ]
identifier => q[number]
term => q[ '(' expr ')' ]
factor => q['(' ]
literal => q['(']
factor => q[expr ]
identifier => q[expr]
factor => q[')' ]
literal => q[')']
production => q[plus = "+" | "-" .
]
name => q[plus]
identifier => q[plus]
expression => q["+" | "-" ]
term => q["+" ]
factor => q["+" ]
literal => q["+"]
term => q[ "-" ]
factor => q["-" ]
literal => q["-"]
production => q[times = "*" | "/" .

]
name => q[times]
identifier => q[times]
expression => q["*" | "/" ]
term => q["*" ]
factor => q["*" ]
literal => q["*"]
term => q[ "/" ]
factor => q["/" ]
literal => q["/"]
production => q[number = digit { digit } .
]
name => q[number]
identifier => q[number]
expression => q[digit { digit } ]
term => q[digit { digit } ]
factor => q[digit ]
identifier => q[digit]
factor => q[{ digit } ]
repeat => q[{ digit } ]
expression => q[digit ]
term => q[digit ]
factor => q[digit ]
identifier => q[digit]
production => q[digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
]
name => q[digit]
identifier => q[digit]
expression => q["0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ]
term => q["0" ]
factor => q["0" ]
literal => q["0"]
term => q[ "1" ]
factor => q["1" ]
literal => q["1"]
term => q[ "2" ]
factor => q["2" ]
literal => q["2"]
term => q[ "3" ]
factor => q["3" ]
literal => q["3"]
term => q[ "4" ]
factor => q["4" ]
literal => q["4"]
term => q[ "5" ]
factor => q["5" ]
literal => q["5"]
term => q[ "6" ]
factor => q["6" ]
literal => q["6"]
term => q[ "7" ]
factor => q["7" ]
literal => q["7"]
term => q[ "8" ]
factor => q["8" ]
literal => q["8"]
term => q[ "9" ]
factor => q["9" ]
literal => q["9"]

Parsing EBNF grammar 'unnamed':


{
{
Line 620: Line 796:
}
}


Valid syntax.
Parses as valid EBNF.
------------------------------------------------------------
2 is valid.
2*3 + 4/23 - 7 is valid.
(3 + 4) * 6-2+(4*(4)) is valid.
-2 is NOT valid.
3 + is NOT valid.
(4 + 3 is NOT valid.


Testing:
************************************************************

Testing EBNF grammar:
2 - valid.
2*3 + 4/23 - 7 - valid.
(3 + 4) * 6-2+(4*(4)) - valid.
-2 - NOT valid.
3 + - NOT valid.
(4 + 3 - NOT valid.

*******************************************************************************
*******************************************************************************
Parsing EBNF grammar:


a = "1";
a = "1";


Invalid EBNF grammar. Can not be parsed.
Invalid syntax. Can not be parsed.

************************************************************
*******************************************************************************
Testing EBNF grammar:
*******************************************************************************
Parsing EBNF grammar:


{ a = "1" ;
{ a = "1" ;


Invalid EBNF grammar. Can not be parsed.
Invalid syntax. Can not be parsed.

************************************************************
*******************************************************************************
Testing EBNF grammar:
*******************************************************************************
Parsing EBNF grammar:


{ hello world = "1"; }
{ hello world = "1"; }


Invalid EBNF grammar. Can not be parsed.
Invalid syntax. Can not be parsed.

************************************************************
*******************************************************************************
Testing EBNF grammar:
*******************************************************************************
Syntax Tree:
q[{ foo = bar . }]
production => q[foo = bar . ]
name => q[foo]
identifier => q[foo]
expression => q[bar ]
term => q[bar ]
factor => q[bar ]
identifier => q[bar]

Parsing EBNF grammar 'unnamed':


{ foo = bar . }
{ foo = bar . }


Valid syntax.
Parses as valid EBNF.
------------------------------------------------------------
Method 'bar' not found for invocant of class 'anonymous'
in 'anonymous::foo' at line 3:EBNFtest3
in 'anonymous::TOP' at line 2:EBNFtest3
in 'Grammar::parse' at line 6466:src/gen/core.pm
in main program body at line 12:EBNFtest3


Testing:
************************************************************
</pre>


foobar - No such method 'bar' for invocant of type 'unnamed'

*******************************************************************************
</pre>


=={{header|PicoLisp}}==
=={{header|PicoLisp}}==