Compiler/Verifying syntax: Difference between revisions

No edit summary
Line 313:
l or {{EXPECTED A PRIMARY}}_m
false
</pre>
 
=={{header|Phix}}==
<lang Phix>-- demo\rosetta\Compiler\Verify_Syntax.exw
-- Scanner state shared by skip_spaces(), next_token() and verify_syntax():
--  src is the text being verified,
--  sdx is the 1-based index of the next character of src to examine,
--  ch is the character most recently read from src[sdx].
string src
integer ch, sdx
procedure skip_spaces()
-- Advance sdx past any run of space, tab, CR or LF characters.
-- On return either sdx is beyond the end of src (ch is then left as it was),
-- or ch holds the non-whitespace character found at src[sdx].
    while sdx<=length(src) do
        ch = src[sdx]
        if find(ch," \t\r\n")=0 then
            exit
        end if
        sdx += 1
    end while
end procedure
-- Token kinds; a token is a sequence whose first element is one of these.
enum SYMBOL, STRING, INTEGER, IDENT, ERROR, EOF
-- Printable names for the token kinds, indexed by the enum values above.
constant toktypes = {"SYMBOL","STRING","INTEGER","IDENT","ERROR","EOF"}
-- The current token, set by next_token() (and overwritten with an
-- {ERROR,msg} pair by primary() when a syntax error is detected).
sequence tok
 
function sprintok(string fmt)
-- Return fmt (which should contain a single %v) applied to the current
-- token, with the numeric token kind replaced by its name from toktypes.
-- Works on a local copy, so the global tok keeps its numeric kind and
-- remains valid for later tok[1]=... tests and for repeated calls.
    sequence shown = tok
    shown[1] = toktypes[shown[1]]
    return sprintf(fmt,{shown})
end function
 
procedure next_token()
-- Scans the next token from src, starting at sdx, and stores it in tok,
-- leaving sdx just past the text consumed. The result is one of:
--  {SYMBOL,s} where s is a one-character string from "()+-/*=&<", or
--  {STRING,string} (the text between matching ' or " quotes), or
--  {INTEGER,n}, or
--  {IDENT,string}, or
--  {ERROR,msg}, or
--  {EOF}
-- There is no keyword table here: words such as and/or/not/true/false
-- are returned as IDENT and it is left to the parser to recognise them.
    skip_spaces()
    integer tokstart = sdx
    if tok[1]=ERROR then
        -- an error token is sticky: report it (debug aid) and scan no further
        ?{"erm, tok is",tok} -- looping??
    elsif sdx>length(src) then
        tok = {EOF}
    elsif find(ch,"()+-/*=&<") then
        sdx += 1
        tok = {SYMBOL,ch&""}
    elsif ch='\"'
       or ch='\'' then
        integer closech = ch
        -- assume the worst; replaced below if the closing quote is found
        tok = {ERROR,"no closing quote"}
        sdx += 1
        for tokend=sdx to length(src) do
            if src[tokend]=closech then
                sdx = tokend+1
                tok = {STRING,src[tokstart+1..tokend-1]}
                exit
            end if
        end for
    elsif (ch>='0' and ch<='9') then
        -- atom rather than integer: a long run of digits must not abort
        -- the verifier with a type check error when the value outgrows
        -- the integer range
        atom n = ch-'0'
        while true do
            sdx += 1
            if sdx>length(src) then exit end if
            ch = src[sdx]
            if ch<'0' or ch>'9' then exit end if
            n = n*10 + ch-'0'
        end while
        tok = {INTEGER,n}
    elsif (ch>='a' and ch<='z')
       or (ch>='A' and ch<='Z') then
        -- letter, then any mix of letters, digits and underscores
        while true do
            sdx += 1
            if sdx>length(src) then exit end if
            ch = src[sdx]
            if ch!='_'
            and (ch<'a' or ch>'z')
            and (ch<'A' or ch>'Z')
            and (ch<'0' or ch>'9') then
                exit
            end if
        end while
        tok = {IDENT,src[tokstart..sdx-1]}
    elsif ch='_' then
        tok = {ERROR,"identifiers may not start with _"}
        sdx += 1
    else
        -- the single atom ch is applied to both the %c and the %d
        tok = {ERROR,sprintf("illegal char (%c/%d)",ch)}
        sdx += 1
    end if
end procedure
 
-- primary() calls or_expr() for parenthesised sub-expressions, but or_expr()
-- is only defined further down, hence this forward declaration.
forward procedure or_expr()
 
procedure primary()
-- Parse one primary: an identifier, an integer, a quoted "true"/"false"
-- string, or a parenthesised or_expr. On success the token(s) are consumed;
-- otherwise tok is replaced with an {ERROR,msg} describing the problem.
    integer kind = tok[1]
    integer accept = (kind=IDENT or kind=INTEGER)
    if not accept and kind=STRING then
        -- tok[2] is only examined for STRING tokens ({EOF} has no tok[2])
        accept = (find(tok[2],{"true","false"})!=0)
    end if
    if accept then
        next_token()
    elsif equal(tok,{SYMBOL,"("}) then
        next_token()
        or_expr()
        if equal(tok,{SYMBOL,")"}) then
            next_token()
        else
            tok = {ERROR,") expected"}
        end if
    else
        tok = {ERROR,sprintok("invalid [%v]")}
    end if
end procedure
 
procedure mul_expr()
-- Parse primary { ("*"|"/") primary }.
    primary()
    while find(tok,{{SYMBOL,"*"},{SYMBOL,"/"}}) do
        next_token()
        primary()
    end while
end procedure
 
procedure sum_expr()
-- Parse mul_expr { ("+"|"-") mul_expr }.
    mul_expr()
    while find(tok,{{SYMBOL,"+"},{SYMBOL,"-"}}) do
        next_token()
        mul_expr()
    end while
end procedure
 
procedure cmp_expr()
-- Parse [ "not" ] sum_expr { ("="|"<") sum_expr }.
-- The optional "not" is only recognised once, at the very start, so it is
-- not accepted directly after a comparison operator.
    if equal(tok,{IDENT,"not"}) then
        next_token()
    end if
    sum_expr()
    while find(tok,{{SYMBOL,"="},{SYMBOL,"<"}}) do
        next_token()
        sum_expr()
    end while
end procedure
 
procedure and_expr()
-- Parse cmp_expr { "and" cmp_expr }.
    cmp_expr()
    while equal(tok,{IDENT,"and"}) do
        next_token()
        cmp_expr()
    end while
end procedure
 
procedure or_expr()
-- Parse and_expr { "or" and_expr }.
    and_expr()
    while equal(tok,{IDENT,"or"}) do
        next_token()
        and_expr()
    end while
end procedure
 
procedure statement()
-- Top-level grammar rule: a statement is simply a single or_expr.
or_expr()
end procedure
procedure verify_syntax(string source)
-- Check source against the expression grammar and print one line holding
-- the source text followed by "pass", or "fail" plus the offending token.
-- Input passes only if parsing stops exactly on the EOF token.
    src = source
    sdx = 1
    tok = {0} -- ("not error"/invalid-ish)
    next_token()
    statement()
    string verdict
    if tok[1]=EOF then
        verdict = "pass"
    else
        verdict = sprintok("fail [tok=%v]")
    end if
    printf(1,"%30s ==> %s\n",{source,verdict})
end procedure
 
-- Sample inputs, a mix of valid and invalid expressions; each one is run
-- through verify_syntax() below, which prints a pass/fail line for it.
constant tests = {
"(42 + 3",
" not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true",
" and 3 < 2",
"not 7 < 2",
"4 * (32 - 16) + 9 = 73",
"235 76 + 1",
"a + b = not c and false",
"a + b = (not c) and false",
"ab_c / bd2 or < e_f7",
"g not = h",
"été = false",
"i++",
"j & k",
"l or _m"}
 
-- Verify every sample in turn.
for i=1 to length(tests) do
verify_syntax(tests[i])
end for</lang>
{{out}}
Note that "= not c" fails, whereas "= (not c)" passes; see the talk page.
<pre>
(42 + 3 ==> fail [tok={"ERROR",") expected"}]
not 3 < 4 or (true or 3 / 4 + 8 * 5 - 5 * 2 < 56) and 4 * 3 < 12 or not true ==> pass
and 3 < 2 ==> fail [tok={"INTEGER",3}]
not 7 < 2 ==> pass
4 * (32 - 16) + 9 = 73 ==> pass
235 76 + 1 ==> fail [tok={"INTEGER",76}]
a + b = not c and false ==> fail [tok={"IDENT","c"}]
a + b = (not c) and false ==> pass
ab_c / bd2 or < e_f7 ==> fail [tok={"ERROR",`invalid [{"SYMBOL","<"}]`}]
g not = h ==> fail [tok={"IDENT","not"}]
été = false ==> fail [tok={"ERROR",`invalid [{"ERROR","illegal char (Ã/195)"}]`}]
i++ ==> fail [tok={"ERROR",`invalid [{"SYMBOL","+"}]`}]
j & k ==> fail [tok={"SYMBOL","&"}]
l or _m ==> fail [tok={"ERROR",`invalid [{"ERROR","identifiers may not start with _"}]`}]
</pre>
7,815

edits