Compiler/lexical analyzer: Difference between revisions
Content added Content deleted
(Corrected bug with character literal (missing “next()” to advance to next character).) |
(Added QB64 Example) |
||
Line 7,902: | Line 7,902: | ||
23 1 End_of_input |
23 1 End_of_input |
||
</pre> |
</pre> |
||
</b> |
|||
=={{header|QB64}}== |
|||
Tested with QB64 1.5 |
|||
<lang QB64>dim shared source as string, the_ch as string, tok as string, toktyp as string |
|||
dim shared line_n as integer, col_n as integer, text_p as integer, err_line as integer, err_col as integer, errors as integer |
|||
declare function isalnum&(s as string) |
|||
declare function isalpha&(s as string) |
|||
declare function isdigit&(s as string) |
|||
declare sub divide_or_comment |
|||
declare sub error_exit(line_n as integer, col_n as integer, msg as string) |
|||
declare sub follow(c as string, typ2 as string, typ1 as string) |
|||
declare sub nextch |
|||
declare sub nexttok |
|||
declare sub read_char_lit |
|||
declare sub read_ident |
|||
declare sub read_number |
|||
declare sub read_string |
|||
const c_integer = "Integer", c_ident = "Identifier", c_string = "String" |
|||
dim out_fn as string, out_tok as string |
|||
if command$(1) = "" then print "Expecting a filename": end |
|||
open command$(1) for binary as #1 |
|||
source = space$(lof(1)) |
|||
get #1, 1, source |
|||
close #1 |
|||
out_fn = command$(2): if out_fn <> "" then open out_fn for output as #1 |
|||
line_n = 1: col_n = 0: text_p = 1: the_ch = " " |
|||
do |
|||
call nexttok |
|||
select case toktyp |
|||
case c_integer, c_ident, c_string: out_tok = tok |
|||
case else: out_tok = "" |
|||
end select |
|||
if out_fn = "" then |
|||
print err_line, err_col, toktyp, out_tok |
|||
else |
|||
print #1, err_line, err_col, toktyp, out_tok |
|||
end if |
|||
loop until errors or tok = "" |
|||
if out_fn <> "" then close #1 |
|||
end |
|||
' get next tok, toktyp |
|||
sub nexttok |
|||
toktyp = "" |
|||
restart: err_line = line_n: err_col = col_n: tok = the_ch |
|||
select case the_ch |
|||
case " ", chr$(9), chr$(10): call nextch: goto restart |
|||
case "/": call divide_or_comment: if tok = "" then goto restart |
|||
case "%": call nextch: toktyp = "Op_mod" |
|||
case "(": call nextch: toktyp = "LeftParen" |
|||
case ")": call nextch: toktyp = "RightParen" |
|||
case "*": call nextch: toktyp = "Op_multiply" |
|||
case "+": call nextch: toktyp = "Op_add" |
|||
case ",": call nextch: toktyp = "Comma" |
|||
case "-": call nextch: toktyp = "Op_subtract" |
|||
case ";": call nextch: toktyp = "Semicolon" |
|||
case "{": call nextch: toktyp = "LeftBrace" |
|||
case "}": call nextch: toktyp = "RightBrace" |
|||
case "&": call follow("&", "Op_and", "") |
|||
case "|": call follow("|", "Op_or", "") |
|||
case "!": call follow("=", "Op_notequal", "Op_not") |
|||
case "<": call follow("=", "Op_lessequal", "Op_less") |
|||
case "=": call follow("=", "Op_equal", "Op_assign") |
|||
case ">": call follow("=", "Op_greaterequal", "Op_greater") |
|||
case chr$(34): call read_string |
|||
case chr$(39): call read_char_lit |
|||
case "": toktyp = "End_of_input" |
|||
case else |
|||
if isdigit&(the_ch) then |
|||
call read_number |
|||
elseif isalpha&(the_ch) then |
|||
call read_ident |
|||
else |
|||
call nextch |
|||
end if |
|||
end select |
|||
end sub |
|||
sub follow(c as string, if_both as string, if_one as string) |
|||
call nextch |
|||
if the_ch = c then |
|||
tok = tok + the_ch |
|||
call nextch |
|||
toktyp = if_both |
|||
else |
|||
if if_one = "" then call error_exit(line_n, col_n, "Expecting " + c): exit sub |
|||
toktyp = if_one |
|||
end if |
|||
end sub |
|||
sub read_string |
|||
toktyp = c_string |
|||
call nextch |
|||
do |
|||
tok = tok + the_ch |
|||
select case the_ch |
|||
case chr$(10): call error_exit(line_n, col_n, "EOL in string"): exit sub |
|||
case "": call error_exit(line_n, col_n, "EOF in string"): exit sub |
|||
case chr$(34): call nextch: exit sub |
|||
case else: call nextch |
|||
end select |
|||
loop |
|||
end sub |
|||
sub read_char_lit |
|||
toktyp = c_integer |
|||
call nextch |
|||
if the_ch = chr$(39) then |
|||
call error_exit(err_line, err_col, "Empty character constant"): exit sub |
|||
end if |
|||
if the_ch = "\" then |
|||
call nextch |
|||
if the_ch = "n" then |
|||
tok = "10" |
|||
elseif the_ch = "\" then |
|||
tok = "92" |
|||
else |
|||
call error_exit(line_n, col_n, "Unknown escape sequence:" + the_ch): exit sub |
|||
end if |
|||
else |
|||
tok = ltrim$(str$(asc(the_ch))) |
|||
end if |
|||
call nextch |
|||
if the_ch <> chr$(39) then |
|||
call error_exit(line_n, col_n, "Multi-character constant"): exit sub |
|||
end if |
|||
call nextch |
|||
end sub |
|||
sub divide_or_comment |
|||
call nextch |
|||
if the_ch <> "*" then |
|||
toktyp = "Op_divide" |
|||
else ' skip comments |
|||
tok = "" |
|||
call nextch |
|||
do |
|||
if the_ch = "*" then |
|||
call nextch |
|||
if the_ch = "/" then |
|||
call nextch |
|||
exit sub |
|||
end if |
|||
elseif the_ch = "" then |
|||
call error_exit(line_n, col_n, "EOF in comment"): exit sub |
|||
else |
|||
call nextch |
|||
end if |
|||
loop |
|||
end if |
|||
end sub |
|||
sub read_ident |
|||
do |
|||
call nextch |
|||
if not isalnum&(the_ch) then exit do |
|||
tok = tok + the_ch |
|||
loop |
|||
select case tok |
|||
case "else": toktyp = "keyword_else" |
|||
case "if": toktyp = "keyword_if" |
|||
case "print": toktyp = "keyword_print" |
|||
case "putc":: toktyp = "keyword_putc" |
|||
case "while": toktyp = "keyword_while" |
|||
case else: toktyp = c_ident |
|||
end select |
|||
end sub |
|||
sub read_number |
|||
toktyp = c_integer |
|||
do |
|||
call nextch |
|||
if not isdigit&(the_ch) then exit do |
|||
tok = tok + the_ch |
|||
loop |
|||
if isalpha&(the_ch) then |
|||
call error_exit(err_line, err_col, "Bogus number: " + tok + the_ch): exit sub |
|||
end if |
|||
end sub |
|||
function isalpha&(s as string) |
|||
dim c as string |
|||
c = left$(s, 1) |
|||
isalpha& = c <> "" and instr("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_", c) > 0 |
|||
end function |
|||
function isdigit&(s as string) |
|||
dim c as string |
|||
c = left$(s, 1) |
|||
isdigit& = c <> "" and instr("0123456789", c) > 0 |
|||
end function |
|||
function isalnum&(s as string) |
|||
dim c as string |
|||
c = left$(s, 1) |
|||
isalnum& = c <> "" and instr("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_", c) > 0 |
|||
end function |
|||
' get next char - fold cr/lf into just lf |
|||
sub nextch |
|||
the_ch = "" |
|||
col_n = col_n + 1 |
|||
if text_p > len(source) then exit sub |
|||
the_ch = mid$(source, text_p, 1) |
|||
text_p = text_p + 1 |
|||
if the_ch = chr$(13) then |
|||
the_ch = chr$(10) |
|||
if text_p <= len(source) then |
|||
if mid$(source, text_p, 1) = chr$(10) then |
|||
text_p = text_p + 1 |
|||
end if |
|||
end if |
|||
end if |
|||
if the_ch = chr$(10) then |
|||
line_n = line_n + 1 |
|||
col_n = 0 |
|||
end if |
|||
end sub |
|||
sub error_exit(line_n as integer, col_n as integer, msg as string) |
|||
errors = -1 |
|||
print line_n, col_n, msg |
|||
end |
|||
end sub |
|||
</lang> |
|||
{{out|case=test case 3}} |
|||
<b> |
|||
<pre> 5 16 keyword_print |
|||
5 40 Op_subtract |
|||
6 16 keyword_putc |
|||
6 40 Op_less |
|||
7 16 keyword_if |
|||
7 40 Op_greater |
|||
8 16 keyword_else |
|||
8 40 Op_lessequal |
|||
9 16 keyword_while |
|||
9 40 Op_greaterequal |
|||
10 16 LeftBrace |
|||
10 40 Op_equal |
|||
11 16 RightBrace |
|||
11 40 Op_notequal |
|||
12 16 LeftParen |
|||
12 40 Op_and |
|||
13 16 RightParen |
|||
13 40 Op_or |
|||
14 16 Op_subtract |
|||
14 40 Semicolon |
|||
15 16 Op_not |
|||
15 40 Comma |
|||
16 16 Op_multiply |
|||
16 40 Op_assign |
|||
17 16 Op_divide |
|||
17 40 Integer 42 |
|||
18 16 Op_mod |
|||
18 40 String "String literal" |
|||
19 16 Op_add |
|||
19 40 Identifier variable_name |
|||
20 26 Integer 10 |
|||
21 26 Integer 92 |
|||
22 26 Integer 32 |
|||
23 1 End_of_input</pre> |
|||
</b> |
</b> |
||