Ed Davis
Joined 31 August 2022
no edit summary
No edit summary |
No edit summary |
||
Line 163:
<lang c>
/*
All lexical tokens - not
have to wait until syntax analysis
*/
Line 232:
;Implementations
=={{header|C}}==
Line 288 ⟶ 287:
}
static
the_ch = getc(source_fp);
++col;
Line 295 ⟶ 294:
col = 0;
}
return the_ch;
}
Line 301:
error(err_line, err_col, "gettok: empty character constant");
if (the_ch == '\\') {
if (the_ch == 'n')
n = 10;
Line 308:
else error(err_line, err_col, "gettok: unknown escape sequence \\%c", the_ch);
}
if (next_ch() != '\'')
return (tok_s){Integerk, err_line, err_col, {n}};
}
Line 320:
/* comment found */
for (;;) {
if (next_ch() == '*' && next_ch() == '/') {
}
error(err_line, err_col, "EOF in
}
}
Line 334 ⟶ 331:
da_rewind(text);
if (the_ch == '\n') error(err_line, err_col, "EOL in string");
if (the_ch == EOF) error(err_line, err_col, "
da_append(text, (char)the_ch);
}
da_append(text, '\0');
return (tok_s){Stringk, err_line, err_col, {.text=text}};
}
Line 373 ⟶ 368:
if (!isdigit(the_ch))
is_number = false;
}
if (da_len(text) == 0)
Line 391 ⟶ 386:
static tok_s follow(int expect, TokenType ifyes, TokenType ifno, int err_line, int err_col) { /* look ahead for '>=', etc. */
if (the_ch == expect) {
return (tok_s){ifyes, err_line, err_col, {0}};
}
if (ifno == EOI)
error(err_line, err_col, "follow: unrecognized character '%c' (%d)\n", the_ch, the_ch);
return (tok_s){ifno, err_line, err_col, {0}};
}
Line 401 ⟶ 397:
/* skip white space */
while (isspace(the_ch))
int err_line = line;
int err_col = col;
switch (the_ch) {
case '{':
case '}':
case '(':
case ')':
case '+':
case '-':
case '*':
case ';':
case ',':
case '>':
case '=':
case '/':
case '\'':
case '<':
case '!':
case '&':
case '"' : return string_lit(the_ch, err_line, err_col);
default: return ident_or_int(err_line, err_col);
Line 436 ⟶ 432:
"Uminus Mul Div Add Sub Lss Gtr Leq Neq "
"And Semi Comma Assign Integer String Ident "[tok.tok * 9]);
if (tok.tok == Integerk) fprintf(dest_fp, " %
else if (tok.tok == Ident) fprintf(dest_fp, " %s", tok.text);
else if (tok.tok == Stringk) fprintf(dest_fp, " \"%s\"", tok.text);
fprintf(dest_fp, "\n");
} while (tok.tok != EOI);
Line 460 ⟶ 454:
run();
}
</lang>
=={{header|Euphoria}}==
<lang euphoria>
include std/io.e
include std/map.e
include std/types.e
include std/convert.e
-- Boolean helpers, plus the end-of-input value that the scanner compares the_ch against
constant true = 1, false = 0, EOF = -1
-- Token types; main uses a token's type as an index into all_syms
enum EOI, Printk, Putc, Ifk, Whilek, Lbrace, Rbrace, Lparen, Rparen, Uminus, Mul, Div,
Add, Sub, Lss, Gtr, Leq, Neq, Andk, Semi, Comma, Assign, Integerk, Stringk, Ident
-- Printable name for each token type, listed in the same order as the enum above
constant all_syms = { "EOI", "Print", "Putc", "If", "While", "Lbrace", "Rbrace", "Lparen",
"Rparen", "Uminus", "Mul", "Div", "Add", "Sub", "Lss", "Gtr", "Leq", "Neq", "And",
"Semi", "Comma", "Assign", "Integer", "String", "Ident"}
-- Scanner state: input file handle, current character, current column and current line
integer input_file, the_ch = ' ', the_col = 0, the_line = 1
-- Character code -> single-character token type lookup; filled in by init()
sequence symbols
-- Keyword text -> token type; filled in by init()
map key_words = new()
-- Report a fatal error and stop.
--   format : printf-style format string
--   data   : sequence of values substituted into format
-- The formatted message is written to STDOUT, then the program is
-- terminated through abort(1).
procedure error(sequence format, sequence data)
    sequence message = sprintf(format, data)
    puts(STDOUT, message)
    abort(1)
end procedure
-- Read one character from input_file into the_ch and keep the position
-- counters current: a newline bumps the_line and resets the_col to 0,
-- any other character advances the_col by one.
-- Returns the character just read (whatever getc returned).
function next_ch()
    integer ch = getc(input_file)
    the_ch = ch
    if ch = '\n' then
        the_line += 1
        the_col = 0
    else
        the_col += 1
    end if
    return ch
end function
-- 'x' - character constants
-- Called with the_ch on the opening quote.  Scans a single character (or one
-- of the escapes \n and \\), requires the closing quote, and leaves the_ch on
-- the character after it.
--   err_line, err_col : position of the opening quote, used in messages and
--                       stored in the returned token
-- Returns {Integerk, err_line, err_col, n} where n is the character's code.
-- Calls error() (which aborts) on an empty constant, an unknown escape, or a
-- missing closing quote.
function char_lit(integer err_line, integer err_col)
    integer n = next_ch()               -- skip opening quote
    if the_ch = '\'' then
        error("%d %d empty character constant", {err_line, err_col})
    elsif the_ch = '\\' then
        next_ch()
        if the_ch = 'n' then
            n = 10
        elsif the_ch = '\\' then
            n = '\\'
        else
            -- %s prints an atom as a single character; it is the documented
            -- way to format a character and matches ident_or_int's message.
            error("%d %d unknown escape sequence \\%s", {err_line, err_col, the_ch})
        end if
    end if
    if next_ch() != '\'' then
        error("%d %d multi-character constant", {err_line, err_col})
    end if
    next_ch()                           -- step past the closing quote
    return {Integerk, err_line, err_col, n}
end function
-- process divide or comments
-- Called with the_ch on '/'.  If the next character is not '*', this is a
-- divide operator; otherwise a comment is skipped up to its closing "*/" and
-- the token that follows the comment is returned.
--   err_line, err_col : position of the '/', used in messages and stored in
--                       a returned Div token
-- Returns {Div, err_line, err_col}, or whatever get_tok() returns after the
-- comment.  Calls error() (which aborts) if input ends inside the comment.
function div_or_cmt(integer err_line, integer err_col)
    if next_ch() != '*' then
        return {Div, err_line, err_col}
    end if

    -- comment found
    next_ch()
    while true do
        if the_ch = '*' then
            if next_ch() = '/' then
                next_ch()
                return get_tok()
            end if
            -- Not a '/': loop again WITHOUT reading, so that the character
            -- just fetched is itself tested.  This is what lets a run of
            -- stars such as "**/" close the comment.
        elsif the_ch = EOF then
            error("%d %d EOF in comment", {err_line, err_col})
        else
            next_ch()
        end if
    end while
end function
-- "string"
-- Scan a string literal.  Called with the_ch on the opening delimiter.
--   start             : the delimiter character that also closes the string
--   err_line, err_col : position of the opening delimiter, used in messages
--                       and stored in the returned token
-- Returns {Stringk, err_line, err_col, text} with the_ch left on the
-- character after the closing delimiter.  Calls error() (which aborts) if
-- end of input or a newline is met before the closing delimiter.
function string_lit(integer start, integer err_line, integer err_col)
    string body = ""
    integer ch = next_ch()

    while ch != start do
        if ch = EOF then
            error("%d %d EOF while scanning string literal", {err_line, err_col})
        end if
        if ch = '\n' then
            error("%d %d EOL while scanning string literal", {err_line, err_col})
        end if
        body = append(body, ch)
        ch = next_ch()
    end while

    next_ch()       -- step past the closing delimiter
    return {Stringk, err_line, err_col, body}
end function
-- Scan an identifier, keyword or integer literal starting at the_ch.
--   err_line, err_col : position of the first character, used in messages
--                       and stored in the returned token
-- Returns one of
--   {Integerk, err_line, err_col, value}   for a run of digits
--   {keyword_token, err_line, err_col}     when the text is in key_words
--   {Ident, err_line, err_col, text}       otherwise
-- Calls error() (which aborts) when no character could be consumed, or when
-- the text starts with a digit but contains non-digit characters.
function ident_or_int(integer err_line, integer err_col)
    string lexeme = ""
    integer digits_only = true

    -- gather letters, digits and underscores
    while true do
        if not t_alnum(the_ch) and the_ch != '_' then
            exit
        end if
        if not t_digit(the_ch) then
            digits_only = false
        end if
        lexeme = append(lexeme, the_ch)
        next_ch()
    end while

    if length(lexeme) = 0 then
        error("%d %d ident_or_int: unrecognized character: (%d) '%s'", {err_line, err_col, the_ch, the_ch})
    end if

    -- anything not starting with a digit is a keyword or an identifier
    if not t_digit(lexeme[1]) then
        if has(key_words, lexeme) then
            return {get(key_words, lexeme), err_line, err_col}
        end if
        return {Ident, err_line, err_col, lexeme}
    end if

    if not digits_only then
        error("%d %d invalid number: %s", {err_line, err_col, lexeme})
    end if
    return {Integerk, err_line, err_col, to_integer(lexeme)}
end function
-- Decide between a one- and a two-character operator ('<' vs '<=', etc.).
-- Reads the character after the current one and compares it with expect.
--   expect            : second character of the two-character operator
--   ifyes             : token type returned when expect is present
--   ifno              : token type returned otherwise; EOI here means the
--                       first character is not valid on its own
--   err_line, err_col : position of the first character
-- Returns {ifyes, err_line, err_col} (after consuming expect) or
-- {ifno, err_line, err_col}.  Calls error() (which aborts) when expect is
-- absent and ifno is EOI.
function follow(integer expect, integer ifyes, integer ifno, integer err_line, integer err_col)
    integer lookahead = next_ch()

    if lookahead != expect then
        if ifno = EOI then
            error("%d %d follow: unrecognized character: (%d)", {err_line, err_col, the_ch})
        end if
        return {ifno, err_line, err_col}
    end if

    next_ch()       -- consume the second character of the operator
    return {ifyes, err_line, err_col}
end function
-- return the next token type
-- Skips white space, records the position of the first significant
-- character, and dispatches on that character.
-- Returns a token sequence {type, line, col} with a fourth element (value or
-- text) for Integerk, Stringk and Ident tokens; {EOI, line, col} at end of
-- input.
function get_tok()
    -- skip white space
    while t_space(the_ch) do
        next_ch()
    end while

    integer err_line = the_line
    integer err_col = the_col

    switch the_ch do
        case EOF then return {EOI, err_line, err_col}
        case '/' then return div_or_cmt(err_line, err_col)
        case '\'' then return char_lit(err_line, err_col)
        case '<' then return follow('=', Leq, Lss, err_line, err_col)
        case '!' then return follow('=', Neq, EOI, err_line, err_col)
        case '&' then return follow('&', Andk, EOI, err_line, err_col)
        case '"' then return string_lit(the_ch, err_line, err_col)
        case else
            -- Only index the lookup table with a valid subscript.  A NUL
            -- byte (the_ch = 0) would otherwise be a runtime subscript
            -- failure; with the guard it falls through to ident_or_int,
            -- which reports it as an unrecognized character.
            integer sym = EOI
            if the_ch >= 1 and the_ch <= length(symbols) then
                sym = symbols[the_ch]
            end if
            if sym != EOI then
                next_ch()
                return {sym, err_line, err_col}
            end if
            return ident_or_int(err_line, err_col)
    end switch
end function
-- Populate the two lookup tables used by the scanner:
--   key_words : keyword text -> token type
--   symbols   : 256-entry table, character code -> single-character token
--               type, with EOI marking "no such token"
procedure init()
    sequence reserved = {
        {"if",    Ifk},
        {"print", Printk},
        {"putc",  Putc},
        {"while", Whilek}
    }
    sequence punctuation = {
        {'{', Lbrace}, {'}', Rbrace},
        {'(', Lparen}, {')', Rparen},
        {'+', Add},    {'-', Sub},
        {'*', Mul},    {';', Semi},
        {',', Comma},  {'>', Gtr},
        {'=', Assign}
    }

    for i = 1 to length(reserved) do
        put(key_words, reserved[i][1], reserved[i][2])
    end for

    symbols = repeat(EOI, 256)
    for i = 1 to length(punctuation) do
        symbols[punctuation[i][1]] = punctuation[i][2]
    end for
end procedure
-- Program driver.
--   cl : command-line sequence; when it has more than two elements, the
--        third is taken as the name of the file to scan, otherwise STDIN
--        is scanned
-- Prints one line per token (position, token name, and the value or text for
-- Integer, Ident and String tokens) until the EOI token has been printed.
-- Calls error() (which aborts) if the named file cannot be opened.
procedure main(sequence cl)
    sequence path
    sequence tok
    integer kind

    input_file = STDIN
    if length(cl) >= 3 then
        path = cl[3]
        input_file = open(path, "r")
        if input_file = -1 then
            error("Could not open %s", {path})
        end if
    end if

    init()

    while true do
        tok = get_tok()
        kind = tok[1]
        printf(STDOUT, "line %5d col %5d %-8s", {tok[2], tok[3], all_syms[kind]})

        if kind = Integerk then
            printf(STDOUT, " %5d\n", {tok[4]})
        elsif kind = Ident then
            printf(STDOUT, " %s\n", {tok[4]})
        elsif kind = Stringk then
            printf(STDOUT, " \"%s\"\n", {tok[4]})
        else
            puts(STDOUT, "\n")
        end if

        -- the EOI token is printed before the loop ends
        if kind = EOI then
            exit
        end if
    end while
end procedure
-- Program entry point: hand the command-line sequence to main
main(command_line())
</lang>
Line 689 ⟶ 898:
dim tok_list(tk_eoi to tk_ident) as string
tok_list(tk_eoi
tok_list(tk_print
tok_list(tk_putc
tok_list(tk_if
tok_list(tk_while
tok_list(tk_lbrace
tok_list(tk_rbrace
tok_list(tk_lparen
tok_list(tk_rparen
tok_list(tk_uminus
tok_list(tk_mul
tok_list(tk_div
tok_list(tk_add
tok_list(tk_sub
tok_list(tk_lss
tok_list(tk_gtr
tok_list(tk_leq
tok_list(tk_neq
tok_list(tk_and
tok_list(tk_semi
tok_list(tk_comma
tok_list(tk_assign
tok_list(tk_integer
tok_list(tk_string
tok_list(tk_ident
do
Line 735 ⟶ 944:
=={{header|Python}}==
<lang Python>
from __future__ import print_function
import sys
Line 762 ⟶ 972:
#*** get the next character from the input
def
global the_ch, the_col, the_line
Line 774 ⟶ 984:
#*** 'x' - character constants
def char_lit(err_line, err_col):
n = ord(
if the_ch == '\'':
error(err_line, err_col, "empty character constant")
elif the_ch == '\\':
if the_ch == 'n':
n = 10
Line 785 ⟶ 995:
else:
error(err_line, err_col, "unknown escape sequence \\%c" % (the_ch))
if
error(err_line, err_col, "multi-character constant")
return Integerk, err_line, err_col, n
#*** process divide or comments
def div_or_cmt(err_line, err_col):
if
return Div, err_line, err_col
# comment found
while True:
if
return gettok()
elif len(the_ch) == 0:
Line 807 ⟶ 1,017:
text = ""
while
if len(the_ch) == 0:
error(err_line, err_col, "EOF while scanning string literal")
Line 814 ⟶ 1,024:
text += the_ch
return Stringk, err_line, err_col, text
Line 826 ⟶ 1,036:
if not the_ch.isdigit():
is_number = False
if len(text) == 0:
Line 844 ⟶ 1,054:
#*** look ahead for '>=', etc.
def follow(expect, ifyes, ifno, err_line, err_col):
if
return ifyes, err_line, err_col
Line 856 ⟶ 1,066:
def gettok():
while the_ch.isspace():
err_line = the_line
Line 862 ⟶ 1,072:
if len(the_ch) == 0: return EOI, err_line, err_col
elif the_ch == '/': return div_or_cmt(err_line, err_col)
elif the_ch == '\'': return char_lit(err_line, err_col)
Line 869 ⟶ 1,078:
elif the_ch == '&': return follow('&', And, EOI, err_line, err_col)
elif the_ch == '"': return string_lit(the_ch, err_line, err_col)
elif the_ch in symbols:
sym = symbols[the_ch]
next_ch()
return sym, err_line, err_col
else: return ident_or_int(err_line, err_col)
#*** main driver
Line 885 ⟶ 1,098:
col = t[2]
print("line %5d col %5d %-8s" % (line, col, all_syms[tok]), end='')
elif tok == Ident: print("
elif tok == Stringk: print(' "%s"' % (t[3]))
else:
if tok == EOI:
|