Ed Davis

Joined 31 August 2022
no edit summary
No edit summary
No edit summary
Line 163:
<lang c>
/*
All lexical tokens - not syntactically correct, but that will
have to wait until syntax analysis
*/
Line 232:
;Implementations
 
__TOC__
 
=={{header|C}}==
Line 288 ⟶ 287:
}
 
static voidint read_chnext_ch() { /* get next char from input */
the_ch = getc(source_fp);
++col;
Line 295 ⟶ 294:
col = 0;
}
return the_ch;
}
 
Line 301:
error(err_line, err_col, "gettok: empty character constant");
if (the_ch == '\\') {
read_chnext_ch();
if (the_ch == 'n')
n = 10;
Line 308:
else error(err_line, err_col, "gettok: unknown escape sequence \\%c", the_ch);
}
if (next_ch() != '\'')
read_ch();
if (the_ch != '\'') error(err_line, err_col, "multi-character constant");
read_chnext_ch();
return (tok_s){Integerk, err_line, err_col, {n}};
}
Line 320:
/* comment found */
for (;;) {
if (next_ch() == '*' && next_ch() == '/') {
read_ch();
if (the_ch == '*' || the_ch == EOFnext_ch() {;
read_chreturn gettok();
} else if (the_ch == '/' || the_ch == EOF) {
error(err_line, err_col, "EOF in read_ch(comment");
return gettok();
}
}
}
}
Line 334 ⟶ 331:
da_rewind(text);
 
forwhile (read_chnext_ch(); the_ch != start; read_ch()) {
if (the_ch == '\n') error(err_line, err_col, "EOL in string");
if (the_ch == EOF) error(err_line, err_col, "EOLEOF in string");
if (the_ch == EOF)
error(err_line, err_col, "EOF in string");
da_append(text, (char)the_ch);
}
da_append(text, '\0');
 
read_chnext_ch();
return (tok_s){Stringk, err_line, err_col, {.text=text}};
}
Line 373 ⟶ 368:
if (!isdigit(the_ch))
is_number = false;
read_chnext_ch();
}
if (da_len(text) == 0)
Line 391 ⟶ 386:
static tok_s follow(int expect, TokenType ifyes, TokenType ifno, int err_line, int err_col) { /* look ahead for '>=', etc. */
if (the_ch == expect) {
read_chnext_ch();
return (tok_s){ifyes, err_line, err_col, {0}};
}
if (ifno == EOI)
if (ifno == EOI) error(err_line, err_col, "follow: unrecognized character '%c' (%d)\n", the_ch, the_ch);
error(err_line, err_col, "follow: unrecognized character '%c' (%d)\n", the_ch, the_ch);
return (tok_s){ifno, err_line, err_col, {0}};
}
Line 401 ⟶ 397:
/* skip white space */
while (isspace(the_ch))
read_chnext_ch();
int err_line = line;
int err_col = col;
switch (the_ch) {
case '{': read_chnext_ch(); return (tok_s){Lbrace, err_line, err_col, {0}};
case '}': read_chnext_ch(); return (tok_s){Rbrace, err_line, err_col, {0}};
case '(': read_chnext_ch(); return (tok_s){Lparen, err_line, err_col, {0}};
case ')': read_chnext_ch(); return (tok_s){Rparen, err_line, err_col, {0}};
case '+': read_chnext_ch(); return (tok_s){Add, err_line, err_col, {0}};
case '-': read_chnext_ch(); return (tok_s){Sub, err_line, err_col, {0}};
case '*': read_chnext_ch(); return (tok_s){Mul, err_line, err_col, {0}};
case ';': read_chnext_ch(); return (tok_s){Semi, err_line, err_col, {0}};
case ',': read_chnext_ch(); return (tok_s){Comma, err_line, err_col, {0}};
case '>': read_chnext_ch(); return (tok_s){Gtr, err_line, err_col, {0}};
case '=': read_chnext_ch(); return (tok_s){Assign, err_line, err_col, {0}};
case '/': read_chnext_ch(); return div_or_cmt(err_line, err_col);
case '\'': read_chnext_ch(); return char_lit(the_ch, err_line, err_col);
case '<': read_chnext_ch(); return follow('=', Leq, Lss, err_line, err_col);
case '!': read_chnext_ch(); return follow('=', Neq, EOI, err_line, err_col);
case '&': read_chnext_ch(); return follow('&', And, EOI, err_line, err_col);
case '"' : return string_lit(the_ch, err_line, err_col);
default: return ident_or_int(err_line, err_col);
Line 436 ⟶ 432:
"Uminus Mul Div Add Sub Lss Gtr Leq Neq "
"And Semi Comma Assign Integer String Ident "[tok.tok * 9]);
 
if (tok.tok == Integerk)
if (tok.tok == Integerk) fprintf(dest_fp, " %8d4d", tok.n);
else if (tok.tok == Ident) fprintf(dest_fp, " %s", tok.text);
else if (tok.tok == Stringk) fprintf(dest_fp, " \"%s\"", tok.text);
else if (tok.tok == Stringk)
fprintf(dest_fp, " \"%s\"", tok.text);
fprintf(dest_fp, "\n");
} while (tok.tok != EOI);
Line 460 ⟶ 454:
run();
}
</lang>
 
=={{header|Euphoria}}==
<lang euphoria>
include std/io.e
include std/map.e
include std/types.e
include std/convert.e
 
-- Boolean values and the end-of-input marker that the_ch is compared against.
constant true = 1, false = 0, EOF = -1

-- Token type codes. The order here must stay in step with all_syms below,
-- because main() indexes all_syms by token code to print the token name.
enum EOI, Printk, Putc, Ifk, Whilek, Lbrace, Rbrace, Lparen, Rparen, Uminus, Mul, Div,
Add, Sub, Lss, Gtr, Leq, Neq, Andk, Semi, Comma, Assign, Integerk, Stringk, Ident

-- Printable token names, listed in the same order as the enum above.
constant all_syms = { "EOI", "Print", "Putc", "If", "While", "Lbrace", "Rbrace", "Lparen",
"Rparen", "Uminus", "Mul", "Div", "Add", "Sub", "Lss", "Gtr", "Leq", "Neq", "And",
"Semi", "Comma", "Assign", "Integer", "String", "Ident"}

-- Lexer state: the input file handle, the current look-ahead character, and
-- the column/line counters that next_ch() keeps up to date.
integer input_file, the_ch = ' ', the_col = 0, the_line = 1
-- Filled in by init() and indexed by character code in get_tok(); an entry of
-- EOI means the character is not a single-character token.
sequence symbols
-- Keyword text -> token code; populated by init(), consulted by ident_or_int().
map key_words = new()
 
procedure error(sequence format, sequence data)
    -- Format the message, write it to STDOUT, then stop the program with
    -- exit status 1. This routine never returns to its caller.
    sequence message = sprintf(format, data)
    puts(STDOUT, message)
    abort(1)
end procedure
 
-- Read one character from input_file into the_ch, update the_line/the_col to
-- match, and return the character that was read.
function next_ch()
    integer ch = getc(input_file)

    the_ch = ch
    if ch = '\n' then
        -- a newline starts a fresh line: bump the line count, reset the column
        the_line += 1
        the_col = 0
    else
        the_col += 1
    end if
    return ch
end function
 
-- Character constant such as 'x', '\n' or '\\'.
-- Entered with the_ch on the opening quote. Returns an Integerk token whose
-- fourth element is the character code; calls error() on an empty constant,
-- an unknown escape, or a missing closing quote.
function char_lit(integer err_line, integer err_col)
    integer code = next_ch()    -- first character after the opening quote
    integer esc

    if code = '\'' then
        error("%d %d empty character constant", {err_line, err_col})
    elsif code = '\\' then
        esc = next_ch()
        switch esc do
            case 'n' then
                code = 10
            case '\\' then
                code = '\\'
            case else
                error("%d %d unknown escape sequence \\%c", {err_line, err_col, esc})
        end switch
    end if

    -- exactly one (possibly escaped) character is allowed before the close quote
    if next_ch() != '\'' then
        error("%d %d multi-character constant", {err_line, err_col})
    end if

    next_ch()   -- step past the closing quote
    return {Integerk, err_line, err_col, code}
end function
 
-- Divide operator or /* ... */ comment.
-- Entered with the_ch on '/'. If the next character is not '*', a Div token is
-- returned. Otherwise the comment is skipped and the token that follows it is
-- returned via get_tok(). Reaching EOF inside the comment calls error().
function div_or_cmt(integer err_line, integer err_col)
    if next_ch() != '*' then
        return {Div, err_line, err_col}
    end if

    -- comment found
    -- Move past the opening '*' so that "/*/" is not mistaken for a complete
    -- comment.
    next_ch()

    while true do
        if the_ch = '*' then
            -- Only one character is consumed here. If it is not '/', the loop
            -- re-examines it, so a run of stars such as "**/" still closes
            -- the comment.
            if next_ch() = '/' then
                next_ch()
                return get_tok()
            end if
        elsif the_ch = EOF then
            error("%d %d EOF in comment", {err_line, err_col})
        else
            next_ch()
        end if
    end while
end function
 
-- "string"
function string_lit(integer start, integer err_line, integer err_col)
    -- Collect characters up to the matching `start` quote character and
    -- return them as a Stringk token. EOF or a newline before the closing
    -- quote is reported through error().
    string body = ""
    integer ch = next_ch()

    while ch != start do
        if ch = EOF then
            error("%d %d EOF while scanning string literal", {err_line, err_col})
        end if
        if ch = '\n' then
            error("%d %d EOL while scanning string literal", {err_line, err_col})
        end if
        body = append(body, ch)
        ch = next_ch()
    end while

    next_ch()   -- step past the closing quote
    return {Stringk, err_line, err_col, body}
end function
 
-- Identifier, keyword or integer literal.
-- Gathers a run of alphanumerics/underscores starting at the_ch and returns
-- an Integerk, keyword, or Ident token. An empty run, or a run that starts
-- with a digit but contains non-digits, is reported through error().
function ident_or_int(integer err_line, integer err_col)
    string lexeme = ""
    integer all_digits = true

    while the_ch = '_' or t_alnum(the_ch) do
        all_digits = all_digits and t_digit(the_ch)
        lexeme &= the_ch
        next_ch()
    end while

    if length(lexeme) = 0 then
        error("%d %d ident_or_int: unrecognized character: (%d) '%s'", {err_line, err_col, the_ch, the_ch})
    end if

    if not t_digit(lexeme[1]) then
        -- starts with a letter or underscore: keyword if known, else identifier
        if has(key_words, lexeme) then
            return {get(key_words, lexeme), err_line, err_col}
        end if
        return {Ident, err_line, err_col, lexeme}
    end if

    -- starts with a digit, so every character must be a digit
    if not all_digits then
        error("%d %d invalid number: %s", {err_line, err_col, lexeme})
    end if
    return {Integerk, err_line, err_col, to_integer(lexeme)}
end function
 
-- Two-character operator look-ahead (e.g. '<=', '!=', '&&').
-- Reads the next character. If it equals `expect`, it is consumed and an
-- `ifyes` token is returned. Otherwise an `ifno` token is returned, unless
-- `ifno` is EOI, in which case the character is reported through error().
function follow(integer expect, integer ifyes, integer ifno, integer err_line, integer err_col)
    integer seen = next_ch()

    if seen != expect then
        if ifno = EOI then
            error("%d %d follow: unrecognized character: (%d)", {err_line, err_col, seen})
        end if
        return {ifno, err_line, err_col}
    end if

    next_ch()   -- consume the second character of the operator
    return {ifyes, err_line, err_col}
end function
 
-- Return the next token as a sequence {type, line, col [, value]}.
-- Leading white space is skipped first; the line/column recorded in the token
-- are those of its first character. Multi-character constructs are delegated
-- to div_or_cmt, char_lit, follow, string_lit and ident_or_int.
function get_tok()
    while t_space(the_ch) do
        next_ch()
    end while

    integer err_line = the_line
    integer err_col = the_col
    integer sym = EOI

    switch the_ch do
        case EOF then return {EOI, err_line, err_col}
        case '/' then return div_or_cmt(err_line, err_col)
        case '\'' then return char_lit(err_line, err_col)
        case '<' then return follow('=', Leq, Lss, err_line, err_col)
        case '!' then return follow('=', Neq, EOI, err_line, err_col)
        case '&' then return follow('&', Andk, EOI, err_line, err_col)
        case '"' then return string_lit(the_ch, err_line, err_col)
        case else
            -- Sequences are 1-based, so a character code of 0 (a NUL byte in
            -- the input) is not a valid subscript for symbols. Guard the
            -- lookup so such input reaches ident_or_int's "unrecognized
            -- character" error instead of a subscript failure.
            if the_ch >= 1 and the_ch <= length(symbols) then
                sym = symbols[the_ch]
            end if
            if sym != EOI then
                next_ch()
                return {sym, err_line, err_col}
            end if
            return ident_or_int(err_line, err_col)
    end switch
end function
 
procedure init()
    -- Fill the keyword map and the single-character symbol table.
    sequence keywords = {
        {"if", Ifk},
        {"print", Printk},
        {"putc", Putc},
        {"while", Whilek}
    }
    sequence single_chars = {
        {'{', Lbrace}, {'}', Rbrace},
        {'(', Lparen}, {')', Rparen},
        {'+', Add}, {'-', Sub}, {'*', Mul},
        {';', Semi}, {',', Comma},
        {'>', Gtr}, {'=', Assign}
    }

    for i = 1 to length(keywords) do
        put(key_words, keywords[i][1], keywords[i][2])
    end for

    -- every entry defaults to EOI, meaning "not a single-character token"
    symbols = repeat(EOI, 256)
    for i = 1 to length(single_chars) do
        symbols[single_chars[i][1]] = single_chars[i][2]
    end for
end procedure
 
procedure main(sequence cl)
    -- Driver: read from the file named in cl[3] if present (otherwise STDIN),
    -- then print one line per token until the EOI token has been printed.
    sequence file_name
    sequence tok
    integer kind

    input_file = STDIN
    if length(cl) > 2 then
        file_name = cl[3]
        input_file = open(file_name, "r")
        if input_file = -1 then
            error("Could not open %s", {file_name})
        end if
    end if
    init()

    while true do
        tok = get_tok()
        kind = tok[1]
        printf(STDOUT, "line %5d col %5d %-8s", {tok[2], tok[3], all_syms[kind]})

        -- only these three token kinds carry a value in tok[4]
        if kind = Integerk then
            printf(STDOUT, " %5d\n", {tok[4]})
        elsif kind = Ident then
            printf(STDOUT, " %s\n", {tok[4]})
        elsif kind = Stringk then
            printf(STDOUT, " \"%s\"\n", {tok[4]})
        else
            printf(STDOUT, "\n")
        end if

        if kind = EOI then
            exit
        end if
    end while
end procedure

main(command_line())
</lang>
 
Line 689 ⟶ 898:
dim tok_list(tk_eoi to tk_ident) as string
 
tok_list(tk_eoi ) = "EOI"
tok_list(tk_print ) = "Print"
tok_list(tk_putc ) = "Putc"
tok_list(tk_if ) = "If"
tok_list(tk_while ) = "While"
tok_list(tk_lbrace ) = "Lbrace"
tok_list(tk_rbrace ) = "Rbrace"
tok_list(tk_lparen ) = "Lparen"
tok_list(tk_rparen ) = "Rparen"
tok_list(tk_uminus ) = "Uminus"
tok_list(tk_mul ) = "Mul"
tok_list(tk_div ) = "Div"
tok_list(tk_add ) = "Add"
tok_list(tk_sub ) = "Sub"
tok_list(tk_lss ) = "Lss"
tok_list(tk_gtr ) = "Gtr"
tok_list(tk_leq ) = "Leq"
tok_list(tk_neq ) = "Neq"
tok_list(tk_and ) = "And"
tok_list(tk_semi ) = "Semi"
tok_list(tk_comma ) = "Comma"
tok_list(tk_assign ) = "Assign"
tok_list(tk_integer ) = "Integer"
tok_list(tk_string ) = "String"
tok_list(tk_ident ) = "Ident"
 
do
Line 735 ⟶ 944:
=={{header|Python}}==
<lang Python>
from __future__ import print_function
import sys
 
Line 762 ⟶ 972:
 
#*** get the next character from the input
def getcnext_ch():
global the_ch, the_col, the_line
 
Line 774 ⟶ 984:
#*** 'x' - character constants
def char_lit(err_line, err_col):
n = ord(getcnext_ch()) # skip opening quote
if the_ch == '\'':
error(err_line, err_col, "empty character constant")
elif the_ch == '\\':
getcnext_ch()
if the_ch == 'n':
n = 10
Line 785 ⟶ 995:
else:
error(err_line, err_col, "unknown escape sequence \\%c" % (the_ch))
if getcnext_ch() != '\'':
error(err_line, err_col, "multi-character constant")
getcnext_ch()
return Integerk, err_line, err_col, n
 
#*** process divide or comments
def div_or_cmt(err_line, err_col):
if getcnext_ch() != '*':
return Div, err_line, err_col
 
# comment found
while True:
if getcnext_ch() == '*' and getcnext_ch() == '/':
getcnext_ch()
return gettok()
elif len(the_ch) == 0:
Line 807 ⟶ 1,017:
text = ""
 
while getcnext_ch() != start:
if len(the_ch) == 0:
error(err_line, err_col, "EOF while scanning string literal")
Line 814 ⟶ 1,024:
text += the_ch
 
getcnext_ch()
return Stringk, err_line, err_col, text
 
Line 826 ⟶ 1,036:
if not the_ch.isdigit():
is_number = False
getcnext_ch()
 
if len(text) == 0:
Line 844 ⟶ 1,054:
#*** look ahead for '>=', etc.
def follow(expect, ifyes, ifno, err_line, err_col):
if getcnext_ch() == expect:
getcnext_ch()
return ifyes, err_line, err_col
 
Line 856 ⟶ 1,066:
def gettok():
while the_ch.isspace():
getcnext_ch()
 
err_line = the_line
Line 862 ⟶ 1,072:
 
if len(the_ch) == 0: return EOI, err_line, err_col
elif the_ch in symbols: sym = symbols[the_ch]; getc(); return sym, err_line, err_col
elif the_ch == '/': return div_or_cmt(err_line, err_col)
elif the_ch == '\'': return char_lit(err_line, err_col)
Line 869 ⟶ 1,078:
elif the_ch == '&': return follow('&', And, EOI, err_line, err_col)
elif the_ch == '"': return string_lit(the_ch, err_line, err_col)
elif the_ch in symbols:
else: return ident_or_int(err_line, err_col)
sym = symbols[the_ch]
next_ch()
return sym, err_line, err_col
else: return ident_or_int(err_line, err_col)
 
#*** main driver
Line 885 ⟶ 1,098:
col = t[2]
 
print("line %5d col %5d %-8s" % (line, col, all_syms[tok]), end='')
if tok == Integerk:
 
print("line %5d col %5d %-8s %8d" % (line, col, all_syms[tok], t[3]))
elifif tok == IdentIntegerk: print(" %5d" % (t[3]))
elif tok == Ident: print("line %5d col %5d %-8s %s" % % (line, col, all_syms[tok], t[3]))
elif tok == Stringk: print(' "%s"' % (t[3]))
else: print('line %5d col %5d %-8s "%s"' % (line, col, all_syms[tok], t[3]) print("")
else:
print("line %5d col %5d %-8s" % (line, col, all_syms[tok]))
 
if tok == EOI:
155

edits