User:Ed Davis: Difference between revisions
Content added Content deleted
No edit summary |
No edit summary |
||
Line 163: | Line 163: | ||
<lang c> |
<lang c> |
||
/* |
/* |
||
All lexical tokens - not |
All lexical tokens - not syntactically correct, but that will |
||
have to wait until syntax analysis |
have to wait until syntax analysis |
||
*/ |
*/ |
||
Line 232: | Line 232: | ||
;Implementations |
;Implementations |
||
__TOC__ |
|||
=={{header|C}}== |
=={{header|C}}== |
||
Line 288: | Line 287: | ||
} |
} |
||
static |
static int next_ch() { /* get next char from input */ |
||
the_ch = getc(source_fp); |
the_ch = getc(source_fp); |
||
++col; |
++col; |
||
Line 295: | Line 294: | ||
col = 0; |
col = 0; |
||
} |
} |
||
return the_ch; |
|||
} |
} |
||
Line 301: | Line 301: | ||
error(err_line, err_col, "gettok: empty character constant"); |
error(err_line, err_col, "gettok: empty character constant"); |
||
if (the_ch == '\\') { |
if (the_ch == '\\') { |
||
next_ch(); |
|||
if (the_ch == 'n') |
if (the_ch == 'n') |
||
n = 10; |
n = 10; |
||
Line 308: | Line 308: | ||
else error(err_line, err_col, "gettok: unknown escape sequence \\%c", the_ch); |
else error(err_line, err_col, "gettok: unknown escape sequence \\%c", the_ch); |
||
} |
} |
||
if (next_ch() != '\'') |
|||
read_ch(); |
|||
error(err_line, err_col, "multi-character constant"); |
|||
next_ch(); |
|||
return (tok_s){Integerk, err_line, err_col, {n}}; |
return (tok_s){Integerk, err_line, err_col, {n}}; |
||
} |
} |
||
Line 320: | Line 320: | ||
/* comment found */ |
/* comment found */ |
||
for (;;) { |
for (;;) { |
||
if (next_ch() == '*' && next_ch() == '/') { |
|||
read_ch(); |
|||
next_ch(); |
|||
return gettok(); |
|||
} else if (the_ch == EOF) |
|||
error(err_line, err_col, "EOF in comment"); |
|||
return gettok(); |
|||
} |
|||
} |
|||
} |
} |
||
} |
} |
||
Line 334: | Line 331: | ||
da_rewind(text); |
da_rewind(text); |
||
while (next_ch() != start) { |
|||
if (the_ch == '\n') |
if (the_ch == '\n') error(err_line, err_col, "EOL in string"); |
||
error(err_line, err_col, " |
if (the_ch == EOF) error(err_line, err_col, "EOF in string"); |
||
if (the_ch == EOF) |
|||
error(err_line, err_col, "EOF in string"); |
|||
da_append(text, (char)the_ch); |
da_append(text, (char)the_ch); |
||
} |
} |
||
da_append(text, '\0'); |
da_append(text, '\0'); |
||
next_ch(); |
|||
return (tok_s){Stringk, err_line, err_col, {.text=text}}; |
return (tok_s){Stringk, err_line, err_col, {.text=text}}; |
||
} |
} |
||
Line 373: | Line 368: | ||
if (!isdigit(the_ch)) |
if (!isdigit(the_ch)) |
||
is_number = false; |
is_number = false; |
||
next_ch(); |
|||
} |
} |
||
if (da_len(text) == 0) |
if (da_len(text) == 0) |
||
Line 391: | Line 386: | ||
static tok_s follow(int expect, TokenType ifyes, TokenType ifno, int err_line, int err_col) { /* look ahead for '>=', etc. */ |
static tok_s follow(int expect, TokenType ifyes, TokenType ifno, int err_line, int err_col) { /* look ahead for '>=', etc. */ |
||
if (the_ch == expect) { |
if (the_ch == expect) { |
||
next_ch(); |
|||
return (tok_s){ifyes, err_line, err_col, {0}}; |
return (tok_s){ifyes, err_line, err_col, {0}}; |
||
} |
} |
||
if (ifno == EOI) |
|||
if (ifno == EOI) error(err_line, err_col, "follow: unrecognized character '%c' (%d)\n", the_ch, the_ch); |
|||
error(err_line, err_col, "follow: unrecognized character '%c' (%d)\n", the_ch, the_ch); |
|||
return (tok_s){ifno, err_line, err_col, {0}}; |
return (tok_s){ifno, err_line, err_col, {0}}; |
||
} |
} |
||
Line 401: | Line 397: | ||
/* skip white space */ |
/* skip white space */ |
||
while (isspace(the_ch)) |
while (isspace(the_ch)) |
||
next_ch(); |
|||
int err_line = line; |
int err_line = line; |
||
int err_col = col; |
int err_col = col; |
||
switch (the_ch) { |
switch (the_ch) { |
||
case '{': |
case '{': next_ch(); return (tok_s){Lbrace, err_line, err_col, {0}}; |
||
case '}': |
case '}': next_ch(); return (tok_s){Rbrace, err_line, err_col, {0}}; |
||
case '(': |
case '(': next_ch(); return (tok_s){Lparen, err_line, err_col, {0}}; |
||
case ')': |
case ')': next_ch(); return (tok_s){Rparen, err_line, err_col, {0}}; |
||
case '+': |
case '+': next_ch(); return (tok_s){Add, err_line, err_col, {0}}; |
||
case '-': |
case '-': next_ch(); return (tok_s){Sub, err_line, err_col, {0}}; |
||
case '*': |
case '*': next_ch(); return (tok_s){Mul, err_line, err_col, {0}}; |
||
case ';': |
case ';': next_ch(); return (tok_s){Semi, err_line, err_col, {0}}; |
||
case ',': |
case ',': next_ch(); return (tok_s){Comma, err_line, err_col, {0}}; |
||
case '>': |
case '>': next_ch(); return (tok_s){Gtr, err_line, err_col, {0}}; |
||
case '=': |
case '=': next_ch(); return (tok_s){Assign, err_line, err_col, {0}}; |
||
case '/': |
case '/': next_ch(); return div_or_cmt(err_line, err_col); |
||
case '\'': |
case '\'': next_ch(); return char_lit(the_ch, err_line, err_col); |
||
case '<': |
case '<': next_ch(); return follow('=', Leq, Lss, err_line, err_col); |
||
case '!': |
case '!': next_ch(); return follow('=', Neq, EOI, err_line, err_col); |
||
case '&': |
case '&': next_ch(); return follow('&', And, EOI, err_line, err_col); |
||
case '"' : return string_lit(the_ch, err_line, err_col); |
case '"' : return string_lit(the_ch, err_line, err_col); |
||
default: return ident_or_int(err_line, err_col); |
default: return ident_or_int(err_line, err_col); |
||
Line 436: | Line 432: | ||
"Uminus Mul Div Add Sub Lss Gtr Leq Neq " |
"Uminus Mul Div Add Sub Lss Gtr Leq Neq " |
||
"And Semi Comma Assign Integer String Ident "[tok.tok * 9]); |
"And Semi Comma Assign Integer String Ident "[tok.tok * 9]); |
||
if (tok.tok == Integerk) |
|||
fprintf(dest_fp, " % |
if (tok.tok == Integerk) fprintf(dest_fp, " %4d", tok.n); |
||
else if (tok.tok == Ident) |
else if (tok.tok == Ident) fprintf(dest_fp, " %s", tok.text); |
||
fprintf(dest_fp, " %s", tok.text); |
else if (tok.tok == Stringk) fprintf(dest_fp, " \"%s\"", tok.text); |
||
else if (tok.tok == Stringk) |
|||
fprintf(dest_fp, " \"%s\"", tok.text); |
|||
fprintf(dest_fp, "\n"); |
fprintf(dest_fp, "\n"); |
||
} while (tok.tok != EOI); |
} while (tok.tok != EOI); |
||
Line 460: | Line 454: | ||
run(); |
run(); |
||
} |
} |
||
</lang> |
|||
=={{header|Euphoria}}== |
|||
<lang euphoria> |
|||
include std/io.e |
|||
include std/map.e |
|||
include std/types.e |
|||
include std/convert.e |
|||
-- Boolean helpers and the end-of-input marker that the_ch is compared against.
constant true  = 1,
         false = 0,
         EOF   = -1

-- Token types.  The enum order must stay in step with all_syms below, which is
-- indexed by token type when tokens are printed.
enum EOI, Printk, Putc, Ifk, Whilek,
     Lbrace, Rbrace, Lparen, Rparen,
     Uminus, Mul, Div, Add, Sub,
     Lss, Gtr, Leq, Neq, Andk,
     Semi, Comma, Assign,
     Integerk, Stringk, Ident

-- Printable name of each token type, in enum order.
constant all_syms = {
    "EOI", "Print", "Putc", "If", "While",
    "Lbrace", "Rbrace", "Lparen", "Rparen",
    "Uminus", "Mul", "Div", "Add", "Sub",
    "Lss", "Gtr", "Leq", "Neq", "And",
    "Semi", "Comma", "Assign",
    "Integer", "String", "Ident"
}

-- Scanner state shared by every routine in this file.
integer input_file          -- file number the characters are read from
integer the_ch   = ' '      -- current (not yet consumed) character
integer the_col  = 0        -- column of the_ch
integer the_line = 1        -- line of the_ch

sequence symbols            -- character code -> single-character token type (filled by init)
map key_words = new()       -- keyword text -> token type (filled by init)
|||
-- Report a fatal error and stop.
-- format and data are combined exactly as printf() would combine them; the
-- resulting text is written to STDOUT and the program ends with abort(1).
procedure error(sequence format, sequence data)
    puts(STDOUT, sprintf(format, data))
    abort(1)
end procedure
|||
-- Read one character from input_file into the_ch and return it.
-- Position tracking: a newline bumps the_line and resets the_col to 0;
-- any other character (including EOF) advances the_col by one.
function next_ch()
    the_ch = getc(input_file)
    if the_ch = '\n' then
        the_line += 1
        the_col = 0
    else
        the_col += 1
    end if
    return the_ch
end function
|||
-- Scan a character constant such as 'x', '\n' or '\\'.
-- On entry the_ch is the opening quote.  Returns {Integerk, line, col, code}
-- where code is the character's value; the only escapes accepted are \n and \\.
-- Any malformed constant is reported through error(), which does not return.
function char_lit(integer err_line, integer err_col)
    integer code = next_ch()            -- character right after the opening quote

    if code = '\'' then
        error("%d %d empty character constant", {err_line, err_col})
    end if

    if code = '\\' then
        next_ch()                       -- the character that selects the escape
        switch the_ch do
            case 'n' then
                code = 10
            case '\\' then
                code = '\\'
            case else
                error("%d %d unknown escape sequence \\%c", {err_line, err_col, the_ch})
        end switch
    end if

    -- exactly one (possibly escaped) character is allowed before the closing quote
    if next_ch() != '\'' then
        error("%d %d multi-character constant", {err_line, err_col})
    end if

    next_ch()                           -- step past the closing quote
    return {Integerk, err_line, err_col, code}
end function
|||
-- Handle '/': either the Div operator or the start of a /* ... */ comment.
-- On entry the_ch is the '/'.  If the next character is not '*', a Div token
-- {Div, line, col} is returned.  Otherwise the comment is skipped and the token
-- that follows it is returned (via get_tok).  Reaching end of input inside a
-- comment is reported through error(), which does not return.
function div_or_cmt(integer err_line, integer err_col)
    if next_ch() != '*' then
        return {Div, err_line, err_col}
    end if

    -- comment found
    next_ch()
    while true do
        if the_ch = '*' then
            -- Only advance one character here.  If it is not '/', it stays in
            -- the_ch and is re-examined on the next pass, so a terminator that
            -- is preceded by extra stars (e.g. "**/") is still recognised.
            if next_ch() = '/' then
                next_ch()               -- step past the closing '/'
                return get_tok()
            end if
        elsif the_ch = EOF then
            error("%d %d EOF in comment", {err_line, err_col})
        else
            next_ch()
        end if
    end while
end function
|||
-- Scan a string literal.
-- start is the delimiter character (the_ch on entry).  Characters are collected
-- up to the next occurrence of start; there is no escape processing.
-- Returns {Stringk, line, col, text} with the delimiters removed.  A newline or
-- end of input before the closing delimiter is reported through error().
function string_lit(integer start, integer err_line, integer err_col)
    string collected = ""

    next_ch()                           -- first character after the opening delimiter
    while the_ch != start do
        if the_ch = EOF then
            error("%d %d EOF while scanning string literal", {err_line, err_col})
        elsif the_ch = '\n' then
            error("%d %d EOL while scanning string literal", {err_line, err_col})
        end if
        collected &= the_ch
        next_ch()
    end while

    next_ch()                           -- step past the closing delimiter
    return {Stringk, err_line, err_col, collected}
end function
|||
-- Scan an identifier, keyword or integer literal starting at the_ch.
-- Consumes the longest run of alphanumerics and underscores, then classifies it:
--   * empty run               -> error: unrecognized character
--   * starts with a digit     -> must be all digits; returns {Integerk, line, col, value}
--   * present in key_words    -> returns {keyword token, line, col}
--   * anything else           -> returns {Ident, line, col, text}
function ident_or_int(integer err_line, integer err_col)
    integer value
    integer all_digits = true
    string lexeme = ""

    while t_alnum(the_ch) or the_ch = '_' do
        lexeme &= the_ch
        all_digits = all_digits and t_digit(the_ch)
        next_ch()
    end while

    if not length(lexeme) then
        error("%d %d ident_or_int: unrecognized character: (%d) '%s'", {err_line, err_col, the_ch, the_ch})
    end if

    if t_digit(lexeme[1]) then
        -- a leading digit commits us to a number, so any letter or '_' is an error
        if not all_digits then
            error("%d %d invalid number: %s", {err_line, err_col, lexeme})
        end if
        value = to_integer(lexeme)
        return {Integerk, err_line, err_col, value}
    end if

    if not has(key_words, lexeme) then
        return {Ident, err_line, err_col, lexeme}
    end if
    return {get(key_words, lexeme), err_line, err_col}
end function
|||
-- One-character look-ahead for two-character operators such as "<=", "!=" and "&&".
-- Reads the character after the current one.  If it equals expect, it is consumed
-- and {ifyes, line, col} is returned.  Otherwise {ifno, line, col} is returned,
-- unless ifno is EOI, which callers pass to mean "the first character is not a
-- token on its own"; that case is reported through error().
function follow(integer expect, integer ifyes, integer ifno, integer err_line, integer err_col)
    next_ch()

    if the_ch != expect then
        if ifno = EOI then
            error("%d %d follow: unrecognized character: (%d)", {err_line, err_col, the_ch})
        end if
        return {ifno, err_line, err_col}
    end if

    next_ch()                           -- consume the second character of the operator
    return {ifyes, err_line, err_col}
end function
|||
-- Return the next token from the input.
-- The result is a sequence {type, line, col}; literal and identifier tokens carry
-- a fourth element with their value.  Leading white space is skipped first, and
-- line/col are those of the token's first character.  End of input yields
-- {EOI, line, col}.
function get_tok()
    while t_space(the_ch) do
        next_ch()
    end while

    integer err_line = the_line
    integer err_col = the_col

    switch the_ch do
        case EOF then return {EOI, err_line, err_col}
        case '/' then return div_or_cmt(err_line, err_col)
        case '\'' then return char_lit(err_line, err_col)
        case '<' then return follow('=', Leq, Lss, err_line, err_col)
        case '!' then return follow('=', Neq, EOI, err_line, err_col)
        case '&' then return follow('&', Andk, EOI, err_line, err_col)
        case '"' then return string_lit(the_ch, err_line, err_col)
        case else
            -- symbols is subscripted from 1 to length(symbols).  A character code
            -- outside that range (for example a NUL byte, code 0) must not be used
            -- as a subscript; treat it as "not a symbol" so that ident_or_int
            -- reports it as an unrecognized character instead of the program
            -- failing with a subscript error.
            integer sym = EOI
            if the_ch >= 1 and the_ch <= length(symbols) then
                sym = symbols[the_ch]
            end if
            if sym != EOI then
                next_ch()
                return {sym, err_line, err_col}
            end if
            return ident_or_int(err_line, err_col)
    end switch
end function
|||
-- Fill the two lookup tables used by the scanner:
--   key_words : keyword text -> token type
--   symbols   : 256-entry table, character code -> token type for the
--               single-character tokens; every other entry is EOI ("not a symbol").
procedure init()
    sequence keyword_table = {
        {"if",    Ifk},
        {"print", Printk},
        {"putc",  Putc},
        {"while", Whilek}
    }
    for i = 1 to length(keyword_table) do
        put(key_words, keyword_table[i][1], keyword_table[i][2])
    end for

    sequence symbol_table = {
        {'{', Lbrace}, {'}', Rbrace},
        {'(', Lparen}, {')', Rparen},
        {'+', Add},    {'-', Sub},
        {'*', Mul},    {';', Semi},
        {',', Comma},  {'>', Gtr},
        {'=', Assign}
    }
    symbols = repeat(EOI, 256)
    for i = 1 to length(symbol_table) do
        symbols[symbol_table[i][1]] = symbol_table[i][2]
    end for
end procedure
|||
-- Program driver.
-- cl is the command line as returned by command_line(); if it has a third
-- element, that is taken as the name of the file to scan, otherwise input is
-- read from STDIN.  Every token is printed to STDOUT as
-- "line <l> col <c> <name>" followed by its value for Integer, Ident and
-- String tokens.  Printing stops after the EOI token has been printed.
procedure main(sequence cl)
    sequence file_name
    sequence tok

    input_file = STDIN
    if length(cl) > 2 then
        file_name = cl[3]
        input_file = open(file_name, "r")
        if input_file = -1 then
            error("Could not open %s", {file_name})
        end if
    end if

    init()

    while true do
        tok = get_tok()
        printf(STDOUT, "line %5d col %5d %-8s", {tok[2], tok[3], all_syms[tok[1]]})

        if tok[1] = Integerk then
            printf(STDOUT, " %5d\n", {tok[4]})
        elsif tok[1] = Ident then
            printf(STDOUT, " %s\n", {tok[4]})
        elsif tok[1] = Stringk then
            printf(STDOUT, " \"%s\"\n", {tok[4]})
        else
            printf(STDOUT, "\n")
        end if

        if tok[1] = EOI then
            exit
        end if
    end while
end procedure

main(command_line())
|||
</lang> |
</lang> |
||
Line 689: | Line 898: | ||
dim tok_list(tk_eoi to tk_ident) as string |
dim tok_list(tk_eoi to tk_ident) as string |
||
tok_list(tk_eoi |
tok_list(tk_eoi ) = "EOI" |
||
tok_list(tk_print |
tok_list(tk_print ) = "Print" |
||
tok_list(tk_putc |
tok_list(tk_putc ) = "Putc" |
||
tok_list(tk_if |
tok_list(tk_if ) = "If" |
||
tok_list(tk_while |
tok_list(tk_while ) = "While" |
||
tok_list(tk_lbrace |
tok_list(tk_lbrace ) = "Lbrace" |
||
tok_list(tk_rbrace |
tok_list(tk_rbrace ) = "Rbrace" |
||
tok_list(tk_lparen |
tok_list(tk_lparen ) = "Lparen" |
||
tok_list(tk_rparen |
tok_list(tk_rparen ) = "Rparen" |
||
tok_list(tk_uminus |
tok_list(tk_uminus ) = "Uminus" |
||
tok_list(tk_mul |
tok_list(tk_mul ) = "Mul" |
||
tok_list(tk_div |
tok_list(tk_div ) = "Div" |
||
tok_list(tk_add |
tok_list(tk_add ) = "Add" |
||
tok_list(tk_sub |
tok_list(tk_sub ) = "Sub" |
||
tok_list(tk_lss |
tok_list(tk_lss ) = "Lss" |
||
tok_list(tk_gtr |
tok_list(tk_gtr ) = "Gtr" |
||
tok_list(tk_leq |
tok_list(tk_leq ) = "Leq" |
||
tok_list(tk_neq |
tok_list(tk_neq ) = "Neq" |
||
tok_list(tk_and |
tok_list(tk_and ) = "And" |
||
tok_list(tk_semi |
tok_list(tk_semi ) = "Semi" |
||
tok_list(tk_comma |
tok_list(tk_comma ) = "Comma" |
||
tok_list(tk_assign |
tok_list(tk_assign ) = "Assign" |
||
tok_list(tk_integer |
tok_list(tk_integer) = "Integer" |
||
tok_list(tk_string |
tok_list(tk_string ) = "String" |
||
tok_list(tk_ident |
tok_list(tk_ident ) = "Ident" |
||
do |
do |
||
Line 735: | Line 944: | ||
=={{header|Python}}== |
=={{header|Python}}== |
||
<lang Python> |
<lang Python> |
||
from __future__ import print_function |
|||
import sys |
import sys |
||
Line 762: | Line 972: | ||
#*** get the next character from the input |
#*** get the next character from the input |
||
def |
def next_ch(): |
||
global the_ch, the_col, the_line |
global the_ch, the_col, the_line |
||
Line 774: | Line 984: | ||
#*** 'x' - character constants |
#*** 'x' - character constants |
||
def char_lit(err_line, err_col): |
def char_lit(err_line, err_col): |
||
n = ord( |
n = ord(next_ch()) # skip opening quote |
||
if the_ch == '\'': |
if the_ch == '\'': |
||
error(err_line, err_col, "empty character constant") |
error(err_line, err_col, "empty character constant") |
||
elif the_ch == '\\': |
elif the_ch == '\\': |
||
next_ch() |
|||
if the_ch == 'n': |
if the_ch == 'n': |
||
n = 10 |
n = 10 |
||
Line 785: | Line 995: | ||
else: |
else: |
||
error(err_line, err_col, "unknown escape sequence \\%c" % (the_ch)) |
error(err_line, err_col, "unknown escape sequence \\%c" % (the_ch)) |
||
if |
if next_ch() != '\'': |
||
error(err_line, err_col, "multi-character constant") |
error(err_line, err_col, "multi-character constant") |
||
next_ch() |
|||
return Integerk, err_line, err_col, n |
return Integerk, err_line, err_col, n |
||
#*** process divide or comments |
#*** process divide or comments |
||
def div_or_cmt(err_line, err_col): |
def div_or_cmt(err_line, err_col): |
||
if |
if next_ch() != '*': |
||
return Div, err_line, err_col |
return Div, err_line, err_col |
||
# comment found |
# comment found |
||
while True: |
while True: |
||
if |
if next_ch() == '*' and next_ch() == '/': |
||
next_ch() |
|||
return gettok() |
return gettok() |
||
elif len(the_ch) == 0: |
elif len(the_ch) == 0: |
||
Line 807: | Line 1,017: | ||
text = "" |
text = "" |
||
while |
while next_ch() != start: |
||
if len(the_ch) == 0: |
if len(the_ch) == 0: |
||
error(err_line, err_col, "EOF while scanning string literal") |
error(err_line, err_col, "EOF while scanning string literal") |
||
Line 814: | Line 1,024: | ||
text += the_ch |
text += the_ch |
||
next_ch() |
|||
return Stringk, err_line, err_col, text |
return Stringk, err_line, err_col, text |
||
Line 826: | Line 1,036: | ||
if not the_ch.isdigit(): |
if not the_ch.isdigit(): |
||
is_number = False |
is_number = False |
||
next_ch() |
|||
if len(text) == 0: |
if len(text) == 0: |
||
Line 844: | Line 1,054: | ||
#*** look ahead for '>=', etc. |
#*** look ahead for '>=', etc. |
||
def follow(expect, ifyes, ifno, err_line, err_col): |
def follow(expect, ifyes, ifno, err_line, err_col): |
||
if |
if next_ch() == expect: |
||
next_ch() |
|||
return ifyes, err_line, err_col |
return ifyes, err_line, err_col |
||
Line 856: | Line 1,066: | ||
def gettok(): |
def gettok(): |
||
while the_ch.isspace(): |
while the_ch.isspace(): |
||
next_ch() |
|||
err_line = the_line |
err_line = the_line |
||
Line 862: | Line 1,072: | ||
if len(the_ch) == 0: return EOI, err_line, err_col |
if len(the_ch) == 0: return EOI, err_line, err_col |
||
elif the_ch in symbols: sym = symbols[the_ch]; getc(); return sym, err_line, err_col |
|||
elif the_ch == '/': return div_or_cmt(err_line, err_col) |
elif the_ch == '/': return div_or_cmt(err_line, err_col) |
||
elif the_ch == '\'': return char_lit(err_line, err_col) |
elif the_ch == '\'': return char_lit(err_line, err_col) |
||
Line 869: | Line 1,078: | ||
elif the_ch == '&': return follow('&', And, EOI, err_line, err_col) |
elif the_ch == '&': return follow('&', And, EOI, err_line, err_col) |
||
elif the_ch == '"': return string_lit(the_ch, err_line, err_col) |
elif the_ch == '"': return string_lit(the_ch, err_line, err_col) |
||
elif the_ch in symbols: |
|||
else: return ident_or_int(err_line, err_col) |
|||
sym = symbols[the_ch] |
|||
next_ch() |
|||
return sym, err_line, err_col |
|||
else: return ident_or_int(err_line, err_col) |
|||
#*** main driver |
#*** main driver |
||
Line 885: | Line 1,098: | ||
col = t[2] |
col = t[2] |
||
print("line %5d col %5d %-8s" % (line, col, all_syms[tok]), end='') |
|||
if tok == Integerk: |
|||
print("line %5d col %5d %-8s %8d" % (line, col, all_syms[tok], t[3])) |
|||
if tok == Integerk: print(" %5d" % (t[3])) |
|||
print(" |
elif tok == Ident: print(" %s" % (t[3])) |
||
elif tok == Stringk: |
elif tok == Stringk: print(' "%s"' % (t[3])) |
||
else: print("") |
|||
else: |
|||
print("line %5d col %5d %-8s" % (line, col, all_syms[tok])) |
|||
if tok == EOI: |
if tok == EOI: |