Compiler/lexical analyzer: Difference between revisions
Content added Content deleted
Line 1,019: | Line 1,019: | ||
23 1 End_of_input |
23 1 End_of_input |
||
</pre> |
</pre> |
||
=={{header|ATS}}== |
|||
<lang ATS>(********************************************************************) |
|||
(* Usage: lex [INPUTFILE [OUTPUTFILE]] |
|||
If INPUTFILE or OUTPUTFILE is "-" or missing, then standard input |
|||
or standard output is used, respectively. *) |
|||
#define ATS_DYNLOADFLAG 0 |
|||
#include "share/atspre_staload.hats" |
|||
staload UN = "prelude/SATS/unsafe.sats" |
|||
#define NIL list_nil () |
|||
#define :: list_cons |
|||
%{^ |
|||
/* alloca(3) is needed for ATS exceptions. */ |
|||
#include <alloca.h> |
|||
%} |
|||
(********************************************************************) |
|||
#define NUM_TOKENS 31 |
|||
#define RESERVED_WORD_HASHTAB_SIZE 9 |
|||
#define token_else 0 |
|||
#define token_if 1 |
|||
#define token_print 2 |
|||
#define token_putc 3 |
|||
#define token_while 4 |
|||
#define token_multiply 5 |
|||
#define token_divide 6 |
|||
#define token_mod 7 |
|||
#define token_add 8 |
|||
#define token_subtract 9 |
|||
#define token_negate 10 |
|||
#define token_less 11 |
|||
#define token_lessequal 12 |
|||
#define token_greater 13 |
|||
#define token_greaterequal 14 |
|||
#define token_equal 15 |
|||
#define token_notequal 16 |
|||
#define token_not 17 |
|||
#define token_assign 18 |
|||
#define token_and 19 |
|||
#define token_or 20 |
|||
#define token_LeftParen 21 |
|||
#define token_RightParen 22 |
|||
#define token_LeftBrace 23 |
|||
#define token_RightBrace 24 |
|||
#define token_Semicolon 25 |
|||
#define token_Comma 26 |
|||
#define token_Identifier 27 |
|||
#define token_Integer 28 |
|||
#define token_String 29 |
|||
#define token_End_of_input 30 |
|||
typedef token_t = |
|||
[i : int | token_else <= i; i <= token_End_of_input] |
|||
int i |
|||
typedef tokentuple_t = (token_t, String, ullint, ullint) |
|||
typedef token_names_vt = @[string][NUM_TOKENS] |
|||
vtypedef reserved_words_vt = |
|||
@[String][RESERVED_WORD_HASHTAB_SIZE] |
|||
vtypedef reserved_word_tokens_vt = |
|||
@[token_t][RESERVED_WORD_HASHTAB_SIZE] |
|||
vtypedef lookups_vt = |
|||
[p_toknames : addr] |
|||
[p_wordtab : addr] |
|||
[p_toktab : addr] |
|||
@{ |
|||
pf_toknames = token_names_vt @ p_toknames, |
|||
pf_wordtab = reserved_words_vt @ p_wordtab, |
|||
pf_toktab = reserved_word_tokens_vt @ p_toktab | |
|||
toknames = ptr p_toknames, |
|||
wordtab = ptr p_wordtab, |
|||
toktab = ptr p_toktab |
|||
} |
|||
fn |
|||
reserved_word_lookup |
|||
(s : String, |
|||
lookups : !lookups_vt, |
|||
line_no : ullint, |
|||
column_no : ullint) : tokentuple_t = |
|||
if string_length s < 2 then |
|||
(token_Identifier, s, line_no, column_no) |
|||
else |
|||
let |
|||
macdef wordtab = !(lookups.wordtab) |
|||
macdef toktab = !(lookups.toktab) |
|||
val hashval = |
|||
g1uint_mod (g1ofg0 (char2ui s[0] + char2ui s[1]), |
|||
g1i2u RESERVED_WORD_HASHTAB_SIZE) |
|||
val token = toktab[hashval] |
|||
in |
|||
if token = token_Identifier || s <> wordtab[hashval] then |
|||
(token_Identifier, s, line_no, column_no) |
|||
else |
|||
(token, s, line_no, column_no) |
|||
end |
|||
(********************************************************************) |
|||
(* Input allows pushback into a buffer. *) |
|||
typedef ch_t = |
|||
@{ |
|||
ichar = int, |
|||
line_no = ullint, |
|||
column_no = ullint |
|||
} |
|||
typedef inp_t (n : int) = |
|||
[0 <= n] |
|||
@{ |
|||
file = FILEref, |
|||
pushback = list (ch_t, n), |
|||
line_no = ullint, |
|||
column_no = ullint |
|||
} |
|||
typedef inp_t = [n : int] inp_t n |
|||
fn |
|||
get_ch (inp : inp_t) : (ch_t, inp_t) = |
|||
case+ (inp.pushback) of |
|||
| NIL => |
|||
let |
|||
val c = fileref_getc (inp.file) |
|||
val ch = |
|||
@{ |
|||
ichar = c, |
|||
line_no = inp.line_no, |
|||
column_no = inp.column_no |
|||
} |
|||
in |
|||
if c = char2i '\n' then |
|||
let |
|||
val inp = |
|||
@{ |
|||
file = inp.file, |
|||
pushback = inp.pushback, |
|||
line_no = succ (inp.line_no), |
|||
column_no = 1ULL |
|||
} |
|||
in |
|||
(ch, inp) |
|||
end |
|||
else |
|||
let |
|||
val inp = |
|||
@{ |
|||
file = inp.file, |
|||
pushback = inp.pushback, |
|||
line_no = inp.line_no, |
|||
column_no = succ (inp.column_no) |
|||
} |
|||
in |
|||
(ch, inp) |
|||
end |
|||
end |
|||
| ch :: pushback => |
|||
let |
|||
val inp = |
|||
@{ |
|||
file = inp.file, |
|||
pushback = pushback, |
|||
line_no = inp.line_no, |
|||
column_no = inp.column_no |
|||
} |
|||
in |
|||
(ch, inp) |
|||
end |
|||
fn |
|||
push_back_ch (ch : ch_t, |
|||
inp : inp_t) : [n : pos] inp_t n = |
|||
let |
|||
prval _ = lemma_list_param (inp.pushback) |
|||
in |
|||
@{ |
|||
file = inp.file, |
|||
pushback = ch :: (inp.pushback), |
|||
line_no = inp.line_no, |
|||
column_no = inp.column_no |
|||
} |
|||
end |
|||
(********************************************************************) |
|||
exception unterminated_comment of (ullint, ullint) |
|||
exception unterminated_character_literal of (ullint, ullint) |
|||
exception multicharacter_literal of (ullint, ullint) |
|||
exception unterminated_string_literal of (ullint, ullint, bool) |
|||
exception unsupported_escape of (ullint, ullint, int) |
|||
exception invalid_integer_literal of (ullint, ullint, String) |
|||
exception unexpected_character of (ullint, ullint, int) |
|||
fn |
|||
scan_comment (inp : inp_t, |
|||
line_no : ullint, |
|||
column_no : ullint) : inp_t = |
|||
let |
|||
fun |
|||
loop (inp : inp_t) : inp_t = |
|||
let |
|||
val (ch, inp) = get_ch inp |
|||
in |
|||
if (ch.ichar) < 0 then |
|||
$raise unterminated_comment (line_no, column_no) |
|||
else if (ch.ichar) = char2i '*' then |
|||
let |
|||
val (ch1, inp) = get_ch inp |
|||
in |
|||
if (ch.ichar) < 0 then |
|||
$raise unterminated_comment (line_no, column_no) |
|||
else if (ch1.ichar) = char2i '/' then |
|||
inp |
|||
else |
|||
loop inp |
|||
end |
|||
else |
|||
loop inp |
|||
end |
|||
in |
|||
loop inp |
|||
end |
|||
fn |
|||
skip_spaces_and_comments (inp : inp_t) : [n : pos] inp_t n = |
|||
let |
|||
fun |
|||
loop (inp : inp_t) : [n : pos] inp_t n = |
|||
let |
|||
val (ch, inp) = get_ch inp |
|||
in |
|||
if isspace (ch.ichar) then |
|||
loop inp |
|||
else if (ch.ichar) = char2i '/' then |
|||
let |
|||
val (ch1, inp) = get_ch inp |
|||
in |
|||
if (ch1.ichar) = char2i '*' then |
|||
loop (scan_comment (inp, ch.line_no, ch.column_no)) |
|||
else |
|||
let |
|||
val inp = push_back_ch (ch1, inp) |
|||
val inp = push_back_ch (ch, inp) |
|||
in |
|||
inp |
|||
end |
|||
end |
|||
else |
|||
push_back_ch (ch, inp) |
|||
end |
|||
in |
|||
loop inp |
|||
end |
|||
fn |
|||
reverse_list_to_string |
|||
{m : int} |
|||
(lst : list (char, m)) : string m = |
|||
let |
|||
fun |
|||
fill_array {n : nat | n <= m} .<n>. |
|||
(arr : &(@[char][m + 1]), |
|||
lst : list (char, n), |
|||
n : size_t n) : void = |
|||
case+ lst of |
|||
| NIL => () |
|||
| c :: tail => |
|||
begin |
|||
arr[pred n] := c; |
|||
fill_array (arr, tail, pred n) |
|||
end |
|||
prval _ = lemma_list_param lst |
|||
val m : size_t m = i2sz (list_length lst) |
|||
val (pf, pfgc | p) = array_ptr_alloc<char> (succ m) |
|||
val _ = array_initize_elt<char> (!p, succ m, '\0') |
|||
val _ = fill_array (!p, lst, m) |
|||
in |
|||
$UN.castvwtp0 @(pf, pfgc | p) |
|||
end |
|||
extern fun {} |
|||
simple_scan$pred : int -> bool |
|||
fun {} |
|||
simple_scan {u : nat} |
|||
(lst : list (char, u), |
|||
inp : inp_t) : |
|||
[m : nat] |
|||
[n : pos] |
|||
(list (char, m), inp_t n) = |
|||
let |
|||
val (ch, inp) = get_ch inp |
|||
in |
|||
if simple_scan$pred (ch.ichar) then |
|||
simple_scan<> (int2char0 (ch.ichar) :: lst, inp) |
|||
else |
|||
let |
|||
val inp = push_back_ch (ch, inp) |
|||
in |
|||
(lst, inp) |
|||
end |
|||
end |
|||
fn |
|||
is_ident_start (c : int) :<> bool = |
|||
isalpha (c) || c = char2i '_' |
|||
fn |
|||
is_ident_continuation (c : int) :<> bool = |
|||
isalnum (c) || c = char2i '_' |
|||
fn |
|||
scan_identifier_or_reserved_word |
|||
(inp : inp_t, |
|||
lookups : !lookups_vt) : |
|||
(tokentuple_t, [n : pos] inp_t n) = |
|||
let |
|||
val (ch, inp) = get_ch inp |
|||
val _ = assertloc (is_ident_start (ch.ichar)) |
|||
implement simple_scan$pred<> c = is_ident_continuation c |
|||
val (lst, inp) = simple_scan (int2char0 (ch.ichar) :: NIL, inp) |
|||
val s = reverse_list_to_string lst |
|||
val toktup = |
|||
reserved_word_lookup (s, lookups, ch.line_no, ch.column_no) |
|||
in |
|||
(toktup, inp) |
|||
end |
|||
fn |
|||
scan_integer_literal |
|||
(inp : inp_t, |
|||
lookups : !lookups_vt) : |
|||
(tokentuple_t, [n : pos] inp_t n) = |
|||
let |
|||
val (ch, inp) = get_ch inp |
|||
val _ = assertloc (isdigit (ch.ichar)) |
|||
implement simple_scan$pred<> c = is_ident_continuation c |
|||
val (lst, inp) = simple_scan (int2char0 (ch.ichar) :: NIL, inp) |
|||
val s = reverse_list_to_string lst |
|||
fun |
|||
check_they_are_all_digits |
|||
{n : nat} .<n>. |
|||
(lst : list (char, n)) : void = |
|||
case+ lst of |
|||
| NIL => () |
|||
| c :: tail => |
|||
if isdigit c then |
|||
check_they_are_all_digits tail |
|||
else |
|||
$raise invalid_integer_literal (ch.line_no, ch.column_no, s) |
|||
val _ = check_they_are_all_digits lst |
|||
in |
|||
((token_Integer, s, ch.line_no, ch.column_no), inp) |
|||
end |
|||
fn |
|||
ichar2integer_literal (c : int) : String0 = |
|||
let |
|||
var buf = @[char][100] ('\0') |
|||
val _ = $extfcall (int, "snprintf", addr@ buf, i2sz 99, "%d", c) |
|||
val s = string1_copy ($UN.castvwtp0{String0} buf) |
|||
in |
|||
strnptr2string s |
|||
end |
|||
fn |
|||
scan_character_literal_without_checking_end (inp : inp_t) : |
|||
(tokentuple_t, inp_t) = |
|||
let |
|||
val (ch, inp) = get_ch inp |
|||
val _ = assertloc ((ch.ichar) = '\'') |
|||
val (ch1, inp) = get_ch inp |
|||
in |
|||
if (ch1.ichar) < 0 then |
|||
$raise unterminated_character_literal (ch.line_no, ch.column_no) |
|||
else if (ch1.ichar) = char2i '\\' then |
|||
let |
|||
val (ch2, inp) = get_ch inp |
|||
in |
|||
if (ch2.ichar) < 0 then |
|||
$raise unterminated_character_literal (ch.line_no, |
|||
ch.column_no) |
|||
else if (ch2.ichar) = char2i 'n' then |
|||
let |
|||
val s = ichar2integer_literal (char2i '\n') |
|||
in |
|||
((token_Integer, s, ch.line_no, ch.column_no), inp) |
|||
end |
|||
else if (ch2.ichar) = char2i '\\' then |
|||
let |
|||
val s = ichar2integer_literal (char2i '\\') |
|||
in |
|||
((token_Integer, s, ch.line_no, ch.column_no), inp) |
|||
end |
|||
else |
|||
$raise unsupported_escape (ch1.line_no, ch1.column_no, |
|||
ch2.ichar) |
|||
end |
|||
else |
|||
let |
|||
val s = ichar2integer_literal (ch1.ichar) |
|||
in |
|||
((token_Integer, s, ch.line_no, ch.column_no), inp) |
|||
end |
|||
end |
|||
fn |
|||
scan_character_literal (inp : inp_t) : (tokentuple_t, inp_t) = |
|||
let |
|||
val (tok, inp) = |
|||
scan_character_literal_without_checking_end inp |
|||
val line_no = (tok.2) |
|||
val column_no = (tok.3) |
|||
fun |
|||
check_end (inp : inp_t) : inp_t = |
|||
let |
|||
val (ch, inp) = get_ch inp |
|||
in |
|||
if (ch.ichar) = char2i '\'' then |
|||
inp |
|||
else |
|||
let |
|||
fun |
|||
loop_to_end (ch1 : ch_t, |
|||
inp : inp_t) : inp_t = |
|||
if (ch1.ichar) < 0 then |
|||
$raise unterminated_character_literal (line_no, |
|||
column_no) |
|||
else if (ch1.ichar) = char2i '\'' then |
|||
$raise multicharacter_literal (line_no, column_no) |
|||
else |
|||
let |
|||
val (ch1, inp) = get_ch inp |
|||
in |
|||
loop_to_end (ch1, inp) |
|||
end |
|||
val inp = loop_to_end (ch, inp) |
|||
in |
|||
inp |
|||
end |
|||
end |
|||
val inp = check_end inp |
|||
in |
|||
(tok, inp) |
|||
end |
|||
fn |
|||
scan_string_literal (inp : inp_t) : (tokentuple_t, inp_t) = |
|||
let |
|||
val (ch, inp) = get_ch inp |
|||
val _ = assertloc ((ch.ichar) = '"') |
|||
fun |
|||
scan {u : pos} |
|||
(lst : list (char, u), |
|||
inp : inp_t) : |
|||
[m : pos] (list (char, m), inp_t) = |
|||
let |
|||
val (ch1, inp) = get_ch inp |
|||
in |
|||
if (ch1.ichar) < 0 then |
|||
$raise unterminated_string_literal (ch.line_no, |
|||
ch.column_no, false) |
|||
else if (ch1.ichar) = char2i '\n' then |
|||
$raise unterminated_string_literal (ch.line_no, |
|||
ch.column_no, true) |
|||
else if (ch1.ichar) = char2i '"' then |
|||
(lst, inp) |
|||
else if (ch1.ichar) <> char2i '\\' then |
|||
scan (int2char0 (ch1.ichar) :: lst, inp) |
|||
else |
|||
let |
|||
val (ch2, inp) = get_ch inp |
|||
in |
|||
if (ch2.ichar) = char2i 'n' then |
|||
scan ('n' :: '\\' :: lst, inp) |
|||
else if (ch2.ichar) = char2i '\\' then |
|||
scan ('\\' :: '\\' :: lst, inp) |
|||
else |
|||
$raise unsupported_escape (ch1.line_no, ch1.column_no, |
|||
ch2.ichar) |
|||
end |
|||
end |
|||
val lst = '"' :: NIL |
|||
val (lst, inp) = scan (lst, inp) |
|||
val lst = '"' :: lst |
|||
val s = reverse_list_to_string lst |
|||
in |
|||
((token_String, s, ch.line_no, ch.column_no), inp) |
|||
end |
|||
fn |
|||
get_next_token (inp : inp_t, |
|||
lookups : !lookups_vt) : (tokentuple_t, inp_t) = |
|||
let |
|||
val inp = skip_spaces_and_comments inp |
|||
val (ch, inp) = get_ch inp |
|||
val ln = ch.line_no |
|||
val cn = ch.column_no |
|||
in |
|||
case+ int2char0 (ch.ichar) of |
|||
| ',' => ((token_Comma, ",", ln, cn), inp) |
|||
| ';' => ((token_Semicolon, ";", ln, cn), inp) |
|||
| '\(' => ((token_LeftParen, "(", ln, cn), inp) |
|||
| ')' => ((token_RightParen, ")", ln, cn), inp) |
|||
| '\{' => ((token_LeftBrace, "{", ln, cn), inp) |
|||
| '}' => ((token_RightBrace, "}", ln, cn), inp) |
|||
| '*' => ((token_multiply, "*", ln, cn), inp) |
|||
| '/' => ((token_divide, "/", ln, cn), inp) |
|||
| '%' => ((token_mod, "%", ln, cn), inp) |
|||
| '+' => ((token_add, "+", ln, cn), inp) |
|||
| '-' => ((token_subtract, "-", ln, cn), inp) |
|||
| '<' => |
|||
let |
|||
val (ch1, inp) = get_ch inp |
|||
in |
|||
if (ch1.ichar) = char2i '=' then |
|||
((token_lessequal, "<=", ln, cn), inp) |
|||
else |
|||
let |
|||
val inp = push_back_ch (ch1, inp) |
|||
in |
|||
((token_less, "<", ln, cn), inp) |
|||
end |
|||
end |
|||
| '>' => |
|||
let |
|||
val (ch1, inp) = get_ch inp |
|||
in |
|||
if (ch1.ichar) = char2i '=' then |
|||
((token_greaterequal, ">=", ln, cn), inp) |
|||
else |
|||
let |
|||
val inp = push_back_ch (ch1, inp) |
|||
in |
|||
((token_greater, ">", ln, cn), inp) |
|||
end |
|||
end |
|||
| '=' => |
|||
let |
|||
val (ch1, inp) = get_ch inp |
|||
in |
|||
if (ch1.ichar) = char2i '=' then |
|||
((token_equal, "==", ln, cn), inp) |
|||
else |
|||
let |
|||
val inp = push_back_ch (ch1, inp) |
|||
in |
|||
((token_assign, "=", ln, cn), inp) |
|||
end |
|||
end |
|||
| '!' => |
|||
let |
|||
val (ch1, inp) = get_ch inp |
|||
in |
|||
if (ch1.ichar) = char2i '=' then |
|||
((token_notequal, "!=", ln, cn), inp) |
|||
else |
|||
let |
|||
val inp = push_back_ch (ch1, inp) |
|||
in |
|||
((token_not, "!", ln, cn), inp) |
|||
end |
|||
end |
|||
| '&' => |
|||
let |
|||
val (ch1, inp) = get_ch inp |
|||
in |
|||
if (ch1.ichar) = char2i '&' then |
|||
((token_and, "&&", ln, cn), inp) |
|||
else |
|||
$raise unexpected_character (ch.line_no, ch.column_no, |
|||
ch.ichar) |
|||
end |
|||
| '|' => |
|||
let |
|||
val (ch1, inp) = get_ch inp |
|||
in |
|||
if (ch1.ichar) = char2i '|' then |
|||
((token_or, "||", ln, cn), inp) |
|||
else |
|||
$raise unexpected_character (ch.line_no, ch.column_no, |
|||
ch.ichar) |
|||
end |
|||
| '"' => |
|||
let |
|||
val inp = push_back_ch (ch, inp) |
|||
in |
|||
scan_string_literal inp |
|||
end |
|||
| '\'' => |
|||
let |
|||
val inp = push_back_ch (ch, inp) |
|||
in |
|||
scan_character_literal inp |
|||
end |
|||
| _ when isdigit (ch.ichar) => |
|||
let |
|||
val inp = push_back_ch (ch, inp) |
|||
in |
|||
scan_integer_literal (inp, lookups) |
|||
end |
|||
| _ when is_ident_start (ch.ichar) => |
|||
let |
|||
val inp = push_back_ch (ch, inp) |
|||
in |
|||
scan_identifier_or_reserved_word (inp, lookups) |
|||
end |
|||
| _ => $raise unexpected_character (ch.line_no, ch.column_no, |
|||
ch.ichar) |
|||
end |
|||
fn |
|||
fprint_ullint_rightjust (outf : FILEref, |
|||
num : ullint) : void = |
|||
if num < 10ULL then |
|||
fprint! (outf, " ", num) |
|||
else if num < 100ULL then |
|||
fprint! (outf, " ", num) |
|||
else if num < 1000ULL then |
|||
fprint! (outf, " ", num) |
|||
else if num < 10000ULL then |
|||
fprint! (outf, " ", num) |
|||
else |
|||
fprint! (outf, num) |
|||
fn |
|||
print_token (outf : FILEref, |
|||
toktup : tokentuple_t, |
|||
lookups : !lookups_vt) : void = |
|||
let |
|||
macdef toknames = !(lookups.toknames) |
|||
val name = toknames[toktup.0] |
|||
val str = (toktup.1) |
|||
val line_no = (toktup.2) |
|||
val column_no = (toktup.3) |
|||
val _ = fprint_ullint_rightjust (outf, line_no) |
|||
val _ = fileref_puts (outf, " ") |
|||
val _ = fprint_ullint_rightjust (outf, column_no) |
|||
val _ = fileref_puts (outf, " ") |
|||
val _ = fileref_puts (outf, name) |
|||
in |
|||
begin |
|||
case+ toktup.0 of |
|||
| token_Identifier => fprint! (outf, " ", str) |
|||
| token_Integer => fprint! (outf, " ", str) |
|||
| token_String => fprint! (outf, " ", str) |
|||
| _ => () |
|||
end; |
|||
fileref_putc (outf, '\n') |
|||
end |
|||
fn |
|||
scan_text (outf : FILEref, |
|||
inp : inp_t, |
|||
lookups : !lookups_vt) : void = |
|||
let |
|||
fun |
|||
loop (inp : inp_t, |
|||
lookups : !lookups_vt) : void = |
|||
let |
|||
val inp = skip_spaces_and_comments inp |
|||
val (ch, inp) = get_ch inp |
|||
val ln = ch.line_no |
|||
val cn = ch.column_no |
|||
in |
|||
if (ch.ichar) < 0 then |
|||
print_token (outf, (token_End_of_input, "", ln, cn), |
|||
lookups) |
|||
else |
|||
let |
|||
val inp = push_back_ch (ch, inp) |
|||
val (toktup, inp) = get_next_token (inp, lookups) |
|||
in |
|||
print_token (outf, toktup, lookups); |
|||
loop (inp, lookups) |
|||
end |
|||
end |
|||
in |
|||
loop (inp, lookups) |
|||
end |
|||
(********************************************************************) |
|||
fn |
|||
main_program (inpf : FILEref, |
|||
outf : FILEref) : int = |
|||
let |
|||
(* Using a simple Scheme program, I found the following perfect |
|||
hash for the reserved words, using the sum of the first two |
|||
characters as the hash value. *) |
|||
var reserved_words = |
|||
@[String][RESERVED_WORD_HASHTAB_SIZE] |
|||
("if", "print", "else", "", "putc", "", "", "while", "") |
|||
var reserved_word_tokens = |
|||
@[token_t][RESERVED_WORD_HASHTAB_SIZE] |
|||
(token_if, token_print, token_else, token_Identifier, |
|||
token_putc, token_Identifier, token_Identifier, token_while, |
|||
token_Identifier) |
|||
var token_names = |
|||
@[string][NUM_TOKENS] |
|||
("Keyword_else", |
|||
"Keyword_if", |
|||
"Keyword_print", |
|||
"Keyword_putc", |
|||
"Keyword_while", |
|||
"Op_multiply", |
|||
"Op_divide", |
|||
"Op_mod", |
|||
"Op_add", |
|||
"Op_subtract", |
|||
"Op_negate", |
|||
"Op_less", |
|||
"Op_lessequal", |
|||
"Op_greater", |
|||
"Op_greaterequal", |
|||
"Op_equal", |
|||
"Op_notequal", |
|||
"Op_not", |
|||
"Op_assign", |
|||
"Op_and", |
|||
"Op_or", |
|||
"LeftParen", |
|||
"RightParen", |
|||
"LeftBrace", |
|||
"RightBrace", |
|||
"Semicolon", |
|||
"Comma", |
|||
"Identifier", |
|||
"Integer", |
|||
"String", |
|||
"End_of_input") |
|||
var lookups : lookups_vt = |
|||
@{ |
|||
pf_toknames = view@ token_names, |
|||
pf_wordtab = view@ reserved_words, |
|||
pf_toktab = view@ reserved_word_tokens | |
|||
toknames = addr@ token_names, |
|||
wordtab = addr@ reserved_words, |
|||
toktab = addr@ reserved_word_tokens |
|||
} |
|||
val inp = |
|||
@{ |
|||
file = inpf, |
|||
pushback = NIL, |
|||
line_no = 1ULL, |
|||
column_no = 1ULL |
|||
} |
|||
val _ = scan_text (outf, inp, lookups) |
|||
val @{ |
|||
pf_toknames = pf_toknames, |
|||
pf_wordtab = pf_wordtab, |
|||
pf_toktab = pf_toktab | |
|||
toknames = toknames, |
|||
wordtab = wordtab, |
|||
toktab = toktab |
|||
} = lookups |
|||
prval _ = view@ token_names := pf_toknames |
|||
prval _ = view@ reserved_words := pf_wordtab |
|||
prval _ = view@ reserved_word_tokens := pf_toktab |
|||
in |
|||
0 |
|||
end |
|||
macdef lex_error = "Lexical error: " |
|||
implement |
|||
main (argc, argv) = |
|||
let |
|||
val inpfname = |
|||
if 2 <= argc then |
|||
$UN.cast{string} argv[1] |
|||
else |
|||
"-" |
|||
val outfname = |
|||
if 3 <= argc then |
|||
$UN.cast{string} argv[2] |
|||
else |
|||
"-" |
|||
in |
|||
try |
|||
let |
|||
val inpf = |
|||
if (inpfname : string) = "-" then |
|||
stdin_ref |
|||
else |
|||
fileref_open_exn (inpfname, file_mode_r) |
|||
val outf = |
|||
if (outfname : string) = "-" then |
|||
stdout_ref |
|||
else |
|||
fileref_open_exn (outfname, file_mode_w) |
|||
in |
|||
main_program (inpf, outf) |
|||
end |
|||
with |
|||
| ~ unterminated_comment (line_no, column_no) => |
|||
begin |
|||
fprintln! (stderr_ref, lex_error, |
|||
"unterminated comment starting at ", |
|||
line_no, ":", column_no); |
|||
1 |
|||
end |
|||
| ~ unterminated_character_literal (line_no, column_no) => |
|||
begin |
|||
fprintln! (stderr_ref, lex_error, |
|||
"unterminated character literal starting at ", |
|||
line_no, ":", column_no); |
|||
1 |
|||
end |
|||
| ~ multicharacter_literal (line_no, column_no) => |
|||
begin |
|||
fprintln! (stderr_ref, lex_error, |
|||
"unsupported multicharacter literal starting at ", |
|||
line_no, ":", column_no); |
|||
1 |
|||
end |
|||
| ~ unterminated_string_literal (line_no, column_no, |
|||
end_of_line) => |
|||
let |
|||
val s = |
|||
begin |
|||
if end_of_line then |
|||
"end of line" |
|||
else |
|||
"end of input" |
|||
end : String |
|||
in |
|||
fprintln! (stderr_ref, lex_error, |
|||
"unterminated string literal (", s, |
|||
") starting at ", line_no, ":", column_no); |
|||
1 |
|||
end |
|||
| ~ unsupported_escape (line_no, column_no, c) => |
|||
begin |
|||
fprintln! (stderr_ref, lex_error, |
|||
"unsupported escape \\", |
|||
int2char0 c, " starting at ", |
|||
line_no, ":", column_no); |
|||
1 |
|||
end |
|||
| ~ invalid_integer_literal (line_no, column_no, s) => |
|||
begin |
|||
fprintln! (stderr_ref, lex_error, |
|||
"invalid integer literal ", s, |
|||
" starting at ", line_no, ":", column_no); |
|||
1 |
|||
end |
|||
| ~ unexpected_character (line_no, column_no, c) => |
|||
begin |
|||
fprintln! (stderr_ref, lex_error, |
|||
"unexpected character '", int2char0 c, |
|||
"' at ", line_no, ":", column_no); |
|||
1 |
|||
end |
|||
end |
|||
(********************************************************************)</lang> |
|||
=={{header|AWK}}== |
=={{header|AWK}}== |