Compiler/lexical analyzer: Difference between revisions
Content added Content deleted
Line 1,047: | Line 1,047: | ||
#define RESERVED_WORD_HASHTAB_SIZE 9 |
#define RESERVED_WORD_HASHTAB_SIZE 9 |
||
#define |
#define TOKEN_ELSE 0 |
||
#define |
#define TOKEN_IF 1 |
||
#define |
#define TOKEN_PRINT 2 |
||
#define |
#define TOKEN_PUTC 3 |
||
#define |
#define TOKEN_WHILE 4 |
||
#define |
#define TOKEN_MULTIPLY 5 |
||
#define |
#define TOKEN_DIVIDE 6 |
||
#define |
#define TOKEN_MOD 7 |
||
#define |
#define TOKEN_ADD 8 |
||
#define |
#define TOKEN_SUBTRACT 9 |
||
#define |
#define TOKEN_NEGATE 10 |
||
#define |
#define TOKEN_LESS 11 |
||
#define |
#define TOKEN_LESSEQUAL 12 |
||
#define |
#define TOKEN_GREATER 13 |
||
#define |
#define TOKEN_GREATEREQUAL 14 |
||
#define |
#define TOKEN_EQUAL 15 |
||
#define |
#define TOKEN_NOTEQUAL 16 |
||
#define |
#define TOKEN_NOT 17 |
||
#define |
#define TOKEN_ASSIGN 18 |
||
#define |
#define TOKEN_AND 19 |
||
#define |
#define TOKEN_OR 20 |
||
#define |
#define TOKEN_LEFTPAREN 21 |
||
#define |
#define TOKEN_RIGHTPAREN 22 |
||
#define |
#define TOKEN_LEFTBRACE 23 |
||
#define |
#define TOKEN_RIGHTBRACE 24 |
||
#define |
#define TOKEN_SEMICOLON 25 |
||
#define |
#define TOKEN_COMMA 26 |
||
#define |
#define TOKEN_IDENTIFIER 27 |
||
#define |
#define TOKEN_INTEGER 28 |
||
#define |
#define TOKEN_STRING 29 |
||
#define |
#define TOKEN_END_OF_INPUT 30 |
||
typedef token_t = |
typedef token_t = |
||
[i : int | |
[i : int | TOKEN_ELSE <= i; i <= TOKEN_END_OF_INPUT] |
||
int i |
int i |
||
typedef tokentuple_t = (token_t, String, ullint, ullint) |
typedef tokentuple_t = (token_t, String, ullint, ullint) |
||
Line 1,110: | Line 1,110: | ||
column_no : ullint) : tokentuple_t = |
column_no : ullint) : tokentuple_t = |
||
if string_length s < 2 then |
if string_length s < 2 then |
||
( |
(TOKEN_IDENTIFIER, s, line_no, column_no) |
||
else |
else |
||
let |
let |
||
Line 1,120: | Line 1,120: | ||
val token = toktab[hashval] |
val token = toktab[hashval] |
||
in |
in |
||
if token = |
if token = TOKEN_IDENTIFIER || s <> wordtab[hashval] then |
||
( |
(TOKEN_IDENTIFIER, s, line_no, column_no) |
||
else |
else |
||
(token, s, line_no, column_no) |
(token, s, line_no, column_no) |
||
Line 1,386: | Line 1,386: | ||
val _ = check_they_are_all_digits lst |
val _ = check_they_are_all_digits lst |
||
in |
in |
||
(( |
((TOKEN_INTEGER, s, ch.line_no, ch.column_no), inp) |
||
end |
end |
||
Line 1,421: | Line 1,421: | ||
val s = ichar2integer_literal (char2i '\n') |
val s = ichar2integer_literal (char2i '\n') |
||
in |
in |
||
(( |
((TOKEN_INTEGER, s, ch.line_no, ch.column_no), inp) |
||
end |
end |
||
else if (ch2.ichar) = char2i '\\' then |
else if (ch2.ichar) = char2i '\\' then |
||
Line 1,427: | Line 1,427: | ||
val s = ichar2integer_literal (char2i '\\') |
val s = ichar2integer_literal (char2i '\\') |
||
in |
in |
||
(( |
((TOKEN_INTEGER, s, ch.line_no, ch.column_no), inp) |
||
end |
end |
||
else |
else |
||
Line 1,437: | Line 1,437: | ||
val s = ichar2integer_literal (ch1.ichar) |
val s = ichar2integer_literal (ch1.ichar) |
||
in |
in |
||
(( |
((TOKEN_INTEGER, s, ch.line_no, ch.column_no), inp) |
||
end |
end |
||
end |
end |
||
Line 1,527: | Line 1,527: | ||
val s = reverse_list_to_string lst |
val s = reverse_list_to_string lst |
||
in |
in |
||
(( |
((TOKEN_STRING, s, ch.line_no, ch.column_no), inp) |
||
end |
end |
||
Line 1,540: | Line 1,540: | ||
in |
in |
||
case+ int2char0 (ch.ichar) of |
case+ int2char0 (ch.ichar) of |
||
| ',' => (( |
| ',' => ((TOKEN_COMMA, ",", ln, cn), inp) |
||
| ';' => (( |
| ';' => ((TOKEN_SEMICOLON, ";", ln, cn), inp) |
||
| '\(' => (( |
| '\(' => ((TOKEN_LEFTPAREN, "(", ln, cn), inp) |
||
| ')' => (( |
| ')' => ((TOKEN_RIGHTPAREN, ")", ln, cn), inp) |
||
| '\{' => (( |
| '\{' => ((TOKEN_LEFTBRACE, "{", ln, cn), inp) |
||
| '}' => (( |
| '}' => ((TOKEN_RIGHTBRACE, "}", ln, cn), inp) |
||
| '*' => (( |
| '*' => ((TOKEN_MULTIPLY, "*", ln, cn), inp) |
||
| '/' => (( |
| '/' => ((TOKEN_DIVIDE, "/", ln, cn), inp) |
||
| '%' => (( |
| '%' => ((TOKEN_MOD, "%", ln, cn), inp) |
||
| '+' => (( |
| '+' => ((TOKEN_ADD, "+", ln, cn), inp) |
||
| '-' => (( |
| '-' => ((TOKEN_SUBTRACT, "-", ln, cn), inp) |
||
| '<' => |
| '<' => |
||
let |
let |
||
Line 1,556: | Line 1,556: | ||
in |
in |
||
if (ch1.ichar) = char2i '=' then |
if (ch1.ichar) = char2i '=' then |
||
(( |
((TOKEN_LESSEQUAL, "<=", ln, cn), inp) |
||
else |
else |
||
let |
let |
||
val inp = push_back_ch (ch1, inp) |
val inp = push_back_ch (ch1, inp) |
||
in |
in |
||
(( |
((TOKEN_LESS, "<", ln, cn), inp) |
||
end |
end |
||
end |
end |
||
Line 1,569: | Line 1,569: | ||
in |
in |
||
if (ch1.ichar) = char2i '=' then |
if (ch1.ichar) = char2i '=' then |
||
(( |
((TOKEN_GREATEREQUAL, ">=", ln, cn), inp) |
||
else |
else |
||
let |
let |
||
val inp = push_back_ch (ch1, inp) |
val inp = push_back_ch (ch1, inp) |
||
in |
in |
||
(( |
((TOKEN_GREATER, ">", ln, cn), inp) |
||
end |
end |
||
end |
end |
||
Line 1,582: | Line 1,582: | ||
in |
in |
||
if (ch1.ichar) = char2i '=' then |
if (ch1.ichar) = char2i '=' then |
||
(( |
((TOKEN_EQUAL, "==", ln, cn), inp) |
||
else |
else |
||
let |
let |
||
val inp = push_back_ch (ch1, inp) |
val inp = push_back_ch (ch1, inp) |
||
in |
in |
||
(( |
((TOKEN_ASSIGN, "=", ln, cn), inp) |
||
end |
end |
||
end |
end |
||
Line 1,595: | Line 1,595: | ||
in |
in |
||
if (ch1.ichar) = char2i '=' then |
if (ch1.ichar) = char2i '=' then |
||
(( |
((TOKEN_NOTEQUAL, "!=", ln, cn), inp) |
||
else |
else |
||
let |
let |
||
val inp = push_back_ch (ch1, inp) |
val inp = push_back_ch (ch1, inp) |
||
in |
in |
||
(( |
((TOKEN_NOT, "!", ln, cn), inp) |
||
end |
end |
||
end |
end |
||
Line 1,608: | Line 1,608: | ||
in |
in |
||
if (ch1.ichar) = char2i '&' then |
if (ch1.ichar) = char2i '&' then |
||
(( |
((TOKEN_AND, "&&", ln, cn), inp) |
||
else |
else |
||
$raise unexpected_character (ch.line_no, ch.column_no, |
$raise unexpected_character (ch.line_no, ch.column_no, |
||
Line 1,618: | Line 1,618: | ||
in |
in |
||
if (ch1.ichar) = char2i '|' then |
if (ch1.ichar) = char2i '|' then |
||
(( |
((TOKEN_OR, "||", ln, cn), inp) |
||
else |
else |
||
$raise unexpected_character (ch.line_no, ch.column_no, |
$raise unexpected_character (ch.line_no, ch.column_no, |
||
Line 1,684: | Line 1,684: | ||
begin |
begin |
||
case+ toktup.0 of |
case+ toktup.0 of |
||
| |
| TOKEN_IDENTIFIER => fprint! (outf, " ", str) |
||
| |
| TOKEN_INTEGER => fprint! (outf, " ", str) |
||
| |
| TOKEN_STRING => fprint! (outf, " ", str) |
||
| _ => () |
| _ => () |
||
end; |
end; |
||
Line 1,708: | Line 1,708: | ||
in |
in |
||
if (ch.ichar) < 0 then |
if (ch.ichar) < 0 then |
||
print_token (outf, ( |
print_token (outf, (TOKEN_END_OF_INPUT, "", ln, cn), |
||
lookups) |
lookups) |
||
else |
else |
||
Line 1,737: | Line 1,737: | ||
var reserved_word_tokens = |
var reserved_word_tokens = |
||
@[token_t][RESERVED_WORD_HASHTAB_SIZE] |
@[token_t][RESERVED_WORD_HASHTAB_SIZE] |
||
( |
(TOKEN_IF, TOKEN_PRINT, TOKEN_ELSE, TOKEN_IDENTIFIER, |
||
TOKEN_PUTC, TOKEN_IDENTIFIER, TOKEN_IDENTIFIER, TOKEN_WHILE, |
|||
TOKEN_IDENTIFIER) |
|||
var token_names = |
var token_names = |