Compiler/lexical analyzer: Difference between revisions

Content added Content deleted
Line 1,047: Line 1,047:
#define RESERVED_WORD_HASHTAB_SIZE 9
#define RESERVED_WORD_HASHTAB_SIZE 9


#define token_else 0
#define TOKEN_ELSE 0
#define token_if 1
#define TOKEN_IF 1
#define token_print 2
#define TOKEN_PRINT 2
#define token_putc 3
#define TOKEN_PUTC 3
#define token_while 4
#define TOKEN_WHILE 4
#define token_multiply 5
#define TOKEN_MULTIPLY 5
#define token_divide 6
#define TOKEN_DIVIDE 6
#define token_mod 7
#define TOKEN_MOD 7
#define token_add 8
#define TOKEN_ADD 8
#define token_subtract 9
#define TOKEN_SUBTRACT 9
#define token_negate 10
#define TOKEN_NEGATE 10
#define token_less 11
#define TOKEN_LESS 11
#define token_lessequal 12
#define TOKEN_LESSEQUAL 12
#define token_greater 13
#define TOKEN_GREATER 13
#define token_greaterequal 14
#define TOKEN_GREATEREQUAL 14
#define token_equal 15
#define TOKEN_EQUAL 15
#define token_notequal 16
#define TOKEN_NOTEQUAL 16
#define token_not 17
#define TOKEN_NOT 17
#define token_assign 18
#define TOKEN_ASSIGN 18
#define token_and 19
#define TOKEN_AND 19
#define token_or 20
#define TOKEN_OR 20
#define token_LeftParen 21
#define TOKEN_LEFTPAREN 21
#define token_RightParen 22
#define TOKEN_RIGHTPAREN 22
#define token_LeftBrace 23
#define TOKEN_LEFTBRACE 23
#define token_RightBrace 24
#define TOKEN_RIGHTBRACE 24
#define token_Semicolon 25
#define TOKEN_SEMICOLON 25
#define token_Comma 26
#define TOKEN_COMMA 26
#define token_Identifier 27
#define TOKEN_IDENTIFIER 27
#define token_Integer 28
#define TOKEN_INTEGER 28
#define token_String 29
#define TOKEN_STRING 29
#define token_End_of_input 30
#define TOKEN_END_OF_INPUT 30


typedef token_t =
typedef token_t =
[i : int | token_else <= i; i <= token_End_of_input]
[i : int | TOKEN_ELSE <= i; i <= TOKEN_END_OF_INPUT]
int i
int i
typedef tokentuple_t = (token_t, String, ullint, ullint)
typedef tokentuple_t = (token_t, String, ullint, ullint)
Line 1,110: Line 1,110:
column_no : ullint) : tokentuple_t =
column_no : ullint) : tokentuple_t =
if string_length s < 2 then
if string_length s < 2 then
(token_Identifier, s, line_no, column_no)
(TOKEN_IDENTIFIER, s, line_no, column_no)
else
else
let
let
Line 1,120: Line 1,120:
val token = toktab[hashval]
val token = toktab[hashval]
in
in
if token = token_Identifier || s <> wordtab[hashval] then
if token = TOKEN_IDENTIFIER || s <> wordtab[hashval] then
(token_Identifier, s, line_no, column_no)
(TOKEN_IDENTIFIER, s, line_no, column_no)
else
else
(token, s, line_no, column_no)
(token, s, line_no, column_no)
Line 1,386: Line 1,386:
val _ = check_they_are_all_digits lst
val _ = check_they_are_all_digits lst
in
in
((token_Integer, s, ch.line_no, ch.column_no), inp)
((TOKEN_INTEGER, s, ch.line_no, ch.column_no), inp)
end
end


Line 1,421: Line 1,421:
val s = ichar2integer_literal (char2i '\n')
val s = ichar2integer_literal (char2i '\n')
in
in
((token_Integer, s, ch.line_no, ch.column_no), inp)
((TOKEN_INTEGER, s, ch.line_no, ch.column_no), inp)
end
end
else if (ch2.ichar) = char2i '\\' then
else if (ch2.ichar) = char2i '\\' then
Line 1,427: Line 1,427:
val s = ichar2integer_literal (char2i '\\')
val s = ichar2integer_literal (char2i '\\')
in
in
((token_Integer, s, ch.line_no, ch.column_no), inp)
((TOKEN_INTEGER, s, ch.line_no, ch.column_no), inp)
end
end
else
else
Line 1,437: Line 1,437:
val s = ichar2integer_literal (ch1.ichar)
val s = ichar2integer_literal (ch1.ichar)
in
in
((token_Integer, s, ch.line_no, ch.column_no), inp)
((TOKEN_INTEGER, s, ch.line_no, ch.column_no), inp)
end
end
end
end
Line 1,527: Line 1,527:
val s = reverse_list_to_string lst
val s = reverse_list_to_string lst
in
in
((token_String, s, ch.line_no, ch.column_no), inp)
((TOKEN_STRING, s, ch.line_no, ch.column_no), inp)
end
end


Line 1,540: Line 1,540:
in
in
case+ int2char0 (ch.ichar) of
case+ int2char0 (ch.ichar) of
| ',' => ((token_Comma, ",", ln, cn), inp)
| ',' => ((TOKEN_COMMA, ",", ln, cn), inp)
| ';' => ((token_Semicolon, ";", ln, cn), inp)
| ';' => ((TOKEN_SEMICOLON, ";", ln, cn), inp)
| '\(' => ((token_LeftParen, "(", ln, cn), inp)
| '\(' => ((TOKEN_LEFTPAREN, "(", ln, cn), inp)
| ')' => ((token_RightParen, ")", ln, cn), inp)
| ')' => ((TOKEN_RIGHTPAREN, ")", ln, cn), inp)
| '\{' => ((token_LeftBrace, "{", ln, cn), inp)
| '\{' => ((TOKEN_LEFTBRACE, "{", ln, cn), inp)
| '}' => ((token_RightBrace, "}", ln, cn), inp)
| '}' => ((TOKEN_RIGHTBRACE, "}", ln, cn), inp)
| '*' => ((token_multiply, "*", ln, cn), inp)
| '*' => ((TOKEN_MULTIPLY, "*", ln, cn), inp)
| '/' => ((token_divide, "/", ln, cn), inp)
| '/' => ((TOKEN_DIVIDE, "/", ln, cn), inp)
| '%' => ((token_mod, "%", ln, cn), inp)
| '%' => ((TOKEN_MOD, "%", ln, cn), inp)
| '+' => ((token_add, "+", ln, cn), inp)
| '+' => ((TOKEN_ADD, "+", ln, cn), inp)
| '-' => ((token_subtract, "-", ln, cn), inp)
| '-' => ((TOKEN_SUBTRACT, "-", ln, cn), inp)
| '<' =>
| '<' =>
let
let
Line 1,556: Line 1,556:
in
in
if (ch1.ichar) = char2i '=' then
if (ch1.ichar) = char2i '=' then
((token_lessequal, "<=", ln, cn), inp)
((TOKEN_LESSEQUAL, "<=", ln, cn), inp)
else
else
let
let
val inp = push_back_ch (ch1, inp)
val inp = push_back_ch (ch1, inp)
in
in
((token_less, "<", ln, cn), inp)
((TOKEN_LESS, "<", ln, cn), inp)
end
end
end
end
Line 1,569: Line 1,569:
in
in
if (ch1.ichar) = char2i '=' then
if (ch1.ichar) = char2i '=' then
((token_greaterequal, ">=", ln, cn), inp)
((TOKEN_GREATEREQUAL, ">=", ln, cn), inp)
else
else
let
let
val inp = push_back_ch (ch1, inp)
val inp = push_back_ch (ch1, inp)
in
in
((token_greater, ">", ln, cn), inp)
((TOKEN_GREATER, ">", ln, cn), inp)
end
end
end
end
Line 1,582: Line 1,582:
in
in
if (ch1.ichar) = char2i '=' then
if (ch1.ichar) = char2i '=' then
((token_equal, "==", ln, cn), inp)
((TOKEN_EQUAL, "==", ln, cn), inp)
else
else
let
let
val inp = push_back_ch (ch1, inp)
val inp = push_back_ch (ch1, inp)
in
in
((token_assign, "=", ln, cn), inp)
((TOKEN_ASSIGN, "=", ln, cn), inp)
end
end
end
end
Line 1,595: Line 1,595:
in
in
if (ch1.ichar) = char2i '=' then
if (ch1.ichar) = char2i '=' then
((token_notequal, "!=", ln, cn), inp)
((TOKEN_NOTEQUAL, "!=", ln, cn), inp)
else
else
let
let
val inp = push_back_ch (ch1, inp)
val inp = push_back_ch (ch1, inp)
in
in
((token_not, "!", ln, cn), inp)
((TOKEN_NOT, "!", ln, cn), inp)
end
end
end
end
Line 1,608: Line 1,608:
in
in
if (ch1.ichar) = char2i '&' then
if (ch1.ichar) = char2i '&' then
((token_and, "&&", ln, cn), inp)
((TOKEN_AND, "&&", ln, cn), inp)
else
else
$raise unexpected_character (ch.line_no, ch.column_no,
$raise unexpected_character (ch.line_no, ch.column_no,
Line 1,618: Line 1,618:
in
in
if (ch1.ichar) = char2i '|' then
if (ch1.ichar) = char2i '|' then
((token_or, "||", ln, cn), inp)
((TOKEN_OR, "||", ln, cn), inp)
else
else
$raise unexpected_character (ch.line_no, ch.column_no,
$raise unexpected_character (ch.line_no, ch.column_no,
Line 1,684: Line 1,684:
begin
begin
case+ toktup.0 of
case+ toktup.0 of
| token_Identifier => fprint! (outf, " ", str)
| TOKEN_IDENTIFIER => fprint! (outf, " ", str)
| token_Integer => fprint! (outf, " ", str)
| TOKEN_INTEGER => fprint! (outf, " ", str)
| token_String => fprint! (outf, " ", str)
| TOKEN_STRING => fprint! (outf, " ", str)
| _ => ()
| _ => ()
end;
end;
Line 1,708: Line 1,708:
in
in
if (ch.ichar) < 0 then
if (ch.ichar) < 0 then
print_token (outf, (token_End_of_input, "", ln, cn),
print_token (outf, (TOKEN_END_OF_INPUT, "", ln, cn),
lookups)
lookups)
else
else
Line 1,737: Line 1,737:
var reserved_word_tokens =
var reserved_word_tokens =
@[token_t][RESERVED_WORD_HASHTAB_SIZE]
@[token_t][RESERVED_WORD_HASHTAB_SIZE]
(token_if, token_print, token_else, token_Identifier,
(TOKEN_IF, TOKEN_PRINT, TOKEN_ELSE, TOKEN_IDENTIFIER,
token_putc, token_Identifier, token_Identifier, token_while,
TOKEN_PUTC, TOKEN_IDENTIFIER, TOKEN_IDENTIFIER, TOKEN_WHILE,
token_Identifier)
TOKEN_IDENTIFIER)


var token_names =
var token_names =