Compiler/lexical analyzer: Difference between revisions

Line 1,023:
 
One interesting feature of this implementation is my liberal use of a pushback buffer for input characters. This kept the code modular and easier to write.
 
(One point of note: the C "EOF" pseudo-character is detected in the following code by checking for a negative number. The ISO C standard implies that EOF must be negative, while every other character value returned by the input functions is non-negative.)
 
<lang ATS>(********************************************************************)
Line 1,539 ⟶ 1,541:
val cn = ch.column_no
in
case+if int2char0 (ch.ichar) of< 0 then
| ',' => ((TOKEN_COMMATOKEN_END_OF_INPUT, ",", ln, cn), inp)
else
| ';' => ((TOKEN_SEMICOLON, ";", ln, cn), inp)
| '\(' =>case+ ((TOKEN_LEFTPAREN,int2char0 "(", ln, cnch.ichar), inp)of
| '),' => ((TOKEN_RIGHTPARENTOKEN_COMMA, "),", ln, cn), inp)
| '\{;' => ((TOKEN_LEFTBRACETOKEN_SEMICOLON, "{;", ln, cn), inp)
| '}\(' => ((TOKEN_RIGHTBRACETOKEN_LEFTPAREN, "}(", ln, cn), inp)
| '*)' => ((TOKEN_MULTIPLYTOKEN_RIGHTPAREN, "*)", ln, cn), inp)
| '/\{' => ((TOKEN_DIVIDETOKEN_LEFTBRACE, "/{", ln, cn), inp)
| '%}' => ((TOKEN_MODTOKEN_RIGHTBRACE, "%}", ln, cn), inp)
| '+*' => ((TOKEN_ADDTOKEN_MULTIPLY, "+*", ln, cn), inp)
| '-/' => ((TOKEN_SUBTRACTTOKEN_DIVIDE, "-/", ln, cn), inp)
| '<%' => ((TOKEN_MOD, "%", ln, cn), inp)
| ';+' => ((TOKEN_SEMICOLONTOKEN_ADD, ";+", ln, cn), inp)
let
| '-' val=> (ch1(TOKEN_SUBTRACT, inp)"-", =ln, get_chcn), inp)
in| '<' =>
let
if (ch1.ichar) = char2i '=' then
val ((TOKEN_LESSEQUALch1, "<inp) =", ln, cn),get_ch inp)
elsein
letif (ch1.ichar) = char2i '=' then
val inp((TOKEN_LESSEQUAL, "<=", push_back_chln, (ch1cn), inp)
inelse
((TOKEN_LESS, "<", ln, cn), inp)let
end val inp = push_back_ch (ch1, inp)
end in
((TOKEN_LESS, "<", ln, cn), inp)
| '>' =>
let end
val (ch1, inp) = get_ch inpend
in| '>' =>
let
if (ch1.ichar) = char2i '=' then
val ((TOKEN_GREATEREQUALch1, ">inp) =", ln, cn),get_ch inp)
elsein
letif (ch1.ichar) = char2i '=' then
val inp((TOKEN_GREATEREQUAL, ">=", push_back_chln, (ch1cn), inp)
inelse
((TOKEN_GREATER, ">", ln, cn), inp)let
end val inp = push_back_ch (ch1, inp)
end in
((TOKEN_GREATER, ">", ln, cn), inp)
| '=' =>
let end
val (ch1, inp) = get_ch inpend
in| '=' =>
let
if (ch1.ichar) = char2i '=' then
val ((TOKEN_EQUALch1, "inp) ==", ln, cn),get_ch inp)
elsein
letif (ch1.ichar) = char2i '=' then
val inp((TOKEN_EQUAL, "==", push_back_chln, (ch1cn), inp)
inelse
((TOKEN_ASSIGN, "=", ln, cn), inp)let
end val inp = push_back_ch (ch1, inp)
end in
((TOKEN_ASSIGN, "=", ln, cn), inp)
| '!' =>
let end
val (ch1, inp) = get_ch inpend
in| '!' =>
let
if (ch1.ichar) = char2i '=' then
val ((TOKEN_NOTEQUALch1, "!inp) =", ln, cn),get_ch inp)
elsein
letif (ch1.ichar) = char2i '=' then
val inp((TOKEN_NOTEQUAL, "!=", push_back_chln, (ch1cn), inp)
inelse
((TOKEN_NOT, "!", ln, cn), inp)let
end val inp = push_back_ch (ch1, inp)
end in
((TOKEN_NOT, "!", ln, cn), inp)
| '&' =>
let end
val (ch1, inp) = get_ch inpend
in| '&' =>
let
if (ch1.ichar) = char2i '&' then
val ((TOKEN_ANDch1, "&&",inp) ln,= cn),get_ch inp)
elsein
$raise unexpected_characterif (chch1.line_no,ichar) ch.column_no,= char2i '&' then
((TOKEN_AND, "&&", ln, cn), ch.icharinp)
end else
| _ => $raise unexpected_character (ch.line_no, ch.column_no,
| '|' =>
ch.ichar)
let
val (ch1, inp) = get_ch inpend
in| '|' =>
let
if (ch1.ichar) = char2i '|' then
val ((TOKEN_ORch1, "||",inp) ln,= cn),get_ch inp)
elsein
$raise unexpected_characterif (chch1.line_no,ichar) ch.column_no,= char2i '|' then
((TOKEN_OR, "||", ln, cn), ch.icharinp)
end else
$raise unexpected_character (ch.line_no, ch.column_no,
| '"' =>
ch.ichar)
let
end
val inp = push_back_ch (ch, inp)
in| '"' =>
scan_string_literal inplet
val inp = push_back_ch (ch, inp)
end
| '\'' => in
scan_string_literal inp
let
end
val inp = push_back_ch (ch, inp)
in| '\'' =>
scan_character_literal inplet
val inp = push_back_ch (ch, inp)
end
| _ when isdigit (ch.ichar) =>in
scan_character_literal inp
let
end
val inp = push_back_ch (ch, inp)
| _ when is_ident_startisdigit (ch.ichar) =>
in
let
scan_integer_literal (inp, lookups)
val inp = push_back_ch (ch, inp)
end
| '>' => in
| _ when is_ident_start (ch.ichar) =>
scan_integer_literal (inp, lookups)
let
end
val inp = push_back_ch (ch, inp)
| _ when is_ident_start (ch.ichar) =>
in
inlet
scan_identifier_or_reserved_word (inp, lookups)
val inp = push_back_ch (ch, inp)
end
| '=' => in
| _ => $raise unexpected_character (ch.line_no, ch.column_no,
scan_identifier_or_reserved_word (inp, lookups)
ch.ichar)
end
| _ => $raise unexpected_character (ch.line_no, ch.column_no,
ch.ichar)
end
 
Line 1,702 ⟶ 1,707:
lookups : !lookups_vt) : void =
let
val (toktup, inp) = skip_spaces_and_commentsget_next_token (inp, lookups)
val (ch, inp) = get_ch inp
val ln = ch.line_no
val cn = ch.column_no
in
ifprint_token (ch.ichar)outf, <toktup, 0 thenlookups);
if toktup.0 print_token<> (outf, (TOKEN_END_OF_INPUT, "", ln, cn),then
loop (inp, lookups)
else
let
val inp = push_back_ch (ch, inp)
val (toktup, inp) = get_next_token (inp, lookups)
in
print_token (outf, toktup, lookups);
loop (inp, lookups)
end
end
in
1,448

edits