Compiler/lexical analyzer: Difference between revisions

Content added Content deleted
(use the new "case" parameter of the "out" template)
(Added "else", and ">", "==", "!", "||" operators)
Line 22: Line 22:
! Name !! Common name !! Character sequence
! Name !! Common name !! Character sequence
|-
|-
| <tt>OP_MULTIPLY</tt> || multiply || <tt>*</tt>
| <tt>OP_MULTIPLY</tt> || multiply || <tt>*</tt>
|-
|-
| <tt>OP_DIVIDE</tt> || divide || <tt>/</tt>
| <tt>OP_DIVIDE</tt> || divide || <tt>/</tt>
|-
|-
| <tt>OP_ADD</tt> || plus || <tt>+</tt>
| <tt>OP_MOD</tt> || mod || <tt>%</tt>
|-
|-
| <tt>OP_SUBTRACT</tt> || minus || <tt>-</tt>
| <tt>OP_ADD</tt> || plus || <tt>+</tt>
|-
|-
| <tt>OP_NEGATE</tt> || unary minus || <tt>-</tt>
| <tt>OP_SUBTRACT</tt> || minus || <tt>-</tt>
|-
|-
| <tt>OP_LESS</tt> || less than || <tt><</tt>
| <tt>OP_NEGATE</tt> || unary minus || <tt>-</tt>
|-
|-
| <tt>OP_LESSEQUAL</tt> || less than or equal || <tt><=</tt>
| <tt>OP_LESS</tt> || less than || <tt><</tt>
|-
|-
| <tt>OP_GREATER</tt> || greater than || <tt>&gt;</tt>
| <tt>OP_LESSEQUAL</tt> || less than or equal || <tt><=</tt>
|-
|-
| <tt>OP_NOTEQUAL</tt> || not equal || <tt>&#33;=</tt>
| <tt>OP_GREATER</tt> || greater than || <tt>&gt;</tt>
|-
|-
| <tt>OP_ASSIGN</tt> || assignment || <tt>=</tt>
| <tt>OP_GREATEREQUAL</tt> || greater than or equal || <tt>&gt;=</tt>
|-
|-
| <tt>OP_AND</tt> || logical and || <tt>&&</tt>
| <tt>OP_EQUAL</tt> || equal || <tt>==</tt>
|-
| <tt>OP_NOTEQUAL</tt> || not equal || <tt>&#33;=</tt>
|-
| <tt>OP_NOT</tt> || unary not || <tt>&#33;</tt>
|-
| <tt>OP_ASSIGN</tt> || assignment || <tt>=</tt>
|-
| <tt>OP_AND</tt> || logical and || <tt>&amp;&amp;</tt>
|-
| <tt>OP_OR</tt> || logical or || <tt>&#124;&#124;</tt>
|}
|}


Line 73: Line 83:
|-
|-
| <tt>KEYWORD_IF</tt> || <tt>if</tt>
| <tt>KEYWORD_IF</tt> || <tt>if</tt>
|-
| <tt>KEYWORD_ELSE</tt> || <tt>else</tt>
|-
|-
| <tt>KEYWORD_WHILE</tt> || <tt>while</tt>
| <tt>KEYWORD_WHILE</tt> || <tt>while</tt>
Line 155: Line 167:


<pre>
<pre>
END_OF_INPUT OP_MULTIPLY OP_DIVIDE OP_ADD OP_SUBTRACT OP_NEGATE
END_OF_INPUT OP_MULTIPLY OP_DIVIDE OP_MOD OP_ADD OP_SUBTRACT
OP_NEGATE OP_LESS OP_LESSEQUAL OP_GREATER OP_GREATEREQUAL OP_EQUAL
OP_LESS OP_LESSEQUAL OP_GREATER OP_NOTEQUAL OP_ASSIGN OP_AND
OP_NOTEQUAL OP_ASSIGN OP_AND OP_OR KEYWORD_IF KEYWORD_ELSE
KEYWORD_IF KEYWORD_WHILE KEYWORD_PRINT KEYWORD_PUTC LEFTPAREN RIGHTPAREN
KEYWORD_WHILE KEYWORD_PRINT KEYWORD_PUTC LEFTPAREN RIGHTPAREN LEFTBRACE
LEFTBRACE RIGHTBRACE SEMICOLON COMMA IDENTIFIER INTEGER
RIGHTBRACE SEMICOLON COMMA IDENTIFIER INTEGER STRING
STRING
</pre>
</pre>


Line 252: Line 264:
have to wait until syntax analysis
have to wait until syntax analysis
*/
*/
/* Print */ print /* Sub */ -
/* Print */ print /* Sub */ -
/* Putc */ putc /* Lss */ <
/* Putc */ putc /* Lss */ <
/* If */ if /* Gtr */ >
/* If */ if /* Gtr */ >
/* While */ while /* Leq */ <=
/* Else */ else /* Leq */ <=
/* Lbrace */ { /* Neq */ !=
/* While */ while /* Geq */ >=
/* Rbrace */ } /* And */ &&
/* Lbrace */ { /* Eq */ ==
/* Lparen */ ( /* Semi */ ;
/* Rbrace */ } /* Neq */ !=
/* Rparen */ ) /* Comma */ ,
/* Lparen */ ( /* And */ &&
/* Uminus */ - /* Assign */ =
/* Rparen */ ) /* Or */ ||
/* Mul */ * /* Integer */ 42
/* Uminus */ - /* Semi */ ;
/* Div */ / /* String */ "String literal"
/* Not */ ! /* Comma */ ,
/* Add */ + /* Ident */ variable_name
/* Mul */ * /* Assign */ =
/* Div */ / /* Integer */ 42
/* Mod */ % /* String */ "String literal"
/* Add */ + /* Ident */ variable_name
/* character literal */ '\n'
/* character literal */ '\n'
/* character literal */ '\\'
/* character literal */ '\\'
Line 270: Line 285:
| style="vertical-align:top" |
| style="vertical-align:top" |
<b><pre>
<b><pre>
5 15 KEYWORD_PRINT
5 16 KEYWORD_PRINT
5 41 OP_SUBTRACT
5 40 OP_SUBTRACT
6 15 KEYWORD_PUTC
6 16 KEYWORD_PUTC
6 41 OP_LESS
6 40 OP_LESS
7 15 KEYWORD_IF
7 16 KEYWORD_IF
7 41 OP_GREATER
7 40 OP_GREATER
8 15 KEYWORD_WHILE
8 16 KEYWORD_ELSE
8 41 OP_LESSEQUAL
8 40 OP_LESSEQUAL
9 15 LEFTBRACE
9 16 KEYWORD_WHILE
9 41 OP_NOTEQUAL
9 40 OP_GREATEREQUAL
10 15 RIGHTBRACE
10 16 LEFTBRACE
10 41 OP_AND
10 40 OP_EQUAL
11 15 LEFTPAREN
11 16 RIGHTBRACE
11 41 SEMICOLON
11 40 OP_NOTEQUAL
12 15 RIGHTPAREN
12 16 LEFTPAREN
12 41 COMMA
12 40 OP_AND
13 15 OP_SUBTRACT
13 16 RIGHTPAREN
13 41 OP_ASSIGN
13 40 OP_OR
14 15 OP_MULTIPLY
14 16 OP_SUBTRACT
14 41 INTEGER 42
14 40 SEMICOLON
15 15 OP_DIVIDE
15 16 OP_NOT
15 41 STRING "String literal"
15 40 COMMA
16 15 OP_ADD
16 16 OP_MULTIPLY
16 41 IDENTIFIER variable_name
16 40 OP_ASSIGN
17 26 INTEGER 10
17 16 OP_DIVIDE
18 26 INTEGER 92
17 40 INTEGER 42
19 26 INTEGER 32
18 16 OP_MOD
20 1 END_OF_INPUT
18 40 STRING "String literal"
19 16 OP_ADD
19 40 IDENTIFIER variable_name
20 26 INTEGER 10
21 26 INTEGER 92
22 26 INTEGER 32
23 1 END_OF_INPUT
</pre></b>
</pre></b>
|}
|}
Line 303: Line 324:
{{task heading|Reference}}
{{task heading|Reference}}


The Flex, C, Python and Euphoria versions can be considered reference implementations.
The C and Python versions can be considered reference implementations.


<hr>
<hr>
Line 330: Line 351:
#define da_len(name) _qy_ ## name ## _p
#define da_len(name) _qy_ ## name ## _p


typedef enum {
typedef enum {tk_EOI, tk_Mul, tk_Div, tk_Add, tk_Sub, tk_Uminus, tk_Lss, tk_Leq, tk_Gtr,
tk_Neq, tk_Assign, tk_And, tk_If, tk_While, tk_Print, tk_Putc, tk_Lparen, tk_Rparen,
tk_EOI, tk_Mul, tk_Div, tk_Mod, tk_Add, tk_Sub, tk_Negate, tk_Not, tk_Lss, tk_Leq,
tk_Gtr, tk_Geq, tk_Eq, tk_Neq, tk_Assign, tk_And, tk_Or, tk_If, tk_Else, tk_While,
tk_Lbrace, tk_Rbrace, tk_Semi, tk_Comma, tk_Ident, tk_Integer, tk_String
tk_Print, tk_Putc, tk_Lparen, tk_Rparen, tk_Lbrace, tk_Rbrace, tk_Semi, tk_Comma,
tk_Ident, tk_Integer, tk_String
} TokenType;
} TokenType;


typedef struct {
typedef struct {
int tok;
TokenType tok;
int err_ln, err_col;
int err_ln, err_col;
union {
union {
Line 425: Line 448:
TokenType sym;
TokenType sym;
} kwds[] = {
} kwds[] = {
{"else", tk_Else},
{"if", tk_If},
{"if", tk_If},
{"print", tk_Print},
{"print", tk_Print},
Line 482: Line 506:
case '-': next_ch(); return (tok_s){tk_Sub, err_line, err_col, {0}};
case '-': next_ch(); return (tok_s){tk_Sub, err_line, err_col, {0}};
case '*': next_ch(); return (tok_s){tk_Mul, err_line, err_col, {0}};
case '*': next_ch(); return (tok_s){tk_Mul, err_line, err_col, {0}};
case '%': next_ch(); return (tok_s){tk_Mod, err_line, err_col, {0}};
case ';': next_ch(); return (tok_s){tk_Semi, err_line, err_col, {0}};
case ';': next_ch(); return (tok_s){tk_Semi, err_line, err_col, {0}};
case ',': next_ch(); return (tok_s){tk_Comma,err_line, err_col, {0}};
case ',': next_ch(); return (tok_s){tk_Comma,err_line, err_col, {0}};
case '>': next_ch(); return (tok_s){tk_Gtr, err_line, err_col, {0}};
case '=': next_ch(); return (tok_s){tk_Assign, err_line, err_col, {0}};
case '/': next_ch(); return div_or_cmt(err_line, err_col);
case '/': next_ch(); return div_or_cmt(err_line, err_col);
case '\'': next_ch(); return char_lit(the_ch, err_line, err_col);
case '\'': next_ch(); return char_lit(the_ch, err_line, err_col);
case '<': next_ch(); return follow('=', tk_Leq, tk_Lss, err_line, err_col);
case '<': next_ch(); return follow('=', tk_Leq, tk_Lss, err_line, err_col);
case '!': next_ch(); return follow('=', tk_Neq, tk_EOI, err_line, err_col);
case '>': next_ch(); return follow('=', tk_Geq, tk_Gtr, err_line, err_col);
case '&': next_ch(); return follow('&', tk_And, tk_EOI, err_line, err_col);
case '=': next_ch(); return follow('=', tk_Eq, tk_Assign, err_line, err_col);
case '!': next_ch(); return follow('=', tk_Neq, tk_Not, err_line, err_col);
case '&': next_ch(); return follow('&', tk_And, tk_EOI, err_line, err_col);
case '|': next_ch(); return follow('|', tk_Or, tk_EOI, err_line, err_col);
case '"' : return string_lit(the_ch, err_line, err_col);
case '"' : return string_lit(the_ch, err_line, err_col);
default: return ident_or_int(err_line, err_col);
default: return ident_or_int(err_line, err_col);
Line 501: Line 527:
do {
do {
tok = gettok();
tok = gettok();
fprintf(dest_fp, "%5d %5d %.14s",
fprintf(dest_fp, "%5d %5d %.15s",
tok.err_ln, tok.err_col,
tok.err_ln, tok.err_col,
&"END_OF_INPUT OP_MULTIPLY OP_DIVIDE OP_ADD OP_SUBTRACT OP_NEGATE "
&"END_OF_INPUT OP_MULTIPLY OP_DIVIDE OP_MOD OP_ADD "
"OP_LESS OP_LESSEQUAL OP_GREATER OP_NOTEQUAL OP_ASSIGN OP_AND "
"OP_SUBTRACT OP_NEGATE OP_NOT OP_LESS OP_LESSEQUAL "
"KEYWORD_IF KEYWORD_WHILE KEYWORD_PRINT KEYWORD_PUTC LEFTPAREN RIGHTPAREN "
"OP_GREATER OP_GREATEREQUAL OP_EQUAL OP_NOTEQUAL OP_ASSIGN "
"LEFTBRACE RIGHTBRACE SEMICOLON COMMA IDENTIFIER INTEGER "
"OP_AND OP_OR KEYWORD_IF KEYWORD_ELSE KEYWORD_WHILE "
"STRING "[tok.tok * 14]);
"KEYWORD_PRINT KEYWORD_PUTC LEFTPAREN RIGHTPAREN LEFTBRACE "
"RIGHTBRACE SEMICOLON COMMA IDENTIFIER INTEGER "

"STRING "
[tok.tok * 16]);
if (tok.tok == tk_Integer) fprintf(dest_fp, " %4d", tok.n);
if (tok.tok == tk_Integer) fprintf(dest_fp, " %4d", tok.n);
else if (tok.tok == tk_Ident) fprintf(dest_fp, " %s", tok.text);
else if (tok.tok == tk_Ident) fprintf(dest_fp, " %s", tok.text);
Line 535: Line 563:
<b>
<b>
<pre>
<pre>
5 15 KEYWORD_PRINT
5 16 KEYWORD_PRINT
5 41 OP_SUBTRACT
5 40 OP_SUBTRACT
6 15 KEYWORD_PUTC
6 16 KEYWORD_PUTC
6 41 OP_LESS
6 40 OP_LESS
7 15 KEYWORD_IF
7 16 KEYWORD_IF
7 41 OP_GREATER
7 40 OP_GREATER
8 15 KEYWORD_WHILE
8 16 KEYWORD_ELSE
8 41 OP_LESSEQUAL
8 40 OP_LESSEQUAL
9 15 LEFTBRACE
9 16 KEYWORD_WHILE
9 41 OP_NOTEQUAL
9 40 OP_GREATEREQUAL
10 15 RIGHTBRACE
10 16 LEFTBRACE
10 41 OP_AND
10 40 OP_EQUAL
11 15 LEFTPAREN
11 16 RIGHTBRACE
11 41 SEMICOLON
11 40 OP_NOTEQUAL
12 15 RIGHTPAREN
12 16 LEFTPAREN
12 41 COMMA
12 40 OP_AND
13 15 OP_SUBTRACT
13 16 RIGHTPAREN
13 41 OP_ASSIGN
13 40 OP_OR
14 15 OP_MULTIPLY
14 16 OP_SUBTRACT
14 41 INTEGER 42
14 40 SEMICOLON
15 15 OP_DIVIDE
15 16 OP_NOT
15 41 STRING "String literal"
15 40 COMMA
16 15 OP_ADD
16 16 OP_MULTIPLY
16 41 IDENTIFIER variable_name
16 40 OP_ASSIGN
17 26 INTEGER 10
17 16 OP_DIVIDE
18 26 INTEGER 92
17 40 INTEGER 42
19 26 INTEGER 32
18 16 OP_MOD
20 1 END_OF_INPUT
18 40 STRING "String literal"
19 16 OP_ADD
19 40 IDENTIFIER variable_name
20 26 INTEGER 10
21 26 INTEGER 92
22 26 INTEGER 32
23 1 END_OF_INPUT
</pre>
</pre>
</b>
</b>
Line 575: Line 609:
constant true = 1, false = 0, EOF = -1
constant true = 1, false = 0, EOF = -1


enum tk_EOI, tk_Mul, tk_Div, tk_Add, tk_Sub, tk_Uminus, tk_Lss, tk_Leq, tk_Gtr, tk_Neq,
enum tk_EOI, tk_Mul, tk_Div, tk_Mod, tk_Add, tk_Sub, tk_Negate, tk_Not, tk_Lss, tk_Leq,
tk_Gtr, tk_Geq, tk_Eq, tk_Neq, tk_Assign, tk_And, tk_Or, tk_If, tk_Else, tk_While,
tk_Assign, tk_And, tk_If, tk_While, tk_Print, tk_Putc, tk_Lparen, tk_Rparen,
tk_Print, tk_Putc, tk_Lparen, tk_Rparen, tk_Lbrace, tk_Rbrace, tk_Semi, tk_Comma,
tk_Lbrace, tk_Rbrace, tk_Semi, tk_Comma, tk_Ident, tk_Integer, tk_String
tk_Ident, tk_Integer, tk_String


constant all_syms = {"END_OF_INPUT", "OP_MULTIPLY", "OP_DIVIDE", "OP_ADD", "OP_SUBTRACT",
constant all_syms = {"END_OF_INPUT", "OP_MULTIPLY", "OP_DIVIDE", "OP_MOD", "OP_ADD",
"OP_NEGATE", "OP_LESS", "OP_LESSEQUAL", "OP_GREATER", "OP_NOTEQUAL", "OP_ASSIGN",
"OP_SUBTRACT", "OP_NEGATE", "OP_NOT", "OP_LESS", "OP_LESSEQUAL", "OP_GREATER",
"OP_AND", "KEYWORD_IF", "KEYWORD_WHILE", "KEYWORD_PRINT", "KEYWORD_PUTC", "LEFTPAREN",
"OP_GREATEREQUAL", "OP_EQUAL", "OP_NOTEQUAL", "OP_ASSIGN", "OP_AND", "OP_OR",
"RIGHTPAREN", "LEFTBRACE", "RIGHTBRACE", "SEMICOLON", "COMMA", "IDENTIFIER",
"KEYWORD_IF", "KEYWORD_ELSE", "KEYWORD_WHILE", "KEYWORD_PRINT", "KEYWORD_PUTC",
"LEFTPAREN", "RIGHTPAREN", "LEFTBRACE", "RIGHTBRACE", "SEMICOLON", "COMMA",
"INTEGER", "STRING"}
"IDENTIFIER", "INTEGER", "STRING"}


integer input_file, the_ch = ' ', the_col = 0, the_line = 1
integer input_file, the_ch = ' ', the_col = 0, the_line = 1
Line 721: Line 757:
case '/' then return div_or_cmt(err_line, err_col)
case '/' then return div_or_cmt(err_line, err_col)
case '\'' then return char_lit(err_line, err_col)
case '\'' then return char_lit(err_line, err_col)

case '<' then return follow('=', tk_Leq, tk_Lss, err_line, err_col)
case '!' then return follow('=', tk_Neq, tk_EOI, err_line, err_col)
case '<' then return follow('=', tk_Leq, tk_Lss, err_line, err_col)
case '&' then return follow('&', tk_And, tk_EOI, err_line, err_col)
case '>' then return follow('=', tk_Geq, tk_Gtr, err_line, err_col)
case '=' then return follow('=', tk_Eq, tk_Assign, err_line, err_col)
case '!' then return follow('=', tk_Neq, tk_Not, err_line, err_col)
case '&' then return follow('&', tk_And, tk_EOI, err_line, err_col)
case '|' then return follow('|', tk_Or, tk_EOI, err_line, err_col)

case '"' then return string_lit(the_ch, err_line, err_col)
case '"' then return string_lit(the_ch, err_line, err_col)
case else
case else
Line 736: Line 777:


procedure init()
procedure init()
put(key_words, "else", tk_Else)
put(key_words, "if", tk_If)
put(key_words, "if", tk_If)
put(key_words, "print", tk_Print)
put(key_words, "print", tk_Print)
Line 749: Line 791:
symbols['-'] = tk_Sub
symbols['-'] = tk_Sub
symbols['*'] = tk_Mul
symbols['*'] = tk_Mul
symbols['%'] = tk_Mod
symbols[';'] = tk_Semi
symbols[';'] = tk_Semi
symbols[','] = tk_Comma
symbols[','] = tk_Comma
symbols['>'] = tk_Gtr
symbols['='] = tk_Assign
end procedure
end procedure


Line 772: Line 813:
printf(STDOUT, "%5d %5d %-8s", {t[2], t[3], all_syms[t[1]]})
printf(STDOUT, "%5d %5d %-8s", {t[2], t[3], all_syms[t[1]]})
switch t[1] do
switch t[1] do
case tk_Integer then printf(STDOUT, " %5d\n", {t[4]})
case tk_Integer then printf(STDOUT, " %5d\n", {t[4]})
case tk_Ident then printf(STDOUT, " %s\n", {t[4]})
case tk_Ident then printf(STDOUT, " %s\n", {t[4]})
case tk_String then printf(STDOUT, " \"%s\"\n", {t[4]})
case tk_String then printf(STDOUT, " \"%s\"\n", {t[4]})
case else printf(STDOUT, "\n")
case else printf(STDOUT, "\n")
end switch
end switch
Line 786: Line 827:
<b>
<b>
<pre>
<pre>
5 15 KEYWORD_PRINT
5 16 KEYWORD_PRINT
5 41 OP_SUBTRACT
5 40 OP_SUBTRACT
6 15 KEYWORD_PUTC
6 16 KEYWORD_PUTC
6 41 OP_LESS
6 40 OP_LESS
7 15 KEYWORD_IF
7 16 KEYWORD_IF
7 41 OP_GREATER
7 40 OP_GREATER
8 15 KEYWORD_WHILE
8 16 KEYWORD_ELSE
8 41 OP_LESSEQUAL
8 40 OP_LESSEQUAL
9 15 LEFTBRACE
9 16 KEYWORD_WHILE
9 41 OP_NOTEQUAL
9 40 OP_GREATEREQUAL
10 15 RIGHTBRACE
10 16 LEFTBRACE
10 41 OP_AND
10 40 OP_EQUAL
11 15 LEFTPAREN
11 16 RIGHTBRACE
11 41 SEMICOLON
11 40 OP_NOTEQUAL
12 15 RIGHTPAREN
12 16 LEFTPAREN
12 41 COMMA
12 40 OP_AND
13 15 OP_SUBTRACT
13 16 RIGHTPAREN
13 41 OP_ASSIGN
13 40 OP_OR
14 15 OP_MULTIPLY
14 16 OP_SUBTRACT
14 41 INTEGER 42
14 40 SEMICOLON
15 15 OP_DIVIDE
15 16 OP_NOT
15 41 STRING "String literal"
15 40 COMMA
16 15 OP_ADD
16 16 OP_MULTIPLY
16 41 IDENTIFIER variable_name
16 40 OP_ASSIGN
17 26 INTEGER 10
17 16 OP_DIVIDE
18 26 INTEGER 92
17 40 INTEGER 42
19 26 INTEGER 32
18 16 OP_MOD
20 1 END_OF_INPUT
18 40 STRING "String literal"
19 16 OP_ADD
19 40 IDENTIFIER variable_name
20 26 INTEGER 10
21 26 INTEGER 92
22 26 INTEGER 32
23 1 END_OF_INPUT
</pre>
</pre>
</b>
</b>
Line 828: Line 875:
#define NELEMS(arr) (sizeof(arr) / sizeof(arr[0]))
#define NELEMS(arr) (sizeof(arr) / sizeof(arr[0]))


typedef enum {
typedef enum {tk_EOI, tk_Mul, tk_Div, tk_Add, tk_Sub, tk_Uminus, tk_Lss, tk_Leq, tk_Gtr,
tk_Neq, tk_Assign, tk_And, tk_If, tk_While, tk_Print, tk_Putc, tk_Lparen, tk_Rparen,
tk_EOI, tk_Mul, tk_Div, tk_Mod, tk_Add, tk_Sub, tk_Negate, tk_Not, tk_Lss, tk_Leq,
tk_Gtr, tk_Geq, tk_Eq, tk_Neq, tk_Assign, tk_And, tk_Or, tk_If, tk_Else, tk_While,
tk_Lbrace, tk_Rbrace, tk_Semi, tk_Comma, tk_Ident, tk_Integer, tk_String
tk_Print, tk_Putc, tk_Lparen, tk_Rparen, tk_Lbrace, tk_Rbrace, tk_Semi, tk_Comma,
tk_Ident, tk_Integer, tk_String
} TokenType;
} TokenType;


Line 876: Line 925:
TokenType sym;
TokenType sym;
} kwds[] = {
} kwds[] = {
{"else", tk_Else},
{"if", tk_If},
{"if", tk_If},
{"print", tk_Print},
{"print", tk_Print},
Line 911: Line 961:
"*" {return tk_Mul;}
"*" {return tk_Mul;}
"/" {return tk_Div;}
"/" {return tk_Div;}
"%" {return tk_Mod;}
"+" {return tk_Add;}
"+" {return tk_Add;}
"-" {return tk_Sub;}
"-" {return tk_Sub;}
Line 916: Line 967:
">" {return tk_Gtr;}
">" {return tk_Gtr;}
"<=" {return tk_Leq;}
"<=" {return tk_Leq;}
">=" {return tk_Geq;}
"!=" {return tk_Neq;}
"!=" {return tk_Neq;}
"!" {return tk_Not;}
"&&" {return tk_And;}
"&&" {return tk_And;}
"||" {return tk_Or;}
";" {return tk_Semi;}
";" {return tk_Semi;}
"," {return tk_Comma;}
"," {return tk_Comma;}
"==" {return tk_Eq;}
"=" {return tk_Assign;}
"=" {return tk_Assign;}
{ident} {return get_ident_type(yytext);}
{ident} {return get_ident_type(yytext);}
Line 968: Line 1,023:
do {
do {
tok = yylex();
tok = yylex();
printf("%5d %5d %.14s", yylloc.first_line, yylloc.first_col,
printf("%5d %5d %.15s", yylloc.first_line, yylloc.first_col,
&"END_OF_INPUT OP_MULTIPLY OP_DIVIDE OP_ADD OP_SUBTRACT OP_NEGATE "
&"END_OF_INPUT OP_MULTIPLY OP_DIVIDE OP_MOD OP_ADD "
"OP_LESS OP_LESSEQUAL OP_GREATER OP_NOTEQUAL OP_ASSIGN OP_AND "
"OP_SUBTRACT OP_NEGATE OP_NOT OP_LESS OP_LESSEQUAL "
"KEYWORD_IF KEYWORD_WHILE KEYWORD_PRINT KEYWORD_PUTC LEFTPAREN RIGHTPAREN "
"OP_GREATER OP_GREATEREQUAL OP_EQUAL OP_NOTEQUAL OP_ASSIGN "
"LEFTBRACE RIGHTBRACE SEMICOLON COMMA IDENTIFIER INTEGER "
"OP_AND OP_OR KEYWORD_IF KEYWORD_ELSE KEYWORD_WHILE "
"STRING "[tok * 14]);
"KEYWORD_PRINT KEYWORD_PUTC LEFTPAREN RIGHTPAREN LEFTBRACE "
"RIGHTBRACE SEMICOLON COMMA IDENTIFIER INTEGER "
"STRING "
[tok * 16]);


if (tok == tk_Integer) printf(" %5d", yynval);
if (tok == tk_Integer) printf(" %5d", yynval);
Line 986: Line 1,044:
<b>
<b>
<pre>
<pre>
5 15 KEYWORD_PRINT
5 16 KEYWORD_PRINT
5 41 OP_SUBTRACT
5 40 OP_SUBTRACT
6 15 KEYWORD_PUTC
6 16 KEYWORD_PUTC
6 41 OP_LESS
6 40 OP_LESS
7 15 KEYWORD_IF
7 16 KEYWORD_IF
7 41 OP_GREATER
7 40 OP_GREATER
8 15 KEYWORD_WHILE
8 16 KEYWORD_ELSE
8 41 OP_LESSEQUAL
8 40 OP_LESSEQUAL
9 15 LEFTBRACE
9 16 KEYWORD_WHILE
9 41 OP_NOTEQUAL
9 40 OP_GREATEREQUAL
10 15 RIGHTBRACE
10 16 LEFTBRACE
10 41 OP_AND
10 40 OP_EQUAL
11 15 LEFTPAREN
11 16 RIGHTBRACE
11 41 SEMICOLON
11 40 OP_NOTEQUAL
12 15 RIGHTPAREN
12 16 LEFTPAREN
12 41 COMMA
12 40 OP_AND
13 15 OP_SUBTRACT
13 16 RIGHTPAREN
13 41 OP_ASSIGN
13 40 OP_OR
14 15 OP_MULTIPLY
14 16 OP_SUBTRACT
14 41 INTEGER 42
14 40 SEMICOLON
15 15 OP_DIVIDE
15 16 OP_NOT
15 41 STRING "String literal"
15 40 COMMA
16 15 OP_ADD
16 16 OP_MULTIPLY
16 41 IDENTIFIER variable_name
16 40 OP_ASSIGN
17 26 INTEGER 10
17 16 OP_DIVIDE
18 26 INTEGER 92
17 40 INTEGER 42
19 26 INTEGER 32
18 16 OP_MOD
18 40 STRING "String literal"
19 29 END_OF_INPUT
19 16 OP_ADD
19 40 IDENTIFIER variable_name
20 26 INTEGER 10
21 26 INTEGER 92
22 26 INTEGER 32
22 29 END_OF_INPUT
</pre>
</pre>
</b>
</b>
Line 1,023: Line 1,087:
tk_Mul
tk_Mul
tk_Div
tk_Div
tk_Mod
tk_Add
tk_Add
tk_Sub
tk_Sub
tk_Uminus
tk_Negate
tk_Not
tk_Lss
tk_Lss
tk_Leq
tk_Leq
tk_Gtr
tk_Gtr
tk_Geq
tk_Eq
tk_Neq
tk_Neq
tk_Assign
tk_Assign
tk_And
tk_And
tk_Or
tk_If
tk_If
tk_Else
tk_While
tk_While
tk_Print
tk_Print
Line 1,143: Line 1,213:
case "-": tok = tk_sub: next_char(): exit sub
case "-": tok = tk_sub: next_char(): exit sub
case "*": tok = tk_mul: next_char(): exit sub
case "*": tok = tk_mul: next_char(): exit sub
case "%": tok = tk_Mod: next_char(): exit sub
case ";": tok = tk_semi: next_char(): exit sub
case ";": tok = tk_semi: next_char(): exit sub
case ",": tok = tk_comma: next_char(): exit sub
case ",": tok = tk_comma: next_char(): exit sub
case ">": tok = tk_gtr: next_char(): exit sub
case "=": tok = tk_assign: next_char(): exit sub
case "/": ' div or comment
case "/": ' div or comment
next_char()
next_char()
Line 1,184: Line 1,253:
exit sub
exit sub
case "<": next_char(): tok = follow(err_line, err_col, "=", tk_Leq, tk_Lss): exit sub
case "<": next_char(): tok = follow(err_line, err_col, "=", tk_Leq, tk_Lss): exit sub
case "!": next_char(): tok = follow(err_line, err_col, "=", tk_Neq, tk_EOI): exit sub
case ">": next_char(): tok = follow(err_line, err_col, "=", tk_Geq, tk_Gtr): exit sub
case "!": next_char(): tok = follow(err_line, err_col, "=", tk_Neq, tk_Not): exit sub
case "=": next_char(): tok = follow(err_line, err_col, "=", tk_Eq, tk_Assign): exit sub
case "&": next_char(): tok = follow(err_line, err_col, "&", tk_And, tk_EOI): exit sub
case "&": next_char(): tok = follow(err_line, err_col, "&", tk_And, tk_EOI): exit sub
case "|": next_char(): tok = follow(err_line, err_col, "|", tk_Or, tk_EOI): exit sub
case DoubleQuote: ' string
case DoubleQuote: ' string
v = cur_ch
v = cur_ch
Line 1,224: Line 1,296:


sub init_lex(byval filein as string)
sub init_lex(byval filein as string)
install("else", tk_else)
install("if", tk_if)
install("if", tk_if)
install("print", tk_print)
install("print", tk_print)
Line 1,247: Line 1,320:
tok_list(tk_Mul ) = "OP_MULTIPLY"
tok_list(tk_Mul ) = "OP_MULTIPLY"
tok_list(tk_Div ) = "OP_DIVIDE"
tok_list(tk_Div ) = "OP_DIVIDE"
tok_list(tk_Mod ) = "OP_MOD"
tok_list(tk_Add ) = "OP_ADD"
tok_list(tk_Add ) = "OP_ADD"
tok_list(tk_Sub ) = "OP_SUBTRACT"
tok_list(tk_Sub ) = "OP_SUBTRACT"
tok_list(tk_Uminus ) = "OP_NEGATE"
tok_list(tk_Negate ) = "OP_NEGATE"
tok_list(tk_Not ) = "OP_NOT"
tok_list(tk_Lss ) = "OP_LESS"
tok_list(tk_Lss ) = "OP_LESS"
tok_list(tk_Leq ) = "OP_LESSEQUAL"
tok_list(tk_Leq ) = "OP_LESSEQUAL"
tok_list(tk_Gtr ) = "OP_GREATER"
tok_list(tk_Gtr ) = "OP_GREATER"
tok_list(tk_Geq ) = "OP_GREATEREQUAL"
tok_list(tk_Eq ) = "OP_EQUAL"
tok_list(tk_Neq ) = "OP_NOTEQUAL"
tok_list(tk_Neq ) = "OP_NOTEQUAL"
tok_list(tk_Assign ) = "OP_ASSIGN"
tok_list(tk_Assign ) = "OP_ASSIGN"
tok_list(tk_And ) = "OP_AND"
tok_list(tk_And ) = "OP_AND"
tok_list(tk_Or ) = "OP_OR"
tok_list(tk_If ) = "KEYWORD_IF"
tok_list(tk_If ) = "KEYWORD_IF"
tok_list(tk_Else ) = "KEYWORD_ELSE"
tok_list(tk_While ) = "KEYWORD_WHILE"
tok_list(tk_While ) = "KEYWORD_WHILE"
tok_list(tk_Print ) = "KEYWORD_PRINT"
tok_list(tk_Print ) = "KEYWORD_PRINT"
Line 1,272: Line 1,351:
do
do
gettok(err_line, err_col, tok, v)
gettok(err_line, err_col, tok, v)
print using "##### ##### \ \"; err_line; err_col; tok_list(tok);
print using "##### ##### \ \"; err_line; err_col; tok_list(tok);
if tok = tk_integer orelse tok = tk_ident orelse tok = tk_string then print " " + v;
if tok = tk_integer orelse tok = tk_ident orelse tok = tk_string then print " " + v;
print
print
Line 1,290: Line 1,369:
<b>
<b>
<pre>
<pre>
5 15 KEYWORD_PRINT
5 16 KEYWORD_PRINT
5 41 OP_SUBTRACT
5 40 OP_SUBTRACT
6 15 KEYWORD_PUTC
6 16 KEYWORD_PUTC
6 41 OP_LESS
6 40 OP_LESS
7 15 KEYWORD_IF
7 16 KEYWORD_IF
7 41 OP_GREATER
7 40 OP_GREATER
8 15 KEYWORD_WHILE
8 16 KEYWORD_ELSE
8 41 OP_LESSEQUAL
8 40 OP_LESSEQUAL
9 15 LEFTBRACE
9 16 KEYWORD_WHILE
9 41 OP_NOTEQUAL
9 40 OP_GREATEREQUAL
10 15 RIGHTBRACE
10 16 LEFTBRACE
10 41 OP_AND
10 40 OP_EQUAL
11 15 LEFTPAREN
11 16 RIGHTBRACE
11 41 SEMICOLON
11 40 OP_NOTEQUAL
12 15 RIGHTPAREN
12 16 LEFTPAREN
12 41 COMMA
12 40 OP_AND
13 15 OP_SUBTRACT
13 16 RIGHTPAREN
13 41 OP_ASSIGN
13 40 OP_OR
14 15 OP_MULTIPLY
14 16 OP_SUBTRACT
14 41 INTEGER 42
14 40 SEMICOLON
15 15 OP_DIVIDE
15 16 OP_NOT
15 41 STRING "String literal"
15 40 COMMA
16 15 OP_ADD
16 16 OP_MULTIPLY
16 41 IDENTIFIER variable_name
16 40 OP_ASSIGN
17 26 INTEGER 10
17 16 OP_DIVIDE
18 26 INTEGER 92
17 40 INTEGER 42
19 26 INTEGER 32
18 16 OP_MOD
18 40 STRING "String literal"
19 30 END_OF_INPUT
19 16 OP_ADD
19 40 IDENTIFIER variable_name
20 26 INTEGER 10
21 26 INTEGER 92
22 26 INTEGER 32
22 30 END_OF_INPUT
</pre>
</pre>
</b>
</b>
Line 1,333: Line 1,418:


my @tokens = (
my @tokens = (
# Name | Format | Value #
# Name | Format | Value #
# --------------|----------------------|-------------#
# -------------- |----------------------|-------------#
['OP_MULTIPLY' , '*' , ],
['OP_MULTIPLY' , '*' , ],
['OP_DIVIDE' , '/' , ],
['OP_DIVIDE' , '/' , ],
['OP_ADD' , '+' , ],
['OP_MOD' , '%' , ],
['OP_SUBTRACT' , '-' , ],
['OP_ADD' , '+' , ],
['OP_LESSEQUAL' , '<=' , ],
['OP_SUBTRACT' , '-' , ],
['OP_LESS' , '<' , ],
['OP_LESSEQUAL' , '<=' , ],
['OP_GREATER' , '>' , ],
['OP_LESS' , '<' , ],
['OP_NOTEQUAL' , '!=' , ],
['OP_GREATEREQUAL', '>=' , ],
['OP_ASSIGN' , '=' , ],
['OP_GREATER' , '>' , ],
['OP_AND' , '&&' , ],
['OP_EQUAL' , '==' , ],
['KEYWORD_IF' , qr/if\b/ , ],
['OP_ASSIGN' , '=' , ],
['KEYWORD_WHILE', qr/while\b/ , ],
['OP_NOT' , '!' , ],
['KEYWORD_PRINT', qr/print\b/ , ],
['OP_NOTEQUAL' , '!=' , ],
['KEYWORD_PUTC' , qr/putc\b/ , ],
['OP_AND' , '&&' , ],
['OP_OR' , '||' , ],
['KEYWORD_ELSE' , qr/else\b/ , ],
['KEYWORD_IF' , qr/if\b/ , ],
['KEYWORD_WHILE' , qr/while\b/ , ],
['KEYWORD_PRINT' , qr/print\b/ , ],
['KEYWORD_PUTC' , qr/putc\b/ , ],


['LEFTPAREN' , '(' , ],
['LEFTPAREN' , '(' , ],
['RIGHTPAREN' , ')' , ],
['RIGHTPAREN' , ')' , ],
['LEFTBRACE' , '{' , ],
['LEFTBRACE' , '{' , ],
['RIGHTBRACE' , '}' , ],
['RIGHTBRACE' , '}' , ],
['SEMICOLON' , ';' , ],
['SEMICOLON' , ';' , ],
['COMMA' , ',' , ],
['COMMA' , ',' , ],


['IDENTIFIER' , qr/[_a-z][_a-z0-9]*/i, \&raw ],
['IDENTIFIER' , qr/[_a-z][_a-z0-9]*/i, \&raw ],
['INTEGER' , qr/[0-9]+\b/ , \&raw ],
['INTEGER' , qr/[0-9]+\b/ , \&raw ],
['INTEGER' , qr/'([^']*)(')?/ , \&char_val ],
['INTEGER' , qr/'([^']*)(')?/ , \&char_val ],
['STRING' , qr/"([^"]*)(")?/ , \&string_raw],
['STRING' , qr/"([^"]*)(")?/ , \&string_raw],

['END_OF_INPUT' , qr/$/ , ],
['END_OF_INPUT' , qr/$/ , ],
);
);


Line 1,427: Line 1,518:
my ($line, $col) = $linecol->(substr $input, $pos, $-[0] - $pos);
my ($line, $col) = $linecol->(substr $input, $pos, $-[0] - $pos);
$pos = $-[0];
$pos = $-[0];

# Get the token type that was identified by the scanner regex
# Get the token type that was identified by the scanner regex
my $type = $main::REGMARK;
my $type = $main::REGMARK;
die "Unrecognized token $1 at line $line, col $col\n" if $type eq '!';
die "Unrecognized token $1 at line $line, col $col\n" if $type eq '!';
my ($name, $evaluator) = @{$tokens[$type]}[0, 2];
my ($name, $evaluator) = @{$tokens[$type]}[0, 2];

# Get the token value
# Get the token value
my $value;
my $value;
Line 1,439: Line 1,530:
if ($@) { chomp $@; die "$@ in $name at line $line, col $col\n" }
if ($@) { chomp $@; die "$@ in $name at line $line, col $col\n" }
}
}

# Print the output line
# Print the output line
print "$line\t$col\t$name".($value ? "\t$value" : '')."\n";
print "$line\t$col\t$name".($value ? "\t$value" : '')."\n";
Line 1,460: Line 1,551:
{{out|case=test case 3}}
{{out|case=test case 3}}
<pre>
<pre>
5 15 KEYWORD_PRINT
5 16 KEYWORD_PRINT
5 41 OP_SUBTRACT
5 40 OP_SUBTRACT
6 15 KEYWORD_PUTC
6 16 KEYWORD_PUTC
6 41 OP_LESS
6 40 OP_LESS
7 15 KEYWORD_IF
7 16 KEYWORD_IF
7 41 OP_GREATER
7 40 OP_GREATER
8 16 KEYWORD_ELSE
8 15 KEYWORD_WHILE
8 41 OP_LESSEQUAL
8 40 OP_LESSEQUAL
9 16 KEYWORD_WHILE
9 15 LEFTBRACE
9 40 OP_GREATEREQUAL
9 41 OP_NOTEQUAL
10 16 LEFTBRACE
10 15 RIGHTBRACE
10 40 OP_EQUAL
10 41 OP_AND
11 16 RIGHTBRACE
11 15 LEFTPAREN
11 40 OP_NOT
11 41 SEMICOLON
11 41 OP_ASSIGN
12 15 RIGHTPAREN
12 16 LEFTPAREN
12 41 COMMA
12 40 OP_AND
13 15 OP_SUBTRACT
13 16 RIGHTPAREN
13 41 OP_ASSIGN
13 40 OP_OR
14 15 OP_MULTIPLY
14 16 OP_SUBTRACT
14 41 INTEGER 42
14 40 SEMICOLON
15 15 OP_DIVIDE
15 16 OP_NOT
15 41 STRING "String literal"
15 40 COMMA
16 15 OP_ADD
16 16 OP_MULTIPLY
16 41 IDENTIFIER variable_name
16 40 OP_ASSIGN
17 26 INTEGER 10
17 16 OP_DIVIDE
18 26 INTEGER 92
19 26 INTEGER 32
17 40 INTEGER 42
18 16 OP_MOD
20 1 END_OF_INPUT
18 40 STRING "String literal"
19 16 OP_ADD
19 40 IDENTIFIER variable_name
20 26 INTEGER 10
21 26 INTEGER 92
22 26 INTEGER 32
23 1 END_OF_INPUT
</pre>
</pre>


Line 1,518: Line 1,616:


proto token operator {*}
proto token operator {*}
token operator:sym<*> { '*' { make 'OP_MULTIPLY' } }
token operator:sym<*> { '*' { make 'OP_MULTIPLY' } }
token operator:sym</> { '/'<!before '*'> { make 'OP_DIVIDE' } }
token operator:sym</> { '/'<!before '*'> { make 'OP_DIVIDE' } }
token operator:sym<+> { '+' { make 'OP_ADD' } }
token operator:sym<%> { '%' { make 'OP_MOD' } }
token operator:sym<-> { '-' { make 'OP_SUBTRACT' } }
token operator:sym<+> { '+' { make 'OP_ADD' } }
token operator:sym('<='){ '<=' { make 'OP_LESSEQUAL' } }
token operator:sym<-> { '-' { make 'OP_SUBTRACT' } }
token operator:sym('<') { '<' { make 'OP_LESS' } }
token operator:sym('<='){ '<=' { make 'OP_LESSEQUAL' } }
token operator:sym('>') { '>' { make 'OP_GREATER' } }
token operator:sym('<') { '<' { make 'OP_LESS' } }
token operator:sym<!=> { '!=' { make 'OP_NOTEQUAL' } }
token operator:sym('>='){ '>=' { make 'OP_GREATEREQUAL'} }
token operator:sym<=> { '=' { make 'OP_ASSIGN' } }
token operator:sym('>') { '>' { make 'OP_GREATER' } }
token operator:sym<&&> { '&&' { make 'OP_AND' } }
token operator:sym<==> { '==' { make 'OP_EQUAL' } }
token operator:sym<!=> { '!=' { make 'OP_NOTEQUAL' } }
token operator:sym<!> { '!' { make 'OP_NOT' } }
token operator:sym<=> { '=' { make 'OP_ASSIGN' } }
token operator:sym<&&> { '&&' { make 'OP_AND' } }
token operator:sym<||> { '||' { make 'OP_OR' } }


proto token keyword {*}
proto token keyword {*}
token keyword:sym<if> { 'if' { make 'KEYWORD_IF' } }
token keyword:sym<if> { 'if' { make 'KEYWORD_IF' } }
token keyword:sym<else> { 'else' { make 'KEYWORD_ELSE' } }
token keyword:sym<putc> { 'putc' { make 'KEYWORD_PUTC' } }
token keyword:sym<putc> { 'putc' { make 'KEYWORD_PUTC' } }
token keyword:sym<while> { 'while' { make 'KEYWORD_WHILE' } }
token keyword:sym<while> { 'while' { make 'KEYWORD_WHILE' } }
Line 1,586: Line 1,690:
parse_it( $tokenizer );</lang>
parse_it( $tokenizer );</lang>


{{out|case=test case 3}}
{{out|case=test case 3}}
<pre>
<pre> 5 15 KEYWORD_PRINT
5 41 OP_SUBTRACT
5 16 KEYWORD_PRINT
5 40 OP_SUBTRACT
6 15 KEYWORD_PUTC
6 41 OP_LESS
6 16 KEYWORD_PUTC
7 15 KEYWORD_IF
6 40 OP_LESS
7 41 OP_GREATER
7 16 KEYWORD_IF
7 40 OP_GREATER
8 15 KEYWORD_WHILE
8 41 OP_LESSEQUAL
8 16 KEYWORD_ELSE
8 40 OP_LESSEQUAL
9 15 LEFT_BRACE
9 41 OP_NOTEQUAL
9 16 KEYWORD_WHILE
9 40 OP_GREATEREQUAL
10 15 RIGHT_BRACE
10 41 OP_AND
10 16 LEFT_BRACE
10 40 OP_EQUAL
11 15 LEFT_PAREN
11 41 SEMICOLON
11 16 RIGHT_BRACE
11 40 OP_NOTEQUAL
12 15 RIGHT_PAREN
12 41 COMMA
12 16 LEFT_PAREN
12 40 OP_AND
13 15 OP_SUBTRACT
13 41 OP_ASSIGN
13 16 RIGHT_PAREN
13 40 OP_OR
14 15 OP_MULTIPLY
14 41 INTEGER 42
14 16 OP_SUBTRACT
14 40 SEMICOLON
15 15 OP_DIVIDE
15 16 OP_NOT
15 41 STRING "String literal"
16 15 OP_ADD
15 40 COMMA
16 16 OP_MULTIPLY
16 41 IDENTIFER variable_name
16 40 OP_ASSIGN
17 26 CHAR_LITERAL 10
17 16 OP_DIVIDE
18 26 CHAR_LITERAL 92
17 40 INTEGER 42
19 26 CHAR_LITERAL 32
18 16 OP_MOD
20 1 END_OF_INPUT
18 40 STRING "String literal"
19 16 OP_ADD
19 40 IDENTIFER variable_name
20 26 CHAR_LITERAL 10
21 26 CHAR_LITERAL 92
22 26 CHAR_LITERAL 32
23 1 END_OF_INPUT
</pre>
</pre>


Line 1,624: Line 1,735:
# following two must remain in the same order
# following two must remain in the same order


tk_EOI, tk_Mul, tk_Div, tk_Add, tk_Sub, tk_Uminus, tk_Lss, tk_Leq, tk_Gtr, tk_Neq, \
tk_EOI, tk_Mul, tk_Div, tk_Mod, tk_Add, tk_Sub, tk_Negate, tk_Not, tk_Lss, tk_Leq, tk_Gtr, \
tk_Assign, tk_And, tk_If, tk_While, tk_Print, tk_Putc, tk_Lparen, tk_Rparen, tk_Lbrace, \
tk_Geq, tk_Eq, tk_Neq, tk_Assign, tk_And, tk_Or, tk_If, tk_Else, tk_While, tk_Print, \
tk_Rbrace, tk_Semi, tk_Comma, tk_Ident, tk_Integer, tk_String = range(25)
tk_Putc, tk_Lparen, tk_Rparen, tk_Lbrace, tk_Rbrace, tk_Semi, tk_Comma, tk_Ident, \
tk_Integer, tk_String = range(31)


all_syms = ["END_OF_INPUT", "OP_MULTIPLY", "OP_DIVIDE", "OP_ADD", "OP_SUBTRACT",
all_syms = ["END_OF_INPUT", "OP_MULTIPLY", "OP_DIVIDE", "OP_MOD", "OP_ADD", "OP_SUBTRACT",
"OP_NEGATE", "OP_LESS", "OP_LESSEQUAL", "OP_GREATER", "OP_NOTEQUAL", "OP_ASSIGN",
"OP_NEGATE", "OP_NOT", "OP_LESS", "OP_LESSEQUAL", "OP_GREATER", "OP_GREATEREQUAL",
"OP_AND", "KEYWORD_IF", "KEYWORD_WHILE", "KEYWORD_PRINT", "KEYWORD_PUTC", "LEFTPAREN",
"OP_EQUAL", "OP_NOTEQUAL", "OP_ASSIGN", "OP_AND", "OP_OR", "KEYWORD_IF",
"KEYWORD_ELSE", "KEYWORD_WHILE", "KEYWORD_PRINT", "KEYWORD_PUTC", "LEFTPAREN",
"RIGHTPAREN", "LEFTBRACE", "RIGHTBRACE", "SEMICOLON", "COMMA", "IDENTIFIER",
"RIGHTPAREN", "LEFTBRACE", "RIGHTBRACE", "SEMICOLON", "COMMA", "IDENTIFIER",
"INTEGER", "STRING"]
"INTEGER", "STRING"]
Line 1,636: Line 1,749:
# single character only symbols
# single character only symbols
symbols = { '{': tk_Lbrace, '}': tk_Rbrace, '(': tk_Lparen, ')': tk_Rparen, '+': tk_Add, '-': tk_Sub,
symbols = { '{': tk_Lbrace, '}': tk_Rbrace, '(': tk_Lparen, ')': tk_Rparen, '+': tk_Add, '-': tk_Sub,
'*': tk_Mul, ';': tk_Semi, ',': tk_Comma, '>': tk_Gtr, '=': tk_Assign }
'*': tk_Mul, '%': tk_Mod, ';': tk_Semi, ',': tk_Comma }


key_words = {'if': tk_If, 'print': tk_Print, 'putc': tk_Putc, 'while': tk_While}
key_words = {'if': tk_If, 'else': tk_Else, 'print': tk_Print, 'putc': tk_Putc, 'while': tk_While}


the_ch = " " # dummy first char - but it must be a space
the_ch = " " # dummy first char - but it must be a space
Line 1,753: Line 1,866:
elif the_ch == '/': return div_or_cmt(err_line, err_col)
elif the_ch == '/': return div_or_cmt(err_line, err_col)
elif the_ch == '\'': return char_lit(err_line, err_col)
elif the_ch == '\'': return char_lit(err_line, err_col)
elif the_ch == '<': return follow('=', tk_Leq, tk_Lss, err_line, err_col)
elif the_ch == '<': return follow('=', tk_Leq, tk_Lss, err_line, err_col)
elif the_ch == '!': return follow('=', tk_Neq, tk_EOI, err_line, err_col)
elif the_ch == '>': return follow('=', tk_Geq, tk_Gtr, err_line, err_col)
elif the_ch == '&': return follow('&', tk_And, tk_EOI, err_line, err_col)
elif the_ch == '=': return follow('=', tk_Eq, tk_Assign, err_line, err_col)
elif the_ch == '!': return follow('=', tk_Neq, tk_Not, err_line, err_col)
elif the_ch == '&': return follow('&', tk_And, tk_EOI, err_line, err_col)
elif the_ch == '|': return follow('|', tk_Or, tk_EOI, err_line, err_col)
elif the_ch == '"': return string_lit(the_ch, err_line, err_col)
elif the_ch == '"': return string_lit(the_ch, err_line, err_col)
elif the_ch in symbols:
elif the_ch in symbols:
Line 1,790: Line 1,906:
<b>
<b>
<pre>
<pre>
5 15 KEYWORD_PRINT
5 16 KEYWORD_PRINT
5 41 OP_SUBTRACT
5 40 OP_SUBTRACT
6 15 KEYWORD_PUTC
6 16 KEYWORD_PUTC
6 41 OP_LESS
6 40 OP_LESS
7 15 KEYWORD_IF
7 16 KEYWORD_IF
7 41 OP_GREATER
7 40 OP_GREATER
8 15 KEYWORD_WHILE
8 16 KEYWORD_ELSE
8 41 OP_LESSEQUAL
8 40 OP_LESSEQUAL
9 15 LEFTBRACE
9 16 KEYWORD_WHILE
9 41 OP_NOTEQUAL
9 40 OP_GREATEREQUAL
10 15 RIGHTBRACE
10 16 LEFTBRACE
10 41 OP_AND
10 40 OP_EQUAL
11 15 LEFTPAREN
11 16 RIGHTBRACE
11 41 SEMICOLON
11 40 OP_NOTEQUAL
12 15 RIGHTPAREN
12 16 LEFTPAREN
12 41 COMMA
12 40 OP_AND
13 15 OP_SUBTRACT
13 16 RIGHTPAREN
13 41 OP_ASSIGN
13 40 OP_OR
14 15 OP_MULTIPLY
14 16 OP_SUBTRACT
14 41 INTEGER 42
14 40 SEMICOLON
15 15 OP_DIVIDE
15 16 OP_NOT
15 41 STRING "String literal"
15 40 COMMA
16 15 OP_ADD
16 16 OP_MULTIPLY
16 41 IDENTIFIER variable_name
16 40 OP_ASSIGN
17 26 INTEGER 10
17 16 OP_DIVIDE
18 26 INTEGER 92
17 40 INTEGER 42
19 26 INTEGER 32
18 16 OP_MOD
20 1 END_OF_INPUT
18 40 STRING "String literal"
19 16 OP_ADD
19 40 IDENTIFIER variable_name
20 26 INTEGER 10
21 26 INTEGER 92
22 26 INTEGER 32
23 1 END_OF_INPUT
</pre>
</pre>
</b>
</b>