Compiler/lexical analyzer: Difference between revisions

Content added Content deleted
(use the new "case" parameter of the "out" template)
(Added "else", and ">", "==", "!", "||" operators)
Line 22:
! Name !! Common name !! Character sequence
|-
| <tt>OP_MULTIPLY</tt> || multiply || <tt>*</tt>
|-
| <tt>OP_DIVIDE</tt> || divide || <tt>/</tt>
|-
| <tt>OP_MOD</tt> || mod || <tt>%</tt>
|-
| <tt>OP_ADD</tt> || plus || <tt>+</tt>
|-
| <tt>OP_SUBTRACT</tt> || minus || <tt>-</tt>
|-
| <tt>OP_NEGATE</tt> || unary minus || <tt>-</tt>
|-
| <tt>OP_LESS</tt> || less than || <tt><</tt>
|-
| <tt>OP_LESSEQUAL</tt> || less than or equal || <tt><=</tt>
|-
| <tt>OP_GREATER</tt> || greater than || <tt>&gt;</tt>
|-
| <tt>OP_GREATEREQUAL</tt> || greater than or equal || <tt>&gt;=</tt>
|-
| <tt>OP_EQUAL</tt> || equal || <tt>==</tt>
|-
| <tt>OP_NOTEQUAL</tt> || not equal || <tt>&#33;=</tt>
|-
| <tt>OP_NOT</tt> || unary not || <tt>&#33;</tt>
|-
| <tt>OP_ASSIGN</tt> || assignment || <tt>=</tt>
|-
| <tt>OP_AND</tt> || logical and || <tt>&amp;&amp;</tt>
|-
| <tt>OP_OR</tt> || logical or || <tt>&#124;&#124;</tt>
|}
 
Line 73 ⟶ 83:
|-
| <tt>KEYWORD_IF</tt> || <tt>if</tt>
|-
| <tt>KEYWORD_ELSE</tt> || <tt>else</tt>
|-
| <tt>KEYWORD_WHILE</tt> || <tt>while</tt>
Line 155 ⟶ 167:
 
<pre>
END_OF_INPUT OP_MULTIPLY OP_DIVIDE OP_MOD OP_ADD OP_SUBTRACT
OP_NEGATE OP_LESS OP_LESSEQUAL OP_GREATER OP_GREATEREQUAL OP_EQUAL
OP_NOTEQUAL OP_ASSIGN OP_AND OP_OR KEYWORD_IF KEYWORD_ELSE
KEYWORD_WHILE KEYWORD_PRINT KEYWORD_PUTC LEFTPAREN RIGHTPAREN LEFTBRACE
RIGHTBRACE SEMICOLON COMMA IDENTIFIER INTEGER STRING
</pre>
 
Line 252 ⟶ 264:
have to wait until syntax analysis
*/
/* Print */ print /* Sub */ -
/* Putc */ putc /* Lss */ <
/* If */ if /* Gtr */ >
/* Else */ else /* Leq */ <=
/* While */ while /* Geq */ >=
/* Lbrace */ { /* Eq */ ==
/* Rbrace */ } /* Neq */ !=
/* Lparen */ ( /* And */ &&
/* Rparen */ ) /* Or */ ||
/* Uminus */ - /* Semi */ ;
/* Not */ ! /* Comma */ ,
/* Mul */ * /* Assign */ =
/* Div */ / /* Integer */ 42
/* Mod */ % /* String */ "String literal"
/* Add */ + /* Ident */ variable_name
/* character literal */ '\n'
/* character literal */ '\\'
Line 270 ⟶ 285:
| style="vertical-align:top" |
<b><pre>
5 16 KEYWORD_PRINT
5 40 OP_SUBTRACT
6 16 KEYWORD_PUTC
6 40 OP_LESS
7 16 KEYWORD_IF
7 40 OP_GREATER
8 16 KEYWORD_ELSE
8 40 OP_LESSEQUAL
9 16 KEYWORD_WHILE
9 40 OP_GREATEREQUAL
10 16 LEFTBRACE
10 40 OP_EQUAL
11 16 RIGHTBRACE
11 40 OP_NOTEQUAL
12 16 LEFTPAREN
12 40 OP_AND
13 16 RIGHTPAREN
13 40 OP_OR
14 16 OP_SUBTRACT
14 40 SEMICOLON
15 16 OP_NOT
15 40 COMMA
16 16 OP_MULTIPLY
16 40 OP_ASSIGN
17 16 OP_DIVIDE
17 40 INTEGER 42
18 16 OP_MOD
18 40 STRING "String literal"
19 16 OP_ADD
19 40 IDENTIFIER variable_name
20 26 INTEGER 10
21 26 INTEGER 92
22 26 INTEGER 32
23 1 END_OF_INPUT
</pre></b>
|}
Line 303 ⟶ 324:
{{task heading|Reference}}
 
The Flex, C, and Python versions can be considered reference implementations.
 
<hr>
Line 330 ⟶ 351:
#define da_len(name) _qy_ ## name ## _p
 
typedef enum {
typedef enum {tk_EOI, tk_Mul, tk_Div, tk_Add, tk_Sub, tk_Uminus, tk_Lss, tk_Leq, tk_Gtr,
tk_Neqtk_EOI, tk_Assigntk_Mul, tk_Andtk_Div, tk_Iftk_Mod, tk_Whiletk_Add, tk_Printtk_Sub, tk_Putctk_Negate, tk_Lparentk_Not, tk_Rparentk_Lss, tk_Leq,
tk_Gtr, tk_Geq, tk_Eq, tk_Neq, tk_Assign, tk_And, tk_Or, tk_If, tk_Else, tk_While,
tk_Lbrace, tk_Rbrace, tk_Semi, tk_Comma, tk_Ident, tk_Integer, tk_String
tk_Print, tk_Putc, tk_Lparen, tk_Rparen, tk_Lbrace, tk_Rbrace, tk_Semi, tk_Comma,
tk_Ident, tk_Integer, tk_String
} TokenType;
 
typedef struct {
intTokenType tok;
int err_ln, err_col;
union {
Line 425 ⟶ 448:
TokenType sym;
} kwds[] = {
{"else", tk_Else},
{"if", tk_If},
{"print", tk_Print},
Line 482 ⟶ 506:
case '-': next_ch(); return (tok_s){tk_Sub, err_line, err_col, {0}};
case '*': next_ch(); return (tok_s){tk_Mul, err_line, err_col, {0}};
case '%': next_ch(); return (tok_s){tk_Mod, err_line, err_col, {0}};
case ';': next_ch(); return (tok_s){tk_Semi, err_line, err_col, {0}};
case ',': next_ch(); return (tok_s){tk_Comma,err_line, err_col, {0}};
case '>': next_ch(); return (tok_s){tk_Gtr, err_line, err_col, {0}};
case '=': next_ch(); return (tok_s){tk_Assign, err_line, err_col, {0}};
case '/': next_ch(); return div_or_cmt(err_line, err_col);
case '\'': next_ch(); return char_lit(the_ch, err_line, err_col);
case '<': next_ch(); return follow('=', tk_Leq, tk_Lss, err_line, err_col);
case '!>': next_ch(); return follow('=', tk_Neqtk_Geq, tk_EOItk_Gtr, err_line, err_col);
case '&=': next_ch(); return follow('&=', tk_Andtk_Eq, tk_EOI tk_Assign, err_line, err_col);
case '!': next_ch(); return follow('=', tk_Neq, tk_Not, err_line, err_col);
case '&': next_ch(); return follow('&', tk_And, tk_EOI, err_line, err_col);
case '|': next_ch(); return follow('|', tk_Or, tk_EOI, err_line, err_col);
case '"' : return string_lit(the_ch, err_line, err_col);
default: return ident_or_int(err_line, err_col);
Line 501 ⟶ 527:
do {
tok = gettok();
fprintf(dest_fp, "%5d %5d %.14s15s",
tok.err_ln, tok.err_col,
&"END_OF_INPUT OP_MULTIPLY OP_DIVIDE OP_ADD OP_MOD OP_SUBTRACT OP_NEGATE OP_ADD "
"OP_LESSOP_SUBTRACT OP_NEGATE OP_LESSEQUAL OP_GREATER OP_NOT OP_NOTEQUAL OP_ASSIGN OP_AND OP_LESS OP_LESSEQUAL "
"KEYWORD_IFOP_GREATER KEYWORD_WHILE KEYWORD_PRINT KEYWORD_PUTCOP_GREATEREQUAL OP_EQUAL LEFTPAREN RIGHTPAREN OP_NOTEQUAL OP_ASSIGN "
"LEFTBRACEOP_AND RIGHTBRACE SEMICOLON OP_OR COMMA KEYWORD_IF IDENTIFIER INTEGERKEYWORD_ELSE KEYWORD_WHILE "
"STRINGKEYWORD_PRINT KEYWORD_PUTC LEFTPAREN "[tok.tok * 14]); RIGHTPAREN LEFTBRACE "
"RIGHTBRACE SEMICOLON COMMA IDENTIFIER INTEGER "
 
"STRING "
[tok.tok * 16]);
if (tok.tok == tk_Integer) fprintf(dest_fp, " %4d", tok.n);
else if (tok.tok == tk_Ident) fprintf(dest_fp, " %s", tok.text);
Line 535 ⟶ 563:
<b>
<pre>
5 1516 KEYWORD_PRINT
5 4140 OP_SUBTRACT
6 1516 KEYWORD_PUTC
6 4140 OP_LESS
7 1516 KEYWORD_IF
7 4140 OP_GREATER
8 1516 KEYWORD_WHILEKEYWORD_ELSE
8 4140 OP_LESSEQUAL
9 1516 LEFTBRACEKEYWORD_WHILE
9 4140 OP_NOTEQUALOP_GREATEREQUAL
10 1516 RIGHTBRACELEFTBRACE
10 4140 OP_ANDOP_EQUAL
11 1516 LEFTPARENRIGHTBRACE
11 4140 SEMICOLONOP_NOTEQUAL
12 1516 RIGHTPARENLEFTPAREN
12 4140 COMMAOP_AND
13 1516 OP_SUBTRACTRIGHTPAREN
13 4140 OP_ASSIGNOP_OR
14 1516 OP_MULTIPLYOP_SUBTRACT
14 4140 INTEGER 42SEMICOLON
15 1516 OP_DIVIDEOP_NOT
15 4140 STRING "String literal"COMMA
16 1516 OP_ADDOP_MULTIPLY
16 4140 IDENTIFIER variable_nameOP_ASSIGN
17 2616 INTEGER 10OP_DIVIDE
1817 2640 INTEGER 92 42
1918 2616 INTEGER 32OP_MOD
2018 40 1STRING END_OF_INPUT "String literal"
19 16 OP_ADD
19 40 IDENTIFIER variable_name
20 26 INTEGER 10
21 26 INTEGER 92
22 26 INTEGER 32
23 1 END_OF_INPUT
</pre>
</b>
Line 575 ⟶ 609:
constant true = 1, false = 0, EOF = -1
 
enum tk_EOI, tk_Mul, tk_Div, tk_Mod, tk_Add, tk_Sub, tk_Uminustk_Negate, tk_Lsstk_Not, tk_Leqtk_Lss, tk_Gtr, tk_Neqtk_Leq,
tk_Gtr, tk_Geq, tk_Eq, tk_Neq, tk_Assign, tk_And, tk_Or, tk_If, tk_Else, tk_While,
tk_Assign, tk_And, tk_If, tk_While, tk_Print, tk_Putc, tk_Lparen, tk_Rparen,
tk_Print, tk_Putc, tk_Lparen, tk_Rparen, tk_Lbrace, tk_Rbrace, tk_Semi, tk_Comma,
tk_Lbrace, tk_Rbrace, tk_Semi, tk_Comma, tk_Ident, tk_Integer, tk_String
tk_Ident, tk_Integer, tk_String
 
constant all_syms = {"END_OF_INPUT", "OP_MULTIPLY", "OP_DIVIDE", "OP_ADDOP_MOD", "OP_SUBTRACTOP_ADD",
"OP_NEGATEOP_SUBTRACT", "OP_LESSOP_NEGATE", "OP_LESSEQUALOP_NOT", "OP_GREATEROP_LESS", "OP_NOTEQUALOP_LESSEQUAL", "OP_ASSIGNOP_GREATER",
"OP_ANDOP_GREATEREQUAL", "KEYWORD_IFOP_EQUAL", "KEYWORD_WHILEOP_NOTEQUAL", "KEYWORD_PRINTOP_ASSIGN", "KEYWORD_PUTCOP_AND", "LEFTPARENOP_OR",
"RIGHTPARENKEYWORD_IF", "LEFTBRACEKEYWORD_ELSE", "RIGHTBRACEKEYWORD_WHILE", "SEMICOLONKEYWORD_PRINT", "COMMA", "IDENTIFIERKEYWORD_PUTC",
"LEFTPAREN", "RIGHTPAREN", "LEFTBRACE", "RIGHTBRACE", "SEMICOLON", "COMMA",
"INTEGER", "STRING"}
"IDENTIFIER", "INTEGER", "STRING"}
 
integer input_file, the_ch = ' ', the_col = 0, the_line = 1
Line 721 ⟶ 757:
case '/' then return div_or_cmt(err_line, err_col)
case '\'' then return char_lit(err_line, err_col)
 
case '<' then return follow('=', tk_Leq, tk_Lss, err_line, err_col)
case '!<' then return follow('=', tk_Neqtk_Leq, tk_EOItk_Lss, err_line, err_col)
case '&>' then return follow('&=', tk_Andtk_Geq, tk_EOItk_Gtr, err_line, err_col)
case '=' then return follow('=', tk_Eq, tk_Assign, err_line, err_col)
case '!' then return follow('=', tk_Neq, tk_Not, err_line, err_col)
case '&' then return follow('&', tk_And, tk_EOI, err_line, err_col)
case '|' then return follow('|', tk_Or, tk_EOI, err_line, err_col)
 
case '"' then return string_lit(the_ch, err_line, err_col)
case else
Line 736 ⟶ 777:
 
procedure init()
put(key_words, "else", tk_Else)
put(key_words, "if", tk_If)
put(key_words, "print", tk_Print)
Line 749 ⟶ 791:
symbols['-'] = tk_Sub
symbols['*'] = tk_Mul
symbols['%'] = tk_Mod
symbols[';'] = tk_Semi
symbols[','] = tk_Comma
symbols['>'] = tk_Gtr
symbols['='] = tk_Assign
end procedure
 
Line 772 ⟶ 813:
printf(STDOUT, "%5d %5d %-8s", {t[2], t[3], all_syms[t[1]]})
switch t[1] do
case tk_Integer then printf(STDOUT, " %5d\n", {t[4]})
case tk_Ident then printf(STDOUT, " %s\n", {t[4]})
case tk_String then printf(STDOUT, " \"%s\"\n", {t[4]})
case else printf(STDOUT, "\n")
end switch
Line 786 ⟶ 827:
<b>
<pre>
5 1516 KEYWORD_PRINT
5 4140 OP_SUBTRACT
6 1516 KEYWORD_PUTC
6 4140 OP_LESS
7 1516 KEYWORD_IF
7 4140 OP_GREATER
8 1516 KEYWORD_WHILEKEYWORD_ELSE
8 4140 OP_LESSEQUAL
9 1516 LEFTBRACEKEYWORD_WHILE
9 4140 OP_NOTEQUALOP_GREATEREQUAL
10 1516 RIGHTBRACELEFTBRACE
10 4140 OP_ANDOP_EQUAL
11 1516 LEFTPARENRIGHTBRACE
11 4140 SEMICOLONOP_NOTEQUAL
12 1516 RIGHTPARENLEFTPAREN
12 4140 COMMAOP_AND
13 1516 OP_SUBTRACTRIGHTPAREN
13 4140 OP_ASSIGNOP_OR
14 1516 OP_MULTIPLYOP_SUBTRACT
14 4140 INTEGER 42SEMICOLON
15 1516 OP_DIVIDEOP_NOT
15 4140 STRING "String literal"COMMA
16 1516 OP_ADDOP_MULTIPLY
16 4140 IDENTIFIER variable_nameOP_ASSIGN
17 2616 INTEGER 10OP_DIVIDE
1817 2640 INTEGER 9242
1918 2616 INTEGER 32OP_MOD
2018 40 1STRING END_OF_INPUT "String literal"
19 16 OP_ADD
19 40 IDENTIFIER variable_name
20 26 INTEGER 10
21 26 INTEGER 92
22 26 INTEGER 32
23 1 END_OF_INPUT
</pre>
</b>
Line 828 ⟶ 875:
#define NELEMS(arr) (sizeof(arr) / sizeof(arr[0]))
 
typedef enum {
typedef enum {tk_EOI, tk_Mul, tk_Div, tk_Add, tk_Sub, tk_Uminus, tk_Lss, tk_Leq, tk_Gtr,
tk_Neqtk_EOI, tk_Assigntk_Mul, tk_Andtk_Div, tk_Iftk_Mod, tk_Whiletk_Add, tk_Printtk_Sub, tk_Putctk_Negate, tk_Lparentk_Not, tk_Rparentk_Lss, tk_Leq,
tk_Gtr, tk_Geq, tk_Eq, tk_Neq, tk_Assign, tk_And, tk_Or, tk_If, tk_Else, tk_While,
tk_Lbrace, tk_Rbrace, tk_Semi, tk_Comma, tk_Ident, tk_Integer, tk_String
tk_Print, tk_Putc, tk_Lparen, tk_Rparen, tk_Lbrace, tk_Rbrace, tk_Semi, tk_Comma,
tk_Ident, tk_Integer, tk_String
} TokenType;
 
Line 876 ⟶ 925:
TokenType sym;
} kwds[] = {
{"else", tk_Else},
{"if", tk_If},
{"print", tk_Print},
Line 911 ⟶ 961:
"*" {return tk_Mul;}
"/" {return tk_Div;}
"%" {return tk_Mod;}
"+" {return tk_Add;}
"-" {return tk_Sub;}
Line 916 ⟶ 967:
">" {return tk_Gtr;}
"<=" {return tk_Leq;}
">=" {return tk_Geq;}
"!=" {return tk_Neq;}
"!" {return tk_Not;}
"&&" {return tk_And;}
"||" {return tk_Or;}
";" {return tk_Semi;}
"," {return tk_Comma;}
"==" {return tk_Eq;}
"=" {return tk_Assign;}
{ident} {return get_ident_type(yytext);}
Line 968 ⟶ 1,023:
do {
tok = yylex();
printf("%5d %5d %.14s15s", yylloc.first_line, yylloc.first_col,
&"END_OF_INPUT OP_MULTIPLY OP_DIVIDE OP_ADD OP_MOD OP_SUBTRACT OP_NEGATE OP_ADD "
"OP_LESSOP_SUBTRACT OP_NEGATE OP_LESSEQUAL OP_GREATER OP_NOT OP_NOTEQUAL OP_ASSIGN OP_AND OP_LESS OP_LESSEQUAL "
"KEYWORD_IFOP_GREATER KEYWORD_WHILE KEYWORD_PRINT KEYWORD_PUTCOP_GREATEREQUAL OP_EQUAL LEFTPAREN RIGHTPAREN OP_NOTEQUAL OP_ASSIGN "
"LEFTBRACEOP_AND RIGHTBRACE SEMICOLON OP_OR COMMA KEYWORD_IF IDENTIFIER INTEGERKEYWORD_ELSE KEYWORD_WHILE "
"STRINGKEYWORD_PRINT KEYWORD_PUTC LEFTPAREN "[tok * 14]); RIGHTPAREN LEFTBRACE "
"RIGHTBRACE SEMICOLON COMMA IDENTIFIER INTEGER "
"STRING "
[tok * 16]);
 
if (tok == tk_Integer) printf(" %5d", yynval);
Line 986 ⟶ 1,044:
<b>
<pre>
5 1516 KEYWORD_PRINT
5 4140 OP_SUBTRACT
6 1516 KEYWORD_PUTC
6 4140 OP_LESS
7 1516 KEYWORD_IF
7 4140 OP_GREATER
8 1516 KEYWORD_WHILEKEYWORD_ELSE
8 4140 OP_LESSEQUAL
9 1516 LEFTBRACEKEYWORD_WHILE
9 4140 OP_NOTEQUALOP_GREATEREQUAL
10 1516 RIGHTBRACELEFTBRACE
10 4140 OP_ANDOP_EQUAL
11 1516 LEFTPARENRIGHTBRACE
11 4140 SEMICOLONOP_NOTEQUAL
12 1516 RIGHTPARENLEFTPAREN
12 4140 COMMAOP_AND
13 1516 OP_SUBTRACTRIGHTPAREN
13 4140 OP_ASSIGNOP_OR
14 1516 OP_MULTIPLYOP_SUBTRACT
14 4140 INTEGER 42SEMICOLON
15 1516 OP_DIVIDEOP_NOT
15 4140 STRING "String literal"COMMA
16 1516 OP_ADDOP_MULTIPLY
16 4140 IDENTIFIER variable_nameOP_ASSIGN
17 2616 INTEGER 10OP_DIVIDE
1817 2640 INTEGER 92 42
1918 2616 INTEGER 32OP_MOD
18 40 STRING "String literal"
19 29 END_OF_INPUT
19 16 OP_ADD
19 40 IDENTIFIER variable_name
20 26 INTEGER 10
21 26 INTEGER 92
22 26 INTEGER 32
22 29 END_OF_INPUT
</pre>
</b>
Line 1,023 ⟶ 1,087:
tk_Mul
tk_Div
tk_Mod
tk_Add
tk_Sub
tk_Uminustk_Negate
tk_Not
tk_Lss
tk_Leq
tk_Gtr
tk_Geq
tk_Eq
tk_Neq
tk_Assign
tk_And
tk_Or
tk_If
tk_Else
tk_While
tk_Print
Line 1,143 ⟶ 1,213:
case "-": tok = tk_sub: next_char(): exit sub
case "*": tok = tk_mul: next_char(): exit sub
case "%": tok = tk_Mod: next_char(): exit sub
case ";": tok = tk_semi: next_char(): exit sub
case ",": tok = tk_comma: next_char(): exit sub
case ">": tok = tk_gtr: next_char(): exit sub
case "=": tok = tk_assign: next_char(): exit sub
case "/": ' div or comment
next_char()
Line 1,184 ⟶ 1,253:
exit sub
case "<": next_char(): tok = follow(err_line, err_col, "=", tk_Leq, tk_Lss): exit sub
case "!>": next_char(): tok = follow(err_line, err_col, "=", tk_Neqtk_Geq, tk_EOItk_Gtr): exit sub
case "!": next_char(): tok = follow(err_line, err_col, "=", tk_Neq, tk_Not): exit sub
case "=": next_char(): tok = follow(err_line, err_col, "=", tk_Eq, tk_Assign): exit sub
case "&": next_char(): tok = follow(err_line, err_col, "&", tk_And, tk_EOI): exit sub
case "|": next_char(): tok = follow(err_line, err_col, "|", tk_Or, tk_EOI): exit sub
case DoubleQuote: ' string
v = cur_ch
Line 1,224 ⟶ 1,296:
 
sub init_lex(byval filein as string)
install("else", tk_else)
install("if", tk_if)
install("print", tk_print)
Line 1,247 ⟶ 1,320:
tok_list(tk_Mul ) = "OP_MULTIPLY"
tok_list(tk_Div ) = "OP_DIVIDE"
tok_list(tk_Mod ) = "OP_MOD"
tok_list(tk_Add ) = "OP_ADD"
tok_list(tk_Sub ) = "OP_SUBTRACT"
tok_list(tk_Uminustk_Negate ) = "OP_NEGATE"
tok_list(tk_Not ) = "OP_NOT"
tok_list(tk_Lss ) = "OP_LESS"
tok_list(tk_Leq ) = "OP_LESSEQUAL"
tok_list(tk_Gtr ) = "OP_GREATER"
tok_list(tk_Geq ) = "OP_GREATEREQUAL"
tok_list(tk_Eq ) = "OP_EQUAL"
tok_list(tk_Neq ) = "OP_NOTEQUAL"
tok_list(tk_Assign ) = "OP_ASSIGN"
tok_list(tk_And ) = "OP_AND"
tok_list(tk_Or ) = "OP_OR"
tok_list(tk_If ) = "KEYWORD_IF"
tok_list(tk_Else ) = "KEYWORD_ELSE"
tok_list(tk_While ) = "KEYWORD_WHILE"
tok_list(tk_Print ) = "KEYWORD_PRINT"
Line 1,272 ⟶ 1,351:
do
gettok(err_line, err_col, tok, v)
print using "##### ##### \ \"; err_line; err_col; tok_list(tok);
if tok = tk_integer orelse tok = tk_ident orelse tok = tk_string then print " " + v;
print
Line 1,290 ⟶ 1,369:
<b>
<pre>
5 1516 KEYWORD_PRINT
5 4140 OP_SUBTRACT
6 1516 KEYWORD_PUTC
6 4140 OP_LESS
7 1516 KEYWORD_IF
7 4140 OP_GREATER
8 1516 KEYWORD_WHILEKEYWORD_ELSE
8 4140 OP_LESSEQUAL
9 1516 LEFTBRACEKEYWORD_WHILE
9 4140 OP_NOTEQUALOP_GREATEREQUAL
10 1516 RIGHTBRACELEFTBRACE
10 4140 OP_ANDOP_EQUAL
11 1516 LEFTPARENRIGHTBRACE
11 4140 SEMICOLONOP_NOTEQUAL
12 1516 RIGHTPARENLEFTPAREN
12 4140 COMMAOP_AND
13 1516 OP_SUBTRACTRIGHTPAREN
13 4140 OP_ASSIGNOP_OR
14 1516 OP_MULTIPLYOP_SUBTRACT
14 4140 INTEGER 42SEMICOLON
15 1516 OP_DIVIDEOP_NOT
15 4140 STRING "String literal"COMMA
16 1516 OP_ADDOP_MULTIPLY
16 4140 IDENTIFIER variable_nameOP_ASSIGN
17 2616 INTEGER 10OP_DIVIDE
1817 2640 INTEGER 92 42
1918 2616 INTEGER 32OP_MOD
18 40 STRING "String literal"
19 30 END_OF_INPUT
19 16 OP_ADD
19 40 IDENTIFIER variable_name
20 26 INTEGER 10
21 26 INTEGER 92
22 26 INTEGER 32
22 30 END_OF_INPUT
</pre>
</b>
Line 1,333 ⟶ 1,418:
 
my @tokens = (
# Name | Format | Value #
# -------------- |----------------------|-------------#
['OP_MULTIPLY' , '*' , ],
['OP_DIVIDE' , '/' , ],
['OP_ADDOP_MOD' , '+%' , ],
['OP_SUBTRACTOP_ADD' , '-+' , ],
['OP_LESSEQUALOP_SUBTRACT' , '<=-' , ],
['OP_LESSOP_LESSEQUAL' , '<=' , ],
['OP_GREATEROP_LESS' , '><' , ],
['OP_NOTEQUALOP_GREATEREQUAL' , '!>=' , ],
['OP_ASSIGNOP_GREATER' , '=>' , ],
['OP_ANDOP_EQUAL' , '&&==' , ],
['KEYWORD_IFOP_ASSIGN' , qr/if\b/'=' , ],
['KEYWORD_WHILEOP_NOT' , qr/while\b/'!' , ],
['KEYWORD_PRINTOP_NOTEQUAL' , qr/print\b/'!=' , ],
['KEYWORD_PUTCOP_AND' , qr/putc\b/'&&' , ],
['OP_OR' , '||' , ],
['KEYWORD_ELSE' , qr/else\b/ , ],
['KEYWORD_IF' , qr/if\b/ , ],
['KEYWORD_WHILE' , qr/while\b/ , ],
['KEYWORD_PRINT' , qr/print\b/ , ],
['KEYWORD_PUTC' , qr/putc\b/ , ],
 
['LEFTPAREN' , '(' , ],
['RIGHTPAREN' , ')' , ],
['LEFTBRACE' , '{' , ],
['RIGHTBRACE' , '}' , ],
['SEMICOLON' , ';' , ],
['COMMA' , ',' , ],
 
['IDENTIFIER' , qr/[_a-z][_a-z0-9]*/i, \&raw ],
['INTEGER' , qr/[0-9]+\b/ , \&raw ],
['INTEGER' , qr/'([^']*)(')?/ , \&char_val ],
['STRING' , qr/"([^"]*)(")?/ , \&string_raw],
 
['END_OF_INPUT' , qr/$/ , ],
);
 
Line 1,427 ⟶ 1,518:
my ($line, $col) = $linecol->(substr $input, $pos, $-[0] - $pos);
$pos = $-[0];
 
# Get the token type that was identified by the scanner regex
my $type = $main::REGMARK;
die "Unrecognized token $1 at line $line, col $col\n" if $type eq '!';
my ($name, $evaluator) = @{$tokens[$type]}[0, 2];
 
# Get the token value
my $value;
Line 1,439 ⟶ 1,530:
if ($@) { chomp $@; die "$@ in $name at line $line, col $col\n" }
}
 
# Print the output line
print "$line\t$col\t$name".($value ? "\t$value" : '')."\n";
Line 1,460 ⟶ 1,551:
{{out|case=test case 3}}
<pre>
5 15 16 KEYWORD_PRINT
5 41 40 OP_SUBTRACT
6 15 16 KEYWORD_PUTC
6 41 40 OP_LESS
7 15 16 KEYWORD_IF
7 41 40 OP_GREATER
8 16 KEYWORD_ELSE
8 15 KEYWORD_WHILE
8 41 40 OP_LESSEQUAL
9 16 KEYWORD_WHILE
9 15 LEFTBRACE
9 40 OP_GREATEREQUAL
9 41 OP_NOTEQUAL
10 16 LEFTBRACE
10 15 RIGHTBRACE
10 40 OP_EQUAL
10 41 OP_AND
11 16 RIGHTBRACE
11 15 LEFTPAREN
11 40 OP_NOT
11 41 SEMICOLON
11 41 OP_ASSIGN
12 15 RIGHTPAREN
12 16 LEFTPAREN
12 41 COMMA
12 40 OP_AND
13 15 OP_SUBTRACT
13 16 RIGHTPAREN
13 41 OP_ASSIGN
13 40 OP_OR
14 15 OP_MULTIPLY
14 16 OP_SUBTRACT
14 41 INTEGER 42
14 40 SEMICOLON
15 15 OP_DIVIDE
15 16 OP_NOT
15 41 STRING "String literal"
15 40 COMMA
16 15 OP_ADD
16 16 OP_MULTIPLY
16 41 IDENTIFIER variable_name
16 40 OP_ASSIGN
17 26 INTEGER 10
17 16 OP_DIVIDE
18 26 INTEGER 92
19 26 17 40 INTEGER 32 42
18 16 OP_MOD
20 1 END_OF_INPUT
18 40 STRING "String literal"
19 16 OP_ADD
19 40 IDENTIFIER variable_name
20 26 INTEGER 10
21 26 INTEGER 92
22 26 INTEGER 32
23 1 END_OF_INPUT
</pre>
 
Line 1,518 ⟶ 1,616:
 
proto token operator {*}
token operator:sym<*> { '*' { make 'OP_MULTIPLY' } }
token operator:sym</> { '/'<!before '*'> { make 'OP_DIVIDE' } }
token operator:sym<+%> { '+%' { make 'OP_ADDOP_MOD' } }
token operator:sym<-+> { '-+' { make 'OP_SUBTRACTOP_ADD' } }
token operator:sym('<=')-> { '<=-' { make 'OP_LESSEQUALOP_SUBTRACT' } }
token operator:sym('<=') { '<=' { make 'OP_LESSOP_LESSEQUAL' } }
token operator:sym('><') { '><' { make 'OP_GREATEROP_LESS' } }
token operator:sym<!('>=> '){ '!>=' { make 'OP_NOTEQUALOP_GREATEREQUAL' } }
token operator:sym<=('> ') { '=>' { make 'OP_ASSIGNOP_GREATER' } }
token operator:sym<&&==> { '&&==' { make 'OP_ANDOP_EQUAL' } }
token operator:sym<!=> { '!=' { make 'OP_NOTEQUAL' } }
token operator:sym<!> { '!' { make 'OP_NOT' } }
token operator:sym<=> { '=' { make 'OP_ASSIGN' } }
token operator:sym<&&> { '&&' { make 'OP_AND' } }
token operator:sym<||> { '||' { make 'OP_OR' } }
 
proto token keyword {*}
token keyword:sym<if> { 'if' { make 'KEYWORD_IF' } }
token keyword:sym<else> { 'else' { make 'KEYWORD_ELSE' } }
token keyword:sym<putc> { 'putc' { make 'KEYWORD_PUTC' } }
token keyword:sym<while> { 'while' { make 'KEYWORD_WHILE' } }
Line 1,586 ⟶ 1,690:
parse_it( $tokenizer );</lang>
 
{{out|case=test case 3}}
<pre>
<pre> 5 15 KEYWORD_PRINT
5 41 OP_SUBTRACT16 KEYWORD_PRINT
5 40 OP_SUBTRACT
6 15 KEYWORD_PUTC
6 41 OP_LESS16 KEYWORD_PUTC
76 15 KEYWORD_IF40 OP_LESS
7 41 OP_GREATER16 KEYWORD_IF
7 40 OP_GREATER
8 15 KEYWORD_WHILE
8 41 OP_LESSEQUAL16 KEYWORD_ELSE
8 40 OP_LESSEQUAL
9 15 LEFT_BRACE
9 41 OP_NOTEQUAL16 KEYWORD_WHILE
9 40 OP_GREATEREQUAL
10 15 RIGHT_BRACE
10 41 OP_AND16 LEFT_BRACE
10 40 OP_EQUAL
11 15 LEFT_PAREN
11 41 SEMICOLON16 RIGHT_BRACE
11 40 OP_NOTEQUAL
12 15 RIGHT_PAREN
12 41 COMMA16 LEFT_PAREN
12 40 OP_AND
13 15 OP_SUBTRACT
13 41 OP_ASSIGN16 RIGHT_PAREN
13 40 OP_OR
14 15 OP_MULTIPLY
14 41 INTEGER16 42OP_SUBTRACT
14 40 SEMICOLON
15 15 OP_DIVIDE
15 16 OP_NOT
15 41 STRING "String literal"
1615 15 OP_ADD40 COMMA
16 16 OP_MULTIPLY
16 41 IDENTIFER variable_name
16 40 OP_ASSIGN
17 26 CHAR_LITERAL 10
17 16 OP_DIVIDE
18 26 CHAR_LITERAL 92
17 40 INTEGER 42
19 26 CHAR_LITERAL 32
18 16 OP_MOD
20 1 END_OF_INPUT
18 40 STRING "String literal"
19 16 OP_ADD
19 40 IDENTIFER variable_name
20 26 CHAR_LITERAL 10
21 26 CHAR_LITERAL 92
22 26 CHAR_LITERAL 32
23 1 END_OF_INPUT
</pre>
 
Line 1,624 ⟶ 1,735:
# following two must remain in the same order
 
tk_EOI, tk_Mul, tk_Div, tk_Mod, tk_Add, tk_Sub, tk_Uminustk_Negate, tk_Not, tk_Lss, tk_Leq, tk_Gtr, tk_Neq, \
tk_Geq, tk_Eq, tk_Neq, tk_Assign, tk_And, tk_Or, tk_If, tk_Else, tk_While, tk_Print, tk_Putc, tk_Lparen, tk_Rparen, tk_Lbrace, \
tk_Putc, tk_Lparen, tk_Rparen, tk_Lbrace, tk_Rbrace, tk_Semi, tk_Comma, tk_Ident, tk_Integer, tk_String = range(25) \
tk_Integer, tk_String = range(31)
 
all_syms = ["END_OF_INPUT", "OP_MULTIPLY", "OP_DIVIDE", "OP_MOD", "OP_ADD", "OP_SUBTRACT",
"OP_NEGATE", "OP_LESSOP_NOT", "OP_LESSEQUALOP_LESS", "OP_GREATEROP_LESSEQUAL", "OP_NOTEQUALOP_GREATER", "OP_ASSIGNOP_GREATEREQUAL",
"OP_ANDOP_EQUAL", "KEYWORD_IFOP_NOTEQUAL", "KEYWORD_WHILEOP_ASSIGN", "KEYWORD_PRINTOP_AND", "KEYWORD_PUTCOP_OR", "LEFTPARENKEYWORD_IF",
"KEYWORD_ELSE", "KEYWORD_WHILE", "KEYWORD_PRINT", "KEYWORD_PUTC", "LEFTPAREN",
"RIGHTPAREN", "LEFTBRACE", "RIGHTBRACE", "SEMICOLON", "COMMA", "IDENTIFIER",
"INTEGER", "STRING"]
Line 1,636 ⟶ 1,749:
# single character only symbols
symbols = { '{': tk_Lbrace, '}': tk_Rbrace, '(': tk_Lparen, ')': tk_Rparen, '+': tk_Add, '-': tk_Sub,
'*': tk_Mul, ';%': tk_Semitk_Mod, ',;': tk_Commatk_Semi, '>': tk_Gtr, '=': tk_Assigntk_Comma }
 
key_words = {'if': tk_If, 'else': tk_Else, 'print': tk_Print, 'putc': tk_Putc, 'while': tk_While}
 
the_ch = " " # dummy first char - but it must be a space
Line 1,753 ⟶ 1,866:
elif the_ch == '/': return div_or_cmt(err_line, err_col)
elif the_ch == '\'': return char_lit(err_line, err_col)
elif the_ch == '<': return follow('=', tk_Leq, tk_Lss, err_line, err_col)
elif the_ch == '!>': return follow('=', tk_Neqtk_Geq, tk_EOItk_Gtr, err_line, err_col)
elif the_ch == '&=': return follow('&=', tk_Andtk_Eq, tk_EOI tk_Assign, err_line, err_col)
elif the_ch == '!': return follow('=', tk_Neq, tk_Not, err_line, err_col)
elif the_ch == '&': return follow('&', tk_And, tk_EOI, err_line, err_col)
elif the_ch == '|': return follow('|', tk_Or, tk_EOI, err_line, err_col)
elif the_ch == '"': return string_lit(the_ch, err_line, err_col)
elif the_ch in symbols:
Line 1,790 ⟶ 1,906:
<b>
<pre>
5 1516 KEYWORD_PRINT
5 4140 OP_SUBTRACT
6 1516 KEYWORD_PUTC
6 4140 OP_LESS
7 1516 KEYWORD_IF
7 4140 OP_GREATER
8 1516 KEYWORD_WHILEKEYWORD_ELSE
8 4140 OP_LESSEQUAL
9 1516 LEFTBRACEKEYWORD_WHILE
9 4140 OP_NOTEQUALOP_GREATEREQUAL
10 1516 RIGHTBRACELEFTBRACE
10 4140 OP_ANDOP_EQUAL
11 1516 LEFTPARENRIGHTBRACE
11 4140 SEMICOLONOP_NOTEQUAL
12 1516 RIGHTPARENLEFTPAREN
12 4140 COMMAOP_AND
13 1516 OP_SUBTRACTRIGHTPAREN
13 4140 OP_ASSIGNOP_OR
14 1516 OP_MULTIPLYOP_SUBTRACT
14 4140 INTEGER 42SEMICOLON
15 1516 OP_DIVIDEOP_NOT
15 4140 STRING "String literal"COMMA
16 1516 OP_ADDOP_MULTIPLY
16 4140 IDENTIFIER variable_nameOP_ASSIGN
17 2616 INTEGER 10OP_DIVIDE
1817 2640 INTEGER 9242
1918 2616 INTEGER 32OP_MOD
2018 40 1 STRING END_OF_INPUT "String literal"
19 16 OP_ADD
19 40 IDENTIFIER variable_name
20 26 INTEGER 10
21 26 INTEGER 92
22 26 INTEGER 32
23 1 END_OF_INPUT
</pre>
</b>