Compiler/lexical analyzer: Difference between revisions

Content added Content deleted
(Added "else", and ">", "==", "!", "||" operators)
(CHANGE TOKEN NAMES FROM ALL CAPS to something a little easier on old eyes.)
Line 22: Line 22:
! Name !! Common name !! Character sequence
! Name !! Common name !! Character sequence
|-
|-
| <tt>OP_MULTIPLY</tt> || multiply || <tt>*</tt>
| <tt>Op_multiply</tt> || multiply || <tt>*</tt>
|-
|-
| <tt>OP_DIVIDE</tt> || divide || <tt>/</tt>
| <tt>Op_divide</tt> || divide || <tt>/</tt>
|-
|-
| <tt>OP_MOD</tt> || mod || <tt>%</tt>
| <tt>Op_mod</tt> || mod || <tt>%</tt>
|-
|-
| <tt>OP_ADD</tt> || plus || <tt>+</tt>
| <tt>Op_add</tt> || plus || <tt>+</tt>
|-
|-
| <tt>OP_SUBTRACT</tt> || minus || <tt>-</tt>
| <tt>Op_subtract</tt> || minus || <tt>-</tt>
|-
|-
| <tt>OP_NEGATE</tt> || unary minus || <tt>-</tt>
| <tt>Op_negate</tt> || unary minus || <tt>-</tt>
|-
|-
| <tt>OP_LESS</tt> || less than || <tt><</tt>
| <tt>Op_less</tt> || less than || <tt><</tt>
|-
|-
| <tt>OP_LESSEQUAL</tt> || less than or equal || <tt><=</tt>
| <tt>Op_lessequal</tt> || less than or equal || <tt><=</tt>
|-
|-
| <tt>OP_GREATER</tt> || greater than || <tt>&gt;</tt>
| <tt>Op_greater</tt> || greater than || <tt>&gt;</tt>
|-
|-
| <tt>OP_GREATEREQUAL</tt> || greater than or equal || <tt>&gt;=</tt>
| <tt>Op_greaterequal</tt> || greater than or equal || <tt>&gt;=</tt>
|-
|-
| <tt>OP_EQUAL</tt> || equal || <tt>==</tt>
| <tt>Op_equal</tt> || equal || <tt>==</tt>
|-
|-
| <tt>OP_NOTEQUAL</tt> || not equal || <tt>&#33;=</tt>
| <tt>Op_notequal</tt> || not equal || <tt>&#33;=</tt>
|-
|-
| <tt>OP_NOT</tt> || unary not || <tt>&#33;</tt>
| <tt>Op_not</tt> || unary not || <tt>&#33;</tt>
|-
|-
| <tt>OP_ASSIGN</tt> || assignment || <tt>=</tt>
| <tt>Op_assign</tt> || assignment || <tt>=</tt>
|-
|-
| <tt>OP_AND</tt> || logical and || <tt>&amp;&amp;</tt>
| <tt>Op_and</tt> || logical and || <tt>&amp;&amp;</tt>
|-
|-
| <tt>OP_OR</tt> || logical or || <tt>&#124;&#124;</tt>
| <tt>Op_or</tt> || logical or || <tt>&#124;&#124;</tt>
|}
|}


* The <code>-</code> token should always be interpreted as <tt>OP_SUBTRACT</tt> by the lexer. Turning some <tt>OP_SUBTRACT</tt> into <tt>OP_NEGATE</tt> will be the job of the syntax analyzer, which is not part of this task.
* The <code>-</code> token should always be interpreted as <tt>Op_subtract</tt> by the lexer. Turning some <tt>Op_subtract</tt> into <tt>Op_negate</tt> will be the job of the syntax analyzer, which is not part of this task.


;Symbols
;Symbols
Line 63: Line 63:
! Name !! Common name !! Character
! Name !! Common name !! Character
|-
|-
| <tt>LEFTPAREN</tt> || left parenthesis || <tt>(</tt>
| <tt>LeftParen</tt> || left parenthesis || <tt>(</tt>
|-
|-
| <tt>RIGHTPAREN</tt> || right parenthesis || <tt>)</tt>
| <tt>RightParen</tt> || right parenthesis || <tt>)</tt>
|-
|-
| <tt>LEFTBRACE</tt> || left brace || <tt>{</tt>
| <tt>LeftBrace</tt> || left brace || <tt>{</tt>
|-
|-
| <tt>RIGHTBRACE</tt> || right brace || <tt>}</tt>
| <tt>RightBrace</tt> || right brace || <tt>}</tt>
|-
|-
| <tt>SEMICOLON</tt> || semi-colon || <tt>&#59;</tt>
| <tt>Semicolon</tt> || semi-colon || <tt>&#59;</tt>
|-
|-
| <tt>COMMA</tt> || comma || <tt>,</tt>
| <tt>Comma</tt> || comma || <tt>,</tt>
|}
|}


Line 82: Line 82:
! Name || Character sequence
! Name || Character sequence
|-
|-
| <tt>KEYWORD_IF</tt> || <tt>if</tt>
| <tt>Keyword_if</tt> || <tt>if</tt>
|-
|-
| <tt>KEYWORD_ELSE</tt> || <tt>else</tt>
| <tt>Keyword_else</tt> || <tt>else</tt>
|-
|-
| <tt>KEYWORD_WHILE</tt> || <tt>while</tt>
| <tt>Keyword_while</tt> || <tt>while</tt>
|-
|-
| <tt>KEYWORD_PRINT</tt> || <tt>print</tt>
| <tt>Keyword_print</tt> || <tt>print</tt>
|-
|-
| <tt>KEYWORD_PUTC</tt> || <tt>putc</tt>
| <tt>Keyword_putc</tt> || <tt>putc</tt>
|}
|}


Line 105: Line 105:
! Value
! Value
|-
|-
| <tt>IDENTIFIER</tt>
| <tt>Identifier</tt>
| identifier
| identifier
| one or more letters, digits, or underscores, not starting with a digit
| one or more letters, digits, or underscores, not starting with a digit
Line 111: Line 111:
| as is
| as is
|-
|-
| <tt>INTEGER</tt>
| <tt>Integer</tt>
| integer literal
| integer literal
| one or more digits
| one or more digits
Line 117: Line 117:
| as is, interpreted as a number
| as is, interpreted as a number
|-
|-
| <tt>INTEGER</tt>
| <tt>Integer</tt>
| char literal
| char literal
| exactly one character (anything except newline or single quote) or one of the allowed escape sequences, enclosed by single quotes
| exactly one character (anything except newline or single quote) or one of the allowed escape sequences, enclosed by single quotes
Line 123: Line 123:
| the ASCII code point number of the character, e.g. 65 for <code>'A'</code> and 10 for <code>'\n'</code>
| the ASCII code point number of the character, e.g. 65 for <code>'A'</code> and 10 for <code>'\n'</code>
|-
|-
| <tt>STRING</tt>
| <tt>String</tt>
| string literal
| string literal
| zero or more characters (anything except newline or double quote), enclosed by double quotes
| zero or more characters (anything except newline or double quote), enclosed by double quotes
Line 142: Line 142:
! Name || Location
! Name || Location
|-
|-
| <tt>END_OF_INPUT</tt> || when the end of the input stream is reached
| <tt>End_of_input</tt> || when the end of the input stream is reached
|}
|}


Line 167: Line 167:


<pre>
<pre>
END_OF_INPUT OP_MULTIPLY OP_DIVIDE OP_MOD OP_ADD OP_SUBTRACT
End_of_input Op_multiply Op_divide Op_mod Op_add Op_subtract
Op_negate Op_not Op_less Op_lessequal Op_greater Op_greaterequal
OP_NEGATE OP_LESS OP_LESSEQUAL OP_GREATER OP_GREATEREQUAL OP_EQUAL
OP_NOTEQUAL OP_ASSIGN OP_AND OP_OR KEYWORD_IF KEYWORD_ELSE
Op_equal Op_notequal Op_assign Op_and Op_or Keyword_if
Keyword_else Keyword_while Keyword_print Keyword_putc LeftParen RightParen
KEYWORD_WHILE KEYWORD_PRINT KEYWORD_PUTC LEFTPAREN RIGHTPAREN LEFTBRACE
RIGHTBRACE SEMICOLON COMMA IDENTIFIER INTEGER STRING
LeftBrace RightBrace Semicolon Comma Identifier Integer
String
</pre>
</pre>


Line 181: Line 182:
# the column number where the token starts
# the column number where the token starts
# the token name
# the token name
# the token value (only for <tt>IDENTIFIER</tt>, <tt>INTEGER</tt>, and <tt>STRING</tt> tokens)
# the token value (only for <tt>Identifier</tt>, <tt>Integer</tt>, and <tt>String</tt> tokens)


{{task heading|Diagnostics}}
{{task heading|Diagnostics}}
Line 226: Line 227:
| style="vertical-align:top" |
| style="vertical-align:top" |
<b><pre>
<b><pre>
4 1 KEYWORD_PRINT
4 1 Keyword_print
4 6 LEFTPAREN
4 6 LeftParen
4 7 STRING "Hello, World!\n"
4 7 String "Hello, World!\n"
4 24 RIGHTPAREN
4 24 RightParen
4 25 SEMICOLON
4 25 Semicolon
5 1 END_OF_INPUT
5 1 End_of_input
</pre></b>
</pre></b>


Line 244: Line 245:
| style="vertical-align:top" |
| style="vertical-align:top" |
<b><pre>
<b><pre>
4 1 IDENTIFIER phoenix_number
4 1 Identifier phoenix_number
4 16 OP_ASSIGN
4 16 Op_assign
4 18 INTEGER 142857
4 18 Integer 142857
4 24 SEMICOLON
4 24 Semicolon
5 1 KEYWORD_PRINT
5 1 Keyword_print
5 6 LEFTPAREN
5 6 LeftParen
5 7 IDENTIFIER phoenix_number
5 7 Identifier phoenix_number
5 21 COMMA
5 21 Comma
5 23 STRING "\n"
5 23 String "\n"
5 27 RIGHTPAREN
5 27 RightParen
5 28 SEMICOLON
5 28 Semicolon
6 1 END_OF_INPUT
6 1 End_of_input
</pre></b>
</pre></b>


Line 285: Line 286:
| style="vertical-align:top" |
| style="vertical-align:top" |
<b><pre>
<b><pre>
5 16 KEYWORD_PRINT
5 16 Keyword_print
5 40 OP_SUBTRACT
5 40 Op_subtract
6 16 KEYWORD_PUTC
6 16 Keyword_putc
6 40 OP_LESS
6 40 Op_less
7 16 KEYWORD_IF
7 16 Keyword_if
7 40 OP_GREATER
7 40 Op_greater
8 16 KEYWORD_ELSE
8 16 Keyword_else
8 40 OP_LESSEQUAL
8 40 Op_lessequal
9 16 KEYWORD_WHILE
9 16 Keyword_while
9 40 OP_GREATEREQUAL
9 40 Op_greaterequal
10 16 LEFTBRACE
10 16 LeftBrace
10 40 OP_EQUAL
10 40 Op_equal
11 16 RIGHTBRACE
11 16 RightBrace
11 40 OP_NOTEQUAL
11 40 Op_notequal
12 16 LEFTPAREN
12 16 LeftParen
12 40 OP_AND
12 40 Op_and
13 16 RIGHTPAREN
13 16 RightParen
13 40 OP_OR
13 40 Op_or
14 16 OP_SUBTRACT
14 16 Op_subtract
14 40 SEMICOLON
14 40 Semicolon
15 16 OP_NOT
15 16 Op_not
15 40 COMMA
15 40 Comma
16 16 OP_MULTIPLY
16 16 Op_multiply
16 40 OP_ASSIGN
16 40 Op_assign
17 16 OP_DIVIDE
17 16 Op_divide
17 40 INTEGER 42
17 40 Integer 42
18 16 OP_MOD
18 16 Op_mod
18 40 STRING "String literal"
18 40 String "String literal"
19 16 OP_ADD
19 16 Op_add
19 40 IDENTIFIER variable_name
19 40 Identifier variable_name
20 26 INTEGER 10
20 26 Integer 10
21 26 INTEGER 92
21 26 Integer 92
22 26 INTEGER 32
22 26 Integer 32
23 1 END_OF_INPUT
23 1 End_of_input
</pre></b>
</pre></b>
|}
|}
Line 529: Line 530:
fprintf(dest_fp, "%5d %5d %.15s",
fprintf(dest_fp, "%5d %5d %.15s",
tok.err_ln, tok.err_col,
tok.err_ln, tok.err_col,
&"END_OF_INPUT OP_MULTIPLY OP_DIVIDE OP_MOD OP_ADD "
&"End_of_input Op_multiply Op_divide Op_mod Op_add "
"OP_SUBTRACT OP_NEGATE OP_NOT OP_LESS OP_LESSEQUAL "
"Op_subtract Op_negate Op_not Op_less Op_lessequal "
"OP_GREATER OP_GREATEREQUAL OP_EQUAL OP_NOTEQUAL OP_ASSIGN "
"Op_greater Op_greaterequal Op_equal Op_notequal Op_assign "
"OP_AND OP_OR KEYWORD_IF KEYWORD_ELSE KEYWORD_WHILE "
"Op_and Op_or Keyword_if Keyword_else Keyword_while "
"KEYWORD_PRINT KEYWORD_PUTC LEFTPAREN RIGHTPAREN LEFTBRACE "
"Keyword_print Keyword_putc LeftParen RightParen LeftBrace "
"RIGHTBRACE SEMICOLON COMMA IDENTIFIER INTEGER "
"RightBrace Semicolon Comma Identifier Integer "
"STRING "
"String "
[tok.tok * 16]);
[tok.tok * 16]);
if (tok.tok == tk_Integer) fprintf(dest_fp, " %4d", tok.n);
if (tok.tok == tk_Integer) fprintf(dest_fp, " %4d", tok.n);
Line 563: Line 564:
<b>
<b>
<pre>
<pre>
5 16 KEYWORD_PRINT
5 16 Keyword_print
5 40 OP_SUBTRACT
5 40 Op_subtract
6 16 KEYWORD_PUTC
6 16 Keyword_putc
6 40 OP_LESS
6 40 Op_less
7 16 KEYWORD_IF
7 16 Keyword_if
7 40 OP_GREATER
7 40 Op_greater
8 16 KEYWORD_ELSE
8 16 Keyword_else
8 40 OP_LESSEQUAL
8 40 Op_lessequal
9 16 KEYWORD_WHILE
9 16 Keyword_while
9 40 OP_GREATEREQUAL
9 40 Op_greaterequal
10 16 LEFTBRACE
10 16 LeftBrace
10 40 OP_EQUAL
10 40 Op_equal
11 16 RIGHTBRACE
11 16 RightBrace
11 40 OP_NOTEQUAL
11 40 Op_notequal
12 16 LEFTPAREN
12 16 LeftParen
12 40 OP_AND
12 40 Op_and
13 16 RIGHTPAREN
13 16 RightParen
13 40 OP_OR
13 40 Op_or
14 16 OP_SUBTRACT
14 16 Op_subtract
14 40 SEMICOLON
14 40 Semicolon
15 16 OP_NOT
15 16 Op_not
15 40 COMMA
15 40 Comma
16 16 OP_MULTIPLY
16 16 Op_multiply
16 40 OP_ASSIGN
16 40 Op_assign
17 16 OP_DIVIDE
17 16 Op_divide
17 40 INTEGER 42
17 40 Integer 42
18 16 OP_MOD
18 16 Op_mod
18 40 STRING "String literal"
18 40 String "String literal"
19 16 OP_ADD
19 16 Op_add
19 40 IDENTIFIER variable_name
19 40 Identifier variable_name
20 26 INTEGER 10
20 26 Integer 10
21 26 INTEGER 92
21 26 Integer 92
22 26 INTEGER 32
22 26 Integer 32
23 1 END_OF_INPUT
23 1 End_of_input
</pre>
</pre>
</b>
</b>
Line 614: Line 615:
tk_Ident, tk_Integer, tk_String
tk_Ident, tk_Integer, tk_String


constant all_syms = {"END_OF_INPUT", "OP_MULTIPLY", "OP_DIVIDE", "OP_MOD", "OP_ADD",
constant all_syms = {"End_of_input", "Op_multiply", "Op_divide", "Op_mod", "Op_add",
"OP_SUBTRACT", "OP_NEGATE", "OP_NOT", "OP_LESS", "OP_LESSEQUAL", "OP_GREATER",
"Op_subtract", "Op_negate", "Op_not", "Op_less", "Op_lessequal", "Op_greater",
"OP_GREATEREQUAL", "OP_EQUAL", "OP_NOTEQUAL", "OP_ASSIGN", "OP_AND", "OP_OR",
"Op_greaterequal", "Op_equal", "Op_notequal", "Op_assign", "Op_and", "Op_or",
"KEYWORD_IF", "KEYWORD_ELSE", "KEYWORD_WHILE", "KEYWORD_PRINT", "KEYWORD_PUTC",
"Keyword_if", "Keyword_else", "Keyword_while", "Keyword_print", "Keyword_putc",
"LEFTPAREN", "RIGHTPAREN", "LEFTBRACE", "RIGHTBRACE", "SEMICOLON", "COMMA",
"LeftParen", "RightParen", "LeftBrace", "RightBrace", "Semicolon", "Comma",
"IDENTIFIER", "INTEGER", "STRING"}
"Identifier", "Integer", "String"}


integer input_file, the_ch = ' ', the_col = 0, the_line = 1
integer input_file, the_ch = ' ', the_col = 0, the_line = 1
Line 827: Line 828:
<b>
<b>
<pre>
<pre>
5 16 KEYWORD_PRINT
5 16 Keyword_print
5 40 OP_SUBTRACT
5 40 Op_subtract
6 16 KEYWORD_PUTC
6 16 Keyword_putc
6 40 OP_LESS
6 40 Op_less
7 16 KEYWORD_IF
7 16 Keyword_if
7 40 OP_GREATER
7 40 Op_greater
8 16 KEYWORD_ELSE
8 16 Keyword_else
8 40 OP_LESSEQUAL
8 40 Op_lessequal
9 16 KEYWORD_WHILE
9 16 Keyword_while
9 40 OP_GREATEREQUAL
9 40 Op_greaterequal
10 16 LEFTBRACE
10 16 LeftBrace
10 40 OP_EQUAL
10 40 Op_equal
11 16 RIGHTBRACE
11 16 RightBrace
11 40 OP_NOTEQUAL
11 40 Op_notequal
12 16 LEFTPAREN
12 16 LeftParen
12 40 OP_AND
12 40 Op_and
13 16 RIGHTPAREN
13 16 RightParen
13 40 OP_OR
13 40 Op_or
14 16 OP_SUBTRACT
14 16 Op_subtract
14 40 SEMICOLON
14 40 Semicolon
15 16 OP_NOT
15 16 Op_not
15 40 COMMA
15 40 Comma
16 16 OP_MULTIPLY
16 16 Op_multiply
16 40 OP_ASSIGN
16 40 Op_assign
17 16 OP_DIVIDE
17 16 Op_divide
17 40 INTEGER 42
17 40 Integer 42
18 16 OP_MOD
18 16 Op_mod
18 40 STRING "String literal"
18 40 String "String literal"
19 16 OP_ADD
19 16 Op_add
19 40 IDENTIFIER variable_name
19 40 Identifier variable_name
20 26 INTEGER 10
20 26 Integer 10
21 26 INTEGER 92
21 26 Integer 92
22 26 INTEGER 32
22 26 Integer 32
23 1 END_OF_INPUT
23 1 End_of_input
</pre>
</pre>
</b>
</b>
Line 1,024: Line 1,025:
tok = yylex();
tok = yylex();
printf("%5d %5d %.15s", yylloc.first_line, yylloc.first_col,
printf("%5d %5d %.15s", yylloc.first_line, yylloc.first_col,
&"END_OF_INPUT OP_MULTIPLY OP_DIVIDE OP_MOD OP_ADD "
&"End_of_input Op_multiply Op_divide Op_mod Op_add "
"OP_SUBTRACT OP_NEGATE OP_NOT OP_LESS OP_LESSEQUAL "
"Op_subtract Op_negate Op_not Op_less Op_lessequal "
"OP_GREATER OP_GREATEREQUAL OP_EQUAL OP_NOTEQUAL OP_ASSIGN "
"Op_greater Op_greaterequal Op_equal Op_notequal Op_assign "
"OP_AND OP_OR KEYWORD_IF KEYWORD_ELSE KEYWORD_WHILE "
"Op_and Op_or Keyword_if Keyword_else Keyword_while "
"KEYWORD_PRINT KEYWORD_PUTC LEFTPAREN RIGHTPAREN LEFTBRACE "
"Keyword_print Keyword_putc LeftParen RightParen LeftBrace "
"RIGHTBRACE SEMICOLON COMMA IDENTIFIER INTEGER "
"RightBrace Semicolon Comma Identifier Integer "
"STRING "
"String "
[tok * 16]);
[tok * 16]);


Line 1,044: Line 1,045:
<b>
<b>
<pre>
<pre>
5 16 KEYWORD_PRINT
5 16 Keyword_print
5 40 OP_SUBTRACT
5 40 Op_subtract
6 16 KEYWORD_PUTC
6 16 Keyword_putc
6 40 OP_LESS
6 40 Op_less
7 16 KEYWORD_IF
7 16 Keyword_if
7 40 OP_GREATER
7 40 Op_greater
8 16 KEYWORD_ELSE
8 16 Keyword_else
8 40 OP_LESSEQUAL
8 40 Op_lessequal
9 16 KEYWORD_WHILE
9 16 Keyword_while
9 40 OP_GREATEREQUAL
9 40 Op_greaterequal
10 16 LEFTBRACE
10 16 LeftBrace
10 40 OP_EQUAL
10 40 Op_equal
11 16 RIGHTBRACE
11 16 RightBrace
11 40 OP_NOTEQUAL
11 40 Op_notequal
12 16 LEFTPAREN
12 16 LeftParen
12 40 OP_AND
12 40 Op_and
13 16 RIGHTPAREN
13 16 RightParen
13 40 OP_OR
13 40 Op_or
14 16 OP_SUBTRACT
14 16 Op_subtract
14 40 SEMICOLON
14 40 Semicolon
15 16 OP_NOT
15 16 Op_not
15 40 COMMA
15 40 Comma
16 16 OP_MULTIPLY
16 16 Op_multiply
16 40 OP_ASSIGN
16 40 Op_assign
17 16 OP_DIVIDE
17 16 Op_divide
17 40 INTEGER 42
17 40 Integer 42
18 16 OP_MOD
18 16 Op_mod
18 40 STRING "String literal"
18 40 String "String literal"
19 16 OP_ADD
19 16 Op_add
19 40 IDENTIFIER variable_name
19 40 Identifier variable_name
20 26 INTEGER 10
20 26 Integer 10
21 26 INTEGER 92
21 26 Integer 92
22 26 INTEGER 32
22 26 Integer 32
22 29 END_OF_INPUT
22 29 End_of_input
</pre>
</pre>
</b>
</b>
Line 1,317: Line 1,318:
dim tok_list(tk_eoi to tk_string) as string
dim tok_list(tk_eoi to tk_string) as string


tok_list(tk_EOI ) = "END_OF_INPUT"
tok_list(tk_EOI ) = "End_of_input"
tok_list(tk_Mul ) = "OP_MULTIPLY"
tok_list(tk_Mul ) = "Op_multiply"
tok_list(tk_Div ) = "OP_DIVIDE"
tok_list(tk_Div ) = "Op_divide"
tok_list(tk_Mod ) = "OP_MOD"
tok_list(tk_Mod ) = "Op_mod"
tok_list(tk_Add ) = "OP_ADD"
tok_list(tk_Add ) = "Op_add"
tok_list(tk_Sub ) = "OP_SUBTRACT"
tok_list(tk_Sub ) = "Op_subtract"
tok_list(tk_Negate ) = "OP_NEGATE"
tok_list(tk_Negate ) = "Op_negate"
tok_list(tk_Not ) = "OP_NOT"
tok_list(tk_Not ) = "Op_not"
tok_list(tk_Lss ) = "OP_LESS"
tok_list(tk_Lss ) = "Op_less"
tok_list(tk_Leq ) = "OP_LESSEQUAL"
tok_list(tk_Leq ) = "Op_lessequal"
tok_list(tk_Gtr ) = "OP_GREATER"
tok_list(tk_Gtr ) = "Op_greater"
tok_list(tk_Geq ) = "OP_GREATEREQUAL"
tok_list(tk_Geq ) = "Op_greaterequal"
tok_list(tk_Eq ) = "OP_EQUAL"
tok_list(tk_Eq ) = "Op_equal"
tok_list(tk_Neq ) = "OP_NOTEQUAL"
tok_list(tk_Neq ) = "Op_notequal"
tok_list(tk_Assign ) = "OP_ASSIGN"
tok_list(tk_Assign ) = "Op_assign"
tok_list(tk_And ) = "OP_AND"
tok_list(tk_And ) = "Op_and"
tok_list(tk_Or ) = "OP_OR"
tok_list(tk_Or ) = "Op_or"
tok_list(tk_If ) = "KEYWORD_IF"
tok_list(tk_If ) = "Keyword_if"
tok_list(tk_Else ) = "KEYWORD_ELSE"
tok_list(tk_Else ) = "Keyword_else"
tok_list(tk_While ) = "KEYWORD_WHILE"
tok_list(tk_While ) = "Keyword_while"
tok_list(tk_Print ) = "KEYWORD_PRINT"
tok_list(tk_Print ) = "Keyword_print"
tok_list(tk_Putc ) = "KEYWORD_PUTC"
tok_list(tk_Putc ) = "Keyword_putc"
tok_list(tk_Lparen ) = "LEFTPAREN"
tok_list(tk_Lparen ) = "LeftParen"
tok_list(tk_Rparen ) = "RIGHTPAREN"
tok_list(tk_Rparen ) = "RightParen"
tok_list(tk_Lbrace ) = "LEFTBRACE"
tok_list(tk_Lbrace ) = "LeftBrace"
tok_list(tk_Rbrace ) = "RIGHTBRACE"
tok_list(tk_Rbrace ) = "RightBrace"
tok_list(tk_Semi ) = "SEMICOLON"
tok_list(tk_Semi ) = "Semicolon"
tok_list(tk_Comma ) = "COMMA"
tok_list(tk_Comma ) = "Comma"
tok_list(tk_Ident ) = "IDENTIFIER"
tok_list(tk_Ident ) = "Identifier"
tok_list(tk_Integer) = "INTEGER"
tok_list(tk_Integer) = "Integer"
tok_list(tk_String ) = "STRING"
tok_list(tk_String ) = "String"


do
do
Line 1,369: Line 1,370:
<b>
<b>
<pre>
<pre>
5 16 KEYWORD_PRINT
5 16 Keyword_print
5 40 OP_SUBTRACT
5 40 Op_subtract
6 16 KEYWORD_PUTC
6 16 Keyword_putc
6 40 OP_LESS
6 40 Op_less
7 16 KEYWORD_IF
7 16 Keyword_if
7 40 OP_GREATER
7 40 Op_greater
8 16 KEYWORD_ELSE
8 16 Keyword_else
8 40 OP_LESSEQUAL
8 40 Op_lessequal
9 16 KEYWORD_WHILE
9 16 Keyword_while
9 40 OP_GREATEREQUAL
9 40 Op_greaterequal
10 16 LEFTBRACE
10 16 LeftBrace
10 40 OP_EQUAL
10 40 Op_equal
11 16 RIGHTBRACE
11 16 RightBrace
11 40 OP_NOTEQUAL
11 40 Op_notequal
12 16 LEFTPAREN
12 16 LeftParen
12 40 OP_AND
12 40 Op_and
13 16 RIGHTPAREN
13 16 RightParen
13 40 OP_OR
13 40 Op_or
14 16 OP_SUBTRACT
14 16 Op_subtract
14 40 SEMICOLON
14 40 Semicolon
15 16 OP_NOT
15 16 Op_not
15 40 COMMA
15 40 Comma
16 16 OP_MULTIPLY
16 16 Op_multiply
16 40 OP_ASSIGN
16 40 Op_assign
17 16 OP_DIVIDE
17 16 Op_divide
17 40 INTEGER 42
17 40 Integer 42
18 16 OP_MOD
18 16 Op_mod
18 40 STRING "String literal"
18 40 String "String literal"
19 16 OP_ADD
19 16 Op_add
19 40 IDENTIFIER variable_name
19 40 Identifier variable_name
20 26 INTEGER 10
20 26 Integer 10
21 26 INTEGER 92
21 26 Integer 92
22 26 INTEGER 32
22 26 Integer 32
22 30 END_OF_INPUT
22 30 End_of_input
</pre>
</pre>
</b>
</b>
Line 1,420: Line 1,421:
# Name | Format | Value #
# Name | Format | Value #
# -------------- |----------------------|-------------#
# -------------- |----------------------|-------------#
['OP_MULTIPLY' , '*' , ],
['Op_multiply' , '*' , ],
['OP_DIVIDE' , '/' , ],
['Op_divide' , '/' , ],
['OP_MOD' , '%' , ],
['Op_mod' , '%' , ],
['OP_ADD' , '+' , ],
['Op_add' , '+' , ],
['OP_SUBTRACT' , '-' , ],
['Op_subtract' , '-' , ],
['OP_LESSEQUAL' , '<=' , ],
['Op_lessequal' , '<=' , ],
['OP_LESS' , '<' , ],
['Op_less' , '<' , ],
['OP_GREATEREQUAL', '>=' , ],
['Op_greaterequal', '>=' , ],
['OP_GREATER' , '>' , ],
['Op_greater' , '>' , ],
['OP_EQUAL' , '==' , ],
['Op_equal' , '==' , ],
['OP_ASSIGN' , '=' , ],
['Op_assign' , '=' , ],
['OP_NOT' , '!' , ],
['Op_not' , '!' , ],
['OP_NOTEQUAL' , '!=' , ],
['Op_notequal' , '!=' , ],
['OP_AND' , '&&' , ],
['Op_and' , '&&' , ],
['OP_OR' , '||' , ],
['Op_or' , '||' , ],
['KEYWORD_ELSE' , qr/else\b/ , ],
['Keyword_else' , qr/else\b/ , ],
['KEYWORD_IF' , qr/if\b/ , ],
['Keyword_if' , qr/if\b/ , ],
['KEYWORD_WHILE' , qr/while\b/ , ],
['Keyword_while' , qr/while\b/ , ],
['KEYWORD_PRINT' , qr/print\b/ , ],
['Keyword_print' , qr/print\b/ , ],
['KEYWORD_PUTC' , qr/putc\b/ , ],
['Keyword_putc' , qr/putc\b/ , ],


['LEFTPAREN' , '(' , ],
['LeftParen' , '(' , ],
['RIGHTPAREN' , ')' , ],
['RightParen' , ')' , ],
['LEFTBRACE' , '{' , ],
['LeftBrace' , '{' , ],
['RIGHTBRACE' , '}' , ],
['RightBrace' , '}' , ],
['SEMICOLON' , ';' , ],
['Semicolon' , ';' , ],
['COMMA' , ',' , ],
['Comma' , ',' , ],


['IDENTIFIER' , qr/[_a-z][_a-z0-9]*/i, \&raw ],
['Identifier' , qr/[_a-z][_a-z0-9]*/i, \&raw ],
['INTEGER' , qr/[0-9]+\b/ , \&raw ],
['Integer' , qr/[0-9]+\b/ , \&raw ],
['INTEGER' , qr/'([^']*)(')?/ , \&char_val ],
['Integer' , qr/'([^']*)(')?/ , \&char_val ],
['STRING' , qr/"([^"]*)(")?/ , \&string_raw],
['String' , qr/"([^"]*)(")?/ , \&string_raw],


['END_OF_INPUT' , qr/$/ , ],
['End_of_input' , qr/$/ , ],
);
);


Line 1,551: Line 1,552:
{{out|case=test case 3}}
{{out|case=test case 3}}
<pre>
<pre>
5 16 KEYWORD_PRINT
5 16 Keyword_print
5 40 OP_SUBTRACT
5 40 Op_subtract
6 16 KEYWORD_PUTC
6 16 Keyword_putc
6 40 OP_LESS
6 40 Op_less
7 16 KEYWORD_IF
7 16 Keyword_if
7 40 OP_GREATER
7 40 Op_greater
8 16 KEYWORD_ELSE
8 16 Keyword_else
8 40 OP_LESSEQUAL
8 40 Op_lessequal
9 16 KEYWORD_WHILE
9 16 Keyword_while
9 40 OP_GREATEREQUAL
9 40 Op_greaterequal
10 16 LEFTBRACE
10 16 LeftBrace
10 40 OP_EQUAL
10 40 Op_equal
11 16 RIGHTBRACE
11 16 RightBrace
11 40 OP_NOT
11 40 Op_not
11 41 OP_ASSIGN
11 41 Op_assign
12 16 LEFTPAREN
12 16 LeftParen
12 40 OP_AND
12 40 Op_and
13 16 RIGHTPAREN
13 16 RightParen
13 40 OP_OR
13 40 Op_or
14 16 OP_SUBTRACT
14 16 Op_subtract
14 40 SEMICOLON
14 40 Semicolon
15 16 OP_NOT
15 16 Op_not
15 40 COMMA
15 40 Comma
16 16 OP_MULTIPLY
16 16 Op_multiply
16 40 OP_ASSIGN
16 40 Op_assign
17 16 OP_DIVIDE
17 16 Op_divide
17 40 INTEGER 42
17 40 Integer 42
18 16 OP_MOD
18 16 Op_mod
18 40 STRING "String literal"
18 40 String "String literal"
19 16 OP_ADD
19 16 Op_add
19 40 IDENTIFIER variable_name
19 40 Identifier variable_name
20 26 INTEGER 10
20 26 Integer 10
21 26 INTEGER 92
21 26 Integer 92
22 26 INTEGER 32
22 26 Integer 32
23 1 END_OF_INPUT
23 1 End_of_input
</pre>
</pre>


Line 1,616: Line 1,617:


proto token operator {*}
proto token operator {*}
token operator:sym<*> { '*' { make 'OP_MULTIPLY' } }
token operator:sym<*> { '*' { make 'Op_multiply' } }
token operator:sym</> { '/'<!before '*'> { make 'OP_DIVIDE' } }
token operator:sym</> { '/'<!before '*'> { make 'Op_divide' } }
token operator:sym<%> { '%' { make 'OP_MOD' } }
token operator:sym<%> { '%' { make 'Op_mod' } }
token operator:sym<+> { '+' { make 'OP_ADD' } }
token operator:sym<+> { '+' { make 'Op_add' } }
token operator:sym<-> { '-' { make 'OP_SUBTRACT' } }
token operator:sym<-> { '-' { make 'Op_subtract' } }
token operator:sym('<='){ '<=' { make 'OP_LESSEQUAL' } }
token operator:sym('<='){ '<=' { make 'Op_lessequal' } }
token operator:sym('<') { '<' { make 'OP_LESS' } }
token operator:sym('<') { '<' { make 'Op_less' } }
token operator:sym('>='){ '>=' { make 'OP_GREATEREQUAL'} }
token operator:sym('>='){ '>=' { make 'Op_greaterequal'} }
token operator:sym('>') { '>' { make 'OP_GREATER' } }
token operator:sym('>') { '>' { make 'Op_greater' } }
token operator:sym<==> { '==' { make 'OP_EQUAL' } }
token operator:sym<==> { '==' { make 'Op_equal' } }
token operator:sym<!=> { '!=' { make 'OP_NOTEQUAL' } }
token operator:sym<!=> { '!=' { make 'Op_notequal' } }
token operator:sym<!> { '!' { make 'OP_NOT' } }
token operator:sym<!> { '!' { make 'Op_not' } }
token operator:sym<=> { '=' { make 'OP_ASSIGN' } }
token operator:sym<=> { '=' { make 'Op_assign' } }
token operator:sym<&&> { '&&' { make 'OP_AND' } }
token operator:sym<&&> { '&&' { make 'Op_and' } }
token operator:sym<||> { '||' { make 'OP_OR' } }
token operator:sym<||> { '||' { make 'Op_or' } }


proto token keyword {*}
proto token keyword {*}
token keyword:sym<if> { 'if' { make 'KEYWORD_IF' } }
token keyword:sym<if> { 'if' { make 'Keyword_if' } }
token keyword:sym<else> { 'else' { make 'KEYWORD_ELSE' } }
token keyword:sym<else> { 'else' { make 'Keyword_else' } }
token keyword:sym<putc> { 'putc' { make 'KEYWORD_PUTC' } }
token keyword:sym<putc> { 'putc' { make 'Keyword_putc' } }
token keyword:sym<while> { 'while' { make 'KEYWORD_WHILE' } }
token keyword:sym<while> { 'while' { make 'Keyword_while' } }
token keyword:sym<print> { 'print' { make 'KEYWORD_PRINT' } }
token keyword:sym<print> { 'print' { make 'Keyword_print' } }


proto token symbol {*}
proto token symbol {*}
token symbol:sym<(> { '(' { make 'LEFT_PAREN' } }
token symbol:sym<(> { '(' { make 'LeftParen' } }
token symbol:sym<)> { ')' { make 'RIGHT_PAREN' } }
token symbol:sym<)> { ')' { make 'RightParen' } }
token symbol:sym<{> { '{' { make 'LEFT_BRACE' } }
token symbol:sym<{> { '{' { make 'LeftBrace' } }
token symbol:sym<}> { '}' { make 'RIGHT_BRACE' } }
token symbol:sym<}> { '}' { make 'RightBrace' } }
token symbol:sym<;> { ';' { make 'SEMICOLON' } }
token symbol:sym<;> { ';' { make 'Semicolon' } }
token symbol:sym<,> { ',' { make 'COMMA' } }
token symbol:sym<,> { ',' { make 'Comma' } }


token identifier { <[_A..Za..z]><[_A..Za..z0..9]>* { make 'IDENTIFER ' ~ $/ } }
token identifier { <[_A..Za..z]><[_A..Za..z0..9]>* { make 'Identifier ' ~ $/ } }
token integer { <[0..9]>+ { make 'INTEGER ' ~ $/ } }
token integer { <[0..9]>+ { make 'Integer ' ~ $/ } }


token char {
token char {
Line 1,658: Line 1,659:
'"' <-["\n]>* '"' #'
'"' <-["\n]>* '"' #'
{
{
make 'STRING ' ~ $/;
make 'String ' ~ $/;
note 'Error: Unknown escape sequence.' and exit if (~$/ ~~ m:r/ <!after <[\\]>>[\\<-[n\\]>]<!before <[\\]>> /);
note 'Error: Unknown escape sequence.' and exit if (~$/ ~~ m:r/ <!after <[\\]>>[\\<-[n\\]>]<!before <[\\]>> /);
}
}
}
}


token eoi { $ { make 'END_OF_INPUT' } }
token eoi { $ { make 'End_of_input' } }


token error {
token error {
Line 1,676: Line 1,677:
sub parse_it ( $c_code ) {
sub parse_it ( $c_code ) {
my $l;
my $l;
my @pos = gather for $c_code.lines».chars.kv -> $line, $v {
my @pos = gather for $c_code.lines>>.chars.kv -> $line, $v {
take [ $line + 1, $_ ] for 1 .. ($v+1); # v+1 for newline
take [ $line + 1, $_ ] for 1 .. ($v+1); # v+1 for newline
$l = $line+2;
$l = $line+2;
Line 1,692: Line 1,693:
{{out|case=test case 3}}
{{out|case=test case 3}}
<pre>
<pre>
5 16 KEYWORD_PRINT
5 16 Keyword_print
5 40 OP_SUBTRACT
5 40 Op_subtract
6 16 KEYWORD_PUTC
6 16 Keyword_putc
6 40 OP_LESS
6 40 Op_less
7 16 KEYWORD_IF
7 16 Keyword_if
7 40 OP_GREATER
7 40 Op_greater
8 16 KEYWORD_ELSE
8 16 Keyword_else
8 40 OP_LESSEQUAL
8 40 Op_lessequal
9 16 KEYWORD_WHILE
9 16 Keyword_while
9 40 OP_GREATEREQUAL
9 40 Op_greaterequal
10 16 LEFT_BRACE
10 16 LeftBrace
10 40 OP_EQUAL
10 40 Op_equal
11 16 RIGHT_BRACE
11 16 RightBrace
11 40 OP_NOTEQUAL
11 40 Op_notequal
12 16 LEFT_PAREN
12 16 LeftParen
12 40 OP_AND
12 40 Op_and
13 16 RIGHT_PAREN
13 16 RightParen
13 40 OP_OR
13 40 Op_or
14 16 OP_SUBTRACT
14 16 Op_subtract
14 40 SEMICOLON
14 40 Semicolon
15 16 OP_NOT
15 16 Op_not
15 40 COMMA
15 40 Comma
16 16 OP_MULTIPLY
16 16 Op_multiply
16 40 OP_ASSIGN
16 40 Op_assign
17 16 OP_DIVIDE
17 16 Op_divide
17 40 INTEGER 42
17 40 Integer 42
18 16 OP_MOD
18 16 Op_mod
18 40 STRING "String literal"
18 40 String "String literal"
19 16 OP_ADD
19 16 Op_add
19 40 IDENTIFER variable_name
19 40 IDENTIFER variable_name
20 26 CHAR_LITERAL 10
20 26 CHAR_LITERAL 10
21 26 CHAR_LITERAL 92
21 26 CHAR_LITERAL 92
22 26 CHAR_LITERAL 32
22 26 CHAR_LITERAL 32
23 1 END_OF_INPUT
23 1 End_of_input
</pre>
</pre>


Line 1,740: Line 1,741:
tk_Integer, tk_String = range(31)
tk_Integer, tk_String = range(31)


all_syms = ["END_OF_INPUT", "OP_MULTIPLY", "OP_DIVIDE", "OP_MOD", "OP_ADD", "OP_SUBTRACT",
all_syms = ["End_of_input", "Op_multiply", "Op_divide", "Op_mod", "Op_add", "Op_subtract",
"OP_NEGATE", "OP_NOT", "OP_LESS", "OP_LESSEQUAL", "OP_GREATER", "OP_GREATEREQUAL",
"Op_negate", "Op_not", "Op_less", "Op_lessequal", "Op_greater", "Op_greaterequal",
"OP_EQUAL", "OP_NOTEQUAL", "OP_ASSIGN", "OP_AND", "OP_OR", "KEYWORD_IF",
"Op_equal", "Op_notequal", "Op_assign", "Op_and", "Op_or", "Keyword_if",
"KEYWORD_ELSE", "KEYWORD_WHILE", "KEYWORD_PRINT", "KEYWORD_PUTC", "LEFTPAREN",
"Keyword_else", "Keyword_while", "Keyword_print", "Keyword_putc", "LeftParen",
"RIGHTPAREN", "LEFTBRACE", "RIGHTBRACE", "SEMICOLON", "COMMA", "IDENTIFIER",
"RightParen", "LeftBrace", "RightBrace", "Semicolon", "Comma", "Identifier",
"INTEGER", "STRING"]
"Integer", "String"]


# single character only symbols
# single character only symbols
Line 1,906: Line 1,907:
<b>
<b>
<pre>
<pre>
5 16 KEYWORD_PRINT
5 16 Keyword_print
5 40 OP_SUBTRACT
5 40 Op_subtract
6 16 KEYWORD_PUTC
6 16 Keyword_putc
6 40 OP_LESS
6 40 Op_less
7 16 KEYWORD_IF
7 16 Keyword_if
7 40 OP_GREATER
7 40 Op_greater
8 16 KEYWORD_ELSE
8 16 Keyword_else
8 40 OP_LESSEQUAL
8 40 Op_lessequal
9 16 KEYWORD_WHILE
9 16 Keyword_while
9 40 OP_GREATEREQUAL
9 40 Op_greaterequal
10 16 LEFTBRACE
10 16 LeftBrace
10 40 OP_EQUAL
10 40 Op_equal
11 16 RIGHTBRACE
11 16 RightBrace
11 40 OP_NOTEQUAL
11 40 Op_notequal
12 16 LEFTPAREN
12 16 LeftParen
12 40 OP_AND
12 40 Op_and
13 16 RIGHTPAREN
13 16 RightParen
13 40 OP_OR
13 40 Op_or
14 16 OP_SUBTRACT
14 16 Op_subtract
14 40 SEMICOLON
14 40 Semicolon
15 16 OP_NOT
15 16 Op_not
15 40 COMMA
15 40 Comma
16 16 OP_MULTIPLY
16 16 Op_multiply
16 40 OP_ASSIGN
16 40 Op_assign
17 16 OP_DIVIDE
17 16 Op_divide
17 40 INTEGER 42
17 40 Integer 42
18 16 OP_MOD
18 16 Op_mod
18 40 STRING "String literal"
18 40 String "String literal"
19 16 OP_ADD
19 16 Op_add
19 40 IDENTIFIER variable_name
19 40 Identifier variable_name
20 26 INTEGER 10
20 26 Integer 10
21 26 INTEGER 92
21 26 Integer 92
22 26 INTEGER 32
22 26 Integer 32
23 1 END_OF_INPUT
23 1 End_of_input
</pre>
</pre>
</b>
</b>