Compiler/lexical analyzer: Difference between revisions
Content added Content deleted
(Added "else", and ">", "==", "!", "||" operators) |
(CHANGE TOKEN NAMES FROM ALL CAPS to something a little easier on old eyes.) |
||
Line 22: | Line 22: | ||
! Name !! Common name !! Character sequence |
! Name !! Common name !! Character sequence |
||
|- |
|- |
||
| <tt> |
| <tt>Op_multiply</tt> || multiply || <tt>*</tt> |
||
|- |
|- |
||
| <tt> |
| <tt>Op_divide</tt> || divide || <tt>/</tt> |
||
|- |
|- |
||
| <tt> |
| <tt>Op_mod</tt> || mod || <tt>%</tt> |
||
|- |
|- |
||
| <tt> |
| <tt>Op_add</tt> || plus || <tt>+</tt> |
||
|- |
|- |
||
| <tt> |
| <tt>Op_subtract</tt> || minus || <tt>-</tt> |
||
|- |
|- |
||
| <tt> |
| <tt>Op_negate</tt> || unary minus || <tt>-</tt> |
||
|- |
|- |
||
| <tt> |
| <tt>Op_less</tt> || less than || <tt><</tt> |
||
|- |
|- |
||
| <tt> |
| <tt>Op_lessequal</tt> || less than or equal || <tt><=</tt> |
||
|- |
|- |
||
| <tt> |
| <tt>Op_greater</tt> || greater than || <tt>></tt> |
||
|- |
|- |
||
| <tt> |
| <tt>Op_greaterequal</tt> || greater than or equal || <tt>>=</tt> |
||
|- |
|- |
||
| <tt> |
| <tt>Op_equal</tt> || equal || <tt>==</tt> |
||
|- |
|- |
||
| <tt> |
| <tt>Op_notequal</tt> || not equal || <tt>!=</tt> |
||
|- |
|- |
||
| <tt> |
| <tt>Op_not</tt> || unary not || <tt>!</tt> |
||
|- |
|- |
||
| <tt> |
| <tt>Op_assign</tt> || assignment || <tt>=</tt> |
||
|- |
|- |
||
| <tt> |
| <tt>Op_and</tt> || logical and || <tt>&&</tt> |
||
|- |
|- |
||
| <tt> |
| <tt>Op_or</tt> || logical or || <tt><nowiki>||</nowiki></tt> |
||
|} |
|} |
||
* The <code>-</code> token should always be interpreted as <tt> |
* The <code>-</code> token should always be interpreted as <tt>Op_subtract</tt> by the lexer. Turning some <tt>Op_subtract</tt> into <tt>Op_negate</tt> will be the job of the syntax analyzer, which is not part of this task. |
||
;Symbols |
;Symbols |
||
Line 63: | Line 63: | ||
! Name !! Common name !! Character |
! Name !! Common name !! Character |
||
|- |
|- |
||
| <tt> |
| <tt>LeftParen</tt> || left parenthesis || <tt>(</tt> |
||
|- |
|- |
||
| <tt> |
| <tt>RightParen</tt> || right parenthesis || <tt>)</tt> |
||
|- |
|- |
||
| <tt> |
| <tt>LeftBrace</tt> || left brace || <tt>{</tt> |
||
|- |
|- |
||
| <tt> |
| <tt>RightBrace</tt> || right brace || <tt>}</tt> |
||
|- |
|- |
||
| <tt> |
| <tt>Semicolon</tt> || semi-colon || <tt>;</tt> |
||
|- |
|- |
||
| <tt> |
| <tt>Comma</tt> || comma || <tt>,</tt> |
||
|} |
|} |
||
Line 82: | Line 82: | ||
! Name || Character sequence |
! Name || Character sequence |
||
|- |
|- |
||
| <tt> |
| <tt>Keyword_if</tt> || <tt>if</tt> |
||
|- |
|- |
||
| <tt> |
| <tt>Keyword_else</tt> || <tt>else</tt> |
||
|- |
|- |
||
| <tt> |
| <tt>Keyword_while</tt> || <tt>while</tt> |
||
|- |
|- |
||
| <tt> |
| <tt>Keyword_print</tt> || <tt>print</tt> |
||
|- |
|- |
||
| <tt> |
| <tt>Keyword_putc</tt> || <tt>putc</tt> |
||
|} |
|} |
||
Line 105: | Line 105: | ||
! Value |
! Value |
||
|- |
|- |
||
| <tt> |
| <tt>Identifier</tt> |
||
| identifier |
| identifier |
||
| one or more letter/digit/underscore characters, but not starting with a digit |
| one or more letter/digit/underscore characters, but not starting with a digit |
||
Line 111: | Line 111: | ||
| as is |
| as is |
||
|- |
|- |
||
| <tt> |
| <tt>Integer</tt> |
||
| integer literal |
| integer literal |
||
| one or more digits |
| one or more digits |
||
Line 117: | Line 117: | ||
| as is, interpreted as a number |
| as is, interpreted as a number |
||
|- |
|- |
||
| <tt> |
| <tt>Integer</tt> |
||
| char literal |
| char literal |
||
| exactly one character (anything except newline or single quote) or one of the allowed escape sequences, enclosed by single quotes |
| exactly one character (anything except newline or single quote) or one of the allowed escape sequences, enclosed by single quotes |
||
Line 123: | Line 123: | ||
| the ASCII code point number of the character, e.g. 65 for <code>'A'</code> and 10 for <code>'\n'</code> |
| the ASCII code point number of the character, e.g. 65 for <code>'A'</code> and 10 for <code>'\n'</code> |
||
|- |
|- |
||
| <tt> |
| <tt>String</tt> |
||
| string literal |
| string literal |
||
| zero or more characters (anything except newline or double quote), enclosed by double quotes |
| zero or more characters (anything except newline or double quote), enclosed by double quotes |
||
Line 142: | Line 142: | ||
! Name || Location |
! Name || Location |
||
|- |
|- |
||
| <tt> |
| <tt>End_of_input</tt> || when the end of the input stream is reached |
||
|} |
|} |
||
Line 167: | Line 167: | ||
<pre> |
<pre> |
||
End_of_input Op_multiply Op_divide Op_mod Op_add Op_subtract |
|||
Op_negate Op_not Op_less Op_lessequal Op_greater Op_greaterequal |
|||
OP_NEGATE OP_LESS OP_LESSEQUAL OP_GREATER OP_GREATEREQUAL OP_EQUAL |
|||
Op_equal Op_notequal Op_assign Op_and Op_or Keyword_if |
|||
Keyword_else Keyword_while Keyword_print Keyword_putc LeftParen RightParen |
|||
KEYWORD_WHILE KEYWORD_PRINT KEYWORD_PUTC LEFTPAREN RIGHTPAREN LEFTBRACE |
|||
LeftBrace RightBrace Semicolon Comma Identifier Integer |
|||
String |
|||
</pre> |
</pre> |
||
Line 181: | Line 182: | ||
# the column number where the token starts |
# the column number where the token starts |
||
# the token name |
# the token name |
||
# the token value (only for <tt> |
# the token value (only for <tt>Identifier</tt>, <tt>Integer</tt>, and <tt>String</tt> tokens) |
||
{{task heading|Diagnostics}} |
{{task heading|Diagnostics}} |
||
Line 226: | Line 227: | ||
| style="vertical-align:top" | |
| style="vertical-align:top" | |
||
<b><pre> |
<b><pre> |
||
4 1 |
4 1 Keyword_print |
||
4 6 |
4 6 LeftParen |
||
4 7 |
4 7 String "Hello, World!\n" |
||
4 24 |
4 24 RightParen |
||
4 25 |
4 25 Semicolon |
||
5 1 |
5 1 End_of_input |
||
</pre></b> |
</pre></b> |
||
Line 244: | Line 245: | ||
| style="vertical-align:top" | |
| style="vertical-align:top" | |
||
<b><pre> |
<b><pre> |
||
4 1 |
4 1 Identifier phoenix_number |
||
4 16 |
4 16 Op_assign |
||
4 18 |
4 18 Integer 142857 |
||
4 24 |
4 24 Semicolon |
||
5 1 |
5 1 Keyword_print |
||
5 6 |
5 6 LeftParen |
||
5 7 |
5 7 Identifier phoenix_number |
||
5 21 |
5 21 Comma |
||
5 23 |
5 23 String "\n" |
||
5 27 |
5 27 RightParen |
||
5 28 |
5 28 Semicolon |
||
6 1 |
6 1 End_of_input |
||
</pre></b> |
</pre></b> |
||
Line 285: | Line 286: | ||
| style="vertical-align:top" | |
| style="vertical-align:top" | |
||
<b><pre> |
<b><pre> |
||
5 16 |
5 16 Keyword_print |
||
5 40 |
5 40 Op_subtract |
||
6 16 |
6 16 Keyword_putc |
||
6 40 |
6 40 Op_less |
||
7 16 |
7 16 Keyword_if |
||
7 40 |
7 40 Op_greater |
||
8 16 |
8 16 Keyword_else |
||
8 40 |
8 40 Op_lessequal |
||
9 16 |
9 16 Keyword_while |
||
9 40 |
9 40 Op_greaterequal |
||
10 16 |
10 16 LeftBrace |
||
10 40 |
10 40 Op_equal |
||
11 16 |
11 16 RightBrace |
||
11 40 |
11 40 Op_notequal |
||
12 16 |
12 16 LeftParen |
||
12 40 |
12 40 Op_and |
||
13 16 |
13 16 RightParen |
||
13 40 |
13 40 Op_or |
||
14 16 |
14 16 Op_subtract |
||
14 40 |
14 40 Semicolon |
||
15 16 |
15 16 Op_not |
||
15 40 |
15 40 Comma |
||
16 16 |
16 16 Op_multiply |
||
16 40 |
16 40 Op_assign |
||
17 16 |
17 16 Op_divide |
||
17 40 |
17 40 Integer 42 |
||
18 16 |
18 16 Op_mod |
||
18 40 |
18 40 String "String literal" |
||
19 16 |
19 16 Op_add |
||
19 40 |
19 40 Identifier variable_name |
||
20 26 |
20 26 Integer 10 |
||
21 26 |
21 26 Integer 92 |
||
22 26 |
22 26 Integer 32 |
||
23 1 |
23 1 End_of_input |
||
</pre></b> |
</pre></b> |
||
|} |
|} |
||
Line 529: | Line 530: | ||
fprintf(dest_fp, "%5d %5d %.15s", |
fprintf(dest_fp, "%5d %5d %.15s", |
||
tok.err_ln, tok.err_col, |
tok.err_ln, tok.err_col, |
||
&" |
&"End_of_input Op_multiply Op_divide Op_mod Op_add " |
||
" |
"Op_subtract Op_negate Op_not Op_less Op_lessequal " |
||
" |
"Op_greater Op_greaterequal Op_equal Op_notequal Op_assign " |
||
" |
"Op_and Op_or Keyword_if Keyword_else Keyword_while " |
||
" |
"Keyword_print Keyword_putc LeftParen RightParen LeftBrace " |
||
" |
"RightBrace Semicolon Comma Identifier Integer " |
||
" |
"String " |
||
[tok.tok * 16]); |
[tok.tok * 16]); |
||
if (tok.tok == tk_Integer) fprintf(dest_fp, " %4d", tok.n); |
if (tok.tok == tk_Integer) fprintf(dest_fp, " %4d", tok.n); |
||
Line 563: | Line 564: | ||
<b> |
<b> |
||
<pre> |
<pre> |
||
5 16 |
5 16 Keyword_print |
||
5 40 |
5 40 Op_subtract |
||
6 16 |
6 16 Keyword_putc |
||
6 40 |
6 40 Op_less |
||
7 16 |
7 16 Keyword_if |
||
7 40 |
7 40 Op_greater |
||
8 16 |
8 16 Keyword_else |
||
8 40 |
8 40 Op_lessequal |
||
9 16 |
9 16 Keyword_while |
||
9 40 |
9 40 Op_greaterequal |
||
10 16 |
10 16 LeftBrace |
||
10 40 |
10 40 Op_equal |
||
11 16 |
11 16 RightBrace |
||
11 40 |
11 40 Op_notequal |
||
12 16 |
12 16 LeftParen |
||
12 40 |
12 40 Op_and |
||
13 16 |
13 16 RightParen |
||
13 40 |
13 40 Op_or |
||
14 16 |
14 16 Op_subtract |
||
14 40 |
14 40 Semicolon |
||
15 16 |
15 16 Op_not |
||
15 40 |
15 40 Comma |
||
16 16 |
16 16 Op_multiply |
||
16 40 |
16 40 Op_assign |
||
17 16 |
17 16 Op_divide |
||
17 40 |
17 40 Integer 42 |
||
18 16 |
18 16 Op_mod |
||
18 40 |
18 40 String "String literal" |
||
19 16 |
19 16 Op_add |
||
19 40 |
19 40 Identifier variable_name |
||
20 26 |
20 26 Integer 10 |
||
21 26 |
21 26 Integer 92 |
||
22 26 |
22 26 Integer 32 |
||
23 1 |
23 1 End_of_input |
||
</pre> |
</pre> |
||
</b> |
</b> |
||
Line 614: | Line 615: | ||
tk_Ident, tk_Integer, tk_String |
tk_Ident, tk_Integer, tk_String |
||
constant all_syms = {" |
constant all_syms = {"End_of_input", "Op_multiply", "Op_divide", "Op_mod", "Op_add", |
||
" |
"Op_subtract", "Op_negate", "Op_not", "Op_less", "Op_lessequal", "Op_greater", |
||
" |
"Op_greaterequal", "Op_equal", "Op_notequal", "Op_assign", "Op_and", "Op_or", |
||
" |
"Keyword_if", "Keyword_else", "Keyword_while", "Keyword_print", "Keyword_putc", |
||
" |
"LeftParen", "RightParen", "LeftBrace", "RightBrace", "Semicolon", "Comma", |
||
" |
"Identifier", "Integer", "String"} |
||
integer input_file, the_ch = ' ', the_col = 0, the_line = 1 |
integer input_file, the_ch = ' ', the_col = 0, the_line = 1 |
||
Line 827: | Line 828: | ||
<b> |
<b> |
||
<pre> |
<pre> |
||
5 16 |
5 16 Keyword_print |
||
5 40 |
5 40 Op_subtract |
||
6 16 |
6 16 Keyword_putc |
||
6 40 |
6 40 Op_less |
||
7 16 |
7 16 Keyword_if |
||
7 40 |
7 40 Op_greater |
||
8 16 |
8 16 Keyword_else |
||
8 40 |
8 40 Op_lessequal |
||
9 16 |
9 16 Keyword_while |
||
9 40 |
9 40 Op_greaterequal |
||
10 16 |
10 16 LeftBrace |
||
10 40 |
10 40 Op_equal |
||
11 16 |
11 16 RightBrace |
||
11 40 |
11 40 Op_notequal |
||
12 16 |
12 16 LeftParen |
||
12 40 |
12 40 Op_and |
||
13 16 |
13 16 RightParen |
||
13 40 |
13 40 Op_or |
||
14 16 |
14 16 Op_subtract |
||
14 40 |
14 40 Semicolon |
||
15 16 |
15 16 Op_not |
||
15 40 |
15 40 Comma |
||
16 16 |
16 16 Op_multiply |
||
16 40 |
16 40 Op_assign |
||
17 16 |
17 16 Op_divide |
||
17 40 |
17 40 Integer 42 |
||
18 16 |
18 16 Op_mod |
||
18 40 |
18 40 String "String literal" |
||
19 16 |
19 16 Op_add |
||
19 40 |
19 40 Identifier variable_name |
||
20 26 |
20 26 Integer 10 |
||
21 26 |
21 26 Integer 92 |
||
22 26 |
22 26 Integer 32 |
||
23 1 |
23 1 End_of_input |
||
</pre> |
</pre> |
||
</b> |
</b> |
||
Line 1,024: | Line 1,025: | ||
tok = yylex(); |
tok = yylex(); |
||
printf("%5d %5d %.15s", yylloc.first_line, yylloc.first_col, |
printf("%5d %5d %.15s", yylloc.first_line, yylloc.first_col, |
||
&" |
&"End_of_input Op_multiply Op_divide Op_mod Op_add " |
||
" |
"Op_subtract Op_negate Op_not Op_less Op_lessequal " |
||
" |
"Op_greater Op_greaterequal Op_equal Op_notequal Op_assign " |
||
" |
"Op_and Op_or Keyword_if Keyword_else Keyword_while " |
||
" |
"Keyword_print Keyword_putc LeftParen RightParen LeftBrace " |
||
" |
"RightBrace Semicolon Comma Identifier Integer " |
||
" |
"String " |
||
[tok * 16]); |
[tok * 16]); |
||
Line 1,044: | Line 1,045: | ||
<b> |
<b> |
||
<pre> |
<pre> |
||
5 16 |
5 16 Keyword_print |
||
5 40 |
5 40 Op_subtract |
||
6 16 |
6 16 Keyword_putc |
||
6 40 |
6 40 Op_less |
||
7 16 |
7 16 Keyword_if |
||
7 40 |
7 40 Op_greater |
||
8 16 |
8 16 Keyword_else |
||
8 40 |
8 40 Op_lessequal |
||
9 16 |
9 16 Keyword_while |
||
9 40 |
9 40 Op_greaterequal |
||
10 16 |
10 16 LeftBrace |
||
10 40 |
10 40 Op_equal |
||
11 16 |
11 16 RightBrace |
||
11 40 |
11 40 Op_notequal |
||
12 16 |
12 16 LeftParen |
||
12 40 |
12 40 Op_and |
||
13 16 |
13 16 RightParen |
||
13 40 |
13 40 Op_or |
||
14 16 |
14 16 Op_subtract |
||
14 40 |
14 40 Semicolon |
||
15 16 |
15 16 Op_not |
||
15 40 |
15 40 Comma |
||
16 16 |
16 16 Op_multiply |
||
16 40 |
16 40 Op_assign |
||
17 16 |
17 16 Op_divide |
||
17 40 |
17 40 Integer 42 |
||
18 16 |
18 16 Op_mod |
||
18 40 |
18 40 String "String literal" |
||
19 16 |
19 16 Op_add |
||
19 40 |
19 40 Identifier variable_name |
||
20 26 |
20 26 Integer 10 |
||
21 26 |
21 26 Integer 92 |
||
22 26 |
22 26 Integer 32 |
||
22 29 |
22 29 End_of_input |
||
</pre> |
</pre> |
||
</b> |
</b> |
||
Line 1,317: | Line 1,318: | ||
dim tok_list(tk_eoi to tk_string) as string |
dim tok_list(tk_eoi to tk_string) as string |
||
tok_list(tk_EOI ) = " |
tok_list(tk_EOI ) = "End_of_input" |
||
tok_list(tk_Mul ) = " |
tok_list(tk_Mul ) = "Op_multiply" |
||
tok_list(tk_Div ) = " |
tok_list(tk_Div ) = "Op_divide" |
||
tok_list(tk_Mod ) = " |
tok_list(tk_Mod ) = "Op_mod" |
||
tok_list(tk_Add ) = " |
tok_list(tk_Add ) = "Op_add" |
||
tok_list(tk_Sub ) = " |
tok_list(tk_Sub ) = "Op_subtract" |
||
tok_list(tk_Negate ) = " |
tok_list(tk_Negate ) = "Op_negate" |
||
tok_list(tk_Not ) = " |
tok_list(tk_Not ) = "Op_not" |
||
tok_list(tk_Lss ) = " |
tok_list(tk_Lss ) = "Op_less" |
||
tok_list(tk_Leq ) = " |
tok_list(tk_Leq ) = "Op_lessequal" |
||
tok_list(tk_Gtr ) = " |
tok_list(tk_Gtr ) = "Op_greater" |
||
tok_list(tk_Geq ) = " |
tok_list(tk_Geq ) = "Op_greaterequal" |
||
tok_list(tk_Eq ) = " |
tok_list(tk_Eq ) = "Op_equal" |
||
tok_list(tk_Neq ) = " |
tok_list(tk_Neq ) = "Op_notequal" |
||
tok_list(tk_Assign ) = " |
tok_list(tk_Assign ) = "Op_assign" |
||
tok_list(tk_And ) = " |
tok_list(tk_And ) = "Op_and" |
||
tok_list(tk_Or ) = " |
tok_list(tk_Or ) = "Op_or" |
||
tok_list(tk_If ) = " |
tok_list(tk_If ) = "Keyword_if" |
||
tok_list(tk_Else ) = " |
tok_list(tk_Else ) = "Keyword_else" |
||
tok_list(tk_While ) = " |
tok_list(tk_While ) = "Keyword_while" |
||
tok_list(tk_Print ) = " |
tok_list(tk_Print ) = "Keyword_print" |
||
tok_list(tk_Putc ) = " |
tok_list(tk_Putc ) = "Keyword_putc" |
||
tok_list(tk_Lparen ) = " |
tok_list(tk_Lparen ) = "LeftParen" |
||
tok_list(tk_Rparen ) = " |
tok_list(tk_Rparen ) = "RightParen" |
||
tok_list(tk_Lbrace ) = " |
tok_list(tk_Lbrace ) = "LeftBrace" |
||
tok_list(tk_Rbrace ) = " |
tok_list(tk_Rbrace ) = "RightBrace" |
||
tok_list(tk_Semi ) = " |
tok_list(tk_Semi ) = "Semicolon" |
||
tok_list(tk_Comma ) = " |
tok_list(tk_Comma ) = "Comma" |
||
tok_list(tk_Ident ) = " |
tok_list(tk_Ident ) = "Identifier" |
||
tok_list(tk_Integer) = " |
tok_list(tk_Integer) = "Integer" |
||
tok_list(tk_String ) = " |
tok_list(tk_String ) = "String" |
||
do |
do |
||
Line 1,369: | Line 1,370: | ||
<b> |
<b> |
||
<pre> |
<pre> |
||
5 16 |
5 16 Keyword_print |
||
5 40 |
5 40 Op_subtract |
||
6 16 |
6 16 Keyword_putc |
||
6 40 |
6 40 Op_less |
||
7 16 |
7 16 Keyword_if |
||
7 40 |
7 40 Op_greater |
||
8 16 |
8 16 Keyword_else |
||
8 40 |
8 40 Op_lessequal |
||
9 16 |
9 16 Keyword_while |
||
9 40 |
9 40 Op_greaterequal |
||
10 16 |
10 16 LeftBrace |
||
10 40 |
10 40 Op_equal |
||
11 16 |
11 16 RightBrace |
||
11 40 |
11 40 Op_notequal |
||
12 16 |
12 16 LeftParen |
||
12 40 |
12 40 Op_and |
||
13 16 |
13 16 RightParen |
||
13 40 |
13 40 Op_or |
||
14 16 |
14 16 Op_subtract |
||
14 40 |
14 40 Semicolon |
||
15 16 |
15 16 Op_not |
||
15 40 |
15 40 Comma |
||
16 16 |
16 16 Op_multiply |
||
16 40 |
16 40 Op_assign |
||
17 16 |
17 16 Op_divide |
||
17 40 |
17 40 Integer 42 |
||
18 16 |
18 16 Op_mod |
||
18 40 |
18 40 String "String literal" |
||
19 16 |
19 16 Op_add |
||
19 40 |
19 40 Identifier variable_name |
||
20 26 |
20 26 Integer 10 |
||
21 26 |
21 26 Integer 92 |
||
22 26 |
22 26 Integer 32 |
||
22 30 |
22 30 End_of_input |
||
</pre> |
</pre> |
||
</b> |
</b> |
||
Line 1,420: | Line 1,421: | ||
# Name | Format | Value # |
# Name | Format | Value # |
||
# -------------- |----------------------|-------------# |
# -------------- |----------------------|-------------# |
||
[' |
['Op_multiply' , '*' , ], |
||
[' |
['Op_divide' , '/' , ], |
||
[' |
['Op_mod' , '%' , ], |
||
[' |
['Op_add' , '+' , ], |
||
[' |
['Op_subtract' , '-' , ], |
||
[' |
['Op_lessequal' , '<=' , ], |
||
[' |
['Op_less' , '<' , ], |
||
[' |
['Op_greaterequal', '>=' , ], |
||
[' |
['Op_greater' , '>' , ], |
||
[' |
['Op_equal' , '==' , ], |
||
[' |
['Op_assign' , '=' , ], |
||
[' |
['Op_not' , '!' , ], |
||
[' |
['Op_notequal' , '!=' , ], |
||
[' |
['Op_and' , '&&' , ], |
||
[' |
['Op_or' , '||' , ], |
||
[' |
['Keyword_else' , qr/else\b/ , ], |
||
[' |
['Keyword_if' , qr/if\b/ , ], |
||
[' |
['Keyword_while' , qr/while\b/ , ], |
||
[' |
['Keyword_print' , qr/print\b/ , ], |
||
[' |
['Keyword_putc' , qr/putc\b/ , ], |
||
[' |
['LeftParen' , '(' , ], |
||
[' |
['RightParen' , ')' , ], |
||
[' |
['LeftBrace' , '{' , ], |
||
[' |
['RightBrace' , '}' , ], |
||
[' |
['Semicolon' , ';' , ], |
||
[' |
['Comma' , ',' , ], |
||
[' |
['Identifier' , qr/[_a-z][_a-z0-9]*/i, \&raw ], |
||
[' |
['Integer' , qr/[0-9]+\b/ , \&raw ], |
||
[' |
['Integer' , qr/'([^']*)(')?/ , \&char_val ], |
||
[' |
['String' , qr/"([^"]*)(")?/ , \&string_raw], |
||
[' |
['End_of_input' , qr/$/ , ], |
||
); |
); |
||
Line 1,551: | Line 1,552: | ||
{{out|case=test case 3}} |
{{out|case=test case 3}} |
||
<pre> |
<pre> |
||
5 16 |
5 16 Keyword_print |
||
5 40 |
5 40 Op_subtract |
||
6 16 |
6 16 Keyword_putc |
||
6 40 |
6 40 Op_less |
||
7 16 |
7 16 Keyword_if |
||
7 40 |
7 40 Op_greater |
||
8 16 |
8 16 Keyword_else |
||
8 40 |
8 40 Op_lessequal |
||
9 16 |
9 16 Keyword_while |
||
9 40 |
9 40 Op_greaterequal |
||
10 16 |
10 16 LeftBrace |
||
10 40 |
10 40 Op_equal |
||
11 16 |
11 16 RightBrace |
||
11 40 |
11 40 Op_not |
||
11 41 |
11 41 Op_assign |
||
12 16 |
12 16 LeftParen |
||
12 40 |
12 40 Op_and |
||
13 16 |
13 16 RightParen |
||
13 40 |
13 40 Op_or |
||
14 16 |
14 16 Op_subtract |
||
14 40 |
14 40 Semicolon |
||
15 16 |
15 16 Op_not |
||
15 40 |
15 40 Comma |
||
16 16 |
16 16 Op_multiply |
||
16 40 |
16 40 Op_assign |
||
17 16 |
17 16 Op_divide |
||
17 40 |
17 40 Integer 42 |
||
18 16 |
18 16 Op_mod |
||
18 40 |
18 40 String "String literal" |
||
19 16 |
19 16 Op_add |
||
19 40 |
19 40 Identifier variable_name |
||
20 26 |
20 26 Integer 10 |
||
21 26 |
21 26 Integer 92 |
||
22 26 |
22 26 Integer 32 |
||
23 1 |
23 1 End_of_input |
||
</pre> |
</pre> |
||
Line 1,616: | Line 1,617: | ||
proto token operator {*} |
proto token operator {*} |
||
token operator:sym<*> { '*' { make ' |
token operator:sym<*> { '*' { make 'Op_multiply' } } |
||
token operator:sym</> { '/'<!before '*'> { make ' |
token operator:sym</> { '/'<!before '*'> { make 'Op_divide' } } |
||
token operator:sym<%> { '%' { make ' |
token operator:sym<%> { '%' { make 'Op_mod' } } |
||
token operator:sym<+> { '+' { make ' |
token operator:sym<+> { '+' { make 'Op_add' } } |
||
token operator:sym<-> { '-' { make ' |
token operator:sym<-> { '-' { make 'Op_subtract' } } |
||
token operator:sym('<='){ '<=' { make ' |
token operator:sym('<='){ '<=' { make 'Op_lessequal' } } |
||
token operator:sym('<') { '<' { make ' |
token operator:sym('<') { '<' { make 'Op_less' } } |
||
token operator:sym('>='){ '>=' { make ' |
token operator:sym('>='){ '>=' { make 'Op_greaterequal'} } |
||
token operator:sym('>') { '>' { make ' |
token operator:sym('>') { '>' { make 'Op_greater' } } |
||
token operator:sym<==> { '==' { make ' |
token operator:sym<==> { '==' { make 'Op_equal' } } |
||
token operator:sym<!=> { '!=' { make ' |
token operator:sym<!=> { '!=' { make 'Op_notequal' } } |
||
token operator:sym<!> { '!' { make ' |
token operator:sym<!> { '!' { make 'Op_not' } } |
||
token operator:sym<=> { '=' { make ' |
token operator:sym<=> { '=' { make 'Op_assign' } } |
||
token operator:sym<&&> { '&&' { make ' |
token operator:sym<&&> { '&&' { make 'Op_and' } } |
||
token operator:sym<||> { '||' { make ' |
token operator:sym<||> { '||' { make 'Op_or' } } |
||
proto token keyword {*} |
proto token keyword {*} |
||
token keyword:sym<if> { 'if' { make ' |
token keyword:sym<if> { 'if' { make 'Keyword_if' } } |
||
token keyword:sym<else> { 'else' { make ' |
token keyword:sym<else> { 'else' { make 'Keyword_else' } } |
||
token keyword:sym<putc> { 'putc' { make ' |
token keyword:sym<putc> { 'putc' { make 'Keyword_putc' } } |
||
token keyword:sym<while> { 'while' { make ' |
token keyword:sym<while> { 'while' { make 'Keyword_while' } } |
||
token keyword:sym<print> { 'print' { make ' |
token keyword:sym<print> { 'print' { make 'Keyword_print' } } |
||
proto token symbol {*} |
proto token symbol {*} |
||
token symbol:sym<(> { '(' { make ' |
token symbol:sym<(> { '(' { make 'LeftParen' } } |
||
token symbol:sym<)> { ')' { make ' |
token symbol:sym<)> { ')' { make 'RightParen' } } |
||
token symbol:sym<{> { '{' { make ' |
token symbol:sym<{> { '{' { make 'LeftBrace' } } |
||
token symbol:sym<}> { '}' { make ' |
token symbol:sym<}> { '}' { make 'RightBrace' } } |
||
token symbol:sym<;> { ';' { make ' |
token symbol:sym<;> { ';' { make 'Semicolon' } } |
||
token symbol:sym<,> { ',' { make ' |
token symbol:sym<,> { ',' { make 'Comma' } } |
||
token identifier { <[_A..Za..z]><[_A..Za..z0..9]>* { make 'IDENTIFER ' ~ $/ } } |
token identifier { <[_A..Za..z]><[_A..Za..z0..9]>* { make 'IDENTIFER ' ~ $/ } } |
||
token integer { <[0..9]>+ { make ' |
token integer { <[0..9]>+ { make 'Integer ' ~ $/ } } |
||
token char { |
token char { |
||
Line 1,658: | Line 1,659: | ||
'"' <-["\n]>* '"' #' |
'"' <-["\n]>* '"' #' |
||
{ |
{ |
||
make ' |
make 'String ' ~ $/; |
||
note 'Error: Unknown escape sequence.' and exit if (~$/ ~~ m:r/ <!after <[\\]>>[\\<-[n\\]>]<!before <[\\]>> /); |
note 'Error: Unknown escape sequence.' and exit if (~$/ ~~ m:r/ <!after <[\\]>>[\\<-[n\\]>]<!before <[\\]>> /); |
||
} |
} |
||
} |
} |
||
token eoi { $ { make ' |
token eoi { $ { make 'End_of_input' } } |
||
token error { |
token error { |
||
Line 1,676: | Line 1,677: | ||
sub parse_it ( $c_code ) { |
sub parse_it ( $c_code ) { |
||
my $l; |
my $l; |
||
my @pos = gather for $c_code.lines |
my @pos = gather for $c_code.lines>>.chars.kv -> $line, $v { |
||
take [ $line + 1, $_ ] for 1 .. ($v+1); # v+1 for newline |
take [ $line + 1, $_ ] for 1 .. ($v+1); # v+1 for newline |
||
$l = $line+2; |
$l = $line+2; |
||
Line 1,692: | Line 1,693: | ||
{{out|case=test case 3}} |
{{out|case=test case 3}} |
||
<pre> |
<pre> |
||
5 16 |
5 16 Keyword_print |
||
5 40 |
5 40 Op_subtract |
||
6 16 |
6 16 Keyword_putc |
||
6 40 |
6 40 Op_less |
||
7 16 |
7 16 Keyword_if |
||
7 40 |
7 40 Op_greater |
||
8 16 |
8 16 Keyword_else |
||
8 40 |
8 40 Op_lessequal |
||
9 16 |
9 16 Keyword_while |
||
9 40 |
9 40 Op_greaterequal |
||
10 16 |
10 16 LeftBrace |
||
10 40 |
10 40 Op_equal |
||
11 16 |
11 16 RightBrace |
||
11 40 |
11 40 Op_notequal |
||
12 16 |
12 16 LeftParen |
||
12 40 |
12 40 Op_and |
||
13 16 |
13 16 RightParen |
||
13 40 |
13 40 Op_or |
||
14 16 |
14 16 Op_subtract |
||
14 40 |
14 40 Semicolon |
||
15 16 |
15 16 Op_not |
||
15 40 |
15 40 Comma |
||
16 16 |
16 16 Op_multiply |
||
16 40 |
16 40 Op_assign |
||
17 16 |
17 16 Op_divide |
||
17 40 |
17 40 Integer 42 |
||
18 16 |
18 16 Op_mod |
||
18 40 |
18 40 String "String literal" |
||
19 16 |
19 16 Op_add |
||
19 40 IDENTIFER variable_name |
19 40 IDENTIFER variable_name |
||
20 26 CHAR_LITERAL 10 |
20 26 CHAR_LITERAL 10 |
||
21 26 CHAR_LITERAL 92 |
21 26 CHAR_LITERAL 92 |
||
22 26 CHAR_LITERAL 32 |
22 26 CHAR_LITERAL 32 |
||
23 1 |
23 1 End_of_input |
||
</pre> |
</pre> |
||
Line 1,740: | Line 1,741: | ||
tk_Integer, tk_String = range(31) |
tk_Integer, tk_String = range(31) |
||
all_syms = [" |
all_syms = ["End_of_input", "Op_multiply", "Op_divide", "Op_mod", "Op_add", "Op_subtract", |
||
" |
"Op_negate", "Op_not", "Op_less", "Op_lessequal", "Op_greater", "Op_greaterequal", |
||
" |
"Op_equal", "Op_notequal", "Op_assign", "Op_and", "Op_or", "Keyword_if", |
||
" |
"Keyword_else", "Keyword_while", "Keyword_print", "Keyword_putc", "LeftParen", |
||
" |
"RightParen", "LeftBrace", "RightBrace", "Semicolon", "Comma", "Identifier", |
||
" |
"Integer", "String"] |
||
# single character only symbols |
# single character only symbols |
||
Line 1,906: | Line 1,907: | ||
<b> |
<b> |
||
<pre> |
<pre> |
||
5 16 |
5 16 Keyword_print |
||
5 40 |
5 40 Op_subtract |
||
6 16 |
6 16 Keyword_putc |
||
6 40 |
6 40 Op_less |
||
7 16 |
7 16 Keyword_if |
||
7 40 |
7 40 Op_greater |
||
8 16 |
8 16 Keyword_else |
||
8 40 |
8 40 Op_lessequal |
||
9 16 |
9 16 Keyword_while |
||
9 40 |
9 40 Op_greaterequal |
||
10 16 |
10 16 LeftBrace |
||
10 40 |
10 40 Op_equal |
||
11 16 |
11 16 RightBrace |
||
11 40 |
11 40 Op_notequal |
||
12 16 |
12 16 LeftParen |
||
12 40 |
12 40 Op_and |
||
13 16 |
13 16 RightParen |
||
13 40 |
13 40 Op_or |
||
14 16 |
14 16 Op_subtract |
||
14 40 |
14 40 Semicolon |
||
15 16 |
15 16 Op_not |
||
15 40 |
15 40 Comma |
||
16 16 |
16 16 Op_multiply |
||
16 40 |
16 40 Op_assign |
||
17 16 |
17 16 Op_divide |
||
17 40 |
17 40 Integer 42 |
||
18 16 |
18 16 Op_mod |
||
18 40 |
18 40 String "String literal" |
||
19 16 |
19 16 Op_add |
||
19 40 |
19 40 Identifier variable_name |
||
20 26 |
20 26 Integer 10 |
||
21 26 |
21 26 Integer 92 |
||
22 26 |
22 26 Integer 32 |
||
23 1 |
23 1 End_of_input |
||
</pre> |
</pre> |
||
</b> |
</b> |