Compiler/lexical analyzer: Difference between revisions
Content added Content deleted
(Added Go) |
|||
Line 3,265: | Line 3,265: | ||
22 30 End_of_input</pre> |
22 30 End_of_input</pre> |
||
</b> |
</b> |
||
=={{header|Go}}== |
|||
{{trans|FreeBASIC}} |
|||
<lang go>package main |
|||
import ( |
|||
"bufio" |
|||
"fmt" |
|||
"log" |
|||
"os" |
|||
) |
|||
type TokenType int |
|||
const ( |
|||
tkEOI TokenType = iota |
|||
tkMul |
|||
tkDiv |
|||
tkMod |
|||
tkAdd |
|||
tkSub |
|||
tkNegate |
|||
tkNot |
|||
tkLss |
|||
tkLeq |
|||
tkGtr |
|||
tkGeq |
|||
tkEq |
|||
tkNeq |
|||
tkAssign |
|||
tkAnd |
|||
tkOr |
|||
tkIf |
|||
tkElse |
|||
tkWhile |
|||
tkPrint |
|||
tkPutc |
|||
tkLparen |
|||
tkRparen |
|||
tkLbrace |
|||
tkRbrace |
|||
tkSemi |
|||
tkComma |
|||
tkIdent |
|||
tkInteger |
|||
tkString |
|||
) |
|||
type Symbol struct { |
|||
name string |
|||
tok TokenType |
|||
} |
|||
// symbol table |
|||
var symtab []Symbol |
|||
var scanner *bufio.Scanner |
|||
var ( |
|||
curLine = "" |
|||
curCh byte |
|||
lineNum = 0 |
|||
colNum = 0 |
|||
) |
|||
const etx byte = 4 // used to signify EOI |
|||
func isDigit(ch byte) bool { |
|||
return ch >= '0' && ch <= '9' |
|||
} |
|||
func isAlnum(ch byte) bool { |
|||
return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || isDigit(ch) |
|||
} |
|||
func errorMsg(eline, ecol int, msg string) { |
|||
log.Fatalf("(%d:%d) %s", eline, ecol, msg) |
|||
} |
|||
// add an identifier to the symbol table |
|||
func install(name string, tok TokenType) { |
|||
sym := Symbol{name, tok} |
|||
symtab = append(symtab, sym) |
|||
} |
|||
// search for an identifier in the symbol table |
|||
func lookup(name string) int { |
|||
for i := 0; i < len(symtab); i++ { |
|||
if symtab[i].name == name { |
|||
return i |
|||
} |
|||
} |
|||
return -1 |
|||
} |
|||
// read the next line of input from the source file |
|||
func nextLine() { |
|||
if scanner.Scan() { |
|||
curLine = scanner.Text() |
|||
lineNum++ |
|||
colNum = 0 |
|||
if curLine == "" { // skip blank lines |
|||
nextLine() |
|||
} |
|||
} else { |
|||
err := scanner.Err() |
|||
if err == nil { // EOF |
|||
curCh = etx |
|||
curLine = "" |
|||
lineNum++ |
|||
colNum = 1 |
|||
} else { |
|||
log.Fatal(err) |
|||
} |
|||
} |
|||
} |
|||
// get the next char |
|||
func nextChar() { |
|||
if colNum >= len(curLine) { |
|||
nextLine() |
|||
} |
|||
if colNum < len(curLine) { |
|||
curCh = curLine[colNum] |
|||
colNum++ |
|||
} |
|||
} |
|||
func follow(eline, ecol int, expect byte, ifyes, ifno TokenType) TokenType { |
|||
if curCh == expect { |
|||
nextChar() |
|||
return ifyes |
|||
} |
|||
if ifno == tkEOI { |
|||
errorMsg(eline, ecol, "follow unrecognized character: "+string(curCh)) |
|||
} |
|||
return ifno |
|||
} |
|||
func gettok() (eline, ecol int, tok TokenType, v string) { |
|||
// skip whitespace |
|||
for curCh == ' ' || curCh == '\t' || curCh == '\n' { |
|||
nextChar() |
|||
} |
|||
eline = lineNum |
|||
ecol = colNum |
|||
switch curCh { |
|||
case etx: |
|||
tok = tkEOI |
|||
return |
|||
case '{': |
|||
tok = tkLbrace |
|||
nextChar() |
|||
return |
|||
case '}': |
|||
tok = tkRbrace |
|||
nextChar() |
|||
return |
|||
case '(': |
|||
tok = tkLparen |
|||
nextChar() |
|||
return |
|||
case ')': |
|||
tok = tkRparen |
|||
nextChar() |
|||
return |
|||
case '+': |
|||
tok = tkAdd |
|||
nextChar() |
|||
return |
|||
case '-': |
|||
tok = tkSub |
|||
nextChar() |
|||
return |
|||
case '*': |
|||
tok = tkMul |
|||
nextChar() |
|||
return |
|||
case '%': |
|||
tok = tkMod |
|||
nextChar() |
|||
return |
|||
case ';': |
|||
tok = tkSemi |
|||
nextChar() |
|||
return |
|||
case ',': |
|||
tok = tkComma |
|||
nextChar() |
|||
return |
|||
case '/': // div or comment |
|||
nextChar() |
|||
if curCh != '*' { |
|||
tok = tkDiv |
|||
return |
|||
} |
|||
// skip comments |
|||
nextChar() |
|||
for { |
|||
if curCh == '*' { |
|||
nextChar() |
|||
if curCh == '/' { |
|||
nextChar() |
|||
eline, ecol, tok, v = gettok() |
|||
return |
|||
} |
|||
} else if curCh == etx { |
|||
errorMsg(eline, ecol, "EOF in comment") |
|||
} else { |
|||
nextChar() |
|||
} |
|||
} |
|||
case '\'': // single char literals |
|||
nextChar() |
|||
v = fmt.Sprintf("%d", curCh) |
|||
if curCh == '\'' { |
|||
errorMsg(eline, ecol, "Empty character constant") |
|||
} |
|||
if curCh == '\\' { |
|||
nextChar() |
|||
if curCh == 'n' { |
|||
v = "10" |
|||
} else if curCh == '\\' { |
|||
v = "92" |
|||
} else { |
|||
errorMsg(eline, ecol, "unknown escape sequence: "+string(curCh)) |
|||
} |
|||
} |
|||
nextChar() |
|||
if curCh != '\'' { |
|||
errorMsg(eline, ecol, "multi-character constant") |
|||
} |
|||
nextChar() |
|||
tok = tkInteger |
|||
return |
|||
case '<': |
|||
nextChar() |
|||
tok = follow(eline, ecol, '=', tkLeq, tkLss) |
|||
return |
|||
case '>': |
|||
nextChar() |
|||
tok = follow(eline, ecol, '=', tkGeq, tkGtr) |
|||
return |
|||
case '!': |
|||
nextChar() |
|||
tok = follow(eline, ecol, '=', tkNeq, tkNot) |
|||
return |
|||
case '=': |
|||
nextChar() |
|||
tok = follow(eline, ecol, '=', tkEq, tkAssign) |
|||
return |
|||
case '&': |
|||
nextChar() |
|||
tok = follow(eline, ecol, '&', tkAnd, tkEOI) |
|||
return |
|||
case '|': |
|||
nextChar() |
|||
tok = follow(eline, ecol, '|', tkOr, tkEOI) |
|||
return |
|||
case '"': // string |
|||
v = string(curCh) |
|||
nextChar() |
|||
for curCh != '"' { |
|||
if curCh == '\n' { |
|||
errorMsg(eline, ecol, "EOL in string") |
|||
} |
|||
if curCh == etx { |
|||
errorMsg(eline, ecol, "EOF in string") |
|||
} |
|||
v += string(curCh) |
|||
nextChar() |
|||
} |
|||
v += string(curCh) |
|||
nextChar() |
|||
tok = tkString |
|||
return |
|||
default: // integers or identifiers |
|||
isNumber := isDigit(curCh) |
|||
v = "" |
|||
for isAlnum(curCh) || curCh == '_' { |
|||
if !isDigit(curCh) { |
|||
isNumber = false |
|||
} |
|||
v += string(curCh) |
|||
nextChar() |
|||
} |
|||
if len(v) == 0 { |
|||
errorMsg(eline, ecol, "unknown character: "+string(curCh)) |
|||
} |
|||
if isDigit(v[0]) { |
|||
if !isNumber { |
|||
errorMsg(eline, ecol, "invalid number: "+string(curCh)) |
|||
} |
|||
tok = tkInteger |
|||
return |
|||
} |
|||
index := lookup(v) |
|||
if index == -1 { |
|||
tok = tkIdent |
|||
} else { |
|||
tok = symtab[index].tok |
|||
} |
|||
return |
|||
} |
|||
} |
|||
func initLex() { |
|||
install("else", tkElse) |
|||
install("if", tkIf) |
|||
install("print", tkPrint) |
|||
install("putc", tkPutc) |
|||
install("while", tkWhile) |
|||
nextChar() |
|||
} |
|||
func process() { |
|||
tokMap := make(map[TokenType]string) |
|||
tokMap[tkEOI] = "End_of_input" |
|||
tokMap[tkMul] = "Op_multiply" |
|||
tokMap[tkDiv] = "Op_divide" |
|||
tokMap[tkMod] = "Op_mod" |
|||
tokMap[tkAdd] = "Op_add" |
|||
tokMap[tkSub] = "Op_subtract" |
|||
tokMap[tkNegate] = "Op_negate" |
|||
tokMap[tkNot] = "Op_not" |
|||
tokMap[tkLss] = "Op_less" |
|||
tokMap[tkLeq] = "Op_lessequal" |
|||
tokMap[tkGtr] = "Op_greater" |
|||
tokMap[tkGeq] = "Op_greaterequal" |
|||
tokMap[tkEq] = "Op_equal" |
|||
tokMap[tkNeq] = "Op_notequal" |
|||
tokMap[tkAssign] = "Op_assign" |
|||
tokMap[tkAnd] = "Op_and" |
|||
tokMap[tkOr] = "Op_or" |
|||
tokMap[tkIf] = "Keyword_if" |
|||
tokMap[tkElse] = "Keyword_else" |
|||
tokMap[tkWhile] = "Keyword_while" |
|||
tokMap[tkPrint] = "Keyword_print" |
|||
tokMap[tkPutc] = "Keyword_putc" |
|||
tokMap[tkLparen] = "LeftParen" |
|||
tokMap[tkRparen] = "RightParen" |
|||
tokMap[tkLbrace] = "LeftBrace" |
|||
tokMap[tkRbrace] = "RightBrace" |
|||
tokMap[tkSemi] = "Semicolon" |
|||
tokMap[tkComma] = "Comma" |
|||
tokMap[tkIdent] = "Identifier" |
|||
tokMap[tkInteger] = "Integer" |
|||
tokMap[tkString] = "String" |
|||
for { |
|||
eline, ecol, tok, v := gettok() |
|||
fmt.Printf("%5d %5d %-16s", eline, ecol, tokMap[tok]) |
|||
if tok == tkInteger || tok == tkIdent || tok == tkString { |
|||
fmt.Println(v) |
|||
} else { |
|||
fmt.Println() |
|||
} |
|||
if tok == tkEOI { |
|||
return |
|||
} |
|||
} |
|||
} |
|||
func check(err error) { |
|||
if err != nil { |
|||
log.Fatal(err) |
|||
} |
|||
} |
|||
func main() { |
|||
if len(os.Args) < 2 { |
|||
fmt.Println("Filename required") |
|||
return |
|||
} |
|||
f, err := os.Open(os.Args[1]) |
|||
check(err) |
|||
defer f.Close() |
|||
scanner = bufio.NewScanner(f) |
|||
initLex() |
|||
process() |
|||
}</lang> |
|||
{{out}} |
|||
Test Case 3: |
|||
<pre> |
|||
5 16 Keyword_print |
|||
5 40 Op_subtract |
|||
6 16 Keyword_putc |
|||
6 40 Op_less |
|||
7 16 Keyword_if |
|||
7 40 Op_greater |
|||
8 16 Keyword_else |
|||
8 40 Op_lessequal |
|||
9 16 Keyword_while |
|||
9 40 Op_greaterequal |
|||
10 16 LeftBrace |
|||
10 40 Op_equal |
|||
11 16 RightBrace |
|||
11 40 Op_notequal |
|||
12 16 LeftParen |
|||
12 40 Op_and |
|||
13 16 RightParen |
|||
13 40 Op_or |
|||
14 16 Op_subtract |
|||
14 40 Semicolon |
|||
15 16 Op_not |
|||
15 40 Comma |
|||
16 16 Op_multiply |
|||
16 40 Op_assign |
|||
17 16 Op_divide |
|||
17 40 Integer 42 |
|||
18 16 Op_mod |
|||
18 40 String "String literal" |
|||
19 16 Op_add |
|||
19 40 Identifier variable_name |
|||
20 26 Integer 10 |
|||
21 26 Integer 92 |
|||
22 26 Integer 32 |
|||
23 1 End_of_input |
|||
</pre> |
|||
=={{header|Perl}}== |
=={{header|Perl}}== |