-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathscanner.go
95 lines (87 loc) · 1.51 KB
/
scanner.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
package main
import (
"os"
"unicode"
"utf8"
)
// tokType classifies a token produced by the scanner.
type tokType int
// Token kinds. The values are consecutive integers starting at 0 (iota), so
// reordering this list changes their numeric values.
const (
// empty is the zero value of tokType. nextWord never assigns it.
// NOTE(review): presumably means "no token" — confirm against callers.
empty tokType = iota
// nonterm: the token's first rune is an upper-case letter.
nonterm
// term: the token's first rune matched none of the other cases in nextWord.
term
// newline: a single '\n'; nextWord reports its text as "".
newline
// enddef: the token starts with ';'.
enddef
// alternate: the token starts with '|'.
alternate
// begindef: the token starts with ':'.
begindef
// pcent and begincode are not assigned by nextWord.
// NOTE(review): their producers are outside the visible code — confirm usage.
pcent
begincode
// code: the token starts with '{' while memorizeTerms is true; nextWord then
// tracks brace nesting so the token may contain whitespace.
code
// endcode is not assigned by nextWord.
// NOTE(review): producer is outside the visible code — confirm usage.
endcode
// other is the type nextWord starts with; it is returned only when no rune
// is left to scan after leading whitespace has been skipped.
other
)
// tok is a single token returned by scanner.nextWord.
// nextWord builds it with a positional literal, so the field order matters.
type tok struct {
// text is the token's text as sliced from the input; nextWord sets it to ""
// for newline tokens.
text string
// ttype is the token's classification.
ttype tokType
}
// scanner walks over an input buffer and hands out tokens one at a time.
type scanner struct {
// index is the byte offset into content of the next unread position;
// nextWord advances it by the encoded length of each rune it consumes.
index int
// content is the input being scanned; nextWord decodes it as UTF-8.
content []uint8
}
// remainder returns the part of the input that has not been consumed yet:
// every byte from the current index up to the end of content. The returned
// slice is a view into content, not a copy.
func (s *scanner) remainder() []byte {
	unread := s.content[s.index:]
	return unread
}
// nextWord scans the next token from the scanner's content, starting at the
// current index, and advances the index past it.
//
// Leading whitespace other than '\n' is skipped. The token type is decided by
// the first rune of the token:
//
//   - an upper-case letter starts a nonterm,
//   - '\n' yields a newline token whose text is reported as "",
//   - ':', ';' and '|' start begindef, enddef and alternate tokens,
//   - '{' starts a code token when memorizeTerms is set; the token then runs
//     at least to the matching '}' and may contain whitespace,
//   - anything else starts a term.
//
// A token ends at the first whitespace rune that lies outside a '...' quoted
// span and outside a code block. Braces inside a quoted span (for example
// '{' or '}') do not affect code-block nesting.
//
// It returns an "EOF" error once the index has reached the end of content. If
// only non-newline whitespace remains, it returns an empty token of type
// other with a nil error; the following call reports EOF.
func (s *scanner) nextWord() (word tok, err os.Error) {
	if s.index >= len(s.content) {
		return word, os.NewError("EOF")
	}

	// Skip blanks, but stop at '\n': newlines are tokens in their own right.
	for s.index < len(s.content) {
		r, size := utf8.DecodeRune(s.content[s.index:])
		if !unicode.IsSpace(r) || r == '\n' {
			break
		}
		s.index += size
	}

	start := s.index
	ttype := other
	inchar := false // inside a '...' quoted span
	depth := 0      // nesting depth of '{' ... '}' within a code token

	for s.index < len(s.content) {
		r, size := utf8.DecodeRune(s.content[s.index:])
		if r == '\'' {
			inchar = !inchar
		}
		if s.index == start {
			// The first rune alone decides the token type.
			switch {
			case unicode.IsUpper(r):
				ttype = nonterm
			case r == '\n':
				// A newline is a complete token by itself. Return here
				// instead of relying on a break, which would only leave
				// the switch and not the loop.
				s.index += size
				return tok{"", newline}, nil
			case r == ':':
				ttype = begindef
			case r == ';':
				ttype = enddef
			case r == '|':
				ttype = alternate
			case r == '{' && memorizeTerms:
				depth++
				ttype = code
			default:
				ttype = term
			}
		} else if depth > 0 && !inchar {
			// Only braces outside quoted spans nest; a literal such as '}'
			// inside the code must not close the block.
			switch r {
			case '{':
				depth++
			case '}':
				depth--
			}
		}
		if depth == 0 && !inchar && unicode.IsSpace(r) {
			break
		}
		s.index += size
	}
	return tok{string(s.content[start:s.index]), ttype}, nil
}