@@ -4,116 +4,118 @@ import (
4
4
"fmt"
5
5
"runtime"
6
6
"testing"
7
+
8
+ "github.com/vcaesar/tt"
7
9
)
8
10
9
11
var (
10
12
prodSeg = Segmenter {}
11
13
)
12
14
13
15
func TestGetVer (t * testing.T ) {
14
- fmt .Println (runtime .Version ())
16
+ fmt .Println ("go version: " , runtime .Version ())
15
17
ver := GetVersion ()
16
- expect (t , version , ver )
18
+ tt . Expect (t , version , ver )
17
19
}
18
20
19
21
func TestSplit (t * testing.T ) {
20
- expect (t , "中/国/有/十/三/亿/人/口/" ,
22
+ tt . Expect (t , "中/国/有/十/三/亿/人/口/" ,
21
23
bytesToString (splitTextToWords ([]byte (
22
24
"中国有十三亿人口" ))))
23
25
24
- expect (t , "github/ /is/ /a/ /web/-/based/ /hosting/ /service/,/ /for/ /software/ /development/ /projects/./" ,
26
+ tt . Expect (t , "github/ /is/ /a/ /web/-/based/ /hosting/ /service/,/ /for/ /software/ /development/ /projects/./" ,
25
27
bytesToString (splitTextToWords ([]byte (
26
28
"GitHub is a web-based hosting service, for software development projects." ))))
27
29
28
- expect (t , "中/国/雅/虎/yahoo/!/ /china/致/力/于/,/领/先/的/公/益/民/生/门/户/网/站/。/" ,
30
+ tt . Expect (t , "中/国/雅/虎/yahoo/!/ /china/致/力/于/,/领/先/的/公/益/民/生/门/户/网/站/。/" ,
29
31
bytesToString (splitTextToWords ([]byte (
30
32
"中国雅虎Yahoo! China致力于,领先的公益民生门户网站。" ))))
31
33
32
- expect (t , "こ/ん/に/ち/は/" , bytesToString (splitTextToWords ([]byte ("こんにちは" ))))
34
+ tt . Expect (t , "こ/ん/に/ち/は/" , bytesToString (splitTextToWords ([]byte ("こんにちは" ))))
33
35
34
- expect (t , "안/녕/하/세/요/" , bytesToString (splitTextToWords ([]byte ("안녕하세요" ))))
36
+ tt . Expect (t , "안/녕/하/세/요/" , bytesToString (splitTextToWords ([]byte ("안녕하세요" ))))
35
37
36
- expect (t , "Я/ /тоже/ /рада/ /Вас/ /видеть/" , bytesToString (splitTextToWords ([]byte ("Я тоже рада Вас видеть" ))))
38
+ tt . Expect (t , "Я/ /тоже/ /рада/ /Вас/ /видеть/" , bytesToString (splitTextToWords ([]byte ("Я тоже рада Вас видеть" ))))
37
39
38
- expect (t , "¿/cómo/ /van/ /las/ /cosas/" , bytesToString (splitTextToWords ([]byte ("¿Cómo van las cosas" ))))
40
+ tt . Expect (t , "¿/cómo/ /van/ /las/ /cosas/" , bytesToString (splitTextToWords ([]byte ("¿Cómo van las cosas" ))))
39
41
40
- expect (t , "wie/ /geht/ /es/ /ihnen/" , bytesToString (splitTextToWords ([]byte ("Wie geht es Ihnen" ))))
42
+ tt . Expect (t , "wie/ /geht/ /es/ /ihnen/" , bytesToString (splitTextToWords ([]byte ("Wie geht es Ihnen" ))))
41
43
42
- expect (t , "je/ /suis/ /enchanté/ /de/ /cette/ /pièce/" ,
44
+ tt . Expect (t , "je/ /suis/ /enchanté/ /de/ /cette/ /pièce/" ,
43
45
bytesToString (splitTextToWords ([]byte ("Je suis enchanté de cette pièce" ))))
44
46
}
45
47
46
48
func TestSegment (t * testing.T ) {
47
49
var seg Segmenter
48
50
seg .LoadDict ("testdata/test_dict1.txt,testdata/test_dict2.txt" )
49
51
// seg.LoadDict("testdata/test_dict1.txt", "testdata/test_dict2.txt")
50
- expect (t , "12" , seg .dict .NumTokens ())
51
- // expect (t, "5", seg.dict.NumTokens())
52
+ tt . Expect (t , "12" , seg .dict .NumTokens ())
53
+ // tt.Expect (t, "5", seg.dict.NumTokens())
52
54
segments := seg .Segment ([]byte ("中国有十三亿人口" ))
53
- expect (t , "中国/ 有/p3 十三亿/ 人口/p12 " , ToString (segments , false ))
54
- // expect (t, "中国/ 有/x 十三亿/ 人口/p12 ", ToString(segments, false))
55
- expect (t , "4" , len (segments ))
56
- expect (t , "0" , segments [0 ].start )
57
- expect (t , "6" , segments [0 ].end )
58
- expect (t , "6" , segments [1 ].start )
59
- expect (t , "9" , segments [1 ].end )
60
- expect (t , "9" , segments [2 ].start )
61
- expect (t , "18" , segments [2 ].end )
62
- expect (t , "18" , segments [3 ].start )
63
- expect (t , "24" , segments [3 ].end )
55
+ tt . Expect (t , "中国/ 有/p3 十三亿/ 人口/p12 " , ToString (segments , false ))
56
+ // tt.Expect (t, "中国/ 有/x 十三亿/ 人口/p12 ", ToString(segments, false))
57
+ tt . Expect (t , "4" , len (segments ))
58
+ tt . Expect (t , "0" , segments [0 ].start )
59
+ tt . Expect (t , "6" , segments [0 ].end )
60
+ tt . Expect (t , "6" , segments [1 ].start )
61
+ tt . Expect (t , "9" , segments [1 ].end )
62
+ tt . Expect (t , "9" , segments [2 ].start )
63
+ tt . Expect (t , "18" , segments [2 ].end )
64
+ tt . Expect (t , "18" , segments [3 ].start )
65
+ tt . Expect (t , "24" , segments [3 ].end )
64
66
}
65
67
66
68
func TestSegmentS (t * testing.T ) {
67
69
var seg Segmenter
68
70
seg .LoadDict ("testdata/test_dict.txt" )
69
71
70
72
dict := seg .Dictionary ()
71
- expect (t , "4" , dict .maxTokenLen )
72
- expect (t , "2103" , dict .totalFrequency )
73
+ tt . Expect (t , "4" , dict .maxTokenLen )
74
+ tt . Expect (t , "2103" , dict .totalFrequency )
73
75
74
- expect (t , "19" , seg .dict .NumTokens ())
76
+ tt . Expect (t , "19" , seg .dict .NumTokens ())
75
77
text1 := []byte ("深圳地王大厦" )
76
78
segments := seg .Segment ([]byte (text1 ))
77
- expect (t , "深圳/n 地王大厦/n " , ToString (segments , false ))
79
+ tt . Expect (t , "深圳/n 地王大厦/n " , ToString (segments , false ))
78
80
79
81
segs := seg .ModeSegment ([]byte (text1 ), true )
80
- expect (t , "深圳/n 地王大厦/n " , ToString (segs , false ))
82
+ tt . Expect (t , "深圳/n 地王大厦/n " , ToString (segs , false ))
81
83
82
- expect (t , "2" , len (segments ))
83
- expect (t , "0" , segments [0 ].start )
84
- expect (t , "6" , segments [0 ].end )
85
- expect (t , "6" , segments [1 ].start )
86
- expect (t , "18" , segments [1 ].end )
84
+ tt . Expect (t , "2" , len (segments ))
85
+ tt . Expect (t , "0" , segments [0 ].start )
86
+ tt . Expect (t , "6" , segments [0 ].end )
87
+ tt . Expect (t , "6" , segments [1 ].start )
88
+ tt . Expect (t , "18" , segments [1 ].end )
87
89
88
90
text2 := []byte ("留给真爱你的人" )
89
91
segments2 := seg .Segment ([]byte (text2 ))
90
- expect (t , "留给/v 真爱/nr 你/x 的/x 人/x " , ToString (segments2 , false ))
92
+ tt . Expect (t , "留给/v 真爱/nr 你/x 的/x 人/x " , ToString (segments2 , false ))
91
93
92
- expect (t , "5" , len (segments2 ))
93
- expect (t , "0" , segments2 [0 ].start )
94
- expect (t , "6" , segments2 [0 ].end )
95
- expect (t , "6" , segments2 [1 ].start )
96
- expect (t , "12" , segments2 [1 ].end )
94
+ tt . Expect (t , "5" , len (segments2 ))
95
+ tt . Expect (t , "0" , segments2 [0 ].start )
96
+ tt . Expect (t , "6" , segments2 [0 ].end )
97
+ tt . Expect (t , "6" , segments2 [1 ].start )
98
+ tt . Expect (t , "12" , segments2 [1 ].end )
97
99
}
98
100
99
101
func TestSegmentJp (t * testing.T ) {
100
102
var seg Segmenter
101
103
seg .LoadDict ("data/dict/jp/dict.txt" )
102
104
text2 := []byte ("こんにちは世界" )
103
105
segments := seg .Segment ([]byte (text2 ))
104
- expect (t , "こんにちは/感動詞 世界/名詞 " , ToString (segments , false ))
105
- expect (t , "2" , len (segments ))
106
- expect (t , "こん/名詞 こんにちは/感動詞 世界/名詞 " , ToString (segments , true ))
107
- expect (t , "[こん こんにちは 世界]" , ToSlice (segments , true ))
108
- expect (t , "[こんにちは 世界]" , ToSlice (segments , false ))
109
- expect (t , "2" , len (segments ))
110
- expect (t , "0" , segments [0 ].start )
111
- expect (t , "15" , segments [0 ].end )
106
+ tt . Expect (t , "こんにちは/感動詞 世界/名詞 " , ToString (segments , false ))
107
+ tt . Expect (t , "2" , len (segments ))
108
+ tt . Expect (t , "こん/名詞 こんにちは/感動詞 世界/名詞 " , ToString (segments , true ))
109
+ tt . Expect (t , "[こん こんにちは 世界]" , ToSlice (segments , true ))
110
+ tt . Expect (t , "[こんにちは 世界]" , ToSlice (segments , false ))
111
+ tt . Expect (t , "2" , len (segments ))
112
+ tt . Expect (t , "0" , segments [0 ].start )
113
+ tt . Expect (t , "15" , segments [0 ].end )
112
114
}
113
115
114
116
func TestDictPaths (t * testing.T ) {
115
117
paths := DictPaths ("./dictDir" , "zh,jp" )
116
- expect (t , "2" , len (paths ))
118
+ tt . Expect (t , "2" , len (paths ))
117
119
if paths [0 ] != "dictDir/dict/dictionary.txt" {
118
120
t .Errorf ("what=\" %s\" , got=\" %s\" " , "dictDir/dict/dictionary.txt" , paths [0 ])
119
121
}
@@ -129,75 +131,75 @@ func TestSegmentDicts(t *testing.T) {
129
131
130
132
text1 := []byte ("深圳地王大厦" )
131
133
segments := seg .Segment ([]byte (text1 ))
132
- expect (t , "深圳/ns 地王大厦/n " , ToString (segments , false ))
134
+ tt . Expect (t , "深圳/ns 地王大厦/n " , ToString (segments , false ))
133
135
134
- expect (t , "2" , len (segments ))
135
- expect (t , "0" , segments [0 ].start )
136
- expect (t , "6" , segments [0 ].end )
137
- expect (t , "6" , segments [1 ].start )
138
- expect (t , "18" , segments [1 ].end )
136
+ tt . Expect (t , "2" , len (segments ))
137
+ tt . Expect (t , "0" , segments [0 ].start )
138
+ tt . Expect (t , "6" , segments [0 ].end )
139
+ tt . Expect (t , "6" , segments [1 ].start )
140
+ tt . Expect (t , "18" , segments [1 ].end )
139
141
140
142
text2 := []byte ("こんにちは世界" )
141
143
segments = seg .Segment ([]byte (text2 ))
142
- expect (t , "こんにちは/感動詞 世界/n " , ToString (segments , false ))
143
- expect (t , "2" , len (segments ))
144
- expect (t , "こん/名詞 こんにちは/感動詞 世界/n " , ToString (segments , true ))
145
- expect (t , "2" , len (segments ))
146
- expect (t , "0" , segments [0 ].start )
147
- expect (t , "15" , segments [0 ].end )
144
+ tt . Expect (t , "こんにちは/感動詞 世界/n " , ToString (segments , false ))
145
+ tt . Expect (t , "2" , len (segments ))
146
+ tt . Expect (t , "こん/名詞 こんにちは/感動詞 世界/n " , ToString (segments , true ))
147
+ tt . Expect (t , "2" , len (segments ))
148
+ tt . Expect (t , "0" , segments [0 ].start )
149
+ tt . Expect (t , "15" , segments [0 ].end )
148
150
149
- expect (t , "0" , segments [0 ].Start ())
150
- expect (t , "15" , segments [0 ].End ())
151
+ tt . Expect (t , "0" , segments [0 ].Start ())
152
+ tt . Expect (t , "15" , segments [0 ].End ())
151
153
152
154
token := segments [0 ].Token ()
153
- expect (t , "こんにちは" , token .Text ())
154
- expect (t , "5704" , token .Frequency ())
155
- expect (t , "感動詞" , token .Pos ())
155
+ tt . Expect (t , "こんにちは" , token .Text ())
156
+ tt . Expect (t , "5704" , token .Frequency ())
157
+ tt . Expect (t , "感動詞" , token .Pos ())
156
158
157
159
tseg := token .Segments ()
158
- expect (t , "0" , tseg [0 ].Start ())
159
- expect (t , "6" , tseg [0 ].End ())
160
+ tt . Expect (t , "0" , tseg [0 ].Start ())
161
+ tt . Expect (t , "6" , tseg [0 ].End ())
160
162
}
161
163
162
164
func TestLargeDictionary (t * testing.T ) {
163
165
prodSeg .LoadDict ("data/dict/dictionary.txt" )
164
- expect (t , "中国/ns 人口/n " , ToString (prodSeg .Segment (
166
+ tt . Expect (t , "中国/ns 人口/n " , ToString (prodSeg .Segment (
165
167
[]byte ("中国人口" )), false ))
166
168
167
- expect (t , "中国/ns 人口/n " , ToString (prodSeg .internalSegment (
169
+ tt . Expect (t , "中国/ns 人口/n " , ToString (prodSeg .internalSegment (
168
170
[]byte ("中国人口" ), false ), false ))
169
171
170
- expect (t , "中国/ns 人口/n " , ToString (prodSeg .internalSegment (
172
+ tt . Expect (t , "中国/ns 人口/n " , ToString (prodSeg .internalSegment (
171
173
[]byte ("中国人口" ), true ), false ))
172
174
173
- expect (t , "中华人民共和国/ns 中央人民政府/nt " , ToString (prodSeg .internalSegment (
175
+ tt . Expect (t , "中华人民共和国/ns 中央人民政府/nt " , ToString (prodSeg .internalSegment (
174
176
[]byte ("中华人民共和国中央人民政府" ), true ), false ))
175
177
176
- expect (t , "中华人民共和国中央人民政府/nt " , ToString (prodSeg .internalSegment (
178
+ tt . Expect (t , "中华人民共和国中央人民政府/nt " , ToString (prodSeg .internalSegment (
177
179
[]byte ("中华人民共和国中央人民政府" ), false ), false ))
178
180
179
- expect (t , "中华/nz 人民/n 共和/nz 共和国/ns 人民共和国/nt 中华人民共和国/ns 中央/n 人民/n 政府/n 人民政府/nt 中央人民政府/nt 中华人民共和国中央人民政府/nt " , ToString (prodSeg .Segment (
181
+ tt . Expect (t , "中华/nz 人民/n 共和/nz 共和国/ns 人民共和国/nt 中华人民共和国/ns 中央/n 人民/n 政府/n 人民政府/nt 中央人民政府/nt 中华人民共和国中央人民政府/nt " , ToString (prodSeg .Segment (
180
182
[]byte ("中华人民共和国中央人民政府" )), true ))
181
183
}
182
184
183
185
// func TestLoadDictionary(t *testing.T) {
// 	var seg Segmenter
// 	seg.LoadDict()
// 	tt.Expect(t, "中国/ns 人口/n ", ToString(prodSeg.Segment(
// 		[]byte("中国人口")), false))
//
// 	tt.Expect(t, "中国/ns 人口/n ", ToString(prodSeg.internalSegment(
// 		[]byte("中国人口"), false), false))
//
// 	tt.Expect(t, "中国/ns 人口/n ", ToString(prodSeg.internalSegment(
// 		[]byte("中国人口"), true), false))
//
// 	tt.Expect(t, "中华人民共和国/ns 中央人民政府/nt ", ToString(prodSeg.internalSegment(
// 		[]byte("中华人民共和国中央人民政府"), true), false))
//
// 	tt.Expect(t, "中华人民共和国中央人民政府/nt ", ToString(prodSeg.internalSegment(
// 		[]byte("中华人民共和国中央人民政府"), false), false))
//
// 	tt.Expect(t, "中华/nz 人民/n 共和/nz 共和国/ns 人民共和国/nt 中华人民共和国/ns 中央/n 人民/n 政府/n 人民政府/nt 中央人民政府/nt 中华人民共和国中央人民政府/nt ", ToString(prodSeg.Segment(
// 		[]byte("中华人民共和国中央人民政府")), true))
// }
0 commit comments