Skip to content

Commit 0306de4

Browse files
committed Apr 3, 2018
update gse test utils
1 parent b378813 commit 0306de4

File tree

2 files changed

+89
-87
lines changed

2 files changed

+89
-87
lines changed
 

‎segmenter_test.go

+82 -80
Original file line number | Diff line number | Diff line change
@@ -4,116 +4,118 @@ import (
44
"fmt"
55
"runtime"
66
"testing"
7+
8+
"github.com/vcaesar/tt"
79
)
810

911
var (
1012
prodSeg = Segmenter{}
1113
)
1214

1315
func TestGetVer(t *testing.T) {
14-
fmt.Println(runtime.Version())
16+
fmt.Println("go version: ", runtime.Version())
1517
ver := GetVersion()
16-
expect(t, version, ver)
18+
tt.Expect(t, version, ver)
1719
}
1820

1921
func TestSplit(t *testing.T) {
20-
expect(t, "中/国/有/十/三/亿/人/口/",
22+
tt.Expect(t, "中/国/有/十/三/亿/人/口/",
2123
bytesToString(splitTextToWords([]byte(
2224
"中国有十三亿人口"))))
2325

24-
expect(t, "github/ /is/ /a/ /web/-/based/ /hosting/ /service/,/ /for/ /software/ /development/ /projects/./",
26+
tt.Expect(t, "github/ /is/ /a/ /web/-/based/ /hosting/ /service/,/ /for/ /software/ /development/ /projects/./",
2527
bytesToString(splitTextToWords([]byte(
2628
"GitHub is a web-based hosting service, for software development projects."))))
2729

28-
expect(t, "中/国/雅/虎/yahoo/!/ /china/致/力/于/,/领/先/的/公/益/民/生/门/户/网/站/。/",
30+
tt.Expect(t, "中/国/雅/虎/yahoo/!/ /china/致/力/于/,/领/先/的/公/益/民/生/门/户/网/站/。/",
2931
bytesToString(splitTextToWords([]byte(
3032
"中国雅虎Yahoo! China致力于,领先的公益民生门户网站。"))))
3133

32-
expect(t, "こ/ん/に/ち/は/", bytesToString(splitTextToWords([]byte("こんにちは"))))
34+
tt.Expect(t, "こ/ん/に/ち/は/", bytesToString(splitTextToWords([]byte("こんにちは"))))
3335

34-
expect(t, "안/녕/하/세/요/", bytesToString(splitTextToWords([]byte("안녕하세요"))))
36+
tt.Expect(t, "안/녕/하/세/요/", bytesToString(splitTextToWords([]byte("안녕하세요"))))
3537

36-
expect(t, "Я/ /тоже/ /рада/ /Вас/ /видеть/", bytesToString(splitTextToWords([]byte("Я тоже рада Вас видеть"))))
38+
tt.Expect(t, "Я/ /тоже/ /рада/ /Вас/ /видеть/", bytesToString(splitTextToWords([]byte("Я тоже рада Вас видеть"))))
3739

38-
expect(t, "¿/cómo/ /van/ /las/ /cosas/", bytesToString(splitTextToWords([]byte("¿Cómo van las cosas"))))
40+
tt.Expect(t, "¿/cómo/ /van/ /las/ /cosas/", bytesToString(splitTextToWords([]byte("¿Cómo van las cosas"))))
3941

40-
expect(t, "wie/ /geht/ /es/ /ihnen/", bytesToString(splitTextToWords([]byte("Wie geht es Ihnen"))))
42+
tt.Expect(t, "wie/ /geht/ /es/ /ihnen/", bytesToString(splitTextToWords([]byte("Wie geht es Ihnen"))))
4143

42-
expect(t, "je/ /suis/ /enchanté/ /de/ /cette/ /pièce/",
44+
tt.Expect(t, "je/ /suis/ /enchanté/ /de/ /cette/ /pièce/",
4345
bytesToString(splitTextToWords([]byte("Je suis enchanté de cette pièce"))))
4446
}
4547

4648
func TestSegment(t *testing.T) {
4749
var seg Segmenter
4850
seg.LoadDict("testdata/test_dict1.txt,testdata/test_dict2.txt")
4951
// seg.LoadDict("testdata/test_dict1.txt", "testdata/test_dict2.txt")
50-
expect(t, "12", seg.dict.NumTokens())
51-
// expect(t, "5", seg.dict.NumTokens())
52+
tt.Expect(t, "12", seg.dict.NumTokens())
53+
// tt.Expect(t, "5", seg.dict.NumTokens())
5254
segments := seg.Segment([]byte("中国有十三亿人口"))
53-
expect(t, "中国/ 有/p3 十三亿/ 人口/p12 ", ToString(segments, false))
54-
// expect(t, "中国/ 有/x 十三亿/ 人口/p12 ", ToString(segments, false))
55-
expect(t, "4", len(segments))
56-
expect(t, "0", segments[0].start)
57-
expect(t, "6", segments[0].end)
58-
expect(t, "6", segments[1].start)
59-
expect(t, "9", segments[1].end)
60-
expect(t, "9", segments[2].start)
61-
expect(t, "18", segments[2].end)
62-
expect(t, "18", segments[3].start)
63-
expect(t, "24", segments[3].end)
55+
tt.Expect(t, "中国/ 有/p3 十三亿/ 人口/p12 ", ToString(segments, false))
56+
// tt.Expect(t, "中国/ 有/x 十三亿/ 人口/p12 ", ToString(segments, false))
57+
tt.Expect(t, "4", len(segments))
58+
tt.Expect(t, "0", segments[0].start)
59+
tt.Expect(t, "6", segments[0].end)
60+
tt.Expect(t, "6", segments[1].start)
61+
tt.Expect(t, "9", segments[1].end)
62+
tt.Expect(t, "9", segments[2].start)
63+
tt.Expect(t, "18", segments[2].end)
64+
tt.Expect(t, "18", segments[3].start)
65+
tt.Expect(t, "24", segments[3].end)
6466
}
6567

6668
func TestSegmentS(t *testing.T) {
6769
var seg Segmenter
6870
seg.LoadDict("testdata/test_dict.txt")
6971

7072
dict := seg.Dictionary()
71-
expect(t, "4", dict.maxTokenLen)
72-
expect(t, "2103", dict.totalFrequency)
73+
tt.Expect(t, "4", dict.maxTokenLen)
74+
tt.Expect(t, "2103", dict.totalFrequency)
7375

74-
expect(t, "19", seg.dict.NumTokens())
76+
tt.Expect(t, "19", seg.dict.NumTokens())
7577
text1 := []byte("深圳地王大厦")
7678
segments := seg.Segment([]byte(text1))
77-
expect(t, "深圳/n 地王大厦/n ", ToString(segments, false))
79+
tt.Expect(t, "深圳/n 地王大厦/n ", ToString(segments, false))
7880

7981
segs := seg.ModeSegment([]byte(text1), true)
80-
expect(t, "深圳/n 地王大厦/n ", ToString(segs, false))
82+
tt.Expect(t, "深圳/n 地王大厦/n ", ToString(segs, false))
8183

82-
expect(t, "2", len(segments))
83-
expect(t, "0", segments[0].start)
84-
expect(t, "6", segments[0].end)
85-
expect(t, "6", segments[1].start)
86-
expect(t, "18", segments[1].end)
84+
tt.Expect(t, "2", len(segments))
85+
tt.Expect(t, "0", segments[0].start)
86+
tt.Expect(t, "6", segments[0].end)
87+
tt.Expect(t, "6", segments[1].start)
88+
tt.Expect(t, "18", segments[1].end)
8789

8890
text2 := []byte("留给真爱你的人")
8991
segments2 := seg.Segment([]byte(text2))
90-
expect(t, "留给/v 真爱/nr 你/x 的/x 人/x ", ToString(segments2, false))
92+
tt.Expect(t, "留给/v 真爱/nr 你/x 的/x 人/x ", ToString(segments2, false))
9193

92-
expect(t, "5", len(segments2))
93-
expect(t, "0", segments2[0].start)
94-
expect(t, "6", segments2[0].end)
95-
expect(t, "6", segments2[1].start)
96-
expect(t, "12", segments2[1].end)
94+
tt.Expect(t, "5", len(segments2))
95+
tt.Expect(t, "0", segments2[0].start)
96+
tt.Expect(t, "6", segments2[0].end)
97+
tt.Expect(t, "6", segments2[1].start)
98+
tt.Expect(t, "12", segments2[1].end)
9799
}
98100

99101
func TestSegmentJp(t *testing.T) {
100102
var seg Segmenter
101103
seg.LoadDict("data/dict/jp/dict.txt")
102104
text2 := []byte("こんにちは世界")
103105
segments := seg.Segment([]byte(text2))
104-
expect(t, "こんにちは/感動詞 世界/名詞 ", ToString(segments, false))
105-
expect(t, "2", len(segments))
106-
expect(t, "こん/名詞 こんにちは/感動詞 世界/名詞 ", ToString(segments, true))
107-
expect(t, "[こん こんにちは 世界]", ToSlice(segments, true))
108-
expect(t, "[こんにちは 世界]", ToSlice(segments, false))
109-
expect(t, "2", len(segments))
110-
expect(t, "0", segments[0].start)
111-
expect(t, "15", segments[0].end)
106+
tt.Expect(t, "こんにちは/感動詞 世界/名詞 ", ToString(segments, false))
107+
tt.Expect(t, "2", len(segments))
108+
tt.Expect(t, "こん/名詞 こんにちは/感動詞 世界/名詞 ", ToString(segments, true))
109+
tt.Expect(t, "[こん こんにちは 世界]", ToSlice(segments, true))
110+
tt.Expect(t, "[こんにちは 世界]", ToSlice(segments, false))
111+
tt.Expect(t, "2", len(segments))
112+
tt.Expect(t, "0", segments[0].start)
113+
tt.Expect(t, "15", segments[0].end)
112114
}
113115

114116
func TestDictPaths(t *testing.T) {
115117
paths := DictPaths("./dictDir", "zh,jp")
116-
expect(t, "2", len(paths))
118+
tt.Expect(t, "2", len(paths))
117119
if paths[0] != "dictDir/dict/dictionary.txt" {
118120
t.Errorf("what=\"%s\", got=\"%s\"", "dictDir/dict/dictionary.txt", paths[0])
119121
}
@@ -129,75 +131,75 @@ func TestSegmentDicts(t *testing.T) {
129131

130132
text1 := []byte("深圳地王大厦")
131133
segments := seg.Segment([]byte(text1))
132-
expect(t, "深圳/ns 地王大厦/n ", ToString(segments, false))
134+
tt.Expect(t, "深圳/ns 地王大厦/n ", ToString(segments, false))
133135

134-
expect(t, "2", len(segments))
135-
expect(t, "0", segments[0].start)
136-
expect(t, "6", segments[0].end)
137-
expect(t, "6", segments[1].start)
138-
expect(t, "18", segments[1].end)
136+
tt.Expect(t, "2", len(segments))
137+
tt.Expect(t, "0", segments[0].start)
138+
tt.Expect(t, "6", segments[0].end)
139+
tt.Expect(t, "6", segments[1].start)
140+
tt.Expect(t, "18", segments[1].end)
139141

140142
text2 := []byte("こんにちは世界")
141143
segments = seg.Segment([]byte(text2))
142-
expect(t, "こんにちは/感動詞 世界/n ", ToString(segments, false))
143-
expect(t, "2", len(segments))
144-
expect(t, "こん/名詞 こんにちは/感動詞 世界/n ", ToString(segments, true))
145-
expect(t, "2", len(segments))
146-
expect(t, "0", segments[0].start)
147-
expect(t, "15", segments[0].end)
144+
tt.Expect(t, "こんにちは/感動詞 世界/n ", ToString(segments, false))
145+
tt.Expect(t, "2", len(segments))
146+
tt.Expect(t, "こん/名詞 こんにちは/感動詞 世界/n ", ToString(segments, true))
147+
tt.Expect(t, "2", len(segments))
148+
tt.Expect(t, "0", segments[0].start)
149+
tt.Expect(t, "15", segments[0].end)
148150

149-
expect(t, "0", segments[0].Start())
150-
expect(t, "15", segments[0].End())
151+
tt.Expect(t, "0", segments[0].Start())
152+
tt.Expect(t, "15", segments[0].End())
151153

152154
token := segments[0].Token()
153-
expect(t, "こんにちは", token.Text())
154-
expect(t, "5704", token.Frequency())
155-
expect(t, "感動詞", token.Pos())
155+
tt.Expect(t, "こんにちは", token.Text())
156+
tt.Expect(t, "5704", token.Frequency())
157+
tt.Expect(t, "感動詞", token.Pos())
156158

157159
tseg := token.Segments()
158-
expect(t, "0", tseg[0].Start())
159-
expect(t, "6", tseg[0].End())
160+
tt.Expect(t, "0", tseg[0].Start())
161+
tt.Expect(t, "6", tseg[0].End())
160162
}
161163

162164
func TestLargeDictionary(t *testing.T) {
163165
prodSeg.LoadDict("data/dict/dictionary.txt")
164-
expect(t, "中国/ns 人口/n ", ToString(prodSeg.Segment(
166+
tt.Expect(t, "中国/ns 人口/n ", ToString(prodSeg.Segment(
165167
[]byte("中国人口")), false))
166168

167-
expect(t, "中国/ns 人口/n ", ToString(prodSeg.internalSegment(
169+
tt.Expect(t, "中国/ns 人口/n ", ToString(prodSeg.internalSegment(
168170
[]byte("中国人口"), false), false))
169171

170-
expect(t, "中国/ns 人口/n ", ToString(prodSeg.internalSegment(
172+
tt.Expect(t, "中国/ns 人口/n ", ToString(prodSeg.internalSegment(
171173
[]byte("中国人口"), true), false))
172174

173-
expect(t, "中华人民共和国/ns 中央人民政府/nt ", ToString(prodSeg.internalSegment(
175+
tt.Expect(t, "中华人民共和国/ns 中央人民政府/nt ", ToString(prodSeg.internalSegment(
174176
[]byte("中华人民共和国中央人民政府"), true), false))
175177

176-
expect(t, "中华人民共和国中央人民政府/nt ", ToString(prodSeg.internalSegment(
178+
tt.Expect(t, "中华人民共和国中央人民政府/nt ", ToString(prodSeg.internalSegment(
177179
[]byte("中华人民共和国中央人民政府"), false), false))
178180

179-
expect(t, "中华/nz 人民/n 共和/nz 共和国/ns 人民共和国/nt 中华人民共和国/ns 中央/n 人民/n 政府/n 人民政府/nt 中央人民政府/nt 中华人民共和国中央人民政府/nt ", ToString(prodSeg.Segment(
181+
tt.Expect(t, "中华/nz 人民/n 共和/nz 共和国/ns 人民共和国/nt 中华人民共和国/ns 中央/n 人民/n 政府/n 人民政府/nt 中央人民政府/nt 中华人民共和国中央人民政府/nt ", ToString(prodSeg.Segment(
180182
[]byte("中华人民共和国中央人民政府")), true))
181183
}
182184

183185
// func TestLoadDictionary(t *testing.T) {
184186
// var seg Segmenter
185187
// seg.LoadDict()
186-
// expect(t, "中国/ns 人口/n ", ToString(prodSeg.Segment(
188+
// tt.Expect(t, "中国/ns 人口/n ", ToString(prodSeg.Segment(
187189
// []byte("中国人口")), false))
188190

189-
// expect(t, "中国/ns 人口/n ", ToString(prodSeg.internalSegment(
191+
// tt.Expect(t, "中国/ns 人口/n ", ToString(prodSeg.internalSegment(
190192
// []byte("中国人口"), false), false))
191193

192-
// expect(t, "中国/ns 人口/n ", ToString(prodSeg.internalSegment(
194+
// tt.Expect(t, "中国/ns 人口/n ", ToString(prodSeg.internalSegment(
193195
// []byte("中国人口"), true), false))
194196

195-
// expect(t, "中华人民共和国/ns 中央人民政府/nt ", ToString(prodSeg.internalSegment(
197+
// tt.Expect(t, "中华人民共和国/ns 中央人民政府/nt ", ToString(prodSeg.internalSegment(
196198
// []byte("中华人民共和国中央人民政府"), true), false))
197199

198-
// expect(t, "中华人民共和国中央人民政府/nt ", ToString(prodSeg.internalSegment(
200+
// tt.Expect(t, "中华人民共和国中央人民政府/nt ", ToString(prodSeg.internalSegment(
199201
// []byte("中华人民共和国中央人民政府"), false), false))
200202

201-
// expect(t, "中华/nz 人民/n 共和/nz 共和国/ns 人民共和国/nt 中华人民共和国/ns 中央/n 人民/n 政府/n 人民政府/nt 中央人民政府/nt 中华人民共和国中央人民政府/nt ", ToString(prodSeg.Segment(
203+
// tt.Expect(t, "中华/nz 人民/n 共和/nz 共和国/ns 人民共和国/nt 中华人民共和国/ns 中央/n 人民/n 政府/n 人民政府/nt 中央人民政府/nt 中华人民共和国中央人民政府/nt ", ToString(prodSeg.Segment(
202204
// []byte("中华人民共和国中央人民政府")), true))
203205
// }

‎test_utils.go

+7 -7
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,6 @@ import (
55
"testing"
66
)
77

8-
func expect(t *testing.T, expect string, actual interface{}) {
9-
actualString := fmt.Sprint(actual)
10-
if expect != actualString {
11-
t.Errorf("期待值=\"%s\", 实际=\"%s\"", expect, actualString)
12-
}
13-
}
14-
158
func printTokens(tokens []*Token, numTokens int) (output string) {
169
for iToken := 0; iToken < numTokens; iToken++ {
1710
for _, word := range tokens[iToken].text {
@@ -36,3 +29,10 @@ func bytesToString(bytes []Text) (output string) {
3629
}
3730
return
3831
}
32+
33+
func expect(t *testing.T, expect string, actual interface{}) {
34+
actualString := fmt.Sprint(actual)
35+
if expect != actualString {
36+
t.Errorf("期待值=\"%s\", 实际=\"%s\"", expect, actualString)
37+
}
38+
}

0 commit comments

Comments
 (0)
Please sign in to comment.