-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtriego.go
471 lines (410 loc) · 10.2 KB
/
triego.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
// This package exposes a simple Trie implementation
package triego
import (
"github.com/alediaferia/stackgo"
"strings"
)
const (
k_DEFAULT_ALLOC_SIZE = 10
k_WHITESPACE = " "
)
type Trie struct {
IsWord bool
Parent *Trie
chars []rune
Children []*Trie
isRoot bool
depth int
data interface{}
}
type TrieNode Trie
type TriePtr *Trie
type PrefixInfo struct {
Prefix string
IsWord bool
Depth int
// SharedLength gives
// information about how
// much length is shared with
// the previous prefix,
// aka, how many characters
// of the previous prefix are shared
// with the current one
SharedLength int
}
// This call back is used by EachPrefix function
// and called for every prefix in the radix tree.
// Returning true for the skip_subtree value
// makes the algorithm skip the entire subtree that
// is about to explore. This feature
// can enable very fast tree iterations despite the
// DFS nature of the traversal.
type PrefixIteratorCallback func(PrefixInfo) (skip_subtree, halt bool)
// Initializes a new radix tree
func NewTrie() (t *Trie) {
t = new(Trie)
t.IsWord = false
t.Parent = nil
t.chars = make([]rune, 0, k_DEFAULT_ALLOC_SIZE)
t.isRoot = true
t.Children = make([]*Trie, 0)
t.depth = 0
t.data = nil
return
}
// Returns true if this radix tree node is root
func (t *Trie) IsRoot() bool {
return t.isRoot
}
func (t *TrieNode) IsRoot() bool {
return t.isRoot
}
// Returns the depth of the
// node within the whole radix tree
// it belongs to
func (t *TrieNode) Depth() int {
return t.depth
}
// Appends a word to the trie
// The algorithm follows the
// BFS traversal principles implemented
// iteratively. Given suffix is treated
// as a full word.
func (t *Trie) AppendWord(phrase string) {
words := strings.Split(phrase, k_WHITESPACE)
for _, w := range words {
if len(w) != 0 {
t.append_radix([]rune(w), phrase) // we are inserting the whole 'word' for each word part
}
}
}
func (t *Trie) AppendWords(words ...string) {
for _, w := range words {
t.AppendWord(w)
}
}
func (t *Trie) increase_depth() {
last_depth := t.depth
q := new_queue()
q.enqueue(t)
for !q.is_empty() {
n := q.dequeue()
if n.depth > last_depth {
last_depth = n.depth
}
n.depth++
for _, c := range n.Children {
q.enqueue(c)
}
}
}
func (t *Trie) delete_child(name string) {
l := len(t.Children)
for i := 0; i < l; i++ {
if string(t.Children[i].chars) == name {
if i == l-1 {
t.Children = t.Children[:i]
} else {
t.Children = append(t.Children[:i], t.Children[i+1:]...)
}
return
}
}
}
// Inserts the given suffix in the trie associating
// it with the given data
func (t *Trie) append_radix(suffix []rune, data interface{}) {
cn := t
current_children := []*Trie{}
var last_node *Trie = nil
var e_range int = 0
for cn != nil && len(suffix) > 0 {
if !cn.isRoot {
// the current node is not a prefix
// for the current suffix so we skip
// it altogether
if suffix[0] != cn.chars[0] {
goto next
}
// how many characters does this node
// share with this suffix?
r := same_until(suffix, cn.chars)
// case 1:
// suffix and cn.chars completely
// match: we found the node already
// and we just have to make sure
// the node is marked as word already
// and contains the specified data
if r == len(cn.chars)-1 && len(suffix) == len(cn.chars) {
cn.IsWord = true
cn.data = data
return
}
// there is a partial match:
if r > -1 {
// storing this as the
// last visited node
last_node = cn
// now adjusting our
// given and temporary suffix
// before proceeding:
// they'll both start
// at the next character
suffix = suffix[r+1:]
// finally storing the last
// common index for both
// suffixes
e_range = r
// if the matching range is
// smaller than the amount of
// characters in the current node we are done searching
if r < len(cn.chars)-1 {
break
}
}
}
current_children = cn.Children
next:
if len(current_children) != 0 {
cn = current_children[0]
current_children = current_children[1:]
} else {
cn = nil
}
}
// No node found matching
// part of the suffix we want
// to append. A new one will
// be created
if last_node == nil {
new_ := NewTrie()
new_.isRoot = false
new_.chars = make([]rune, len(suffix))
copy(new_.chars, suffix)
new_.Parent = t
t.Children = append(t.Children, new_)
new_.depth = t.depth + 1
new_.IsWord = true
new_.data = data
return
}
// last_node now will contain the node
// which constructs the closest match
// to the suffix we are about to append
//
// we now need to split the matching node
// content so that we can add our suffix
// now adjusting current node
// characters:
// if current node is 'romane'
// and we are about to append
// word 'romanus' we want to preserve
// just up to 'roman' and create 2 subnodes
// 'e' and 'us' respectively
orig_size := len(last_node.chars)
left := last_node.chars[:e_range+1] // will become the content of last_node
sub1 := last_node.chars[e_range+1:] // will become a new sub node
sub2 := suffix // new sub node as well
last_node.chars = left
was_word := last_node.IsWord
if len(suffix) == 0 {
last_node.IsWord = true
} else if e_range+1 != orig_size {
last_node.IsWord = false
}
// TODO: clarify this
if len(sub1) != 0 {
// appending sub1 contents
sub1_c := new(Trie)
sub1_c.isRoot = false
sub1_c.IsWord = was_word
sub1_c.Parent = last_node
sub1_c.chars = sub1
sub1_c.depth = last_node.depth // will increase this later
sub1_c.Children = last_node.Children
sub1_c.data = last_node.data
last_node.data = nil
// we need to update children depth
// since we have just moved this
// subtree one lever lower
sub1_c.increase_depth()
// an important thing to remember is that
// sub1_c inherits all the children from
// last_node which has now been split
last_node.Children = []*Trie{sub1_c}
}
if len(sub2) != 0 {
// appending sub2 contents
sub2_c := new(Trie)
sub2_c.isRoot = false
sub2_c.IsWord = true
sub2_c.Parent = last_node
sub2_c.chars = sub2
sub2_c.depth = last_node.depth + 1
sub2_c.Children = make([]*Trie, 0, 1)
sub2_c.data = data
last_node.Children = append(last_node.Children, sub2_c)
}
}
// Returns true if the word is found
// in the radix tree
func (t *Trie) HasWord(word string) bool {
suffix := []rune(word)
cn := t
current_children := []*Trie{}
for cn != nil {
if !cn.isRoot {
if suffix[0] != cn.chars[0] {
goto next
}
last := same_until(suffix, cn.chars)
if last == len(cn.chars)-1 && len(suffix) == len(cn.chars) {
return cn.IsWord // exact node match
} else {
suffix = suffix[last+1:]
}
// node contains the whole
// suffix string: this means
// we haven't found an exact
// match
if len(suffix) == 0 {
return false
}
}
current_children = cn.Children
next:
if len(current_children) != 0 {
cn = current_children[0]
current_children = current_children[1:]
} else {
cn = nil
}
}
return false
}
// Returns an array of objects that are associated
// with the words closest to the specified word param
func (t *Trie) ClosestWords(word string) []interface{} {
suffix := []rune(word)
cn := t
current_children := []*Trie{}
var last_prefix_node *Trie = nil
prefix := []rune{}
for cn != nil {
if !cn.isRoot {
if suffix[0] != cn.chars[0] {
goto next
}
last := same_until(suffix, cn.chars)
// if the given suffix is equal
// to the node we have an exact
// match and therefore we return
// the corresponding data
if last == len(cn.chars)-1 && len(suffix) == len(cn.chars) {
if cn.IsWord {
return []interface{}{cn.data}
}
}
// if the given suffix
// is still not an empty
// string this means we have
// found a prefix for the given
// word
if last > -1 {
last_prefix_node = cn
suffix = suffix[last+1:]
}
if len(suffix) == 0 {
break
} else {
prefix = append(prefix, cn.chars[:last+1]...)
}
}
current_children = cn.Children
next:
if len(current_children) != 0 {
cn = current_children[0]
current_children = current_children[1:]
} else {
cn = nil
}
}
if last_prefix_node != nil {
return last_prefix_node.Words()
}
return []interface{}{}
}
// Returns a list with all the
// words present in the radix tree
func (t *Trie) Words() (words []interface{}) {
// DFS-based implementation for returning
// all the words in the trie
stack := NewStack()
words = make([]interface{}, 0)
stack.Push(t)
for stack.Size() > 0 {
node := TriePtr(stack.Pop())
if !node.isRoot {
if node.IsWord {
words = append(words, node.data)
}
}
stack.Push(node.Children...)
}
return
}
// Iterates for each prefix in the
// radix tree calling the given callback.
// The given callback can be used to
// guide the tree traversal.
// The traversal is based on a DFS implementation
// backed by a stack to handle the nodes to iterate
// to. A special implementation of the stack
// allows for a O(1) push for all the node children at once
// keeping the whole traversal MAX(O(N)) where N is the
// number of nodes.
func (t *Trie) EachPrefix(callback PrefixIteratorCallback) {
stack := NewStack()
prefix := []rune{}
skipsubtree := false
halt := false
added_lengths := stackgo.NewStack()
last_depth := t.depth
stack.Push(t)
for stack.Size() != 0 {
node := TriePtr(stack.Pop())
if !node.isRoot {
// if we are now going up
// in the radix (e.g. we have
// finished with the current branch)
// then we adjust the current prefix
if last_depth >= node.depth {
var length = 0
for i := 0; i < (last_depth-node.depth)+1; i++ {
length += added_lengths.Pop().(int)
}
prefix = prefix[:len(prefix)-length]
}
last_depth = node.depth
shared_length := len(prefix)
prefix = append(prefix, node.chars...)
added_lengths.Push(len(node.chars))
// building the info
// data to pass to the callback
info := PrefixInfo{
string(prefix),
node.IsWord,
node.depth,
shared_length,
}
skipsubtree, halt = callback(info)
if halt {
return
}
if skipsubtree {
continue
}
}
stack.Push(node.Children...)
}
}