1
+ // SA-IS algorithm for suffix array construction
2
/// Marker stored in a suffix-array slot that has not been assigned yet.
///
/// `0` cannot serve as the marker because it is a valid suffix start.
const SA_EMPTY: usize = usize::MAX;

/// Builds the suffix array of `s` with the SA-IS (induced sorting) algorithm.
///
/// The result `sa` is a permutation of `0..s.len()` such that
/// `s[sa[0]..] < s[sa[1]..] < ...` in lexicographic order, where a suffix
/// that is a proper prefix of another sorts first (i.e. the text behaves as
/// if it were followed by a sentinel smaller than every symbol).
///
/// Returns an empty vector for empty input and `[0]` for a single symbol.
fn sa_is(s: &[u16]) -> Vec<usize> {
    // Size the bucket table to the symbols that actually occur.
    let sigma = s.iter().max().map_or(0, |&c| usize::from(c) + 1);
    sa_is_impl(s, sigma)
}

/// SA-IS over an arbitrary integer alphabet.
///
/// Every symbol of `s`, converted to `usize`, must be `< sigma`.
/// The function calls itself on the reduced string of LMS-substring names
/// whenever those names are not already pairwise distinct.
fn sa_is_impl<T>(s: &[T], sigma: usize) -> Vec<usize>
where
    T: Copy + Ord + Into<usize>,
{
    let n = s.len();
    if n <= 1 {
        return vec![0; n];
    }

    // Step 1: classify suffixes. `true` = S-type (smaller than its right
    // neighbour), `false` = L-type. The last suffix is L-type because it is
    // followed only by the virtual sentinel.
    let mut t = vec![false; n];
    for i in (0..n - 1).rev() {
        t[i] = s[i] < s[i + 1] || (s[i] == s[i + 1] && t[i + 1]);
    }

    // Step 2: LMS positions (S-type preceded by L-type), in text order.
    let lms: Vec<usize> = (1..n).filter(|&i| t[i] && !t[i - 1]).collect();

    let mut sa = vec![SA_EMPTY; n];
    let mut bkt = vec![0usize; sigma];

    // Step 3: first induced sort. With the LMS suffixes seeded in arbitrary
    // order this sorts the LMS *substrings*; without any LMS position it
    // already yields the final suffix array.
    induced_sort(&mut sa, s, &mut bkt, &t, &lms);
    let m = lms.len();
    if m == 0 {
        return sa;
    }

    // Step 4: name the LMS substrings in sorted order; equal substrings
    // share a name. `lms_index` maps a text position to its rank in `lms`.
    let mut lms_index = vec![SA_EMPTY; n];
    for (k, &p) in lms.iter().enumerate() {
        lms_index[p] = k;
    }
    let by_substring: Vec<usize> = sa
        .iter()
        .filter(|&&p| p != SA_EMPTY)
        .map(|&p| lms_index[p])
        .filter(|&k| k != SA_EMPTY)
        .collect();

    let mut reduced = vec![0usize; m];
    let mut name = 0usize;
    for pair in by_substring.windows(2) {
        let (j, k) = (pair[0], pair[1]);
        // The last LMS substring runs into the virtual sentinel, so it can
        // never equal another one. All others span up to and including the
        // next LMS position.
        let same = j + 1 < m
            && k + 1 < m
            && s[lms[j]..=lms[j + 1]] == s[lms[k]..=lms[k + 1]];
        if !same {
            name += 1;
        }
        reduced[k] = name;
    }

    // Step 5: order the LMS suffixes. Unique names mean the substring order
    // is already the suffix order; otherwise sort the reduced string.
    let sorted_lms: Vec<usize> = if name + 1 < m {
        sa_is_impl(reduced.as_slice(), name + 1)
            .into_iter()
            .map(|k| lms[k])
            .collect()
    } else {
        by_substring.into_iter().map(|k| lms[k]).collect()
    };

    // Step 6: final induced sort from the correctly ordered LMS suffixes.
    induced_sort(&mut sa, s, &mut bkt, &t, &sorted_lms);
    sa
}

/// Clears `sa`, seeds `lms` at the tails of their buckets (keeping the
/// relative order given in `lms`), then induces the L-type and S-type
/// suffixes.
fn induced_sort<T>(sa: &mut [usize], s: &[T], bkt: &mut [usize], t: &[bool], lms: &[usize])
where
    T: Copy + Into<usize>,
{
    sa.fill(SA_EMPTY);
    bucket_bounds(s, bkt, true);
    // Filling from each tail backwards while walking `lms` in reverse keeps
    // the given order inside every bucket.
    for &p in lms.iter().rev() {
        let c: usize = s[p].into();
        bkt[c] -= 1;
        sa[bkt[c]] = p;
    }
    induce_l(sa, s, bkt, t);
    induce_s(sa, s, bkt, t);
}

/// Overwrites `bkt` with per-symbol bucket boundaries computed from `s`:
/// exclusive end offsets when `tails` is true, start offsets otherwise.
///
/// Panics if a symbol of `s` is not a valid index into `bkt`.
fn bucket_bounds<T>(s: &[T], bkt: &mut [usize], tails: bool)
where
    T: Copy + Into<usize>,
{
    bkt.fill(0);
    for &c in s {
        let c: usize = c.into();
        bkt[c] += 1;
    }
    let mut sum = 0;
    for slot in bkt.iter_mut() {
        let count = *slot;
        *slot = if tails { sum + count } else { sum };
        sum += count;
    }
}

/// Helper function to induce L-type suffixes.
///
/// Scans `sa` left to right; for every placed suffix `p` whose predecessor
/// `p - 1` is L-type, writes `p - 1` at the current head of its bucket.
/// Slots holding `SA_EMPTY` or `0` are skipped (suffix 0 has no predecessor).
///
/// `bkt` is scratch space: its previous contents are discarded and the
/// bucket heads are recomputed from `s`, so it must be longer than the
/// largest symbol. `t[i]` is `true` for S-type and `false` for L-type.
/// When the last suffix is L-type it is seeded at the head of its bucket
/// first, standing in for the induction step from the virtual sentinel.
fn induce_l<T>(sa: &mut [usize], s: &[T], bkt: &mut [usize], t: &[bool])
where
    T: Copy + Into<usize>,
{
    let n = s.len();
    if n == 0 {
        return;
    }
    bucket_bounds(s, bkt, false);

    if !t[n - 1] {
        let last: usize = s[n - 1].into();
        sa[bkt[last]] = n - 1;
        bkt[last] += 1;
    }

    for i in 0..n {
        let v = sa[i];
        if v == SA_EMPTY || v == 0 {
            continue;
        }
        let p = v - 1;
        if !t[p] {
            let c: usize = s[p].into();
            sa[bkt[c]] = p;
            bkt[c] += 1;
        }
    }
}

/// Helper function to induce S-type suffixes.
///
/// Scans `sa` right to left; for every placed suffix `p` whose predecessor
/// `p - 1` is S-type, writes `p - 1` just below the current tail of its
/// bucket. Slots holding `SA_EMPTY` or `0` are skipped.
///
/// `bkt` is scratch space: its previous contents are discarded and the
/// bucket tails are recomputed from `s`, so it must be longer than the
/// largest symbol. `t[i]` is `true` for S-type and `false` for L-type.
fn induce_s<T>(sa: &mut [usize], s: &[T], bkt: &mut [usize], t: &[bool])
where
    T: Copy + Into<usize>,
{
    bucket_bounds(s, bkt, true);
    for i in (0..s.len()).rev() {
        let v = sa[i];
        if v == SA_EMPTY || v == 0 {
            continue;
        }
        let p = v - 1;
        if t[p] {
            let c: usize = s[p].into();
            bkt[c] -= 1;
            sa[bkt[c]] = p;
        }
    }
}
85
+
86
+
87
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns `true` when `sa` is a valid suffix array of `s`: it has the
    /// same length as `s`, contains every start position exactly once, and
    /// lists the suffixes in non-decreasing lexicographic order.
    fn is_suffix_array_correct(s: &[u16], sa: &[usize]) -> bool {
        if sa.len() != s.len() {
            return false;
        }

        // Permutation check: each start must be in range and not seen before.
        let mut seen = vec![false; s.len()];
        for &start in sa {
            match seen.get_mut(start) {
                Some(slot) if !*slot => *slot = true,
                _ => return false,
            }
        }

        // Order check on every pair of adjacent entries.
        sa.windows(2)
            .all(|pair| &s[pair[0]..] <= &s[pair[1]..])
    }

    #[test]
    fn test_empty_string() {
        let s: [u16; 0] = [];
        let sa = sa_is(&s);
        assert!(is_suffix_array_correct(&s, &sa));
    }

    #[test]
    fn test_single_character() {
        let s: [u16; 1] = [42];
        let sa = sa_is(&s);
        assert!(is_suffix_array_correct(&s, &sa));
    }

    #[test]
    fn test_two_characters() {
        let s: [u16; 2] = [2, 1];
        let sa = sa_is(&s);
        assert!(is_suffix_array_correct(&s, &sa));
    }

    #[test]
    fn test_repeated_characters() {
        let s: [u16; 4] = [1; 4];
        let sa = sa_is(&s);
        assert!(is_suffix_array_correct(&s, &sa));
    }

    #[test]
    fn test_ascending_sequence() {
        let s: [u16; 5] = [1, 2, 3, 4, 5];
        let sa = sa_is(&s);
        assert!(is_suffix_array_correct(&s, &sa));
    }

    #[test]
    fn test_descending_sequence() {
        let s: [u16; 5] = [5, 4, 3, 2, 1];
        let sa = sa_is(&s);
        assert!(is_suffix_array_correct(&s, &sa));
    }

    #[test]
    fn test_random_sequence() {
        let s: [u16; 10] = [10, 5, 8, 3, 1, 7, 2, 9, 4, 6];
        let sa = sa_is(&s);
        assert!(is_suffix_array_correct(&s, &sa));
    }

    #[test]
    fn test_large_values() {
        let s: [u16; 5] = [65535, 0, 32768, 1, 65534];
        let sa = sa_is(&s);
        assert!(is_suffix_array_correct(&s, &sa));
    }

    #[test]
    fn test_longer_sequence() {
        // 1000 symbols cycling with period 256, so many suffixes share
        // long common prefixes.
        let s: Vec<u16> = (0..1000u32)
            .map(|x| ((x * 17 + 11) % 256) as u16)
            .collect();
        let sa = sa_is(&s);
        assert!(is_suffix_array_correct(&s, &sa));
    }
}
0 commit comments