gopkg.in/alecthomas/gometalinter.v3@v3.0.0/_linters/src/github.com/client9/misspell/stringreplacer.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package misspell 6 7 import ( 8 "io" 9 // "log" 10 "strings" 11 ) 12 13 // StringReplacer replaces a list of strings with replacements. 14 // It is safe for concurrent use by multiple goroutines. 15 type StringReplacer struct { 16 r replacer 17 } 18 19 // replacer is the interface that a replacement algorithm needs to implement. 20 type replacer interface { 21 Replace(s string) string 22 WriteString(w io.Writer, s string) (n int, err error) 23 } 24 25 // NewStringReplacer returns a new Replacer from a list of old, new string pairs. 26 // Replacements are performed in order, without overlapping matches. 27 func NewStringReplacer(oldnew ...string) *StringReplacer { 28 if len(oldnew)%2 == 1 { 29 panic("strings.NewReplacer: odd argument count") 30 } 31 32 return &StringReplacer{r: makeGenericReplacer(oldnew)} 33 } 34 35 // Replace returns a copy of s with all replacements performed. 36 func (r *StringReplacer) Replace(s string) string { 37 return r.r.Replace(s) 38 } 39 40 // WriteString writes s to w with all replacements performed. 41 func (r *StringReplacer) WriteString(w io.Writer, s string) (n int, err error) { 42 return r.r.WriteString(w, s) 43 } 44 45 // trieNode is a node in a lookup trie for prioritized key/value pairs. Keys 46 // and values may be empty. For example, the trie containing keys "ax", "ay", 47 // "bcbc", "x" and "xy" could have eight nodes: 48 // 49 // n0 - 50 // n1 a- 51 // n2 .x+ 52 // n3 .y+ 53 // n4 b- 54 // n5 .cbc+ 55 // n6 x+ 56 // n7 .y+ 57 // 58 // n0 is the root node, and its children are n1, n4 and n6; n1's children are 59 // n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked 60 // with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7 61 // (marked with a trailing "+") are complete keys. 62 type trieNode struct { 63 // value is the value of the trie node's key/value pair. It is empty if 64 // this node is not a complete key. 65 value string 66 // priority is the priority (higher is more important) of the trie node's 67 // key/value pair; keys are not necessarily matched shortest- or longest- 68 // first. Priority is positive if this node is a complete key, and zero 69 // otherwise. In the example above, positive/zero priorities are marked 70 // with a trailing "+" or "-". 71 priority int 72 73 // A trie node may have zero, one or more child nodes: 74 // * if the remaining fields are zero, there are no children. 75 // * if prefix and next are non-zero, there is one child in next. 76 // * if table is non-zero, it defines all the children. 77 // 78 // Prefixes are preferred over tables when there is one child, but the 79 // root node always uses a table for lookup efficiency. 80 81 // prefix is the difference in keys between this trie node and the next. 82 // In the example above, node n4 has prefix "cbc" and n4's next node is n5. 83 // Node n5 has no children and so has zero prefix, next and table fields. 84 prefix string 85 next *trieNode 86 87 // table is a lookup table indexed by the next byte in the key, after 88 // remapping that byte through genericReplacer.mapping to create a dense 89 // index. In the example above, the keys only use 'a', 'b', 'c', 'x' and 90 // 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and 91 // genericReplacer.tableSize will be 5. Node n0's table will be 92 // []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped 93 // 'a', 'b' and 'x'. 94 table []*trieNode 95 } 96 97 func (t *trieNode) add(key, val string, priority int, r *genericReplacer) { 98 if key == "" { 99 if t.priority == 0 { 100 t.value = val 101 t.priority = priority 102 } 103 return 104 } 105 106 if t.prefix != "" { 107 // Need to split the prefix among multiple nodes. 108 var n int // length of the longest common prefix 109 for ; n < len(t.prefix) && n < len(key); n++ { 110 if t.prefix[n] != key[n] { 111 break 112 } 113 } 114 if n == len(t.prefix) { 115 t.next.add(key[n:], val, priority, r) 116 } else if n == 0 { 117 // First byte differs, start a new lookup table here. Looking up 118 // what is currently t.prefix[0] will lead to prefixNode, and 119 // looking up key[0] will lead to keyNode. 120 var prefixNode *trieNode 121 if len(t.prefix) == 1 { 122 prefixNode = t.next 123 } else { 124 prefixNode = &trieNode{ 125 prefix: t.prefix[1:], 126 next: t.next, 127 } 128 } 129 keyNode := new(trieNode) 130 t.table = make([]*trieNode, r.tableSize) 131 t.table[r.mapping[t.prefix[0]]] = prefixNode 132 t.table[r.mapping[key[0]]] = keyNode 133 t.prefix = "" 134 t.next = nil 135 keyNode.add(key[1:], val, priority, r) 136 } else { 137 // Insert new node after the common section of the prefix. 138 next := &trieNode{ 139 prefix: t.prefix[n:], 140 next: t.next, 141 } 142 t.prefix = t.prefix[:n] 143 t.next = next 144 next.add(key[n:], val, priority, r) 145 } 146 } else if t.table != nil { 147 // Insert into existing table. 148 m := r.mapping[key[0]] 149 if t.table[m] == nil { 150 t.table[m] = new(trieNode) 151 } 152 t.table[m].add(key[1:], val, priority, r) 153 } else { 154 t.prefix = key 155 t.next = new(trieNode) 156 t.next.add("", val, priority, r) 157 } 158 } 159 160 func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) { 161 // Iterate down the trie to the end, and grab the value and keylen with 162 // the highest priority. 163 bestPriority := 0 164 node := &r.root 165 n := 0 166 for node != nil { 167 if node.priority > bestPriority && !(ignoreRoot && node == &r.root) { 168 bestPriority = node.priority 169 val = node.value 170 keylen = n 171 found = true 172 } 173 174 if s == "" { 175 break 176 } 177 if node.table != nil { 178 index := r.mapping[ByteToLower(s[0])] 179 if int(index) == r.tableSize { 180 break 181 } 182 node = node.table[index] 183 s = s[1:] 184 n++ 185 } else if node.prefix != "" && StringHasPrefixFold(s, node.prefix) { 186 n += len(node.prefix) 187 s = s[len(node.prefix):] 188 node = node.next 189 } else { 190 break 191 } 192 } 193 return 194 } 195 196 // genericReplacer is the fully generic algorithm. 197 // It's used as a fallback when nothing faster can be used. 198 type genericReplacer struct { 199 root trieNode 200 // tableSize is the size of a trie node's lookup table. It is the number 201 // of unique key bytes. 202 tableSize int 203 // mapping maps from key bytes to a dense index for trieNode.table. 204 mapping [256]byte 205 } 206 207 func makeGenericReplacer(oldnew []string) *genericReplacer { 208 r := new(genericReplacer) 209 // Find each byte used, then assign them each an index. 210 for i := 0; i < len(oldnew); i += 2 { 211 key := strings.ToLower(oldnew[i]) 212 for j := 0; j < len(key); j++ { 213 r.mapping[key[j]] = 1 214 } 215 } 216 217 for _, b := range r.mapping { 218 r.tableSize += int(b) 219 } 220 221 var index byte 222 for i, b := range r.mapping { 223 if b == 0 { 224 r.mapping[i] = byte(r.tableSize) 225 } else { 226 r.mapping[i] = index 227 index++ 228 } 229 } 230 // Ensure root node uses a lookup table (for performance). 231 r.root.table = make([]*trieNode, r.tableSize) 232 233 for i := 0; i < len(oldnew); i += 2 { 234 r.root.add(strings.ToLower(oldnew[i]), oldnew[i+1], len(oldnew)-i, r) 235 } 236 return r 237 } 238 239 type appendSliceWriter []byte 240 241 // Write writes to the buffer to satisfy io.Writer. 242 func (w *appendSliceWriter) Write(p []byte) (int, error) { 243 *w = append(*w, p...) 244 return len(p), nil 245 } 246 247 // WriteString writes to the buffer without string->[]byte->string allocations. 248 func (w *appendSliceWriter) WriteString(s string) (int, error) { 249 *w = append(*w, s...) 250 return len(s), nil 251 } 252 253 type stringWriterIface interface { 254 WriteString(string) (int, error) 255 } 256 257 type stringWriter struct { 258 w io.Writer 259 } 260 261 func (w stringWriter) WriteString(s string) (int, error) { 262 return w.w.Write([]byte(s)) 263 } 264 265 func getStringWriter(w io.Writer) stringWriterIface { 266 sw, ok := w.(stringWriterIface) 267 if !ok { 268 sw = stringWriter{w} 269 } 270 return sw 271 } 272 273 func (r *genericReplacer) Replace(s string) string { 274 buf := make(appendSliceWriter, 0, len(s)) 275 r.WriteString(&buf, s) 276 return string(buf) 277 } 278 279 func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) { 280 sw := getStringWriter(w) 281 var last, wn int 282 var prevMatchEmpty bool 283 for i := 0; i <= len(s); { 284 // Fast path: s[i] is not a prefix of any pattern. 285 if i != len(s) && r.root.priority == 0 { 286 index := int(r.mapping[ByteToLower(s[i])]) 287 if index == r.tableSize || r.root.table[index] == nil { 288 i++ 289 continue 290 } 291 } 292 293 // Ignore the empty match iff the previous loop found the empty match. 294 val, keylen, match := r.lookup(s[i:], prevMatchEmpty) 295 prevMatchEmpty = match && keylen == 0 296 if match { 297 orig := s[i : i+keylen] 298 switch CaseStyle(orig) { 299 case CaseUnknown: 300 // pretend we didn't match 301 // i++ 302 // continue 303 case CaseUpper: 304 val = strings.ToUpper(val) 305 case CaseLower: 306 val = strings.ToLower(val) 307 case CaseTitle: 308 if len(val) < 2 { 309 val = strings.ToUpper(val) 310 } else { 311 val = strings.ToUpper(val[:1]) + strings.ToLower(val[1:]) 312 } 313 } 314 wn, err = sw.WriteString(s[last:i]) 315 n += wn 316 if err != nil { 317 return 318 } 319 //log.Printf("%d: Going to correct %q with %q", i, s[i:i+keylen], val) 320 wn, err = sw.WriteString(val) 321 n += wn 322 if err != nil { 323 return 324 } 325 i += keylen 326 last = i 327 continue 328 } 329 i++ 330 } 331 if last != len(s) { 332 wn, err = sw.WriteString(s[last:]) 333 n += wn 334 } 335 return 336 }