github.com/flyinox/gosm@v0.0.0-20171117061539-16768cb62077/src/strings/replace.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package strings 6 7 import "io" 8 9 // Replacer replaces a list of strings with replacements. 10 // It is safe for concurrent use by multiple goroutines. 11 type Replacer struct { 12 r replacer 13 } 14 15 // replacer is the interface that a replacement algorithm needs to implement. 16 type replacer interface { 17 Replace(s string) string 18 WriteString(w io.Writer, s string) (n int, err error) 19 } 20 21 // NewReplacer returns a new Replacer from a list of old, new string pairs. 22 // Replacements are performed in order, without overlapping matches. 23 func NewReplacer(oldnew ...string) *Replacer { 24 if len(oldnew)%2 == 1 { 25 panic("strings.NewReplacer: odd argument count") 26 } 27 28 if len(oldnew) == 2 && len(oldnew[0]) > 1 { 29 return &Replacer{r: makeSingleStringReplacer(oldnew[0], oldnew[1])} 30 } 31 32 allNewBytes := true 33 for i := 0; i < len(oldnew); i += 2 { 34 if len(oldnew[i]) != 1 { 35 return &Replacer{r: makeGenericReplacer(oldnew)} 36 } 37 if len(oldnew[i+1]) != 1 { 38 allNewBytes = false 39 } 40 } 41 42 if allNewBytes { 43 r := byteReplacer{} 44 for i := range r { 45 r[i] = byte(i) 46 } 47 // The first occurrence of old->new map takes precedence 48 // over the others with the same old string. 49 for i := len(oldnew) - 2; i >= 0; i -= 2 { 50 o := oldnew[i][0] 51 n := oldnew[i+1][0] 52 r[o] = n 53 } 54 return &Replacer{r: &r} 55 } 56 57 r := byteStringReplacer{} 58 // The first occurrence of old->new map takes precedence 59 // over the others with the same old string. 60 for i := len(oldnew) - 2; i >= 0; i -= 2 { 61 o := oldnew[i][0] 62 n := oldnew[i+1] 63 r[o] = []byte(n) 64 } 65 return &Replacer{r: &r} 66 } 67 68 // Replace returns a copy of s with all replacements performed. 69 func (r *Replacer) Replace(s string) string { 70 return r.r.Replace(s) 71 } 72 73 // WriteString writes s to w with all replacements performed. 74 func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) { 75 return r.r.WriteString(w, s) 76 } 77 78 // trieNode is a node in a lookup trie for prioritized key/value pairs. Keys 79 // and values may be empty. For example, the trie containing keys "ax", "ay", 80 // "bcbc", "x" and "xy" could have eight nodes: 81 // 82 // n0 - 83 // n1 a- 84 // n2 .x+ 85 // n3 .y+ 86 // n4 b- 87 // n5 .cbc+ 88 // n6 x+ 89 // n7 .y+ 90 // 91 // n0 is the root node, and its children are n1, n4 and n6; n1's children are 92 // n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked 93 // with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7 94 // (marked with a trailing "+") are complete keys. 95 type trieNode struct { 96 // value is the value of the trie node's key/value pair. It is empty if 97 // this node is not a complete key. 98 value string 99 // priority is the priority (higher is more important) of the trie node's 100 // key/value pair; keys are not necessarily matched shortest- or longest- 101 // first. Priority is positive if this node is a complete key, and zero 102 // otherwise. In the example above, positive/zero priorities are marked 103 // with a trailing "+" or "-". 104 priority int 105 106 // A trie node may have zero, one or more child nodes: 107 // * if the remaining fields are zero, there are no children. 108 // * if prefix and next are non-zero, there is one child in next. 109 // * if table is non-zero, it defines all the children. 110 // 111 // Prefixes are preferred over tables when there is one child, but the 112 // root node always uses a table for lookup efficiency. 113 114 // prefix is the difference in keys between this trie node and the next. 115 // In the example above, node n4 has prefix "cbc" and n4's next node is n5. 116 // Node n5 has no children and so has zero prefix, next and table fields. 117 prefix string 118 next *trieNode 119 120 // table is a lookup table indexed by the next byte in the key, after 121 // remapping that byte through genericReplacer.mapping to create a dense 122 // index. In the example above, the keys only use 'a', 'b', 'c', 'x' and 123 // 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and 124 // genericReplacer.tableSize will be 5. Node n0's table will be 125 // []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped 126 // 'a', 'b' and 'x'. 127 table []*trieNode 128 } 129 130 func (t *trieNode) add(key, val string, priority int, r *genericReplacer) { 131 if key == "" { 132 if t.priority == 0 { 133 t.value = val 134 t.priority = priority 135 } 136 return 137 } 138 139 if t.prefix != "" { 140 // Need to split the prefix among multiple nodes. 141 var n int // length of the longest common prefix 142 for ; n < len(t.prefix) && n < len(key); n++ { 143 if t.prefix[n] != key[n] { 144 break 145 } 146 } 147 if n == len(t.prefix) { 148 t.next.add(key[n:], val, priority, r) 149 } else if n == 0 { 150 // First byte differs, start a new lookup table here. Looking up 151 // what is currently t.prefix[0] will lead to prefixNode, and 152 // looking up key[0] will lead to keyNode. 153 var prefixNode *trieNode 154 if len(t.prefix) == 1 { 155 prefixNode = t.next 156 } else { 157 prefixNode = &trieNode{ 158 prefix: t.prefix[1:], 159 next: t.next, 160 } 161 } 162 keyNode := new(trieNode) 163 t.table = make([]*trieNode, r.tableSize) 164 t.table[r.mapping[t.prefix[0]]] = prefixNode 165 t.table[r.mapping[key[0]]] = keyNode 166 t.prefix = "" 167 t.next = nil 168 keyNode.add(key[1:], val, priority, r) 169 } else { 170 // Insert new node after the common section of the prefix. 171 next := &trieNode{ 172 prefix: t.prefix[n:], 173 next: t.next, 174 } 175 t.prefix = t.prefix[:n] 176 t.next = next 177 next.add(key[n:], val, priority, r) 178 } 179 } else if t.table != nil { 180 // Insert into existing table. 181 m := r.mapping[key[0]] 182 if t.table[m] == nil { 183 t.table[m] = new(trieNode) 184 } 185 t.table[m].add(key[1:], val, priority, r) 186 } else { 187 t.prefix = key 188 t.next = new(trieNode) 189 t.next.add("", val, priority, r) 190 } 191 } 192 193 func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) { 194 // Iterate down the trie to the end, and grab the value and keylen with 195 // the highest priority. 196 bestPriority := 0 197 node := &r.root 198 n := 0 199 for node != nil { 200 if node.priority > bestPriority && !(ignoreRoot && node == &r.root) { 201 bestPriority = node.priority 202 val = node.value 203 keylen = n 204 found = true 205 } 206 207 if s == "" { 208 break 209 } 210 if node.table != nil { 211 index := r.mapping[s[0]] 212 if int(index) == r.tableSize { 213 break 214 } 215 node = node.table[index] 216 s = s[1:] 217 n++ 218 } else if node.prefix != "" && HasPrefix(s, node.prefix) { 219 n += len(node.prefix) 220 s = s[len(node.prefix):] 221 node = node.next 222 } else { 223 break 224 } 225 } 226 return 227 } 228 229 // genericReplacer is the fully generic algorithm. 230 // It's used as a fallback when nothing faster can be used. 231 type genericReplacer struct { 232 root trieNode 233 // tableSize is the size of a trie node's lookup table. It is the number 234 // of unique key bytes. 235 tableSize int 236 // mapping maps from key bytes to a dense index for trieNode.table. 237 mapping [256]byte 238 } 239 240 func makeGenericReplacer(oldnew []string) *genericReplacer { 241 r := new(genericReplacer) 242 // Find each byte used, then assign them each an index. 243 for i := 0; i < len(oldnew); i += 2 { 244 key := oldnew[i] 245 for j := 0; j < len(key); j++ { 246 r.mapping[key[j]] = 1 247 } 248 } 249 250 for _, b := range r.mapping { 251 r.tableSize += int(b) 252 } 253 254 var index byte 255 for i, b := range r.mapping { 256 if b == 0 { 257 r.mapping[i] = byte(r.tableSize) 258 } else { 259 r.mapping[i] = index 260 index++ 261 } 262 } 263 // Ensure root node uses a lookup table (for performance). 264 r.root.table = make([]*trieNode, r.tableSize) 265 266 for i := 0; i < len(oldnew); i += 2 { 267 r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r) 268 } 269 return r 270 } 271 272 type appendSliceWriter []byte 273 274 // Write writes to the buffer to satisfy io.Writer. 275 func (w *appendSliceWriter) Write(p []byte) (int, error) { 276 *w = append(*w, p...) 277 return len(p), nil 278 } 279 280 // WriteString writes to the buffer without string->[]byte->string allocations. 281 func (w *appendSliceWriter) WriteString(s string) (int, error) { 282 *w = append(*w, s...) 283 return len(s), nil 284 } 285 286 type stringWriterIface interface { 287 WriteString(string) (int, error) 288 } 289 290 type stringWriter struct { 291 w io.Writer 292 } 293 294 func (w stringWriter) WriteString(s string) (int, error) { 295 return w.w.Write([]byte(s)) 296 } 297 298 func getStringWriter(w io.Writer) stringWriterIface { 299 sw, ok := w.(stringWriterIface) 300 if !ok { 301 sw = stringWriter{w} 302 } 303 return sw 304 } 305 306 func (r *genericReplacer) Replace(s string) string { 307 buf := make(appendSliceWriter, 0, len(s)) 308 r.WriteString(&buf, s) 309 return string(buf) 310 } 311 312 func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) { 313 sw := getStringWriter(w) 314 var last, wn int 315 var prevMatchEmpty bool 316 for i := 0; i <= len(s); { 317 // Fast path: s[i] is not a prefix of any pattern. 318 if i != len(s) && r.root.priority == 0 { 319 index := int(r.mapping[s[i]]) 320 if index == r.tableSize || r.root.table[index] == nil { 321 i++ 322 continue 323 } 324 } 325 326 // Ignore the empty match iff the previous loop found the empty match. 327 val, keylen, match := r.lookup(s[i:], prevMatchEmpty) 328 prevMatchEmpty = match && keylen == 0 329 if match { 330 wn, err = sw.WriteString(s[last:i]) 331 n += wn 332 if err != nil { 333 return 334 } 335 wn, err = sw.WriteString(val) 336 n += wn 337 if err != nil { 338 return 339 } 340 i += keylen 341 last = i 342 continue 343 } 344 i++ 345 } 346 if last != len(s) { 347 wn, err = sw.WriteString(s[last:]) 348 n += wn 349 } 350 return 351 } 352 353 // singleStringReplacer is the implementation that's used when there is only 354 // one string to replace (and that string has more than one byte). 355 type singleStringReplacer struct { 356 finder *stringFinder 357 // value is the new string that replaces that pattern when it's found. 358 value string 359 } 360 361 func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer { 362 return &singleStringReplacer{finder: makeStringFinder(pattern), value: value} 363 } 364 365 func (r *singleStringReplacer) Replace(s string) string { 366 var buf []byte 367 i, matched := 0, false 368 for { 369 match := r.finder.next(s[i:]) 370 if match == -1 { 371 break 372 } 373 matched = true 374 buf = append(buf, s[i:i+match]...) 375 buf = append(buf, r.value...) 376 i += match + len(r.finder.pattern) 377 } 378 if !matched { 379 return s 380 } 381 buf = append(buf, s[i:]...) 382 return string(buf) 383 } 384 385 func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) { 386 sw := getStringWriter(w) 387 var i, wn int 388 for { 389 match := r.finder.next(s[i:]) 390 if match == -1 { 391 break 392 } 393 wn, err = sw.WriteString(s[i : i+match]) 394 n += wn 395 if err != nil { 396 return 397 } 398 wn, err = sw.WriteString(r.value) 399 n += wn 400 if err != nil { 401 return 402 } 403 i += match + len(r.finder.pattern) 404 } 405 wn, err = sw.WriteString(s[i:]) 406 n += wn 407 return 408 } 409 410 // byteReplacer is the implementation that's used when all the "old" 411 // and "new" values are single ASCII bytes. 412 // The array contains replacement bytes indexed by old byte. 413 type byteReplacer [256]byte 414 415 func (r *byteReplacer) Replace(s string) string { 416 var buf []byte // lazily allocated 417 for i := 0; i < len(s); i++ { 418 b := s[i] 419 if r[b] != b { 420 if buf == nil { 421 buf = []byte(s) 422 } 423 buf[i] = r[b] 424 } 425 } 426 if buf == nil { 427 return s 428 } 429 return string(buf) 430 } 431 432 func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) { 433 // TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation. 434 bufsize := 32 << 10 435 if len(s) < bufsize { 436 bufsize = len(s) 437 } 438 buf := make([]byte, bufsize) 439 440 for len(s) > 0 { 441 ncopy := copy(buf, s[:]) 442 s = s[ncopy:] 443 for i, b := range buf[:ncopy] { 444 buf[i] = r[b] 445 } 446 wn, err := w.Write(buf[:ncopy]) 447 n += wn 448 if err != nil { 449 return n, err 450 } 451 } 452 return n, nil 453 } 454 455 // byteStringReplacer is the implementation that's used when all the 456 // "old" values are single ASCII bytes but the "new" values vary in size. 457 // The array contains replacement byte slices indexed by old byte. 458 // A nil []byte means that the old byte should not be replaced. 459 type byteStringReplacer [256][]byte 460 461 func (r *byteStringReplacer) Replace(s string) string { 462 newSize := len(s) 463 anyChanges := false 464 for i := 0; i < len(s); i++ { 465 b := s[i] 466 if r[b] != nil { 467 anyChanges = true 468 // The -1 is because we are replacing 1 byte with len(r[b]) bytes. 469 newSize += len(r[b]) - 1 470 } 471 } 472 if !anyChanges { 473 return s 474 } 475 buf := make([]byte, newSize) 476 bi := buf 477 for i := 0; i < len(s); i++ { 478 b := s[i] 479 if r[b] != nil { 480 n := copy(bi, r[b]) 481 bi = bi[n:] 482 } else { 483 bi[0] = b 484 bi = bi[1:] 485 } 486 } 487 return string(buf) 488 } 489 490 func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) { 491 sw := getStringWriter(w) 492 last := 0 493 for i := 0; i < len(s); i++ { 494 b := s[i] 495 if r[b] == nil { 496 continue 497 } 498 if last != i { 499 nw, err := sw.WriteString(s[last:i]) 500 n += nw 501 if err != nil { 502 return n, err 503 } 504 } 505 last = i + 1 506 nw, err := w.Write(r[b]) 507 n += nw 508 if err != nil { 509 return n, err 510 } 511 } 512 if last != len(s) { 513 var nw int 514 nw, err = sw.WriteString(s[last:]) 515 n += nw 516 } 517 return 518 }