github.com/remobjects/goldbaselibrary@v0.0.0-20230924164425-d458680a936b/Source/Gold/strings/replace.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package strings 6 7 import ( 8 "io" 9 "sync" 10 ) 11 12 // Replacer replaces a list of strings with replacements. 13 // It is safe for concurrent use by multiple goroutines. 14 type Replacer struct { 15 once sync.Once // guards buildOnce method 16 r replacer 17 oldnew []string 18 } 19 20 // replacer is the interface that a replacement algorithm needs to implement. 21 type replacer interface { 22 Replace(s string) string 23 WriteString(w io.Writer, s string) (n int, err error) 24 } 25 26 // NewReplacer returns a new Replacer from a list of old, new string 27 // pairs. Replacements are performed in the order they appear in the 28 // target string, without overlapping matches. The old string 29 // comparisons are done in argument order. 30 // 31 // NewReplacer panics if given an odd number of arguments. 32 func NewReplacer(oldnew ...string) *Replacer { 33 if len(oldnew)%2 == 1 { 34 panic("strings.NewReplacer: odd argument count") 35 } 36 return &Replacer{oldnew: append([]string(nil), oldnew...)} 37 } 38 39 func (r *Replacer) buildOnce() { 40 r.r = r.build() 41 r.oldnew = nil 42 } 43 44 func (b *Replacer) build() replacer { 45 oldnew := b.oldnew 46 if len(oldnew) == 2 && len(oldnew[0]) > 1 { 47 return makeSingleStringReplacer(oldnew[0], oldnew[1]) 48 } 49 50 allNewBytes := true 51 for i := 0; i < len(oldnew); i += 2 { 52 if len(oldnew[i]) != 1 { 53 return makeGenericReplacer(oldnew) 54 } 55 if len(oldnew[i+1]) != 1 { 56 allNewBytes = false 57 } 58 } 59 60 if allNewBytes { 61 r := byteReplacer{} 62 for i := range r { 63 r[i] = byte(i) 64 } 65 // The first occurrence of old->new map takes precedence 66 // over the others with the same old string. 67 for i := len(oldnew) - 2; i >= 0; i -= 2 { 68 o := oldnew[i][0] 69 n := oldnew[i+1][0] 70 r[o] = n 71 } 72 return &r 73 } 74 75 r := byteStringReplacer{toReplace: make([]string, 0, len(oldnew)/2)} 76 // The first occurrence of old->new map takes precedence 77 // over the others with the same old string. 78 for i := len(oldnew) - 2; i >= 0; i -= 2 { 79 o := oldnew[i][0] 80 n := oldnew[i+1] 81 // To avoid counting repetitions multiple times. 82 if r.replacements[o] == nil { 83 // We need to use string([]byte{o}) instead of string(o), 84 // to avoid utf8 encoding of o. 85 // E. g. byte(150) produces string of length 2. 86 r.toReplace = append(r.toReplace, string([]byte{o})) 87 } 88 r.replacements[o] = []byte(n) 89 90 } 91 return &r 92 } 93 94 // Replace returns a copy of s with all replacements performed. 95 func (r *Replacer) Replace(s string) string { 96 r.once.Do(r.buildOnce) 97 return r.r.Replace(s) 98 } 99 100 // WriteString writes s to w with all replacements performed. 101 func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) { 102 r.once.Do(r.buildOnce) 103 return r.r.WriteString(w, s) 104 } 105 106 // trieNode is a node in a lookup trie for prioritized key/value pairs. Keys 107 // and values may be empty. For example, the trie containing keys "ax", "ay", 108 // "bcbc", "x" and "xy" could have eight nodes: 109 // 110 // n0 - 111 // n1 a- 112 // n2 .x+ 113 // n3 .y+ 114 // n4 b- 115 // n5 .cbc+ 116 // n6 x+ 117 // n7 .y+ 118 // 119 // n0 is the root node, and its children are n1, n4 and n6; n1's children are 120 // n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked 121 // with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7 122 // (marked with a trailing "+") are complete keys. 123 type trieNode struct { 124 // value is the value of the trie node's key/value pair. It is empty if 125 // this node is not a complete key. 126 value string 127 // priority is the priority (higher is more important) of the trie node's 128 // key/value pair; keys are not necessarily matched shortest- or longest- 129 // first. Priority is positive if this node is a complete key, and zero 130 // otherwise. In the example above, positive/zero priorities are marked 131 // with a trailing "+" or "-". 132 priority int 133 134 // A trie node may have zero, one or more child nodes: 135 // * if the remaining fields are zero, there are no children. 136 // * if prefix and next are non-zero, there is one child in next. 137 // * if table is non-zero, it defines all the children. 138 // 139 // Prefixes are preferred over tables when there is one child, but the 140 // root node always uses a table for lookup efficiency. 141 142 // prefix is the difference in keys between this trie node and the next. 143 // In the example above, node n4 has prefix "cbc" and n4's next node is n5. 144 // Node n5 has no children and so has zero prefix, next and table fields. 145 prefix string 146 next *trieNode 147 148 // table is a lookup table indexed by the next byte in the key, after 149 // remapping that byte through genericReplacer.mapping to create a dense 150 // index. In the example above, the keys only use 'a', 'b', 'c', 'x' and 151 // 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and 152 // genericReplacer.tableSize will be 5. Node n0's table will be 153 // []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped 154 // 'a', 'b' and 'x'. 155 table []*trieNode 156 } 157 158 func (t *trieNode) add(key, val string, priority int, r *genericReplacer) { 159 if key == "" { 160 if t.priority == 0 { 161 t.value = val 162 t.priority = priority 163 } 164 return 165 } 166 167 if t.prefix != "" { 168 // Need to split the prefix among multiple nodes. 169 var n int // length of the longest common prefix 170 for ; n < len(t.prefix) && n < len(key); n++ { 171 if t.prefix[n] != key[n] { 172 break 173 } 174 } 175 if n == len(t.prefix) { 176 t.next.add(key[n:], val, priority, r) 177 } else if n == 0 { 178 // First byte differs, start a new lookup table here. Looking up 179 // what is currently t.prefix[0] will lead to prefixNode, and 180 // looking up key[0] will lead to keyNode. 181 var prefixNode *trieNode 182 if len(t.prefix) == 1 { 183 prefixNode = t.next 184 } else { 185 prefixNode = &trieNode{ 186 prefix: t.prefix[1:], 187 next: t.next, 188 } 189 } 190 keyNode := new(trieNode) 191 t.table = make([]*trieNode, r.tableSize) 192 t.table[r.mapping[t.prefix[0]]] = prefixNode 193 t.table[r.mapping[key[0]]] = keyNode 194 t.prefix = "" 195 t.next = nil 196 keyNode.add(key[1:], val, priority, r) 197 } else { 198 // Insert new node after the common section of the prefix. 199 next := &trieNode{ 200 prefix: t.prefix[n:], 201 next: t.next, 202 } 203 t.prefix = t.prefix[:n] 204 t.next = next 205 next.add(key[n:], val, priority, r) 206 } 207 } else if t.table != nil { 208 // Insert into existing table. 209 m := r.mapping[key[0]] 210 if t.table[m] == nil { 211 t.table[m] = new(trieNode) 212 } 213 t.table[m].add(key[1:], val, priority, r) 214 } else { 215 t.prefix = key 216 t.next = new(trieNode) 217 t.next.add("", val, priority, r) 218 } 219 } 220 221 func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) { 222 // Iterate down the trie to the end, and grab the value and keylen with 223 // the highest priority. 224 bestPriority := 0 225 node := &r.root 226 n := 0 227 for node != nil { 228 if node.priority > bestPriority && !(ignoreRoot && node == &r.root) { 229 bestPriority = node.priority 230 val = node.value 231 keylen = n 232 found = true 233 } 234 235 if s == "" { 236 break 237 } 238 if node.table != nil { 239 index := r.mapping[s[0]] 240 if int(index) == r.tableSize { 241 break 242 } 243 node = node.table[index] 244 s = s[1:] 245 n++ 246 } else if node.prefix != "" && HasPrefix(s, node.prefix) { 247 n += len(node.prefix) 248 s = s[len(node.prefix):] 249 node = node.next 250 } else { 251 break 252 } 253 } 254 return 255 } 256 257 // genericReplacer is the fully generic algorithm. 258 // It's used as a fallback when nothing faster can be used. 259 type genericReplacer struct { 260 root trieNode 261 // tableSize is the size of a trie node's lookup table. It is the number 262 // of unique key bytes. 263 tableSize int 264 // mapping maps from key bytes to a dense index for trieNode.table. 265 mapping [256]byte 266 } 267 268 func makeGenericReplacer(oldnew []string) *genericReplacer { 269 r := new(genericReplacer) 270 // Find each byte used, then assign them each an index. 271 for i := 0; i < len(oldnew); i += 2 { 272 key := oldnew[i] 273 for j := 0; j < len(key); j++ { 274 r.mapping[key[j]] = 1 275 } 276 } 277 278 for _, b := range r.mapping { 279 r.tableSize += int(b) 280 } 281 282 var index byte 283 for i, b := range r.mapping { 284 if b == 0 { 285 r.mapping[i] = byte(r.tableSize) 286 } else { 287 r.mapping[i] = index 288 index++ 289 } 290 } 291 // Ensure root node uses a lookup table (for performance). 292 r.root.table = make([]*trieNode, r.tableSize) 293 294 for i := 0; i < len(oldnew); i += 2 { 295 r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r) 296 } 297 return r 298 } 299 300 type appendSliceWriter []byte 301 302 // Write writes to the buffer to satisfy io.Writer. 303 func (w *appendSliceWriter) Write(p []byte) (int, error) { 304 *w = append(*w, p...) 305 return len(p), nil 306 } 307 308 // WriteString writes to the buffer without string->[]byte->string allocations. 309 func (w *appendSliceWriter) WriteString(s string) (int, error) { 310 *w = append(*w, s...) 311 return len(s), nil 312 } 313 314 type stringWriter struct { 315 w io.Writer 316 } 317 318 func (w stringWriter) WriteString(s string) (int, error) { 319 return w.w.Write([]byte(s)) 320 } 321 322 func getStringWriter(w io.Writer) io.StringWriter { 323 sw, ok := w.(io.StringWriter) 324 if !ok { 325 sw = stringWriter{w} 326 } 327 return sw 328 } 329 330 func (r *genericReplacer) Replace(s string) string { 331 buf := make(appendSliceWriter, 0, len(s)) 332 r.WriteString(&buf, s) 333 return string(buf) 334 } 335 336 func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) { 337 sw := getStringWriter(w) 338 var last, wn int 339 var prevMatchEmpty bool 340 for i := 0; i <= len(s); { 341 // Fast path: s[i] is not a prefix of any pattern. 342 if i != len(s) && r.root.priority == 0 { 343 index := int(r.mapping[s[i]]) 344 if index == r.tableSize || r.root.table[index] == nil { 345 i++ 346 continue 347 } 348 } 349 350 // Ignore the empty match iff the previous loop found the empty match. 351 val, keylen, match := r.lookup(s[i:], prevMatchEmpty) 352 prevMatchEmpty = match && keylen == 0 353 if match { 354 wn, err = sw.WriteString(s[last:i]) 355 n += wn 356 if err != nil { 357 return 358 } 359 wn, err = sw.WriteString(val) 360 n += wn 361 if err != nil { 362 return 363 } 364 i += keylen 365 last = i 366 continue 367 } 368 i++ 369 } 370 if last != len(s) { 371 wn, err = sw.WriteString(s[last:]) 372 n += wn 373 } 374 return 375 } 376 377 // singleStringReplacer is the implementation that's used when there is only 378 // one string to replace (and that string has more than one byte). 379 type singleStringReplacer struct { 380 finder *stringFinder 381 // value is the new string that replaces that pattern when it's found. 382 value string 383 } 384 385 func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer { 386 return &singleStringReplacer{finder: makeStringFinder(pattern), value: value} 387 } 388 389 func (r *singleStringReplacer) Replace(s string) string { 390 var buf []byte 391 i, matched := 0, false 392 for { 393 match := r.finder.next(s[i:]) 394 if match == -1 { 395 break 396 } 397 matched = true 398 buf = append(buf, s[i:i+match]...) 399 buf = append(buf, r.value...) 400 i += match + len(r.finder.pattern) 401 } 402 if !matched { 403 return s 404 } 405 buf = append(buf, s[i:]...) 406 return string(buf) 407 } 408 409 func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) { 410 sw := getStringWriter(w) 411 var i, wn int 412 for { 413 match := r.finder.next(s[i:]) 414 if match == -1 { 415 break 416 } 417 wn, err = sw.WriteString(s[i : i+match]) 418 n += wn 419 if err != nil { 420 return 421 } 422 wn, err = sw.WriteString(r.value) 423 n += wn 424 if err != nil { 425 return 426 } 427 i += match + len(r.finder.pattern) 428 } 429 wn, err = sw.WriteString(s[i:]) 430 n += wn 431 return 432 } 433 434 // byteReplacer is the implementation that's used when all the "old" 435 // and "new" values are single ASCII bytes. 436 // The array contains replacement bytes indexed by old byte. 437 type byteReplacer [256]byte 438 439 func (r *byteReplacer) Replace(s string) string { 440 var buf []byte // lazily allocated 441 for i := 0; i < len(s); i++ { 442 b := s[i] 443 if r[b] != b { 444 if buf == nil { 445 buf = []byte(s) 446 } 447 buf[i] = r[b] 448 } 449 } 450 if buf == nil { 451 return s 452 } 453 return string(buf) 454 } 455 456 func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) { 457 // TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation. 458 bufsize := 32 << 10 459 if len(s) < bufsize { 460 bufsize = len(s) 461 } 462 buf := make([]byte, bufsize) 463 464 for len(s) > 0 { 465 ncopy := copy(buf, s) 466 s = s[ncopy:] 467 for i, b := range buf[:ncopy] { 468 buf[i] = r[b] 469 } 470 wn, err := w.Write(buf[:ncopy]) 471 n += wn 472 if err != nil { 473 return n, err 474 } 475 } 476 return n, nil 477 } 478 479 // byteStringReplacer is the implementation that's used when all the 480 // "old" values are single ASCII bytes but the "new" values vary in size. 481 type byteStringReplacer struct { 482 // replacements contains replacement byte slices indexed by old byte. 483 // A nil []byte means that the old byte should not be replaced. 484 replacements [256][]byte 485 // toReplace keeps a list of bytes to replace. Depending on length of toReplace 486 // and length of target string it may be faster to use Count, or a plain loop. 487 // We store single byte as a string, because Count takes a string. 488 toReplace []string 489 } 490 491 // countCutOff controls the ratio of a string length to a number of replacements 492 // at which (*byteStringReplacer).Replace switches algorithms. 493 // For strings with higher ration of length to replacements than that value, 494 // we call Count, for each replacement from toReplace. 495 // For strings, with a lower ratio we use simple loop, because of Count overhead. 496 // countCutOff is an empirically determined overhead multiplier. 497 // TODO(tocarip) revisit once we have register-based abi/mid-stack inlining. 498 const countCutOff = 8 499 500 func (r *byteStringReplacer) Replace(s string) string { 501 newSize := len(s) 502 anyChanges := false 503 // Is it faster to use Count? 504 if len(r.toReplace)*countCutOff <= len(s) { 505 for _, x := range r.toReplace { 506 if c := Count(s, x); c != 0 { 507 // The -1 is because we are replacing 1 byte with len(replacements[b]) bytes. 508 newSize += c * (len(r.replacements[x[0]]) - 1) 509 anyChanges = true 510 } 511 512 } 513 } else { 514 for i := 0; i < len(s); i++ { 515 b := s[i] 516 if r.replacements[b] != nil { 517 // See above for explanation of -1 518 newSize += len(r.replacements[b]) - 1 519 anyChanges = true 520 } 521 } 522 } 523 if !anyChanges { 524 return s 525 } 526 buf := make([]byte, newSize) 527 j := 0 528 for i := 0; i < len(s); i++ { 529 b := s[i] 530 if r.replacements[b] != nil { 531 j += copy(buf[j:], r.replacements[b]) 532 } else { 533 buf[j] = b 534 j++ 535 } 536 } 537 return string(buf) 538 } 539 540 func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) { 541 sw := getStringWriter(w) 542 last := 0 543 for i := 0; i < len(s); i++ { 544 b := s[i] 545 if r.replacements[b] == nil { 546 continue 547 } 548 if last != i { 549 nw, err := sw.WriteString(s[last:i]) 550 n += nw 551 if err != nil { 552 return n, err 553 } 554 } 555 last = i + 1 556 nw, err := w.Write(r.replacements[b]) 557 n += nw 558 if err != nil { 559 return n, err 560 } 561 } 562 if last != len(s) { 563 var nw int 564 nw, err = sw.WriteString(s[last:]) 565 n += nw 566 } 567 return 568 }