github.com/megatontech/mynoteforgo@v0.0.0-20200507084910-5d0c6ea6e890/源码/strings/replace.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package strings 6 7 import ( 8 "io" 9 "sync" 10 ) 11 12 // Replacer replaces a list of strings with replacements. 13 // It is safe for concurrent use by multiple goroutines. 14 type Replacer struct { 15 once sync.Once // guards buildOnce method 16 r replacer 17 oldnew []string 18 } 19 20 // replacer is the interface that a replacement algorithm needs to implement. 21 type replacer interface { 22 Replace(s string) string 23 WriteString(w io.Writer, s string) (n int, err error) 24 } 25 26 // NewReplacer returns a new Replacer from a list of old, new string 27 // pairs. Replacements are performed in the order they appear in the 28 // target string, without overlapping matches. 29 func NewReplacer(oldnew ...string) *Replacer { 30 if len(oldnew)%2 == 1 { 31 panic("strings.NewReplacer: odd argument count") 32 } 33 return &Replacer{oldnew: append([]string(nil), oldnew...)} 34 } 35 36 func (r *Replacer) buildOnce() { 37 r.r = r.build() 38 r.oldnew = nil 39 } 40 41 func (b *Replacer) build() replacer { 42 oldnew := b.oldnew 43 if len(oldnew) == 2 && len(oldnew[0]) > 1 { 44 return makeSingleStringReplacer(oldnew[0], oldnew[1]) 45 } 46 47 allNewBytes := true 48 for i := 0; i < len(oldnew); i += 2 { 49 if len(oldnew[i]) != 1 { 50 return makeGenericReplacer(oldnew) 51 } 52 if len(oldnew[i+1]) != 1 { 53 allNewBytes = false 54 } 55 } 56 57 if allNewBytes { 58 r := byteReplacer{} 59 for i := range r { 60 r[i] = byte(i) 61 } 62 // The first occurrence of old->new map takes precedence 63 // over the others with the same old string. 64 for i := len(oldnew) - 2; i >= 0; i -= 2 { 65 o := oldnew[i][0] 66 n := oldnew[i+1][0] 67 r[o] = n 68 } 69 return &r 70 } 71 72 r := byteStringReplacer{toReplace: make([]string, 0, len(oldnew)/2)} 73 // The first occurrence of old->new map takes precedence 74 // over the others with the same old string. 75 for i := len(oldnew) - 2; i >= 0; i -= 2 { 76 o := oldnew[i][0] 77 n := oldnew[i+1] 78 // To avoid counting repetitions multiple times. 79 if r.replacements[o] == nil { 80 // We need to use string([]byte{o}) instead of string(o), 81 // to avoid utf8 encoding of o. 82 // E. g. byte(150) produces string of length 2. 83 r.toReplace = append(r.toReplace, string([]byte{o})) 84 } 85 r.replacements[o] = []byte(n) 86 87 } 88 return &r 89 } 90 91 // Replace returns a copy of s with all replacements performed. 92 func (r *Replacer) Replace(s string) string { 93 r.once.Do(r.buildOnce) 94 return r.r.Replace(s) 95 } 96 97 // WriteString writes s to w with all replacements performed. 98 func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) { 99 r.once.Do(r.buildOnce) 100 return r.r.WriteString(w, s) 101 } 102 103 // trieNode is a node in a lookup trie for prioritized key/value pairs. Keys 104 // and values may be empty. For example, the trie containing keys "ax", "ay", 105 // "bcbc", "x" and "xy" could have eight nodes: 106 // 107 // n0 - 108 // n1 a- 109 // n2 .x+ 110 // n3 .y+ 111 // n4 b- 112 // n5 .cbc+ 113 // n6 x+ 114 // n7 .y+ 115 // 116 // n0 is the root node, and its children are n1, n4 and n6; n1's children are 117 // n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked 118 // with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7 119 // (marked with a trailing "+") are complete keys. 120 type trieNode struct { 121 // value is the value of the trie node's key/value pair. It is empty if 122 // this node is not a complete key. 123 value string 124 // priority is the priority (higher is more important) of the trie node's 125 // key/value pair; keys are not necessarily matched shortest- or longest- 126 // first. Priority is positive if this node is a complete key, and zero 127 // otherwise. In the example above, positive/zero priorities are marked 128 // with a trailing "+" or "-". 129 priority int 130 131 // A trie node may have zero, one or more child nodes: 132 // * if the remaining fields are zero, there are no children. 133 // * if prefix and next are non-zero, there is one child in next. 134 // * if table is non-zero, it defines all the children. 135 // 136 // Prefixes are preferred over tables when there is one child, but the 137 // root node always uses a table for lookup efficiency. 138 139 // prefix is the difference in keys between this trie node and the next. 140 // In the example above, node n4 has prefix "cbc" and n4's next node is n5. 141 // Node n5 has no children and so has zero prefix, next and table fields. 142 prefix string 143 next *trieNode 144 145 // table is a lookup table indexed by the next byte in the key, after 146 // remapping that byte through genericReplacer.mapping to create a dense 147 // index. In the example above, the keys only use 'a', 'b', 'c', 'x' and 148 // 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and 149 // genericReplacer.tableSize will be 5. Node n0's table will be 150 // []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped 151 // 'a', 'b' and 'x'. 152 table []*trieNode 153 } 154 155 func (t *trieNode) add(key, val string, priority int, r *genericReplacer) { 156 if key == "" { 157 if t.priority == 0 { 158 t.value = val 159 t.priority = priority 160 } 161 return 162 } 163 164 if t.prefix != "" { 165 // Need to split the prefix among multiple nodes. 166 var n int // length of the longest common prefix 167 for ; n < len(t.prefix) && n < len(key); n++ { 168 if t.prefix[n] != key[n] { 169 break 170 } 171 } 172 if n == len(t.prefix) { 173 t.next.add(key[n:], val, priority, r) 174 } else if n == 0 { 175 // First byte differs, start a new lookup table here. Looking up 176 // what is currently t.prefix[0] will lead to prefixNode, and 177 // looking up key[0] will lead to keyNode. 178 var prefixNode *trieNode 179 if len(t.prefix) == 1 { 180 prefixNode = t.next 181 } else { 182 prefixNode = &trieNode{ 183 prefix: t.prefix[1:], 184 next: t.next, 185 } 186 } 187 keyNode := new(trieNode) 188 t.table = make([]*trieNode, r.tableSize) 189 t.table[r.mapping[t.prefix[0]]] = prefixNode 190 t.table[r.mapping[key[0]]] = keyNode 191 t.prefix = "" 192 t.next = nil 193 keyNode.add(key[1:], val, priority, r) 194 } else { 195 // Insert new node after the common section of the prefix. 196 next := &trieNode{ 197 prefix: t.prefix[n:], 198 next: t.next, 199 } 200 t.prefix = t.prefix[:n] 201 t.next = next 202 next.add(key[n:], val, priority, r) 203 } 204 } else if t.table != nil { 205 // Insert into existing table. 206 m := r.mapping[key[0]] 207 if t.table[m] == nil { 208 t.table[m] = new(trieNode) 209 } 210 t.table[m].add(key[1:], val, priority, r) 211 } else { 212 t.prefix = key 213 t.next = new(trieNode) 214 t.next.add("", val, priority, r) 215 } 216 } 217 218 func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) { 219 // Iterate down the trie to the end, and grab the value and keylen with 220 // the highest priority. 221 bestPriority := 0 222 node := &r.root 223 n := 0 224 for node != nil { 225 if node.priority > bestPriority && !(ignoreRoot && node == &r.root) { 226 bestPriority = node.priority 227 val = node.value 228 keylen = n 229 found = true 230 } 231 232 if s == "" { 233 break 234 } 235 if node.table != nil { 236 index := r.mapping[s[0]] 237 if int(index) == r.tableSize { 238 break 239 } 240 node = node.table[index] 241 s = s[1:] 242 n++ 243 } else if node.prefix != "" && HasPrefix(s, node.prefix) { 244 n += len(node.prefix) 245 s = s[len(node.prefix):] 246 node = node.next 247 } else { 248 break 249 } 250 } 251 return 252 } 253 254 // genericReplacer is the fully generic algorithm. 255 // It's used as a fallback when nothing faster can be used. 256 type genericReplacer struct { 257 root trieNode 258 // tableSize is the size of a trie node's lookup table. It is the number 259 // of unique key bytes. 260 tableSize int 261 // mapping maps from key bytes to a dense index for trieNode.table. 262 mapping [256]byte 263 } 264 265 func makeGenericReplacer(oldnew []string) *genericReplacer { 266 r := new(genericReplacer) 267 // Find each byte used, then assign them each an index. 268 for i := 0; i < len(oldnew); i += 2 { 269 key := oldnew[i] 270 for j := 0; j < len(key); j++ { 271 r.mapping[key[j]] = 1 272 } 273 } 274 275 for _, b := range r.mapping { 276 r.tableSize += int(b) 277 } 278 279 var index byte 280 for i, b := range r.mapping { 281 if b == 0 { 282 r.mapping[i] = byte(r.tableSize) 283 } else { 284 r.mapping[i] = index 285 index++ 286 } 287 } 288 // Ensure root node uses a lookup table (for performance). 289 r.root.table = make([]*trieNode, r.tableSize) 290 291 for i := 0; i < len(oldnew); i += 2 { 292 r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r) 293 } 294 return r 295 } 296 297 type appendSliceWriter []byte 298 299 // Write writes to the buffer to satisfy io.Writer. 300 func (w *appendSliceWriter) Write(p []byte) (int, error) { 301 *w = append(*w, p...) 302 return len(p), nil 303 } 304 305 // WriteString writes to the buffer without string->[]byte->string allocations. 306 func (w *appendSliceWriter) WriteString(s string) (int, error) { 307 *w = append(*w, s...) 308 return len(s), nil 309 } 310 311 type stringWriter struct { 312 w io.Writer 313 } 314 315 func (w stringWriter) WriteString(s string) (int, error) { 316 return w.w.Write([]byte(s)) 317 } 318 319 func getStringWriter(w io.Writer) io.StringWriter { 320 sw, ok := w.(io.StringWriter) 321 if !ok { 322 sw = stringWriter{w} 323 } 324 return sw 325 } 326 327 func (r *genericReplacer) Replace(s string) string { 328 buf := make(appendSliceWriter, 0, len(s)) 329 r.WriteString(&buf, s) 330 return string(buf) 331 } 332 333 func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) { 334 sw := getStringWriter(w) 335 var last, wn int 336 var prevMatchEmpty bool 337 for i := 0; i <= len(s); { 338 // Fast path: s[i] is not a prefix of any pattern. 339 if i != len(s) && r.root.priority == 0 { 340 index := int(r.mapping[s[i]]) 341 if index == r.tableSize || r.root.table[index] == nil { 342 i++ 343 continue 344 } 345 } 346 347 // Ignore the empty match iff the previous loop found the empty match. 348 val, keylen, match := r.lookup(s[i:], prevMatchEmpty) 349 prevMatchEmpty = match && keylen == 0 350 if match { 351 wn, err = sw.WriteString(s[last:i]) 352 n += wn 353 if err != nil { 354 return 355 } 356 wn, err = sw.WriteString(val) 357 n += wn 358 if err != nil { 359 return 360 } 361 i += keylen 362 last = i 363 continue 364 } 365 i++ 366 } 367 if last != len(s) { 368 wn, err = sw.WriteString(s[last:]) 369 n += wn 370 } 371 return 372 } 373 374 // singleStringReplacer is the implementation that's used when there is only 375 // one string to replace (and that string has more than one byte). 376 type singleStringReplacer struct { 377 finder *stringFinder 378 // value is the new string that replaces that pattern when it's found. 379 value string 380 } 381 382 func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer { 383 return &singleStringReplacer{finder: makeStringFinder(pattern), value: value} 384 } 385 386 func (r *singleStringReplacer) Replace(s string) string { 387 var buf []byte 388 i, matched := 0, false 389 for { 390 match := r.finder.next(s[i:]) 391 if match == -1 { 392 break 393 } 394 matched = true 395 buf = append(buf, s[i:i+match]...) 396 buf = append(buf, r.value...) 397 i += match + len(r.finder.pattern) 398 } 399 if !matched { 400 return s 401 } 402 buf = append(buf, s[i:]...) 403 return string(buf) 404 } 405 406 func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) { 407 sw := getStringWriter(w) 408 var i, wn int 409 for { 410 match := r.finder.next(s[i:]) 411 if match == -1 { 412 break 413 } 414 wn, err = sw.WriteString(s[i : i+match]) 415 n += wn 416 if err != nil { 417 return 418 } 419 wn, err = sw.WriteString(r.value) 420 n += wn 421 if err != nil { 422 return 423 } 424 i += match + len(r.finder.pattern) 425 } 426 wn, err = sw.WriteString(s[i:]) 427 n += wn 428 return 429 } 430 431 // byteReplacer is the implementation that's used when all the "old" 432 // and "new" values are single ASCII bytes. 433 // The array contains replacement bytes indexed by old byte. 434 type byteReplacer [256]byte 435 436 func (r *byteReplacer) Replace(s string) string { 437 var buf []byte // lazily allocated 438 for i := 0; i < len(s); i++ { 439 b := s[i] 440 if r[b] != b { 441 if buf == nil { 442 buf = []byte(s) 443 } 444 buf[i] = r[b] 445 } 446 } 447 if buf == nil { 448 return s 449 } 450 return string(buf) 451 } 452 453 func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) { 454 // TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation. 455 bufsize := 32 << 10 456 if len(s) < bufsize { 457 bufsize = len(s) 458 } 459 buf := make([]byte, bufsize) 460 461 for len(s) > 0 { 462 ncopy := copy(buf, s) 463 s = s[ncopy:] 464 for i, b := range buf[:ncopy] { 465 buf[i] = r[b] 466 } 467 wn, err := w.Write(buf[:ncopy]) 468 n += wn 469 if err != nil { 470 return n, err 471 } 472 } 473 return n, nil 474 } 475 476 // byteStringReplacer is the implementation that's used when all the 477 // "old" values are single ASCII bytes but the "new" values vary in size. 478 type byteStringReplacer struct { 479 // replacements contains replacement byte slices indexed by old byte. 480 // A nil []byte means that the old byte should not be replaced. 481 replacements [256][]byte 482 // toReplace keeps a list of bytes to replace. Depending on length of toReplace 483 // and length of target string it may be faster to use Count, or a plain loop. 484 // We store single byte as a string, because Count takes a string. 485 toReplace []string 486 } 487 488 // countCutOff controls the ratio of a string length to a number of replacements 489 // at which (*byteStringReplacer).Replace switches algorithms. 490 // For strings with higher ration of length to replacements than that value, 491 // we call Count, for each replacement from toReplace. 492 // For strings, with a lower ratio we use simple loop, because of Count overhead. 493 // countCutOff is an empirically determined overhead multiplier. 494 // TODO(tocarip) revisit once we have register-based abi/mid-stack inlining. 495 const countCutOff = 8 496 497 func (r *byteStringReplacer) Replace(s string) string { 498 newSize := len(s) 499 anyChanges := false 500 // Is it faster to use Count? 501 if len(r.toReplace)*countCutOff <= len(s) { 502 for _, x := range r.toReplace { 503 if c := Count(s, x); c != 0 { 504 // The -1 is because we are replacing 1 byte with len(replacements[b]) bytes. 505 newSize += c * (len(r.replacements[x[0]]) - 1) 506 anyChanges = true 507 } 508 509 } 510 } else { 511 for i := 0; i < len(s); i++ { 512 b := s[i] 513 if r.replacements[b] != nil { 514 // See above for explanation of -1 515 newSize += len(r.replacements[b]) - 1 516 anyChanges = true 517 } 518 } 519 } 520 if !anyChanges { 521 return s 522 } 523 buf := make([]byte, newSize) 524 j := 0 525 for i := 0; i < len(s); i++ { 526 b := s[i] 527 if r.replacements[b] != nil { 528 j += copy(buf[j:], r.replacements[b]) 529 } else { 530 buf[j] = b 531 j++ 532 } 533 } 534 return string(buf) 535 } 536 537 func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) { 538 sw := getStringWriter(w) 539 last := 0 540 for i := 0; i < len(s); i++ { 541 b := s[i] 542 if r.replacements[b] == nil { 543 continue 544 } 545 if last != i { 546 nw, err := sw.WriteString(s[last:i]) 547 n += nw 548 if err != nil { 549 return n, err 550 } 551 } 552 last = i + 1 553 nw, err := w.Write(r.replacements[b]) 554 n += nw 555 if err != nil { 556 return n, err 557 } 558 } 559 if last != len(s) { 560 var nw int 561 nw, err = sw.WriteString(s[last:]) 562 n += nw 563 } 564 return 565 }