github.com/ader1990/go@v0.0.0-20140630135419-8c24447fa791/src/pkg/strings/replace.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package strings 6 7 import "io" 8 9 // A Replacer replaces a list of strings with replacements. 10 type Replacer struct { 11 r replacer 12 } 13 14 // replacer is the interface that a replacement algorithm needs to implement. 15 type replacer interface { 16 Replace(s string) string 17 WriteString(w io.Writer, s string) (n int, err error) 18 } 19 20 // byteBitmap represents bytes which are sought for replacement. 21 // byteBitmap is 256 bits wide, with a bit set for each old byte to be 22 // replaced. 23 type byteBitmap [256 / 32]uint32 24 25 func (m *byteBitmap) set(b byte) { 26 m[b>>5] |= uint32(1 << (b & 31)) 27 } 28 29 // NewReplacer returns a new Replacer from a list of old, new string pairs. 30 // Replacements are performed in order, without overlapping matches. 31 func NewReplacer(oldnew ...string) *Replacer { 32 if len(oldnew)%2 == 1 { 33 panic("strings.NewReplacer: odd argument count") 34 } 35 36 if len(oldnew) == 2 && len(oldnew[0]) > 1 { 37 return &Replacer{r: makeSingleStringReplacer(oldnew[0], oldnew[1])} 38 } 39 40 allNewBytes := true 41 for i := 0; i < len(oldnew); i += 2 { 42 if len(oldnew[i]) != 1 { 43 return &Replacer{r: makeGenericReplacer(oldnew)} 44 } 45 if len(oldnew[i+1]) != 1 { 46 allNewBytes = false 47 } 48 } 49 50 if allNewBytes { 51 bb := &byteReplacer{} 52 for i := 0; i < len(oldnew); i += 2 { 53 o, n := oldnew[i][0], oldnew[i+1][0] 54 if bb.old[o>>5]&uint32(1<<(o&31)) != 0 { 55 // Later old->new maps do not override previous ones with the same old string. 56 continue 57 } 58 bb.old.set(o) 59 bb.new[o] = n 60 } 61 return &Replacer{r: bb} 62 } 63 64 bs := &byteStringReplacer{} 65 for i := 0; i < len(oldnew); i += 2 { 66 o, new := oldnew[i][0], oldnew[i+1] 67 if bs.old[o>>5]&uint32(1<<(o&31)) != 0 { 68 // Later old->new maps do not override previous ones with the same old string. 69 continue 70 } 71 bs.old.set(o) 72 bs.new[o] = []byte(new) 73 } 74 return &Replacer{r: bs} 75 } 76 77 // Replace returns a copy of s with all replacements performed. 78 func (r *Replacer) Replace(s string) string { 79 return r.r.Replace(s) 80 } 81 82 // WriteString writes s to w with all replacements performed. 83 func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) { 84 return r.r.WriteString(w, s) 85 } 86 87 // trieNode is a node in a lookup trie for prioritized key/value pairs. Keys 88 // and values may be empty. For example, the trie containing keys "ax", "ay", 89 // "bcbc", "x" and "xy" could have eight nodes: 90 // 91 // n0 - 92 // n1 a- 93 // n2 .x+ 94 // n3 .y+ 95 // n4 b- 96 // n5 .cbc+ 97 // n6 x+ 98 // n7 .y+ 99 // 100 // n0 is the root node, and its children are n1, n4 and n6; n1's children are 101 // n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked 102 // with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7 103 // (marked with a trailing "+") are complete keys. 104 type trieNode struct { 105 // value is the value of the trie node's key/value pair. It is empty if 106 // this node is not a complete key. 107 value string 108 // priority is the priority (higher is more important) of the trie node's 109 // key/value pair; keys are not necessarily matched shortest- or longest- 110 // first. Priority is positive if this node is a complete key, and zero 111 // otherwise. In the example above, positive/zero priorities are marked 112 // with a trailing "+" or "-". 113 priority int 114 115 // A trie node may have zero, one or more child nodes: 116 // * if the remaining fields are zero, there are no children. 117 // * if prefix and next are non-zero, there is one child in next. 118 // * if table is non-zero, it defines all the children. 119 // 120 // Prefixes are preferred over tables when there is one child, but the 121 // root node always uses a table for lookup efficiency. 122 123 // prefix is the difference in keys between this trie node and the next. 124 // In the example above, node n4 has prefix "cbc" and n4's next node is n5. 125 // Node n5 has no children and so has zero prefix, next and table fields. 126 prefix string 127 next *trieNode 128 129 // table is a lookup table indexed by the next byte in the key, after 130 // remapping that byte through genericReplacer.mapping to create a dense 131 // index. In the example above, the keys only use 'a', 'b', 'c', 'x' and 132 // 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and 133 // genericReplacer.tableSize will be 5. Node n0's table will be 134 // []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped 135 // 'a', 'b' and 'x'. 136 table []*trieNode 137 } 138 139 func (t *trieNode) add(key, val string, priority int, r *genericReplacer) { 140 if key == "" { 141 if t.priority == 0 { 142 t.value = val 143 t.priority = priority 144 } 145 return 146 } 147 148 if t.prefix != "" { 149 // Need to split the prefix among multiple nodes. 150 var n int // length of the longest common prefix 151 for ; n < len(t.prefix) && n < len(key); n++ { 152 if t.prefix[n] != key[n] { 153 break 154 } 155 } 156 if n == len(t.prefix) { 157 t.next.add(key[n:], val, priority, r) 158 } else if n == 0 { 159 // First byte differs, start a new lookup table here. Looking up 160 // what is currently t.prefix[0] will lead to prefixNode, and 161 // looking up key[0] will lead to keyNode. 162 var prefixNode *trieNode 163 if len(t.prefix) == 1 { 164 prefixNode = t.next 165 } else { 166 prefixNode = &trieNode{ 167 prefix: t.prefix[1:], 168 next: t.next, 169 } 170 } 171 keyNode := new(trieNode) 172 t.table = make([]*trieNode, r.tableSize) 173 t.table[r.mapping[t.prefix[0]]] = prefixNode 174 t.table[r.mapping[key[0]]] = keyNode 175 t.prefix = "" 176 t.next = nil 177 keyNode.add(key[1:], val, priority, r) 178 } else { 179 // Insert new node after the common section of the prefix. 180 next := &trieNode{ 181 prefix: t.prefix[n:], 182 next: t.next, 183 } 184 t.prefix = t.prefix[:n] 185 t.next = next 186 next.add(key[n:], val, priority, r) 187 } 188 } else if t.table != nil { 189 // Insert into existing table. 190 m := r.mapping[key[0]] 191 if t.table[m] == nil { 192 t.table[m] = new(trieNode) 193 } 194 t.table[m].add(key[1:], val, priority, r) 195 } else { 196 t.prefix = key 197 t.next = new(trieNode) 198 t.next.add("", val, priority, r) 199 } 200 } 201 202 func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) { 203 // Iterate down the trie to the end, and grab the value and keylen with 204 // the highest priority. 205 bestPriority := 0 206 node := &r.root 207 n := 0 208 for node != nil { 209 if node.priority > bestPriority && !(ignoreRoot && node == &r.root) { 210 bestPriority = node.priority 211 val = node.value 212 keylen = n 213 found = true 214 } 215 216 if s == "" { 217 break 218 } 219 if node.table != nil { 220 index := r.mapping[s[0]] 221 if int(index) == r.tableSize { 222 break 223 } 224 node = node.table[index] 225 s = s[1:] 226 n++ 227 } else if node.prefix != "" && HasPrefix(s, node.prefix) { 228 n += len(node.prefix) 229 s = s[len(node.prefix):] 230 node = node.next 231 } else { 232 break 233 } 234 } 235 return 236 } 237 238 // genericReplacer is the fully generic algorithm. 239 // It's used as a fallback when nothing faster can be used. 240 type genericReplacer struct { 241 root trieNode 242 // tableSize is the size of a trie node's lookup table. It is the number 243 // of unique key bytes. 244 tableSize int 245 // mapping maps from key bytes to a dense index for trieNode.table. 246 mapping [256]byte 247 } 248 249 func makeGenericReplacer(oldnew []string) *genericReplacer { 250 r := new(genericReplacer) 251 // Find each byte used, then assign them each an index. 252 for i := 0; i < len(oldnew); i += 2 { 253 key := oldnew[i] 254 for j := 0; j < len(key); j++ { 255 r.mapping[key[j]] = 1 256 } 257 } 258 259 for _, b := range r.mapping { 260 r.tableSize += int(b) 261 } 262 263 var index byte 264 for i, b := range r.mapping { 265 if b == 0 { 266 r.mapping[i] = byte(r.tableSize) 267 } else { 268 r.mapping[i] = index 269 index++ 270 } 271 } 272 // Ensure root node uses a lookup table (for performance). 273 r.root.table = make([]*trieNode, r.tableSize) 274 275 for i := 0; i < len(oldnew); i += 2 { 276 r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r) 277 } 278 return r 279 } 280 281 type appendSliceWriter []byte 282 283 // Write writes to the buffer to satisfy io.Writer. 284 func (w *appendSliceWriter) Write(p []byte) (int, error) { 285 *w = append(*w, p...) 286 return len(p), nil 287 } 288 289 // WriteString writes to the buffer without string->[]byte->string allocations. 290 func (w *appendSliceWriter) WriteString(s string) (int, error) { 291 *w = append(*w, s...) 292 return len(s), nil 293 } 294 295 type stringWriterIface interface { 296 WriteString(string) (int, error) 297 } 298 299 type stringWriter struct { 300 w io.Writer 301 } 302 303 func (w stringWriter) WriteString(s string) (int, error) { 304 return w.w.Write([]byte(s)) 305 } 306 307 func getStringWriter(w io.Writer) stringWriterIface { 308 sw, ok := w.(stringWriterIface) 309 if !ok { 310 sw = stringWriter{w} 311 } 312 return sw 313 } 314 315 func (r *genericReplacer) Replace(s string) string { 316 buf := make(appendSliceWriter, 0, len(s)) 317 r.WriteString(&buf, s) 318 return string(buf) 319 } 320 321 func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) { 322 sw := getStringWriter(w) 323 var last, wn int 324 var prevMatchEmpty bool 325 for i := 0; i <= len(s); { 326 // Ignore the empty match iff the previous loop found the empty match. 327 val, keylen, match := r.lookup(s[i:], prevMatchEmpty) 328 prevMatchEmpty = match && keylen == 0 329 if match { 330 wn, err = sw.WriteString(s[last:i]) 331 n += wn 332 if err != nil { 333 return 334 } 335 wn, err = sw.WriteString(val) 336 n += wn 337 if err != nil { 338 return 339 } 340 i += keylen 341 last = i 342 continue 343 } 344 i++ 345 } 346 if last != len(s) { 347 wn, err = sw.WriteString(s[last:]) 348 n += wn 349 } 350 return 351 } 352 353 // singleStringReplacer is the implementation that's used when there is only 354 // one string to replace (and that string has more than one byte). 355 type singleStringReplacer struct { 356 finder *stringFinder 357 // value is the new string that replaces that pattern when it's found. 358 value string 359 } 360 361 func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer { 362 return &singleStringReplacer{finder: makeStringFinder(pattern), value: value} 363 } 364 365 func (r *singleStringReplacer) Replace(s string) string { 366 var buf []byte 367 i, matched := 0, false 368 for { 369 match := r.finder.next(s[i:]) 370 if match == -1 { 371 break 372 } 373 matched = true 374 buf = append(buf, s[i:i+match]...) 375 buf = append(buf, r.value...) 376 i += match + len(r.finder.pattern) 377 } 378 if !matched { 379 return s 380 } 381 buf = append(buf, s[i:]...) 382 return string(buf) 383 } 384 385 func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) { 386 sw := getStringWriter(w) 387 var i, wn int 388 for { 389 match := r.finder.next(s[i:]) 390 if match == -1 { 391 break 392 } 393 wn, err = sw.WriteString(s[i : i+match]) 394 n += wn 395 if err != nil { 396 return 397 } 398 wn, err = sw.WriteString(r.value) 399 n += wn 400 if err != nil { 401 return 402 } 403 i += match + len(r.finder.pattern) 404 } 405 wn, err = sw.WriteString(s[i:]) 406 n += wn 407 return 408 } 409 410 // byteReplacer is the implementation that's used when all the "old" 411 // and "new" values are single ASCII bytes. 412 type byteReplacer struct { 413 // old has a bit set for each old byte that should be replaced. 414 old byteBitmap 415 416 // replacement byte, indexed by old byte. only valid if 417 // corresponding old bit is set. 418 new [256]byte 419 } 420 421 func (r *byteReplacer) Replace(s string) string { 422 var buf []byte // lazily allocated 423 for i := 0; i < len(s); i++ { 424 b := s[i] 425 if r.old[b>>5]&uint32(1<<(b&31)) != 0 { 426 if buf == nil { 427 buf = []byte(s) 428 } 429 buf[i] = r.new[b] 430 } 431 } 432 if buf == nil { 433 return s 434 } 435 return string(buf) 436 } 437 438 func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) { 439 // TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation. 440 bufsize := 32 << 10 441 if len(s) < bufsize { 442 bufsize = len(s) 443 } 444 buf := make([]byte, bufsize) 445 446 for len(s) > 0 { 447 ncopy := copy(buf, s[:]) 448 s = s[ncopy:] 449 for i, b := range buf[:ncopy] { 450 if r.old[b>>5]&uint32(1<<(b&31)) != 0 { 451 buf[i] = r.new[b] 452 } 453 } 454 wn, err := w.Write(buf[:ncopy]) 455 n += wn 456 if err != nil { 457 return n, err 458 } 459 } 460 return n, nil 461 } 462 463 // byteStringReplacer is the implementation that's used when all the 464 // "old" values are single ASCII bytes but the "new" values vary in 465 // size. 466 type byteStringReplacer struct { 467 // old has a bit set for each old byte that should be replaced. 468 old byteBitmap 469 470 // replacement string, indexed by old byte. only valid if 471 // corresponding old bit is set. 472 new [256][]byte 473 } 474 475 func (r *byteStringReplacer) Replace(s string) string { 476 newSize := 0 477 anyChanges := false 478 for i := 0; i < len(s); i++ { 479 b := s[i] 480 if r.old[b>>5]&uint32(1<<(b&31)) != 0 { 481 anyChanges = true 482 newSize += len(r.new[b]) 483 } else { 484 newSize++ 485 } 486 } 487 if !anyChanges { 488 return s 489 } 490 buf := make([]byte, newSize) 491 bi := buf 492 for i := 0; i < len(s); i++ { 493 b := s[i] 494 if r.old[b>>5]&uint32(1<<(b&31)) != 0 { 495 n := copy(bi, r.new[b]) 496 bi = bi[n:] 497 } else { 498 bi[0] = b 499 bi = bi[1:] 500 } 501 } 502 return string(buf) 503 } 504 505 // WriteString maintains one buffer that's at most 32KB. The bytes in 506 // s are enumerated and the buffer is filled. If it reaches its 507 // capacity or a byte has a replacement, the buffer is flushed to w. 508 func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) { 509 // TODO(bradfitz): use io.WriteString with slices of s instead. 510 bufsize := 32 << 10 511 if len(s) < bufsize { 512 bufsize = len(s) 513 } 514 buf := make([]byte, bufsize) 515 bi := buf[:0] 516 517 for i := 0; i < len(s); i++ { 518 b := s[i] 519 var new []byte 520 if r.old[b>>5]&uint32(1<<(b&31)) != 0 { 521 new = r.new[b] 522 } else { 523 bi = append(bi, b) 524 } 525 if len(bi) == cap(bi) || (len(bi) > 0 && len(new) > 0) { 526 nw, err := w.Write(bi) 527 n += nw 528 if err != nil { 529 return n, err 530 } 531 bi = buf[:0] 532 } 533 if len(new) > 0 { 534 nw, err := w.Write(new) 535 n += nw 536 if err != nil { 537 return n, err 538 } 539 } 540 } 541 if len(bi) > 0 { 542 nw, err := w.Write(bi) 543 n += nw 544 if err != nil { 545 return n, err 546 } 547 } 548 return n, nil 549 }