github.com/flyinox/gosm@v0.0.0-20171117061539-16768cb62077/src/strings/replace.go

github.com/flyinox/gosm@v0.0.0-20171117061539-16768cb62077/src/strings/replace.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package strings
     6  
     7  import "io"
     8  
// Replacer replaces a list of strings with replacements.
// It is safe for concurrent use by multiple goroutines.
//
// A Replacer is built by NewReplacer, which picks the concrete algorithm
// stored in r based on the shape of the old/new pairs.
type Replacer struct {
	// r is the replacement algorithm selected by NewReplacer; Replace and
	// WriteString delegate to it.
	r replacer
}
    14  
// replacer is the interface that a replacement algorithm needs to implement.
// In this file it is implemented by *genericReplacer, *singleStringReplacer,
// *byteReplacer and *byteStringReplacer.
type replacer interface {
	// Replace returns a copy of s with all replacements performed.
	Replace(s string) string
	// WriteString writes s to w with all replacements performed. It returns
	// the number of bytes written and the first write error encountered.
	WriteString(w io.Writer, s string) (n int, err error)
}
    20  
    21  // NewReplacer returns a new Replacer from a list of old, new string pairs.
    22  // Replacements are performed in order, without overlapping matches.
    23  func NewReplacer(oldnew ...string) *Replacer {
    24  	if len(oldnew)%2 == 1 {
    25  		panic("strings.NewReplacer: odd argument count")
    26  	}
    27  
    28  	if len(oldnew) == 2 && len(oldnew[0]) > 1 {
    29  		return &Replacer{r: makeSingleStringReplacer(oldnew[0], oldnew[1])}
    30  	}
    31  
    32  	allNewBytes := true
    33  	for i := 0; i < len(oldnew); i += 2 {
    34  		if len(oldnew[i]) != 1 {
    35  			return &Replacer{r: makeGenericReplacer(oldnew)}
    36  		}
    37  		if len(oldnew[i+1]) != 1 {
    38  			allNewBytes = false
    39  		}
    40  	}
    41  
    42  	if allNewBytes {
    43  		r := byteReplacer{}
    44  		for i := range r {
    45  			r[i] = byte(i)
    46  		}
    47  		// The first occurrence of old->new map takes precedence
    48  		// over the others with the same old string.
    49  		for i := len(oldnew) - 2; i >= 0; i -= 2 {
    50  			o := oldnew[i][0]
    51  			n := oldnew[i+1][0]
    52  			r[o] = n
    53  		}
    54  		return &Replacer{r: &r}
    55  	}
    56  
    57  	r := byteStringReplacer{}
    58  	// The first occurrence of old->new map takes precedence
    59  	// over the others with the same old string.
    60  	for i := len(oldnew) - 2; i >= 0; i -= 2 {
    61  		o := oldnew[i][0]
    62  		n := oldnew[i+1]
    63  		r[o] = []byte(n)
    64  	}
    65  	return &Replacer{r: &r}
    66  }
    67  
// Replace returns a copy of s with all replacements performed.
// It delegates to the replacement algorithm stored in the Replacer.
func (r *Replacer) Replace(s string) string {
	return r.r.Replace(s)
}
    72  
// WriteString writes s to w with all replacements performed.
// It delegates to the replacement algorithm stored in the Replacer and
// returns that algorithm's byte count n and error err unchanged.
func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) {
	return r.r.WriteString(w, s)
}
    77  
// trieNode is a node in a lookup trie for prioritized key/value pairs. Keys
// and values may be empty. For example, the trie containing keys "ax", "ay",
// "bcbc", "x" and "xy" could have eight nodes:
//
//  n0  -
//  n1  a-
//  n2  .x+
//  n3  .y+
//  n4  b-
//  n5  .cbc+
//  n6  x+
//  n7  .y+
//
// n0 is the root node, and its children are n1, n4 and n6; n1's children are
// n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked
// with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7
// (marked with a trailing "+") are complete keys.
type trieNode struct {
	// value is the value of the trie node's key/value pair. It is empty if
	// this node is not a complete key. Because values may themselves be
	// empty, an empty value does not imply an incomplete key; priority is
	// what distinguishes complete keys from partial ones.
	value string
	// priority is the priority (higher is more important) of the trie node's
	// key/value pair; keys are not necessarily matched shortest- or longest-
	// first. Priority is positive if this node is a complete key, and zero
	// otherwise. In the example above, positive/zero priorities are marked
	// with a trailing "+" or "-".
	priority int

	// A trie node may have zero, one or more child nodes:
	//  * if the remaining fields are zero, there are no children.
	//  * if prefix and next are non-zero, there is one child in next.
	//  * if table is non-zero, it defines all the children.
	//
	// Prefixes are preferred over tables when there is one child, but the
	// root node always uses a table for lookup efficiency.

	// prefix is the difference in keys between this trie node and the next.
	// In the example above, node n4 has prefix "cbc" and n4's next node is n5.
	// Node n5 has no children and so has zero prefix, next and table fields.
	prefix string
	next   *trieNode

	// table is a lookup table indexed by the next byte in the key, after
	// remapping that byte through genericReplacer.mapping to create a dense
	// index. In the example above, the keys only use 'a', 'b', 'c', 'x' and
	// 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and
	// genericReplacer.tableSize will be 5. Node n0's table will be
	// []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped
	// 'a', 'b' and 'x'. Entries for bytes with no child are nil.
	table []*trieNode
}
   129  
// add inserts the key/value pair into the subtrie rooted at t. key is the
// part of the full key that has not yet been consumed on the path to t, and
// priority is stored alongside val once the key is exhausted (a zero
// priority marks an incomplete key, so callers are expected to pass a
// positive one). r supplies the byte mapping and table size needed whenever
// a lookup table has to be created.
//
// If the key is already present, the existing value and priority are kept,
// so the first insertion of a key wins.
func (t *trieNode) add(key, val string, priority int, r *genericReplacer) {
	if key == "" {
		// The whole key has been consumed: t is the node for this key.
		// Only claim it if no earlier pair already did.
		if t.priority == 0 {
			t.value = val
			t.priority = priority
		}
		return
	}

	if t.prefix != "" {
		// t currently has a single child reached through prefix. Compare
		// key with prefix to decide whether to descend or to split the
		// prefix among multiple nodes.
		var n int // length of the longest common prefix
		for ; n < len(t.prefix) && n < len(key); n++ {
			if t.prefix[n] != key[n] {
				break
			}
		}
		if n == len(t.prefix) {
			// key starts with the whole prefix: continue in the child.
			t.next.add(key[n:], val, priority, r)
		} else if n == 0 {
			// First byte differs, start a new lookup table here. Looking up
			// what is currently t.prefix[0] will lead to prefixNode, and
			// looking up key[0] will lead to keyNode.
			var prefixNode *trieNode
			if len(t.prefix) == 1 {
				// The table lookup consumes the only prefix byte, so the
				// existing child can be reused directly.
				prefixNode = t.next
			} else {
				// The table lookup consumes the first prefix byte; the rest
				// of the prefix moves into a new intermediate node.
				prefixNode = &trieNode{
					prefix: t.prefix[1:],
					next:   t.next,
				}
			}
			keyNode := new(trieNode)
			t.table = make([]*trieNode, r.tableSize)
			t.table[r.mapping[t.prefix[0]]] = prefixNode
			t.table[r.mapping[key[0]]] = keyNode
			// t is now a table node, so its prefix/next fields are cleared.
			t.prefix = ""
			t.next = nil
			keyNode.add(key[1:], val, priority, r)
		} else {
			// Insert new node after the common section of the prefix. The
			// new node inherits the unshared tail of the prefix and the old
			// child; the remainder of key (possibly empty) is added below it.
			next := &trieNode{
				prefix: t.prefix[n:],
				next:   t.next,
			}
			t.prefix = t.prefix[:n]
			t.next = next
			next.add(key[n:], val, priority, r)
		}
	} else if t.table != nil {
		// Insert into existing table, creating the child for key[0] on
		// first use.
		m := r.mapping[key[0]]
		if t.table[m] == nil {
			t.table[m] = new(trieNode)
		}
		t.table[m].add(key[1:], val, priority, r)
	} else {
		// t has no children yet: hang the whole remaining key off it as a
		// prefix leading to a fresh node that holds the value.
		t.prefix = key
		t.next = new(trieNode)
		t.next.add("", val, priority, r)
	}
}
   192  
   193  func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) {
   194  	// Iterate down the trie to the end, and grab the value and keylen with
   195  	// the highest priority.
   196  	bestPriority := 0
   197  	node := &r.root
   198  	n := 0
   199  	for node != nil {
   200  		if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
   201  			bestPriority = node.priority
   202  			val = node.value
   203  			keylen = n
   204  			found = true
   205  		}
   206  
   207  		if s == "" {
   208  			break
   209  		}
   210  		if node.table != nil {
   211  			index := r.mapping[s[0]]
   212  			if int(index) == r.tableSize {
   213  				break
   214  			}
   215  			node = node.table[index]
   216  			s = s[1:]
   217  			n++
   218  		} else if node.prefix != "" && HasPrefix(s, node.prefix) {
   219  			n += len(node.prefix)
   220  			s = s[len(node.prefix):]
   221  			node = node.next
   222  		} else {
   223  			break
   224  		}
   225  	}
   226  	return
   227  }
   228  
// genericReplacer is the fully generic algorithm.
// It's used as a fallback when nothing faster can be used.
type genericReplacer struct {
	// root is the root of the trie that holds every old string as a key.
	// makeGenericReplacer always gives it a lookup table.
	root trieNode
	// tableSize is the size of a trie node's lookup table. It is the number
	// of unique key bytes. It is also the mapping value assigned to every
	// byte that appears in no key.
	tableSize int
	// mapping maps from key bytes to a dense index for trieNode.table.
	mapping [256]byte
}
   239  
   240  func makeGenericReplacer(oldnew []string) *genericReplacer {
   241  	r := new(genericReplacer)
   242  	// Find each byte used, then assign them each an index.
   243  	for i := 0; i < len(oldnew); i += 2 {
   244  		key := oldnew[i]
   245  		for j := 0; j < len(key); j++ {
   246  			r.mapping[key[j]] = 1
   247  		}
   248  	}
   249  
   250  	for _, b := range r.mapping {
   251  		r.tableSize += int(b)
   252  	}
   253  
   254  	var index byte
   255  	for i, b := range r.mapping {
   256  		if b == 0 {
   257  			r.mapping[i] = byte(r.tableSize)
   258  		} else {
   259  			r.mapping[i] = index
   260  			index++
   261  		}
   262  	}
   263  	// Ensure root node uses a lookup table (for performance).
   264  	r.root.table = make([]*trieNode, r.tableSize)
   265  
   266  	for i := 0; i < len(oldnew); i += 2 {
   267  		r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r)
   268  	}
   269  	return r
   270  }
   271  
   272  type appendSliceWriter []byte
   273  
   274  // Write writes to the buffer to satisfy io.Writer.
   275  func (w *appendSliceWriter) Write(p []byte) (int, error) {
   276  	*w = append(*w, p...)
   277  	return len(p), nil
   278  }
   279  
   280  // WriteString writes to the buffer without string->[]byte->string allocations.
   281  func (w *appendSliceWriter) WriteString(s string) (int, error) {
   282  	*w = append(*w, s...)
   283  	return len(s), nil
   284  }
   285  
// stringWriterIface is implemented by writers that can write a string
// directly. getStringWriter uses it to detect such writers.
type stringWriterIface interface {
	// WriteString writes the string and returns the number of bytes written
	// and any write error.
	WriteString(string) (int, error)
}
   289  
   290  type stringWriter struct {
   291  	w io.Writer
   292  }
   293  
   294  func (w stringWriter) WriteString(s string) (int, error) {
   295  	return w.w.Write([]byte(s))
   296  }
   297  
   298  func getStringWriter(w io.Writer) stringWriterIface {
   299  	sw, ok := w.(stringWriterIface)
   300  	if !ok {
   301  		sw = stringWriter{w}
   302  	}
   303  	return sw
   304  }
   305  
// Replace returns a copy of s with all replacements performed. It runs
// WriteString against an in-memory buffer whose capacity starts at len(s).
func (r *genericReplacer) Replace(s string) string {
	buf := make(appendSliceWriter, 0, len(s))
	// The results are deliberately ignored: appendSliceWriter's Write and
	// WriteString always return a nil error.
	r.WriteString(&buf, s)
	return string(buf)
}
   311  
   312  func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) {
   313  	sw := getStringWriter(w)
   314  	var last, wn int
   315  	var prevMatchEmpty bool
   316  	for i := 0; i <= len(s); {
   317  		// Fast path: s[i] is not a prefix of any pattern.
   318  		if i != len(s) && r.root.priority == 0 {
   319  			index := int(r.mapping[s[i]])
   320  			if index == r.tableSize || r.root.table[index] == nil {
   321  				i++
   322  				continue
   323  			}
   324  		}
   325  
   326  		// Ignore the empty match iff the previous loop found the empty match.
   327  		val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
   328  		prevMatchEmpty = match && keylen == 0
   329  		if match {
   330  			wn, err = sw.WriteString(s[last:i])
   331  			n += wn
   332  			if err != nil {
   333  				return
   334  			}
   335  			wn, err = sw.WriteString(val)
   336  			n += wn
   337  			if err != nil {
   338  				return
   339  			}
   340  			i += keylen
   341  			last = i
   342  			continue
   343  		}
   344  		i++
   345  	}
   346  	if last != len(s) {
   347  		wn, err = sw.WriteString(s[last:])
   348  		n += wn
   349  	}
   350  	return
   351  }
   352  
// singleStringReplacer is the implementation that's used when there is only
// one string to replace (and that string has more than one byte).
type singleStringReplacer struct {
	// finder locates occurrences of the pattern. It is built by
	// makeStringFinder; its pattern field holds the string being replaced.
	finder *stringFinder
	// value is the new string that replaces that pattern when it's found.
	value string
}
   360  
// makeSingleStringReplacer returns a singleStringReplacer that replaces
// pattern with value. The finder for pattern is built once here, via
// makeStringFinder.
func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer {
	return &singleStringReplacer{finder: makeStringFinder(pattern), value: value}
}
   364  
   365  func (r *singleStringReplacer) Replace(s string) string {
   366  	var buf []byte
   367  	i, matched := 0, false
   368  	for {
   369  		match := r.finder.next(s[i:])
   370  		if match == -1 {
   371  			break
   372  		}
   373  		matched = true
   374  		buf = append(buf, s[i:i+match]...)
   375  		buf = append(buf, r.value...)
   376  		i += match + len(r.finder.pattern)
   377  	}
   378  	if !matched {
   379  		return s
   380  	}
   381  	buf = append(buf, s[i:]...)
   382  	return string(buf)
   383  }
   384  
   385  func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
   386  	sw := getStringWriter(w)
   387  	var i, wn int
   388  	for {
   389  		match := r.finder.next(s[i:])
   390  		if match == -1 {
   391  			break
   392  		}
   393  		wn, err = sw.WriteString(s[i : i+match])
   394  		n += wn
   395  		if err != nil {
   396  			return
   397  		}
   398  		wn, err = sw.WriteString(r.value)
   399  		n += wn
   400  		if err != nil {
   401  			return
   402  		}
   403  		i += match + len(r.finder.pattern)
   404  	}
   405  	wn, err = sw.WriteString(s[i:])
   406  	n += wn
   407  	return
   408  }
   409  
   410  // byteReplacer is the implementation that's used when all the "old"
   411  // and "new" values are single ASCII bytes.
   412  // The array contains replacement bytes indexed by old byte.
   413  type byteReplacer [256]byte
   414  
   415  func (r *byteReplacer) Replace(s string) string {
   416  	var buf []byte // lazily allocated
   417  	for i := 0; i < len(s); i++ {
   418  		b := s[i]
   419  		if r[b] != b {
   420  			if buf == nil {
   421  				buf = []byte(s)
   422  			}
   423  			buf[i] = r[b]
   424  		}
   425  	}
   426  	if buf == nil {
   427  		return s
   428  	}
   429  	return string(buf)
   430  }
   431  
   432  func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
   433  	// TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation.
   434  	bufsize := 32 << 10
   435  	if len(s) < bufsize {
   436  		bufsize = len(s)
   437  	}
   438  	buf := make([]byte, bufsize)
   439  
   440  	for len(s) > 0 {
   441  		ncopy := copy(buf, s[:])
   442  		s = s[ncopy:]
   443  		for i, b := range buf[:ncopy] {
   444  			buf[i] = r[b]
   445  		}
   446  		wn, err := w.Write(buf[:ncopy])
   447  		n += wn
   448  		if err != nil {
   449  			return n, err
   450  		}
   451  	}
   452  	return n, nil
   453  }
   454  
   455  // byteStringReplacer is the implementation that's used when all the
   456  // "old" values are single ASCII bytes but the "new" values vary in size.
   457  // The array contains replacement byte slices indexed by old byte.
   458  // A nil []byte means that the old byte should not be replaced.
   459  type byteStringReplacer [256][]byte
   460  
   461  func (r *byteStringReplacer) Replace(s string) string {
   462  	newSize := len(s)
   463  	anyChanges := false
   464  	for i := 0; i < len(s); i++ {
   465  		b := s[i]
   466  		if r[b] != nil {
   467  			anyChanges = true
   468  			// The -1 is because we are replacing 1 byte with len(r[b]) bytes.
   469  			newSize += len(r[b]) - 1
   470  		}
   471  	}
   472  	if !anyChanges {
   473  		return s
   474  	}
   475  	buf := make([]byte, newSize)
   476  	bi := buf
   477  	for i := 0; i < len(s); i++ {
   478  		b := s[i]
   479  		if r[b] != nil {
   480  			n := copy(bi, r[b])
   481  			bi = bi[n:]
   482  		} else {
   483  			bi[0] = b
   484  			bi = bi[1:]
   485  		}
   486  	}
   487  	return string(buf)
   488  }
   489  
   490  func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
   491  	sw := getStringWriter(w)
   492  	last := 0
   493  	for i := 0; i < len(s); i++ {
   494  		b := s[i]
   495  		if r[b] == nil {
   496  			continue
   497  		}
   498  		if last != i {
   499  			nw, err := sw.WriteString(s[last:i])
   500  			n += nw
   501  			if err != nil {
   502  				return n, err
   503  			}
   504  		}
   505  		last = i + 1
   506  		nw, err := w.Write(r[b])
   507  		n += nw
   508  		if err != nil {
   509  			return n, err
   510  		}
   511  	}
   512  	if last != len(s) {
   513  		var nw int
   514  		nw, err = sw.WriteString(s[last:])
   515  		n += nw
   516  	}
   517  	return
   518  }