go4.org@v0.0.0-20230225012048-214862532bf5/bytereplacer/bytereplacer.go (about)

     1  /*
     2  Copyright 2015 The Perkeep Authors
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // Package bytereplacer provides a utility for replacing parts of byte slices.
    18  package bytereplacer // import "go4.org/bytereplacer"
    19  
    20  import "bytes"
    21  
    22  // Replacer replaces a list of strings with replacements.
    23  // It is safe for concurrent use by multiple goroutines.
    24  type Replacer struct {
    25  	r replacer
    26  }
    27  
    28  // replacer is the interface that a replacement algorithm needs to implement.
    29  type replacer interface {
    30  	// Replace performs all replacements, in-place if possible.
    31  	Replace(s []byte) []byte
    32  }
    33  
    34  // New returns a new Replacer from a list of old, new string pairs.
    35  // Replacements are performed in order, without overlapping matches.
    36  func New(oldnew ...string) *Replacer {
    37  	if len(oldnew)%2 == 1 {
    38  		panic("bytes.NewReplacer: odd argument count")
    39  	}
    40  
    41  	allNewBytes := true
    42  	for i := 0; i < len(oldnew); i += 2 {
    43  		if len(oldnew[i]) != 1 {
    44  			return &Replacer{r: makeGenericReplacer(oldnew)}
    45  		}
    46  		if len(oldnew[i+1]) != 1 {
    47  			allNewBytes = false
    48  		}
    49  	}
    50  
    51  	if allNewBytes {
    52  		r := byteReplacer{}
    53  		for i := range r {
    54  			r[i] = byte(i)
    55  		}
    56  		// The first occurrence of old->new map takes precedence
    57  		// over the others with the same old string.
    58  		for i := len(oldnew) - 2; i >= 0; i -= 2 {
    59  			o := oldnew[i][0]
    60  			n := oldnew[i+1][0]
    61  			r[o] = n
    62  		}
    63  		return &Replacer{r: &r}
    64  	}
    65  
    66  	return &Replacer{r: makeGenericReplacer(oldnew)}
    67  }
    68  
    69  // Replace performs all replacements in-place on s. If the capacity
    70  // of s is not sufficient, a new slice is allocated, otherwise Replace
    71  // returns s.
    72  func (r *Replacer) Replace(s []byte) []byte {
    73  	return r.r.Replace(s)
    74  }
    75  
    76  type trieNode struct {
    77  	value    []byte
    78  	priority int
    79  	prefix   []byte
    80  	next     *trieNode
    81  	table    []*trieNode
    82  }
    83  
    84  func (t *trieNode) add(key, val []byte, priority int, r *genericReplacer) {
    85  	if len(key) == 0 {
    86  		if t.priority == 0 {
    87  			t.value = val
    88  			t.priority = priority
    89  		}
    90  		return
    91  	}
    92  
    93  	if len(t.prefix) > 0 {
    94  		// Need to split the prefix among multiple nodes.
    95  		var n int // length of the longest common prefix
    96  		for ; n < len(t.prefix) && n < len(key); n++ {
    97  			if t.prefix[n] != key[n] {
    98  				break
    99  			}
   100  		}
   101  		if n == len(t.prefix) {
   102  			t.next.add(key[n:], val, priority, r)
   103  		} else if n == 0 {
   104  			// First byte differs, start a new lookup table here. Looking up
   105  			// what is currently t.prefix[0] will lead to prefixNode, and
   106  			// looking up key[0] will lead to keyNode.
   107  			var prefixNode *trieNode
   108  			if len(t.prefix) == 1 {
   109  				prefixNode = t.next
   110  			} else {
   111  				prefixNode = &trieNode{
   112  					prefix: t.prefix[1:],
   113  					next:   t.next,
   114  				}
   115  			}
   116  			keyNode := new(trieNode)
   117  			t.table = make([]*trieNode, r.tableSize)
   118  			t.table[r.mapping[t.prefix[0]]] = prefixNode
   119  			t.table[r.mapping[key[0]]] = keyNode
   120  			t.prefix = nil
   121  			t.next = nil
   122  			keyNode.add(key[1:], val, priority, r)
   123  		} else {
   124  			// Insert new node after the common section of the prefix.
   125  			next := &trieNode{
   126  				prefix: t.prefix[n:],
   127  				next:   t.next,
   128  			}
   129  			t.prefix = t.prefix[:n]
   130  			t.next = next
   131  			next.add(key[n:], val, priority, r)
   132  		}
   133  	} else if t.table != nil {
   134  		// Insert into existing table.
   135  		m := r.mapping[key[0]]
   136  		if t.table[m] == nil {
   137  			t.table[m] = new(trieNode)
   138  		}
   139  		t.table[m].add(key[1:], val, priority, r)
   140  	} else {
   141  		t.prefix = key
   142  		t.next = new(trieNode)
   143  		t.next.add(nil, val, priority, r)
   144  	}
   145  }
   146  
   147  func (r *genericReplacer) lookup(s []byte, ignoreRoot bool) (val []byte, keylen int, found bool) {
   148  	// Iterate down the trie to the end, and grab the value and keylen with
   149  	// the highest priority.
   150  	bestPriority := 0
   151  	node := &r.root
   152  	n := 0
   153  	for node != nil {
   154  		if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
   155  			bestPriority = node.priority
   156  			val = node.value
   157  			keylen = n
   158  			found = true
   159  		}
   160  
   161  		if len(s) == 0 {
   162  			break
   163  		}
   164  		if node.table != nil {
   165  			index := r.mapping[s[0]]
   166  			if int(index) == r.tableSize {
   167  				break
   168  			}
   169  			node = node.table[index]
   170  			s = s[1:]
   171  			n++
   172  		} else if len(node.prefix) > 0 && bytes.HasPrefix(s, node.prefix) {
   173  			n += len(node.prefix)
   174  			s = s[len(node.prefix):]
   175  			node = node.next
   176  		} else {
   177  			break
   178  		}
   179  	}
   180  	return
   181  }
   182  
   183  // genericReplacer is the fully generic algorithm.
   184  // It's used as a fallback when nothing faster can be used.
   185  type genericReplacer struct {
   186  	root trieNode
   187  	// tableSize is the size of a trie node's lookup table. It is the number
   188  	// of unique key bytes.
   189  	tableSize int
   190  	// mapping maps from key bytes to a dense index for trieNode.table.
   191  	mapping [256]byte
   192  }
   193  
   194  func makeGenericReplacer(oldnew []string) *genericReplacer {
   195  	r := new(genericReplacer)
   196  	// Find each byte used, then assign them each an index.
   197  	for i := 0; i < len(oldnew); i += 2 {
   198  		key := oldnew[i]
   199  		for j := 0; j < len(key); j++ {
   200  			r.mapping[key[j]] = 1
   201  		}
   202  	}
   203  
   204  	for _, b := range r.mapping {
   205  		r.tableSize += int(b)
   206  	}
   207  
   208  	var index byte
   209  	for i, b := range r.mapping {
   210  		if b == 0 {
   211  			r.mapping[i] = byte(r.tableSize)
   212  		} else {
   213  			r.mapping[i] = index
   214  			index++
   215  		}
   216  	}
   217  	// Ensure root node uses a lookup table (for performance).
   218  	r.root.table = make([]*trieNode, r.tableSize)
   219  
   220  	for i := 0; i < len(oldnew); i += 2 {
   221  		r.root.add([]byte(oldnew[i]), []byte(oldnew[i+1]), len(oldnew)-i, r)
   222  	}
   223  	return r
   224  }
   225  
   226  func (r *genericReplacer) Replace(s []byte) []byte {
   227  	var last int
   228  	var prevMatchEmpty bool
   229  	dst := s[:0]
   230  	grown := false
   231  	for i := 0; i <= len(s); {
   232  		// Fast path: s[i] is not a prefix of any pattern.
   233  		if i != len(s) && r.root.priority == 0 {
   234  			index := int(r.mapping[s[i]])
   235  			if index == r.tableSize || r.root.table[index] == nil {
   236  				i++
   237  				continue
   238  			}
   239  		}
   240  
   241  		// Ignore the empty match iff the previous loop found the empty match.
   242  		val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
   243  		prevMatchEmpty = match && keylen == 0
   244  		if match {
   245  			dst = append(dst, s[last:i]...)
   246  			if diff := len(val) - keylen; grown || diff < 0 {
   247  				dst = append(dst, val...)
   248  				i += keylen
   249  			} else if diff <= cap(s)-len(s) {
   250  				// The replacement is larger than the original, but can still fit in the original buffer.
   251  				copy(s[i+len(val):cap(dst)], s[i+keylen:])
   252  				dst = append(dst, val...)
   253  				s = s[:len(s)+diff]
   254  				i += len(val)
   255  			} else {
   256  				// The output will grow larger than the original buffer.  Allocate a new one.
   257  				grown = true
   258  				newDst := make([]byte, len(dst), cap(dst)+diff)
   259  				copy(newDst, dst)
   260  				dst = newDst
   261  
   262  				dst = append(dst, val...)
   263  				i += keylen
   264  			}
   265  			last = i
   266  			continue
   267  		}
   268  		i++
   269  	}
   270  	if last != len(s) {
   271  		dst = append(dst, s[last:]...)
   272  	}
   273  	return dst
   274  }
   275  
   276  // byteReplacer is the implementation that's used when all the "old"
   277  // and "new" values are single ASCII bytes.
   278  // The array contains replacement bytes indexed by old byte.
   279  type byteReplacer [256]byte
   280  
   281  func (r *byteReplacer) Replace(s []byte) []byte {
   282  	for i, b := range s {
   283  		s[i] = r[b]
   284  	}
   285  	return s
   286  }