gopkg.in/alecthomas/gometalinter.v3@v3.0.0/_linters/src/github.com/client9/misspell/stringreplacer.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package misspell
     6  
     7  import (
     8  	"io"
     9  	//	"log"
    10  	"strings"
    11  )
    12  
// StringReplacer replaces a list of strings with replacements.
// It is safe for concurrent use by multiple goroutines.
//
// It is a thin wrapper: every method forwards to the underlying replacer
// stored in r.
type StringReplacer struct {
	// r is the replacement algorithm that performs the actual work.
	r replacer
}
    18  
// replacer is the interface that a replacement algorithm needs to implement.
type replacer interface {
	// Replace returns a copy of s with all replacements performed.
	Replace(s string) string
	// WriteString writes s to w with all replacements performed, returning
	// the number of bytes written and any write error.
	WriteString(w io.Writer, s string) (n int, err error)
}
    24  
    25  // NewStringReplacer returns a new Replacer from a list of old, new string pairs.
    26  // Replacements are performed in order, without overlapping matches.
    27  func NewStringReplacer(oldnew ...string) *StringReplacer {
    28  	if len(oldnew)%2 == 1 {
    29  		panic("strings.NewReplacer: odd argument count")
    30  	}
    31  
    32  	return &StringReplacer{r: makeGenericReplacer(oldnew)}
    33  }
    34  
    35  // Replace returns a copy of s with all replacements performed.
    36  func (r *StringReplacer) Replace(s string) string {
    37  	return r.r.Replace(s)
    38  }
    39  
    40  // WriteString writes s to w with all replacements performed.
    41  func (r *StringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
    42  	return r.r.WriteString(w, s)
    43  }
    44  
// trieNode is a node in a lookup trie for prioritized key/value pairs. Keys
// and values may be empty. For example, the trie containing keys "ax", "ay",
// "bcbc", "x" and "xy" could have eight nodes:
//
//  n0  -
//  n1  a-
//  n2  .x+
//  n3  .y+
//  n4  b-
//  n5  .cbc+
//  n6  x+
//  n7  .y+
//
// n0 is the root node, and its children are n1, n4 and n6; n1's children are
// n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked
// with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7
// (marked with a trailing "+") are complete keys.
type trieNode struct {
	// value is the value of the trie node's key/value pair. It is empty if
	// this node is not a complete key.
	value string
	// priority is the priority (higher is more important) of the trie node's
	// key/value pair; keys are not necessarily matched shortest- or longest-
	// first. Priority is positive if this node is a complete key, and zero
	// otherwise. In the example above, positive/zero priorities are marked
	// with a trailing "+" or "-".
	//
	// makeGenericReplacer assigns len(oldnew)-i to the pair at index i, so
	// pairs listed earlier get higher priorities. add only stores a
	// value/priority while priority is still zero, so the first value
	// added for a given key is the one that is kept.
	priority int

	// A trie node may have zero, one or more child nodes:
	//  * if the remaining fields are zero, there are no children.
	//  * if prefix and next are non-zero, there is one child in next.
	//  * if table is non-zero, it defines all the children.
	//
	// Prefixes are preferred over tables when there is one child, but the
	// root node always uses a table for lookup efficiency.

	// prefix is the difference in keys between this trie node and the next.
	// In the example above, node n4 has prefix "cbc" and n4's next node is n5.
	// Node n5 has no children and so has zero prefix, next and table fields.
	prefix string
	next   *trieNode

	// table is a lookup table indexed by the next byte in the key, after
	// remapping that byte through genericReplacer.mapping to create a dense
	// index. In the example above, the keys only use 'a', 'b', 'c', 'x' and
	// 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and
	// genericReplacer.tableSize will be 5. Node n0's table will be
	// []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped
	// 'a', 'b' and 'x'. Slots with no child are left nil.
	table []*trieNode
}
    96  
    97  func (t *trieNode) add(key, val string, priority int, r *genericReplacer) {
    98  	if key == "" {
    99  		if t.priority == 0 {
   100  			t.value = val
   101  			t.priority = priority
   102  		}
   103  		return
   104  	}
   105  
   106  	if t.prefix != "" {
   107  		// Need to split the prefix among multiple nodes.
   108  		var n int // length of the longest common prefix
   109  		for ; n < len(t.prefix) && n < len(key); n++ {
   110  			if t.prefix[n] != key[n] {
   111  				break
   112  			}
   113  		}
   114  		if n == len(t.prefix) {
   115  			t.next.add(key[n:], val, priority, r)
   116  		} else if n == 0 {
   117  			// First byte differs, start a new lookup table here. Looking up
   118  			// what is currently t.prefix[0] will lead to prefixNode, and
   119  			// looking up key[0] will lead to keyNode.
   120  			var prefixNode *trieNode
   121  			if len(t.prefix) == 1 {
   122  				prefixNode = t.next
   123  			} else {
   124  				prefixNode = &trieNode{
   125  					prefix: t.prefix[1:],
   126  					next:   t.next,
   127  				}
   128  			}
   129  			keyNode := new(trieNode)
   130  			t.table = make([]*trieNode, r.tableSize)
   131  			t.table[r.mapping[t.prefix[0]]] = prefixNode
   132  			t.table[r.mapping[key[0]]] = keyNode
   133  			t.prefix = ""
   134  			t.next = nil
   135  			keyNode.add(key[1:], val, priority, r)
   136  		} else {
   137  			// Insert new node after the common section of the prefix.
   138  			next := &trieNode{
   139  				prefix: t.prefix[n:],
   140  				next:   t.next,
   141  			}
   142  			t.prefix = t.prefix[:n]
   143  			t.next = next
   144  			next.add(key[n:], val, priority, r)
   145  		}
   146  	} else if t.table != nil {
   147  		// Insert into existing table.
   148  		m := r.mapping[key[0]]
   149  		if t.table[m] == nil {
   150  			t.table[m] = new(trieNode)
   151  		}
   152  		t.table[m].add(key[1:], val, priority, r)
   153  	} else {
   154  		t.prefix = key
   155  		t.next = new(trieNode)
   156  		t.next.add("", val, priority, r)
   157  	}
   158  }
   159  
   160  func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) {
   161  	// Iterate down the trie to the end, and grab the value and keylen with
   162  	// the highest priority.
   163  	bestPriority := 0
   164  	node := &r.root
   165  	n := 0
   166  	for node != nil {
   167  		if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
   168  			bestPriority = node.priority
   169  			val = node.value
   170  			keylen = n
   171  			found = true
   172  		}
   173  
   174  		if s == "" {
   175  			break
   176  		}
   177  		if node.table != nil {
   178  			index := r.mapping[ByteToLower(s[0])]
   179  			if int(index) == r.tableSize {
   180  				break
   181  			}
   182  			node = node.table[index]
   183  			s = s[1:]
   184  			n++
   185  		} else if node.prefix != "" && StringHasPrefixFold(s, node.prefix) {
   186  			n += len(node.prefix)
   187  			s = s[len(node.prefix):]
   188  			node = node.next
   189  		} else {
   190  			break
   191  		}
   192  	}
   193  	return
   194  }
   195  
// genericReplacer is the fully generic algorithm.
// It's used as a fallback when nothing faster can be used.
// In this file it is the only replacer that NewStringReplacer constructs.
type genericReplacer struct {
	// root is the root of the key trie; makeGenericReplacer always gives it
	// a lookup table.
	root trieNode
	// tableSize is the size of a trie node's lookup table. It is the number
	// of unique key bytes (counted after the keys have been lower-cased).
	tableSize int
	// mapping maps from key bytes to a dense index for trieNode.table.
	// Bytes that occur in no key map to tableSize.
	mapping [256]byte
}
   206  
   207  func makeGenericReplacer(oldnew []string) *genericReplacer {
   208  	r := new(genericReplacer)
   209  	// Find each byte used, then assign them each an index.
   210  	for i := 0; i < len(oldnew); i += 2 {
   211  		key := strings.ToLower(oldnew[i])
   212  		for j := 0; j < len(key); j++ {
   213  			r.mapping[key[j]] = 1
   214  		}
   215  	}
   216  
   217  	for _, b := range r.mapping {
   218  		r.tableSize += int(b)
   219  	}
   220  
   221  	var index byte
   222  	for i, b := range r.mapping {
   223  		if b == 0 {
   224  			r.mapping[i] = byte(r.tableSize)
   225  		} else {
   226  			r.mapping[i] = index
   227  			index++
   228  		}
   229  	}
   230  	// Ensure root node uses a lookup table (for performance).
   231  	r.root.table = make([]*trieNode, r.tableSize)
   232  
   233  	for i := 0; i < len(oldnew); i += 2 {
   234  		r.root.add(strings.ToLower(oldnew[i]), oldnew[i+1], len(oldnew)-i, r)
   235  	}
   236  	return r
   237  }
   238  
   239  type appendSliceWriter []byte
   240  
   241  // Write writes to the buffer to satisfy io.Writer.
   242  func (w *appendSliceWriter) Write(p []byte) (int, error) {
   243  	*w = append(*w, p...)
   244  	return len(p), nil
   245  }
   246  
   247  // WriteString writes to the buffer without string->[]byte->string allocations.
   248  func (w *appendSliceWriter) WriteString(s string) (int, error) {
   249  	*w = append(*w, s...)
   250  	return len(s), nil
   251  }
   252  
// stringWriterIface is implemented by writers that can accept a string
// directly. getStringWriter checks for it on an io.Writer before falling
// back to the stringWriter adapter.
type stringWriterIface interface {
	WriteString(string) (int, error)
}
   256  
   257  type stringWriter struct {
   258  	w io.Writer
   259  }
   260  
   261  func (w stringWriter) WriteString(s string) (int, error) {
   262  	return w.w.Write([]byte(s))
   263  }
   264  
   265  func getStringWriter(w io.Writer) stringWriterIface {
   266  	sw, ok := w.(stringWriterIface)
   267  	if !ok {
   268  		sw = stringWriter{w}
   269  	}
   270  	return sw
   271  }
   272  
   273  func (r *genericReplacer) Replace(s string) string {
   274  	buf := make(appendSliceWriter, 0, len(s))
   275  	r.WriteString(&buf, s)
   276  	return string(buf)
   277  }
   278  
// WriteString scans s from left to right, and writes it to w with every
// matched key replaced by its value. The value's letter case is adjusted
// according to CaseStyle of the matched text (see the switch below).
//
// n is the sum of the byte counts reported by the writer. On the first
// write error the function returns immediately with that error and the
// count accumulated so far.
func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) {
	sw := getStringWriter(w)
	// last is the index in s just past the most recent replacement; s[last:i]
	// is unmatched text that has not been written yet. wn holds the byte
	// count of the latest write.
	var last, wn int
	var prevMatchEmpty bool
	// i runs up to and including len(s) so that a lookup is also attempted
	// on the empty remainder at the end of s.
	for i := 0; i <= len(s); {
		// Fast path: s[i] is not a prefix of any pattern.
		// It only applies when the root holds no value (root.priority == 0),
		// i.e. when the empty string is not itself a key.
		if i != len(s) && r.root.priority == 0 {
			index := int(r.mapping[ByteToLower(s[i])])
			if index == r.tableSize || r.root.table[index] == nil {
				i++
				continue
			}
		}

		// Ignore the empty match iff the previous loop found the empty match.
		val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
		prevMatchEmpty = match && keylen == 0
		if match {
			// orig is the matched text exactly as it appears in s; its case
			// style decides how the replacement is cased.
			orig := s[i : i+keylen]
			switch CaseStyle(orig) {
			case CaseUnknown:
				// No case adjustment: val is written exactly as stored.
				// The code that would skip this match instead is disabled:
				//	i++
				//	continue
			case CaseUpper:
				val = strings.ToUpper(val)
			case CaseLower:
				val = strings.ToLower(val)
			case CaseTitle:
				// Upper-case the first byte and lower-case the rest; a
				// value shorter than two bytes is upper-cased whole.
				if len(val) < 2 {
					val = strings.ToUpper(val)
				} else {
					val = strings.ToUpper(val[:1]) + strings.ToLower(val[1:])
				}
			}
			// Flush the unmatched text that precedes this match.
			wn, err = sw.WriteString(s[last:i])
			n += wn
			if err != nil {
				return
			}
			// Then write the replacement in place of the matched text.
			wn, err = sw.WriteString(val)
			n += wn
			if err != nil {
				return
			}
			// Resume scanning right after the matched text.
			i += keylen
			last = i
			continue
		}
		i++
	}
	// Write whatever unmatched text remains after the final replacement.
	if last != len(s) {
		wn, err = sw.WriteString(s[last:])
		n += wn
	}
	return
}