gopkg.in/alecthomas/gometalinter.v3@v3.0.0/_linters/src/github.com/client9/misspell/replace.go (about)

     1  package misspell
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"io"
     7  	"regexp"
     8  	"strings"
     9  	"text/scanner"
    10  )
    11  
    12  func max(x, y int) int {
    13  	if x > y {
    14  		return x
    15  	}
    16  	return y
    17  }
    18  
    19  func inArray(haystack []string, needle string) bool {
    20  	for _, word := range haystack {
    21  		if needle == word {
    22  			return true
    23  		}
    24  	}
    25  	return false
    26  }
    27  
// wordRegexp matches a run of one or more ASCII letters, digits, or
// apostrophes. recheckLine uses it to locate candidate words in a line.
var wordRegexp = regexp.MustCompile(`[a-zA-Z0-9']+`)
    29  
    30  // Diff is datastructure showing what changed in a single line
    31  type Diff struct {
    32  	Filename  string
    33  	FullLine  string
    34  	Line      int
    35  	Column    int
    36  	Original  string
    37  	Corrected string
    38  }
    39  
// Replacer is the main struct for spelling correction.
//
// Replacements is a flat list of rule pairs in the form
// [old1, new1, old2, new2, ...]. Debug is not read anywhere in this file.
// engine is the compiled replacer built by Compile; RemoveRule and
// AddRuleList reset it to nil, so Compile must be called again after
// changing the rules. corrected maps each rule's old string to its new
// string and is rebuilt by Compile; recheckLine consults it with the
// lower-cased word to confirm that a replacement is a known correction.
type Replacer struct {
	Replacements []string
	Debug        bool
	engine       *StringReplacer
	corrected    map[string]string
}
    47  
    48  // New creates a new default Replacer using the main rule list
    49  func New() *Replacer {
    50  	r := Replacer{
    51  		Replacements: DictMain,
    52  	}
    53  	r.Compile()
    54  	return &r
    55  }
    56  
    57  // RemoveRule deletes existings rules.
    58  // TODO: make inplace to save memory
    59  func (r *Replacer) RemoveRule(ignore []string) {
    60  	newwords := make([]string, 0, len(r.Replacements))
    61  	for i := 0; i < len(r.Replacements); i += 2 {
    62  		if inArray(ignore, r.Replacements[i]) {
    63  			continue
    64  		}
    65  		newwords = append(newwords, r.Replacements[i:i+2]...)
    66  	}
    67  	r.engine = nil
    68  	r.Replacements = newwords
    69  }
    70  
    71  // AddRuleList appends new rules.
    72  // Input is in the same form as Strings.Replacer: [ old1, new1, old2, new2, ....]
    73  // Note: does not check for duplictes
    74  func (r *Replacer) AddRuleList(additions []string) {
    75  	r.engine = nil
    76  	r.Replacements = append(r.Replacements, additions...)
    77  }
    78  
    79  // Compile compiles the rules.  Required before using the Replace functions
    80  func (r *Replacer) Compile() {
    81  
    82  	r.corrected = make(map[string]string, len(r.Replacements)/2)
    83  	for i := 0; i < len(r.Replacements); i += 2 {
    84  		r.corrected[r.Replacements[i]] = r.Replacements[i+1]
    85  	}
    86  	r.engine = NewStringReplacer(r.Replacements...)
    87  }
    88  
    89  /*
    90  line1 and line2 are different
    91  extract words from each line1
    92  
    93  replace word -> newword
    94  if word == new-word
    95    continue
    96  if new-word in list of replacements
    97    continue
    98  new word not original, and not in list of replacements
    99    some substring got mixed up.  UNdo
   100  */
   101  func (r *Replacer) recheckLine(s string, lineNum int, buf io.Writer, next func(Diff)) {
   102  	first := 0
   103  	redacted := RemoveNotWords(s)
   104  
   105  	idx := wordRegexp.FindAllStringIndex(redacted, -1)
   106  	for _, ab := range idx {
   107  		word := s[ab[0]:ab[1]]
   108  		newword := r.engine.Replace(word)
   109  		if newword == word {
   110  			// no replacement done
   111  			continue
   112  		}
   113  
   114  		// ignore camelCase words
   115  		// https://github.com/client9/misspell/issues/113
   116  		if CaseStyle(word) == CaseUnknown {
   117  			continue
   118  		}
   119  
   120  		if StringEqualFold(r.corrected[strings.ToLower(word)], newword) {
   121  			// word got corrected into something we know
   122  			io.WriteString(buf, s[first:ab[0]])
   123  			io.WriteString(buf, newword)
   124  			first = ab[1]
   125  			next(Diff{
   126  				FullLine:  s,
   127  				Line:      lineNum,
   128  				Original:  word,
   129  				Corrected: newword,
   130  				Column:    ab[0],
   131  			})
   132  			continue
   133  		}
   134  		// Word got corrected into something unknown. Ignore it
   135  	}
   136  	io.WriteString(buf, s[first:])
   137  }
   138  
   139  // ReplaceGo is a specialized routine for correcting Golang source
   140  // files.  Currently only checks comments, not identifiers for
   141  // spelling.
   142  func (r *Replacer) ReplaceGo(input string) (string, []Diff) {
   143  	var s scanner.Scanner
   144  	s.Init(strings.NewReader(input))
   145  	s.Mode = scanner.ScanIdents | scanner.ScanFloats | scanner.ScanChars | scanner.ScanStrings | scanner.ScanRawStrings | scanner.ScanComments
   146  	lastPos := 0
   147  	output := ""
   148  Loop:
   149  	for {
   150  		switch s.Scan() {
   151  		case scanner.Comment:
   152  			origComment := s.TokenText()
   153  			newComment := r.engine.Replace(origComment)
   154  
   155  			if origComment != newComment {
   156  				// s.Pos().Offset is the end of the current token
   157  				// subtract len(origComment) to get the start of the token
   158  				offset := s.Pos().Offset
   159  				output = output + input[lastPos:offset-len(origComment)] + newComment
   160  				lastPos = offset
   161  			}
   162  		case scanner.EOF:
   163  			break Loop
   164  		}
   165  	}
   166  
   167  	if lastPos == 0 {
   168  		// no changes, no copies
   169  		return input, nil
   170  	}
   171  	if lastPos < len(input) {
   172  		output = output + input[lastPos:]
   173  	}
   174  	diffs := make([]Diff, 0, 8)
   175  	buf := bytes.NewBuffer(make([]byte, 0, max(len(input), len(output))+100))
   176  	// faster that making a bytes.Buffer and bufio.ReadString
   177  	outlines := strings.SplitAfter(output, "\n")
   178  	inlines := strings.SplitAfter(input, "\n")
   179  	for i := 0; i < len(inlines); i++ {
   180  		if inlines[i] == outlines[i] {
   181  			buf.WriteString(outlines[i])
   182  			continue
   183  		}
   184  		r.recheckLine(inlines[i], i+1, buf, func(d Diff) {
   185  			diffs = append(diffs, d)
   186  		})
   187  	}
   188  
   189  	return buf.String(), diffs
   190  
   191  }
   192  
   193  // Replace is corrects misspellings in input, returning corrected version
   194  //  along with a list of diffs.
   195  func (r *Replacer) Replace(input string) (string, []Diff) {
   196  	output := r.engine.Replace(input)
   197  	if input == output {
   198  		return input, nil
   199  	}
   200  	diffs := make([]Diff, 0, 8)
   201  	buf := bytes.NewBuffer(make([]byte, 0, max(len(input), len(output))+100))
   202  	// faster that making a bytes.Buffer and bufio.ReadString
   203  	outlines := strings.SplitAfter(output, "\n")
   204  	inlines := strings.SplitAfter(input, "\n")
   205  	for i := 0; i < len(inlines); i++ {
   206  		if inlines[i] == outlines[i] {
   207  			buf.WriteString(outlines[i])
   208  			continue
   209  		}
   210  		r.recheckLine(inlines[i], i+1, buf, func(d Diff) {
   211  			diffs = append(diffs, d)
   212  		})
   213  	}
   214  
   215  	return buf.String(), diffs
   216  }
   217  
   218  // ReplaceReader applies spelling corrections to a reader stream.  Diffs are
   219  // emitted through a callback.
   220  func (r *Replacer) ReplaceReader(raw io.Reader, w io.Writer, next func(Diff)) error {
   221  	var (
   222  		err     error
   223  		line    string
   224  		lineNum int
   225  	)
   226  	reader := bufio.NewReader(raw)
   227  	for err == nil {
   228  		lineNum++
   229  		line, err = reader.ReadString('\n')
   230  
   231  		// if it's EOF, then line has the last line
   232  		// don't like the check of err here and
   233  		// in for loop
   234  		if err != nil && err != io.EOF {
   235  			return err
   236  		}
   237  		// easily 5x faster than regexp+map
   238  		if line == r.engine.Replace(line) {
   239  			io.WriteString(w, line)
   240  			continue
   241  		}
   242  		// but it can be inaccurate, so we need to double check
   243  		r.recheckLine(line, lineNum, w, next)
   244  	}
   245  	return nil
   246  }