github.com/serversong/goreporter@v0.0.0-20200325104552-3cfaf44fd178/linters/spellcheck/misspell/replace.go (about)

     1  package misspell
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"io"
     7  	"regexp"
     8  	"strings"
     9  )
    10  
    11  func max(x, y int) int {
    12  	if x > y {
    13  		return x
    14  	}
    15  	return y
    16  }
    17  
    18  func inArray(haystack []string, needle string) bool {
    19  	for _, word := range haystack {
    20  		if needle == word {
    21  			return true
    22  		}
    23  	}
    24  	return false
    25  }
    26  
// wordRegexp matches one candidate word: a maximal run of ASCII letters,
// digits and apostrophes. recheckLine uses it to find word boundaries.
var wordRegexp = regexp.MustCompile(`[a-zA-Z0-9']+`)
    28  
// Diff is a data structure describing one correction made within a single
// line of input.
type Diff struct {
	Filename  string // not set by any code in this file
	FullLine  string // the whole input line containing the word
	Line      int    // line number, starting at 1
	Column    int    // byte offset of the word within FullLine, starting at 0
	Original  string // the word as found in the input
	Corrected string // the replacement written in its place
}
    38  
// Replacer is the main struct for spelling correction.
// After changing the rules, call Compile before using the Replace functions.
type Replacer struct {
	Replacements []string          // flat rule pairs: old1, new1, old2, new2, ...
	Debug        bool              // not read by any code in this file
	engine       *strings.Replacer // built by Compile; nil after RemoveRule/AddRuleList
	corrected    map[string]string // misspelling -> correction; built by Compile
}
    46  
    47  // New creates a new default Replacer using the main rule list
    48  func New() *Replacer {
    49  	r := Replacer{
    50  		Replacements: DictMain,
    51  	}
    52  	r.Compile()
    53  	return &r
    54  }
    55  
    56  // RemoveRule deletes existings rules.
    57  // TODO: make inplace to save memory
    58  func (r *Replacer) RemoveRule(ignore []string) {
    59  	newwords := make([]string, 0, len(r.Replacements))
    60  	for i := 0; i < len(r.Replacements); i += 2 {
    61  		if inArray(ignore, r.Replacements[i]) {
    62  			continue
    63  		}
    64  		newwords = append(newwords, r.Replacements[i:i+2]...)
    65  	}
    66  	r.engine = nil
    67  	r.Replacements = newwords
    68  }
    69  
    70  // AddRuleList appends new rules.
    71  // Input is in the same form as Strings.Replacer: [ old1, new1, old2, new2, ....]
    72  // Note: does not check for duplictes
    73  func (r *Replacer) AddRuleList(additions []string) {
    74  	r.engine = nil
    75  	r.Replacements = append(r.Replacements, additions...)
    76  }
    77  
    78  // Compile compiles the rules.  Required before using the Replace functions
    79  func (r *Replacer) Compile() {
    80  
    81  	r.corrected = make(map[string]string, len(r.Replacements)/2)
    82  	for i := 0; i < len(r.Replacements); i += 2 {
    83  		r.corrected[r.Replacements[i]] = r.Replacements[i+1]
    84  	}
    85  	r.engine = strings.NewReplacer(r.Replacements...)
    86  }
    87  
    88  /*
    89  line1 and line2 are different
    90  extract words from each line1
    91  
    92  replace word -> newword
    93  if word == new-word
    94    continue
    95  if new-word in list of replacements
    96    continue
    97  new word not original, and not in list of replacements
    98    some substring got mixed up.  UNdo
    99  */
   100  func (r *Replacer) recheckLine(s string, lineNum int, buf io.Writer, next func(Diff)) {
   101  	first := 0
   102  	redacted := RemoveNotWords(s)
   103  
   104  	idx := wordRegexp.FindAllStringIndex(redacted, -1)
   105  	for _, ab := range idx {
   106  		word := s[ab[0]:ab[1]]
   107  		newword := r.engine.Replace(word)
   108  		if newword == word {
   109  			// no replacement done
   110  			continue
   111  		}
   112  		if r.corrected[word] == newword {
   113  			// word got corrected into something we know
   114  			io.WriteString(buf, s[first:ab[0]])
   115  			io.WriteString(buf, newword)
   116  			first = ab[1]
   117  			next(Diff{
   118  				FullLine:  s,
   119  				Line:      lineNum,
   120  				Original:  word,
   121  				Corrected: newword,
   122  				Column:    ab[0],
   123  			})
   124  			continue
   125  		}
   126  		// Word got corrected into something unknown. Ignore it
   127  	}
   128  	io.WriteString(buf, s[first:])
   129  }
   130  
   131  // Replace is corrects misspellings in input, returning corrected version
   132  //  along with a list of diffs.
   133  func (r *Replacer) Replace(input string) (string, []Diff) {
   134  	output := r.engine.Replace(input)
   135  	if input == output {
   136  		return input, nil
   137  	}
   138  	diffs := make([]Diff, 0, 8)
   139  	buf := bytes.NewBuffer(make([]byte, 0, max(len(input), len(output))+100))
   140  	// faster that making a bytes.Buffer and bufio.ReadString
   141  	outlines := strings.SplitAfter(output, "\n")
   142  	inlines := strings.SplitAfter(input, "\n")
   143  	for i := 0; i < len(inlines); i++ {
   144  		if inlines[i] == outlines[i] {
   145  			buf.WriteString(outlines[i])
   146  			continue
   147  		}
   148  		r.recheckLine(inlines[i], i+1, buf, func(d Diff) {
   149  			diffs = append(diffs, d)
   150  		})
   151  	}
   152  
   153  	return buf.String(), diffs
   154  }
   155  
   156  // ReplaceReader applies spelling corrections to a reader stream.  Diffs are
   157  // emitted through a callback.
   158  func (r *Replacer) ReplaceReader(raw io.Reader, w io.Writer, next func(Diff)) error {
   159  	var (
   160  		err     error
   161  		line    string
   162  		lineNum int
   163  	)
   164  	reader := bufio.NewReader(raw)
   165  	for err == nil {
   166  		lineNum++
   167  		line, err = reader.ReadString('\n')
   168  
   169  		// if it's EOF, then line has the last line
   170  		// don't like the check of err here and
   171  		// in for loop
   172  		if err != nil && err != io.EOF {
   173  			return err
   174  		}
   175  		// easily 5x faster than regexp+map
   176  		if line == r.engine.Replace(line) {
   177  			io.WriteString(w, line)
   178  			continue
   179  		}
   180  		// but it can be inaccurate, so we need to double check
   181  		r.recheckLine(line, lineNum, w, next)
   182  	}
   183  	return nil
   184  }