github.com/serversong/goreporter@v0.0.0-20200325104552-3cfaf44fd178/linters/spellcheck/misspell/replace.go (about) 1 package misspell 2 3 import ( 4 "bufio" 5 "bytes" 6 "io" 7 "regexp" 8 "strings" 9 ) 10 11 func max(x, y int) int { 12 if x > y { 13 return x 14 } 15 return y 16 } 17 18 func inArray(haystack []string, needle string) bool { 19 for _, word := range haystack { 20 if needle == word { 21 return true 22 } 23 } 24 return false 25 } 26 27 var wordRegexp = regexp.MustCompile(`[a-zA-Z0-9']+`) 28 29 // Diff is datastructure showing what changed in a single line 30 type Diff struct { 31 Filename string 32 FullLine string 33 Line int 34 Column int 35 Original string 36 Corrected string 37 } 38 39 // Replacer is the main struct for spelling correction 40 type Replacer struct { 41 Replacements []string 42 Debug bool 43 engine *strings.Replacer 44 corrected map[string]string 45 } 46 47 // New creates a new default Replacer using the main rule list 48 func New() *Replacer { 49 r := Replacer{ 50 Replacements: DictMain, 51 } 52 r.Compile() 53 return &r 54 } 55 56 // RemoveRule deletes existings rules. 57 // TODO: make inplace to save memory 58 func (r *Replacer) RemoveRule(ignore []string) { 59 newwords := make([]string, 0, len(r.Replacements)) 60 for i := 0; i < len(r.Replacements); i += 2 { 61 if inArray(ignore, r.Replacements[i]) { 62 continue 63 } 64 newwords = append(newwords, r.Replacements[i:i+2]...) 65 } 66 r.engine = nil 67 r.Replacements = newwords 68 } 69 70 // AddRuleList appends new rules. 71 // Input is in the same form as Strings.Replacer: [ old1, new1, old2, new2, ....] 72 // Note: does not check for duplictes 73 func (r *Replacer) AddRuleList(additions []string) { 74 r.engine = nil 75 r.Replacements = append(r.Replacements, additions...) 76 } 77 78 // Compile compiles the rules. Required before using the Replace functions 79 func (r *Replacer) Compile() { 80 81 r.corrected = make(map[string]string, len(r.Replacements)/2) 82 for i := 0; i < len(r.Replacements); i += 2 { 83 r.corrected[r.Replacements[i]] = r.Replacements[i+1] 84 } 85 r.engine = strings.NewReplacer(r.Replacements...) 86 } 87 88 /* 89 line1 and line2 are different 90 extract words from each line1 91 92 replace word -> newword 93 if word == new-word 94 continue 95 if new-word in list of replacements 96 continue 97 new word not original, and not in list of replacements 98 some substring got mixed up. UNdo 99 */ 100 func (r *Replacer) recheckLine(s string, lineNum int, buf io.Writer, next func(Diff)) { 101 first := 0 102 redacted := RemoveNotWords(s) 103 104 idx := wordRegexp.FindAllStringIndex(redacted, -1) 105 for _, ab := range idx { 106 word := s[ab[0]:ab[1]] 107 newword := r.engine.Replace(word) 108 if newword == word { 109 // no replacement done 110 continue 111 } 112 if r.corrected[word] == newword { 113 // word got corrected into something we know 114 io.WriteString(buf, s[first:ab[0]]) 115 io.WriteString(buf, newword) 116 first = ab[1] 117 next(Diff{ 118 FullLine: s, 119 Line: lineNum, 120 Original: word, 121 Corrected: newword, 122 Column: ab[0], 123 }) 124 continue 125 } 126 // Word got corrected into something unknown. Ignore it 127 } 128 io.WriteString(buf, s[first:]) 129 } 130 131 // Replace is corrects misspellings in input, returning corrected version 132 // along with a list of diffs. 133 func (r *Replacer) Replace(input string) (string, []Diff) { 134 output := r.engine.Replace(input) 135 if input == output { 136 return input, nil 137 } 138 diffs := make([]Diff, 0, 8) 139 buf := bytes.NewBuffer(make([]byte, 0, max(len(input), len(output))+100)) 140 // faster that making a bytes.Buffer and bufio.ReadString 141 outlines := strings.SplitAfter(output, "\n") 142 inlines := strings.SplitAfter(input, "\n") 143 for i := 0; i < len(inlines); i++ { 144 if inlines[i] == outlines[i] { 145 buf.WriteString(outlines[i]) 146 continue 147 } 148 r.recheckLine(inlines[i], i+1, buf, func(d Diff) { 149 diffs = append(diffs, d) 150 }) 151 } 152 153 return buf.String(), diffs 154 } 155 156 // ReplaceReader applies spelling corrections to a reader stream. Diffs are 157 // emitted through a callback. 158 func (r *Replacer) ReplaceReader(raw io.Reader, w io.Writer, next func(Diff)) error { 159 var ( 160 err error 161 line string 162 lineNum int 163 ) 164 reader := bufio.NewReader(raw) 165 for err == nil { 166 lineNum++ 167 line, err = reader.ReadString('\n') 168 169 // if it's EOF, then line has the last line 170 // don't like the check of err here and 171 // in for loop 172 if err != nil && err != io.EOF { 173 return err 174 } 175 // easily 5x faster than regexp+map 176 if line == r.engine.Replace(line) { 177 io.WriteString(w, line) 178 continue 179 } 180 // but it can be inaccurate, so we need to double check 181 r.recheckLine(line, lineNum, w, next) 182 } 183 return nil 184 }