gopkg.in/alecthomas/gometalinter.v3@v3.0.0/_linters/src/github.com/client9/misspell/replace.go (about) 1 package misspell 2 3 import ( 4 "bufio" 5 "bytes" 6 "io" 7 "regexp" 8 "strings" 9 "text/scanner" 10 ) 11 12 func max(x, y int) int { 13 if x > y { 14 return x 15 } 16 return y 17 } 18 19 func inArray(haystack []string, needle string) bool { 20 for _, word := range haystack { 21 if needle == word { 22 return true 23 } 24 } 25 return false 26 } 27 28 var wordRegexp = regexp.MustCompile(`[a-zA-Z0-9']+`) 29 30 // Diff is datastructure showing what changed in a single line 31 type Diff struct { 32 Filename string 33 FullLine string 34 Line int 35 Column int 36 Original string 37 Corrected string 38 } 39 40 // Replacer is the main struct for spelling correction 41 type Replacer struct { 42 Replacements []string 43 Debug bool 44 engine *StringReplacer 45 corrected map[string]string 46 } 47 48 // New creates a new default Replacer using the main rule list 49 func New() *Replacer { 50 r := Replacer{ 51 Replacements: DictMain, 52 } 53 r.Compile() 54 return &r 55 } 56 57 // RemoveRule deletes existings rules. 58 // TODO: make inplace to save memory 59 func (r *Replacer) RemoveRule(ignore []string) { 60 newwords := make([]string, 0, len(r.Replacements)) 61 for i := 0; i < len(r.Replacements); i += 2 { 62 if inArray(ignore, r.Replacements[i]) { 63 continue 64 } 65 newwords = append(newwords, r.Replacements[i:i+2]...) 66 } 67 r.engine = nil 68 r.Replacements = newwords 69 } 70 71 // AddRuleList appends new rules. 72 // Input is in the same form as Strings.Replacer: [ old1, new1, old2, new2, ....] 73 // Note: does not check for duplictes 74 func (r *Replacer) AddRuleList(additions []string) { 75 r.engine = nil 76 r.Replacements = append(r.Replacements, additions...) 77 } 78 79 // Compile compiles the rules. Required before using the Replace functions 80 func (r *Replacer) Compile() { 81 82 r.corrected = make(map[string]string, len(r.Replacements)/2) 83 for i := 0; i < len(r.Replacements); i += 2 { 84 r.corrected[r.Replacements[i]] = r.Replacements[i+1] 85 } 86 r.engine = NewStringReplacer(r.Replacements...) 87 } 88 89 /* 90 line1 and line2 are different 91 extract words from each line1 92 93 replace word -> newword 94 if word == new-word 95 continue 96 if new-word in list of replacements 97 continue 98 new word not original, and not in list of replacements 99 some substring got mixed up. UNdo 100 */ 101 func (r *Replacer) recheckLine(s string, lineNum int, buf io.Writer, next func(Diff)) { 102 first := 0 103 redacted := RemoveNotWords(s) 104 105 idx := wordRegexp.FindAllStringIndex(redacted, -1) 106 for _, ab := range idx { 107 word := s[ab[0]:ab[1]] 108 newword := r.engine.Replace(word) 109 if newword == word { 110 // no replacement done 111 continue 112 } 113 114 // ignore camelCase words 115 // https://github.com/client9/misspell/issues/113 116 if CaseStyle(word) == CaseUnknown { 117 continue 118 } 119 120 if StringEqualFold(r.corrected[strings.ToLower(word)], newword) { 121 // word got corrected into something we know 122 io.WriteString(buf, s[first:ab[0]]) 123 io.WriteString(buf, newword) 124 first = ab[1] 125 next(Diff{ 126 FullLine: s, 127 Line: lineNum, 128 Original: word, 129 Corrected: newword, 130 Column: ab[0], 131 }) 132 continue 133 } 134 // Word got corrected into something unknown. Ignore it 135 } 136 io.WriteString(buf, s[first:]) 137 } 138 139 // ReplaceGo is a specialized routine for correcting Golang source 140 // files. Currently only checks comments, not identifiers for 141 // spelling. 142 func (r *Replacer) ReplaceGo(input string) (string, []Diff) { 143 var s scanner.Scanner 144 s.Init(strings.NewReader(input)) 145 s.Mode = scanner.ScanIdents | scanner.ScanFloats | scanner.ScanChars | scanner.ScanStrings | scanner.ScanRawStrings | scanner.ScanComments 146 lastPos := 0 147 output := "" 148 Loop: 149 for { 150 switch s.Scan() { 151 case scanner.Comment: 152 origComment := s.TokenText() 153 newComment := r.engine.Replace(origComment) 154 155 if origComment != newComment { 156 // s.Pos().Offset is the end of the current token 157 // subtract len(origComment) to get the start of the token 158 offset := s.Pos().Offset 159 output = output + input[lastPos:offset-len(origComment)] + newComment 160 lastPos = offset 161 } 162 case scanner.EOF: 163 break Loop 164 } 165 } 166 167 if lastPos == 0 { 168 // no changes, no copies 169 return input, nil 170 } 171 if lastPos < len(input) { 172 output = output + input[lastPos:] 173 } 174 diffs := make([]Diff, 0, 8) 175 buf := bytes.NewBuffer(make([]byte, 0, max(len(input), len(output))+100)) 176 // faster that making a bytes.Buffer and bufio.ReadString 177 outlines := strings.SplitAfter(output, "\n") 178 inlines := strings.SplitAfter(input, "\n") 179 for i := 0; i < len(inlines); i++ { 180 if inlines[i] == outlines[i] { 181 buf.WriteString(outlines[i]) 182 continue 183 } 184 r.recheckLine(inlines[i], i+1, buf, func(d Diff) { 185 diffs = append(diffs, d) 186 }) 187 } 188 189 return buf.String(), diffs 190 191 } 192 193 // Replace is corrects misspellings in input, returning corrected version 194 // along with a list of diffs. 195 func (r *Replacer) Replace(input string) (string, []Diff) { 196 output := r.engine.Replace(input) 197 if input == output { 198 return input, nil 199 } 200 diffs := make([]Diff, 0, 8) 201 buf := bytes.NewBuffer(make([]byte, 0, max(len(input), len(output))+100)) 202 // faster that making a bytes.Buffer and bufio.ReadString 203 outlines := strings.SplitAfter(output, "\n") 204 inlines := strings.SplitAfter(input, "\n") 205 for i := 0; i < len(inlines); i++ { 206 if inlines[i] == outlines[i] { 207 buf.WriteString(outlines[i]) 208 continue 209 } 210 r.recheckLine(inlines[i], i+1, buf, func(d Diff) { 211 diffs = append(diffs, d) 212 }) 213 } 214 215 return buf.String(), diffs 216 } 217 218 // ReplaceReader applies spelling corrections to a reader stream. Diffs are 219 // emitted through a callback. 220 func (r *Replacer) ReplaceReader(raw io.Reader, w io.Writer, next func(Diff)) error { 221 var ( 222 err error 223 line string 224 lineNum int 225 ) 226 reader := bufio.NewReader(raw) 227 for err == nil { 228 lineNum++ 229 line, err = reader.ReadString('\n') 230 231 // if it's EOF, then line has the last line 232 // don't like the check of err here and 233 // in for loop 234 if err != nil && err != io.EOF { 235 return err 236 } 237 // easily 5x faster than regexp+map 238 if line == r.engine.Replace(line) { 239 io.WriteString(w, line) 240 continue 241 } 242 // but it can be inaccurate, so we need to double check 243 r.recheckLine(line, lineNum, w, next) 244 } 245 return nil 246 }