bitbucket.org/ai69/amoy@v0.2.3/replace.go (about) 1 package amoy 2 3 import ( 4 "sort" 5 "strings" 6 "unicode" 7 8 "github.com/1set/gut/ystring" 9 ) 10 11 // ReplaceStringOptions indicates the options for the ReplaceString function. 12 type ReplaceStringOptions struct { 13 // Replacements is a map of old-new string pairs. 14 Replacements map[string]string 15 // CaseInsensitive indicates if the match should be case-insensitive. 16 CaseInsensitive bool 17 // ImitateResult indicates if the result should be the imitation (about case) of the original string. 18 ImitateResult bool 19 } 20 21 // ReplaceString replaces all occurrences of given strings with replacements, with options to make all the replacements 22 // case-insensitive and imitate the case of old string. 23 func ReplaceString(s string, opt ReplaceStringOptions) string { 24 // if given string is empty, or replacements is empty, just return the original string 25 if ystring.IsEmpty(s) || len(opt.Replacements) == 0 { 26 return s 27 } 28 // extract replacement pairs from map 29 pairs := make([]*replacePair, 0, len(opt.Replacements)) 30 for so, sn := range opt.Replacements { 31 // if old string is empty, just skip it 32 if ystring.IsNotEmpty(so) { 33 pairs = append(pairs, &replacePair{so, sn}) 34 } 35 } 36 // quit if replacement pairs are actually empty 37 if len(pairs) == 0 { 38 return s 39 } 40 singlePair := len(pairs) == 1 41 42 // sort replacement map by length pairs in order to handle the longest match first 43 sort.SliceStable(pairs, func(i, j int) bool { 44 return len(pairs[i].Old) > len(pairs[j].Old) 45 }) 46 47 // use built-in replace method if it's case-sensitive and no imitation is required 48 if !opt.CaseInsensitive && !opt.ImitateResult { 49 if singlePair { 50 // if there is only one replacement pair, use simple built-in replace method 51 return strings.ReplaceAll(s, pairs[0].Old, pairs[0].New) 52 } 53 // if there are more than one replacement pairs, build a Replacer object and use it 54 pl := make([]string, 0, 2*len(pairs)) 55 for _, p := range pairs { 56 pl = append(pl, p.Old, p.New) 57 } 58 rp := strings.NewReplacer(pl...) 59 return rp.Replace(s) 60 } 61 62 // for custom replacements, use various methods for single pair or multiple pairs 63 if singlePair { 64 return replaceSingleString(s, pairs[0], opt.CaseInsensitive, opt.ImitateResult) 65 } 66 return replaceMultipleString(s, pairs, opt.CaseInsensitive, opt.ImitateResult) 67 } 68 69 // replacePair is a struct indicates replacement with two fields, Old and New, both of which are strings. 70 type replacePair struct { 71 Old string 72 New string 73 } 74 75 // replaceSingleString replaces all occurrences of a given string with another string, with options to make all the replacements. 76 func replaceSingleString(s string, rp *replacePair, ignoreCase, imitateOld bool) string { 77 // ignore empty pair 78 if rp == nil || ystring.IsEmpty(rp.Old) { 79 return s 80 } 81 // extract olds and news strings 82 olds, news := rp.Old, rp.New 83 // get full string to compare 84 var tmp string 85 if ignoreCase { 86 tmp = strings.ToLower(s) 87 olds = strings.ToLower(olds) 88 } else { 89 tmp = s 90 } 91 // compare and replace until no match found 92 var ( 93 res strings.Builder 94 pos = 0 // absolute position in original string 95 ) 96 for { 97 // find the match 98 matchBegin := strings.Index(tmp, olds) 99 if matchBegin < 0 { 100 // copy the rest and quit for no more matches 101 res.WriteString(s[pos:]) 102 break 103 } 104 if matchBegin > 0 { 105 // copy the part before the match 106 res.WriteString(s[pos : pos+matchBegin]) 107 } 108 // replace the matched string with exactly new or imitate the old string 109 matchEnd := matchBegin + len(olds) 110 if imitateOld { 111 newr := imitateString(s[pos+matchBegin:pos+matchEnd], news) 112 res.WriteString(newr) 113 } else { 114 res.WriteString(news) 115 } 116 // truncate the processed part 117 pos += matchEnd 118 tmp = tmp[matchEnd:] 119 } 120 return res.String() 121 } 122 123 // replaceMultipleString replaces all occurrences of given strings with other strings, with options to make all the replacements. 124 func replaceMultipleString(s string, rps []*replacePair, ignoreCase, imitateOld bool) string { 125 // ignore empty pairs 126 if rps == nil || len(rps) == 0 { 127 return s 128 } 129 // get full string to compare 130 var ss string 131 if ignoreCase { 132 ss = strings.ToLower(s) 133 } else { 134 ss = s 135 } 136 // compare and replace until no match found 137 type charRange struct { 138 Low int 139 High int 140 Match int 141 } 142 matchRanges := make([]*charRange, 0, len(rps)) 143 leftRanges := make([]*charRange, 0, len(rps)) 144 leftRanges = append(leftRanges, &charRange{0, len(ss), 0}) 145 // for each replacement pair, record the matched and left char ranges 146 for pairIndex, pair := range rps { 147 // ignore pair with empty old string 148 var old string 149 if ignoreCase { 150 old = strings.ToLower(pair.Old) 151 } else { 152 old = pair.Old 153 } 154 if ystring.IsEmpty(old) { 155 continue 156 } 157 newLeftRanges := make([]*charRange, 0, len(leftRanges)) 158 // for each left range 159 for _, lr := range leftRanges { 160 low, high := lr.Low, lr.High 161 for { 162 // find the first longest match for current range 163 matchBegin := strings.Index(ss[low:high], old) 164 if matchBegin < 0 { 165 newLeftRanges = append(newLeftRanges, &charRange{low, high, -1}) 166 break 167 } 168 if matchBegin > 0 { 169 // skip if the match starts at the beginning of the range 170 newLeftRanges = append(newLeftRanges, &charRange{low, low + matchBegin, -1}) 171 } 172 // record the range, find in the rest 173 matchEnd := matchBegin + len(old) 174 matchRanges = append(matchRanges, &charRange{low + matchBegin, low + matchEnd, pairIndex}) 175 low += matchEnd 176 } 177 } 178 // updates the left range 179 leftRanges = newLeftRanges 180 } 181 // sort by the low index of each range 182 sort.SliceStable(matchRanges, func(i, j int) bool { 183 return matchRanges[i].Low < matchRanges[j].Low 184 }) 185 // copy or replace 186 var ( 187 res strings.Builder 188 mrCur, lrCur *charRange 189 mrIdx, lrIdx, pos = 0, 0, 0 190 ) 191 for { 192 if len(matchRanges) > mrIdx && len(leftRanges) > lrIdx { 193 // first part 194 matchPos := matchRanges[mrIdx].Low 195 leftPos := leftRanges[lrIdx].Low 196 if pos == matchPos { 197 mrCur = matchRanges[mrIdx] 198 pos = mrCur.High 199 mrIdx++ 200 } else if pos == leftPos { 201 lrCur = leftRanges[lrIdx] 202 pos = lrCur.High 203 lrIdx++ 204 } 205 } else if len(leftRanges) > lrIdx { 206 // only left is left 207 lrCur = leftRanges[lrIdx] 208 pos = lrCur.High 209 lrIdx++ 210 } else if len(matchRanges) > mrIdx { 211 // only match is left 212 mrCur = matchRanges[mrIdx] 213 pos = mrCur.High 214 mrIdx++ 215 } else { 216 // all the range are handled 217 break 218 } 219 220 if lrCur != nil { 221 // copy the left part directly 222 res.WriteString(s[lrCur.Low:lrCur.High]) 223 lrCur = nil 224 } else if mrCur != nil { 225 // replace the matched string with exactly new or imitate the old string 226 if imitateOld { 227 newr := imitateString(s[mrCur.Low:mrCur.High], rps[mrCur.Match].New) 228 res.WriteString(newr) 229 } else { 230 res.WriteString(rps[mrCur.Match].New) 231 } 232 mrCur = nil 233 } 234 } 235 // result 236 return res.String() 237 } 238 239 type stringCaseType uint8 240 241 const ( 242 stringCaseMisc stringCaseType = iota 243 stringCaseLower 244 stringCaseUpper 245 stringCaseTitle 246 ) 247 248 // imitateString returns a dest string imitating src string. 249 // if the source string is lowercase, lowercase the destination string; 250 // if the source string is uppercase, uppercase the destination string; 251 // if the source string is titlecase, titlecase the destination string; 252 // otherwise, do nothing. 253 func imitateString(old, new string) string { 254 switch getStringCaseType(old) { 255 case stringCaseLower: 256 return strings.ToLower(new) 257 case stringCaseUpper: 258 return strings.ToUpper(new) 259 case stringCaseTitle: 260 return strings.Title(new) 261 } 262 return new 263 } 264 265 func getStringCaseType(s string) stringCaseType { 266 const defaultFlag = uint8(0b111) // first 3 bits: 2-title, 1-upper, 0-lower 267 caseFlag := defaultFlag 268 wordStart := true 269 for _, r := range s { 270 if unicode.IsLower(r) { 271 if wordStart { 272 // start with lower case, it can't be title or upper case 273 caseFlag &= 0b001 274 wordStart = false 275 } else { 276 // has lower case after start, it can't be upper case 277 caseFlag &= 0b101 278 } 279 } else if unicode.IsUpper(r) { 280 if wordStart { 281 // start with upper case, it can't be lower 282 caseFlag &= 0b110 283 wordStart = false 284 } else { 285 // has upper case after start, it can't be title or lower case 286 caseFlag &= 0b010 287 } 288 } else if unicode.IsSpace(r) || r == '-' || r == '.' { 289 // space indicates the start of a new word 290 wordStart = true 291 } 292 if caseFlag == 0 { 293 // all the flags are 0, it's misc 294 return stringCaseMisc 295 } 296 } 297 // make the judgement 298 switch caseFlag { 299 case 0b001: 300 return stringCaseLower 301 case 0b110: 302 fallthrough 303 case 0b010: 304 return stringCaseUpper 305 case 0b100: 306 return stringCaseTitle 307 default: 308 return stringCaseMisc 309 } 310 }