bitbucket.org/ai69/amoy@v0.2.3/replace.go (about)

     1  package amoy
     2  
     3  import (
     4  	"sort"
     5  	"strings"
     6  	"unicode"
     7  
     8  	"github.com/1set/gut/ystring"
     9  )
    10  
// ReplaceStringOptions indicates the options for the ReplaceString function.
// With an empty or nil Replacements map, ReplaceString returns its input unchanged.
type ReplaceStringOptions struct {
	// Replacements is a map of old-new string pairs.
	// Pairs whose old string is empty are skipped by ReplaceString.
	Replacements map[string]string
	// CaseInsensitive indicates if the match should be case-insensitive.
	CaseInsensitive bool
	// ImitateResult indicates if each replacement should imitate the letter case of the text it replaces.
	ImitateResult bool
}
    20  
    21  // ReplaceString replaces all occurrences of given strings with replacements, with options to make all the replacements
    22  // case-insensitive and imitate the case of old string.
    23  func ReplaceString(s string, opt ReplaceStringOptions) string {
    24  	// if given string is empty, or replacements is empty, just return the original string
    25  	if ystring.IsEmpty(s) || len(opt.Replacements) == 0 {
    26  		return s
    27  	}
    28  	// extract replacement pairs from map
    29  	pairs := make([]*replacePair, 0, len(opt.Replacements))
    30  	for so, sn := range opt.Replacements {
    31  		// if old string is empty, just skip it
    32  		if ystring.IsNotEmpty(so) {
    33  			pairs = append(pairs, &replacePair{so, sn})
    34  		}
    35  	}
    36  	// quit if replacement pairs are actually empty
    37  	if len(pairs) == 0 {
    38  		return s
    39  	}
    40  	singlePair := len(pairs) == 1
    41  
    42  	// sort replacement map by length pairs in order to handle the longest match first
    43  	sort.SliceStable(pairs, func(i, j int) bool {
    44  		return len(pairs[i].Old) > len(pairs[j].Old)
    45  	})
    46  
    47  	// use built-in replace method if it's case-sensitive and no imitation is required
    48  	if !opt.CaseInsensitive && !opt.ImitateResult {
    49  		if singlePair {
    50  			// if there is only one replacement pair, use simple built-in replace method
    51  			return strings.ReplaceAll(s, pairs[0].Old, pairs[0].New)
    52  		}
    53  		// if there are more than one replacement pairs, build a Replacer object and use it
    54  		pl := make([]string, 0, 2*len(pairs))
    55  		for _, p := range pairs {
    56  			pl = append(pl, p.Old, p.New)
    57  		}
    58  		rp := strings.NewReplacer(pl...)
    59  		return rp.Replace(s)
    60  	}
    61  
    62  	// for custom replacements, use various methods for single pair or multiple pairs
    63  	if singlePair {
    64  		return replaceSingleString(s, pairs[0], opt.CaseInsensitive, opt.ImitateResult)
    65  	}
    66  	return replaceMultipleString(s, pairs, opt.CaseInsensitive, opt.ImitateResult)
    67  }
    68  
// replacePair is a single replacement rule: occurrences of Old are replaced with New.
type replacePair struct {
	// Old is the string to search for.
	Old string
	// New is the string written in place of each match of Old.
	New string
}
    74  
    75  // replaceSingleString replaces all occurrences of a given string with another string, with options to make all the replacements.
    76  func replaceSingleString(s string, rp *replacePair, ignoreCase, imitateOld bool) string {
    77  	// ignore empty pair
    78  	if rp == nil || ystring.IsEmpty(rp.Old) {
    79  		return s
    80  	}
    81  	// extract olds and news strings
    82  	olds, news := rp.Old, rp.New
    83  	// get full string to compare
    84  	var tmp string
    85  	if ignoreCase {
    86  		tmp = strings.ToLower(s)
    87  		olds = strings.ToLower(olds)
    88  	} else {
    89  		tmp = s
    90  	}
    91  	// compare and replace until no match found
    92  	var (
    93  		res strings.Builder
    94  		pos = 0 // absolute position in original string
    95  	)
    96  	for {
    97  		// find the match
    98  		matchBegin := strings.Index(tmp, olds)
    99  		if matchBegin < 0 {
   100  			// copy the rest and quit for no more matches
   101  			res.WriteString(s[pos:])
   102  			break
   103  		}
   104  		if matchBegin > 0 {
   105  			// copy the part before the match
   106  			res.WriteString(s[pos : pos+matchBegin])
   107  		}
   108  		// replace the matched string with exactly new or imitate the old string
   109  		matchEnd := matchBegin + len(olds)
   110  		if imitateOld {
   111  			newr := imitateString(s[pos+matchBegin:pos+matchEnd], news)
   112  			res.WriteString(newr)
   113  		} else {
   114  			res.WriteString(news)
   115  		}
   116  		// truncate the processed part
   117  		pos += matchEnd
   118  		tmp = tmp[matchEnd:]
   119  	}
   120  	return res.String()
   121  }
   122  
   123  // replaceMultipleString replaces all occurrences of given strings with other strings, with options to make all the replacements.
   124  func replaceMultipleString(s string, rps []*replacePair, ignoreCase, imitateOld bool) string {
   125  	// ignore empty pairs
   126  	if rps == nil || len(rps) == 0 {
   127  		return s
   128  	}
   129  	// get full string to compare
   130  	var ss string
   131  	if ignoreCase {
   132  		ss = strings.ToLower(s)
   133  	} else {
   134  		ss = s
   135  	}
   136  	// compare and replace until no match found
   137  	type charRange struct {
   138  		Low   int
   139  		High  int
   140  		Match int
   141  	}
   142  	matchRanges := make([]*charRange, 0, len(rps))
   143  	leftRanges := make([]*charRange, 0, len(rps))
   144  	leftRanges = append(leftRanges, &charRange{0, len(ss), 0})
   145  	// for each replacement pair, record the matched and left char ranges
   146  	for pairIndex, pair := range rps {
   147  		// ignore pair with empty old string
   148  		var old string
   149  		if ignoreCase {
   150  			old = strings.ToLower(pair.Old)
   151  		} else {
   152  			old = pair.Old
   153  		}
   154  		if ystring.IsEmpty(old) {
   155  			continue
   156  		}
   157  		newLeftRanges := make([]*charRange, 0, len(leftRanges))
   158  		// for each left range
   159  		for _, lr := range leftRanges {
   160  			low, high := lr.Low, lr.High
   161  			for {
   162  				// find the first longest match for current range
   163  				matchBegin := strings.Index(ss[low:high], old)
   164  				if matchBegin < 0 {
   165  					newLeftRanges = append(newLeftRanges, &charRange{low, high, -1})
   166  					break
   167  				}
   168  				if matchBegin > 0 {
   169  					// skip if the match starts at the beginning of the range
   170  					newLeftRanges = append(newLeftRanges, &charRange{low, low + matchBegin, -1})
   171  				}
   172  				// record the range, find in the rest
   173  				matchEnd := matchBegin + len(old)
   174  				matchRanges = append(matchRanges, &charRange{low + matchBegin, low + matchEnd, pairIndex})
   175  				low += matchEnd
   176  			}
   177  		}
   178  		// updates the left range
   179  		leftRanges = newLeftRanges
   180  	}
   181  	// sort by the low index of each range
   182  	sort.SliceStable(matchRanges, func(i, j int) bool {
   183  		return matchRanges[i].Low < matchRanges[j].Low
   184  	})
   185  	// copy or replace
   186  	var (
   187  		res               strings.Builder
   188  		mrCur, lrCur      *charRange
   189  		mrIdx, lrIdx, pos = 0, 0, 0
   190  	)
   191  	for {
   192  		if len(matchRanges) > mrIdx && len(leftRanges) > lrIdx {
   193  			// first part
   194  			matchPos := matchRanges[mrIdx].Low
   195  			leftPos := leftRanges[lrIdx].Low
   196  			if pos == matchPos {
   197  				mrCur = matchRanges[mrIdx]
   198  				pos = mrCur.High
   199  				mrIdx++
   200  			} else if pos == leftPos {
   201  				lrCur = leftRanges[lrIdx]
   202  				pos = lrCur.High
   203  				lrIdx++
   204  			}
   205  		} else if len(leftRanges) > lrIdx {
   206  			// only left is left
   207  			lrCur = leftRanges[lrIdx]
   208  			pos = lrCur.High
   209  			lrIdx++
   210  		} else if len(matchRanges) > mrIdx {
   211  			// only match is left
   212  			mrCur = matchRanges[mrIdx]
   213  			pos = mrCur.High
   214  			mrIdx++
   215  		} else {
   216  			// all the range are handled
   217  			break
   218  		}
   219  
   220  		if lrCur != nil {
   221  			// copy the left part directly
   222  			res.WriteString(s[lrCur.Low:lrCur.High])
   223  			lrCur = nil
   224  		} else if mrCur != nil {
   225  			// replace the matched string with exactly new or imitate the old string
   226  			if imitateOld {
   227  				newr := imitateString(s[mrCur.Low:mrCur.High], rps[mrCur.Match].New)
   228  				res.WriteString(newr)
   229  			} else {
   230  				res.WriteString(rps[mrCur.Match].New)
   231  			}
   232  			mrCur = nil
   233  		}
   234  	}
   235  	// result
   236  	return res.String()
   237  }
   238  
// stringCaseType classifies the letter case of a string, as reported by getStringCaseType.
type stringCaseType uint8

const (
	// stringCaseMisc means no single case style applies to the string.
	stringCaseMisc stringCaseType = iota
	// stringCaseLower means the string is in lower case.
	stringCaseLower
	// stringCaseUpper means the string is in upper case.
	stringCaseUpper
	// stringCaseTitle means the string is in title case.
	stringCaseTitle
)
   247  
   248  // imitateString returns a dest string imitating src string.
   249  // if the source string is lowercase, lowercase the destination string;
   250  // if the source string is uppercase, uppercase the destination string;
   251  // if the source string is titlecase, titlecase the destination string;
   252  // otherwise, do nothing.
   253  func imitateString(old, new string) string {
   254  	switch getStringCaseType(old) {
   255  	case stringCaseLower:
   256  		return strings.ToLower(new)
   257  	case stringCaseUpper:
   258  		return strings.ToUpper(new)
   259  	case stringCaseTitle:
   260  		return strings.Title(new)
   261  	}
   262  	return new
   263  }
   264  
   265  func getStringCaseType(s string) stringCaseType {
   266  	const defaultFlag = uint8(0b111) // first 3 bits: 2-title, 1-upper, 0-lower
   267  	caseFlag := defaultFlag
   268  	wordStart := true
   269  	for _, r := range s {
   270  		if unicode.IsLower(r) {
   271  			if wordStart {
   272  				// start with lower case, it can't be title or upper case
   273  				caseFlag &= 0b001
   274  				wordStart = false
   275  			} else {
   276  				// has lower case after start, it can't be upper case
   277  				caseFlag &= 0b101
   278  			}
   279  		} else if unicode.IsUpper(r) {
   280  			if wordStart {
   281  				// start with upper case, it can't be lower
   282  				caseFlag &= 0b110
   283  				wordStart = false
   284  			} else {
   285  				// has upper case after start, it can't be title or lower case
   286  				caseFlag &= 0b010
   287  			}
   288  		} else if unicode.IsSpace(r) || r == '-' || r == '.' {
   289  			// space indicates the start of a new word
   290  			wordStart = true
   291  		}
   292  		if caseFlag == 0 {
   293  			// all the flags are 0, it's misc
   294  			return stringCaseMisc
   295  		}
   296  	}
   297  	// make the judgement
   298  	switch caseFlag {
   299  	case 0b001:
   300  		return stringCaseLower
   301  	case 0b110:
   302  		fallthrough
   303  	case 0b010:
   304  		return stringCaseUpper
   305  	case 0b100:
   306  		return stringCaseTitle
   307  	default:
   308  		return stringCaseMisc
   309  	}
   310  }