github.com/mailgun/holster/v4@v4.20.0/anonymize/anonymize.go (about)

     1  package anonymize
     2  
     3  import (
     4  	"fmt"
     5  	"regexp"
     6  	"sort"
     7  	"strings"
     8  )
     9  
    10  const anonym = "xxx"
    11  
    12  var tokenSep = regexp.MustCompile(`\s|[,;]`)
    13  var userSep = regexp.MustCompile("[._-]")
    14  var adjacentSecrets = regexp.MustCompile(fmt.Sprintf(`%s(\s%s)+`, anonym, anonym))
    15  
    16  func replaceNames(src string, names []string) string {
    17  	words := strings.Split(src, " ")
    18  	for i, word := range words {
    19  		for _, name := range names {
    20  			lowerCasedWord := strings.ToLower(word)
    21  			lowerCasedTrimmedWord := strings.Trim(lowerCasedWord, ":,!?.;")
    22  			lowerCasedName := strings.ToLower(name)
    23  			if lowerCasedTrimmedWord == lowerCasedName {
    24  				words[i] = strings.ReplaceAll(lowerCasedWord, lowerCasedName, anonym)
    25  				break
    26  			}
    27  		}
    28  	}
    29  	return strings.Join(words, " ")
    30  }
    31  
    32  // Anonymize replace secret information with xxx.
    33  func Anonymize(src string, names []string, secrets ...string) (string, error) {
    34  	src = replaceNames(src, names)
    35  	tokens := tokenize(secrets...)
    36  	if len(tokens) == 0 {
    37  		return src, nil
    38  	}
    39  	secret, err := or(tokens)
    40  	if err != nil {
    41  		return src, err
    42  	}
    43  	src = secret.ReplaceAllString(src, anonym)
    44  	src = adjacentSecrets.ReplaceAllString(src, anonym)
    45  	return src, nil
    46  }
    47  
    48  func tokenize(text ...string) (tokens []string) {
    49  	tokenSet := map[string]interface{}{}
    50  	for _, s := range text {
    51  		for _, token := range tokenSep.Split(strings.ToLower(s), -1) {
    52  			token = strings.Trim(token, "<>\" \n\t'")
    53  			if strings.Contains(token, "@") {
    54  				parts := strings.SplitN(token, "@", 2)
    55  				tokenSet[parts[1]] = true
    56  				for _, userPart := range userSep.Split(parts[0], 5) {
    57  					if len(userPart) > 2 {
    58  						tokenSet[userPart] = true
    59  					}
    60  				}
    61  			} else if len(token) > 1 {
    62  				tokenSet[token] = true
    63  			}
    64  		}
    65  	}
    66  	for token := range tokenSet {
    67  		tokens = append(tokens, regexp.QuoteMeta(token))
    68  	}
    69  	sort.SliceStable(tokens, func(i, j int) bool {
    70  		return len(tokens[i]) > len(tokens[j])
    71  	})
    72  	return tokens
    73  }
    74  
    75  func or(tokens []string) (*regexp.Regexp, error) {
    76  	return regexp.Compile(fmt.Sprintf("(?i)%s", strings.Join(tokens, "|")))
    77  }