github.com/uber-go/tally/v4@v4.1.17/sanitize.go (about)

     1  // Copyright (c) 2021 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package tally
    22  
    23  import (
    24  	"bytes"
    25  	"sync"
    26  )
    27  
    28  var (
    29  	// DefaultReplacementCharacter is the default character used for
    30  	// replacements.
    31  	DefaultReplacementCharacter = '_'
    32  
    33  	// AlphanumericRange is the range of alphanumeric characters.
    34  	AlphanumericRange = []SanitizeRange{
    35  		{rune('a'), rune('z')},
    36  		{rune('A'), rune('Z')},
    37  		{rune('0'), rune('9')}}
    38  
    39  	// UnderscoreCharacters is just an underscore character.
    40  	UnderscoreCharacters = []rune{
    41  		'_'}
    42  
    43  	// UnderscoreDashCharacters is a slice of underscore, and
    44  	// dash characters.
    45  	UnderscoreDashCharacters = []rune{
    46  		'-',
    47  		'_'}
    48  
    49  	// UnderscoreDashDotCharacters is a slice of underscore,
    50  	// dash, and dot characters.
    51  	UnderscoreDashDotCharacters = []rune{
    52  		'.',
    53  		'-',
    54  		'_'}
    55  )
    56  
// SanitizeFn returns a sanitized version of the input string.
//
// The functions of this type defined in this file either return the input
// unchanged (NoOpSanitizeFn) or replace every character that is outside a
// configured valid set (the functions built by NewSanitizer).
type SanitizeFn func(string) string
    59  
// SanitizeRange is a range of characters (inclusive on both ends).
//
// Index 0 holds the lower bound and index 1 the upper bound, so
// SanitizeRange{'a', 'z'} covers every rune from 'a' through 'z'.
type SanitizeRange [2]rune
    62  
// ValidCharacters is a collection of valid characters.
//
// A character is treated as valid when it lies within any entry of Ranges
// (bounds inclusive) or equals any entry of Characters. When both fields are
// empty no character is valid, so every character of a sanitized string is
// replaced.
type ValidCharacters struct {
	Ranges     []SanitizeRange
	Characters []rune
}
    68  
// SanitizeOptions are the set of configurable options for sanitisation.
//
// NameCharacters, KeyCharacters and ValueCharacters define which characters
// are left intact by the Name, Key and Value methods, respectively, of the
// Sanitizer returned by NewSanitizer. ReplacementCharacter is written in place
// of every character that is not valid.
//
// NewSanitizer uses ReplacementCharacter exactly as given: a zero value is
// not swapped for DefaultReplacementCharacter.
type SanitizeOptions struct {
	NameCharacters       ValidCharacters
	KeyCharacters        ValidCharacters
	ValueCharacters      ValidCharacters
	ReplacementCharacter rune
}
    76  
// Sanitizer sanitizes the provided input based on the function executed.
//
// The three methods are separate so that names, keys and values can each be
// held to a different set of valid characters (see SanitizeOptions).
type Sanitizer interface {
	// Name sanitizes the provided `name` string and returns the result.
	Name(n string) string

	// Key sanitizes the provided `key` string and returns the result.
	Key(k string) string

	// Value sanitizes the provided `value` string and returns the result.
	Value(v string) string
}
    88  
    89  // NewSanitizer returns a new sanitizer based on provided options.
    90  func NewSanitizer(opts SanitizeOptions) Sanitizer {
    91  	return sanitizer{
    92  		nameFn:  opts.NameCharacters.sanitizeFn(opts.ReplacementCharacter),
    93  		keyFn:   opts.KeyCharacters.sanitizeFn(opts.ReplacementCharacter),
    94  		valueFn: opts.ValueCharacters.sanitizeFn(opts.ReplacementCharacter),
    95  	}
    96  }
    97  
    98  // NoOpSanitizeFn returns the input un-touched.
    99  func NoOpSanitizeFn(v string) string { return v }
   100  
   101  // NewNoOpSanitizer returns a sanitizer which returns all inputs un-touched.
   102  func NewNoOpSanitizer() Sanitizer {
   103  	return sanitizer{
   104  		nameFn:  NoOpSanitizeFn,
   105  		keyFn:   NoOpSanitizeFn,
   106  		valueFn: NoOpSanitizeFn,
   107  	}
   108  }
   109  
   110  type sanitizer struct {
   111  	nameFn  SanitizeFn
   112  	keyFn   SanitizeFn
   113  	valueFn SanitizeFn
   114  }
   115  
   116  func (s sanitizer) Name(n string) string {
   117  	return s.nameFn(n)
   118  }
   119  
   120  func (s sanitizer) Key(k string) string {
   121  	return s.keyFn(k)
   122  }
   123  
   124  func (s sanitizer) Value(v string) string {
   125  	return s.valueFn(v)
   126  }
   127  
   128  var _sanitizeBuffers = sync.Pool{
   129  	New: func() interface{} {
   130  		return new(bytes.Buffer)
   131  	},
   132  }
   133  
   134  func getSanitizeBuffer() *bytes.Buffer {
   135  	return _sanitizeBuffers.Get().(*bytes.Buffer)
   136  }
   137  
   138  func putSanitizeBuffer(b *bytes.Buffer) {
   139  	b.Reset()
   140  	_sanitizeBuffers.Put(b)
   141  }
   142  
   143  func (c *ValidCharacters) sanitizeFn(repChar rune) SanitizeFn {
   144  	return func(value string) string {
   145  		var buf *bytes.Buffer
   146  		for idx, ch := range value {
   147  			// first check if the provided character is valid
   148  			validCurr := false
   149  			for i := 0; !validCurr && i < len(c.Ranges); i++ {
   150  				if ch >= c.Ranges[i][0] && ch <= c.Ranges[i][1] {
   151  					validCurr = true
   152  					break
   153  				}
   154  			}
   155  			for i := 0; !validCurr && i < len(c.Characters); i++ {
   156  				if c.Characters[i] == ch {
   157  					validCurr = true
   158  					break
   159  				}
   160  			}
   161  
   162  			// if it's valid, we can optimise allocations by avoiding copying
   163  			if validCurr {
   164  				if buf == nil {
   165  					continue // haven't deviated from string, still no need to init buffer
   166  				}
   167  				buf.WriteRune(ch) // we've deviated from string, write to buffer
   168  				continue
   169  			}
   170  
   171  			// ie the character is invalid, and the buffer has not been initialised
   172  			// so we initialise buffer and backfill
   173  			if buf == nil {
   174  				buf = getSanitizeBuffer()
   175  				if idx > 0 {
   176  					buf.WriteString(value[:idx])
   177  				}
   178  			}
   179  
   180  			// write the replacement character
   181  			buf.WriteRune(repChar)
   182  		}
   183  
   184  		// return input un-touched if the buffer has been not initialised
   185  		if buf == nil {
   186  			return value
   187  		}
   188  
   189  		// otherwise, return the newly constructed buffer
   190  		result := buf.String()
   191  		putSanitizeBuffer(buf)
   192  		return result
   193  	}
   194  }