github.com/uber-go/tally/v4@v4.1.17/sanitize.go (about) 1 // Copyright (c) 2021 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package tally 22 23 import ( 24 "bytes" 25 "sync" 26 ) 27 28 var ( 29 // DefaultReplacementCharacter is the default character used for 30 // replacements. 31 DefaultReplacementCharacter = '_' 32 33 // AlphanumericRange is the range of alphanumeric characters. 34 AlphanumericRange = []SanitizeRange{ 35 {rune('a'), rune('z')}, 36 {rune('A'), rune('Z')}, 37 {rune('0'), rune('9')}} 38 39 // UnderscoreCharacters is just an underscore character. 40 UnderscoreCharacters = []rune{ 41 '_'} 42 43 // UnderscoreDashCharacters is a slice of underscore, and 44 // dash characters. 45 UnderscoreDashCharacters = []rune{ 46 '-', 47 '_'} 48 49 // UnderscoreDashDotCharacters is a slice of underscore, 50 // dash, and dot characters. 51 UnderscoreDashDotCharacters = []rune{ 52 '.', 53 '-', 54 '_'} 55 ) 56 57 // SanitizeFn returns a sanitized version of the input string. 58 type SanitizeFn func(string) string 59 60 // SanitizeRange is a range of characters (inclusive on both ends). 61 type SanitizeRange [2]rune 62 63 // ValidCharacters is a collection of valid characters. 64 type ValidCharacters struct { 65 Ranges []SanitizeRange 66 Characters []rune 67 } 68 69 // SanitizeOptions are the set of configurable options for sanitisation. 70 type SanitizeOptions struct { 71 NameCharacters ValidCharacters 72 KeyCharacters ValidCharacters 73 ValueCharacters ValidCharacters 74 ReplacementCharacter rune 75 } 76 77 // Sanitizer sanitizes the provided input based on the function executed. 78 type Sanitizer interface { 79 // Name sanitizes the provided `name` string. 80 Name(n string) string 81 82 // Key sanitizes the provided `key` string. 83 Key(k string) string 84 85 // Value sanitizes the provided `value` string. 86 Value(v string) string 87 } 88 89 // NewSanitizer returns a new sanitizer based on provided options. 90 func NewSanitizer(opts SanitizeOptions) Sanitizer { 91 return sanitizer{ 92 nameFn: opts.NameCharacters.sanitizeFn(opts.ReplacementCharacter), 93 keyFn: opts.KeyCharacters.sanitizeFn(opts.ReplacementCharacter), 94 valueFn: opts.ValueCharacters.sanitizeFn(opts.ReplacementCharacter), 95 } 96 } 97 98 // NoOpSanitizeFn returns the input un-touched. 99 func NoOpSanitizeFn(v string) string { return v } 100 101 // NewNoOpSanitizer returns a sanitizer which returns all inputs un-touched. 102 func NewNoOpSanitizer() Sanitizer { 103 return sanitizer{ 104 nameFn: NoOpSanitizeFn, 105 keyFn: NoOpSanitizeFn, 106 valueFn: NoOpSanitizeFn, 107 } 108 } 109 110 type sanitizer struct { 111 nameFn SanitizeFn 112 keyFn SanitizeFn 113 valueFn SanitizeFn 114 } 115 116 func (s sanitizer) Name(n string) string { 117 return s.nameFn(n) 118 } 119 120 func (s sanitizer) Key(k string) string { 121 return s.keyFn(k) 122 } 123 124 func (s sanitizer) Value(v string) string { 125 return s.valueFn(v) 126 } 127 128 var _sanitizeBuffers = sync.Pool{ 129 New: func() interface{} { 130 return new(bytes.Buffer) 131 }, 132 } 133 134 func getSanitizeBuffer() *bytes.Buffer { 135 return _sanitizeBuffers.Get().(*bytes.Buffer) 136 } 137 138 func putSanitizeBuffer(b *bytes.Buffer) { 139 b.Reset() 140 _sanitizeBuffers.Put(b) 141 } 142 143 func (c *ValidCharacters) sanitizeFn(repChar rune) SanitizeFn { 144 return func(value string) string { 145 var buf *bytes.Buffer 146 for idx, ch := range value { 147 // first check if the provided character is valid 148 validCurr := false 149 for i := 0; !validCurr && i < len(c.Ranges); i++ { 150 if ch >= c.Ranges[i][0] && ch <= c.Ranges[i][1] { 151 validCurr = true 152 break 153 } 154 } 155 for i := 0; !validCurr && i < len(c.Characters); i++ { 156 if c.Characters[i] == ch { 157 validCurr = true 158 break 159 } 160 } 161 162 // if it's valid, we can optimise allocations by avoiding copying 163 if validCurr { 164 if buf == nil { 165 continue // haven't deviated from string, still no need to init buffer 166 } 167 buf.WriteRune(ch) // we've deviated from string, write to buffer 168 continue 169 } 170 171 // ie the character is invalid, and the buffer has not been initialised 172 // so we initialise buffer and backfill 173 if buf == nil { 174 buf = getSanitizeBuffer() 175 if idx > 0 { 176 buf.WriteString(value[:idx]) 177 } 178 } 179 180 // write the replacement character 181 buf.WriteRune(repChar) 182 } 183 184 // return input un-touched if the buffer has been not initialised 185 if buf == nil { 186 return value 187 } 188 189 // otherwise, return the newly constructed buffer 190 result := buf.String() 191 putSanitizeBuffer(buf) 192 return result 193 } 194 }