github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/runes/runes.go (about) 1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package runes provide transforms for UTF-8 encoded text. 6 package runes // import "golang.org/x/text/runes" 7 8 import ( 9 "unicode" 10 "unicode/utf8" 11 12 "golang.org/x/text/transform" 13 ) 14 15 // A Set is a collection of runes. 16 type Set interface { 17 // Contains returns true if r is contained in the set. 18 Contains(r rune) bool 19 } 20 21 type setFunc func(rune) bool 22 23 func (s setFunc) Contains(r rune) bool { 24 return s(r) 25 } 26 27 // Note: using funcs here instead of wrapping types result in cleaner 28 // documentation and a smaller API. 29 30 // In creates a Set with a Contains method that returns true for all runes in 31 // the given RangeTable. 32 func In(rt *unicode.RangeTable) Set { 33 return setFunc(func(r rune) bool { return unicode.Is(rt, r) }) 34 } 35 36 // In creates a Set with a Contains method that returns true for all runes not 37 // in the given RangeTable. 38 func NotIn(rt *unicode.RangeTable) Set { 39 return setFunc(func(r rune) bool { return !unicode.Is(rt, r) }) 40 } 41 42 // Predicate creates a Set with a Contains method that returns f(r). 43 func Predicate(f func(rune) bool) Set { 44 return setFunc(f) 45 } 46 47 // Transformer implements the transform.Transformer interface. 48 type Transformer struct { 49 transform.Transformer 50 } 51 52 // Bytes returns a new byte slice with the result of converting b using t. It 53 // calls Reset on t. It returns nil if any error was found. This can only happen 54 // if an error-producing Transformer is passed to If. 55 func (t Transformer) Bytes(b []byte) []byte { 56 b, _, err := transform.Bytes(t, b) 57 if err != nil { 58 return nil 59 } 60 return b 61 } 62 63 // String returns a string with the result of converting s using t. It calls 64 // Reset on t. It returns the empty string if any error was found. This can only 65 // happen if an error-producing Transformer is passed to If. 66 func (t Transformer) String(s string) string { 67 s, _, err := transform.String(t, s) 68 if err != nil { 69 return "" 70 } 71 return s 72 } 73 74 // TODO: 75 // - Copy: copying strings and bytes in whole-rune units. 76 // - Validation (maybe) 77 // - Well-formed-ness (maybe) 78 79 const runeErrorString = string(utf8.RuneError) 80 81 // Remove returns a Transformer that removes runes r for which s.Contains(r). 82 // Illegal input bytes are replaced by RuneError before being passed to f. 83 func Remove(s Set) Transformer { 84 if f, ok := s.(setFunc); ok { 85 // This little trick cuts the running time of BenchmarkRemove for sets 86 // created by Predicate roughly in half. 87 // TODO: special-case RangeTables as well. 88 return Transformer{remove(f)} 89 } 90 return Transformer{remove(s.Contains)} 91 } 92 93 // TODO: remove transform.RemoveFunc. 94 95 type remove func(r rune) bool 96 97 func (remove) Reset() {} 98 99 // Transform implements transform.Transformer. 100 func (t remove) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 101 for r, size := rune(0), 0; nSrc < len(src); { 102 if r = rune(src[nSrc]); r < utf8.RuneSelf { 103 size = 1 104 } else { 105 r, size = utf8.DecodeRune(src[nSrc:]) 106 107 if size == 1 { 108 // Invalid rune. 109 if !atEOF && !utf8.FullRune(src[nSrc:]) { 110 err = transform.ErrShortSrc 111 break 112 } 113 // We replace illegal bytes with RuneError. Not doing so might 114 // otherwise turn a sequence of invalid UTF-8 into valid UTF-8. 115 // The resulting byte sequence may subsequently contain runes 116 // for which t(r) is true that were passed unnoticed. 117 if !t(utf8.RuneError) { 118 if nDst+3 > len(dst) { 119 err = transform.ErrShortDst 120 break 121 } 122 dst[nDst+0] = runeErrorString[0] 123 dst[nDst+1] = runeErrorString[1] 124 dst[nDst+2] = runeErrorString[2] 125 nDst += 3 126 } 127 nSrc++ 128 continue 129 } 130 } 131 132 if t(r) { 133 nSrc += size 134 continue 135 } 136 if nDst+size > len(dst) { 137 err = transform.ErrShortDst 138 break 139 } 140 for i := 0; i < size; i++ { 141 dst[nDst] = src[nSrc] 142 nDst++ 143 nSrc++ 144 } 145 } 146 return 147 } 148 149 // Map returns a Transformer that maps the runes in the input using the given 150 // mapping. Illegal bytes in the input are converted to utf8.RuneError before 151 // being passed to the mapping func. 152 func Map(mapping func(rune) rune) Transformer { 153 return Transformer{mapper(mapping)} 154 } 155 156 type mapper func(rune) rune 157 158 func (mapper) Reset() {} 159 160 // Transform implements transform.Transformer. 161 func (t mapper) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 162 var replacement rune 163 var b [utf8.UTFMax]byte 164 165 for r, size := rune(0), 0; nSrc < len(src); { 166 if r = rune(src[nSrc]); r < utf8.RuneSelf { 167 if replacement = t(r); replacement < utf8.RuneSelf { 168 if nDst == len(dst) { 169 err = transform.ErrShortDst 170 break 171 } 172 dst[nDst] = byte(replacement) 173 nDst++ 174 nSrc++ 175 continue 176 } 177 size = 1 178 } else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 { 179 // Invalid rune. 180 if !atEOF && !utf8.FullRune(src[nSrc:]) { 181 err = transform.ErrShortSrc 182 break 183 } 184 185 if replacement = t(utf8.RuneError); replacement == utf8.RuneError { 186 if nDst+3 > len(dst) { 187 err = transform.ErrShortDst 188 break 189 } 190 dst[nDst+0] = runeErrorString[0] 191 dst[nDst+1] = runeErrorString[1] 192 dst[nDst+2] = runeErrorString[2] 193 nDst += 3 194 nSrc++ 195 continue 196 } 197 } else if replacement = t(r); replacement == r { 198 if nDst+size > len(dst) { 199 err = transform.ErrShortDst 200 break 201 } 202 for i := 0; i < size; i++ { 203 dst[nDst] = src[nSrc] 204 nDst++ 205 nSrc++ 206 } 207 continue 208 } 209 210 n := utf8.EncodeRune(b[:], replacement) 211 212 if nDst+n > len(dst) { 213 err = transform.ErrShortDst 214 break 215 } 216 for i := 0; i < n; i++ { 217 dst[nDst] = b[i] 218 nDst++ 219 } 220 nSrc += size 221 } 222 return 223 } 224 225 // ReplaceIllFormed returns a transformer that replaces all input bytes that are 226 // not part of a well-formed UTF-8 code sequence with utf8.RuneError. 227 func ReplaceIllFormed() Transformer { 228 return Transformer{&replaceIllFormed{}} 229 } 230 231 type replaceIllFormed struct{ transform.NopResetter } 232 233 func (t replaceIllFormed) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 234 for nSrc < len(src) { 235 r, size := utf8.DecodeRune(src[nSrc:]) 236 237 // Look for an ASCII rune. 238 if r < utf8.RuneSelf { 239 if nDst == len(dst) { 240 err = transform.ErrShortDst 241 break 242 } 243 dst[nDst] = byte(r) 244 nDst++ 245 nSrc++ 246 continue 247 } 248 249 // Look for a valid non-ASCII rune. 250 if r != utf8.RuneError || size != 1 { 251 if size != copy(dst[nDst:], src[nSrc:nSrc+size]) { 252 err = transform.ErrShortDst 253 break 254 } 255 nDst += size 256 nSrc += size 257 continue 258 } 259 260 // Look for short source data. 261 if !atEOF && !utf8.FullRune(src[nSrc:]) { 262 err = transform.ErrShortSrc 263 break 264 } 265 266 // We have an invalid rune. 267 if nDst+3 > len(dst) { 268 err = transform.ErrShortDst 269 break 270 } 271 dst[nDst+0] = runeErrorString[0] 272 dst[nDst+1] = runeErrorString[1] 273 dst[nDst+2] = runeErrorString[2] 274 nDst += 3 275 nSrc++ 276 } 277 return nDst, nSrc, err 278 }