github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/runes/runes.go (about)

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package runes provides transforms for UTF-8 encoded text.
     6  package runes // import "golang.org/x/text/runes"
     7  
     8  import (
     9  	"unicode"
    10  	"unicode/utf8"
    11  
    12  	"golang.org/x/text/transform"
    13  )
    14  
// A Set is a collection of runes.
//
// Membership is the only operation a Set exposes. Values are obtained from
// In, NotIn, or Predicate, or from any type that implements Contains.
type Set interface {
	// Contains returns true if r is contained in the set.
	Contains(r rune) bool
}
    20  
    21  type setFunc func(rune) bool
    22  
    23  func (s setFunc) Contains(r rune) bool {
    24  	return s(r)
    25  }
    26  
    27  // Note: using funcs here instead of wrapping types results in cleaner
    28  // documentation and a smaller API.
    29  
    30  // In creates a Set with a Contains method that returns true for all runes in
    31  // the given RangeTable.
    32  func In(rt *unicode.RangeTable) Set {
    33  	return setFunc(func(r rune) bool { return unicode.Is(rt, r) })
    34  }
    35  
    36  // In creates a Set with a Contains method that returns true for all runes not
    37  // in the given RangeTable.
    38  func NotIn(rt *unicode.RangeTable) Set {
    39  	return setFunc(func(r rune) bool { return !unicode.Is(rt, r) })
    40  }
    41  
    42  // Predicate creates a Set with a Contains method that returns f(r).
    43  func Predicate(f func(rune) bool) Set {
    44  	return setFunc(f)
    45  }
    46  
// Transformer implements the transform.Transformer interface.
//
// It embeds a transform.Transformer, so the embedded value's methods are
// promoted, and adds the Bytes and String convenience methods.
type Transformer struct {
	transform.Transformer
}
    51  
    52  // Bytes returns a new byte slice with the result of converting b using t.  It
    53  // calls Reset on t. It returns nil if any error was found. This can only happen
    54  // if an error-producing Transformer is passed to If.
    55  func (t Transformer) Bytes(b []byte) []byte {
    56  	b, _, err := transform.Bytes(t, b)
    57  	if err != nil {
    58  		return nil
    59  	}
    60  	return b
    61  }
    62  
    63  // String returns a string with the result of converting s using t. It calls
    64  // Reset on t. It returns the empty string if any error was found. This can only
    65  // happen if an error-producing Transformer is passed to If.
    66  func (t Transformer) String(s string) string {
    67  	s, _, err := transform.String(t, s)
    68  	if err != nil {
    69  		return ""
    70  	}
    71  	return s
    72  }
    73  
    74  // TODO:
    75  // - Copy: copying strings and bytes in whole-rune units.
    76  // - Validation (maybe)
    77  // - Well-formed-ness (maybe)
    78  
// runeErrorString is utf8.RuneError converted to a string. The transformers
// in this file write its first three bytes to the destination in place of
// each illegal input byte.
const runeErrorString = string(utf8.RuneError)
    80  
    81  // Remove returns a Transformer that removes runes r for which s.Contains(r).
    82  // Illegal input bytes are replaced by RuneError before being passed to f.
    83  func Remove(s Set) Transformer {
    84  	if f, ok := s.(setFunc); ok {
    85  		// This little trick cuts the running time of BenchmarkRemove for sets
    86  		// created by Predicate roughly in half.
    87  		// TODO: special-case RangeTables as well.
    88  		return Transformer{remove(f)}
    89  	}
    90  	return Transformer{remove(s.Contains)}
    91  }
    92  
    93  // TODO: remove transform.RemoveFunc.
    94  
    95  type remove func(r rune) bool
    96  
    97  func (remove) Reset() {}
    98  
    99  // Transform implements transform.Transformer.
   100  func (t remove) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
   101  	for r, size := rune(0), 0; nSrc < len(src); {
   102  		if r = rune(src[nSrc]); r < utf8.RuneSelf {
   103  			size = 1
   104  		} else {
   105  			r, size = utf8.DecodeRune(src[nSrc:])
   106  
   107  			if size == 1 {
   108  				// Invalid rune.
   109  				if !atEOF && !utf8.FullRune(src[nSrc:]) {
   110  					err = transform.ErrShortSrc
   111  					break
   112  				}
   113  				// We replace illegal bytes with RuneError. Not doing so might
   114  				// otherwise turn a sequence of invalid UTF-8 into valid UTF-8.
   115  				// The resulting byte sequence may subsequently contain runes
   116  				// for which t(r) is true that were passed unnoticed.
   117  				if !t(utf8.RuneError) {
   118  					if nDst+3 > len(dst) {
   119  						err = transform.ErrShortDst
   120  						break
   121  					}
   122  					dst[nDst+0] = runeErrorString[0]
   123  					dst[nDst+1] = runeErrorString[1]
   124  					dst[nDst+2] = runeErrorString[2]
   125  					nDst += 3
   126  				}
   127  				nSrc++
   128  				continue
   129  			}
   130  		}
   131  
   132  		if t(r) {
   133  			nSrc += size
   134  			continue
   135  		}
   136  		if nDst+size > len(dst) {
   137  			err = transform.ErrShortDst
   138  			break
   139  		}
   140  		for i := 0; i < size; i++ {
   141  			dst[nDst] = src[nSrc]
   142  			nDst++
   143  			nSrc++
   144  		}
   145  	}
   146  	return
   147  }
   148  
   149  // Map returns a Transformer that maps the runes in the input using the given
   150  // mapping. Illegal bytes in the input are converted to utf8.RuneError before
   151  // being passed to the mapping func.
   152  func Map(mapping func(rune) rune) Transformer {
   153  	return Transformer{mapper(mapping)}
   154  }
   155  
   156  type mapper func(rune) rune
   157  
   158  func (mapper) Reset() {}
   159  
   160  // Transform implements transform.Transformer.
   161  func (t mapper) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
   162  	var replacement rune
   163  	var b [utf8.UTFMax]byte
   164  
   165  	for r, size := rune(0), 0; nSrc < len(src); {
   166  		if r = rune(src[nSrc]); r < utf8.RuneSelf {
   167  			if replacement = t(r); replacement < utf8.RuneSelf {
   168  				if nDst == len(dst) {
   169  					err = transform.ErrShortDst
   170  					break
   171  				}
   172  				dst[nDst] = byte(replacement)
   173  				nDst++
   174  				nSrc++
   175  				continue
   176  			}
   177  			size = 1
   178  		} else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 {
   179  			// Invalid rune.
   180  			if !atEOF && !utf8.FullRune(src[nSrc:]) {
   181  				err = transform.ErrShortSrc
   182  				break
   183  			}
   184  
   185  			if replacement = t(utf8.RuneError); replacement == utf8.RuneError {
   186  				if nDst+3 > len(dst) {
   187  					err = transform.ErrShortDst
   188  					break
   189  				}
   190  				dst[nDst+0] = runeErrorString[0]
   191  				dst[nDst+1] = runeErrorString[1]
   192  				dst[nDst+2] = runeErrorString[2]
   193  				nDst += 3
   194  				nSrc++
   195  				continue
   196  			}
   197  		} else if replacement = t(r); replacement == r {
   198  			if nDst+size > len(dst) {
   199  				err = transform.ErrShortDst
   200  				break
   201  			}
   202  			for i := 0; i < size; i++ {
   203  				dst[nDst] = src[nSrc]
   204  				nDst++
   205  				nSrc++
   206  			}
   207  			continue
   208  		}
   209  
   210  		n := utf8.EncodeRune(b[:], replacement)
   211  
   212  		if nDst+n > len(dst) {
   213  			err = transform.ErrShortDst
   214  			break
   215  		}
   216  		for i := 0; i < n; i++ {
   217  			dst[nDst] = b[i]
   218  			nDst++
   219  		}
   220  		nSrc += size
   221  	}
   222  	return
   223  }
   224  
   225  // ReplaceIllFormed returns a transformer that replaces all input bytes that are
   226  // not part of a well-formed UTF-8 code sequence with utf8.RuneError.
   227  func ReplaceIllFormed() Transformer {
   228  	return Transformer{&replaceIllFormed{}}
   229  }
   230  
   231  type replaceIllFormed struct{ transform.NopResetter }
   232  
   233  func (t replaceIllFormed) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
   234  	for nSrc < len(src) {
   235  		r, size := utf8.DecodeRune(src[nSrc:])
   236  
   237  		// Look for an ASCII rune.
   238  		if r < utf8.RuneSelf {
   239  			if nDst == len(dst) {
   240  				err = transform.ErrShortDst
   241  				break
   242  			}
   243  			dst[nDst] = byte(r)
   244  			nDst++
   245  			nSrc++
   246  			continue
   247  		}
   248  
   249  		// Look for a valid non-ASCII rune.
   250  		if r != utf8.RuneError || size != 1 {
   251  			if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
   252  				err = transform.ErrShortDst
   253  				break
   254  			}
   255  			nDst += size
   256  			nSrc += size
   257  			continue
   258  		}
   259  
   260  		// Look for short source data.
   261  		if !atEOF && !utf8.FullRune(src[nSrc:]) {
   262  			err = transform.ErrShortSrc
   263  			break
   264  		}
   265  
   266  		// We have an invalid rune.
   267  		if nDst+3 > len(dst) {
   268  			err = transform.ErrShortDst
   269  			break
   270  		}
   271  		dst[nDst+0] = runeErrorString[0]
   272  		dst[nDst+1] = runeErrorString[1]
   273  		dst[nDst+2] = runeErrorString[2]
   274  		nDst += 3
   275  		nSrc++
   276  	}
   277  	return nDst, nSrc, err
   278  }