github.com/thiagoyeds/go-cloud@v0.26.0/docstore/internal/fields/fold.go (about)

     1  // Copyright 2019 The Go Cloud Development Kit Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fields
    16  
    17  // This file was copied from https://go.googlesource.com/go/+/go1.7.3/src/encoding/json/fold.go.
    18  // Only the license and package were changed.
    19  
    20  import (
    21  	"bytes"
    22  	"unicode/utf8"
    23  )
    24  
    25  const (
    26  	caseMask     = ^byte(0x20) // Mask to ignore case in ASCII.
    27  	kelvin       = '\u212a'
    28  	smallLongEss = '\u017f'
    29  )
    30  
    31  // foldFunc returns one of four different case folding equivalence
    32  // functions, from most general (and slow) to fastest:
    33  //
    34  // 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8
    35  // 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S')
    36  // 3) asciiEqualFold, no special, but includes non-letters (including _)
    37  // 4) simpleLetterEqualFold, no specials, no non-letters.
    38  //
    39  // The letters S and K are special because they map to 3 runes, not just 2:
    40  //  * S maps to s and to U+017F 'ſ' Latin small letter long s
    41  //  * k maps to K and to U+212A 'K' Kelvin sign
    42  // See https://play.golang.org/p/tTxjOc0OGo
    43  //
    44  // The returned function is specialized for matching against s and
    45  // should only be given s. It's not curried for performance reasons.
    46  func foldFunc(s []byte) func(s, t []byte) bool {
    47  	nonLetter := false
    48  	special := false // special letter
    49  	for _, b := range s {
    50  		if b >= utf8.RuneSelf {
    51  			return bytes.EqualFold
    52  		}
    53  		upper := b & caseMask
    54  		if upper < 'A' || upper > 'Z' {
    55  			nonLetter = true
    56  		} else if upper == 'K' || upper == 'S' {
    57  			// See above for why these letters are special.
    58  			special = true
    59  		}
    60  	}
    61  	if special {
    62  		return equalFoldRight
    63  	}
    64  	if nonLetter {
    65  		return asciiEqualFold
    66  	}
    67  	return simpleLetterEqualFold
    68  }
    69  
    70  // equalFoldRight is a specialization of bytes.EqualFold when s is
    71  // known to be all ASCII (including punctuation), but contains an 's',
    72  // 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t.
    73  // See comments on foldFunc.
    74  func equalFoldRight(s, t []byte) bool {
    75  	for _, sb := range s {
    76  		if len(t) == 0 {
    77  			return false
    78  		}
    79  		tb := t[0]
    80  		if tb < utf8.RuneSelf {
    81  			if sb != tb {
    82  				sbUpper := sb & caseMask
    83  				if 'A' <= sbUpper && sbUpper <= 'Z' {
    84  					if sbUpper != tb&caseMask {
    85  						return false
    86  					}
    87  				} else {
    88  					return false
    89  				}
    90  			}
    91  			t = t[1:]
    92  			continue
    93  		}
    94  		// sb is ASCII and t is not. t must be either kelvin
    95  		// sign or long s; sb must be s, S, k, or K.
    96  		tr, size := utf8.DecodeRune(t)
    97  		switch sb {
    98  		case 's', 'S':
    99  			if tr != smallLongEss {
   100  				return false
   101  			}
   102  		case 'k', 'K':
   103  			if tr != kelvin {
   104  				return false
   105  			}
   106  		default:
   107  			return false
   108  		}
   109  		t = t[size:]
   110  
   111  	}
   112  	if len(t) > 0 {
   113  		return false
   114  	}
   115  	return true
   116  }
   117  
   118  // asciiEqualFold is a specialization of bytes.EqualFold for use when
   119  // s is all ASCII (but may contain non-letters) and contains no
   120  // special-folding letters.
   121  // See comments on foldFunc.
   122  func asciiEqualFold(s, t []byte) bool {
   123  	if len(s) != len(t) {
   124  		return false
   125  	}
   126  	for i, sb := range s {
   127  		tb := t[i]
   128  		if sb == tb {
   129  			continue
   130  		}
   131  		if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') {
   132  			if sb&caseMask != tb&caseMask {
   133  				return false
   134  			}
   135  		} else {
   136  			return false
   137  		}
   138  	}
   139  	return true
   140  }
   141  
   142  // simpleLetterEqualFold is a specialization of bytes.EqualFold for
   143  // use when s is all ASCII letters (no underscores, etc) and also
   144  // doesn't contain 'k', 'K', 's', or 'S'.
   145  // See comments on foldFunc.
   146  func simpleLetterEqualFold(s, t []byte) bool {
   147  	if len(s) != len(t) {
   148  		return false
   149  	}
   150  	for i, b := range s {
   151  		if b&caseMask != t[i]&caseMask {
   152  			return false
   153  		}
   154  	}
   155  	return true
   156  }