github.com/rsc/go@v0.0.0-20150416155037-e040fd465409/src/encoding/json/fold.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package json
     6  
     7  import (
     8  	"bytes"
     9  	"unicode/utf8"
    10  )
    11  
const (
	// caseMask clears bit 0x20, the single bit in which the upper- and
	// lower-case forms of an ASCII letter differ; b&caseMask therefore
	// yields the same value for both cases of a letter.
	caseMask = ^byte(0x20) // Mask to ignore case in ASCII.
	// kelvin is U+212A (Kelvin sign), the non-ASCII rune that case-folds
	// to 'k' and 'K'.
	kelvin = '\u212a'
	// smallLongEss is U+017F (Latin small letter long s), the non-ASCII
	// rune that case-folds to 's' and 'S'.
	smallLongEss = '\u017f'
)
    17  
    18  // foldFunc returns one of four different case folding equivalence
    19  // functions, from most general (and slow) to fastest:
    20  //
    21  // 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8
    22  // 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S')
    23  // 3) asciiEqualFold, no special, but includes non-letters (including _)
    24  // 4) simpleLetterEqualFold, no specials, no non-letters.
    25  //
    26  // The letters S and K are special because they map to 3 runes, not just 2:
    27  //  * S maps to s and to U+017F 'ſ' Latin small letter long s
    28  //  * k maps to K and to U+212A 'K' Kelvin sign
    29  // See http://play.golang.org/p/tTxjOc0OGo
    30  //
    31  // The returned function is specialized for matching against s and
    32  // should only be given s. It's not curried for performance reasons.
    33  func foldFunc(s []byte) func(s, t []byte) bool {
    34  	nonLetter := false
    35  	special := false // special letter
    36  	for _, b := range s {
    37  		if b >= utf8.RuneSelf {
    38  			return bytes.EqualFold
    39  		}
    40  		upper := b & caseMask
    41  		if upper < 'A' || upper > 'Z' {
    42  			nonLetter = true
    43  		} else if upper == 'K' || upper == 'S' {
    44  			// See above for why these letters are special.
    45  			special = true
    46  		}
    47  	}
    48  	if special {
    49  		return equalFoldRight
    50  	}
    51  	if nonLetter {
    52  		return asciiEqualFold
    53  	}
    54  	return simpleLetterEqualFold
    55  }
    56  
    57  // equalFoldRight is a specialization of bytes.EqualFold when s is
    58  // known to be all ASCII (including punctuation), but contains an 's',
    59  // 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t.
    60  // See comments on foldFunc.
    61  func equalFoldRight(s, t []byte) bool {
    62  	for _, sb := range s {
    63  		if len(t) == 0 {
    64  			return false
    65  		}
    66  		tb := t[0]
    67  		if tb < utf8.RuneSelf {
    68  			if sb != tb {
    69  				sbUpper := sb & caseMask
    70  				if 'A' <= sbUpper && sbUpper <= 'Z' {
    71  					if sbUpper != tb&caseMask {
    72  						return false
    73  					}
    74  				} else {
    75  					return false
    76  				}
    77  			}
    78  			t = t[1:]
    79  			continue
    80  		}
    81  		// sb is ASCII and t is not. t must be either kelvin
    82  		// sign or long s; sb must be s, S, k, or K.
    83  		tr, size := utf8.DecodeRune(t)
    84  		switch sb {
    85  		case 's', 'S':
    86  			if tr != smallLongEss {
    87  				return false
    88  			}
    89  		case 'k', 'K':
    90  			if tr != kelvin {
    91  				return false
    92  			}
    93  		default:
    94  			return false
    95  		}
    96  		t = t[size:]
    97  
    98  	}
    99  	if len(t) > 0 {
   100  		return false
   101  	}
   102  	return true
   103  }
   104  
   105  // asciiEqualFold is a specialization of bytes.EqualFold for use when
   106  // s is all ASCII (but may contain non-letters) and contains no
   107  // special-folding letters.
   108  // See comments on foldFunc.
   109  func asciiEqualFold(s, t []byte) bool {
   110  	if len(s) != len(t) {
   111  		return false
   112  	}
   113  	for i, sb := range s {
   114  		tb := t[i]
   115  		if sb == tb {
   116  			continue
   117  		}
   118  		if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') {
   119  			if sb&caseMask != tb&caseMask {
   120  				return false
   121  			}
   122  		} else {
   123  			return false
   124  		}
   125  	}
   126  	return true
   127  }
   128  
   129  // simpleLetterEqualFold is a specialization of bytes.EqualFold for
   130  // use when s is all ASCII letters (no underscores, etc) and also
   131  // doesn't contain 'k', 'K', 's', or 'S'.
   132  // See comments on foldFunc.
   133  func simpleLetterEqualFold(s, t []byte) bool {
   134  	if len(s) != len(t) {
   135  		return false
   136  	}
   137  	for i, b := range s {
   138  		if b&caseMask != t[i]&caseMask {
   139  			return false
   140  		}
   141  	}
   142  	return true
   143  }