github.com/primecitizens/pcz/std@v0.2.1/text/unicode/graphic.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright 2023 The Prime Citizens
     3  //
     4  // Copyright 2011 The Go Authors. All rights reserved.
     5  // Use of this source code is governed by a BSD-style
     6  // license that can be found in the LICENSE file.
     7  
     8  package unicode
     9  
    10  // Bit masks for each code point under U+0100, for fast lookup.
    11  const (
    12  	pC     = 1 << iota // a control character.
    13  	pP                 // a punctuation character.
    14  	pN                 // a numeral.
    15  	pS                 // a symbolic character.
    16  	pZ                 // a spacing character.
    17  	pLu                // an upper-case letter.
    18  	pLl                // a lower-case letter.
    19  	pp                 // a printable character according to Go's definition.
    20  	pg     = pp | pZ   // a graphical character according to the Unicode definition.
    21  	pLo    = pLl | pLu // a letter that is neither upper nor lower case.
    22  	pLmask = pLo
    23  )
    24  
    25  // GraphicRanges defines the set of graphic characters according to Unicode.
    26  var GraphicRanges = []*RangeTable{
    27  	L, M, N, P, S, Zs,
    28  }
    29  
    30  // PrintRanges defines the set of printable characters according to Go.
    31  // ASCII space, U+0020, is handled separately.
    32  var PrintRanges = []*RangeTable{
    33  	L, M, N, P, S,
    34  }
    35  
    36  // IsGraphic reports whether the rune is defined as a Graphic by Unicode.
    37  // Such characters include letters, marks, numbers, punctuation, symbols, and
    38  // spaces, from categories L, M, N, P, S, Zs.
    39  func IsGraphic(r rune) bool {
    40  	// We convert to uint32 to avoid the extra test for negative,
    41  	// and in the index we convert to uint8 to avoid the range check.
    42  	if uint32(r) <= MaxLatin1 {
    43  		return properties[uint8(r)]&pg != 0
    44  	}
    45  	return In(r, GraphicRanges...)
    46  }
    47  
    48  // IsPrint reports whether the rune is defined as printable by Go. Such
    49  // characters include letters, marks, numbers, punctuation, symbols, and the
    50  // ASCII space character, from categories L, M, N, P, S and the ASCII space
    51  // character. This categorization is the same as IsGraphic except that the
    52  // only spacing character is ASCII space, U+0020.
    53  func IsPrint(r rune) bool {
    54  	if uint32(r) <= MaxLatin1 {
    55  		return properties[uint8(r)]&pp != 0
    56  	}
    57  	return In(r, PrintRanges...)
    58  }
    59  
    60  // IsOneOf reports whether the rune is a member of one of the ranges.
    61  // The function "In" provides a nicer signature and should be used in preference to IsOneOf.
    62  func IsOneOf(ranges []*RangeTable, r rune) bool {
    63  	for _, inside := range ranges {
    64  		if Is(inside, r) {
    65  			return true
    66  		}
    67  	}
    68  	return false
    69  }
    70  
    71  // In reports whether the rune is a member of one of the ranges.
    72  func In(r rune, ranges ...*RangeTable) bool {
    73  	for _, inside := range ranges {
    74  		if Is(inside, r) {
    75  			return true
    76  		}
    77  	}
    78  	return false
    79  }
    80  
    81  // IsControl reports whether the rune is a control character.
    82  // The C (Other) Unicode category includes more code points
    83  // such as surrogates; use Is(C, r) to test for them.
    84  func IsControl(r rune) bool {
    85  	if uint32(r) <= MaxLatin1 {
    86  		return properties[uint8(r)]&pC != 0
    87  	}
    88  	// All control characters are < MaxLatin1.
    89  	return false
    90  }
    91  
    92  // IsLetter reports whether the rune is a letter (category L).
    93  func IsLetter(r rune) bool {
    94  	if uint32(r) <= MaxLatin1 {
    95  		return properties[uint8(r)]&(pLmask) != 0
    96  	}
    97  	return isExcludingLatin(Letter, r)
    98  }
    99  
   100  // IsMark reports whether the rune is a mark character (category M).
   101  func IsMark(r rune) bool {
   102  	// There are no mark characters in Latin-1.
   103  	return isExcludingLatin(Mark, r)
   104  }
   105  
   106  // IsNumber reports whether the rune is a number (category N).
   107  func IsNumber(r rune) bool {
   108  	if uint32(r) <= MaxLatin1 {
   109  		return properties[uint8(r)]&pN != 0
   110  	}
   111  	return isExcludingLatin(Number, r)
   112  }
   113  
   114  // IsPunct reports whether the rune is a Unicode punctuation character
   115  // (category P).
   116  func IsPunct(r rune) bool {
   117  	if uint32(r) <= MaxLatin1 {
   118  		return properties[uint8(r)]&pP != 0
   119  	}
   120  	return Is(Punct, r)
   121  }
   122  
   123  // IsSpace reports whether the rune is a space character as defined
   124  // by Unicode's White Space property; in the Latin-1 space
   125  // this is
   126  //
   127  //	'\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).
   128  //
   129  // Other definitions of spacing characters are set by category
   130  // Z and property Pattern_White_Space.
   131  func IsSpace(r rune) bool {
   132  	// This property isn't the same as Z; special-case it.
   133  	if uint32(r) <= MaxLatin1 {
   134  		switch r {
   135  		case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
   136  			return true
   137  		}
   138  		return false
   139  	}
   140  	return isExcludingLatin(White_Space, r)
   141  }
   142  
   143  // IsSymbol reports whether the rune is a symbolic character.
   144  func IsSymbol(r rune) bool {
   145  	if uint32(r) <= MaxLatin1 {
   146  		return properties[uint8(r)]&pS != 0
   147  	}
   148  	return isExcludingLatin(Symbol, r)
   149  }