github.com/primecitizens/pcz/std@v0.2.1/text/unicode/graphic.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright 2023 The Prime Citizens 3 // 4 // Copyright 2011 The Go Authors. All rights reserved. 5 // Use of this source code is governed by a BSD-style 6 // license that can be found in the LICENSE file. 7 8 package unicode 9 10 // Bit masks for each code point under U+0100, for fast lookup. 11 const ( 12 pC = 1 << iota // a control character. 13 pP // a punctuation character. 14 pN // a numeral. 15 pS // a symbolic character. 16 pZ // a spacing character. 17 pLu // an upper-case letter. 18 pLl // a lower-case letter. 19 pp // a printable character according to Go's definition. 20 pg = pp | pZ // a graphical character according to the Unicode definition. 21 pLo = pLl | pLu // a letter that is neither upper nor lower case. 22 pLmask = pLo 23 ) 24 25 // GraphicRanges defines the set of graphic characters according to Unicode. 26 var GraphicRanges = []*RangeTable{ 27 L, M, N, P, S, Zs, 28 } 29 30 // PrintRanges defines the set of printable characters according to Go. 31 // ASCII space, U+0020, is handled separately. 32 var PrintRanges = []*RangeTable{ 33 L, M, N, P, S, 34 } 35 36 // IsGraphic reports whether the rune is defined as a Graphic by Unicode. 37 // Such characters include letters, marks, numbers, punctuation, symbols, and 38 // spaces, from categories L, M, N, P, S, Zs. 39 func IsGraphic(r rune) bool { 40 // We convert to uint32 to avoid the extra test for negative, 41 // and in the index we convert to uint8 to avoid the range check. 42 if uint32(r) <= MaxLatin1 { 43 return properties[uint8(r)]&pg != 0 44 } 45 return In(r, GraphicRanges...) 46 } 47 48 // IsPrint reports whether the rune is defined as printable by Go. Such 49 // characters include letters, marks, numbers, punctuation, symbols, and the 50 // ASCII space character, from categories L, M, N, P, S and the ASCII space 51 // character. This categorization is the same as IsGraphic except that the 52 // only spacing character is ASCII space, U+0020. 53 func IsPrint(r rune) bool { 54 if uint32(r) <= MaxLatin1 { 55 return properties[uint8(r)]&pp != 0 56 } 57 return In(r, PrintRanges...) 58 } 59 60 // IsOneOf reports whether the rune is a member of one of the ranges. 61 // The function "In" provides a nicer signature and should be used in preference to IsOneOf. 62 func IsOneOf(ranges []*RangeTable, r rune) bool { 63 for _, inside := range ranges { 64 if Is(inside, r) { 65 return true 66 } 67 } 68 return false 69 } 70 71 // In reports whether the rune is a member of one of the ranges. 72 func In(r rune, ranges ...*RangeTable) bool { 73 for _, inside := range ranges { 74 if Is(inside, r) { 75 return true 76 } 77 } 78 return false 79 } 80 81 // IsControl reports whether the rune is a control character. 82 // The C (Other) Unicode category includes more code points 83 // such as surrogates; use Is(C, r) to test for them. 84 func IsControl(r rune) bool { 85 if uint32(r) <= MaxLatin1 { 86 return properties[uint8(r)]&pC != 0 87 } 88 // All control characters are < MaxLatin1. 89 return false 90 } 91 92 // IsLetter reports whether the rune is a letter (category L). 93 func IsLetter(r rune) bool { 94 if uint32(r) <= MaxLatin1 { 95 return properties[uint8(r)]&(pLmask) != 0 96 } 97 return isExcludingLatin(Letter, r) 98 } 99 100 // IsMark reports whether the rune is a mark character (category M). 101 func IsMark(r rune) bool { 102 // There are no mark characters in Latin-1. 103 return isExcludingLatin(Mark, r) 104 } 105 106 // IsNumber reports whether the rune is a number (category N). 107 func IsNumber(r rune) bool { 108 if uint32(r) <= MaxLatin1 { 109 return properties[uint8(r)]&pN != 0 110 } 111 return isExcludingLatin(Number, r) 112 } 113 114 // IsPunct reports whether the rune is a Unicode punctuation character 115 // (category P). 116 func IsPunct(r rune) bool { 117 if uint32(r) <= MaxLatin1 { 118 return properties[uint8(r)]&pP != 0 119 } 120 return Is(Punct, r) 121 } 122 123 // IsSpace reports whether the rune is a space character as defined 124 // by Unicode's White Space property; in the Latin-1 space 125 // this is 126 // 127 // '\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP). 128 // 129 // Other definitions of spacing characters are set by category 130 // Z and property Pattern_White_Space. 131 func IsSpace(r rune) bool { 132 // This property isn't the same as Z; special-case it. 133 if uint32(r) <= MaxLatin1 { 134 switch r { 135 case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0: 136 return true 137 } 138 return false 139 } 140 return isExcludingLatin(White_Space, r) 141 } 142 143 // IsSymbol reports whether the rune is a symbolic character. 144 func IsSymbol(r rune) bool { 145 if uint32(r) <= MaxLatin1 { 146 return properties[uint8(r)]&pS != 0 147 } 148 return isExcludingLatin(Symbol, r) 149 }