github.com/tunabay/go-bitarray@v1.3.1/bitarray_parse.go (about) 1 // Copyright (c) 2021 Hirotsuna Mizuno. All rights reserved. 2 // Use of this source code is governed by the MIT license that can be found in 3 // the LICENSE file. 4 5 package bitarray 6 7 import ( 8 "fmt" 9 "regexp" 10 "strings" 11 "unicode" 12 ) 13 14 var ( 15 parsePSepRE = regexp.MustCompile(`\s*[+]\s*`) 16 parseScanRE = regexp.MustCompile( 17 `^\s*(0([box]))?([-_:0-9a-fA-F]+)( ?[(](pad=|!)([0-3])[)])?\s*(.*)$`, 18 ) 19 parseBaseExpr = map[byte]byte{'b': 2, 'o': 8, 'x': 16} 20 parseDigits = map[rune]byte{ 21 '0': 0, '1': 1, '2': 2, '3': 3, '4': 4, 22 '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, 23 'a': 0xa, 'b': 0xb, 'c': 0xc, 'd': 0xd, 'e': 0xe, 'f': 0xf, 24 'A': 0xa, 'B': 0xb, 'C': 0xc, 'D': 0xd, 'E': 0xe, 'F': 0xf, 25 } 26 ) 27 28 // MustParse is like Parse but panics if the expression can not be parsed. It 29 // simplifies safe initialization of global variables holding bit arrays. 30 func MustParse(s string) *BitArray { 31 ba, err := Parse(s) 32 if err != nil { 33 panicf("MustParse(%q): %d", s, err) 34 } 35 return ba 36 } 37 38 // Parse parses a string as a bit array representation, like "01010". 39 // 40 // Multiple tokens can be presented, which are separated by one or more spaces 41 // and/or a single "+" sign. All bits contained in tokens will be simply 42 // concatenated. Each token can be binary, octal, or hexademical, and the type 43 // is specified by the prefixes "0b", "0o" or "0x". Tokens without a prefix are 44 // always parsed as binary representation. Each token also can contain any 45 // number of separators "-", "_", and ":". These separators are safely ignored. 46 // 47 // Note that spaces between digits are parsed as token delimiters, not 48 // separators within tokens. This is not a problem for binary representations, 49 // but "0o" and "0x" prefixes have no effect beyond the spaces. For example, 50 // "0b0000 1111" is legal, but "0x0000 ffff" is illegal. Because the "ffff" is 51 // interpreted as a second token without a prefix, so "f" cannot be parsed as 52 // binary representation. Use other separators instead: e.g. "0x0000_ffff". 53 // 54 // bitarray = *WSP [ token *( token-sep token ) ] *WSP 55 // token-sep = *WSP ( WSP / "+" ) *WSP 56 // token = bin-token / oct-token / hex-token 57 // bin-token = [ "0b" [ char-sep ] ] bin-char *( [ char-sep ] bin-char ) 58 // oct-token = "0o" 1*( [ char-sep ] oct-char ) [ oct-pad ] 59 // hex-token = "0x" 1*( [ char-sep ] hex-char ) [ hex-pad ] 60 // char-sep = "-" / "_" / ":" 61 // bin-char = "0" / "1" 62 // oct-char = bin-char / "2" / "3" / "4" / "5" / "6" / "7" 63 // hex-char = oct-char / "8" / "9" 64 // / "a" / "b" / "c" / "d" / "e" / "f" 65 // / "A" / "B" / "C" / "D" / "E" / "F" 66 // oct-pad = [ " " ] "(" pad-ind ( "0" / "1" / "2" ) ")" 67 // hex-pad = [ " " ] "(" pad-ind ( "0" / "1" / "2" / "3" ) ")" 68 // pad-ind = "pad=" / "!" 69 func Parse(s string) (*BitArray, error) { 70 s = strings.Map(parseMapSpaces, s) 71 zf := true 72 bb := NewBuilder() 73 lines := parsePSepRE.Split(s, -1) 74 for _, line := range lines { 75 line = strings.TrimSpace(line) 76 if len(line) == 0 && 1 < len(lines) { 77 return nil, fmt.Errorf("%q: %w: empty token", s, ErrIllegalExpression) 78 } 79 for 0 < len(line) { 80 m := parseScanRE.FindStringSubmatch(line) 81 if len(m) != parseScanRE.NumSubexp()+1 { 82 return nil, fmt.Errorf("%q: %w: malformed input", line, ErrIllegalExpression) 83 } 84 tzf, err := parseToken(bb, m[2], m[3], m[6]) 85 if err != nil { 86 return nil, fmt.Errorf("%q: malformed token: %w", m[0], err) 87 } 88 zf = zf && tzf 89 line = m[7] 90 } 91 } 92 if zf { 93 return &BitArray{nBits: bb.nBits}, nil 94 } 95 96 return bb.BitArray(), nil 97 } 98 99 func parseMapSpaces(r rune) rune { 100 if unicode.IsSpace(r) { 101 return ' ' 102 } 103 return r 104 } 105 106 func parseToken(bb *Builder, baseStr, bodyStr, npadStr string) (bool, error) { 107 base := byte(2) 108 if len(baseStr) != 0 { 109 b, ok := parseBaseExpr[baseStr[0]] 110 if !ok { 111 return false, fmt.Errorf(`%w: base %q`, ErrIllegalExpression, baseStr) 112 } 113 base = b 114 } 115 116 // digits 117 var zfb byte 118 digits := make([]byte, 0, len(bodyStr)) 119 allowSep := baseStr != "" 120 var lastSep rune 121 for _, r := range bodyStr { 122 if dv, ok := parseDigits[r]; ok { 123 if base <= dv { 124 return false, fmt.Errorf(`%w: digit '%x' for base %d`, ErrIllegalExpression, dv, base) 125 } 126 zfb |= dv 127 digits = append(digits, dv) 128 allowSep = true 129 continue 130 } 131 if strings.ContainsRune("-_:", r) { 132 if !allowSep { 133 return false, fmt.Errorf(`%w: separator '%c'`, ErrIllegalExpression, r) 134 } 135 allowSep = false 136 lastSep = r 137 continue 138 } 139 return false, fmt.Errorf("%w: unexpected '%c'", ErrIllegalExpression, r) 140 } 141 if !allowSep { 142 return false, fmt.Errorf(`%w: token ends with a separator '%c'`, ErrIllegalExpression, lastSep) 143 } 144 145 // padding 146 npad := 0 147 if npadStr != "" { 148 npad = int(npadStr[0]) - int('0') 149 } 150 switch base { 151 case 2: 152 if npadStr != "" { 153 return false, fmt.Errorf("%w: pad=%s for bin token", ErrIllegalExpression, npadStr) 154 } 155 bb.WriteByteBits(digits) 156 case 8: 157 if 2 < npad { 158 return false, fmt.Errorf("%w: pad=%s for oct token", ErrIllegalExpression, npadStr) 159 } 160 for i, digit := range digits { 161 switch { 162 case i+1 < len(digits) || npad == 0: 163 bb.WriteByteBits([]byte{ 164 digit >> 2, 165 digit >> 1, 166 digit, 167 }) 168 case npad == 1: 169 bb.WriteByteBits([]byte{ 170 digit >> 2, 171 digit >> 1, 172 }) 173 case npad == 2: 174 bb.WriteByteBits([]byte{ 175 digit >> 2, 176 }) 177 } 178 } 179 case 16: 180 if 3 < npad { // this case should have been eliminated by regex 181 return false, fmt.Errorf( 182 "%w: pad=%s for hex token", 183 ErrIllegalExpression, npadStr, 184 ) 185 } 186 for i, digit := range digits { 187 switch { 188 case i+1 < len(digits) || npad == 0: 189 bb.WriteByteBits([]byte{ 190 digit >> 3, 191 digit >> 2, 192 digit >> 1, 193 digit, 194 }) 195 case npad == 1: 196 bb.WriteByteBits([]byte{ 197 digit >> 3, 198 digit >> 2, 199 digit >> 1, 200 }) 201 case npad == 2: 202 bb.WriteByteBits([]byte{ 203 digit >> 3, 204 digit >> 2, 205 }) 206 case npad == 3: 207 bb.WriteByteBits([]byte{ 208 digit >> 3, 209 }) 210 } 211 } 212 default: 213 // this should never happen 214 return false, fmt.Errorf( 215 "%w: base %d(%s)", 216 ErrIllegalExpression, base, baseStr, 217 ) 218 } 219 220 return zfb == 0, nil 221 }