github.com/tunabay/go-bitarray@v1.3.1/bitarray_parse.go (about)

     1  // Copyright (c) 2021 Hirotsuna Mizuno. All rights reserved.
     2  // Use of this source code is governed by the MIT license that can be found in
     3  // the LICENSE file.
     4  
     5  package bitarray
     6  
     7  import (
     8  	"fmt"
     9  	"regexp"
    10  	"strings"
    11  	"unicode"
    12  )
    13  
    14  var (
    15  	parsePSepRE = regexp.MustCompile(`\s*[+]\s*`)
    16  	parseScanRE = regexp.MustCompile(
    17  		`^\s*(0([box]))?([-_:0-9a-fA-F]+)( ?[(](pad=|!)([0-3])[)])?\s*(.*)$`,
    18  	)
    19  	parseBaseExpr = map[byte]byte{'b': 2, 'o': 8, 'x': 16}
    20  	parseDigits   = map[rune]byte{
    21  		'0': 0, '1': 1, '2': 2, '3': 3, '4': 4,
    22  		'5': 5, '6': 6, '7': 7, '8': 8, '9': 9,
    23  		'a': 0xa, 'b': 0xb, 'c': 0xc, 'd': 0xd, 'e': 0xe, 'f': 0xf,
    24  		'A': 0xa, 'B': 0xb, 'C': 0xc, 'D': 0xd, 'E': 0xe, 'F': 0xf,
    25  	}
    26  )
    27  
    28  // MustParse is like Parse but panics if the expression can not be parsed. It
    29  // simplifies safe initialization of global variables holding bit arrays.
    30  func MustParse(s string) *BitArray {
    31  	ba, err := Parse(s)
    32  	if err != nil {
    33  		panicf("MustParse(%q): %d", s, err)
    34  	}
    35  	return ba
    36  }
    37  
    38  // Parse parses a string as a bit array representation, like "01010".
    39  //
    40  // Multiple tokens can be presented, which are separated by one or more spaces
    41  // and/or a single "+" sign. All bits contained in tokens will be simply
    42  // concatenated. Each token can be binary, octal, or hexademical, and the type
    43  // is specified by the prefixes "0b", "0o" or "0x". Tokens without a prefix are
    44  // always parsed as binary representation. Each token also can contain any
    45  // number of separators "-", "_", and ":". These separators are safely ignored.
    46  //
    47  // Note that spaces between digits are parsed as token delimiters, not
    48  // separators within tokens. This is not a problem for binary representations,
    49  // but "0o" and "0x" prefixes have no effect beyond the spaces. For example,
    50  // "0b0000 1111" is legal, but "0x0000 ffff" is illegal. Because the "ffff" is
    51  // interpreted as a second token without a prefix, so "f" cannot be parsed as
    52  // binary representation. Use other separators instead: e.g. "0x0000_ffff".
    53  //
    54  //     bitarray  = *WSP [ token *( token-sep token ) ] *WSP
    55  //     token-sep = *WSP ( WSP / "+" ) *WSP
    56  //     token     = bin-token / oct-token / hex-token
    57  //     bin-token = [ "0b" [ char-sep ] ] bin-char *( [ char-sep ] bin-char )
    58  //     oct-token = "0o" 1*( [ char-sep ] oct-char ) [ oct-pad ]
    59  //     hex-token = "0x" 1*( [ char-sep ] hex-char ) [ hex-pad ]
    60  //     char-sep  = "-" / "_" / ":"
    61  //     bin-char  = "0" / "1"
    62  //     oct-char  = bin-char / "2" / "3" / "4" / "5" / "6" / "7"
    63  //     hex-char  = oct-char / "8" / "9"
    64  //               / "a" / "b" / "c" / "d" / "e" / "f"
    65  //               / "A" / "B" / "C" / "D" / "E" / "F"
    66  //     oct-pad   = [ " " ] "(" pad-ind ( "0" / "1" / "2" )       ")"
    67  //     hex-pad   = [ " " ] "(" pad-ind ( "0" / "1" / "2" / "3" ) ")"
    68  //     pad-ind   = "pad=" / "!"
    69  func Parse(s string) (*BitArray, error) {
    70  	s = strings.Map(parseMapSpaces, s)
    71  	zf := true
    72  	bb := NewBuilder()
    73  	lines := parsePSepRE.Split(s, -1)
    74  	for _, line := range lines {
    75  		line = strings.TrimSpace(line)
    76  		if len(line) == 0 && 1 < len(lines) {
    77  			return nil, fmt.Errorf("%q: %w: empty token", s, ErrIllegalExpression)
    78  		}
    79  		for 0 < len(line) {
    80  			m := parseScanRE.FindStringSubmatch(line)
    81  			if len(m) != parseScanRE.NumSubexp()+1 {
    82  				return nil, fmt.Errorf("%q: %w: malformed input", line, ErrIllegalExpression)
    83  			}
    84  			tzf, err := parseToken(bb, m[2], m[3], m[6])
    85  			if err != nil {
    86  				return nil, fmt.Errorf("%q: malformed token: %w", m[0], err)
    87  			}
    88  			zf = zf && tzf
    89  			line = m[7]
    90  		}
    91  	}
    92  	if zf {
    93  		return &BitArray{nBits: bb.nBits}, nil
    94  	}
    95  
    96  	return bb.BitArray(), nil
    97  }
    98  
    99  func parseMapSpaces(r rune) rune {
   100  	if unicode.IsSpace(r) {
   101  		return ' '
   102  	}
   103  	return r
   104  }
   105  
   106  func parseToken(bb *Builder, baseStr, bodyStr, npadStr string) (bool, error) {
   107  	base := byte(2)
   108  	if len(baseStr) != 0 {
   109  		b, ok := parseBaseExpr[baseStr[0]]
   110  		if !ok {
   111  			return false, fmt.Errorf(`%w: base %q`, ErrIllegalExpression, baseStr)
   112  		}
   113  		base = b
   114  	}
   115  
   116  	// digits
   117  	var zfb byte
   118  	digits := make([]byte, 0, len(bodyStr))
   119  	allowSep := baseStr != ""
   120  	var lastSep rune
   121  	for _, r := range bodyStr {
   122  		if dv, ok := parseDigits[r]; ok {
   123  			if base <= dv {
   124  				return false, fmt.Errorf(`%w: digit '%x' for base %d`, ErrIllegalExpression, dv, base)
   125  			}
   126  			zfb |= dv
   127  			digits = append(digits, dv)
   128  			allowSep = true
   129  			continue
   130  		}
   131  		if strings.ContainsRune("-_:", r) {
   132  			if !allowSep {
   133  				return false, fmt.Errorf(`%w: separator '%c'`, ErrIllegalExpression, r)
   134  			}
   135  			allowSep = false
   136  			lastSep = r
   137  			continue
   138  		}
   139  		return false, fmt.Errorf("%w: unexpected '%c'", ErrIllegalExpression, r)
   140  	}
   141  	if !allowSep {
   142  		return false, fmt.Errorf(`%w: token ends with a separator '%c'`, ErrIllegalExpression, lastSep)
   143  	}
   144  
   145  	// padding
   146  	npad := 0
   147  	if npadStr != "" {
   148  		npad = int(npadStr[0]) - int('0')
   149  	}
   150  	switch base {
   151  	case 2:
   152  		if npadStr != "" {
   153  			return false, fmt.Errorf("%w: pad=%s for bin token", ErrIllegalExpression, npadStr)
   154  		}
   155  		bb.WriteByteBits(digits)
   156  	case 8:
   157  		if 2 < npad {
   158  			return false, fmt.Errorf("%w: pad=%s for oct token", ErrIllegalExpression, npadStr)
   159  		}
   160  		for i, digit := range digits {
   161  			switch {
   162  			case i+1 < len(digits) || npad == 0:
   163  				bb.WriteByteBits([]byte{
   164  					digit >> 2,
   165  					digit >> 1,
   166  					digit,
   167  				})
   168  			case npad == 1:
   169  				bb.WriteByteBits([]byte{
   170  					digit >> 2,
   171  					digit >> 1,
   172  				})
   173  			case npad == 2:
   174  				bb.WriteByteBits([]byte{
   175  					digit >> 2,
   176  				})
   177  			}
   178  		}
   179  	case 16:
   180  		if 3 < npad { // this case should have been eliminated by regex
   181  			return false, fmt.Errorf(
   182  				"%w: pad=%s for hex token",
   183  				ErrIllegalExpression, npadStr,
   184  			)
   185  		}
   186  		for i, digit := range digits {
   187  			switch {
   188  			case i+1 < len(digits) || npad == 0:
   189  				bb.WriteByteBits([]byte{
   190  					digit >> 3,
   191  					digit >> 2,
   192  					digit >> 1,
   193  					digit,
   194  				})
   195  			case npad == 1:
   196  				bb.WriteByteBits([]byte{
   197  					digit >> 3,
   198  					digit >> 2,
   199  					digit >> 1,
   200  				})
   201  			case npad == 2:
   202  				bb.WriteByteBits([]byte{
   203  					digit >> 3,
   204  					digit >> 2,
   205  				})
   206  			case npad == 3:
   207  				bb.WriteByteBits([]byte{
   208  					digit >> 3,
   209  				})
   210  			}
   211  		}
   212  	default:
   213  		// this should never happen
   214  		return false, fmt.Errorf(
   215  			"%w: base %d(%s)",
   216  			ErrIllegalExpression, base, baseStr,
   217  		)
   218  	}
   219  
   220  	return zfb == 0, nil
   221  }