github.com/tunabay/go-bitarray@v1.3.1/bitarray.go (about)

     1  // Copyright (c) 2021 Hirotsuna Mizuno. All rights reserved.
     2  // Use of this source code is governed by the MIT license that can be found in
     3  // the LICENSE file.
     4  
     5  package bitarray
     6  
     7  import (
     8  	"hash"
     9  	"math/bits"
    10  )
    11  
// BitArray represents an immutable bit array, or a sequence of bits, of
// arbitrary length. Unlike the builtin []byte, BitArray can properly hold and
// handle fractional bits less than 8 bits. The zero value for BitArray
// represents an empty bit array of zero length. Since it is immutable, it can
// be shared, copied and is safe for concurrent use by multiple goroutines.
//
// Bits are packed into b MSB first: bit i lives in b[i>>3] at bit position
// 7-(i&7). A nil b with a non-zero nBits denotes nBits zero bits without any
// backing storage.
type BitArray struct {
	b     []byte // nil for zero filled bit array including zero length
	nBits int    // number of bits contained
}
    21  
// zeroBitArray is a shared instance representing an empty bit array. The
// constructors and conversions in this file return it whenever the result has
// zero length, so no allocation is made for empty results.
var zeroBitArray = &BitArray{}
    24  
    25  // New creates and returns a new BitArray instance from the bits passed as
    26  // parameters. Each parameter should be 0 or 1, but if any other value is
    27  // passed, no error is reported and only the LSB of each is silently used. In
    28  // most cases it is more convenient to use Parse, NewFromBytes or other
    29  // functions instead of New.
    30  func New(bits ...byte) *BitArray {
    31  	if len(bits) == 0 {
    32  		return zeroBitArray
    33  	}
    34  	var zfb byte
    35  	buf := allocByteSlice((len(bits) + 7) >> 3)
    36  	for i, bit := range bits {
    37  		bit &= 1
    38  		zfb |= bit
    39  		buf[i>>3] |= bit << (7 - i&7)
    40  	}
    41  	if zfb == 0 {
    42  		return &BitArray{nBits: len(bits)}
    43  	}
    44  
    45  	return &BitArray{b: buf, nBits: len(bits)}
    46  }
    47  
    48  // NewFromBytes reads bits from a byte slice b, creates a new BitArray instance
    49  // and returns it. It skips off bits from the beginning of p and reads nBits
    50  // bits from the next bit.
    51  func NewFromBytes(p []byte, off, nBits int) *BitArray {
    52  	switch {
    53  	case nBits < 0:
    54  		panicf("NewFromBytes: negative nBits %d.", nBits)
    55  	case off < 0:
    56  		panicf("NewFromBytes: negative off %d.", off)
    57  	case len(p)<<3 < off+nBits:
    58  		panicf("NewFromBytes: out of range off=%d + nBits=%d > p.len=%d.", off, nBits, len(p)<<3)
    59  	case nBits == 0:
    60  		return zeroBitArray
    61  	}
    62  	buf := allocByteSlice((nBits + 7) >> 3)
    63  	if copyBits(buf, p, 0, off, nBits) {
    64  		return &BitArray{nBits: nBits}
    65  	}
    66  
    67  	return &BitArray{b: buf, nBits: nBits}
    68  }
    69  
    70  // NewFromByteBits creates a new BitArray from a []byte in which each element
    71  // represents 1 bit as 0 or 1. If an element is neighter 0 nor 1, only its LSB
    72  // is silently used.
    73  func NewFromByteBits(bits []byte) *BitArray {
    74  	if len(bits) == 0 {
    75  		return zeroBitArray
    76  	}
    77  	var zfb byte
    78  	buf := allocByteSlice((len(bits) + 7) >> 3)
    79  	for i, bit := range bits {
    80  		bit &= 1
    81  		zfb |= bit
    82  		buf[i>>3] |= bit << (7 - i&7)
    83  	}
    84  	if zfb == 0 {
    85  		return &BitArray{nBits: len(bits)}
    86  	}
    87  
    88  	return &BitArray{b: buf, nBits: len(bits)}
    89  }
    90  
    91  // NewZeroFilled creates a BitArray with all digits filled with 0. An all zero
    92  // filled bit array does not allocate memory for 0 bits. If all bits in a bit
    93  // array are guaranteed to be 0, using this function saves memory usage and
    94  // optimizes some bitwise operations.
    95  func NewZeroFilled(nBits int) *BitArray {
    96  	switch {
    97  	case nBits < 0:
    98  		panicf("NewZeroFilled: negative nBits %d.", nBits)
    99  	case nBits == 0:
   100  		return zeroBitArray
   101  	}
   102  
   103  	return &BitArray{nBits: nBits}
   104  }
   105  
   106  // NewOneFilled creates a BitArray with all digits filled with 1.
   107  func NewOneFilled(nBits int) *BitArray {
   108  	switch {
   109  	case nBits < 0:
   110  		panicf("NewOneFilled: negative nBits %d.", nBits)
   111  	case nBits == 0:
   112  		return zeroBitArray
   113  	}
   114  	buf := allocByteSlice((nBits + 7) >> 3)
   115  	fillFF(buf)
   116  	if f := nBits & 7; f != 0 {
   117  		buf[len(buf)-1] &= byte(0xff) << (8 - f)
   118  	}
   119  
   120  	return &BitArray{b: buf, nBits: nBits}
   121  }
   122  
   123  // NewByRunLength creates a BitArray with the argument that represents the
   124  // number of consecutive 0 and 1 bits. The (2n+1)th arguments including the
   125  // first specifies the length of 0s, and the (2n)th arguments including the
   126  // second specifies the length of 1s. Passing 0 as the first argument allows to
   127  // create a bit array starting with 1. It is suitable for making simple bit
   128  // masks.
   129  func NewByRunLength(lengths ...int) *BitArray {
   130  	max1Len, sumLen := 0, 0
   131  	for i := 0; i < len(lengths); i++ {
   132  		sumLen += lengths[i]
   133  		if i&1 != 0 && max1Len < lengths[i] {
   134  			max1Len = lengths[i]
   135  		}
   136  	}
   137  	switch {
   138  	case sumLen == 0:
   139  		return zeroBitArray
   140  	case max1Len == 0:
   141  		return &BitArray{nBits: sumLen}
   142  	}
   143  
   144  	buf1 := make([]byte, (max1Len+7)>>3)
   145  	fillFF(buf1)
   146  
   147  	bb := NewBuilder()
   148  	for i, length := range lengths {
   149  		switch {
   150  		case length == 0:
   151  		case i&1 == 0:
   152  			bb.append(nil, 0, length, true)
   153  		default:
   154  			bb.append(buf1, 0, length, false)
   155  		}
   156  	}
   157  
   158  	return bb.BitArray()
   159  }
   160  
   161  // IsZero returns whether the BitArray is empty, zero length.
   162  func (ba *BitArray) IsZero() bool {
   163  	return ba == nil || ba.nBits == 0
   164  }
   165  
   166  // Len returns the number of bits contained in the BitArray.
   167  func (ba *BitArray) Len() int {
   168  	if ba == nil {
   169  		return 0
   170  	}
   171  
   172  	return ba.nBits
   173  }
   174  
   175  // NumPadding returns the number of LSBs padded when expressing the bit array as
   176  // []byte type, that is, the number of bits to be added to make it a multiple of
   177  // 8 bits.
   178  func (ba *BitArray) NumPadding() int {
   179  	if ba == nil {
   180  		return 0
   181  	}
   182  
   183  	return (8 - ba.nBits&7) & 7
   184  }
   185  
   186  // String returns the string representation of the BitArray.
   187  func (ba BitArray) String() string {
   188  	if ba.IsZero() {
   189  		return ""
   190  	}
   191  	sb := make([]byte, ba.nBits)
   192  	if ba.b == nil {
   193  		fill30(sb)
   194  	} else {
   195  		for i := 0; i < ba.nBits; i++ {
   196  			sb[i] = '0' + ba.b[i>>3]>>(7-i&7)&1
   197  		}
   198  	}
   199  
   200  	return string(sb)
   201  }
   202  
   203  // Bytes returns the byte slice containing the bit array. It also returns the
   204  // number of the padded LSBs.
   205  func (ba *BitArray) Bytes() ([]byte, int) {
   206  	n := ba.Len() //nolint:ifshort // false positive
   207  	if n == 0 {
   208  		return []byte{}, 0
   209  	}
   210  	b := make([]byte, (n+7)>>3)
   211  	copy(b, ba.b) // works with ba.b == nil
   212  
   213  	return b, (8 - ba.nBits&7) & 7
   214  }
   215  
// BitArray implements the BitArrayer interface returning itself. The receiver
// is returned as is, without copying, including when it is nil.
func (ba *BitArray) BitArray() *BitArray {
	return ba
}
   220  
   221  // BitAt returns a single bit at the specified offset as 0 or 1. It panics if
   222  // the off is negative or greater than ba.Len()-1.
   223  func (ba *BitArray) BitAt(off int) byte {
   224  	switch {
   225  	case off < 0:
   226  		panicf("BitAt: negative off %d.", off)
   227  	case ba.Len() <= off:
   228  		panicf("BitAt: out of range: off=%d >= len=%d.", off, ba.Len())
   229  	case ba.b == nil:
   230  		return 0
   231  	}
   232  
   233  	return ba.b[off>>3] >> (7 - off&7) & 1
   234  }
   235  
   236  // Hash calculates the hash of the bit array using the hash function h. The
   237  // hash.Hash is designed to accept input in bytes instead of bits. This causes
   238  // problems with bit arrays that have padding LSBs at the end. For example, the
   239  // two bit arrays "1111" and "1111000" would both be represented as the same
   240  // single byte 0xf0. In order to prevent these from being mapped to the same
   241  // hash value, the hash is calculated after appending a 3 bits marker
   242  // indicating the number of padding LSBs at the end of the original bit array.
   243  //
   244  // Deprecated: Most hash functions can handle bit-oriented messages as-is by
   245  // design, and it is not appropriate to use the byte-oriented standard hash.Hash
   246  // with padding bits. The result does not comply with the specifications. Not
   247  // all hash functions are available, but for SHA-1 and SHA-2, which can handle
   248  // bit-oriented messages correctly, dedicated methods such as SHA512, SHA256,
   249  // and SHA1 are now available. It is better to use them instead.
   250  func (ba *BitArray) Hash(h hash.Hash) []byte {
   251  	b, _ := ba.MarshalBinary()
   252  	h.Write(b)
   253  
   254  	return h.Sum(nil)
   255  }
   256  
   257  // MapKey returns a string that can be used as a key for the Go built-in map.
   258  // Only the same bit array returns the same string. The String method can also
   259  // be used for the same purpose, but MapKey is faster. Note that it can be used
   260  // as a map key, but it may contain non-printable characters.
   261  func (ba *BitArray) MapKey() string {
   262  	if ba.IsZero() {
   263  		return ""
   264  	}
   265  	nBytes := (ba.nBits + 7) >> 3
   266  	sb := make([]byte, nBytes+1)
   267  	sb[0] = byte(ba.nBits & 7)
   268  	copy(sb[1:], ba.b) // works with ba.b == nil
   269  
   270  	return string(sb)
   271  }
   272  
   273  // ToPadded8 returns a new BitArray with a length that is a multiple of 8 bits
   274  // by apending 0 to 7 padding bits at the end. For the returned bit array,
   275  // NumPadding() returns 0.
   276  func (ba *BitArray) ToPadded8() *BitArray {
   277  	switch {
   278  	case ba.IsZero():
   279  		return zeroBitArray
   280  	case ba.nBits&7 == 0:
   281  		return ba
   282  	}
   283  	nBits := (ba.nBits + 7) & ^7
   284  	if ba.b == nil {
   285  		return &BitArray{nBits: nBits}
   286  	}
   287  
   288  	return &BitArray{b: ba.b, nBits: nBits}
   289  }
   290  
   291  // ToPadded64 returns a new BitArray with a length that is a multiple of 64 bits
   292  // by apending 0 to 63 padding bits at the end. For the returned bit array,
   293  // NumPadding() returns 0, and Len() returns a multiple of 8.
   294  func (ba *BitArray) ToPadded64() *BitArray {
   295  	switch {
   296  	case ba.IsZero():
   297  		return zeroBitArray
   298  	case ba.nBits&63 == 0:
   299  		return ba
   300  	}
   301  	nBits := (ba.nBits + 63) & ^63
   302  	if ba.b == nil {
   303  		return &BitArray{nBits: nBits}
   304  	}
   305  
   306  	return &BitArray{b: ba.b[:nBits>>3], nBits: nBits}
   307  }
   308  
   309  // ToByteBits returns a byte slice that represents the bit array with 1 byte
   310  // per bit. Each byte element of the returned slice represents a single bit with
   311  // 0 or 1. It is a memory-wasting data type, but for the purpose of repeating
   312  // searches and matching using the same bit array, converting to this format
   313  // allows the standard bytes package to be used.
   314  func (ba *BitArray) ToByteBits() []byte {
   315  	if ba.IsZero() {
   316  		return []byte{}
   317  	}
   318  	return ba.bits8()
   319  }
   320  
   321  // ParityBit calculates the odd parity bit of the bit array.
   322  func (ba *BitArray) ParityBit() int {
   323  	if ba.IsZero() || ba.b == nil {
   324  		return 1
   325  	}
   326  
   327  	// TODO: use an optimized algorithm
   328  	var sum uint64
   329  	for _, b := range asUint64Slice(ba.b) {
   330  		sum ^= b
   331  	}
   332  
   333  	return (bits.OnesCount64(sum) + 1) & 1
   334  }
   335  
   336  // RepeatEach returns a new BitArray in which each bit is repeated the specified
   337  // number of times. It is an operation like "scaling" a bit pattern.
   338  func (ba *BitArray) RepeatEach(count int) *BitArray {
   339  	switch {
   340  	case count < 0:
   341  		panicf("RepeatEach: negative count %d.", count)
   342  	case ba.IsZero(), count == 0:
   343  		return zeroBitArray
   344  	case count == 1:
   345  		return ba
   346  	case ba.b == nil:
   347  		return &BitArray{nBits: ba.nBits * count}
   348  	}
   349  
   350  	buf1 := make([]byte, (count+7)>>3)
   351  	fillFF(buf1)
   352  
   353  	bb := NewBuilder()
   354  	for i := 0; i < ba.nBits; i++ {
   355  		if ba.b[i>>3]>>(7-i&7)&1 == 0 {
   356  			bb.append(nil, 0, count, true)
   357  		} else {
   358  			bb.append(buf1, 0, count, false)
   359  		}
   360  	}
   361  
   362  	return bb.BitArray()
   363  }