github.com/tunabay/go-bitarray@v1.3.1/bitarray.go (about) 1 // Copyright (c) 2021 Hirotsuna Mizuno. All rights reserved. 2 // Use of this source code is governed by the MIT license that can be found in 3 // the LICENSE file. 4 5 package bitarray 6 7 import ( 8 "hash" 9 "math/bits" 10 ) 11 12 // BitArray represents an immutable bit array, or a sequence of bits, of 13 // arbitrary length. Unlike the builtin []byte, BitArray can properly hold and 14 // handle fractional bits less than 8 bits. The zero value for BitArray 15 // represents an empty bit array of zero length. Since it is immutable, it can 16 // be shared, copied and is safe for concurrent use by multiple goroutines. 17 type BitArray struct { 18 b []byte // nil for zero filled bit array including zero length 19 nBits int // number of bits contained 20 } 21 22 // zeroBitArray is a shared instance representing an empty bit array. 23 var zeroBitArray = &BitArray{} 24 25 // New creates and returns a new BitArray instance from the bits passed as 26 // parameters. Each parameter should be 0 or 1, but if any other value is 27 // passed, no error is reported and only the LSB of each is silently used. In 28 // most cases it is more convenient to use Parse, NewFromBytes or other 29 // functions instead of New. 30 func New(bits ...byte) *BitArray { 31 if len(bits) == 0 { 32 return zeroBitArray 33 } 34 var zfb byte 35 buf := allocByteSlice((len(bits) + 7) >> 3) 36 for i, bit := range bits { 37 bit &= 1 38 zfb |= bit 39 buf[i>>3] |= bit << (7 - i&7) 40 } 41 if zfb == 0 { 42 return &BitArray{nBits: len(bits)} 43 } 44 45 return &BitArray{b: buf, nBits: len(bits)} 46 } 47 48 // NewFromBytes reads bits from a byte slice b, creates a new BitArray instance 49 // and returns it. It skips off bits from the beginning of p and reads nBits 50 // bits from the next bit. 51 func NewFromBytes(p []byte, off, nBits int) *BitArray { 52 switch { 53 case nBits < 0: 54 panicf("NewFromBytes: negative nBits %d.", nBits) 55 case off < 0: 56 panicf("NewFromBytes: negative off %d.", off) 57 case len(p)<<3 < off+nBits: 58 panicf("NewFromBytes: out of range off=%d + nBits=%d > p.len=%d.", off, nBits, len(p)<<3) 59 case nBits == 0: 60 return zeroBitArray 61 } 62 buf := allocByteSlice((nBits + 7) >> 3) 63 if copyBits(buf, p, 0, off, nBits) { 64 return &BitArray{nBits: nBits} 65 } 66 67 return &BitArray{b: buf, nBits: nBits} 68 } 69 70 // NewFromByteBits creates a new BitArray from a []byte in which each element 71 // represents 1 bit as 0 or 1. If an element is neighter 0 nor 1, only its LSB 72 // is silently used. 73 func NewFromByteBits(bits []byte) *BitArray { 74 if len(bits) == 0 { 75 return zeroBitArray 76 } 77 var zfb byte 78 buf := allocByteSlice((len(bits) + 7) >> 3) 79 for i, bit := range bits { 80 bit &= 1 81 zfb |= bit 82 buf[i>>3] |= bit << (7 - i&7) 83 } 84 if zfb == 0 { 85 return &BitArray{nBits: len(bits)} 86 } 87 88 return &BitArray{b: buf, nBits: len(bits)} 89 } 90 91 // NewZeroFilled creates a BitArray with all digits filled with 0. An all zero 92 // filled bit array does not allocate memory for 0 bits. If all bits in a bit 93 // array are guaranteed to be 0, using this function saves memory usage and 94 // optimizes some bitwise operations. 95 func NewZeroFilled(nBits int) *BitArray { 96 switch { 97 case nBits < 0: 98 panicf("NewZeroFilled: negative nBits %d.", nBits) 99 case nBits == 0: 100 return zeroBitArray 101 } 102 103 return &BitArray{nBits: nBits} 104 } 105 106 // NewOneFilled creates a BitArray with all digits filled with 1. 107 func NewOneFilled(nBits int) *BitArray { 108 switch { 109 case nBits < 0: 110 panicf("NewOneFilled: negative nBits %d.", nBits) 111 case nBits == 0: 112 return zeroBitArray 113 } 114 buf := allocByteSlice((nBits + 7) >> 3) 115 fillFF(buf) 116 if f := nBits & 7; f != 0 { 117 buf[len(buf)-1] &= byte(0xff) << (8 - f) 118 } 119 120 return &BitArray{b: buf, nBits: nBits} 121 } 122 123 // NewByRunLength creates a BitArray with the argument that represents the 124 // number of consecutive 0 and 1 bits. The (2n+1)th arguments including the 125 // first specifies the length of 0s, and the (2n)th arguments including the 126 // second specifies the length of 1s. Passing 0 as the first argument allows to 127 // create a bit array starting with 1. It is suitable for making simple bit 128 // masks. 129 func NewByRunLength(lengths ...int) *BitArray { 130 max1Len, sumLen := 0, 0 131 for i := 0; i < len(lengths); i++ { 132 sumLen += lengths[i] 133 if i&1 != 0 && max1Len < lengths[i] { 134 max1Len = lengths[i] 135 } 136 } 137 switch { 138 case sumLen == 0: 139 return zeroBitArray 140 case max1Len == 0: 141 return &BitArray{nBits: sumLen} 142 } 143 144 buf1 := make([]byte, (max1Len+7)>>3) 145 fillFF(buf1) 146 147 bb := NewBuilder() 148 for i, length := range lengths { 149 switch { 150 case length == 0: 151 case i&1 == 0: 152 bb.append(nil, 0, length, true) 153 default: 154 bb.append(buf1, 0, length, false) 155 } 156 } 157 158 return bb.BitArray() 159 } 160 161 // IsZero returns whether the BitArray is empty, zero length. 162 func (ba *BitArray) IsZero() bool { 163 return ba == nil || ba.nBits == 0 164 } 165 166 // Len returns the number of bits contained in the BitArray. 167 func (ba *BitArray) Len() int { 168 if ba == nil { 169 return 0 170 } 171 172 return ba.nBits 173 } 174 175 // NumPadding returns the number of LSBs padded when expressing the bit array as 176 // []byte type, that is, the number of bits to be added to make it a multiple of 177 // 8 bits. 178 func (ba *BitArray) NumPadding() int { 179 if ba == nil { 180 return 0 181 } 182 183 return (8 - ba.nBits&7) & 7 184 } 185 186 // String returns the string representation of the BitArray. 187 func (ba BitArray) String() string { 188 if ba.IsZero() { 189 return "" 190 } 191 sb := make([]byte, ba.nBits) 192 if ba.b == nil { 193 fill30(sb) 194 } else { 195 for i := 0; i < ba.nBits; i++ { 196 sb[i] = '0' + ba.b[i>>3]>>(7-i&7)&1 197 } 198 } 199 200 return string(sb) 201 } 202 203 // Bytes returns the byte slice containing the bit array. It also returns the 204 // number of the padded LSBs. 205 func (ba *BitArray) Bytes() ([]byte, int) { 206 n := ba.Len() //nolint:ifshort // false positive 207 if n == 0 { 208 return []byte{}, 0 209 } 210 b := make([]byte, (n+7)>>3) 211 copy(b, ba.b) // works with ba.b == nil 212 213 return b, (8 - ba.nBits&7) & 7 214 } 215 216 // BitArray implements the BitArrayer interface returning itself. 217 func (ba *BitArray) BitArray() *BitArray { 218 return ba 219 } 220 221 // BitAt returns a single bit at the specified offset as 0 or 1. It panics if 222 // the off is negative or greater than ba.Len()-1. 223 func (ba *BitArray) BitAt(off int) byte { 224 switch { 225 case off < 0: 226 panicf("BitAt: negative off %d.", off) 227 case ba.Len() <= off: 228 panicf("BitAt: out of range: off=%d >= len=%d.", off, ba.Len()) 229 case ba.b == nil: 230 return 0 231 } 232 233 return ba.b[off>>3] >> (7 - off&7) & 1 234 } 235 236 // Hash calculates the hash of the bit array using the hash function h. The 237 // hash.Hash is designed to accept input in bytes instead of bits. This causes 238 // problems with bit arrays that have padding LSBs at the end. For example, the 239 // two bit arrays "1111" and "1111000" would both be represented as the same 240 // single byte 0xf0. In order to prevent these from being mapped to the same 241 // hash value, the hash is calculated after appending a 3 bits marker 242 // indicating the number of padding LSBs at the end of the original bit array. 243 // 244 // Deprecated: Most hash functions can handle bit-oriented messages as-is by 245 // design, and it is not appropriate to use the byte-oriented standard hash.Hash 246 // with padding bits. The result does not comply with the specifications. Not 247 // all hash functions are available, but for SHA-1 and SHA-2, which can handle 248 // bit-oriented messages correctly, dedicated methods such as SHA512, SHA256, 249 // and SHA1 are now available. It is better to use them instead. 250 func (ba *BitArray) Hash(h hash.Hash) []byte { 251 b, _ := ba.MarshalBinary() 252 h.Write(b) 253 254 return h.Sum(nil) 255 } 256 257 // MapKey returns a string that can be used as a key for the Go built-in map. 258 // Only the same bit array returns the same string. The String method can also 259 // be used for the same purpose, but MapKey is faster. Note that it can be used 260 // as a map key, but it may contain non-printable characters. 261 func (ba *BitArray) MapKey() string { 262 if ba.IsZero() { 263 return "" 264 } 265 nBytes := (ba.nBits + 7) >> 3 266 sb := make([]byte, nBytes+1) 267 sb[0] = byte(ba.nBits & 7) 268 copy(sb[1:], ba.b) // works with ba.b == nil 269 270 return string(sb) 271 } 272 273 // ToPadded8 returns a new BitArray with a length that is a multiple of 8 bits 274 // by apending 0 to 7 padding bits at the end. For the returned bit array, 275 // NumPadding() returns 0. 276 func (ba *BitArray) ToPadded8() *BitArray { 277 switch { 278 case ba.IsZero(): 279 return zeroBitArray 280 case ba.nBits&7 == 0: 281 return ba 282 } 283 nBits := (ba.nBits + 7) & ^7 284 if ba.b == nil { 285 return &BitArray{nBits: nBits} 286 } 287 288 return &BitArray{b: ba.b, nBits: nBits} 289 } 290 291 // ToPadded64 returns a new BitArray with a length that is a multiple of 64 bits 292 // by apending 0 to 63 padding bits at the end. For the returned bit array, 293 // NumPadding() returns 0, and Len() returns a multiple of 8. 294 func (ba *BitArray) ToPadded64() *BitArray { 295 switch { 296 case ba.IsZero(): 297 return zeroBitArray 298 case ba.nBits&63 == 0: 299 return ba 300 } 301 nBits := (ba.nBits + 63) & ^63 302 if ba.b == nil { 303 return &BitArray{nBits: nBits} 304 } 305 306 return &BitArray{b: ba.b[:nBits>>3], nBits: nBits} 307 } 308 309 // ToByteBits returns a byte slice that represents the bit array with 1 byte 310 // per bit. Each byte element of the returned slice represents a single bit with 311 // 0 or 1. It is a memory-wasting data type, but for the purpose of repeating 312 // searches and matching using the same bit array, converting to this format 313 // allows the standard bytes package to be used. 314 func (ba *BitArray) ToByteBits() []byte { 315 if ba.IsZero() { 316 return []byte{} 317 } 318 return ba.bits8() 319 } 320 321 // ParityBit calculates the odd parity bit of the bit array. 322 func (ba *BitArray) ParityBit() int { 323 if ba.IsZero() || ba.b == nil { 324 return 1 325 } 326 327 // TODO: use an optimized algorithm 328 var sum uint64 329 for _, b := range asUint64Slice(ba.b) { 330 sum ^= b 331 } 332 333 return (bits.OnesCount64(sum) + 1) & 1 334 } 335 336 // RepeatEach returns a new BitArray in which each bit is repeated the specified 337 // number of times. It is an operation like "scaling" a bit pattern. 338 func (ba *BitArray) RepeatEach(count int) *BitArray { 339 switch { 340 case count < 0: 341 panicf("RepeatEach: negative count %d.", count) 342 case ba.IsZero(), count == 0: 343 return zeroBitArray 344 case count == 1: 345 return ba 346 case ba.b == nil: 347 return &BitArray{nBits: ba.nBits * count} 348 } 349 350 buf1 := make([]byte, (count+7)>>3) 351 fillFF(buf1) 352 353 bb := NewBuilder() 354 for i := 0; i < ba.nBits; i++ { 355 if ba.b[i>>3]>>(7-i&7)&1 == 0 { 356 bb.append(nil, 0, count, true) 357 } else { 358 bb.append(buf1, 0, count, false) 359 } 360 } 361 362 return bb.BitArray() 363 }