github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/util/packed/bulkOperation.go (about) 1 package packed 2 3 import ( 4 "fmt" 5 ) 6 7 // util/packed/BulkOperation.java 8 9 // Efficient sequential read/write of packed integers. 10 type BulkOperation interface { 11 LongBlockCount() int 12 LongValueCount() int 13 ByteBlockCount() int 14 ByteValueCount() int 15 16 // PackedIntsEncoder 17 encodeLongToByte(values []int64, blocks []byte, iterations int) 18 encodeLongToLong(values, blocks []int64, iterations int) 19 EncodeIntToByte(values []int, blocks []byte, iterations int) 20 21 // PackedIntsDecoder 22 decodeLongToLong(blocks, values []int64, iterations int) 23 decodeByteToLong(blocks []byte, values []int64, iterations int) 24 /* 25 For every number of bits per value, there is a minumum number of 26 blocks (b) / values (v) you need to write an order to reach the next block 27 boundary: 28 - 16 bits per value -> b=2, v=1 29 - 24 bits per value -> b=3, v=1 30 - 50 bits per value -> b=25, v=4 31 - 63 bits per value -> b=63, v=8 32 - ... 33 34 A bulk read consists in copying iterations*v vlaues that are contained in 35 iterations*b blocks into a []int64 (higher values of iterations are likely to 36 yield a better throughput) => this requires n * (b + 8v) bytes of memory. 37 38 This method computes iterations as ramBudget / (b + 8v) (since an int64 is 39 8 bytes). 40 */ 41 computeIterations(valueCount, ramBudget int) int 42 } 43 44 var ( 45 packedBulkOps = []BulkOperation{ 46 /*[[[gocog 47 package main 48 49 import ( 50 "fmt" 51 "io" 52 "os" 53 ) 54 55 const ( 56 MAX_SPECIALIZED_BITS_PER_VALUE = 24 57 HEADER = `// This file has been automatically generated, DO NOT EDIT 58 59 package packed 60 61 // Efficient sequential read/write of packed integers.` 62 ) 63 64 func isPowerOfTwo(n int) bool { 65 return n&(n-1) == 0 66 } 67 68 func casts(typ string) (castStart, castEnd string) { 69 if typ == "int64" { 70 return "", "" 71 } 72 return fmt.Sprintf("%s(", typ), ")" 73 } 74 75 func masks(bits int) (start, end string) { 76 if bits == 64 { 77 return "", "" 78 } 79 return "(", fmt.Sprintf(" & %x)", (1<<uint(bits))-1) 80 } 81 82 var ( 83 TYPES = map[int]string{8: "byte", 16: "int16", 32: "int32", 64: "int64"} 84 NAMES = map[int]string{8: "Byte", 16: "Short", 32: "Int", 64: "Long"} 85 ) 86 87 func blockValueCount(bpv, bits int) (blocks, values int) { 88 blocks = bpv 89 values = blocks * bits / bpv 90 for blocks%2 == 0 && values%2 == 0 { 91 blocks /= 2 92 values /= 2 93 } 94 assert2(values*bpv == bits*blocks, fmt.Sprintf("%d values, %d blocks, %d bits per value", values, blocks, bpv)) 95 return blocks, values 96 } 97 98 func assert2(ok bool, msg string) { 99 if !ok { 100 panic(msg) 101 } 102 } 103 104 func packed64(bpv int, f io.Writer) { 105 if bpv == 64 { 106 panic("not implemented yet") 107 } else { 108 p64Decode(bpv, f, 32) 109 p64Decode(bpv, f, 64) 110 } 111 } 112 113 func p64Decode(bpv int, f io.Writer, bits int) { 114 _, values := blockValueCount(bpv, 64) 115 typ := TYPES[bits] 116 castStart, castEnd := casts(typ) 117 var mask uint 118 119 fmt.Fprintf(f, "func (op *BulkOperationPacked%d) decodeLongTo%s(blocks []int64, values []%s, iterations int) {\n", bpv, NAMES[bits], typ) 120 if bits < bpv { 121 fmt.Fprintln(f, " panic(\"not supported yet\")") 122 } else { 123 fmt.Fprintln(f, " blocksOffset, valuesOffset := 0, 0") 124 fmt.Fprintf(f, " for i := 0; i < iterations; i ++ {\n") 125 mask = 1<<uint(bpv) - 1 126 127 if isPowerOfTwo(bpv) { 128 fmt.Fprintln(f, " block := blocks[blocksOffset]; blocksOffset++") 129 fmt.Fprintf(f, " for shift := uint(%d); shift >= 0; shift -= %d {\n", 64-bpv, bpv) 130 fmt.Fprintf(f, " values[valuesOffset] = %s(int64(uint64(block) >> shift)) & %d%s; valuesOffset++\n", castStart, mask, castEnd) 131 fmt.Fprintln(f, " }") 132 } else { 133 for i := 0; i < values; i++ { 134 blockOffset := i * bpv / 64 135 bitOffset := (i * bpv) % 64 136 if bitOffset == 0 { 137 // start of block 138 fmt.Fprintf(f, " block%d := blocks[blocksOffset]; blocksOffset++\n", blockOffset) 139 fmt.Fprintf(f, " values[valuesOffset] = %sint64(uint64(block%d) >> %d%s); valuesOffset++\n", castStart, blockOffset, 64-bpv, castEnd) 140 } else if bitOffset+bpv == 64 { 141 // end of block 142 fmt.Fprintf(f, " values[valuesOffset] = %sblock%d & %d%s; valuesOffset++\n", castStart, blockOffset, mask, castEnd) 143 } else if bitOffset+bpv < 64 { 144 // middle of block 145 fmt.Fprintf(f, " values[valuesOffset] = %sint64(uint64(block%d) >> %d) & %d%s; valuesOffset++\n", castStart, blockOffset, 64-bitOffset-bpv, mask, castEnd) 146 } else { 147 // value spans across 2 blocks 148 mask1 := int(1<<uint(64-bitOffset)) - 1 149 shift1 := bitOffset + bpv - 64 150 shift2 := 64 - shift1 151 fmt.Fprintf(f, " block%d := blocks[blocksOffset]; blocksOffset++\n", blockOffset+1) 152 fmt.Fprintf(f, " values[valuesOffset] = %s((block%d & %d) << %d) | (int64(uint64(block%d) >> %d))%s; valuesOffset++\n", 153 castStart, blockOffset, mask1, shift1, blockOffset+1, shift2, castEnd) 154 } 155 } 156 } 157 fmt.Fprintln(f, " }") 158 } 159 fmt.Fprintln(f, "}\n") 160 161 _, byteValues := blockValueCount(bpv, 8) 162 163 fmt.Fprintf(f, "func (op *BulkOperationPacked%d) decodeByteTo%s(blocks []byte, values []%s, iterations int) {\n", bpv, NAMES[bits], typ) 164 if bits < bpv { 165 fmt.Fprintln(f, " panic(\"not supported yet\")") 166 } else { 167 fmt.Fprintln(f, " blocksOffset, valuesOffset := 0, 0") 168 if isPowerOfTwo(bpv) && bpv < 8 { 169 fmt.Fprintf(f, " for j := 0; j < iterations; j ++ {\n") 170 fmt.Fprintf(f, " block := blocks[blocksOffset]\n") 171 fmt.Fprintln(f, " blocksOffset++") 172 for shift := 8 - bpv; shift > 0; shift -= bpv { 173 fmt.Fprintf(f, " values[valuesOffset] = %s(byte(uint8(block)) >> %d) & %d\n", typ, shift, mask) 174 fmt.Fprintln(f, " valuesOffset++") 175 } 176 fmt.Fprintf(f, " values[valuesOffset] = %s(block & %d)\n", typ, mask) 177 fmt.Fprintln(f, " valuesOffset++") 178 fmt.Fprintln(f, " }") 179 } else if bpv == 8 { 180 fmt.Fprintln(f, " for j := 0; j < iterations; j ++ {") 181 fmt.Fprintf(f, " values[valuesOffset] = %s(blocks[blocksOffset]); valuesOffset++; blocksOffset++\n", typ) 182 fmt.Fprintln(f, " }") 183 } else if isPowerOfTwo(bpv) && bpv > 8 { 184 fmt.Fprintf(f, " for j := 0; j < iterations; j ++ {\n") 185 m := "int32" 186 if bits > 32 { 187 m = "int64" 188 } 189 fmt.Fprintf(f, " values[valuesOffset] =") 190 for i, until := 0, bpv/8-1; i < until; i++ { 191 fmt.Fprintf(f, " (%s(blocks[blocksOffset+%d]) << %d) |", m, i, bpv-8) 192 } 193 fmt.Fprintf(f, " %s(blocks[blocksOffset+%d])\n", m, bpv/8-1) 194 fmt.Fprintln(f, " valuesOffset++") 195 fmt.Fprintf(f, " blocksOffset += %d\n", bpv/8) 196 fmt.Fprintln(f, " }") 197 } else { 198 fmt.Fprintf(f, " for i := 0; i < iterations; i ++ {\n") 199 for i := 0; i < byteValues; i++ { 200 byteStart, byteEnd := i*bpv/8, ((i+1)*bpv-1)/8 201 bitStart, bitEnd := (i*bpv)%8, ((i+1)*bpv-1)%8 202 shift := func(b int) int { return 8*(byteEnd-b-1) + 1 + bitEnd } 203 if bitStart == 0 { 204 fmt.Fprintf(f, " byte%d := blocks[blocksOffset]\n", byteStart) 205 fmt.Fprintln(f, " blocksOffset++") 206 } 207 for b, until := byteStart+1, byteEnd+1; b < until; b++ { 208 fmt.Fprintf(f, " byte%d := blocks[blocksOffset]\n", b) 209 fmt.Fprintln(f, " blocksOffset++") 210 } 211 fmt.Fprintf(f, " values[valuesOffset] = %s(", typ) 212 if byteStart == byteEnd { 213 if bitStart == 0 { 214 if bitEnd == 7 { 215 fmt.Fprintf(f, " int64(byte%d)", byteStart) 216 } else { 217 fmt.Fprintf(f, " int64(uint8(byte%d) >> %d)", byteStart, 7-bitEnd) 218 } 219 } else { 220 if bitEnd == 7 { 221 fmt.Fprintf(f, " int64(byte%d) & %d", byteStart, 1<<uint(8-bitStart)-1) 222 } else { 223 fmt.Fprintf(f, " int64(uint8(byte%d) >> %d) & %d", byteStart, 7-bitEnd, 1<<uint(bitEnd-bitStart+1)-1) 224 } 225 } 226 } else { 227 if bitStart == 0 { 228 fmt.Fprintf(f, "(int64(byte%d) << %d)", byteStart, shift(byteStart)) 229 } else { 230 fmt.Fprintf(f, "(int64(byte%d & %d) << %d)", byteStart, 1<<uint(8-bitStart)-1, shift(byteStart)) 231 } 232 for b, until := byteStart+1, byteEnd; b < until; b++ { 233 fmt.Fprintf(f, " | (int64(byte%d) << %d)", b, shift(b)) 234 } 235 if bitEnd == 7 { 236 fmt.Fprintf(f, " | int64(byte%d)", byteEnd) 237 } else { 238 fmt.Fprintf(f, " | int64(uint8(byte%d) >> %d)", byteEnd, 7-bitEnd) 239 } 240 } 241 fmt.Fprintf(f, ")") 242 fmt.Fprintln(f, "") 243 fmt.Fprintln(f, " valuesOffset++") 244 } 245 fmt.Fprintln(f, " }") 246 } 247 } 248 fmt.Fprintln(f, "}") 249 } 250 251 func main() { 252 for bpv := 1; bpv <= 64; bpv++ { 253 if bpv > MAX_SPECIALIZED_BITS_PER_VALUE { 254 fmt.Printf(" newBulkOperationPacked(%d),\n", bpv) 255 continue 256 } 257 f, err := os.Create(fmt.Sprintf("bulkOperation%d.go", bpv)) 258 if err != nil { 259 panic(err) 260 } 261 defer f.Close() 262 263 fmt.Fprintf(f, "%v\n", HEADER) 264 fmt.Fprintf(f, "type BulkOperationPacked%d struct {\n", bpv) 265 fmt.Fprintln(f, " *BulkOperationPacked") 266 fmt.Fprintln(f, "}\n") 267 268 fmt.Fprintf(f, "func newBulkOperationPacked%d() BulkOperation {\n", bpv) 269 fmt.Fprintf(f, " return &BulkOperationPacked%d{newBulkOperationPacked(%d)}\n", bpv, bpv) 270 fmt.Fprintln(f, "}\n") 271 272 packed64(bpv, f) 273 274 fmt.Printf(" newBulkOperationPacked%d(),\n", bpv) 275 } 276 } 277 gocog]]]*/ 278 newBulkOperationPacked1(), 279 newBulkOperationPacked2(), 280 newBulkOperationPacked3(), 281 newBulkOperationPacked4(), 282 newBulkOperationPacked5(), 283 newBulkOperationPacked6(), 284 newBulkOperationPacked7(), 285 newBulkOperationPacked8(), 286 newBulkOperationPacked9(), 287 newBulkOperationPacked10(), 288 newBulkOperationPacked11(), 289 newBulkOperationPacked12(), 290 newBulkOperationPacked13(), 291 newBulkOperationPacked14(), 292 newBulkOperationPacked15(), 293 newBulkOperationPacked16(), 294 newBulkOperationPacked17(), 295 newBulkOperationPacked18(), 296 newBulkOperationPacked19(), 297 newBulkOperationPacked20(), 298 newBulkOperationPacked21(), 299 newBulkOperationPacked22(), 300 newBulkOperationPacked23(), 301 newBulkOperationPacked24(), 302 newBulkOperationPacked(25), 303 newBulkOperationPacked(26), 304 newBulkOperationPacked(27), 305 newBulkOperationPacked(28), 306 newBulkOperationPacked(29), 307 newBulkOperationPacked(30), 308 newBulkOperationPacked(31), 309 newBulkOperationPacked(32), 310 newBulkOperationPacked(33), 311 newBulkOperationPacked(34), 312 newBulkOperationPacked(35), 313 newBulkOperationPacked(36), 314 newBulkOperationPacked(37), 315 newBulkOperationPacked(38), 316 newBulkOperationPacked(39), 317 newBulkOperationPacked(40), 318 newBulkOperationPacked(41), 319 newBulkOperationPacked(42), 320 newBulkOperationPacked(43), 321 newBulkOperationPacked(44), 322 newBulkOperationPacked(45), 323 newBulkOperationPacked(46), 324 newBulkOperationPacked(47), 325 newBulkOperationPacked(48), 326 newBulkOperationPacked(49), 327 newBulkOperationPacked(50), 328 newBulkOperationPacked(51), 329 newBulkOperationPacked(52), 330 newBulkOperationPacked(53), 331 newBulkOperationPacked(54), 332 newBulkOperationPacked(55), 333 newBulkOperationPacked(56), 334 newBulkOperationPacked(57), 335 newBulkOperationPacked(58), 336 newBulkOperationPacked(59), 337 newBulkOperationPacked(60), 338 newBulkOperationPacked(61), 339 newBulkOperationPacked(62), 340 newBulkOperationPacked(63), 341 newBulkOperationPacked(64), 342 // [[[end]]] 343 } 344 345 packedSingleBlockBulkOps = []BulkOperation{ 346 /*[[[gocog 347 package main 348 import "fmt" 349 var PACKED_64_SINGLE_BLOCK_BPV = []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 16, 21, 32} 350 func main() { 351 var bpv int = 1 352 for _, v := range PACKED_64_SINGLE_BLOCK_BPV { 353 for ;bpv < v; bpv++ { 354 fmt.Print(" nil,\n") 355 } 356 fmt.Printf(" newBulkOperationPackedSingleBlock(%v),\n", bpv) 357 bpv++ 358 } 359 } 360 gocog]]]*/ 361 newBulkOperationPackedSingleBlock(1), 362 newBulkOperationPackedSingleBlock(2), 363 newBulkOperationPackedSingleBlock(3), 364 newBulkOperationPackedSingleBlock(4), 365 newBulkOperationPackedSingleBlock(5), 366 newBulkOperationPackedSingleBlock(6), 367 newBulkOperationPackedSingleBlock(7), 368 newBulkOperationPackedSingleBlock(8), 369 newBulkOperationPackedSingleBlock(9), 370 newBulkOperationPackedSingleBlock(10), 371 nil, 372 newBulkOperationPackedSingleBlock(12), 373 nil, 374 nil, 375 nil, 376 newBulkOperationPackedSingleBlock(16), 377 nil, 378 nil, 379 nil, 380 nil, 381 newBulkOperationPackedSingleBlock(21), 382 nil, 383 nil, 384 nil, 385 nil, 386 nil, 387 nil, 388 nil, 389 nil, 390 nil, 391 nil, 392 newBulkOperationPackedSingleBlock(32), 393 // [[[end]]] 394 } 395 ) 396 397 func newBulkOperation(format PackedFormat, bitsPerValue uint32) BulkOperation { 398 // log.Printf("Initializing BulkOperation(%v,%v)", format, bitsPerValue) 399 switch int(format) { 400 case PACKED: 401 assert2(packedBulkOps[bitsPerValue-1] != nil, fmt.Sprintf("bpv=%v", bitsPerValue)) 402 return packedBulkOps[bitsPerValue-1] 403 case PACKED_SINGLE_BLOCK: 404 assert2(packedSingleBlockBulkOps[bitsPerValue-1] != nil, fmt.Sprintf("bpv=%v", bitsPerValue)) 405 return packedSingleBlockBulkOps[bitsPerValue-1] 406 } 407 panic(fmt.Sprintf("invalid packed format: %v", format)) 408 } 409 410 type BulkOperationImpl struct { 411 PackedIntsDecoder 412 } 413 414 func newBulkOperationImpl(decoder PackedIntsDecoder) *BulkOperationImpl { 415 return &BulkOperationImpl{decoder} 416 } 417 418 func (op *BulkOperationImpl) writeLong(block int64, blocks []byte) int { 419 blocksOffset := 0 420 for j := 1; j <= 8; j++ { 421 blocks[blocksOffset] = byte(uint64(block) >> uint(64-(j<<3))) 422 blocksOffset++ 423 } 424 return blocksOffset 425 } 426 427 func (op *BulkOperationImpl) computeIterations(valueCount, ramBudget int) int { 428 iterations := ramBudget / (op.ByteBlockCount() + 8*op.ByteValueCount()) 429 if iterations == 0 { 430 // at least 1 431 return 1 432 } else if (iterations-1)*op.ByteValueCount() >= valueCount { 433 // don't allocate for more than the size of the reader 434 panic("not implemented yet") 435 } else { 436 return iterations 437 } 438 }