github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/util/packed/bulkOperation.go (about)

     1  package packed
     2  
     3  import (
     4  	"fmt"
     5  )
     6  
     7  // util/packed/BulkOperation.java
     8  
// BulkOperation provides efficient sequential read/write of packed integers.
type BulkOperation interface {
	LongBlockCount() int
	LongValueCount() int
	ByteBlockCount() int
	ByteValueCount() int

	// PackedIntsEncoder
	encodeLongToByte(values []int64, blocks []byte, iterations int)
	encodeLongToLong(values, blocks []int64, iterations int)
	EncodeIntToByte(values []int, blocks []byte, iterations int)

	// PackedIntsDecoder
	decodeLongToLong(blocks, values []int64, iterations int)
	decodeByteToLong(blocks []byte, values []int64, iterations int)
	/*
		For every number of bits per value, there is a minimum number of
		blocks (b) / values (v) you need to write in order to reach the next
		block boundary:
		- 16 bits per value -> b=2, v=1
		- 24 bits per value -> b=3, v=1
		- 50 bits per value -> b=25, v=4
		- 63 bits per value -> b=63, v=8
		- ...

		A bulk read consists in copying iterations*v values that are contained
		in iterations*b blocks into a []int64 (higher values of iterations are
		likely to yield a better throughput) => this requires n * (b + 8v)
		bytes of memory, where n is the number of iterations.

		This method computes iterations as ramBudget / (b + 8v) (since an int64
		is 8 bytes).
	*/
	computeIterations(valueCount, ramBudget int) int
}
    43  
var (
	// packedBulkOps holds one BulkOperation per bits-per-value for the
	// PACKED format; newBulkOperation looks entries up at index
	// bitsPerValue-1, so the 64 entries cover 1 to 64 bits per value.
	//
	// The entries between the gocog markers are what the embedded generator
	// program prints: a specialized newBulkOperationPackedN() constructor up
	// to MAX_SPECIALIZED_BITS_PER_VALUE (24) and the generic
	// newBulkOperationPacked(bpv) above that. The same program also writes
	// the bulkOperationN.go files that define the specialized types.
	//
	// NOTE(review): for power-of-two bpv the generator emits
	// `for shift := uint(..); shift >= 0; shift -= bpv` in decodeLongTo*;
	// shift is unsigned there, so that condition is always true. Verify that
	// the generated methods terminate as intended.
	packedBulkOps = []BulkOperation{
		/*[[[gocog
		package main

		import (
			"fmt"
			"io"
			"os"
		)

		const (
			MAX_SPECIALIZED_BITS_PER_VALUE = 24
			HEADER                         = `// This file has been automatically generated, DO NOT EDIT

		package packed

		// Efficient sequential read/write of packed integers.`
		)

		func isPowerOfTwo(n int) bool {
			return n&(n-1) == 0
		}

		func casts(typ string) (castStart, castEnd string) {
			if typ == "int64" {
				return "", ""
			}
			return fmt.Sprintf("%s(", typ), ")"
		}

		func masks(bits int) (start, end string) {
			if bits == 64 {
				return "", ""
			}
			return "(", fmt.Sprintf(" & %x)", (1<<uint(bits))-1)
		}

		var (
			TYPES = map[int]string{8: "byte", 16: "int16", 32: "int32", 64: "int64"}
			NAMES = map[int]string{8: "Byte", 16: "Short", 32: "Int", 64: "Long"}
		)

		func blockValueCount(bpv, bits int) (blocks, values int) {
			blocks = bpv
			values = blocks * bits / bpv
			for blocks%2 == 0 && values%2 == 0 {
				blocks /= 2
				values /= 2
			}
			assert2(values*bpv == bits*blocks, fmt.Sprintf("%d values, %d blocks, %d bits per value", values, blocks, bpv))
			return blocks, values
		}

		func assert2(ok bool, msg string) {
			if !ok {
				panic(msg)
			}
		}

		func packed64(bpv int, f io.Writer) {
			if bpv == 64 {
				panic("not implemented yet")
			} else {
				p64Decode(bpv, f, 32)
				p64Decode(bpv, f, 64)
			}
		}

		func p64Decode(bpv int, f io.Writer, bits int) {
			_, values := blockValueCount(bpv, 64)
			typ := TYPES[bits]
			castStart, castEnd := casts(typ)
			var mask uint

			fmt.Fprintf(f, "func (op *BulkOperationPacked%d) decodeLongTo%s(blocks []int64, values []%s, iterations int) {\n", bpv, NAMES[bits], typ)
			if bits < bpv {
				fmt.Fprintln(f, "	panic(\"not supported yet\")")
			} else {
				fmt.Fprintln(f, "	blocksOffset, valuesOffset := 0, 0")
				fmt.Fprintf(f, "	for i := 0; i < iterations; i ++ {\n")
				mask = 1<<uint(bpv) - 1

				if isPowerOfTwo(bpv) {
					fmt.Fprintln(f, "		block := blocks[blocksOffset]; blocksOffset++")
					fmt.Fprintf(f, "		for shift := uint(%d); shift >= 0; shift -= %d {\n", 64-bpv, bpv)
					fmt.Fprintf(f, "			values[valuesOffset] = %s(int64(uint64(block) >> shift)) & %d%s; valuesOffset++\n", castStart, mask, castEnd)
					fmt.Fprintln(f, "		}")
				} else {
					for i := 0; i < values; i++ {
						blockOffset := i * bpv / 64
						bitOffset := (i * bpv) % 64
						if bitOffset == 0 {
							// start of block
							fmt.Fprintf(f, "		block%d := blocks[blocksOffset]; blocksOffset++\n", blockOffset)
							fmt.Fprintf(f, "		values[valuesOffset] = %sint64(uint64(block%d) >> %d%s); valuesOffset++\n", castStart, blockOffset, 64-bpv, castEnd)
						} else if bitOffset+bpv == 64 {
							// end of block
							fmt.Fprintf(f, "		values[valuesOffset] = %sblock%d & %d%s; valuesOffset++\n", castStart, blockOffset, mask, castEnd)
						} else if bitOffset+bpv < 64 {
							// middle of block
							fmt.Fprintf(f, "		values[valuesOffset] = %sint64(uint64(block%d) >> %d) & %d%s; valuesOffset++\n", castStart, blockOffset, 64-bitOffset-bpv, mask, castEnd)
						} else {
							// value spans across 2 blocks
							mask1 := int(1<<uint(64-bitOffset)) - 1
							shift1 := bitOffset + bpv - 64
							shift2 := 64 - shift1
							fmt.Fprintf(f, "		block%d := blocks[blocksOffset]; blocksOffset++\n", blockOffset+1)
							fmt.Fprintf(f, "		values[valuesOffset] = %s((block%d & %d) << %d) | (int64(uint64(block%d) >> %d))%s; valuesOffset++\n",
								castStart, blockOffset, mask1, shift1, blockOffset+1, shift2, castEnd)
						}
					}
				}
				fmt.Fprintln(f, "	}")
			}
			fmt.Fprintln(f, "}\n")

			_, byteValues := blockValueCount(bpv, 8)

			fmt.Fprintf(f, "func (op *BulkOperationPacked%d) decodeByteTo%s(blocks []byte, values []%s, iterations int) {\n", bpv, NAMES[bits], typ)
			if bits < bpv {
				fmt.Fprintln(f, "	panic(\"not supported yet\")")
			} else {
				fmt.Fprintln(f, "	blocksOffset, valuesOffset := 0, 0")
				if isPowerOfTwo(bpv) && bpv < 8 {
					fmt.Fprintf(f, "	for j := 0; j < iterations; j ++ {\n")
					fmt.Fprintf(f, "		block := blocks[blocksOffset]\n")
					fmt.Fprintln(f, "		blocksOffset++")
					for shift := 8 - bpv; shift > 0; shift -= bpv {
						fmt.Fprintf(f, "		values[valuesOffset] = %s(byte(uint8(block)) >> %d) & %d\n", typ, shift, mask)
						fmt.Fprintln(f, "		valuesOffset++")
					}
					fmt.Fprintf(f, "		values[valuesOffset] = %s(block & %d)\n", typ, mask)
					fmt.Fprintln(f, "		valuesOffset++")
					fmt.Fprintln(f, "	}")
				} else if bpv == 8 {
					fmt.Fprintln(f, "	for j := 0; j < iterations; j ++ {")
					fmt.Fprintf(f, "		values[valuesOffset] = %s(blocks[blocksOffset]); valuesOffset++; blocksOffset++\n", typ)
					fmt.Fprintln(f, "	}")
				} else if isPowerOfTwo(bpv) && bpv > 8 {
					fmt.Fprintf(f, "	for j := 0; j < iterations; j ++ {\n")
					m := "int32"
					if bits > 32 {
						m = "int64"
					}
					fmt.Fprintf(f, "		values[valuesOffset] =")
					for i, until := 0, bpv/8-1; i < until; i++ {
						fmt.Fprintf(f, " (%s(blocks[blocksOffset+%d]) << %d) |", m, i, bpv-8)
					}
					fmt.Fprintf(f, " %s(blocks[blocksOffset+%d])\n", m, bpv/8-1)
					fmt.Fprintln(f, "		valuesOffset++")
					fmt.Fprintf(f, "		blocksOffset += %d\n", bpv/8)
					fmt.Fprintln(f, "	}")
				} else {
					fmt.Fprintf(f, "	for i := 0; i < iterations; i ++ {\n")
					for i := 0; i < byteValues; i++ {
						byteStart, byteEnd := i*bpv/8, ((i+1)*bpv-1)/8
						bitStart, bitEnd := (i*bpv)%8, ((i+1)*bpv-1)%8
						shift := func(b int) int { return 8*(byteEnd-b-1) + 1 + bitEnd }
						if bitStart == 0 {
							fmt.Fprintf(f, "		byte%d := blocks[blocksOffset]\n", byteStart)
							fmt.Fprintln(f, "		blocksOffset++")
						}
						for b, until := byteStart+1, byteEnd+1; b < until; b++ {
							fmt.Fprintf(f, "		byte%d := blocks[blocksOffset]\n", b)
							fmt.Fprintln(f, "		blocksOffset++")
						}
						fmt.Fprintf(f, "		values[valuesOffset] = %s(", typ)
						if byteStart == byteEnd {
							if bitStart == 0 {
								if bitEnd == 7 {
									fmt.Fprintf(f, " int64(byte%d)", byteStart)
								} else {
									fmt.Fprintf(f, " int64(uint8(byte%d) >> %d)", byteStart, 7-bitEnd)
								}
							} else {
								if bitEnd == 7 {
									fmt.Fprintf(f, " int64(byte%d) & %d", byteStart, 1<<uint(8-bitStart)-1)
								} else {
									fmt.Fprintf(f, " int64(uint8(byte%d) >> %d) & %d", byteStart, 7-bitEnd, 1<<uint(bitEnd-bitStart+1)-1)
								}
							}
						} else {
							if bitStart == 0 {
								fmt.Fprintf(f, "(int64(byte%d) << %d)", byteStart, shift(byteStart))
							} else {
								fmt.Fprintf(f, "(int64(byte%d & %d) << %d)", byteStart, 1<<uint(8-bitStart)-1, shift(byteStart))
							}
							for b, until := byteStart+1, byteEnd; b < until; b++ {
								fmt.Fprintf(f, " | (int64(byte%d) << %d)", b, shift(b))
							}
							if bitEnd == 7 {
								fmt.Fprintf(f, " | int64(byte%d)", byteEnd)
							} else {
								fmt.Fprintf(f, " | int64(uint8(byte%d) >> %d)", byteEnd, 7-bitEnd)
							}
						}
						fmt.Fprintf(f, ")")
						fmt.Fprintln(f, "")
						fmt.Fprintln(f, "		valuesOffset++")
					}
					fmt.Fprintln(f, "	}")
				}
			}
			fmt.Fprintln(f, "}")
		}

		func main() {
			for bpv := 1; bpv <= 64; bpv++ {
				if bpv > MAX_SPECIALIZED_BITS_PER_VALUE {
					fmt.Printf("		newBulkOperationPacked(%d),\n", bpv)
					continue
				}
				f, err := os.Create(fmt.Sprintf("bulkOperation%d.go", bpv))
				if err != nil {
					panic(err)
				}
				defer f.Close()

				fmt.Fprintf(f, "%v\n", HEADER)
				fmt.Fprintf(f, "type BulkOperationPacked%d struct {\n", bpv)
				fmt.Fprintln(f, "	*BulkOperationPacked")
				fmt.Fprintln(f, "}\n")

				fmt.Fprintf(f, "func newBulkOperationPacked%d() BulkOperation {\n", bpv)
				fmt.Fprintf(f, "	return &BulkOperationPacked%d{newBulkOperationPacked(%d)}\n", bpv, bpv)
				fmt.Fprintln(f, "}\n")

				packed64(bpv, f)

				fmt.Printf("		newBulkOperationPacked%d(),\n", bpv)
			}
		}
				gocog]]]*/
		newBulkOperationPacked1(),
		newBulkOperationPacked2(),
		newBulkOperationPacked3(),
		newBulkOperationPacked4(),
		newBulkOperationPacked5(),
		newBulkOperationPacked6(),
		newBulkOperationPacked7(),
		newBulkOperationPacked8(),
		newBulkOperationPacked9(),
		newBulkOperationPacked10(),
		newBulkOperationPacked11(),
		newBulkOperationPacked12(),
		newBulkOperationPacked13(),
		newBulkOperationPacked14(),
		newBulkOperationPacked15(),
		newBulkOperationPacked16(),
		newBulkOperationPacked17(),
		newBulkOperationPacked18(),
		newBulkOperationPacked19(),
		newBulkOperationPacked20(),
		newBulkOperationPacked21(),
		newBulkOperationPacked22(),
		newBulkOperationPacked23(),
		newBulkOperationPacked24(),
		newBulkOperationPacked(25),
		newBulkOperationPacked(26),
		newBulkOperationPacked(27),
		newBulkOperationPacked(28),
		newBulkOperationPacked(29),
		newBulkOperationPacked(30),
		newBulkOperationPacked(31),
		newBulkOperationPacked(32),
		newBulkOperationPacked(33),
		newBulkOperationPacked(34),
		newBulkOperationPacked(35),
		newBulkOperationPacked(36),
		newBulkOperationPacked(37),
		newBulkOperationPacked(38),
		newBulkOperationPacked(39),
		newBulkOperationPacked(40),
		newBulkOperationPacked(41),
		newBulkOperationPacked(42),
		newBulkOperationPacked(43),
		newBulkOperationPacked(44),
		newBulkOperationPacked(45),
		newBulkOperationPacked(46),
		newBulkOperationPacked(47),
		newBulkOperationPacked(48),
		newBulkOperationPacked(49),
		newBulkOperationPacked(50),
		newBulkOperationPacked(51),
		newBulkOperationPacked(52),
		newBulkOperationPacked(53),
		newBulkOperationPacked(54),
		newBulkOperationPacked(55),
		newBulkOperationPacked(56),
		newBulkOperationPacked(57),
		newBulkOperationPacked(58),
		newBulkOperationPacked(59),
		newBulkOperationPacked(60),
		newBulkOperationPacked(61),
		newBulkOperationPacked(62),
		newBulkOperationPacked(63),
		newBulkOperationPacked(64),
		// [[[end]]]
	}

	// packedSingleBlockBulkOps is the equivalent table for the
	// PACKED_SINGLE_BLOCK format, also indexed by bitsPerValue-1. Only the
	// bits-per-value listed in PACKED_64_SINGLE_BLOCK_BPV (1-10, 12, 16, 21
	// and 32) have an entry; every other slot up to 32 is nil, and the table
	// stops at 32 bits per value.
	packedSingleBlockBulkOps = []BulkOperation{
		/*[[[gocog
			package main
			import "fmt"
		  var PACKED_64_SINGLE_BLOCK_BPV = []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 16, 21, 32}
			func main() {
				var bpv int = 1
				for _, v := range PACKED_64_SINGLE_BLOCK_BPV {
					for ;bpv < v; bpv++ {
						fmt.Print("		nil,\n")
					}
					fmt.Printf("		newBulkOperationPackedSingleBlock(%v),\n", bpv)
					bpv++
				}
			}
			gocog]]]*/
		newBulkOperationPackedSingleBlock(1),
		newBulkOperationPackedSingleBlock(2),
		newBulkOperationPackedSingleBlock(3),
		newBulkOperationPackedSingleBlock(4),
		newBulkOperationPackedSingleBlock(5),
		newBulkOperationPackedSingleBlock(6),
		newBulkOperationPackedSingleBlock(7),
		newBulkOperationPackedSingleBlock(8),
		newBulkOperationPackedSingleBlock(9),
		newBulkOperationPackedSingleBlock(10),
		nil,
		newBulkOperationPackedSingleBlock(12),
		nil,
		nil,
		nil,
		newBulkOperationPackedSingleBlock(16),
		nil,
		nil,
		nil,
		nil,
		newBulkOperationPackedSingleBlock(21),
		nil,
		nil,
		nil,
		nil,
		nil,
		nil,
		nil,
		nil,
		nil,
		nil,
		newBulkOperationPackedSingleBlock(32),
		// [[[end]]]
	}
)
   396  
   397  func newBulkOperation(format PackedFormat, bitsPerValue uint32) BulkOperation {
   398  	// log.Printf("Initializing BulkOperation(%v,%v)", format, bitsPerValue)
   399  	switch int(format) {
   400  	case PACKED:
   401  		assert2(packedBulkOps[bitsPerValue-1] != nil, fmt.Sprintf("bpv=%v", bitsPerValue))
   402  		return packedBulkOps[bitsPerValue-1]
   403  	case PACKED_SINGLE_BLOCK:
   404  		assert2(packedSingleBlockBulkOps[bitsPerValue-1] != nil, fmt.Sprintf("bpv=%v", bitsPerValue))
   405  		return packedSingleBlockBulkOps[bitsPerValue-1]
   406  	}
   407  	panic(fmt.Sprintf("invalid packed format: %v", format))
   408  }
   409  
// BulkOperationImpl embeds a PackedIntsDecoder and adds the writeLong and
// computeIterations helpers on top of it.
type BulkOperationImpl struct {
	PackedIntsDecoder
}
   413  
   414  func newBulkOperationImpl(decoder PackedIntsDecoder) *BulkOperationImpl {
   415  	return &BulkOperationImpl{decoder}
   416  }
   417  
   418  func (op *BulkOperationImpl) writeLong(block int64, blocks []byte) int {
   419  	blocksOffset := 0
   420  	for j := 1; j <= 8; j++ {
   421  		blocks[blocksOffset] = byte(uint64(block) >> uint(64-(j<<3)))
   422  		blocksOffset++
   423  	}
   424  	return blocksOffset
   425  }
   426  
   427  func (op *BulkOperationImpl) computeIterations(valueCount, ramBudget int) int {
   428  	iterations := ramBudget / (op.ByteBlockCount() + 8*op.ByteValueCount())
   429  	if iterations == 0 {
   430  		// at least 1
   431  		return 1
   432  	} else if (iterations-1)*op.ByteValueCount() >= valueCount {
   433  		// don't allocate for more than the size of the reader
   434  		panic("not implemented yet")
   435  	} else {
   436  		return iterations
   437  	}
   438  }