github.com/fraugster/parquet-go@v0.12.0/bitpack_gen.go (about)

     1  //go:build ignore
     2  // +build ignore
     3  
     4  package main
     5  
     6  // This file is based on the code from https://github.com/kostya-sh/parquet-go
     7  // Copyright (c) 2015 Konstantin Shaposhnikov
     8  
     9  import (
    10  	"bytes"
    11  	"fmt"
    12  	"go/format"
    13  	"io"
    14  	"io/ioutil"
    15  	"log"
    16  	"strings"
    17  )
    18  
    19  func genExpr(maxWidth int, bw int, i int, startBit int) (expr string, newStartBit int) {
    20  	byteShift := 0
    21  	firstCurByteBit := startBit - startBit%8
    22  	for bw != 0 {
    23  		curByte := startBit / 8
    24  		bitsInCurByte := bw
    25  		if bitsLeft := startBit - firstCurByteBit + 1; bitsInCurByte > bitsLeft {
    26  			bitsInCurByte = bitsLeft
    27  		}
    28  		shiftSize := 7 - startBit%8
    29  		mask := 1<<uint(bitsInCurByte) - 1
    30  
    31  		if len(expr) != 0 {
    32  			expr += " | "
    33  		}
    34  		expr += fmt.Sprintf("uint%d((data[%d] >> %d) & %d) << %d",
    35  			maxWidth, curByte, shiftSize, mask, byteShift)
    36  
    37  		bw -= bitsInCurByte
    38  		startBit -= bitsInCurByte
    39  		if startBit < firstCurByteBit {
    40  			startBit = firstCurByteBit + 15
    41  			firstCurByteBit += 8
    42  		}
    43  		byteShift += bitsInCurByte
    44  	}
    45  	return expr, startBit
    46  }
    47  
    48  func genUnpackFunc(out io.Writer, maxWidth int, bw int) {
    49  	fmt.Fprintf(out, "func unpack8int%d_%d(data []byte) (a [8]int%d) {\n", maxWidth, bw, maxWidth)
    50  	fmt.Fprintf(out, "\t_ = data[%d]\n", bw-1)
    51  	startBit := 7
    52  	var expr string
    53  	for i := 0; i < 8; i++ {
    54  		expr, startBit = genExpr(maxWidth, bw, i, startBit)
    55  		fmt.Fprintf(out, "\ta[%d] = int%d(%s)\n", i, maxWidth, expr)
    56  	}
    57  	fmt.Fprintf(out, "\treturn\n")
    58  	fmt.Fprintf(out, "}\n\n")
    59  }
    60  
    61  func getBits(idx int, bitSize, size, left, pos int, rev bool) string {
    62  	op := "<<"
    63  	if rev {
    64  		op = ">>"
    65  	}
    66  	return fmt.Sprintf("uint%d(data[%d])%s%d", bitSize, idx, op, size-left+pos)
    67  }
    68  
    69  func genPackFunc(w io.Writer, bitSize, size int) {
    70  	var (
    71  		left = size
    72  		indx int
    73  		rev  bool
    74  	)
    75  
    76  	fmt.Fprintf(w, "func pack8int%[1]d_%[2]d(data [8]int%[1]d) []byte {", bitSize, size)
    77  	fmt.Fprintln(w, "\n\treturn []byte{")
    78  	for i := 0; i < size; i++ {
    79  		var fields []string
    80  		for right := 0; right < 8; {
    81  			if left == 0 {
    82  				indx++
    83  				left = size
    84  				rev = false
    85  			}
    86  			fields = append(fields, getBits(indx, bitSize, size, left, right, rev))
    87  			if left >= 8-right {
    88  				left -= (8 - right)
    89  				right = 8
    90  				rev = true
    91  			} else {
    92  				right += left
    93  				left = 0
    94  			}
    95  		}
    96  
    97  		fmt.Fprintf(w, "\t\tbyte(%s),\n", strings.Join(fields, " | "))
    98  	}
    99  	fmt.Fprintln(w, "\t}\n}\n")
   100  }
   101  
   102  func funcSlice(bitSize int) string {
   103  	buf := &bytes.Buffer{}
   104  	fmt.Fprintf(buf, `var unpack8Int%[1]dFuncByWidth = [%[2]d]unpack8int%[1]dFunc{`, bitSize, bitSize+1)
   105  	for i := 0; i <= bitSize; i++ {
   106  		fmt.Fprintf(buf, "\n\tunpack8int%d_%d,", bitSize, i)
   107  	}
   108  	fmt.Fprintf(buf, "\n}\n")
   109  
   110  	fmt.Fprintf(buf, `var pack8Int%[1]dFuncByWidth = [%[2]d]pack8int%[1]dFunc{`, bitSize, bitSize+1)
   111  	for i := 0; i <= bitSize; i++ {
   112  		fmt.Fprintf(buf, "\n\tpack8int%d_%d,", bitSize, i)
   113  	}
   114  	fmt.Fprintf(buf, "\n}\n")
   115  
   116  	return buf.String()
   117  }
   118  
   119  func zeroFuncs(w io.Writer, bitSize int) {
   120  	fmt.Fprintf(w, `
   121  type (
   122  	unpack8int%[1]dFunc func([]byte) [8]int%[1]d
   123  	pack8int%[1]dFunc func([8]int%[1]d) []byte
   124  )
   125  
   126  %[2]s
   127  
   128  func unpack8int%[1]d_0(_ []byte) (a [8]int%[1]d) {
   129  	return a
   130  }
   131  
   132  func pack8int%[1]d_0(_ [8]int%[1]d) []byte {
   133  	return []byte{}
   134  }
   135  
   136  `, bitSize, funcSlice(bitSize))
   137  }
   138  
   139  func genPackage(fn string, maxWidth int) {
   140  	buf := new(bytes.Buffer)
   141  
   142  	fmt.Fprint(buf, "// Code generated by \"bitpacking_gen.go\"; DO NOT EDIT.\n\n")
   143  	fmt.Fprintf(buf, "package goparquet\n\n")
   144  
   145  	zeroFuncs(buf, maxWidth)
   146  	for i := 1; i <= maxWidth; i++ {
   147  		genUnpackFunc(buf, maxWidth, i)
   148  		genPackFunc(buf, maxWidth, i)
   149  	}
   150  
   151  	src, err := format.Source(buf.Bytes())
   152  	if err != nil {
   153  		log.Fatal(err)
   154  	}
   155  
   156  	err = ioutil.WriteFile(fn, src, 0644)
   157  	if err != nil {
   158  		log.Fatal(err)
   159  	}
   160  }
   161  
   162  func main() {
   163  	genPackage("bitbacking32.go", 32)
   164  	genPackage("bitpacking64.go", 64)
   165  }