github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/codec/compressing/storedFieldsIndexReader.go (about)

     1  package compressing
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"github.com/balzaczyy/golucene/core/index/model"
     7  	"github.com/balzaczyy/golucene/core/store"
     8  	"github.com/balzaczyy/golucene/core/util"
     9  	"github.com/balzaczyy/golucene/core/util/packed"
    10  )
    11  
// codec/compressing/CompressingStoredFieldsIndexReader.java

// CompressingStoredFieldsIndexReader is a random-access reader for the
// chunk index produced by CompressingStoredFieldsIndexWriter. The index
// is a sequence of blocks; within each block, per-chunk doc bases and
// file start pointers are stored as zig-zag deltas from a linear
// (average-based) prediction, packed at a fixed bit width.
type CompressingStoredFieldsIndexReader struct {
	maxDoc              int                      // number of docs in the segment (exclusive upper bound for docIDs)
	docBases            []int                    // first docID of each block; sorted, used for binary search
	startPointers       []int64                  // file pointer of the first chunk of each block
	avgChunkDocs        []int                    // per-block average number of docs per chunk (prediction slope)
	avgChunkSizes       []int64                  // per-block average chunk byte size (prediction slope)
	docBasesDeltas      []packed.PackedIntsReader // per-block zig-zag deltas from the predicted doc base
	startPointersDeltas []packed.PackedIntsReader // per-block zig-zag deltas from the predicted start pointer
}
    24  
    25  func newCompressingStoredFieldsIndexReader(fieldsIndexIn store.IndexInput,
    26  	si *model.SegmentInfo) (r *CompressingStoredFieldsIndexReader, err error) {
    27  
    28  	r = &CompressingStoredFieldsIndexReader{}
    29  	r.maxDoc = si.DocCount()
    30  	r.docBases = make([]int, 0, 16)
    31  	r.startPointers = make([]int64, 0, 16)
    32  	r.avgChunkDocs = make([]int, 0, 16)
    33  	r.avgChunkSizes = make([]int64, 0, 16)
    34  	r.docBasesDeltas = make([]packed.PackedIntsReader, 0, 16)
    35  	r.startPointersDeltas = make([]packed.PackedIntsReader, 0, 16)
    36  
    37  	packedIntsVersion, err := fieldsIndexIn.ReadVInt()
    38  	if err != nil {
    39  		return nil, err
    40  	}
    41  
    42  	for blockCount := 0; ; blockCount++ {
    43  		numChunks, err := fieldsIndexIn.ReadVInt()
    44  		if err != nil {
    45  			return nil, err
    46  		}
    47  		if numChunks == 0 {
    48  			break
    49  		}
    50  
    51  		{ // doc bases
    52  			n, err := fieldsIndexIn.ReadVInt()
    53  			if err != nil {
    54  				return nil, err
    55  			}
    56  			r.docBases = append(r.docBases, int(n))
    57  			n, err = fieldsIndexIn.ReadVInt()
    58  			if err != nil {
    59  				return nil, err
    60  			}
    61  			r.avgChunkDocs = append(r.avgChunkDocs, int(n))
    62  			bitsPerDocBase, err := fieldsIndexIn.ReadVInt()
    63  			if err != nil {
    64  				return nil, err
    65  			}
    66  			if bitsPerDocBase > 32 {
    67  				return nil, errors.New(fmt.Sprintf("Corrupted bitsPerDocBase (resource=%v)", fieldsIndexIn))
    68  			}
    69  			pr, err := packed.ReaderNoHeader(fieldsIndexIn, packed.PACKED, packedIntsVersion, numChunks, uint32(bitsPerDocBase))
    70  			if err != nil {
    71  				return nil, err
    72  			}
    73  			r.docBasesDeltas = append(r.docBasesDeltas, pr)
    74  		}
    75  
    76  		{ // start pointers
    77  			n, err := fieldsIndexIn.ReadVLong()
    78  			if err != nil {
    79  				return nil, err
    80  			}
    81  			r.startPointers = append(r.startPointers, n)
    82  			n, err = fieldsIndexIn.ReadVLong()
    83  			if err != nil {
    84  				return nil, err
    85  			}
    86  			r.avgChunkSizes = append(r.avgChunkSizes, n)
    87  			bitsPerStartPointer, err := fieldsIndexIn.ReadVInt()
    88  			if err != nil {
    89  				return nil, err
    90  			}
    91  			if bitsPerStartPointer > 64 {
    92  				return nil, errors.New(fmt.Sprintf("Corrupted bitsPerStartPonter (resource=%v)", fieldsIndexIn))
    93  			}
    94  			pr, err := packed.ReaderNoHeader(fieldsIndexIn, packed.PACKED, packedIntsVersion, numChunks, uint32(bitsPerStartPointer))
    95  			if err != nil {
    96  				return nil, err
    97  			}
    98  			r.startPointersDeltas = append(r.startPointersDeltas, pr)
    99  		}
   100  	}
   101  
   102  	return r, nil
   103  }
   104  
   105  func (r *CompressingStoredFieldsIndexReader) block(docID int) int {
   106  	lo, hi := 0, len(r.docBases)-1
   107  	for lo <= hi {
   108  		mid := int(uint(lo+hi) >> 1)
   109  		midValue := r.docBases[mid]
   110  		if midValue == docID {
   111  			return mid
   112  		} else if midValue < docID {
   113  			lo = mid + 1
   114  		} else {
   115  			hi = mid - 1
   116  		}
   117  	}
   118  	return hi
   119  }
   120  
   121  func (r *CompressingStoredFieldsIndexReader) relativeDocBase(block, relativeChunk int) int {
   122  	expected := r.avgChunkDocs[block] * relativeChunk
   123  	delta := util.ZigZagDecodeLong(r.docBasesDeltas[block].Get(relativeChunk))
   124  	return expected + int(delta)
   125  }
   126  
   127  func (r *CompressingStoredFieldsIndexReader) relativeStartPointer(block, relativeChunk int) int64 {
   128  	expected := r.avgChunkSizes[block] * int64(relativeChunk)
   129  	delta := util.ZigZagDecodeLong(r.startPointersDeltas[block].Get(relativeChunk))
   130  	return expected + delta
   131  }
   132  
   133  func (r *CompressingStoredFieldsIndexReader) relativeChunk(block, relativeDoc int) int {
   134  	lo, hi := 0, int(r.docBasesDeltas[block].Size())-1
   135  	for lo <= hi {
   136  		mid := int(uint(lo+hi) >> 1)
   137  		midValue := r.relativeDocBase(block, mid)
   138  		if midValue == relativeDoc {
   139  			return mid
   140  		} else if midValue < relativeDoc {
   141  			lo = mid + 1
   142  		} else {
   143  			hi = mid - 1
   144  		}
   145  	}
   146  	return hi
   147  }
   148  
   149  func (r *CompressingStoredFieldsIndexReader) startPointer(docID int) int64 {
   150  	if docID < 0 || docID >= r.maxDoc {
   151  		panic(fmt.Sprintf("docID out of range [0-%v]: %v", r.maxDoc, docID))
   152  	}
   153  	block := r.block(docID)
   154  	relativeChunk := r.relativeChunk(block, docID-r.docBases[block])
   155  	return r.startPointers[block] + r.relativeStartPointer(block, relativeChunk)
   156  }
   157  
// Clone returns the receiver itself: none of the reader's methods mutate
// its state after construction, so the one instance can be shared safely
// across callers that expect an independent clone.
func (r *CompressingStoredFieldsIndexReader) Clone() *CompressingStoredFieldsIndexReader {
	return r
}