github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/codec/lucene40/liveDocsFormat.go (about)

     1  package lucene40
     2  
     3  import (
     4  	. "github.com/balzaczyy/golucene/core/codec/spi"
     5  	"github.com/balzaczyy/golucene/core/store"
     6  	"github.com/balzaczyy/golucene/core/util"
     7  )
     8  
     9  // codecs/lucene40/Lucene40LiveDocsFormat.java
    10  
    11  /*
    12  Lucene 4.0 Live Documents Format.
    13  
    14  The .del file is optional, and only exists when a segment contains
    15  deletions.
    16  
    17  Although per-segment, this file is maintained exterior to compound
    18  segment files.
    19  
    20  Deletions (.del) --> Format,Heaer,ByteCount,BitCount, Bits | DGaps
    21    (depending on Format)
    22  	Format,ByteSize,BitCount --> uint32
    23  	Bits --> <byte>^ByteCount
    24  	DGaps --> <DGap,NonOnesByte>^NonzeroBytesCount
    25  	DGap --> vint
    26  	NonOnesByte --> byte
    27  	Header --> CodecHeader
    28  
    29  Format is 1: indicates cleard DGaps.
    30  
    31  ByteCount indicates the number of bytes in Bits. It is typically
    32  (SegSize/8)+1.
    33  
    34  BitCount indicates the number of bits that are currently set in Bits.
    35  
    36  Bits contains one bit for each document indexed. When the bit
    37  corresponding to a document number is cleared, that document is
    38  marked as deleted. Bit ordering is from least to most significant.
    39  Thus, if Bits contains two bytes, 0x00 and 0x02, then document 9 is
    40  marked as alive (not deleted).
    41  
    42  DGaps represents sparse bit-vectors more efficiently than Bits. It is
    43  makde of DGaps on indexes of nonOnes bytes in Bits, and the nonOnes
    44  bytes themselves. The number of nonOnes byte in Bits
    45  (NonOnesBytesCount) is not stored.
    46  
    47  For example, if there are 8000 bits and only bits 10,12,32 are
    48  cleared, DGaps would be used:
    49  
    50  (vint) 1, (byte) 20, (vint) 3, (byte) 1
    51  */
    52  type Lucene40LiveDocsFormat struct {
    53  }
    54  
// DELETES_EXTENSION is the file name extension of the deletes file. It
// is the extension argument this format passes to
// util.FileNameFromGeneration when building that file's name.
const DELETES_EXTENSION = "del"
    57  
    58  func (format *Lucene40LiveDocsFormat) NewLiveDocs(size int) util.MutableBits {
    59  	ans := NewBitVector(size)
    60  	ans.InvertAll()
    61  	return ans
    62  }
    63  
    64  func (format *Lucene40LiveDocsFormat) WriteLiveDocs(bits util.MutableBits,
    65  	dir store.Directory, info *SegmentCommitInfo, newDelCount int,
    66  	ctx store.IOContext) error {
    67  
    68  	filename := util.FileNameFromGeneration(info.Info.Name, DELETES_EXTENSION, info.NextDelGen())
    69  	liveDocs := bits.(*BitVector)
    70  	assert(liveDocs.Count() == info.Info.DocCount()-info.DelCount()-newDelCount)
    71  	assert(liveDocs.Length() == info.Info.DocCount())
    72  	return liveDocs.Write(dir, filename, ctx)
    73  }
    74  
    75  func (format *Lucene40LiveDocsFormat) Files(info *SegmentCommitInfo) []string {
    76  	if info.HasDeletions() {
    77  		return []string{util.FileNameFromGeneration(info.Info.Name, DELETES_EXTENSION, info.DelGen())}
    78  	}
    79  	return []string{}
    80  }