github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/codec/lucene40/liveDocsFormat.go (about) 1 package lucene40 2 3 import ( 4 . "github.com/balzaczyy/golucene/core/codec/spi" 5 "github.com/balzaczyy/golucene/core/store" 6 "github.com/balzaczyy/golucene/core/util" 7 ) 8 9 // codecs/lucene40/Lucene40LiveDocsFormat.java 10 11 /* 12 Lucene 4.0 Live Documents Format. 13 14 The .del file is optional, and only exists when a segment contains 15 deletions. 16 17 Although per-segment, this file is maintained exterior to compound 18 segment files. 19 20 Deletions (.del) --> Format,Header,ByteCount,BitCount, Bits | DGaps 21 (depending on Format) 22 Format,ByteSize,BitCount --> uint32 23 Bits --> <byte>^ByteCount 24 DGaps --> <DGap,NonOnesByte>^NonzeroBytesCount 25 DGap --> vint 26 NonOnesByte --> byte 27 Header --> CodecHeader 28 29 Format is 1: indicates cleared DGaps. 30 31 ByteCount indicates the number of bytes in Bits. It is typically 32 (SegSize/8)+1. 33 34 BitCount indicates the number of bits that are currently set in Bits. 35 36 Bits contains one bit for each document indexed. When the bit 37 corresponding to a document number is cleared, that document is 38 marked as deleted. Bit ordering is from least to most significant. 39 Thus, if Bits contains two bytes, 0x00 and 0x02, then document 9 is 40 marked as alive (not deleted). 41 42 DGaps represents sparse bit-vectors more efficiently than Bits. It is 43 made of DGaps on indexes of nonOnes bytes in Bits, and the nonOnes 44 bytes themselves. The number of nonOnes bytes in Bits 45 (NonOnesBytesCount) is not stored. 
46 47 For example, if there are 8000 bits and only bits 10,12,32 are 48 cleared, DGaps would be used: 49 50 (vint) 1, (byte) 20, (vint) 3, (byte) 1 51 */ 52 type Lucene40LiveDocsFormat struct { 53 } 54 55 /* Extension of deletes */ 56 const DELETES_EXTENSION = "del" 57 58 func (format *Lucene40LiveDocsFormat) NewLiveDocs(size int) util.MutableBits { 59 ans := NewBitVector(size) 60 ans.InvertAll() 61 return ans 62 } 63 64 func (format *Lucene40LiveDocsFormat) WriteLiveDocs(bits util.MutableBits, 65 dir store.Directory, info *SegmentCommitInfo, newDelCount int, 66 ctx store.IOContext) error { 67 68 filename := util.FileNameFromGeneration(info.Info.Name, DELETES_EXTENSION, info.NextDelGen()) 69 liveDocs := bits.(*BitVector) 70 assert(liveDocs.Count() == info.Info.DocCount()-info.DelCount()-newDelCount) 71 assert(liveDocs.Length() == info.Info.DocCount()) 72 return liveDocs.Write(dir, filename, ctx) 73 } 74 75 func (format *Lucene40LiveDocsFormat) Files(info *SegmentCommitInfo) []string { 76 if info.HasDeletions() { 77 return []string{util.FileNameFromGeneration(info.Info.Name, DELETES_EXTENSION, info.DelGen())} 78 } 79 return []string{} 80 }