github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/bufferedDeletes.go

package index

import (
	// "bytes"
	"fmt"
	"github.com/balzaczyy/golucene/core/util"
	"math"
	"sync/atomic"
)

// index/BufferedUpdates.java

/* A Go slice consumes two ints for an extra doc ID, assuming 50% pre-allocation. */
const BYTES_PER_DEL_DOCID = 2 * util.NUM_BYTES_INT

/* A Go map (amd64) consumes about 40 bytes for an extra entry. */
const BYTES_PER_DEL_QUERY = 40 + util.NUM_BYTES_OBJECT_REF + util.NUM_BYTES_INT

const MAX_INT = int(math.MaxInt32)

const VERBOSE = false

/*
Holds buffered deletes, by docID, term, or query, for a single segment.
This is used to hold buffered pending deletes against the
to-be-flushed segment. Once the deletes are pushed (on flush in
DocumentsWriter), they are converted to a FrozenBufferedUpdates
instance.

NOTE: instances of this struct are accessed either via a private
instance on DocumentsWriterPerThread, or via sync'd code by
DocumentsWriterDeleteQueue.
*/
type BufferedUpdates struct {
	numTermDeletes int32 // atomic

	terms   map[*Term]int
	queries map[interface{}]int
	docIDs  []int

	numericUpdates map[string]map[*Term]*DocValuesUpdate

	binaryUpdates map[string]map[*Term]*DocValuesUpdate

	bytesUsed int64 // atomic

	gen int64
}

func newBufferedUpdates() *BufferedUpdates {
	return &BufferedUpdates{
		terms:          make(map[*Term]int),
		queries:        make(map[interface{}]int),
		numericUpdates: make(map[string]map[*Term]*DocValuesUpdate),
		binaryUpdates:  make(map[string]map[*Term]*DocValuesUpdate),
	}
}

func (bd *BufferedUpdates) String() string {
	panic("not implemented yet")
	// if VERBOSE {
	// 	return fmt.Sprintf(
	// 		"BufferedUpdates[gen=%v, numTerms=%v, terms=%v, queries=%v, docIDs=%v, bytesUsed=%v]",
	// 		bd.gen, atomic.LoadInt32(&bd.numTermDeletes), bd.terms, bd.queries, bd.docIDs, bd.bytesUsed)
	// } else {
	// 	var buf bytes.Buffer
	// 	fmt.Fprintf(&buf, "BufferedUpdates[gen=%v", bd.gen)
	// 	if n := atomic.LoadInt32(&bd.numTermDeletes); n != 0 {
	// 		fmt.Fprintf(&buf, " %v deleted terms (unique count=%v)", n, len(bd.terms))
	// 	}
	// 	if len(bd.queries) > 0 {
	// 		fmt.Fprintf(&buf, " %v deleted queries", len(bd.queries))
	// 	}
	// 	if len(bd.docIDs) > 0 {
	// 		fmt.Fprintf(&buf, " %v deleted docIDs", len(bd.docIDs))
	// 	}
	// 	if n := atomic.LoadInt64(&bd.bytesUsed); n != 0 {
	// 		fmt.Fprintf(&buf, " bytesUsed=%v", n)
	// 	}
	// 	buf.WriteRune(']')
	// 	return buf.String()
	// }
}

// addDocID buffers a delete-by-docID against the in-flight segment and
// charges BYTES_PER_DEL_DOCID bytes to the RAM accounting.
func (bd *BufferedUpdates) addDocID(docID int) {
	bd.docIDs = append(bd.docIDs, docID)
	atomic.AddInt64(&bd.bytesUsed, BYTES_PER_DEL_DOCID)
}

// clear resets the buffered term, query, and docID deletes along with the
// RAM accounting so the instance can be reused.
func (bd *BufferedUpdates) clear() {
	bd.terms = make(map[*Term]int)
	bd.queries = make(map[interface{}]int)
	bd.docIDs = nil
	atomic.StoreInt32(&bd.numTermDeletes, 0)
	atomic.StoreInt64(&bd.bytesUsed, 0)
}

// any reports whether any deletes or doc-values updates are currently buffered.
func (bd *BufferedUpdates) any() bool {
	return len(bd.terms) > 0 || len(bd.docIDs) > 0 || len(bd.queries) > 0 ||
		len(bd.numericUpdates) > 0 || len(bd.binaryUpdates) > 0
}
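// bufferDocIDDeletesSketch is a minimal illustrative sketch, not part of the
// upstream port: it shows how deletes-by-docID are buffered against the
// in-flight segment and how the constants above drive the RAM accounting
// (each appended docID is charged BYTES_PER_DEL_DOCID bytes). The docIDs
// passed in are arbitrary example values supplied by the caller.
func bufferDocIDDeletesSketch(docIDs ...int) (*BufferedUpdates, int64) {
	bd := newBufferedUpdates()
	for _, docID := range docIDs {
		bd.addDocID(docID) // appends to bd.docIDs and bumps bytesUsed
	}
	// bytesUsed is now len(docIDs) * BYTES_PER_DEL_DOCID.
	return bd, atomic.LoadInt64(&bd.bytesUsed)
}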
// index/FrozenBufferedUpdates.java

/*
Holds buffered deletes and updates, by term or query, once pushed.
Pushed deletes/updates are write-once, so we shift to a more
memory-efficient data structure to hold them. We don't hold docIDs
because these are applied on flush.
*/
type FrozenBufferedUpdates struct {
	// Terms, in sorted order:
	terms     *PrefixCodedTerms
	termCount int // just for debugging

	// Parallel arrays of deleted queries and the docIDUpto for each
	_queries    []Query
	queryLimits []int

	// numeric DV update terms and their updates
	numericDVUpdates []*DocValuesUpdate

	// binary DV update terms and their updates
	binaryDVUpdates []*DocValuesUpdate

	bytesUsed      int
	numTermDeletes int
	gen            int64 // -1 until assigned by BufferedUpdatesStream once pushed
	// true iff this frozen packet represents segment-private deletes;
	// in that case it should only have queries
	isSegmentPrivate bool
}
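// iterateFrozenQueries is a minimal illustrative sketch, not part of the
// upstream port: it shows how the parallel slices above are meant to be read,
// with _queries[i] paired with queryLimits[i] (the docIDUpto bound for that
// deleted query). The visit callback is a hypothetical parameter used only
// for illustration.
func iterateFrozenQueries(fd *FrozenBufferedUpdates, visit func(q Query, docIDUpto int)) {
	for i, q := range fd._queries {
		visit(q, fd.queryLimits[i])
	}
}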
func freezeBufferedUpdates(deletes *BufferedUpdates, isPrivate bool) *FrozenBufferedUpdates {
	assert2(!isPrivate || len(deletes.terms) == 0,
		"segment private packet should only have del queries")
	var termsArray []*Term
	for k := range deletes.terms {
		termsArray = append(termsArray, k)
	}
	util.TimSort(TermSorter(termsArray))
	builder := newPrefixCodedTermsBuilder()
	for _, term := range termsArray {
		builder.add(term)
	}
	terms := builder.finish()

	queries := make([]Query, len(deletes.queries))
	queryLimits := make([]int, len(deletes.queries))
	upto := 0
	for k, v := range deletes.queries {
		queries[upto] = k
		queryLimits[upto] = v
		upto++
	}

	// TODO: if a Term affects multiple fields, we could keep the updates keyed
	// by Term so that it maps to all fields it affects, sorted by their
	// docUpto, and traverse that Term only once, applying the update to all
	// fields that still need to be updated.
	var allNumericUpdates []*DocValuesUpdate
	numericUpdatesSize := 0
	for _, numericUpdates := range deletes.numericUpdates {
		for _, update := range numericUpdates {
			allNumericUpdates = append(allNumericUpdates, update)
			numericUpdatesSize += update.sizeInBytes()
		}
	}

	// TODO: same as above, but for binary doc-values updates.
	var allBinaryUpdates []*DocValuesUpdate
	binaryUpdatesSize := 0
	for _, binaryUpdates := range deletes.binaryUpdates {
		for _, update := range binaryUpdates {
			allBinaryUpdates = append(allBinaryUpdates, update)
			binaryUpdatesSize += update.sizeInBytes()
		}
	}

	bytesUsed := int(terms.RamBytesUsed() +
		int64(len(queries))*BYTES_PER_DEL_QUERY +
		int64(numericUpdatesSize) + util.ShallowSizeOf(allNumericUpdates) +
		int64(binaryUpdatesSize) + util.ShallowSizeOf(allBinaryUpdates))

	return &FrozenBufferedUpdates{
		gen:              -1,
		isSegmentPrivate: isPrivate,
		termCount:        len(termsArray),
		terms:            terms,
		_queries:         queries,
		queryLimits:      queryLimits,
		numericDVUpdates: allNumericUpdates,
		binaryDVUpdates:  allBinaryUpdates,
		bytesUsed:        bytesUsed,
		numTermDeletes:   int(atomic.LoadInt32(&deletes.numTermDeletes)),
	}
}

func assert(ok bool) {
	if !ok {
		panic("assert fail")
	}
}

func assert2(ok bool, msg string, args ...interface{}) {
	if !ok {
		panic(fmt.Sprintf(msg, args...))
	}
}

func (bd *FrozenBufferedUpdates) queries() []*QueryAndLimit {
	panic("not implemented yet")
}

func (bd *FrozenBufferedUpdates) String() string {
	panic("not implemented yet")
}

// any reports whether this frozen packet carries any term deletes, query
// deletes, or doc-values updates.
func (bd *FrozenBufferedUpdates) any() bool {
	return bd.termCount > 0 || len(bd._queries) > 0 ||
		len(bd.numericDVUpdates) > 0 || len(bd.binaryDVUpdates) > 0
}
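// flushDeletesSketch is a minimal illustrative sketch, not part of the
// upstream port: it shows the flush-time hand-off described in the comments
// above, in which the mutable per-segment BufferedUpdates is frozen into a
// write-once FrozenBufferedUpdates packet and then cleared for reuse. Note
// that buffered docIDs are applied directly at flush and are not carried
// into the frozen packet.
func flushDeletesSketch(bd *BufferedUpdates, isSegmentPrivate bool) *FrozenBufferedUpdates {
	if !bd.any() {
		return nil // nothing buffered, nothing to push
	}
	frozen := freezeBufferedUpdates(bd, isSegmentPrivate)
	bd.clear() // the mutable buffer can now be reused
	return frozen
}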