github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/deleteStream.go

package index

import (
	"fmt"
	. "github.com/balzaczyy/golucene/core/codec/spi"
	"github.com/balzaczyy/golucene/core/store"
	"github.com/balzaczyy/golucene/core/util"
	"log"
	"math"
	"sort"
	"sync"
	"sync/atomic"
	"time"
)

// index/BufferedUpdatesStream.java

type ApplyDeletesResult struct {
	// True if any actual deletes took place:
	anyDeletes bool

	// Current gen, for the merged segment:
	gen int64

	// If non-nil, contains segments that are 100% deleted
	allDeleted []*SegmentCommitInfo
}

type SegInfoByDelGen []*SegmentCommitInfo

func (a SegInfoByDelGen) Len() int      { return len(a) }
func (a SegInfoByDelGen) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a SegInfoByDelGen) Less(i, j int) bool {
	return a[i].BufferedUpdatesGen < a[j].BufferedUpdatesGen
}

type Query interface{}

type QueryAndLimit struct {
}

// index/CoalescedUpdates.java

type CoalescedUpdates struct {
	_queries         map[Query]int
	numericDVUpdates []*DocValuesUpdate
	binaryDVUpdates  []*DocValuesUpdate
}

func newCoalescedUpdates() *CoalescedUpdates {
	return &CoalescedUpdates{
		_queries: make(map[Query]int),
	}
}

func (cd *CoalescedUpdates) String() string {
	panic("not implemented yet")
}

func (cd *CoalescedUpdates) update(in *FrozenBufferedUpdates) {
	panic("not implemented yet")
}

func (cd *CoalescedUpdates) terms() []*Term {
	panic("not implemented yet")
}

func (cd *CoalescedUpdates) queries() []*QueryAndLimit {
	panic("not implemented yet")
}

/*
Tracks the stream of BufferedUpdates. When DocumentsWriterPerThread
flushes, its buffered deletes and updates are appended to this stream.
We later apply them (resolve them to the actual docIDs, per segment)
when a merge is started (only to the to-be-merged segments). We also
apply to all segments when an NRT reader is pulled, commit/close is
called, or when too many deletes or updates are buffered and must be
flushed (by RAM usage or by count).

Each packet is assigned a generation, and each flushed or merged
segment is also assigned a generation, so we can track which
BufferedUpdates packets apply to any given segment.
*/
type BufferedUpdatesStream struct {
	sync.Locker
	// TODO: maybe linked list?
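	// Packets of frozen deletes/updates, appended in order of
	// increasing delGen as DWPTs flush; pruned from the front once
	// every segment has had the older packets applied (see prune).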
	updates []*FrozenBufferedUpdates

	// Starts at 1 so that SegmentInfos that have never had deletes
	// applied (whose bufferedDelGen defaults to 0) will be correct:
	nextGen int64

	// used only by assert
	lastDeleteTerm *Term

	infoStream util.InfoStream
	bytesUsed  int64 // atomic
	numTerms   int32 // atomic
}

func newBufferedUpdatesStream(infoStream util.InfoStream) *BufferedUpdatesStream {
	return &BufferedUpdatesStream{
		Locker:     &sync.Mutex{},
		updates:    make([]*FrozenBufferedUpdates, 0),
		nextGen:    1,
		infoStream: infoStream,
	}
}

/* Appends a new packet of buffered deletes to the stream, setting its generation: */
func (ds *BufferedUpdatesStream) push(packet *FrozenBufferedUpdates) int64 {
	panic("not implemented yet")
}

func (ds *BufferedUpdatesStream) clear() {
	ds.Lock()
	defer ds.Unlock()

	ds.updates = nil
	ds.nextGen = 1
	atomic.StoreInt32(&ds.numTerms, 0)
	atomic.StoreInt64(&ds.bytesUsed, 0)
}

func (ds *BufferedUpdatesStream) any() bool {
	return atomic.LoadInt64(&ds.bytesUsed) != 0
}

func (ds *BufferedUpdatesStream) RamBytesUsed() int64 {
	return atomic.LoadInt64(&ds.bytesUsed)
}

/*
Resolves the buffered deleted Term/Query/docIDs into actual deleted
docIDs in the liveDocs MutableBits for each SegmentReader.
*/
func (ds *BufferedUpdatesStream) applyDeletesAndUpdates(readerPool *ReaderPool, infos []*SegmentCommitInfo) (*ApplyDeletesResult, error) {
	ds.Lock()
	defer ds.Unlock()

	if len(infos) == 0 {
		ds.nextGen++
		return &ApplyDeletesResult{false, ds.nextGen - 1, nil}, nil
	}

	t0 := time.Now()
	ds.assertDeleteStats()
	if !ds.any() {
		if ds.infoStream.IsEnabled("BD") {
			ds.infoStream.Message("BD", "applyDeletes: no deletes; skipping")
		}
		ds.nextGen++
		return &ApplyDeletesResult{false, ds.nextGen - 1, nil}, nil
	}

	if ds.infoStream.IsEnabled("BD") {
		ds.infoStream.Message("BD", "applyDeletes: infos=%v packetCount=%v", infos, len(ds.updates))
	}

	gen := ds.nextGen
	ds.nextGen++

	infos2 := make([]*SegmentCommitInfo, len(infos))
	copy(infos2, infos)
	sort.Sort(SegInfoByDelGen(infos2))

	var coalescedUpdates *CoalescedUpdates
	var anyNewDeletes bool

	infosIDX := len(infos2) - 1
	delIDX := len(ds.updates) - 1

	var allDeleted []*SegmentCommitInfo

	for infosIDX >= 0 {
		log.Printf("BD: cycle delIDX=%v infoIDX=%v", delIDX, infosIDX)

		var packet *FrozenBufferedUpdates
		if delIDX >= 0 {
			packet = ds.updates[delIDX]
		}
		info := infos2[infosIDX]
		segGen := info.BufferedUpdatesGen

		if packet != nil && segGen < packet.gen {
			log.Println(" coalesce")
			if coalescedUpdates == nil {
				coalescedUpdates = newCoalescedUpdates()
			}
			if !packet.isSegmentPrivate {
				// Only coalesce if we are NOT on a segment private del
				// packet: the segment private del packet must only be
				// applied to segments with the same delGen. Yet, if a
				// segment is already deleted from the SI since it had no
				// more documents remaining after some del packets younger
				// than its segPrivate packet (higher delGen) have been
				// applied, the segPrivate packet has not been removed.
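				// Fold this packet's term/query deletes and DV updates into
				// the coalesced set; they will be applied to this segment and
				// to every older segment as the loop continues.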
				coalescedUpdates.update(packet)
			}
			delIDX--

		} else if packet != nil && segGen == packet.gen {
			assertn(packet.isSegmentPrivate,
				"Packet and Segments deletegen can only match on a segment private del packet gen=%v",
				segGen)
			log.Println(" eq")

			// Lock order: IW -> BD -> RP
			assert(readerPool.infoIsLive(info))
			rld := readerPool.get(info, true)
			reader, err := rld.reader(store.IO_CONTEXT_READ)
			if err != nil {
				return nil, err
			}
			delCount, segAllDeletes, err := func() (delCount int64, segAllDeletes bool, err error) {
				defer func() {
					err = mergeError(err, rld.release(reader))
					err = mergeError(err, readerPool.release(rld))
				}()
				dvUpdates := newDocValuesFieldUpdatesContainer()
				if coalescedUpdates != nil {
					fmt.Println(" del coalesced")
					var delta int64
					delta, err = ds._applyTermDeletes(coalescedUpdates.terms(), rld, reader)
					if err == nil {
						delCount += delta
						delta, err = applyQueryDeletes(coalescedUpdates.queries(), rld, reader)
						if err == nil {
							delCount += delta
							err = ds.applyDocValuesUpdates(coalescedUpdates.numericDVUpdates, rld, reader, dvUpdates)
							if err == nil {
								err = ds.applyDocValuesUpdates(coalescedUpdates.binaryDVUpdates, rld, reader, dvUpdates)
							}
						}
					}
					if err != nil {
						return
					}
				}
				fmt.Println(" del exact")
				// Don't delete by Term here; DWPT already did that on flush:
				var delta int64
				delta, err = applyQueryDeletes(packet.queries(), rld, reader)
				if err == nil {
					delCount += delta
					err = ds.applyDocValuesUpdates(packet.numericDVUpdates, rld, reader, dvUpdates)
					if err == nil {
						err = ds.applyDocValuesUpdates(packet.binaryDVUpdates, rld, reader, dvUpdates)
						if err == nil && dvUpdates.any() {
							err = rld.writeFieldUpdates(info.Info.Dir, dvUpdates)
						}
					}
				}
				if err != nil {
					return
				}
				fullDelCount := rld.info.DelCount() + rld.pendingDeleteCount()
				infoDocCount := rld.info.Info.DocCount()
				assert(fullDelCount <= infoDocCount)
				return delCount, fullDelCount == infoDocCount, nil
			}()
			if err != nil {
				return nil, err
			}
			anyNewDeletes = anyNewDeletes || (delCount > 0)

			if segAllDeletes {
				allDeleted = append(allDeleted, info)
			}

			if ds.infoStream.IsEnabled("BD") {
				var suffix string
				if segAllDeletes {
					suffix = " 100% deleted"
				}
				ds.infoStream.Message("BD", "Seg=%v segGen=%v segDeletes=[%v]; coalesced deletes=[%v] newDelCount=%v%v",
					info, segGen, packet, coalescedUpdates, delCount, suffix)
			}

			if coalescedUpdates == nil {
				coalescedUpdates = newCoalescedUpdates()
			}

			// Since we are on a segment private del packet we must not
			// update the CoalescedUpdates here! We can simply advance to
			// the next packet and seginfo.
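			// The segment-private packet has been fully applied, so advance
			// past both it and this segment, and record the segment as
			// current through gen so prune can later drop unneeded packets.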
			delIDX--
			infosIDX--
			info.SetBufferedUpdatesGen(gen)

		} else {
			log.Println(" gt")

			if coalescedUpdates != nil {
				// Lock order: IW -> BD -> RP
				assert(readerPool.infoIsLive(info))
				rld := readerPool.get(info, true)
				reader, err := rld.reader(store.IO_CONTEXT_READ)
				if err != nil {
					return nil, err
				}
				delCount, segAllDeletes, err := func() (delCount int64, segAllDeletes bool, err error) {
					defer func() {
						err = mergeError(err, rld.release(reader))
						err = mergeError(err, readerPool.release(rld))
					}()
					var delta int64
					delta, err = ds._applyTermDeletes(coalescedUpdates.terms(), rld, reader)
					if err == nil {
						delCount += delta
						delta, err = applyQueryDeletes(coalescedUpdates.queries(), rld, reader)
						if err == nil {
							delCount += delta
							dvUpdates := newDocValuesFieldUpdatesContainer()
							err = ds.applyDocValuesUpdates(coalescedUpdates.numericDVUpdates, rld, reader, dvUpdates)
							if err == nil {
								err = ds.applyDocValuesUpdates(coalescedUpdates.binaryDVUpdates, rld, reader, dvUpdates)
								if err == nil && dvUpdates.any() {
									err = rld.writeFieldUpdates(info.Info.Dir, dvUpdates)
								}
							}
						}
					}
					if err != nil {
						return
					}

					fullDelCount := rld.info.DelCount() + rld.pendingDeleteCount()
					infoDocCount := rld.info.Info.DocCount()
					assert(fullDelCount <= infoDocCount)
					return delCount, fullDelCount == infoDocCount, nil
				}()
				if err != nil {
					return nil, err
				}
				anyNewDeletes = anyNewDeletes || (delCount > 0)

				if segAllDeletes {
					allDeleted = append(allDeleted, info)
				}

				if ds.infoStream.IsEnabled("BD") {
					var suffix string
					if segAllDeletes {
						suffix = " 100% deleted"
					}
					ds.infoStream.Message("BD", "Seg=%v segGen=%v coalesced deletes=[%v] newDelCount=%v%v",
						info, segGen, coalescedUpdates, delCount, suffix)
				}
			}
			info.SetBufferedUpdatesGen(gen)

			infosIDX--
		}
	}

	ds.assertDeleteStats()
	if ds.infoStream.IsEnabled("BD") {
		ds.infoStream.Message("BD", "applyDeletes took %v", time.Since(t0))
	}

	return &ApplyDeletesResult{anyNewDeletes, gen, allDeleted}, nil
}

func mergeError(err, err2 error) error {
	if err == nil {
		return err2
	}
	return fmt.Errorf("%v\n %v", err, err2)
}

// Lock order IW -> BD
/*
Removes any BufferedUpdates that we no longer need to store because
all segments in the index have had the deletes applied.
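We take the minimum BufferedUpdatesGen across the given segments and
drop every packet whose gen is below it; packets at or above that gen
may still be needed by some segment and are kept.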
*/
func (ds *BufferedUpdatesStream) prune(infos *SegmentInfos) {
	ds.assertDeleteStats()
	var minGen int64 = math.MaxInt64
	for _, info := range infos.Segments {
		if info.BufferedUpdatesGen < minGen {
			minGen = info.BufferedUpdatesGen
		}
	}

	if ds.infoStream.IsEnabled("BD") {
		var dir store.Directory
		if len(infos.Segments) > 0 {
			dir = infos.Segments[0].Info.Dir
		}
		ds.infoStream.Message("BD", "prune sis=%v minGen=%v packetCount=%v",
			infos.toString(dir), minGen, len(ds.updates))
	}
	for delIDX, update := range ds.updates {
		if update.gen >= minGen {
			ds.pruneUpdates(delIDX)
			ds.assertDeleteStats()
			return
		}
	}

	// All deletes pruned
	ds.pruneUpdates(len(ds.updates))
	assert(!ds.any())
	ds.assertDeleteStats()
}

func (ds *BufferedUpdatesStream) pruneUpdates(count int) {
	if count > 0 {
		if ds.infoStream.IsEnabled("BD") {
			ds.infoStream.Message("BD", "pruneDeletes: prune %v packets; %v packets remain",
				count, len(ds.updates)-count)
		}
		for delIDX := 0; delIDX < count; delIDX++ {
			packet := ds.updates[delIDX]
			n := atomic.AddInt32(&ds.numTerms, -int32(packet.numTermDeletes))
			assert(n >= 0)
			n2 := atomic.AddInt64(&ds.bytesUsed, -int64(packet.bytesUsed))
			assert(n2 >= 0)
			ds.updates[delIDX] = nil
		}
		ds.updates = ds.updates[count:]
	}
}

/* Delete by term */
func (ds *BufferedUpdatesStream) _applyTermDeletes(terms []*Term,
	rld *ReadersAndUpdates, reader *SegmentReader) (int64, error) {
	panic("not implemented yet")
}

/* DocValues updates */
func (ds *BufferedUpdatesStream) applyDocValuesUpdates(updates []*DocValuesUpdate,
	rld *ReadersAndUpdates, reader *SegmentReader,
	dvUpdatesContainer *DocValuesFieldUpdatesContainer) error {
	panic("not implemented yet")
}

/* Delete by query */
func applyQueryDeletes(queries []*QueryAndLimit,
	rld *ReadersAndUpdates, reader *SegmentReader) (int64, error) {
	panic("not implemented yet")
}

func (ds *BufferedUpdatesStream) assertDeleteStats() {
	var numTerms2 int
	var bytesUsed2 int64
	for _, packet := range ds.updates {
		numTerms2 += packet.numTermDeletes
		bytesUsed2 += int64(packet.bytesUsed)
	}
	n1 := int(atomic.LoadInt32(&ds.numTerms))
	assertn(numTerms2 == n1, "numTerms2=%v vs %v", numTerms2, n1)
	n2 := atomic.LoadInt64(&ds.bytesUsed)
	assertn(bytesUsed2 == n2, "bytesUsed2=%v vs %v", bytesUsed2, n2)
}
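// Illustrative lifecycle sketch, following the comments above; names such as
// pool, toMerge, sis and frozenPacket are placeholders, not identifiers from
// this package:
//
//	stream := newBufferedUpdatesStream(infoStream)
//	gen := stream.push(frozenPacket)                            // at DWPT flush
//	result, err := stream.applyDeletesAndUpdates(pool, toMerge) // before a merge, NRT reader, or commit
//	if err == nil && result.anyDeletes { /* drop result.allDeleted segments */ }
//	stream.prune(sis)                                           // discard packets no segment still needs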