github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/bufferedDeletes.go (about)

     1  package index
     2  
     3  import (
     4  	// "bytes"
     5  	"fmt"
     6  	"github.com/balzaczyy/golucene/core/util"
     7  	"math"
     8  	"sync/atomic"
     9  )
    10  
    11  // index/BufferedUpdates.java
    12  
/*
BYTES_PER_DEL_DOCID is the amount added to bytesUsed for every buffered
doc ID. A Go slice is assumed to consume two ints for an extra doc ID,
allowing for 50% pre-allocation.
*/
const BYTES_PER_DEL_DOCID = 2 * util.NUM_BYTES_INT

/*
BYTES_PER_DEL_QUERY is the per-query cost used when estimating the size
of a frozen packet. A Go map (amd64) is assumed to consume about 40 bytes
for an extra entry, plus one object reference and one int.
*/
const BYTES_PER_DEL_QUERY = 40 + util.NUM_BYTES_OBJECT_REF + util.NUM_BYTES_INT

/* MAX_INT is math.MaxInt32 expressed as an int. */
const MAX_INT = int(math.MaxInt32)

/*
VERBOSE is a compile-time debug switch, currently off. No live code in
this part of the file reads it.
NOTE(review): presumably meant to enable more detailed String output - confirm.
*/
const VERBOSE = false
    22  
/*
BufferedUpdates holds buffered deletes, by docID, term or query, for a
single segment. This is used to hold buffered pending deletes against the
to-be-flushed segment. Once the deletes are pushed (on flush in DW),
these deletes are converted to a FrozenBufferedUpdates instance
(see freezeBufferedUpdates).

NOTE: instances of this type are accessed either via a private
instance on DocumentsWriterPerThread, or via sync'd code by
DocumentsWriterDeleteQueue
*/
type BufferedUpdates struct {
	// Running count of term deletes; only touched through sync/atomic.
	numTermDeletes int32 // atomic

	// Deleted terms. Keys are *Term pointers, so Go compares them by
	// pointer identity rather than by term content.
	// NOTE(review): the int value is presumably the docID upper bound the
	// delete applies to - confirm against the code that fills this map.
	terms   map[*Term]int
	// Deleted queries; freezeBufferedUpdates copies each value into the
	// frozen packet's queryLimits (the docIDUpto for that query).
	queries map[interface{}]int
	// Doc IDs deleted directly; appended to by addDocID.
	docIDs  []int

	// Pending numeric doc-values updates, flattened by freezeBufferedUpdates.
	// NOTE(review): the outer string key is presumably the field name - confirm.
	numericUpdates map[string]map[*Term]*DocValuesUpdate

	// Pending binary doc-values updates, same layout as numericUpdates.
	binaryUpdates map[string]map[*Term]*DocValuesUpdate

	// Estimated memory held by the buffered entries; only touched through
	// sync/atomic (addDocID adds BYTES_PER_DEL_DOCID per doc ID).
	bytesUsed int64 // atomic

	// Not assigned anywhere in this part of the file.
	// NOTE(review): presumably a generation stamp set by the owner - confirm.
	gen int64
}
    48  
    49  func newBufferedUpdates() *BufferedUpdates {
    50  	return &BufferedUpdates{
    51  		terms:          make(map[*Term]int),
    52  		queries:        make(map[interface{}]int),
    53  		numericUpdates: make(map[string]map[*Term]*DocValuesUpdate),
    54  		binaryUpdates:  make(map[string]map[*Term]*DocValuesUpdate),
    55  	}
    56  }
    57  
    58  func (bd *BufferedUpdates) String() string {
    59  	panic("not implemented yet")
    60  	// if VERBOSE {
    61  	// 	return fmt.Sprintf(
    62  	// 		"BufferedUpdates[gen=%v, numTerms=%v, terms=%v, queries=%v, docIDs=%v, bytesUsed=%v]",
    63  	// 		bd.gen, atomic.LoadInt32(&bd.numTermDeletes), bd.terms, bd.queries, bd.docIDs, bd.bytesUsed)
    64  	// } else {
    65  	// 	var buf bytes.Buffer
    66  	// 	fmt.Fprintf(&buf, "BufferedUpdates[gen=%v", bd.gen)
    67  	// 	if n := atomic.LoadInt32(&bd.numTermDeletes); n != 0 {
    68  	// 		fmt.Fprintf(&buf, " %v deleted terms (unique count=%v)", n, len(bd.terms))
    69  	// 	}
    70  	// 	if len(bd.queries) > 0 {
    71  	// 		fmt.Fprintf(&buf, " %v deleted queries", len(bd.queries))
    72  	// 	}
    73  	// 	if len(bd.docIDs) > 0 {
    74  	// 		fmt.Fprintf(&buf, " %v deleted docIDs", len(bd.docIDs))
    75  	// 	}
    76  	// 	if n := atomic.LoadInt64(&bd.bytesUsed); n != 0 {
    77  	// 		fmt.Fprintf(&buf, " bytesUsed=%v", n)
    78  	// 	}
    79  	// 	buf.WriteRune(']')
    80  	// 	return buf.String()
    81  	// }
    82  }
    83  
    84  func (bd *BufferedUpdates) addDocID(docID int) {
    85  	bd.docIDs = append(bd.docIDs, docID)
    86  	atomic.AddInt64(&bd.bytesUsed, BYTES_PER_DEL_DOCID)
    87  }
    88  
    89  func (bd *BufferedUpdates) clear() {
    90  	bd.terms = make(map[*Term]int)
    91  	bd.queries = make(map[interface{}]int)
    92  	bd.docIDs = nil
    93  	atomic.StoreInt32(&bd.numTermDeletes, 0)
    94  	atomic.StoreInt64(&bd.bytesUsed, 0)
    95  }
    96  
    97  func (bd *BufferedUpdates) any() bool {
    98  	return len(bd.terms) > 0 || len(bd.docIDs) > 0 || len(bd.queries) > 0 ||
    99  		len(bd.numericUpdates) > 0 || len(bd.binaryUpdates) > 0
   100  }
   101  
   102  // index/FrozenBufferedUpdates.java
   103  
/*
FrozenBufferedUpdates holds buffered deletes and updates by term or
query, once pushed. Pushed deletes/updates are write-once, so we shift to
more memory efficient data structure to hold them. We don't hold docIDs
because these are applied on flush.

Instances are built by freezeBufferedUpdates.
*/
type FrozenBufferedUpdates struct {
	// Terms, in sorted order:
	terms     *PrefixCodedTerms
	// Number of unique terms that were prefix-coded into terms.
	termCount int // just for debugging

	// Parallel array of deleted query, and the docIDUpto for each
	_queries    []Query
	queryLimits []int

	// numeric DV update term and their updates
	numericDVUpdates []*DocValuesUpdate

	// binary DV update term and their updates
	binaryDVUpdates []*DocValuesUpdate

	// Estimated size of this packet in bytes, computed once at freeze time.
	bytesUsed      int
	// Snapshot of the source BufferedUpdates' term-delete counter.
	numTermDeletes int
	gen            int64 // -1, assigned by BufferedUpdatesStream once pushed
	// true iff this frozen packet represents a segment private deletes
	// in that case it should only have queries
	isSegmentPrivate bool
}
   132  
   133  func freezeBufferedUpdates(deletes *BufferedUpdates, isPrivate bool) *FrozenBufferedUpdates {
   134  	assert2(!isPrivate || len(deletes.terms) == 0,
   135  		"segment private package should only have del queries")
   136  	var termsArray []*Term
   137  	for k, _ := range deletes.terms {
   138  		termsArray = append(termsArray, k)
   139  	}
   140  	util.TimSort(TermSorter(termsArray))
   141  	builder := newPrefixCodedTermsBuilder()
   142  	for _, term := range termsArray {
   143  		builder.add(term)
   144  	}
   145  	terms := builder.finish()
   146  
   147  	queries := make([]Query, len(deletes.queries))
   148  	queryLimits := make([]int, len(deletes.queries))
   149  	var upto = 0
   150  	for k, v := range deletes.queries {
   151  		queries[upto] = k
   152  		queryLimits[upto] = v
   153  		upto++
   154  	}
   155  
   156  	// TODO if a Term affects multiple fields, we could keep the updates key'd by Term
   157  	// so that it maps to all fields it affects, sorted by their docUpto, and traverse
   158  	// that Term only once, applying the update to all fields that still need to be
   159  	// updated.
   160  	var allNumericUpdates []*DocValuesUpdate
   161  	numericUpdatesSize := 0
   162  	for _, numericUpdates := range deletes.numericUpdates {
   163  		for _, update := range numericUpdates {
   164  			allNumericUpdates = append(allNumericUpdates, update)
   165  			numericUpdatesSize += update.sizeInBytes()
   166  		}
   167  	}
   168  
   169  	// TODO if a Term affects multiple fields, we could keep the updates key'd by Term
   170  	// so that it maps to all fields it affects, sorted by their docUpto, and traverse
   171  	// that Term only once, applying the update to all fields that still need to be
   172  	// updated.
   173  	var allBinaryUpdates []*DocValuesUpdate
   174  	binaryUpdatesSize := 0
   175  	for _, binaryUpdates := range deletes.binaryUpdates {
   176  		for _, update := range binaryUpdates {
   177  			allBinaryUpdates = append(allBinaryUpdates, update)
   178  			binaryUpdatesSize += update.sizeInBytes()
   179  		}
   180  	}
   181  
   182  	bytesUsed := int(terms.RamBytesUsed() +
   183  		int64(len(queries))*BYTES_PER_DEL_QUERY +
   184  		int64(numericUpdatesSize) + util.ShallowSizeOf(allNumericUpdates) +
   185  		int64(binaryUpdatesSize) + util.ShallowSizeOf(allBinaryUpdates))
   186  
   187  	return &FrozenBufferedUpdates{
   188  		gen:              -1,
   189  		isSegmentPrivate: isPrivate,
   190  		termCount:        len(termsArray),
   191  		terms:            terms,
   192  		_queries:         queries,
   193  		queryLimits:      queryLimits,
   194  		numericDVUpdates: allNumericUpdates,
   195  		binaryDVUpdates:  allBinaryUpdates,
   196  		bytesUsed:        bytesUsed,
   197  		numTermDeletes:   int(atomic.LoadInt32(&deletes.numTermDeletes)),
   198  	}
   199  }
   200  
   201  func assert(ok bool) {
   202  	if !ok {
   203  		panic("assert fail")
   204  	}
   205  }
   206  
   207  func assert2(ok bool, msg string, args ...interface{}) {
   208  	if !ok {
   209  		panic(fmt.Sprintf(msg, args...))
   210  	}
   211  }
   212  
// queries is an unported stub: it panics unconditionally with
// "not implemented yet" and never returns.
// NOTE(review): presumably intended to pair each entry of _queries with
// its queryLimits bound as a QueryAndLimit - confirm before implementing.
func (bd *FrozenBufferedUpdates) queries() []*QueryAndLimit {
	panic("not implemented yet")
}
   216  
// String is an unported stub: a direct call panics unconditionally with
// "not implemented yet".
func (bd *FrozenBufferedUpdates) String() string {
	panic("not implemented yet")
}
   220  
   221  func (d *FrozenBufferedUpdates) any() bool {
   222  	return d.termCount > 0 || len(d._queries) > 0 || len(d.numericDVUpdates) > 0 || len(d.binaryDVUpdates) > 0
   223  }