github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/live.go (about)

     1  package index
     2  
     3  import (
     4  	"fmt"
     5  	"github.com/balzaczyy/golucene/core/analysis"
     6  	. "github.com/balzaczyy/golucene/core/codec/spi"
     7  	"github.com/balzaczyy/golucene/core/util"
     8  	"reflect"
     9  )
    10  
    11  // index/LiveIndexWriterConfig.java
    12  
    13  /*
    14  Holds all the configuration used by IndexWriter with few setters for
    15  settings that can be changed on an IndexWriter instance "live".
    16  
    17  All the fields are either readonly or volatile.
    18  */
    19  type LiveIndexWriterConfig interface {
    20  	TermIndexInterval() int
    21  	MaxBufferedDocs() int
    22  	RAMBufferSizeMB() float64
    23  	Similarity() Similarity
    24  	Codec() Codec
    25  	MergePolicy() MergePolicy
    26  	indexingChain() IndexingChain
    27  	RAMPerThreadHardLimitMB() int
    28  	flushPolicy() FlushPolicy
    29  	InfoStream() util.InfoStream
    30  	indexerThreadPool() *DocumentsWriterPerThreadPool
    31  	UseCompoundFile() bool
    32  }
    33  
    34  type LiveIndexWriterConfigImpl struct {
    35  	analyzer analysis.Analyzer
    36  
    37  	maxBufferedDocs         int
    38  	ramBufferSizeMB         float64
    39  	maxBufferedDeleteTerms  int
    40  	readerTermsIndexDivisor int
    41  	mergedSegmentWarmer     IndexReaderWarmer
    42  	termIndexInterval       int
    43  	// TODO: this should be private to the codec, not settable here
    44  
    45  	// controlling when commit points are deleted.
    46  	delPolicy IndexDeletionPolicy
    47  
    48  	// IndexCommit that IndexWriter is opened on.
    49  	commit IndexCommit
    50  
    51  	// OpenMode that IndexWriter is opened with.
    52  	openMode OpenMode
    53  
    54  	// Similarity to use when encoding norms.
    55  	similarity Similarity
    56  
    57  	// MergeScheduler to use for running merges.
    58  	mergeScheduler MergeScheduler
    59  
    60  	// Timeout when trying to obtain the write lock on init.
    61  	writeLockTimeout int64
    62  
    63  	// IndexingChain that determines how documents are indexed.
    64  	_indexingChain IndexingChain
    65  
    66  	// Codec used to write new segments.
    67  	codec Codec
    68  
    69  	// InfoStream for debugging messages.
    70  	infoStream util.InfoStream
    71  
    72  	// MergePolicy for selecting merges.
    73  	mergePolicy MergePolicy
    74  
    75  	// DocumentsWriterPerThreadPool to control how goroutines are
    76  	// allocated to DocumentsWriterPerThread.
    77  	_indexerThreadPool *DocumentsWriterPerThreadPool
    78  
    79  	// True if readers should be pooled.
    80  	readerPooling bool
    81  
    82  	// FlushPolicy to control when segments are flushed.
    83  	_flushPolicy FlushPolicy
    84  
    85  	// Sets the hard upper bound on RAM usage for a single segment,
    86  	// after which the segment is forced to flush.
    87  	perRoutineHardLimitMB int
    88  
    89  	// Version that IndexWriter should emulate.
    90  	matchVersion util.Version
    91  
    92  	// True is segment flushes should use compound file format
    93  	useCompoundFile bool // volatile
    94  
    95  	// True if merging should check integrity of segments before merge
    96  	checkIntegrityAtMerge bool // volatile
    97  }
    98  
    99  // used by IndexWriterConfig
   100  func newLiveIndexWriterConfig(analyzer analysis.Analyzer,
   101  	matchVersion util.Version) *LiveIndexWriterConfigImpl {
   102  
   103  	assert(DefaultSimilarity != nil)
   104  	assert(DefaultCodec != nil)
   105  	return &LiveIndexWriterConfigImpl{
   106  		analyzer:                analyzer,
   107  		matchVersion:            matchVersion,
   108  		ramBufferSizeMB:         DEFAULT_RAM_BUFFER_SIZE_MB,
   109  		maxBufferedDocs:         DEFAULT_MAX_BUFFERED_DOCS,
   110  		maxBufferedDeleteTerms:  DEFAULT_MAX_BUFFERED_DELETE_TERMS,
   111  		readerTermsIndexDivisor: DEFAULT_READER_TERMS_INDEX_DIVISOR,
   112  		termIndexInterval:       DEFAULT_TERM_INDEX_INTERVAL, // TODO: this should be private to the codec, not settable here
   113  		delPolicy:               DEFAULT_DELETION_POLICY,
   114  		useCompoundFile:         DEFAULT_USE_COMPOUND_FILE_SYSTEM,
   115  		openMode:                OPEN_MODE_CREATE_OR_APPEND,
   116  		similarity:              DefaultSimilarity(),
   117  		mergeScheduler:          NewConcurrentMergeScheduler(),
   118  		writeLockTimeout:        WRITE_LOCK_TIMEOUT,
   119  		_indexingChain:          defaultIndexingChain,
   120  		codec:                   DefaultCodec(),
   121  		infoStream:              util.DefaultInfoStream(),
   122  		mergePolicy:             NewTieredMergePolicy(),
   123  		_flushPolicy:            newFlushByRamOrCountsPolicy(),
   124  		readerPooling:           DEFAULT_READER_POOLING,
   125  		_indexerThreadPool:      NewDocumentsWriterPerThreadPool(DEFAULT_MAX_THREAD_STATES),
   126  		perRoutineHardLimitMB:   DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB,
   127  		checkIntegrityAtMerge:   DEFAULT_CHECK_INTEGRITY_AT_MERGE,
   128  	}
   129  }
   130  
   131  // Creates a new config that handles the live IndexWriter settings.
   132  // func newLiveIndexWriterConfigFrom(config *IndexWriterConfig) *LiveIndexWriterConfigImpl {
   133  // 	return &LiveIndexWriterConfig{
   134  // 		maxBufferedDeleteTerms:  config.maxBufferedDeleteTerms,
   135  // 		maxBufferedDocs:         config.maxBufferedDocs,
   136  // 		mergedSegmentWarmer:     config.mergedSegmentWarmer,
   137  // 		ramBufferSizeMB:         config.ramBufferSizeMB,
   138  // 		readerTermsIndexDivisor: config.readerTermsIndexDivisor,
   139  // 		termIndexInterval:       config.termIndexInterval,
   140  // 		matchVersion:            config.matchVersion,
   141  // 		analyzer:                config.analyzer,
   142  // 		delPolicy:               config.delPolicy,
   143  // 		commit:                  config.commit,
   144  // 		openMode:                config.openMode,
   145  // 		similarity:              config.similarity,
   146  // 		mergeScheduler:          config.mergeScheduler,
   147  // 		writeLockTimeout:        config.writeLockTimeout,
   148  // 		indexingChain:           config.indexingChain,
   149  // 		codec:                   config.codec,
   150  // 		infoStream:              config.infoStream,
   151  // 		mergePolicy:             config.mergePolicy,
   152  // 		indexerThreadPool:       config.indexerThreadPool,
   153  // 		readerPooling:           config.readerPooling,
   154  // 		flushPolicy:             config.flushPolicy,
   155  // 		perRoutineHardLimitMB:   config.perRoutineHardLimitMB,
   156  // 		useCompoundFile:         config.useCompoundFile,
   157  // 	}
   158  // }
   159  
   160  func (conf *LiveIndexWriterConfigImpl) TermIndexInterval() int {
   161  	return conf.termIndexInterval
   162  }
   163  
   164  // L358
   165  /*
   166  Determines the minimal number of documents required before the
   167  buffered in-memory documents are flushed as a new Segment. Large
   168  values generally give faster indexing.
   169  
   170  When this is set, the writer will flush every maxBufferedDocs added
   171  documents. Pass in DISABLE_AUTO_FLUSH to prevent triggering a flush
   172  due to number of buffered documents. Note that if flushing by RAM
   173  usage is also enabled, then the flush will be triggered by whichever
   174  comes first.
   175  
   176  Disabled by default (writer flushes by RAM usage).
   177  
   178  Takes effect immediately, but only the next time a document is added,
   179  updated or deleted.
   180  */
   181  func (conf *LiveIndexWriterConfigImpl) SetMaxBufferedDocs(maxBufferedDocs int) *LiveIndexWriterConfigImpl {
   182  	assert2(maxBufferedDocs == DISABLE_AUTO_FLUSH || maxBufferedDocs >= 2,
   183  		"maxBufferedDocs must at least be 2 when enabled")
   184  	assert2(maxBufferedDocs != DISABLE_AUTO_FLUSH || conf.ramBufferSizeMB != DISABLE_AUTO_FLUSH,
   185  		"at least one of ramBufferSize and maxBufferedDocs must be enabled")
   186  	conf.maxBufferedDocs = maxBufferedDocs
   187  	return conf
   188  }
   189  
   190  /* Returns the number of buffered added documents that will trigger a flush if enabled. */
   191  func (conf *LiveIndexWriterConfigImpl) MaxBufferedDocs() int {
   192  	return conf.maxBufferedDocs
   193  }
   194  
   195  /*
   196  Expert: MergePolicy is invoked whenver there are changes to the
   197  segments in the index. Its role is to select which merges to do, if
   198  any, and return a MergeSpecification describing the merges. It also
   199  selects merges to do for forceMerge.
   200  */
   201  func (conf *LiveIndexWriterConfigImpl) SetMergePolicy(mergePolicy MergePolicy) *LiveIndexWriterConfigImpl {
   202  	assert2(mergePolicy != nil, "mergePolicy must not be nil")
   203  	conf.mergePolicy = mergePolicy
   204  	return conf
   205  }
   206  
   207  func (conf *LiveIndexWriterConfigImpl) RAMBufferSizeMB() float64 {
   208  	return conf.ramBufferSizeMB
   209  }
   210  
   211  /*
   212  Sets the merged segment warmer.
   213  
   214  Take effect on the next merge.
   215  */
   216  func (conf *LiveIndexWriterConfigImpl) SetMergedSegmentWarmer(mergeSegmentWarmer IndexReaderWarmer) *LiveIndexWriterConfigImpl {
   217  	conf.mergedSegmentWarmer = mergeSegmentWarmer
   218  	return conf
   219  }
   220  
   221  /*
   222  Sets the termsIndeDivisor passed to any readers that IndexWriter
   223  opens, for example when applying deletes or creating a near-real-time
   224  reader in OpenDirectoryReader(). If you pass -1, the terms index
   225  won't be loaded by the readers. This is only useful in advanced
   226  siguations when you will only .Next() through all terms; attempts to
   227  seek will hit an error.
   228  
   229  takes effect immediately, but only applies to readers opened after
   230  this call
   231  
   232  NOTE: divisor settings > 1 do not apply to all PostingsFormat
   233  implementation, including the default one in this release. It only
   234  makes sense for terms indexes that can efficiently re-sample terms at
   235  load time.
   236  */
   237  func (conf *LiveIndexWriterConfigImpl) SetReaderTermsIndexDivisor(divisor int) *LiveIndexWriterConfigImpl {
   238  	assert2(divisor > 0 || divisor == -1, fmt.Sprintf(
   239  		"divisor must be >= 1, or -1 (got %v)", divisor))
   240  	conf.readerTermsIndexDivisor = divisor
   241  	return conf
   242  }
   243  
   244  func (conf *LiveIndexWriterConfigImpl) Similarity() Similarity {
   245  	return conf.similarity
   246  }
   247  
   248  /* Returns the current Codec. */
   249  func (conf *LiveIndexWriterConfigImpl) Codec() Codec {
   250  	return conf.codec
   251  }
   252  
   253  // L477
   254  /* Returns the current MergePolicy in use by this writer. */
   255  func (conf *LiveIndexWriterConfigImpl) MergePolicy() MergePolicy {
   256  	return conf.mergePolicy
   257  }
   258  
   259  /* Returns the configured DocumentsWriterPerThreadPool instance. */
   260  func (conf *LiveIndexWriterConfigImpl) indexerThreadPool() *DocumentsWriterPerThreadPool {
   261  	return conf._indexerThreadPool
   262  }
   263  
   264  func (conf *LiveIndexWriterConfigImpl) indexingChain() IndexingChain {
   265  	return conf._indexingChain
   266  }
   267  
   268  func (conf *LiveIndexWriterConfigImpl) RAMPerThreadHardLimitMB() int {
   269  	return conf.perRoutineHardLimitMB
   270  }
   271  
   272  func (conf *LiveIndexWriterConfigImpl) flushPolicy() FlushPolicy {
   273  	return conf._flushPolicy
   274  }
   275  
   276  /* Returns InfoStream used for debugging. */
   277  func (conf *LiveIndexWriterConfigImpl) InfoStream() util.InfoStream {
   278  	return conf.infoStream
   279  }
   280  
   281  /*
   282  Sets if the IndexWriter should pack newly written segments in a
   283  compound file. Default is true.
   284  
   285  Use false for batch indexing with very large ram buffer settings.
   286  
   287  Note: To control compound file usage during segment merges see
   288  SetNoCFSRatio() and SetMaxCFSSegmentSizeMB(). This setting only
   289  applies to newly created segment.
   290  */
   291  func (conf *LiveIndexWriterConfigImpl) SetUseCompoundFile(useCompoundFile bool) *LiveIndexWriterConfigImpl {
   292  	conf.useCompoundFile = useCompoundFile
   293  	return conf
   294  }
   295  
   296  func (conf *LiveIndexWriterConfigImpl) UseCompoundFile() bool {
   297  	return conf.useCompoundFile
   298  }
   299  
   300  func (conf *LiveIndexWriterConfigImpl) String() string {
   301  	return fmt.Sprintf(`matchVersion=%v
   302  analyzer=%v
   303  ramBufferSizeMB=%v
   304  maxBufferedDocs=%v
   305  maxBufferedDeleteTerms=%v
   306  mergedSegmentWarmer=%v
   307  readerTermsIndexDivisor=%v
   308  termIndexInterval=%v
   309  delPolicy=%v
   310  commit=%v
   311  openMode=%v
   312  similarity=%v
   313  mergeScheduler=%v
   314  default WRITE_LOCK_TIMEOUT=%v
   315  writeLockTimeout=%v
   316  codec=%v
   317  infoStream=%v
   318  mergePolicy=%v
   319  indexerThreadPool=%v
   320  readerPooling=%v
   321  perThreadHardLimitMB=%v
   322  useCompoundFile=%v
   323  checkIntegrityAtMerge=%v
   324  `, conf.matchVersion, reflect.TypeOf(conf.analyzer),
   325  		conf.ramBufferSizeMB, conf.maxBufferedDocs,
   326  		conf.maxBufferedDeleteTerms, reflect.TypeOf(conf.mergedSegmentWarmer),
   327  		conf.readerTermsIndexDivisor, conf.termIndexInterval,
   328  		reflect.TypeOf(conf.delPolicy), conf.commit,
   329  		conf.openMode, reflect.TypeOf(conf.similarity),
   330  		conf.mergeScheduler, WRITE_LOCK_TIMEOUT,
   331  		conf.writeLockTimeout, conf.codec,
   332  		reflect.TypeOf(conf.infoStream), conf.mergePolicy,
   333  		conf.indexerThreadPool, conf.readerPooling,
   334  		conf.perRoutineHardLimitMB, conf.useCompoundFile,
   335  		conf.checkIntegrityAtMerge)
   336  }