github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/live.go (about) 1 package index 2 3 import ( 4 "fmt" 5 "github.com/balzaczyy/golucene/core/analysis" 6 . "github.com/balzaczyy/golucene/core/codec/spi" 7 "github.com/balzaczyy/golucene/core/util" 8 "reflect" 9 ) 10 11 // index/LiveIndexWriterConfig.java 12 13 /* 14 Holds all the configuration used by IndexWriter with few setters for 15 settings that can be changed on an IndexWriter instance "live". 16 17 All the fields are either readonly or volatile. 18 */ 19 type LiveIndexWriterConfig interface { 20 TermIndexInterval() int 21 MaxBufferedDocs() int 22 RAMBufferSizeMB() float64 23 Similarity() Similarity 24 Codec() Codec 25 MergePolicy() MergePolicy 26 indexingChain() IndexingChain 27 RAMPerThreadHardLimitMB() int 28 flushPolicy() FlushPolicy 29 InfoStream() util.InfoStream 30 indexerThreadPool() *DocumentsWriterPerThreadPool 31 UseCompoundFile() bool 32 } 33 34 type LiveIndexWriterConfigImpl struct { 35 analyzer analysis.Analyzer 36 37 maxBufferedDocs int 38 ramBufferSizeMB float64 39 maxBufferedDeleteTerms int 40 readerTermsIndexDivisor int 41 mergedSegmentWarmer IndexReaderWarmer 42 termIndexInterval int 43 // TODO: this should be private to the codec, not settable here 44 45 // controlling when commit points are deleted. 46 delPolicy IndexDeletionPolicy 47 48 // IndexCommit that IndexWriter is opened on. 49 commit IndexCommit 50 51 // OpenMode that IndexWriter is opened with. 52 openMode OpenMode 53 54 // Similarity to use when encoding norms. 55 similarity Similarity 56 57 // MergeScheduler to use for running merges. 58 mergeScheduler MergeScheduler 59 60 // Timeout when trying to obtain the write lock on init. 61 writeLockTimeout int64 62 63 // IndexingChain that determines how documents are indexed. 64 _indexingChain IndexingChain 65 66 // Codec used to write new segments. 67 codec Codec 68 69 // InfoStream for debugging messages. 70 infoStream util.InfoStream 71 72 // MergePolicy for selecting merges. 73 mergePolicy MergePolicy 74 75 // DocumentsWriterPerThreadPool to control how goroutines are 76 // allocated to DocumentsWriterPerThread. 77 _indexerThreadPool *DocumentsWriterPerThreadPool 78 79 // True if readers should be pooled. 80 readerPooling bool 81 82 // FlushPolicy to control when segments are flushed. 83 _flushPolicy FlushPolicy 84 85 // Sets the hard upper bound on RAM usage for a single segment, 86 // after which the segment is forced to flush. 87 perRoutineHardLimitMB int 88 89 // Version that IndexWriter should emulate. 90 matchVersion util.Version 91 92 // True is segment flushes should use compound file format 93 useCompoundFile bool // volatile 94 95 // True if merging should check integrity of segments before merge 96 checkIntegrityAtMerge bool // volatile 97 } 98 99 // used by IndexWriterConfig 100 func newLiveIndexWriterConfig(analyzer analysis.Analyzer, 101 matchVersion util.Version) *LiveIndexWriterConfigImpl { 102 103 assert(DefaultSimilarity != nil) 104 assert(DefaultCodec != nil) 105 return &LiveIndexWriterConfigImpl{ 106 analyzer: analyzer, 107 matchVersion: matchVersion, 108 ramBufferSizeMB: DEFAULT_RAM_BUFFER_SIZE_MB, 109 maxBufferedDocs: DEFAULT_MAX_BUFFERED_DOCS, 110 maxBufferedDeleteTerms: DEFAULT_MAX_BUFFERED_DELETE_TERMS, 111 readerTermsIndexDivisor: DEFAULT_READER_TERMS_INDEX_DIVISOR, 112 termIndexInterval: DEFAULT_TERM_INDEX_INTERVAL, // TODO: this should be private to the codec, not settable here 113 delPolicy: DEFAULT_DELETION_POLICY, 114 useCompoundFile: DEFAULT_USE_COMPOUND_FILE_SYSTEM, 115 openMode: OPEN_MODE_CREATE_OR_APPEND, 116 similarity: DefaultSimilarity(), 117 mergeScheduler: NewConcurrentMergeScheduler(), 118 writeLockTimeout: WRITE_LOCK_TIMEOUT, 119 _indexingChain: defaultIndexingChain, 120 codec: DefaultCodec(), 121 infoStream: util.DefaultInfoStream(), 122 mergePolicy: NewTieredMergePolicy(), 123 _flushPolicy: newFlushByRamOrCountsPolicy(), 124 readerPooling: DEFAULT_READER_POOLING, 125 _indexerThreadPool: NewDocumentsWriterPerThreadPool(DEFAULT_MAX_THREAD_STATES), 126 perRoutineHardLimitMB: DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB, 127 checkIntegrityAtMerge: DEFAULT_CHECK_INTEGRITY_AT_MERGE, 128 } 129 } 130 131 // Creates a new config that handles the live IndexWriter settings. 132 // func newLiveIndexWriterConfigFrom(config *IndexWriterConfig) *LiveIndexWriterConfigImpl { 133 // return &LiveIndexWriterConfig{ 134 // maxBufferedDeleteTerms: config.maxBufferedDeleteTerms, 135 // maxBufferedDocs: config.maxBufferedDocs, 136 // mergedSegmentWarmer: config.mergedSegmentWarmer, 137 // ramBufferSizeMB: config.ramBufferSizeMB, 138 // readerTermsIndexDivisor: config.readerTermsIndexDivisor, 139 // termIndexInterval: config.termIndexInterval, 140 // matchVersion: config.matchVersion, 141 // analyzer: config.analyzer, 142 // delPolicy: config.delPolicy, 143 // commit: config.commit, 144 // openMode: config.openMode, 145 // similarity: config.similarity, 146 // mergeScheduler: config.mergeScheduler, 147 // writeLockTimeout: config.writeLockTimeout, 148 // indexingChain: config.indexingChain, 149 // codec: config.codec, 150 // infoStream: config.infoStream, 151 // mergePolicy: config.mergePolicy, 152 // indexerThreadPool: config.indexerThreadPool, 153 // readerPooling: config.readerPooling, 154 // flushPolicy: config.flushPolicy, 155 // perRoutineHardLimitMB: config.perRoutineHardLimitMB, 156 // useCompoundFile: config.useCompoundFile, 157 // } 158 // } 159 160 func (conf *LiveIndexWriterConfigImpl) TermIndexInterval() int { 161 return conf.termIndexInterval 162 } 163 164 // L358 165 /* 166 Determines the minimal number of documents required before the 167 buffered in-memory documents are flushed as a new Segment. Large 168 values generally give faster indexing. 169 170 When this is set, the writer will flush every maxBufferedDocs added 171 documents. Pass in DISABLE_AUTO_FLUSH to prevent triggering a flush 172 due to number of buffered documents. Note that if flushing by RAM 173 usage is also enabled, then the flush will be triggered by whichever 174 comes first. 175 176 Disabled by default (writer flushes by RAM usage). 177 178 Takes effect immediately, but only the next time a document is added, 179 updated or deleted. 180 */ 181 func (conf *LiveIndexWriterConfigImpl) SetMaxBufferedDocs(maxBufferedDocs int) *LiveIndexWriterConfigImpl { 182 assert2(maxBufferedDocs == DISABLE_AUTO_FLUSH || maxBufferedDocs >= 2, 183 "maxBufferedDocs must at least be 2 when enabled") 184 assert2(maxBufferedDocs != DISABLE_AUTO_FLUSH || conf.ramBufferSizeMB != DISABLE_AUTO_FLUSH, 185 "at least one of ramBufferSize and maxBufferedDocs must be enabled") 186 conf.maxBufferedDocs = maxBufferedDocs 187 return conf 188 } 189 190 /* Returns the number of buffered added documents that will trigger a flush if enabled. */ 191 func (conf *LiveIndexWriterConfigImpl) MaxBufferedDocs() int { 192 return conf.maxBufferedDocs 193 } 194 195 /* 196 Expert: MergePolicy is invoked whenver there are changes to the 197 segments in the index. Its role is to select which merges to do, if 198 any, and return a MergeSpecification describing the merges. It also 199 selects merges to do for forceMerge. 200 */ 201 func (conf *LiveIndexWriterConfigImpl) SetMergePolicy(mergePolicy MergePolicy) *LiveIndexWriterConfigImpl { 202 assert2(mergePolicy != nil, "mergePolicy must not be nil") 203 conf.mergePolicy = mergePolicy 204 return conf 205 } 206 207 func (conf *LiveIndexWriterConfigImpl) RAMBufferSizeMB() float64 { 208 return conf.ramBufferSizeMB 209 } 210 211 /* 212 Sets the merged segment warmer. 213 214 Take effect on the next merge. 215 */ 216 func (conf *LiveIndexWriterConfigImpl) SetMergedSegmentWarmer(mergeSegmentWarmer IndexReaderWarmer) *LiveIndexWriterConfigImpl { 217 conf.mergedSegmentWarmer = mergeSegmentWarmer 218 return conf 219 } 220 221 /* 222 Sets the termsIndeDivisor passed to any readers that IndexWriter 223 opens, for example when applying deletes or creating a near-real-time 224 reader in OpenDirectoryReader(). If you pass -1, the terms index 225 won't be loaded by the readers. This is only useful in advanced 226 siguations when you will only .Next() through all terms; attempts to 227 seek will hit an error. 228 229 takes effect immediately, but only applies to readers opened after 230 this call 231 232 NOTE: divisor settings > 1 do not apply to all PostingsFormat 233 implementation, including the default one in this release. It only 234 makes sense for terms indexes that can efficiently re-sample terms at 235 load time. 236 */ 237 func (conf *LiveIndexWriterConfigImpl) SetReaderTermsIndexDivisor(divisor int) *LiveIndexWriterConfigImpl { 238 assert2(divisor > 0 || divisor == -1, fmt.Sprintf( 239 "divisor must be >= 1, or -1 (got %v)", divisor)) 240 conf.readerTermsIndexDivisor = divisor 241 return conf 242 } 243 244 func (conf *LiveIndexWriterConfigImpl) Similarity() Similarity { 245 return conf.similarity 246 } 247 248 /* Returns the current Codec. */ 249 func (conf *LiveIndexWriterConfigImpl) Codec() Codec { 250 return conf.codec 251 } 252 253 // L477 254 /* Returns the current MergePolicy in use by this writer. */ 255 func (conf *LiveIndexWriterConfigImpl) MergePolicy() MergePolicy { 256 return conf.mergePolicy 257 } 258 259 /* Returns the configured DocumentsWriterPerThreadPool instance. */ 260 func (conf *LiveIndexWriterConfigImpl) indexerThreadPool() *DocumentsWriterPerThreadPool { 261 return conf._indexerThreadPool 262 } 263 264 func (conf *LiveIndexWriterConfigImpl) indexingChain() IndexingChain { 265 return conf._indexingChain 266 } 267 268 func (conf *LiveIndexWriterConfigImpl) RAMPerThreadHardLimitMB() int { 269 return conf.perRoutineHardLimitMB 270 } 271 272 func (conf *LiveIndexWriterConfigImpl) flushPolicy() FlushPolicy { 273 return conf._flushPolicy 274 } 275 276 /* Returns InfoStream used for debugging. */ 277 func (conf *LiveIndexWriterConfigImpl) InfoStream() util.InfoStream { 278 return conf.infoStream 279 } 280 281 /* 282 Sets if the IndexWriter should pack newly written segments in a 283 compound file. Default is true. 284 285 Use false for batch indexing with very large ram buffer settings. 286 287 Note: To control compound file usage during segment merges see 288 SetNoCFSRatio() and SetMaxCFSSegmentSizeMB(). This setting only 289 applies to newly created segment. 290 */ 291 func (conf *LiveIndexWriterConfigImpl) SetUseCompoundFile(useCompoundFile bool) *LiveIndexWriterConfigImpl { 292 conf.useCompoundFile = useCompoundFile 293 return conf 294 } 295 296 func (conf *LiveIndexWriterConfigImpl) UseCompoundFile() bool { 297 return conf.useCompoundFile 298 } 299 300 func (conf *LiveIndexWriterConfigImpl) String() string { 301 return fmt.Sprintf(`matchVersion=%v 302 analyzer=%v 303 ramBufferSizeMB=%v 304 maxBufferedDocs=%v 305 maxBufferedDeleteTerms=%v 306 mergedSegmentWarmer=%v 307 readerTermsIndexDivisor=%v 308 termIndexInterval=%v 309 delPolicy=%v 310 commit=%v 311 openMode=%v 312 similarity=%v 313 mergeScheduler=%v 314 default WRITE_LOCK_TIMEOUT=%v 315 writeLockTimeout=%v 316 codec=%v 317 infoStream=%v 318 mergePolicy=%v 319 indexerThreadPool=%v 320 readerPooling=%v 321 perThreadHardLimitMB=%v 322 useCompoundFile=%v 323 checkIntegrityAtMerge=%v 324 `, conf.matchVersion, reflect.TypeOf(conf.analyzer), 325 conf.ramBufferSizeMB, conf.maxBufferedDocs, 326 conf.maxBufferedDeleteTerms, reflect.TypeOf(conf.mergedSegmentWarmer), 327 conf.readerTermsIndexDivisor, conf.termIndexInterval, 328 reflect.TypeOf(conf.delPolicy), conf.commit, 329 conf.openMode, reflect.TypeOf(conf.similarity), 330 conf.mergeScheduler, WRITE_LOCK_TIMEOUT, 331 conf.writeLockTimeout, conf.codec, 332 reflect.TypeOf(conf.infoStream), conf.mergePolicy, 333 conf.indexerThreadPool, conf.readerPooling, 334 conf.perRoutineHardLimitMB, conf.useCompoundFile, 335 conf.checkIntegrityAtMerge) 336 }