github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/store/nrt.go (about)

     1  package store
     2  
     3  import (
     4  	"fmt"
     5  	"log"
     6  	"sync"
     7  )
     8  
// NRT_VERBOSE gates the diagnostic log output emitted by
// NRTCachingDirectory in this file; it is a compile-time switch.
const NRT_VERBOSE = false
    10  
    11  // TODO
    12  // - let subclass dictate policy...?
    13  // - rename to MergeCachingDir? NRTCachingDIR
    14  
/*
NRTCachingDirectory wraps a RAMDirectory around any provided delegate
directory, to be used during NRT search.

This class is likely only useful in a near-real-time context, where
indexing rate is lowish but reopen rate is highish, resulting in many
tiny files being written. This directory keeps such segments (as well
as the segments produced by merging them, as long as they are small
enough), in RAM.

This is safe to use: when your app calls IndexWriter.Commit(), all
cached files will be flushed from the cache and sync'd.

Here's a simple example usage:

	fsDir, _ := OpenFSDirectory("/path/to/index")
	cachedFSDir := NewNRTCachingDirectory(fsDir, 5.0, 60.0)
	conf := NewIndexWriterConfig(VERSION_49, analyzer)
	writer := NewIndexWriter(cachedFSDir, conf)

This will cache all newly flushed segments, all merges whose expected
segment size is <= 5 MB, unless the net cached bytes exceeds 60 MB at
which point all writes will not be cached (until the net bytes falls
below 60 MB).
*/
type NRTCachingDirectory struct {
	// Directory is the delegate; every file that is not held in the
	// RAM cache is served from it.
	Directory
	// Locker guards the methods marked "synchronized" in this file,
	// which check the cache first and then fall back to the delegate.
	sync.Locker

	// cache is the in-memory directory holding files not yet moved to
	// the delegate.
	cache *RAMDirectory
	// maxMergeSizeBytes is the largest estimated segment size that
	// doCacheWrite will accept for caching.
	maxMergeSizeBytes int64
	// maxCachedBytes is the upper bound on the cache's RAM usage plus
	// the estimated size of the new file for it to be cached.
	maxCachedBytes int64

	// doCacheWrite decides whether a newly created output goes to the
	// RAM cache (true) or straight to the delegate (false).
	doCacheWrite func(name string, context IOContext) bool
	// uncacheLock serializes unCache so only one goroutine moves files
	// out of the cache at a time.
	uncacheLock sync.Locker
}
    51  
    52  /*
    53  We will cache a newly created output if 1) it's a flush or a merge
    54  and the estimated size of the merged segment is <= maxMergedSizeMB,
    55  and 2) the total cached bytes is <= maxCachedMB.
    56  */
    57  func NewNRTCachingDirectory(delegate Directory, maxMergeSizeMB, maxCachedMB float64) (nrt *NRTCachingDirectory) {
    58  	nrt = &NRTCachingDirectory{
    59  		Directory:         delegate,
    60  		Locker:            &sync.Mutex{},
    61  		cache:             NewRAMDirectory(),
    62  		maxMergeSizeBytes: int64(maxMergeSizeMB * 1024 * 1024),
    63  		maxCachedBytes:    int64(maxCachedMB * 1024 * 1024),
    64  		uncacheLock:       &sync.Mutex{},
    65  	}
    66  	// Subclass can override this to customize logic; return true if this
    67  	// file should be written to the RAMDirectory.
    68  	nrt.doCacheWrite = func(name string, context IOContext) bool {
    69  		var bytes int64
    70  		if context.MergeInfo != nil {
    71  			bytes = context.MergeInfo.EstimatedMergeBytes
    72  		} else if context.FlushInfo != nil {
    73  			bytes = context.FlushInfo.EstimatedSegmentSize
    74  		}
    75  		if NRT_VERBOSE {
    76  			log.Printf("CACHE check merge=%v flush=%v size=%v",
    77  				context.MergeInfo, context.FlushInfo, bytes)
    78  		}
    79  		return name != "segments.gen" &&
    80  			bytes <= nrt.maxMergeSizeBytes &&
    81  			bytes+nrt.cache.RamBytesUsed() <= nrt.maxCachedBytes
    82  	}
    83  	return
    84  }
    85  
    86  func (nrt *NRTCachingDirectory) String() string {
    87  	return fmt.Sprintf("NRTCachingDirectory(%v; maxCacheMB=%.2f maxMergeSizeMB=%.2f)",
    88  		nrt.Directory, float32(nrt.maxCachedBytes/1024/1024),
    89  		float32(nrt.maxMergeSizeBytes/1024/1024))
    90  }
    91  
    92  func (nrt *NRTCachingDirectory) ListAll() (all []string, err error) {
    93  	nrt.Lock() // synchronized
    94  	defer nrt.Unlock()
    95  	files := make(map[string]bool)
    96  	all, err = nrt.cache.ListAll()
    97  	if err != nil {
    98  		return
    99  	}
   100  	for _, f := range all {
   101  		files[f] = true
   102  	}
   103  	// LUCENE-1468: our NRTCachingDirectory will actually exist (RAMDir!),
   104  	// but if the underlying delegate is an FSDir and mkdirs() has not
   105  	// yet been called, because so far everything is a cached write,
   106  	// in this case, we don't want to throw a NoSuchDirectoryException
   107  	all, err = nrt.Directory.ListAll()
   108  	if err != nil {
   109  		if _, ok := err.(*NoSuchDirectoryError); ok {
   110  			// however if there are no cached files, then the directory truly
   111  			// does not "exist"
   112  			if len(files) == 0 {
   113  				return
   114  			}
   115  		} else {
   116  			return
   117  		}
   118  	}
   119  	for _, f := range all {
   120  		// Cannot do this -- if Lucene calls createOutput but files
   121  		// already exists then this falsely trips:
   122  		// assert2(!files[f], fmt.Sprintf("file '%v' is in both dirs", f)
   123  		files[f] = true
   124  	}
   125  	all = make([]string, 0, len(files))
   126  	for f, _ := range files {
   127  		all = append(all, f)
   128  	}
   129  	return
   130  }
   131  
   132  // Returns how many bytes are being used by the RAMDirectory cache
   133  // func (nrt *NRTCachingDirectory) sizeInBytes() int64 {
   134  // 	return nrt.cache.sizeInBytes
   135  // }
   136  
   137  func (nrt *NRTCachingDirectory) FileExists(name string) bool {
   138  	nrt.Lock() // synchronized
   139  	defer nrt.Unlock()
   140  	return nrt._fileExists(name)
   141  }
   142  
   143  func (nrt *NRTCachingDirectory) _fileExists(name string) bool {
   144  	return nrt.cache.FileExists(name) || nrt.Directory.FileExists(name)
   145  }
   146  
   147  func (nrt *NRTCachingDirectory) DeleteFile(name string) error {
   148  	assert(nrt.Directory != nil)
   149  	nrt.Lock() // synchronized
   150  	defer nrt.Unlock()
   151  
   152  	if NRT_VERBOSE {
   153  		log.Printf("nrtdir.deleteFile name=%v", name)
   154  	}
   155  	if nrt.cache.FileExists(name) {
   156  		return nrt.cache.DeleteFile(name)
   157  	} else {
   158  		return nrt.Directory.DeleteFile(name)
   159  	}
   160  }
   161  
   162  func assert2(ok bool, msg string, args ...interface{}) {
   163  	if !ok {
   164  		panic(fmt.Sprintf(msg, args...))
   165  	}
   166  }
   167  
   168  func (nrt *NRTCachingDirectory) FileLength(name string) (length int64, err error) {
   169  	nrt.Lock() // synchronized
   170  	defer nrt.Unlock()
   171  	if nrt.cache.FileExists(name) {
   172  		return nrt.cache.FileLength(name)
   173  	} else {
   174  		return nrt.Directory.FileLength(name)
   175  	}
   176  }
   177  
   178  func (nrt *NRTCachingDirectory) CreateOutput(name string, context IOContext) (out IndexOutput, err error) {
   179  	if NRT_VERBOSE {
   180  		log.Printf("nrtdir.createOutput name=%v", name)
   181  	}
   182  	if nrt.doCacheWrite(name, context) {
   183  		if NRT_VERBOSE {
   184  			log.Println("  to cache")
   185  		}
   186  		nrt.Directory.DeleteFile(name) // ignore IO error
   187  		return nrt.cache.CreateOutput(name, context)
   188  	}
   189  	nrt.cache.DeleteFile(name) // ignore IO error
   190  	return nrt.Directory.CreateOutput(name, context)
   191  }
   192  
   193  func (nrt *NRTCachingDirectory) Sync(fileNames []string) (err error) {
   194  	if NRT_VERBOSE {
   195  		log.Printf("nrtdir.sync files=%v", fileNames)
   196  	}
   197  	for _, fileName := range fileNames {
   198  		err = nrt.unCache(fileName)
   199  		if err != nil {
   200  			return
   201  		}
   202  	}
   203  	return nrt.Directory.Sync(fileNames)
   204  }
   205  
   206  func (nrt *NRTCachingDirectory) OpenInput(name string, context IOContext) (in IndexInput, err error) {
   207  	nrt.Lock() // synchronized
   208  	defer nrt.Unlock()
   209  	if NRT_VERBOSE {
   210  		log.Printf("nrtdir.openInput name=%v", name)
   211  	}
   212  	if nrt.cache.FileExists(name) {
   213  		if NRT_VERBOSE {
   214  			log.Println("  from cache")
   215  		}
   216  		return nrt.cache.OpenInput(name, context)
   217  	}
   218  	return nrt.Directory.OpenInput(name, context)
   219  }
   220  
   221  // func (nrt *NRTCachingDirectory) CreateSlicer(name string, context IOContext) (slicer IndexInputSlicer, err error) {
   222  // 	nrt.EnsureOpen()
   223  // 	if NRT_VERBOSE {
   224  // 		log.Println("nrtdir.openInput name=%v", name)
   225  // 	}
   226  // 	if nrt.cache.FileExists(name) {
   227  // 		if NRT_VERBOSE {
   228  // 			log.Println("  from cache")
   229  // 		}
   230  // 		return nrt.cache.CreateSlicer(name, context)
   231  // 	}
   232  // 	return nrt.Directory.CreateSlicer(name, context)
   233  // }
   234  
   235  // Close this directory, which flushes any cached files to the
   236  // delegate and then closes the delegate.
   237  func (nrt *NRTCachingDirectory) Close() error {
   238  	// NOTE: technically we shouldn't have to do this, ie,
   239  	// IndexWriter should have sync'd all files, but we do
   240  	// it for defensive reasons... or in case the app is
   241  	// doing something custom (creating outputs directly w/o
   242  	// using IndexWriter):
   243  	all, err := nrt.cache.ListAll()
   244  	if err != nil {
   245  		return err
   246  	}
   247  	for _, fileName := range all {
   248  		nrt.unCache(fileName)
   249  	}
   250  	err = nrt.cache.Close()
   251  	if err != nil {
   252  		return err
   253  	}
   254  	return nrt.Directory.Close()
   255  }
   256  
   257  func (nrt *NRTCachingDirectory) unCache(fileName string) (err error) {
   258  	// Only let one goroutine uncache at a time; this only happens
   259  	// during commit() or close():
   260  	nrt.uncacheLock.Lock()
   261  	defer nrt.uncacheLock.Unlock()
   262  
   263  	log.Printf("nrtdir.unCache name=%v", fileName)
   264  	if !nrt.cache.FileExists(fileName) {
   265  		// Another goroutine beat us...
   266  		return
   267  	}
   268  	context := IO_CONTEXT_DEFAULT
   269  	var out IndexOutput
   270  	out, err = nrt.Directory.CreateOutput(fileName, context)
   271  	if err != nil {
   272  		return
   273  	}
   274  	defer out.Close()
   275  	var in IndexInput
   276  	in, err = nrt.cache.OpenInput(fileName, context)
   277  	if err != nil {
   278  		return
   279  	}
   280  	defer in.Close()
   281  	err = out.CopyBytes(in, in.Length())
   282  	if err != nil {
   283  		return
   284  	}
   285  
   286  	nrt.Lock() // Lock order: uncacheLock -> this
   287  	defer nrt.Unlock()
   288  	// Must sync here because other sync methods have
   289  	// if nrt.cache.FileExists(name) { ... } else { ... }
   290  	return nrt.cache.DeleteFile(fileName)
   291  }