github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/codec/compressing/storedFieldsFormat.go

package compressing

import (
	"fmt"
	. "github.com/balzaczyy/golucene/core/codec/spi"
	"github.com/balzaczyy/golucene/core/index/model"
	"github.com/balzaczyy/golucene/core/store"
)
     9  
// compressing/CompressingStoredFieldsFormat.java

/*
A StoredFieldsFormat that is very similar to Lucene40StoredFieldsFormat
but compresses documents in chunks in order to improve the
compression ratio.

For a chunk size of chunkSize bytes, this StoredFieldsFormat does not
support documents larger than (2^31 - chunkSize) bytes. In case this
is a problem, you should use another format, such as Lucene40StoredFieldsFormat.

For optimal performance, you should use a MergePolicy that returns
segments that have the biggest byte size first.
*/
type CompressingStoredFieldsFormat struct {
	formatName      string
	segmentSuffix   string
	compressionMode CompressionMode
	chunkSize       int
}
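
// A quick worked example of the size limit above (an illustrative sketch
// added here, not part of the original file): with a chunk size of 1<<14
// bytes (16 KB, the value Lucene 4.1 uses for its stored fields format),
// the largest storable document is
//
//	maxDocBytes := int64(1)<<31 - int64(1<<14) // 2147467264 bytes, just under 2 GB
//
// Documents larger than that need a different StoredFieldsFormat.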
    30  
/*
Create a new CompressingStoredFieldsFormat.

formatName is the name of the format. This name will be used in the
file formats to perform CheckHeader().

segmentSuffix is the segment suffix. This suffix is added to the result
file name only if it's not the empty string.

The compressionMode parameter allows you to choose between compression
algorithms that have various compression and decompression speeds so
that you can pick the one that best fits your indexing and searching
throughput. You should never instantiate two CompressingStoredFieldsFormats
that have the same name but different CompressionModes.

chunkSize is the minimum byte size of a chunk of documents. A value
of 1 can make sense if there is redundancy across fields. In that
case, both performance and compression ratio should be better than
with Lucene40StoredFieldsFormat with compressed fields.

Higher values of chunkSize should improve the compression ratio but
will require more memory at indexing time and might make document
loading a little slower (depending on the size of your OS cache compared
to the size of your index).
*/
func NewCompressingStoredFieldsFormat(formatName, segmentSuffix string,
	compressionMode CompressionMode, chunkSize int) *CompressingStoredFieldsFormat {
	assert2(chunkSize >= 1, "chunkSize must be >= 1")
	return &CompressingStoredFieldsFormat{
		formatName:      formatName,
		segmentSuffix:   segmentSuffix,
		compressionMode: compressionMode,
		chunkSize:       chunkSize,
	}
}
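
// Usage sketch (illustrative, not part of the original file). It assumes
// the dot-imported spi package exports a CompressionMode value named
// COMPRESSION_MODE_FAST; substitute whatever CompressionMode your codec
// actually provides:
//
//	format := NewCompressingStoredFieldsFormat(
//		"Lucene41StoredFields", // formatName, verified by CheckHeader()
//		"",                     // segmentSuffix: empty adds no suffix
//		COMPRESSION_MODE_FAST,  // assumed CompressionMode value
//		1<<14)                  // chunkSize: compress in 16 KB chunks
//	fmt.Println(format)
//	// CompressingStoredFieldsFormat(compressionMode=..., chunkSize=16384)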
    66  
func (format *CompressingStoredFieldsFormat) FieldsReader(d store.Directory,
	si *model.SegmentInfo, fn model.FieldInfos, ctx store.IOContext) (r StoredFieldsReader, err error) {

	return newCompressingStoredFieldsReader(d, si, format.segmentSuffix, fn,
		ctx, format.formatName, format.compressionMode)
}
    73  
func (format *CompressingStoredFieldsFormat) FieldsWriter(d store.Directory,
	si *model.SegmentInfo, ctx store.IOContext) (w StoredFieldsWriter, err error) {

	return NewCompressingStoredFieldsWriter(d, si, format.segmentSuffix, ctx,
		format.formatName, format.compressionMode, format.chunkSize)
}
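
// Lifecycle sketch for the two factory methods above (illustrative; dir,
// si, fn, and ctx are placeholders the caller must already have, and the
// Close calls assume the StoredFieldsWriter/StoredFieldsReader contracts
// include a Close method):
//
//	w, _ := format.FieldsWriter(dir, si, ctx)
//	// ... write each document's stored fields via w, then:
//	w.Close()
//
//	r, _ := format.FieldsReader(dir, si, fn, ctx)
//	// ... visit stored fields of matching docs via r, then:
//	r.Close()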
    80  
func (format *CompressingStoredFieldsFormat) String() string {
	return fmt.Sprintf("CompressingStoredFieldsFormat(compressionMode=%v, chunkSize=%v)",
		format.compressionMode, format.chunkSize)
}