github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/codec/compressing/storedFieldsFormat.go

package compressing

import (
	"fmt"
	. "github.com/balzaczyy/golucene/core/codec/spi"
	"github.com/balzaczyy/golucene/core/index/model"
	"github.com/balzaczyy/golucene/core/store"
)

// compressing/CompressingStoredFieldsFormat.java

/*
A StoredFieldsFormat that is very similar to Lucene40StoredFieldsFormat
but compresses documents in chunks in order to improve the
compression ratio.

For a chunk size of chunkSize bytes, this StoredFieldsFormat does not
support documents larger than (2^31 - chunkSize) bytes. In case this
is a problem, you should use another format, such as Lucene40StoredFieldsFormat.

For optimal performance, you should use a MergePolicy that returns
segments that have the biggest byte size first.
*/
type CompressingStoredFieldsFormat struct {
	formatName      string
	segmentSuffix   string
	compressionMode CompressionMode
	chunkSize       int
}

/*
Create a new CompressingStoredFieldsFormat.

formatName is the name of the format. This name will be used in the
file formats to perform CheckHeader().

segmentSuffix is the segment suffix. This suffix is added to the result
file name only if it's not the empty string.

The compressionMode parameter allows you to choose between compression
algorithms that have various compression and decompression speeds so
that you can pick the one that best fits your indexing and searching
throughput. You should never instantiate two CompressingStoredFieldsFormats
that have the same name but different CompressionModes.

chunkSize is the minimum byte size of a chunk of documents. A value
of 1 can make sense if there is redundancy across fields. In that
case, both performance and compression ratio should be better than
with Lucene40StoredFieldsFormat with compressed fields.

Higher values of chunkSize should improve the compression ratio but
will require more memory at indexing time and might make document
loading a little slower (depending on the size of your OS cache compared
to the size of your index).
*/
func NewCompressingStoredFieldsFormat(formatName, segmentSuffix string,
	compressionMode CompressionMode, chunkSize int) *CompressingStoredFieldsFormat {
	assert2(chunkSize >= 1, "chunkSize must be >= 1")
	return &CompressingStoredFieldsFormat{
		formatName:      formatName,
		segmentSuffix:   segmentSuffix,
		compressionMode: compressionMode,
		chunkSize:       chunkSize,
	}
}

func (format *CompressingStoredFieldsFormat) FieldsReader(d store.Directory,
	si *model.SegmentInfo, fn model.FieldInfos, ctx store.IOContext) (r StoredFieldsReader, err error) {

	return newCompressingStoredFieldsReader(d, si, format.segmentSuffix, fn,
		ctx, format.formatName, format.compressionMode)
}

func (format *CompressingStoredFieldsFormat) FieldsWriter(d store.Directory,
	si *model.SegmentInfo, ctx store.IOContext) (w StoredFieldsWriter, err error) {

	return NewCompressingStoredFieldsWriter(d, si, format.segmentSuffix, ctx,
		format.formatName, format.compressionMode, format.chunkSize)
}

func (format *CompressingStoredFieldsFormat) String() string {
	return fmt.Sprintf("CompressingStoredFieldsFormat(compressionMode=%v, chunkSize=%v)",
		format.compressionMode, format.chunkSize)
}
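
// Usage sketch (illustrative, not part of the original file). It shows how
// the constructor parameters documented above fit together: a format name
// that will be written into file headers, an empty segment suffix, a
// compression mode, and a chunk size. The identifier COMPRESSION_MODE_FAST
// is an assumed name for a mode exported by the dot-imported spi package
// and may differ from the actual constant; the 1<<14 (16 KB) chunk size
// mirrors the default used by Lucene's Lucene41StoredFieldsFormat.
//
//	format := NewCompressingStoredFieldsFormat(
//		"Lucene41StoredFields", // formatName, checked via CheckHeader()
//		"",                     // segmentSuffix: empty, so no suffix in file names
//		COMPRESSION_MODE_FAST,  // assumed constant; pick per speed/ratio trade-off
//		1<<14)                  // chunkSize: minimum bytes of docs per chunk
//	fmt.Println(format) // CompressingStoredFieldsFormat(compressionMode=..., chunkSize=16384)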