github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/codec/lucene49/normsProducer.go (about)

     1  package lucene49
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"github.com/balzaczyy/golucene/core/codec"
     7  	. "github.com/balzaczyy/golucene/core/codec/spi"
     8  	. "github.com/balzaczyy/golucene/core/index/model"
     9  	"github.com/balzaczyy/golucene/core/store"
    10  	"github.com/balzaczyy/golucene/core/util"
    11  	"github.com/balzaczyy/golucene/core/util/packed"
    12  	"reflect"
    13  	"sync"
    14  	"sync/atomic"
    15  )
    16  
// lucene49/Lucene49NormsProducer.java
    18  
// NormsEntry is the in-memory metadata for one field's norms: the
// on-disk encoding and a file offset into the data file. For
// CONST_COMPRESSED the offset field holds the constant norm value
// itself rather than a position (see loadNorms).
type NormsEntry struct {
	format byte  // encoding id; valid values are <= UNCOMPRESSED (checked in readFields)
	offset int64 // data-file offset, or the constant value for CONST_COMPRESSED
}
    23  
// NormsProducer serves per-field norms for a segment. Field metadata
// is read eagerly from the metadata file into norms; the decoded
// values are materialized lazily and cached per field in instances.
type NormsProducer struct {
	sync.Locker // guards instances (held by Numeric while loading)

	norms   map[int]*NormsEntry // field number -> metadata entry
	data    store.IndexInput    // open handle on the norms data file; closed by Close
	version int32               // codec version read from the metadata header

	// instances caches one decoded NumericDocValues per field number so
	// each field's norms are only decoded once.
	instances map[int]NumericDocValues

	maxDoc       int
	ramBytesUsed int64 // atomic
}
    36  
    37  func newLucene49NormsProducer(state SegmentReadState,
    38  	dataCodec, dataExtension, metaCodec, metaExtension string) (np *NormsProducer, err error) {
    39  
    40  	np = &NormsProducer{
    41  		Locker:       new(sync.Mutex),
    42  		norms:        make(map[int]*NormsEntry),
    43  		instances:    make(map[int]NumericDocValues),
    44  		maxDoc:       state.SegmentInfo.DocCount(),
    45  		ramBytesUsed: util.ShallowSizeOfInstance(reflect.TypeOf(np)),
    46  	}
    47  	metaName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension)
    48  	// read in the entries from the metadta file.
    49  	var in store.ChecksumIndexInput
    50  	if in, err = state.Dir.OpenChecksumInput(metaName, state.Context); err != nil {
    51  		return nil, err
    52  	}
    53  
    54  	if err = func() error {
    55  		var success = false
    56  		defer func() {
    57  			if success {
    58  				err = util.Close(in)
    59  			} else {
    60  				util.CloseWhileSuppressingError(in)
    61  			}
    62  		}()
    63  
    64  		if np.version, err = codec.CheckHeader(in, metaCodec, VERSION_START, VERSION_CURRENT); err != nil {
    65  			return err
    66  		}
    67  		if err = np.readFields(in, state.FieldInfos); err != nil {
    68  			return err
    69  		}
    70  		if _, err = codec.CheckFooter(in); err != nil {
    71  			return err
    72  		}
    73  		success = true
    74  		return nil
    75  	}(); err != nil {
    76  		return nil, err
    77  	}
    78  
    79  	dataName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension)
    80  	if np.data, err = state.Dir.OpenInput(dataName, state.Context); err != nil {
    81  		return nil, err
    82  	}
    83  	var success = false
    84  	defer func() {
    85  		if !success {
    86  			util.CloseWhileSuppressingError(np.data)
    87  		}
    88  	}()
    89  
    90  	var version2 int32
    91  	if version2, err = codec.CheckHeader(np.data, dataCodec, VERSION_START, VERSION_CURRENT); err != nil {
    92  		return nil, err
    93  	}
    94  	if version2 != np.version {
    95  		return nil, errors.New("Format versions mismatch")
    96  	}
    97  
    98  	// NOTE: data file is too costly to verify checksum against all the
    99  	// bytes on open, but fo rnow we at least verify proper structure
   100  	// of the checksum footer: which looks for FOOTER_MATIC +
   101  	// algorithmID. This is cheap and can detect some forms of
   102  	// corruption such as file trucation.
   103  	if _, err = codec.RetrieveChecksum(np.data); err != nil {
   104  		return nil, err
   105  	}
   106  
   107  	success = true
   108  
   109  	return np, nil
   110  }
   111  
   112  func (np *NormsProducer) readFields(meta store.IndexInput, infos FieldInfos) (err error) {
   113  	var fieldNumber int32
   114  	if fieldNumber, err = meta.ReadVInt(); err != nil {
   115  		return err
   116  	}
   117  	for fieldNumber != -1 {
   118  		info := infos.FieldInfoByNumber(int(fieldNumber))
   119  		if info == nil {
   120  			return errors.New(fmt.Sprintf("Invalid field number: %v (resource=%v)", fieldNumber, meta))
   121  		} else if !info.HasNorms() {
   122  			return errors.New(fmt.Sprintf("Invalid field: %v (resource=%v)", info.Name, meta))
   123  		}
   124  		var format byte
   125  		if format, err = meta.ReadByte(); err != nil {
   126  			return err
   127  		}
   128  		var offset int64
   129  		if offset, err = meta.ReadLong(); err != nil {
   130  			return err
   131  		}
   132  		entry := &NormsEntry{
   133  			format: format,
   134  			offset: offset,
   135  		}
   136  		if format > UNCOMPRESSED {
   137  			return errors.New(fmt.Sprintf("Unknown format: %v, input=%v", format, meta))
   138  		}
   139  		np.norms[int(fieldNumber)] = entry
   140  		if fieldNumber, err = meta.ReadVInt(); err != nil {
   141  			return err
   142  		}
   143  	}
   144  	return nil
   145  }
   146  
   147  func (np *NormsProducer) Numeric(field *FieldInfo) (NumericDocValues, error) {
   148  	np.Lock()
   149  	defer np.Unlock()
   150  
   151  	instance, ok := np.instances[int(field.Number)]
   152  	if !ok {
   153  		var err error
   154  		if instance, err = np.loadNorms(field); err != nil {
   155  			return nil, err
   156  		}
   157  		np.instances[int(field.Number)] = instance
   158  	}
   159  	return instance, nil
   160  }
   161  
   162  func (np *NormsProducer) loadNorms(field *FieldInfo) (NumericDocValues, error) {
   163  	entry, ok := np.norms[int(field.Number)]
   164  	assert(ok)
   165  	switch entry.format {
   166  	case CONST_COMPRESSED:
   167  		return func(int) int64 { return entry.offset }, nil
   168  	case UNCOMPRESSED:
   169  		panic("not implemented yet")
   170  	case DELTA_COMPRESSED:
   171  		panic("not implemented yet")
   172  	case TABLE_COMPRESSED:
   173  		var err error
   174  		if err = np.data.Seek(entry.offset); err == nil {
   175  			var packedVersion int32
   176  			if packedVersion, err = np.data.ReadVInt(); err == nil {
   177  				var size int
   178  				if size, err = int32ToInt(np.data.ReadVInt()); err == nil {
   179  					if size > 256 {
   180  						return nil, errors.New(fmt.Sprintf(
   181  							"TABLE_COMPRESSED cannot have more than 256 distinct values, input=%v",
   182  							np.data))
   183  					}
   184  					decode := make([]int64, size)
   185  					for i, _ := range decode {
   186  						if decode[i], err = np.data.ReadLong(); err != nil {
   187  							break
   188  						}
   189  					}
   190  					if err == nil {
   191  						var formatId int
   192  						if formatId, err = int32ToInt(np.data.ReadVInt()); err == nil {
   193  							var bitsPerValue int32
   194  							if bitsPerValue, err = np.data.ReadVInt(); err == nil {
   195  								var ordsReader packed.PackedIntsReader
   196  								if ordsReader, err = packed.ReaderNoHeader(np.data,
   197  									packed.PackedFormat(formatId), packedVersion,
   198  									int32(np.maxDoc), uint32(bitsPerValue)); err == nil {
   199  
   200  									atomic.AddInt64(&np.ramBytesUsed, util.SizeOf(decode)+ordsReader.RamBytesUsed())
   201  									return func(docId int) int64 {
   202  										return decode[int(ordsReader.Get(docId))]
   203  									}, nil
   204  								}
   205  							}
   206  						}
   207  					}
   208  				}
   209  			}
   210  		}
   211  		if err != nil {
   212  			return nil, err
   213  		}
   214  	default:
   215  		panic("assert fail")
   216  	}
   217  	panic("should not be here")
   218  }
   219  
   220  func int32ToInt(n int32, err error) (int, error) {
   221  	return int(n), err
   222  }
   223  
// Binary is not supported: this producer serves numeric norms only.
func (np *NormsProducer) Binary(field *FieldInfo) (BinaryDocValues, error) {
	panic("not supported")
}
   227  
// Sorted is not supported: this producer serves numeric norms only.
func (np *NormsProducer) Sorted(field *FieldInfo) (SortedDocValues, error) {
	panic("not supported")
}
   231  
// SortedSet is not supported: this producer serves numeric norms only.
func (np *NormsProducer) SortedSet(field *FieldInfo) (SortedSetDocValues, error) {
	panic("not supported")
}
   235  
// Close releases the open handle on the norms data file. The metadata
// file was already closed during construction.
func (np *NormsProducer) Close() error {
	return np.data.Close()
}