github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/codec/lucene42/docValuesProducer.go (about)

     1  package lucene42
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"github.com/balzaczyy/golucene/core/codec"
     7  	. "github.com/balzaczyy/golucene/core/codec/spi"
     8  	. "github.com/balzaczyy/golucene/core/index/model"
     9  	"github.com/balzaczyy/golucene/core/store"
    10  	"github.com/balzaczyy/golucene/core/util"
    11  	"reflect"
    12  	"sync"
    13  	"sync/atomic"
    14  )
    15  
    16  const (
    17  	LUCENE42_DV_DATA_CODEC         = "Lucene42DocValuesData"
    18  	LUCENE42_DV_DATA_EXTENSION     = "dvd"
    19  	LUCENE42_DV_METADATA_CODEC     = "Lucene42DocValuesMetadata"
    20  	LUCENE42_DV_METADATA_EXTENSION = "dvm"
    21  
    22  	LUCENE42_DV_VERSION_START           = 0
    23  	LUCENE42_DV_VERSION_GCD_COMPRESSION = 1
    24  	LUCENE42_DV_VERSION_CHECKSUM        = 2
    25  	LUCENE42_DV_VERSION_CURRENT         = LUCENE42_DV_VERSION_CHECKSUM
    26  
    27  	LUCENE42_DV_NUMBER = 0
    28  	LUCENE42_DV_BYTES  = 1
    29  	LUCENE42_DV_FST    = 2
    30  
    31  	LUCENE42_DV_DELTA_COMPRESSED = 0
    32  	LUCENE42_DV_TABLE_COMPRESSED = 1
    33  	LUCENE42_DV_UNCOMPRESSED     = 2
    34  	LUCENE42_DV_GCD_COMPRESSED   = 3
    35  )
    36  
    37  type Lucene42DocValuesProducer struct {
    38  	lock sync.Mutex
    39  
    40  	numerics map[int]NumericEntry
    41  	binaries map[int]BinaryEntry
    42  	fsts     map[int]FSTEntry
    43  	data     store.IndexInput
    44  
    45  	numericInstances map[int]NumericDocValues
    46  
    47  	maxDoc       int
    48  	ramBytesUsed int64
    49  }
    50  
    51  func newLucene42DocValuesProducer(state SegmentReadState,
    52  	dataCodec, dataExtension, metaCodec, metaExtension string) (dvp *Lucene42DocValuesProducer, err error) {
    53  
    54  	fmt.Println("Initializing Lucene42DocValuesProducer...")
    55  	dvp = &Lucene42DocValuesProducer{
    56  		numericInstances: make(map[int]NumericDocValues),
    57  	}
    58  	dvp.maxDoc = state.SegmentInfo.DocCount()
    59  
    60  	metaName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension)
    61  	fmt.Println("Reading", metaName)
    62  	// read in the entries from the metadata file.
    63  	var in store.ChecksumIndexInput
    64  	if in, err = state.Dir.OpenChecksumInput(metaName, state.Context); err != nil {
    65  		return nil, err
    66  	}
    67  	dvp.ramBytesUsed = util.ShallowSizeOfInstance(reflect.TypeOf(dvp))
    68  
    69  	var version int32
    70  	func() {
    71  		var success = false
    72  		defer func() {
    73  			if success {
    74  				err = util.Close(in)
    75  			} else {
    76  				util.CloseWhileSuppressingError(in)
    77  			}
    78  		}()
    79  
    80  		if version, err = codec.CheckHeader(in, metaCodec,
    81  			LUCENE42_DV_VERSION_START, LUCENE42_DV_VERSION_CURRENT); err != nil {
    82  			return
    83  		}
    84  
    85  		dvp.numerics = make(map[int]NumericEntry)
    86  		dvp.binaries = make(map[int]BinaryEntry)
    87  		dvp.fsts = make(map[int]FSTEntry)
    88  		if err = dvp.readFields(in, state.FieldInfos); err != nil {
    89  			return
    90  		}
    91  
    92  		if version >= LUCENE42_DV_VERSION_CHECKSUM {
    93  			_, err = codec.CheckFooter(in)
    94  		} else {
    95  			err = codec.CheckEOF(in)
    96  		}
    97  	}()
    98  	if err != nil {
    99  		return nil, err
   100  	}
   101  
   102  	var success = false
   103  	defer func() {
   104  		if !success {
   105  			util.CloseWhileSuppressingError(dvp.data)
   106  		}
   107  	}()
   108  
   109  	dataName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension)
   110  	fmt.Println("Reading", dataName)
   111  	if dvp.data, err = state.Dir.OpenInput(dataName, state.Context); err != nil {
   112  		return nil, err
   113  	}
   114  	var version2 int32
   115  	if version2, err = codec.CheckHeader(dvp.data, dataCodec,
   116  		LUCENE42_DV_VERSION_START, LUCENE42_DV_VERSION_CURRENT); err != nil {
   117  		return nil, err
   118  	}
   119  
   120  	if version != version2 {
   121  		return nil, errors.New("Format versions mismatch")
   122  	}
   123  
   124  	if version >= LUCENE42_DV_VERSION_CHECKSUM {
   125  		panic("niy")
   126  	}
   127  
   128  	success = true
   129  
   130  	return dvp, nil
   131  }
   132  
   133  /*
   134  Lucene42DocValuesProducer.java/4.5.1/L138
   135  */
   136  func (dvp *Lucene42DocValuesProducer) readFields(meta store.IndexInput,
   137  	infos FieldInfos) (err error) {
   138  
   139  	var fieldNumber int
   140  	var fieldType byte
   141  	fieldNumber, err = asInt(meta.ReadVInt())
   142  	for fieldNumber != -1 && err == nil {
   143  		if infos.FieldInfoByNumber(fieldNumber) == nil {
   144  			// tricker to validate more: because we re-use for norms,
   145  			// becaue we use multiple entries for "composite" types like
   146  			// sortedset, etc.
   147  			return errors.New(fmt.Sprintf(
   148  				"Invalid field number: %v (resource=%v)",
   149  				fieldNumber, meta))
   150  		}
   151  
   152  		fieldType, err = meta.ReadByte()
   153  		if err != nil {
   154  			return
   155  		}
   156  		switch fieldType {
   157  		case LUCENE42_DV_NUMBER:
   158  			entry := NumericEntry{}
   159  			entry.offset, err = meta.ReadLong()
   160  			if err != nil {
   161  				return
   162  			}
   163  			entry.format, err = meta.ReadByte()
   164  			if err != nil {
   165  				return
   166  			}
   167  			switch entry.format {
   168  			case LUCENE42_DV_DELTA_COMPRESSED:
   169  			case LUCENE42_DV_TABLE_COMPRESSED:
   170  			case LUCENE42_DV_GCD_COMPRESSED:
   171  			case LUCENE42_DV_UNCOMPRESSED:
   172  			default:
   173  				return errors.New(fmt.Sprintf("Unknown format: %v, input=%v", entry.format, meta))
   174  			}
   175  			if entry.format != LUCENE42_DV_UNCOMPRESSED {
   176  				entry.packedIntsVersion, err = asInt(meta.ReadVInt())
   177  				if err != nil {
   178  					return
   179  				}
   180  			}
   181  			fmt.Printf("Found entry [offset=%v, format=%v, packedIntsVersion=%v\n",
   182  				entry.offset, entry.format, entry.packedIntsVersion)
   183  			dvp.numerics[fieldNumber] = entry
   184  		case LUCENE42_DV_BYTES:
   185  			panic("not implemented yet")
   186  		case LUCENE42_DV_FST:
   187  			panic("not implemented yet")
   188  		default:
   189  			return errors.New(fmt.Sprintf("invalid entry type: %v, input=%v", fieldType, meta))
   190  		}
   191  		fieldNumber, err = asInt(meta.ReadVInt())
   192  	}
   193  	return
   194  }
   195  
   196  func asInt(n int32, err error) (n2 int, err2 error) {
   197  	return int(n), err
   198  }
   199  
   200  func (dvp *Lucene42DocValuesProducer) Numeric(field *FieldInfo) (v NumericDocValues, err error) {
   201  	dvp.lock.Lock()
   202  	defer dvp.lock.Unlock()
   203  
   204  	v, exists := dvp.numericInstances[int(field.Number)]
   205  	if !exists {
   206  		if v, err = dvp.loadNumeric(field); err == nil {
   207  			dvp.numericInstances[int(field.Number)] = v
   208  		}
   209  	}
   210  	return
   211  }
   212  
   213  func (dvp *Lucene42DocValuesProducer) loadNumeric(field *FieldInfo) (v NumericDocValues, err error) {
   214  	entry := dvp.numerics[int(field.Number)]
   215  	if err = dvp.data.Seek(entry.offset); err != nil {
   216  		return
   217  	}
   218  
   219  	switch entry.format {
   220  	case LUCENE42_DV_TABLE_COMPRESSED:
   221  		panic("not implemented yet")
   222  	case LUCENE42_DV_DELTA_COMPRESSED:
   223  		panic("not implemented yet")
   224  	case LUCENE42_DV_UNCOMPRESSED:
   225  		bytes := make([]byte, dvp.maxDoc)
   226  		if err = dvp.data.ReadBytes(bytes); err == nil {
   227  			atomic.AddInt64(&dvp.ramBytesUsed, util.SizeOf(bytes))
   228  			return func(docID int) int64 {
   229  				return int64(bytes[docID])
   230  			}, nil
   231  		}
   232  	case LUCENE42_DV_GCD_COMPRESSED:
   233  		panic("not implemented yet")
   234  	default:
   235  		panic("assert fail")
   236  	}
   237  	return
   238  }
   239  
   240  func (dvp *Lucene42DocValuesProducer) Binary(field *FieldInfo) (v BinaryDocValues, err error) {
   241  	panic("not implemented yet")
   242  	return nil, nil
   243  }
   244  
   245  func (dvp *Lucene42DocValuesProducer) Sorted(field *FieldInfo) (v SortedDocValues, err error) {
   246  	panic("not implemented yet")
   247  	return nil, nil
   248  }
   249  
   250  func (dvp *Lucene42DocValuesProducer) SortedSet(field *FieldInfo) (v SortedSetDocValues, err error) {
   251  	panic("not implemented yet")
   252  	return nil, nil
   253  }
   254  
   255  func (dvp *Lucene42DocValuesProducer) Close() error {
   256  	if dvp == nil {
   257  		return nil
   258  	}
   259  	return dvp.data.Close()
   260  }
   261  
   262  type NumericEntry struct {
   263  	offset            int64
   264  	format            byte
   265  	packedIntsVersion int
   266  }
   267  
   268  type BinaryEntry struct {
   269  	offset            int64
   270  	numBytes          int64
   271  	minLength         int
   272  	maxLength         int
   273  	packedIntsVersion int
   274  	blockSize         int
   275  }
   276  
   277  type FSTEntry struct {
   278  	offset  int64
   279  	numOrds int64
   280  }