github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/codec/lucene49/normsConsumer.go (about)

     1  package lucene49
     2  
     3  import (
     4  	"github.com/balzaczyy/golucene/core/codec"
     5  	. "github.com/balzaczyy/golucene/core/index/model"
     6  	"github.com/balzaczyy/golucene/core/store"
     7  	"github.com/balzaczyy/golucene/core/util"
     8  	"github.com/balzaczyy/golucene/core/util/packed"
     9  	"math"
    10  )
    11  
    12  // lucene49/Lucene49NormsConsumer.java
    13  
    14  const (
    15  	DELTA_COMPRESSED = 0
    16  	TABLE_COMPRESSED = 1
    17  	CONST_COMPRESSED = 2
    18  	UNCOMPRESSED     = 3
    19  )
    20  
    21  type NormsConsumer struct {
    22  	data, meta store.IndexOutput
    23  	maxDoc     int
    24  }
    25  
    26  func newLucene49NormsConsumer(state *SegmentWriteState,
    27  	dataCodec, dataExtension, metaCodec, metaExtension string) (nc *NormsConsumer, err error) {
    28  
    29  	assert(packed.PackedFormat(packed.PACKED_SINGLE_BLOCK).IsSupported(1))
    30  	assert(packed.PackedFormat(packed.PACKED_SINGLE_BLOCK).IsSupported(2))
    31  	assert(packed.PackedFormat(packed.PACKED_SINGLE_BLOCK).IsSupported(4))
    32  
    33  	nc = &NormsConsumer{maxDoc: state.SegmentInfo.DocCount()}
    34  	var success = false
    35  	defer func() {
    36  		if !success {
    37  			util.CloseWhileSuppressingError(nc)
    38  		}
    39  	}()
    40  
    41  	dataName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension)
    42  	if nc.data, err = state.Directory.CreateOutput(dataName, state.Context); err != nil {
    43  		return nil, err
    44  	}
    45  	if err = codec.WriteHeader(nc.data, dataCodec, VERSION_CURRENT); err != nil {
    46  		return nil, err
    47  	}
    48  	metaName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension)
    49  	if nc.meta, err = state.Directory.CreateOutput(metaName, state.Context); err != nil {
    50  		return nil, err
    51  	}
    52  	if err = codec.WriteHeader(nc.meta, metaCodec, VERSION_CURRENT); err != nil {
    53  		return nil, err
    54  	}
    55  	success = true
    56  	return nc, nil
    57  }
    58  
    59  func (nc *NormsConsumer) AddNumericField(field *FieldInfo,
    60  	iter func() func() (interface{}, bool)) (err error) {
    61  
    62  	if err = nc.meta.WriteVInt(field.Number); err != nil {
    63  		return
    64  	}
    65  	minValue, maxValue := int64(math.MaxInt64), int64(math.MinInt64)
    66  	// TODO: more efficient?
    67  	uniqueValues := newNormMap()
    68  
    69  	count := int64(0)
    70  	next := iter()
    71  	for {
    72  		nv, ok := next()
    73  		if !ok {
    74  			break
    75  		}
    76  		assert2(nv != nil, "illegal norms data for field %v, got null for value: %v", field.Name, count)
    77  		v := nv.(int64)
    78  
    79  		if v < minValue {
    80  			minValue = v
    81  		}
    82  		if v > maxValue {
    83  			maxValue = v
    84  		}
    85  
    86  		if uniqueValues != nil && uniqueValues.add(v) && uniqueValues.size > 256 {
    87  			uniqueValues = nil
    88  		}
    89  
    90  		count++
    91  	}
    92  	assert2(count == int64(nc.maxDoc),
    93  		"illegal norms data for field %v, expected %v values, got %v",
    94  		field.Name, nc.maxDoc, count)
    95  
    96  	if uniqueValues != nil && uniqueValues.size == 1 {
    97  		// 0 bpv
    98  		if err = nc.meta.WriteByte(CONST_COMPRESSED); err != nil {
    99  			return
   100  		}
   101  		if err = nc.meta.WriteLong(minValue); err != nil {
   102  			return
   103  		}
   104  	} else if uniqueValues != nil {
   105  		// small number of unique values; this is the typical case:
   106  		// we only use bpv=1,2,4,8
   107  		format := packed.PackedFormat(packed.PACKED_SINGLE_BLOCK)
   108  		bitsPerValue := packed.BitsRequired(int64(uniqueValues.size) - 1)
   109  		if bitsPerValue == 3 {
   110  			bitsPerValue = 4
   111  		} else if bitsPerValue > 4 {
   112  			bitsPerValue = 8
   113  		}
   114  
   115  		if bitsPerValue == 8 && minValue >= 0 && maxValue <= 255 {
   116  			if err = store.Stream(nc.meta).WriteByte(UNCOMPRESSED). // uncompressed []byte
   117  										WriteLong(nc.data.FilePointer()).
   118  										Close(); err != nil {
   119  				return err
   120  			}
   121  			next = iter()
   122  			for {
   123  				nv, ok := next()
   124  				if !ok {
   125  					break
   126  				}
   127  				n := byte(0)
   128  				if nv != nil {
   129  					n = byte(nv.(int64))
   130  				}
   131  				if err = nc.data.WriteByte(byte(n)); err != nil {
   132  					return err
   133  				}
   134  			}
   135  		} else {
   136  			if err = store.Stream(nc.meta).WriteByte(TABLE_COMPRESSED). // table-compressed
   137  											WriteLong(nc.data.FilePointer()).
   138  											Close(); err != nil {
   139  				return err
   140  			}
   141  			if err = nc.data.WriteVInt(packed.VERSION_CURRENT); err != nil {
   142  				return err
   143  			}
   144  
   145  			decode := uniqueValues.decodeTable()
   146  			// upgrade to power of two sized array
   147  			size := 1 << uint(bitsPerValue)
   148  			if err = nc.data.WriteVInt(int32(size)); err != nil {
   149  				return err
   150  			}
   151  			for _, v := range decode {
   152  				if err = nc.data.WriteLong(v); err != nil {
   153  					return err
   154  				}
   155  			}
   156  			for i := len(decode); i < size; i++ {
   157  				if err = nc.data.WriteLong(0); err != nil {
   158  					return err
   159  				}
   160  			}
   161  
   162  			if err = store.Stream(nc.data).WriteVInt(int32(format.Id())).
   163  				WriteVInt(int32(bitsPerValue)).
   164  				Close(); err != nil {
   165  				return err
   166  			}
   167  
   168  			writer := packed.WriterNoHeader(nc.data, format, nc.maxDoc, bitsPerValue, packed.DEFAULT_BUFFER_SIZE)
   169  			next = iter()
   170  			for {
   171  				nv, ok := next()
   172  				if !ok {
   173  					break
   174  				}
   175  				if err = writer.Add(int64(uniqueValues.ord(nv.(int64)))); err != nil {
   176  					return err
   177  				}
   178  			}
   179  			if err = writer.Finish(); err != nil {
   180  				return err
   181  			}
   182  		}
   183  	} else {
   184  		panic("not implemented yet")
   185  	}
   186  	return nil
   187  }
   188  
   189  type Longs []int64
   190  
   191  func (a Longs) Len() int           { return len(a) }
   192  func (a Longs) Less(i, j int) bool { return a[i] < a[j] }
   193  func (a Longs) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
   194  
   195  func (nc *NormsConsumer) Close() (err error) {
   196  	var success = false
   197  	defer func() {
   198  		if success {
   199  			err = util.Close(nc.data, nc.meta)
   200  		} else {
   201  			util.CloseWhileSuppressingError(nc.data, nc.meta)
   202  		}
   203  	}()
   204  
   205  	if nc.meta != nil {
   206  		if err = nc.meta.WriteVInt(-1); err != nil { // write EOF marker
   207  			return
   208  		}
   209  		if err = codec.WriteFooter(nc.meta); err != nil { // write checksum
   210  			return
   211  		}
   212  	}
   213  	if nc.data != nil {
   214  		if err = codec.WriteFooter(nc.data); err != nil { // write checksum
   215  			return
   216  		}
   217  	}
   218  	success = true
   219  	return nil
   220  }
   221  
   222  /*
   223  Specialized deduplication of long-ord for norms: 99.99999% of the
   224  time this will be a single-byte range.
   225  */
   226  type NormMap struct {
   227  	// we use int16: at most we will add 257 values to this map before its rejected as too big above.
   228  	singleByteRange []int16
   229  	other           map[int64]int16
   230  	size            int
   231  }
   232  
   233  func newNormMap() *NormMap {
   234  	ans := &NormMap{
   235  		singleByteRange: make([]int16, 256),
   236  		other:           make(map[int64]int16),
   237  	}
   238  	for i, _ := range ans.singleByteRange {
   239  		ans.singleByteRange[i] = -1
   240  	}
   241  	return ans
   242  }
   243  
   244  /* Adds an item to the mapping. Returns true if actually added. */
   245  func (m *NormMap) add(l int64) bool {
   246  	assert(m.size <= 256) // once we add > 256 values, we nullify the map in addNumericField and don't use this strategy
   247  	if l >= math.MinInt8 && l <= math.MaxInt8 {
   248  		index := int(l + 128)
   249  		if previous := m.singleByteRange[index]; previous < 0 {
   250  			m.singleByteRange[index] = int16(m.size)
   251  			m.size++
   252  			return true
   253  		}
   254  		return false
   255  	}
   256  	if _, ok := m.other[l]; !ok {
   257  		m.other[l] = int16(m.size)
   258  		m.size++
   259  		return true
   260  	}
   261  	return false
   262  }
   263  
   264  /* Gets the ordinal for a previously added item. */
   265  func (m *NormMap) ord(l int64) int {
   266  	panic("niy")
   267  }
   268  
   269  /* Retrieves the ordinal table for previously added items. */
   270  func (m *NormMap) decodeTable() []int64 {
   271  	panic("niy")
   272  }