github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/model/fieldInfos.go (about)

     1  package model
     2  
     3  import (
     4  	"fmt"
     5  	"sort"
     6  	"sync"
     7  )
     8  
// FieldInfos is a collection of FieldInfo(s), accessible by number or by name.
//
// The exported Has* flags are computed by NewFieldInfos; each one is true when
// at least one field in the collection has the corresponding property.
type FieldInfos struct {
	HasFreq      bool // some indexed field has index options other than INDEX_OPT_DOCS_ONLY
	HasProx      bool // some indexed field has index options >= INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS
	HasPayloads  bool // some field has storePayloads set
	HasOffsets   bool // some indexed field has index options >= INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
	HasVectors   bool // some field has storeTermVector set
	HasNorms     bool // some field has a non-zero normType
	HasDocValues bool // some field has a non-zero docValueType

	byNumber map[int32]*FieldInfo  // lookup by FieldInfo.Number
	byName   map[string]*FieldInfo // lookup by FieldInfo.Name
	Values   []*FieldInfo          // every field, in the order of the sorted field numbers (sorted by ID)
}
    23  
    24  func NewFieldInfos(infos []*FieldInfo) FieldInfos {
    25  	self := FieldInfos{byNumber: make(map[int32]*FieldInfo), byName: make(map[string]*FieldInfo)}
    26  
    27  	numbers := make([]int32, 0)
    28  	for _, info := range infos {
    29  		assert2(info.Number >= 0, "illegal field number: %v for field %v", info.Number, info.Name)
    30  		if prev, ok := self.byNumber[info.Number]; ok {
    31  			panic(fmt.Sprintf("duplicate field numbers: %v and %v have: %v", prev.Name, info.Name, info.Number))
    32  		}
    33  		self.byNumber[info.Number] = info
    34  		numbers = append(numbers, info.Number)
    35  		if prev, ok := self.byName[info.Name]; ok {
    36  			panic(fmt.Sprintf("duplicate field names: %v and %v have: %v", prev.Number, info.Number, info.Name))
    37  		}
    38  		self.byName[info.Name] = info
    39  
    40  		self.HasVectors = self.HasVectors || info.storeTermVector
    41  		self.HasProx = self.HasProx || info.indexed && info.indexOptions >= INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS
    42  		self.HasFreq = self.HasFreq || info.indexed && info.indexOptions != INDEX_OPT_DOCS_ONLY
    43  		self.HasOffsets = self.HasOffsets || info.indexed && info.indexOptions >= INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    44  		self.HasNorms = self.HasNorms || info.normType != 0
    45  		self.HasDocValues = self.HasDocValues || info.docValueType != 0
    46  		self.HasPayloads = self.HasPayloads || info.storePayloads
    47  	}
    48  
    49  	sort.Sort(Int32Slice(numbers))
    50  	self.Values = make([]*FieldInfo, len(infos))
    51  	for i, v := range numbers {
    52  		self.Values[int32(i)] = self.byNumber[v]
    53  	}
    54  
    55  	return self
    56  }
    57  
    58  /* Returns the number of fields */
    59  func (infos FieldInfos) Size() int {
    60  	assert(len(infos.byNumber) == len(infos.byName))
    61  	return len(infos.byNumber)
    62  }
    63  
    64  /* Return the FieldInfo object referenced by the field name */
    65  func (infos FieldInfos) FieldInfoByName(fieldName string) *FieldInfo {
    66  	return infos.byName[fieldName]
    67  }
    68  
    69  /* Return the FieldInfo object referenced by the fieldNumber. */
    70  func (infos FieldInfos) FieldInfoByNumber(fieldNumber int) *FieldInfo {
    71  	assert2(fieldNumber >= 0, "Illegal field number: %v", fieldNumber)
    72  	return infos.byNumber[int32(fieldNumber)]
    73  }
    74  
    75  func (fis FieldInfos) String() string {
    76  	return fmt.Sprintf(`
    77  hasFreq = %v
    78  hasProx = %v
    79  hasPayloads = %v
    80  hasOffsets = %v
    81  hasVectors = %v
    82  hasNorms = %v
    83  hasDocValues = %v
    84  %v`, fis.HasFreq, fis.HasProx, fis.HasPayloads, fis.HasOffsets,
    85  		fis.HasVectors, fis.HasNorms, fis.HasDocValues, fis.Values)
    86  }
    87  
// FieldNumbers maps field names to field numbers and back, and remembers the
// DocValues type recorded for each field name.
//
// The embedded sync.Locker is held by addOrGet for the whole of each call;
// NewFieldNumbers installs a *sync.Mutex for it.
type FieldNumbers struct {
	sync.Locker
	// Two views of the same assignment; addOrGet writes both maps together
	// whenever it assigns a number to a new name.
	numberToName map[int]string
	nameToNumber map[string]int
	// We use this to enforce that a given field never changes DV type,
	// even across segments / IndexWriter sessions:
	docValuesType map[string]DocValuesType
	// TODO: we should similarly catch an attempt to turn norms back on
	// after they were already omitted; today we silently discard the
	// norm but this is badly trappy
	//
	// lowestUnassignedFieldNumber is the cursor of the sequential allocator
	// in addOrGet, which increments it before probing numberToName for a
	// free number. NewFieldNumbers starts it at -1.
	lowestUnassignedFieldNumber int
}
   100  
   101  func NewFieldNumbers() *FieldNumbers {
   102  	return &FieldNumbers{
   103  		Locker:                      &sync.Mutex{},
   104  		nameToNumber:                make(map[string]int),
   105  		numberToName:                make(map[int]string),
   106  		docValuesType:               make(map[string]DocValuesType),
   107  		lowestUnassignedFieldNumber: -1,
   108  	}
   109  }
   110  
   111  func (fn *FieldNumbers) AddOrGet(info *FieldInfo) int {
   112  	return fn.addOrGet(info.Name, int(info.Number), info.docValueType)
   113  }
   114  
   115  /*
   116  Returns the global field number for the given field name. If the name
   117  does not exist yet it tries to add it with the given preferred field
   118  number assigned if possible otherwise the first unassigned field
   119  number is used as the field number.
   120  */
   121  func (fn *FieldNumbers) addOrGet(name string, preferredNumber int, dv DocValuesType) int {
   122  	fn.Lock()
   123  	defer fn.Unlock()
   124  
   125  	if dv != 0 {
   126  		currentDv, ok := fn.docValuesType[name]
   127  		if !ok || currentDv == 0 {
   128  			fn.docValuesType[name] = dv
   129  		} else {
   130  			assert2(currentDv == dv,
   131  				"cannot change DocValues type from %v to %v for field '%v'",
   132  				currentDv, dv, name)
   133  		}
   134  	}
   135  	number, ok := fn.nameToNumber[name]
   136  	if !ok {
   137  		_, ok = fn.numberToName[preferredNumber]
   138  		if preferredNumber != -1 && !ok {
   139  			// cool - we can use this number globally
   140  			number = preferredNumber
   141  		} else {
   142  			// find a new FieldNumber
   143  			fn.lowestUnassignedFieldNumber++
   144  			for _, ok = fn.numberToName[fn.lowestUnassignedFieldNumber]; ok; {
   145  				// might not be up to date - lets do the work once needed
   146  				fn.lowestUnassignedFieldNumber++
   147  				_, ok = fn.numberToName[fn.lowestUnassignedFieldNumber]
   148  			}
   149  			number = fn.lowestUnassignedFieldNumber
   150  		}
   151  
   152  		fn.numberToName[number] = name
   153  		fn.nameToNumber[name] = number
   154  	}
   155  	return number
   156  }
   157  
// FieldInfosBuilder collects FieldInfo objects by name and turns them into a
// FieldInfos via Finish. Field numbers for new fields are obtained from the
// FieldNumbers passed to NewFieldInfosBuilder.
type FieldInfosBuilder struct {
	byName             map[string]*FieldInfo // fields added so far, keyed by FieldInfo.Name
	globalFieldNumbers *FieldNumbers         // source of field numbers; asserted non-nil by NewFieldInfosBuilder
}
   162  
   163  func NewFieldInfosBuilder(globalFieldNumbers *FieldNumbers) *FieldInfosBuilder {
   164  	assert(globalFieldNumbers != nil)
   165  	return &FieldInfosBuilder{
   166  		byName:             make(map[string]*FieldInfo),
   167  		globalFieldNumbers: globalFieldNumbers,
   168  	}
   169  }
   170  
   171  func assert(ok bool) {
   172  	assert2(ok, "assert fail")
   173  }
   174  
   175  func assert2(ok bool, msg string, args ...interface{}) {
   176  	if !ok {
   177  		panic(fmt.Sprintf(msg, args...))
   178  	}
   179  }
   180  
   181  /*
   182  NOTE: this method does not carry over termVector booleans nor
   183  docValuesType; the indexer chain  (TermVectorsConsumerPerField,
   184  DocFieldProcessor) must set these fields when they succeed in
   185  consuming the document
   186  */
   187  func (b *FieldInfosBuilder) AddOrUpdate(name string, fieldType IndexableFieldType) *FieldInfo {
   188  	// TODO: really, indexer shouldn't even call this method (it's only
   189  	// called from DocFieldProcessor); rather, each component in the
   190  	// chain should update what it "owns". E.g., fieldType.indexOptions()
   191  	// should be updated by maybe FreqProxTermsWriterPerField:
   192  	return b.addOrUpdateInternal(name, -1, fieldType.Indexed(), false,
   193  		fieldType.OmitNorms(), false,
   194  		fieldType.IndexOptions(), fieldType.DocValueType(), DocValuesType(0))
   195  }
   196  
   197  func (b *FieldInfosBuilder) addOrUpdateInternal(name string,
   198  	preferredFieldNumber int, isIndexed bool, storeTermVector bool,
   199  	omitNorms bool, storePayloads bool, indexOptions IndexOptions,
   200  	docValues DocValuesType, normType DocValuesType) *FieldInfo {
   201  
   202  	if fi, ok := b.byName[name]; ok {
   203  		panic("not implemented yet")
   204  		return fi
   205  	} else {
   206  		// This field wasn't yet added to this in-RAM segment's
   207  		// FieldInfos, so now we get a global number for this field. If
   208  		// the field was seen before then we'll get the same name and
   209  		// number, else we'll allocate a new one:
   210  		fieldNumber := int32(b.globalFieldNumbers.addOrGet(name, preferredFieldNumber, docValues))
   211  		fi = NewFieldInfo(name, isIndexed, fieldNumber, storeTermVector,
   212  			omitNorms, storePayloads, indexOptions, docValues, normType, -1, nil)
   213  		b.byName[fi.Name] = fi
   214  		return fi
   215  	}
   216  }
   217  
   218  func (b *FieldInfosBuilder) Finish() FieldInfos {
   219  	var infos []*FieldInfo
   220  	for _, v := range b.byName {
   221  		infos = append(infos, v)
   222  	}
   223  	return NewFieldInfos(infos)
   224  }