github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/model/fieldInfos.go (about) 1 package model 2 3 import ( 4 "fmt" 5 "sort" 6 "sync" 7 ) 8 9 // Collection of FieldInfo(s) (accessible by number of by name) 10 type FieldInfos struct { 11 HasFreq bool 12 HasProx bool 13 HasPayloads bool 14 HasOffsets bool 15 HasVectors bool 16 HasNorms bool 17 HasDocValues bool 18 19 byNumber map[int32]*FieldInfo 20 byName map[string]*FieldInfo 21 Values []*FieldInfo // sorted by ID 22 } 23 24 func NewFieldInfos(infos []*FieldInfo) FieldInfos { 25 self := FieldInfos{byNumber: make(map[int32]*FieldInfo), byName: make(map[string]*FieldInfo)} 26 27 numbers := make([]int32, 0) 28 for _, info := range infos { 29 assert2(info.Number >= 0, "illegal field number: %v for field %v", info.Number, info.Name) 30 if prev, ok := self.byNumber[info.Number]; ok { 31 panic(fmt.Sprintf("duplicate field numbers: %v and %v have: %v", prev.Name, info.Name, info.Number)) 32 } 33 self.byNumber[info.Number] = info 34 numbers = append(numbers, info.Number) 35 if prev, ok := self.byName[info.Name]; ok { 36 panic(fmt.Sprintf("duplicate field names: %v and %v have: %v", prev.Number, info.Number, info.Name)) 37 } 38 self.byName[info.Name] = info 39 40 self.HasVectors = self.HasVectors || info.storeTermVector 41 self.HasProx = self.HasProx || info.indexed && info.indexOptions >= INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS 42 self.HasFreq = self.HasFreq || info.indexed && info.indexOptions != INDEX_OPT_DOCS_ONLY 43 self.HasOffsets = self.HasOffsets || info.indexed && info.indexOptions >= INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS 44 self.HasNorms = self.HasNorms || info.normType != 0 45 self.HasDocValues = self.HasDocValues || info.docValueType != 0 46 self.HasPayloads = self.HasPayloads || info.storePayloads 47 } 48 49 sort.Sort(Int32Slice(numbers)) 50 self.Values = make([]*FieldInfo, len(infos)) 51 for i, v := range numbers { 52 self.Values[int32(i)] = self.byNumber[v] 53 } 54 55 return self 56 } 57 58 /* Returns the number of fields */ 59 func (infos FieldInfos) Size() int { 60 assert(len(infos.byNumber) == len(infos.byName)) 61 return len(infos.byNumber) 62 } 63 64 /* Return the FieldInfo object referenced by the field name */ 65 func (infos FieldInfos) FieldInfoByName(fieldName string) *FieldInfo { 66 return infos.byName[fieldName] 67 } 68 69 /* Return the FieldInfo object referenced by the fieldNumber. */ 70 func (infos FieldInfos) FieldInfoByNumber(fieldNumber int) *FieldInfo { 71 assert2(fieldNumber >= 0, "Illegal field number: %v", fieldNumber) 72 return infos.byNumber[int32(fieldNumber)] 73 } 74 75 func (fis FieldInfos) String() string { 76 return fmt.Sprintf(` 77 hasFreq = %v 78 hasProx = %v 79 hasPayloads = %v 80 hasOffsets = %v 81 hasVectors = %v 82 hasNorms = %v 83 hasDocValues = %v 84 %v`, fis.HasFreq, fis.HasProx, fis.HasPayloads, fis.HasOffsets, 85 fis.HasVectors, fis.HasNorms, fis.HasDocValues, fis.Values) 86 } 87 88 type FieldNumbers struct { 89 sync.Locker 90 numberToName map[int]string 91 nameToNumber map[string]int 92 // We use this to enforce that a given field never changes DV type, 93 // even across segments / IndexWriter sessions: 94 docValuesType map[string]DocValuesType 95 // TODO: we should similarly catch an attempt to turn norms back on 96 // after they were already ommitted; today we silently discard the 97 // norm but this is badly trappy 98 lowestUnassignedFieldNumber int 99 } 100 101 func NewFieldNumbers() *FieldNumbers { 102 return &FieldNumbers{ 103 Locker: &sync.Mutex{}, 104 nameToNumber: make(map[string]int), 105 numberToName: make(map[int]string), 106 docValuesType: make(map[string]DocValuesType), 107 lowestUnassignedFieldNumber: -1, 108 } 109 } 110 111 func (fn *FieldNumbers) AddOrGet(info *FieldInfo) int { 112 return fn.addOrGet(info.Name, int(info.Number), info.docValueType) 113 } 114 115 /* 116 Returns the global field number for the given field name. If the name 117 does not exist yet it tries to add it with the given preferred field 118 number assigned if possible otherwise the first unassigned field 119 number is used as the field number. 120 */ 121 func (fn *FieldNumbers) addOrGet(name string, preferredNumber int, dv DocValuesType) int { 122 fn.Lock() 123 defer fn.Unlock() 124 125 if dv != 0 { 126 currentDv, ok := fn.docValuesType[name] 127 if !ok || currentDv == 0 { 128 fn.docValuesType[name] = dv 129 } else { 130 assert2(currentDv == dv, 131 "cannot change DocValues type from %v to %v for field '%v'", 132 currentDv, dv, name) 133 } 134 } 135 number, ok := fn.nameToNumber[name] 136 if !ok { 137 _, ok = fn.numberToName[preferredNumber] 138 if preferredNumber != -1 && !ok { 139 // cool - we can use this number globally 140 number = preferredNumber 141 } else { 142 // find a new FieldNumber 143 fn.lowestUnassignedFieldNumber++ 144 for _, ok = fn.numberToName[fn.lowestUnassignedFieldNumber]; ok; { 145 // might not be up to date - lets do the work once needed 146 fn.lowestUnassignedFieldNumber++ 147 _, ok = fn.numberToName[fn.lowestUnassignedFieldNumber] 148 } 149 number = fn.lowestUnassignedFieldNumber 150 } 151 152 fn.numberToName[number] = name 153 fn.nameToNumber[name] = number 154 } 155 return number 156 } 157 158 type FieldInfosBuilder struct { 159 byName map[string]*FieldInfo 160 globalFieldNumbers *FieldNumbers 161 } 162 163 func NewFieldInfosBuilder(globalFieldNumbers *FieldNumbers) *FieldInfosBuilder { 164 assert(globalFieldNumbers != nil) 165 return &FieldInfosBuilder{ 166 byName: make(map[string]*FieldInfo), 167 globalFieldNumbers: globalFieldNumbers, 168 } 169 } 170 171 func assert(ok bool) { 172 assert2(ok, "assert fail") 173 } 174 175 func assert2(ok bool, msg string, args ...interface{}) { 176 if !ok { 177 panic(fmt.Sprintf(msg, args...)) 178 } 179 } 180 181 /* 182 NOTE: this method does not carry over termVector booleans nor 183 docValuesType; the indexer chain (TermVectorsConsumerPerField, 184 DocFieldProcessor) must set these fields when they succeed in 185 consuming the document 186 */ 187 func (b *FieldInfosBuilder) AddOrUpdate(name string, fieldType IndexableFieldType) *FieldInfo { 188 // TODO: really, indexer shouldn't even call this method (it's only 189 // called from DocFieldProcessor); rather, each component in the 190 // chain should update what it "owns". E.g., fieldType.indexOptions() 191 // should be updated by maybe FreqProxTermsWriterPerField: 192 return b.addOrUpdateInternal(name, -1, fieldType.Indexed(), false, 193 fieldType.OmitNorms(), false, 194 fieldType.IndexOptions(), fieldType.DocValueType(), DocValuesType(0)) 195 } 196 197 func (b *FieldInfosBuilder) addOrUpdateInternal(name string, 198 preferredFieldNumber int, isIndexed bool, storeTermVector bool, 199 omitNorms bool, storePayloads bool, indexOptions IndexOptions, 200 docValues DocValuesType, normType DocValuesType) *FieldInfo { 201 202 if fi, ok := b.byName[name]; ok { 203 panic("not implemented yet") 204 return fi 205 } else { 206 // This field wasn't yet added to this in-RAM segment's 207 // FieldInfos, so now we get a global number for this field. If 208 // the field was seen before then we'll get the same name and 209 // number, else we'll allocate a new one: 210 fieldNumber := int32(b.globalFieldNumbers.addOrGet(name, preferredFieldNumber, docValues)) 211 fi = NewFieldInfo(name, isIndexed, fieldNumber, storeTermVector, 212 omitNorms, storePayloads, indexOptions, docValues, normType, -1, nil) 213 b.byName[fi.Name] = fi 214 return fi 215 } 216 } 217 218 func (b *FieldInfosBuilder) Finish() FieldInfos { 219 var infos []*FieldInfo 220 for _, v := range b.byName { 221 infos = append(infos, v) 222 } 223 return NewFieldInfos(infos) 224 }