github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/codec/perfield/postingsFormat.go (about) 1 package perfield 2 3 import ( 4 "fmt" 5 . "github.com/balzaczyy/golucene/core/codec/spi" 6 . "github.com/balzaczyy/golucene/core/index/model" 7 "github.com/balzaczyy/golucene/core/util" 8 "io" 9 "strconv" 10 ) 11 12 // perfield/PerFieldPostingsFormat.java 13 14 /* 15 Enables per field postings support. 16 17 Note, when extending this class, the name Name() is written into the 18 index. In order for the field to be read, the name must resolve to 19 your implementation via LoadXYZ(). This method use hard-coded map to 20 resolve codec names. 21 22 Files written by each posting format have an additional suffix containing 23 the format name. For example, in a per-field configuration instead of 24 _1.prx fielnames would look like _1_Lucene40_0.prx. 25 */ 26 type PerFieldPostingsFormat struct { 27 postingsFormatForField func(string) PostingsFormat 28 } 29 30 func NewPerFieldPostingsFormat(f func(field string) PostingsFormat) *PerFieldPostingsFormat { 31 return &PerFieldPostingsFormat{f} 32 } 33 34 func (pf *PerFieldPostingsFormat) Name() string { 35 return "PerField40" 36 } 37 38 func (pf *PerFieldPostingsFormat) FieldsConsumer(state *SegmentWriteState) (FieldsConsumer, error) { 39 return newPerFieldPostingsWriter(pf, state), nil 40 } 41 42 func (pf *PerFieldPostingsFormat) FieldsProducer(state SegmentReadState) (FieldsProducer, error) { 43 return newPerFieldPostingsReader(state) 44 } 45 46 const ( 47 PER_FIELD_FORMAT_KEY = "PerFieldPostingsFormat.format" 48 PER_FIELD_SUFFIX_KEY = "PerFieldPostingsFormat.suffix" 49 ) 50 51 type FieldsConsumerAndSuffix struct { 52 consumer FieldsConsumer 53 suffix int 54 } 55 56 func (fcas *FieldsConsumerAndSuffix) Close() error { 57 return fcas.consumer.Close() 58 } 59 60 type PerFieldPostingsWriter struct { 61 owner *PerFieldPostingsFormat 62 formats map[PostingsFormat]*FieldsConsumerAndSuffix 63 suffixes map[string]int 64 segmentWriteState *SegmentWriteState 65 } 66 67 func newPerFieldPostingsWriter(owner *PerFieldPostingsFormat, 68 state *SegmentWriteState) FieldsConsumer { 69 return &PerFieldPostingsWriter{ 70 owner, 71 make(map[PostingsFormat]*FieldsConsumerAndSuffix), 72 make(map[string]int), 73 state, 74 } 75 } 76 77 func (w *PerFieldPostingsWriter) AddField(field *FieldInfo) (TermsConsumer, error) { 78 format := w.owner.postingsFormatForField(field.Name) 79 assert2(format != nil, "invalid nil PostingsFormat for field='%v'", field.Name) 80 formatName := format.Name() 81 82 previousValue := field.PutAttribute(PER_FIELD_FORMAT_KEY, formatName) 83 assert(previousValue == "") 84 85 var suffix int 86 87 consumer, ok := w.formats[format] 88 if !ok { 89 // First time we are seeing this format; create a new instance 90 91 // bump the suffix 92 if suffix, ok = w.suffixes[formatName]; !ok { 93 suffix = 0 94 } else { 95 suffix = suffix + 1 96 } 97 w.suffixes[formatName] = suffix 98 99 segmentSuffix := fullSegmentSuffix(field.Name, 100 w.segmentWriteState.SegmentSuffix, 101 _suffix(formatName, strconv.Itoa(suffix))) 102 103 consumer = new(FieldsConsumerAndSuffix) 104 var err error 105 consumer.consumer, err = format.FieldsConsumer( 106 NewSegmentWriteStateFrom(w.segmentWriteState, segmentSuffix)) 107 if err != nil { 108 return nil, err 109 } 110 consumer.suffix = suffix 111 w.formats[format] = consumer 112 } else { 113 // we've already seen this format, so just grab its suffix 114 _, ok := w.suffixes[formatName] 115 assert(ok) 116 suffix = consumer.suffix 117 } 118 119 previousValue = field.PutAttribute(PER_FIELD_SUFFIX_KEY, fmt.Sprintf("%v", suffix)) 120 assert(previousValue == "") 121 122 // TODO: we should only provide the "slice" of FIS that this PF 123 // actually sees ... then stuff like .hasProx could work correctly? 124 // NOTE: .hasProx is already broken in the same way for the 125 // non-perfield case, if there is a fieldInfo with prox that has no 126 // postings, you get a 0 byte file. 127 return consumer.consumer.AddField(field) 128 } 129 130 func assert(ok bool) { 131 if !ok { 132 panic("assert fail") 133 } 134 } 135 136 func assert2(ok bool, msg string, args ...interface{}) { 137 if !ok { 138 panic(fmt.Sprintf(msg, args...)) 139 } 140 } 141 142 func (w *PerFieldPostingsWriter) Close() error { 143 var subs []io.Closer 144 for _, v := range w.formats { 145 subs = append(subs, v) 146 } 147 return util.Close(subs...) 148 } 149 150 func _suffix(formatName, suffix string) string { 151 return formatName + "_" + suffix 152 } 153 154 func fullSegmentSuffix(fieldName, outerSegmentSuffix, segmentSuffix string) string { 155 if len(outerSegmentSuffix) == 0 { 156 return segmentSuffix 157 } 158 // TODO: support embedding; I think it should work but 159 // we need a test confirm to confirm 160 // return outerSegmentSuffix + "_" + segmentSuffix; 161 panic(fmt.Sprintf( 162 "cannot embed PerFieldPostingsFormat inside itself (field '%v' returned PerFieldPostingsFormat)", 163 fieldName)) 164 } 165 166 type PerFieldPostingsReader struct { 167 fields map[string]FieldsProducer 168 formats map[string]FieldsProducer 169 } 170 171 func newPerFieldPostingsReader(state SegmentReadState) (fp FieldsProducer, err error) { 172 ans := PerFieldPostingsReader{ 173 make(map[string]FieldsProducer), 174 make(map[string]FieldsProducer), 175 } 176 // Read _X.per and init each format: 177 success := false 178 defer func() { 179 if !success { 180 // log.Printf("Failed to initialize PerFieldPostingsReader.") 181 // if err != nil { 182 // log.Print("DEBUG ", err) 183 // } 184 fps := make([]FieldsProducer, 0) 185 for _, v := range ans.formats { 186 fps = append(fps, v) 187 } 188 items := make([]io.Closer, len(fps)) 189 for i, v := range fps { 190 items[i] = v 191 } 192 util.CloseWhileSuppressingError(items...) 193 } 194 }() 195 // Read field name -> format name 196 for _, fi := range state.FieldInfos.Values { 197 // log.Printf("Processing %v...", fi) 198 if fi.IsIndexed() { 199 fieldName := fi.Name 200 // log.Printf("Name: %v", fieldName) 201 if formatName := fi.Attribute(PER_FIELD_FORMAT_KEY); formatName != "" { 202 // log.Printf("Format: %v", formatName) 203 // null formatName means the field is in fieldInfos, but has no postings! 204 suffix := fi.Attribute(PER_FIELD_SUFFIX_KEY) 205 // log.Printf("Suffix: %v", suffix) 206 assert(suffix != "") 207 format := LoadPostingsFormat(formatName) 208 segmentSuffix := formatName + "_" + suffix 209 // log.Printf("Segment suffix: %v", segmentSuffix) 210 if _, ok := ans.formats[segmentSuffix]; !ok { 211 // log.Printf("Loading fields producer: %v", segmentSuffix) 212 newReadState := state // clone 213 newReadState.SegmentSuffix = formatName + "_" + suffix 214 fp, err = format.FieldsProducer(newReadState) 215 if err != nil { 216 return fp, err 217 } 218 ans.formats[segmentSuffix] = fp 219 } 220 ans.fields[fieldName] = ans.formats[segmentSuffix] 221 } 222 } 223 } 224 success = true 225 return &ans, nil 226 } 227 228 func (r *PerFieldPostingsReader) Terms(field string) Terms { 229 if p, ok := r.fields[field]; ok { 230 return p.Terms(field) 231 } 232 return nil 233 } 234 235 func (r *PerFieldPostingsReader) Close() error { 236 fps := make([]FieldsProducer, 0) 237 for _, v := range r.formats { 238 fps = append(fps, v) 239 } 240 items := make([]io.Closer, len(fps)) 241 for i, v := range fps { 242 items[i] = v 243 } 244 return util.Close(items...) 245 }