github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/codec/lucene42/fieldInfos.go (about) 1 package lucene42 2 3 import ( 4 "errors" 5 "fmt" 6 "github.com/balzaczyy/golucene/core/codec" 7 . "github.com/balzaczyy/golucene/core/codec/spi" 8 . "github.com/balzaczyy/golucene/core/index/model" 9 "github.com/balzaczyy/golucene/core/store" 10 "github.com/balzaczyy/golucene/core/util" 11 "log" 12 ) 13 14 // lucene42/Lucene42FieldInfosFormat.java 15 16 /* 17 Lucene 4.2 Field Infos format. 18 19 Field names are stored in the field info file, with suffix .fnm. 20 21 FieldInfos (.fnm) --> Header, HeaderCount, <FieldName, FieldNumber, 22 FieldBits, DocValuesBits, Attribute>^FieldsCount 23 24 Data types: 25 - Header --> CodecHeader 26 - FieldsCount --> VInt 27 - FieldName --> string 28 - FieldBits, DocValuesBit --> byte 29 - FieldNumber --> VInt 30 - Attributes --> map[string]string 31 32 Field Description: 33 - FieldsCount: the number of fields in this file. 34 - FieldName: name of the field as a UTF-8 string. 35 - FieldNumber: the field's number. NOte that unlike previous versions 36 of Lucene, the fields are not numbered implicitly by their order in 37 the file, instead explicitly. 38 - FieldBits: a byte containing field options. 39 - The low-order bit is one for indexed fields, and zero for non-indexed 40 fields. 41 - The second lowest-order bit is one for fields that have term vectors 42 stored, and zero for fields without term vectors. 43 - If the third lowest order-bit is set (0x4), offsets are stored into 44 the postings list in addition to positions. 45 - Fourth bit is unsed. 46 - If the fifth lowest-order bit is set (0x10), norms are omitted for 47 the indexed field. 48 - If the sixth lowest-order bit is set (0x20), payloads are stored 49 for the indexed field. 50 - If the seventh lowest-order bit is set (0x40), term frequencies a 51 and ositions omitted for the indexed field. 52 - If the eighth lowest-order bit is set (0x80), positions are omitted 53 for the indexed field. 54 - DocValuesBits: a byte containing per-document value types. The type 55 recorded as two four-bit intergers, with the high-order bits 56 representing norms options, and low-order bits representing DocVlaues 57 options. Each four-bit interger can be decoded as such: 58 - 0: no DocValues for this field. 59 - 1: NumericDocValues. 60 - 2: BinaryDocvalues. 61 - 3: SortedDocValues. 62 - Attributes: a key-value map of codec-private attributes. 63 */ 64 type Lucene42FieldInfosFormat struct { 65 reader FieldInfosReader 66 // writer FieldInfosWriter 67 } 68 69 func NewLucene42FieldInfosFormat() *Lucene42FieldInfosFormat { 70 return &Lucene42FieldInfosFormat{ 71 reader: Lucene42FieldInfosReader, 72 // writer: Lucene42FieldInfosWriter, 73 } 74 } 75 76 func (f *Lucene42FieldInfosFormat) FieldInfosReader() FieldInfosReader { 77 return f.reader 78 } 79 80 func (f *Lucene42FieldInfosFormat) FieldInfosWriter() FieldInfosWriter { 81 panic("this codec can only be used for reading") 82 // return f.writer 83 } 84 85 const ( 86 // Extension of field infos 87 LUCENE42_FI_EXTENSION = "fnm" 88 89 // Codec header 90 LUCENE42_FI_CODEC_NAME = "Lucene42FieldInfos" 91 LUCENE42_FI_FORMAT_START = 0 92 LUCENE42_FI_FORMAT_CURRENT = LUCENE42_FI_FORMAT_START 93 94 // Field flags 95 LUCENE42_FI_IS_INDEXED = 0x1 96 LUCENE42_FI_STORE_TERMVECTOR = 0x2 97 LUCENE42_FI_STORE_OFFSETS_IN_POSTINGS = 0x4 98 LUCENE42_FI_OMIT_NORMS = 0x10 99 LUCENE42_FI_STORE_PAYLOADS = 0x20 100 LUCENE42_FI_OMIT_TERM_FREQ_AND_POSITIONS = 0x40 101 LUCENE42_FI_OMIT_POSITIONS = 0x80 102 ) 103 104 var Lucene42FieldInfosReader = func(dir store.Directory, 105 segment, suffix string, context store.IOContext) (fi FieldInfos, err error) { 106 107 log.Printf("Reading FieldInfos from %v...", dir) 108 fi = FieldInfos{} 109 fileName := util.SegmentFileName(segment, "", LUCENE42_FI_EXTENSION) 110 log.Printf("Segment: %v", fileName) 111 input, err := dir.OpenInput(fileName, context) 112 if err != nil { 113 return fi, err 114 } 115 log.Printf("Reading %v", input) 116 117 success := false 118 defer func() { 119 if success { 120 input.Close() 121 } else { 122 util.CloseWhileHandlingError(err, input) 123 } 124 }() 125 126 _, err = codec.CheckHeader(input, 127 LUCENE42_FI_CODEC_NAME, 128 LUCENE42_FI_FORMAT_START, 129 LUCENE42_FI_FORMAT_CURRENT) 130 if err != nil { 131 return fi, err 132 } 133 134 size, err := input.ReadVInt() //read in the size 135 if err != nil { 136 return fi, err 137 } 138 log.Printf("Found %v FieldInfos.", size) 139 140 infos := make([]*FieldInfo, size) 141 for i, _ := range infos { 142 name, err := input.ReadString() 143 if err != nil { 144 return fi, err 145 } 146 fieldNumber, err := input.ReadVInt() 147 if err != nil { 148 return fi, err 149 } 150 bits, err := input.ReadByte() 151 if err != nil { 152 return fi, err 153 } 154 isIndexed := (bits & LUCENE42_FI_IS_INDEXED) != 0 155 storeTermVector := (bits & LUCENE42_FI_STORE_TERMVECTOR) != 0 156 omitNorms := (bits & LUCENE42_FI_OMIT_NORMS) != 0 157 storePayloads := (bits & LUCENE42_FI_STORE_PAYLOADS) != 0 158 var indexOptions IndexOptions 159 switch { 160 case !isIndexed: 161 indexOptions = IndexOptions(0) 162 case (bits & LUCENE42_FI_OMIT_TERM_FREQ_AND_POSITIONS) != 0: 163 indexOptions = INDEX_OPT_DOCS_ONLY 164 case (bits & LUCENE42_FI_OMIT_POSITIONS) != 0: 165 indexOptions = INDEX_OPT_DOCS_AND_FREQS 166 case (bits & LUCENE42_FI_STORE_OFFSETS_IN_POSTINGS) != 0: 167 indexOptions = INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS 168 default: 169 indexOptions = INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS 170 } 171 172 // DV Types are packed in one byte 173 val, err := input.ReadByte() 174 if err != nil { 175 return fi, err 176 } 177 docValuesType, err := getDocValuesType(input, (byte)(val&0x0F)) 178 if err != nil { 179 return fi, err 180 } 181 normsType, err := getDocValuesType(input, (byte)((uint8(val)>>4)&0x0F)) 182 if err != nil { 183 return fi, err 184 } 185 attributes, err := input.ReadStringStringMap() 186 if err != nil { 187 return fi, err 188 } 189 infos[i] = NewFieldInfo(name, isIndexed, fieldNumber, storeTermVector, 190 omitNorms, storePayloads, indexOptions, docValuesType, normsType, -1, attributes) 191 } 192 193 if err = codec.CheckEOF(input); err != nil { 194 return fi, err 195 } 196 fi = NewFieldInfos(infos) 197 success = true 198 return fi, nil 199 } 200 201 func getDocValuesType(input store.IndexInput, b byte) (t DocValuesType, err error) { 202 switch b { 203 case 0: 204 return DocValuesType(0), nil 205 case 1: 206 return DOC_VALUES_TYPE_NUMERIC, nil 207 case 2: 208 return DOC_VALUES_TYPE_BINARY, nil 209 case 3: 210 return DOC_VALUES_TYPE_SORTED, nil 211 case 4: 212 return DOC_VALUES_TYPE_SORTED_SET, nil 213 default: 214 return DocValuesType(0), errors.New( 215 fmt.Sprintf("invalid docvalues byte: %v (resource=%v)", b, input)) 216 } 217 } 218 219 // lucene42/Lucene42FieldInfosWriter.java 220 // var Lucene42FieldInfosWriter = func(dir store.Directory, 221 // segName string, infos FieldInfos, ctx store.IOContext) (err error) { 222 223 // fileName := util.SegmentFileName(segName, "", LUCENE42_FI_EXTENSION) 224 // var output store.IndexOutput 225 // output, err = dir.CreateOutput(fileName, ctx) 226 // if err != nil { 227 // return err 228 // } 229 230 // var success = false 231 // defer func() { 232 // if success { 233 // err = mergeError(err, output.Close()) 234 // } else { 235 // util.CloseWhileSuppressingError(output) 236 // } 237 // }() 238 239 // err = codec.WriteHeader(output, LUCENE42_FI_CODEC_NAME, LUCENE42_FI_FORMAT_CURRENT) 240 // if err != nil { 241 // return err 242 // } 243 // err = output.WriteVInt(int32(len(infos.Values))) 244 // if err != nil { 245 // return err 246 // } 247 // for _, fi := range infos.Values { 248 // indexOptions := fi.IndexOptions() 249 // bits := byte(0x0) 250 // if fi.HasVectors() { 251 // bits |= LUCENE42_FI_STORE_TERMVECTOR 252 // } 253 // if fi.OmitsNorms() { 254 // bits |= LUCENE42_FI_OMIT_NORMS 255 // } 256 // if fi.HasPayloads() { 257 // bits |= LUCENE42_FI_STORE_PAYLOADS 258 // } 259 // if fi.IsIndexed() { 260 // bits |= LUCENE42_FI_IS_INDEXED 261 // assert(int(indexOptions) >= int(INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS) || !fi.HasPayloads()) 262 // switch indexOptions { 263 // case INDEX_OPT_DOCS_ONLY: 264 // bits |= LUCENE42_FI_OMIT_TERM_FREQ_AND_POSITIONS 265 // case INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS: 266 // bits |= LUCENE42_FI_STORE_OFFSETS_IN_POSTINGS 267 // case INDEX_OPT_DOCS_AND_FREQS: 268 // bits |= LUCENE42_FI_OMIT_POSITIONS 269 // } 270 // } 271 // err = output.WriteString(fi.Name) 272 // if err != nil { 273 // return err 274 // } 275 // err = output.WriteVInt(fi.Number) 276 // if err != nil { 277 // return err 278 // } 279 // err = output.WriteByte(bits) 280 // if err != nil { 281 // return err 282 // } 283 284 // // pack the DV types in one byte 285 // dv := docValuesByte(fi.DocValuesType()) 286 // nrm := docValuesByte(fi.NormType()) 287 // assert((int(dv)&(^0xF)) == 0 && (int(nrm)&(^0x0F)) == 0) 288 // val := byte(0xFF & ((nrm << 4) | dv)) 289 // err = output.WriteByte(val) 290 // if err != nil { 291 // return err 292 // } 293 // err = output.WriteStringStringMap(fi.Attributes()) 294 // if err != nil { 295 // return err 296 // } 297 // } 298 // success = true 299 // return nil 300 // } 301 302 // func docValuesByte(typ DocValuesType) byte { 303 // n := byte(typ) 304 // assert(n >= 0 && n <= 4) 305 // return n 306 // }