github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/document/field.go (about) 1 package document 2 3 import ( 4 "bytes" 5 "fmt" 6 "github.com/balzaczyy/golucene/core/analysis" 7 . "github.com/balzaczyy/golucene/core/analysis/tokenattributes" 8 "github.com/balzaczyy/golucene/core/index/model" 9 "io" 10 "log" 11 "strconv" 12 ) 13 14 // document/Field.java 15 16 type Field struct { 17 _type *FieldType // Field's type 18 _name string // Field's name 19 _data interface{} // Field's value 20 _boost float32 // Field's boost 21 22 /* 23 Pre-analyzed tokenStream for indexed fields; this is 24 separte from fieldsData because you are allowed to 25 have both; eg maybe field has a String value but you 26 customize how it's tokenized 27 */ 28 _tokenStream analysis.TokenStream 29 } 30 31 /* Create field with Reader value. */ 32 func NewFieldFromReader(name string, reader io.RuneReader, ft *FieldType) *Field { 33 assert2(name != "", "name cannot be empty") 34 assert2(ft != nil, "type can not be nil") 35 assert2(reader != nil, "reader cannot be nil") 36 assert2(!ft.Stored(), "fields with a Reader value cannot be stored") 37 assert2(!ft.Indexed() || ft.Tokenized(), "non-tokenized fields must use String values") 38 return &Field{ft, name, reader, 1.0, nil} 39 } 40 41 // Create field with String value 42 func NewFieldFromString(name, value string, ft *FieldType) *Field { 43 assert2(name != "", "name cannot be empty") 44 assert2(value != "", "value cannot be empty") 45 assert2(ft.stored || ft.indexed, 46 "it doesn't make sense to have a field that is neither indexed nor stored") 47 return &Field{_type: ft, _name: name, _data: value, _boost: 1} 48 } 49 50 func (f *Field) StringValue() string { 51 switch f._data.(type) { 52 case string: 53 return f._data.(string) 54 case int: 55 return strconv.Itoa(f._data.(int)) 56 default: 57 log.Println("Unknown type", f._data) 58 panic("not implemented yet") 59 } 60 } 61 62 func assert2(ok bool, msg string) { 63 if !ok { 64 panic(msg) 65 } 66 } 67 68 func (f *Field) ReaderValue() io.RuneReader { 69 if v, ok := f._data.(io.RuneReader); ok { 70 return v 71 } 72 return nil 73 } 74 75 func (f *Field) Name() string { 76 return f._name 77 } 78 79 func (f *Field) Boost() float32 { 80 return f._boost 81 } 82 83 func (f *Field) NumericValue() interface{} { 84 switch f._data.(type) { 85 case int32, int64, float32, float64: 86 return f._data 87 default: 88 return nil 89 } 90 } 91 92 func (f *Field) BinaryValue() []byte { 93 if v, ok := f._data.([]byte); ok { 94 return v 95 } 96 return nil 97 } 98 99 func (f *Field) String() string { 100 var buf bytes.Buffer 101 fmt.Fprintf(&buf, "%v<%v:", f._type, f._name) 102 if f._data != nil { 103 fmt.Fprint(&buf, f._data) 104 } 105 fmt.Fprint(&buf, ">") 106 return buf.String() 107 } 108 109 func (f *Field) FieldType() model.IndexableFieldType { 110 return f._type 111 } 112 113 func (f *Field) TokenStream(analyzer analysis.Analyzer, reuse analysis.TokenStream) (ts analysis.TokenStream, err error) { 114 if !f.FieldType().Indexed() { 115 return nil, nil 116 } 117 118 if nt := f.FieldType().(*FieldType).NumericType(); nt != NumericType(0) { 119 panic("not implemented yet") 120 } 121 122 if !f.FieldType().Tokenized() { 123 assert2(f.StringValue() != "", "Non-Tokenized Fields must have a string value") 124 if _, ok := reuse.(*StringTokenStream); !ok { 125 reuse = newStringTokenStream() 126 } 127 reuse.(*StringTokenStream).setValue(f.StringValue()) 128 return reuse, nil 129 } 130 131 if f._tokenStream != nil { 132 return f._tokenStream, nil 133 } else if f.ReaderValue() != nil { 134 return analyzer.TokenStreamForReader(f._name, f.ReaderValue()) 135 } else if f.StringValue() != "" { 136 return analyzer.TokenStreamForString(f._name, f.StringValue()) 137 } 138 139 panic(fmt.Sprintf("Field must have either TokenStream, String, Reader, or Number value; got %v", f)) 140 } 141 142 type StringTokenStream struct { 143 *analysis.TokenStreamImpl 144 termAttribute CharTermAttribute 145 offsetAttribute OffsetAttribute 146 used bool 147 value string 148 } 149 150 /* 151 Creates a new TokenStream that returns a string as single token. 152 153 Warning: Does not initialize the value, you must call setValue() afterwards! 154 */ 155 func newStringTokenStream() *StringTokenStream { 156 ans := &StringTokenStream{TokenStreamImpl: analysis.NewTokenStream()} 157 ans.termAttribute = ans.Attributes().Add("CharTermAttribute").(CharTermAttribute) 158 ans.offsetAttribute = ans.Attributes().Add("OffsetAttribute").(OffsetAttribute) 159 return ans 160 } 161 162 func (ts *StringTokenStream) setValue(value string) { 163 ts.value = value 164 } 165 166 func (ts *StringTokenStream) IncrementToken() (bool, error) { 167 if ts.used { 168 return false, nil 169 } 170 ts.Attributes().Clear() 171 ts.termAttribute.AppendString(ts.value) 172 ts.offsetAttribute.SetOffset(0, len(ts.value)) 173 ts.used = true 174 return true, nil 175 } 176 177 /* Specifies whether and how a field should be stored. */ 178 type Store int 179 180 /* 181 Store the original field value in the index. This is useful for short 182 texts like a document's title which should be displayed with the 183 results. The value is stored in its original form, i.e. no analyzer 184 is used before it is stored. 185 */ 186 const STORE_YES = Store(1) 187 188 /* Do not store the field's value in the index. */ 189 const STORE_NO = Store(2) 190 191 // document/StringField.java 192 193 /* Indexed, not tokenized, omits norms, indexes DOCS_ONLY, not stored. */ 194 var STRING_FIELD_TYPE_NOT_STORED = func() *FieldType { 195 ft := newFieldType() 196 ft.indexed = true 197 ft._omitNorms = true 198 ft._indexOptions = model.INDEX_OPT_DOCS_ONLY 199 ft._tokenized = false 200 ft.frozen = true 201 return ft 202 }() 203 204 /* Indexed, not tokenized, omits norms, indexes DOCS_ONLY, stored */ 205 var STRING_FIELD_TYPE_STORED = func() *FieldType { 206 ft := newFieldType() 207 ft.indexed = true 208 ft._omitNorms = true 209 ft._indexOptions = model.INDEX_OPT_DOCS_ONLY 210 ft.stored = true 211 ft._tokenized = false 212 ft.frozen = true 213 return ft 214 }() 215 216 /* 217 Creates a new field that is indexed but not tokenized: the entire 218 String value is indexed as a single token. For example, this might be 219 used for a 'country' field or an 'id' field, or any field that you 220 intend to use for sorting or access through the field cache. 221 */ 222 func newStringField(name, value string, stored Store) *Field { 223 return NewFieldFromString(name, value, map[Store]*FieldType{ 224 STORE_YES: STRING_FIELD_TYPE_STORED, 225 STORE_NO: STRING_FIELD_TYPE_NOT_STORED, 226 }[stored]) 227 } 228 229 // document/TextField.java 230 231 /* indexed, tokenized, not stored. */ 232 var TEXT_FIELD_TYPE_NOT_STORED = func() *FieldType { 233 ft := newFieldType() 234 ft.indexed = true 235 ft._tokenized = true 236 ft.frozen = true 237 return ft 238 }() 239 240 /* indexed, tokenized, stored. */ 241 var TEXT_FIELD_TYPE_STORED = func() *FieldType { 242 ft := newFieldType() 243 ft.indexed = true 244 ft._tokenized = true 245 ft.stored = true 246 ft.frozen = true 247 return ft 248 }() 249 250 /* 251 A field that is indexed and tokenized, without term vectors. For 252 example, this would be used on a 'body' field, that contains the bulk 253 of a document's text. 254 */ 255 type TextField struct { 256 *Field 257 } 258 259 /* Creates a new un-stored TextField with Reader value */ 260 func NewTextFieldFromReader(name string, reader io.RuneReader) *TextField { 261 return &TextField{ 262 NewFieldFromReader(name, reader, TEXT_FIELD_TYPE_NOT_STORED), 263 } 264 } 265 266 func NewTextFieldFromString(name, value string, store Store) *TextField { 267 return &TextField{NewFieldFromString(name, value, map[Store]*FieldType{ 268 STORE_YES: TEXT_FIELD_TYPE_STORED, 269 STORE_NO: TEXT_FIELD_TYPE_NOT_STORED, 270 }[store])} 271 } 272 273 // document/StoredField.java 274 275 // Type for a stored-only field. 276 var STORED_FIELD_TYPE = func() *FieldType { 277 ans := newFieldType() 278 ans.stored = true 279 return ans 280 }() 281 282 /* 283 A field whose value is stored so that IndexSearcher.doc() 284 and IndexReader.document() will return the field and its 285 value. 286 */ 287 type StoredField struct { 288 *Field 289 } 290 291 /* 292 Create a stored-only field with the given binary value. 293 294 NOTE: the provided byte[] is not copied so be sure 295 not to change it until you're done with this field. 296 */ 297 // func newStoredField(name string, value []byte) *StoredField { 298 // return &StoredField{newStringField(name, value, STORED_FIELD_TYPE)} 299 // }