github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/codec/perfield/postingsFormat.go (about)

     1  package perfield
     2  
     3  import (
     4  	"fmt"
     5  	. "github.com/balzaczyy/golucene/core/codec/spi"
     6  	. "github.com/balzaczyy/golucene/core/index/model"
     7  	"github.com/balzaczyy/golucene/core/util"
     8  	"io"
     9  	"strconv"
    10  )
    11  
    12  // perfield/PerFieldPostingsFormat.java
    13  
/*
PerFieldPostingsFormat enables per-field postings support.

Note: when extending this type, the name returned by Name() is written
into the index. In order for the field to be read, the name must
resolve to your implementation via LoadXYZ(). This method uses a
hard-coded map to resolve codec names.

Files written by each postings format have an additional suffix
containing the format name. For example, in a per-field configuration,
instead of _1.prx, file names would look like _1_Lucene40_0.prx.
*/
type PerFieldPostingsFormat struct {
	// postingsFormatForField is called with a field name and returns the
	// PostingsFormat to use for that field.
	postingsFormatForField func(string) PostingsFormat
}
    29  
    30  func NewPerFieldPostingsFormat(f func(field string) PostingsFormat) *PerFieldPostingsFormat {
    31  	return &PerFieldPostingsFormat{f}
    32  }
    33  
    34  func (pf *PerFieldPostingsFormat) Name() string {
    35  	return "PerField40"
    36  }
    37  
    38  func (pf *PerFieldPostingsFormat) FieldsConsumer(state *SegmentWriteState) (FieldsConsumer, error) {
    39  	return newPerFieldPostingsWriter(pf, state), nil
    40  }
    41  
    42  func (pf *PerFieldPostingsFormat) FieldsProducer(state SegmentReadState) (FieldsProducer, error) {
    43  	return newPerFieldPostingsReader(state)
    44  }
    45  
// Keys of the per-field attributes written by PerFieldPostingsWriter.AddField
// and read back by newPerFieldPostingsReader.
const (
	// PER_FIELD_FORMAT_KEY is the attribute key holding the postings format name.
	PER_FIELD_FORMAT_KEY = "PerFieldPostingsFormat.format"
	// PER_FIELD_SUFFIX_KEY is the attribute key holding the format's suffix.
	PER_FIELD_SUFFIX_KEY = "PerFieldPostingsFormat.suffix"
)
    50  
// FieldsConsumerAndSuffix pairs a FieldsConsumer with the integer suffix
// that was assigned to it.
type FieldsConsumerAndSuffix struct {
	// consumer is the delegate that Close and field additions are forwarded to.
	consumer FieldsConsumer
	// suffix is the number stored under PER_FIELD_SUFFIX_KEY for fields
	// handled by this consumer.
	suffix   int
}
    55  
    56  func (fcas *FieldsConsumerAndSuffix) Close() error {
    57  	return fcas.consumer.Close()
    58  }
    59  
// PerFieldPostingsWriter routes each added field to the FieldsConsumer of
// the PostingsFormat chosen for that field.
type PerFieldPostingsWriter struct {
	// owner supplies the field name -> PostingsFormat lookup.
	owner             *PerFieldPostingsFormat
	// formats holds the consumer (and its suffix) already created for each
	// PostingsFormat seen so far.
	formats           map[PostingsFormat]*FieldsConsumerAndSuffix
	// suffixes holds, per format name, the most recently assigned suffix.
	suffixes          map[string]int
	// segmentWriteState is the state each per-format write state is derived from.
	segmentWriteState *SegmentWriteState
}
    66  
    67  func newPerFieldPostingsWriter(owner *PerFieldPostingsFormat,
    68  	state *SegmentWriteState) FieldsConsumer {
    69  	return &PerFieldPostingsWriter{
    70  		owner,
    71  		make(map[PostingsFormat]*FieldsConsumerAndSuffix),
    72  		make(map[string]int),
    73  		state,
    74  	}
    75  }
    76  
    77  func (w *PerFieldPostingsWriter) AddField(field *FieldInfo) (TermsConsumer, error) {
    78  	format := w.owner.postingsFormatForField(field.Name)
    79  	assert2(format != nil, "invalid nil PostingsFormat for field='%v'", field.Name)
    80  	formatName := format.Name()
    81  
    82  	previousValue := field.PutAttribute(PER_FIELD_FORMAT_KEY, formatName)
    83  	assert(previousValue == "")
    84  
    85  	var suffix int
    86  
    87  	consumer, ok := w.formats[format]
    88  	if !ok {
    89  		// First time we are seeing this format; create a new instance
    90  
    91  		// bump the suffix
    92  		if suffix, ok = w.suffixes[formatName]; !ok {
    93  			suffix = 0
    94  		} else {
    95  			suffix = suffix + 1
    96  		}
    97  		w.suffixes[formatName] = suffix
    98  
    99  		segmentSuffix := fullSegmentSuffix(field.Name,
   100  			w.segmentWriteState.SegmentSuffix,
   101  			_suffix(formatName, strconv.Itoa(suffix)))
   102  
   103  		consumer = new(FieldsConsumerAndSuffix)
   104  		var err error
   105  		consumer.consumer, err = format.FieldsConsumer(
   106  			NewSegmentWriteStateFrom(w.segmentWriteState, segmentSuffix))
   107  		if err != nil {
   108  			return nil, err
   109  		}
   110  		consumer.suffix = suffix
   111  		w.formats[format] = consumer
   112  	} else {
   113  		// we've already seen this format, so just grab its suffix
   114  		_, ok := w.suffixes[formatName]
   115  		assert(ok)
   116  		suffix = consumer.suffix
   117  	}
   118  
   119  	previousValue = field.PutAttribute(PER_FIELD_SUFFIX_KEY, fmt.Sprintf("%v", suffix))
   120  	assert(previousValue == "")
   121  
   122  	// TODO: we should only provide the "slice" of FIS that this PF
   123  	// actually sees ... then stuff like .hasProx could work correctly?
   124  	// NOTE: .hasProx is already broken in the same way for the
   125  	// non-perfield case, if there is a fieldInfo with prox that has no
   126  	// postings, you get a 0 byte file.
   127  	return consumer.consumer.AddField(field)
   128  }
   129  
   130  func assert(ok bool) {
   131  	if !ok {
   132  		panic("assert fail")
   133  	}
   134  }
   135  
   136  func assert2(ok bool, msg string, args ...interface{}) {
   137  	if !ok {
   138  		panic(fmt.Sprintf(msg, args...))
   139  	}
   140  }
   141  
   142  func (w *PerFieldPostingsWriter) Close() error {
   143  	var subs []io.Closer
   144  	for _, v := range w.formats {
   145  		subs = append(subs, v)
   146  	}
   147  	return util.Close(subs...)
   148  }
   149  
   150  func _suffix(formatName, suffix string) string {
   151  	return formatName + "_" + suffix
   152  }
   153  
   154  func fullSegmentSuffix(fieldName, outerSegmentSuffix, segmentSuffix string) string {
   155  	if len(outerSegmentSuffix) == 0 {
   156  		return segmentSuffix
   157  	}
   158  	// TODO: support embedding; I think it should work but
   159  	// we need a test confirm to confirm
   160  	// return outerSegmentSuffix + "_" + segmentSuffix;
   161  	panic(fmt.Sprintf(
   162  		"cannot embed PerFieldPostingsFormat inside itself (field '%v' returned PerFieldPostingsFormat)",
   163  		fieldName))
   164  }
   165  
// PerFieldPostingsReader serves postings for each field from the
// FieldsProducer of the format that field was written with.
type PerFieldPostingsReader struct {
	// fields maps a field name to the producer serving it.
	fields  map[string]FieldsProducer
	// formats maps a segment suffix ("<format name>_<suffix>") to its
	// producer; these are the producers closed by Close.
	formats map[string]FieldsProducer
}
   170  
   171  func newPerFieldPostingsReader(state SegmentReadState) (fp FieldsProducer, err error) {
   172  	ans := PerFieldPostingsReader{
   173  		make(map[string]FieldsProducer),
   174  		make(map[string]FieldsProducer),
   175  	}
   176  	// Read _X.per and init each format:
   177  	success := false
   178  	defer func() {
   179  		if !success {
   180  			// log.Printf("Failed to initialize PerFieldPostingsReader.")
   181  			// if err != nil {
   182  			// 	log.Print("DEBUG ", err)
   183  			// }
   184  			fps := make([]FieldsProducer, 0)
   185  			for _, v := range ans.formats {
   186  				fps = append(fps, v)
   187  			}
   188  			items := make([]io.Closer, len(fps))
   189  			for i, v := range fps {
   190  				items[i] = v
   191  			}
   192  			util.CloseWhileSuppressingError(items...)
   193  		}
   194  	}()
   195  	// Read field name -> format name
   196  	for _, fi := range state.FieldInfos.Values {
   197  		// log.Printf("Processing %v...", fi)
   198  		if fi.IsIndexed() {
   199  			fieldName := fi.Name
   200  			// log.Printf("Name: %v", fieldName)
   201  			if formatName := fi.Attribute(PER_FIELD_FORMAT_KEY); formatName != "" {
   202  				// log.Printf("Format: %v", formatName)
   203  				// null formatName means the field is in fieldInfos, but has no postings!
   204  				suffix := fi.Attribute(PER_FIELD_SUFFIX_KEY)
   205  				// log.Printf("Suffix: %v", suffix)
   206  				assert(suffix != "")
   207  				format := LoadPostingsFormat(formatName)
   208  				segmentSuffix := formatName + "_" + suffix
   209  				// log.Printf("Segment suffix: %v", segmentSuffix)
   210  				if _, ok := ans.formats[segmentSuffix]; !ok {
   211  					// log.Printf("Loading fields producer: %v", segmentSuffix)
   212  					newReadState := state // clone
   213  					newReadState.SegmentSuffix = formatName + "_" + suffix
   214  					fp, err = format.FieldsProducer(newReadState)
   215  					if err != nil {
   216  						return fp, err
   217  					}
   218  					ans.formats[segmentSuffix] = fp
   219  				}
   220  				ans.fields[fieldName] = ans.formats[segmentSuffix]
   221  			}
   222  		}
   223  	}
   224  	success = true
   225  	return &ans, nil
   226  }
   227  
   228  func (r *PerFieldPostingsReader) Terms(field string) Terms {
   229  	if p, ok := r.fields[field]; ok {
   230  		return p.Terms(field)
   231  	}
   232  	return nil
   233  }
   234  
   235  func (r *PerFieldPostingsReader) Close() error {
   236  	fps := make([]FieldsProducer, 0)
   237  	for _, v := range r.formats {
   238  		fps = append(fps, v)
   239  	}
   240  	items := make([]io.Closer, len(fps))
   241  	for i, v := range fps {
   242  		items[i] = v
   243  	}
   244  	return util.Close(items...)
   245  }