github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/util/fst/outputs.go (about)

     1  package fst
     2  
     3  import (
     4  	"fmt"
     5  	"github.com/balzaczyy/golucene/core/util"
     6  )
     7  
     8  // util/fst/Outputs.java
     9  
    10  /**
    11   * Represents the outputs for an FST, providing the basic
    12   * algebra required for building and traversing the FST.
    13   *
    14   * <p>Note that any operation that returns NO_OUTPUT must
    15   * return the same singleton object from {@link
    16   * #getNoOutput}.</p>
    17   */
    18  type Outputs interface {
    19  	// Eg common("foobar", "food") -> "foo"
    20  	Common(output1, output2 interface{}) interface{}
    21  	// Eg subtract("foobar", "foo") -> "bar"
    22  	Subtract(output1, output2 interface{}) interface{}
    23  	/** Eg add("foo", "bar") -> "foobar" */
    24  	Add(prefix interface{}, output interface{}) interface{}
    25  	// Encode an output value into a DataOutput.
    26  	Write(interface{}, util.DataOutput) error
    27  	// Encode an final node output value into a DataOutput. By default
    28  	// this just calls write()
    29  	writeFinalOutput(interface{}, util.DataOutput) error
    30  	/** Decode an output value previously written with {@link
    31  	 *  #write(Object, DataOutput)}. */
    32  	Read(in util.DataInput) (e interface{}, err error)
    33  	// Skip the output; defaults to just calling Read() and discarding the result
    34  	SkipOutput(util.DataInput) error
    35  	/** Decode an output value previously written with {@link
    36  	 *  #writeFinalOutput(Object, DataOutput)}.  By default this
    37  	 *  just calls {@link #read(DataInput)}. */
    38  	ReadFinalOutput(in util.DataInput) (e interface{}, err error)
    39  	// Skip the output previously written with WriteFinalOutput;
    40  	// defaults to just calling ReadFinalOutput and discarding the
    41  	// result.
    42  	SkipFinalOutput(util.DataInput) error
    43  	/** NOTE: this output is compared with == so you must
    44  	 *  ensure that all methods return the single object if
    45  	 *  it's really no output */
    46  	NoOutput() interface{}
    47  	outputToString(interface{}) string
    48  	merge(first, second interface{}) interface{}
    49  	ramBytesUsed(interface{}) int64
    50  }
    51  
    52  type iOutputsReader interface {
    53  	Read(in util.DataInput) (e interface{}, err error)
    54  	Write(interface{}, util.DataOutput) error
    55  }
    56  
    57  type abstractOutputs struct {
    58  	spi iOutputsReader
    59  }
    60  
    61  func (out *abstractOutputs) writeFinalOutput(output interface{}, o util.DataOutput) error {
    62  	return out.spi.Write(output, o)
    63  }
    64  
    65  func (out *abstractOutputs) SkipOutput(in util.DataInput) error {
    66  	_, err := out.spi.Read(in)
    67  	return err
    68  }
    69  
    70  /* Decode an output value previously written with writeFinalOutput(). By default this just calls read(). */
    71  func (out *abstractOutputs) ReadFinalOutput(in util.DataInput) (e interface{}, err error) {
    72  	return out.spi.Read(in)
    73  }
    74  
    75  func (out *abstractOutputs) SkipFinalOutput(in util.DataInput) error {
    76  	return out.SkipOutput(in)
    77  }
    78  
    79  func (out *abstractOutputs) merge(first, second interface{}) interface{} {
    80  	panic("not supported yet")
    81  }
    82  
    83  var NO_OUTPUT = newNoOutputs()
    84  
    85  /* A nil FST Outputs implementation; use this if you just want to build an FSA. */
    86  type NoOutputs struct {
    87  	*abstractOutputs
    88  }
    89  
    90  func newNoOutputs() *NoOutputs {
    91  	ans := &NoOutputs{}
    92  	ans.abstractOutputs = &abstractOutputs{ans}
    93  	return ans
    94  }
    95  
    96  func (o *NoOutputs) Common(output1, output2 interface{}) interface{} {
    97  	panic("niy")
    98  }
    99  
   100  func (o *NoOutputs) Subtract(output1, output2 interface{}) interface{} {
   101  	assert(output1 == NO_OUTPUT)
   102  	assert(output2 == NO_OUTPUT)
   103  	return NO_OUTPUT
   104  }
   105  
   106  func (o *NoOutputs) Add(prefix, output interface{}) interface{} {
   107  	panic("not implemented yet")
   108  }
   109  
   110  func (o *NoOutputs) merge(first, second interface{}) interface{} {
   111  	assert(first == NO_OUTPUT)
   112  	assert(second == NO_OUTPUT)
   113  	return NO_OUTPUT
   114  }
   115  
   116  func (o *NoOutputs) Write(prefix interface{}, out util.DataOutput) error {
   117  	return nil
   118  }
   119  
   120  func (o *NoOutputs) Read(in util.DataInput) (interface{}, error) {
   121  	return NO_OUTPUT, nil
   122  }
   123  
   124  func (o *NoOutputs) NoOutput() interface{} {
   125  	return NO_OUTPUT
   126  }
   127  
   128  func (o *NoOutputs) outputToString(output interface{}) string {
   129  	return ""
   130  }
   131  
   132  // fst/ByteSequenceOutputs.java
   133  
   134  /**
   135   * An FST {@link Outputs} implementation where each output
   136   * is a sequence of bytes.
   137   */
   138  type ByteSequenceOutputs struct {
   139  	*abstractOutputs
   140  }
   141  
   142  var oneByteSequenceOutputs *ByteSequenceOutputs
   143  
   144  func ByteSequenceOutputsSingleton() *ByteSequenceOutputs {
   145  	if oneByteSequenceOutputs == nil {
   146  		oneByteSequenceOutputs = &ByteSequenceOutputs{}
   147  		oneByteSequenceOutputs.abstractOutputs = &abstractOutputs{oneByteSequenceOutputs}
   148  	}
   149  	return oneByteSequenceOutputs
   150  }
   151  
   152  func (out *ByteSequenceOutputs) Common(_output1, _output2 interface{}) interface{} {
   153  	assert(_output1 != nil)
   154  	assert(_output2 != nil)
   155  
   156  	if _output1 == NO_OUTPUT || _output2 == NO_OUTPUT {
   157  		return NO_OUTPUT
   158  	}
   159  
   160  	output1, output2 := _output1.([]byte), _output2.([]byte)
   161  	pos1, pos2 := 0, 0
   162  	stopAt1 := len(output1)
   163  	if len(output2) < stopAt1 {
   164  		stopAt1 = len(output2)
   165  	}
   166  	for pos1 < stopAt1 {
   167  		if output1[pos1] != output2[pos2] {
   168  			break
   169  		}
   170  		pos1++
   171  		pos2++
   172  	}
   173  
   174  	if pos1 == 0 {
   175  		// no common prefix
   176  		return NO_OUTPUT
   177  	} else if pos1 == len(output1) {
   178  		// output1 is a prefix of output2
   179  		return output1
   180  	} else if pos2 == len(output2) {
   181  		// outpu2 is a prefix of output1
   182  		return output2
   183  	} else {
   184  		return output1[:pos1]
   185  	}
   186  }
   187  
   188  func (out *ByteSequenceOutputs) Subtract(_output, _inc interface{}) interface{} {
   189  	assert(_output != nil)
   190  	assert(_inc != nil)
   191  	if _output == NO_OUTPUT || _inc == NO_OUTPUT {
   192  		// no prefix removed
   193  		return _output
   194  	}
   195  	output, inc := _output.([]byte), _inc.([]byte)
   196  	assert(util.StartsWith(output, inc))
   197  	if len(inc) == len(output) {
   198  		// entire output removed
   199  		return NO_OUTPUT
   200  	}
   201  	assert2(len(inc) < len(output), "len(inc)=%v vs len(output)=%v", len(inc), len(output))
   202  	assert(len(inc) > 0)
   203  	return output[len(inc):]
   204  }
   205  
   206  func (out *ByteSequenceOutputs) Add(_prefix interface{}, _output interface{}) interface{} {
   207  	assert(_prefix != nil)
   208  	assert(_output != nil)
   209  	if _prefix == NO_OUTPUT {
   210  		return _output
   211  	} else if _output == NO_OUTPUT {
   212  		return _prefix
   213  	} else {
   214  		prefix, output := _prefix.([]byte), _output.([]byte)
   215  		assert(len(prefix) > 0)
   216  		assert(len(output) > 0)
   217  		result := make([]byte, len(prefix)+len(output))
   218  		copy(result, prefix)
   219  		copy(result[len(prefix):], output)
   220  		return result
   221  	}
   222  }
   223  
   224  func (o *ByteSequenceOutputs) Write(obj interface{}, out util.DataOutput) error {
   225  	assert(obj != nil)
   226  	prefix, ok := obj.([]byte)
   227  	assert(ok)
   228  	err := out.WriteVInt(int32(len(prefix)))
   229  	if err == nil {
   230  		err = out.WriteBytes(prefix)
   231  	}
   232  	return err
   233  }
   234  
   235  func (out *ByteSequenceOutputs) Read(in util.DataInput) (e interface{}, err error) {
   236  	if length, err := in.ReadVInt(); err == nil {
   237  		// fmt.Printf("Length: %v\n", length)
   238  		if length == 0 {
   239  			e = out.NoOutput()
   240  		} else {
   241  			buf := make([]byte, length)
   242  			e = buf
   243  			err = in.ReadBytes(buf)
   244  		}
   245  	} else {
   246  		fmt.Printf("Failed to read length due to %v", err)
   247  	}
   248  	return e, err
   249  }
   250  
   251  func (out *ByteSequenceOutputs) NoOutput() interface{} {
   252  	return NO_OUTPUT
   253  }
   254  
   255  func (out *ByteSequenceOutputs) outputToString(output interface{}) string {
   256  	return fmt.Sprintf("%v", output)
   257  }
   258  
   259  func (out *ByteSequenceOutputs) String() string {
   260  	return "ByteSequenceOutputs"
   261  }
   262  
   263  var BASE_NUM_BYTES = util.ShallowSizeOf(NO_OUTPUT)
   264  
   265  func (out *ByteSequenceOutputs) ramBytesUsed(output interface{}) int64 {
   266  	return BASE_NUM_BYTES + util.SizeOf(output.([]byte))
   267  }
   268  
   269  // util/fst/Util.java
   270  
   271  /** Looks up the output for this input, or null if the
   272   *  input is not accepted */
   273  func GetFSTOutput(fst *FST, input []byte) (output interface{}, err error) {
   274  	if fst.inputType != INPUT_TYPE_BYTE1 {
   275  		panic("assert fail")
   276  	}
   277  	fstReader := fst.BytesReader()
   278  	// TODO: would be nice not to alloc this on every lookup
   279  	arc := fst.FirstArc(&Arc{})
   280  
   281  	// Accumulate output as we go
   282  	output = fst.outputs.NoOutput()
   283  	for _, v := range input {
   284  		ret, err := fst.FindTargetArc(int(v), arc, arc, fstReader)
   285  		if ret == nil || err != nil {
   286  			return ret, err
   287  		}
   288  		output = fst.outputs.Add(output, arc.Output)
   289  	}
   290  
   291  	if arc.IsFinal() {
   292  		return fst.outputs.Add(output, arc.NextFinalOutput), nil
   293  	} else {
   294  		return nil, nil
   295  	}
   296  }