github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/util/fst/outputs.go (about) 1 package fst 2 3 import ( 4 "fmt" 5 "github.com/balzaczyy/golucene/core/util" 6 ) 7 8 // util/fst/Outputs.java 9 10 /** 11 * Represents the outputs for an FST, providing the basic 12 * algebra required for building and traversing the FST. 13 * 14 * <p>Note that any operation that returns NO_OUTPUT must 15 * return the same singleton object from {@link 16 * #getNoOutput}.</p> 17 */ 18 type Outputs interface { 19 // Eg common("foobar", "food") -> "foo" 20 Common(output1, output2 interface{}) interface{} 21 // Eg subtract("foobar", "foo") -> "bar" 22 Subtract(output1, output2 interface{}) interface{} 23 /** Eg add("foo", "bar") -> "foobar" */ 24 Add(prefix interface{}, output interface{}) interface{} 25 // Encode an output value into a DataOutput. 26 Write(interface{}, util.DataOutput) error 27 // Encode an final node output value into a DataOutput. By default 28 // this just calls write() 29 writeFinalOutput(interface{}, util.DataOutput) error 30 /** Decode an output value previously written with {@link 31 * #write(Object, DataOutput)}. */ 32 Read(in util.DataInput) (e interface{}, err error) 33 // Skip the output; defaults to just calling Read() and discarding the result 34 SkipOutput(util.DataInput) error 35 /** Decode an output value previously written with {@link 36 * #writeFinalOutput(Object, DataOutput)}. By default this 37 * just calls {@link #read(DataInput)}. */ 38 ReadFinalOutput(in util.DataInput) (e interface{}, err error) 39 // Skip the output previously written with WriteFinalOutput; 40 // defaults to just calling ReadFinalOutput and discarding the 41 // result. 42 SkipFinalOutput(util.DataInput) error 43 /** NOTE: this output is compared with == so you must 44 * ensure that all methods return the single object if 45 * it's really no output */ 46 NoOutput() interface{} 47 outputToString(interface{}) string 48 merge(first, second interface{}) interface{} 49 ramBytesUsed(interface{}) int64 50 } 51 52 type iOutputsReader interface { 53 Read(in util.DataInput) (e interface{}, err error) 54 Write(interface{}, util.DataOutput) error 55 } 56 57 type abstractOutputs struct { 58 spi iOutputsReader 59 } 60 61 func (out *abstractOutputs) writeFinalOutput(output interface{}, o util.DataOutput) error { 62 return out.spi.Write(output, o) 63 } 64 65 func (out *abstractOutputs) SkipOutput(in util.DataInput) error { 66 _, err := out.spi.Read(in) 67 return err 68 } 69 70 /* Decode an output value previously written with writeFinalOutput(). By default this just calls read(). */ 71 func (out *abstractOutputs) ReadFinalOutput(in util.DataInput) (e interface{}, err error) { 72 return out.spi.Read(in) 73 } 74 75 func (out *abstractOutputs) SkipFinalOutput(in util.DataInput) error { 76 return out.SkipOutput(in) 77 } 78 79 func (out *abstractOutputs) merge(first, second interface{}) interface{} { 80 panic("not supported yet") 81 } 82 83 var NO_OUTPUT = newNoOutputs() 84 85 /* A nil FST Outputs implementation; use this if you just want to build an FSA. */ 86 type NoOutputs struct { 87 *abstractOutputs 88 } 89 90 func newNoOutputs() *NoOutputs { 91 ans := &NoOutputs{} 92 ans.abstractOutputs = &abstractOutputs{ans} 93 return ans 94 } 95 96 func (o *NoOutputs) Common(output1, output2 interface{}) interface{} { 97 panic("niy") 98 } 99 100 func (o *NoOutputs) Subtract(output1, output2 interface{}) interface{} { 101 assert(output1 == NO_OUTPUT) 102 assert(output2 == NO_OUTPUT) 103 return NO_OUTPUT 104 } 105 106 func (o *NoOutputs) Add(prefix, output interface{}) interface{} { 107 panic("not implemented yet") 108 } 109 110 func (o *NoOutputs) merge(first, second interface{}) interface{} { 111 assert(first == NO_OUTPUT) 112 assert(second == NO_OUTPUT) 113 return NO_OUTPUT 114 } 115 116 func (o *NoOutputs) Write(prefix interface{}, out util.DataOutput) error { 117 return nil 118 } 119 120 func (o *NoOutputs) Read(in util.DataInput) (interface{}, error) { 121 return NO_OUTPUT, nil 122 } 123 124 func (o *NoOutputs) NoOutput() interface{} { 125 return NO_OUTPUT 126 } 127 128 func (o *NoOutputs) outputToString(output interface{}) string { 129 return "" 130 } 131 132 // fst/ByteSequenceOutputs.java 133 134 /** 135 * An FST {@link Outputs} implementation where each output 136 * is a sequence of bytes. 137 */ 138 type ByteSequenceOutputs struct { 139 *abstractOutputs 140 } 141 142 var oneByteSequenceOutputs *ByteSequenceOutputs 143 144 func ByteSequenceOutputsSingleton() *ByteSequenceOutputs { 145 if oneByteSequenceOutputs == nil { 146 oneByteSequenceOutputs = &ByteSequenceOutputs{} 147 oneByteSequenceOutputs.abstractOutputs = &abstractOutputs{oneByteSequenceOutputs} 148 } 149 return oneByteSequenceOutputs 150 } 151 152 func (out *ByteSequenceOutputs) Common(_output1, _output2 interface{}) interface{} { 153 assert(_output1 != nil) 154 assert(_output2 != nil) 155 156 if _output1 == NO_OUTPUT || _output2 == NO_OUTPUT { 157 return NO_OUTPUT 158 } 159 160 output1, output2 := _output1.([]byte), _output2.([]byte) 161 pos1, pos2 := 0, 0 162 stopAt1 := len(output1) 163 if len(output2) < stopAt1 { 164 stopAt1 = len(output2) 165 } 166 for pos1 < stopAt1 { 167 if output1[pos1] != output2[pos2] { 168 break 169 } 170 pos1++ 171 pos2++ 172 } 173 174 if pos1 == 0 { 175 // no common prefix 176 return NO_OUTPUT 177 } else if pos1 == len(output1) { 178 // output1 is a prefix of output2 179 return output1 180 } else if pos2 == len(output2) { 181 // outpu2 is a prefix of output1 182 return output2 183 } else { 184 return output1[:pos1] 185 } 186 } 187 188 func (out *ByteSequenceOutputs) Subtract(_output, _inc interface{}) interface{} { 189 assert(_output != nil) 190 assert(_inc != nil) 191 if _output == NO_OUTPUT || _inc == NO_OUTPUT { 192 // no prefix removed 193 return _output 194 } 195 output, inc := _output.([]byte), _inc.([]byte) 196 assert(util.StartsWith(output, inc)) 197 if len(inc) == len(output) { 198 // entire output removed 199 return NO_OUTPUT 200 } 201 assert2(len(inc) < len(output), "len(inc)=%v vs len(output)=%v", len(inc), len(output)) 202 assert(len(inc) > 0) 203 return output[len(inc):] 204 } 205 206 func (out *ByteSequenceOutputs) Add(_prefix interface{}, _output interface{}) interface{} { 207 assert(_prefix != nil) 208 assert(_output != nil) 209 if _prefix == NO_OUTPUT { 210 return _output 211 } else if _output == NO_OUTPUT { 212 return _prefix 213 } else { 214 prefix, output := _prefix.([]byte), _output.([]byte) 215 assert(len(prefix) > 0) 216 assert(len(output) > 0) 217 result := make([]byte, len(prefix)+len(output)) 218 copy(result, prefix) 219 copy(result[len(prefix):], output) 220 return result 221 } 222 } 223 224 func (o *ByteSequenceOutputs) Write(obj interface{}, out util.DataOutput) error { 225 assert(obj != nil) 226 prefix, ok := obj.([]byte) 227 assert(ok) 228 err := out.WriteVInt(int32(len(prefix))) 229 if err == nil { 230 err = out.WriteBytes(prefix) 231 } 232 return err 233 } 234 235 func (out *ByteSequenceOutputs) Read(in util.DataInput) (e interface{}, err error) { 236 if length, err := in.ReadVInt(); err == nil { 237 // fmt.Printf("Length: %v\n", length) 238 if length == 0 { 239 e = out.NoOutput() 240 } else { 241 buf := make([]byte, length) 242 e = buf 243 err = in.ReadBytes(buf) 244 } 245 } else { 246 fmt.Printf("Failed to read length due to %v", err) 247 } 248 return e, err 249 } 250 251 func (out *ByteSequenceOutputs) NoOutput() interface{} { 252 return NO_OUTPUT 253 } 254 255 func (out *ByteSequenceOutputs) outputToString(output interface{}) string { 256 return fmt.Sprintf("%v", output) 257 } 258 259 func (out *ByteSequenceOutputs) String() string { 260 return "ByteSequenceOutputs" 261 } 262 263 var BASE_NUM_BYTES = util.ShallowSizeOf(NO_OUTPUT) 264 265 func (out *ByteSequenceOutputs) ramBytesUsed(output interface{}) int64 { 266 return BASE_NUM_BYTES + util.SizeOf(output.([]byte)) 267 } 268 269 // util/fst/Util.java 270 271 /** Looks up the output for this input, or null if the 272 * input is not accepted */ 273 func GetFSTOutput(fst *FST, input []byte) (output interface{}, err error) { 274 if fst.inputType != INPUT_TYPE_BYTE1 { 275 panic("assert fail") 276 } 277 fstReader := fst.BytesReader() 278 // TODO: would be nice not to alloc this on every lookup 279 arc := fst.FirstArc(&Arc{}) 280 281 // Accumulate output as we go 282 output = fst.outputs.NoOutput() 283 for _, v := range input { 284 ret, err := fst.FindTargetArc(int(v), arc, arc, fstReader) 285 if ret == nil || err != nil { 286 return ret, err 287 } 288 output = fst.outputs.Add(output, arc.Output) 289 } 290 291 if arc.IsFinal() { 292 return fst.outputs.Add(output, arc.NextFinalOutput), nil 293 } else { 294 return nil, nil 295 } 296 }