github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/util/fst/nodeHash.go (about)

     1  package fst
     2  
     3  import (
     4  	// "fmt"
     5  	"github.com/balzaczyy/golucene/core/util/packed"
     6  )
     7  
     8  /* Used to dedup states (lookup already-frozen states) */
     9  type NodeHash struct {
    10  	table      *packed.PagedGrowableWriter
    11  	count      int64
    12  	mask       int64
    13  	fst        *FST
    14  	scratchArc *Arc
    15  	in         BytesReader
    16  }
    17  
    18  func newNodeHash(fst *FST, in BytesReader) *NodeHash {
    19  	return &NodeHash{
    20  		table:      packed.NewPagedGrowableWriter(16, 1<<27, 8, packed.PackedInts.COMPACT),
    21  		mask:       15,
    22  		fst:        fst,
    23  		scratchArc: new(Arc),
    24  		in:         in,
    25  	}
    26  }
    27  
    28  func (nh *NodeHash) nodesEqual(node *UnCompiledNode, address int64) (bool, error) {
    29  	_, err := nh.fst.readFirstRealTargetArc(address, nh.scratchArc, nh.in)
    30  	if err != nil {
    31  		return false, err
    32  	}
    33  	if nh.scratchArc.bytesPerArc != 0 && node.NumArcs != nh.scratchArc.numArcs {
    34  		return false, nil
    35  	}
    36  	for arcUpto := 0; arcUpto < node.NumArcs; arcUpto++ {
    37  		if arc := node.Arcs[arcUpto]; arc.label != nh.scratchArc.Label ||
    38  			arc.output != nh.scratchArc.Output ||
    39  			arc.Target.(*CompiledNode).node != nh.scratchArc.target ||
    40  			arc.nextFinalOutput != nh.scratchArc.NextFinalOutput ||
    41  			arc.isFinal != nh.scratchArc.IsFinal() {
    42  			return false, nil
    43  		}
    44  
    45  		if nh.scratchArc.isLast() {
    46  			return arcUpto == node.NumArcs-1, nil
    47  		}
    48  		if _, err = nh.fst.readNextRealArc(nh.scratchArc, nh.in); err != nil {
    49  			return false, err
    50  		}
    51  	}
    52  	return false, err
    53  }
    54  
// PRIME is the multiplier applied to the running hash before each new term
// is added when hashing nodes and output bytes.
const PRIME = 31
    56  
    57  /* hash code for an unfrozen node. This must be identical to the frozen case (below) !! */
    58  func (nh *NodeHash) hash(node *UnCompiledNode) int64 {
    59  	// fmt.Println("hash unfrozen")
    60  	h := int64(0)
    61  	for arcIdx := 0; arcIdx < node.NumArcs; arcIdx++ {
    62  		arc := node.Arcs[arcIdx]
    63  		// fmt.Printf("  label=%v target=%v h=%v output=%v isFinal?=%v\n",
    64  		// 	arc.label, arc.Target.(*CompiledNode).node, h,
    65  		// 	nh.fst.outputs.outputToString(arc.output), arc.isFinal)
    66  		h = PRIME*h + int64(arc.label)
    67  		n := arc.Target.(*CompiledNode).node
    68  		h = PRIME*h + int64(n^(n>>32))
    69  		h = PRIME*h + hashPtr(arc.output)
    70  		h = PRIME*h + hashPtr(arc.nextFinalOutput)
    71  		if arc.isFinal {
    72  			h += 17
    73  		}
    74  	}
    75  	// fmt.Printf("  ret %v\n", int32(h))
    76  	return h
    77  }
    78  
    79  /* hash code for a frozen node */
    80  func (nh *NodeHash) hashFrozen(node int64) (int64, error) {
    81  	// fmt.Printf("hash frozen node=%v\n", node)
    82  	h := int64(0)
    83  	_, err := nh.fst.readFirstRealTargetArc(node, nh.scratchArc, nh.in)
    84  	if err != nil {
    85  		return 0, err
    86  	}
    87  	for {
    88  		// fmt.Printf("  label=%v target=%v h=%v output=%v next?=%v final?=%v pos=%v\n",
    89  		// 	nh.scratchArc.Label, nh.scratchArc.target, h,
    90  		// 	nh.fst.outputs.outputToString(nh.scratchArc.Output),
    91  		// 	nh.scratchArc.flag(4), nh.scratchArc.IsFinal(), nh.in.getPosition())
    92  		h = PRIME*h + int64(nh.scratchArc.Label)
    93  		h = PRIME*h + int64(nh.scratchArc.target^(nh.scratchArc.target>>32))
    94  		h = PRIME*h + hashPtr(nh.scratchArc.Output)
    95  		h = PRIME*h + hashPtr(nh.scratchArc.NextFinalOutput)
    96  		if nh.scratchArc.IsFinal() {
    97  			h += 17
    98  		}
    99  		if nh.scratchArc.isLast() {
   100  			break
   101  		}
   102  		if _, err = nh.fst.readNextRealArc(nh.scratchArc, nh.in); err != nil {
   103  			return 0, err
   104  		}
   105  	}
   106  	// fmt.Printf("  ret %v\n", int32(h))
   107  	return h, nil
   108  }
   109  
   110  func hashPtr(obj interface{}) (h int64) {
   111  	if obj != nil && obj != NO_OUTPUT {
   112  		for _, b := range obj.([]byte) {
   113  			h = PRIME*h + int64(b)
   114  		}
   115  	}
   116  	return
   117  }
   118  
   119  func (nh *NodeHash) add(nodeIn *UnCompiledNode) (int64, error) {
   120  	// fmt.Printf("hash: add count=%v vs %v mask=%v\n", nh.count, nh.table.Size(), nh.mask)
   121  	h := nh.hash(nodeIn)
   122  	pos := h & nh.mask
   123  	c := int64(0)
   124  	for {
   125  		v := nh.table.Get(pos)
   126  		if v == 0 {
   127  			// freeze & add
   128  			node, err := nh.fst.addNode(nodeIn)
   129  			if err != nil {
   130  				return 0, err
   131  			}
   132  			// fmt.Printf("  now freeze node=%v\n", node)
   133  			h2, err := nh.hashFrozen(node)
   134  			if err != nil {
   135  				return 0, err
   136  			}
   137  			assert2(h2 == h, "frozenHash=%v vs h=%v", h2, h)
   138  			nh.count++
   139  			nh.table.Set(pos, node)
   140  			// rehash at 2/3 occupancy:
   141  			if nh.count > 2*nh.table.Size()/3 {
   142  				panic("not implemented yet")
   143  			}
   144  			return node, nil
   145  		} else {
   146  			ok, err := nh.nodesEqual(nodeIn, v)
   147  			if err != nil {
   148  				return 0, err
   149  			}
   150  			if ok {
   151  				// same node is already here
   152  				return v, nil
   153  			}
   154  		}
   155  
   156  		// quadratic probe
   157  		c++
   158  		pos = (pos + c) & nh.mask
   159  	}
   160  }