github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/util/fst/nodeHash.go (about) 1 package fst 2 3 import ( 4 // "fmt" 5 "github.com/balzaczyy/golucene/core/util/packed" 6 ) 7 8 /* Used to dedup states (lookup already-frozen states) */ 9 type NodeHash struct { 10 table *packed.PagedGrowableWriter 11 count int64 12 mask int64 13 fst *FST 14 scratchArc *Arc 15 in BytesReader 16 } 17 18 func newNodeHash(fst *FST, in BytesReader) *NodeHash { 19 return &NodeHash{ 20 table: packed.NewPagedGrowableWriter(16, 1<<27, 8, packed.PackedInts.COMPACT), 21 mask: 15, 22 fst: fst, 23 scratchArc: new(Arc), 24 in: in, 25 } 26 } 27 28 func (nh *NodeHash) nodesEqual(node *UnCompiledNode, address int64) (bool, error) { 29 _, err := nh.fst.readFirstRealTargetArc(address, nh.scratchArc, nh.in) 30 if err != nil { 31 return false, err 32 } 33 if nh.scratchArc.bytesPerArc != 0 && node.NumArcs != nh.scratchArc.numArcs { 34 return false, nil 35 } 36 for arcUpto := 0; arcUpto < node.NumArcs; arcUpto++ { 37 if arc := node.Arcs[arcUpto]; arc.label != nh.scratchArc.Label || 38 arc.output != nh.scratchArc.Output || 39 arc.Target.(*CompiledNode).node != nh.scratchArc.target || 40 arc.nextFinalOutput != nh.scratchArc.NextFinalOutput || 41 arc.isFinal != nh.scratchArc.IsFinal() { 42 return false, nil 43 } 44 45 if nh.scratchArc.isLast() { 46 return arcUpto == node.NumArcs-1, nil 47 } 48 if _, err = nh.fst.readNextRealArc(nh.scratchArc, nh.in); err != nil { 49 return false, err 50 } 51 } 52 return false, err 53 } 54 55 const PRIME = 31 56 57 /* hash code for an unfrozen node. This must be identical to the frozen case (below) !! */ 58 func (nh *NodeHash) hash(node *UnCompiledNode) int64 { 59 // fmt.Println("hash unfrozen") 60 h := int64(0) 61 for arcIdx := 0; arcIdx < node.NumArcs; arcIdx++ { 62 arc := node.Arcs[arcIdx] 63 // fmt.Printf(" label=%v target=%v h=%v output=%v isFinal?=%v\n", 64 // arc.label, arc.Target.(*CompiledNode).node, h, 65 // nh.fst.outputs.outputToString(arc.output), arc.isFinal) 66 h = PRIME*h + int64(arc.label) 67 n := arc.Target.(*CompiledNode).node 68 h = PRIME*h + int64(n^(n>>32)) 69 h = PRIME*h + hashPtr(arc.output) 70 h = PRIME*h + hashPtr(arc.nextFinalOutput) 71 if arc.isFinal { 72 h += 17 73 } 74 } 75 // fmt.Printf(" ret %v\n", int32(h)) 76 return h 77 } 78 79 /* hash code for a frozen node */ 80 func (nh *NodeHash) hashFrozen(node int64) (int64, error) { 81 // fmt.Printf("hash frozen node=%v\n", node) 82 h := int64(0) 83 _, err := nh.fst.readFirstRealTargetArc(node, nh.scratchArc, nh.in) 84 if err != nil { 85 return 0, err 86 } 87 for { 88 // fmt.Printf(" label=%v target=%v h=%v output=%v next?=%v final?=%v pos=%v\n", 89 // nh.scratchArc.Label, nh.scratchArc.target, h, 90 // nh.fst.outputs.outputToString(nh.scratchArc.Output), 91 // nh.scratchArc.flag(4), nh.scratchArc.IsFinal(), nh.in.getPosition()) 92 h = PRIME*h + int64(nh.scratchArc.Label) 93 h = PRIME*h + int64(nh.scratchArc.target^(nh.scratchArc.target>>32)) 94 h = PRIME*h + hashPtr(nh.scratchArc.Output) 95 h = PRIME*h + hashPtr(nh.scratchArc.NextFinalOutput) 96 if nh.scratchArc.IsFinal() { 97 h += 17 98 } 99 if nh.scratchArc.isLast() { 100 break 101 } 102 if _, err = nh.fst.readNextRealArc(nh.scratchArc, nh.in); err != nil { 103 return 0, err 104 } 105 } 106 // fmt.Printf(" ret %v\n", int32(h)) 107 return h, nil 108 } 109 110 func hashPtr(obj interface{}) (h int64) { 111 if obj != nil && obj != NO_OUTPUT { 112 for _, b := range obj.([]byte) { 113 h = PRIME*h + int64(b) 114 } 115 } 116 return 117 } 118 119 func (nh *NodeHash) add(nodeIn *UnCompiledNode) (int64, error) { 120 // fmt.Printf("hash: add count=%v vs %v mask=%v\n", nh.count, nh.table.Size(), nh.mask) 121 h := nh.hash(nodeIn) 122 pos := h & nh.mask 123 c := int64(0) 124 for { 125 v := nh.table.Get(pos) 126 if v == 0 { 127 // freeze & add 128 node, err := nh.fst.addNode(nodeIn) 129 if err != nil { 130 return 0, err 131 } 132 // fmt.Printf(" now freeze node=%v\n", node) 133 h2, err := nh.hashFrozen(node) 134 if err != nil { 135 return 0, err 136 } 137 assert2(h2 == h, "frozenHash=%v vs h=%v", h2, h) 138 nh.count++ 139 nh.table.Set(pos, node) 140 // rehash at 2/3 occupancy: 141 if nh.count > 2*nh.table.Size()/3 { 142 panic("not implemented yet") 143 } 144 return node, nil 145 } else { 146 ok, err := nh.nodesEqual(nodeIn, v) 147 if err != nil { 148 return 0, err 149 } 150 if ok { 151 // same node is already here 152 return v, nil 153 } 154 } 155 156 // quadratic probe 157 c++ 158 pos = (pos + c) & nh.mask 159 } 160 }