github.com/vuuihc/gocedar@v0.1.0/cedar.go (about) 1 // Copyright 2016 Evans. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package cedar 6 7 import ( 8 "os" 9 "unsafe" 10 ) 11 12 const ( 13 maxMemStep = 1 << 30 14 ) 15 16 // NInfo stores the information about the trie 17 type NInfo struct { 18 sibling, child byte // uint8 19 } 20 21 // Node contains the array of `base` and `check` as specified in the paper: 22 // "An efficient implementation of trie structures" 23 // https://dl.acm.org/citation.cfm?id=146691 24 type Node struct { 25 baseV, check int // int32 26 } 27 28 func (n *Node) base(reduced ...bool) int { 29 if !isReduced(reduced...) { 30 return n.baseV 31 } 32 33 return -(n.baseV + 1) 34 } 35 36 // Block stores the linked-list pointers and the stats info for blocks. 37 // 38 // Because of type conversion, this version all int16 and int32 uses int, 39 // witch will be optimized in the next version. 40 type Block struct { 41 prev int // int32 // previous block's index, 3 bytes width 42 next int // next block's index, 3 bytes width 43 num int // the number of slots that is free, the range is 0-256 44 reject int // a heuristic number to make the search for free space faster... 45 trial int // the number of times this block has been probed by `find_places` for the free block. 46 eHead int // the index of the first empty elemenet in this block 47 } 48 49 func (b *Block) init() { 50 b.num = 256 // each of block has 256 free slots at the beginning 51 b.reject = 257 // initially every block need to be fully iterated through so that we can reject it to be unusable. 52 } 53 54 // Cedar holds all of the information about double array trie. 55 type Cedar struct { 56 mmap *MMap 57 *MetaInfo 58 59 // Reduced option the reduced trie 60 // Reduced bool 61 62 array []Node // storing the `base` and `check` info from the original paper. 63 nInfos []NInfo 64 blocks []Block 65 // reject [257]int 66 67 // blocksHeadFull int // the index of the first 'Full' block, 0 means no 'Full' block 68 // blocksHeadClosed int // the index of the first 'Closed' block, 0 means no ' Closed' block 69 // blocksHeadOpen int // the index of the first 'Open' block, 0 means no 'Open' block 70 71 // capacity int 72 // size int 73 // ordered bool 74 // maxTrial int // the parameter for cedar, it could be tuned for more, but the default is 1. 75 } 76 77 const ( 78 // ValLimit cedar value limit 79 ValLimit = int(^uint(0) >> 1) 80 // NoVal not have value 81 NoVal = -1 82 ) 83 84 // type PrefixIter struct { 85 // } 86 87 type Options struct { 88 Reduced bool 89 UseMMap bool 90 MMapPath string 91 } 92 93 // New initialize the Cedar for further use 94 func New(opt *Options) *Cedar { 95 cd := &Cedar{} 96 if opt.UseMMap { 97 if len(opt.MMapPath) == 0 { 98 opt.MMapPath = os.TempDir() 99 } 100 mmap := NewMMap(opt.MMapPath) 101 mmap.InitData(cd) 102 cd.useMMap = true 103 } else { 104 cd.MetaInfo = &MetaInfo{} 105 cd.array = make([]Node, 256) 106 cd.nInfos = make([]NInfo, 256) 107 cd.blocks = make([]Block, 1) 108 } 109 if cd.LoadSize > 0 { // if there is data in mmap, do not need init meta 110 return cd 111 } 112 cd.Reduced = isReduced(opt.Reduced) 113 cd.capacity = 256 114 cd.size = 256 115 cd.ordered = true 116 cd.maxTrial = 1 117 118 if !cd.Reduced { 119 cd.array[0] = Node{baseV: 0, check: -1} 120 } else { 121 cd.array[0] = Node{baseV: -1, check: -1} 122 } 123 // make `baseV` point to the previous element, and make `check` point to the next element 124 for i := 1; i < 256; i++ { 125 cd.array[i] = Node{baseV: -(i - 1), check: -(i + 1)} 126 } 127 // make them link as a cyclic doubly-linked list 128 cd.array[1].baseV = -255 129 cd.array[255].check = -1 130 131 cd.blocks[0].eHead = 1 132 cd.blocks[0].init() 133 134 for i := 0; i <= 256; i++ { 135 cd.reject[i] = i + 1 136 } 137 138 return cd 139 } 140 141 // follow To move in the trie by following the `label`, and insert the node if the node is not there, 142 // it is used by the `update` to populate the trie. 143 func (cd *Cedar) follow(from int, label byte) (to int) { 144 base := cd.array[from].base(cd.Reduced) 145 146 // the node is not there 147 to = base ^ int(label) 148 if base < 0 || cd.array[to].check < 0 { 149 // allocate a e node 150 to = cd.popENode(base, from, label) 151 branch := to ^ int(label) 152 153 // maintain the info in ninfo 154 cd.pushSibling(from, branch, label, base >= 0) 155 return 156 } 157 158 // the node is already there and the ownership is not `from`, 159 // therefore a conflict. 160 if cd.array[to].check != from { 161 // call `resolve` to relocate. 162 to = cd.resolve(from, base, label) 163 } 164 165 return 166 } 167 168 // Mark an edge `e` as used in a trie node. 169 // pop empty node from block; never transfer the special block (idx = 0) 170 // nolint 171 func (cd *Cedar) popENode(base, from int, label byte) int { 172 e := base ^ int(label) 173 if base < 0 { 174 e = cd.findPlace() 175 } 176 177 idx := e >> 8 178 arr := &cd.array[e] 179 180 b := &cd.blocks[idx] 181 b.num-- 182 // move the block at idx to the correct linked-list depending the free slots it still have. 183 if b.num == 0 { 184 if idx != 0 { 185 // Closed to Full 186 cd.transferBlock(idx, &cd.blocksHeadClosed, &cd.blocksHeadFull) 187 } 188 } else { 189 // release empty node from empty ring 190 cd.array[-arr.baseV].check = arr.check 191 cd.array[-arr.check].baseV = arr.baseV 192 193 if e == b.eHead { 194 b.eHead = -arr.check 195 } 196 197 if idx != 0 && b.num == 1 && b.trial != cd.maxTrial { 198 // Open to Closed 199 cd.transferBlock(idx, &cd.blocksHeadOpen, &cd.blocksHeadClosed) 200 } 201 } 202 203 // initialize the released node 204 if !cd.Reduced { 205 if label != 0 { 206 cd.array[e].baseV = -1 207 } else { 208 cd.array[e].baseV = 0 209 } 210 cd.array[e].check = from 211 if base < 0 { 212 cd.array[from].baseV = e ^ int(label) 213 } 214 215 return e 216 } 217 218 cd.array[e].baseV = ValLimit 219 cd.array[e].check = from 220 if base < 0 { 221 cd.array[from].baseV = -(e ^ int(label)) - 1 222 } 223 224 return e 225 } 226 227 // Mark an edge `e` as free in a trie node. 228 // push empty node into empty ring 229 // nolint 230 func (cd *Cedar) pushENode(e int) { 231 idx := e >> 8 232 b := &cd.blocks[idx] 233 b.num++ 234 235 if b.num == 1 { 236 b.eHead = e 237 cd.array[e] = Node{baseV: -e, check: -e} 238 239 if idx != 0 { 240 // Move the block from 'Full' to 'Closed' since it has one free slot now. 241 cd.transferBlock(idx, &cd.blocksHeadFull, &cd.blocksHeadClosed) 242 } 243 } else { 244 prev := b.eHead 245 next := -cd.array[prev].check 246 247 // Insert to the edge immediately after the e_head 248 cd.array[e] = Node{baseV: -prev, check: -next} 249 250 cd.array[prev].check = -e 251 cd.array[next].baseV = -e 252 253 // Move the block from 'Closed' to 'Open' since it has more than one free slot now. 254 if b.num == 2 || b.trial == cd.maxTrial { 255 if idx != 0 { 256 // Closed to Open 257 cd.transferBlock(idx, &cd.blocksHeadClosed, &cd.blocksHeadOpen) 258 } 259 } 260 261 // Reset the trial stats 262 b.trial = 0 263 } 264 265 if b.reject < cd.reject[b.num] { 266 b.reject = cd.reject[b.num] 267 } 268 // reset ninfo; no child, no sibling 269 cd.nInfos[e] = NInfo{} 270 } 271 272 // push the `label` into the sibling chain 273 // to from's child 274 func (cd *Cedar) pushSibling(from, base int, label byte, hasChild bool) { 275 c := &cd.nInfos[from].child 276 keepOrder := *c == 0 277 if cd.ordered { 278 keepOrder = label > *c 279 } 280 281 if hasChild && keepOrder { 282 c = &cd.nInfos[base^int(*c)].sibling 283 for cd.ordered && *c != 0 && *c < label { 284 c = &cd.nInfos[base^int(*c)].sibling 285 } 286 } 287 cd.nInfos[base^int(label)].sibling = *c 288 *c = label 289 } 290 291 // remove the `label` from the sibling chain. 292 func (cd *Cedar) popSibling(from, base int, label byte) { 293 c := &cd.nInfos[from].child 294 for *c != label { 295 c = &cd.nInfos[base^int(*c)].sibling 296 } 297 *c = cd.nInfos[base^int(*c)].sibling 298 } 299 300 // Loop through the siblings to see which one reached the end first, which means 301 // it is the one with smaller in children size, and we should try ti relocate the smaller one. 302 // check whether to replace branching w/ the newly added node 303 func (cd *Cedar) consult(baseN, baseP int, cN, cP byte) bool { 304 cN = cd.nInfos[baseN^int(cN)].sibling 305 cP = cd.nInfos[baseP^int(cP)].sibling 306 307 for cN != 0 && cP != 0 { 308 cN = cd.nInfos[baseN^int(cN)].sibling 309 cP = cd.nInfos[baseP^int(cP)].sibling 310 } 311 312 return cP != 0 313 } 314 315 // Collect the list of the children, and push the label as well if it is not terminal node. 316 // enumerate (equal to or more than one) child nodes 317 func (cd *Cedar) setChild(base int, c, label byte, flag bool) []byte { 318 child := make([]byte, 0, 257) 319 // 0: terminal 320 if c == 0 { 321 child = append(child, c) 322 c = cd.nInfos[base^int(c)].sibling 323 } 324 325 if cd.ordered { 326 for c != 0 && c <= label { 327 child = append(child, c) 328 c = cd.nInfos[base^int(c)].sibling 329 } 330 } 331 332 if flag { 333 child = append(child, label) 334 } 335 336 for c != 0 { 337 child = append(child, c) 338 c = cd.nInfos[base^int(c)].sibling 339 } 340 341 return child 342 } 343 344 // For the case where only one free slot is needed 345 func (cd *Cedar) findPlace() int { 346 if cd.blocksHeadClosed != 0 { 347 return cd.blocks[cd.blocksHeadClosed].eHead 348 } 349 350 if cd.blocksHeadOpen != 0 { 351 return cd.blocks[cd.blocksHeadOpen].eHead 352 } 353 354 // the block is not enough, resize it and allocate it. 355 return cd.addBlock() << 8 356 } 357 358 // For the case where multiple free slots are needed. 359 func (cd *Cedar) findPlaces(child []byte) int { 360 idx := cd.blocksHeadOpen 361 // still have available 'Open' blocks. 362 if idx != 0 { 363 e := cd.listIdx(idx, child) 364 if e > 0 { 365 return e 366 } 367 } 368 369 return cd.addBlock() << 8 370 } 371 372 func (cd *Cedar) listIdx(idx int, child []byte) int { 373 n := len(child) 374 bo := cd.blocks[cd.blocksHeadOpen].prev 375 376 // only proceed if the free slots are more than the number of children. Also, we 377 // save the minimal number of attempts to fail in the `reject`, it only worths to 378 // try out this block if the number of children is less than that number. 379 for { 380 b := &cd.blocks[idx] 381 if b.num >= n && n < b.reject { 382 e := cd.listEHead(b, child) 383 if e > 0 { 384 return e 385 } 386 } 387 388 // we broke out of the loop, that means we failed. We save the information in 389 // `reject` for future pruning. 390 b.reject = n 391 if b.reject < cd.reject[b.num] { 392 // put this stats into the global array of information as well. 393 cd.reject[b.num] = b.reject 394 } 395 396 idxN := b.next 397 b.trial++ 398 // move this block to the 'Closed' block list since it has reached the max_trial 399 if b.trial == cd.maxTrial { 400 cd.transferBlock(idx, &cd.blocksHeadOpen, &cd.blocksHeadClosed) 401 } 402 403 // we have finsihed one round of this cyclic doubly-linked-list. 404 if idx == bo { 405 break 406 } 407 // going to the next in this linked list group 408 idx = idxN 409 } 410 411 return 0 412 } 413 414 func (cd *Cedar) listEHead(b *Block, child []byte) int { 415 for e := b.eHead; ; { 416 base := e ^ int(child[0]) 417 // iterate through the children to see if they are available: (check < 0) 418 for i := 0; cd.array[base^int(child[i])].check < 0; i++ { 419 if i == len(child)-1 { 420 // we have found the available block. 421 b.eHead = e 422 return e 423 } 424 } 425 426 // save the next free block's information in `check` 427 e = -cd.array[e].check 428 if e == b.eHead { 429 break 430 } 431 } 432 433 return 0 434 } 435 436 // resolve the conflict by moving one of the the nodes to a free block. 437 // resolve conflict on base_n ^ label_n = base_p ^ label_p 438 func (cd *Cedar) resolve(fromN, baseN int, labelN byte) int { 439 toPn := baseN ^ int(labelN) 440 441 // the `base` and `from` for the conflicting one. 442 fromP := cd.array[toPn].check 443 baseP := cd.array[fromP].base(cd.Reduced) 444 445 // whether to replace siblings of newly added 446 flag := cd.consult( 447 baseN, baseP, 448 cd.nInfos[fromN].child, 449 cd.nInfos[fromP].child, 450 ) 451 452 // collect the list of children for the block that we are going to relocate. 453 var children []byte 454 if flag { 455 children = cd.setChild(baseN, cd.nInfos[fromN].child, labelN, true) 456 } else { 457 children = cd.setChild(baseP, cd.nInfos[fromP].child, 255, false) 458 } 459 460 // decide which algorithm to allocate free block depending on the number of children 461 // we have. 462 base := 0 463 if len(children) == 1 { 464 base = cd.findPlace() 465 } else { 466 base = cd.findPlaces(children) 467 } 468 base ^= int(children[0]) 469 470 var from, nbase int 471 if flag { 472 from = fromN 473 nbase = baseN 474 } else { 475 from = fromP 476 nbase = baseP 477 } 478 479 if flag && children[0] == labelN { 480 cd.nInfos[from].child = labelN 481 } 482 483 // #[cfg(feature != "reduced-trie")] 484 if !cd.Reduced { 485 cd.array[from].baseV = base 486 } else { 487 cd.array[from].baseV = -base - 1 488 } 489 base, labelN, toPn = cd.listN(base, from, nbase, fromN, toPn, 490 labelN, children, flag) 491 492 // return the position that is free now. 493 if flag { 494 return base ^ int(labelN) 495 } 496 497 return toPn 498 } 499 500 func (cd *Cedar) listN(base, from, nbase, fromN, toPn int, 501 labelN byte, children []byte, flag bool) (int, byte, int) { 502 // the actual work for relocating the chilren 503 for i := 0; i < len(children); i++ { 504 to := cd.popENode(base, from, children[i]) 505 newTo := nbase ^ int(children[i]) 506 507 if i == len(children)-1 { 508 cd.nInfos[to].sibling = 0 509 } else { 510 cd.nInfos[to].sibling = children[i+1] 511 } 512 513 // new node has no children 514 if flag && newTo == toPn { 515 continue 516 } 517 518 arr := &cd.array[to] 519 arrs := &cd.array[newTo] 520 arr.baseV = arrs.baseV 521 522 condition := false 523 if !cd.Reduced { 524 condition = arr.baseV > 0 && children[i] != 0 525 } else { 526 condition = arr.baseV < 0 && children[i] != 0 527 } 528 529 if condition { 530 // this node has children, fix their check 531 c := cd.nInfos[newTo].child 532 cd.nInfos[to].child = c 533 cd.array[arr.base(cd.Reduced)^int(c)].check = to 534 535 c = cd.nInfos[arr.base(cd.Reduced)^int(c)].sibling 536 for c != 0 { 537 cd.array[arr.base(cd.Reduced)^int(c)].check = to 538 c = cd.nInfos[arr.base(cd.Reduced)^int(c)].sibling 539 } 540 } 541 542 // the parent node is moved 543 if !flag && newTo == fromN { 544 fromN = to 545 } 546 547 if flag || newTo != toPn { 548 cd.pushENode(newTo) 549 continue 550 } 551 552 // clean up the space that was moved away from. 553 cd.pushSibling(fromN, toPn^int(labelN), labelN, true) 554 cd.nInfos[newTo].child = 0 555 556 if !cd.Reduced { 557 if labelN != 0 { 558 arrs.baseV = -1 559 } else { 560 arrs.baseV = 0 561 } 562 } else { 563 arrs.baseV = ValLimit 564 } 565 arrs.check = fromN 566 567 } 568 569 return base, labelN, toPn 570 } 571 572 // pop a block at idx from the linked-list of type `from`, specially handled if it is the last 573 // one in the linked-list. 574 func (cd *Cedar) popBlock(idx int, from *int, last bool) { 575 if last { 576 *from = 0 577 return 578 } 579 580 b := &cd.blocks[idx] 581 cd.blocks[b.prev].next = b.next 582 cd.blocks[b.next].prev = b.prev 583 if idx == *from { 584 *from = b.next 585 } 586 } 587 588 // return the block at idx to the linked-list of `to`, specially handled 589 // if the linked-list is empty 590 func (cd *Cedar) pushBlock(idx int, to *int, empty bool) { 591 b := &cd.blocks[idx] 592 if empty { 593 *to, b.prev, b.next = idx, idx, idx 594 return 595 } 596 597 tailTo := &cd.blocks[*to].prev 598 b.prev = *tailTo 599 b.next = *to 600 *to, *tailTo, cd.blocks[*tailTo].next = idx, idx, idx 601 } 602 603 // Reallocate more spaces so that we have more free blocks. 604 func (cd *Cedar) addBlock() int { 605 if cd.size == cd.capacity { 606 if cd.capacity*int(unsafe.Sizeof(Node{})) > maxMemStep { 607 cd.capacity += maxMemStep / int(unsafe.Sizeof(Node{})) 608 } else { 609 cd.capacity += cd.capacity 610 } 611 if cd.useMMap { 612 cd.mmap.AddBlock(cd, cd.capacity) 613 } else { 614 array := cd.array 615 cd.array = make([]Node, cd.capacity) 616 copy(cd.array, array) 617 618 nInfos := cd.nInfos 619 cd.nInfos = make([]NInfo, cd.capacity) 620 copy(cd.nInfos, nInfos) 621 622 blocks := cd.blocks 623 cd.blocks = make([]Block, cd.capacity>>8) 624 copy(cd.blocks, blocks) 625 } 626 627 } 628 629 cd.blocks[cd.size>>8].init() 630 cd.blocks[cd.size>>8].eHead = cd.size 631 632 // make it a doubley linked list 633 cd.array[cd.size] = Node{baseV: -(cd.size + 255), check: -(cd.size + 1)} 634 for i := cd.size + 1; i < cd.size+255; i++ { 635 cd.array[i] = Node{baseV: -(i - 1), check: -(i + 1)} 636 } 637 cd.array[cd.size+255] = Node{baseV: -(cd.size + 254), check: -cd.size} 638 639 // append to block Open 640 cd.pushBlock(cd.size>>8, &cd.blocksHeadOpen, cd.blocksHeadOpen == 0) 641 cd.size += 256 642 return cd.size>>8 - 1 643 } 644 645 // transfer the block at idx from the linked-list of `from` to the linked-list of `to`, 646 // specially handle the case where the destination linked-list is empty. 647 func (cd *Cedar) transferBlock(idx int, from, to *int) { 648 b := cd.blocks[idx] 649 cd.popBlock(idx, from, idx == b.next) // b.next it's the last one if the next points to itself 650 cd.pushBlock(idx, to, *to == 0 && b.num != 0) 651 }