github.com/lxt1045/json@v0.0.0-20231013032136-54d6b1d6e525/tire_tree.go (about)

     1  // MIT License
     2  //
     3  // Copyright (c) 2021 Xiantu Li
     4  //
     5  // Permission is hereby granted, free of charge, to any person obtaining a copy
     6  // of this software and associated documentation files (the "Software"), to deal
     7  // in the Software without restriction, including without limitation the rights
     8  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     9  // copies of the Software, and to permit persons to whom the Software is
    10  // furnished to do so, subject to the following conditions:
    11  //
    12  // The above copyright notice and this permission notice shall be included in all
    13  // copies or substantial portions of the Software.
    14  //
    15  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    16  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    17  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    18  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    19  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    20  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    21  // SOFTWARE.
    22  
    23  package json
    24  
    25  import (
    26  	"fmt"
    27  	"unsafe"
    28  )
    29  
// tireTreeNode is a single slot of a trie state row. A slot is considered
// empty when both next and idx are negative (see initTireTreeNode).
type tireTreeNode struct {
	next int16 // index of the next state row in tireTree.tree; negative when the slot has no successor
	idx  int16 // index into tireTree.tags of the key resolved at this slot; negative when the slot is not a leaf
	skip int16 // extra key bytes a lookup jumps over before continuing at next (compressed shared prefix)
}
// tireTree is a trie over tag names stored as a table of state rows.
type tireTree struct {
	// tree holds the state rows; row 0 is the root and every row has one
	// slot per 7-bit byte value.
	tree [][128]tireTreeNode
	// ptree *[1 << 20]tireTreeNode
	// tags is the tag list that tireTreeNode.idx indexes into.
	tags []*TagInfo
}
    40  
    41  func initTireTreeNode(tree *[128]tireTreeNode) {
    42  	for i := range tree {
    43  		tree[i].idx = -1
    44  		tree[i].next = -1
    45  	}
    46  }
    47  func NewTireTree(tags []*TagInfo) (root *tireTree, err error) {
    48  	// for i, tag := range tags {
    49  	// 	tags[i].TagName = tag.TagName + `"`
    50  	// }
    51  	root = &tireTree{
    52  		tree: make([][128]tireTreeNode, 1, 4),
    53  		tags: tags,
    54  	}
    55  	initTireTreeNode(&root.tree[0])
    56  
    57  out:
    58  	for idx, tag := range tags {
    59  		key := tag.TagName + `"`
    60  		status := &root.tree[0]
    61  		for iKey, c := range []byte(key) {
    62  			k := c % 128
    63  			n := &status[k]
    64  
    65  			// 没有被占领或是叶子结点
    66  			if n.next < 0 {
    67  				// 没有被占领
    68  				if n.idx < 0 {
    69  					//占领此叶节点
    70  					n.idx = int16(idx)
    71  					continue out
    72  				}
    73  				// 叶子节点
    74  				old := root.tags[n.idx].TagName + `"`
    75  				if old == key {
    76  					err = fmt.Errorf("duplicate key: %s", key)
    77  					return
    78  				}
    79  
    80  				// 已经是 old 的终点 '"' 了。
    81  				if len(old) == iKey+1 {
    82  					err = fmt.Errorf("error key: %s", key)
    83  					return
    84  				}
    85  				// 修改老的 status
    86  				nOld := *n
    87  				n.idx = -1
    88  				n.next = int16(len(root.tree))
    89  
    90  				// 给旧的 node 添加状态
    91  				root.tree = append(root.tree, [128]tireTreeNode{})
    92  				status = &root.tree[len(root.tree)-1] // 创建新的状态
    93  				initTireTreeNode(status)
    94  
    95  				kOld := old[iKey+1] % 128
    96  				status[kOld] = nOld
    97  
    98  				// kNew := key[iKey+1] % 128
    99  				// if kNew != kOld {
   100  				// 	//占领此叶节点
   101  				// 	status[k].idx = int16(idx + 1)
   102  				// 	continue out
   103  				// }
   104  
   105  				// key 的 next 在 for 的下一轮再处理!
   106  				continue
   107  			}
   108  			status = &root.tree[n.next]
   109  		}
   110  	}
   111  	// for i := range tags {
   112  	// 	tags[i].TagName = tags[i].TagName[:len(tags[i].TagName)-1]
   113  	// }
   114  
   115  	if cap(root.tree) > len(root.tree) {
   116  		// tree := make([][128]tireTreeNode, 0, len(root.tree))
   117  		// tree = append(tree, root.tree...)
   118  		// root.tree = tree
   119  		root.tree = root.tree[:len(root.tree):len(root.tree)]
   120  	}
   121  
   122  	// root.ptree = (*[1 << 20]tireTreeNode)(unsafe.Pointer(&root.tree[0]))
   123  
   124  	// 处理共同前缀的情形
   125  	for renew := true; renew; {
   126  		renew = root.skipTree()
   127  	}
   128  	// root.skipTree()
   129  
   130  	// 处理:合并同类树,避免树太高
   131  	for renew := true; renew; {
   132  		renew = root.zipTree()
   133  	}
   134  
   135  	return
   136  }
   137  
   138  func (root *tireTree) skipTree() (rennew bool) {
   139  	for current := range root.tree {
   140  		if current == len(root.tree)-1 {
   141  			break
   142  		}
   143  		// 当前处理的状态行
   144  		currentStatus := &root.tree[current]
   145  		for idx, node := range currentStatus {
   146  			nextsDeleted := node.next // 即将删除的节点
   147  			if nextsDeleted < 0 {
   148  				continue
   149  			}
   150  			nextStatus := &root.tree[nextsDeleted] // 即将删除的 status 行
   151  			count, nextNext := 0, int16(0)
   152  			for j := range nextStatus {
   153  				if nextStatus[j].next >= 0 || nextStatus[j].idx >= 0 {
   154  					count++
   155  					nextNext = nextStatus[j].next
   156  				}
   157  			}
   158  			if count > 1 {
   159  				continue
   160  			}
   161  			if count == 0 {
   162  				panic("never happen")
   163  			}
   164  			currentStatus[idx].skip++
   165  			currentStatus[idx].next = int16(nextNext)
   166  
   167  			root.tree = append(root.tree[:nextsDeleted], root.tree[nextsDeleted+1:]...)
   168  
   169  			// 开始修正复制后的状态
   170  			for i := range root.tree {
   171  				for j := range root.tree[i] {
   172  					if root.tree[i][j].next > nextsDeleted {
   173  						root.tree[i][j].next--
   174  					}
   175  				}
   176  			}
   177  
   178  			// 还需再次探测是否可以继续压缩
   179  			rennew = true
   180  			return
   181  		}
   182  
   183  	}
   184  	return
   185  }
   186  
   187  func (root *tireTree) zipTree() (rennew bool) {
   188  	for current := range root.tree {
   189  		if current == len(root.tree)-1 {
   190  			break
   191  		}
   192  		// 当前处理的状态行
   193  		currentStatus := &root.tree[current]
   194  
   195  		// 用于记录当前状态行已存在的状态
   196  		m := make(map[uint8]struct{})
   197  		for j := range currentStatus {
   198  			if currentStatus[j].next >= 0 || currentStatus[j].idx >= 0 {
   199  				m[uint8(j)] = struct{}{}
   200  			}
   201  		}
   202  
   203  		// TODO: 实际上如果和 current 冲突,还可以放其他节点去,一样可以压缩;不过都只能本 status 一起放
   204  		for _, node := range currentStatus {
   205  			nextsDeleted := node.next // 即将删除的节点
   206  			if nextsDeleted < 0 || node.skip > 0 {
   207  				continue
   208  			}
   209  			nextStatus := &root.tree[nextsDeleted] // 即将删除的 status 行
   210  			canZip := true
   211  			for j := range nextStatus {
   212  				if nextStatus[j].skip > 0 {
   213  					// 和父节点有冲突,不适合压缩节点;
   214  					canZip = false
   215  					break
   216  				}
   217  				if nextStatus[j].next >= 0 || nextStatus[j].idx >= 0 {
   218  					if _, ok := m[uint8(j)]; ok {
   219  						// 和父节点有冲突,不适合压缩节点;
   220  						canZip = false
   221  						break
   222  					}
   223  				}
   224  			}
   225  			if !canZip {
   226  				continue
   227  			}
   228  
   229  			// 开始处理压缩逻辑
   230  			for j := range nextStatus {
   231  				if nextStatus[j].next >= 0 || nextStatus[j].idx >= 0 {
   232  					currentStatus[j] = nextStatus[j]
   233  				}
   234  			}
   235  
   236  			root.tree = append(root.tree[:nextsDeleted], root.tree[nextsDeleted+1:]...)
   237  
   238  			// 开始修正复制后的状态
   239  			for i := range root.tree {
   240  				for j := range root.tree[i] {
   241  					if root.tree[i][j].next == nextsDeleted {
   242  						// root.tree[i][j].next = int16(i)
   243  						root.tree[i][j].next = int16(current)
   244  						continue
   245  					}
   246  					if root.tree[i][j].next > nextsDeleted {
   247  						root.tree[i][j].next--
   248  					}
   249  				}
   250  			}
   251  
   252  			// 还需再次探测是否可以继续压缩
   253  			rennew = true
   254  			return
   255  		}
   256  
   257  	}
   258  	return
   259  }
   260  
   261  func (root *tireTree) Get2(key string) *TagInfo {
   262  	p := (*[1 << 20]tireTreeNode)(unsafe.Pointer(&root.tree[0]))
   263  	// p := b.ptree
   264  	idx := int16(0)
   265  	// for _, c := range []byte(key) {
   266  	for i := 0; i < len(key); i++ {
   267  		c := key[i]
   268  		k := c & 0x7f
   269  		next := p[idx+int16(k)]
   270  		idx = int16(next.next) * 128
   271  		if next.next >= 0 {
   272  			i += int(next.skip)
   273  			continue
   274  		}
   275  
   276  		if next.idx >= 0 {
   277  			tag := root.tags[next.idx]
   278  			if len(key) > len(tag.TagName) && key[len(tag.TagName)] == '"' && tag.TagName == key[:len(tag.TagName)] {
   279  				return tag
   280  			}
   281  		}
   282  		return nil
   283  	}
   284  
   285  	return nil
   286  }
   287  func (root *tireTree) Get(key string) *TagInfo {
   288  	status := &root.tree[0]
   289  	// for _, c := range []byte(key) {
   290  	for i := 0; i < len(key); i++ {
   291  		c := key[i]
   292  		k := c & 0x7f
   293  		next := status[k]
   294  		if next.next >= 0 {
   295  			i += int(next.skip)
   296  			status = &root.tree[next.next]
   297  			continue
   298  		}
   299  		if next.idx >= 0 {
   300  			tag := root.tags[next.idx]
   301  			if len(key) > len(tag.TagName) && key[len(tag.TagName)] == '"' && tag.TagName == key[:len(tag.TagName)] {
   302  				return tag
   303  			}
   304  		}
   305  		return nil
   306  	}
   307  
   308  	return nil
   309  }