github.com/goplus/yap@v0.8.1/tree.go (about)

     1  /*
     2   * Copyright (c) 2023 The GoPlus Authors (goplus.org). All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package yap
    18  
    19  import (
    20  	"strings"
    21  	"unicode"
    22  	"unicode/utf8"
    23  )
    24  
    25  func min(a, b int) int {
    26  	if a <= b {
    27  		return a
    28  	}
    29  	return b
    30  }
    31  
    32  func longestCommonPrefix(a, b string) int {
    33  	i := 0
    34  	max := min(len(a), len(b))
    35  	for i < max && a[i] == b[i] {
    36  		i++
    37  	}
    38  	return i
    39  }
    40  
    41  // Search for a wildcard segment and check the name for invalid characters.
    42  // Returns -1 as index, if no wildcard was found.
    43  func findWildcard(path string) (wilcard string, i int, valid bool) {
    44  	// Find start
    45  	for start, c := range []byte(path) {
    46  		// A wildcard starts with ':' (param) or '*' (catch-all)
    47  		if c != ':' && c != '*' {
    48  			continue
    49  		}
    50  
    51  		// Find end and check for invalid characters
    52  		valid = true
    53  		for end, c := range []byte(path[start+1:]) {
    54  			switch c {
    55  			case '/':
    56  				return path[start : start+1+end], start, valid
    57  			case ':', '*':
    58  				valid = false
    59  			}
    60  		}
    61  		return path[start:], start, valid
    62  	}
    63  	return "", -1, false
    64  }
    65  
// nodeType classifies a node of the routing tree.
type nodeType uint8

// Node kinds used by the tree:
//   - static:   the zero value; also assigned to the lower half of a node
//     when addRoute splits an edge.
//   - root:     assigned by addRoute to the node on which the first route of
//     an empty tree is inserted.
//   - param:    a ':name' segment created by insertChild.
//   - catchAll: the two nodes insertChild creates for a '*name' segment.
const (
	static nodeType = iota // default
	root
	param
	catchAll
)
    74  
// node is one vertex of the routing tree.
//
// Fields:
//   - path:      the piece of the route stored at this node; for param nodes
//     it is the wildcard including its leading ':', for the handle-carrying
//     catch-all node it starts with "/*".
//   - indices:   one byte per entry of children (the first byte of that
//     child's path), kept in the same order as children; not maintained for
//     wildcard children, which are reached through wildChild instead.
//   - wildChild: set when children[0] is a wildcard (param or catchAll) node
//     that lookups must follow instead of consulting indices.
//   - nType:     the kind of node, see nodeType.
//   - priority:  incremented as routes are added through this node;
//     incrementChildPrio keeps children ordered by descending priority.
//   - children:  the child nodes.
//   - handle:    the handler registered for the route ending at this node,
//     or nil if no route ends here.
type node struct {
	path      string
	indices   string
	wildChild bool
	nType     nodeType
	priority  uint32
	children  []*node
	handle    func(ctx *Context)
}
    84  
    85  // Increments priority of the given child and reorders if necessary
    86  func (n *node) incrementChildPrio(pos int) int {
    87  	cs := n.children
    88  	cs[pos].priority++
    89  	prio := cs[pos].priority
    90  
    91  	// Adjust position (move to front)
    92  	newPos := pos
    93  	for ; newPos > 0 && cs[newPos-1].priority < prio; newPos-- {
    94  		// Swap node positions
    95  		cs[newPos-1], cs[newPos] = cs[newPos], cs[newPos-1]
    96  	}
    97  
    98  	// Build new index char string
    99  	if newPos != pos {
   100  		n.indices = n.indices[:newPos] + // Unchanged prefix, might be empty
   101  			n.indices[pos:pos+1] + // The index char we move
   102  			n.indices[newPos:pos] + n.indices[pos+1:] // Rest without char at 'pos'
   103  	}
   104  
   105  	return newPos
   106  }
   107  
   108  // addRoute adds a node with the given handle to the path.
   109  // Not concurrency-safe!
   110  func (n *node) addRoute(path string, handle func(ctx *Context)) {
   111  	fullPath := path
   112  	n.priority++
   113  
   114  	// Empty tree
   115  	if n.path == "" && n.indices == "" {
   116  		n.insertChild(path, fullPath, handle)
   117  		n.nType = root
   118  		return
   119  	}
   120  
   121  walk:
   122  	for {
   123  		// Find the longest common prefix.
   124  		// This also implies that the common prefix contains no ':' or '*'
   125  		// since the existing key can't contain those chars.
   126  		i := longestCommonPrefix(path, n.path)
   127  
   128  		// Split edge
   129  		if i < len(n.path) {
   130  			child := node{
   131  				path:      n.path[i:],
   132  				wildChild: n.wildChild,
   133  				nType:     static,
   134  				indices:   n.indices,
   135  				children:  n.children,
   136  				handle:    n.handle,
   137  				priority:  n.priority - 1,
   138  			}
   139  
   140  			n.children = []*node{&child}
   141  			// []byte for proper unicode char conversion, see #65
   142  			n.indices = string([]byte{n.path[i]})
   143  			n.path = path[:i]
   144  			n.handle = nil
   145  			n.wildChild = false
   146  		}
   147  
   148  		// Make new node a child of this node
   149  		if i < len(path) {
   150  			path = path[i:]
   151  
   152  			if n.wildChild {
   153  				n = n.children[0]
   154  				n.priority++
   155  
   156  				// Check if the wildcard matches
   157  				if len(path) >= len(n.path) && n.path == path[:len(n.path)] &&
   158  					// Adding a child to a catchAll is not possible
   159  					n.nType != catchAll &&
   160  					// Check for longer wildcard, e.g. :name and :names
   161  					(len(n.path) >= len(path) || path[len(n.path)] == '/') {
   162  					continue walk
   163  				} else {
   164  					// Wildcard conflict
   165  					pathSeg := path
   166  					if n.nType != catchAll {
   167  						pathSeg = strings.SplitN(pathSeg, "/", 2)[0]
   168  					}
   169  					prefix := fullPath[:strings.Index(fullPath, pathSeg)] + n.path
   170  					panic("'" + pathSeg +
   171  						"' in new path '" + fullPath +
   172  						"' conflicts with existing wildcard '" + n.path +
   173  						"' in existing prefix '" + prefix +
   174  						"'")
   175  				}
   176  			}
   177  
   178  			idxc := path[0]
   179  
   180  			// '/' after param
   181  			if n.nType == param && idxc == '/' && len(n.children) == 1 {
   182  				n = n.children[0]
   183  				n.priority++
   184  				continue walk
   185  			}
   186  
   187  			// Check if a child with the next path byte exists
   188  			for i, c := range []byte(n.indices) {
   189  				if c == idxc {
   190  					i = n.incrementChildPrio(i)
   191  					n = n.children[i]
   192  					continue walk
   193  				}
   194  			}
   195  
   196  			// Otherwise insert it
   197  			if idxc != ':' && idxc != '*' {
   198  				// []byte for proper unicode char conversion, see #65
   199  				n.indices += string([]byte{idxc})
   200  				child := &node{}
   201  				n.children = append(n.children, child)
   202  				n.incrementChildPrio(len(n.indices) - 1)
   203  				n = child
   204  			}
   205  			n.insertChild(path, fullPath, handle)
   206  			return
   207  		}
   208  
   209  		// Otherwise add handle to current node
   210  		if n.handle != nil {
   211  			panic("a handle is already registered for path '" + fullPath + "'")
   212  		}
   213  		n.handle = handle
   214  		return
   215  	}
   216  }
   217  
   218  func (n *node) insertChild(path, fullPath string, handle func(ctx *Context)) {
   219  	for {
   220  		// Find prefix until first wildcard
   221  		wildcard, i, valid := findWildcard(path)
   222  		if i < 0 { // No wilcard found
   223  			break
   224  		}
   225  
   226  		// The wildcard name must not contain ':' and '*'
   227  		if !valid {
   228  			panic("only one wildcard per path segment is allowed, has: '" +
   229  				wildcard + "' in path '" + fullPath + "'")
   230  		}
   231  
   232  		// Check if the wildcard has a name
   233  		if len(wildcard) < 2 {
   234  			panic("wildcards must be named with a non-empty name in path '" + fullPath + "'")
   235  		}
   236  
   237  		// Check if this node has existing children which would be
   238  		// unreachable if we insert the wildcard here
   239  		if len(n.children) > 0 {
   240  			panic("wildcard segment '" + wildcard +
   241  				"' conflicts with existing children in path '" + fullPath + "'")
   242  		}
   243  
   244  		// param
   245  		if wildcard[0] == ':' {
   246  			if i > 0 {
   247  				// Insert prefix before the current wildcard
   248  				n.path = path[:i]
   249  				path = path[i:]
   250  			}
   251  
   252  			n.wildChild = true
   253  			child := &node{
   254  				nType: param,
   255  				path:  wildcard,
   256  			}
   257  			n.children = []*node{child}
   258  			n = child
   259  			n.priority++
   260  
   261  			// If the path doesn't end with the wildcard, then there
   262  			// will be another non-wildcard subpath starting with '/'
   263  			if len(wildcard) < len(path) {
   264  				path = path[len(wildcard):]
   265  				child := &node{
   266  					priority: 1,
   267  				}
   268  				n.children = []*node{child}
   269  				n = child
   270  				continue
   271  			}
   272  
   273  			// Otherwise we're done. Insert the handle in the new leaf
   274  			n.handle = handle
   275  			return
   276  		}
   277  
   278  		// catchAll
   279  		if i+len(wildcard) != len(path) {
   280  			panic("catch-all routes are only allowed at the end of the path in path '" + fullPath + "'")
   281  		}
   282  
   283  		if len(n.path) > 0 && n.path[len(n.path)-1] == '/' {
   284  			panic("catch-all conflicts with existing handle for the path segment root in path '" + fullPath + "'")
   285  		}
   286  
   287  		// Currently fixed width 1 for '/'
   288  		i--
   289  		if path[i] != '/' {
   290  			panic("no / before catch-all in path '" + fullPath + "'")
   291  		}
   292  
   293  		n.path = path[:i]
   294  
   295  		// First node: catchAll node with empty path
   296  		child := &node{
   297  			wildChild: true,
   298  			nType:     catchAll,
   299  		}
   300  		n.children = []*node{child}
   301  		n.indices = string('/')
   302  		n = child
   303  		n.priority++
   304  
   305  		// Second node: node holding the variable
   306  		child = &node{
   307  			path:     path[i:],
   308  			nType:    catchAll,
   309  			handle:   handle,
   310  			priority: 1,
   311  		}
   312  		n.children = []*node{child}
   313  
   314  		return
   315  	}
   316  
   317  	// If no wildcard was found, simply insert the path and handle
   318  	n.path = path
   319  	n.handle = handle
   320  }
   321  
// getValue walks the tree starting at n and returns the handle registered
// for path, or nil if there is none.
//
// When ctx is non-nil, every wildcard value met on the way is passed to
// ctx.setParam: param values under the wildcard name without its leading ':',
// catch-all values under the name without its leading "/*".
//
// If no handle can be found, tsr (trailing slash redirect) reports whether a
// handle exists for the same path with a trailing slash added or removed.
//
// It panics with "invalid node type" if a wildcard child is neither a param
// nor a catchAll node.
func (n *node) getValue(path string, ctx *Context) (handle func(ctx *Context), tsr bool) {
walk: // Outer loop for walking the tree
	for {
		prefix := n.path
		if len(path) > len(prefix) {
			if path[:len(prefix)] == prefix {
				// Consume this node's path; at least one byte remains.
				path = path[len(prefix):]

				// If this node does not have a wildcard (param or catchAll)
				// child, we can just look up the next child node and continue
				// to walk down the tree
				if !n.wildChild {
					idxc := path[0]
					for i, c := range []byte(n.indices) {
						if c == idxc {
							n = n.children[i]
							continue walk
						}
					}

					// Nothing found.
					// We can recommend to redirect to the same URL without a
					// trailing slash if a leaf exists for that path.
					tsr = (path == "/" && n.handle != nil)
					return
				}

				// Handle wildcard child
				n = n.children[0]
				switch n.nType {
				case param:
					// Find param end (either '/' or path end)
					end := 0
					for end < len(path) && path[end] != '/' {
						end++
					}

					// Save param value
					if ctx != nil {
						ctx.setParam(n.path[1:], path[:end])
					}

					// We need to go deeper!
					if end < len(path) {
						if len(n.children) > 0 {
							path = path[end:]
							n = n.children[0]
							continue walk
						}

						// ... but we can't
						// Only a single trailing '/' is left over.
						tsr = (len(path) == end+1)
						return
					}

					if handle = n.handle; handle != nil {
						return
					} else if len(n.children) == 1 {
						// No handle found. Check if a handle for this path + a
						// trailing slash exists for TSR recommendation
						n = n.children[0]
						tsr = (n.path == "/" && n.handle != nil) || (n.path == "" && n.indices == "/")
					}
					return

				case catchAll:
					// Save param value
					if ctx != nil {
						ctx.setParam(n.path[2:], path)
					}

					handle = n.handle
					return

				default:
					panic("invalid node type")
				}
			}
		} else if path == prefix {
			// We should have reached the node containing the handle.
			// Check if this node has a handle registered.
			if handle = n.handle; handle != nil {
				return
			}

			// If there is no handle for this route, but this route has a
			// wildcard child, there must be a handle for this path with an
			// additional trailing slash
			if path == "/" && n.wildChild && n.nType != root {
				tsr = true
				return
			}

			// A handle-less static "/" node also yields a redirect
			// recommendation.
			if path == "/" && n.nType == static {
				tsr = true
				return
			}

			// No handle found. Check if a handle for this path + a
			// trailing slash exists for trailing slash recommendation
			for i, c := range []byte(n.indices) {
				if c == '/' {
					n = n.children[i]
					tsr = (len(n.path) == 1 && n.handle != nil) ||
						(n.nType == catchAll && n.children[0].handle != nil)
					return
				}
			}
			return
		}

		// Nothing found. We can recommend to redirect to the same URL with an
		// extra trailing slash if a leaf exists for that path
		tsr = (path == "/") ||
			(len(prefix) == len(path)+1 && prefix[len(path)] == '/' &&
				path == prefix[:len(prefix)-1] && n.handle != nil)
		return
	}
}
   446  
   447  // Makes a case-insensitive lookup of the given path and tries to find a func(ctx *Context).
   448  // It can optionally also fix trailing slashes.
   449  // It returns the case-corrected path and a bool indicating whether the lookup
   450  // was successful.
   451  func (n *node) findCaseInsensitivePath(path string, fixTrailingSlash bool) (fixedPath string, found bool) {
   452  	const stackBufSize = 128
   453  
   454  	// Use a static sized buffer on the stack in the common case.
   455  	// If the path is too long, allocate a buffer on the heap instead.
   456  	buf := make([]byte, 0, stackBufSize)
   457  	if l := len(path) + 1; l > stackBufSize {
   458  		buf = make([]byte, 0, l)
   459  	}
   460  
   461  	ciPath := n.findCaseInsensitivePathRec(
   462  		path,
   463  		buf,       // Preallocate enough memory for new path
   464  		[4]byte{}, // Empty rune buffer
   465  		fixTrailingSlash,
   466  	)
   467  
   468  	return string(ciPath), ciPath != nil
   469  }
   470  
   471  // Shift bytes in array by n bytes left
   472  func shiftNRuneBytes(rb [4]byte, n int) [4]byte {
   473  	switch n {
   474  	case 0:
   475  		return rb
   476  	case 1:
   477  		return [4]byte{rb[1], rb[2], rb[3], 0}
   478  	case 2:
   479  		return [4]byte{rb[2], rb[3]}
   480  	case 3:
   481  		return [4]byte{rb[3]}
   482  	default:
   483  		return [4]byte{}
   484  	}
   485  }
   486  
   487  // Recursive case-insensitive lookup function used by n.findCaseInsensitivePath
   488  func (n *node) findCaseInsensitivePathRec(path string, ciPath []byte, rb [4]byte, fixTrailingSlash bool) []byte {
   489  	npLen := len(n.path)
   490  
   491  walk: // Outer loop for walking the tree
   492  	for len(path) >= npLen && (npLen == 0 || strings.EqualFold(path[1:npLen], n.path[1:])) {
   493  		// Add common prefix to result
   494  		oldPath := path
   495  		path = path[npLen:]
   496  		ciPath = append(ciPath, n.path...)
   497  
   498  		if len(path) > 0 {
   499  			// If this node does not have a wildcard (param or catchAll) child,
   500  			// we can just look up the next child node and continue to walk down
   501  			// the tree
   502  			if !n.wildChild {
   503  				// Skip rune bytes already processed
   504  				rb = shiftNRuneBytes(rb, npLen)
   505  
   506  				if rb[0] != 0 {
   507  					// Old rune not finished
   508  					idxc := rb[0]
   509  					for i, c := range []byte(n.indices) {
   510  						if c == idxc {
   511  							// continue with child node
   512  							n = n.children[i]
   513  							npLen = len(n.path)
   514  							continue walk
   515  						}
   516  					}
   517  				} else {
   518  					// Process a new rune
   519  					var rv rune
   520  
   521  					// Find rune start.
   522  					// Runes are up to 4 byte long,
   523  					// -4 would definitely be another rune.
   524  					var off int
   525  					for max := min(npLen, 3); off < max; off++ {
   526  						if i := npLen - off; utf8.RuneStart(oldPath[i]) {
   527  							// read rune from cached path
   528  							rv, _ = utf8.DecodeRuneInString(oldPath[i:])
   529  							break
   530  						}
   531  					}
   532  
   533  					// Calculate lowercase bytes of current rune
   534  					lo := unicode.ToLower(rv)
   535  					utf8.EncodeRune(rb[:], lo)
   536  
   537  					// Skip already processed bytes
   538  					rb = shiftNRuneBytes(rb, off)
   539  
   540  					idxc := rb[0]
   541  					for i, c := range []byte(n.indices) {
   542  						// Lowercase matches
   543  						if c == idxc {
   544  							// must use a recursive approach since both the
   545  							// uppercase byte and the lowercase byte might exist
   546  							// as an index
   547  							if out := n.children[i].findCaseInsensitivePathRec(
   548  								path, ciPath, rb, fixTrailingSlash,
   549  							); out != nil {
   550  								return out
   551  							}
   552  							break
   553  						}
   554  					}
   555  
   556  					// If we found no match, the same for the uppercase rune,
   557  					// if it differs
   558  					if up := unicode.ToUpper(rv); up != lo {
   559  						utf8.EncodeRune(rb[:], up)
   560  						rb = shiftNRuneBytes(rb, off)
   561  
   562  						idxc := rb[0]
   563  						for i, c := range []byte(n.indices) {
   564  							// Uppercase matches
   565  							if c == idxc {
   566  								// Continue with child node
   567  								n = n.children[i]
   568  								npLen = len(n.path)
   569  								continue walk
   570  							}
   571  						}
   572  					}
   573  				}
   574  
   575  				// Nothing found. We can recommend to redirect to the same URL
   576  				// without a trailing slash if a leaf exists for that path
   577  				if fixTrailingSlash && path == "/" && n.handle != nil {
   578  					return ciPath
   579  				}
   580  				return nil
   581  			}
   582  
   583  			n = n.children[0]
   584  			switch n.nType {
   585  			case param:
   586  				// Find param end (either '/' or path end)
   587  				end := 0
   588  				for end < len(path) && path[end] != '/' {
   589  					end++
   590  				}
   591  
   592  				// Add param value to case insensitive path
   593  				ciPath = append(ciPath, path[:end]...)
   594  
   595  				// We need to go deeper!
   596  				if end < len(path) {
   597  					if len(n.children) > 0 {
   598  						// Continue with child node
   599  						n = n.children[0]
   600  						npLen = len(n.path)
   601  						path = path[end:]
   602  						continue
   603  					}
   604  
   605  					// ... but we can't
   606  					if fixTrailingSlash && len(path) == end+1 {
   607  						return ciPath
   608  					}
   609  					return nil
   610  				}
   611  
   612  				if n.handle != nil {
   613  					return ciPath
   614  				} else if fixTrailingSlash && len(n.children) == 1 {
   615  					// No handle found. Check if a handle for this path + a
   616  					// trailing slash exists
   617  					n = n.children[0]
   618  					if n.path == "/" && n.handle != nil {
   619  						return append(ciPath, '/')
   620  					}
   621  				}
   622  				return nil
   623  
   624  			case catchAll:
   625  				return append(ciPath, path...)
   626  
   627  			default:
   628  				panic("invalid node type")
   629  			}
   630  		} else {
   631  			// We should have reached the node containing the handle.
   632  			// Check if this node has a handle registered.
   633  			if n.handle != nil {
   634  				return ciPath
   635  			}
   636  
   637  			// No handle found.
   638  			// Try to fix the path by adding a trailing slash
   639  			if fixTrailingSlash {
   640  				for i, c := range []byte(n.indices) {
   641  					if c == '/' {
   642  						n = n.children[i]
   643  						if (len(n.path) == 1 && n.handle != nil) ||
   644  							(n.nType == catchAll && n.children[0].handle != nil) {
   645  							return append(ciPath, '/')
   646  						}
   647  						return nil
   648  					}
   649  				}
   650  			}
   651  			return nil
   652  		}
   653  	}
   654  
   655  	// Nothing found.
   656  	// Try to fix the path by adding / removing a trailing slash
   657  	if fixTrailingSlash {
   658  		if path == "/" {
   659  			return ciPath
   660  		}
   661  		if len(path)+1 == npLen && n.path[len(path)] == '/' &&
   662  			strings.EqualFold(path[1:], n.path[1:len(path)]) && n.handle != nil {
   663  			return append(ciPath, n.path...)
   664  		}
   665  	}
   666  	return nil
   667  }