trpc.group/trpc-go/trpc-go@v1.0.3/internal/httprule/parse.go (about)

     1  //
     2  //
     3  // Tencent is pleased to support the open source community by making tRPC available.
     4  //
     5  // Copyright (C) 2023 THL A29 Limited, a Tencent company.
     6  // All rights reserved.
     7  //
     8  // If you have downloaded a copy of the tRPC source code from Tencent,
     9  // please note that tRPC source code is licensed under the  Apache 2.0 License,
    10  // A copy of the Apache 2.0 License is included in this file.
    11  //
    12  //
    13  
    14  package httprule
    15  
    16  import (
    17  	"bytes"
    18  	"errors"
    19  	"fmt"
    20  	"strings"
    21  )
    22  
    23  const (
    24  	invalidChar = byte(0)
    25  )
    26  
    27  var (
    28  	errParserInternal   = errors.New("parser internal error")
    29  	errEmptyLiteral     = errors.New("empty literal is not allowed")
    30  	errInitialCharAlpha = errors.New("initial char of identifier not alpha")
    31  	errEmptyIdent       = errors.New("empty identifier")
    32  	errNestedVar        = errors.New("nested variables are not allowed")
    33  	errDeepWildcard     = errors.New("deep wildcard must be the last segment")
    34  	errDupFieldPath     = errors.New("dup field path")
    35  	errLeadingSlash     = errors.New("leading slash required")
    36  )
    37  
    38  // parser is the template parser.
    39  type parser struct {
    40  	urlPath string // the complete httprule URL path.
    41  	curr    int    // current pointer position.
    42  }
    43  
    44  // Parse parses the httprule URL path into template.
    45  func Parse(urlPath string) (*PathTemplate, error) {
    46  	p := &parser{
    47  		urlPath: urlPath,
    48  	}
    49  
    50  	tpl, err := p.parse()
    51  	if err != nil {
    52  		return nil, fmt.Errorf("failed to parse url path %s to template: %w, curr: %d", urlPath, err, p.curr)
    53  	}
    54  
    55  	return tpl, nil
    56  }
    57  
    58  // parse begins parsing.
    59  func (p *parser) parse() (*PathTemplate, error) {
    60  	// should start with '/'.
    61  	if err := p.consume('/'); err != nil {
    62  		return nil, err
    63  	}
    64  
    65  	// parse segments.
    66  	segments, err := p.parseSegments()
    67  	if err != nil {
    68  		return nil, err
    69  	}
    70  	// parse verb.
    71  	var verb string
    72  	// If the last segment is of type literal, then verb has already been included.
    73  	// Find the last position of ':' in the literal.
    74  	lastSegment := segments[len(segments)-1]
    75  	if lastSegment.kind() == kindLiteral {
    76  		s := lastSegment.String()
    77  		idx := strings.LastIndex(s, ":")
    78  		if idx > 0 {
    79  			verb = s[idx+1:]
    80  			segments[len(segments)-1] = literal(s[:idx])
    81  		}
    82  	} else {
    83  		if err := p.consume(':'); err == nil {
    84  			verb, err = p.parseVerb()
    85  			if err != nil {
    86  				return nil, err
    87  			}
    88  		}
    89  	}
    90  
    91  	// check whether parsing is completed.
    92  	if !p.done() {
    93  		return nil, errParserInternal
    94  	}
    95  
    96  	// validate.
    97  	tpl := &PathTemplate{
    98  		segments: segments,
    99  		verb:     verb,
   100  	}
   101  	if err := p.validate(tpl); err != nil {
   102  		return nil, err
   103  	}
   104  
   105  	return tpl, nil
   106  }
   107  
   108  // validate validates the template:
   109  // 1. whether has nested variables
   110  // 2. whether ** is the last segment
   111  // 3. whether exists duplicate variable names
   112  func (p *parser) validate(tpl *PathTemplate) error {
   113  	m := make(map[string]bool) // save duplicate variable names
   114  
   115  	for i, segment := range tpl.segments {
   116  		// If it is of type variable, first check whether it is duplicated,
   117  		// then check its nested segments:
   118  		// 1. whether has nested variables
   119  		// 2. if i != len(tpl.segments) - 1, then nested variables should not have **
   120  		// 3. if i == len(tpl.segments) - 1, then ** has to be the last nested variable
   121  		if segment.kind() == kindVariable {
   122  			// check duplication
   123  			s := strings.Join(segment.fieldPath(), ".")
   124  			if m[s] {
   125  				return errDupFieldPath
   126  			}
   127  			m[s] = true
   128  
   129  			// check nested segments.
   130  			nestedSegments := segment.nestedSegments()
   131  			for j, nestedSegment := range nestedSegments {
   132  				// nested segment is of kind variable.
   133  				if nestedSegment.kind() == kindVariable {
   134  					return errNestedVar
   135  				}
   136  
   137  				// If i != len(tpl.segments) - 1, then nested variables should not have **.
   138  				if i != len(tpl.segments)-1 && nestedSegment.kind() == kindDeepWildcard {
   139  					return errDeepWildcard
   140  				}
   141  
   142  				// If i == len(tpl.segments) - 1, then ** has to be the last nested variable.
   143  				if i == len(tpl.segments)-1 && j != len(nestedSegments)-1 &&
   144  					nestedSegment.kind() == kindDeepWildcard {
   145  					return errDeepWildcard
   146  				}
   147  			}
   148  		}
   149  
   150  		// It is illegal if ** does not appear as the last segment.
   151  		if i != len(tpl.segments)-1 && segment.kind() == kindDeepWildcard {
   152  			return errDeepWildcard
   153  		}
   154  	}
   155  
   156  	return nil
   157  }
   158  
   159  // parseSegments parses segments.
   160  func (p *parser) parseSegments() ([]segment, error) {
   161  	// at lease has one segment.
   162  	seg, err := p.parseSegment()
   163  	if err != nil {
   164  		return nil, err
   165  	}
   166  
   167  	result := []segment{seg}
   168  
   169  	if err := p.consume('/'); err == nil {
   170  		// parse segments recursively.
   171  		segs, err := p.parseSegments()
   172  		if err != nil {
   173  			return nil, err
   174  		}
   175  		result = append(result, segs...)
   176  	}
   177  
   178  	return result, nil
   179  }
   180  
   181  // parseVerb parses verb.
   182  func (p *parser) parseVerb() (string, error) {
   183  	return p.parseLiteral()
   184  }
   185  
   186  // parseSegment parses a single segment.
   187  func (p *parser) parseSegment() (segment, error) {
   188  	switch p.currentChar() {
   189  	case invalidChar:
   190  		return nil, errParserInternal
   191  	case '*':
   192  		if p.peekN(1) == '*' {
   193  			p.curr++
   194  			p.curr++
   195  			return deepWildcard{}, nil
   196  		}
   197  		p.curr++
   198  		return wildcard{}, nil
   199  	case '{':
   200  		return p.parseVariableSegment()
   201  	default:
   202  		return p.parseLiteralSegment()
   203  	}
   204  }
   205  
   206  // parseLiteral parses literal type.
   207  // https://www.ietf.org/rfc/rfc3986.txt, P.49
   208  //
   209  //	pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
   210  //	unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
   211  //	sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
   212  //	              / "*" / "+" / "," / ";" / "="
   213  //	pct-encoded   = "%" HEXDIG HEXDIG
   214  func (p *parser) parseLiteral() (string, error) {
   215  	lit := bytes.Buffer{}
   216  
   217  	for {
   218  		// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
   219  		if isUnreserved(rune(p.currentChar())) || isSubDelims(rune(p.currentChar())) ||
   220  			p.currentChar() == '@' || p.currentChar() == ':' {
   221  			lit.WriteByte(p.currentChar())
   222  			p.curr++
   223  			continue
   224  		} else if isPCTEncoded(rune(p.currentChar()), rune(p.peekN(1)), rune(p.peekN(2))) {
   225  			lit.WriteByte(p.currentChar())
   226  			p.curr++
   227  			lit.WriteByte(p.currentChar())
   228  			p.curr++
   229  			lit.WriteByte(p.currentChar())
   230  			p.curr++
   231  			continue
   232  		} else {
   233  			break
   234  		}
   235  	}
   236  
   237  	// empty literal.
   238  	if lit.Len() == 0 {
   239  		return "", errEmptyLiteral
   240  	}
   241  
   242  	return lit.String(), nil
   243  }
   244  
   245  // parseLiteralSegment parses literal segment.
   246  func (p *parser) parseLiteralSegment() (segment, error) {
   247  	lit, err := p.parseLiteral()
   248  	if err != nil {
   249  		return nil, err
   250  	}
   251  	return literal(lit), nil
   252  }
   253  
   254  // parseVariableSegment parses variable segment.
   255  func (p *parser) parseVariableSegment() (segment, error) {
   256  	var v variable
   257  
   258  	// variable must start with '{'.
   259  	if err := p.consume('{'); err != nil {
   260  		return nil, err
   261  	}
   262  
   263  	// parse fieldPath.
   264  	fieldPath, err := p.parseFieldPath()
   265  	if err != nil {
   266  		return nil, err
   267  	}
   268  	v.fp = fieldPath
   269  
   270  	// check whether has segments.
   271  	if err := p.consume('='); err == nil {
   272  		segments, err := p.parseSegments()
   273  		if err != nil {
   274  			return nil, err
   275  		}
   276  		v.segments = segments
   277  	} else { // no segments, defaults to *.
   278  		v.segments = []segment{wildcard{}}
   279  	}
   280  
   281  	// variable must end with '}'.
   282  	if err := p.consume('}'); err != nil {
   283  		return nil, err
   284  	}
   285  
   286  	return v, nil
   287  }
   288  
   289  // parseFieldPath parses field path.
   290  func (p *parser) parseFieldPath() ([]string, error) {
   291  	// at least has one ident.
   292  	ident, err := p.parseIdent()
   293  	if err != nil {
   294  		return nil, err
   295  	}
   296  
   297  	result := []string{ident}
   298  
   299  	if err := p.consume('.'); err == nil {
   300  		// parse fieldPath recursively.
   301  		fp, err := p.parseFieldPath()
   302  		if err != nil {
   303  			return nil, err
   304  		}
   305  		result = append(result, fp...)
   306  	}
   307  	return result, nil
   308  }
   309  
   310  // parseIdent parses ident, the valid format of ident is ([[:alpha:]_][[:alphanum:]_]*).
   311  func (p *parser) parseIdent() (string, error) {
   312  	ident := bytes.Buffer{}
   313  
   314  	for {
   315  		if ident.Len() == 0 && !isAlpha(rune(p.currentChar())) {
   316  			return "", errInitialCharAlpha
   317  		}
   318  		if isAlpha(rune(p.currentChar())) || isDigit(rune(p.currentChar())) || p.currentChar() == '_' {
   319  			ident.WriteByte(p.currentChar())
   320  			p.curr++
   321  			continue
   322  		}
   323  		break
   324  	}
   325  
   326  	// empty ident.
   327  	if ident.Len() == 0 {
   328  		return "", errEmptyIdent
   329  	}
   330  	return ident.String(), nil
   331  }
   332  
   333  func (p *parser) done() bool {
   334  	return p.curr >= len(p.urlPath)
   335  }
   336  
   337  func (p *parser) currentChar() byte {
   338  	if p.done() {
   339  		return invalidChar
   340  	}
   341  	return p.urlPath[p.curr]
   342  }
   343  
   344  // consume consumes the given character.
   345  func (p *parser) consume(c byte) error {
   346  	if p.currentChar() == c {
   347  		p.curr++
   348  		return nil
   349  	}
   350  	return fmt.Errorf("failed to consume `%c`", c)
   351  }
   352  
   353  // peekN gets the character at position p.curr+n.
   354  func (p *parser) peekN(n int) byte {
   355  	peekIdx := p.curr + n
   356  	if peekIdx < len(p.urlPath) {
   357  		return p.urlPath[peekIdx]
   358  	}
   359  	return invalidChar
   360  }
   361  
   362  // isUnreserved checks whether the given rune is of type unreserved.
   363  func isUnreserved(r rune) bool {
   364  	if isAlpha(r) || isDigit(r) {
   365  		return true
   366  	}
   367  	switch r {
   368  	case '-', '.', '_', '~':
   369  		return true
   370  	default:
   371  		return false
   372  	}
   373  }
   374  
   375  func isAlpha(r rune) bool {
   376  	return ('A' <= r && r <= 'Z') || ('a' <= r && r <= 'z')
   377  }
   378  
   379  func isDigit(r rune) bool {
   380  	return '0' <= r && r <= '9'
   381  }
   382  
   383  func isSubDelims(r rune) bool {
   384  	switch r {
   385  	case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=':
   386  		return true
   387  	default:
   388  		return false
   389  	}
   390  }
   391  
   392  func isPCTEncoded(r1, r2, r3 rune) bool {
   393  	return r1 == '%' && isHexDigit(r2) && isHexDigit(r3)
   394  }
   395  
   396  func isHexDigit(r rune) bool {
   397  	switch {
   398  	case '0' <= r && r <= '9':
   399  		return true
   400  	case 'A' <= r && r <= 'F':
   401  		return true
   402  	case 'a' <= r && r <= 'f':
   403  		return true
   404  	default:
   405  		return false
   406  	}
   407  }