github.com/machinefi/w3bstream@v1.6.5-rc9.0.20240426031326-b8c7c4876e72/pkg/depends/kit/validator/rules/rule_scan.go (about)

     1  package rules
     2  
     3  import (
     4  	"bytes"
     5  	"regexp"
     6  	textscanner "text/scanner"
     7  
     8  	"github.com/machinefi/w3bstream/pkg/depends/kit/validator/errors"
     9  )
    10  
    11  type scanner struct {
    12  	raw []byte
    13  	*textscanner.Scanner
    14  }
    15  
    16  func NewScanner(b []byte) *scanner {
    17  	s := &textscanner.Scanner{}
    18  	s.Init(bytes.NewReader(b))
    19  	return &scanner{b, s}
    20  }
    21  
    22  func (s *scanner) RootRule() (*Rule, error) {
    23  	rule, err := s.rule()
    24  	if err != nil {
    25  		return nil, err
    26  	}
    27  	if tok := s.Scan(); tok != EOF {
    28  		return nil, errors.NewSyntaxError(
    29  			"%s | rule should be end but got `%s`",
    30  			s.raw[0:s.Pos().Offset], string(tok))
    31  	}
    32  	return rule, nil
    33  }
    34  
    35  func (s *scanner) rule() (*Rule, error) {
    36  	// simple          @name
    37  	// with parameters @name<param> @name<param1,param2...>
    38  	// with ranges     @name[from,to), @name[length]
    39  	// with values     @name{value1,value2}
    40  	// with regexp     @name\/d+/
    41  	// optional        @name?
    42  	// default value   @name=value @name='xxx'
    43  	// compose         @map<@string[1,10],@string{A,B,C}>[0,10]
    44  	if first := s.Next(); first != '@' {
    45  		return nil, errors.NewSyntaxError(
    46  			"%s | rule should start with `@` but got `%s`",
    47  			s.raw[0:s.Pos().Offset], string(first),
    48  		)
    49  	}
    50  	start := s.Pos().Offset - 1
    51  	name, err := s.lit()
    52  	if err != nil {
    53  		return nil, err
    54  	}
    55  	if name == "" {
    56  		return nil, errors.NewSyntaxError(
    57  			"%s | rule missing name",
    58  			s.raw[0:s.Pos().Offset],
    59  		)
    60  	}
    61  	r := NewRule(name)
    62  LOOP:
    63  	for tok := s.Peek(); ; tok = s.Peek() {
    64  		switch tok {
    65  		default:
    66  			break LOOP
    67  		case ' ':
    68  			s.Next()
    69  		case '?', '=':
    70  			optional, dftv, err := s.inherent()
    71  			if err != nil {
    72  				return nil, err
    73  			}
    74  			r.Optional, r.DftValue = optional, dftv
    75  		case '<':
    76  			params, err := s.params()
    77  			if err != nil {
    78  				return nil, err
    79  			}
    80  			r.Params = params
    81  		case '[', '(':
    82  			ranges, end, err := s.ranges()
    83  			if err != nil {
    84  				return nil, err
    85  			}
    86  			r.Range = ranges
    87  			r.ExclusiveMin = tok == '('
    88  			r.ExclusiveMax = end == ')'
    89  		case '{':
    90  			values, err := s.values()
    91  			if err != nil {
    92  				return nil, err
    93  			}
    94  			r.ValueMatrix = append(r.ValueMatrix, values)
    95  		case '/':
    96  			pattern, err := s.pattern()
    97  			if err != nil {
    98  				return nil, err
    99  			}
   100  			r.Pattern = pattern
   101  		}
   102  	}
   103  
   104  	end := s.Pos().Offset
   105  	r.RAW = s.raw[start:end]
   106  	return r, nil
   107  }
   108  
   109  func (s *scanner) lit() (string, error) {
   110  	tok := s.Scan()
   111  	if keychars[tok] {
   112  		return "", errors.NewSyntaxError(
   113  			"%s | invalid literal token `%s`",
   114  			s.raw[0:s.Pos().Offset], string(tok),
   115  		)
   116  	}
   117  	return s.TokenText(), nil
   118  }
   119  
   120  // inherent optional or default value
   121  func (s *scanner) inherent() (bool, []byte, error) {
   122  	first := s.Next()
   123  	if !(first == '=' || first == '?') {
   124  		return false, nil, errors.NewSyntaxError(
   125  			"%s | optional or default value of rule should start with `?` or `=`",
   126  			s.raw[0:s.Pos().Offset],
   127  		)
   128  	}
   129  
   130  	b := &bytes.Buffer{}
   131  
   132  	tok := s.Peek()
   133  	for tok == ' ' {
   134  		tok = s.Next()
   135  	}
   136  
   137  	if tok == '\'' {
   138  		for tok = s.Peek(); tok != '\''; tok = s.Peek() {
   139  			if tok == EOF {
   140  				return true, nil, errors.NewSyntaxError(
   141  					"%s | default value of of rule should end with `'`",
   142  					s.raw[0:s.Pos().Offset],
   143  				)
   144  			}
   145  			if tok == '\\' {
   146  				tok = s.Next()
   147  				next := s.Next()
   148  				// \' -> '
   149  				if next != '\'' {
   150  					b.WriteRune(tok)
   151  				}
   152  				b.WriteRune(next)
   153  				continue
   154  			}
   155  			b.WriteRune(tok)
   156  			s.Next()
   157  		}
   158  		s.Next()
   159  	} else if tok != EOF && tok != '>' && tok != ',' {
   160  		// end or in stmt
   161  		b.WriteRune(tok)
   162  		lit, err := s.lit()
   163  		if err != nil {
   164  			return false, nil, err
   165  		}
   166  		b.WriteString(lit)
   167  	}
   168  
   169  	dftv := b.Bytes()
   170  
   171  	if first == '=' && dftv == nil {
   172  		return true, []byte{}, nil
   173  	}
   174  
   175  	return true, dftv, nil
   176  }
   177  
   178  func (s *scanner) params() ([]Node, error) {
   179  	if first := s.Next(); first != '<' {
   180  		return nil, errors.NewSyntaxError(
   181  			"%s | parameters of rule should start with `<` but got `%s`",
   182  			s.raw[0:s.Pos().Offset], string(first),
   183  		)
   184  	}
   185  
   186  	params := map[int]Node{}
   187  	paramc := 1
   188  
   189  	for tok := s.Peek(); tok != '>'; tok = s.Peek() {
   190  		if tok == EOF {
   191  			return nil, errors.NewSyntaxError(
   192  				"%s | parameters of rule should end with `>` but got `%s`",
   193  				s.raw[0:s.Pos().Offset], string(tok),
   194  			)
   195  		}
   196  		switch tok {
   197  		case ' ':
   198  			s.Next()
   199  		case ',':
   200  			s.Next()
   201  			paramc++
   202  		case '@':
   203  			rule, err := s.rule()
   204  			if err != nil {
   205  				return nil, err
   206  			}
   207  			params[paramc] = rule
   208  		default:
   209  			raw, err := s.lit()
   210  			if err != nil {
   211  				return nil, err
   212  			}
   213  			if node, ok := params[paramc]; !ok {
   214  				params[paramc] = NewLiteral([]byte(raw))
   215  			} else if lit, ok := node.(*Lit); ok {
   216  				lit.Append([]byte(raw))
   217  			} else {
   218  				return nil, errors.NewSyntaxError(
   219  					"%s | rule should be end but got `%s`",
   220  					s.raw[0:s.Pos().Offset], string(tok),
   221  				)
   222  			}
   223  		}
   224  	}
   225  
   226  	lst := make([]Node, paramc)
   227  	for i := range lst {
   228  		if p, ok := params[i+1]; ok {
   229  			lst[i] = p
   230  		} else {
   231  			lst[i] = NewLiteral([]byte(""))
   232  		}
   233  	}
   234  
   235  	s.Next()
   236  	return lst, nil
   237  }
   238  
   239  func (s *scanner) ranges() ([]*Lit, rune, error) {
   240  	if first := s.Next(); !(first == '[' || first == '(') {
   241  		return nil, first, errors.NewSyntaxError(
   242  			"%s range of rule should start with `[` or `(` but got `%s`",
   243  			s.raw[0:s.Pos().Offset], string(first),
   244  		)
   245  	}
   246  
   247  	lits := map[int]*Lit{}
   248  	litc := 1
   249  
   250  	for tok := s.Peek(); !(tok == ']' || tok == ')'); tok = s.Peek() {
   251  		if tok == EOF {
   252  			return nil, tok, errors.NewSyntaxError(
   253  				"%s range of rule should end with `]` `)` but got `%s`",
   254  				s.raw[0:s.Pos().Offset], string(tok),
   255  			)
   256  		}
   257  		switch tok {
   258  		case ' ':
   259  			s.Next()
   260  		case ',':
   261  			s.Next()
   262  			litc++
   263  		default:
   264  			raw, err := s.lit()
   265  			if err != nil {
   266  				return nil, tok, err
   267  			}
   268  			if lit, ok := lits[litc]; !ok {
   269  				lits[litc] = NewLiteral([]byte(raw))
   270  			} else {
   271  				lit.Append([]byte(raw))
   272  			}
   273  		}
   274  	}
   275  
   276  	lst := make([]*Lit, litc)
   277  
   278  	for i := range lst {
   279  		if p, ok := lits[i+1]; ok {
   280  			lst[i] = p
   281  		} else {
   282  			lst[i] = NewLiteral([]byte(""))
   283  		}
   284  	}
   285  
   286  	return lst, s.Next(), nil
   287  }
   288  
   289  func (s *scanner) values() ([]*Lit, error) {
   290  	if first := s.Next(); first != '{' {
   291  		return nil, errors.NewSyntaxError(
   292  			"%s | vals of rule should start with `{` but got `%s`",
   293  			s.raw[0:s.Pos().Offset], string(first))
   294  	}
   295  
   296  	vals := map[int]*Lit{}
   297  	valc := 1
   298  
   299  	for tok := s.Peek(); tok != '}'; tok = s.Peek() {
   300  		if tok == EOF {
   301  			return nil, errors.NewSyntaxError(
   302  				"%s vals of rule should end with `}`",
   303  				s.raw[0:s.Pos().Offset],
   304  			)
   305  		}
   306  		switch tok {
   307  		case ' ':
   308  			s.Next()
   309  		case ',':
   310  			s.Next()
   311  			valc++
   312  		default:
   313  			raw, err := s.lit()
   314  			if err != nil {
   315  				return nil, err
   316  			}
   317  			if literal, ok := vals[valc]; !ok {
   318  				vals[valc] = NewLiteral([]byte(raw))
   319  			} else {
   320  				literal.Append([]byte(raw))
   321  			}
   322  		}
   323  	}
   324  	s.Next()
   325  
   326  	lst := make([]*Lit, valc)
   327  	for i := range lst {
   328  		if p, ok := vals[i+1]; ok {
   329  			lst[i] = p
   330  		} else {
   331  			lst[i] = NewLiteral([]byte(""))
   332  		}
   333  	}
   334  	return lst, nil
   335  }
   336  
   337  func (s *scanner) pattern() (*regexp.Regexp, error) {
   338  	if first := s.Next(); first != '/' {
   339  		return nil, errors.NewSyntaxError(
   340  			"%s | pattern of rule should start with `/`",
   341  			s.raw[0:s.Pos().Offset],
   342  		)
   343  	}
   344  
   345  	b := &bytes.Buffer{}
   346  
   347  	for tok := s.Peek(); tok != '/'; tok = s.Peek() {
   348  		if tok == EOF {
   349  			return nil, errors.NewSyntaxError(
   350  				"%s | pattern of rule should end with `/`",
   351  				s.raw[0:s.Pos().Offset],
   352  			)
   353  		}
   354  		if tok == '\\' {
   355  			tok = s.Next()
   356  			next := s.Next()
   357  			// \/ -> /
   358  			if next != '/' {
   359  				b.WriteRune(tok)
   360  			}
   361  			b.WriteRune(next)
   362  			continue
   363  		}
   364  		b.WriteRune(tok)
   365  		s.Next()
   366  	}
   367  	s.Next()
   368  
   369  	return regexp.Compile(b.String())
   370  }
   371  
   372  var keychars = map[rune]bool{
   373  	'@': true, '?': true, ',': true, ':': true, '=': true, '/': true, '[': true,
   374  	']': true, '(': true, ')': true, '{': true, '}': true, '<': true, '>': true,
   375  }
   376  
   377  const EOF = textscanner.EOF