github.com/khulnasoft-lab/defsec@v1.0.5-0.20230827010352-5e9f46893d95/pkg/scanners/azure/expressions/lex.go

package expressions

import (
	"bufio"
	"fmt"
	"strconv"
	"strings"
)

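// TokenType identifies the kind of a lexed Token.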
type TokenType uint16

const (
	TokenName TokenType = iota
	TokenOpenParen
	TokenCloseParen
	TokenComma
	TokenDot
	TokenLiteralString
	TokenLiteralInteger
	TokenLiteralFloat
	TokenNewLine
)

type Token struct {
	Type TokenType
	Data interface{}
}

type lexer struct {
	reader *bufio.Reader
}

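// lex tokenises the given expression string. For example, the input
// "concat('a', 1)" lexes to a name, an open paren, a string literal,
// a comma, an integer literal and a close paren.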
func lex(expression string) ([]Token, error) {
	lexer := &lexer{
		reader: bufio.NewReader(strings.NewReader(expression)),
	}
	return lexer.Lex()
}

func (l *lexer) unread() {
	_ = l.reader.UnreadRune()
}

func (l *lexer) read() (rune, error) {
	r, _, err := l.reader.ReadRune()
	return r, err
}

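// Lex reads the input rune by rune until EOF, skipping spaces, tabs and
// carriage returns, emitting one token per punctuation rune, and
// delegating strings, numbers and names to the dedicated lexers.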
func (l *lexer) Lex() ([]Token, error) {
	var tokens []Token

	for {
		r, err := l.read()
		if err != nil {
			break
		}

		switch r {
		case ' ', '\t', '\r':
			continue
		case '\n':
			tokens = append(tokens, Token{Type: TokenNewLine})
		case '(':
			tokens = append(tokens, Token{Type: TokenOpenParen})
		case ')':
			tokens = append(tokens, Token{Type: TokenCloseParen})
		case ',':
			tokens = append(tokens, Token{Type: TokenComma})
		case '.':
			tokens = append(tokens, Token{Type: TokenDot})
		case '"', '\'':
			token, err := l.lexString(r)
			if err != nil {
				return nil, fmt.Errorf("string parse error: %w", err)
			}
			tokens = append(tokens, token)
		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			l.unread()
			token, err := l.lexNumber()
			if err != nil {
				return nil, fmt.Errorf("number parse error: %w", err)
			}
			tokens = append(tokens, token)
		default:
			l.unread()
			token := l.lexKeyword()
			// A rune that can never start a name would otherwise be
			// unread and re-read forever; bail out instead.
			if name, _ := token.Data.(string); name == "" {
				return nil, fmt.Errorf("unexpected character %q", r)
			}
			tokens = append(tokens, token)
		}
	}

	return tokens, nil
}

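// lexString consumes runes up to the terminating quote (the opening
// quote has already been consumed by Lex), expanding backslash escapes
// via readEscapedChar. If EOF arrives before the terminator, the
// literal ends with whatever has been read so far.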
func (l *lexer) lexString(terminator rune) (Token, error) {
	var sb strings.Builder
	for {
		r, err := l.read()
		if err != nil {
			break
		}
		if r == '\\' {
			r, err := l.readEscapedChar()
			if err != nil {
				return Token{}, fmt.Errorf("bad escape: %w", err)
			}
			sb.WriteRune(r)
			continue
		}
		if r == terminator {
			break
		}
		sb.WriteRune(r)
	}
	return Token{
		Type: TokenLiteralString,
		Data: sb.String(),
	}, nil
}

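// readEscapedChar reads the rune following a backslash and returns the
// character the escape sequence denotes; only \n, \r, \t and quote
// escapes are supported.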
func (l *lexer) readEscapedChar() (rune, error) {
	r, err := l.read()
	if err != nil {
		return 0, fmt.Errorf("unexpected EOF")
	}
	switch r {
	case 'n':
		return '\n', nil
	case 'r':
		return '\r', nil
	case 't':
		return '\t', nil
	case '"', '\'':
		return r, nil
	default:
		return 0, fmt.Errorf("'%c' is not a supported escape sequence", r)
	}
}

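// lexNumber accumulates a run of digits and decimal points, then parses
// it as an int64, or as a float64 if a '.' was seen; malformed runs
// such as "1.2.3" are rejected by strconv.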
func (l *lexer) lexNumber() (Token, error) {
	var sb strings.Builder
	var decimal bool

LOOP:
	for {
		r, err := l.read()
		if err != nil {
			break
		}
		switch r {
		case '.':
			decimal = true
			sb.WriteRune('.')
		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			sb.WriteRune(r)
		default:
			l.unread()
			break LOOP
		}
	}

	raw := sb.String()
	if decimal {
		fl, err := strconv.ParseFloat(raw, 64)
		if err != nil {
			return Token{}, err
		}
		return Token{
			Type: TokenLiteralFloat,
			Data: fl,
		}, nil
	}

	i, err := strconv.ParseInt(raw, 10, 64)
	if err != nil {
		return Token{}, err
	}
	return Token{
		Type: TokenLiteralInteger,
		Data: i,
	}, nil
}

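// lexKeyword consumes a maximal run of ASCII letters, digits and
// underscores and returns it as a TokenName.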
func (l *lexer) lexKeyword() Token {
	var sb strings.Builder
LOOP:
	for {
		r, err := l.read()
		if err != nil {
			break
		}
		switch {
		case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9', r == '_':
			sb.WriteRune(r)
		default:
			l.unread()
			break LOOP
		}
	}
	return Token{
		Type: TokenName,
		Data: sb.String(),
	}
}