github.com/quantosnetwork/Quantos@v0.0.0-20220306172517-e20b28c5a29a/quantix/ast/lex.go (about)

     1  package ast
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"github.com/quantosnetwork/Quantos/quantix/runeset"
     7  	"github.com/quantosnetwork/Quantos/quantix/token"
     8  )
     9  
    10  // TriState has values: {Undefined, False, True}
    11  type TriState int
    12  
    13  const (
    14  	// Undefined is a TriState value
    15  	Undefined TriState = iota
    16  	// False is a TriState value
    17  	False
    18  	// True is a TriState value
    19  	True
    20  )
    21  
// Any is the lex symbol whose String form is ".".
// Its tok field is the token from which Lext is read.
type Any struct {
	tok *token.Token
}
    25  
// AnyOf is a lex symbol printed as "any <string literal>".
//
// The any token supplies Lext, strLit is the string literal token printed by
// String, and Set is the rune set that Equal compares.
// NOTE(review): Set presumably holds the runes listed in strLit — confirm
// against the code that constructs AnyOf.
type AnyOf struct {
	any    *token.Token
	strLit *token.Token
	Set    *runeset.RuneSet
}
    31  
// CharLiteral is a lex symbol for a single character literal.
//
// Literal holds the literal's runes including the surrounding quotes:
// Char reads the character from Literal[1], or from Literal[2] when
// Literal[1] is a backslash. tok is the token from which Lext is read.
type CharLiteral struct {
	tok     *token.Token
	Literal []rune
}
    36  
// LexBracket is a bracketed lex symbol containing one or more alternates.
//
// Type selects the bracket pair used when printing (see LeftBracket and
// RightBracket), Alternates are printed separated by " | ", and leftBracket
// is the token from which Lext is read.
type LexBracket struct {
	leftBracket *token.Token
	Type        BracketType
	Alternates  []*RegExp
}
    42  
    43  type BracketType int
    44  
    45  const (
    46  	LexGroup BracketType = iota
    47  	LexOptional
    48  	LexZeroOrMore
    49  	LexOneOrMore
    50  )
    51  
// LexBase is a LexSymbol that can additionally be compared for equality
// with another LexBase.
//
// In this file it is implemented by *Any, *AnyOf, *CharLiteral, *Not and
// *UnicodeClass. *LexBracket is a LexSymbol but not a LexBase.
type LexBase interface {
	isLexBase()
	LexSymbol
	Equal(LexBase) bool
}
    57  
// isLexBase is an empty marker method: declaring it on a type is what makes
// that type eligible to satisfy the LexBase interface.
func (*Any) isLexBase()          {}
func (*AnyOf) isLexBase()        {}
func (*CharLiteral) isLexBase()  {}
func (*Not) isLexBase()          {}
func (*UnicodeClass) isLexBase() {}
    63  
// LexRule pairs a token ID with the regular expression that defines it.
// String prints it as "<id> : <regexp> ;".
//
// NOTE(review): Suppress is not read anywhere in this file; presumably it
// marks tokens that should be dropped by the lexer — confirm with its users.
type LexRule struct {
	Suppress bool
	TokID    *TokID
	RegExp   *RegExp
}
    69  
// LexSymbol is one element of a RegExp.
//
// In this file it is implemented by *Any, *AnyOf, *CharLiteral, *LexBracket,
// *Not and *UnicodeClass. Lext reports the Lext value of the symbol's token
// and String returns the symbol's printed form.
type LexSymbol interface {
	isLexSymbol()
	Lext() int
	String() string
}
    75  
// isLexSymbol is an empty marker method: declaring it on a type is what makes
// that type eligible to satisfy the LexSymbol interface.
func (*Any) isLexSymbol()          {}
func (*AnyOf) isLexSymbol()        {}
func (*CharLiteral) isLexSymbol()  {}
func (*LexBracket) isLexSymbol()   {}
func (*Not) isLexSymbol()          {}
func (*UnicodeClass) isLexSymbol() {}
    82  
// Not is a lex symbol printed as "not <string literal>".
//
// The not token supplies Lext, strLit is the string literal token printed by
// String, and Set is the rune set that Equal compares.
// NOTE(review): Set presumably holds the excluded runes listed in strLit —
// confirm against the code that constructs Not.
type Not struct {
	not    *token.Token
	strLit *token.Token
	Set    *runeset.RuneSet
}
    88  
// RegExp is a sequence of lex symbols. String prints the symbols one after
// another with no separator.
type RegExp struct {
	Symbols []LexSymbol
}
    92  
// StringLit wraps a string literal token. Literal returns the token's raw
// literal; Value returns the escape-stripped literal without its first and
// last rune.
type StringLit struct {
	tok *token.Token
}
    96  
// UnicodeClass is a lex symbol denoting a class of Unicode characters.
//
// Type is the class compared by Equal; tok is the token whose literal is
// returned by String.
type UnicodeClass struct {
	tok  *token.Token
	Type UnicodeClassType
}
   101  
   102  type UnicodeClassType int
   103  
   104  const (
   105  	Letter UnicodeClassType = iota
   106  	Upcase
   107  	Lowcase
   108  	Number
   109  	Space
   110  )
   111  
   112  func (*Any) Equal(other LexBase) bool {
   113  	if other == nil {
   114  		return false
   115  	}
   116  	_, ok := other.(*Any)
   117  	return ok
   118  }
   119  
   120  func (a *Any) Lext() int {
   121  	return a.tok.Lext()
   122  }
   123  
   124  func (ao *AnyOf) Equal(other LexBase) bool {
   125  	if other == nil {
   126  		return false
   127  	}
   128  	ao1, ok := other.(*AnyOf)
   129  	if !ok {
   130  		return false
   131  	}
   132  	return ao.Set.Equal(ao1.Set)
   133  }
   134  
   135  func (a *AnyOf) Lext() int {
   136  	return a.any.Lext()
   137  }
   138  
   139  func NewCharLiteral(tok *token.Token, literal []rune) *CharLiteral {
   140  	return &CharLiteral{
   141  		tok:     tok,
   142  		Literal: literal,
   143  	}
   144  }
   145  
   146  func (c *CharLiteral) Char() rune {
   147  	if c.Literal[1] == '\\' {
   148  		switch c.Literal[2] {
   149  		case '\'':
   150  			return '\''
   151  		case '"':
   152  			return '"'
   153  		case '\\':
   154  			return '\\'
   155  		case 't':
   156  			return '\t'
   157  		case 'n':
   158  			return '\n'
   159  		case 'r':
   160  			return '\r'
   161  		default:
   162  			panic(fmt.Sprintf("invalid '%c'", c.Literal[2]))
   163  		}
   164  	} else {
   165  		return c.Literal[1]
   166  	}
   167  }
   168  
   169  func (c *CharLiteral) Equal(other LexBase) bool {
   170  	if other == nil {
   171  		return false
   172  	}
   173  	c1, ok := other.(*CharLiteral)
   174  	if !ok {
   175  		return false
   176  	}
   177  	// fmt.Printf("'%c'.Equal('%c') = %t\n", c.Char(), c1.Char(), c.Char() == c1.Char())
   178  	return c.Char() == c1.Char()
   179  }
   180  
   181  func (c *CharLiteral) Lext() int {
   182  	return c.tok.Lext()
   183  }
   184  
   185  func (l *LexBracket) LeftBracket() string {
   186  	switch l.Type {
   187  	case LexGroup:
   188  		return "("
   189  	case LexOptional:
   190  		return "["
   191  	case LexZeroOrMore:
   192  		return "{"
   193  	case LexOneOrMore:
   194  		return "<"
   195  	}
   196  	panic("invalid")
   197  }
   198  
   199  func (l *LexBracket) RightBracket() string {
   200  	switch l.Type {
   201  	case LexGroup:
   202  		return ")"
   203  	case LexOptional:
   204  		return "]"
   205  	case LexZeroOrMore:
   206  		return "}"
   207  	case LexOneOrMore:
   208  		return ">"
   209  	}
   210  	panic("invalid")
   211  }
   212  
   213  // Returns the id of the lex rule
   214  func (l *LexRule) ID() string {
   215  	return l.TokID.ID()
   216  }
   217  
   218  func (l *LexRule) Lext() int {
   219  	return l.TokID.Lext()
   220  }
   221  
   222  func (l *LexRule) String() string {
   223  	return fmt.Sprintf("%s : %s ;", l.ID(), l.RegExp)
   224  }
   225  
   226  func (b *LexBracket) Lext() int {
   227  	return b.leftBracket.Lext()
   228  }
   229  
   230  func (n *Not) Equal(other LexBase) bool {
   231  	if other == nil {
   232  		return false
   233  	}
   234  	n1, ok := other.(*Not)
   235  	if !ok {
   236  		return false
   237  	}
   238  	return n.Set.Equal(n1.Set)
   239  }
   240  
   241  func (n *Not) Lext() int {
   242  	return n.not.Lext()
   243  }
   244  
   245  func (re *RegExp) String() string {
   246  	w := new(bytes.Buffer)
   247  	for _, symbol := range re.Symbols {
   248  		fmt.Fprint(w, symbol)
   249  	}
   250  	return w.String()
   251  }
   252  
   253  func (u *UnicodeClass) Equal(other LexBase) bool {
   254  	if other == nil {
   255  		return false
   256  	}
   257  	u1, ok := other.(*UnicodeClass)
   258  	if !ok {
   259  		return false
   260  	}
   261  	return u.Type == u1.Type
   262  }
   263  
   264  func (u *UnicodeClass) Lext() int {
   265  	return u.Lext()
   266  }
   267  
   268  func (*Any) String() string {
   269  	return "."
   270  }
   271  
   272  func (a *AnyOf) String() string {
   273  	return fmt.Sprintf("any %s", string(a.strLit.Literal()))
   274  }
   275  
   276  func (c *CharLiteral) String() string {
   277  	return string(c.Literal)
   278  }
   279  
   280  func (lb *LexBracket) String() string {
   281  	w := new(bytes.Buffer)
   282  	fmt.Fprint(w, lb.LeftBracket())
   283  	for i, alt := range lb.Alternates {
   284  		if i > 0 {
   285  			fmt.Fprint(w, " | ")
   286  		}
   287  		fmt.Fprint(w, alt)
   288  	}
   289  	fmt.Fprint(w, lb.RightBracket())
   290  	return w.String()
   291  }
   292  
   293  func (n *Not) String() string {
   294  	return fmt.Sprintf("not %s", string(n.strLit.Literal()))
   295  }
   296  
   297  func (sl *StringLit) ContainsWhiteSpace() bool {
   298  	for _, r := range sl.tok.LiteralStripEscape() {
   299  		switch r {
   300  		case ' ', '\t', '\n', '\r':
   301  			return true
   302  		}
   303  	}
   304  	return false
   305  }
   306  
   307  func (sl *StringLit) ID() string {
   308  	return string(sl.Value())
   309  }
   310  
   311  func (sl *StringLit) Literal() []rune {
   312  	return sl.tok.Literal()
   313  }
   314  
   315  func (sl *StringLit) Value() []rune {
   316  	slit := sl.tok.LiteralStripEscape()
   317  	value := slit[1 : len(slit)-1]
   318  	// fmt.Printf("*StringLit.Value %s %s\n", string(slit), string(value))
   319  	return value
   320  }
   321  
   322  func (u *UnicodeClass) String() string {
   323  	return string(u.tok.Literal())
   324  }
   325  
   326  // StringLitToTokID returns a dummy TokID with ID = id
   327  func StringLitToTokID(id *StringLit) *TokID {
   328  	return &TokID{
   329  		token.New(token.StringToType["tokid"],
   330  			id.tok.Lext()+1, id.tok.Rext()-1, id.tok.GetInput()),
   331  	}
   332  }
   333  
   334  // CharLitFromStringLit returns a dummy CharLiteral with Literal sl.Literal[i]
   335  // If escaped sl.Literal[i] == '\\' and sl.Literal[i+1] is the escaped char.
   336  func CharLitFromStringLit(sl *StringLit, i int, escaped bool) *CharLiteral {
   337  	// Make char literal
   338  	lit := []rune{'\''}
   339  	if escaped {
   340  		if sl.Literal()[i+1] != '"' {
   341  			lit = append(lit, '\\')
   342  		}
   343  		lit = append(lit, sl.Literal()[i+1])
   344  	} else {
   345  		lit = append(lit, sl.Literal()[i])
   346  	}
   347  	lit = append(lit, '\'')
   348  
   349  	rext := sl.Lext() + i + 1
   350  	if escaped {
   351  		rext++
   352  	}
   353  
   354  	cl := NewCharLiteral(
   355  		token.New(
   356  			token.StringToType["char_lit"],
   357  			sl.Lext()+i, rext, sl.tok.GetInput()),
   358  		lit)
   359  	return cl
   360  }