github.com/quantosnetwork/Quantos@v0.0.0-20220306172517-e20b28c5a29a/quantix/ast/lex.go (about) 1 package ast 2 3 import ( 4 "bytes" 5 "fmt" 6 "github.com/quantosnetwork/Quantos/quantix/runeset" 7 "github.com/quantosnetwork/Quantos/quantix/token" 8 ) 9 10 // TriState has values: {Undefined, False, True} 11 type TriState int 12 13 const ( 14 // Undefined is a TriState value 15 Undefined TriState = iota 16 // False is a TriState value 17 False 18 // True is a TriState value 19 True 20 ) 21 22 type Any struct { 23 tok *token.Token 24 } 25 26 type AnyOf struct { 27 any *token.Token 28 strLit *token.Token 29 Set *runeset.RuneSet 30 } 31 32 type CharLiteral struct { 33 tok *token.Token 34 Literal []rune 35 } 36 37 type LexBracket struct { 38 leftBracket *token.Token 39 Type BracketType 40 Alternates []*RegExp 41 } 42 43 type BracketType int 44 45 const ( 46 LexGroup BracketType = iota 47 LexOptional 48 LexZeroOrMore 49 LexOneOrMore 50 ) 51 52 type LexBase interface { 53 isLexBase() 54 LexSymbol 55 Equal(LexBase) bool 56 } 57 58 func (*Any) isLexBase() {} 59 func (*AnyOf) isLexBase() {} 60 func (*CharLiteral) isLexBase() {} 61 func (*Not) isLexBase() {} 62 func (*UnicodeClass) isLexBase() {} 63 64 type LexRule struct { 65 Suppress bool 66 TokID *TokID 67 RegExp *RegExp 68 } 69 70 type LexSymbol interface { 71 isLexSymbol() 72 Lext() int 73 String() string 74 } 75 76 func (*Any) isLexSymbol() {} 77 func (*AnyOf) isLexSymbol() {} 78 func (*CharLiteral) isLexSymbol() {} 79 func (*LexBracket) isLexSymbol() {} 80 func (*Not) isLexSymbol() {} 81 func (*UnicodeClass) isLexSymbol() {} 82 83 type Not struct { 84 not *token.Token 85 strLit *token.Token 86 Set *runeset.RuneSet 87 } 88 89 type RegExp struct { 90 Symbols []LexSymbol 91 } 92 93 type StringLit struct { 94 tok *token.Token 95 } 96 97 type UnicodeClass struct { 98 tok *token.Token 99 Type UnicodeClassType 100 } 101 102 type UnicodeClassType int 103 104 const ( 105 Letter UnicodeClassType = iota 106 Upcase 107 Lowcase 108 Number 109 Space 110 ) 111 112 func (*Any) Equal(other LexBase) bool { 113 if other == nil { 114 return false 115 } 116 _, ok := other.(*Any) 117 return ok 118 } 119 120 func (a *Any) Lext() int { 121 return a.tok.Lext() 122 } 123 124 func (ao *AnyOf) Equal(other LexBase) bool { 125 if other == nil { 126 return false 127 } 128 ao1, ok := other.(*AnyOf) 129 if !ok { 130 return false 131 } 132 return ao.Set.Equal(ao1.Set) 133 } 134 135 func (a *AnyOf) Lext() int { 136 return a.any.Lext() 137 } 138 139 func NewCharLiteral(tok *token.Token, literal []rune) *CharLiteral { 140 return &CharLiteral{ 141 tok: tok, 142 Literal: literal, 143 } 144 } 145 146 func (c *CharLiteral) Char() rune { 147 if c.Literal[1] == '\\' { 148 switch c.Literal[2] { 149 case '\'': 150 return '\'' 151 case '"': 152 return '"' 153 case '\\': 154 return '\\' 155 case 't': 156 return '\t' 157 case 'n': 158 return '\n' 159 case 'r': 160 return '\r' 161 default: 162 panic(fmt.Sprintf("invalid '%c'", c.Literal[2])) 163 } 164 } else { 165 return c.Literal[1] 166 } 167 } 168 169 func (c *CharLiteral) Equal(other LexBase) bool { 170 if other == nil { 171 return false 172 } 173 c1, ok := other.(*CharLiteral) 174 if !ok { 175 return false 176 } 177 // fmt.Printf("'%c'.Equal('%c') = %t\n", c.Char(), c1.Char(), c.Char() == c1.Char()) 178 return c.Char() == c1.Char() 179 } 180 181 func (c *CharLiteral) Lext() int { 182 return c.tok.Lext() 183 } 184 185 func (l *LexBracket) LeftBracket() string { 186 switch l.Type { 187 case LexGroup: 188 return "(" 189 case LexOptional: 190 return "[" 191 case LexZeroOrMore: 192 return "{" 193 case LexOneOrMore: 194 return "<" 195 } 196 panic("invalid") 197 } 198 199 func (l *LexBracket) RightBracket() string { 200 switch l.Type { 201 case LexGroup: 202 return ")" 203 case LexOptional: 204 return "]" 205 case LexZeroOrMore: 206 return "}" 207 case LexOneOrMore: 208 return ">" 209 } 210 panic("invalid") 211 } 212 213 // Returns the id of the lex rule 214 func (l *LexRule) ID() string { 215 return l.TokID.ID() 216 } 217 218 func (l *LexRule) Lext() int { 219 return l.TokID.Lext() 220 } 221 222 func (l *LexRule) String() string { 223 return fmt.Sprintf("%s : %s ;", l.ID(), l.RegExp) 224 } 225 226 func (b *LexBracket) Lext() int { 227 return b.leftBracket.Lext() 228 } 229 230 func (n *Not) Equal(other LexBase) bool { 231 if other == nil { 232 return false 233 } 234 n1, ok := other.(*Not) 235 if !ok { 236 return false 237 } 238 return n.Set.Equal(n1.Set) 239 } 240 241 func (n *Not) Lext() int { 242 return n.not.Lext() 243 } 244 245 func (re *RegExp) String() string { 246 w := new(bytes.Buffer) 247 for _, symbol := range re.Symbols { 248 fmt.Fprint(w, symbol) 249 } 250 return w.String() 251 } 252 253 func (u *UnicodeClass) Equal(other LexBase) bool { 254 if other == nil { 255 return false 256 } 257 u1, ok := other.(*UnicodeClass) 258 if !ok { 259 return false 260 } 261 return u.Type == u1.Type 262 } 263 264 func (u *UnicodeClass) Lext() int { 265 return u.Lext() 266 } 267 268 func (*Any) String() string { 269 return "." 270 } 271 272 func (a *AnyOf) String() string { 273 return fmt.Sprintf("any %s", string(a.strLit.Literal())) 274 } 275 276 func (c *CharLiteral) String() string { 277 return string(c.Literal) 278 } 279 280 func (lb *LexBracket) String() string { 281 w := new(bytes.Buffer) 282 fmt.Fprint(w, lb.LeftBracket()) 283 for i, alt := range lb.Alternates { 284 if i > 0 { 285 fmt.Fprint(w, " | ") 286 } 287 fmt.Fprint(w, alt) 288 } 289 fmt.Fprint(w, lb.RightBracket()) 290 return w.String() 291 } 292 293 func (n *Not) String() string { 294 return fmt.Sprintf("not %s", string(n.strLit.Literal())) 295 } 296 297 func (sl *StringLit) ContainsWhiteSpace() bool { 298 for _, r := range sl.tok.LiteralStripEscape() { 299 switch r { 300 case ' ', '\t', '\n', '\r': 301 return true 302 } 303 } 304 return false 305 } 306 307 func (sl *StringLit) ID() string { 308 return string(sl.Value()) 309 } 310 311 func (sl *StringLit) Literal() []rune { 312 return sl.tok.Literal() 313 } 314 315 func (sl *StringLit) Value() []rune { 316 slit := sl.tok.LiteralStripEscape() 317 value := slit[1 : len(slit)-1] 318 // fmt.Printf("*StringLit.Value %s %s\n", string(slit), string(value)) 319 return value 320 } 321 322 func (u *UnicodeClass) String() string { 323 return string(u.tok.Literal()) 324 } 325 326 // StringLitToTokID returns a dummy TokID with ID = id 327 func StringLitToTokID(id *StringLit) *TokID { 328 return &TokID{ 329 token.New(token.StringToType["tokid"], 330 id.tok.Lext()+1, id.tok.Rext()-1, id.tok.GetInput()), 331 } 332 } 333 334 // CharLitFromStringLit returns a dummy CharLiteral with Literal sl.Literal[i] 335 // If escaped sl.Literal[i] == '\\' and sl.Literal[i+1] is the escaped char. 336 func CharLitFromStringLit(sl *StringLit, i int, escaped bool) *CharLiteral { 337 // Make char literal 338 lit := []rune{'\''} 339 if escaped { 340 if sl.Literal()[i+1] != '"' { 341 lit = append(lit, '\\') 342 } 343 lit = append(lit, sl.Literal()[i+1]) 344 } else { 345 lit = append(lit, sl.Literal()[i]) 346 } 347 lit = append(lit, '\'') 348 349 rext := sl.Lext() + i + 1 350 if escaped { 351 rext++ 352 } 353 354 cl := NewCharLiteral( 355 token.New( 356 token.StringToType["char_lit"], 357 sl.Lext()+i, rext, sl.tok.GetInput()), 358 lit) 359 return cl 360 }