github.com/vescale/zgraph@v0.0.0-20230410094002-959c02d50f95/parser/goyacc/format_yacc.go (about) 1 // Copyright 2019 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package main 15 16 import ( 17 "bufio" 18 "fmt" 19 gofmt "go/format" 20 "go/token" 21 "io/ioutil" 22 "os" 23 "regexp" 24 "strings" 25 26 "github.com/cznic/strutil" 27 "github.com/pingcap/errors" 28 "github.com/vescale/zgraph/parser/format" 29 parser "modernc.org/parser/yacc" 30 ) 31 32 func Format(inputFilename string, goldenFilename string) (err error) { 33 spec, err := parseFileToSpec(inputFilename) 34 if err != nil { 35 return err 36 } 37 38 yFmt := &OutputFormatter{} 39 if err = yFmt.Setup(goldenFilename); err != nil { 40 return err 41 } 42 defer func() { 43 teardownErr := yFmt.Teardown() 44 if err == nil { 45 err = teardownErr 46 } 47 }() 48 49 if err = printDefinitions(yFmt, spec.Defs); err != nil { 50 return err 51 } 52 53 return printRules(yFmt, spec.Rules) 54 } 55 56 func parseFileToSpec(inputFilename string) (*parser.Specification, error) { 57 src, err := ioutil.ReadFile(inputFilename) 58 if err != nil { 59 return nil, err 60 } 61 return parser.Parse(token.NewFileSet(), inputFilename, src) 62 } 63 64 // Definition represents data reduced by productions: 65 // 66 // Definition: 67 // START IDENTIFIER 68 // | UNION // Case 1 69 // | LCURL RCURL // Case 2 70 // | ReservedWord Tag NameList // Case 3 71 // | ReservedWord Tag // Case 4 72 // | ERROR_VERBOSE // Case 5 73 const ( 74 StartIdentifierCase = iota 75 UnionDefinitionCase 76 LCURLRCURLCase 77 ReservedWordTagNameListCase 78 ReservedWordTagCase 79 ) 80 81 func printDefinitions(formatter format.Formatter, definitions []*parser.Definition) error { 82 for _, def := range definitions { 83 var err error 84 switch def.Case { 85 case StartIdentifierCase: 86 err = handleStart(formatter, def) 87 case UnionDefinitionCase: 88 err = handleUnion(formatter, def) 89 case LCURLRCURLCase: 90 err = handleProlog(formatter, def) 91 case ReservedWordTagNameListCase, ReservedWordTagCase: 92 err = handleReservedWordTagNameList(formatter, def) 93 } 94 if err != nil { 95 return err 96 } 97 } 98 _, err := formatter.Format("\n%%%%") 99 return err 100 } 101 102 func handleStart(f format.Formatter, definition *parser.Definition) error { 103 if err := Ensure(definition). 104 and(definition.Token2). 105 and(definition.Token2).NotNil(); err != nil { 106 return err 107 } 108 cmt1 := strings.Join(definition.Token.Comments, "\n") 109 cmt2 := strings.Join(definition.Token2.Comments, "\n") 110 _, err := f.Format("\n%s%s\t%s%s\n", cmt1, definition.Token.Val, cmt2, definition.Token2.Val) 111 return err 112 } 113 114 func handleUnion(f format.Formatter, definition *parser.Definition) error { 115 if err := Ensure(definition). 116 and(definition.Value).NotNil(); err != nil { 117 return err 118 } 119 if len(definition.Value) != 0 { 120 _, err := f.Format("%%union%i%s%u\n\n", definition.Value) 121 if err != nil { 122 return err 123 } 124 } 125 return nil 126 } 127 128 func handleProlog(f format.Formatter, definition *parser.Definition) error { 129 if err := Ensure(definition). 130 and(definition.Value).NotNil(); err != nil { 131 return err 132 } 133 _, err := f.Format("%%{%s%%}\n\n", definition.Value) 134 return err 135 } 136 137 func handleReservedWordTagNameList(f format.Formatter, def *parser.Definition) error { 138 if err := Ensure(def). 139 and(def.ReservedWord). 140 and(def.ReservedWord.Token).NotNil(); err != nil { 141 return err 142 } 143 comment := getTokenComment(def.ReservedWord.Token, divNewLineStringLayout) 144 directive := def.ReservedWord.Token.Val 145 146 hasTag := def.Tag != nil 147 var wordAfterDirective string 148 if hasTag { 149 wordAfterDirective = joinTag(def.Tag) 150 } else { 151 wordAfterDirective = joinNames(def.Nlist) 152 } 153 154 if _, err := f.Format("%s%s%s%i", comment, directive, wordAfterDirective); err != nil { 155 return err 156 } 157 if hasTag { 158 if _, err := f.Format("\n"); err != nil { 159 return err 160 } 161 if err := printNameListVertical(f, def.Nlist); err != nil { 162 return err 163 } 164 } 165 _, err := f.Format("%u\n") 166 return err 167 } 168 169 func joinTag(tag *parser.Tag) string { 170 var sb strings.Builder 171 sb.WriteString("\t") 172 if tag.Token != nil { 173 sb.WriteString(tag.Token.Val) 174 } 175 if tag.Token2 != nil { 176 sb.WriteString(tag.Token2.Val) 177 } 178 if tag.Token3 != nil { 179 sb.WriteString(tag.Token3.Val) 180 } 181 return sb.String() 182 } 183 184 type stringLayout int8 185 186 const ( 187 spanStringLayout stringLayout = iota 188 divStringLayout 189 divNewLineStringLayout 190 ) 191 192 func getTokenComment(token *parser.Token, layout stringLayout) string { 193 if len(token.Comments) == 0 { 194 return "" 195 } 196 var splitter, beforeComment string 197 switch layout { 198 case spanStringLayout: 199 splitter, beforeComment = " ", "" 200 case divStringLayout: 201 splitter, beforeComment = "\n", "" 202 case divNewLineStringLayout: 203 splitter, beforeComment = "\n", "\n" 204 default: 205 panic(errors.Errorf("unsupported stringLayout: %v", layout)) 206 } 207 208 var sb strings.Builder 209 sb.WriteString(beforeComment) 210 for _, comment := range token.Comments { 211 sb.WriteString(comment) 212 sb.WriteString(splitter) 213 } 214 return sb.String() 215 } 216 217 func printNameListVertical(f format.Formatter, names NameArr) (err error) { 218 rest := names 219 for len(rest) != 0 { 220 var processing NameArr 221 processing, rest = rest[:1], rest[1:] 222 223 var noComments NameArr 224 noComments, rest = rest.span(noComment) 225 processing = append(processing, noComments...) 226 227 maxCharLength := processing.findMaxLength() 228 for _, name := range processing { 229 if err := printSingleName(f, name, maxCharLength); err != nil { 230 return err 231 } 232 } 233 } 234 return nil 235 } 236 237 func joinNames(names NameArr) string { 238 var sb strings.Builder 239 for _, name := range names { 240 sb.WriteString(" ") 241 sb.WriteString(getTokenComment(name.Token, spanStringLayout)) 242 sb.WriteString(name.Token.Val) 243 } 244 return sb.String() 245 } 246 247 func printSingleName(f format.Formatter, name *parser.Name, maxCharLength int) error { 248 cmt := getTokenComment(name.Token, divNewLineStringLayout) 249 if _, err := f.Format(escapePercent(cmt)); err != nil { 250 return err 251 } 252 strLit := name.LiteralStringOpt 253 if strLit != nil && strLit.Token != nil { 254 _, err := f.Format("%-*s %s\n", maxCharLength, name.Token.Val, strLit.Token.Val) 255 return err 256 } 257 _, err := f.Format("%s\n", name.Token.Val) 258 return err 259 } 260 261 type NameArr []*parser.Name 262 263 func (ns NameArr) span(pred func(*parser.Name) bool) (first NameArr, second NameArr) { 264 first = ns.takeWhile(pred) 265 second = ns[len(first):] 266 return first, second 267 } 268 269 func (ns NameArr) takeWhile(pred func(*parser.Name) bool) NameArr { 270 for i, def := range ns { 271 if pred(def) { 272 continue 273 } 274 return ns[:i] 275 } 276 return ns 277 } 278 279 func (ns NameArr) findMaxLength() int { 280 maxLen := -1 281 for _, s := range ns { 282 if len(s.Token.Val) > maxLen { 283 maxLen = len(s.Token.Val) 284 } 285 } 286 return maxLen 287 } 288 289 func hasComments(n *parser.Name) bool { 290 return len(n.Token.Comments) != 0 291 } 292 293 func noComment(n *parser.Name) bool { 294 return !hasComments(n) 295 } 296 297 func containsActionInRule(rule *parser.Rule) bool { 298 for _, b := range rule.Body { 299 if _, ok := b.(*parser.Action); ok { 300 return true 301 } 302 } 303 return false 304 } 305 306 type RuleArr []*parser.Rule 307 308 func printRules(f format.Formatter, rules RuleArr) (err error) { 309 var lastRuleName string 310 for _, rule := range rules { 311 if rule.Name.Val == lastRuleName { 312 cmt := getTokenComment(rule.Token, divStringLayout) 313 _, err = f.Format("\n%s|\t%i", cmt) 314 } else { 315 cmt := getTokenComment(rule.Name, divStringLayout) 316 _, err = f.Format("\n\n%s%s:%i\n", cmt, rule.Name.Val) 317 } 318 if err != nil { 319 return err 320 } 321 lastRuleName = rule.Name.Val 322 323 if err = printRuleBody(f, rule); err != nil { 324 return err 325 } 326 if _, err = f.Format("%u"); err != nil { 327 return err 328 } 329 } 330 _, err = f.Format("\n%%%%\n") 331 return err 332 } 333 334 type ruleItemType int8 335 336 const ( 337 identRuleItemType ruleItemType = 1 338 actionRuleItemType ruleItemType = 2 339 strLiteralRuleItemType ruleItemType = 3 340 ) 341 342 func printRuleBody(f format.Formatter, rule *parser.Rule) error { 343 firstRuleItem, counter := rule.RuleItemList, 0 344 for ri := rule.RuleItemList; ri != nil; ri = ri.RuleItemList { 345 switch ruleItemType(ri.Case) { 346 case identRuleItemType, strLiteralRuleItemType: 347 term := fmt.Sprintf(" %s", ri.Token.Val) 348 if ri == firstRuleItem { 349 term = term[1:] 350 } 351 cmt := getTokenComment(ri.Token, divStringLayout) 352 353 if _, err := f.Format(escapePercent(cmt)); err != nil { 354 return err 355 } 356 if _, err := f.Format("%s", term); err != nil { 357 return err 358 } 359 case actionRuleItemType: 360 isFirstRuleItem := ri == firstRuleItem 361 if err := handlePrecedence(f, rule.Precedence, isFirstRuleItem); err != nil { 362 return err 363 } 364 if err := handleAction(f, rule, ri.Action, isFirstRuleItem); err != nil { 365 return err 366 } 367 } 368 counter++ 369 } 370 if err := checkInconsistencyInYaccParser(f, rule, counter); err != nil { 371 return err 372 } 373 if !containsActionInRule(rule) { 374 if err := handlePrecedence(f, rule.Precedence, counter == 0); err != nil { 375 return err 376 } 377 } 378 return nil 379 } 380 381 func handleAction(f format.Formatter, rule *parser.Rule, action *parser.Action, isFirstItem bool) error { 382 if !isFirstItem || rule.Precedence != nil { 383 if _, err := f.Format("\n"); err != nil { 384 return err 385 } 386 } 387 388 cmt := getTokenComment(action.Token, divStringLayout) 389 if _, err := f.Format(escapePercent(cmt)); err != nil { 390 return err 391 } 392 393 goSnippet, err := formatGoSnippet(action.Values) 394 goSnippet = escapePercent(goSnippet) 395 if err != nil { 396 return err 397 } 398 snippet := "{}" 399 if len(goSnippet) != 0 { 400 snippet = fmt.Sprintf("{%%i\n%s%%u\n}", goSnippet) 401 } 402 _, err = f.Format(snippet) 403 return err 404 } 405 406 func handlePrecedence(f format.Formatter, p *parser.Precedence, isFirstItem bool) error { 407 if p == nil { 408 return nil 409 } 410 if err := Ensure(p.Token). 411 and(p.Token2).NotNil(); err != nil { 412 return err 413 } 414 cmt := getTokenComment(p.Token, spanStringLayout) 415 if !isFirstItem { 416 if _, err := f.Format(" "); err != nil { 417 return err 418 } 419 } 420 _, err := f.Format("%s%s %s", cmt, p.Token.Val, p.Token2.Val) 421 return err 422 } 423 424 func formatGoSnippet(actVal []*parser.ActionValue) (string, error) { 425 tran := &SpecialActionValTransformer{ 426 store: map[string]string{}, 427 } 428 goSnippet := collectGoSnippet(tran, actVal) 429 formatted, err := gofmt.Source([]byte(goSnippet)) 430 if err != nil { 431 return "", err 432 } 433 formattedSnippet := tran.restore(string(formatted)) 434 return strings.TrimSpace(formattedSnippet), nil 435 } 436 437 func collectGoSnippet(tran *SpecialActionValTransformer, actionValArr []*parser.ActionValue) string { 438 var sb strings.Builder 439 for _, value := range actionValArr { 440 trimTab := removeLineBeginBlanks(value.Src) 441 sb.WriteString(tran.transform(trimTab)) 442 } 443 snipWithPar := strings.TrimSpace(sb.String()) 444 if strings.HasPrefix(snipWithPar, "{") && strings.HasSuffix(snipWithPar, "}") { 445 return snipWithPar[1 : len(snipWithPar)-1] 446 } 447 return "" 448 } 449 450 var lineBeginBlankRegex = regexp.MustCompile("(?m)^[\t ]+") 451 452 func removeLineBeginBlanks(src string) string { 453 return lineBeginBlankRegex.ReplaceAllString(src, "") 454 } 455 456 type SpecialActionValTransformer struct { 457 store map[string]string 458 } 459 460 const yaccFmtVar = "_yaccfmt_var_" 461 462 var yaccFmtVarRegex = regexp.MustCompile("_yaccfmt_var_[0-9]{1,5}") 463 464 func (s *SpecialActionValTransformer) transform(val string) string { 465 if strings.HasPrefix(val, "$") { 466 generated := fmt.Sprintf("%s%d", yaccFmtVar, len(s.store)) 467 s.store[generated] = val 468 return generated 469 } 470 return val 471 } 472 473 func (s *SpecialActionValTransformer) restore(src string) string { 474 return yaccFmtVarRegex.ReplaceAllStringFunc(src, func(matched string) string { 475 origin, ok := s.store[matched] 476 if !ok { 477 panic(errors.Errorf("mismatch in SpecialActionValTransformer")) 478 } 479 return origin 480 }) 481 } 482 483 type OutputFormatter struct { 484 file *os.File 485 out *bufio.Writer 486 formatter strutil.Formatter 487 } 488 489 func (y *OutputFormatter) Setup(filename string) (err error) { 490 if y.file, err = os.Create(filename); err != nil { 491 return 492 } 493 y.out = bufio.NewWriter(y.file) 494 y.formatter = strutil.IndentFormatter(y.out, "\t") 495 return 496 } 497 498 func (y *OutputFormatter) Teardown() error { 499 if y.out != nil { 500 if err := y.out.Flush(); err != nil { 501 return err 502 } 503 } 504 if y.file != nil { 505 if err := y.file.Close(); err != nil { 506 return err 507 } 508 } 509 return nil 510 } 511 512 func (y *OutputFormatter) Format(format string, args ...interface{}) (int, error) { 513 return y.formatter.Format(format, args...) 514 } 515 516 func (y *OutputFormatter) Write(bytes []byte) (int, error) { 517 return y.formatter.Write(bytes) 518 } 519 520 type NotNilAssert struct { 521 idx int 522 err error 523 } 524 525 func (n *NotNilAssert) and(target interface{}) *NotNilAssert { 526 if n.err != nil { 527 return n 528 } 529 if target == nil { 530 n.err = errors.Errorf("encounter nil, index: %d", n.idx) 531 } 532 n.idx++ 533 return n 534 } 535 536 func (n *NotNilAssert) NotNil() error { 537 return n.err 538 } 539 540 func Ensure(target interface{}) *NotNilAssert { 541 return (&NotNilAssert{}).and(target) 542 } 543 544 func escapePercent(src string) string { 545 return strings.ReplaceAll(src, "%", "%%") 546 } 547 548 func checkInconsistencyInYaccParser(f format.Formatter, rule *parser.Rule, counter int) error { 549 if counter == len(rule.Body) { 550 return nil 551 } 552 // pickup rule item in ruleBody 553 for i := counter; i < len(rule.Body); i++ { 554 body := rule.Body[i] 555 switch b := body.(type) { 556 case string, int: 557 if bInt, ok := b.(int); ok { 558 b = fmt.Sprintf("'%c'", bInt) 559 } 560 term := fmt.Sprintf(" %s", b) 561 if i == 0 { 562 term = term[1:] 563 } 564 _, err := f.Format("%s", term) 565 return err 566 case *parser.Action: 567 isFirstRuleItem := i == 0 568 if err := handlePrecedence(f, rule.Precedence, isFirstRuleItem); err != nil { 569 return err 570 } 571 if err := handleAction(f, rule, b, isFirstRuleItem); err != nil { 572 return err 573 } 574 } 575 } 576 return nil 577 }