github.com/vescale/zgraph@v0.0.0-20230410094002-959c02d50f95/parser/goyacc/format_yacc.go (about)

     1  // Copyright 2019 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package main
    15  
import (
	"bufio"
	"fmt"
	gofmt "go/format"
	"go/token"
	"io/ioutil"
	"os"
	"reflect"
	"regexp"
	"strings"

	"github.com/cznic/strutil"
	"github.com/pingcap/errors"
	"github.com/vescale/zgraph/parser/format"
	parser "modernc.org/parser/yacc"
)
    31  
    32  func Format(inputFilename string, goldenFilename string) (err error) {
    33  	spec, err := parseFileToSpec(inputFilename)
    34  	if err != nil {
    35  		return err
    36  	}
    37  
    38  	yFmt := &OutputFormatter{}
    39  	if err = yFmt.Setup(goldenFilename); err != nil {
    40  		return err
    41  	}
    42  	defer func() {
    43  		teardownErr := yFmt.Teardown()
    44  		if err == nil {
    45  			err = teardownErr
    46  		}
    47  	}()
    48  
    49  	if err = printDefinitions(yFmt, spec.Defs); err != nil {
    50  		return err
    51  	}
    52  
    53  	return printRules(yFmt, spec.Rules)
    54  }
    55  
    56  func parseFileToSpec(inputFilename string) (*parser.Specification, error) {
    57  	src, err := ioutil.ReadFile(inputFilename)
    58  	if err != nil {
    59  		return nil, err
    60  	}
    61  	return parser.Parse(token.NewFileSet(), inputFilename, src)
    62  }
    63  
    64  // Definition represents data reduced by productions:
    65  //
    66  //	Definition:
    67  //	        START IDENTIFIER
    68  //	|       UNION                      // Case 1
    69  //	|       LCURL RCURL                // Case 2
    70  //	|       ReservedWord Tag NameList  // Case 3
    71  //	|       ReservedWord Tag           // Case 4
    72  //	|       ERROR_VERBOSE              // Case 5
    73  const (
    74  	StartIdentifierCase = iota
    75  	UnionDefinitionCase
    76  	LCURLRCURLCase
    77  	ReservedWordTagNameListCase
    78  	ReservedWordTagCase
    79  )
    80  
    81  func printDefinitions(formatter format.Formatter, definitions []*parser.Definition) error {
    82  	for _, def := range definitions {
    83  		var err error
    84  		switch def.Case {
    85  		case StartIdentifierCase:
    86  			err = handleStart(formatter, def)
    87  		case UnionDefinitionCase:
    88  			err = handleUnion(formatter, def)
    89  		case LCURLRCURLCase:
    90  			err = handleProlog(formatter, def)
    91  		case ReservedWordTagNameListCase, ReservedWordTagCase:
    92  			err = handleReservedWordTagNameList(formatter, def)
    93  		}
    94  		if err != nil {
    95  			return err
    96  		}
    97  	}
    98  	_, err := formatter.Format("\n%%%%")
    99  	return err
   100  }
   101  
   102  func handleStart(f format.Formatter, definition *parser.Definition) error {
   103  	if err := Ensure(definition).
   104  		and(definition.Token2).
   105  		and(definition.Token2).NotNil(); err != nil {
   106  		return err
   107  	}
   108  	cmt1 := strings.Join(definition.Token.Comments, "\n")
   109  	cmt2 := strings.Join(definition.Token2.Comments, "\n")
   110  	_, err := f.Format("\n%s%s\t%s%s\n", cmt1, definition.Token.Val, cmt2, definition.Token2.Val)
   111  	return err
   112  }
   113  
   114  func handleUnion(f format.Formatter, definition *parser.Definition) error {
   115  	if err := Ensure(definition).
   116  		and(definition.Value).NotNil(); err != nil {
   117  		return err
   118  	}
   119  	if len(definition.Value) != 0 {
   120  		_, err := f.Format("%%union%i%s%u\n\n", definition.Value)
   121  		if err != nil {
   122  			return err
   123  		}
   124  	}
   125  	return nil
   126  }
   127  
   128  func handleProlog(f format.Formatter, definition *parser.Definition) error {
   129  	if err := Ensure(definition).
   130  		and(definition.Value).NotNil(); err != nil {
   131  		return err
   132  	}
   133  	_, err := f.Format("%%{%s%%}\n\n", definition.Value)
   134  	return err
   135  }
   136  
   137  func handleReservedWordTagNameList(f format.Formatter, def *parser.Definition) error {
   138  	if err := Ensure(def).
   139  		and(def.ReservedWord).
   140  		and(def.ReservedWord.Token).NotNil(); err != nil {
   141  		return err
   142  	}
   143  	comment := getTokenComment(def.ReservedWord.Token, divNewLineStringLayout)
   144  	directive := def.ReservedWord.Token.Val
   145  
   146  	hasTag := def.Tag != nil
   147  	var wordAfterDirective string
   148  	if hasTag {
   149  		wordAfterDirective = joinTag(def.Tag)
   150  	} else {
   151  		wordAfterDirective = joinNames(def.Nlist)
   152  	}
   153  
   154  	if _, err := f.Format("%s%s%s%i", comment, directive, wordAfterDirective); err != nil {
   155  		return err
   156  	}
   157  	if hasTag {
   158  		if _, err := f.Format("\n"); err != nil {
   159  			return err
   160  		}
   161  		if err := printNameListVertical(f, def.Nlist); err != nil {
   162  			return err
   163  		}
   164  	}
   165  	_, err := f.Format("%u\n")
   166  	return err
   167  }
   168  
   169  func joinTag(tag *parser.Tag) string {
   170  	var sb strings.Builder
   171  	sb.WriteString("\t")
   172  	if tag.Token != nil {
   173  		sb.WriteString(tag.Token.Val)
   174  	}
   175  	if tag.Token2 != nil {
   176  		sb.WriteString(tag.Token2.Val)
   177  	}
   178  	if tag.Token3 != nil {
   179  		sb.WriteString(tag.Token3.Val)
   180  	}
   181  	return sb.String()
   182  }
   183  
   184  type stringLayout int8
   185  
   186  const (
   187  	spanStringLayout stringLayout = iota
   188  	divStringLayout
   189  	divNewLineStringLayout
   190  )
   191  
   192  func getTokenComment(token *parser.Token, layout stringLayout) string {
   193  	if len(token.Comments) == 0 {
   194  		return ""
   195  	}
   196  	var splitter, beforeComment string
   197  	switch layout {
   198  	case spanStringLayout:
   199  		splitter, beforeComment = " ", ""
   200  	case divStringLayout:
   201  		splitter, beforeComment = "\n", ""
   202  	case divNewLineStringLayout:
   203  		splitter, beforeComment = "\n", "\n"
   204  	default:
   205  		panic(errors.Errorf("unsupported stringLayout: %v", layout))
   206  	}
   207  
   208  	var sb strings.Builder
   209  	sb.WriteString(beforeComment)
   210  	for _, comment := range token.Comments {
   211  		sb.WriteString(comment)
   212  		sb.WriteString(splitter)
   213  	}
   214  	return sb.String()
   215  }
   216  
   217  func printNameListVertical(f format.Formatter, names NameArr) (err error) {
   218  	rest := names
   219  	for len(rest) != 0 {
   220  		var processing NameArr
   221  		processing, rest = rest[:1], rest[1:]
   222  
   223  		var noComments NameArr
   224  		noComments, rest = rest.span(noComment)
   225  		processing = append(processing, noComments...)
   226  
   227  		maxCharLength := processing.findMaxLength()
   228  		for _, name := range processing {
   229  			if err := printSingleName(f, name, maxCharLength); err != nil {
   230  				return err
   231  			}
   232  		}
   233  	}
   234  	return nil
   235  }
   236  
   237  func joinNames(names NameArr) string {
   238  	var sb strings.Builder
   239  	for _, name := range names {
   240  		sb.WriteString(" ")
   241  		sb.WriteString(getTokenComment(name.Token, spanStringLayout))
   242  		sb.WriteString(name.Token.Val)
   243  	}
   244  	return sb.String()
   245  }
   246  
   247  func printSingleName(f format.Formatter, name *parser.Name, maxCharLength int) error {
   248  	cmt := getTokenComment(name.Token, divNewLineStringLayout)
   249  	if _, err := f.Format(escapePercent(cmt)); err != nil {
   250  		return err
   251  	}
   252  	strLit := name.LiteralStringOpt
   253  	if strLit != nil && strLit.Token != nil {
   254  		_, err := f.Format("%-*s %s\n", maxCharLength, name.Token.Val, strLit.Token.Val)
   255  		return err
   256  	}
   257  	_, err := f.Format("%s\n", name.Token.Val)
   258  	return err
   259  }
   260  
// NameArr is a list of parsed names; its methods split the list into prefix
// runs and measure the longest name text.
type NameArr []*parser.Name
   262  
   263  func (ns NameArr) span(pred func(*parser.Name) bool) (first NameArr, second NameArr) {
   264  	first = ns.takeWhile(pred)
   265  	second = ns[len(first):]
   266  	return first, second
   267  }
   268  
   269  func (ns NameArr) takeWhile(pred func(*parser.Name) bool) NameArr {
   270  	for i, def := range ns {
   271  		if pred(def) {
   272  			continue
   273  		}
   274  		return ns[:i]
   275  	}
   276  	return ns
   277  }
   278  
   279  func (ns NameArr) findMaxLength() int {
   280  	maxLen := -1
   281  	for _, s := range ns {
   282  		if len(s.Token.Val) > maxLen {
   283  			maxLen = len(s.Token.Val)
   284  		}
   285  	}
   286  	return maxLen
   287  }
   288  
   289  func hasComments(n *parser.Name) bool {
   290  	return len(n.Token.Comments) != 0
   291  }
   292  
   293  func noComment(n *parser.Name) bool {
   294  	return !hasComments(n)
   295  }
   296  
   297  func containsActionInRule(rule *parser.Rule) bool {
   298  	for _, b := range rule.Body {
   299  		if _, ok := b.(*parser.Action); ok {
   300  			return true
   301  		}
   302  	}
   303  	return false
   304  }
   305  
// RuleArr is a list of parsed grammar rules, as accepted by printRules.
type RuleArr []*parser.Rule
   307  
   308  func printRules(f format.Formatter, rules RuleArr) (err error) {
   309  	var lastRuleName string
   310  	for _, rule := range rules {
   311  		if rule.Name.Val == lastRuleName {
   312  			cmt := getTokenComment(rule.Token, divStringLayout)
   313  			_, err = f.Format("\n%s|\t%i", cmt)
   314  		} else {
   315  			cmt := getTokenComment(rule.Name, divStringLayout)
   316  			_, err = f.Format("\n\n%s%s:%i\n", cmt, rule.Name.Val)
   317  		}
   318  		if err != nil {
   319  			return err
   320  		}
   321  		lastRuleName = rule.Name.Val
   322  
   323  		if err = printRuleBody(f, rule); err != nil {
   324  			return err
   325  		}
   326  		if _, err = f.Format("%u"); err != nil {
   327  			return err
   328  		}
   329  	}
   330  	_, err = f.Format("\n%%%%\n")
   331  	return err
   332  }
   333  
   334  type ruleItemType int8
   335  
   336  const (
   337  	identRuleItemType      ruleItemType = 1
   338  	actionRuleItemType     ruleItemType = 2
   339  	strLiteralRuleItemType ruleItemType = 3
   340  )
   341  
   342  func printRuleBody(f format.Formatter, rule *parser.Rule) error {
   343  	firstRuleItem, counter := rule.RuleItemList, 0
   344  	for ri := rule.RuleItemList; ri != nil; ri = ri.RuleItemList {
   345  		switch ruleItemType(ri.Case) {
   346  		case identRuleItemType, strLiteralRuleItemType:
   347  			term := fmt.Sprintf(" %s", ri.Token.Val)
   348  			if ri == firstRuleItem {
   349  				term = term[1:]
   350  			}
   351  			cmt := getTokenComment(ri.Token, divStringLayout)
   352  
   353  			if _, err := f.Format(escapePercent(cmt)); err != nil {
   354  				return err
   355  			}
   356  			if _, err := f.Format("%s", term); err != nil {
   357  				return err
   358  			}
   359  		case actionRuleItemType:
   360  			isFirstRuleItem := ri == firstRuleItem
   361  			if err := handlePrecedence(f, rule.Precedence, isFirstRuleItem); err != nil {
   362  				return err
   363  			}
   364  			if err := handleAction(f, rule, ri.Action, isFirstRuleItem); err != nil {
   365  				return err
   366  			}
   367  		}
   368  		counter++
   369  	}
   370  	if err := checkInconsistencyInYaccParser(f, rule, counter); err != nil {
   371  		return err
   372  	}
   373  	if !containsActionInRule(rule) {
   374  		if err := handlePrecedence(f, rule.Precedence, counter == 0); err != nil {
   375  			return err
   376  		}
   377  	}
   378  	return nil
   379  }
   380  
   381  func handleAction(f format.Formatter, rule *parser.Rule, action *parser.Action, isFirstItem bool) error {
   382  	if !isFirstItem || rule.Precedence != nil {
   383  		if _, err := f.Format("\n"); err != nil {
   384  			return err
   385  		}
   386  	}
   387  
   388  	cmt := getTokenComment(action.Token, divStringLayout)
   389  	if _, err := f.Format(escapePercent(cmt)); err != nil {
   390  		return err
   391  	}
   392  
   393  	goSnippet, err := formatGoSnippet(action.Values)
   394  	goSnippet = escapePercent(goSnippet)
   395  	if err != nil {
   396  		return err
   397  	}
   398  	snippet := "{}"
   399  	if len(goSnippet) != 0 {
   400  		snippet = fmt.Sprintf("{%%i\n%s%%u\n}", goSnippet)
   401  	}
   402  	_, err = f.Format(snippet)
   403  	return err
   404  }
   405  
   406  func handlePrecedence(f format.Formatter, p *parser.Precedence, isFirstItem bool) error {
   407  	if p == nil {
   408  		return nil
   409  	}
   410  	if err := Ensure(p.Token).
   411  		and(p.Token2).NotNil(); err != nil {
   412  		return err
   413  	}
   414  	cmt := getTokenComment(p.Token, spanStringLayout)
   415  	if !isFirstItem {
   416  		if _, err := f.Format(" "); err != nil {
   417  			return err
   418  		}
   419  	}
   420  	_, err := f.Format("%s%s %s", cmt, p.Token.Val, p.Token2.Val)
   421  	return err
   422  }
   423  
   424  func formatGoSnippet(actVal []*parser.ActionValue) (string, error) {
   425  	tran := &SpecialActionValTransformer{
   426  		store: map[string]string{},
   427  	}
   428  	goSnippet := collectGoSnippet(tran, actVal)
   429  	formatted, err := gofmt.Source([]byte(goSnippet))
   430  	if err != nil {
   431  		return "", err
   432  	}
   433  	formattedSnippet := tran.restore(string(formatted))
   434  	return strings.TrimSpace(formattedSnippet), nil
   435  }
   436  
   437  func collectGoSnippet(tran *SpecialActionValTransformer, actionValArr []*parser.ActionValue) string {
   438  	var sb strings.Builder
   439  	for _, value := range actionValArr {
   440  		trimTab := removeLineBeginBlanks(value.Src)
   441  		sb.WriteString(tran.transform(trimTab))
   442  	}
   443  	snipWithPar := strings.TrimSpace(sb.String())
   444  	if strings.HasPrefix(snipWithPar, "{") && strings.HasSuffix(snipWithPar, "}") {
   445  		return snipWithPar[1 : len(snipWithPar)-1]
   446  	}
   447  	return ""
   448  }
   449  
   450  var lineBeginBlankRegex = regexp.MustCompile("(?m)^[\t ]+")
   451  
   452  func removeLineBeginBlanks(src string) string {
   453  	return lineBeginBlankRegex.ReplaceAllString(src, "")
   454  }
   455  
   456  type SpecialActionValTransformer struct {
   457  	store map[string]string
   458  }
   459  
   460  const yaccFmtVar = "_yaccfmt_var_"
   461  
   462  var yaccFmtVarRegex = regexp.MustCompile("_yaccfmt_var_[0-9]{1,5}")
   463  
   464  func (s *SpecialActionValTransformer) transform(val string) string {
   465  	if strings.HasPrefix(val, "$") {
   466  		generated := fmt.Sprintf("%s%d", yaccFmtVar, len(s.store))
   467  		s.store[generated] = val
   468  		return generated
   469  	}
   470  	return val
   471  }
   472  
   473  func (s *SpecialActionValTransformer) restore(src string) string {
   474  	return yaccFmtVarRegex.ReplaceAllStringFunc(src, func(matched string) string {
   475  		origin, ok := s.store[matched]
   476  		if !ok {
   477  			panic(errors.Errorf("mismatch in SpecialActionValTransformer"))
   478  		}
   479  		return origin
   480  	})
   481  }
   482  
// OutputFormatter writes formatted output to a file. Setup fills all three
// fields; Teardown flushes out and closes file.
type OutputFormatter struct {
	file      *os.File          // destination file, created by Setup
	out       *bufio.Writer     // buffered writer wrapping file
	formatter strutil.Formatter // indent formatter writing to out
}
   488  
   489  func (y *OutputFormatter) Setup(filename string) (err error) {
   490  	if y.file, err = os.Create(filename); err != nil {
   491  		return
   492  	}
   493  	y.out = bufio.NewWriter(y.file)
   494  	y.formatter = strutil.IndentFormatter(y.out, "\t")
   495  	return
   496  }
   497  
   498  func (y *OutputFormatter) Teardown() error {
   499  	if y.out != nil {
   500  		if err := y.out.Flush(); err != nil {
   501  			return err
   502  		}
   503  	}
   504  	if y.file != nil {
   505  		if err := y.file.Close(); err != nil {
   506  			return err
   507  		}
   508  	}
   509  	return nil
   510  }
   511  
// Format forwards format and args to the wrapped formatter and returns its
// result unchanged. It must only be called after a successful Setup.
func (y *OutputFormatter) Format(format string, args ...interface{}) (int, error) {
	return y.formatter.Format(format, args...)
}
   515  
// Write forwards bytes to the wrapped formatter and returns its result
// unchanged. It must only be called after a successful Setup.
func (y *OutputFormatter) Write(bytes []byte) (int, error) {
	return y.formatter.Write(bytes)
}
   519  
// NotNilAssert accumulates the outcome of a chain of nil checks started with
// Ensure and extended with and.
type NotNilAssert struct {
	idx int   // number of values checked so far; reported in the error message
	err error // first failure recorded by the chain, nil while every check passed
}
   524  
   525  func (n *NotNilAssert) and(target interface{}) *NotNilAssert {
   526  	if n.err != nil {
   527  		return n
   528  	}
   529  	if target == nil {
   530  		n.err = errors.Errorf("encounter nil, index: %d", n.idx)
   531  	}
   532  	n.idx++
   533  	return n
   534  }
   535  
// NotNil ends a chain of checks: it returns the error recorded by the chain,
// or nil when no check recorded one.
func (n *NotNilAssert) NotNil() error {
	return n.err
}
   539  
   540  func Ensure(target interface{}) *NotNilAssert {
   541  	return (&NotNilAssert{}).and(target)
   542  }
   543  
   544  func escapePercent(src string) string {
   545  	return strings.ReplaceAll(src, "%", "%%")
   546  }
   547  
   548  func checkInconsistencyInYaccParser(f format.Formatter, rule *parser.Rule, counter int) error {
   549  	if counter == len(rule.Body) {
   550  		return nil
   551  	}
   552  	// pickup rule item in ruleBody
   553  	for i := counter; i < len(rule.Body); i++ {
   554  		body := rule.Body[i]
   555  		switch b := body.(type) {
   556  		case string, int:
   557  			if bInt, ok := b.(int); ok {
   558  				b = fmt.Sprintf("'%c'", bInt)
   559  			}
   560  			term := fmt.Sprintf(" %s", b)
   561  			if i == 0 {
   562  				term = term[1:]
   563  			}
   564  			_, err := f.Format("%s", term)
   565  			return err
   566  		case *parser.Action:
   567  			isFirstRuleItem := i == 0
   568  			if err := handlePrecedence(f, rule.Precedence, isFirstRuleItem); err != nil {
   569  				return err
   570  			}
   571  			if err := handleAction(f, rule, b, isFirstRuleItem); err != nil {
   572  				return err
   573  			}
   574  		}
   575  	}
   576  	return nil
   577  }