cuelang.org/go@v0.10.1/cue/format/printer.go (about)

     1  // Copyright 2018 The CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package format
    16  
    17  import (
    18  	"fmt"
    19  	"os"
    20  	"strings"
    21  	"text/tabwriter"
    22  
    23  	"cuelang.org/go/cue/ast"
    24  	"cuelang.org/go/cue/errors"
    25  	"cuelang.org/go/cue/literal"
    26  	"cuelang.org/go/cue/token"
    27  )
    28  
// A printer takes the stream of formatting tokens and spacing directives
// produced by the formatter and adjusts the spacing based on the original
// source code.
//
// Items are fed in through Print. Whitespace directives are accumulated in
// allowed and are only written out (by writeWhitespace) right before the next
// piece of text or when a position with relative-position info is printed.
type printer struct {
	// cfg is the configuration installed by init; writeByte reads cfg.Indent
	// as the base indentation added after every line break.
	cfg *config

	// allowed is the pending whitespace: flags OR-ed in by Print and flushed
	// by writeWhitespace before the next text is written.
	allowed     whiteSpace
	// requested is only ever reset to 0 in this file.
	// NOTE(review): presumably used by the formatter elsewhere; confirm.
	requested   whiteSpace
	// indentStack holds one entry per indent directive seen (pushed by
	// markLineIndent, popped by markUnindentLine). An entry gets the
	// indented flag once it has actually increased p.indent.
	indentStack []whiteSpace

	// pos is advanced by writeString and writeByte for every byte of text
	// or whitespace they emit (tabwriter escape bytes are not counted).
	pos     token.Position // current pos in AST
	// lineout counts the line breaks written so far (see incrementLine).
	lineout line

	lastTok token.Token // last token printed (token.ILLEGAL if it's whitespace)

	// output accumulates the formatted bytes.
	output           []byte
	// indent is the current indentation depth, in tabs, on top of cfg.Indent.
	indent           int
	// spaceBefore is set after writeWhitespace emits a separator and cleared
	// when writeString writes non-empty text.
	spaceBefore      bool
	prevLbraceOnLine bool // true if a '{' has been written on the current line

	// TODO(mvdan): This is similar to nooverride but used only for comments,
	// to ensure that we always print a newline after them.
	// We should fix our logic with whiteSpace instead, but for now this ensures
	// we don't break the syntax by omitting the newline after a comment.
	printingComment bool

	// errs collects the errors reported through errf.
	errs errors.Error
}
    57  
// line is a count of output line breaks. It is the type of printer.lineout;
// passing a *line to Print stores the current count in it.
type line int
    59  
    60  func (p *printer) init(cfg *config) {
    61  	p.cfg = cfg
    62  	p.pos = token.Position{Line: 1, Column: 1}
    63  }
    64  
    65  func (p *printer) errf(n ast.Node, format string, args ...interface{}) {
    66  	p.errs = errors.Append(p.errs, errors.Newf(n.Pos(), format, args...))
    67  }
    68  
// debug controls internalError: when true, internal inconsistencies are
// printed and cause a panic; when false they are ignored.
const debug = false
    70  
    71  func (p *printer) internalError(msg ...interface{}) {
    72  	if debug {
    73  		fmt.Print(p.pos.String() + ": ")
    74  		fmt.Println(msg...)
    75  		panic("go/printer")
    76  	}
    77  }
    78  
    79  func (p *printer) lineFor(pos token.Pos) int {
    80  	return pos.Line()
    81  }
    82  
    83  func (p *printer) Print(v interface{}) {
    84  	var (
    85  		impliedComma = false
    86  		isLit        bool
    87  		data         string
    88  		nextWS       whiteSpace
    89  	)
    90  	switch x := v.(type) {
    91  	case *line:
    92  		*x = p.lineout
    93  
    94  	case token.Token:
    95  		s := x.String()
    96  		before, after := mayCombine(p.lastTok, x)
    97  		if before && !p.spaceBefore {
    98  			// the previous and the current token must be
    99  			// separated by a blank otherwise they combine
   100  			// into a different incorrect token sequence
   101  			// (except for syntax.INT followed by a '.' this
   102  			// should never happen because it is taken care
   103  			// of via binary expression formatting)
   104  			if p.allowed&blank != 0 {
   105  				p.internalError("whitespace buffer not empty")
   106  			}
   107  			p.allowed |= blank
   108  		}
   109  		if after {
   110  			nextWS = blank
   111  		}
   112  		data = s
   113  		switch x {
   114  		case token.EOF:
   115  			data = ""
   116  			p.allowed = newline
   117  			p.allowed &^= newsection
   118  		case token.LPAREN, token.LBRACK, token.LBRACE:
   119  		case token.RPAREN, token.RBRACK, token.RBRACE:
   120  			impliedComma = true
   121  		}
   122  		p.lastTok = x
   123  
   124  	case *ast.BasicLit:
   125  		data = x.Value
   126  		switch x.Kind {
   127  		case token.STRING:
   128  			// TODO: only do this when simplifying. Right now this does not
   129  			// give the right result, but it should be better if:
   130  			// 1) simplification is done as a separate step
   131  			// 2) simplified structs are explicitly referenced separately
   132  			//    in the AST.
   133  			if p.indent < 6 {
   134  				data = literal.IndentTabs(data, p.cfg.Indent+p.indent+1)
   135  			}
   136  
   137  		case token.INT:
   138  			if len(data) > 1 &&
   139  				data[0] == '0' &&
   140  				data[1] >= '0' && data[1] <= '9' {
   141  				data = "0o" + data[1:]
   142  			}
   143  			// Pad trailing dot before multiplier.
   144  			if p := strings.IndexByte(data, '.'); p >= 0 && data[p+1] > '9' {
   145  				data = data[:p+1] + "0" + data[p+1:]
   146  			}
   147  			// Lowercase E, but only if it is not the last character: in the
   148  			// future we may use E for Exa.
   149  			if p := strings.IndexByte(data, 'E'); p != -1 && p < len(data)-1 {
   150  				data = strings.ToLower(data)
   151  			}
   152  
   153  		case token.FLOAT:
   154  			// Pad leading or trailing dots.
   155  			switch p := strings.IndexByte(data, '.'); {
   156  			case p < 0:
   157  			case p == 0:
   158  				data = "0" + data
   159  			case p == len(data)-1:
   160  				data += "0"
   161  			case data[p+1] > '9':
   162  				data = data[:p+1] + "0" + data[p+1:]
   163  			}
   164  			if strings.IndexByte(data, 'E') != -1 {
   165  				data = strings.ToLower(data)
   166  			}
   167  		}
   168  
   169  		isLit = true
   170  		impliedComma = true
   171  		p.lastTok = x.Kind
   172  
   173  	case *ast.Ident:
   174  		data = x.Name
   175  		if !ast.IsValidIdent(data) {
   176  			p.errf(x, "invalid identifier %q", x.Name)
   177  			data = "*bad identifier*"
   178  		}
   179  		impliedComma = true
   180  		p.lastTok = token.IDENT
   181  
   182  	case string:
   183  		// We can print a Go string as part of a CUE identifier or literal;
   184  		// for example, see the formatter.label method.
   185  		isLit = true
   186  		data = x
   187  		impliedComma = true
   188  		p.lastTok = token.STRING
   189  
   190  	case *ast.CommentGroup:
   191  		rel := x.Pos().RelPos()
   192  		if x.Line { // TODO: we probably don't need this.
   193  			rel = token.Blank
   194  		}
   195  		switch rel {
   196  		case token.NoRelPos:
   197  		case token.Newline, token.NewSection:
   198  		case token.Blank, token.Elided:
   199  			p.allowed |= blank
   200  			fallthrough
   201  		case token.NoSpace:
   202  			p.allowed &^= newline | newsection | formfeed | declcomma
   203  		}
   204  		return
   205  
   206  	case *ast.Attribute:
   207  		isLit = true
   208  		data = x.Text
   209  		impliedComma = true
   210  		p.lastTok = token.ATTRIBUTE
   211  
   212  	case *ast.Comment:
   213  		// TODO: if implied comma, postpone comment
   214  		isLit = true
   215  		data = x.Text
   216  		p.lastTok = token.COMMENT
   217  
   218  	case whiteSpace:
   219  		p.allowed |= x
   220  		return
   221  
   222  	case token.Pos:
   223  		// TODO: should we use a known file position to synchronize? Go does,
   224  		// but we don't really have to.
   225  		// pos := x
   226  		if x.HasRelPos() {
   227  			if p.allowed&nooverride == 0 {
   228  				requested := p.allowed
   229  				switch x.RelPos() {
   230  				case token.NoSpace:
   231  					requested &^= newline | newsection | formfeed
   232  				case token.Blank:
   233  					requested |= blank
   234  					requested &^= newline | newsection | formfeed
   235  				case token.Newline:
   236  					requested |= newline
   237  				case token.NewSection:
   238  					requested |= newsection
   239  				}
   240  				if p.printingComment {
   241  					requested |= newline
   242  				}
   243  				p.writeWhitespace(requested)
   244  				p.allowed = 0
   245  				p.requested = 0
   246  			}
   247  			// p.pos = pos
   248  		}
   249  		return
   250  
   251  	default:
   252  		fmt.Fprintf(os.Stderr, "print: unsupported argument %v (%T)\n", x, x)
   253  		panic("go/printer type")
   254  	}
   255  
   256  	p.writeWhitespace(p.allowed)
   257  	p.allowed = 0
   258  	p.requested = 0
   259  	p.printingComment = false
   260  	p.writeString(data, isLit)
   261  	p.allowed = nextWS
   262  	_ = impliedComma // TODO: delay comment printings
   263  }
   264  
   265  func (p *printer) writeWhitespace(ws whiteSpace) {
   266  	if ws&comma != 0 {
   267  		switch {
   268  		case ws&(newsection|newline|formfeed) != 0,
   269  			ws&trailcomma == 0:
   270  			p.writeByte(',', 1)
   271  		}
   272  	}
   273  	if ws&indent != 0 {
   274  		p.markLineIndent(ws)
   275  	}
   276  	if ws&unindent != 0 {
   277  		p.markUnindentLine()
   278  	}
   279  	switch {
   280  	case ws&newsection != 0:
   281  		p.maybeIndentLine(ws)
   282  		p.writeByte('\f', 2)
   283  		p.incrementLine(2)
   284  		p.spaceBefore = true
   285  	case ws&formfeed != 0:
   286  		p.maybeIndentLine(ws)
   287  		p.writeByte('\f', 1)
   288  		p.incrementLine(1)
   289  		p.spaceBefore = true
   290  	case ws&newline != 0:
   291  		p.maybeIndentLine(ws)
   292  		p.writeByte('\n', 1)
   293  		p.incrementLine(1)
   294  		p.spaceBefore = true
   295  	case ws&declcomma != 0:
   296  		p.writeByte(',', 1)
   297  		p.writeByte(' ', 1)
   298  		p.spaceBefore = true
   299  	case ws&noblank != 0:
   300  	case ws&vtab != 0:
   301  		p.writeByte('\v', 1)
   302  		p.spaceBefore = true
   303  	case ws&blank != 0:
   304  		p.writeByte(' ', 1)
   305  		p.spaceBefore = true
   306  	}
   307  }
   308  
   309  func (p *printer) incrementLine(n int) {
   310  	if n != 0 {
   311  		p.prevLbraceOnLine = false
   312  	}
   313  	p.lineout += line(n)
   314  }
   315  
   316  func (p *printer) markLineIndent(ws whiteSpace) {
   317  	p.indentStack = append(p.indentStack, ws)
   318  }
   319  
   320  func (p *printer) markUnindentLine() (wasUnindented bool) {
   321  	last := len(p.indentStack) - 1
   322  	if ws := p.indentStack[last]; ws&indented != 0 {
   323  		p.indent--
   324  		wasUnindented = true
   325  	}
   326  	p.indentStack = p.indentStack[:last]
   327  	return wasUnindented
   328  }
   329  
   330  func (p *printer) maybeIndentLine(ws whiteSpace) {
   331  	if ws&unindent == 0 && len(p.indentStack) > 0 {
   332  		last := len(p.indentStack) - 1
   333  		if ws := p.indentStack[last]; ws&indented != 0 || ws&indent == 0 {
   334  			return
   335  		}
   336  		p.indentStack[last] |= indented
   337  		p.indent++
   338  	}
   339  }
   340  
   341  func (f *formatter) matchUnindent() whiteSpace {
   342  	f.allowed |= unindent
   343  	// TODO: make this work. Whitespace from closing bracket should match that
   344  	// of opening if there is no position information.
   345  	// f.allowed &^= nooverride | newline | newsection | formfeed | blank | noblank
   346  	// ws := f.indentStack[len(f.indentStack)-1]
   347  	// mask := blank | noblank | vtab
   348  	// f.allowed |= unindent | blank | noblank
   349  	// if ws&newline != 0 || ws*indented != 0 {
   350  	// 	f.allowed |= newline
   351  	// }
   352  	return 0
   353  }
   354  
   355  // writeString writes the string s to p.output and updates p.pos, p.out,
   356  // and p.last. If isLit is set, s is escaped w/ tabwriter.Escape characters
   357  // to protect s from being interpreted by the tabwriter.
   358  //
   359  // Note: writeString is only used to write Go tokens, literals, and
   360  // comments, all of which must be written literally. Thus, it is correct
   361  // to always set isLit = true. However, setting it explicitly only when
   362  // needed (i.e., when we don't know that s contains no tabs or line breaks)
   363  // avoids processing extra escape characters and reduces run time of the
   364  // printer benchmark by up to 10%.
   365  func (p *printer) writeString(s string, isLit bool) {
   366  	if s != "" {
   367  		p.spaceBefore = false
   368  	}
   369  
   370  	if isLit {
   371  		// Protect s such that is passes through the tabwriter
   372  		// unchanged. Note that valid Go programs cannot contain
   373  		// tabwriter.Escape bytes since they do not appear in legal
   374  		// UTF-8 sequences.
   375  		p.output = append(p.output, tabwriter.Escape)
   376  	}
   377  
   378  	p.output = append(p.output, s...)
   379  
   380  	if isLit {
   381  		p.output = append(p.output, tabwriter.Escape)
   382  	}
   383  	// update positions
   384  	nLines := 0
   385  	var li int // index of last newline; valid if nLines > 0
   386  	for i := 0; i < len(s); i++ {
   387  		// CUE tokens cannot contain '\f' - no need to look for it
   388  		if s[i] == '\n' {
   389  			nLines++
   390  			li = i
   391  		}
   392  	}
   393  	p.pos.Offset += len(s)
   394  	if nLines > 0 {
   395  		p.pos.Line += nLines
   396  		c := len(s) - li
   397  		p.pos.Column = c
   398  	} else {
   399  		p.pos.Column += len(s)
   400  	}
   401  }
   402  
   403  func (p *printer) writeByte(ch byte, n int) {
   404  	for i := 0; i < n; i++ {
   405  		p.output = append(p.output, ch)
   406  	}
   407  
   408  	// update positions
   409  	p.pos.Offset += n
   410  	if ch == '\n' || ch == '\f' {
   411  		p.pos.Line += n
   412  		p.pos.Column = 1
   413  
   414  		n := p.cfg.Indent + p.indent // include base indentation
   415  		for i := 0; i < n; i++ {
   416  			p.output = append(p.output, '\t')
   417  		}
   418  
   419  		// update positions
   420  		p.pos.Offset += n
   421  		p.pos.Column += n
   422  
   423  		return
   424  	}
   425  	p.pos.Column += n
   426  }
   427  
   428  // TODO(mvdan): mayCombine as a name was carried over from Go,
   429  // but it doesn't really make sense as a name for our logic here,
   430  // since we return true when either side must use a blank space.
   431  
   432  func mayCombine(prev, next token.Token) (before, after bool) {
   433  	s := next.String()
   434  	if 'a' <= s[0] && s[0] < 'z' {
   435  		if prev == token.ILLEGAL {
   436  			// If we're printing the first token,
   437  			// we don't need a blank space before it.
   438  			return false, true
   439  		}
   440  		return true, true
   441  	}
   442  	switch prev {
   443  	case token.IQUO, token.IREM, token.IDIV, token.IMOD:
   444  		return false, false
   445  	case token.INT:
   446  		before = next == token.PERIOD // 1.
   447  	case token.ADD:
   448  		before = s[0] == '+' // ++
   449  	case token.SUB:
   450  		before = s[0] == '-' // --
   451  	case token.QUO:
   452  		before = s[0] == '*' // /*
   453  	}
   454  	return before, false
   455  }