github.com/joomcode/cue@v0.4.4-0.20221111115225-539fe3512047/cue/format/printer.go (about)

     1  // Copyright 2018 The CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package format
    16  
    17  import (
    18  	"fmt"
    19  	"os"
    20  	"strings"
    21  	"text/tabwriter"
    22  
    23  	"github.com/joomcode/cue/cue/ast"
    24  	"github.com/joomcode/cue/cue/errors"
    25  	"github.com/joomcode/cue/cue/literal"
    26  	"github.com/joomcode/cue/cue/token"
    27  )
    28  
    29  // A printer takes the stream of formatting tokens and spacing directives
    30  // produced by the formatter and adjusts the spacing based on the original
    31  // source code.
type printer struct {
	cfg *config // formatting configuration; cfg.Indent is the base indentation

	allowed     whiteSpace   // whitespace flags accumulated since the last write; flushed by Print
	requested   whiteSpace   // reset together with allowed whenever whitespace is flushed
	indentStack []whiteSpace // flags pushed by markLineIndent and popped by markUnindentLine

	pos     token.Position // current pos in AST
	lineout line           // number of line breaks written so far (a new section counts as two)

	lastTok token.Token // last token printed (syntax.ILLEGAL if it's whitespace)

	output      []byte // bytes produced so far
	indent      int    // current indentation depth, added to cfg.Indent after each line break
	spaceBefore bool   // set after whitespace is written, cleared after non-empty text is written

	errs errors.Error // errors collected through errf
}
    50  
// line is a count of output line breaks. Print stores the printer's current
// count into a *line argument.
type line int
    52  
    53  func (p *printer) init(cfg *config) {
    54  	p.cfg = cfg
    55  	p.pos = token.Position{Line: 1, Column: 1}
    56  }
    57  
    58  func (p *printer) errf(n ast.Node, format string, args ...interface{}) {
    59  	p.errs = errors.Append(p.errs, errors.Newf(n.Pos(), format, args...))
    60  }
    61  
// debug controls internalError: when true, an internal error is printed and
// the printer panics; when false, internal errors are ignored.
const debug = false
    63  
    64  func (p *printer) internalError(msg ...interface{}) {
    65  	if debug {
    66  		fmt.Print(p.pos.String() + ": ")
    67  		fmt.Println(msg...)
    68  		panic("go/printer")
    69  	}
    70  }
    71  
    72  func (p *printer) lineFor(pos token.Pos) int {
    73  	return pos.Line()
    74  }
    75  
    76  func (p *printer) Print(v interface{}) {
    77  	var (
    78  		impliedComma = false
    79  		isLit        bool
    80  		data         string
    81  		nextWS       whiteSpace
    82  	)
    83  	switch x := v.(type) {
    84  	case *line:
    85  		*x = p.lineout
    86  
    87  	case token.Token:
    88  		s := x.String()
    89  		before, after := mayCombine(p.lastTok, x)
    90  		if before && !p.spaceBefore {
    91  			// the previous and the current token must be
    92  			// separated by a blank otherwise they combine
    93  			// into a different incorrect token sequence
    94  			// (except for syntax.INT followed by a '.' this
    95  			// should never happen because it is taken care
    96  			// of via binary expression formatting)
    97  			if p.allowed&blank != 0 {
    98  				p.internalError("whitespace buffer not empty")
    99  			}
   100  			p.allowed |= blank
   101  		}
   102  		if after {
   103  			nextWS = blank
   104  		}
   105  		data = s
   106  		switch x {
   107  		case token.EOF:
   108  			data = ""
   109  			p.allowed = newline
   110  			p.allowed &^= newsection
   111  		case token.LPAREN, token.LBRACK, token.LBRACE:
   112  		case token.RPAREN, token.RBRACK, token.RBRACE:
   113  			impliedComma = true
   114  		}
   115  		p.lastTok = x
   116  
   117  	case *ast.BasicLit:
   118  		data = x.Value
   119  		switch x.Kind {
   120  		case token.STRING:
   121  			// TODO: only do this when simplifying. Right now this does not
   122  			// give the right result, but it should be better if:
   123  			// 1) simplification is done as a separate step
   124  			// 2) simplified structs are explicitly referenced separately
   125  			//    in the AST.
   126  			if p.indent < 6 {
   127  				data = literal.IndentTabs(data, p.cfg.Indent+p.indent+1)
   128  			}
   129  
   130  		case token.INT:
   131  			if len(data) > 1 &&
   132  				data[0] == '0' &&
   133  				data[1] >= '0' && data[1] <= '9' {
   134  				data = "0o" + data[1:]
   135  			}
   136  			// Pad trailing dot before multiplier.
   137  			if p := strings.IndexByte(data, '.'); p >= 0 && data[p+1] > '9' {
   138  				data = data[:p+1] + "0" + data[p+1:]
   139  			}
   140  			// Lowercase E, but only if it is not the last character: in the
   141  			// future we may use E for Exa.
   142  			if p := strings.IndexByte(data, 'E'); p != -1 && p < len(data)-1 {
   143  				data = strings.ToLower(data)
   144  			}
   145  
   146  		case token.FLOAT:
   147  			// Pad leading or trailing dots.
   148  			switch p := strings.IndexByte(data, '.'); {
   149  			case p < 0:
   150  			case p == 0:
   151  				data = "0" + data
   152  			case p == len(data)-1:
   153  				data += "0"
   154  			case data[p+1] > '9':
   155  				data = data[:p+1] + "0" + data[p+1:]
   156  			}
   157  			if strings.IndexByte(data, 'E') != -1 {
   158  				data = strings.ToLower(data)
   159  			}
   160  		}
   161  
   162  		isLit = true
   163  		impliedComma = true
   164  		p.lastTok = x.Kind
   165  
   166  	case *ast.Ident:
   167  		data = x.Name
   168  		if !ast.IsValidIdent(data) {
   169  			p.errf(x, "invalid identifier %q", x.Name)
   170  			data = "*bad identifier*"
   171  		}
   172  		impliedComma = true
   173  		p.lastTok = token.IDENT
   174  
   175  	case string:
   176  		data = x
   177  		impliedComma = true
   178  		p.lastTok = token.STRING
   179  
   180  	case *ast.CommentGroup:
   181  		rel := x.Pos().RelPos()
   182  		if x.Line { // TODO: we probably don't need this.
   183  			rel = token.Blank
   184  		}
   185  		switch rel {
   186  		case token.NoRelPos:
   187  		case token.Newline, token.NewSection:
   188  		case token.Blank, token.Elided:
   189  			p.allowed |= blank
   190  			fallthrough
   191  		case token.NoSpace:
   192  			p.allowed &^= newline | newsection | formfeed | declcomma
   193  		}
   194  		return
   195  
   196  	case *ast.Attribute:
   197  		data = x.Text
   198  		impliedComma = true
   199  		p.lastTok = token.ATTRIBUTE
   200  
   201  	case *ast.Comment:
   202  		// TODO: if implied comma, postpone comment
   203  		data = x.Text
   204  		p.lastTok = token.COMMENT
   205  
   206  	case whiteSpace:
   207  		p.allowed |= x
   208  		return
   209  
   210  	case token.Pos:
   211  		// TODO: should we use a known file position to synchronize? Go does,
   212  		// but we don't really have to.
   213  		// pos := x
   214  		if x.HasRelPos() {
   215  			if p.allowed&nooverride == 0 {
   216  				requested := p.allowed
   217  				switch x.RelPos() {
   218  				case token.NoSpace:
   219  					requested &^= newline | newsection | formfeed
   220  				case token.Blank:
   221  					requested |= blank
   222  					requested &^= newline | newsection | formfeed
   223  				case token.Newline:
   224  					requested |= newline
   225  				case token.NewSection:
   226  					requested |= newsection
   227  				}
   228  				p.writeWhitespace(requested)
   229  				p.allowed = 0
   230  				p.requested = 0
   231  			}
   232  			// p.pos = pos
   233  		}
   234  		return
   235  
   236  	default:
   237  		fmt.Fprintf(os.Stderr, "print: unsupported argument %v (%T)\n", x, x)
   238  		panic("go/printer type")
   239  	}
   240  
   241  	p.writeWhitespace(p.allowed)
   242  	p.allowed = 0
   243  	p.requested = 0
   244  	p.writeString(data, isLit)
   245  	p.allowed = nextWS
   246  	_ = impliedComma // TODO: delay comment printings
   247  }
   248  
   249  func (p *printer) writeWhitespace(ws whiteSpace) {
   250  	if ws&comma != 0 {
   251  		switch {
   252  		case ws&(newsection|newline|formfeed) != 0,
   253  			ws&trailcomma == 0:
   254  			p.writeByte(',', 1)
   255  		}
   256  	}
   257  	if ws&indent != 0 {
   258  		p.markLineIndent(ws)
   259  	}
   260  	if ws&unindent != 0 {
   261  		p.markUnindentLine()
   262  	}
   263  	switch {
   264  	case ws&newsection != 0:
   265  		p.maybeIndentLine(ws)
   266  		p.writeByte('\f', 2)
   267  		p.lineout += 2
   268  		p.spaceBefore = true
   269  	case ws&formfeed != 0:
   270  		p.maybeIndentLine(ws)
   271  		p.writeByte('\f', 1)
   272  		p.lineout++
   273  		p.spaceBefore = true
   274  	case ws&newline != 0:
   275  		p.maybeIndentLine(ws)
   276  		p.writeByte('\n', 1)
   277  		p.lineout++
   278  		p.spaceBefore = true
   279  	case ws&declcomma != 0:
   280  		p.writeByte(',', 1)
   281  		p.writeByte(' ', 1)
   282  		p.spaceBefore = true
   283  	case ws&noblank != 0:
   284  	case ws&vtab != 0:
   285  		p.writeByte('\v', 1)
   286  		p.spaceBefore = true
   287  	case ws&blank != 0:
   288  		p.writeByte(' ', 1)
   289  		p.spaceBefore = true
   290  	}
   291  }
   292  
   293  func (p *printer) markLineIndent(ws whiteSpace) {
   294  	p.indentStack = append(p.indentStack, ws)
   295  }
   296  
   297  func (p *printer) markUnindentLine() (wasUnindented bool) {
   298  	last := len(p.indentStack) - 1
   299  	if ws := p.indentStack[last]; ws&indented != 0 {
   300  		p.indent--
   301  		wasUnindented = true
   302  	}
   303  	p.indentStack = p.indentStack[:last]
   304  	return wasUnindented
   305  }
   306  
   307  func (p *printer) maybeIndentLine(ws whiteSpace) {
   308  	if ws&unindent == 0 && len(p.indentStack) > 0 {
   309  		last := len(p.indentStack) - 1
   310  		if ws := p.indentStack[last]; ws&indented != 0 || ws&indent == 0 {
   311  			return
   312  		}
   313  		p.indentStack[last] |= indented
   314  		p.indent++
   315  	}
   316  }
   317  
// matchUnindent sets the unindent flag in f.allowed and always returns 0.
// The commented-out code below sketches an unfinished extension and is not
// executed.
func (f *formatter) matchUnindent() whiteSpace {
	f.allowed |= unindent
	// TODO: make this work. Whitespace from closing bracket should match that
	// of opening if there is no position information.
	// NOTE(review): `ws*indented` in the sketch below is presumably meant to
	// be `ws&indented` - confirm before enabling.
	// f.allowed &^= nooverride | newline | newsection | formfeed | blank | noblank
	// ws := f.indentStack[len(f.indentStack)-1]
	// mask := blank | noblank | vtab
	// f.allowed |= unindent | blank | noblank
	// if ws&newline != 0 || ws*indented != 0 {
	// 	f.allowed |= newline
	// }
	return 0
}
   331  
   332  // writeString writes the string s to p.output and updates p.pos, p.out,
   333  // and p.last. If isLit is set, s is escaped w/ tabwriter.Escape characters
   334  // to protect s from being interpreted by the tabwriter.
   335  //
   336  // Note: writeString is only used to write Go tokens, literals, and
   337  // comments, all of which must be written literally. Thus, it is correct
   338  // to always set isLit = true. However, setting it explicitly only when
   339  // needed (i.e., when we don't know that s contains no tabs or line breaks)
   340  // avoids processing extra escape characters and reduces run time of the
   341  // printer benchmark by up to 10%.
   342  //
   343  func (p *printer) writeString(s string, isLit bool) {
   344  	if s != "" {
   345  		p.spaceBefore = false
   346  	}
   347  
   348  	if isLit {
   349  		// Protect s such that is passes through the tabwriter
   350  		// unchanged. Note that valid Go programs cannot contain
   351  		// tabwriter.Escape bytes since they do not appear in legal
   352  		// UTF-8 sequences.
   353  		p.output = append(p.output, tabwriter.Escape)
   354  	}
   355  
   356  	p.output = append(p.output, s...)
   357  
   358  	if isLit {
   359  		p.output = append(p.output, tabwriter.Escape)
   360  	}
   361  	// update positions
   362  	nLines := 0
   363  	var li int // index of last newline; valid if nLines > 0
   364  	for i := 0; i < len(s); i++ {
   365  		// CUE tokens cannot contain '\f' - no need to look for it
   366  		if s[i] == '\n' {
   367  			nLines++
   368  			li = i
   369  		}
   370  	}
   371  	p.pos.Offset += len(s)
   372  	if nLines > 0 {
   373  		p.pos.Line += nLines
   374  		c := len(s) - li
   375  		p.pos.Column = c
   376  	} else {
   377  		p.pos.Column += len(s)
   378  	}
   379  }
   380  
   381  func (p *printer) writeByte(ch byte, n int) {
   382  	for i := 0; i < n; i++ {
   383  		p.output = append(p.output, ch)
   384  	}
   385  
   386  	// update positions
   387  	p.pos.Offset += n
   388  	if ch == '\n' || ch == '\f' {
   389  		p.pos.Line += n
   390  		p.pos.Column = 1
   391  
   392  		n := p.cfg.Indent + p.indent // include base indentation
   393  		for i := 0; i < n; i++ {
   394  			p.output = append(p.output, '\t')
   395  		}
   396  
   397  		// update positions
   398  		p.pos.Offset += n
   399  		p.pos.Column += n
   400  
   401  		return
   402  	}
   403  	p.pos.Column += n
   404  }
   405  
   406  func mayCombine(prev, next token.Token) (before, after bool) {
   407  	s := next.String()
   408  	if 'a' <= s[0] && s[0] < 'z' {
   409  		return true, true
   410  	}
   411  	switch prev {
   412  	case token.IQUO, token.IREM, token.IDIV, token.IMOD:
   413  		return false, false
   414  	case token.INT:
   415  		before = next == token.PERIOD // 1.
   416  	case token.ADD:
   417  		before = s[0] == '+' // ++
   418  	case token.SUB:
   419  		before = s[0] == '-' // --
   420  	case token.QUO:
   421  		before = s[0] == '*' // /*
   422  	}
   423  	return before, false
   424  }