cuelang.org/go@v0.10.1/cue/format/printer.go (about) 1 // Copyright 2018 The CUE Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package format 16 17 import ( 18 "fmt" 19 "os" 20 "strings" 21 "text/tabwriter" 22 23 "cuelang.org/go/cue/ast" 24 "cuelang.org/go/cue/errors" 25 "cuelang.org/go/cue/literal" 26 "cuelang.org/go/cue/token" 27 ) 28 29 // A printer takes the stream of formatting tokens and spacing directives 30 // produced by the formatter and adjusts the spacing based on the original 31 // source code. 32 type printer struct { 33 cfg *config 34 35 allowed whiteSpace 36 requested whiteSpace 37 indentStack []whiteSpace 38 39 pos token.Position // current pos in AST 40 lineout line 41 42 lastTok token.Token // last token printed (syntax.ILLEGAL if it's whitespace) 43 44 output []byte 45 indent int 46 spaceBefore bool 47 prevLbraceOnLine bool // true if a '{' has been written on the current line 48 49 // TODO(mvdan): This is similar to nooverride but used only for comments, 50 // to ensure that we always print a newline after them. 51 // We should fix our logic with whiteSpace instead, but for now this ensures 52 // we don't break the syntax by omitting the newline after a comment. 53 printingComment bool 54 55 errs errors.Error 56 } 57 58 type line int 59 60 func (p *printer) init(cfg *config) { 61 p.cfg = cfg 62 p.pos = token.Position{Line: 1, Column: 1} 63 } 64 65 func (p *printer) errf(n ast.Node, format string, args ...interface{}) { 66 p.errs = errors.Append(p.errs, errors.Newf(n.Pos(), format, args...)) 67 } 68 69 const debug = false 70 71 func (p *printer) internalError(msg ...interface{}) { 72 if debug { 73 fmt.Print(p.pos.String() + ": ") 74 fmt.Println(msg...) 75 panic("go/printer") 76 } 77 } 78 79 func (p *printer) lineFor(pos token.Pos) int { 80 return pos.Line() 81 } 82 83 func (p *printer) Print(v interface{}) { 84 var ( 85 impliedComma = false 86 isLit bool 87 data string 88 nextWS whiteSpace 89 ) 90 switch x := v.(type) { 91 case *line: 92 *x = p.lineout 93 94 case token.Token: 95 s := x.String() 96 before, after := mayCombine(p.lastTok, x) 97 if before && !p.spaceBefore { 98 // the previous and the current token must be 99 // separated by a blank otherwise they combine 100 // into a different incorrect token sequence 101 // (except for syntax.INT followed by a '.' this 102 // should never happen because it is taken care 103 // of via binary expression formatting) 104 if p.allowed&blank != 0 { 105 p.internalError("whitespace buffer not empty") 106 } 107 p.allowed |= blank 108 } 109 if after { 110 nextWS = blank 111 } 112 data = s 113 switch x { 114 case token.EOF: 115 data = "" 116 p.allowed = newline 117 p.allowed &^= newsection 118 case token.LPAREN, token.LBRACK, token.LBRACE: 119 case token.RPAREN, token.RBRACK, token.RBRACE: 120 impliedComma = true 121 } 122 p.lastTok = x 123 124 case *ast.BasicLit: 125 data = x.Value 126 switch x.Kind { 127 case token.STRING: 128 // TODO: only do this when simplifying. Right now this does not 129 // give the right result, but it should be better if: 130 // 1) simplification is done as a separate step 131 // 2) simplified structs are explicitly referenced separately 132 // in the AST. 133 if p.indent < 6 { 134 data = literal.IndentTabs(data, p.cfg.Indent+p.indent+1) 135 } 136 137 case token.INT: 138 if len(data) > 1 && 139 data[0] == '0' && 140 data[1] >= '0' && data[1] <= '9' { 141 data = "0o" + data[1:] 142 } 143 // Pad trailing dot before multiplier. 144 if p := strings.IndexByte(data, '.'); p >= 0 && data[p+1] > '9' { 145 data = data[:p+1] + "0" + data[p+1:] 146 } 147 // Lowercase E, but only if it is not the last character: in the 148 // future we may use E for Exa. 149 if p := strings.IndexByte(data, 'E'); p != -1 && p < len(data)-1 { 150 data = strings.ToLower(data) 151 } 152 153 case token.FLOAT: 154 // Pad leading or trailing dots. 155 switch p := strings.IndexByte(data, '.'); { 156 case p < 0: 157 case p == 0: 158 data = "0" + data 159 case p == len(data)-1: 160 data += "0" 161 case data[p+1] > '9': 162 data = data[:p+1] + "0" + data[p+1:] 163 } 164 if strings.IndexByte(data, 'E') != -1 { 165 data = strings.ToLower(data) 166 } 167 } 168 169 isLit = true 170 impliedComma = true 171 p.lastTok = x.Kind 172 173 case *ast.Ident: 174 data = x.Name 175 if !ast.IsValidIdent(data) { 176 p.errf(x, "invalid identifier %q", x.Name) 177 data = "*bad identifier*" 178 } 179 impliedComma = true 180 p.lastTok = token.IDENT 181 182 case string: 183 // We can print a Go string as part of a CUE identifier or literal; 184 // for example, see the formatter.label method. 185 isLit = true 186 data = x 187 impliedComma = true 188 p.lastTok = token.STRING 189 190 case *ast.CommentGroup: 191 rel := x.Pos().RelPos() 192 if x.Line { // TODO: we probably don't need this. 193 rel = token.Blank 194 } 195 switch rel { 196 case token.NoRelPos: 197 case token.Newline, token.NewSection: 198 case token.Blank, token.Elided: 199 p.allowed |= blank 200 fallthrough 201 case token.NoSpace: 202 p.allowed &^= newline | newsection | formfeed | declcomma 203 } 204 return 205 206 case *ast.Attribute: 207 isLit = true 208 data = x.Text 209 impliedComma = true 210 p.lastTok = token.ATTRIBUTE 211 212 case *ast.Comment: 213 // TODO: if implied comma, postpone comment 214 isLit = true 215 data = x.Text 216 p.lastTok = token.COMMENT 217 218 case whiteSpace: 219 p.allowed |= x 220 return 221 222 case token.Pos: 223 // TODO: should we use a known file position to synchronize? Go does, 224 // but we don't really have to. 225 // pos := x 226 if x.HasRelPos() { 227 if p.allowed&nooverride == 0 { 228 requested := p.allowed 229 switch x.RelPos() { 230 case token.NoSpace: 231 requested &^= newline | newsection | formfeed 232 case token.Blank: 233 requested |= blank 234 requested &^= newline | newsection | formfeed 235 case token.Newline: 236 requested |= newline 237 case token.NewSection: 238 requested |= newsection 239 } 240 if p.printingComment { 241 requested |= newline 242 } 243 p.writeWhitespace(requested) 244 p.allowed = 0 245 p.requested = 0 246 } 247 // p.pos = pos 248 } 249 return 250 251 default: 252 fmt.Fprintf(os.Stderr, "print: unsupported argument %v (%T)\n", x, x) 253 panic("go/printer type") 254 } 255 256 p.writeWhitespace(p.allowed) 257 p.allowed = 0 258 p.requested = 0 259 p.printingComment = false 260 p.writeString(data, isLit) 261 p.allowed = nextWS 262 _ = impliedComma // TODO: delay comment printings 263 } 264 265 func (p *printer) writeWhitespace(ws whiteSpace) { 266 if ws&comma != 0 { 267 switch { 268 case ws&(newsection|newline|formfeed) != 0, 269 ws&trailcomma == 0: 270 p.writeByte(',', 1) 271 } 272 } 273 if ws&indent != 0 { 274 p.markLineIndent(ws) 275 } 276 if ws&unindent != 0 { 277 p.markUnindentLine() 278 } 279 switch { 280 case ws&newsection != 0: 281 p.maybeIndentLine(ws) 282 p.writeByte('\f', 2) 283 p.incrementLine(2) 284 p.spaceBefore = true 285 case ws&formfeed != 0: 286 p.maybeIndentLine(ws) 287 p.writeByte('\f', 1) 288 p.incrementLine(1) 289 p.spaceBefore = true 290 case ws&newline != 0: 291 p.maybeIndentLine(ws) 292 p.writeByte('\n', 1) 293 p.incrementLine(1) 294 p.spaceBefore = true 295 case ws&declcomma != 0: 296 p.writeByte(',', 1) 297 p.writeByte(' ', 1) 298 p.spaceBefore = true 299 case ws&noblank != 0: 300 case ws&vtab != 0: 301 p.writeByte('\v', 1) 302 p.spaceBefore = true 303 case ws&blank != 0: 304 p.writeByte(' ', 1) 305 p.spaceBefore = true 306 } 307 } 308 309 func (p *printer) incrementLine(n int) { 310 if n != 0 { 311 p.prevLbraceOnLine = false 312 } 313 p.lineout += line(n) 314 } 315 316 func (p *printer) markLineIndent(ws whiteSpace) { 317 p.indentStack = append(p.indentStack, ws) 318 } 319 320 func (p *printer) markUnindentLine() (wasUnindented bool) { 321 last := len(p.indentStack) - 1 322 if ws := p.indentStack[last]; ws&indented != 0 { 323 p.indent-- 324 wasUnindented = true 325 } 326 p.indentStack = p.indentStack[:last] 327 return wasUnindented 328 } 329 330 func (p *printer) maybeIndentLine(ws whiteSpace) { 331 if ws&unindent == 0 && len(p.indentStack) > 0 { 332 last := len(p.indentStack) - 1 333 if ws := p.indentStack[last]; ws&indented != 0 || ws&indent == 0 { 334 return 335 } 336 p.indentStack[last] |= indented 337 p.indent++ 338 } 339 } 340 341 func (f *formatter) matchUnindent() whiteSpace { 342 f.allowed |= unindent 343 // TODO: make this work. Whitespace from closing bracket should match that 344 // of opening if there is no position information. 345 // f.allowed &^= nooverride | newline | newsection | formfeed | blank | noblank 346 // ws := f.indentStack[len(f.indentStack)-1] 347 // mask := blank | noblank | vtab 348 // f.allowed |= unindent | blank | noblank 349 // if ws&newline != 0 || ws*indented != 0 { 350 // f.allowed |= newline 351 // } 352 return 0 353 } 354 355 // writeString writes the string s to p.output and updates p.pos, p.out, 356 // and p.last. If isLit is set, s is escaped w/ tabwriter.Escape characters 357 // to protect s from being interpreted by the tabwriter. 358 // 359 // Note: writeString is only used to write Go tokens, literals, and 360 // comments, all of which must be written literally. Thus, it is correct 361 // to always set isLit = true. However, setting it explicitly only when 362 // needed (i.e., when we don't know that s contains no tabs or line breaks) 363 // avoids processing extra escape characters and reduces run time of the 364 // printer benchmark by up to 10%. 365 func (p *printer) writeString(s string, isLit bool) { 366 if s != "" { 367 p.spaceBefore = false 368 } 369 370 if isLit { 371 // Protect s such that is passes through the tabwriter 372 // unchanged. Note that valid Go programs cannot contain 373 // tabwriter.Escape bytes since they do not appear in legal 374 // UTF-8 sequences. 375 p.output = append(p.output, tabwriter.Escape) 376 } 377 378 p.output = append(p.output, s...) 379 380 if isLit { 381 p.output = append(p.output, tabwriter.Escape) 382 } 383 // update positions 384 nLines := 0 385 var li int // index of last newline; valid if nLines > 0 386 for i := 0; i < len(s); i++ { 387 // CUE tokens cannot contain '\f' - no need to look for it 388 if s[i] == '\n' { 389 nLines++ 390 li = i 391 } 392 } 393 p.pos.Offset += len(s) 394 if nLines > 0 { 395 p.pos.Line += nLines 396 c := len(s) - li 397 p.pos.Column = c 398 } else { 399 p.pos.Column += len(s) 400 } 401 } 402 403 func (p *printer) writeByte(ch byte, n int) { 404 for i := 0; i < n; i++ { 405 p.output = append(p.output, ch) 406 } 407 408 // update positions 409 p.pos.Offset += n 410 if ch == '\n' || ch == '\f' { 411 p.pos.Line += n 412 p.pos.Column = 1 413 414 n := p.cfg.Indent + p.indent // include base indentation 415 for i := 0; i < n; i++ { 416 p.output = append(p.output, '\t') 417 } 418 419 // update positions 420 p.pos.Offset += n 421 p.pos.Column += n 422 423 return 424 } 425 p.pos.Column += n 426 } 427 428 // TODO(mvdan): mayCombine as a name was carried over from Go, 429 // but it doesn't really make sense as a name for our logic here, 430 // since we return true when either side must use a blank space. 431 432 func mayCombine(prev, next token.Token) (before, after bool) { 433 s := next.String() 434 if 'a' <= s[0] && s[0] < 'z' { 435 if prev == token.ILLEGAL { 436 // If we're printing the first token, 437 // we don't need a blank space before it. 438 return false, true 439 } 440 return true, true 441 } 442 switch prev { 443 case token.IQUO, token.IREM, token.IDIV, token.IMOD: 444 return false, false 445 case token.INT: 446 before = next == token.PERIOD // 1. 447 case token.ADD: 448 before = s[0] == '+' // ++ 449 case token.SUB: 450 before = s[0] == '-' // -- 451 case token.QUO: 452 before = s[0] == '*' // /* 453 } 454 return before, false 455 }