github.com/joomcode/cue@v0.4.4-0.20221111115225-539fe3512047/cue/format/printer.go (about) 1 // Copyright 2018 The CUE Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package format 16 17 import ( 18 "fmt" 19 "os" 20 "strings" 21 "text/tabwriter" 22 23 "github.com/joomcode/cue/cue/ast" 24 "github.com/joomcode/cue/cue/errors" 25 "github.com/joomcode/cue/cue/literal" 26 "github.com/joomcode/cue/cue/token" 27 ) 28 29 // A printer takes the stream of formatting tokens and spacing directives 30 // produced by the formatter and adjusts the spacing based on the original 31 // source code. 32 type printer struct { 33 cfg *config 34 35 allowed whiteSpace 36 requested whiteSpace 37 indentStack []whiteSpace 38 39 pos token.Position // current pos in AST 40 lineout line 41 42 lastTok token.Token // last token printed (syntax.ILLEGAL if it's whitespace) 43 44 output []byte 45 indent int 46 spaceBefore bool 47 48 errs errors.Error 49 } 50 51 type line int 52 53 func (p *printer) init(cfg *config) { 54 p.cfg = cfg 55 p.pos = token.Position{Line: 1, Column: 1} 56 } 57 58 func (p *printer) errf(n ast.Node, format string, args ...interface{}) { 59 p.errs = errors.Append(p.errs, errors.Newf(n.Pos(), format, args...)) 60 } 61 62 const debug = false 63 64 func (p *printer) internalError(msg ...interface{}) { 65 if debug { 66 fmt.Print(p.pos.String() + ": ") 67 fmt.Println(msg...) 68 panic("go/printer") 69 } 70 } 71 72 func (p *printer) lineFor(pos token.Pos) int { 73 return pos.Line() 74 } 75 76 func (p *printer) Print(v interface{}) { 77 var ( 78 impliedComma = false 79 isLit bool 80 data string 81 nextWS whiteSpace 82 ) 83 switch x := v.(type) { 84 case *line: 85 *x = p.lineout 86 87 case token.Token: 88 s := x.String() 89 before, after := mayCombine(p.lastTok, x) 90 if before && !p.spaceBefore { 91 // the previous and the current token must be 92 // separated by a blank otherwise they combine 93 // into a different incorrect token sequence 94 // (except for syntax.INT followed by a '.' this 95 // should never happen because it is taken care 96 // of via binary expression formatting) 97 if p.allowed&blank != 0 { 98 p.internalError("whitespace buffer not empty") 99 } 100 p.allowed |= blank 101 } 102 if after { 103 nextWS = blank 104 } 105 data = s 106 switch x { 107 case token.EOF: 108 data = "" 109 p.allowed = newline 110 p.allowed &^= newsection 111 case token.LPAREN, token.LBRACK, token.LBRACE: 112 case token.RPAREN, token.RBRACK, token.RBRACE: 113 impliedComma = true 114 } 115 p.lastTok = x 116 117 case *ast.BasicLit: 118 data = x.Value 119 switch x.Kind { 120 case token.STRING: 121 // TODO: only do this when simplifying. Right now this does not 122 // give the right result, but it should be better if: 123 // 1) simplification is done as a separate step 124 // 2) simplified structs are explicitly referenced separately 125 // in the AST. 126 if p.indent < 6 { 127 data = literal.IndentTabs(data, p.cfg.Indent+p.indent+1) 128 } 129 130 case token.INT: 131 if len(data) > 1 && 132 data[0] == '0' && 133 data[1] >= '0' && data[1] <= '9' { 134 data = "0o" + data[1:] 135 } 136 // Pad trailing dot before multiplier. 137 if p := strings.IndexByte(data, '.'); p >= 0 && data[p+1] > '9' { 138 data = data[:p+1] + "0" + data[p+1:] 139 } 140 // Lowercase E, but only if it is not the last character: in the 141 // future we may use E for Exa. 142 if p := strings.IndexByte(data, 'E'); p != -1 && p < len(data)-1 { 143 data = strings.ToLower(data) 144 } 145 146 case token.FLOAT: 147 // Pad leading or trailing dots. 148 switch p := strings.IndexByte(data, '.'); { 149 case p < 0: 150 case p == 0: 151 data = "0" + data 152 case p == len(data)-1: 153 data += "0" 154 case data[p+1] > '9': 155 data = data[:p+1] + "0" + data[p+1:] 156 } 157 if strings.IndexByte(data, 'E') != -1 { 158 data = strings.ToLower(data) 159 } 160 } 161 162 isLit = true 163 impliedComma = true 164 p.lastTok = x.Kind 165 166 case *ast.Ident: 167 data = x.Name 168 if !ast.IsValidIdent(data) { 169 p.errf(x, "invalid identifier %q", x.Name) 170 data = "*bad identifier*" 171 } 172 impliedComma = true 173 p.lastTok = token.IDENT 174 175 case string: 176 data = x 177 impliedComma = true 178 p.lastTok = token.STRING 179 180 case *ast.CommentGroup: 181 rel := x.Pos().RelPos() 182 if x.Line { // TODO: we probably don't need this. 183 rel = token.Blank 184 } 185 switch rel { 186 case token.NoRelPos: 187 case token.Newline, token.NewSection: 188 case token.Blank, token.Elided: 189 p.allowed |= blank 190 fallthrough 191 case token.NoSpace: 192 p.allowed &^= newline | newsection | formfeed | declcomma 193 } 194 return 195 196 case *ast.Attribute: 197 data = x.Text 198 impliedComma = true 199 p.lastTok = token.ATTRIBUTE 200 201 case *ast.Comment: 202 // TODO: if implied comma, postpone comment 203 data = x.Text 204 p.lastTok = token.COMMENT 205 206 case whiteSpace: 207 p.allowed |= x 208 return 209 210 case token.Pos: 211 // TODO: should we use a known file position to synchronize? Go does, 212 // but we don't really have to. 213 // pos := x 214 if x.HasRelPos() { 215 if p.allowed&nooverride == 0 { 216 requested := p.allowed 217 switch x.RelPos() { 218 case token.NoSpace: 219 requested &^= newline | newsection | formfeed 220 case token.Blank: 221 requested |= blank 222 requested &^= newline | newsection | formfeed 223 case token.Newline: 224 requested |= newline 225 case token.NewSection: 226 requested |= newsection 227 } 228 p.writeWhitespace(requested) 229 p.allowed = 0 230 p.requested = 0 231 } 232 // p.pos = pos 233 } 234 return 235 236 default: 237 fmt.Fprintf(os.Stderr, "print: unsupported argument %v (%T)\n", x, x) 238 panic("go/printer type") 239 } 240 241 p.writeWhitespace(p.allowed) 242 p.allowed = 0 243 p.requested = 0 244 p.writeString(data, isLit) 245 p.allowed = nextWS 246 _ = impliedComma // TODO: delay comment printings 247 } 248 249 func (p *printer) writeWhitespace(ws whiteSpace) { 250 if ws&comma != 0 { 251 switch { 252 case ws&(newsection|newline|formfeed) != 0, 253 ws&trailcomma == 0: 254 p.writeByte(',', 1) 255 } 256 } 257 if ws&indent != 0 { 258 p.markLineIndent(ws) 259 } 260 if ws&unindent != 0 { 261 p.markUnindentLine() 262 } 263 switch { 264 case ws&newsection != 0: 265 p.maybeIndentLine(ws) 266 p.writeByte('\f', 2) 267 p.lineout += 2 268 p.spaceBefore = true 269 case ws&formfeed != 0: 270 p.maybeIndentLine(ws) 271 p.writeByte('\f', 1) 272 p.lineout++ 273 p.spaceBefore = true 274 case ws&newline != 0: 275 p.maybeIndentLine(ws) 276 p.writeByte('\n', 1) 277 p.lineout++ 278 p.spaceBefore = true 279 case ws&declcomma != 0: 280 p.writeByte(',', 1) 281 p.writeByte(' ', 1) 282 p.spaceBefore = true 283 case ws&noblank != 0: 284 case ws&vtab != 0: 285 p.writeByte('\v', 1) 286 p.spaceBefore = true 287 case ws&blank != 0: 288 p.writeByte(' ', 1) 289 p.spaceBefore = true 290 } 291 } 292 293 func (p *printer) markLineIndent(ws whiteSpace) { 294 p.indentStack = append(p.indentStack, ws) 295 } 296 297 func (p *printer) markUnindentLine() (wasUnindented bool) { 298 last := len(p.indentStack) - 1 299 if ws := p.indentStack[last]; ws&indented != 0 { 300 p.indent-- 301 wasUnindented = true 302 } 303 p.indentStack = p.indentStack[:last] 304 return wasUnindented 305 } 306 307 func (p *printer) maybeIndentLine(ws whiteSpace) { 308 if ws&unindent == 0 && len(p.indentStack) > 0 { 309 last := len(p.indentStack) - 1 310 if ws := p.indentStack[last]; ws&indented != 0 || ws&indent == 0 { 311 return 312 } 313 p.indentStack[last] |= indented 314 p.indent++ 315 } 316 } 317 318 func (f *formatter) matchUnindent() whiteSpace { 319 f.allowed |= unindent 320 // TODO: make this work. Whitespace from closing bracket should match that 321 // of opening if there is no position information. 322 // f.allowed &^= nooverride | newline | newsection | formfeed | blank | noblank 323 // ws := f.indentStack[len(f.indentStack)-1] 324 // mask := blank | noblank | vtab 325 // f.allowed |= unindent | blank | noblank 326 // if ws&newline != 0 || ws*indented != 0 { 327 // f.allowed |= newline 328 // } 329 return 0 330 } 331 332 // writeString writes the string s to p.output and updates p.pos, p.out, 333 // and p.last. If isLit is set, s is escaped w/ tabwriter.Escape characters 334 // to protect s from being interpreted by the tabwriter. 335 // 336 // Note: writeString is only used to write Go tokens, literals, and 337 // comments, all of which must be written literally. Thus, it is correct 338 // to always set isLit = true. However, setting it explicitly only when 339 // needed (i.e., when we don't know that s contains no tabs or line breaks) 340 // avoids processing extra escape characters and reduces run time of the 341 // printer benchmark by up to 10%. 342 // 343 func (p *printer) writeString(s string, isLit bool) { 344 if s != "" { 345 p.spaceBefore = false 346 } 347 348 if isLit { 349 // Protect s such that is passes through the tabwriter 350 // unchanged. Note that valid Go programs cannot contain 351 // tabwriter.Escape bytes since they do not appear in legal 352 // UTF-8 sequences. 353 p.output = append(p.output, tabwriter.Escape) 354 } 355 356 p.output = append(p.output, s...) 357 358 if isLit { 359 p.output = append(p.output, tabwriter.Escape) 360 } 361 // update positions 362 nLines := 0 363 var li int // index of last newline; valid if nLines > 0 364 for i := 0; i < len(s); i++ { 365 // CUE tokens cannot contain '\f' - no need to look for it 366 if s[i] == '\n' { 367 nLines++ 368 li = i 369 } 370 } 371 p.pos.Offset += len(s) 372 if nLines > 0 { 373 p.pos.Line += nLines 374 c := len(s) - li 375 p.pos.Column = c 376 } else { 377 p.pos.Column += len(s) 378 } 379 } 380 381 func (p *printer) writeByte(ch byte, n int) { 382 for i := 0; i < n; i++ { 383 p.output = append(p.output, ch) 384 } 385 386 // update positions 387 p.pos.Offset += n 388 if ch == '\n' || ch == '\f' { 389 p.pos.Line += n 390 p.pos.Column = 1 391 392 n := p.cfg.Indent + p.indent // include base indentation 393 for i := 0; i < n; i++ { 394 p.output = append(p.output, '\t') 395 } 396 397 // update positions 398 p.pos.Offset += n 399 p.pos.Column += n 400 401 return 402 } 403 p.pos.Column += n 404 } 405 406 func mayCombine(prev, next token.Token) (before, after bool) { 407 s := next.String() 408 if 'a' <= s[0] && s[0] < 'z' { 409 return true, true 410 } 411 switch prev { 412 case token.IQUO, token.IREM, token.IDIV, token.IMOD: 413 return false, false 414 case token.INT: 415 before = next == token.PERIOD // 1. 416 case token.ADD: 417 before = s[0] == '+' // ++ 418 case token.SUB: 419 before = s[0] == '-' // -- 420 case token.QUO: 421 before = s[0] == '*' // /* 422 } 423 return before, false 424 }