github.com/hashicorp/hcl/v2@v2.20.0/hclwrite/format.go (about) 1 // Copyright (c) HashiCorp, Inc. 2 // SPDX-License-Identifier: MPL-2.0 3 4 package hclwrite 5 6 import ( 7 "github.com/hashicorp/hcl/v2/hclsyntax" 8 ) 9 10 // format rewrites tokens within the given sequence, in-place, to adjust the 11 // whitespace around their content to achieve canonical formatting. 12 func format(tokens Tokens) { 13 // Formatting is a multi-pass process. More details on the passes below, 14 // but this is the overview: 15 // - adjust the leading space on each line to create appropriate 16 // indentation 17 // - adjust spaces between tokens in a single cell using a set of rules 18 // - adjust the leading space in the "assign" and "comment" cells on each 19 // line to vertically align with neighboring lines. 20 // All of these steps operate in-place on the given tokens, so a caller 21 // may collect a flat sequence of all of the tokens underlying an AST 22 // and pass it here and we will then indirectly modify the AST itself. 23 // Formatting must change only whitespace. Specifically, that means 24 // changing the SpacesBefore attribute on a token while leaving the 25 // other token attributes unchanged. 26 27 lines := linesForFormat(tokens) 28 formatIndent(lines) 29 formatSpaces(lines) 30 formatCells(lines) 31 } 32 33 func formatIndent(lines []formatLine) { 34 // Our methodology for indents is to take the input one line at a time 35 // and count the bracketing delimiters on each line. If a line has a net 36 // increase in open brackets, we increase the indent level by one and 37 // remember how many new openers we had. If the line has a net _decrease_, 38 // we'll compare it to the most recent number of openers and decrease the 39 // dedent level by one each time we pass an indent level remembered 40 // earlier. 41 // The "indent stack" used here allows for us to recognize degenerate 42 // input where brackets are not symmetrical within lines and avoid 43 // pushing things too far left or right, creating confusion. 44 45 // We'll start our indent stack at a reasonable capacity to minimize the 46 // chance of us needing to grow it; 10 here means 10 levels of indent, 47 // which should be more than enough for reasonable HCL uses. 48 indents := make([]int, 0, 10) 49 50 for i := range lines { 51 line := &lines[i] 52 if len(line.lead) == 0 { 53 continue 54 } 55 56 if line.lead[0].Type == hclsyntax.TokenNewline { 57 // Never place spaces before a newline 58 line.lead[0].SpacesBefore = 0 59 continue 60 } 61 62 netBrackets := 0 63 for _, token := range line.lead { 64 netBrackets += tokenBracketChange(token) 65 if token.Type == hclsyntax.TokenOHeredoc { 66 break 67 } 68 } 69 70 for _, token := range line.assign { 71 netBrackets += tokenBracketChange(token) 72 } 73 74 switch { 75 case netBrackets > 0: 76 line.lead[0].SpacesBefore = 2 * len(indents) 77 indents = append(indents, netBrackets) 78 case netBrackets < 0: 79 closed := -netBrackets 80 for closed > 0 && len(indents) > 0 { 81 switch { 82 83 case closed > indents[len(indents)-1]: 84 closed -= indents[len(indents)-1] 85 indents = indents[:len(indents)-1] 86 87 case closed < indents[len(indents)-1]: 88 indents[len(indents)-1] -= closed 89 closed = 0 90 91 default: 92 indents = indents[:len(indents)-1] 93 closed = 0 94 } 95 } 96 line.lead[0].SpacesBefore = 2 * len(indents) 97 default: 98 line.lead[0].SpacesBefore = 2 * len(indents) 99 } 100 } 101 } 102 103 func formatSpaces(lines []formatLine) { 104 // placeholder token used when we don't have a token but we don't want 105 // to pass a real "nil" and complicate things with nil pointer checks 106 nilToken := &Token{ 107 Type: hclsyntax.TokenNil, 108 Bytes: []byte{}, 109 SpacesBefore: 0, 110 } 111 112 for _, line := range lines { 113 for i, token := range line.lead { 114 var before, after *Token 115 if i > 0 { 116 before = line.lead[i-1] 117 } else { 118 before = nilToken 119 } 120 if i < (len(line.lead) - 1) { 121 after = line.lead[i+1] 122 } else { 123 continue 124 } 125 if spaceAfterToken(token, before, after) { 126 after.SpacesBefore = 1 127 } else { 128 after.SpacesBefore = 0 129 } 130 } 131 for i, token := range line.assign { 132 if i == 0 { 133 // first token in "assign" always has one space before to 134 // separate the equals sign from what it's assigning. 135 token.SpacesBefore = 1 136 } 137 138 var before, after *Token 139 if i > 0 { 140 before = line.assign[i-1] 141 } else { 142 before = nilToken 143 } 144 if i < (len(line.assign) - 1) { 145 after = line.assign[i+1] 146 } else { 147 continue 148 } 149 if spaceAfterToken(token, before, after) { 150 after.SpacesBefore = 1 151 } else { 152 after.SpacesBefore = 0 153 } 154 } 155 156 } 157 } 158 159 func formatCells(lines []formatLine) { 160 chainStart := -1 161 maxColumns := 0 162 163 // We'll deal with the "assign" cell first, since moving that will 164 // also impact the "comment" cell. 165 closeAssignChain := func(i int) { 166 for _, chainLine := range lines[chainStart:i] { 167 columns := chainLine.lead.Columns() 168 spaces := (maxColumns - columns) + 1 169 chainLine.assign[0].SpacesBefore = spaces 170 } 171 chainStart = -1 172 maxColumns = 0 173 } 174 for i, line := range lines { 175 if line.assign == nil { 176 if chainStart != -1 { 177 closeAssignChain(i) 178 } 179 } else { 180 if chainStart == -1 { 181 chainStart = i 182 } 183 columns := line.lead.Columns() 184 if columns > maxColumns { 185 maxColumns = columns 186 } 187 } 188 } 189 if chainStart != -1 { 190 closeAssignChain(len(lines)) 191 } 192 193 // Now we'll deal with the comments 194 closeCommentChain := func(i int) { 195 for _, chainLine := range lines[chainStart:i] { 196 columns := chainLine.lead.Columns() + chainLine.assign.Columns() 197 spaces := (maxColumns - columns) + 1 198 chainLine.comment[0].SpacesBefore = spaces 199 } 200 chainStart = -1 201 maxColumns = 0 202 } 203 for i, line := range lines { 204 if line.comment == nil { 205 if chainStart != -1 { 206 closeCommentChain(i) 207 } 208 } else { 209 if chainStart == -1 { 210 chainStart = i 211 } 212 columns := line.lead.Columns() + line.assign.Columns() 213 if columns > maxColumns { 214 maxColumns = columns 215 } 216 } 217 } 218 if chainStart != -1 { 219 closeCommentChain(len(lines)) 220 } 221 } 222 223 // spaceAfterToken decides whether a particular subject token should have a 224 // space after it when surrounded by the given before and after tokens. 225 // "before" can be TokenNil, if the subject token is at the start of a sequence. 226 func spaceAfterToken(subject, before, after *Token) bool { 227 switch { 228 229 case after.Type == hclsyntax.TokenNewline || after.Type == hclsyntax.TokenNil: 230 // Never add spaces before a newline 231 return false 232 233 case subject.Type == hclsyntax.TokenIdent && after.Type == hclsyntax.TokenOParen: 234 // Don't split a function name from open paren in a call 235 return false 236 237 case (subject.Type == hclsyntax.TokenIdent && after.Type == hclsyntax.TokenDoubleColon) || 238 (subject.Type == hclsyntax.TokenDoubleColon && after.Type == hclsyntax.TokenIdent): 239 // Don't split namespace segments in a function call 240 return false 241 242 case subject.Type == hclsyntax.TokenDot || after.Type == hclsyntax.TokenDot: 243 // Don't use spaces around attribute access dots 244 return false 245 246 case after.Type == hclsyntax.TokenComma || after.Type == hclsyntax.TokenEllipsis: 247 // No space right before a comma or ... in an argument list 248 return false 249 250 case subject.Type == hclsyntax.TokenComma: 251 // Always a space after a comma 252 return true 253 254 case subject.Type == hclsyntax.TokenQuotedLit || subject.Type == hclsyntax.TokenStringLit || subject.Type == hclsyntax.TokenOQuote || subject.Type == hclsyntax.TokenOHeredoc || after.Type == hclsyntax.TokenQuotedLit || after.Type == hclsyntax.TokenStringLit || after.Type == hclsyntax.TokenCQuote || after.Type == hclsyntax.TokenCHeredoc: 255 // No extra spaces within templates 256 return false 257 258 case hclsyntax.Keyword([]byte{'i', 'n'}).TokenMatches(subject.asHCLSyntax()) && before.Type == hclsyntax.TokenIdent: 259 // This is a special case for inside for expressions where a user 260 // might want to use a literal tuple constructor: 261 // [for x in [foo]: x] 262 // ... in that case, we would normally produce in[foo] thinking that 263 // in is a reference, but we'll recognize it as a keyword here instead 264 // to make the result less confusing. 265 return true 266 267 case after.Type == hclsyntax.TokenOBrack && (subject.Type == hclsyntax.TokenIdent || subject.Type == hclsyntax.TokenNumberLit || tokenBracketChange(subject) < 0): 268 return false 269 270 case subject.Type == hclsyntax.TokenBang: 271 // No space after a bang 272 return false 273 274 case subject.Type == hclsyntax.TokenMinus: 275 // Since a minus can either be subtraction or negation, and the latter 276 // should _not_ have a space after it, we need to use some heuristics 277 // to decide which case this is. 278 // We guess that we have a negation if the token before doesn't look 279 // like it could be the end of an expression. 280 281 switch before.Type { 282 283 case hclsyntax.TokenNil: 284 // Minus at the start of input must be a negation 285 return false 286 287 case hclsyntax.TokenOParen, hclsyntax.TokenOBrace, hclsyntax.TokenOBrack, hclsyntax.TokenEqual, hclsyntax.TokenColon, hclsyntax.TokenComma, hclsyntax.TokenQuestion: 288 // Minus immediately after an opening bracket or separator must be a negation. 289 return false 290 291 case hclsyntax.TokenPlus, hclsyntax.TokenStar, hclsyntax.TokenSlash, hclsyntax.TokenPercent, hclsyntax.TokenMinus: 292 // Minus immediately after another arithmetic operator must be negation. 293 return false 294 295 case hclsyntax.TokenEqualOp, hclsyntax.TokenNotEqual, hclsyntax.TokenGreaterThan, hclsyntax.TokenGreaterThanEq, hclsyntax.TokenLessThan, hclsyntax.TokenLessThanEq: 296 // Minus immediately after another comparison operator must be negation. 297 return false 298 299 case hclsyntax.TokenAnd, hclsyntax.TokenOr, hclsyntax.TokenBang: 300 // Minus immediately after logical operator doesn't make sense but probably intended as negation. 301 return false 302 303 default: 304 return true 305 } 306 307 case subject.Type == hclsyntax.TokenOBrace || after.Type == hclsyntax.TokenCBrace: 308 // Unlike other bracket types, braces have spaces on both sides of them, 309 // both in single-line nested blocks foo { bar = baz } and in object 310 // constructor expressions foo = { bar = baz }. 311 if subject.Type == hclsyntax.TokenOBrace && after.Type == hclsyntax.TokenCBrace { 312 // An open brace followed by a close brace is an exception, however. 313 // e.g. foo {} rather than foo { } 314 return false 315 } 316 return true 317 318 // In the unlikely event that an interpolation expression is just 319 // a single object constructor, we'll put a space between the ${ and 320 // the following { to make this more obvious, and then the same 321 // thing for the two braces at the end. 322 case (subject.Type == hclsyntax.TokenTemplateInterp || subject.Type == hclsyntax.TokenTemplateControl) && after.Type == hclsyntax.TokenOBrace: 323 return true 324 case subject.Type == hclsyntax.TokenCBrace && after.Type == hclsyntax.TokenTemplateSeqEnd: 325 return true 326 327 // Don't add spaces between interpolated items 328 case subject.Type == hclsyntax.TokenTemplateSeqEnd && (after.Type == hclsyntax.TokenTemplateInterp || after.Type == hclsyntax.TokenTemplateControl): 329 return false 330 331 case tokenBracketChange(subject) > 0: 332 // No spaces after open brackets 333 return false 334 335 case tokenBracketChange(after) < 0: 336 // No spaces before close brackets 337 return false 338 339 default: 340 // Most tokens are space-separated 341 return true 342 343 } 344 } 345 346 func linesForFormat(tokens Tokens) []formatLine { 347 if len(tokens) == 0 { 348 return make([]formatLine, 0) 349 } 350 351 // first we'll count our lines, so we can allocate the array for them in 352 // a single block. (We want to minimize memory pressure in this codepath, 353 // so it can be run somewhat-frequently by editor integrations.) 354 lineCount := 1 // if there are zero newlines then there is one line 355 for _, tok := range tokens { 356 if tokenIsNewline(tok) { 357 lineCount++ 358 } 359 } 360 361 // To start, we'll just put everything in the "lead" cell on each line, 362 // and then do another pass over the lines afterwards to adjust. 363 lines := make([]formatLine, lineCount) 364 li := 0 365 lineStart := 0 366 for i, tok := range tokens { 367 if tok.Type == hclsyntax.TokenEOF { 368 // The EOF token doesn't belong to any line, and terminates the 369 // token sequence. 370 lines[li].lead = tokens[lineStart:i] 371 break 372 } 373 374 if tokenIsNewline(tok) { 375 lines[li].lead = tokens[lineStart : i+1] 376 lineStart = i + 1 377 li++ 378 } 379 } 380 381 // If a set of tokens doesn't end in TokenEOF (e.g. because it's a 382 // fragment of tokens from the middle of a file) then we might fall 383 // out here with a line still pending. 384 if lineStart < len(tokens) { 385 lines[li].lead = tokens[lineStart:] 386 if lines[li].lead[len(lines[li].lead)-1].Type == hclsyntax.TokenEOF { 387 lines[li].lead = lines[li].lead[:len(lines[li].lead)-1] 388 } 389 } 390 391 // Now we'll pick off any trailing comments and attribute assignments 392 // to shuffle off into the "comment" and "assign" cells. 393 for i := range lines { 394 line := &lines[i] 395 396 if len(line.lead) == 0 { 397 // if the line is empty then there's nothing for us to do 398 // (this should happen only for the final line, because all other 399 // lines would have a newline token of some kind) 400 continue 401 } 402 403 if len(line.lead) > 1 && line.lead[len(line.lead)-1].Type == hclsyntax.TokenComment { 404 line.comment = line.lead[len(line.lead)-1:] 405 line.lead = line.lead[:len(line.lead)-1] 406 } 407 408 for i, tok := range line.lead { 409 if i > 0 && tok.Type == hclsyntax.TokenEqual { 410 // We only move the tokens into "assign" if the RHS seems to 411 // be a whole expression, which we determine by counting 412 // brackets. If there's a net positive number of brackets 413 // then that suggests we're introducing a multi-line expression. 414 netBrackets := 0 415 for _, token := range line.lead[i:] { 416 netBrackets += tokenBracketChange(token) 417 } 418 419 if netBrackets == 0 { 420 line.assign = line.lead[i:] 421 line.lead = line.lead[:i] 422 } 423 break 424 } 425 } 426 } 427 428 return lines 429 } 430 431 func tokenIsNewline(tok *Token) bool { 432 if tok.Type == hclsyntax.TokenNewline { 433 return true 434 } else if tok.Type == hclsyntax.TokenComment { 435 // Single line tokens (# and //) consume their terminating newline, 436 // so we need to treat them as newline tokens as well. 437 if len(tok.Bytes) > 0 && tok.Bytes[len(tok.Bytes)-1] == '\n' { 438 return true 439 } 440 } 441 return false 442 } 443 444 func tokenBracketChange(tok *Token) int { 445 switch tok.Type { 446 case hclsyntax.TokenOBrace, hclsyntax.TokenOBrack, hclsyntax.TokenOParen, hclsyntax.TokenTemplateControl, hclsyntax.TokenTemplateInterp: 447 return 1 448 case hclsyntax.TokenCBrace, hclsyntax.TokenCBrack, hclsyntax.TokenCParen, hclsyntax.TokenTemplateSeqEnd: 449 return -1 450 default: 451 return 0 452 } 453 } 454 455 // formatLine represents a single line of source code for formatting purposes, 456 // splitting its tokens into up to three "cells": 457 // 458 // - lead: always present, representing everything up to one of the others 459 // - assign: if line contains an attribute assignment, represents the tokens 460 // starting at (and including) the equals symbol 461 // - comment: if line contains any non-comment tokens and ends with a 462 // single-line comment token, represents the comment. 463 // 464 // When formatting, the leading spaces of the first tokens in each of these 465 // cells is adjusted to align vertically their occurences on consecutive 466 // rows. 467 type formatLine struct { 468 lead Tokens 469 assign Tokens 470 comment Tokens 471 }