github.com/hashicorp/hcl/v2@v2.20.0/hclwrite/format.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package hclwrite
     5  
     6  import (
     7  	"github.com/hashicorp/hcl/v2/hclsyntax"
     8  )
     9  
    10  // format rewrites tokens within the given sequence, in-place, to adjust the
    11  // whitespace around their content to achieve canonical formatting.
    12  func format(tokens Tokens) {
    13  	// Formatting is a multi-pass process. More details on the passes below,
    14  	// but this is the overview:
    15  	// - adjust the leading space on each line to create appropriate
    16  	//   indentation
    17  	// - adjust spaces between tokens in a single cell using a set of rules
    18  	// - adjust the leading space in the "assign" and "comment" cells on each
    19  	//   line to vertically align with neighboring lines.
    20  	// All of these steps operate in-place on the given tokens, so a caller
    21  	// may collect a flat sequence of all of the tokens underlying an AST
    22  	// and pass it here and we will then indirectly modify the AST itself.
    23  	// Formatting must change only whitespace. Specifically, that means
    24  	// changing the SpacesBefore attribute on a token while leaving the
    25  	// other token attributes unchanged.
    26  
    27  	lines := linesForFormat(tokens)
    28  	formatIndent(lines)
    29  	formatSpaces(lines)
    30  	formatCells(lines)
    31  }
    32  
    33  func formatIndent(lines []formatLine) {
    34  	// Our methodology for indents is to take the input one line at a time
    35  	// and count the bracketing delimiters on each line. If a line has a net
    36  	// increase in open brackets, we increase the indent level by one and
    37  	// remember how many new openers we had. If the line has a net _decrease_,
    38  	// we'll compare it to the most recent number of openers and decrease the
    39  	// dedent level by one each time we pass an indent level remembered
    40  	// earlier.
    41  	// The "indent stack" used here allows for us to recognize degenerate
    42  	// input where brackets are not symmetrical within lines and avoid
    43  	// pushing things too far left or right, creating confusion.
    44  
    45  	// We'll start our indent stack at a reasonable capacity to minimize the
    46  	// chance of us needing to grow it; 10 here means 10 levels of indent,
    47  	// which should be more than enough for reasonable HCL uses.
    48  	indents := make([]int, 0, 10)
    49  
    50  	for i := range lines {
    51  		line := &lines[i]
    52  		if len(line.lead) == 0 {
    53  			continue
    54  		}
    55  
    56  		if line.lead[0].Type == hclsyntax.TokenNewline {
    57  			// Never place spaces before a newline
    58  			line.lead[0].SpacesBefore = 0
    59  			continue
    60  		}
    61  
    62  		netBrackets := 0
    63  		for _, token := range line.lead {
    64  			netBrackets += tokenBracketChange(token)
    65  			if token.Type == hclsyntax.TokenOHeredoc {
    66  				break
    67  			}
    68  		}
    69  
    70  		for _, token := range line.assign {
    71  			netBrackets += tokenBracketChange(token)
    72  		}
    73  
    74  		switch {
    75  		case netBrackets > 0:
    76  			line.lead[0].SpacesBefore = 2 * len(indents)
    77  			indents = append(indents, netBrackets)
    78  		case netBrackets < 0:
    79  			closed := -netBrackets
    80  			for closed > 0 && len(indents) > 0 {
    81  				switch {
    82  
    83  				case closed > indents[len(indents)-1]:
    84  					closed -= indents[len(indents)-1]
    85  					indents = indents[:len(indents)-1]
    86  
    87  				case closed < indents[len(indents)-1]:
    88  					indents[len(indents)-1] -= closed
    89  					closed = 0
    90  
    91  				default:
    92  					indents = indents[:len(indents)-1]
    93  					closed = 0
    94  				}
    95  			}
    96  			line.lead[0].SpacesBefore = 2 * len(indents)
    97  		default:
    98  			line.lead[0].SpacesBefore = 2 * len(indents)
    99  		}
   100  	}
   101  }
   102  
   103  func formatSpaces(lines []formatLine) {
   104  	// placeholder token used when we don't have a token but we don't want
   105  	// to pass a real "nil" and complicate things with nil pointer checks
   106  	nilToken := &Token{
   107  		Type:         hclsyntax.TokenNil,
   108  		Bytes:        []byte{},
   109  		SpacesBefore: 0,
   110  	}
   111  
   112  	for _, line := range lines {
   113  		for i, token := range line.lead {
   114  			var before, after *Token
   115  			if i > 0 {
   116  				before = line.lead[i-1]
   117  			} else {
   118  				before = nilToken
   119  			}
   120  			if i < (len(line.lead) - 1) {
   121  				after = line.lead[i+1]
   122  			} else {
   123  				continue
   124  			}
   125  			if spaceAfterToken(token, before, after) {
   126  				after.SpacesBefore = 1
   127  			} else {
   128  				after.SpacesBefore = 0
   129  			}
   130  		}
   131  		for i, token := range line.assign {
   132  			if i == 0 {
   133  				// first token in "assign" always has one space before to
   134  				// separate the equals sign from what it's assigning.
   135  				token.SpacesBefore = 1
   136  			}
   137  
   138  			var before, after *Token
   139  			if i > 0 {
   140  				before = line.assign[i-1]
   141  			} else {
   142  				before = nilToken
   143  			}
   144  			if i < (len(line.assign) - 1) {
   145  				after = line.assign[i+1]
   146  			} else {
   147  				continue
   148  			}
   149  			if spaceAfterToken(token, before, after) {
   150  				after.SpacesBefore = 1
   151  			} else {
   152  				after.SpacesBefore = 0
   153  			}
   154  		}
   155  
   156  	}
   157  }
   158  
   159  func formatCells(lines []formatLine) {
   160  	chainStart := -1
   161  	maxColumns := 0
   162  
   163  	// We'll deal with the "assign" cell first, since moving that will
   164  	// also impact the "comment" cell.
   165  	closeAssignChain := func(i int) {
   166  		for _, chainLine := range lines[chainStart:i] {
   167  			columns := chainLine.lead.Columns()
   168  			spaces := (maxColumns - columns) + 1
   169  			chainLine.assign[0].SpacesBefore = spaces
   170  		}
   171  		chainStart = -1
   172  		maxColumns = 0
   173  	}
   174  	for i, line := range lines {
   175  		if line.assign == nil {
   176  			if chainStart != -1 {
   177  				closeAssignChain(i)
   178  			}
   179  		} else {
   180  			if chainStart == -1 {
   181  				chainStart = i
   182  			}
   183  			columns := line.lead.Columns()
   184  			if columns > maxColumns {
   185  				maxColumns = columns
   186  			}
   187  		}
   188  	}
   189  	if chainStart != -1 {
   190  		closeAssignChain(len(lines))
   191  	}
   192  
   193  	// Now we'll deal with the comments
   194  	closeCommentChain := func(i int) {
   195  		for _, chainLine := range lines[chainStart:i] {
   196  			columns := chainLine.lead.Columns() + chainLine.assign.Columns()
   197  			spaces := (maxColumns - columns) + 1
   198  			chainLine.comment[0].SpacesBefore = spaces
   199  		}
   200  		chainStart = -1
   201  		maxColumns = 0
   202  	}
   203  	for i, line := range lines {
   204  		if line.comment == nil {
   205  			if chainStart != -1 {
   206  				closeCommentChain(i)
   207  			}
   208  		} else {
   209  			if chainStart == -1 {
   210  				chainStart = i
   211  			}
   212  			columns := line.lead.Columns() + line.assign.Columns()
   213  			if columns > maxColumns {
   214  				maxColumns = columns
   215  			}
   216  		}
   217  	}
   218  	if chainStart != -1 {
   219  		closeCommentChain(len(lines))
   220  	}
   221  }
   222  
   223  // spaceAfterToken decides whether a particular subject token should have a
   224  // space after it when surrounded by the given before and after tokens.
   225  // "before" can be TokenNil, if the subject token is at the start of a sequence.
   226  func spaceAfterToken(subject, before, after *Token) bool {
   227  	switch {
   228  
   229  	case after.Type == hclsyntax.TokenNewline || after.Type == hclsyntax.TokenNil:
   230  		// Never add spaces before a newline
   231  		return false
   232  
   233  	case subject.Type == hclsyntax.TokenIdent && after.Type == hclsyntax.TokenOParen:
   234  		// Don't split a function name from open paren in a call
   235  		return false
   236  
   237  	case (subject.Type == hclsyntax.TokenIdent && after.Type == hclsyntax.TokenDoubleColon) ||
   238  		(subject.Type == hclsyntax.TokenDoubleColon && after.Type == hclsyntax.TokenIdent):
   239  		// Don't split namespace segments in a function call
   240  		return false
   241  
   242  	case subject.Type == hclsyntax.TokenDot || after.Type == hclsyntax.TokenDot:
   243  		// Don't use spaces around attribute access dots
   244  		return false
   245  
   246  	case after.Type == hclsyntax.TokenComma || after.Type == hclsyntax.TokenEllipsis:
   247  		// No space right before a comma or ... in an argument list
   248  		return false
   249  
   250  	case subject.Type == hclsyntax.TokenComma:
   251  		// Always a space after a comma
   252  		return true
   253  
   254  	case subject.Type == hclsyntax.TokenQuotedLit || subject.Type == hclsyntax.TokenStringLit || subject.Type == hclsyntax.TokenOQuote || subject.Type == hclsyntax.TokenOHeredoc || after.Type == hclsyntax.TokenQuotedLit || after.Type == hclsyntax.TokenStringLit || after.Type == hclsyntax.TokenCQuote || after.Type == hclsyntax.TokenCHeredoc:
   255  		// No extra spaces within templates
   256  		return false
   257  
   258  	case hclsyntax.Keyword([]byte{'i', 'n'}).TokenMatches(subject.asHCLSyntax()) && before.Type == hclsyntax.TokenIdent:
   259  		// This is a special case for inside for expressions where a user
   260  		// might want to use a literal tuple constructor:
   261  		// [for x in [foo]: x]
   262  		// ... in that case, we would normally produce in[foo] thinking that
   263  		// in is a reference, but we'll recognize it as a keyword here instead
   264  		// to make the result less confusing.
   265  		return true
   266  
   267  	case after.Type == hclsyntax.TokenOBrack && (subject.Type == hclsyntax.TokenIdent || subject.Type == hclsyntax.TokenNumberLit || tokenBracketChange(subject) < 0):
   268  		return false
   269  
   270  	case subject.Type == hclsyntax.TokenBang:
   271  		// No space after a bang
   272  		return false
   273  
   274  	case subject.Type == hclsyntax.TokenMinus:
   275  		// Since a minus can either be subtraction or negation, and the latter
   276  		// should _not_ have a space after it, we need to use some heuristics
   277  		// to decide which case this is.
   278  		// We guess that we have a negation if the token before doesn't look
   279  		// like it could be the end of an expression.
   280  
   281  		switch before.Type {
   282  
   283  		case hclsyntax.TokenNil:
   284  			// Minus at the start of input must be a negation
   285  			return false
   286  
   287  		case hclsyntax.TokenOParen, hclsyntax.TokenOBrace, hclsyntax.TokenOBrack, hclsyntax.TokenEqual, hclsyntax.TokenColon, hclsyntax.TokenComma, hclsyntax.TokenQuestion:
   288  			// Minus immediately after an opening bracket or separator must be a negation.
   289  			return false
   290  
   291  		case hclsyntax.TokenPlus, hclsyntax.TokenStar, hclsyntax.TokenSlash, hclsyntax.TokenPercent, hclsyntax.TokenMinus:
   292  			// Minus immediately after another arithmetic operator must be negation.
   293  			return false
   294  
   295  		case hclsyntax.TokenEqualOp, hclsyntax.TokenNotEqual, hclsyntax.TokenGreaterThan, hclsyntax.TokenGreaterThanEq, hclsyntax.TokenLessThan, hclsyntax.TokenLessThanEq:
   296  			// Minus immediately after another comparison operator must be negation.
   297  			return false
   298  
   299  		case hclsyntax.TokenAnd, hclsyntax.TokenOr, hclsyntax.TokenBang:
   300  			// Minus immediately after logical operator doesn't make sense but probably intended as negation.
   301  			return false
   302  
   303  		default:
   304  			return true
   305  		}
   306  
   307  	case subject.Type == hclsyntax.TokenOBrace || after.Type == hclsyntax.TokenCBrace:
   308  		// Unlike other bracket types, braces have spaces on both sides of them,
   309  		// both in single-line nested blocks foo { bar = baz } and in object
   310  		// constructor expressions foo = { bar = baz }.
   311  		if subject.Type == hclsyntax.TokenOBrace && after.Type == hclsyntax.TokenCBrace {
   312  			// An open brace followed by a close brace is an exception, however.
   313  			// e.g. foo {} rather than foo { }
   314  			return false
   315  		}
   316  		return true
   317  
   318  	// In the unlikely event that an interpolation expression is just
   319  	// a single object constructor, we'll put a space between the ${ and
   320  	// the following { to make this more obvious, and then the same
   321  	// thing for the two braces at the end.
   322  	case (subject.Type == hclsyntax.TokenTemplateInterp || subject.Type == hclsyntax.TokenTemplateControl) && after.Type == hclsyntax.TokenOBrace:
   323  		return true
   324  	case subject.Type == hclsyntax.TokenCBrace && after.Type == hclsyntax.TokenTemplateSeqEnd:
   325  		return true
   326  
   327  	// Don't add spaces between interpolated items
   328  	case subject.Type == hclsyntax.TokenTemplateSeqEnd && (after.Type == hclsyntax.TokenTemplateInterp || after.Type == hclsyntax.TokenTemplateControl):
   329  		return false
   330  
   331  	case tokenBracketChange(subject) > 0:
   332  		// No spaces after open brackets
   333  		return false
   334  
   335  	case tokenBracketChange(after) < 0:
   336  		// No spaces before close brackets
   337  		return false
   338  
   339  	default:
   340  		// Most tokens are space-separated
   341  		return true
   342  
   343  	}
   344  }
   345  
   346  func linesForFormat(tokens Tokens) []formatLine {
   347  	if len(tokens) == 0 {
   348  		return make([]formatLine, 0)
   349  	}
   350  
   351  	// first we'll count our lines, so we can allocate the array for them in
   352  	// a single block. (We want to minimize memory pressure in this codepath,
   353  	// so it can be run somewhat-frequently by editor integrations.)
   354  	lineCount := 1 // if there are zero newlines then there is one line
   355  	for _, tok := range tokens {
   356  		if tokenIsNewline(tok) {
   357  			lineCount++
   358  		}
   359  	}
   360  
   361  	// To start, we'll just put everything in the "lead" cell on each line,
   362  	// and then do another pass over the lines afterwards to adjust.
   363  	lines := make([]formatLine, lineCount)
   364  	li := 0
   365  	lineStart := 0
   366  	for i, tok := range tokens {
   367  		if tok.Type == hclsyntax.TokenEOF {
   368  			// The EOF token doesn't belong to any line, and terminates the
   369  			// token sequence.
   370  			lines[li].lead = tokens[lineStart:i]
   371  			break
   372  		}
   373  
   374  		if tokenIsNewline(tok) {
   375  			lines[li].lead = tokens[lineStart : i+1]
   376  			lineStart = i + 1
   377  			li++
   378  		}
   379  	}
   380  
   381  	// If a set of tokens doesn't end in TokenEOF (e.g. because it's a
   382  	// fragment of tokens from the middle of a file) then we might fall
   383  	// out here with a line still pending.
   384  	if lineStart < len(tokens) {
   385  		lines[li].lead = tokens[lineStart:]
   386  		if lines[li].lead[len(lines[li].lead)-1].Type == hclsyntax.TokenEOF {
   387  			lines[li].lead = lines[li].lead[:len(lines[li].lead)-1]
   388  		}
   389  	}
   390  
   391  	// Now we'll pick off any trailing comments and attribute assignments
   392  	// to shuffle off into the "comment" and "assign" cells.
   393  	for i := range lines {
   394  		line := &lines[i]
   395  
   396  		if len(line.lead) == 0 {
   397  			// if the line is empty then there's nothing for us to do
   398  			// (this should happen only for the final line, because all other
   399  			// lines would have a newline token of some kind)
   400  			continue
   401  		}
   402  
   403  		if len(line.lead) > 1 && line.lead[len(line.lead)-1].Type == hclsyntax.TokenComment {
   404  			line.comment = line.lead[len(line.lead)-1:]
   405  			line.lead = line.lead[:len(line.lead)-1]
   406  		}
   407  
   408  		for i, tok := range line.lead {
   409  			if i > 0 && tok.Type == hclsyntax.TokenEqual {
   410  				// We only move the tokens into "assign" if the RHS seems to
   411  				// be a whole expression, which we determine by counting
   412  				// brackets. If there's a net positive number of brackets
   413  				// then that suggests we're introducing a multi-line expression.
   414  				netBrackets := 0
   415  				for _, token := range line.lead[i:] {
   416  					netBrackets += tokenBracketChange(token)
   417  				}
   418  
   419  				if netBrackets == 0 {
   420  					line.assign = line.lead[i:]
   421  					line.lead = line.lead[:i]
   422  				}
   423  				break
   424  			}
   425  		}
   426  	}
   427  
   428  	return lines
   429  }
   430  
   431  func tokenIsNewline(tok *Token) bool {
   432  	if tok.Type == hclsyntax.TokenNewline {
   433  		return true
   434  	} else if tok.Type == hclsyntax.TokenComment {
   435  		// Single line tokens (# and //) consume their terminating newline,
   436  		// so we need to treat them as newline tokens as well.
   437  		if len(tok.Bytes) > 0 && tok.Bytes[len(tok.Bytes)-1] == '\n' {
   438  			return true
   439  		}
   440  	}
   441  	return false
   442  }
   443  
   444  func tokenBracketChange(tok *Token) int {
   445  	switch tok.Type {
   446  	case hclsyntax.TokenOBrace, hclsyntax.TokenOBrack, hclsyntax.TokenOParen, hclsyntax.TokenTemplateControl, hclsyntax.TokenTemplateInterp:
   447  		return 1
   448  	case hclsyntax.TokenCBrace, hclsyntax.TokenCBrack, hclsyntax.TokenCParen, hclsyntax.TokenTemplateSeqEnd:
   449  		return -1
   450  	default:
   451  		return 0
   452  	}
   453  }
   454  
// formatLine represents a single line of source code for formatting purposes,
// splitting its tokens into up to three "cells":
//
//   - lead: always present, representing everything up to one of the others
//   - assign: if line contains an attribute assignment, represents the tokens
//     starting at (and including) the equals symbol
//   - comment: if line contains any non-comment tokens and ends with a
//     single-line comment token, represents the comment.
//
// When formatting, the leading spaces of the first token in each of these
// cells are adjusted to vertically align their occurrences on consecutive
// rows.
type formatLine struct {
	// lead holds the tokens of the line that are in neither of the other
	// two cells.
	lead Tokens
	// assign holds the equals sign and the tokens after it, or is nil when
	// nothing was split off into this cell.
	assign Tokens
	// comment holds the trailing comment token, or is nil when the line has
	// no comment cell.
	comment Tokens
}