github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/sem/tree/format.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package tree
    12  
    13  import (
    14  	"bytes"
    15  	"fmt"
    16  	"sync"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/sql/lex"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    20  	"github.com/cockroachdb/cockroach/pkg/util"
    21  	"github.com/cockroachdb/errors"
    22  )
    23  
    24  // FmtFlags carries options for the pretty-printer.
    25  type FmtFlags int
    26  
    27  // HasFlags tests whether the given flags are all set.
    28  func (f FmtFlags) HasFlags(subset FmtFlags) bool {
    29  	return f&subset == subset
    30  }
    31  
    32  // SetFlags sets the given formatting flags.
    33  func (f *FmtFlags) SetFlags(subset FmtFlags) {
    34  	*f |= subset
    35  }
    36  
    37  // EncodeFlags returns the subset of the flags that are also lex encode flags.
    38  func (f FmtFlags) EncodeFlags() lex.EncodeFlags {
    39  	return lex.EncodeFlags(f) & (lex.EncFirstFreeFlagBit - 1)
    40  }
    41  
// Basic bit definitions for the FmtFlags bitmask.
//
// The first three constants are spelled out explicitly. Every constant from
// FmtShowPasswords onwards implicitly repeats the expression
// `FmtFlags(lex.EncFirstFreeFlagBit) << iota`, so each of them occupies its
// own bit and its value depends on its position in this block.
const (
	// FmtSimple instructs the pretty-printer to produce
	// a straightforward representation.
	FmtSimple FmtFlags = 0

	// FmtBareStrings instructs the pretty-printer to print strings and
	// other values without wrapping quotes. If the value is a SQL
	// string, the quotes will only be omitted if the string contains no
	// special characters. If it does contain special characters, the
	// string will be escaped and enclosed in e'...' regardless of
	// whether FmtBareStrings is specified. See fmtRawStrings below for
	// an alternative.
	FmtBareStrings FmtFlags = FmtFlags(lex.EncBareStrings)

	// FmtBareIdentifiers instructs the pretty-printer to print
	// identifiers without wrapping quotes in any case.
	FmtBareIdentifiers FmtFlags = FmtFlags(lex.EncBareIdentifiers)

	// FmtShowPasswords instructs the pretty-printer to not suppress passwords.
	// If not set, passwords are replaced by *****.
	//
	// Note: iota counts every constant of this block, including the three
	// above, so the shift applied here is 3 rather than 0.
	FmtShowPasswords FmtFlags = FmtFlags(lex.EncFirstFreeFlagBit) << iota

	// FmtShowTypes instructs the pretty-printer to
	// annotate expressions with their resolved types.
	FmtShowTypes

	// FmtHideConstants instructs the pretty-printer to produce a
	// representation that does not disclose query-specific data. It
	// also shortens long lists in tuples, VALUES and array expressions.
	FmtHideConstants

	// FmtAnonymize instructs the pretty-printer to remove
	// any name but function names.
	// TODO(knz): temporary until a better solution is found for #13968
	FmtAnonymize

	// FmtAlwaysQualifyTableNames instructs the pretty-printer to
	// qualify table names, even if originally omitted.
	// Requires Annotations in the formatting context.
	FmtAlwaysQualifyTableNames

	// FmtAlwaysGroupExprs instructs the pretty-printer to enclose
	// sub-expressions between parentheses.
	// Used for testing.
	FmtAlwaysGroupExprs

	// FmtShowTableAliases reveals the table aliases.
	FmtShowTableAliases

	// FmtSymbolicSubqueries indicates that subqueries must be pretty-printed
	// using numeric notation (@S123).
	FmtSymbolicSubqueries

	// fmtPgwireFormat, if set, causes strings to be formatted using the
	// postgres datum-to-text conversion. See comments in pgwire_encode.go.
	// Used internally in combination with FmtPgwireText defined below.
	fmtPgwireFormat

	// fmtDisambiguateDatumTypes, if set, causes datums and placeholders to
	// have type annotations (like :::interval) as necessary to disambiguate
	// between possible type resolutions.
	fmtDisambiguateDatumTypes

	// fmtSymbolicVars indicates that IndexedVars must be pretty-printed
	// using numeric notation (@123).
	fmtSymbolicVars

	// fmtRawStrings prints strings and JSON using the Go string
	// formatter. This is used e.g. for emitting values to CSV files.
	fmtRawStrings

	// FmtParsableNumerics produces decimal and float representations
	// that are always parsable, even if they require a string
	// representation like -Inf. Negative values are preserved "inside"
	// the numeric by enclosing them within parentheses.
	FmtParsableNumerics

	// FmtPGAttrdefAdbin is used to produce expressions formatted in a way that's
	// as close as possible to what clients expect to live in the pg_attrdef.adbin
	// column. Specifically, this strips type annotations, since Postgres doesn't
	// know what those are.
	FmtPGAttrdefAdbin

	// FmtPGIndexDef is used to produce CREATE INDEX statements that are
	// compatible with pg_get_indexdef.
	FmtPGIndexDef

	// fmtStaticallyFormatUserDefinedTypes, if set, causes user defined types
	// and datums of user defined types to be formatted in a way that is stable
	// across changes to the underlying type. For type names, this means that
	// they will be formatted as '@id'. For enum members, this means that they
	// will be serialized as their bytes physical representations.
	fmtStaticallyFormatUserDefinedTypes

	// fmtFormatByteLiterals instructs bytes to be formatted as byte literals
	// rather than string literals. For example, the bytes \x40 will be formatted
	// as b'\x40' rather than '\x40'.
	fmtFormatByteLiterals
)
   142  
// Composite/derived flag definitions follow. Each of them is a bitwise OR of
// the basic flags defined above.
const (
	// FmtPgwireText instructs the pretty-printer to use
	// a pg-compatible conversion to strings. See comments
	// in pgwire_encode.go.
	FmtPgwireText FmtFlags = fmtPgwireFormat | FmtFlags(lex.EncBareStrings)

	// FmtParsable instructs the pretty-printer to produce a representation that
	// can be parsed into an equivalent expression.
	FmtParsable FmtFlags = fmtDisambiguateDatumTypes | FmtParsableNumerics

	// FmtSerializable instructs the pretty-printer to produce a representation
	// for expressions that can be serialized to disk. It serializes user defined
	// types using representations that are stable across changes of the type
	// itself. This should be used when serializing expressions that will be
	// stored on disk, like DEFAULT expressions of columns.
	FmtSerializable FmtFlags = FmtParsable | fmtStaticallyFormatUserDefinedTypes

	// FmtCheckEquivalence instructs the pretty-printer to produce a representation
	// that can be used to check equivalence of expressions. Specifically:
	//  - IndexedVars are formatted using symbolic notation (to disambiguate
	//    columns).
	//  - datum types are disambiguated with explicit type
	//    annotations. This is necessary because datums of different types
	//    can otherwise be formatted to the same string: (for example the
	//    DDecimal 1 and the DInt 1).
	//  - user defined types and datums of user defined types are formatted
	//    using static representations to avoid name resolution and invalidation
	//    due to changes in the underlying type.
	FmtCheckEquivalence FmtFlags = fmtSymbolicVars |
		fmtDisambiguateDatumTypes |
		FmtParsableNumerics |
		fmtStaticallyFormatUserDefinedTypes

	// FmtArrayToString is a special composite flag suitable
	// for the output of array_to_string(). This de-quotes
	// the strings enclosed in the array and skips the normal escaping
	// of strings. Special characters are hex-escaped.
	FmtArrayToString FmtFlags = FmtBareStrings | fmtRawStrings

	// FmtExport, if set, formats datums in a raw form suitable for
	// EXPORT, e.g. suitable for output into a CSV file. The intended
	// goal for this flag is to ensure values can be read back using the
	// ParseDatumStringAs() / ParseStringAs() functions (IMPORT).
	//
	// We do not use FmtParsable for this purpose because FmtParsable
	// intends to preserve all the information useful to CockroachDB
	// internally, at the expense of readability by 3rd party tools.
	//
	// We also keep this set of flags separate from FmtArrayToString (even
	// though both currently have the same value) because the behavior of
	// array_to_string() is fixed for compatibility with PostgreSQL, whereas
	// EXPORT may evolve over time to support other things (e.g. fixing
	// #33429).
	//
	// TODO(mjibson): Note that this is currently not suitable for
	// emitting arrays or tuples. See: #33429
	FmtExport FmtFlags = FmtBareStrings | fmtRawStrings
)
   201  
// flagsRequiringAnnotations is the set of flags that may only be used when
// the FmtCtx carries Annotations. NewFmtCtxEx and FmtCtx.WithFlags panic if
// any of these flags is requested while no Annotations are available.
const flagsRequiringAnnotations FmtFlags = FmtAlwaysQualifyTableNames
   203  
// FmtCtx is suitable for passing to Format() methods.
// It also exposes the underlying bytes.Buffer interface for
// convenience: the formatted output accumulates in the embedded buffer.
//
// Instances are obtained from NewFmtCtx / NewFmtCtxEx, which draw them from
// a pool, and can be handed back to that pool with Close.
//
// FmtCtx cannot be copied by value.
type FmtCtx struct {
	_ util.NoCopy

	bytes.Buffer

	// NOTE: if you add more fields to this structure, make sure to add
	// corresponding cleanup code in FmtCtx.Close().

	// The flags to use for pretty-printing.
	flags FmtFlags
	// AST Annotations (used by some flags). Can be unset if those flags are not
	// used.
	ann *Annotations
	// indexedVarFormat is an optional interceptor for
	// IndexedVarContainer.IndexedVarFormat calls; it can be used to
	// customize the formatting of IndexedVars.
	indexedVarFormat func(ctx *FmtCtx, idx int)
	// tableNameFormatter will be called on all TableNames if it is non-nil.
	tableNameFormatter func(*FmtCtx, *TableName)
	// placeholderFormat is an optional interceptor for Placeholder.Format calls;
	// it can be used to format placeholders differently than normal.
	placeholderFormat func(ctx *FmtCtx, p *Placeholder)
}
   232  
// NewFmtCtx creates a FmtCtx; only flags that don't require Annotations
// can be used.
//
// It delegates to NewFmtCtxEx with nil Annotations, and therefore panics if
// f contains a flag that requires them (e.g. FmtAlwaysQualifyTableNames).
func NewFmtCtx(f FmtFlags) *FmtCtx {
	return NewFmtCtxEx(f, nil)
}
   238  
   239  // NewFmtCtxEx creates a FmtCtx.
   240  func NewFmtCtxEx(f FmtFlags, ann *Annotations) *FmtCtx {
   241  	if ann == nil && f&flagsRequiringAnnotations != 0 {
   242  		panic(errors.AssertionFailedf("no Annotations provided"))
   243  	}
   244  	ctx := fmtCtxPool.Get().(*FmtCtx)
   245  	ctx.flags = f
   246  	ctx.ann = ann
   247  	return ctx
   248  }
   249  
// SetReformatTableNames modifies FmtCtx to substitute the printing of table
// names using the provided function. The function stays installed until it
// is replaced or the FmtCtx is closed; see WithReformatTableNames for a
// scoped variant.
func (ctx *FmtCtx) SetReformatTableNames(tableNameFmt func(*FmtCtx, *TableName)) {
	ctx.tableNameFormatter = tableNameFmt
}
   255  
   256  // WithReformatTableNames modifies FmtCtx to to substitute the printing of table
   257  // names using the provided function, calls fn, then restores the original table
   258  // formatting.
   259  func (ctx *FmtCtx) WithReformatTableNames(tableNameFmt func(*FmtCtx, *TableName), fn func()) {
   260  	old := ctx.tableNameFormatter
   261  	ctx.tableNameFormatter = tableNameFmt
   262  	defer func() { ctx.tableNameFormatter = old }()
   263  
   264  	fn()
   265  }
   266  
   267  // WithFlags changes the flags in the FmtCtx, runs the given function, then
   268  // restores the old flags.
   269  func (ctx *FmtCtx) WithFlags(flags FmtFlags, fn func()) {
   270  	if ctx.ann == nil && flags&flagsRequiringAnnotations != 0 {
   271  		panic(errors.AssertionFailedf("no Annotations provided"))
   272  	}
   273  	oldFlags := ctx.flags
   274  	ctx.flags = flags
   275  	defer func() { ctx.flags = oldFlags }()
   276  
   277  	fn()
   278  }
   279  
   280  // HasFlags returns true iff the given flags are set in the formatter context.
   281  func (ctx *FmtCtx) HasFlags(f FmtFlags) bool {
   282  	return ctx.flags.HasFlags(f)
   283  }
   284  
   285  // Printf calls fmt.Fprintf on the linked bytes.Buffer. It is provided
   286  // for convenience, to avoid having to call fmt.Fprintf(&ctx.Buffer, ...).
   287  //
   288  // Note: DO NOT USE THIS TO INTERPOLATE %s ON NodeFormatter OBJECTS.
   289  // This would call the String() method on them and would fail to reuse
   290  // the same bytes buffer (and waste allocations). Instead use
   291  // ctx.FormatNode().
   292  func (ctx *FmtCtx) Printf(f string, args ...interface{}) {
   293  	fmt.Fprintf(&ctx.Buffer, f, args...)
   294  }
   295  
   296  // FmtExpr returns FmtFlags that indicate how the pretty-printer
   297  // should format expressions.
   298  func FmtExpr(base FmtFlags, showTypes bool, symbolicVars bool, showTableAliases bool) FmtFlags {
   299  	if showTypes {
   300  		base |= FmtShowTypes
   301  	}
   302  	if symbolicVars {
   303  		base |= fmtSymbolicVars
   304  	}
   305  	if showTableAliases {
   306  		base |= FmtShowTableAliases
   307  	}
   308  	return base
   309  }
   310  
// SetIndexedVarFormat modifies FmtCtx to customize the printing of
// IndexedVars using the provided function. The function stays installed
// until it is replaced or the FmtCtx is closed.
func (ctx *FmtCtx) SetIndexedVarFormat(fn func(ctx *FmtCtx, idx int)) {
	ctx.indexedVarFormat = fn
}
   316  
// SetPlaceholderFormat modifies FmtCtx to customize the printing of
// Placeholders using the provided function. The function stays installed
// until it is replaced or the FmtCtx is closed; see WithPlaceholderFormat
// for a scoped variant.
func (ctx *FmtCtx) SetPlaceholderFormat(placeholderFn func(_ *FmtCtx, _ *Placeholder)) {
	ctx.placeholderFormat = placeholderFn
}
   322  
   323  // WithPlaceholderFormat changes the placeholder formatting function, calls the
   324  // given function, then restores the placeholder function.
   325  func (ctx *FmtCtx) WithPlaceholderFormat(placeholderFn func(_ *FmtCtx, _ *Placeholder), fn func()) {
   326  	old := ctx.placeholderFormat
   327  	ctx.placeholderFormat = placeholderFn
   328  	defer func() { ctx.placeholderFormat = old }()
   329  	fn()
   330  }
   331  
// NodeFormatter is implemented by nodes that can be pretty-printed.
type NodeFormatter interface {
	// Format performs pretty-printing towards a bytes buffer. The flags member
	// of ctx influences the results. Most callers should use FormatNode instead,
	// since that is where the flag-driven special cases (type annotations,
	// grouping parentheses) are applied.
	Format(ctx *FmtCtx)
}
   338  
   339  // FormatName formats a string as a name.
   340  //
   341  // Note: prefer FormatNameP below when the string is already on the
   342  // heap.
   343  func (ctx *FmtCtx) FormatName(s string) {
   344  	ctx.FormatNode((*Name)(&s))
   345  }
   346  
// FormatNameP formats a string reference as a name. The pointer is
// reinterpreted as a *Name in place; the string is not copied.
func (ctx *FmtCtx) FormatNameP(s *string) {
	ctx.FormatNode((*Name)(s))
}
   351  
   352  // FormatNode recurses into a node for pretty-printing.
   353  // Flag-driven special cases can hook into this.
   354  func (ctx *FmtCtx) FormatNode(n NodeFormatter) {
   355  	f := ctx.flags
   356  	if f.HasFlags(FmtShowTypes) {
   357  		if te, ok := n.(TypedExpr); ok {
   358  			ctx.WriteByte('(')
   359  			ctx.formatNodeOrHideConstants(n)
   360  			ctx.WriteString(")[")
   361  			if rt := te.ResolvedType(); rt == nil {
   362  				// An attempt is made to pretty-print an expression that was
   363  				// not assigned a type yet. This should not happen, so we make
   364  				// it clear in the output this needs to be investigated
   365  				// further.
   366  				ctx.Printf("??? %v", te)
   367  			} else {
   368  				ctx.WriteString(rt.String())
   369  			}
   370  			ctx.WriteByte(']')
   371  			return
   372  		}
   373  	}
   374  	if f.HasFlags(FmtAlwaysGroupExprs) {
   375  		if _, ok := n.(Expr); ok {
   376  			ctx.WriteByte('(')
   377  		}
   378  	}
   379  	ctx.formatNodeOrHideConstants(n)
   380  	if f.HasFlags(FmtAlwaysGroupExprs) {
   381  		if _, ok := n.(Expr); ok {
   382  			ctx.WriteByte(')')
   383  		}
   384  	}
   385  	if f.HasFlags(fmtDisambiguateDatumTypes) {
   386  		var typ *types.T
   387  		if d, isDatum := n.(Datum); isDatum {
   388  			if p, isPlaceholder := d.(*Placeholder); isPlaceholder {
   389  				// p.typ will be nil if the placeholder has not been type-checked yet.
   390  				typ = p.typ
   391  			} else if d.AmbiguousFormat() {
   392  				typ = d.ResolvedType()
   393  			}
   394  		}
   395  		if typ != nil {
   396  			ctx.WriteString(":::")
   397  			ctx.FormatTypeReference(typ)
   398  		}
   399  	}
   400  }
   401  
   402  // AsStringWithFlags pretty prints a node to a string given specific flags; only
   403  // flags that don't require Annotations can be used.
   404  func AsStringWithFlags(n NodeFormatter, fl FmtFlags) string {
   405  	ctx := NewFmtCtx(fl)
   406  	ctx.FormatNode(n)
   407  	return ctx.CloseAndGetString()
   408  }
   409  
   410  // AsStringWithFQNames pretty prints a node to a string with the
   411  // FmtAlwaysQualifyTableNames flag (which requires annotations).
   412  func AsStringWithFQNames(n NodeFormatter, ann *Annotations) string {
   413  	ctx := NewFmtCtxEx(FmtAlwaysQualifyTableNames, ann)
   414  	ctx.FormatNode(n)
   415  	return ctx.CloseAndGetString()
   416  }
   417  
// AsString pretty prints a node to a string using the FmtSimple flags.
func AsString(n NodeFormatter) string {
	return AsStringWithFlags(n, FmtSimple)
}
   422  
// ErrString pretty prints a node to a string using the FmtBareIdentifiers
// flag, i.e. identifiers are not quoted.
func ErrString(n NodeFormatter) string {
	return AsStringWithFlags(n, FmtBareIdentifiers)
}
   427  
// Serialize pretty prints a node to a string using FmtSerializable; it is
// appropriate when we store expressions into strings that are stored on disk
// and may be later parsed back into expressions. See SerializeForDisplay for
// output that is meant to be shown to end users instead.
func Serialize(n NodeFormatter) string {
	return AsStringWithFlags(n, FmtSerializable)
}
   434  
// SerializeForDisplay pretty prints a node to a string using FmtParsable.
// It is appropriate when printing expressions that are visible to end users.
// Unlike Serialize, it does not request the static formatting of user
// defined types.
func SerializeForDisplay(n NodeFormatter) string {
	return AsStringWithFlags(n, FmtParsable)
}
   440  
   441  var fmtCtxPool = sync.Pool{
   442  	New: func() interface{} {
   443  		return &FmtCtx{}
   444  	},
   445  }
   446  
   447  // Close releases a FmtCtx for reuse. Closing a FmtCtx is not required, but is
   448  // recommended for performance-sensitive paths.
   449  func (ctx *FmtCtx) Close() {
   450  	ctx.Buffer.Reset()
   451  	ctx.flags = 0
   452  	ctx.indexedVarFormat = nil
   453  	ctx.tableNameFormatter = nil
   454  	ctx.placeholderFormat = nil
   455  	fmtCtxPool.Put(ctx)
   456  }
   457  
   458  // CloseAndGetString combines Close() and String().
   459  func (ctx *FmtCtx) CloseAndGetString() string {
   460  	s := ctx.String()
   461  	ctx.Close()
   462  	return s
   463  }
   464  
   465  func (ctx *FmtCtx) alwaysFormatTablePrefix() bool {
   466  	return ctx.flags.HasFlags(FmtAlwaysQualifyTableNames) || ctx.tableNameFormatter != nil
   467  }