github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/util/pretty/document.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  // Package pretty prints documents based on a target line width.
    12  //
    13  // See: https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
    14  //
    15  // The paper describes a simple algorithm for printing a document tree with a
    16  // single layout defined by text, newlines, and indentation. This concept is
    17  // then expanded for documents that have more than one possible layout using
    18  // a union type of two documents where both documents reduce to the same
    19  // document, but one document has been flattened to a single line. A method
    20  // ("best") is described that chooses the best of these two layouts (i.e.,
    21  // it removes all union types from a document tree). It works by tracking
    22  // the desired and current line length and choosing either the flattened
    23  // side of a union if it fits on the current line, or else the non-flattened
    24  // side. The paper then describes various performance improvements that reduce
    25  // the search space of the best function such that it can complete in O(n)
    26  // instead of O(n^2) time, where n is the number of nodes.
    27  //
    28  // For example code with SQL to experiment further, refer to
    29  // https://github.com/knz/prettier/
    30  package pretty
    31  
    32  import "fmt"
    33  
// Doc represents a document as described by the type "DOC" in the
// referenced paper. This is the abstract representation constructed
// by the pretty-printing code.
//
// Its only method is unexported, so types outside this package cannot
// implement Doc directly.
type Doc interface {
	// isDoc is a marker method: it tags a type as a Doc implementation.
	isDoc()
}
    40  
    41  func (text) isDoc()      {}
    42  func (line) isDoc()      {}
    43  func (softbreak) isDoc() {}
    44  func (hardline) isDoc()  {}
    45  func (nilDoc) isDoc()    {}
    46  func (*concat) isDoc()   {}
    47  func (nestt) isDoc()     {}
    48  func (nests) isDoc()     {}
    49  func (*union) isDoc()    {}
    50  func (*scolumn) isDoc()  {}
    51  func (*snesting) isDoc() {}
    52  func (pad) isDoc()       {}
    53  func (keyword) isDoc()   {}
    54  
    55  //
    56  // Implementations of Doc ("DOC" in paper).
    57  //
    58  
// nilDoc represents NIL :: DOC -- the empty doc.
// The type carries no data.
type nilDoc struct{}

// Nil is the NIL constructor: the package-level empty document value.
var Nil Doc = nilDoc{}
    64  
    65  // text represents (TEXT s) :: DOC -- a simple text string.
    66  type text string
    67  
    68  // Text is the TEXT constructor.
    69  func Text(s string) Doc {
    70  	return text(s)
    71  }
    72  
// line represents LINE :: DOC -- a "soft line" that can be flattened to a space.
// The type carries no data.
type line struct{}

// Line is a newline and is flattened to a space (flatten replaces it
// with a single-space text document).
var Line Doc = line{}
    78  
// softbreak represents SOFTBREAK :: DOC -- an invisible space between
// words that tries to break the text across lines.
//
// For example, text "hello" <> softbreak <> text "world"
// flattens to "helloworld" (one word) but splits across lines as:
//
//	hello
//	world
//
// This is a common extension to Wadler's printer.
//
// Idea borrowed from Daniel Mendler's printer at
// https://github.com/minad/wl-pprint-annotated/blob/master/src/Text/PrettyPrint/Annotated/WL.hs
type softbreak struct{}

// SoftBreak is a newline and is flattened to an empty string (flatten
// replaces it with Nil).
var SoftBreak Doc = softbreak{}
    96  
// hardline represents a line break that has no flattened form: unlike
// line and softbreak, flatten returns it unchanged.
type hardline struct{}

// HardLine is a newline and cannot be flattened.
var HardLine Doc = hardline{}
   101  
   102  // concat represents (DOC <> DOC) :: DOC -- the concatenation of two docs.
   103  type concat struct {
   104  	a, b Doc
   105  }
   106  
   107  // Concat is the <> constructor.
   108  // This uses simplifyNil to avoid actually inserting NIL docs
   109  // in the abstract tree.
   110  func Concat(a, b Doc) Doc {
   111  	return simplifyNil(a, b, func(a, b Doc) Doc { return &concat{a, b} })
   112  }
   113  
   114  // nests represents (NESTS Int DOC) :: DOC -- nesting a doc "under" another.
   115  // NESTS indents d with n spaces.
   116  // This is more or less exactly the NEST operator in Wadler's printer.
   117  type nests struct {
   118  	n int16
   119  	d Doc
   120  }
   121  
   122  // NestS is the NESTS constructor.
   123  func NestS(n int16, d Doc) Doc {
   124  	return nests{n, d}
   125  }
   126  
   127  // nestt represents (NESTT DOC) :: DOC -- nesting a doc "under" another
   128  // NESTT indents d with a tab character.
   129  // This is a variant of the NEST operator in Wadler's printer.
   130  type nestt struct {
   131  	d Doc
   132  }
   133  
   134  // NestT is the NESTT constructor.
   135  func NestT(d Doc) Doc {
   136  	return nestt{d}
   137  }
   138  
   139  // union represents (DOC <|> DOC) :: DOC -- the union of two docs.
   140  // <|> is really the union of two sets of layouts. x and y must flatten to the
   141  // same layout. Additionally, no first line of a document in x is shorter
   142  // than some first line of a document in y; or, equivalently, every first
   143  // line in x is at least as long as every first line in y.
   144  //
   145  // The main use of the union is via the Group operator defined below.
   146  //
   147  // We do not provide a public constructor as this type is not
   148  // exported.
   149  type union struct {
   150  	x, y Doc
   151  }
   152  
   153  // Group will format d on one line if possible.
   154  func Group(d Doc) Doc {
   155  	return &union{flatten(d), d}
   156  }
   157  
   158  var textSpace = Text(" ")
   159  
   160  func flatten(d Doc) Doc {
   161  	switch t := d.(type) {
   162  	case nilDoc:
   163  		return Nil
   164  	case *concat:
   165  		return Concat(flatten(t.a), flatten(t.b))
   166  	case nestt:
   167  		return NestT(flatten(t.d))
   168  	case nests:
   169  		return NestS(t.n, flatten(t.d))
   170  	case text, keyword, hardline:
   171  		return d
   172  	case line:
   173  		return textSpace
   174  	case softbreak:
   175  		return Nil
   176  	case *union:
   177  		return flatten(t.x)
   178  	case *scolumn:
   179  		return &scolumn{f: func(c int16) Doc { return flatten(t.f(c)) }}
   180  	case *snesting:
   181  		return &snesting{f: func(i int16) Doc { return flatten(t.f(i)) }}
   182  	case pad:
   183  		return Nil
   184  	default:
   185  		panic(fmt.Errorf("unknown type: %T", d))
   186  	}
   187  }
   188  
// scolumn is a special document which is replaced during rendering by
// another document depending on the current relative column on the
// rendering line (tab prefix excluded).
//
// It is an extension to the Wadler printer commonly found in
// derivative code. See e.g. use by Daniel Mendler in
// https://github.com/minad/wl-pprint-annotated/blob/master/src/Text/PrettyPrint/Annotated/WL.hs
//
// This type is not exposed, see the Align() operator below instead.
type scolumn struct {
	// f produces the replacement document from the current relative
	// column.
	f func(int16) Doc
}
   201  
// snesting is a special document which is replaced during rendering
// by another document depending on the current space-based nesting
// level (the one added by NestS).
//
// It is an extension to the Wadler printer commonly found in
// derivative code.  See e.g. use by Daniel Mendler in
// https://github.com/minad/wl-pprint-annotated/blob/master/src/Text/PrettyPrint/Annotated/WL.hs
//
// This type is not exposed, see the Align() operator below instead.
type snesting struct {
	// f produces the replacement document from the current space-based
	// nesting level.
	f func(int16) Doc
}
   214  
   215  // Align renders document d with the space-based nesting level set to
   216  // the current column.
   217  func Align(d Doc) Doc {
   218  	return &scolumn{
   219  		f: func(k int16) Doc {
   220  			return &snesting{
   221  				f: func(i int16) Doc {
   222  					return nests{k - i, d}
   223  				},
   224  			}
   225  		},
   226  	}
   227  }
   228  
// pad is a special document which is replaced during rendering by
// the specified amount of whitespace. However it is flattened
// to an empty document during grouping.
//
// This is an extension to Wadler's printer first prototyped in
// https://github.com/knz/prettier.
//
// Note that this special document must be handled especially
// carefully with anything that produces a union (<|>) (e.g. Group),
// so as to preserve the invariant of unions: "no first line of a
// document in x is shorter than some first line of a document in y;
// or, equivalently, every first line in x is at least as long as
// every first line in y".
//
// The operator RLTable, defined in util.go, is properly careful about
// this.
//
// This document type is not exposed publicly because of the risk
// described above.
type pad struct {
	// n is the amount of whitespace the document is replaced by.
	n int16
}
   251  
   252  type keyword string
   253  
   254  // Keyword is identical to Text except they are filtered by
   255  // keywordTransform. The computed width is always len(s), regardless of
   256  // the result of the result of the transform. This allows for things like
   257  // coloring and other control characters in the output.
   258  func Keyword(s string) Doc {
   259  	return keyword(s)
   260  }