// Source: github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/util/pretty/document.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

// Package pretty prints documents based on a target line width.
//
// See: https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
//
// The paper describes a simple algorithm for printing a document tree with a
// single layout defined by text, newlines, and indentation. This concept is
// then expanded for documents that have more than one possible layout using
// a union type of two documents where both documents reduce to the same
// document, but one document has been flattened to a single line. A method
// ("best") is described that chooses the best of these two layouts (i.e.,
// it removes all union types from a document tree). It works by tracking
// the desired and current line length and choosing either the flattened
// side of a union if it fits on the current line, or else the non-flattened
// side. The paper then describes various performance improvements that reduce
// the search space of the best function such that it can complete in O(n)
// instead of O(n^2) time, where n is the number of nodes.
//
// For example code with SQL to experiment further, refer to
// https://github.com/knz/prettier/
package pretty

import "fmt"

// Doc represents a document as described by the type "DOC" in the
// referenced paper. This is the abstract representation constructed
// by the pretty-printing code.
37 type Doc interface { 38 isDoc() 39 } 40 41 func (text) isDoc() {} 42 func (line) isDoc() {} 43 func (softbreak) isDoc() {} 44 func (hardline) isDoc() {} 45 func (nilDoc) isDoc() {} 46 func (*concat) isDoc() {} 47 func (nestt) isDoc() {} 48 func (nests) isDoc() {} 49 func (*union) isDoc() {} 50 func (*scolumn) isDoc() {} 51 func (*snesting) isDoc() {} 52 func (pad) isDoc() {} 53 func (keyword) isDoc() {} 54 55 // 56 // Implementations of Doc ("DOC" in paper). 57 // 58 59 // nilDoc represents NIL :: DOC -- the empty doc. 60 type nilDoc struct{} 61 62 // Nil is the NIL constructor. 63 var Nil Doc = nilDoc{} 64 65 // text represents (TEXT s) :: DOC -- a simple text string. 66 type text string 67 68 // Text is the TEXT constructor. 69 func Text(s string) Doc { 70 return text(s) 71 } 72 73 // line represents LINE :: DOC -- a "soft line" that can be flattened to a space. 74 type line struct{} 75 76 // Line is a newline and is flattened to a space. 77 var Line Doc = line{} 78 79 // softbreak represents SOFTBREAK :: DOC -- an invisible space between 80 // words that tries to break the text across lines. 81 // 82 // For example, text "hello" <> softbreak <> text "world" 83 // flattens to "helloworld" (one word) but splits across lines as: 84 // 85 // hello 86 // world 87 // 88 // This is a common extension to Wadler's printer. 89 // 90 // Idea borrowed from Daniel Mendler's printer at 91 // https://github.com/minad/wl-pprint-annotated/blob/master/src/Text/PrettyPrint/Annotated/WL.hs 92 type softbreak struct{} 93 94 // SoftBreak is a newline and is flattened to an empty string. 95 var SoftBreak Doc = softbreak{} 96 97 type hardline struct{} 98 99 // HardLine is a newline and cannot be flattened. 100 var HardLine Doc = hardline{} 101 102 // concat represents (DOC <> DOC) :: DOC -- the concatenation of two docs. 103 type concat struct { 104 a, b Doc 105 } 106 107 // Concat is the <> constructor. 
108 // This uses simplifyNil to avoid actually inserting NIL docs 109 // in the abstract tree. 110 func Concat(a, b Doc) Doc { 111 return simplifyNil(a, b, func(a, b Doc) Doc { return &concat{a, b} }) 112 } 113 114 // nests represents (NESTS Int DOC) :: DOC -- nesting a doc "under" another. 115 // NESTS indents d with n spaces. 116 // This is more or less exactly the NEST operator in Wadler's printer. 117 type nests struct { 118 n int16 119 d Doc 120 } 121 122 // NestS is the NESTS constructor. 123 func NestS(n int16, d Doc) Doc { 124 return nests{n, d} 125 } 126 127 // nestt represents (NESTT DOC) :: DOC -- nesting a doc "under" another 128 // NESTT indents d with a tab character. 129 // This is a variant of the NEST operator in Wadler's printer. 130 type nestt struct { 131 d Doc 132 } 133 134 // NestT is the NESTT constructor. 135 func NestT(d Doc) Doc { 136 return nestt{d} 137 } 138 139 // union represents (DOC <|> DOC) :: DOC -- the union of two docs. 140 // <|> is really the union of two sets of layouts. x and y must flatten to the 141 // same layout. Additionally, no first line of a document in x is shorter 142 // than some first line of a document in y; or, equivalently, every first 143 // line in x is at least as long as every first line in y. 144 // 145 // The main use of the union is via the Group operator defined below. 146 // 147 // We do not provide a public constructor as this type is not 148 // exported. 149 type union struct { 150 x, y Doc 151 } 152 153 // Group will format d on one line if possible. 
154 func Group(d Doc) Doc { 155 return &union{flatten(d), d} 156 } 157 158 var textSpace = Text(" ") 159 160 func flatten(d Doc) Doc { 161 switch t := d.(type) { 162 case nilDoc: 163 return Nil 164 case *concat: 165 return Concat(flatten(t.a), flatten(t.b)) 166 case nestt: 167 return NestT(flatten(t.d)) 168 case nests: 169 return NestS(t.n, flatten(t.d)) 170 case text, keyword, hardline: 171 return d 172 case line: 173 return textSpace 174 case softbreak: 175 return Nil 176 case *union: 177 return flatten(t.x) 178 case *scolumn: 179 return &scolumn{f: func(c int16) Doc { return flatten(t.f(c)) }} 180 case *snesting: 181 return &snesting{f: func(i int16) Doc { return flatten(t.f(i)) }} 182 case pad: 183 return Nil 184 default: 185 panic(fmt.Errorf("unknown type: %T", d)) 186 } 187 } 188 189 // scolumn is a special document which is replaced during rendering by 190 // another document depending on the current relative column on the 191 // rendering line (tab prefix excluded). 192 // 193 // It is an extension to the Wadler printer commonly found in 194 // derivative code. See e.g. use by Daniel Mendler in 195 // https://github.com/minad/wl-pprint-annotated/blob/master/src/Text/PrettyPrint/Annotated/WL.hs 196 // 197 // This type is not exposed, see the Align() operator below instead. 198 type scolumn struct { 199 f func(int16) Doc 200 } 201 202 // snesting is a special document which is replaced during rendering 203 // by another document depending on the current space-based nesting 204 // level (the one added by NestS). 205 // 206 // It is an extension to the Wadler printer commonly found in 207 // derivative code. See e.g. use by Daniel Mendler in 208 // https://github.com/minad/wl-pprint-annotated/blob/master/src/Text/PrettyPrint/Annotated/WL.hs 209 // 210 // This type is not exposed, see the Align() operator below instead. 
211 type snesting struct { 212 f func(int16) Doc 213 } 214 215 // Align renders document d with the space-based nesting level set to 216 // the current column. 217 func Align(d Doc) Doc { 218 return &scolumn{ 219 f: func(k int16) Doc { 220 return &snesting{ 221 f: func(i int16) Doc { 222 return nests{k - i, d} 223 }, 224 } 225 }, 226 } 227 } 228 229 // pad is a special document which is replaced during rendering by 230 // the specified amount of whitespace. However it is flattened 231 // to an empty document during grouping. 232 // 233 // This is an extension to Wadler's printer first prototyped in 234 // https://github.com/knz/prettier. 235 // 236 // Note that this special document must be handled especially 237 // carefully with anything that produces a union (<|>) (e.g. Group), 238 // so as to preserve the invariant of unions: "no first line of a 239 // document in x is shorter than some first line of a document in y; 240 // or, equivalently, every first line in x is at least as long as 241 // every first line in y". 242 // 243 // The operator RLTable, defined in util.go, is properly careful about 244 // this. 245 // 246 // This document type is not exposed publicly because of the risk 247 // described above. 248 type pad struct { 249 n int16 250 } 251 252 type keyword string 253 254 // Keyword is identical to Text except they are filtered by 255 // keywordTransform. The computed width is always len(s), regardless of 256 // the result of the result of the transform. This allows for things like 257 // coloring and other control characters in the output. 258 func Keyword(s string) Doc { 259 return keyword(s) 260 }