github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/util/pretty/pretty.go

github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/util/pretty/pretty.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package pretty
    12  
    13  import (
    14  	"fmt"
    15  	"strings"
    16  
    17  	"github.com/cockroachdb/cockroachdb-parser/pkg/util/errorutil"
    18  	"github.com/cockroachdb/errors"
    19  )
    20  
    21  // See the referenced paper in the package documentation for explanations
    22  // of the below code. Methods, variables, and implementation details were
// made to resemble it as closely as possible.
    24  
// docBest represents a selected document as described by the type
// "Doc" in the referenced paper (not "DOC"). This is the
// less-abstract representation constructed during "best layout"
// selection.
//
// A docBest is a singly linked list of layout elements:
//   - tag identifies the kind of element held by this node.
//   - i is the indentation written after the newline for lineB and
//     hardlineB nodes; for spacesB nodes only i.spaces is set and gives
//     the number of padding spaces.
//   - s is the string of textB and keywordB nodes.
//   - d is the remainder of the layout, or nil at the end.
type docBest struct {
	tag docBestType
	i   docPos
	s   string
	d   *docBest
}
    35  
// docBestType discriminates the kinds of docBest nodes.
type docBestType int

const (
	// textB is a string written to the output as-is.
	textB docBestType = iota
	// lineB is a newline followed by indentation. It is produced for
	// line and softbreak Docs.
	lineB
	// hardlineB is a newline followed by indentation, produced for
	// hardline Docs. Unlike lineB, fits reports false when it reaches one.
	hardlineB
	// spacesB is a run of padding spaces, produced for pad Docs.
	spacesB
	// keywordB is a string that is passed through the keyword transform
	// (when one is configured) before being written to the output.
	keywordB
)
    45  
    46  // Pretty returns a pretty-printed string for the Doc d at line length
    47  // n and tab width t. Keyword Docs are filtered through keywordTransform
    48  // if not nil. keywordTransform must not change the visible length of its
    49  // argument. It can, for example, add invisible characters like control codes
    50  // (colors, etc.).
    51  func Pretty(
    52  	d Doc, n int, useTabs bool, tabWidth int, keywordTransform func(string) string,
    53  ) (_ string, err error) {
    54  	defer func() {
    55  		if r := recover(); r != nil {
    56  			// This code allows us to propagate internal errors without having
    57  			// to add error checks everywhere throughout the code. This is only
    58  			// possible because the code does not update shared state and does
    59  			// not manipulate locks.
    60  			if ok, e := errorutil.ShouldCatch(r); ok {
    61  				err = e
    62  			} else {
    63  				// Other panic objects can't be considered "safe" and thus are
    64  				// propagated as panics.
    65  				panic(r)
    66  			}
    67  		}
    68  	}()
    69  
    70  	var sb strings.Builder
    71  	b := beExec{
    72  		w:                int16(n),
    73  		tabWidth:         int16(tabWidth),
    74  		memoBe:           make(map[beArgs]*docBest),
    75  		memoiDoc:         make(map[iDoc]*iDoc),
    76  		keywordTransform: keywordTransform,
    77  	}
    78  	ldoc := b.best(d)
    79  	b.layout(&sb, useTabs, ldoc)
    80  	return sb.String(), nil
    81  }
    82  
    83  // w is the max line width.
    84  func (b *beExec) best(x Doc) *docBest {
    85  	return b.be(docPos{0, 0}, b.iDoc(docPos{0, 0}, x, nil))
    86  }
    87  
// iDoc represents the type [(Int,DOC)] in the paper, with the
// indentation held as a docPos (tabs and spaces) rather than a
// single int.
// We'll use linked lists because this makes the
// recursion more efficient than slices.
//
// d is the document at the head of the list, i is the indentation
// associated with it, and next is the rest of the list (nil at the
// end).
type iDoc struct {
	d    Doc
	next *iDoc
	i    docPos
}
    97  
// docPos is a position or an indentation amount, expressed as a number
// of tabs followed by a number of spaces. When the remaining line width
// is computed, each tab counts as beExec.tabWidth columns.
type docPos struct {
	tabs   int16
	spaces int16
}
   102  
// beExec holds the state of one layout computation: the width
// settings, the memoization tables and the allocation buffers used by
// be and iDoc. Pretty creates a fresh beExec for every call.
type beExec struct {
	// w is the available line width.
	w int16
	// tabWidth is the virtual tab width.
	tabWidth int16

	// memoBe internalizes the results of the be function, so that the
	// same value is not computed multiple times.
	memoBe map[beArgs]*docBest

	// memoiDoc internalizes iDoc objects to ensure they are unique in
	// memory, and we can use pointer-pointer comparisons.
	memoiDoc map[iDoc]*iDoc

	// docAlloc speeds up the allocations of be()'s return values
	// by (*beExec).newDocBest() defined below.
	docAlloc []docBest

	// idocAlloc speeds up the allocations by (*beExec).iDoc() defined
	// below.
	idocAlloc []iDoc

	// keywordTransform filters keywords if not nil.
	keywordTransform func(string) string

	// beDepth is the depth of recursive calls of be. It is used to detect deep
	// call stacks before a stack overflow occurs.
	beDepth int
}
   132  
// maxBeDepth is the maximum allowed recursive call depth of be. If the depth
// exceeds this value, be panics with ErrPrettyMaxRecursionDepthExceeded.
const maxBeDepth = 50_000
   136  
// ErrPrettyMaxRecursionDepthExceeded is returned from Pretty when the maximum
// recursion depth of the functions invoked by Pretty is exceeded.
var ErrPrettyMaxRecursionDepthExceeded = errors.AssertionFailedf("max recursion depth exceeded")
   140  
   141  func (b *beExec) be(k docPos, xlist *iDoc) *docBest {
   142  	b.beDepth++
   143  	defer func() { b.beDepth-- }()
   144  	if b.beDepth > maxBeDepth {
   145  		panic(ErrPrettyMaxRecursionDepthExceeded)
   146  	}
   147  
   148  	// Shortcut: be k [] = Nil
   149  	if xlist == nil {
   150  		return nil
   151  	}
   152  
   153  	// If we've computed this result before, short cut here too.
   154  	memoKey := beArgs{k: k, d: xlist}
   155  	if cached, ok := b.memoBe[memoKey]; ok {
   156  		return cached
   157  	}
   158  
   159  	// General case.
   160  
   161  	d := *xlist
   162  	z := xlist.next
   163  
   164  	// Note: we'll need to memoize the result below.
   165  	var res *docBest
   166  
   167  	switch t := d.d.(type) {
   168  	case nilDoc:
   169  		res = b.be(k, z)
   170  	case *concat:
   171  		res = b.be(k, b.iDoc(d.i, t.a, b.iDoc(d.i, t.b, z)))
   172  	case nests:
   173  		res = b.be(k, b.iDoc(docPos{d.i.tabs, d.i.spaces + t.n}, t.d, z))
   174  	case nestt:
   175  		res = b.be(k, b.iDoc(docPos{d.i.tabs + 1 + d.i.spaces/b.tabWidth, 0}, t.d, z))
   176  	case text:
   177  		res = b.newDocBest(docBest{
   178  			tag: textB,
   179  			s:   string(t),
   180  			d:   b.be(docPos{k.tabs, k.spaces + int16(len(t))}, z),
   181  		})
   182  	case keyword:
   183  		res = b.newDocBest(docBest{
   184  			tag: keywordB,
   185  			s:   string(t),
   186  			d:   b.be(docPos{k.tabs, k.spaces + int16(len(t))}, z),
   187  		})
   188  	case line, softbreak:
   189  		res = b.newDocBest(docBest{
   190  			tag: lineB,
   191  			i:   d.i,
   192  			d:   b.be(d.i, z),
   193  		})
   194  	case hardline:
   195  		res = b.newDocBest(docBest{
   196  			tag: hardlineB,
   197  			i:   d.i,
   198  			d:   b.be(d.i, z),
   199  		})
   200  	case *union:
   201  		res = b.better(k,
   202  			b.be(k, b.iDoc(d.i, t.x, z)),
   203  			// We eta-lift the second argument to avoid eager evaluation.
   204  			func() *docBest {
   205  				return b.be(k, b.iDoc(d.i, t.y, z))
   206  			},
   207  		)
   208  	case *scolumn:
   209  		res = b.be(k, b.iDoc(d.i, t.f(k.spaces), z))
   210  	case *snesting:
   211  		res = b.be(k, b.iDoc(d.i, t.f(d.i.spaces), z))
   212  	case pad:
   213  		res = b.newDocBest(docBest{
   214  			tag: spacesB,
   215  			i:   docPos{spaces: t.n},
   216  			d:   b.be(docPos{k.tabs, k.spaces + t.n}, z),
   217  		})
   218  	default:
   219  		panic(fmt.Errorf("unknown type: %T", d.d))
   220  	}
   221  
   222  	// Memoize so we don't compute the same result twice.
   223  	b.memoBe[memoKey] = res
   224  
   225  	return res
   226  }
   227  
   228  // newDocBest makes a new docBest on the heap. Allocations
   229  // are batched for more efficiency.
   230  func (b *beExec) newDocBest(d docBest) *docBest {
   231  	buf := &b.docAlloc
   232  	if len(*buf) == 0 {
   233  		*buf = make([]docBest, 100)
   234  	}
   235  	r := &(*buf)[0]
   236  	*r = d
   237  	*buf = (*buf)[1:]
   238  	return r
   239  }
   240  
   241  // iDoc retrieves the unique instance of iDoc in memory for the given
   242  // values of i, s, d and z. The object is constructed if it does not
   243  // exist yet.
   244  //
   245  // The results of this function guarantee that the pointer addresses
   246  // are equal if the arguments used to construct the value were equal.
   247  func (b *beExec) iDoc(i docPos, d Doc, z *iDoc) *iDoc {
   248  	idoc := iDoc{i: i, d: d, next: z}
   249  	if m, ok := b.memoiDoc[idoc]; ok {
   250  		return m
   251  	}
   252  	r := b.newiDoc(idoc)
   253  	b.memoiDoc[idoc] = r
   254  	return r
   255  }
   256  
   257  // newiDoc makes a new iDoc on the heap. Allocations are batched
   258  // for more efficiency. Do not use this directly! Instead
   259  // use the iDoc() method defined above.
   260  func (b *beExec) newiDoc(d iDoc) *iDoc {
   261  	buf := &b.idocAlloc
   262  	if len(*buf) == 0 {
   263  		*buf = make([]iDoc, 100)
   264  	}
   265  	r := &(*buf)[0]
   266  	*r = d
   267  	*buf = (*buf)[1:]
   268  	return r
   269  }
   270  
// beArgs is the key type of the beExec.memoBe table. It holds the
// arguments of one call to be: the work list d and the position k.
// d is compared by pointer, which relies on (*beExec).iDoc returning
// the same pointer for equal lists.
type beArgs struct {
	d *iDoc
	k docPos
}
   275  
   276  func (b *beExec) better(k docPos, x *docBest, y func() *docBest) *docBest {
   277  	remainder := b.w - k.spaces - k.tabs*b.tabWidth
   278  	if fits(remainder, x) {
   279  		return x
   280  	}
   281  	return y()
   282  }
   283  
   284  func fits(w int16, x *docBest) bool {
   285  	if w < 0 {
   286  		return false
   287  	}
   288  	if x == nil {
   289  		// Nil doc.
   290  		return true
   291  	}
   292  	switch x.tag {
   293  	case textB, keywordB:
   294  		return fits(w-int16(len(x.s)), x.d)
   295  	case lineB:
   296  		return true
   297  	case hardlineB:
   298  		return false
   299  	case spacesB:
   300  		return fits(w-x.i.spaces, x.d)
   301  	default:
   302  		panic(fmt.Errorf("unknown type: %d", x.tag))
   303  	}
   304  }
   305  
   306  func (b *beExec) layout(sb *strings.Builder, useTabs bool, d *docBest) {
   307  	for ; d != nil; d = d.d {
   308  		switch d.tag {
   309  		case textB:
   310  			sb.WriteString(d.s)
   311  		case keywordB:
   312  			if b.keywordTransform != nil {
   313  				sb.WriteString(b.keywordTransform(d.s))
   314  			} else {
   315  				sb.WriteString(d.s)
   316  			}
   317  		case lineB, hardlineB:
   318  			sb.WriteByte('\n')
   319  			// Fill the tabs first.
   320  			padTabs := d.i.tabs * b.tabWidth
   321  			if useTabs {
   322  				for i := int16(0); i < d.i.tabs; i++ {
   323  					sb.WriteByte('\t')
   324  				}
   325  				padTabs = 0
   326  			}
   327  
   328  			// Fill the remaining spaces.
   329  			for i := int16(0); i < padTabs+d.i.spaces; i++ {
   330  				sb.WriteByte(' ')
   331  			}
   332  		case spacesB:
   333  			for i := int16(0); i < d.i.spaces; i++ {
   334  				sb.WriteByte(' ')
   335  			}
   336  		default:
   337  			panic(fmt.Errorf("unknown type: %d", d.tag))
   338  		}
   339  	}
   340  }