github.com/bir3/gocompiler@v0.9.2202/src/go/doc/comment/text.go (about) 1 // Copyright 2022 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package comment 6 7 import ( 8 "bytes" 9 "fmt" 10 "sort" 11 "strings" 12 "unicode/utf8" 13 ) 14 15 // A textPrinter holds the state needed for printing a Doc as plain text. 16 type textPrinter struct { 17 *Printer 18 long strings.Builder 19 prefix string 20 codePrefix string 21 width int 22 } 23 24 // Text returns a textual formatting of the [Doc]. 25 // See the [Printer] documentation for ways to customize the text output. 26 func (p *Printer) Text(d *Doc) []byte { 27 tp := &textPrinter{ 28 Printer: p, 29 prefix: p.TextPrefix, 30 codePrefix: p.TextCodePrefix, 31 width: p.TextWidth, 32 } 33 if tp.codePrefix == "" { 34 tp.codePrefix = p.TextPrefix + "\t" 35 } 36 if tp.width == 0 { 37 tp.width = 80 - utf8.RuneCountInString(tp.prefix) 38 } 39 40 var out bytes.Buffer 41 for i, x := range d.Content { 42 if i > 0 && blankBefore(x) { 43 out.WriteString(tp.prefix) 44 writeNL(&out) 45 } 46 tp.block(&out, x) 47 } 48 anyUsed := false 49 for _, def := range d.Links { 50 if def.Used { 51 anyUsed = true 52 break 53 } 54 } 55 if anyUsed { 56 writeNL(&out) 57 for _, def := range d.Links { 58 if def.Used { 59 fmt.Fprintf(&out, "[%s]: %s\n", def.Text, def.URL) 60 } 61 } 62 } 63 return out.Bytes() 64 } 65 66 // writeNL calls out.WriteByte('\n') 67 // but first trims trailing spaces on the previous line. 68 func writeNL(out *bytes.Buffer) { 69 // Trim trailing spaces. 70 data := out.Bytes() 71 n := 0 72 for n < len(data) && (data[len(data)-n-1] == ' ' || data[len(data)-n-1] == '\t') { 73 n++ 74 } 75 if n > 0 { 76 out.Truncate(len(data) - n) 77 } 78 out.WriteByte('\n') 79 } 80 81 // block prints the block x to out. 82 func (p *textPrinter) block(out *bytes.Buffer, x Block) { 83 switch x := x.(type) { 84 default: 85 fmt.Fprintf(out, "?%T\n", x) 86 87 case *Paragraph: 88 out.WriteString(p.prefix) 89 p.text(out, "", x.Text) 90 91 case *Heading: 92 out.WriteString(p.prefix) 93 out.WriteString("# ") 94 p.text(out, "", x.Text) 95 96 case *Code: 97 text := x.Text 98 for text != "" { 99 var line string 100 line, text, _ = strings.Cut(text, "\n") 101 if line != "" { 102 out.WriteString(p.codePrefix) 103 out.WriteString(line) 104 } 105 writeNL(out) 106 } 107 108 case *List: 109 loose := x.BlankBetween() 110 for i, item := range x.Items { 111 if i > 0 && loose { 112 out.WriteString(p.prefix) 113 writeNL(out) 114 } 115 out.WriteString(p.prefix) 116 out.WriteString(" ") 117 if item.Number == "" { 118 out.WriteString(" - ") 119 } else { 120 out.WriteString(item.Number) 121 out.WriteString(". ") 122 } 123 for i, blk := range item.Content { 124 const fourSpace = " " 125 if i > 0 { 126 writeNL(out) 127 out.WriteString(p.prefix) 128 out.WriteString(fourSpace) 129 } 130 p.text(out, fourSpace, blk.(*Paragraph).Text) 131 } 132 } 133 } 134 } 135 136 // text prints the text sequence x to out. 137 func (p *textPrinter) text(out *bytes.Buffer, indent string, x []Text) { 138 p.oneLongLine(&p.long, x) 139 words := strings.Fields(p.long.String()) 140 p.long.Reset() 141 142 var seq []int 143 if p.width < 0 || len(words) == 0 { 144 seq = []int{0, len(words)} // one long line 145 } else { 146 seq = wrap(words, p.width-utf8.RuneCountInString(indent)) 147 } 148 for i := 0; i+1 < len(seq); i++ { 149 if i > 0 { 150 out.WriteString(p.prefix) 151 out.WriteString(indent) 152 } 153 for j, w := range words[seq[i]:seq[i+1]] { 154 if j > 0 { 155 out.WriteString(" ") 156 } 157 out.WriteString(w) 158 } 159 writeNL(out) 160 } 161 } 162 163 // oneLongLine prints the text sequence x to out as one long line, 164 // without worrying about line wrapping. 165 // Explicit links have the [ ] dropped to improve readability. 166 func (p *textPrinter) oneLongLine(out *strings.Builder, x []Text) { 167 for _, t := range x { 168 switch t := t.(type) { 169 case Plain: 170 out.WriteString(string(t)) 171 case Italic: 172 out.WriteString(string(t)) 173 case *Link: 174 p.oneLongLine(out, t.Text) 175 case *DocLink: 176 p.oneLongLine(out, t.Text) 177 } 178 } 179 } 180 181 // wrap wraps words into lines of at most max runes, 182 // minimizing the sum of the squares of the leftover lengths 183 // at the end of each line (except the last, of course), 184 // with a preference for ending lines at punctuation (.,:;). 185 // 186 // The returned slice gives the indexes of the first words 187 // on each line in the wrapped text with a final entry of len(words). 188 // Thus the lines are words[seq[0]:seq[1]], words[seq[1]:seq[2]], 189 // ..., words[seq[len(seq)-2]:seq[len(seq)-1]]. 190 // 191 // The implementation runs in O(n log n) time, where n = len(words), 192 // using the algorithm described in D. S. Hirschberg and L. L. Larmore, 193 // “[The least weight subsequence problem],” FOCS 1985, pp. 137-143. 194 // 195 // [The least weight subsequence problem]: https://doi.org/10.1109/SFCS.1985.60 196 func wrap(words []string, max int) (seq []int) { 197 // The algorithm requires that our scoring function be concave, 198 // meaning that for all i₀ ≤ i₁ < j₀ ≤ j₁, 199 // weight(i₀, j₀) + weight(i₁, j₁) ≤ weight(i₀, j₁) + weight(i₁, j₀). 200 // 201 // Our weights are two-element pairs [hi, lo] 202 // ordered by elementwise comparison. 203 // The hi entry counts the weight for lines that are longer than max, 204 // and the lo entry counts the weight for lines that are not. 205 // This forces the algorithm to first minimize the number of lines 206 // that are longer than max, which correspond to lines with 207 // single very long words. Having done that, it can move on to 208 // minimizing the lo score, which is more interesting. 209 // 210 // The lo score is the sum for each line of the square of the 211 // number of spaces remaining at the end of the line and a 212 // penalty of 64 given out for not ending the line in a 213 // punctuation character (.,:;). 214 // The penalty is somewhat arbitrarily chosen by trying 215 // different amounts and judging how nice the wrapped text looks. 216 // Roughly speaking, using 64 means that we are willing to 217 // end a line with eight blank spaces in order to end at a 218 // punctuation character, even if the next word would fit in 219 // those spaces. 220 // 221 // We care about ending in punctuation characters because 222 // it makes the text easier to skim if not too many sentences 223 // or phrases begin with a single word on the previous line. 224 225 // A score is the score (also called weight) for a given line. 226 // add and cmp add and compare scores. 227 type score struct { 228 hi int64 229 lo int64 230 } 231 add := func(s, t score) score { return score{s.hi + t.hi, s.lo + t.lo} } 232 cmp := func(s, t score) int { 233 switch { 234 case s.hi < t.hi: 235 return -1 236 case s.hi > t.hi: 237 return +1 238 case s.lo < t.lo: 239 return -1 240 case s.lo > t.lo: 241 return +1 242 } 243 return 0 244 } 245 246 // total[j] is the total number of runes 247 // (including separating spaces) in words[:j]. 248 total := make([]int, len(words)+1) 249 total[0] = 0 250 for i, s := range words { 251 total[1+i] = total[i] + utf8.RuneCountInString(s) + 1 252 } 253 254 // weight returns weight(i, j). 255 weight := func(i, j int) score { 256 // On the last line, there is zero weight for being too short. 257 n := total[j] - 1 - total[i] 258 if j == len(words) && n <= max { 259 return score{0, 0} 260 } 261 262 // Otherwise the weight is the penalty plus the square of the number of 263 // characters remaining on the line or by which the line goes over. 264 // In the latter case, that value goes in the hi part of the score. 265 // (See note above.) 266 p := wrapPenalty(words[j-1]) 267 v := int64(max-n) * int64(max-n) 268 if n > max { 269 return score{v, p} 270 } 271 return score{0, v + p} 272 } 273 274 // The rest of this function is “The Basic Algorithm” from 275 // Hirschberg and Larmore's conference paper, 276 // using the same names as in the paper. 277 f := []score{{0, 0}} 278 g := func(i, j int) score { return add(f[i], weight(i, j)) } 279 280 bridge := func(a, b, c int) bool { 281 k := c + sort.Search(len(words)+1-c, func(k int) bool { 282 k += c 283 return cmp(g(a, k), g(b, k)) > 0 284 }) 285 if k > len(words) { 286 return true 287 } 288 return cmp(g(c, k), g(b, k)) <= 0 289 } 290 291 // d is a one-ended deque implemented as a slice. 292 d := make([]int, 1, len(words)) 293 d[0] = 0 294 bestleft := make([]int, 1, len(words)) 295 bestleft[0] = -1 296 for m := 1; m < len(words); m++ { 297 f = append(f, g(d[0], m)) 298 bestleft = append(bestleft, d[0]) 299 for len(d) > 1 && cmp(g(d[1], m+1), g(d[0], m+1)) <= 0 { 300 d = d[1:] // “Retire” 301 } 302 for len(d) > 1 && bridge(d[len(d)-2], d[len(d)-1], m) { 303 d = d[:len(d)-1] // “Fire” 304 } 305 if cmp(g(m, len(words)), g(d[len(d)-1], len(words))) < 0 { 306 d = append(d, m) // “Hire” 307 // The next few lines are not in the paper but are necessary 308 // to handle two-word inputs correctly. It appears to be 309 // just a bug in the paper's pseudocode. 310 if len(d) == 2 && cmp(g(d[1], m+1), g(d[0], m+1)) <= 0 { 311 d = d[1:] 312 } 313 } 314 } 315 bestleft = append(bestleft, d[0]) 316 317 // Recover least weight sequence from bestleft. 318 n := 1 319 for m := len(words); m > 0; m = bestleft[m] { 320 n++ 321 } 322 seq = make([]int, n) 323 for m := len(words); m > 0; m = bestleft[m] { 324 n-- 325 seq[n] = m 326 } 327 return seq 328 } 329 330 // wrapPenalty is the penalty for inserting a line break after word s. 331 func wrapPenalty(s string) int64 { 332 switch s[len(s)-1] { 333 case '.', ',', ':', ';': 334 return 0 335 } 336 return 64 337 }