golang.org/x/tools@v0.21.0/godoc/format.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file implements FormatSelections and FormatText. 6 // FormatText is used to HTML-format Go and non-Go source 7 // text with line numbers and highlighted sections. It is 8 // built on top of FormatSelections, a generic formatter 9 // for "selected" text. 10 11 package godoc 12 13 import ( 14 "fmt" 15 "go/scanner" 16 "go/token" 17 "io" 18 "regexp" 19 "strconv" 20 "text/template" 21 ) 22 23 // ---------------------------------------------------------------------------- 24 // Implementation of FormatSelections 25 26 // A Segment describes a text segment [start, end). 27 // The zero value of a Segment is a ready-to-use empty segment. 28 type Segment struct { 29 start, end int 30 } 31 32 func (seg *Segment) isEmpty() bool { return seg.start >= seg.end } 33 34 // A Selection is an "iterator" function returning a text segment. 35 // Repeated calls to a selection return consecutive, non-overlapping, 36 // non-empty segments, followed by an infinite sequence of empty 37 // segments. The first empty segment marks the end of the selection. 38 type Selection func() Segment 39 40 // A LinkWriter writes some start or end "tag" to w for the text offset offs. 41 // It is called by FormatSelections at the start or end of each link segment. 42 type LinkWriter func(w io.Writer, offs int, start bool) 43 44 // A SegmentWriter formats a text according to selections and writes it to w. 45 // The selections parameter is a bit set indicating which selections provided 46 // to FormatSelections overlap with the text segment: If the n'th bit is set 47 // in selections, the n'th selection provided to FormatSelections is overlapping 48 // with the text. 49 type SegmentWriter func(w io.Writer, text []byte, selections int) 50 51 // FormatSelections takes a text and writes it to w using link and segment 52 // writers lw and sw as follows: lw is invoked for consecutive segment starts 53 // and ends as specified through the links selection, and sw is invoked for 54 // consecutive segments of text overlapped by the same selections as specified 55 // by selections. The link writer lw may be nil, in which case the links 56 // Selection is ignored. 57 func FormatSelections(w io.Writer, text []byte, lw LinkWriter, links Selection, sw SegmentWriter, selections ...Selection) { 58 // If we have a link writer, make the links 59 // selection the last entry in selections 60 if lw != nil { 61 selections = append(selections, links) 62 } 63 64 // compute the sequence of consecutive segment changes 65 changes := newMerger(selections) 66 67 // The i'th bit in bitset indicates that the text 68 // at the current offset is covered by selections[i]. 69 bitset := 0 70 lastOffs := 0 71 72 // Text segments are written in a delayed fashion 73 // such that consecutive segments belonging to the 74 // same selection can be combined (peephole optimization). 75 // last describes the last segment which has not yet been written. 76 var last struct { 77 begin, end int // valid if begin < end 78 bitset int 79 } 80 81 // flush writes the last delayed text segment 82 flush := func() { 83 if last.begin < last.end { 84 sw(w, text[last.begin:last.end], last.bitset) 85 } 86 last.begin = last.end // invalidate last 87 } 88 89 // segment runs the segment [lastOffs, end) with the selection 90 // indicated by bitset through the segment peephole optimizer. 91 segment := func(end int) { 92 if lastOffs < end { // ignore empty segments 93 if last.end != lastOffs || last.bitset != bitset { 94 // the last segment is not adjacent to or 95 // differs from the new one 96 flush() 97 // start a new segment 98 last.begin = lastOffs 99 } 100 last.end = end 101 last.bitset = bitset 102 } 103 } 104 105 for { 106 // get the next segment change 107 index, offs, start := changes.next() 108 if index < 0 || offs > len(text) { 109 // no more segment changes or the next change 110 // is past the end of the text - we're done 111 break 112 } 113 // determine the kind of segment change 114 if lw != nil && index == len(selections)-1 { 115 // we have a link segment change (see start of this function): 116 // format the previous selection segment, write the 117 // link tag and start a new selection segment 118 segment(offs) 119 flush() 120 lastOffs = offs 121 lw(w, offs, start) 122 } else { 123 // we have a selection change: 124 // format the previous selection segment, determine 125 // the new selection bitset and start a new segment 126 segment(offs) 127 lastOffs = offs 128 mask := 1 << uint(index) 129 if start { 130 bitset |= mask 131 } else { 132 bitset &^= mask 133 } 134 } 135 } 136 segment(len(text)) 137 flush() 138 } 139 140 // A merger merges a slice of Selections and produces a sequence of 141 // consecutive segment change events through repeated next() calls. 142 type merger struct { 143 selections []Selection 144 segments []Segment // segments[i] is the next segment of selections[i] 145 } 146 147 const infinity int = 2e9 148 149 func newMerger(selections []Selection) *merger { 150 segments := make([]Segment, len(selections)) 151 for i, sel := range selections { 152 segments[i] = Segment{infinity, infinity} 153 if sel != nil { 154 if seg := sel(); !seg.isEmpty() { 155 segments[i] = seg 156 } 157 } 158 } 159 return &merger{selections, segments} 160 } 161 162 // next returns the next segment change: index specifies the Selection 163 // to which the segment belongs, offs is the segment start or end offset 164 // as determined by the start value. If there are no more segment changes, 165 // next returns an index value < 0. 166 func (m *merger) next() (index, offs int, start bool) { 167 // find the next smallest offset where a segment starts or ends 168 offs = infinity 169 index = -1 170 for i, seg := range m.segments { 171 switch { 172 case seg.start < offs: 173 offs = seg.start 174 index = i 175 start = true 176 case seg.end < offs: 177 offs = seg.end 178 index = i 179 start = false 180 } 181 } 182 if index < 0 { 183 // no offset found => all selections merged 184 return 185 } 186 // offset found - it's either the start or end offset but 187 // either way it is ok to consume the start offset: set it 188 // to infinity so it won't be considered in the following 189 // next call 190 m.segments[index].start = infinity 191 if start { 192 return 193 } 194 // end offset found - consume it 195 m.segments[index].end = infinity 196 // advance to the next segment for that selection 197 seg := m.selections[index]() 198 if !seg.isEmpty() { 199 m.segments[index] = seg 200 } 201 return 202 } 203 204 // ---------------------------------------------------------------------------- 205 // Implementation of FormatText 206 207 // lineSelection returns the line segments for text as a Selection. 208 func lineSelection(text []byte) Selection { 209 i, j := 0, 0 210 return func() (seg Segment) { 211 // find next newline, if any 212 for j < len(text) { 213 j++ 214 if text[j-1] == '\n' { 215 break 216 } 217 } 218 if i < j { 219 // text[i:j] constitutes a line 220 seg = Segment{i, j} 221 i = j 222 } 223 return 224 } 225 } 226 227 // tokenSelection returns, as a selection, the sequence of 228 // consecutive occurrences of token sel in the Go src text. 229 func tokenSelection(src []byte, sel token.Token) Selection { 230 var s scanner.Scanner 231 fset := token.NewFileSet() 232 file := fset.AddFile("", fset.Base(), len(src)) 233 s.Init(file, src, nil, scanner.ScanComments) 234 return func() (seg Segment) { 235 for { 236 pos, tok, lit := s.Scan() 237 if tok == token.EOF { 238 break 239 } 240 offs := file.Offset(pos) 241 if tok == sel { 242 seg = Segment{offs, offs + len(lit)} 243 break 244 } 245 } 246 return 247 } 248 } 249 250 // makeSelection is a helper function to make a Selection from a slice of pairs. 251 // Pairs describing empty segments are ignored. 252 func makeSelection(matches [][]int) Selection { 253 i := 0 254 return func() Segment { 255 for i < len(matches) { 256 m := matches[i] 257 i++ 258 if m[0] < m[1] { 259 // non-empty segment 260 return Segment{m[0], m[1]} 261 } 262 } 263 return Segment{} 264 } 265 } 266 267 // regexpSelection computes the Selection for the regular expression expr in text. 268 func regexpSelection(text []byte, expr string) Selection { 269 var matches [][]int 270 if rx, err := regexp.Compile(expr); err == nil { 271 matches = rx.FindAllIndex(text, -1) 272 } 273 return makeSelection(matches) 274 } 275 276 var selRx = regexp.MustCompile(`^([0-9]+):([0-9]+)`) 277 278 // RangeSelection computes the Selection for a text range described 279 // by the argument str; the range description must match the selRx 280 // regular expression. 281 func RangeSelection(str string) Selection { 282 m := selRx.FindStringSubmatch(str) 283 if len(m) >= 2 { 284 from, _ := strconv.Atoi(m[1]) 285 to, _ := strconv.Atoi(m[2]) 286 if from < to { 287 return makeSelection([][]int{{from, to}}) 288 } 289 } 290 return nil 291 } 292 293 // Span tags for all the possible selection combinations that may 294 // be generated by FormatText. Selections are indicated by a bitset, 295 // and the value of the bitset specifies the tag to be used. 296 // 297 // bit 0: comments 298 // bit 1: highlights 299 // bit 2: selections 300 var startTags = [][]byte{ 301 /* 000 */ []byte(``), 302 /* 001 */ []byte(`<span class="comment">`), 303 /* 010 */ []byte(`<span class="highlight">`), 304 /* 011 */ []byte(`<span class="highlight-comment">`), 305 /* 100 */ []byte(`<span class="selection">`), 306 /* 101 */ []byte(`<span class="selection-comment">`), 307 /* 110 */ []byte(`<span class="selection-highlight">`), 308 /* 111 */ []byte(`<span class="selection-highlight-comment">`), 309 } 310 311 var endTag = []byte(`</span>`) 312 313 func selectionTag(w io.Writer, text []byte, selections int) { 314 if selections < len(startTags) { 315 if tag := startTags[selections]; len(tag) > 0 { 316 w.Write(tag) 317 template.HTMLEscape(w, text) 318 w.Write(endTag) 319 return 320 } 321 } 322 template.HTMLEscape(w, text) 323 } 324 325 // FormatText HTML-escapes text and writes it to w. 326 // Consecutive text segments are wrapped in HTML spans (with tags as 327 // defined by startTags and endTag) as follows: 328 // 329 // - if line >= 0, line number (ln) spans are inserted before each line, 330 // starting with the value of line 331 // - if the text is Go source, comments get the "comment" span class 332 // - each occurrence of the regular expression pattern gets the "highlight" 333 // span class 334 // - text segments covered by selection get the "selection" span class 335 // 336 // Comments, highlights, and selections may overlap arbitrarily; the respective 337 // HTML span classes are specified in the startTags variable. 338 func FormatText(w io.Writer, text []byte, line int, goSource bool, pattern string, selection Selection) { 339 var comments, highlights Selection 340 if goSource { 341 comments = tokenSelection(text, token.COMMENT) 342 } 343 if pattern != "" { 344 highlights = regexpSelection(text, pattern) 345 } 346 if line >= 0 || comments != nil || highlights != nil || selection != nil { 347 var lineTag LinkWriter 348 if line >= 0 { 349 lineTag = func(w io.Writer, _ int, start bool) { 350 if start { 351 fmt.Fprintf(w, "<span id=\"L%d\" class=\"ln\">%6d</span>", line, line) 352 line++ 353 } 354 } 355 } 356 FormatSelections(w, text, lineTag, lineSelection(text), selectionTag, comments, highlights, selection) 357 } else { 358 template.HTMLEscape(w, text) 359 } 360 }