github.com/april1989/origin-go-tools@v0.0.32/godoc/format.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file implements FormatSelections and FormatText.
     6  // FormatText is used to HTML-format Go and non-Go source
     7  // text with line numbers and highlighted sections. It is
     8  // built on top of FormatSelections, a generic formatter
     9  // for "selected" text.
    10  
    11  package godoc
    12  
    13  import (
    14  	"fmt"
    15  	"go/scanner"
    16  	"go/token"
    17  	"io"
    18  	"regexp"
    19  	"strconv"
    20  	"text/template"
    21  )
    22  
    23  // ----------------------------------------------------------------------------
    24  // Implementation of FormatSelections
    25  
    26  // A Segment describes a text segment [start, end).
    27  // The zero value of a Segment is a ready-to-use empty segment.
    28  //
    29  type Segment struct {
    30  	start, end int
    31  }
    32  
    33  func (seg *Segment) isEmpty() bool { return seg.start >= seg.end }
    34  
    35  // A Selection is an "iterator" function returning a text segment.
    36  // Repeated calls to a selection return consecutive, non-overlapping,
    37  // non-empty segments, followed by an infinite sequence of empty
    38  // segments. The first empty segment marks the end of the selection.
    39  //
    40  type Selection func() Segment
    41  
    42  // A LinkWriter writes some start or end "tag" to w for the text offset offs.
    43  // It is called by FormatSelections at the start or end of each link segment.
    44  //
    45  type LinkWriter func(w io.Writer, offs int, start bool)
    46  
    47  // A SegmentWriter formats a text according to selections and writes it to w.
    48  // The selections parameter is a bit set indicating which selections provided
    49  // to FormatSelections overlap with the text segment: If the n'th bit is set
    50  // in selections, the n'th selection provided to FormatSelections is overlapping
    51  // with the text.
    52  //
    53  type SegmentWriter func(w io.Writer, text []byte, selections int)
    54  
    55  // FormatSelections takes a text and writes it to w using link and segment
    56  // writers lw and sw as follows: lw is invoked for consecutive segment starts
    57  // and ends as specified through the links selection, and sw is invoked for
    58  // consecutive segments of text overlapped by the same selections as specified
    59  // by selections. The link writer lw may be nil, in which case the links
    60  // Selection is ignored.
    61  //
    62  func FormatSelections(w io.Writer, text []byte, lw LinkWriter, links Selection, sw SegmentWriter, selections ...Selection) {
    63  	// If we have a link writer, make the links
    64  	// selection the last entry in selections
    65  	if lw != nil {
    66  		selections = append(selections, links)
    67  	}
    68  
    69  	// compute the sequence of consecutive segment changes
    70  	changes := newMerger(selections)
    71  
    72  	// The i'th bit in bitset indicates that the text
    73  	// at the current offset is covered by selections[i].
    74  	bitset := 0
    75  	lastOffs := 0
    76  
    77  	// Text segments are written in a delayed fashion
    78  	// such that consecutive segments belonging to the
    79  	// same selection can be combined (peephole optimization).
    80  	// last describes the last segment which has not yet been written.
    81  	var last struct {
    82  		begin, end int // valid if begin < end
    83  		bitset     int
    84  	}
    85  
    86  	// flush writes the last delayed text segment
    87  	flush := func() {
    88  		if last.begin < last.end {
    89  			sw(w, text[last.begin:last.end], last.bitset)
    90  		}
    91  		last.begin = last.end // invalidate last
    92  	}
    93  
    94  	// segment runs the segment [lastOffs, end) with the selection
    95  	// indicated by bitset through the segment peephole optimizer.
    96  	segment := func(end int) {
    97  		if lastOffs < end { // ignore empty segments
    98  			if last.end != lastOffs || last.bitset != bitset {
    99  				// the last segment is not adjacent to or
   100  				// differs from the new one
   101  				flush()
   102  				// start a new segment
   103  				last.begin = lastOffs
   104  			}
   105  			last.end = end
   106  			last.bitset = bitset
   107  		}
   108  	}
   109  
   110  	for {
   111  		// get the next segment change
   112  		index, offs, start := changes.next()
   113  		if index < 0 || offs > len(text) {
   114  			// no more segment changes or the next change
   115  			// is past the end of the text - we're done
   116  			break
   117  		}
   118  		// determine the kind of segment change
   119  		if lw != nil && index == len(selections)-1 {
   120  			// we have a link segment change (see start of this function):
   121  			// format the previous selection segment, write the
   122  			// link tag and start a new selection segment
   123  			segment(offs)
   124  			flush()
   125  			lastOffs = offs
   126  			lw(w, offs, start)
   127  		} else {
   128  			// we have a selection change:
   129  			// format the previous selection segment, determine
   130  			// the new selection bitset and start a new segment
   131  			segment(offs)
   132  			lastOffs = offs
   133  			mask := 1 << uint(index)
   134  			if start {
   135  				bitset |= mask
   136  			} else {
   137  				bitset &^= mask
   138  			}
   139  		}
   140  	}
   141  	segment(len(text))
   142  	flush()
   143  }
   144  
   145  // A merger merges a slice of Selections and produces a sequence of
   146  // consecutive segment change events through repeated next() calls.
   147  //
   148  type merger struct {
   149  	selections []Selection
   150  	segments   []Segment // segments[i] is the next segment of selections[i]
   151  }
   152  
   153  const infinity int = 2e9
   154  
   155  func newMerger(selections []Selection) *merger {
   156  	segments := make([]Segment, len(selections))
   157  	for i, sel := range selections {
   158  		segments[i] = Segment{infinity, infinity}
   159  		if sel != nil {
   160  			if seg := sel(); !seg.isEmpty() {
   161  				segments[i] = seg
   162  			}
   163  		}
   164  	}
   165  	return &merger{selections, segments}
   166  }
   167  
   168  // next returns the next segment change: index specifies the Selection
   169  // to which the segment belongs, offs is the segment start or end offset
   170  // as determined by the start value. If there are no more segment changes,
   171  // next returns an index value < 0.
   172  //
   173  func (m *merger) next() (index, offs int, start bool) {
   174  	// find the next smallest offset where a segment starts or ends
   175  	offs = infinity
   176  	index = -1
   177  	for i, seg := range m.segments {
   178  		switch {
   179  		case seg.start < offs:
   180  			offs = seg.start
   181  			index = i
   182  			start = true
   183  		case seg.end < offs:
   184  			offs = seg.end
   185  			index = i
   186  			start = false
   187  		}
   188  	}
   189  	if index < 0 {
   190  		// no offset found => all selections merged
   191  		return
   192  	}
   193  	// offset found - it's either the start or end offset but
   194  	// either way it is ok to consume the start offset: set it
   195  	// to infinity so it won't be considered in the following
   196  	// next call
   197  	m.segments[index].start = infinity
   198  	if start {
   199  		return
   200  	}
   201  	// end offset found - consume it
   202  	m.segments[index].end = infinity
   203  	// advance to the next segment for that selection
   204  	seg := m.selections[index]()
   205  	if !seg.isEmpty() {
   206  		m.segments[index] = seg
   207  	}
   208  	return
   209  }
   210  
   211  // ----------------------------------------------------------------------------
   212  // Implementation of FormatText
   213  
   214  // lineSelection returns the line segments for text as a Selection.
   215  func lineSelection(text []byte) Selection {
   216  	i, j := 0, 0
   217  	return func() (seg Segment) {
   218  		// find next newline, if any
   219  		for j < len(text) {
   220  			j++
   221  			if text[j-1] == '\n' {
   222  				break
   223  			}
   224  		}
   225  		if i < j {
   226  			// text[i:j] constitutes a line
   227  			seg = Segment{i, j}
   228  			i = j
   229  		}
   230  		return
   231  	}
   232  }
   233  
   234  // tokenSelection returns, as a selection, the sequence of
   235  // consecutive occurrences of token sel in the Go src text.
   236  //
   237  func tokenSelection(src []byte, sel token.Token) Selection {
   238  	var s scanner.Scanner
   239  	fset := token.NewFileSet()
   240  	file := fset.AddFile("", fset.Base(), len(src))
   241  	s.Init(file, src, nil, scanner.ScanComments)
   242  	return func() (seg Segment) {
   243  		for {
   244  			pos, tok, lit := s.Scan()
   245  			if tok == token.EOF {
   246  				break
   247  			}
   248  			offs := file.Offset(pos)
   249  			if tok == sel {
   250  				seg = Segment{offs, offs + len(lit)}
   251  				break
   252  			}
   253  		}
   254  		return
   255  	}
   256  }
   257  
   258  // makeSelection is a helper function to make a Selection from a slice of pairs.
   259  // Pairs describing empty segments are ignored.
   260  //
   261  func makeSelection(matches [][]int) Selection {
   262  	i := 0
   263  	return func() Segment {
   264  		for i < len(matches) {
   265  			m := matches[i]
   266  			i++
   267  			if m[0] < m[1] {
   268  				// non-empty segment
   269  				return Segment{m[0], m[1]}
   270  			}
   271  		}
   272  		return Segment{}
   273  	}
   274  }
   275  
   276  // regexpSelection computes the Selection for the regular expression expr in text.
   277  func regexpSelection(text []byte, expr string) Selection {
   278  	var matches [][]int
   279  	if rx, err := regexp.Compile(expr); err == nil {
   280  		matches = rx.FindAllIndex(text, -1)
   281  	}
   282  	return makeSelection(matches)
   283  }
   284  
   285  var selRx = regexp.MustCompile(`^([0-9]+):([0-9]+)`)
   286  
   287  // RangeSelection computes the Selection for a text range described
   288  // by the argument str; the range description must match the selRx
   289  // regular expression.
   290  func RangeSelection(str string) Selection {
   291  	m := selRx.FindStringSubmatch(str)
   292  	if len(m) >= 2 {
   293  		from, _ := strconv.Atoi(m[1])
   294  		to, _ := strconv.Atoi(m[2])
   295  		if from < to {
   296  			return makeSelection([][]int{{from, to}})
   297  		}
   298  	}
   299  	return nil
   300  }
   301  
   302  // Span tags for all the possible selection combinations that may
   303  // be generated by FormatText. Selections are indicated by a bitset,
   304  // and the value of the bitset specifies the tag to be used.
   305  //
   306  // bit 0: comments
   307  // bit 1: highlights
   308  // bit 2: selections
   309  //
   310  var startTags = [][]byte{
   311  	/* 000 */ []byte(``),
   312  	/* 001 */ []byte(`<span class="comment">`),
   313  	/* 010 */ []byte(`<span class="highlight">`),
   314  	/* 011 */ []byte(`<span class="highlight-comment">`),
   315  	/* 100 */ []byte(`<span class="selection">`),
   316  	/* 101 */ []byte(`<span class="selection-comment">`),
   317  	/* 110 */ []byte(`<span class="selection-highlight">`),
   318  	/* 111 */ []byte(`<span class="selection-highlight-comment">`),
   319  }
   320  
   321  var endTag = []byte(`</span>`)
   322  
   323  func selectionTag(w io.Writer, text []byte, selections int) {
   324  	if selections < len(startTags) {
   325  		if tag := startTags[selections]; len(tag) > 0 {
   326  			w.Write(tag)
   327  			template.HTMLEscape(w, text)
   328  			w.Write(endTag)
   329  			return
   330  		}
   331  	}
   332  	template.HTMLEscape(w, text)
   333  }
   334  
   335  // FormatText HTML-escapes text and writes it to w.
   336  // Consecutive text segments are wrapped in HTML spans (with tags as
   337  // defined by startTags and endTag) as follows:
   338  //
   339  //	- if line >= 0, line number (ln) spans are inserted before each line,
   340  //	  starting with the value of line
   341  //	- if the text is Go source, comments get the "comment" span class
   342  //	- each occurrence of the regular expression pattern gets the "highlight"
   343  //	  span class
   344  //	- text segments covered by selection get the "selection" span class
   345  //
   346  // Comments, highlights, and selections may overlap arbitrarily; the respective
   347  // HTML span classes are specified in the startTags variable.
   348  //
   349  func FormatText(w io.Writer, text []byte, line int, goSource bool, pattern string, selection Selection) {
   350  	var comments, highlights Selection
   351  	if goSource {
   352  		comments = tokenSelection(text, token.COMMENT)
   353  	}
   354  	if pattern != "" {
   355  		highlights = regexpSelection(text, pattern)
   356  	}
   357  	if line >= 0 || comments != nil || highlights != nil || selection != nil {
   358  		var lineTag LinkWriter
   359  		if line >= 0 {
   360  			lineTag = func(w io.Writer, _ int, start bool) {
   361  				if start {
   362  					fmt.Fprintf(w, "<span id=\"L%d\" class=\"ln\">%6d</span>", line, line)
   363  					line++
   364  				}
   365  			}
   366  		}
   367  		FormatSelections(w, text, lineTag, lineSelection(text), selectionTag, comments, highlights, selection)
   368  	} else {
   369  		template.HTMLEscape(w, text)
   370  	}
   371  }