github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/website/format.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file implements FormatSelections and FormatText.
     6  // FormatText is used to HTML-format Go and non-Go source
     7  // text with line numbers and highlighted sections. It is
     8  // built on top of FormatSelections, a generic formatter
     9  // for "selected" text.
    10  
    11  package main
    12  
    13  import (
    14  	"fmt"
    15  	"go/scanner"
    16  	"go/token"
    17  	"io"
    18  	"regexp"
    19  	"strconv"
    20  	"text/template"
    21  )
    22  
    23  // ----------------------------------------------------------------------------
    24  // Implementation of FormatSelections
    25  
    26  // A Selection is a function returning offset pairs []int{a, b}
    27  // describing consecutive non-overlapping text segments [a, b).
    28  // If there are no more segments, a Selection must return nil.
    29  //
    30  // TODO It's more efficient to return a pair (a, b int) instead
    31  //      of creating lots of slices. Need to determine how to
    32  //      indicate the end of a Selection.
    33  //
    34  type Selection func() []int
    35  
    36  // A LinkWriter writes some start or end "tag" to w for the text offset offs.
    37  // It is called by FormatSelections at the start or end of each link segment.
    38  //
    39  type LinkWriter func(w io.Writer, offs int, start bool)
    40  
    41  // A SegmentWriter formats a text according to selections and writes it to w.
    42  // The selections parameter is a bit set indicating which selections provided
    43  // to FormatSelections overlap with the text segment: If the n'th bit is set
    44  // in selections, the n'th selection provided to FormatSelections is overlapping
    45  // with the text.
    46  //
    47  type SegmentWriter func(w io.Writer, text []byte, selections int)
    48  
    49  // FormatSelections takes a text and writes it to w using link and segment
    50  // writers lw and sw as follows: lw is invoked for consecutive segment starts
    51  // and ends as specified through the links selection, and sw is invoked for
    52  // consecutive segments of text overlapped by the same selections as specified
    53  // by selections. The link writer lw may be nil, in which case the links
    54  // Selection is ignored.
    55  //
    56  func FormatSelections(w io.Writer, text []byte, lw LinkWriter, links Selection, sw SegmentWriter, selections ...Selection) {
    57  	// If we have a link writer, make the links
    58  	// selection the last entry in selections
    59  	if lw != nil {
    60  		selections = append(selections, links)
    61  	}
    62  
    63  	// compute the sequence of consecutive segment changes
    64  	changes := newMerger(selections)
    65  
    66  	// The i'th bit in bitset indicates that the text
    67  	// at the current offset is covered by selections[i].
    68  	bitset := 0
    69  	lastOffs := 0
    70  
    71  	// Text segments are written in a delayed fashion
    72  	// such that consecutive segments belonging to the
    73  	// same selection can be combined (peephole optimization).
    74  	// last describes the last segment which has not yet been written.
    75  	var last struct {
    76  		begin, end int // valid if begin < end
    77  		bitset     int
    78  	}
    79  
    80  	// flush writes the last delayed text segment
    81  	flush := func() {
    82  		if last.begin < last.end {
    83  			sw(w, text[last.begin:last.end], last.bitset)
    84  		}
    85  		last.begin = last.end // invalidate last
    86  	}
    87  
    88  	// segment runs the segment [lastOffs, end) with the selection
    89  	// indicated by bitset through the segment peephole optimizer.
    90  	segment := func(end int) {
    91  		if lastOffs < end { // ignore empty segments
    92  			if last.end != lastOffs || last.bitset != bitset {
    93  				// the last segment is not adjacent to or
    94  				// differs from the new one
    95  				flush()
    96  				// start a new segment
    97  				last.begin = lastOffs
    98  			}
    99  			last.end = end
   100  			last.bitset = bitset
   101  		}
   102  	}
   103  
   104  	for {
   105  		// get the next segment change
   106  		index, offs, start := changes.next()
   107  		if index < 0 || offs > len(text) {
   108  			// no more segment changes or the next change
   109  			// is past the end of the text - we're done
   110  			break
   111  		}
   112  		// determine the kind of segment change
   113  		if lw != nil && index == len(selections)-1 {
   114  			// we have a link segment change (see start of this function):
   115  			// format the previous selection segment, write the
   116  			// link tag and start a new selection segment
   117  			segment(offs)
   118  			flush()
   119  			lastOffs = offs
   120  			lw(w, offs, start)
   121  		} else {
   122  			// we have a selection change:
   123  			// format the previous selection segment, determine
   124  			// the new selection bitset and start a new segment
   125  			segment(offs)
   126  			lastOffs = offs
   127  			mask := 1 << uint(index)
   128  			if start {
   129  				bitset |= mask
   130  			} else {
   131  				bitset &^= mask
   132  			}
   133  		}
   134  	}
   135  	segment(len(text))
   136  	flush()
   137  }
   138  
   139  // A merger merges a slice of Selections and produces a sequence of
   140  // consecutive segment change events through repeated next() calls.
   141  //
   142  type merger struct {
   143  	selections []Selection
   144  	segments   [][]int // segments[i] is the next segment of selections[i]
   145  }
   146  
   147  const infinity int = 2e9
   148  
   149  func newMerger(selections []Selection) *merger {
   150  	segments := make([][]int, len(selections))
   151  	for i, sel := range selections {
   152  		segments[i] = []int{infinity, infinity}
   153  		if sel != nil {
   154  			if seg := sel(); seg != nil {
   155  				segments[i] = seg
   156  			}
   157  		}
   158  	}
   159  	return &merger{selections, segments}
   160  }
   161  
   162  // next returns the next segment change: index specifies the Selection
   163  // to which the segment belongs, offs is the segment start or end offset
   164  // as determined by the start value. If there are no more segment changes,
   165  // next returns an index value < 0.
   166  //
   167  func (m *merger) next() (index, offs int, start bool) {
   168  	// find the next smallest offset where a segment starts or ends
   169  	offs = infinity
   170  	index = -1
   171  	for i, seg := range m.segments {
   172  		switch {
   173  		case seg[0] < offs:
   174  			offs = seg[0]
   175  			index = i
   176  			start = true
   177  		case seg[1] < offs:
   178  			offs = seg[1]
   179  			index = i
   180  			start = false
   181  		}
   182  	}
   183  	if index < 0 {
   184  		// no offset found => all selections merged
   185  		return
   186  	}
   187  	// offset found - it's either the start or end offset but
   188  	// either way it is ok to consume the start offset: set it
   189  	// to infinity so it won't be considered in the following
   190  	// next call
   191  	m.segments[index][0] = infinity
   192  	if start {
   193  		return
   194  	}
   195  	// end offset found - consume it
   196  	m.segments[index][1] = infinity
   197  	// advance to the next segment for that selection
   198  	seg := m.selections[index]()
   199  	if seg == nil {
   200  		return
   201  	}
   202  	m.segments[index] = seg
   203  	return
   204  }
   205  
   206  // ----------------------------------------------------------------------------
   207  // Implementation of FormatText
   208  
   209  // lineSelection returns the line segments for text as a Selection.
   210  func lineSelection(text []byte) Selection {
   211  	i, j := 0, 0
   212  	return func() (seg []int) {
   213  		// find next newline, if any
   214  		for j < len(text) {
   215  			j++
   216  			if text[j-1] == '\n' {
   217  				break
   218  			}
   219  		}
   220  		if i < j {
   221  			// text[i:j] constitutes a line
   222  			seg = []int{i, j}
   223  			i = j
   224  		}
   225  		return
   226  	}
   227  }
   228  
   229  // commentSelection returns the sequence of consecutive comments
   230  // in the Go src text as a Selection.
   231  //
   232  func commentSelection(src []byte) Selection {
   233  	var s scanner.Scanner
   234  	fset := token.NewFileSet()
   235  	file := fset.AddFile("", fset.Base(), len(src))
   236  	s.Init(file, src, nil, scanner.ScanComments)
   237  	return func() (seg []int) {
   238  		for {
   239  			pos, tok, lit := s.Scan()
   240  			if tok == token.EOF {
   241  				break
   242  			}
   243  			offs := file.Offset(pos)
   244  			if tok == token.COMMENT {
   245  				seg = []int{offs, offs + len(lit)}
   246  				break
   247  			}
   248  		}
   249  		return
   250  	}
   251  }
   252  
   253  // makeSelection is a helper function to make a Selection from a slice of pairs.
   254  func makeSelection(matches [][]int) Selection {
   255  	return func() (seg []int) {
   256  		if len(matches) > 0 {
   257  			seg = matches[0]
   258  			matches = matches[1:]
   259  		}
   260  		return
   261  	}
   262  }
   263  
   264  // regexpSelection computes the Selection for the regular expression expr in text.
   265  func regexpSelection(text []byte, expr string) Selection {
   266  	var matches [][]int
   267  	if rx, err := regexp.Compile(expr); err == nil {
   268  		matches = rx.FindAllIndex(text, -1)
   269  	}
   270  	return makeSelection(matches)
   271  }
   272  
   273  var selRx = regexp.MustCompile(`^([0-9]+):([0-9]+)`)
   274  
   275  // rangeSelection computes the Selection for a text range described
   276  // by the argument str; the range description must match the selRx
   277  // regular expression.
   278  //
   279  func rangeSelection(str string) Selection {
   280  	m := selRx.FindStringSubmatch(str)
   281  	if len(m) >= 2 {
   282  		from, _ := strconv.Atoi(m[1])
   283  		to, _ := strconv.Atoi(m[2])
   284  		if from < to {
   285  			return makeSelection([][]int{{from, to}})
   286  		}
   287  	}
   288  	return nil
   289  }
   290  
   291  // Span tags for all the possible selection combinations that may
   292  // be generated by FormatText. Selections are indicated by a bitset,
   293  // and the value of the bitset specifies the tag to be used.
   294  //
   295  // bit 0: comments
   296  // bit 1: highlights
   297  // bit 2: selections
   298  //
   299  var startTags = [][]byte{
   300  	/* 000 */ []byte(``),
   301  	/* 001 */ []byte(`<span class="comment">`),
   302  	/* 010 */ []byte(`<span class="highlight">`),
   303  	/* 011 */ []byte(`<span class="highlight-comment">`),
   304  	/* 100 */ []byte(`<span class="selection">`),
   305  	/* 101 */ []byte(`<span class="selection-comment">`),
   306  	/* 110 */ []byte(`<span class="selection-highlight">`),
   307  	/* 111 */ []byte(`<span class="selection-highlight-comment">`),
   308  }
   309  
   310  var endTag = []byte(`</span>`)
   311  
   312  func selectionTag(w io.Writer, text []byte, selections int) {
   313  	if selections < len(startTags) {
   314  		if tag := startTags[selections]; len(tag) > 0 {
   315  			w.Write(tag)
   316  			template.HTMLEscape(w, text)
   317  			w.Write(endTag)
   318  			return
   319  		}
   320  	}
   321  	template.HTMLEscape(w, text)
   322  }
   323  
   324  // FormatText HTML-escapes text and writes it to w.
   325  // Consecutive text segments are wrapped in HTML spans (with tags as
   326  // defined by startTags and endTag) as follows:
   327  //
   328  //	- if line >= 0, line number (ln) spans are inserted before each line,
   329  //	  starting with the value of line
   330  //	- if the text is Go source, comments get the "comment" span class
   331  //	- each occurrence of the regular expression pattern gets the "highlight"
   332  //	  span class
   333  //	- text segments covered by selection get the "selection" span class
   334  //
   335  // Comments, highlights, and selections may overlap arbitrarily; the respective
   336  // HTML span classes are specified in the startTags variable.
   337  //
   338  func FormatText(w io.Writer, text []byte, line int, goSource bool, pattern string, selection Selection) {
   339  	var comments, highlights Selection
   340  	if goSource {
   341  		comments = commentSelection(text)
   342  	}
   343  	if pattern != "" {
   344  		highlights = regexpSelection(text, pattern)
   345  	}
   346  	if line >= 0 || comments != nil || highlights != nil || selection != nil {
   347  		var lineTag LinkWriter
   348  		if line >= 0 {
   349  			lineTag = func(w io.Writer, _ int, start bool) {
   350  				if start {
   351  					fmt.Fprintf(w, "<a id=\"L%d\"></a><span class=\"ln\">%6d</span>\t", line, line)
   352  					line++
   353  				}
   354  			}
   355  		}
   356  		FormatSelections(w, text, lineTag, lineSelection(text), selectionTag, comments, highlights, selection)
   357  	} else {
   358  		template.HTMLEscape(w, text)
   359  	}
   360  }