github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/russross/blackfriday/markdown.go (about)

     1  //
     2  // Blackfriday Markdown Processor
     3  // Available at https://github.com/russross/blackfriday
     4  //
     5  // Copyright © 2011 Russ Ross <russ@russross.com>.
     6  // Distributed under the Simplified BSD License.
     7  // See README.md for details.
     8  //
     9  
    10  //
    11  //
    12  // Markdown parsing and processing
    13  //
    14  //
    15  
    16  // Package blackfriday is a markdown processor.
    17  //
    18  // It translates plain text with simple formatting rules into HTML or LaTeX.
    19  package blackfriday
    20  
    21  import (
    22  	"bytes"
    23  	"fmt"
    24  	"strings"
    25  	"unicode/utf8"
    26  )
    27  
// VERSION is the version string this package reports for itself.
const VERSION = "1.5"
    29  
// These are the supported markdown parsing extensions.
// OR these values together to select multiple extensions.
//
// Every EXTENSION_* constant is a distinct bit (1 << iota), so the order of
// the declarations below fixes the numeric value of each flag; new flags
// must be appended after the last EXTENSION_* entry.
const (
	EXTENSION_NO_INTRA_EMPHASIS          = 1 << iota // ignore emphasis markers inside words
	EXTENSION_TABLES                                 // render tables
	EXTENSION_FENCED_CODE                            // render fenced code blocks
	EXTENSION_AUTOLINK                               // detect embedded URLs that are not explicitly marked
	EXTENSION_STRIKETHROUGH                          // strikethrough text using ~~test~~
	EXTENSION_LAX_HTML_BLOCKS                        // loosen up HTML block parsing rules
	EXTENSION_SPACE_HEADERS                          // be strict about prefix header rules
	EXTENSION_HARD_LINE_BREAK                        // translate newlines into line breaks
	EXTENSION_TAB_SIZE_EIGHT                         // expand tabs to eight spaces instead of four
	EXTENSION_FOOTNOTES                              // Pandoc-style footnotes
	EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK             // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
	EXTENSION_HEADER_IDS                             // specify header IDs  with {#id}
	EXTENSION_TITLEBLOCK                             // Titleblock ala pandoc
	EXTENSION_AUTO_HEADER_IDS                        // Create the header ID from the text
	EXTENSION_BACKSLASH_LINE_BREAK                   // translate trailing backslashes into line breaks
	EXTENSION_DEFINITION_LISTS                       // render definition lists

	// commonHtmlFlags is the HTML renderer flag set that MarkdownCommon
	// passes to HtmlRenderer.
	commonHtmlFlags = 0 |
		HTML_USE_XHTML |
		HTML_USE_SMARTYPANTS |
		HTML_SMARTYPANTS_FRACTIONS |
		HTML_SMARTYPANTS_DASHES |
		HTML_SMARTYPANTS_LATEX_DASHES

	// commonExtensions is the set of parser extensions that MarkdownCommon
	// enables.
	commonExtensions = 0 |
		EXTENSION_NO_INTRA_EMPHASIS |
		EXTENSION_TABLES |
		EXTENSION_FENCED_CODE |
		EXTENSION_AUTOLINK |
		EXTENSION_STRIKETHROUGH |
		EXTENSION_SPACE_HEADERS |
		EXTENSION_HEADER_IDS |
		EXTENSION_BACKSLASH_LINE_BREAK |
		EXTENSION_DEFINITION_LISTS
)
    68  
// These are the possible flag values for the link renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
//
// The values are consecutive integers starting at zero (plain iota), not bit
// flags. Renderer.AutoLink receives one of them as its kind argument.
const (
	LINK_TYPE_NOT_AUTOLINK = iota
	LINK_TYPE_NORMAL
	LINK_TYPE_EMAIL
)
    77  
// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
//
// Each constant is a distinct bit (1 << iota). In this file the footnote
// renderer also uses LIST_ITEM_BEGINNING_OF_LIST and LIST_ITEM_CONTAINS_BLOCK
// when calling Renderer.FootnoteItem.
const (
	LIST_TYPE_ORDERED = 1 << iota
	LIST_TYPE_DEFINITION
	LIST_TYPE_TERM
	LIST_ITEM_CONTAINS_BLOCK
	LIST_ITEM_BEGINNING_OF_LIST
	LIST_ITEM_END_OF_LIST
)
    89  
// These are the possible flag values for the table cell renderer.
// A cell carries exactly one of these alignment values.
// These are mostly of interest if you are writing a new output format.
//
// Note that TABLE_ALIGNMENT_CENTER is defined as the combination of the LEFT
// and RIGHT bits, so a test such as flags&TABLE_ALIGNMENT_LEFT != 0 is also
// true for centered cells; compare against the full value instead.
const (
	TABLE_ALIGNMENT_LEFT = 1 << iota
	TABLE_ALIGNMENT_RIGHT
	TABLE_ALIGNMENT_CENTER = (TABLE_ALIGNMENT_LEFT | TABLE_ALIGNMENT_RIGHT)
)
    98  
// The size of a tab stop, in columns.
// The first pass uses TAB_SIZE_DEFAULT unless EXTENSION_TAB_SIZE_EIGHT is set,
// in which case it uses TAB_SIZE_EIGHT.
const (
	TAB_SIZE_DEFAULT = 4
	TAB_SIZE_EIGHT   = 8
)
   104  
// blockTags is a set of tags that are recognized as HTML block tags.
// Any of these can be included in markdown text without special escaping.
//
// The map is used purely as a set: the empty struct values carry no data and
// only key membership is meaningful. All keys are spelled in lower case.
var blockTags = map[string]struct{}{
	"blockquote": {},
	"del":        {},
	"div":        {},
	"dl":         {},
	"fieldset":   {},
	"form":       {},
	"h1":         {},
	"h2":         {},
	"h3":         {},
	"h4":         {},
	"h5":         {},
	"h6":         {},
	"iframe":     {},
	"ins":        {},
	"math":       {},
	"noscript":   {},
	"ol":         {},
	"pre":        {},
	"p":          {},
	"script":     {},
	"style":      {},
	"table":      {},
	"ul":         {},

	// HTML5
	"address":    {},
	"article":    {},
	"aside":      {},
	"canvas":     {},
	"figcaption": {},
	"figure":     {},
	"footer":     {},
	"header":     {},
	"hgroup":     {},
	"main":       {},
	"nav":        {},
	"output":     {},
	"progress":   {},
	"section":    {},
	"video":      {},
}
   149  
// Renderer is the rendering interface.
// This is mostly of interest if you are implementing a new rendering format.
//
// Every callback appends its output to the supplied buffer.
//
// When a byte slice is provided, it contains the (rendered) contents of the
// element.
//
// When a callback is provided instead, it will write the contents of the
// respective element directly to the output buffer and return true on success.
// If the callback returns false, the rendering function should reset the
// output buffer as though it had never been called.
//
// Currently Html and Latex implementations are provided.
type Renderer interface {
	// block-level callbacks
	BlockCode(out *bytes.Buffer, text []byte, lang string)
	BlockQuote(out *bytes.Buffer, text []byte)
	BlockHtml(out *bytes.Buffer, text []byte)
	Header(out *bytes.Buffer, text func() bool, level int, id string)
	HRule(out *bytes.Buffer)
	List(out *bytes.Buffer, text func() bool, flags int)
	ListItem(out *bytes.Buffer, text []byte, flags int)
	Paragraph(out *bytes.Buffer, text func() bool)
	Table(out *bytes.Buffer, header []byte, body []byte, columnData []int)
	TableRow(out *bytes.Buffer, text []byte)
	TableHeaderCell(out *bytes.Buffer, text []byte, flags int)
	TableCell(out *bytes.Buffer, text []byte, flags int)
	Footnotes(out *bytes.Buffer, text func() bool)
	FootnoteItem(out *bytes.Buffer, name, text []byte, flags int)
	TitleBlock(out *bytes.Buffer, text []byte)

	// Span-level callbacks
	AutoLink(out *bytes.Buffer, link []byte, kind int)
	CodeSpan(out *bytes.Buffer, text []byte)
	DoubleEmphasis(out *bytes.Buffer, text []byte)
	Emphasis(out *bytes.Buffer, text []byte)
	Image(out *bytes.Buffer, link []byte, title []byte, alt []byte)
	LineBreak(out *bytes.Buffer)
	Link(out *bytes.Buffer, link []byte, title []byte, content []byte)
	RawHtmlTag(out *bytes.Buffer, tag []byte)
	TripleEmphasis(out *bytes.Buffer, text []byte)
	StrikeThrough(out *bytes.Buffer, text []byte)
	FootnoteRef(out *bytes.Buffer, ref []byte, id int)

	// Low-level callbacks
	Entity(out *bytes.Buffer, entity []byte)
	NormalText(out *bytes.Buffer, text []byte)

	// Header and footer
	// The second pass calls DocumentHeader before any block is rendered and
	// DocumentFooter after the last block (and after any footnotes).
	DocumentHeader(out *bytes.Buffer)
	DocumentFooter(out *bytes.Buffer)

	// GetFlags reports the renderer's own option flags.
	// NOTE(review): presumably the flag set the renderer was constructed
	// with; confirm against the concrete implementations.
	GetFlags() int
}
   203  
// Callback functions for inline parsing. One such function is defined
// for each character that triggers a response when parsing inline data.
// The callbacks are stored in parser.inlineCallback, indexed by the
// triggering byte.
// NOTE(review): the int result is presumably the number of bytes consumed
// from data (0 meaning "not handled") - confirm in the inline parser.
type inlineParser func(p *parser, out *bytes.Buffer, data []byte, offset int) int
   207  
// parser holds runtime state used while parsing a single document.
// It is constructed by MarkdownOptions, which the other public entry points
// in this file (Markdown, MarkdownBasic, MarkdownCommon) delegate to.
//
// Field overview:
//   - r: the Renderer that receives all output callbacks.
//   - refOverride: optional user hook consulted by getRef before refs.
//   - refs: link and footnote definitions collected during the first pass,
//     keyed by lower-cased reference id.
//   - inlineCallback: inline handlers indexed by their triggering byte; a nil
//     entry means the byte has no handler registered.
//   - flags: the EXTENSION_* bit set in effect.
//   - nesting / maxNesting: nesting depth bookkeeping; maxNesting is set to 16
//     and nesting must be back at zero when the second pass finishes.
//   - insideLink: starts out false; maintained by code outside this section.
type parser struct {
	r              Renderer
	refOverride    ReferenceOverrideFunc
	refs           map[string]*reference
	inlineCallback [256]inlineParser
	flags          int
	nesting        int
	maxNesting     int
	insideLink     bool

	// Footnotes need to be ordered as well as available to quickly check for
	// presence. If a ref is also a footnote, it's stored both in refs and here
	// in notes. Slice is nil if footnotes not enabled.
	notes []*reference
}
   225  
   226  func (p *parser) getRef(refid string) (ref *reference, found bool) {
   227  	if p.refOverride != nil {
   228  		r, overridden := p.refOverride(refid)
   229  		if overridden {
   230  			if r == nil {
   231  				return nil, false
   232  			}
   233  			return &reference{
   234  				link:     []byte(r.Link),
   235  				title:    []byte(r.Title),
   236  				noteId:   0,
   237  				hasBlock: false,
   238  				text:     []byte(r.Text)}, true
   239  		}
   240  	}
   241  	// refs are case insensitive
   242  	ref, found = p.refs[strings.ToLower(refid)]
   243  	return ref, found
   244  }
   245  
   246  //
   247  //
   248  // Public interface
   249  //
   250  //
   251  
// Reference represents the details of a link.
// It is the value a ReferenceOverrideFunc returns; the parser copies its
// fields into its own internal representation when the override applies.
// See the documentation in Options for more details on use-case.
type Reference struct {
	// Link is usually the URL the reference points to.
	Link string
	// Title is the alternate text describing the link in more detail.
	Title string
	// Text is the optional text to override the ref with if the syntax used was
	// [refid][]
	Text string
}
   263  
// ReferenceOverrideFunc is expected to be called with a reference string and
// return either a valid Reference type that the reference string maps to or
// nil. If overridden is false, the default reference logic will be executed.
// If overridden is true and ref is nil, the reference is treated as not
// found and the document's own definitions are not consulted.
// See the documentation in Options for more details on use-case.
type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)
   269  
// Options represents configurable overrides and callbacks (in addition to the
// extension flag set) for configuring a Markdown parse.
// It is consumed by MarkdownOptions; the zero value enables no extensions and
// installs no override.
type Options struct {
	// Extensions is a flag set of bit-wise ORed extension bits. See the
	// EXTENSION_* flags defined in this package.
	Extensions int

	// ReferenceOverride is an optional function callback that is called every
	// time a reference is resolved.
	//
	// In Markdown, the link reference syntax can be made to resolve a link to
	// a reference instead of an inline URL, in one of the following ways:
	//
	//  * [link text][refid]
	//  * [refid][]
	//
	// Usually, the refid is defined at the bottom of the Markdown document. If
	// this override function is provided, the refid is passed to the override
	// function first, before consulting the defined refids at the bottom. If
	// the override function indicates an override did not occur, the refids at
	// the bottom will be used to fill in the link details.
	ReferenceOverride ReferenceOverrideFunc
}
   293  
   294  // MarkdownBasic is a convenience function for simple rendering.
   295  // It processes markdown input with no extensions enabled.
   296  func MarkdownBasic(input []byte) []byte {
   297  	// set up the HTML renderer
   298  	htmlFlags := HTML_USE_XHTML
   299  	renderer := HtmlRenderer(htmlFlags, "", "")
   300  
   301  	// set up the parser
   302  	return MarkdownOptions(input, renderer, Options{Extensions: 0})
   303  }
   304  
   305  // Call Markdown with most useful extensions enabled
   306  // MarkdownCommon is a convenience function for simple rendering.
   307  // It processes markdown input with common extensions enabled, including:
   308  //
   309  // * Smartypants processing with smart fractions and LaTeX dashes
   310  //
   311  // * Intra-word emphasis suppression
   312  //
   313  // * Tables
   314  //
   315  // * Fenced code blocks
   316  //
   317  // * Autolinking
   318  //
   319  // * Strikethrough support
   320  //
   321  // * Strict header parsing
   322  //
   323  // * Custom Header IDs
   324  func MarkdownCommon(input []byte) []byte {
   325  	// set up the HTML renderer
   326  	renderer := HtmlRenderer(commonHtmlFlags, "", "")
   327  	return MarkdownOptions(input, renderer, Options{
   328  		Extensions: commonExtensions})
   329  }
   330  
   331  // Markdown is the main rendering function.
   332  // It parses and renders a block of markdown-encoded text.
   333  // The supplied Renderer is used to format the output, and extensions dictates
   334  // which non-standard extensions are enabled.
   335  //
   336  // To use the supplied Html or LaTeX renderers, see HtmlRenderer and
   337  // LatexRenderer, respectively.
   338  func Markdown(input []byte, renderer Renderer, extensions int) []byte {
   339  	return MarkdownOptions(input, renderer, Options{
   340  		Extensions: extensions})
   341  }
   342  
   343  // MarkdownOptions is just like Markdown but takes additional options through
   344  // the Options struct.
   345  func MarkdownOptions(input []byte, renderer Renderer, opts Options) []byte {
   346  	// no point in parsing if we can't render
   347  	if renderer == nil {
   348  		return nil
   349  	}
   350  
   351  	extensions := opts.Extensions
   352  
   353  	// fill in the render structure
   354  	p := new(parser)
   355  	p.r = renderer
   356  	p.flags = extensions
   357  	p.refOverride = opts.ReferenceOverride
   358  	p.refs = make(map[string]*reference)
   359  	p.maxNesting = 16
   360  	p.insideLink = false
   361  
   362  	// register inline parsers
   363  	p.inlineCallback['*'] = emphasis
   364  	p.inlineCallback['_'] = emphasis
   365  	if extensions&EXTENSION_STRIKETHROUGH != 0 {
   366  		p.inlineCallback['~'] = emphasis
   367  	}
   368  	p.inlineCallback['`'] = codeSpan
   369  	p.inlineCallback['\n'] = lineBreak
   370  	p.inlineCallback['['] = link
   371  	p.inlineCallback['<'] = leftAngle
   372  	p.inlineCallback['\\'] = escape
   373  	p.inlineCallback['&'] = entity
   374  
   375  	if extensions&EXTENSION_AUTOLINK != 0 {
   376  		p.inlineCallback[':'] = autoLink
   377  	}
   378  
   379  	if extensions&EXTENSION_FOOTNOTES != 0 {
   380  		p.notes = make([]*reference, 0)
   381  	}
   382  
   383  	first := firstPass(p, input)
   384  	second := secondPass(p, first)
   385  	return second
   386  }
   387  
   388  // first pass:
   389  // - extract references
   390  // - expand tabs
   391  // - normalize newlines
   392  // - copy everything else
   393  func firstPass(p *parser, input []byte) []byte {
   394  	var out bytes.Buffer
   395  	tabSize := TAB_SIZE_DEFAULT
   396  	if p.flags&EXTENSION_TAB_SIZE_EIGHT != 0 {
   397  		tabSize = TAB_SIZE_EIGHT
   398  	}
   399  	beg, end := 0, 0
   400  	lastFencedCodeBlockEnd := 0
   401  	for beg < len(input) { // iterate over lines
   402  		if end = isReference(p, input[beg:], tabSize); end > 0 {
   403  			beg += end
   404  		} else { // skip to the next line
   405  			end = beg
   406  			for end < len(input) && input[end] != '\n' && input[end] != '\r' {
   407  				end++
   408  			}
   409  
   410  			if p.flags&EXTENSION_FENCED_CODE != 0 {
   411  				// track fenced code block boundaries to suppress tab expansion
   412  				// inside them:
   413  				if beg >= lastFencedCodeBlockEnd {
   414  					if i := p.fencedCode(&out, input[beg:], false); i > 0 {
   415  						lastFencedCodeBlockEnd = beg + i
   416  					}
   417  				}
   418  			}
   419  
   420  			// add the line body if present
   421  			if end > beg {
   422  				if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
   423  					out.Write(input[beg:end])
   424  				} else {
   425  					expandTabs(&out, input[beg:end], tabSize)
   426  				}
   427  			}
   428  			out.WriteByte('\n')
   429  
   430  			if end < len(input) && input[end] == '\r' {
   431  				end++
   432  			}
   433  			if end < len(input) && input[end] == '\n' {
   434  				end++
   435  			}
   436  
   437  			beg = end
   438  		}
   439  	}
   440  
   441  	// empty input?
   442  	if out.Len() == 0 {
   443  		out.WriteByte('\n')
   444  	}
   445  
   446  	return out.Bytes()
   447  }
   448  
   449  // second pass: actual rendering
   450  func secondPass(p *parser, input []byte) []byte {
   451  	var output bytes.Buffer
   452  
   453  	p.r.DocumentHeader(&output)
   454  	p.block(&output, input)
   455  
   456  	if p.flags&EXTENSION_FOOTNOTES != 0 && len(p.notes) > 0 {
   457  		p.r.Footnotes(&output, func() bool {
   458  			flags := LIST_ITEM_BEGINNING_OF_LIST
   459  			for i := 0; i < len(p.notes); i += 1 {
   460  				ref := p.notes[i]
   461  				var buf bytes.Buffer
   462  				if ref.hasBlock {
   463  					flags |= LIST_ITEM_CONTAINS_BLOCK
   464  					p.block(&buf, ref.title)
   465  				} else {
   466  					p.inline(&buf, ref.title)
   467  				}
   468  				p.r.FootnoteItem(&output, ref.link, buf.Bytes(), flags)
   469  				flags &^= LIST_ITEM_BEGINNING_OF_LIST | LIST_ITEM_CONTAINS_BLOCK
   470  			}
   471  
   472  			return true
   473  		})
   474  	}
   475  
   476  	p.r.DocumentFooter(&output)
   477  
   478  	if p.nesting != 0 {
   479  		panic("Nesting level did not end at zero")
   480  	}
   481  
   482  	return output.Bytes()
   483  }
   484  
   485  //
   486  // Link references
   487  //
   488  // This section implements support for references that (usually) appear
   489  // as footnotes in a document, and can be referenced anywhere in the document.
   490  // The basic format is:
   491  //
   492  //    [1]: http://www.google.com/ "Google"
   493  //    [2]: http://www.github.com/ "Github"
   494  //
   495  // Anywhere in the document, the reference can be linked by referring to its
   496  // label, i.e., 1 and 2 in this example, as in:
   497  //
   498  //    This library is hosted on [Github][2], a git hosting site.
   499  //
   500  // Actual footnotes as specified in Pandoc and supported by some other Markdown
   501  // libraries such as php-markdown are also taken care of. They look like this:
   502  //
   503  //    This sentence needs a bit of further explanation.[^note]
   504  //
   505  //    [^note]: This is the explanation.
   506  //
   507  // Footnotes should be placed at the end of the document in an ordered list.
   508  // Inline footnotes such as:
   509  //
   510  //    Inline footnotes^[Not supported.] also exist.
   511  //
   512  // are not yet supported.
   513  
// References are parsed and stored in this struct.
//
// For an ordinary link reference, link is the destination and title the
// optional title. For a footnote the fields are reused: link holds the
// footnote id and title holds the footnote's text, with hasBlock recording
// whether that text spans indented continuation lines. text is only filled
// in for references produced by a ReferenceOverrideFunc.
type reference struct {
	link     []byte
	title    []byte
	noteId   int // 0 if not a footnote ref
	hasBlock bool
	text     []byte
}
   522  
   523  func (r *reference) String() string {
   524  	return fmt.Sprintf("{link: %q, title: %q, text: %q, noteId: %d, hasBlock: %v}",
   525  		r.link, r.title, r.text, r.noteId, r.hasBlock)
   526  }
   527  
   528  // Check whether or not data starts with a reference link.
   529  // If so, it is parsed and stored in the list of references
   530  // (in the render struct).
   531  // Returns the number of bytes to skip to move past it,
   532  // or zero if the first line is not a reference.
   533  func isReference(p *parser, data []byte, tabSize int) int {
   534  	// up to 3 optional leading spaces
   535  	if len(data) < 4 {
   536  		return 0
   537  	}
   538  	i := 0
   539  	for i < 3 && data[i] == ' ' {
   540  		i++
   541  	}
   542  
   543  	noteId := 0
   544  
   545  	// id part: anything but a newline between brackets
   546  	if data[i] != '[' {
   547  		return 0
   548  	}
   549  	i++
   550  	if p.flags&EXTENSION_FOOTNOTES != 0 {
   551  		if i < len(data) && data[i] == '^' {
   552  			// we can set it to anything here because the proper noteIds will
   553  			// be assigned later during the second pass. It just has to be != 0
   554  			noteId = 1
   555  			i++
   556  		}
   557  	}
   558  	idOffset := i
   559  	for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
   560  		i++
   561  	}
   562  	if i >= len(data) || data[i] != ']' {
   563  		return 0
   564  	}
   565  	idEnd := i
   566  
   567  	// spacer: colon (space | tab)* newline? (space | tab)*
   568  	i++
   569  	if i >= len(data) || data[i] != ':' {
   570  		return 0
   571  	}
   572  	i++
   573  	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
   574  		i++
   575  	}
   576  	if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
   577  		i++
   578  		if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
   579  			i++
   580  		}
   581  	}
   582  	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
   583  		i++
   584  	}
   585  	if i >= len(data) {
   586  		return 0
   587  	}
   588  
   589  	var (
   590  		linkOffset, linkEnd   int
   591  		titleOffset, titleEnd int
   592  		lineEnd               int
   593  		raw                   []byte
   594  		hasBlock              bool
   595  	)
   596  
   597  	if p.flags&EXTENSION_FOOTNOTES != 0 && noteId != 0 {
   598  		linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
   599  		lineEnd = linkEnd
   600  	} else {
   601  		linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
   602  	}
   603  	if lineEnd == 0 {
   604  		return 0
   605  	}
   606  
   607  	// a valid ref has been found
   608  
   609  	ref := &reference{
   610  		noteId:   noteId,
   611  		hasBlock: hasBlock,
   612  	}
   613  
   614  	if noteId > 0 {
   615  		// reusing the link field for the id since footnotes don't have links
   616  		ref.link = data[idOffset:idEnd]
   617  		// if footnote, it's not really a title, it's the contained text
   618  		ref.title = raw
   619  	} else {
   620  		ref.link = data[linkOffset:linkEnd]
   621  		ref.title = data[titleOffset:titleEnd]
   622  	}
   623  
   624  	// id matches are case-insensitive
   625  	id := string(bytes.ToLower(data[idOffset:idEnd]))
   626  
   627  	p.refs[id] = ref
   628  
   629  	return lineEnd
   630  }
   631  
   632  func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
   633  	// link: whitespace-free sequence, optionally between angle brackets
   634  	if data[i] == '<' {
   635  		i++
   636  	}
   637  	linkOffset = i
   638  	if i == len(data) {
   639  		return
   640  	}
   641  	for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
   642  		i++
   643  	}
   644  	linkEnd = i
   645  	if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
   646  		linkOffset++
   647  		linkEnd--
   648  	}
   649  
   650  	// optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
   651  	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
   652  		i++
   653  	}
   654  	if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
   655  		return
   656  	}
   657  
   658  	// compute end-of-line
   659  	if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
   660  		lineEnd = i
   661  	}
   662  	if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
   663  		lineEnd++
   664  	}
   665  
   666  	// optional (space|tab)* spacer after a newline
   667  	if lineEnd > 0 {
   668  		i = lineEnd + 1
   669  		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
   670  			i++
   671  		}
   672  	}
   673  
   674  	// optional title: any non-newline sequence enclosed in '"() alone on its line
   675  	if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
   676  		i++
   677  		titleOffset = i
   678  
   679  		// look for EOL
   680  		for i < len(data) && data[i] != '\n' && data[i] != '\r' {
   681  			i++
   682  		}
   683  		if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
   684  			titleEnd = i + 1
   685  		} else {
   686  			titleEnd = i
   687  		}
   688  
   689  		// step back
   690  		i--
   691  		for i > titleOffset && (data[i] == ' ' || data[i] == '\t') {
   692  			i--
   693  		}
   694  		if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
   695  			lineEnd = titleEnd
   696  			titleEnd = i
   697  		}
   698  	}
   699  
   700  	return
   701  }
   702  
   703  // The first bit of this logic is the same as (*parser).listItem, but the rest
   704  // is much simpler. This function simply finds the entire block and shifts it
   705  // over by one tab if it is indeed a block (just returns the line if it's not).
   706  // blockEnd is the end of the section in the input buffer, and contents is the
   707  // extracted text that was shifted over one tab. It will need to be rendered at
   708  // the end of the document.
   709  func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
   710  	if i == 0 || len(data) == 0 {
   711  		return
   712  	}
   713  
   714  	// skip leading whitespace on first line
   715  	for i < len(data) && data[i] == ' ' {
   716  		i++
   717  	}
   718  
   719  	blockStart = i
   720  
   721  	// find the end of the line
   722  	blockEnd = i
   723  	for i < len(data) && data[i-1] != '\n' {
   724  		i++
   725  	}
   726  
   727  	// get working buffer
   728  	var raw bytes.Buffer
   729  
   730  	// put the first line into the working buffer
   731  	raw.Write(data[blockEnd:i])
   732  	blockEnd = i
   733  
   734  	// process the following lines
   735  	containsBlankLine := false
   736  
   737  gatherLines:
   738  	for blockEnd < len(data) {
   739  		i++
   740  
   741  		// find the end of this line
   742  		for i < len(data) && data[i-1] != '\n' {
   743  			i++
   744  		}
   745  
   746  		// if it is an empty line, guess that it is part of this item
   747  		// and move on to the next line
   748  		if p.isEmpty(data[blockEnd:i]) > 0 {
   749  			containsBlankLine = true
   750  			blockEnd = i
   751  			continue
   752  		}
   753  
   754  		n := 0
   755  		if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
   756  			// this is the end of the block.
   757  			// we don't want to include this last line in the index.
   758  			break gatherLines
   759  		}
   760  
   761  		// if there were blank lines before this one, insert a new one now
   762  		if containsBlankLine {
   763  			raw.WriteByte('\n')
   764  			containsBlankLine = false
   765  		}
   766  
   767  		// get rid of that first tab, write to buffer
   768  		raw.Write(data[blockEnd+n : i])
   769  		hasBlock = true
   770  
   771  		blockEnd = i
   772  	}
   773  
   774  	if data[blockEnd-1] != '\n' {
   775  		raw.WriteByte('\n')
   776  	}
   777  
   778  	contents = raw.Bytes()
   779  
   780  	return
   781  }
   782  
   783  //
   784  //
   785  // Miscellaneous helper functions
   786  //
   787  //
   788  
   789  // Test if a character is a punctuation symbol.
   790  // Taken from a private function in regexp in the stdlib.
   791  func ispunct(c byte) bool {
   792  	for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") {
   793  		if c == r {
   794  			return true
   795  		}
   796  	}
   797  	return false
   798  }
   799  
   800  // Test if a character is a whitespace character.
   801  func isspace(c byte) bool {
   802  	return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'
   803  }
   804  
   805  // Test if a character is letter.
   806  func isletter(c byte) bool {
   807  	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
   808  }
   809  
   810  // Test if a character is a letter or a digit.
   811  // TODO: check when this is looking for ASCII alnum and when it should use unicode
   812  func isalnum(c byte) bool {
   813  	return (c >= '0' && c <= '9') || isletter(c)
   814  }
   815  
   816  // Replace tab characters with spaces, aligning to the next TAB_SIZE column.
   817  // always ends output with a newline
   818  func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
   819  	// first, check for common cases: no tabs, or only tabs at beginning of line
   820  	i, prefix := 0, 0
   821  	slowcase := false
   822  	for i = 0; i < len(line); i++ {
   823  		if line[i] == '\t' {
   824  			if prefix == i {
   825  				prefix++
   826  			} else {
   827  				slowcase = true
   828  				break
   829  			}
   830  		}
   831  	}
   832  
   833  	// no need to decode runes if all tabs are at the beginning of the line
   834  	if !slowcase {
   835  		for i = 0; i < prefix*tabSize; i++ {
   836  			out.WriteByte(' ')
   837  		}
   838  		out.Write(line[prefix:])
   839  		return
   840  	}
   841  
   842  	// the slow case: we need to count runes to figure out how
   843  	// many spaces to insert for each tab
   844  	column := 0
   845  	i = 0
   846  	for i < len(line) {
   847  		start := i
   848  		for i < len(line) && line[i] != '\t' {
   849  			_, size := utf8.DecodeRune(line[i:])
   850  			i += size
   851  			column++
   852  		}
   853  
   854  		if i > start {
   855  			out.Write(line[start:i])
   856  		}
   857  
   858  		if i >= len(line) {
   859  			break
   860  		}
   861  
   862  		for {
   863  			out.WriteByte(' ')
   864  			column++
   865  			if column%tabSize == 0 {
   866  				break
   867  			}
   868  		}
   869  
   870  		i++
   871  	}
   872  }
   873  
   874  // Find if a line counts as indented or not.
   875  // Returns number of characters the indent is (0 = not indented).
   876  func isIndented(data []byte, indentSize int) int {
   877  	if len(data) == 0 {
   878  		return 0
   879  	}
   880  	if data[0] == '\t' {
   881  		return 1
   882  	}
   883  	if len(data) < indentSize {
   884  		return 0
   885  	}
   886  	for i := 0; i < indentSize; i++ {
   887  		if data[i] != ' ' {
   888  			return 0
   889  		}
   890  	}
   891  	return indentSize
   892  }
   893  
   894  // Create a url-safe slug for fragments
   895  func slugify(in []byte) []byte {
   896  	if len(in) == 0 {
   897  		return in
   898  	}
   899  	out := make([]byte, 0, len(in))
   900  	sym := false
   901  
   902  	for _, ch := range in {
   903  		if isalnum(ch) {
   904  			sym = false
   905  			out = append(out, ch)
   906  		} else if sym {
   907  			continue
   908  		} else {
   909  			out = append(out, '-')
   910  			sym = true
   911  		}
   912  	}
   913  	var a, b int
   914  	var ch byte
   915  	for a, ch = range out {
   916  		if ch != '-' {
   917  			break
   918  		}
   919  	}
   920  	for b = len(out) - 1; b > 0; b-- {
   921  		if out[b] != '-' {
   922  			break
   923  		}
   924  	}
   925  	return out[a : b+1]
   926  }