//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//

//
//
// Markdown parsing and processing
//
//

// Blackfriday markdown processor.
//
// Translates plain text with simple formatting rules into HTML or LaTeX.
package blackfriday

import (
	"bytes"
	"fmt"
	"strings"
	"unicode/utf8"
)

// VERSION is the version string of this package.
const VERSION = "1.5"

// These are the supported markdown parsing extensions.
// OR these values together to select multiple extensions.
//
// Every EXTENSION_* value is a distinct bit produced by 1 << iota, so the
// declaration order fixes the numeric values and must not be changed.
const (
	EXTENSION_NO_INTRA_EMPHASIS          = 1 << iota // ignore emphasis markers inside words
	EXTENSION_TABLES                                 // render tables
	EXTENSION_FENCED_CODE                            // render fenced code blocks
	EXTENSION_AUTOLINK                               // detect embedded URLs that are not explicitly marked
	EXTENSION_STRIKETHROUGH                          // strikethrough text using ~~test~~
	EXTENSION_LAX_HTML_BLOCKS                        // loosen up HTML block parsing rules
	EXTENSION_SPACE_HEADERS                          // be strict about prefix header rules
	EXTENSION_HARD_LINE_BREAK                        // translate newlines into line breaks
	EXTENSION_TAB_SIZE_EIGHT                         // expand tabs to eight spaces instead of four
	EXTENSION_FOOTNOTES                              // Pandoc-style footnotes
	EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK             // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
	EXTENSION_HEADER_IDS                             // specify header IDs with {#id}
	EXTENSION_TITLEBLOCK                             // Titleblock ala pandoc
	EXTENSION_AUTO_HEADER_IDS                        // Create the header ID from the text
	EXTENSION_BACKSLASH_LINE_BREAK                   // translate trailing backslashes into line breaks
	EXTENSION_DEFINITION_LISTS                       // render definition lists

	// commonHtmlFlags is the HTML renderer flag set that MarkdownCommon
	// passes to HtmlRenderer.
	commonHtmlFlags = 0 |
		HTML_USE_XHTML |
		HTML_USE_SMARTYPANTS |
		HTML_SMARTYPANTS_FRACTIONS |
		HTML_SMARTYPANTS_DASHES |
		HTML_SMARTYPANTS_LATEX_DASHES

	// commonExtensions is the parser extension set that MarkdownCommon enables.
	commonExtensions = 0 |
		EXTENSION_NO_INTRA_EMPHASIS |
		EXTENSION_TABLES |
		EXTENSION_FENCED_CODE |
		EXTENSION_AUTOLINK |
		EXTENSION_STRIKETHROUGH |
		EXTENSION_SPACE_HEADERS |
		EXTENSION_HEADER_IDS |
		EXTENSION_BACKSLASH_LINE_BREAK |
		EXTENSION_DEFINITION_LISTS
)

// These are the possible flag values for the link renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	LINK_TYPE_NOT_AUTOLINK = iota
	LINK_TYPE_NORMAL
	LINK_TYPE_EMAIL
)

// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	LIST_TYPE_ORDERED = 1 << iota
	LIST_TYPE_DEFINITION
	LIST_TYPE_TERM
	LIST_ITEM_CONTAINS_BLOCK
	LIST_ITEM_BEGINNING_OF_LIST
	LIST_ITEM_END_OF_LIST
)

// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
//
// TABLE_ALIGNMENT_CENTER is defined as both the LEFT and RIGHT bits set.
const (
	TABLE_ALIGNMENT_LEFT = 1 << iota
	TABLE_ALIGNMENT_RIGHT
	TABLE_ALIGNMENT_CENTER = (TABLE_ALIGNMENT_LEFT | TABLE_ALIGNMENT_RIGHT)
)

// The size of a tab stop.
// TAB_SIZE_EIGHT is selected by the EXTENSION_TAB_SIZE_EIGHT extension;
// otherwise TAB_SIZE_DEFAULT is used.
const (
	TAB_SIZE_DEFAULT = 4
	TAB_SIZE_EIGHT   = 8
)

// blockTags is a set of tags that are recognized as HTML block tags.
// Any of these can be included in markdown text without special escaping.
var blockTags = map[string]struct{}{
	// The empty struct value makes this map a set: only the presence of a
	// key matters.
	"blockquote": {},
	"del":        {},
	"div":        {},
	"dl":         {},
	"fieldset":   {},
	"form":       {},
	"h1":         {},
	"h2":         {},
	"h3":         {},
	"h4":         {},
	"h5":         {},
	"h6":         {},
	"iframe":     {},
	"ins":        {},
	"math":       {},
	"noscript":   {},
	"ol":         {},
	"pre":        {},
	"p":          {},
	"script":     {},
	"style":      {},
	"table":      {},
	"ul":         {},

	// HTML5
	"address":    {},
	"article":    {},
	"aside":      {},
	"canvas":     {},
	"figcaption": {},
	"figure":     {},
	"footer":     {},
	"header":     {},
	"hgroup":     {},
	"main":       {},
	"nav":        {},
	"output":     {},
	"progress":   {},
	"section":    {},
	"video":      {},
}

// Renderer is the rendering interface.
// This is mostly of interest if you are implementing a new rendering format.
//
// When a byte slice is provided, it contains the (rendered) contents of the
// element.
//
// When a callback is provided instead, it will write the contents of the
// respective element directly to the output buffer and return true on success.
// If the callback returns false, the rendering function should reset the
// output buffer as though it had never been called.
//
// Currently Html and Latex implementations are provided.
type Renderer interface {
	// block-level callbacks
	//
	// Methods whose text parameter is a func() bool receive a callback that
	// writes the element's contents; methods whose text parameter is a
	// []byte receive the contents directly.
	BlockCode(out *bytes.Buffer, text []byte, lang string)
	BlockQuote(out *bytes.Buffer, text []byte)
	BlockHtml(out *bytes.Buffer, text []byte)
	Header(out *bytes.Buffer, text func() bool, level int, id string)
	HRule(out *bytes.Buffer)
	List(out *bytes.Buffer, text func() bool, flags int)
	// ListItem receives flags built from the LIST_TYPE_* / LIST_ITEM_* values.
	ListItem(out *bytes.Buffer, text []byte, flags int)
	Paragraph(out *bytes.Buffer, text func() bool)
	Table(out *bytes.Buffer, header []byte, body []byte, columnData []int)
	TableRow(out *bytes.Buffer, text []byte)
	// NOTE(review): flags for the two cell callbacks is presumably one of
	// the TABLE_ALIGNMENT_* values; confirm against the table parser.
	TableHeaderCell(out *bytes.Buffer, text []byte, flags int)
	TableCell(out *bytes.Buffer, text []byte, flags int)
	Footnotes(out *bytes.Buffer, text func() bool)
	// FootnoteItem is called by secondPass once per collected footnote with
	// the footnote id as name, its rendered contents as text, and
	// LIST_ITEM_* flags.
	FootnoteItem(out *bytes.Buffer, name, text []byte, flags int)
	TitleBlock(out *bytes.Buffer, text []byte)

	// Span-level callbacks
	//
	// NOTE(review): kind in AutoLink is presumably one of the LINK_TYPE_*
	// values; confirm against the inline parser.
	AutoLink(out *bytes.Buffer, link []byte, kind int)
	CodeSpan(out *bytes.Buffer, text []byte)
	DoubleEmphasis(out *bytes.Buffer, text []byte)
	Emphasis(out *bytes.Buffer, text []byte)
	Image(out *bytes.Buffer, link []byte, title []byte, alt []byte)
	LineBreak(out *bytes.Buffer)
	Link(out *bytes.Buffer, link []byte, title []byte, content []byte)
	RawHtmlTag(out *bytes.Buffer, tag []byte)
	TripleEmphasis(out *bytes.Buffer, text []byte)
	StrikeThrough(out *bytes.Buffer, text []byte)
	FootnoteRef(out *bytes.Buffer, ref []byte, id int)

	// Low-level callbacks
	Entity(out *bytes.Buffer, entity []byte)
	NormalText(out *bytes.Buffer, text []byte)

	// Header and footer
	//
	// secondPass calls DocumentHeader before rendering the document body and
	// DocumentFooter after the body and any footnotes.
	DocumentHeader(out *bytes.Buffer)
	DocumentFooter(out *bytes.Buffer)

	GetFlags() int
}

// Callback functions for inline parsing. One such function is defined
// for each character that triggers a response when parsing inline data.
206 type inlineParser func(p *parser, out *bytes.Buffer, data []byte, offset int) int 207 208 // Parser holds runtime state used by the parser. 209 // This is constructed by the Markdown function. 210 type parser struct { 211 r Renderer 212 refOverride ReferenceOverrideFunc 213 refs map[string]*reference 214 inlineCallback [256]inlineParser 215 flags int 216 nesting int 217 maxNesting int 218 insideLink bool 219 220 // Footnotes need to be ordered as well as available to quickly check for 221 // presence. If a ref is also a footnote, it's stored both in refs and here 222 // in notes. Slice is nil if footnotes not enabled. 223 notes []*reference 224 } 225 226 func (p *parser) getRef(refid string) (ref *reference, found bool) { 227 if p.refOverride != nil { 228 r, overridden := p.refOverride(refid) 229 if overridden { 230 if r == nil { 231 return nil, false 232 } 233 return &reference{ 234 link: []byte(r.Link), 235 title: []byte(r.Title), 236 noteId: 0, 237 hasBlock: false, 238 text: []byte(r.Text)}, true 239 } 240 } 241 // refs are case insensitive 242 ref, found = p.refs[strings.ToLower(refid)] 243 return ref, found 244 } 245 246 // 247 // 248 // Public interface 249 // 250 // 251 252 // Reference represents the details of a link. 253 // See the documentation in Options for more details on use-case. 254 type Reference struct { 255 // Link is usually the URL the reference points to. 256 Link string 257 // Title is the alternate text describing the link in more detail. 258 Title string 259 // Text is the optional text to override the ref with if the syntax used was 260 // [refid][] 261 Text string 262 } 263 264 // ReferenceOverrideFunc is expected to be called with a reference string and 265 // return either a valid Reference type that the reference string maps to or 266 // nil. If overridden is false, the default reference logic will be executed. 267 // See the documentation in Options for more details on use-case. 
268 type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool) 269 270 // Options represents configurable overrides and callbacks (in addition to the 271 // extension flag set) for configuring a Markdown parse. 272 type Options struct { 273 // Extensions is a flag set of bit-wise ORed extension bits. See the 274 // EXTENSION_* flags defined in this package. 275 Extensions int 276 277 // ReferenceOverride is an optional function callback that is called every 278 // time a reference is resolved. 279 // 280 // In Markdown, the link reference syntax can be made to resolve a link to 281 // a reference instead of an inline URL, in one of the following ways: 282 // 283 // * [link text][refid] 284 // * [refid][] 285 // 286 // Usually, the refid is defined at the bottom of the Markdown document. If 287 // this override function is provided, the refid is passed to the override 288 // function first, before consulting the defined refids at the bottom. If 289 // the override function indicates an override did not occur, the refids at 290 // the bottom will be used to fill in the link details. 291 ReferenceOverride ReferenceOverrideFunc 292 } 293 294 // MarkdownBasic is a convenience function for simple rendering. 295 // It processes markdown input with no extensions enabled. 296 func MarkdownBasic(input []byte) []byte { 297 // set up the HTML renderer 298 htmlFlags := HTML_USE_XHTML 299 renderer := HtmlRenderer(htmlFlags, "", "") 300 301 // set up the parser 302 return MarkdownOptions(input, renderer, Options{Extensions: 0}) 303 } 304 305 // Call Markdown with most useful extensions enabled 306 // MarkdownCommon is a convenience function for simple rendering. 
307 // It processes markdown input with common extensions enabled, including: 308 // 309 // * Smartypants processing with smart fractions and LaTeX dashes 310 // 311 // * Intra-word emphasis suppression 312 // 313 // * Tables 314 // 315 // * Fenced code blocks 316 // 317 // * Autolinking 318 // 319 // * Strikethrough support 320 // 321 // * Strict header parsing 322 // 323 // * Custom Header IDs 324 func MarkdownCommon(input []byte) []byte { 325 // set up the HTML renderer 326 renderer := HtmlRenderer(commonHtmlFlags, "", "") 327 return MarkdownOptions(input, renderer, Options{ 328 Extensions: commonExtensions}) 329 } 330 331 // Markdown is the main rendering function. 332 // It parses and renders a block of markdown-encoded text. 333 // The supplied Renderer is used to format the output, and extensions dictates 334 // which non-standard extensions are enabled. 335 // 336 // To use the supplied Html or LaTeX renderers, see HtmlRenderer and 337 // LatexRenderer, respectively. 338 func Markdown(input []byte, renderer Renderer, extensions int) []byte { 339 return MarkdownOptions(input, renderer, Options{ 340 Extensions: extensions}) 341 } 342 343 // MarkdownOptions is just like Markdown but takes additional options through 344 // the Options struct. 
345 func MarkdownOptions(input []byte, renderer Renderer, opts Options) []byte { 346 // no point in parsing if we can't render 347 if renderer == nil { 348 return nil 349 } 350 351 extensions := opts.Extensions 352 353 // fill in the render structure 354 p := new(parser) 355 p.r = renderer 356 p.flags = extensions 357 p.refOverride = opts.ReferenceOverride 358 p.refs = make(map[string]*reference) 359 p.maxNesting = 16 360 p.insideLink = false 361 362 // register inline parsers 363 p.inlineCallback['*'] = emphasis 364 p.inlineCallback['_'] = emphasis 365 if extensions&EXTENSION_STRIKETHROUGH != 0 { 366 p.inlineCallback['~'] = emphasis 367 } 368 p.inlineCallback['`'] = codeSpan 369 p.inlineCallback['\n'] = lineBreak 370 p.inlineCallback['['] = link 371 p.inlineCallback['<'] = leftAngle 372 p.inlineCallback['\\'] = escape 373 p.inlineCallback['&'] = entity 374 375 if extensions&EXTENSION_AUTOLINK != 0 { 376 p.inlineCallback[':'] = autoLink 377 } 378 379 if extensions&EXTENSION_FOOTNOTES != 0 { 380 p.notes = make([]*reference, 0) 381 } 382 383 first := firstPass(p, input) 384 second := secondPass(p, first) 385 return second 386 } 387 388 // first pass: 389 // - extract references 390 // - expand tabs 391 // - normalize newlines 392 // - copy everything else 393 func firstPass(p *parser, input []byte) []byte { 394 var out bytes.Buffer 395 tabSize := TAB_SIZE_DEFAULT 396 if p.flags&EXTENSION_TAB_SIZE_EIGHT != 0 { 397 tabSize = TAB_SIZE_EIGHT 398 } 399 beg, end := 0, 0 400 lastFencedCodeBlockEnd := 0 401 for beg < len(input) { // iterate over lines 402 if end = isReference(p, input[beg:], tabSize); end > 0 { 403 beg += end 404 } else { // skip to the next line 405 end = beg 406 for end < len(input) && input[end] != '\n' && input[end] != '\r' { 407 end++ 408 } 409 410 if p.flags&EXTENSION_FENCED_CODE != 0 { 411 // track fenced code block boundaries to suppress tab expansion 412 // inside them: 413 if beg >= lastFencedCodeBlockEnd { 414 if i := p.fencedCode(&out, 
input[beg:], false); i > 0 { 415 lastFencedCodeBlockEnd = beg + i 416 } 417 } 418 } 419 420 // add the line body if present 421 if end > beg { 422 if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks. 423 out.Write(input[beg:end]) 424 } else { 425 expandTabs(&out, input[beg:end], tabSize) 426 } 427 } 428 out.WriteByte('\n') 429 430 if end < len(input) && input[end] == '\r' { 431 end++ 432 } 433 if end < len(input) && input[end] == '\n' { 434 end++ 435 } 436 437 beg = end 438 } 439 } 440 441 // empty input? 442 if out.Len() == 0 { 443 out.WriteByte('\n') 444 } 445 446 return out.Bytes() 447 } 448 449 // second pass: actual rendering 450 func secondPass(p *parser, input []byte) []byte { 451 var output bytes.Buffer 452 453 p.r.DocumentHeader(&output) 454 p.block(&output, input) 455 456 if p.flags&EXTENSION_FOOTNOTES != 0 && len(p.notes) > 0 { 457 p.r.Footnotes(&output, func() bool { 458 flags := LIST_ITEM_BEGINNING_OF_LIST 459 for i := 0; i < len(p.notes); i += 1 { 460 ref := p.notes[i] 461 var buf bytes.Buffer 462 if ref.hasBlock { 463 flags |= LIST_ITEM_CONTAINS_BLOCK 464 p.block(&buf, ref.title) 465 } else { 466 p.inline(&buf, ref.title) 467 } 468 p.r.FootnoteItem(&output, ref.link, buf.Bytes(), flags) 469 flags &^= LIST_ITEM_BEGINNING_OF_LIST | LIST_ITEM_CONTAINS_BLOCK 470 } 471 472 return true 473 }) 474 } 475 476 p.r.DocumentFooter(&output) 477 478 if p.nesting != 0 { 479 panic("Nesting level did not end at zero") 480 } 481 482 return output.Bytes() 483 } 484 485 // 486 // Link references 487 // 488 // This section implements support for references that (usually) appear 489 // as footnotes in a document, and can be referenced anywhere in the document. 
// The basic format is:
//
//    [1]: http://www.google.com/ "Google"
//    [2]: http://www.github.com/ "Github"
//
// Anywhere in the document, the reference can be linked by referring to its
// label, i.e., 1 and 2 in this example, as in:
//
//    This library is hosted on [Github][2], a git hosting site.
//
// Actual footnotes as specified in Pandoc and supported by some other Markdown
// libraries such as php-markdown are also taken care of. They look like this:
//
//    This sentence needs a bit of further explanation.[^note]
//
//    [^note]: This is the explanation.
//
// Footnotes should be placed at the end of the document in an ordered list.
// Inline footnotes such as:
//
//    Inline footnotes^[Not supported.] also exist.
//
// are not yet supported.

// reference is the parsed form of a link reference or footnote definition.
type reference struct {
	link     []byte // link destination; for footnotes, the footnote id
	title    []byte // link title; for footnotes, the footnote contents
	noteId   int    // 0 if not a footnote ref
	hasBlock bool   // set for footnotes whose contents span indented lines
	text     []byte // replacement text supplied by a reference override
}

// String formats every field of the reference for debugging output; the
// byte-slice fields are printed as quoted strings.
func (r *reference) String() string {
	const layout = "{link: %q, title: %q, text: %q, noteId: %d, hasBlock: %v}"
	return fmt.Sprintf(layout, r.link, r.title, r.text, r.noteId, r.hasBlock)
}

// Check whether or not data starts with a reference link.
// If so, it is parsed and stored in the list of references
// (in the render struct).
// Returns the number of bytes to skip to move past it,
// or zero if the first line is not a reference.
533 func isReference(p *parser, data []byte, tabSize int) int { 534 // up to 3 optional leading spaces 535 if len(data) < 4 { 536 return 0 537 } 538 i := 0 539 for i < 3 && data[i] == ' ' { 540 i++ 541 } 542 543 noteId := 0 544 545 // id part: anything but a newline between brackets 546 if data[i] != '[' { 547 return 0 548 } 549 i++ 550 if p.flags&EXTENSION_FOOTNOTES != 0 { 551 if i < len(data) && data[i] == '^' { 552 // we can set it to anything here because the proper noteIds will 553 // be assigned later during the second pass. It just has to be != 0 554 noteId = 1 555 i++ 556 } 557 } 558 idOffset := i 559 for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' { 560 i++ 561 } 562 if i >= len(data) || data[i] != ']' { 563 return 0 564 } 565 idEnd := i 566 567 // spacer: colon (space | tab)* newline? (space | tab)* 568 i++ 569 if i >= len(data) || data[i] != ':' { 570 return 0 571 } 572 i++ 573 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 574 i++ 575 } 576 if i < len(data) && (data[i] == '\n' || data[i] == '\r') { 577 i++ 578 if i < len(data) && data[i] == '\n' && data[i-1] == '\r' { 579 i++ 580 } 581 } 582 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 583 i++ 584 } 585 if i >= len(data) { 586 return 0 587 } 588 589 var ( 590 linkOffset, linkEnd int 591 titleOffset, titleEnd int 592 lineEnd int 593 raw []byte 594 hasBlock bool 595 ) 596 597 if p.flags&EXTENSION_FOOTNOTES != 0 && noteId != 0 { 598 linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize) 599 lineEnd = linkEnd 600 } else { 601 linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i) 602 } 603 if lineEnd == 0 { 604 return 0 605 } 606 607 // a valid ref has been found 608 609 ref := &reference{ 610 noteId: noteId, 611 hasBlock: hasBlock, 612 } 613 614 if noteId > 0 { 615 // reusing the link field for the id since footnotes don't have links 616 ref.link = data[idOffset:idEnd] 617 // if footnote, it's not really a title, 
it's the contained text 618 ref.title = raw 619 } else { 620 ref.link = data[linkOffset:linkEnd] 621 ref.title = data[titleOffset:titleEnd] 622 } 623 624 // id matches are case-insensitive 625 id := string(bytes.ToLower(data[idOffset:idEnd])) 626 627 p.refs[id] = ref 628 629 return lineEnd 630 } 631 632 func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) { 633 // link: whitespace-free sequence, optionally between angle brackets 634 if data[i] == '<' { 635 i++ 636 } 637 linkOffset = i 638 if i == len(data) { 639 return 640 } 641 for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' { 642 i++ 643 } 644 linkEnd = i 645 if data[linkOffset] == '<' && data[linkEnd-1] == '>' { 646 linkOffset++ 647 linkEnd-- 648 } 649 650 // optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) 651 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 652 i++ 653 } 654 if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' { 655 return 656 } 657 658 // compute end-of-line 659 if i >= len(data) || data[i] == '\r' || data[i] == '\n' { 660 lineEnd = i 661 } 662 if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' { 663 lineEnd++ 664 } 665 666 // optional (space|tab)* spacer after a newline 667 if lineEnd > 0 { 668 i = lineEnd + 1 669 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 670 i++ 671 } 672 } 673 674 // optional title: any non-newline sequence enclosed in '"() alone on its line 675 if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') { 676 i++ 677 titleOffset = i 678 679 // look for EOL 680 for i < len(data) && data[i] != '\n' && data[i] != '\r' { 681 i++ 682 } 683 if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' { 684 titleEnd = i + 1 685 } else { 686 titleEnd = i 687 } 688 689 // step back 690 i-- 691 for i > titleOffset && (data[i] == ' ' || data[i] == '\t') { 692 i-- 693 } 
694 if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') { 695 lineEnd = titleEnd 696 titleEnd = i 697 } 698 } 699 700 return 701 } 702 703 // The first bit of this logic is the same as (*parser).listItem, but the rest 704 // is much simpler. This function simply finds the entire block and shifts it 705 // over by one tab if it is indeed a block (just returns the line if it's not). 706 // blockEnd is the end of the section in the input buffer, and contents is the 707 // extracted text that was shifted over one tab. It will need to be rendered at 708 // the end of the document. 709 func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) { 710 if i == 0 || len(data) == 0 { 711 return 712 } 713 714 // skip leading whitespace on first line 715 for i < len(data) && data[i] == ' ' { 716 i++ 717 } 718 719 blockStart = i 720 721 // find the end of the line 722 blockEnd = i 723 for i < len(data) && data[i-1] != '\n' { 724 i++ 725 } 726 727 // get working buffer 728 var raw bytes.Buffer 729 730 // put the first line into the working buffer 731 raw.Write(data[blockEnd:i]) 732 blockEnd = i 733 734 // process the following lines 735 containsBlankLine := false 736 737 gatherLines: 738 for blockEnd < len(data) { 739 i++ 740 741 // find the end of this line 742 for i < len(data) && data[i-1] != '\n' { 743 i++ 744 } 745 746 // if it is an empty line, guess that it is part of this item 747 // and move on to the next line 748 if p.isEmpty(data[blockEnd:i]) > 0 { 749 containsBlankLine = true 750 blockEnd = i 751 continue 752 } 753 754 n := 0 755 if n = isIndented(data[blockEnd:i], indentSize); n == 0 { 756 // this is the end of the block. 757 // we don't want to include this last line in the index. 
758 break gatherLines 759 } 760 761 // if there were blank lines before this one, insert a new one now 762 if containsBlankLine { 763 raw.WriteByte('\n') 764 containsBlankLine = false 765 } 766 767 // get rid of that first tab, write to buffer 768 raw.Write(data[blockEnd+n : i]) 769 hasBlock = true 770 771 blockEnd = i 772 } 773 774 if data[blockEnd-1] != '\n' { 775 raw.WriteByte('\n') 776 } 777 778 contents = raw.Bytes() 779 780 return 781 } 782 783 // 784 // 785 // Miscellaneous helper functions 786 // 787 // 788 789 // Test if a character is a punctuation symbol. 790 // Taken from a private function in regexp in the stdlib. 791 func ispunct(c byte) bool { 792 for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") { 793 if c == r { 794 return true 795 } 796 } 797 return false 798 } 799 800 // Test if a character is a whitespace character. 801 func isspace(c byte) bool { 802 return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v' 803 } 804 805 // Test if a character is letter. 806 func isletter(c byte) bool { 807 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') 808 } 809 810 // Test if a character is a letter or a digit. 811 // TODO: check when this is looking for ASCII alnum and when it should use unicode 812 func isalnum(c byte) bool { 813 return (c >= '0' && c <= '9') || isletter(c) 814 } 815 816 // Replace tab characters with spaces, aligning to the next TAB_SIZE column. 
// Only the expanded line body is written to out; no newline is appended.
// Columns are counted in runes, not bytes, so multi-byte UTF-8 characters
// advance the column by one.
func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
	// Fast path: every tab (if any) sits in the leading run, so each one
	// expands to exactly tabSize spaces and no column counting is needed.
	leading := 0
	for leading < len(line) && line[leading] == '\t' {
		leading++
	}
	if bytes.IndexByte(line[leading:], '\t') < 0 {
		for pad := leading * tabSize; pad > 0; pad-- {
			out.WriteByte(' ')
		}
		out.Write(line[leading:])
		return
	}

	// Slow path: a tab follows ordinary text, so track the current column
	// to know how far away the next tab stop is.
	column := 0
	for pos := 0; pos < len(line); {
		// copy the run of non-tab text, one column per rune
		runStart := pos
		for pos < len(line) && line[pos] != '\t' {
			_, width := utf8.DecodeRune(line[pos:])
			pos += width
			column++
		}
		if pos > runStart {
			out.Write(line[runStart:pos])
		}

		if pos >= len(line) {
			break
		}

		// a tab always yields at least one space, then pads to the tab stop
		out.WriteByte(' ')
		column++
		for column%tabSize != 0 {
			out.WriteByte(' ')
			column++
		}

		pos++ // step over the tab itself
	}
}

// Find if a line counts as indented or not.
// Returns number of characters the indent is (0 = not indented).
876 func isIndented(data []byte, indentSize int) int { 877 if len(data) == 0 { 878 return 0 879 } 880 if data[0] == '\t' { 881 return 1 882 } 883 if len(data) < indentSize { 884 return 0 885 } 886 for i := 0; i < indentSize; i++ { 887 if data[i] != ' ' { 888 return 0 889 } 890 } 891 return indentSize 892 } 893 894 // Create a url-safe slug for fragments 895 func slugify(in []byte) []byte { 896 if len(in) == 0 { 897 return in 898 } 899 out := make([]byte, 0, len(in)) 900 sym := false 901 902 for _, ch := range in { 903 if isalnum(ch) { 904 sym = false 905 out = append(out, ch) 906 } else if sym { 907 continue 908 } else { 909 out = append(out, '-') 910 sym = true 911 } 912 } 913 var a, b int 914 var ch byte 915 for a, ch = range out { 916 if ch != '-' { 917 break 918 } 919 } 920 for b = len(out) - 1; b > 0; b-- { 921 if out[b] != '-' { 922 break 923 } 924 } 925 return out[a : b+1] 926 }