github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/russross/blackfriday/block.go (about) 1 // 2 // Blackfriday Markdown Processor 3 // Available at http://yougam/libraries/russross/blackfriday 4 // 5 // Copyright © 2011 Russ Ross <russ@russross.com>. 6 // Distributed under the Simplified BSD License. 7 // See README.md for details. 8 // 9 10 // 11 // Functions to parse block-level elements. 12 // 13 14 package blackfriday 15 16 import ( 17 "bytes" 18 19 "github.com/insionng/yougam/libraries/shurcooL/sanitized_anchor_name" 20 ) 21 22 // Parse block-level data. 23 // Note: this function and many that it calls assume that 24 // the input buffer ends with a newline. 25 func (p *parser) block(out *bytes.Buffer, data []byte) { 26 if len(data) == 0 || data[len(data)-1] != '\n' { 27 panic("block input is missing terminating newline") 28 } 29 30 // this is called recursively: enforce a maximum depth 31 if p.nesting >= p.maxNesting { 32 return 33 } 34 p.nesting++ 35 36 // parse out one block-level construct at a time 37 for len(data) > 0 { 38 // prefixed header: 39 // 40 // # Header 1 41 // ## Header 2 42 // ... 43 // ###### Header 6 44 if p.isPrefixHeader(data) { 45 data = data[p.prefixHeader(out, data):] 46 continue 47 } 48 49 // block of preformatted HTML: 50 // 51 // <div> 52 // ... 53 // </div> 54 if data[0] == '<' { 55 if i := p.html(out, data, true); i > 0 { 56 data = data[i:] 57 continue 58 } 59 } 60 61 // title block 62 // 63 // % stuff 64 // % more stuff 65 // % even more stuff 66 if p.flags&EXTENSION_TITLEBLOCK != 0 { 67 if data[0] == '%' { 68 if i := p.titleBlock(out, data, true); i > 0 { 69 data = data[i:] 70 continue 71 } 72 } 73 } 74 75 // blank lines. note: returns the # of bytes to skip 76 if i := p.isEmpty(data); i > 0 { 77 data = data[i:] 78 continue 79 } 80 81 // indented code block: 82 // 83 // func max(a, b int) int { 84 // if a > b { 85 // return a 86 // } 87 // return b 88 // } 89 if p.codePrefix(data) > 0 { 90 data = data[p.code(out, data):] 91 continue 92 } 93 94 // fenced code block: 95 // 96 // ``` go 97 // func fact(n int) int { 98 // if n <= 1 { 99 // return n 100 // } 101 // return n * fact(n-1) 102 // } 103 // ``` 104 if p.flags&EXTENSION_FENCED_CODE != 0 { 105 if i := p.fencedCode(out, data, true); i > 0 { 106 data = data[i:] 107 continue 108 } 109 } 110 111 // horizontal rule: 112 // 113 // ------ 114 // or 115 // ****** 116 // or 117 // ______ 118 if p.isHRule(data) { 119 p.r.HRule(out) 120 var i int 121 for i = 0; data[i] != '\n'; i++ { 122 } 123 data = data[i:] 124 continue 125 } 126 127 // block quote: 128 // 129 // > A big quote I found somewhere 130 // > on the web 131 if p.quotePrefix(data) > 0 { 132 data = data[p.quote(out, data):] 133 continue 134 } 135 136 // table: 137 // 138 // Name | Age | Phone 139 // ------|-----|--------- 140 // Bob | 31 | 555-1234 141 // Alice | 27 | 555-4321 142 if p.flags&EXTENSION_TABLES != 0 { 143 if i := p.table(out, data); i > 0 { 144 data = data[i:] 145 continue 146 } 147 } 148 149 // an itemized/unordered list: 150 // 151 // * Item 1 152 // * Item 2 153 // 154 // also works with + or - 155 if p.uliPrefix(data) > 0 { 156 data = data[p.list(out, data, 0):] 157 continue 158 } 159 160 // a numbered/ordered list: 161 // 162 // 1. Item 1 163 // 2. Item 2 164 if p.oliPrefix(data) > 0 { 165 data = data[p.list(out, data, LIST_TYPE_ORDERED):] 166 continue 167 } 168 169 // definition lists: 170 // 171 // Term 1 172 // : Definition a 173 // : Definition b 174 // 175 // Term 2 176 // : Definition c 177 if p.flags&EXTENSION_DEFINITION_LISTS != 0 { 178 if p.dliPrefix(data) > 0 { 179 data = data[p.list(out, data, LIST_TYPE_DEFINITION):] 180 continue 181 } 182 } 183 184 // anything else must look like a normal paragraph 185 // note: this finds underlined headers, too 186 data = data[p.paragraph(out, data):] 187 } 188 189 p.nesting-- 190 } 191 192 func (p *parser) isPrefixHeader(data []byte) bool { 193 if data[0] != '#' { 194 return false 195 } 196 197 if p.flags&EXTENSION_SPACE_HEADERS != 0 { 198 level := 0 199 for level < 6 && data[level] == '#' { 200 level++ 201 } 202 if data[level] != ' ' { 203 return false 204 } 205 } 206 return true 207 } 208 209 func (p *parser) prefixHeader(out *bytes.Buffer, data []byte) int { 210 level := 0 211 for level < 6 && data[level] == '#' { 212 level++ 213 } 214 i := skipChar(data, level, ' ') 215 end := skipUntilChar(data, i, '\n') 216 skip := end 217 id := "" 218 if p.flags&EXTENSION_HEADER_IDS != 0 { 219 j, k := 0, 0 220 // find start/end of header id 221 for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ { 222 } 223 for k = j + 1; k < end && data[k] != '}'; k++ { 224 } 225 // extract header id iff found 226 if j < end && k < end { 227 id = string(data[j+2 : k]) 228 end = j 229 skip = k + 1 230 for end > 0 && data[end-1] == ' ' { 231 end-- 232 } 233 } 234 } 235 for end > 0 && data[end-1] == '#' { 236 if isBackslashEscaped(data, end-1) { 237 break 238 } 239 end-- 240 } 241 for end > 0 && data[end-1] == ' ' { 242 end-- 243 } 244 if end > i { 245 if id == "" && p.flags&EXTENSION_AUTO_HEADER_IDS != 0 { 246 id = sanitized_anchor_name.Create(string(data[i:end])) 247 } 248 work := func() bool { 249 p.inline(out, data[i:end]) 250 return true 251 } 252 p.r.Header(out, work, level, id) 253 } 254 return skip 255 } 256 257 func (p *parser) isUnderlinedHeader(data []byte) int { 258 // test of level 1 header 259 if data[0] == '=' { 260 i := skipChar(data, 1, '=') 261 i = skipChar(data, i, ' ') 262 if data[i] == '\n' { 263 return 1 264 } else { 265 return 0 266 } 267 } 268 269 // test of level 2 header 270 if data[0] == '-' { 271 i := skipChar(data, 1, '-') 272 i = skipChar(data, i, ' ') 273 if data[i] == '\n' { 274 return 2 275 } else { 276 return 0 277 } 278 } 279 280 return 0 281 } 282 283 func (p *parser) titleBlock(out *bytes.Buffer, data []byte, doRender bool) int { 284 if data[0] != '%' { 285 return 0 286 } 287 splitData := bytes.Split(data, []byte("\n")) 288 var i int 289 for idx, b := range splitData { 290 if !bytes.HasPrefix(b, []byte("%")) { 291 i = idx // - 1 292 break 293 } 294 } 295 296 data = bytes.Join(splitData[0:i], []byte("\n")) 297 p.r.TitleBlock(out, data) 298 299 return len(data) 300 } 301 302 func (p *parser) html(out *bytes.Buffer, data []byte, doRender bool) int { 303 var i, j int 304 305 // identify the opening tag 306 if data[0] != '<' { 307 return 0 308 } 309 curtag, tagfound := p.htmlFindTag(data[1:]) 310 311 // handle special cases 312 if !tagfound { 313 // check for an HTML comment 314 if size := p.htmlComment(out, data, doRender); size > 0 { 315 return size 316 } 317 318 // check for an <hr> tag 319 if size := p.htmlHr(out, data, doRender); size > 0 { 320 return size 321 } 322 323 // check for HTML CDATA 324 if size := p.htmlCDATA(out, data, doRender); size > 0 { 325 return size 326 } 327 328 // no special case recognized 329 return 0 330 } 331 332 // look for an unindented matching closing tag 333 // followed by a blank line 334 found := false 335 /* 336 closetag := []byte("\n</" + curtag + ">") 337 j = len(curtag) + 1 338 for !found { 339 // scan for a closing tag at the beginning of a line 340 if skip := bytes.Index(data[j:], closetag); skip >= 0 { 341 j += skip + len(closetag) 342 } else { 343 break 344 } 345 346 // see if it is the only thing on the line 347 if skip := p.isEmpty(data[j:]); skip > 0 { 348 // see if it is followed by a blank line/eof 349 j += skip 350 if j >= len(data) { 351 found = true 352 i = j 353 } else { 354 if skip := p.isEmpty(data[j:]); skip > 0 { 355 j += skip 356 found = true 357 i = j 358 } 359 } 360 } 361 } 362 */ 363 364 // if not found, try a second pass looking for indented match 365 // but not if tag is "ins" or "del" (following original Markdown.pl) 366 if !found && curtag != "ins" && curtag != "del" { 367 i = 1 368 for i < len(data) { 369 i++ 370 for i < len(data) && !(data[i-1] == '<' && data[i] == '/') { 371 i++ 372 } 373 374 if i+2+len(curtag) >= len(data) { 375 break 376 } 377 378 j = p.htmlFindEnd(curtag, data[i-1:]) 379 380 if j > 0 { 381 i += j - 1 382 found = true 383 break 384 } 385 } 386 } 387 388 if !found { 389 return 0 390 } 391 392 // the end of the block has been found 393 if doRender { 394 // trim newlines 395 end := i 396 for end > 0 && data[end-1] == '\n' { 397 end-- 398 } 399 p.r.BlockHtml(out, data[:end]) 400 } 401 402 return i 403 } 404 405 func (p *parser) renderHTMLBlock(out *bytes.Buffer, data []byte, start int, doRender bool) int { 406 // html block needs to end with a blank line 407 if i := p.isEmpty(data[start:]); i > 0 { 408 size := start + i 409 if doRender { 410 // trim trailing newlines 411 end := size 412 for end > 0 && data[end-1] == '\n' { 413 end-- 414 } 415 p.r.BlockHtml(out, data[:end]) 416 } 417 return size 418 } 419 return 0 420 } 421 422 // HTML comment, lax form 423 func (p *parser) htmlComment(out *bytes.Buffer, data []byte, doRender bool) int { 424 i := p.inlineHTMLComment(out, data) 425 return p.renderHTMLBlock(out, data, i, doRender) 426 } 427 428 // HTML CDATA section 429 func (p *parser) htmlCDATA(out *bytes.Buffer, data []byte, doRender bool) int { 430 const cdataTag = "<![cdata[" 431 const cdataTagLen = len(cdataTag) 432 if len(data) < cdataTagLen+1 { 433 return 0 434 } 435 if !bytes.Equal(bytes.ToLower(data[:cdataTagLen]), []byte(cdataTag)) { 436 return 0 437 } 438 i := cdataTagLen 439 // scan for an end-of-comment marker, across lines if necessary 440 for i < len(data) && !(data[i-2] == ']' && data[i-1] == ']' && data[i] == '>') { 441 i++ 442 } 443 i++ 444 // no end-of-comment marker 445 if i >= len(data) { 446 return 0 447 } 448 return p.renderHTMLBlock(out, data, i, doRender) 449 } 450 451 // HR, which is the only self-closing block tag considered 452 func (p *parser) htmlHr(out *bytes.Buffer, data []byte, doRender bool) int { 453 if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') { 454 return 0 455 } 456 if data[3] != ' ' && data[3] != '/' && data[3] != '>' { 457 // not an <hr> tag after all; at least not a valid one 458 return 0 459 } 460 461 i := 3 462 for data[i] != '>' && data[i] != '\n' { 463 i++ 464 } 465 466 if data[i] == '>' { 467 return p.renderHTMLBlock(out, data, i+1, doRender) 468 } 469 470 return 0 471 } 472 473 func (p *parser) htmlFindTag(data []byte) (string, bool) { 474 i := 0 475 for isalnum(data[i]) { 476 i++ 477 } 478 key := string(data[:i]) 479 if _, ok := blockTags[key]; ok { 480 return key, true 481 } 482 return "", false 483 } 484 485 func (p *parser) htmlFindEnd(tag string, data []byte) int { 486 // assume data[0] == '<' && data[1] == '/' already tested 487 488 // check if tag is a match 489 closetag := []byte("</" + tag + ">") 490 if !bytes.HasPrefix(data, closetag) { 491 return 0 492 } 493 i := len(closetag) 494 495 // check that the rest of the line is blank 496 skip := 0 497 if skip = p.isEmpty(data[i:]); skip == 0 { 498 return 0 499 } 500 i += skip 501 skip = 0 502 503 if i >= len(data) { 504 return i 505 } 506 507 if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 { 508 return i 509 } 510 if skip = p.isEmpty(data[i:]); skip == 0 { 511 // following line must be blank 512 return 0 513 } 514 515 return i + skip 516 } 517 518 func (p *parser) isEmpty(data []byte) int { 519 // it is okay to call isEmpty on an empty buffer 520 if len(data) == 0 { 521 return 0 522 } 523 524 var i int 525 for i = 0; i < len(data) && data[i] != '\n'; i++ { 526 if data[i] != ' ' && data[i] != '\t' { 527 return 0 528 } 529 } 530 return i + 1 531 } 532 533 func (p *parser) isHRule(data []byte) bool { 534 i := 0 535 536 // skip up to three spaces 537 for i < 3 && data[i] == ' ' { 538 i++ 539 } 540 541 // look at the hrule char 542 if data[i] != '*' && data[i] != '-' && data[i] != '_' { 543 return false 544 } 545 c := data[i] 546 547 // the whole line must be the char or whitespace 548 n := 0 549 for data[i] != '\n' { 550 switch { 551 case data[i] == c: 552 n++ 553 case data[i] != ' ': 554 return false 555 } 556 i++ 557 } 558 559 return n >= 3 560 } 561 562 func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) { 563 i, size := 0, 0 564 skip = 0 565 566 // skip up to three spaces 567 for i < len(data) && i < 3 && data[i] == ' ' { 568 i++ 569 } 570 if i >= len(data) { 571 return 572 } 573 574 // check for the marker characters: ~ or ` 575 if data[i] != '~' && data[i] != '`' { 576 return 577 } 578 579 c := data[i] 580 581 // the whole line must be the same char or whitespace 582 for i < len(data) && data[i] == c { 583 size++ 584 i++ 585 } 586 587 if i >= len(data) { 588 return 589 } 590 591 // the marker char must occur at least 3 times 592 if size < 3 { 593 return 594 } 595 marker = string(data[i-size : i]) 596 597 // if this is the end marker, it must match the beginning marker 598 if oldmarker != "" && marker != oldmarker { 599 return 600 } 601 602 if syntax != nil { 603 syn := 0 604 i = skipChar(data, i, ' ') 605 606 if i >= len(data) { 607 return 608 } 609 610 syntaxStart := i 611 612 if data[i] == '{' { 613 i++ 614 syntaxStart++ 615 616 for i < len(data) && data[i] != '}' && data[i] != '\n' { 617 syn++ 618 i++ 619 } 620 621 if i >= len(data) || data[i] != '}' { 622 return 623 } 624 625 // strip all whitespace at the beginning and the end 626 // of the {} block 627 for syn > 0 && isspace(data[syntaxStart]) { 628 syntaxStart++ 629 syn-- 630 } 631 632 for syn > 0 && isspace(data[syntaxStart+syn-1]) { 633 syn-- 634 } 635 636 i++ 637 } else { 638 for i < len(data) && !isspace(data[i]) { 639 syn++ 640 i++ 641 } 642 } 643 644 language := string(data[syntaxStart : syntaxStart+syn]) 645 *syntax = &language 646 } 647 648 i = skipChar(data, i, ' ') 649 if i >= len(data) || data[i] != '\n' { 650 return 651 } 652 653 skip = i + 1 654 return 655 } 656 657 func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int { 658 var lang *string 659 beg, marker := p.isFencedCode(data, &lang, "") 660 if beg == 0 || beg >= len(data) { 661 return 0 662 } 663 664 var work bytes.Buffer 665 666 for { 667 // safe to assume beg < len(data) 668 669 // check for the end of the code block 670 fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker) 671 if fenceEnd != 0 { 672 beg += fenceEnd 673 break 674 } 675 676 // copy the current line 677 end := skipUntilChar(data, beg, '\n') + 1 678 679 // did we reach the end of the buffer without a closing marker? 680 if end >= len(data) { 681 return 0 682 } 683 684 // verbatim copy to the working buffer 685 if doRender { 686 work.Write(data[beg:end]) 687 } 688 beg = end 689 } 690 691 syntax := "" 692 if lang != nil { 693 syntax = *lang 694 } 695 696 if doRender { 697 p.r.BlockCode(out, work.Bytes(), syntax) 698 } 699 700 return beg 701 } 702 703 func (p *parser) table(out *bytes.Buffer, data []byte) int { 704 var header bytes.Buffer 705 i, columns := p.tableHeader(&header, data) 706 if i == 0 { 707 return 0 708 } 709 710 var body bytes.Buffer 711 712 for i < len(data) { 713 pipes, rowStart := 0, i 714 for ; data[i] != '\n'; i++ { 715 if data[i] == '|' { 716 pipes++ 717 } 718 } 719 720 if pipes == 0 { 721 i = rowStart 722 break 723 } 724 725 // include the newline in data sent to tableRow 726 i++ 727 p.tableRow(&body, data[rowStart:i], columns, false) 728 } 729 730 p.r.Table(out, header.Bytes(), body.Bytes(), columns) 731 732 return i 733 } 734 735 // check if the specified position is preceded by an odd number of backslashes 736 func isBackslashEscaped(data []byte, i int) bool { 737 backslashes := 0 738 for i-backslashes-1 >= 0 && data[i-backslashes-1] == '\\' { 739 backslashes++ 740 } 741 return backslashes&1 == 1 742 } 743 744 func (p *parser) tableHeader(out *bytes.Buffer, data []byte) (size int, columns []int) { 745 i := 0 746 colCount := 1 747 for i = 0; data[i] != '\n'; i++ { 748 if data[i] == '|' && !isBackslashEscaped(data, i) { 749 colCount++ 750 } 751 } 752 753 // doesn't look like a table header 754 if colCount == 1 { 755 return 756 } 757 758 // include the newline in the data sent to tableRow 759 header := data[:i+1] 760 761 // column count ignores pipes at beginning or end of line 762 if data[0] == '|' { 763 colCount-- 764 } 765 if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) { 766 colCount-- 767 } 768 769 columns = make([]int, colCount) 770 771 // move on to the header underline 772 i++ 773 if i >= len(data) { 774 return 775 } 776 777 if data[i] == '|' && !isBackslashEscaped(data, i) { 778 i++ 779 } 780 i = skipChar(data, i, ' ') 781 782 // each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3 783 // and trailing | optional on last column 784 col := 0 785 for data[i] != '\n' { 786 dashes := 0 787 788 if data[i] == ':' { 789 i++ 790 columns[col] |= TABLE_ALIGNMENT_LEFT 791 dashes++ 792 } 793 for data[i] == '-' { 794 i++ 795 dashes++ 796 } 797 if data[i] == ':' { 798 i++ 799 columns[col] |= TABLE_ALIGNMENT_RIGHT 800 dashes++ 801 } 802 for data[i] == ' ' { 803 i++ 804 } 805 806 // end of column test is messy 807 switch { 808 case dashes < 3: 809 // not a valid column 810 return 811 812 case data[i] == '|' && !isBackslashEscaped(data, i): 813 // marker found, now skip past trailing whitespace 814 col++ 815 i++ 816 for data[i] == ' ' { 817 i++ 818 } 819 820 // trailing junk found after last column 821 if col >= colCount && data[i] != '\n' { 822 return 823 } 824 825 case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount: 826 // something else found where marker was required 827 return 828 829 case data[i] == '\n': 830 // marker is optional for the last column 831 col++ 832 833 default: 834 // trailing junk found after last column 835 return 836 } 837 } 838 if col != colCount { 839 return 840 } 841 842 p.tableRow(out, header, columns, true) 843 size = i + 1 844 return 845 } 846 847 func (p *parser) tableRow(out *bytes.Buffer, data []byte, columns []int, header bool) { 848 i, col := 0, 0 849 var rowWork bytes.Buffer 850 851 if data[i] == '|' && !isBackslashEscaped(data, i) { 852 i++ 853 } 854 855 for col = 0; col < len(columns) && i < len(data); col++ { 856 for data[i] == ' ' { 857 i++ 858 } 859 860 cellStart := i 861 862 for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' { 863 i++ 864 } 865 866 cellEnd := i 867 868 // skip the end-of-cell marker, possibly taking us past end of buffer 869 i++ 870 871 for cellEnd > cellStart && data[cellEnd-1] == ' ' { 872 cellEnd-- 873 } 874 875 var cellWork bytes.Buffer 876 p.inline(&cellWork, data[cellStart:cellEnd]) 877 878 if header { 879 p.r.TableHeaderCell(&rowWork, cellWork.Bytes(), columns[col]) 880 } else { 881 p.r.TableCell(&rowWork, cellWork.Bytes(), columns[col]) 882 } 883 } 884 885 // pad it out with empty columns to get the right number 886 for ; col < len(columns); col++ { 887 if header { 888 p.r.TableHeaderCell(&rowWork, nil, columns[col]) 889 } else { 890 p.r.TableCell(&rowWork, nil, columns[col]) 891 } 892 } 893 894 // silently ignore rows with too many cells 895 896 p.r.TableRow(out, rowWork.Bytes()) 897 } 898 899 // returns blockquote prefix length 900 func (p *parser) quotePrefix(data []byte) int { 901 i := 0 902 for i < 3 && data[i] == ' ' { 903 i++ 904 } 905 if data[i] == '>' { 906 if data[i+1] == ' ' { 907 return i + 2 908 } 909 return i + 1 910 } 911 return 0 912 } 913 914 // blockquote ends with at least one blank line 915 // followed by something without a blockquote prefix 916 func (p *parser) terminateBlockquote(data []byte, beg, end int) bool { 917 if p.isEmpty(data[beg:]) <= 0 { 918 return false 919 } 920 if end >= len(data) { 921 return true 922 } 923 return p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0 924 } 925 926 // parse a blockquote fragment 927 func (p *parser) quote(out *bytes.Buffer, data []byte) int { 928 var raw bytes.Buffer 929 beg, end := 0, 0 930 for beg < len(data) { 931 end = beg 932 // Step over whole lines, collecting them. While doing that, check for 933 // fenced code and if one's found, incorporate it altogether, 934 // irregardless of any contents inside it 935 for data[end] != '\n' { 936 if p.flags&EXTENSION_FENCED_CODE != 0 { 937 if i := p.fencedCode(out, data[end:], false); i > 0 { 938 // -1 to compensate for the extra end++ after the loop: 939 end += i - 1 940 break 941 } 942 } 943 end++ 944 } 945 end++ 946 947 if pre := p.quotePrefix(data[beg:]); pre > 0 { 948 // skip the prefix 949 beg += pre 950 } else if p.terminateBlockquote(data, beg, end) { 951 break 952 } 953 954 // this line is part of the blockquote 955 raw.Write(data[beg:end]) 956 beg = end 957 } 958 959 var cooked bytes.Buffer 960 p.block(&cooked, raw.Bytes()) 961 p.r.BlockQuote(out, cooked.Bytes()) 962 return end 963 } 964 965 // returns prefix length for block code 966 func (p *parser) codePrefix(data []byte) int { 967 if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' { 968 return 4 969 } 970 return 0 971 } 972 973 func (p *parser) code(out *bytes.Buffer, data []byte) int { 974 var work bytes.Buffer 975 976 i := 0 977 for i < len(data) { 978 beg := i 979 for data[i] != '\n' { 980 i++ 981 } 982 i++ 983 984 blankline := p.isEmpty(data[beg:i]) > 0 985 if pre := p.codePrefix(data[beg:i]); pre > 0 { 986 beg += pre 987 } else if !blankline { 988 // non-empty, non-prefixed line breaks the pre 989 i = beg 990 break 991 } 992 993 // verbatim copy to the working buffeu 994 if blankline { 995 work.WriteByte('\n') 996 } else { 997 work.Write(data[beg:i]) 998 } 999 } 1000 1001 // trim all the \n off the end of work 1002 workbytes := work.Bytes() 1003 eol := len(workbytes) 1004 for eol > 0 && workbytes[eol-1] == '\n' { 1005 eol-- 1006 } 1007 if eol != len(workbytes) { 1008 work.Truncate(eol) 1009 } 1010 1011 work.WriteByte('\n') 1012 1013 p.r.BlockCode(out, work.Bytes(), "") 1014 1015 return i 1016 } 1017 1018 // returns unordered list item prefix 1019 func (p *parser) uliPrefix(data []byte) int { 1020 i := 0 1021 1022 // start with up to 3 spaces 1023 for i < 3 && data[i] == ' ' { 1024 i++ 1025 } 1026 1027 // need a *, +, or - followed by a space 1028 if (data[i] != '*' && data[i] != '+' && data[i] != '-') || 1029 data[i+1] != ' ' { 1030 return 0 1031 } 1032 return i + 2 1033 } 1034 1035 // returns ordered list item prefix 1036 func (p *parser) oliPrefix(data []byte) int { 1037 i := 0 1038 1039 // start with up to 3 spaces 1040 for i < 3 && data[i] == ' ' { 1041 i++ 1042 } 1043 1044 // count the digits 1045 start := i 1046 for data[i] >= '0' && data[i] <= '9' { 1047 i++ 1048 } 1049 1050 // we need >= 1 digits followed by a dot and a space 1051 if start == i || data[i] != '.' || data[i+1] != ' ' { 1052 return 0 1053 } 1054 return i + 2 1055 } 1056 1057 // returns definition list item prefix 1058 func (p *parser) dliPrefix(data []byte) int { 1059 i := 0 1060 1061 // need a : followed by a spaces 1062 if data[i] != ':' || data[i+1] != ' ' { 1063 return 0 1064 } 1065 for data[i] == ' ' { 1066 i++ 1067 } 1068 return i + 2 1069 } 1070 1071 // parse ordered or unordered list block 1072 func (p *parser) list(out *bytes.Buffer, data []byte, flags int) int { 1073 i := 0 1074 flags |= LIST_ITEM_BEGINNING_OF_LIST 1075 work := func() bool { 1076 for i < len(data) { 1077 skip := p.listItem(out, data[i:], &flags) 1078 i += skip 1079 1080 if skip == 0 || flags&LIST_ITEM_END_OF_LIST != 0 { 1081 break 1082 } 1083 flags &= ^LIST_ITEM_BEGINNING_OF_LIST 1084 } 1085 return true 1086 } 1087 1088 p.r.List(out, work, flags) 1089 return i 1090 } 1091 1092 // Parse a single list item. 1093 // Assumes initial prefix is already removed if this is a sublist. 1094 func (p *parser) listItem(out *bytes.Buffer, data []byte, flags *int) int { 1095 // keep track of the indentation of the first line 1096 itemIndent := 0 1097 for itemIndent < 3 && data[itemIndent] == ' ' { 1098 itemIndent++ 1099 } 1100 1101 i := p.uliPrefix(data) 1102 if i == 0 { 1103 i = p.oliPrefix(data) 1104 } 1105 if i == 0 { 1106 i = p.dliPrefix(data) 1107 // reset definition term flag 1108 if i > 0 { 1109 *flags &= ^LIST_TYPE_TERM 1110 } 1111 } 1112 if i == 0 { 1113 // if in defnition list, set term flag and continue 1114 if *flags&LIST_TYPE_DEFINITION != 0 { 1115 *flags |= LIST_TYPE_TERM 1116 } else { 1117 return 0 1118 } 1119 } 1120 1121 // skip leading whitespace on first line 1122 for data[i] == ' ' { 1123 i++ 1124 } 1125 1126 // find the end of the line 1127 line := i 1128 for i > 0 && data[i-1] != '\n' { 1129 i++ 1130 } 1131 1132 // get working buffer 1133 var raw bytes.Buffer 1134 1135 // put the first line into the working buffer 1136 raw.Write(data[line:i]) 1137 line = i 1138 1139 // process the following lines 1140 containsBlankLine := false 1141 sublist := 0 1142 1143 gatherlines: 1144 for line < len(data) { 1145 i++ 1146 1147 // find the end of this line 1148 for data[i-1] != '\n' { 1149 i++ 1150 } 1151 1152 // if it is an empty line, guess that it is part of this item 1153 // and move on to the next line 1154 if p.isEmpty(data[line:i]) > 0 { 1155 containsBlankLine = true 1156 raw.Write(data[line:i]) 1157 line = i 1158 continue 1159 } 1160 1161 // calculate the indentation 1162 indent := 0 1163 for indent < 4 && line+indent < i && data[line+indent] == ' ' { 1164 indent++ 1165 } 1166 1167 chunk := data[line+indent : i] 1168 1169 // evaluate how this line fits in 1170 switch { 1171 // is this a nested list item? 1172 case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) || 1173 p.oliPrefix(chunk) > 0 || 1174 p.dliPrefix(chunk) > 0: 1175 1176 if containsBlankLine { 1177 // end the list if the type changed after a blank line 1178 if indent <= itemIndent && 1179 ((*flags&LIST_TYPE_ORDERED != 0 && p.uliPrefix(chunk) > 0) || 1180 (*flags&LIST_TYPE_ORDERED == 0 && p.oliPrefix(chunk) > 0)) { 1181 1182 *flags |= LIST_ITEM_END_OF_LIST 1183 break gatherlines 1184 } 1185 *flags |= LIST_ITEM_CONTAINS_BLOCK 1186 } 1187 1188 // to be a nested list, it must be indented more 1189 // if not, it is the next item in the same list 1190 if indent <= itemIndent { 1191 break gatherlines 1192 } 1193 1194 // is this the first item in the nested list? 1195 if sublist == 0 { 1196 sublist = raw.Len() 1197 } 1198 1199 // is this a nested prefix header? 1200 case p.isPrefixHeader(chunk): 1201 // if the header is not indented, it is not nested in the list 1202 // and thus ends the list 1203 if containsBlankLine && indent < 4 { 1204 *flags |= LIST_ITEM_END_OF_LIST 1205 break gatherlines 1206 } 1207 *flags |= LIST_ITEM_CONTAINS_BLOCK 1208 1209 // anything following an empty line is only part 1210 // of this item if it is indented 4 spaces 1211 // (regardless of the indentation of the beginning of the item) 1212 case containsBlankLine && indent < 4: 1213 if *flags&LIST_TYPE_DEFINITION != 0 && i < len(data)-1 { 1214 // is the next item still a part of this list? 1215 next := i 1216 for data[next] != '\n' { 1217 next++ 1218 } 1219 for next < len(data)-1 && data[next] == '\n' { 1220 next++ 1221 } 1222 if i < len(data)-1 && data[i] != ':' && data[next] != ':' { 1223 *flags |= LIST_ITEM_END_OF_LIST 1224 } 1225 } else { 1226 *flags |= LIST_ITEM_END_OF_LIST 1227 } 1228 break gatherlines 1229 1230 // a blank line means this should be parsed as a block 1231 case containsBlankLine: 1232 *flags |= LIST_ITEM_CONTAINS_BLOCK 1233 } 1234 1235 containsBlankLine = false 1236 1237 // add the line into the working buffer without prefix 1238 raw.Write(data[line+indent : i]) 1239 1240 line = i 1241 } 1242 1243 rawBytes := raw.Bytes() 1244 1245 // render the contents of the list item 1246 var cooked bytes.Buffer 1247 if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 && *flags&LIST_TYPE_TERM == 0 { 1248 // intermediate render of block item, except for definition term 1249 if sublist > 0 { 1250 p.block(&cooked, rawBytes[:sublist]) 1251 p.block(&cooked, rawBytes[sublist:]) 1252 } else { 1253 p.block(&cooked, rawBytes) 1254 } 1255 } else { 1256 // intermediate render of inline item 1257 if sublist > 0 { 1258 p.inline(&cooked, rawBytes[:sublist]) 1259 p.block(&cooked, rawBytes[sublist:]) 1260 } else { 1261 p.inline(&cooked, rawBytes) 1262 } 1263 } 1264 1265 // render the actual list item 1266 cookedBytes := cooked.Bytes() 1267 parsedEnd := len(cookedBytes) 1268 1269 // strip trailing newlines 1270 for parsedEnd > 0 && cookedBytes[parsedEnd-1] == '\n' { 1271 parsedEnd-- 1272 } 1273 p.r.ListItem(out, cookedBytes[:parsedEnd], *flags) 1274 1275 return line 1276 } 1277 1278 // render a single paragraph that has already been parsed out 1279 func (p *parser) renderParagraph(out *bytes.Buffer, data []byte) { 1280 if len(data) == 0 { 1281 return 1282 } 1283 1284 // trim leading spaces 1285 beg := 0 1286 for data[beg] == ' ' { 1287 beg++ 1288 } 1289 1290 // trim trailing newline 1291 end := len(data) - 1 1292 1293 // trim trailing spaces 1294 for end > beg && data[end-1] == ' ' { 1295 end-- 1296 } 1297 1298 work := func() bool { 1299 p.inline(out, data[beg:end]) 1300 return true 1301 } 1302 p.r.Paragraph(out, work) 1303 } 1304 1305 func (p *parser) paragraph(out *bytes.Buffer, data []byte) int { 1306 // prev: index of 1st char of previous line 1307 // line: index of 1st char of current line 1308 // i: index of cursor/end of current line 1309 var prev, line, i int 1310 1311 // keep going until we find something to mark the end of the paragraph 1312 for i < len(data) { 1313 // mark the beginning of the current line 1314 prev = line 1315 current := data[i:] 1316 line = i 1317 1318 // did we find a blank line marking the end of the paragraph? 1319 if n := p.isEmpty(current); n > 0 { 1320 // did this blank line followed by a definition list item? 1321 if p.flags&EXTENSION_DEFINITION_LISTS != 0 { 1322 if i < len(data)-1 && data[i+1] == ':' { 1323 return p.list(out, data[prev:], LIST_TYPE_DEFINITION) 1324 } 1325 } 1326 1327 p.renderParagraph(out, data[:i]) 1328 return i + n 1329 } 1330 1331 // an underline under some text marks a header, so our paragraph ended on prev line 1332 if i > 0 { 1333 if level := p.isUnderlinedHeader(current); level > 0 { 1334 // render the paragraph 1335 p.renderParagraph(out, data[:prev]) 1336 1337 // ignore leading and trailing whitespace 1338 eol := i - 1 1339 for prev < eol && data[prev] == ' ' { 1340 prev++ 1341 } 1342 for eol > prev && data[eol-1] == ' ' { 1343 eol-- 1344 } 1345 1346 // render the header 1347 // this ugly double closure avoids forcing variables onto the heap 1348 work := func(o *bytes.Buffer, pp *parser, d []byte) func() bool { 1349 return func() bool { 1350 pp.inline(o, d) 1351 return true 1352 } 1353 }(out, p, data[prev:eol]) 1354 1355 id := "" 1356 if p.flags&EXTENSION_AUTO_HEADER_IDS != 0 { 1357 id = sanitized_anchor_name.Create(string(data[prev:eol])) 1358 } 1359 1360 p.r.Header(out, work, level, id) 1361 1362 // find the end of the underline 1363 for data[i] != '\n' { 1364 i++ 1365 } 1366 return i 1367 } 1368 } 1369 1370 // if the next line starts a block of HTML, then the paragraph ends here 1371 if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 { 1372 if data[i] == '<' && p.html(out, current, false) > 0 { 1373 // rewind to before the HTML block 1374 p.renderParagraph(out, data[:i]) 1375 return i 1376 } 1377 } 1378 1379 // if there's a prefixed header or a horizontal rule after this, paragraph is over 1380 if p.isPrefixHeader(current) || p.isHRule(current) { 1381 p.renderParagraph(out, data[:i]) 1382 return i 1383 } 1384 1385 // if there's a fenced code block, paragraph is over 1386 if p.flags&EXTENSION_FENCED_CODE != 0 { 1387 if p.fencedCode(out, current, false) > 0 { 1388 p.renderParagraph(out, data[:i]) 1389 return i 1390 } 1391 } 1392 1393 // if there's a definition list item, prev line is a definition term 1394 if p.flags&EXTENSION_DEFINITION_LISTS != 0 { 1395 if p.dliPrefix(current) != 0 { 1396 return p.list(out, data[prev:], LIST_TYPE_DEFINITION) 1397 } 1398 } 1399 1400 // if there's a list after this, paragraph is over 1401 if p.flags&EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK != 0 { 1402 if p.uliPrefix(current) != 0 || 1403 p.oliPrefix(current) != 0 || 1404 p.quotePrefix(current) != 0 || 1405 p.codePrefix(current) != 0 { 1406 p.renderParagraph(out, data[:i]) 1407 return i 1408 } 1409 } 1410 1411 // otherwise, scan to the beginning of the next line 1412 for data[i] != '\n' { 1413 i++ 1414 } 1415 i++ 1416 } 1417 1418 p.renderParagraph(out, data[:i]) 1419 return i 1420 }