// Source: github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/russross/blackfriday/inline.go

//
// Blackfriday Markdown Processor
// Available at http://yougam/libraries/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//

//
// Functions to parse inline elements.
//

package blackfriday

import (
	"bytes"
	"regexp"
	"strconv"
)

var (
	// urlRe is a regexp fragment matching either an http/https/ftp URL or a
	// path that starts with '/'.
	urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
	// anchorRe matches, at the start of its input, a complete
	// <a href="URL" title="...">URL</a> element (the title is optional).
	// autoLink uses it to avoid re-linking a URL that is already an anchor.
	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)
)

// Functions to parse text within a block
// Each function returns the number of chars taken care of
// data is the complete block being rendered
// offset is the number of valid chars before the current cursor

// inline renders the inline content of data into out.
//
// Runs of bytes that have no entry in p.inlineCallback are passed to
// p.r.NormalText. When a byte with a registered handler is reached, the
// handler is called with the whole data slice and the index of that byte.
// A handler returning 0 means "not handled": the byte is kept as pending
// normal text. Any other return value is the number of bytes the handler
// consumed.
//
// The function recurses through the handlers; once p.nesting reaches
// p.maxNesting it returns without writing anything.
func (p *parser) inline(out *bytes.Buffer, data []byte) {
	// this is called recursively: enforce a maximum depth
	if p.nesting >= p.maxNesting {
		return
	}
	p.nesting++

	// i is the start of the pending normal text, end is the scan cursor
	i, end := 0, 0
	for i < len(data) {
		// copy inactive chars into the output
		for end < len(data) && p.inlineCallback[data[end]] == nil {
			end++
		}

		p.r.NormalText(out, data[i:end])

		if end >= len(data) {
			break
		}
		i = end

		// call the trigger
		handler := p.inlineCallback[data[end]]
		if consumed := handler(p, out, data, i); consumed == 0 {
			// no action from the callback; buffer the byte for later
			end = i + 1
		} else {
			// skip past whatever the callback used
			i += consumed
			end = i
		}
	}

	p.nesting--
}

// single and double emphasis parsing
//
// emphasis is the handler for an emphasis delimiter at data[offset]. It
// counts how many times the delimiter byte is repeated (one, two or three)
// and delegates to helperEmphasis, helperDoubleEmphasis or
// helperTripleEmphasis respectively. It returns the number of bytes consumed
// including the opening delimiters, or 0 if no emphasis was recognized.
// For '~' only the doubled form is accepted.
func emphasis(p *parser, out *bytes.Buffer, data []byte, offset int) int {
	data = data[offset:]
	c := data[0]
	ret := 0

	if len(data) > 2 && data[1] != c {
		// whitespace cannot follow an opening emphasis;
		// strikethrough only takes two characters '~~'
		if c == '~' || isspace(data[1]) {
			return 0
		}
		if ret = helperEmphasis(p, out, data[1:], c); ret == 0 {
			return 0
		}

		return ret + 1
	}

	if len(data) > 3 && data[1] == c && data[2] != c {
		if isspace(data[2]) {
			return 0
		}
		if ret = helperDoubleEmphasis(p, out, data[2:], c); ret == 0 {
			return 0
		}

		return ret + 2
	}

	if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
		if c == '~' || isspace(data[3]) {
			return 0
		}
		if ret = helperTripleEmphasis(p, out, data, 3, c); ret == 0 {
			return 0
		}

		return ret + 3
	}

	return 0
}

// codeSpan is the handler for a backtick at data[offset]. The opening
// delimiter is the run of backticks starting there; the span ends at the next
// run of the same number of consecutive backticks. Spaces directly inside the
// delimiters are trimmed and a non-empty body is passed to p.r.CodeSpan.
// It returns the number of bytes consumed (delimiters included), or 0 when
// no closing delimiter exists.
func codeSpan(p *parser, out *bytes.Buffer, data []byte, offset int) int {
	data = data[offset:]

	nb := 0

	// count the number of backticks in the delimiter
	for nb < len(data) && data[nb] == '`' {
		nb++
	}

	// find the next delimiter
	// (i counts consecutive backticks seen so far; end is the scan cursor)
	i, end := 0, 0
	for end = nb; end < len(data) && i < nb; end++ {
		if data[end] == '`' {
			i++
		} else {
			i = 0
		}
	}

	// no matching delimiter?
	if i < nb && end >= len(data) {
		return 0
	}

	// trim outside whitespace
	fBegin := nb
	for fBegin < end && data[fBegin] == ' ' {
		fBegin++
	}

	fEnd := end - nb
	for fEnd > fBegin && data[fEnd-1] == ' ' {
		fEnd--
	}

	// render the code span
	if fBegin != fEnd {
		p.r.CodeSpan(out, data[fBegin:fEnd])
	}

	return end
}

// newline preceded by two spaces becomes <br>
// newline without two spaces works when EXTENSION_HARD_LINE_BREAK is enabled
//
// lineBreak is the handler for a newline at data[offset]. Trailing spaces are
// always stripped from out first, even when no break ends up being emitted.
// A hard break is emitted via p.r.LineBreak when EXTENSION_HARD_LINE_BREAK is
// set, when the newline is preceded by two spaces, or when it is preceded by
// a backslash and EXTENSION_BACKSLASH_LINE_BREAK is set (in which case the
// backslash already written to out is removed). It returns 1 when a break was
// emitted and 0 otherwise.
func lineBreak(p *parser, out *bytes.Buffer, data []byte, offset int) int {
	// remove trailing spaces from out
	outBytes := out.Bytes()
	end := len(outBytes)
	eol := end
	for eol > 0 && outBytes[eol-1] == ' ' {
		eol--
	}
	out.Truncate(eol)

	precededByTwoSpaces := offset >= 2 && data[offset-2] == ' ' && data[offset-1] == ' '
	precededByBackslash := offset >= 1 && data[offset-1] == '\\' // see http://spec.commonmark.org/0.18/#example-527
	precededByBackslash = precededByBackslash && p.flags&EXTENSION_BACKSLASH_LINE_BREAK != 0

	// should there be a hard line break here?
	if p.flags&EXTENSION_HARD_LINE_BREAK == 0 && !precededByTwoSpaces && !precededByBackslash {
		return 0
	}

	if precededByBackslash && eol > 0 {
		// drop the backslash that was already copied to the output
		out.Truncate(eol - 1)
	}
	p.r.LineBreak(out)
	return 1
}

// linkType identifies which bracket construct link is parsing.
type linkType int

const (
	linkNormal           linkType = iota // [text](url) or [text][ref]
	linkImg                              // ![alt](url)
	linkDeferredFootnote                 // [^refId]
	linkInlineFootnote                   // ^[text]
)

// isReferenceStyleLink reports whether data[pos] opens the "[id]" part of a
// reference-style link: a '[' that has at least one byte after it and is not
// followed by '^'. It is always false for deferred footnotes.
func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
	if t == linkDeferredFootnote {
		return false
	}
	return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^'
}

// '[': parse a link or an image or a footnote
//
// link is the handler for '[' at data[offset]. It determines the construct
// type from the surrounding bytes and p.flags, finds the matching ']', and
// then handles one of three forms:
//
//   - inline:    [text](url "title")
//   - reference: [text][id] (an empty id falls back to the text)
//   - shortcut:  [text], [^refId] or ^[inline footnote]
//
// Reference ids are resolved with p.getRef; inline footnotes create a new
// entry in p.notes. The result is rendered with p.r.Link, p.r.Image or
// p.r.FootnoteRef. It returns the number of bytes consumed starting at
// data[offset], or 0 when the input is not a valid link.
func link(p *parser, out *bytes.Buffer, data []byte, offset int) int {
	// no links allowed inside regular links, footnote, and deferred footnotes
	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
		return 0
	}

	var t linkType
	switch {
	// special case: ![^text] == deferred footnote (that follows something with
	// an exclamation point)
	case p.flags&EXTENSION_FOOTNOTES != 0 && len(data)-1 > offset && data[offset+1] == '^':
		t = linkDeferredFootnote
	// ![alt] == image
	case offset > 0 && data[offset-1] == '!':
		t = linkImg
	// ^[text] == inline footnote
	// [^refId] == deferred footnote
	case p.flags&EXTENSION_FOOTNOTES != 0:
		if offset > 0 && data[offset-1] == '^' {
			t = linkInlineFootnote
		} else if len(data)-1 > offset && data[offset+1] == '^' {
			t = linkDeferredFootnote
		}
	// [text] == regular link
	default:
		t = linkNormal
	}

	data = data[offset:]

	var (
		i                       = 1
		noteId                  int
		title, link, altContent []byte
		textHasNl               = false
	)

	if t == linkDeferredFootnote {
		// step over the '^' that follows the opening bracket
		i++
	}

	brace := 0

	// look for the matching closing bracket
	for level := 1; level > 0 && i < len(data); i++ {
		switch {
		case data[i] == '\n':
			textHasNl = true

		case data[i-1] == '\\':
			// escaped byte: never counts as a bracket
			continue

		case data[i] == '[':
			level++

		case data[i] == ']':
			level--
			if level <= 0 {
				i-- // compensate for extra i++ in for loop
			}
		}
	}

	if i >= len(data) {
		return 0
	}

	// txtE is the index of the closing ']' of the link text
	txtE := i
	i++

	// skip any amount of whitespace or newline
	// (this is much more lax than original markdown syntax)
	for i < len(data) && isspace(data[i]) {
		i++
	}

	switch {
	// inline style link
	case i < len(data) && data[i] == '(':
		// skip initial whitespace
		i++

		for i < len(data) && isspace(data[i]) {
			i++
		}

		linkB := i

		// look for link end: ' " ), check for new opening braces and take this
		// into account, this may lead to overshooting and probably will require
		// some fine-tuning.
	findlinkend:
		for i < len(data) {
			switch {
			case data[i] == '\\':
				i += 2

			case data[i] == '(':
				brace++
				i++

			case data[i] == ')':
				if brace <= 0 {
					break findlinkend
				}
				brace--
				i++

			case data[i] == '\'' || data[i] == '"':
				break findlinkend

			default:
				i++
			}
		}

		if i >= len(data) {
			return 0
		}
		linkE := i

		// look for title end if present
		titleB, titleE := 0, 0
		if data[i] == '\'' || data[i] == '"' {
			i++
			titleB = i

		findtitleend:
			for i < len(data) {
				switch {
				case data[i] == '\\':
					i += 2

				case data[i] == ')':
					break findtitleend

				default:
					i++
				}
			}

			if i >= len(data) {
				return 0
			}

			// skip whitespace after title
			titleE = i - 1
			for titleE > titleB && isspace(data[titleE]) {
				titleE--
			}

			// check for closing quote presence
			if data[titleE] != '\'' && data[titleE] != '"' {
				// no closing quote: treat everything up to ')' as the link
				titleB, titleE = 0, 0
				linkE = i
			}
		}

		// remove whitespace at the end of the link
		for linkE > linkB && isspace(data[linkE-1]) {
			linkE--
		}

		// remove optional angle brackets around the link
		if data[linkB] == '<' {
			linkB++
		}
		if data[linkE-1] == '>' {
			linkE--
		}

		// build escaped link and title
		if linkE > linkB {
			link = data[linkB:linkE]
		}

		if titleE > titleB {
			title = data[titleB:titleE]
		}

		i++

	// reference style link
	case isReferenceStyleLink(data, i, t):
		var id []byte
		altContentConsidered := false

		// look for the id
		i++
		linkB := i
		for i < len(data) && data[i] != ']' {
			i++
		}
		if i >= len(data) {
			return 0
		}
		linkE := i

		// find the reference
		if linkB == linkE {
			// empty id ("[text][]"): the link text itself is the id
			if textHasNl {
				var b bytes.Buffer

				// copy the text, turning each newline into a single space
				// unless it already follows a space
				for j := 1; j < txtE; j++ {
					switch {
					case data[j] != '\n':
						b.WriteByte(data[j])
					case data[j-1] != ' ':
						b.WriteByte(' ')
					}
				}

				id = b.Bytes()
			} else {
				id = data[1:txtE]
				altContentConsidered = true
			}
		} else {
			id = data[linkB:linkE]
		}

		// find the reference with matching id
		lr, ok := p.getRef(string(id))
		if !ok {
			return 0
		}

		// keep link and title from reference
		link = lr.link
		title = lr.title
		if altContentConsidered {
			altContent = lr.text
		}
		i++

	// shortcut reference style link or reference or inline footnote
	default:
		var id []byte

		// craft the id
		if textHasNl {
			var b bytes.Buffer

			// copy the text, turning each newline into a single space
			// unless it already follows a space
			for j := 1; j < txtE; j++ {
				switch {
				case data[j] != '\n':
					b.WriteByte(data[j])
				case data[j-1] != ' ':
					b.WriteByte(' ')
				}
			}

			id = b.Bytes()
		} else {
			if t == linkDeferredFootnote {
				id = data[2:txtE] // get rid of the ^
			} else {
				id = data[1:txtE]
			}
		}

		if t == linkInlineFootnote {
			// create a new reference
			noteId = len(p.notes) + 1

			// the fragment is the slugified id, capped at 16 bytes, or
			// "footnote-<n>" when the footnote text is empty
			var fragment []byte
			if len(id) > 0 {
				if len(id) < 16 {
					fragment = make([]byte, len(id))
				} else {
					fragment = make([]byte, 16)
				}
				copy(fragment, slugify(id))
			} else {
				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteId))...)
			}

			ref := &reference{
				noteId:   noteId,
				hasBlock: false,
				link:     fragment,
				title:    id,
			}

			p.notes = append(p.notes, ref)

			link = ref.link
			title = ref.title
		} else {
			// find the reference with matching id
			lr, ok := p.getRef(string(id))
			if !ok {
				return 0
			}

			if t == linkDeferredFootnote {
				lr.noteId = len(p.notes) + 1
				p.notes = append(p.notes, lr)
			}

			// keep link and title from reference
			link = lr.link
			// if inline footnote, title == footnote contents
			title = lr.title
			noteId = lr.noteId
		}

		// rewind the whitespace
		i = txtE + 1
	}

	// build content: img alt is escaped, link content is parsed
	var content bytes.Buffer
	if txtE > 1 {
		if t == linkImg {
			content.Write(data[1:txtE])
		} else {
			// links cannot contain other links, so turn off link parsing temporarily
			insideLink := p.insideLink
			p.insideLink = true
			p.inline(&content, data[1:txtE])
			p.insideLink = insideLink
		}
	}

	var uLink []byte
	if t == linkNormal || t == linkImg {
		if len(link) > 0 {
			var uLinkBuf bytes.Buffer
			unescapeText(&uLinkBuf, link)
			uLink = uLinkBuf.Bytes()
		}

		// links need something to click on and somewhere to go
		if len(uLink) == 0 || (t == linkNormal && content.Len() == 0) {
			return 0
		}
	}

	// call the relevant rendering function
	switch t {
	case linkNormal:
		if len(altContent) > 0 {
			p.r.Link(out, uLink, title, altContent)
		} else {
			p.r.Link(out, uLink, title, content.Bytes())
		}

	case linkImg:
		// the '!' before the bracket was already emitted as normal text;
		// take it back out of the output
		outSize := out.Len()
		outBytes := out.Bytes()
		if outSize > 0 && outBytes[outSize-1] == '!' {
			out.Truncate(outSize - 1)
		}

		p.r.Image(out, uLink, title, content.Bytes())

	case linkInlineFootnote:
		// likewise remove the '^' that preceded the bracket
		outSize := out.Len()
		outBytes := out.Bytes()
		if outSize > 0 && outBytes[outSize-1] == '^' {
			out.Truncate(outSize - 1)
		}

		p.r.FootnoteRef(out, link, noteId)

	case linkDeferredFootnote:
		p.r.FootnoteRef(out, link, noteId)

	default:
		return 0
	}

	return i
}

// inlineHTMLComment returns the length of the HTML comment that starts at
// data[0], including the closing "-->", or 0 if data does not start with
// "<!--" or no terminator is found. Nothing is written to out.
func (p *parser) inlineHTMLComment(out *bytes.Buffer, data []byte) int {
	if len(data) < 5 {
		return 0
	}
	if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
		return 0
	}
	i := 5
	// scan for an end-of-comment marker, across lines if necessary
	for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
		i++
	}
	// no end-of-comment marker
	if i >= len(data) {
		return 0
	}
	return i + 1
}

// '<' when tags or autolinks are allowed
//
// leftAngle is the handler for '<' at data[offset]. The length of the
// construct comes from tagLength, overridden by inlineHTMLComment when the
// input is an HTML comment. Autolinks (<scheme:...> or <user@host>) are
// unescaped and rendered with p.r.AutoLink; anything else longer than two
// bytes is emitted verbatim with p.r.RawHtmlTag. It returns the number of
// bytes consumed, or 0 if nothing was recognized.
func leftAngle(p *parser, out *bytes.Buffer, data []byte, offset int) int {
	data = data[offset:]
	altype := LINK_TYPE_NOT_AUTOLINK
	end := tagLength(data, &altype)
	if size := p.inlineHTMLComment(out, data); size > 0 {
		end = size
	}
	if end > 2 {
		if altype != LINK_TYPE_NOT_AUTOLINK {
			var uLink bytes.Buffer
			// strip the surrounding '<' and '>'
			unescapeText(&uLink, data[1:end+1-2])
			if uLink.Len() > 0 {
				p.r.AutoLink(out, uLink.Bytes(), altype)
			}
		} else {
			p.r.RawHtmlTag(out, data[:end])
		}
	}

	return end
}

// '\\' backslash escape
var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~")

// escape is the handler for a backslash at data[offset]. If the next byte is
// one of escapeChars it is emitted as normal text and both bytes are
// consumed; if the next byte is anything else, 0 is returned so the backslash
// is kept as literal text. A backslash that is the last byte of data also
// reports 2 bytes consumed without emitting anything.
func escape(p *parser, out *bytes.Buffer, data []byte, offset int) int {
	data = data[offset:]

	if len(data) > 1 {
		if bytes.IndexByte(escapeChars, data[1]) < 0 {
			return 0
		}

		p.r.NormalText(out, data[1:2])
	}

	return 2
}

// unescapeText copies src into ob, dropping each backslash and keeping the
// byte that follows it verbatim. A backslash that is the last byte of src is
// dropped.
func unescapeText(ob *bytes.Buffer, src []byte) {
	i := 0
	for i < len(src) {
		// copy the run of bytes up to the next backslash
		org := i
		for i < len(src) && src[i] != '\\' {
			i++
		}

		if i > org {
			ob.Write(src[org:i])
		}

		if i+1 >= len(src) {
			break
		}

		ob.WriteByte(src[i+1])
		i += 2
	}
}

// '&' escaped when it doesn't belong to an entity
// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
//
// entity is the handler for '&' at data[offset]. It returns the length of the
// entity after passing it to p.r.Entity, or 0 for a lone '&'.
func entity(p *parser, out *bytes.Buffer, data []byte, offset int) int {
	data = data[offset:]

	end := 1

	if end < len(data) && data[end] == '#' {
		end++
	}

	for end < len(data) && isalnum(data[end]) {
		end++
	}

	if end < len(data) && data[end] == ';' {
		end++ // real entity
	} else {
		return 0 // lone '&'
	}

	p.r.Entity(out, data[:end])

	return end
}

// linkEndsWithEntity reports whether the last htmlEntity match found in
// data[:linkEnd] ends exactly at linkEnd.
func linkEndsWithEntity(data []byte, linkEnd int) bool {
	entityRanges := htmlEntity.FindAllIndex(data[:linkEnd], -1)
	return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
}

// autoLink is the handler for the ':' of a bare URL such as
// "http://example.com" at data[offset]. It returns the number of bytes
// consumed counted from offset, or 0 if no link was produced.
//
// The steps are:
//  1. require "//" right after the ':' and refuse to run inside a link;
//  2. if the text is part of an <a href=...>...</a> element matched by
//     anchorRe, copy the rest of that element through unchanged;
//  3. rewind over the letters of the scheme and validate with isSafeLink;
//  4. scan to the end of the link and drop trailing punctuation that is not
//     part of the URL;
//  5. remove the already-emitted scheme letters from out and render the
//     unescaped URL with p.r.AutoLink.
func autoLink(p *parser, out *bytes.Buffer, data []byte, offset int) int {
	// quick check to rule out most false hits on ':'
	if p.insideLink || len(data) < offset+3 || data[offset+1] != '/' || data[offset+2] != '/' {
		return 0
	}

	// Now a more expensive check to see if we're not inside an anchor element
	anchorStart := offset
	offsetFromAnchor := 0
	for anchorStart > 0 && data[anchorStart] != '<' {
		anchorStart--
		offsetFromAnchor++
	}

	anchorStr := anchorRe.Find(data[anchorStart:])
	if anchorStr != nil {
		out.Write(anchorStr[offsetFromAnchor:])
		return len(anchorStr) - offsetFromAnchor
	}

	// scan backward for a word boundary
	rewind := 0
	for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
		rewind++
	}
	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
		return 0
	}

	origData := data
	data = data[offset-rewind:]

	if !isSafeLink(data) {
		return 0
	}

	linkEnd := 0
	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
		linkEnd++
	}

	// Skip punctuation at the end of the link
	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
		linkEnd--
	}

	// But don't skip semicolon if it's a part of escaped entity:
	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
		linkEnd--
	}

	// See if the link finishes with a punctuation sign that can be closed.
	var copen byte
	switch data[linkEnd-1] {
	case '"':
		copen = '"'
	case '\'':
		copen = '\''
	case ')':
		copen = '('
	case ']':
		copen = '['
	case '}':
		copen = '{'
	default:
		copen = 0
	}

	if copen != 0 {
		// index in origData of the byte just before the closing delimiter
		bufEnd := offset - rewind + linkEnd - 2

		openDelim := 1

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric))
		 *
		 *      (foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
			if origData[bufEnd] == data[linkEnd-1] {
				openDelim++
			}

			if origData[bufEnd] == copen {
				openDelim--
			}

			bufEnd--
		}

		if openDelim == 0 {
			linkEnd--
		}
	}

	// we were triggered on the ':', so we need to rewind the output a bit
	if out.Len() >= rewind {
		out.Truncate(len(out.Bytes()) - rewind)
	}

	var uLink bytes.Buffer
	unescapeText(&uLink, data[:linkEnd])

	if uLink.Len() > 0 {
		p.r.AutoLink(out, uLink.Bytes(), LINK_TYPE_NORMAL)
	}

	return linkEnd - rewind
}

// isEndOfLink reports whether char terminates a bare URL: whitespace or '<'.
func isEndOfLink(char byte) bool {
	return isspace(char) || char == '<'
}

// validUris lists the URI prefixes and validPaths the relative path prefixes
// that isSafeLink accepts.
var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}
var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")}

// isSafeLink reports whether link starts with an accepted prefix. A path
// prefix from validPaths must be either the whole link or be followed by an
// alphanumeric byte. A URI prefix from validUris is matched
// case-insensitively and must be followed by an alphanumeric byte.
func isSafeLink(link []byte) bool {
	for _, path := range validPaths {
		if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) {
			if len(link) == len(path) {
				return true
			} else if isalnum(link[len(path)]) {
				return true
			}
		}
	}

	for _, prefix := range validUris {
		// TODO: handle unicode here
		// case-insensitive prefix test
		if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) {
			return true
		}
	}

	return false
}

// return the length of the given tag, or 0 if it's not valid
//
// tagLength inspects data, which must start with '<', and returns the number
// of bytes up to and including the closing '>'. *autolink is set to
// LINK_TYPE_EMAIL or LINK_TYPE_NORMAL when the content is an autolink
// (<user@host> or <scheme:...>), and to LINK_TYPE_NOT_AUTOLINK otherwise.
func tagLength(data []byte, autolink *int) int {
	var i, j int

	// a valid tag can't be shorter than 3 chars
	if len(data) < 3 {
		return 0
	}

	// begins with a '<' optionally followed by '/', followed by letter or number
	if data[0] != '<' {
		return 0
	}
	if data[1] == '/' {
		i = 2
	} else {
		i = 1
	}

	if !isalnum(data[i]) {
		return 0
	}

	// scheme test
	*autolink = LINK_TYPE_NOT_AUTOLINK

	// try to find the beginning of an URI
	for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
		i++
	}

	if i > 1 && i < len(data) && data[i] == '@' {
		if j = isMailtoAutoLink(data[i:]); j != 0 {
			*autolink = LINK_TYPE_EMAIL
			return i + j
		}
	}

	if i > 2 && i < len(data) && data[i] == ':' {
		*autolink = LINK_TYPE_NORMAL
		i++
	}

	// complete autolink test: no whitespace or ' or "
	switch {
	case i >= len(data):
		*autolink = LINK_TYPE_NOT_AUTOLINK
	// NOTE(review): compares against literal 0 rather than
	// LINK_TYPE_NOT_AUTOLINK; presumably that constant is 0 — confirm.
	case *autolink != 0:
		j = i

		for i < len(data) {
			if data[i] == '\\' {
				i += 2
			} else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
				break
			} else {
				i++
			}

		}

		if i >= len(data) {
			return 0
		}
		if i > j && data[i] == '>' {
			return i + 1
		}

		// one of the forbidden chars has been found
		*autolink = LINK_TYPE_NOT_AUTOLINK
	}

	// look for something looking like a tag end
	for i < len(data) && data[i] != '>' {
		i++
	}
	if i >= len(data) {
		return 0
	}
	return i + 1
}

// look for the address part of a mail autolink and '>'
// this is less strict than the original markdown e-mail address matching
//
// isMailtoAutoLink returns the number of bytes of data up to and including
// the closing '>', or 0 if data is not a valid address tail.
func isMailtoAutoLink(data []byte) int {
	nb := 0

	// address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
	for i := 0; i < len(data); i++ {
		if isalnum(data[i]) {
			continue
		}

		switch data[i] {
		case '@':
			nb++

		case '-', '.', '_':
			// allowed punctuation; this only leaves the switch, the loop goes on
			break

		case '>':
			if nb == 1 {
				return i + 1
			} else {
				return 0
			}
		default:
			return 0
		}
	}

	return 0
}

// look for the next emph char, skipping other constructs
//
// helperFindEmphChar returns the index in data of the next unescaped
// occurrence of c, skipping over code spans and link-like bracket
// constructs. When such a construct turns out to be unterminated, the first
// occurrence of c seen inside it is returned instead. A return value of 0
// means "not found"; callers treat it as failure.
func helperFindEmphChar(data []byte, c byte) int {
	i := 0

	for i < len(data) {
		for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
			i++
		}
		if i >= len(data) {
			return 0
		}
		// do not count escaped chars
		if i != 0 && data[i-1] == '\\' {
			i++
			continue
		}
		if data[i] == c {
			return i
		}

		if data[i] == '`' {
			// skip a code span
			tmpI := 0
			i++
			for i < len(data) && data[i] != '`' {
				if tmpI == 0 && data[i] == c {
					tmpI = i
				}
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			i++
		} else if data[i] == '[' {
			// skip a link
			tmpI := 0
			i++
			for i < len(data) && data[i] != ']' {
				if tmpI == 0 && data[i] == c {
					tmpI = i
				}
				i++
			}
			i++
			for i < len(data) && (data[i] == ' ' || data[i] == '\n') {
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			if data[i] != '[' && data[i] != '(' { // not a link
				if tmpI > 0 {
					return tmpI
				} else {
					continue
				}
			}
			// scan the second part; note that the loop stops at the next
			// occurrence of the opening byte cc itself
			cc := data[i]
			i++
			for i < len(data) && data[i] != cc {
				if tmpI == 0 && data[i] == c {
					return i
				}
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			i++
		}
	}
	return 0
}

// helperEmphasis parses single emphasis. data starts right after the opening
// delimiter c. When a closing c is found that is not preceded by whitespace
// and is not part of a doubled delimiter, the enclosed text is rendered
// through p.inline and p.r.Emphasis. It returns the number of bytes of data
// consumed including the closing delimiter, or 0 on failure. With
// EXTENSION_NO_INTRA_EMPHASIS the closing delimiter must be at the end of
// data or be followed by whitespace or punctuation.
func helperEmphasis(p *parser, out *bytes.Buffer, data []byte, c byte) int {
	i := 0

	// skip one symbol if coming from emph3
	if len(data) > 1 && data[0] == c && data[1] == c {
		i = 1
	}

	for i < len(data) {
		length := helperFindEmphChar(data[i:], c)
		if length == 0 {
			return 0
		}
		i += length
		if i >= len(data) {
			return 0
		}

		if i+1 < len(data) && data[i+1] == c {
			i++
			continue
		}

		if data[i] == c && !isspace(data[i-1]) {

			if p.flags&EXTENSION_NO_INTRA_EMPHASIS != 0 {
				if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
					continue
				}
			}

			var work bytes.Buffer
			p.inline(&work, data[:i])
			p.r.Emphasis(out, work.Bytes())
			return i + 1
		}
	}

	return 0
}

// helperDoubleEmphasis parses double emphasis, or strikethrough when c is
// '~'. data starts right after the two opening delimiters. When a closing
// pair "cc" not preceded by whitespace is found, the enclosed text is
// rendered through p.inline and, if non-empty, passed to p.r.StrikeThrough or
// p.r.DoubleEmphasis. It returns the number of bytes of data consumed
// including the closing pair, or 0 on failure.
func helperDoubleEmphasis(p *parser, out *bytes.Buffer, data []byte, c byte) int {
	i := 0

	for i < len(data) {
		length := helperFindEmphChar(data[i:], c)
		if length == 0 {
			return 0
		}
		i += length

		if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
			var work bytes.Buffer
			p.inline(&work, data[:i])

			if work.Len() > 0 {
				// pick the right renderer
				if c == '~' {
					p.r.StrikeThrough(out, work.Bytes())
				} else {
					p.r.DoubleEmphasis(out, work.Bytes())
				}
			}
			return i + 2
		}
		i++
	}
	return 0
}

// helperTripleEmphasis parses text opened by three delimiters. data is the
// slice starting at the first opening delimiter and offset is the number of
// opening delimiters (the caller passes 3). Depending on whether the first
// closing run is three, two or one delimiter long, the text is rendered as
// triple emphasis, or handed to helperEmphasis or helperDoubleEmphasis with
// the unmatched opening delimiters left in the input. It returns the number
// of bytes consumed after the opening delimiters, or 0 on failure.
func helperTripleEmphasis(p *parser, out *bytes.Buffer, data []byte, offset int, c byte) int {
	i := 0
	origData := data
	data = data[offset:]

	for i < len(data) {
		length := helperFindEmphChar(data[i:], c)
		if length == 0 {
			return 0
		}
		i += length

		// skip whitespace preceded symbols
		if data[i] != c || isspace(data[i-1]) {
			continue
		}

		switch {
		case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
			// triple symbol found
			var work bytes.Buffer

			p.inline(&work, data[:i])
			if work.Len() > 0 {
				p.r.TripleEmphasis(out, work.Bytes())
			}
			return i + 3
		case (i+1 < len(data) && data[i+1] == c):
			// double symbol found, hand over to emph1
			length = helperEmphasis(p, out, origData[offset-2:], c)
			if length == 0 {
				return 0
			} else {
				return length - 2
			}
		default:
			// single symbol found, hand over to emph2
			length = helperDoubleEmphasis(p, out, origData[offset-1:], c)
			if length == 0 {
				return 0
			} else {
				return length - 1
			}
		}
	}
	return 0
}