github.com/ashishbhate/mattermost-server@v5.11.1+incompatible/utils/markdown/inlines.go (about) 1 // Copyright (c) 2017-present Mattermost, Inc. All Rights Reserved. 2 // See License.txt for license information. 3 4 package markdown 5 6 import ( 7 "container/list" 8 "strings" 9 "unicode" 10 "unicode/utf8" 11 ) 12 13 type Inline interface { 14 IsInline() bool 15 } 16 17 type inlineBase struct{} 18 19 func (inlineBase) IsInline() bool { return true } 20 21 type Text struct { 22 inlineBase 23 24 Text string 25 Range Range 26 } 27 28 type CodeSpan struct { 29 inlineBase 30 31 Code string 32 } 33 34 type HardLineBreak struct { 35 inlineBase 36 } 37 38 type SoftLineBreak struct { 39 inlineBase 40 } 41 42 type InlineLinkOrImage struct { 43 inlineBase 44 45 Children []Inline 46 47 RawDestination Range 48 49 markdown string 50 rawTitle string 51 } 52 53 func (i *InlineLinkOrImage) Destination() string { 54 return Unescape(i.markdown[i.RawDestination.Position:i.RawDestination.End]) 55 } 56 57 func (i *InlineLinkOrImage) Title() string { 58 return Unescape(i.rawTitle) 59 } 60 61 type InlineLink struct { 62 InlineLinkOrImage 63 } 64 65 type InlineImage struct { 66 InlineLinkOrImage 67 } 68 69 type ReferenceLinkOrImage struct { 70 inlineBase 71 *ReferenceDefinition 72 73 Children []Inline 74 } 75 76 type ReferenceLink struct { 77 ReferenceLinkOrImage 78 } 79 80 type ReferenceImage struct { 81 ReferenceLinkOrImage 82 } 83 84 type Autolink struct { 85 inlineBase 86 87 Children []Inline 88 89 RawDestination Range 90 91 markdown string 92 } 93 94 func (i *Autolink) Destination() string { 95 destination := Unescape(i.markdown[i.RawDestination.Position:i.RawDestination.End]) 96 97 if strings.HasPrefix(destination, "www") { 98 destination = "http://" + destination 99 } 100 101 return destination 102 } 103 104 type delimiterType int 105 106 const ( 107 linkOpeningDelimiter delimiterType = iota 108 imageOpeningDelimiter 109 ) 110 111 type delimiter struct { 112 Type delimiterType 113 IsInactive bool 114 TextNode int 115 Range Range 116 } 117 118 type inlineParser struct { 119 markdown string 120 ranges []Range 121 referenceDefinitions []*ReferenceDefinition 122 123 raw string 124 position int 125 inlines []Inline 126 delimiterStack *list.List 127 } 128 129 func newInlineParser(markdown string, ranges []Range, referenceDefinitions []*ReferenceDefinition) *inlineParser { 130 return &inlineParser{ 131 markdown: markdown, 132 ranges: ranges, 133 referenceDefinitions: referenceDefinitions, 134 delimiterStack: list.New(), 135 } 136 } 137 138 func (p *inlineParser) parseBackticks() { 139 count := 1 140 for i := p.position + 1; i < len(p.raw) && p.raw[i] == '`'; i++ { 141 count++ 142 } 143 opening := p.raw[p.position : p.position+count] 144 search := p.position + count 145 for search < len(p.raw) { 146 end := strings.Index(p.raw[search:], opening) 147 if end == -1 { 148 break 149 } 150 if search+end+count < len(p.raw) && p.raw[search+end+count] == '`' { 151 search += end + count 152 for search < len(p.raw) && p.raw[search] == '`' { 153 search++ 154 } 155 continue 156 } 157 code := strings.Join(strings.Fields(p.raw[p.position+count:search+end]), " ") 158 p.position = search + end + count 159 p.inlines = append(p.inlines, &CodeSpan{ 160 Code: code, 161 }) 162 return 163 } 164 p.position += len(opening) 165 absPos := relativeToAbsolutePosition(p.ranges, p.position-len(opening)) 166 p.inlines = append(p.inlines, &Text{ 167 Text: opening, 168 Range: Range{absPos, absPos + len(opening)}, 169 }) 170 } 171 172 func (p *inlineParser) parseLineEnding() { 173 if p.position >= 1 && p.raw[p.position-1] == '\t' { 174 p.inlines = append(p.inlines, &HardLineBreak{}) 175 } else if p.position >= 2 && p.raw[p.position-1] == ' ' && (p.raw[p.position-2] == '\t' || p.raw[p.position-1] == ' ') { 176 p.inlines = append(p.inlines, &HardLineBreak{}) 177 } else { 178 p.inlines = append(p.inlines, &SoftLineBreak{}) 179 } 180 p.position++ 181 if p.position < len(p.raw) && p.raw[p.position] == '\n' { 182 p.position++ 183 } 184 } 185 186 func (p *inlineParser) parseEscapeCharacter() { 187 if p.position+1 < len(p.raw) && isEscapableByte(p.raw[p.position+1]) { 188 absPos := relativeToAbsolutePosition(p.ranges, p.position+1) 189 p.inlines = append(p.inlines, &Text{ 190 Text: string(p.raw[p.position+1]), 191 Range: Range{absPos, absPos + len(string(p.raw[p.position+1]))}, 192 }) 193 p.position += 2 194 } else { 195 absPos := relativeToAbsolutePosition(p.ranges, p.position) 196 p.inlines = append(p.inlines, &Text{ 197 Text: `\`, 198 Range: Range{absPos, absPos + 1}, 199 }) 200 p.position++ 201 } 202 } 203 204 func (p *inlineParser) parseText() { 205 if next := strings.IndexAny(p.raw[p.position:], "\r\n\\`&![]wW:"); next == -1 { 206 absPos := relativeToAbsolutePosition(p.ranges, p.position) 207 p.inlines = append(p.inlines, &Text{ 208 Text: strings.TrimRightFunc(p.raw[p.position:], isWhitespace), 209 Range: Range{absPos, absPos + len(p.raw[p.position:])}, 210 }) 211 p.position = len(p.raw) 212 } else { 213 absPos := relativeToAbsolutePosition(p.ranges, p.position) 214 if p.raw[p.position+next] == '\r' || p.raw[p.position+next] == '\n' { 215 s := strings.TrimRightFunc(p.raw[p.position:p.position+next], isWhitespace) 216 p.inlines = append(p.inlines, &Text{ 217 Text: s, 218 Range: Range{absPos, absPos + len(s)}, 219 }) 220 } else { 221 if next == 0 { 222 // Always read at least one character since 'w', 'W', and ':' may not actually match another 223 // type of node 224 next = 1 225 } 226 227 p.inlines = append(p.inlines, &Text{ 228 Text: p.raw[p.position : p.position+next], 229 Range: Range{absPos, absPos + next}, 230 }) 231 } 232 p.position += next 233 } 234 } 235 236 func (p *inlineParser) parseLinkOrImageDelimiter() { 237 absPos := relativeToAbsolutePosition(p.ranges, p.position) 238 if p.raw[p.position] == '[' { 239 p.inlines = append(p.inlines, &Text{ 240 Text: "[", 241 Range: Range{absPos, absPos + 1}, 242 }) 243 p.delimiterStack.PushBack(&delimiter{ 244 Type: linkOpeningDelimiter, 245 TextNode: len(p.inlines) - 1, 246 Range: Range{p.position, p.position + 1}, 247 }) 248 p.position++ 249 } else if p.raw[p.position] == '!' && p.position+1 < len(p.raw) && p.raw[p.position+1] == '[' { 250 p.inlines = append(p.inlines, &Text{ 251 Text: "![", 252 Range: Range{absPos, absPos + 2}, 253 }) 254 p.delimiterStack.PushBack(&delimiter{ 255 Type: imageOpeningDelimiter, 256 TextNode: len(p.inlines) - 1, 257 Range: Range{p.position, p.position + 2}, 258 }) 259 p.position += 2 260 } else { 261 p.inlines = append(p.inlines, &Text{ 262 Text: "!", 263 Range: Range{absPos, absPos + 1}, 264 }) 265 p.position++ 266 } 267 } 268 269 func (p *inlineParser) peekAtInlineLinkDestinationAndTitle(position int, isImage bool) (destination, title Range, end int, ok bool) { 270 if position >= len(p.raw) || p.raw[position] != '(' { 271 return 272 } 273 position++ 274 275 destinationStart := nextNonWhitespace(p.raw, position) 276 if destinationStart >= len(p.raw) { 277 return 278 } else if p.raw[destinationStart] == ')' { 279 return Range{destinationStart, destinationStart}, Range{destinationStart, destinationStart}, destinationStart + 1, true 280 } 281 282 destination, end, ok = parseLinkDestination(p.raw, destinationStart) 283 if !ok { 284 return 285 } 286 position = end 287 288 if isImage && position < len(p.raw) && isWhitespaceByte(p.raw[position]) { 289 dimensionsStart := nextNonWhitespace(p.raw, position) 290 if dimensionsStart >= len(p.raw) { 291 return 292 } 293 294 if p.raw[dimensionsStart] == '=' { 295 // Read optional image dimensions even if we don't use them 296 _, end, ok = parseImageDimensions(p.raw, dimensionsStart) 297 if !ok { 298 return 299 } 300 301 position = end 302 } 303 } 304 305 if position < len(p.raw) && isWhitespaceByte(p.raw[position]) { 306 titleStart := nextNonWhitespace(p.raw, position) 307 if titleStart >= len(p.raw) { 308 return 309 } else if p.raw[titleStart] == ')' { 310 return destination, Range{titleStart, titleStart}, titleStart + 1, true 311 } 312 313 if p.raw[titleStart] == '"' || p.raw[titleStart] == '\'' || p.raw[titleStart] == '(' { 314 title, end, ok = parseLinkTitle(p.raw, titleStart) 315 if !ok { 316 return 317 } 318 position = end 319 } 320 } 321 322 closingPosition := nextNonWhitespace(p.raw, position) 323 if closingPosition >= len(p.raw) || p.raw[closingPosition] != ')' { 324 return Range{}, Range{}, 0, false 325 } 326 327 return destination, title, closingPosition + 1, true 328 } 329 330 func (p *inlineParser) referenceDefinition(label string) *ReferenceDefinition { 331 clean := strings.Join(strings.Fields(label), " ") 332 for _, d := range p.referenceDefinitions { 333 if strings.EqualFold(clean, strings.Join(strings.Fields(d.Label()), " ")) { 334 return d 335 } 336 } 337 return nil 338 } 339 340 func (p *inlineParser) lookForLinkOrImage() { 341 for element := p.delimiterStack.Back(); element != nil; element = element.Prev() { 342 d := element.Value.(*delimiter) 343 if d.Type != imageOpeningDelimiter && d.Type != linkOpeningDelimiter { 344 continue 345 } 346 if d.IsInactive { 347 p.delimiterStack.Remove(element) 348 break 349 } 350 351 isImage := d.Type == imageOpeningDelimiter 352 353 var inline Inline 354 355 if destination, title, next, ok := p.peekAtInlineLinkDestinationAndTitle(p.position+1, isImage); ok { 356 destinationMarkdownPosition := relativeToAbsolutePosition(p.ranges, destination.Position) 357 linkOrImage := InlineLinkOrImage{ 358 Children: append([]Inline(nil), p.inlines[d.TextNode+1:]...), 359 RawDestination: Range{destinationMarkdownPosition, destinationMarkdownPosition + destination.End - destination.Position}, 360 markdown: p.markdown, 361 rawTitle: p.raw[title.Position:title.End], 362 } 363 if d.Type == imageOpeningDelimiter { 364 inline = &InlineImage{linkOrImage} 365 } else { 366 inline = &InlineLink{linkOrImage} 367 } 368 p.position = next 369 } else { 370 referenceLabel := "" 371 label, next, hasLinkLabel := parseLinkLabel(p.raw, p.position+1) 372 if hasLinkLabel && label.End > label.Position { 373 referenceLabel = p.raw[label.Position:label.End] 374 } else { 375 referenceLabel = p.raw[d.Range.End:p.position] 376 if !hasLinkLabel { 377 next = p.position + 1 378 } 379 } 380 if referenceLabel != "" { 381 if reference := p.referenceDefinition(referenceLabel); reference != nil { 382 linkOrImage := ReferenceLinkOrImage{ 383 ReferenceDefinition: reference, 384 Children: append([]Inline(nil), p.inlines[d.TextNode+1:]...), 385 } 386 if d.Type == imageOpeningDelimiter { 387 inline = &ReferenceImage{linkOrImage} 388 } else { 389 inline = &ReferenceLink{linkOrImage} 390 } 391 p.position = next 392 } 393 } 394 } 395 396 if inline != nil { 397 if d.Type == imageOpeningDelimiter { 398 p.inlines = append(p.inlines[:d.TextNode], inline) 399 } else { 400 p.inlines = append(p.inlines[:d.TextNode], inline) 401 for inlineElement := element.Prev(); inlineElement != nil; inlineElement = inlineElement.Prev() { 402 if d := inlineElement.Value.(*delimiter); d.Type == linkOpeningDelimiter { 403 d.IsInactive = true 404 } 405 } 406 } 407 p.delimiterStack.Remove(element) 408 return 409 } else { 410 p.delimiterStack.Remove(element) 411 break 412 } 413 } 414 absPos := relativeToAbsolutePosition(p.ranges, p.position) 415 p.inlines = append(p.inlines, &Text{ 416 Text: "]", 417 Range: Range{absPos, absPos + 1}, 418 }) 419 p.position++ 420 } 421 422 func CharacterReference(ref string) string { 423 if ref == "" { 424 return "" 425 } 426 if ref[0] == '#' { 427 if len(ref) < 2 { 428 return "" 429 } 430 n := 0 431 if ref[1] == 'X' || ref[1] == 'x' { 432 if len(ref) < 3 { 433 return "" 434 } 435 for i := 2; i < len(ref); i++ { 436 if i > 9 { 437 return "" 438 } 439 d := ref[i] 440 switch { 441 case d >= '0' && d <= '9': 442 n = n*16 + int(d-'0') 443 case d >= 'a' && d <= 'f': 444 n = n*16 + 10 + int(d-'a') 445 case d >= 'A' && d <= 'F': 446 n = n*16 + 10 + int(d-'A') 447 default: 448 return "" 449 } 450 } 451 } else { 452 for i := 1; i < len(ref); i++ { 453 if i > 8 || ref[i] < '0' || ref[i] > '9' { 454 return "" 455 } 456 n = n*10 + int(ref[i]-'0') 457 } 458 } 459 c := rune(n) 460 if c == '\u0000' || !utf8.ValidRune(c) { 461 return string(unicode.ReplacementChar) 462 } 463 return string(c) 464 } 465 if entity, ok := htmlEntities[ref]; ok { 466 return entity 467 } 468 return "" 469 } 470 471 func (p *inlineParser) parseCharacterReference() { 472 absPos := relativeToAbsolutePosition(p.ranges, p.position) 473 p.position++ 474 if semicolon := strings.IndexByte(p.raw[p.position:], ';'); semicolon == -1 { 475 p.inlines = append(p.inlines, &Text{ 476 Text: "&", 477 Range: Range{absPos, absPos + 1}, 478 }) 479 } else if s := CharacterReference(p.raw[p.position : p.position+semicolon]); s != "" { 480 p.position += semicolon + 1 481 p.inlines = append(p.inlines, &Text{ 482 Text: s, 483 Range: Range{absPos, absPos + len(s)}, 484 }) 485 } else { 486 p.inlines = append(p.inlines, &Text{ 487 Text: "&", 488 Range: Range{absPos, absPos + 1}, 489 }) 490 } 491 } 492 493 func (p *inlineParser) parseAutolink(c rune) bool { 494 for element := p.delimiterStack.Back(); element != nil; element = element.Prev() { 495 d := element.Value.(*delimiter) 496 if !d.IsInactive { 497 return false 498 } 499 } 500 501 var link Range 502 if c == ':' { 503 var ok bool 504 link, ok = parseURLAutolink(p.raw, p.position) 505 506 if !ok { 507 return false 508 } 509 510 // Since the current position is at the colon, we have to rewind the parsing slightly so that 511 // we don't duplicate the URL scheme 512 rewind := strings.Index(p.raw[link.Position:link.End], ":") 513 if rewind != -1 { 514 lastInline := p.inlines[len(p.inlines)-1] 515 lastText, ok := lastInline.(*Text) 516 517 if !ok { 518 // This should never occur since parseURLAutolink will only return a non-empty value 519 // when the previous text ends in a valid URL protocol which would mean that the previous 520 // node is a Text node 521 return false 522 } 523 524 p.inlines = p.inlines[0 : len(p.inlines)-1] 525 p.inlines = append(p.inlines, &Text{ 526 Text: lastText.Text[:len(lastText.Text)-rewind], 527 Range: Range{lastText.Range.Position, lastText.Range.End - rewind}, 528 }) 529 p.position -= rewind 530 } 531 } else if c == 'w' || c == 'W' { 532 var ok bool 533 link, ok = parseWWWAutolink(p.raw, p.position) 534 535 if !ok { 536 return false 537 } 538 } 539 540 linkMarkdownPosition := relativeToAbsolutePosition(p.ranges, link.Position) 541 linkRange := Range{linkMarkdownPosition, linkMarkdownPosition + link.End - link.Position} 542 543 p.inlines = append(p.inlines, &Autolink{ 544 Children: []Inline{ 545 &Text{ 546 Text: p.raw[link.Position:link.End], 547 Range: linkRange, 548 }, 549 }, 550 RawDestination: linkRange, 551 markdown: p.markdown, 552 }) 553 p.position += (link.End - link.Position) 554 555 return true 556 } 557 558 func (p *inlineParser) Parse() []Inline { 559 for _, r := range p.ranges { 560 p.raw += p.markdown[r.Position:r.End] 561 } 562 563 for p.position < len(p.raw) { 564 c, _ := utf8.DecodeRuneInString(p.raw[p.position:]) 565 566 switch c { 567 case '\r', '\n': 568 p.parseLineEnding() 569 case '\\': 570 p.parseEscapeCharacter() 571 case '`': 572 p.parseBackticks() 573 case '&': 574 p.parseCharacterReference() 575 case '!', '[': 576 p.parseLinkOrImageDelimiter() 577 case ']': 578 p.lookForLinkOrImage() 579 case 'w', 'W', ':': 580 matched := p.parseAutolink(c) 581 582 if !matched { 583 p.parseText() 584 } 585 default: 586 p.parseText() 587 } 588 } 589 590 return p.inlines 591 } 592 593 func ParseInlines(markdown string, ranges []Range, referenceDefinitions []*ReferenceDefinition) (inlines []Inline) { 594 return newInlineParser(markdown, ranges, referenceDefinitions).Parse() 595 } 596 597 func MergeInlineText(inlines []Inline) []Inline { 598 var ret []Inline 599 for i, v := range inlines { 600 // always add first node 601 if i == 0 { 602 ret = append(ret, v) 603 continue 604 } 605 // not a text node? nothing to merge 606 text, ok := v.(*Text) 607 if !ok { 608 ret = append(ret, v) 609 continue 610 } 611 // previous node is not a text node? nothing to merge 612 prevText, ok := ret[len(ret)-1].(*Text) 613 if !ok { 614 ret = append(ret, v) 615 continue 616 } 617 // previous node is not right before this one 618 if prevText.Range.End != text.Range.Position { 619 ret = append(ret, v) 620 continue 621 } 622 // we have two consecutive text nodes 623 ret[len(ret)-1] = &Text{ 624 Text: prevText.Text + text.Text, 625 Range: Range{prevText.Range.Position, text.Range.End}, 626 } 627 } 628 return ret 629 } 630 631 func Unescape(markdown string) string { 632 ret := "" 633 634 position := 0 635 for position < len(markdown) { 636 c, cSize := utf8.DecodeRuneInString(markdown[position:]) 637 638 switch c { 639 case '\\': 640 if position+1 < len(markdown) && isEscapableByte(markdown[position+1]) { 641 ret += string(markdown[position+1]) 642 position += 2 643 } else { 644 ret += `\` 645 position++ 646 } 647 case '&': 648 position++ 649 if semicolon := strings.IndexByte(markdown[position:], ';'); semicolon == -1 { 650 ret += "&" 651 } else if s := CharacterReference(markdown[position : position+semicolon]); s != "" { 652 position += semicolon + 1 653 ret += s 654 } else { 655 ret += "&" 656 } 657 default: 658 ret += string(c) 659 position += cSize 660 } 661 } 662 663 return ret 664 }