github.com/mattermost/mattermost-server/v5@v5.39.3/shared/markdown/inlines.go (about)

     1  // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
     2  // See LICENSE.txt for license information.
     3  
     4  package markdown
     5  
     6  import (
     7  	"container/list"
     8  	"strings"
     9  	"unicode"
    10  	"unicode/utf8"
    11  )
    12  
    13  type Inline interface {
    14  	IsInline() bool
    15  }
    16  
    17  type inlineBase struct{}
    18  
    19  func (inlineBase) IsInline() bool { return true }
    20  
    21  type Text struct {
    22  	inlineBase
    23  
    24  	Text  string
    25  	Range Range
    26  }
    27  
    28  type CodeSpan struct {
    29  	inlineBase
    30  
    31  	Code string
    32  }
    33  
    34  type HardLineBreak struct {
    35  	inlineBase
    36  }
    37  
    38  type SoftLineBreak struct {
    39  	inlineBase
    40  }
    41  
    42  type InlineLinkOrImage struct {
    43  	inlineBase
    44  
    45  	Children []Inline
    46  
    47  	RawDestination Range
    48  
    49  	markdown string
    50  	rawTitle string
    51  }
    52  
    53  func (i *InlineLinkOrImage) Destination() string {
    54  	return Unescape(i.markdown[i.RawDestination.Position:i.RawDestination.End])
    55  }
    56  
    57  func (i *InlineLinkOrImage) Title() string {
    58  	return Unescape(i.rawTitle)
    59  }
    60  
    61  type InlineLink struct {
    62  	InlineLinkOrImage
    63  }
    64  
    65  type InlineImage struct {
    66  	InlineLinkOrImage
    67  }
    68  
    69  type ReferenceLinkOrImage struct {
    70  	inlineBase
    71  	*ReferenceDefinition
    72  
    73  	Children []Inline
    74  }
    75  
    76  type ReferenceLink struct {
    77  	ReferenceLinkOrImage
    78  }
    79  
    80  type ReferenceImage struct {
    81  	ReferenceLinkOrImage
    82  }
    83  
    84  type Autolink struct {
    85  	inlineBase
    86  
    87  	Children []Inline
    88  
    89  	RawDestination Range
    90  
    91  	markdown string
    92  }
    93  
    94  func (i *Autolink) Destination() string {
    95  	destination := Unescape(i.markdown[i.RawDestination.Position:i.RawDestination.End])
    96  
    97  	if strings.HasPrefix(destination, "www") {
    98  		destination = "http://" + destination
    99  	}
   100  
   101  	return destination
   102  }
   103  
   104  type delimiterType int
   105  
   106  const (
   107  	linkOpeningDelimiter delimiterType = iota
   108  	imageOpeningDelimiter
   109  )
   110  
   111  type delimiter struct {
   112  	Type       delimiterType
   113  	IsInactive bool
   114  	TextNode   int
   115  	Range      Range
   116  }
   117  
   118  type inlineParser struct {
   119  	markdown             string
   120  	ranges               []Range
   121  	referenceDefinitions []*ReferenceDefinition
   122  
   123  	raw            string
   124  	position       int
   125  	inlines        []Inline
   126  	delimiterStack *list.List
   127  }
   128  
   129  func newInlineParser(markdown string, ranges []Range, referenceDefinitions []*ReferenceDefinition) *inlineParser {
   130  	return &inlineParser{
   131  		markdown:             markdown,
   132  		ranges:               ranges,
   133  		referenceDefinitions: referenceDefinitions,
   134  		delimiterStack:       list.New(),
   135  	}
   136  }
   137  
   138  func (p *inlineParser) parseBackticks() {
   139  	count := 1
   140  	for i := p.position + 1; i < len(p.raw) && p.raw[i] == '`'; i++ {
   141  		count++
   142  	}
   143  	opening := p.raw[p.position : p.position+count]
   144  	search := p.position + count
   145  	for search < len(p.raw) {
   146  		end := strings.Index(p.raw[search:], opening)
   147  		if end == -1 {
   148  			break
   149  		}
   150  		if search+end+count < len(p.raw) && p.raw[search+end+count] == '`' {
   151  			search += end + count
   152  			for search < len(p.raw) && p.raw[search] == '`' {
   153  				search++
   154  			}
   155  			continue
   156  		}
   157  		code := strings.Join(strings.Fields(p.raw[p.position+count:search+end]), " ")
   158  		p.position = search + end + count
   159  		p.inlines = append(p.inlines, &CodeSpan{
   160  			Code: code,
   161  		})
   162  		return
   163  	}
   164  	p.position += len(opening)
   165  	absPos := relativeToAbsolutePosition(p.ranges, p.position-len(opening))
   166  	p.inlines = append(p.inlines, &Text{
   167  		Text:  opening,
   168  		Range: Range{absPos, absPos + len(opening)},
   169  	})
   170  }
   171  
   172  func (p *inlineParser) parseLineEnding() {
   173  	if p.position >= 1 && p.raw[p.position-1] == '\t' {
   174  		p.inlines = append(p.inlines, &HardLineBreak{})
   175  	} else if p.position >= 2 && p.raw[p.position-1] == ' ' && (p.raw[p.position-2] == '\t' || p.raw[p.position-1] == ' ') {
   176  		p.inlines = append(p.inlines, &HardLineBreak{})
   177  	} else {
   178  		p.inlines = append(p.inlines, &SoftLineBreak{})
   179  	}
   180  	p.position++
   181  	if p.position < len(p.raw) && p.raw[p.position] == '\n' {
   182  		p.position++
   183  	}
   184  }
   185  
   186  func (p *inlineParser) parseEscapeCharacter() {
   187  	if p.position+1 < len(p.raw) && isEscapableByte(p.raw[p.position+1]) {
   188  		absPos := relativeToAbsolutePosition(p.ranges, p.position+1)
   189  		p.inlines = append(p.inlines, &Text{
   190  			Text:  string(p.raw[p.position+1]),
   191  			Range: Range{absPos, absPos + len(string(p.raw[p.position+1]))},
   192  		})
   193  		p.position += 2
   194  	} else {
   195  		absPos := relativeToAbsolutePosition(p.ranges, p.position)
   196  		p.inlines = append(p.inlines, &Text{
   197  			Text:  `\`,
   198  			Range: Range{absPos, absPos + 1},
   199  		})
   200  		p.position++
   201  	}
   202  }
   203  
   204  func (p *inlineParser) parseText() {
   205  	if next := strings.IndexAny(p.raw[p.position:], "\r\n\\`&![]wW:"); next == -1 {
   206  		absPos := relativeToAbsolutePosition(p.ranges, p.position)
   207  		p.inlines = append(p.inlines, &Text{
   208  			Text:  strings.TrimRightFunc(p.raw[p.position:], isWhitespace),
   209  			Range: Range{absPos, absPos + len(p.raw[p.position:])},
   210  		})
   211  		p.position = len(p.raw)
   212  	} else {
   213  		absPos := relativeToAbsolutePosition(p.ranges, p.position)
   214  		if p.raw[p.position+next] == '\r' || p.raw[p.position+next] == '\n' {
   215  			s := strings.TrimRightFunc(p.raw[p.position:p.position+next], isWhitespace)
   216  			p.inlines = append(p.inlines, &Text{
   217  				Text:  s,
   218  				Range: Range{absPos, absPos + len(s)},
   219  			})
   220  		} else {
   221  			if next == 0 {
   222  				// Always read at least one character since 'w', 'W', and ':' may not actually match another
   223  				// type of node
   224  				next = 1
   225  			}
   226  
   227  			p.inlines = append(p.inlines, &Text{
   228  				Text:  p.raw[p.position : p.position+next],
   229  				Range: Range{absPos, absPos + next},
   230  			})
   231  		}
   232  		p.position += next
   233  	}
   234  }
   235  
   236  func (p *inlineParser) parseLinkOrImageDelimiter() {
   237  	absPos := relativeToAbsolutePosition(p.ranges, p.position)
   238  	if p.raw[p.position] == '[' {
   239  		p.inlines = append(p.inlines, &Text{
   240  			Text:  "[",
   241  			Range: Range{absPos, absPos + 1},
   242  		})
   243  		p.delimiterStack.PushBack(&delimiter{
   244  			Type:     linkOpeningDelimiter,
   245  			TextNode: len(p.inlines) - 1,
   246  			Range:    Range{p.position, p.position + 1},
   247  		})
   248  		p.position++
   249  	} else if p.raw[p.position] == '!' && p.position+1 < len(p.raw) && p.raw[p.position+1] == '[' {
   250  		p.inlines = append(p.inlines, &Text{
   251  			Text:  "![",
   252  			Range: Range{absPos, absPos + 2},
   253  		})
   254  		p.delimiterStack.PushBack(&delimiter{
   255  			Type:     imageOpeningDelimiter,
   256  			TextNode: len(p.inlines) - 1,
   257  			Range:    Range{p.position, p.position + 2},
   258  		})
   259  		p.position += 2
   260  	} else {
   261  		p.inlines = append(p.inlines, &Text{
   262  			Text:  "!",
   263  			Range: Range{absPos, absPos + 1},
   264  		})
   265  		p.position++
   266  	}
   267  }
   268  
   269  func (p *inlineParser) peekAtInlineLinkDestinationAndTitle(position int, isImage bool) (destination, title Range, end int, ok bool) {
   270  	if position >= len(p.raw) || p.raw[position] != '(' {
   271  		return
   272  	}
   273  	position++
   274  
   275  	destinationStart := nextNonWhitespace(p.raw, position)
   276  	if destinationStart >= len(p.raw) {
   277  		return
   278  	} else if p.raw[destinationStart] == ')' {
   279  		return Range{destinationStart, destinationStart}, Range{destinationStart, destinationStart}, destinationStart + 1, true
   280  	}
   281  
   282  	destination, end, ok = parseLinkDestination(p.raw, destinationStart)
   283  	if !ok {
   284  		return
   285  	}
   286  	position = end
   287  
   288  	if isImage && position < len(p.raw) && isWhitespaceByte(p.raw[position]) {
   289  		dimensionsStart := nextNonWhitespace(p.raw, position)
   290  		if dimensionsStart >= len(p.raw) {
   291  			return
   292  		}
   293  
   294  		if p.raw[dimensionsStart] == '=' {
   295  			// Read optional image dimensions even if we don't use them
   296  			_, end, ok = parseImageDimensions(p.raw, dimensionsStart)
   297  			if !ok {
   298  				return
   299  			}
   300  
   301  			position = end
   302  		}
   303  	}
   304  
   305  	if position < len(p.raw) && isWhitespaceByte(p.raw[position]) {
   306  		titleStart := nextNonWhitespace(p.raw, position)
   307  		if titleStart >= len(p.raw) {
   308  			return
   309  		} else if p.raw[titleStart] == ')' {
   310  			return destination, Range{titleStart, titleStart}, titleStart + 1, true
   311  		}
   312  
   313  		if p.raw[titleStart] == '"' || p.raw[titleStart] == '\'' || p.raw[titleStart] == '(' {
   314  			title, end, ok = parseLinkTitle(p.raw, titleStart)
   315  			if !ok {
   316  				return
   317  			}
   318  			position = end
   319  		}
   320  	}
   321  
   322  	closingPosition := nextNonWhitespace(p.raw, position)
   323  	if closingPosition >= len(p.raw) || p.raw[closingPosition] != ')' {
   324  		return Range{}, Range{}, 0, false
   325  	}
   326  
   327  	return destination, title, closingPosition + 1, true
   328  }
   329  
   330  func (p *inlineParser) referenceDefinition(label string) *ReferenceDefinition {
   331  	clean := strings.Join(strings.Fields(label), " ")
   332  	for _, d := range p.referenceDefinitions {
   333  		if strings.EqualFold(clean, strings.Join(strings.Fields(d.Label()), " ")) {
   334  			return d
   335  		}
   336  	}
   337  	return nil
   338  }
   339  
   340  func (p *inlineParser) lookForLinkOrImage() {
   341  	for element := p.delimiterStack.Back(); element != nil; element = element.Prev() {
   342  		d := element.Value.(*delimiter)
   343  		if d.Type != imageOpeningDelimiter && d.Type != linkOpeningDelimiter {
   344  			continue
   345  		}
   346  		if d.IsInactive {
   347  			p.delimiterStack.Remove(element)
   348  			break
   349  		}
   350  
   351  		isImage := d.Type == imageOpeningDelimiter
   352  
   353  		var inline Inline
   354  
   355  		if destination, title, next, ok := p.peekAtInlineLinkDestinationAndTitle(p.position+1, isImage); ok {
   356  			destinationMarkdownPosition := relativeToAbsolutePosition(p.ranges, destination.Position)
   357  			linkOrImage := InlineLinkOrImage{
   358  				Children:       append([]Inline(nil), p.inlines[d.TextNode+1:]...),
   359  				RawDestination: Range{destinationMarkdownPosition, destinationMarkdownPosition + destination.End - destination.Position},
   360  				markdown:       p.markdown,
   361  				rawTitle:       p.raw[title.Position:title.End],
   362  			}
   363  			if d.Type == imageOpeningDelimiter {
   364  				inline = &InlineImage{linkOrImage}
   365  			} else {
   366  				inline = &InlineLink{linkOrImage}
   367  			}
   368  			p.position = next
   369  		} else {
   370  			referenceLabel := ""
   371  			label, next, hasLinkLabel := parseLinkLabel(p.raw, p.position+1)
   372  			if hasLinkLabel && label.End > label.Position {
   373  				referenceLabel = p.raw[label.Position:label.End]
   374  			} else {
   375  				referenceLabel = p.raw[d.Range.End:p.position]
   376  				if !hasLinkLabel {
   377  					next = p.position + 1
   378  				}
   379  			}
   380  			if referenceLabel != "" {
   381  				if reference := p.referenceDefinition(referenceLabel); reference != nil {
   382  					linkOrImage := ReferenceLinkOrImage{
   383  						ReferenceDefinition: reference,
   384  						Children:            append([]Inline(nil), p.inlines[d.TextNode+1:]...),
   385  					}
   386  					if d.Type == imageOpeningDelimiter {
   387  						inline = &ReferenceImage{linkOrImage}
   388  					} else {
   389  						inline = &ReferenceLink{linkOrImage}
   390  					}
   391  					p.position = next
   392  				}
   393  			}
   394  		}
   395  
   396  		if inline != nil {
   397  			if d.Type == imageOpeningDelimiter {
   398  				p.inlines = append(p.inlines[:d.TextNode], inline)
   399  			} else {
   400  				p.inlines = append(p.inlines[:d.TextNode], inline)
   401  				for inlineElement := element.Prev(); inlineElement != nil; inlineElement = inlineElement.Prev() {
   402  					if d := inlineElement.Value.(*delimiter); d.Type == linkOpeningDelimiter {
   403  						d.IsInactive = true
   404  					}
   405  				}
   406  			}
   407  			p.delimiterStack.Remove(element)
   408  			return
   409  		}
   410  		p.delimiterStack.Remove(element)
   411  		break
   412  	}
   413  	absPos := relativeToAbsolutePosition(p.ranges, p.position)
   414  	p.inlines = append(p.inlines, &Text{
   415  		Text:  "]",
   416  		Range: Range{absPos, absPos + 1},
   417  	})
   418  	p.position++
   419  }
   420  
   421  func CharacterReference(ref string) string {
   422  	if ref == "" {
   423  		return ""
   424  	}
   425  	if ref[0] == '#' {
   426  		if len(ref) < 2 {
   427  			return ""
   428  		}
   429  		n := 0
   430  		if ref[1] == 'X' || ref[1] == 'x' {
   431  			if len(ref) < 3 {
   432  				return ""
   433  			}
   434  			for i := 2; i < len(ref); i++ {
   435  				if i > 9 {
   436  					return ""
   437  				}
   438  				d := ref[i]
   439  				switch {
   440  				case d >= '0' && d <= '9':
   441  					n = n*16 + int(d-'0')
   442  				case d >= 'a' && d <= 'f':
   443  					n = n*16 + 10 + int(d-'a')
   444  				case d >= 'A' && d <= 'F':
   445  					n = n*16 + 10 + int(d-'A')
   446  				default:
   447  					return ""
   448  				}
   449  			}
   450  		} else {
   451  			for i := 1; i < len(ref); i++ {
   452  				if i > 8 || ref[i] < '0' || ref[i] > '9' {
   453  					return ""
   454  				}
   455  				n = n*10 + int(ref[i]-'0')
   456  			}
   457  		}
   458  		c := rune(n)
   459  		if c == '\u0000' || !utf8.ValidRune(c) {
   460  			return string(unicode.ReplacementChar)
   461  		}
   462  		return string(c)
   463  	}
   464  	if entity, ok := htmlEntities[ref]; ok {
   465  		return entity
   466  	}
   467  	return ""
   468  }
   469  
   470  func (p *inlineParser) parseCharacterReference() {
   471  	absPos := relativeToAbsolutePosition(p.ranges, p.position)
   472  	p.position++
   473  	if semicolon := strings.IndexByte(p.raw[p.position:], ';'); semicolon == -1 {
   474  		p.inlines = append(p.inlines, &Text{
   475  			Text:  "&",
   476  			Range: Range{absPos, absPos + 1},
   477  		})
   478  	} else if s := CharacterReference(p.raw[p.position : p.position+semicolon]); s != "" {
   479  		p.position += semicolon + 1
   480  		p.inlines = append(p.inlines, &Text{
   481  			Text:  s,
   482  			Range: Range{absPos, absPos + len(s)},
   483  		})
   484  	} else {
   485  		p.inlines = append(p.inlines, &Text{
   486  			Text:  "&",
   487  			Range: Range{absPos, absPos + 1},
   488  		})
   489  	}
   490  }
   491  
   492  func (p *inlineParser) parseAutolink(c rune) bool {
   493  	for element := p.delimiterStack.Back(); element != nil; element = element.Prev() {
   494  		d := element.Value.(*delimiter)
   495  		if !d.IsInactive {
   496  			return false
   497  		}
   498  	}
   499  
   500  	var link Range
   501  	if c == ':' {
   502  		var ok bool
   503  		link, ok = parseURLAutolink(p.raw, p.position)
   504  
   505  		if !ok {
   506  			return false
   507  		}
   508  
   509  		// Since the current position is at the colon, we have to rewind the parsing slightly so that
   510  		// we don't duplicate the URL scheme
   511  		rewind := strings.Index(p.raw[link.Position:link.End], ":")
   512  		if rewind != -1 {
   513  			lastInline := p.inlines[len(p.inlines)-1]
   514  			lastText, ok := lastInline.(*Text)
   515  
   516  			if !ok {
   517  				// This should never occur since parseURLAutolink will only return a non-empty value
   518  				// when the previous text ends in a valid URL protocol which would mean that the previous
   519  				// node is a Text node
   520  				return false
   521  			}
   522  
   523  			p.inlines = p.inlines[0 : len(p.inlines)-1]
   524  			p.inlines = append(p.inlines, &Text{
   525  				Text:  lastText.Text[:len(lastText.Text)-rewind],
   526  				Range: Range{lastText.Range.Position, lastText.Range.End - rewind},
   527  			})
   528  			p.position -= rewind
   529  		}
   530  	} else if c == 'w' || c == 'W' {
   531  		var ok bool
   532  		link, ok = parseWWWAutolink(p.raw, p.position)
   533  
   534  		if !ok {
   535  			return false
   536  		}
   537  	}
   538  
   539  	linkMarkdownPosition := relativeToAbsolutePosition(p.ranges, link.Position)
   540  	linkRange := Range{linkMarkdownPosition, linkMarkdownPosition + link.End - link.Position}
   541  
   542  	p.inlines = append(p.inlines, &Autolink{
   543  		Children: []Inline{
   544  			&Text{
   545  				Text:  p.raw[link.Position:link.End],
   546  				Range: linkRange,
   547  			},
   548  		},
   549  		RawDestination: linkRange,
   550  		markdown:       p.markdown,
   551  	})
   552  	p.position += (link.End - link.Position)
   553  
   554  	return true
   555  }
   556  
   557  func (p *inlineParser) Parse() []Inline {
   558  	for _, r := range p.ranges {
   559  		p.raw += p.markdown[r.Position:r.End]
   560  	}
   561  
   562  	for p.position < len(p.raw) {
   563  		c, _ := utf8.DecodeRuneInString(p.raw[p.position:])
   564  
   565  		switch c {
   566  		case '\r', '\n':
   567  			p.parseLineEnding()
   568  		case '\\':
   569  			p.parseEscapeCharacter()
   570  		case '`':
   571  			p.parseBackticks()
   572  		case '&':
   573  			p.parseCharacterReference()
   574  		case '!', '[':
   575  			p.parseLinkOrImageDelimiter()
   576  		case ']':
   577  			p.lookForLinkOrImage()
   578  		case 'w', 'W', ':':
   579  			matched := p.parseAutolink(c)
   580  
   581  			if !matched {
   582  				p.parseText()
   583  			}
   584  		default:
   585  			p.parseText()
   586  		}
   587  	}
   588  
   589  	return p.inlines
   590  }
   591  
   592  func ParseInlines(markdown string, ranges []Range, referenceDefinitions []*ReferenceDefinition) (inlines []Inline) {
   593  	return newInlineParser(markdown, ranges, referenceDefinitions).Parse()
   594  }
   595  
   596  func MergeInlineText(inlines []Inline) []Inline {
   597  	ret := inlines[:0]
   598  	for i, v := range inlines {
   599  		// always add first node
   600  		if i == 0 {
   601  			ret = append(ret, v)
   602  			continue
   603  		}
   604  		// not a text node? nothing to merge
   605  		text, ok := v.(*Text)
   606  		if !ok {
   607  			ret = append(ret, v)
   608  			continue
   609  		}
   610  		// previous node is not a text node? nothing to merge
   611  		prevText, ok := ret[len(ret)-1].(*Text)
   612  		if !ok {
   613  			ret = append(ret, v)
   614  			continue
   615  		}
   616  		// previous node is not right before this one
   617  		if prevText.Range.End != text.Range.Position {
   618  			ret = append(ret, v)
   619  			continue
   620  		}
   621  		// we have two consecutive text nodes
   622  		ret[len(ret)-1] = &Text{
   623  			Text:  prevText.Text + text.Text,
   624  			Range: Range{prevText.Range.Position, text.Range.End},
   625  		}
   626  	}
   627  	return ret
   628  }
   629  
   630  func Unescape(markdown string) string {
   631  	ret := ""
   632  
   633  	position := 0
   634  	for position < len(markdown) {
   635  		c, cSize := utf8.DecodeRuneInString(markdown[position:])
   636  
   637  		switch c {
   638  		case '\\':
   639  			if position+1 < len(markdown) && isEscapableByte(markdown[position+1]) {
   640  				ret += string(markdown[position+1])
   641  				position += 2
   642  			} else {
   643  				ret += `\`
   644  				position++
   645  			}
   646  		case '&':
   647  			position++
   648  			if semicolon := strings.IndexByte(markdown[position:], ';'); semicolon == -1 {
   649  				ret += "&"
   650  			} else if s := CharacterReference(markdown[position : position+semicolon]); s != "" {
   651  				position += semicolon + 1
   652  				ret += s
   653  			} else {
   654  				ret += "&"
   655  			}
   656  		default:
   657  			ret += string(c)
   658  			position += cSize
   659  		}
   660  	}
   661  
   662  	return ret
   663  }