github.com/ashishbhate/mattermost-server@v5.11.1+incompatible/utils/markdown/inlines.go (about)

     1  // Copyright (c) 2017-present Mattermost, Inc. All Rights Reserved.
     2  // See License.txt for license information.
     3  
     4  package markdown
     5  
     6  import (
     7  	"container/list"
     8  	"strings"
     9  	"unicode"
    10  	"unicode/utf8"
    11  )
    12  
// Inline is the interface shared by the inline nodes that the inline parser
// collects in its output slice.
type Inline interface {
	// IsInline reports whether the node is an inline element.
	IsInline() bool
}
    16  
    17  type inlineBase struct{}
    18  
    19  func (inlineBase) IsInline() bool { return true }
    20  
// Text is a run of literal text.
//
// Text holds the text content of the node. Range holds the Position/End pair
// the parser computed for it through relativeToAbsolutePosition;
// MergeInlineText compares these ranges to decide whether two neighbouring
// Text nodes are adjacent and can be joined.
type Text struct {
	inlineBase

	Text  string
	Range Range
}
    27  
// CodeSpan is a backtick-delimited code span.
type CodeSpan struct {
	inlineBase

	// Code is the span's content. parseBackticks stores it with every run of
	// whitespace collapsed to a single space and the ends trimmed.
	Code string
}
    33  
// HardLineBreak is the node parseLineEnding emits for a line ending that is
// preceded by the trailing whitespace it treats as forcing a break.
type HardLineBreak struct {
	inlineBase
}
    37  
// SoftLineBreak is the node parseLineEnding emits for every line ending that
// does not qualify as a HardLineBreak.
type SoftLineBreak struct {
	inlineBase
}
    41  
// InlineLinkOrImage holds the data shared by InlineLink and InlineImage, i.e.
// links and images whose destination is written inline after the closing "]".
type InlineLinkOrImage struct {
	inlineBase

	// Children are the inline nodes found between the opening delimiter and
	// the closing "]".
	Children []Inline

	// RawDestination is the span of the still-escaped destination within
	// markdown. Destination slices markdown with it.
	RawDestination Range

	// markdown is the source text that RawDestination indexes into; rawTitle
	// is the still-escaped title text (empty when no title was parsed).
	markdown string
	rawTitle string
}
    52  
    53  func (i *InlineLinkOrImage) Destination() string {
    54  	return Unescape(i.markdown[i.RawDestination.Position:i.RawDestination.End])
    55  }
    56  
    57  func (i *InlineLinkOrImage) Title() string {
    58  	return Unescape(i.rawTitle)
    59  }
    60  
// InlineLink is a link opened by "[" whose destination is written inline.
type InlineLink struct {
	InlineLinkOrImage
}
    64  
// InlineImage is an image opened by "![" whose destination is written inline.
type InlineImage struct {
	InlineLinkOrImage
}
    68  
// ReferenceLinkOrImage holds the data shared by ReferenceLink and
// ReferenceImage: the reference definition that the label matched, and the
// inline nodes found between the opening delimiter and the closing "]".
type ReferenceLinkOrImage struct {
	inlineBase
	*ReferenceDefinition

	Children []Inline
}
    75  
// ReferenceLink is a link opened by "[" that was resolved through a reference
// definition.
type ReferenceLink struct {
	ReferenceLinkOrImage
}
    79  
// ReferenceImage is an image opened by "![" that was resolved through a
// reference definition.
type ReferenceImage struct {
	ReferenceLinkOrImage
}
    83  
// Autolink is a link detected in plain text by parseAutolink, without any
// bracket syntax.
type Autolink struct {
	inlineBase

	// Children holds the link's display nodes; parseAutolink stores a single
	// Text node containing the matched text.
	Children []Inline

	// RawDestination is the span of the matched link text within markdown.
	RawDestination Range

	// markdown is the source text that RawDestination indexes into.
	markdown string
}
    93  
    94  func (i *Autolink) Destination() string {
    95  	destination := Unescape(i.markdown[i.RawDestination.Position:i.RawDestination.End])
    96  
    97  	if strings.HasPrefix(destination, "www") {
    98  		destination = "http://" + destination
    99  	}
   100  
   101  	return destination
   102  }
   103  
// delimiterType identifies the kind of opening delimiter recorded on the
// parser's delimiter stack.
type delimiterType int

const (
	// linkOpeningDelimiter is recorded for a "[" opener.
	linkOpeningDelimiter delimiterType = iota
	// imageOpeningDelimiter is recorded for a "![" opener.
	imageOpeningDelimiter
)
   110  
// delimiter is one entry of the parser's delimiter stack, describing an
// opening "[" or "![" that has not been matched with a "]" yet.
//
// Type says whether the opener starts a link or an image. IsInactive is set
// on link openers that precede a completed link; an inactive opener is dropped
// instead of being matched. TextNode is the index in the parser's inlines
// slice of the Text node emitted for the opener. Range is the opener's span
// within the parser's raw text.
type delimiter struct {
	Type       delimiterType
	IsInactive bool
	TextNode   int
	Range      Range
}
   117  
// inlineParser turns a set of ranges of a markdown document into a flat list
// of inline nodes.
type inlineParser struct {
	// Inputs: the full source text, the spans of it that are to be parsed,
	// and the reference definitions available for reference links and images.
	markdown             string
	ranges               []Range
	referenceDefinitions []*ReferenceDefinition

	// Parsing state: raw is the concatenation of the ranges (built by Parse),
	// position is the current byte offset into raw, inlines accumulates the
	// output, and delimiterStack holds the *delimiter entries for openers
	// that are still unmatched.
	raw            string
	position       int
	inlines        []Inline
	delimiterStack *list.List
}
   128  
   129  func newInlineParser(markdown string, ranges []Range, referenceDefinitions []*ReferenceDefinition) *inlineParser {
   130  	return &inlineParser{
   131  		markdown:             markdown,
   132  		ranges:               ranges,
   133  		referenceDefinitions: referenceDefinitions,
   134  		delimiterStack:       list.New(),
   135  	}
   136  }
   137  
   138  func (p *inlineParser) parseBackticks() {
   139  	count := 1
   140  	for i := p.position + 1; i < len(p.raw) && p.raw[i] == '`'; i++ {
   141  		count++
   142  	}
   143  	opening := p.raw[p.position : p.position+count]
   144  	search := p.position + count
   145  	for search < len(p.raw) {
   146  		end := strings.Index(p.raw[search:], opening)
   147  		if end == -1 {
   148  			break
   149  		}
   150  		if search+end+count < len(p.raw) && p.raw[search+end+count] == '`' {
   151  			search += end + count
   152  			for search < len(p.raw) && p.raw[search] == '`' {
   153  				search++
   154  			}
   155  			continue
   156  		}
   157  		code := strings.Join(strings.Fields(p.raw[p.position+count:search+end]), " ")
   158  		p.position = search + end + count
   159  		p.inlines = append(p.inlines, &CodeSpan{
   160  			Code: code,
   161  		})
   162  		return
   163  	}
   164  	p.position += len(opening)
   165  	absPos := relativeToAbsolutePosition(p.ranges, p.position-len(opening))
   166  	p.inlines = append(p.inlines, &Text{
   167  		Text:  opening,
   168  		Range: Range{absPos, absPos + len(opening)},
   169  	})
   170  }
   171  
   172  func (p *inlineParser) parseLineEnding() {
   173  	if p.position >= 1 && p.raw[p.position-1] == '\t' {
   174  		p.inlines = append(p.inlines, &HardLineBreak{})
   175  	} else if p.position >= 2 && p.raw[p.position-1] == ' ' && (p.raw[p.position-2] == '\t' || p.raw[p.position-1] == ' ') {
   176  		p.inlines = append(p.inlines, &HardLineBreak{})
   177  	} else {
   178  		p.inlines = append(p.inlines, &SoftLineBreak{})
   179  	}
   180  	p.position++
   181  	if p.position < len(p.raw) && p.raw[p.position] == '\n' {
   182  		p.position++
   183  	}
   184  }
   185  
   186  func (p *inlineParser) parseEscapeCharacter() {
   187  	if p.position+1 < len(p.raw) && isEscapableByte(p.raw[p.position+1]) {
   188  		absPos := relativeToAbsolutePosition(p.ranges, p.position+1)
   189  		p.inlines = append(p.inlines, &Text{
   190  			Text:  string(p.raw[p.position+1]),
   191  			Range: Range{absPos, absPos + len(string(p.raw[p.position+1]))},
   192  		})
   193  		p.position += 2
   194  	} else {
   195  		absPos := relativeToAbsolutePosition(p.ranges, p.position)
   196  		p.inlines = append(p.inlines, &Text{
   197  			Text:  `\`,
   198  			Range: Range{absPos, absPos + 1},
   199  		})
   200  		p.position++
   201  	}
   202  }
   203  
   204  func (p *inlineParser) parseText() {
   205  	if next := strings.IndexAny(p.raw[p.position:], "\r\n\\`&![]wW:"); next == -1 {
   206  		absPos := relativeToAbsolutePosition(p.ranges, p.position)
   207  		p.inlines = append(p.inlines, &Text{
   208  			Text:  strings.TrimRightFunc(p.raw[p.position:], isWhitespace),
   209  			Range: Range{absPos, absPos + len(p.raw[p.position:])},
   210  		})
   211  		p.position = len(p.raw)
   212  	} else {
   213  		absPos := relativeToAbsolutePosition(p.ranges, p.position)
   214  		if p.raw[p.position+next] == '\r' || p.raw[p.position+next] == '\n' {
   215  			s := strings.TrimRightFunc(p.raw[p.position:p.position+next], isWhitespace)
   216  			p.inlines = append(p.inlines, &Text{
   217  				Text:  s,
   218  				Range: Range{absPos, absPos + len(s)},
   219  			})
   220  		} else {
   221  			if next == 0 {
   222  				// Always read at least one character since 'w', 'W', and ':' may not actually match another
   223  				// type of node
   224  				next = 1
   225  			}
   226  
   227  			p.inlines = append(p.inlines, &Text{
   228  				Text:  p.raw[p.position : p.position+next],
   229  				Range: Range{absPos, absPos + next},
   230  			})
   231  		}
   232  		p.position += next
   233  	}
   234  }
   235  
   236  func (p *inlineParser) parseLinkOrImageDelimiter() {
   237  	absPos := relativeToAbsolutePosition(p.ranges, p.position)
   238  	if p.raw[p.position] == '[' {
   239  		p.inlines = append(p.inlines, &Text{
   240  			Text:  "[",
   241  			Range: Range{absPos, absPos + 1},
   242  		})
   243  		p.delimiterStack.PushBack(&delimiter{
   244  			Type:     linkOpeningDelimiter,
   245  			TextNode: len(p.inlines) - 1,
   246  			Range:    Range{p.position, p.position + 1},
   247  		})
   248  		p.position++
   249  	} else if p.raw[p.position] == '!' && p.position+1 < len(p.raw) && p.raw[p.position+1] == '[' {
   250  		p.inlines = append(p.inlines, &Text{
   251  			Text:  "![",
   252  			Range: Range{absPos, absPos + 2},
   253  		})
   254  		p.delimiterStack.PushBack(&delimiter{
   255  			Type:     imageOpeningDelimiter,
   256  			TextNode: len(p.inlines) - 1,
   257  			Range:    Range{p.position, p.position + 2},
   258  		})
   259  		p.position += 2
   260  	} else {
   261  		p.inlines = append(p.inlines, &Text{
   262  			Text:  "!",
   263  			Range: Range{absPos, absPos + 1},
   264  		})
   265  		p.position++
   266  	}
   267  }
   268  
   269  func (p *inlineParser) peekAtInlineLinkDestinationAndTitle(position int, isImage bool) (destination, title Range, end int, ok bool) {
   270  	if position >= len(p.raw) || p.raw[position] != '(' {
   271  		return
   272  	}
   273  	position++
   274  
   275  	destinationStart := nextNonWhitespace(p.raw, position)
   276  	if destinationStart >= len(p.raw) {
   277  		return
   278  	} else if p.raw[destinationStart] == ')' {
   279  		return Range{destinationStart, destinationStart}, Range{destinationStart, destinationStart}, destinationStart + 1, true
   280  	}
   281  
   282  	destination, end, ok = parseLinkDestination(p.raw, destinationStart)
   283  	if !ok {
   284  		return
   285  	}
   286  	position = end
   287  
   288  	if isImage && position < len(p.raw) && isWhitespaceByte(p.raw[position]) {
   289  		dimensionsStart := nextNonWhitespace(p.raw, position)
   290  		if dimensionsStart >= len(p.raw) {
   291  			return
   292  		}
   293  
   294  		if p.raw[dimensionsStart] == '=' {
   295  			// Read optional image dimensions even if we don't use them
   296  			_, end, ok = parseImageDimensions(p.raw, dimensionsStart)
   297  			if !ok {
   298  				return
   299  			}
   300  
   301  			position = end
   302  		}
   303  	}
   304  
   305  	if position < len(p.raw) && isWhitespaceByte(p.raw[position]) {
   306  		titleStart := nextNonWhitespace(p.raw, position)
   307  		if titleStart >= len(p.raw) {
   308  			return
   309  		} else if p.raw[titleStart] == ')' {
   310  			return destination, Range{titleStart, titleStart}, titleStart + 1, true
   311  		}
   312  
   313  		if p.raw[titleStart] == '"' || p.raw[titleStart] == '\'' || p.raw[titleStart] == '(' {
   314  			title, end, ok = parseLinkTitle(p.raw, titleStart)
   315  			if !ok {
   316  				return
   317  			}
   318  			position = end
   319  		}
   320  	}
   321  
   322  	closingPosition := nextNonWhitespace(p.raw, position)
   323  	if closingPosition >= len(p.raw) || p.raw[closingPosition] != ')' {
   324  		return Range{}, Range{}, 0, false
   325  	}
   326  
   327  	return destination, title, closingPosition + 1, true
   328  }
   329  
   330  func (p *inlineParser) referenceDefinition(label string) *ReferenceDefinition {
   331  	clean := strings.Join(strings.Fields(label), " ")
   332  	for _, d := range p.referenceDefinitions {
   333  		if strings.EqualFold(clean, strings.Join(strings.Fields(d.Label()), " ")) {
   334  			return d
   335  		}
   336  	}
   337  	return nil
   338  }
   339  
   340  func (p *inlineParser) lookForLinkOrImage() {
   341  	for element := p.delimiterStack.Back(); element != nil; element = element.Prev() {
   342  		d := element.Value.(*delimiter)
   343  		if d.Type != imageOpeningDelimiter && d.Type != linkOpeningDelimiter {
   344  			continue
   345  		}
   346  		if d.IsInactive {
   347  			p.delimiterStack.Remove(element)
   348  			break
   349  		}
   350  
   351  		isImage := d.Type == imageOpeningDelimiter
   352  
   353  		var inline Inline
   354  
   355  		if destination, title, next, ok := p.peekAtInlineLinkDestinationAndTitle(p.position+1, isImage); ok {
   356  			destinationMarkdownPosition := relativeToAbsolutePosition(p.ranges, destination.Position)
   357  			linkOrImage := InlineLinkOrImage{
   358  				Children:       append([]Inline(nil), p.inlines[d.TextNode+1:]...),
   359  				RawDestination: Range{destinationMarkdownPosition, destinationMarkdownPosition + destination.End - destination.Position},
   360  				markdown:       p.markdown,
   361  				rawTitle:       p.raw[title.Position:title.End],
   362  			}
   363  			if d.Type == imageOpeningDelimiter {
   364  				inline = &InlineImage{linkOrImage}
   365  			} else {
   366  				inline = &InlineLink{linkOrImage}
   367  			}
   368  			p.position = next
   369  		} else {
   370  			referenceLabel := ""
   371  			label, next, hasLinkLabel := parseLinkLabel(p.raw, p.position+1)
   372  			if hasLinkLabel && label.End > label.Position {
   373  				referenceLabel = p.raw[label.Position:label.End]
   374  			} else {
   375  				referenceLabel = p.raw[d.Range.End:p.position]
   376  				if !hasLinkLabel {
   377  					next = p.position + 1
   378  				}
   379  			}
   380  			if referenceLabel != "" {
   381  				if reference := p.referenceDefinition(referenceLabel); reference != nil {
   382  					linkOrImage := ReferenceLinkOrImage{
   383  						ReferenceDefinition: reference,
   384  						Children:            append([]Inline(nil), p.inlines[d.TextNode+1:]...),
   385  					}
   386  					if d.Type == imageOpeningDelimiter {
   387  						inline = &ReferenceImage{linkOrImage}
   388  					} else {
   389  						inline = &ReferenceLink{linkOrImage}
   390  					}
   391  					p.position = next
   392  				}
   393  			}
   394  		}
   395  
   396  		if inline != nil {
   397  			if d.Type == imageOpeningDelimiter {
   398  				p.inlines = append(p.inlines[:d.TextNode], inline)
   399  			} else {
   400  				p.inlines = append(p.inlines[:d.TextNode], inline)
   401  				for inlineElement := element.Prev(); inlineElement != nil; inlineElement = inlineElement.Prev() {
   402  					if d := inlineElement.Value.(*delimiter); d.Type == linkOpeningDelimiter {
   403  						d.IsInactive = true
   404  					}
   405  				}
   406  			}
   407  			p.delimiterStack.Remove(element)
   408  			return
   409  		} else {
   410  			p.delimiterStack.Remove(element)
   411  			break
   412  		}
   413  	}
   414  	absPos := relativeToAbsolutePosition(p.ranges, p.position)
   415  	p.inlines = append(p.inlines, &Text{
   416  		Text:  "]",
   417  		Range: Range{absPos, absPos + 1},
   418  	})
   419  	p.position++
   420  }
   421  
   422  func CharacterReference(ref string) string {
   423  	if ref == "" {
   424  		return ""
   425  	}
   426  	if ref[0] == '#' {
   427  		if len(ref) < 2 {
   428  			return ""
   429  		}
   430  		n := 0
   431  		if ref[1] == 'X' || ref[1] == 'x' {
   432  			if len(ref) < 3 {
   433  				return ""
   434  			}
   435  			for i := 2; i < len(ref); i++ {
   436  				if i > 9 {
   437  					return ""
   438  				}
   439  				d := ref[i]
   440  				switch {
   441  				case d >= '0' && d <= '9':
   442  					n = n*16 + int(d-'0')
   443  				case d >= 'a' && d <= 'f':
   444  					n = n*16 + 10 + int(d-'a')
   445  				case d >= 'A' && d <= 'F':
   446  					n = n*16 + 10 + int(d-'A')
   447  				default:
   448  					return ""
   449  				}
   450  			}
   451  		} else {
   452  			for i := 1; i < len(ref); i++ {
   453  				if i > 8 || ref[i] < '0' || ref[i] > '9' {
   454  					return ""
   455  				}
   456  				n = n*10 + int(ref[i]-'0')
   457  			}
   458  		}
   459  		c := rune(n)
   460  		if c == '\u0000' || !utf8.ValidRune(c) {
   461  			return string(unicode.ReplacementChar)
   462  		}
   463  		return string(c)
   464  	}
   465  	if entity, ok := htmlEntities[ref]; ok {
   466  		return entity
   467  	}
   468  	return ""
   469  }
   470  
   471  func (p *inlineParser) parseCharacterReference() {
   472  	absPos := relativeToAbsolutePosition(p.ranges, p.position)
   473  	p.position++
   474  	if semicolon := strings.IndexByte(p.raw[p.position:], ';'); semicolon == -1 {
   475  		p.inlines = append(p.inlines, &Text{
   476  			Text:  "&",
   477  			Range: Range{absPos, absPos + 1},
   478  		})
   479  	} else if s := CharacterReference(p.raw[p.position : p.position+semicolon]); s != "" {
   480  		p.position += semicolon + 1
   481  		p.inlines = append(p.inlines, &Text{
   482  			Text:  s,
   483  			Range: Range{absPos, absPos + len(s)},
   484  		})
   485  	} else {
   486  		p.inlines = append(p.inlines, &Text{
   487  			Text:  "&",
   488  			Range: Range{absPos, absPos + 1},
   489  		})
   490  	}
   491  }
   492  
   493  func (p *inlineParser) parseAutolink(c rune) bool {
   494  	for element := p.delimiterStack.Back(); element != nil; element = element.Prev() {
   495  		d := element.Value.(*delimiter)
   496  		if !d.IsInactive {
   497  			return false
   498  		}
   499  	}
   500  
   501  	var link Range
   502  	if c == ':' {
   503  		var ok bool
   504  		link, ok = parseURLAutolink(p.raw, p.position)
   505  
   506  		if !ok {
   507  			return false
   508  		}
   509  
   510  		// Since the current position is at the colon, we have to rewind the parsing slightly so that
   511  		// we don't duplicate the URL scheme
   512  		rewind := strings.Index(p.raw[link.Position:link.End], ":")
   513  		if rewind != -1 {
   514  			lastInline := p.inlines[len(p.inlines)-1]
   515  			lastText, ok := lastInline.(*Text)
   516  
   517  			if !ok {
   518  				// This should never occur since parseURLAutolink will only return a non-empty value
   519  				// when the previous text ends in a valid URL protocol which would mean that the previous
   520  				// node is a Text node
   521  				return false
   522  			}
   523  
   524  			p.inlines = p.inlines[0 : len(p.inlines)-1]
   525  			p.inlines = append(p.inlines, &Text{
   526  				Text:  lastText.Text[:len(lastText.Text)-rewind],
   527  				Range: Range{lastText.Range.Position, lastText.Range.End - rewind},
   528  			})
   529  			p.position -= rewind
   530  		}
   531  	} else if c == 'w' || c == 'W' {
   532  		var ok bool
   533  		link, ok = parseWWWAutolink(p.raw, p.position)
   534  
   535  		if !ok {
   536  			return false
   537  		}
   538  	}
   539  
   540  	linkMarkdownPosition := relativeToAbsolutePosition(p.ranges, link.Position)
   541  	linkRange := Range{linkMarkdownPosition, linkMarkdownPosition + link.End - link.Position}
   542  
   543  	p.inlines = append(p.inlines, &Autolink{
   544  		Children: []Inline{
   545  			&Text{
   546  				Text:  p.raw[link.Position:link.End],
   547  				Range: linkRange,
   548  			},
   549  		},
   550  		RawDestination: linkRange,
   551  		markdown:       p.markdown,
   552  	})
   553  	p.position += (link.End - link.Position)
   554  
   555  	return true
   556  }
   557  
   558  func (p *inlineParser) Parse() []Inline {
   559  	for _, r := range p.ranges {
   560  		p.raw += p.markdown[r.Position:r.End]
   561  	}
   562  
   563  	for p.position < len(p.raw) {
   564  		c, _ := utf8.DecodeRuneInString(p.raw[p.position:])
   565  
   566  		switch c {
   567  		case '\r', '\n':
   568  			p.parseLineEnding()
   569  		case '\\':
   570  			p.parseEscapeCharacter()
   571  		case '`':
   572  			p.parseBackticks()
   573  		case '&':
   574  			p.parseCharacterReference()
   575  		case '!', '[':
   576  			p.parseLinkOrImageDelimiter()
   577  		case ']':
   578  			p.lookForLinkOrImage()
   579  		case 'w', 'W', ':':
   580  			matched := p.parseAutolink(c)
   581  
   582  			if !matched {
   583  				p.parseText()
   584  			}
   585  		default:
   586  			p.parseText()
   587  		}
   588  	}
   589  
   590  	return p.inlines
   591  }
   592  
   593  func ParseInlines(markdown string, ranges []Range, referenceDefinitions []*ReferenceDefinition) (inlines []Inline) {
   594  	return newInlineParser(markdown, ranges, referenceDefinitions).Parse()
   595  }
   596  
   597  func MergeInlineText(inlines []Inline) []Inline {
   598  	var ret []Inline
   599  	for i, v := range inlines {
   600  		// always add first node
   601  		if i == 0 {
   602  			ret = append(ret, v)
   603  			continue
   604  		}
   605  		// not a text node? nothing to merge
   606  		text, ok := v.(*Text)
   607  		if !ok {
   608  			ret = append(ret, v)
   609  			continue
   610  		}
   611  		// previous node is not a text node? nothing to merge
   612  		prevText, ok := ret[len(ret)-1].(*Text)
   613  		if !ok {
   614  			ret = append(ret, v)
   615  			continue
   616  		}
   617  		// previous node is not right before this one
   618  		if prevText.Range.End != text.Range.Position {
   619  			ret = append(ret, v)
   620  			continue
   621  		}
   622  		// we have two consecutive text nodes
   623  		ret[len(ret)-1] = &Text{
   624  			Text:  prevText.Text + text.Text,
   625  			Range: Range{prevText.Range.Position, text.Range.End},
   626  		}
   627  	}
   628  	return ret
   629  }
   630  
   631  func Unescape(markdown string) string {
   632  	ret := ""
   633  
   634  	position := 0
   635  	for position < len(markdown) {
   636  		c, cSize := utf8.DecodeRuneInString(markdown[position:])
   637  
   638  		switch c {
   639  		case '\\':
   640  			if position+1 < len(markdown) && isEscapableByte(markdown[position+1]) {
   641  				ret += string(markdown[position+1])
   642  				position += 2
   643  			} else {
   644  				ret += `\`
   645  				position++
   646  			}
   647  		case '&':
   648  			position++
   649  			if semicolon := strings.IndexByte(markdown[position:], ';'); semicolon == -1 {
   650  				ret += "&"
   651  			} else if s := CharacterReference(markdown[position : position+semicolon]); s != "" {
   652  				position += semicolon + 1
   653  				ret += s
   654  			} else {
   655  				ret += "&"
   656  			}
   657  		default:
   658  			ret += string(c)
   659  			position += cSize
   660  		}
   661  	}
   662  
   663  	return ret
   664  }