github.com/qichengzx/mattermost-server@v4.5.1-0.20180604164826-2c75247c97d0+incompatible/utils/markdown/inlines.go (about)

     1  // Copyright (c) 2017-present Mattermost, Inc. All Rights Reserved.
     2  // See License.txt for license information.
     3  
     4  package markdown
     5  
     6  import (
     7  	"container/list"
     8  	"strings"
     9  	"unicode"
    10  	"unicode/utf8"
    11  )
    12  
    13  type Inline interface {
    14  	IsInline() bool
    15  }
    16  
    17  type inlineBase struct{}
    18  
    19  func (inlineBase) IsInline() bool { return true }
    20  
    21  type Text struct {
    22  	inlineBase
    23  
    24  	Text string
    25  }
    26  
    27  type CodeSpan struct {
    28  	inlineBase
    29  
    30  	Code string
    31  }
    32  
    33  type HardLineBreak struct {
    34  	inlineBase
    35  }
    36  
    37  type SoftLineBreak struct {
    38  	inlineBase
    39  }
    40  
    41  type InlineLinkOrImage struct {
    42  	inlineBase
    43  
    44  	Children []Inline
    45  
    46  	RawDestination Range
    47  
    48  	markdown string
    49  	rawTitle string
    50  }
    51  
    52  func (i *InlineLinkOrImage) Destination() string {
    53  	return Unescape(i.markdown[i.RawDestination.Position:i.RawDestination.End])
    54  }
    55  
    56  func (i *InlineLinkOrImage) Title() string {
    57  	return Unescape(i.rawTitle)
    58  }
    59  
    60  type InlineLink struct {
    61  	InlineLinkOrImage
    62  }
    63  
    64  type InlineImage struct {
    65  	InlineLinkOrImage
    66  }
    67  
    68  type ReferenceLinkOrImage struct {
    69  	inlineBase
    70  	*ReferenceDefinition
    71  
    72  	Children []Inline
    73  }
    74  
    75  type ReferenceLink struct {
    76  	ReferenceLinkOrImage
    77  }
    78  
    79  type ReferenceImage struct {
    80  	ReferenceLinkOrImage
    81  }
    82  
    83  type delimiterType int
    84  
    85  const (
    86  	linkOpeningDelimiter delimiterType = iota
    87  	imageOpeningDelimiter
    88  )
    89  
    90  type delimiter struct {
    91  	Type       delimiterType
    92  	IsInactive bool
    93  	TextNode   int
    94  	Range      Range
    95  }
    96  
    97  type inlineParser struct {
    98  	markdown             string
    99  	ranges               []Range
   100  	referenceDefinitions []*ReferenceDefinition
   101  
   102  	raw            string
   103  	position       int
   104  	inlines        []Inline
   105  	delimiterStack *list.List
   106  }
   107  
   108  func newInlineParser(markdown string, ranges []Range, referenceDefinitions []*ReferenceDefinition) *inlineParser {
   109  	return &inlineParser{
   110  		markdown:             markdown,
   111  		ranges:               ranges,
   112  		referenceDefinitions: referenceDefinitions,
   113  		delimiterStack:       list.New(),
   114  	}
   115  }
   116  
   117  func (p *inlineParser) parseBackticks() {
   118  	count := 1
   119  	for i := p.position + 1; i < len(p.raw) && p.raw[i] == '`'; i++ {
   120  		count++
   121  	}
   122  	opening := p.raw[p.position : p.position+count]
   123  	search := p.position + count
   124  	for search < len(p.raw) {
   125  		end := strings.Index(p.raw[search:], opening)
   126  		if end == -1 {
   127  			break
   128  		}
   129  		if search+end+count < len(p.raw) && p.raw[search+end+count] == '`' {
   130  			search += end + count
   131  			for search < len(p.raw) && p.raw[search] == '`' {
   132  				search++
   133  			}
   134  			continue
   135  		}
   136  		code := strings.Join(strings.Fields(p.raw[p.position+count:search+end]), " ")
   137  		p.position = search + end + count
   138  		p.inlines = append(p.inlines, &CodeSpan{
   139  			Code: code,
   140  		})
   141  		return
   142  	}
   143  	p.position += len(opening)
   144  	p.inlines = append(p.inlines, &Text{
   145  		Text: opening,
   146  	})
   147  }
   148  
   149  func (p *inlineParser) parseLineEnding() {
   150  	if p.position >= 1 && p.raw[p.position-1] == '\t' {
   151  		p.inlines = append(p.inlines, &HardLineBreak{})
   152  	} else if p.position >= 2 && p.raw[p.position-1] == ' ' && (p.raw[p.position-2] == '\t' || p.raw[p.position-1] == ' ') {
   153  		p.inlines = append(p.inlines, &HardLineBreak{})
   154  	} else {
   155  		p.inlines = append(p.inlines, &SoftLineBreak{})
   156  	}
   157  	p.position++
   158  	if p.position < len(p.raw) && p.raw[p.position] == '\n' {
   159  		p.position++
   160  	}
   161  }
   162  
   163  func (p *inlineParser) parseEscapeCharacter() {
   164  	if p.position+1 < len(p.raw) && isEscapableByte(p.raw[p.position+1]) {
   165  		p.inlines = append(p.inlines, &Text{
   166  			Text: string(p.raw[p.position+1]),
   167  		})
   168  		p.position += 2
   169  	} else {
   170  		p.inlines = append(p.inlines, &Text{
   171  			Text: `\`,
   172  		})
   173  		p.position++
   174  	}
   175  }
   176  
   177  func (p *inlineParser) parseText() {
   178  	if next := strings.IndexAny(p.raw[p.position:], "\r\n\\`&![]"); next == -1 {
   179  		p.inlines = append(p.inlines, &Text{
   180  			Text: strings.TrimRightFunc(p.raw[p.position:], isWhitespace),
   181  		})
   182  		p.position = len(p.raw)
   183  	} else {
   184  		if p.raw[p.position+next] == '\r' || p.raw[p.position+next] == '\n' {
   185  			p.inlines = append(p.inlines, &Text{
   186  				Text: strings.TrimRightFunc(p.raw[p.position:p.position+next], isWhitespace),
   187  			})
   188  		} else {
   189  			p.inlines = append(p.inlines, &Text{
   190  				Text: p.raw[p.position : p.position+next],
   191  			})
   192  		}
   193  		p.position += next
   194  	}
   195  }
   196  
   197  func (p *inlineParser) parseLinkOrImageDelimiter() {
   198  	if p.raw[p.position] == '[' {
   199  		p.inlines = append(p.inlines, &Text{
   200  			Text: "[",
   201  		})
   202  		p.delimiterStack.PushBack(&delimiter{
   203  			Type:     linkOpeningDelimiter,
   204  			TextNode: len(p.inlines) - 1,
   205  			Range:    Range{p.position, p.position + 1},
   206  		})
   207  		p.position++
   208  	} else if p.raw[p.position] == '!' && p.position+1 < len(p.raw) && p.raw[p.position+1] == '[' {
   209  		p.inlines = append(p.inlines, &Text{
   210  			Text: "![",
   211  		})
   212  		p.delimiterStack.PushBack(&delimiter{
   213  			Type:     imageOpeningDelimiter,
   214  			TextNode: len(p.inlines) - 1,
   215  			Range:    Range{p.position, p.position + 2},
   216  		})
   217  		p.position += 2
   218  	} else {
   219  		p.inlines = append(p.inlines, &Text{
   220  			Text: "!",
   221  		})
   222  		p.position++
   223  	}
   224  }
   225  
   226  func (p *inlineParser) peekAtInlineLinkDestinationAndTitle(position int) (destination, title Range, end int, ok bool) {
   227  	if position >= len(p.raw) || p.raw[position] != '(' {
   228  		return
   229  	}
   230  	position++
   231  
   232  	destinationStart := nextNonWhitespace(p.raw, position)
   233  	if destinationStart >= len(p.raw) {
   234  		return
   235  	} else if p.raw[destinationStart] == ')' {
   236  		return Range{destinationStart, destinationStart}, Range{destinationStart, destinationStart}, destinationStart + 1, true
   237  	}
   238  
   239  	destination, end, ok = parseLinkDestination(p.raw, destinationStart)
   240  	if !ok {
   241  		return
   242  	}
   243  	position = end
   244  
   245  	if position < len(p.raw) && isWhitespaceByte(p.raw[position]) {
   246  		titleStart := nextNonWhitespace(p.raw, position)
   247  		if titleStart >= len(p.raw) {
   248  			return
   249  		} else if p.raw[titleStart] == ')' {
   250  			return destination, Range{titleStart, titleStart}, titleStart + 1, true
   251  		}
   252  
   253  		title, end, ok = parseLinkTitle(p.raw, titleStart)
   254  		if !ok {
   255  			return
   256  		}
   257  		position = end
   258  	}
   259  
   260  	closingPosition := nextNonWhitespace(p.raw, position)
   261  	if closingPosition >= len(p.raw) || p.raw[closingPosition] != ')' {
   262  		return Range{}, Range{}, 0, false
   263  	}
   264  
   265  	return destination, title, closingPosition + 1, true
   266  }
   267  
   268  func (p *inlineParser) referenceDefinition(label string) *ReferenceDefinition {
   269  	clean := strings.Join(strings.Fields(label), " ")
   270  	for _, d := range p.referenceDefinitions {
   271  		if strings.EqualFold(clean, strings.Join(strings.Fields(d.Label()), " ")) {
   272  			return d
   273  		}
   274  	}
   275  	return nil
   276  }
   277  
   278  func (p *inlineParser) lookForLinkOrImage() {
   279  	for element := p.delimiterStack.Back(); element != nil; element = element.Prev() {
   280  		d := element.Value.(*delimiter)
   281  		if d.Type != imageOpeningDelimiter && d.Type != linkOpeningDelimiter {
   282  			continue
   283  		}
   284  		if d.IsInactive {
   285  			p.delimiterStack.Remove(element)
   286  			break
   287  		}
   288  
   289  		var inline Inline
   290  
   291  		if destination, title, next, ok := p.peekAtInlineLinkDestinationAndTitle(p.position + 1); ok {
   292  			destinationMarkdownPosition := relativeToAbsolutePosition(p.ranges, destination.Position)
   293  			linkOrImage := InlineLinkOrImage{
   294  				Children:       append([]Inline(nil), p.inlines[d.TextNode+1:]...),
   295  				RawDestination: Range{destinationMarkdownPosition, destinationMarkdownPosition + destination.End - destination.Position},
   296  				markdown:       p.markdown,
   297  				rawTitle:       p.raw[title.Position:title.End],
   298  			}
   299  			if d.Type == imageOpeningDelimiter {
   300  				inline = &InlineImage{linkOrImage}
   301  			} else {
   302  				inline = &InlineLink{linkOrImage}
   303  			}
   304  			p.position = next
   305  		} else {
   306  			referenceLabel := ""
   307  			label, next, hasLinkLabel := parseLinkLabel(p.raw, p.position+1)
   308  			if hasLinkLabel && label.End > label.Position {
   309  				referenceLabel = p.raw[label.Position:label.End]
   310  			} else {
   311  				referenceLabel = p.raw[d.Range.End:p.position]
   312  				if !hasLinkLabel {
   313  					next = p.position + 1
   314  				}
   315  			}
   316  			if referenceLabel != "" {
   317  				if reference := p.referenceDefinition(referenceLabel); reference != nil {
   318  					linkOrImage := ReferenceLinkOrImage{
   319  						ReferenceDefinition: reference,
   320  						Children:            append([]Inline(nil), p.inlines[d.TextNode+1:]...),
   321  					}
   322  					if d.Type == imageOpeningDelimiter {
   323  						inline = &ReferenceImage{linkOrImage}
   324  					} else {
   325  						inline = &ReferenceLink{linkOrImage}
   326  					}
   327  					p.position = next
   328  				}
   329  			}
   330  		}
   331  
   332  		if inline != nil {
   333  			if d.Type == imageOpeningDelimiter {
   334  				p.inlines = append(p.inlines[:d.TextNode], inline)
   335  			} else {
   336  				p.inlines = append(p.inlines[:d.TextNode], inline)
   337  				for element := element.Prev(); element != nil; element = element.Prev() {
   338  					if d := element.Value.(*delimiter); d.Type == linkOpeningDelimiter {
   339  						d.IsInactive = true
   340  					}
   341  				}
   342  			}
   343  			p.delimiterStack.Remove(element)
   344  			return
   345  		} else {
   346  			p.delimiterStack.Remove(element)
   347  			break
   348  		}
   349  	}
   350  	p.inlines = append(p.inlines, &Text{
   351  		Text: "]",
   352  	})
   353  	p.position++
   354  }
   355  
   356  func CharacterReference(ref string) string {
   357  	if ref == "" {
   358  		return ""
   359  	}
   360  	if ref[0] == '#' {
   361  		if len(ref) < 2 {
   362  			return ""
   363  		}
   364  		n := 0
   365  		if ref[1] == 'X' || ref[1] == 'x' {
   366  			if len(ref) < 3 {
   367  				return ""
   368  			}
   369  			for i := 2; i < len(ref); i++ {
   370  				if i > 9 {
   371  					return ""
   372  				}
   373  				d := ref[i]
   374  				switch {
   375  				case d >= '0' && d <= '9':
   376  					n = n*16 + int(d-'0')
   377  				case d >= 'a' && d <= 'f':
   378  					n = n*16 + 10 + int(d-'a')
   379  				case d >= 'A' && d <= 'F':
   380  					n = n*16 + 10 + int(d-'A')
   381  				default:
   382  					return ""
   383  				}
   384  			}
   385  		} else {
   386  			for i := 1; i < len(ref); i++ {
   387  				if i > 8 || ref[i] < '0' || ref[i] > '9' {
   388  					return ""
   389  				}
   390  				n = n*10 + int(ref[i]-'0')
   391  			}
   392  		}
   393  		c := rune(n)
   394  		if c == '\u0000' || !utf8.ValidRune(c) {
   395  			return string(unicode.ReplacementChar)
   396  		}
   397  		return string(c)
   398  	}
   399  	if entity, ok := htmlEntities[ref]; ok {
   400  		return entity
   401  	}
   402  	return ""
   403  }
   404  
   405  func (p *inlineParser) parseCharacterReference() {
   406  	p.position++
   407  	if semicolon := strings.IndexByte(p.raw[p.position:], ';'); semicolon == -1 {
   408  		p.inlines = append(p.inlines, &Text{
   409  			Text: "&",
   410  		})
   411  	} else if s := CharacterReference(p.raw[p.position : p.position+semicolon]); s != "" {
   412  		p.position += semicolon + 1
   413  		p.inlines = append(p.inlines, &Text{
   414  			Text: s,
   415  		})
   416  	} else {
   417  		p.inlines = append(p.inlines, &Text{
   418  			Text: "&",
   419  		})
   420  	}
   421  }
   422  
   423  func (p *inlineParser) Parse() []Inline {
   424  	for _, r := range p.ranges {
   425  		p.raw += p.markdown[r.Position:r.End]
   426  	}
   427  
   428  	for p.position < len(p.raw) {
   429  		c, _ := utf8.DecodeRuneInString(p.raw[p.position:])
   430  
   431  		switch c {
   432  		case '\r', '\n':
   433  			p.parseLineEnding()
   434  		case '\\':
   435  			p.parseEscapeCharacter()
   436  		case '`':
   437  			p.parseBackticks()
   438  		case '&':
   439  			p.parseCharacterReference()
   440  		case '!', '[':
   441  			p.parseLinkOrImageDelimiter()
   442  		case ']':
   443  			p.lookForLinkOrImage()
   444  		default:
   445  			p.parseText()
   446  		}
   447  	}
   448  
   449  	return p.inlines
   450  }
   451  
   452  func ParseInlines(markdown string, ranges []Range, referenceDefinitions []*ReferenceDefinition) (inlines []Inline) {
   453  	return newInlineParser(markdown, ranges, referenceDefinitions).Parse()
   454  }
   455  
   456  func Unescape(markdown string) string {
   457  	ret := ""
   458  
   459  	position := 0
   460  	for position < len(markdown) {
   461  		c, cSize := utf8.DecodeRuneInString(markdown[position:])
   462  
   463  		switch c {
   464  		case '\\':
   465  			if position+1 < len(markdown) && isEscapableByte(markdown[position+1]) {
   466  				ret += string(markdown[position+1])
   467  				position += 2
   468  			} else {
   469  				ret += `\`
   470  				position++
   471  			}
   472  		case '&':
   473  			position++
   474  			if semicolon := strings.IndexByte(markdown[position:], ';'); semicolon == -1 {
   475  				ret += "&"
   476  			} else if s := CharacterReference(markdown[position : position+semicolon]); s != "" {
   477  				position += semicolon + 1
   478  				ret += s
   479  			} else {
   480  				ret += "&"
   481  			}
   482  		default:
   483  			ret += string(c)
   484  			position += cSize
   485  		}
   486  	}
   487  
   488  	return ret
   489  }