github.com/qichengzx/mattermost-server@v4.5.1-0.20180604164826-2c75247c97d0+incompatible/utils/markdown/inlines.go (about) 1 // Copyright (c) 2017-present Mattermost, Inc. All Rights Reserved. 2 // See License.txt for license information. 3 4 package markdown 5 6 import ( 7 "container/list" 8 "strings" 9 "unicode" 10 "unicode/utf8" 11 ) 12 13 type Inline interface { 14 IsInline() bool 15 } 16 17 type inlineBase struct{} 18 19 func (inlineBase) IsInline() bool { return true } 20 21 type Text struct { 22 inlineBase 23 24 Text string 25 } 26 27 type CodeSpan struct { 28 inlineBase 29 30 Code string 31 } 32 33 type HardLineBreak struct { 34 inlineBase 35 } 36 37 type SoftLineBreak struct { 38 inlineBase 39 } 40 41 type InlineLinkOrImage struct { 42 inlineBase 43 44 Children []Inline 45 46 RawDestination Range 47 48 markdown string 49 rawTitle string 50 } 51 52 func (i *InlineLinkOrImage) Destination() string { 53 return Unescape(i.markdown[i.RawDestination.Position:i.RawDestination.End]) 54 } 55 56 func (i *InlineLinkOrImage) Title() string { 57 return Unescape(i.rawTitle) 58 } 59 60 type InlineLink struct { 61 InlineLinkOrImage 62 } 63 64 type InlineImage struct { 65 InlineLinkOrImage 66 } 67 68 type ReferenceLinkOrImage struct { 69 inlineBase 70 *ReferenceDefinition 71 72 Children []Inline 73 } 74 75 type ReferenceLink struct { 76 ReferenceLinkOrImage 77 } 78 79 type ReferenceImage struct { 80 ReferenceLinkOrImage 81 } 82 83 type delimiterType int 84 85 const ( 86 linkOpeningDelimiter delimiterType = iota 87 imageOpeningDelimiter 88 ) 89 90 type delimiter struct { 91 Type delimiterType 92 IsInactive bool 93 TextNode int 94 Range Range 95 } 96 97 type inlineParser struct { 98 markdown string 99 ranges []Range 100 referenceDefinitions []*ReferenceDefinition 101 102 raw string 103 position int 104 inlines []Inline 105 delimiterStack *list.List 106 } 107 108 func newInlineParser(markdown string, ranges []Range, referenceDefinitions []*ReferenceDefinition) *inlineParser { 109 return &inlineParser{ 110 markdown: markdown, 111 ranges: ranges, 112 referenceDefinitions: referenceDefinitions, 113 delimiterStack: list.New(), 114 } 115 } 116 117 func (p *inlineParser) parseBackticks() { 118 count := 1 119 for i := p.position + 1; i < len(p.raw) && p.raw[i] == '`'; i++ { 120 count++ 121 } 122 opening := p.raw[p.position : p.position+count] 123 search := p.position + count 124 for search < len(p.raw) { 125 end := strings.Index(p.raw[search:], opening) 126 if end == -1 { 127 break 128 } 129 if search+end+count < len(p.raw) && p.raw[search+end+count] == '`' { 130 search += end + count 131 for search < len(p.raw) && p.raw[search] == '`' { 132 search++ 133 } 134 continue 135 } 136 code := strings.Join(strings.Fields(p.raw[p.position+count:search+end]), " ") 137 p.position = search + end + count 138 p.inlines = append(p.inlines, &CodeSpan{ 139 Code: code, 140 }) 141 return 142 } 143 p.position += len(opening) 144 p.inlines = append(p.inlines, &Text{ 145 Text: opening, 146 }) 147 } 148 149 func (p *inlineParser) parseLineEnding() { 150 if p.position >= 1 && p.raw[p.position-1] == '\t' { 151 p.inlines = append(p.inlines, &HardLineBreak{}) 152 } else if p.position >= 2 && p.raw[p.position-1] == ' ' && (p.raw[p.position-2] == '\t' || p.raw[p.position-1] == ' ') { 153 p.inlines = append(p.inlines, &HardLineBreak{}) 154 } else { 155 p.inlines = append(p.inlines, &SoftLineBreak{}) 156 } 157 p.position++ 158 if p.position < len(p.raw) && p.raw[p.position] == '\n' { 159 p.position++ 160 } 161 } 162 163 func (p *inlineParser) parseEscapeCharacter() { 164 if p.position+1 < len(p.raw) && isEscapableByte(p.raw[p.position+1]) { 165 p.inlines = append(p.inlines, &Text{ 166 Text: string(p.raw[p.position+1]), 167 }) 168 p.position += 2 169 } else { 170 p.inlines = append(p.inlines, &Text{ 171 Text: `\`, 172 }) 173 p.position++ 174 } 175 } 176 177 func (p *inlineParser) parseText() { 178 if next := strings.IndexAny(p.raw[p.position:], "\r\n\\`&![]"); next == -1 { 179 p.inlines = append(p.inlines, &Text{ 180 Text: strings.TrimRightFunc(p.raw[p.position:], isWhitespace), 181 }) 182 p.position = len(p.raw) 183 } else { 184 if p.raw[p.position+next] == '\r' || p.raw[p.position+next] == '\n' { 185 p.inlines = append(p.inlines, &Text{ 186 Text: strings.TrimRightFunc(p.raw[p.position:p.position+next], isWhitespace), 187 }) 188 } else { 189 p.inlines = append(p.inlines, &Text{ 190 Text: p.raw[p.position : p.position+next], 191 }) 192 } 193 p.position += next 194 } 195 } 196 197 func (p *inlineParser) parseLinkOrImageDelimiter() { 198 if p.raw[p.position] == '[' { 199 p.inlines = append(p.inlines, &Text{ 200 Text: "[", 201 }) 202 p.delimiterStack.PushBack(&delimiter{ 203 Type: linkOpeningDelimiter, 204 TextNode: len(p.inlines) - 1, 205 Range: Range{p.position, p.position + 1}, 206 }) 207 p.position++ 208 } else if p.raw[p.position] == '!' && p.position+1 < len(p.raw) && p.raw[p.position+1] == '[' { 209 p.inlines = append(p.inlines, &Text{ 210 Text: "![", 211 }) 212 p.delimiterStack.PushBack(&delimiter{ 213 Type: imageOpeningDelimiter, 214 TextNode: len(p.inlines) - 1, 215 Range: Range{p.position, p.position + 2}, 216 }) 217 p.position += 2 218 } else { 219 p.inlines = append(p.inlines, &Text{ 220 Text: "!", 221 }) 222 p.position++ 223 } 224 } 225 226 func (p *inlineParser) peekAtInlineLinkDestinationAndTitle(position int) (destination, title Range, end int, ok bool) { 227 if position >= len(p.raw) || p.raw[position] != '(' { 228 return 229 } 230 position++ 231 232 destinationStart := nextNonWhitespace(p.raw, position) 233 if destinationStart >= len(p.raw) { 234 return 235 } else if p.raw[destinationStart] == ')' { 236 return Range{destinationStart, destinationStart}, Range{destinationStart, destinationStart}, destinationStart + 1, true 237 } 238 239 destination, end, ok = parseLinkDestination(p.raw, destinationStart) 240 if !ok { 241 return 242 } 243 position = end 244 245 if position < len(p.raw) && isWhitespaceByte(p.raw[position]) { 246 titleStart := nextNonWhitespace(p.raw, position) 247 if titleStart >= len(p.raw) { 248 return 249 } else if p.raw[titleStart] == ')' { 250 return destination, Range{titleStart, titleStart}, titleStart + 1, true 251 } 252 253 title, end, ok = parseLinkTitle(p.raw, titleStart) 254 if !ok { 255 return 256 } 257 position = end 258 } 259 260 closingPosition := nextNonWhitespace(p.raw, position) 261 if closingPosition >= len(p.raw) || p.raw[closingPosition] != ')' { 262 return Range{}, Range{}, 0, false 263 } 264 265 return destination, title, closingPosition + 1, true 266 } 267 268 func (p *inlineParser) referenceDefinition(label string) *ReferenceDefinition { 269 clean := strings.Join(strings.Fields(label), " ") 270 for _, d := range p.referenceDefinitions { 271 if strings.EqualFold(clean, strings.Join(strings.Fields(d.Label()), " ")) { 272 return d 273 } 274 } 275 return nil 276 } 277 278 func (p *inlineParser) lookForLinkOrImage() { 279 for element := p.delimiterStack.Back(); element != nil; element = element.Prev() { 280 d := element.Value.(*delimiter) 281 if d.Type != imageOpeningDelimiter && d.Type != linkOpeningDelimiter { 282 continue 283 } 284 if d.IsInactive { 285 p.delimiterStack.Remove(element) 286 break 287 } 288 289 var inline Inline 290 291 if destination, title, next, ok := p.peekAtInlineLinkDestinationAndTitle(p.position + 1); ok { 292 destinationMarkdownPosition := relativeToAbsolutePosition(p.ranges, destination.Position) 293 linkOrImage := InlineLinkOrImage{ 294 Children: append([]Inline(nil), p.inlines[d.TextNode+1:]...), 295 RawDestination: Range{destinationMarkdownPosition, destinationMarkdownPosition + destination.End - destination.Position}, 296 markdown: p.markdown, 297 rawTitle: p.raw[title.Position:title.End], 298 } 299 if d.Type == imageOpeningDelimiter { 300 inline = &InlineImage{linkOrImage} 301 } else { 302 inline = &InlineLink{linkOrImage} 303 } 304 p.position = next 305 } else { 306 referenceLabel := "" 307 label, next, hasLinkLabel := parseLinkLabel(p.raw, p.position+1) 308 if hasLinkLabel && label.End > label.Position { 309 referenceLabel = p.raw[label.Position:label.End] 310 } else { 311 referenceLabel = p.raw[d.Range.End:p.position] 312 if !hasLinkLabel { 313 next = p.position + 1 314 } 315 } 316 if referenceLabel != "" { 317 if reference := p.referenceDefinition(referenceLabel); reference != nil { 318 linkOrImage := ReferenceLinkOrImage{ 319 ReferenceDefinition: reference, 320 Children: append([]Inline(nil), p.inlines[d.TextNode+1:]...), 321 } 322 if d.Type == imageOpeningDelimiter { 323 inline = &ReferenceImage{linkOrImage} 324 } else { 325 inline = &ReferenceLink{linkOrImage} 326 } 327 p.position = next 328 } 329 } 330 } 331 332 if inline != nil { 333 if d.Type == imageOpeningDelimiter { 334 p.inlines = append(p.inlines[:d.TextNode], inline) 335 } else { 336 p.inlines = append(p.inlines[:d.TextNode], inline) 337 for element := element.Prev(); element != nil; element = element.Prev() { 338 if d := element.Value.(*delimiter); d.Type == linkOpeningDelimiter { 339 d.IsInactive = true 340 } 341 } 342 } 343 p.delimiterStack.Remove(element) 344 return 345 } else { 346 p.delimiterStack.Remove(element) 347 break 348 } 349 } 350 p.inlines = append(p.inlines, &Text{ 351 Text: "]", 352 }) 353 p.position++ 354 } 355 356 func CharacterReference(ref string) string { 357 if ref == "" { 358 return "" 359 } 360 if ref[0] == '#' { 361 if len(ref) < 2 { 362 return "" 363 } 364 n := 0 365 if ref[1] == 'X' || ref[1] == 'x' { 366 if len(ref) < 3 { 367 return "" 368 } 369 for i := 2; i < len(ref); i++ { 370 if i > 9 { 371 return "" 372 } 373 d := ref[i] 374 switch { 375 case d >= '0' && d <= '9': 376 n = n*16 + int(d-'0') 377 case d >= 'a' && d <= 'f': 378 n = n*16 + 10 + int(d-'a') 379 case d >= 'A' && d <= 'F': 380 n = n*16 + 10 + int(d-'A') 381 default: 382 return "" 383 } 384 } 385 } else { 386 for i := 1; i < len(ref); i++ { 387 if i > 8 || ref[i] < '0' || ref[i] > '9' { 388 return "" 389 } 390 n = n*10 + int(ref[i]-'0') 391 } 392 } 393 c := rune(n) 394 if c == '\u0000' || !utf8.ValidRune(c) { 395 return string(unicode.ReplacementChar) 396 } 397 return string(c) 398 } 399 if entity, ok := htmlEntities[ref]; ok { 400 return entity 401 } 402 return "" 403 } 404 405 func (p *inlineParser) parseCharacterReference() { 406 p.position++ 407 if semicolon := strings.IndexByte(p.raw[p.position:], ';'); semicolon == -1 { 408 p.inlines = append(p.inlines, &Text{ 409 Text: "&", 410 }) 411 } else if s := CharacterReference(p.raw[p.position : p.position+semicolon]); s != "" { 412 p.position += semicolon + 1 413 p.inlines = append(p.inlines, &Text{ 414 Text: s, 415 }) 416 } else { 417 p.inlines = append(p.inlines, &Text{ 418 Text: "&", 419 }) 420 } 421 } 422 423 func (p *inlineParser) Parse() []Inline { 424 for _, r := range p.ranges { 425 p.raw += p.markdown[r.Position:r.End] 426 } 427 428 for p.position < len(p.raw) { 429 c, _ := utf8.DecodeRuneInString(p.raw[p.position:]) 430 431 switch c { 432 case '\r', '\n': 433 p.parseLineEnding() 434 case '\\': 435 p.parseEscapeCharacter() 436 case '`': 437 p.parseBackticks() 438 case '&': 439 p.parseCharacterReference() 440 case '!', '[': 441 p.parseLinkOrImageDelimiter() 442 case ']': 443 p.lookForLinkOrImage() 444 default: 445 p.parseText() 446 } 447 } 448 449 return p.inlines 450 } 451 452 func ParseInlines(markdown string, ranges []Range, referenceDefinitions []*ReferenceDefinition) (inlines []Inline) { 453 return newInlineParser(markdown, ranges, referenceDefinitions).Parse() 454 } 455 456 func Unescape(markdown string) string { 457 ret := "" 458 459 position := 0 460 for position < len(markdown) { 461 c, cSize := utf8.DecodeRuneInString(markdown[position:]) 462 463 switch c { 464 case '\\': 465 if position+1 < len(markdown) && isEscapableByte(markdown[position+1]) { 466 ret += string(markdown[position+1]) 467 position += 2 468 } else { 469 ret += `\` 470 position++ 471 } 472 case '&': 473 position++ 474 if semicolon := strings.IndexByte(markdown[position:], ';'); semicolon == -1 { 475 ret += "&" 476 } else if s := CharacterReference(markdown[position : position+semicolon]); s != "" { 477 position += semicolon + 1 478 ret += s 479 } else { 480 ret += "&" 481 } 482 default: 483 ret += string(c) 484 position += cSize 485 } 486 } 487 488 return ret 489 }