github.com/aretext/aretext@v1.3.0/syntax/languages/markdown.go (about)

     1  package languages
     2  
     3  import (
     4  	"unicode"
     5  
     6  	"github.com/aretext/aretext/syntax/parser"
     7  )
     8  
// Token roles assigned to each kind of Markdown construct recognized in this file.
// Code blocks and code spans share the string role, and list bullets and
// thematic breaks share the operator role; headings, emphasis, strong emphasis,
// and links each use one of the custom roles.
const (
	markdownCodeBlockRole      = parser.TokenRoleString
	markdownCodeSpanRole       = parser.TokenRoleString
	markdownListNumberRole     = parser.TokenRoleNumber
	markdownListBulletRole     = parser.TokenRoleOperator
	markdownThematicBreakRole  = parser.TokenRoleOperator
	markdownHeadingRole        = parser.TokenRoleCustom1
	markdownEmphasisRole       = parser.TokenRoleCustom2
	markdownStrongEmphasisRole = parser.TokenRoleCustom3
	markdownLinkRole           = parser.TokenRoleCustom4
)
    20  
// markdownParseState is the parser state threaded through the block-level
// Markdown parse funcs.
type markdownParseState uint8

const (
	// markdownParseStateNormal is the state MarkdownParseFunc starts in, and the
	// state it sets after parsing a code block, a paragraph, or any other line.
	markdownParseStateNormal = markdownParseState(iota)

	// markdownParseStateInListItem is the state MarkdownParseFunc sets after
	// parsing a numbered or bullet list item marker.
	markdownParseStateInListItem
)
    27  
    28  func (s markdownParseState) Equals(other parser.State) bool {
    29  	otherState, ok := other.(markdownParseState)
    30  	return ok && s == otherState
    31  }
    32  
    33  // MarkdownParseFunc returns a parse func for Markdown.
    34  // This attempts to follow the CommonMark 0.30 spec,
    35  // but deviates in some cases to simplify the implementation.
    36  //
    37  // Known limitations include:
    38  // * Incorrect handling of nested emphasis in some cases.
    39  // * No support for inline HTML.
    40  // * No support for autolinks.
    41  // * No support for indented code blocks.
    42  // * No support for block quotes.
    43  // * No support for entity and numeric character references.
    44  // * Some differences in handling of nested lists.
    45  // * Some differences in handling link and code span precedence.
    46  // * No restriction on the number of digits in a list item.
    47  // * No restriction on indentation for lists, code fences, headings, etc.
    48  //
    49  // See https://spec.commonmark.org/0.30/ for details.
    50  func MarkdownParseFunc() parser.Func {
    51  	// Incrementally parse one block at a time (headings, paragraphs, list items, etc.).
    52  	// This ensures that each parse func invocation starts at the beginning of a line.
    53  	parseListItem := markdownNumberListItemParseFunc().
    54  		Or(markdownBulletListItemParseFunc()).
    55  		Map(setState(markdownParseStateInListItem))
    56  
    57  	parseThematicBreak := matchState(
    58  		markdownParseStateNormal,
    59  		markdownThematicBreakParseFunc())
    60  
    61  	parseCodeBlock := markdownFencedCodeBlockParseFunc().
    62  		Map(setState(markdownParseStateNormal))
    63  
    64  	parseHeadings := matchState(
    65  		markdownParseStateNormal,
    66  		markdownAtxHeadingParseFunc().
    67  			Or(markdownSetextHeadingParseFunc()),
    68  	)
    69  
    70  	parseOtherBlocks := markdownParagraphParseFunc().
    71  		Or(consumeToNextLineFeed).
    72  		Map(setState(markdownParseStateNormal))
    73  
    74  	return initialState(
    75  		markdownParseStateNormal,
    76  		parseThematicBreak.
    77  			Or(parseListItem).
    78  			Or(parseCodeBlock).
    79  			Or(parseHeadings).
    80  			Or(parseOtherBlocks),
    81  	)
    82  }
    83  
    84  func markdownSkipLeadingIndentation(iter *parser.TrackingRuneIter) uint64 {
    85  	lookaheadIter := *iter
    86  	var n uint64
    87  	for {
    88  		r, err := lookaheadIter.NextRune()
    89  		if err != nil || !(r == ' ' || r == '\t') {
    90  			break
    91  		}
    92  		n++
    93  	}
    94  	iter.Skip(n)
    95  	return n
    96  }
    97  
    98  func markdownThematicBreakParseFunc() parser.Func {
    99  	// A thematic break consists of three or more matching '-', '_', or '*''s,
   100  	// optionally preceded and/or followed by whitespace.
   101  	return func(iter parser.TrackingRuneIter, state parser.State) parser.Result {
   102  		var n uint64
   103  		var breakRune rune
   104  		var breakCount int
   105  		for {
   106  			r, err := iter.NextRune()
   107  			if err != nil {
   108  				// End of text.
   109  				break
   110  			}
   111  
   112  			n++
   113  
   114  			if r == '\n' {
   115  				// End of line (include the newline).
   116  				break
   117  			} else if breakRune == '\x00' && (r == '-' || r == '_' || r == '*') {
   118  				// Start matching thematic break characters.
   119  				breakRune = r
   120  				breakCount = 1
   121  				continue
   122  			} else if breakRune == r {
   123  				// Continue matching thematic break character.
   124  				breakCount++
   125  				continue
   126  			} else if r == ' ' || r == '\t' {
   127  				// Allow whitespace between thematic break characters.
   128  				continue
   129  			} else {
   130  				return parser.FailedResult
   131  			}
   132  		}
   133  
   134  		if breakCount < 3 {
   135  			// Need at least three thematic break characters.
   136  			return parser.FailedResult
   137  		}
   138  
   139  		return parser.Result{
   140  			NumConsumed: n,
   141  			ComputedTokens: []parser.ComputedToken{
   142  				{
   143  					Offset: 0,
   144  					Length: n,
   145  					Role:   markdownThematicBreakRole,
   146  				},
   147  			},
   148  			NextState: state,
   149  		}
   150  	}
   151  }
   152  
   153  func markdownAtxHeadingParseFunc() parser.Func {
   154  	// An ATX heading consists of a sequence of between 1 and 6 "#", optionally preceded by
   155  	// indentation, followed by a space then the rest of the line.
   156  	// The CommonMark spec has some additional requirements for sequences of closing "#"
   157  	// at the end of the line, but we don't enforce those.
   158  	consumeOpener := func(iter parser.TrackingRuneIter, state parser.State) parser.Result {
   159  		// Leading indentation.
   160  		indentCount := markdownSkipLeadingIndentation(&iter)
   161  
   162  		// Allow between 1-6 "#"'s.
   163  		var n uint64
   164  		for {
   165  			r, err := iter.NextRune()
   166  			if err != nil || r != '#' {
   167  				break
   168  			}
   169  			n++
   170  		}
   171  
   172  		if n < 1 || n > 6 {
   173  			return parser.FailedResult
   174  		}
   175  
   176  		return parser.Result{
   177  			NumConsumed: indentCount + n,
   178  			NextState:   state,
   179  		}
   180  	}
   181  
   182  	return parser.Func(consumeOpener).
   183  		ThenNot(consumeSingleRuneLike(func(r rune) bool { return !unicode.IsSpace(r) })).
   184  		Then(consumeToNextLineFeed).
   185  		Map(recognizeToken(markdownHeadingRole))
   186  }
   187  
   188  func markdownSetextHeadingParseFunc() parser.Func {
   189  	// A setext heading consists of one or more non-blank lines, followed by
   190  	// a setext underline (sequence of one or more "-" or "=").
   191  	// The setext underline may have leading indentation and/or trailing whitespace.
   192  	consumeFirstLine := func(iter parser.TrackingRuneIter, state parser.State) parser.Result {
   193  		// Leading indentation.
   194  		indentCount := markdownSkipLeadingIndentation(&iter)
   195  
   196  		// Consume the rest of the line.
   197  		var n uint64
   198  		var r rune
   199  		var err error
   200  		for {
   201  			r, err = iter.NextRune()
   202  			if err != nil {
   203  				break
   204  			}
   205  
   206  			n++
   207  
   208  			if r == '\n' {
   209  				break
   210  			}
   211  		}
   212  
   213  		// Heading cannot be empty.
   214  		if (r == '\n' && n < 2) || (r != '\n' && n < 1) {
   215  			return parser.FailedResult
   216  		}
   217  
   218  		return parser.Result{
   219  			NumConsumed: indentCount + n,
   220  			NextState:   state,
   221  		}
   222  	}
   223  
   224  	checkSubsequentLine := func(iter parser.TrackingRuneIter) (uint64, bool) {
   225  		var n uint64
   226  		for {
   227  			r, err := iter.NextRune()
   228  			if err != nil {
   229  				if n == 0 {
   230  					// Empty line ending in EOF.
   231  					return 0, false
   232  				} else {
   233  					// Non-empty line ending in EOF.
   234  					return n, true
   235  				}
   236  			}
   237  
   238  			n++
   239  
   240  			if r == '\n' {
   241  				if n == 1 {
   242  					// Empty line ending in a newline.
   243  					return 0, false
   244  				} else {
   245  					// Non-empty line ending in a newline.
   246  					return n, true
   247  				}
   248  			}
   249  		}
   250  	}
   251  
   252  	checkUnderline := func(iter parser.TrackingRuneIter) (uint64, bool) {
   253  		var n uint64
   254  		indentCount := markdownSkipLeadingIndentation(&iter)
   255  		n += indentCount
   256  
   257  		// Check if this is an '-' or '=' underline.
   258  		underlineRune, err := iter.NextRune()
   259  		if err != nil || !(underlineRune == '-' || underlineRune == '=') {
   260  			return 0, false
   261  		}
   262  		n++
   263  
   264  		// Consume repeats of the underline rune.
   265  		var (
   266  			underlineRepeatCount int
   267  			hitEndOfLineOrFile   bool
   268  		)
   269  
   270  		for {
   271  			r, err := iter.NextRune()
   272  			if err != nil {
   273  				// Found EOF (without trailing whitespace),
   274  				// so this is a valid setext underline.
   275  				hitEndOfLineOrFile = true
   276  				break
   277  			}
   278  
   279  			n++
   280  
   281  			if r == '\n' {
   282  				// Found end of line (without trailing whitespace),
   283  				// so this is a valid setext underline.
   284  				hitEndOfLineOrFile = true
   285  				break
   286  			} else if r == ' ' || r == '\t' {
   287  				break
   288  			} else if r != underlineRune {
   289  				return 0, false
   290  			}
   291  
   292  			underlineRepeatCount++
   293  		}
   294  
   295  		// The commonmark spec says:
   296  		//
   297  		//   If a line containing a single - can be interpreted as an empty list items,
   298  		//   it should be interpreted this way and not as a setext heading underline.
   299  		//
   300  		// We're approximating this by rejecting a single '-'. This avoids an
   301  		// annoying behavior when the user starts typing a list, and the text above
   302  		// gets briefly highlighted as a setext heading.
   303  		if underlineRune == '-' && underlineRepeatCount == 0 {
   304  			return 0, false
   305  		}
   306  
   307  		if hitEndOfLineOrFile {
   308  			return n, true
   309  		}
   310  
   311  		// Consume trailing whitespace.
   312  		for {
   313  			r, err := iter.NextRune()
   314  			if err != nil {
   315  				break
   316  			}
   317  
   318  			n++
   319  
   320  			if r == '\n' {
   321  				break
   322  			} else if r == ' ' || r == '\t' {
   323  				continue
   324  			} else {
   325  				return 0, false
   326  			}
   327  		}
   328  
   329  		// Found a valid setext underline.
   330  		return n, true
   331  	}
   332  
   333  	consumeToUnderline := func(iter parser.TrackingRuneIter, state parser.State) parser.Result {
   334  		var n uint64
   335  		for {
   336  			// Check if the current line is a setext underline.
   337  			underlineLen, found := checkUnderline(iter)
   338  			if found {
   339  				// Found a setext underline, so consume it.
   340  				n += underlineLen
   341  				return parser.Result{
   342  					NumConsumed: n,
   343  					NextState:   state,
   344  				}
   345  			}
   346  
   347  			// Check if the current line is non-empty.
   348  			lineLen, found := checkSubsequentLine(iter)
   349  			if found {
   350  				// Found a non-empty line. Consume it and keep looking for the setext underline.
   351  				n += lineLen
   352  				iter.Skip(lineLen)
   353  				continue
   354  			}
   355  
   356  			// Otherwise, we found an empty line, so this isn't a setext heading.
   357  			return parser.FailedResult
   358  		}
   359  	}
   360  
   361  	return parser.Func(consumeFirstLine).
   362  		Then(consumeToUnderline).
   363  		Map(recognizeToken(markdownHeadingRole))
   364  }
   365  
   366  func markdownFencedCodeBlockParseFunc() parser.Func {
   367  	// A fenced code block consists of a fence ("```" or "~~~" of length >= 3)
   368  	// until a closing fence of at least the same length or EOF.
   369  	// The fences may have leading indentation.
   370  	// Commonmark allows the opening fence to be followed by
   371  	// an optional "info" string (e.g. specifying the code language), which we include
   372  	// within the coe block token (no special treatment).
   373  	checkFenceLen := func(fenceRune rune, iter parser.TrackingRuneIter) (uint64, bool) {
   374  		var n uint64
   375  		for {
   376  			r, err := iter.NextRune()
   377  			if err != nil || r != fenceRune {
   378  				break
   379  			}
   380  			n++
   381  		}
   382  
   383  		if n < 3 {
   384  			return 0, false
   385  		}
   386  		return n, true
   387  	}
   388  
   389  	checkClosingCodeFence := func(fenceRune rune, openFenceLen uint64, iter parser.TrackingRuneIter) (uint64, bool) {
   390  		var n uint64
   391  		for {
   392  			maybeFence := true
   393  
   394  			// Leading indentation.
   395  			indentCount := markdownSkipLeadingIndentation(&iter)
   396  			n += indentCount
   397  
   398  			closeFenceLen, found := checkFenceLen(fenceRune, iter)
   399  			if found && closeFenceLen >= openFenceLen {
   400  				iter.Skip(closeFenceLen)
   401  				n += closeFenceLen
   402  			} else {
   403  				maybeFence = false
   404  			}
   405  
   406  			// Consume to the end of the line or file.
   407  			for {
   408  				r, err := iter.NextRune()
   409  				if err != nil {
   410  					// If we hit the EOF, then close the code block.
   411  					return n, true
   412  				}
   413  				n++
   414  				if r == '\n' {
   415  					break
   416  				} else if maybeFence && !(r == ' ' || r == '\t' || r == '\r') {
   417  					// Only trailing whitespace allowed after code fence.
   418  					maybeFence = false
   419  				}
   420  			}
   421  
   422  			if maybeFence {
   423  				return n, true
   424  			}
   425  		}
   426  	}
   427  
   428  	return func(iter parser.TrackingRuneIter, state parser.State) parser.Result {
   429  		var n uint64
   430  
   431  		// Leading indentation.
   432  		indentCount := markdownSkipLeadingIndentation(&iter)
   433  		n += indentCount
   434  
   435  		// Read the opening fence (first check '`', then fallback to '~')
   436  		fenceRune := '`'
   437  		openFenceLen, found := checkFenceLen(fenceRune, iter)
   438  		if !found {
   439  			fenceRune = '~'
   440  			openFenceLen, found = checkFenceLen(fenceRune, iter)
   441  		}
   442  
   443  		if !found || openFenceLen < 3 {
   444  			return parser.FailedResult
   445  		}
   446  
   447  		iter.Skip(openFenceLen)
   448  		n += openFenceLen
   449  
   450  		// Consume to the end of the first line.
   451  		for {
   452  			r, err := iter.NextRune()
   453  			if err != nil {
   454  				break
   455  			}
   456  			n++
   457  			if r == '\n' {
   458  				break
   459  			}
   460  		}
   461  
   462  		// Read subsequent lines until we find a closing code fence or EOF.
   463  		for {
   464  			lineLen, found := checkClosingCodeFence(fenceRune, openFenceLen, iter)
   465  			n += lineLen
   466  			iter.Skip(lineLen)
   467  			if found {
   468  				break
   469  			}
   470  		}
   471  
   472  		// Found the end of the code fence, so return the token.
   473  		return parser.Result{
   474  			NumConsumed: n,
   475  			ComputedTokens: []parser.ComputedToken{
   476  				{
   477  					Offset: 0,
   478  					Length: n,
   479  					Role:   markdownCodeBlockRole,
   480  				},
   481  			},
   482  			NextState: state,
   483  		}
   484  	}
   485  }
   486  
   487  func markdownNumberListItemParseFunc() parser.Func {
   488  	// A numbered list item is a sequence of digits followed by '.' or ')' and a space,
   489  	// optionally preceded by indentation.
   490  	// Commonmark requires no more than nine digits, but we allow more.
   491  	return consumeRunesLike(func(r rune) bool { return r == ' ' || r == '\t' }).
   492  		MaybeBefore(
   493  			consumeRunesLike(func(r rune) bool { return r >= '0' && r <= '9' }).
   494  				Then(consumeSingleRuneLike(func(r rune) bool { return r == '.' || r == ')' })).
   495  				Map(recognizeToken(markdownListNumberRole))).
   496  		Then(consumeSingleRuneLike(func(r rune) bool { return r == ' ' || r == '\t' }))
   497  }
   498  
   499  func markdownBulletListItemParseFunc() parser.Func {
   500  	// A bullet list item is a '-', '+', or '*' character followed by a space,
   501  	// optionally preceded by indentation.
   502  	return consumeRunesLike(func(r rune) bool { return r == ' ' || r == '\t' }).
   503  		MaybeBefore(
   504  			consumeSingleRuneLike(func(r rune) bool { return r == '-' || r == '+' || r == '*' }).
   505  				Map(recognizeToken(markdownListBulletRole))).
   506  		Then(consumeSingleRuneLike(func(r rune) bool { return r == ' ' || r == '\t' }))
   507  }
   508  
// markdownParagraphParseFunc returns a parse func that consumes one paragraph
// and recognizes the inline tokens (code spans, emphasis, links) inside it.
func markdownParagraphParseFunc() parser.Func {
	// A paragraph consists of a sequence of non-empty lines that cannot be interpreted
	// as another kind of block.
	// We parse paragraphs in two passes: first find the paragraph contents,
	// then tokenize the paragraph's inlines (emphasis, links, etc.).

	// isEmptyLineOrEof reports whether the next rune is a line feed or cannot be read.
	// Note that a line containing only spaces/tabs is not treated as empty here.
	isEmptyLineOrEof := func(iter parser.TrackingRuneIter) bool {
		r, err := iter.NextRune()
		return err != nil || r == '\n'
	}

	// Parse funcs for the block kinds that terminate a paragraph.
	// For code blocks only the opening fence is checked, not the whole block.
	parseNumberList := markdownNumberListItemParseFunc()
	parseBulletList := markdownBulletListItemParseFunc()
	parseAtxHeading := markdownAtxHeadingParseFunc()
	parseThematicBreak := markdownThematicBreakParseFunc()
	parseStartOfCodeBlock := consumeRunesLike(func(r rune) bool { return r == ' ' || r == '\t' }).
		MaybeBefore(consumeString("```").Or(consumeString("~~~")))

	// isStartOfAnotherBlock reports whether the line at iter begins a list item,
	// an ATX heading, a thematic break, or a fenced code block.
	isStartOfAnotherBlock := func(iter parser.TrackingRuneIter, state parser.State) bool {
		// Setext headings are already handled by an earlier parse func.
		return (parseNumberList(iter, state).IsSuccess() ||
			parseBulletList(iter, state).IsSuccess() ||
			parseAtxHeading(iter, state).IsSuccess() ||
			parseThematicBreak(iter, state).IsSuccess() ||
			parseStartOfCodeBlock(iter, state).IsSuccess())
	}

	// consumeParagraphLines is the first pass: it consumes whole lines until it
	// reaches an empty line, EOF, or the start of another block. It produces no
	// tokens and fails if no runes were consumed.
	consumeParagraphLines := func(iter parser.TrackingRuneIter, state parser.State) parser.Result {
		var n uint64

		for {
			if isEmptyLineOrEof(iter) || isStartOfAnotherBlock(iter, state) {
				// Found end of paragraph.
				break
			}

			// Leading indentation.
			indentCount := markdownSkipLeadingIndentation(&iter)
			n += indentCount

			// Consume to end of line.
			for {
				r, err := iter.NextRune()
				if err != nil {
					break
				}
				n++
				if r == '\n' {
					break
				}
			}
		}

		if n == 0 {
			// Didn't consume any lines, so this isn't a paragraph.
			return parser.FailedResult
		}

		return parser.Result{
			NumConsumed: n,
			NextState:   state,
		}
	}

	// parseInlineCodeSpan recognizes a code span: a run of backticks, content,
	// and a closing run of backticks of exactly the same length.
	// It fails unless the first rune is a backtick. If no matching closing run
	// is found before the input ends, it succeeds without a token and consumes
	// only the opening backtick run.
	parseInlineCodeSpan := func(iter parser.TrackingRuneIter, state parser.State) parser.Result {
		const (
			codeSpanStateNone = iota
			codeSpanStateStartDelim
			codeSpanStateContent
			codeSpanStateEndDelim
		)

		var done bool
		var tokenStart uint64
		var startDelimLen, endDelimLen int
		var codeSpanState int
		var n uint64
		for !done {
			r, err := iter.NextRune()
			if err != nil {
				// One more iteration for tokens ending at EOF.
				// The NUL rune stands in for "end of input" and is not counted in n.
				done = true
				r = '\x00'
			} else {
				n++
			}

			switch codeSpanState {
			case codeSpanStateNone:
				if r == '`' {
					tokenStart = n - 1
					startDelimLen = 1
					codeSpanState = codeSpanStateStartDelim
				} else {
					// Code span must start with a backtick.
					return parser.FailedResult
				}

			case codeSpanStateStartDelim:
				if r == '`' {
					startDelimLen++
				} else {
					codeSpanState = codeSpanStateContent
				}

			case codeSpanStateContent:
				if r == '`' {
					endDelimLen = 1
					codeSpanState = codeSpanStateEndDelim
				}

			case codeSpanStateEndDelim:
				if r == '`' {
					endDelimLen++
				} else if startDelimLen != endDelimLen {
					// Backtick run of a different length: it is part of the content.
					endDelimLen = 0
					codeSpanState = codeSpanStateContent
				} else {
					tokenEnd := n
					if !done {
						// Compensate for lookahead character (unless we're at EOF)
						tokenEnd--
					}
					return parser.Result{
						NumConsumed: tokenEnd,
						ComputedTokens: []parser.ComputedToken{
							{
								Offset: tokenStart,
								Length: tokenEnd - tokenStart,
								Role:   markdownCodeSpanRole,
							},
						},
						NextState: state,
					}
				}
			}
		}

		if startDelimLen > 0 {
			// Skip an unmatched start delimiter to avoid partial matches later.
			return parser.Result{
				NumConsumed: tokenStart + uint64(startDelimLen),
				NextState:   state,
			}
		}

		return parser.FailedResult
	}

	// Parse inline emphasis and strong emphasis, delimited by '*' or '_'.
	// This implementation doesn't handle all the edge cases in the CommonMark spec
	// involving nested emphasis, but it should handle the most common cases reasonably.
	//
	// delimRune is the delimiter to match. When allowWithinWord is false, a closing
	// delimiter run only counts if it is followed by whitespace, punctuation, or the
	// end of input. A start delimiter run of length one yields markdownEmphasisRole;
	// a longer run yields markdownStrongEmphasisRole.
	parseInlineEmphasis := func(delimRune rune, allowWithinWord bool) parser.Func {
		return func(iter parser.TrackingRuneIter, state parser.State) parser.Result {
			const (
				emphStateNone = iota
				emphStateStartDelim
				emphStateContent
				emphStateEndDelim
			)

			var emphState int
			var done bool
			var tokenStart uint64
			var startDelimLen, endDelimLen int
			var lastWasSpace bool
			var lastWasDelim bool
			var inCodeSpan bool
			var n uint64
			for !done {
				r, err := iter.NextRune()
				if err != nil {
					// One more iteration with a NUL stand-in so that a token
					// ending at the end of input can still be emitted.
					done = true
					r = '\x00'
				} else {
					n++
				}

				switch emphState {
				case emphStateNone:
					if r == delimRune {
						tokenStart = n - 1
						startDelimLen = 1
						emphState = emphStateStartDelim
					} else {
						return parser.FailedResult
					}

				case emphStateStartDelim:
					if r == delimRune {
						startDelimLen++
					} else if unicode.IsSpace(r) {
						// An opening delimiter run followed by whitespace is not emphasis.
						return parser.FailedResult
					} else {
						endDelimLen = 0
						emphState = emphStateContent
					}
				case emphStateContent:
					// A delimiter directly after whitespace or another delimiter, or
					// inside backticks, is not treated as the start of a closing run.
					if r == delimRune && !lastWasSpace && !lastWasDelim && !inCodeSpan {
						endDelimLen++
						emphState = emphStateEndDelim
					} else if r == '`' {
						inCodeSpan = !inCodeSpan
					}
				case emphStateEndDelim:
					if r == delimRune {
						endDelimLen++
					} else if !allowWithinWord && !(unicode.IsSpace(r) || unicode.IsPunct(r) || r == '\x00') {
						// Disallow delimiters within a word for "_".
						// For example "foo_bar_baz" should not emphasis "bar".
						endDelimLen = 0
						emphState = emphStateContent
					} else if endDelimLen < startDelimLen {
						// Closing run too short so far. Unlike the branch above,
						// endDelimLen is not reset here, so later closing runs
						// add to it.
						// NOTE(review): possibly intentional as an approximation
						// of nested emphasis; confirm before changing.
						emphState = emphStateContent
					} else {
						var role parser.TokenRole
						if startDelimLen < 2 {
							role = markdownEmphasisRole
						} else {
							role = markdownStrongEmphasisRole
						}

						tokenEnd := n
						if !done {
							// Compensate for lookahead character (unless we're at EOF)
							tokenEnd--
						}

						return parser.Result{
							NumConsumed: tokenEnd,
							ComputedTokens: []parser.ComputedToken{
								{
									Offset: tokenStart,
									Length: tokenEnd - tokenStart,
									Role:   role,
								},
							},
							NextState: state,
						}
					}
				}

				lastWasSpace = unicode.IsSpace(r)
				lastWasDelim = r == delimRune
			}

			if startDelimLen > 0 {
				// Skip an unmatched start delimiter to avoid partial matches later.
				return parser.Result{
					NumConsumed: tokenStart + uint64(startDelimLen),
					NextState:   state,
				}
			}

			return parser.FailedResult
		}
	}

	// consumeLinkPart returns a parse func that consumes a balanced
	// startDelim ... endDelim span, such as "[text]" or "(url)".
	// The first rune must be startDelim, and nested delimiter pairs are tracked
	// by depth. It fails on an unescaped line feed or if the input ends before
	// the span is closed.
	consumeLinkPart := func(startDelim, endDelim rune) parser.Func {
		return func(iter parser.TrackingRuneIter, state parser.State) parser.Result {
			var n uint64
			var depth int
			for {
				r, err := iter.NextRune()
				if err != nil {
					break
				}
				n++

				if n == 1 && r != startDelim {
					return parser.FailedResult
				}

				if r == '\\' {
					// Backslash escape.
					// The rune after the backslash is skipped without being inspected.
					n++
					iter.Skip(1)
					continue
				}

				if r == '\n' {
					// Links cannot contain newlines.
					return parser.FailedResult
				}

				if r == startDelim {
					depth++
				} else if r == endDelim {
					depth--
				}

				if depth == 0 {
					return parser.Result{
						NumConsumed: n,
						NextState:   state,
					}
				}
			}

			return parser.FailedResult
		}
	}

	// A link or image is recognized as a single link token.
	// NOTE(review): presumably MaybeBefore makes the leading "!" optional and
	// ThenMaybe makes the "(...)" part optional; confirm against the combinator
	// definitions.
	parseInlineLink := consumeString("!").
		MaybeBefore(consumeLinkPart('[', ']')).
		ThenMaybe(consumeLinkPart('(', ')')).
		Map(recognizeToken(markdownLinkRole))

	// consumeToNextPossibleStartDelim consumes plain text up to (but not including)
	// the next rune that could begin an inline token: '\\', '`', '*', '!', '[',
	// or an '_' where one is allowed. It may consume zero runes.
	consumeToNextPossibleStartDelim := func(iter parser.TrackingRuneIter, state parser.State) parser.Result {
		allowUnderscore := true
		var n uint64
		for {
			r, err := iter.NextRune()
			if err != nil || r == '\\' || r == '`' || r == '*' || r == '!' || r == '[' || (allowUnderscore && r == '_') {
				break
			}
			n++

			// Don't allow an underscore within a word or following another underscore.
			allowUnderscore = r != '_' && (unicode.IsSpace(r) || unicode.IsPunct(r))
		}
		return parser.Result{
			NumConsumed: n,
			NextState:   state,
		}
	}

	// A backslash followed by an ASCII punctuation rune is consumed as a pair,
	// so the escaped rune is not tried as the start of an inline token.
	consumeBackslashEscape := consumeString("\\").
		Then(consumeSingleRuneLike(func(r rune) bool {
			// ASCII punctuation
			return (r >= '!' && r <= '/') || (r >= ':' && r <= '@') || (r >= '[' && r <= '`') || (r >= '{' && r <= '~')
		}))

	// Inline alternatives, listed in the order they are chained.
	parseInlineToken := consumeBackslashEscape.
		Or(parseInlineCodeSpan).
		Or(parseInlineEmphasis('*', true)).
		Or(parseInlineLink).
		Or(parseInlineEmphasis('_', false)).
		Or(consumeToNextPossibleStartDelim)

	// recognizeInlineTokens is the second pass: it walks the runes consumed by the
	// first pass, repeatedly applying parseInlineToken, and appends any tokens found
	// to the paragraph result with their offsets shifted by the current position.
	recognizeInlineTokens := func(result parser.Result, iter parser.TrackingRuneIter, state parser.State) parser.Result {
		var n uint64
		for n < result.NumConsumed {
			inlineResult := parseInlineToken(iter, state)
			if inlineResult.IsSuccess() {
				for _, tok := range inlineResult.ComputedTokens {
					tok.Offset += n
					result.ComputedTokens = append(result.ComputedTokens, tok)
				}
				iter.Skip(inlineResult.NumConsumed)
				n += inlineResult.NumConsumed
			} else {
				// Nothing matched here; step over one rune so the loop always advances.
				iter.Skip(1)
				n++
			}
		}
		return result
	}

	return parser.Func(consumeParagraphLines).
		MapWithInput(recognizeInlineTokens)
}