package languages

import (
	"sort"

	"github.com/aretext/aretext/syntax/parser"
)

const (
	criticMarkupCommentRole = parser.TokenRoleComment

	// Use higher-numbered custom roles to avoid conflict
	// with custom roles used for markdown.
	criticMarkupAddRole       = parser.TokenRoleCustom9
	criticMarkupDelRole       = parser.TokenRoleCustom10
	criticMarkupSubRole       = parser.TokenRoleCustom11
	criticMarkupHighlightRole = parser.TokenRoleCustom12
)

// CriticMarkupParseFunc returns a parse func for CriticMarkup.
// https://github.com/CriticMarkup/CriticMarkup-toolkit/blob/master/README.md
func CriticMarkupParseFunc() parser.Func {
	/*
		This is a bit of a hack.

		We first run the markdown parser, then run the CriticMarkup parser on whatever
		the markdown parser consumed (if we see the start of a CriticMarkup tag, we
		may continue past where the markdown parser stopped).

		We then delete/truncate/split markdown tokens to make space for
		the CriticMarkup tokens.

		This works, but notice that the text within a CriticMarkup tag is still
		processed by the Markdown parser! So, for example, an asterisk "*" inside
		a CriticMarkup tag can terminate a Markdown emphasis tag.
		Fortunately, CriticMarkup explicitly forbids nesting Markdown tags,
		so if the user is doing this, it's a mistake and we can interpret
		it however we want.
	*/

	parseMarkdown := MarkdownParseFunc()
	parseCriticMarkup := criticMarkupParseFunc()
	return func(iter parser.TrackingRuneIter, state parser.State) parser.Result {
		result := parseMarkdown(iter, state)

		// Lookahead as far as the markdown parser consumed.
		lookaheadLimit := result.NumConsumed

		// If the markdown parser failed, lookahead to the one rune
		// that would be consumed by error recovery.
		// This shouldn't ever happen because the markdown parser always
		// tries to consume something, but it's safer to check.
		if lookaheadLimit == 0 {
			lookaheadLimit = 1
		}

		// Attempt to parse this part of the document as CriticMarkup.
		var criticMarkupTokens []parser.ComputedToken
		var n uint64
		for n < lookaheadLimit {
			cmResult := parseCriticMarkup(iter, state)
			if cmResult.IsSuccess() {
				for _, tok := range cmResult.ComputedTokens {
					// Token offsets from the sub-parse are relative to where
					// it started, so shift them by the runes already scanned.
					tok.Offset += n
					criticMarkupTokens = append(criticMarkupTokens, tok)
				}
				iter.Skip(cmResult.NumConsumed)
				n += cmResult.NumConsumed
			} else {
				// Not the start of a CriticMarkup tag; advance one rune and retry.
				iter.Skip(1)
				n++
			}
		}

		// CriticMarkup tokens may overlap the markdown tokens.
		// Delete/truncate/split the markdown tokens as necessary to make space.
		result.ComputedTokens = criticMarkupConsolidateTokens(result.ComputedTokens, criticMarkupTokens)

		// There may be CriticMarkup tokens that started within this computation
		// but extend past the end of the computation. If so, update NumConsumed.
		if len(result.ComputedTokens) > 0 {
			lastToken := result.ComputedTokens[len(result.ComputedTokens)-1]
			lastTokenEnd := lastToken.Offset + lastToken.Length
			if lastTokenEnd > result.NumConsumed {
				result.NumConsumed = lastTokenEnd
			}
		}

		return result
	}
}

// criticMarkupParseFunc returns a parse func that recognizes a single
// CriticMarkup tag at the current position: addition {++..++},
// deletion {--..--}, substitution {~~..~~}, comment {>>..<<},
// or highlight {==..==}. Each tag is emitted as one token spanning
// the delimiters and the enclosed text.
func criticMarkupParseFunc() parser.Func {
	parseAdd := consumeString("{++").
		Then(consumeToString("++}")).
		Map(recognizeToken(criticMarkupAddRole))

	// Examples in the CriticMarkup README use U+2010 hyphens, so allow those as well.
	parseDel := (consumeString("{--").Then(consumeToString("--}"))).
		Or(consumeString("{\u2010\u2010").Then(consumeToString("\u2010\u2010}"))).
		Map(recognizeToken(criticMarkupDelRole))

	parseSub := consumeString("{~~").
		Then(consumeToString("~~}")).
		Map(recognizeToken(criticMarkupSubRole))

	parseComment := consumeString("{>>").
		Then(consumeToString("<<}")).
		Map(recognizeToken(criticMarkupCommentRole))

	parseHighlight := consumeString("{==").
		Then(consumeToString("==}")).
		Map(recognizeToken(criticMarkupHighlightRole))

	return parseAdd.
		Or(parseDel).
		Or(parseSub).
		Or(parseComment).
		Or(parseHighlight)
}

// criticMarkupConsolidateTokens merges CriticMarkup tokens (cmTokens) into
// markdown tokens (mdTokens), giving priority to the CriticMarkup tokens.
// Any markdown token that overlaps a CriticMarkup token is deleted,
// truncated, or split to make space. Both inputs must be sorted ascending
// by offset and internally non-overlapping; the result preserves that
// invariant.
func criticMarkupConsolidateTokens(mdTokens, cmTokens []parser.ComputedToken) []parser.ComputedToken {
	// Fast path if we have only Markdown or only CriticMarkup.
	if len(cmTokens) == 0 {
		return mdTokens
	} else if len(mdTokens) == 0 {
		return cmTokens
	}

	// Assume that mdTokens and cmTokens are each sorted ascending and non-overlapping.
	tokens := make([]parser.ComputedToken, 0, len(mdTokens)+len(cmTokens))
	tokens = append(tokens, mdTokens...)

	for _, cmTok := range cmTokens {
		// Each iteration of this loop eliminates an overlap by deleting, truncating, or splitting
		// one token. Once there are no overlaps, it inserts cmTok and exits the loop.
		for {
			// i is the index of the first token starting at or after cmTok,
			// so only tokens[i-1] (starting strictly before cmTok) and
			// tokens[i] can overlap it.
			i := sort.Search(len(tokens), func(i int) bool {
				return tokens[i].Offset >= cmTok.Offset
			})

			if i > 0 {
				tokBefore := tokens[i-1]
				if tokBefore.Offset+tokBefore.Length > cmTok.Offset+cmTok.Length {
					// tokBefore contains cmTok, so split tokBefore to make space.
					// append+copy shifts tokens[i:] right one slot; the head of
					// tokBefore keeps index i-1 (truncated to end where cmTok
					// starts) and the tail after cmTok goes into the new slot i.
					tokens = append(tokens, parser.ComputedToken{})
					copy(tokens[i+1:], tokens[i:])
					tokens[i-1].Length = cmTok.Offset - tokBefore.Offset
					tokens[i] = parser.ComputedToken{
						Offset: cmTok.Offset + cmTok.Length,
						Length: (tokBefore.Offset + tokBefore.Length) - (cmTok.Offset + cmTok.Length),
						Role:   tokBefore.Role,
					}
					continue
				} else if tokBefore.Offset+tokBefore.Length > cmTok.Offset {
					// Truncate end of prev token
					tokens[i-1].Length = cmTok.Offset - tokBefore.Offset
					continue
				}
			}

			if i < len(tokens) {
				tokAfter := tokens[i]
				if cmTok.Offset+cmTok.Length >= tokAfter.Offset+tokAfter.Length {
					// cmTok contains the following token, so delete it to make space.
					copy(tokens[i:], tokens[i+1:])
					tokens = tokens[0 : len(tokens)-1]
					continue
				} else if cmTok.Offset+cmTok.Length > tokAfter.Offset {
					// Truncate start of next token.
					tokens[i].Offset = cmTok.Offset + cmTok.Length
					tokens[i].Length -= (cmTok.Offset + cmTok.Length) - tokAfter.Offset
					continue
				}
			}

			// No overlap, so insert the token and exit the loop.
			// append+copy opens a slot at index i, keeping tokens sorted.
			tokens = append(tokens, parser.ComputedToken{})
			copy(tokens[i+1:], tokens[i:])
			tokens[i] = cmTok
			break
		}
	}

	return tokens
}