github.com/tada-team/tdproto@v1.51.57/tdmarkup/markup_scanner.go (about) 1 package tdmarkup 2 3 import ( 4 "net/url" 5 "strings" 6 "time" 7 8 "github.com/tada-team/tdproto" 9 ) 10 11 var ops = "*/_~`<>&[]()" 12 13 var opInlines = map[rune]tdproto.MarkupType{ 14 '*': tdproto.Bold, 15 '/': tdproto.Italic, 16 '_': tdproto.Underscore, 17 '~': tdproto.Strike, 18 '`': tdproto.Code, 19 } 20 21 var ( 22 opCodeBlock = []rune("```") 23 opQuoteBlock = []rune("> ") 24 ) 25 26 // FIXME: temporary hack, move to MarkupScanner() itself 27 var CheckUrl = func(u *url.URL) bool { return u.Scheme != "" } 28 29 func contains(s string, typ tdproto.MarkupType) bool { 30 for s := NewMarkupScanner(s); s.Rest() > 0; { 31 _, e := s.Scan(nil) 32 if doContains(e, typ) { 33 return true 34 } 35 } 36 return false 37 } 38 39 func doContains(e *tdproto.MarkupEntity, substring tdproto.MarkupType) bool { 40 if e == nil { 41 return false 42 } 43 if e.Type == substring { 44 return true 45 } 46 for _, child := range e.Childs { 47 if doContains(&child, substring) { 48 return true 49 } 50 } 51 return false 52 } 53 54 func ContainsTime(s string) bool { return contains(s, tdproto.Time) } 55 56 func ParseString(text string, links tdproto.MessageLinks) (string, []tdproto.MarkupEntity) { 57 text = strings.ReplaceAll(text, "\r", "") 58 if len(links) == 0 && !strings.ContainsAny(text, ops) { 59 return text, nil 60 } 61 62 var b strings.Builder 63 b.Grow(len(text)) 64 65 var entities []tdproto.MarkupEntity 66 for s := NewMarkupScanner(text); s.Rest() > 0; { 67 t, e := s.Scan(links) 68 if e != nil { 69 entities = append(entities, *e) 70 } 71 b.WriteString(t) 72 } 73 74 return b.String(), entities 75 } 76 77 type MarkupScanner struct { 78 *Scanner 79 internal bool 80 } 81 82 func NewMarkupScanner(text string) *MarkupScanner { 83 return &MarkupScanner{Scanner: NewScanner(text)} 84 } 85 86 func (s *MarkupScanner) Scan(links tdproto.MessageLinks) (string, *tdproto.MarkupEntity) { 87 if isEOF(s.Next()) { 88 return "", nil 89 } 90 91 // markdown links 92 t, e := s.scanMarkdownLinks() 93 if e != nil { 94 return t, e 95 } 96 97 // links 98 for _, l := range links { 99 t, e := s.scanLink(l) 100 if e != nil { 101 return t, e 102 } 103 } 104 105 // dates (before html tags!) 106 t, e = s.scanTime() 107 if e != nil { 108 return t, e 109 } 110 111 // quotes (before html tags!) 112 if s.Position() == 0 || isEOL(s.Current()) { 113 t, e := s.scanQuote() 114 if e != nil { 115 e.Childs = s.scanChilds(t[len(opQuoteBlock):]) 116 return t, e 117 } 118 if t != "" { 119 return t, nil 120 } 121 } 122 123 // html tags 124 t, e = s.scanUnsafe() 125 if e != nil { 126 return t, e 127 } 128 129 // code block 130 t, e = s.scanBlock(opCodeBlock, opCodeBlock, tdproto.CodeBlock) 131 if e != nil { 132 return t, e 133 } 134 if t != "" { 135 return t, nil 136 } 137 138 // inlines 139 if typ, ok := opInlines[s.Next()]; ok { 140 allowWhitespaceAround := typ == tdproto.Code 141 t, e := s.scanInline(s.Next(), typ, allowWhitespaceAround) 142 if e != nil { 143 if typ == tdproto.Italic && isPath(t) { 144 return t, nil 145 } 146 if typ != tdproto.Code { 147 e.Childs = s.scanChilds(t[1 : len(t)-1]) 148 } 149 return t, e 150 } 151 if t != "" { 152 return t, nil 153 } 154 } 155 156 // unparsed 157 return string(s.TakeNext()), nil 158 } 159 160 func (s *MarkupScanner) scanChilds(text string) (res []tdproto.MarkupEntity) { 161 if len(text) < 3 { 162 return 163 } 164 scanner := NewMarkupScanner(text) 165 scanner.internal = true 166 for scanner.Rest() > 0 { 167 t, e := scanner.Scan(nil) 168 if e != nil { 169 res = append(res, *e) 170 } 171 if t == "" { 172 break 173 } 174 } 175 return 176 } 177 178 var dateLayouts = []string{ 179 "2006-01-02T15:04:05.000000-0700", 180 "2006-01-02T15:04:05.000000Z", 181 } 182 183 func (s *MarkupScanner) scanTime() (string, *tdproto.MarkupEntity) { 184 if s.Next() != '<' { 185 return "", nil 186 } 187 188 start := s.Position() 189 s.TakeNext() 190 strDt := s.ScanUntil([]rune(">")) 191 if len(strDt) < 18 { 192 s.Rewind(start) 193 return "", nil 194 } 195 196 strDt = strDt[:len(strDt)-1] 197 for _, layout := range dateLayouts { 198 _, err := time.Parse(layout, strDt) 199 if err != nil { 200 continue 201 } 202 return "<" + strDt + ">", &tdproto.MarkupEntity{ 203 Open: start, 204 OpenLength: 1, 205 Close: s.Position() - 1, 206 CloseLength: 1, 207 Type: tdproto.Time, 208 Time: strDt, 209 } 210 } 211 212 s.Rewind(start) 213 return "", nil 214 } 215 216 func (s *MarkupScanner) scanInline(marker rune, typ tdproto.MarkupType, allowWhitespaceAround bool) (string, *tdproto.MarkupEntity) { 217 start := s.Position() 218 219 var b strings.Builder 220 b.Grow(s.Length() - start) 221 b.WriteRune(s.TakeNext()) 222 223 if !(start == 0 || isWhitespace(s.Prev()) || isEOL(s.Prev()) || allowWhitespaceAround) { 224 s.Rewind(start) 225 return "", nil 226 } 227 228 if (isWhitespace(s.Next()) || isEOL(s.Next())) && !allowWhitespaceAround { 229 s.Rewind(start) 230 return "", nil 231 } 232 233 e := &tdproto.MarkupEntity{ 234 Type: typ, 235 Open: s.Position() - 1, 236 OpenLength: 1, 237 } 238 239 for s.Rest() > 0 { 240 ch := s.TakeNext() 241 b.WriteRune(ch) 242 243 prev := s.Prev() 244 next := s.Next() 245 246 if ch == marker && s.Since(start) > 2 && next != marker && (allowWhitespaceAround || !isWhitespace(prev)) && prev != ch && 247 (isWhitespace(next) || isEOF(next) || isEOL(next) || isTrailingPunctuation(next)) { 248 e.Close = s.Position() - 1 249 e.CloseLength = 1 250 return b.String(), e 251 } 252 253 if isEOL(ch) { 254 break 255 } 256 } 257 258 s.Rewind(start) 259 return "", nil 260 } 261 262 func (s *MarkupScanner) scanBlock(op, cl []rune, typ tdproto.MarkupType) (string, *tdproto.MarkupEntity) { 263 start := s.Position() 264 265 t := s.ScanUntil(op) 266 if t == "" { 267 return "", nil 268 } 269 270 var b strings.Builder 271 b.Grow(s.Length() - start) 272 b.WriteString(t) 273 274 e := &tdproto.MarkupEntity{ 275 Type: typ, 276 Open: s.Position() - len(op), 277 OpenLength: len(op), 278 } 279 280 for s.Next() == ' ' { 281 e.OpenLength += 1 282 b.WriteRune(s.TakeNext()) 283 } 284 285 for s.Next() == '\n' { 286 e.OpenLength += 1 287 b.WriteRune(s.TakeNext()) 288 } 289 290 var tail []rune 291 for s.Rest() > 0 { 292 t := s.ScanUntil(cl) 293 if t == "" { 294 ch := s.TakeNext() 295 b.WriteRune(ch) 296 tail = append(tail, ch) 297 continue 298 } 299 b.WriteString(t) 300 e.Close = s.Position() - len(cl) 301 e.CloseLength = len(cl) 302 303 for i := len(tail) - 1; i >= 0; i-- { 304 ch := tail[i] 305 if !(isWhitespace(ch) || isEOL(ch)) { 306 break 307 } 308 e.Close-- 309 e.CloseLength++ 310 } 311 return b.String(), e 312 } 313 314 s.Rewind(start) 315 return "", nil 316 } 317 318 func (s *MarkupScanner) scanQuote() (string, *tdproto.MarkupEntity) { 319 t := s.ScanUntil(opQuoteBlock) 320 if t == "" { 321 return "", nil 322 } 323 324 var b strings.Builder 325 b.Grow(s.Length() - s.Position()) 326 b.WriteString(t) 327 328 e := &tdproto.MarkupEntity{ 329 Type: tdproto.Quote, 330 Open: s.Position() - len(opQuoteBlock), 331 OpenLength: len(opQuoteBlock), 332 } 333 334 for { 335 ch := s.Next() 336 if isEOL(ch) || isEOF(ch) { 337 e.Close = s.Position() 338 if isEOL(ch) { 339 e.CloseLength = 1 340 } 341 return b.String(), e 342 } 343 b.WriteRune(s.TakeNext()) 344 } 345 } 346 347 func (s *MarkupScanner) scanLink(l tdproto.MessageLink) (string, *tdproto.MarkupEntity) { 348 start := s.Position() 349 350 for _, r := range []rune(l.Pattern) { 351 if s.TakeNext() != r { 352 s.Rewind(start) 353 return "", nil 354 } 355 } 356 357 return l.Pattern, &tdproto.MarkupEntity{ 358 Type: tdproto.Link, 359 Url: l.Url, 360 Repl: l.Text, 361 Open: start, 362 Close: s.Position(), 363 } 364 } 365 366 func (s *MarkupScanner) scanUnsafe() (string, *tdproto.MarkupEntity) { 367 switch s.Next() { 368 case '&', '<', '>': 369 start := s.Position() 370 return string(s.TakeNext()), &tdproto.MarkupEntity{ 371 Open: start, 372 Close: start + 1, 373 Type: tdproto.Unsafe, 374 } 375 default: 376 return string(s.Next()), nil 377 } 378 } 379 380 func (s *MarkupScanner) scanMarkdownLinks() (string, *tdproto.MarkupEntity) { 381 if s.Next() != '[' { 382 return "", nil 383 } 384 385 start := s.Position() 386 ch := s.TakeNext() 387 388 var b strings.Builder 389 b.Grow(s.Length() - s.Position()) 390 b.WriteRune(ch) 391 392 var replBuilder strings.Builder 393 394 findRepl: 395 for { 396 ch = s.TakeNext() 397 b.WriteRune(ch) 398 switch ch { 399 case ']': 400 break findRepl 401 default: 402 replBuilder.WriteRune(ch) 403 } 404 if s.Rest() == 0 { 405 s.Rewind(start) 406 return "", nil 407 } 408 } 409 410 replText := strings.TrimSpace(replBuilder.String()) 411 if len(replText) == 0 { 412 s.Rewind(start) 413 return "", nil 414 } 415 416 if s.Next() != '(' { 417 replBuilder.WriteRune(ch) 418 goto findRepl 419 } 420 421 ch = s.TakeNext() 422 b.WriteRune(ch) 423 424 var urlBuilder strings.Builder 425 426 findUrl: 427 for { 428 ch := s.TakeNext() 429 b.WriteRune(ch) 430 switch ch { 431 case ')': 432 break findUrl 433 default: 434 urlBuilder.WriteRune(ch) 435 } 436 if s.Rest() == 0 { 437 s.Rewind(start) 438 return "", nil 439 } 440 } 441 442 u, err := url.Parse(urlBuilder.String()) 443 if err != nil || !CheckUrl(u) { 444 s.Rewind(start) 445 return "", nil 446 } 447 448 return b.String(), &tdproto.MarkupEntity{ 449 Type: tdproto.Link, 450 Url: u.String(), 451 Repl: replText, 452 Open: start, 453 Close: s.Position(), 454 } 455 }