github.com/tada-team/tdproto@v1.51.57/tdmarkup/markup_scanner.go (about)

     1  package tdmarkup
     2  
     3  import (
     4  	"net/url"
     5  	"strings"
     6  	"time"
     7  
     8  	"github.com/tada-team/tdproto"
     9  )
    10  
    11  var ops = "*/_~`<>&[]()"
    12  
    13  var opInlines = map[rune]tdproto.MarkupType{
    14  	'*': tdproto.Bold,
    15  	'/': tdproto.Italic,
    16  	'_': tdproto.Underscore,
    17  	'~': tdproto.Strike,
    18  	'`': tdproto.Code,
    19  }
    20  
    21  var (
    22  	opCodeBlock  = []rune("```")
    23  	opQuoteBlock = []rune("> ")
    24  )
    25  
    26  // FIXME: temporary hack, move to MarkupScanner() itself
    27  var CheckUrl = func(u *url.URL) bool { return u.Scheme != "" }
    28  
    29  func contains(s string, typ tdproto.MarkupType) bool {
    30  	for s := NewMarkupScanner(s); s.Rest() > 0; {
    31  		_, e := s.Scan(nil)
    32  		if doContains(e, typ) {
    33  			return true
    34  		}
    35  	}
    36  	return false
    37  }
    38  
    39  func doContains(e *tdproto.MarkupEntity, substring tdproto.MarkupType) bool {
    40  	if e == nil {
    41  		return false
    42  	}
    43  	if e.Type == substring {
    44  		return true
    45  	}
    46  	for _, child := range e.Childs {
    47  		if doContains(&child, substring) {
    48  			return true
    49  		}
    50  	}
    51  	return false
    52  }
    53  
    54  func ContainsTime(s string) bool { return contains(s, tdproto.Time) }
    55  
    56  func ParseString(text string, links tdproto.MessageLinks) (string, []tdproto.MarkupEntity) {
    57  	text = strings.ReplaceAll(text, "\r", "")
    58  	if len(links) == 0 && !strings.ContainsAny(text, ops) {
    59  		return text, nil
    60  	}
    61  
    62  	var b strings.Builder
    63  	b.Grow(len(text))
    64  
    65  	var entities []tdproto.MarkupEntity
    66  	for s := NewMarkupScanner(text); s.Rest() > 0; {
    67  		t, e := s.Scan(links)
    68  		if e != nil {
    69  			entities = append(entities, *e)
    70  		}
    71  		b.WriteString(t)
    72  	}
    73  
    74  	return b.String(), entities
    75  }
    76  
    77  type MarkupScanner struct {
    78  	*Scanner
    79  	internal bool
    80  }
    81  
    82  func NewMarkupScanner(text string) *MarkupScanner {
    83  	return &MarkupScanner{Scanner: NewScanner(text)}
    84  }
    85  
    86  func (s *MarkupScanner) Scan(links tdproto.MessageLinks) (string, *tdproto.MarkupEntity) {
    87  	if isEOF(s.Next()) {
    88  		return "", nil
    89  	}
    90  
    91  	// markdown links
    92  	t, e := s.scanMarkdownLinks()
    93  	if e != nil {
    94  		return t, e
    95  	}
    96  
    97  	// links
    98  	for _, l := range links {
    99  		t, e := s.scanLink(l)
   100  		if e != nil {
   101  			return t, e
   102  		}
   103  	}
   104  
   105  	// dates (before html tags!)
   106  	t, e = s.scanTime()
   107  	if e != nil {
   108  		return t, e
   109  	}
   110  
   111  	// quotes (before html tags!)
   112  	if s.Position() == 0 || isEOL(s.Current()) {
   113  		t, e := s.scanQuote()
   114  		if e != nil {
   115  			e.Childs = s.scanChilds(t[len(opQuoteBlock):])
   116  			return t, e
   117  		}
   118  		if t != "" {
   119  			return t, nil
   120  		}
   121  	}
   122  
   123  	// html tags
   124  	t, e = s.scanUnsafe()
   125  	if e != nil {
   126  		return t, e
   127  	}
   128  
   129  	// code block
   130  	t, e = s.scanBlock(opCodeBlock, opCodeBlock, tdproto.CodeBlock)
   131  	if e != nil {
   132  		return t, e
   133  	}
   134  	if t != "" {
   135  		return t, nil
   136  	}
   137  
   138  	// inlines
   139  	if typ, ok := opInlines[s.Next()]; ok {
   140  		allowWhitespaceAround := typ == tdproto.Code
   141  		t, e := s.scanInline(s.Next(), typ, allowWhitespaceAround)
   142  		if e != nil {
   143  			if typ == tdproto.Italic && isPath(t) {
   144  				return t, nil
   145  			}
   146  			if typ != tdproto.Code {
   147  				e.Childs = s.scanChilds(t[1 : len(t)-1])
   148  			}
   149  			return t, e
   150  		}
   151  		if t != "" {
   152  			return t, nil
   153  		}
   154  	}
   155  
   156  	// unparsed
   157  	return string(s.TakeNext()), nil
   158  }
   159  
   160  func (s *MarkupScanner) scanChilds(text string) (res []tdproto.MarkupEntity) {
   161  	if len(text) < 3 {
   162  		return
   163  	}
   164  	scanner := NewMarkupScanner(text)
   165  	scanner.internal = true
   166  	for scanner.Rest() > 0 {
   167  		t, e := scanner.Scan(nil)
   168  		if e != nil {
   169  			res = append(res, *e)
   170  		}
   171  		if t == "" {
   172  			break
   173  		}
   174  	}
   175  	return
   176  }
   177  
   178  var dateLayouts = []string{
   179  	"2006-01-02T15:04:05.000000-0700",
   180  	"2006-01-02T15:04:05.000000Z",
   181  }
   182  
   183  func (s *MarkupScanner) scanTime() (string, *tdproto.MarkupEntity) {
   184  	if s.Next() != '<' {
   185  		return "", nil
   186  	}
   187  
   188  	start := s.Position()
   189  	s.TakeNext()
   190  	strDt := s.ScanUntil([]rune(">"))
   191  	if len(strDt) < 18 {
   192  		s.Rewind(start)
   193  		return "", nil
   194  	}
   195  
   196  	strDt = strDt[:len(strDt)-1]
   197  	for _, layout := range dateLayouts {
   198  		_, err := time.Parse(layout, strDt)
   199  		if err != nil {
   200  			continue
   201  		}
   202  		return "<" + strDt + ">", &tdproto.MarkupEntity{
   203  			Open:        start,
   204  			OpenLength:  1,
   205  			Close:       s.Position() - 1,
   206  			CloseLength: 1,
   207  			Type:        tdproto.Time,
   208  			Time:        strDt,
   209  		}
   210  	}
   211  
   212  	s.Rewind(start)
   213  	return "", nil
   214  }
   215  
   216  func (s *MarkupScanner) scanInline(marker rune, typ tdproto.MarkupType, allowWhitespaceAround bool) (string, *tdproto.MarkupEntity) {
   217  	start := s.Position()
   218  
   219  	var b strings.Builder
   220  	b.Grow(s.Length() - start)
   221  	b.WriteRune(s.TakeNext())
   222  
   223  	if !(start == 0 || isWhitespace(s.Prev()) || isEOL(s.Prev()) || allowWhitespaceAround) {
   224  		s.Rewind(start)
   225  		return "", nil
   226  	}
   227  
   228  	if (isWhitespace(s.Next()) || isEOL(s.Next())) && !allowWhitespaceAround {
   229  		s.Rewind(start)
   230  		return "", nil
   231  	}
   232  
   233  	e := &tdproto.MarkupEntity{
   234  		Type:       typ,
   235  		Open:       s.Position() - 1,
   236  		OpenLength: 1,
   237  	}
   238  
   239  	for s.Rest() > 0 {
   240  		ch := s.TakeNext()
   241  		b.WriteRune(ch)
   242  
   243  		prev := s.Prev()
   244  		next := s.Next()
   245  
   246  		if ch == marker && s.Since(start) > 2 && next != marker && (allowWhitespaceAround || !isWhitespace(prev)) && prev != ch &&
   247  			(isWhitespace(next) || isEOF(next) || isEOL(next) || isTrailingPunctuation(next)) {
   248  			e.Close = s.Position() - 1
   249  			e.CloseLength = 1
   250  			return b.String(), e
   251  		}
   252  
   253  		if isEOL(ch) {
   254  			break
   255  		}
   256  	}
   257  
   258  	s.Rewind(start)
   259  	return "", nil
   260  }
   261  
   262  func (s *MarkupScanner) scanBlock(op, cl []rune, typ tdproto.MarkupType) (string, *tdproto.MarkupEntity) {
   263  	start := s.Position()
   264  
   265  	t := s.ScanUntil(op)
   266  	if t == "" {
   267  		return "", nil
   268  	}
   269  
   270  	var b strings.Builder
   271  	b.Grow(s.Length() - start)
   272  	b.WriteString(t)
   273  
   274  	e := &tdproto.MarkupEntity{
   275  		Type:       typ,
   276  		Open:       s.Position() - len(op),
   277  		OpenLength: len(op),
   278  	}
   279  
   280  	for s.Next() == ' ' {
   281  		e.OpenLength += 1
   282  		b.WriteRune(s.TakeNext())
   283  	}
   284  
   285  	for s.Next() == '\n' {
   286  		e.OpenLength += 1
   287  		b.WriteRune(s.TakeNext())
   288  	}
   289  
   290  	var tail []rune
   291  	for s.Rest() > 0 {
   292  		t := s.ScanUntil(cl)
   293  		if t == "" {
   294  			ch := s.TakeNext()
   295  			b.WriteRune(ch)
   296  			tail = append(tail, ch)
   297  			continue
   298  		}
   299  		b.WriteString(t)
   300  		e.Close = s.Position() - len(cl)
   301  		e.CloseLength = len(cl)
   302  
   303  		for i := len(tail) - 1; i >= 0; i-- {
   304  			ch := tail[i]
   305  			if !(isWhitespace(ch) || isEOL(ch)) {
   306  				break
   307  			}
   308  			e.Close--
   309  			e.CloseLength++
   310  		}
   311  		return b.String(), e
   312  	}
   313  
   314  	s.Rewind(start)
   315  	return "", nil
   316  }
   317  
   318  func (s *MarkupScanner) scanQuote() (string, *tdproto.MarkupEntity) {
   319  	t := s.ScanUntil(opQuoteBlock)
   320  	if t == "" {
   321  		return "", nil
   322  	}
   323  
   324  	var b strings.Builder
   325  	b.Grow(s.Length() - s.Position())
   326  	b.WriteString(t)
   327  
   328  	e := &tdproto.MarkupEntity{
   329  		Type:       tdproto.Quote,
   330  		Open:       s.Position() - len(opQuoteBlock),
   331  		OpenLength: len(opQuoteBlock),
   332  	}
   333  
   334  	for {
   335  		ch := s.Next()
   336  		if isEOL(ch) || isEOF(ch) {
   337  			e.Close = s.Position()
   338  			if isEOL(ch) {
   339  				e.CloseLength = 1
   340  			}
   341  			return b.String(), e
   342  		}
   343  		b.WriteRune(s.TakeNext())
   344  	}
   345  }
   346  
   347  func (s *MarkupScanner) scanLink(l tdproto.MessageLink) (string, *tdproto.MarkupEntity) {
   348  	start := s.Position()
   349  
   350  	for _, r := range []rune(l.Pattern) {
   351  		if s.TakeNext() != r {
   352  			s.Rewind(start)
   353  			return "", nil
   354  		}
   355  	}
   356  
   357  	return l.Pattern, &tdproto.MarkupEntity{
   358  		Type:  tdproto.Link,
   359  		Url:   l.Url,
   360  		Repl:  l.Text,
   361  		Open:  start,
   362  		Close: s.Position(),
   363  	}
   364  }
   365  
   366  func (s *MarkupScanner) scanUnsafe() (string, *tdproto.MarkupEntity) {
   367  	switch s.Next() {
   368  	case '&', '<', '>':
   369  		start := s.Position()
   370  		return string(s.TakeNext()), &tdproto.MarkupEntity{
   371  			Open:  start,
   372  			Close: start + 1,
   373  			Type:  tdproto.Unsafe,
   374  		}
   375  	default:
   376  		return string(s.Next()), nil
   377  	}
   378  }
   379  
   380  func (s *MarkupScanner) scanMarkdownLinks() (string, *tdproto.MarkupEntity) {
   381  	if s.Next() != '[' {
   382  		return "", nil
   383  	}
   384  
   385  	start := s.Position()
   386  	ch := s.TakeNext()
   387  
   388  	var b strings.Builder
   389  	b.Grow(s.Length() - s.Position())
   390  	b.WriteRune(ch)
   391  
   392  	var replBuilder strings.Builder
   393  
   394  findRepl:
   395  	for {
   396  		ch = s.TakeNext()
   397  		b.WriteRune(ch)
   398  		switch ch {
   399  		case ']':
   400  			break findRepl
   401  		default:
   402  			replBuilder.WriteRune(ch)
   403  		}
   404  		if s.Rest() == 0 {
   405  			s.Rewind(start)
   406  			return "", nil
   407  		}
   408  	}
   409  
   410  	replText := strings.TrimSpace(replBuilder.String())
   411  	if len(replText) == 0 {
   412  		s.Rewind(start)
   413  		return "", nil
   414  	}
   415  
   416  	if s.Next() != '(' {
   417  		replBuilder.WriteRune(ch)
   418  		goto findRepl
   419  	}
   420  
   421  	ch = s.TakeNext()
   422  	b.WriteRune(ch)
   423  
   424  	var urlBuilder strings.Builder
   425  
   426  findUrl:
   427  	for {
   428  		ch := s.TakeNext()
   429  		b.WriteRune(ch)
   430  		switch ch {
   431  		case ')':
   432  			break findUrl
   433  		default:
   434  			urlBuilder.WriteRune(ch)
   435  		}
   436  		if s.Rest() == 0 {
   437  			s.Rewind(start)
   438  			return "", nil
   439  		}
   440  	}
   441  
   442  	u, err := url.Parse(urlBuilder.String())
   443  	if err != nil || !CheckUrl(u) {
   444  		s.Rewind(start)
   445  		return "", nil
   446  	}
   447  
   448  	return b.String(), &tdproto.MarkupEntity{
   449  		Type:  tdproto.Link,
   450  		Url:   u.String(),
   451  		Repl:  replText,
   452  		Open:  start,
   453  		Close: s.Position(),
   454  	}
   455  }