github.com/jmigpin/editor@v1.6.0/util/parseutil/scmatch.go (about)

     1  package parseutil
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"io"
     7  	"regexp"
     8  	"unicode"
     9  )
    10  
    11  // scanner match utility funcs
    12  type ScMatch struct {
    13  	sc    *Scanner
    14  	P     *ScParse
    15  	cache struct {
    16  		regexps map[string]*regexp.Regexp
    17  	}
    18  }
    19  
    20  func (m *ScMatch) init(sc *Scanner) {
    21  	m.sc = sc
    22  	m.P = &sc.P
    23  	m.cache.regexps = map[string]*regexp.Regexp{}
    24  }
    25  
    26  //----------
    27  
    28  func (m *ScMatch) Eof() bool {
    29  	pos0 := m.sc.KeepPos()
    30  	_, err := m.sc.ReadRune()
    31  	if err == nil {
    32  		pos0.Restore()
    33  		return false
    34  	}
    35  	return err == io.EOF
    36  }
    37  
    38  //----------
    39  
    40  func (m *ScMatch) Rune(ru rune) error {
    41  	return m.sc.RestorePosOnErr(func() error {
    42  		ru2, err := m.sc.ReadRune()
    43  		if err != nil {
    44  			return err
    45  		}
    46  		if ru2 != ru {
    47  			return NoMatchErr
    48  		}
    49  		return nil
    50  	})
    51  }
    52  func (m *ScMatch) RuneAny(rs []rune) error { // "or", any of the runes
    53  	return m.sc.RestorePosOnErr(func() error {
    54  		ru, err := m.sc.ReadRune()
    55  		if err != nil {
    56  			return err
    57  		}
    58  		if !ContainsRune(rs, ru) {
    59  			return NoMatchErr
    60  		}
    61  		return nil
    62  	})
    63  }
    64  func (m *ScMatch) RuneAnyNot(rs []rune) error { // "or", any of the runes
    65  	return m.sc.RestorePosOnErr(func() error {
    66  		ru, err := m.sc.ReadRune()
    67  		if err != nil {
    68  			return err
    69  		}
    70  		if ContainsRune(rs, ru) {
    71  			return NoMatchErr
    72  		}
    73  		return nil
    74  	})
    75  }
    76  func (m *ScMatch) RuneSequence(seq []rune) error {
    77  	return m.sc.RestorePosOnErr(func() error {
    78  		for i, l := 0, len(seq); i < l; i++ {
    79  			ru := seq[i]
    80  			if m.sc.Reverse {
    81  				ru = seq[l-1-i]
    82  			}
    83  
    84  			// NOTE: using spm.Rune() would call keeppos n times
    85  
    86  			ru2, err := m.sc.ReadRune()
    87  			if err != nil {
    88  				return err
    89  			}
    90  			if ru2 != ru {
    91  				return NoMatchErr
    92  			}
    93  		}
    94  		return nil
    95  	})
    96  }
    97  func (m *ScMatch) RuneSequenceMid(rs []rune) error {
    98  	return m.sc.RestorePosOnErr(func() error {
    99  		for k := 0; ; k++ {
   100  			if err := m.RuneSequence(rs); err == nil {
   101  				return nil // match
   102  			}
   103  			if k+1 >= len(rs) {
   104  				break
   105  			}
   106  			// backup to previous rune to try to match again
   107  			m.sc.Reverse = !m.sc.Reverse
   108  			_, err := m.sc.ReadRune()
   109  			m.sc.Reverse = !m.sc.Reverse
   110  			if err != nil {
   111  				return err
   112  			}
   113  		}
   114  		return NoMatchErr
   115  	})
   116  }
   117  func (m *ScMatch) RuneRange(rr RuneRange) error {
   118  	return m.sc.RestorePosOnErr(func() error {
   119  		ru, err := m.sc.ReadRune()
   120  		if err != nil {
   121  			return err
   122  		}
   123  		if !rr.HasRune(ru) {
   124  			return NoMatchErr
   125  		}
   126  		return nil
   127  	})
   128  }
   129  func (m *ScMatch) RuneRangeNot(rr RuneRange) error { // negation
   130  	return m.sc.RestorePosOnErr(func() error {
   131  		ru, err := m.sc.ReadRune()
   132  		if err != nil {
   133  			return err
   134  		}
   135  		if rr.HasRune(ru) {
   136  			return NoMatchErr
   137  		}
   138  		return nil
   139  	})
   140  }
   141  func (m *ScMatch) RunesAndRuneRanges(rs []rune, rrs RuneRanges) error { // negation
   142  	return m.sc.RestorePosOnErr(func() error {
   143  		ru, err := m.sc.ReadRune()
   144  		if err != nil {
   145  			return err
   146  		}
   147  		if !ContainsRune(rs, ru) && !rrs.HasRune(ru) {
   148  			return NoMatchErr
   149  		}
   150  		return nil
   151  	})
   152  }
   153  func (m *ScMatch) RunesAndRuneRangesNot(rs []rune, rrs RuneRanges) error {
   154  	return m.sc.RestorePosOnErr(func() error {
   155  		ru, err := m.sc.ReadRune()
   156  		if err != nil {
   157  			return err
   158  		}
   159  		if ContainsRune(rs, ru) || rrs.HasRune(ru) {
   160  			return NoMatchErr
   161  		}
   162  		return nil
   163  	})
   164  }
   165  
   166  //----------
   167  
   168  func (m *ScMatch) RuneFn(fn func(rune) bool) error {
   169  	pos0 := m.sc.KeepPos()
   170  	ru, err := m.sc.ReadRune()
   171  	if err == nil {
   172  		if !fn(ru) {
   173  			pos0.Restore()
   174  			err = NoMatchErr
   175  		}
   176  	}
   177  	return err
   178  }
   179  
   180  // one or more
   181  func (m *ScMatch) RuneFnLoop(fn func(rune) bool) error {
   182  	for first := true; ; first = false {
   183  		if err := m.RuneFn(fn); err != nil {
   184  			if first {
   185  				return err
   186  			}
   187  			return nil
   188  		}
   189  	}
   190  }
   191  
   192  //func (m *SMatcher) RuneFnZeroOrMore(fn func(rune) bool) int {
   193  //	for i := 0; ; i++ {
   194  //		if err := m.RuneFn(fn); err != nil {
   195  //			return i
   196  //		}
   197  //	}
   198  //}
   199  //func (m *SMatcher) RuneFnOneOrMore(fn func(rune) bool) error {
   200  //	return m.LoopRuneFn(fn)
   201  
   202  //	if err := m.RuneFn(fn); err != nil {
   203  //		return err
   204  //	}
   205  //	_ = m.RuneFnZeroOrMore(fn)
   206  //	return nil
   207  //}
   208  
   209  //----------
   210  
   211  func (m *ScMatch) Sequence(seq string) error {
   212  	return m.RuneSequence([]rune(seq))
   213  }
   214  func (m *ScMatch) SequenceMid(seq string) error {
   215  	return m.RuneSequenceMid([]rune(seq))
   216  }
   217  
   218  //// same as rune sequence, but directly using strings comparison
   219  //func (m *ScMatch) Sequence(seq string) error {
   220  //	if m.sc.Reverse {
   221  //		return m.RuneSequence([]rune(seq))
   222  //	}
   223  //	l := len(seq)
   224  //	b := m.sc.Src[m.sc.Pos:]
   225  //	if l > len(b) {
   226  //		return NoMatchErr
   227  //	}
   228  //	if string(b[:l]) != seq {
   229  //		return NoMatchErr
   230  //	}
   231  //	m.sc.Pos += l
   232  //	return nil
   233  //}
   234  
   235  //----------
   236  
   237  func (m *ScMatch) RegexpFromStartCached(res string, maxLen int) error {
   238  	return m.RegexpFromStart(res, true, maxLen)
   239  }
   240  func (m *ScMatch) RegexpFromStart(res string, cache bool, maxLen int) error {
   241  	// TODO: reverse
   242  
   243  	res = "^(" + res + ")" // from start
   244  
   245  	re := (*regexp.Regexp)(nil)
   246  	if cache {
   247  		re2, ok := m.cache.regexps[res]
   248  		if ok {
   249  			re = re2
   250  		}
   251  	}
   252  	if re == nil {
   253  		re3, err := regexp.Compile(res)
   254  		if err != nil {
   255  			return err
   256  		}
   257  		re = re3
   258  		if cache {
   259  			m.cache.regexps[res] = re
   260  		}
   261  	}
   262  
   263  	// limit input to be read
   264  	src := m.sc.Src[m.sc.Pos:]
   265  	max := maxLen
   266  	if max > len(src) {
   267  		max = len(src)
   268  	}
   269  	src = m.sc.Src[m.sc.Pos : m.sc.Pos+max]
   270  
   271  	locs := re.FindIndex(src)
   272  	if len(locs) == 0 {
   273  		return NoMatchErr
   274  	}
   275  	m.sc.Pos += locs[1]
   276  	return nil
   277  }
   278  
   279  //----------
   280  
   281  func (m *ScMatch) DoubleQuotedString(maxLen int) error {
   282  	return m.StringSection("\"", '\\', true, maxLen, false)
   283  }
   284  func (m *ScMatch) QuotedString() error {
   285  	//return m.QuotedString2('\\', 3000, 10)
   286  	return m.QuotedString2('\\', 3000, 3000)
   287  }
   288  
   289  // allows escaped runes (if esc!=0)
   290  func (m *ScMatch) QuotedString2(esc rune, maxLen1, maxLen2 int) error {
   291  	// doublequote: fail on newline, eof doesn't close
   292  	if err := m.StringSection("\"", esc, true, maxLen1, false); err == nil {
   293  		return nil
   294  	}
   295  	// singlequote: fail on newline, eof doesn't close (usually a smaller maxlen)
   296  	if err := m.StringSection("'", esc, true, maxLen2, false); err == nil {
   297  		return nil
   298  	}
   299  	// backquote: can have newline, eof doesn't close
   300  	if err := m.StringSection("`", esc, false, maxLen1, false); err == nil {
   301  		return nil
   302  	}
   303  	return fmt.Errorf("not a quoted string")
   304  }
   305  
   306  func (m *ScMatch) StringSection(openclose string, esc rune, failOnNewline bool, maxLen int, eofClose bool) error {
   307  	return m.Section(openclose, openclose, esc, failOnNewline, maxLen, eofClose)
   308  }
   309  
   310  // match opened/closed sections.
   311  func (m *ScMatch) Section(open, close string, esc rune, failOnNewline bool, maxLen int, eofClose bool) error {
   312  	pos0 := m.sc.Pos
   313  	return m.sc.RestorePosOnErr(func() error {
   314  		if err := m.Sequence(open); err != nil {
   315  			return err
   316  		}
   317  		for {
   318  			if esc != 0 && m.EscapeAny(esc) == nil {
   319  				continue
   320  			}
   321  			if err := m.Sequence(close); err == nil {
   322  				return nil // ok
   323  			}
   324  			// consume rune
   325  			ru, err := m.sc.ReadRune()
   326  			if err != nil {
   327  				// extension: stop on eof
   328  				if eofClose && err == io.EOF {
   329  					return nil // ok
   330  				}
   331  
   332  				return err
   333  			}
   334  			// extension: stop after maxlength
   335  			if maxLen > 0 {
   336  				d := m.sc.Pos - pos0
   337  				if d < 0 { // handle reverse
   338  					d = -d
   339  				}
   340  				if d > maxLen {
   341  					return fmt.Errorf("passed maxlen")
   342  				}
   343  			}
   344  			// extension: newline
   345  			if failOnNewline && ru == '\n' {
   346  				return fmt.Errorf("found newline")
   347  			}
   348  		}
   349  	})
   350  }
   351  
   352  //----------
   353  
   354  func (m *ScMatch) EscapeAny(escape rune) error {
   355  	return m.sc.RestorePosOnErr(func() error {
   356  		if m.sc.Reverse {
   357  			if err := m.NRunes(1); err != nil {
   358  				return err
   359  			}
   360  		}
   361  		if err := m.Rune(escape); err != nil {
   362  			return err
   363  		}
   364  		if !m.sc.Reverse {
   365  			return m.NRunes(1)
   366  		}
   367  		return nil
   368  	})
   369  }
   370  func (m *ScMatch) NRunes(n int) error {
   371  	pos0 := m.sc.KeepPos()
   372  	for i := 0; i < n; i++ {
   373  		_, err := m.sc.ReadRune()
   374  		if err != nil {
   375  			pos0.Restore()
   376  			return err
   377  		}
   378  	}
   379  	return nil
   380  }
   381  
   382  //----------
   383  
   384  func (m *ScMatch) SpacesIncludingNL() bool {
   385  	err := m.Spaces(true, 0)
   386  	return err == nil
   387  }
   388  func (m *ScMatch) SpacesExcludingNL() bool {
   389  	err := m.Spaces(false, 0)
   390  	return err == nil
   391  }
   392  func (m *ScMatch) Spaces(includeNL bool, escape rune) error {
   393  	for first := true; ; first = false {
   394  		if escape != 0 {
   395  			if err := m.EscapeAny(escape); err == nil {
   396  				continue
   397  			}
   398  		}
   399  		pos0 := m.sc.KeepPos()
   400  		ru, err := m.sc.ReadRune()
   401  		if err == nil {
   402  			valid := unicode.IsSpace(ru) && (includeNL || ru != '\n')
   403  			if !valid {
   404  				err = NoMatchErr
   405  			}
   406  		}
   407  		if err != nil {
   408  			pos0.Restore()
   409  			if first {
   410  				return err
   411  			}
   412  			return nil
   413  		}
   414  	}
   415  }
   416  
   417  //----------
   418  
   419  func (m *ScMatch) And(fns ...ScFn) error {
   420  	return m.sc.RestorePosOnErr(func() error {
   421  		if m.sc.Reverse {
   422  			for i := len(fns) - 1; i >= 0; i-- {
   423  				fn := fns[i]
   424  				if fn == nil {
   425  					continue
   426  				}
   427  				if err := fn(); err != nil {
   428  					return err
   429  				}
   430  			}
   431  		} else {
   432  			for _, fn := range fns {
   433  				if fn == nil {
   434  					continue
   435  				}
   436  				if err := fn(); err != nil {
   437  					return err
   438  				}
   439  			}
   440  		}
   441  		return nil
   442  	})
   443  }
   444  func (m *ScMatch) Or(fns ...ScFn) error {
   445  	//me := iout.MultiError{} // TODO: better then first error?
   446  	firstErr := error(nil)
   447  	for _, fn := range fns {
   448  		if fn == nil {
   449  			continue
   450  		}
   451  		pos0 := m.sc.KeepPos()
   452  		if err := fn(); err != nil {
   453  			if firstErr == nil {
   454  				firstErr = err
   455  			}
   456  			if IsScFatalError(err) {
   457  				return err
   458  			}
   459  			pos0.Restore()
   460  			continue
   461  		}
   462  		return nil
   463  	}
   464  	return firstErr
   465  }
   466  func (m *ScMatch) Optional(fn ScFn) error {
   467  	if fn == nil {
   468  		return nil
   469  	}
   470  	pos0 := m.sc.KeepPos()
   471  	if err := fn(); err != nil {
   472  		if IsScFatalError(err) {
   473  			return err
   474  		}
   475  		pos0.Restore()
   476  	}
   477  	return nil
   478  }
   479  
   480  //----------
   481  
   482  func (m *ScMatch) ToNLExcludeOrEnd(esc rune) int {
   483  	pos0 := m.sc.KeepPos()
   484  	valid := func(ru rune) bool { return ru != '\n' }
   485  	for {
   486  		if esc != 0 && m.EscapeAny(esc) == nil {
   487  			continue
   488  		}
   489  		if err := m.RuneFn(valid); err == nil {
   490  			continue
   491  		}
   492  		break
   493  	}
   494  	return pos0.Len()
   495  }
   496  func (m *ScMatch) ToNLIncludeOrEnd(esc rune) int {
   497  	pos0 := m.sc.KeepPos()
   498  	_ = m.ToNLExcludeOrEnd(esc)
   499  	_ = m.Rune('\n')
   500  	return pos0.Len()
   501  }
   502  
   503  //----------
   504  
   505  func (m *ScMatch) Letter() error {
   506  	return m.RuneFn(unicode.IsLetter)
   507  }
   508  func (m *ScMatch) Digit() error {
   509  	return m.RuneFn(unicode.IsDigit)
   510  }
   511  func (m *ScMatch) Digits() error {
   512  	return m.RuneFnLoop(unicode.IsDigit)
   513  }
   514  
   515  func (m *ScMatch) Integer() error {
   516  	// TODO: reverse
   517  	//u := "[+-]?[0-9]+"
   518  	//return m.RegexpFromStartCached(u)
   519  
   520  	return m.And(
   521  		m.P.Optional(m.sign),
   522  		m.Digits,
   523  	)
   524  }
   525  
   526  func (m *ScMatch) Float() error {
   527  	// TODO: reverse
   528  	//u := "[+-]?([0-9]*[.])?[0-9]+"
   529  	//u := "[+-]?(\\d+([.]\\d*)?([eE][+-]?\\d+)?|[.]\\d+([eE][+-]?\\d+)?)"
   530  	//return m.RegexpFromStartCached(u, 100)
   531  
   532  	return m.Or(
   533  		// -1.2
   534  		// -1.2e3
   535  		m.P.And(
   536  			m.Integer,
   537  			m.fraction,
   538  			m.P.Optional(m.exponent),
   539  		),
   540  		// .2
   541  		// .2e3
   542  		m.P.And(
   543  			m.fraction,
   544  			m.P.Optional(m.exponent),
   545  		),
   546  	)
   547  }
   548  
   549  func (m *ScMatch) sign() error {
   550  	return m.sc.M.RuneAny([]rune("+-"))
   551  }
   552  func (m *ScMatch) fraction() error {
   553  	return m.And(
   554  		m.P.Rune('.'),
   555  		m.Digits,
   556  	)
   557  }
   558  func (m *ScMatch) exponent() error {
   559  	return m.And(
   560  		m.P.RuneAny([]rune("eE")),
   561  		m.P.Optional(m.sign),
   562  		m.Digits,
   563  	)
   564  }
   565  
   566  //----------
   567  //----------
   568  //----------
   569  
   570  type RuneRange [2]rune // assume [0]<[1]
   571  
   572  func (rr RuneRange) HasRune(ru rune) bool {
   573  	return ru >= rr[0] && ru <= rr[1]
   574  }
   575  func (rr RuneRange) IntersectsRange(rr2 RuneRange) bool {
   576  	noIntersection := rr2[1] <= rr[0] || rr2[0] > rr[1]
   577  	return !noIntersection
   578  }
   579  func (rr RuneRange) String() string {
   580  	return fmt.Sprintf("%q-%q", rr[0], rr[1])
   581  }
   582  
   583  //----------
   584  //----------
   585  //----------
   586  
   587  type RuneRanges []RuneRange
   588  
   589  func (rrs RuneRanges) HasRune(ru rune) bool {
   590  	for _, rr := range rrs {
   591  		if rr.HasRune(ru) {
   592  			return true
   593  		}
   594  	}
   595  	return false
   596  }
   597  
   598  //----------
   599  //----------
   600  //----------
   601  
   602  var NoMatchErr = errors.New("no match")