github.com/lmorg/murex@v0.0.0-20240217211045-e081c89cd4ef/builtins/core/lists/regexp.go (about)

     1  package lists
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"fmt"
     7  	"regexp"
     8  
     9  	"github.com/lmorg/murex/lang"
    10  	"github.com/lmorg/murex/lang/types"
    11  )
    12  
    13  func init() {
    14  	lang.DefineMethod("match", cmdMatch, types.ReadArray, types.WriteArray)
    15  	lang.DefineMethod("!match", cmdMatch, types.ReadArray, types.WriteArray)
    16  	lang.DefineMethod("regexp", cmdRegexp, types.ReadArray, types.WriteArray)
    17  	lang.DefineMethod("!regexp", cmdRegexp, types.ReadArray, types.WriteArray)
    18  }
    19  
    20  func cmdMatch(p *lang.Process) error {
    21  	dt := p.Stdin.GetDataType()
    22  	p.Stdout.SetDataType(dt)
    23  
    24  	if err := p.ErrIfNotAMethod(); err != nil {
    25  		return err
    26  	}
    27  
    28  	if p.Parameters.StringAll() == "" {
    29  		return errors.New("no parameters supplied")
    30  	}
    31  
    32  	aw, err := p.Stdout.WriteArray(dt)
    33  	if err != nil {
    34  		return err
    35  	}
    36  
    37  	p.Stdin.ReadArray(p.Context, func(b []byte) {
    38  		matched := bytes.Contains(b, p.Parameters.ByteAll())
    39  		if (matched && !p.IsNot) || (!matched && p.IsNot) {
    40  			err = aw.Write(b)
    41  			if err != nil {
    42  				p.Stdin.ForceClose()
    43  				p.Done()
    44  			}
    45  		}
    46  	})
    47  
    48  	if p.HasCancelled() {
    49  		return err
    50  	}
    51  
    52  	return aw.Close()
    53  }
    54  
    55  func cmdRegexp(p *lang.Process) (err error) {
    56  	dt := p.Stdin.GetDataType()
    57  	p.Stdout.SetDataType(dt)
    58  
    59  	if err := p.ErrIfNotAMethod(); err != nil {
    60  		return err
    61  	}
    62  
    63  	if p.Parameters.StringAll() == "" {
    64  		return errors.New("no parameters supplied")
    65  	}
    66  
    67  	var sRegex []string
    68  	if p.Parameters.Len() == 1 {
    69  		sRegex, err = splitRegexParams(p.Parameters.ByteAll())
    70  		if err != nil {
    71  			return err
    72  		}
    73  
    74  	} else {
    75  		// No need to get clever with the regex parser because the parameters are already split by murex's parser
    76  		sRegex = p.Parameters.StringArray()
    77  	}
    78  
    79  	if len(sRegex) < 2 {
    80  		return fmt.Errorf("invalid regexp (too few parameters) in: `%s`", p.Parameters.StringAll())
    81  	}
    82  	if len(sRegex) > 4 {
    83  		return fmt.Errorf("invalid regexp (too many parameters) in: `%s`", p.Parameters.StringAll())
    84  	}
    85  
    86  	var rx *regexp.Regexp
    87  	if rx, err = regexp.Compile(sRegex[1]); err != nil {
    88  		return
    89  	}
    90  
    91  	switch sRegex[0][0] {
    92  	case 'm':
    93  		return regexMatch(p, rx, dt)
    94  
    95  	case 's':
    96  		if p.IsNot {
    97  			return fmt.Errorf("cannot use `%s` with `%s` flag in `%s`", p.Name.String(), string(sRegex[0][0]), p.Parameters.StringAll())
    98  		}
    99  		return regexSubstitute(p, rx, sRegex, dt)
   100  
   101  	case 'f':
   102  		if p.IsNot {
   103  			return fmt.Errorf("cannot use `%s` with `%s` flag in `%s`", p.Name.String(), string(sRegex[0][0]), p.Parameters.StringAll())
   104  		}
   105  		return regexFind(p, rx, dt)
   106  
   107  	default:
   108  		return errors.New("invalid regexp. Please use either match (m), substitute (s) or find (f)")
   109  	}
   110  }
   111  
   112  func splitRegexParams(regex []byte) ([]string, error) {
   113  	if len(regex) < 2 {
   114  		return nil, fmt.Errorf("invalid regexp (too few characters) in: `%s`", string(regex))
   115  	}
   116  
   117  	switch regex[1] {
   118  	default:
   119  		return splitRegexDefault(regex)
   120  
   121  	case '{':
   122  		return nil, fmt.Errorf("the `{` character is not supported for separating regex parameters in: `%s`", string(regex))
   123  		//return splitRegexBraces(regex)
   124  
   125  	case '\\':
   126  		return nil, fmt.Errorf("the `\\` character is not valid for separating regex parameters in: `%s`", string(regex))
   127  	}
   128  }
   129  
   130  func splitRegexDefault(regex []byte) (s []string, _ error) {
   131  	var (
   132  		param   []byte
   133  		escaped bool
   134  		token   = regex[1]
   135  	)
   136  
   137  	for _, c := range regex {
   138  		switch c {
   139  		default:
   140  			if escaped {
   141  				param = append(param, '\\', c)
   142  				escaped = false
   143  				continue
   144  			}
   145  			param = append(param, c)
   146  
   147  		case '\\':
   148  			if escaped {
   149  				param = append(param, '\\', c)
   150  				escaped = false
   151  				continue
   152  			}
   153  			escaped = true
   154  
   155  		case token:
   156  			if escaped {
   157  				escaped = false
   158  				param = append(param, c)
   159  				continue
   160  			}
   161  
   162  			s = append(s, string(param))
   163  			param = []byte{}
   164  		}
   165  	}
   166  	s = append(s, string(param))
   167  
   168  	return
   169  }
   170  
   171  // -------- regex functions --------
   172  
   173  func regexMatch(p *lang.Process, rx *regexp.Regexp, dt string) error {
   174  	aw, err := p.Stdout.WriteArray(dt)
   175  	if err != nil {
   176  		return err
   177  	}
   178  	var count int
   179  
   180  	p.Stdin.ReadArray(p.Context, func(b []byte) {
   181  		matched := rx.Match(b)
   182  		if (matched && !p.IsNot) || (!matched && p.IsNot) {
   183  
   184  			count++
   185  			err = aw.Write(b)
   186  			if err != nil {
   187  				p.Stdin.ForceClose()
   188  				p.Done()
   189  			}
   190  
   191  		}
   192  	})
   193  
   194  	if p.HasCancelled() {
   195  		return err
   196  	}
   197  
   198  	if count == 0 {
   199  		return fmt.Errorf("nothing matched: %s", rx.String())
   200  	}
   201  
   202  	return aw.Close()
   203  }
   204  
   205  func regexSubstitute(p *lang.Process, rx *regexp.Regexp, sRegex []string, dt string) error {
   206  	if len(sRegex) < 3 {
   207  		return fmt.Errorf("invalid regex: too few parameters\nexpecting s/find/substitute/ in: `%s`", p.Parameters.StringAll())
   208  	}
   209  
   210  	aw, err := p.Stdout.WriteArray(dt)
   211  	if err != nil {
   212  		return err
   213  	}
   214  
   215  	sub := []byte(sRegex[2])
   216  
   217  	p.Stdin.ReadArray(p.Context, func(b []byte) {
   218  		err = aw.Write(rx.ReplaceAll(b, sub))
   219  		if err != nil {
   220  			p.Stdin.ForceClose()
   221  			p.Done()
   222  		}
   223  	})
   224  
   225  	if p.HasCancelled() {
   226  		return err
   227  	}
   228  
   229  	return aw.Close()
   230  }
   231  
   232  func regexFind(p *lang.Process, rx *regexp.Regexp, dt string) error {
   233  	aw, err := p.Stdout.WriteArray(dt)
   234  	if err != nil {
   235  		return err
   236  	}
   237  
   238  	p.Stdin.ReadArray(p.Context, func(b []byte) {
   239  		match := rx.FindAllStringSubmatch(string(b), -1)
   240  		for _, found := range match {
   241  			if len(found) > 1 {
   242  
   243  				for i := 1; i < len(found); i++ {
   244  					err = aw.WriteString(found[i])
   245  					if err != nil {
   246  						p.Stdin.ForceClose()
   247  						p.Done()
   248  					}
   249  
   250  				}
   251  
   252  			}
   253  		}
   254  	})
   255  
   256  	if p.HasCancelled() {
   257  		return err
   258  	}
   259  
   260  	return aw.Close()
   261  }