github.com/lmorg/murex@v0.0.0-20240217211045-e081c89cd4ef/builtins/core/lists/regexp.go (about) 1 package lists 2 3 import ( 4 "bytes" 5 "errors" 6 "fmt" 7 "regexp" 8 9 "github.com/lmorg/murex/lang" 10 "github.com/lmorg/murex/lang/types" 11 ) 12 13 func init() { 14 lang.DefineMethod("match", cmdMatch, types.ReadArray, types.WriteArray) 15 lang.DefineMethod("!match", cmdMatch, types.ReadArray, types.WriteArray) 16 lang.DefineMethod("regexp", cmdRegexp, types.ReadArray, types.WriteArray) 17 lang.DefineMethod("!regexp", cmdRegexp, types.ReadArray, types.WriteArray) 18 } 19 20 func cmdMatch(p *lang.Process) error { 21 dt := p.Stdin.GetDataType() 22 p.Stdout.SetDataType(dt) 23 24 if err := p.ErrIfNotAMethod(); err != nil { 25 return err 26 } 27 28 if p.Parameters.StringAll() == "" { 29 return errors.New("no parameters supplied") 30 } 31 32 aw, err := p.Stdout.WriteArray(dt) 33 if err != nil { 34 return err 35 } 36 37 p.Stdin.ReadArray(p.Context, func(b []byte) { 38 matched := bytes.Contains(b, p.Parameters.ByteAll()) 39 if (matched && !p.IsNot) || (!matched && p.IsNot) { 40 err = aw.Write(b) 41 if err != nil { 42 p.Stdin.ForceClose() 43 p.Done() 44 } 45 } 46 }) 47 48 if p.HasCancelled() { 49 return err 50 } 51 52 return aw.Close() 53 } 54 55 func cmdRegexp(p *lang.Process) (err error) { 56 dt := p.Stdin.GetDataType() 57 p.Stdout.SetDataType(dt) 58 59 if err := p.ErrIfNotAMethod(); err != nil { 60 return err 61 } 62 63 if p.Parameters.StringAll() == "" { 64 return errors.New("no parameters supplied") 65 } 66 67 var sRegex []string 68 if p.Parameters.Len() == 1 { 69 sRegex, err = splitRegexParams(p.Parameters.ByteAll()) 70 if err != nil { 71 return err 72 } 73 74 } else { 75 // No need to get clever with the regex parser because the parameters are already split by murex's parser 76 sRegex = p.Parameters.StringArray() 77 } 78 79 if len(sRegex) < 2 { 80 return fmt.Errorf("invalid regexp (too few parameters) in: `%s`", p.Parameters.StringAll()) 81 } 82 if len(sRegex) > 4 { 83 return fmt.Errorf("invalid regexp (too many parameters) in: `%s`", p.Parameters.StringAll()) 84 } 85 86 var rx *regexp.Regexp 87 if rx, err = regexp.Compile(sRegex[1]); err != nil { 88 return 89 } 90 91 switch sRegex[0][0] { 92 case 'm': 93 return regexMatch(p, rx, dt) 94 95 case 's': 96 if p.IsNot { 97 return fmt.Errorf("cannot use `%s` with `%s` flag in `%s`", p.Name.String(), string(sRegex[0][0]), p.Parameters.StringAll()) 98 } 99 return regexSubstitute(p, rx, sRegex, dt) 100 101 case 'f': 102 if p.IsNot { 103 return fmt.Errorf("cannot use `%s` with `%s` flag in `%s`", p.Name.String(), string(sRegex[0][0]), p.Parameters.StringAll()) 104 } 105 return regexFind(p, rx, dt) 106 107 default: 108 return errors.New("invalid regexp. Please use either match (m), substitute (s) or find (f)") 109 } 110 } 111 112 func splitRegexParams(regex []byte) ([]string, error) { 113 if len(regex) < 2 { 114 return nil, fmt.Errorf("invalid regexp (too few characters) in: `%s`", string(regex)) 115 } 116 117 switch regex[1] { 118 default: 119 return splitRegexDefault(regex) 120 121 case '{': 122 return nil, fmt.Errorf("the `{` character is not supported for separating regex parameters in: `%s`", string(regex)) 123 //return splitRegexBraces(regex) 124 125 case '\\': 126 return nil, fmt.Errorf("the `\\` character is not valid for separating regex parameters in: `%s`", string(regex)) 127 } 128 } 129 130 func splitRegexDefault(regex []byte) (s []string, _ error) { 131 var ( 132 param []byte 133 escaped bool 134 token = regex[1] 135 ) 136 137 for _, c := range regex { 138 switch c { 139 default: 140 if escaped { 141 param = append(param, '\\', c) 142 escaped = false 143 continue 144 } 145 param = append(param, c) 146 147 case '\\': 148 if escaped { 149 param = append(param, '\\', c) 150 escaped = false 151 continue 152 } 153 escaped = true 154 155 case token: 156 if escaped { 157 escaped = false 158 param = append(param, c) 159 continue 160 } 161 162 s = append(s, string(param)) 163 param = []byte{} 164 } 165 } 166 s = append(s, string(param)) 167 168 return 169 } 170 171 // -------- regex functions -------- 172 173 func regexMatch(p *lang.Process, rx *regexp.Regexp, dt string) error { 174 aw, err := p.Stdout.WriteArray(dt) 175 if err != nil { 176 return err 177 } 178 var count int 179 180 p.Stdin.ReadArray(p.Context, func(b []byte) { 181 matched := rx.Match(b) 182 if (matched && !p.IsNot) || (!matched && p.IsNot) { 183 184 count++ 185 err = aw.Write(b) 186 if err != nil { 187 p.Stdin.ForceClose() 188 p.Done() 189 } 190 191 } 192 }) 193 194 if p.HasCancelled() { 195 return err 196 } 197 198 if count == 0 { 199 return fmt.Errorf("nothing matched: %s", rx.String()) 200 } 201 202 return aw.Close() 203 } 204 205 func regexSubstitute(p *lang.Process, rx *regexp.Regexp, sRegex []string, dt string) error { 206 if len(sRegex) < 3 { 207 return fmt.Errorf("invalid regex: too few parameters\nexpecting s/find/substitute/ in: `%s`", p.Parameters.StringAll()) 208 } 209 210 aw, err := p.Stdout.WriteArray(dt) 211 if err != nil { 212 return err 213 } 214 215 sub := []byte(sRegex[2]) 216 217 p.Stdin.ReadArray(p.Context, func(b []byte) { 218 err = aw.Write(rx.ReplaceAll(b, sub)) 219 if err != nil { 220 p.Stdin.ForceClose() 221 p.Done() 222 } 223 }) 224 225 if p.HasCancelled() { 226 return err 227 } 228 229 return aw.Close() 230 } 231 232 func regexFind(p *lang.Process, rx *regexp.Regexp, dt string) error { 233 aw, err := p.Stdout.WriteArray(dt) 234 if err != nil { 235 return err 236 } 237 238 p.Stdin.ReadArray(p.Context, func(b []byte) { 239 match := rx.FindAllStringSubmatch(string(b), -1) 240 for _, found := range match { 241 if len(found) > 1 { 242 243 for i := 1; i < len(found); i++ { 244 err = aw.WriteString(found[i]) 245 if err != nil { 246 p.Stdin.ForceClose() 247 p.Done() 248 } 249 250 } 251 252 } 253 } 254 }) 255 256 if p.HasCancelled() { 257 return err 258 } 259 260 return aw.Close() 261 }