github.com/nuvolaris/goja@v0.0.0-20230825100449-967811910c6d/builtin_regexp.go (about) 1 package goja 2 3 import ( 4 "fmt" 5 "regexp" 6 "strings" 7 "unicode/utf16" 8 "unicode/utf8" 9 10 "github.com/nuvolaris/goja/parser" 11 ) 12 13 func (r *Runtime) newRegexpObject(proto *Object) *regexpObject { 14 v := &Object{runtime: r} 15 16 o := ®expObject{} 17 o.class = classRegExp 18 o.val = v 19 o.extensible = true 20 v.self = o 21 o.prototype = proto 22 o.init() 23 return o 24 } 25 26 func (r *Runtime) newRegExpp(pattern *regexpPattern, patternStr String, proto *Object) *regexpObject { 27 o := r.newRegexpObject(proto) 28 29 o.pattern = pattern 30 o.source = patternStr 31 32 return o 33 } 34 35 func decodeHex(s string) (int, bool) { 36 var hex int 37 for i := 0; i < len(s); i++ { 38 var n byte 39 chr := s[i] 40 switch { 41 case '0' <= chr && chr <= '9': 42 n = chr - '0' 43 case 'a' <= chr && chr <= 'f': 44 n = chr - 'a' + 10 45 case 'A' <= chr && chr <= 'F': 46 n = chr - 'A' + 10 47 default: 48 return 0, false 49 } 50 hex = hex*16 + int(n) 51 } 52 return hex, true 53 } 54 55 func writeHex4(b *strings.Builder, i int) { 56 b.WriteByte(hex[i>>12]) 57 b.WriteByte(hex[(i>>8)&0xF]) 58 b.WriteByte(hex[(i>>4)&0xF]) 59 b.WriteByte(hex[i&0xF]) 60 } 61 62 // Convert any valid surrogate pairs in the form of \uXXXX\uXXXX to unicode characters 63 func convertRegexpToUnicode(patternStr string) string { 64 var sb strings.Builder 65 pos := 0 66 for i := 0; i < len(patternStr)-11; { 67 r, size := utf8.DecodeRuneInString(patternStr[i:]) 68 if r == '\\' { 69 i++ 70 if patternStr[i] == 'u' && patternStr[i+5] == '\\' && patternStr[i+6] == 'u' { 71 if first, ok := decodeHex(patternStr[i+1 : i+5]); ok { 72 if isUTF16FirstSurrogate(uint16(first)) { 73 if second, ok := decodeHex(patternStr[i+7 : i+11]); ok { 74 if isUTF16SecondSurrogate(uint16(second)) { 75 r = utf16.DecodeRune(rune(first), rune(second)) 76 sb.WriteString(patternStr[pos : i-1]) 77 sb.WriteRune(r) 78 i += 11 79 pos = i 80 continue 81 } 82 } 83 } 84 } 85 } 86 i++ 87 } else { 88 i += size 89 } 90 } 91 if pos > 0 { 92 sb.WriteString(patternStr[pos:]) 93 return sb.String() 94 } 95 return patternStr 96 } 97 98 // Convert any extended unicode characters to UTF-16 in the form of \uXXXX\uXXXX 99 func convertRegexpToUtf16(patternStr string) string { 100 var sb strings.Builder 101 pos := 0 102 var prevRune rune 103 for i := 0; i < len(patternStr); { 104 r, size := utf8.DecodeRuneInString(patternStr[i:]) 105 if r > 0xFFFF { 106 sb.WriteString(patternStr[pos:i]) 107 if prevRune == '\\' { 108 sb.WriteRune('\\') 109 } 110 first, second := utf16.EncodeRune(r) 111 sb.WriteString(`\u`) 112 writeHex4(&sb, int(first)) 113 sb.WriteString(`\u`) 114 writeHex4(&sb, int(second)) 115 pos = i + size 116 } 117 i += size 118 prevRune = r 119 } 120 if pos > 0 { 121 sb.WriteString(patternStr[pos:]) 122 return sb.String() 123 } 124 return patternStr 125 } 126 127 // convert any broken UTF-16 surrogate pairs to \uXXXX 128 func escapeInvalidUtf16(s String) string { 129 if imported, ok := s.(*importedString); ok { 130 return imported.s 131 } 132 if ascii, ok := s.(asciiString); ok { 133 return ascii.String() 134 } 135 var sb strings.Builder 136 rd := &lenientUtf16Decoder{utf16Reader: s.utf16Reader()} 137 pos := 0 138 utf8Size := 0 139 var utf8Buf [utf8.UTFMax]byte 140 for { 141 c, size, err := rd.ReadRune() 142 if err != nil { 143 break 144 } 145 if utf16.IsSurrogate(c) { 146 if sb.Len() == 0 { 147 sb.Grow(utf8Size + 7) 148 hrd := s.Reader() 149 var c rune 150 for p := 0; p < pos; { 151 var size int 152 var err error 153 c, size, err = hrd.ReadRune() 154 if err != nil { 155 // will not happen 156 panic(fmt.Errorf("error while reading string head %q, pos: %d: %w", s.String(), pos, err)) 157 } 158 sb.WriteRune(c) 159 p += size 160 } 161 if c == '\\' { 162 sb.WriteRune(c) 163 } 164 } 165 sb.WriteString(`\u`) 166 writeHex4(&sb, int(c)) 167 } else { 168 if sb.Len() > 0 { 169 sb.WriteRune(c) 170 } else { 171 utf8Size += utf8.EncodeRune(utf8Buf[:], c) 172 pos += size 173 } 174 } 175 } 176 if sb.Len() > 0 { 177 return sb.String() 178 } 179 return s.String() 180 } 181 182 func compileRegexpFromValueString(patternStr String, flags string) (*regexpPattern, error) { 183 return compileRegexp(escapeInvalidUtf16(patternStr), flags) 184 } 185 186 func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) { 187 var global, ignoreCase, multiline, sticky, unicode bool 188 var wrapper *regexpWrapper 189 var wrapper2 *regexp2Wrapper 190 191 if flags != "" { 192 invalidFlags := func() { 193 err = fmt.Errorf("Invalid flags supplied to RegExp constructor '%s'", flags) 194 } 195 for _, chr := range flags { 196 switch chr { 197 case 'g': 198 if global { 199 invalidFlags() 200 return 201 } 202 global = true 203 case 'm': 204 if multiline { 205 invalidFlags() 206 return 207 } 208 multiline = true 209 case 'i': 210 if ignoreCase { 211 invalidFlags() 212 return 213 } 214 ignoreCase = true 215 case 'y': 216 if sticky { 217 invalidFlags() 218 return 219 } 220 sticky = true 221 case 'u': 222 if unicode { 223 invalidFlags() 224 } 225 unicode = true 226 default: 227 invalidFlags() 228 return 229 } 230 } 231 } 232 233 if unicode { 234 patternStr = convertRegexpToUnicode(patternStr) 235 } else { 236 patternStr = convertRegexpToUtf16(patternStr) 237 } 238 239 re2Str, err1 := parser.TransformRegExp(patternStr) 240 if err1 == nil { 241 re2flags := "" 242 if multiline { 243 re2flags += "m" 244 } 245 if ignoreCase { 246 re2flags += "i" 247 } 248 if len(re2flags) > 0 { 249 re2Str = fmt.Sprintf("(?%s:%s)", re2flags, re2Str) 250 } 251 252 pattern, err1 := regexp.Compile(re2Str) 253 if err1 != nil { 254 err = fmt.Errorf("Invalid regular expression (re2): %s (%v)", re2Str, err1) 255 return 256 } 257 wrapper = (*regexpWrapper)(pattern) 258 } else { 259 if _, incompat := err1.(parser.RegexpErrorIncompatible); !incompat { 260 err = err1 261 return 262 } 263 wrapper2, err = compileRegexp2(patternStr, multiline, ignoreCase) 264 if err != nil { 265 err = fmt.Errorf("Invalid regular expression (regexp2): %s (%v)", patternStr, err) 266 return 267 } 268 } 269 270 p = ®expPattern{ 271 src: patternStr, 272 regexpWrapper: wrapper, 273 regexp2Wrapper: wrapper2, 274 global: global, 275 ignoreCase: ignoreCase, 276 multiline: multiline, 277 sticky: sticky, 278 unicode: unicode, 279 } 280 return 281 } 282 283 func (r *Runtime) _newRegExp(patternStr String, flags string, proto *Object) *regexpObject { 284 pattern, err := compileRegexpFromValueString(patternStr, flags) 285 if err != nil { 286 panic(r.newSyntaxError(err.Error(), -1)) 287 } 288 return r.newRegExpp(pattern, patternStr, proto) 289 } 290 291 func (r *Runtime) builtin_newRegExp(args []Value, proto *Object) *Object { 292 var patternVal, flagsVal Value 293 if len(args) > 0 { 294 patternVal = args[0] 295 } 296 if len(args) > 1 { 297 flagsVal = args[1] 298 } 299 return r.newRegExp(patternVal, flagsVal, proto).val 300 } 301 302 func (r *Runtime) newRegExp(patternVal, flagsVal Value, proto *Object) *regexpObject { 303 var pattern String 304 var flags string 305 if isRegexp(patternVal) { // this may have side effects so need to call it anyway 306 if obj, ok := patternVal.(*Object); ok { 307 if rx, ok := obj.self.(*regexpObject); ok { 308 if flagsVal == nil || flagsVal == _undefined { 309 return rx.clone() 310 } else { 311 return r._newRegExp(rx.source, flagsVal.toString().String(), proto) 312 } 313 } else { 314 pattern = nilSafe(obj.self.getStr("source", nil)).toString() 315 if flagsVal == nil || flagsVal == _undefined { 316 flags = nilSafe(obj.self.getStr("flags", nil)).toString().String() 317 } else { 318 flags = flagsVal.toString().String() 319 } 320 goto exit 321 } 322 } 323 } 324 325 if patternVal != nil && patternVal != _undefined { 326 pattern = patternVal.toString() 327 } 328 if flagsVal != nil && flagsVal != _undefined { 329 flags = flagsVal.toString().String() 330 } 331 332 if pattern == nil { 333 pattern = stringEmpty 334 } 335 exit: 336 return r._newRegExp(pattern, flags, proto) 337 } 338 339 func (r *Runtime) builtin_RegExp(call FunctionCall) Value { 340 pattern := call.Argument(0) 341 patternIsRegExp := isRegexp(pattern) 342 flags := call.Argument(1) 343 if patternIsRegExp && flags == _undefined { 344 if obj, ok := call.Argument(0).(*Object); ok { 345 patternConstructor := obj.self.getStr("constructor", nil) 346 if patternConstructor == r.global.RegExp { 347 return pattern 348 } 349 } 350 } 351 return r.newRegExp(pattern, flags, r.global.RegExpPrototype).val 352 } 353 354 func (r *Runtime) regexpproto_compile(call FunctionCall) Value { 355 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 356 var ( 357 pattern *regexpPattern 358 source String 359 flags string 360 err error 361 ) 362 patternVal := call.Argument(0) 363 flagsVal := call.Argument(1) 364 if o, ok := patternVal.(*Object); ok { 365 if p, ok := o.self.(*regexpObject); ok { 366 if flagsVal != _undefined { 367 panic(r.NewTypeError("Cannot supply flags when constructing one RegExp from another")) 368 } 369 this.pattern = p.pattern 370 this.source = p.source 371 goto exit 372 } 373 } 374 if patternVal != _undefined { 375 source = patternVal.toString() 376 } else { 377 source = stringEmpty 378 } 379 if flagsVal != _undefined { 380 flags = flagsVal.toString().String() 381 } 382 pattern, err = compileRegexpFromValueString(source, flags) 383 if err != nil { 384 panic(r.newSyntaxError(err.Error(), -1)) 385 } 386 this.pattern = pattern 387 this.source = source 388 exit: 389 this.setOwnStr("lastIndex", intToValue(0), true) 390 return call.This 391 } 392 393 panic(r.NewTypeError("Method RegExp.prototype.compile called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This}))) 394 } 395 396 func (r *Runtime) regexpproto_exec(call FunctionCall) Value { 397 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 398 return this.exec(call.Argument(0).toString()) 399 } else { 400 r.typeErrorResult(true, "Method RegExp.prototype.exec called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This})) 401 return nil 402 } 403 } 404 405 func (r *Runtime) regexpproto_test(call FunctionCall) Value { 406 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 407 if this.test(call.Argument(0).toString()) { 408 return valueTrue 409 } else { 410 return valueFalse 411 } 412 } else { 413 panic(r.NewTypeError("Method RegExp.prototype.test called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This}))) 414 } 415 } 416 417 func (r *Runtime) regexpproto_toString(call FunctionCall) Value { 418 obj := r.toObject(call.This) 419 if this := r.checkStdRegexp(obj); this != nil { 420 var sb StringBuilder 421 sb.WriteRune('/') 422 if !this.writeEscapedSource(&sb) { 423 sb.WriteString(this.source) 424 } 425 sb.WriteRune('/') 426 if this.pattern.global { 427 sb.WriteRune('g') 428 } 429 if this.pattern.ignoreCase { 430 sb.WriteRune('i') 431 } 432 if this.pattern.multiline { 433 sb.WriteRune('m') 434 } 435 if this.pattern.unicode { 436 sb.WriteRune('u') 437 } 438 if this.pattern.sticky { 439 sb.WriteRune('y') 440 } 441 return sb.String() 442 } 443 pattern := nilSafe(obj.self.getStr("source", nil)).toString() 444 flags := nilSafe(obj.self.getStr("flags", nil)).toString() 445 var sb StringBuilder 446 sb.WriteRune('/') 447 sb.WriteString(pattern) 448 sb.WriteRune('/') 449 sb.WriteString(flags) 450 return sb.String() 451 } 452 453 func (r *regexpObject) writeEscapedSource(sb *StringBuilder) bool { 454 if r.source.Length() == 0 { 455 sb.WriteString(asciiString("(?:)")) 456 return true 457 } 458 pos := 0 459 lastPos := 0 460 rd := &lenientUtf16Decoder{utf16Reader: r.source.utf16Reader()} 461 L: 462 for { 463 c, size, err := rd.ReadRune() 464 if err != nil { 465 break 466 } 467 switch c { 468 case '\\': 469 pos++ 470 _, size, err = rd.ReadRune() 471 if err != nil { 472 break L 473 } 474 case '/', '\u000a', '\u000d', '\u2028', '\u2029': 475 sb.WriteSubstring(r.source, lastPos, pos) 476 sb.WriteRune('\\') 477 switch c { 478 case '\u000a': 479 sb.WriteRune('n') 480 case '\u000d': 481 sb.WriteRune('r') 482 default: 483 sb.WriteRune('u') 484 sb.WriteRune(rune(hex[c>>12])) 485 sb.WriteRune(rune(hex[(c>>8)&0xF])) 486 sb.WriteRune(rune(hex[(c>>4)&0xF])) 487 sb.WriteRune(rune(hex[c&0xF])) 488 } 489 lastPos = pos + size 490 } 491 pos += size 492 } 493 if lastPos > 0 { 494 sb.WriteSubstring(r.source, lastPos, r.source.Length()) 495 return true 496 } 497 return false 498 } 499 500 func (r *Runtime) regexpproto_getSource(call FunctionCall) Value { 501 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 502 var sb StringBuilder 503 if this.writeEscapedSource(&sb) { 504 return sb.String() 505 } 506 return this.source 507 } else if call.This == r.global.RegExpPrototype { 508 return asciiString("(?:)") 509 } else { 510 panic(r.NewTypeError("Method RegExp.prototype.source getter called on incompatible receiver")) 511 } 512 } 513 514 func (r *Runtime) regexpproto_getGlobal(call FunctionCall) Value { 515 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 516 if this.pattern.global { 517 return valueTrue 518 } else { 519 return valueFalse 520 } 521 } else if call.This == r.global.RegExpPrototype { 522 return _undefined 523 } else { 524 panic(r.NewTypeError("Method RegExp.prototype.global getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This}))) 525 } 526 } 527 528 func (r *Runtime) regexpproto_getMultiline(call FunctionCall) Value { 529 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 530 if this.pattern.multiline { 531 return valueTrue 532 } else { 533 return valueFalse 534 } 535 } else if call.This == r.global.RegExpPrototype { 536 return _undefined 537 } else { 538 panic(r.NewTypeError("Method RegExp.prototype.multiline getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This}))) 539 } 540 } 541 542 func (r *Runtime) regexpproto_getIgnoreCase(call FunctionCall) Value { 543 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 544 if this.pattern.ignoreCase { 545 return valueTrue 546 } else { 547 return valueFalse 548 } 549 } else if call.This == r.global.RegExpPrototype { 550 return _undefined 551 } else { 552 panic(r.NewTypeError("Method RegExp.prototype.ignoreCase getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This}))) 553 } 554 } 555 556 func (r *Runtime) regexpproto_getUnicode(call FunctionCall) Value { 557 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 558 if this.pattern.unicode { 559 return valueTrue 560 } else { 561 return valueFalse 562 } 563 } else if call.This == r.global.RegExpPrototype { 564 return _undefined 565 } else { 566 panic(r.NewTypeError("Method RegExp.prototype.unicode getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This}))) 567 } 568 } 569 570 func (r *Runtime) regexpproto_getSticky(call FunctionCall) Value { 571 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 572 if this.pattern.sticky { 573 return valueTrue 574 } else { 575 return valueFalse 576 } 577 } else if call.This == r.global.RegExpPrototype { 578 return _undefined 579 } else { 580 panic(r.NewTypeError("Method RegExp.prototype.sticky getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This}))) 581 } 582 } 583 584 func (r *Runtime) regexpproto_getFlags(call FunctionCall) Value { 585 var global, ignoreCase, multiline, sticky, unicode bool 586 587 thisObj := r.toObject(call.This) 588 size := 0 589 if v := thisObj.self.getStr("global", nil); v != nil { 590 global = v.ToBoolean() 591 if global { 592 size++ 593 } 594 } 595 if v := thisObj.self.getStr("ignoreCase", nil); v != nil { 596 ignoreCase = v.ToBoolean() 597 if ignoreCase { 598 size++ 599 } 600 } 601 if v := thisObj.self.getStr("multiline", nil); v != nil { 602 multiline = v.ToBoolean() 603 if multiline { 604 size++ 605 } 606 } 607 if v := thisObj.self.getStr("sticky", nil); v != nil { 608 sticky = v.ToBoolean() 609 if sticky { 610 size++ 611 } 612 } 613 if v := thisObj.self.getStr("unicode", nil); v != nil { 614 unicode = v.ToBoolean() 615 if unicode { 616 size++ 617 } 618 } 619 620 var sb strings.Builder 621 sb.Grow(size) 622 if global { 623 sb.WriteByte('g') 624 } 625 if ignoreCase { 626 sb.WriteByte('i') 627 } 628 if multiline { 629 sb.WriteByte('m') 630 } 631 if unicode { 632 sb.WriteByte('u') 633 } 634 if sticky { 635 sb.WriteByte('y') 636 } 637 638 return asciiString(sb.String()) 639 } 640 641 func (r *Runtime) regExpExec(execFn func(FunctionCall) Value, rxObj *Object, arg Value) Value { 642 res := execFn(FunctionCall{ 643 This: rxObj, 644 Arguments: []Value{arg}, 645 }) 646 647 if res != _null { 648 if _, ok := res.(*Object); !ok { 649 panic(r.NewTypeError("RegExp exec method returned something other than an Object or null")) 650 } 651 } 652 653 return res 654 } 655 656 func (r *Runtime) getGlobalRegexpMatches(rxObj *Object, s String) []Value { 657 fullUnicode := nilSafe(rxObj.self.getStr("unicode", nil)).ToBoolean() 658 rxObj.self.setOwnStr("lastIndex", intToValue(0), true) 659 execFn, ok := r.toObject(rxObj.self.getStr("exec", nil)).self.assertCallable() 660 if !ok { 661 panic(r.NewTypeError("exec is not a function")) 662 } 663 var a []Value 664 for { 665 res := r.regExpExec(execFn, rxObj, s) 666 if res == _null { 667 break 668 } 669 a = append(a, res) 670 matchStr := nilSafe(r.toObject(res).self.getIdx(valueInt(0), nil)).toString() 671 if matchStr.Length() == 0 { 672 thisIndex := toLength(rxObj.self.getStr("lastIndex", nil)) 673 rxObj.self.setOwnStr("lastIndex", valueInt(advanceStringIndex64(s, thisIndex, fullUnicode)), true) 674 } 675 } 676 677 return a 678 } 679 680 func (r *Runtime) regexpproto_stdMatcherGeneric(rxObj *Object, s String) Value { 681 rx := rxObj.self 682 global := rx.getStr("global", nil) 683 if global != nil && global.ToBoolean() { 684 a := r.getGlobalRegexpMatches(rxObj, s) 685 if len(a) == 0 { 686 return _null 687 } 688 ar := make([]Value, 0, len(a)) 689 for _, result := range a { 690 obj := r.toObject(result) 691 matchStr := nilSafe(obj.self.getIdx(valueInt(0), nil)).ToString() 692 ar = append(ar, matchStr) 693 } 694 return r.newArrayValues(ar) 695 } 696 697 execFn, ok := r.toObject(rx.getStr("exec", nil)).self.assertCallable() 698 if !ok { 699 panic(r.NewTypeError("exec is not a function")) 700 } 701 702 return r.regExpExec(execFn, rxObj, s) 703 } 704 705 func (r *Runtime) checkStdRegexp(rxObj *Object) *regexpObject { 706 if deoptimiseRegexp { 707 return nil 708 } 709 710 rx, ok := rxObj.self.(*regexpObject) 711 if !ok { 712 return nil 713 } 714 715 if !rx.standard || rx.prototype == nil || rx.prototype.self != r.global.stdRegexpProto { 716 return nil 717 } 718 719 return rx 720 } 721 722 func (r *Runtime) regexpproto_stdMatcher(call FunctionCall) Value { 723 thisObj := r.toObject(call.This) 724 s := call.Argument(0).toString() 725 rx := r.checkStdRegexp(thisObj) 726 if rx == nil { 727 return r.regexpproto_stdMatcherGeneric(thisObj, s) 728 } 729 if rx.pattern.global { 730 res := rx.pattern.findAllSubmatchIndex(s, 0, -1, rx.pattern.sticky) 731 if len(res) == 0 { 732 rx.setOwnStr("lastIndex", intToValue(0), true) 733 return _null 734 } 735 a := make([]Value, 0, len(res)) 736 for _, result := range res { 737 a = append(a, s.Substring(result[0], result[1])) 738 } 739 rx.setOwnStr("lastIndex", intToValue(int64(res[len(res)-1][1])), true) 740 return r.newArrayValues(a) 741 } else { 742 return rx.exec(s) 743 } 744 } 745 746 func (r *Runtime) regexpproto_stdSearchGeneric(rxObj *Object, arg String) Value { 747 rx := rxObj.self 748 previousLastIndex := nilSafe(rx.getStr("lastIndex", nil)) 749 zero := intToValue(0) 750 if !previousLastIndex.SameAs(zero) { 751 rx.setOwnStr("lastIndex", zero, true) 752 } 753 execFn, ok := r.toObject(rx.getStr("exec", nil)).self.assertCallable() 754 if !ok { 755 panic(r.NewTypeError("exec is not a function")) 756 } 757 758 result := r.regExpExec(execFn, rxObj, arg) 759 currentLastIndex := nilSafe(rx.getStr("lastIndex", nil)) 760 if !currentLastIndex.SameAs(previousLastIndex) { 761 rx.setOwnStr("lastIndex", previousLastIndex, true) 762 } 763 764 if result == _null { 765 return intToValue(-1) 766 } 767 768 return r.toObject(result).self.getStr("index", nil) 769 } 770 771 func (r *Runtime) regexpproto_stdMatcherAll(call FunctionCall) Value { 772 thisObj := r.toObject(call.This) 773 s := call.Argument(0).toString() 774 flags := nilSafe(thisObj.self.getStr("flags", nil)).toString() 775 c := r.speciesConstructorObj(call.This.(*Object), r.global.RegExp) 776 matcher := r.toConstructor(c)([]Value{call.This, flags}, nil) 777 matcher.self.setOwnStr("lastIndex", valueInt(toLength(thisObj.self.getStr("lastIndex", nil))), true) 778 flagsStr := flags.String() 779 global := strings.Contains(flagsStr, "g") 780 fullUnicode := strings.Contains(flagsStr, "u") 781 return r.createRegExpStringIterator(matcher, s, global, fullUnicode) 782 } 783 784 func (r *Runtime) createRegExpStringIterator(matcher *Object, s String, global, fullUnicode bool) Value { 785 o := &Object{runtime: r} 786 787 ri := ®ExpStringIterObject{ 788 matcher: matcher, 789 s: s, 790 global: global, 791 fullUnicode: fullUnicode, 792 } 793 ri.class = classObject 794 ri.val = o 795 ri.extensible = true 796 o.self = ri 797 ri.prototype = r.getRegExpStringIteratorPrototype() 798 ri.init() 799 800 return o 801 } 802 803 type regExpStringIterObject struct { 804 baseObject 805 matcher *Object 806 s String 807 global, fullUnicode, done bool 808 } 809 810 // RegExpExec as defined in 21.2.5.2.1 811 func regExpExec(r *Object, s String) Value { 812 exec := r.self.getStr("exec", nil) 813 if execObject, ok := exec.(*Object); ok { 814 if execFn, ok := execObject.self.assertCallable(); ok { 815 return r.runtime.regExpExec(execFn, r, s) 816 } 817 } 818 if rx, ok := r.self.(*regexpObject); ok { 819 return rx.exec(s) 820 } 821 panic(r.runtime.NewTypeError("no RegExpMatcher internal slot")) 822 } 823 824 func (ri *regExpStringIterObject) next() (v Value) { 825 if ri.done { 826 return ri.val.runtime.createIterResultObject(_undefined, true) 827 } 828 829 match := regExpExec(ri.matcher, ri.s) 830 if IsNull(match) { 831 ri.done = true 832 return ri.val.runtime.createIterResultObject(_undefined, true) 833 } 834 if !ri.global { 835 ri.done = true 836 return ri.val.runtime.createIterResultObject(match, false) 837 } 838 839 matchStr := nilSafe(ri.val.runtime.toObject(match).self.getIdx(valueInt(0), nil)).toString() 840 if matchStr.Length() == 0 { 841 thisIndex := toLength(ri.matcher.self.getStr("lastIndex", nil)) 842 ri.matcher.self.setOwnStr("lastIndex", valueInt(advanceStringIndex64(ri.s, thisIndex, ri.fullUnicode)), true) 843 } 844 return ri.val.runtime.createIterResultObject(match, false) 845 } 846 847 func (r *Runtime) regexpproto_stdSearch(call FunctionCall) Value { 848 thisObj := r.toObject(call.This) 849 s := call.Argument(0).toString() 850 rx := r.checkStdRegexp(thisObj) 851 if rx == nil { 852 return r.regexpproto_stdSearchGeneric(thisObj, s) 853 } 854 855 previousLastIndex := rx.getStr("lastIndex", nil) 856 rx.setOwnStr("lastIndex", intToValue(0), true) 857 858 match, result := rx.execRegexp(s) 859 rx.setOwnStr("lastIndex", previousLastIndex, true) 860 861 if !match { 862 return intToValue(-1) 863 } 864 return intToValue(int64(result[0])) 865 } 866 867 func (r *Runtime) regexpproto_stdSplitterGeneric(splitter *Object, s String, limit Value, unicodeMatching bool) Value { 868 var a []Value 869 var lim int64 870 if limit == nil || limit == _undefined { 871 lim = maxInt - 1 872 } else { 873 lim = toLength(limit) 874 } 875 if lim == 0 { 876 return r.newArrayValues(a) 877 } 878 size := s.Length() 879 p := 0 880 execFn := toMethod(splitter.ToObject(r).self.getStr("exec", nil)) // must be non-nil 881 882 if size == 0 { 883 if r.regExpExec(execFn, splitter, s) == _null { 884 a = append(a, s) 885 } 886 return r.newArrayValues(a) 887 } 888 889 q := p 890 for q < size { 891 splitter.self.setOwnStr("lastIndex", intToValue(int64(q)), true) 892 z := r.regExpExec(execFn, splitter, s) 893 if z == _null { 894 q = advanceStringIndex(s, q, unicodeMatching) 895 } else { 896 z := r.toObject(z) 897 e := toLength(splitter.self.getStr("lastIndex", nil)) 898 if e == int64(p) { 899 q = advanceStringIndex(s, q, unicodeMatching) 900 } else { 901 a = append(a, s.Substring(p, q)) 902 if int64(len(a)) == lim { 903 return r.newArrayValues(a) 904 } 905 if e > int64(size) { 906 p = size 907 } else { 908 p = int(e) 909 } 910 numberOfCaptures := max(toLength(z.self.getStr("length", nil))-1, 0) 911 for i := int64(1); i <= numberOfCaptures; i++ { 912 a = append(a, nilSafe(z.self.getIdx(valueInt(i), nil))) 913 if int64(len(a)) == lim { 914 return r.newArrayValues(a) 915 } 916 } 917 q = p 918 } 919 } 920 } 921 a = append(a, s.Substring(p, size)) 922 return r.newArrayValues(a) 923 } 924 925 func advanceStringIndex(s String, pos int, unicode bool) int { 926 next := pos + 1 927 if !unicode { 928 return next 929 } 930 l := s.Length() 931 if next >= l { 932 return next 933 } 934 if !isUTF16FirstSurrogate(s.CharAt(pos)) { 935 return next 936 } 937 if !isUTF16SecondSurrogate(s.CharAt(next)) { 938 return next 939 } 940 return next + 1 941 } 942 943 func advanceStringIndex64(s String, pos int64, unicode bool) int64 { 944 next := pos + 1 945 if !unicode { 946 return next 947 } 948 l := int64(s.Length()) 949 if next >= l { 950 return next 951 } 952 if !isUTF16FirstSurrogate(s.CharAt(int(pos))) { 953 return next 954 } 955 if !isUTF16SecondSurrogate(s.CharAt(int(next))) { 956 return next 957 } 958 return next + 1 959 } 960 961 func (r *Runtime) regexpproto_stdSplitter(call FunctionCall) Value { 962 rxObj := r.toObject(call.This) 963 s := call.Argument(0).toString() 964 limitValue := call.Argument(1) 965 var splitter *Object 966 search := r.checkStdRegexp(rxObj) 967 c := r.speciesConstructorObj(rxObj, r.global.RegExp) 968 if search == nil || c != r.global.RegExp { 969 flags := nilSafe(rxObj.self.getStr("flags", nil)).toString() 970 flagsStr := flags.String() 971 972 // Add 'y' flag if missing 973 if !strings.Contains(flagsStr, "y") { 974 flags = flags.Concat(asciiString("y")) 975 } 976 splitter = r.toConstructor(c)([]Value{rxObj, flags}, nil) 977 search = r.checkStdRegexp(splitter) 978 if search == nil { 979 return r.regexpproto_stdSplitterGeneric(splitter, s, limitValue, strings.Contains(flagsStr, "u")) 980 } 981 } 982 983 pattern := search.pattern // toUint32() may recompile the pattern, but we still need to use the original 984 limit := -1 985 if limitValue != _undefined { 986 limit = int(toUint32(limitValue)) 987 } 988 989 if limit == 0 { 990 return r.newArrayValues(nil) 991 } 992 993 targetLength := s.Length() 994 var valueArray []Value 995 lastIndex := 0 996 found := 0 997 998 result := pattern.findAllSubmatchIndex(s, 0, -1, false) 999 if targetLength == 0 { 1000 if result == nil { 1001 valueArray = append(valueArray, s) 1002 } 1003 goto RETURN 1004 } 1005 1006 for _, match := range result { 1007 if match[0] == match[1] { 1008 // FIXME Ugh, this is a hack 1009 if match[0] == 0 || match[0] == targetLength { 1010 continue 1011 } 1012 } 1013 1014 if lastIndex != match[0] { 1015 valueArray = append(valueArray, s.Substring(lastIndex, match[0])) 1016 found++ 1017 } else if lastIndex == match[0] { 1018 if lastIndex != -1 { 1019 valueArray = append(valueArray, stringEmpty) 1020 found++ 1021 } 1022 } 1023 1024 lastIndex = match[1] 1025 if found == limit { 1026 goto RETURN 1027 } 1028 1029 captureCount := len(match) / 2 1030 for index := 1; index < captureCount; index++ { 1031 offset := index * 2 1032 var value Value 1033 if match[offset] != -1 { 1034 value = s.Substring(match[offset], match[offset+1]) 1035 } else { 1036 value = _undefined 1037 } 1038 valueArray = append(valueArray, value) 1039 found++ 1040 if found == limit { 1041 goto RETURN 1042 } 1043 } 1044 } 1045 1046 if found != limit { 1047 if lastIndex != targetLength { 1048 valueArray = append(valueArray, s.Substring(lastIndex, targetLength)) 1049 } else { 1050 valueArray = append(valueArray, stringEmpty) 1051 } 1052 } 1053 1054 RETURN: 1055 return r.newArrayValues(valueArray) 1056 } 1057 1058 func (r *Runtime) regexpproto_stdReplacerGeneric(rxObj *Object, s, replaceStr String, rcall func(FunctionCall) Value) Value { 1059 var results []Value 1060 if nilSafe(rxObj.self.getStr("global", nil)).ToBoolean() { 1061 results = r.getGlobalRegexpMatches(rxObj, s) 1062 } else { 1063 execFn := toMethod(rxObj.self.getStr("exec", nil)) // must be non-nil 1064 result := r.regExpExec(execFn, rxObj, s) 1065 if result != _null { 1066 results = append(results, result) 1067 } 1068 } 1069 lengthS := s.Length() 1070 nextSourcePosition := 0 1071 var resultBuf StringBuilder 1072 for _, result := range results { 1073 obj := r.toObject(result) 1074 nCaptures := max(toLength(obj.self.getStr("length", nil))-1, 0) 1075 matched := nilSafe(obj.self.getIdx(valueInt(0), nil)).toString() 1076 matchLength := matched.Length() 1077 position := toIntStrict(max(min(nilSafe(obj.self.getStr("index", nil)).ToInteger(), int64(lengthS)), 0)) 1078 var captures []Value 1079 if rcall != nil { 1080 captures = make([]Value, 0, nCaptures+3) 1081 } else { 1082 captures = make([]Value, 0, nCaptures+1) 1083 } 1084 captures = append(captures, matched) 1085 for n := int64(1); n <= nCaptures; n++ { 1086 capN := nilSafe(obj.self.getIdx(valueInt(n), nil)) 1087 if capN != _undefined { 1088 capN = capN.ToString() 1089 } 1090 captures = append(captures, capN) 1091 } 1092 var replacement String 1093 if rcall != nil { 1094 captures = append(captures, intToValue(int64(position)), s) 1095 replacement = rcall(FunctionCall{ 1096 This: _undefined, 1097 Arguments: captures, 1098 }).toString() 1099 if position >= nextSourcePosition { 1100 resultBuf.WriteString(s.Substring(nextSourcePosition, position)) 1101 resultBuf.WriteString(replacement) 1102 nextSourcePosition = position + matchLength 1103 } 1104 } else { 1105 if position >= nextSourcePosition { 1106 resultBuf.WriteString(s.Substring(nextSourcePosition, position)) 1107 writeSubstitution(s, position, len(captures), func(idx int) String { 1108 capture := captures[idx] 1109 if capture != _undefined { 1110 return capture.toString() 1111 } 1112 return stringEmpty 1113 }, replaceStr, &resultBuf) 1114 nextSourcePosition = position + matchLength 1115 } 1116 } 1117 } 1118 if nextSourcePosition < lengthS { 1119 resultBuf.WriteString(s.Substring(nextSourcePosition, lengthS)) 1120 } 1121 return resultBuf.String() 1122 } 1123 1124 func writeSubstitution(s String, position int, numCaptures int, getCapture func(int) String, replaceStr String, buf *StringBuilder) { 1125 l := s.Length() 1126 rl := replaceStr.Length() 1127 matched := getCapture(0) 1128 tailPos := position + matched.Length() 1129 1130 for i := 0; i < rl; i++ { 1131 c := replaceStr.CharAt(i) 1132 if c == '$' && i < rl-1 { 1133 ch := replaceStr.CharAt(i + 1) 1134 switch ch { 1135 case '$': 1136 buf.WriteRune('$') 1137 case '`': 1138 buf.WriteString(s.Substring(0, position)) 1139 case '\'': 1140 if tailPos < l { 1141 buf.WriteString(s.Substring(tailPos, l)) 1142 } 1143 case '&': 1144 buf.WriteString(matched) 1145 default: 1146 matchNumber := 0 1147 j := i + 1 1148 for j < rl { 1149 ch := replaceStr.CharAt(j) 1150 if ch >= '0' && ch <= '9' { 1151 m := matchNumber*10 + int(ch-'0') 1152 if m >= numCaptures { 1153 break 1154 } 1155 matchNumber = m 1156 j++ 1157 } else { 1158 break 1159 } 1160 } 1161 if matchNumber > 0 { 1162 buf.WriteString(getCapture(matchNumber)) 1163 i = j - 1 1164 continue 1165 } else { 1166 buf.WriteRune('$') 1167 buf.WriteRune(rune(ch)) 1168 } 1169 } 1170 i++ 1171 } else { 1172 buf.WriteRune(rune(c)) 1173 } 1174 } 1175 } 1176 1177 func (r *Runtime) regexpproto_stdReplacer(call FunctionCall) Value { 1178 rxObj := r.toObject(call.This) 1179 s := call.Argument(0).toString() 1180 replaceStr, rcall := getReplaceValue(call.Argument(1)) 1181 1182 rx := r.checkStdRegexp(rxObj) 1183 if rx == nil { 1184 return r.regexpproto_stdReplacerGeneric(rxObj, s, replaceStr, rcall) 1185 } 1186 1187 var index int64 1188 find := 1 1189 if rx.pattern.global { 1190 find = -1 1191 rx.setOwnStr("lastIndex", intToValue(0), true) 1192 } else { 1193 index = rx.getLastIndex() 1194 } 1195 found := rx.pattern.findAllSubmatchIndex(s, toIntStrict(index), find, rx.pattern.sticky) 1196 if len(found) > 0 { 1197 if !rx.updateLastIndex(index, found[0], found[len(found)-1]) { 1198 found = nil 1199 } 1200 } else { 1201 rx.updateLastIndex(index, nil, nil) 1202 } 1203 1204 return stringReplace(s, found, replaceStr, rcall) 1205 } 1206 1207 func (r *Runtime) regExpStringIteratorProto_next(call FunctionCall) Value { 1208 thisObj := r.toObject(call.This) 1209 if iter, ok := thisObj.self.(*regExpStringIterObject); ok { 1210 return iter.next() 1211 } 1212 panic(r.NewTypeError("Method RegExp String Iterator.prototype.next called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: thisObj}))) 1213 } 1214 1215 func (r *Runtime) createRegExpStringIteratorPrototype(val *Object) objectImpl { 1216 o := newBaseObjectObj(val, r.getIteratorPrototype(), classObject) 1217 1218 o._putProp("next", r.newNativeFunc(r.regExpStringIteratorProto_next, nil, "next", nil, 0), true, false, true) 1219 o._putSym(SymToStringTag, valueProp(asciiString(classRegExpStringIterator), false, false, true)) 1220 1221 return o 1222 } 1223 1224 func (r *Runtime) getRegExpStringIteratorPrototype() *Object { 1225 var o *Object 1226 if o = r.global.RegExpStringIteratorPrototype; o == nil { 1227 o = &Object{runtime: r} 1228 r.global.RegExpStringIteratorPrototype = o 1229 o.self = r.createRegExpStringIteratorPrototype(o) 1230 } 1231 return o 1232 } 1233 1234 func (r *Runtime) initRegExp() { 1235 o := r.newGuardedObject(r.global.ObjectPrototype, classObject) 1236 r.global.RegExpPrototype = o.val 1237 r.global.stdRegexpProto = o 1238 1239 o._putProp("compile", r.newNativeFunc(r.regexpproto_compile, nil, "compile", nil, 2), true, false, true) 1240 o._putProp("exec", r.newNativeFunc(r.regexpproto_exec, nil, "exec", nil, 1), true, false, true) 1241 o._putProp("test", r.newNativeFunc(r.regexpproto_test, nil, "test", nil, 1), true, false, true) 1242 o._putProp("toString", r.newNativeFunc(r.regexpproto_toString, nil, "toString", nil, 0), true, false, true) 1243 o.setOwnStr("source", &valueProperty{ 1244 configurable: true, 1245 getterFunc: r.newNativeFunc(r.regexpproto_getSource, nil, "get source", nil, 0), 1246 accessor: true, 1247 }, false) 1248 o.setOwnStr("global", &valueProperty{ 1249 configurable: true, 1250 getterFunc: r.newNativeFunc(r.regexpproto_getGlobal, nil, "get global", nil, 0), 1251 accessor: true, 1252 }, false) 1253 o.setOwnStr("multiline", &valueProperty{ 1254 configurable: true, 1255 getterFunc: r.newNativeFunc(r.regexpproto_getMultiline, nil, "get multiline", nil, 0), 1256 accessor: true, 1257 }, false) 1258 o.setOwnStr("ignoreCase", &valueProperty{ 1259 configurable: true, 1260 getterFunc: r.newNativeFunc(r.regexpproto_getIgnoreCase, nil, "get ignoreCase", nil, 0), 1261 accessor: true, 1262 }, false) 1263 o.setOwnStr("unicode", &valueProperty{ 1264 configurable: true, 1265 getterFunc: r.newNativeFunc(r.regexpproto_getUnicode, nil, "get unicode", nil, 0), 1266 accessor: true, 1267 }, false) 1268 o.setOwnStr("sticky", &valueProperty{ 1269 configurable: true, 1270 getterFunc: r.newNativeFunc(r.regexpproto_getSticky, nil, "get sticky", nil, 0), 1271 accessor: true, 1272 }, false) 1273 o.setOwnStr("flags", &valueProperty{ 1274 configurable: true, 1275 getterFunc: r.newNativeFunc(r.regexpproto_getFlags, nil, "get flags", nil, 0), 1276 accessor: true, 1277 }, false) 1278 1279 o._putSym(SymMatch, valueProp(r.newNativeFunc(r.regexpproto_stdMatcher, nil, "[Symbol.match]", nil, 1), true, false, true)) 1280 o._putSym(SymMatchAll, valueProp(r.newNativeFunc(r.regexpproto_stdMatcherAll, nil, "[Symbol.matchAll]", nil, 1), true, false, true)) 1281 o._putSym(SymSearch, valueProp(r.newNativeFunc(r.regexpproto_stdSearch, nil, "[Symbol.search]", nil, 1), true, false, true)) 1282 o._putSym(SymSplit, valueProp(r.newNativeFunc(r.regexpproto_stdSplitter, nil, "[Symbol.split]", nil, 2), true, false, true)) 1283 o._putSym(SymReplace, valueProp(r.newNativeFunc(r.regexpproto_stdReplacer, nil, "[Symbol.replace]", nil, 2), true, false, true)) 1284 o.guard("exec", "global", "multiline", "ignoreCase", "unicode", "sticky") 1285 1286 r.global.RegExp = r.newNativeFunc(r.builtin_RegExp, r.builtin_newRegExp, "RegExp", r.global.RegExpPrototype, 2) 1287 rx := r.global.RegExp.self 1288 r.putSpeciesReturnThis(rx) 1289 r.addToGlobal("RegExp", r.global.RegExp) 1290 }