github.com/dop251/goja@v0.0.0-20240220182346-e401ed450204/builtin_regexp.go (about) 1 package goja 2 3 import ( 4 "fmt" 5 "github.com/dop251/goja/parser" 6 "regexp" 7 "strings" 8 "unicode/utf16" 9 "unicode/utf8" 10 ) 11 12 func (r *Runtime) newRegexpObject(proto *Object) *regexpObject { 13 v := &Object{runtime: r} 14 15 o := ®expObject{} 16 o.class = classRegExp 17 o.val = v 18 o.extensible = true 19 v.self = o 20 o.prototype = proto 21 o.init() 22 return o 23 } 24 25 func (r *Runtime) newRegExpp(pattern *regexpPattern, patternStr String, proto *Object) *regexpObject { 26 o := r.newRegexpObject(proto) 27 28 o.pattern = pattern 29 o.source = patternStr 30 31 return o 32 } 33 34 func decodeHex(s string) (int, bool) { 35 var hex int 36 for i := 0; i < len(s); i++ { 37 var n byte 38 chr := s[i] 39 switch { 40 case '0' <= chr && chr <= '9': 41 n = chr - '0' 42 case 'a' <= chr && chr <= 'f': 43 n = chr - 'a' + 10 44 case 'A' <= chr && chr <= 'F': 45 n = chr - 'A' + 10 46 default: 47 return 0, false 48 } 49 hex = hex*16 + int(n) 50 } 51 return hex, true 52 } 53 54 func writeHex4(b *strings.Builder, i int) { 55 b.WriteByte(hex[i>>12]) 56 b.WriteByte(hex[(i>>8)&0xF]) 57 b.WriteByte(hex[(i>>4)&0xF]) 58 b.WriteByte(hex[i&0xF]) 59 } 60 61 // Convert any valid surrogate pairs in the form of \uXXXX\uXXXX to unicode characters 62 func convertRegexpToUnicode(patternStr string) string { 63 var sb strings.Builder 64 pos := 0 65 for i := 0; i < len(patternStr)-11; { 66 r, size := utf8.DecodeRuneInString(patternStr[i:]) 67 if r == '\\' { 68 i++ 69 if patternStr[i] == 'u' && patternStr[i+5] == '\\' && patternStr[i+6] == 'u' { 70 if first, ok := decodeHex(patternStr[i+1 : i+5]); ok { 71 if isUTF16FirstSurrogate(uint16(first)) { 72 if second, ok := decodeHex(patternStr[i+7 : i+11]); ok { 73 if isUTF16SecondSurrogate(uint16(second)) { 74 r = utf16.DecodeRune(rune(first), rune(second)) 75 sb.WriteString(patternStr[pos : i-1]) 76 sb.WriteRune(r) 77 i += 11 78 pos = i 79 continue 80 } 81 } 
82 } 83 } 84 } 85 i++ 86 } else { 87 i += size 88 } 89 } 90 if pos > 0 { 91 sb.WriteString(patternStr[pos:]) 92 return sb.String() 93 } 94 return patternStr 95 } 96 97 // Convert any extended unicode characters to UTF-16 in the form of \uXXXX\uXXXX 98 func convertRegexpToUtf16(patternStr string) string { 99 var sb strings.Builder 100 pos := 0 101 var prevRune rune 102 for i := 0; i < len(patternStr); { 103 r, size := utf8.DecodeRuneInString(patternStr[i:]) 104 if r > 0xFFFF { 105 sb.WriteString(patternStr[pos:i]) 106 if prevRune == '\\' { 107 sb.WriteRune('\\') 108 } 109 first, second := utf16.EncodeRune(r) 110 sb.WriteString(`\u`) 111 writeHex4(&sb, int(first)) 112 sb.WriteString(`\u`) 113 writeHex4(&sb, int(second)) 114 pos = i + size 115 } 116 i += size 117 prevRune = r 118 } 119 if pos > 0 { 120 sb.WriteString(patternStr[pos:]) 121 return sb.String() 122 } 123 return patternStr 124 } 125 126 // convert any broken UTF-16 surrogate pairs to \uXXXX 127 func escapeInvalidUtf16(s String) string { 128 if imported, ok := s.(*importedString); ok { 129 return imported.s 130 } 131 if ascii, ok := s.(asciiString); ok { 132 return ascii.String() 133 } 134 var sb strings.Builder 135 rd := &lenientUtf16Decoder{utf16Reader: s.utf16Reader()} 136 pos := 0 137 utf8Size := 0 138 var utf8Buf [utf8.UTFMax]byte 139 for { 140 c, size, err := rd.ReadRune() 141 if err != nil { 142 break 143 } 144 if utf16.IsSurrogate(c) { 145 if sb.Len() == 0 { 146 sb.Grow(utf8Size + 7) 147 hrd := s.Reader() 148 var c rune 149 for p := 0; p < pos; { 150 var size int 151 var err error 152 c, size, err = hrd.ReadRune() 153 if err != nil { 154 // will not happen 155 panic(fmt.Errorf("error while reading string head %q, pos: %d: %w", s.String(), pos, err)) 156 } 157 sb.WriteRune(c) 158 p += size 159 } 160 if c == '\\' { 161 sb.WriteRune(c) 162 } 163 } 164 sb.WriteString(`\u`) 165 writeHex4(&sb, int(c)) 166 } else { 167 if sb.Len() > 0 { 168 sb.WriteRune(c) 169 } else { 170 utf8Size += 
utf8.EncodeRune(utf8Buf[:], c) 171 pos += size 172 } 173 } 174 } 175 if sb.Len() > 0 { 176 return sb.String() 177 } 178 return s.String() 179 } 180 181 func compileRegexpFromValueString(patternStr String, flags string) (*regexpPattern, error) { 182 return compileRegexp(escapeInvalidUtf16(patternStr), flags) 183 } 184 185 func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) { 186 var global, ignoreCase, multiline, sticky, unicode bool 187 var wrapper *regexpWrapper 188 var wrapper2 *regexp2Wrapper 189 190 if flags != "" { 191 invalidFlags := func() { 192 err = fmt.Errorf("Invalid flags supplied to RegExp constructor '%s'", flags) 193 } 194 for _, chr := range flags { 195 switch chr { 196 case 'g': 197 if global { 198 invalidFlags() 199 return 200 } 201 global = true 202 case 'm': 203 if multiline { 204 invalidFlags() 205 return 206 } 207 multiline = true 208 case 'i': 209 if ignoreCase { 210 invalidFlags() 211 return 212 } 213 ignoreCase = true 214 case 'y': 215 if sticky { 216 invalidFlags() 217 return 218 } 219 sticky = true 220 case 'u': 221 if unicode { 222 invalidFlags() 223 } 224 unicode = true 225 default: 226 invalidFlags() 227 return 228 } 229 } 230 } 231 232 if unicode { 233 patternStr = convertRegexpToUnicode(patternStr) 234 } else { 235 patternStr = convertRegexpToUtf16(patternStr) 236 } 237 238 re2Str, err1 := parser.TransformRegExp(patternStr) 239 if err1 == nil { 240 re2flags := "" 241 if multiline { 242 re2flags += "m" 243 } 244 if ignoreCase { 245 re2flags += "i" 246 } 247 if len(re2flags) > 0 { 248 re2Str = fmt.Sprintf("(?%s:%s)", re2flags, re2Str) 249 } 250 251 pattern, err1 := regexp.Compile(re2Str) 252 if err1 != nil { 253 err = fmt.Errorf("Invalid regular expression (re2): %s (%v)", re2Str, err1) 254 return 255 } 256 wrapper = (*regexpWrapper)(pattern) 257 } else { 258 if _, incompat := err1.(parser.RegexpErrorIncompatible); !incompat { 259 err = err1 260 return 261 } 262 wrapper2, err = compileRegexp2(patternStr, 
multiline, ignoreCase) 263 if err != nil { 264 err = fmt.Errorf("Invalid regular expression (regexp2): %s (%v)", patternStr, err) 265 return 266 } 267 } 268 269 p = ®expPattern{ 270 src: patternStr, 271 regexpWrapper: wrapper, 272 regexp2Wrapper: wrapper2, 273 global: global, 274 ignoreCase: ignoreCase, 275 multiline: multiline, 276 sticky: sticky, 277 unicode: unicode, 278 } 279 return 280 } 281 282 func (r *Runtime) _newRegExp(patternStr String, flags string, proto *Object) *regexpObject { 283 pattern, err := compileRegexpFromValueString(patternStr, flags) 284 if err != nil { 285 panic(r.newSyntaxError(err.Error(), -1)) 286 } 287 return r.newRegExpp(pattern, patternStr, proto) 288 } 289 290 func (r *Runtime) builtin_newRegExp(args []Value, proto *Object) *Object { 291 var patternVal, flagsVal Value 292 if len(args) > 0 { 293 patternVal = args[0] 294 } 295 if len(args) > 1 { 296 flagsVal = args[1] 297 } 298 return r.newRegExp(patternVal, flagsVal, proto).val 299 } 300 301 func (r *Runtime) newRegExp(patternVal, flagsVal Value, proto *Object) *regexpObject { 302 var pattern String 303 var flags string 304 if isRegexp(patternVal) { // this may have side effects so need to call it anyway 305 if obj, ok := patternVal.(*Object); ok { 306 if rx, ok := obj.self.(*regexpObject); ok { 307 if flagsVal == nil || flagsVal == _undefined { 308 return rx.clone() 309 } else { 310 return r._newRegExp(rx.source, flagsVal.toString().String(), proto) 311 } 312 } else { 313 pattern = nilSafe(obj.self.getStr("source", nil)).toString() 314 if flagsVal == nil || flagsVal == _undefined { 315 flags = nilSafe(obj.self.getStr("flags", nil)).toString().String() 316 } else { 317 flags = flagsVal.toString().String() 318 } 319 goto exit 320 } 321 } 322 } 323 324 if patternVal != nil && patternVal != _undefined { 325 pattern = patternVal.toString() 326 } 327 if flagsVal != nil && flagsVal != _undefined { 328 flags = flagsVal.toString().String() 329 } 330 331 if pattern == nil { 332 pattern = 
stringEmpty 333 } 334 exit: 335 return r._newRegExp(pattern, flags, proto) 336 } 337 338 func (r *Runtime) builtin_RegExp(call FunctionCall) Value { 339 pattern := call.Argument(0) 340 patternIsRegExp := isRegexp(pattern) 341 flags := call.Argument(1) 342 if patternIsRegExp && flags == _undefined { 343 if obj, ok := call.Argument(0).(*Object); ok { 344 patternConstructor := obj.self.getStr("constructor", nil) 345 if patternConstructor == r.global.RegExp { 346 return pattern 347 } 348 } 349 } 350 return r.newRegExp(pattern, flags, r.getRegExpPrototype()).val 351 } 352 353 func (r *Runtime) regexpproto_compile(call FunctionCall) Value { 354 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 355 var ( 356 pattern *regexpPattern 357 source String 358 flags string 359 err error 360 ) 361 patternVal := call.Argument(0) 362 flagsVal := call.Argument(1) 363 if o, ok := patternVal.(*Object); ok { 364 if p, ok := o.self.(*regexpObject); ok { 365 if flagsVal != _undefined { 366 panic(r.NewTypeError("Cannot supply flags when constructing one RegExp from another")) 367 } 368 this.pattern = p.pattern 369 this.source = p.source 370 goto exit 371 } 372 } 373 if patternVal != _undefined { 374 source = patternVal.toString() 375 } else { 376 source = stringEmpty 377 } 378 if flagsVal != _undefined { 379 flags = flagsVal.toString().String() 380 } 381 pattern, err = compileRegexpFromValueString(source, flags) 382 if err != nil { 383 panic(r.newSyntaxError(err.Error(), -1)) 384 } 385 this.pattern = pattern 386 this.source = source 387 exit: 388 this.setOwnStr("lastIndex", intToValue(0), true) 389 return call.This 390 } 391 392 panic(r.NewTypeError("Method RegExp.prototype.compile called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This}))) 393 } 394 395 func (r *Runtime) regexpproto_exec(call FunctionCall) Value { 396 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 397 return this.exec(call.Argument(0).toString()) 398 } else { 
399 r.typeErrorResult(true, "Method RegExp.prototype.exec called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This})) 400 return nil 401 } 402 } 403 404 func (r *Runtime) regexpproto_test(call FunctionCall) Value { 405 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 406 if this.test(call.Argument(0).toString()) { 407 return valueTrue 408 } else { 409 return valueFalse 410 } 411 } else { 412 panic(r.NewTypeError("Method RegExp.prototype.test called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This}))) 413 } 414 } 415 416 func (r *Runtime) regexpproto_toString(call FunctionCall) Value { 417 obj := r.toObject(call.This) 418 if this := r.checkStdRegexp(obj); this != nil { 419 var sb StringBuilder 420 sb.WriteRune('/') 421 if !this.writeEscapedSource(&sb) { 422 sb.WriteString(this.source) 423 } 424 sb.WriteRune('/') 425 if this.pattern.global { 426 sb.WriteRune('g') 427 } 428 if this.pattern.ignoreCase { 429 sb.WriteRune('i') 430 } 431 if this.pattern.multiline { 432 sb.WriteRune('m') 433 } 434 if this.pattern.unicode { 435 sb.WriteRune('u') 436 } 437 if this.pattern.sticky { 438 sb.WriteRune('y') 439 } 440 return sb.String() 441 } 442 pattern := nilSafe(obj.self.getStr("source", nil)).toString() 443 flags := nilSafe(obj.self.getStr("flags", nil)).toString() 444 var sb StringBuilder 445 sb.WriteRune('/') 446 sb.WriteString(pattern) 447 sb.WriteRune('/') 448 sb.WriteString(flags) 449 return sb.String() 450 } 451 452 func (r *regexpObject) writeEscapedSource(sb *StringBuilder) bool { 453 if r.source.Length() == 0 { 454 sb.WriteString(asciiString("(?:)")) 455 return true 456 } 457 pos := 0 458 lastPos := 0 459 rd := &lenientUtf16Decoder{utf16Reader: r.source.utf16Reader()} 460 L: 461 for { 462 c, size, err := rd.ReadRune() 463 if err != nil { 464 break 465 } 466 switch c { 467 case '\\': 468 pos++ 469 _, size, err = rd.ReadRune() 470 if err != nil { 471 break L 472 } 473 case '/', '\u000a', 
'\u000d', '\u2028', '\u2029': 474 sb.WriteSubstring(r.source, lastPos, pos) 475 sb.WriteRune('\\') 476 switch c { 477 case '\u000a': 478 sb.WriteRune('n') 479 case '\u000d': 480 sb.WriteRune('r') 481 default: 482 sb.WriteRune('u') 483 sb.WriteRune(rune(hex[c>>12])) 484 sb.WriteRune(rune(hex[(c>>8)&0xF])) 485 sb.WriteRune(rune(hex[(c>>4)&0xF])) 486 sb.WriteRune(rune(hex[c&0xF])) 487 } 488 lastPos = pos + size 489 } 490 pos += size 491 } 492 if lastPos > 0 { 493 sb.WriteSubstring(r.source, lastPos, r.source.Length()) 494 return true 495 } 496 return false 497 } 498 499 func (r *Runtime) regexpproto_getSource(call FunctionCall) Value { 500 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 501 var sb StringBuilder 502 if this.writeEscapedSource(&sb) { 503 return sb.String() 504 } 505 return this.source 506 } else if call.This == r.global.RegExpPrototype { 507 return asciiString("(?:)") 508 } else { 509 panic(r.NewTypeError("Method RegExp.prototype.source getter called on incompatible receiver")) 510 } 511 } 512 513 func (r *Runtime) regexpproto_getGlobal(call FunctionCall) Value { 514 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 515 if this.pattern.global { 516 return valueTrue 517 } else { 518 return valueFalse 519 } 520 } else if call.This == r.global.RegExpPrototype { 521 return _undefined 522 } else { 523 panic(r.NewTypeError("Method RegExp.prototype.global getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This}))) 524 } 525 } 526 527 func (r *Runtime) regexpproto_getMultiline(call FunctionCall) Value { 528 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 529 if this.pattern.multiline { 530 return valueTrue 531 } else { 532 return valueFalse 533 } 534 } else if call.This == r.global.RegExpPrototype { 535 return _undefined 536 } else { 537 panic(r.NewTypeError("Method RegExp.prototype.multiline getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: 
call.This}))) 538 } 539 } 540 541 func (r *Runtime) regexpproto_getIgnoreCase(call FunctionCall) Value { 542 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 543 if this.pattern.ignoreCase { 544 return valueTrue 545 } else { 546 return valueFalse 547 } 548 } else if call.This == r.global.RegExpPrototype { 549 return _undefined 550 } else { 551 panic(r.NewTypeError("Method RegExp.prototype.ignoreCase getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This}))) 552 } 553 } 554 555 func (r *Runtime) regexpproto_getUnicode(call FunctionCall) Value { 556 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 557 if this.pattern.unicode { 558 return valueTrue 559 } else { 560 return valueFalse 561 } 562 } else if call.This == r.global.RegExpPrototype { 563 return _undefined 564 } else { 565 panic(r.NewTypeError("Method RegExp.prototype.unicode getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This}))) 566 } 567 } 568 569 func (r *Runtime) regexpproto_getSticky(call FunctionCall) Value { 570 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 571 if this.pattern.sticky { 572 return valueTrue 573 } else { 574 return valueFalse 575 } 576 } else if call.This == r.global.RegExpPrototype { 577 return _undefined 578 } else { 579 panic(r.NewTypeError("Method RegExp.prototype.sticky getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This}))) 580 } 581 } 582 583 func (r *Runtime) regexpproto_getFlags(call FunctionCall) Value { 584 var global, ignoreCase, multiline, sticky, unicode bool 585 586 thisObj := r.toObject(call.This) 587 size := 0 588 if v := thisObj.self.getStr("global", nil); v != nil { 589 global = v.ToBoolean() 590 if global { 591 size++ 592 } 593 } 594 if v := thisObj.self.getStr("ignoreCase", nil); v != nil { 595 ignoreCase = v.ToBoolean() 596 if ignoreCase { 597 size++ 598 } 599 } 600 if v := 
thisObj.self.getStr("multiline", nil); v != nil { 601 multiline = v.ToBoolean() 602 if multiline { 603 size++ 604 } 605 } 606 if v := thisObj.self.getStr("sticky", nil); v != nil { 607 sticky = v.ToBoolean() 608 if sticky { 609 size++ 610 } 611 } 612 if v := thisObj.self.getStr("unicode", nil); v != nil { 613 unicode = v.ToBoolean() 614 if unicode { 615 size++ 616 } 617 } 618 619 var sb strings.Builder 620 sb.Grow(size) 621 if global { 622 sb.WriteByte('g') 623 } 624 if ignoreCase { 625 sb.WriteByte('i') 626 } 627 if multiline { 628 sb.WriteByte('m') 629 } 630 if unicode { 631 sb.WriteByte('u') 632 } 633 if sticky { 634 sb.WriteByte('y') 635 } 636 637 return asciiString(sb.String()) 638 } 639 640 func (r *Runtime) regExpExec(execFn func(FunctionCall) Value, rxObj *Object, arg Value) Value { 641 res := execFn(FunctionCall{ 642 This: rxObj, 643 Arguments: []Value{arg}, 644 }) 645 646 if res != _null { 647 if _, ok := res.(*Object); !ok { 648 panic(r.NewTypeError("RegExp exec method returned something other than an Object or null")) 649 } 650 } 651 652 return res 653 } 654 655 func (r *Runtime) getGlobalRegexpMatches(rxObj *Object, s String) []Value { 656 fullUnicode := nilSafe(rxObj.self.getStr("unicode", nil)).ToBoolean() 657 rxObj.self.setOwnStr("lastIndex", intToValue(0), true) 658 execFn, ok := r.toObject(rxObj.self.getStr("exec", nil)).self.assertCallable() 659 if !ok { 660 panic(r.NewTypeError("exec is not a function")) 661 } 662 var a []Value 663 for { 664 res := r.regExpExec(execFn, rxObj, s) 665 if res == _null { 666 break 667 } 668 a = append(a, res) 669 matchStr := nilSafe(r.toObject(res).self.getIdx(valueInt(0), nil)).toString() 670 if matchStr.Length() == 0 { 671 thisIndex := toLength(rxObj.self.getStr("lastIndex", nil)) 672 rxObj.self.setOwnStr("lastIndex", valueInt(advanceStringIndex64(s, thisIndex, fullUnicode)), true) 673 } 674 } 675 676 return a 677 } 678 679 func (r *Runtime) regexpproto_stdMatcherGeneric(rxObj *Object, s String) Value { 680 rx := 
rxObj.self 681 global := rx.getStr("global", nil) 682 if global != nil && global.ToBoolean() { 683 a := r.getGlobalRegexpMatches(rxObj, s) 684 if len(a) == 0 { 685 return _null 686 } 687 ar := make([]Value, 0, len(a)) 688 for _, result := range a { 689 obj := r.toObject(result) 690 matchStr := nilSafe(obj.self.getIdx(valueInt(0), nil)).ToString() 691 ar = append(ar, matchStr) 692 } 693 return r.newArrayValues(ar) 694 } 695 696 execFn, ok := r.toObject(rx.getStr("exec", nil)).self.assertCallable() 697 if !ok { 698 panic(r.NewTypeError("exec is not a function")) 699 } 700 701 return r.regExpExec(execFn, rxObj, s) 702 } 703 704 func (r *Runtime) checkStdRegexp(rxObj *Object) *regexpObject { 705 if deoptimiseRegexp { 706 return nil 707 } 708 709 rx, ok := rxObj.self.(*regexpObject) 710 if !ok { 711 return nil 712 } 713 714 if !rx.standard || rx.prototype == nil || rx.prototype.self != r.global.stdRegexpProto { 715 return nil 716 } 717 718 return rx 719 } 720 721 func (r *Runtime) regexpproto_stdMatcher(call FunctionCall) Value { 722 thisObj := r.toObject(call.This) 723 s := call.Argument(0).toString() 724 rx := r.checkStdRegexp(thisObj) 725 if rx == nil { 726 return r.regexpproto_stdMatcherGeneric(thisObj, s) 727 } 728 if rx.pattern.global { 729 res := rx.pattern.findAllSubmatchIndex(s, 0, -1, rx.pattern.sticky) 730 if len(res) == 0 { 731 rx.setOwnStr("lastIndex", intToValue(0), true) 732 return _null 733 } 734 a := make([]Value, 0, len(res)) 735 for _, result := range res { 736 a = append(a, s.Substring(result[0], result[1])) 737 } 738 rx.setOwnStr("lastIndex", intToValue(int64(res[len(res)-1][1])), true) 739 return r.newArrayValues(a) 740 } else { 741 return rx.exec(s) 742 } 743 } 744 745 func (r *Runtime) regexpproto_stdSearchGeneric(rxObj *Object, arg String) Value { 746 rx := rxObj.self 747 previousLastIndex := nilSafe(rx.getStr("lastIndex", nil)) 748 zero := intToValue(0) 749 if !previousLastIndex.SameAs(zero) { 750 rx.setOwnStr("lastIndex", zero, true) 751 } 752 
execFn, ok := r.toObject(rx.getStr("exec", nil)).self.assertCallable() 753 if !ok { 754 panic(r.NewTypeError("exec is not a function")) 755 } 756 757 result := r.regExpExec(execFn, rxObj, arg) 758 currentLastIndex := nilSafe(rx.getStr("lastIndex", nil)) 759 if !currentLastIndex.SameAs(previousLastIndex) { 760 rx.setOwnStr("lastIndex", previousLastIndex, true) 761 } 762 763 if result == _null { 764 return intToValue(-1) 765 } 766 767 return r.toObject(result).self.getStr("index", nil) 768 } 769 770 func (r *Runtime) regexpproto_stdMatcherAll(call FunctionCall) Value { 771 thisObj := r.toObject(call.This) 772 s := call.Argument(0).toString() 773 flags := nilSafe(thisObj.self.getStr("flags", nil)).toString() 774 c := r.speciesConstructorObj(call.This.(*Object), r.getRegExp()) 775 matcher := r.toConstructor(c)([]Value{call.This, flags}, nil) 776 matcher.self.setOwnStr("lastIndex", valueInt(toLength(thisObj.self.getStr("lastIndex", nil))), true) 777 flagsStr := flags.String() 778 global := strings.Contains(flagsStr, "g") 779 fullUnicode := strings.Contains(flagsStr, "u") 780 return r.createRegExpStringIterator(matcher, s, global, fullUnicode) 781 } 782 783 func (r *Runtime) createRegExpStringIterator(matcher *Object, s String, global, fullUnicode bool) Value { 784 o := &Object{runtime: r} 785 786 ri := ®ExpStringIterObject{ 787 matcher: matcher, 788 s: s, 789 global: global, 790 fullUnicode: fullUnicode, 791 } 792 ri.class = classObject 793 ri.val = o 794 ri.extensible = true 795 o.self = ri 796 ri.prototype = r.getRegExpStringIteratorPrototype() 797 ri.init() 798 799 return o 800 } 801 802 type regExpStringIterObject struct { 803 baseObject 804 matcher *Object 805 s String 806 global, fullUnicode, done bool 807 } 808 809 // RegExpExec as defined in 21.2.5.2.1 810 func regExpExec(r *Object, s String) Value { 811 exec := r.self.getStr("exec", nil) 812 if execObject, ok := exec.(*Object); ok { 813 if execFn, ok := execObject.self.assertCallable(); ok { 814 return 
r.runtime.regExpExec(execFn, r, s) 815 } 816 } 817 if rx, ok := r.self.(*regexpObject); ok { 818 return rx.exec(s) 819 } 820 panic(r.runtime.NewTypeError("no RegExpMatcher internal slot")) 821 } 822 823 func (ri *regExpStringIterObject) next() (v Value) { 824 if ri.done { 825 return ri.val.runtime.createIterResultObject(_undefined, true) 826 } 827 828 match := regExpExec(ri.matcher, ri.s) 829 if IsNull(match) { 830 ri.done = true 831 return ri.val.runtime.createIterResultObject(_undefined, true) 832 } 833 if !ri.global { 834 ri.done = true 835 return ri.val.runtime.createIterResultObject(match, false) 836 } 837 838 matchStr := nilSafe(ri.val.runtime.toObject(match).self.getIdx(valueInt(0), nil)).toString() 839 if matchStr.Length() == 0 { 840 thisIndex := toLength(ri.matcher.self.getStr("lastIndex", nil)) 841 ri.matcher.self.setOwnStr("lastIndex", valueInt(advanceStringIndex64(ri.s, thisIndex, ri.fullUnicode)), true) 842 } 843 return ri.val.runtime.createIterResultObject(match, false) 844 } 845 846 func (r *Runtime) regexpproto_stdSearch(call FunctionCall) Value { 847 thisObj := r.toObject(call.This) 848 s := call.Argument(0).toString() 849 rx := r.checkStdRegexp(thisObj) 850 if rx == nil { 851 return r.regexpproto_stdSearchGeneric(thisObj, s) 852 } 853 854 previousLastIndex := rx.getStr("lastIndex", nil) 855 rx.setOwnStr("lastIndex", intToValue(0), true) 856 857 match, result := rx.execRegexp(s) 858 rx.setOwnStr("lastIndex", previousLastIndex, true) 859 860 if !match { 861 return intToValue(-1) 862 } 863 return intToValue(int64(result[0])) 864 } 865 866 func (r *Runtime) regexpproto_stdSplitterGeneric(splitter *Object, s String, limit Value, unicodeMatching bool) Value { 867 var a []Value 868 var lim int64 869 if limit == nil || limit == _undefined { 870 lim = maxInt - 1 871 } else { 872 lim = toLength(limit) 873 } 874 if lim == 0 { 875 return r.newArrayValues(a) 876 } 877 size := s.Length() 878 p := 0 879 execFn := toMethod(splitter.ToObject(r).self.getStr("exec", 
nil)) // must be non-nil 880 881 if size == 0 { 882 if r.regExpExec(execFn, splitter, s) == _null { 883 a = append(a, s) 884 } 885 return r.newArrayValues(a) 886 } 887 888 q := p 889 for q < size { 890 splitter.self.setOwnStr("lastIndex", intToValue(int64(q)), true) 891 z := r.regExpExec(execFn, splitter, s) 892 if z == _null { 893 q = advanceStringIndex(s, q, unicodeMatching) 894 } else { 895 z := r.toObject(z) 896 e := toLength(splitter.self.getStr("lastIndex", nil)) 897 if e == int64(p) { 898 q = advanceStringIndex(s, q, unicodeMatching) 899 } else { 900 a = append(a, s.Substring(p, q)) 901 if int64(len(a)) == lim { 902 return r.newArrayValues(a) 903 } 904 if e > int64(size) { 905 p = size 906 } else { 907 p = int(e) 908 } 909 numberOfCaptures := max(toLength(z.self.getStr("length", nil))-1, 0) 910 for i := int64(1); i <= numberOfCaptures; i++ { 911 a = append(a, nilSafe(z.self.getIdx(valueInt(i), nil))) 912 if int64(len(a)) == lim { 913 return r.newArrayValues(a) 914 } 915 } 916 q = p 917 } 918 } 919 } 920 a = append(a, s.Substring(p, size)) 921 return r.newArrayValues(a) 922 } 923 924 func advanceStringIndex(s String, pos int, unicode bool) int { 925 next := pos + 1 926 if !unicode { 927 return next 928 } 929 l := s.Length() 930 if next >= l { 931 return next 932 } 933 if !isUTF16FirstSurrogate(s.CharAt(pos)) { 934 return next 935 } 936 if !isUTF16SecondSurrogate(s.CharAt(next)) { 937 return next 938 } 939 return next + 1 940 } 941 942 func advanceStringIndex64(s String, pos int64, unicode bool) int64 { 943 next := pos + 1 944 if !unicode { 945 return next 946 } 947 l := int64(s.Length()) 948 if next >= l { 949 return next 950 } 951 if !isUTF16FirstSurrogate(s.CharAt(int(pos))) { 952 return next 953 } 954 if !isUTF16SecondSurrogate(s.CharAt(int(next))) { 955 return next 956 } 957 return next + 1 958 } 959 960 func (r *Runtime) regexpproto_stdSplitter(call FunctionCall) Value { 961 rxObj := r.toObject(call.This) 962 s := call.Argument(0).toString() 963 
limitValue := call.Argument(1) 964 var splitter *Object 965 search := r.checkStdRegexp(rxObj) 966 c := r.speciesConstructorObj(rxObj, r.getRegExp()) 967 if search == nil || c != r.global.RegExp { 968 flags := nilSafe(rxObj.self.getStr("flags", nil)).toString() 969 flagsStr := flags.String() 970 971 // Add 'y' flag if missing 972 if !strings.Contains(flagsStr, "y") { 973 flags = flags.Concat(asciiString("y")) 974 } 975 splitter = r.toConstructor(c)([]Value{rxObj, flags}, nil) 976 search = r.checkStdRegexp(splitter) 977 if search == nil { 978 return r.regexpproto_stdSplitterGeneric(splitter, s, limitValue, strings.Contains(flagsStr, "u")) 979 } 980 } 981 982 pattern := search.pattern // toUint32() may recompile the pattern, but we still need to use the original 983 limit := -1 984 if limitValue != _undefined { 985 limit = int(toUint32(limitValue)) 986 } 987 988 if limit == 0 { 989 return r.newArrayValues(nil) 990 } 991 992 targetLength := s.Length() 993 var valueArray []Value 994 lastIndex := 0 995 found := 0 996 997 result := pattern.findAllSubmatchIndex(s, 0, -1, false) 998 if targetLength == 0 { 999 if result == nil { 1000 valueArray = append(valueArray, s) 1001 } 1002 goto RETURN 1003 } 1004 1005 for _, match := range result { 1006 if match[0] == match[1] { 1007 // FIXME Ugh, this is a hack 1008 if match[0] == 0 || match[0] == targetLength { 1009 continue 1010 } 1011 } 1012 1013 if lastIndex != match[0] { 1014 valueArray = append(valueArray, s.Substring(lastIndex, match[0])) 1015 found++ 1016 } else if lastIndex == match[0] { 1017 if lastIndex != -1 { 1018 valueArray = append(valueArray, stringEmpty) 1019 found++ 1020 } 1021 } 1022 1023 lastIndex = match[1] 1024 if found == limit { 1025 goto RETURN 1026 } 1027 1028 captureCount := len(match) / 2 1029 for index := 1; index < captureCount; index++ { 1030 offset := index * 2 1031 var value Value 1032 if match[offset] != -1 { 1033 value = s.Substring(match[offset], match[offset+1]) 1034 } else { 1035 value = 
_undefined 1036 } 1037 valueArray = append(valueArray, value) 1038 found++ 1039 if found == limit { 1040 goto RETURN 1041 } 1042 } 1043 } 1044 1045 if found != limit { 1046 if lastIndex != targetLength { 1047 valueArray = append(valueArray, s.Substring(lastIndex, targetLength)) 1048 } else { 1049 valueArray = append(valueArray, stringEmpty) 1050 } 1051 } 1052 1053 RETURN: 1054 return r.newArrayValues(valueArray) 1055 } 1056 1057 func (r *Runtime) regexpproto_stdReplacerGeneric(rxObj *Object, s, replaceStr String, rcall func(FunctionCall) Value) Value { 1058 var results []Value 1059 if nilSafe(rxObj.self.getStr("global", nil)).ToBoolean() { 1060 results = r.getGlobalRegexpMatches(rxObj, s) 1061 } else { 1062 execFn := toMethod(rxObj.self.getStr("exec", nil)) // must be non-nil 1063 result := r.regExpExec(execFn, rxObj, s) 1064 if result != _null { 1065 results = append(results, result) 1066 } 1067 } 1068 lengthS := s.Length() 1069 nextSourcePosition := 0 1070 var resultBuf StringBuilder 1071 for _, result := range results { 1072 obj := r.toObject(result) 1073 nCaptures := max(toLength(obj.self.getStr("length", nil))-1, 0) 1074 matched := nilSafe(obj.self.getIdx(valueInt(0), nil)).toString() 1075 matchLength := matched.Length() 1076 position := toIntStrict(max(min(nilSafe(obj.self.getStr("index", nil)).ToInteger(), int64(lengthS)), 0)) 1077 var captures []Value 1078 if rcall != nil { 1079 captures = make([]Value, 0, nCaptures+3) 1080 } else { 1081 captures = make([]Value, 0, nCaptures+1) 1082 } 1083 captures = append(captures, matched) 1084 for n := int64(1); n <= nCaptures; n++ { 1085 capN := nilSafe(obj.self.getIdx(valueInt(n), nil)) 1086 if capN != _undefined { 1087 capN = capN.ToString() 1088 } 1089 captures = append(captures, capN) 1090 } 1091 var replacement String 1092 if rcall != nil { 1093 captures = append(captures, intToValue(int64(position)), s) 1094 replacement = rcall(FunctionCall{ 1095 This: _undefined, 1096 Arguments: captures, 1097 }).toString() 1098 
// NOTE(review): the statements down to the first top-level closing brace are
// the tail of a function whose beginning lies above this chunk (presumably
// stringReplace - confirm). They are reproduced unchanged.
			if position >= nextSourcePosition {
				resultBuf.WriteString(s.Substring(nextSourcePosition, position))
				resultBuf.WriteString(replacement)
				nextSourcePosition = position + matchLength
			}
		} else {
			if position >= nextSourcePosition {
				resultBuf.WriteString(s.Substring(nextSourcePosition, position))
				writeSubstitution(s, position, len(captures), func(idx int) String {
					capture := captures[idx]
					if capture != _undefined {
						return capture.toString()
					}
					return stringEmpty
				}, replaceStr, &resultBuf)
				nextSourcePosition = position + matchLength
			}
		}
	}
	if nextSourcePosition < lengthS {
		resultBuf.WriteString(s.Substring(nextSourcePosition, lengthS))
	}
	return resultBuf.String()
}

// writeSubstitution expands the replacement template replaceStr for a single
// match and appends the result to buf.
//
// s is the subject string and position the index in s at which the match
// starts. getCapture(0) must return the matched text; getCapture(k) for
// 0 < k < numCaptures returns the text of capture group k. numCaptures
// therefore counts the whole match as well as the groups.
//
// Recognised patterns in replaceStr:
//
//	$$        a literal '$'
//	$`        the part of s before the match
//	$'        the part of s after the match
//	$&        the matched text
//	$n, $nn   capture group n / nn (one or two decimal digits)
//
// A '$' that is the last character of replaceStr, or that is followed by
// anything else, is copied through unchanged.
func writeSubstitution(s String, position int, numCaptures int, getCapture func(int) String, replaceStr String, buf *StringBuilder) {
	l := s.Length()
	rl := replaceStr.Length()
	matched := getCapture(0)
	tailPos := position + matched.Length()

	for i := 0; i < rl; i++ {
		c := replaceStr.CharAt(i)
		if c == '$' && i < rl-1 {
			ch := replaceStr.CharAt(i + 1)
			switch ch {
			case '$':
				buf.WriteRune('$')
			case '`':
				buf.WriteString(s.Substring(0, position))
			case '\'':
				if tailPos < l {
					buf.WriteString(s.Substring(tailPos, l))
				}
			case '&':
				buf.WriteString(matched)
			default:
				matchNumber := 0
				j := i + 1
				// A capture reference is at most two digits long ($n or $nn).
				// Without this bound "$001" would resolve to group 1 and
				// "$100" to group 100, whereas ECMAScript treats any digits
				// past the second as literal text.
				digitsEnd := j + 2
				if digitsEnd > rl {
					digitsEnd = rl
				}
				for j < digitsEnd {
					ch := replaceStr.CharAt(j)
					if ch >= '0' && ch <= '9' {
						m := matchNumber*10 + int(ch-'0')
						if m >= numCaptures {
							// The longer number names no existing group: keep
							// the shorter reference, leave this digit literal.
							break
						}
						matchNumber = m
						j++
					} else {
						break
					}
				}
				if matchNumber > 0 {
					buf.WriteString(getCapture(matchNumber))
					// Resume right after the consumed digits; the loop's own
					// i++ supplies the final step.
					i = j - 1
					continue
				} else {
					// Not a valid group reference: emit '$' and the following
					// character verbatim.
					buf.WriteRune('$')
					buf.WriteRune(rune(ch))
				}
			}
			// Skip the character that followed '$'.
			i++
		} else {
			buf.WriteRune(rune(c))
		}
	}
}

// regexpproto_stdReplacer is the native function installed as
// RegExp.prototype[Symbol.replace] (see getRegExpPrototype).
//
// call.This is converted to an object, argument 0 to the subject string, and
// argument 1 is split by getReplaceValue into a replacement string and a
// replacement callback. When checkStdRegexp returns nil the work is delegated
// to regexpproto_stdReplacerGeneric; otherwise the matches are collected
// directly from the compiled pattern and handed to stringReplace.
func (r *Runtime) regexpproto_stdReplacer(call FunctionCall) Value {
	rxObj := r.toObject(call.This)
	s := call.Argument(0).toString()
	replaceStr, rcall := getReplaceValue(call.Argument(1))

	rx := r.checkStdRegexp(rxObj)
	if rx == nil {
		return r.regexpproto_stdReplacerGeneric(rxObj, s, replaceStr, rcall)
	}

	var index int64
	find := 1
	if rx.pattern.global {
		// Global patterns search from index 0 with no match limit (-1) and
		// have lastIndex reset to 0 first.
		find = -1
		rx.setOwnStr("lastIndex", intToValue(0), true)
	} else {
		// Non-global patterns look for a single match starting at lastIndex.
		index = rx.getLastIndex()
	}
	found := rx.pattern.findAllSubmatchIndex(s, toIntStrict(index), find, rx.pattern.sticky)
	if len(found) > 0 {
		// The matches are dropped when updateLastIndex reports false.
		if !rx.updateLastIndex(index, found[0], found[len(found)-1]) {
			found = nil
		}
	} else {
		rx.updateLastIndex(index, nil, nil)
	}

	return stringReplace(s, found, replaceStr, rcall)
}

// regExpStringIteratorProto_next is the native "next" method of the RegExp
// String Iterator prototype. It forwards to the iterator's next method and
// panics with a TypeError when call.This is not backed by a
// *regExpStringIterObject.
func (r *Runtime) regExpStringIteratorProto_next(call FunctionCall) Value {
	thisObj := r.toObject(call.This)
	if iter, ok := thisObj.self.(*regExpStringIterObject); ok {
		return iter.next()
	}
	panic(r.NewTypeError("Method RegExp String Iterator.prototype.next called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: thisObj})))
}

// createRegExpStringIteratorPrototype builds the implementation object for
// val: an object whose prototype is the iterator prototype and which carries
// a "next" method and a Symbol.toStringTag property.
func (r *Runtime) createRegExpStringIteratorPrototype(val *Object) objectImpl {
	o := newBaseObjectObj(val, r.getIteratorPrototype(), classObject)

	o._putProp("next", r.newNativeFunc(r.regExpStringIteratorProto_next, "next", 0), true, false, true)
	o._putSym(SymToStringTag, valueProp(asciiString(classRegExpStringIterator), false, false, true))

	return o
}

// getRegExpStringIteratorPrototype returns the RegExp String Iterator
// prototype object, creating it and caching it in r.global on first use.
func (r *Runtime) getRegExpStringIteratorPrototype() *Object {
	var o *Object
	if o = r.global.RegExpStringIteratorPrototype; o == nil {
		o = &Object{runtime: r}
		// The object is cached before its implementation is built.
		r.global.RegExpStringIteratorPrototype = o
		o.self = r.createRegExpStringIteratorPrototype(o)
	}
	return o
}

// getRegExp returns the RegExp constructor object, creating it and caching it
// in r.global.RegExp on first use.
func (r *Runtime) getRegExp() *Object {
	ret := r.global.RegExp
	if ret == nil {
		ret = &Object{runtime: r}
		// Cache before calling getRegExpPrototype: that function calls
		// getRegExp again for the "constructor" property and must find this
		// object instead of recursing.
		r.global.RegExp = ret
		proto := r.getRegExpPrototype()
		r.newNativeFuncAndConstruct(ret, r.builtin_RegExp,
			r.wrapNativeConstruct(r.builtin_newRegExp, ret, proto), proto, "RegExp", intToValue(2))
		rx := ret.self
		r.putSpeciesReturnThis(rx)
	}
	return ret
}

// getRegExpPrototype returns RegExp.prototype, creating and populating it on
// first use. The object is cached in r.global.RegExpPrototype and its guarded
// implementation in r.global.stdRegexpProto.
func (r *Runtime) getRegExpPrototype() *Object {
	ret := r.global.RegExpPrototype
	if ret == nil {
		o := r.newGuardedObject(r.global.ObjectPrototype, classObject)
		ret = o.val
		// Cache before calling getRegExp below, which calls back into this
		// function.
		r.global.RegExpPrototype = ret
		r.global.stdRegexpProto = o

		o._putProp("constructor", r.getRegExp(), true, false, true)
		o._putProp("compile", r.newNativeFunc(r.regexpproto_compile, "compile", 2), true, false, true)
		o._putProp("exec", r.newNativeFunc(r.regexpproto_exec, "exec", 1), true, false, true)
		o._putProp("test", r.newNativeFunc(r.regexpproto_test, "test", 1), true, false, true)
		o._putProp("toString", r.newNativeFunc(r.regexpproto_toString, "toString", 0), true, false, true)

		// Getter-only, configurable accessor properties.
		o.setOwnStr("source", &valueProperty{
			configurable: true,
			getterFunc:   r.newNativeFunc(r.regexpproto_getSource, "get source", 0),
			accessor:     true,
		}, false)
		o.setOwnStr("global", &valueProperty{
			configurable: true,
			getterFunc:   r.newNativeFunc(r.regexpproto_getGlobal, "get global", 0),
			accessor:     true,
		}, false)
		o.setOwnStr("multiline", &valueProperty{
			configurable: true,
			getterFunc:   r.newNativeFunc(r.regexpproto_getMultiline, "get multiline", 0),
			accessor:     true,
		}, false)
		o.setOwnStr("ignoreCase", &valueProperty{
			configurable: true,
			getterFunc:   r.newNativeFunc(r.regexpproto_getIgnoreCase, "get ignoreCase", 0),
			accessor:     true,
		}, false)
		o.setOwnStr("unicode", &valueProperty{
			configurable: true,
			getterFunc:   r.newNativeFunc(r.regexpproto_getUnicode, "get unicode", 0),
			accessor:     true,
		}, false)
		o.setOwnStr("sticky", &valueProperty{
			configurable: true,
			getterFunc:   r.newNativeFunc(r.regexpproto_getSticky, "get sticky", 0),
			accessor:     true,
		}, false)
		o.setOwnStr("flags", &valueProperty{
			configurable: true,
			getterFunc:   r.newNativeFunc(r.regexpproto_getFlags, "get flags", 0),
			accessor:     true,
		}, false)

		// Symbol-keyed methods through which String.prototype methods reach
		// the regexp.
		o._putSym(SymMatch, valueProp(r.newNativeFunc(r.regexpproto_stdMatcher, "[Symbol.match]", 1), true, false, true))
		o._putSym(SymMatchAll, valueProp(r.newNativeFunc(r.regexpproto_stdMatcherAll, "[Symbol.matchAll]", 1), true, false, true))
		o._putSym(SymSearch, valueProp(r.newNativeFunc(r.regexpproto_stdSearch, "[Symbol.search]", 1), true, false, true))
		o._putSym(SymSplit, valueProp(r.newNativeFunc(r.regexpproto_stdSplitter, "[Symbol.split]", 2), true, false, true))
		o._putSym(SymReplace, valueProp(r.newNativeFunc(r.regexpproto_stdReplacer, "[Symbol.replace]", 2), true, false, true))
		// NOTE(review): presumably lets the runtime detect when these
		// properties have been overridden so that fast paths such as
		// checkStdRegexp can be disabled - confirm against guard().
		o.guard("exec", "global", "multiline", "ignoreCase", "unicode", "sticky")
	}
	return ret
}