github.com/pygolin/runtime@v0.0.0-20201208210830-a62e3cd39798/str.go (about) 1 // Copyright 2016 Google Inc. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package runtime 16 17 import ( 18 "bytes" 19 "fmt" 20 "reflect" 21 "regexp" 22 "strconv" 23 "strings" 24 "sync/atomic" 25 "unicode" 26 "unicode/utf8" 27 "unsafe" 28 ) 29 30 var ( 31 // StrType is the object representing the Python 'str' type. 32 StrType = newBasisType("str", reflect.TypeOf(Str{}), toStrUnsafe, BaseStringType) 33 whitespaceSplitRegexp = regexp.MustCompile(`\s+`) 34 strASCIISpaces = []byte(" \t\n\v\f\r") 35 strInterpolationRegexp = regexp.MustCompile(`^%(\(([^)]+)\))?([#0 +-]?)(\*|[0-9]+)?(\.(\*|[0-9]+))?[hlL]?([diouxXeEfFgGcrs%])`) 36 internedStrs = map[string]*Str{} 37 caseOffset = byte('a' - 'A') 38 39 internedName = NewStr("__name__") 40 ) 41 42 type stripSide int 43 44 const ( 45 stripSideLeft stripSide = iota 46 stripSideRight 47 stripSideBoth 48 ) 49 50 // InternStr adds s to the interned string map. Subsequent calls to NewStr() 51 // will return the same underlying Str. InternStr is not thread safe and should 52 // only be called during module initialization time. 53 func InternStr(s string) *Str { 54 str, _ := internedStrs[s] 55 if str == nil { 56 str = &Str{Object: Object{typ: StrType}, value: s, hash: NewInt(hashString(s))} 57 internedStrs[s] = str 58 } 59 return str 60 } 61 62 // Str represents Python 'str' objects. 
63 type Str struct { 64 Object 65 value string 66 hash *Int 67 } 68 69 // NewStr returns a new Str holding the given string value. 70 func NewStr(value string) *Str { 71 if s := internedStrs[value]; s != nil { 72 return s 73 } 74 return &Str{Object: Object{typ: StrType}, value: value} 75 } 76 77 func toStrUnsafe(o *Object) *Str { 78 return (*Str)(o.toPointer()) 79 } 80 81 // Decode produces a unicode object from the bytes of s assuming they have the 82 // given encoding. Invalid code points are resolved using a strategy given by 83 // errors: "ignore" will bypass them, "replace" will substitute the Unicode 84 // replacement character (U+FFFD) and "strict" will raise UnicodeDecodeError. 85 // 86 // NOTE: Decoding UTF-8 data containing surrogates (e.g. U+D800 encoded as 87 // '\xed\xa0\x80') will raise UnicodeDecodeError consistent with CPython 3.x 88 // but different than 2.x. 89 func (s *Str) Decode(f *Frame, encoding, errors string) (*Unicode, *BaseException) { 90 // TODO: Support custom encodings and error handlers. 91 normalized := normalizeEncoding(encoding) 92 if normalized != "utf8" { 93 return nil, f.RaiseType(LookupErrorType, fmt.Sprintf("unknown encoding: %s", encoding)) 94 } 95 var runes []rune 96 for pos, r := range s.Value() { 97 switch { 98 case r != utf8.RuneError: 99 runes = append(runes, r) 100 case errors == EncodeIgnore: 101 // Do nothing 102 case errors == EncodeReplace: 103 runes = append(runes, unicode.ReplacementChar) 104 case errors == EncodeStrict: 105 format := "'%s' codec can't decode byte 0x%02x in position %d" 106 return nil, f.RaiseType(UnicodeDecodeErrorType, fmt.Sprintf(format, encoding, int(s.Value()[pos]), pos)) 107 default: 108 format := "unknown error handler name '%s'" 109 return nil, f.RaiseType(LookupErrorType, fmt.Sprintf(format, errors)) 110 } 111 } 112 return NewUnicodeFromRunes(runes), nil 113 } 114 115 // ToObject upcasts s to an Object. 
116 func (s *Str) ToObject() *Object { 117 return &s.Object 118 } 119 120 // Value returns the underlying string value held by s. 121 func (s *Str) Value() string { 122 return s.value 123 } 124 125 func hashString(s string) int { 126 l := len(s) 127 if l == 0 { 128 return 0 129 } 130 h := int(s[0]) << 7 131 for i := 0; i < l; i++ { 132 h = (1000003 * h) ^ int(s[i]) 133 } 134 h ^= l 135 if h == -1 { 136 h = -2 137 } 138 return h 139 } 140 141 func strAdd(f *Frame, v, w *Object) (*Object, *BaseException) { 142 if w.isInstance(UnicodeType) { 143 // CPython explicitly dispatches to unicode here so that's how 144 // we do it even though it would seem more natural to override 145 // unicode.__radd__. 146 ret, raised := toStrUnsafe(v).Decode(f, EncodeDefault, EncodeStrict) 147 if raised != nil { 148 return nil, raised 149 } 150 return unicodeAdd(f, ret.ToObject(), w) 151 } 152 if !w.isInstance(StrType) { 153 return NotImplemented, nil 154 } 155 stringV, stringW := toStrUnsafe(v).Value(), toStrUnsafe(w).Value() 156 if len(stringV)+len(stringW) < 0 { 157 // This indicates an int overflow. 
158 return nil, f.RaiseType(OverflowErrorType, errResultTooLarge) 159 } 160 return NewStr(stringV + stringW).ToObject(), nil 161 } 162 163 func strCapitalize(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) { 164 if raised := checkMethodArgs(f, "capitalize", args, StrType); raised != nil { 165 return nil, raised 166 } 167 s := toStrUnsafe(args[0]).Value() 168 numBytes := len(s) 169 if numBytes == 0 { 170 return args[0], nil 171 } 172 b := make([]byte, numBytes) 173 b[0] = toUpper(s[0]) 174 for i := 1; i < numBytes; i++ { 175 b[i] = toLower(s[i]) 176 } 177 return NewStr(string(b)).ToObject(), nil 178 } 179 180 func strCenter(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) { 181 s, width, fill, raised := strJustDecodeArgs(f, args, "center") 182 if raised != nil { 183 return nil, raised 184 } 185 if len(s) >= width { 186 return NewStr(s).ToObject(), nil 187 } 188 marg := width - len(s) 189 left := marg/2 + (marg & width & 1) 190 return NewStr(pad(s, left, marg-left, fill)).ToObject(), nil 191 } 192 193 func strContains(f *Frame, o *Object, value *Object) (*Object, *BaseException) { 194 if value.isInstance(UnicodeType) { 195 decoded, raised := toStrUnsafe(o).Decode(f, EncodeDefault, EncodeStrict) 196 if raised != nil { 197 return nil, raised 198 } 199 return unicodeContains(f, decoded.ToObject(), value) 200 } 201 if !value.isInstance(StrType) { 202 format := "'in <string>' requires string as left operand, not %s" 203 return nil, f.RaiseType(TypeErrorType, fmt.Sprintf(format, value.typ.Name())) 204 } 205 return GetBool(strings.Contains(toStrUnsafe(o).Value(), toStrUnsafe(value).Value())).ToObject(), nil 206 } 207 208 func strCount(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) { 209 if raised := checkMethodArgs(f, "count", args, StrType, ObjectType); raised != nil { 210 return nil, raised 211 } 212 s := toStrUnsafe(args[0]).Value() 213 sep := toStrUnsafe(args[1]).Value() 214 cnt := strings.Count(s, sep) 215 return 
NewInt(cnt).ToObject(), nil 216 } 217 218 func strDecode(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) { 219 // TODO: Accept unicode for encoding and errors args. 220 expectedTypes := []*Type{StrType, StrType, StrType} 221 argc := len(args) 222 if argc >= 1 && argc < 3 { 223 expectedTypes = expectedTypes[:argc] 224 } 225 if raised := checkMethodArgs(f, "decode", args, expectedTypes...); raised != nil { 226 return nil, raised 227 } 228 encoding := EncodeDefault 229 if argc > 1 { 230 encoding = toStrUnsafe(args[1]).Value() 231 } 232 errors := EncodeStrict 233 if argc > 2 { 234 errors = toStrUnsafe(args[2]).Value() 235 } 236 s, raised := toStrUnsafe(args[0]).Decode(f, encoding, errors) 237 if raised != nil { 238 return nil, raised 239 } 240 return s.ToObject(), nil 241 } 242 243 func strEndsWith(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) { 244 return strStartsEndsWith(f, "endswith", args) 245 } 246 247 func strEq(f *Frame, v, w *Object) (*Object, *BaseException) { 248 return strCompare(v, w, False, True, False), nil 249 } 250 251 // strFind returns the lowest index in s where the substring sub is found such 252 // that sub is wholly contained in s[start:end]. Return -1 on failure. 253 func strFind(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) { 254 return strFindOrIndex(f, args, func(s, sub string) (int, *BaseException) { 255 return strings.Index(s, sub), nil 256 }) 257 } 258 259 func strGE(f *Frame, v, w *Object) (*Object, *BaseException) { 260 return strCompare(v, w, False, True, True), nil 261 } 262 263 // strGetItem returns a slice of string depending on whether index is an integer 264 // or a slice. If index is neither of those types then a TypeError is returned. 
265 func strGetItem(f *Frame, o, key *Object) (*Object, *BaseException) { 266 s := toStrUnsafe(o).Value() 267 switch { 268 case key.typ.slots.Index != nil: 269 index, raised := IndexInt(f, key) 270 if raised != nil { 271 return nil, raised 272 } 273 index, raised = seqCheckedIndex(f, len(s), index) 274 if raised != nil { 275 return nil, raised 276 } 277 return NewStr(s[index : index+1]).ToObject(), nil 278 case key.isInstance(SliceType): 279 slice := toSliceUnsafe(key) 280 start, stop, step, sliceLen, raised := slice.calcSlice(f, len(s)) 281 if raised != nil { 282 return nil, raised 283 } 284 if step == 1 { 285 return NewStr(s[start:stop]).ToObject(), nil 286 } 287 result := make([]byte, 0, sliceLen) 288 for j := start; j != stop; j += step { 289 result = append(result, s[j]) 290 } 291 return NewStr(string(result)).ToObject(), nil 292 } 293 return nil, f.RaiseType(TypeErrorType, fmt.Sprintf("string indices must be integers or slice, not %s", key.typ.Name())) 294 } 295 296 func strGetNewArgs(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) { 297 if raised := checkMethodArgs(f, "__getnewargs__", args, StrType); raised != nil { 298 return nil, raised 299 } 300 return NewTuple1(args[0]).ToObject(), nil 301 } 302 303 func strGT(f *Frame, v, w *Object) (*Object, *BaseException) { 304 return strCompare(v, w, False, False, True), nil 305 } 306 307 func strHash(f *Frame, o *Object) (*Object, *BaseException) { 308 s := toStrUnsafe(o) 309 p := (*unsafe.Pointer)(unsafe.Pointer(&s.hash)) 310 if v := atomic.LoadPointer(p); v != unsafe.Pointer(nil) { 311 return (*Int)(v).ToObject(), nil 312 } 313 h := NewInt(hashString(toStrUnsafe(o).Value())) 314 atomic.StorePointer(p, unsafe.Pointer(h)) 315 return h.ToObject(), nil 316 } 317 318 func strIndex(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) { 319 return strFindOrIndex(f, args, func(s, sub string) (i int, raised *BaseException) { 320 i = strings.Index(s, sub) 321 if i == -1 { 322 raised = 
f.RaiseType(ValueErrorType, "substring not found") 323 } 324 return i, raised 325 }) 326 } 327 328 func strIsAlNum(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) { 329 if raised := checkMethodArgs(f, "isalnum", args, StrType); raised != nil { 330 return nil, raised 331 } 332 s := toStrUnsafe(args[0]).Value() 333 if len(s) == 0 { 334 return False.ToObject(), nil 335 } 336 for i := range s { 337 if !isAlNum(s[i]) { 338 return False.ToObject(), nil 339 } 340 } 341 return True.ToObject(), nil 342 } 343 344 func strIsAlpha(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) { 345 if raised := checkMethodArgs(f, "isalpha", args, StrType); raised != nil { 346 return nil, raised 347 } 348 s := toStrUnsafe(args[0]).Value() 349 if len(s) == 0 { 350 return False.ToObject(), nil 351 } 352 for i := range s { 353 if !isAlpha(s[i]) { 354 return False.ToObject(), nil 355 } 356 } 357 return True.ToObject(), nil 358 } 359 360 func strIsDigit(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) { 361 if raised := checkMethodArgs(f, "isdigit", args, StrType); raised != nil { 362 return nil, raised 363 } 364 s := toStrUnsafe(args[0]).Value() 365 if len(s) == 0 { 366 return False.ToObject(), nil 367 } 368 for i := range s { 369 if !isDigit(s[i]) { 370 return False.ToObject(), nil 371 } 372 } 373 return True.ToObject(), nil 374 } 375 376 func strIsLower(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) { 377 if raised := checkMethodArgs(f, "islower", args, StrType); raised != nil { 378 return nil, raised 379 } 380 s := toStrUnsafe(args[0]).Value() 381 if len(s) == 0 { 382 return False.ToObject(), nil 383 } 384 for i := range s { 385 if !isLower(s[i]) { 386 return False.ToObject(), nil 387 } 388 } 389 return True.ToObject(), nil 390 } 391 392 func strIsSpace(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) { 393 if raised := checkMethodArgs(f, "isspace", args, StrType); raised != nil { 394 return nil, raised 395 } 396 s := 
toStrUnsafe(args[0]).Value() 397 if len(s) == 0 { 398 return False.ToObject(), nil 399 } 400 for i := range s { 401 if !isSpace(s[i]) { 402 return False.ToObject(), nil 403 } 404 } 405 return True.ToObject(), nil 406 } 407 408 func strIsTitle(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) { 409 if raised := checkMethodArgs(f, "istitle", args, StrType); raised != nil { 410 return nil, raised 411 } 412 413 s := toStrUnsafe(args[0]).Value() 414 if len(s) == 0 { 415 return False.ToObject(), nil 416 } 417 418 if len(s) == 1 { 419 return GetBool(isUpper(s[0])).ToObject(), nil 420 } 421 422 cased := false 423 previousIsCased := false 424 425 for i := range s { 426 if isUpper(s[i]) { 427 if previousIsCased { 428 return False.ToObject(), nil 429 } 430 previousIsCased = true 431 cased = true 432 } else if isLower(s[i]) { 433 if !previousIsCased { 434 return False.ToObject(), nil 435 } 436 previousIsCased = true 437 cased = true 438 } else { 439 previousIsCased = false 440 } 441 } 442 443 return GetBool(cased).ToObject(), nil 444 } 445 446 func strIsUpper(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) { 447 if raised := checkMethodArgs(f, "isupper", args, StrType); raised != nil { 448 return nil, raised 449 } 450 s := toStrUnsafe(args[0]).Value() 451 if len(s) == 0 { 452 return False.ToObject(), nil 453 } 454 for i := range s { 455 if !isUpper(s[i]) { 456 return False.ToObject(), nil 457 } 458 } 459 return True.ToObject(), nil 460 } 461 462 func strJoin(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) { 463 if raised := checkMethodArgs(f, "join", args, StrType, ObjectType); raised != nil { 464 return nil, raised 465 } 466 sep := toStrUnsafe(args[0]).Value() 467 var result *Object 468 raised := seqApply(f, args[1], func(parts []*Object, _ bool) *BaseException { 469 numParts := len(parts) 470 if numParts == 0 { 471 result = NewStr("").ToObject() 472 return nil 473 } 474 // Calculate the size of the required buffer. 
475 numChars := (numParts - 1) * len(sep) 476 for i, part := range parts { 477 if part.isInstance(StrType) { 478 numChars += len(toStrUnsafe(part).Value()) 479 } else if part.isInstance(UnicodeType) { 480 // Some element was unicode so use the unicode 481 // implementation. 482 var raised *BaseException 483 s, raised := unicodeCoerce(f, args[0]) 484 if raised != nil { 485 return raised 486 } 487 result, raised = unicodeJoinParts(f, s, parts) 488 return raised 489 } else { 490 format := "sequence item %d: expected string, %s found" 491 return f.RaiseType(TypeErrorType, fmt.Sprintf(format, i, part.typ.Name())) 492 } 493 } 494 // Piece together the result string into buf. 495 buf := bytes.Buffer{} 496 buf.Grow(numChars) 497 for i, part := range parts { 498 if i > 0 { 499 buf.WriteString(sep) 500 } 501 buf.WriteString(toStrUnsafe(part).Value()) 502 } 503 result = NewStr(buf.String()).ToObject() 504 return nil 505 }) 506 if raised != nil { 507 return nil, raised 508 } 509 return result, nil 510 } 511 512 func strLE(f *Frame, v, w *Object) (*Object, *BaseException) { 513 return strCompare(v, w, True, True, False), nil 514 } 515 516 func strLen(f *Frame, o *Object) (*Object, *BaseException) { 517 return NewInt(len(toStrUnsafe(o).Value())).ToObject(), nil 518 } 519 520 func strLJust(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) { 521 s, width, fill, raised := strJustDecodeArgs(f, args, "ljust") 522 if raised != nil { 523 return nil, raised 524 } 525 if len(s) >= width { 526 return NewStr(s).ToObject(), nil 527 } 528 return NewStr(pad(s, 0, width-len(s), fill)).ToObject(), nil 529 } 530 531 func strLower(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) { 532 expectedTypes := []*Type{StrType} 533 if raised := checkMethodArgs(f, "lower", args, expectedTypes...); raised != nil { 534 return nil, raised 535 } 536 s := toStrUnsafe(args[0]).Value() 537 numBytes := len(s) 538 if numBytes == 0 { 539 return args[0], nil 540 } 541 b := make([]byte, 
numBytes) 542 for i := 0; i < numBytes; i++ { 543 b[i] = toLower(s[i]) 544 } 545 return NewStr(string(b)).ToObject(), nil 546 } 547 548 func strLStrip(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) { 549 return strStripImpl(f, args, stripSideLeft) 550 } 551 552 func strLT(f *Frame, v, w *Object) (*Object, *BaseException) { 553 return strCompare(v, w, True, False, False), nil 554 } 555 556 func strMul(f *Frame, v, w *Object) (*Object, *BaseException) { 557 s := toStrUnsafe(v).Value() 558 n, ok, raised := strRepeatCount(f, len(s), w) 559 if raised != nil { 560 return nil, raised 561 } 562 if !ok { 563 return NotImplemented, nil 564 } 565 return NewStr(strings.Repeat(s, n)).ToObject(), nil 566 } 567 568 func strNative(f *Frame, o *Object) (reflect.Value, *BaseException) { 569 return reflect.ValueOf(toStrUnsafe(o).Value()), nil 570 } 571 572 func strNE(f *Frame, v, w *Object) (*Object, *BaseException) { 573 return strCompare(v, w, True, False, True), nil 574 } 575 576 func strNew(f *Frame, t *Type, args Args, _ KWArgs) (*Object, *BaseException) { 577 if t != StrType { 578 // Allocate a plain str and then copy it's value into an object 579 // of the str subtype. 580 s, raised := strNew(f, StrType, args, nil) 581 if raised != nil { 582 return nil, raised 583 } 584 result := toStrUnsafe(newObject(t)) 585 result.value = toStrUnsafe(s).Value() 586 return result.ToObject(), nil 587 } 588 argc := len(args) 589 if argc == 0 { 590 // Empty string. 
591 return newObject(t), nil 592 } 593 if argc != 1 { 594 return nil, f.RaiseType(TypeErrorType, fmt.Sprintf("str() takes at most 1 argument (%d given)", argc)) 595 } 596 o := args[0] 597 if str := o.typ.slots.Str; str != nil { 598 result, raised := str.Fn(f, o) 599 if raised != nil { 600 return nil, raised 601 } 602 if !result.isInstance(StrType) { 603 format := "__str__ returned non-string (type %s)" 604 return nil, f.RaiseType(TypeErrorType, fmt.Sprintf(format, result.typ.Name())) 605 } 606 return result, nil 607 } 608 s, raised := Repr(f, o) 609 if raised != nil { 610 return nil, raised 611 } 612 return s.ToObject(), nil 613 } 614 615 // strPartition splits the string at the first occurrence of sep, and 616 // return a 3-tuple containing the part before the separator, the separator 617 // itself, and the part after the separator. If the separator is not found, 618 // return a 3-tuple containing the string itself, followed by two empty strings. 619 func strPartition(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) { 620 if raised := checkMethodArgs(f, "partition", args, StrType, StrType); raised != nil { 621 return nil, raised 622 } 623 sep := toStrUnsafe(args[1]).Value() 624 if sep == "" { 625 return nil, f.RaiseType(ValueErrorType, "empty separator") 626 } 627 s := toStrUnsafe(args[0]).Value() 628 pos := strings.Index(s, sep) 629 if pos < 0 { 630 emptyStr := NewStr("").ToObject() 631 return NewTuple(args[0], emptyStr, emptyStr).ToObject(), nil 632 } 633 start := NewStr(s[0:pos]).ToObject() 634 end := NewStr(s[pos+len(sep):]).ToObject() 635 return NewTuple(start, args[1], end).ToObject(), nil 636 } 637 638 // strPartition splits the string at the last occurrence of sep, and 639 // return a 3-tuple containing the part before the separator, the separator 640 // itself, and the part after the separator. If the separator is not found, 641 // return a 3-tuple containing two empty strings, followed by the string itself. 
642 func strRPartition(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) { 643 if raised := checkMethodArgs(f, "rpartition", args, StrType, StrType); raised != nil { 644 return nil, raised 645 } 646 sep := toStrUnsafe(args[1]).Value() 647 if sep == "" { 648 return nil, f.RaiseType(ValueErrorType, "empty separator") 649 } 650 s := toStrUnsafe(args[0]).Value() 651 pos := strings.LastIndex(s, sep) 652 if pos < 0 { 653 emptyStr := NewStr("").ToObject() 654 return NewTuple(emptyStr, emptyStr, args[0]).ToObject(), nil 655 } 656 start := NewStr(s[0:pos]).ToObject() 657 end := NewStr(s[pos+len(sep):]).ToObject() 658 return NewTuple(start, args[1], end).ToObject(), nil 659 } 660 661 // strReplace returns a copy of the string s with the first n non-overlapping 662 // instances of old replaced by sub. If old is empty, it matches at the 663 // beginning of the string. If n < 0, there is no limit on the number of 664 // replacements. 665 func strReplace(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) { 666 var raised *BaseException 667 // TODO: Support unicode replace. 668 expectedTypes := []*Type{StrType, StrType, StrType, ObjectType} 669 argc := len(args) 670 if argc == 3 { 671 expectedTypes = expectedTypes[:argc] 672 } 673 if raised := checkMethodArgs(f, "replace", args, expectedTypes...); raised != nil { 674 return nil, raised 675 } 676 n := -1 677 if argc == 4 { 678 n, raised = ToIntValue(f, args[3]) 679 if raised != nil { 680 return nil, raised 681 } 682 } 683 s := toStrUnsafe(args[0]).Value() 684 // Returns early if no need to replace. 685 if n == 0 { 686 return NewStr(s).ToObject(), nil 687 } 688 689 old := toStrUnsafe(args[1]).Value() 690 sub := toStrUnsafe(args[2]).Value() 691 numBytes := len(s) 692 // Even if s and old is blank, replace should return sub, except n is negative. 693 // This is CPython specific behavior. 
694 if numBytes == 0 && old == "" && n >= 0 { 695 return NewStr("").ToObject(), nil 696 } 697 // If old is non-blank, pass to strings.Replace. 698 if len(old) > 0 { 699 return NewStr(strings.Replace(s, old, sub, n)).ToObject(), nil 700 } 701 702 // If old is blank, insert sub after every bytes on s and beginning. 703 if n < 0 { 704 n = numBytes + 1 705 } 706 // Insert sub at beginning. 707 buf := bytes.Buffer{} 708 buf.WriteString(sub) 709 n-- 710 // Insert after every byte. 711 i := 0 712 for n > 0 && i < numBytes { 713 buf.WriteByte(s[i]) 714 buf.WriteString(sub) 715 i++ 716 n-- 717 } 718 // Write the remaining string. 719 if i < numBytes { 720 buf.WriteString(s[i:]) 721 } 722 return NewStr(buf.String()).ToObject(), nil 723 } 724 725 func strRepr(_ *Frame, o *Object) (*Object, *BaseException) { 726 s := toStrUnsafe(o).Value() 727 buf := bytes.Buffer{} 728 buf.WriteRune('\'') 729 numBytes := len(s) 730 for i := 0; i < numBytes; i++ { 731 r := rune(s[i]) 732 if escape, ok := escapeMap[r]; ok { 733 buf.WriteString(escape) 734 } else if r > unicode.MaxASCII || !unicode.IsPrint(r) { 735 buf.WriteString(fmt.Sprintf(`\x%02x`, r)) 736 } else { 737 buf.WriteRune(r) 738 } 739 } 740 buf.WriteRune('\'') 741 return NewStr(buf.String()).ToObject(), nil 742 } 743 744 func strRFind(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) { 745 return strFindOrIndex(f, args, func(s, sub string) (int, *BaseException) { 746 return strings.LastIndex(s, sub), nil 747 }) 748 } 749 750 func strRIndex(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) { 751 return strFindOrIndex(f, args, func(s, sub string) (i int, raised *BaseException) { 752 i = strings.LastIndex(s, sub) 753 if i == -1 { 754 raised = f.RaiseType(ValueErrorType, "substring not found") 755 } 756 return i, raised 757 }) 758 } 759 760 func strRJust(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) { 761 s, width, fill, raised := strJustDecodeArgs(f, args, "rjust") 762 if raised != nil { 763 return nil, 
raised 764 } 765 if len(s) >= width { 766 return NewStr(s).ToObject(), nil 767 } 768 return NewStr(pad(s, width-len(s), 0, fill)).ToObject(), nil 769 } 770 771 func strSplit(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) { 772 expectedTypes := []*Type{StrType, ObjectType, IntType} 773 argc := len(args) 774 if argc == 1 || argc == 2 { 775 expectedTypes = expectedTypes[:argc] 776 } 777 if raised := checkMethodArgs(f, "split", args, expectedTypes...); raised != nil { 778 return nil, raised 779 } 780 sep := "" 781 if argc > 1 { 782 if arg1 := args[1]; arg1.isInstance(StrType) { 783 sep = toStrUnsafe(arg1).Value() 784 if sep == "" { 785 return nil, f.RaiseType(ValueErrorType, "empty separator") 786 } 787 } else if arg1 != None { 788 return nil, f.RaiseType(TypeErrorType, "expected a str separator") 789 } 790 } 791 maxSplit := -1 792 if argc > 2 { 793 if i := toIntUnsafe(args[2]).Value(); i >= 0 { 794 maxSplit = i + 1 795 } 796 } 797 s := toStrUnsafe(args[0]).Value() 798 var parts []string 799 if sep == "" { 800 s = strings.TrimLeft(s, string(strASCIISpaces)) 801 parts = whitespaceSplitRegexp.Split(s, maxSplit) 802 l := len(parts) 803 if l > 0 && strings.Trim(parts[l-1], string(strASCIISpaces)) == "" { 804 parts = parts[:l-1] 805 } 806 } else { 807 parts = strings.SplitN(s, sep, maxSplit) 808 } 809 results := make([]*Object, len(parts)) 810 for i, part := range parts { 811 results[i] = NewStr(part).ToObject() 812 } 813 return NewList(results...).ToObject(), nil 814 } 815 816 func strSplitLines(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) { 817 expectedTypes := []*Type{StrType, ObjectType} 818 argc := len(args) 819 if argc == 1 { 820 expectedTypes = expectedTypes[:1] 821 } 822 if raised := checkMethodArgs(f, "splitlines", args, expectedTypes...); raised != nil { 823 return nil, raised 824 } 825 keepEnds := false 826 if argc == 2 { 827 i, raised := ToIntValue(f, args[1]) 828 if raised != nil { 829 return nil, raised 830 } 831 keepEnds = i != 0 
832 } 833 s := toStrUnsafe(args[0]).Value() 834 numChars := len(s) 835 start, end := 0, 0 836 lines := make([]*Object, 0, 2) 837 for start < numChars { 838 eol := 0 839 for end = start; end < numChars; end++ { 840 c := s[end] 841 if c == '\n' { 842 eol = end + 1 843 break 844 } 845 if c == '\r' { 846 eol = end + 1 847 if eol < numChars && s[eol] == '\n' { 848 eol++ 849 } 850 break 851 } 852 } 853 if end >= numChars { 854 eol = end 855 } 856 line := "" 857 if keepEnds { 858 line = s[start:eol] 859 } else { 860 line = s[start:end] 861 } 862 lines = append(lines, NewStr(line).ToObject()) 863 start = eol 864 } 865 return NewList(lines...).ToObject(), nil 866 } 867 868 func strStrip(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) { 869 return strStripImpl(f, args, stripSideBoth) 870 } 871 872 func strRStrip(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) { 873 return strStripImpl(f, args, stripSideRight) 874 } 875 876 func strStripImpl(f *Frame, args Args, side stripSide) (*Object, *BaseException) { 877 expectedTypes := []*Type{StrType, ObjectType} 878 argc := len(args) 879 if argc == 1 { 880 expectedTypes = expectedTypes[:argc] 881 } 882 if raised := checkMethodArgs(f, "strip", args, expectedTypes...); raised != nil { 883 return nil, raised 884 } 885 s := toStrUnsafe(args[0]) 886 charsArg := None 887 if argc > 1 { 888 charsArg = args[1] 889 } 890 var chars []byte 891 switch { 892 case charsArg.isInstance(UnicodeType): 893 u, raised := s.Decode(f, EncodeDefault, EncodeStrict) 894 if raised != nil { 895 return nil, raised 896 } 897 return unicodeStrip(f, Args{u.ToObject(), charsArg}, nil) 898 case charsArg.isInstance(StrType): 899 chars = []byte(toStrUnsafe(charsArg).Value()) 900 case charsArg == None: 901 chars = strASCIISpaces 902 default: 903 return nil, f.RaiseType(TypeErrorType, "strip arg must be None, str or unicode") 904 } 905 byteSlice := []byte(s.Value()) 906 numBytes := len(byteSlice) 907 lindex := 0 908 if side == stripSideLeft || side == 
stripSideBoth { 909 LeftStrip: 910 for ; lindex < numBytes; lindex++ { 911 b := byteSlice[lindex] 912 for _, c := range chars { 913 if b == c { 914 continue LeftStrip 915 } 916 } 917 break 918 } 919 } 920 rindex := numBytes 921 if side == stripSideRight || side == stripSideBoth { 922 RightStrip: 923 for ; rindex > lindex; rindex-- { 924 b := byteSlice[rindex-1] 925 for _, c := range chars { 926 if b == c { 927 continue RightStrip 928 } 929 } 930 break 931 } 932 } 933 return NewStr(string(byteSlice[lindex:rindex])).ToObject(), nil 934 } 935 936 func strStartsWith(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) { 937 return strStartsEndsWith(f, "startswith", args) 938 } 939 940 func strStr(_ *Frame, o *Object) (*Object, *BaseException) { 941 if o.typ == StrType { 942 return o, nil 943 } 944 return NewStr(toStrUnsafe(o).Value()).ToObject(), nil 945 } 946 947 func strSwapCase(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) { 948 if raised := checkMethodArgs(f, "swapcase", args, StrType); raised != nil { 949 return nil, raised 950 } 951 s := toStrUnsafe(args[0]).Value() 952 numBytes := len(s) 953 if numBytes == 0 { 954 return args[0], nil 955 } 956 b := make([]byte, numBytes) 957 for i := 0; i < numBytes; i++ { 958 if isLower(s[i]) { 959 b[i] = toUpper(s[i]) 960 } else if isUpper(s[i]) { 961 b[i] = toLower(s[i]) 962 } else { 963 b[i] = s[i] 964 } 965 } 966 return NewStr(string(b)).ToObject(), nil 967 } 968 969 func initStrType(dict map[string]*Object) { 970 dict["__getnewargs__"] = newBuiltinFunction("__getnewargs__", strGetNewArgs).ToObject() 971 dict["capitalize"] = newBuiltinFunction("capitalize", strCapitalize).ToObject() 972 dict["count"] = newBuiltinFunction("count", strCount).ToObject() 973 dict["center"] = newBuiltinFunction("center", strCenter).ToObject() 974 dict["decode"] = newBuiltinFunction("decode", strDecode).ToObject() 975 dict["endswith"] = newBuiltinFunction("endswith", strEndsWith).ToObject() 976 dict["find"] = 
newBuiltinFunction("find", strFind).ToObject() 977 dict["index"] = newBuiltinFunction("index", strIndex).ToObject() 978 dict["isalnum"] = newBuiltinFunction("isalnum", strIsAlNum).ToObject() 979 dict["isalpha"] = newBuiltinFunction("isalpha", strIsAlpha).ToObject() 980 dict["isdigit"] = newBuiltinFunction("isdigit", strIsDigit).ToObject() 981 dict["islower"] = newBuiltinFunction("islower", strIsLower).ToObject() 982 dict["isspace"] = newBuiltinFunction("isspace", strIsSpace).ToObject() 983 dict["istitle"] = newBuiltinFunction("istitle", strIsTitle).ToObject() 984 dict["isupper"] = newBuiltinFunction("isupper", strIsUpper).ToObject() 985 dict["join"] = newBuiltinFunction("join", strJoin).ToObject() 986 dict["lower"] = newBuiltinFunction("lower", strLower).ToObject() 987 dict["ljust"] = newBuiltinFunction("ljust", strLJust).ToObject() 988 dict["lstrip"] = newBuiltinFunction("lstrip", strLStrip).ToObject() 989 dict["rfind"] = newBuiltinFunction("rfind", strRFind).ToObject() 990 dict["rindex"] = newBuiltinFunction("rindex", strRIndex).ToObject() 991 dict["rjust"] = newBuiltinFunction("rjust", strRJust).ToObject() 992 dict["split"] = newBuiltinFunction("split", strSplit).ToObject() 993 dict["splitlines"] = newBuiltinFunction("splitlines", strSplitLines).ToObject() 994 dict["startswith"] = newBuiltinFunction("startswith", strStartsWith).ToObject() 995 dict["strip"] = newBuiltinFunction("strip", strStrip).ToObject() 996 dict["swapcase"] = newBuiltinFunction("swapcase", strSwapCase).ToObject() 997 dict["partition"] = newBuiltinFunction("partition", strPartition).ToObject() 998 dict["rpartition"] = newBuiltinFunction("rpartition", strRPartition).ToObject() 999 dict["replace"] = newBuiltinFunction("replace", strReplace).ToObject() 1000 dict["rstrip"] = newBuiltinFunction("rstrip", strRStrip).ToObject() 1001 dict["title"] = newBuiltinFunction("title", strTitle).ToObject() 1002 dict["upper"] = newBuiltinFunction("upper", strUpper).ToObject() 1003 dict["zfill"] = 
newBuiltinFunction("zfill", strZFill).ToObject()
	StrType.slots.Add = &binaryOpSlot{strAdd}
	StrType.slots.Contains = &binaryOpSlot{strContains}
	StrType.slots.Eq = &binaryOpSlot{strEq}
	StrType.slots.GE = &binaryOpSlot{strGE}
	StrType.slots.GetItem = &binaryOpSlot{strGetItem}
	StrType.slots.GT = &binaryOpSlot{strGT}
	StrType.slots.Hash = &unaryOpSlot{strHash}
	StrType.slots.LE = &binaryOpSlot{strLE}
	StrType.slots.Len = &unaryOpSlot{strLen}
	StrType.slots.LT = &binaryOpSlot{strLT}
	StrType.slots.Mod = &binaryOpSlot{strMod}
	StrType.slots.Mul = &binaryOpSlot{strMul}
	StrType.slots.NE = &binaryOpSlot{strNE}
	StrType.slots.New = &newSlot{strNew}
	StrType.slots.Native = &nativeSlot{strNative}
	StrType.slots.Repr = &unaryOpSlot{strRepr}
	StrType.slots.RMul = &binaryOpSlot{strMul}
	StrType.slots.Str = &unaryOpSlot{strStr}
}

// strCompare compares the str v against w and returns one of the supplied
// result objects: eqResult when both are the same object or hold equal
// values, ltResult when v's value sorts before w's (Go string ordering) and
// gtResult otherwise. NotImplemented is returned when w is not a str.
func strCompare(v, w *Object, ltResult, eqResult, gtResult *Int) *Object {
	if v == w {
		return eqResult.ToObject()
	}
	if !w.isInstance(StrType) {
		return NotImplemented
	}
	lhs, rhs := toStrUnsafe(v).Value(), toStrUnsafe(w).Value()
	switch {
	case lhs < rhs:
		return ltResult.ToObject()
	case lhs == rhs:
		return eqResult.ToObject()
	default:
		return gtResult.ToObject()
	}
}

// strMod implements the '%' string interpolation operator for str. v is the
// format string and args holds the values: a tuple is consumed positionally,
// a dict additionally enables "%(key)s" lookups, and any other object is
// treated as a single positional value.
//
// Supported conversions are r, s, f, d, x, X, o, c and %. A numeric field
// width and the "0" flag are honoured; "*" widths, precisions and all other
// flags raise NotImplementedError. TypeError is raised when too few or too
// many positional values are supplied, and ValueError when a conversion
// specifier cannot be parsed.
func strMod(f *Frame, v, args *Object) (*Object, *BaseException) {
	format := toStrUnsafe(v).Value()
	// If the format string contains mappings, args must be a dict,
	// otherwise it must be treated as a tuple of values, so if it's not a
	// tuple already it must be transformed into a single element tuple.
	var values *Tuple
	var mappings *Dict
	if args.isInstance(TupleType) {
		values = toTupleUnsafe(args)
	} else {
		values = NewTuple1(args)
		if args.isInstance(DictType) {
			mappings = toDictUnsafe(args)
		}
	}
	// Submatch indices of strInterpolationRegexp.
	const (
		idxAll = iota
		// TODO: mapping keys can contain balanced parentheses, these
		// should be matched manually before using the regexp.
		_
		idxMappingKey
		idxFlags
		idxWidth
		idxPrecision
		_
		idxType
	)
	var buf bytes.Buffer
	valueIndex := 0
	index := strings.Index(format, "%")
	for index != -1 {
		// Copy the literal text preceding the specifier, then parse the
		// specifier that now sits at the start of format.
		buf.WriteString(format[:index])
		format = format[index:]
		matches := strInterpolationRegexp.FindStringSubmatch(format)
		if matches == nil {
			return nil, f.RaiseType(ValueErrorType, "invalid format spec")
		}
		mappingKey, fieldType := matches[idxMappingKey], matches[idxType]
		var value *Object
		if mappingKey != "" {
			// Nb: mappings are checked even in case of "%%".
			if mappings == nil {
				return nil, f.RaiseType(TypeErrorType, "format requires a mapping")
			}
			var raised *BaseException
			value, raised = mappings.GetItemString(f, mappingKey)
			if raised != nil {
				return nil, raised
			}
			if value == nil {
				return nil, f.RaiseType(KeyErrorType, fmt.Sprintf("'%s'", mappingKey))
			}
			// The dict was wrapped in a one element tuple above; mark it
			// as consumed so the trailing argument count check passes.
			valueIndex = 1
		} else if fieldType != "%" {
			if valueIndex >= len(values.elems) {
				return nil, f.RaiseType(TypeErrorType, "not enough arguments for format string")
			}
			value = values.elems[valueIndex]
			valueIndex++
		}
		fieldWidth := -1
		if matches[idxWidth] == "*" || matches[idxPrecision] != "" {
			return nil, f.RaiseType(NotImplementedErrorType, "field width not yet supported")
		}
		if matches[idxWidth] != "" {
			var err error
			fieldWidth, err = strconv.Atoi(matches[idxWidth])
			if err != nil {
				return nil, f.RaiseType(TypeErrorType, fmt.Sprint(err))
			}
		}
		flags := matches[idxFlags]
		if flags != "" && flags != "0" {
			return nil, f.RaiseType(NotImplementedErrorType, "conversion flags not yet supported")
		}
		// Numeric conversions pad with the flag character ("0") when one
		// was given and with spaces otherwise.
		fillchar := " "
		if flags != "" {
			fillchar = flags
		}
		var val string
		switch fieldType {
		case "r", "s":
			var s *Str
			var raised *BaseException
			if fieldType == "r" {
				s, raised = Repr(f, value)
			} else {
				s, raised = ToStr(f, value)
			}
			if raised != nil {
				return nil, raised
			}
			val = s.Value()
			if fieldWidth > 0 {
				val = strLeftPad(val, fieldWidth, " ")
			}
			buf.WriteString(val)
		case "f":
			fl, ok := floatCoerce(value)
			if !ok {
				return nil, f.RaiseType(TypeErrorType, fmt.Sprintf("float argument required, not %s", value.typ.Name()))
			}
			val = strconv.FormatFloat(fl, 'f', 6, 64)
			if fieldWidth > 0 {
				val = strLeftPad(val, fieldWidth, fillchar)
			}
			buf.WriteString(val)
		case "d", "x", "X", "o":
			i, raised := ToInt(f, value)
			if raised != nil {
				return nil, raised
			}
			switch fieldType {
			case "d":
				s, raised := ToStr(f, i)
				if raised != nil {
					return nil, raised
				}
				val = s.Value()
			case "o":
				if value.isInstance(LongType) {
					val = toLongUnsafe(value).Value().Text(8)
				} else {
					val = strconv.FormatInt(int64(toIntUnsafe(i).Value()), 8)
				}
			default:
				if value.isInstance(LongType) {
					val = toLongUnsafe(value).Value().Text(16)
				} else {
					val = strconv.FormatInt(int64(toIntUnsafe(i).Value()), 16)
				}
				if fieldType == "X" {
					val = strings.ToUpper(val)
				}
			}
			if fieldWidth > 0 {
				val = strLeftPad(val, fieldWidth, fillchar)
			}
			buf.WriteString(val)
		case "c":
			switch {
			case value.isInstance(IntType):
				// A str holds bytes, so the integer must be emitted as
				// exactly one byte. string(int) would instead produce
				// the UTF-8 encoding of the rune (two bytes for
				// 128..255, U+FFFD for invalid values).
				n := toIntUnsafe(value).Value()
				if n < 0 {
					return nil, f.RaiseType(OverflowErrorType, "unsigned byte integer is less than minimum")
				}
				if n > 255 {
					return nil, f.RaiseType(OverflowErrorType, "unsigned byte integer is greater than maximum")
				}
				val = string([]byte{byte(n)})
			case value.isInstance(StrType) && len(toStrUnsafe(value).Value()) == 1:
				val = toStrUnsafe(value).Value()
			default:
				// Never reinterpret an arbitrary object as a Str.
				return nil, f.RaiseType(TypeErrorType, "%c requires int or char")
			}
			buf.WriteString(val)
		case "%":
			val = "%"
			if fieldWidth > 0 {
				val = strLeftPad(val, fieldWidth, " ")
			}
			buf.WriteString(val)
		default:
			format := "conversion type not yet supported: %s"
			return nil, f.RaiseType(NotImplementedErrorType, fmt.Sprintf(format, fieldType))
		}
		// Skip past the specifier and look for the next one.
		format = format[len(matches[idxAll]):]
		index = strings.Index(format, "%")
	}
	if valueIndex < len(values.elems) {
		return nil, f.RaiseType(TypeErrorType, "not all arguments converted during string formatting")
	}
	buf.WriteString(format)
	return NewStr(buf.String()).ToObject(), nil
}

// strRepeatCount converts mult into a repetition count for a sequence of
// numChars characters. The boolean result reports whether mult is an int or
// long at all; when it is false (and no exception is returned) the caller
// should treat the operand as unsupported. Counts that are zero or negative
// yield 0. OverflowError is raised when a long does not fit in an int or
// when numChars * count would exceed MaxInt.
func strRepeatCount(f *Frame, numChars int, mult *Object) (int, bool, *BaseException) {
	var n int
	switch {
	case mult.isInstance(IntType):
		n = toIntUnsafe(mult).Value()
	case mult.isInstance(LongType):
		l := toLongUnsafe(mult).Value()
		if !numInIntRange(l) {
			return 0, false, f.RaiseType(OverflowErrorType, fmt.Sprintf("cannot fit '%s' into an index-sized integer", mult.typ.Name()))
		}
		n = int(l.Int64())
	default:
		return 0, false, nil
	}
	if n <= 0 {
		return 0, true, nil
	}
	// Division avoids overflowing while checking numChars * n > MaxInt.
	if numChars > MaxInt/n {
		return 0, false, f.RaiseType(OverflowErrorType, errResultTooLarge)
	}
	return n, true, nil
}

// adjustIndex normalizes a Python style [start, end) pair against a sequence
// of the given length. end is capped at length, negative values count from
// the end of the sequence and are floored at zero. Note that start is not
// capped at length; callers must handle start > length themselves.
func adjustIndex(start, end, length int) (int, int) {
	if end > length {
		end = length
	} else if end < 0 {
		end += length
		if end < 0 {
			end = 0
		}
	}
	if start < 0 {
		start += length
		if start < 0 {
			start = 0
		}
	}
	return start, end
}

// strStartsEndsWith implements both str.startswith and str.endswith; method
// must be the name of the method being served and selects suffix matching
// when it equals "endswith". args is (self, prefix-or-tuple[, start[, end]]).
// The result is True when any candidate matches the [start:end) slice of
// self. TypeError is raised when the candidates are not basestring
// instances (or a tuple of them).
func strStartsEndsWith(f *Frame, method string, args Args) (*Object, *BaseException) {
	expectedTypes := []*Type{StrType, ObjectType, IntType, IntType}
	argc := len(args)
	if argc == 2 || argc == 3 {
		expectedTypes = expectedTypes[:argc]
	}
	if raised := checkMethodArgs(f, method, args, expectedTypes...); raised != nil {
		return nil, raised
	}
	matchesArg := args[1]
	var matches []string
	switch {
	case matchesArg.isInstance(TupleType):
		elems := toTupleUnsafe(matchesArg).elems
		matches = make([]string, len(elems))
		for i, o := range elems {
			if !o.isInstance(BaseStringType) {
				return nil, f.RaiseType(TypeErrorType, "expected a str")
			}
			s, raised := ToStr(f, o)
			if raised != nil {
				return nil, raised
			}
			matches[i] = s.Value()
		}
	case matchesArg.isInstance(BaseStringType):
		s, raised := ToStr(f, matchesArg)
		if raised != nil {
			return nil, raised
		}
		matches = []string{s.Value()}
	default:
		msg := " first arg must be str, unicode, or tuple, not "
		return nil, f.RaiseType(TypeErrorType, method+msg+matchesArg.typ.Name())
	}
	s := toStrUnsafe(args[0]).Value()
	l := len(s)
	start, end := 0, l
	if argc >= 3 {
		start = toIntUnsafe(args[2]).Value()
	}
	if argc == 4 {
		end = toIntUnsafe(args[3]).Value()
	}
	start, end = adjustIndex(start, end, l)
	if start > end {
		// start == end may still return true when matching ''.
		return False.ToObject(), nil
	}
	s = s[start:end]
	matcher := strings.HasPrefix
	if method == "endswith" {
		matcher = strings.HasSuffix
	}
	for _, match := range matches {
		if matcher(s, match) {
			return True.ToObject(), nil
		}
	}
	return False.ToObject(), nil
}

// strTitle implements str.title: the first ASCII letter of every run of
// letters is upper cased and the remaining letters of the run are lower
// cased. Non-letter bytes are copied unchanged and end the current run. The
// receiver itself is returned for the empty string.
func strTitle(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) {
	expectedTypes := []*Type{StrType}
	if raised := checkMethodArgs(f, "title", args, expectedTypes...); raised != nil {
		return nil, raised
	}
	s := toStrUnsafe(args[0]).Value()
	numBytes := len(s)
	if numBytes == 0 {
		return args[0], nil
	}
	b := make([]byte, numBytes)
	previousIsCased := false
	for i := 0; i < numBytes; i++ {
		c := s[i]
		switch {
		case isLower(c):
			if !previousIsCased {
				c = toUpper(c)
			}
			previousIsCased = true
		case isUpper(c):
			if previousIsCased {
				c = toLower(c)
			}
			previousIsCased = true
		default:
			previousIsCased = false
		}
		b[i] = c
	}
	return NewStr(string(b)).ToObject(), nil
}

// strUpper implements str.upper by upper casing every ASCII lower case
// letter of the receiver. The receiver itself is returned for the empty
// string.
func strUpper(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) {
	expectedTypes := []*Type{StrType}
	if raised := checkMethodArgs(f, "upper", args, expectedTypes...); raised != nil {
		return nil, raised
	}
	s := toStrUnsafe(args[0]).Value()
	numBytes := len(s)
	if numBytes == 0 {
		return args[0], nil
	}
	b := make([]byte, numBytes)
	for i := 0; i < numBytes; i++ {
		b[i] = toUpper(s[i])
	}
	return NewStr(string(b)).ToObject(), nil
}

// strZFill implements str.zfill: the receiver is left padded with "0" up to
// the width given by the second argument (converted with ToIntValue), with
// a leading sign kept in front of the zeros.
func strZFill(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) {
	if raised := checkMethodArgs(f, "zfill", args, StrType, ObjectType); raised != nil {
		return nil, raised
	}
	s := toStrUnsafe(args[0]).Value()
	width, raised := ToIntValue(f, args[1])
	if raised != nil {
		return nil, raised
	}
	return NewStr(strLeftPad(s, width, "0")).ToObject(), nil
}

// init interns the empty string and every single byte string so that NewStr
// returns shared objects for them.
func init() {
	InternStr("")
	for i := 0; i < 256; i++ {
		InternStr(string([]byte{byte(i)}))
	}
}

// toLower returns the lower case form of an ASCII upper case letter and any
// other byte unchanged.
func toLower(b byte) byte {
	if isUpper(b) {
		return b + caseOffset
	}
	return b
}

// toUpper returns the upper case form of an ASCII lower case letter and any
// other byte unchanged.
func toUpper(b byte) byte {
	if isLower(b) {
		return b - caseOffset
	}
	return b
}

// isAlNum reports whether c is an ASCII letter or decimal digit.
func isAlNum(c byte) bool {
	return isAlpha(c) || isDigit(c)
}

// isAlpha reports whether c is an ASCII letter.
func isAlpha(c byte) bool {
	return isUpper(c) || isLower(c)
}

// isDigit reports whether c is an ASCII decimal digit.
func isDigit(c byte) bool {
	return '0' <= c && c <= '9'
}

// isLower reports whether c is an ASCII lower case letter.
func isLower(c byte) bool {
	return 'a' <= c && c <= 'z'
}

// isSpace reports whether c is one of the ASCII whitespace bytes: space,
// newline, horizontal tab, vertical tab, form feed or carriage return.
func isSpace(c byte) bool {
	switch c {
	case ' ', '\n', '\t', '\v', '\f', '\r':
		return true
	default:
		return false
	}
}

// isUpper reports whether c is an ASCII upper case letter.
func isUpper(c byte) bool {
	return 'A' <= c && c <= 'Z'
}

// pad returns s with left copies of fillchar prepended and right copies
// appended. Negative counts are treated as zero, and s is returned unchanged
// when nothing needs to be added.
func pad(s string, left int, right int, fillchar string) string {
	if left < 0 {
		left = 0
	}
	if right < 0 {
		right = 0
	}
	if left == 0 && right == 0 {
		return s
	}

	var buf bytes.Buffer
	// The size hint is exact when fillchar is a single byte.
	buf.Grow(left + len(s) + right)
	buf.WriteString(strings.Repeat(fillchar, left))
	buf.WriteString(s)
	buf.WriteString(strings.Repeat(fillchar, right))
	return buf.String()
}

// strLeftPad returns s left padded with fillchar so that its length is at
// least width. Fillchar must be a single character. When fillchar is "0", a
// leading '+' or '-' sign in s is kept in front of the padding.
1452 func strLeftPad(s string, width int, fillchar string) string { 1453 l := len(s) 1454 if width <= l { 1455 return s 1456 } 1457 buf := bytes.Buffer{} 1458 buf.Grow(width) 1459 if l > 0 && fillchar == "0" && (s[0] == '-' || s[0] == '+') { 1460 buf.WriteByte(s[0]) 1461 s = s[1:] 1462 l = len(s) 1463 width-- 1464 } 1465 // TODO: Support or throw fillchar len more than one. 1466 buf.WriteString(strings.Repeat(fillchar, width-l)) 1467 buf.WriteString(s) 1468 return buf.String() 1469 } 1470 1471 type indexFunc func(string, string) (int, *BaseException) 1472 1473 func strFindOrIndex(f *Frame, args Args, fn indexFunc) (*Object, *BaseException) { 1474 // TODO: Support for unicode substring. 1475 expectedTypes := []*Type{StrType, StrType, ObjectType, ObjectType} 1476 argc := len(args) 1477 if argc == 2 || argc == 3 { 1478 expectedTypes = expectedTypes[:argc] 1479 } 1480 if raised := checkMethodArgs(f, "find/index", args, expectedTypes...); raised != nil { 1481 return nil, raised 1482 } 1483 s := toStrUnsafe(args[0]).Value() 1484 l := len(s) 1485 start, end := 0, l 1486 var raised *BaseException 1487 if argc >= 3 && args[2] != None { 1488 start, raised = IndexInt(f, args[2]) 1489 if raised != nil { 1490 return nil, raised 1491 } 1492 } 1493 if argc == 4 && args[3] != None { 1494 end, raised = IndexInt(f, args[3]) 1495 if raised != nil { 1496 return nil, raised 1497 } 1498 } 1499 // Default to an impossible search. 
1500 search, sub := "", "-" 1501 if start <= l { 1502 start, end = adjustIndex(start, end, l) 1503 if start <= end { 1504 sub = toStrUnsafe(args[1]).Value() 1505 search = s[start:end] 1506 } 1507 } 1508 index, raised := fn(search, sub) 1509 if raised != nil { 1510 return nil, raised 1511 } 1512 if index != -1 { 1513 index += start 1514 } 1515 return NewInt(index).ToObject(), nil 1516 } 1517 1518 func strJustDecodeArgs(f *Frame, args Args, name string) (string, int, string, *BaseException) { 1519 expectedTypes := []*Type{StrType, IntType, StrType} 1520 if raised := checkMethodArgs(f, name, args, expectedTypes...); raised != nil { 1521 return "", 0, "", raised 1522 } 1523 s := toStrUnsafe(args[0]).Value() 1524 width := toIntUnsafe(args[1]).Value() 1525 fill := toStrUnsafe(args[2]).Value() 1526 1527 if numChars := len(fill); numChars != 1 { 1528 return s, width, fill, f.RaiseType(TypeErrorType, fmt.Sprintf("%[1]s() argument 2 must be char, not str", name)) 1529 } 1530 1531 return s, width, fill, nil 1532 }