// Source: github.com/markusbkk/elvish@v0.0.0-20231204143114-91dc52438621/pkg/mods/str/str.go

// Package str exposes functionality from Go's strings package as an Elvish
// module.
package str

import (
	"bytes"
	"fmt"
	"strconv"
	"strings"
	"unicode"
	"unicode/utf8"

	"github.com/markusbkk/elvish/pkg/eval"
	"github.com/markusbkk/elvish/pkg/eval/errs"
	"github.com/markusbkk/elvish/pkg/eval/vals"
)

// Ns is the namespace of this module, built under the name "str". Each map key
// is the command name under which the associated Go function is registered.
// Most entries are functions from Go's strings package used directly; the
// lower-case identifiers (fromCodepoints, join, split, ...) are wrappers
// defined in this file.
var Ns = eval.BuildNsNamed("str").
	AddGoFns(map[string]interface{}{
		"compare":      strings.Compare,
		"contains":     strings.Contains,
		"contains-any": strings.ContainsAny,
		"count":        strings.Count,
		"equal-fold":   strings.EqualFold,
		// TODO: Fields, FieldsFunc
		"from-codepoints": fromCodepoints,
		"from-utf8-bytes": fromUtf8Bytes,
		"has-prefix":      strings.HasPrefix,
		"has-suffix":      strings.HasSuffix,
		"index":           strings.Index,
		"index-any":       strings.IndexAny,
		// TODO: IndexFunc
		"join":       join,
		"last-index": strings.LastIndex,
		// TODO: LastIndexFunc, Map, Repeat
		"replace": replace,
		"split":   split,
		// TODO: SplitAfter
		// NOTE(review): strings.Title is marked deprecated in the Go standard
		// library documentation (Go 1.18+); confirm whether a replacement is
		// planned before relying on its word-boundary behavior.
		"title":         strings.Title,
		"to-codepoints": toCodepoints,
		"to-lower":      strings.ToLower,
		"to-title":      strings.ToTitle,
		"to-upper":      strings.ToUpper,
		"to-utf8-bytes": toUtf8Bytes,
		// TODO: ToLowerSpecial, ToTitleSpecial, ToUpperSpecial
		"trim":       strings.Trim,
		"trim-left":  strings.TrimLeft,
		"trim-right": strings.TrimRight,
		// TODO: Trim{Left,Right}Func
		"trim-space":  strings.TrimSpace,
		"trim-prefix": strings.TrimPrefix,
		"trim-suffix": strings.TrimSuffix,
	}).Ns()

//elvdoc:fn compare
//
// ```elvish
// str:compare $a $b
// ```
//
// Compares two strings and outputs an integer that will be 0 if a == b,
// -1 if a < b, and +1 if a > b.
//
// ```elvish-transcript
// ~> str:compare a a
// ▶ 0
// ~> str:compare a b
// ▶ -1
// ~> str:compare b a
// ▶ 1
// ```

//elvdoc:fn contains
//
// ```elvish
// str:contains $str $substr
// ```
//
// Outputs whether `$str` contains `$substr` as a substring.
//
// ```elvish-transcript
// ~> str:contains abcd x
// ▶ $false
// ~> str:contains abcd bc
// ▶ $true
// ```

//elvdoc:fn contains-any
//
// ```elvish
// str:contains-any $str $chars
// ```
//
// Outputs whether `$str` contains any Unicode code points in `$chars`.
//
// ```elvish-transcript
// ~> str:contains-any abcd x
// ▶ $false
// ~> str:contains-any abcd xby
// ▶ $true
// ```

//elvdoc:fn count
//
// ```elvish
// str:count $str $substr
// ```
//
// Outputs the number of non-overlapping instances of `$substr` in `$str`.
// If `$substr` is an empty string, outputs 1 + the number of Unicode code
// points in `$str`.
//
// ```elvish-transcript
// ~> str:count abcdefabcdef bc
// ▶ 2
// ~> str:count abcdef ''
// ▶ 7
// ```

//elvdoc:fn equal-fold
//
// ```elvish
// str:equal-fold $str1 $str2
// ```
//
// Outputs if `$str1` and `$str2`, interpreted as UTF-8 strings, are equal
// under Unicode case-folding.
//
// ```elvish-transcript
// ~> str:equal-fold ABC abc
// ▶ $true
// ~> str:equal-fold abc ab
// ▶ $false
// ```

//elvdoc:fn from-codepoints
//
// ```elvish
// str:from-codepoints $number...
// ```
//
// Outputs a string consisting of the given Unicode codepoints.
Example: 143 // 144 // ```elvish-transcript 145 // ~> str:from-codepoints 0x61 146 // ▶ a 147 // ~> str:from-codepoints 0x4f60 0x597d 148 // ▶ 你好 149 // ``` 150 // 151 // @cf str:to-codepoints 152 153 func fromCodepoints(nums ...int) (string, error) { 154 var b bytes.Buffer 155 for _, num := range nums { 156 if num < 0 || num > unicode.MaxRune { 157 return "", errs.OutOfRange{ 158 What: "codepoint", 159 ValidLow: "0", ValidHigh: strconv.Itoa(unicode.MaxRune), 160 Actual: hex(num), 161 } 162 } 163 if !utf8.ValidRune(rune(num)) { 164 return "", errs.BadValue{ 165 What: "argument to str:from-codepoints", 166 Valid: "valid Unicode codepoint", 167 Actual: hex(num), 168 } 169 } 170 b.WriteRune(rune(num)) 171 } 172 return b.String(), nil 173 } 174 175 func hex(i int) string { 176 if i < 0 { 177 return "-0x" + strconv.FormatInt(-int64(i), 16) 178 } 179 return "0x" + strconv.FormatInt(int64(i), 16) 180 } 181 182 //elvdoc:fn from-utf8-bytes 183 // 184 // ```elvish 185 // str:from-utf8-bytes $number... 186 // ``` 187 // 188 // Outputs a string consisting of the given Unicode bytes. Example: 189 // 190 // ```elvish-transcript 191 // ~> str:from-utf8-bytes 0x61 192 // ▶ a 193 // ~> str:from-utf8-bytes 0xe4 0xbd 0xa0 0xe5 0xa5 0xbd 194 // ▶ 你好 195 // ``` 196 // 197 // @cf str:to-utf8-bytes 198 199 func fromUtf8Bytes(nums ...int) (string, error) { 200 var b bytes.Buffer 201 for _, num := range nums { 202 if num < 0 || num > 255 { 203 return "", errs.OutOfRange{ 204 What: "byte", 205 ValidLow: "0", ValidHigh: "255", 206 Actual: strconv.Itoa(num)} 207 } 208 b.WriteByte(byte(num)) 209 } 210 if !utf8.Valid(b.Bytes()) { 211 return "", errs.BadValue{ 212 What: "arguments to str:from-utf8-bytes", 213 Valid: "valid UTF-8 sequence", 214 Actual: fmt.Sprint(b.Bytes())} 215 } 216 return b.String(), nil 217 } 218 219 //elvdoc:fn has-prefix 220 // 221 // ```elvish 222 // str:has-prefix $str $prefix 223 // ``` 224 // 225 // Outputs if `$str` begins with `$prefix`. 
//
// ```elvish-transcript
// ~> str:has-prefix abc ab
// ▶ $true
// ~> str:has-prefix abc bc
// ▶ $false
// ```

//elvdoc:fn has-suffix
//
// ```elvish
// str:has-suffix $str $suffix
// ```
//
// Outputs if `$str` ends with `$suffix`.
//
// ```elvish-transcript
// ~> str:has-suffix abc ab
// ▶ $false
// ~> str:has-suffix abc bc
// ▶ $true
// ```

//elvdoc:fn index
//
// ```elvish
// str:index $str $substr
// ```
//
// Outputs the index of the first instance of `$substr` in `$str`, or -1
// if `$substr` is not present in `$str`.
//
// ```elvish-transcript
// ~> str:index abcd cd
// ▶ 2
// ~> str:index abcd xyz
// ▶ -1
// ```

//elvdoc:fn index-any
//
// ```elvish
// str:index-any $str $chars
// ```
//
// Outputs the index of the first instance of any Unicode code point
// from `$chars` in `$str`, or -1 if no Unicode code point from `$chars` is
// present in `$str`.
//
// ```elvish-transcript
// ~> str:index-any "chicken" "aeiouy"
// ▶ 2
// ~> str:index-any l33t aeiouy
// ▶ -1
// ```

//elvdoc:fn join
//
// ```elvish
// str:join $sep $input-list?
// ```
//
// Joins inputs with `$sep`. Examples:
//
// ```elvish-transcript
// ~> put lorem ipsum | str:join ,
// ▶ lorem,ipsum
// ~> str:join , [lorem ipsum]
// ▶ lorem,ipsum
// ~> str:join '' [lorem ipsum]
// ▶ loremipsum
// ~> str:join '...' [lorem ipsum]
// ▶ lorem...ipsum
// ```
//
// Etymology: Various languages,
// [Python](https://docs.python.org/3.6/library/stdtypes.html#str.join).
303 // 304 // @cf str:split 305 306 func join(sep string, inputs eval.Inputs) (string, error) { 307 var buf bytes.Buffer 308 var errJoin error 309 first := true 310 inputs(func(v interface{}) { 311 if errJoin != nil { 312 return 313 } 314 if s, ok := v.(string); ok { 315 if first { 316 first = false 317 } else { 318 buf.WriteString(sep) 319 } 320 buf.WriteString(s) 321 } else { 322 errJoin = errs.BadValue{ 323 What: "input to str:join", Valid: "string", Actual: vals.Kind(v)} 324 } 325 }) 326 return buf.String(), errJoin 327 } 328 329 //elvdoc:fn last-index 330 // 331 // ```elvish 332 // str:last-index $str $substr 333 // ``` 334 // 335 // Outputs the index of the last instance of `$substr` in `$str`, 336 // or -1 if `$substr` is not present in `$str`. 337 // 338 // ```elvish-transcript 339 // ~> str:last-index "elven speak elvish" elv 340 // ▶ 12 341 // ~> str:last-index "elven speak elvish" romulan 342 // ▶ -1 343 // ``` 344 345 //elvdoc:fn replace 346 // 347 // ```elvish 348 // str:replace &max=-1 $old $repl $source 349 // ``` 350 // 351 // Replaces all occurrences of `$old` with `$repl` in `$source`. If `$max` is 352 // non-negative, it determines the max number of substitutions. 353 // 354 // **Note**: This command does not support searching by regular expressions, `$old` 355 // is always interpreted as a plain string. Use [re:replace](re.html#re:replace) if 356 // you need to search by regex. 357 358 type maxOpt struct{ Max int } 359 360 func (o *maxOpt) SetDefaultOptions() { o.Max = -1 } 361 362 func replace(opts maxOpt, old, repl, s string) string { 363 return strings.Replace(s, old, repl, opts.Max) 364 } 365 366 //elvdoc:fn split 367 // 368 // ```elvish 369 // str:split &max=-1 $sep $string 370 // ``` 371 // 372 // Splits `$string` by `$sep`. If `$sep` is an empty string, split it into 373 // codepoints. 374 // 375 // If the `&max` option is non-negative, stops after producing the maximum 376 // number of results. 
377 // 378 // ```elvish-transcript 379 // ~> str:split , lorem,ipsum 380 // ▶ lorem 381 // ▶ ipsum 382 // ~> str:split '' 你好 383 // ▶ 你 384 // ▶ 好 385 // ~> str:split &max=2 ' ' 'a b c d' 386 // ▶ a 387 // ▶ 'b c d' 388 // ``` 389 // 390 // **Note**: This command does not support splitting by regular expressions, 391 // `$sep` is always interpreted as a plain string. Use [re:split](re.html#re:split) 392 // if you need to split by regex. 393 // 394 // Etymology: Various languages, in particular 395 // [Python](https://docs.python.org/3.6/library/stdtypes.html#str.split). 396 // 397 // @cf str:join 398 399 func split(fm *eval.Frame, opts maxOpt, sep, s string) error { 400 out := fm.ValueOutput() 401 parts := strings.SplitN(s, sep, opts.Max) 402 for _, p := range parts { 403 err := out.Put(p) 404 if err != nil { 405 return err 406 } 407 } 408 return nil 409 } 410 411 //elvdoc:fn title 412 // 413 // ```elvish 414 // str:title $str 415 // ``` 416 // 417 // Outputs `$str` with all Unicode letters that begin words mapped to their 418 // Unicode title case. 419 // 420 // ```elvish-transcript 421 // ~> str:title "her royal highness" 422 // ▶ Her Royal Highness 423 // ``` 424 425 //elvdoc:fn to-codepoints 426 // 427 // ```elvish 428 // str:to-codepoints $string 429 // ``` 430 // 431 // Outputs value of each codepoint in `$string`, in hexadecimal. Examples: 432 // 433 // ```elvish-transcript 434 // ~> str:to-codepoints a 435 // ▶ 0x61 436 // ~> str:to-codepoints 你好 437 // ▶ 0x4f60 438 // ▶ 0x597d 439 // ``` 440 // 441 // The output format is subject to change. 
442 // 443 // @cf str:from-codepoints 444 445 func toCodepoints(fm *eval.Frame, s string) error { 446 out := fm.ValueOutput() 447 for _, r := range s { 448 err := out.Put("0x" + strconv.FormatInt(int64(r), 16)) 449 if err != nil { 450 return err 451 } 452 } 453 return nil 454 } 455 456 //elvdoc:fn to-lower 457 // 458 // ```elvish 459 // str:to-lower $str 460 // ``` 461 // 462 // Outputs `$str` with all Unicode letters mapped to their lower-case 463 // equivalent. 464 // 465 // ```elvish-transcript 466 // ~> str:to-lower 'ABC!123' 467 // ▶ abc!123 468 // ``` 469 470 //elvdoc:fn to-utf8-bytes 471 // 472 // ```elvish 473 // str:to-utf8-bytes $string 474 // ``` 475 // 476 // Outputs value of each byte in `$string`, in hexadecimal. Examples: 477 // 478 // ```elvish-transcript 479 // ~> str:to-utf8-bytes a 480 // ▶ 0x61 481 // ~> str:to-utf8-bytes 你好 482 // ▶ 0xe4 483 // ▶ 0xbd 484 // ▶ 0xa0 485 // ▶ 0xe5 486 // ▶ 0xa5 487 // ▶ 0xbd 488 // ``` 489 // 490 // The output format is subject to change. 491 // 492 // @cf str:from-utf8-bytes 493 494 func toUtf8Bytes(fm *eval.Frame, s string) error { 495 out := fm.ValueOutput() 496 for _, r := range []byte(s) { 497 err := out.Put("0x" + strconv.FormatInt(int64(r), 16)) 498 if err != nil { 499 return err 500 } 501 } 502 return nil 503 } 504 505 //elvdoc:fn to-title 506 // 507 // ```elvish 508 // str:to-title $str 509 // ``` 510 // 511 // Outputs `$str` with all Unicode letters mapped to their Unicode title case. 512 // 513 // ```elvish-transcript 514 // ~> str:to-title "her royal highness" 515 // ▶ HER ROYAL HIGHNESS 516 // ~> str:to-title "хлеб" 517 // ▶ ХЛЕБ 518 // ``` 519 520 //elvdoc:fn to-upper 521 // 522 // ```elvish 523 // str:to-upper 524 // ``` 525 // 526 // Outputs `$str` with all Unicode letters mapped to their upper-case 527 // equivalent. 
//
// ```elvish-transcript
// ~> str:to-upper 'abc!123'
// ▶ ABC!123
// ```

//elvdoc:fn trim
//
// ```elvish
// str:trim $str $cutset
// ```
//
// Outputs `$str` with all leading and trailing Unicode code points contained
// in `$cutset` removed.
//
// ```elvish-transcript
// ~> str:trim "¡¡¡Hello, Elven!!!" "!¡"
// ▶ 'Hello, Elven'
// ```

//elvdoc:fn trim-left
//
// ```elvish
// str:trim-left $str $cutset
// ```
//
// Outputs `$str` with all leading Unicode code points contained in `$cutset`
// removed. To remove a prefix string use [`str:trim-prefix`](#str:trim-prefix).
//
// ```elvish-transcript
// ~> str:trim-left "¡¡¡Hello, Elven!!!" "!¡"
// ▶ 'Hello, Elven!!!'
// ```

//elvdoc:fn trim-prefix
//
// ```elvish
// str:trim-prefix $str $prefix
// ```
//
// Outputs `$str` minus the leading `$prefix` string. If `$str` doesn't begin
// with `$prefix`, `$str` is output unchanged.
//
// ```elvish-transcript
// ~> str:trim-prefix "¡¡¡Hello, Elven!!!" "¡¡¡Hello, "
// ▶ Elven!!!
// ~> str:trim-prefix "¡¡¡Hello, Elven!!!" "¡¡¡Hola, "
// ▶ '¡¡¡Hello, Elven!!!'
// ```

//elvdoc:fn trim-right
//
// ```elvish
// str:trim-right $str $cutset
// ```
//
// Outputs `$str` with all trailing Unicode code points contained in `$cutset`
// removed. To remove a suffix string use [`str:trim-suffix`](#str:trim-suffix).
//
// ```elvish-transcript
// ~> str:trim-right "¡¡¡Hello, Elven!!!" "!¡"
// ▶ '¡¡¡Hello, Elven'
// ```

//elvdoc:fn trim-space
//
// ```elvish
// str:trim-space $str
// ```
//
// Outputs `$str` with all leading and trailing white space removed as defined
// by Unicode.
//
// ```elvish-transcript
// ~> str:trim-space " \t\n Hello, Elven \n\t\r\n"
// ▶ 'Hello, Elven'
// ```

//elvdoc:fn trim-suffix
//
// ```elvish
// str:trim-suffix $str $suffix
// ```
//
// Outputs `$str` minus the trailing `$suffix` string. If `$str` doesn't end
// with `$suffix`, `$str` is output unchanged.
//
// ```elvish-transcript
// ~> str:trim-suffix "¡¡¡Hello, Elven!!!" ", Elven!!!"
// ▶ ¡¡¡Hello
// ~> str:trim-suffix "¡¡¡Hello, Elven!!!" ", Klingons!!!"
// ▶ '¡¡¡Hello, Elven!!!'
// ```