// Source: github.com/elves/elvish@v0.15.0/pkg/eval/mods/str/str.go

// Package str exposes functionality from Go's strings package as an Elvish
// module.
package str

import (
	"bytes"
	"fmt"
	"strconv"
	"strings"
	"unicode"
	"unicode/utf8"

	"github.com/elves/elvish/pkg/eval"
	"github.com/elves/elvish/pkg/eval/errs"
	"github.com/elves/elvish/pkg/eval/vals"
)

//elvdoc:fn compare
//
// ```elvish
// str:compare $a $b
// ```
//
// Compares two strings and outputs an integer that will be 0 if a == b,
// -1 if a < b, and +1 if a > b.
//
// ```elvish-transcript
// ~> str:compare a a
// ▶ 0
// ~> str:compare a b
// ▶ -1
// ~> str:compare b a
// ▶ 1
// ```

//elvdoc:fn contains
//
// ```elvish
// str:contains $str $substr
// ```
//
// Outputs whether `$str` contains `$substr` as a substring.
//
// ```elvish-transcript
// ~> str:contains abcd x
// ▶ $false
// ~> str:contains abcd bc
// ▶ $true
// ```

//elvdoc:fn contains-any
//
// ```elvish
// str:contains-any $str $chars
// ```
//
// Outputs whether `$str` contains any Unicode code points in `$chars`.
//
// ```elvish-transcript
// ~> str:contains-any abcd x
// ▶ $false
// ~> str:contains-any abcd xby
// ▶ $true
// ```

//elvdoc:fn count
//
// ```elvish
// str:count $str $substr
// ```
//
// Outputs the number of non-overlapping instances of `$substr` in `$str`.
// If `$substr` is an empty string, outputs 1 + the number of Unicode code
// points in `$str`.
//
// ```elvish-transcript
// ~> str:count abcdefabcdef bc
// ▶ 2
// ~> str:count abcdef ''
// ▶ 7
// ```

//elvdoc:fn equal-fold
//
// ```elvish
// str:equal-fold $str1 $str2
// ```
//
// Outputs if `$str1` and `$str2`, interpreted as UTF-8 strings, are equal
// under Unicode case-folding.
91 // 92 // ```elvish-transcript 93 // ~> str:equal-fold ABC abc 94 // ▶ $true 95 // ~> str:equal-fold abc ab 96 // ▶ $false 97 // ``` 98 99 //elvdoc:fn from-codepoints 100 // 101 // ```elvish 102 // str:from-codepoints $number... 103 // ``` 104 // 105 // Outputs a string consisting of the given Unicode codepoints. Example: 106 // 107 // ```elvish-transcript 108 // ~> str:from-codepoints 0x61 109 // ▶ a 110 // ~> str:from-codepoints 0x4f60 0x597d 111 // ▶ 你好 112 // ``` 113 // 114 // @cf str:to-codepoints 115 116 func fromCodepoints(nums ...int) (string, error) { 117 var b bytes.Buffer 118 for _, num := range nums { 119 if num < 0 || num > unicode.MaxRune { 120 return "", errs.OutOfRange{ 121 What: "codepoint", 122 ValidLow: "0", ValidHigh: strconv.Itoa(unicode.MaxRune), 123 Actual: hex(num), 124 } 125 } 126 if !utf8.ValidRune(rune(num)) { 127 return "", errs.BadValue{ 128 What: "argument to str:from-codepoints", 129 Valid: "valid Unicode codepoint", 130 Actual: hex(num), 131 } 132 } 133 b.WriteRune(rune(num)) 134 } 135 return b.String(), nil 136 } 137 138 func hex(i int) string { 139 if i < 0 { 140 return "-0x" + strconv.FormatInt(-int64(i), 16) 141 } 142 return "0x" + strconv.FormatInt(int64(i), 16) 143 } 144 145 //elvdoc:fn from-utf8-bytes 146 // 147 // ```elvish 148 // str:from-utf8-bytes $number... 149 // ``` 150 // 151 // Outputs a string consisting of the given Unicode bytes. 
Example: 152 // 153 // ```elvish-transcript 154 // ~> str:from-utf8-bytes 0x61 155 // ▶ a 156 // ~> str:from-utf8-bytes 0xe4 0xbd 0xa0 0xe5 0xa5 0xbd 157 // ▶ 你好 158 // ``` 159 // 160 // @cf str:to-utf8-bytes 161 162 func fromUtf8Bytes(nums ...int) (string, error) { 163 var b bytes.Buffer 164 for _, num := range nums { 165 if num < 0 || num > 255 { 166 return "", errs.OutOfRange{ 167 What: "byte", 168 ValidLow: "0", ValidHigh: "255", 169 Actual: strconv.Itoa(num)} 170 } 171 b.WriteByte(byte(num)) 172 } 173 if !utf8.Valid(b.Bytes()) { 174 return "", errs.BadValue{ 175 What: "arguments to str:from-utf8-bytes", 176 Valid: "valid UTF-8 sequence", 177 Actual: fmt.Sprint(b.Bytes())} 178 } 179 return b.String(), nil 180 } 181 182 //elvdoc:fn has-prefix 183 // 184 // ```elvish 185 // str:has-prefix $str $prefix 186 // ``` 187 // 188 // Outputs if `$str` begins with `$prefix`. 189 // 190 // ```elvish-transcript 191 // ~> str:has-prefix abc ab 192 // ▶ $true 193 // ~> str:has-prefix abc bc 194 // ▶ $false 195 // ``` 196 197 //elvdoc:fn has-suffix 198 // 199 // ```elvish 200 // str:has-suffix $str $suffix 201 // ``` 202 // 203 // Outputs if `$str` ends with `$suffix`. 204 // 205 // ```elvish-transcript 206 // ~> str:has-suffix abc ab 207 // ▶ $false 208 // ~> str:has-suffix abc bc 209 // ▶ $true 210 // ``` 211 212 //elvdoc:fn index 213 // 214 // ```elvish 215 // str:index $str $substr 216 // ``` 217 // 218 // Outputs the index of the first instance of `$substr` in `$str`, or -1 219 // if `$substr` is not present in `$str`. 220 // 221 // ```elvish-transcript 222 // ~> str:index abcd cd 223 // ▶ 2 224 // ~> str:index abcd xyz 225 // ▶ -1 226 // ``` 227 228 //elvdoc:fn index-any 229 // 230 // ```elvish 231 // str:index-any $str $chars 232 // ``` 233 // 234 // Outputs the index of the first instance of any Unicode code point 235 // from `$chars` in `$str`, or -1 if no Unicode code point from `$chars` is 236 // present in `$str`. 
237 // 238 // ```elvish-transcript 239 // ~> str:index-any "chicken" "aeiouy" 240 // ▶ 2 241 // ~> str:index-any l33t aeiouy 242 // ▶ -1 243 // ``` 244 245 //elvdoc:fn join 246 // 247 // ```elvish 248 // str:join $sep $input-list? 249 // ``` 250 // 251 // Joins inputs with `$sep`. Examples: 252 // 253 // ```elvish-transcript 254 // ~> put lorem ipsum | str:join , 255 // ▶ lorem,ipsum 256 // ~> str:join , [lorem ipsum] 257 // ▶ lorem,ipsum 258 // ~> str:join '' [lorem ipsum] 259 // ▶ loremipsum 260 // ~> str:join '...' [lorem ipsum] 261 // ▶ lorem...ipsum 262 // ``` 263 // 264 // Etymology: Various languages, 265 // [Python](https://docs.python.org/3.6/library/stdtypes.html#str.join). 266 // 267 // @cf str:split 268 269 func join(sep string, inputs eval.Inputs) (string, error) { 270 var buf bytes.Buffer 271 var errJoin error 272 first := true 273 inputs(func(v interface{}) { 274 if errJoin != nil { 275 return 276 } 277 if s, ok := v.(string); ok { 278 if first { 279 first = false 280 } else { 281 buf.WriteString(sep) 282 } 283 buf.WriteString(s) 284 } else { 285 errJoin = errs.BadValue{ 286 What: "input to str:join", Valid: "string", Actual: vals.Kind(v)} 287 } 288 }) 289 return buf.String(), errJoin 290 } 291 292 //elvdoc:fn last-index 293 // 294 // ```elvish 295 // str:last-index $str $substr 296 // ``` 297 // 298 // Outputs the index of the last instance of `$substr` in `$str`, 299 // or -1 if `$substr` is not present in `$str`. 300 // 301 // ```elvish-transcript 302 // ~> str:last-index "elven speak elvish" elv 303 // ▶ 12 304 // ~> str:last-index "elven speak elvish" romulan 305 // ▶ -1 306 // ``` 307 308 //elvdoc:fn replace 309 // 310 // ```elvish 311 // str:replace &max=-1 $old $repl $source 312 // ``` 313 // 314 // Replaces all occurrences of `$old` with `$repl` in `$source`. If `$max` is 315 // non-negative, it determines the max number of substitutions. 
316 // 317 // **Note**: This command does not support searching by regular expressions, `$old` 318 // is always interpreted as a plain string. Use [re:replace](re.html#replace) if 319 // you need to search by regex. 320 321 type maxOpt struct{ Max int } 322 323 func (o *maxOpt) SetDefaultOptions() { o.Max = -1 } 324 325 func replace(opts maxOpt, old, repl, s string) string { 326 return strings.Replace(s, old, repl, opts.Max) 327 } 328 329 //elvdoc:fn split 330 // 331 // ```elvish 332 // str:split $sep $string 333 // ``` 334 // 335 // Splits `$string` by `$sep`. If `$sep` is an empty string, split it into 336 // codepoints. 337 // 338 // ```elvish-transcript 339 // ~> str:split , lorem,ipsum 340 // ▶ lorem 341 // ▶ ipsum 342 // ~> str:split '' 你好 343 // ▶ 你 344 // ▶ 好 345 // ``` 346 // 347 // **Note**: This command does not support splitting by regular expressions, 348 // `$sep` is always interpreted as a plain string. Use [re:split](re.html#split) 349 // if you need to split by regex. 350 // 351 // Etymology: Various languages, in particular 352 // [Python](https://docs.python.org/3.6/library/stdtypes.html#str.split). 353 // 354 // @cf str:join 355 356 func split(fm *eval.Frame, opts maxOpt, sep, s string) { 357 out := fm.OutputChan() 358 parts := strings.SplitN(s, sep, opts.Max) 359 for _, p := range parts { 360 out <- p 361 } 362 } 363 364 //elvdoc:fn title 365 // 366 // ```elvish 367 // str:title $str 368 // ``` 369 // 370 // Outputs `$str` with all Unicode letters that begin words mapped to their 371 // Unicode title case. 372 // 373 // ```elvish-transcript 374 // ~> str:title "her royal highness" 375 // ▶ Her Royal Highness 376 // ``` 377 378 //elvdoc:fn to-codepoints 379 // 380 // ```elvish 381 // str:to-codepoints $string 382 // ``` 383 // 384 // Outputs value of each codepoint in `$string`, in hexadecimal. 
Examples: 385 // 386 // ```elvish-transcript 387 // ~> str:to-codepoints a 388 // ▶ 0x61 389 // ~> str:to-codepoints 你好 390 // ▶ 0x4f60 391 // ▶ 0x597d 392 // ``` 393 // 394 // The output format is subject to change. 395 // 396 // @cf from-codepoints 397 398 func toCodepoints(fm *eval.Frame, s string) { 399 out := fm.OutputChan() 400 for _, r := range s { 401 out <- "0x" + strconv.FormatInt(int64(r), 16) 402 } 403 } 404 405 //elvdoc:fn to-lower 406 // 407 // ```elvish 408 // str:to-lower $str 409 // ``` 410 // 411 // Outputs `$str` with all Unicode letters mapped to their lower-case 412 // equivalent. 413 // 414 // ```elvish-transcript 415 // ~> str:to-lower 'ABC!123' 416 // ▶ abc!123 417 // ``` 418 419 //elvdoc:fn to-utf8-bytes 420 // 421 // ```elvish 422 // str:to-utf8-bytes $string 423 // ``` 424 // 425 // Outputs value of each byte in `$string`, in hexadecimal. Examples: 426 // 427 // ```elvish-transcript 428 // ~> str:to-utf8-bytes a 429 // ▶ 0x61 430 // ~> str:to-utf8-bytes 你好 431 // ▶ 0xe4 432 // ▶ 0xbd 433 // ▶ 0xa0 434 // ▶ 0xe5 435 // ▶ 0xa5 436 // ▶ 0xbd 437 // ``` 438 // 439 // The output format is subject to change. 440 // 441 // @cf from-utf8-bytes 442 443 func toUtf8Bytes(fm *eval.Frame, s string) { 444 out := fm.OutputChan() 445 for _, r := range []byte(s) { 446 out <- "0x" + strconv.FormatInt(int64(r), 16) 447 } 448 } 449 450 //elvdoc:fn to-title 451 // 452 // ```elvish 453 // str:to-title $str 454 // ``` 455 // 456 // Outputs `$str` with all Unicode letters mapped to their Unicode title case. 457 // 458 // ```elvish-transcript 459 // ~> str:to-title "her royal highness" 460 // ▶ HER ROYAL HIGHNESS 461 // ~> str:to-title "хлеб" 462 // ▶ ХЛЕБ 463 // ``` 464 465 //elvdoc:fn to-upper 466 // 467 // ```elvish 468 // str:to-upper 469 // ``` 470 // 471 // Outputs `$str` with all Unicode letters mapped to their upper-case 472 // equivalent. 
//
// ```elvish-transcript
// ~> str:to-upper 'abc!123'
// ▶ ABC!123
// ```

//elvdoc:fn trim
//
// ```elvish
// str:trim $str $cutset
// ```
//
// Outputs `$str` with all leading and trailing Unicode code points contained
// in `$cutset` removed.
//
// ```elvish-transcript
// ~> str:trim "¡¡¡Hello, Elven!!!" "!¡"
// ▶ 'Hello, Elven'
// ```

//elvdoc:fn trim-left
//
// ```elvish
// str:trim-left $str $cutset
// ```
//
// Outputs `$str` with all leading Unicode code points contained in `$cutset`
// removed. To remove a prefix string use [`str:trim-prefix`](#strtrim-prefix).
//
// ```elvish-transcript
// ~> str:trim-left "¡¡¡Hello, Elven!!!" "!¡"
// ▶ 'Hello, Elven!!!'
// ```

//elvdoc:fn trim-prefix
//
// ```elvish
// str:trim-prefix $str $prefix
// ```
//
// Outputs `$str` minus the leading `$prefix` string. If `$str` doesn't begin
// with `$prefix`, `$str` is output unchanged.
//
// ```elvish-transcript
// ~> str:trim-prefix "¡¡¡Hello, Elven!!!" "¡¡¡Hello, "
// ▶ Elven!!!
// ~> str:trim-prefix "¡¡¡Hello, Elven!!!" "¡¡¡Hola, "
// ▶ '¡¡¡Hello, Elven!!!'
// ```

//elvdoc:fn trim-right
//
// ```elvish
// str:trim-right $str $cutset
// ```
//
// Outputs `$str` with all trailing Unicode code points contained in `$cutset`
// removed. To remove a suffix string use [`str:trim-suffix`](#strtrim-suffix).
//
// ```elvish-transcript
// ~> str:trim-right "¡¡¡Hello, Elven!!!" "!¡"
// ▶ '¡¡¡Hello, Elven'
// ```

//elvdoc:fn trim-space
//
// ```elvish
// str:trim-space $str
// ```
//
// Outputs `$str` with all leading and trailing white space removed as defined
// by Unicode.
545 // 546 // ```elvish-transcript 547 // ~> str:trim-space " \t\n Hello, Elven \n\t\r\n" 548 // ▶ 'Hello, Elven' 549 // ``` 550 551 //elvdoc:fn trim-suffix 552 // 553 // ```elvish 554 // str:trim-suffix $str $suffix 555 // ``` 556 // 557 // Outputs `$str` minus the trailing `$suffix` string. If `$str` doesn't end 558 // with `$suffix`, `$str` is output unchanged. 559 // 560 // ```elvish-transcript 561 // ~> str:trim-suffix "¡¡¡Hello, Elven!!!" ", Elven!!!" 562 // ▶ ¡¡¡Hello 563 // ~> str:trim-suffix "¡¡¡Hello, Elven!!!" ", Klingons!!!" 564 // ▶ '¡¡¡Hello, Elven!!!' 565 // ``` 566 567 var Ns = eval.NsBuilder{}.AddGoFns("str:", fns).Ns() 568 569 var fns = map[string]interface{}{ 570 "compare": strings.Compare, 571 "contains": strings.Contains, 572 "contains-any": strings.ContainsAny, 573 "count": strings.Count, 574 "equal-fold": strings.EqualFold, 575 // TODO: Fields, FieldsFunc 576 "from-codepoints": fromCodepoints, 577 "from-utf8-bytes": fromUtf8Bytes, 578 "has-prefix": strings.HasPrefix, 579 "has-suffix": strings.HasSuffix, 580 "index": strings.Index, 581 "index-any": strings.IndexAny, 582 // TODO: IndexFunc 583 "join": join, 584 "last-index": strings.LastIndex, 585 // TODO: LastIndexFunc, Map, Repeat 586 "replace": replace, 587 "split": split, 588 // TODO: SplitAfter 589 "title": strings.Title, 590 "to-codepoints": toCodepoints, 591 "to-lower": strings.ToLower, 592 "to-title": strings.ToTitle, 593 "to-upper": strings.ToUpper, 594 "to-utf8-bytes": toUtf8Bytes, 595 // TODO: ToLowerSpecial, ToTitleSpecial, ToUpperSpecial 596 "trim": strings.Trim, 597 "trim-left": strings.TrimLeft, 598 "trim-right": strings.TrimRight, 599 // TODO: TrimLeft,Right}Func 600 "trim-space": strings.TrimSpace, 601 "trim-prefix": strings.TrimPrefix, 602 "trim-suffix": strings.TrimSuffix, 603 }