github.com/markusbkk/elvish@v0.0.0-20231204143114-91dc52438621/pkg/mods/str/str.go

github.com/markusbkk/elvish@v0.0.0-20231204143114-91dc52438621/pkg/mods/str/str.go (about)

     1  // Package str exposes functionality from Go's strings package as an Elvish
     2  // module.
     3  package str
     4  
     5  import (
     6  	"bytes"
     7  	"fmt"
     8  	"strconv"
     9  	"strings"
    10  	"unicode"
    11  	"unicode/utf8"
    12  
    13  	"github.com/markusbkk/elvish/pkg/eval"
    14  	"github.com/markusbkk/elvish/pkg/eval/errs"
    15  	"github.com/markusbkk/elvish/pkg/eval/vals"
    16  )
    17  
// Ns is the namespace of the str: module. Most entries bind functions from
// Go's strings package directly; the remaining ones (fromCodepoints,
// fromUtf8Bytes, join, replace, split, toCodepoints and toUtf8Bytes) are
// wrappers defined in this file.
var Ns = eval.BuildNsNamed("str").
	AddGoFns(map[string]interface{}{
		"compare":      strings.Compare,
		"contains":     strings.Contains,
		"contains-any": strings.ContainsAny,
		"count":        strings.Count,
		"equal-fold":   strings.EqualFold,
		// TODO: Fields, FieldsFunc
		"from-codepoints": fromCodepoints,
		"from-utf8-bytes": fromUtf8Bytes,
		"has-prefix":      strings.HasPrefix,
		"has-suffix":      strings.HasSuffix,
		"index":           strings.Index,
		"index-any":       strings.IndexAny,
		// TODO: IndexFunc
		"join":       join,
		"last-index": strings.LastIndex,
		// TODO: LastIndexFunc, Map, Repeat
		"replace": replace,
		"split":   split,
		// TODO: SplitAfter
		// NOTE(review): strings.Title is deprecated since Go 1.18 because its
		// word-boundary rule does not handle Unicode punctuation properly.
		"title":         strings.Title,
		"to-codepoints": toCodepoints,
		"to-lower":      strings.ToLower,
		"to-title":      strings.ToTitle,
		"to-upper":      strings.ToUpper,
		"to-utf8-bytes": toUtf8Bytes,
		// TODO: ToLowerSpecial, ToTitleSpecial, ToUpperSpecial
		"trim":       strings.Trim,
		"trim-left":  strings.TrimLeft,
		"trim-right": strings.TrimRight,
		// TODO: Trim{Left,Right}Func
		"trim-space":  strings.TrimSpace,
		"trim-prefix": strings.TrimPrefix,
		"trim-suffix": strings.TrimSuffix,
	}).Ns()
    54  
    55  //elvdoc:fn compare
    56  //
    57  // ```elvish
    58  // str:compare $a $b
    59  // ```
    60  //
    61  // Compares two strings and outputs an integer that will be 0 if a == b,
    62  // -1 if a < b, and +1 if a > b.
    63  //
    64  // ```elvish-transcript
    65  // ~> str:compare a a
    66  // ▶ 0
    67  // ~> str:compare a b
    68  // ▶ -1
    69  // ~> str:compare b a
    70  // ▶ 1
    71  // ```
    72  
    73  //elvdoc:fn contains
    74  //
    75  // ```elvish
    76  // str:contains $str $substr
    77  // ```
    78  //
    79  // Outputs whether `$str` contains `$substr` as a substring.
    80  //
    81  // ```elvish-transcript
    82  // ~> str:contains abcd x
    83  // ▶ $false
    84  // ~> str:contains abcd bc
    85  // ▶ $true
    86  // ```
    87  
    88  //elvdoc:fn contains-any
    89  //
    90  // ```elvish
    91  // str:contains-any $str $chars
    92  // ```
    93  //
    94  // Outputs whether `$str` contains any Unicode code points in `$chars`.
    95  //
    96  // ```elvish-transcript
    97  // ~> str:contains-any abcd x
    98  // ▶ $false
    99  // ~> str:contains-any abcd xby
   100  // ▶ $true
   101  // ```
   102  
   103  //elvdoc:fn count
   104  //
   105  // ```elvish
   106  // str:count $str $substr
   107  // ```
   108  //
   109  // Outputs the number of non-overlapping instances of `$substr` in `$str`.
   110  // If `$substr` is an empty string, outputs 1 + the number of Unicode code
   111  // points in `$str`.
   112  //
   113  // ```elvish-transcript
   114  // ~> str:count abcdefabcdef bc
   115  // ▶ 2
   116  // ~> str:count abcdef ''
   117  // ▶ 7
   118  // ```
   119  
   120  //elvdoc:fn equal-fold
   121  //
   122  // ```elvish
   123  // str:equal-fold $str1 $str2
   124  // ```
   125  //
   126  // Outputs if `$str1` and `$str2`, interpreted as UTF-8 strings, are equal
   127  // under Unicode case-folding.
   128  //
   129  // ```elvish-transcript
   130  // ~> str:equal-fold ABC abc
   131  // ▶ $true
   132  // ~> str:equal-fold abc ab
   133  // ▶ $false
   134  // ```
   135  
   136  //elvdoc:fn from-codepoints
   137  //
   138  // ```elvish
   139  // str:from-codepoints $number...
   140  // ```
   141  //
   142  // Outputs a string consisting of the given Unicode codepoints. Example:
   143  //
   144  // ```elvish-transcript
   145  // ~> str:from-codepoints 0x61
   146  // ▶ a
   147  // ~> str:from-codepoints 0x4f60 0x597d
   148  // ▶ 你好
   149  // ```
   150  //
   151  // @cf str:to-codepoints
   152  
   153  func fromCodepoints(nums ...int) (string, error) {
   154  	var b bytes.Buffer
   155  	for _, num := range nums {
   156  		if num < 0 || num > unicode.MaxRune {
   157  			return "", errs.OutOfRange{
   158  				What:     "codepoint",
   159  				ValidLow: "0", ValidHigh: strconv.Itoa(unicode.MaxRune),
   160  				Actual: hex(num),
   161  			}
   162  		}
   163  		if !utf8.ValidRune(rune(num)) {
   164  			return "", errs.BadValue{
   165  				What:   "argument to str:from-codepoints",
   166  				Valid:  "valid Unicode codepoint",
   167  				Actual: hex(num),
   168  			}
   169  		}
   170  		b.WriteRune(rune(num))
   171  	}
   172  	return b.String(), nil
   173  }
   174  
   175  func hex(i int) string {
   176  	if i < 0 {
   177  		return "-0x" + strconv.FormatInt(-int64(i), 16)
   178  	}
   179  	return "0x" + strconv.FormatInt(int64(i), 16)
   180  }
   181  
   182  //elvdoc:fn from-utf8-bytes
   183  //
   184  // ```elvish
   185  // str:from-utf8-bytes $number...
   186  // ```
   187  //
   188  // Outputs a string consisting of the given UTF-8 bytes. Example:
   189  //
   190  // ```elvish-transcript
   191  // ~> str:from-utf8-bytes 0x61
   192  // ▶ a
   193  // ~> str:from-utf8-bytes 0xe4 0xbd 0xa0 0xe5 0xa5 0xbd
   194  // ▶ 你好
   195  // ```
   196  //
   197  // @cf str:to-utf8-bytes
   198  
   199  func fromUtf8Bytes(nums ...int) (string, error) {
   200  	var b bytes.Buffer
   201  	for _, num := range nums {
   202  		if num < 0 || num > 255 {
   203  			return "", errs.OutOfRange{
   204  				What:     "byte",
   205  				ValidLow: "0", ValidHigh: "255",
   206  				Actual: strconv.Itoa(num)}
   207  		}
   208  		b.WriteByte(byte(num))
   209  	}
   210  	if !utf8.Valid(b.Bytes()) {
   211  		return "", errs.BadValue{
   212  			What:   "arguments to str:from-utf8-bytes",
   213  			Valid:  "valid UTF-8 sequence",
   214  			Actual: fmt.Sprint(b.Bytes())}
   215  	}
   216  	return b.String(), nil
   217  }
   218  
   219  //elvdoc:fn has-prefix
   220  //
   221  // ```elvish
   222  // str:has-prefix $str $prefix
   223  // ```
   224  //
   225  // Outputs if `$str` begins with `$prefix`.
   226  //
   227  // ```elvish-transcript
   228  // ~> str:has-prefix abc ab
   229  // ▶ $true
   230  // ~> str:has-prefix abc bc
   231  // ▶ $false
   232  // ```
   233  
   234  //elvdoc:fn has-suffix
   235  //
   236  // ```elvish
   237  // str:has-suffix $str $suffix
   238  // ```
   239  //
   240  // Outputs if `$str` ends with `$suffix`.
   241  //
   242  // ```elvish-transcript
   243  // ~> str:has-suffix abc ab
   244  // ▶ $false
   245  // ~> str:has-suffix abc bc
   246  // ▶ $true
   247  // ```
   248  
   249  //elvdoc:fn index
   250  //
   251  // ```elvish
   252  // str:index $str $substr
   253  // ```
   254  //
   255  // Outputs the index of the first instance of `$substr` in `$str`, or -1
   256  // if `$substr` is not present in `$str`.
   257  //
   258  // ```elvish-transcript
   259  // ~> str:index abcd cd
   260  // ▶ 2
   261  // ~> str:index abcd xyz
   262  // ▶ -1
   263  // ```
   264  
   265  //elvdoc:fn index-any
   266  //
   267  // ```elvish
   268  // str:index-any $str $chars
   269  // ```
   270  //
   271  // Outputs the index of the first instance of any Unicode code point
   272  // from `$chars` in `$str`, or -1 if no Unicode code point from `$chars` is
   273  // present in `$str`.
   274  //
   275  // ```elvish-transcript
   276  // ~> str:index-any "chicken" "aeiouy"
   277  // ▶ 2
   278  // ~> str:index-any l33t aeiouy
   279  // ▶ -1
   280  // ```
   281  
   282  //elvdoc:fn join
   283  //
   284  // ```elvish
   285  // str:join $sep $input-list?
   286  // ```
   287  //
   288  // Joins inputs with `$sep`. Examples:
   289  //
   290  // ```elvish-transcript
   291  // ~> put lorem ipsum | str:join ,
   292  // ▶ lorem,ipsum
   293  // ~> str:join , [lorem ipsum]
   294  // ▶ lorem,ipsum
   295  // ~> str:join '' [lorem ipsum]
   296  // ▶ loremipsum
   297  // ~> str:join '...' [lorem ipsum]
   298  // ▶ lorem...ipsum
   299  // ```
   300  //
   301  // Etymology: Various languages, in particular
   302  // [Python](https://docs.python.org/3.6/library/stdtypes.html#str.join).
   303  //
   304  // @cf str:split
   305  
   306  func join(sep string, inputs eval.Inputs) (string, error) {
   307  	var buf bytes.Buffer
   308  	var errJoin error
   309  	first := true
   310  	inputs(func(v interface{}) {
   311  		if errJoin != nil {
   312  			return
   313  		}
   314  		if s, ok := v.(string); ok {
   315  			if first {
   316  				first = false
   317  			} else {
   318  				buf.WriteString(sep)
   319  			}
   320  			buf.WriteString(s)
   321  		} else {
   322  			errJoin = errs.BadValue{
   323  				What: "input to str:join", Valid: "string", Actual: vals.Kind(v)}
   324  		}
   325  	})
   326  	return buf.String(), errJoin
   327  }
   328  
   329  //elvdoc:fn last-index
   330  //
   331  // ```elvish
   332  // str:last-index $str $substr
   333  // ```
   334  //
   335  // Outputs the index of the last instance of `$substr` in `$str`,
   336  // or -1 if `$substr` is not present in `$str`.
   337  //
   338  // ```elvish-transcript
   339  // ~> str:last-index "elven speak elvish" elv
   340  // ▶ 12
   341  // ~> str:last-index "elven speak elvish" romulan
   342  // ▶ -1
   343  // ```
   344  
   345  //elvdoc:fn replace
   346  //
   347  // ```elvish
   348  // str:replace &max=-1 $old $repl $source
   349  // ```
   350  //
   351  // Replaces all occurrences of `$old` with `$repl` in `$source`. If `$max` is
   352  // non-negative, it determines the max number of substitutions.
   353  //
   354  // **Note**: This command does not support searching by regular expressions, `$old`
   355  // is always interpreted as a plain string. Use [re:replace](re.html#re:replace) if
   356  // you need to search by regex.
   357  
   358  type maxOpt struct{ Max int }
   359  
   360  func (o *maxOpt) SetDefaultOptions() { o.Max = -1 }
   361  
   362  func replace(opts maxOpt, old, repl, s string) string {
   363  	return strings.Replace(s, old, repl, opts.Max)
   364  }
   365  
   366  //elvdoc:fn split
   367  //
   368  // ```elvish
   369  // str:split &max=-1 $sep $string
   370  // ```
   371  //
   372  // Splits `$string` by `$sep`. If `$sep` is an empty string, split it into
   373  // codepoints.
   374  //
   375  // If the `&max` option is non-negative, stops after producing the maximum
   376  // number of results.
   377  //
   378  // ```elvish-transcript
   379  // ~> str:split , lorem,ipsum
   380  // ▶ lorem
   381  // ▶ ipsum
   382  // ~> str:split '' 你好
   383  // ▶ 你
   384  // ▶ 好
   385  // ~> str:split &max=2 ' ' 'a b c d'
   386  // ▶ a
   387  // ▶ 'b c d'
   388  // ```
   389  //
   390  // **Note**: This command does not support splitting by regular expressions,
   391  // `$sep` is always interpreted as a plain string. Use [re:split](re.html#re:split)
   392  // if you need to split by regex.
   393  //
   394  // Etymology: Various languages, in particular
   395  // [Python](https://docs.python.org/3.6/library/stdtypes.html#str.split).
   396  //
   397  // @cf str:join
   398  
   399  func split(fm *eval.Frame, opts maxOpt, sep, s string) error {
   400  	out := fm.ValueOutput()
   401  	parts := strings.SplitN(s, sep, opts.Max)
   402  	for _, p := range parts {
   403  		err := out.Put(p)
   404  		if err != nil {
   405  			return err
   406  		}
   407  	}
   408  	return nil
   409  }
   410  
   411  //elvdoc:fn title
   412  //
   413  // ```elvish
   414  // str:title $str
   415  // ```
   416  //
   417  // Outputs `$str` with all Unicode letters that begin words mapped to their
   418  // Unicode title case.
   419  //
   420  // ```elvish-transcript
   421  // ~> str:title "her royal highness"
   422  // ▶ Her Royal Highness
   423  // ```
   424  
   425  //elvdoc:fn to-codepoints
   426  //
   427  // ```elvish
   428  // str:to-codepoints $string
   429  // ```
   430  //
   431  // Outputs value of each codepoint in `$string`, in hexadecimal. Examples:
   432  //
   433  // ```elvish-transcript
   434  // ~> str:to-codepoints a
   435  // ▶ 0x61
   436  // ~> str:to-codepoints 你好
   437  // ▶ 0x4f60
   438  // ▶ 0x597d
   439  // ```
   440  //
   441  // The output format is subject to change.
   442  //
   443  // @cf str:from-codepoints
   444  
   445  func toCodepoints(fm *eval.Frame, s string) error {
   446  	out := fm.ValueOutput()
   447  	for _, r := range s {
   448  		err := out.Put("0x" + strconv.FormatInt(int64(r), 16))
   449  		if err != nil {
   450  			return err
   451  		}
   452  	}
   453  	return nil
   454  }
   455  
   456  //elvdoc:fn to-lower
   457  //
   458  // ```elvish
   459  // str:to-lower $str
   460  // ```
   461  //
   462  // Outputs `$str` with all Unicode letters mapped to their lower-case
   463  // equivalent.
   464  //
   465  // ```elvish-transcript
   466  // ~> str:to-lower 'ABC!123'
   467  // ▶ abc!123
   468  // ```
   469  
   470  //elvdoc:fn to-utf8-bytes
   471  //
   472  // ```elvish
   473  // str:to-utf8-bytes $string
   474  // ```
   475  //
   476  // Outputs value of each byte in `$string`, in hexadecimal. Examples:
   477  //
   478  // ```elvish-transcript
   479  // ~> str:to-utf8-bytes a
   480  // ▶ 0x61
   481  // ~> str:to-utf8-bytes 你好
   482  // ▶ 0xe4
   483  // ▶ 0xbd
   484  // ▶ 0xa0
   485  // ▶ 0xe5
   486  // ▶ 0xa5
   487  // ▶ 0xbd
   488  // ```
   489  //
   490  // The output format is subject to change.
   491  //
   492  // @cf str:from-utf8-bytes
   493  
   494  func toUtf8Bytes(fm *eval.Frame, s string) error {
   495  	out := fm.ValueOutput()
   496  	for _, r := range []byte(s) {
   497  		err := out.Put("0x" + strconv.FormatInt(int64(r), 16))
   498  		if err != nil {
   499  			return err
   500  		}
   501  	}
   502  	return nil
   503  }
   504  
   505  //elvdoc:fn to-title
   506  //
   507  // ```elvish
   508  // str:to-title $str
   509  // ```
   510  //
   511  // Outputs `$str` with all Unicode letters mapped to their Unicode title case.
   512  //
   513  // ```elvish-transcript
   514  // ~> str:to-title "her royal highness"
   515  // ▶ HER ROYAL HIGHNESS
   516  // ~> str:to-title "хлеб"
   517  // ▶ ХЛЕБ
   518  // ```
   519  
   520  //elvdoc:fn to-upper
   521  //
   522  // ```elvish
   523  // str:to-upper $str
   524  // ```
   525  //
   526  // Outputs `$str` with all Unicode letters mapped to their upper-case
   527  // equivalent.
   528  //
   529  // ```elvish-transcript
   530  // ~> str:to-upper 'abc!123'
   531  // ▶ ABC!123
   532  // ```
   533  
   534  //elvdoc:fn trim
   535  //
   536  // ```elvish
   537  // str:trim $str $cutset
   538  // ```
   539  //
   540  // Outputs `$str` with all leading and trailing Unicode code points contained
   541  // in `$cutset` removed.
   542  //
   543  // ```elvish-transcript
   544  // ~> str:trim "¡¡¡Hello, Elven!!!" "!¡"
   545  // ▶ 'Hello, Elven'
   546  // ```
   547  
   548  //elvdoc:fn trim-left
   549  //
   550  // ```elvish
   551  // str:trim-left $str $cutset
   552  // ```
   553  //
   554  // Outputs `$str` with all leading Unicode code points contained in `$cutset`
   555  // removed. To remove a prefix string use [`str:trim-prefix`](#str:trim-prefix).
   556  //
   557  // ```elvish-transcript
   558  // ~> str:trim-left "¡¡¡Hello, Elven!!!" "!¡"
   559  // ▶ 'Hello, Elven!!!'
   560  // ```
   561  
   562  //elvdoc:fn trim-prefix
   563  //
   564  // ```elvish
   565  // str:trim-prefix $str $prefix
   566  // ```
   567  //
   568  // Outputs `$str` minus the leading `$prefix` string. If `$str` doesn't begin
   569  // with `$prefix`, `$str` is output unchanged.
   570  //
   571  // ```elvish-transcript
   572  // ~> str:trim-prefix "¡¡¡Hello, Elven!!!" "¡¡¡Hello, "
   573  // ▶ Elven!!!
   574  // ~> str:trim-prefix "¡¡¡Hello, Elven!!!" "¡¡¡Hola, "
   575  // ▶ '¡¡¡Hello, Elven!!!'
   576  // ```
   577  
   578  //elvdoc:fn trim-right
   579  //
   580  // ```elvish
   581  // str:trim-right $str $cutset
   582  // ```
   583  //
   584  // Outputs `$str` with all trailing Unicode code points contained in `$cutset`
   585  // removed. To remove a suffix string use [`str:trim-suffix`](#str:trim-suffix).
   586  //
   587  // ```elvish-transcript
   588  // ~> str:trim-right "¡¡¡Hello, Elven!!!" "!¡"
   589  // ▶ '¡¡¡Hello, Elven'
   590  // ```
   591  
   592  //elvdoc:fn trim-space
   593  //
   594  // ```elvish
   595  // str:trim-space $str
   596  // ```
   597  //
   598  // Outputs `$str` with all leading and trailing white space removed as defined
   599  // by Unicode.
   600  //
   601  // ```elvish-transcript
   602  // ~> str:trim-space " \t\n Hello, Elven \n\t\r\n"
   603  // ▶ 'Hello, Elven'
   604  // ```
   605  
   606  //elvdoc:fn trim-suffix
   607  //
   608  // ```elvish
   609  // str:trim-suffix $str $suffix
   610  // ```
   611  //
   612  // Outputs `$str` minus the trailing `$suffix` string. If `$str` doesn't end
   613  // with `$suffix`, `$str` is output unchanged.
   614  //
   615  // ```elvish-transcript
   616  // ~> str:trim-suffix "¡¡¡Hello, Elven!!!" ", Elven!!!"
   617  // ▶ ¡¡¡Hello
   618  // ~> str:trim-suffix "¡¡¡Hello, Elven!!!" ", Klingons!!!"
   619  // ▶ '¡¡¡Hello, Elven!!!'
   620  // ```