github.com/elves/elvish@v0.15.0/pkg/eval/mods/str/str.go

github.com/elves/elvish@v0.15.0/pkg/eval/mods/str/str.go (about)

     1  // Package str exposes functionality from Go's strings package as an Elvish
     2  // module.
     3  package str
     4  
     5  import (
     6  	"bytes"
     7  	"fmt"
     8  	"strconv"
     9  	"strings"
    10  	"unicode"
    11  	"unicode/utf8"
    12  
    13  	"github.com/elves/elvish/pkg/eval"
    14  	"github.com/elves/elvish/pkg/eval/errs"
    15  	"github.com/elves/elvish/pkg/eval/vals"
    16  )
    17  
    18  //elvdoc:fn compare
    19  //
    20  // ```elvish
    21  // str:compare $a $b
    22  // ```
    23  //
    24  // Compares two strings and outputs an integer that will be 0 if a == b,
    25  // -1 if a < b, and +1 if a > b.
    26  //
    27  // ```elvish-transcript
    28  // ~> str:compare a a
    29  // ▶ 0
    30  // ~> str:compare a b
    31  // ▶ -1
    32  // ~> str:compare b a
    33  // ▶ 1
    34  // ```
    35  
    36  //elvdoc:fn contains
    37  //
    38  // ```elvish
    39  // str:contains $str $substr
    40  // ```
    41  //
    42  // Outputs whether `$str` contains `$substr` as a substring.
    43  //
    44  // ```elvish-transcript
    45  // ~> str:contains abcd x
    46  // ▶ $false
    47  // ~> str:contains abcd bc
    48  // ▶ $true
    49  // ```
    50  
    51  //elvdoc:fn contains-any
    52  //
    53  // ```elvish
    54  // str:contains-any $str $chars
    55  // ```
    56  //
    57  // Outputs whether `$str` contains any Unicode code points in `$chars`.
    58  //
    59  // ```elvish-transcript
    60  // ~> str:contains-any abcd x
    61  // ▶ $false
    62  // ~> str:contains-any abcd xby
    63  // ▶ $true
    64  // ```
    65  
    66  //elvdoc:fn count
    67  //
    68  // ```elvish
    69  // str:count $str $substr
    70  // ```
    71  //
    72  // Outputs the number of non-overlapping instances of `$substr` in `$str`.
    73  // If `$substr` is an empty string, outputs 1 + the number of Unicode code
    74  // points in `$str`.
    75  //
    76  // ```elvish-transcript
    77  // ~> str:count abcdefabcdef bc
    78  // ▶ 2
    79  // ~> str:count abcdef ''
    80  // ▶ 7
    81  // ```
    82  
    83  //elvdoc:fn equal-fold
    84  //
    85  // ```elvish
    86  // str:equal-fold $str1 $str2
    87  // ```
    88  //
    89  // Outputs if `$str1` and `$str2`, interpreted as UTF-8 strings, are equal
    90  // under Unicode case-folding.
    91  //
    92  // ```elvish-transcript
    93  // ~> str:equal-fold ABC abc
    94  // ▶ $true
    95  // ~> str:equal-fold abc ab
    96  // ▶ $false
    97  // ```
    98  
    99  //elvdoc:fn from-codepoints
   100  //
   101  // ```elvish
   102  // str:from-codepoints $number...
   103  // ```
   104  //
   105  // Outputs a string consisting of the given Unicode codepoints. Example:
   106  //
   107  // ```elvish-transcript
   108  // ~> str:from-codepoints 0x61
   109  // ▶ a
   110  // ~> str:from-codepoints 0x4f60 0x597d
   111  // ▶ 你好
   112  // ```
   113  //
   114  // @cf str:to-codepoints
   115  
   116  func fromCodepoints(nums ...int) (string, error) {
   117  	var b bytes.Buffer
   118  	for _, num := range nums {
   119  		if num < 0 || num > unicode.MaxRune {
   120  			return "", errs.OutOfRange{
   121  				What:     "codepoint",
   122  				ValidLow: "0", ValidHigh: strconv.Itoa(unicode.MaxRune),
   123  				Actual: hex(num),
   124  			}
   125  		}
   126  		if !utf8.ValidRune(rune(num)) {
   127  			return "", errs.BadValue{
   128  				What:   "argument to str:from-codepoints",
   129  				Valid:  "valid Unicode codepoint",
   130  				Actual: hex(num),
   131  			}
   132  		}
   133  		b.WriteRune(rune(num))
   134  	}
   135  	return b.String(), nil
   136  }
   137  
   138  func hex(i int) string {
   139  	if i < 0 {
   140  		return "-0x" + strconv.FormatInt(-int64(i), 16)
   141  	}
   142  	return "0x" + strconv.FormatInt(int64(i), 16)
   143  }
   144  
   145  //elvdoc:fn from-utf8-bytes
   146  //
   147  // ```elvish
   148  // str:from-utf8-bytes $number...
   149  // ```
   150  //
   151  // Outputs a string consisting of the given UTF-8 bytes. Example:
   152  //
   153  // ```elvish-transcript
   154  // ~> str:from-utf8-bytes 0x61
   155  // ▶ a
   156  // ~> str:from-utf8-bytes 0xe4 0xbd 0xa0 0xe5 0xa5 0xbd
   157  // ▶ 你好
   158  // ```
   159  //
   160  // @cf str:to-utf8-bytes
   161  
   162  func fromUtf8Bytes(nums ...int) (string, error) {
   163  	var b bytes.Buffer
   164  	for _, num := range nums {
   165  		if num < 0 || num > 255 {
   166  			return "", errs.OutOfRange{
   167  				What:     "byte",
   168  				ValidLow: "0", ValidHigh: "255",
   169  				Actual: strconv.Itoa(num)}
   170  		}
   171  		b.WriteByte(byte(num))
   172  	}
   173  	if !utf8.Valid(b.Bytes()) {
   174  		return "", errs.BadValue{
   175  			What:   "arguments to str:from-utf8-bytes",
   176  			Valid:  "valid UTF-8 sequence",
   177  			Actual: fmt.Sprint(b.Bytes())}
   178  	}
   179  	return b.String(), nil
   180  }
   181  
   182  //elvdoc:fn has-prefix
   183  //
   184  // ```elvish
   185  // str:has-prefix $str $prefix
   186  // ```
   187  //
   188  // Outputs if `$str` begins with `$prefix`.
   189  //
   190  // ```elvish-transcript
   191  // ~> str:has-prefix abc ab
   192  // ▶ $true
   193  // ~> str:has-prefix abc bc
   194  // ▶ $false
   195  // ```
   196  
   197  //elvdoc:fn has-suffix
   198  //
   199  // ```elvish
   200  // str:has-suffix $str $suffix
   201  // ```
   202  //
   203  // Outputs if `$str` ends with `$suffix`.
   204  //
   205  // ```elvish-transcript
   206  // ~> str:has-suffix abc ab
   207  // ▶ $false
   208  // ~> str:has-suffix abc bc
   209  // ▶ $true
   210  // ```
   211  
   212  //elvdoc:fn index
   213  //
   214  // ```elvish
   215  // str:index $str $substr
   216  // ```
   217  //
   218  // Outputs the index of the first instance of `$substr` in `$str`, or -1
   219  // if `$substr` is not present in `$str`.
   220  //
   221  // ```elvish-transcript
   222  // ~> str:index abcd cd
   223  // ▶ 2
   224  // ~> str:index abcd xyz
   225  // ▶ -1
   226  // ```
   227  
   228  //elvdoc:fn index-any
   229  //
   230  // ```elvish
   231  // str:index-any $str $chars
   232  // ```
   233  //
   234  // Outputs the index of the first instance of any Unicode code point
   235  // from `$chars` in `$str`, or -1 if no Unicode code point from `$chars` is
   236  // present in `$str`.
   237  //
   238  // ```elvish-transcript
   239  // ~> str:index-any "chicken" "aeiouy"
   240  // ▶ 2
   241  // ~> str:index-any l33t aeiouy
   242  // ▶ -1
   243  // ```
   244  
   245  //elvdoc:fn join
   246  //
   247  // ```elvish
   248  // str:join $sep $input-list?
   249  // ```
   250  //
   251  // Joins inputs with `$sep`. Examples:
   252  //
   253  // ```elvish-transcript
   254  // ~> put lorem ipsum | str:join ,
   255  // ▶ lorem,ipsum
   256  // ~> str:join , [lorem ipsum]
   257  // ▶ lorem,ipsum
   258  // ~> str:join '' [lorem ipsum]
   259  // ▶ loremipsum
   260  // ~> str:join '...' [lorem ipsum]
   261  // ▶ lorem...ipsum
   262  // ```
   263  //
   264  // Etymology: Various languages, in particular
   265  // [Python](https://docs.python.org/3.6/library/stdtypes.html#str.join).
   266  //
   267  // @cf str:split
   268  
   269  func join(sep string, inputs eval.Inputs) (string, error) {
   270  	var buf bytes.Buffer
   271  	var errJoin error
   272  	first := true
   273  	inputs(func(v interface{}) {
   274  		if errJoin != nil {
   275  			return
   276  		}
   277  		if s, ok := v.(string); ok {
   278  			if first {
   279  				first = false
   280  			} else {
   281  				buf.WriteString(sep)
   282  			}
   283  			buf.WriteString(s)
   284  		} else {
   285  			errJoin = errs.BadValue{
   286  				What: "input to str:join", Valid: "string", Actual: vals.Kind(v)}
   287  		}
   288  	})
   289  	return buf.String(), errJoin
   290  }
   291  
   292  //elvdoc:fn last-index
   293  //
   294  // ```elvish
   295  // str:last-index $str $substr
   296  // ```
   297  //
   298  // Outputs the index of the last instance of `$substr` in `$str`,
   299  // or -1 if `$substr` is not present in `$str`.
   300  //
   301  // ```elvish-transcript
   302  // ~> str:last-index "elven speak elvish" elv
   303  // ▶ 12
   304  // ~> str:last-index "elven speak elvish" romulan
   305  // ▶ -1
   306  // ```
   307  
   308  //elvdoc:fn replace
   309  //
   310  // ```elvish
   311  // str:replace &max=-1 $old $repl $source
   312  // ```
   313  //
   314  // Replaces all occurrences of `$old` with `$repl` in `$source`. If `$max` is
   315  // non-negative, it determines the max number of substitutions.
   316  //
   317  // **Note**: This command does not support searching by regular expressions, `$old`
   318  // is always interpreted as a plain string. Use [re:replace](re.html#replace) if
   319  // you need to search by regex.
   320  
   321  type maxOpt struct{ Max int }
   322  
   323  func (o *maxOpt) SetDefaultOptions() { o.Max = -1 }
   324  
   325  func replace(opts maxOpt, old, repl, s string) string {
   326  	return strings.Replace(s, old, repl, opts.Max)
   327  }
   328  
   329  //elvdoc:fn split
   330  //
   331  // ```elvish
   332  // str:split &max=-1 $sep $string
   333  // ```
   334  //
   335  // Splits `$string` by `$sep`. If `$sep` is an empty string, splits it into
   336  // codepoints. If `$max` is non-negative, at most `$max` parts are output.
   337  //
   338  // ```elvish-transcript
   339  // ~> str:split , lorem,ipsum
   340  // ▶ lorem
   341  // ▶ ipsum
   342  // ~> str:split '' 你好
   343  // ▶ 你
   344  // ▶ 好
   345  // ```
   346  //
   347  // **Note**: This command does not support splitting by regular expressions,
   348  // `$sep` is always interpreted as a plain string. Use [re:split](re.html#split)
   349  // if you need to split by regex.
   350  //
   351  // Etymology: Various languages, in particular
   352  // [Python](https://docs.python.org/3.6/library/stdtypes.html#str.split).
   353  //
   354  // @cf str:join
   355  
   356  func split(fm *eval.Frame, opts maxOpt, sep, s string) {
   357  	out := fm.OutputChan()
   358  	parts := strings.SplitN(s, sep, opts.Max)
   359  	for _, p := range parts {
   360  		out <- p
   361  	}
   362  }
   363  
   364  //elvdoc:fn title
   365  //
   366  // ```elvish
   367  // str:title $str
   368  // ```
   369  //
   370  // Outputs `$str` with all Unicode letters that begin words mapped to their
   371  // Unicode title case.
   372  //
   373  // ```elvish-transcript
   374  // ~> str:title "her royal highness"
   375  // ▶ Her Royal Highness
   376  // ```
   377  
   378  //elvdoc:fn to-codepoints
   379  //
   380  // ```elvish
   381  // str:to-codepoints $string
   382  // ```
   383  //
   384  // Outputs value of each codepoint in `$string`, in hexadecimal. Examples:
   385  //
   386  // ```elvish-transcript
   387  // ~> str:to-codepoints a
   388  // ▶ 0x61
   389  // ~> str:to-codepoints 你好
   390  // ▶ 0x4f60
   391  // ▶ 0x597d
   392  // ```
   393  //
   394  // The output format is subject to change.
   395  //
   396  // @cf str:from-codepoints
   397  
   398  func toCodepoints(fm *eval.Frame, s string) {
   399  	out := fm.OutputChan()
   400  	for _, r := range s {
   401  		out <- "0x" + strconv.FormatInt(int64(r), 16)
   402  	}
   403  }
   404  
   405  //elvdoc:fn to-lower
   406  //
   407  // ```elvish
   408  // str:to-lower $str
   409  // ```
   410  //
   411  // Outputs `$str` with all Unicode letters mapped to their lower-case
   412  // equivalent.
   413  //
   414  // ```elvish-transcript
   415  // ~> str:to-lower 'ABC!123'
   416  // ▶ abc!123
   417  // ```
   418  
   419  //elvdoc:fn to-utf8-bytes
   420  //
   421  // ```elvish
   422  // str:to-utf8-bytes $string
   423  // ```
   424  //
   425  // Outputs value of each byte in `$string`, in hexadecimal. Examples:
   426  //
   427  // ```elvish-transcript
   428  // ~> str:to-utf8-bytes a
   429  // ▶ 0x61
   430  // ~> str:to-utf8-bytes 你好
   431  // ▶ 0xe4
   432  // ▶ 0xbd
   433  // ▶ 0xa0
   434  // ▶ 0xe5
   435  // ▶ 0xa5
   436  // ▶ 0xbd
   437  // ```
   438  //
   439  // The output format is subject to change.
   440  //
   441  // @cf str:from-utf8-bytes
   442  
   443  func toUtf8Bytes(fm *eval.Frame, s string) {
   444  	out := fm.OutputChan()
   445  	for _, r := range []byte(s) {
   446  		out <- "0x" + strconv.FormatInt(int64(r), 16)
   447  	}
   448  }
   449  
   450  //elvdoc:fn to-title
   451  //
   452  // ```elvish
   453  // str:to-title $str
   454  // ```
   455  //
   456  // Outputs `$str` with all Unicode letters mapped to their Unicode title case.
   457  //
   458  // ```elvish-transcript
   459  // ~> str:to-title "her royal highness"
   460  // ▶ HER ROYAL HIGHNESS
   461  // ~> str:to-title "хлеб"
   462  // ▶ ХЛЕБ
   463  // ```
   464  
   465  //elvdoc:fn to-upper
   466  //
   467  // ```elvish
   468  // str:to-upper $str
   469  // ```
   470  //
   471  // Outputs `$str` with all Unicode letters mapped to their upper-case
   472  // equivalent.
   473  //
   474  // ```elvish-transcript
   475  // ~> str:to-upper 'abc!123'
   476  // ▶ ABC!123
   477  // ```
   478  
   479  //elvdoc:fn trim
   480  //
   481  // ```elvish
   482  // str:trim $str $cutset
   483  // ```
   484  //
   485  // Outputs `$str` with all leading and trailing Unicode code points contained
   486  // in `$cutset` removed.
   487  //
   488  // ```elvish-transcript
   489  // ~> str:trim "¡¡¡Hello, Elven!!!" "!¡"
   490  // ▶ 'Hello, Elven'
   491  // ```
   492  
   493  //elvdoc:fn trim-left
   494  //
   495  // ```elvish
   496  // str:trim-left $str $cutset
   497  // ```
   498  //
   499  // Outputs `$str` with all leading Unicode code points contained in `$cutset`
   500  // removed. To remove a prefix string use [`str:trim-prefix`](#strtrim-prefix).
   501  //
   502  // ```elvish-transcript
   503  // ~> str:trim-left "¡¡¡Hello, Elven!!!" "!¡"
   504  // ▶ 'Hello, Elven!!!'
   505  // ```
   506  
   507  //elvdoc:fn trim-prefix
   508  //
   509  // ```elvish
   510  // str:trim-prefix $str $prefix
   511  // ```
   512  //
   513  // Outputs `$str` minus the leading `$prefix` string. If `$str` doesn't begin
   514  // with `$prefix`, `$str` is output unchanged.
   515  //
   516  // ```elvish-transcript
   517  // ~> str:trim-prefix "¡¡¡Hello, Elven!!!" "¡¡¡Hello, "
   518  // ▶ Elven!!!
   519  // ~> str:trim-prefix "¡¡¡Hello, Elven!!!" "¡¡¡Hola, "
   520  // ▶ '¡¡¡Hello, Elven!!!'
   521  // ```
   522  
   523  //elvdoc:fn trim-right
   524  //
   525  // ```elvish
   526  // str:trim-right $str $cutset
   527  // ```
   528  //
   529  // Outputs `$str` with all trailing Unicode code points contained in `$cutset`
   530  // removed. To remove a suffix string use [`str:trim-suffix`](#strtrim-suffix).
   531  //
   532  // ```elvish-transcript
   533  // ~> str:trim-right "¡¡¡Hello, Elven!!!" "!¡"
   534  // ▶ '¡¡¡Hello, Elven'
   535  // ```
   536  
   537  //elvdoc:fn trim-space
   538  //
   539  // ```elvish
   540  // str:trim-space $str
   541  // ```
   542  //
   543  // Outputs `$str` with all leading and trailing white space removed as defined
   544  // by Unicode.
   545  //
   546  // ```elvish-transcript
   547  // ~> str:trim-space " \t\n Hello, Elven \n\t\r\n"
   548  // ▶ 'Hello, Elven'
   549  // ```
   550  
   551  //elvdoc:fn trim-suffix
   552  //
   553  // ```elvish
   554  // str:trim-suffix $str $suffix
   555  // ```
   556  //
   557  // Outputs `$str` minus the trailing `$suffix` string. If `$str` doesn't end
   558  // with `$suffix`, `$str` is output unchanged.
   559  //
   560  // ```elvish-transcript
   561  // ~> str:trim-suffix "¡¡¡Hello, Elven!!!" ", Elven!!!"
   562  // ▶ ¡¡¡Hello
   563  // ~> str:trim-suffix "¡¡¡Hello, Elven!!!" ", Klingons!!!"
   564  // ▶ '¡¡¡Hello, Elven!!!'
   565  // ```
   566  
   567  var Ns = eval.NsBuilder{}.AddGoFns("str:", fns).Ns()
   568  
   569  var fns = map[string]interface{}{
   570  	"compare":      strings.Compare,
   571  	"contains":     strings.Contains,
   572  	"contains-any": strings.ContainsAny,
   573  	"count":        strings.Count,
   574  	"equal-fold":   strings.EqualFold,
   575  	// TODO: Fields, FieldsFunc
   576  	"from-codepoints": fromCodepoints,
   577  	"from-utf8-bytes": fromUtf8Bytes,
   578  	"has-prefix":      strings.HasPrefix,
   579  	"has-suffix":      strings.HasSuffix,
   580  	"index":           strings.Index,
   581  	"index-any":       strings.IndexAny,
   582  	// TODO: IndexFunc
   583  	"join":       join,
   584  	"last-index": strings.LastIndex,
   585  	// TODO: LastIndexFunc, Map, Repeat
   586  	"replace": replace,
   587  	"split":   split,
   588  	// TODO: SplitAfter
   589  	"title":         strings.Title,
   590  	"to-codepoints": toCodepoints,
   591  	"to-lower":      strings.ToLower,
   592  	"to-title":      strings.ToTitle,
   593  	"to-upper":      strings.ToUpper,
   594  	"to-utf8-bytes": toUtf8Bytes,
   595  	// TODO: ToLowerSpecial, ToTitleSpecial, ToUpperSpecial
   596  	"trim":       strings.Trim,
   597  	"trim-left":  strings.TrimLeft,
   598  	"trim-right": strings.TrimRight,
   599  	// TODO: TrimLeft,Right}Func
   600  	"trim-space":  strings.TrimSpace,
   601  	"trim-prefix": strings.TrimPrefix,
   602  	"trim-suffix": strings.TrimSuffix,
   603  }