github.com/google/skylark@v0.0.0-20181101142754-a5f7082aabed/testdata/string.sky (about)

     1  # Tests of Skylark 'string'
     2  
     3  load("assert.sky", "assert")
     4  
     5  # raw string literals: a backslash inside r'...' is kept literally, not read as an escape.
     6  assert.eq(r'a\bc', "a\\bc")
     7  
     8  # truth: the empty string is false; non-empty strings are true, even one holding only NUL.
     9  assert.true("abc")
    10  assert.true("\0")
    11  assert.true(not "")
    12  
    13  # str + str: concatenation
    14  assert.eq("a"+"b"+"c", "abc")
    15  
    16  # str * int,  int * str: repetition works in either operand order; counts <= 0 give "".
    17  assert.eq("abc" * 0, "")
    18  assert.eq("abc" * -1, "")
    19  assert.eq("abc" * 1, "abc")
    20  assert.eq("abc" * 5, "abcabcabcabcabc")
    21  assert.eq(0 * "abc", "")
    22  assert.eq(-1 * "abc", "")
    23  assert.eq(1 * "abc", "abc")
    24  assert.eq(5 * "abc", "abcabcabcabcabc")
    25  assert.fails(lambda: 1.0 * "abc", "unknown.*float \\* str") # a float is not accepted as the count
    26  
    27  # len: counts the bytes of the UTF-8 encoding, not code points.
    28  assert.eq(len("Hello, 世界!"), 14) # 8 ASCII bytes + 2 characters of 3 bytes each
    29  assert.eq(len("𐐷"), 4) # U+10437 has a 4-byte UTF-8 encoding (and a 2-code UTF-16 encoding)
    30  
    31  # chr & ord: chr maps a code point to its UTF-8 string; ord is the inverse for one-code-point strings.
    32  assert.eq(chr(65), "A")       # 1-byte UTF-8 encoding
    33  assert.eq(chr(1049), "Й")     # 2-byte UTF-8 encoding
    34  assert.eq(chr(0x1F63F), "😿") # 4-byte UTF-8 encoding
    35  assert.fails(lambda: chr(-1), "Unicode code point -1 out of range \\(<0\\)")
    36  assert.fails(lambda: chr(0x110000), "Unicode code point U\\+110000 out of range \\(>0x10FFFF\\)")
    37  assert.eq(ord("A"), 65)
    38  assert.eq(ord("Й"), 1049)
    39  assert.eq(ord("😿"), 0x1F63F)
    40  assert.eq(ord("Й"[1:]), 0xFFFD) # a lone trailing byte decodes as U+FFFD, the Unicode replacement character
    41  assert.fails(lambda: ord("abc"), "string encodes 3 Unicode code points, want 1")
    42  assert.fails(lambda: ord(""), "string encodes 0 Unicode code points, want 1")
    43  assert.fails(lambda: ord("😿"[1:]), "string encodes 3 Unicode code points, want 1") # 3 x 0xFFFD
    44  
    45  # string.codepoint_ords: yields code point values; each byte of an invalid sequence yields 0xFFFD.
    46  assert.eq(type("abcЙ😿".codepoint_ords()), "codepoints")
    47  assert.eq(str("abcЙ😿".codepoint_ords()), '"abcЙ😿".codepoint_ords()')
    48  assert.eq(list("abcЙ😿".codepoint_ords()), [97, 98, 99, 1049, 128575])
    49  assert.eq(list(("A" + "😿Z"[1:]).codepoint_ords()), [ord("A"), 0xFFFD, 0xFFFD, 0xFFFD, ord("Z")])
    50  assert.eq(list("".codepoint_ords()), [])
    51  
    52  # string.codepoints: yields one-code-point substrings; bytes of an invalid sequence come out as 1-byte strings.
    53  assert.eq(type("abcЙ😿".codepoints()), "codepoints")
    54  assert.eq(str("abcЙ😿".codepoints()), '"abcЙ😿".codepoints()')
    55  assert.eq(list("abcЙ😿".codepoints()), ["a", "b", "c", "Й", "😿"])
    56  assert.eq(list(("A" + "😿Z"[1:]).codepoints()), ["A", "\x9f", "\x98", "\xbf", "Z"])
    57  assert.eq(list("".codepoints()), [])
    58  
    59  # string.elem_ords: yields the numeric value of each byte of the UTF-8 encoding.
    60  assert.eq(type("abcЙ😿".elem_ords()), "elems")
    61  assert.eq(str("abcЙ😿".elem_ords()), '"abcЙ😿".elem_ords()')
    62  assert.eq(list("abcЙ😿".elem_ords()), [97, 98, 99,  208, 153, 240, 159, 152, 191])
    63  assert.eq(list(("A" + "😿Z"[1:]).elem_ords()),  [65, 159, 152, 191, 90])
    64  assert.eq(list("".elem_ords()), [])
    65  
    66  # string.elems: yields each byte of the UTF-8 encoding as a 1-byte string.
    67  assert.eq(type("abcЙ😿".elems()), "elems")
    68  assert.eq(str("abcЙ😿".elems()), '"abcЙ😿".elems()')
    69  assert.eq(list("abcЙ😿".elems()),
    70            ["a", "b", "c", "\xd0", "\x99", "\xf0", "\x9f", "\x98", "\xbf"])
    71  assert.eq(list(("A" + "😿Z"[1:]).elems()),
    72            ["A", "\x9f", "\x98", "\xbf", "Z"])
    73  assert.eq(list("".elems()), [])
    74  
    75  # indexing, x[i]: i selects a byte; negative i counts from the end; an out-of-range i fails.
    76  assert.eq("Hello, 世界!"[0], "H")
    77  assert.eq("Hello, 世界!"[7], "\xe4") # first byte of a multi-byte character
    78  assert.eq("Hello, 世界!"[13], "!")
    79  assert.fails(lambda: "abc"[-4], "out of range")
    80  assert.eq("abc"[-3], "a")
    81  assert.eq("abc"[-2], "b")
    82  assert.eq("abc"[-1], "c")
    83  assert.eq("abc"[0], "a")
    84  assert.eq("abc"[1], "b")
    85  assert.eq("abc"[2], "c")
    86  assert.fails(lambda: "abc"[4], "out of range")
    87  
    88  # x[i] = ...: strings do not support element assignment.
    89  x2 = "abc"
    90  def f(): x2[1] = 'B' # wrapped in a def so the failing statement runs inside assert.fails
    91  assert.fails(f, "string.*does not support.*assignment")
    92  
    93  # slicing, x[i:j]: out-of-range bounds are clamped instead of failing; start >= end gives "".
    94  assert.eq("abc"[:], "abc")
    95  assert.eq("abc"[-4:], "abc")
    96  assert.eq("abc"[-3:], "abc")
    97  assert.eq("abc"[-2:], "bc")
    98  assert.eq("abc"[-1:], "c")
    99  assert.eq("abc"[0:], "abc")
   100  assert.eq("abc"[1:], "bc")
   101  assert.eq("abc"[2:], "c")
   102  assert.eq("abc"[3:], "")
   103  assert.eq("abc"[4:], "")
   104  assert.eq("abc"[:-4], "")
   105  assert.eq("abc"[:-3], "")
   106  assert.eq("abc"[:-2], "a")
   107  assert.eq("abc"[:-1], "ab")
   108  assert.eq("abc"[:0], "")
   109  assert.eq("abc"[:1], "a")
   110  assert.eq("abc"[:2], "ab")
   111  assert.eq("abc"[:3], "abc")
   112  assert.eq("abc"[:4], "abc")
   113  assert.eq("abc"[1:2], "b")
   114  assert.eq("abc"[2:1], "")
   115  # non-unit strides, x[i:j:k]: a negative k walks backwards; an oversized start is clamped to the last element.
   116  assert.eq("abcd"[0:4:1], "abcd")
   117  assert.eq("abcd"[::2], "ac")
   118  assert.eq("abcd"[1::2], "bd")
   119  assert.eq("abcd"[4:0:-1], "dcb")
   120  assert.eq("banana"[7::-2], "aaa")
   121  assert.eq("banana"[6::-2], "aaa")
   122  assert.eq("banana"[5::-2], "aaa")
   123  assert.eq("banana"[4::-2], "nnb")
   124  assert.eq("banana"[::-1], "ananab")
   125  assert.eq("banana"[None:None:-2], "aaa") # None behaves like an omitted bound
   126  assert.fails(lambda: "banana"[1.0::], "invalid start index: got float, want int")
   127  assert.fails(lambda: "banana"[:"":], "invalid end index: got string, want int")
   128  assert.fails(lambda: "banana"[:"":True], "got bool for slice step, want int")
   129  
   130  # in, not in: substring test; a non-string operand on either side is an error.
   131  assert.true("oo" in "food")
   132  assert.true("ox" not in "food")
   133  assert.true("" in "food")
   134  assert.true("" in "")
   135  assert.fails(lambda: 1 in "", "requires string as left operand")
   136  assert.fails(lambda: "" in 1, "unknown binary op: string in int")
   137  
   138  # ==, !=: strings compare by value, and the comparison is case-sensitive.
   139  assert.eq("hello", "he"+"llo")
   140  assert.ne("hello", "Hello")
   141  
   142  # TODO(adonovan): ordered comparisons
   143  
   144  # string % tuple formatting; the right operand may also be a dict (for %(name)x) or a single non-tuple value.
   145  assert.eq("A %d %x Z" % (123, 456), "A 123 1c8 Z")
   146  assert.eq("A %(foo)d %(bar)s Z" % {"foo": 123, "bar":"hi"}, "A 123 hi Z")
   147  assert.eq("%s %r" % ("hi", "hi"), 'hi "hi"') # TODO(adonovan): use ''-quotation
   148  assert.eq("%%d %d" % 1, "%d 1")
   149  assert.fails(lambda: "%d %d" % 1, "not enough arguments for format string")
   150  assert.fails(lambda: "%d %d" % (1, 2, 3), "too many arguments for format string")
   151  # %c: accepts an int code point or a string of exactly one code point
   152  assert.eq("%c" % 65, "A")
   153  assert.eq("%c" % 0x3b1, "α")
   154  assert.eq("%c" % "A", "A")
   155  assert.eq("%c" % "α", "α")
   156  assert.fails(lambda: "%c" % "abc", "requires a single-character string")
   157  assert.fails(lambda: "%c" % 65.0, "requires int or single-character string")
   158  assert.fails(lambda: "%c" % 10000000, "requires a valid Unicode code point")
   159  assert.fails(lambda: "%c" % -1, "requires a valid Unicode code point")
   160  # TODO(adonovan): more tests
   161  
   162  # str.format: {} fields are numbered automatically, {N} by position, {name} by keyword; {{ and }} are literal braces.
   163  assert.eq("a{}b".format(123), "a123b")
   164  assert.eq("a{}b{}c{}d{}".format(1, 2, 3, 4), "a1b2c3d4")
   165  assert.eq("a{{b".format(), "a{b")
   166  assert.eq("a}}b".format(), "a}b")
   167  assert.eq("a{{b}}c".format(), "a{b}c")
   168  assert.eq("a{x}b{y}c{}".format(1, x=2, y=3), "a2b3c1")
   169  assert.fails(lambda: "a{z}b".format(x=1), "keyword z not found")
   170  assert.fails(lambda: "{-1}".format(1), "keyword -1 not found") # a signed number is treated as a keyword name
   171  assert.fails(lambda: "{-0}".format(1), "keyword -0 not found")
   172  assert.fails(lambda: '{0,1} and {1}'.format(1, 2), "keyword 0,1 not found")
   173  assert.fails(lambda: "a{123}b".format(), "tuple index out of range")
   174  assert.fails(lambda: "a{}b{}c".format(1), "tuple index out of range")
   175  assert.eq("a{010}b".format(0,1,2,3,4,5,6,7,8,9,10), "a10b") # index is decimal
   176  assert.fails(lambda: "a{}b{1}c".format(1, 2), "cannot switch from automatic field numbering to manual")
   177  assert.eq("a{!s}c".format("b"), "abc")
   178  assert.eq("a{!r}c".format("b"), r'a"b"c')
   179  assert.eq("a{x!r}c".format(x='b'), r'a"b"c')
   180  assert.fails(lambda: "{x!}".format(x=1), "unknown conversion")
   181  assert.fails(lambda: "{x!:}".format(x=1), "unknown conversion")
   182  assert.fails(lambda: '{a.b}'.format(1), "syntax x.y is not supported")
   183  assert.fails(lambda: '{a[0]}'.format(1), "syntax a\\[i\\] is not supported") # brackets escaped for the regexp, backslashes for the string literal
   184  assert.fails(lambda: '{ {} }'.format(1), "nested replacement fields not supported")
   185  assert.fails(lambda: '{{}'.format(1), "single '}' in format")
   186  assert.fails(lambda: '{}}'.format(1), "single '}' in format")
   187  assert.fails(lambda: '}}{'.format(1), "unmatched '{' in format")
   188  assert.fails(lambda: '}{{'.format(1), "single '}' in format")
   189  
   190  # str.split, str.rsplit: the optional count caps the number of splits (-1 = no cap); rsplit counts from the right.
   191  assert.eq("a.b.c.d".split("."), ["a", "b", "c", "d"])
   192  assert.eq("a.b.c.d".rsplit("."), ["a", "b", "c", "d"])
   193  assert.eq("a.b.c.d".split(".", -1), ["a", "b", "c", "d"])
   194  assert.eq("a.b.c.d".rsplit(".", -1), ["a", "b", "c", "d"])
   195  assert.eq("a.b.c.d".split(".", 0), ["a.b.c.d"])
   196  assert.eq("a.b.c.d".rsplit(".", 0), ["a.b.c.d"])
   197  assert.eq("a.b.c.d".split(".", 1), ["a", "b.c.d"])
   198  assert.eq("a.b.c.d".rsplit(".", 1), ["a.b.c", "d"])
   199  assert.eq("a.b.c.d".split(".", 2), ["a", "b", "c.d"])
   200  assert.eq("a.b.c.d".rsplit(".", 2), ["a.b", "c", "d"])
   201  
   202  # {,r}split on white space: runs of white space separate fields; split drops leading and rsplit drops trailing white space, even with a limit of 0.
   203  assert.eq(" a bc\n  def \t  ghi".split(), ["a", "bc", "def", "ghi"])
   204  assert.eq(" a bc\n  def \t  ghi".split(None), ["a", "bc", "def", "ghi"])
   205  assert.eq(" a bc\n  def \t  ghi".split(None, 0), ["a bc\n  def \t  ghi"])
   206  assert.eq(" a bc\n  def \t  ghi".rsplit(None, 0), [" a bc\n  def \t  ghi"])
   207  assert.eq(" a bc\n  def \t  ghi".split(None, 1), ["a", "bc\n  def \t  ghi"])
   208  assert.eq(" a bc\n  def \t  ghi".rsplit(None, 1), [" a bc\n  def", "ghi"])
   209  assert.eq(" a bc\n  def \t  ghi".split(None, 2), ["a", "bc", "def \t  ghi"])
   210  assert.eq(" a bc\n  def \t  ghi".rsplit(None, 2), [" a bc", "def", "ghi"])
   211  assert.eq(" a bc\n  def \t  ghi".split(None, 3), ["a", "bc", "def", "ghi"])
   212  assert.eq(" a bc\n  def \t  ghi".rsplit(None, 3), [" a", "bc", "def", "ghi"])
   213  assert.eq(" a bc\n  def \t  ghi".split(None, 4), ["a", "bc", "def", "ghi"])
   214  assert.eq(" a bc\n  def \t  ghi".rsplit(None, 4), ["a", "bc", "def", "ghi"])
   215  assert.eq(" a bc\n  def \t  ghi".rsplit(None, 5), ["a", "bc", "def", "ghi"])
   216  
   217  assert.eq(" a bc\n  def \t  ghi ".split(None, 0), ["a bc\n  def \t  ghi "]) # same input plus a trailing space
   218  assert.eq(" a bc\n  def \t  ghi ".rsplit(None, 0), [" a bc\n  def \t  ghi"])
   219  assert.eq(" a bc\n  def \t  ghi ".split(None, 1), ["a", "bc\n  def \t  ghi "])
   220  assert.eq(" a bc\n  def \t  ghi ".rsplit(None, 1), [" a bc\n  def", "ghi"])
   221  
   222  # Observe the algorithmic difference when splitting on spaces versus other delimiters.
   223  assert.eq('--aa--bb--cc--'.split('-', 0), ['--aa--bb--cc--'])  # contrast this
   224  assert.eq('  aa  bb  cc  '.split(None, 0), ['aa  bb  cc  '])   #  with this
   225  assert.eq('--aa--bb--cc--'.rsplit('-', 0), ['--aa--bb--cc--']) # ditto this
   226  assert.eq('  aa  bb  cc  '.rsplit(None, 0), ['  aa  bb  cc'])  #  and this
   227  #
   228  assert.eq('--aa--bb--cc--'.split('-', 1), ['', '-aa--bb--cc--'])
   229  assert.eq('--aa--bb--cc--'.rsplit('-', 1), ['--aa--bb--cc-', ''])
   230  assert.eq('  aa  bb  cc  '.split(None, 1), ['aa', 'bb  cc  '])
   231  assert.eq('  aa  bb  cc  '.rsplit(None, 1), ['  aa  bb',  'cc'])
   232  #
   233  assert.eq('--aa--bb--cc--'.split('-', -1), ['', '', 'aa', '', 'bb', '', 'cc', '', ''])
   234  assert.eq('--aa--bb--cc--'.rsplit('-', -1), ['', '', 'aa', '', 'bb', '', 'cc', '', ''])
   235  assert.eq('  aa  bb  cc  '.split(None, -1), ['aa', 'bb', 'cc'])
   236  assert.eq('  aa  bb  cc  '.rsplit(None, -1), ['aa', 'bb', 'cc'])
   237  
   238  assert.eq("localhost:80".rsplit(":", 1)[-1], "80") # taking the text after the last separator
   239  
   240  # str.splitlines: a trailing newline adds no empty final element; splitlines(True) keeps the line endings.
   241  assert.eq("\nabc\ndef".splitlines(), ["", "abc", "def"])
   242  assert.eq("\nabc\ndef\n".splitlines(), ["", "abc", "def"])
   243  assert.eq("\nabc\ndef".splitlines(True), ["\n", "abc\n", "def"])
   244  assert.eq("\nabc\ndef\n".splitlines(True), ["\n", "abc\n", "def\n"])
   245  
   246  # str.{,l,r}strip: with no argument, or "" as asserted here, white space is removed; otherwise any of the given characters.
   247  assert.eq(" \tfoo\n ".strip(), "foo")
   248  assert.eq(" \tfoo\n ".lstrip(), "foo\n ")
   249  assert.eq(" \tfoo\n ".rstrip(), " \tfoo")
   250  assert.eq(" \tfoo\n ".strip(""), "foo")
   251  assert.eq(" \tfoo\n ".lstrip(""), "foo\n ")
   252  assert.eq(" \tfoo\n ".rstrip(""), " \tfoo")
   253  assert.eq("blah.h".strip("b.h"), "la")
   254  assert.eq("blah.h".lstrip("b.h"), "lah.h")
   255  assert.eq("blah.h".rstrip("b.h"), "bla")
   256  
   257  # str.count: optional start and end bound the searched region; negative values are accepted.
   258  assert.eq("banana".count("a"), 3)
   259  assert.eq("banana".count("a", 2), 2)
   260  assert.eq("banana".count("a", -4, -2), 1)
   261  assert.eq("banana".count("a", 1, 4), 2)
   262  assert.eq("banana".count("a", 0, -100), 0) # an end far before the start leaves nothing to search
   263  
   264  # str.{starts,ends}with: the affix is a string or a tuple of strings.
   265  assert.true("foo".endswith("oo"))
   266  assert.true(not "foo".endswith("x"))
   267  assert.true("foo".startswith("fo"))
   268  assert.true(not "foo".startswith("x"))
   269  assert.fails(lambda: "foo".startswith(1), "got int.*want string")
   270  # startswith with a tuple: true if any element is a prefix; non-string elements and lists are rejected
   271  assert.true('abc'.startswith(('a', 'A')))
   272  assert.true('ABC'.startswith(('a', 'A')))
   273  assert.true(not 'ABC'.startswith(('b', 'B')))
   274  assert.fails(lambda: '123'.startswith((1, 2)), 'got int, for element 0')
   275  assert.fails(lambda: '123'.startswith(['3']), 'got list')
   276  # endswith with a tuple: same rules for suffixes
   277  assert.true('abc'.endswith(('c', 'C')))
   278  assert.true('ABC'.endswith(('c', 'C')))
   279  assert.true(not 'ABC'.endswith(('b', 'B')))
   280  assert.fails(lambda: '123'.endswith((1, 2)), 'got int, for element 0')
   281  assert.fails(lambda: '123'.endswith(['3']), 'got list')
   282  # start/end: optional bounds restrict the region that is tested
   283  assert.true('abc'.startswith('bc', 1))
   284  assert.true(not 'abc'.startswith('b', 999))
   285  assert.true('abc'.endswith('ab', None, -1))
   286  assert.true(not 'abc'.endswith('b', None, -999))
   287  
   288  # str.replace: an optional third argument caps the number of replacements.
   289  assert.eq("banana".replace("a", "o", 1), "bonana")
   290  assert.eq("banana".replace("a", "o"), "bonono")
   291  # TODO(adonovan): more tests
   292  
   293  # str.{,r}find: index of the first (find) or last (rfind) match, or -1 if there is none.
   294  assert.eq("foofoo".find("oo"), 1)
   295  assert.eq("foofoo".find("ox"), -1)
   296  assert.eq("foofoo".find("oo", 2), 4)
   297  assert.eq("foofoo".rfind("oo"), 4)
   298  assert.eq("foofoo".rfind("ox"), -1)
   299  assert.eq("foofoo".rfind("oo", 1, 4), 1)
   300  assert.eq("foofoo".find(""), 0) # the empty string matches at the very start...
   301  assert.eq("foofoo".rfind(""), 6) # ...and at the very end
   302  
   303  # str.{,r}partition: (before, sep, after); with no match, partition puts the whole string first and rpartition puts it last.
   304  assert.eq("foo/bar/wiz".partition("/"), ("foo", "/", "bar/wiz"))
   305  assert.eq("foo/bar/wiz".rpartition("/"), ("foo/bar", "/", "wiz"))
   306  assert.eq("foo/bar/wiz".partition("."), ("foo/bar/wiz", "", ""))
   307  assert.eq("foo/bar/wiz".rpartition("."), ("", "", "foo/bar/wiz"))
   308  assert.fails(lambda: "foo/bar/wiz".partition(""), "empty separator")
   309  assert.fails(lambda: "foo/bar/wiz".rpartition(""), "empty separator")
   310  
   311  assert.eq('?'.join(["foo", "a/b/c.go".rpartition("/")[0]]), 'foo?a/b') # rpartition(...)[0] is everything before the last "/"
   312  
   313  # str.is{alpha,...}
   314  def test_predicates(): # checks every str.is<name>() method against the table below
   315    predicates = ["alnum", "alpha", "digit", "lower", "space", "title", "upper"]
   316    table = { # sample string -> space-separated names of the predicates expected to hold
   317        "Hello, World!": "title",
   318        "hello, world!": "lower",
   319        "base64": "alnum lower",
   320        "HAL-9000": "upper",
   321        "Catch-22": "title",
   322        "": "",
   323        "\n\t\r": "space",
   324        "abc": "alnum alpha lower",
   325        "ABC": "alnum alpha upper",
   326        "123": "alnum digit",
   327    }
   328    for s, want in table.items():
   329      got = ' '.join([name for name in predicates if getattr(s, "is"+name)()]) # joined in `predicates` order, as the table values are
   330      if got != want:
   331        assert.fail("%r matched [%s], want [%s]" % (s, got, want)) # observed set first, expected set second
   332  test_predicates()
   333  
   334  # Strings are not iterable, although several sequence-like operations are still allowed.
   335  # ok
   336  assert.eq(len("abc"), 3)                       # len
   337  assert.true("a" in "abc")                      # str in str
   338  assert.eq("abc"[1], "b")                       # indexing
   339  # not ok
   340  def for_string(): # loops directly over a string; asserted elsewhere in this file to fail when called
   341    for ch in "abc":
   342      pass
   343  def args(*args): return args # hands back its positional arguments; the parameter shadows the function name inside the body
   344  assert.fails(lambda: args(*"abc"), "must be iterable, not string") # varargs: *"abc" in a call
   345  assert.fails(lambda: list("abc"), "got string, want iterable") # list(str)
   346  assert.fails(lambda: tuple("abc"), "got string, want iterable") # tuple(str)
   347  assert.fails(lambda: set("abc"), "got string, want iterable") # set(str)
   348  assert.fails(lambda: set() | "abc", "unknown binary op: set | string")  # set union
   349  assert.fails(lambda: enumerate("ab"), "got string, want iterable") # enumerate
   350  assert.fails(lambda: sorted("abc"), "got string, want iterable") # sorted
   351  assert.fails(lambda: [].extend("bc"), "got string, want iterable") # list.extend
   352  assert.fails(lambda: ",".join("abc"), "got string, want iterable") # string.join
   353  assert.fails(lambda: dict(["ab"]), "not iterable .*string") # dict: the list element "ab" is itself a string
   354  # The Java implementation does not correctly reject the following cases:
   355  # (See Google Issue b/34385336)
   356  assert.fails(for_string, "string value is not iterable") # for loop (inside for_string)
   357  assert.fails(lambda: [x for x in "abc"], "string value is not iterable") # comprehension
   358  assert.fails(lambda: all("abc"), "got string, want iterable") # all
   359  assert.fails(lambda: any("abc"), "got string, want iterable") # any
   360  assert.fails(lambda: reversed("abc"), "got string, want iterable") # reversed
   361  assert.fails(lambda: zip("ab", "cd"), "not iterable: string") # zip
   362  
   363  # TODO(adonovan): tests for: {,r}index join {capitalize,lower,title,upper}