# Source: github.com/google/skylark@v0.0.0-20181101142754-a5f7082aabed/testdata/string.sky
# Tests of Skylark 'string'

load("assert.sky", "assert")

# raw string literals:
assert.eq(r'a\bc', "a\\bc")

# truth
assert.true("abc")
assert.true("\0")
assert.true(not "")

# str + str
assert.eq("a"+"b"+"c", "abc")

# str * int, int * str
assert.eq("abc" * 0, "")
assert.eq("abc" * -1, "")
assert.eq("abc" * 1, "abc")
assert.eq("abc" * 5, "abcabcabcabcabc")
assert.eq(0 * "abc", "")
assert.eq(-1 * "abc", "")
assert.eq(1 * "abc", "abc")
assert.eq(5 * "abc", "abcabcabcabcabc")
assert.fails(lambda: 1.0 * "abc", "unknown.*float \\* str")

# len (the expected values below are UTF-8 byte counts)
assert.eq(len("Hello, 世界!"), 14)
assert.eq(len("𐐷"), 4)  # U+10437 has a 4-byte UTF-8 encoding (and a 2-code UTF-16 encoding)

# chr & ord
assert.eq(chr(65), "A")  # 1-byte UTF-8 encoding
assert.eq(chr(1049), "Й")  # 2-byte UTF-8 encoding
assert.eq(chr(0x1F63F), "😿")  # 4-byte UTF-8 encoding
assert.fails(lambda: chr(-1), "Unicode code point -1 out of range \\(<0\\)")
assert.fails(lambda: chr(0x110000), "Unicode code point U\\+110000 out of range \\(>0x10FFFF\\)")
assert.eq(ord("A"), 65)
assert.eq(ord("Й"), 1049)
assert.eq(ord("😿"), 0x1F63F)
assert.eq(ord("Й"[1:]), 0xFFFD)  # = Unicode replacement character
assert.fails(lambda: ord("abc"), "string encodes 3 Unicode code points, want 1")
assert.fails(lambda: ord(""), "string encodes 0 Unicode code points, want 1")
assert.fails(lambda: ord("😿"[1:]), "string encodes 3 Unicode code points, want 1")  # 3 x 0xFFFD

# string.codepoint_ords
assert.eq(type("abcЙ😿".codepoint_ords()), "codepoints")
assert.eq(str("abcЙ😿".codepoint_ords()), '"abcЙ😿".codepoint_ords()')
assert.eq(list("abcЙ😿".codepoint_ords()), [97, 98, 99, 1049, 128575])
assert.eq(list(("A" + "😿Z"[1:]).codepoint_ords()), [ord("A"), 0xFFFD, 0xFFFD, 0xFFFD, ord("Z")])
assert.eq(list("".codepoint_ords()), [])

# string.codepoints
assert.eq(type("abcЙ😿".codepoints()), "codepoints")
assert.eq(str("abcЙ😿".codepoints()), '"abcЙ😿".codepoints()')
assert.eq(list("abcЙ😿".codepoints()), ["a", "b", "c", "Й", "😿"])
assert.eq(list(("A" + "😿Z"[1:]).codepoints()), ["A", "\x9f", "\x98", "\xbf", "Z"])
assert.eq(list("".codepoints()), [])

# string.elem_ords
assert.eq(type("abcЙ😿".elem_ords()), "elems")
assert.eq(str("abcЙ😿".elem_ords()), '"abcЙ😿".elem_ords()')
assert.eq(list("abcЙ😿".elem_ords()), [97, 98, 99, 208, 153, 240, 159, 152, 191])
assert.eq(list(("A" + "😿Z"[1:]).elem_ords()), [65, 159, 152, 191, 90])
assert.eq(list("".elem_ords()), [])

# string.elems
assert.eq(type("abcЙ😿".elems()), "elems")
assert.eq(str("abcЙ😿".elems()), '"abcЙ😿".elems()')
assert.eq(list("abcЙ😿".elems()),
          ["a", "b", "c", "\xd0", "\x99", "\xf0", "\x9f", "\x98", "\xbf"])
assert.eq(list(("A" + "😿Z"[1:]).elems()),
          ["A", "\x9f", "\x98", "\xbf", "Z"])
assert.eq(list("".elems()), [])

# indexing, x[i]
# (index 7 yields "\xe4", a single byte from inside the encoding of a non-ASCII character)
assert.eq("Hello, 世界!"[0], "H")
assert.eq("Hello, 世界!"[7], "\xe4")
assert.eq("Hello, 世界!"[13], "!")
assert.fails(lambda: "abc"[-4], "out of range")
assert.eq("abc"[-3], "a")
assert.eq("abc"[-2], "b")
assert.eq("abc"[-1], "c")
assert.eq("abc"[0], "a")
assert.eq("abc"[1], "b")
assert.eq("abc"[2], "c")
assert.fails(lambda: "abc"[4], "out of range")

# x[i] = ...
x2 = "abc"

# Attempts to assign to one element of the string x2; the check below
# expects this to be rejected.
def f():
    x2[1] = 'B'

assert.fails(f, "string.*does not support.*assignment")

# slicing, x[i:j]
assert.eq("abc"[:], "abc")
assert.eq("abc"[-4:], "abc")
assert.eq("abc"[-3:], "abc")
assert.eq("abc"[-2:], "bc")
assert.eq("abc"[-1:], "c")
assert.eq("abc"[0:], "abc")
assert.eq("abc"[1:], "bc")
assert.eq("abc"[2:], "c")
assert.eq("abc"[3:], "")
assert.eq("abc"[4:], "")
assert.eq("abc"[:-4], "")
assert.eq("abc"[:-3], "")
assert.eq("abc"[:-2], "a")
assert.eq("abc"[:-1], "ab")
assert.eq("abc"[:0], "")
assert.eq("abc"[:1], "a")
assert.eq("abc"[:2], "ab")
assert.eq("abc"[:3], "abc")
assert.eq("abc"[:4], "abc")
assert.eq("abc"[1:2], "b")
assert.eq("abc"[2:1], "")
# non-unit strides
assert.eq("abcd"[0:4:1], "abcd")
assert.eq("abcd"[::2], "ac")
assert.eq("abcd"[1::2], "bd")
assert.eq("abcd"[4:0:-1], "dcb")
assert.eq("banana"[7::-2], "aaa")
assert.eq("banana"[6::-2], "aaa")
assert.eq("banana"[5::-2], "aaa")
assert.eq("banana"[4::-2], "nnb")
assert.eq("banana"[::-1], "ananab")
assert.eq("banana"[None:None:-2], "aaa")
assert.fails(lambda: "banana"[1.0::], "invalid start index: got float, want int")
assert.fails(lambda: "banana"[:"":], "invalid end index: got string, want int")
assert.fails(lambda: "banana"[:"":True], "got bool for slice step, want int")

# in, not in
assert.true("oo" in "food")
assert.true("ox" not in "food")
assert.true("" in "food")
assert.true("" in "")
assert.fails(lambda: 1 in "", "requires string as left operand")
assert.fails(lambda: "" in 1, "unknown binary op: string in int")

# ==, !=
assert.eq("hello", "he"+"llo")
assert.ne("hello", "Hello")

# TODO(adonovan): ordered comparisons

# string % tuple formatting
assert.eq("A %d %x Z" % (123, 456), "A 123 1c8 Z")
assert.eq("A %(foo)d %(bar)s Z" % {"foo": 123, "bar":"hi"}, "A 123 hi Z")
assert.eq("%s %r" % ("hi", "hi"), 'hi "hi"')  # TODO(adonovan): use ''-quotation
assert.eq("%%d %d" % 1, "%d 1")
assert.fails(lambda: "%d %d" % 1, "not enough arguments for format string")
assert.fails(lambda: "%d %d" % (1, 2, 3), "too many arguments for format string")
# %c
assert.eq("%c" % 65, "A")
assert.eq("%c" % 0x3b1, "α")
assert.eq("%c" % "A", "A")
assert.eq("%c" % "α", "α")
assert.fails(lambda: "%c" % "abc", "requires a single-character string")
assert.fails(lambda: "%c" % 65.0, "requires int or single-character string")
assert.fails(lambda: "%c" % 10000000, "requires a valid Unicode code point")
assert.fails(lambda: "%c" % -1, "requires a valid Unicode code point")
# TODO(adonovan): more tests

# str.format
assert.eq("a{}b".format(123), "a123b")
assert.eq("a{}b{}c{}d{}".format(1, 2, 3, 4), "a1b2c3d4")
assert.eq("a{{b".format(), "a{b")
assert.eq("a}}b".format(), "a}b")
assert.eq("a{{b}}c".format(), "a{b}c")
assert.eq("a{x}b{y}c{}".format(1, x=2, y=3), "a2b3c1")
assert.fails(lambda: "a{z}b".format(x=1), "keyword z not found")
assert.fails(lambda: "{-1}".format(1), "keyword -1 not found")
assert.fails(lambda: "{-0}".format(1), "keyword -0 not found")
assert.fails(lambda: '{0,1} and {1}'.format(1, 2), "keyword 0,1 not found")
assert.fails(lambda: "a{123}b".format(), "tuple index out of range")
assert.fails(lambda: "a{}b{}c".format(1), "tuple index out of range")
assert.eq("a{010}b".format(0,1,2,3,4,5,6,7,8,9,10), "a10b")  # index is decimal
assert.fails(lambda: "a{}b{1}c".format(1, 2), "cannot switch from automatic field numbering to manual")
assert.eq("a{!s}c".format("b"), "abc")
assert.eq("a{!r}c".format("b"), r'a"b"c')
assert.eq("a{x!r}c".format(x='b'), r'a"b"c')
assert.fails(lambda: "{x!}".format(x=1), "unknown conversion")
assert.fails(lambda: "{x!:}".format(x=1), "unknown conversion")
assert.fails(lambda: '{a.b}'.format(1), "syntax x.y is not supported")
# The backslashes are doubled, as in the other patterns of this file, so
# the pattern does not depend on how unknown escapes such as "\[" are treated.
assert.fails(lambda: '{a[0]}'.format(1), "syntax a\\[i\\] is not supported")
assert.fails(lambda: '{ {} }'.format(1), "nested replacement fields not supported")
assert.fails(lambda: '{{}'.format(1), "single '}' in format")
assert.fails(lambda: '{}}'.format(1), "single '}' in format")
assert.fails(lambda: '}}{'.format(1), "unmatched '{' in format")
assert.fails(lambda: '}{{'.format(1), "single '}' in format")

# str.split, str.rsplit
assert.eq("a.b.c.d".split("."), ["a", "b", "c", "d"])
assert.eq("a.b.c.d".rsplit("."), ["a", "b", "c", "d"])
assert.eq("a.b.c.d".split(".", -1), ["a", "b", "c", "d"])
assert.eq("a.b.c.d".rsplit(".", -1), ["a", "b", "c", "d"])
assert.eq("a.b.c.d".split(".", 0), ["a.b.c.d"])
assert.eq("a.b.c.d".rsplit(".", 0), ["a.b.c.d"])
assert.eq("a.b.c.d".split(".", 1), ["a", "b.c.d"])
assert.eq("a.b.c.d".rsplit(".", 1), ["a.b.c", "d"])
assert.eq("a.b.c.d".split(".", 2), ["a", "b", "c.d"])
assert.eq("a.b.c.d".rsplit(".", 2), ["a.b", "c", "d"])

# {,r}split on white space:
assert.eq(" a bc\n def \t ghi".split(), ["a", "bc", "def", "ghi"])
assert.eq(" a bc\n def \t ghi".split(None), ["a", "bc", "def", "ghi"])
assert.eq(" a bc\n def \t ghi".split(None, 0), ["a bc\n def \t ghi"])
assert.eq(" a bc\n def \t ghi".rsplit(None, 0), [" a bc\n def \t ghi"])
assert.eq(" a bc\n def \t ghi".split(None, 1), ["a", "bc\n def \t ghi"])
assert.eq(" a bc\n def \t ghi".rsplit(None, 1), [" a bc\n def", "ghi"])
assert.eq(" a bc\n def \t ghi".split(None, 2), ["a", "bc", "def \t ghi"])
assert.eq(" a bc\n def \t ghi".rsplit(None, 2), [" a bc", "def", "ghi"])
assert.eq(" a bc\n def \t ghi".split(None, 3), ["a", "bc", "def", "ghi"])
assert.eq(" a bc\n def \t ghi".rsplit(None, 3), [" a", "bc", "def", "ghi"])
assert.eq(" a bc\n def \t ghi".split(None, 4), ["a", "bc", "def", "ghi"])
assert.eq(" a bc\n def \t ghi".rsplit(None, 4), ["a", "bc", "def", "ghi"])
assert.eq(" a bc\n def \t ghi".rsplit(None, 5), ["a", "bc", "def", "ghi"])

assert.eq(" a bc\n def \t ghi ".split(None, 0), ["a bc\n def \t ghi "])
assert.eq(" a bc\n def \t ghi ".rsplit(None, 0), [" a bc\n def \t ghi"])
assert.eq(" a bc\n def \t ghi ".split(None, 1), ["a", "bc\n def \t ghi "])
assert.eq(" a bc\n def \t ghi ".rsplit(None, 1), [" a bc\n def", "ghi"])

# Observe the algorithmic difference when splitting on spaces versus other delimiters.
assert.eq('--aa--bb--cc--'.split('-', 0), ['--aa--bb--cc--'])  # contrast this
assert.eq(' aa bb cc '.split(None, 0), ['aa bb cc '])  # with this
assert.eq('--aa--bb--cc--'.rsplit('-', 0), ['--aa--bb--cc--'])  # ditto this
assert.eq(' aa bb cc '.rsplit(None, 0), [' aa bb cc'])  # and this
#
assert.eq('--aa--bb--cc--'.split('-', 1), ['', '-aa--bb--cc--'])
assert.eq('--aa--bb--cc--'.rsplit('-', 1), ['--aa--bb--cc-', ''])
assert.eq(' aa bb cc '.split(None, 1), ['aa', 'bb cc '])
assert.eq(' aa bb cc '.rsplit(None, 1), [' aa bb', 'cc'])
#
assert.eq('--aa--bb--cc--'.split('-', -1), ['', '', 'aa', '', 'bb', '', 'cc', '', ''])
assert.eq('--aa--bb--cc--'.rsplit('-', -1), ['', '', 'aa', '', 'bb', '', 'cc', '', ''])
assert.eq(' aa bb cc '.split(None, -1), ['aa', 'bb', 'cc'])
assert.eq(' aa bb cc '.rsplit(None, -1), ['aa', 'bb', 'cc'])

assert.eq("localhost:80".rsplit(":", 1)[-1], "80")

# str.splitlines
assert.eq("\nabc\ndef".splitlines(), ["", "abc", "def"])
assert.eq("\nabc\ndef\n".splitlines(), ["", "abc", "def"])
assert.eq("\nabc\ndef".splitlines(True), ["\n", "abc\n", "def"])
assert.eq("\nabc\ndef\n".splitlines(True), ["\n", "abc\n", "def\n"])

# str.{,l,r}strip
assert.eq(" \tfoo\n ".strip(), "foo")
assert.eq(" \tfoo\n ".lstrip(), "foo\n ")
assert.eq(" \tfoo\n ".rstrip(), " \tfoo")
assert.eq(" \tfoo\n ".strip(""), "foo")
assert.eq(" \tfoo\n ".lstrip(""), "foo\n ")
assert.eq(" \tfoo\n ".rstrip(""), " \tfoo")
assert.eq("blah.h".strip("b.h"), "la")
assert.eq("blah.h".lstrip("b.h"), "lah.h")
assert.eq("blah.h".rstrip("b.h"), "bla")

# str.count
assert.eq("banana".count("a"), 3)
assert.eq("banana".count("a", 2), 2)
assert.eq("banana".count("a", -4, -2), 1)
assert.eq("banana".count("a", 1, 4), 2)
assert.eq("banana".count("a", 0, -100), 0)

# str.{starts,ends}with
assert.true("foo".endswith("oo"))
assert.true(not "foo".endswith("x"))
assert.true("foo".startswith("fo"))
assert.true(not "foo".startswith("x"))
assert.fails(lambda: "foo".startswith(1), "got int.*want string")
#
assert.true('abc'.startswith(('a', 'A')))
assert.true('ABC'.startswith(('a', 'A')))
assert.true(not 'ABC'.startswith(('b', 'B')))
assert.fails(lambda: '123'.startswith((1, 2)), 'got int, for element 0')
assert.fails(lambda: '123'.startswith(['3']), 'got list')
#
assert.true('abc'.endswith(('c', 'C')))
assert.true('ABC'.endswith(('c', 'C')))
assert.true(not 'ABC'.endswith(('b', 'B')))
assert.fails(lambda: '123'.endswith((1, 2)), 'got int, for element 0')
assert.fails(lambda: '123'.endswith(['3']), 'got list')
# start/end
assert.true('abc'.startswith('bc', 1))
assert.true(not 'abc'.startswith('b', 999))
assert.true('abc'.endswith('ab', None, -1))
assert.true(not 'abc'.endswith('b', None, -999))

# str.replace
assert.eq("banana".replace("a", "o", 1), "bonana")
assert.eq("banana".replace("a", "o"), "bonono")
# TODO(adonovan): more tests

# str.{,r}find
assert.eq("foofoo".find("oo"), 1)
assert.eq("foofoo".find("ox"), -1)
assert.eq("foofoo".find("oo", 2), 4)
assert.eq("foofoo".rfind("oo"), 4)
assert.eq("foofoo".rfind("ox"), -1)
assert.eq("foofoo".rfind("oo", 1, 4), 1)
assert.eq("foofoo".find(""), 0)
assert.eq("foofoo".rfind(""), 6)

# str.{,r}partition
assert.eq("foo/bar/wiz".partition("/"), ("foo", "/", "bar/wiz"))
assert.eq("foo/bar/wiz".rpartition("/"), ("foo/bar", "/", "wiz"))
assert.eq("foo/bar/wiz".partition("."), ("foo/bar/wiz", "", ""))
assert.eq("foo/bar/wiz".rpartition("."), ("", "", "foo/bar/wiz"))
assert.fails(lambda: "foo/bar/wiz".partition(""), "empty separator")
assert.fails(lambda: "foo/bar/wiz".rpartition(""), "empty separator")

assert.eq('?'.join(["foo", "a/b/c.go".rpartition("/")[0]]), 'foo?a/b')

# str.is{alpha,...}
# Table-driven check of the is* predicates: each key of `table` is a
# subject string and each value is the space-separated list of predicate
# names (in the order of `predicates`) expected to return true for it.
def test_predicates():
    predicates = ["alnum", "alpha", "digit", "lower", "space", "title", "upper"]
    table = {
        "Hello, World!": "title",
        "hello, world!": "lower",
        "base64": "alnum lower",
        "HAL-9000": "upper",
        "Catch-22": "title",
        "": "",
        "\n\t\r": "space",
        "abc": "alnum alpha lower",
        "ABC": "alnum alpha upper",
        "123": "alnum digit",
    }
    # The subject is called `s` so that the builtin `str` is not shadowed.
    for s, want in table.items():
        got = ' '.join([name for name in predicates if getattr(s, "is"+name)()])
        if got != want:
            # Report the observed predicates first and the expected ones
            # second, matching the "matched [...], want [...]" wording.
            assert.fail("%r matched [%s], want [%s]" % (s, got, want))

test_predicates()

# Strings are not iterable.
# ok
assert.eq(len("abc"), 3)  # len
assert.true("a" in "abc")  # str in str
assert.eq("abc"[1], "b")  # indexing
# not ok

# Loops directly over a string; used below as the 'for loop' case.
def for_string():
    for ch in "abc":
        pass

# Returns its positional arguments unchanged; used below to exercise
# *-unpacking of a string at a call site.
def args(*args):
    return args

assert.fails(lambda: args(*"abc"), "must be iterable, not string")  # varargs
assert.fails(lambda: list("abc"), "got string, want iterable")  # list(str)
assert.fails(lambda: tuple("abc"), "got string, want iterable")  # tuple(str)
assert.fails(lambda: set("abc"), "got string, want iterable")  # set(str)
assert.fails(lambda: set() | "abc", "unknown binary op: set | string")  # set union
assert.fails(lambda: enumerate("ab"), "got string, want iterable")  # enumerate
assert.fails(lambda: sorted("abc"), "got string, want iterable")  # sorted
assert.fails(lambda: [].extend("bc"), "got string, want iterable")  # list.extend
assert.fails(lambda: ",".join("abc"), "got string, want iterable")  # string.join
assert.fails(lambda: dict(["ab"]), "not iterable .*string")  # dict
# The Java implementation does not correctly reject the following cases:
# (See Google Issue b/34385336)
assert.fails(for_string, "string value is not iterable")  # for loop
assert.fails(lambda: [x for x in "abc"], "string value is not iterable")  # comprehension
assert.fails(lambda: all("abc"), "got string, want iterable")  # all
assert.fails(lambda: any("abc"), "got string, want iterable")  # any
assert.fails(lambda: reversed("abc"), "got string, want iterable")  # reversed
assert.fails(lambda: zip("ab", "cd"), "not iterable: string")  # zip

# TODO(adonovan): tests for: {,r}index join {capitalize,lower,title,upper}