github.com/google/grumpy@v0.0.0-20171122020858-3ec87959189c/runtime/unicode_test.go (about)

     1  // Copyright 2016 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package grumpy
    16  
    17  import (
    18  	"bytes"
    19  	"reflect"
    20  	"testing"
    21  	"unicode"
    22  )
    23  
    24  func TestUnicodeNewUnicode(t *testing.T) {
    25  	cases := []struct {
    26  		s    string
    27  		want []rune
    28  	}{
    29  		// Invalid utf-8 characters should not be present in unicode
    30  		// objects, but if that happens they're substituted with the
    31  		// replacement character U+FFFD.
    32  		{"foo\xffbar", []rune{'f', 'o', 'o', '\uFFFD', 'b', 'a', 'r'}},
    33  		// U+D800 is a surrogate that Python 2.x encodes to UTF-8 as
    34  		// \xed\xa0\x80 but Go treats each code unit as a bad rune.
    35  		{"\xed\xa0\x80", []rune{'\uFFFD', '\uFFFD', '\uFFFD'}},
    36  	}
    37  	for _, cas := range cases {
    38  		got := NewUnicode(cas.s).Value()
    39  		if !reflect.DeepEqual(got, cas.want) {
    40  			t.Errorf("NewUnicode(%q) = %v, want %v", cas.s, got, cas.want)
    41  		}
    42  	}
    43  }
    44  
    45  func TestUnicodeBinaryOps(t *testing.T) {
    46  	fun := wrapFuncForTest(func(f *Frame, fn func(f *Frame, v, w *Object) (*Object, *BaseException), v, w *Object) (*Object, *BaseException) {
    47  		return fn(f, v, w)
    48  	})
    49  	cases := []invokeTestCase{
    50  		{args: wrapArgs(Add, NewUnicode("foo"), NewUnicode("bar")), want: NewUnicode("foobar").ToObject()},
    51  		{args: wrapArgs(Add, NewUnicode("foo"), "bar"), want: NewUnicode("foobar").ToObject()},
    52  		{args: wrapArgs(Add, "foo", NewUnicode("bar")), want: NewUnicode("foobar").ToObject()},
    53  		{args: wrapArgs(Add, NewUnicode("baz"), NewUnicode("")), want: NewUnicode("baz").ToObject()},
    54  		{args: wrapArgs(Add, NewUnicode(""), newObject(ObjectType)), wantExc: mustCreateException(TypeErrorType, "coercing to Unicode: need string, object found")},
    55  		{args: wrapArgs(Add, None, NewUnicode("")), wantExc: mustCreateException(TypeErrorType, "unsupported operand type(s) for +: 'NoneType' and 'unicode'")},
    56  		{args: wrapArgs(Mul, NewUnicode(""), 10), want: NewUnicode("").ToObject()},
    57  		{args: wrapArgs(Mul, NewUnicode("foo"), -2), want: NewUnicode("").ToObject()},
    58  		{args: wrapArgs(Mul, NewUnicode("foobar"), 0), want: NewUnicode("").ToObject()},
    59  		{args: wrapArgs(Mul, NewUnicode("aloha"), 2), want: NewUnicode("alohaaloha").ToObject()},
    60  		{args: wrapArgs(Mul, 1, NewUnicode("baz")), want: NewUnicode("baz").ToObject()},
    61  		{args: wrapArgs(Mul, newObject(ObjectType), NewUnicode("qux")), wantExc: mustCreateException(TypeErrorType, "unsupported operand type(s) for *: 'object' and 'unicode'")},
    62  		{args: wrapArgs(Mul, NewUnicode("foo"), NewUnicode("")), wantExc: mustCreateException(TypeErrorType, "unsupported operand type(s) for *: 'unicode' and 'unicode'")},
    63  		{args: wrapArgs(Mul, NewUnicode("bar"), MaxInt), wantExc: mustCreateException(OverflowErrorType, "result too large")},
    64  	}
    65  	for _, cas := range cases {
    66  		if err := runInvokeTestCase(fun, &cas); err != "" {
    67  			t.Error(err)
    68  		}
    69  	}
    70  }
    71  
    72  func TestUnicodeCompare(t *testing.T) {
    73  	cases := []invokeTestCase{
    74  		{args: wrapArgs(NewUnicode(""), NewUnicode("")), want: compareAllResultEq},
    75  		{args: wrapArgs(NewUnicode(""), ""), want: compareAllResultEq},
    76  		{args: wrapArgs(NewStr(""), NewUnicode("")), want: compareAllResultEq},
    77  		{args: wrapArgs(NewUnicode("樂"), NewUnicode("樂")), want: compareAllResultEq},
    78  		{args: wrapArgs(NewUnicode("樂"), "樂"), want: compareAllResultEq},
    79  		{args: wrapArgs(NewStr("樂"), NewUnicode("樂")), want: compareAllResultEq},
    80  		{args: wrapArgs(NewUnicode("вол"), NewUnicode("волн")), want: compareAllResultLT},
    81  		{args: wrapArgs(NewUnicode("вол"), "волн"), want: compareAllResultLT},
    82  		{args: wrapArgs(NewStr("вол"), NewUnicode("волн")), want: compareAllResultLT},
    83  		{args: wrapArgs(NewUnicode("bar"), NewUnicode("baz")), want: compareAllResultLT},
    84  		{args: wrapArgs(NewUnicode("bar"), "baz"), want: compareAllResultLT},
    85  		{args: wrapArgs(NewStr("bar"), NewUnicode("baz")), want: compareAllResultLT},
    86  		{args: wrapArgs(NewUnicode("abc"), None), want: compareAllResultGT},
    87  	}
    88  	for _, cas := range cases {
    89  		if err := runInvokeTestCase(compareAll, &cas); err != "" {
    90  			t.Error(err)
    91  		}
    92  	}
    93  }
    94  
    95  func TestUnicodeContains(t *testing.T) {
    96  	cases := []invokeTestCase{
    97  		{args: wrapArgs(NewUnicode("foobar"), NewUnicode("foo")), want: True.ToObject()},
    98  		{args: wrapArgs(NewUnicode("abcdef"), NewUnicode("bar")), want: False.ToObject()},
    99  		{args: wrapArgs(NewUnicode(""), NewUnicode("")), want: True.ToObject()},
   100  		{args: wrapArgs(NewUnicode(""), 102.1), wantExc: mustCreateException(TypeErrorType, "coercing to Unicode: need string, float found")},
   101  	}
   102  	for _, cas := range cases {
   103  		if err := runInvokeMethodTestCase(UnicodeType, "__contains__", &cas); err != "" {
   104  			t.Error(err)
   105  		}
   106  	}
   107  }
   108  
   109  func TestUnicodeEncode(t *testing.T) {
   110  	cases := []invokeTestCase{
   111  		{args: wrapArgs(NewUnicode("foo")), want: NewStr("foo").ToObject()},
   112  		{args: wrapArgs(NewUnicode("foob\u0300ar"), "utf8"), want: NewStr("foob\u0300ar").ToObject()},
   113  		{args: wrapArgs(NewUnicode("foo"), "noexist", "strict"), wantExc: mustCreateException(LookupErrorType, "unknown encoding: noexist")},
   114  		{args: wrapArgs(NewUnicodeFromRunes([]rune{'в', 'о', 'л', 'н'}), "utf8", "strict"), want: NewStr("\xd0\xb2\xd0\xbe\xd0\xbb\xd0\xbd").ToObject()},
   115  		{args: wrapArgs(NewUnicodeFromRunes([]rune{'\xff'}), "utf8"), want: NewStr("\xc3\xbf").ToObject()},
   116  		{args: wrapArgs(NewUnicodeFromRunes([]rune{0xD800})), wantExc: mustCreateException(UnicodeEncodeErrorType, `'utf8' codec can't encode character \ud800 in position 0`)},
   117  		{args: wrapArgs(NewUnicodeFromRunes([]rune{unicode.MaxRune + 1}), "utf8", "replace"), want: NewStr("\xef\xbf\xbd").ToObject()},
   118  		{args: wrapArgs(NewUnicodeFromRunes([]rune{0xFFFFFF}), "utf8", "ignore"), want: NewStr("").ToObject()},
   119  		{args: wrapArgs(NewUnicodeFromRunes([]rune{0xFFFFFF}), "utf8", "noexist"), wantExc: mustCreateException(LookupErrorType, "unknown error handler name 'noexist'")},
   120  	}
   121  	for _, cas := range cases {
   122  		if err := runInvokeMethodTestCase(UnicodeType, "encode", &cas); err != "" {
   123  			t.Error(err)
   124  		}
   125  	}
   126  }
   127  
   128  func TestUnicodeGetItem(t *testing.T) {
   129  	cases := []invokeTestCase{
   130  		{args: wrapArgs(NewUnicode("bar"), 1), want: NewUnicode("a").ToObject()},
   131  		{args: wrapArgs(NewUnicode("foo"), 3.14), wantExc: mustCreateException(TypeErrorType, "unicode indices must be integers or slice, not float")},
   132  		{args: wrapArgs(NewUnicode("baz"), -1), want: NewUnicode("z").ToObject()},
   133  		{args: wrapArgs(NewUnicode("baz"), -4), wantExc: mustCreateException(IndexErrorType, "index out of range")},
   134  		{args: wrapArgs(NewUnicode(""), 0), wantExc: mustCreateException(IndexErrorType, "index out of range")},
   135  		{args: wrapArgs(NewUnicode("foo"), 3), wantExc: mustCreateException(IndexErrorType, "index out of range")},
   136  		{args: wrapArgs(NewUnicode("bar"), newTestSlice(None, 2)), want: NewStr("ba").ToObject()},
   137  		{args: wrapArgs(NewUnicode("bar"), newTestSlice(1, 3)), want: NewStr("ar").ToObject()},
   138  		{args: wrapArgs(NewUnicode("bar"), newTestSlice(1, None)), want: NewStr("ar").ToObject()},
   139  		{args: wrapArgs(NewUnicode("foobarbaz"), newTestSlice(1, 8, 2)), want: NewStr("obra").ToObject()},
   140  		{args: wrapArgs(NewUnicode("bar"), newTestSlice(1, 2, 0)), wantExc: mustCreateException(ValueErrorType, "slice step cannot be zero")},
   141  	}
   142  	for _, cas := range cases {
   143  		if err := runInvokeMethodTestCase(UnicodeType, "__getitem__", &cas); err != "" {
   144  			t.Error(err)
   145  		}
   146  	}
   147  }
   148  
   149  func TestUnicodeHash(t *testing.T) {
   150  	truncateInt := func(i int64) int { return int(i) } // Support for 32bit systems.
   151  	cases := []invokeTestCase{
   152  		{args: wrapArgs(NewUnicode("foo")), want: NewInt(truncateInt(-4177197833195190597)).ToObject()},
   153  		{args: wrapArgs(NewUnicode("bar")), want: NewInt(truncateInt(327024216814240868)).ToObject()},
   154  		{args: wrapArgs(NewUnicode("baz")), want: NewInt(truncateInt(327024216814240876)).ToObject()},
   155  		{args: wrapArgs(NewUnicode("")), want: NewInt(0).ToObject()},
   156  	}
   157  	for _, cas := range cases {
   158  		if err := runInvokeMethodTestCase(UnicodeType, "__hash__", &cas); err != "" {
   159  			t.Error(err)
   160  		}
   161  	}
   162  }
   163  
   164  func TestUnicodeLen(t *testing.T) {
   165  	cases := []invokeTestCase{
   166  		{args: wrapArgs(NewUnicode("foo")), want: NewInt(3).ToObject()},
   167  		{args: wrapArgs(NewUnicode("")), want: NewInt(0).ToObject()},
   168  		{args: wrapArgs(NewUnicode("волн")), want: NewInt(4).ToObject()},
   169  	}
   170  	for _, cas := range cases {
   171  		if err := runInvokeMethodTestCase(UnicodeType, "__len__", &cas); err != "" {
   172  			t.Error(err)
   173  		}
   174  	}
   175  }
   176  
   177  func TestUnicodeMethods(t *testing.T) {
   178  	cases := []struct {
   179  		methodName string
   180  		args       Args
   181  		want       *Object
   182  		wantExc    *BaseException
   183  	}{
   184  		{"join", wrapArgs(NewUnicode(","), newTestList("foo", "bar")), NewUnicode("foo,bar").ToObject(), nil},
   185  		{"join", wrapArgs(NewUnicode(":"), newTestList(NewUnicode("foo"), "bar", NewUnicode("baz"))), NewUnicode("foo:bar:baz").ToObject(), nil},
   186  		{"join", wrapArgs(NewUnicode("nope"), NewTuple()), NewUnicode("").ToObject(), nil},
   187  		{"join", wrapArgs(NewUnicode("nope"), newTestTuple(NewUnicode("foo"))), NewUnicode("foo").ToObject(), nil},
   188  		{"join", wrapArgs(NewUnicode(","), newTestList("foo", "bar", 3.14)), nil, mustCreateException(TypeErrorType, "coercing to Unicode: need string, float found")},
   189  		{"strip", wrapArgs(NewUnicode("foo ")), NewStr("foo").ToObject(), nil},
   190  		{"strip", wrapArgs(NewUnicode(" foo bar ")), NewStr("foo bar").ToObject(), nil},
   191  		{"strip", wrapArgs(NewUnicode("foo foo"), "o"), NewStr("foo f").ToObject(), nil},
   192  		{"strip", wrapArgs(NewUnicode("foo bar"), "abr"), NewStr("foo ").ToObject(), nil},
   193  		{"strip", wrapArgs(NewUnicode("foo"), NewUnicode("o")), NewUnicode("f").ToObject(), nil},
   194  		{"strip", wrapArgs(NewUnicode("123"), 3), nil, mustCreateException(TypeErrorType, "coercing to Unicode: need string, int found")},
   195  		{"strip", wrapArgs(NewUnicode("foo"), "bar", "baz"), nil, mustCreateException(TypeErrorType, "'strip' of 'unicode' requires 2 arguments")},
   196  		{"strip", wrapArgs(NewUnicode("foo"), NewUnicode("o")), NewUnicode("f").ToObject(), nil},
   197  	}
   198  	for _, cas := range cases {
   199  		testCase := invokeTestCase{args: cas.args, want: cas.want, wantExc: cas.wantExc}
   200  		if err := runInvokeMethodTestCase(UnicodeType, cas.methodName, &testCase); err != "" {
   201  			t.Error(err)
   202  		}
   203  	}
   204  }
   205  func TestUnicodeNative(t *testing.T) {
   206  	fun := wrapFuncForTest(func(f *Frame, s *Unicode) (string, *BaseException) {
   207  		native, raised := ToNative(f, s.ToObject())
   208  		if raised != nil {
   209  			return "", raised
   210  		}
   211  		got, ok := native.Interface().(string)
   212  		if raised := Assert(f, GetBool(ok).ToObject(), nil); raised != nil {
   213  			return "", raised
   214  		}
   215  		return got, nil
   216  	})
   217  	cases := []invokeTestCase{
   218  		{args: wrapArgs(NewUnicode("волн")), want: NewStr("волн").ToObject()},
   219  	}
   220  	for _, cas := range cases {
   221  		if err := runInvokeTestCase(fun, &cas); err != "" {
   222  			t.Error(err)
   223  		}
   224  	}
   225  }
   226  
   227  func TestUnicodeRepr(t *testing.T) {
   228  	cases := []invokeTestCase{
   229  		{args: wrapArgs(NewUnicode("foo")), want: NewStr("u'foo'").ToObject()},
   230  		{args: wrapArgs(NewUnicode("on\nmultiple\nlines")), want: NewStr(`u'on\nmultiple\nlines'`).ToObject()},
   231  		{args: wrapArgs(NewUnicode("a\u0300")), want: NewStr(`u'a\u0300'`).ToObject()},
   232  		{args: wrapArgs(NewUnicodeFromRunes([]rune{'h', 'o', 'l', 0xFF})), want: NewStr(`u'hol\xff'`).ToObject()},
   233  		{args: wrapArgs(NewUnicodeFromRunes([]rune{0x10163})), want: NewStr(`u'\U00010163'`).ToObject()},
   234  	}
   235  	for _, cas := range cases {
   236  		if err := runInvokeMethodTestCase(UnicodeType, "__repr__", &cas); err != "" {
   237  			t.Error(err)
   238  		}
   239  	}
   240  }
   241  
   242  func TestUnicodeNew(t *testing.T) {
   243  	fooType := newTestClass("Foo", []*Type{ObjectType}, newStringDict(map[string]*Object{
   244  		"__unicode__": newBuiltinFunction("__unicode__", func(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) {
   245  			return NewStr("foo").ToObject(), nil
   246  		}).ToObject(),
   247  	}))
   248  	strictEqType := newTestClassStrictEq("StrictEq", UnicodeType)
   249  	cases := []invokeTestCase{
   250  		{args: wrapArgs(UnicodeType), want: NewUnicode("").ToObject()},
   251  		{args: wrapArgs(UnicodeType, NewUnicode("foo")), want: NewUnicode("foo").ToObject()},
   252  		{args: wrapArgs(UnicodeType, newObject(fooType)), want: NewUnicode("foo").ToObject()},
   253  		{args: wrapArgs(UnicodeType, "foobar"), want: NewUnicode("foobar").ToObject()},
   254  		{args: wrapArgs(UnicodeType, "foo\xffbar"), wantExc: mustCreateException(UnicodeDecodeErrorType, "'utf8' codec can't decode byte 0xff in position 3")},
   255  		{args: wrapArgs(UnicodeType, 123), want: NewUnicode("123").ToObject()},
   256  		{args: wrapArgs(UnicodeType, 3.14, "utf8"), wantExc: mustCreateException(TypeErrorType, "coercing to Unicode: need str, float found")},
   257  		{args: wrapArgs(UnicodeType, "baz", "utf8"), want: NewUnicode("baz").ToObject()},
   258  		{args: wrapArgs(UnicodeType, "baz", "utf-8"), want: NewUnicode("baz").ToObject()},
   259  		{args: wrapArgs(UnicodeType, "foo\xffbar", "utf_8"), wantExc: mustCreateException(UnicodeDecodeErrorType, "'utf_8' codec can't decode byte 0xff in position 3")},
   260  		{args: wrapArgs(UnicodeType, "foo\xffbar", "UTF8", "ignore"), want: NewUnicode("foobar").ToObject()},
   261  		{args: wrapArgs(UnicodeType, "foo\xffbar", "utf8", "replace"), want: NewUnicode("foo\ufffdbar").ToObject()},
   262  		{args: wrapArgs(UnicodeType, "\xff", "utf-8", "noexist"), wantExc: mustCreateException(LookupErrorType, "unknown error handler name 'noexist'")},
   263  		{args: wrapArgs(UnicodeType, "\xff", "utf16"), wantExc: mustCreateException(LookupErrorType, "unknown encoding: utf16")},
   264  		{args: wrapArgs(strictEqType, NewUnicode("foo")), want: (&Unicode{Object{typ: strictEqType}, bytes.Runes([]byte("foo"))}).ToObject()},
   265  	}
   266  	for _, cas := range cases {
   267  		if err := runInvokeMethodTestCase(UnicodeType, "__new__", &cas); err != "" {
   268  			t.Error(err)
   269  		}
   270  	}
   271  }
   272  
   273  func TestUnicodeNewNotSubtype(t *testing.T) {
   274  	cas := invokeTestCase{args: wrapArgs(IntType), wantExc: mustCreateException(TypeErrorType, "unicode.__new__(int): int is not a subtype of unicode")}
   275  	if err := runInvokeMethodTestCase(UnicodeType, "__new__", &cas); err != "" {
   276  		t.Error(err)
   277  	}
   278  }
   279  
   280  func TestUnicodeNewSubclass(t *testing.T) {
   281  	fooType := newTestClass("Foo", []*Type{UnicodeType}, NewDict())
   282  	bar := (&Unicode{Object{typ: fooType}, bytes.Runes([]byte("bar"))}).ToObject()
   283  	fun := wrapFuncForTest(func(f *Frame) *BaseException {
   284  		got, raised := UnicodeType.Call(f, []*Object{bar}, nil)
   285  		if raised != nil {
   286  			return raised
   287  		}
   288  		if got.typ != UnicodeType {
   289  			t.Errorf(`unicode(Foo("bar")) = %v, want u"bar"`, got)
   290  			return nil
   291  		}
   292  		ne, raised := NE(f, got, NewUnicode("bar").ToObject())
   293  		if raised != nil {
   294  			return raised
   295  		}
   296  		isTrue, raised := IsTrue(f, ne)
   297  		if raised != nil {
   298  			return raised
   299  		}
   300  		if isTrue {
   301  			t.Errorf(`unicode(Foo("bar")) = %v, want u"bar"`, got)
   302  		}
   303  		return nil
   304  	})
   305  	if err := runInvokeTestCase(fun, &invokeTestCase{want: None}); err != "" {
   306  		t.Error(err)
   307  	}
   308  }
   309  
   310  func TestUnicodeStr(t *testing.T) {
   311  	cases := []invokeTestCase{
   312  		{args: wrapArgs(NewUnicode("foo")), want: NewStr("foo").ToObject()},
   313  		{args: wrapArgs(NewUnicode("on\nmultiple\nlines")), want: NewStr("on\nmultiple\nlines").ToObject()},
   314  		{args: wrapArgs(NewUnicode("a\u0300")), want: NewStr("a\u0300").ToObject()},
   315  	}
   316  	for _, cas := range cases {
   317  		if err := runInvokeMethodTestCase(UnicodeType, "__str__", &cas); err != "" {
   318  			t.Error(err)
   319  		}
   320  	}
   321  }