storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/pkg/s3select/internal/parquet-go/data/column_test.go (about)

     1  /*
     2   * Minio Cloud Storage, (C) 2019 Minio, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package data
    18  
    19  import (
    20  	"reflect"
    21  	"testing"
    22  
    23  	"storj.io/minio/pkg/s3select/internal/parquet-go/gen-go/parquet"
    24  	"storj.io/minio/pkg/s3select/internal/parquet-go/schema"
    25  )
    26  
    27  var (
    28  	v10    = int32(10)
    29  	v20    = int32(20)
    30  	v30    = int32(30)
    31  	ten    = []byte("ten")
    32  	foo    = []byte("foo")
    33  	bar    = []byte("bar")
    34  	phone1 = []byte("1-234-567-8901")
    35  	phone2 = []byte("1-234-567-1098")
    36  	phone3 = []byte("1-111-222-3333")
    37  )
    38  
    39  func TestAddressBookExample(t *testing.T) {
    40  	// message AddressBook {
    41  	//   required string owner;
    42  	//   repeated string ownerPhoneNumbers;
    43  	//   repeated group contacts {
    44  	//     required string name;
    45  	//     optional string phoneNumber;
    46  	//   }
    47  	// }
    48  	t.Skip("Broken")
    49  
    50  	addressBook := schema.NewTree()
    51  	{
    52  		owner, err := schema.NewElement("owner", parquet.FieldRepetitionType_REQUIRED,
    53  			parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8),
    54  			nil, nil, nil)
    55  		if err != nil {
    56  			t.Fatal(err)
    57  		}
    58  
    59  		ownerPhoneNumbers, err := schema.NewElement("ownerPhoneNumbers", parquet.FieldRepetitionType_OPTIONAL,
    60  			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
    61  			nil, nil, nil)
    62  		if err != nil {
    63  			t.Fatal(err)
    64  		}
    65  
    66  		ownerPhoneNumbersList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
    67  			nil, nil,
    68  			nil, nil, nil)
    69  		if err != nil {
    70  			t.Fatal(err)
    71  		}
    72  
    73  		ownerPhoneNumbersElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED,
    74  			parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8),
    75  			nil, nil, nil)
    76  		if err != nil {
    77  			t.Fatal(err)
    78  		}
    79  
    80  		contacts, err := schema.NewElement("contacts", parquet.FieldRepetitionType_OPTIONAL,
    81  			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
    82  			nil, nil, nil)
    83  		if err != nil {
    84  			t.Fatal(err)
    85  		}
    86  
    87  		contactsList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
    88  			nil, nil,
    89  			nil, nil, nil)
    90  		if err != nil {
    91  			t.Fatal(err)
    92  		}
    93  
    94  		contactsElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED,
    95  			nil, nil,
    96  			nil, nil, nil)
    97  		if err != nil {
    98  			t.Fatal(err)
    99  		}
   100  
   101  		contactName, err := schema.NewElement("name", parquet.FieldRepetitionType_REQUIRED,
   102  			parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8),
   103  			nil, nil, nil)
   104  		if err != nil {
   105  			t.Fatal(err)
   106  		}
   107  
   108  		contactPhoneNumber, err := schema.NewElement("phoneNumber", parquet.FieldRepetitionType_OPTIONAL,
   109  			parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8),
   110  			nil, nil, nil)
   111  		if err != nil {
   112  			t.Fatal(err)
   113  		}
   114  		if err = addressBook.Set("owner", owner); err != nil {
   115  			t.Fatal(err)
   116  		}
   117  
   118  		if err = addressBook.Set("ownerPhoneNumbers", ownerPhoneNumbers); err != nil {
   119  			t.Fatal(err)
   120  		}
   121  		if err = addressBook.Set("ownerPhoneNumbers.list", ownerPhoneNumbersList); err != nil {
   122  			t.Fatal(err)
   123  		}
   124  		if err = addressBook.Set("ownerPhoneNumbers.list.element", ownerPhoneNumbersElement); err != nil {
   125  			t.Fatal(err)
   126  		}
   127  
   128  		if err = addressBook.Set("contacts", contacts); err != nil {
   129  			t.Fatal(err)
   130  		}
   131  		if err = addressBook.Set("contacts.list", contactsList); err != nil {
   132  			t.Fatal(err)
   133  		}
   134  		if err = addressBook.Set("contacts.list.element", contactsElement); err != nil {
   135  			t.Fatal(err)
   136  		}
   137  		if err = addressBook.Set("contacts.list.element.name", contactName); err != nil {
   138  			t.Fatal(err)
   139  		}
   140  		if err = addressBook.Set("contacts.list.element.phoneNumber", contactPhoneNumber); err != nil {
   141  			t.Fatal(err)
   142  		}
   143  	}
   144  
   145  	if _, _, err := addressBook.ToParquetSchema(); err != nil {
   146  		t.Fatal(err)
   147  	}
   148  
   149  	case2Data := `{
   150      "owner": "foo"
   151  }`
   152  	result2 := map[string]*Column{
   153  		"owner": {
   154  			parquetType:      parquet.Type_BYTE_ARRAY,
   155  			values:           []interface{}{foo},
   156  			definitionLevels: []int64{0},
   157  			repetitionLevels: []int64{0},
   158  		},
   159  		"ownerPhoneNumbers.list.element": {
   160  			parquetType:      parquet.Type_BYTE_ARRAY,
   161  			values:           []interface{}{nil},
   162  			definitionLevels: []int64{0},
   163  			repetitionLevels: []int64{0},
   164  		},
   165  		"contacts.list.element.name": {
   166  			parquetType:      parquet.Type_BYTE_ARRAY,
   167  			values:           []interface{}{nil},
   168  			definitionLevels: []int64{0},
   169  			repetitionLevels: []int64{0},
   170  		},
   171  	}
   172  
   173  	case3Data := `{
   174      "owner": "foo",
   175      "ownerPhoneNumbers": [
   176          "1-234-567-8901"
   177      ]
   178  }
   179  `
   180  	result3 := map[string]*Column{
   181  		"owner": {
   182  			parquetType:      parquet.Type_BYTE_ARRAY,
   183  			values:           []interface{}{foo},
   184  			definitionLevels: []int64{0},
   185  			repetitionLevels: []int64{0},
   186  		},
   187  		"ownerPhoneNumbers.list.element": {
   188  			parquetType:      parquet.Type_BYTE_ARRAY,
   189  			values:           []interface{}{phone1},
   190  			definitionLevels: []int64{2},
   191  			repetitionLevels: []int64{0},
   192  		},
   193  		"contacts.list.element.name": {
   194  			parquetType:      parquet.Type_BYTE_ARRAY,
   195  			values:           []interface{}{nil},
   196  			definitionLevels: []int64{0},
   197  			repetitionLevels: []int64{0},
   198  		},
   199  	}
   200  
   201  	case4Data := `{
   202      "owner": "foo",
   203      "ownerPhoneNumbers": [
   204          "1-234-567-8901",
   205          "1-234-567-1098"
   206      ]
   207  }
   208  `
   209  	result4 := map[string]*Column{
   210  		"owner": {
   211  			parquetType:      parquet.Type_BYTE_ARRAY,
   212  			values:           []interface{}{foo},
   213  			definitionLevels: []int64{0},
   214  			repetitionLevels: []int64{0},
   215  		},
   216  		"ownerPhoneNumbers.list.element": {
   217  			parquetType:      parquet.Type_BYTE_ARRAY,
   218  			values:           []interface{}{phone1, phone2},
   219  			definitionLevels: []int64{2, 2},
   220  			repetitionLevels: []int64{0, 1},
   221  		},
   222  		"contacts.list.element.name": {
   223  			parquetType:      parquet.Type_BYTE_ARRAY,
   224  			values:           []interface{}{nil},
   225  			definitionLevels: []int64{0},
   226  			repetitionLevels: []int64{0},
   227  		},
   228  	}
   229  
   230  	case5Data := `{
   231      "contacts": [
   232          {
   233              "name": "bar"
   234          }
   235      ],
   236      "owner": "foo"
   237  }`
   238  	result5 := map[string]*Column{
   239  		"owner": {
   240  			parquetType:      parquet.Type_BYTE_ARRAY,
   241  			values:           []interface{}{foo},
   242  			definitionLevels: []int64{0},
   243  			repetitionLevels: []int64{0},
   244  		},
   245  		"ownerPhoneNumbers.list.element": {
   246  			parquetType:      parquet.Type_BYTE_ARRAY,
   247  			values:           []interface{}{nil},
   248  			definitionLevels: []int64{0},
   249  			repetitionLevels: []int64{0},
   250  		},
   251  		"contacts.list.element.name": {
   252  			parquetType:      parquet.Type_BYTE_ARRAY,
   253  			values:           []interface{}{bar},
   254  			definitionLevels: []int64{2},
   255  			repetitionLevels: []int64{0},
   256  		},
   257  		"contacts.list.element.phoneNumber": {
   258  			parquetType:      parquet.Type_BYTE_ARRAY,
   259  			values:           []interface{}{nil},
   260  			definitionLevels: []int64{2},
   261  			repetitionLevels: []int64{1},
   262  		},
   263  	}
   264  
   265  	case6Data := `{
   266      "contacts": [
   267          {
   268              "name": "bar",
   269              "phoneNumber": "1-111-222-3333"
   270          }
   271      ],
   272      "owner": "foo"
   273  }`
   274  	result6 := map[string]*Column{
   275  		"owner": {
   276  			parquetType:      parquet.Type_BYTE_ARRAY,
   277  			values:           []interface{}{foo},
   278  			definitionLevels: []int64{0},
   279  			repetitionLevels: []int64{0},
   280  		},
   281  		"ownerPhoneNumbers.list.element": {
   282  			parquetType:      parquet.Type_BYTE_ARRAY,
   283  			values:           []interface{}{nil},
   284  			definitionLevels: []int64{0},
   285  			repetitionLevels: []int64{0},
   286  		},
   287  		"contacts.list.element.name": {
   288  			parquetType:      parquet.Type_BYTE_ARRAY,
   289  			values:           []interface{}{bar},
   290  			definitionLevels: []int64{2},
   291  			repetitionLevels: []int64{0},
   292  		},
   293  		"contacts.list.element.phoneNumber": {
   294  			parquetType:      parquet.Type_BYTE_ARRAY,
   295  			values:           []interface{}{phone3},
   296  			definitionLevels: []int64{3},
   297  			repetitionLevels: []int64{1},
   298  		},
   299  	}
   300  
   301  	case7Data := `{
   302      "contacts": [
   303          {
   304              "name": "bar",
   305              "phoneNumber": "1-111-222-3333"
   306          }
   307      ],
   308      "owner": "foo",
   309      "ownerPhoneNumbers": [
   310          "1-234-567-8901",
   311          "1-234-567-1098"
   312      ]
   313  }`
   314  	result7 := map[string]*Column{
   315  		"owner": {
   316  			parquetType:      parquet.Type_BYTE_ARRAY,
   317  			values:           []interface{}{foo},
   318  			definitionLevels: []int64{0},
   319  			repetitionLevels: []int64{0},
   320  		},
   321  		"ownerPhoneNumbers.list.element": {
   322  			parquetType:      parquet.Type_BYTE_ARRAY,
   323  			values:           []interface{}{phone1, phone2},
   324  			definitionLevels: []int64{2, 2},
   325  			repetitionLevels: []int64{0, 1},
   326  		},
   327  		"contacts.list.element.name": {
   328  			parquetType:      parquet.Type_BYTE_ARRAY,
   329  			values:           []interface{}{bar},
   330  			definitionLevels: []int64{2},
   331  			repetitionLevels: []int64{0},
   332  		},
   333  		"contacts.list.element.phoneNumber": {
   334  			parquetType:      parquet.Type_BYTE_ARRAY,
   335  			values:           []interface{}{phone3},
   336  			definitionLevels: []int64{3},
   337  			repetitionLevels: []int64{1},
   338  		},
   339  	}
   340  
   341  	testCases := []struct {
   342  		data           string
   343  		expectedResult map[string]*Column
   344  		expectErr      bool
   345  	}{
   346  		{`{}`, nil, true}, // err: owner: nil value for required field
   347  		{case2Data, result2, false},
   348  		{case3Data, result3, false},
   349  		{case4Data, result4, false},
   350  		{case5Data, result5, false},
   351  		{case6Data, result6, false},
   352  		{case7Data, result7, false},
   353  	}
   354  
   355  	for i, testCase := range testCases {
   356  		result, err := UnmarshalJSON([]byte(testCase.data), addressBook)
   357  		expectErr := (err != nil)
   358  
   359  		if testCase.expectErr != expectErr {
   360  			t.Fatalf("case %v: error: expected: %v, got: %v", i+1, testCase.expectErr, expectErr)
   361  		}
   362  
   363  		if !testCase.expectErr {
   364  			if !reflect.DeepEqual(result, testCase.expectedResult) {
   365  				t.Errorf("case %v: result: expected: %v, got: %v", i+1, testCase.expectedResult, result)
   366  			}
   367  		}
   368  	}
   369  }