github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/print_test.go (about)

     1  package parquet_test
     2  
     3  import (
     4  	"strings"
     5  	"testing"
     6  
     7  	"github.com/segmentio/parquet-go"
     8  )
     9  
    10  func TestPrintSchema(t *testing.T) {
    11  	tests := []struct {
    12  		node  parquet.Node
    13  		print string
    14  	}{
    15  		{
    16  			node: parquet.Group{"on": parquet.Leaf(parquet.BooleanType)},
    17  			print: `message Test {
    18  	required boolean on;
    19  }`,
    20  		},
    21  
    22  		{
    23  			node: parquet.Group{"name": parquet.String()},
    24  			print: `message Test {
    25  	required binary name (STRING);
    26  }`,
    27  		},
    28  
    29  		{
    30  			node: parquet.Group{"uuid": parquet.UUID()},
    31  			print: `message Test {
    32  	required fixed_len_byte_array(16) uuid (UUID);
    33  }`,
    34  		},
    35  
    36  		{
    37  			node: parquet.Group{"enum": parquet.Enum()},
    38  			print: `message Test {
    39  	required binary enum (ENUM);
    40  }`,
    41  		},
    42  
    43  		{
    44  			node: parquet.Group{"json": parquet.JSON()},
    45  			print: `message Test {
    46  	required binary json (JSON);
    47  }`,
    48  		},
    49  
    50  		{
    51  			node: parquet.Group{"bson": parquet.BSON()},
    52  			print: `message Test {
    53  	required binary bson (BSON);
    54  }`,
    55  		},
    56  
    57  		{
    58  			node: parquet.Group{"name": parquet.Optional(parquet.String())},
    59  			print: `message Test {
    60  	optional binary name (STRING);
    61  }`,
    62  		},
    63  
    64  		{
    65  			node: parquet.Group{"name": parquet.Repeated(parquet.String())},
    66  			print: `message Test {
    67  	repeated binary name (STRING);
    68  }`,
    69  		},
    70  
    71  		{
    72  			node: parquet.Group{"age": parquet.Int(8)},
    73  			print: `message Test {
    74  	required int32 age (INT(8,true));
    75  }`,
    76  		},
    77  
    78  		{
    79  			node: parquet.Group{"age": parquet.Int(16)},
    80  			print: `message Test {
    81  	required int32 age (INT(16,true));
    82  }`,
    83  		},
    84  
    85  		{
    86  			node: parquet.Group{"age": parquet.Int(32)},
    87  			print: `message Test {
    88  	required int32 age (INT(32,true));
    89  }`,
    90  		},
    91  
    92  		{
    93  			node: parquet.Group{"age": parquet.Int(64)},
    94  			print: `message Test {
    95  	required int64 age (INT(64,true));
    96  }`,
    97  		},
    98  
    99  		{
   100  			node: parquet.Group{"age": parquet.Uint(8)},
   101  			print: `message Test {
   102  	required int32 age (INT(8,false));
   103  }`,
   104  		},
   105  
   106  		{
   107  			node: parquet.Group{"age": parquet.Uint(16)},
   108  			print: `message Test {
   109  	required int32 age (INT(16,false));
   110  }`,
   111  		},
   112  
   113  		{
   114  			node: parquet.Group{"age": parquet.Uint(32)},
   115  			print: `message Test {
   116  	required int32 age (INT(32,false));
   117  }`,
   118  		},
   119  
   120  		{
   121  			node: parquet.Group{"age": parquet.Uint(64)},
   122  			print: `message Test {
   123  	required int64 age (INT(64,false));
   124  }`,
   125  		},
   126  
   127  		{
   128  			node: parquet.Group{"ratio": parquet.Leaf(parquet.FloatType)},
   129  			print: `message Test {
   130  	required float ratio;
   131  }`,
   132  		},
   133  
   134  		{
   135  			node: parquet.Group{"ratio": parquet.Leaf(parquet.DoubleType)},
   136  			print: `message Test {
   137  	required double ratio;
   138  }`,
   139  		},
   140  
   141  		{
   142  			node: parquet.Group{"cost": parquet.Decimal(0, 9, parquet.Int32Type)},
   143  			print: `message Test {
   144  	required int32 cost (DECIMAL(9,0));
   145  }`,
   146  		},
   147  
   148  		{
   149  			node: parquet.Group{"cost": parquet.Decimal(0, 18, parquet.Int64Type)},
   150  			print: `message Test {
   151  	required int64 cost (DECIMAL(18,0));
   152  }`,
   153  		},
   154  
   155  		{
   156  			node: parquet.Group{"date": parquet.Date()},
   157  			print: `message Test {
   158  	required int32 date (DATE);
   159  }`,
   160  		},
   161  
   162  		{
   163  			node: parquet.Group{"time": parquet.Time(parquet.Millisecond)},
   164  			print: `message Test {
   165  	required int32 time (TIME(isAdjustedToUTC=true,unit=MILLIS));
   166  }`,
   167  		},
   168  
   169  		{
   170  			node: parquet.Group{"time": parquet.Time(parquet.Microsecond)},
   171  			print: `message Test {
   172  	required int64 time (TIME(isAdjustedToUTC=true,unit=MICROS));
   173  }`,
   174  		},
   175  
   176  		{
   177  			node: parquet.Group{"time": parquet.Time(parquet.Nanosecond)},
   178  			print: `message Test {
   179  	required int64 time (TIME(isAdjustedToUTC=true,unit=NANOS));
   180  }`,
   181  		},
   182  
   183  		{
   184  			node: parquet.Group{"timestamp": parquet.Timestamp(parquet.Millisecond)},
   185  			print: `message Test {
   186  	required int64 timestamp (TIMESTAMP(isAdjustedToUTC=true,unit=MILLIS));
   187  }`,
   188  		},
   189  
   190  		{
   191  			node: parquet.Group{"timestamp": parquet.Timestamp(parquet.Microsecond)},
   192  			print: `message Test {
   193  	required int64 timestamp (TIMESTAMP(isAdjustedToUTC=true,unit=MICROS));
   194  }`,
   195  		},
   196  
   197  		{
   198  			node: parquet.Group{"timestamp": parquet.Timestamp(parquet.Nanosecond)},
   199  			print: `message Test {
   200  	required int64 timestamp (TIMESTAMP(isAdjustedToUTC=true,unit=NANOS));
   201  }`,
   202  		},
   203  
   204  		{
   205  			node: parquet.Group{"names": parquet.List(parquet.String())},
   206  			print: `message Test {
   207  	required group names (LIST) {
   208  		repeated group list {
   209  			required binary element (STRING);
   210  		}
   211  	}
   212  }`,
   213  		},
   214  
   215  		{
   216  			node: parquet.Group{
   217  				"keys": parquet.List(
   218  					parquet.Group{
   219  						"key":   parquet.String(),
   220  						"value": parquet.String(),
   221  					},
   222  				),
   223  			},
   224  			print: `message Test {
   225  	required group keys (LIST) {
   226  		repeated group list {
   227  			required group element {
   228  				required binary key (STRING);
   229  				required binary value (STRING);
   230  			}
   231  		}
   232  	}
   233  }`,
   234  		},
   235  
   236  		{
   237  			node: parquet.Group{
   238  				"pairs": parquet.Map(
   239  					parquet.String(),
   240  					parquet.String(),
   241  				),
   242  			},
   243  			print: `message Test {
   244  	required group pairs (MAP) {
   245  		repeated group key_value {
   246  			required binary key (STRING);
   247  			required binary value (STRING);
   248  		}
   249  	}
   250  }`,
   251  		},
   252  	}
   253  
   254  	for _, test := range tests {
   255  		t.Run("", func(t *testing.T) {
   256  			buf := new(strings.Builder)
   257  
   258  			if err := parquet.PrintSchema(buf, "Test", test.node); err != nil {
   259  				t.Fatal(err)
   260  			}
   261  
   262  			if buf.String() != test.print {
   263  				t.Errorf("\nexpected:\n\n%s\n\nfound:\n\n%s\n", test.print, buf)
   264  			}
   265  		})
   266  	}
   267  }