github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/schema_test.go (about)

     1  package parquet_test
     2  
     3  import (
     4  	"testing"
     5  
     6  	"github.com/segmentio/parquet-go"
     7  )
     8  
     9  func TestSchemaOf(t *testing.T) {
    10  	tests := []struct {
    11  		value interface{}
    12  		print string
    13  	}{
    14  		{
    15  			value: new(struct{ Name string }),
    16  			print: `message {
    17  	required binary Name (STRING);
    18  }`,
    19  		},
    20  
    21  		{
    22  			value: new(struct {
    23  				X int
    24  				Y int
    25  			}),
    26  			print: `message {
    27  	required int64 X (INT(64,true));
    28  	required int64 Y (INT(64,true));
    29  }`,
    30  		},
    31  
    32  		{
    33  			value: new(struct {
    34  				X float32
    35  				Y float32
    36  			}),
    37  			print: `message {
    38  	required float X;
    39  	required float Y;
    40  }`,
    41  		},
    42  
    43  		{
    44  			value: new(struct {
    45  				Inner struct {
    46  					FirstName string `parquet:"first_name"`
    47  					LastName  string `parquet:"last_name"`
    48  				} `parquet:"inner,optional"`
    49  			}),
    50  			print: `message {
    51  	optional group inner {
    52  		required binary first_name (STRING);
    53  		required binary last_name (STRING);
    54  	}
    55  }`,
    56  		},
    57  
    58  		{
    59  			value: new(struct {
    60  				Short float32 `parquet:"short,split"`
    61  				Long  float64 `parquet:"long,split"`
    62  			}),
    63  			print: `message {
    64  	required float short;
    65  	required double long;
    66  }`,
    67  		},
    68  
    69  		{
    70  			value: new(struct {
    71  				Inner struct {
    72  					FirstName          string `parquet:"first_name"`
    73  					ShouldNotBePresent string `parquet:"-"`
    74  				} `parquet:"inner,optional"`
    75  			}),
    76  			print: `message {
    77  	optional group inner {
    78  		required binary first_name (STRING);
    79  	}
    80  }`,
    81  		},
    82  
    83  		{
    84  			value: new(struct {
    85  				Inner struct {
    86  					FirstName    string `parquet:"first_name"`
    87  					MyNameIsDash string `parquet:"-,"`
    88  				} `parquet:"inner,optional"`
    89  			}),
    90  			print: `message {
    91  	optional group inner {
    92  		required binary first_name (STRING);
    93  		required binary - (STRING);
    94  	}
    95  }`,
    96  		},
    97  
    98  		{
    99  			value: new(struct {
   100  				Inner struct {
   101  					TimestampMillis int64 `parquet:"timestamp_millis,timestamp"`
   102  					TimestampMicros int64 `parquet:"timestamp_micros,timestamp(microsecond)"`
   103  				} `parquet:"inner,optional"`
   104  			}),
   105  			print: `message {
   106  	optional group inner {
   107  		required int64 timestamp_millis (TIMESTAMP(isAdjustedToUTC=true,unit=MILLIS));
   108  		required int64 timestamp_micros (TIMESTAMP(isAdjustedToUTC=true,unit=MICROS));
   109  	}
   110  }`,
   111  		},
   112  
   113  		{
   114  			value: new(struct {
   115  				Name string `parquet:",json"`
   116  			}),
   117  			print: `message {
   118  	required binary Name (JSON);
   119  }`,
   120  		},
   121  
   122  		{
   123  			value: new(struct {
   124  				A map[int64]string `parquet:"," parquet-key:",timestamp"`
   125  				B map[int64]string
   126  			}),
   127  			print: `message {
   128  	required group A (MAP) {
   129  		repeated group key_value {
   130  			required int64 key (TIMESTAMP(isAdjustedToUTC=true,unit=MILLIS));
   131  			required binary value (STRING);
   132  		}
   133  	}
   134  	required group B (MAP) {
   135  		repeated group key_value {
   136  			required int64 key (INT(64,true));
   137  			required binary value (STRING);
   138  		}
   139  	}
   140  }`,
   141  		},
   142  
   143  		{
   144  			value: new(struct {
   145  				A map[int64]string `parquet:",optional" parquet-value:",json"`
   146  			}),
   147  			print: `message {
   148  	optional group A (MAP) {
   149  		repeated group key_value {
   150  			required int64 key (INT(64,true));
   151  			required binary value (JSON);
   152  		}
   153  	}
   154  }`,
   155  		},
   156  
   157  		{
   158  			value: new(struct {
   159  				A map[int64]string `parquet:",optional"`
   160  			}),
   161  			print: `message {
   162  	optional group A (MAP) {
   163  		repeated group key_value {
   164  			required int64 key (INT(64,true));
   165  			required binary value (STRING);
   166  		}
   167  	}
   168  }`,
   169  		},
   170  
   171  		{
   172  			value: new(struct {
   173  				A map[int64]string `parquet:",optional" parquet-value:",json" parquet-key:",timestamp(microsecond)"`
   174  			}),
   175  			print: `message {
   176  	optional group A (MAP) {
   177  		repeated group key_value {
   178  			required int64 key (TIMESTAMP(isAdjustedToUTC=true,unit=MICROS));
   179  			required binary value (JSON);
   180  		}
   181  	}
   182  }`,
   183  		},
   184  	}
   185  
   186  	for _, test := range tests {
   187  		t.Run("", func(t *testing.T) {
   188  			schema := parquet.SchemaOf(test.value)
   189  
   190  			if s := schema.String(); s != test.print {
   191  				t.Errorf("\nexpected:\n\n%s\n\nfound:\n\n%s\n", test.print, s)
   192  			}
   193  		})
   194  	}
   195  }