github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/schema_test.go (about)

     1  package parquet_test
     2  
     3  import (
     4  	"testing"
     5  	"time"
     6  
     7  	"github.com/parquet-go/parquet-go"
     8  )
     9  
    10  func TestSchemaOf(t *testing.T) {
    11  	tests := []struct {
    12  		value interface{}
    13  		print string
    14  	}{
    15  		{
    16  			value: new(struct{ Name string }),
    17  			print: `message {
    18  	required binary Name (STRING);
    19  }`,
    20  		},
    21  
    22  		{
    23  			value: new(struct {
    24  				X int
    25  				Y int
    26  			}),
    27  			print: `message {
    28  	required int64 X (INT(64,true));
    29  	required int64 Y (INT(64,true));
    30  }`,
    31  		},
    32  
    33  		{
    34  			value: new(struct {
    35  				X float32
    36  				Y float32
    37  			}),
    38  			print: `message {
    39  	required float X;
    40  	required float Y;
    41  }`,
    42  		},
    43  
    44  		{
    45  			value: new(struct {
    46  				Inner struct {
    47  					FirstName string `parquet:"first_name"`
    48  					LastName  string `parquet:"last_name"`
    49  				} `parquet:"inner,optional"`
    50  			}),
    51  			print: `message {
    52  	optional group inner {
    53  		required binary first_name (STRING);
    54  		required binary last_name (STRING);
    55  	}
    56  }`,
    57  		},
    58  
    59  		{
    60  			value: new(struct {
    61  				Short float32 `parquet:"short,split"`
    62  				Long  float64 `parquet:"long,split"`
    63  			}),
    64  			print: `message {
    65  	required float short;
    66  	required double long;
    67  }`,
    68  		},
    69  
    70  		{
    71  			value: new(struct {
    72  				Inner struct {
    73  					FirstName          string `parquet:"first_name"`
    74  					ShouldNotBePresent string `parquet:"-"`
    75  				} `parquet:"inner,optional"`
    76  			}),
    77  			print: `message {
    78  	optional group inner {
    79  		required binary first_name (STRING);
    80  	}
    81  }`,
    82  		},
    83  
    84  		{
    85  			value: new(struct {
    86  				Inner struct {
    87  					FirstName    string `parquet:"first_name"`
    88  					MyNameIsDash string `parquet:"-,"`
    89  				} `parquet:"inner,optional"`
    90  			}),
    91  			print: `message {
    92  	optional group inner {
    93  		required binary first_name (STRING);
    94  		required binary - (STRING);
    95  	}
    96  }`,
    97  		},
    98  
    99  		{
   100  			value: new(struct {
   101  				Inner struct {
   102  					TimestampMillis int64 `parquet:"timestamp_millis,timestamp"`
   103  					TimestampMicros int64 `parquet:"timestamp_micros,timestamp(microsecond)"`
   104  				} `parquet:"inner,optional"`
   105  			}),
   106  			print: `message {
   107  	optional group inner {
   108  		required int64 timestamp_millis (TIMESTAMP(isAdjustedToUTC=true,unit=MILLIS));
   109  		required int64 timestamp_micros (TIMESTAMP(isAdjustedToUTC=true,unit=MICROS));
   110  	}
   111  }`,
   112  		},
   113  
   114  		{
   115  			value: new(struct {
   116  				Name string `parquet:",json"`
   117  			}),
   118  			print: `message {
   119  	required binary Name (JSON);
   120  }`,
   121  		},
   122  
   123  		{
   124  			value: new(struct {
   125  				A map[int64]string `parquet:"," parquet-key:",timestamp"`
   126  				B map[int64]string
   127  			}),
   128  			print: `message {
   129  	required group A (MAP) {
   130  		repeated group key_value {
   131  			required int64 key (TIMESTAMP(isAdjustedToUTC=true,unit=MILLIS));
   132  			required binary value (STRING);
   133  		}
   134  	}
   135  	required group B (MAP) {
   136  		repeated group key_value {
   137  			required int64 key (INT(64,true));
   138  			required binary value (STRING);
   139  		}
   140  	}
   141  }`,
   142  		},
   143  
   144  		{
   145  			value: new(struct {
   146  				A map[int64]string `parquet:",optional" parquet-value:",json"`
   147  			}),
   148  			print: `message {
   149  	optional group A (MAP) {
   150  		repeated group key_value {
   151  			required int64 key (INT(64,true));
   152  			required binary value (JSON);
   153  		}
   154  	}
   155  }`,
   156  		},
   157  
   158  		{
   159  			value: new(struct {
   160  				A map[int64]string `parquet:",optional"`
   161  			}),
   162  			print: `message {
   163  	optional group A (MAP) {
   164  		repeated group key_value {
   165  			required int64 key (INT(64,true));
   166  			required binary value (STRING);
   167  		}
   168  	}
   169  }`,
   170  		},
   171  
   172  		{
   173  			value: new(struct {
   174  				A map[int64]string `parquet:",optional" parquet-value:",json" parquet-key:",timestamp(microsecond)"`
   175  			}),
   176  			print: `message {
   177  	optional group A (MAP) {
   178  		repeated group key_value {
   179  			required int64 key (TIMESTAMP(isAdjustedToUTC=true,unit=MICROS));
   180  			required binary value (JSON);
   181  		}
   182  	}
   183  }`,
   184  		},
   185  		{
   186  			value: new(struct {
   187  				A struct {
   188  					B string `parquet:"b,id(2)"`
   189  				} `parquet:"a,id(1)"`
   190  				C map[string]string `parquet:"c,id(3)"`
   191  				D []string          `parquet:"d,id(4)"`
   192  				E string            `parquet:"e,optional,id(5)"`
   193  			}),
   194  			print: `message {
   195  	required group a = 1 {
   196  		required binary b (STRING) = 2;
   197  	}
   198  	required group c (MAP) = 3 {
   199  		repeated group key_value {
   200  			required binary key (STRING);
   201  			required binary value (STRING);
   202  		}
   203  	}
   204  	repeated binary d (STRING) = 4;
   205  	optional binary e (STRING) = 5;
   206  }`,
   207  		},
   208  		{
   209  			value: new(struct {
   210  				Time time.Time `parquet:"time,delta"`
   211  			}),
   212  			print: `message {
   213  	required int64 time (TIMESTAMP(isAdjustedToUTC=true,unit=NANOS));
   214  }`,
   215  		},
   216  	}
   217  
   218  	for _, test := range tests {
   219  		t.Run("", func(t *testing.T) {
   220  			schema := parquet.SchemaOf(test.value)
   221  
   222  			if s := schema.String(); s != test.print {
   223  				t.Errorf("\nexpected:\n\n%s\n\nfound:\n\n%s\n", test.print, s)
   224  			}
   225  		})
   226  	}
   227  }