github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/schema_test.go (about) 1 package parquet_test 2 3 import ( 4 "testing" 5 "time" 6 7 "github.com/parquet-go/parquet-go" 8 ) 9 10 func TestSchemaOf(t *testing.T) { 11 tests := []struct { 12 value interface{} 13 print string 14 }{ 15 { 16 value: new(struct{ Name string }), 17 print: `message { 18 required binary Name (STRING); 19 }`, 20 }, 21 22 { 23 value: new(struct { 24 X int 25 Y int 26 }), 27 print: `message { 28 required int64 X (INT(64,true)); 29 required int64 Y (INT(64,true)); 30 }`, 31 }, 32 33 { 34 value: new(struct { 35 X float32 36 Y float32 37 }), 38 print: `message { 39 required float X; 40 required float Y; 41 }`, 42 }, 43 44 { 45 value: new(struct { 46 Inner struct { 47 FirstName string `parquet:"first_name"` 48 LastName string `parquet:"last_name"` 49 } `parquet:"inner,optional"` 50 }), 51 print: `message { 52 optional group inner { 53 required binary first_name (STRING); 54 required binary last_name (STRING); 55 } 56 }`, 57 }, 58 59 { 60 value: new(struct { 61 Short float32 `parquet:"short,split"` 62 Long float64 `parquet:"long,split"` 63 }), 64 print: `message { 65 required float short; 66 required double long; 67 }`, 68 }, 69 70 { 71 value: new(struct { 72 Inner struct { 73 FirstName string `parquet:"first_name"` 74 ShouldNotBePresent string `parquet:"-"` 75 } `parquet:"inner,optional"` 76 }), 77 print: `message { 78 optional group inner { 79 required binary first_name (STRING); 80 } 81 }`, 82 }, 83 84 { 85 value: new(struct { 86 Inner struct { 87 FirstName string `parquet:"first_name"` 88 MyNameIsDash string `parquet:"-,"` 89 } `parquet:"inner,optional"` 90 }), 91 print: `message { 92 optional group inner { 93 required binary first_name (STRING); 94 required binary - (STRING); 95 } 96 }`, 97 }, 98 99 { 100 value: new(struct { 101 Inner struct { 102 TimestampMillis int64 `parquet:"timestamp_millis,timestamp"` 103 TimestampMicros int64 `parquet:"timestamp_micros,timestamp(microsecond)"` 104 } `parquet:"inner,optional"` 105 }), 106 print: `message { 107 optional group inner { 108 required int64 timestamp_millis (TIMESTAMP(isAdjustedToUTC=true,unit=MILLIS)); 109 required int64 timestamp_micros (TIMESTAMP(isAdjustedToUTC=true,unit=MICROS)); 110 } 111 }`, 112 }, 113 114 { 115 value: new(struct { 116 Name string `parquet:",json"` 117 }), 118 print: `message { 119 required binary Name (JSON); 120 }`, 121 }, 122 123 { 124 value: new(struct { 125 A map[int64]string `parquet:"," parquet-key:",timestamp"` 126 B map[int64]string 127 }), 128 print: `message { 129 required group A (MAP) { 130 repeated group key_value { 131 required int64 key (TIMESTAMP(isAdjustedToUTC=true,unit=MILLIS)); 132 required binary value (STRING); 133 } 134 } 135 required group B (MAP) { 136 repeated group key_value { 137 required int64 key (INT(64,true)); 138 required binary value (STRING); 139 } 140 } 141 }`, 142 }, 143 144 { 145 value: new(struct { 146 A map[int64]string `parquet:",optional" parquet-value:",json"` 147 }), 148 print: `message { 149 optional group A (MAP) { 150 repeated group key_value { 151 required int64 key (INT(64,true)); 152 required binary value (JSON); 153 } 154 } 155 }`, 156 }, 157 158 { 159 value: new(struct { 160 A map[int64]string `parquet:",optional"` 161 }), 162 print: `message { 163 optional group A (MAP) { 164 repeated group key_value { 165 required int64 key (INT(64,true)); 166 required binary value (STRING); 167 } 168 } 169 }`, 170 }, 171 172 { 173 value: new(struct { 174 A map[int64]string `parquet:",optional" parquet-value:",json" parquet-key:",timestamp(microsecond)"` 175 }), 176 print: `message { 177 optional group A (MAP) { 178 repeated group key_value { 179 required int64 key (TIMESTAMP(isAdjustedToUTC=true,unit=MICROS)); 180 required binary value (JSON); 181 } 182 } 183 }`, 184 }, 185 { 186 value: new(struct { 187 A struct { 188 B string `parquet:"b,id(2)"` 189 } `parquet:"a,id(1)"` 190 C map[string]string `parquet:"c,id(3)"` 191 D []string `parquet:"d,id(4)"` 192 E string `parquet:"e,optional,id(5)"` 193 }), 194 print: `message { 195 required group a = 1 { 196 required binary b (STRING) = 2; 197 } 198 required group c (MAP) = 3 { 199 repeated group key_value { 200 required binary key (STRING); 201 required binary value (STRING); 202 } 203 } 204 repeated binary d (STRING) = 4; 205 optional binary e (STRING) = 5; 206 }`, 207 }, 208 { 209 value: new(struct { 210 Time time.Time `parquet:"time,delta"` 211 }), 212 print: `message { 213 required int64 time (TIMESTAMP(isAdjustedToUTC=true,unit=NANOS)); 214 }`, 215 }, 216 } 217 218 for _, test := range tests { 219 t.Run("", func(t *testing.T) { 220 schema := parquet.SchemaOf(test.value) 221 222 if s := schema.String(); s != test.print { 223 t.Errorf("\nexpected:\n\n%s\n\nfound:\n\n%s\n", test.print, s) 224 } 225 }) 226 } 227 }