github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/print_test.go (about) 1 package parquet_test 2 3 import ( 4 "strings" 5 "testing" 6 7 "github.com/segmentio/parquet-go" 8 ) 9 10 func TestPrintSchema(t *testing.T) { 11 tests := []struct { 12 node parquet.Node 13 print string 14 }{ 15 { 16 node: parquet.Group{"on": parquet.Leaf(parquet.BooleanType)}, 17 print: `message Test { 18 required boolean on; 19 }`, 20 }, 21 22 { 23 node: parquet.Group{"name": parquet.String()}, 24 print: `message Test { 25 required binary name (STRING); 26 }`, 27 }, 28 29 { 30 node: parquet.Group{"uuid": parquet.UUID()}, 31 print: `message Test { 32 required fixed_len_byte_array(16) uuid (UUID); 33 }`, 34 }, 35 36 { 37 node: parquet.Group{"enum": parquet.Enum()}, 38 print: `message Test { 39 required binary enum (ENUM); 40 }`, 41 }, 42 43 { 44 node: parquet.Group{"json": parquet.JSON()}, 45 print: `message Test { 46 required binary json (JSON); 47 }`, 48 }, 49 50 { 51 node: parquet.Group{"bson": parquet.BSON()}, 52 print: `message Test { 53 required binary bson (BSON); 54 }`, 55 }, 56 57 { 58 node: parquet.Group{"name": parquet.Optional(parquet.String())}, 59 print: `message Test { 60 optional binary name (STRING); 61 }`, 62 }, 63 64 { 65 node: parquet.Group{"name": parquet.Repeated(parquet.String())}, 66 print: `message Test { 67 repeated binary name (STRING); 68 }`, 69 }, 70 71 { 72 node: parquet.Group{"age": parquet.Int(8)}, 73 print: `message Test { 74 required int32 age (INT(8,true)); 75 }`, 76 }, 77 78 { 79 node: parquet.Group{"age": parquet.Int(16)}, 80 print: `message Test { 81 required int32 age (INT(16,true)); 82 }`, 83 }, 84 85 { 86 node: parquet.Group{"age": parquet.Int(32)}, 87 print: `message Test { 88 required int32 age (INT(32,true)); 89 }`, 90 }, 91 92 { 93 node: parquet.Group{"age": parquet.Int(64)}, 94 print: `message Test { 95 required int64 age (INT(64,true)); 96 }`, 97 }, 98 99 { 100 node: parquet.Group{"age": parquet.Uint(8)}, 101 print: `message Test { 102 required int32 age (INT(8,false)); 103 }`, 104 }, 105 106 { 107 node: parquet.Group{"age": parquet.Uint(16)}, 108 print: `message Test { 109 required int32 age (INT(16,false)); 110 }`, 111 }, 112 113 { 114 node: parquet.Group{"age": parquet.Uint(32)}, 115 print: `message Test { 116 required int32 age (INT(32,false)); 117 }`, 118 }, 119 120 { 121 node: parquet.Group{"age": parquet.Uint(64)}, 122 print: `message Test { 123 required int64 age (INT(64,false)); 124 }`, 125 }, 126 127 { 128 node: parquet.Group{"ratio": parquet.Leaf(parquet.FloatType)}, 129 print: `message Test { 130 required float ratio; 131 }`, 132 }, 133 134 { 135 node: parquet.Group{"ratio": parquet.Leaf(parquet.DoubleType)}, 136 print: `message Test { 137 required double ratio; 138 }`, 139 }, 140 141 { 142 node: parquet.Group{"cost": parquet.Decimal(0, 9, parquet.Int32Type)}, 143 print: `message Test { 144 required int32 cost (DECIMAL(9,0)); 145 }`, 146 }, 147 148 { 149 node: parquet.Group{"cost": parquet.Decimal(0, 18, parquet.Int64Type)}, 150 print: `message Test { 151 required int64 cost (DECIMAL(18,0)); 152 }`, 153 }, 154 155 { 156 node: parquet.Group{"date": parquet.Date()}, 157 print: `message Test { 158 required int32 date (DATE); 159 }`, 160 }, 161 162 { 163 node: parquet.Group{"time": parquet.Time(parquet.Millisecond)}, 164 print: `message Test { 165 required int32 time (TIME(isAdjustedToUTC=true,unit=MILLIS)); 166 }`, 167 }, 168 169 { 170 node: parquet.Group{"time": parquet.Time(parquet.Microsecond)}, 171 print: `message Test { 172 required int64 time (TIME(isAdjustedToUTC=true,unit=MICROS)); 173 }`, 174 }, 175 176 { 177 node: parquet.Group{"time": parquet.Time(parquet.Nanosecond)}, 178 print: `message Test { 179 required int64 time (TIME(isAdjustedToUTC=true,unit=NANOS)); 180 }`, 181 }, 182 183 { 184 node: parquet.Group{"timestamp": parquet.Timestamp(parquet.Millisecond)}, 185 print: `message Test { 186 required int64 timestamp (TIMESTAMP(isAdjustedToUTC=true,unit=MILLIS)); 187 }`, 188 }, 189 190 { 191 node: parquet.Group{"timestamp": parquet.Timestamp(parquet.Microsecond)}, 192 print: `message Test { 193 required int64 timestamp (TIMESTAMP(isAdjustedToUTC=true,unit=MICROS)); 194 }`, 195 }, 196 197 { 198 node: parquet.Group{"timestamp": parquet.Timestamp(parquet.Nanosecond)}, 199 print: `message Test { 200 required int64 timestamp (TIMESTAMP(isAdjustedToUTC=true,unit=NANOS)); 201 }`, 202 }, 203 204 { 205 node: parquet.Group{"names": parquet.List(parquet.String())}, 206 print: `message Test { 207 required group names (LIST) { 208 repeated group list { 209 required binary element (STRING); 210 } 211 } 212 }`, 213 }, 214 215 { 216 node: parquet.Group{ 217 "keys": parquet.List( 218 parquet.Group{ 219 "key": parquet.String(), 220 "value": parquet.String(), 221 }, 222 ), 223 }, 224 print: `message Test { 225 required group keys (LIST) { 226 repeated group list { 227 required group element { 228 required binary key (STRING); 229 required binary value (STRING); 230 } 231 } 232 } 233 }`, 234 }, 235 236 { 237 node: parquet.Group{ 238 "pairs": parquet.Map( 239 parquet.String(), 240 parquet.String(), 241 ), 242 }, 243 print: `message Test { 244 required group pairs (MAP) { 245 repeated group key_value { 246 required binary key (STRING); 247 required binary value (STRING); 248 } 249 } 250 }`, 251 }, 252 } 253 254 for _, test := range tests { 255 t.Run("", func(t *testing.T) { 256 buf := new(strings.Builder) 257 258 if err := parquet.PrintSchema(buf, "Test", test.node); err != nil { 259 t.Fatal(err) 260 } 261 262 if buf.String() != test.print { 263 t.Errorf("\nexpected:\n\n%s\n\nfound:\n\n%s\n", test.print, buf) 264 } 265 }) 266 } 267 }