github.com/apache/arrow/go/v7@v7.0.1/parquet/encryption_write_config_test.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package parquet_test 18 19 import ( 20 "encoding/binary" 21 "fmt" 22 "io/ioutil" 23 "os" 24 "path/filepath" 25 "testing" 26 27 "github.com/apache/arrow/go/v7/parquet" 28 "github.com/apache/arrow/go/v7/parquet/compress" 29 "github.com/apache/arrow/go/v7/parquet/file" 30 "github.com/apache/arrow/go/v7/parquet/schema" 31 "github.com/stretchr/testify/suite" 32 ) 33 34 /* 35 * This file contains unit-tests for writing encrypted Parquet files with 36 * different encryption configurations. 37 * The files are saved in temporary folder and will be deleted after reading 38 * them in encryption_read_config_test.go test. 39 * 40 * A detailed description of the Parquet Modular Encryption specification can be found 41 * here: 42 * https://github.com/apache/parquet-format/blob/encryption/Encryption.md 43 * 44 * Each unit-test creates a single parquet file with eight columns using one of the 45 * following encryption configurations: 46 * 47 * - Encryption configuration 1: Encrypt all columns and the footer with the same key. 48 * (uniform encryption) 49 * - Encryption configuration 2: Encrypt two columns and the footer, with different 50 * keys. 51 * - Encryption configuration 3: Encrypt two columns, with different keys. 52 * Don’t encrypt footer (to enable legacy readers) 53 * - plaintext footer mode. 54 * - Encryption configuration 4: Encrypt two columns and the footer, with different 55 * keys. Supply aad_prefix for file identity 56 * verification. 57 * - Encryption configuration 5: Encrypt two columns and the footer, with different 58 * keys. Supply aad_prefix, and call 59 * disable_aad_prefix_storage to prevent file 60 * identity storage in file metadata. 61 * - Encryption configuration 6: Encrypt two columns and the footer, with different 62 * keys. Use the alternative (AES_GCM_CTR_V1) algorithm. 63 */ 64 65 var ( 66 tempdir string 67 ) 68 69 type EncryptionConfigTestSuite struct { 70 suite.Suite 71 72 pathToDoubleField string 73 pathToFloatField string 74 fileName string 75 numRgs int 76 rowsPerRG int 77 schema *schema.GroupNode 78 footerEncryptionKey string 79 columnEncryptionKey1 string 80 columnEncryptionKey2 string 81 } 82 83 func (en *EncryptionConfigTestSuite) encryptFile(configs *parquet.FileEncryptionProperties, filename string) { 84 filename = filepath.Join(tempdir, filename) 85 86 props := parquet.NewWriterProperties(parquet.WithCompression(compress.Codecs.Snappy), parquet.WithEncryptionProperties(configs)) 87 outFile, err := os.Create(filename) 88 en.Require().NoError(err) 89 en.Require().NotNil(outFile) 90 91 writer := file.NewParquetWriter(outFile, en.schema, file.WithWriterProps(props)) 92 defer writer.Close() 93 94 for r := 0; r < en.numRgs; r++ { 95 var ( 96 bufferedMode = r%2 == 0 97 rgr file.RowGroupWriter 98 colIndex = 0 99 ) 100 101 if bufferedMode { 102 rgr = writer.AppendBufferedRowGroup() 103 } else { 104 rgr = writer.AppendRowGroup() 105 } 106 107 nextColumn := func() file.ColumnChunkWriter { 108 defer func() { colIndex++ }() 109 if bufferedMode { 110 cw, _ := rgr.(file.BufferedRowGroupWriter).Column(colIndex) 111 return cw 112 } 113 cw, _ := rgr.(file.SerialRowGroupWriter).NextColumn() 114 return cw 115 } 116 117 // write the bool col 118 boolWriter := nextColumn().(*file.BooleanColumnChunkWriter) 119 for i := 0; i < en.rowsPerRG; i++ { 120 value := (i % 2) == 0 121 boolWriter.WriteBatch([]bool{value}, nil, nil) 122 } 123 124 // write the int32 col 125 int32Writer := nextColumn().(*file.Int32ColumnChunkWriter) 126 for i := int32(0); i < int32(en.rowsPerRG); i++ { 127 int32Writer.WriteBatch([]int32{i}, nil, nil) 128 } 129 130 // write the int64 column, each row repeats twice 131 int64Writer := nextColumn().(*file.Int64ColumnChunkWriter) 132 for i := 0; i < 2*en.rowsPerRG; i++ { 133 var ( 134 defLevel = [1]int16{1} 135 repLevel = [1]int16{0} 136 value int64 = int64(i) * 1000 * 1000 * 1000 * 1000 137 ) 138 if i%2 == 0 { 139 repLevel[0] = 1 140 } 141 142 int64Writer.WriteBatch([]int64{value}, defLevel[:], repLevel[:]) 143 } 144 145 // write the int96 col 146 int96Writer := nextColumn().(*file.Int96ColumnChunkWriter) 147 for i := 0; i < en.rowsPerRG; i++ { 148 val := parquet.Int96{} 149 binary.LittleEndian.PutUint32(val[:], uint32(i)) 150 binary.LittleEndian.PutUint32(val[4:], uint32(i+1)) 151 binary.LittleEndian.PutUint32(val[8:], uint32(i+2)) 152 int96Writer.WriteBatch([]parquet.Int96{val}, nil, nil) 153 } 154 155 // write the float column 156 floatWriter := nextColumn().(*file.Float32ColumnChunkWriter) 157 for i := 0; i < en.rowsPerRG; i++ { 158 val := float32(i) * 1.1 159 floatWriter.WriteBatch([]float32{val}, nil, nil) 160 } 161 162 // write the double column 163 doubleWriter := nextColumn().(*file.Float64ColumnChunkWriter) 164 for i := 0; i < en.rowsPerRG; i++ { 165 value := float64(i) * 1.1111111 166 doubleWriter.WriteBatch([]float64{value}, nil, nil) 167 } 168 169 // write the bytearray column. make every alternate value NULL 170 baWriter := nextColumn().(*file.ByteArrayColumnChunkWriter) 171 for i := 0; i < en.rowsPerRG; i++ { 172 var ( 173 hello = []byte{'p', 'a', 'r', 'q', 'u', 'e', 't', 0, 0, 0} 174 ) 175 hello[7] = byte(int('0') + i/100) 176 hello[8] = byte(int('0') + (i/10)%10) 177 hello[9] = byte(int('0') + i%10) 178 if i%2 == 0 { 179 baWriter.WriteBatch([]parquet.ByteArray{hello}, []int16{1}, nil) 180 } else { 181 baWriter.WriteBatch([]parquet.ByteArray{nil}, []int16{0}, nil) 182 } 183 } 184 185 // write fixedlength byte array column 186 flbaWriter := nextColumn().(*file.FixedLenByteArrayColumnChunkWriter) 187 for i := 0; i < en.rowsPerRG; i++ { 188 v := byte(i) 189 value := parquet.FixedLenByteArray{v, v, v, v, v, v, v, v, v, v} 190 flbaWriter.WriteBatch([]parquet.FixedLenByteArray{value}, nil, nil) 191 } 192 } 193 } 194 195 func (en *EncryptionConfigTestSuite) SetupSuite() { 196 var err error 197 tempdir, err = ioutil.TempDir("", "parquet-encryption-test-*") 198 en.Require().NoError(err) 199 fmt.Println(tempdir) 200 201 en.fileName = FileName 202 en.rowsPerRG = 50 203 en.numRgs = 5 204 en.pathToDoubleField = "double_field" 205 en.pathToFloatField = "float_field" 206 en.footerEncryptionKey = FooterEncryptionKey 207 en.columnEncryptionKey1 = ColumnEncryptionKey1 208 en.columnEncryptionKey2 = ColumnEncryptionKey2 209 210 fields := make(schema.FieldList, 0) 211 // create a primitive node named "boolean_field" with type BOOLEAN 212 // repetition:REQUIRED 213 fields = append(fields, schema.NewBooleanNode("boolean_field", parquet.Repetitions.Required, -1)) 214 // create a primitive node named "int32_field" with type INT32 repetition REQUIRED 215 // and logical type: TIME_MILLIS 216 f, _ := schema.NewPrimitiveNodeLogical("int32_field", parquet.Repetitions.Required, 217 schema.NewTimeLogicalType(true, schema.TimeUnitMillis), parquet.Types.Int32, 0, -1) 218 fields = append(fields, f) 219 220 // create a primitive node named "int64_field" with type int64, repetition:REPEATED 221 fields = append(fields, schema.NewInt64Node("int64_field", parquet.Repetitions.Repeated, -1)) 222 223 fields = append(fields, 224 schema.NewInt96Node("int96_field", parquet.Repetitions.Required, -1), 225 schema.NewFloat32Node("float_field", parquet.Repetitions.Required, -1), 226 schema.NewFloat64Node("double_field", parquet.Repetitions.Required, -1)) 227 228 // create a primitive node named ba_field with type:BYTE_ARRAY repetition:OPTIONAL 229 fields = append(fields, schema.NewByteArrayNode("ba_field", parquet.Repetitions.Optional, -1)) 230 231 // create a primitive node for flba_field 232 fields = append(fields, schema.NewFixedLenByteArrayNode("flba_field", parquet.Repetitions.Required, 10, -1)) 233 234 // flba_field fixedlenbytearray 235 en.schema, _ = schema.NewGroupNode("schema", parquet.Repetitions.Required, fields, -1) 236 } 237 238 // Encryption Config 1: Encrypt All columns and the footer with the same key 239 // (uniform encryption) 240 func (en *EncryptionConfigTestSuite) TestUniformEncryption() { 241 props := parquet.NewFileEncryptionProperties(en.footerEncryptionKey, parquet.WithFooterKeyMetadata("kf")) 242 en.encryptFile(props, "tmp_uniform_encryption.parquet.encrypted") 243 } 244 245 // Encryption config 2: Encrypt Two Columns and the Footer, with different keys 246 func (en *EncryptionConfigTestSuite) TestEncryptTwoColumnsAndFooter() { 247 encryptCols := make(parquet.ColumnPathToEncryptionPropsMap) 248 encryptCols[en.pathToDoubleField] = parquet.NewColumnEncryptionProperties(en.pathToDoubleField, parquet.WithKey(en.columnEncryptionKey1), parquet.WithKeyID("kc1")) 249 encryptCols[en.pathToFloatField] = parquet.NewColumnEncryptionProperties(en.pathToFloatField, parquet.WithKey(en.columnEncryptionKey2), parquet.WithKeyID("kc2")) 250 251 props := parquet.NewFileEncryptionProperties(en.footerEncryptionKey, parquet.WithFooterKeyMetadata("kf"), parquet.WithEncryptedColumns(encryptCols)) 252 en.encryptFile(props, "tmp_encrypt_columns_and_footer.parquet.encrypted") 253 } 254 255 // Encryption Config 3: encrypt two columns, with different keys. 256 // plaintext footer 257 // (plaintext footer mode, readable by legacy readers) 258 func (en *EncryptionConfigTestSuite) TestEncryptTwoColumnsPlaintextFooter() { 259 encryptCols := make(parquet.ColumnPathToEncryptionPropsMap) 260 encryptCols[en.pathToDoubleField] = parquet.NewColumnEncryptionProperties(en.pathToDoubleField, parquet.WithKey(en.columnEncryptionKey1), parquet.WithKeyID("kc1")) 261 encryptCols[en.pathToFloatField] = parquet.NewColumnEncryptionProperties(en.pathToFloatField, parquet.WithKey(en.columnEncryptionKey2), parquet.WithKeyID("kc2")) 262 263 props := parquet.NewFileEncryptionProperties(en.footerEncryptionKey, parquet.WithFooterKeyMetadata("kf"), parquet.WithEncryptedColumns(encryptCols), parquet.WithPlaintextFooter()) 264 en.encryptFile(props, "tmp_encrypt_columns_plaintext_footer.parquet.encrypted") 265 } 266 267 // Encryption Config 4: Encrypt two columns and the footer, with different keys 268 // use aad_prefix 269 func (en *EncryptionConfigTestSuite) TestEncryptTwoColumnsAndFooterWithAadPrefix() { 270 encryptCols := make(parquet.ColumnPathToEncryptionPropsMap) 271 encryptCols[en.pathToDoubleField] = parquet.NewColumnEncryptionProperties(en.pathToDoubleField, parquet.WithKey(en.columnEncryptionKey1), parquet.WithKeyID("kc1")) 272 encryptCols[en.pathToFloatField] = parquet.NewColumnEncryptionProperties(en.pathToFloatField, parquet.WithKey(en.columnEncryptionKey2), parquet.WithKeyID("kc2")) 273 274 props := parquet.NewFileEncryptionProperties(en.footerEncryptionKey, parquet.WithFooterKeyMetadata("kf"), parquet.WithEncryptedColumns(encryptCols), parquet.WithAadPrefix(en.fileName)) 275 en.encryptFile(props, "tmp_encrypt_columns_and_footer_aad.parquet.encrypted") 276 } 277 278 // Encryption Config 5: Encrypt Two columns and the footer, with different keys 279 // use aad_prefix and disable_aad_prefix_storage 280 func (en *EncryptionConfigTestSuite) TestEncryptTwoColumnsAndFooterWithAadPrefixDisableAadStorage() { 281 encryptCols := make(parquet.ColumnPathToEncryptionPropsMap) 282 encryptCols[en.pathToDoubleField] = parquet.NewColumnEncryptionProperties(en.pathToDoubleField, parquet.WithKey(en.columnEncryptionKey1), parquet.WithKeyID("kc1")) 283 encryptCols[en.pathToFloatField] = parquet.NewColumnEncryptionProperties(en.pathToFloatField, parquet.WithKey(en.columnEncryptionKey2), parquet.WithKeyID("kc2")) 284 285 props := parquet.NewFileEncryptionProperties(en.footerEncryptionKey, parquet.WithFooterKeyMetadata("kf"), parquet.WithAadPrefix(en.fileName), parquet.DisableAadPrefixStorage()) 286 en.encryptFile(props, "tmp_encrypt_columns_and_footer_disable_aad_storage.parquet.encrypted") 287 } 288 289 // Encryption Config 6: Encrypt two columns and the footer, with different keys. 290 // Use AES_GCM_CTR_V1 291 func (en *EncryptionConfigTestSuite) TestEncryptTwoColumnsAndFooterAesGcmCtr() { 292 encryptCols := make(parquet.ColumnPathToEncryptionPropsMap) 293 encryptCols[en.pathToDoubleField] = parquet.NewColumnEncryptionProperties(en.pathToDoubleField, parquet.WithKey(en.columnEncryptionKey1), parquet.WithKeyID("kc1")) 294 encryptCols[en.pathToFloatField] = parquet.NewColumnEncryptionProperties(en.pathToFloatField, parquet.WithKey(en.columnEncryptionKey2), parquet.WithKeyID("kc2")) 295 296 props := parquet.NewFileEncryptionProperties(en.footerEncryptionKey, parquet.WithFooterKeyMetadata("kf"), parquet.WithEncryptedColumns(encryptCols), parquet.WithAlg(parquet.AesCtr)) 297 en.encryptFile(props, "tmp_encrypt_columns_and_footer_ctr.parquet.encrypted") 298 } 299 300 func TestFileEncryption(t *testing.T) { 301 suite.Run(t, new(EncryptionConfigTestSuite)) 302 }