github.com/apache/arrow/go/v14@v14.0.1/parquet/internal/encryption/aes.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 // Package encryption contains the internal helpers for the parquet AES encryption/decryption handling. 18 // 19 // Testing for this is done via integration testing at the top level parquet package via attempting to 20 // read and write encrypted files with different configurations to match test files in parquet-testing 21 package encryption 22 23 import ( 24 "bytes" 25 "crypto/aes" 26 "crypto/cipher" 27 "crypto/rand" 28 "encoding/binary" 29 "fmt" 30 "io" 31 32 "github.com/apache/arrow/go/v14/parquet" 33 ) 34 35 // important constants for handling the aes encryption 36 const ( 37 GcmTagLength = 16 38 NonceLength = 12 39 40 gcmMode = 0 41 ctrMode = 1 42 ctrIVLen = 16 43 bufferSizeLength = 4 44 ) 45 46 // Module constants for constructing the AAD bytes, the order here is 47 // important as the constants are set via iota. 48 const ( 49 FooterModule int8 = iota 50 ColumnMetaModule 51 DataPageModule 52 DictPageModule 53 DataPageHeaderModule 54 DictPageHeaderModule 55 ColumnIndexModule 56 OffsetIndexModule 57 ) 58 59 type aesEncryptor struct { 60 mode int 61 ciphertextSizeDelta int 62 } 63 64 // NewAesEncryptor constructs an encryptor for the passed in cipher and whether 65 // or not it's being used to encrypt metadata. 66 func NewAesEncryptor(alg parquet.Cipher, metadata bool) *aesEncryptor { 67 ret := &aesEncryptor{} 68 ret.ciphertextSizeDelta = bufferSizeLength + NonceLength 69 if metadata || alg == parquet.AesGcm { 70 ret.mode = gcmMode 71 ret.ciphertextSizeDelta += GcmTagLength 72 } else { 73 ret.mode = ctrMode 74 } 75 76 return ret 77 } 78 79 // CiphertextSizeDelta is the number of extra bytes that are part of the encrypted data 80 // above and beyond the plaintext value. 81 func (a *aesEncryptor) CiphertextSizeDelta() int { return a.ciphertextSizeDelta } 82 83 // SignedFooterEncrypt writes the signature for the provided footer bytes using the given key, AAD and nonce. 84 // It returns the number of bytes that were written to w. 85 func (a *aesEncryptor) SignedFooterEncrypt(w io.Writer, footer, key, aad, nonce []byte) int { 86 if a.mode != gcmMode { 87 panic("must use AES GCM (metadata) encryptor") 88 } 89 90 block, err := aes.NewCipher(key) 91 if err != nil { 92 panic(err) 93 } 94 95 aead, err := cipher.NewGCM(block) 96 if err != nil { 97 panic(err) 98 } 99 if aead.NonceSize() != NonceLength { 100 panic(fmt.Errorf("nonce size mismatch %d, %d", aead.NonceSize(), NonceLength)) 101 } 102 if aead.Overhead() != GcmTagLength { 103 panic(fmt.Errorf("tagsize mismatch %d %d", aead.Overhead(), GcmTagLength)) 104 } 105 106 ciphertext := aead.Seal(nil, nonce, footer, aad) 107 bufferSize := uint32(len(ciphertext) + len(nonce)) 108 // data is written with a prefix of the size written as a little endian 32bit int. 109 if err := binary.Write(w, binary.LittleEndian, bufferSize); err != nil { 110 panic(err) 111 } 112 w.Write(nonce) 113 w.Write(ciphertext) 114 return bufferSizeLength + int(bufferSize) 115 } 116 117 // Encrypt calculates the ciphertext for src with the given key and aad, then writes it to w. 118 // Returns the total number of bytes written. 119 func (a *aesEncryptor) Encrypt(w io.Writer, src, key, aad []byte) int { 120 block, err := aes.NewCipher(key) 121 if err != nil { 122 panic(err) 123 } 124 125 nonce := make([]byte, NonceLength) 126 rand.Read(nonce) 127 128 if a.mode == gcmMode { 129 aead, err := cipher.NewGCM(block) 130 if err != nil { 131 panic(err) 132 } 133 if aead.NonceSize() != NonceLength { 134 panic(fmt.Errorf("nonce size mismatch %d, %d", aead.NonceSize(), NonceLength)) 135 } 136 if aead.Overhead() != GcmTagLength { 137 panic(fmt.Errorf("tagsize mismatch %d %d", aead.Overhead(), GcmTagLength)) 138 } 139 140 ciphertext := aead.Seal(nil, nonce, src, aad) 141 bufferSize := len(ciphertext) + len(nonce) 142 // data is written with a prefix of the size written as a little endian 32bit int. 143 if err := binary.Write(w, binary.LittleEndian, uint32(bufferSize)); err != nil { 144 panic(err) 145 } 146 w.Write(nonce) 147 w.Write(ciphertext) 148 return bufferSizeLength + bufferSize 149 } 150 151 // Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial 152 // counter field. 153 // The first 31 bits of the initial counter field are set to 0, the last bit 154 // is set to 1. 155 iv := make([]byte, ctrIVLen) 156 copy(iv, nonce) 157 iv[ctrIVLen-1] = 1 158 159 bufferSize := NonceLength + len(src) 160 // data is written with a prefix of the size written as a little endian 32bit int. 161 if err := binary.Write(w, binary.LittleEndian, uint32(bufferSize)); err != nil { 162 panic(err) 163 } 164 w.Write(nonce) 165 cipher.StreamWriter{S: cipher.NewCTR(block, iv), W: w}.Write(src) 166 return bufferSizeLength + bufferSize 167 } 168 169 type aesDecryptor struct { 170 mode int 171 ciphertextSizeDelta int 172 } 173 174 // newAesDecryptor constructs and returns a decryptor for the given cipher type and whether or 175 // not it is intended to be used for decrypting metadata. 176 func newAesDecryptor(alg parquet.Cipher, metadata bool) *aesDecryptor { 177 ret := &aesDecryptor{} 178 ret.ciphertextSizeDelta = bufferSizeLength + NonceLength 179 if metadata || alg == parquet.AesGcm { 180 ret.mode = gcmMode 181 ret.ciphertextSizeDelta += GcmTagLength 182 } else { 183 ret.mode = ctrMode 184 } 185 186 return ret 187 } 188 189 // CiphertextSizeDelta is the number of bytes in the ciphertext that will not exist in the 190 // plaintext due to be used for the decryption. The total size - the CiphertextSizeDelta is 191 // the length of the plaintext after decryption. 192 func (a *aesDecryptor) CiphertextSizeDelta() int { return a.ciphertextSizeDelta } 193 194 // DecryptFrom 195 func (a *aesDecryptor) DecryptFrom(r io.Reader, key, aad []byte) []byte { 196 block, err := aes.NewCipher(key) 197 if err != nil { 198 panic(err) 199 } 200 201 var writtenCiphertextLen uint32 202 if err := binary.Read(r, binary.LittleEndian, &writtenCiphertextLen); err != nil { 203 panic(err) 204 } 205 206 cipherText := make([]byte, writtenCiphertextLen) 207 if n, err := io.ReadFull(r, cipherText); n != int(writtenCiphertextLen) || err != nil { 208 panic(err) 209 } 210 211 nonce := cipherText[:NonceLength] 212 cipherText = cipherText[NonceLength:] 213 if a.mode == gcmMode { 214 aead, err := cipher.NewGCM(block) 215 if err != nil { 216 panic(err) 217 } 218 219 plain, err := aead.Open(cipherText[:0], nonce, cipherText, aad) 220 if err != nil { 221 panic(err) 222 } 223 return plain 224 } 225 226 // Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial 227 // counter field. 228 // The first 31 bits of the initial counter field are set to 0, the last bit 229 // is set to 1. 230 iv := make([]byte, ctrIVLen) 231 copy(iv, nonce) 232 iv[ctrIVLen-1] = 1 233 234 stream := cipher.NewCTR(block, iv) 235 // dst := make([]byte, len(cipherText)) 236 stream.XORKeyStream(cipherText, cipherText) 237 return cipherText 238 } 239 240 // Decrypt returns the plaintext version of the given ciphertext when decrypted 241 // with the provided key and AAD security bytes. 242 func (a *aesDecryptor) Decrypt(cipherText, key, aad []byte) []byte { 243 block, err := aes.NewCipher(key) 244 if err != nil { 245 panic(err) 246 } 247 248 writtenCiphertextLen := binary.LittleEndian.Uint32(cipherText) 249 cipherLen := writtenCiphertextLen + bufferSizeLength 250 nonce := cipherText[bufferSizeLength : bufferSizeLength+NonceLength] 251 252 if a.mode == gcmMode { 253 aead, err := cipher.NewGCM(block) 254 if err != nil { 255 panic(err) 256 } 257 258 plain, err := aead.Open(nil, nonce, cipherText[bufferSizeLength+NonceLength:cipherLen], aad) 259 if err != nil { 260 panic(err) 261 } 262 return plain 263 } 264 265 // Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial 266 // counter field. 267 // The first 31 bits of the initial counter field are set to 0, the last bit 268 // is set to 1. 269 iv := make([]byte, ctrIVLen) 270 copy(iv, nonce) 271 iv[ctrIVLen-1] = 1 272 273 stream := cipher.NewCTR(block, iv) 274 dst := make([]byte, len(cipherText)-bufferSizeLength-NonceLength) 275 stream.XORKeyStream(dst, cipherText[bufferSizeLength+NonceLength:]) 276 return dst 277 } 278 279 // CreateModuleAad creates the section AAD security bytes for the file, module, row group, column and page. 280 // 281 // This should be used for being passed to the encryptor and decryptor whenever requesting AAD bytes. 282 func CreateModuleAad(fileAad string, moduleType int8, rowGroupOrdinal, columnOrdinal, pageOrdinal int16) string { 283 buf := bytes.NewBuffer([]byte(fileAad)) 284 buf.WriteByte(byte(moduleType)) 285 286 if moduleType == FooterModule { 287 return buf.String() 288 } 289 290 binary.Write(buf, binary.LittleEndian, rowGroupOrdinal) 291 binary.Write(buf, binary.LittleEndian, columnOrdinal) 292 if DataPageModule != moduleType && DataPageHeaderModule != moduleType { 293 return buf.String() 294 } 295 296 binary.Write(buf, binary.LittleEndian, pageOrdinal) 297 return buf.String() 298 } 299 300 // CreateFooterAad takes an aadPrefix and constructs the security AAD bytes for encrypting 301 // and decrypting the parquet footer bytes. 302 func CreateFooterAad(aadPrefix string) string { 303 return CreateModuleAad(aadPrefix, FooterModule, -1, -1, -1) 304 } 305 306 // QuickUpdatePageAad updates aad with the new page ordinal, modifying the 307 // last two bytes of aad. 308 func QuickUpdatePageAad(aad []byte, newPageOrdinal int16) { 309 binary.LittleEndian.PutUint16(aad[len(aad)-2:], uint16(newPageOrdinal)) 310 }