github.com/apache/arrow/go/v7@v7.0.1/parquet/internal/encryption/aes.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  // Package encryption contains the internal helpers for the parquet AES encryption/decryption handling.
    18  //
    19  // Testing for this is done via integration testing at the top level parquet package via attempting to
    20  // read and write encrypted files with different configurations to match test files in parquet-testing
    21  package encryption
    22  
    23  import (
    24  	"bytes"
    25  	"crypto/aes"
    26  	"crypto/cipher"
    27  	"crypto/rand"
    28  	"encoding/binary"
    29  	"io"
    30  
    31  	"github.com/apache/arrow/go/v7/parquet"
    32  	"golang.org/x/xerrors"
    33  )
    34  
    35  // important constants for handling the aes encryption
    36  const (
    37  	GcmTagLength = 16
    38  	NonceLength  = 12
    39  
    40  	gcmMode          = 0
    41  	ctrMode          = 1
    42  	ctrIVLen         = 16
    43  	bufferSizeLength = 4
    44  )
    45  
    46  // Module constants for constructing the AAD bytes, the order here is
    47  // important as the constants are set via iota.
    48  const (
    49  	FooterModule int8 = iota
    50  	ColumnMetaModule
    51  	DataPageModule
    52  	DictPageModule
    53  	DataPageHeaderModule
    54  	DictPageHeaderModule
    55  	ColumnIndexModule
    56  	OffsetIndexModule
    57  )
    58  
    59  type aesEncryptor struct {
    60  	mode                int
    61  	ciphertextSizeDelta int
    62  }
    63  
    64  // NewAesEncryptor constructs an encryptor for the passed in cipher and whether
    65  // or not it's being used to encrypt metadata.
    66  func NewAesEncryptor(alg parquet.Cipher, metadata bool) *aesEncryptor {
    67  	ret := &aesEncryptor{}
    68  	ret.ciphertextSizeDelta = bufferSizeLength + NonceLength
    69  	if metadata || alg == parquet.AesGcm {
    70  		ret.mode = gcmMode
    71  		ret.ciphertextSizeDelta += GcmTagLength
    72  	} else {
    73  		ret.mode = ctrMode
    74  	}
    75  
    76  	return ret
    77  }
    78  
    79  // CiphertextSizeDelta is the number of extra bytes that are part of the encrypted data
    80  // above and beyond the plaintext value.
    81  func (a *aesEncryptor) CiphertextSizeDelta() int { return a.ciphertextSizeDelta }
    82  
    83  // SignedFooterEncrypt writes the signature for the provided footer bytes using the given key, AAD and nonce.
    84  // It returns the number of bytes that were written to w.
    85  func (a *aesEncryptor) SignedFooterEncrypt(w io.Writer, footer, key, aad, nonce []byte) int {
    86  	if a.mode != gcmMode {
    87  		panic("must use AES GCM (metadata) encryptor")
    88  	}
    89  
    90  	block, err := aes.NewCipher(key)
    91  	if err != nil {
    92  		panic(err)
    93  	}
    94  
    95  	aead, err := cipher.NewGCM(block)
    96  	if err != nil {
    97  		panic(err)
    98  	}
    99  	if aead.NonceSize() != NonceLength {
   100  		panic(xerrors.Errorf("nonce size mismatch %d, %d", aead.NonceSize(), NonceLength))
   101  	}
   102  	if aead.Overhead() != GcmTagLength {
   103  		panic(xerrors.Errorf("tagsize mismatch %d %d", aead.Overhead(), GcmTagLength))
   104  	}
   105  
   106  	ciphertext := aead.Seal(nil, nonce, footer, aad)
   107  	bufferSize := uint32(len(ciphertext) + len(nonce))
   108  	// data is written with a prefix of the size written as a little endian 32bit int.
   109  	if err := binary.Write(w, binary.LittleEndian, bufferSize); err != nil {
   110  		panic(err)
   111  	}
   112  	w.Write(nonce)
   113  	w.Write(ciphertext)
   114  	return bufferSizeLength + int(bufferSize)
   115  }
   116  
   117  // Encrypt calculates the ciphertext for src with the given key and aad, then writes it to w.
   118  // Returns the total number of bytes written.
   119  func (a *aesEncryptor) Encrypt(w io.Writer, src, key, aad []byte) int {
   120  	block, err := aes.NewCipher(key)
   121  	if err != nil {
   122  		panic(err)
   123  	}
   124  
   125  	nonce := make([]byte, NonceLength)
   126  	rand.Read(nonce)
   127  
   128  	if a.mode == gcmMode {
   129  		aead, err := cipher.NewGCM(block)
   130  		if err != nil {
   131  			panic(err)
   132  		}
   133  		if aead.NonceSize() != NonceLength {
   134  			panic(xerrors.Errorf("nonce size mismatch %d, %d", aead.NonceSize(), NonceLength))
   135  		}
   136  		if aead.Overhead() != GcmTagLength {
   137  			panic(xerrors.Errorf("tagsize mismatch %d %d", aead.Overhead(), GcmTagLength))
   138  		}
   139  
   140  		ciphertext := aead.Seal(nil, nonce, src, aad)
   141  		bufferSize := len(ciphertext) + len(nonce)
   142  		// data is written with a prefix of the size written as a little endian 32bit int.
   143  		if err := binary.Write(w, binary.LittleEndian, uint32(bufferSize)); err != nil {
   144  			panic(err)
   145  		}
   146  		w.Write(nonce)
   147  		w.Write(ciphertext)
   148  		return bufferSizeLength + bufferSize
   149  	}
   150  
   151  	// Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial
   152  	// counter field.
   153  	// The first 31 bits of the initial counter field are set to 0, the last bit
   154  	// is set to 1.
   155  	iv := make([]byte, ctrIVLen)
   156  	copy(iv, nonce)
   157  	iv[ctrIVLen-1] = 1
   158  
   159  	bufferSize := NonceLength + len(src)
   160  	// data is written with a prefix of the size written as a little endian 32bit int.
   161  	if err := binary.Write(w, binary.LittleEndian, uint32(bufferSize)); err != nil {
   162  		panic(err)
   163  	}
   164  	w.Write(nonce)
   165  	cipher.StreamWriter{S: cipher.NewCTR(block, iv), W: w}.Write(src)
   166  	return bufferSizeLength + bufferSize
   167  }
   168  
   169  type aesDecryptor struct {
   170  	mode                int
   171  	ciphertextSizeDelta int
   172  }
   173  
   174  // newAesDecryptor constructs and returns a decryptor for the given cipher type and whether or
   175  // not it is intended to be used for decrypting metadata.
   176  func newAesDecryptor(alg parquet.Cipher, metadata bool) *aesDecryptor {
   177  	ret := &aesDecryptor{}
   178  	ret.ciphertextSizeDelta = bufferSizeLength + NonceLength
   179  	if metadata || alg == parquet.AesGcm {
   180  		ret.mode = gcmMode
   181  		ret.ciphertextSizeDelta += GcmTagLength
   182  	} else {
   183  		ret.mode = ctrMode
   184  	}
   185  
   186  	return ret
   187  }
   188  
   189  // CiphertextSizeDelta is the number of bytes in the ciphertext that will not exist in the
   190  // plaintext due to be used for the decryption. The total size - the CiphertextSizeDelta is
   191  // the length of the plaintext after decryption.
   192  func (a *aesDecryptor) CiphertextSizeDelta() int { return a.ciphertextSizeDelta }
   193  
   194  // Decrypt returns the plaintext version of the given ciphertext when decrypted
   195  // with the provided key and AAD security bytes.
   196  func (a *aesDecryptor) Decrypt(cipherText, key, aad []byte) []byte {
   197  	block, err := aes.NewCipher(key)
   198  	if err != nil {
   199  		panic(err)
   200  	}
   201  
   202  	writtenCiphertextLen := binary.LittleEndian.Uint32(cipherText)
   203  	cipherLen := writtenCiphertextLen + bufferSizeLength
   204  	nonce := cipherText[bufferSizeLength : bufferSizeLength+NonceLength]
   205  
   206  	if a.mode == gcmMode {
   207  		aead, err := cipher.NewGCM(block)
   208  		if err != nil {
   209  			panic(err)
   210  		}
   211  
   212  		plain, err := aead.Open(nil, nonce, cipherText[bufferSizeLength+NonceLength:cipherLen], aad)
   213  		if err != nil {
   214  			panic(err)
   215  		}
   216  		return plain
   217  	}
   218  
   219  	// Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial
   220  	// counter field.
   221  	// The first 31 bits of the initial counter field are set to 0, the last bit
   222  	// is set to 1.
   223  	iv := make([]byte, ctrIVLen)
   224  	copy(iv, nonce)
   225  	iv[ctrIVLen-1] = 1
   226  
   227  	stream := cipher.NewCTR(block, iv)
   228  	dst := make([]byte, len(cipherText)-bufferSizeLength-NonceLength)
   229  	stream.XORKeyStream(dst, cipherText[bufferSizeLength+NonceLength:])
   230  	return dst
   231  }
   232  
   233  // CreateModuleAad creates the section AAD security bytes for the file, module, row group, column and page.
   234  //
   235  // This should be used for being passed to the encryptor and decryptor whenever requesting AAD bytes.
   236  func CreateModuleAad(fileAad string, moduleType int8, rowGroupOrdinal, columnOrdinal, pageOrdinal int16) string {
   237  	buf := bytes.NewBuffer([]byte(fileAad))
   238  	buf.WriteByte(byte(moduleType))
   239  
   240  	if moduleType == FooterModule {
   241  		return buf.String()
   242  	}
   243  
   244  	binary.Write(buf, binary.LittleEndian, rowGroupOrdinal)
   245  	binary.Write(buf, binary.LittleEndian, columnOrdinal)
   246  	if DataPageModule != moduleType && DataPageHeaderModule != moduleType {
   247  		return buf.String()
   248  	}
   249  
   250  	binary.Write(buf, binary.LittleEndian, pageOrdinal)
   251  	return buf.String()
   252  }
   253  
   254  // CreateFooterAad takes an aadPrefix and constructs the security AAD bytes for encrypting
   255  // and decrypting the parquet footer bytes.
   256  func CreateFooterAad(aadPrefix string) string {
   257  	return CreateModuleAad(aadPrefix, FooterModule, -1, -1, -1)
   258  }
   259  
   260  // QuickUpdatePageAad updates aad with the new page ordinal, modifying the
   261  // last two bytes of aad.
   262  func QuickUpdatePageAad(aad []byte, newPageOrdinal int16) {
   263  	binary.LittleEndian.PutUint16(aad[len(aad)-2:], uint16(newPageOrdinal))
   264  }