github.com/apache/arrow/go/v14@v14.0.1/parquet/internal/encryption/aes.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  // Package encryption contains the internal helpers for the parquet AES encryption/decryption handling.
    18  //
    19  // Testing for this is done via integration testing at the top level parquet package via attempting to
    20  // read and write encrypted files with different configurations to match test files in parquet-testing
    21  package encryption
    22  
    23  import (
    24  	"bytes"
    25  	"crypto/aes"
    26  	"crypto/cipher"
    27  	"crypto/rand"
    28  	"encoding/binary"
    29  	"fmt"
    30  	"io"
    31  
    32  	"github.com/apache/arrow/go/v14/parquet"
    33  )
    34  
    35  // important constants for handling the aes encryption
    36  const (
    37  	GcmTagLength = 16
    38  	NonceLength  = 12
    39  
    40  	gcmMode          = 0
    41  	ctrMode          = 1
    42  	ctrIVLen         = 16
    43  	bufferSizeLength = 4
    44  )
    45  
    46  // Module constants for constructing the AAD bytes, the order here is
    47  // important as the constants are set via iota.
    48  const (
    49  	FooterModule int8 = iota
    50  	ColumnMetaModule
    51  	DataPageModule
    52  	DictPageModule
    53  	DataPageHeaderModule
    54  	DictPageHeaderModule
    55  	ColumnIndexModule
    56  	OffsetIndexModule
    57  )
    58  
    59  type aesEncryptor struct {
    60  	mode                int
    61  	ciphertextSizeDelta int
    62  }
    63  
    64  // NewAesEncryptor constructs an encryptor for the passed in cipher and whether
    65  // or not it's being used to encrypt metadata.
    66  func NewAesEncryptor(alg parquet.Cipher, metadata bool) *aesEncryptor {
    67  	ret := &aesEncryptor{}
    68  	ret.ciphertextSizeDelta = bufferSizeLength + NonceLength
    69  	if metadata || alg == parquet.AesGcm {
    70  		ret.mode = gcmMode
    71  		ret.ciphertextSizeDelta += GcmTagLength
    72  	} else {
    73  		ret.mode = ctrMode
    74  	}
    75  
    76  	return ret
    77  }
    78  
    79  // CiphertextSizeDelta is the number of extra bytes that are part of the encrypted data
    80  // above and beyond the plaintext value.
    81  func (a *aesEncryptor) CiphertextSizeDelta() int { return a.ciphertextSizeDelta }
    82  
    83  // SignedFooterEncrypt writes the signature for the provided footer bytes using the given key, AAD and nonce.
    84  // It returns the number of bytes that were written to w.
    85  func (a *aesEncryptor) SignedFooterEncrypt(w io.Writer, footer, key, aad, nonce []byte) int {
    86  	if a.mode != gcmMode {
    87  		panic("must use AES GCM (metadata) encryptor")
    88  	}
    89  
    90  	block, err := aes.NewCipher(key)
    91  	if err != nil {
    92  		panic(err)
    93  	}
    94  
    95  	aead, err := cipher.NewGCM(block)
    96  	if err != nil {
    97  		panic(err)
    98  	}
    99  	if aead.NonceSize() != NonceLength {
   100  		panic(fmt.Errorf("nonce size mismatch %d, %d", aead.NonceSize(), NonceLength))
   101  	}
   102  	if aead.Overhead() != GcmTagLength {
   103  		panic(fmt.Errorf("tagsize mismatch %d %d", aead.Overhead(), GcmTagLength))
   104  	}
   105  
   106  	ciphertext := aead.Seal(nil, nonce, footer, aad)
   107  	bufferSize := uint32(len(ciphertext) + len(nonce))
   108  	// data is written with a prefix of the size written as a little endian 32bit int.
   109  	if err := binary.Write(w, binary.LittleEndian, bufferSize); err != nil {
   110  		panic(err)
   111  	}
   112  	w.Write(nonce)
   113  	w.Write(ciphertext)
   114  	return bufferSizeLength + int(bufferSize)
   115  }
   116  
   117  // Encrypt calculates the ciphertext for src with the given key and aad, then writes it to w.
   118  // Returns the total number of bytes written.
   119  func (a *aesEncryptor) Encrypt(w io.Writer, src, key, aad []byte) int {
   120  	block, err := aes.NewCipher(key)
   121  	if err != nil {
   122  		panic(err)
   123  	}
   124  
   125  	nonce := make([]byte, NonceLength)
   126  	rand.Read(nonce)
   127  
   128  	if a.mode == gcmMode {
   129  		aead, err := cipher.NewGCM(block)
   130  		if err != nil {
   131  			panic(err)
   132  		}
   133  		if aead.NonceSize() != NonceLength {
   134  			panic(fmt.Errorf("nonce size mismatch %d, %d", aead.NonceSize(), NonceLength))
   135  		}
   136  		if aead.Overhead() != GcmTagLength {
   137  			panic(fmt.Errorf("tagsize mismatch %d %d", aead.Overhead(), GcmTagLength))
   138  		}
   139  
   140  		ciphertext := aead.Seal(nil, nonce, src, aad)
   141  		bufferSize := len(ciphertext) + len(nonce)
   142  		// data is written with a prefix of the size written as a little endian 32bit int.
   143  		if err := binary.Write(w, binary.LittleEndian, uint32(bufferSize)); err != nil {
   144  			panic(err)
   145  		}
   146  		w.Write(nonce)
   147  		w.Write(ciphertext)
   148  		return bufferSizeLength + bufferSize
   149  	}
   150  
   151  	// Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial
   152  	// counter field.
   153  	// The first 31 bits of the initial counter field are set to 0, the last bit
   154  	// is set to 1.
   155  	iv := make([]byte, ctrIVLen)
   156  	copy(iv, nonce)
   157  	iv[ctrIVLen-1] = 1
   158  
   159  	bufferSize := NonceLength + len(src)
   160  	// data is written with a prefix of the size written as a little endian 32bit int.
   161  	if err := binary.Write(w, binary.LittleEndian, uint32(bufferSize)); err != nil {
   162  		panic(err)
   163  	}
   164  	w.Write(nonce)
   165  	cipher.StreamWriter{S: cipher.NewCTR(block, iv), W: w}.Write(src)
   166  	return bufferSizeLength + bufferSize
   167  }
   168  
   169  type aesDecryptor struct {
   170  	mode                int
   171  	ciphertextSizeDelta int
   172  }
   173  
   174  // newAesDecryptor constructs and returns a decryptor for the given cipher type and whether or
   175  // not it is intended to be used for decrypting metadata.
   176  func newAesDecryptor(alg parquet.Cipher, metadata bool) *aesDecryptor {
   177  	ret := &aesDecryptor{}
   178  	ret.ciphertextSizeDelta = bufferSizeLength + NonceLength
   179  	if metadata || alg == parquet.AesGcm {
   180  		ret.mode = gcmMode
   181  		ret.ciphertextSizeDelta += GcmTagLength
   182  	} else {
   183  		ret.mode = ctrMode
   184  	}
   185  
   186  	return ret
   187  }
   188  
   189  // CiphertextSizeDelta is the number of bytes in the ciphertext that will not exist in the
   190  // plaintext due to be used for the decryption. The total size - the CiphertextSizeDelta is
   191  // the length of the plaintext after decryption.
   192  func (a *aesDecryptor) CiphertextSizeDelta() int { return a.ciphertextSizeDelta }
   193  
   194  // DecryptFrom
   195  func (a *aesDecryptor) DecryptFrom(r io.Reader, key, aad []byte) []byte {
   196  	block, err := aes.NewCipher(key)
   197  	if err != nil {
   198  		panic(err)
   199  	}
   200  
   201  	var writtenCiphertextLen uint32
   202  	if err := binary.Read(r, binary.LittleEndian, &writtenCiphertextLen); err != nil {
   203  		panic(err)
   204  	}
   205  
   206  	cipherText := make([]byte, writtenCiphertextLen)
   207  	if n, err := io.ReadFull(r, cipherText); n != int(writtenCiphertextLen) || err != nil {
   208  		panic(err)
   209  	}
   210  
   211  	nonce := cipherText[:NonceLength]
   212  	cipherText = cipherText[NonceLength:]
   213  	if a.mode == gcmMode {
   214  		aead, err := cipher.NewGCM(block)
   215  		if err != nil {
   216  			panic(err)
   217  		}
   218  
   219  		plain, err := aead.Open(cipherText[:0], nonce, cipherText, aad)
   220  		if err != nil {
   221  			panic(err)
   222  		}
   223  		return plain
   224  	}
   225  
   226  	// Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial
   227  	// counter field.
   228  	// The first 31 bits of the initial counter field are set to 0, the last bit
   229  	// is set to 1.
   230  	iv := make([]byte, ctrIVLen)
   231  	copy(iv, nonce)
   232  	iv[ctrIVLen-1] = 1
   233  
   234  	stream := cipher.NewCTR(block, iv)
   235  	// dst := make([]byte, len(cipherText))
   236  	stream.XORKeyStream(cipherText, cipherText)
   237  	return cipherText
   238  }
   239  
   240  // Decrypt returns the plaintext version of the given ciphertext when decrypted
   241  // with the provided key and AAD security bytes.
   242  func (a *aesDecryptor) Decrypt(cipherText, key, aad []byte) []byte {
   243  	block, err := aes.NewCipher(key)
   244  	if err != nil {
   245  		panic(err)
   246  	}
   247  
   248  	writtenCiphertextLen := binary.LittleEndian.Uint32(cipherText)
   249  	cipherLen := writtenCiphertextLen + bufferSizeLength
   250  	nonce := cipherText[bufferSizeLength : bufferSizeLength+NonceLength]
   251  
   252  	if a.mode == gcmMode {
   253  		aead, err := cipher.NewGCM(block)
   254  		if err != nil {
   255  			panic(err)
   256  		}
   257  
   258  		plain, err := aead.Open(nil, nonce, cipherText[bufferSizeLength+NonceLength:cipherLen], aad)
   259  		if err != nil {
   260  			panic(err)
   261  		}
   262  		return plain
   263  	}
   264  
   265  	// Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial
   266  	// counter field.
   267  	// The first 31 bits of the initial counter field are set to 0, the last bit
   268  	// is set to 1.
   269  	iv := make([]byte, ctrIVLen)
   270  	copy(iv, nonce)
   271  	iv[ctrIVLen-1] = 1
   272  
   273  	stream := cipher.NewCTR(block, iv)
   274  	dst := make([]byte, len(cipherText)-bufferSizeLength-NonceLength)
   275  	stream.XORKeyStream(dst, cipherText[bufferSizeLength+NonceLength:])
   276  	return dst
   277  }
   278  
   279  // CreateModuleAad creates the section AAD security bytes for the file, module, row group, column and page.
   280  //
   281  // This should be used for being passed to the encryptor and decryptor whenever requesting AAD bytes.
   282  func CreateModuleAad(fileAad string, moduleType int8, rowGroupOrdinal, columnOrdinal, pageOrdinal int16) string {
   283  	buf := bytes.NewBuffer([]byte(fileAad))
   284  	buf.WriteByte(byte(moduleType))
   285  
   286  	if moduleType == FooterModule {
   287  		return buf.String()
   288  	}
   289  
   290  	binary.Write(buf, binary.LittleEndian, rowGroupOrdinal)
   291  	binary.Write(buf, binary.LittleEndian, columnOrdinal)
   292  	if DataPageModule != moduleType && DataPageHeaderModule != moduleType {
   293  		return buf.String()
   294  	}
   295  
   296  	binary.Write(buf, binary.LittleEndian, pageOrdinal)
   297  	return buf.String()
   298  }
   299  
   300  // CreateFooterAad takes an aadPrefix and constructs the security AAD bytes for encrypting
   301  // and decrypting the parquet footer bytes.
   302  func CreateFooterAad(aadPrefix string) string {
   303  	return CreateModuleAad(aadPrefix, FooterModule, -1, -1, -1)
   304  }
   305  
   306  // QuickUpdatePageAad updates aad with the new page ordinal, modifying the
   307  // last two bytes of aad.
   308  func QuickUpdatePageAad(aad []byte, newPageOrdinal int16) {
   309  	binary.LittleEndian.PutUint16(aad[len(aad)-2:], uint16(newPageOrdinal))
   310  }