github.com/apache/arrow/go/v14@v14.0.1/parquet/internal/encryption/encryptor.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package encryption
    18  
    19  import (
    20  	"io"
    21  
    22  	"github.com/apache/arrow/go/v14/arrow/memory"
    23  	"github.com/apache/arrow/go/v14/parquet"
    24  )
    25  
// FileEncryptor is the interface for constructing the encryptors used for the
// different sections of a parquet file: the footer, the footer signature, and
// the per-column metadata and data.
type FileEncryptor interface {
	// GetFooterEncryptor returns an encryptor for the footer metadata.
	GetFooterEncryptor() Encryptor
	// GetFooterSigningEncryptor returns an encryptor for creating the signature
	// for the footer, as opposed to encrypting the footer bytes directly.
	GetFooterSigningEncryptor() Encryptor
	// GetColumnMetaEncryptor returns an encryptor for the metadata ONLY of the
	// requested column path string. The implementation in this package returns
	// nil when the file encryption properties yield no column properties for
	// columnPath.
	GetColumnMetaEncryptor(columnPath string) Encryptor
	// GetColumnDataEncryptor returns an encryptor for the column data ONLY of
	// the requested column path string. The implementation in this package
	// returns nil when the file encryption properties yield no column
	// properties for columnPath.
	GetColumnDataEncryptor(columnPath string) Encryptor
	// WipeOutEncryptionKeys deletes the keys that were used for encryption. It
	// is called after every successfully encrypted file to guard against
	// accidental key re-use.
	WipeOutEncryptionKeys()
}
    45  
// fileEncryptor is the FileEncryptor implementation returned by NewFileEncryptor.
//
// It holds the encryption properties for the file plus caches of every encryptor
// it has handed out: columnDataMap and columnMetaDataMap are keyed by column path,
// while footerSigningEncryptor and footerEncryptor are created on first request.
// mem is the allocator passed on to every encryptor it constructs.
type fileEncryptor struct {
	props                  *parquet.FileEncryptionProperties
	columnDataMap          map[string]Encryptor
	columnMetaDataMap      map[string]Encryptor
	footerSigningEncryptor Encryptor
	footerEncryptor        Encryptor

	// A single AES encryptor is cached for metadata and another for data; each
	// is created on first use by getMetaAesEncryptor / getDataAesEncryptor.
	// NOTE(review): an earlier version of this comment said that, because keys
	// may be 16, 24, or 32 bytes long, there could be up to three encryptors of
	// each kind. Only one of each is kept here - confirm that key length does
	// not need to be part of the cache key.
	metaEncryptor *aesEncryptor
	dataEncryptor *aesEncryptor

	mem memory.Allocator
}
    60  
    61  // NewFileEncryptor returns a new encryptor using the given encryption properties.
    62  //
    63  // Panics if the properties passed have already been used to construct an encryptor
    64  // ie: props.IsUtilized returns true. If mem is nil, will default to memory.DefaultAllocator
    65  func NewFileEncryptor(props *parquet.FileEncryptionProperties, mem memory.Allocator) FileEncryptor {
    66  	if props.IsUtilized() {
    67  		panic("re-using encryption properties for another file")
    68  	}
    69  
    70  	props.SetUtilized()
    71  	if mem == nil {
    72  		mem = memory.DefaultAllocator
    73  	}
    74  
    75  	return &fileEncryptor{
    76  		props:             props,
    77  		mem:               mem,
    78  		columnDataMap:     make(map[string]Encryptor),
    79  		columnMetaDataMap: make(map[string]Encryptor),
    80  	}
    81  }
    82  
// WipeOutEncryptionKeys implements FileEncryptor by delegating to the
// WipeOutEncryptionKeys method of the underlying encryption properties.
func (e *fileEncryptor) WipeOutEncryptionKeys() {
	e.props.WipeOutEncryptionKeys()
}
    86  
    87  func (e *fileEncryptor) GetFooterEncryptor() Encryptor {
    88  	if e.footerEncryptor == nil {
    89  		alg := e.props.Algorithm().Algo
    90  		footerAad := CreateFooterAad(e.props.FileAad())
    91  		footerKey := e.props.FooterKey()
    92  		enc := e.getMetaAesEncryptor(alg)
    93  		e.footerEncryptor = &encryptor{
    94  			aesEncryptor: enc,
    95  			key:          []byte(footerKey),
    96  			fileAad:      e.props.FileAad(),
    97  			aad:          footerAad,
    98  			mem:          e.mem,
    99  		}
   100  	}
   101  	return e.footerEncryptor
   102  }
   103  
   104  func (e *fileEncryptor) GetFooterSigningEncryptor() Encryptor {
   105  	if e.footerSigningEncryptor == nil {
   106  		alg := e.props.Algorithm().Algo
   107  		footerAad := CreateFooterAad(e.props.FileAad())
   108  		footerKey := e.props.FooterKey()
   109  		enc := e.getMetaAesEncryptor(alg)
   110  		e.footerSigningEncryptor = &encryptor{
   111  			aesEncryptor: enc,
   112  			key:          []byte(footerKey),
   113  			fileAad:      e.props.FileAad(),
   114  			aad:          footerAad,
   115  			mem:          e.mem,
   116  		}
   117  	}
   118  	return e.footerSigningEncryptor
   119  }
   120  
   121  func (e *fileEncryptor) getMetaAesEncryptor(alg parquet.Cipher) *aesEncryptor {
   122  	if e.metaEncryptor == nil {
   123  		e.metaEncryptor = NewAesEncryptor(alg, true)
   124  	}
   125  	return e.metaEncryptor
   126  }
   127  
   128  func (e *fileEncryptor) getDataAesEncryptor(alg parquet.Cipher) *aesEncryptor {
   129  	if e.dataEncryptor == nil {
   130  		e.dataEncryptor = NewAesEncryptor(alg, false)
   131  	}
   132  	return e.dataEncryptor
   133  }
   134  
// GetColumnMetaEncryptor implements FileEncryptor by requesting the metadata
// variant of the column encryptor for columnPath from getColumnEncryptor.
func (e *fileEncryptor) GetColumnMetaEncryptor(columnPath string) Encryptor {
	return e.getColumnEncryptor(columnPath, true)
}
   138  
// GetColumnDataEncryptor implements FileEncryptor by requesting the data
// variant of the column encryptor for columnPath from getColumnEncryptor.
func (e *fileEncryptor) GetColumnDataEncryptor(columnPath string) Encryptor {
	return e.getColumnEncryptor(columnPath, false)
}
   142  
   143  func (e *fileEncryptor) getColumnEncryptor(columnPath string, metadata bool) Encryptor {
   144  	if metadata {
   145  		if enc, ok := e.columnMetaDataMap[columnPath]; ok {
   146  			return enc
   147  		}
   148  	} else {
   149  		if enc, ok := e.columnDataMap[columnPath]; ok {
   150  			return enc
   151  		}
   152  	}
   153  
   154  	columnProp := e.props.ColumnEncryptionProperties(columnPath)
   155  	if columnProp == nil {
   156  		return nil
   157  	}
   158  
   159  	var key string
   160  	if columnProp.IsEncryptedWithFooterKey() {
   161  		key = e.props.FooterKey()
   162  	} else {
   163  		key = columnProp.Key()
   164  	}
   165  
   166  	alg := e.props.Algorithm().Algo
   167  	var enc *aesEncryptor
   168  	if metadata {
   169  		enc = e.getMetaAesEncryptor(alg)
   170  	} else {
   171  		enc = e.getDataAesEncryptor(alg)
   172  	}
   173  
   174  	fileAad := e.props.FileAad()
   175  	ret := &encryptor{
   176  		aesEncryptor: enc,
   177  		key:          []byte(key),
   178  		fileAad:      fileAad,
   179  		aad:          "",
   180  		mem:          e.mem,
   181  	}
   182  	if metadata {
   183  		e.columnMetaDataMap[columnPath] = ret
   184  	} else {
   185  		e.columnDataMap[columnPath] = ret
   186  	}
   187  	return ret
   188  }
   189  
// Encryptor is the basic interface for encryptors. For now there is only the
// single AES encryptor implementation, but having it as an interface allows easy
// addition and manipulation of encryptor implementations in the future.
type Encryptor interface {
	// FileAad returns the file level AAD bytes for this encryptor.
	FileAad() string
	// UpdateAad sets the AAD bytes used for encryption to the provided string.
	UpdateAad(string)
	// Allocator returns the allocator that was used to construct the encryptor.
	Allocator() memory.Allocator
	// CiphertextSizeDelta returns the number of extra bytes that will be added
	// to the ciphertext, for a total size of len(plaintext) + CiphertextSizeDelta
	// bytes.
	CiphertextSizeDelta() int
	// Encrypt writes the encrypted ciphertext for src to w and returns the total
	// number of bytes written.
	Encrypt(w io.Writer, src []byte) int
	// EncryptColumnMetaData returns true if the column metadata should be
	// encrypted, based on the column encryption settings and the footer
	// encryption setting.
	EncryptColumnMetaData(encryptFooter bool, properties *parquet.ColumnEncryptionProperties) bool
}
   210  
// encryptor is the Encryptor implementation constructed by fileEncryptor.
//
// It pairs an AES encryptor (aesEncryptor) with the key it encrypts with, the
// file-level AAD reported by FileAad (fileAad), the AAD passed to each Encrypt
// call and replaceable through UpdateAad (aad), and the allocator reported by
// Allocator (mem).
type encryptor struct {
	aesEncryptor *aesEncryptor
	key          []byte
	fileAad      string
	aad          string
	mem          memory.Allocator
}
   218  
   219  func (e *encryptor) FileAad() string             { return e.fileAad }
   220  func (e *encryptor) UpdateAad(aad string)        { e.aad = aad }
   221  func (e *encryptor) Allocator() memory.Allocator { return e.mem }
   222  func (e *encryptor) CiphertextSizeDelta() int    { return e.aesEncryptor.CiphertextSizeDelta() }
   223  
   224  func (e *encryptor) EncryptColumnMetaData(encryptFooter bool, properties *parquet.ColumnEncryptionProperties) bool {
   225  	if properties == nil || !properties.IsEncrypted() {
   226  		return false
   227  	}
   228  	if !encryptFooter {
   229  		return false
   230  	}
   231  	// if not encrypted with footer key then encrypt the metadata
   232  	return !properties.IsEncryptedWithFooterKey()
   233  }
   234  
// Encrypt implements Encryptor by delegating to the underlying AES encryptor,
// passing it w, src, this encryptor's key, and the current AAD converted to
// bytes. It returns that call's result, which the Encryptor interface documents
// as the total number of bytes written to w.
func (e *encryptor) Encrypt(w io.Writer, src []byte) int {
	return e.aesEncryptor.Encrypt(w, src, e.key, []byte(e.aad))
}