github.com/apache/arrow/go/v14@v14.0.2/parquet/internal/encryption/decryptor.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package encryption
    18  
    19  import (
    20  	"io"
    21  
    22  	"github.com/apache/arrow/go/v14/arrow/memory"
    23  	"github.com/apache/arrow/go/v14/parquet"
    24  )
    25  
    26  // FileDecryptor is an interface used by the filereader for decrypting an
    27  // entire parquet file as we go, usually constructed from the DecryptionProperties
    28  type FileDecryptor interface {
    29  	// Returns the key for decrypting the footer if provided
    30  	GetFooterKey() string
    31  	// Provides the file level AAD security bytes
    32  	FileAad() string
    33  	// return which algorithm this decryptor was constructed for
    34  	Algorithm() parquet.Cipher
    35  	// return the FileDecryptionProperties that were used for this decryptor
    36  	Properties() *parquet.FileDecryptionProperties
    37  	// Clear out the decryption keys, this is automatically called after every
    38  	// successfully decrypted file to ensure that keys aren't kept around.
    39  	WipeOutDecryptionKeys()
    40  	// GetFooterDecryptor returns a Decryptor interface for use to decrypt the footer
    41  	// of a parquet file.
    42  	GetFooterDecryptor() Decryptor
    43  	// GetFooterDecryptorForColumnMeta returns a Decryptor interface for Column Metadata
    44  	// in the file footer using the AAD bytes provided.
    45  	GetFooterDecryptorForColumnMeta(aad string) Decryptor
    46  	// GetFooterDecryptorForColumnData returns the decryptor that can be used for decrypting
    47  	// actual column data footer bytes, not column metadata.
    48  	GetFooterDecryptorForColumnData(aad string) Decryptor
    49  	// GetColumnMetaDecryptor returns a decryptor for the requested column path, key and AAD bytes
    50  	// but only for decrypting the row group level metadata
    51  	GetColumnMetaDecryptor(columnPath, columnKeyMetadata, aad string) Decryptor
    52  	// GetColumnDataDecryptor returns a decryptor for the requested column path, key, and AAD bytes
    53  	// but only for the rowgroup column data.
    54  	GetColumnDataDecryptor(columnPath, columnKeyMetadata, aad string) Decryptor
    55  }
    56  
    57  type fileDecryptor struct {
    58  	// the properties contains the key retriever for us to get keys
    59  	// from the key metadata
    60  	props *parquet.FileDecryptionProperties
    61  	// concatenation of aad_prefix (if exists) and aad_file_unique
    62  	fileAad                 string
    63  	columnDataMap           map[string]Decryptor
    64  	columnMetaDataMap       map[string]Decryptor
    65  	footerMetadataDecryptor Decryptor
    66  	footerDataDecryptor     Decryptor
    67  	alg                     parquet.Cipher
    68  	footerKeyMetadata       string
    69  	metaDecryptor           *aesDecryptor
    70  	dataDecryptor           *aesDecryptor
    71  	mem                     memory.Allocator
    72  }
    73  
    74  // NewFileDecryptor constructs a decryptor from the provided configuration of properties, cipher and key metadata. Using the provided memory allocator or
    75  // the default allocator if one isn't provided.
    76  func NewFileDecryptor(props *parquet.FileDecryptionProperties, fileAad string, alg parquet.Cipher, keymetadata string, mem memory.Allocator) FileDecryptor {
    77  	if mem == nil {
    78  		mem = memory.DefaultAllocator
    79  	}
    80  	return &fileDecryptor{
    81  		fileAad:           fileAad,
    82  		props:             props,
    83  		alg:               alg,
    84  		footerKeyMetadata: keymetadata,
    85  		mem:               mem,
    86  		columnDataMap:     make(map[string]Decryptor),
    87  		columnMetaDataMap: make(map[string]Decryptor),
    88  	}
    89  }
    90  
    91  func (d *fileDecryptor) FileAad() string                               { return d.fileAad }
    92  func (d *fileDecryptor) Properties() *parquet.FileDecryptionProperties { return d.props }
    93  func (d *fileDecryptor) Algorithm() parquet.Cipher                     { return d.alg }
    94  func (d *fileDecryptor) GetFooterKey() string {
    95  	footerKey := d.props.FooterKey()
    96  	if footerKey == "" {
    97  		if d.footerKeyMetadata == "" {
    98  			panic("no footer key or key metadata")
    99  		}
   100  		if d.props.KeyRetriever == nil {
   101  			panic("no footer key or key retriever")
   102  		}
   103  		footerKey = d.props.KeyRetriever.GetKey([]byte(d.footerKeyMetadata))
   104  	}
   105  	if footerKey == "" {
   106  		panic("invalid footer encryption key. Could not parse footer metadata")
   107  	}
   108  	return footerKey
   109  }
   110  
   111  func (d *fileDecryptor) GetFooterDecryptor() Decryptor {
   112  	aad := CreateFooterAad(d.fileAad)
   113  	return d.getFooterDecryptor(aad, true)
   114  }
   115  
   116  func (d *fileDecryptor) GetFooterDecryptorForColumnMeta(aad string) Decryptor {
   117  	return d.getFooterDecryptor(aad, true)
   118  }
   119  
   120  func (d *fileDecryptor) GetFooterDecryptorForColumnData(aad string) Decryptor {
   121  	return d.getFooterDecryptor(aad, false)
   122  }
   123  
   124  func (d *fileDecryptor) GetColumnMetaDecryptor(columnPath, columnKeyMetadata, aad string) Decryptor {
   125  	return d.getColumnDecryptor(columnPath, columnKeyMetadata, aad, true)
   126  }
   127  
   128  func (d *fileDecryptor) GetColumnDataDecryptor(columnPath, columnKeyMetadata, aad string) Decryptor {
   129  	return d.getColumnDecryptor(columnPath, columnKeyMetadata, aad, false)
   130  }
   131  
   132  func (d *fileDecryptor) WipeOutDecryptionKeys() {
   133  	d.props.WipeOutDecryptionKeys()
   134  }
   135  
   136  func (d *fileDecryptor) getFooterDecryptor(aad string, metadata bool) Decryptor {
   137  	if metadata {
   138  		if d.footerMetadataDecryptor != nil {
   139  			return d.footerMetadataDecryptor
   140  		}
   141  	} else {
   142  		if d.footerDataDecryptor != nil {
   143  			return d.footerDataDecryptor
   144  		}
   145  	}
   146  
   147  	footerKey := d.GetFooterKey()
   148  
   149  	// Create both data and metadata decryptors to avoid redundant retrieval of key
   150  	// from the key_retriever.
   151  	aesMetaDecrypt := d.getMetaAesDecryptor()
   152  	aesDataDecrypt := d.getDataAesDecryptor()
   153  
   154  	d.footerMetadataDecryptor = &decryptor{
   155  		decryptor: aesMetaDecrypt,
   156  		key:       []byte(footerKey),
   157  		fileAad:   []byte(d.fileAad),
   158  		aad:       []byte(aad),
   159  		mem:       d.mem,
   160  	}
   161  	d.footerDataDecryptor = &decryptor{
   162  		decryptor: aesDataDecrypt,
   163  		key:       []byte(footerKey),
   164  		fileAad:   []byte(d.fileAad),
   165  		aad:       []byte(aad),
   166  		mem:       d.mem,
   167  	}
   168  
   169  	if metadata {
   170  		return d.footerMetadataDecryptor
   171  	}
   172  	return d.footerDataDecryptor
   173  }
   174  
   175  func (d *fileDecryptor) getColumnDecryptor(columnPath, columnMeta, aad string, metadata bool) Decryptor {
   176  	if metadata {
   177  		if res, ok := d.columnMetaDataMap[columnPath]; ok {
   178  			res.UpdateAad(aad)
   179  			return res
   180  		}
   181  	} else {
   182  		if res, ok := d.columnDataMap[columnPath]; ok {
   183  			res.UpdateAad(aad)
   184  			return res
   185  		}
   186  	}
   187  
   188  	columnKey := d.props.ColumnKey(columnPath)
   189  	// No explicit column key given via API. Retrieve via key metadata.
   190  	if columnKey == "" && columnMeta != "" && d.props.KeyRetriever != nil {
   191  		columnKey = d.props.KeyRetriever.GetKey([]byte(columnMeta))
   192  	}
   193  	if columnKey == "" {
   194  		panic("hidden column exception, path=" + columnPath)
   195  	}
   196  
   197  	aesDataDecrypt := d.getDataAesDecryptor()
   198  	aesMetaDecrypt := d.getMetaAesDecryptor()
   199  
   200  	d.columnDataMap[columnPath] = &decryptor{
   201  		decryptor: aesDataDecrypt,
   202  		key:       []byte(columnKey),
   203  		fileAad:   []byte(d.fileAad),
   204  		aad:       []byte(aad),
   205  		mem:       d.mem,
   206  	}
   207  	d.columnMetaDataMap[columnPath] = &decryptor{
   208  		decryptor: aesMetaDecrypt,
   209  		key:       []byte(columnKey),
   210  		fileAad:   []byte(d.fileAad),
   211  		aad:       []byte(aad),
   212  		mem:       d.mem,
   213  	}
   214  
   215  	if metadata {
   216  		return d.columnMetaDataMap[columnPath]
   217  	}
   218  	return d.columnDataMap[columnPath]
   219  }
   220  
   221  func (d *fileDecryptor) getMetaAesDecryptor() *aesDecryptor {
   222  	if d.metaDecryptor == nil {
   223  		d.metaDecryptor = newAesDecryptor(d.alg, true)
   224  	}
   225  	return d.metaDecryptor
   226  }
   227  
   228  func (d *fileDecryptor) getDataAesDecryptor() *aesDecryptor {
   229  	if d.dataDecryptor == nil {
   230  		d.dataDecryptor = newAesDecryptor(d.alg, false)
   231  	}
   232  	return d.dataDecryptor
   233  }
   234  
   235  // Decryptor is the basic interface for any decryptor generated from a FileDecryptor
   236  type Decryptor interface {
   237  	// returns the File Level AAD bytes
   238  	FileAad() string
   239  	// returns the current allocator that was used for any extra allocations of buffers
   240  	Allocator() memory.Allocator
   241  	// returns the CiphertextSizeDelta from the decryptor
   242  	CiphertextSizeDelta() int
   243  	// Decrypt just returns the decrypted plaintext from the src ciphertext
   244  	Decrypt(src []byte) []byte
   245  	// Decrypt just returns the decrypted plaintext from the src ciphertext
   246  	DecryptFrom(r io.Reader) []byte
   247  	// set the AAD bytes of the decryptor to the provided string
   248  	UpdateAad(string)
   249  }
   250  
   251  type decryptor struct {
   252  	decryptor *aesDecryptor
   253  	key       []byte
   254  	fileAad   []byte
   255  	aad       []byte
   256  	mem       memory.Allocator
   257  }
   258  
   259  func (d *decryptor) Allocator() memory.Allocator { return d.mem }
   260  func (d *decryptor) FileAad() string             { return string(d.fileAad) }
   261  func (d *decryptor) UpdateAad(aad string)        { d.aad = []byte(aad) }
   262  func (d *decryptor) CiphertextSizeDelta() int    { return d.decryptor.CiphertextSizeDelta() }
   263  func (d *decryptor) Decrypt(src []byte) []byte {
   264  	return d.decryptor.Decrypt(src, d.key, d.aad)
   265  }
   266  func (d *decryptor) DecryptFrom(r io.Reader) []byte {
   267  	return d.decryptor.DecryptFrom(r, d.key, d.aad)
   268  }