426 lines
15 KiB
C++
426 lines
15 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
|
|
#pragma once
|
|
|
|
#include <map>
|
|
#include <memory>
|
|
#include <string>
|
|
#include <utility>
|
|
|
|
#include "parquet/exception.h"
|
|
#include "parquet/schema.h"
|
|
#include "parquet/types.h"
|
|
|
|
namespace parquet {
|
|
|
|
/// Cipher that FileEncryptionProperties::Builder starts with; it stays in effect
/// unless Builder::algorithm() is called.
static constexpr ParquetCipher::type kDefaultEncryptionAlgorithm{
    ParquetCipher::AES_GCM_V1};

/// Upper bound for the AAD metadata length.
/// NOTE(review): not referenced in this header; the unit is presumably bytes -
/// confirm at the use site.
static constexpr int32_t kMaximalAadMetadataLength{256};

/// Initial value of FileEncryptionProperties::Builder's encrypted-footer flag
/// (cleared by Builder::set_plaintext_footer()).
static constexpr bool kDefaultEncryptedFooter{true};

/// Initial value of FileDecryptionProperties::Builder's plaintext-footer
/// integrity check (cleared by Builder::disable_footer_signature_verification()).
static constexpr bool kDefaultCheckSignature{true};

/// Initial value of FileDecryptionProperties::Builder's "plaintext files allowed"
/// flag (set by Builder::plaintext_files_allowed()).
static constexpr bool kDefaultAllowPlaintextFiles{false};

/// Length of the file-unique part of the AAD.
/// NOTE(review): not referenced in this header; the unit is presumably bytes -
/// confirm at the use site.
static constexpr int32_t kAadFileUniqueLength{8};
|
|
|
|
// Forward declarations: the map aliases below only hold shared_ptr handles, so
// the complete class definitions are not needed at this point.
class ColumnDecryptionProperties;
class ColumnEncryptionProperties;

/// Per-column decryption properties, keyed by a column path string.
/// NOTE(review): keys are presumably the dot-string form of the column path
/// (the builders store ColumnPath::ToDotString()) - confirm against callers.
using ColumnPathToDecryptionPropertiesMap =
    std::map<std::string, std::shared_ptr<ColumnDecryptionProperties>>;

/// Per-column encryption properties, keyed by a column path string.
/// NOTE(review): keys are presumably the dot-string form of the column path
/// (the builders store ColumnPath::ToDotString()) - confirm against callers.
using ColumnPathToEncryptionPropertiesMap =
    std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>>;
|
|
|
|
class PARQUET_EXPORT DecryptionKeyRetriever {
|
|
public:
|
|
virtual std::string GetKey(const std::string& key_metadata) = 0;
|
|
virtual ~DecryptionKeyRetriever() {}
|
|
};
|
|
|
|
/// Simple integer key retriever
|
|
class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever {
|
|
public:
|
|
void PutKey(uint32_t key_id, const std::string& key);
|
|
std::string GetKey(const std::string& key_metadata) override;
|
|
|
|
private:
|
|
std::map<uint32_t, std::string> key_map_;
|
|
};
|
|
|
|
// Simple string key retriever
|
|
class PARQUET_EXPORT StringKeyIdRetriever : public DecryptionKeyRetriever {
|
|
public:
|
|
void PutKey(const std::string& key_id, const std::string& key);
|
|
std::string GetKey(const std::string& key_metadata) override;
|
|
|
|
private:
|
|
std::map<std::string, std::string> key_map_;
|
|
};
|
|
|
|
/// ParquetException subtype whose message is the column path handed to the
/// constructor.
/// NOTE(review): presumably raised when a requested column is hidden from the
/// reader - confirm at the throw sites.
class PARQUET_EXPORT HiddenColumnException : public ParquetException {
 public:
  /// \param columnPath forwarded to ParquetException as a C string.
  explicit HiddenColumnException(const std::string& columnPath)
      : ParquetException(columnPath.c_str()) {}
};
|
|
|
|
/// ParquetException subtype whose message is the column path handed to the
/// constructor.
/// NOTE(review): presumably raised when access to a column key is denied -
/// confirm at the throw sites.
class PARQUET_EXPORT KeyAccessDeniedException : public ParquetException {
 public:
  /// \param columnPath forwarded to ParquetException as a C string.
  explicit KeyAccessDeniedException(const std::string& columnPath)
      : ParquetException(columnPath.c_str()) {}
};
|
|
|
|
inline const uint8_t* str2bytes(const std::string& str) {
|
|
if (str.empty()) return NULLPTR;
|
|
|
|
char* cbytes = const_cast<char*>(str.c_str());
|
|
return reinterpret_cast<const uint8_t*>(cbytes);
|
|
}
|
|
|
|
/// View the character buffer of a string as a span of read-only bytes.
///
/// \param str the string whose buffer is exposed; the span does not own the
///   data, so the string must outlive it.
/// \return a default-constructed span when the string is empty, otherwise a
///   span covering all str.size() bytes of the string.
inline ::arrow::util::span<const uint8_t> str2span(const std::string& str) {
  using ByteSpan = ::arrow::util::span<const uint8_t>;

  return str.empty()
             ? ByteSpan()
             : ByteSpan(reinterpret_cast<const uint8_t*>(str.data()), str.size());
}
|
|
|
|
class PARQUET_EXPORT ColumnEncryptionProperties {
|
|
public:
|
|
class PARQUET_EXPORT Builder {
|
|
public:
|
|
/// Convenience builder for encrypted columns.
|
|
explicit Builder(std::string name) : Builder(std::move(name), true) {}
|
|
|
|
/// Convenience builder for encrypted columns.
|
|
explicit Builder(const schema::ColumnPath& path)
|
|
: Builder(path.ToDotString(), true) {}
|
|
|
|
/// Set a column-specific key.
|
|
/// If key is not set on an encrypted column, the column will
|
|
/// be encrypted with the footer key.
|
|
/// keyBytes Key length must be either 16, 24 or 32 bytes.
|
|
/// Caller is responsible for wiping out the input key array.
|
|
Builder* key(std::string column_key);
|
|
|
|
/// Set a key retrieval metadata.
|
|
/// use either key_metadata() or key_id(), not both
|
|
Builder* key_metadata(std::string key_metadata);
|
|
|
|
/// A convenience function to set key metadata using a string id.
|
|
/// Set a key retrieval metadata (converted from String).
|
|
/// use either key_metadata() or key_id(), not both
|
|
/// key_id will be converted to metadata (UTF-8 array).
|
|
Builder* key_id(std::string key_id);
|
|
|
|
std::shared_ptr<ColumnEncryptionProperties> build() {
|
|
return std::shared_ptr<ColumnEncryptionProperties>(
|
|
new ColumnEncryptionProperties(encrypted_, column_path_, key_, key_metadata_));
|
|
}
|
|
|
|
private:
|
|
std::string column_path_;
|
|
bool encrypted_;
|
|
std::string key_;
|
|
std::string key_metadata_;
|
|
|
|
Builder(std::string path, bool encrypted)
|
|
: column_path_(std::move(path)), encrypted_(encrypted) {}
|
|
};
|
|
|
|
std::string column_path() const { return column_path_; }
|
|
bool is_encrypted() const { return encrypted_; }
|
|
bool is_encrypted_with_footer_key() const { return encrypted_with_footer_key_; }
|
|
std::string key() const { return key_; }
|
|
std::string key_metadata() const { return key_metadata_; }
|
|
|
|
private:
|
|
std::string column_path_;
|
|
bool encrypted_;
|
|
bool encrypted_with_footer_key_;
|
|
std::string key_;
|
|
std::string key_metadata_;
|
|
explicit ColumnEncryptionProperties(bool encrypted, std::string column_path,
|
|
std::string key, std::string key_metadata);
|
|
};
|
|
|
|
class PARQUET_EXPORT ColumnDecryptionProperties {
|
|
public:
|
|
class PARQUET_EXPORT Builder {
|
|
public:
|
|
explicit Builder(std::string name) : column_path_(std::move(name)) {}
|
|
|
|
explicit Builder(const schema::ColumnPath& path) : Builder(path.ToDotString()) {}
|
|
|
|
/// Set an explicit column key. If applied on a file that contains
|
|
/// key metadata for this column the metadata will be ignored,
|
|
/// the column will be decrypted with this key.
|
|
/// key length must be either 16, 24 or 32 bytes.
|
|
Builder* key(std::string key);
|
|
|
|
std::shared_ptr<ColumnDecryptionProperties> build();
|
|
|
|
private:
|
|
std::string column_path_;
|
|
std::string key_;
|
|
};
|
|
|
|
std::string column_path() const { return column_path_; }
|
|
std::string key() const { return key_; }
|
|
|
|
private:
|
|
std::string column_path_;
|
|
std::string key_;
|
|
|
|
/// This class is only required for setting explicit column decryption keys -
|
|
/// to override key retriever (or to provide keys when key metadata and/or
|
|
/// key retriever are not available)
|
|
explicit ColumnDecryptionProperties(std::string column_path, std::string key);
|
|
};
|
|
|
|
class PARQUET_EXPORT AADPrefixVerifier {
|
|
public:
|
|
/// Verifies identity (AAD Prefix) of individual file,
|
|
/// or of file collection in a data set.
|
|
/// Throws exception if an AAD prefix is wrong.
|
|
/// In a data set, AAD Prefixes should be collected,
|
|
/// and then checked for missing files.
|
|
virtual void Verify(const std::string& aad_prefix) = 0;
|
|
virtual ~AADPrefixVerifier() {}
|
|
};
|
|
|
|
class PARQUET_EXPORT FileDecryptionProperties {
|
|
public:
|
|
class PARQUET_EXPORT Builder {
|
|
public:
|
|
Builder() {
|
|
check_plaintext_footer_integrity_ = kDefaultCheckSignature;
|
|
plaintext_files_allowed_ = kDefaultAllowPlaintextFiles;
|
|
}
|
|
|
|
/// Set an explicit footer key. If applied on a file that contains
|
|
/// footer key metadata the metadata will be ignored, the footer
|
|
/// will be decrypted/verified with this key.
|
|
/// If explicit key is not set, footer key will be fetched from
|
|
/// key retriever.
|
|
/// With explicit keys or AAD prefix, new encryption properties object must be
|
|
/// created for each encrypted file.
|
|
/// Explicit encryption keys (footer and column) are cloned.
|
|
/// Upon completion of file reading, the cloned encryption keys in the properties
|
|
/// will be wiped out (array values set to 0).
|
|
/// Caller is responsible for wiping out the input key array.
|
|
/// param footerKey Key length must be either 16, 24 or 32 bytes.
|
|
Builder* footer_key(std::string footer_key);
|
|
|
|
/// Set explicit column keys (decryption properties).
|
|
/// Its also possible to set a key retriever on this property object.
|
|
/// Upon file decryption, availability of explicit keys is checked before
|
|
/// invocation of the retriever callback.
|
|
/// If an explicit key is available for a footer or a column,
|
|
/// its key metadata will be ignored.
|
|
Builder* column_keys(
|
|
ColumnPathToDecryptionPropertiesMap column_decryption_properties);
|
|
|
|
/// Set a key retriever callback. Its also possible to
|
|
/// set explicit footer or column keys on this file property object.
|
|
/// Upon file decryption, availability of explicit keys is checked before
|
|
/// invocation of the retriever callback.
|
|
/// If an explicit key is available for a footer or a column,
|
|
/// its key metadata will be ignored.
|
|
Builder* key_retriever(std::shared_ptr<DecryptionKeyRetriever> key_retriever);
|
|
|
|
/// Skip integrity verification of plaintext footers.
|
|
/// If not called, integrity of plaintext footers will be checked in runtime,
|
|
/// and an exception will be thrown in the following situations:
|
|
/// - footer signing key is not available
|
|
/// (not passed, or not found by key retriever)
|
|
/// - footer content and signature don't match
|
|
Builder* disable_footer_signature_verification() {
|
|
check_plaintext_footer_integrity_ = false;
|
|
return this;
|
|
}
|
|
|
|
/// Explicitly supply the file AAD prefix.
|
|
/// A must when a prefix is used for file encryption, but not stored in file.
|
|
/// If AAD prefix is stored in file, it will be compared to the explicitly
|
|
/// supplied value and an exception will be thrown if they differ.
|
|
Builder* aad_prefix(std::string aad_prefix);
|
|
|
|
/// Set callback for verification of AAD Prefixes stored in file.
|
|
Builder* aad_prefix_verifier(std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier);
|
|
|
|
/// By default, reading plaintext (unencrypted) files is not
|
|
/// allowed when using a decryptor
|
|
/// - in order to detect files that were not encrypted by mistake.
|
|
/// However, the default behavior can be overridden by calling this method.
|
|
/// The caller should use then a different method to ensure encryption
|
|
/// of files with sensitive data.
|
|
Builder* plaintext_files_allowed() {
|
|
plaintext_files_allowed_ = true;
|
|
return this;
|
|
}
|
|
|
|
std::shared_ptr<FileDecryptionProperties> build() {
|
|
return std::shared_ptr<FileDecryptionProperties>(new FileDecryptionProperties(
|
|
footer_key_, key_retriever_, check_plaintext_footer_integrity_, aad_prefix_,
|
|
aad_prefix_verifier_, column_decryption_properties_, plaintext_files_allowed_));
|
|
}
|
|
|
|
private:
|
|
std::string footer_key_;
|
|
std::string aad_prefix_;
|
|
std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier_;
|
|
ColumnPathToDecryptionPropertiesMap column_decryption_properties_;
|
|
|
|
std::shared_ptr<DecryptionKeyRetriever> key_retriever_;
|
|
bool check_plaintext_footer_integrity_;
|
|
bool plaintext_files_allowed_;
|
|
};
|
|
|
|
std::string column_key(const std::string& column_path) const;
|
|
|
|
std::string footer_key() const { return footer_key_; }
|
|
|
|
std::string aad_prefix() const { return aad_prefix_; }
|
|
|
|
const std::shared_ptr<DecryptionKeyRetriever>& key_retriever() const {
|
|
return key_retriever_;
|
|
}
|
|
|
|
bool check_plaintext_footer_integrity() const {
|
|
return check_plaintext_footer_integrity_;
|
|
}
|
|
|
|
bool plaintext_files_allowed() const { return plaintext_files_allowed_; }
|
|
|
|
const std::shared_ptr<AADPrefixVerifier>& aad_prefix_verifier() const {
|
|
return aad_prefix_verifier_;
|
|
}
|
|
|
|
private:
|
|
std::string footer_key_;
|
|
std::string aad_prefix_;
|
|
std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier_;
|
|
|
|
ColumnPathToDecryptionPropertiesMap column_decryption_properties_;
|
|
|
|
std::shared_ptr<DecryptionKeyRetriever> key_retriever_;
|
|
bool check_plaintext_footer_integrity_;
|
|
bool plaintext_files_allowed_;
|
|
|
|
FileDecryptionProperties(
|
|
std::string footer_key, std::shared_ptr<DecryptionKeyRetriever> key_retriever,
|
|
bool check_plaintext_footer_integrity, std::string aad_prefix,
|
|
std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier,
|
|
ColumnPathToDecryptionPropertiesMap column_decryption_properties,
|
|
bool plaintext_files_allowed);
|
|
};
|
|
|
|
class PARQUET_EXPORT FileEncryptionProperties {
|
|
public:
|
|
class PARQUET_EXPORT Builder {
|
|
public:
|
|
explicit Builder(std::string footer_key)
|
|
: parquet_cipher_(kDefaultEncryptionAlgorithm),
|
|
encrypted_footer_(kDefaultEncryptedFooter) {
|
|
footer_key_ = std::move(footer_key);
|
|
store_aad_prefix_in_file_ = false;
|
|
}
|
|
|
|
/// Create files with plaintext footer.
|
|
/// If not called, the files will be created with encrypted footer (default).
|
|
Builder* set_plaintext_footer() {
|
|
encrypted_footer_ = false;
|
|
return this;
|
|
}
|
|
|
|
/// Set encryption algorithm.
|
|
/// If not called, files will be encrypted with AES_GCM_V1 (default).
|
|
Builder* algorithm(ParquetCipher::type parquet_cipher) {
|
|
parquet_cipher_ = parquet_cipher;
|
|
return this;
|
|
}
|
|
|
|
/// Set a key retrieval metadata (converted from String).
|
|
/// use either footer_key_metadata or footer_key_id, not both.
|
|
Builder* footer_key_id(std::string key_id);
|
|
|
|
/// Set a key retrieval metadata.
|
|
/// use either footer_key_metadata or footer_key_id, not both.
|
|
Builder* footer_key_metadata(std::string footer_key_metadata);
|
|
|
|
/// Set the file AAD Prefix.
|
|
Builder* aad_prefix(std::string aad_prefix);
|
|
|
|
/// Skip storing AAD Prefix in file.
|
|
/// If not called, and if AAD Prefix is set, it will be stored.
|
|
Builder* disable_aad_prefix_storage();
|
|
|
|
/// Set the list of encrypted columns and their properties (keys etc).
|
|
/// If not called, all columns will be encrypted with the footer key.
|
|
/// If called, the file columns not in the list will be left unencrypted.
|
|
Builder* encrypted_columns(ColumnPathToEncryptionPropertiesMap encrypted_columns);
|
|
|
|
std::shared_ptr<FileEncryptionProperties> build() {
|
|
return std::shared_ptr<FileEncryptionProperties>(new FileEncryptionProperties(
|
|
parquet_cipher_, footer_key_, footer_key_metadata_, encrypted_footer_,
|
|
aad_prefix_, store_aad_prefix_in_file_, encrypted_columns_));
|
|
}
|
|
|
|
private:
|
|
ParquetCipher::type parquet_cipher_;
|
|
bool encrypted_footer_;
|
|
std::string footer_key_;
|
|
std::string footer_key_metadata_;
|
|
|
|
std::string aad_prefix_;
|
|
bool store_aad_prefix_in_file_;
|
|
ColumnPathToEncryptionPropertiesMap encrypted_columns_;
|
|
};
|
|
|
|
bool encrypted_footer() const { return encrypted_footer_; }
|
|
|
|
EncryptionAlgorithm algorithm() const { return algorithm_; }
|
|
|
|
std::string footer_key() const { return footer_key_; }
|
|
|
|
std::string footer_key_metadata() const { return footer_key_metadata_; }
|
|
|
|
std::string file_aad() const { return file_aad_; }
|
|
|
|
std::shared_ptr<ColumnEncryptionProperties> column_encryption_properties(
|
|
const std::string& column_path);
|
|
|
|
ColumnPathToEncryptionPropertiesMap encrypted_columns() const {
|
|
return encrypted_columns_;
|
|
}
|
|
|
|
private:
|
|
EncryptionAlgorithm algorithm_;
|
|
std::string footer_key_;
|
|
std::string footer_key_metadata_;
|
|
bool encrypted_footer_;
|
|
std::string file_aad_;
|
|
std::string aad_prefix_;
|
|
bool store_aad_prefix_in_file_;
|
|
ColumnPathToEncryptionPropertiesMap encrypted_columns_;
|
|
|
|
FileEncryptionProperties(ParquetCipher::type cipher, std::string footer_key,
|
|
std::string footer_key_metadata, bool encrypted_footer,
|
|
std::string aad_prefix, bool store_aad_prefix_in_file,
|
|
ColumnPathToEncryptionPropertiesMap encrypted_columns);
|
|
};
|
|
|
|
} // namespace parquet
|