221 lines
10 KiB
C++
221 lines
10 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
|
|
#pragma once
|
|
|
|
#include <algorithm>
|
|
|
|
#include "arrow/memory_pool.h"
|
|
#include "arrow/type_fwd.h"
|
|
#include "arrow/util/bit_util.h"
|
|
|
|
namespace arrow {
|
|
namespace internal {
|
|
|
|
// Describes how a range of bits in a bitmap splits into an unaligned leading
// area, a run of whole aligned words, and an unaligned trailing area.
// Instances are produced by BitmapWordAlign().
//
// Every member has a default initializer so that a default-constructed
// instance holds well-defined (empty) values rather than indeterminate ones.
struct BitmapWordAlignParams {
  // Number of bits located before the aligned words.
  int64_t leading_bits = 0;
  // Number of bits located after the aligned words.
  int64_t trailing_bits = 0;
  // Bit offset at which the trailing area starts, expressed on the same basis
  // as the `bit_offset` passed to BitmapWordAlign() (i.e. relative to `data`).
  int64_t trailing_bit_offset = 0;
  // Address of the byte containing the first bit after the leading area.
  const uint8_t* aligned_start = nullptr;
  // Number of bits covered by the aligned words.
  int64_t aligned_bits = 0;
  // Number of whole aligned words.
  int64_t aligned_words = 0;
};
|
|
|
|
// Compute parameters for accessing a bitmap using aligned word instructions.
|
|
// The returned parameters describe:
|
|
// - a leading area of size `leading_bits` before the aligned words
|
|
// - a word-aligned area of size `aligned_bits`
|
|
// - a trailing area of size `trailing_bits` after the aligned words
|
|
template <uint64_t ALIGN_IN_BYTES>
|
|
inline BitmapWordAlignParams BitmapWordAlign(const uint8_t* data, int64_t bit_offset,
|
|
int64_t length) {
|
|
static_assert(bit_util::IsPowerOf2(ALIGN_IN_BYTES),
|
|
"ALIGN_IN_BYTES should be a positive power of two");
|
|
constexpr uint64_t ALIGN_IN_BITS = ALIGN_IN_BYTES * 8;
|
|
|
|
BitmapWordAlignParams p;
|
|
|
|
// Compute a "bit address" that we can align up to ALIGN_IN_BITS.
|
|
// We don't care about losing the upper bits since we are only interested in the
|
|
// difference between both addresses.
|
|
const uint64_t bit_addr =
|
|
reinterpret_cast<size_t>(data) * 8 + static_cast<uint64_t>(bit_offset);
|
|
const uint64_t aligned_bit_addr = bit_util::RoundUpToPowerOf2(bit_addr, ALIGN_IN_BITS);
|
|
|
|
p.leading_bits = std::min<int64_t>(length, aligned_bit_addr - bit_addr);
|
|
p.aligned_words = (length - p.leading_bits) / ALIGN_IN_BITS;
|
|
p.aligned_bits = p.aligned_words * ALIGN_IN_BITS;
|
|
p.trailing_bits = length - p.leading_bits - p.aligned_bits;
|
|
p.trailing_bit_offset = bit_offset + p.leading_bits + p.aligned_bits;
|
|
|
|
p.aligned_start = data + (bit_offset + p.leading_bits) / 8;
|
|
return p;
|
|
}
|
|
} // namespace internal
|
|
|
|
namespace util {
|
|
|
|
// Functions to check if the provided Arrow object is aligned by the specified alignment
|
|
|
|
/// \brief Sentinel alignment value requesting data type-specific alignment
///
/// When this is given as the `alignment` argument of one of the CheckAlignment
/// or EnsureAlignment functions, each buffer is required to be suitably aligned
/// for the data type of the array. For example, the address of the values
/// buffer of an int32 array must be a multiple of 4, and the address of the
/// offsets buffer of a large_string array must be a multiple of 8.
constexpr int64_t kValueAlignment{-3};
|
|
|
|
/// \brief Calculate if the buffer's address is a multiple of `alignment`
|
|
///
|
|
/// If `alignment` is less than or equal to 0 then this method will always return true
|
|
/// \param buffer the buffer to check
|
|
/// \param alignment the alignment (in bytes) to check for
|
|
ARROW_EXPORT bool CheckAlignment(const Buffer& buffer, int64_t alignment);
|
|
/// \brief Calculate if all buffers in the array data are aligned
|
|
///
|
|
/// This will also check the buffers in the dictionary and any children
|
|
/// \param array the array data to check
|
|
/// \param alignment the alignment (in bytes) to check for
|
|
ARROW_EXPORT bool CheckAlignment(const ArrayData& array, int64_t alignment);
|
|
/// \brief Calculate if all buffers in the array are aligned
|
|
///
|
|
/// This will also check the buffers in the dictionary and any children
|
|
/// \param array the array to check
|
|
/// \param alignment the alignment (in bytes) to check for
|
|
ARROW_EXPORT bool CheckAlignment(const Array& array, int64_t alignment);
|
|
|
|
// The following functions take an additional boolean vector that receives the
// alignment check results of the constituent objects.
// For example, the needs_alignment vector for a ChunkedArray will contain the
// check results of its constituent Arrays.
// The boolean vector was introduced to minimize repeated checks of the
// constituent objects in the EnsureAlignment functions: objects that are
// already known to be completely aligned can be skipped instead of being
// checked again.
|
|
|
|
/// \brief Calculate which (if any) chunks in a chunked array are unaligned
|
|
/// \param array the array to check
|
|
/// \param alignment the alignment (in bytes) to check for
|
|
/// \param needs_alignment an output vector that will store the results of the check
|
|
/// it must be set to a valid vector. Extra elements will be added to the end
|
|
/// of the vector for each chunk that is checked. `true` will be stored if
|
|
/// the chunk is unaligned.
|
|
/// \param offset the index of the chunk to start checking
|
|
/// \return true if all chunks (starting at `offset`) are aligned, false otherwise
|
|
ARROW_EXPORT bool CheckAlignment(const ChunkedArray& array, int64_t alignment,
|
|
std::vector<bool>* needs_alignment, int offset = 0);
|
|
|
|
/// \brief calculate which (if any) columns in a record batch are unaligned
|
|
/// \param batch the batch to check
|
|
/// \param alignment the alignment (in bytes) to check for
|
|
/// \param needs_alignment an output vector that will store the results of the
|
|
/// check. It must be set to a valid vector. Extra elements will be added
|
|
/// to the end of the vector for each column that is checked. `true` will be
|
|
/// stored if the column is unaligned.
|
|
ARROW_EXPORT bool CheckAlignment(const RecordBatch& batch, int64_t alignment,
|
|
std::vector<bool>* needs_alignment);
|
|
|
|
/// \brief calculate which (if any) columns in a table are unaligned
|
|
/// \param table the table to check
|
|
/// \param alignment the alignment (in bytes) to check for
|
|
/// \param needs_alignment an output vector that will store the results of the
|
|
/// check. It must be set to a valid vector. Extra elements will be added
|
|
/// to the end of the vector for each column that is checked. `true` will be
|
|
/// stored if the column is unaligned.
|
|
ARROW_EXPORT bool CheckAlignment(const Table& table, int64_t alignment,
|
|
std::vector<bool>* needs_alignment);
|
|
|
|
/// \brief return a buffer that has the given alignment and the same data as the input
|
|
/// buffer
|
|
///
|
|
/// If the input buffer is already aligned then this method will return the input buffer
|
|
/// If the input buffer is not already aligned then this method will allocate a new
|
|
/// buffer. The alignment of the new buffer will have at least
|
|
/// max(kDefaultBufferAlignment, alignment) bytes of alignment.
|
|
///
|
|
/// \param buffer the buffer to check
|
|
/// \param alignment the alignment (in bytes) to check for
|
|
/// \param memory_pool a memory pool that will be used to allocate a new buffer if the
|
|
/// input buffer is not sufficiently aligned
|
|
ARROW_EXPORT Result<std::shared_ptr<Buffer>> EnsureAlignment(
|
|
std::shared_ptr<Buffer> buffer, int64_t alignment, MemoryPool* memory_pool);
|
|
|
|
/// \brief return an array data where all buffers are aligned by the given alignment
|
|
///
|
|
/// If any input buffer is already aligned then this method will reuse that same input
|
|
/// buffer.
|
|
///
|
|
/// \param array_data the array data to check
|
|
/// \param alignment the alignment (in bytes) to check for
|
|
/// \param memory_pool a memory pool that will be used to allocate new buffers if any
|
|
/// input buffer is not sufficiently aligned
|
|
ARROW_EXPORT Result<std::shared_ptr<ArrayData>> EnsureAlignment(
|
|
std::shared_ptr<ArrayData> array_data, int64_t alignment, MemoryPool* memory_pool);
|
|
|
|
/// \brief return an array where all buffers are aligned by the given alignment
|
|
///
|
|
/// If any input buffer is already aligned then this method will reuse that same input
|
|
/// buffer.
|
|
///
|
|
/// \param array the array to check
|
|
/// \param alignment the alignment (in bytes) to check for
|
|
/// \param memory_pool a memory pool that will be used to allocate new buffers if any
|
|
/// input buffer is not sufficiently aligned
|
|
ARROW_EXPORT Result<std::shared_ptr<Array>> EnsureAlignment(std::shared_ptr<Array> array,
|
|
int64_t alignment,
|
|
MemoryPool* memory_pool);
|
|
|
|
/// \brief return a chunked array where all buffers are aligned by the given alignment
|
|
///
|
|
/// If any input buffer is already aligned then this method will reuse that same input
|
|
/// buffer.
|
|
///
|
|
/// \param array the chunked array to check
|
|
/// \param alignment the alignment (in bytes) to check for
|
|
/// \param memory_pool a memory pool that will be used to allocate new buffers if any
|
|
/// input buffer is not sufficiently aligned
|
|
ARROW_EXPORT Result<std::shared_ptr<ChunkedArray>> EnsureAlignment(
|
|
std::shared_ptr<ChunkedArray> array, int64_t alignment, MemoryPool* memory_pool);
|
|
|
|
/// \brief return a record batch where all buffers are aligned by the given alignment
|
|
///
|
|
/// If any input buffer is already aligned then this method will reuse that same input
|
|
/// buffer.
|
|
///
|
|
/// \param batch the batch to check
|
|
/// \param alignment the alignment (in bytes) to check for
|
|
/// \param memory_pool a memory pool that will be used to allocate new buffers if any
|
|
/// input buffer is not sufficiently aligned
|
|
ARROW_EXPORT Result<std::shared_ptr<RecordBatch>> EnsureAlignment(
|
|
std::shared_ptr<RecordBatch> batch, int64_t alignment, MemoryPool* memory_pool);
|
|
|
|
/// \brief return a table where all buffers are aligned by the given alignment
|
|
///
|
|
/// If any input buffer is already aligned then this method will reuse that same input
|
|
/// buffer.
|
|
///
|
|
/// \param table the table to check
|
|
/// \param alignment the alignment (in bytes) to check for
|
|
/// \param memory_pool a memory pool that will be used to allocate new buffers if any
|
|
/// input buffer is not sufficiently aligned
|
|
ARROW_EXPORT Result<std::shared_ptr<Table>> EnsureAlignment(std::shared_ptr<Table> table,
|
|
int64_t alignment,
|
|
MemoryPool* memory_pool);
|
|
|
|
} // namespace util
|
|
} // namespace arrow
|