// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once
|
|
|
|
#include <string_view>
|
|
|
|
#include "arrow/array.h"
|
|
#include "arrow/status.h"
|
|
#include "arrow/type.h"
|
|
#include "arrow/type_traits.h"
|
|
#include "arrow/util/binary_view_util.h"
|
|
#include "arrow/util/bit_block_counter.h"
|
|
#include "arrow/util/bit_util.h"
|
|
#include "arrow/util/checked_cast.h"
|
|
#include "arrow/util/functional.h"
|
|
|
|
namespace arrow {
|
|
namespace internal {
|
|
|
|
// Primary template, intentionally empty.  The `Enable` parameter exists only
// so that the partial specializations in this file can be selected through
// their enable_if_* conditions; a type matched by none of them ends up with
// this empty struct, which offers no VisitStatus / VisitVoid.
template <typename T, typename Enable = void>
struct ArraySpanInlineVisitor {};
|
|
|
|
// Numeric and primitive C-compatible types
|
|
template <typename T>
|
|
struct ArraySpanInlineVisitor<T, enable_if_has_c_type<T>> {
|
|
using c_type = typename T::c_type;
|
|
|
|
template <typename ValidFunc, typename NullFunc>
|
|
static Status VisitStatus(const ArraySpan& arr, ValidFunc&& valid_func,
|
|
NullFunc&& null_func) {
|
|
if constexpr (std::is_same_v<T, BooleanType>) {
|
|
int64_t offset = arr.offset;
|
|
const uint8_t* data = arr.buffers[1].data;
|
|
return VisitBitBlocks(
|
|
arr.buffers[0].data, offset, arr.length,
|
|
[&](int64_t i) { return valid_func(bit_util::GetBit(data, offset + i)); },
|
|
std::forward<NullFunc>(null_func));
|
|
} else {
|
|
const c_type* data = arr.GetValues<c_type>(1);
|
|
auto visit_valid = [&](int64_t i) { return valid_func(data[i]); };
|
|
return VisitBitBlocks(arr.buffers[0].data, arr.offset, arr.length,
|
|
std::move(visit_valid), std::forward<NullFunc>(null_func));
|
|
}
|
|
}
|
|
|
|
template <typename ValidFunc, typename NullFunc>
|
|
static void VisitVoid(const ArraySpan& arr, ValidFunc&& valid_func,
|
|
NullFunc&& null_func) {
|
|
if constexpr (std::is_same_v<T, BooleanType>) {
|
|
int64_t offset = arr.offset;
|
|
const uint8_t* data = arr.buffers[1].data;
|
|
VisitBitBlocksVoid(
|
|
arr.buffers[0].data, offset, arr.length,
|
|
[&](int64_t i) { valid_func(bit_util::GetBit(data, offset + i)); },
|
|
std::forward<NullFunc>(null_func));
|
|
} else {
|
|
const c_type* data = arr.GetValues<c_type>(1);
|
|
auto visit_valid = [&](int64_t i) { valid_func(data[i]); };
|
|
VisitBitBlocksVoid(arr.buffers[0].data, arr.offset, arr.length,
|
|
std::move(visit_valid), std::forward<NullFunc>(null_func));
|
|
}
|
|
}
|
|
};
|
|
|
|
// Binary, String...
|
|
template <typename T>
|
|
struct ArraySpanInlineVisitor<T, enable_if_base_binary<T>> {
|
|
using c_type = std::string_view;
|
|
|
|
template <typename ValidFunc, typename NullFunc>
|
|
static Status VisitStatus(const ArraySpan& arr, ValidFunc&& valid_func,
|
|
NullFunc&& null_func) {
|
|
using offset_type = typename T::offset_type;
|
|
constexpr char empty_value = 0;
|
|
|
|
if (arr.length == 0) {
|
|
return Status::OK();
|
|
}
|
|
const offset_type* offsets = arr.GetValues<offset_type>(1);
|
|
const char* data;
|
|
if (arr.buffers[2].data == NULLPTR) {
|
|
data = &empty_value;
|
|
} else {
|
|
// Do not apply the array offset to the values array; the value_offsets
|
|
// index the non-sliced values array.
|
|
data = arr.GetValues<char>(2, /*absolute_offset=*/0);
|
|
}
|
|
offset_type cur_offset = *offsets++;
|
|
return VisitBitBlocks(
|
|
arr.buffers[0].data, arr.offset, arr.length,
|
|
[&](int64_t i) {
|
|
ARROW_UNUSED(i);
|
|
auto value = std::string_view(data + cur_offset, *offsets - cur_offset);
|
|
cur_offset = *offsets++;
|
|
return valid_func(value);
|
|
},
|
|
[&]() {
|
|
cur_offset = *offsets++;
|
|
return null_func();
|
|
});
|
|
}
|
|
|
|
template <typename ValidFunc, typename NullFunc>
|
|
static void VisitVoid(const ArraySpan& arr, ValidFunc&& valid_func,
|
|
NullFunc&& null_func) {
|
|
using offset_type = typename T::offset_type;
|
|
constexpr uint8_t empty_value = 0;
|
|
|
|
if (arr.length == 0) {
|
|
return;
|
|
}
|
|
const offset_type* offsets = arr.GetValues<offset_type>(1);
|
|
const uint8_t* data;
|
|
if (arr.buffers[2].data == NULLPTR) {
|
|
data = &empty_value;
|
|
} else {
|
|
// Do not apply the array offset to the values array; the value_offsets
|
|
// index the non-sliced values array.
|
|
data = arr.GetValues<uint8_t>(2, /*absolute_offset=*/0);
|
|
}
|
|
|
|
VisitBitBlocksVoid(
|
|
arr.buffers[0].data, arr.offset, arr.length,
|
|
[&](int64_t i) {
|
|
auto value = std::string_view(reinterpret_cast<const char*>(data + offsets[i]),
|
|
offsets[i + 1] - offsets[i]);
|
|
valid_func(value);
|
|
},
|
|
std::forward<NullFunc>(null_func));
|
|
}
|
|
};
|
|
|
|
// BinaryView, StringView...
|
|
template <typename T>
|
|
struct ArraySpanInlineVisitor<T, enable_if_binary_view_like<T>> {
|
|
using c_type = std::string_view;
|
|
|
|
template <typename ValidFunc, typename NullFunc>
|
|
static Status VisitStatus(const ArraySpan& arr, ValidFunc&& valid_func,
|
|
NullFunc&& null_func) {
|
|
if (arr.length == 0) {
|
|
return Status::OK();
|
|
}
|
|
auto* s = arr.GetValues<BinaryViewType::c_type>(1);
|
|
auto* data_buffers = arr.GetVariadicBuffers().data();
|
|
return VisitBitBlocks(
|
|
arr.buffers[0].data, arr.offset, arr.length,
|
|
[&](int64_t index) {
|
|
return valid_func(util::FromBinaryView(s[index], data_buffers));
|
|
},
|
|
[&]() { return null_func(); });
|
|
}
|
|
|
|
template <typename ValidFunc, typename NullFunc>
|
|
static void VisitVoid(const ArraySpan& arr, ValidFunc&& valid_func,
|
|
NullFunc&& null_func) {
|
|
if (arr.length == 0) {
|
|
return;
|
|
}
|
|
auto* s = arr.GetValues<BinaryViewType::c_type>(1);
|
|
auto* data_buffers = arr.GetVariadicBuffers().data();
|
|
VisitBitBlocksVoid(
|
|
arr.buffers[0].data, arr.offset, arr.length,
|
|
[&](int64_t index) { valid_func(util::FromBinaryView(s[index], data_buffers)); },
|
|
std::forward<NullFunc>(null_func));
|
|
}
|
|
};
|
|
|
|
// FixedSizeBinary, Decimal128
|
|
template <typename T>
|
|
struct ArraySpanInlineVisitor<T, enable_if_fixed_size_binary<T>> {
|
|
using c_type = std::string_view;
|
|
|
|
template <typename ValidFunc, typename NullFunc>
|
|
static Status VisitStatus(const ArraySpan& arr, ValidFunc&& valid_func,
|
|
NullFunc&& null_func) {
|
|
const int32_t byte_width = arr.type->byte_width();
|
|
const char* data = arr.GetValues<char>(1,
|
|
/*absolute_offset=*/arr.offset * byte_width);
|
|
return VisitBitBlocks(
|
|
arr.buffers[0].data, arr.offset, arr.length,
|
|
[&](int64_t i) {
|
|
auto value = std::string_view(data, byte_width);
|
|
data += byte_width;
|
|
return valid_func(value);
|
|
},
|
|
[&]() {
|
|
data += byte_width;
|
|
return null_func();
|
|
});
|
|
}
|
|
|
|
template <typename ValidFunc, typename NullFunc>
|
|
static void VisitVoid(const ArraySpan& arr, ValidFunc&& valid_func,
|
|
NullFunc&& null_func) {
|
|
const int32_t byte_width = arr.type->byte_width();
|
|
const char* data = arr.GetValues<char>(1,
|
|
/*absolute_offset=*/arr.offset * byte_width);
|
|
VisitBitBlocksVoid(
|
|
arr.buffers[0].data, arr.offset, arr.length,
|
|
[&](int64_t i) {
|
|
valid_func(std::string_view(data, byte_width));
|
|
data += byte_width;
|
|
},
|
|
[&]() {
|
|
data += byte_width;
|
|
null_func();
|
|
});
|
|
}
|
|
};
|
|
|
|
} // namespace internal
|
|
|
|
template <typename T, typename ValidFunc, typename NullFunc>
|
|
typename internal::call_traits::enable_if_return<ValidFunc, Status>::type
|
|
VisitArraySpanInline(const ArraySpan& arr, ValidFunc&& valid_func, NullFunc&& null_func) {
|
|
return internal::ArraySpanInlineVisitor<T>::VisitStatus(
|
|
arr, std::forward<ValidFunc>(valid_func), std::forward<NullFunc>(null_func));
|
|
}
|
|
|
|
template <typename T, typename ValidFunc, typename NullFunc>
|
|
typename internal::call_traits::enable_if_return<ValidFunc, void>::type
|
|
VisitArraySpanInline(const ArraySpan& arr, ValidFunc&& valid_func, NullFunc&& null_func) {
|
|
return internal::ArraySpanInlineVisitor<T>::VisitVoid(
|
|
arr, std::forward<ValidFunc>(valid_func), std::forward<NullFunc>(null_func));
|
|
}
|
|
|
|
// Visit an array's data values, in order, without overhead.
|
|
//
|
|
// The Visit method's `visitor` argument should be an object with two public methods:
|
|
// - Status VisitNull()
|
|
// - Status VisitValue(<scalar>)
|
|
//
|
|
// The scalar value's type depends on the array data type:
|
|
// - the type's `c_type`, if any
|
|
// - for boolean arrays, a `bool`
|
|
// - for binary, string, large binary and string, binary and string view, and fixed-size
|
|
// binary arrays, a `std::string_view`
|
|
|
|
template <typename T>
|
|
struct ArraySpanVisitor {
|
|
using InlineVisitorType = internal::ArraySpanInlineVisitor<T>;
|
|
using c_type = typename InlineVisitorType::c_type;
|
|
|
|
template <typename Visitor>
|
|
static Status Visit(const ArraySpan& arr, Visitor* visitor) {
|
|
return InlineVisitorType::VisitStatus(
|
|
arr, [visitor](c_type v) { return visitor->VisitValue(v); },
|
|
[visitor]() { return visitor->VisitNull(); });
|
|
}
|
|
};
|
|
|
|
// Visit a null bitmap, in order, without overhead.
|
|
//
|
|
// The given `ValidFunc` should be a callable with either of these signatures:
|
|
// - void()
|
|
// - Status()
|
|
//
|
|
// The `NullFunc` should have the same return type as `ValidFunc`.
|
|
|
|
template <typename ValidFunc, typename NullFunc>
|
|
typename internal::call_traits::enable_if_return<ValidFunc, Status>::type
|
|
VisitNullBitmapInline(const uint8_t* valid_bits, int64_t valid_bits_offset,
|
|
int64_t num_values, int64_t null_count, ValidFunc&& valid_func,
|
|
NullFunc&& null_func) {
|
|
internal::OptionalBitBlockCounter bit_counter(null_count == 0 ? NULLPTR : valid_bits,
|
|
valid_bits_offset, num_values);
|
|
int64_t position = 0;
|
|
int64_t offset_position = valid_bits_offset;
|
|
while (position < num_values) {
|
|
internal::BitBlockCount block = bit_counter.NextBlock();
|
|
if (block.AllSet()) {
|
|
for (int64_t i = 0; i < block.length; ++i) {
|
|
ARROW_RETURN_NOT_OK(valid_func());
|
|
}
|
|
} else if (block.NoneSet()) {
|
|
for (int64_t i = 0; i < block.length; ++i) {
|
|
ARROW_RETURN_NOT_OK(null_func());
|
|
}
|
|
} else {
|
|
for (int64_t i = 0; i < block.length; ++i) {
|
|
ARROW_RETURN_NOT_OK(bit_util::GetBit(valid_bits, offset_position + i)
|
|
? valid_func()
|
|
: null_func());
|
|
}
|
|
}
|
|
position += block.length;
|
|
offset_position += block.length;
|
|
}
|
|
return Status::OK();
|
|
}
|
|
|
|
template <typename ValidFunc, typename NullFunc>
|
|
typename internal::call_traits::enable_if_return<ValidFunc, void>::type
|
|
VisitNullBitmapInline(const uint8_t* valid_bits, int64_t valid_bits_offset,
|
|
int64_t num_values, int64_t null_count, ValidFunc&& valid_func,
|
|
NullFunc&& null_func) {
|
|
internal::OptionalBitBlockCounter bit_counter(null_count == 0 ? NULLPTR : valid_bits,
|
|
valid_bits_offset, num_values);
|
|
int64_t position = 0;
|
|
int64_t offset_position = valid_bits_offset;
|
|
while (position < num_values) {
|
|
internal::BitBlockCount block = bit_counter.NextBlock();
|
|
if (block.AllSet()) {
|
|
for (int64_t i = 0; i < block.length; ++i) {
|
|
valid_func();
|
|
}
|
|
} else if (block.NoneSet()) {
|
|
for (int64_t i = 0; i < block.length; ++i) {
|
|
null_func();
|
|
}
|
|
} else {
|
|
for (int64_t i = 0; i < block.length; ++i) {
|
|
bit_util::GetBit(valid_bits, offset_position + i) ? valid_func() : null_func();
|
|
}
|
|
}
|
|
position += block.length;
|
|
offset_position += block.length;
|
|
}
|
|
}
|
|
|
|
} // namespace arrow
|