Adding all project files

This commit is contained in:
Martina Burlando 2025-08-02 02:00:33 +02:00
parent 6c9e127bdc
commit cd4316ad0f
42289 changed files with 8009643 additions and 0 deletions

View file

@ -0,0 +1,25 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/io/buffered.h"
#include "arrow/io/compressed.h"
#include "arrow/io/file.h"
#include "arrow/io/hdfs.h"
#include "arrow/io/interfaces.h"
#include "arrow/io/memory.h"

View file

@ -0,0 +1,168 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Buffered stream implementations
#pragma once
#include <cstdint>
#include <memory>
#include <string_view>
#include "arrow/io/concurrency.h"
#include "arrow/io/interfaces.h"
#include "arrow/util/visibility.h"
namespace arrow {
class Buffer;
class MemoryPool;
class Status;
namespace io {
/// \class BufferedOutputStream
/// \brief An OutputStream that accumulates writes in a temporary buffer before
/// passing them on to a wrapped raw OutputStream.
///
/// Instances are obtained through Create(); the constructor is private.
class ARROW_EXPORT BufferedOutputStream : public OutputStream {
public:
~BufferedOutputStream() override;
/// \brief Create a buffered output stream wrapping the given output stream.
/// \param[in] buffer_size the size of the temporary write buffer
/// \param[in] pool a MemoryPool to use for allocations
/// \param[in] raw another OutputStream
/// \return the created BufferedOutputStream
static Result<std::shared_ptr<BufferedOutputStream>> Create(
int64_t buffer_size, MemoryPool* pool, std::shared_ptr<OutputStream> raw);
/// \brief Resize internal buffer
/// \param[in] new_buffer_size the new buffer size
/// \return Status
Status SetBufferSize(int64_t new_buffer_size);
/// \brief Return the current size of the internal buffer
int64_t buffer_size() const;
/// \brief Return the number of remaining bytes that have not been flushed to
/// the raw OutputStream
int64_t bytes_buffered() const;
/// \brief Flush any buffered writes and release the raw
/// OutputStream. Further operations on this object are invalid
/// \return the underlying OutputStream
Result<std::shared_ptr<OutputStream>> Detach();
// OutputStream interface
/// \brief Close the buffered output stream. This implicitly closes the
/// underlying raw output stream.
Status Close() override;
// NOTE(review): Abort() semantics are defined in the implementation file,
// which is not visible here.
Status Abort() override;
bool closed() const override;
Result<int64_t> Tell() const override;
// Write bytes to the stream. Thread-safe
Status Write(const void* data, int64_t nbytes) override;
// Overload taking the data to write as a Buffer.
Status Write(const std::shared_ptr<Buffer>& data) override;
Status Flush() override;
/// \brief Return the underlying raw output stream.
std::shared_ptr<OutputStream> raw() const;
private:
// Private: construction goes through Create().
explicit BufferedOutputStream(std::shared_ptr<OutputStream> raw, MemoryPool* pool);
// Implementation details are kept out of the header behind an opaque,
// non-exported Impl class.
class ARROW_NO_EXPORT Impl;
std::unique_ptr<Impl> impl_;
};
/// \class BufferedInputStream
/// \brief An InputStream that performs buffered reads from an unbuffered
/// InputStream, which can mitigate the overhead of many small reads in some
/// cases
///
/// The public Close/Abort/Tell/Read/Peek entry points come from
/// internal::InputStreamConcurrencyWrapper, which forwards each of them to
/// the corresponding private Do* method declared below.
class ARROW_EXPORT BufferedInputStream
: public internal::InputStreamConcurrencyWrapper<BufferedInputStream> {
public:
~BufferedInputStream() override;
/// \brief Create a BufferedInputStream from a raw InputStream
/// \param[in] buffer_size the size of the temporary read buffer
/// \param[in] pool a MemoryPool to use for allocations
/// \param[in] raw a raw InputStream
/// \param[in] raw_read_bound a bound on the maximum number of bytes
/// to read from the raw input stream. The default -1 indicates that
/// it is unbounded
/// \return the created BufferedInputStream
static Result<std::shared_ptr<BufferedInputStream>> Create(
int64_t buffer_size, MemoryPool* pool, std::shared_ptr<InputStream> raw,
int64_t raw_read_bound = -1);
/// \brief Resize internal read buffer; calls to Read(...) will read at least
/// this many bytes from the raw InputStream if possible.
/// \param[in] new_buffer_size the new read buffer size
/// \return Status
Status SetBufferSize(int64_t new_buffer_size);
/// \brief Return the number of remaining bytes in the read buffer
int64_t bytes_buffered() const;
/// \brief Return the current size of the internal buffer
int64_t buffer_size() const;
/// \brief Release the raw InputStream. Any data buffered will be
/// discarded. Further operations on this object are invalid
/// \return raw the underlying InputStream
std::shared_ptr<InputStream> Detach();
/// \brief Return the unbuffered InputStream
std::shared_ptr<InputStream> raw() const;
// InputStream APIs
bool closed() const override;
Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() override;
Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
const IOContext& io_context) override;
private:
// The concurrency wrapper calls the private Do* methods below, so it needs
// friend access.
friend InputStreamConcurrencyWrapper<BufferedInputStream>;
// Private: construction goes through Create().
// NOTE(review): raw_total_bytes_bound is presumably the raw_read_bound
// argument of Create() under a different name -- confirm in the .cc file.
explicit BufferedInputStream(std::shared_ptr<InputStream> raw, MemoryPool* pool,
int64_t raw_total_bytes_bound);
// Implementation of Close(); invoked through the concurrency wrapper.
Status DoClose();
// Implementation of Abort(); overrides the wrapper's default, which would
// otherwise call DoClose().
Status DoAbort() override;
/// \brief Returns the position of the buffered stream, though the position
/// of the unbuffered stream may be further advanced.
Result<int64_t> DoTell() const;
// Implementation of Read(nbytes, out); invoked through the concurrency wrapper.
Result<int64_t> DoRead(int64_t nbytes, void* out);
/// \brief Read into buffer.
Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
/// \brief Return a zero-copy string view referencing buffered data,
/// but do not advance the position of the stream. Buffers data and
/// expands the buffer size if necessary
Result<std::string_view> DoPeek(int64_t nbytes) override;
// Implementation details are kept out of the header behind an opaque,
// non-exported Impl class.
class ARROW_NO_EXPORT Impl;
std::unique_ptr<Impl> impl_;
};
} // namespace io
} // namespace arrow

View file

@ -0,0 +1,157 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "arrow/io/interfaces.h"
#include "arrow/util/type_fwd.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace io {
/// \brief Tuning parameters controlling how byte ranges are coalesced and
/// prefetched by a read cache.
struct ARROW_EXPORT CacheOptions {
  static constexpr double kDefaultIdealBandwidthUtilizationFrac = 0.9;
  static constexpr int64_t kDefaultMaxIdealRequestSizeMib = 64;

  /// \brief Largest gap, in bytes, allowed between two consecutive ranges
  /// for them to be combined; ranges further apart are kept separate.
  int64_t hole_size_limit;

  /// \brief Upper bound, in bytes, on the size of a combined range; two
  /// consecutive ranges are not combined if the result would exceed it.
  int64_t range_size_limit;

  /// \brief Whether the cache defers I/O until data is requested.
  ///
  /// - lazy = false: all byte ranges are requested when PreBuffer or WillNeed
  ///   is called.
  /// - lazy = true, prefetch_limit = 0: merged byte ranges are requested only
  ///   once the reader needs them.
  /// - lazy = true, prefetch_limit = k: up to k merged byte ranges are
  ///   prefetched ahead of the range currently being read.
  bool lazy;

  /// \brief Maximum number of ranges to prefetch. Only used by a lazy cache,
  /// to asynchronously read some ranges after reading the target range.
  int64_t prefetch_limit = 0;

  /// \brief Field-by-field equality.
  bool operator==(const CacheOptions& other) const {
    if (hole_size_limit != other.hole_size_limit) {
      return false;
    }
    if (range_size_limit != other.range_size_limit) {
      return false;
    }
    if (lazy != other.lazy) {
      return false;
    }
    return prefetch_limit == other.prefetch_limit;
  }

  /// \brief Derive CacheOptions from network storage metrics (e.g. S3).
  ///
  /// \param[in] time_to_first_byte_millis Seek time, or Time-To-First-Byte
  ///   (TTFB), in milliseconds; also called the call setup latency of a new
  ///   read request. Must be a positive integer.
  /// \param[in] transfer_bandwidth_mib_per_sec Data transfer bandwidth (BW) in
  ///   MiB/sec, per connection. Must be a positive integer.
  /// \param[in] ideal_bandwidth_utilization_frac Fraction of the per-connection
  ///   transfer bandwidth to use in order to maximize the net data load. Must
  ///   be a positive double precision number less than 1.
  /// \param[in] max_ideal_request_size_mib Maximum size (in MiB) of a single
  ///   data request, to maximize the net data load. Must be a positive integer.
  /// \return A new instance of CacheOptions.
  static CacheOptions MakeFromNetworkMetrics(
      int64_t time_to_first_byte_millis, int64_t transfer_bandwidth_mib_per_sec,
      double ideal_bandwidth_utilization_frac = kDefaultIdealBandwidthUtilizationFrac,
      int64_t max_ideal_request_size_mib = kDefaultMaxIdealRequestSizeMib);

  /// \brief Default options (used by the ReadRangeCache constructor that
  /// takes no options).
  static CacheOptions Defaults();

  /// \brief Default options for a lazy cache (going by the name; the values
  /// are set in the implementation file).
  static CacheOptions LazyDefaults();
};
namespace internal {
/// \brief A read cache designed to hide IO latencies when reading.
///
/// This class takes multiple byte ranges that an application expects to read, and
/// coalesces them into fewer, larger read requests, which benefits performance on some
/// filesystems, particularly remote ones like Amazon S3. By default, it also issues
/// these read requests in parallel up front.
///
/// To use:
/// 1. Cache() the ranges you expect to read in the future. Ideally, these ranges have
///    the exact offset and length that will later be read. The cache will combine those
///    ranges according to parameters (see constructor).
///
///    By default, the cache will also start fetching the combined ranges in parallel in
///    the background, unless CacheOptions.lazy is set.
///
/// 2. Call WaitFor() to be notified when the given ranges have been read. If
///    CacheOptions.lazy is set, I/O will be triggered in the background here instead.
///    This can be done in parallel (e.g. if parsing a file, call WaitFor() for each
///    chunk of the file that can be parsed in parallel).
///
/// 3. Call Read() to retrieve the actual data for the given ranges.
///    A synchronous application may skip WaitFor() and just call Read() - it will still
///    benefit from coalescing and parallel fetching.
class ARROW_EXPORT ReadRangeCache {
public:
// NOTE(review): presumably the hole_size_limit (in bytes) returned by
// CacheOptions::Defaults() -- confirm in the implementation file.
static constexpr int64_t kDefaultHoleSizeLimit = 8192;
// 32 * 1024 * 1024 = 33554432. NOTE(review): presumably the default
// range_size_limit in bytes (32 MiB) -- confirm in the implementation file.
static constexpr int64_t kDefaultRangeSizeLimit = 32 * 1024 * 1024;
/// Construct a read cache with default options (CacheOptions::Defaults())
// `file` is copied, not moved, into the delegated constructor: `file.get()`
// appears in the same argument list and argument evaluation order is
// unspecified, so moving from `file` could yield a null raw pointer.
explicit ReadRangeCache(std::shared_ptr<RandomAccessFile> file, IOContext ctx)
: ReadRangeCache(file, file.get(), std::move(ctx), CacheOptions::Defaults()) {}
/// Construct a read cache with given options
explicit ReadRangeCache(std::shared_ptr<RandomAccessFile> file, IOContext ctx,
CacheOptions options)
: ReadRangeCache(file, file.get(), std::move(ctx), options) {}
/// Construct a read cache with an unowned file
// No owning pointer is passed on (NULLPTR), so the caller keeps ownership
// of `file`.
ReadRangeCache(RandomAccessFile* file, IOContext ctx, CacheOptions options)
: ReadRangeCache(NULLPTR, file, std::move(ctx), options) {}
~ReadRangeCache();
/// \brief Cache the given ranges in the background.
///
/// The caller must ensure that the ranges do not overlap with each other,
/// nor with previously cached ranges. Otherwise, behaviour will be undefined.
Status Cache(std::vector<ReadRange> ranges);
/// \brief Read a range previously given to Cache().
Result<std::shared_ptr<Buffer>> Read(ReadRange range);
/// \brief Wait until all ranges added so far have been cached.
Future<> Wait();
/// \brief Wait until all given ranges have been cached.
Future<> WaitFor(std::vector<ReadRange> ranges);
protected:
// Opaque implementation types, defined in the implementation file.
struct Impl;
struct LazyImpl;
// Common constructor that the public constructors delegate to.
// `owned_file` may be null (unowned-file case); `file` is the raw pointer
// handed over in every case.
ReadRangeCache(std::shared_ptr<RandomAccessFile> owned_file, RandomAccessFile* file,
IOContext ctx, CacheOptions options);
std::unique_ptr<Impl> impl_;
};
} // namespace internal
} // namespace io
} // namespace arrow

View file

@ -0,0 +1,124 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Compressed stream implementations
#pragma once
#include <memory>
#include <string>
#include "arrow/io/concurrency.h"
#include "arrow/io/interfaces.h"
#include "arrow/util/visibility.h"
namespace arrow {
class MemoryPool;
class Status;
namespace util {
class Codec;
} // namespace util
namespace io {
/// \class CompressedOutputStream
/// \brief An OutputStream that wraps another OutputStream and uses a
/// util::Codec for streaming compression.
///
/// Instances are obtained through Make(); the default constructor is private.
class ARROW_EXPORT CompressedOutputStream : public OutputStream {
public:
~CompressedOutputStream() override;
/// \brief Create a compressed output stream wrapping the given output stream.
///
/// The codec must be capable of streaming compression. Some codecs,
/// like Snappy, are not able to do so.
///
/// \param[in] codec the codec to use (passed as a raw pointer)
/// \param[in] raw the output stream to wrap
/// \param[in] pool a MemoryPool; defaults to default_memory_pool()
/// \return the created CompressedOutputStream
static Result<std::shared_ptr<CompressedOutputStream>> Make(
util::Codec* codec, const std::shared_ptr<OutputStream>& raw,
MemoryPool* pool = default_memory_pool());
// OutputStream interface
/// \brief Close the compressed output stream. This implicitly closes the
/// underlying raw output stream.
Status Close() override;
// NOTE(review): Abort() semantics are defined in the implementation file,
// which is not visible here.
Status Abort() override;
bool closed() const override;
Result<int64_t> Tell() const override;
Status Write(const void* data, int64_t nbytes) override;
// Re-expose the other Write overloads from Writable, which the override
// above would otherwise hide. Excluded from the generated documentation.
/// \cond FALSE
using Writable::Write;
/// \endcond
Status Flush() override;
/// \brief Return the underlying raw output stream.
std::shared_ptr<OutputStream> raw() const;
private:
// Disables copy construction and copy assignment (as the macro name states).
ARROW_DISALLOW_COPY_AND_ASSIGN(CompressedOutputStream);
// Private: construction goes through Make().
CompressedOutputStream() = default;
// Implementation details are kept out of the header behind an opaque,
// non-exported Impl class.
class ARROW_NO_EXPORT Impl;
std::unique_ptr<Impl> impl_;
};
/// \class CompressedInputStream
/// \brief An InputStream that wraps another InputStream and uses a
/// util::Codec for streaming decompression.
///
/// The public Close/Abort/Tell/Read/Peek entry points come from
/// internal::InputStreamConcurrencyWrapper, which forwards each of them to
/// the corresponding private Do* method declared below.
class ARROW_EXPORT CompressedInputStream
: public internal::InputStreamConcurrencyWrapper<CompressedInputStream> {
public:
~CompressedInputStream() override;
/// \brief Create a compressed input stream wrapping the given input stream.
///
/// The codec must be capable of streaming decompression. Some codecs,
/// like Snappy, are not able to do so.
///
/// \param[in] codec the codec to use (passed as a raw pointer)
/// \param[in] raw the input stream to wrap
/// \param[in] pool a MemoryPool; defaults to default_memory_pool()
/// \return the created CompressedInputStream
static Result<std::shared_ptr<CompressedInputStream>> Make(
util::Codec* codec, const std::shared_ptr<InputStream>& raw,
MemoryPool* pool = default_memory_pool());
// InputStream interface
bool closed() const override;
Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() override;
Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
const IOContext& io_context) override;
/// \brief Return the underlying raw input stream.
std::shared_ptr<InputStream> raw() const;
private:
// The concurrency wrapper calls the private Do* methods below, so it needs
// friend access.
friend InputStreamConcurrencyWrapper<CompressedInputStream>;
// Disables copy construction and copy assignment (as the macro name states).
ARROW_DISALLOW_COPY_AND_ASSIGN(CompressedInputStream);
// Private: construction goes through Make().
CompressedInputStream() = default;
/// \brief Close the compressed input stream. This implicitly closes the
/// underlying raw input stream.
Status DoClose();
// Implementation of Abort(); overrides the wrapper's default, which would
// otherwise call DoClose().
Status DoAbort() override;
// Implementations of Tell() and the two Read() overloads; invoked through
// the concurrency wrapper. DoPeek() is not overridden, so Peek() uses the
// wrapper's default.
Result<int64_t> DoTell() const;
Result<int64_t> DoRead(int64_t nbytes, void* out);
Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
// Implementation details are kept out of the header behind an opaque,
// non-exported Impl class.
class ARROW_NO_EXPORT Impl;
std::unique_ptr<Impl> impl_;
};
} // namespace io
} // namespace arrow

View file

@ -0,0 +1,263 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include "arrow/io/interfaces.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace io {
namespace internal {
/// \brief Scoped guard that holds a shared lock for its whole lifetime.
///
/// LockShared() is called on construction and UnlockShared() on destruction.
/// The guard is deliberately non-copyable: a copy would call UnlockShared()
/// a second time for a single LockShared(). Returning a guard by value from
/// a factory function still works thanks to C++17 guaranteed copy elision.
///
/// \tparam LockType any type providing LockShared() and UnlockShared()
template <class LockType>
class SharedLockGuard {
 public:
  /// \param[in] lock the lock to hold; must be non-null and must outlive the guard
  explicit SharedLockGuard(LockType* lock) : lock_(lock) { lock_->LockShared(); }
  ~SharedLockGuard() { lock_->UnlockShared(); }

  // One LockShared() must be matched by exactly one UnlockShared().
  SharedLockGuard(const SharedLockGuard&) = delete;
  SharedLockGuard& operator=(const SharedLockGuard&) = delete;

 protected:
  LockType* lock_;
};
/// \brief Scoped guard that holds an exclusive lock for its whole lifetime.
///
/// LockExclusive() is called on construction and UnlockExclusive() on
/// destruction. The guard is deliberately non-copyable: a copy would call
/// UnlockExclusive() a second time for a single LockExclusive(). Returning a
/// guard by value from a factory function still works thanks to C++17
/// guaranteed copy elision.
///
/// \tparam LockType any type providing LockExclusive() and UnlockExclusive()
template <class LockType>
class ExclusiveLockGuard {
 public:
  /// \param[in] lock the lock to hold; must be non-null and must outlive the guard
  explicit ExclusiveLockGuard(LockType* lock) : lock_(lock) { lock_->LockExclusive(); }
  ~ExclusiveLockGuard() { lock_->UnlockExclusive(); }

  // One LockExclusive() must be matched by exactly one UnlockExclusive().
  ExclusiveLockGuard(const ExclusiveLockGuard&) = delete;
  ExclusiveLockGuard& operator=(const ExclusiveLockGuard&) = delete;

 protected:
  LockType* lock_;
};
// Concurrency checker for debug builds: code sections are marked as "shared"
// or "exclusive", and the process aborts when the concurrency rules are
// violated. In release mode it does nothing.
//
// The class declaration is intentionally the same in debug and release
// builds, so that mixing a release-built DLL with a debug-built application
// (or the reverse) does not fail at runtime.
class ARROW_EXPORT SharedExclusiveChecker {
 public:
  SharedExclusiveChecker();

  // Enter / leave a shared section.
  void LockShared();
  void UnlockShared();

  // Enter / leave an exclusive section.
  void LockExclusive();
  void UnlockExclusive();

  // Scoped helper: the section is entered when the guard is created and left
  // when the guard is destroyed.
  SharedLockGuard<SharedExclusiveChecker> shared_guard() {
    return SharedLockGuard<SharedExclusiveChecker>{this};
  }

  // Exclusive counterpart of shared_guard().
  ExclusiveLockGuard<SharedExclusiveChecker> exclusive_guard() {
    return ExclusiveLockGuard<SharedExclusiveChecker>{this};
  }

 protected:
  // Opaque state, defined in the implementation file.
  struct Impl;
  std::shared_ptr<Impl> impl_;
};
// Concurrency wrappers for IO classes: they check that concurrent calls to
// the various methods are made correctly. Not every IO class needs to be
// wrapped, only a few core classes that get used in tests.
//
// Virtual inheritance is deliberately avoided, since virtual bases carry
// poorly understood semantic overhead that would be passed on to implementers
// and users of these interfaces. The method wrappers are simply duplicated
// between the two wrapper classes instead.
//
// InputStream wrapper. Every public entry point marks an exclusive section
// and forwards to the matching Do* method of the derived class.
//
// Methods to implement in the derived class:
//   Status DoClose();
//   Result<int64_t> DoTell() const;
//   Result<int64_t> DoRead(int64_t nbytes, void* out);
//   Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
//
// And optionally:
//   Status DoAbort() override;
//   Result<std::string_view> DoPeek(int64_t nbytes) override;
//
// These methods should be protected in the derived class, with
// InputStreamConcurrencyWrapper declared as a friend:
//   friend InputStreamConcurrencyWrapper<derived>;
template <class Derived>
class InputStreamConcurrencyWrapper : public InputStream {
 public:
  Status Close() final {
    const auto exclusive_section = lock_.exclusive_guard();
    return derived()->DoClose();
  }

  Status Abort() final {
    const auto exclusive_section = lock_.exclusive_guard();
    return derived()->DoAbort();
  }

  Result<int64_t> Tell() const final {
    const auto exclusive_section = lock_.exclusive_guard();
    return derived()->DoTell();
  }

  Result<int64_t> Read(int64_t nbytes, void* out) final {
    const auto exclusive_section = lock_.exclusive_guard();
    return derived()->DoRead(nbytes, out);
  }

  Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) final {
    const auto exclusive_section = lock_.exclusive_guard();
    return derived()->DoRead(nbytes);
  }

  Result<std::string_view> Peek(int64_t nbytes) final {
    const auto exclusive_section = lock_.exclusive_guard();
    return derived()->DoPeek(nbytes);
  }

 protected:
  // Fallback implementations. They are virtual because the derived class may
  // itself be derived from.

  // By default, aborting is the same as closing.
  virtual Status DoAbort() { return derived()->DoClose(); }

  // By default, peeking is unsupported.
  virtual Result<std::string_view> DoPeek(int64_t ARROW_ARG_UNUSED(nbytes)) {
    return Status::NotImplemented("Peek not implemented");
  }

  // Downcast helpers giving access to the Do* methods of the derived class.
  Derived* derived() { return ::arrow::internal::checked_cast<Derived*>(this); }

  const Derived* derived() const {
    return ::arrow::internal::checked_cast<const Derived*>(this);
  }

  // Mutable so that const entry points such as Tell() can mark a section.
  mutable SharedExclusiveChecker lock_;
};
// RandomAccessFile wrapper checking the correctness of concurrent calls.
// Methods that depend on (or may move) the stream position mark an exclusive
// section; GetSize() and ReadAt() mark a shared section. Each entry point
// then forwards to the matching Do* method of the derived class.
//
// Methods to implement in the derived class:
//   Status DoClose();
//   Result<int64_t> DoTell() const;
//   Result<int64_t> DoRead(int64_t nbytes, void* out);
//   Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
//   Status DoSeek(int64_t position);
//   Result<int64_t> DoGetSize();
//   Result<int64_t> DoReadAt(int64_t position, int64_t nbytes, void* out);
//   Result<std::shared_ptr<Buffer>> DoReadAt(int64_t position, int64_t nbytes);
//
// And optionally:
//   Status DoAbort() override;
//   Result<std::string_view> DoPeek(int64_t nbytes) override;
//
// These methods should be protected in the derived class, with
// RandomAccessFileConcurrencyWrapper declared as a friend:
//   friend RandomAccessFileConcurrencyWrapper<derived>;
template <class Derived>
class RandomAccessFileConcurrencyWrapper : public RandomAccessFile {
 public:
  Status Close() final {
    const auto exclusive_section = lock_.exclusive_guard();
    return derived()->DoClose();
  }

  Status Abort() final {
    const auto exclusive_section = lock_.exclusive_guard();
    return derived()->DoAbort();
  }

  Result<int64_t> Tell() const final {
    const auto exclusive_section = lock_.exclusive_guard();
    return derived()->DoTell();
  }

  Result<int64_t> Read(int64_t nbytes, void* out) final {
    const auto exclusive_section = lock_.exclusive_guard();
    return derived()->DoRead(nbytes, out);
  }

  Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) final {
    const auto exclusive_section = lock_.exclusive_guard();
    return derived()->DoRead(nbytes);
  }

  Result<std::string_view> Peek(int64_t nbytes) final {
    const auto exclusive_section = lock_.exclusive_guard();
    return derived()->DoPeek(nbytes);
  }

  Status Seek(int64_t position) final {
    const auto exclusive_section = lock_.exclusive_guard();
    return derived()->DoSeek(position);
  }

  Result<int64_t> GetSize() final {
    const auto shared_section = lock_.shared_guard();
    return derived()->DoGetSize();
  }

  // NOTE: ReadAt does not use the stream pointer, but it is allowed to update
  // it (this happens on Windows when using ReadFileEx). Consequently, any
  // method relying on the current position -- even one that does not update
  // it, such as Peek -- cannot run in parallel with ReadAt and has to take
  // the exclusive_guard.
  Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out) final {
    const auto shared_section = lock_.shared_guard();
    return derived()->DoReadAt(position, nbytes, out);
  }

  Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes) final {
    const auto shared_section = lock_.shared_guard();
    return derived()->DoReadAt(position, nbytes);
  }

 protected:
  // Fallback implementations. They are virtual because the derived class may
  // itself be derived from.

  // By default, aborting is the same as closing.
  virtual Status DoAbort() { return derived()->DoClose(); }

  // By default, peeking is unsupported.
  virtual Result<std::string_view> DoPeek(int64_t ARROW_ARG_UNUSED(nbytes)) {
    return Status::NotImplemented("Peek not implemented");
  }

  // Downcast helpers giving access to the Do* methods of the derived class.
  Derived* derived() { return ::arrow::internal::checked_cast<Derived*>(this); }

  const Derived* derived() const {
    return ::arrow::internal::checked_cast<const Derived*>(this);
  }

  // Mutable so that const entry points such as Tell() can mark a section.
  mutable SharedExclusiveChecker lock_;
};
} // namespace internal
} // namespace io
} // namespace arrow

View file

@ -0,0 +1,221 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// IO interface implementations for OS files
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#include "arrow/io/concurrency.h"
#include "arrow/io/interfaces.h"
#include "arrow/util/visibility.h"
namespace arrow {
class Buffer;
class MemoryPool;
class Status;
namespace io {
/// \brief An operating system file open in write-only mode.
///
/// Instances are obtained through one of the Open() overloads; the
/// constructor is private.
class ARROW_EXPORT FileOutputStream : public OutputStream {
public:
~FileOutputStream() override;
/// \brief Open a local file for writing, either truncating or appending to
/// any existing file
/// \param[in] path with UTF8 encoding
/// \param[in] append append to existing file, otherwise truncate to 0 bytes
/// \return an open FileOutputStream
///
/// When opening a new file, any existing file with the indicated path is
/// truncated to 0 bytes, deleting any existing data
static Result<std::shared_ptr<FileOutputStream>> Open(const std::string& path,
bool append = false);
/// \brief Open a file descriptor for writing. The underlying file isn't
/// truncated.
/// \param[in] fd file descriptor
/// \return an open FileOutputStream
///
/// The file descriptor becomes owned by the OutputStream, and will be closed
/// on Close() or destruction.
static Result<std::shared_ptr<FileOutputStream>> Open(int fd);
// OutputStream interface
Status Close() override;
bool closed() const override;
Result<int64_t> Tell() const override;
// Write bytes to the stream. Thread-safe
Status Write(const void* data, int64_t nbytes) override;
// Re-expose the other Write overloads from Writable, which the override
// above would otherwise hide. Excluded from the generated documentation.
/// \cond FALSE
using Writable::Write;
/// \endcond
/// \brief Return the file descriptor of this stream.
int file_descriptor() const;
private:
// Private: construction goes through Open().
FileOutputStream();
// Implementation details are kept out of the header behind an opaque,
// non-exported implementation class.
class ARROW_NO_EXPORT FileOutputStreamImpl;
std::unique_ptr<FileOutputStreamImpl> impl_;
};
/// \brief An operating system file open in read-only mode.
///
/// Reads through this implementation are unbuffered. If many small reads
/// need to be issued, it is recommended to use a buffering layer for good
/// performance.
///
/// The public Close/Abort/Tell/Read/Peek/Seek/GetSize/ReadAt entry points come
/// from internal::RandomAccessFileConcurrencyWrapper, which forwards each of
/// them to the corresponding private Do* method declared below.
class ARROW_EXPORT ReadableFile
: public internal::RandomAccessFileConcurrencyWrapper<ReadableFile> {
public:
~ReadableFile() override;
/// \brief Open a local file for reading
/// \param[in] path with UTF8 encoding
/// \param[in] pool a MemoryPool for memory allocations
/// \return ReadableFile instance
static Result<std::shared_ptr<ReadableFile>> Open(
const std::string& path, MemoryPool* pool = default_memory_pool());
/// \brief Open a local file for reading
/// \param[in] fd file descriptor
/// \param[in] pool a MemoryPool for memory allocations
/// \return ReadableFile instance
///
/// The file descriptor becomes owned by the ReadableFile, and will be closed
/// on Close() or destruction.
static Result<std::shared_ptr<ReadableFile>> Open(
int fd, MemoryPool* pool = default_memory_pool());
bool closed() const override;
/// \brief Return the file descriptor of this file.
int file_descriptor() const;
// NOTE(review): presumably a hint that the given ranges will be read soon --
// confirm against the RandomAccessFile::WillNeed documentation.
Status WillNeed(const std::vector<ReadRange>& ranges) override;
private:
// The concurrency wrapper calls the private Do* methods below, so it needs
// friend access.
friend RandomAccessFileConcurrencyWrapper<ReadableFile>;
// Private: construction goes through Open().
explicit ReadableFile(MemoryPool* pool);
// Implementations of Close(), Tell() and the two Read() overloads; invoked
// through the concurrency wrapper. DoAbort() and DoPeek() are not
// overridden, so the wrapper's defaults apply.
Status DoClose();
Result<int64_t> DoTell() const;
Result<int64_t> DoRead(int64_t nbytes, void* buffer);
Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
/// \brief Thread-safe implementation of ReadAt
Result<int64_t> DoReadAt(int64_t position, int64_t nbytes, void* out);
/// \brief Thread-safe implementation of ReadAt
Result<std::shared_ptr<Buffer>> DoReadAt(int64_t position, int64_t nbytes);
// Implementations of GetSize() and Seek(); invoked through the concurrency
// wrapper.
Result<int64_t> DoGetSize();
Status DoSeek(int64_t position);
// Implementation details are kept out of the header behind an opaque,
// non-exported implementation class.
class ARROW_NO_EXPORT ReadableFileImpl;
std::unique_ptr<ReadableFileImpl> impl_;
};
/// \brief A file interface that uses memory-mapped files for memory interactions
///
/// This implementation supports zero-copy reads. The same class is used
/// for both reading and writing.
///
/// If opening a file in a writable mode, it is not truncated first as with
/// FileOutputStream.
///
/// Instances are obtained through Create() or one of the Open() overloads;
/// the constructor is private.
class ARROW_EXPORT MemoryMappedFile : public ReadWriteFileInterface {
public:
~MemoryMappedFile() override;
/// Create new file with indicated size, return in read/write mode
/// \param[in] path path of the file to create
/// \param[in] size size of the new file
static Result<std::shared_ptr<MemoryMappedFile>> Create(const std::string& path,
int64_t size);
// mmap() with whole file
static Result<std::shared_ptr<MemoryMappedFile>> Open(const std::string& path,
FileMode::type mode);
// mmap() with a region of file, the offset must be a multiple of the page size
static Result<std::shared_ptr<MemoryMappedFile>> Open(const std::string& path,
FileMode::type mode,
const int64_t offset,
const int64_t length);
Status Close() override;
bool closed() const override;
Result<int64_t> Tell() const override;
Status Seek(int64_t position) override;
// Required by RandomAccessFile, copies memory into out. Not thread-safe
Result<int64_t> Read(int64_t nbytes, void* out) override;
// Zero copy read, moves position pointer. Not thread-safe
Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
// Zero-copy read, leaves position unchanged. Acquires a reader lock
// for the duration of slice creation (typically very short). Is thread-safe.
Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes) override;
// Raw copy of the memory at specified position. Thread-safe, but
// locks out other readers for the duration of memcpy. Prefer the
// zero copy method
Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out) override;
// Synchronous ReadAsync override
Future<std::shared_ptr<Buffer>> ReadAsync(const IOContext&, int64_t position,
int64_t nbytes) override;
// NOTE(review): presumably a hint that the given ranges will be read soon --
// confirm against the RandomAccessFile::WillNeed documentation.
Status WillNeed(const std::vector<ReadRange>& ranges) override;
bool supports_zero_copy() const override;
/// Write data at the current position in the file. Thread-safe
Status Write(const void* data, int64_t nbytes) override;
// Re-expose the other Write overloads from Writable, which the override
// above would otherwise hide. Excluded from the generated documentation.
/// \cond FALSE
using Writable::Write;
/// \endcond
/// Set the size of the map to new_size.
Status Resize(int64_t new_size);
/// Write data at a particular position in the file. Thread-safe
Status WriteAt(int64_t position, const void* data, int64_t nbytes) override;
Result<int64_t> GetSize() override;
/// Return the file descriptor of the underlying file.
int file_descriptor() const;
private:
// Private: construction goes through Create() or Open().
MemoryMappedFile();
// NOTE(review): presumably the shared implementation behind Write() and
// WriteAt() -- confirm in the implementation file.
Status WriteInternal(const void* data, int64_t nbytes);
// Opaque, non-exported mapping state, held through a shared_ptr.
class ARROW_NO_EXPORT MemoryMap;
std::shared_ptr<MemoryMap> memory_map_;
};
} // namespace io
} // namespace arrow

View file

@ -0,0 +1,284 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "arrow/io/interfaces.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
class Buffer;
class MemoryPool;
class Status;
namespace io {
class HdfsReadableFile;
class HdfsOutputStream;
/// DEPRECATED. Use the FileSystem API in arrow::fs instead.
///
/// Scoping wrapper around the enum used to tell files apart from directories
/// (referred to as ObjectType::type elsewhere in this header).
struct ObjectType {
// Kind of filesystem entry.
enum type { FILE, DIRECTORY };
};
/// DEPRECATED. Use the FileSystem API in arrow::fs instead.
///
/// Basic information about a filesystem entry; this is the output type of
/// FileSystem::Stat().
struct ARROW_EXPORT FileStatistics {
/// Size of file, -1 if finding length is unsupported
int64_t size;
/// Whether the entry is a file or a directory
ObjectType::type kind;
};
/// Abstract filesystem interface.
///
/// Every operation is pure virtual and reports success or failure through
/// the returned Status; results are delivered through output pointers.
class ARROW_EXPORT FileSystem {
public:
virtual ~FileSystem() = default;
/// \param[in] path the directory to create
virtual Status MakeDirectory(const std::string& path) = 0;
/// \param[in] path the directory to delete
virtual Status DeleteDirectory(const std::string& path) = 0;
/// \param[in] path the directory to list
/// \param[out] listing receives the children of `path`
/// NOTE(review): whether `listing` is cleared or appended to is up to the
/// implementation -- confirm before relying on either.
virtual Status GetChildren(const std::string& path,
std::vector<std::string>* listing) = 0;
/// \param[in] src the existing path
/// \param[in] dst the new path
virtual Status Rename(const std::string& src, const std::string& dst) = 0;
/// \param[in] path the path to inspect
/// \param[out] stat receives the size and kind of `path`
virtual Status Stat(const std::string& path, FileStatistics* stat) = 0;
};
/// Metadata describing a single HDFS path.
///
/// This is the output type of HadoopFileSystem::GetPathInfo() and the
/// element type produced by HadoopFileSystem::ListDirectory().
struct HdfsPathInfo {
// Whether the path is a file or a directory
ObjectType::type kind;
std::string name;
std::string owner;
std::string group;
// NOTE(review): sizes are presumably expressed in bytes -- confirm against
// the implementation.
int64_t size;
int64_t block_size;
// Access times in UNIX timestamps (seconds)
int32_t last_modified_time;
int32_t last_access_time;
int16_t replication;
int16_t permissions;
};
/// Connection parameters passed to HadoopFileSystem::Connect().
///
/// NOTE: `port` has no default member initializer, so it is indeterminate in
/// a default-initialized instance; set it explicitly or value-initialize the
/// struct (`HdfsConnectionConfig config{};`).
struct HdfsConnectionConfig {
std::string host;
int port;
std::string user;
// NOTE(review): presumably the path to a Kerberos ticket cache -- confirm
std::string kerb_ticket;
// Additional key/value configuration entries
std::unordered_map<std::string, std::string> extra_conf;
};
/// FileSystem implementation for an HDFS cluster.
///
/// The constructor is private and the class is non-copyable: instances are
/// obtained through Connect(). The actual work is delegated to a private
/// implementation object (HadoopFileSystemImpl).
class ARROW_EXPORT HadoopFileSystem : public FileSystem {
public:
~HadoopFileSystem() override;
// Connect to an HDFS cluster given a configuration
//
// @param config (in): configuration for connecting
// @param fs (out): the created client
// @returns Status
static Status Connect(const HdfsConnectionConfig* config,
std::shared_ptr<HadoopFileSystem>* fs);
// Create directory and all parents
//
// @param path (in): absolute HDFS path
// @returns Status
Status MakeDirectory(const std::string& path) override;
// Delete file or directory
// @param path absolute path to data
// @param recursive if path is a directory, delete contents as well
// @returns error status on failure
Status Delete(const std::string& path, bool recursive = false);
// Delete a directory (FileSystem interface).
// @param path (in): absolute HDFS path
// @returns Status
Status DeleteDirectory(const std::string& path) override;
// Disconnect from cluster
//
// @returns Status
Status Disconnect();
// @param path (in): absolute HDFS path
// @returns bool, true if the path exists, false if not (or on error)
bool Exists(const std::string& path);
// @param path (in): absolute HDFS path
// @param info (out)
// @returns Status
Status GetPathInfo(const std::string& path, HdfsPathInfo* info);
// @param nbytes (out): total capacity of the filesystem
// @returns Status
Status GetCapacity(int64_t* nbytes);
// @param nbytes (out): total bytes used of the filesystem
// @returns Status
Status GetUsed(int64_t* nbytes);
// List the children of a path (FileSystem interface).
// @param path (in): path to list
// @param listing (out): receives the children of `path`
// @returns Status
Status GetChildren(const std::string& path, std::vector<std::string>* listing) override;
/// List directory contents
///
/// If path is a relative path, returned values will be absolute paths or URIs
/// starting from the current working directory.
Status ListDirectory(const std::string& path, std::vector<HdfsPathInfo>* listing);
/// Return the filesystem's current working directory.
///
/// The working directory is the base path for all relative paths given to
/// other APIs.
/// NOTE: this actually returns a URI.
Status GetWorkingDirectory(std::string* out);
/// Change the owner and/or group of a path
///
/// @param path file path to change
/// @param owner pass null for no change
/// @param group pass null for no change
Status Chown(const std::string& path, const char* owner, const char* group);
/// Change path permissions
///
/// \param path Absolute path in file system
/// \param mode Mode bitset
/// \return Status
Status Chmod(const std::string& path, int mode);
// Move file or directory from source path to destination path within the
// current filesystem
Status Rename(const std::string& src, const std::string& dst) override;
// NOTE(review): Copy() and Move() are undocumented here; how Move() differs
// from Rename() must be checked in the implementation.
Status Copy(const std::string& src, const std::string& dst);
Status Move(const std::string& src, const std::string& dst);
// Retrieve size and kind of a path (FileSystem interface).
// @param path (in): path to inspect
// @param stat (out): receives the statistics
Status Stat(const std::string& path, FileStatistics* stat) override;
// TODO(wesm): SetWorkingDirectory (GetWorkingDirectory is declared above)
// Open an HDFS file in READ mode. Returns error
// status if the file is not found.
//
// The overloads differ only in whether a buffer size and/or an IOContext
// are supplied explicitly.
//
// @param path complete file path
// @param buffer_size (in): read buffer size
// @param io_context (in): IOContext to associate with the opened file
// @param file (out): the opened file
Status OpenReadable(const std::string& path, int32_t buffer_size,
std::shared_ptr<HdfsReadableFile>* file);
Status OpenReadable(const std::string& path, int32_t buffer_size,
const io::IOContext& io_context,
std::shared_ptr<HdfsReadableFile>* file);
Status OpenReadable(const std::string& path, std::shared_ptr<HdfsReadableFile>* file);
Status OpenReadable(const std::string& path, const io::IOContext& io_context,
std::shared_ptr<HdfsReadableFile>* file);
// FileMode::WRITE options
// @param path complete file path
// @param append (in): NOTE(review): presumably appends to an existing file
//   instead of truncating it -- confirm
// @param buffer_size 0 by default
// @param replication 0 by default
// @param default_block_size 0 by default
// @param file (out): the opened stream
//
// "By default" refers to the shorter overload below, which takes none of
// the three tuning parameters.
Status OpenWritable(const std::string& path, bool append, int32_t buffer_size,
int16_t replication, int64_t default_block_size,
std::shared_ptr<HdfsOutputStream>* file);
Status OpenWritable(const std::string& path, bool append,
std::shared_ptr<HdfsOutputStream>* file);
private:
// The file classes are friends, giving them access to the private members
// of this class.
friend class HdfsReadableFile;
friend class HdfsOutputStream;
// Private implementation, defined outside this header.
class ARROW_NO_EXPORT HadoopFileSystemImpl;
std::unique_ptr<HadoopFileSystemImpl> impl_;
// Private: use Connect() to create instances.
HadoopFileSystem();
ARROW_DISALLOW_COPY_AND_ASSIGN(HadoopFileSystem);
};
/// RandomAccessFile implementation for a file opened on HDFS.
///
/// The constructor is private; HadoopFileSystem::HadoopFileSystemImpl is a
/// friend, and HadoopFileSystem::OpenReadable() hands out instances. The
/// class is non-copyable.
class ARROW_EXPORT HdfsReadableFile : public RandomAccessFile {
public:
~HdfsReadableFile() override;
Status Close() override;
bool closed() const override;
// NOTE: If you wish to read a particular range of a file in a multithreaded
// context, you may prefer to use ReadAt to avoid locking issues
Result<int64_t> Read(int64_t nbytes, void* out) override;
Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
// Positional reads: `position` is the offset to read from
Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out) override;
Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes) override;
Status Seek(int64_t position) override;
Result<int64_t> Tell() const override;
Result<int64_t> GetSize() override;
private:
explicit HdfsReadableFile(const io::IOContext&);
// Private implementation, defined outside this header.
class ARROW_NO_EXPORT HdfsReadableFileImpl;
std::unique_ptr<HdfsReadableFileImpl> impl_;
friend class HadoopFileSystem::HadoopFileSystemImpl;
ARROW_DISALLOW_COPY_AND_ASSIGN(HdfsReadableFile);
};
// Output stream writing to a file on HDFS.
//
// Named "OutputStream" rather than "WritableFile" because it does not support
// seeking, which the WritableFile interface requires.
//
// The constructor is private; HadoopFileSystem::HadoopFileSystemImpl is a
// friend, and HadoopFileSystem::OpenWritable() hands out instances. The class
// is non-copyable.
class ARROW_EXPORT HdfsOutputStream : public OutputStream {
public:
~HdfsOutputStream() override;
Status Close() override;
bool closed() const override;
// Keep the other Write() overloads inherited from the base visible next to
// the override below.
using OutputStream::Write;
Status Write(const void* buffer, int64_t nbytes) override;
Status Flush() override;
Result<int64_t> Tell() const override;
private:
// Private implementation, defined outside this header.
class ARROW_NO_EXPORT HdfsOutputStreamImpl;
std::unique_ptr<HdfsOutputStreamImpl> impl_;
friend class HadoopFileSystem::HadoopFileSystemImpl;
HdfsOutputStream();
ARROW_DISALLOW_COPY_AND_ASSIGN(HdfsOutputStream);
};
// NOTE(review): presumably returns Status::OK() when the libhdfs shared
// library is available and an error Status otherwise -- confirm against the
// implementation.
ARROW_EXPORT Status HaveLibHdfs();
} // namespace io
} // namespace arrow

View file

@ -0,0 +1,362 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include <string_view>
#include <vector>
#include "arrow/io/type_fwd.h"
#include "arrow/type_fwd.h"
#include "arrow/util/cancel.h"
#include "arrow/util/macros.h"
#include "arrow/util/type_fwd.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace io {
/// \brief A byte range described by a starting offset and a length.
struct ReadRange {
  int64_t offset;
  int64_t length;

  /// Two ranges are equal when both their offsets and lengths match.
  friend bool operator==(const ReadRange& left, const ReadRange& right) {
    if (left.offset != right.offset) {
      return false;
    }
    return left.length == right.length;
  }

  friend bool operator!=(const ReadRange& left, const ReadRange& right) {
    return !(left == right);
  }

  /// \brief Return true if `other` lies entirely within this range
  /// (a range contains itself).
  bool Contains(const ReadRange& other) const {
    // `other` must not start before this range...
    if (other.offset < offset) {
      return false;
    }
    // ...and must not end past it.
    const int64_t this_end = offset + length;
    const int64_t other_end = other.offset + other.length;
    return other_end <= this_end;
  }
};
/// EXPERIMENTAL: options provider for IO tasks
///
/// Includes an Executor (which will be used to execute asynchronous reads),
/// a MemoryPool (which will be used to allocate buffers when zero copy reads
/// are not possible), and an external id (in case the executor receives tasks from
/// multiple sources and must distinguish tasks associated with this IOContext).
///
/// It also carries a StopToken, exposed through stop_token().
struct ARROW_EXPORT IOContext {
// No specified executor: will use a global IO thread pool
IOContext() : IOContext(default_memory_pool(), StopToken::Unstoppable()) {}
// Default memory pool with a caller-supplied stop token; delegates to the
// (pool, stop_token) constructor below.
explicit IOContext(StopToken stop_token)
: IOContext(default_memory_pool(), std::move(stop_token)) {}
// Caller-supplied pool, no explicit executor (defined out of line).
explicit IOContext(MemoryPool* pool, StopToken stop_token = StopToken::Unstoppable());
// Fully explicit: pool, executor, stop token and external id
// (external_id defaults to -1).
explicit IOContext(MemoryPool* pool, ::arrow::internal::Executor* executor,
StopToken stop_token = StopToken::Unstoppable(),
int64_t external_id = -1)
: pool_(pool),
executor_(executor),
external_id_(external_id),
stop_token_(std::move(stop_token)) {}
// Explicit executor, using the default memory pool.
explicit IOContext(::arrow::internal::Executor* executor,
StopToken stop_token = StopToken::Unstoppable(),
int64_t external_id = -1)
: pool_(default_memory_pool()),
executor_(executor),
external_id_(external_id),
stop_token_(std::move(stop_token)) {}
MemoryPool* pool() const { return pool_; }
::arrow::internal::Executor* executor() const { return executor_; }
// An application-specific ID, forwarded to executor task submissions
int64_t external_id() const { return external_id_; }
// Returns a copy of the stored stop token
StopToken stop_token() const { return stop_token_; }
private:
// The pool and executor are held as raw pointers and are not owned here
MemoryPool* pool_;
::arrow::internal::Executor* executor_;
int64_t external_id_;
StopToken stop_token_;
};
/// \brief Common base class of all file and stream interfaces
///
/// Declares the operations shared by every stream (closing, position
/// reporting, open/closed state) and records the FileMode the stream was
/// opened with. The class is abstract and non-copyable.
class ARROW_EXPORT FileInterface : public std::enable_shared_from_this<FileInterface> {
public:
// Pure virtual destructor: the class cannot be instantiated directly
virtual ~FileInterface() = 0;
/// \brief Close the stream cleanly
///
/// For writable streams, this will attempt to flush any pending data
/// before releasing the underlying resource.
///
/// After Close() is called, closed() returns true and the stream is not
/// available for further operations.
virtual Status Close() = 0;
/// \brief Close the stream asynchronously
///
/// By default, this will just submit the synchronous Close() to the
/// default I/O thread pool. Subclasses may implement this in a more
/// efficient manner.
virtual Future<> CloseAsync();
/// \brief Close the stream abruptly
///
/// This method does not guarantee that any pending data is flushed.
/// It merely releases any underlying resource used by the stream for
/// its operation.
///
/// After Abort() is called, closed() returns true and the stream is not
/// available for further operations.
virtual Status Abort();
/// \brief Return the position in this stream
virtual Result<int64_t> Tell() const = 0;
/// \brief Return whether the stream is closed
virtual bool closed() const = 0;
/// \brief Return the mode recorded for this stream
FileMode::type mode() const { return mode_; }
protected:
// The mode starts out as FileMode::READ; subclasses change it with set_mode()
FileInterface() : mode_(FileMode::READ) {}
FileMode::type mode_;
void set_mode(FileMode::type mode) { mode_ = mode; }
private:
ARROW_DISALLOW_COPY_AND_ASSIGN(FileInterface);
};
/// \brief Interface for objects whose current position can be changed
class ARROW_EXPORT Seekable {
public:
virtual ~Seekable() = default;
/// \brief Move to the given position
/// \param[in] position the position to seek to
/// \return Status
virtual Status Seek(int64_t position) = 0;
};
/// \brief Interface for objects that accept written data
class ARROW_EXPORT Writable {
public:
virtual ~Writable() = default;
/// \brief Write the given data to the stream
///
/// This method always processes the bytes in full. Depending on the
/// semantics of the stream, the data may be written out immediately,
/// held in a buffer, or written asynchronously. In the case where
/// the stream buffers the data, it will be copied. To avoid potentially
/// large copies, use the Write variant that takes an owned Buffer.
///
/// \param[in] data pointer to the bytes to write
/// \param[in] nbytes the number of bytes to write
virtual Status Write(const void* data, int64_t nbytes) = 0;
/// \brief Write the given data to the stream
///
/// Since the Buffer owns its memory, this method can avoid a copy if
/// buffering is required. See Write(const void*, int64_t) for details.
virtual Status Write(const std::shared_ptr<Buffer>& data);
/// \brief Flush buffered bytes, if any
virtual Status Flush();
/// \brief Write the contents of a string_view to the stream
///
/// Non-virtual convenience overload.
Status Write(std::string_view data);
};
/// \brief Interface for objects that data can be read from sequentially
class ARROW_EXPORT Readable {
public:
virtual ~Readable() = default;
/// \brief Read data from current file position.
///
/// Read at most `nbytes` from the current file position into `out`.
/// The number of bytes read is returned.
///
/// \param[in] nbytes the maximum number of bytes to read
/// \param[out] out the destination the bytes are read into
virtual Result<int64_t> Read(int64_t nbytes, void* out) = 0;
/// \brief Read data from current file position.
///
/// Read at most `nbytes` from the current file position. Less bytes may
/// be read if EOF is reached. This method updates the current file position.
///
/// In some cases (e.g. a memory-mapped file), this method may avoid a
/// memory copy.
virtual Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) = 0;
/// EXPERIMENTAL: The IOContext associated with this file.
///
/// By default, this is the same as default_io_context(), but it may be
/// overridden by subclasses.
virtual const IOContext& io_context() const;
};
/// \brief A write-only stream: FileInterface combined with Writable
///
/// FileInterface is inherited virtually. The constructor is protected, so
/// only subclasses can be instantiated.
class ARROW_EXPORT OutputStream : virtual public FileInterface, public Writable {
protected:
OutputStream() = default;
};
/// \brief A read-only stream: FileInterface combined with Readable
///
/// Both bases are inherited virtually. The constructor is protected, so only
/// subclasses can be instantiated.
class ARROW_EXPORT InputStream : virtual public FileInterface, virtual public Readable {
public:
/// \brief Advance or skip stream indicated number of bytes
/// \param[in] nbytes the number to move forward
/// \return Status
Status Advance(int64_t nbytes);
/// \brief Return zero-copy string_view to upcoming bytes.
///
/// Do not modify the stream position. The view becomes invalid after
/// any operation on the stream. May trigger buffering if the requested
/// size is larger than the number of buffered bytes.
///
/// May return NotImplemented on streams that don't support it.
///
/// \param[in] nbytes the maximum number of bytes to see
virtual Result<std::string_view> Peek(int64_t nbytes);
/// \brief Return true if InputStream is capable of zero copy Buffer reads
///
/// Zero copy reads imply the use of Buffer-returning Read() overloads.
virtual bool supports_zero_copy() const;
/// \brief Read and return stream metadata
///
/// If the stream implementation doesn't support metadata, empty metadata
/// is returned. Note that it is allowed to return a null pointer rather
/// than an allocated empty metadata.
virtual Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata();
/// \brief Read stream metadata asynchronously
/// \param[in] io_context the IOContext to use for the operation
virtual Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
const IOContext& io_context);
/// \brief Read stream metadata asynchronously
///
/// Non-virtual overload without an explicit IOContext.
/// NOTE(review): presumably forwards to the overload above with the
/// stream's own io_context() -- confirm against the implementation.
Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync();
protected:
InputStream() = default;
};
/// \brief An input stream that also supports seeking and positional reads
class ARROW_EXPORT RandomAccessFile : public InputStream, public Seekable {
public:
/// Necessary because we hold a std::unique_ptr
~RandomAccessFile() override;
/// \brief Create an isolated InputStream that reads a segment of a
/// RandomAccessFile. Multiple such stream can be created and used
/// independently without interference
/// \param[in] file a file instance
/// \param[in] file_offset the starting position in the file
/// \param[in] nbytes the extent of bytes to read. The file should have
/// sufficient bytes available
static Result<std::shared_ptr<InputStream>> GetStream(
std::shared_ptr<RandomAccessFile> file, int64_t file_offset, int64_t nbytes);
/// \brief Return the total file size in bytes.
///
/// This method does not read or move the current file position, so is safe
/// to call concurrently with e.g. ReadAt().
virtual Result<int64_t> GetSize() = 0;
/// \brief Read data from given file position.
///
/// At most `nbytes` bytes are read. The number of bytes read is returned
/// (it can be less than `nbytes` if EOF is reached).
///
/// This method can be safely called from multiple threads concurrently.
/// It is unspecified whether this method updates the file position or not.
///
/// The default RandomAccessFile-provided implementation uses Seek() and Read(),
/// but subclasses may override it with a more efficient implementation
/// that doesn't depend on implicit file positioning.
///
/// \param[in] position Where to read bytes from
/// \param[in] nbytes The number of bytes to read
/// \param[out] out The buffer to read bytes into
/// \return The number of bytes read, or an error
virtual Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out);
/// \brief Read data from given file position.
///
/// At most `nbytes` bytes are read, but it can be less if EOF is reached.
///
/// \param[in] position Where to read bytes from
/// \param[in] nbytes The number of bytes to read
/// \return A buffer containing the bytes read, or an error
virtual Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes);
/// EXPERIMENTAL: Read data asynchronously.
///
/// \param[in] position Where to read bytes from
/// \param[in] nbytes The number of bytes to read
virtual Future<std::shared_ptr<Buffer>> ReadAsync(const IOContext&, int64_t position,
int64_t nbytes);
/// EXPERIMENTAL: Read data asynchronously, using the file's IOContext.
Future<std::shared_ptr<Buffer>> ReadAsync(int64_t position, int64_t nbytes);
/// EXPERIMENTAL: Explicit multi-read.
/// \brief Request multiple reads at once
///
/// The underlying filesystem may optimize these reads by coalescing small reads into
/// large reads or by breaking up large reads into multiple parallel smaller reads. The
/// reads should be issued in parallel if it makes sense for the filesystem.
///
/// One future will be returned for each input read range. Multiple returned futures
/// may correspond to a single read. Or, a single returned future may be a combined
/// result of several individual reads.
///
/// \param[in] ranges The ranges to read
/// \return One future per requested range, each completing once the data for
/// its range is available
virtual std::vector<Future<std::shared_ptr<Buffer>>> ReadManyAsync(
const IOContext&, const std::vector<ReadRange>& ranges);
/// EXPERIMENTAL: Explicit multi-read, using the file's IOContext.
std::vector<Future<std::shared_ptr<Buffer>>> ReadManyAsync(
const std::vector<ReadRange>& ranges);
/// EXPERIMENTAL: Inform that the given ranges may be read soon.
///
/// Some implementations might arrange to prefetch some of the data.
/// However, no guarantee is made and the default implementation does nothing.
/// For robust prefetching, use ReadAt() or ReadAsync().
virtual Status WillNeed(const std::vector<ReadRange>& ranges);
protected:
RandomAccessFile();
private:
// Private state, defined outside this header. Holding it through a
// unique_ptr to an incomplete type is why the destructor is declared above.
struct ARROW_NO_EXPORT Impl;
std::unique_ptr<Impl> interface_impl_;
};
/// \brief An output stream that also supports seeking and positional writes
class ARROW_EXPORT WritableFile : public OutputStream, public Seekable {
public:
/// \brief Write data at the given file position
/// \param[in] position where to write the bytes
/// \param[in] data pointer to the bytes to write
/// \param[in] nbytes the number of bytes to write
/// \return Status
virtual Status WriteAt(int64_t position, const void* data, int64_t nbytes) = 0;
protected:
WritableFile() = default;
};
/// \brief A file supporting both random-access reads and writes
class ARROW_EXPORT ReadWriteFileInterface : public RandomAccessFile, public WritableFile {
protected:
// set_mode() is reachable through both bases, so the call is qualified to
// avoid an ambiguous lookup. It records the mode as FileMode::READWRITE
// instead of the FileMode::READ default set by FileInterface.
ReadWriteFileInterface() { RandomAccessFile::set_mode(FileMode::READWRITE); }
};
/// \brief Return an iterator on an input stream
///
/// The iterator yields a fixed-size block on each Next() call, except the
/// last block in the stream which may be smaller.
/// Once the end of stream is reached, Next() returns nullptr
/// (unlike InputStream::Read() which returns an empty buffer).
///
/// \param[in] stream the input stream to iterate over
/// \param[in] block_size the size of the blocks yielded by the iterator
/// \return the iterator, or an error
ARROW_EXPORT
Result<Iterator<std::shared_ptr<Buffer>>> MakeInputStreamIterator(
std::shared_ptr<InputStream> stream, int64_t block_size);
} // namespace io
} // namespace arrow

View file

@ -0,0 +1,197 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Public API for different memory sharing / IO mechanisms
#pragma once
#include <cstdint>
#include <memory>
#include <string_view>
#include <vector>
#include "arrow/io/concurrency.h"
#include "arrow/io/interfaces.h"
#include "arrow/type_fwd.h"
#include "arrow/util/visibility.h"
namespace arrow {
class Status;
namespace io {
/// \brief An output stream that writes to a resizable buffer
class ARROW_EXPORT BufferOutputStream : public OutputStream {
public:
/// \brief Create an output stream on top of an existing resizable buffer
explicit BufferOutputStream(const std::shared_ptr<ResizableBuffer>& buffer);
/// \brief Create in-memory output stream with indicated capacity using a
/// memory pool
/// \param[in] initial_capacity the initial allocated internal capacity of
/// the OutputStream
/// \param[in,out] pool a MemoryPool to use for allocations
/// \return the created stream
static Result<std::shared_ptr<BufferOutputStream>> Create(
int64_t initial_capacity = 4096, MemoryPool* pool = default_memory_pool());
~BufferOutputStream() override;
// Implement the OutputStream interface
/// Close the stream, preserving the buffer (retrieve it with Finish()).
Status Close() override;
bool closed() const override;
Result<int64_t> Tell() const override;
Status Write(const void* data, int64_t nbytes) override;
/// \cond FALSE
using OutputStream::Write;
/// \endcond
/// Close the stream and return the buffer
Result<std::shared_ptr<Buffer>> Finish();
/// \brief Initialize state of OutputStream with newly allocated memory and
/// set position to 0
///
/// Note that the default capacity here (1024) differs from the default
/// used by Create() (4096).
/// \param[in] initial_capacity the starting allocated capacity
/// \param[in,out] pool the memory pool to use for allocations
/// \return Status
Status Reset(int64_t initial_capacity = 1024, MemoryPool* pool = default_memory_pool());
/// \brief Return the value of the internal capacity counter
int64_t capacity() const { return capacity_; }
private:
BufferOutputStream();
// Ensures there is sufficient space available to write nbytes
Status Reserve(int64_t nbytes);
// The buffer being written to
std::shared_ptr<ResizableBuffer> buffer_;
bool is_open_;
int64_t capacity_;
int64_t position_;
// NOTE(review): presumably a cached pointer to buffer_'s writable memory --
// confirm against the implementation.
uint8_t* mutable_data_;
};
/// \brief A helper class to track the size of allocations
///
/// Writes to this stream do not copy or retain any data, they just bump
/// a size counter that can be later used to know exactly which data size
/// needs to be allocated for actual writing.
class ARROW_EXPORT MockOutputStream : public OutputStream {
public:
/// Create an open stream with a size counter of zero
MockOutputStream() : extent_bytes_written_(0), is_open_(true) {}
// Implement the OutputStream interface
Status Close() override;
bool closed() const override;
Result<int64_t> Tell() const override;
Status Write(const void* data, int64_t nbytes) override;
/// \cond FALSE
using Writable::Write;
/// \endcond
/// \brief Return the current value of the size counter
int64_t GetExtentBytesWritten() const { return extent_bytes_written_; }
private:
int64_t extent_bytes_written_;
bool is_open_;
};
/// \brief An output stream that writes into a fixed-size mutable buffer
///
/// Implements the WritableFile interface, so seeking and positional writes
/// are available in addition to sequential writes.
class ARROW_EXPORT FixedSizeBufferWriter : public WritableFile {
public:
/// Input buffer must be mutable, will abort if not
explicit FixedSizeBufferWriter(const std::shared_ptr<Buffer>& buffer);
~FixedSizeBufferWriter() override;
Status Close() override;
bool closed() const override;
Status Seek(int64_t position) override;
Result<int64_t> Tell() const override;
Status Write(const void* data, int64_t nbytes) override;
/// \cond FALSE
using Writable::Write;
/// \endcond
Status WriteAt(int64_t position, const void* data, int64_t nbytes) override;
// NOTE(review): the three setters below presumably tune a multi-threaded
// memcpy used when writing (thread count, per-thread block size, and the
// minimum write size at which it kicks in) -- confirm against the
// implementation.
void set_memcopy_threads(int num_threads);
void set_memcopy_blocksize(int64_t blocksize);
void set_memcopy_threshold(int64_t threshold);
protected:
// Private implementation, defined outside this header.
class FixedSizeBufferWriterImpl;
std::unique_ptr<FixedSizeBufferWriterImpl> impl_;
};
/// \class BufferReader
/// \brief Random access zero-copy reads on an arrow::Buffer
///
/// The public RandomAccessFile entry points come from
/// internal::RandomAccessFileConcurrencyWrapper; this class supplies the
/// Do*() methods below and declares the wrapper a friend so it can reach them.
class ARROW_EXPORT BufferReader
: public internal::RandomAccessFileConcurrencyWrapper<BufferReader> {
public:
/// \brief Instantiate from std::shared_ptr<Buffer>.
///
/// This is a zero-copy constructor.
explicit BufferReader(std::shared_ptr<Buffer> buffer);
/// \brief Instantiate from std::string. Owns data.
static std::unique_ptr<BufferReader> FromString(std::string data);
bool closed() const override;
bool supports_zero_copy() const override;
/// \brief Return the underlying buffer
std::shared_ptr<Buffer> buffer() const { return buffer_; }
// Synchronous ReadAsync override
Future<std::shared_ptr<Buffer>> ReadAsync(const IOContext&, int64_t position,
int64_t nbytes) override;
Status WillNeed(const std::vector<ReadRange>& ranges) override;
protected:
friend RandomAccessFileConcurrencyWrapper<BufferReader>;
// Implementation hooks, one per public operation (Close, Read, ReadAt, Peek,
// Tell, Seek, GetSize).
Status DoClose();
Result<int64_t> DoRead(int64_t nbytes, void* buffer);
Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
Result<int64_t> DoReadAt(int64_t position, int64_t nbytes, void* out);
Result<std::shared_ptr<Buffer>> DoReadAt(int64_t position, int64_t nbytes);
Result<std::string_view> DoPeek(int64_t nbytes) override;
Result<int64_t> DoTell() const;
Status DoSeek(int64_t position);
Result<int64_t> DoGetSize();
// Return Status::Invalid if the reader is no longer open, OK otherwise.
Status CheckClosed() const {
if (!is_open_) {
return Status::Invalid("Operation forbidden on closed BufferReader");
}
return Status::OK();
}
std::shared_ptr<Buffer> buffer_;
// NOTE(review): data_ and size_ presumably cache buffer_'s data pointer and
// size -- confirm against the constructor.
const uint8_t* data_;
int64_t size_;
int64_t position_;
bool is_open_;
};
} // namespace io
} // namespace arrow

View file

@ -0,0 +1,169 @@
// Copyright https://code.google.com/p/mman-win32/
//
// Licensed under the MIT License;
// You may obtain a copy of the License at
//
// https://opensource.org/licenses/MIT
#pragma once
#include "arrow/util/windows_compatibility.h"
#include <errno.h>
#include <io.h>
#include <sys/types.h>
#include <cstdint>
// Memory protection flags for the `prot` argument of mmap() and mprotect().
// PROT_NONE is the absence of any flag; the others are distinct bits.
#define PROT_NONE 0
#define PROT_READ 1
#define PROT_WRITE 2
#define PROT_EXEC 4
// Mapping flags for the `flags` argument of mmap(). The mmap() emulation in
// this file only looks at MAP_FIXED (rejected with EINVAL) and MAP_ANONYMOUS
// (mapping not backed by a file descriptor); the others are accepted but not
// consulted.
#define MAP_FILE 0
#define MAP_SHARED 1
#define MAP_PRIVATE 2
#define MAP_TYPE 0xf
#define MAP_FIXED 0x10
#define MAP_ANONYMOUS 0x20
#define MAP_ANON MAP_ANONYMOUS
// Value returned by mmap() on failure
#define MAP_FAILED ((void*)-1)
/* Flags for msync. The msync() emulation in this file ignores them. */
#define MS_ASYNC 1
#define MS_SYNC 2
#define MS_INVALIDATE 4
// Fallback for Windows headers that do not define FILE_MAP_EXECUTE
#ifndef FILE_MAP_EXECUTE
# define FILE_MAP_EXECUTE 0x0020
#endif
// Convert a Win32 error code into the value stored in errno by the wrappers
// in this file.
//
// No translation is implemented yet: the Win32 code is passed through as-is
// (0 stays 0) and `deferr`, the suggested default errno value, is ignored.
static inline int __map_mman_error(const DWORD err, const int deferr) {
  // TODO: implement
  return (err != 0) ? static_cast<int>(err) : 0;
}
// Translate POSIX PROT_* flags into a Win32 page protection constant
// (PAGE_*), as passed to CreateFileMapping() and VirtualProtect().
//
// PROT_NONE yields 0. Otherwise the result depends only on whether PROT_EXEC
// and PROT_WRITE are set; PROT_READ alone maps to PAGE_READONLY.
static inline DWORD __map_mmap_prot_page(const int prot) {
  if (prot == PROT_NONE) {
    return 0;
  }
  const bool wantsExec = (prot & PROT_EXEC) != 0;
  const bool wantsWrite = (prot & PROT_WRITE) != 0;
  if (wantsExec) {
    return wantsWrite ? PAGE_EXECUTE_READWRITE : PAGE_EXECUTE_READ;
  }
  return wantsWrite ? PAGE_READWRITE : PAGE_READONLY;
}
// Translate POSIX PROT_* flags into the Win32 desired-access mask
// (FILE_MAP_*) passed to MapViewOfFile().
//
// Each PROT_* bit contributes its FILE_MAP_* counterpart; PROT_NONE yields 0.
static inline DWORD __map_mmap_prot_file(const int prot) {
  DWORD access = 0;
  if (prot != PROT_NONE) {
    if ((prot & PROT_READ) != 0) {
      access |= FILE_MAP_READ;
    }
    if ((prot & PROT_WRITE) != 0) {
      access |= FILE_MAP_WRITE;
    }
    if ((prot & PROT_EXEC) != 0) {
      access |= FILE_MAP_EXECUTE;
    }
  }
  return access;
}
static inline void* mmap(void* addr, size_t len, int prot, int flags, int fildes,
off_t off) {
HANDLE fm, h;
void* map = MAP_FAILED;
const uint64_t off64 = static_cast<uint64_t>(off);
const uint64_t maxSize = off64 + len;
const DWORD dwFileOffsetLow = static_cast<DWORD>(off64 & 0xFFFFFFFFUL);
const DWORD dwFileOffsetHigh = static_cast<DWORD>((off64 >> 32) & 0xFFFFFFFFUL);
const DWORD dwMaxSizeLow = static_cast<DWORD>(maxSize & 0xFFFFFFFFUL);
const DWORD dwMaxSizeHigh = static_cast<DWORD>((maxSize >> 32) & 0xFFFFFFFFUL);
const DWORD protect = __map_mmap_prot_page(prot);
const DWORD desiredAccess = __map_mmap_prot_file(prot);
errno = 0;
if (len == 0
/* Unsupported flag combinations */
|| (flags & MAP_FIXED) != 0
/* Unsupported protection combinations */
|| prot == PROT_EXEC) {
errno = EINVAL;
return MAP_FAILED;
}
h = ((flags & MAP_ANONYMOUS) == 0) ? (HANDLE)_get_osfhandle(fildes)
: INVALID_HANDLE_VALUE;
if ((flags & MAP_ANONYMOUS) == 0 && h == INVALID_HANDLE_VALUE) {
errno = EBADF;
return MAP_FAILED;
}
fm = CreateFileMapping(h, NULL, protect, dwMaxSizeHigh, dwMaxSizeLow, NULL);
if (fm == NULL) {
errno = __map_mman_error(GetLastError(), EPERM);
return MAP_FAILED;
}
map = MapViewOfFile(fm, desiredAccess, dwFileOffsetHigh, dwFileOffsetLow, len);
CloseHandle(fm);
if (map == NULL) {
errno = __map_mman_error(GetLastError(), EPERM);
return MAP_FAILED;
}
return map;
}
// Emulation of POSIX munmap(): unmap the view starting at `addr`.
//
// `len` is ignored; UnmapViewOfFile() only takes the view's address.
// Returns 0 on success, or -1 with errno set on failure.
static inline int munmap(void* addr, size_t len) {
  if (!UnmapViewOfFile(addr)) {
    errno = __map_mman_error(GetLastError(), EPERM);
    return -1;
  }
  return 0;
}
static inline int mprotect(void* addr, size_t len, int prot) {
DWORD newProtect = __map_mmap_prot_page(prot);
DWORD oldProtect = 0;
if (VirtualProtect(addr, len, newProtect, &oldProtect)) return 0;
errno = __map_mman_error(GetLastError(), EPERM);
return -1;
}
// Emulation of POSIX msync(): flush `len` bytes of the view starting at
// `addr` with FlushViewOfFile().
//
// `flags` (MS_ASYNC / MS_SYNC / MS_INVALIDATE) is ignored.
// Returns 0 on success, or -1 with errno set on failure.
static inline int msync(void* addr, size_t len, int flags) {
  if (!FlushViewOfFile(addr, len)) {
    errno = __map_mman_error(GetLastError(), EPERM);
    return -1;
  }
  return 0;
}
// Emulation of POSIX mlock(): lock `len` bytes starting at `addr` with
// VirtualLock().
//
// Returns 0 on success, or -1 with errno set on failure.
static inline int mlock(const void* addr, size_t len) {
  if (!VirtualLock((LPVOID)addr, len)) {
    errno = __map_mman_error(GetLastError(), EPERM);
    return -1;
  }
  return 0;
}
// Emulation of POSIX munlock(): unlock `len` bytes starting at `addr` with
// VirtualUnlock().
//
// Returns 0 on success, or -1 with errno set on failure.
static inline int munlock(const void* addr, size_t len) {
  if (!VirtualUnlock((LPVOID)addr, len)) {
    errno = __map_mman_error(GetLastError(), EPERM);
    return -1;
  }
  return 0;
}

View file

@ -0,0 +1,118 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Slow stream implementations, mainly for testing and benchmarking
#pragma once
#include <cstdint>
#include <memory>
#include <utility>
#include "arrow/io/interfaces.h"
#include "arrow/util/visibility.h"
namespace arrow {
class Buffer;
class Status;
namespace io {
/// \brief Source of latency values for the slow stream wrappers below
///
/// Abstract: NextLatency() is pure virtual. Concrete generators are obtained
/// through the Make() factories.
class ARROW_EXPORT LatencyGenerator {
public:
virtual ~LatencyGenerator();
// NOTE(review): presumably blocks the calling thread for NextLatency()
// seconds -- confirm against the implementation.
void Sleep();
/// \brief Return the next latency value
virtual double NextLatency() = 0;
/// \brief Create a generator producing latencies around `average_latency`
static std::shared_ptr<LatencyGenerator> Make(double average_latency);
/// \brief Same as above, with an explicit seed
/// NOTE(review): the seed presumably makes the generated sequence
/// reproducible -- confirm against the implementation.
static std::shared_ptr<LatencyGenerator> Make(double average_latency, int32_t seed);
};
// XXX use ConcurrencyWrapper? It could increase chances of finding a race.
/// \brief Shared state and constructors for the slow stream wrappers
///
/// Holds the wrapped stream together with the LatencyGenerator that the
/// derived classes consult. `StreamType` is the interface being wrapped;
/// this class derives from it.
template <class StreamType>
class SlowInputStreamBase : public StreamType {
 public:
  /// Wrap `stream`, taking latencies from an existing generator.
  SlowInputStreamBase(std::shared_ptr<StreamType> stream,
                      std::shared_ptr<LatencyGenerator> latencies)
      : stream_(std::move(stream)), latencies_(std::move(latencies)) {}

  /// Wrap `stream`, with a generator built from `average_latency`.
  SlowInputStreamBase(std::shared_ptr<StreamType> stream, double average_latency)
      : SlowInputStreamBase(std::move(stream),
                            LatencyGenerator::Make(average_latency)) {}

  /// Wrap `stream`, with a generator built from `average_latency` and `seed`.
  SlowInputStreamBase(std::shared_ptr<StreamType> stream, double average_latency,
                      int32_t seed)
      : SlowInputStreamBase(std::move(stream),
                            LatencyGenerator::Make(average_latency, seed)) {}

 protected:
  std::shared_ptr<StreamType> stream_;
  std::shared_ptr<LatencyGenerator> latencies_;
};
/// \brief An InputStream wrapper that makes reads slower.
///
/// Read() calls are made slower by an average latency (in seconds).
/// Actual latencies form a normal distribution closely centered
/// on the average latency.
/// Other calls are forwarded directly.
class ARROW_EXPORT SlowInputStream : public SlowInputStreamBase<InputStream> {
public:
~SlowInputStream() override;
// Inherit the three SlowInputStreamBase constructors: (stream, latencies),
// (stream, average_latency) and (stream, average_latency, seed).
using SlowInputStreamBase<InputStream>::SlowInputStreamBase;
// InputStream overrides; all are declared here and defined outside this header.
Status Close() override;
Status Abort() override;
bool closed() const override;
// The two Read() overloads are the calls the class documentation describes
// as being slowed down.
Result<int64_t> Read(int64_t nbytes, void* out) override;
Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
Result<std::string_view> Peek(int64_t nbytes) override;
Result<int64_t> Tell() const override;
};
/// \brief A RandomAccessFile wrapper that makes reads slower.
///
/// Similar to SlowInputStream, but allows random access and seeking.
class ARROW_EXPORT SlowRandomAccessFile : public SlowInputStreamBase<RandomAccessFile> {
public:
~SlowRandomAccessFile() override;
// Inherit the three SlowInputStreamBase constructors: (stream, latencies),
// (stream, average_latency) and (stream, average_latency, seed).
using SlowInputStreamBase<RandomAccessFile>::SlowInputStreamBase;
// RandomAccessFile overrides; all are declared here and defined outside this header.
Status Close() override;
Status Abort() override;
bool closed() const override;
// Sequential reads.
Result<int64_t> Read(int64_t nbytes, void* out) override;
Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
// Positional reads.
// NOTE(review): presumably these incur the same latency as Read() -- confirm
// against the implementation.
Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out) override;
Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes) override;
Result<std::string_view> Peek(int64_t nbytes) override;
// Size and position handling.
Result<int64_t> GetSize() override;
Status Seek(int64_t position) override;
Result<int64_t> Tell() const override;
};
} // namespace io
} // namespace arrow

View file

@ -0,0 +1,82 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include "arrow/io/interfaces.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace io {
/// \brief OutputStream implementation that just writes to stdout.
class ARROW_EXPORT StdoutStream : public OutputStream {
 public:
  StdoutStream();
  ~StdoutStream() override = default;

  Status Close() override;
  bool closed() const override;

  Result<int64_t> Tell() const override;

  Status Write(const void* data, int64_t nbytes) override;

 private:
  // Stream position counter; it has no initializer in this header.
  // NOTE(review): presumably the number of bytes written so far -- confirm.
  int64_t pos_;
};
/// \brief OutputStream implementation that just writes to stderr.
class ARROW_EXPORT StderrStream : public OutputStream {
 public:
  StderrStream();
  ~StderrStream() override = default;

  Status Close() override;
  bool closed() const override;

  Result<int64_t> Tell() const override;

  Status Write(const void* data, int64_t nbytes) override;

 private:
  // Stream position counter; it has no initializer in this header.
  // NOTE(review): presumably the number of bytes written so far -- confirm.
  int64_t pos_;
};
/// \brief InputStream implementation that just reads from stdin.
class ARROW_EXPORT StdinStream : public InputStream {
 public:
  StdinStream();
  ~StdinStream() override = default;

  Status Close() override;
  bool closed() const override;

  Result<int64_t> Tell() const override;

  /// Read into a caller-provided buffer `out`.
  Result<int64_t> Read(int64_t nbytes, void* out) override;
  /// Read and return the data as a Buffer.
  Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;

 private:
  // Stream position counter; it has no initializer in this header.
  // NOTE(review): presumably the number of bytes read so far -- confirm.
  int64_t pos_;
};
} // namespace io
} // namespace arrow

View file

@ -0,0 +1,69 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "arrow/io/interfaces.h"
#include "arrow/testing/visibility.h"
#include "arrow/type_fwd.h"
namespace arrow {
namespace io {
// Forward declaration: this header only refers to MemoryMappedFile through a
// raw pointer (ZeroMemoryMap) and a std::shared_ptr (MemoryMapFixture).
class MemoryMappedFile;
// NOTE(review): presumably a test assertion that the file at `path` holds
// exactly `contents` -- confirm against the definition.
ARROW_TESTING_EXPORT
void AssertFileContents(const std::string& path, const std::string& contents);
// NOTE(review): presumably reports whether a file exists at `path` -- confirm.
ARROW_TESTING_EXPORT bool FileExists(const std::string& path);
// Returns a Status.
// NOTE(review): presumably asks the OS to drop its cached pages for the
// local file at `path` -- confirm platform behavior in the definition.
ARROW_TESTING_EXPORT Status PurgeLocalFileFromOsCache(const std::string& path);
// Returns a Status.
// NOTE(review): presumably overwrites the mapped region of `file` with zero
// bytes -- confirm against the definition.
ARROW_TESTING_EXPORT
Status ZeroMemoryMap(MemoryMappedFile* file);
// Test fixture helper for tests that work with files and memory maps.
// NOTE(review): presumably every path handed to CreateFile(), InitMemoryMap()
// or AppendFile() is remembered in tmp_files_ and removed by TearDown() --
// the definitions are not in this header; confirm.
class ARROW_TESTING_EXPORT MemoryMapFixture {
public:
void TearDown();
// `size` is a byte count -- TODO confirm against the definition.
void CreateFile(const std::string& path, int64_t size);
// Returns the MemoryMappedFile wrapped in a Result.
// Note the argument order: size first, then path (the reverse of CreateFile).
Result<std::shared_ptr<MemoryMappedFile>> InitMemoryMap(int64_t size,
const std::string& path);
void AppendFile(const std::string& path);
private:
// File paths tracked by the fixture.
std::vector<std::string> tmp_files_;
};
// Abstract RandomAccessFile that additionally exposes read statistics.
// Concrete instances are created with Make().
class ARROW_TESTING_EXPORT TrackedRandomAccessFile : public io::RandomAccessFile {
public:
// NOTE(review): presumably the number of read calls recorded since
// construction or the last ResetStats() -- confirm.
virtual int64_t num_reads() const = 0;
// NOTE(review): presumably the total number of bytes covered by the recorded
// reads -- confirm.
virtual int64_t bytes_read() const = 0;
// Returns a reference to the recorded io::ReadRange entries.
// NOTE(review): the reference presumably stays valid only while the object
// is alive and the stats are not reset -- confirm.
virtual const std::vector<io::ReadRange>& get_read_ranges() const = 0;
virtual void ResetStats() = 0;
// Factory. `target` is passed as a raw pointer.
// NOTE(review): presumably non-owning, so `target` must outlive the returned
// object -- confirm.
static std::unique_ptr<TrackedRandomAccessFile> Make(io::RandomAccessFile* target);
};
} // namespace io
} // namespace arrow

View file

@ -0,0 +1,60 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Transform stream implementations
#pragma once
#include <cstdint>
#include <functional>
#include <memory>
#include <utility>
#include "arrow/io/interfaces.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace io {
/// \brief An InputStream built from another InputStream and a transform callback.
///
/// NOTE(review): presumably data read from the wrapped stream is passed
/// through the transform before being returned to the caller; the
/// implementation lives behind Impl and is not visible here -- confirm.
class ARROW_EXPORT TransformInputStream : public InputStream {
public:
/// Callback type: receives a buffer by const reference and returns a
/// Result holding the output buffer.
using TransformFunc =
std::function<Result<std::shared_ptr<Buffer>>(const std::shared_ptr<Buffer>&)>;
/// \param wrapped the underlying input stream
/// \param transform the callback to apply
TransformInputStream(std::shared_ptr<InputStream> wrapped, TransformFunc transform);
~TransformInputStream() override;
Status Close() override;
Status Abort() override;
bool closed() const override;
Result<int64_t> Read(int64_t nbytes, void* out) override;
Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() override;
Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
const IOContext& io_context) override;
Result<int64_t> Tell() const override;
protected:
// Private implementation; Impl is only forward-declared in this header.
struct Impl;
std::unique_ptr<Impl> impl_;
};
} // namespace io
} // namespace arrow

View file

@ -0,0 +1,77 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/type_fwd.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace io {
/// \brief Scope for the file open mode enumeration.
///
/// The unscoped enum is nested in a struct so that the values are spelled
/// FileMode::READ, FileMode::WRITE and FileMode::READWRITE, and the enum
/// type itself is FileMode::type.
struct FileMode {
enum type { READ, WRITE, READWRITE };
};
// Forward declarations of structs; their definitions are not in this header.
struct IOContext;
struct CacheOptions;
/// \brief EXPERIMENTAL: convenience global singleton for default IOContext settings
///
/// \return a const reference to the default IOContext
ARROW_EXPORT
const IOContext& default_io_context();
/// \brief Get the capacity of the global I/O thread pool
///
/// Return the number of worker threads in the thread pool to which
/// Arrow dispatches various I/O-bound tasks. This is an ideal number,
/// not necessarily the exact number of threads at a given point in time.
///
/// You can change this number using SetIOThreadPoolCapacity().
///
/// \return the configured (ideal) number of worker threads
ARROW_EXPORT int GetIOThreadPoolCapacity();
/// \brief Set the capacity of the global I/O thread pool
///
/// Set the number of worker threads in the thread pool to which
/// Arrow dispatches various I/O-bound tasks.
///
/// The current number is returned by GetIOThreadPoolCapacity().
///
/// \param threads the number of worker threads to set
/// \return a Status describing the outcome; the failure conditions are not
/// visible from this declaration
ARROW_EXPORT Status SetIOThreadPoolCapacity(int threads);
// Forward declarations of arrow::io classes. Only the names are introduced
// here; the full definitions live in other headers.
class FileInterface;
class Seekable;
class Writable;
class Readable;
class OutputStream;
class FileOutputStream;
class InputStream;
class ReadableFile;
class RandomAccessFile;
class MemoryMappedFile;
class WritableFile;
class ReadWriteFileInterface;
class LatencyGenerator;
class BufferOutputStream;
class BufferReader;
class CompressedInputStream;
class CompressedOutputStream;
class BufferedInputStream;
class BufferedOutputStream;
} // namespace io
} // namespace arrow