Adding all project files
This commit is contained in:
parent
6c9e127bdc
commit
cd4316ad0f
42289 changed files with 8009643 additions and 0 deletions
25
venv/Lib/site-packages/pyarrow/include/arrow/io/api.h
Normal file
25
venv/Lib/site-packages/pyarrow/include/arrow/io/api.h
Normal file
|
@ -0,0 +1,25 @@
|
|||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "arrow/io/buffered.h"
|
||||
#include "arrow/io/compressed.h"
|
||||
#include "arrow/io/file.h"
|
||||
#include "arrow/io/hdfs.h"
|
||||
#include "arrow/io/interfaces.h"
|
||||
#include "arrow/io/memory.h"
|
168
venv/Lib/site-packages/pyarrow/include/arrow/io/buffered.h
Normal file
168
venv/Lib/site-packages/pyarrow/include/arrow/io/buffered.h
Normal file
|
@ -0,0 +1,168 @@
|
|||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
// Buffered stream implementations
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string_view>
|
||||
|
||||
#include "arrow/io/concurrency.h"
|
||||
#include "arrow/io/interfaces.h"
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
|
||||
class Buffer;
|
||||
class MemoryPool;
|
||||
class Status;
|
||||
|
||||
namespace io {
|
||||
|
||||
/// \brief An OutputStream that stages writes in a temporary buffer before
/// passing them on to a wrapped ("raw") OutputStream
class ARROW_EXPORT BufferedOutputStream : public OutputStream {
 public:
  ~BufferedOutputStream() override;

  /// \brief Build a buffered output stream on top of an existing output stream.
  /// \param[in] buffer_size size of the temporary write buffer
  /// \param[in] pool MemoryPool from which allocations are made
  /// \param[in] raw the OutputStream to wrap
  /// \return the newly created BufferedOutputStream
  static Result<std::shared_ptr<BufferedOutputStream>> Create(
      int64_t buffer_size, MemoryPool* pool, std::shared_ptr<OutputStream> raw);

  /// \brief Change the size of the internal buffer
  /// \param[in] new_buffer_size the size the buffer should have afterwards
  /// \return Status
  Status SetBufferSize(int64_t new_buffer_size);

  /// \brief Current size of the internal buffer
  int64_t buffer_size() const;

  /// \brief Number of bytes still waiting in the buffer, i.e. not yet
  /// flushed to the raw OutputStream
  int64_t bytes_buffered() const;

  /// \brief Flush all pending writes, then give up the raw OutputStream.
  /// No further operation on this object is valid afterwards.
  /// \return the underlying OutputStream
  Result<std::shared_ptr<OutputStream>> Detach();

  // OutputStream interface

  /// \brief Close this buffered stream; the underlying raw output stream
  /// is implicitly closed too.
  Status Close() override;
  Status Abort() override;
  bool closed() const override;

  Result<int64_t> Tell() const override;
  // Write bytes to the stream. Thread-safe
  Status Write(const void* data, int64_t nbytes) override;
  Status Write(const std::shared_ptr<Buffer>& data) override;

  Status Flush() override;

  /// \brief Access the underlying raw output stream.
  std::shared_ptr<OutputStream> raw() const;

 private:
  explicit BufferedOutputStream(std::shared_ptr<OutputStream> raw, MemoryPool* pool);

  // Implementation details are kept out of the header behind an opaque Impl.
  class ARROW_NO_EXPORT Impl;
  std::unique_ptr<Impl> impl_;
};
|
||||
|
||||
/// \class BufferedInputStream
|
||||
/// \brief An InputStream that performs buffered reads from an unbuffered
|
||||
/// InputStream, which can mitigate the overhead of many small reads in some
|
||||
/// cases
|
||||
class ARROW_EXPORT BufferedInputStream
|
||||
: public internal::InputStreamConcurrencyWrapper<BufferedInputStream> {
|
||||
public:
|
||||
~BufferedInputStream() override;
|
||||
|
||||
/// \brief Create a BufferedInputStream from a raw InputStream
|
||||
/// \param[in] buffer_size the size of the temporary read buffer
|
||||
/// \param[in] pool a MemoryPool to use for allocations
|
||||
/// \param[in] raw a raw InputStream
|
||||
/// \param[in] raw_read_bound a bound on the maximum number of bytes
|
||||
/// to read from the raw input stream. The default -1 indicates that
|
||||
/// it is unbounded
|
||||
/// \return the created BufferedInputStream
|
||||
static Result<std::shared_ptr<BufferedInputStream>> Create(
|
||||
int64_t buffer_size, MemoryPool* pool, std::shared_ptr<InputStream> raw,
|
||||
int64_t raw_read_bound = -1);
|
||||
|
||||
/// \brief Resize internal read buffer; calls to Read(...) will read at least
|
||||
/// this many bytes from the raw InputStream if possible.
|
||||
/// \param[in] new_buffer_size the new read buffer size
|
||||
/// \return Status
|
||||
Status SetBufferSize(int64_t new_buffer_size);
|
||||
|
||||
/// \brief Return the number of remaining bytes in the read buffer
|
||||
int64_t bytes_buffered() const;
|
||||
|
||||
/// \brief Return the current size of the internal buffer
|
||||
int64_t buffer_size() const;
|
||||
|
||||
/// \brief Release the raw InputStream. Any data buffered will be
|
||||
/// discarded. Further operations on this object are invalid
|
||||
/// \return raw the underlying InputStream
|
||||
std::shared_ptr<InputStream> Detach();
|
||||
|
||||
/// \brief Return the unbuffered InputStream
|
||||
std::shared_ptr<InputStream> raw() const;
|
||||
|
||||
// InputStream APIs
|
||||
|
||||
bool closed() const override;
|
||||
Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() override;
|
||||
Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
|
||||
const IOContext& io_context) override;
|
||||
|
||||
private:
|
||||
friend InputStreamConcurrencyWrapper<BufferedInputStream>;
|
||||
|
||||
explicit BufferedInputStream(std::shared_ptr<InputStream> raw, MemoryPool* pool,
|
||||
int64_t raw_total_bytes_bound);
|
||||
|
||||
Status DoClose();
|
||||
Status DoAbort() override;
|
||||
|
||||
/// \brief Returns the position of the buffered stream, though the position
|
||||
/// of the unbuffered stream may be further advanced.
|
||||
Result<int64_t> DoTell() const;
|
||||
|
||||
Result<int64_t> DoRead(int64_t nbytes, void* out);
|
||||
|
||||
/// \brief Read into buffer.
|
||||
Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
|
||||
|
||||
/// \brief Return a zero-copy string view referencing buffered data,
|
||||
/// but do not advance the position of the stream. Buffers data and
|
||||
/// expands the buffer size if necessary
|
||||
Result<std::string_view> DoPeek(int64_t nbytes) override;
|
||||
|
||||
class ARROW_NO_EXPORT Impl;
|
||||
std::unique_ptr<Impl> impl_;
|
||||
};
|
||||
|
||||
} // namespace io
|
||||
} // namespace arrow
|
157
venv/Lib/site-packages/pyarrow/include/arrow/io/caching.h
Normal file
157
venv/Lib/site-packages/pyarrow/include/arrow/io/caching.h
Normal file
|
@ -0,0 +1,157 @@
|
|||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/io/interfaces.h"
|
||||
#include "arrow/util/type_fwd.h"
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace io {
|
||||
|
||||
struct ARROW_EXPORT CacheOptions {
|
||||
static constexpr double kDefaultIdealBandwidthUtilizationFrac = 0.9;
|
||||
static constexpr int64_t kDefaultMaxIdealRequestSizeMib = 64;
|
||||
|
||||
/// \brief The maximum distance in bytes between two consecutive
|
||||
/// ranges; beyond this value, ranges are not combined
|
||||
int64_t hole_size_limit;
|
||||
/// \brief The maximum size in bytes of a combined range; if
|
||||
/// combining two consecutive ranges would produce a range of a
|
||||
/// size greater than this, they are not combined
|
||||
int64_t range_size_limit;
|
||||
/// \brief A lazy cache does not perform any I/O until requested.
|
||||
/// lazy = false: request all byte ranges when PreBuffer or WillNeed is called.
|
||||
/// lazy = True, prefetch_limit = 0: request merged byte ranges only after the reader
|
||||
/// needs them.
|
||||
/// lazy = True, prefetch_limit = k: prefetch up to k merged byte ranges ahead of the
|
||||
/// range that is currently being read.
|
||||
bool lazy;
|
||||
/// \brief The maximum number of ranges to be prefetched. This is only used
|
||||
/// for lazy cache to asynchronously read some ranges after reading the target range.
|
||||
int64_t prefetch_limit = 0;
|
||||
|
||||
bool operator==(const CacheOptions& other) const {
|
||||
return hole_size_limit == other.hole_size_limit &&
|
||||
range_size_limit == other.range_size_limit && lazy == other.lazy &&
|
||||
prefetch_limit == other.prefetch_limit;
|
||||
}
|
||||
|
||||
/// \brief Construct CacheOptions from network storage metrics (e.g. S3).
|
||||
///
|
||||
/// \param[in] time_to_first_byte_millis Seek-time or Time-To-First-Byte (TTFB) in
|
||||
/// milliseconds, also called call setup latency of a new read request.
|
||||
/// The value is a positive integer.
|
||||
/// \param[in] transfer_bandwidth_mib_per_sec Data transfer Bandwidth (BW) in MiB/sec
|
||||
/// (per connection).
|
||||
/// The value is a positive integer.
|
||||
/// \param[in] ideal_bandwidth_utilization_frac Transfer bandwidth utilization fraction
|
||||
/// (per connection) to maximize the net data load.
|
||||
/// The value is a positive double precision number less than 1.
|
||||
/// \param[in] max_ideal_request_size_mib The maximum single data request size (in MiB)
|
||||
/// to maximize the net data load.
|
||||
/// The value is a positive integer.
|
||||
/// \return A new instance of CacheOptions.
|
||||
static CacheOptions MakeFromNetworkMetrics(
|
||||
int64_t time_to_first_byte_millis, int64_t transfer_bandwidth_mib_per_sec,
|
||||
double ideal_bandwidth_utilization_frac = kDefaultIdealBandwidthUtilizationFrac,
|
||||
int64_t max_ideal_request_size_mib = kDefaultMaxIdealRequestSizeMib);
|
||||
|
||||
static CacheOptions Defaults();
|
||||
static CacheOptions LazyDefaults();
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
/// \brief A read cache designed to hide IO latencies when reading.
|
||||
///
|
||||
/// This class takes multiple byte ranges that an application expects to read, and
|
||||
/// coalesces them into fewer, larger read requests, which benefits performance on some
|
||||
/// filesystems, particularly remote ones like Amazon S3. By default, it also issues
|
||||
/// these read requests in parallel up front.
|
||||
///
|
||||
/// To use:
|
||||
/// 1. Cache() the ranges you expect to read in the future. Ideally, these ranges have
|
||||
/// the exact offset and length that will later be read. The cache will combine those
|
||||
/// ranges according to parameters (see constructor).
|
||||
///
|
||||
/// By default, the cache will also start fetching the combined ranges in parallel in
|
||||
/// the background, unless CacheOptions.lazy is set.
|
||||
///
|
||||
/// 2. Call WaitFor() to be notified when the given ranges have been read. If
|
||||
/// CacheOptions.lazy is set, I/O will be triggered in the background here instead.
|
||||
/// This can be done in parallel (e.g. if parsing a file, call WaitFor() for each
|
||||
/// chunk of the file that can be parsed in parallel).
|
||||
///
|
||||
/// 3. Call Read() to retrieve the actual data for the given ranges.
|
||||
/// A synchronous application may skip WaitFor() and just call Read() - it will still
|
||||
/// benefit from coalescing and parallel fetching.
|
||||
class ARROW_EXPORT ReadRangeCache {
|
||||
public:
|
||||
static constexpr int64_t kDefaultHoleSizeLimit = 8192;
|
||||
static constexpr int64_t kDefaultRangeSizeLimit = 32 * 1024 * 1024;
|
||||
|
||||
/// Construct a read cache with default
|
||||
explicit ReadRangeCache(std::shared_ptr<RandomAccessFile> file, IOContext ctx)
|
||||
: ReadRangeCache(file, file.get(), std::move(ctx), CacheOptions::Defaults()) {}
|
||||
|
||||
/// Construct a read cache with given options
|
||||
explicit ReadRangeCache(std::shared_ptr<RandomAccessFile> file, IOContext ctx,
|
||||
CacheOptions options)
|
||||
: ReadRangeCache(file, file.get(), std::move(ctx), options) {}
|
||||
|
||||
/// Construct a read cache with an unowned file
|
||||
ReadRangeCache(RandomAccessFile* file, IOContext ctx, CacheOptions options)
|
||||
: ReadRangeCache(NULLPTR, file, std::move(ctx), options) {}
|
||||
|
||||
~ReadRangeCache();
|
||||
|
||||
/// \brief Cache the given ranges in the background.
|
||||
///
|
||||
/// The caller must ensure that the ranges do not overlap with each other,
|
||||
/// nor with previously cached ranges. Otherwise, behaviour will be undefined.
|
||||
Status Cache(std::vector<ReadRange> ranges);
|
||||
|
||||
/// \brief Read a range previously given to Cache().
|
||||
Result<std::shared_ptr<Buffer>> Read(ReadRange range);
|
||||
|
||||
/// \brief Wait until all ranges added so far have been cached.
|
||||
Future<> Wait();
|
||||
|
||||
/// \brief Wait until all given ranges have been cached.
|
||||
Future<> WaitFor(std::vector<ReadRange> ranges);
|
||||
|
||||
protected:
|
||||
struct Impl;
|
||||
struct LazyImpl;
|
||||
|
||||
ReadRangeCache(std::shared_ptr<RandomAccessFile> owned_file, RandomAccessFile* file,
|
||||
IOContext ctx, CacheOptions options);
|
||||
|
||||
std::unique_ptr<Impl> impl_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace io
|
||||
} // namespace arrow
|
124
venv/Lib/site-packages/pyarrow/include/arrow/io/compressed.h
Normal file
124
venv/Lib/site-packages/pyarrow/include/arrow/io/compressed.h
Normal file
|
@ -0,0 +1,124 @@
|
|||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
// Compressed stream implementations
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "arrow/io/concurrency.h"
|
||||
#include "arrow/io/interfaces.h"
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
|
||||
class MemoryPool;
|
||||
class Status;
|
||||
|
||||
namespace util {
|
||||
|
||||
class Codec;
|
||||
|
||||
} // namespace util
|
||||
|
||||
namespace io {
|
||||
|
||||
/// \brief An OutputStream layered on a raw OutputStream and created from a
/// compression codec
class ARROW_EXPORT CompressedOutputStream : public OutputStream {
 public:
  ~CompressedOutputStream() override;

  /// \brief Build a compressed output stream on top of an existing output stream.
  ///
  /// Streaming compression must be supported by the codec. Some codecs,
  /// like Snappy, do not support it.
  static Result<std::shared_ptr<CompressedOutputStream>> Make(
      util::Codec* codec, const std::shared_ptr<OutputStream>& raw,
      MemoryPool* pool = default_memory_pool());

  // OutputStream interface

  /// \brief Close this compressed stream; the underlying raw output stream
  /// is implicitly closed too.
  Status Close() override;
  Status Abort() override;
  bool closed() const override;

  Result<int64_t> Tell() const override;

  Status Write(const void* data, int64_t nbytes) override;
  /// \cond FALSE
  using Writable::Write;
  /// \endcond
  Status Flush() override;

  /// \brief Access the underlying raw output stream.
  std::shared_ptr<OutputStream> raw() const;

 private:
  ARROW_DISALLOW_COPY_AND_ASSIGN(CompressedOutputStream);

  // The default constructor is private; Make() is the public way in.
  CompressedOutputStream() = default;

  // Implementation details are kept out of the header behind an opaque Impl.
  class ARROW_NO_EXPORT Impl;
  std::unique_ptr<Impl> impl_;
};
|
||||
|
||||
class ARROW_EXPORT CompressedInputStream
|
||||
: public internal::InputStreamConcurrencyWrapper<CompressedInputStream> {
|
||||
public:
|
||||
~CompressedInputStream() override;
|
||||
|
||||
/// \brief Create a compressed input stream wrapping the given input stream.
|
||||
///
|
||||
/// The codec must be capable of streaming decompression. Some codecs,
|
||||
/// like Snappy, are not able to do so.
|
||||
static Result<std::shared_ptr<CompressedInputStream>> Make(
|
||||
util::Codec* codec, const std::shared_ptr<InputStream>& raw,
|
||||
MemoryPool* pool = default_memory_pool());
|
||||
|
||||
// InputStream interface
|
||||
|
||||
bool closed() const override;
|
||||
Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() override;
|
||||
Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
|
||||
const IOContext& io_context) override;
|
||||
|
||||
/// \brief Return the underlying raw input stream.
|
||||
std::shared_ptr<InputStream> raw() const;
|
||||
|
||||
private:
|
||||
friend InputStreamConcurrencyWrapper<CompressedInputStream>;
|
||||
ARROW_DISALLOW_COPY_AND_ASSIGN(CompressedInputStream);
|
||||
|
||||
CompressedInputStream() = default;
|
||||
|
||||
/// \brief Close the compressed input stream. This implicitly closes the
|
||||
/// underlying raw input stream.
|
||||
Status DoClose();
|
||||
Status DoAbort() override;
|
||||
Result<int64_t> DoTell() const;
|
||||
Result<int64_t> DoRead(int64_t nbytes, void* out);
|
||||
Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
|
||||
|
||||
class ARROW_NO_EXPORT Impl;
|
||||
std::unique_ptr<Impl> impl_;
|
||||
};
|
||||
|
||||
} // namespace io
|
||||
} // namespace arrow
|
263
venv/Lib/site-packages/pyarrow/include/arrow/io/concurrency.h
Normal file
263
venv/Lib/site-packages/pyarrow/include/arrow/io/concurrency.h
Normal file
|
@ -0,0 +1,263 @@
|
|||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "arrow/io/interfaces.h"
|
||||
#include "arrow/result.h"
|
||||
#include "arrow/status.h"
|
||||
#include "arrow/util/checked_cast.h"
|
||||
#include "arrow/util/macros.h"
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace io {
|
||||
namespace internal {
|
||||
|
||||
/// \brief Scope guard that enters the shared section of a lock on
/// construction and leaves it on destruction.
///
/// LockType must provide LockShared() and UnlockShared().
///
/// The guard is move-only. A copy would call UnlockShared() a second time
/// for a single LockShared(), so copying is disabled; a moved-from guard
/// releases nothing.
template <class LockType>
class SharedLockGuard {
 public:
  /// \param[in] lock the lock to hold; must be non-null and must outlive
  /// the guard
  explicit SharedLockGuard(LockType* lock) : lock_(lock) { lock_->LockShared(); }

  /// Take over the section held by `other`; `other` no longer unlocks.
  SharedLockGuard(SharedLockGuard&& other) noexcept : lock_(other.lock_) {
    other.lock_ = nullptr;
  }

  SharedLockGuard(const SharedLockGuard&) = delete;
  SharedLockGuard& operator=(const SharedLockGuard&) = delete;
  SharedLockGuard& operator=(SharedLockGuard&&) = delete;

  ~SharedLockGuard() {
    // A null pointer marks a guard whose section was moved elsewhere.
    if (lock_ != nullptr) {
      lock_->UnlockShared();
    }
  }

 protected:
  LockType* lock_;
};
|
||||
|
||||
/// \brief Scope guard that enters the exclusive section of a lock on
/// construction and leaves it on destruction.
///
/// LockType must provide LockExclusive() and UnlockExclusive().
///
/// The guard is move-only. A copy would call UnlockExclusive() a second time
/// for a single LockExclusive(), so copying is disabled; a moved-from guard
/// releases nothing.
template <class LockType>
class ExclusiveLockGuard {
 public:
  /// \param[in] lock the lock to hold; must be non-null and must outlive
  /// the guard
  explicit ExclusiveLockGuard(LockType* lock) : lock_(lock) { lock_->LockExclusive(); }

  /// Take over the section held by `other`; `other` no longer unlocks.
  ExclusiveLockGuard(ExclusiveLockGuard&& other) noexcept : lock_(other.lock_) {
    other.lock_ = nullptr;
  }

  ExclusiveLockGuard(const ExclusiveLockGuard&) = delete;
  ExclusiveLockGuard& operator=(const ExclusiveLockGuard&) = delete;
  ExclusiveLockGuard& operator=(ExclusiveLockGuard&&) = delete;

  ~ExclusiveLockGuard() {
    // A null pointer marks a guard whose section was moved elsewhere.
    if (lock_ != nullptr) {
      lock_->UnlockExclusive();
    }
  }

 protected:
  LockType* lock_;
};
|
||||
|
||||
// Debug concurrency checker that marks "shared" and "exclusive" code sections,
|
||||
// aborting if the concurrency rules get violated. Does nothing in release mode.
|
||||
// Note that we intentionally use the same class declaration in debug and
|
||||
// release builds in order to avoid runtime failures when e.g. loading a
|
||||
// release-built DLL with a debug-built application, or the reverse.
|
||||
|
||||
class ARROW_EXPORT SharedExclusiveChecker {
|
||||
public:
|
||||
SharedExclusiveChecker();
|
||||
void LockShared();
|
||||
void UnlockShared();
|
||||
void LockExclusive();
|
||||
void UnlockExclusive();
|
||||
|
||||
SharedLockGuard<SharedExclusiveChecker> shared_guard() {
|
||||
return SharedLockGuard<SharedExclusiveChecker>(this);
|
||||
}
|
||||
|
||||
ExclusiveLockGuard<SharedExclusiveChecker> exclusive_guard() {
|
||||
return ExclusiveLockGuard<SharedExclusiveChecker>(this);
|
||||
}
|
||||
|
||||
protected:
|
||||
struct Impl;
|
||||
std::shared_ptr<Impl> impl_;
|
||||
};
|
||||
|
||||
// Concurrency wrappers for IO classes that check the correctness of
|
||||
// concurrent calls to various methods. It is not necessary to wrap all
|
||||
// IO classes with these, only a few core classes that get used in tests.
|
||||
//
|
||||
// We're not using virtual inheritance here as virtual bases have poorly
|
||||
// understood semantic overhead which we'd be passing on to implementers
|
||||
// and users of these interfaces. Instead, we just duplicate the method
|
||||
// wrappers between those two classes.
|
||||
|
||||
template <class Derived>
|
||||
class InputStreamConcurrencyWrapper : public InputStream {
|
||||
public:
|
||||
Status Close() final {
|
||||
auto guard = lock_.exclusive_guard();
|
||||
return derived()->DoClose();
|
||||
}
|
||||
|
||||
Status Abort() final {
|
||||
auto guard = lock_.exclusive_guard();
|
||||
return derived()->DoAbort();
|
||||
}
|
||||
|
||||
Result<int64_t> Tell() const final {
|
||||
auto guard = lock_.exclusive_guard();
|
||||
return derived()->DoTell();
|
||||
}
|
||||
|
||||
Result<int64_t> Read(int64_t nbytes, void* out) final {
|
||||
auto guard = lock_.exclusive_guard();
|
||||
return derived()->DoRead(nbytes, out);
|
||||
}
|
||||
|
||||
Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) final {
|
||||
auto guard = lock_.exclusive_guard();
|
||||
return derived()->DoRead(nbytes);
|
||||
}
|
||||
|
||||
Result<std::string_view> Peek(int64_t nbytes) final {
|
||||
auto guard = lock_.exclusive_guard();
|
||||
return derived()->DoPeek(nbytes);
|
||||
}
|
||||
|
||||
/*
|
||||
Methods to implement in derived class:
|
||||
|
||||
Status DoClose();
|
||||
Result<int64_t> DoTell() const;
|
||||
Result<int64_t> DoRead(int64_t nbytes, void* out);
|
||||
Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
|
||||
|
||||
And optionally:
|
||||
|
||||
Status DoAbort() override;
|
||||
Result<std::string_view> DoPeek(int64_t nbytes) override;
|
||||
|
||||
These methods should be protected in the derived class and
|
||||
InputStreamConcurrencyWrapper declared as a friend with
|
||||
|
||||
friend InputStreamConcurrencyWrapper<derived>;
|
||||
*/
|
||||
|
||||
protected:
|
||||
// Default implementations. They are virtual because the derived class may
|
||||
// have derived classes itself.
|
||||
virtual Status DoAbort() { return derived()->DoClose(); }
|
||||
|
||||
virtual Result<std::string_view> DoPeek(int64_t ARROW_ARG_UNUSED(nbytes)) {
|
||||
return Status::NotImplemented("Peek not implemented");
|
||||
}
|
||||
|
||||
Derived* derived() { return ::arrow::internal::checked_cast<Derived*>(this); }
|
||||
|
||||
const Derived* derived() const {
|
||||
return ::arrow::internal::checked_cast<const Derived*>(this);
|
||||
}
|
||||
|
||||
mutable SharedExclusiveChecker lock_;
|
||||
};
|
||||
|
||||
template <class Derived>
|
||||
class RandomAccessFileConcurrencyWrapper : public RandomAccessFile {
|
||||
public:
|
||||
Status Close() final {
|
||||
auto guard = lock_.exclusive_guard();
|
||||
return derived()->DoClose();
|
||||
}
|
||||
|
||||
Status Abort() final {
|
||||
auto guard = lock_.exclusive_guard();
|
||||
return derived()->DoAbort();
|
||||
}
|
||||
|
||||
Result<int64_t> Tell() const final {
|
||||
auto guard = lock_.exclusive_guard();
|
||||
return derived()->DoTell();
|
||||
}
|
||||
|
||||
Result<int64_t> Read(int64_t nbytes, void* out) final {
|
||||
auto guard = lock_.exclusive_guard();
|
||||
return derived()->DoRead(nbytes, out);
|
||||
}
|
||||
|
||||
Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) final {
|
||||
auto guard = lock_.exclusive_guard();
|
||||
return derived()->DoRead(nbytes);
|
||||
}
|
||||
|
||||
Result<std::string_view> Peek(int64_t nbytes) final {
|
||||
auto guard = lock_.exclusive_guard();
|
||||
return derived()->DoPeek(nbytes);
|
||||
}
|
||||
|
||||
Status Seek(int64_t position) final {
|
||||
auto guard = lock_.exclusive_guard();
|
||||
return derived()->DoSeek(position);
|
||||
}
|
||||
|
||||
Result<int64_t> GetSize() final {
|
||||
auto guard = lock_.shared_guard();
|
||||
return derived()->DoGetSize();
|
||||
}
|
||||
|
||||
// NOTE: ReadAt doesn't use stream pointer, but it is allowed to update it
|
||||
// (it's the case on Windows when using ReadFileEx).
|
||||
// So any method that relies on the current position (even if it doesn't
|
||||
// update it, such as Peek) cannot run in parallel with ReadAt and has
|
||||
// to use the exclusive_guard.
|
||||
|
||||
Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out) final {
|
||||
auto guard = lock_.shared_guard();
|
||||
return derived()->DoReadAt(position, nbytes, out);
|
||||
}
|
||||
|
||||
Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes) final {
|
||||
auto guard = lock_.shared_guard();
|
||||
return derived()->DoReadAt(position, nbytes);
|
||||
}
|
||||
|
||||
/*
|
||||
Methods to implement in derived class:
|
||||
|
||||
Status DoClose();
|
||||
Result<int64_t> DoTell() const;
|
||||
Result<int64_t> DoRead(int64_t nbytes, void* out);
|
||||
Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
|
||||
Status DoSeek(int64_t position);
|
||||
Result<int64_t> DoGetSize()
|
||||
Result<int64_t> DoReadAt(int64_t position, int64_t nbytes, void* out);
|
||||
Result<std::shared_ptr<Buffer>> DoReadAt(int64_t position, int64_t nbytes);
|
||||
|
||||
And optionally:
|
||||
|
||||
Status DoAbort() override;
|
||||
Result<std::string_view> DoPeek(int64_t nbytes) override;
|
||||
|
||||
These methods should be protected in the derived class and
|
||||
RandomAccessFileConcurrencyWrapper declared as a friend with
|
||||
|
||||
friend RandomAccessFileConcurrencyWrapper<derived>;
|
||||
*/
|
||||
|
||||
protected:
|
||||
// Default implementations. They are virtual because the derived class may
|
||||
// have derived classes itself.
|
||||
virtual Status DoAbort() { return derived()->DoClose(); }
|
||||
|
||||
virtual Result<std::string_view> DoPeek(int64_t ARROW_ARG_UNUSED(nbytes)) {
|
||||
return Status::NotImplemented("Peek not implemented");
|
||||
}
|
||||
|
||||
Derived* derived() { return ::arrow::internal::checked_cast<Derived*>(this); }
|
||||
|
||||
const Derived* derived() const {
|
||||
return ::arrow::internal::checked_cast<const Derived*>(this);
|
||||
}
|
||||
|
||||
mutable SharedExclusiveChecker lock_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace io
|
||||
} // namespace arrow
|
221
venv/Lib/site-packages/pyarrow/include/arrow/io/file.h
Normal file
221
venv/Lib/site-packages/pyarrow/include/arrow/io/file.h
Normal file
|
@ -0,0 +1,221 @@
|
|||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
// IO interface implementations for OS files
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/io/concurrency.h"
|
||||
#include "arrow/io/interfaces.h"
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
|
||||
class Buffer;
|
||||
class MemoryPool;
|
||||
class Status;
|
||||
|
||||
namespace io {
|
||||
|
||||
/// \brief An operating system file open in write-only mode.
|
||||
class ARROW_EXPORT FileOutputStream : public OutputStream {
|
||||
public:
|
||||
~FileOutputStream() override;
|
||||
|
||||
/// \brief Open a local file for writing, either truncating it or appending to it
|
||||
/// \param[in] path with UTF8 encoding
|
||||
/// \param[in] append append to existing file, otherwise truncate to 0 bytes
|
||||
/// \return an open FileOutputStream, or an error
|
||||
///
|
||||
/// When \p append is false (the default), any existing file with the indicated
|
||||
/// path is truncated to 0 bytes, deleting any existing data
|
||||
static Result<std::shared_ptr<FileOutputStream>> Open(const std::string& path,
|
||||
bool append = false);
|
||||
|
||||
/// \brief Open a file descriptor for writing. The underlying file isn't
|
||||
/// truncated.
|
||||
/// \param[in] fd file descriptor
|
||||
/// \return an open FileOutputStream
|
||||
///
|
||||
/// The file descriptor becomes owned by the OutputStream, and will be closed
|
||||
/// on Close() or destruction.
|
||||
static Result<std::shared_ptr<FileOutputStream>> Open(int fd);
|
||||
|
||||
// OutputStream interface
|
||||
Status Close() override;
|
||||
bool closed() const override;
|
||||
Result<int64_t> Tell() const override;
|
||||
|
||||
// Write bytes to the stream. Thread-safe
|
||||
Status Write(const void* data, int64_t nbytes) override;
|
||||
/// \cond FALSE
|
||||
using Writable::Write;
|
||||
/// \endcond
|
||||
|
||||
int file_descriptor() const;
|
||||
|
||||
private:
|
||||
FileOutputStream();
|
||||
|
||||
class ARROW_NO_EXPORT FileOutputStreamImpl;
|
||||
std::unique_ptr<FileOutputStreamImpl> impl_;
|
||||
};
|
||||
|
||||
/// \brief An operating system file open in read-only mode.
|
||||
///
|
||||
/// Reads through this implementation are unbuffered. If many small reads
|
||||
/// need to be issued, it is recommended to use a buffering layer for good
|
||||
/// performance.
|
||||
class ARROW_EXPORT ReadableFile
|
||||
: public internal::RandomAccessFileConcurrencyWrapper<ReadableFile> {
|
||||
public:
|
||||
~ReadableFile() override;
|
||||
|
||||
/// \brief Open a local file for reading
|
||||
/// \param[in] path with UTF8 encoding
|
||||
/// \param[in] pool a MemoryPool for memory allocations
|
||||
/// \return ReadableFile instance
|
||||
static Result<std::shared_ptr<ReadableFile>> Open(
|
||||
const std::string& path, MemoryPool* pool = default_memory_pool());
|
||||
|
||||
/// \brief Open a local file for reading
|
||||
/// \param[in] fd file descriptor
|
||||
/// \param[in] pool a MemoryPool for memory allocations
|
||||
/// \return ReadableFile instance
|
||||
///
|
||||
/// The file descriptor becomes owned by the ReadableFile, and will be closed
|
||||
/// on Close() or destruction.
|
||||
static Result<std::shared_ptr<ReadableFile>> Open(
|
||||
int fd, MemoryPool* pool = default_memory_pool());
|
||||
|
||||
bool closed() const override;
|
||||
|
||||
int file_descriptor() const;
|
||||
|
||||
Status WillNeed(const std::vector<ReadRange>& ranges) override;
|
||||
|
||||
private:
|
||||
friend RandomAccessFileConcurrencyWrapper<ReadableFile>;
|
||||
|
||||
explicit ReadableFile(MemoryPool* pool);
|
||||
|
||||
Status DoClose();
|
||||
Result<int64_t> DoTell() const;
|
||||
Result<int64_t> DoRead(int64_t nbytes, void* buffer);
|
||||
Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
|
||||
|
||||
/// \brief Thread-safe implementation of ReadAt
|
||||
Result<int64_t> DoReadAt(int64_t position, int64_t nbytes, void* out);
|
||||
|
||||
/// \brief Thread-safe implementation of ReadAt
|
||||
Result<std::shared_ptr<Buffer>> DoReadAt(int64_t position, int64_t nbytes);
|
||||
|
||||
Result<int64_t> DoGetSize();
|
||||
Status DoSeek(int64_t position);
|
||||
|
||||
class ARROW_NO_EXPORT ReadableFileImpl;
|
||||
std::unique_ptr<ReadableFileImpl> impl_;
|
||||
};
|
||||
|
||||
/// \brief A file interface that uses memory-mapped files for memory interactions
|
||||
///
|
||||
/// This implementation supports zero-copy reads. The same class is used
|
||||
/// for both reading and writing.
|
||||
///
|
||||
/// If opening a file in a writable mode, it is not truncated first as with
|
||||
/// FileOutputStream.
|
||||
class ARROW_EXPORT MemoryMappedFile : public ReadWriteFileInterface {
|
||||
public:
|
||||
~MemoryMappedFile() override;
|
||||
|
||||
/// Create new file with indicated size, return in read/write mode
|
||||
static Result<std::shared_ptr<MemoryMappedFile>> Create(const std::string& path,
|
||||
int64_t size);
|
||||
|
||||
// mmap() with whole file
|
||||
static Result<std::shared_ptr<MemoryMappedFile>> Open(const std::string& path,
|
||||
FileMode::type mode);
|
||||
|
||||
// mmap() with a region of file, the offset must be a multiple of the page size
|
||||
static Result<std::shared_ptr<MemoryMappedFile>> Open(const std::string& path,
|
||||
FileMode::type mode,
|
||||
const int64_t offset,
|
||||
const int64_t length);
|
||||
|
||||
Status Close() override;
|
||||
|
||||
bool closed() const override;
|
||||
|
||||
Result<int64_t> Tell() const override;
|
||||
|
||||
Status Seek(int64_t position) override;
|
||||
|
||||
// Required by RandomAccessFile, copies memory into out. Not thread-safe
|
||||
Result<int64_t> Read(int64_t nbytes, void* out) override;
|
||||
|
||||
// Zero copy read, moves position pointer. Not thread-safe
|
||||
Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
|
||||
|
||||
// Zero-copy read, leaves position unchanged. Acquires a reader lock
|
||||
// for the duration of slice creation (typically very short). Is thread-safe.
|
||||
Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes) override;
|
||||
|
||||
// Raw copy of the memory at specified position. Thread-safe, but
|
||||
// locks out other readers for the duration of memcpy. Prefer the
|
||||
// zero copy method
|
||||
Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out) override;
|
||||
|
||||
// Synchronous ReadAsync override
|
||||
Future<std::shared_ptr<Buffer>> ReadAsync(const IOContext&, int64_t position,
|
||||
int64_t nbytes) override;
|
||||
|
||||
Status WillNeed(const std::vector<ReadRange>& ranges) override;
|
||||
|
||||
bool supports_zero_copy() const override;
|
||||
|
||||
/// Write data at the current position in the file. Thread-safe
|
||||
Status Write(const void* data, int64_t nbytes) override;
|
||||
/// \cond FALSE
|
||||
using Writable::Write;
|
||||
/// \endcond
|
||||
|
||||
/// Set the size of the map to new_size.
|
||||
Status Resize(int64_t new_size);
|
||||
|
||||
/// Write data at a particular position in the file. Thread-safe
|
||||
Status WriteAt(int64_t position, const void* data, int64_t nbytes) override;
|
||||
|
||||
Result<int64_t> GetSize() override;
|
||||
|
||||
int file_descriptor() const;
|
||||
|
||||
private:
|
||||
MemoryMappedFile();
|
||||
|
||||
Status WriteInternal(const void* data, int64_t nbytes);
|
||||
|
||||
class ARROW_NO_EXPORT MemoryMap;
|
||||
std::shared_ptr<MemoryMap> memory_map_;
|
||||
};
|
||||
|
||||
} // namespace io
|
||||
} // namespace arrow
|
284
venv/Lib/site-packages/pyarrow/include/arrow/io/hdfs.h
Normal file
284
venv/Lib/site-packages/pyarrow/include/arrow/io/hdfs.h
Normal file
|
@ -0,0 +1,284 @@
|
|||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/io/interfaces.h"
|
||||
#include "arrow/util/macros.h"
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
|
||||
class Buffer;
|
||||
class MemoryPool;
|
||||
class Status;
|
||||
|
||||
namespace io {
|
||||
|
||||
class HdfsReadableFile;
|
||||
class HdfsOutputStream;
|
||||
|
||||
/// DEPRECATED. Use the FileSystem API in arrow::fs instead.
|
||||
/// Kind of a filesystem entry. The unscoped enum is nested in a struct, so its
/// enumerators are spelled ObjectType::FILE and ObjectType::DIRECTORY and the
/// enum type itself is ObjectType::type.
struct ObjectType {
|
||||
enum type { FILE, DIRECTORY };
|
||||
};
|
||||
|
||||
/// DEPRECATED. Use the FileSystem API in arrow::fs instead.
|
||||
/// Basic metadata about a filesystem entry; this is the type filled in through
/// the out-parameter of FileSystem::Stat().
struct ARROW_EXPORT FileStatistics {
|
||||
/// Size of file, -1 if finding length is unsupported
|
||||
int64_t size;
|
||||
/// Whether the entry is a file or a directory (see ObjectType)
ObjectType::type kind;
|
||||
};
|
||||
|
||||
/// \brief Abstract interface for basic filesystem operations
///
/// Every operation is pure virtual and reports success or failure through the
/// returned Status. Results are delivered through pointer parameters
/// (`listing`, `stat`) rather than return values.
class ARROW_EXPORT FileSystem {
|
||||
public:
|
||||
virtual ~FileSystem() = default;
|
||||
|
||||
/// Create a directory at `path`.
/// NOTE(review): whether missing parents are created is left to the
/// implementation; this interface does not say.
virtual Status MakeDirectory(const std::string& path) = 0;
|
||||
|
||||
/// Delete the directory at `path`.
/// NOTE(review): behaviour for non-empty directories is not specified here.
virtual Status DeleteDirectory(const std::string& path) = 0;
|
||||
|
||||
/// List the entries found under `path`, storing them in `*listing`.
/// NOTE(review): presumably one string per child entry -- confirm the exact
/// form (name vs. full path) against the implementation.
virtual Status GetChildren(const std::string& path,
|
||||
std::vector<std::string>* listing) = 0;
|
||||
|
||||
/// Rename the entry `src` to `dst`.
virtual Status Rename(const std::string& src, const std::string& dst) = 0;
|
||||
|
||||
/// Fill `*stat` with the size and kind of the entry at `path`.
virtual Status Stat(const std::string& path, FileStatistics* stat) = 0;
|
||||
};
|
||||
|
||||
// Description of a single HDFS path, as produced by
// HadoopFileSystem::GetPathInfo() and HadoopFileSystem::ListDirectory().
struct HdfsPathInfo {
|
||||
// Whether the path is a file or a directory
ObjectType::type kind;
|
||||
|
||||
std::string name;
|
||||
std::string owner;
|
||||
std::string group;
|
||||
|
||||
// File size and block size (NOTE(review): presumably in bytes -- confirm)
|
||||
int64_t size;
|
||||
int64_t block_size;
|
||||
|
||||
// Modification and access times in UNIX timestamps (seconds)
int32_t last_modified_time;
|
||||
int32_t last_access_time;
|
||||
|
||||
int16_t replication;
|
||||
int16_t permissions;
|
||||
};
|
||||
|
||||
// Connection settings consumed by HadoopFileSystem::Connect().
struct HdfsConnectionConfig {
|
||||
// Host to connect to
std::string host;
|
||||
// Port to connect to. There is no default member initializer: the value is
// indeterminate unless the struct is value-initialized or the field is set.
int port;
|
||||
// User name for the connection
std::string user;
|
||||
// NOTE(review): presumably identifies a Kerberos ticket (cache path?) --
// confirm against the implementation.
std::string kerb_ticket;
|
||||
// Additional string-to-string configuration entries
std::unordered_map<std::string, std::string> extra_conf;
|
||||
};
|
||||
|
||||
class ARROW_EXPORT HadoopFileSystem : public FileSystem {
|
||||
public:
|
||||
~HadoopFileSystem() override;
|
||||
|
||||
// Connect to an HDFS cluster given a configuration
|
||||
//
|
||||
// @param config (in): configuration for connecting
|
||||
// @param fs (out): the created client
|
||||
// @returns Status
|
||||
static Status Connect(const HdfsConnectionConfig* config,
|
||||
std::shared_ptr<HadoopFileSystem>* fs);
|
||||
|
||||
// Create directory and all parents
|
||||
//
|
||||
// @param path (in): absolute HDFS path
|
||||
// @returns Status
|
||||
Status MakeDirectory(const std::string& path) override;
|
||||
|
||||
// Delete file or directory
|
||||
// @param path absolute path to data
|
||||
// @param recursive if path is a directory, delete contents as well
|
||||
// @returns error status on failure
|
||||
Status Delete(const std::string& path, bool recursive = false);
|
||||
|
||||
Status DeleteDirectory(const std::string& path) override;
|
||||
|
||||
// Disconnect from cluster
|
||||
//
|
||||
// @returns Status
|
||||
Status Disconnect();
|
||||
|
||||
// @param path (in): absolute HDFS path
|
||||
// @returns bool, true if the path exists, false if not (or on error)
|
||||
bool Exists(const std::string& path);
|
||||
|
||||
// @param path (in): absolute HDFS path
|
||||
// @param info (out)
|
||||
// @returns Status
|
||||
Status GetPathInfo(const std::string& path, HdfsPathInfo* info);
|
||||
|
||||
// @param nbytes (out): total capacity of the filesystem
|
||||
// @returns Status
|
||||
Status GetCapacity(int64_t* nbytes);
|
||||
|
||||
// @param nbytes (out): total bytes used of the filesystem
|
||||
// @returns Status
|
||||
Status GetUsed(int64_t* nbytes);
|
||||
|
||||
Status GetChildren(const std::string& path, std::vector<std::string>* listing) override;
|
||||
|
||||
/// List directory contents
|
||||
///
|
||||
/// If path is a relative path, returned values will be absolute paths or URIs
|
||||
/// starting from the current working directory.
|
||||
Status ListDirectory(const std::string& path, std::vector<HdfsPathInfo>* listing);
|
||||
|
||||
/// Return the filesystem's current working directory.
|
||||
///
|
||||
/// The working directory is the base path for all relative paths given to
|
||||
/// other APIs.
|
||||
/// NOTE: this actually returns a URI.
|
||||
Status GetWorkingDirectory(std::string* out);
|
||||
|
||||
/// Change the owner and/or group of a path
|
||||
///
|
||||
/// @param path file path to change
|
||||
/// @param owner pass null for no change
|
||||
/// @param group pass null for no change
|
||||
/// @return Status
Status Chown(const std::string& path, const char* owner, const char* group);
|
||||
|
||||
/// Change path permissions
|
||||
///
|
||||
/// \param path Absolute path in file system
|
||||
/// \param mode Mode bitset
|
||||
/// \return Status
|
||||
Status Chmod(const std::string& path, int mode);
|
||||
|
||||
// Move file or directory from source path to destination path within the
|
||||
// current filesystem
|
||||
Status Rename(const std::string& src, const std::string& dst) override;
|
||||
|
||||
Status Copy(const std::string& src, const std::string& dst);
|
||||
|
||||
Status Move(const std::string& src, const std::string& dst);
|
||||
|
||||
Status Stat(const std::string& path, FileStatistics* stat) override;
|
||||
|
||||
// TODO(wesm): SetWorkingDirectory (GetWorkingDirectory is already declared above)
|
||||
|
||||
// Open an HDFS file in READ mode. Returns error
|
||||
// status if the file is not found.
|
||||
//
|
||||
// @param path complete file path
|
||||
Status OpenReadable(const std::string& path, int32_t buffer_size,
|
||||
std::shared_ptr<HdfsReadableFile>* file);
|
||||
|
||||
Status OpenReadable(const std::string& path, int32_t buffer_size,
|
||||
const io::IOContext& io_context,
|
||||
std::shared_ptr<HdfsReadableFile>* file);
|
||||
|
||||
Status OpenReadable(const std::string& path, std::shared_ptr<HdfsReadableFile>* file);
|
||||
|
||||
Status OpenReadable(const std::string& path, const io::IOContext& io_context,
|
||||
std::shared_ptr<HdfsReadableFile>* file);
|
||||
|
||||
// FileMode::WRITE options
|
||||
// @param path complete file path
|
||||
// @param buffer_size 0 by default
|
||||
// @param replication 0 by default
|
||||
// @param default_block_size 0 by default
|
||||
Status OpenWritable(const std::string& path, bool append, int32_t buffer_size,
|
||||
int16_t replication, int64_t default_block_size,
|
||||
std::shared_ptr<HdfsOutputStream>* file);
|
||||
|
||||
Status OpenWritable(const std::string& path, bool append,
|
||||
std::shared_ptr<HdfsOutputStream>* file);
|
||||
|
||||
private:
|
||||
friend class HdfsReadableFile;
|
||||
friend class HdfsOutputStream;
|
||||
|
||||
class ARROW_NO_EXPORT HadoopFileSystemImpl;
|
||||
std::unique_ptr<HadoopFileSystemImpl> impl_;
|
||||
|
||||
HadoopFileSystem();
|
||||
ARROW_DISALLOW_COPY_AND_ASSIGN(HadoopFileSystem);
|
||||
};
|
||||
|
||||
class ARROW_EXPORT HdfsReadableFile : public RandomAccessFile {
|
||||
public:
|
||||
~HdfsReadableFile() override;
|
||||
|
||||
Status Close() override;
|
||||
|
||||
bool closed() const override;
|
||||
|
||||
// NOTE: If you wish to read a particular range of a file in a multithreaded
|
||||
// context, you may prefer to use ReadAt to avoid locking issues
|
||||
Result<int64_t> Read(int64_t nbytes, void* out) override;
|
||||
Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
|
||||
Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out) override;
|
||||
Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes) override;
|
||||
|
||||
Status Seek(int64_t position) override;
|
||||
Result<int64_t> Tell() const override;
|
||||
Result<int64_t> GetSize() override;
|
||||
|
||||
private:
|
||||
explicit HdfsReadableFile(const io::IOContext&);
|
||||
|
||||
class ARROW_NO_EXPORT HdfsReadableFileImpl;
|
||||
std::unique_ptr<HdfsReadableFileImpl> impl_;
|
||||
|
||||
friend class HadoopFileSystem::HadoopFileSystemImpl;
|
||||
|
||||
ARROW_DISALLOW_COPY_AND_ASSIGN(HdfsReadableFile);
|
||||
};
|
||||
|
||||
// Naming this file OutputStream because it does not support seeking (like the
|
||||
// WritableFile interface)
|
||||
class ARROW_EXPORT HdfsOutputStream : public OutputStream {
|
||||
public:
|
||||
~HdfsOutputStream() override;
|
||||
|
||||
Status Close() override;
|
||||
|
||||
bool closed() const override;
|
||||
|
||||
using OutputStream::Write;
|
||||
Status Write(const void* buffer, int64_t nbytes) override;
|
||||
|
||||
Status Flush() override;
|
||||
|
||||
Result<int64_t> Tell() const override;
|
||||
|
||||
private:
|
||||
class ARROW_NO_EXPORT HdfsOutputStreamImpl;
|
||||
std::unique_ptr<HdfsOutputStreamImpl> impl_;
|
||||
|
||||
friend class HadoopFileSystem::HadoopFileSystemImpl;
|
||||
|
||||
HdfsOutputStream();
|
||||
|
||||
ARROW_DISALLOW_COPY_AND_ASSIGN(HdfsOutputStream);
|
||||
};
|
||||
|
||||
ARROW_EXPORT Status HaveLibHdfs();
|
||||
|
||||
} // namespace io
|
||||
} // namespace arrow
|
362
venv/Lib/site-packages/pyarrow/include/arrow/io/interfaces.h
Normal file
362
venv/Lib/site-packages/pyarrow/include/arrow/io/interfaces.h
Normal file
|
@ -0,0 +1,362 @@
|
|||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/io/type_fwd.h"
|
||||
#include "arrow/type_fwd.h"
|
||||
#include "arrow/util/cancel.h"
|
||||
#include "arrow/util/macros.h"
|
||||
#include "arrow/util/type_fwd.h"
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace io {
|
||||
|
||||
/// \brief A contiguous byte range described by a start offset and a length
///
/// NOTE(review): both members are assumed to be non-negative; nothing in this
/// struct enforces it.
struct ReadRange {
  /// Start position of the range
  int64_t offset;
  /// Number of bytes covered by the range
  int64_t length;

  /// Two ranges are equal when both offset and length match.
  friend bool operator==(const ReadRange& left, const ReadRange& right) {
    return (left.offset == right.offset && left.length == right.length);
  }
  friend bool operator!=(const ReadRange& left, const ReadRange& right) {
    return !(left == right);
  }

  /// \brief Return true if `other` lies entirely within this range
  ///
  /// The end-of-range check is the algebraic equivalent of
  /// `offset + length >= other.offset + other.length`, rearranged into
  /// subtractions. For non-negative offsets and lengths a difference of two
  /// members can never overflow, whereas the sums are signed overflow
  /// (undefined behaviour) for ranges that end near INT64_MAX.
  bool Contains(const ReadRange& other) const {
    return (offset <= other.offset &&
            other.offset - offset <= length - other.length);
  }
};
|
||||
|
||||
/// EXPERIMENTAL: options provider for IO tasks
|
||||
///
|
||||
/// Includes an Executor (which will be used to execute asynchronous reads),
|
||||
/// a MemoryPool (which will be used to allocate buffers when zero copy reads
|
||||
/// are not possible), and an external id (in case the executor receives tasks from
|
||||
/// multiple sources and must distinguish tasks associated with this IOContext).
|
||||
// Bundles a MemoryPool, an Executor, an external id and a StopToken. Objects
// are immutable after construction: only const accessors are exposed.
struct ARROW_EXPORT IOContext {
|
||||
// No specified executor: will use a global IO thread pool
|
||||
// Delegates to the (pool, stop_token) constructor with the default memory
// pool and an unstoppable token.
IOContext() : IOContext(default_memory_pool(), StopToken::Unstoppable()) {}
|
||||
|
||||
// Same as the default constructor, but with a caller-supplied stop token.
explicit IOContext(StopToken stop_token)
|
||||
: IOContext(default_memory_pool(), std::move(stop_token)) {}
|
||||
|
||||
// Defined out of line; the executor and external id it selects are not
// visible in this header.
explicit IOContext(MemoryPool* pool, StopToken stop_token = StopToken::Unstoppable());
|
||||
|
||||
// Stores all four values exactly as given. `external_id` defaults to -1.
explicit IOContext(MemoryPool* pool, ::arrow::internal::Executor* executor,
|
||||
StopToken stop_token = StopToken::Unstoppable(),
|
||||
int64_t external_id = -1)
|
||||
: pool_(pool),
|
||||
executor_(executor),
|
||||
external_id_(external_id),
|
||||
stop_token_(std::move(stop_token)) {}
|
||||
|
||||
// Like the constructor above, but always uses default_memory_pool().
explicit IOContext(::arrow::internal::Executor* executor,
|
||||
StopToken stop_token = StopToken::Unstoppable(),
|
||||
int64_t external_id = -1)
|
||||
: pool_(default_memory_pool()),
|
||||
executor_(executor),
|
||||
external_id_(external_id),
|
||||
stop_token_(std::move(stop_token)) {}
|
||||
|
||||
// The memory pool given at construction (non-owning pointer)
MemoryPool* pool() const { return pool_; }
|
||||
|
||||
// The executor given at construction (non-owning pointer)
::arrow::internal::Executor* executor() const { return executor_; }
|
||||
|
||||
// An application-specific ID, forwarded to executor task submissions
|
||||
int64_t external_id() const { return external_id_; }
|
||||
|
||||
// Returns a copy of the stored stop token
StopToken stop_token() const { return stop_token_; }
|
||||
|
||||
private:
|
||||
MemoryPool* pool_;
|
||||
::arrow::internal::Executor* executor_;
|
||||
int64_t external_id_;
|
||||
StopToken stop_token_;
|
||||
};
|
||||
|
||||
class ARROW_EXPORT FileInterface : public std::enable_shared_from_this<FileInterface> {
|
||||
public:
|
||||
virtual ~FileInterface() = 0;
|
||||
|
||||
/// \brief Close the stream cleanly
|
||||
///
|
||||
/// For writable streams, this will attempt to flush any pending data
|
||||
/// before releasing the underlying resource.
|
||||
///
|
||||
/// After Close() is called, closed() returns true and the stream is not
|
||||
/// available for further operations.
|
||||
virtual Status Close() = 0;
|
||||
|
||||
/// \brief Close the stream asynchronously
|
||||
///
|
||||
/// By default, this will just submit the synchronous Close() to the
|
||||
/// default I/O thread pool. Subclasses may implement this in a more
|
||||
/// efficient manner.
|
||||
virtual Future<> CloseAsync();
|
||||
|
||||
/// \brief Close the stream abruptly
|
||||
///
|
||||
/// This method does not guarantee that any pending data is flushed.
|
||||
/// It merely releases any underlying resource used by the stream for
|
||||
/// its operation.
|
||||
///
|
||||
/// After Abort() is called, closed() returns true and the stream is not
|
||||
/// available for further operations.
|
||||
virtual Status Abort();
|
||||
|
||||
/// \brief Return the position in this stream
|
||||
virtual Result<int64_t> Tell() const = 0;
|
||||
|
||||
/// \brief Return whether the stream is closed
|
||||
virtual bool closed() const = 0;
|
||||
|
||||
/// \brief Return the file mode stored in this object
///
/// The mode is FileMode::READ unless a subclass changed it via set_mode().
FileMode::type mode() const { return mode_; }
|
||||
|
||||
protected:
|
||||
// Protected: only subclasses construct this. The mode starts as FileMode::READ.
FileInterface() : mode_(FileMode::READ) {}
|
||||
FileMode::type mode_;
|
||||
// Overwrite the stored file mode, i.e. the value returned by mode().
void set_mode(FileMode::type mode) { mode_ = mode; }
|
||||
|
||||
private:
|
||||
ARROW_DISALLOW_COPY_AND_ASSIGN(FileInterface);
|
||||
};
|
||||
|
||||
/// \brief Interface for objects whose current position can be changed
class ARROW_EXPORT Seekable {
|
||||
public:
|
||||
virtual ~Seekable() = default;
|
||||
/// \brief Move the current position to `position`
///
/// NOTE(review): presumably an absolute byte offset from the start -- confirm.
virtual Status Seek(int64_t position) = 0;
|
||||
};
|
||||
|
||||
class ARROW_EXPORT Writable {
|
||||
public:
|
||||
virtual ~Writable() = default;
|
||||
|
||||
/// \brief Write the given data to the stream
|
||||
///
|
||||
/// This method always processes the bytes in full. Depending on the
|
||||
/// semantics of the stream, the data may be written out immediately,
|
||||
/// held in a buffer, or written asynchronously. In the case where
|
||||
/// the stream buffers the data, it will be copied. To avoid potentially
|
||||
/// large copies, use the Write variant that takes an owned Buffer.
|
||||
virtual Status Write(const void* data, int64_t nbytes) = 0;
|
||||
|
||||
/// \brief Write the given data to the stream
|
||||
///
|
||||
/// Since the Buffer owns its memory, this method can avoid a copy if
|
||||
/// buffering is required. See Write(const void*, int64_t) for details.
|
||||
virtual Status Write(const std::shared_ptr<Buffer>& data);
|
||||
|
||||
/// \brief Flush buffered bytes, if any
|
||||
virtual Status Flush();
|
||||
|
||||
/// \brief Write the contents of the given string_view to the stream
///
/// Non-virtual convenience overload, defined out of line.
/// NOTE(review): presumably forwards to Write(const void*, int64_t) -- confirm.
Status Write(std::string_view data);
|
||||
};
|
||||
|
||||
class ARROW_EXPORT Readable {
|
||||
public:
|
||||
virtual ~Readable() = default;
|
||||
|
||||
/// \brief Read data from current file position.
|
||||
///
|
||||
/// Read at most `nbytes` from the current file position into `out`.
|
||||
/// The number of bytes read is returned.
|
||||
virtual Result<int64_t> Read(int64_t nbytes, void* out) = 0;
|
||||
|
||||
/// \brief Read data from current file position.
|
||||
///
|
||||
/// Read at most `nbytes` from the current file position. Fewer bytes may
|
||||
/// be read if EOF is reached. This method updates the current file position.
|
||||
///
|
||||
/// In some cases (e.g. a memory-mapped file), this method may avoid a
|
||||
/// memory copy.
|
||||
///
/// \param[in] nbytes The maximum number of bytes to read
/// \return A buffer containing the bytes read, or an error
virtual Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) = 0;
|
||||
|
||||
/// EXPERIMENTAL: The IOContext associated with this file.
|
||||
///
|
||||
/// By default, this is the same as default_io_context(), but it may be
|
||||
/// overridden by subclasses.
|
||||
virtual const IOContext& io_context() const;
|
||||
};
|
||||
|
||||
/// \brief A writable stream: FileInterface (inherited virtually) plus Writable
///
/// The constructor is protected, so only subclasses can be instantiated.
class ARROW_EXPORT OutputStream : virtual public FileInterface, public Writable {
|
||||
protected:
|
||||
OutputStream() = default;
|
||||
};
|
||||
|
||||
class ARROW_EXPORT InputStream : virtual public FileInterface, virtual public Readable {
|
||||
public:
|
||||
/// \brief Advance or skip stream indicated number of bytes
|
||||
/// \param[in] nbytes the number to move forward
|
||||
/// \return Status
|
||||
Status Advance(int64_t nbytes);
|
||||
|
||||
/// \brief Return zero-copy string_view to upcoming bytes.
|
||||
///
|
||||
/// Do not modify the stream position. The view becomes invalid after
|
||||
/// any operation on the stream. May trigger buffering if the requested
|
||||
/// size is larger than the number of buffered bytes.
|
||||
///
|
||||
/// May return NotImplemented on streams that don't support it.
|
||||
///
|
||||
/// \param[in] nbytes the maximum number of bytes to see
|
||||
virtual Result<std::string_view> Peek(int64_t nbytes);
|
||||
|
||||
/// \brief Return true if InputStream is capable of zero copy Buffer reads
|
||||
///
|
||||
/// Zero copy reads imply the use of Buffer-returning Read() overloads.
|
||||
virtual bool supports_zero_copy() const;
|
||||
|
||||
/// \brief Read and return stream metadata
|
||||
///
|
||||
/// If the stream implementation doesn't support metadata, empty metadata
|
||||
/// is returned. Note that it is allowed to return a null pointer rather
|
||||
/// than an allocated empty metadata.
|
||||
virtual Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata();
|
||||
|
||||
/// \brief Read stream metadata asynchronously
|
||||
virtual Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
|
||||
const IOContext& io_context);
|
||||
/// \brief Read stream metadata asynchronously, without an explicit IOContext
///
/// Non-virtual overload, defined out of line.
/// NOTE(review): presumably uses this stream's own io_context() -- confirm.
Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync();
|
||||
|
||||
protected:
|
||||
InputStream() = default;
|
||||
};
|
||||
|
||||
class ARROW_EXPORT RandomAccessFile : public InputStream, public Seekable {
|
||||
public:
|
||||
/// Necessary because we hold a std::unique_ptr
|
||||
~RandomAccessFile() override;
|
||||
|
||||
/// \brief Create an isolated InputStream that reads a segment of a
|
||||
/// RandomAccessFile. Multiple such streams can be created and used
|
||||
/// independently without interference
|
||||
/// \param[in] file a file instance
|
||||
/// \param[in] file_offset the starting position in the file
|
||||
/// \param[in] nbytes the extent of bytes to read. The file should have
|
||||
/// sufficient bytes available
|
||||
/// \return an InputStream over the requested segment, or an error
static Result<std::shared_ptr<InputStream>> GetStream(
|
||||
std::shared_ptr<RandomAccessFile> file, int64_t file_offset, int64_t nbytes);
|
||||
|
||||
/// \brief Return the total file size in bytes.
|
||||
///
|
||||
/// This method does not read or move the current file position, so is safe
|
||||
/// to call concurrently with e.g. ReadAt().
|
||||
virtual Result<int64_t> GetSize() = 0;
|
||||
|
||||
/// \brief Read data from given file position.
|
||||
///
|
||||
/// At most `nbytes` bytes are read. The number of bytes read is returned
|
||||
/// (it can be less than `nbytes` if EOF is reached).
|
||||
///
|
||||
/// This method can be safely called from multiple threads concurrently.
|
||||
/// It is unspecified whether this method updates the file position or not.
|
||||
///
|
||||
/// The default RandomAccessFile-provided implementation uses Seek() and Read(),
|
||||
/// but subclasses may override it with a more efficient implementation
|
||||
/// that doesn't depend on implicit file positioning.
|
||||
///
|
||||
/// \param[in] position Where to read bytes from
|
||||
/// \param[in] nbytes The number of bytes to read
|
||||
/// \param[out] out The buffer to read bytes into
|
||||
/// \return The number of bytes read, or an error
|
||||
virtual Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out);
|
||||
|
||||
/// \brief Read data from given file position.
|
||||
///
|
||||
/// At most `nbytes` bytes are read, but it can be less if EOF is reached.
|
||||
///
|
||||
/// \param[in] position Where to read bytes from
|
||||
/// \param[in] nbytes The number of bytes to read
|
||||
/// \return A buffer containing the bytes read, or an error
|
||||
virtual Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes);
|
||||
|
||||
/// EXPERIMENTAL: Read data asynchronously.
|
||||
virtual Future<std::shared_ptr<Buffer>> ReadAsync(const IOContext&, int64_t position,
|
||||
int64_t nbytes);
|
||||
|
||||
/// EXPERIMENTAL: Read data asynchronously, using the file's IOContext.
|
||||
Future<std::shared_ptr<Buffer>> ReadAsync(int64_t position, int64_t nbytes);
|
||||
|
||||
/// EXPERIMENTAL: Explicit multi-read.
|
||||
/// \brief Request multiple reads at once
|
||||
///
|
||||
/// The underlying filesystem may optimize these reads by coalescing small reads into
|
||||
/// large reads or by breaking up large reads into multiple parallel smaller reads. The
|
||||
/// reads should be issued in parallel if it makes sense for the filesystem.
|
||||
///
|
||||
/// One future will be returned for each input read range. Multiple returned futures
|
||||
/// may correspond to a single read. Or, a single returned future may be a combined
|
||||
/// result of several individual reads.
|
||||
///
|
||||
/// \param[in] ranges The ranges to read
|
||||
/// \return One future per input range; each completes once the data for its
|
||||
/// range is available
|
||||
virtual std::vector<Future<std::shared_ptr<Buffer>>> ReadManyAsync(
|
||||
const IOContext&, const std::vector<ReadRange>& ranges);
|
||||
|
||||
/// EXPERIMENTAL: Explicit multi-read, using the file's IOContext.
|
||||
std::vector<Future<std::shared_ptr<Buffer>>> ReadManyAsync(
|
||||
const std::vector<ReadRange>& ranges);
|
||||
|
||||
/// EXPERIMENTAL: Inform that the given ranges may be read soon.
|
||||
///
|
||||
/// Some implementations might arrange to prefetch some of the data.
|
||||
/// However, no guarantee is made and the default implementation does nothing.
|
||||
/// For robust prefetching, use ReadAt() or ReadAsync().
|
||||
virtual Status WillNeed(const std::vector<ReadRange>& ranges);
|
||||
|
||||
protected:
|
||||
RandomAccessFile();
|
||||
|
||||
private:
|
||||
struct ARROW_NO_EXPORT Impl;
|
||||
std::unique_ptr<Impl> interface_impl_;
|
||||
};
|
||||
|
||||
/// \brief An OutputStream that is also Seekable and supports positional writes
///
/// The constructor is protected, so only subclasses can be instantiated.
class ARROW_EXPORT WritableFile : public OutputStream, public Seekable {
|
||||
public:
|
||||
/// \brief Write `nbytes` bytes from `data` at the given `position`
///
/// NOTE(review): whether this moves the current stream position is not
/// specified in this header -- confirm per implementation.
virtual Status WriteAt(int64_t position, const void* data, int64_t nbytes) = 0;
|
||||
|
||||
protected:
|
||||
WritableFile() = default;
|
||||
};
|
||||
|
||||
/// \brief Base for files that are both a RandomAccessFile and a WritableFile
///
/// The protected constructor sets the file mode to FileMode::READWRITE.
class ARROW_EXPORT ReadWriteFileInterface : public RandomAccessFile, public WritableFile {
|
||||
protected:
|
||||
// set_mode is qualified with RandomAccessFile:: because it is reachable
// through both base classes.
ReadWriteFileInterface() { RandomAccessFile::set_mode(FileMode::READWRITE); }
|
||||
};
|
||||
|
||||
/// \brief Return an iterator on an input stream
|
||||
///
|
||||
/// The iterator yields a fixed-size block on each Next() call, except the
|
||||
/// last block in the stream which may be smaller.
|
||||
/// Once the end of stream is reached, Next() returns nullptr
|
||||
/// (unlike InputStream::Read() which returns an empty buffer).
|
||||
ARROW_EXPORT
|
||||
Result<Iterator<std::shared_ptr<Buffer>>> MakeInputStreamIterator(
|
||||
std::shared_ptr<InputStream> stream, int64_t block_size);
|
||||
|
||||
} // namespace io
|
||||
} // namespace arrow
|
197
venv/Lib/site-packages/pyarrow/include/arrow/io/memory.h
Normal file
197
venv/Lib/site-packages/pyarrow/include/arrow/io/memory.h
Normal file
|
@ -0,0 +1,197 @@
|
|||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
// Public API for different memory sharing / IO mechanisms
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/io/concurrency.h"
|
||||
#include "arrow/io/interfaces.h"
|
||||
#include "arrow/type_fwd.h"
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
|
||||
class Status;
|
||||
|
||||
namespace io {
|
||||
|
||||
/// \brief An output stream that writes to a resizable buffer
|
||||
class ARROW_EXPORT BufferOutputStream : public OutputStream {
|
||||
public:
|
||||
/// \brief Construct a stream from an existing ResizableBuffer.
explicit BufferOutputStream(const std::shared_ptr<ResizableBuffer>& buffer);
|
||||
|
||||
/// \brief Create in-memory output stream with indicated capacity using a
|
||||
/// memory pool
|
||||
/// \param[in] initial_capacity the initial allocated internal capacity of
|
||||
/// the OutputStream
|
||||
/// \param[in,out] pool a MemoryPool to use for allocations
|
||||
/// \return the created stream
|
||||
static Result<std::shared_ptr<BufferOutputStream>> Create(
|
||||
int64_t initial_capacity = 4096, MemoryPool* pool = default_memory_pool());
|
||||
|
||||
~BufferOutputStream() override;
|
||||
|
||||
// Implement the OutputStream interface
|
||||
|
||||
/// Close the stream, preserving the buffer (retrieve it with Finish()).
|
||||
Status Close() override;
|
||||
bool closed() const override;
|
||||
Result<int64_t> Tell() const override;
|
||||
Status Write(const void* data, int64_t nbytes) override;
|
||||
|
||||
/// \cond FALSE
|
||||
using OutputStream::Write;
|
||||
/// \endcond
|
||||
|
||||
/// Close the stream and return the buffer
|
||||
Result<std::shared_ptr<Buffer>> Finish();
|
||||
|
||||
/// \brief Initialize state of OutputStream with newly allocated memory and
|
||||
/// set position to 0
|
||||
/// \param[in] initial_capacity the starting allocated capacity
|
||||
/// \param[in,out] pool the memory pool to use for allocations
|
||||
/// \return Status
|
||||
/// \note The default capacity here (1024) differs from the default of Create() (4096).
Status Reset(int64_t initial_capacity = 1024, MemoryPool* pool = default_memory_pool());
|
||||
|
||||
/// \brief Return the current value of the capacity_ member.
int64_t capacity() const { return capacity_; }
|
||||
|
||||
private:
|
||||
// The default constructor is private.
// NOTE(review): presumably only used by Create() -- confirm in the implementation file.
BufferOutputStream();
|
||||
|
||||
// Ensures there is sufficient space available to write nbytes
|
||||
Status Reserve(int64_t nbytes);
|
||||
|
||||
std::shared_ptr<ResizableBuffer> buffer_;
|
||||
bool is_open_;
|
||||
// Value reported by capacity().
int64_t capacity_;
|
||||
// NOTE(review): presumably the write offset reported by Tell() -- confirm.
int64_t position_;
|
||||
// NOTE(review): presumably a cached pointer into buffer_'s storage -- confirm.
uint8_t* mutable_data_;
|
||||
};
|
||||
|
||||
/// \brief A helper class to track the size of allocations
|
||||
///
|
||||
/// Writes to this stream do not copy or retain any data, they just bump
|
||||
/// a size counter that can be later used to know exactly which data size
|
||||
/// needs to be allocated for actual writing.
|
||||
class ARROW_EXPORT MockOutputStream : public OutputStream {
|
||||
public:
|
||||
/// Start with a zero byte counter and the stream flagged as open.
MockOutputStream() : extent_bytes_written_(0), is_open_(true) {}
|
||||
|
||||
// Implement the OutputStream interface
|
||||
Status Close() override;
|
||||
bool closed() const override;
|
||||
Result<int64_t> Tell() const override;
|
||||
Status Write(const void* data, int64_t nbytes) override;
|
||||
/// \cond FALSE
|
||||
using Writable::Write;
|
||||
/// \endcond
|
||||
|
||||
/// \brief Return the current value of the extent_bytes_written_ counter.
int64_t GetExtentBytesWritten() const { return extent_bytes_written_; }
|
||||
|
||||
private:
|
||||
// Counter exposed through GetExtentBytesWritten(); initialised to 0 by the constructor.
int64_t extent_bytes_written_;
|
||||
// Open/closed flag; initialised to true by the constructor.
bool is_open_;
|
||||
};
|
||||
|
||||
/// \brief An output stream that writes into a fixed-size mutable buffer
|
||||
class ARROW_EXPORT FixedSizeBufferWriter : public WritableFile {
|
||||
public:
|
||||
/// Input buffer must be mutable, will abort if not
|
||||
explicit FixedSizeBufferWriter(const std::shared_ptr<Buffer>& buffer);
|
||||
~FixedSizeBufferWriter() override;
|
||||
|
||||
// Overrides of the stream / seekable interface inherited through WritableFile.
Status Close() override;
|
||||
bool closed() const override;
|
||||
Status Seek(int64_t position) override;
|
||||
Result<int64_t> Tell() const override;
|
||||
Status Write(const void* data, int64_t nbytes) override;
|
||||
/// \cond FALSE
|
||||
using Writable::Write;
|
||||
/// \endcond
|
||||
|
||||
Status WriteAt(int64_t position, const void* data, int64_t nbytes) override;
|
||||
|
||||
// NOTE(review): the three setters below look like tuning knobs for a
// multi-threaded memory copy (thread count, block size, size threshold); their
// exact semantics live in FixedSizeBufferWriterImpl -- confirm there.
void set_memcopy_threads(int num_threads);
|
||||
void set_memcopy_blocksize(int64_t blocksize);
|
||||
void set_memcopy_threshold(int64_t threshold);
|
||||
|
||||
protected:
|
||||
// Implementation class; only forward-declared in this header.
class FixedSizeBufferWriterImpl;
|
||||
std::unique_ptr<FixedSizeBufferWriterImpl> impl_;
|
||||
};
|
||||
|
||||
/// \class BufferReader
|
||||
/// \brief Random access zero-copy reads on an arrow::Buffer
|
||||
class ARROW_EXPORT BufferReader
|
||||
: public internal::RandomAccessFileConcurrencyWrapper<BufferReader> {
|
||||
public:
|
||||
/// \brief Instantiate from std::shared_ptr<Buffer>.
|
||||
///
|
||||
/// This is a zero-copy constructor.
|
||||
explicit BufferReader(std::shared_ptr<Buffer> buffer);
|
||||
|
||||
/// \brief Instantiate from std::string. Owns data.
|
||||
static std::unique_ptr<BufferReader> FromString(std::string data);
|
||||
|
||||
bool closed() const override;
|
||||
|
||||
bool supports_zero_copy() const override;
|
||||
|
||||
/// \brief Return a copy of the buffer_ shared pointer.
std::shared_ptr<Buffer> buffer() const { return buffer_; }
|
||||
|
||||
// Synchronous ReadAsync override
|
||||
Future<std::shared_ptr<Buffer>> ReadAsync(const IOContext&, int64_t position,
|
||||
int64_t nbytes) override;
|
||||
Status WillNeed(const std::vector<ReadRange>& ranges) override;
|
||||
|
||||
protected:
|
||||
// The concurrency wrapper base is a friend, giving it access to the Do* methods below.
// NOTE(review): presumably the wrapper's public methods forward to these -- confirm
// in arrow/io/concurrency.h.
friend RandomAccessFileConcurrencyWrapper<BufferReader>;
|
||||
|
||||
Status DoClose();
|
||||
|
||||
Result<int64_t> DoRead(int64_t nbytes, void* buffer);
|
||||
Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
|
||||
Result<int64_t> DoReadAt(int64_t position, int64_t nbytes, void* out);
|
||||
Result<std::shared_ptr<Buffer>> DoReadAt(int64_t position, int64_t nbytes);
|
||||
Result<std::string_view> DoPeek(int64_t nbytes) override;
|
||||
|
||||
Result<int64_t> DoTell() const;
|
||||
Status DoSeek(int64_t position);
|
||||
Result<int64_t> DoGetSize();
|
||||
|
||||
/// \brief Return Status::Invalid when is_open_ is false, Status::OK otherwise.
Status CheckClosed() const {
|
||||
if (!is_open_) {
|
||||
return Status::Invalid("Operation forbidden on closed BufferReader");
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Buffer returned by buffer().
std::shared_ptr<Buffer> buffer_;
|
||||
// NOTE(review): data_ / size_ presumably cache buffer_'s data pointer and length,
// and position_ the current read offset -- confirm in the implementation file.
const uint8_t* data_;
|
||||
int64_t size_;
|
||||
int64_t position_;
|
||||
// Flag tested by CheckClosed().
bool is_open_;
|
||||
};
|
||||
|
||||
} // namespace io
|
||||
} // namespace arrow
|
169
venv/Lib/site-packages/pyarrow/include/arrow/io/mman.h
Normal file
169
venv/Lib/site-packages/pyarrow/include/arrow/io/mman.h
Normal file
|
@ -0,0 +1,169 @@
|
|||
// Copyright https://code.google.com/p/mman-win32/
|
||||
//
|
||||
// Licensed under the MIT License;
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "arrow/util/windows_compatibility.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <io.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#define PROT_NONE 0
|
||||
#define PROT_READ 1
|
||||
#define PROT_WRITE 2
|
||||
#define PROT_EXEC 4
|
||||
|
||||
#define MAP_FILE 0
|
||||
#define MAP_SHARED 1
|
||||
#define MAP_PRIVATE 2
|
||||
#define MAP_TYPE 0xf
|
||||
#define MAP_FIXED 0x10
|
||||
#define MAP_ANONYMOUS 0x20
|
||||
#define MAP_ANON MAP_ANONYMOUS
|
||||
|
||||
#define MAP_FAILED ((void*)-1)
|
||||
|
||||
/* Flags for msync. */
|
||||
#define MS_ASYNC 1
|
||||
#define MS_SYNC 2
|
||||
#define MS_INVALIDATE 4
|
||||
|
||||
#ifndef FILE_MAP_EXECUTE
|
||||
# define FILE_MAP_EXECUTE 0x0020
|
||||
#endif
|
||||
|
||||
static inline int __map_mman_error(const DWORD err, const int deferr) {
|
||||
if (err == 0) return 0;
|
||||
// TODO: implement
|
||||
return err;
|
||||
}
|
||||
|
||||
/// \brief Convert POSIX-style PROT_* bits into a Win32 PAGE_* protection constant.
///
/// \param[in] prot bitwise OR of PROT_READ / PROT_WRITE / PROT_EXEC, or PROT_NONE
/// \return 0 for PROT_NONE; otherwise one of PAGE_EXECUTE_READWRITE,
///         PAGE_EXECUTE_READ, PAGE_READWRITE or PAGE_READONLY. PROT_READ itself
///         is not inspected: only the EXEC and WRITE bits select the constant.
static inline DWORD __map_mmap_prot_page(const int prot) {
  if (prot == PROT_NONE) {
    return 0;
  }

  const bool wants_write = (prot & PROT_WRITE) != 0;
  const bool wants_exec = (prot & PROT_EXEC) != 0;

  if (wants_exec) {
    return wants_write ? PAGE_EXECUTE_READWRITE : PAGE_EXECUTE_READ;
  }
  return wants_write ? PAGE_READWRITE : PAGE_READONLY;
}
|
||||
|
||||
/// \brief Convert POSIX-style PROT_* bits into Win32 FILE_MAP_* access flags.
///
/// \param[in] prot bitwise OR of PROT_READ / PROT_WRITE / PROT_EXEC, or PROT_NONE
/// \return the OR of FILE_MAP_READ, FILE_MAP_WRITE and FILE_MAP_EXECUTE for the
///         bits that are set; 0 for PROT_NONE
static inline DWORD __map_mmap_prot_file(const int prot) {
  DWORD access = 0;

  if (prot != PROT_NONE) {
    if ((prot & PROT_READ) != 0) {
      access |= FILE_MAP_READ;
    }
    if ((prot & PROT_WRITE) != 0) {
      access |= FILE_MAP_WRITE;
    }
    if ((prot & PROT_EXEC) != 0) {
      access |= FILE_MAP_EXECUTE;
    }
  }

  return access;
}
|
||||
|
||||
static inline void* mmap(void* addr, size_t len, int prot, int flags, int fildes,
|
||||
off_t off) {
|
||||
HANDLE fm, h;
|
||||
|
||||
void* map = MAP_FAILED;
|
||||
const uint64_t off64 = static_cast<uint64_t>(off);
|
||||
const uint64_t maxSize = off64 + len;
|
||||
|
||||
const DWORD dwFileOffsetLow = static_cast<DWORD>(off64 & 0xFFFFFFFFUL);
|
||||
const DWORD dwFileOffsetHigh = static_cast<DWORD>((off64 >> 32) & 0xFFFFFFFFUL);
|
||||
const DWORD dwMaxSizeLow = static_cast<DWORD>(maxSize & 0xFFFFFFFFUL);
|
||||
const DWORD dwMaxSizeHigh = static_cast<DWORD>((maxSize >> 32) & 0xFFFFFFFFUL);
|
||||
|
||||
const DWORD protect = __map_mmap_prot_page(prot);
|
||||
const DWORD desiredAccess = __map_mmap_prot_file(prot);
|
||||
|
||||
errno = 0;
|
||||
|
||||
if (len == 0
|
||||
/* Unsupported flag combinations */
|
||||
|| (flags & MAP_FIXED) != 0
|
||||
/* Unsupported protection combinations */
|
||||
|| prot == PROT_EXEC) {
|
||||
errno = EINVAL;
|
||||
return MAP_FAILED;
|
||||
}
|
||||
|
||||
h = ((flags & MAP_ANONYMOUS) == 0) ? (HANDLE)_get_osfhandle(fildes)
|
||||
: INVALID_HANDLE_VALUE;
|
||||
|
||||
if ((flags & MAP_ANONYMOUS) == 0 && h == INVALID_HANDLE_VALUE) {
|
||||
errno = EBADF;
|
||||
return MAP_FAILED;
|
||||
}
|
||||
|
||||
fm = CreateFileMapping(h, NULL, protect, dwMaxSizeHigh, dwMaxSizeLow, NULL);
|
||||
|
||||
if (fm == NULL) {
|
||||
errno = __map_mman_error(GetLastError(), EPERM);
|
||||
return MAP_FAILED;
|
||||
}
|
||||
|
||||
map = MapViewOfFile(fm, desiredAccess, dwFileOffsetHigh, dwFileOffsetLow, len);
|
||||
|
||||
CloseHandle(fm);
|
||||
|
||||
if (map == NULL) {
|
||||
errno = __map_mman_error(GetLastError(), EPERM);
|
||||
return MAP_FAILED;
|
||||
}
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
static inline int munmap(void* addr, size_t len) {
|
||||
if (UnmapViewOfFile(addr)) return 0;
|
||||
|
||||
errno = __map_mman_error(GetLastError(), EPERM);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static inline int mprotect(void* addr, size_t len, int prot) {
|
||||
DWORD newProtect = __map_mmap_prot_page(prot);
|
||||
DWORD oldProtect = 0;
|
||||
|
||||
if (VirtualProtect(addr, len, newProtect, &oldProtect)) return 0;
|
||||
|
||||
errno = __map_mman_error(GetLastError(), EPERM);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static inline int msync(void* addr, size_t len, int flags) {
|
||||
if (FlushViewOfFile(addr, len)) return 0;
|
||||
|
||||
errno = __map_mman_error(GetLastError(), EPERM);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static inline int mlock(const void* addr, size_t len) {
|
||||
if (VirtualLock((LPVOID)addr, len)) return 0;
|
||||
|
||||
errno = __map_mman_error(GetLastError(), EPERM);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static inline int munlock(const void* addr, size_t len) {
|
||||
if (VirtualUnlock((LPVOID)addr, len)) return 0;
|
||||
|
||||
errno = __map_mman_error(GetLastError(), EPERM);
|
||||
|
||||
return -1;
|
||||
}
|
118
venv/Lib/site-packages/pyarrow/include/arrow/io/slow.h
Normal file
118
venv/Lib/site-packages/pyarrow/include/arrow/io/slow.h
Normal file
|
@ -0,0 +1,118 @@
|
|||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
// Slow stream implementations, mainly for testing and benchmarking
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "arrow/io/interfaces.h"
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
|
||||
class Buffer;
|
||||
class Status;
|
||||
|
||||
namespace io {
|
||||
|
||||
/// \brief Abstract source of latency values.
///
/// Subclasses must implement NextLatency(); instances are obtained through the
/// static Make() factories.
class ARROW_EXPORT LatencyGenerator {
|
||||
public:
|
||||
virtual ~LatencyGenerator();
|
||||
|
||||
// NOTE(review): presumably blocks the calling thread for NextLatency() seconds --
// confirm in the implementation file.
void Sleep();
|
||||
|
||||
/// Pure virtual: return the next latency value.
virtual double NextLatency() = 0;
|
||||
|
||||
/// Factories taking an average latency and, in the second overload, an explicit seed.
static std::shared_ptr<LatencyGenerator> Make(double average_latency);
|
||||
static std::shared_ptr<LatencyGenerator> Make(double average_latency, int32_t seed);
|
||||
};
|
||||
|
||||
// XXX use ConcurrencyWrapper? It could increase chances of finding a race.
|
||||
|
||||
template <class StreamType>
|
||||
class SlowInputStreamBase : public StreamType {
|
||||
public:
|
||||
SlowInputStreamBase(std::shared_ptr<StreamType> stream,
|
||||
std::shared_ptr<LatencyGenerator> latencies)
|
||||
: stream_(std::move(stream)), latencies_(std::move(latencies)) {}
|
||||
|
||||
SlowInputStreamBase(std::shared_ptr<StreamType> stream, double average_latency)
|
||||
: stream_(std::move(stream)), latencies_(LatencyGenerator::Make(average_latency)) {}
|
||||
|
||||
SlowInputStreamBase(std::shared_ptr<StreamType> stream, double average_latency,
|
||||
int32_t seed)
|
||||
: stream_(std::move(stream)),
|
||||
latencies_(LatencyGenerator::Make(average_latency, seed)) {}
|
||||
|
||||
protected:
|
||||
std::shared_ptr<StreamType> stream_;
|
||||
std::shared_ptr<LatencyGenerator> latencies_;
|
||||
};
|
||||
|
||||
/// \brief An InputStream wrapper that makes reads slower.
|
||||
///
|
||||
/// Read() calls are made slower by an average latency (in seconds).
|
||||
/// Actual latencies form a normal distribution closely centered
|
||||
/// on the average latency.
|
||||
/// Other calls are forwarded directly.
|
||||
class ARROW_EXPORT SlowInputStream : public SlowInputStreamBase<InputStream> {
|
||||
public:
|
||||
~SlowInputStream() override;
|
||||
|
||||
// Inherit every constructor of SlowInputStreamBase<InputStream>.
using SlowInputStreamBase<InputStream>::SlowInputStreamBase;
|
||||
|
||||
Status Close() override;
|
||||
Status Abort() override;
|
||||
bool closed() const override;
|
||||
|
||||
Result<int64_t> Read(int64_t nbytes, void* out) override;
|
||||
Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
|
||||
Result<std::string_view> Peek(int64_t nbytes) override;
|
||||
|
||||
Result<int64_t> Tell() const override;
|
||||
};
|
||||
|
||||
/// \brief A RandomAccessFile wrapper that makes reads slower.
|
||||
///
|
||||
/// Similar to SlowInputStream, but allows random access and seeking.
|
||||
class ARROW_EXPORT SlowRandomAccessFile : public SlowInputStreamBase<RandomAccessFile> {
|
||||
public:
|
||||
~SlowRandomAccessFile() override;
|
||||
|
||||
// Inherit every constructor of SlowInputStreamBase<RandomAccessFile>.
using SlowInputStreamBase<RandomAccessFile>::SlowInputStreamBase;
|
||||
|
||||
Status Close() override;
|
||||
Status Abort() override;
|
||||
bool closed() const override;
|
||||
|
||||
Result<int64_t> Read(int64_t nbytes, void* out) override;
|
||||
Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
|
||||
// Positional variants of the two Read() overloads above.
Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out) override;
|
||||
Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes) override;
|
||||
Result<std::string_view> Peek(int64_t nbytes) override;
|
||||
|
||||
Result<int64_t> GetSize() override;
|
||||
Status Seek(int64_t position) override;
|
||||
Result<int64_t> Tell() const override;
|
||||
};
|
||||
|
||||
} // namespace io
|
||||
} // namespace arrow
|
82
venv/Lib/site-packages/pyarrow/include/arrow/io/stdio.h
Normal file
82
venv/Lib/site-packages/pyarrow/include/arrow/io/stdio.h
Normal file
|
@ -0,0 +1,82 @@
|
|||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "arrow/io/interfaces.h"
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace io {
|
||||
|
||||
// Output stream that just writes to stdout.
|
||||
class ARROW_EXPORT StdoutStream : public OutputStream {
|
||||
public:
|
||||
StdoutStream();
|
||||
// Destructor with an empty body.
~StdoutStream() override {}
|
||||
|
||||
Status Close() override;
|
||||
bool closed() const override;
|
||||
|
||||
Result<int64_t> Tell() const override;
|
||||
|
||||
Status Write(const void* data, int64_t nbytes) override;
|
||||
|
||||
private:
|
||||
// NOTE(review): presumably the running byte count reported by Tell() -- confirm
// in the implementation file.
int64_t pos_;
|
||||
};
|
||||
|
||||
// Output stream that just writes to stderr.
|
||||
class ARROW_EXPORT StderrStream : public OutputStream {
|
||||
public:
|
||||
StderrStream();
|
||||
// Destructor with an empty body.
~StderrStream() override {}
|
||||
|
||||
Status Close() override;
|
||||
bool closed() const override;
|
||||
|
||||
Result<int64_t> Tell() const override;
|
||||
|
||||
Status Write(const void* data, int64_t nbytes) override;
|
||||
|
||||
private:
|
||||
// NOTE(review): presumably the running byte count reported by Tell() -- confirm
// in the implementation file.
int64_t pos_;
|
||||
};
|
||||
|
||||
// Input stream that just reads from stdin.
|
||||
class ARROW_EXPORT StdinStream : public InputStream {
|
||||
public:
|
||||
StdinStream();
|
||||
// Destructor with an empty body.
~StdinStream() override {}
|
||||
|
||||
Status Close() override;
|
||||
bool closed() const override;
|
||||
|
||||
Result<int64_t> Tell() const override;
|
||||
|
||||
// Read into a caller-supplied buffer; the Result carries an int64_t.
Result<int64_t> Read(int64_t nbytes, void* out) override;
|
||||
|
||||
// Read returning a Buffer instead of filling a caller-supplied one.
Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
|
||||
|
||||
private:
|
||||
// NOTE(review): presumably the running byte count reported by Tell() -- confirm
// in the implementation file.
int64_t pos_;
|
||||
};
|
||||
|
||||
} // namespace io
|
||||
} // namespace arrow
|
|
@ -0,0 +1,69 @@
|
|||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/io/interfaces.h"
|
||||
#include "arrow/testing/visibility.h"
|
||||
#include "arrow/type_fwd.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace io {
|
||||
|
||||
class MemoryMappedFile;
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
void AssertFileContents(const std::string& path, const std::string& contents);
|
||||
|
||||
ARROW_TESTING_EXPORT bool FileExists(const std::string& path);
|
||||
|
||||
ARROW_TESTING_EXPORT Status PurgeLocalFileFromOsCache(const std::string& path);
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
Status ZeroMemoryMap(MemoryMappedFile* file);
|
||||
|
||||
/// \brief Testing helper that declares file-creation and memory-mapping utilities.
class ARROW_TESTING_EXPORT MemoryMapFixture {
|
||||
public:
|
||||
// NOTE(review): presumably removes the files recorded in tmp_files_ -- confirm
// in the implementation file.
void TearDown();
|
||||
|
||||
void CreateFile(const std::string& path, int64_t size);
|
||||
|
||||
// Note the argument order: size first, then path (the reverse of CreateFile()).
Result<std::shared_ptr<MemoryMappedFile>> InitMemoryMap(int64_t size,
|
||||
const std::string& path);
|
||||
|
||||
void AppendFile(const std::string& path);
|
||||
|
||||
private:
|
||||
// NOTE(review): presumably the paths registered through the methods above -- confirm.
std::vector<std::string> tmp_files_;
|
||||
};
|
||||
|
||||
/// \brief RandomAccessFile subclass that adds pure-virtual read-statistics accessors.
class ARROW_TESTING_EXPORT TrackedRandomAccessFile : public io::RandomAccessFile {
|
||||
public:
|
||||
// Statistics interface; every accessor is pure virtual.
virtual int64_t num_reads() const = 0;
|
||||
virtual int64_t bytes_read() const = 0;
|
||||
virtual const std::vector<io::ReadRange>& get_read_ranges() const = 0;
|
||||
virtual void ResetStats() = 0;
|
||||
|
||||
// Factory taking a raw pointer to the file to track.
// NOTE(review): `target` looks non-owning, so it presumably has to outlive the
// returned object -- confirm in the implementation file.
static std::unique_ptr<TrackedRandomAccessFile> Make(io::RandomAccessFile* target);
|
||||
};
|
||||
|
||||
} // namespace io
|
||||
} // namespace arrow
|
60
venv/Lib/site-packages/pyarrow/include/arrow/io/transform.h
Normal file
60
venv/Lib/site-packages/pyarrow/include/arrow/io/transform.h
Normal file
|
@ -0,0 +1,60 @@
|
|||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
// Transform stream implementations
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "arrow/io/interfaces.h"
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace io {
|
||||
|
||||
/// \brief InputStream constructed from a wrapped InputStream and a transform function.
class ARROW_EXPORT TransformInputStream : public InputStream {
|
||||
public:
|
||||
/// Callable taking a `const std::shared_ptr<Buffer>&` and returning
/// `Result<std::shared_ptr<Buffer>>`.
using TransformFunc =
|
||||
std::function<Result<std::shared_ptr<Buffer>>(const std::shared_ptr<Buffer>&)>;
|
||||
|
||||
// NOTE(review): presumably `transform` is applied to the data read from
// `wrapped` -- confirm in the implementation file.
TransformInputStream(std::shared_ptr<InputStream> wrapped, TransformFunc transform);
|
||||
~TransformInputStream() override;
|
||||
|
||||
Status Close() override;
|
||||
Status Abort() override;
|
||||
bool closed() const override;
|
||||
|
||||
Result<int64_t> Read(int64_t nbytes, void* out) override;
|
||||
Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
|
||||
|
||||
Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() override;
|
||||
Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
|
||||
const IOContext& io_context) override;
|
||||
|
||||
Result<int64_t> Tell() const override;
|
||||
|
||||
protected:
|
||||
// Implementation struct; only forward-declared in this header.
struct Impl;
|
||||
std::unique_ptr<Impl> impl_;
|
||||
};
|
||||
|
||||
} // namespace io
|
||||
} // namespace arrow
|
77
venv/Lib/site-packages/pyarrow/include/arrow/io/type_fwd.h
Normal file
77
venv/Lib/site-packages/pyarrow/include/arrow/io/type_fwd.h
Normal file
|
@ -0,0 +1,77 @@
|
|||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "arrow/type_fwd.h"
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace io {
|
||||
|
||||
/// \brief Scoping struct for the file mode enumeration.
struct FileMode {
|
||||
// Unscoped enum nested in the struct: values are spelled FileMode::READ,
// FileMode::WRITE and FileMode::READWRITE, and the enum type is FileMode::type.
enum type { READ, WRITE, READWRITE };
|
||||
};
|
||||
|
||||
struct IOContext;
|
||||
struct CacheOptions;
|
||||
|
||||
/// EXPERIMENTAL: convenience global singleton for default IOContext settings
|
||||
ARROW_EXPORT
|
||||
const IOContext& default_io_context();
|
||||
|
||||
/// \brief Get the capacity of the global I/O thread pool
|
||||
///
|
||||
/// Return the number of worker threads in the thread pool to which
|
||||
/// Arrow dispatches various I/O-bound tasks. This is an ideal number,
|
||||
/// not necessarily the exact number of threads at a given point in time.
|
||||
///
|
||||
/// You can change this number using SetIOThreadPoolCapacity().
|
||||
ARROW_EXPORT int GetIOThreadPoolCapacity();
|
||||
|
||||
/// \brief Set the capacity of the global I/O thread pool
|
||||
///
|
||||
/// Set the number of worker threads in the thread pool to which
|
||||
/// Arrow dispatches various I/O-bound tasks.
|
||||
///
|
||||
/// The current number is returned by GetIOThreadPoolCapacity().
|
||||
ARROW_EXPORT Status SetIOThreadPoolCapacity(int threads);
|
||||
|
||||
class FileInterface;
|
||||
class Seekable;
|
||||
class Writable;
|
||||
class Readable;
|
||||
class OutputStream;
|
||||
class FileOutputStream;
|
||||
class InputStream;
|
||||
class ReadableFile;
|
||||
class RandomAccessFile;
|
||||
class MemoryMappedFile;
|
||||
class WritableFile;
|
||||
class ReadWriteFileInterface;
|
||||
|
||||
class LatencyGenerator;
|
||||
|
||||
class BufferOutputStream;
|
||||
class BufferReader;
|
||||
class CompressedInputStream;
|
||||
class CompressedOutputStream;
|
||||
class BufferedInputStream;
|
||||
class BufferedOutputStream;
|
||||
|
||||
} // namespace io
|
||||
} // namespace arrow
|
Loading…
Add table
Add a link
Reference in a new issue