107 lines
3.3 KiB
Python
107 lines
3.3 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
from __future__ import annotations
|
|
import enum
|
|
|
|
import pyarrow as pa
|
|
|
|
|
|
class DlpackDeviceType(enum.IntEnum):
    """Integer enum for device type codes matching DLPack.

    Being an ``IntEnum``, every member compares equal to its plain integer
    code, so a member can be used wherever a raw device-type integer is
    expected. Codes 5 and 6 are intentionally absent from this enum.
    """

    CPU = 1
    CUDA = 2
    CPU_PINNED = 3
    OPENCL = 4
    VULKAN = 7
    METAL = 8
    VPI = 9
    ROCM = 10
|
|
|
|
|
|
class _PyArrowBuffer:
|
|
"""
|
|
Data in the buffer is guaranteed to be contiguous in memory.
|
|
|
|
Note that there is no dtype attribute present, a buffer can be thought of
|
|
as simply a block of memory. However, if the column that the buffer is
|
|
attached to has a dtype that's supported by DLPack and ``__dlpack__`` is
|
|
implemented, then that dtype information will be contained in the return
|
|
value from ``__dlpack__``.
|
|
|
|
This distinction is useful to support both data exchange via DLPack on a
|
|
buffer and (b) dtypes like variable-length strings which do not have a
|
|
fixed number of bytes per element.
|
|
"""
|
|
|
|
def __init__(self, x: pa.Buffer, allow_copy: bool = True) -> None:
|
|
"""
|
|
Handle PyArrow Buffers.
|
|
"""
|
|
self._x = x
|
|
|
|
@property
|
|
def bufsize(self) -> int:
|
|
"""
|
|
Buffer size in bytes.
|
|
"""
|
|
return self._x.size
|
|
|
|
@property
|
|
def ptr(self) -> int:
|
|
"""
|
|
Pointer to start of the buffer as an integer.
|
|
"""
|
|
return self._x.address
|
|
|
|
def __dlpack__(self):
|
|
"""
|
|
Produce DLPack capsule (see array API standard).
|
|
|
|
Raises:
|
|
- TypeError : if the buffer contains unsupported dtypes.
|
|
- NotImplementedError : if DLPack support is not implemented
|
|
|
|
Useful to have to connect to array libraries. Support optional because
|
|
it's not completely trivial to implement for a Python-only library.
|
|
"""
|
|
raise NotImplementedError("__dlpack__")
|
|
|
|
def __dlpack_device__(self) -> tuple[DlpackDeviceType, int | None]:
|
|
"""
|
|
Device type and device ID for where the data in the buffer resides.
|
|
Uses device type codes matching DLPack.
|
|
Note: must be implemented even if ``__dlpack__`` is not.
|
|
"""
|
|
if self._x.is_cpu:
|
|
return (DlpackDeviceType.CPU, None)
|
|
else:
|
|
raise NotImplementedError("__dlpack_device__")
|
|
|
|
def __repr__(self) -> str:
|
|
return (
|
|
"PyArrowBuffer(" +
|
|
str(
|
|
{
|
|
"bufsize": self.bufsize,
|
|
"ptr": self.ptr,
|
|
"device": self.__dlpack_device__()[0].name,
|
|
}
|
|
) +
|
|
")"
|
|
)
|