277 lines
8.4 KiB
Python
277 lines
8.4 KiB
Python
![]() |
# Licensed to the Apache Software Foundation (ASF) under one
|
||
|
# or more contributor license agreements. See the NOTICE file
|
||
|
# distributed with this work for additional information
|
||
|
# regarding copyright ownership. The ASF licenses this file
|
||
|
# to you under the Apache License, Version 2.0 (the
|
||
|
# "License"); you may not use this file except in compliance
|
||
|
# with the License. You may obtain a copy of the License at
|
||
|
#
|
||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
#
|
||
|
# Unless required by applicable law or agreed to in writing,
|
||
|
# software distributed under the License is distributed on an
|
||
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||
|
# KIND, either express or implied. See the License for the
|
||
|
# specific language governing permissions and limitations
|
||
|
# under the License.
|
||
|
|
||
|
# Miscellaneous utility code
|
||
|
|
||
|
import os
|
||
|
import contextlib
|
||
|
import functools
|
||
|
import gc
|
||
|
import socket
|
||
|
import sys
|
||
|
import textwrap
|
||
|
import types
|
||
|
import warnings
|
||
|
|
||
|
|
||
|
_DEPR_MSG = (
|
||
|
"pyarrow.{} is deprecated as of {}, please use pyarrow.{} instead."
|
||
|
)
|
||
|
|
||
|
|
||
|
def doc(*docstrings, **params):
    """
    Build a callable's docstring from templates plus its own docstring.

    The given components are concatenated in order, the decorated callable's
    own (dedented) docstring is appended last, and, when any ``params`` are
    supplied, every string component is passed through
    ``str.format(**params)``.

    The unformatted components are stored on the decorated callable as
    ``_docstring_components`` so they can be reused when that callable is
    itself handed to ``doc`` later. String components are stored as strings;
    callables without recorded components are stored as-is, and their
    ``__doc__`` is read (and dedented, but not formatted) when the final
    docstring is assembled.

    This decorator cannot be used on Cython classes due to a CPython
    constraint, which enforces the __doc__ attribute to be read-only.
    See https://github.com/python/cpython/issues/91309

    Parameters
    ----------
    *docstrings : None, str, or callable
        The strings / docstring templates / documented callables to place,
        in order, before the decorated callable's own docstring. ``None``
        entries and callables with an empty ``__doc__`` are skipped.
    **params
        The key/value pairs used to format the docstring templates.

    Returns
    -------
    callable
        A decorator that sets ``__doc__`` and ``_docstring_components`` on
        the callable it receives and returns that same callable.
    """

    def decorator(decorated):
        templates = []

        # Gather the raw (unformatted) pieces in the order they were given.
        for item in docstrings:
            if item is None:
                continue
            if hasattr(item, "_docstring_components"):
                # Reuse the pieces recorded by an earlier application of doc.
                templates.extend(item._docstring_components)
            elif isinstance(item, str) or item.__doc__:
                templates.append(item)

        # The decorated callable's own docstring always comes last.
        if decorated.__doc__:
            templates.append(textwrap.dedent(decorated.__doc__))

        rendered = []
        for piece in templates:
            if not isinstance(piece, str):
                # A documented callable: use its docstring verbatim.
                rendered.append(textwrap.dedent(piece.__doc__ or ""))
            elif params:
                rendered.append(piece.format(**params))
            else:
                rendered.append(piece)

        decorated.__doc__ = "".join(rendered)
        decorated._docstring_components = templates
        return decorated

    return decorator
|
||
|
|
||
|
|
||
|
def _deprecate_api(old_name, new_name, api, next_version, type=FutureWarning):
    """
    Wrap *api* so that every call emits a deprecation warning first.

    Parameters
    ----------
    old_name
        Deprecated name; inserted after ``pyarrow.`` in the warning message.
    new_name
        Replacement name; inserted after ``pyarrow.`` in the warning message.
    api : callable
        The callable that the returned wrapper forwards to.
    next_version
        Version inserted into the "deprecated as of" part of the message.
    type : Warning subclass, default FutureWarning
        Warning category passed to ``warnings.warn``.

    Returns
    -------
    callable
        A function accepting any arguments that warns and then returns
        ``api(*args, **kwargs)``.
    """
    msg = _DEPR_MSG.format(old_name, next_version, new_name)

    def wrapper(*args, **kwargs):
        # stacklevel=2 attributes the warning to the code calling the
        # deprecated API rather than to this wrapper.
        warnings.warn(msg, type, stacklevel=2)
        return api(*args, **kwargs)
    return wrapper
|
||
|
|
||
|
|
||
|
def _deprecate_class(old_name, new_class, next_version,
|
||
|
instancecheck=True):
|
||
|
"""
|
||
|
Raise warning if a deprecated class is used in an isinstance check.
|
||
|
"""
|
||
|
class _DeprecatedMeta(type):
|
||
|
def __instancecheck__(self, other):
|
||
|
warnings.warn(
|
||
|
_DEPR_MSG.format(old_name, next_version, new_class.__name__),
|
||
|
FutureWarning,
|
||
|
stacklevel=2
|
||
|
)
|
||
|
return isinstance(other, new_class)
|
||
|
|
||
|
return _DeprecatedMeta(old_name, (new_class,), {})
|
||
|
|
||
|
|
||
|
def _is_iterable(obj):
|
||
|
try:
|
||
|
iter(obj)
|
||
|
return True
|
||
|
except TypeError:
|
||
|
return False
|
||
|
|
||
|
|
||
|
def _is_path_like(path):
|
||
|
return isinstance(path, str) or hasattr(path, '__fspath__')
|
||
|
|
||
|
|
||
|
def _stringify_path(path):
|
||
|
"""
|
||
|
Convert *path* to a string or unicode path if possible.
|
||
|
"""
|
||
|
if isinstance(path, str):
|
||
|
return os.path.expanduser(path)
|
||
|
|
||
|
# checking whether path implements the filesystem protocol
|
||
|
try:
|
||
|
return os.path.expanduser(path.__fspath__())
|
||
|
except AttributeError:
|
||
|
pass
|
||
|
|
||
|
raise TypeError("not a path-like object")
|
||
|
|
||
|
|
||
|
def product(seq):
    """
    Multiply together all items of *seq*, starting from 1.

    An empty iterable therefore yields 1.
    """
    result = 1
    for factor in seq:
        result = result * factor
    return result
|
||
|
|
||
|
|
||
|
def get_contiguous_span(shape, strides, itemsize):
    """
    Return a contiguous span of N-D array data.

    Parameters
    ----------
    shape : tuple
        Extent of the array along each dimension.
    strides : tuple
        Step along each dimension, in the same units as *itemsize*
        (presumably bytes). May be negative. If empty, the span is taken
        to be ``itemsize * product(shape)`` long, starting at 0.
    itemsize : int
        Size of a single element.

    Returns
    -------
    start, end : int
        The span end points.

    Raises
    ------
    ValueError
        If strides are given and the span they cover is not exactly
        ``itemsize * product(shape)`` long.
    """
    if not strides:
        return 0, itemsize * product(shape)

    lo, hi = 0, itemsize
    for axis, extent in enumerate(shape):
        if extent == 0:
            # An empty dimension means there is no data at all.
            lo = hi = 0
            break
        step = strides[axis]
        if step > 0:
            # Positive strides push the upper end point forward ...
            hi += step * (extent - 1)
        elif step < 0:
            # ... negative strides pull the lower end point backward.
            lo += step * (extent - 1)

    if hi - lo != itemsize * product(shape):
        raise ValueError('array data is non-contiguous')
    return lo, hi
|
||
|
|
||
|
|
||
|
def find_free_port():
    """
    Return a TCP port number picked by the operating system.

    A temporary IPv4 stream socket is bound to port 0 on all interfaces,
    its assigned port is read back, and the socket is closed before
    returning.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as listener:
        listener.bind(('', 0))
        listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        port = listener.getsockname()[1]
    return port
|
||
|
|
||
|
|
||
|
def guid():
    """
    Return a fresh random UUID (version 4) as a 32-character hex string.
    """
    import uuid
    return uuid.uuid4().hex
|
||
|
|
||
|
|
||
|
def _break_traceback_cycle_from_frame(frame):
    """
    Clear the locals of frames reachable from *frame* through GC referrers.

    Starting from *frame*, the garbage collector's referrers are searched
    for another frame object (ignoring this function's own frame). The
    first one found is cleared with ``frame.clear()`` and becomes the new
    starting point. The walk stops as soon as no frame is found among the
    referrers. Note that the frame passed in is only used as the starting
    point; the frames that get cleared are the ones found among referrers.
    """
    # Clear local variables in all inner frames, so as to break the
    # reference cycle.
    this_frame = sys._getframe(0)
    refs = gc.get_referrers(frame)
    while refs:
        # `frame` is rebound here: after the loop it names the frame found
        # among the referrers (if any).
        for frame in refs:
            if frame is not this_frame and isinstance(frame, types.FrameType):
                break
        else:
            # No frame found in referrers (finished?)
            break
        # Drop the referrers list before clearing (presumably so that it
        # does not itself keep the listed objects alive).
        refs = None
        # Clear the frame locals, to try and break the cycle (it is
        # somewhere along the chain of execution frames).
        frame.clear()
        # To visit the inner frame, we need to find it among the
        # referrers of this frame (while `frame.f_back` would let
        # us visit the outer frame).
        refs = gc.get_referrers(frame)
    # Release this function's own references to the visited frames.
    refs = frame = this_frame = None
|
||
|
|
||
|
|
||
|
def _download_urllib(url, out_path):
|
||
|
from urllib.request import urlopen
|
||
|
with urlopen(url) as response:
|
||
|
with open(out_path, 'wb') as f:
|
||
|
f.write(response.read())
|
||
|
|
||
|
|
||
|
def _download_requests(url, out_path):
    """
    Download *url* to *out_path* using the third-party ``requests`` package.

    Parameters
    ----------
    url : str
        Address of the resource to fetch.
    out_path : path-like
        Destination file; opened in binary write mode.

    Raises
    ------
    ImportError
        If ``requests`` is not installed.
    requests.HTTPError
        If the server answers with a 4xx or 5xx status code.
    """
    import requests
    with requests.get(url) as response:
        # Fail loudly on HTTP errors instead of silently saving the
        # server's error page as if it were the requested file.
        response.raise_for_status()
        with open(out_path, 'wb') as f:
            f.write(response.content)
|
||
|
|
||
|
|
||
|
def download_tzdata_on_windows():
    r"""
    Download and extract latest IANA timezone database into the
    location expected by Arrow which is %USERPROFILE%\Downloads\tzdata.

    The CLDR ``windowsZones.xml`` file is downloaded into the same
    directory. The directory is created if it does not exist yet.

    Raises
    ------
    TypeError
        If called on a platform other than Windows.
    """
    if sys.platform != 'win32':
        raise TypeError(
            f"Timezone database is already provided by {sys.platform}")

    import tarfile

    tzdata_url = "https://data.iana.org/time-zones/tzdata-latest.tar.gz"
    tzdata_path = os.path.expandvars(r"%USERPROFILE%\Downloads\tzdata")
    tzdata_compressed_path = os.path.join(tzdata_path, "tzdata.tar.gz")
    windows_zones_url = "https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml"  # noqa
    windows_zones_path = os.path.join(tzdata_path, "windowsZones.xml")
    os.makedirs(tzdata_path, exist_ok=True)

    # Try to download the files with requests and then fall back to urllib.
    # This works around possible issues in certain older environments
    # (GH-45295)
    try:
        _download_requests(tzdata_url, tzdata_compressed_path)
        _download_requests(windows_zones_url, windows_zones_path)
    except ImportError:
        _download_urllib(tzdata_url, tzdata_compressed_path)
        _download_urllib(windows_zones_url, windows_zones_path)

    assert os.path.exists(tzdata_compressed_path)
    assert os.path.exists(windows_zones_path)

    # Use a context manager so the archive's file handle is closed
    # deterministically once extraction is done.
    with tarfile.open(tzdata_compressed_path) as archive:
        if hasattr(tarfile, 'data_filter'):
            # Extraction filters are available: refuse unsafe members
            # (absolute paths, links pointing outside the destination, ...).
            archive.extractall(tzdata_path, filter='data')
        else:
            archive.extractall(tzdata_path)
|