Adding all project files
This commit is contained in:
parent
6c9e127bdc
commit
cd4316ad0f
42289 changed files with 8009643 additions and 0 deletions
920
venv/Lib/site-packages/scipy/spatial/_kdtree.py
Normal file
920
venv/Lib/site-packages/scipy/spatial/_kdtree.py
Normal file
|
@ -0,0 +1,920 @@
|
|||
# Copyright Anne M. Archibald 2008
|
||||
# Released under the scipy license
|
||||
import numpy as np
|
||||
from ._ckdtree import cKDTree, cKDTreeNode
|
||||
|
||||
__all__ = ['minkowski_distance_p', 'minkowski_distance',
|
||||
'distance_matrix',
|
||||
'Rectangle', 'KDTree']
|
||||
|
||||
|
||||
def minkowski_distance_p(x, y, p=2):
|
||||
"""Compute the pth power of the L**p distance between two arrays.
|
||||
|
||||
For efficiency, this function computes the L**p distance but does
|
||||
not extract the pth root. If `p` is 1 or infinity, this is equal to
|
||||
the actual L**p distance.
|
||||
|
||||
The last dimensions of `x` and `y` must be the same length. Any
|
||||
other dimensions must be compatible for broadcasting.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
x : (..., K) array_like
|
||||
Input array.
|
||||
y : (..., K) array_like
|
||||
Input array.
|
||||
p : float, 1 <= p <= infinity
|
||||
Which Minkowski p-norm to use.
|
||||
|
||||
Returns
|
||||
-------
|
||||
dist : ndarray
|
||||
pth power of the distance between the input arrays.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from scipy.spatial import minkowski_distance_p
|
||||
>>> minkowski_distance_p([[0, 0], [0, 0]], [[1, 1], [0, 1]])
|
||||
array([2, 1])
|
||||
|
||||
"""
|
||||
x = np.asarray(x)
|
||||
y = np.asarray(y)
|
||||
|
||||
# Find smallest common datatype with float64 (return type of this
|
||||
# function) - addresses #10262.
|
||||
# Don't just cast to float64 for complex input case.
|
||||
common_datatype = np.promote_types(np.promote_types(x.dtype, y.dtype),
|
||||
'float64')
|
||||
|
||||
# Make sure x and y are NumPy arrays of correct datatype.
|
||||
x = x.astype(common_datatype)
|
||||
y = y.astype(common_datatype)
|
||||
|
||||
if p == np.inf:
|
||||
return np.amax(np.abs(y-x), axis=-1)
|
||||
elif p == 1:
|
||||
return np.sum(np.abs(y-x), axis=-1)
|
||||
else:
|
||||
return np.sum(np.abs(y-x)**p, axis=-1)
|
||||
|
||||
|
||||
def minkowski_distance(x, y, p=2):
|
||||
"""Compute the L**p distance between two arrays.
|
||||
|
||||
The last dimensions of `x` and `y` must be the same length. Any
|
||||
other dimensions must be compatible for broadcasting.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
x : (..., K) array_like
|
||||
Input array.
|
||||
y : (..., K) array_like
|
||||
Input array.
|
||||
p : float, 1 <= p <= infinity
|
||||
Which Minkowski p-norm to use.
|
||||
|
||||
Returns
|
||||
-------
|
||||
dist : ndarray
|
||||
Distance between the input arrays.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from scipy.spatial import minkowski_distance
|
||||
>>> minkowski_distance([[0, 0], [0, 0]], [[1, 1], [0, 1]])
|
||||
array([ 1.41421356, 1. ])
|
||||
|
||||
"""
|
||||
x = np.asarray(x)
|
||||
y = np.asarray(y)
|
||||
if p == np.inf or p == 1:
|
||||
return minkowski_distance_p(x, y, p)
|
||||
else:
|
||||
return minkowski_distance_p(x, y, p)**(1./p)
|
||||
|
||||
|
||||
class Rectangle:
|
||||
"""Hyperrectangle class.
|
||||
|
||||
Represents a Cartesian product of intervals.
|
||||
"""
|
||||
def __init__(self, maxes, mins):
|
||||
"""Construct a hyperrectangle."""
|
||||
self.maxes = np.maximum(maxes,mins).astype(float)
|
||||
self.mins = np.minimum(maxes,mins).astype(float)
|
||||
self.m, = self.maxes.shape
|
||||
|
||||
def __repr__(self):
|
||||
return "<Rectangle %s>" % list(zip(self.mins, self.maxes))
|
||||
|
||||
def volume(self):
|
||||
"""Total volume."""
|
||||
return np.prod(self.maxes-self.mins)
|
||||
|
||||
def split(self, d, split):
|
||||
"""Produce two hyperrectangles by splitting.
|
||||
|
||||
In general, if you need to compute maximum and minimum
|
||||
distances to the children, it can be done more efficiently
|
||||
by updating the maximum and minimum distances to the parent.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
d : int
|
||||
Axis to split hyperrectangle along.
|
||||
split : float
|
||||
Position along axis `d` to split at.
|
||||
|
||||
"""
|
||||
mid = np.copy(self.maxes)
|
||||
mid[d] = split
|
||||
less = Rectangle(self.mins, mid)
|
||||
mid = np.copy(self.mins)
|
||||
mid[d] = split
|
||||
greater = Rectangle(mid, self.maxes)
|
||||
return less, greater
|
||||
|
||||
def min_distance_point(self, x, p=2.):
|
||||
"""
|
||||
Return the minimum distance between input and points in the
|
||||
hyperrectangle.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
x : array_like
|
||||
Input.
|
||||
p : float, optional
|
||||
Input.
|
||||
|
||||
"""
|
||||
return minkowski_distance(
|
||||
0, np.maximum(0, np.maximum(self.mins-x, x-self.maxes)),
|
||||
p
|
||||
)
|
||||
|
||||
def max_distance_point(self, x, p=2.):
|
||||
"""
|
||||
Return the maximum distance between input and points in the hyperrectangle.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
x : array_like
|
||||
Input array.
|
||||
p : float, optional
|
||||
Input.
|
||||
|
||||
"""
|
||||
return minkowski_distance(0, np.maximum(self.maxes-x, x-self.mins), p)
|
||||
|
||||
def min_distance_rectangle(self, other, p=2.):
|
||||
"""
|
||||
Compute the minimum distance between points in the two hyperrectangles.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
other : hyperrectangle
|
||||
Input.
|
||||
p : float
|
||||
Input.
|
||||
|
||||
"""
|
||||
return minkowski_distance(
|
||||
0,
|
||||
np.maximum(0, np.maximum(self.mins-other.maxes,
|
||||
other.mins-self.maxes)),
|
||||
p
|
||||
)
|
||||
|
||||
def max_distance_rectangle(self, other, p=2.):
|
||||
"""
|
||||
Compute the maximum distance between points in the two hyperrectangles.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
other : hyperrectangle
|
||||
Input.
|
||||
p : float, optional
|
||||
Input.
|
||||
|
||||
"""
|
||||
return minkowski_distance(
|
||||
0, np.maximum(self.maxes-other.mins, other.maxes-self.mins), p)
|
||||
|
||||
|
||||
class KDTree(cKDTree):
|
||||
"""kd-tree for quick nearest-neighbor lookup.
|
||||
|
||||
This class provides an index into a set of k-dimensional points
|
||||
which can be used to rapidly look up the nearest neighbors of any
|
||||
point.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : array_like, shape (n,m)
|
||||
The n data points of dimension m to be indexed. This array is
|
||||
not copied unless this is necessary to produce a contiguous
|
||||
array of doubles, and so modifying this data will result in
|
||||
bogus results. The data are also copied if the kd-tree is built
|
||||
with copy_data=True.
|
||||
leafsize : positive int, optional
|
||||
The number of points at which the algorithm switches over to
|
||||
brute-force. Default: 10.
|
||||
compact_nodes : bool, optional
|
||||
If True, the kd-tree is built to shrink the hyperrectangles to
|
||||
the actual data range. This usually gives a more compact tree that
|
||||
is robust against degenerated input data and gives faster queries
|
||||
at the expense of longer build time. Default: True.
|
||||
copy_data : bool, optional
|
||||
If True the data is always copied to protect the kd-tree against
|
||||
data corruption. Default: False.
|
||||
balanced_tree : bool, optional
|
||||
If True, the median is used to split the hyperrectangles instead of
|
||||
the midpoint. This usually gives a more compact tree and
|
||||
faster queries at the expense of longer build time. Default: True.
|
||||
boxsize : array_like or scalar, optional
|
||||
Apply a m-d toroidal topology to the KDTree.. The topology is generated
|
||||
by :math:`x_i + n_i L_i` where :math:`n_i` are integers and :math:`L_i`
|
||||
is the boxsize along i-th dimension. The input data shall be wrapped
|
||||
into :math:`[0, L_i)`. A ValueError is raised if any of the data is
|
||||
outside of this bound.
|
||||
|
||||
Notes
|
||||
-----
|
||||
The algorithm used is described in Maneewongvatana and Mount 1999.
|
||||
The general idea is that the kd-tree is a binary tree, each of whose
|
||||
nodes represents an axis-aligned hyperrectangle. Each node specifies
|
||||
an axis and splits the set of points based on whether their coordinate
|
||||
along that axis is greater than or less than a particular value.
|
||||
|
||||
During construction, the axis and splitting point are chosen by the
|
||||
"sliding midpoint" rule, which ensures that the cells do not all
|
||||
become long and thin.
|
||||
|
||||
The tree can be queried for the r closest neighbors of any given point
|
||||
(optionally returning only those within some maximum distance of the
|
||||
point). It can also be queried, with a substantial gain in efficiency,
|
||||
for the r approximate closest neighbors.
|
||||
|
||||
For large dimensions (20 is already large) do not expect this to run
|
||||
significantly faster than brute force. High-dimensional nearest-neighbor
|
||||
queries are a substantial open problem in computer science.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
data : ndarray, shape (n,m)
|
||||
The n data points of dimension m to be indexed. This array is
|
||||
not copied unless this is necessary to produce a contiguous
|
||||
array of doubles. The data are also copied if the kd-tree is built
|
||||
with `copy_data=True`.
|
||||
leafsize : positive int
|
||||
The number of points at which the algorithm switches over to
|
||||
brute-force.
|
||||
m : int
|
||||
The dimension of a single data-point.
|
||||
n : int
|
||||
The number of data points.
|
||||
maxes : ndarray, shape (m,)
|
||||
The maximum value in each dimension of the n data points.
|
||||
mins : ndarray, shape (m,)
|
||||
The minimum value in each dimension of the n data points.
|
||||
size : int
|
||||
The number of nodes in the tree.
|
||||
|
||||
"""
|
||||
|
||||
class node:
|
||||
@staticmethod
|
||||
def _create(ckdtree_node=None):
|
||||
"""Create either an inner or leaf node, wrapping a cKDTreeNode instance"""
|
||||
if ckdtree_node is None:
|
||||
return KDTree.node(ckdtree_node)
|
||||
elif ckdtree_node.split_dim == -1:
|
||||
return KDTree.leafnode(ckdtree_node)
|
||||
else:
|
||||
return KDTree.innernode(ckdtree_node)
|
||||
|
||||
def __init__(self, ckdtree_node=None):
|
||||
if ckdtree_node is None:
|
||||
ckdtree_node = cKDTreeNode()
|
||||
self._node = ckdtree_node
|
||||
|
||||
def __lt__(self, other):
|
||||
return id(self) < id(other)
|
||||
|
||||
def __gt__(self, other):
|
||||
return id(self) > id(other)
|
||||
|
||||
def __le__(self, other):
|
||||
return id(self) <= id(other)
|
||||
|
||||
def __ge__(self, other):
|
||||
return id(self) >= id(other)
|
||||
|
||||
def __eq__(self, other):
|
||||
return id(self) == id(other)
|
||||
|
||||
class leafnode(node):
|
||||
@property
|
||||
def idx(self):
|
||||
return self._node.indices
|
||||
|
||||
@property
|
||||
def children(self):
|
||||
return self._node.children
|
||||
|
||||
class innernode(node):
|
||||
def __init__(self, ckdtreenode):
|
||||
assert isinstance(ckdtreenode, cKDTreeNode)
|
||||
super().__init__(ckdtreenode)
|
||||
self.less = KDTree.node._create(ckdtreenode.lesser)
|
||||
self.greater = KDTree.node._create(ckdtreenode.greater)
|
||||
|
||||
@property
|
||||
def split_dim(self):
|
||||
return self._node.split_dim
|
||||
|
||||
@property
|
||||
def split(self):
|
||||
return self._node.split
|
||||
|
||||
@property
|
||||
def children(self):
|
||||
return self._node.children
|
||||
|
||||
@property
|
||||
def tree(self):
|
||||
if not hasattr(self, "_tree"):
|
||||
self._tree = KDTree.node._create(super().tree)
|
||||
|
||||
return self._tree
|
||||
|
||||
def __init__(self, data, leafsize=10, compact_nodes=True, copy_data=False,
|
||||
balanced_tree=True, boxsize=None):
|
||||
data = np.asarray(data)
|
||||
if data.dtype.kind == 'c':
|
||||
raise TypeError("KDTree does not work with complex data")
|
||||
|
||||
# Note KDTree has different default leafsize from cKDTree
|
||||
super().__init__(data, leafsize, compact_nodes, copy_data,
|
||||
balanced_tree, boxsize)
|
||||
|
||||
def query(
|
||||
self, x, k=1, eps=0, p=2, distance_upper_bound=np.inf, workers=1):
|
||||
r"""Query the kd-tree for nearest neighbors.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
x : array_like, last dimension self.m
|
||||
An array of points to query.
|
||||
k : int or Sequence[int], optional
|
||||
Either the number of nearest neighbors to return, or a list of the
|
||||
k-th nearest neighbors to return, starting from 1.
|
||||
eps : nonnegative float, optional
|
||||
Return approximate nearest neighbors; the kth returned value
|
||||
is guaranteed to be no further than (1+eps) times the
|
||||
distance to the real kth nearest neighbor.
|
||||
p : float, 1<=p<=infinity, optional
|
||||
Which Minkowski p-norm to use.
|
||||
1 is the sum-of-absolute-values distance ("Manhattan" distance).
|
||||
2 is the usual Euclidean distance.
|
||||
infinity is the maximum-coordinate-difference distance.
|
||||
A large, finite p may cause a ValueError if overflow can occur.
|
||||
distance_upper_bound : nonnegative float, optional
|
||||
Return only neighbors within this distance. This is used to prune
|
||||
tree searches, so if you are doing a series of nearest-neighbor
|
||||
queries, it may help to supply the distance to the nearest neighbor
|
||||
of the most recent point.
|
||||
workers : int, optional
|
||||
Number of workers to use for parallel processing. If -1 is given
|
||||
all CPU threads are used. Default: 1.
|
||||
|
||||
.. versionadded:: 1.6.0
|
||||
|
||||
Returns
|
||||
-------
|
||||
d : float or array of floats
|
||||
The distances to the nearest neighbors.
|
||||
If ``x`` has shape ``tuple+(self.m,)``, then ``d`` has shape
|
||||
``tuple+(k,)``.
|
||||
When k == 1, the last dimension of the output is squeezed.
|
||||
Missing neighbors are indicated with infinite distances.
|
||||
Hits are sorted by distance (nearest first).
|
||||
|
||||
.. versionchanged:: 1.9.0
|
||||
Previously if ``k=None``, then `d` was an object array of
|
||||
shape ``tuple``, containing lists of distances. This behavior
|
||||
has been removed, use `query_ball_point` instead.
|
||||
|
||||
i : integer or array of integers
|
||||
The index of each neighbor in ``self.data``.
|
||||
``i`` is the same shape as d.
|
||||
Missing neighbors are indicated with ``self.n``.
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
>>> import numpy as np
|
||||
>>> from scipy.spatial import KDTree
|
||||
>>> x, y = np.mgrid[0:5, 2:8]
|
||||
>>> tree = KDTree(np.c_[x.ravel(), y.ravel()])
|
||||
|
||||
To query the nearest neighbours and return squeezed result, use
|
||||
|
||||
>>> dd, ii = tree.query([[0, 0], [2.2, 2.9]], k=1)
|
||||
>>> print(dd, ii, sep='\n')
|
||||
[2. 0.2236068]
|
||||
[ 0 13]
|
||||
|
||||
To query the nearest neighbours and return unsqueezed result, use
|
||||
|
||||
>>> dd, ii = tree.query([[0, 0], [2.2, 2.9]], k=[1])
|
||||
>>> print(dd, ii, sep='\n')
|
||||
[[2. ]
|
||||
[0.2236068]]
|
||||
[[ 0]
|
||||
[13]]
|
||||
|
||||
To query the second nearest neighbours and return unsqueezed result,
|
||||
use
|
||||
|
||||
>>> dd, ii = tree.query([[0, 0], [2.2, 2.9]], k=[2])
|
||||
>>> print(dd, ii, sep='\n')
|
||||
[[2.23606798]
|
||||
[0.80622577]]
|
||||
[[ 6]
|
||||
[19]]
|
||||
|
||||
To query the first and second nearest neighbours, use
|
||||
|
||||
>>> dd, ii = tree.query([[0, 0], [2.2, 2.9]], k=2)
|
||||
>>> print(dd, ii, sep='\n')
|
||||
[[2. 2.23606798]
|
||||
[0.2236068 0.80622577]]
|
||||
[[ 0 6]
|
||||
[13 19]]
|
||||
|
||||
or, be more specific
|
||||
|
||||
>>> dd, ii = tree.query([[0, 0], [2.2, 2.9]], k=[1, 2])
|
||||
>>> print(dd, ii, sep='\n')
|
||||
[[2. 2.23606798]
|
||||
[0.2236068 0.80622577]]
|
||||
[[ 0 6]
|
||||
[13 19]]
|
||||
|
||||
"""
|
||||
x = np.asarray(x)
|
||||
if x.dtype.kind == 'c':
|
||||
raise TypeError("KDTree does not work with complex data")
|
||||
|
||||
if k is None:
|
||||
raise ValueError("k must be an integer or a sequence of integers")
|
||||
|
||||
d, i = super().query(x, k, eps, p, distance_upper_bound, workers)
|
||||
if isinstance(i, int):
|
||||
i = np.intp(i)
|
||||
return d, i
|
||||
|
||||
def query_ball_point(self, x, r, p=2., eps=0, workers=1,
|
||||
return_sorted=None, return_length=False):
|
||||
"""Find all points within distance r of point(s) x.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
x : array_like, shape tuple + (self.m,)
|
||||
The point or points to search for neighbors of.
|
||||
r : array_like, float
|
||||
The radius of points to return, must broadcast to the length of x.
|
||||
p : float, optional
|
||||
Which Minkowski p-norm to use. Should be in the range [1, inf].
|
||||
A finite large p may cause a ValueError if overflow can occur.
|
||||
eps : nonnegative float, optional
|
||||
Approximate search. Branches of the tree are not explored if their
|
||||
nearest points are further than ``r / (1 + eps)``, and branches are
|
||||
added in bulk if their furthest points are nearer than
|
||||
``r * (1 + eps)``.
|
||||
workers : int, optional
|
||||
Number of jobs to schedule for parallel processing. If -1 is given
|
||||
all processors are used. Default: 1.
|
||||
|
||||
.. versionadded:: 1.6.0
|
||||
return_sorted : bool, optional
|
||||
Sorts returned indices if True and does not sort them if False. If
|
||||
None, does not sort single point queries, but does sort
|
||||
multi-point queries which was the behavior before this option
|
||||
was added.
|
||||
|
||||
.. versionadded:: 1.6.0
|
||||
return_length : bool, optional
|
||||
Return the number of points inside the radius instead of a list
|
||||
of the indices.
|
||||
|
||||
.. versionadded:: 1.6.0
|
||||
|
||||
Returns
|
||||
-------
|
||||
results : list or array of lists
|
||||
If `x` is a single point, returns a list of the indices of the
|
||||
neighbors of `x`. If `x` is an array of points, returns an object
|
||||
array of shape tuple containing lists of neighbors.
|
||||
|
||||
Notes
|
||||
-----
|
||||
If you have many points whose neighbors you want to find, you may save
|
||||
substantial amounts of time by putting them in a KDTree and using
|
||||
query_ball_tree.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import numpy as np
|
||||
>>> from scipy import spatial
|
||||
>>> x, y = np.mgrid[0:5, 0:5]
|
||||
>>> points = np.c_[x.ravel(), y.ravel()]
|
||||
>>> tree = spatial.KDTree(points)
|
||||
>>> sorted(tree.query_ball_point([2, 0], 1))
|
||||
[5, 10, 11, 15]
|
||||
|
||||
Query multiple points and plot the results:
|
||||
|
||||
>>> import matplotlib.pyplot as plt
|
||||
>>> points = np.asarray(points)
|
||||
>>> plt.plot(points[:,0], points[:,1], '.')
|
||||
>>> for results in tree.query_ball_point(([2, 0], [3, 3]), 1):
|
||||
... nearby_points = points[results]
|
||||
... plt.plot(nearby_points[:,0], nearby_points[:,1], 'o')
|
||||
>>> plt.margins(0.1, 0.1)
|
||||
>>> plt.show()
|
||||
|
||||
"""
|
||||
x = np.asarray(x)
|
||||
if x.dtype.kind == 'c':
|
||||
raise TypeError("KDTree does not work with complex data")
|
||||
return super().query_ball_point(
|
||||
x, r, p, eps, workers, return_sorted, return_length)
|
||||
|
||||
def query_ball_tree(self, other, r, p=2., eps=0):
|
||||
"""
|
||||
Find all pairs of points between `self` and `other` whose distance is
|
||||
at most r.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
other : KDTree instance
|
||||
The tree containing points to search against.
|
||||
r : float
|
||||
The maximum distance, has to be positive.
|
||||
p : float, optional
|
||||
Which Minkowski norm to use. `p` has to meet the condition
|
||||
``1 <= p <= infinity``.
|
||||
eps : float, optional
|
||||
Approximate search. Branches of the tree are not explored
|
||||
if their nearest points are further than ``r/(1+eps)``, and
|
||||
branches are added in bulk if their furthest points are nearer
|
||||
than ``r * (1+eps)``. `eps` has to be non-negative.
|
||||
|
||||
Returns
|
||||
-------
|
||||
results : list of lists
|
||||
For each element ``self.data[i]`` of this tree, ``results[i]`` is a
|
||||
list of the indices of its neighbors in ``other.data``.
|
||||
|
||||
Examples
|
||||
--------
|
||||
You can search all pairs of points between two kd-trees within a distance:
|
||||
|
||||
>>> import matplotlib.pyplot as plt
|
||||
>>> import numpy as np
|
||||
>>> from scipy.spatial import KDTree
|
||||
>>> rng = np.random.default_rng()
|
||||
>>> points1 = rng.random((15, 2))
|
||||
>>> points2 = rng.random((15, 2))
|
||||
>>> plt.figure(figsize=(6, 6))
|
||||
>>> plt.plot(points1[:, 0], points1[:, 1], "xk", markersize=14)
|
||||
>>> plt.plot(points2[:, 0], points2[:, 1], "og", markersize=14)
|
||||
>>> kd_tree1 = KDTree(points1)
|
||||
>>> kd_tree2 = KDTree(points2)
|
||||
>>> indexes = kd_tree1.query_ball_tree(kd_tree2, r=0.2)
|
||||
>>> for i in range(len(indexes)):
|
||||
... for j in indexes[i]:
|
||||
... plt.plot([points1[i, 0], points2[j, 0]],
|
||||
... [points1[i, 1], points2[j, 1]], "-r")
|
||||
>>> plt.show()
|
||||
|
||||
"""
|
||||
return super().query_ball_tree(other, r, p, eps)
|
||||
|
||||
def query_pairs(self, r, p=2., eps=0, output_type='set'):
|
||||
"""Find all pairs of points in `self` whose distance is at most r.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
r : positive float
|
||||
The maximum distance.
|
||||
p : float, optional
|
||||
Which Minkowski norm to use. `p` has to meet the condition
|
||||
``1 <= p <= infinity``.
|
||||
eps : float, optional
|
||||
Approximate search. Branches of the tree are not explored
|
||||
if their nearest points are further than ``r/(1+eps)``, and
|
||||
branches are added in bulk if their furthest points are nearer
|
||||
than ``r * (1+eps)``. `eps` has to be non-negative.
|
||||
output_type : string, optional
|
||||
Choose the output container, 'set' or 'ndarray'. Default: 'set'
|
||||
|
||||
.. versionadded:: 1.6.0
|
||||
|
||||
Returns
|
||||
-------
|
||||
results : set or ndarray
|
||||
Set of pairs ``(i,j)``, with ``i < j``, for which the corresponding
|
||||
positions are close. If output_type is 'ndarray', an ndarry is
|
||||
returned instead of a set.
|
||||
|
||||
Examples
|
||||
--------
|
||||
You can search all pairs of points in a kd-tree within a distance:
|
||||
|
||||
>>> import matplotlib.pyplot as plt
|
||||
>>> import numpy as np
|
||||
>>> from scipy.spatial import KDTree
|
||||
>>> rng = np.random.default_rng()
|
||||
>>> points = rng.random((20, 2))
|
||||
>>> plt.figure(figsize=(6, 6))
|
||||
>>> plt.plot(points[:, 0], points[:, 1], "xk", markersize=14)
|
||||
>>> kd_tree = KDTree(points)
|
||||
>>> pairs = kd_tree.query_pairs(r=0.2)
|
||||
>>> for (i, j) in pairs:
|
||||
... plt.plot([points[i, 0], points[j, 0]],
|
||||
... [points[i, 1], points[j, 1]], "-r")
|
||||
>>> plt.show()
|
||||
|
||||
"""
|
||||
return super().query_pairs(r, p, eps, output_type)
|
||||
|
||||
def count_neighbors(self, other, r, p=2., weights=None, cumulative=True):
|
||||
"""Count how many nearby pairs can be formed.
|
||||
|
||||
Count the number of pairs ``(x1,x2)`` can be formed, with ``x1`` drawn
|
||||
from ``self`` and ``x2`` drawn from ``other``, and where
|
||||
``distance(x1, x2, p) <= r``.
|
||||
|
||||
Data points on ``self`` and ``other`` are optionally weighted by the
|
||||
``weights`` argument. (See below)
|
||||
|
||||
This is adapted from the "two-point correlation" algorithm described by
|
||||
Gray and Moore [1]_. See notes for further discussion.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
other : KDTree
|
||||
The other tree to draw points from, can be the same tree as self.
|
||||
r : float or one-dimensional array of floats
|
||||
The radius to produce a count for. Multiple radii are searched with
|
||||
a single tree traversal.
|
||||
If the count is non-cumulative(``cumulative=False``), ``r`` defines
|
||||
the edges of the bins, and must be non-decreasing.
|
||||
p : float, optional
|
||||
1<=p<=infinity.
|
||||
Which Minkowski p-norm to use.
|
||||
Default 2.0.
|
||||
A finite large p may cause a ValueError if overflow can occur.
|
||||
weights : tuple, array_like, or None, optional
|
||||
If None, the pair-counting is unweighted.
|
||||
If given as a tuple, weights[0] is the weights of points in
|
||||
``self``, and weights[1] is the weights of points in ``other``;
|
||||
either can be None to indicate the points are unweighted.
|
||||
If given as an array_like, weights is the weights of points in
|
||||
``self`` and ``other``. For this to make sense, ``self`` and
|
||||
``other`` must be the same tree. If ``self`` and ``other`` are two
|
||||
different trees, a ``ValueError`` is raised.
|
||||
Default: None
|
||||
|
||||
.. versionadded:: 1.6.0
|
||||
cumulative : bool, optional
|
||||
Whether the returned counts are cumulative. When cumulative is set
|
||||
to ``False`` the algorithm is optimized to work with a large number
|
||||
of bins (>10) specified by ``r``. When ``cumulative`` is set to
|
||||
True, the algorithm is optimized to work with a small number of
|
||||
``r``. Default: True
|
||||
|
||||
.. versionadded:: 1.6.0
|
||||
|
||||
Returns
|
||||
-------
|
||||
result : scalar or 1-D array
|
||||
The number of pairs. For unweighted counts, the result is integer.
|
||||
For weighted counts, the result is float.
|
||||
If cumulative is False, ``result[i]`` contains the counts with
|
||||
``(-inf if i == 0 else r[i-1]) < R <= r[i]``
|
||||
|
||||
Notes
|
||||
-----
|
||||
Pair-counting is the basic operation used to calculate the two point
|
||||
correlation functions from a data set composed of position of objects.
|
||||
|
||||
Two point correlation function measures the clustering of objects and
|
||||
is widely used in cosmology to quantify the large scale structure
|
||||
in our Universe, but it may be useful for data analysis in other fields
|
||||
where self-similar assembly of objects also occur.
|
||||
|
||||
The Landy-Szalay estimator for the two point correlation function of
|
||||
``D`` measures the clustering signal in ``D``. [2]_
|
||||
|
||||
For example, given the position of two sets of objects,
|
||||
|
||||
- objects ``D`` (data) contains the clustering signal, and
|
||||
|
||||
- objects ``R`` (random) that contains no signal,
|
||||
|
||||
.. math::
|
||||
|
||||
\\xi(r) = \\frac{<D, D> - 2 f <D, R> + f^2<R, R>}{f^2<R, R>},
|
||||
|
||||
where the brackets represents counting pairs between two data sets
|
||||
in a finite bin around ``r`` (distance), corresponding to setting
|
||||
`cumulative=False`, and ``f = float(len(D)) / float(len(R))`` is the
|
||||
ratio between number of objects from data and random.
|
||||
|
||||
The algorithm implemented here is loosely based on the dual-tree
|
||||
algorithm described in [1]_. We switch between two different
|
||||
pair-cumulation scheme depending on the setting of ``cumulative``.
|
||||
The computing time of the method we use when for
|
||||
``cumulative == False`` does not scale with the total number of bins.
|
||||
The algorithm for ``cumulative == True`` scales linearly with the
|
||||
number of bins, though it is slightly faster when only
|
||||
1 or 2 bins are used. [5]_.
|
||||
|
||||
As an extension to the naive pair-counting,
|
||||
weighted pair-counting counts the product of weights instead
|
||||
of number of pairs.
|
||||
Weighted pair-counting is used to estimate marked correlation functions
|
||||
([3]_, section 2.2),
|
||||
or to properly calculate the average of data per distance bin
|
||||
(e.g. [4]_, section 2.1 on redshift).
|
||||
|
||||
.. [1] Gray and Moore,
|
||||
"N-body problems in statistical learning",
|
||||
Mining the sky, 2000,
|
||||
https://arxiv.org/abs/astro-ph/0012333
|
||||
|
||||
.. [2] Landy and Szalay,
|
||||
"Bias and variance of angular correlation functions",
|
||||
The Astrophysical Journal, 1993,
|
||||
http://adsabs.harvard.edu/abs/1993ApJ...412...64L
|
||||
|
||||
.. [3] Sheth, Connolly and Skibba,
|
||||
"Marked correlations in galaxy formation models",
|
||||
Arxiv e-print, 2005,
|
||||
https://arxiv.org/abs/astro-ph/0511773
|
||||
|
||||
.. [4] Hawkins, et al.,
|
||||
"The 2dF Galaxy Redshift Survey: correlation functions,
|
||||
peculiar velocities and the matter density of the Universe",
|
||||
Monthly Notices of the Royal Astronomical Society, 2002,
|
||||
http://adsabs.harvard.edu/abs/2003MNRAS.346...78H
|
||||
|
||||
.. [5] https://github.com/scipy/scipy/pull/5647#issuecomment-168474926
|
||||
|
||||
Examples
|
||||
--------
|
||||
You can count neighbors number between two kd-trees within a distance:
|
||||
|
||||
>>> import numpy as np
|
||||
>>> from scipy.spatial import KDTree
|
||||
>>> rng = np.random.default_rng()
|
||||
>>> points1 = rng.random((5, 2))
|
||||
>>> points2 = rng.random((5, 2))
|
||||
>>> kd_tree1 = KDTree(points1)
|
||||
>>> kd_tree2 = KDTree(points2)
|
||||
>>> kd_tree1.count_neighbors(kd_tree2, 0.2)
|
||||
1
|
||||
|
||||
This number is same as the total pair number calculated by
|
||||
`query_ball_tree`:
|
||||
|
||||
>>> indexes = kd_tree1.query_ball_tree(kd_tree2, r=0.2)
|
||||
>>> sum([len(i) for i in indexes])
|
||||
1
|
||||
|
||||
"""
|
||||
return super().count_neighbors(other, r, p, weights, cumulative)
|
||||
|
||||
def sparse_distance_matrix(
|
||||
self, other, max_distance, p=2., output_type='dok_matrix'):
|
||||
"""Compute a sparse distance matrix.
|
||||
|
||||
Computes a distance matrix between two KDTrees, leaving as zero
|
||||
any distance greater than max_distance.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
other : KDTree
|
||||
|
||||
max_distance : positive float
|
||||
|
||||
p : float, 1<=p<=infinity
|
||||
Which Minkowski p-norm to use.
|
||||
A finite large p may cause a ValueError if overflow can occur.
|
||||
|
||||
output_type : string, optional
|
||||
Which container to use for output data. Options: 'dok_matrix',
|
||||
'coo_matrix', 'dict', or 'ndarray'. Default: 'dok_matrix'.
|
||||
|
||||
.. versionadded:: 1.6.0
|
||||
|
||||
Returns
|
||||
-------
|
||||
result : dok_matrix, coo_matrix, dict or ndarray
|
||||
Sparse matrix representing the results in "dictionary of keys"
|
||||
format. If a dict is returned the keys are (i,j) tuples of indices.
|
||||
If output_type is 'ndarray' a record array with fields 'i', 'j',
|
||||
and 'v' is returned,
|
||||
|
||||
Examples
|
||||
--------
|
||||
You can compute a sparse distance matrix between two kd-trees:
|
||||
|
||||
>>> import numpy as np
|
||||
>>> from scipy.spatial import KDTree
|
||||
>>> rng = np.random.default_rng()
|
||||
>>> points1 = rng.random((5, 2))
|
||||
>>> points2 = rng.random((5, 2))
|
||||
>>> kd_tree1 = KDTree(points1)
|
||||
>>> kd_tree2 = KDTree(points2)
|
||||
>>> sdm = kd_tree1.sparse_distance_matrix(kd_tree2, 0.3)
|
||||
>>> sdm.toarray()
|
||||
array([[0. , 0. , 0.12295571, 0. , 0. ],
|
||||
[0. , 0. , 0. , 0. , 0. ],
|
||||
[0.28942611, 0. , 0. , 0.2333084 , 0. ],
|
||||
[0. , 0. , 0. , 0. , 0. ],
|
||||
[0.24617575, 0.29571802, 0.26836782, 0. , 0. ]])
|
||||
|
||||
You can check distances above the `max_distance` are zeros:
|
||||
|
||||
>>> from scipy.spatial import distance_matrix
|
||||
>>> distance_matrix(points1, points2)
|
||||
array([[0.56906522, 0.39923701, 0.12295571, 0.8658745 , 0.79428925],
|
||||
[0.37327919, 0.7225693 , 0.87665969, 0.32580855, 0.75679479],
|
||||
[0.28942611, 0.30088013, 0.6395831 , 0.2333084 , 0.33630734],
|
||||
[0.31994999, 0.72658602, 0.71124834, 0.55396483, 0.90785663],
|
||||
[0.24617575, 0.29571802, 0.26836782, 0.57714465, 0.6473269 ]])
|
||||
|
||||
"""
|
||||
return super().sparse_distance_matrix(
|
||||
other, max_distance, p, output_type)
|
||||
|
||||
|
||||
def distance_matrix(x, y, p=2, threshold=1000000):
|
||||
"""Compute the distance matrix.
|
||||
|
||||
Returns the matrix of all pair-wise distances.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
x : (M, K) array_like
|
||||
Matrix of M vectors in K dimensions.
|
||||
y : (N, K) array_like
|
||||
Matrix of N vectors in K dimensions.
|
||||
p : float, 1 <= p <= infinity
|
||||
Which Minkowski p-norm to use.
|
||||
threshold : positive int
|
||||
If ``M * N * K`` > `threshold`, algorithm uses a Python loop instead
|
||||
of large temporary arrays.
|
||||
|
||||
Returns
|
||||
-------
|
||||
result : (M, N) ndarray
|
||||
Matrix containing the distance from every vector in `x` to every vector
|
||||
in `y`.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from scipy.spatial import distance_matrix
|
||||
>>> distance_matrix([[0,0],[0,1]], [[1,0],[1,1]])
|
||||
array([[ 1. , 1.41421356],
|
||||
[ 1.41421356, 1. ]])
|
||||
|
||||
"""
|
||||
|
||||
x = np.asarray(x)
|
||||
m, k = x.shape
|
||||
y = np.asarray(y)
|
||||
n, kk = y.shape
|
||||
|
||||
if k != kk:
|
||||
raise ValueError(f"x contains {k}-dimensional vectors but y contains "
|
||||
f"{kk}-dimensional vectors")
|
||||
|
||||
if m*n*k <= threshold:
|
||||
return minkowski_distance(x[:,np.newaxis,:],y[np.newaxis,:,:],p)
|
||||
else:
|
||||
result = np.empty((m,n),dtype=float) # FIXME: figure out the best dtype
|
||||
if m < n:
|
||||
for i in range(m):
|
||||
result[i,:] = minkowski_distance(x[i],y,p)
|
||||
else:
|
||||
for j in range(n):
|
||||
result[:,j] = minkowski_distance(x,y[j],p)
|
||||
return result
|
Loading…
Add table
Add a link
Reference in a new issue