Adding all project files
parent 6c9e127bdc · commit cd4316ad0f
42289 changed files with 8009643 additions and 0 deletions
venv/Lib/site-packages/sklearn/linear_model/tests/test_common.py · 147 lines · new file
@@ -0,0 +1,147 @@
# License: BSD 3 clause

import inspect

import numpy as np
import pytest

from sklearn.base import is_classifier
from sklearn.datasets import make_low_rank_matrix
from sklearn.linear_model import (
    ARDRegression,
    BayesianRidge,
    ElasticNet,
    ElasticNetCV,
    Lars,
    LarsCV,
    Lasso,
    LassoCV,
    LassoLarsCV,
    LassoLarsIC,
    LinearRegression,
    LogisticRegression,
    LogisticRegressionCV,
    MultiTaskElasticNet,
    MultiTaskElasticNetCV,
    MultiTaskLasso,
    MultiTaskLassoCV,
    OrthogonalMatchingPursuit,
    OrthogonalMatchingPursuitCV,
    PoissonRegressor,
    Ridge,
    RidgeCV,
    SGDRegressor,
    TweedieRegressor,
)


# Note: GammaRegressor() and TweedieRegressor(power != 1) have a non-canonical link.
@pytest.mark.parametrize(
    "model",
    [
        ARDRegression(),
        BayesianRidge(),
        ElasticNet(),
        ElasticNetCV(),
        Lars(),
        LarsCV(),
        Lasso(),
        LassoCV(),
        LassoLarsCV(),
        LassoLarsIC(),
        LinearRegression(),
        # TODO: Fix SAGA, which fails badly with sample_weights.
        # This is a known limitation, see:
        # https://github.com/scikit-learn/scikit-learn/issues/21305
        pytest.param(
            LogisticRegression(
                penalty="elasticnet", solver="saga", l1_ratio=0.5, tol=1e-15
            ),
            marks=pytest.mark.xfail(reason="Missing importance sampling scheme"),
        ),
        LogisticRegressionCV(),
        MultiTaskElasticNet(),
        MultiTaskElasticNetCV(),
        MultiTaskLasso(),
        MultiTaskLassoCV(),
        OrthogonalMatchingPursuit(),
        OrthogonalMatchingPursuitCV(),
        PoissonRegressor(),
        Ridge(),
        RidgeCV(),
        pytest.param(
            SGDRegressor(tol=1e-15),
            marks=pytest.mark.xfail(reason="Insufficient precision."),
        ),
        SGDRegressor(penalty="elasticnet", max_iter=10_000),
        TweedieRegressor(power=0),  # same as Ridge
    ],
    ids=lambda x: x.__class__.__name__,
)
@pytest.mark.parametrize("with_sample_weight", [False, True])
def test_balance_property(model, with_sample_weight, global_random_seed):
    # Test that sum(y_predicted) == sum(y_observed) on the training set.
    # This must hold for all linear models with deviance of an exponential dispersion
    # family as loss and the corresponding canonical link if fit_intercept=True.
    # Examples:
    #   - squared error and identity link (most linear models)
    #   - Poisson deviance with log link
    #   - log loss with logit link
    # This is known as balance property or unconditional calibration/unbiasedness.
    # For reference, see Corollary 3.18, 3.20 and Chapter 5.1.5 of
    # M.V. Wuthrich and M. Merz, "Statistical Foundations of Actuarial Learning and its
    # Applications" (June 3, 2022). http://doi.org/10.2139/ssrn.3822407
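    # Concretely, with sample weights w_i the assertions at the end of this test
    # check that the weighted means agree:
    #     sum(w_i * y_pred_i) / sum(w_i) == sum(w_i * y_i) / sum(w_i)
    # up to the relative tolerance rel.
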
    if (
        with_sample_weight
        and "sample_weight" not in inspect.signature(model.fit).parameters.keys()
    ):
        pytest.skip("Estimator does not support sample_weight.")

    rel = 2e-4  # test precision
    if isinstance(model, SGDRegressor):
        rel = 1e-1
    elif hasattr(model, "solver") and model.solver == "saga":
        rel = 1e-2

    rng = np.random.RandomState(global_random_seed)
    n_train, n_features, n_targets = 100, 10, None
    if isinstance(
        model,
        (MultiTaskElasticNet, MultiTaskElasticNetCV, MultiTaskLasso, MultiTaskLassoCV),
    ):
        n_targets = 3
    X = make_low_rank_matrix(n_samples=n_train, n_features=n_features, random_state=rng)
    if n_targets:
        coef = (
            rng.uniform(low=-2, high=2, size=(n_features, n_targets))
            / np.max(X, axis=0)[:, None]
        )
    else:
        coef = rng.uniform(low=-2, high=2, size=n_features) / np.max(X, axis=0)

    expectation = np.exp(X @ coef + 0.5)
    y = rng.poisson(lam=expectation) + 1  # strictly positive, i.e. y > 0
    if is_classifier(model):
        y = (y > expectation + 1).astype(np.float64)

    if with_sample_weight:
        sw = rng.uniform(low=1, high=10, size=y.shape[0])
    else:
        sw = None

    model.set_params(fit_intercept=True)  # to be sure
    if with_sample_weight:
        model.fit(X, y, sample_weight=sw)
    else:
        model.fit(X, y)

    # Assert balance property.
    if is_classifier(model):
        assert np.average(model.predict_proba(X)[:, 1], weights=sw) == pytest.approx(
            np.average(y, weights=sw), rel=rel
        )
    else:
        assert np.average(model.predict(X), weights=sw, axis=0) == pytest.approx(
            np.average(y, weights=sw, axis=0), rel=rel
        )
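For intuition, the balance property asserted above can be reproduced in isolation. The following is a minimal sketch, not part of the committed file, assuming only that scikit-learn and NumPy are installed. PoissonRegressor uses the log link, which is canonical for the Poisson deviance, so with fit_intercept=True the weighted mean of its training predictions matches the weighted mean of the targets up to solver tolerance:

import numpy as np
from sklearn.linear_model import PoissonRegressor

rng = np.random.RandomState(0)
X = rng.uniform(size=(100, 3))
y = rng.poisson(lam=np.exp(X @ np.array([0.5, -0.2, 0.1])))
sw = rng.uniform(low=1, high=10, size=y.shape[0])  # arbitrary positive weights

# Fit tightly; the balance property holds exactly at the optimum because the
# (unpenalized) intercept's first-order condition forces the means to match.
model = PoissonRegressor(fit_intercept=True, tol=1e-10, max_iter=1000)
model.fit(X, y, sample_weight=sw)

print(np.average(model.predict(X), weights=sw))  # weighted mean of predictions
print(np.average(y, weights=sw))                 # ~ equal weighted mean of targets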