class JackknifeAB restricted to a smaller class Resample and argument… #96

Merged: 8 commits, Oct 12, 2021
Changes from 7 commits
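The user-facing change visible in the example diffs below is that `MapieRegressor` no longer takes a boolean `ensemble` argument; aggregation of out-of-fold predictions is instead controlled by an explicit `agg_function` argument. A minimal sketch on toy data (constructor and `predict` arguments are taken from the updated examples; the data are illustrative):

```python
import numpy as np
from sklearn.linear_model import LinearRegression
from mapie.regression import MapieRegressor

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 2))
y = X @ np.array([1.0, -2.0]) + rng.normal(scale=0.1, size=100)

# Before this PR the examples passed ensemble=True to MapieRegressor;
# after it they pass an explicit aggregation function instead.
mapie = MapieRegressor(
    LinearRegression(), method="plus", cv=5, agg_function="median", n_jobs=-1
)
mapie.fit(X, y)
y_pred, y_pis = mapie.predict(X, alpha=0.05)
# y_pis has shape (n_samples, 2, n_alpha): lower/upper bound per alpha.
```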
6 changes: 6 additions & 0 deletions .gitignore
@@ -2,6 +2,7 @@
__pycache__/
*.py[cod]
*$py.class
.DS_Store

# C extensions
*.so
@@ -13,7 +14,12 @@ doc/examples_regression/
doc/auto_examples/
doc/modules/generated/
doc/datasets/generated/
doc/examples_regression/
doc/examples_classification/
doc/generated/

# Distribution / packaging

Binary file modified doc/images/quickstart_1.png
72 changes: 33 additions & 39 deletions examples/regression/plot_barber2020_simulations.py
@@ -28,21 +28,20 @@
"Predictive inference with the jackknife+."
Ann. Statist., 49(1):486–507, February 2021.
"""
from typing import List, Dict, Any
from typing import Any, Dict, List

import numpy as np
from sklearn.linear_model import LinearRegression
from matplotlib import pyplot as plt

from mapie.regression import MapieRegressor
from mapie.metrics import regression_coverage_score
from mapie.regression import MapieRegressor
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression


def PIs_vs_dimensions(
strategies: Dict[str, Any],
alpha: float,
n_trial: int,
dimensions: List[int]
dimensions: List[int],
) -> Dict[str, Dict[int, Dict[str, np.ndarray]]]:
"""
Compute the prediction intervals for a linear regression problem.
@@ -89,15 +88,17 @@ def PIs_vs_dimensions(
strategy: {
dimension: {
"coverage": np.empty(n_trial),
"width_mean": np.empty(n_trial)
} for dimension in dimensions
} for strategy in strategies
"width_mean": np.empty(n_trial),
}
for dimension in dimensions
}
for strategy in strategies
}
for dimension in dimensions:
for trial in range(n_trial):
beta = np.random.normal(size=dimension)
beta_norm = np.sqrt((beta**2).sum())
beta = beta/beta_norm*np.sqrt(SNR)
beta_norm = np.sqrt((beta ** 2).sum())
beta = beta / beta_norm * np.sqrt(SNR)
X_train = np.random.normal(size=(n_train, dimension))
noise_train = np.random.normal(size=n_train)
noise_test = np.random.normal(size=n_test)
@@ -108,26 +109,23 @@
for strategy, params in strategies.items():
mapie = MapieRegressor(
LinearRegression(),
ensemble=True,
agg_function="median",
n_jobs=-1,
**params
)
mapie.fit(X_train, y_train)
y_pred, y_pis = mapie.predict(X_test, alpha=alpha)
results[strategy][dimension]["coverage"][trial] = (
regression_coverage_score(
y_test, y_pis[:, 0, 0], y_pis[:, 1, 0]
)
coverage = regression_coverage_score(
y_test, y_pis[:, 0, 0], y_pis[:, 1, 0]
)
results[strategy][dimension]["width_mean"][trial] = (
y_pis[:, 1, 0] - y_pis[:, 0, 0]
).mean()
results[strategy][dimension]["coverage"][trial] = coverage
width_mean = (y_pis[:, 1, 0] - y_pis[:, 0, 0]).mean()
results[strategy][dimension]["width_mean"][trial] = width_mean
return results


def plot_simulation_results(
results: Dict[str, Dict[int, Dict[str, np.ndarray]]],
title: str
results: Dict[str, Dict[int, Dict[str, np.ndarray]]], title: str
) -> None:
"""
Show the prediction interval coverages and widths as a function
@@ -148,35 +146,31 @@ def plot_simulation_results(
for strategy in results:
dimensions = list(results[strategy].keys())
n_dim = len(dimensions)
coverage_mean, coverage_SE, width_mean, width_SE = (
np.zeros(n_dim), np.zeros(n_dim), np.zeros(n_dim), np.zeros(n_dim)
)
coverage_mean = np.zeros(n_dim)
coverage_SE = np.zeros(n_dim)
width_mean = np.zeros(n_dim)
width_SE = np.zeros(n_dim)

for idim, dim in enumerate(dimensions):
coverage_mean[idim] = (
results[strategy][dim]["coverage"].mean()
)
coverage_SE[idim] = (
results[strategy][dim]["coverage"].std()/np.sqrt(ntrial)
)
width_mean[idim] = (
results[strategy][dim]["width_mean"].mean()
)
width_SE[idim] = (
results[strategy][dim]["width_mean"].std()/np.sqrt(ntrial)
)
coverage = results[strategy][dim]["coverage"]
coverage_mean[idim] = coverage.mean()
coverage_SE[idim] = coverage.std() / np.sqrt(ntrial)
width = results[strategy][dim]["width_mean"]
width_mean[idim] = width.mean()
width_SE[idim] = width.std() / np.sqrt(ntrial)
ax1.plot(dimensions, coverage_mean, label=strategy)
ax1.fill_between(
dimensions,
coverage_mean - coverage_SE,
coverage_mean + coverage_SE,
alpha=0.25
alpha=0.25,
)
ax2.plot(dimensions, width_mean, label=strategy)
ax2.fill_between(
dimensions,
width_mean - width_SE,
width_mean + width_SE,
alpha=0.25
alpha=0.25,
)
ax1.axhline(1 - alpha, linestyle="dashed", c="k")
ax1.set_ylim(0.0, 1.0)
@@ -192,7 +186,7 @@
STRATEGIES = {
"naive": dict(method="naive"),
"cv": dict(method="base", cv=5),
"cv_plus": dict(method="plus", cv=5)
"cv_plus": dict(method="plus", cv=5),
}
alpha = 0.1
ntrial = 3
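For readers skimming the example above: `regression_coverage_score` measures the fraction of test targets that fall inside their prediction interval. A minimal re-implementation of the idea (not MAPIE's code, but the same call signature used in the diff):

```python
import numpy as np

def coverage(y_true: np.ndarray, y_low: np.ndarray, y_up: np.ndarray) -> float:
    """Empirical coverage: fraction of targets inside [y_low, y_up]."""
    return float(np.mean((y_low <= y_true) & (y_true <= y_up)))

y_true = np.array([1.0, 2.0, 3.0])
y_low = np.array([0.5, 2.5, 2.0])
y_up = np.array([1.5, 3.5, 4.0])
print(coverage(y_true, y_low, y_up))  # 2 of 3 targets covered -> 0.666...
```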
42 changes: 22 additions & 20 deletions examples/regression/plot_both_uncertainties.py
@@ -6,29 +6,27 @@
prediction intervals capturing both aleatoric and epistemic uncertainties
on a one-dimensional dataset with homoscedastic noise and normal sampling.
"""
from typing import Tuple, Any, TypeVar, Callable
from typing_extensions import TypedDict
import numpy as np
from typing import Any, Callable, Tuple, TypeVar

import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
import numpy as np
from mapie.regression import MapieRegressor
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from mapie.regression import MapieRegressor
from typing_extensions import TypedDict

F = TypeVar("F", bound=Callable[..., Any])


# Functions for generating our dataset
def x_sinx(x: np.ndarray) -> Any:
"""One-dimensional x*sin(x) function."""
return x*np.sin(x)
return x * np.sin(x)


def get_1d_data_with_normal_distrib(
funct: F,
mu: float,
sigma: float,
n_samples: int,
noise: float
funct: F, mu: float, sigma: float, n_samples: int, noise: float
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
"""
Generate noisy 1D data with normal distribution from given function
@@ -59,12 +57,16 @@ def get_1d_data_with_normal_distrib(
"""
np.random.seed(42)
X_train = np.random.normal(mu, sigma, n_samples)
X_test = np.arange(mu - 4*sigma, mu + 4*sigma, sigma/20.)
X_test = np.arange(mu - 4 * sigma, mu + 4 * sigma, sigma / 20.0)
y_train, y_mesh, y_test = funct(X_train), funct(X_test), funct(X_test)
y_train += np.random.normal(0, noise, y_train.shape[0])
y_test += np.random.normal(0, noise, y_test.shape[0])
return (
X_train.reshape(-1, 1), y_train, X_test.reshape(-1, 1), y_test, y_mesh
X_train.reshape(-1, 1),
y_train,
X_test.reshape(-1, 1),
y_test,
y_mesh,
)


@@ -79,7 +81,7 @@ def get_1d_data_with_normal_distrib(
polyn_model = Pipeline(
[
("poly", PolynomialFeatures(degree=degree_polyn)),
("linear", LinearRegression())
("linear", LinearRegression()),
]
)

@@ -93,7 +95,7 @@ def get_1d_data_with_normal_distrib(
}
y_pred, y_pis = {}, {}
for strategy, params in STRATEGIES.items():
mapie = MapieRegressor(polyn_model, ensemble=False, **params)
mapie = MapieRegressor(polyn_model, **params)
mapie.fit(X_train, y_train)
y_pred[strategy], y_pis[strategy] = mapie.predict(X_test, alpha=0.05)

@@ -109,12 +111,12 @@ def plot_1d_data(
y_pred_low: np.ndarray,
y_pred_up: np.ndarray,
ax: plt.Axes,
title: str
title: str,
) -> None:
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_xlim([-10, 10])
ax.set_ylim([np.min(y_test)*1.3, np.max(y_test)*1.3])
ax.set_ylim([np.min(y_test) * 1.3, np.max(y_test) * 1.3])
ax.fill_between(X_test, y_pred_low, y_pred_up, alpha=0.3)
ax.scatter(X_train, y_train, color="red", alpha=0.3, label="Training data")
ax.plot(X_test, y_test, color="gray", label="True confidence intervals")
@@ -135,12 +137,12 @@ def plot_1d_data(
y_train.ravel(),
X_test.ravel(),
y_mesh.ravel(),
1.96*noise,
1.96 * noise,
y_pred[strategy].ravel(),
y_pis[strategy][:, 0, 0].ravel(),
y_pis[strategy][:, 1, 0].ravel(),
ax=coord,
title=strategy
title=strategy,
)


@@ -149,7 +151,7 @@ def plot_1d_data(
ax.set_ylim([0, 4])
for strategy in STRATEGIES:
ax.plot(X_test, y_pis[strategy][:, 1, 0] - y_pis[strategy][:, 0, 0])
ax.axhline(1.96*2*noise, ls="--", color="k")
ax.axhline(1.96 * 2 * noise, ls="--", color="k")
ax.set_xlabel("x")
ax.set_ylabel("Prediction Interval Width")
ax.legend(list(STRATEGIES.keys()) + ["True width"], fontsize=8)
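A note on the dashed reference line added by `ax.axhline(1.96 * 2 * noise, ...)` in the width plot above: with homoscedastic Gaussian noise of standard deviation `noise`, the narrowest 95% interval around the true mean f(x) is f(x) ± 1.96·noise, so the "True width" is 2·1.96·noise. A quick numeric check (the `noise` value below is illustrative; the example sets its own):

```python
from scipy.stats import norm

noise = 0.5                    # illustrative noise standard deviation
z = norm.ppf(0.975)            # ~1.959964, the 97.5% normal quantile
oracle_width = 2 * z * noise   # width of the ideal 95% interval
print(round(z, 3), round(oracle_width, 3))
```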
47 changes: 24 additions & 23 deletions examples/regression/plot_homoscedastic_1d_data.py
@@ -8,27 +8,24 @@
different strategies.
"""
from typing import Tuple
from typing_extensions import TypedDict

import numpy as np
import scipy
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from matplotlib import pyplot as plt

from mapie.regression import MapieRegressor
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from typing_extensions import TypedDict


def f(x: np.ndarray) -> np.ndarray:
"""Polynomial function used to generate one-dimensional data"""
return np.array(5*x + 5*x**4 - 9*x**2)
return np.array(5 * x + 5 * x ** 4 - 9 * x ** 2)


def get_homoscedastic_data(
n_train: int = 200,
n_true: int = 200,
sigma: float = 0.1
n_train: int = 200, n_true: int = 200, sigma: float = 0.1
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, float]:
"""
Generate one-dimensional data from a given function,
@@ -61,7 +58,7 @@ def get_homoscedastic_data(
X_true = np.linspace(0, 1, n_true)
y_train = f(X_train) + np.random.normal(0, sigma, n_train)
y_true = f(X_true)
y_true_sigma = q95*sigma
y_true_sigma = q95 * sigma
return X_train, y_train, X_true, y_true, y_true_sigma


@@ -75,7 +72,7 @@ def plot_1d_data(
y_pred_low: np.ndarray,
y_pred_up: np.ndarray,
ax: plt.Axes,
title: str
title: str,
) -> None:
"""
Generate a figure showing the training data and estimated
@@ -120,10 +117,12 @@ def plot_1d_data(

X_train, y_train, X_test, y_test, y_test_sigma = get_homoscedastic_data()

polyn_model = Pipeline([
("poly", PolynomialFeatures(degree=4)),
("linear", LinearRegression(fit_intercept=False))
])
polyn_model = Pipeline(
[
("poly", PolynomialFeatures(degree=4)),
("linear", LinearRegression(fit_intercept=False)),
]
)

Params = TypedDict("Params", {"method": str, "cv": int})
STRATEGIES = {
@@ -134,17 +133,19 @@
"cv_plus": Params(method="plus", cv=10),
"cv_minmax": Params(method="minmax", cv=10),
}
fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(2, 3, figsize=(3*6, 12))
fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(
2, 3, figsize=(3 * 6, 12)
)
axs = [ax1, ax2, ax3, ax4, ax5, ax6]
for i, (strategy, params) in enumerate(STRATEGIES.items()):
mapie = MapieRegressor(
polyn_model,
ensemble=True,
n_jobs=-1,
**params
polyn_model, agg_function="median", n_jobs=-1, **params
)
mapie.fit(X_train.reshape(-1, 1), y_train)
y_pred, y_pis = mapie.predict(X_test.reshape(-1, 1), alpha=0.05,)
y_pred, y_pis = mapie.predict(
X_test.reshape(-1, 1),
alpha=0.05,
)
plot_1d_data(
X_train,
y_train,
@@ -155,6 +156,6 @@
y_pis[:, 0, 0],
y_pis[:, 1, 0],
axs[i],
strategy
strategy,
)
plt.show()
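A note on the pipeline in this last example: `PolynomialFeatures` includes a bias column by default, which is why the pipeline's `LinearRegression` sets `fit_intercept=False` rather than fitting a second, redundant intercept. A quick illustration:

```python
import numpy as np
from sklearn.preprocessing import PolynomialFeatures

X = np.array([[2.0]])
print(PolynomialFeatures(degree=4).fit_transform(X))
# [[ 1.  2.  4.  8. 16.]] -- the leading 1 is the built-in bias term,
# so the downstream LinearRegression needs no intercept of its own.
```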