# pygam/__init__.py
"""
GAM toolkit
"""
from __future__ import absolute_import
from pygam.pygam import GAM
from pygam.pygam import LinearGAM
from pygam.pygam import LogisticGAM
from pygam.pygam import GammaGAM
from pygam.pygam import PoissonGAM
from pygam.pygam import InvGaussGAM
__all__ = ['GAM', 'LinearGAM', 'LogisticGAM', 'GammaGAM', 'PoissonGAM',
'InvGaussGAM']
__version__ = '0.4.1'
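# Usage sketch (illustrative only; assumes the sklearn-style fit/predict
# interface exposed by the GAM classes in pygam.pygam, and hypothetical
# data X, y):
#
#   >>> import numpy as np
#   >>> from pygam import LinearGAM
#   >>> X = np.random.rand(100, 3)
#   >>> y = X[:, 0] ** 2 + 0.1 * np.random.randn(100)
#   >>> gam = LinearGAM().fit(X, y)
#   >>> y_pred = gam.predict(X)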
# pygam/callbacks.py
"""
CallBacks
"""
from __future__ import absolute_import
from functools import wraps
import numpy as np
from pygam.core import Core
def validate_callback_data(method):
"""
wraps a callback's method to pull the desired arguments from the vars dict
also checks to ensure the method's arguments are in the vars dict
Parameters
----------
method : callable
Returns
-------
validated callable
"""
@wraps(method)
def method_wrapper(*args, **kwargs):
"""
Parameters
----------
*args
**kwargs
Returns
-------
method's output
"""
expected = method.__code__.co_varnames
# rename current gam object
if 'self' in kwargs:
gam = kwargs['self']
del kwargs['self']
kwargs['gam'] = gam
# loop once to check any missing
missing = []
for e in expected:
if e == 'self':
continue
if e not in kwargs:
missing.append(e)
assert len(missing) == 0, 'CallBack cannot reference: {}'.\
format(', '.join(missing))
# loop again to extract desired
kwargs_subset = {}
for e in expected:
if e == 'self':
continue
kwargs_subset[e] = kwargs[e]
return method(*args, **kwargs_subset)
return method_wrapper
def validate_callback(callback):
"""
validates a callback's on_loop_start and on_loop_end methods
Parameters
----------
callback : Callback object
Returns
-------
validated callback
"""
if not getattr(callback, '_validated', False):
assert hasattr(callback, 'on_loop_start') \
or hasattr(callback, 'on_loop_end'), \
'callback must have `on_loop_start` or `on_loop_end` method'
if hasattr(callback, 'on_loop_start'):
setattr(callback, 'on_loop_start',
validate_callback_data(callback.on_loop_start))
if hasattr(callback, 'on_loop_end'):
setattr(callback, 'on_loop_end',
validate_callback_data(callback.on_loop_end))
setattr(callback, '_validated', True)
return callback
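# Sketch of a user-defined callback (hypothetical, for illustration):
# validate_callback wraps on_loop_start/on_loop_end so that each is fed only
# the loop variables it names, pulled from the vars() dict that the
# optimizer logs at each iteration.
#
#   >>> @validate_callback
#   ... class MuTracker(CallBack):
#   ...     """records the mean of mu at the start of each PIRLS loop"""
#   ...     def __init__(self):
#   ...         super(MuTracker, self).__init__(name='mu_tracker')
#   ...     def on_loop_start(self, mu):
#   ...         return mu.mean()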
class CallBack(Core):
"""CallBack class"""
def __init__(self, name=None):
"""
creates a CallBack instance
Parameters
----------
name : str, default: None
Returns
-------
None
"""
super(CallBack, self).__init__(name=name)
@validate_callback
class Deviance(CallBack):
"""Deviance CallBack class"""
def __init__(self):
"""
creates a Deviance CallBack instance
useful for capturing the Deviance of a model on training data
at each iteration
Parameters
----------
None
Returns
-------
None
"""
super(Deviance, self).__init__(name='deviance')
def on_loop_start(self, gam, y, mu):
"""
runs the method at loop start
Parameters
----------
gam : GAM instance
y : array-like of length n
target data
mu : array-like of length n
expected value data
Returns
-------
deviance : float
"""
return gam.distribution.deviance(y=y, mu=mu, scaled=False).sum()
@validate_callback
class Accuracy(CallBack):
def __init__(self):
"""
creates an Accuracy CallBack instance
useful for capturing the accuracy of a model on training data
at each iteration
Parameters
----------
None
Returns
-------
None
"""
super(Accuracy, self).__init__(name='accuracy')
def on_loop_start(self, y, mu):
"""
runs the method at start of each optimization loop
Parameters
----------
y : array-like of length n
target data
mu : array-like of length n
expected value data
Returns
-------
accuracy : float
"""
return np.mean(y == (mu>0.5))
@validate_callback
class Diffs(CallBack):
def __init__(self):
"""
creates a Diffs CallBack instance
useful for capturing the differences in model coefficient norms between
iterations
Parameters
----------
None
Returns
-------
None
"""
super(Diffs, self).__init__(name='diffs')
def on_loop_end(self, diff):
"""
runs the method at end of each optimization loop
Parameters
----------
diff : float
Returns
-------
diff : float
"""
return diff
@validate_callback
class Coef(CallBack):
def __init__(self):
"""
creates a Coef CallBack instance
useful for capturing the model's coefficients at each iteration
Parameters
----------
None
Returns
-------
None
"""
super(Coef, self).__init__(name='coef')
def on_loop_start(self, gam):
"""
runs the method at start of each optimization loop
Parameters
----------
gam : GAM instance
Returns
-------
coef_ : np.array of model coefficients
"""
return gam.coef_
# pygam/core.py
"""
Core classes
"""
from __future__ import absolute_import
import numpy as np
from pygam.utils import round_to_n_decimal_places
def nice_repr(name, param_kvs, line_width=30, line_offset=5, decimals=3):
"""
tool to do a nice repr of a class.
Parameters
----------
name : str
class name
param_kvs : dict
dict containing class parameters names as keys,
and the corresponding values as values
line_width : int
desired maximum line width.
default: 30
line_offset : int
desired offset for new lines
default: 5
decimals : int
number of decimal places to keep for float values
default: 3
Returns
-------
out : str
nicely formatted repr of class instance
"""
if len(param_kvs) == 0:
# if the object has no params it's easy
return '{}()'.format(name)
# sort keys and values
ks = list(param_kvs.keys())
vs = list(param_kvs.values())
idxs = np.argsort(ks)
param_kvs = [(ks[i],vs[i]) for i in idxs]
param_kvs = param_kvs[::-1]
out = ''
current_line = name + '('
while len(param_kvs) > 0:
k, v = param_kvs.pop()
if issubclass(v.__class__, (float, np.ndarray)):
# round the floats first
v = round_to_n_decimal_places(v, n=decimals)
param = '{}={},'.format(k, str(v))
else:
param = '{}={},'.format(k, repr(v))
if len(current_line + param) <= line_width:
current_line += param
else:
out += current_line + '\n'
current_line = ' '*line_offset + param
if len(current_line) < line_width and len(param_kvs) > 0:
current_line += ' '
out += current_line[:-1] # remove trailing comma
out += ')'
return out
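# Worked example (an illustrative sketch; assumes round_to_n_decimal_places
# from pygam.utils leaves 1.5 unchanged):
#
#   >>> nice_repr('Dummy', {'b': 2, 'a': 1.5})
#   'Dummy(a=1.5, b=2)'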
class Core(object):
def __init__(self, name=None, line_width=70, line_offset=3):
"""
creates an instance of the Core class
comes loaded with useful methods
Parameters
----------
name : str, default: None
line_width : int, default: 70
number of characters to print on a line
line_offset : int, default: 3
number of characters to indent after the first line
Returns
-------
self
"""
self._name = name
self._line_width = line_width
self._line_offset = line_offset
self._exclude = []
def __str__(self):
"""__str__ method"""
if self._name is None:
return self.__repr__()
return self._name
def __repr__(self):
"""__repr__ method"""
name = self.__class__.__name__
return nice_repr(name, self.get_params(),
line_width=self._line_width,
line_offset=self._line_offset,
decimals=4)
def get_params(self, deep=False):
"""
returns a dict of all of the object's user-facing parameters
Parameters
----------
deep : boolean, default: False
when True, also gets non-user-facing parameters
Returns
-------
dict
"""
if deep is True:
return self.__dict__
return dict([(k,v) for k,v in list(self.__dict__.items()) \
if (k[0] != '_') \
and (k[-1] != '_') and (k not in self._exclude)])
def set_params(self, deep=False, force=False, **parameters):
"""
sets an object's parameters
Parameters
----------
deep : boolean, default: False
when True, also sets non-user-facing parameters
force : boolean, default: False
when True, also sets parameters that the object does not already
have
**parameters : parameters to set
Returns
-------
self
"""
param_names = self.get_params(deep=deep).keys()
for parameter, value in parameters.items():
if (parameter in param_names) or force:
setattr(self, parameter, value)
return self
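# Illustrative sketch of the params machinery: attributes without a leading
# or trailing underscore, and not listed in _exclude, count as user-facing.
# `Toy` is a hypothetical subclass, not part of the library:
#
#   >>> class Toy(Core):
#   ...     def __init__(self, alpha=1.0):
#   ...         self.alpha = alpha
#   ...         super(Toy, self).__init__(name='toy')
#   >>> t = Toy()
#   >>> t.get_params()
#   {'alpha': 1.0}
#   >>> t.set_params(alpha=2.0).alpha
#   2.0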
# pygam/distributions.py
"""
Distributions
"""
from __future__ import division, absolute_import
from functools import wraps
import scipy as sp
import numpy as np
from abc import ABCMeta
from abc import abstractmethod
from pygam.core import Core
from pygam.utils import ylogydu
def multiply_weights(deviance):
@wraps(deviance)
def multiplied(self, y, mu, weights=None, **kwargs):
if weights is None:
weights = np.ones_like(mu)
return deviance(self, y, mu, **kwargs) * weights
return multiplied
def divide_weights(V):
@wraps(V)
def divided(self, mu, weights=None, **kwargs):
if weights is None:
weights = np.ones_like(mu)
return V(self, mu, **kwargs) / weights
return divided
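# These two decorators fold sample weights into the exponential-family
# identities: deviances get multiplied by the weights, variance functions
# get divided by them. A quick sketch using the Normal variance V(mu) = 1
# (NormalDist is defined below in this module):
#
#   >>> import numpy as np
#   >>> dist = NormalDist(scale=1.)
#   >>> np.allclose(dist.V(mu=np.ones(3), weights=np.array([1., 2., 4.])),
#   ...             [1.0, 0.5, 0.25])
#   True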
class Distribution(Core):
__metaclass__ = ABCMeta
"""
base distribution class
"""
def __init__(self, name=None, scale=None):
"""
creates an instance of the Distribution class
Parameters
----------
name : str, default: None
scale : float or None, default: None
scale/standard deviation of the distribution
Returns
-------
self
"""
self.scale = scale
self._known_scale = self.scale is not None
super(Distribution, self).__init__(name=name)
if not self._known_scale:
self._exclude += ['scale']
def phi(self, y, mu, edof, weights):
"""
GLM scale parameter.
for Binomial and Poisson families this is unity
for Normal family this is variance
Parameters
----------
y : array-like of length n
target values
mu : array-like of length n
expected values
edof : float
estimated degrees of freedom
weights : array-like shape (n,) or None, default: None
sample weights
if None, defaults to array of ones
Returns
-------
scale : estimated model scale
"""
if self._known_scale:
return self.scale
else:
return (np.sum(weights * self.V(mu)**-1 * (y - mu)**2) /
(len(mu) - edof))
@abstractmethod
def sample(self, mu):
"""
Return random samples from this distribution.
Parameters
----------
mu : array-like of shape n_samples or shape (n_simulations, n_samples)
expected values
Returns
-------
random_samples : np.array of same shape as mu
"""
pass
class NormalDist(Distribution):
"""
Normal Distribution
"""
def __init__(self, scale=None):
"""
creates an instance of the NormalDist class
Parameters
----------
scale : float or None, default: None
scale/standard deviation of the distribution
Returns
-------
self
"""
super(NormalDist, self).__init__(name='normal', scale=scale)
def pdf(self, y, mu, weights=None):
"""
computes the pdf or pmf of the values under the current distribution
Parameters
----------
y : array-like of length n
target values
mu : array-like of length n
expected values
weights : array-like shape (n,) or None, default: None
sample weights
if None, defaults to array of ones
Returns
-------
pdf/pmf : np.array of length n
"""
if weights is None:
weights = np.ones_like(mu)
scale = self.scale / weights
return np.exp(-(y - mu)**2 / (2 * scale)) / (scale * 2 * np.pi)**0.5
@divide_weights
def V(self, mu):
"""
glm Variance function.
if
Y ~ ExpFam(theta, scale=phi)
such that
E[Y] = mu = b'(theta)
and
Var[Y] = b''(theta) * phi / w
then we seek V(mu) such that we can represent Var[y] as a fn of mu:
Var[Y] = V(mu) * phi
ie
V(mu) = b''(theta) / w
Parameters
----------
mu : array-like of length n
expected values
Returns
-------
V(mu) : np.array of length n
"""
return np.ones_like(mu)
@multiply_weights
def deviance(self, y, mu, scaled=True):
"""
model deviance
for a gaussian linear model, this is equal to the SSE
Parameters
----------
y : array-like of length n
target values
mu : array-like of length n
expected values
scaled : boolean, default: True
whether to divide the deviance by the distribution scale
Returns
-------
deviances : np.array of length n
"""
dev = (y - mu)**2
if scaled:
dev /= self.scale
return dev
def sample(self, mu):
"""
Return random samples from this Normal distribution.
Samples are drawn independently from univariate normal distributions
with means given by the values in `mu` and with standard deviations
equal to the `scale` attribute if it exists otherwise 1.0.
Parameters
----------
mu : array-like of shape n_samples or shape (n_simulations, n_samples)
expected values
Returns
-------
random_samples : np.array of same shape as mu
"""
standard_deviation = self.scale**0.5 if self.scale else 1.0
return np.random.normal(loc=mu, scale=standard_deviation, size=None)
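# Worked example: for the Normal family the unscaled deviance is just the
# squared residual, independent of the scale parameter:
#
#   >>> import numpy as np
#   >>> dist = NormalDist(scale=2.)
#   >>> dist.deviance(y=np.array([1., 2.]), mu=np.zeros(2), scaled=False)
#   array([1., 4.])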
class BinomialDist(Distribution):
"""
Binomial Distribution
"""
def __init__(self, levels=1):
"""
creates an instance of the Binomial class
Parameters
----------
levels : int or None, default: 1
number of levels in the binomial distribution
Returns
-------
self
"""
if levels is None:
levels = 1
self.levels = levels
super(BinomialDist, self).__init__(name='binomial', scale=1.)
self._exclude.append('scale')
def pdf(self, y, mu, weights=None):
"""
computes the pdf or pmf of the values under the current distribution
Parameters
----------
y : array-like of length n
target values
mu : array-like of length n
expected values
weights : array-like shape (n,) or None, default: None
sample weights
if None, defaults to array of ones
Returns
-------
pdf/pmf : np.array of length n
"""
if weights is None:
weights = np.ones_like(mu)
n = self.levels
return weights * (sp.misc.comb(n, y) * (mu / n)**y *
(1 - (mu / n))**(n - y))
@divide_weights
def V(self, mu):
"""
glm Variance function
computes the variance of the distribution
Parameters
----------
mu : array-like of length n
expected values
Returns
-------
variance : np.array of length n
"""
return mu * (1 - mu / self.levels)
@multiply_weights
def deviance(self, y, mu, scaled=True):
"""
model deviance
for a Bernoulli logistic model, this is equal to twice the
negative log-likelihood.
Parameters
----------
y : array-like of length n
target values
mu : array-like of length n
expected values
scaled : boolean, default: True
whether to divide the deviance by the distribution scale
Returns
-------
deviances : np.array of length n
"""
dev = 2 * (ylogydu(y, mu) + ylogydu(self.levels - y, self.levels - mu))
if scaled:
dev /= self.scale
return dev
def sample(self, mu):
"""
Return random samples from this Binomial distribution.
Parameters
----------
mu : array-like of shape n_samples or shape (n_simulations, n_samples)
expected values
Returns
-------
random_samples : np.array of same shape as mu
"""
number_of_trials = self.levels
success_probability = mu / number_of_trials
return np.random.binomial(n=number_of_trials, p=success_probability,
size=None)
class PoissonDist(Distribution):
"""
Poisson Distribution
"""
def __init__(self):
"""
creates an instance of the PoissonDist class
Parameters
----------
None
Returns
-------
self
"""
super(PoissonDist, self).__init__(name='poisson', scale=1.)
self._exclude.append('scale')
def pdf(self, y, mu, weights=None):
"""
computes the pdf or pmf of the values under the current distribution
Parameters
----------
y : array-like of length n
target values
mu : array-like of length n
expected values
weights : array-like shape (n,) or None, default: None
containing sample weights
if None, defaults to array of ones
Returns
-------
pdf/pmf : np.array of length n
"""
if weights is None:
weights = np.ones_like(mu)
# in Poisson regression weights are proportional to the exposure
# so we want to pump up all our predictions
# NOTE: we assume the targets are unchanged
mu = mu * weights
return (mu**y) * np.exp(-mu) / sp.misc.factorial(y)
@divide_weights
def V(self, mu):
"""
glm Variance function
computes the variance of the distribution
Parameters
----------
mu : array-like of length n
expected values
Returns
-------
variance : np.array of length n
"""
return mu
@multiply_weights
def deviance(self, y, mu, scaled=True):
"""
model deviance
for a Poisson model, this is equal to twice the
negative log-likelihood.
Parameters
----------
y : array-like of length n
target values
mu : array-like of length n
expected values
scaled : boolean, default: True
whether to divide the deviance by the distribution scale
Returns
-------
deviances : np.array of length n
"""
dev = 2 * (ylogydu(y, mu) - (y - mu))
if scaled:
dev /= self.scale
return dev
def sample(self, mu):
"""
Return random samples from this Poisson distribution.
Parameters
----------
mu : array-like of shape n_samples or shape (n_simulations, n_samples)
expected values
Returns
-------
random_samples : np.array of same shape as mu
"""
return np.random.poisson(lam=mu, size=None)
class GammaDist(Distribution):
"""
Gamma Distribution
"""
def __init__(self, scale=None):
"""
creates an instance of the GammaDist class
Parameters
----------
scale : float or None, default: None
scale/standard deviation of the distribution
Returns
-------
self
"""
super(GammaDist, self).__init__(name='gamma', scale=scale)
def pdf(self, y, mu, weights=None):
"""
computes the pdf or pmf of the values under the current distribution
Parameters
----------
y : array-like of length n
target values
mu : array-like of length n
expected values
weights : array-like shape (n,) or None, default: None
containing sample weights
if None, defaults to array of ones
Returns
-------
pdf/pmf : np.array of length n
"""
if weights is None:
weights = np.ones_like(mu)
nu = weights / self.scale
return (1. / sp.special.gamma(nu) *
(nu / mu)**nu * y**(nu - 1) * np.exp(-nu * y / mu))
@divide_weights
def V(self, mu):
"""
glm Variance function
computes the variance of the distribution
Parameters
----------
mu : array-like of length n
expected values
Returns
-------
variance : np.array of length n
"""
return mu**2
@multiply_weights
def deviance(self, y, mu, scaled=True):
"""
model deviance
for a Gamma model, this is equal to twice the
negative log-likelihood.
Parameters
----------
y : array-like of length n
target values
mu : array-like of length n
expected values
scaled : boolean, default: True
whether to divide the deviance by the distribution scale
Returns
-------
deviances : np.array of length n
"""
dev = 2 * ((y - mu) / mu - np.log(y / mu))
if scaled:
dev /= self.scale
return dev
def sample(self, mu):
"""
Return random samples from this Gamma distribution.
Parameters
----------
mu : array-like of shape n_samples or shape (n_simulations, n_samples)
expected values
Returns
-------
random_samples : np.array of same shape as mu
"""
# in numpy.random.gamma, `shape` is the parameter sometimes denoted by
# `k` that corresponds to `nu` in S. Wood (2006) Table 2.1
shape = 1. / self.scale
# in numpy.random.gamma, `scale` is the parameter sometimes denoted by
# `theta` that corresponds to mu / nu in S. Wood (2006) Table 2.1
scale = mu / shape
return np.random.gamma(shape=shape, scale=scale, size=None)
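# Sanity check on the parametrization above (an illustrative sketch): a
# Gamma(shape=k, scale=theta) variable has mean k * theta, so with
# shape = 1/scale and theta = mu * scale the samples average to mu:
#
#   >>> import numpy as np
#   >>> np.random.seed(0)
#   >>> samples = GammaDist(scale=0.5).sample(mu=np.full(100000, 3.0))
#   >>> np.allclose(samples.mean(), 3.0, atol=0.05)
#   True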
class InvGaussDist(Distribution):
"""
Inverse Gaussian (Wald) Distribution
"""
def __init__(self, scale=None):
"""
creates an instance of the InvGaussDist class
Parameters
----------
scale : float or None, default: None
scale/standard deviation of the distribution
Returns
-------
self
"""
super(InvGaussDist, self).__init__(name='inv_gauss', scale=scale)
def pdf(self, y, mu, weights=None):
"""
computes the pdf or pmf of the values under the current distribution
Parameters
----------
y : array-like of length n
target values
mu : array-like of length n
expected values
weights : array-like shape (n,) or None, default: None
containing sample weights
if None, defaults to array of ones
Returns
-------
pdf/pmf : np.array of length n
"""
if weights is None:
weights = np.ones_like(mu)
gamma = weights / self.scale
return ((gamma / (2 * np.pi * y**3))**.5 *
np.exp(-gamma * (y - mu)**2 / (2 * mu**2 * y)))
@divide_weights
def V(self, mu):
"""
glm Variance function
computes the variance of the distribution
Parameters
----------
mu : array-like of length n
expected values
Returns
-------
variance : np.array of length n
"""
return mu**3
@multiply_weights
def deviance(self, y, mu, scaled=True):
"""
model deviance
for an Inverse Gaussian model, this is equal to twice the
negative log-likelihood.
Parameters
----------
y : array-like of length n
target values
mu : array-like of length n
expected values
scaled : boolean, default: True
whether to divide the deviance by the distribution scale
Returns
-------
deviances : np.array of length n
"""
dev = ((y - mu)**2) / (mu**2 * y)
if scaled:
dev /= self.scale
return dev
def sample(self, mu):
"""
Return random samples from this Inverse Gaussian (Wald) distribution.
Parameters
----------
mu : array-like of shape n_samples or shape (n_simulations, n_samples)
expected values
Returns
-------
random_samples : np.array of same shape as mu
"""
return np.random.wald(mean=mu, scale=self.scale, size=None)
# pygam/links.py
"""
Link functions
"""
from __future__ import division, absolute_import
import numpy as np
from pygam.core import Core
class Link(Core):
def __init__(self, name=None):
"""
creates an instance of a Link object
Parameters
----------
name : str, default: None
Returns
-------
self
"""
super(Link, self).__init__(name=name)
class IdentityLink(Link):
def __init__(self):
"""
creates an instance of an IdentityLink object
Parameters
----------
None
Returns
-------
self
"""
super(IdentityLink, self).__init__(name='identity')
def link(self, mu, dist):
"""
glm link function
this is useful for going from mu to the linear prediction
Parameters
----------
mu : array-like of length n
dist : Distribution instance
Returns
-------
lp : np.array of length n
"""
return mu
def mu(self, lp, dist):
"""
glm mean function, ie inverse of link function
this is useful for going from the linear prediction to mu
Parameters
----------
lp : array-like of length n
dist : Distribution instance
Returns
-------
mu : np.array of length n
"""
return lp
def gradient(self, mu, dist):
"""
derivative of the link function wrt mu
Parameters
----------
mu : array-like of length n
dist : Distribution instance
Returns
-------
grad : np.array of length n
"""
return np.ones_like(mu)
class LogitLink(Link):
def __init__(self):
"""
creates an instance of a LogitLink object
Parameters
----------
None
Returns
-------
self
"""
super(LogitLink, self).__init__(name='logit')
def link(self, mu, dist):
"""
glm link function
this is useful for going from mu to the linear prediction
Parameters
----------
mu : array-like of length n
dist : Distribution instance
Returns
-------
lp : np.array of length n
"""
return np.log(mu) - np.log(dist.levels - mu)
def mu(self, lp, dist):
"""
glm mean function, ie inverse of link function
this is useful for going from the linear prediction to mu
Parameters
----------
lp : array-like of length n
dist : Distribution instance
Returns
-------
mu : np.array of length n
"""
elp = np.exp(lp)
return dist.levels * elp / (elp + 1)
def gradient(self, mu, dist):
"""
derivative of the link function wrt mu
Parameters
----------
mu : array-like of length n
dist : Distribution instance
Returns
-------
grad : np.array of length n
"""
return dist.levels/(mu*(dist.levels - mu))
class LogLink(Link):
def __init__(self):
"""
creates an instance of a LogLink object
Parameters
----------
None
Returns
-------
self
"""
super(LogLink, self).__init__(name='log')
def link(self, mu, dist):
"""
glm link function
this is useful for going from mu to the linear prediction
Parameters
----------
mu : array-like of length n
dist : Distribution instance
Returns
-------
lp : np.array of length n
"""
return np.log(mu)
def mu(self, lp, dist):
"""
glm mean function, ie inverse of link function
this is useful for going from the linear prediction to mu
Parameters
----------
lp : array-like of length n
dist : Distribution instance
Returns
-------
mu : np.array of length n
"""
return np.exp(lp)
def gradient(self, mu, dist):
"""
derivative of the link function wrt mu
Parameters
----------
mu : array-like of length n
dist : Distribution instance
Returns
-------
grad : np.array of length n
"""
return 1. / mu
class InverseLink(Link):
def __init__(self):
"""
creates an instance of a InverseLink object
Parameters
----------
None
Returns
-------
self
"""
super(InverseLink, self).__init__(name='inverse')
def link(self, mu, dist):
"""
glm link function
this is useful for going from mu to the linear prediction
Parameters
----------
mu : array-like of length n
dist : Distribution instance
Returns
-------
lp : np.array of length n
"""
return mu ** -1.
def mu(self, lp, dist):
"""
glm mean function, ie inverse of link function
this is useful for going from the linear prediction to mu
Parameters
----------
lp : array-like of length n
dist : Distribution instance
Returns
-------
mu : np.array of length n
"""
return lp ** -1.
def gradient(self, mu, dist):
"""
derivative of the link function wrt mu
Parameters
----------
mu : array-like of length n
dist : Distribution instance
Returns
-------
grad : np.array of length n
"""
return -1 * mu**-2.
class InvSquaredLink(Link):
def __init__(self):
"""
creates an instance of an InvSquaredLink object
Parameters
----------
None
Returns
-------
self
"""
super(InvSquaredLink, self).__init__(name='inv_squared')
def link(self, mu, dist):
"""
glm link function
this is useful for going from mu to the linear prediction
Parameters
----------
mu : array-like of length n
dist : Distribution instance
Returns
-------
lp : np.array of length n
"""
return mu ** -2.
def mu(self, lp, dist):
"""
glm mean function, ie inverse of link function
this is useful for going from the linear prediction to mu
Parameters
----------
lp : array-like of length n
dist : Distribution instance
Returns
-------
mu : np.array of length n
"""
return lp ** -0.5
def gradient(self, mu, dist):
"""
derivative of the link function wrt mu
Parameters
----------
mu : array-like of length n
dist : Distribution instance
Returns
-------
grad : np.array of length n
"""
return -2 * mu**-3.
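# Round-trip sketch: each Link pairs `link` with its inverse `mu`, so
# composing the two recovers the input. Shown here for LogitLink with a
# Bernoulli-style distribution; `FakeDist` is a stand-in for illustration:
#
#   >>> import numpy as np
#   >>> class FakeDist(object):
#   ...     levels = 1
#   >>> logit = LogitLink()
#   >>> p = np.array([0.1, 0.5, 0.9])
#   >>> np.allclose(logit.mu(logit.link(p, FakeDist()), FakeDist()), p)
#   True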
# pygam/penalties.py
"""
Penalty matrix generators
"""
import warnings
import scipy as sp
import numpy as np
def derivative(n, coef, derivative=2):
"""
Builds a penalty matrix for P-Splines with continuous features.
Penalizes the squared differences between basis coefficients.
Parameters
----------
n : int
number of splines
coef : unused
for compatibility with constraints
derivative: int, default: 2
which derivative to penalize:
if derivative is 1, we penalize 1st order derivatives;
if derivative is 2, we penalize 2nd order derivatives, etc.
Returns
-------
penalty matrix : sparse csc matrix of shape (n,n)
"""
if n==1:
# no derivative for constant functions
return sp.sparse.csc_matrix(0.)
D = sparse_diff(sp.sparse.identity(n).tocsc(), n=derivative)
return D.dot(D.T).tocsc()
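# Worked example: with n = 4 splines and the default 2nd derivative, D holds
# the second-difference vectors, and D D^T is the familiar P-spline penalty:
#
#   >>> derivative(4, None).toarray()
#   array([[ 1., -2.,  1.,  0.],
#          [-2.,  5., -4.,  1.],
#          [ 1., -4.,  5., -2.],
#          [ 0.,  1., -2.,  1.]])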
def l2(n, coef):
"""
Builds a penalty matrix for P-Splines with categorical features.
Penalizes the squared value of each basis coefficient.
Parameters
----------
n : int
number of splines
coef : unused
for compatibility with constraints
Returns
-------
penalty matrix : sparse csc matrix of shape (n,n)
"""
return sp.sparse.eye(n).tocsc()
def monotonicity_(n, coef, increasing=True):
"""
Builds a penalty matrix for P-Splines with continuous features.
Penalizes violation of monotonicity in the feature function.
Parameters
----------
n : int
number of splines
coef : array-like
coefficients of the feature function
increasing : bool, default: True
whether to enforce a monotonically increasing, or decreasing, function
Returns
-------
penalty matrix : sparse csc matrix of shape (n,n)
"""
if n != len(coef.ravel()):
raise ValueError('dimension mismatch: expected n equals len(coef), '\
'but found n = {}, coef.shape = {}.'\
.format(n, coef.shape))
if n==1:
# no monotonic penalty for constant functions
return sp.sparse.csc_matrix(0.)
if increasing:
# only penalize the case where coef_i-1 > coef_i
mask = sp.sparse.diags((np.diff(coef.ravel()) < 0).astype(float))
else:
# only penalize the case where coef_i-1 < coef_i
mask = sp.sparse.diags((np.diff(coef.ravel()) > 0).astype(float))
derivative = 1
D = sparse_diff(sp.sparse.identity(n).tocsc(), n=derivative) * mask
return D.dot(D.T).tocsc()
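# Sketch of the masking logic: with coef = [0, 1, 0.5] the only violation of
# a monotonic increase is the drop from 1 to 0.5, so the penalty is non-zero
# only for the pair of coefficients involved in that drop:
#
#   >>> import numpy as np
#   >>> monotonicity_(3, np.array([0., 1., 0.5]), increasing=True).toarray()
#   array([[ 0.,  0.,  0.],
#          [ 0.,  1., -1.],
#          [ 0., -1.,  1.]])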
def monotonic_inc(n, coef):
"""
Builds a penalty matrix for P-Splines with continuous features.
Penalizes violation of a monotonic increasing feature function.
Parameters
----------
n : int
number of splines
coef : array-like, coefficients of the feature function
Returns
-------
penalty matrix : sparse csc matrix of shape (n,n)
"""
return monotonicity_(n, coef, increasing=True)
def monotonic_dec(n, coef):
"""
Builds a penalty matrix for P-Splines with continuous features.
Penalizes violation of a monotonic decreasing feature function.
Parameters
----------
n : int
number of splines
coef : array-like
coefficients of the feature function
Returns
-------
penalty matrix : sparse csc matrix of shape (n,n)
"""
return monotonicity_(n, coef, increasing=False)
def convexity_(n, coef, convex=True):
"""
Builds a penalty matrix for P-Splines with continuous features.
Penalizes violation of convexity in the feature function.
Parameters
----------
n : int
number of splines
coef : array-like
coefficients of the feature function
convex : bool, default: True
whether to enforce convex, or concave functions
Returns
-------
penalty matrix : sparse csc matrix of shape (n,n)
"""
if n != len(coef.ravel()):
raise ValueError('dimension mismatch: expected n equals len(coef), '\
'but found n = {}, coef.shape = {}.'\
.format(n, coef.shape))
if n==1:
# no convex penalty for constant functions
return sp.sparse.csc_matrix(0.)
if convex:
mask = sp.sparse.diags((np.diff(coef.ravel(), n=2) < 0).astype(float))
else:
mask = sp.sparse.diags((np.diff(coef.ravel(), n=2) > 0).astype(float))
derivative = 2
D = sparse_diff(sp.sparse.identity(n).tocsc(), n=derivative) * mask
return D.dot(D.T).tocsc()
def convex(n, coef):
"""
Builds a penalty matrix for P-Splines with continuous features.
Penalizes violation of a convex feature function.
Parameters
----------
n : int
number of splines
coef : array-like
coefficients of the feature function
Returns
-------
penalty matrix : sparse csc matrix of shape (n,n)
"""
return convexity_(n, coef, convex=True)
def concave(n, coef):
"""
Builds a penalty matrix for P-Splines with continuous features.
Penalizes violation of a concave feature function.
Parameters
----------
n : int
number of splines
coef : array-like
coefficients of the feature function
Returns
-------
penalty matrix : sparse csc matrix of shape (n,n)
"""
return convexity_(n, coef, convex=False)
def circular(n, coef):
"""
Builds a penalty matrix for P-Splines with continuous features.
Penalizes violation of a circular feature function.
Parameters
----------
n : int
number of splines
coef : array-like
coefficients of the feature function; used only to validate length
Returns
-------
penalty matrix : sparse csc matrix of shape (n,n)
"""
if n != len(coef.ravel()):
raise ValueError('dimension mismatch: expected n equals len(coef), '\
'but found n = {}, coef.shape = {}.'\
.format(n, coef.shape))
if n==1:
# no circular penalty for constant functions
return sp.sparse.csc_matrix(0.)
row = np.zeros(n)
row[0] = 1
row[-1] = -1
P = sp.sparse.vstack([row, sp.sparse.csc_matrix((n-2, n)), row[::-1]])
return P.tocsc()
def none(n, coef):
"""
Build a matrix of zeros for features that should go unpenalized
Parameters
----------
n : int
number of splines
coef : unused
for compatibility with constraints
Returns
-------
penalty matrix : sparse csc matrix of shape (n,n)
"""
return sp.sparse.csc_matrix(np.zeros((n, n)))
def wrap_penalty(p, fit_linear, linear_penalty=0.):
"""
tool to account for a penalty on the linear term of any feature.
Parameters
----------
p : callable.
penalty-matrix-generating function.
fit_linear : boolean.
whether the current feature has a linear term or not.
linear_penalty : float, default: 0.
penalty on the linear term
Returns
-------
wrapped_p : callable
modified penalty-matrix-generating function
"""
def wrapped_p(n, *args):
if fit_linear:
if n == 1:
return sp.sparse.block_diag([linear_penalty], format='csc')
return sp.sparse.block_diag([linear_penalty,
p(n-1, *args)], format='csc')
else:
return p(n, *args)
return wrapped_p
def sparse_diff(array, n=1, axis=-1):
"""
A ported sparse version of np.diff.
Uses recursion to compute higher order differences
Parameters
----------
array : sparse array
n : int, default: 1
differencing order
axis : int, default: -1
axis along which differences are computed
Returns
-------
diff_array : sparse array
same shape as input array,
but 'axis' dimension is smaller by 'n'.
"""
if (n < 0) or (int(n) != n):
raise ValueError('Expected a non-negative integer for n, '\
'but found: {}'.format(n))
if not sp.sparse.issparse(array):
warnings.warn('Array is not sparse. Consider using numpy.diff')
if n == 0:
return array
nd = array.ndim
slice1 = [slice(None)]*nd
slice2 = [slice(None)]*nd
slice1[axis] = slice(1, None)
slice2[axis] = slice(None, -1)
slice1 = tuple(slice1)
slice2 = tuple(slice2)
A = sparse_diff(array, n-1, axis=axis)
return A[slice1] - A[slice2]
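# Sanity check (illustrative): sparse_diff agrees with np.diff on the dense
# equivalent of its input:
#
#   >>> import numpy as np
#   >>> dense = np.arange(9.).reshape(3, 3)
#   >>> A = sp.sparse.csc_matrix(dense)
#   >>> np.array_equal(sparse_diff(A, n=2).toarray(), np.diff(dense, n=2))
#   True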
# pygam/pygam.py
# -*- coding: utf-8 -*-
from __future__ import division, absolute_import
from collections import defaultdict
from collections import OrderedDict
from copy import deepcopy
from progressbar import ProgressBar
import warnings
import numpy as np
import scipy as sp
from scipy import stats
from pygam.core import Core
from pygam.penalties import derivative
from pygam.penalties import l2
from pygam.penalties import monotonic_inc
from pygam.penalties import monotonic_dec
from pygam.penalties import convex
from pygam.penalties import concave
from pygam.penalties import circular
from pygam.penalties import none
from pygam.penalties import wrap_penalty
from pygam.distributions import Distribution
from pygam.distributions import NormalDist
from pygam.distributions import BinomialDist
from pygam.distributions import PoissonDist
from pygam.distributions import GammaDist
from pygam.distributions import InvGaussDist
from pygam.links import Link
from pygam.links import IdentityLink
from pygam.links import LogitLink
from pygam.links import LogLink
from pygam.links import InverseLink
from pygam.links import InvSquaredLink
from pygam.callbacks import CallBack
from pygam.callbacks import Deviance
from pygam.callbacks import Diffs
from pygam.callbacks import Accuracy
from pygam.callbacks import Coef
from pygam.callbacks import validate_callback
from pygam.utils import check_dtype
from pygam.utils import check_y
from pygam.utils import check_X
from pygam.utils import check_X_y
from pygam.utils import check_array
from pygam.utils import check_lengths
from pygam.utils import print_data
from pygam.utils import gen_edge_knots
from pygam.utils import b_spline_basis
from pygam.utils import combine
from pygam.utils import cholesky
from pygam.utils import check_param
from pygam.utils import isiterable
from pygam.utils import NotPositiveDefiniteError
EPS = np.finfo(np.float64).eps # machine epsilon
DISTRIBUTIONS = {'normal': NormalDist,
'poisson': PoissonDist,
'binomial': BinomialDist,
'gamma': GammaDist,
'inv_gauss': InvGaussDist
}
LINK_FUNCTIONS = {'identity': IdentityLink,
'log': LogLink,
'logit': LogitLink,
'inverse': InverseLink,
'inv_squared': InvSquaredLink
}
CALLBACKS = {'deviance': Deviance,
'diffs': Diffs,
'accuracy': Accuracy,
'coef': Coef
}
PENALTIES = {'auto': 'auto',
'derivative': derivative,
'l2': l2,
'none': none,
}
CONSTRAINTS = {'convex': convex,
'concave': concave,
'monotonic_inc': monotonic_inc,
'monotonic_dec': monotonic_dec,
'circular': circular,
'none': none
}
def _make_positive_semi_definite(cov):
"""Return the given matrix with a small amount added to the diagonal
to make it positive semi-definite."""
return cov + np.eye(len(cov)) * np.sqrt(EPS)
class GAM(Core):
"""Generalized Additive Model
Parameters
----------
callbacks : list of strings or list of CallBack objects,
default: ['deviance', 'diffs']
Names of callback objects to call during the optimization loop.
constraints : str or callable, or iterable of str or callable,
default: None
Names of constraint functions to call during the optimization loop.
Must be in {'convex', 'concave', 'monotonic_inc', 'monotonic_dec',
'circular', 'none'}
If None, then the model will apply no constraints.
If only one str or callable is specified, then it is copied for all
features.
distribution : str or Distribution object, default: 'normal'
Distribution to use in the model.
link : str or Link object, default: 'identity'
Link function to use in the model.
dtype : str in {'auto', 'numerical', 'categorical'},
or list of str, default: 'auto'
String describing the data-type of each feature.
'numerical' is used for continuous-valued data-types,
like in regression.
'categorical' is used for discrete-valued data-types,
like in classification.
If only one str is specified, then it is copied for all features.
lam : float or iterable of floats > 0, default: 0.6
Smoothing strength; must be a positive float, or one positive float
per feature.
Larger values enforce stronger smoothing.
If only one float is specified, then it is copied for all features.
fit_intercept : bool, default: True
Specifies if a constant (a.k.a. bias or intercept) should be
added to the decision function.
NOTE: the intercept receives no smoothing penalty.
fit_linear : bool or iterable of bools, default: False
Specifies if a linear term should be added to any of the feature
functions. Useful for including pre-defined feature transformations
in the model.
If only one bool is specified, then it is copied for all features.
NOTE: Many constraints are incompatible with an additional linear fit.
eg. if a non-zero linear function is added to a periodic spline
function, it will cease to be periodic.
the same can happen with a monotonic spline function.
fit_splines : bool or iterable of bools, default: True
Specifies if a smoother should be added to any of the feature
functions. Useful for defining feature transformations a-priori
that should not have splines fitted to them.
If only one bool is specified, then it is copied for all features.
NOTE: fit_splines supersedes n_splines.
ie. if n_splines > 0 and fit_splines = False, no splines will be fitted.
max_iter : int, default: 100
Maximum number of iterations allowed for the solver to converge.
penalties : str or callable, or iterable of str or callable,
default: 'auto'
Type of penalty to use for each feature.
penalty should be in {'auto', 'none', 'derivative', 'l2', }
If 'auto', then the model will use 2nd derivative smoothing for features
of dtype 'numerical', and L2 smoothing for features of dtype
'categorical'.
If only one str or callable is specified, then it is copied for all
features.
n_splines : int, or iterable of ints, default: 25
Number of splines to use in each feature function; must be non-negative.
If only one int is specified, then it is copied for all features.
Note: this value is set to 0 if fit_splines is False
spline_order : int, or iterable of ints, default: 3
Order of spline to use in each feature function; must be non-negative.
If only one int is specified, then it is copied for all features
Note: if a feature is of type categorical, spline_order will be set to 0.
tol : float, default: 1e-4
Tolerance for stopping criteria.
Attributes
----------
coef_ : array, shape (n_coeffs,)
Coefficients of the features in the decision function.
If fit_intercept is True, then self.coef_[0] will contain the bias.
statistics_ : dict
Dictionary containing model statistics like GCV/UBRE scores, AIC/c,
parameter covariances, estimated degrees of freedom, etc.
logs_ : dict
Dictionary containing the outputs of any callbacks at each
optimization loop.
The logs are structured as `{callback: [...]}`
References
----------
Simon N. Wood, 2006
Generalized Additive Models: an introduction with R
Hastie, Tibshirani, Friedman
The Elements of Statistical Learning
http://statweb.stanford.edu/~tibs/ElemStatLearn/printings/ESLII_print10.pdf
Paul Eilers & Brian Marx, 2015
International Biometric Society: A Crash Course on P-splines
http://www.ibschannel2015.nl/project/userfiles/Crash_course_handout.pdf
"""
def __init__(self, lam=0.6, max_iter=100, n_splines=25, spline_order=3,
penalties='auto', tol=1e-4, distribution='normal',
link='identity', callbacks=['deviance', 'diffs'],
fit_intercept=True, fit_linear=False, fit_splines=True,
dtype='auto', constraints=None):
self.max_iter = max_iter
self.tol = tol
self.lam = lam
self.n_splines = n_splines
self.spline_order = spline_order
self.penalties = penalties
self.distribution = distribution
self.link = link
self.callbacks = callbacks
self.constraints = constraints
self.fit_intercept = fit_intercept
self.fit_linear = fit_linear
self.fit_splines = fit_splines
self.dtype = dtype
# created by other methods
self._n_coeffs = [] # useful for indexing into model coefficients
self._edge_knots = []
self._lam = []
self._n_splines = []
self._spline_order = []
self._penalties = []
self._constraints = []
self._dtype = []
self._fit_linear = []
self._fit_splines = []
self._fit_intercept = None
# internal settings
self._constraint_lam = 1e9 # regularization intensity for constraints
self._constraint_l2 = 1e-3 # diagonal loading to improve conditioning
self._constraint_l2_max = 1e-1 # maximum loading
self._opt = 0 # use 0 for numerically stable optimizer, 1 for naive
# call super and exclude any variables
super(GAM, self).__init__()
def _expand_attr(self, attr, n, dt_alt=None, msg=None):
"""
tool to parse and duplicate initialization arguments
into model parameters.
typically we use this tool to take a single attribute like:
self.lam = 0.6
and make one copy per feature, ie:
self._lam = [0.6, 0.6, 0.6]
for a model with 3 features.
if self.attr is an iterable of values of length n,
then copy it verbatim to self._attr.
otherwise extend the single value to a list of length n,
and copy that to self._attr
dt_alt is an alternative value for dtypes of type categorical (ie discrete).
so if our 3-feature dataset is of types
['numerical', 'numerical', 'categorical'],
we could use this method to turn
self.lam = 0.6
into
self.lam = [0.6, 0.6, 0.3]
by calling
self._expand_attr('lam', 3, dt_alt=0.3)
Parameters
----------
attr : string
name of the attribute to expand
n : int
number of times to repeat the attribute
dt_alt : object, default: None
object to substitute for the attribute of categorical features.
if dt_alt is None, categorical features are treated the same as
numerical features.
msg: string, default: None
custom error message to report if
self.attr is iterable BUT len(self.attr) != n
if msg is None, default message is used:
'expected "attr" to have length X.shape[1], but found {}'.format(len(self.attr))
Returns
-------
None
"""
data = deepcopy(getattr(self, attr))
_attr = '_' + attr
if isiterable(data):
if not (len(data) == n):
if msg is None:
msg = 'expected {} to have length X.shape[1], '\
'but found {}'.format(attr, len(data))
raise ValueError(msg)
else:
data = [data] * n
if dt_alt is not None:
data = [d if dt != 'categorical' else dt_alt for d,dt in zip(data, self._dtype)]
setattr(self, _attr, data)
@property
def _is_fitted(self):
"""
simple way to check if the GAM has been fitted
Parameters
---------
None
Returns
-------
bool : whether or not the model is fitted
"""
return hasattr(self, 'coef_')
def _validate_params(self):
"""
method to sanitize model parameters
Parameters
---------
None
Returns
-------
None
"""
# fit_intercept
if not isinstance(self.fit_intercept, bool):
raise ValueError('fit_intercept must be type bool, but found {}'\
.format(self.fit_intercept.__class__))
# max_iter
self.max_iter = check_param(self.max_iter, param_name='max_iter',
dtype='int', constraint='>=1',
iterable=False)
# lam
self.lam = check_param(self.lam, param_name='lam',
dtype='float', constraint='>0')
# n_splines
self.n_splines = check_param(self.n_splines, param_name='n_splines',
dtype='int', constraint='>=0')
# spline_order
self.spline_order = check_param(self.spline_order,
param_name='spline_order',
dtype='int', constraint='>=0')
# n_splines + spline_order
if not (np.atleast_1d(self.n_splines) >
np.atleast_1d(self.spline_order)).all():
raise ValueError('n_splines must be > spline_order. '\
'found: n_splines = {} and spline_order = {}'\
.format(self.n_splines, self.spline_order))
# distribution
if not ((self.distribution in DISTRIBUTIONS)
or isinstance(self.distribution, Distribution)):
raise ValueError('unsupported distribution {}'.format(self.distribution))
if self.distribution in DISTRIBUTIONS:
self.distribution = DISTRIBUTIONS[self.distribution]()
# link
if not ((self.link in LINK_FUNCTIONS) or isinstance(self.link, Link)):
raise ValueError('unsupported link {}'.format(self.link))
if self.link in LINK_FUNCTIONS:
self.link = LINK_FUNCTIONS[self.link]()
# callbacks
if not isiterable(self.callbacks):
raise ValueError('Callbacks must be iterable, but found {}'\
.format(self.callbacks))
if not all([c in CALLBACKS or
isinstance(c, CallBack) for c in self.callbacks]):
raise ValueError('unsupported callback(s) {}'.format(self.callbacks))
callbacks = list(self.callbacks)
for i, c in enumerate(self.callbacks):
if c in CALLBACKS:
callbacks[i] = CALLBACKS[c]()
self.callbacks = [validate_callback(c) for c in callbacks]
# penalties
if not (isiterable(self.penalties) or
hasattr(self.penalties, '__call__') or
self.penalties in PENALTIES or
self.penalties is None):
raise ValueError('penalties must be iterable or callable, '\
'but found {}'.format(self.penalties))
if isiterable(self.penalties):
for i, p in enumerate(self.penalties):
if not (hasattr(p, '__call__') or
(p in PENALTIES) or
(p is None)):
raise ValueError("penalties must be callable or in "\
"{}, but found {} for {}th penalty"\
.format(list(PENALTIES.keys()), p, i))
# constraints
if not (isiterable(self.constraints) or
hasattr(self.constraints, '__call__') or
self.constraints in CONSTRAINTS or
self.constraints is None):
raise ValueError('constraints must be iterable or callable, '\
'but found {}'.format(self.constraints))
if isiterable(self.constraints):
for i, c in enumerate(self.constraints):
if not (hasattr(c, '__call__') or
(c in CONSTRAINTS) or
(c is None)):
raise ValueError("constraints must be callable or in "\
"{}, but found {} for {}th constraint"\
.format(list(CONSTRAINTS.keys()), c, i))
# dtype
if not (self.dtype in ['auto', 'numerical', 'categorical'] or
isiterable(self.dtype)):
raise ValueError("dtype must be in ['auto', 'numerical', "\
"'categorical'] or iterable of those strings, "\
"but found dtype = {}".format(self.dtype))
if isiterable(self.dtype):
for dt in self.dtype:
if dt not in ['auto', 'numerical', 'categorical']:
raise ValueError("elements of iterable dtype must be in "\
"['auto', 'numerical', 'categorical], "\
"but found dtype = {}".format(self.dtype))
def _validate_data_dep_params(self, X):
"""
method to validate and prepare data-dependent parameters
Parameters
---------
X : array-like
containing the input dataset
Returns
-------
None
"""
n_samples, m_features = X.shape
# set up dtypes and check types if 'auto'
self._expand_attr('dtype', m_features)
for i, (dt, x) in enumerate(zip(self._dtype, X.T)):
if dt == 'auto':
dt = check_dtype(x)[0]
if dt == 'categorical':
warnings.warn('detected categorical data for feature {}'\
.format(i), stacklevel=2)
self._dtype[i] = dt
assert len(self._dtype) == m_features # sanity check
# set up lambdas
self._expand_attr('lam', m_features)
# add intercept term
if self.fit_intercept:
self._lam = [0.] + self._lam
# set up penalty matrices
self._expand_attr('penalties', m_features)
# set up constraints
self._expand_attr('constraints', m_features, dt_alt=None)
# set up fit_linear and fit_splines, copy fit_intercept
self._fit_intercept = self.fit_intercept
self._expand_attr('fit_linear', m_features, dt_alt=False)
self._expand_attr('fit_splines', m_features)
for i, (fl, c) in enumerate(zip(self._fit_linear, self._constraints)):
if bool(c) and (c != 'none'):
if fl:
warnings.warn('cannot do fit_linear with constraints. '\
'setting fit_linear=False for feature {}'\
.format(i))
self._fit_linear[i] = False
line_or_spline = [bool(line + spline) for line, spline in \
zip(self._fit_linear, self._fit_splines)]
# problems
if not all(line_or_spline):
bad = [i for i, l_or_s in enumerate(line_or_spline) if not l_or_s]
raise ValueError('a line or a spline must be fit on each feature. '\
'Neither were found on feature(s): {}' \
.format(bad))
# expand spline_order, n_splines, and prepare edge_knots
self._expand_attr('spline_order', X.shape[1], dt_alt=0)
self._expand_attr('n_splines', X.shape[1], dt_alt=0)
self._edge_knots = [gen_edge_knots(feat, dtype) for feat, dtype in \
zip(X.T, self._dtype)]
# update our n_splines correcting for categorical features, no splines
for i, (fs, dt, ek) in enumerate(zip(self._fit_splines,
self._dtype,
self._edge_knots)):
if fs:
if dt == 'categorical':
self._n_splines[i] = len(ek) - 1
if not fs:
self._n_splines[i] = 0
# compute number of model coefficients
self._n_coeffs = []
for n_splines, fit_linear, fit_splines in zip(self._n_splines,
self._fit_linear,
self._fit_splines):
self._n_coeffs.append(n_splines * fit_splines + fit_linear)
if self._fit_intercept:
self._n_coeffs = [1] + self._n_coeffs
def loglikelihood(self, X, y, weights=None):
"""
compute the log-likelihood of the dataset using the current model
Parameters
---------
X : array-like of shape (n_samples, m_features)
containing the input dataset
y : array-like of shape (n,)
containing target values
weights : array-like of shape (n,)
containing sample weights
Returns
-------
log-likelihood : float
the total log-likelihood of the data under the current model
"""
y = check_y(y, self.link, self.distribution)
mu = self.predict_mu(X)
if weights is not None:
weights = check_array(weights, name='sample weights')
check_lengths(y, weights)
else:
weights = np.ones_like(y).astype('f')
return self._loglikelihood(y, mu, weights=weights)
def _loglikelihood(self, y, mu, weights=None):
"""
compute the log-likelihood of the dataset using the current model
Parameters
---------
y : array-like of shape (n,)
containing target values
mu : array-like of shape (n_samples,)
expected value of the targets given the model and inputs
weights : array-like of shape (n,)
containing sample weights
Returns
-------
log-likelihood : float
the total log-likelihood of the data under the current model
"""
return np.log(self.distribution.pdf(y=y, mu=mu, weights=weights)).sum()
def _linear_predictor(self, X=None, modelmat=None, b=None, feature=-1):
"""linear predictor
compute the linear predictor portion of the model
ie multiply the model matrix by the spline basis coefficients
Parameters
---------
at least 1 of (X, modelmat)
and
at least 1 of (b, feature)
X : array-like of shape (n_samples, m_features), default: None
containing the input dataset
if None, will attempt to use modelmat
modelmat : array-like, default: None
contains the spline basis for each feature evaluated at the input
values for each feature, ie model matrix
if None, will attempt to construct the model matrix from X
b : array-like, default: None
contains the spline coefficients
if None, will use current model coefficients
feature : int, default: -1
feature for which to compute the linear prediction
if -1, will compute for all features
Returns
-------
lp : np.array of shape (n_samples,)
"""
if modelmat is None:
modelmat = self._modelmat(X, feature=feature)
if b is None:
b = self.coef_[self._select_feature(feature)]
return modelmat.dot(b).flatten()
def predict_mu(self, X):
"""
predict the expected value of the target given the model and input X
Parameters
---------
X : array-like of shape (n_samples, m_features)
containing the input dataset
Returns
-------
y : np.array of shape (n_samples,)
containing expected values under the model
"""
if not self._is_fitted:
raise AttributeError('GAM has not been fitted. Call fit first.')
X = check_X(X, n_feats=len(self._n_coeffs) - self._fit_intercept,
edge_knots=self._edge_knots, dtypes=self._dtype)
lp = self._linear_predictor(X)
return self.link.mu(lp, self.distribution)
def predict(self, X):
"""
predict the expected value of the target given the model and input X
often this is done via the expected value of the GAM given input X
Parameters
---------
X : array-like of shape (n_samples, m_features)
containing the input dataset
Returns
-------
y : np.array of shape (n_samples,)
containing predicted values under the model
"""
if not self._is_fitted:
raise AttributeError('GAM has not been fitted. Call fit first.')
X = check_X(X, n_feats=len(self._n_coeffs) - self._fit_intercept,
edge_knots=self._edge_knots, dtypes=self._dtype)
return self.predict_mu(X)
def _modelmat(self, X, feature=-1):
"""
Builds a model matrix, B, out of the spline basis for each feature
B = [B_0, B_1, ..., B_p]
Parameters
---------
X : array-like of shape (n_samples, m_features)
containing the input dataset
feature : int, default: -1
feature index for which to compute the model matrix
if -1, will create the model matrix for all features
Returns
-------
modelmat : sp.sparse csc matrix of shape (n_samples, n_coeffs)
containing model matrix of the spline basis for selected features
"""
X = check_X(X, n_feats=len(self._n_coeffs) - self._fit_intercept,
edge_knots=self._edge_knots, dtypes=self._dtype)
if feature >= len(self._n_coeffs) or feature < -1:
raise ValueError('feature {} out of range for X with shape {}'\
.format(feature, X.shape))
# for all features, build matrix recursively
if feature == -1:
modelmat = []
for feat in range(X.shape[1] + self._fit_intercept):
modelmat.append(self._modelmat(X, feature=feat))
return sp.sparse.hstack(modelmat, format='csc')
# intercept
if (feature == 0) and self._fit_intercept:
return sp.sparse.csc_matrix(np.ones((X.shape[0], 1)))
# return only the basis functions for 1 feature
feature = feature - self._fit_intercept
featuremat = []
if self._fit_linear[feature]:
featuremat.append(sp.sparse.csc_matrix(X[:, feature][:,None]))
if self._fit_splines[feature]:
featuremat.append(b_spline_basis(X[:,feature],
edge_knots=self._edge_knots[feature],
spline_order=self._spline_order[feature],
n_splines=self._n_splines[feature],
sparse=True))
return sp.sparse.hstack(featuremat, format='csc')
def _cholesky(self, A, **kwargs):
"""
method to handle potential problems with the cholesky decomposition.
will try to increase L2 regularization of the penalty matrix to
do away with non-positive-definite errors
Parameters
----------
A : np.array
Returns
-------
np.array
"""
# create appropriate-size diagonal matrix
if sp.sparse.issparse(A):
diag = sp.sparse.eye(A.shape[0])
else:
diag = np.eye(A.shape[0])
constraint_l2 = self._constraint_l2
while constraint_l2 <= self._constraint_l2_max:
try:
L = cholesky(A, **kwargs)
self._constraint_l2 = constraint_l2
return L
except NotPositiveDefiniteError:
warnings.warn('Matrix is not positive definite. \n'\
'Increasing l2 reg by factor of 10.',
stacklevel=2)
A -= constraint_l2 * diag
constraint_l2 *= 10
A += constraint_l2 * diag
raise NotPositiveDefiniteError('Matrix is not positive definite.')
def _C(self):
"""
builds the GAM block-diagonal constraint matrix in quadratic form
out of constraint matrices specified for each feature.
behaves like a penalty, but with a very large lambda value, ie 1e9.
Parameters
---------
None
Returns
-------
C : sparse CSC matrix containing the model constraints in quadratic form
"""
Cs = []
if self._fit_intercept:
Cs.append(np.array(0.))
for i, c in enumerate(self._constraints):
fit_linear = self._fit_linear[i]
dtype = self._dtype[i]
n = self._n_coeffs[i + self._fit_intercept]
coef = self.coef_[self._select_feature(i + self._fit_intercept)]
coef = coef[fit_linear:]
if c is None:
c = 'none'
if c in CONSTRAINTS:
c = CONSTRAINTS[c]
c = wrap_penalty(c, fit_linear)(n, coef) * self._constraint_lam
Cs.append(c)
Cs = sp.sparse.block_diag(Cs)
# improve condition
if Cs.nnz > 0:
Cs += sp.sparse.diags(self._constraint_l2 * np.ones(Cs.shape[0]))
return Cs
def _P(self):
"""
builds the GAM block-diagonal penalty matrix in quadratic form
out of penalty matrices specified for each feature.
each feature penalty matrix is multiplied by a lambda for that feature.
the first feature is the intercept.
so for m features:
P = block_diag[lam0 * P0, lam1 * P1, lam2 * P2, ... , lamm * Pm]
Parameters
---------
None
Returns
-------
P : sparse CSC matrix containing the model penalties in quadratic form
"""
Ps = []
if self._fit_intercept:
Ps.append(np.array(0.))
for i, p in enumerate(self._penalties):
fit_linear = self._fit_linear[i]
dtype = self._dtype[i]
n = self._n_coeffs[i + self._fit_intercept]
coef = self.coef_[self._select_feature(i + self._fit_intercept)]
coef = coef[fit_linear:]
if p == 'auto':
if dtype == 'numerical':
p = derivative
if dtype == 'categorical':
p = l2
if p is None:
p = 'none'
if p in PENALTIES:
p = PENALTIES[p]
p = wrap_penalty(p, fit_linear)(n, coef)
Ps.append(p)
P_matrix = tuple([np.multiply(P, lam) for lam, P in zip(self._lam, Ps)])
P_matrix = sp.sparse.block_diag(P_matrix)
return P_matrix
def _pseudo_data(self, y, lp, mu):
"""
compute the pseudo data for a PIRLS iterations
Parameters
---------
y : array-like of shape (n,)
containing target data
lp : array-like of shape (n,)
containing linear predictions by the model
mu : array-like of shape (n_samples,)
expected value of the targets given the model and inputs
Returns
-------
pseudo_data : np.array of shape (n,)
"""
return lp + (y - mu) * self.link.gradient(mu, self.distribution)
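# Quick sanity check of the update above (an illustrative sketch, not part
# of the training loop): with the identity link, gradient(mu) == 1, so the
# pseudo-data is lp + (y - mu), which collapses to y itself once mu == lp:
#
#   >>> import numpy as np
#   >>> from pygam.links import IdentityLink
#   >>> gam = GAM()
#   >>> gam.link = IdentityLink()  # bypass fit-time validation for the demo
#   >>> y = np.array([1., 2., 3.])
#   >>> gam._pseudo_data(y, lp=y, mu=y)
#   array([1., 2., 3.])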
def _W(self, mu, weights):
"""
compute the PIRLS weights for model predictions.
TODO: let's verify the formula for this.
if we use the square root of the mu with the stable opt,
we get the same results as when we use non-sqrt mu with the naive opt.
this makes me think that they are equivalent.
also, using non-sqrt mu with the stable opt gives very small edofs for even
lam=0.001, and the parameter variance is huge. this seems strange.
computed [V * d(link)/d(mu)] ^(-1/2) by hand and the math checks out as hoped.
I've since moved the square to the naive PIRLS method to keep the code modular.
Parameters
---------
mu : array-like of shape (n_samples,)
expected value of the targets given the model and inputs
weights : array-like of shape (n_samples,)
containing sample weights
Returns
-------
weights : scipy.sparse diagonal matrix of shape (n_samples, n_samples)
"""
return sp.sparse.diags((self.link.gradient(mu, self.distribution)**2 *
self.distribution.V(mu=mu) *
weights ** -1)**-0.5)
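# Worked example (illustrative): for a single-trial binomial model with logit
# link, V(mu) = mu * (1 - mu) and gradient(mu) = 1 / (mu * (1 - mu)), so with
# unit sample weights the diagonal above is
#     (gradient(mu)**2 * V(mu)) ** -0.5 == sqrt(mu * (1 - mu))
# ie the square root of the usual IRLS weight, as noted in the docstring.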
def _mask(self, weights):
"""
identifies the samples whose weights are
greater than sqrt(machine epsilon), and neither NaN nor Inf
Parameters
---------
weights : array-like of shape (n,)
containing sample weights
Returns
-------
mask : boolean np.array of shape (n,) of good weight values
"""
mask = (np.abs(weights) >= np.sqrt(EPS)) * np.isfinite(weights)
assert mask.sum() != 0, 'increase regularization'
return mask
def _pirls(self, X, Y, weights):
"""
Performs stable PIRLS iterations to estimate GAM coefficients
Parameters
---------
X : array-like of shape (n_samples, m_features)
containing input data
Y : array-like of shape (n,)
containing target data
weights : array-like of shape (n,)
containing sample weights
Returns
-------
None
"""
modelmat = self._modelmat(X) # build a basis matrix for the GLM
n, m = modelmat.shape
# initialize GLM coefficients
if not self._is_fitted or len(self.coef_) != sum(self._n_coeffs):
self.coef_ = np.ones(m) * np.sqrt(EPS) # allow more training
# make a reasonable initial parameter guess
if self._fit_intercept:
# set the intercept as if we had a constant model
const_model = (self.link.link(Y, self.distribution))
if np.isfinite(const_model).sum() > 0:
const_model = np.median(const_model[np.isfinite(const_model)])
self.coef_[0] += const_model
# do our penalties require recomputing cholesky?
chol_pen = np.ravel([np.ravel(p) for p in self._penalties])
chol_pen = any([cp in ['convex', 'concave', 'monotonic_inc',
'monotonic_dec', 'circular'] for cp in chol_pen])
P = self._P() # create penalty matrix
# base penalty
S = sp.sparse.diags(np.ones(m) * np.sqrt(EPS)) # improve condition
# S += self._H # add any user-chosen minimum penalty to the diagonal
# if we don't have any constraints, then do cholesky now
if not any(self._constraints) and not chol_pen:
E = self._cholesky(S + P, sparse=False)
min_n_m = np.min([m,n])
Dinv = np.zeros((min_n_m + m, m)).T
for _ in range(self.max_iter):
# recompute cholesky if needed
if any(self._constraints) or chol_pen:
P = self._P()
C = self._C()
E = self._cholesky(S + P + C, sparse=False)
# forward pass
y = deepcopy(Y) # for simplicity
lp = self._linear_predictor(modelmat=modelmat)
mu = self.link.mu(lp, self.distribution)
W = self._W(mu, weights) # create pirls weight matrix
# check for weights == 0, nan, or inf, and update
mask = self._mask(W.diagonal())
y = y[mask] # update
lp = lp[mask] # update
mu = mu[mask] # update
W = sp.sparse.diags(W.diagonal()[mask]) # update
# PIRLS Wood pg 183
pseudo_data = W.dot(self._pseudo_data(y, lp, mu))
# log on-loop-start stats
self._on_loop_start(vars())
WB = W.dot(modelmat[mask,:]) # common matrix product
Q, R = np.linalg.qr(WB.todense())
if not np.isfinite(Q).all() or not np.isfinite(R).all():
raise ValueError('QR decomposition produced NaN or Inf. '\
'Check X data.')
# need to recompute the number of singular values
min_n_m = np.min([m, n, mask.sum()])
Dinv = np.zeros((min_n_m + m, m)).T
# SVD
U, d, Vt = np.linalg.svd(np.vstack([R, E.T]))
svd_mask = d <= (d.max() * np.sqrt(EPS)) # flags small singular values; currently unused
np.fill_diagonal(Dinv, d**-1) # invert the singular values
U1 = U[:min_n_m,:] # keep only top portion of U
# update coefficients
B = Vt.T.dot(Dinv).dot(U1.T).dot(Q.T)
coef_new = B.dot(pseudo_data).A.flatten()
diff = np.linalg.norm(self.coef_ - coef_new)/np.linalg.norm(coef_new)
self.coef_ = coef_new # update
# log on-loop-end stats
self._on_loop_end(vars())
# check convergence
if diff < self.tol:
break
# estimate statistics even if not converged
self._estimate_model_statistics(Y, modelmat, inner=None, BW=WB.T, B=B,
weights=weights)
if diff < self.tol:
return
print('did not converge')
return
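# Minimal sketch (illustrative, not the library's API) of the stable update
# inside _pirls: solve the penalized weighted least-squares problem
#     min_b || W**0.5 * (z - X b) ||**2 + b' (S + P) b
# via a QR of the weighted model matrix and an SVD of [R; E'], where E E'
# equals the penalty (Wood 2006, pg 183). dense numpy only, for clarity:
#
#     import numpy as np
#
#     def stable_pls_step(X, z, w, E):
#         """X: (n, m), z: (n,), w: (n,), E: (m, m) with E.dot(E.T) == S + P."""
#         WX = np.sqrt(w)[:, None] * X
#         Wz = np.sqrt(w) * z
#         Q, R = np.linalg.qr(WX)                          # R: (m, m)
#         U, d, Vt = np.linalg.svd(np.vstack([R, E.T]),
#                                  full_matrices=False)    # U: (2m, m)
#         U1 = U[:R.shape[0], :]                           # rows matching R
#         return Vt.T.dot(U1.T.dot(Q.T.dot(Wz)) / d)       # new coefficients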
def _pirls_naive(self, X, y):
"""
Performs naive PIRLS iterations to estimate GAM coefficients
Parameters
---------
X : array-like of shape (n_samples, m_features)
containing input data
y : array-like of shape (n,)
containing target data
Returns
-------
None
"""
modelmat = self._modelmat(X) # build a basis matrix for the GLM
m = modelmat.shape[1]
# initialize GLM coefficients
if not self._is_fitted or len(self.coef_) != sum(self._n_coeffs):
self.coef_ = np.ones(m) * np.sqrt(EPS) # allow more training
P = self._P() # create penalty matrix
P += sp.sparse.diags(np.ones(m) * np.sqrt(EPS)) # improve condition
for _ in range(self.max_iter):
lp = self._linear_predictor(modelmat=modelmat)
mu = self.link.mu(lp, self.distribution)
mask = self._mask(mu)
mu = mu[mask] # update
lp = lp[mask] # update
if hasattr(self, 'accuracy'):
self.logs_['accuracy'].append(self.accuracy(y=y[mask], mu=mu)) # log the training accuracy; only classifiers define accuracy
self.logs_['deviance'].append(self.distribution.deviance(y=y[mask], mu=mu, scaled=False).sum()) # log the training deviance
W = self._W(mu, np.ones_like(mu))**2 # PIRLS weights with unit sample weights; square added for modularity
pseudo_data = self._pseudo_data(y[mask], lp, mu) # PIRLS working response
BW = modelmat[mask,:].T.dot(W).tocsc() # common matrix product
inner = sp.sparse.linalg.inv(BW.dot(modelmat[mask,:]) + P) # keep for edof
coef_new = inner.dot(BW).dot(pseudo_data).flatten()
diff = np.linalg.norm(self.coef_ - coef_new)/np.linalg.norm(coef_new)
self.logs_['diffs'].append(diff)
self.coef_ = coef_new # update
# check convergence
if diff < self.tol:
self.edof_ = self._estimate_edof(modelmat[mask,:], inner, BW)
self.statistics_ = {'edof': self.edof_} # the AIC helpers read edof from here
self.aic_ = self.statistics_['AIC'] = self._estimate_AIC(y=y[mask], mu=mu, weights=np.ones_like(mu))
self.aicc_ = self.statistics_['AICc'] = self._estimate_AICc(y=y[mask], mu=mu, weights=np.ones_like(mu))
return
print('did not converge')
def _on_loop_start(self, variables):
"""
performs on-loop-start actions like callbacks
variables contains local namespace variables.
Parameters
---------
variables : dict of available variables
Returns
-------
None
"""
for callback in self.callbacks:
if hasattr(callback, 'on_loop_start'):
self.logs_[str(callback)].append(callback.on_loop_start(**variables))
def _on_loop_end(self, variables):
"""
performs on-loop-end actions like callbacks
variables contains local namespace variables.
Parameters
---------
variables : dict of available variables
Returns
-------
None
"""
for callback in self.callbacks:
if hasattr(callback, 'on_loop_end'):
self.logs_[str(callback)].append(callback.on_loop_end(**variables))
def fit(self, X, y, weights=None):
"""Fit the generalized additive model.
Parameters
----------
X : array-like, shape (n_samples, m_features)
Training vectors, where n_samples is the number of samples
and m_features is the number of features.
y : array-like, shape (n_samples,)
Target values (integers in classification, real numbers in
regression)
For classification, labels must correspond to classes.
weights : array-like shape (n_samples,) or None, default: None
containing sample weights
if None, defaults to array of ones
Returns
-------
self : object
Returns fitted GAM object
"""
# validate parameters
self._validate_params()
# validate data
y = check_y(y, self.link, self.distribution)
X = check_X(X)
check_X_y(X, y)
if weights is not None:
weights = check_array(weights, name='sample weights')
check_lengths(y, weights)
else:
weights = np.ones_like(y).astype('f')
# validate data-dependent parameters
self._validate_data_dep_params(X)
# set up logging
if not hasattr(self, 'logs_'):
self.logs_ = defaultdict(list)
# optimize
if self._opt == 0:
self._pirls(X, y, weights)
if self._opt == 1:
self._pirls_naive(X, y)
return self
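# Usage sketch (illustrative; assumes numpy arrays X, y of compatible shapes):
#
#     gam = LinearGAM(lam=0.6, n_splines=25).fit(X, y)
#     print(gam.statistics_['pseudo_r2']['explained_deviance'])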
def deviance_residuals(self, X, y, weights=None, scaled=False):
"""
method to compute the deviance residuals of the model
these are analogous to the residuals of an OLS.
Parameters
----------
X : array-like
input data array of shape (n_samples, m_features)
y : array-like
output data vector of shape (n_samples,)
weights : array-like shape (n_samples,) or None, default: None
containing sample weights
if None, defaults to array of ones
scaled : bool, default: False
whether to scale the deviance by the (estimated) distribution scale
Returns
-------
deviance_residuals : np.array
with shape (n_samples,)
"""
if not self._is_fitted:
raise AttributeError('GAM has not been fitted. Call fit first.')
y = check_y(y, self.link, self.distribution)
X = check_X(X, n_feats=len(self._n_coeffs) - self._fit_intercept,
edge_knots=self._edge_knots, dtypes=self._dtype)
check_X_y(X, y)
if weights is not None:
weights = check_array(weights, name='sample weights')
check_lengths(y, weights)
else:
weights = np.ones_like(y).astype('f')
mu = self.predict_mu(X)
sign = np.sign(y-mu)
return sign * self.distribution.deviance(y, mu,
weights=weights,
scaled=scaled) ** 0.5
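# Note (illustrative): for a normal distribution the unscaled deviance per
# sample is (y - mu)**2, so sign(y - mu) * deviance**0.5 reduces to the
# ordinary residuals y - mu, which is why these are analogous to OLS residuals.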
def _estimate_model_statistics(self, y, modelmat, inner=None, BW=None,
B=None, weights=None):
"""
method to compute all of the model statistics
results are stored in the 'statistics_' attribute of the model, as a
dictionary keyed by:
- edof: estimated degrees freedom
- scale: distribution scale, if applicable
- cov: coefficient covariances
- AIC: Akaike Information Criterion
- AICc: corrected Akaike Information Criterion
- r2: explained_deviance Pseudo R-squared
- GCV: generalized cross-validation
or
- UBRE: Un-Biased Risk Estimator
Parameters
----------
y : array-like
output data vector of shape (n_samples,)
modelmat : array-like, default: None
contains the spline basis for each feature evaluated at the input
inner : array of intermediate computations from naive optimization
BW : array of intermediate computations from either optimization
B : array of intermediate computations from stable optimization
weights : array-like shape (n_samples,) or None, default: None
containing sample weights
Returns
-------
None
"""
self.statistics_ = {}
lp = self._linear_predictor(modelmat=modelmat)
mu = self.link.mu(lp, self.distribution)
self.statistics_['edof'] = self._estimate_edof(BW=BW, B=B)
# self.edof_ = np.dot(U1, U1.T).trace().A.flatten() # this is wrong?
if not self.distribution._known_scale:
self.distribution.scale = self.distribution.phi(y=y, mu=mu, edof=self.statistics_['edof'], weights=weights)
self.statistics_['scale'] = self.distribution.scale
self.statistics_['cov'] = (B.dot(B.T)).A * self.distribution.scale # parameter covariances. no need to remove a W because we are using W^2. Wood pg 184
self.statistics_['se'] = self.statistics_['cov'].diagonal()**0.5
self.statistics_['AIC']= self._estimate_AIC(y=y, mu=mu, weights=weights)
self.statistics_['AICc'] = self._estimate_AICc(y=y, mu=mu, weights=weights)
self.statistics_['pseudo_r2'] = self._estimate_r2(y=y, mu=mu, weights=weights)
self.statistics_['GCV'], self.statistics_['UBRE'] = self._estimate_GCV_UBRE(modelmat=modelmat, y=y, weights=weights)
self.statistics_['loglikelihood'] = self._loglikelihood(y, mu, weights=weights)
self.statistics_['deviance'] = self.distribution.deviance(y=y, mu=mu, weights=weights).sum()
self.statistics_['n_samples'] = len(y)
def _estimate_edof(self, modelmat=None, inner=None, BW=None, B=None,
limit=50000):
"""
estimate effective degrees of freedom.
computes only the diagonal of the influence matrix and sums it.
allows for subsampling when the number of samples is very large.
Parameters
----------
modelmat : array-like, default: None
contains the spline basis for each feature evaluated at the input
inner : array of intermediate computations from naive optimization
BW : array of intermediate computations from either optimization
B : array of intermediate computations from stable optimization
limit : int, default: 50000
number of samples required before subsampling the model matrix.
this requires less computation.
Returns
-------
edof : float
the estimated effective degrees of freedom
"""
size = BW.shape[1] # number of samples
max_ = np.min([limit, size]) # since we only compute the diagonal, we can afford larger matrices
if max_ == limit:
# subsampling
scale = float(size)/max_
idxs = list(range(size))
np.random.shuffle(idxs)
if B is None:
return scale * modelmat.dot(inner).tocsr()[idxs[:max_]].T.multiply(BW[:,idxs[:max_]]).sum()
else:
return scale * BW[:,idxs[:max_]].multiply(B[:,idxs[:max_]]).sum()
else:
# no subsampling
if B is None:
return modelmat.dot(inner).T.multiply(BW).sum()
else:
return BW.multiply(B).sum()
def _estimate_AIC(self, y, mu, weights=None):
"""
estimate the Akaike Information Criterion
Parameters
----------
y : array-like of shape (n_samples,)
output data vector
mu : array-like of shape (n_samples,)
expected value of the targets given the model and inputs
Returns
-------
AIC : float
"""
estimated_scale = not(self.distribution._known_scale) # if we estimate the scale, that adds 2 dof
return -2*self._loglikelihood(y=y, mu=mu, weights=weights) + \
2*self.statistics_['edof'] + 2*estimated_scale
def _estimate_AICc(self, y, mu, weights=None):
"""
estimate the corrected Akaike Information Criterion
relies on the estimated degrees of freedom, which must be computed
before.
Parameters
----------
y : array-like of shape (n_samples,)
output data vector
mu : array-like of shape (n_samples,)
expected value of the targets given the model and inputs
Returns
-------
AICc : float
"""
edof = self.statistics_['edof']
if self.statistics_['AIC'] is None:
self.statistics_['AIC'] = self._estimate_AIC(y, mu, weights)
return self.statistics_['AIC'] + 2*(edof + 1)*(edof + 2)/(y.shape[0] - edof -2)
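# Note (illustrative): the line above is the standard small-sample correction
#     AICc = AIC + 2 * (edof + 1) * (edof + 2) / (n - edof - 2)
# eg with n = 100 samples and edof = 10, the extra penalty is
# 2 * 11 * 12 / 88 = 3.0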
def _estimate_r2(self, X=None, y=None, mu=None, weights=None):
"""
estimate some pseudo R^2 values
currently only computes explained deviance.
results are stored
Parameters
----------
y : array-like of shape (n_samples,)
output data vector
mu : array-like of shape (n_samples,)
expected value of the targets given the model and inputs
weights : array-like shape (n_samples,) or None, default: None
containing sample weights
if None, defaults to array of ones
Returns
-------
r2 : OrderedDict
containing pseudo R-squared values
"""
if mu is None:
mu = self.predict_mu(X=X)
if weights is None:
weights = np.ones_like(y)
null_mu = y.mean() * np.ones_like(y)
null_d = self.distribution.deviance(y=y, mu=null_mu, weights=weights)
full_d = self.distribution.deviance(y=y, mu=mu, weights=weights)
null_ll = self._loglikelihood(y=y, mu=null_mu, weights=weights)
full_ll = self._loglikelihood(y=y, mu=mu, weights=weights)
r2 = OrderedDict()
r2['explained_deviance'] = 1. - full_d.sum()/null_d.sum()
r2['McFadden'] = 1. - full_ll/null_ll
r2['McFadden_adj'] = 1. - (full_ll - self.statistics_['edof'])/null_ll
return r2
def _estimate_GCV_UBRE(self, X=None, y=None, modelmat=None, gamma=1.4,
add_scale=True, weights=None):
"""
Generalized Cross Validation and Un-Biased Risk Estimator.
UBRE is used when the scale parameter is known,
like Poisson and Binomial families.
Parameters
----------
y : array-like of shape (n_samples,)
output data vector
modelmat : array-like, default: None
contains the spline basis for each feature evaluated at the input
gamma : float, default: 1.4
serves as a weighting to increase the impact of the influence matrix
on the score
add_scale : boolean, default: True
UBRE score can be negative because the distribution scale
is subtracted. to keep things positive we can add the scale back.
weights : array-like shape (n_samples,) or None, default: None
containing sample weights
if None, defaults to array of ones
Returns
-------
score : float
Either GCV or UBRE, depending on if the scale parameter is known.
Notes
-----
Sometimes the GCV or UBRE selected model is deemed to be too wiggly,
and a smoother model is desired. One way to achieve this, in a
systematic way, is to increase the amount that each model effective
degree of freedom counts, in the GCV or UBRE score, by a factor γ ≥ 1
see Wood 2006 pg. 177-182, 220 for more details.
"""
if gamma < 1:
raise ValueError('gamma scaling must be >= 1, '\
'but found gamma = {}'.format(gamma))
if modelmat is None:
modelmat = self._modelmat(X)
if weights is None:
weights = np.ones_like(y)
lp = self._linear_predictor(modelmat=modelmat)
mu = self.link.mu(lp, self.distribution)
n = y.shape[0]
edof = self.statistics_['edof']
GCV = None
UBRE = None
dev = self.distribution.deviance(mu=mu, y=y, scaled=False, weights=weights).sum()
if self.distribution._known_scale:
# scale is known, use UBRE
scale = self.distribution.scale
UBRE = 1./n * dev - (not add_scale)*scale + 2.*gamma/n * edof * scale
else:
# scale unknown, use GCV
GCV = (n * dev) / (n - gamma * edof)**2
return (GCV, UBRE)
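# Worked example (illustrative): with n = 1000 samples, unscaled deviance
# dev = 950, edof = 20 and gamma = 1.4,
#     GCV = n * dev / (n - gamma * edof)**2
#         = 1000 * 950 / (1000 - 28)**2 ≈ 1.0055
# for a known-scale model with scale = 1 and add_scale=True,
#     UBRE = dev/n + 2 * gamma * edof * scale / n = 0.95 + 0.056 = 1.006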
def confidence_intervals(self, X, width=.95, quantiles=None):
"""
estimate confidence intervals for the model.
Parameters
----------
X : array-like of shape (n_samples, m_features)
input data matrix
width : float on [0,1], default: 0.95
quantiles : array-like of floats in [0, 1], default: None
instead of specifying the prediction width, one can specify the
quantiles. so width=.95 is equivalent to quantiles=[.025, .975]
Returns
-------
intervals: np.array of shape (n_samples, 2 or len(quantiles))
"""
if not self._is_fitted:
raise AttributeError('GAM has not been fitted. Call fit first.')
X = check_X(X, n_feats=len(self._n_coeffs) - self._fit_intercept,
edge_knots=self._edge_knots, dtypes=self._dtype)
return self._get_quantiles(X, width, quantiles, prediction=False)
def _get_quantiles(self, X, width, quantiles, modelmat=None, lp=None,
prediction=False, xform=True, feature=-1):
"""
estimate prediction or confidence intervals of the model at the points in X
Parameters
----------
X : array
input data of shape (n_samples, m_features)
width : float on [0,1]
quantiles : array-like of floats in [0, 1]
instead of specifying the prediction width, one can specify the
quantiles. so width=.95 is equivalent to quantiles=[.025, .975]
modelmat : array of shape (n_samples, n_coefficients), default: None
precomputed model matrix for X
lp : array of shape (n_samples,), default: None
precomputed linear predictor
prediction : bool, default: False
whether to compute prediction intervals (True)
or confidence intervals (False)
xform : bool, default: True
whether to apply the inverse link function to the interval bounds
feature : int, default: -1
feature function for which to compute the intervals
Returns
-------
intervals: np.array of shape (n_samples, 2 or len(quantiles))
Notes
-----
when the scale parameter is known, then we can proceed with a large
sample approximation to the distribution of the model coefficients
where B_hat ~ Normal(B, cov)
when the scale parameter is unknown, then we have to account for
the distribution of the estimated scale parameter, which is Chi-squared.
since we scale our estimate of B_hat by the sqrt of estimated scale,
we get a t distribution: Normal / sqrt(Chi-squared) ~ t
see Simon Wood section 1.3.2, 1.3.3, 1.5.5, 2.1.5
"""
if quantiles is not None:
quantiles = np.atleast_1d(quantiles)
else:
alpha = (1 - width)/2.
quantiles = [alpha, 1 - alpha]
for quantile in quantiles:
if (quantile > 1) or (quantile < 0):
raise ValueError('quantiles must be in [0, 1], but found {}'\
.format(quantiles))
if modelmat is None:
modelmat = self._modelmat(X, feature=feature)
if lp is None:
lp = self._linear_predictor(modelmat=modelmat, feature=feature)
idxs = self._select_feature(feature)
cov = self.statistics_['cov'][idxs][:,idxs]
var = (modelmat.dot(cov) * modelmat.todense().A).sum(axis=1)
if prediction:
var += self.distribution.scale
lines = []
for quantile in quantiles:
if self.distribution._known_scale:
q = sp.stats.norm.ppf(quantile)
else:
q = sp.stats.t.ppf(quantile, df=self.statistics_['n_samples'] -
self.statistics_['edof'])
lines.append(lp + q * var**0.5)
lines = np.vstack(lines).T
if xform:
lines = self.link.mu(lines, self.distribution)
return lines
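# Minimal sketch (illustrative) of the interval construction above: given the
# linear predictor lp and its pointwise variance var, the bounds are normal
# quantiles when the scale is known, and Student-t quantiles otherwise.
#
#     import numpy as np
#     import scipy.stats
#
#     def interval(lp, var, width=0.95, df=None):
#         alpha = (1. - width) / 2.
#         if df is None:                       # scale known: normal quantiles
#             q = scipy.stats.norm.ppf([alpha, 1. - alpha])
#         else:                                # scale estimated: t quantiles
#             q = scipy.stats.t.ppf([alpha, 1. - alpha], df=df)
#         return lp[:, None] + q[None, :] * np.sqrt(var)[:, None]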
def _select_feature(self, feature):
"""
tool for indexing by feature function.
many coefficients and parameters are organized by feature.
this tool returns all of the indices for a given feature.
GAM intercept is considered the 0th feature.
Parameters
----------
feature : int
feature to select from the data.
when fit_intercept=True, 0 corresponds to the intercept
when feature=-1, all features are selected
Returns
-------
np.array
indices into self.coef_ corresponding to the chosen feature
"""
if feature >= len(self._n_coeffs) or feature < -1:
raise ValueError('feature {} out of range for a model with {} '\
'feature functions'.format(feature, len(self._n_coeffs)))
if feature == -1:
# special case for selecting all features
return np.arange(np.sum(self._n_coeffs), dtype=int)
a = np.sum(self._n_coeffs[:feature]) # index of the feature's first coefficient
b = self._n_coeffs[feature] # number of coefficients for the feature
return np.arange(a, a+b, dtype=int)
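# Example (illustrative): with fit_intercept=True and _n_coeffs == [1, 25, 25],
# _select_feature(0) returns [0] (the intercept),
# _select_feature(1) returns indices 1..25, and
# _select_feature(-1) returns all 51 indices.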
def partial_dependence(self, X, feature=-1, width=None, quantiles=None):
"""
Computes the feature functions for the GAM
and possibly their confidence intervals.
if both width=None and quantiles=None,
then no confidence intervals are computed
Parameters
----------
X : array
input data of shape (n_samples, m_features)
feature : array-like of ints, default: -1
feature for which to compute the partial dependence functions
if feature == -1, then all features are selected,
excluding the intercept
if feature == 0 and gam.fit_intercept is True, then the intercept's
partial dependence is returned
width : float in [0, 1], default: None
width of the confidence interval
if None, defaults to 0.95
quantiles : array-like of floats in [0, 1], default: None
instead of specifying the prediction width, one can specify the
quantiles. so width=.95 is equivalent to quantiles=[.025, .975]
if None, defaults to width
Returns
-------
pdeps : np.array of shape (n_samples, len(feature))
conf_intervals : list of length len(feature)
containing np.arrays of shape (n_samples, 2 or len(quantiles))
"""
if not self._is_fitted:
raise AttributeError('GAM has not been fitted. Call fit first.')
m = len(self._n_coeffs) - self._fit_intercept
X = check_X(X, n_feats=m, edge_knots=self._edge_knots,
dtypes=self._dtype)
p_deps = []
compute_quantiles = (width is not None) or (quantiles is not None)
conf_intervals = []
if feature == -1:
feature = np.arange(m) + self._fit_intercept
# convert to array
feature = np.atleast_1d(feature)
# ensure feature exists
if (feature >= len(self._n_coeffs)).any() or (feature < -1).any():
raise ValueError('feature {} out of range for X with shape {}'\
.format(feature, X.shape))
for i in feature:
modelmat = self._modelmat(X, feature=i)
lp = self._linear_predictor(modelmat=modelmat, feature=i)
p_deps.append(lp)
if compute_quantiles:
conf_intervals.append(self._get_quantiles(X, width=width,
quantiles=quantiles,
modelmat=modelmat,
lp=lp,
feature=i,
xform=False))
pdeps = np.vstack(p_deps).T
if compute_quantiles:
return (pdeps, conf_intervals)
return pdeps
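# Usage sketch (illustrative; assumes a fitted multi-feature model `gam`):
#
#     pdep, confi = gam.partial_dependence(XX, feature=1, width=.95)
#     # pdep has shape (n_samples, 1); confi[0] has shape (n_samples, 2)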
def summary(self):
"""
produce a summary of the model statistics
#TODO including feature significance via F-Test
Parameters
----------
None
Returns
-------
None
"""
if not self._is_fitted:
raise AttributeError('GAM has not been fitted. Call fit first.')
keys = ['edof', 'AIC', 'AICc']
if self.distribution._known_scale:
keys.append('UBRE')
else:
keys.append('GCV')
keys.append('loglikelihood')
keys.append('deviance')
keys.append('scale')
sub_data = OrderedDict([[k, self.statistics_[k]] for k in keys])
print_data(sub_data, title='Model Statistics')
print('')
print_data(self.statistics_['pseudo_r2'], title='Pseudo-R^2')
def gridsearch(self, X, y, weights=None, return_scores=False,
keep_best=True, objective='auto', **param_grids):
"""
performs a grid search over a space of parameters for a given objective
NOTE:
gridsearch method is lazy and will not remove useless combinations
from the search space, eg.
n_splines=np.arange(5,10), fit_splines=[True, False]
will result in 10 loops, of which 5 are equivalent because
n_splines has no effect when fit_splines==False.
NOTE:
it is not recommended to search over a grid that alternates
between known scales and unknown scales, as the scores of the
candidate models will not be comparable.
Parameters
----------
X : array
input data of shape (n_samples, m_features)
y : array
label data of shape (n_samples,)
weights : array-like shape (n_samples,) or None, default: None
containing sample weights
if None, defaults to array of ones
return_scores : boolean, default False
whether to return the hyperparameters
and score for each element in the grid
keep_best : boolean
whether to keep the best GAM as self.
default: True
objective : string, default: 'auto'
metric to optimize. must be in ['AIC', 'AICc', 'GCV', 'UBRE', 'auto']
if 'auto', then grid search will optimize GCV for models with unknown
scale and UBRE for models with known scale.
**param_grids : dict, default: {'lam': np.logspace(-3, 3, 11)}
pairs of parameters and iterables of floats, or
parameters and iterables of iterables of floats.
if iterable of iterables of floats, the outer iterable must have
length m_features.
the method will make a grid of all the combinations of the parameters
and fit a GAM to each combination.
Returns
-------
if return_scores == True:
model_scores : dict
Contains each fitted model as keys and corresponding
objective scores as values
else:
self, ie possibly the newly fitted model
"""
# check if model fitted
if not self._is_fitted:
self._validate_params()
y = check_y(y, self.link, self.distribution)
X = check_X(X)
check_X_y(X, y)
if weights is not None:
weights = check_array(weights, name='sample weights')
check_lengths(y, weights)
else:
weights = np.ones_like(y).astype('f')
# validate objective
if objective not in ['auto', 'GCV', 'UBRE', 'AIC', 'AICc']:
raise ValueError("objective mut be in "\
"['auto', 'GCV', 'UBRE', 'AIC', 'AICc'], '\
'but found objective = {}".format(objective))
# check objective
if self.distribution._known_scale:
if objective == 'GCV':
raise ValueError('GCV should be used for models with '\
'unknown scale')
if objective == 'auto':
objective = 'UBRE'
else:
if objective == 'UBRE':
raise ValueError('UBRE should be used for models with '\
'known scale')
if objective == 'auto':
objective = 'GCV'
# if no params, then set up default gridsearch
if not bool(param_grids):
param_grids['lam'] = np.logspace(-3, 3, 11)
# validate params
admissible_params = self.get_params()
params = []
grids = []
for param, grid in list(param_grids.items()):
if param not in (admissible_params):
raise ValueError('unknown parameter: {}'.format(param))
if not (isiterable(grid) and (len(grid) > 1)):
raise ValueError('{} grid must either be an iterable of '\
'iterables, or an iterable of length > 1, '\
'but found {}'.format(param, grid))
# prepare grid
if any(isiterable(g) for g in grid):
# cast to np.array
grid = [np.atleast_1d(g) for g in grid]
# set grid to combination of all grids
grid = combine(*grid)
# save param name and grid
params.append(param)
grids.append(grid)
# build a list of dicts of candidate model params
param_grid_list = []
for candidate in combine(*grids):
param_grid_list.append(dict(zip(params,candidate)))
# set up data collection
best_model = None # keep the best model
best_score = np.inf
scores = []
models = []
# check if our model has been fitted already and store it
if self._is_fitted:
models.append(self)
scores.append(self.statistics_[objective])
# our model is currently the best
best_model = models[-1]
best_score = scores[-1]
# loop through candidate model params
pbar = ProgressBar()
for param_grid in pbar(param_grid_list):
# define new model
gam = deepcopy(self)
gam.set_params(**self.get_params())
gam.set_params(**param_grid)
# warm start with parameters from previous build
if models:
coef = models[-1].coef_
gam.set_params(coef_=coef, force=True)
try:
# try fitting
gam.fit(X, y, weights)
except ValueError as error:
msg = str(error) + '\non model:\n' + str(gam)
msg += '\nskipping...\n'
warnings.warn(msg)
continue
# record results
models.append(gam)
scores.append(gam.statistics_[objective])
# track best
if scores[-1] < best_score:
best_model = models[-1]
best_score = scores[-1]
# problems
if len(models) == 0:
msg = 'No models were fitted.'
warnings.warn(msg)
return self
# copy over the best
if keep_best:
self.set_params(deep=True,
force=True,
**best_model.get_params(deep=True))
if return_scores:
return OrderedDict(zip(models, scores))
else:
return self
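# Usage sketch (illustrative; assumes numpy arrays X, y of compatible shapes):
#
#     import numpy as np
#     from pygam import LinearGAM
#
#     gam = LinearGAM().gridsearch(X, y, lam=np.logspace(-3, 3, 11))
#     gam.summary()
#
# passing an iterable of iterables searches per-feature lambdas instead, eg
# lam=[np.logspace(-3, 3, 5), np.logspace(-3, 3, 5)] for a 2-feature model.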
def sample(self, X, y, quantity='y', sample_at_X=None,
weights=None, n_draws=100, n_bootstraps=1, objective='auto'):
"""Simulate from the posterior of the coefficients and smoothing params.
Samples are drawn from the posterior of the coefficients and smoothing
parameters given the response in an approximate way. The GAM must
already be fitted before calling this method; if the model has not
been fitted, then an exception is raised. Moreover, it is recommended
that the model and its hyperparameters be chosen with `gridsearch`
(with the parameter `keep_best=True`) before calling `sample`, so that
the result of that gridsearch can be used to generate useful response
data and so that the model's coefficients (and their covariance matrix)
can be used as the first bootstrap sample.
These samples are drawn as follows. Details are in the reference below.
1. `n_bootstraps` many "bootstrap samples" of the response (`y`) are
simulated by drawing random samples from the model's distribution
evaluated at the expected values (`mu`) for each sample in `X`.
2. A copy of the model is fitted to each of those bootstrap samples of
the response. The result is an approximation of the distribution over
the smoothing parameter `lam` given the response data `y`.
3. Samples of the coefficients are simulated from a multivariate normal
using the bootstrap samples of the coefficients and their covariance
matrices.
NOTE: A `gridsearch` is done `n_bootstraps` many times, so keep
`n_bootstraps` small. Make `n_bootstraps < n_draws` to take advantage
of the expensive bootstrap samples of the smoothing parameters.
NOTE: For now, the grid of `lam` values is the default of `gridsearch`.
Until randomized grid search is implemented, it is not worth setting
`n_bootstraps` to a value greater than one because the smoothing
parameters will be identical in each bootstrap sample.
Parameters
-----------
X : array of shape (n_samples, m_features)
empirical input data
y : array of shape (n_samples,)
empirical response vector
quantity : {'y', 'coef', 'mu'}, default: 'y'
What quantity to return pseudorandom samples of.
If `sample_at_X` is not None and `quantity` is either `'y'` or
`'mu'`, then samples are drawn at the values of `X` specified in
`sample_at_X`.
sample_at_X : array of shape (n_samples_to_simulate, m_features) or
None, default: None
Input data at which to draw new samples.
Only applies for `quantity` equal to `'y'` or to `'mu'`.
If `None`, then `sample_at_X` is replaced by `X`.
weights : np.array of shape (n_samples,)
sample weights
n_draws : positive int, default: 100
The number of samples to draw from the posterior distribution of
the coefficients and smoothing parameters
n_bootstraps : positive int, default: 1
The number of bootstrap samples to draw from simulations of the
response (from the already fitted model) to estimate the
distribution of the smoothing parameters given the response data.
If `n_bootstraps` is 1, then only the already fitted model's
smoothing parameter is used, and the distribution over the
smoothing parameters is not estimated using bootstrap sampling.
objective : string, default: 'auto'
metric to optimize in grid search. must be in
['AIC', 'AICc', 'GCV', 'UBRE', 'auto']
if 'auto', then grid search will optimize GCV for models with
unknown scale and UBRE for models with known scale.
Returns
-------
draws : 2D array of length n_draws
Simulations of the given `quantity` using samples from the
posterior distribution of the coefficients and smoothing parameter
given the response data. Each row is a pseudorandom sample.
If `quantity == 'coef'`, then the number of columns of `draws` is
the number of coefficients (`len(self.coef_)`).
Otherwise, the number of columns of `draws` is the number of
rows of `sample_at_X` if `sample_at_X` is not `None` or else
the number of rows of `X`.
References
----------
Simon N. Wood, 2006. Generalized Additive Models: an introduction with
R. Section 4.9.3 (pages 198–199) and Section 5.4.2 (pages 256–257).
"""
if quantity not in {'mu', 'coef', 'y'}:
raise ValueError("`quantity` must be one of 'mu', 'coef', 'y';"
" got {}".format(quantity))
coef_draws = self._sample_coef(
X, y, weights=weights, n_draws=n_draws,
n_bootstraps=n_bootstraps, objective=objective)
if quantity == 'coef':
return coef_draws
if sample_at_X is None:
sample_at_X = X
linear_predictor = self._modelmat(sample_at_X).dot(coef_draws.T)
mu_shape_n_draws_by_n_samples = self.link.mu(
linear_predictor, self.distribution).T
if quantity == 'mu':
return mu_shape_n_draws_by_n_samples
else:
return self.distribution.sample(mu_shape_n_draws_by_n_samples)
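# Usage sketch (illustrative; assumes a fitted model and training data X, y):
#
#     gam = LinearGAM().gridsearch(X, y)
#     mu_draws = gam.sample(X, y, quantity='mu', n_draws=100)   # posterior mu
#     coef_draws = gam.sample(X, y, quantity='coef')            # coefficients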
def _sample_coef(self, X, y, weights=None, n_draws=100, n_bootstraps=1,
objective='auto'):
"""Simulate from the posterior of the coefficients.
NOTE: A `gridsearch` is done `n_bootstraps` many times, so keep
`n_bootstraps` small. Make `n_bootstraps < n_draws` to take advantage
of the expensive bootstrap samples of the smoothing parameters.
For now, the grid of `lam` values is the default of `gridsearch`.
Parameters
-----------
X : array of shape (n_samples, m_features)
input data
y : array of shape (n_samples,)
response vector
weights : np.array of shape (n_samples,)
sample weights
n_draws : positive int, default: 100
The number of samples to draw from the posterior distribution of
the coefficients and smoothing parameters
n_bootstraps : positive int, default: 1
The number of bootstrap samples to draw from simulations of the
response (from the already fitted model) to estimate the
distribution of the smoothing parameters given the response data.
If `n_bootstraps` is 1, then only the already fitted model's
smoothing parameters are used.
objective : string, default: 'auto'
metric to optimize in grid search. must be in
['AIC', 'AICc', 'GCV', 'UBRE', 'auto']
if 'auto', then grid search will optimize GCV for models with
unknown scale and UBRE for models with known scale.
Returns
-------
coef_samples : array of shape (n_draws, n_samples)
Approximate simulations of the coefficients drawn from the
posterior distribution of the coefficients and smoothing
parameters given the response data
References
----------
Simon N. Wood, 2006. Generalized Additive Models: an introduction with
R. Section 4.9.3 (pages 198–199) and Section 5.4.2 (pages 256–257).
"""
if not self._is_fitted:
raise AttributeError('GAM has not been fitted. Call fit first.')
if n_bootstraps < 1:
raise ValueError('n_bootstraps must be >= 1;'
' got {}'.format(n_bootstraps))
if n_draws < 1:
raise ValueError('n_draws must be >= 1;'
' got {}'.format(n_draws))
coef_bootstraps, cov_bootstraps = (
self._bootstrap_samples_of_smoothing(X, y, weights=weights,
n_bootstraps=n_bootstraps,
objective=objective))
coef_draws = self._simulate_coef_from_bootstraps(
n_draws, coef_bootstraps, cov_bootstraps)
return coef_draws
def _bootstrap_samples_of_smoothing(self, X, y, weights=None,
n_bootstraps=1, objective='auto'):
"""Sample the smoothing parameters using simulated response data."""
mu = self.predict_mu(X) # Wood pg. 198 step 1
coef_bootstraps = [self.coef_]
cov_bootstraps = [
_make_positive_semi_definite(self.statistics_['cov'])]
for _ in range(n_bootstraps - 1): # Wood pg. 198 step 2
# generate response data from fitted model (Wood pg. 198 step 3)
y_bootstrap = self.distribution.sample(mu)
# fit smoothing parameters on the bootstrap data
# (Wood pg. 198 step 4)
# TODO: Either enable randomized searches over hyperparameters
# (like in sklearn's RandomizedSearchCV), or draw enough samples of
# `lam` so that each of these bootstrap samples get different
# values of `lam`. Right now, each bootstrap sample uses the exact
# same grid of values for `lam`, so it is not worth setting
# `n_bootstraps > 1`.
gam = deepcopy(self)
gam.set_params(**self.get_params())
gam.gridsearch(X, y_bootstrap, weights=weights,
objective=objective)
lam = gam.lam
# fit coefficients on the original data given the smoothing params
# (Wood pg. 199 step 5)
gam = deepcopy(self)
gam.set_params(**self.get_params())
gam.lam = lam
gam.fit(X, y, weights=weights)
coef_bootstraps.append(gam.coef_)
cov = _make_positive_semi_definite(gam.statistics_['cov'])
cov_bootstraps.append(cov)
return coef_bootstraps, cov_bootstraps
def _simulate_coef_from_bootstraps(
self, n_draws, coef_bootstraps, cov_bootstraps):
"""Simulate coefficients using bootstrap samples."""
# Sample indices uniformly from {0, ..., n_bootstraps - 1}
# (Wood pg. 199 step 6)
random_bootstrap_indices = np.random.choice(
np.arange(len(coef_bootstraps)), size=n_draws, replace=True)
# Simulate `n_draws` many random coefficient vectors from a
# multivariate normal distribution with mean and covariance given by
# the bootstrap samples (indexed by `random_bootstrap_indices`) of
# `coef_bootstraps` and `cov_bootstraps`. Because it's faster to draw
# many samples from a certain distribution all at once, we make a dict
# mapping bootstrap indices to draw indices and use the `size`
# parameter of `np.random.multivariate_normal` to sample the draws
# needed from that bootstrap sample all at once.
bootstrap_index_to_draw_indices = defaultdict(list)
for draw_index, bootstrap_index in enumerate(random_bootstrap_indices):
bootstrap_index_to_draw_indices[bootstrap_index].append(draw_index)
coef_draws = np.empty((n_draws, len(self.coef_)))
for bootstrap, draw_indices in bootstrap_index_to_draw_indices.items():
coef_draws[draw_indices] = np.random.multivariate_normal(
coef_bootstraps[bootstrap], cov_bootstraps[bootstrap],
size=len(draw_indices))
return coef_draws
class LinearGAM(GAM):
"""Linear GAM
This is a GAM with a Normal error distribution, and an identity link.
Parameters
----------
callbacks : list of strings or list of CallBack objects,
default: ['deviance', 'diffs']
Names of callback objects to call during the optimization loop.
constraints : str or callable, or iterable of str or callable,
default: None
Names of constraint functions to call during the optimization loop.
Must be in {'convex', 'concave', 'monotonic_inc', 'monotonic_dec',
'circular', 'none'}
If None, then the model will apply no constraints.
If only one str or callable is specified, then it is copied for all
features.
dtype : str in {'auto', 'numerical', 'categorical'},
or list of str, default: 'auto'
String describing the data-type of each feature.
'numerical' is used for continuous-valued data-types,
like in regression.
'categorical' is used for discrete-valued data-types,
like in classification.
If only one str is specified, then it is copied for all features.
lam : float or iterable of floats > 0, default: 0.6
Smoothing strength; must be a positive float, or one positive float
per feature.
Larger values enforce stronger smoothing.
If only one float is specified, then it is copied for all features.
fit_intercept : bool, default: True
Specifies if a constant (a.k.a. bias or intercept) should be
added to the decision function.
NOTE: the intercept receives no smoothing penalty.
fit_linear : bool or iterable of bools, default: False
Specifies if a linear term should be added to any of the feature
functions. Useful for including pre-defined feature transformations
in the model.
If only one bool is specified, then it is copied for all features.
NOTE: Many constraints are incompatible with an additional linear fit.
eg. if a non-zero linear function is added to a periodic spline
function, it will cease to be periodic.
this is also possible for a monotonic spline function.
fit_splines : bool or iterable of bools, default: True
Specifies if a smoother should be added to any of the feature
functions. Useful for defining feature transformations a-priori
that should not have splines fitted to them.
If only one bool is specified, then it is copied for all features.
NOTE: fit_splines supersedes n_splines.
ie. if n_splines > 0 and fit_splines = False, no splines will be fitted.
max_iter : int, default: 100
Maximum number of iterations allowed for the solver to converge.
penalties : str or callable, or iterable of str or callable,
default: 'auto'
Type of penalty to use for each feature.
penalty should be in {'auto', 'none', 'derivative', 'l2', }
If 'auto', then the model will use 2nd derivative smoothing for features
of dtype 'numerical', and L2 smoothing for features of dtype
'categorical'.
If only one str or callable is specified, then it is copied for all
features.
n_splines : int, or iterable of ints, default: 25
Number of splines to use in each feature function; must be non-negative.
If only one int is specified, then it is copied for all features.
Note: this value is set to 0 if fit_splines is False
scale : float or None, default: None
scale of the distribution, if known a-priori.
if None, scale is estimated.
spline_order : int, or iterable of ints, default: 3
Order of spline to use in each feature function; must be non-negative.
If only one int is specified, then it is copied for all features
Note: if a feature is of type categorical, spline_order will be set to 0.
tol : float, default: 1e-4
Tolerance for stopping criteria.
Attributes
----------
coef_ : array, shape (n_coefficients,)
Coefficient of the features in the decision function.
If fit_intercept is True, then self.coef_[0] will contain the bias.
statistics_ : dict
Dictionary containing model statistics like GCV/UBRE scores, AIC/c,
parameter covariances, estimated degrees of freedom, etc.
logs_ : dict
Dictionary containing the outputs of any callbacks at each
optimization loop.
The logs are structured as `{callback: [...]}`
References
----------
Simon N. Wood, 2006
Generalized Additive Models: an introduction with R
Hastie, Tibshirani, Friedman
The Elements of Statistical Learning
http://statweb.stanford.edu/~tibs/ElemStatLearn/printings/ESLII_print10.pdf
Paul Eilers & Brian Marx, 2015
International Biometric Society: A Crash Course on P-splines
http://www.ibschannel2015.nl/project/userfiles/Crash_course_handout.pdf
"""
def __init__(self, lam=0.6, max_iter=100, n_splines=25, spline_order=3,
penalties='auto', dtype='auto', tol=1e-4, scale=None,
callbacks=['deviance', 'diffs'],
fit_intercept=True, fit_linear=False, fit_splines=True,
constraints=None):
self.scale = scale
super(LinearGAM, self).__init__(distribution=NormalDist(scale=self.scale),
link='identity',
lam=lam,
dtype=dtype,
max_iter=max_iter,
n_splines=n_splines,
spline_order=spline_order,
penalties=penalties,
tol=tol,
callbacks=callbacks,
fit_intercept=fit_intercept,
fit_linear=fit_linear,
fit_splines=fit_splines,
constraints=constraints)
self._exclude += ['distribution', 'link']
def _validate_params(self):
"""
method to sanitize model parameters
Parameters
---------
None
Returns
-------
None
"""
self.distribution = NormalDist(scale=self.scale)
super(LinearGAM, self)._validate_params()
def prediction_intervals(self, X, width=.95, quantiles=None):
"""
estimate prediction intervals for LinearGAM
Parameters
----------
X : array-like of shape (n_samples, m_features)
input data matrix
width : float on [0,1], default: 0.95
quantiles : array-like of floats in [0, 1], default: None
instead of specifying the prediction width, one can specify the
quantiles. so width=.95 is equivalent to quantiles=[.025, .975]
Returns
-------
intervals: np.array of shape (n_samples, 2 or len(quantiles))
"""
if not self._is_fitted:
raise AttributeError('GAM has not been fitted. Call fit first.')
X = check_X(X, n_feats=len(self._n_coeffs) - self._fit_intercept,
edge_knots=self._edge_knots, dtypes=self._dtype)
return self._get_quantiles(X, width, quantiles, prediction=True)
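# Usage sketch (illustrative; assumes 1-D training data X, y):
#
#     import numpy as np
#
#     gam = LinearGAM().fit(X, y)
#     XX = np.linspace(X.min(), X.max(), 500)[:, None]
#     bounds = gam.prediction_intervals(XX, width=.95)   # shape (500, 2)
#
# unlike confidence_intervals, these include the noise scale of the response,
# since _get_quantiles is called with prediction=True.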
class LogisticGAM(GAM):
"""Logistic GAM
This is a GAM with a Binomial error distribution, and a logit link.
Parameters
----------
callbacks : list of strings or list of CallBack objects,
default: ['deviance', 'diffs']
Names of callback objects to call during the optimization loop.
constraints : str or callable, or iterable of str or callable,
default: None
Names of constraint functions to call during the optimization loop.
Must be in {'convex', 'concave', 'monotonic_inc', 'monotonic_dec',
'circular', 'none'}
If None, then the model will apply no constraints.
If only one str or callable is specified, then it is copied for all
features.
dtype : str in {'auto', 'numerical', 'categorical'},
or list of str, default: 'auto'
String describing the data-type of each feature.
'numerical' is used for continuous-valued data-types,
like in regression.
'categorical' is used for discrete-valued data-types,
like in classification.
If only one str is specified, then it is copied for all features.
lam : float or iterable of floats > 0, default: 0.6
Smoothing strength; must be a positive float, or one positive float
per feature.
Larger values enforce stronger smoothing.
If only one float is specified, then it is copied for all features.
fit_intercept : bool, default: True
Specifies if a constant (a.k.a. bias or intercept) should be
added to the decision function.
NOTE: the intercept receives no smoothing penalty.
fit_linear : bool or iterable of bools, default: False
Specifies if a linear term should be added to any of the feature
functions. Useful for including pre-defined feature transformations
in the model.
If only one bool is specified, then it is copied for all features.
NOTE: Many constraints are incompatible with an additional linear fit.
eg. if a non-zero linear function is added to a periodic spline
function, it will cease to be periodic.
this is also possible for a monotonic spline function.
fit_splines : bool or iterable of bools, default: True
Specifies if a smoother should be added to any of the feature
functions. Useful for defining feature transformations a-priori
that should not have splines fitted to them.
If only one bool is specified, then it is copied for all features.
NOTE: fit_splines supersedes n_splines.
ie. if n_splines > 0 and fit_splines = False, no splines will be fitted.
max_iter : int, default: 100
Maximum number of iterations allowed for the solver to converge.
penalties : str or callable, or iterable of str or callable,
default: 'auto'
Type of penalty to use for each feature.
penalty should be in {'auto', 'none', 'derivative', 'l2', }
If 'auto', then the model will use 2nd derivative smoothing for features
of dtype 'numerical', and L2 smoothing for features of dtype
'categorical'.
If only one str or callable is specified, then it is copied for all
features.
n_splines : int, or iterable of ints, default: 25
Number of splines to use in each feature function; must be non-negative.
If only one int is specified, then it is copied for all features.
Note: this value is set to 0 if fit_splines is False
spline_order : int, or iterable of ints, default: 3
Order of spline to use in each feature function; must be non-negative.
If only one int is specified, then it is copied for all features
Note: if a feature is of type categorical, spline_order will be set to 0.
tol : float, default: 1e-4
Tolerance for stopping criteria.
Attributes
----------
coef_ : array, shape (n_coefficients,)
Coefficient of the features in the decision function.
If fit_intercept is True, then self.coef_[0] will contain the bias.
statistics_ : dict
Dictionary containing model statistics like GCV/UBRE scores, AIC/c,
parameter covariances, estimated degrees of freedom, etc.
logs_ : dict
Dictionary containing the outputs of any callbacks at each
optimization loop.
The logs are structured as `{callback: [...]}`
References
----------
Simon N. Wood, 2006
Generalized Additive Models: an introduction with R
Hastie, Tibshirani, Friedman
The Elements of Statistical Learning
http://statweb.stanford.edu/~tibs/ElemStatLearn/printings/ESLII_print10.pdf
Paul Eilers & Brian Marx, 2015
International Biometric Society: A Crash Course on P-splines
http://www.ibschannel2015.nl/project/userfiles/Crash_course_handout.pdf
"""
def __init__(self, lam=0.6, max_iter=100, n_splines=25, spline_order=3,
penalties='auto', dtype='auto', tol=1e-4,
callbacks=['deviance', 'diffs', 'accuracy'],
fit_intercept=True, fit_linear=False, fit_splines=True,
constraints=None):
# call super
super(LogisticGAM, self).__init__(distribution='binomial',
link='logit',
lam=lam,
dtype=dtype,
max_iter=max_iter,
n_splines=n_splines,
spline_order=spline_order,
penalties=penalties,
tol=tol,
callbacks=callbacks,
fit_intercept=fit_intercept,
fit_linear=fit_linear,
fit_splines=fit_splines,
constraints=constraints)
# ignore any variables
self._exclude += ['distribution', 'link']
def accuracy(self, X=None, y=None, mu=None):
"""
computes the accuracy of the LogisticGAM
Parameters
----------
note: X or mu must be defined. defaults to mu
X : array-like of shape (n_samples, m_features), default: None
containing input data
y : array-like of shape (n,)
containing target data
mu : array-like of shape (n_samples,), default: None
expected value of the targets given the model and inputs
Returns
-------
float in [0, 1]
"""
if not self._is_fitted:
raise AttributeError('GAM has not been fitted. Call fit first.')
y = check_y(y, self.link, self.distribution)
if X is not None:
X = check_X(X, n_feats=len(self._n_coeffs) - self._fit_intercept,
edge_knots=self._edge_knots, dtypes=self._dtype)
if mu is None:
mu = self.predict_mu(X)
check_X_y(mu, y)
return ((mu > 0.5).astype(int) == y).mean()
def predict(self, X):
"""
predict binary targets given the model and input X
Parameters
---------
X : array-like of shape (n_samples, m_features), default: None
containing the input dataset
Returns
-------
y : np.array of shape (n_samples,)
containing binary targets under the model
"""
return self.predict_mu(X) > 0.5
def predict_proba(self, X):
"""
predict expected probabilities given the model and input X
Parameters
---------
X : array-like of shape (n_samples, m_features), default: None
containing the input dataset
Returns
-------
y : np.array of shape (n_samples,)
containing expected values under the model
"""
return self.predict_mu(X)
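# Usage sketch (illustrative; assumes binary targets y in {0, 1}):
#
#     gam = LogisticGAM().fit(X, y)
#     proba = gam.predict_proba(X)    # expected probabilities in [0, 1]
#     labels = gam.predict(X)         # boolean labels: proba > 0.5
#     print(gam.accuracy(X, y))       # fraction correctly classified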
class PoissonGAM(GAM):
"""Poisson GAM
This is a GAM with a Poisson error distribution, and a log link.
Parameters
----------
callbacks : list of strings or list of CallBack objects,
default: ['deviance', 'diffs']
Names of callback objects to call during the optimization loop.
constraints : str or callable, or iterable of str or callable,
default: None
Names of constraint functions to call during the optimization loop.
Must be in {'convex', 'concave', 'monotonic_inc', 'monotonic_dec',
'circular', 'none'}
If None, then the model will apply no constraints.
If only one str or callable is specified, then it is copied for all
features.
dtype : str in {'auto', 'numerical', 'categorical'},
or list of str, default: 'auto'
String describing the data-type of each feature.
'numerical' is used for continuous-valued data-types,
like in regression.
'categorical' is used for discrete-valued data-types,
like in classification.
If only one str is specified, then it is copied for all features.
lam : float or iterable of floats > 0, default: 0.6
Smoothing strength; must be a positive float, or one positive float
per feature.
Larger values enforce stronger smoothing.
If only one float is specified, then it is copied for all features.
fit_intercept : bool, default: True
Specifies if a constant (a.k.a. bias or intercept) should be
added to the decision function.
NOTE: the intercept receives no smoothing penalty.
fit_linear : bool or iterable of bools, default: False
Specifies if a linear term should be added to any of the feature
functions. Useful for including pre-defined feature transformations
in the model.
If only one bool is specified, then it is copied for all features.
NOTE: Many constraints are incompatible with an additional linear fit.
eg. if a non-zero linear function is added to a periodic spline
function, it will cease to be periodic.
this is also possible for a monotonic spline function.
fit_splines : bool or iterable of bools, default: True
Specifies if a smoother should be added to any of the feature
functions. Useful for defining feature transformations a-priori
that should not have splines fitted to them.
If only one bool is specified, then it is copied for all features.
NOTE: fit_splines supersedes n_splines.
ie. if n_splines > 0 and fit_splines = False, no splines will be fitted.
max_iter : int, default: 100
Maximum number of iterations allowed for the solver to converge.
penalties : str or callable, or iterable of str or callable,
default: 'auto'
Type of penalty to use for each feature.
penalty should be in {'auto', 'none', 'derivative', 'l2', }
If 'auto', then the model will use 2nd derivative smoothing for features
of dtype 'numerical', and L2 smoothing for features of dtype
'categorical'.
If only one str or callable is specified, then it is copied for all
features.
n_splines : int, or iterable of ints, default: 25
Number of splines to use in each feature function; must be non-negative.
If only one int is specified, then it is copied for all features.
Note: this value is set to 0 if fit_splines is False
spline_order : int, or iterable of ints, default: 3
Order of spline to use in each feature function; must be non-negative.
If only one int is specified, then it is copied for all features
Note: if a feature is of type categorical, spline_order will be set to 0.
tol : float, default: 1e-4
Tolerance for stopping criteria.
Attributes
----------
coef_ : array, shape (n_coefficients,)
Coefficient of the features in the decision function.
If fit_intercept is True, then self.coef_[0] will contain the bias.
statistics_ : dict
Dictionary containing model statistics like GCV/UBRE scores, AIC/c,
parameter covariances, estimated degrees of freedom, etc.
logs_ : dict
Dictionary containing the outputs of any callbacks at each
optimization loop.
The logs are structured as `{callback: [...]}`
References
----------
Simon N. Wood, 2006
Generalized Additive Models: an introduction with R
Hastie, Tibshirani, Friedman
The Elements of Statistical Learning
http://statweb.stanford.edu/~tibs/ElemStatLearn/printings/ESLII_print10.pdf
Paul Eilers & Brian Marx, 2015
International Biometric Society: A Crash Course on P-splines
http://www.ibschannel2015.nl/project/userfiles/Crash_course_handout.pdf
"""
def __init__(self, lam=0.6, max_iter=100, n_splines=25, spline_order=3,
penalties='auto', dtype='auto', tol=1e-4,
callbacks=['deviance', 'diffs'],
fit_intercept=True, fit_linear=False, fit_splines=True,
constraints=None):
# call super
super(PoissonGAM, self).__init__(distribution='poisson',
link='log',
lam=lam,
dtype=dtype,
max_iter=max_iter,
n_splines=n_splines,
spline_order=spline_order,
penalties=penalties,
tol=tol,
callbacks=callbacks,
fit_intercept=fit_intercept,
fit_linear=fit_linear,
fit_splines=fit_splines,
constraints=constraints)
# ignore any variables
self._exclude += ['distribution', 'link']
def _loglikelihood(self, y, mu, weights=None, rescale_y=True):
"""
compute the log-likelihood of the dataset using the current model
Parameters
---------
y : array-like of shape (n,)
containing target values
mu : array-like of shape (n_samples,)
expected value of the targets given the model and inputs
weights : array-like of shape (n,)
containing sample weights
rescale_y : boolean, default: True
whether to scale the targets back up by the sample weights (which
carry the exposure) before computing the log-likelihood
Returns
-------
log-likelihood : float
sum of the log-likelihoods of the observations
"""
if rescale_y:
y = y * weights
return np.log(self.distribution.pdf(y=y, mu=mu, weights=weights)).sum()
def loglikelihood(self, X, y, exposure=None, weights=None):
"""
compute the log-likelihood of the dataset using the current model
Parameters
---------
X : array-like of shape (n_samples, m_features)
containing the input dataset
y : array-like of shape (n,)
containing target values
exposure : array-like shape (n_samples,) or None, default: None
containing exposures
if None, defaults to array of ones
weights : array-like of shape (n,)
containing sample weights
Returns
-------
log-likelihood : float
sum of the log-likelihoods of the observations
"""
y = check_y(y, self.link, self.distribution)
mu = self.predict_mu(X)
if weights is not None:
weights = check_array(weights, name='sample weights')
check_lengths(y, weights)
else:
weights = np.ones_like(y).astype('f')
y, weights = self._exposure_to_weights(y, exposure, weights)
return self._loglikelihood(y, mu, weights=weights, rescale_y=True)
def _exposure_to_weights(self, y, exposure=None, weights=None):
"""simple tool to create a common API
Parameters
----------
y : array-like, shape (n_samples,)
Target values (integers in classification, real numbers in
regression)
For classification, labels must correspond to classes.
exposure : array-like shape (n_samples,) or None, default: None
containing exposures
if None, defaults to array of ones
weights : array-like shape (n_samples,) or None, default: None
containing sample weights
if None, defaults to array of ones
Returns
-------
y : y normalized by exposure
weights : array-like shape (n_samples,)
"""
if exposure is not None:
exposure = np.array(exposure).astype('f')
else:
exposure = np.ones_like(y).astype('f')
check_lengths(y, exposure)
# normalize response
y = y / exposure
if weights is not None:
weights = np.array(weights).astype('f')
else:
weights = np.ones_like(y).astype('f')
check_lengths(weights, exposure)
# set exposure as the weight
weights = weights * exposure
return y, weights
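# A worked example (editor's note, illustrative numbers only): with
# y = [2, 4] event counts and exposure = [1., 2.] observation-periods,
# the normalized rates become y / exposure = [2., 2.], and the effective
# weights become weights * exposure = [1., 2.]; both rows now describe
# the same rate, but the second carries twice the weight.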
def fit(self, X, y, exposure=None, weights=None):
"""Fit the generalized additive model.
Parameters
----------
X : array-like, shape (n_samples, m_features)
Training vectors, where n_samples is the number of samples
and m_features is the number of features.
y : array-like, shape (n_samples,)
Target values (integers in classification, real numbers in
regression)
For classification, labels must correspond to classes.
exposure : array-like shape (n_samples,) or None, default: None
containing exposures
if None, defaults to array of ones
weights : array-like shape (n_samples,) or None, default: None
containing sample weights
if None, defaults to array of ones
Returns
-------
self : object
Returns fitted GAM object
"""
y, weights = self._exposure_to_weights(y, exposure, weights)
return super(PoissonGAM, self).fit(X, y, weights)
def predict(self, X, exposure=None):
"""
predict expected value of target given model and input X
often this is done via expected value of GAM given input X
Parameters
---------
X : array-like of shape (n_samples, m_features), default: None
containing the input dataset
exposure : array-like shape (n_samples,) or None, default: None
containing exposures
if None, defaults to array of ones
Returns
-------
y : np.array of shape (n_samples,)
containing predicted values under the model
"""
if not self._is_fitted:
raise AttributeError('GAM has not been fitted. Call fit first.')
X = check_X(X, n_feats=len(self._n_coeffs) - self._fit_intercept,
edge_knots=self._edge_knots, dtypes=self._dtype)
if exposure is not None:
exposure = np.array(exposure).astype('f')
else:
exposure = np.ones(X.shape[0]).astype('f')
check_lengths(X, exposure)
return self.predict_mu(X) * exposure
def gridsearch(self, X, y, exposure=None, weights=None,
return_scores=False, keep_best=True, objective='auto',
**param_grids):
"""
performs a grid search over a space of parameters for a given objective
NOTE:
gridsearch method is lazy and will not remove useless combinations
from the search space, e.g.
n_splines=np.arange(5,10), fit_splines=[True, False]
will result in 10 loops, of which 5 are equivalent because
n_splines has no effect when fit_splines==False
it is not recommended to search over a grid that alternates
between known scales and unknown scales, as the scores of the
candidate models will not be comparable.
Parameters
----------
X : array
input data of shape (n_samples, m_features)
y : array
label data of shape (n_samples,)
exposure : array-like shape (n_samples,) or None, default: None
containing exposures
if None, defaults to array of ones
weights : array-like shape (n_samples,) or None, default: None
containing sample weights
if None, defaults to array of ones
return_scores : boolean, default: False
whether to return the hyperparameters
and score for each element in the grid
keep_best : boolean
whether to keep the best GAM as self.
default: True
objective : string, default: 'auto'
metric to optimize. must be in ['AIC', 'AICc', 'GCV', 'UBRE', 'auto']
if 'auto', then grid search will optimize GCV for models with unknown
scale and UBRE for models with known scale.
**param_grids : dict, default: {'lam': np.logspace(-3, 3, 11)}
pairs of parameters and iterables of floats, or
parameters and iterables of iterables of floats.
if iterable of iterables of floats, the outer iterable must have
length m_features.
the method will make a grid of all the combinations of the parameters
and fit a GAM to each combination.
Returns
-------
if return_scores == True:
model_scores : dict
Contains each fitted model as keys and corresponding
objective scores as values
else:
self, ie possibly the newly fitted model
"""
y, weights = self._exposure_to_weights(y, exposure, weights)
return super(PoissonGAM, self).gridsearch(X, y,
weights=weights,
return_scores=return_scores,
keep_best=keep_best,
objective=objective,
**param_grids)
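# Usage sketch (editor's addition, not part of the library): fit a
# PoissonGAM on synthetic count data with an exposure vector, then tune
# the smoothing parameter via gridsearch. The data and the helper name
# `_example_poisson_exposure` are hypothetical.
def _example_poisson_exposure():
    import numpy as np
    rng = np.random.RandomState(0)
    X = np.linspace(0, 10, 200)
    exposure = np.full(200, 2.0)  # two observation-periods per row
    y = rng.poisson(np.exp(1 + 0.2 * X) * exposure)
    gam = PoissonGAM().gridsearch(X, y, exposure=exposure,
                                  lam=np.logspace(-3, 3, 11))
    # predictions scale linearly with the supplied exposure
    assert np.allclose(gam.predict(X, exposure=exposure),
                       2 * gam.predict(X))
    return gam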
class GammaGAM(GAM):
"""Gamma GAM
This is a GAM with a Gamma error distribution, and a log link.
NB
Although the canonical link function for the Gamma GLM is the inverse link,
that function can create problems for numerical software because it becomes
difficult to enforce the requirement that the mean of the Gamma distribution
be positive. The log link guarantees this.
If you need to use the inverse link function, simply construct a custom GAM:
```
from pygam import GAM
gam = GAM(distribution='gamma', link='inverse')
```
Parameters
----------
callbacks : list of strings or list of CallBack objects,
default: ['deviance', 'diffs']
Names of callback objects to call during the optimization loop.
constraints : str or callable, or iterable of str or callable,
default: None
Names of constraint functions to call during the optimization loop.
Must be in {'convex', 'concave', 'monotonic_inc', 'monotonic_dec',
'circular', 'none'}
If None, then the model will apply no constraints.
If only one str or callable is specified, then it is copied for all
features.
dtype : str in {'auto', 'numerical', 'categorical'},
or list of str, default: 'auto'
String describing the data-type of each feature.
'numerical' is used for continuous-valued data-types,
like in regression.
'categorical' is used for discrete-valued data-types,
like in classification.
If only one str is specified, then it is copied for all features.
lam : float or iterable of floats > 0, default: 0.6
Smoothing strength; must be a positive float, or one positive float
per feature.
Larger values enforce stronger smoothing.
If only one float is specified, then it is copied for all features.
fit_intercept : bool, default: True
Specifies if a constant (a.k.a. bias or intercept) should be
added to the decision function.
NOTE: the intercept receives no smoothing penalty.
fit_linear : bool or iterable of bools, default: False
Specifies if a linear term should be added to any of the feature
functions. Useful for including pre-defined feature transformations
in the model.
If only one bool is specified, then it is copied for all features.
NOTE: Many constraints are incompatible with an additional linear fit.
eg. if a non-zero linear function is added to a periodic spline
function, it will cease to be periodic.
this is also possible for a monotonic spline function.
fit_splines : bool or iterable of bools, default: True
Specifies if a smoother should be added to any of the feature
functions. Useful for defining feature transformations a-priori
that should not have splines fitted to them.
If only one bool is specified, then it is copied for all features.
NOTE: fit_splines supersedes n_splines.
ie. if n_splines > 0 and fit_splines = False, no splines will be fitted.
max_iter : int, default: 100
Maximum number of iterations allowed for the solver to converge.
penalties : str or callable, or iterable of str or callable,
default: 'auto'
Type of penalty to use for each feature.
penalty should be in {'auto', 'none', 'derivative', 'l2', }
If 'auto', then the model will use 2nd derivative smoothing for features
of dtype 'numerical', and L2 smoothing for features of dtype
'categorical'.
If only one str or callable is specified, then it is copied for all
features.
n_splines : int, or iterable of ints, default: 25
Number of splines to use in each feature function; must be non-negative.
If only one int is specified, then it is copied for all features.
Note: this value is set to 0 if fit_splines is False
scale : float or None, default: None
scale of the distribution, if known a-priori.
if None, scale is estimated.
spline_order : int, or iterable of ints, default: 3
Order of spline to use in each feature function; must be non-negative.
If only one int is specified, then it is copied for all features
Note: if a feature is of type categorical, spline_order will be set to 0.
tol : float, default: 1e-4
Tolerance for stopping criteria.
Attributes
----------
coef_ : array, shape (n_classes, m_features)
Coefficient of the features in the decision function.
If fit_intercept is True, then self.coef_[0] will contain the bias.
statistics_ : dict
Dictionary containing model statistics like GCV/UBRE scores, AIC/c,
parameter covariances, estimated degrees of freedom, etc.
logs_ : dict
Dictionary containing the outputs of any callbacks at each
optimization loop.
The logs are structured as `{callback: [...]}`
References
----------
Simon N. Wood, 2006
Generalized Additive Models: an introduction with R
Hastie, Tibshirani, Friedman
The Elements of Statistical Learning
http://statweb.stanford.edu/~tibs/ElemStatLearn/printings/ESLII_print10.pdf
Paul Eilers & Brian Marx, 2015
International Biometric Society: A Crash Course on P-splines
http://www.ibschannel2015.nl/project/userfiles/Crash_course_handout.pdf
"""
def __init__(self, lam=0.6, max_iter=100, n_splines=25, spline_order=3,
penalties='auto', dtype='auto', tol=1e-4, scale=None,
callbacks=['deviance', 'diffs'],
fit_intercept=True, fit_linear=False, fit_splines=True,
constraints=None):
self.scale = scale
super(GammaGAM, self).__init__(distribution=GammaDist(scale=self.scale),
link='log',
lam=lam,
dtype=dtype,
max_iter=max_iter,
n_splines=n_splines,
spline_order=spline_order,
penalties=penalties,
tol=tol,
callbacks=callbacks,
fit_intercept=fit_intercept,
fit_linear=fit_linear,
fit_splines=fit_splines,
constraints=constraints)
self._exclude += ['distribution', 'link']
def _validate_params(self):
"""
method to sanitize model parameters
Parameters
---------
None
Returns
-------
None
"""
self.distribution = GammaDist(scale=self.scale)
super(GammaGAM, self)._validate_params()
class InvGaussGAM(GAM):
"""Inverse Gaussian GAM
This is a GAM with an Inverse Gaussian error distribution, and a log link.
NB
Although the canonical link function for the Inverse Gaussian GLM is the
inverse squared link, that function can create problems for numerical
software because it becomes difficult to enforce the requirement that the
mean of the Inverse Gaussian distribution be positive. The log link
guarantees this.
If you need to use the inverse squared link function, simply construct a custom GAM:
```
from pygam import GAM
gam = GAM(distribution='inv_gauss', link='inv_squared')
```
Parameters
----------
callbacks : list of strings or list of CallBack objects,
default: ['deviance', 'diffs']
Names of callback objects to call during the optimization loop.
constraints : str or callable, or iterable of str or callable,
default: None
Names of constraint functions to call during the optimization loop.
Must be in {'convex', 'concave', 'monotonic_inc', 'monotonic_dec',
'circular', 'none'}
If None, then the model will apply no constraints.
If only one str or callable is specified, then it is copied for all
features.
dtype : str in {'auto', 'numerical', 'categorical'},
or list of str, default: 'auto'
String describing the data-type of each feature.
'numerical' is used for continuous-valued data-types,
like in regression.
'categorical' is used for discrete-valued data-types,
like in classification.
If only one str is specified, then it is copied for all features.
lam : float or iterable of floats > 0, default: 0.6
Smoothing strength; must be a positive float, or one positive float
per feature.
Larger values enforce stronger smoothing.
If only one float is specified, then it is copied for all features.
fit_intercept : bool, default: True
Specifies if a constant (a.k.a. bias or intercept) should be
added to the decision function.
NOTE: the intercept receives no smoothing penalty.
fit_linear : bool or iterable of bools, default: False
Specifies if a linear term should be added to any of the feature
functions. Useful for including pre-defined feature transformations
in the model.
If only one bool is specified, then it is copied for all features.
NOTE: Many constraints are incompatible with an additional linear fit.
eg. if a non-zero linear function is added to a periodic spline
function, it will cease to be periodic.
this is also possible for a monotonic spline function.
fit_splines : bool or iterable of bools, default: True
Specifies if a smoother should be added to any of the feature
functions. Useful for defining feature transformations a-priori
that should not have splines fitted to them.
If only one bool is specified, then it is copied for all features.
NOTE: fit_splines supersedes n_splines.
ie. if n_splines > 0 and fit_splines = False, no splines will be fitted.
max_iter : int, default: 100
Maximum number of iterations allowed for the solver to converge.
penalties : str or callable, or iterable of str or callable,
default: 'auto'
Type of penalty to use for each feature.
penalty should be in {'auto', 'none', 'derivative', 'l2', }
If 'auto', then the model will use 2nd derivative smoothing for features
of dtype 'numerical', and L2 smoothing for features of dtype
'categorical'.
If only one str or callable is specified, then it is copied for all
features.
n_splines : int, or iterable of ints, default: 25
Number of splines to use in each feature function; must be non-negative.
If only one int is specified, then it is copied for all features.
Note: this value is set to 0 if fit_splines is False
scale : float or None, default: None
scale of the distribution, if known a-priori.
if None, scale is estimated.
spline_order : int, or iterable of ints, default: 3
Order of spline to use in each feature function; must be non-negative.
If only one int is specified, then it is copied for all features
Note: if a feature is of type categorical, spline_order will be set to 0.
tol : float, default: 1e-4
Tolerance for stopping criteria.
Attributes
----------
coef_ : array, shape (n_classes, m_features)
Coefficient of the features in the decision function.
If fit_intercept is True, then self.coef_[0] will contain the bias.
statistics_ : dict
Dictionary containing model statistics like GCV/UBRE scores, AIC/c,
parameter covariances, estimated degrees of freedom, etc.
logs_ : dict
Dictionary containing the outputs of any callbacks at each
optimization loop.
The logs are structured as `{callback: [...]}`
References
----------
Simon N. Wood, 2006
Generalized Additive Models: an introduction with R
Hastie, Tibshirani, Friedman
The Elements of Statistical Learning
http://statweb.stanford.edu/~tibs/ElemStatLearn/printings/ESLII_print10.pdf
Paul Eilers & Brian Marx, 2015
International Biometric Society: A Crash Course on P-splines
http://www.ibschannel2015.nl/project/userfiles/Crash_course_handout.pdf
"""
def __init__(self, lam=0.6, max_iter=100, n_splines=25, spline_order=3,
penalties='auto', dtype='auto', tol=1e-4, scale=None,
callbacks=['deviance', 'diffs'],
fit_intercept=True, fit_linear=False, fit_splines=True,
constraints=None):
self.scale = scale
super(InvGaussGAM, self).__init__(distribution=InvGaussDist(scale=self.scale),
link='log',
lam=lam,
dtype=dtype,
max_iter=max_iter,
n_splines=n_splines,
spline_order=spline_order,
penalties=penalties,
tol=tol,
callbacks=callbacks,
fit_intercept=fit_intercept,
fit_linear=fit_linear,
fit_splines=fit_splines,
constraints=constraints)
self._exclude += ['distribution', 'link']
def _validate_params(self):
"""
method to sanitize model parameters
Parameters
---------
None
Returns
-------
None
"""
self.distribution = InvGaussDist(scale=self.scale)
super(InvGaussGAM, self)._validate_params()
# pygam/utils.py
"""
Pygam utilities
"""
from __future__ import division
from copy import deepcopy
import warnings
import scipy as sp
from scipy import sparse
import numpy as np
from numpy.linalg import LinAlgError
try:
from sksparse.cholmod import cholesky as spcholesky
from sksparse.test_cholmod import CholmodNotPositiveDefiniteError
SKSPIMPORT = True
except ImportError:
SKSPIMPORT = False
class NotPositiveDefiniteError(ValueError):
"""Exception class to raise if a matrix is not positive definite
"""
def cholesky(A, sparse=True):
"""
Choose the best possible Cholesky factorizer.
If possible, use the Scikit-Sparse sparse Cholesky method,
permuting the output L to ensure A = L . L.H;
otherwise default to numpy's dense version.
Parameters
----------
A : array-like
array to decompose
sparse : boolean, default: True
whether to return a sparse array
"""
if SKSPIMPORT:
A = sp.sparse.csc_matrix(A)
F = spcholesky(A)
# permutation matrix P
P = sp.sparse.lil_matrix(A.shape)
p = F.P()
P[np.arange(len(p)), p] = 1
# permute
try:
L = F.L()
L = P.T.dot(L)
except CholmodNotPositiveDefiniteError as e:
raise NotPositiveDefiniteError('Matrix is not positive definite')
if sparse:
return L
return L.todense()
else:
msg = 'Could not import Scikit-Sparse or Suite-Sparse.\n'\
'This will slow down optimization for models with '\
'monotonicity/convexity penalties and many splines.\n'\
'See installation instructions for installing '\
'Scikit-Sparse and Suite-Sparse via Conda.'
warnings.warn(msg)
if sp.sparse.issparse(A):
A = A.todense()
try:
L = np.linalg.cholesky(A)
except LinAlgError as e:
raise NotPositiveDefiniteError('Matrix is not positive definite')
if sparse:
return sp.sparse.csc_matrix(L)
return L
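# Editor's sketch (not part of the library): either code path should
# reproduce a small symmetric positive definite matrix as A = L . L.T
# (L.H in the complex case). The helper name is hypothetical.
def _example_cholesky():
    A = np.array([[4., 2.],
                  [2., 3.]])
    L = np.asarray(cholesky(A, sparse=False))
    assert np.allclose(L.dot(L.T), A)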
def generate_X_grid(gam, n=500):
"""
tool to create a nice grid of X data if no X data is supplied
array is sorted by feature and uniformly spaced, so the marginal and joint
distributions are likely wrong
Parameters
----------
gam : GAM instance
n : int, default: 500
number of data points to create
Returns
-------
np.array of shape (n, n_features)
"""
X = []
for ek in gam._edge_knots:
X.append(np.linspace(ek[0], ek[-1], num=n))
return np.vstack(X).T
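# Editor's sketch: generate_X_grid is typically used to build a smooth grid
# for plotting the partial dependence of a fitted model. LinearGAM is
# imported lazily here to avoid a circular import at module load time; the
# data and the helper name are hypothetical.
def _example_generate_X_grid():
    from pygam import LinearGAM
    X = np.linspace(0, 1, 50)
    y = np.sin(2 * np.pi * X)
    gam = LinearGAM().fit(X, y)
    XX = generate_X_grid(gam, n=500)
    assert XX.shape == (500, 1)  # one column per feature
    return gam.predict(XX)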
def check_dtype(X, ratio=.95):
"""
tool to identify the data-types of the features in data matrix X.
checks for float and int data-types.
Parameters
----------
X : array of shape (n_samples, n_features)
ratio : float in [0, 1], default: 0.95
maximum ratio of unique values to samples for a feature to be considered
categorical; categorical features must also take values in [0, k-1].
Returns
-------
dtypes : list of types of length n_features
"""
if X.ndim == 1:
X = X[:,None]
dtypes = []
for feat in X.T:
dtype = feat.dtype.kind
if dtype not in ['f', 'i']:
raise ValueError('Data must be type int or float, '\
'but found type: {}'.format(feat.dtype))
if dtype == 'f':
if not(np.isfinite(feat).all()):
raise ValueError('Data must not contain Inf nor NaN')
if (len(np.unique(feat))/len(feat) < ratio) and \
((np.min(feat)) == 0) and (np.max(feat) == len(np.unique(feat)) - 1):
dtypes.append('categorical')
continue
dtypes.append('numerical')
return dtypes
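# Editor's sketch: a feature is flagged 'categorical' only when it has few
# unique values AND those values look like labels 0..k-1; otherwise it is
# 'numerical'. The data below is made up for illustration.
def _example_check_dtype():
    X = np.array([[0.1, 0.],
                  [2.3, 1.],
                  [4.5, 2.],
                  [6.7, 0.]] * 10)  # repeat rows so unique/len is small
    assert check_dtype(X) == ['numerical', 'categorical']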
def make_2d(array):
"""
tiny tool to expand 1D arrays into 2D column arrays
Parameters
----------
array : array-like
Returns
-------
np.array with ndim == 2
"""
array = np.asarray(array)
if array.ndim < 2:
msg = 'Expected 2D input data array, but found {}D. '\
'Expanding to 2D.'.format(array.ndim)
warnings.warn(msg)
array = np.atleast_1d(array)[:,None]
return array
def check_array(array, force_2d=False, n_feats=None, n_dims=None,
min_samples=1, name='Input data'):
"""
tool to perform basic data validation.
called by check_X and check_y.
ensures that data:
- is n_dims dimensional
- contains float-compatible data-types
- has at least min_samples
- has n_feats
- is finite
Parameters
----------
array : array-like
force_2d : boolean, default: False
whether to force a 2d array. Setting to True forces n_dims = 2
n_feats : int, default: None
represents number of features that the array should have.
not enforced if n_feats is None.
n_dims : int default: None
number of dimensions expected in the array
min_samples : int, default: 1
name : str, default: 'Input data'
name to use when referring to the array
Returns
-------
array : validated array
"""
# make array
if force_2d:
array = make_2d(array)
n_dims = 2
else:
array = np.array(array)
# cast to float
dtype = array.dtype
if dtype.kind not in ['i', 'f']:
try:
array = array.astype('float')
except ValueError as e:
raise ValueError('{} must be type int or float, '\
'but found type: {}\n'\
'Try transforming data with a LabelEncoder first.'\
.format(name, dtype.type))
# check finite
if not(np.isfinite(array).all()):
raise ValueError('{} must not contain Inf nor NaN'.format(name))
# check n_dims
if n_dims is not None:
if array.ndim != n_dims:
raise ValueError('{} must have {} dimensions. '\
'found shape {}'.format(name, n_dims, array.shape))
# check n_feats
if n_feats is not None:
m = array.shape[1]
if m != n_feats:
raise ValueError('{} must have {} features, '\
'but found {}'.format(name, n_feats, m))
# minimum samples
n = array.shape[0]
if n < min_samples:
raise ValueError('{} should have at least {} samples, '\
'but found {}'.format(name, min_samples, n))
return array
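# Editor's sketch: validation failures raise ValueError mentioning the
# supplied name; valid input is returned as a float-compatible ndarray.
# The helper name is hypothetical.
def _example_check_array():
    arr = check_array([[1, 2], [3, 4]], n_dims=2, n_feats=2, name='demo data')
    assert arr.shape == (2, 2)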
def check_y(y, link, dist, min_samples=1):
"""
tool to ensure that the targets:
- are in the domain of the link function
- are numerical
- have at least min_samples
- is finite
Parameters
----------
y : array-like
link : Link object
dist : Distribution object
min_samples : int, default: 1
Returns
-------
y : array containing validated y-data
"""
y = np.ravel(y)
y = check_array(y, force_2d=False, min_samples=min_samples, n_dims=1,
name='y data')
warnings.filterwarnings('ignore', 'divide by zero encountered in log')
if np.any(np.isnan(link.link(y, dist))):
raise ValueError('y data is not in domain of {} link function. ' \
'Expected domain: {}, but found {}' \
.format(link, get_link_domain(link, dist),
[float('%.2f'%np.min(y)),
float('%.2f'%np.max(y))]))
warnings.resetwarnings()
return y
def check_X(X, n_feats=None, min_samples=1, edge_knots=None, dtypes=None):
"""
tool to ensure that X:
- is 2 dimensional
- contains float-compatible data-types
- has at least min_samples
- has n_feats
- has categorical features in the right range
- is finite
Parameters
----------
X : array-like
n_feats : int. default: None
represents number of features that X should have.
not enforced if n_feats is None.
min_samples : int, default: 1
edge_knots : list of arrays, default: None
dtypes : list of strings, default: None
Returns
-------
X : array with ndims == 2 containing validated X-data
"""
X = check_array(X, force_2d=True, n_feats=n_feats, min_samples=min_samples,
name='X data')
if (edge_knots is not None) and (dtypes is not None):
for i, (dt, ek, feat) in enumerate(zip(dtypes, edge_knots, X.T)):
if dt == 'categorical':
min_ = ek[0]
max_ = ek[-1]
if (np.unique(feat) < min_).any() or \
(np.unique(feat) > max_).any():
min_ += .5
max_ -= 0.5
feat_min = feat.min()
feat_max = feat.max()
raise ValueError('X data is out of domain for categorical '\
'feature {}. Expected data in [{}, {}], '\
'but found data in [{}, {}]'\
.format(i, min_, max_, feat_min, feat_max))
return X
def check_X_y(X, y):
"""
tool to ensure input and output data have the same number of samples
Parameters
----------
X : array-like
y : array-like
Returns
-------
None
"""
if len(X) != len(y):
raise ValueError('Inconsistent input and output data shapes. '\
'found X: {} and y: {}'.format(X.shape, y.shape))
def check_lengths(*arrays):
"""
tool to ensure input and output data have the same number of samples
Parameters
----------
*arrays : iterable of arrays to be checked
Returns
-------
None
"""
lengths = [len(array) for array in arrays]
if len(np.unique(lengths)) > 1:
raise ValueError('Inconsistent data lengths: {}'.format(lengths))
def check_param(param, param_name, dtype, iterable=True, constraint=None):
"""
checks the dtype of a parameter,
and whether it satisfies a numerical constraint
Parameters
---------
param : object
param_name : str, name of the parameter
dtype : str, desired dtype of the parameter
iterable : bool, default: True
whether to allow iterable param
constraint : str, default: None
numerical constraint of the parameter.
if None, no constraint is enforced
Returns
-------
list of validated and converted parameter(s)
"""
msg = []
msg.append(param_name + " must be "+ dtype)
if iterable:
msg.append(" or iterable of " + dtype + "s")
msg.append(", but found " + param_name + " = {}".format(repr(param)))
if constraint is not None:
msg = (" " + constraint).join(msg)
else:
msg = ''.join(msg)
# check param is numerical
try:
param_dt = np.array(param).astype(dtype)
except ValueError:
raise ValueError(msg)
# check iterable
if (not iterable) and (param_dt.size != 1):
raise ValueError(msg)
# check param is correct dtype
if not (param_dt == np.array(param).astype(float)).all():
raise ValueError(msg)
# check constraint
if constraint is not None:
if not (eval('np.' + repr(param_dt) + constraint)).all():
raise ValueError(msg)
return param_dt.tolist()
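# Editor's sketch: a scalar parameter that passes its dtype and constraint
# checks is returned as a plain python value. The call below is illustrative.
def _example_check_param():
    lam = check_param(0.6, param_name='lam', dtype='float', constraint='> 0')
    assert lam == 0.6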
def get_link_domain(link, dist):
"""
tool to identify the domain of a given monotonic link function
Parameters
----------
link : Link object
dist : Distribution object
Returns
-------
domain : list of length 2, representing the interval of the domain.
"""
domain = np.array([-np.inf, -1, 0, 1, np.inf])
domain = domain[~np.isnan(link.link(domain, dist))]
return [domain[0], domain[-1]]
def round_to_n_decimal_places(array, n=3):
"""
tool to round a float or array to n decimal places.
n=3 by default
Parameters
----------
array : np.array
n : int. number of decimal places to keep
Returns
-------
array : rounded np.array
"""
# check if in scientific notation
if issubclass(array.__class__, float) and '%.e'%array == str(array):
return array # do nothing
shape = np.shape(array)
out = ((np.atleast_1d(array) * 10**n).round().astype('int') / (10.**n))
return out.reshape(shape)
def print_data(data_dict, width=-5, keep_decimals=3, fill=' ', title=None):
"""
tool to print a dictionary with a nice formatting
Parameters:
-----------
data_dict:
dict. Dictionary to be printed.
width:
int. Desired total line width.
A negative value will fill to minimum required width + neg(width)
default: -5
keep_decimals:
int. number of decimal places to keep:
default: 3
fill:
string. the character to fill between keys and values.
Must have length 1.
default: ' '
title:
string.
default: None
Returns
-------
None
"""
# find max length
keys = np.array(list(data_dict.keys()), dtype='str')
values = np.array(list(data_dict.values()))
values = round_to_n_decimal_places(values).astype('str')
M = max([len(k + v) for k, v in zip(keys, values)])
if width < 0:
# this is for a dynamic filling.
# fill to minimum required width + neg(width)
width = M - width
if M >= width:
raise ValueError('desired width is {}, '\
'but max data length is {}'.format(width, M))
fill = str(fill)
assert len(fill) == 1, 'fill must contain exactly one symbol'
if title is not None:
print(title)
print('-' * width)
for k, v in zip(keys, values):
nk = len(k)
nv = len(v)
filler = fill*(width - nk - nv)
print(k + filler + v)
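# Editor's sketch: print_data right-aligns rounded values against their
# keys. The output shown in the comments is approximate.
def _example_print_data():
    print_data({'edof': 12.3456, 'AIC': 1221.0824}, title='Model Statistics')
    # Model Statistics
    # ----------------
    # edof      12.346
    # AIC     1221.082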
def gen_edge_knots(data, dtype):
"""
generate uniform knots from data including the edges of the data
for discrete data, assumes k categories in [0, k-1] interval
Parameters
----------
data : array-like with one dimension
dtype : str in {'categorical', 'numerical'}
Returns
-------
np.array containing ordered knots
"""
if dtype not in ['categorical', 'numerical']:
raise ValueError('unsupported dtype: {}'.format(dtype))
if dtype == 'categorical':
return np.r_[np.min(data) - 0.5, np.unique(data) + 0.5]
else:
knots = np.r_[np.min(data), np.max(data)]
if knots[0] == knots[1]:
warnings.warn('Data contains constant feature. '\
'Consider removing and setting fit_intercept=True',
stacklevel=2)
return knots
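# Editor's sketch: categorical features get knots at the half-integers
# bracketing each of the k categories in [0, k-1]; numerical features get
# just the data extremes.
def _example_gen_edge_knots():
    assert np.allclose(gen_edge_knots(np.array([0, 1, 2, 2, 1]), 'categorical'),
                       [-0.5, 0.5, 1.5, 2.5])
    assert np.allclose(gen_edge_knots(np.array([3., 7., 5.]), 'numerical'),
                       [3., 7.])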
def b_spline_basis(x, edge_knots, n_splines=20,
spline_order=3, sparse=True,
clamped=False):
"""
tool to generate b-spline basis using vectorized De Boor recursion
the basis functions extrapolate linearly past the end-knots.
Parameters
----------
x : array-like, with ndims == 1.
edge_knots : array-like containing locations of the 2 edge knots.
n_splines : int. number of splines to generate. must be >= spline_order+1
default: 20
spline_order : int. order of spline basis to create
default: 3
sparse : boolean. whether to return a sparse basis matrix or not.
default: True
clamped : boolean, default: False
whether to force repeated knots at the ends of the domain.
NOTE: when False this results in interpretable basis functions
where creating a linearly increasing function amounts to
assigning linearly increasing coefficients.
when clamped, this is no longer true and constraints that depend
on this property, like monotonicity and convexity are no longer
valid.
Returns
-------
basis : sparse csc matrix or array containing b-spline basis functions
with shape (len(x), n_splines)
"""
if np.ravel(x).ndim != 1:
raise ValueError('Data must be 1-D, but found {}'\
.format(np.ravel(x).ndim))
if (n_splines < 1) or (type(n_splines) is not int):
raise ValueError('n_splines must be int >= 1')
if (spline_order < 0) or (type(spline_order) is not int):
raise ValueError('spline_order must be int >= 0')
if n_splines < spline_order + 1:
raise ValueError('n_splines must be >= spline_order + 1. '\
'found: n_splines = {} and spline_order = {}'\
.format(n_splines, spline_order))
if n_splines == 1:
warnings.warn('Requested 1 spline. This is equivalent to '\
'fitting an intercept', stacklevel=2)
# rescale edge_knots to [0,1], and generate boundary knots
edge_knots = np.sort(deepcopy(edge_knots))
offset = edge_knots[0]
scale = edge_knots[-1] - edge_knots[0]
if scale == 0:
scale = 1
boundary_knots = np.linspace(0, 1, 1 + n_splines - spline_order)
diff = np.diff(boundary_knots[:2])[0]
# rescale x as well
x = (np.ravel(deepcopy(x)) - offset) / scale
# append 0 and 1 in order to get derivatives for extrapolation
x = np.r_[x, 0., 1.]
# determine extrapolation indices
x_extrapolte_l = (x < 0)
x_extrapolte_r = (x > 1)
x_interpolate = ~(x_extrapolte_r + x_extrapolte_l)
# formatting
x = np.atleast_2d(x).T
n = len(x)
# augment knots
if clamped:
aug_knots = np.r_[np.zeros(spline_order),
boundary_knots,
np.ones(spline_order)]
else:
aug = np.arange(1, spline_order + 1) * diff
aug_knots = np.r_[-aug[::-1],
boundary_knots,
1 + aug]
aug_knots[-1] += 1e-9 # want last knot inclusive
# prepare Haar Basis
bases = (x >= aug_knots[:-1]).astype(int) * \
(x < aug_knots[1:]).astype(int)
bases[-1] = bases[-2][::-1] # force symmetric bases at 0 and 1
# do recursion from Hastie et al. vectorized
maxi = len(aug_knots) - 1
for m in range(2, spline_order + 2):
maxi -= 1
# bookkeeping to avoid div by 0
mask_l = aug_knots[m - 1 : maxi + m - 1] != aug_knots[:maxi]
mask_r = aug_knots[m : maxi + m] != aug_knots[1 : maxi + 1]
# left sub-basis
num = (x - aug_knots[:maxi][mask_l]) * bases[:, :maxi][:, mask_l]
denom = aug_knots[m-1 : maxi+m-1][mask_l] - aug_knots[:maxi][mask_l]
left = np.zeros((n, maxi))
left[:, mask_l] = num/denom
# right sub-basis
num = (aug_knots[m : maxi+m][mask_r]-x) * bases[:, 1:maxi+1][:, mask_r]
denom = aug_knots[m:maxi+m][mask_r] - aug_knots[1 : maxi+1][mask_r]
right = np.zeros((n, maxi))
right[:, mask_r] = num/denom
# track previous bases and update
prev_bases = bases[-2:]
bases = left + right
# extrapolate
# since we have repeated end-knots, only the last 2 basis functions are
# non-zero at the end-knots, and they have equal and opposite gradient.
if (any(x_extrapolte_r) or any(x_extrapolte_l)) and spline_order>0:
bases[~x_interpolate] = 0.
if not clamped:
denom = (aug_knots[spline_order:-1] - aug_knots[: -spline_order - 1])
left = prev_bases[:, :-1] / denom
denom = (aug_knots[spline_order+1:] - aug_knots[1: -spline_order])
right = prev_bases[:, 1:] / denom
grads = (spline_order) * (left - right)
if any(x_extrapolte_l):
val = grads[0] * x[x_extrapolte_l] + bases[-2]
bases[x_extrapolte_l] = val
if any(x_extrapolte_r):
val = grads[1] * (x[x_extrapolte_r] - 1) + bases[-1]
bases[x_extrapolte_r] = val
else:
grad = -spline_order/diff
if any(x_extrapolte_l):
bases[x_extrapolte_l, :1] = grad * x[x_extrapolte_l] + 1
bases[x_extrapolte_l, 1:2] = -grad * x[x_extrapolte_l]
if any(x_extrapolte_r):
bases[x_extrapolte_r, -1:] = -grad * (x[x_extrapolte_r] - 1) + 1
bases[x_extrapolte_r, -2:-1] = grad * (x[x_extrapolte_r] - 1)
# get rid of the added values at 0, and 1
bases = bases[:-2]
if sparse:
return sp.sparse.csc_matrix(bases)
return bases
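# Editor's sketch: inside the domain, the basis should sum to one at every
# point (a partition of unity). Sizes below are arbitrary.
def _example_b_spline_basis():
    x = np.linspace(0, 1, 101)
    basis = b_spline_basis(x, edge_knots=np.array([0., 1.]),
                           n_splines=10, spline_order=3, sparse=False)
    assert basis.shape == (101, 10)
    assert np.allclose(basis.sum(axis=1), 1.)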
def ylogydu(y, u):
"""
tool to give desired output for the limit as y -> 0, which is 0
Parameters
----------
y : array-like of len(n)
u : array-like of len(n)
Returns
-------
np.array len(n)
"""
mask = (np.atleast_1d(y)!=0.)
out = np.zeros_like(u)
out[mask] = y[mask] * np.log(y[mask] / u[mask])
return out
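# Editor's sketch: ylogydu implements the y*log(y/u) term that appears in
# deviance formulas, returning the limit value 0 where y == 0 instead of nan.
def _example_ylogydu():
    y = np.array([0., 1., 2.])
    u = np.array([1., 1., 1.])
    assert np.allclose(ylogydu(y, u), [0., 0., 2. * np.log(2.)])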
def combine(*args):
"""
tool to perform tree search via recursion
useful for developing the grid in a grid search
Parameters
----------
args : list of lists
Returns
-------
list of all the combinations of the elements in the input lists
"""
if hasattr(args, '__iter__') and (len(args) > 1):
subtree = combine(*args[:-1])
tree = []
for leaf in subtree:
for node in args[-1]:
if hasattr(leaf, '__iter__'):
tree.append(leaf + [node])
else:
tree.append([leaf] + [node])
return tree
else:
return [[arg] for arg in args[0]]
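# Editor's sketch: combine builds the cartesian product of candidate values,
# which is how gridsearch expands its parameter grids.
def _example_combine():
    assert combine([1, 2], ['a', 'b']) == [[1, 'a'], [1, 'b'],
                                           [2, 'a'], [2, 'b']]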
def isiterable(obj, reject_string=True):
"""
convenience tool to detect if something is iterable.
in python3, strings count as iterables, so we have the option to exclude them
Parameters:
-----------
obj : object to analyse
reject_string : bool, whether to ignore strings
Returns:
--------
bool, whether the object is iterable.
"""
iterable = hasattr(obj, '__iter__')
if reject_string:
iterable = iterable and not isinstance(obj, str)
return iterable
# pygam/tests/__init__.py (empty)
# pygam/tests/conftest.py
# -*- coding: utf-8 -*-
import pytest
import pandas as pd
import numpy as np
from pygam import *
@pytest.fixture
def mcycle():
# y is real
# recommend LinearGAM
motor = pd.read_csv('datasets/mcycle.csv', index_col=0)
X = motor.times.values
y = motor.accel
return X, y
@pytest.fixture
def coal():
# y is counts
# recommend PoissonGAM
coal = pd.read_csv('datasets/coal.csv', index_col=0)
y, x = np.histogram(coal.values, bins=150)
X = x[:-1] + np.diff(x)/2 # get midpoints of bins
return X, y
@pytest.fixture
def faithful():
# y is counts
# recommend PoissonGAM
faithful = pd.read_csv('datasets/faithful.csv', index_col=0)
y, x = np.histogram(faithful.values, bins=200)
X = x[:-1] + np.diff(x)/2 # get midpoints of bins
return X, y
@pytest.fixture
def wage():
# y is real
# recommend LinearGAM
wage = pd.read_csv('datasets/wage.csv', index_col=0)
X = wage[['year', 'age', 'education']].values
X[:,-1] = np.unique(X[:,-1], return_inverse=True)[1]
y = wage['wage'].values
return X, y
@pytest.fixture
def trees():
# y is real.
# recommend InvGaussGAM, or GAM(distribution='gamma', link='log')
trees = pd.read_csv('datasets/trees.csv', index_col=0)
y = trees.Volume.values
X = trees[['Girth', 'Height']].values
return X, y
@pytest.fixture
def default():
# y is binary
# recommend LogisticGAM
default = pd.read_csv('datasets/default.csv', index_col=0)
default = default.values
default[:,0] = np.unique(default[:,0], return_inverse=True)[1]
default[:,1] = np.unique(default[:,1], return_inverse=True)[1]
X = default[:,1:]
y = default[:,0]
return X, y
@pytest.fixture
def cake():
# y is real
# recommend LinearGAM
cake = pd.read_csv('datasets/cake.csv', index_col=0)
X = cake[['recipe', 'replicate', 'temperature']].values
X[:,0] = np.unique(cake.values[:,1], return_inverse=True)[1]
X[:,1] -= 1
y = cake['angle'].values
return X, y
@pytest.fixture
def hepatitis():
# y is real
# recommend LinearGAM
hep = pd.read_csv('datasets/hepatitis_A_bulgaria.csv').astype(float)
# eliminate 0/0
mask = (hep.total > 0).values
hep = hep[mask]
X = hep.age.values
y = hep.hepatitis_A_positive.values / hep.total.values
return X, y
# pygam/tests/test_GAM_methods.py
# -*- coding: utf-8 -*-
import sys
import numpy as np
import pytest
import scipy as sp
from pygam import *
from pygam.utils import generate_X_grid
@pytest.fixture
def mcycle_gam(mcycle):
X, y = mcycle
gam = LinearGAM().fit(X,y)
return gam
def test_LinearGAM_pdeps_shape(wage):
"""
check that we get the expected number of partial dependence functions
"""
X, y = wage
gam = LinearGAM().fit(X, y)
pdeps = gam.partial_dependence(X)
assert(X.shape == pdeps.shape)
def test_LinearGAM_prediction(mcycle, mcycle_gam):
"""
check that the predictions we get have the correct shape
"""
X, y = mcycle
preds = mcycle_gam.predict(X)
assert(preds.shape == y.shape)
def test_LogisticGAM_accuracy(default):
"""
check that we can compute accuracy correctly
"""
X, y = default
gam = LogisticGAM().fit(X, y)
preds = gam.predict(X)
acc0 = (preds == y).mean()
acc1 = gam.accuracy(X, y)
assert(acc0 == acc1)
def test_PoissonGAM_exposure(coal):
"""
check that we can fit a Poisson GAM with exposure, and it scales predictions
"""
X, y = coal
gam = PoissonGAM().fit(X, y, exposure=np.ones_like(y))
assert((gam.predict(X, exposure=np.ones_like(y)*2) == 2 *gam.predict(X)).all())
def test_PoissonGAM_loglike(coal):
"""
check that our loglikelihood is scaled by exposure
predictions that are twice as large with twice the exposure
should have lower loglikelihood
"""
X, y = coal
exposure = np.ones_like(y)
gam_high_var = PoissonGAM().fit(X, y * 2, exposure=exposure * 2)
gam_low_var = PoissonGAM().fit(X, y, exposure=exposure)
assert gam_high_var.loglikelihood(X, y * 2, exposure * 2) < gam_low_var.loglikelihood(X, y, exposure)
def test_large_GAM(coal):
"""
check that we can fit a GAM in py3 when we have more than 50,000 samples
"""
X = np.linspace(0, 100, 100000)
y = X**2
gam = LinearGAM().fit(X, y)
assert(gam._is_fitted)
def test_summary(mcycle, mcycle_gam):
"""
check that we can get a summary if we've fitted the model, else not
"""
X, y = mcycle
gam = LinearGAM()
try:
gam.summary()
except AttributeError:
assert(True)
mcycle_gam.summary()
assert(True)
def test_more_splines_than_samples(mcycle):
"""
check that we can fit a gam with more splines than samples
"""
X, y = mcycle
n = len(X)
gam = LinearGAM(n_splines=n+1).fit(X, y)
assert(gam._is_fitted)
def test_deviance_residuals(mcycle, mcycle_gam):
"""
for linear GAMs, the deviance residuals should equal y - y_pred
"""
X, y = mcycle
res = mcycle_gam.deviance_residuals(X, y)
err = y - mcycle_gam.predict(X)
assert((res == err).all())
def test_conf_intervals_return_array(mcycle, mcycle_gam):
"""
make sure that the confidence_intervals method returns an array
"""
X, y = mcycle
conf_ints = mcycle_gam.confidence_intervals(X)
assert(conf_ints.ndim == 2)
def test_conf_intervals_quantiles_width_interchangable(mcycle, mcycle_gam):
"""
getting confidence_intervals via width or specifying quantiles
should return the same result
"""
X, y = mcycle
conf_ints_a = mcycle_gam.confidence_intervals(X, width=.9)
conf_ints_b = mcycle_gam.confidence_intervals(X, quantiles=[.05, .95])
assert(np.allclose(conf_ints_a, conf_ints_b))
def test_conf_intervals_ordered(mcycle, mcycle_gam):
"""
confidence intervals returned via width should be ordered
"""
X, y = mcycle
conf_ints = mcycle_gam.confidence_intervals(X)
assert((conf_ints[:,0] <= conf_ints[:,1]).all())
def test_partial_dependence_on_univar_data(mcycle, mcycle_gam):
"""
partial dependence with univariate data should equal the overall model
if fit intercept is false
"""
X, y = mcycle
gam = LinearGAM(fit_intercept=False).fit(X,y)
pred = gam.predict(X)
pdep = gam.partial_dependence(X)
assert((pred == pdep.ravel()).all())
def test_partial_dependence_on_univar_data2(mcycle, mcycle_gam):
"""
partial dependence with univariate data should NOT equal the overall model
if fit intercept is true
"""
X, y = mcycle
gam = LinearGAM(fit_intercept=True).fit(X,y)
pred = gam.predict(X)
pdep = gam.partial_dependence(X)
assert((pred != pdep.ravel()).all())
def test_partial_dependence_feature_doesnt_exist(mcycle, mcycle_gam):
"""
partial dependence should raise ValueError when requesting a nonexistent
feature
"""
X, y = mcycle
try:
mcycle_gam.partial_dependence(X, feature=10)
except ValueError:
assert(True)
def test_summary_returns_16_lines(mcycle_gam):
"""
check that the summary method works and returns 16 lines like:
Model Statistics
-------------------------
edof 12.321
AIC 1221.082
AICc 1224.297
GCV 611.627
loglikelihood -597.22
deviance 120.679
scale 510.561
Pseudo-R^2
--------------------------
explained_deviance 0.8
McFadden 0.288
McFadden_adj 0.273
"""
if sys.version_info.major == 2:
from StringIO import StringIO
if sys.version_info.major == 3:
from io import StringIO
stdout = sys.stdout #keep a handle on the real standard output
sys.stdout = StringIO() #Choose a file-like object to write to
mcycle_gam.summary()
assert(len(sys.stdout.getvalue().split('\n')) == 16)
def test_is_fitted_predict(mcycle):
"""
test predict requires fitted model
"""
X, y = mcycle
gam = LinearGAM()
try:
gam.predict(X)
except AttributeError:
assert(True)
def test_is_fitted_predict_mu(mcycle):
"""
test predict_mu requires fitted model
"""
X, y = mcycle
gam = LinearGAM()
try:
gam.predict_mu(X)
except AttributeError:
assert(True)
def test_is_fitted_dev_resid(mcycle):
"""
test deviance_residuals requires fitted model
"""
X, y = mcycle
gam = LinearGAM()
try:
gam.deviance_residuals(X, y)
except AttributeError:
assert(True)
def test_is_fitted_conf_intervals(mcycle):
"""
test confidence_intervals requires fitted model
"""
X, y = mcycle
gam = LinearGAM()
try:
gam.confidence_intervals(X)
except AttributeError:
assert(True)
def test_is_fitted_pdep(mcycle):
"""
test partial_dependence requires fitted model
"""
X, y = mcycle
gam = LinearGAM()
try:
gam.partial_dependence(X)
except AttributeError:
assert(True)
def test_is_fitted_summary(mcycle):
"""
test summary requires fitted model
"""
X, y = mcycle
gam = LinearGAM()
try:
gam.summary()
except AttributeError:
assert(True)
def test_set_params_with_external_param():
"""
test set_params sets a real parameter
"""
gam = GAM(lam=1)
gam.set_params(lam=420)
assert(gam.lam == 420)
def test_set_params_with_hidden_param():
"""
test set_params should not set any params that are not exposed to the user
"""
gam = GAM()
gam.set_params(_lam=420)
assert(gam._lam != 420)
def test_set_params_with_phony_param():
"""
test set_params should not set any phony param
"""
gam = GAM()
gam.set_params(cat=420)
assert(not hasattr(gam, 'cat'))
def test_set_params_with_hidden_param_deep():
"""
test set_params can set hidden params if we use the deep=True
"""
gam = GAM()
assert(gam._lam != 420)
gam.set_params(_lam=420, deep=True)
assert(gam._lam == 420)
def test_set_params_with_phony_param_force():
"""
test set_params can set phony params if we use the force=True
"""
gam = GAM()
assert(not hasattr(gam, 'cat'))
gam.set_params(cat=420, force=True)
assert(gam.cat == 420)
def test_get_params():
"""
test gam gets our params
"""
gam = GAM(lam=420)
params = gam.get_params()
assert(params['lam'] == 420)
def test_get_params_hidden():
"""
test gam gets our params only if we do deep=True
"""
gam = GAM()
params = gam.get_params()
assert('_lam' not in list(params.keys()))
params = gam.get_params(deep=True)
assert('_lam' in list(params.keys()))
class TestSamplingFromPosterior(object):
def test_drawing_samples_from_unfitted_model(self, mcycle, mcycle_gam):
X, y = mcycle
gam = LinearGAM()
with pytest.raises(AttributeError):
gam.sample(X, y)
with pytest.raises(AttributeError):
gam._sample_coef(X, y)
with pytest.raises(AttributeError):
gam._bootstrap_samples_of_smoothing(X, y)
assert mcycle_gam._is_fitted
mcycle_gam.sample(X, y, n_draws=2)
mcycle_gam._sample_coef(X, y, n_draws=2)
mcycle_gam._bootstrap_samples_of_smoothing(X, y, n_bootstraps=1)
assert True
def test_sample_quantity(self, mcycle, mcycle_gam):
X, y = mcycle
for quantity in ['coefficients', 'response']:
with pytest.raises(ValueError):
mcycle_gam.sample(X, y, quantity=quantity, n_draws=2)
for quantity in ['coef', 'mu', 'y']:
mcycle_gam.sample(X, y, quantity=quantity, n_draws=2)
assert True
def test_shape_of_random_samples(self, mcycle, mcycle_gam):
X, y = mcycle
n_samples = len(X)
n_draws = 5
sample_coef = mcycle_gam.sample(X, y, quantity='coef', n_draws=n_draws)
sample_mu = mcycle_gam.sample(X, y, quantity='mu', n_draws=n_draws)
sample_y = mcycle_gam.sample(X, y, quantity='y', n_draws=n_draws)
assert sample_coef.shape == (n_draws, len(mcycle_gam.coef_))
assert sample_mu.shape == (n_draws, n_samples)
assert sample_y.shape == (n_draws, n_samples)
XX = generate_X_grid(mcycle_gam)
n_samples_in_grid = len(XX)
sample_coef = mcycle_gam.sample(X, y, quantity='coef', n_draws=n_draws,
sample_at_X=XX)
sample_mu = mcycle_gam.sample(X, y, quantity='mu', n_draws=n_draws,
sample_at_X=XX)
sample_y = mcycle_gam.sample(X, y, quantity='y', n_draws=n_draws,
sample_at_X=XX)
assert sample_coef.shape == (n_draws, len(mcycle_gam.coef_))
assert sample_mu.shape == (n_draws, n_samples_in_grid)
assert sample_y.shape == (n_draws, n_samples_in_grid)
def test_shape_bootstrap_samples_of_smoothing(self, mcycle, mcycle_gam):
X, y = mcycle
for n_bootstraps in [1, 2]:
coef_bootstraps, cov_bootstraps = (
mcycle_gam._bootstrap_samples_of_smoothing(
X, y, n_bootstraps=n_bootstraps))
assert len(coef_bootstraps) == len(cov_bootstraps) == n_bootstraps
for coef, cov in zip(coef_bootstraps, cov_bootstraps):
assert coef.shape == mcycle_gam.coef_.shape
assert cov.shape == mcycle_gam.statistics_['cov'].shape
for n_draws in [1, 2]:
coef_draws = mcycle_gam._simulate_coef_from_bootstraps(
n_draws, coef_bootstraps, cov_bootstraps)
assert coef_draws.shape == (n_draws, len(mcycle_gam.coef_))
def test_bad_sample_params(self, mcycle, mcycle_gam):
X, y = mcycle
with pytest.raises(ValueError):
mcycle_gam.sample(X, y, n_draws=0)
with pytest.raises(ValueError):
mcycle_gam.sample(X, y, n_bootstraps=0)
def test_prediction_interval_unknown_scale():
"""
the prediction intervals should be correct to a few decimal places
we test at a large sample limit, where the t distribution becomes normal
"""
n = 1000000
X = np.linspace(0,1,n)
y = np.random.randn(n)
gam_a = LinearGAM(fit_linear=True, fit_splines=False).fit(X, y)
gam_b = LinearGAM(n_splines=4).fit(X, y)
XX = generate_X_grid(gam_a)
intervals_a = gam_a.prediction_intervals(XX, quantiles=[0.1, .9]).mean(axis=0)
intervals_b = gam_b.prediction_intervals(XX, quantiles=[0.1, .9]).mean(axis=0)
assert np.allclose(intervals_a[0], sp.stats.norm.ppf(0.1), atol=0.01)
assert np.allclose(intervals_a[1], sp.stats.norm.ppf(0.9), atol=0.01)
assert np.allclose(intervals_b[0], sp.stats.norm.ppf(0.1), atol=0.01)
assert np.allclose(intervals_b[1], sp.stats.norm.ppf(0.9), atol=0.01)
def test_prediction_interval_known_scale():
"""
the prediction intervals should be correct to a few decimal places
we test at a large sample limit.
"""
n = 1000000
X = np.linspace(0,1,n)
y = np.random.randn(n)
gam_a = LinearGAM(fit_linear=True, fit_splines=False, scale=1.).fit(X, y)
gam_b = LinearGAM(n_splines=4, scale=1.).fit(X, y)
XX = generate_X_grid(gam_a)
intervals_a = gam_a.prediction_intervals(XX, quantiles=[0.1, .9]).mean(axis=0)
intervals_b = gam_b.prediction_intervals(XX, quantiles=[0.1, .9]).mean(axis=0)
assert np.allclose(intervals_a[0], sp.stats.norm.ppf(0.1), atol=0.01)
assert np.allclose(intervals_a[1], sp.stats.norm.ppf(0.9), atol=0.01)
assert np.allclose(intervals_b[0], sp.stats.norm.ppf(0.1), atol=0.01)
assert np.allclose(intervals_b[1], sp.stats.norm.ppf(0.9), atol=0.01)
# pygam/tests/test_GAM_params.py
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pygam import *
def test_expand_params(cake):
"""
check that gam expands lam, dtype, n_splines, fit_linear, fit_splines
penalties, spline_order
"""
X, y = cake
m = X.shape[1]
gam = LinearGAM().fit(X, y)
for param in ['dtype', 'n_splines', 'spline_order', 'fit_linear',
'fit_splines', 'penalties',]:
assert(len(getattr(gam, '_' + param)) == m)
assert(len(gam._lam) == (m + gam.fit_intercept))
def test_lam_non_neg_array_like(cake):
"""
lambda must be a non-negative float or array of floats
"""
X, y = cake
try:
gam = LinearGAM(lam=-1).fit(X, y)
except ValueError:
assert(True)
try:
gam = LinearGAM(lam=['hi']).fit(X, y)
except ValueError:
assert(True)
def test_wrong_length_param(cake):
"""
If input param is iterable, then it must have length equal to
the number of features.
"""
X, y = cake
m = X.shape[1]
n_splines = [20] * (m+1)
gam = LinearGAM(n_splines=n_splines)
try:
gam.fit(X, y)
except ValueError:
n_splines = [20] * (m)
gam = LinearGAM(n_splines=n_splines).fit(X, y)
assert(True)
def test_penalties_must_be_or_contain_callable_or_auto(mcycle):
"""
penalty matrix must be/contain callable or auto, otherwise raise ValueError
"""
X, y = mcycle
gam = LinearGAM(penalties='continuous')
try:
gam.fit(X, y)
except ValueError:
gam = LinearGAM(penalties='auto').fit(X, y)
assert(True)
# now do iterable
gam = LinearGAM(penalties=['continuous'])
try:
gam.fit(X, y)
except ValueError:
gam = LinearGAM(penalties=['auto']).fit(X, y)
assert(True)
def test_line_or_spline(mcycle):
"""
a line or spline must be fit on each feature
"""
X, y = mcycle
gam = LinearGAM(fit_linear=False ,fit_splines=False)
try:
gam.fit(X, y)
except ValueError:
gam = LinearGAM(fit_linear=False ,fit_splines=True).fit(X, y)
assert(True)
def test_linear_regression(mcycle):
"""
should be able to do linear regression
"""
X, y = mcycle
gam = LinearGAM(fit_linear=True, fit_splines=False).fit(X, y)
assert(gam._is_fitted)
def test_compute_stats_even_if_not_enough_iters(default):
"""
should be able to do linear regression
"""
X, y = default
gam = LogisticGAM(max_iter=1).fit(X, y)
assert(hasattr(gam, 'statistics_'))
# TODO categorical dtypes get no fit linear even if fit linear TRUE
# TODO categorical dtypes get their own number of splines
# TODO can force continuous dtypes on categorical vars if wanted
# pygam/tests/test_GAMs.py
# -*- coding: utf-8 -*-
import pytest
from pygam import *
def test_can_build_sub_models():
"""
check that the inits of all the sub-models are correct
"""
LinearGAM()
LogisticGAM()
PoissonGAM()
GammaGAM()
InvGaussGAM()
assert(True)
def test_LinearGAM_uni(mcycle):
"""
check that we can fit a Linear GAM on real, univariate data
"""
X, y = mcycle
gam = LinearGAM().fit(X, y)
assert(gam._is_fitted)
def test_LinearGAM_multi(wage):
"""
check that we can fit a Linear GAM on real, multivariate data
"""
X, y = wage
gam = LinearGAM().fit(X, y)
assert(gam._is_fitted)
def test_LogisticGAM(default):
"""
check that we can fit a Logistic GAM on real data
"""
X, y = default
gam = LogisticGAM().fit(X, y)
assert(gam._is_fitted)
def test_PoissonGAM(coal):
"""
check that we can fit a Poisson GAM on real data
"""
X, y = coal
gam = PoissonGAM().fit(X, y)
assert(gam._is_fitted)
def test_InvGaussGAM(trees):
"""
check that we can fit a InvGauss GAM on real data
"""
X, y = trees
gam = InvGaussGAM().fit(X, y)
assert(gam._is_fitted)
def test_GammaGAM(trees):
"""
check that we can fit a Gamma GAM on real data
"""
X, y = trees
gam = GammaGAM().fit(X, y)
assert(gam._is_fitted)
def test_CustomGAM(trees):
"""
check that we can fit a Custom GAM on real data
"""
X, y = trees
gam = GAM(distribution='gamma', link='log').fit(X, y)
assert(gam._is_fitted)
# TODO check dicts: DISTRIBUTIONS etc
# pygam/tests/test_core.py
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pygam.core import *
def test_Core_class():
"""
test attributes of core class
"""
c = Core()
assert(c._name == None)
c = Core(name='cat', line_width=70, line_offset=3)
assert(c._name == 'cat')
assert(c._line_width == 70)
assert(c._line_offset == 3)
def test_nice_repr():
"""
test a simple repr for a fake object
"""
param_kvs = {}
out = nice_repr('hi', param_kvs, line_width=30, line_offset=5, decimals=3)
assert(out == "hi()")
def test_nice_repr_more_attrs():
"""
test a simple repr for a fake object with more attrs
"""
param_kvs = {'color': 'blue', 'n_ears': 3, 'height':1.3336}
out = nice_repr('hi', param_kvs, line_width=60, line_offset=5, decimals=3)
assert(out == "hi(color='blue', height=1.334, n_ears=3)")
# pygam/tests/test_gridsearch.py
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pygam import *
def test_gridsearch_returns_scores(mcycle):
"""
check that gridsearch returns the expected number of models
"""
n = 5
X, y = mcycle
gam = LinearGAM()
scores = gam.gridsearch(X, y, lam=np.logspace(-3,3, n), return_scores=True)
assert(len(scores) == n)
def test_gridsearch_returns_extra_score_if_fitted(mcycle):
"""
check that gridsearch returns an extra score if our model is pre-fitted
"""
n = 5
X, y = mcycle
gam = LinearGAM().fit(X, y)
scores = gam.gridsearch(X, y, lam=np.logspace(-3,3, n), return_scores=True)
assert(len(scores) == n + 1)
def test_gridsearch_keep_best(mcycle):
"""
check that gridsearch returns worse model if keep_best=False
"""
n = 5
X, y = mcycle
gam = LinearGAM(lam=1000000).fit(X, y)
score1 = gam.statistics_['GCV']
scores = gam.gridsearch(X, y, lam=np.logspace(-3,3, n),
keep_best=False, return_scores=True)
assert(np.min(list(scores.values())) < score1)
def test_gridsearch_improves_objective(mcycle):
"""
check that gridsearch improves model objective
"""
n = 11
X, y = mcycle
gam = LinearGAM().fit(X, y)
objective_0 = gam.statistics_['GCV']
gam = LinearGAM().gridsearch(X, y, lam=np.logspace(-3,3, n))
objective_1 = gam.statistics_['GCV']
assert(objective_1 <= objective_0)
def test_gridsearch_all_dimensions_same(cake):
"""
check that gridsearch searches all dimensions of lambda with equal values
"""
n = 5
X, y = cake
scores = LinearGAM().gridsearch(X, y,
lam=np.logspace(-3,3, n),
return_scores=True)
assert(len(scores) == n)
assert(X.shape[1] > 1)
def test_gridsearch_all_dimensions_independent(cake):
"""
check that gridsearch searches all dimensions of lambda independently
"""
n = 3
X, y = cake
m = X.shape[1]
scores = LinearGAM().gridsearch(X, y,
lam=[np.logspace(-3,3, n)]*m,
return_scores=True)
assert(len(scores) == n**m)
assert(m > 1)
def test_GCV_objective_is_for_unknown_scale(mcycle, default, coal, trees):
"""
check that we use the GCV objective only for models with unknown scale
&
attempting to use it for models with known scale should return ValueError
"""
lam = np.linspace(1e-3, 1e3, 2)
unknown_scale = [(LinearGAM, mcycle),
(GammaGAM, trees),
(InvGaussGAM, trees)]
known_scale = [(LogisticGAM, default),
(PoissonGAM, coal)]
for gam, (X, y) in unknown_scale:
scores1 = list(gam().gridsearch(X, y, lam=lam, objective='auto',
return_scores=True).values())
scores2 = list(gam().gridsearch(X, y, lam=lam, objective='GCV',
return_scores=True).values())
assert(np.allclose(scores1, scores2))
for gam, (X, y) in known_scale:
try:
list(gam().gridsearch(X, y, lam=lam, objective='GCV',
return_scores=True).values())
except ValueError:
assert(True)
def test_UBRE_objective_is_for_known_scale(mcycle, default, coal, trees):
"""
check that we use the UBRE objective only for models with known scale
&
attempting to use it for models with unknown scale should return ValueError
"""
lam = np.linspace(1e-3, 1e3, 2)
unknown_scale = [(LinearGAM, mcycle),
(GammaGAM, trees),
(InvGaussGAM, trees)]
known_scale = [(LogisticGAM, default),
(PoissonGAM, coal)]
for gam, (X, y) in known_scale:
scores1 = list(gam().gridsearch(X, y, lam=lam, objective='auto',
return_scores=True).values())
scores2 = list(gam().gridsearch(X, y, lam=lam, objective='UBRE',
return_scores=True).values())
assert(np.allclose(scores1, scores2))
for gam, (X, y) in unknown_scale:
try:
list(gam().gridsearch(X, y, lam=lam, objective='UBRE',
return_scores=True).values())
except ValueError:
assert(True)
def test_no_models_fitted(mcycle):
"""
test that no models fitted returns the original gam
"""
X, y = mcycle
scores = LinearGAM().gridsearch(X, y, lam=[-3, -2,-1], return_scores=True)
# scores is not a dict of scores but an (unfitted) gam!
assert(not isinstance(scores, dict))
assert(isinstance(scores, LinearGAM))
assert(not scores._is_fitted)
# pygam/tests/test_penalties.py
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pygam import *
from pygam.penalties import derivative
from pygam.penalties import l2
from pygam.penalties import monotonic_inc
from pygam.penalties import monotonic_dec
from pygam.penalties import convex
from pygam.penalties import concave
from pygam.penalties import circular
from pygam.penalties import none
from pygam.penalties import wrap_penalty
from pygam.utils import generate_X_grid
def test_single_spline_penalty():
"""
check that feature functions with only 1 basis are penalized correctly
derivative penalty should be 0.
l2 penalty should be 1.
monotonic_ and convexity_ should be 0.
"""
coef = np.array(1.)
assert(np.alltrue(derivative(1, coef).A == 0.))
assert(np.alltrue(l2(1, coef).A == 1.))
assert(np.alltrue(monotonic_inc(1, coef).A == 0.))
assert(np.alltrue(monotonic_dec(1, coef).A == 0.))
assert(np.alltrue(convex(1, coef).A == 0.))
assert(np.alltrue(concave(1, coef).A == 0.))
assert(np.alltrue(circular(1, coef).A == 0.))
assert(np.alltrue(none(1, coef).A == 0.))
def test_wrap_penalty():
"""
check that wrap_penalty inserts the desired penalty into the
linear term when fit_linear is True, and 0 when fit_linear is False.
"""
coef = np.array(1.)
n = 2
linear_penalty = -1
fit_linear = True
p = wrap_penalty(none, fit_linear, linear_penalty=linear_penalty)
P = p(n, coef).A
assert(P.sum() == linear_penalty)
fit_linear = False
p = wrap_penalty(none, fit_linear, linear_penalty=linear_penalty)
P = p(n, coef).A
assert(P.sum() == 0.)
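# Hedged follow-up: assuming wrap_penalty puts the linear term in the
# top-left entry and the wrapped penalty in the remaining block, a
# non-trivial penalty like l2 should yield diag(linear_penalty, 1, ..., 1):
#
#     P = wrap_penalty(l2, True, linear_penalty=2.)(3, np.ones(2)).A
#     # expected under this assumption: np.diag([2., 1., 1.])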
def test_monotonic_inc(hepatitis):
"""
check that monotonic_inc constraint produces monotonic increasing function
"""
X, y = hepatitis
gam = LinearGAM(constraints='monotonic_inc')
gam.fit(X, y)
XX = generate_X_grid(gam)
Y = gam.predict(np.sort(XX))
diffs = np.diff(Y, n=1)
assert(((diffs >= 0) + np.isclose(diffs, 0.)).all())
def test_monotonic_dec(hepatitis):
"""
check that monotonic_dec constraint produces monotonic decreasing function
"""
X, y = hepatitis
gam = LinearGAM(constraints='monotonic_dec')
gam.fit(X, y)
XX = generate_X_grid(gam)
Y = gam.predict(np.sort(XX))
diffs = np.diff(Y, n=1)
assert(((diffs <= 0) + np.isclose(diffs, 0.)).all())
def test_convex(hepatitis):
"""
check that convex constraint produces convex function
"""
X, y = hepatitis
gam = LinearGAM(constraints='convex')
gam.fit(X, y)
XX = generate_X_grid(gam)
Y = gam.predict(np.sort(XX))
diffs = np.diff(Y, n=2)
assert(((diffs >= 0) + np.isclose(diffs, 0.)).all())
def test_concave(hepatitis):
"""
check that concave constraint produces concave function
"""
X, y = hepatitis
gam = LinearGAM(constraints='concave')
gam.fit(X, y)
XX = generate_X_grid(gam)
Y = gam.predict(np.sort(XX))
diffs = np.diff(Y, n=2)
assert(((diffs <= 0) + np.isclose(diffs, 0.)).all())
# TODO penalties gives expected matrix structure
# TODO circular constraints
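# A hedged sketch toward the first TODO above: assuming the default
# derivative penalty uses second differences, P should equal D.T @ D for
# the (n-2) x n second-difference operator D.
def test_derivative_penalty_matrix_structure():
n = 5
coef = np.ones(n)
D = np.diff(np.eye(n), n=2, axis=0)  # (n-2) x n second-difference operator
assert(np.allclose(derivative(n, coef).A, D.T.dot(D)))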
pygam/tests/test_utils.py
# -*- coding: utf-8 -*-
from copy import deepcopy
import numpy as np
import pytest
from pygam import *
from pygam.utils import check_X, check_y, check_X_y
# TODO check dtypes works as expected
# TODO checkX, checky, check XY expand as needed, call out bad domain
@pytest.fixture
def wage_gam(wage):
X, y = wage
gam = LinearGAM().fit(X,y)
return gam
@pytest.fixture
def default_gam(default):
X, y = default
gam = LogisticGAM().fit(X,y)
return gam
def test_check_X_categorical_prediction_exceeds_training(wage, wage_gam):
"""
if our categorical variable is outside the training range
we should get an error
"""
X, y = wage
gam = wage_gam
eks = gam._edge_knots[-1] # last feature of wage dataset is categorical
X[:,-1] = eks[-1] + 1
try:
gam.predict(X)
assert False
except ValueError:
X[:,-1] = eks[-1]
gam.predict(X)
assert(True)
def test_check_y_not_int_not_float(wage, wage_gam):
"""y must be int or float, or we should get a value error"""
X, y = wage
y_str = ['hi'] * len(y)
try:
check_y(y_str, wage_gam.link, wage_gam.distribution)
assert False
except ValueError:
check_y(y, wage_gam.link, wage_gam.distribution)
assert(True)
def test_check_y_casts_to_numerical(wage, wage_gam):
"""check_y will try to cast data to numerical types"""
X, y = wage
y = y.astype('object')
y = check_y(y, wage_gam.link, wage_gam.distribution)
assert y.dtype == 'float'
def test_check_y_not_min_samples(wage, wage_gam):
"""check_y expects a minimum number of samples"""
X, y = wage
try:
check_y(y, wage_gam.link, wage_gam.distribution, min_samples=len(y)+1)
assert False
except ValueError:
check_y(y, wage_gam.link, wage_gam.distribution, min_samples=len(y))
assert True
def test_check_y_not_in_domain_link(default, default_gam):
"""if you give labels outside of the link's domain, check_y will raise an error"""
X, y = default
gam = default_gam
try:
check_y(y + .1, default_gam.link, default_gam.distribution)
assert False
except ValueError:
check_y(y, default_gam.link, default_gam.distribution)
assert True
def test_check_X_not_int_not_float():
"""X must be an in or a float"""
try:
check_X(['hi'])
assert False
except ValueError:
check_X([4])
assert True
def test_check_X_too_many_dims():
"""check_X accepts at most 2D inputs"""
try:
check_X(np.ones((5,4,3)))
assert False
except ValueError:
check_X(np.ones((5,4)))
assert True
def test_check_X_not_min_samples():
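"""check_X expects a minimum number of samples"""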
try:
check_X(np.ones((5)), min_samples=6)
assert False
except ValueError:
check_X(np.ones((5)), min_samples=5)
assert True
def test_check_X_y_different_lengths():
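"""check_X_y raises a ValueError when X and y have different lengths"""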
try:
check_X_y(np.ones(5), np.ones(4))
assert False
except ValueError:
check_X_y(np.ones(5), np.ones(5))
assert True
def test_input_data_after_fitting(mcycle):
"""
our check_X and check_y functions should be invoked
any time external data is input to the model
"""
X, y = mcycle
weights = np.ones_like(y)
X_nan = deepcopy(X)
X_nan[0] = X_nan[0] * np.nan
y_nan = deepcopy(y.values)
y_nan[0] = y_nan[0] * np.nan
weights_nan = deepcopy(weights)
weights_nan[0] = weights_nan[0] * np.nan
gam = LinearGAM()
with pytest.raises(ValueError):
gam.fit(X_nan, y, weights)
with pytest.raises(ValueError):
gam.fit(X, y_nan, weights)
with pytest.raises(ValueError):
gam.fit(X, y, weights_nan)
gam = gam.fit(X, y)
# test X is nan
with pytest.raises(ValueError):
gam.predict(X_nan)
with pytest.raises(ValueError):
gam.predict_mu(X_nan)
with pytest.raises(ValueError):
gam.confidence_intervals(X_nan)
with pytest.raises(ValueError):
gam.prediction_intervals(X_nan)
with pytest.raises(ValueError):
gam.partial_dependence(X_nan)
with pytest.raises(ValueError):
gam.deviance_residuals(X_nan, y, weights)
with pytest.raises(ValueError):
gam.loglikelihood(X_nan, y, weights)
with pytest.raises(ValueError):
gam.gridsearch(X_nan, y, weights)
with pytest.raises(ValueError):
gam.sample(X_nan, y)
# test y is nan
with pytest.raises(ValueError):
gam.deviance_residuals(X, y_nan, weights)
with pytest.raises(ValueError):
gam.loglikelihood(X, y_nan, weights)
with pytest.raises(ValueError):
gam.gridsearch(X, y_nan, weights)
with pytest.raises(ValueError):
gam.sample(X, y_nan, weights=weights, n_bootstraps=2)
# test weights is nan
with pytest.raises(ValueError):
gam.deviance_residuals(X, y, weights_nan)
with pytest.raises(ValueError):
gam.loglikelihood(X, y, weights_nan)
with pytest.raises(ValueError):
gam.gridsearch(X, y, weights_nan)
with pytest.raises(ValueError):
gam.sample(X, y, weights=weights_nan, n_bootstraps=2)
# # def test_b_spline_basis_clamped_what_we_want():
pygam-0.4.1.dist-info/LICENSE
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.
Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
{one line to give the program's name and a brief idea of what it does.}
Copyright (C) {year} {name of author}
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:
{project} Copyright (C) {year} {fullname}
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<http://www.gnu.org/licenses/>.
The GNU General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<http://www.gnu.org/licenses/why-not-lgpl.html>.
pygam-0.4.1.dist-info/WHEEL
pygam-0.4.1.dist-info/METADATA
pygam-0.4.1.dist-info/RECORD
(binary wheel metadata and the zip central directory; contents not recoverable as text)