Source code for foehnix.families

import numpy as np
from scipy.stats import logistic, norm
import logging

# logger
log = logging.getLogger(__name__)


[docs]class Family: """ Common logic for the foehnix families """
[docs] def __init__(self): self.name = 'Main family' self.scale_factor = None
def density(self, y, mu, sigma, logpdf=False): raise NotImplementedError def loglik(self, y, post, prob, theta): """ Calculate log-likelihood sum of the two-component mixture model Parameters ---------- y : :py:class:`numpy.ndarray` predictor values of shape(len(observations), 1) post : py:class:`numpy.array` posteriori prob : py:class:`numpy.array` probability theta : dict contains mu1, mu2, logsd1, logsd2 Returns ------- : dict Component, concomitant and sum of both """ # limit prob to [eps, 1-eps] eps = np.sqrt(np.finfo(float).eps) prob = np.maximum(eps, np.minimum(1-eps, prob)) # calculate densities, logistic/gaussian specific d1 = self.density(y, theta['mu1'], np.exp(theta['logsd1']), logpdf=True) d2 = self.density(y, theta['mu2'], np.exp(theta['logsd2']), logpdf=True) # calculate log liklihood component = np.sum(post * d2) + np.sum((1-post) * d1) concomitant = np.sum((1-post) * np.log(1-prob) + post * np.log(prob)) return {'component': component, 'concomitant': concomitant, 'full': component+concomitant} def posterior(self, y, prob, theta): """ Posterior probabilities used for model estimation (EM algorithm) Parameters ---------- y : :py:class:`numpy.ndarray` predictor values of shape(len(observations), 1) prob : py:class:`numpy.array` probability theta : dict contains mu1, mu2, logsd1, logsd2 Returns ------- :py:class:`numpy.ndarray` (updated) posterior probabilites """ # calculate densities, logistic/gaussian specific d1 = self.density(y, theta['mu1'], np.exp(theta['logsd1']), logpdf=False) d2 = self.density(y, theta['mu2'], np.exp(theta['logsd2']), logpdf=False) post = prob * d2 / ((1-prob) * d1 + prob * d2) return post def theta(self, y, post, init=False): """ Distribution parameters of the components of the mixture models. Used for model estimation in the EM algorithm. Parameters ---------- y : :py:class:`numpy.ndarray` predictor values of shape(len(observations), 1) post : py:class:`numpy.array` posteriori init : bool If True (first call) scale is just the standard deviation of y. Returns ------- : dict contains mu1, mu2, logsd1, logsd2 """ # Emperical update of mu and std mu1 = np.sum((1-post) * y) / (np.sum(1-post)) mu2 = np.sum(post * y) / np.sum(post) if init: sd1 = np.std(y) sd2 = np.std(y) else: sd1 = np.sqrt(np.sum((1-post) * (y - mu1)**2) / np.sum(1-post)) sd2 = np.sqrt(np.sum(post * (y - mu2)**2) / np.sum(post)) # necessary for the logistic distribution sd1 *= self.scale_factor sd2 *= self.scale_factor # return dict theta = {'mu1': mu1, 'logsd1': np.log(sd1) if sd1 > np.exp(-6) else -6, 'mu2': mu2, 'logsd2': np.log(sd2) if sd2 > np.exp(-6) else -6} return theta
[docs]class GaussianFamily(Family): """ Gaussian foehnix mixture model family """
[docs] def __init__(self): """ Initialize the Gaussian Family """ super(Family, self).__init__() self.name = 'Gaussian' self.scale_factor = 1 # factor for the scale of the distribution
def density(self, y, mu, sigma, logpdf=False): """ Density function of the mixture distribution Parameters ---------- y : :py:class:`numpy.ndarray` predictor values of shape(len(observations), 1) mu : float location of the distribution sigma : float scale of the distribution logpdf : bool If True, log of the probability density function will be returned. Returns ------- :py:class:`numpy.ndarray` Probability density function or log of it. """ if logpdf is True: dnorm = norm(loc=mu, scale=sigma).logpdf(y) else: dnorm = norm(loc=mu, scale=sigma).pdf(y) return dnorm
[docs]class LogisticFamily(Family): """ Logistic foehnix mixture model family """
[docs] def __init__(self): """ Initialize the Logistic Family """ super(Family, self).__init__() self.name = 'Logistic' self.scale_factor = np.sqrt(3)/np.pi # distribution scale factor
def density(self, y, mu, sigma, logpdf=False): """ Density function of the logistic mixture model distribution Parameters ---------- y : :py:class:`numpy.ndarray` predictor values of shape(len(observations), 1) mu : float location of the distribution sigma : float scale of the distribution logpdf : bool If True, log of the probability density function will be returned. Returns ------- :py:class:`numpy.ndarray` Probability density function or log of it. """ if logpdf is True: dlogis = logistic(loc=mu, scale=sigma).logpdf(y) else: dlogis = logistic(loc=mu, scale=sigma).pdf(y) return dlogis
def initialize_family(familyname='gaussian', left=float('-Inf'), right=float('Inf'), truncated=False): """ Helper function to initialize a Foehnix Family based on arguments Parameters ---------- familyname : str Gaussian or Logistic distribution. Possible values: - `gaussian' (default) - 'logistic' truncated : bool left : float right : float Returns ------- py:class:`foehnix.Family` object """ if not isinstance(truncated, bool): raise ValueError('truncated must be boolean True or False') if familyname == 'gaussian': if np.isfinite([left, right]).any(): if truncated is True: raise NotImplementedError # TODO: this is currently not implemented # family = TruncatedGaussianFamily(left=left, right=right) else: raise NotImplementedError # TODO: this is currently not implemented # family = CensoredGaussianFamily(left=left, right=right) else: family = GaussianFamily() elif familyname == 'logistic': if np.isfinite([left, right]).any(): if truncated is True: raise NotImplementedError # TODO: this is currently not implemented # family = TruncatedLogisticFamily(left=left, right=right) else: raise NotImplementedError # TODO: this is currently not implemented # family = CensoredLogsticFamily(left=left, right=right) else: family = LogisticFamily() else: raise ValueError('familyname must be gaussian or logistic') log.debug('%s model family initialized.' % family.name) return family