where \(l\) controls how quickly the correlation decays with distance and \(\sigma\) determines the overall scaling of the kernel.
import numpy as np

def kernel(X1, X2, l=1.0, sigma=1.0):
    """
    Isotropic squared exponential kernel.

    Args:
        X1: Array of m points (m x d).
        X2: Array of n points (n x d).

    Returns:
        (m x n) covariance matrix.
    """
    sqdist = np.sum(X1**2, 1).reshape(-1, 1) + np.sum(X2**2, 1) - 2 * np.dot(X1, X2.T)
    return sigma**2 * np.exp(-0.5 / l**2 * sqdist)
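As a quick sanity check of the kernel above, the following sketch (my own illustrative snippet, not part of the original text) evaluates it on three 1-D points and shows how the off-diagonal entries shrink when the length scale \(l\) is shortened:

# Illustrative check (assumed, not from the original article):
# correlation between points decays with their distance and with smaller l.
pts = np.array([[0.0], [1.0], [3.0]])

K_long = kernel(pts, pts, l=2.0, sigma=1.0)   # slow decay
K_short = kernel(pts, pts, l=0.5, sigma=1.0)  # fast decay

print(np.round(K_long, 3))   # off-diagonal entries stay relatively large
print(np.round(K_short, 3))  # off-diagonal entries drop towards zero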
Prior (we know nothing)
from helper import plot_gp

# Generate a finite number of test points
X = np.arange(0, 10, 0.1).reshape(-1, 1)  # the number of columns is inferred
mu = np.zeros(X.shape)   # mean
cov = kernel(X, X)       # compute the covariance (unnormalized correlation)

# Draw 5 functions as samples from the Gaussian process prior
samples = np.random.multivariate_normal(mu.ravel(), cov, 5)

# Plot the mean, the uncertainty region and the sampled functions
plot_gp(mu, cov, X, samples=samples)
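The roughness of these prior samples is controlled entirely by the length scale. A minimal sketch (an illustrative diagnostic of my own, under the assumption that the mean step between neighbouring sample values is a reasonable proxy for roughness, not a figure from the original article):

# Illustrative sketch (assumed): shorter l produces rougher sample paths,
# longer l produces smoother ones.
for l_val in (0.3, 3.0):
    cov_l = kernel(X, X, l=l_val)
    s = np.random.multivariate_normal(mu.ravel(), cov_l, 5)
    roughness = np.mean(np.abs(np.diff(s, axis=1)))  # mean step between neighbours
    print(f"l = {l_val}: mean |f(x_i+1) - f(x_i)| ~ {roughness:.3f}")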
Posterior
from numpy.linalg import inv

def posterior(X, X_train, Y_train, l=1.0, sigma=1.0, sigma_y=1e-8):
    """
    Computes the new mean and covariance given the training data.

    Args:
        X: New input locations (n x d).
        X_train, Y_train: Known training data (m x 1).
        l, sigma: Kernel parameters.
        sigma_y: Noise level.

    Returns:
        Posterior mean (n x d) and covariance (n x n).
    """
    K = kernel(X_train, X_train, l, sigma) + sigma_y**2 * np.eye(len(X_train))  # kernel with noise
    K_s = kernel(X_train, X, l, sigma)
    K_ss = kernel(X, X, l, sigma) + 1e-8 * np.eye(len(X))
    mu_s = K_s.T.dot(inv(K)).dot(Y_train)      # (7)
    cov_s = K_ss - K_s.T.dot(inv(K)).dot(K_s)  # (8)
    return mu_s, cov_s
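The later cells assume training arrays X_train, Y_train and a noise level. One possible setup, using np and plot_gp as imported above (the sine function and the noise level 0.4 are my own illustrative choices, not necessarily the data used in the original article):

# Hypothetical training data (illustrative choice, assumed):
# noisy observations of a sine function at a few points of the x axis.
noise = 0.4
X_train = np.arange(1, 9, 1.5).reshape(-1, 1)
Y_train = np.sin(X_train) + noise * np.random.randn(*X_train.shape)

# Posterior conditioned on the noisy observations, with default kernel parameters
mu_s, cov_s = posterior(X, X_train, Y_train, sigma_y=noise)
plot_gp(mu_s, cov_s, X, X_train=X_train, Y_train=Y_train)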
from numpy.linalg import cholesky, det
from scipy.linalg import solve_triangular
from scipy.optimize import minimize
import matplotlib.pyplot as plt

def nll_fn(X_train, Y_train, noise, naive=True):
    """
    Returns a function that computes the negative log marginal
    likelihood for training data X_train and Y_train and given
    noise level.

    Args:
        X_train: training locations (m x d).
        Y_train: training targets (m x 1).
        noise: known noise level of Y_train.
        naive: if True use a naive implementation of Eq. (11), if
               False use a numerically more stable implementation.

    Returns:
        Minimization objective.
    """
    Y_train = Y_train.ravel()

    def nll_naive(theta):
        # Naive implementation of Eq. (11). Works well for the examples
        # in this article but is numerically less stable compared to
        # the implementation in nll_stable below.
        K = kernel(X_train, X_train, l=theta[0], sigma=theta[1]) + \
            noise**2 * np.eye(len(X_train))
        return 0.5 * np.log(det(K)) + \
               0.5 * Y_train.dot(inv(K).dot(Y_train)) + \
               0.5 * len(X_train) * np.log(2*np.pi)

    def nll_stable(theta):
        # Numerically more stable implementation of Eq. (11) as described
        # in http://www.gaussianprocess.org/gpml/chapters/RW2.pdf, Section
        # 2.2, Algorithm 2.1.
        K = kernel(X_train, X_train, l=theta[0], sigma=theta[1]) + \
            noise**2 * np.eye(len(X_train))
        L = cholesky(K)
        S1 = solve_triangular(L, Y_train, lower=True)
        S2 = solve_triangular(L.T, S1, lower=False)
        return np.sum(np.log(np.diagonal(L))) + \
               0.5 * Y_train.dot(S2) + \
               0.5 * len(X_train) * np.log(2*np.pi)

    if naive:
        return nll_naive
    else:
        return nll_stable

# Minimize the negative log-likelihood w.r.t. parameters l and sigma_f.
# We should actually run the minimization several times with different
# initializations to avoid local minima but this is skipped here for
# simplicity.
res = minimize(nll_fn(X_train, Y_train, noise), [1, 1],
               bounds=((1e-5, None), (1e-5, None)),
               method='L-BFGS-B')

# Store the optimization results in global variables so that we can
# compare them later with the results from other implementations.
l_opt, sigma_f_opt = res.x

# Compute posterior mean and covariance with optimized kernel parameters
# and plot the results.
mu_s, cov_s = posterior(X, X_train, Y_train, l=l_opt, sigma=sigma_f_opt, sigma_y=noise)
plt.figure(figsize=(13, 4))
plot_gp(mu_s, cov_s, X, X_train=X_train, Y_train=Y_train)
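A quick sanity check of the two objectives (my own addition, not part of the original text): both the naive and the Cholesky-based implementation should return the same value at the optimum found above, up to numerical error.

# Compare the two NLL implementations at the optimized parameters (assumed check).
print("l_opt =", l_opt, "sigma_f_opt =", sigma_f_opt)
nll_naive_val = nll_fn(X_train, Y_train, noise, naive=True)(res.x)
nll_stable_val = nll_fn(X_train, Y_train, noise, naive=False)(res.x)
print("naive NLL :", nll_naive_val)
print("stable NLL:", nll_stable_val)  # should match the naive value closely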