I am working on some price data with x = day1, day2, day3,...etc. on day1, I have let's say 15 price points(y), day2, I have 30 price points(y2), and so on.
When I read the documentation of Gaussian Process Regression:
http://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.GaussianProcess.html#sklearn.gaussian_process.GaussianProcess.fit
y is shape (n_samples, n_targets) with the observations of the output to be predicted.
I assume n_targets refers all the price points I observed on each day. However, the number of price points on each day are not the same. I wonder how to deal with a case like this?
Many thanks!
I have made an implementation of gaussian process for regression in python using only numpy. My aim was to understand it by implementing it. It may be helpful for you.
https://github.com/muatik/machine-learning-examples/blob/master/gaussianprocess2.ipynb
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
sns.set(color_codes=True)%matplotlib inlineclass GP(object):@classmethoddef kernel_bell_shape(cls, x, y, delta=1.0):return np.exp(-1/2.0 * np.power(x - y, 2) / delta)@classmethoddef kernel_laplacian(cls, x, y, delta=1):return np.exp(-1/2.0 * np.abs(x - y) / delta)@classmethoddef generate_kernel(cls, kernel, delta=1):def wrapper(*args, **kwargs):kwargs.update({"delta": delta})return kernel(*args, **kwargs)return wrapperdef __init__(self, x, y, cov_f=None, R=0):super().__init__()self.x = xself.y = yself.N = len(self.x)self.R = Rself.sigma = []self.mean = []self.cov_f = cov_f if cov_f else self.kernel_bell_shapeself.setup_sigma()@classmethoddef calculate_sigma(cls, x, cov_f, R=0):N = len(x)sigma = np.ones((N, N))for i in range(N):for j in range(i+1, N):cov = cov_f(x[i], x[j])sigma[i][j] = covsigma[j][i] = covsigma = sigma + R * np.eye(N)return sigmadef setup_sigma(self):self.sigma = self.calculate_sigma(self.x, self.cov_f, self.R)def predict(self, x):cov = 1 + self.R * self.cov_f(x, x)sigma_1_2 = np.zeros((self.N, 1))for i in range(self.N):sigma_1_2[i] = self.cov_f(self.x[i], x)# SIGMA_1_2 * SIGMA_1_1.I * (Y.T -M)# M IS ZEROm_expt = (sigma_1_2.T * np.mat(self.sigma).I) * np.mat(self.y).T# sigma_expt = cov - (sigma_1_2.T * np.mat(self.sigma).I) * sigma_1_2sigma_expt = cov + self.R - (sigma_1_2.T * np.mat(self.sigma).I) * sigma_1_2return m_expt, sigma_expt@staticmethoddef get_probability(sigma, y, R):multiplier = np.power(np.linalg.det(2 * np.pi * sigma), -0.5)return multiplier * np.exp((-0.5) * (np.mat(y) * np.dot(np.mat(sigma).I, y).T))def optimize(self, R_list, B_list):def cov_f_proxy(delta, f):def wrapper(*args, **kwargs):kwargs.update({"delta": delta})return f(*args, **kwargs)return wrapperbest = (0, 0, 0)history = []for r in R_list:best_beta = (0, 0)for b in B_list:sigma = gaus.calculate_sigma(self.x, cov_f_proxy(b, self.cov_f), r)marginal = b* float(self.get_probability(sigma, self.y, r))if marginal > best_beta[0]:best_beta = (marginal, b)history.append((best_beta[0], r, best_beta[1]))return sorted(history)[-1], np.mat(history)
Now you can try it as follows:
# setting up a GP
x = np.array([-2, -1, 0, 3.5, 4]);
y = np.array([4.1, 0.9, 2, 12.3, 15.8])
gaus = GP(x, y)x_guess = np.linspace(-5, 16, 400)
y_pred = np.vectorize(gaus.predict)(x_guess)plt.scatter(x, y, c="black")
plt.plot(x_guess, y_pred[0], c="b")
plt.plot(x_guess, y_pred[0] - np.sqrt(y_pred[1]) * 3, "r:")
plt.plot(x_guess, y_pred[0] + np.sqrt(y_pred[1]) * 3, "r:")
The effects of regularization parameter
def create_case(kernel, R=0):x = np.array([-2, -1, 0, 3.5, 4]);y = np.array([4.1, 0.9, 2, 12.3, 15.8])gaus = GP(x, y, kernel, R=R)x_guess = np.linspace(-4, 6, 400)y_pred = np.vectorize(gaus.predict)(x_guess)plt.scatter(x, y, c="black")plt.plot(x_guess, y_pred[0], c="b")plt.plot(x_guess, y_pred[0] - np.sqrt(y_pred[1]) * 3, "r:")plt.plot(x_guess, y_pred[0] + np.sqrt(y_pred[1]) * 3, "r:")plt.figure(figsize=(16, 16))
for i, r in enumerate([0.0001, 0.03, 0.09, 0.8, 1.5, 5.0]):plt.subplot("32{}".format(i+1))plt.title("kernel={}, delta={}, beta={}".format("bell shape", 1, r))create_case(GP.generate_kernel(GP.kernel_bell_shape, delta=1), R=r)
plt.figure(figsize=(16, 16))
for i, d in enumerate([0.05, 0.5, 1, 3.2, 5.0, 7.0]):plt.subplot("32{}".format(i+1))plt.title("kernel={}, delta={}, beta={}".format("kernel_laplacian", d, 1))create_case(GP.generate_kernel(GP.kernel_bell_shape, delta=d), R=0)