Marginal Gaussian with y=None vs Latent Gaussian

In a time series model with regressors, I would like to model the trend with a Gaussian process. I see that I can describe the model either with a Marginal Gaussian process (passing y=None and a small noise) or with a Latent Gaussian process. Both descriptions give the same results, though the number of divergent samples is higher in the second (Latent) case; the divergence counts and posterior summaries can be compared with the short ArviZ snippet after the two models. What is the recommended way to do this? I post both models below:

Marginal GP version (y=None with a small noise):

import numpy as np
import pymc as pm

# Simulated data
np.random.seed(42)
n = 50
t = np.linspace(0, 1, n)[:, None]       # Time
X1 = np.random.rand(n, 1)               # Regressor 1
X2 = np.random.rand(n, 1)               # Regressor 2

# True model: linear in X + nonlinear trend
true_trend = 0.5 * t + 0.3 * np.sin(5 * t)
y = (
    true_trend.ravel()
    + 1.5 * X1.ravel()
    - 2.0 * X2.ravel()
    + 0.05 * np.random.randn(n)
)

# PyMC model
with pm.Model() as model:
    # GP hyperpriors
    ls = pm.Gamma("lengthscale", alpha=2, beta=10)
    eta = pm.HalfNormal("eta", sigma=1.0)
    slope = pm.Normal("slope", mu=0.0, sigma=1.0)

    # GP kernel: Linear + ExpQuad
    cov = (
        pm.gp.cov.Linear(1, c=slope)
        + eta**2 * pm.gp.cov.ExpQuad(1, ls=ls)
    )
    gp = pm.gp.Marginal(cov_func=cov, mean_func=pm.gp.mean.Zero())

    # Linear regression weights
    a1 = pm.Normal("a1", mu=0.0, sigma=2.0)
    a2 = pm.Normal("a2", mu=0.0, sigma=2.0)

    # Observation noise
    sigma = pm.HalfNormal("sigma", sigma=0.1)

    # Linear component
    lin_term = a1 * X1.ravel() + a2 * X2.ravel()

    # GP trend component
    gp_trend = gp.marginal_likelihood("gp_trend", X=t, y=None, sigma=0.0001)

    # Total mean = GP trend + linear predictors
    mu = gp_trend + lin_term

    # Final observed likelihood
    y_obs = pm.Normal("y_obs", mu=mu, sigma=sigma, observed=y)
    trace = pm.sample(
        draws=3000,
        tune=4000,
        chains=5,
        cores=6,
        target_accept=0.95,
        nuts_sampler="nutpie",
        random_seed=42,
    )

Latent GP version:

import numpy as np
import pymc as pm

# Simulated data

np.random.seed(42)
n = 50
t = np.linspace(0, 1, n)[:, None]       # Time
X1 = np.random.rand(n, 1)               # Regressor 1
X2 = np.random.rand(n, 1)               # Regressor 2

# True model: linear in X + nonlinear trend
true_trend = 0.5 * t + 0.3 * np.sin(5 * t)
y = (
    true_trend.ravel()
    + 1.5 * X1.ravel()
    - 2.0 * X2.ravel()
    + 0.05 * np.random.randn(n)
)

# PyMC model
with pm.Model() as model:
    # GP hyperpriors
    ls = pm.Gamma("lengthscale", alpha=2, beta=10)
    eta = pm.HalfNormal("eta", sigma=1.0)
    slope = pm.Normal("slope", mu=0.0, sigma=1.0)

    # GP kernel: Linear + ExpQuad
    cov = (
        pm.gp.cov.Linear(1, c=slope)
        + eta**2 * pm.gp.cov.ExpQuad(1, ls=ls)
    )
    gp = pm.gp.Latent(cov_func=cov, mean_func=pm.gp.mean.Zero())

    # Place a GP prior over the function f.
    f = gp.prior("f", X=t)

    # Linear regression weights
    a1 = pm.Normal("a1", mu=0.0, sigma=2.0)
    a2 = pm.Normal("a2", mu=0.0, sigma=2.0)

    # Observation noise
    sigma = pm.HalfNormal("sigma", sigma=0.1)

    # Linear component
    lin_term = a1 * X1.ravel() + a2 * X2.ravel()

    # Total mean = GP trend + linear predictors
    mu = f + lin_term

    # Final observed likelihood
    y_obs = pm.Normal("y_obs", mu=mu, sigma=sigma, observed=y)
    trace = pm.sample(
        draws=3000,
        tune=4000,
        chains=5,
        cores=6,
        target_accept=0.95,
        nuts_sampler="nutpie",
        random_seed=42,
    )
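For reference, the divergence counts and fits mentioned above can be compared like this (trace_marginal and trace_latent are just illustrative names for the traces produced by the two models, saved under separate variables):

import arviz as az

# Divergent transitions reported by the sampler for each model
print("Marginal (y=None):", int(trace_marginal.sample_stats["diverging"].sum()))
print("Latent:", int(trace_latent.sample_stats["diverging"].sum()))

# Posterior summaries of the parameters shared by both models
print(az.summary(trace_marginal, var_names=["a1", "a2", "sigma", "eta", "lengthscale"]))
print(az.summary(trace_latent, var_names=["a1", "a2", "sigma", "eta", "lengthscale"]))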