Use of indexes to select slices of variable tensors

Hello,

I am a new pymc and pytensor user and I am facing a problem with the following fit.
Here “data” is a 100 size array containing the data points with some np.nan elements and “total_cov” is the corresponding 100x100 covariance matrix, which contains np.nan elements when “data” is np.nan.



import numpy as np
import pytensor.tensor as pt
import pymc as pm

#radial coordinate grid matching the (full, pre-masking) data vector
r = np.linspace(0, 1000, len(data))

#remove the NaNs from data and covariance matrix
#BUG FIX: indexing the 2-D covariance with np.where(np.isfinite(total_cov))
#flattens it to a 1-D array of its finite entries, which is no longer a
#valid covariance matrix for MvNormal. Use np.ix_ with the finite-data
#indices so the matching rows AND columns are kept as a square sub-matrix.
w_not = np.where(np.isfinite(data))[0]
total_cov = total_cov[np.ix_(w_not, w_not)]
data = data[w_not]


def gnfw(x, p0, c500, a, b, c):
    """Evaluate the generalized NFW (gNFW) profile at scaled radius x.

    Parameters
    ----------
    x : scalar or array-like
        Radius (in units such that x*c500 is dimensionless).
    p0 : normalization of the profile.
    c500 : concentration parameter multiplying x.
    a, b, c : shape parameters (intermediate, outer, and inner slopes).

    Returns
    -------
    Profile value(s) with the same shape as x.
    """
    u = x * c500
    return p0 * u ** (-c) * (1 + u ** a) ** ((c - b) / a)
 

# Bayesian model: gNFW profile with free (p0, a, b), fixed (c500, c), and a
# multiplicative factor `eta` applied to the last 50 radial bins.
with pm.Model() as model :

    #gNFW params (c500 and c are held fixed; the others get uniform priors)
    p0 = pm.Uniform("p0", lower=0, upper=20)
    c500 = 1.177
    a = pm.Uniform("a", lower=0, upper=10)
    b = pm.Uniform("b", lower=0, upper=20)
    c = 0.3081
    
    #multiplicative factor
    eta = pm.Uniform("eta", lower=0., upper=4.)

    #compute the model profile
    prof = gnfw(r, p0, c500, a, b, c)

    #multiply last bins by "eta"
    # set_subtensor returns a NEW symbolic tensor with prof[-50:] replaced
    # by eta*prof[-50:]; `prof` itself is not mutated.
    mu = pt.subtensor.set_subtensor(prof[-50:], eta*prof[-50:])

    #consider only the positions at which "data" is not np.nan
    # (plain numpy integer-array indexing works on pytensor variables)
    mu = mu[w_not]

    # NOTE(review): total_cov must be the square (len(w_not), len(w_not))
    # sub-matrix matching `data` — a flat np.where extraction of the 2-D
    # covariance upstream would silently break this; verify its shape.
    ll = pm.MvNormal("likelihood", mu=mu, cov=total_cov, observed=data)

# Re-enter the model context to sample. `with model as model:` was
# redundant — Model.__enter__ returns the model itself, so the `as model`
# rebinding is a no-op; the idiomatic form is simply `with model:`.
with model:
    inference_data = pm.sample(chains=30, return_inferencedata=True, draws=1000, tune=1000, target_accept=0.95)

The fit works perfectly well when there is no np.nan in my data, so there’s no need to select the “w_not” elements in “mu”. However, when I do need to select the “w_not” elements, this approach doesn’t seem to work.

I was not able to find a way to select some elements of “mu” with PyTensor functionalities. Also, I think that the problem is related to “subtensor.set_subtensor”. Is there a way to do what I would like to?

Can you provide a fully reproducible snippet (and as simple as possible, while still showing your problem)?

Miren June 13, 2024, 4:04pm 3

Yes, this reproduces my problem:

import numpy as np
import pytensor.tensor as pt
import pymc as pm
import matplotlib.pyplot as plt


def gnfw(x, p0, c500, a, b, c):
    """Compute the gNFW profile at x.

    x may be a scalar or a numpy array; p0 is the normalization, c500 the
    concentration, and (a, b, c) the slope parameters. Returns the profile
    evaluated elementwise at x.
    """
    scaled = x * c500
    inner_slope = scaled ** (-c)
    outer_shape = (1 + scaled ** a) ** ((c - b) / a)
    return p0 * inner_slope * outer_shape


### With subtensor to use eta

# Build a synthetic gNFW data set and punch a few NaNs into it so the
# masking logic in the model below can be exercised.
r = np.linspace(0.1, 1, 100)
data = gnfw(r, 8.4, 1.177, 2., 4, 0.3081)

# add fake NaNs at a handful of positions
for bad_idx in (3, 50, 80):
    data[bad_idx] = np.nan

# keep only the finite entries of the data vector
w_not = np.where(np.isfinite(data))[0]
data = data[w_not]

# diagonal covariance matrix built from 10% relative errors
fake_error = 0.1 * data
total_cov = np.diag(fake_error ** 2)

# sanity-check plot of the masked data with its 1-sigma error bars
plt.figure()
plt.errorbar(r[w_not], data, np.sqrt(np.diag(total_cov)))
plt.show()

# Same model as in the original post, applied to the synthetic data above:
# gNFW profile with free (p0, a, b), fixed (c500, c), and `eta` scaling the
# last 50 bins.
with pm.Model() as model :

    #gNFW params (c500 and c fixed; uniform priors on the rest)
    p0 = pm.Uniform("p0", lower=0, upper=20)
    c500 = 1.177
    a = pm.Uniform("a", lower=0, upper=10)
    b = pm.Uniform("b", lower=0, upper=20)
    c = 0.3081
    
    #multiplicative factor
    eta = pm.Uniform("eta", lower=0., upper=4.)

    #compute the model profile
    prof = gnfw(r, p0, c500, a, b, c)

    #multiply last bins by "eta"
    # set_subtensor builds a NEW tensor with the tail replaced; prof is
    # left untouched.
    mu = pt.subtensor.set_subtensor(prof[-50:], eta*prof[-50:])

    #consider only the positions at which "data" is not np.nan
    mu = mu[w_not]

    # total_cov here is diagonal and already sized to the masked data,
    # so shapes are consistent.
    ll = pm.MvNormal("likelihood", mu=mu, cov=total_cov, observed=data)

# Re-enter the model context to sample; `as model` dropped because
# Model.__enter__ returns the model itself, making the rebinding a no-op.
with model:
    inference_data = pm.sample(chains=30, return_inferencedata=True, draws=1000, tune=1000, target_accept=0.95)

And before we jump, is there an error or the model just doesn’t sample as well?

Miren June 13, 2024, 4:38pm 5

No, there is no error, but convergence is never reached. Removing three data points should not cause this problem, so I wonder whether this part is really doing what I intend:

    #multiply last bins by "eta"
    mu = pt.subtensor.set_subtensor(prof[-50:], eta*prof[-50:])

    #consider only the positions at which "data" is not np.nan
    mu = mu[w_not]

If prof was a numpy array I would write:

    #multiply last bins by "eta"
    mu = prof
    mu[-50:] = eta*prof[-50:]

    #consider only the positions at which "data" is not np.nan
    mu = mu[w_not]

Thank you very much for your responsiveness.

Miren June 17, 2024, 11:47am 6

In case it might be useful, this is the case for which there is no problem:


### Without subtensor & NaNs

# Synthetic data set with no NaNs: every entry of `data` is finite, so the
# finite-index selection below keeps all 100 points (a deliberate no-op).
r = np.linspace(0.1, 1, 100)
data = gnfw(r, 8.4, 1.177, 2., 4, 0.3081)

# no NaNs: the finite mask selects every index
w_not = np.flatnonzero(np.isfinite(data))
data = data[w_not]

# diagonal covariance matrix from 10% relative errors
fake_error = 0.1 * data
total_cov = np.diag(fake_error * fake_error)


# Control case: identical model but with `eta` (and hence set_subtensor)
# disabled — mu is just the raw gNFW profile. This variant samples fine.
with pm.Model() as model :

    #gNFW params (c500 and c fixed; uniform priors on the rest)
    p0 = pm.Uniform("p0", lower=0, upper=20)
    c500 = 1.177
    a = pm.Uniform("a", lower=0, upper=10)
    b = pm.Uniform("b", lower=0, upper=20)
    c = 0.3081
    
    #multiplicative factor
    #eta = pm.Uniform("eta", lower=0., upper=4.)

    #compute the model profile
    prof = gnfw(r, p0, c500, a, b, c)

    #multiply last bins by "eta"
    # (disabled in this control case — mu is the unmodified profile)
    mu = prof #pt.subtensor.set_subtensor(prof[-50:], eta*prof[-50:])

    #consider only the positions at which "data" is not np.nan
    # (w_not covers every index here, so this is a no-op)
    mu = mu[w_not]

    ll = pm.MvNormal("likelihood", mu=mu, cov=total_cov, observed=data)

# Re-enter the model context to sample; `as model` dropped because
# Model.__enter__ returns the model itself, making the rebinding a no-op.
with model:
    inference_data = pm.sample(chains=30, return_inferencedata=True, draws=1000, tune=1000, target_accept=0.95)