I am implementing LDA with pymc3 using the referred code for pymc from the post
Latent Dirichlet Allocation in PyMC
I am trying to use it for pymc3 bt having problems defining
w
import numpy as np
import pymc3 as pm, theano, theano.tensor as t
K = 2 # number of topics
V = 4 # number of words
D = 3 # number of documents
data = np.array([[1, 1, 1, 1], [1, 1, 1, 1], [0, 0, 0, 0]])
alpha = np.ones(K)
beta = np.ones(V)
model = pm.Model()
Wd = [len(doc) for doc in data]
(D, W) = data.shape
pi = []
def dist_over_words(z,phi):
# Phi = theano.shared(np.array(phi))
# print(phi[1])
for d in range(D):
for i in range(W):
zi = z[d][i]
pi.append(phi[zi])
with model:
theta = [pm.Dirichlet("pthetax_%s" % i, a=alpha, shape=K) for i in range(D)]
phi =[pm.Dirichlet("pphix_%s" % k, a=beta,shape=V) for k in range(K)]
z = [pm.Categorical('zx_%i' % d,
p = theta[d],
shape=Wd[d])
for d in range(D)]
w = pm.Categorical("wx_d_i",p = dist_over_words(z,phi), observed = data)
with model:
step1 = pm.Metroplolis(vars = [theta,phi,z,w])
tr = step1.sample(1000,step = [step1])
pm.plots.traceplot(tr, ['theta', 'phi', 'z','w']);
I am getting the error :
pi.append(phi[zi])
TypeError: list indices must be integers or slices, not TensorVariable
How can I model for w
since phi
is a list and z[d][i]
will always be a tensor?
Help much appreciated