I'm trying to code my own logistic regression, and compare different methods of maximizing the log-likelihood. Using the Newton-CG method, I get the error message "ValueError: setting an array element with a sequence". Reading around, it seems this error rises if the function sought to be minimized returns a non-skalar, but that is not the case here. I need the three methods given below to give the same result (approximately), but when running on my real data, one does not converge, and the other one gives a worse LL than the initial guess, and the third does not run at all.
Why do I get the ValueError message and how can I fix it?
My code (with dummy data, the real data is ~100 measurements) is as follows:
import numpy as np
from numpy import linalg
import scipy
from scipy.optimize import minimize
def CalcLL(beta,xinlist,yinlist):
LL=0.0
ncol=len(beta)
pi=FindPi(xinlist,beta.reshape(ncol,1))
for i in range(len(yinlist)):
LL=LL+np.where(yinlist[i]==1,np.log(pi[i]),np.log(1-pi[i]))
return -LL
def Jacobian(beta,xinlist,yinlist):
ncol=len(beta)
nrow=np.shape(xinlist)[0]
pi=FindPi(xinlist,beta.reshape(ncol,1))
Jac=np.transpose(np.matrix(yinlist-pi))*np.matrix(xinlist)
return Jac
def Hessian(beta,xinlist,yinlist):
ncol=len(beta)
nrow=np.shape(xinlist)[0]
pi=FindPi(xinlist,beta.reshape(ncol,1))
W=FindW(pi)
Hes=np.matrix(np.transpose(xinlist))*(np.matrix(W)*np.matrix(xinlist))
return Hes
def FindPi(xinlist,beta):
rows=np.shape(xinlist)[0]# Number of rows in x_new
cols=np.shape(xinlist)[1]# Number of columns in x_new
expon=np.dot(xinlist,beta)
expon=np.array(expon).reshape(rows,1)
pi=np.exp(expon)/(1+np.exp(expon))
return pi
def FindW(pi):
W=np.zeros(len(pi)*len(pi)).reshape(len(pi),len(pi))
for i in range(len(pi)):
W[i,i]=float(pi[i]*(1-pi[i]))
return W
xinlist=np.matrix([[1,1],[0,1],[1,1],[1,1],[1,1],[0,1],[0,1],[1,1],[1,1],[0,1]])
yinlist=np.transpose(np.matrix([0,0,0,0,0,1,1,1,1,1]))
ncol=np.shape(xinlist)[1]
beta1=np.zeros(ncol).reshape(ncol,1) # Initial guess for parameter values
limit=0.000001 # selfwritten Newton-Raphson method
iter_i=limit+1
while iter_i>limit:
Hes=Hessian(beta1,xinlist,yinlist)
Jac=np.transpose(Jacobian(beta1,xinlist,yinlist))
root_diff=np.array(linalg.inv(Hes)*Jac)
beta1=beta1+root_diff
iter_i=np.sum(root_diff*root_diff)
print "When running self-written algorithm, the log-likelihood is",-CalcLL(beta1,xinlist,yinlist)
beta2=np.zeros(ncol).reshape(ncol,1)
res=minimize(CalcLL,beta2,args=(xinlist,yinlist),method='Nelder-Mead',options={'xtol':1e-8,'disp':True,'maxiter':10000})
beta2=res.x
print "The log-likelihood using Nelder-Mead is", -CalcLL(beta2,xinlist,yinlist)
beta3=np.zeros(ncol).reshape(ncol,1)
res=minimize(CalcLL,beta3,args=(xinlist,yinlist),method='Newton-CG',jac=Jacobian,hess=Hes,options={'xtol':1e-8,'disp':True})
beta3=res.x
print "The log-likelihood using Newton-CG is", -CalcLL(beta3,xinlist,yinlist)
EDIT: The errorstack is as follows: Traceback (most recent call last):
File "MyLogisticRegression2.py", line 62, in res=minimize(CalcLL,beta3,args=(xinlist,yinlist),method='Newton-CG',jac=Jacobian,hess=Hes,options={'xtol':1e-8,'disp':True})
File C:\Python27\lib\site-packages\scipy\optimize_minimize.py, line 447, in minimize **options)
File C:\Python27\lib\site-packages\scipy\optimize\optimize.py, line 2393, in _minimize_newtoncg eta=numpy.min([0.5, numpy.sqrt(maggrad)])
File C:\Python27\lib\site-packages\numpy\core\fromnumeric.py, line 2393, in amin out=out, **kwargs)
File C:\Python27\lib\site-packages\numpy\core_methods.py, line 29, in _amin return umr_minimum(a,axis,None,out,keepdims)
ValueError: setting an array element with a sequence
A = np.array([1,2,3,4,5]); A[0] = [6,7]np.zeros(ncol).reshape(ncol,1)can be written asnp.zeros((ncol, 1)). Ifexponis not a scalar, thennp.array(expon).reshape(rows,1)can just beexpon.reshape(rows,1). I'd also encourage you to not usenp.matrix; better to stick withnp.array.