In short, I have data I'm trying to fit, and LMFIT and Scipy give 2 different solutions, with LMFIT being significantly worse.
Just some detail as to what is going on:
The "model" is a population weighted average. The "populations" are derived from 4 variables (the adjustable parameters). There are 2 independent "models" (n and h), but these are combined at the very end. The residuals is a combination of the residuals of both of these models against their respective data.
Assume the math is correct. The question is then why the different solutions when given identical functions, variables, bounds, and methods.
Given this MVE
import numpy as np
import scipy.optimize as so
from scipy.sparse.linalg import lsmr
from lmfit import minimize,Parameters
from lmfit.printfuncs import report_fit
data_1=[[117.417, 117.423, 117.438, 117.501], [124.16, 124.231, 124.089, 124.1], [115.632, 115.645, 115.828, 115.947], [118.314, 118.317, 118.287, 118.228], [108.407, 108.419, 108.396, 108.564]]
data_2=[[9.05, 9.044, 9.057, 9.079], [9.178, 9.167, 9.16, 9.176], [7.888, 7.893, 7.911, 7.895], [7.198, 7.202, 7.197, 7.213], [7.983, 7.976, 7.979, 8.02]]
def get_populations_lmfit(initial,io):
k,k1,x,y=initial['kvar'],initial['k1var'],initial['xvar'],initial['yvar']
kx,k1x=k*x,k1*x
ky,k1y=k*y,k1*y
kxy,k1xy=k*x*y,k1*x*y
partial_free_concentration_WT=(np.sqrt((k*k1)**2+(8*io*k*k1)+(8*io*k*k1**2))-(k*k1))/(4*(1+k1))
partial_closed_concentration_WT=(((4*io)+(4*io*k1))/(4*(1+k1)**2))-(partial_free_concentration_WT/(1+k1))
partial_open_concentration_WT=k1*partial_closed_concentration_WT
partial_free_concentration_L273A=(np.sqrt((kx*k1x)**2+(8*io*kx*k1x)+(8*io*kx*k1x**2))-(kx*k1x))/(4*(1+k1x))
partial_closed_concentration_L273A=(((4*io)+(4*io*k1x))/(4*(1+k1x)**2))-(partial_free_concentration_L273A/(1+k1x))
partial_open_concentration_L273A=k1x*partial_closed_concentration_L273A
partial_free_concentration_I272A=(np.sqrt((ky*k1y)**2+(8*io*ky*k1y)+(8*io*ky*k1y**2))-(ky*k1y))/(4*(1+k1y))
partial_closed_concentration_I272A=(((4*io)+(4*io*k1y))/(4*(1+k1y)**2))-(partial_free_concentration_I272A/(1+k1y))
partial_open_concentration_I272A=k1y*partial_closed_concentration_I272A
partial_free_concentration_ILAA=(np.sqrt((kxy*k1xy)**2+(8*io*kxy*k1xy)+(8*io*kxy*k1xy**2))-(kxy*k1xy))/(4*(1+k1xy))
partial_closed_concentration_ILAA=(((4*io)+(4*io*k1xy))/(4*(1+k1xy)**2))-(partial_free_concentration_ILAA/(1+k1xy))
partial_open_concentration_ILAA=k1xy*partial_closed_concentration_ILAA
local_chi2=0
for experimental_shifts_n,experimental_shifts_h in zip(data_1,data_2):
populations=np.array([[partial_free_concentration_WT,partial_open_concentration_WT,partial_closed_concentration_WT],[partial_free_concentration_L273A,partial_open_concentration_L273A,partial_closed_concentration_L273A],[partial_free_concentration_I272A,partial_open_concentration_I272A,partial_closed_concentration_I272A],[partial_free_concentration_ILAA,partial_open_concentration_ILAA,partial_closed_concentration_ILAA]])
experimental_shifts_n=(np.array([experimental_shifts_n])/10)*800
experimental_shifts_h=(np.array([experimental_shifts_h]))*800
least_squared_fit_n=lsmr(populations/io,experimental_shifts_n,maxiter=10)
least_squared_fit_h=lsmr(populations/io,experimental_shifts_h,maxiter=10)
local_chi2+=((least_squared_fit_n[3])**2+least_squared_fit_h[3]**2)
return local_chi2
def get_populations_scipy(initial,io):
k,k1,x,y=initial[0],initial[1],initial[2],initial[3]
kx,k1x=k*x,k1*x
ky,k1y=k*y,k1*y
kxy,k1xy=k*x*y,k1*x*y
partial_free_concentration_WT=(np.sqrt((k*k1)**2+(8*io*k*k1)+(8*io*k*k1**2))-(k*k1))/(4*(1+k1))
partial_closed_concentration_WT=(((4*io)+(4*io*k1))/(4*(1+k1)**2))-(partial_free_concentration_WT/(1+k1))
partial_open_concentration_WT=k1*partial_closed_concentration_WT
partial_free_concentration_L273A=(np.sqrt((kx*k1x)**2+(8*io*kx*k1x)+(8*io*kx*k1x**2))-(kx*k1x))/(4*(1+k1x))
partial_closed_concentration_L273A=(((4*io)+(4*io*k1x))/(4*(1+k1x)**2))-(partial_free_concentration_L273A/(1+k1x))
partial_open_concentration_L273A=k1x*partial_closed_concentration_L273A
partial_free_concentration_I272A=(np.sqrt((ky*k1y)**2+(8*io*ky*k1y)+(8*io*ky*k1y**2))-(ky*k1y))/(4*(1+k1y))
partial_closed_concentration_I272A=(((4*io)+(4*io*k1y))/(4*(1+k1y)**2))-(partial_free_concentration_I272A/(1+k1y))
partial_open_concentration_I272A=k1y*partial_closed_concentration_I272A
partial_free_concentration_ILAA=(np.sqrt((kxy*k1xy)**2+(8*io*kxy*k1xy)+(8*io*kxy*k1xy**2))-(kxy*k1xy))/(4*(1+k1xy))
partial_closed_concentration_ILAA=(((4*io)+(4*io*k1xy))/(4*(1+k1xy)**2))-(partial_free_concentration_ILAA/(1+k1xy))
partial_open_concentration_ILAA=k1xy*partial_closed_concentration_ILAA
local_chi2=0
for experimental_shifts_n,experimental_shifts_h in zip(data_1,data_2):
populations=np.array([[partial_free_concentration_WT,partial_open_concentration_WT,partial_closed_concentration_WT],[partial_free_concentration_L273A,partial_open_concentration_L273A,partial_closed_concentration_L273A],[partial_free_concentration_I272A,partial_open_concentration_I272A,partial_closed_concentration_I272A],[partial_free_concentration_ILAA,partial_open_concentration_ILAA,partial_closed_concentration_ILAA]])
experimental_shifts_n=(np.array([experimental_shifts_n])/10)*800
experimental_shifts_h=(np.array([experimental_shifts_h]))*800
least_squared_fit_n=lsmr(populations/io,experimental_shifts_n,maxiter=10)
least_squared_fit_h=lsmr(populations/io,experimental_shifts_h,maxiter=10)
local_chi2+=((least_squared_fit_n[3])**2+least_squared_fit_h[3]**2)
return local_chi2
io=270000
params=Parameters()
params.add('kvar',value=500,min=0,max=np.inf)
params.add('k1var',value=0.02,min=0,max=np.inf)
params.add('xvar',value=7,min=0,max=np.inf)
params.add('yvar',value=30,min=0,max=np.inf)
lmfit_solution=minimize(get_populations_lmfit,params,args=(io,),method='nelder',max_nfev=1000)
scipy_solution=so.minimize(get_populations_scipy,args=(io,), x0=np.array([500,0.02,7,30]),bounds=((0,np.inf),)*4,method='Nelder-Mead',options={'maxiter':1000})
print(report_fit(lmfit_solution))
print(scipy_solution)
You will note the solutions from scipy vs. lmfit are completely different. Not only that, the chi2 for scipy is significantly better than lmfit. I don't quite understand why however. The setup is practically identical, the conditions and bounds and methods are identical, why do they give different results? I.E. Why is LMFITs solution so much worse?
Now I know you can give LMFIT an array of residuals instead of the sum like I have, and while this does improve the solutions a bit, it's still worse than Scipy and significantly worse chi2.
I think you're asking the wrong questions.
lmfithas no advantage here, so the easy decision is to scrap it and use scipy directly; but - your current code is cursed, so that should be fixed up. Vectorise, factor out common expressions, and reduce to a single innerlstsqcall. The following shows that the old and new methods are numerically equivalent to ~13 significant digits, but the new method will be faster and is more simple to debug. Among many other reasons, your old code went through all of the trouble of doing many inner linear fits, but then always threw away the result.