Question

I'm working on the analysis of some data. Theoretically, there should be two gaussians, that overlap more or less. I found out that fitting the data works best if you don't fit the two gaussians but their distribution function. Actually, this fit seems to work rather well, but if i go back to the density representation of the data it looks somehow wired. The pictures attached speak for them self. Any idea what goes wrong there?Fit in the distribution

What the parameters give in Gaussians (Green true data, blue is the fit

Here is my code:`

import numpy as np
import scipy
import scipy.special
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import leastsq
from scipy.special import erf




def fitfunction(params,Bins):
    amp_ratio, sigma1, sigma2, mu, Delta = params
    return  amp_ratio * 0.5   * (1 + erf((Bins -mu)/np.sqrt(2*sigma1**2)))   + (1-amp_ratio)* 0.5 * (1 + erf((Bins - (mu + Delta))/np.sqrt(2*sigma2**2)))


def errorfunction(params, Reale_werte, Bins):
    amp_ratio, sigma1, sigma2, mu, Delta = params

    if(amp_ratio > 0) and (amp_ratio < 1):
        return (Reale_werte - fitfunction(params, Bins)) 
    else:
        return (Reale_werte - fitfunction(params, Bins))*100

def Gaussians(params, Bins):
    amp_ratio, sigma1, sigma2, mu, Delta = params
    return amp_ratio/np.sqrt(2*np.pi*sigma1*sigma1) * np.exp(-((Bins-mu)**2) / np.sqrt(2*np.pi*sigma1*sigma1)) + (1-amp_ratio)/np.sqrt(2*np.pi*sigma2*sigma2) * np.exp(-((Bins-(mu + Delta))**2) / np.sqrt(2*np.pi*sigma2*sigma2))

def Gaussian1(params, Bins):
    amp_ratio, sigma1, sigma2, mu, Delta = params
    return amp_ratio/np.sqrt(2*np.pi*sigma1*sigma1) * np.exp(-((Bins-mu)**2) / np.sqrt(2*np.pi*sigma1*sigma1))

def Gaussian2(params, Bins):
    amp_ratio, sigma1, sigma2, mu, Delta = params
    return (1-amp_ratio)/np.sqrt(2*np.pi*sigma2*sigma2) * np.exp(-((Bins-(mu + Delta))**2) / np.sqrt(2*np.pi*sigma2*sigma2))


params = 0.25,1,10,1,5
params_init = 0.75, 0.8, 2.5, 1.2, 4

Bins = np.linspace(-4,18,1024)

data = Gaussians(params, Bins)

summe = np.zeros_like(Bins)

for i in range(Bins.shape[0]-1):
    summe[i+1] = summe[i] + data[i]

summe = summe/summe[Bins.shape[0]-1]

params_result = leastsq(errorfunction, params_init, args=(summe, Bins))  
plt.plot(Bins,fitfunction(params_result[0], Bins))
plt.plot(Bins, summe, 'x')
plt.savefig("Distribution.png")
plt.show()



print params_result[0]

plt.plot(Bins,Gaussians(params_result[0], Bins))
plt.plot(Bins, data, 'x')
plt.savefig("Gaussian.png")
plt.show()`
Was it helpful?

Solution

I was wondering whether kernel density estimation works in your case:

from scipy.stats import kde
import matplotlib.pyplot as plt     
density = kde.gaussian_kde(x)  # your data
xgrid = np.linspace(x.min(), x.max(), 1024)   
plt.plot(xgrid, density(xgrid))
plt.show()
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top