DMelt:DataAnalysis/6 Advanced Data Fitting
Advanced data fitting
This section deals with advanced techniques of fitting data using data in arbitrary dimension, arbitrary function. In this section we will discuss how to perform more interactive fit using scripting.
Chi2 fit in 1D
Let us perform a fit of data points using a function defined by a Python script:
from java.lang.Math import *
from jhplot import *
class PowerLawFunc(): # function describing a power law
def __init__(self, p0,p1):
self.p0=p0; self.p1=p1;
def valueOf(self, x):
return self.p0*pow(x,self.p1); # power law
def getPlot(self,min,max): # plotting part
f=P1D("p0*x^p1")
step=(max-min)/100.0
for i in range(100):
x=min+step*i
y=self.valueOf(x)
f.add(x,y)
return f
As you can see, this is a simple power-law function of one variable "x" defined in the method
"valueOf". One interesting method of this function is
getPlot(min,max) that allows to plot this function between min and max.
This method returns jhplot.P1D plottable on
jhplot.HPlot
Now let us perform a fit with this function using Minuit minimisation program and the Chi2 method.
where "y" is the data point, "e" is the expectation, $\sigma$ is “variance” related to the measurement error for "y"
Let us make a Jython script which:
- Performs Chi2 fit with the above function
- Prints evolution of chi2 for each iteration and print final chi2
- Calculates Minuit convergence steps and change Migrad/Minuit strategy in case of a failed fit
- Calculates calculates Migrad parameter correlations
- Calculate uncertainty on the fit parameters
- Plot data and the minimized function
For this task, we will use org.freehep.math.minuit.MnMigrad class.
The script that does all the above using this class is shown bellow:
from org.freehep.math.minuit import FCNBase,MnMigrad,MnUserParameters
from java.lang.Math import *
from jhplot import *
from java.awt import Color
data=P1D("data") # input data
data.add(5,4,2); data.add(10,8,3) # each point has X, Y, standard error on Y
data.add(20,10,3); data.add(30,12,3)
data.add(40,18,4); data.add(70,20,4)
data.add(90,26,5); data.add(150,37,6)
data.add(200,50,6); data.add(300,60,7)
data.add(500,90,9); data.add(700,120,10)
class PowerLawFunc(FNon): # function describing a power law
def value(self, x):
return self.p[0]*pow(x[0],self.p[1])
class PowerLawChi2FCN(FCNBase): # define chi2 function
def __init__(self, meas, pos, mvar):
self.meas=meas;
self.pos=pos;
self.mvar=mvar;
def valueOf(self, par):
pl = PowerLawFunc("PowerLaw",1,2)
pl.setParameters(par)
chi2 = 0
for n in range(len(self.meas)):
delta = pl.value([self.pos[n]]) - self.meas[n]
sigma2=self.mvar[n]*self.mvar[n]
if (sigma2>0): chi2=chi2+(delta*delta)/sigma2
print "Chi2=",chi2
return chi2;
theFCN = PowerLawChi2FCN(data.getArrayY(), data.getArrayX(), data.getArrayErr())
upar = MnUserParameters()
upar.add("p0", 1.0, 0.1) # initial value and expected error
upar.add("p1", 0.2, 0.1);
migrad =MnMigrad(theFCN, upar)
print "start Migrad "
vmin = migrad.minimize()
if vmin.isValid()==False:
print "Alternative strategy"
migrad = MnMigrad(theFCN, upar, 2)
vmin = migrad.minimize()
state=vmin.userState()
output=state.params() # output parameters
print vmin # print details of the fit
c1 = HPlot() # plot data and the fit function
c1.visible()
min=0; max=800 # min and max values for plotting
c1.setRangeX(min,max)
c1.setRangeY(0,150)
c1.draw(data) # plot data
pl = PowerLawFunc("PowerLaw",1,2) # plot fit result
pl.setParameters(output)
ff=F1D(pl,min,max)
ff.setPenWidth(2); ff.setColor(Color.blue)
c1.draw(ff)
The fit result of the data is shown below, where the blue result is the fit:
The fit parameters are:
p0=1.14152 +/- 0.276695
p1=0.704415 +/- 0.0431544
Here is the complete output of the script:
start Migrad
Chi2= 470.358172895
Chi2= 151.491535555
Chi2= 152.13739461
Chi2= 151.491535555
Chi2= 150.199077966
Chi2= 146.963922788
Chi2= 137.231555692
Chi2= 108.020960351
Chi2= 29.5665676492
Chi2= 224.944087841
Chi2= 11.1675109612
Chi2= 3.09063261351
Chi2= 2.83286407423
Chi2= 2.83089798953
Chi2= 2.60969185498
Chi2= 2.58038208008
Chi2= 2.37208952261
Chi2= 2.37208949274
Chi2= 2.37209280753
Minuit did successfully converge.
# of function calls: 77
minimum function value: 2.37209
minimum edm: 2.72672e-09
minimum internal state vector: LAVector parameters: 1.14152 0.704415
minimum internal covariance matrix: LASymMatrix parameters:
0.0765603 -0.0117485
-0.0117485 0.00186230
# ext. || name || type || value || error +/-
0 || p0 || free || 1.14152 || 0.276695
1 || p1 || free || 0.704415 || 0.0431544
MnUserCovariance:
0.0765603 -0.0117485
-0.0117485 0.00186230
MnUserCovariance parameter correlations:
1.00000 -0.983907
-0.983907 1.00000
MnGlobalCorrelationCoeff:
0.983907
0.983907
Maximum Likelihood fit in 1D
Alternatively, one can fit data using the Maximum Likelihood method. Again, we will use
MnMigrad class. This approach is shown in this script:
from org.freehep.math.minuit import FCNBase,MnMigrad,MnUserParameters
from java.lang.Math import *
from jhplot import *
from java.awt import Color
data=P1D("data")
data.add(5,4,2); data.add(10,8,3) # each point has X, Y, standard error on Y
data.add(20,10,3); data.add(30,12,3)
data.add(40,18,4); data.add(70,20,4)
data.add(90,26,5); data.add(150,37,6)
data.add(200,50,6); data.add(300,60,7)
data.add(500,90,9); data.add(700,120,10)
class PowerLawFunc(FNon): # function describing a power law
def value(self, x):
return self.p[0]*pow(x[0],self.p[1])
class PowerLawLogLikeFCN(FCNBase): # define log-likehood.
def __init__(self, meas, pos, mvar):
self.meas=meas;
self.pos=pos;
self.mvar=mvar;
def valueOf(self, par):
pl = PowerLawFunc("PowerLaw",1,2)
pl.setParameters(par)
logsum = 0
for n in range(len(self.meas)):
mu = pl.value([self.pos[n]])
k=self.meas[n]
logsum = logsum+(k*log(mu) - mu);
print "Log likehood=",-1*logsum
return -1*logsum;
theFCN = PowerLawLogLikeFCN(data.getArrayY(), data.getArrayX(), data.getArrayErr())
upar = MnUserParameters()
upar.add("p0", 1.0, 0.1) # initial value and expected error
upar.add("p1", 0.2, 0.1);
migrad =MnMigrad(theFCN, upar)
print "start Migrad "
vmin = migrad.minimize()
if vmin.isValid()==False:
print "Alternative strategy"
migrad = MnMigrad(theFCN, upar, 2)
vmin = migrad.minimize()
state=vmin.userState()
output=state.params() # output parameters
print vmin # print details of the fit
c1 = HPlot() # plot data and the fit function
c1.visible()
min=0; max=800 # min and max values for plotting
c1.setRangeX(min,max)
c1.setRangeY(0,150)
c1.draw(data)
pl = PowerLawFunc("PowerLaw",1,2) # plot fit result
pl.setParameters(output)
ff=F1D(pl,min,max)
ff.setPenWidth(2); ff.setColor(Color.blue)
c1.draw(ff)
Note the difference: we replaced "PowerLawChi2FCN" that implements a Chi2 minimization function with "PowerLawLogLikeFCN" which calculates the sum of $y_i*\log(e_i)-e_i$. The Minuit minimizes this function (note the minus in the front, which means that the likelihood sum should be maximal). As before, the script prints the value of the maximum likelihood. The above examples can be written in Java.
Fitting data using GROOT
GROOT provides another Java API to fit data. Here is a simple example that shows how to fit data using a custom function:
from javax.swing import JFrame
from org.jlab.groot.data import H1F
from org.jlab.groot.fitter import DataFitter
from org.jlab.groot.graphics import *
from org.jlab.groot.math import *
from java.lang import Math
# define a function
class CustomFunction(Func1D): # function describing a power law
def evaluate(self, x):
xsum = 0.0
for i in range(self.getNPars()):
xsum = xsum + self.getParameter(i)*Math.pow(x,i)
return xsum
frame = JFrame("Basic GROOT Demo")
canvas =EmbeddedCanvas()
frame.setSize(800,500)
histogram = H1F("histogram",100,-5.0,5.0)
f1 = CustomFunction("CustomFunction", -5.1, 5.1)
f1.addParameter("p0")
f1.setParameter(0, 30.0)
f1.addParameter("p1")
f1.setParameter(1, -6.0)
f1.addParameter("p2")
f1.setParameter(2, 1.8)
f1.addParameter("p3")
f1.setParameter(3, .0001)
rndm = RandomFunc(f1)
for j in range(64000):
histogram.fill(rndm.random())
histogram.setTitleX("Randomly Generated Function")
histogram.setTitleY("Counts")
canvas.getPad(0).setTitle("CustomFunction Example")
histogram.setLineWidth(2)
histogram.setLineColor(21)
histogram.setFillColor(34)
histogram.setOptStat(10)
f1.setLineColor(5)
f1.setLineWidth(7)
f1.setOptStat(111110)
DataFitter.fit(f1, histogram, "Q")
canvas.draw(histogram)
canvas.draw(f1,"same")
canvas.setFont("Avenir")
canvas.setTitleSize(32)
canvas.setAxisTitleSize(24)
canvas.setAxisLabelSize(18)
canvas.setStatBoxFontSize(18)
frame.add(canvas)
frame.setLocationRelativeTo(None)
frame.setVisible(True)
which leads to this image:
Fitting data in multiple dimensions
The org.freehep.math.minuit.MnMigrad class can also perform fit
in multiple dimensions.
Now we will show how to fit data in a fully controlled way in several dimensions. As example, we will consider 2D data fitted with 2D function.
from org.freehep.math.minuit import FCNBase,MnMigrad,MnUserParameters
from java.lang.Math import *
from jhplot import *
from java.awt import Color
from java.util import Random
h1 = H2D("Test",16,-5.0, 5.0, 16, -5.0, 5.0)
rand = Random();
for i in range(10000):
h1.fill(2*rand.nextGaussian(),rand.nextGaussian())
class Gauss2D(FNon): # 2D Gaussian
def value(self, x):
if self.p[2]>0 and self.p[4]>0:
d1=(x[0]-self.p[1])/self.p[2]
d2=(x[1]-self.p[3])/self.p[4]
return self.p[0]*exp( -d1*d1-d2*d2 )
else:
return 1.0e10
class GaussChi2(FCNBase): # define chi2 function
def __init__(self, meas, posX, posY, errors):
self.meas=meas;
self.posX=posX;
self.posY=posY;
self.errX=0.5*(self.posX[1]-self.posX[0])
self.errY=0.5*(self.posY[1]-self.posY[0])
self.errors=errors;
def valueOf(self, par):
pl = Gauss2D("2D",2,5) # 2 varibles, 5 parameters
pl.setParameters(par)
chi2 = 0
for n1 in range(len(self.posX)):
for n2 in range(len(self.posY)):
delta = pl.value([self.posX[n1],self.posY[n2]])-self.meas[n1][n2]
sigma2=self.errors[n1][n2]*self.errors[n1][n2]
if (sigma2>0): chi2=chi2+(delta*delta)/sigma2;
print "Chi2=",chi2
return chi2;
theFCN = GaussChi2(h1.binHeights(), h1.getLowerEdgesX(), h1.getLowerEdgesY(), h1.binErrors())
upar = MnUserParameters()
upar.add("p0", 1000.0, 1) # initial value and expected error
upar.add("p1", 0.0, 0.1);
upar.add("p2", 1.0, 0.1) # initial value and expected error
upar.add("p3", 0.0, 0.1);
upar.add("p4", 1.0, 0.1);
migrad =MnMigrad(theFCN, upar)
vmin = migrad.minimize()
if vmin.isValid()==False:
print "Alternative strategy"
migrad = MnMigrad(theFCN, upar, 2)
vmin = migrad.minimize()
state=vmin.userState()
output=state.params() # output parameters
print vmin # print details of the minimization
c1 = HPlot3D("Plot",600,400,2,1)
c1.visible(1)
c1.setRange(-5,5,-5,5,0,500)
c1.draw(h1)
c1.cd(2,1) # plot fit function
c1.setRange(-5,5,-5,5,0,500)
pl =Gauss2D("Gauss2D",2,5) # 2 varibles, 5 parameters
pl.setParameters(output) # set fitted parameters
ff=F2D(pl,-5,5,-5,5)
c1.draw(ff)
The result of this script is shown below. The data are plotted on the left canvas, while the function with fitted parameters after the chi2 minimisation is on the right figure.
Similarly, one can implement the likelihood fit as it was discussed inn 1D case.