ComboCode.cc.ivs.sigproc.lmfit.confidence

1 #!/usr/bin/python 2 # -*- coding: utf-8 -*- 3 """ 4 Contains functions to calculate confidence intervals. 5 """ 6 from __future__ import print_function 7 import numpy as np 8 from scipy.stats import f 9 from scipy.optimize import brentq 10 from .minimizer import MinimizerException 11 from cc.ivs.aux import progressMeter as progress 12

13 -def f_compare(ndata, nparas, new_chi, best_chi, nfix=1.):

14 """ 15 Returns the probalitiy for two given parameter sets. 16 nfix is the number of fixed parameters. 17 """ 18 nparas = nparas + nfix 19 nfree = ndata - nparas 20 nfix = 1.0*nfix 21 dchi = new_chi / best_chi - 1.0 22 return f.cdf(dchi * nfree / nfix, nfix, nfree)

23

24 -def copy_vals(params):

25 """Saves the values and stderrs of params in temporay dict""" 26 tmp_params = {} 27 for para_key in params: 28 tmp_params[para_key] = (params[para_key].value, 29 params[para_key].stderr) 30 return tmp_params

31 32

33 -def restore_vals(tmp_params, params):

34 """Restores values and stderrs of params in temporay dict""" 35 for para_key in params: 36 params[para_key].value, params[para_key].stderr = tmp_params[para_key]

37

38 -def conf_interval(minimizer, p_names=None, sigmas=(0.674, 0.95, 0.997), 39 trace=False, maxiter=200, verbose=False, prob_func=None):

40 r"""Calculates the confidence interval for parameters 41 from the given minimizer. 42 43 The parameter for which the ci is calculated will be varied, while 44 the remaining parameters are re-optimized for minimizing chi-square. 45 The resulting chi-square is used to calculate the probability with 46 a given statistic e.g. F-statistic. This function uses a 1d-rootfinder 47 from scipy to find the values resulting in the searched confidence 48 region. 49 50 Parameters 51 ---------- 52 minimizer : Minimizer 53 The minimizer to use, should be already fitted via leastsq. 54 p_names : list, optional 55 Names of the parameters for which the ci is calculated. If None, 56 the ci is calculated for every parameter. 57 sigmas : list, optional 58 The probabilities (1-alpha) to find. Default is 1,2 and 3-sigma. 59 trace : bool, optional 60 Defaults to False, if true, each result of a probability calculation 61 is saved along with the parameter. This can be used to plot so 62 called "profile traces". 63 64 Returns 65 ------- 66 output : dict 67 A dict, which contains a list of (sigma, vals)-tuples for each name. 68 trace_dict : dict 69 Only if trace is set true. Is a dict, the key is the parameter which 70 was fixed.The values are again a dict with the names as keys, but with 71 an additional key 'prob'. Each contains an array of the corresponding 72 values. 73 74 See also 75 -------- 76 conf_interval2d 77 78 Other Parameters 79 ---------------- 80 maxiter : int 81 Maximum of iteration to find an upper limit. 82 prob_func : ``None`` or callable 83 Function to calculate the probability from the optimized chi-square. 84 Default (``None``) uses built-in f_compare (F test). 85 verbose: bool 86 print extra debuggin information. Default is ``False``. 87 88 89 Examples 90 -------- 91 92 >>> from lmfit.printfuncs import * 93 >>> mini=minimize(some_func, params) 94 >>> mini.leastsq() 95 True 96 >>> report_errors(params) 97 ... #report 98 >>> ci=conf_interval(mini) 99 >>> report_ci(ci) 100 ... #report 101 102 Now with quantiles for the sigmas and using the trace. 103 104 >>> ci, trace=conf_interval(mini, sigmas=(0.25,0.5,0.75,0.999),trace=True) 105 >>> fixed=trace['para1']['para1'] 106 >>> free=trace['para1']['not_para1'] 107 >>> prob=trace['para1']['prob'] 108 109 This makes it possible to plot the dependence between free and fixed. 110 """ 111 ci = ConfidenceInterval(minimizer, p_names, prob_func, sigmas, trace, 112 verbose, maxiter) 113 output = ci.calc_all_ci() 114 if trace: 115 return output, ci.trace_dict 116 return output

117

118 -def map_trace_to_names(trace, params):

119 "maps trace to param names" 120 out = {} 121 for name in trace.keys(): 122 tmp_dict = {} 123 tmp = np.array(trace[name]) 124 for para_name, values in zip(params.keys() + ['prob'], tmp.T): 125 tmp_dict[para_name] = values 126 out[name] = tmp_dict 127 return out

128

129 -class ConfidenceInterval(object):

130 """ 131 Class used to calculate the confidence interval. 132 """

133 - def __init__(self, minimizer, p_names=None, prob_func=None, 134 sigmas=(0.674, 0.95, 0.997), trace=False, verbose=False, 135 maxiter=20):

136 """ 137 138 """ 139 if p_names is None: 140 self.p_names = minimizer.params.keys() 141 else: 142 self.p_names = p_names 143 144 # used to detect that .leastsq() has run! 145 if not hasattr(minimizer, 'covar'): 146 minimizer.leastsq() 147 148 self.fit_params = [minimizer.params[p] for p in self.p_names] 149 150 # check that there are at least 2 true variables! 151 nvars = 0 152 for p in self.p_names: 153 if minimizer.params[p].vary: 154 nvars += 1 155 if nvars < 2: 156 raise MinimizerException( 157 'Cannot determine Confidence Intervals with < 2 variables!') 158 159 if prob_func is None or not hasattr(prob_func, '__call__'): 160 self.prob_func = f_compare 161 if trace: 162 self.trace_dict = dict([(i, []) for i in self.p_names]) 163 164 self.verbose = verbose 165 self.minimizer = minimizer 166 self.params = minimizer.params 167 self.trace = trace 168 self.maxiter = maxiter 169 170 self.sigmas = list(sigmas) 171 self.sigmas.sort() 172 173 # copy the best fit values. 174 self.org = copy_vals(minimizer.params) 175 self.best_chi = minimizer.chisqr

176

177 - def calc_all_ci(self):

178 """ 179 Calculates all cis. 180 """ 181 out = {} 182 for p in self.p_names: 183 out[p] = (self.calc_ci(p, -1)[::-1] + 184 [(0., self.params[p].value)] + 185 self.calc_ci(p, 1)) 186 if self.trace: 187 self.trace_dict = map_trace_to_names(self.trace_dict, 188 self.minimizer.params) 189 190 return out

191

192 - def calc_ci(self, para, direction):

193 """ 194 Calculate the ci for a single parameter for a single direction. 195 """ 196 if isinstance(para, str): 197 para = self.minimizer.params[para] 198 199 calc_prob = lambda val, prob: self.calc_prob(para, val, prob) 200 if self.trace: 201 x = [i.value for i in self.minimizer.params.values()] 202 self.trace_dict[para.name].append(x + [0]) 203 204 para.vary = False 205 self.minimizer.prepare_fit(self.params) 206 limit = self.find_limit(para, direction) 207 start_val = para.value 208 a_limit = start_val 209 ret = [] 210 211 for prob in self.sigmas: 212 try: 213 val = brentq(calc_prob, a_limit, limit, rtol=.5e-4, args=prob) 214 except ValueError: 215 self.reset_vals() 216 val = brentq(calc_prob, start_val, limit, rtol=.5e-4, args=prob) 217 a_limit = val 218 ret.append((prob, val)) 219 220 para.vary = True 221 self.reset_vals() 222 return ret

223

224 - def reset_vals(self):

225 restore_vals(self.org, self.minimizer.params)

226

227 - def find_limit(self, para, direction):

228 """ 229 For given para, search a value so that prob(val) > sigmas. 230 """ 231 if self.verbose: 232 print('Calculating CI for ' + para.name) 233 self.reset_vals() 234 #starting step: 235 if para.stderr > 0: 236 step = para.stderr 237 else: 238 step = max(abs(para.value) * 0.05, 0.01) 239 para.vary = False 240 start_val = para.value 241 242 change = 1 243 old_prob = 0 244 i = 0 245 limit = start_val 246 # Find an upper limit, 247 # TODO: also use the change as an stopping condition. 248 while change > 0.001 and old_prob < max(self.sigmas): 249 i += 1 250 limit += step * direction 251 new_prob = self.calc_prob(para, limit) 252 change = new_prob - old_prob 253 old_prob = new_prob 254 if i > self.maxiter: 255 break 256 self.reset_vals() 257 return limit

258

259 - def calc_prob(self, para, val, offset=0., restore=True):

260 """Returns the probability for given Value.""" 261 if restore: 262 restore_vals(self.org, self.minimizer.params) 263 para.value = val 264 self.minimizer.prepare_fit(para) 265 self.minimizer.leastsq() 266 out = self.minimizer 267 prob = self.prob_func(out.ndata, out.ndata - out.nfree, 268 out.chisqr, self.best_chi) 269 270 if self.trace: 271 x = [i.value for i in out.params.values()] 272 self.trace_dict[para.name].append(x + [prob]) 273 return prob - offset

274 275 276

277 -def conf_interval2d(minimizer, x_name, y_name, nx=10, ny=10, limits=None, 278 prob_func=None, verbose=True):

279 r"""Calculates confidence regions for two fixed parameters. 280 281 The method is explained in *conf_interval*: here we are fixing 282 two parameters. 283 284 Parameters 285 ---------- 286 minimizer : minimizer 287 The minimizer to use, should be already fitted via leastsq. 288 x_name : string 289 The name of the parameter which will be the x direction. 290 y_name : string 291 The name of the parameter which will be the y direction. 292 nx, ny : ints, optional 293 Number of points. 294 limits : tuple: optional 295 Should have the form ((x_upper, x_lower),(y_upper, y_lower)). If not 296 given, the default is 5 std-errs in each direction. 297 298 Returns 299 ------- 300 x : (nx)-array 301 x-coordinates 302 y : (ny)-array 303 y-coordinates 304 grid : (nx,ny)-array 305 grid contains the calculated probabilities. 306 307 Examples 308 -------- 309 310 >>> from lmfit.printfuncs import * 311 >>> mini=minimize(some_func, params) 312 >>> mini.leastsq() 313 True 314 >>> x,y,gr=conf_interval2d('para1','para2') 315 >>> plt.contour(x,y,gr) 316 317 Other Parameters 318 ---------------- 319 prob_func : ``None`` or callable 320 Function to calculate the probability from the optimized chi-square. 321 Default (``None``) uses built-in f_compare (F test). 322 """ 323 # used to detect that .leastsq() has run! 324 if not hasattr(minimizer, 'covar'): 325 minimizer.leastsq() 326 327 best_chi = minimizer.chisqr 328 org = copy_vals(minimizer.params) 329 330 if prob_func is None or not hasattr(prob_func, '__call__'): 331 prob_func = f_compare 332 333 x = minimizer.params[x_name] 334 y = minimizer.params[y_name] 335 336 if limits is None: 337 (x_upper, x_lower) = (x.value + 5 * x.stderr, x.value - 5 338 * x.stderr) 339 (y_upper, y_lower) = (y.value + 5 * y.stderr, y.value - 5 340 * y.stderr) 341 elif len(limits) == 2: 342 (x_upper, x_lower) = limits[0] 343 (y_upper, y_lower) = limits[1] 344 345 x_points = np.linspace(x_lower, x_upper, nx) 346 y_points = np.linspace(y_lower, y_upper, ny) 347 grid = np.dstack(np.meshgrid(x_points, y_points)) 348 349 x.vary = False 350 y.vary = False 351 352 def calc_prob(vals, restore=False): 353 if verbose: 354 pmeter.update(1) 355 if restore: 356 restore_vals(org, minimizer.params) 357 x.value = vals[0] 358 y.value = vals[1] 359 360 minimizer.prepare_fit([x, y]) 361 minimizer.leastsq() 362 out = minimizer 363 364 prob = prob_func(out.ndata, out.ndata - out.nfree, out.chisqr, 365 best_chi, nfix=2.) 366 return prob

367 368 if verbose: pmeter = progress.ProgressMeter(total=len(x_points) * len(y_points)) 369 out = x_points, y_points, np.apply_along_axis(calc_prob, -1, grid) 370 371 x.vary, y.vary = True, True 372 restore_vals(org, minimizer.params) 373 minimizer.chisqr = best_chi 374 return out 375

Source Code for Module ComboCode.cc.ivs.sigproc.lmfit.confidence