Monday, November 8, 2010

pyblocxs in Sherpa

pyblocxs: It is a Python Sherpa Extension for Bayesian Low Counts X-ray Spectral Analysis. It provides MCMC methods to explore a model parameter space and summarized the full posterior or profile posterior distributions. Computed parameter uncertainties can include calibration errors. It simulates replicate data from the posterior predictive distributions. It can test for added spectral components by computing the Likelihood Ratio Statistic on replicate data and the ppp-value.
It has two samplers:
(1) Metropolis-Hastings: centered on the best fit values
(2) Metropolis-Hastings mixed with Metropolis jumping rule: centered on the current draw of parameters with the prior scale specfied as a scalar multiple of the Sherpa covar() output.

http://hea-www.harvard.edu/AstroStat/pyBLoCXS/index.html

Saturday, November 6, 2010

User-defined Model and Fit Statistic

An advanced demo for ADASS 2010 illustrating a script that incorporates a user-defined model and a user-defined fit statistic with a prior into an X-ray spectral fit.

#!/usr/bin/env python
import sherpa.astro.ui as sherpa
from sherpa.models import Parameter, ArithmeticModel
from sherpa.stats import truncation_value
import numpy

class MyPowerLaw(ArithmeticModel):
"""
Class to characterize a power-law function

f(x) = ampl (x/ref) ^ (-gamma)

"""
def __init__(self, name='mypowerlaw'):

self.gamma = Parameter(name, "gamma", 1.0, min=-10, max=10)
self.ref = Parameter(name, "ref", 1.0, alwaysfrozen=True)
self.ampl = Parameter(name, "ampl", 1.0, min=0)

ArithmeticModel.__init__(self, name,
(self.gamma, self.ref, self.ampl))

def calc(self, p, x, xhi=None, *args, **kwargs):
"""
Params
`p` list of ordered parameter values.
`x` ndarray of domain values, bin midpoints or lower
bin edges.
`xhi` ndarray of upper bin edges.

returns ndarray of calculated function values.
"""

if xhi is None:
return self.point(p, x)
return self.integrated(p, x, xhi)


@staticmethod
def point(p, x):
"""
/ x_i \(-p[0])
point version, f_i(x_i) = p[2] |------|
\ p[1] /

Params
`p` list of ordered parameter values.
`x` ndarray of bin midpoints.

returns ndarray of calculated function values at
bin midpoints
"""

if xhi is None:
return p[2]*numpy.power(x/p[1], -p[0])


@staticmethod
def integrated(p, xlo, xhi):
"""

integrated form from lower bin edge to upper edge

⌠ xhi_i p[2] / /xhi \ (-p[0]) /xlo \ (-p[0]) \
| f_i(x_i) dx = ----- * |xhi |----| - xlo |----| |
⌡ xlo_i 1-p[0] \ \p[1]/ \p[1]/ /

Params
`p` list of ordered parameter values.
`xlo` ndarray of lower bin edges.
`xhi` ndarray of upper bin edges.

returns ndarray of integrated function values over
lower and upper bin edges.
"""
if p[0] == 1.0:
return p[2] * p[1] * (numpy.log(xhi) - numpy.log(xlo))

p1 = numpy.power(xlo, 1.0-p[0])/(1.0-p[0])
p2 = numpy.power(xhi, 1.0-p[0])/(1.0-p[0])
return p[2] / numpy.power(p[1], -p[0]) * (p2 - p1)

# <demo> stop

def logNormal(x, mu, sigma):
"""
Compute natural log of Normal distribution PDF analytically

Params
`x` x
`mu` mean
`sigma` standard deviation

returns ln(f_X(x; mu, sigma))
"""
sigma_sqr = sigma*sigma
return (-numpy.log(numpy.sqrt(2*numpy.pi*sigma_sqr)) -
numpy.square(x-mu)/2/sigma_sqr)

# <demo> stop

# stat interface instancemethod
def calc_stat(self, data, model, staterror=None,
syserror=None, weight=None):
"""
Calculate the fit statistic value and an array of statistic
value contributions per bin.

Params
`data` ndarray of observed data points
`model` ndarray of predicted data points
`staterror` ndarray of statistical error on observed data points
`syserror` ndarray of systematic error on observed data points
`weight` ndarray of statistic weights

returns a tuple of the fit statistic value and array of
stat contributions per bin.
"""
gamma = self.gamma.val # nH (abs1.nh)
beta = self.beta.val # powerlaw index (p1.gamma)
xi = numpy.log(self.alpha.val) # log powerlaw norm (p1.ampl)

phigamma = logNormal(gamma, self.mugamma.val, self.siggamma.val)
phibeta = logNormal(beta, self.mubeta.val, self.sigbeta.val)
phixi = logNormal(xi, self.muxi.val, self.sigxi.val)

# truncation_value used as an approximation for
# non-positive values in model
if truncation_value > 0:
model[model<=0.0] = truncation_value

# Poisson log-likelihood summation
likelihood = sum(-model + data*numpy.log(model))

# Add the prior to log-likelihood
prior = (phigamma+phibeta+phixi)
stat = likelihood + prior

# reverse sign to minimize log-likelihood!
return (-stat, numpy.ones_like(data))


# <demo> stop

sherpa.load_pha("3c273.pi")
sherpa.notice(0, 6.0)

sherpa.add_model(MyPowerLaw)
sherpa.set_source(sherpa.xsphabs.abs1 * mypowerlaw.p1)

sherpa.load_user_stat("stat", calc_stat,
priors=dict(mugamma=0.017, # hyperparameters
mubeta=1.26,
muxi=-8.5,
siggamma=1,
sigbeta=1,
sigxi=1,
gamma=abs1.nh, # fit parameters
beta=p1.gamma,
alpha=p1.ampl))

print stat

sherpa.set_stat(stat)
sherpa.set_method("neldermead")

sherpa.fit()

sherpa.plot_fit_delchi()

# <demo> stop

Estimate Flux Errors

A demo for ADASS 2010 to estimate the integrated flux from an X-ray spectral fit using simulations.

#!/usr/bin/env python
import sherpa.astro.ui as sherpa
import numpy

# load the data and fit with an absorbed power-law
sherpa.load_pha("3c273.pi")
sherpa.notice(0.5,7.0)
sherpa.subtract()

sherpa.set_stat("chi2datavar")
sherpa.set_model(sherpa.xsphabs.abs1*sherpa.xspowerlaw.p1)
abs1.nh=0.07
sherpa.freeze(abs1.nh)
sherpa.fit()

# <demo> stop

# simulate fluxes by sampling model parameters for 100 iterations.

flux100=sherpa.sample_energy_flux(0.5,7.,num=100)

# slice the flux column from simulation table.

fluxes = flux100[:,0]

print "Mean:", numpy.mean(fluxes)

print "Median:", numpy.median(fluxes)

print "Std Dev:", numpy.std(fluxes)

# determine 95% and 50% quantiles using numpy array sorting

sf=numpy.sort(fluxes)

print "95% quantile:", sf[0.95*(len(fluxes)-1)]
print "50% quantile:", sf[0.5*(len(fluxes)-1)]

# <demo> stop

# run 7.5e3 flux simulations and bin a histogram using the result

hist=sherpa.get_energy_flux_hist(0.5,7.,num=7500)
hist.histo_prefs['linecolor']=None
sherpa.plot_energy_flux(recalc=False)

# <demo> stop

# fit a Gaussian to the histogram
sherpa.load_arrays(2,hist.xlo,hist.xhi,hist.y,sherpa.Data1DInt)

sherpa.set_model(2,sherpa.gauss1d.g0)
g0.integrate=False
sherpa.guess(2,g0)
g0.ampl.min=0
g0.ampl.max=100
g0.ampl=1

sherpa.set_stat("leastsq")
sherpa.fit(2)

sherpa.plot_model(2, overplot=True)

# <demo> stop

# calculate sigma instead of FWHM
# FWHM = sigma * sqrt(8 ln 2)

fac0=numpy.sqrt(8*numpy.log(2))
sig=g0.fwhm.val/fac0

print "Sigma: ",sig,"Position: ", g0.pos.val

# write binned histogram arrays to file

sherpa.save_arrays("hist.dat",[hist.xlo,hist.xhi,hist.y],
fields=["xlo","xhi","y"], clobber=True)

# <demo> stop

Friday, November 5, 2010

Simple Simulation

A simple demo for ADASS 2010 to fake data with Poisson noise and fit.

#!/usr/bin/env python

import sherpa.astro.ui as sherpa

# create simple grid [0.1, 10] with a bin size of 0.01
sherpa.dataspace1d(0.1,10,0.01, dstype=sherpa.Data1D)

# <demo> stop

# shorthand for normgauss1d model
gauss = sherpa.normgauss1d

# define reference model as sum of two normalized gaussians
sherpa.set_model(gauss.ref1+gauss.ref2)

# initialize parameter values
ref1.pos = 2
ref1.fwhm = 2.5

# define area under curve ref1
ref1.ampl = 75

ref2.pos = 8

ref2.fwhm = 5

# define area under curve ref2
ref2.ampl = 125

sherpa.plot_model()

# <demo> stop

# evaluate the model, add poisson noise, and populate the dataspace
sherpa.fake()

# plot faked data as 1D line plot with errorbars
sherpa.plot_data()

# <demo> stop

# fit the faked data with new instances of normgauss1d

sherpa.set_model(gauss.g1+gauss.g2)

# narrow the parameter space boundaries according to data
sherpa.guess(g1)

g1.pos = 2

sherpa.freeze(g1)

sherpa.guess(g2)
g2.pos = 8

sherpa.fit()

sherpa.plot_fit()

# <demo> stop

sherpa.thaw(g1)

sherpa.fit()

sherpa.plot_fit()

# <demo> stop

# Run confidence to estimate limits of best-fit parameter values
sherpa.conf()

# Overplot reference model to show goodness-of-fit
# The reference model is show in orange compared to fitted model in red.
sherpa.set_model(ref1+ref2)

sherpa.get_model_plot_prefs()['linecolor']='aquamarine'
sherpa.plot_model(overplot=True)

# <demo> stop