/* FITTING ROUTINE -- WITH MODIFIED DATA TARGETS AND 4 INPUT PARAMETERS
This is a shell program which calls 'amebsa' from Numerical Recipes, a downhill
simplex method with simulated annealing. This shell program is tailored to the
TB individual-based model, where 'funx' and 'gof' were written by Adrienne Keen
and somewhat specific to the TB model. The rest of the code was adapted from
code for a testing program for amebsa fitting, 'atest' written by Clarence
Lehman in 2004, which in turn was based on the Numerical Recipes code for
amebsa.
Note: ptex is a batch file to convert these into printed page), calls tex and
then calls things to make dvi out of it.
Update: code modified to include parallel processing of separate calls to TB
program. This is done by integrating the MPI modules in mpi.c, written by
Clarence, based on his ideas and commands given to us by those at MSI.
Update2: This version reads in modified data targets and accepts only four
variable parameters, three disease risks and 'df'.
Example new program call:
1 2 3 4
tb32i df=3 d1uk20=0.15 d2uk20=0.0004 d3uk20=0.09
*/
#include
#include
#include
#include
#include
#include "nrutil.h"
#include "mpi.h"
#include "fileio.h"
#define X //Temporary marker on lines recently added or changed.
#define ITER 25 //Number of iterations at a given temperature.
#define W .8 //Displacement in the starting simplex.
#define NPROC 500 //Maximum number of processors.
#define RED .7 //Temperature reduction factor.
#define THRESH .01 //Convergence threshold for parameter convergence.
//Define statements specific to TB model
#define AC 4 //Number of age classes for notification rates.
#define SEX 2 //Number of sexes.
#define ROB 3 //Number of regions of birth in the model.
#define YR 11 //Number of years of model output for fitting
#define IND (AC*SEX*ROB*YR) //to notification data (1999-2009).
#define NOTIF 0 //Target data version, 0=rates, 1=numbers.
#define AR (r==2? 1: a) //Index forcing age class 1 for SSA. X
//Function prototypes
double converge(int,int,double[]); //Test for convergence of parameters.
float funx(float *); //Call TB model and evaluate fit of
//output to calibration targets.
float gof(float[ROB][YR][SEX][AC]); //Evaluate discrepancy between model
//output and data (targets).
int mpOpen(); //Open parallel processing.
int mpClose(); //Close parallel processing.
int mpCommon(double[][],double[],int); //Gather output from multiple processors.
void simo(float[ROB][YR][SEX][AC],double[NPROC][ROB][YR][SEX][AC],double[ROB][YR][SEX][AC],double[ROB][YR][SEX][AC]);//Write simulation output to tex file.
int Error(double);
int Error1(double,char*,double);
int Error2(double,char*,double,char*,double);
int Error3(double,char*,double,char*,double,char*,double);
#define forROB for(r=0; rNPROC) exit(3); //Check for appropriate number
//of processors.
char fitout[100]; //Create string for output file and
sprintf(fitout,"fitR56-%02d.txt",cproc); //open file.
fout=fopen(fitout,"w");
if(fout==NULL)
{ printf("Error opening output file!"); exit(3); }
if(cproc==0)
{ plot = fopen("plotR56.tex","w"); //Open file for simulation output
if(plot==NULL) //and print headers.
{ printf("Error opening output file!"); exit(3); }
for(i=0; macro[i]; i++) //Opening all graphs.
fprintf(plot, "\\input %s.tex\n", macro[i]);
fprintf(plot, "\n"); }
fprintf(fout,"Starting fit5, cproc=%d and nproc=%d\n",cproc,nproc); fflush(fout);
fprintf(fout,"temptr = %f iter = %d and temptr red factor = %f\n",temptr,iter,RED); fflush(fout);
if(NOTIF) //Read in data targets, where
{ if(ROB==3) //appropriate file depends on
FileIO("targc1n.txt",fmt2[0],"r|"); //model version (SSA/nonSSA) and
else //whether fitting to case numbers
FileIO("targc0n.txt",fmt2[0],"r|"); } //or notification rates (NOTIF=1
else //or NOTIF=0).
{ if(ROB==3)
FileIO("targc1.txt",fmt2[0],"r|");
else
FileIO("targc0.txt",fmt2[0],"r|"); }
char tout[100]; //For testing: Write out targets.
sprintf(tout,"tout%02d.txt",cproc);
FileIO(tout,fmt2[0],"w|");
if(argc>1) temptr = atof(argv[1]); //Retrieve starting temperature.
p = matrix((long)1, (long)ndim+1, //Allocate and initialize
(long)1, (long)ndim); //the starting simplex.
p[1][1]=1.841155; p[1][2]=0.163031; p[1][3]=0.001048; p[1][4]=0.0755;
p[2][1]=0.560994; p[2][2]=0.106223; p[2][3]=0.00038; p[2][4]=0.169378;
p[3][1]=1.297929; p[3][2]=0.07736; p[3][3]=0.000933; p[3][4]=0.089043;
p[4][1]=2.534156; p[4][2]=0.099439; p[4][3]=0.001034; p[4][4]=0.027495;
p[5][1]=1.822337; p[5][2]=0.094479; p[5][3]=0.000948; p[5][4]=0.086628;
pb = vector((long)1, (long)ndim+1);
//- fprintf(fout,"About to get funx value at initial simplex points\n"); fflush(fout);
y = vector((long)1, (long)ndim+1); //Allocate and initialize the
for(i=1; i<=ndim+1; i++) //objective function values at
y[i] = funx(p[i]); //each vertex of the initial simplex.
//- fprintf(fout,"About to start sim annealing loop \n"); fflush(fout);
for(yb=10000000; 1; temptr*=RED) //Run through the annealing.
{ iter = ITER;
amebsa(p, y, ndim, pb, &yb, ftol, funx, &iter, temptr);
fprintf(fout,"! %% temptr=%f idum=%ld iter=%d yb=%f pb=%f,%f,%f,%f\n\n",
temptr, idum, iter, yb, pb[1],pb[2],pb[3],pb[4]); fflush(fout);
if(iter>0) break; }
fclose(fout); //Close annealing output file.
if(cproc==0) //If this is first processor,
fclose(plot); //close simulation output file.
mpClose(); //Close parallel processing.
exit(0); //Return to operating system.
}
/*-----------------------------------------------------------------------------
GOODNESS OF FIT
This function evaluates and returns the goodness-of-fit (GOF), using the sum of
least squares, or other GOF statistic, of two arrays. Originally, the sum of
least squares statistic was used for simplicity, though this is currently not
implemented. Instead, a related method is used, but for any given age/sex/rob
category, the target value (number of cases or notification rate) and simulation
output value are both divided by the target value. This is done because the
notification rates (and case numbers) are essentially on different scales for
different categories. For example, SSA notification rates are around 250 per
100,000 each year, for the age category used for fitting. In UK-born, for the
same age category, notification rates would be on the order of 6 per 100,000
each year. In addition, an R-squared option can be chosen or a log-likelihood
deviance.
ENTRY: 'si' contains values from simulation output.
'tar2' contains the calibration targets or observed data.
'STAT' contains the type of GOF statistic to be used.
0 Sum of squared deviations, relative to the observed value.
1 Log likelihood deviance function.
2 R-squared, fraction of variance explained among categories.
X 3 Normalized squared deviation from the mean.
X 4 Normalized absolute deviation from the median (not implemented).
EXIT: 'gof' returns the goodness-of-fit of 'si' to 'tar2'.
*/
#define STAT 2
#define INF 1E100
float gof(float si[ROB][YR][SEX][AC])
{ float x,x1,x2,rs,rn; int r,y,s,a; X
// --- SQUARED DEVIATION FITTING
//- fprintf(fout,"About to calculate GOF\n"); fflush(fout);
if(STAT==0) //Get discrepancy between model and data
{ x=0; //using sum of least squares method.**
for(r=0; r<2; r++) //First add discrepancy for for non-UK and
for(y=0; y maxi[r][y][s][a])
maxi[r][y][s][a] = glob2[n][r][y][s][a]; }
x=gof(sim2); //Obtain GOF and check GOF is the
local2[0]=x; local2[1]=0; //same for all processors.
mpCommon(global2,local2,2);
for(i=1; i=0 && w125?50: max>50?25: 10) //Vertical resolution on graph size.
#define HS 4.0 //Horizontal graph size, inches.
#define HU 10.0 //Horizontal graph units.
#define Phi 1.618 //Horizontal to vertical graph ratio.
#define CBL "{" //Left curly brace, for balancing.
#define CBR "}" //Right curly brace, for balancing.
static int figure = 0; //Running figure number.
static char *wsex[] = { "Male", "Female" };
static char *wage[] = { "black ", "blue ", "green", "red " };
static char *wrob[] = { "Non-UK born", "UK-born", "SSA-born" };
void simo(float sim[ROB][YR][SEX][AC], double glob2[NPROC][ROB][YR][SEX][AC],double mini[ROB][YR][SEX][AC],double maxi[ROB][YR][SEX][AC])
{
int i,n,k,r,y,s,a; double max;
//- fprintf(fout,"Entering 'simo'\n"); fflush(fout);
fprintf(plot,"\\overfullrule=0pt\n");
for(r=ROB-1; r>=0; r--) //Determine the vertical scale for the
for(s=0; s<=1; s++) //current graph (auto-scaling).
{ max = -1E9;
for(a=0; a\n", HS/HU, (HS/Phi)/k);
fprintf(plot,"\\setplotarea x from 0 to 10, y from 0 to %d \n", k);
fprintf(plot,"\\plotheading {%s %s Notification Rates}\n", wrob[r], wsex[s]);
fprintf(plot,"\\axis bottom %%label {Year}\n");
fprintf(plot," ticks withvalues {1999} {2001} {2003} {2005} {2007} {2009} /\n");
fprintf(plot," at 0 2 4 6 8 10 / /\n");
fprintf(plot,"\\axis left label {\\vertical{\\lines{Cases per 100,000\\cr\\cr}}}\n");
fprintf(plot," ticks numbered from 0 to %d by %d /\n", k, k<=10?2: k<=30?5: k<=150?25: 50);
fprintf(plot,"\\axis right / \\axis top /\n");
for(a=0; a\n");
printf("\\setplotarea x from -6 to 6, y from -6 to 6\n");
printf("\\plotheading {$T=%.4f$}\n", temptr);
printf("\\putrule from -6 0 to 6 0\n");
printf("\\putrule from 0 -6 to 0 6\n");
printf("\\put {$\\bullet$} at 0 0\n");
printf("\\put {$\\bullet$} at 2 2\n");
printf("\\plot"); //Note: plotting should be fixed
for(i=1; i<=ndim+1; i++) //for the 4 parameters!!!
printf(" %f %f ", p[i][1], p[i][2]);
printf(" %f %f /\n", p[1][1], p[1][2]);
for(i=1; i<=ndim+1; i++)
printf("\\put {%.3g} at %f %f\n", y[i], p[i][1], p[i][2]);
printf("\\endpicture $$\n\n");
*/
}
// ADRIENNE KEEN, APRIL 2011.