/* --------------------------------------------------------------------- */ /* */ /* BAYESIAN MULTIMAPPER / INBRED LINE CROSSES Version 1.0 */ /* */ /* (c) Copyright 1996-2006 by Mikko J. Sillanpaa */ /* */ /* Rolf Nevanlinna Institute */ /* FIN-00014 University of Helsinki, FINLAND */ /* */ /* E-mail : mjs@rolf.helsinki.fi */ /* */ /* last updated : February 2002 */ /* */ /* */ /* */ /* Environmental covariates */ /* */ /* */ /* --------------------------------------------------------------------- */ /* This file is part of the Multimapper software. */ /* The Multimapper software has been written only for scientific use. */ /* There is absolutely no warranty in the Multimapper software. */ /* You are using the Multimapper software completely at your own risk. */ /* Beware! this is the version 1.0, the Multimapper software might */ /* contain some errors or bugs! */ /* */ /* --------------------------------------------------------------------- */ /* */ /* file name: metropolis.c */ /* */ /* This is a program that runs Metropolis-Hastings sampler */ /* with a reversible jumps for an inbred line cross */ /* offspring data. */ /* */ /* */ /* --------------------------------------------------------------------- */ #include #include #include #define _XOPEN_SOURCE #include #include #include "data_structure.h" #include "conversions.h" #define pi M_PI /* --------------------------------------------------------------------- */ double log_likelihood (int N_bc, int Num_ind, regpa *reg, int take[MAX_N_BG_CONT], individuals *first, int trait,int genotype[3][MAX_N_INDIVID], int bgc_table[MAX_N_INDIVID][MAX_N_BG_CONT], int N_qtl, int num_cov, int cov_index[MAX_N_COV], int cov_type [MAX_N_COV]) /* --------------------------------------------------------------------- */ { int i,j,k,index2,q; double l1,residual; l1=Num_ind*log(1.0/sqrt(2.0*pi*(reg->sigma))); for (i=0;iphenotype[trait]-reg->a; for (k=0;kcov[k][0])*(first->phenotype[cov_index[k]]); else residual-=(reg->cov[k][(int) first->phenotype[cov_index[k]]]); for (q=0;qb[q][genotype[q][i]]; for (j=0;jbc[j][index2]; } l1=l1+((-1.0/(2.0*(reg->sigma)))*residual*residual); first=first->next; } return(l1); } /* --------------------------------------------------------------------- */ double ind_log_likelihood (int N_bc, regpa *reg, int take[MAX_N_BG_CONT],double phenotype[10], int genotype[3][MAX_N_INDIVID], int bgc_table[MAX_N_INDIVID][MAX_N_BG_CONT], int individual, int N_qtl,int trait, int num_cov, int cov_index [MAX_N_COV], int cov_type [MAX_N_COV]) /* --------------------------------------------------------------------- */ { int j,k,index2,q; double l1,residual; l1=log(1.0/sqrt(2.0*pi*(reg->sigma))); residual=phenotype[trait]-reg->a; for (k=0;kcov[k][0])*(phenotype[cov_index[k]]); else residual-=(reg->cov[k][(int) phenotype[cov_index[k]]]); for (q=0;qb[q][genotype[q][individual]]; for (j=0;jbc[j][index2]; } l1=l1+((-1.0/(2.0*(reg->sigma)))*residual*residual); return(l1); } /* --------------------------------------------------------------------- */ void make_take (int take[MAX_N_BG_CONT],int bgc[MAX_N_BG_CONT][2], int N_bc, int cur_chr) /* --------------------------------------------------------------------- */ { int i,current; current=cur_chr+1; for (i=0;i0) { *R1=(q[f].r1-q[minl_i].r1)/(1.0-(2.0*q[minl_i].r1)); *left_i=minl_i; } if (right>0) { *R2=(q[f].r2-q[minr_i].r2)/(1.0-(2.0*q[minr_i].r2)); *right_i=minr_i; } if ((left>0) && (right>0)) return(3); else { if (left>0) return(1); else return(2); } } } /* --------------------------------------------------------------------- */ void get_regression_proposals(int f, int N_gen, int N_bc, double range[5], regpa *regress, int num_cov, int cov_type[MAX_N_COV]) /* --------------------------------------------------------------------- */ /* vector regress need to contain last round's accepted parameter */ /* values and range contain window sizes for parameters. */ /* */ /* --------------------------------------------------------------------- */ { double half,half2,ran1,ran2; int i,j,k; half=range[2]*0.5; /* X x 0.5= X / 2.0 */ half2=range[3]*0.5; ran1=range[2]; ran2=range[3]; if (f==0) { (*regress).a=(*regress).a-(range[0]*0.5)+((rand()*range[0])/RAND_MAX); (*regress).sigma=(*regress).sigma-(range[1]*0.5)+((rand()*range[1])/RAND_MAX); for (i=0;istart; while(ptr->chr->chromosome_number!=cur_chr) ptr=ptr->next; for (j=0;jgenotype; if (proposal<0) { if (proposal==-1000) { index=rand()%N_gen; proposal=g_possible[index]; } else { while(proposal<0) { index=rand()%N_gen; pro=g_possible[index]; if (pro==-proposal) proposal=pro; else { X1=(int) pro/1000; X2=pro%1000; if (X1==-proposal || X2==-proposal) proposal=pro; } } } } gene[i][j]=proposal; ptr=ptr->next; } start=start->next; } } /* --------------------------------------------------------------------- */ void change_coding(int N_ind, int convert [MAX_N_GENOTYP*2][2], individuals *start) /* --------------------------------------------------------------------- */ /* This function is called only by function readin_gpossible */ /* --------------------------------------------------------------------- */ { int i,k; geno_type *ptr; for (i=0;istart; while (ptr!=NULL) { k=0; while(ptr->genotype!=convert[k][0]) k++; ptr->genotype=convert[k][1]; ptr=ptr->next; } start=start->next; } } /* --------------------------------------------------------------------- */ void make_genotype(int N_ind, int N_gen, double p[MAX_N_INDIVID][MAX_N_GENOTYP], int genotype[MAX_N_INDIVID]) /* --------------------------------------------------------------------- */ { int i,j,s; double k,sum,add_in[MAX_N_GENOTYP]; for (i=0;iadd_in[s]) s++; genotype[i]=s; } } /* --------------------------------------------------------------------- */ void change_cur (int N_bc, int N_ind, int cur_chr, int bgc [MAX_N_BG_CONT][2], int bgc_new [MAX_N_INDIVID] [MAX_N_BG_CONT], int bgc_acc [MAX_N_INDIVID] [MAX_N_BG_CONT]) /* --------------------------------------------------------------------- */ { int i,j; cur_chr++; for (i=0;i=0) { indexi=0; while (proposal!=g_possible[indexi]) indexi++; } else { if (proposal==-1000) { indexi=rand()%N_gen; } else { while(proposal<0) { index=rand()%N_gen; pro=g_possible[index]; if (pro==-proposal) { proposal=pro; indexi=index; } else { X1=(int) pro/1000; X2=pro%1000; if (X1==-proposal || X2==-proposal) { proposal=pro; indexi=index; } } } /* while */ } /* else */ } /* if */ bgc_table2[i][j]=indexi; } /* for */ } /* --------------------------------------------------------------------- */ void make_bgc_table(int N_ind,int N_bc, int bgc [MAX_N_BG_CONT][2], int bgc_table [MAX_N_INDIVID] [MAX_N_BG_CONT], individuals *start) /* --------------------------------------------------------------------- */ { int i,j; geno_type *ptr; for (i=0;istart; while (bgc[j][0]!=ptr->chr->chromosome_number) ptr=ptr->next; while (bgc[j][1]!=ptr->marker->marker) ptr=ptr->next; bgc_table[i][j]=ptr->genotype; } start=start->next; } } /* --------------------------------------------------------------------- */ void readin_randomwalk(int *cur_chr, int *cur_trait, int *maxround, double *y, double *window, double range[5], double start_point[3], int *permission, int *design, int *num_cov, int cov_index[MAX_N_COV], int cov_type[MAX_N_COV]) /* --------------------------------------------------------------------- */ { FILE *filep; char line[250],temp[250]; int i; filep=fopen("randomwalk.file","r"); if (filep==NULL) { fprintf(stderr,"Cannot find file named randomwalk.file !!!!\n"); exit(1); } for (i=0;i<10;i++) { fgets(line, sizeof(line),filep); strncpy(temp,line,9); temp[9]='\0'; if (i==0) *cur_chr=atoi(temp)-1; if (i==1) *cur_trait=atoi(temp)-1; if (i==2) *maxround=atoi(temp); if (i==3) { start_point[0]=atof(temp); strncpy(temp,line+10,9); temp[9]='\0'; start_point[1]=atof(temp); strncpy(temp,line+20,9); temp[9]='\0'; start_point[2]=atof(temp); } if (i==4) *y=atof(temp); if (i==5) *window=atof(temp); if (i>5) range[i-6]=atof(temp); } fgets(line, sizeof(line),filep); strncpy(temp,line,9); temp[9]='\0'; *permission=atoi(temp); fgets(line, sizeof(line),filep); strncpy(temp,line,9); temp[9]='\0'; *design=atoi(temp); fgets(line, sizeof(line),filep); strncpy(temp,line,4); temp[4]='\0'; *num_cov=atoi(temp); for (i=0;i<*num_cov;i++) { strncpy(temp,line+5+(i*4),3); temp[3]='\0'; cov_index[i]=atoi(temp); } for (i=0;i<*num_cov;i++) { strncpy(temp,line+5+((*num_cov)*4)+(i*4),3); temp[3]='\0'; cov_type[i]=atoi(temp); } fclose(filep); printf("now randomwalk.file succesfully read in ... \n"); } /* --------------------------------------------------------------------- */ void readin_priorlimits(double limit[4][2],double lx_limit[2]) /* --------------------------------------------------------------------- */ { FILE *filep; char line[250],temp[250]; int i; filep=fopen("priorlimits.file","r"); if (filep==NULL) { fprintf(stderr,"Cannot find file named priorlimits.file !!!!\n"); exit(1); } for (i=0;i<5;i++) { fgets(line, sizeof(line),filep); strncpy(temp,line,9); temp[9]='\0'; if (i<4) limit[i][0]=atof(temp); else lx_limit[0]=atof(temp); strncpy(temp,line+10,9); temp[9]='\0'; if (i<4) limit[i][1]=atof(temp); else lx_limit[1]=atof(temp); } fclose(filep); printf("now priorlimits.file succesfully read in ... \n"); } /* --------------------------------------------------------------------- */ void readin_bg_controls(int N_bc,int bgc[MAX_N_BG_CONT][2]) /* --------------------------------------------------------------------- */ { FILE *filep; char line[250],temp[250]; int i; filep=fopen("bg_controls.file","r"); if (filep==NULL) { fprintf(stderr,"Cannot find file named bg_controls.file !!!!\n"); exit(1); } for (i=0;ial2) alleleshare=al1; else alleleshare=al2; if (alleleshare==2) p1=1.0-R1; else p1=R1; al1=(X1==MR1)+(X2==MR2); al2=(X1==MR2)+(X2==MR1); if (al1>al2) alleleshare=al1; else alleleshare=al2; if (alleleshare==2) p2=1.0-R2; else p2=R2; p[j][i]=p1*p2; total[j]=total[j]+p[j][i]; } /* for j */ } /* for i */ for (j=0;ja; (*target).sigma=source->sigma; for(i=0;ib[j][i]; for(j=0;jbc[j][i]; } for (i=0;icov[i][0]; for (k=1;kcov[i][k]; } } /* --------------------------------------------------------------------- */ void update_bgc(int N_ind, int N_bc, int target[MAX_N_INDIVID][MAX_N_BG_CONT], int source[MAX_N_INDIVID][MAX_N_BG_CONT]) /* --------------------------------------------------------------------- */ { int i,j; for(i=0;i Backcross (%d genotypes)\n",N_gen); else if (design==2) fprintf(fp2," Design 2 => F2 (%d genotypes)\n",N_gen); else fprintf(fp2," Design %d => UNKNOWN DESIGN (%d genotypes)\n",design,N_gen); fprintf(fp2,"----------------------------\n"); fprintf(fp2," Background controls :\n"); for (i=0;i1000) possi=1; } if (possi==0) { fprintf(fp2," !!!!!! you have only made homozygote genotypes !!!!\n"); fprintf(fp2," !!!!!! please check that your codesystem.file is correct !!!!\n"); } fprintf(fp2,"----------------------------\n"); srand(clock()); /* shuffle rand generator */ fp=fopen("MH_output__lx0","w"); fp0=fopen("MH_output__lx1","w"); fp1=fopen("MH_output__lx2","w"); fp3=fopen("MH_output__logL","w"); /* fp4=fopen("MH_output__regr","w"); */ fp5=fopen("MH_output__Nqtl","w"); fr0=fopen("MH_output__reg_sigma_a","w"); fr1=fopen("MH_output__reg0","w"); fr2=fopen("MH_output__reg1","w"); fr3=fopen("MH_output__reg2","w"); fr20=fopen("MH_output__env","w"); /* print_start_of_file(fp,(cur_chr+1),max_round,limit,range,window); print_start_of_file(fp3,(cur_chr+1),max_round,limit,range,window); */ print_start_of_file(fp2,(cur_chr+1),max_round,limit,range,window,y); /* print_start_of_file(fp4,(cur_chr+1),max_round,limit,range,window); */ fflush(NULL); /* for debugging */ /* ------------------------------------------------------------------- */ /* */ /* Here we calculate current chromosomal lenght */ /* */ /* ------------------------------------------------------------------- */ lenght=0.0; while(cur_chr!=(first->chromosome_number-1)) first=first->next; ptr=first->start; i=0; cumul[0]=0.0; while (ptr!=NULL) { lenght+=ptr->r; ptr_table[i]=ptr; cumul[++i]=lenght; ptr=ptr->next; } cumul[i+1]=0.0; fprintf(fp2,"Lenght of chromosome #%d is %f cM\n",cur_chr+1,lenght); if (window>=lenght) { printf("------------------------------------------------\n"); printf("ERROR : CHECK YOUR PROPOSAL RANGES!!!!!!!\n"); printf("PROPOSAL RANGE FOR QTL LOCATION PARAMETERS (%f)\n",window); printf("IS HIGHER OR EQUIVALENT TO THE LENGTH OF\n"); printf("THE CHROMOSOME #%d (%f)\n",cur_chr+1,lenght); printf("------------------------------------------------\n"); exit(1); } for (i=0;i<3;i++) if(acc_loca[i]>lenght) { printf("------------------------------------------------\n"); printf("ERROR: INITIAL LOCATION OUT OF MARKER MAP!!!!\n"); printf("PLEASE MODIFY INITIAL LOCATIONS IN FILE : randomwalk.file\n"); printf("Lenght of chromosome #%d is %f cM\n",cur_chr+1,lenght); printf("------------------------------------------------\n"); exit(1); } for (i=0;i<12;i++) printf("cumul[%d]=%f\n",i,cumul[i]); if (lx_limit[0]<0) lx_limit[0]=0; if (lx_limit[1]>lenght) lx_limit[1]=lenght; /* ------------------------------------------------------------------- */ /* acc_loca=lenght*0.5; */ /* starting value */ p_add=1.0/y; p_del=p_add; /* for initialization */ for (i=0;i<(Num_marks-1);i++) { wg[i]=Haldane_to_recomb(ptr_table[i]->r); if (wg[i]==0.0) { printf("------------------------------------------------\n"); printf("ERROR : DISTANCE BETWEEN TWO MARKERS IS 0.0 cM !!!!!\n"); printf("PLEASE DELETE ONE MARKER OR REPLACE ZERO DISTANCE\n"); printf("WITH SOME SMALL NON-ZERO VALUE IN .map\n"); printf("THIS ERROR OCCURRED IN CHROMOSOME %d\n",cur_chr+1); printf("------------------------------------------------\n"); exit(1); } } if (design==1) BCross_mrk_prior(N_ind,Num_marks,wg,acc_prior,a_gene); else F2_mrk_prior(N_ind,Num_marks,wg,acc_prior,a_gene); /*----------------( M-H iteration starts from here )----------------*/ while (round0 */ /* 2) add one new QTL, if N_qtl<3 */ /* 3) propose new location(s) of current QTL(s) */ /* */ /* ------------------------------------------------------------------- */ chance=(((double)rand())/RAND_MAX); add_one=(chancelocation) in--; if (location==lenght) in--; q_propo.l_x=location; q_propo.ML=in; q_propo.MR=in+1; q_propo.g_l=ptr_table[in]; dist=location-cumul[in]; q_propo.r1=Haldane_to_recomb(dist); dist=Haldane_to_recomb(q_propo.g_l->r); q_propo.r2=(dist-q_propo.r1)/(1.0-(2.0*q_propo.r1)); if (location==lenght) { temp_d=q_propo.r1; q_propo.r1=q_propo.r2; q_propo.r2=temp_d; } cp_field(N_qtl,(N_qtl+1),q_new,q,q_propo); flank_type=flanking_objects(N_qtl,q_new,&R1,&R2,(N_qtl+1), N_ind, &left_i, &right_i); x_prior_distribution(N_ind,q_propo,N_gen,g_possible,p,a_gene,design, flank_type,left_i,right_i,R1,R2,acc_geno); make_genotype(N_ind,N_gen,p,acc_geno[N_qtl]); for (i=0;ilx_limit[1])) /*|| ((f==0) && (acc_loca[1]-locationlocation) in--; if (location==lenght) in--; q_propo.l_x=location; q_propo.ML=in; q_propo.MR=in+1; q_propo.g_l=ptr_table[in]; /* q_propo.g_r=ptr_table[in+1]; */ dist=location-cumul[in]; q_propo.r1=Haldane_to_recomb(dist); /* from Haldane map function X2=(X3-X1)/(1-2X1), because X3=X1+X2-2X1X2 */ dist=Haldane_to_recomb(q_propo.g_l->r); q_propo.r2=(dist-q_propo.r1)/(1.0-(2.0*q_propo.r1)); if (location==lenght) { temp_d=q_propo.r1; q_propo.r1=q_propo.r2; q_propo.r2=temp_d; } /* ------------------------------------------------------------------- */ /* Here is core for updating QTL location */ /* ------------------------------------------------------------------- */ cp_field(f,N_qtl,q_new,q,q_propo); flank_type=flanking_objects(f,q_new,&R1,&R2,N_qtl, N_ind, &left_i, &right_i); x_prior_distribution(N_ind,q_propo,N_gen,g_possible,p,a_gene,design, flank_type,left_i,right_i,R1,R2,acc_geno); /* x_prior_distribution(N_ind,q_propo,N_gen,g_possible,p,a_gene,design);*/ if (round!=0) { flank_type=flanking_objects(f,q,&R1,&R2,N_qtl, N_ind, &left_i, &right_i); x_prior_distribution(N_ind,q[f],N_gen,g_possible,p1,a_gene,design, flank_type,left_i,right_i,R1,R2,acc_geno); /* x_prior_distribution(N_ind,q[f],N_gen,g_possible,p1,a_gene,design); */ /* tarkista acc_geno:n parametrivalityksen oikeellisuus */ make_p_ratio(N_ind,acc_geno[f],acc_geno[f],p,p1,&ratio); } chance=(((double)rand())/RAND_MAX); if ((round==0)||(chance<=ratio)) { q[f].l_x=q_propo.l_x; q[f].ML=q_propo.ML; q[f].MR=q_propo.MR; q[f].g_l=q_propo.g_l; q[f].r1=q_propo.r1; q[f].r2=q_propo.r2; acc_loca[f]=location; /* make_take(take,bgc,q.ML,q.MR,N_bc,cur_chr); */ } else num_reject[f][0]++; } /* ------------------------------------------------------------------- */ /* */ /* Here we update QTL genotypes individually */ /* */ /* ------------------------------------------------------------------- */ /* x_prior_distribution(N_ind,q,N_gen,g_possible,p,a_gene,design); */ flank_type=flanking_objects(f,q,&R1,&R2,N_qtl, N_ind, &left_i, &right_i); x_prior_distribution(N_ind,q[f],N_gen,g_possible,p,a_gene,design, flank_type,left_i,right_i,R1,R2,acc_geno); make_genotype(N_ind,N_gen,p,genotype[f]); if ((round==0) && (f==0)) { make_genotype(N_ind,N_gen,p,acc_geno[0]); make_genotype(N_ind,N_gen,p,acc_geno[1]); make_genotype(N_ind,N_gen,p,acc_geno[2]); } start_1=start; for (i=0;iphenotype, genotype,bgc_acc,i,N_qtl,trait, num_cov,cov_index,cov_type); l11=ind_log_likelihood (N_bc,®_acc,take,start_1->phenotype, acc_geno,bgc_acc,i,N_qtl,trait, num_cov,cov_index,cov_type); chance=(((double)rand())/RAND_MAX); if ((round==0) || (chance<=exp(l01-l11))) acc_geno[f][i]=genotype[f][i]; else mup3[f]++; start_1=start_1->next; } } } /* ------------------------------------------------------------------- */ /* */ /* Here we update marker genotype matrix */ /* in current chromosome under the search. */ /* */ /* ------------------------------------------------------------------- */ get_marker_proposal (N_ind,Num_marks,N_gen,gene,g_possible,start,cur_chr); if (design==1) BCross_mrk_prior(N_ind, Num_marks,wg,mc_prior,gene); else F2_mrk_prior(N_ind, Num_marks,wg,mc_prior,gene); for (f=0;f limit[0][1])) fail=1; if ((regress.sigma limit[1][1])) fail=1; for (i=0;i limit[0][1])) fail=1; for (m=1;m limit[0][1])) fail=1; } } if (f==1) { for (i=0;i limit[2][1])) fail=1; for (j=0;j limit[3][1])) fail=1; } } if (!fail) { l0=log_likelihood(N_bc,N_ind,®ress,take,start,trait,acc_geno, bgc_acc,N_qtl,num_cov,cov_index,cov_type); l1=log_likelihood(N_bc,N_ind,®_acc,take,start,trait,acc_geno, bgc_acc,N_qtl,num_cov,cov_index,cov_type); if ((((double)rand())/RAND_MAX)<=exp(l0-l1))/* accept with prob (l0/l1)*/ { cp_regress(N_gen,N_bc,®_acc,®ress,N_qtl,num_cov,cov_type); /* reg_acc=regress */ /* l1=l0; */ } else num_rej[f]++; } else num_rej[f]++; } /* ------------------------------------------------------------------- */ /* */ /* Here we update BG control parameters locating in other chromosomes */ /* and check an acceptance */ /* */ /* ------------------------------------------------------------------- */ bg_proposal (N_ind,N_bc,N_gen,bgc,bgc_table,bgc_new,g_possible); change_cur(N_bc,N_ind,cur_chr,bgc,bgc_new,bgc_acc); /* l0=log_likelihood(N_bc,N_ind,®_acc,take,start,trait,acc_geno,bgc_new); */ start_1=start; for (i=0;iphenotype, acc_geno,bgc_new,i,N_qtl,trait,num_cov,cov_index, cov_type); l11=ind_log_likelihood (N_bc,®_acc,take,start_1->phenotype, acc_geno,bgc_acc,i,N_qtl,trait,num_cov,cov_index, cov_type); chance=(((double)rand())/RAND_MAX); if (chance<=exp(l01-l11)) { for(j=0;jnext; } /* accept with prob (l0/l1) */ /* if ((((double)rand())/RAND_MAX)<=exp(l0-l1)) { update_bgc(N_ind,N_bc,bgc_acc,bgc_new); l1=l0; } else mup2++; */ if (N_qtl>0) fprintf(fp,"%12.7f\n",acc_loca[0]); if (N_qtl>1) fprintf(fp0,"%12.7f\n",acc_loca[1]); if (N_qtl>2) fprintf(fp1,"%12.7f\n",acc_loca[2]); fprintf(fp5,"%8d\n",N_qtl); if (permission) { true_inter=reg_acc.a; for (i=0;i0) { for (i=0;i1) { for (i=0;i2) { for (i=0;i