//***************************************************************************************************
//***************************************************************************************************
//An MCMC algorithm for detecting short adjacent repeats shared by multiple sequences
//Series: G (One segment per one sequence with gap allowed)
//Version: 3.6x
//Created: 07/07/2010; Last update: 13/04/2011
//Author: LI Qiwei
//***************************************************************************************************
//***************************************************************************************************


#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>
#include "BASARD.h"
#include "stdlib.h"
using namespace std;


double BASARD(int type,int N,int J,int G,int Omega,double epsilon,double epsilon_2,double tao,double *Q,int I,char *inputfile,char *outputfile,int ss)
{
	//Record time
	clock_t start,finish;
	double runtime;
	start = clock();


	//Valuables for indirectly using
	int i,j,element_number,letter = 0;
	if(type == 1)
	{
		element_number = 20;
	}
	else
	{
		element_number = 4;
	}
	int *counter = new int[element_number];
	memset(counter,0,sizeof(int)*element_number);


	//Open the file to read the sequences
	ifstream infile(inputfile);
	if(!infile)
	{
		cout << "Unable to open the file!";
		exit(1);
	}
	
	char **R;
	R = new char *[N+1];
	for(i=0;i<=N;i++)
	{
		R[i] = new char[10000];
	}
	i = 0;
	while(infile)
	{
		infile.getline(R[i],10000);
		if(R[i][0]!='>')
		{
			i++;
		}
	}

	
	//Open the file to record the results
	ofstream outfile(outputfile);   //The report summary
	outfile.clear();


	//Basic settings
	//int N = getN(R);   //'N' is the number of sequences in dataset R
	int M = 10;   //'M' is the cycle of executing phase shifts
	int mu = J-1;   //'mu' is the maximum phase shifts number
	

	//Calculate the length of each sequence
	int *L = new int[N];
	memset(L,0,sizeof(int)*N);
	for(i=0;i<N;i++)
	{
		L[i] = strlen(R[i]);
	}


	//Calculate the number of each letter within each sequence
	int **letter_distribution;
	letter_distribution = new int *[element_number];
	for(i=0;i<element_number;i++)
	{
		letter_distribution[i] = new int[N];
		memset(letter_distribution[i],0,N*sizeof(int));
	}
	for(i=0;i<N;i++)
	{
		getValue(counter,0,element_number);
		for(j=0;j<L[i];j++)
		{
			adder(counter,R[i][j],type);
		}
		for(letter=0;letter<element_number;letter++)
		{
			letter_distribution[letter][i] = counter[letter];
		}
	}
	//***********************************************************************************************
	

	//Valuables for indirectly using
	int ii,k,m,aa,bb,cc,psn,opt,itemp,itemp2,lpsn,rpsn,flag = 0;
	double temp,sum,lamda,T = 0;
	double F,MF,F_2,MF_2,Fps;
	int *sum_counter = new int[element_number];
	memset(sum_counter,0,sizeof(int)*element_number);


	//Record the acceptance rate of each move
	int *receive = new int[6];
	memset(receive,0,sizeof(int)*6);
	int *accept = new int[6];
	memset(accept,0,sizeof(int)*6);


	//Initial the set of segment structures S
	int *atemp = new int[N];
	memset(atemp,0,sizeof(int)*N);
	randomabarray(atemp,L,N,0,Omega-1,J,G);
	int **s;
	s = new int *[N];
	for(i=0;i<N;i++)
	{
		s[i] = new int[Omega-1];
		memset(s[i],0,(Omega-1)*sizeof(int));
		getValue(s[i],-1,Omega-1);
		/*
		for(j=0;j<atemp[i];j++)
		{
			s[i][j] = randomab(0,G);
		}
		*/
	}
	int *cn = new int[N];
	memset(cn,0,sizeof(int)*N);
	for(i=0;i<N;i++)
	{
		cn[i] = getCopyNumber(s[i],Omega-1);
	}
	//Record the set of most probable segment structures S 
	int **final_s;
	final_s = new int *[N];
	for(i=0;i<N;i++)
	{
		final_s[i] = new int[Omega-1];
		memset(final_s[i],0,(Omega-1)*sizeof(int));
	}
	int *final_cn = new int[N];
	memset(final_cn,0,sizeof(int)*N);

	
	//Initial the set of segment locations A
	int *a = new int[N];
	memset(a,0,sizeof(int)*N);
	for(i=0;i<N;i++)
	{
		a[i] = randomab(0,L[i]-cn[i]*J-getGapNumber(s[i],Omega-1));
	}
	//Record the set of most probable segment locations A
	int *final_a = new int[N];
	memset(final_a,0,sizeof(int)*N);

	
	//Initial the motif matrix Theta
	double **q;
	q = new double*[element_number];
	for(i=0;i<element_number;i++)
	{
		q[i] = new double[J];
		memset(q[i],0,J*sizeof(double));
	}


	//Initial the background distribution Phi
	double *h = new double[element_number];
	memset(h,0,sizeof(double)*element_number);
	//***********************************************************************************************


	//Start MCMC algorithms
	cc = 0;
	for(ii=0;ii<I;ii++)
	{
		/*
		//Output the record of segment locations and copy numbers
		for(i=0;i<N;i++)
		{
			outfile<<a[i]<<" ";
		}
		outfile<<endl;
		for(i=0;i<N;i++)
		{
			outfile2<<cn[i]<<" ";
		}
		outfile2<<endl;
		*/


		//Start each sub-iteration
		F_2 = 0;
		for(i=0;i<N;i++)
		{
			//Step2.1 (Predictive update step): Fix A and S, update estimated Theta and Phi
			getValue(sum_counter,0,element_number);
			for(m=0;m<J;m++)
			{
				getValue(counter,1,element_number);
				for(j=0;j<N;j++)
				{
					if(j == i)
					{
						continue;
					}
					for(k=1;k<=cn[j];k++)
					{
						adder(counter,R[j][a[j]+getUnitPosition(s[j],Omega-1,k,J)+m],type);
					}
				}
				for(letter=0;letter<element_number;letter++)
				{
					q[letter][m] = double(counter[letter])/double(arraysum(cn,N)-cn[i]+element_number)*100;
					sum_counter[letter] = sum_counter[letter]+counter[letter]-1;
				}
			}
			for(letter=0;letter<element_number;letter++)
			{
				itemp = 0;
				for(j=0;j<N;j++)
				{
					if(j != i)
					{
						itemp = itemp+letter_distribution[letter][j];
					}
				}
				h[letter] = double(1+itemp-sum_counter[letter])/double(element_number+arraysum(L,N)-L[i]-(arraysum(cn,N)-cn[i])*J)*100;
			}
			
			
			//Step2.2 (Gibbs sampling step): Fix S and Theta, update A
			if((ss==1 && ii%M == 0)||ss==0)
			{
			itemp = L[i]-cn[i]*J-getGapNumber(s[i],Omega-1)+1;
			F_2 = F_2-log(double(itemp));
			if(itemp > 1)
			{
				double *p1 = new double[itemp];
				memset(p1,0,sizeof(double)*(itemp));
				for(m=0;m<itemp;m++)
			    {
					temp = 1;
				    for(j=0;j<J;j++)
				    {
						for(k=1;k<=cn[i];k++)
					    {
							temp = temp*multiplier(q,R[i][m+getUnitPosition(s[i],Omega-1,k,J)+j],j,type)/multiplier_2(h,R[i][m+getUnitPosition(s[i],Omega-1,k,J)+j],type);
						}
				    }					
				    p1[m] = pow(temp,1/tao);
				}
			    sum = 0;
				for(m=0;m<itemp;m++)
			    {
					sum = sum+p1[m];
				}
			    for(m=0;m<itemp;m++)
			    {
					p1[m] = p1[m]/sum;
					F_2 = F_2-p1[m]*log(p1[m]);
					if(m == 0)
					{
						continue;
					}
					p1[m] = p1[m]+p1[m-1];
				}
				temp = double(rand()%10001)/10000;
			    temp = temp+double(rand()%10001)/100000000;
			    for(m=0;m<itemp;m++)
				{
					if(temp <= p1[m])
				    {
						a[i] = m;
						break;
					}
				}
				delete [] p1;
			}
			}

			
			//Step2.3 (Metropolis-Hastings sampling): Fix A and Theta, update S
			double *p2 = new double[2];
			memset(p2,0,sizeof(double)*2);
			T = 0;   //'T' is the ratio of proposal densities
			temp = double(rand()%10001)/10000;
			for(j=0;j<5;j++)
			{
				if(temp <= Q[j])
				{
					opt = j;
					receive[j] = receive[j]+1;
					break;
				}
			}
			switch(opt)
			{
			case 0:   //Rear insertion
				{
					if(cn[i] >= Omega || a[i]+getUnitPosition(s[i],Omega-1,cn[i],J)+J+J > L[i])
					{
						break;
					}
					aa = getUnitPosition(s[i],Omega-1,cn[i],J)+J;
					bb = min(getUnitPosition(s[i],Omega-1,cn[i],J)+J+G,L[i]-a[i]-J);
					itemp = randomab(aa,bb);
					T = (Q[1]-Q[0])/Q[0]*double(bb-aa+1);
					temp = 1;
					for(j=0;j<J;j++)
					{
						temp = temp*multiplier(q,R[i][a[i]+itemp+j],j,type)/multiplier_2(h,R[i][a[i]+itemp+j],type);
					}
					lamda = pow(pow(epsilon_2,itemp-aa)*epsilon*temp,1/tao)*T;
					p2[0] = min(lamda,double(1));
					p2[1] = 1-p2[0];
					temp = double(rand()%10001)/10000;
					if(temp <= p2[0])
					{
						s[i][cn[i]-1] = itemp-aa;
						cn[i] = cn[i]+1;
						accept[0] = accept[0]+1;
					}
					break;
				}
			case 1: //Rear deletion
				{
					if(cn[i] == 1)
					{
						break;
					}
					itemp = getUnitPosition(s[i],Omega-1,cn[i],J);
					T = Q[0]/(Q[1]-Q[0])/double(G+1);
					temp = 1;
					for(j=0;j<J;j++)
					{
						temp = temp*multiplier_2(h,R[i][a[i]+itemp+j],type)/multiplier(q,R[i][a[i]+itemp+j],j,type);
					}
					lamda = pow(1/epsilon/pow(epsilon_2,s[i][cn[i]-2])*temp,1/tao)*T;
					p2[0] = min(lamda,double(1));
					p2[1] = 1-p2[0];
					temp = double(rand()%10001)/10000;
					if(temp <= p2[0])
					{
						s[i][cn[i]-2] = -1;
						cn[i] = cn[i]-1;
						accept[1] = accept[1]+1;
					}
					break;
				}
			case 2: //Partial shift
				{
					if(cn[i] == 1)
					{
						break;
					}
					itemp = randomab(2,cn[i]);
					itemp2 = randomab(itemp,cn[i]);
					if(itemp2 == cn[i])
					{
						lpsn = -(getUnitPosition(s[i],Omega-1,itemp,J)-getUnitPosition(s[i],Omega-1,itemp-1,J)-J);
						rpsn = min(L[i]-a[i]-getUnitPosition(s[i],Omega-1,itemp,J)-J,G-(getUnitPosition(s[i],Omega-1,itemp,J)-getUnitPosition(s[i],Omega-1,itemp-1,J)-J));
					}
					else
					{
						lpsn = -min(getUnitPosition(s[i],Omega-1,itemp,J)-getUnitPosition(s[i],Omega-1,itemp-1,J)-J,G-(getUnitPosition(s[i],Omega-1,itemp2+1,J)-getUnitPosition(s[i],Omega-1,itemp2,J)-J));
						rpsn = min(getUnitPosition(s[i],Omega-1,itemp2+1,J)-getUnitPosition(s[i],Omega-1,itemp2,J)-J,G-(getUnitPosition(s[i],Omega-1,itemp,J)-getUnitPosition(s[i],Omega-1,itemp-1,J)-J));
					}
					if(lpsn == 0 && rpsn ==0)
					{
						break;
					}
					else
					{
						do
						{
							psn = randomab(lpsn,rpsn);
						}while(psn == 0);
					}
					lamda = 1;
					for(j=0;j<J;j++)
					{
						for(k=itemp;k<=itemp2;k++)
						{
							lamda = lamda*multiplier(q,R[i][a[i]+psn+getUnitPosition(s[i],Omega-1,k,J)+j],j,type)/multiplier(q,R[i][a[i]+getUnitPosition(s[i],Omega-1,k,J)+j],j,type);
						}
					}
					p2[0] = min(pow(lamda,1/tao),double(1));
					p2[1] = 1-p2[0];
					temp = double(rand()%10001)/10000;
					if(temp <= p2[0])
					{
						if(itemp2 == cn[i])
						{
							s[i][itemp-2] = s[i][itemp-2]+psn;
						}
						else
						{
							s[i][itemp-2] = s[i][itemp-2]+psn;
							s[i][itemp2-1] = s[i][itemp2-1]-psn;
						}
						accept[2] = accept[2]+1;
					}
					break;
				}
			case 3: //Front insertion
				{
					if(a[i] < J || cn[i] >= Omega)
					{
						break;
					}
					aa = max(a[i]-G-J,0);
					bb = a[i]-J;
					itemp = randomab(aa,bb);
					T = (Q[4]-Q[3])/(Q[3]-Q[2])*double(bb-aa+1);
					temp = 1;
					for(j=0;j<J;j++)
					{
						temp = temp*multiplier(q,R[i][itemp+j],j,type)/multiplier_2(h,R[i][itemp+j],type);
					}
					lamda = pow(pow(epsilon_2,a[i]-itemp-J)*epsilon*temp,1/tao)*T;
					p2[0] = min(lamda,double(1));
					p2[1] = 1-p2[0];
					temp = double(rand()%10001)/10000;
					if(temp <= p2[0])
					{
						for(k=cn[i]-1;k>0;k--)
						{
							s[i][k] = s[i][k-1];
						}
						s[i][0] = a[i]-itemp-J;
						cn[i] = cn[i]+1;
						a[i] = itemp;
						accept[3] = accept[3]+1;
					}
					break;
				}
			case 4: //Front deletion
				{
					if(cn[i] == 1)
					{
						break;
					}
					T = (Q[3]-Q[2])/(Q[4]-Q[3])/double(G+1);
					temp = 1;
					for(j=0;j<J;j++)
					{
						temp = temp*multiplier_2(h,R[i][a[i]+j],type)/multiplier(q,R[i][a[i]+j],j,type);
					}
					lamda = pow(1/epsilon/pow(epsilon_2,s[i][0])*temp,1/tao)*T;
					p2[0] = min(lamda,double(1));
					p2[1] = 1-p2[0];
					temp = double(rand()%10001)/10000;
					if(temp <= p2[0])
					{
						a[i] = a[i]+J+s[i][0];
						for(k=1;k<=cn[i]-2;k++)
						{
							s[i][k-1] = s[i][k];
						}
						s[i][cn[i]-2] = -1;
						cn[i] = cn[i]-1;
						accept[4] = accept[4]+1;
					}
				}
			}
			delete [] p2;
		}

		
		//Generate F
		F = 0;
		getValue(sum_counter,0,element_number);
		for(m=0;m<J;m++)
		{
			getValue(counter,1,element_number);
			for(j=0;j<N;j++)
			{
				for(k=1;k<=cn[j];k++)
				{
					adder(counter,R[j][a[j]+getUnitPosition(s[j],Omega-1,k,J)+m],type);
				}
			}
			for(letter=0;letter<element_number;letter++)
			{
				sum_counter[letter] = sum_counter[letter]+counter[letter]-1;
				F = F+logfactorial(counter[letter]);
				q[letter][m] = double(counter[letter])/double(arraysum(cn,N)+element_number)*100;
			}
			F = F-logfactorial(arraysum(cn,N)+element_number);
		}
		for(letter=0;letter<element_number;letter++)
		{
			itemp = 0;
			for(j=0;j<N;j++)
			{
				itemp = itemp+letter_distribution[letter][j];
			}
			h[letter] = double(1+itemp-sum_counter[letter])/double(element_number+arraysum(L,N)-arraysum(cn,N)*J)*100;
			F = F+logfactorial(1+itemp-sum_counter[letter]);
		}
		for(m=0;m<J;m++)
		{
			getValue(counter,1,element_number);
			for(j=0;j<N;j++)
			{
				for(k=1;k<=cn[j];k++)
				{
					adder(counter,R[j][a[j]+getUnitPosition(s[j],Omega-1,k,J)+m],type);
				}
			}
			for(letter=0;letter<element_number;letter++)
			{
				F_2 = F_2+counter[letter]*log(q[letter][m]/h[letter]);
			}
		}
		F = F-logfactorial(arraysum(L,N)-arraysum(cn,N)*J+element_number);
		F = F+(J+1)*logfactorial(element_number);
		itemp = 0;
		for(i=0;i<N;i++)
		{
			itemp = itemp+cn[i];
		}
		F = F+itemp*log(epsilon);
		itemp = 0;
		for(i=0;i<N;i++)
		{
			itemp = itemp+getGapNumber(s[i],Omega-1);
		}
		F = F+itemp*log(epsilon_2);
		if(ii == 0)
		{
			MF = F;
		}
		if(F >= MF)
		{
			MF = F;
			MF_2 = F_2/(element_number-1)/J;
			for(i=0;i<N;i++)
			{
				final_cn[i] = cn[i];
				final_a[i] = a[i];
				for(j=0;j<Omega-1;j++)
				{
					final_s[i][j] = s[i][j];
				}
			}
		}
		/*
		outfile3<<F<<" ";
		outfile4<<F_2/(element_number-1)/J<<" ";
		*/

		
		//Phase shifts step
		if(ii%M == 0)
		{
			receive[5] = receive[5]+1;
			double *ps = new double[2];
			memset(ps,0,sizeof(double)*2);
			itemp = a[0];
			itemp2 = L[0]-a[0]-cn[0]*J-getGapNumber(s[0],Omega-1);
			for(i=1;i<N;i++)
			{
				if(a[i] < itemp)
				{
					itemp = a[i];
				}
				if(L[i]-a[i]-cn[i]*J-getGapNumber(s[i],Omega-1) < itemp2)
				{
					itemp2 = L[i]-a[i]-cn[i]*J-getGapNumber(s[i],Omega-1);
				}
			}
			lpsn = -min(mu,itemp);
			rpsn = min(mu,itemp2);
			psn = randomab(lpsn,rpsn);
			if(lpsn == 0 && rpsn ==0)
			{
				continue;
			}
			else
			{
				do
				{
					psn = randomab(lpsn,rpsn);
				}while(psn == 0);
			}
			Fps = 0;
			getValue(sum_counter,0,element_number);
			for(m=0;m<J;m++)
			{
				getValue(counter,1,element_number);
				for(j=0;j<N;j++)
				{
					for(k=1;k<=cn[j];k++)
					{
						adder(counter,R[j][a[j]+psn+getUnitPosition(s[j],Omega-1,k,J)+m],type);
					}
				}
				for(letter=0;letter<element_number;letter++)
				{
					sum_counter[letter] = sum_counter[letter]+counter[letter]-1;
					Fps = Fps+logfactorial(counter[letter]);
				}
				Fps = Fps-logfactorial(arraysum(cn,N)+element_number);
			}
			for(letter=0;letter<element_number;letter++)
			{
				itemp = 0;
				for(j=0;j<N;j++)
				{
					itemp = itemp+letter_distribution[letter][j];
				}
				Fps = Fps+logfactorial(1+itemp-sum_counter[letter]);
			}
			Fps = Fps-logfactorial(arraysum(L,N)-arraysum(cn,N)*J+element_number);
			Fps = Fps+(J+1)*logfactorial(element_number);
			itemp = 0;
			for(i=0;i<N;i++)
			{
				itemp = itemp+cn[i];
			}
			//Fps = Fps+itemp*log(epsilon)-itemp*J*log(double(1)/double(element_number));
			Fps = Fps+itemp*log(epsilon);
			itemp = 0;
			for(i=0;i<N;i++)
			{
				itemp = itemp+getGapNumber(s[i],Omega-1);
			}
			Fps = Fps+itemp*log(epsilon_2);
			ps[0] = min(exp((Fps-F)/tao),double(1));
			ps[1] = 1-ps[0]; 
			temp = double(rand()%10001)/10000;
			if(temp <= ps[0])
			{
				accept[5] = accept[5]+1;
				for(i=0;i<N;i++)
				{
					a[i] = a[i]+psn;
				}
				if(Fps > MF)
				{
					MF = Fps;
					for(j=0;j<N;j++)
					{
						final_a[j] = a[j];
						final_cn[j] = cn[j];
						for(k=0;k<Omega-1;k++)
						{
							final_s[j][k] = s[j][k];
						}
					}
				}
			}
			delete [] ps;
		}
		
		
		if(100*ii/I == cc)
		{
			//if(cc%10 == 0)
			{
				cout<<cc<<"% has finished"<<endl;
			}
			cc++;
		}
		
	}
	finish = clock();
	runtime = double(finish-start)/CLOCKS_PER_SEC;
	//***********************************************************************************************

    
	//Output the final set of segment starting positions A and the final set of segment structures S
	outfile<<endl<<"The estimate set of segment locations and the set of segment structures are: "<<endl;
	cout<<endl<<"The estimate set of segment locations and the set of segment structures are: "<<endl;
	outfile<<"(Seq #, Locations, Copy #: Structures)"<<endl;
	cout<<"(Seq #, Locations, Copy #: Structures)"<<endl;
	for(i=0;i<N;i++)
	{
		outfile<<setw(3)<<i+1<<","<<setw(4)<<final_a[i]+1<<","<<setw(3)<<final_cn[i]<<":";
		cout<<setw(3)<<i+1<<","<<setw(4)<<final_a[i]+1<<","<<setw(3)<<final_cn[i]<<":";
		for(j=0;j<Omega-1;j++)
		{
			if(final_s[i][j] != -1)
			{
				outfile<<setw(2)<<final_s[i][j]<<" ";
				cout<<setw(2)<<final_s[i][j]<<" ";
			}	
		}
		outfile<<endl;
		cout<<endl;
	}
	outfile<<endl;
	cout<<endl;


	//Output the final motif matrix Theta and the corresponding sequence motif
	outfile<<endl<<"The estimate motif matrix is: "<<endl;
	cout<<endl<<"The estimate motif matrix is: "<<endl;
	getValue(sum_counter,0,element_number);
	for(j=0;j<J;j++)
	{
		getValue(counter,1,element_number);
		for(i=0;i<N;i++)
		{
			for(k=1;k<=final_cn[i];k++)
			{
				adder(counter,R[i][final_a[i]+getUnitPosition(final_s[i],Omega-1,k,J)+j],type);
			}
		}
		for(letter=0;letter<element_number;letter++)
		{
			sum_counter[letter] = sum_counter[letter]+counter[letter]-1;
			q[letter][j] = double(counter[letter])/double(arraysum(final_cn,N)+element_number)*100;
		}
	}
	outfile<<"   ";
	cout<<"   ";
	for(j=0;j<J;j++)
	{
		outfile<<setw(2)<<j+1<<"  ";
		cout<<setw(2)<<j+1<<"  ";
	}
	outfile<<endl;
	cout<<endl;
	for(letter=0;letter<element_number;letter++)
	{
		if(type == 1)
		{
			switch(letter)
			{
			case 0:
				{
					outfile<<" "<<'A'<<"  ";
					cout<<" "<<'A'<<"  ";
					break;
				}
			case 1:
				{
					outfile<<" "<<'C'<<"  ";
					cout<<" "<<'C'<<"  ";
					break;
				}
			case 2:
				{
					outfile<<" "<<'D'<<"  ";
					cout<<" "<<'D'<<"  ";
					break;
				}
			case 3:
				{
					outfile<<" "<<'E'<<"  ";
					cout<<" "<<'E'<<"  ";
					break;
				}
			case 4:
				{
					outfile<<" "<<'F'<<"  ";
					cout<<" "<<'F'<<"  ";
					break;
				}
			case 5:
				{
					outfile<<" "<<'G'<<"  ";
					cout<<" "<<'G'<<"  ";
					break;
				}
			case 6:
				{
					outfile<<" "<<'H'<<"  ";
					cout<<" "<<'H'<<"  ";
					break;
				}
			case 7:
				{
					outfile<<" "<<'I'<<"  ";
					cout<<" "<<'I'<<"  ";
					break;
				}
			case 8:
				{
					outfile<<" "<<'K'<<"  ";
					cout<<" "<<'K'<<"  ";
					break;
				}
			case 9:
				{
					outfile<<" "<<'L'<<"  ";
					cout<<" "<<'L'<<"  ";
					break;
				}
			case 10:
				{
					outfile<<" "<<'M'<<"  ";
					cout<<" "<<'M'<<"  ";
					break;
				}
			case 11:
				{
					outfile<<" "<<'N'<<"  ";
					cout<<" "<<'N'<<"  ";
					break;
				}
			case 12:
				{
					outfile<<" "<<'P'<<"  ";
					cout<<" "<<'P'<<"  ";
					break;
				}
			case 13:
				{
					outfile<<" "<<'Q'<<"  ";
					cout<<" "<<'Q'<<"  ";
					break;
				}
			case 14:
				{
					outfile<<" "<<'R'<<"  ";
					cout<<" "<<'R'<<"  ";
					break;
				}
			case 15:
				{
					outfile<<" "<<'S'<<"  ";
					cout<<" "<<'S'<<"  ";
					break;
				}
			case 16:
				{
					outfile<<" "<<'T'<<"  ";
					cout<<" "<<'T'<<"  ";
					break;
				}
			case 17:
				{
					outfile<<" "<<'V'<<"  ";
					cout<<" "<<'V'<<"  ";
					break;
				}
			case 18:
				{
					outfile<<" "<<'W'<<"  ";
					cout<<" "<<'W'<<"  ";
					break;
				}
			case 19:
				{
					outfile<<" "<<'Y'<<"  ";
					cout<<" "<<'Y'<<"  ";
					break;
				}
			}
		}
		else
		{
			switch(letter)
			{
			case 0:
				{
					outfile<<" "<<'A'<<"  ";
					cout<<" "<<'A'<<"  ";
					break;
				}
			case 1:
				{
					outfile<<" "<<'T'<<"  ";
					cout<<" "<<'T'<<"  ";
					break;
				}
			case 2:
				{
					outfile<<" "<<'C'<<"  ";
					cout<<" "<<'C'<<"  ";
					break;
				}
			case 3:
				{
					outfile<<" "<<'G'<<"  ";
					cout<<" "<<'G'<<"  ";
					break;
				}
			}
		}
		for(j=0;j<J;j++)
		{
			outfile<<setw(2)<<int(q[letter][j]+0.5)<<"  ";
			cout<<setw(2)<<int(q[letter][j]+0.5)<<"  ";
		}
		outfile<<endl;
		cout<<endl;
	}
	outfile<<"The corresponding sequence motif is: "<<endl;
	cout<<"The corresponding sequence motif is: "<<endl;
	if(type == 0)
	{
		for(j=0;j<J;j++)
		{
			itemp = 0;
			for(letter=1;letter<element_number;letter++)
			{
				if(q[letter][j] > q[itemp][j])
				{
					itemp = letter;
				}
			}
			switch(itemp)
			{
			case 0:
				{
					outfile<<" "<<'A'<<"  ";
					cout<<" "<<'A'<<"  ";
					break;
				}
			case 1:
				{
					outfile<<" "<<'T'<<"  ";
					cout<<" "<<'T'<<"  ";
					break;
				}
			case 2:
				{
					outfile<<" "<<'C'<<"  ";
					cout<<" "<<'C'<<"  ";
					break;
				}
			case 3:
				{
					outfile<<" "<<'G'<<"  ";
					cout<<" "<<'G'<<"  ";
					break;
				}
			}
		}
		outfile<<endl;
		cout<<endl;
	}
	else
	{
		for(j=0;j<J;j++)
		{
			itemp = 0;
			for(letter=1;letter<element_number;letter++)
			{
				if(q[letter][j] > q[itemp][j])
				{
					itemp = letter;
				}
			}
			switch(itemp)
			{
			case 0:
				{
					outfile<<" "<<'A'<<"  ";
					cout<<" "<<'A'<<"  ";
					break;
				}
			case 1:
				{
					outfile<<" "<<'C'<<"  ";
					cout<<" "<<'C'<<"  ";
					break;
				}
			case 2:
				{
					outfile<<" "<<'D'<<"  ";
					cout<<" "<<'D'<<"  ";
					break;
				}
			case 3:
				{
					outfile<<" "<<'E'<<"  ";
					cout<<" "<<'E'<<"  ";
					break;
				}
			case 4:
				{
					outfile<<" "<<'F'<<"  ";
					cout<<" "<<'F'<<"  ";
					break;
				}
			case 5:
				{
					outfile<<" "<<'G'<<"  ";
					cout<<" "<<'G'<<"  ";
					break;
				}
			case 6:
				{
					outfile<<" "<<'H'<<"  ";
					cout<<" "<<'H'<<"  ";
					break;
				}
			case 7:
				{
					outfile<<" "<<'I'<<"  ";
					cout<<" "<<'I'<<"  ";
					break;
				}
			case 8:
				{
					outfile<<" "<<'K'<<"  ";
					cout<<" "<<'K'<<"  ";
					break;
				}
			case 9:
				{
					outfile<<" "<<'L'<<"  ";
					cout<<" "<<'L'<<"  ";
					break;
				}
			case 10:
				{
					outfile<<" "<<'M'<<"  ";
					cout<<" "<<'M'<<"  ";
					break;
				}
			case 11:
				{
					outfile<<" "<<'N'<<"  ";
					cout<<" "<<'N'<<"  ";
					break;
				}
			case 12:
				{
					outfile<<" "<<'P'<<"  ";
					cout<<" "<<'P'<<"  ";
					break;
				}
			case 13:
				{
					outfile<<" "<<'Q'<<"  ";
					cout<<" "<<'Q'<<"  ";
					break;
				}
			case 14:
				{
					outfile<<" "<<'R'<<"  ";
					cout<<" "<<'R'<<"  ";
					break;
				}
			case 15:
				{
					outfile<<" "<<'S'<<"  ";
					cout<<" "<<'S'<<"  ";
					break;
				}
			case 16:
				{
					outfile<<" "<<'T'<<"  ";
					cout<<" "<<'T'<<"  ";
					break;
				}
			case 17:
				{
					outfile<<" "<<'V'<<"  ";
					cout<<" "<<'V'<<"  ";
					break;
				}
			case 18:
				{
					outfile<<" "<<'W'<<"  ";
					cout<<" "<<'W'<<"  ";
					break;
				}
			case 19:
				{
					outfile<<" "<<'Y'<<"  ";
					cout<<" "<<'Y'<<"  ";
					break;
				}
			}
		}
		outfile<<endl;
		cout<<endl;
	}
	outfile<<endl;
	cout<<endl;


	//Output the final motif matrix Theta and the corresponding sequence motif
	for(letter=0;letter<element_number;letter++)
	{
		itemp = 0;
		for(j=0;j<N;j++)
		{
			itemp = itemp+letter_distribution[letter][j];
		}
		h[letter] = double(1+itemp-sum_counter[letter])/double(element_number+arraysum(L,N)-arraysum(final_cn,N)*J)*100;
	}
	outfile<<endl<<"The estimate background distribution is: "<<endl;
	cout<<endl<<"The estimate background distribution is: "<<endl;
	itemp = 0;
	for(letter=0;letter<element_number;letter++)
	{
		outfile<<setw(2)<<int(h[letter]+0.5)<<"  ";
		cout<<setw(2)<<int(h[letter]+0.5)<<"  ";
	}
	outfile<<endl<<endl;
	cout<<endl<<endl;


	//
	double *p = new double[element_number];
	memset(p,0,sizeof(double)*element_number);
	for(i=0;i<element_number;i++)
	{
		p[i] = h[i]/100;
		if(i == 0)
		{
			continue;
		}
		p[i] = p[i]+p[i-1];
	}
	int SN = 100000;
	double *p_value = new double[SN];
	memset(p_value,0,sizeof(double)*SN);
	for(i=0;i<SN;i++)
	{
		for(j=0;j<J;j++)
		{
			temp = double(rand()%10001)/10000;
			if(type == 0)
			{
				if(temp >= 0 && temp< p[0])
				{
					p_value[i] = p_value[i]+log(q[0][j]/h[0]);
				}
				if(temp >= p[0] && temp< p[1])
				{
					p_value[i] = p_value[i]+log(q[1][j]/h[1]);
				}
				if(temp >= p[1] && temp< p[2])
				{
					p_value[i] = p_value[i]+log(q[2][j]/h[2]);
				}
				if(temp >= p[2])
				{
					p_value[i] = p_value[i]+log(q[3][j]/h[3]);
				}
			}
			else
			{
				if(temp >= 0 && temp< p[0])
				{
					p_value[i] = p_value[i]+log(q[0][j]/h[0]);
				}
				if(temp >= p[0] && temp< p[1])
				{
					p_value[i] = p_value[i]+log(q[1][j]/h[1]);
				}
				if(temp >= p[1] && temp< p[2])
				{
					p_value[i] = p_value[i]+log(q[2][j]/h[2]);
				}
				if(temp >= p[2] && temp< p[3])
				{
					p_value[i] = p_value[i]+log(q[3][j]/h[3]);
				}
				if(temp >= p[3] && temp< p[4])
				{
					p_value[i] = p_value[i]+log(q[4][j]/h[4]);
				}
				if(temp >= p[4] && temp< p[5])
				{
					p_value[i] = p_value[i]+log(q[5][j]/h[5]);
				}
				if(temp >= p[5] && temp< p[6])
				{
					p_value[i] = p_value[i]+log(q[6][j]/h[6]);
				}
				if(temp >= p[6] && temp< p[7])
				{
					p_value[i] = p_value[i]+log(q[7][j]/h[7]);
				}
				if(temp >= p[7] && temp< p[8])
				{
					p_value[i] = p_value[i]+log(q[8][j]/h[8]);
				}
				if(temp >= p[8] && temp< p[9])
				{
					p_value[i] = p_value[i]+log(q[9][j]/h[9]);
				}
				if(temp >= p[9] && temp< p[10])
				{
					p_value[i] = p_value[i]+log(q[10][j]/h[10]);
				}
				if(temp >= p[10] && temp< p[11])
				{
					p_value[i] = p_value[i]+log(q[11][j]/h[11]);
				}
				if(temp >= p[11] && temp< p[12])
				{
					p_value[i] = p_value[i]+log(q[12][j]/h[12]);
				}
				if(temp >= p[12] && temp< p[13])
				{
					p_value[i] = p_value[i]+log(q[13][j]/h[13]);
				}
				if(temp >= p[13] && temp< p[14])
				{
					p_value[i] = p_value[i]+log(q[14][j]/h[14]);
				}
				if(temp >= p[14] && temp< p[15])
				{
					p_value[i] = p_value[i]+log(q[15][j]/h[15]);
				}
				if(temp >= p[15] && temp< p[16])
				{
					p_value[i] = p_value[i]+log(q[16][j]/h[16]);
				}
				if(temp >= p[16] && temp< p[17])
				{
					p_value[i] = p_value[i]+log(q[17][j]/h[17]);
				}
				if(temp >= p[17] && temp< p[18])
				{
					p_value[i] = p_value[i]+log(q[18][j]/h[18]);
				}
				if(temp >= p[18])
				{
					p_value[i] = p_value[i]+log(q[19][j]/h[19]);
				}
			}
		}
	}
	qsort(p_value,SN,sizeof(double),cmp);
	delete [] p;


	//Output other information
	outfile<<endl<<"The total runtime is "<<runtime<<"s"<<endl;
	outfile<<"The maximum unnormalized joint posterior probability is "<<MF<<endl;
	outfile<<"The maximum information per parameter is "<<MF_2<<endl;
	outfile<<"The acceptance rates of each move and phase shifts are ";
	cout<<endl<<"The total runtime is "<<runtime<<"s"<<endl;
	cout<<"The maximum unnormalized joint posterior probability is "<<MF<<endl;
	cout<<"The maximum information per parameter is "<<MF_2<<endl;
	cout<<"The acceptance rates of each move and phase shifts are ";
	for(i=0;i<6;i++)
	{
		outfile<<setw(6)<<double(accept[i])/double(receive[i])*100<<"%   ";
		cout<<setw(6)<<double(accept[i])/double(receive[i])*100<<"%   ";
	}
	outfile<<endl;
	outfile<<endl;
	cout<<endl;
	cout<<endl;
	

	//Output the information of esimate repeat units 
	outfile<<endl<<"The estimate repeat units are: "<<endl;
	outfile<<"(Seq #, #, Starting, Repeat Unit, Ending, Likelihood, p-value)"<<endl;
	cout<<"(Seq #, #, Starting, Repeat Unit, Ending, Likelihood, p-value)"<<endl;
	for(i=0;i<N;i++)
	{
		for(k=1;k<=final_cn[i];k++)
		{
			outfile<<setw(3)<<i+1<<","<<setw(3)<<k<<","<<setw(6)<<final_a[i]+getUnitPosition(final_s[i],Omega-1,k,J)+1<<", ";
			cout<<setw(3)<<i+1<<","<<setw(3)<<k<<","<<setw(6)<<final_a[i]+getUnitPosition(final_s[i],Omega-1,k,J)+1<<", ";
			temp = 0;
			for(j=0;j<J;j++)
			{
				temp = temp+log(multiplier(q,R[i][final_a[i]+getUnitPosition(final_s[i],Omega-1,k,J)+j],j,type)/multiplier_2(h,R[i][final_a[i]+getUnitPosition(final_s[i],Omega-1,k,J)+j],type));
				outfile<<R[i][final_a[i]+getUnitPosition(final_s[i],Omega-1,k,J)+j];
				cout<<R[i][final_a[i]+getUnitPosition(final_s[i],Omega-1,k,J)+j];
			}
			for(j=SN-1;j>=0;j--)
			{
				if(temp >= p_value[j])
				{
					break;
				}
			}
			outfile<<setw(6)<<final_a[i]+getUnitPosition(final_s[i],Omega-1,k,J)+J<<","<<setw(10)<<temp<<","<<setw(10)<<double(SN-1-j)/double(SN/100)<<"%"<<endl;
			cout<<setw(6)<<final_a[i]+getUnitPosition(final_s[i],Omega-1,k,J)+J<<","<<setw(3)<<int(temp+0.5)<<","<<setw(10)<<double(SN-1-j)/double(SN/100)<<"%"<<endl;
		}
		outfile<<endl;
		cout<<endl;
	}
	outfile<<endl;
	cout<<endl; 	
	//***********************************************************************************************
	

	/*
	outfile.close();
	outfile2.close();
	outfile3.close();
	outfile4.close();
	*/
	outfile.close();
	

	return 0;
}

int main(int argc,char *argv[])
{
	if(argc <12){
		cout<<"Invalid format!"<<endl;
		cout<<"The right format is: BASARD InputFilePath OutputFilePath SequenceType SequenceNumber PatterWidth MaximumCopyNumber MaximumGapLength Epsilon_1 Epsilon_2 IterationNumber RunningMode"<<endl;
		exit(0);
	}
	int i = 0;

	//Input the path of the input sequences file
	//char *inputfile = "C:/Users/Kiwi_Kina/Documents/My ePapers/5''. JA_An MCMC Algorithm for Detecting Short Adjacent Repeats shared by Multiple Sequences_20110128/BASARD/data_set/synthetic_data_set_12M-L_1.txt";

	//Input the path of the output results file
	//char *outputfile = "summary_2.txt";
	
	//Input the settings
	int type = atoi(argv[3]);   //'0' for nucleotide sequences and '1' for amino acid sequences
	int N = atoi(argv[4]);   //'N' for the number of sequences
	int J = atoi(argv[5]);   //'J' for pattern width
	int Omega = atoi(argv[6]);   //'Omega' for maximum allowed copy number
	int G = atoi(argv[7]);   //'G' for maximum allowed gap length
	double epsilon = atof(argv[8]);   //'epsilon' for the tuning constant, equal to 'epsilon_1' in the paper
	double epsilon_2 = atof(argv[9]);   //'epsilon_2' for the tuning constant, equal to 'epsilon_2' in the paper
	int I = atoi(argv[10]);   //'I' for iterations
	double Q[] = {double(20)/double(100),double(20)/double(100),double(20)/double(100),double(20)/double(100),double(20)/double(100)};   //'Q' for the proposing probabilities for five types of moves
	for(i=1;i<5;i++)
	{
		Q[i] = Q[i]+Q[i-1];
	}
	double tao = 1;   //'tao' for temperature
	int ss = atoi(argv[11]);   //'ss=1' for fast mode and 'ss=0' for complete mode

	
	BASARD(type,N,J,G,Omega,epsilon,epsilon_2,tao,Q,I,argv[1],argv[2],ss);

	return 0;
}
