#ifndef READFILE
#define READFILE

#include <iostream>  
#include <malloc.h>                                                                                                                                        
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <string.h>
//#include <direct.h>
#include <fcntl.h>
#include <unistd.h>
#include <dirent.h>

#define MAXN 65530       // maximum length of a sequence
#define MAXK 2000        // maximum number of sequences
#define MAXL 256         // maximum number of characters in a label
#define _MAX_PATH 260   

// Convert an unsigned value to text in the given radix.
//
//   val   - value to convert
//   buf   - destination; receives the digits followed by '\0'. The caller
//           must provide enough room for every digit plus the terminator.
//   radix - numeric base, 2..36; digit values above 9 are written as
//           lowercase letters starting at 'a'
//
// A NULL buffer is ignored. A radix outside 2..36 yields an empty string:
// radix 0 would divide by zero and radix 1 would never terminate.
//
// Declared inline because the definition lives in a header; without it,
// including the header from two translation units is a duplicate definition.
inline void itoa ( unsigned long val, char *buf, unsigned radix )
{
        if (buf == NULL)
            return;

        if (radix < 2 || radix > 36) {
            *buf = '\0';
            return;
        }

        char *p = buf;          /* write cursor */

        /* Emit digits least-significant first. */
        do {
            const unsigned digval = (unsigned) (val % radix);
            val /= radix;

            if (digval > 9)
                *p++ = (char) (digval - 10 + 'a');   /* a letter */
            else
                *p++ = (char) (digval + '0');        /* a digit */
        } while (val > 0);

        *p-- = '\0';            /* terminate; p now points at the last digit */

        /* The digits are in reverse order: swap from both ends inwards. */
        for (char *first = buf; first < p; ++first, --p) {
            const char temp = *p;
            *p = *first;
            *first = temp;
        }
}

/*******************************************************
      Class: Data structure for storing
             all DNA sequences
 *******************************************************/

// In-memory database of DNA sequences and their labels.
//
// The constructor allocates fixed-size storage with malloc: MAXK rows of
// MAXL characters for the labels and MAXK rows of MAXN characters for the
// sequences. The destructor frees it.
class DNAdata{
  private:
  
  int nseq;                        // Number of sequences read
  int *DNAlength;                  // Length of each sequence (MAXK entries)
  char **DNAseq, **DNAlabel;       // The sequences and their description

  // Copying is disabled (declared private, never defined): the object owns
  // raw malloc'ed buffers, so a member-wise copy would free them twice.
  DNAdata(const DNAdata&);
  DNAdata& operator=(const DNAdata&);
  
  public:

  DNAdata();       // Constructor
  ~DNAdata();      // Destructor

  void print_sequence(int i);          // Print the i-th sequence in the database
  void Unalign();                      // Remove the indels (blanks) from all sequences
  void Save(char *filename);           // Save all sequences read to a fasta file
  void SaveSiteID(char *filename, int *loci_CRS, int nstart, int nend);  // Print the aligned sequences with the site id

  char* GetSeq(int i);         // Return DNAseq[i]
  int   GetNumSeq();           // Return nseq

  int  SequenceFileOpen(char *filename);           // Fetch a fasta file into the database
  int  SequenceDirectoryOpen(char *directoryname); // Fetch all fasta files in a directory into the database
  void AddSequence(char *seq, char *label);        // Add a new sequence to the database
 
  private:

  char **sequence(int len, int k);           // Allocate the memory for the sequences
  void free_sequence(char **seq);            // Free the memory for the sequences
};




/*******************************************************
             Constructors and destructors
 *******************************************************/

// Free a table built by sequence(): one contiguous character block, reached
// through seq[0], plus the array of row pointers itself.
// A NULL table is accepted and ignored.
void DNAdata::free_sequence(char **seq){
  if(seq==NULL)
    return;
  free(seq[0]);   // the character block shared by every row
  free(seq);      // the row-pointer array
}

// Create an empty database: MAXK label rows of MAXL characters, MAXK
// sequence rows of MAXN characters and MAXK length entries.
// If any allocation fails, an error is written to stderr and the program
// exits, because no other member can work without these buffers.
DNAdata::DNAdata(){
  nseq=0;
  DNAlabel=sequence(MAXL,MAXK);
  DNAseq=sequence(MAXN,MAXK);
  // calloc rather than malloc so that every length starts at 0 instead of
  // holding indeterminate values.
  DNAlength=(int *)calloc(MAXK,sizeof(int));

  // Row 0 points at the start of the character block, so a NULL row 0 means
  // the block itself could not be allocated.
  if(DNAlabel==NULL || DNAlabel[0]==NULL ||
     DNAseq==NULL   || DNAseq[0]==NULL   ||
     DNAlength==NULL){
    fprintf(stderr,"DNAdata: out of memory while allocating the sequence database\n");
    exit(EXIT_FAILURE);
  }
}

// Give back the three buffers obtained in the constructor, newest first.
DNAdata::~DNAdata(){
  free(DNAlength);
  free_sequence(DNAseq);
  free_sequence(DNAlabel);
}

// Allocate a table of k rows, each len characters long.
//
// All rows live in one contiguous block of len*k characters; the returned
// array holds a pointer to the start of each row, so row 0 is also the
// address of the block. Release the table with free_sequence().
//
// Returns NULL when len or k is not positive, when len*k does not fit in
// size_t, or when either allocation fails.
char** DNAdata::sequence(int len, int k){
  if(len<=0 || k<=0)
    return NULL;

  // Do the size arithmetic in size_t: the product of two ints can overflow.
  const size_t rowlen=(size_t)len;
  const size_t rows=(size_t)k;
  if(rowlen>((size_t)-1)/rows)
    return NULL;

  char *avec=(char *)malloc(rowlen*rows);
  char **amat=(char **)malloc(rows*sizeof(char*));
  if(avec==NULL || amat==NULL){
    free(avec);   // free(NULL) is a no-op, so this handles either failure
    free(amat);
    return NULL;
  }

  for(size_t i=0;i<rows;i++)
    amat[i]=avec+i*rowlen;
  return amat;
}

/*******************************************************
             Printing the data
 *******************************************************/

void DNAdata::print_sequence(int i){
//void print_sequence(int i){
  printf("%s\n",DNAlabel[i]);
  printf("%s\n",DNAseq[i]);
}


/*******************************************************
             Data retrieving and writing
 *******************************************************/

int DNAdata::GetNumSeq(){
  return nseq;
}

char* DNAdata::GetSeq(int i){
  return DNAseq[i];
}

// Read sequences from a fasta file and return the number of sequences read,
// Note that a fasta file is allowed to have more than one sequence.

int DNAdata::SequenceFileOpen(char *filename)
{
  int k=0;
  unsigned int nLen;
  char string[MAXL],*p;
  FILE *fp;

  fp=fopen(filename,"r");
  while( fgets(string, MAXL, fp) != NULL ){
    if(string[0]=='>'){
      k++;
      if(((p=strchr(string,10))!=NULL)||((p=strchr(string,0))!=NULL)){
        nLen = p-string;
        DNAlabel[nseq+k-1][0]='\0';
        strncat(DNAlabel[nseq+k-1],string,nLen);
      }
      DNAseq[nseq+k-1][0]='\0';
    }else{
      if(((p=strchr(string,10))!=NULL)||((p=strchr(string,0))!=NULL)){
        nLen = p-string;
        strncat(DNAseq[nseq+k-1],string,nLen);
      }
    }
  }
  fclose(fp);
  nseq += k;
  return k;
}

// Read sequences from all fasta files in a directory
// and return the total number of sequences read.
//
// directoryname may be relative to the current working directory or
// absolute; NULL or an empty string means the current working directory.
// Every entry whose name ends in ".fasta" is passed to SequenceFileOpen().
// Names that merely contain ".fasta" (for example editor backups ending in
// ".fasta~") are skipped.
//
// The directory is read through its path, so the working directory of the
// process is never changed.
//
// Returns 0, after printing a message, if the directory cannot be opened.
int DNAdata::SequenceDirectoryOpen(char *directoryname){
  static const char suffix[]=".fasta";
  const size_t suffixlen=sizeof(suffix)-1;

  const char *dirpath=".";
  if(directoryname!=NULL && directoryname[0]!='\0')
    dirpath=directoryname;
  const size_t dirlen=strlen(dirpath);

  DIR *dir_info=opendir(dirpath);
  if(dir_info==NULL){
    printf("can not open this directory!\n");
    return 0;
  }

  int count=0;
  struct dirent *dir_entry;
  while((dir_entry=readdir(dir_info))!=NULL){
    const char *name=dir_entry->d_name;
    const size_t namelen=strlen(name);

    // Suffix test; this also rejects "." and "..".
    if(namelen<suffixlen || strcmp(name+namelen-suffixlen,suffix)!=0)
      continue;

    // Build "<dirpath>/<name>" in a buffer of exactly the required size.
    char *path=(char *)malloc(dirlen+1+namelen+1);
    if(path==NULL){
      fprintf(stderr,"SequenceDirectoryOpen: out of memory\n");
      break;
    }
    memcpy(path,dirpath,dirlen);
    path[dirlen]='/';
    memcpy(path+dirlen+1,name,namelen+1);   // +1 copies the terminating '\0'

    count += SequenceFileOpen(path);
    free(path);
  }

  closedir(dir_info);
  return count;
}

// Add a new sequence to the database

void DNAdata::AddSequence(char *seq, char *label)
{
  strcpy(DNAseq[nseq],seq);
  strcpy(DNAlabel[nseq],label);
  DNAlength[nseq] = strlen(seq);
  nseq ++;
}

// Save the results of alignment in fasta format

void DNAdata::Save(char *filename){
  int i;
  FILE *fp;
  
  fp = fopen(filename,"w");
  for(i=0;i<nseq;i++)
    fprintf(fp,">%s\n%s\n",DNAlabel[i],DNAseq[i]);
  fclose(fp);
}

// Save the results of alignment with site id shown

void DNAdata::SaveSiteID(char *filename, int *loci_CRS, int nstart, int nend){
  char label[7], locilabel[7];
  int i,j,len,maxlen;
  int digit_seq, digit_loci;
  FILE *fp;
  
  fp = fopen(filename,"w");
  
  maxlen = 0;
  for(i=nstart;i<=nend;i++)
    if(DNAlength[i]>maxlen)
      maxlen = DNAlength[i];

  itoa(nend,label,10);
  //sprintf(label,"%d",nend);
  digit_seq = strlen(label);

  itoa(maxlen,label,10);
  //sprintf(label,"%d",maxlen);
  digit_loci = strlen(label);
  
  for(i=0;i<digit_seq;i++){
    fprintf(fp,"      |");
    for(j=nstart;j<=nend;j++){
      itoa(j,label,10);
      //sprintf(label,"%d",j);
      fprintf(fp,"%c|",label[i]);
    }
    fprintf(fp,"\n");
  }
  fprintf(fp,"      |");
  for(j=nstart;j<=nend;j++)
    fprintf(fp," |");
  fprintf(fp,"\n");
  
  for(i=0;i<maxlen;i++){
    itoa(loci_CRS[i],label,10);
    //sprintf(label,"%d",loci_CRS[i]);
    len=strlen(label);

    strcpy(locilabel,"      ");
    for(j=0;j<len;j++)
      locilabel[j]=label[j];

    fprintf(fp,"%s|",locilabel);
    for(j=nstart;j<=nend;j++){
      if(i<DNAlength[j])
        fprintf(fp,"%c|",DNAseq[j][i]);
      else
        fprintf(fp," |");
      }
    fprintf(fp,"\n");
  }
  fclose(fp);
}

/*******************************************************
             Data operations
 *******************************************************/

// The function Unalign() do the followings:
// 1. Remove the indels in all sequences
// 2. Change all small letters to capital letters
// 3. Change U to T

void DNAdata::Unalign(){
  char temp;
  int i,j,k;

  for(k=0;k<nseq;k++){
    j=0;
    for(i=0;i<strlen(DNAseq[k]);i++){
      temp=DNAseq[k][i];
      if(temp=='.')
        temp=DNAseq[0][i];
      switch(temp){
        case 'a':
        case 'A': DNAseq[k][j]='A'; j++; break;
        case 'c':
        case 'C': DNAseq[k][j]='C'; j++; break;
        case 'g':
        case 'G': DNAseq[k][j]='G'; j++; break;
        case 't':
        case 'T':
        case 'u':
        case 'U': DNAseq[k][j]='T'; j++; break;
      }
    }
    DNAseq[k][j]='\0';
  }
}

#endif
