#ifndef PHYLOTREE
#define PHYLOTREE

#include <iostream>                                                                                                                                          
#include <malloc.h>
#include <stdio.h>
#include <string.h>

#include "readfile.cpp"
#include "lagan.cpp"
#include "phylonode.cpp"

#define LEFT 0
#define RIGHT 1

#define MAXN 65530       // maximum length of a sequence
#define MAXK 2000        // maximum number of sequences
#define MAXL 256         // maximum number of characters in a label



/*******************************************************
               Phylogeny
 *******************************************************/

// Phylogenetic tree that is grown one aligned sequence at a time.
// The class keeps a "combined" alignment (SeqComb), the sequence at the
// current root (SeqRoot), and per-node marker lists describing the
// differences along each edge. Member functions are defined below the class.
class familytree{
  public:

  /*******************************************************************
     SeqComb    -- The combined sequence
     SeqRoot    -- The first-inputted sequence (the sequence at the root)
     SeqNew     -- The newly-inputted sequence after alignment
     loci_id[i] -- the site id of the absolute position i
     pos_id[i]  -- the absolute position of the site labeled i

     When the tree is printed, a notation such as 2226.1 refers to a
     site that does not appear in the Cambridge reference sequence due
     to insertion / deletion events. For an insertion event it denotes
     the first inserted site to the right of the site labeled 2226.

     loci_CRS[i]  -- the site id (before the dot) of the absolute position i
     loci_CRS2[i] -- the site id (after the dot) of the absolute position i
   *******************************************************************/

  char *SeqComb, *SeqRoot, *SeqNew;
  int  *loci_id, *pos_id, *loci_CRS, *loci_CRS2;
  int  LengthSeqComb, nseq;    // Length of the combined sequence; number of sequences in the tree.

  phylonode par_root; // Placeholder above the tree: par_root.child1 is the current root, which can be changed by the program.
  phylonode **table;  // table[i] is the tree node corresponding to the sequence labeled i (NULL if absent).

  familytree();
  ~familytree();

  void Is_human_mtdna();           // For human mitochondrial DNA sequences, the hypervariable regions are excluded to improve accuracy.
  void InitializeCombRoot(char *input);   // Use input as both the combined sequence and the root sequence.

  int GetNumSeq();     // Return nseq, the number of sequences on the tree.
  int* GetCRSInfo();   // Return loci_CRS, the site-absolute correspondence of the Cambridge reference sequence.

  void SetCRS(int seq_id);         // Label the sites according to the sequence with identity seq_id.
  void SetRoot(int seq_id);        // Rotate the tree (including markers) so that the node seq_id is the root.
  void SetRootTopologyOnly(int seq_id);   // Same rotation for a tree that only carries marker counts.

  void count_all_nodes();                      // Count the number of markers on each node.
  void join_tree(phylonode *child, phylonode *parent, int dir);   // Link child below parent on side dir (LEFT or RIGHT).
  void add_tree(char *input, int seq_id);      // Add a sequence to the tree.

  void print();                    // Print the combined sequence, root sequence, and the newly inputted sequence.
  void save_tree(char *filename);  // Save the tree in Newick format.
  void print_tree_to_file(char *filename);   // Save the dendrogram in an output file.
  void SaveAlignedSeq(DNAdata *my_aligned_data);   // Append the aligned sequence of every tabled node to my_aligned_data.

  void load_tree(char *filename);                  // Load a file in the layout written by save_tree.
  void load_topology(FILE *fp, char *textline);    // Parse the Newick part of such a file.

 
  private:

  LaganAlignment *alignmachine;              // Aligner used by InputNewSequence.
  char *SeqComb2, *SeqRoot2, *seqP, *seqQ;   // Spare buffers (swapped with SeqComb/SeqRoot) and scratch sequences.
  int  *loci_id2, *freq;                     // Spare loci_id buffer; per-site weight used when scoring markers.
  bool *indentation;                         // indentation[d] tells the dendrogram printer whether to draw '|' in column d.

  void get_sequence(phylonode *node, char *seq);    // Reconstruct the sequence on node using the markers and the root sequence.

  phylonode* calculate_abvalue_from_node(phylonode *root);

  void free_familytree_from_node(phylonode *mytree);
  void free_familytree();

  // Count the number of markers on the nodes.
  int count_one_node(phylonode *root);
  void count_nodes(phylonode *root);

  // Align the inputted sequence with the combined sequence, then update SeqComb, SeqRoot, and SeqNew.
  void InputNewSequence(char *input);   
  void AddBlank(char *Seq, int start, int n);
  void AddBlankNum(int *id, int start, int from, int n);
  void numcpy(int *id, int *source, int start, int from, int n);
  void reconstruct_seq(char *input, FragmentChain *fragment);

  // For printing the nodes on the tree to an output file.
  void print_one_marker(FILE *fp, int loci);
  void print_one_node(FILE *fp, phylonode *root);
  void print_tree_from_node(FILE *fp, phylonode *root, int depth);

  // For saving the tree in Newick format.
  void save_tree_from_node(FILE *fp, phylonode *mynode);
};



/*******************************************************
      Constructors and destructors
 *******************************************************/

// Allocates the fixed-size working buffers (MAXN entries for every
// sequence / index array, MAXK entries for the leaf table) and starts with
// an empty tree: no sequences, empty strings, every site weighted 1.
// NOTE(review): allocation failures are not detected here.
familytree::familytree(){
  int i;
  // The dendrogram printer indexes indentation[] by nesting depth; size it
  // for a completely unbalanced tree of MAXK leaves instead of a fixed 300.
  const int n_indent = 2*MAXK+2;

  SeqComb  =(char *)malloc((unsigned) MAXN*sizeof(char));
  SeqComb2  =(char *)malloc((unsigned) MAXN*sizeof(char));
  SeqRoot =(char *)malloc((unsigned) MAXN*sizeof(char));
  SeqRoot2 =(char *)malloc((unsigned) MAXN*sizeof(char));
  SeqNew   =(char *)malloc((unsigned) MAXN*sizeof(char));
  seqP =(char *)malloc((unsigned) MAXN*sizeof(char));
  seqQ =(char *)malloc((unsigned) MAXN*sizeof(char));
  freq =(int  *)malloc((unsigned) MAXN*sizeof(int ));
  loci_id =(int  *)malloc((unsigned) MAXN*sizeof(int ));
  loci_id2 =(int  *)malloc((unsigned) MAXN*sizeof(int ));
  pos_id   =(int  *)malloc((unsigned) MAXN*sizeof(int ));
  loci_CRS =(int  *)malloc((unsigned) MAXN*sizeof(int ));
  loci_CRS2 =(int  *)malloc((unsigned) MAXN*sizeof(int ));
  table =(phylonode**)malloc((unsigned) MAXK*sizeof(phylonode*));
  indentation = (bool*)malloc((unsigned) n_indent*sizeof(bool));
  alignmachine = new LaganAlignment();

  // Members that other methods read before any sequence has been added.
  nseq = 0;
  LengthSeqComb = 0;
  SeqComb[0]=SeqComb2[0]=SeqRoot[0]=SeqRoot2[0]=SeqNew[0]='\0';
  seqP[0]=seqQ[0]='\0';

  par_root.SetNode(-1);
  for(i=0;i<MAXK;i++)
    table[i]=NULL;

  for(i=0;i<MAXN;i++)
    freq[i]=1;

  for(i=0;i<n_indent;i++)
    indentation[i]=false;
}

// Releases every buffer obtained in the constructor, then the marker lists
// hanging off the tree, and finally the aligner.
familytree::~familytree(){
  // Same release order as the individual free() calls this replaces.
  void *owned[] = {
    SeqComb, SeqComb2, SeqRoot, SeqRoot2, SeqNew,
    seqP, seqQ,
    loci_id, loci_id2, pos_id, loci_CRS, loci_CRS2,
    freq, table, indentation
  };
  const unsigned n_owned = sizeof(owned)/sizeof(owned[0]);
  for(unsigned b=0;b<n_owned;b++)
    free(owned[b]);

  // The tree is reached through par_root, not through table, so it can
  // still be walked after table has been released.
  free_familytree();
  delete alignmachine;
}

// Post-order walk over the subtree rooted at mytree that releases each
// node's marker list and then the list head itself.
// NOTE(review): the phylonode objects themselves are not deleted here --
// confirm whether that is handled elsewhere or is a leak.
void familytree::free_familytree_from_node(phylonode *mytree){
  if(mytree!=NULL){
    free_familytree_from_node(mytree->child1);
    free_familytree_from_node(mytree->child2);
    mytree->free_markers();
    delete mytree->markers;
  }
}

void familytree::free_familytree(){
  free_familytree_from_node(par_root.child1);
}

// For human mitochondrial DNA sequences, the hypervariable regions are
// excluded to improve accuracy: every site whose 1-based number lies
// outside 577..16023 and below 16577 gets weight 0 in freq[].
void familytree::Is_human_mtdna(){
  for(int idx=0; idx<MAXN; ++idx){
    const int site = idx+1;                               // 1-based site number
    const bool before_kept_range = (site<577);
    const bool between_kept_ranges = (site>16023)&&(site<16577);
    if(before_kept_range||between_kept_ranges)
      freq[idx]=0;
  }
}

/*******************************************************
     Tree operations
 *******************************************************/

// Links child below parent. dir selects the slot in parent (LEFT -> child1,
// RIGHT -> child2; any other value leaves the parent's slots untouched).
// Either pointer may be NULL: a NULL parent only clears child->parent, a
// NULL child only clears the chosen slot.
void familytree::join_tree(phylonode *child, phylonode *parent, int dir){
  if(parent!=NULL){
    switch(dir){
      case LEFT:
        parent->child1=child;
        break;
      case RIGHT:
        parent->child2=child;
        break;
      default:
        break;
    }
  }
  if(child!=NULL)
    child->parent=parent;
}

int familytree::GetNumSeq(){
  return nseq;
}

/*******************************************************
      The labeling sequence and the root can
        be changed using SetCRS and SetRoot functions.
 *******************************************************/

void familytree::InitializeCombRoot(char *input){
  int i;
  
  LengthSeqComb = strlen(input);
  strcpy(SeqComb,input);
  strcpy(SeqRoot,input);

  for(i=0;i<LengthSeqComb;i++){
    loci_id[i]=i;
    pos_id[i]=i;
  }
}


void familytree::SetCRS(int seq_id){
  int i, count_nonempty=0, count_empty=0;

  if(table[seq_id]!=NULL){
    get_sequence(table[seq_id],seqP);
    for(i=0; i < LengthSeqComb; i++){
      if(seqP[i]!='-'){
        count_nonempty++;
        count_empty = 0;
      }else{
        count_empty++;
      }
      loci_CRS[i] = count_nonempty;
      loci_CRS2[i] = count_empty;
    }
  }
}

// For the phylogeny with markers
void familytree::SetRoot(int seq_id){
  phylonode *new_root;
  phylonode *current, *parent, *sibling, *grandparent;
  marker *temp1, *temp2;
  char *tmpc;
  
  new_root = table[seq_id];
  get_sequence(new_root,SeqRoot);
  tmpc = SeqRoot; SeqRoot = SeqRoot2; SeqRoot2 = tmpc;

  current = NULL;
  parent = new_root;
  temp1 = NULL;
  while(parent!=NULL){
    grandparent = parent->parent;
    if(current==parent->child1)
      sibling = parent->child2;
    else
      sibling = parent->child1;
    join_tree(parent,current,LEFT);
    join_tree(sibling,parent,RIGHT);
    parent->child1 = NULL;
    temp2 = parent->markers->m_pNext;
    parent->markers->m_pNext = temp1;
    temp1 = temp2;
    parent->swap_markers();
    current = parent;
    parent = grandparent;
  }
  par_root.child1 = new_root;
}

// For the phylogeny without markers
void familytree::SetRootTopologyOnly(int seq_id){
  phylonode *new_root;
  phylonode *current, *parent, *sibling, *grandparent;
  int temp1, temp2;
  char *tmpc;
  
  new_root = table[seq_id];

  current = NULL;
  parent = new_root;
  temp1 = 0;
  while(parent!=NULL){
    grandparent = parent->parent;
    if(current==parent->child1)
      sibling = parent->child2;
    else
      sibling = parent->child1;
    join_tree(parent,current,LEFT);
    join_tree(sibling,parent,RIGHT);
    parent->child1 = NULL;
    temp2 = parent->n_markers;
    parent->n_markers = temp1;
    temp1 = temp2;
    current = parent;
    parent = grandparent;
  }
  par_root.child1 = new_root;
}



// Returns the loci_CRS array (filled by SetCRS); the caller gets the
// internal buffer itself, not a copy.
int* familytree::GetCRSInfo(){
  int *crs_table = loci_CRS;
  return crs_table;
}

/*******************************************************
     The following routine reconstructs the sequence
       located at a tree node using SeqRoot.
 *******************************************************/

// Writes the sequence at `node` into seq (LengthSeqComb characters plus a
// terminator). Walking from the node towards the root, each node's markers
// supply the child-side character for their site; the marker nearest to
// `node` wins, and the topmost node's own markers are not consulted. Sites
// no marker touched are taken from SeqRoot.
// seq is blanked before SeqRoot is read, so seq must not be SeqRoot itself.
void familytree::get_sequence(phylonode *node, char *seq){
  int site=0;

  // ' ' marks "not decided yet".
  while(site<LengthSeqComb)
    seq[site++]=' ';

  for(phylonode *walker=node; walker->parent!=NULL; walker=walker->parent){
    marker *mk = walker->markers->m_pNext;
    while(mk!=NULL){
      char &slot = seq[pos_id[mk->loci]];
      if(slot==' ')
        slot = mk->ch_child;
      mk = mk->m_pNext;
    }
  }

  for(site=0; site<LengthSeqComb; ++site){
    if(seq[site]==' ')
      seq[site]=SeqRoot[site];
  }
  seq[LengthSeqComb]='\0';
}


/*******************************************************
     The following routines are used to
      input the new sequence and update the
      combined sequence after alignment
 *******************************************************/

// Writes n gap characters ('-') into Seq starting at index start and
// terminates the string right after them.
void familytree::AddBlank(char *Seq, int start, int n){
  int filled=0;
  while(filled<n){
    Seq[start+filled]='-';
    ++filled;
  }
  Seq[start+n]='\0';
}

// Fills id[start .. start+n-1] with the consecutive values
// from, from+1, ..., from+n-1.
void familytree::AddBlankNum(int *id, int start, int from, int n){
  int offset=0;
  while(offset<n){
    id[start+offset]=from+offset;
    ++offset;
  }
}

// Copies n integers from source[from ..] to id[start ..], front to back.
void familytree::numcpy(int *id, int *source, int start, int from, int n){
  int offset=0;
  while(offset<n){
    id[start+offset]=source[from+offset];
    ++offset;
  }
}

// Rebuilds the combined alignment after `input` has been aligned against
// SeqComb. The fragment chain is walked in order and, fragment by fragment,
// the new combined sequence (SeqComb2), the gapped root sequence (SeqRoot2),
// the gapped new sequence (SeqNew) and the new site-id table (loci_id2) are
// written; finally pos_id is rebuilt and the spare buffers are swapped in.
// NOTE(review): nothing here checks that the rebuilt length k, or the new
// site ids LengthSeqComb+m, stay below the MAXN capacity of the buffers.
void familytree::reconstruct_seq(char *input, FragmentChain *fragment){
  // i -- current position of SeqComb
  // j -- current position of input
  // k -- current position of SeqNew
  // m -- number of inserted letters read

  int i,j,k,m;
  int len;
  FragmentNode *current;
  char *tmpc;
  int  *tmpi;
  
  i=j=k=m=0;
  SeqComb2[0]=SeqRoot2[0]=SeqNew[0]='\0';
  for(current = fragment->head;current!=NULL;current=current->pNext){
    len = current->length;
    if(current->type==INS){
      // Letters present only in input: they extend the combined sequence,
      // appear as gaps in the root, and receive fresh site ids.
      strncpy(SeqComb2+k,input+j,len);
      strncpy(SeqNew+k,input+j,len);
      AddBlank(SeqRoot2,k,len);
      AddBlankNum(loci_id2,k,LengthSeqComb+m,len);
      j += len;
      m += len;
    }else if(current->type==DEL){
      // Letters present only in the combined sequence: gaps in SeqNew,
      // existing site ids are carried over.
      strncpy(SeqComb2+k,SeqComb+i,len);
      AddBlank(SeqNew,k,len);
      strncpy(SeqRoot2+k,SeqRoot+i,len);
      numcpy(loci_id2,loci_id,k,i,len);
      i += len;
    }else{
      // Aligned stretch: both sequences advance, site ids are carried over.
      strncpy(SeqComb2+k,SeqComb+i,len);
      strncpy(SeqNew+k,input+j,len);
      strncpy(SeqRoot2+k,SeqRoot+i,len);
      numcpy(loci_id2,loci_id,k,i,len);
      i += len;
      j += len;
    }
    k += len;
  }
  // The strncpy calls above do not terminate the strings; do it once here.
  SeqComb2[k]=SeqRoot2[k]=SeqNew[k]='\0';
  // Invert the new site-id table.
  for(i=0;i<k;i++)
    pos_id[loci_id2[i]]=i;

  // Make the rebuilt buffers current; the old ones become the spares.
  LengthSeqComb = k;
  tmpc = SeqComb; SeqComb = SeqComb2; SeqComb2 = tmpc;
  tmpc = SeqRoot; SeqRoot = SeqRoot2; SeqRoot2 = tmpc;
  tmpi = loci_id; loci_id = loci_id2; loci_id2 = tmpi;
}

void familytree::InputNewSequence(char *input){
  FragmentChain *chain;

  chain = new FragmentChain();
  alignmachine->LaganAlign(SeqComb,input,LengthSeqComb,strlen(input),chain);
  reconstruct_seq(input,chain);
  delete chain;
}

/*******************************************************
     The following routines are used to
       determine the place that a newly inputted
       sequence should be inserted to.
 *******************************************************/


// Calculates the a, b and i values for every node of the subtree rooted at
// `root` (pre-order) and returns the node with the smallest ivalue; ties on
// ivalue are broken by the smaller ivalue2, and on a full tie the node
// visited first is kept.
//   avalue -- weighted count of the node's markers whose parent-side
//             character equals SeqNew at that site
//   bvalue -- the same for the child-side character
phylonode* familytree::calculate_abvalue_from_node(phylonode *root){
  phylonode *up = root->parent;
  phylonode *kids[2];
  kids[0] = root->child1;
  kids[1] = root->child2;

  // Update a and b values.
  root->avalue=0;
  root->bvalue=0;
  marker *mk = root->markers->m_pNext;
  while(mk!=NULL){
    const int where = pos_id[mk->loci];
    if(SeqNew[where]==mk->ch_child)
      root->bvalue += freq[mk->loci];
    if(SeqNew[where]==mk->ch_parent)
      root->avalue += freq[mk->loci];
    mk = mk->m_pNext;
  }

  // Update i values.
  // ivalue accumulates (a of the parent) - (b of this node) down the path,
  // ivalue2 accumulates (a of this node) - (b of the parent).
  // Note: the optimizer here is a minimizer; the manuscript maximizes.
  if(up==NULL){
    root->ivalue=0;
    root->ivalue2=0;
  }else{
    root->ivalue = up->ivalue + up->avalue - root->bvalue;
    root->ivalue2 = up->ivalue2 - up->bvalue + root->avalue ;
  }

  // Recurse into child1, then child2, keeping the best node seen so far.
  phylonode *best = root;
  for(int c=0;c<2;c++){
    if(kids[c]==NULL)
      continue;
    phylonode *candidate = calculate_abvalue_from_node(kids[c]);
    const double diff = candidate->ivalue - best->ivalue;
    if(diff<0){
      best = candidate;
    }else if(diff==0){
      if(candidate->ivalue2 < best->ivalue2)
        best = candidate;
    }
  }
  return best;
}

void familytree::add_tree(char *input, int seq_id){
  int i,j,k,pos,len;
  phylonode *new_node, *new_leaf, *ins, *par;
  marker *new_marker;
  marker *current1,*current2,*current3;

  if(par_root.child1!=NULL)
    InputNewSequence(input);

  if(par_root.child1==NULL){  // Empty tree
    InitializeCombRoot(input);
    new_node = new phylonode(seq_id);
    join_tree(new_node,NULL,LEFT);
    par_root.child1=new_node;
    table[seq_id]=new_node;
  }
  else if(par_root.child1->child1==NULL){  // One-node tree
    new_node = new phylonode(seq_id);
    join_tree(new_node,par_root.child1,LEFT);

    current1=NULL;
    for(i=LengthSeqComb-1;i>=0;i--){
      if(SeqNew[i]!=SeqRoot[i]){
        new_marker = new marker(loci_id[i],SeqRoot[i],SeqNew[i],current1);
        current1=new_marker;
      }
    }
    new_node->markers->m_pNext=current1;
    table[seq_id]=new_node;
  }
  else{  // The tree consists of at least an edge.
    // Determine where to insert
    ins=calculate_abvalue_from_node(par_root.child1);
    if(ins==par_root.child1)
      ins=par_root.child1->child1;
    par=ins->parent;

    get_sequence(ins,seqP);
    get_sequence(par,seqQ);

    new_node = new phylonode(-1);
    new_leaf = new phylonode(seq_id);

    if(par->child1==ins){
      join_tree(new_node,par,LEFT);
      join_tree(ins,new_node,LEFT);
      join_tree(new_leaf,new_node,RIGHT);
    }
    if(par->child2==ins){
      join_tree(new_node,par,RIGHT);
      join_tree(ins,new_node,RIGHT);
      join_tree(new_leaf,new_node,LEFT);
    }
    
    current1=current2=current3=NULL;

    for(i=LengthSeqComb-1;i>=0;i--){
      if((seqP[i]==seqQ[i])&&(seqP[i]!=SeqNew[i])){
        new_marker = new marker(loci_id[i],seqP[i],SeqNew[i],current3);
        current3=new_marker;
      }
      if((seqP[i]!=seqQ[i])&&(seqP[i]==SeqNew[i])){
        new_marker = new marker(loci_id[i],seqQ[i],SeqNew[i],current2);
        current2=new_marker;
      }
      if((seqP[i]!=seqQ[i])&&(seqQ[i]==SeqNew[i])){
        new_marker = new marker(loci_id[i],seqQ[i],seqP[i],current1);
        current1=new_marker;
      }
      if((seqP[i]!=seqQ[i])&&(seqQ[i]!=SeqNew[i])&&(seqP[i]!=SeqNew[i])){
        new_marker = new marker(loci_id[i],'N',seqP[i],current1);
        current1=new_marker;
        new_marker = new marker(loci_id[i],seqQ[i],'N',current2);
        current2=new_marker;
        new_marker = new marker(loci_id[i],'N',SeqNew[i],current3);
        current3=new_marker;
      }
    }

    new_leaf->markers->m_pNext=current3;
    new_node->markers->m_pNext=current2;
    ins->free_markers();
    ins->markers->m_pNext=current1;
    table[seq_id]=new_leaf;
  }
  std::cout<<std::endl<<"Sequence "<<seq_id<<" is inserted";
  nseq ++;
}




/*******************************************************
     The following routines are used to
       count the number of markers in a node.
       Here, indel of k letters is considered as one marker.
 *******************************************************/

// Counts the markers on one node, where a run of insertion markers (parent
// side is '-') or of deletion markers (child side is '-') at consecutive
// absolute positions counts as a single event.
int familytree::count_one_node(phylonode *root){
  int n_events=0;
  int ins_run=0;        // length of the insertion run currently open
  int del_run=0;        // length of the deletion run currently open
  marker *prev=NULL;    // only dereferenced while a run is open
  marker *cur=root->markers->m_pNext;

  while(cur!=NULL){
    const bool is_insertion = (cur->ch_parent=='-');
    const bool is_deletion  = (cur->ch_child=='-');

    // Close an open run when this marker is of another kind or not adjacent.
    if((ins_run!=0)&&(!is_insertion||(pos_id[cur->loci]-pos_id[prev->loci]!=1)))
      ins_run=0;
    if((del_run!=0)&&(!is_deletion||(pos_id[cur->loci]-pos_id[prev->loci]!=1)))
      del_run=0;

    if(is_insertion){
      if(ins_run==0)
        n_events++;
      ins_run++;
    }else if(is_deletion){
      if(del_run==0)
        n_events++;
      del_run++;
    }else{
      n_events++;
    }

    prev=cur;
    cur=cur->m_pNext;
  }
  return n_events;
}

// Stores count_one_node() in n_markers for `root` and, recursively, for
// every node below it (first child before second). root must not be NULL.
void familytree::count_nodes(phylonode *root){
  root->n_markers = count_one_node(root);

  phylonode *kids[2] = { root->child1, root->child2 };
  for(int k=0;k<2;k++){
    if(kids[k]!=NULL)
      count_nodes(kids[k]);
  }
}

// Recomputes n_markers for every node of the tree.
// An empty tree (no root yet) is left alone; count_nodes() dereferences
// its argument and must not be handed NULL.
void familytree::count_all_nodes(){
  if(par_root.child1!=NULL)
    count_nodes(par_root.child1);
}


/*******************************************************
     The following routine prints the
       combined sequence, root sequence
       and the newly inputted sequence.
 *******************************************************/

// Interactive dump: prints the combined, root and new sequences and then
// the loci_id and pos_id tables, waiting for one character on standard
// input after each section.
void familytree::print(){
  int idx;

  printf("Sequence Ref :\n\n%s\n\n",SeqComb);
  std::cin.get();

  printf("Sequence Root:\n\n%s\n\n",SeqRoot);
  std::cin.get();

  printf("Sequence New :\n\n%s\n\n",SeqNew);
  std::cin.get();

  printf("Loci ID:\n\n");
  idx=0;
  while(idx<LengthSeqComb){
    std::cout<<loci_id[idx]<<" ";
    ++idx;
  }
  std::cout<<std::endl;
  std::cin.get();

  printf("Pos ID:\n\n");
  idx=0;
  while(idx<LengthSeqComb){
    std::cout<<pos_id[idx]<<" ";
    ++idx;
  }
  std::cout<<std::endl;
  std::cin.get();
}


/*******************************************************
     Print the dendrogram to the output file
 *******************************************************/

// Writes the reference label of site `loci`: the loci_CRS number and, when
// the loci_CRS2 offset is non-zero, ".<offset>" (e.g. 2226.1).
void familytree::print_one_marker(FILE *fp, int loci){
  const int where = pos_id[loci];
  const int major = loci_CRS[where];
  const int minor = loci_CRS2[where];

  fprintf(fp,"%d",major);
  if(minor!=0)
    fprintf(fp,".%d",minor);
}

void familytree::print_one_node(FILE *fp, phylonode *root){
  int j,blkin,blkdel;
  marker *current,*previous=NULL;

  blkin=0;
  blkdel=0;
  for(current=root->markers->m_pNext;current!=NULL;current=current->m_pNext){
    if(blkin!=0){
      if((current->ch_parent!='-')||(pos_id[current->loci]-pos_id[previous->loci]!=1)){
        fprintf(fp,"-");
        print_one_marker(fp,previous->loci);
        fprintf(fp," %d",blkin);
        fprintf(fp," bp insertion, ");
        blkin=0;
      }
    }else if(blkdel!=0){
      if((current->ch_child!='-')||(pos_id[current->loci]-pos_id[previous->loci]!=1)){
        fprintf(fp,"-");
        print_one_marker(fp,previous->loci);
        fprintf(fp," %d",blkdel);
        fprintf(fp," bp deletion, ");
        blkdel=0;
      }
    }

    if(current->ch_parent=='-'){
      if(blkin==0)
        print_one_marker(fp,current->loci);
      blkin++;
    }else if(current->ch_child=='-'){
      if(blkdel==0)
        print_one_marker(fp,current->loci);
      blkdel++;
    }else{
      print_one_marker(fp,current->loci);
      fprintf(fp," %c%c, ",current->ch_parent,current->ch_child);
    }
    previous = current;
  }

  if(blkin!=0){
    fprintf(fp,"-");
    print_one_marker(fp,previous->loci);
    fprintf(fp," %d",blkin);
    fprintf(fp," bp insertion, ");
  }else if(blkdel!=0){
    fprintf(fp,"-");
    print_one_marker(fp,previous->loci);
    fprintf(fp," %d",blkdel);
    fprintf(fp," bp deletion, ");
  }
}

// Writes the dendrogram of the subtree rooted at `root` to fp, children
// before the node itself. A node that has markers or a sequence id gets a
// line of its own and opens a new indentation level; an unlabelled node
// without markers is transparent and its children are printed at the same
// depth. indentation[d] records whether column d needs a '|' connector.
// NOTE(review): indentation[] has a fixed capacity set in the constructor
// and depth+1 is not checked against it here.
void familytree::print_tree_from_node(FILE *fp, phylonode *root, int depth){
  int j,len;
  phylonode *child1,*child2;
  // Large enough for any int printed in decimal, sign and terminator included.
  char la[16],label[16];

  if(root==NULL)
    return;

  child1=root->child1;
  child2=root->child2;

  if((root->n_markers!=0)||(root->seq_id!=-1)){
    indentation[depth+1]=false;
    if(child1!=NULL){
      print_tree_from_node(fp,child1,depth+1);
      indentation[depth+1]=true;
    }
    if(child2!=NULL){
      print_tree_from_node(fp,child2,depth+1);
      indentation[depth+1]=true;
    }

    for(j=0;j<depth;j++){
      if(indentation[j])
        fprintf(fp,"|   ");
      else
        fprintf(fp,"    ");
    }

    // Label: " ---" with the id right-aligned over it (" -12", "1234").
    strcpy(label," ---");
    if(root->seq_id!=-1){
      // sprintf instead of the non-standard itoa.
      sprintf(la,"%d",root->seq_id);
      len=strlen(la);
      if(len>=4){
        // Ids of four or more characters replace the template entirely;
        // overlaying them would start before the beginning of label.
        strcpy(label,la);
      }else{
        for(j=0;j<len;j++)
          label[4-len+j]=la[j];
      }
    }
    if(root->n_markers!=0){
      fprintf(fp,"%s (%d mutations): ",label,root->n_markers);
      print_one_node(fp,root);
      fprintf(fp,"\n");
    }else
      fprintf(fp,"%s\n",label);

    // Spacer line below the node.
    for(j=0;j<depth;j++){
      if(indentation[j])
        fprintf(fp,"|   ");
      else
        fprintf(fp,"    ");
    }
    fprintf(fp,"|\n");

  }else{
    if(child1!=NULL){
      print_tree_from_node(fp,child1,depth);
      indentation[depth]=true;
    }
    if(child2!=NULL){
      print_tree_from_node(fp,child2,depth);
      indentation[depth]=true;
    }
  }
}

// Recounts the markers on every node and writes the dendrogram to
// `filename` (the file is overwritten). If the file cannot be opened a
// message is printed on stderr and nothing else happens; an empty tree
// produces an empty file.
void familytree::print_tree_to_file(char *filename){
  FILE *fp;

  fp = fopen(filename,"w");
  if(fp==NULL){
    fprintf(stderr,"print_tree_to_file: cannot open %s for writing\n",filename);
    return;
  }

  indentation[0]=false;
  if(par_root.child1!=NULL){
    count_all_nodes();
    print_tree_from_node(fp,par_root.child1,0);
  }
  fclose(fp);
}

/*******************************************************
          Save the results of alignment to the
            DNAData structure.
 *******************************************************/

// Reconstructs the aligned sequence of every tabled node with id
// 0 .. nseq-1 and appends it to my_aligned_data, labelled with the id in
// decimal.
// NOTE(review): the loop only visits ids below nseq; sequences whose id is
// >= nseq (e.g. ids that start at 1 or have gaps) are skipped -- confirm
// that ids are always 0..nseq-1.
void familytree::SaveAlignedSeq(DNAdata *my_aligned_data){
  int i;
  // Large enough for any int printed in decimal, sign and terminator included.
  char label[16];

  for(i=0;i<nseq;i++){
    if(table[i]!=NULL){
      get_sequence(table[i],seqP);
      // sprintf instead of the non-standard itoa.
      sprintf(label,"%d",i);
      my_aligned_data->AddSequence(seqP,label);
    }
  }
}


/*******************************************************
     Save the tree in Newick format.
 *******************************************************/

void familytree::save_tree_from_node(FILE *fp, phylonode *mynode){
  marker *current;
  int loci1;
  char indel;
  
  if((mynode->child1!=NULL)||(mynode->child2!=NULL)){
    fprintf(fp,"(");
    if(mynode->child1!=NULL)
      save_tree_from_node(fp,mynode->child1);
    if(mynode->child2!=NULL){
      fprintf(fp,",");
      save_tree_from_node(fp,mynode->child2);
    }
    fprintf(fp,")");
  }
  fprintf(fp,"%d:%d",mynode->seq_id,mynode->n_markers);
  for(current=mynode->markers->m_pNext;current!=NULL;current=current->m_pNext){
    loci1 = current->loci;
    if(current->ch_parent=='-')
      indel = 'i';
    else if(current->ch_child=='-')
      indel = 'd';
    else
      indel = ' ';

    if(indel!=' ')
      fprintf(fp,"<%d%c>%c/%c",loci1,indel,current->ch_parent,current->ch_child);
    else
      fprintf(fp,"<%d>%c/%c",loci1,current->ch_parent,current->ch_child);
  }
}

// Saves the tree to `filename` (overwritten) in this layout:
//   line 1 : "<2*nseq-2> <LengthSeqComb> <number of gaps in the root sequence>"
//   line 2 : the tree in Newick form with markers, ending in ';'
//   then   : the piece table of loci_id as "<first id> <run length>" lines,
//            terminated by "-1 0"
//   then   : the combined sequence, a line with ">", and the root sequence.
// The n_markers values written are those currently stored on the nodes.
// An empty tree or an unopenable file is reported on stderr and nothing is
// written.
void familytree::save_tree(char *filename){
  FILE *fp;
  int i, prev_id, count_len, count_empty;

  if(par_root.child1==NULL){
    fprintf(stderr,"save_tree: the tree is empty; nothing written to %s\n",filename);
    return;
  }

  fp = fopen(filename,"w");
  if(fp==NULL){
    fprintf(stderr,"save_tree: cannot open %s for writing\n",filename);
    return;
  }

  // Save the number of nodes, length of the combined sequence, and the number of blank space (related to indel) inserted to the root sequence.
  count_empty = 0;
  for(i=0;i<LengthSeqComb;i++){
    if(SeqRoot[i]=='-')
      count_empty ++;
  }
  fprintf(fp,"%d %d %d\n",2*nseq-2,LengthSeqComb,count_empty);

  // Save the tree topology in Newick format.
  save_tree_from_node(fp,par_root.child1);
  fprintf(fp,";\n");

  // Save the piecetable for reconstructing the site-absolute correspondence:
  // each maximal run of consecutive site ids becomes one "<start> <length>" line.
  if(LengthSeqComb>0){
    prev_id = loci_id[0];
    count_len = 1;
    for(i=1;i<LengthSeqComb;i++){
      if(loci_id[i]==prev_id+count_len)
        count_len ++;
      else{
        fprintf(fp,"%d %d\n",prev_id,count_len);
        prev_id = loci_id[i];
        count_len = 1;
      }
    }
    fprintf(fp,"%d %d\n",prev_id,count_len);
  }
  fprintf(fp,"-1 0\n");
  
  // Save the combined sequence.
  fprintf(fp,"%s\n>\n",SeqComb);

  // Save the root sequence.
  fprintf(fp,"%s\n",SeqRoot);

  fclose(fp);
}

/*******************************************************
     Load a tree in Newick format from file.
 *******************************************************/

void familytree::load_tree(char *filename){
  int i,n_nodes,count_empty,len_root;
  int count, start, len;
  char *string, *p;
  FILE *fp;
  bool bFinishedComb;

  fp = fopen(filename,"r");

  // Read the number of nodes, length of the combined sequence, and the number of blank space (related to indel) inserted to the root sequence.
  fscanf(fp,"%d %d %d ",&n_nodes,&LengthSeqComb,&count_empty);
 
  // Reconstruct the tree from the Newick representation.
  string = (char *)malloc((unsigned) MAXL*sizeof(char));
  string[0]='\0';
  load_topology(fp,string);
  
  // Reconstruct the site-absolute correspondence from the piecetable.
  count = 0;
  for(;;){
    fscanf(fp,"%d %d ",&start,&len);
    if(start!=-1){
      for(i=0;i<len;i++){
        loci_id[count+i] = start+i;
        pos_id[start+i] = count+i;
      }
      count += len;
    }else
      break;
  }

  // Read the combined sequence.
  SeqComb[0] = '\0';
  bFinishedComb = false;
  while((!bFinishedComb)&&( fgets(string, MAXL, fp) != NULL )){
    if(string[0]=='>')
      bFinishedComb = true;
    else{
      if(((p=strchr(string,10))!=NULL)||((p=strchr(string,0))!=NULL)){
        len = p-string;
        strncat(SeqComb,string,len);
      }
    }
  }

  // Read the root sequence.
  SeqRoot[0] = '\0';
  while( fgets(string, MAXL, fp) != NULL ){
    if(((p=strchr(string,10))!=NULL)||((p=strchr(string,0))!=NULL)){
      len = p-string;
      strncat(SeqRoot,string,len);
    }
  }

  free(string);
  fclose(fp);
}


void familytree::load_topology(FILE *fp, char *textline){
  int count,seq_id,len,stack_head,remained_length;
  phylonode **node_stack;
  phylonode *temp;
  marker *prev;
  char *token,*pch,*pch1;
  char delim;

  node_stack =(phylonode**)malloc((unsigned) MAXK*sizeof(phylonode*));
  token = (char *)malloc((unsigned) MAXL*sizeof(char));
  count = 0;
  stack_head = -1;
  remained_length = strlen(textline);
  
  while(fgets(textline+remained_length, MAXL-remained_length, fp)!=NULL){
    pch = textline-1;
    pch1 = strpbrk(textline,"():,;<>/");
    while(pch1!=NULL){
      len = pch1-pch-1;
      if(len!=0){
        strncpy(token, pch+1,len);
        token[len]='\0';
        if((delim=='(')||(delim==',')){ // Add a new node on the tree.
          seq_id = atoi(token);
          stack_head++;
          node_stack[stack_head] = new phylonode(seq_id);
          prev = node_stack[stack_head]->markers;
          if(seq_id!=-1){
            count ++;
            table[seq_id] = node_stack[stack_head];
          }
        }else if(delim==':')
          node_stack[stack_head]->n_markers = atoi(token);
        else if(delim=='<')
          prev->m_pNext = new marker(atoi(token),0,0,NULL);
        else if(delim=='>')
          prev->m_pNext->ch_parent = token[0];
        else if(delim=='/'){
          prev->m_pNext->ch_child = token[0];
          prev = prev->m_pNext;
        }else if(delim==')'){
          seq_id = atoi(token);
          node_stack[stack_head]->seq_id = seq_id;
          prev = node_stack[stack_head]->markers;
          if(seq_id!=-1){
            table[seq_id] = node_stack[stack_head];
            count ++;
            // Remove the nodes with order greater than two by inserting empty nodes
            if(stack_head!=0){
              if((node_stack[stack_head]->child1!=NULL)&&(node_stack[stack_head]->child2==NULL)){
                temp = new phylonode(seq_id);
                temp->n_markers = 0;
                table[seq_id] = temp;
                node_stack[stack_head]->seq_id = -1;
                join_tree(temp,node_stack[stack_head],RIGHT);
              }
            }
          }
        }
      }
      delim = pch1[0];
      if(delim=='('){
        stack_head++;
        node_stack[stack_head] = new phylonode(-1);
      }else if((delim==',')||(delim==')')){
        if(node_stack[stack_head-1]->child1==NULL) // The parent has no child
          join_tree(node_stack[stack_head],node_stack[stack_head-1],LEFT);
        else if(node_stack[stack_head-1]->child2==NULL)  // The parent has already got one child
          join_tree(node_stack[stack_head],node_stack[stack_head-1],RIGHT);
        else{  // The parent has already got two children
          temp = new phylonode(-1);
          temp->n_markers = 0;
          join_tree(node_stack[stack_head-1]->child1,temp,LEFT);
          join_tree(temp,node_stack[stack_head-1],LEFT);
          join_tree(node_stack[stack_head],temp,RIGHT);
        }
        stack_head --;
      }
      pch = pch1;
      pch1 = strpbrk(pch+1,"():,;<>/");
      if(delim==';') break;
    }
    strcpy(textline,pch+1);
    remained_length = strlen(textline);
    if(delim==';') break;
  }
  join_tree(node_stack[0],NULL,LEFT);
  par_root.child1 = node_stack[0];
  nseq = count;
  free(token);
  free(node_stack);
}


#endif
