
###############################################################
#                                                             #           
#      Pivotal Allocation based Relabeling (PAR) algorithm    #
#       for multivariate data, optimal Z = MAP of Z           #
#                    Han Li                                   #
#         last revision in Oct 8, 2017                        #               
#                                                             # 
###############################################################

            
library(lpSolve)  #use its "lp.assign" function
library(mvtnorm)   #use its "dmvnorm" function
library(MCMCpack)    #use its "diwish" functions


#input the observation data and the MCMC samples of parameters

modelName="M3"
inputFile=paste("data/",modelName,".txt",sep="")
y=as.matrix(read.table(inputFile))        #input the observation data
n=nrow(y)     #the number of samples
d=ncol(y)    #the dimeansion of the observation data


fileStr=paste("MCMC_result/",modelName,"/",modelName,"_",sep="")  # input the MCMC samples of parameters

#!!!FORMAT: each row denotes the samples in one MCMC iteration, and the samples are stored in componentwise manner, for both the input and output files!!!

fileName=paste(fileStr,"weight.txt",sep="")    #component weight
postWeight=as.matrix(read.table(fileName))
K=ncol(postWeight)      #the number of clusters
IT=nrow(postWeight)     #the number of MCMC samples
postMu=array(0,dim=c(IT,K,d))           #component means
fileName=paste(fileStr,"mu.txt",sep="")
input=as.matrix(read.table(fileName))
for(k in 1:K){
    postMu[,k,]=input[,(d*(k-1)+1):(d*k)]
    }
postSigma2=array(0,dim=c(IT,K,d,d))     #component covariance matrix    
fileName=paste(fileStr,"sigma2.txt",sep="")
input=as.matrix(read.table(fileName))
for(it in 1:IT){
    for(k in 1:K){
        postSigma2[it,k,,]=matrix(input[it,(d^2*(k-1)+1):(d^2*k)],d,d)
        }
     }
fileName=paste(fileStr,"z.txt",sep="")      #cluster indicator
postZ=as.matrix(read.table(fileName))


#the prior parameters

kappa=10
nu=4
delta=1


Obj=postZ
OptPerm=matrix(0,IT,K)     #optimal permutaion of cluster labels for each sample
OptZ=rep(0,n)        #the cluster indicator of the sample that has the maximum posterior log-likelihood
randIter=seq(1,IT,20)    #retain every 20 samples to find the MAP sample
postLogL=rep(0,length(randIter))
cost=matrix(0,K,K)
mu0=apply(y,2,mean)
kappa=1/kappa
temp=y
for(w in 1:d){
    temp[,w]=y[,w]-mu0[w]
    }
psi=t(temp)%*%temp/(n-1)
epsilon=0.01
maxIter=200   #the maximum number of optimization steps
tempPermObj=Obj
transTable=matrix(0,K,K)
tempMatch=rep(0,K)
phi=matrix(0,K,K)       #transition matrix
matchMat=array(0,dim=c(IT,K,K))


#find OptZ

j=1
for(it in randIter){
    for(i in 1:n){
        k=postZ[it,i]
        postLogL[j]=postLogL[j]+dmvnorm(y[i,],mean=postMu[it,k,],sigma=postSigma2[it,k,,],log=TRUE)+log(postWeight[it,k])
        }
    for(k in 1:K){
        postLogL[j]=postLogL[j]+dmvnorm(postMu[it,k,],mean=mu0,sigma=psi,log=TRUE)
        postLogL[j]=postLogL[j]+log(diwish(postSigma2[it,k,,],nu,psi))
        }
    j=j+1
    }



MAPIt=randIter[which.max(postLogL)]
for(it in 1:IT){
    OptPerm[it,]=order(postWeight[it,])
    for(k in 1:K){
        tempPermObj[it,Obj[it,]==OptPerm[it,k]]=k
        }
    }
    

OptZ=tempPermObj[MAPIt,]

for(it in 1:IT){
    for(i in 1:K){
        for(j in 1:K){
            matchMat[it,i,j]=sum(OptZ==i & Obj[it,]==j)
            }
         }
     }



for(itr in 1:maxIter){

    #M step for transition matrix

    tempPhi=phi
    for(i in 1:n){
        for(k in 1:K){
            tempMatch[k]=sum(tempPermObj[,i]==k)
            }
        transTable[OptZ[i],]=transTable[OptZ[i],]+tempMatch
        }

    phi=(transTable+1)/(apply(transTable,1,sum)+K)
    if(max(abs(tempPhi-phi))<epsilon){break}
    logPhi=log(phi+0.000001)

    #M step for permutation

    for(it in 1:IT){


        for(i in 1:K){
            cost[i,]=apply(logPhi[,i]*matchMat[it,,],2,sum)
            }

        cost=-cost
        cost=round(cost-min(cost))
        OptPerm[it,]=apply(lp.assign(cost)$solution,1,which.max)
        for(k in 1:K){
            tempPermObj[it,Obj[it,]==OptPerm[it,k]]=k
            }
        }


    }




######################################################

#output the relabeled results


relabeledWeight=matrix(0,IT,K)
relabeledMu=array(0,dim=c(IT,K,d))
relabeledSigma2=array(0,dim=c(IT,K,d,d))
relabeledZ=matrix(0,IT,n)
temp=rep(0,n)


for(it in 1:IT){
    relabeledWeight[it,]=postWeight[it,OptPerm[it,]]
    relabeledMu[it,,]=postMu[it,OptPerm[it,],]
    relabeledSigma2[it,,,]=postSigma2[it,OptPerm[it,],,]
    for(k in 1:K){
        temp[postZ[it,]==OptPerm[it,k]]=k
        }
    relabeledZ[it,]=temp
    }



outputMu=matrix(0,IT,K*d)
for(k in 1:K){
    outputMu[,(d*(k-1)+1):(d*k)]=relabeledMu[,k,]
    }

outputSigma2=matrix(0,IT,K*d^2)
for(it in 1:IT){
    for(k in 1:K){
        outputSigma2[it,(d^2*(k-1)+1):(d^2*k)]=as.numeric(relabeledSigma2[it,k,,])
        }
    }


fileStr=paste("relabel_result/",modelName,"/",modelName,"_relabeled_",sep="")


fileName=paste(fileStr,"weight.txt",sep="")
write.table(relabeledWeight,fileName,col.names=F,row.names=F,quote=F,sep="\t")

fileName=paste(fileStr,"mu.txt",sep="")
write.table(outputMu,fileName,col.names=F,row.names=F,quote=F,sep="\t")

fileName=paste(fileStr,"sigma2.txt",sep="")
write.table(outputSigma2,fileName,col.names=F,row.names=F,quote=F,sep="\t")

fileName=paste(fileStr,"z.txt",sep="")
write.table(relabeledZ,fileName,col.names=F,row.names=F,quote=F,sep="\t")

