
# ---- Randomly select 100 yeast cell-cycle time courses ----
# The workspace loaded here provides (among others) the objects Y and Time
# that are used below.
load("./AllPombeData_MedianNormalized_AfterAveragingReplicates_Working.Rdata")

# Keep columns 253..303 of Y (51 columns) and the matching entries of Time.
A <- Y[, 253:303]
dim(A)

# Drop every row that has a missing value in the selected columns.
A1 <- na.omit(A)
dim(A1)  # 3680 rows when this was last run
time <- Time[253:303]

# Draw 100 distinct row indices of A1. No seed is set, so the draw differs
# between runs; uncomment set.seed() to make it repeatable.
# set.seed(1)
a <- sample(seq_len(nrow(A1)), 100)
A2 <- A1[a, ]
# write.csv(A2, "A2-100.csv")

library(splines)

# For each of the 100 sampled time courses, regress the course on a B-spline
# basis of `time` for every df in 3..20 and record the position (1..18) of
# the fit with the smallest BIC. Position p corresponds to df = p + 2.
k1 <- vapply(
  seq_len(100),
  function(row) {
    b <- A2[row, ]
    bic_by_df <- vapply(
      3:20,
      function(df) BIC(lm(b ~ bs(time, df = df))),
      numeric(1)
    )
    which.min(bic_by_df)
  },
  integer(1)
)

# Column 1: row index into A1 of the sampled course.
# Column 2: position of the BIC-optimal df (add 2 to obtain the df itself).
k <- matrix(as.numeric(c(a, k1)), nrow = 100, ncol = 2)
# write.csv(k, "k-100.csv")

# Remove the temporaries and the loaded objects that are not needed below.
rm(a, A, k1, ColNames, ExpIndex, ExpNames, RowNames, Time,
   WorkingTimeCourseIndicator, Y)

# Keep the index/df table under the name k2 and free the name k.
k2 <- k
rm(k)

# Refit every sampled course with its BIC-selected spline. k2[, 2] stores the
# position within 3..20, hence the "+ 2" to recover the df.
fit1 <- lapply(seq_len(100), function(row) {
  f <- as.numeric(A2[row, ])
  lm(f ~ bs(time, df = k2[row, 2] + 2))
})
# save.image("0705.RData")

# All rows of A1 that were not sampled above.
A_test2 <- A1[-k2[, 1], ]


# ---- Generate 100 pairs of time courses (xx100, yy100) ----
# xx100: fitted spline values of each sampled course, 51 points
#        (original note: "tiao_data_no_noise").
# yy100: 49-point curve computed from the same fit and the random parameters
#        below (original note: "beitiao_data_noise"; no noise is added here).
# parameter: one row per pair holding mu, tau and the three coefficients.
xx100 <- matrix(NA, 100, 51)
yy100 <- matrix(NA, 100, 49)
parameter <- matrix(NA, 100, 5)

for (i in seq_len(100)) {
  # set.seed(i)
  # Draw order is mu, tau, then the three coefficients; it determines how the
  # random stream is consumed, so do not reorder.
  mu_k <- runif(1, 0, 0.35)
  tau <- runif(1, 0, 20)
  coefs <- matrix(runif(3, -10, 10), 3, 1)
  parameter[i, ] <- c(mu_k, tau, coefs)

  # 49 x 3 design matrix: a constant column, exp(-10 * (j - 1) * mu) and a
  # recursively accumulated third column that starts at 0.
  design <- matrix(0, 49, 3)
  design[, 1] <- 1
  design[, 2] <- exp(-10 * (0:48) * mu_k)
  for (j in 2:49) {
    # Grid with step 0.01 over (35 + 10 * (j - 2), 35 + 10 * (j - 1)]; the
    # left end point is dropped.
    grid <- seq(35 + 10 * (j - 2), 35 + 10 * (j - 1), 0.01)[-1]
    # Sum with weight 0.01 of the fitted curve evaluated at grid - tau,
    # multiplied by exp(mu * (grid - 35)).
    integral <- sum(
      predict(fit1[[i]], data.frame(time = grid - tau)) *
        exp(mu_k * (grid - 35)) * 0.01
    )
    design[j, 3] <- exp(-10 * mu_k) *
      (design[j - 1, 3] + exp(-10 * (j - 2) * mu_k) * integral)
  }

  xx100[i, ] <- as.numeric(fitted(fit1[[i]]))
  yy100[i, ] <- c(design %*% coefs)
}
rm(coefs, design, grid, integral, i, j, mu_k, tau)
# write.csv(xx100, file = "xx100_no_noise.csv")
# write.csv(yy100, file = "yy100_no_noise.csv")
# write.csv(parameter, file = "parameter.csv")



rm(A2, fit1, parameter, time)
# ---- Generate dataset for training and testing 1 ----
# Keep the noise-free curves under explicit names.
xx100_no_noise <- xx100
yy100_no_noise <- yy100
rm(xx100, yy100)

# Ten noisy replicates per pair at three noise levels: rows
# (i - 1) * 10 + 1 .. i * 10 belong to pair i.
XL1 <- matrix(NA, 1000, 51)
XL2 <- matrix(NA, 1000, 51)
XL3 <- matrix(NA, 1000, 51)
YL1 <- matrix(NA, 1000, 49)
YL2 <- matrix(NA, 1000, 49)
YL3 <- matrix(NA, 1000, 49)

# set.seed(13:5)
for (i in seq_len(100)) {
  X_k <- as.numeric(xx100_no_noise[i, ])
  Y_k <- as.numeric(yy100_no_noise[i, ])
  var_x <- var(X_k)
  var_y <- var(Y_k)

  for (j in seq_len(10)) {
    r <- (i - 1) * 10 + j
    # The rnorm() calls stay in the order X, Y for low, medium, high noise so
    # that the random stream is consumed exactly as before.
    XL1[r, ] <- X_k + rnorm(51, 0, sqrt(var_x / 10))  # low noise
    YL1[r, ] <- Y_k + rnorm(49, 0, sqrt(var_y / 10))

    XL2[r, ] <- X_k + rnorm(51, 0, sqrt(var_x / 5))   # medium noise
    YL2[r, ] <- Y_k + rnorm(49, 0, sqrt(var_y / 5))

    XL3[r, ] <- X_k + rnorm(51, 0, sqrt(var_x / 1))   # high noise
    YL3[r, ] <- Y_k + rnorm(49, 0, sqrt(var_y / 1))
  }
}
rm(i, j, r, X_k, Y_k, var_x, var_y)


# For every row r of the 1000-row training sets, pick one random row that
# belongs to a different block of ten (i.e. a different source pair).
# Column 1 holds the sampled row, column 2 holds r itself.
no_reg_L1 <- matrix(NA, 1000, 2)
no_reg_L2 <- matrix(NA, 1000, 2)
no_reg_L3 <- matrix(NA, 1000, 2)
# set.seed(2:23)
all_rows <- 1:1000
for (i in seq_len(100)) {
  # Rows of the current block; these are excluded from the candidates.
  a <- ((i - 1) * 10 + 1):(i * 10)
  candidates <- all_rows[-a]
  for (j in seq_len(10)) {
    r <- (i - 1) * 10 + j
    no_reg_L1[r, 1] <- sample(candidates, 1)
    no_reg_L2[r, 1] <- sample(candidates, 1)
    no_reg_L3[r, 1] <- sample(candidates, 1)
    no_reg_L1[r, 2] <- no_reg_L2[r, 2] <- no_reg_L3[r, 2] <- r
  }
}

rm(a, all_rows, candidates, i, j, r)


library(splines)
time <- seq(15, 515, 10)

# Fit a BIC-selected B-spline regression to every row of XL1, XL2 and XL3.
# For each row, df = 3..20 are tried; which.min() returns the position of the
# best df within 3..20, hence the "+ 2" in the final fit.
# The final lm() call is kept at top level with the variable names f, time and
# k2 so that the stored model objects look the same as before.
noisy_inputs <- list(XL1, XL2, XL3)
spline_fits <- vector("list", length(noisy_inputs))
for (s in seq_along(noisy_inputs)) {
  curves <- noisy_inputs[[s]]
  fits <- vector("list", nrow(curves))
  for (i in seq_len(nrow(curves))) {
    f <- curves[i, ]
    bic_by_df <- vapply(
      3:20,
      function(k) BIC(lm(f ~ bs(time, df = k))),
      numeric(1)
    )
    k2 <- which.min(bic_by_df)
    fits[[i]] <- lm(f ~ bs(time, df = k2 + 2))
  }
  spline_fits[[s]] <- fits
}
fitXL1 <- spline_fits[[1]]
fitXL2 <- spline_fits[[2]]
fitXL3 <- spline_fits[[3]]

# Remove every temporary so that only the data sets and fits are saved.
rm(noisy_inputs, spline_fits, s, curves, fits, i, f, bic_by_df, k2)
save.image("./data_train&test1.RData")




rm(A1, fitXL1, fitXL2, fitXL3, no_reg_L1, no_reg_L2, no_reg_L3,
   XL1, XL2, XL3, xx100_no_noise, YL1, YL2, YL3, yy100_no_noise)
# ---- Generate dataset for testing 2 ----
A_test <- A_test2
rm(A_test2)

# Split row indices 1..3580 into three disjoint groups of 1193, 1193 and 1194
# rows: two random draws without replacement, then whatever is left.
# set.seed(3:09)
idx_set1 <- sample(1:3580, 1193)
idx_set2 <- sample(setdiff(1:3580, idx_set1), 1193)
idx_set3 <- setdiff(1:3580, c(idx_set1, idx_set2))

xx1 <- A_test[idx_set1, ]  # original (measured) data
xx2 <- A_test[idx_set2, ]
xx3 <- A_test[idx_set3, ]
rm(idx_set1, idx_set2, idx_set3, A_test)

# Temporary helper: fit B-spline regressions of one course on `time` for
# df = 3..20, pick the BIC-optimal one and return the corresponding fit.
# which.min() gives the position within 3..20, hence the "+ 2".
fit_bic_spline <- function(f) {
  bic_by_df <- vapply(
    3:20,
    function(k) BIC(lm(f ~ bs(time, df = k))),
    numeric(1)
  )
  k2 <- which.min(bic_by_df)
  lm(f ~ bs(time, df = k2 + 2))
}

# One fit per row of each of the three test-2 subsets.
fitxx1 <- lapply(seq_len(1193), function(i) fit_bic_spline(xx1[i, ]))
fitxx2 <- lapply(seq_len(1193), function(i) fit_bic_spline(xx2[i, ]))
fitxx3 <- lapply(seq_len(1194), function(i) fit_bic_spline(xx3[i, ]))

rm(fit_bic_spline)


# Simulate the three test-2 sets. Set s uses the fits in fitxx<s>, has
# 1193 / 1193 / 1194 rows and gets noise with variance var(curve) / 10, / 5
# and / 1 respectively. For every row the results are:
#   xx<s>_no_noise  fitted spline values, 51 points ("tiao_data_no_noise")
#   yy<s>_no_noise  derived 49-point curve ("beitiao_data_no_noise")
#   XL<s>, YL<s>    the same two curves with Gaussian noise added
#   parameter<s>    the random draws, columns mu, tau, C, D, beta
set_sizes <- c(1193, 1193, 1194)
noise_divisors <- c(10, 5, 1)
input_fits <- list(fitxx1, fitxx2, fitxx3)
simulated <- vector("list", 3)

# set.seed(3:42)
for (s in 1:3) {
  n_s <- set_sizes[s]
  x_clean <- matrix(NA, n_s, 51)
  y_clean <- matrix(NA, n_s, 49)
  x_noisy <- matrix(NA, n_s, 51)
  y_noisy <- matrix(NA, n_s, 49)
  pars <- matrix(NA, n_s, 5)
  colnames(pars) <- c("mu", "tau", "C", "D", "beta")

  for (i in seq_len(n_s)) {
    # Draw order (mu, tau, three coefficients, then the two noise vectors)
    # fixes how the random stream is consumed; do not reorder.
    mu_k <- runif(1, 0, 0.35)
    tau <- runif(1, 0, 20)
    coefs <- matrix(runif(3, -10, 10), 3, 1)
    pars[i, ] <- c(mu_k, tau, coefs)

    # 49 x 3 design matrix: constant, exp(-10 * (j - 1) * mu) and a
    # recursively accumulated third column that starts at 0.
    design <- matrix(0, 49, 3)
    design[, 1] <- 1
    design[, 2] <- exp(-10 * (0:48) * mu_k)
    for (j in 2:49) {
      # Grid with step 0.01 over (35 + 10 * (j - 2), 35 + 10 * (j - 1)].
      grid <- seq(35 + 10 * (j - 2), 35 + 10 * (j - 1), 0.01)[-1]
      integral <- sum(
        predict(input_fits[[s]][[i]], data.frame(time = grid - tau)) *
          exp(mu_k * (grid - 35)) * 0.01
      )
      design[j, 3] <- exp(-10 * mu_k) *
        (design[j - 1, 3] + exp(-10 * (j - 2) * mu_k) * integral)
    }

    Y_k <- c(design %*% coefs)
    X_k <- as.numeric(fitted(input_fits[[s]][[i]]))

    x_clean[i, ] <- X_k
    y_clean[i, ] <- Y_k
    x_noisy[i, ] <- X_k + rnorm(51, 0, sqrt(var(X_k) / noise_divisors[s]))
    y_noisy[i, ] <- Y_k + rnorm(49, 0, sqrt(var(Y_k) / noise_divisors[s]))
  }

  simulated[[s]] <- list(
    x_clean = x_clean, y_clean = y_clean,
    x_noisy = x_noisy, y_noisy = y_noisy,
    pars = pars
  )
}

xx1_no_noise <- simulated[[1]]$x_clean
xx2_no_noise <- simulated[[2]]$x_clean
xx3_no_noise <- simulated[[3]]$x_clean
yy1_no_noise <- simulated[[1]]$y_clean
yy2_no_noise <- simulated[[2]]$y_clean
yy3_no_noise <- simulated[[3]]$y_clean
XL1 <- simulated[[1]]$x_noisy
XL2 <- simulated[[2]]$x_noisy
XL3 <- simulated[[3]]$x_noisy
YL1 <- simulated[[1]]$y_noisy
YL2 <- simulated[[2]]$y_noisy
YL3 <- simulated[[3]]$y_noisy
parameter1 <- simulated[[1]]$pars
parameter2 <- simulated[[2]]$pars
parameter3 <- simulated[[3]]$pars

rm(set_sizes, noise_divisors, input_fits, simulated, s, n_s,
   x_clean, y_clean, x_noisy, y_noisy, pars,
   coefs, design, grid, integral, i, j, mu_k, tau, X_k, Y_k)



# "Non-regulator" index pairs for the three test-2 sets (1193, 1193 and 1194
# rows). For every row i, column 2 stores i and column 1 stores a different
# row of the same set, drawn uniformly at random.
no_reg_L1 <- matrix(NA, 1193, 2)
no_reg_L2 <- matrix(NA, 1193, 2)
no_reg_L3 <- matrix(NA, 1194, 2)
for (i in seq_len(1193)) {
  no_reg_L1[i, 1] <- sample(seq_len(1193)[-i], 1)
  no_reg_L1[i, 2] <- i
}
for (i in seq_len(1193)) {
  no_reg_L2[i, 1] <- sample(seq_len(1193)[-i], 1)
  no_reg_L2[i, 2] <- i
}
for (i in seq_len(1194)) {
  # The third set has 1194 rows, so the candidates must be 1..1194. Sampling
  # from 1..1193 (as the other two sets do) would mean that row 1194 could
  # never be chosen as a partner.
  no_reg_L3[i, 1] <- sample(seq_len(1194)[-i], 1)
  no_reg_L3[i, 2] <- i
}


library(splines)

# Fit a BIC-selected B-spline regression on `time` to every row of the noisy
# test-2 inputs XL1, XL2 and XL3 (1193, 1193 and 1194 rows). df = 3..20 are
# tried; which.min() returns the position within 3..20, hence the "+ 2".
# The final lm() call is kept at top level with the variable names f, time and
# k2 so that the stored model objects look the same as before.
noisy_inputs <- list(XL1, XL2, XL3)
spline_fits <- vector("list", length(noisy_inputs))
for (s in seq_along(noisy_inputs)) {
  curves <- noisy_inputs[[s]]
  fits <- vector("list", nrow(curves))
  for (i in seq_len(nrow(curves))) {
    f <- curves[i, ]
    bic_by_df <- vapply(
      3:20,
      function(k) BIC(lm(f ~ bs(time, df = k))),
      numeric(1)
    )
    k2 <- which.min(bic_by_df)
    fits[[i]] <- lm(f ~ bs(time, df = k2 + 2))
  }
  spline_fits[[s]] <- fits
}
fitXL1 <- spline_fits[[1]]
fitXL2 <- spline_fits[[2]]
fitXL3 <- spline_fits[[3]]
rm(noisy_inputs, spline_fits, s, curves, fits, i, f, bic_by_df, k2)

# Drop the intermediate objects so that they are not written to the image.
rm(fitxx1, fitxx2, fitxx3, xx1, xx2, xx3,
   xx1_no_noise, xx2_no_noise, xx3_no_noise,
   yy1_no_noise, yy2_no_noise, yy3_no_noise)
save.image("./data_test2.RData")



################################################## real dataset
#################### find the interactions listed in BioGRID
#################### (8247 unique pairs when this was last run)
# NOTE: this clears the whole workspace before the real-data part starts.
rm(list = ls())
bio <- read.table("BIOGRID.txt", sep = "\t", fill = TRUE)
dim(bio)  ## 62872*24
load("./AllPombeData_MedianNormalized_AfterAveragingReplicates_Working.Rdata")
rm(ColNames, ExpIndex, ExpNames, Time, WorkingTimeCourseIndicator)

# Same selection as in the simulation part: columns 253..303 of Y, rows
# without any missing value.
A <- Y[, 253:303]
dim(A)
A1 <- na.omit(A)
dim(A1)  # 3680
row_num <- which(rowSums(is.na(A)) == 0)
length(row_num)
row_id <- RowNames[row_num]  ## 3680

# Translate every row id into the name stored in column 2 of SPname.csv by
# looking the id up in column 1. The result has exactly one entry per row id
# (NA when the id is not listed), so position p of name_A1 always refers to
# row p of the complete-case data. The previous element-wise lookup dropped
# unmatched ids, which would have shifted all later positions.
name <- read.csv("./SPname.csv", header = FALSE)
name_A1 <- as.character(
  name[match(row_id, name[[1]], incomparables = NA), 2]
)
name_A1
length(name_A1)  ## 3680
rm(A, A1)

# Map columns 6 and 7 of every BioGRID row to positions in name_A1 (NA when
# the name is not among the selected genes). All rows of `bio` are used rather
# than a hard-coded row count. If a name occurred more than once in name_A1,
# the first position is taken.
result <- cbind(
  match(as.character(bio[[6]]), name_A1, incomparables = NA),
  match(as.character(bio[[7]]), name_A1, incomparables = NA)
)

# Keep rows where both partners were found, sort by (column 1, column 2) and
# drop duplicated pairs.
result1 <- na.omit(result)
dim(result1)   ## 8778*2
result1 <- result1[order(result1[, 1], result1[, 2]), ]
result2 <- unique(result1)
dim(result2) # 8247*2
interactions_8247 <- result2
# The export previously referred to `result` (the unfiltered matrix); the
# deduplicated pairs are what the next section reads back.
# write.csv(interactions_8247, "interactions_8247.csv")

############################################### produce false interactions
# Step 1: mark every gene pair that is linked directly or through a chain of
# recorded interactions, so that it is not used as a false interaction later.
# interactions_8247 <- read.csv("interactions_8247.csv", row.names=1)
M <- interactions_8247
src <- M[, 1]
dst <- M[, 2]

# Symmetric 0/1 matrix of the recorded pairs (all rows of M are used).
MM <- matrix(0, 3680, 3680)
MM[cbind(src, dst)] <- 1
MM[cbind(dst, src)] <- 1

# 13 propagation sweeps: for every gene i, each gene a currently marked in row
# i passes on its recorded targets (rows of M with a in column 1). One %in%
# lookup collects the targets of all marked genes at once, which gives the
# same row update as visiting the marked genes one by one.
MM1 <- MM
for (s in 1:13) {
  for (i in seq_len(3680)) {
    a <- which(MM1[i, ] == 1)
    if (length(a) > 0) {
      b <- dst[src %in% a]
      MM1[i, b] <- 1
    }
  }
  print(sum(MM1))
  print(s)
}                  ######## may still take a while
sum(MM1) ## 1535005
all(MM1 == MM)
rm(src, dst)
## write.csv(MM1, file = "reg_matrix_3680.csv")

# Step 2: for every gene i draw two partners that are unmarked in both
# directions (MM1[i, p] == 0 and MM1[p, i] == 0) and store one pair as
# (i, partner) and the other as (partner, i).
# NOTE(review): this produces 3680 * 2 = 7360 rows, although the section
# header and the file name below say 14720 - confirm which count is intended.
MM2 <- matrix(NA, 3680 * 2, 2)
for (i in seq_len(3680)) {
  temp_row <- which(MM1[i, ] == 0)
  temp_col <- which(MM1[, i] == 0)
  # Exclude i itself: the diagonal of MM1 is usually 0, so i would otherwise
  # be a candidate and could be paired with itself.
  temp <- setdiff(intersect(temp_row, temp_col), i)
  if (length(temp) < 2) {
    stop("gene ", i, " has fewer than two unmarked partners", call. = FALSE)
  }
  # Index-based draw: sample(temp, 2) would sample from 1:temp if temp ever
  # had length one.
  temp2 <- temp[sample.int(length(temp), 2)]
  MM2[(i - 1) * 2 + 1, ] <- c(i, temp2[1])
  MM2[(i - 1) * 2 + 2, ] <- c(temp2[2], i)
}
dim(MM2) # 7360*2
# write.csv(MM2, file = "false_interactions_14720.csv")








