## Generate 3D and 2D (in barycentric coordinates) plots of clusterings and Principal Curves
## Copyright Huy Vo, Jonathan Dawes, and Robert Kelsh, 2022 - 2024


## Loading the required packages

## Install BiocManager and the Bioconductor dependencies only when they are
## not available yet. requireNamespace() merely checks for (and loads) a
## package without attaching it, so re-running the script no longer triggers
## a reinstall on every run.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

bioc_pkgs <- c("slingshot", "DelayedMatrixStats")
bioc_missing <- bioc_pkgs[
  !vapply(bioc_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(bioc_missing) > 0) {
  BiocManager::install(bioc_missing)
}

library(tidyverse)
library(RColorBrewer)
library(slingshot)
library(ggplot2)
library(plotly)
library(dplyr)
library(rgl)
library(TrajectoryUtils)
library(igraph)
library(gsubfn)


#################### Bubble sort function.

# Return the permutation of indices that sorts `x` in increasing order.
#
# The name is kept for the callers in this script, but the work is delegated
# to base R's order(), which gives the same answer as the former hand-written
# bubble sort: ties keep their original relative order.
#
# Arguments:
#   x  Vector (or one-column matrix) of comparable values.
#
# Returns:
#   Integer vector `idx` of the same length as `x` such that `x[idx]` is
#   sorted in increasing order. An empty input gives integer(0) and a single
#   element gives 1L (the loop-based version failed on both, because
#   `1:(n - 1)` counts downwards when n < 2). Missing values are placed last.
bubble_sort <- function(x) {
  order(x)
}

## Set random seed (kmeans below uses random starts).
set.seed(123)

######################### Main code starts here.

## Import data from csv file.
## The file name encodes the simulation parameters:
##   data_a<alpha>s<sigma>n<n>.csv
alpha <- 1.0         ## ODE parameter: 'twistyness' of trajectories
sigma <- 0.001       ## noise level
n <- 1               ## realisation number
## compare with MATLAB: num2str(alpha1),"s",num2str(sigma),"n",num2str(nn),".csv");

## Directory holding the simulated data; change this to run on another machine.
data_dir <- "C:/Users/jhpd20/Desktop/Huy Vo/sde_data"
## paste0() concatenates without a separator. The earlier paste() call also
## passed `col = ""`, which is not a paste() argument and was simply pasted
## in as an extra empty string.
filename <- file.path(
  data_dir,
  paste0("data_a", alpha, "s", sigma, "n", n, ".csv")
)

thedata <- read.csv(filename, header = FALSE)
coord <- thedata[2:4]  ## coordinates (columns 2-4), kept as a data frame
timme <- thedata[1]    ## time points (column 1), kept as a data frame

## Log transformation of the coordinates
mydata <- log(coord)

## Disabled alternative: work on a subsample of the rows only.
#simp_mydata <-mydata[c(c(1:20),c(401:420),c(801:820)),]

## Cluster the log-transformed points with k-means.
k <- 40 # number of clusters
kme <- kmeans(mydata, centers = k, iter.max = 50, nstart = 50)
clus <- kme$cluster # cluster label of every row of mydata

## Mean time of the points in each cluster, and the ordering of the
## clusters by that mean time (earliest first).
time_clus <- rowmean(timme, clus)
clus_order <- bubble_sort(time_clus)

## Slingshot: build the lineages starting from the earliest cluster,
## then fit the curves along them.
step1 <- getLineages(mydata, clus, start.clus = clus_order[1])
step2 <- getCurves(step1, shrink = 0.5)
myplot <- as.SlingshotDataSet(step2)

## 3D view: the fitted curves in blue, then the data points added on top,
## coloured by cluster. The rows are drawn in three batches of 400 with
## increasing point size (3, 4, 5) so that the batches can be told apart.
plot3d.SlingshotDataSet(myplot, lwd = 3, col = "blue")
plot3d(mydata[1:400, ], col = clus[1:400],
       aspect = "iso", size = 3, add = TRUE)
plot3d(mydata[401:800, ], col = clus[401:800],
       aspect = "iso", size = 4, add = TRUE)
plot3d(mydata[801:1200, ], col = clus[801:1200],
       aspect = "iso", size = 5, add = TRUE)
#plot3d(mydata[1201:1600,], col = clus[1201:1600], aspect = 'iso',size=6, add = TRUE)

## Print the lineages (sequences of clusters) found by slingshot.
slingLineages(myplot)

## Cluster centres in log-coordinates and their squared Euclidean norms.
centers <- rowmean(mydata, clus)
Ed <- rowSums(centers^2)

## Clusters at which lineages end, as recorded in the slingshot parameters.
end_clus <- as.numeric(step1@metadata[["slingParams"]][["end.clus"]])

## Number of end clusters whose centre has squared norm above 8 ...
sum(Ed[end_clus] > 8)

## ... and number of end clusters whose mean time is above 600.
sum(time_clus[end_clus] > 600)


############ Change dimension:
## Reduce the three log-coordinates to two: the components of each point
## along the orthonormal directions (1, 1, -2) / sqrt(6) and
## (-1, 1, 0) / sqrt(2), both perpendicular to (1, 1, 1).
## Columns of Bary_coord: y_1, y_2, cluster label, trajectory number
## (rows come in three consecutive batches of 400).
Bary_coord <- matrix(0, nrow = 1200, ncol = 4)
Bary_coord[, 1] <- (mydata[[1]] + mydata[[2]] - 2 * mydata[[3]]) / sqrt(6)
Bary_coord[, 2] <- (mydata[[2]] - mydata[[1]]) / sqrt(2)
Bary_coord[, 3] <- clus
Bary_coord[, 4] <- rep(c(1, 2, 3), each = 400)

## Data-frame copy with named columns, used for the ggplot figures.
Bary_coordx <- as.data.frame(Bary_coord)
colnames(Bary_coordx) <- c("y_1", "y_2", "cluster", "trajectory")

## Re-express the fitted curves in the new two-dimensional coordinates.
new.myplot <- embedCurves(step2, Bary_coord[, 1:2])

## 2D scatter plot of the points: colour = cluster, shape = trajectory.
## coord_fixed() keeps the same scale on both axes.
p <- ggplot(Bary_coordx, aes(x = y_1, y = y_2)) +
  geom_point(aes(shape = as.character(trajectory),
                 color = as.character(cluster))) +
  coord_fixed()
#  scale_color_manual(values=heat.colors(15))

## Show the points on their own first.
p

## Embedded curves as a data frame; rename its four columns so that the
## coordinates match the aesthetics (y_1, y_2) used in `p`.
curves <- slingCurves(new.myplot, as.df = TRUE)
colnames(curves) <- c("y_1", "y_2", "Order", "Lineage")

## paste0() concatenates without a separator. The earlier paste() call also
## passed `col = ""`, which is not a paste() argument and was simply pasted
## in as an extra empty string.
plot_title <- paste0("alpha=", alpha, " sigma=", sigma, " realisation #", n)
x_label <- expression(y[1])
y_label <- expression(y[2])


opt <- par(cex = 2.5) # Make everything a bit bigger
# NOTE(review): par() sets base-graphics parameters, while ggplot2 draws with
# grid, so this presumably does not enlarge the figure below; the previous
# settings saved in `opt` are never restored. Consider a ggplot2 theme() text
# size instead - TODO confirm intent.

## Mean position of each cluster, drawn as a cross on top of the points.
cluster_means <- Bary_coordx %>%
  group_by(cluster) %>%
  summarise(across(c(y_1, y_2), mean))

## Final figure: points, one path per lineage (following Order), labels and
## the cluster means.
p +
  geom_path(data = curves %>% arrange(Order),
            aes(group = Lineage)) +
  ggtitle(plot_title) +
  xlab(x_label) +
  ylab(y_label) +
  labs(color = "clusters", shape = "trajectories") +
  geom_point(data = cluster_means, size = 5, shape = 3)
