## Plot stacked bar charts, using data in format produced by code_4_labora.R
## Constructs the stacked bar charts shown in Figures 7,8 and 10.

## Copyright Huy Vo, Jonathan Dawes and Robert Kelsh, 2022 - 2024


# 1. Repair function
# Replace every entry equal to -1 (treated as a placeholder to be repaired) by
# the floored mean of the valid entries of the same column.
#
# Arguments:
#   mat  numeric matrix in which -1 marks an entry to be repaired.
# Returns:
#   `mat` with each -1 replaced by floor(mean(valid entries of its column)).
#   A column that contains no valid entry (only -1 and/or NA) is returned
#   unchanged, because there is nothing to average.
#
# The -1 placeholders (and NAs) are excluded from the mean so that they do not
# bias the replacement value, and every placeholder in a column receives the
# same value regardless of the order in which the entries are visited.
repair_mat <- function(mat) {
  for (j in seq_len(ncol(mat))) {
    column <- mat[, j]
    failed <- !is.na(column) & column == -1
    if (!any(failed)) {
      next
    }
    valid <- column[!failed & !is.na(column)]
    if (length(valid) > 0) {
      mat[failed, j] <- floor(mean(valid))
    }
  }
  mat
}
# 2. Reordering function
# Sort every column of `mt` into increasing order, each column independently
# of the others (so rows of the result no longer correspond to rows of the
# input).
#
# Arguments:
#   mt  matrix whose columns are to be sorted; any number of columns.
# Returns:
#   A matrix of the same dimensions with each column sorted increasingly.
#   NAs are kept and placed at the end of their column, so the column length
#   is preserved.
reord_mat <- function(mt) {
  for (j in seq_len(ncol(mt))) {
    mt[, j] <- sort(mt[, j], na.last = TRUE)
  }
  mt
}
# NOTE(review): `a` is not assigned anywhere above this line in this file (the
# only assignments to `a` visible here come later, as 4 x 4 summary tables), so
# this call relies on an `a` already present in the workspace -- TODO confirm
# whether this line is a leftover that can be removed.
a <- reord_mat(a)

# 3. Build the repaired, column-sorted matrices for every run.
#
# For each run tag <tag> (e.g. "a1000s004") three matrices are created:
#   <tag>_a : reord_mat(repair_mat(no_mat_<tag>))
#   <tag>_b : reord_mat(repair_mat(tp_mat_<tag>))
#   <tag>_c : reord_mat(repair_mat(no_mat_<tag>) - repair_mat(tp_mat_<tag>))
# The inputs no_mat_<tag> and tp_mat_<tag> must already exist in the workspace
# (presumably produced by code_4_labora.R -- TODO confirm).
for (alpha_tag in c("a1000", "a100", "a10", "a1")) {
  for (sigma_tag in c("s004", "s002", "s0015", "s001", "s0005", "s0")) {
    run_tag <- paste0(alpha_tag, sigma_tag)
    no_name <- paste0("no_mat_", run_tag)
    tp_name <- paste0("tp_mat_", run_tag)

    assign(paste0(run_tag, "_a"), reord_mat(repair_mat(get(no_name))))
    assign(paste0(run_tag, "_b"), reord_mat(repair_mat(get(tp_name))))
    # The difference is taken before sorting, i.e. entry by entry on the
    # repaired matrices.
    assign(
      paste0(run_tag, "_c"),
      reord_mat(repair_mat(get(no_name)) - repair_mat(get(tp_name)))
    )
  }
}
##

# Long-format data frames (one row per matrix entry) for the a1000 and a1 runs
# at s002, s001, s0 and s0005, for each of the matrix types a, b and c.
#
# Each df_<tag>_<type> has two columns:
#   no_lineages : the entries of <tag>_<type>, read column by column
#   no_clus     : 5..40, each value repeated 500 times
# NOTE(review): this assumes every matrix has 36 columns and 500 rows;
# data.frame() silently recycles the shorter column when one length is a
# multiple of the other -- confirm the row count of the inputs.
#
# Both groups of runs are generated by the same loop so that the a1 frames are
# really built from the a1 matrices (df_a1s002_a, df_a1s002_b, ...), rather
# than from a second pass over the a1000 matrices.
no_clus <- rep(c(5:40), each = 500)
for (alpha_tag in c("a1000", "a1")) {
  for (sigma_tag in c("s002", "s001", "s0", "s0005")) {
    for (type_tag in c("a", "b", "c")) {
      mat_name <- paste0(alpha_tag, sigma_tag, "_", type_tag)
      # The column names of the data frame are taken from these variable names.
      no_lineages <- as.vector(get(mat_name))
      assign(paste0("df_", mat_name), data.frame(no_lineages, no_clus))
    }
  }
}
##

# Entry-by-entry difference between the sorted `_b` matrices of the a10s002 and
# a10s001 runs, in long format (column by column).
no_lineages <- as.vector(a10s002_b - a10s001_b)
# Position 1..500 within each of the 36 columns (assumes 500 rows per matrix).
postt <- rep(seq_len(500), times = 36)
# Column label 5..40, one value per matrix column.
no_clus <- rep(5:40, each = 500)
df <- data.frame(no_lineages, no_clus, postt)
#df<- df_a1000s002_a- df_a1000s0_a

# Normal stacked bar chart: for every no_clus value, the share (count / 500) of
# each no_lineages value.
# `pct` is the count divided by the total over the whole table; it is not used
# by the plot.
# NOTE(review): the title says "type A" but the data above come from the `_b`
# matrices -- confirm which is intended.
ggplot(
  dplyr::mutate(dplyr::count(df, no_clus, no_lineages), pct = n / sum(n)),
  aes(
    x = no_clus,
    y = n / 500,
    # Explicit levels keep the legend in numeric order -4, -3, ..., 20.
    fill = factor(as.character(no_lineages), levels = as.character(-4:20))
  )
) +
  geom_col() +
  scale_fill_manual(
    values = c(
      "-4" = "4", "-3" = "3", "-2" = "2", "-1" = "1",
      "0" = "#A6CEE3", "1" = "#1F78B4", "2" = "#B2DF8A", "3" = "#33A02C",
      "4" = "#FB9A99", "5" = "#E31A1C", "6" = "#FDBF6F", "7" = "#FF7F00",
      "8" = "#CAB2D6", "9" = "#6A3D9A", "10" = "#FFFF99", "11" = "#B15928",
      "12" = "#1B9E77", "13" = "#D95F02", "14" = "#7570B3", "15" = "#E7298A",
      "16" = "#66A61E", "17" = "#E6AB02", "18" = "#A6761D", "19" = "#666666",
      "20" = "#FF0049"
    )
  ) +
  ggtitle("Difference between alpha=10, sigma=0.02, and alpha=10, sigma=0.01, type A") +
  labs(fill = "no lineages", x = "number of clusters", y = "proportion")

# Compare the "waves": one tile per (no_clus, postt) cell of `df`, coloured by
# its no_lineages value.
#
# The fill is an explicit factor with numerically ordered levels so that the
# legend reads -4, -3, ..., 20 rather than the lexicographic order of the
# character values, and the palette covers the same range (-4..20) as the
# stacked bar chart. The legend is titled after the variable it shows.
# NOTE(review): the title is hard-coded -- confirm it matches the `df` that is
# being plotted.
ggplot(
  df,
  aes(
    no_clus, postt,
    fill = factor(as.character(no_lineages), levels = as.character(-4:20))
  )
) +
  geom_tile() +
  scale_fill_manual(
    values = c(
      "-4" = "4", "-3" = "3", "-2" = "2", "-1" = "1",
      "0" = "#A6CEE3", "1" = "#1F78B4", "2" = "#B2DF8A", "3" = "#33A02C",
      "4" = "#FB9A99", "5" = "#E31A1C", "6" = "#FDBF6F", "7" = "#FF7F00",
      "8" = "#CAB2D6", "9" = "#6A3D9A", "10" = "#FFFF99", "11" = "#B15928",
      "12" = "#1B9E77", "13" = "#D95F02", "14" = "#7570B3", "15" = "#E7298A",
      "16" = "#66A61E", "17" = "#E6AB02", "18" = "#A6761D", "19" = "#666666",
      "20" = "#FF0049"
    )
  ) +
  ggtitle("alpha=1, sigma=0.01") +
  labs(fill = "no lineages")

# 2-d histogram (100 bins) of no_lineages against no_clus for df_a1000s002_a,
# with a smoothed trend (level = 0.95) and a fixed reference line from (10, 4)
# to (40, 14).
df <- df_a1000s002_a
ggplot(df, aes(no_clus, no_lineages)) +
  geom_bin2d(bins = 100) +
  geom_smooth(level = 0.95) +
  #  ggtitle("alpha=100, sigma=0.04") +
  xlab("no_clus") + ylab("no_lins") +
  # annotate() draws the reference line once; a geom_segment() whose aesthetics
  # are all constants is redrawn for every row of `df`.
  annotate("segment", x = 10, y = 4, xend = 40, yend = 14)


# Raster view of `df`: no_clus on x, postt on y, filled by no_lineages, with
# red major grid lines (size 0.5, linetype 2).
# NOTE(review): if `df` has no `postt` column (df_a1000s002_a is built with only
# no_lineages and no_clus), ggplot falls back to a `postt` variable found in
# the calling environment -- confirm this is intended.
ggplot(df, aes(no_clus, postt, fill = no_lineages)) +
  geom_raster() +
  theme(
    panel.grid.major = element_line(color = "red", size = 0.5, linetype = 2)
  )
  
#  scale_fill_gradient(low="grey90", high="red") +
#  labs(x="letters", y="LETTERS", title="Matrix") +
#  theme_bw() + theme(axis.text.x=element_text(size=9, angle=0, vjust=0.3),
#                     axis.text.y=element_text(size=9),
#                     plot.title=element_text(size=11))

# Tile view of `df`: one tile per (no_clus, postt) pair, filled by the numeric
# no_lineages value (continuous colour scale).
# NOTE(review): `postt` must be a column of `df` or a variable of matching
# length in the calling environment -- confirm which is intended.
ggplot(df, aes(x = no_clus, y = postt, fill = no_lineages)) +
  geom_tile()

# Relationship, for column 10 of the raw (not repaired, not sorted) a100s002
# matrices, between tp_mat and (no_mat - tp_mat).
no_lineages_b <- as.vector(tp_mat_a100s002[, 10])
no_lineages_c <- as.vector(no_mat_a100s002[, 10] - tp_mat_a100s002[, 10])
postt <- rep(seq_len(1000), times = 36)
no_clus <- rep(5:40, each = 1000)
# NOTE(review): no_clus and postt have 36000 entries whereas the two count
# vectors hold a single matrix column; data.frame() recycles the shorter
# columns when the lengths divide evenly, which multiplies the bin counts in
# the plot below -- confirm this is intended.
tf <- data.frame(no_lineages_b, no_lineages_c, no_clus, postt)

# 2-d histogram (50 bins) of the two count vectors.
ggplot(tf, aes(no_lineages_b, no_lineages_c)) +
  geom_bin2d(bins = 50)

# Pearson correlation of the two (un-recycled) vectors.
cor(no_lineages_b, no_lineages_c, method = "pearson")

# Pearson correlation between tp_mat and (no_mat - tp_mat) of the raw a1000s001
# matrices, computed separately for each of the 36 columns and plotted against
# the column labels 5..40.
corr <- numeric(36)
for (i in seq_len(36)) {
  no_lineages_b <- as.vector(tp_mat_a1000s001[, i])
  no_lineages_c <- as.vector(no_mat_a1000s001[, i] - tp_mat_a1000s001[, i])
  corr[i] <- cor(no_lineages_b, no_lineages_c, method = "pearson")
}

# `c(5:40)` is kept verbatim: plot() derives the default x-axis label from it.
plot(c(5:40), corr)
# Type C:
# Net comparison of two `_c` matrices over a range of columns: the number of
# entries where the first matrix is larger minus the number where it is
# smaller. Entries 5 to 7 compare a10s0, a10s0005 and a10s001 with a10s002.
# NOTE(review): entry 5 uses columns 32:36 whereas entries 6 and 7 use 27:36 --
# confirm which range is intended.
# NOTE(review): `diffehence` is re-initialised further down the script, so the
# values stored here are only available until then.
diffehence <- c(1:16)

diffehence[5] <- sum(a10s0_c[, 32:36] > a10s002_c[, 32:36]) -
  sum(a10s0_c[, 32:36] < a10s002_c[, 32:36])
diffehence[6] <- sum(a10s0005_c[, 27:36] > a10s002_c[, 27:36]) -
  sum(a10s0005_c[, 27:36] < a10s002_c[, 27:36])
diffehence[7] <- sum(a10s001_c[, 27:36] > a10s002_c[, 27:36]) -
  sum(a10s001_c[, 27:36] < a10s002_c[, 27:36])
# a10s002 compared with itself.
diffehence[8] <- 0

# Row 1000 of the entry-by-entry difference (requires at least 1000 rows).
(a10s0_c - a10s002_c)[1000, ]

# Sum of columns 32:36 of every `_c` matrix, arranged as a 4 x 4 table with one
# row per alpha tag (a1, a10, a100, a1000) and one column per sigma tag
# (s0, s0005, s001, s002).
type_c_mats <- list(
  a1s0_c, a1s0005_c, a1s001_c, a1s002_c,
  a10s0_c, a10s0005_c, a10s001_c, a10s002_c,
  a100s0_c, a100s0005_c, a100s001_c, a100s002_c,
  a1000s0_c, a1000s0005_c, a1000s001_c, a1000s002_c
)
diffehence <- c(1:16)
for (k in seq_along(type_c_mats)) {
  diffehence[k] <- sum(type_c_mats[[k]][, 32:36])
}

# Filling by row puts entries 1-4 in row 1, 5-8 in row 2, and so on.
a <- matrix(diffehence, nrow = 4, ncol = 4, byrow = TRUE)
a

# Sum of columns 32:36 of every `_b` matrix, arranged as a 4 x 4 table with one
# row per alpha tag (a1, a10, a100, a1000) and one column per sigma tag
# (s0, s0005, s001, s002).
type_b_mats <- list(
  a1s0_b, a1s0005_b, a1s001_b, a1s002_b,
  a10s0_b, a10s0005_b, a10s001_b, a10s002_b,
  a100s0_b, a100s0005_b, a100s001_b, a100s002_b,
  a1000s0_b, a1000s0005_b, a1000s001_b, a1000s002_b
)
diffehence <- c(1:16)
for (k in seq_along(type_b_mats)) {
  diffehence[k] <- sum(type_b_mats[[k]][, 32:36])
}

# Filling by row puts entries 1-4 in row 1, 5-8 in row 2, and so on.
a <- matrix(diffehence, nrow = 4, ncol = 4, byrow = TRUE)
a

# Sum of columns 32:36 of every `_a` matrix, arranged as a 4 x 4 table with one
# row per alpha tag (a1, a10, a100, a1000) and one column per sigma tag
# (s0, s0005, s001, s002).
type_a_mats <- list(
  a1s0_a, a1s0005_a, a1s001_a, a1s002_a,
  a10s0_a, a10s0005_a, a10s001_a, a10s002_a,
  a100s0_a, a100s0005_a, a100s001_a, a100s002_a,
  a1000s0_a, a1000s0005_a, a1000s001_a, a1000s002_a
)
diffehence <- c(1:16)
for (k in seq_along(type_a_mats)) {
  diffehence[k] <- sum(type_a_mats[[k]][, 32:36])
}

# Filling by row puts entries 1-4 in row 1, 5-8 in row 2, and so on.
a <- matrix(diffehence, nrow = 4, ncol = 4, byrow = TRUE)
a