Additionnal file 1

Outlier and PCA on annotated unique probe dataset.

pacman::p_load(dplyr,
               tibble,
               ggplot2,
               dendextend,
               settings,
               FactoMineR,
               factoextra,
               ggpubr)

Outlier

load(file = "RData/data_TT.RData")

Hierarchical classification with 1-corr distance matrix

L <- "GPS24914"
hc <- hclust(as.dist(1 - cor(t(working_set_TT))), method = "ward.D2")
clust_corr <- as.dendrogram(hc)
par(mar = c(10,4,3,1))
clust_corr %>% 
  set("by_lists_branches_col", L, TF_value = "red") %>% 
  set("leaves_cex", 2) %>%
  color_labels(col = c("red"), labels = labels(.)[labels(.)=="GPS24914"]) %>% 
  ladderize(TRUE) %>% 
  plot(axes = F,
       cex = 2,
       main = "Hierarchical clustering with (1-corr) distance and Ward algorithm")
## reset par options
reset_par()

nMDS with (1-corr) distance matrix**

## nMDS with "1-cor" distance
set.seed(123)
tmp <- vegan::metaMDS(as.dist(1-cor(t(working_set_TT))), k = 2, maxit = 100)
Run 0 stress 0.1030165 
Run 1 stress 0.106524 
Run 2 stress 0.1074274 
Run 3 stress 0.1074881 
Run 4 stress 0.1089342 
Run 5 stress 0.1106128 
Run 6 stress 0.104248 
Run 7 stress 0.103058 
... Procrustes: rmse 0.006226324  max resid 0.02790997 
Run 8 stress 0.1085403 
Run 9 stress 0.1038872 
Run 10 stress 0.1047714 
Run 11 stress 0.1029835 
... New best solution
... Procrustes: rmse 0.006748053  max resid 0.02406572 
Run 12 stress 0.1036047 
Run 13 stress 0.1033931 
... Procrustes: rmse 0.02963116  max resid 0.08950939 
Run 14 stress 0.1032798 
... Procrustes: rmse 0.02395133  max resid 0.07011736 
Run 15 stress 0.103278 
... Procrustes: rmse 0.02433447  max resid 0.07724788 
Run 16 stress 0.1074629 
Run 17 stress 0.1085151 
Run 18 stress 0.1030281 
... Procrustes: rmse 0.0132038  max resid 0.0468257 
Run 19 stress 0.1029785 
... New best solution
... Procrustes: rmse 0.002907277  max resid 0.00843367 
... Similar to previous best
Run 20 stress 0.1033395 
... Procrustes: rmse 0.03002822  max resid 0.08943733 
*** Best solution repeated 1 times
tmp2 <- rownames_to_column(data.frame(tmp$points[,1:2]))
ggplot(data.frame(tmp2), aes(x = MDS1, y = MDS2)) +
  geom_point(aes(col = meta_data_TT$age), size = 4.5) + 
  geom_text(aes(label = ifelse(rownames(meta_data_TT) == "GPS24914", "GPS24914", '')),
            hjust = -.3,
            vjust = .6,
            colour = "black",
            cex = 3) +
  labs(title = "nMDS with (1-corr) distance", subtitle = paste0("Stress = ", round(tmp$stress, 3))) +
  labs(colour = "Age") +
  theme_bw() + 
  theme(axis.ticks = element_blank(),
        axis.text = element_blank(),
        axis.title = element_text(size = 12, color = "black"),
        axis.title.x = element_blank(),  
        axis.title.y = element_blank()) +
 # scale_color_grey() +
  theme(aspect.ratio = 1) +   ## square shape
  ggpubr::rremove("grid") -> p_nMDS_corr
p_nMDS_corr
ggsave(filename = "fig/nMDS_corr.png", plot = p_nMDS_corr, dpi=600, width = 6, height = 5)

PCA of unique annotated expressed probes according to age or treatment

## import data
load(file = "RData/data.RData")
load(file = "RData/data_DEG.RData")

## Compute PCA
work.pca <- PCA(working_set_fin, graph = FALSE)
df <- cbind(meta_data, work.pca$ind$coord)
df$treatment<-factor(df$treatment, levels=c("NF","FF", "FFab"))
## plot PCA for age
#png(filename = "fig/PCA_age_unique_probe.png", width = 600, height = 600)
df %>% 
  ggplot(aes(x = Dim.1, y = Dim.2, color = age)) +
  geom_point(size = 5) +
  stat_ellipse(size = 1) +
# coord_cartesian(xlim = c(-100, 100), ylim = c(-100, 100)) +
  coord_fixed(ratio = 1,xlim = c(-100, 100), ylim = c(-100, 100)) +
  labs(x = paste0("Dim 1 (", round(work.pca$eig[1, 2], 1), "%)"), 
       y = paste0("Dim 2 (", round(work.pca$eig[2, 2], 1), "%)")) +
  theme_classic() + #coord_fixed(ratio = 1) +
  theme(#legend.position = c(.88, .9),
        legend.background = element_rect(fill = "#FFFFFF", 
                                         size = 0.5, linetype="solid", 
                                         colour = "white"),
        axis.text.x = element_text(size = 12),
        axis.text.y = element_text(size = 12),
        axis.title = element_text(size = 15),
        panel.border = element_rect(colour = "black", fill = NA, size = .5),
        legend.title = element_text(size = 16),
        legend.text = element_text(size = 14)) +
  scale_color_discrete(name = "  Age")  -> p1

  #annotate("text", x = -190, y = 140, label = "A", size = 12)
#dev.off()
p1
ggsave("fig/PCA_age_unique_probe.tiff", p1, device='tiff', dpi=300, width = 6, height = 6)
## plot PCA for treatment
#png(filename = "fig/ACP Treatment 2.png", width = 600, height = 600)
p2<-df %>% 
  ggplot(aes(x = Dim.1, y = Dim.2, color = treatment)) +
  geom_point(size = 5) +
  stat_ellipse(size = 1) +
  #coord_cartesian(xlim = c(-100, 100), ylim = c(-100, 100)) +
  coord_fixed(ratio = 1,xlim = c(-100, 100), ylim = c(-100, 100)) +
  labs(x = paste0("Dim 1 (", round(work.pca$eig[1, 2], 1), "%)"), 
       y = paste0("Dim 2 (", round(work.pca$eig[2, 2], 1), "%)")) +
  theme_classic() + #coord_fixed(ratio = 1) +
  theme(#legend.position = c(.88, .9),
        legend.background = element_rect(fill = "#FFFFFF", 
                                         size = 0.5, linetype="solid", 
                                         colour = "white"),
        axis.text.x = element_text(size = 12),
        axis.text.y = element_text(size = 12),
        axis.title = element_text(size = 15),
        panel.border = element_rect(colour = "black", fill = NA, size = .5),
        legend.title = element_text(size = 16),
        legend.text = element_text(size = 14)) +
  scale_color_discrete(name = "  Treatment")#+
  #annotate("text", x = -190, y = 150, label = "B", size = 12) -> p2

p2
ggsave("fig/PCA_treatment_unique_probe.tiff", p2, device='tiff', dpi=300,width = 7, height = 7)
png(filename = "fig/ACP.png", width = 1400, height = 600)
gridExtra::grid.arrange(p1, p2, nrow = 1, newpage = TRUE)
dev.off()
png 
  2