Outlier and PCA on annotated unique probe dataset.
pacman::p_load(dplyr,
tibble,
ggplot2,
dendextend,
settings,
FactoMineR,
factoextra,
ggpubr)
load(file = "RData/data_TT.RData")
L <- "GPS24914"
hc <- hclust(as.dist(1 - cor(t(working_set_TT))), method = "ward.D2")
clust_corr <- as.dendrogram(hc)
par(mar = c(10,4,3,1))
clust_corr %>%
set("by_lists_branches_col", L, TF_value = "red") %>%
set("leaves_cex", 2) %>%
color_labels(col = c("red"), labels = labels(.)[labels(.)=="GPS24914"]) %>%
ladderize(TRUE) %>%
plot(axes = F,
cex = 2,
main = "Hierarchical clustering with (1-corr) distance and Ward algorithm")
## reset par options
reset_par()
## nMDS with "1-cor" distance
set.seed(123)
tmp <- vegan::metaMDS(as.dist(1-cor(t(working_set_TT))), k = 2, maxit = 100)
Run 0 stress 0.1030165
Run 1 stress 0.106524
Run 2 stress 0.1074274
Run 3 stress 0.1074881
Run 4 stress 0.1089342
Run 5 stress 0.1106128
Run 6 stress 0.104248
Run 7 stress 0.103058
... Procrustes: rmse 0.006226324 max resid 0.02790997
Run 8 stress 0.1085403
Run 9 stress 0.1038872
Run 10 stress 0.1047714
Run 11 stress 0.1029835
... New best solution
... Procrustes: rmse 0.006748053 max resid 0.02406572
Run 12 stress 0.1036047
Run 13 stress 0.1033931
... Procrustes: rmse 0.02963116 max resid 0.08950939
Run 14 stress 0.1032798
... Procrustes: rmse 0.02395133 max resid 0.07011736
Run 15 stress 0.103278
... Procrustes: rmse 0.02433447 max resid 0.07724788
Run 16 stress 0.1074629
Run 17 stress 0.1085151
Run 18 stress 0.1030281
... Procrustes: rmse 0.0132038 max resid 0.0468257
Run 19 stress 0.1029785
... New best solution
... Procrustes: rmse 0.002907277 max resid 0.00843367
... Similar to previous best
Run 20 stress 0.1033395
... Procrustes: rmse 0.03002822 max resid 0.08943733
*** Best solution repeated 1 times
tmp2 <- rownames_to_column(data.frame(tmp$points[,1:2]))
ggplot(data.frame(tmp2), aes(x = MDS1, y = MDS2)) +
geom_point(aes(col = meta_data_TT$age), size = 4.5) +
geom_text(aes(label = ifelse(rownames(meta_data_TT) == "GPS24914", "GPS24914", '')),
hjust = -.3,
vjust = .6,
colour = "black",
cex = 3) +
labs(title = "nMDS with (1-corr) distance", subtitle = paste0("Stress = ", round(tmp$stress, 3))) +
labs(colour = "Age") +
theme_bw() +
theme(axis.ticks = element_blank(),
axis.text = element_blank(),
axis.title = element_text(size = 12, color = "black"),
axis.title.x = element_blank(),
axis.title.y = element_blank()) +
# scale_color_grey() +
theme(aspect.ratio = 1) + ## square shape
ggpubr::rremove("grid") -> p_nMDS_corr
p_nMDS_corr
ggsave(filename = "fig/nMDS_corr.png", plot = p_nMDS_corr, dpi=600, width = 6, height = 5)
## plot PCA for age
#png(filename = "fig/PCA_age_unique_probe.png", width = 600, height = 600)
df %>%
ggplot(aes(x = Dim.1, y = Dim.2, color = age)) +
geom_point(size = 5) +
stat_ellipse(size = 1) +
# coord_cartesian(xlim = c(-100, 100), ylim = c(-100, 100)) +
coord_fixed(ratio = 1,xlim = c(-100, 100), ylim = c(-100, 100)) +
labs(x = paste0("Dim 1 (", round(work.pca$eig[1, 2], 1), "%)"),
y = paste0("Dim 2 (", round(work.pca$eig[2, 2], 1), "%)")) +
theme_classic() + #coord_fixed(ratio = 1) +
theme(#legend.position = c(.88, .9),
legend.background = element_rect(fill = "#FFFFFF",
size = 0.5, linetype="solid",
colour = "white"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
axis.title = element_text(size = 15),
panel.border = element_rect(colour = "black", fill = NA, size = .5),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14)) +
scale_color_discrete(name = " Age") -> p1
#annotate("text", x = -190, y = 140, label = "A", size = 12)
#dev.off()
p1
ggsave("fig/PCA_age_unique_probe.tiff", p1, device='tiff', dpi=300, width = 6, height = 6)
## plot PCA for treatment
#png(filename = "fig/ACP Treatment 2.png", width = 600, height = 600)
p2<-df %>%
ggplot(aes(x = Dim.1, y = Dim.2, color = treatment)) +
geom_point(size = 5) +
stat_ellipse(size = 1) +
#coord_cartesian(xlim = c(-100, 100), ylim = c(-100, 100)) +
coord_fixed(ratio = 1,xlim = c(-100, 100), ylim = c(-100, 100)) +
labs(x = paste0("Dim 1 (", round(work.pca$eig[1, 2], 1), "%)"),
y = paste0("Dim 2 (", round(work.pca$eig[2, 2], 1), "%)")) +
theme_classic() + #coord_fixed(ratio = 1) +
theme(#legend.position = c(.88, .9),
legend.background = element_rect(fill = "#FFFFFF",
size = 0.5, linetype="solid",
colour = "white"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
axis.title = element_text(size = 15),
panel.border = element_rect(colour = "black", fill = NA, size = .5),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14)) +
scale_color_discrete(name = " Treatment")#+
#annotate("text", x = -190, y = 150, label = "B", size = 12) -> p2
p2
ggsave("fig/PCA_treatment_unique_probe.tiff", p2, device='tiff', dpi=300,width = 7, height = 7)
png(filename = "fig/ACP.png", width = 1400, height = 600)
gridExtra::grid.arrange(p1, p2, nrow = 1, newpage = TRUE)
dev.off()
png
2