Enrichment analysis in 35d-49d FF contrast group
pacman::p_load(dplyr,
gprofiler2,
ggplot2,
downloadthis)
load(file = "RData/data_DEG.RData")
Up- and down-regulated genes (absolute log2-fold change greater than 0.5) were analyzed separately.
Take Human base with parameters:
Use Human.Ensembl.Gene.ID when Associated.Transcript.Name == 0
list_query <- ifelse(list_oligo$d35vs49_FF_UP$Associated.Transcript.Name != 0,
list_oligo$d35vs49_FF_UP$Associated.Transcript.Name,
list_oligo$d35vs49_FF_UP$Human.Ensembl.Gene.ID)
gprofiler analysis
We set the version of database to avoid recent updates
set_base_url("http://biit.cs.ut.ee/gprofiler_archive3/e102_eg49_p15")
## Ensembl release 102, Ensembl Genomes release 49 and WormBase ParaSite release 15
result_35d49d_FF_up_iea <- gost(query = list_query,
organism = "hsapiens",
ordered_query = TRUE,
significant = TRUE,
exclude_iea = TRUE,
correction_method = "g_SCS",
sources = c("GO"),
evcodes = T)
result_35d49d_FF_up_iea$meta$version
[1] "e102_eg49_p15_e7ff1c9"
Skipped genes: genes without annotation
result_35d49d_FF_up_iea[["meta"]][["genes_metadata"]][["failed"]]
[1] "TNLG6A" "0" "RLA-A2" "C17H15orf65"
[5] "IGHVDJ" "KIAA1211" "GRAMD3" "FAM46C"
Use GeneCards site to update gene name not found in Ensembl database
list_query[list_query == 'TNLG6A'] <- 'TNFSF10'
No curation in GeneCards for “RLA-A2”
Gprofiler analysis with annotation corrections
result_35d49d_FF_up_iea <- gost(query = list_query,
organism = "hsapiens",
ordered_query = TRUE,
significant = TRUE,
exclude_iea = TRUE,
correction_method = "g_SCS",
sources = c("GO"),
evcodes = T)
Summary result
## -log10 of p-values
neg_log10_adj_p_value <- -log10(result_35d49d_FF_up_iea$result$p_value)
fract_gene_func <- result_35d49d_FF_up_iea$result$intersection_size / result_35d49d_FF_up_iea$result$term_size
table_result_35d49d_FF_up <- cbind(result_35d49d_FF_up_iea$result[, c("source", "term_name", "term_id", "p_value")],
neg_log10_adj_p_value,
result_35d49d_FF_up_iea$result[, c("term_size", "query_size", "intersection_size")],
fract_gene_func,
"intersection" = result_35d49d_FF_up_iea$result[, c("intersection")])
## p-values are already adjusted
#colnames(table_result_35d49d_FF_up)[colnames(table_result_35d49d_FF_up) == "p_value"] <- "adjusted_p_value"
Download results
publish_gosttable(result_35d49d_FF_up_iea,
highlight_terms = result_35d49d_FF_up_iea$result[1:20,],
use_colors = TRUE,
show_columns = c("source", "term_name", "term_size", "intersection_size"),
filename = NULL)
## plot
gostplot(result_35d49d_FF_up_iea,
capped = FALSE,
interactive = FALSE)
ggsave(filename = "p_FF_up_iea.png", path = "fig/")
## plot
p<-gostplot(result_35d49d_FF_up_iea,
capped = FALSE,
interactive = FALSE)
p1<-publish_gostplot(p)
ggsave(filename = "fig/p_FF_up_iea.tiff",
plot = p1,
device = "tiff",
# height = 10, width = 16,
dpi = 300)
table_result_35d49d_FF_up %>%
dplyr::filter(source =='GO:BP')%>%
arrange((x = p_value)) %>%
head(30) -> table_result_35d49d_FF_up_10
table_result_35d49d_FF_up_10 %>%
ggplot(aes(x = reorder(term_name, fract_gene_func), y = round(fract_gene_func*100, 2))) +
geom_col(fill = "#FF9500",
width = 0.6) +
coord_flip() +
theme_classic() +
theme(panel.grid.major.x = element_line(color = "gray"),
panel.border = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y = element_text(size = 12),
axis.text.x = element_text(size = 12),
axis.title.x = element_text(size = rel(1.2)),
#axis.ticks.y = element_blank(),
axis.line.x = element_blank(),
axis.line.y = element_blank()) +#aspect.ratio = 4/6) +
labs(x = "",
y = "Percent of genes") +
theme(plot.margin=grid::unit(c(0,0,0,0), "mm")) -> barplot_GO_35d49d_FF_up
barplot_GO_35d49d_FF_up
ggsave(filename = "fig/barplot_GO_35d49d_FF_up.tiff",
plot = barplot_GO_35d49d_FF_up,
device = "tiff",
height = 7.5, width = 11,
dpi = 300)
ggsave(barplot_GO_35d49d_FF_up,filename = "barplot_GO_35d49d_FF_up.png", path = "fig/")
knitr::include_graphics('fig/barplot_GO_35d49d_FF_up.tiff')
Size of shapes adapted
onto_plot(go,
terms = get_ancestors(go, c("GO:0071357", "GO:0060337", "GO:0034340", "GO:0032479", "GO:0032606", "GO:0032480", "GO:0032481")), #GO:0034669"
fixedsize = FALSE)
Use Human.Ensembl.Gene.ID when Associated.Transcript.Name == 0
list_query <- ifelse(list_oligo$d35vs49_FF_DOWN$Associated.Transcript.Name != 0,
list_oligo$d35vs49_FF_DOWN$Associated.Transcript.Name,
list_oligo$d35vs49_FF_DOWN$Human.Ensembl.Gene.ID)
gprofiler analysis We set the version of database to avoid recent updates
set_base_url("http://biit.cs.ut.ee/gprofiler_archive3/e102_eg49_p15")
## Ensembl release 102, Ensembl Genomes release 49 and WormBase ParaSite release 15
result_35d49d_FF_down_iea <- gost(query = list_query,
organism = "hsapiens",
ordered_query = TRUE,
significant = TRUE,
exclude_iea = TRUE,
correction_method = "g_SCS",
sources = c("GO"),
evcodes = T)
result_35d49d_FF_down_iea$meta$version
[1] "e102_eg49_p15_e7ff1c9"
Skipped genes: genes without annotation
result_35d49d_FF_down_iea[["meta"]][["genes_metadata"]][["failed"]]
[1] "CYP2A10" "H1F0" "HO1"
Use GeneCards site to update gene name not found in Ensembl database
list_query[list_query == 'H1F0'] <- 'H1-0' #H1.0 Linker Histone
list_query[list_query == 'HO1'] <- 'HMOX1' #heme oxygenase-1
Gprofiler analysis with annotation corrections
result_35d49d_FF_down_iea <- gost(query = list_query,
organism = "hsapiens",
ordered_query = TRUE,
significant = TRUE,
exclude_iea = TRUE,
correction_method = "g_SCS",
sources = c("GO"),
evcodes = T)
Summary result
## -log10 of p-values
neg_log10_adj_p_value <- -log10(result_35d49d_FF_down_iea$result$p_value)
fract_gene_func <- result_35d49d_FF_down_iea$result$intersection_size / result_35d49d_FF_down_iea$result$term_size
table_result_35d49d_FF_down <- cbind(result_35d49d_FF_down_iea$result[, c("source", "term_name", "term_id", "p_value")],
neg_log10_adj_p_value,
result_35d49d_FF_down_iea$result[, c("term_size", "query_size", "intersection_size")],
fract_gene_func,
"intersection" = result_35d49d_FF_down_iea$result[, c("intersection")])
## p-values are already adjusted
#colnames(table_result_35d49d_FF_up)[colnames(table_result_35d49d_FF_up) == "p_value"] <- "adjusted_p_value"
Download results
publish_gosttable(result_35d49d_FF_down_iea,
highlight_terms = result_35d49d_FF_down_iea$result,
use_colors = TRUE,
show_columns = c("source", "term_name", "term_size", "intersection_size"),
filename = NULL)