Enrichment

Enrichment analysis in 35d-49d FF contrast group


pacman::p_load(dplyr,
               gprofiler2,
               ggplot2,
               downloadthis)
load(file = "RData/data_DEG.RData")

Up- and down-regulated genes (absolute log2-fold change greater than 0.5) were analyzed separately.

Take Human base with parameters:

Up-regulated genes in FF group between 35d and 49d

Use Human.Ensembl.Gene.ID when Associated.Transcript.Name == 0

list_query <- ifelse(list_oligo$d35vs49_FF_UP$Associated.Transcript.Name != 0,
             list_oligo$d35vs49_FF_UP$Associated.Transcript.Name,
             list_oligo$d35vs49_FF_UP$Human.Ensembl.Gene.ID)

gprofiler analysis
We set the version of database to avoid recent updates

set_base_url("http://biit.cs.ut.ee/gprofiler_archive3/e102_eg49_p15")
## Ensembl release 102, Ensembl Genomes release 49 and WormBase ParaSite release 15
result_35d49d_FF_up_iea <- gost(query = list_query,
                         organism = "hsapiens",
                         ordered_query = TRUE,
                         significant = TRUE,
                         exclude_iea = TRUE,
                         correction_method = "g_SCS",
                         sources = c("GO"),
                         evcodes = T)
result_35d49d_FF_up_iea$meta$version
[1] "e102_eg49_p15_e7ff1c9"

Skipped genes: genes without annotation

result_35d49d_FF_up_iea[["meta"]][["genes_metadata"]][["failed"]]
[1] "TNLG6A"      "0"           "RLA-A2"      "C17H15orf65"
[5] "IGHVDJ"      "KIAA1211"    "GRAMD3"      "FAM46C"     

Use GeneCards site to update gene name not found in Ensembl database

list_query[list_query == 'TNLG6A'] <- 'TNFSF10'

No curation in GeneCards for “RLA-A2”

Gprofiler analysis with annotation corrections

result_35d49d_FF_up_iea <- gost(query = list_query,
                         organism = "hsapiens",
                         ordered_query = TRUE,
                         significant = TRUE,
                         exclude_iea = TRUE,
                         correction_method = "g_SCS",
                         sources = c("GO"),
                         evcodes = T)

Summary result

## -log10 of p-values
neg_log10_adj_p_value <- -log10(result_35d49d_FF_up_iea$result$p_value)
fract_gene_func <- result_35d49d_FF_up_iea$result$intersection_size / result_35d49d_FF_up_iea$result$term_size

table_result_35d49d_FF_up <- cbind(result_35d49d_FF_up_iea$result[, c("source", "term_name", "term_id", "p_value")],
                            neg_log10_adj_p_value,
                            result_35d49d_FF_up_iea$result[, c("term_size", "query_size", "intersection_size")],
                            fract_gene_func,
                            "intersection" = result_35d49d_FF_up_iea$result[, c("intersection")])
## p-values are already adjusted
#colnames(table_result_35d49d_FF_up)[colnames(table_result_35d49d_FF_up) == "p_value"] <- "adjusted_p_value"

Download results

Sort by p-value

publish_gosttable(result_35d49d_FF_up_iea, 
                  highlight_terms = result_35d49d_FF_up_iea$result[1:20,],
                  use_colors = TRUE, 
                  show_columns = c("source", "term_name", "term_size", "intersection_size"),
                  filename = NULL)

Manhattan plot of functional enrichment results: gostplot

## plot
gostplot(result_35d49d_FF_up_iea,
                        capped = FALSE,
                        interactive = FALSE)
ggsave(filename = "p_FF_up_iea.png", path = "fig/") 
## plot
p<-gostplot(result_35d49d_FF_up_iea,
                        capped = FALSE,
                        interactive = FALSE)
p1<-publish_gostplot(p)


ggsave(filename = "fig/p_FF_up_iea.tiff",
        plot = p1,
       device = "tiff",
      # height = 10, width = 16,
       dpi = 300)

Sort GO:BP term by p-value and intersection_size

table_result_35d49d_FF_up %>% 
  dplyr::filter(source =='GO:BP')%>%
  arrange((x = p_value)) %>% 
  head(30) -> table_result_35d49d_FF_up_10 

table_result_35d49d_FF_up_10 %>% 
  ggplot(aes(x = reorder(term_name, fract_gene_func), y = round(fract_gene_func*100, 2))) +
  geom_col(fill = "#FF9500", 
           width = 0.6) +
  coord_flip() +
  theme_classic() +
  theme(panel.grid.major.x = element_line(color = "gray"),
        panel.border = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y = element_text(size = 12),
        axis.text.x = element_text(size = 12),
        axis.title.x = element_text(size = rel(1.2)),
        #axis.ticks.y = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank()) +#aspect.ratio = 4/6) +
  labs(x = "",
       y = "Percent of genes") +
  theme(plot.margin=grid::unit(c(0,0,0,0), "mm")) -> barplot_GO_35d49d_FF_up

barplot_GO_35d49d_FF_up
ggsave(filename = "fig/barplot_GO_35d49d_FF_up.tiff",
       plot = barplot_GO_35d49d_FF_up,
       device = "tiff",
       height = 7.5, width = 11,
       dpi = 300)

ggsave(barplot_GO_35d49d_FF_up,filename = "barplot_GO_35d49d_FF_up.png", path = "fig/") 
knitr::include_graphics('fig/barplot_GO_35d49d_FF_up.tiff')

Size of shapes adapted

onto_plot(go, 
          terms = get_ancestors(go, c("GO:0071357", "GO:0060337", "GO:0034340", "GO:0032479", "GO:0032606", "GO:0032480", "GO:0032481")), #GO:0034669"
          fixedsize = FALSE)

Down-regulated genes in FF group between 35d and 49d

Use Human.Ensembl.Gene.ID when Associated.Transcript.Name == 0

list_query <- ifelse(list_oligo$d35vs49_FF_DOWN$Associated.Transcript.Name != 0,
             list_oligo$d35vs49_FF_DOWN$Associated.Transcript.Name,
             list_oligo$d35vs49_FF_DOWN$Human.Ensembl.Gene.ID)

gprofiler analysis We set the version of database to avoid recent updates

set_base_url("http://biit.cs.ut.ee/gprofiler_archive3/e102_eg49_p15")
## Ensembl release 102, Ensembl Genomes release 49 and WormBase ParaSite release 15
result_35d49d_FF_down_iea <- gost(query = list_query,
                         organism = "hsapiens",
                         ordered_query = TRUE,
                         significant = TRUE,
                         exclude_iea = TRUE,
                         correction_method = "g_SCS",
                         sources = c("GO"),
                         evcodes = T)
result_35d49d_FF_down_iea$meta$version
[1] "e102_eg49_p15_e7ff1c9"

Skipped genes: genes without annotation

result_35d49d_FF_down_iea[["meta"]][["genes_metadata"]][["failed"]]
[1] "CYP2A10" "H1F0"    "HO1"    

Use GeneCards site to update gene name not found in Ensembl database

list_query[list_query == 'H1F0'] <- 'H1-0' #H1.0 Linker Histone
list_query[list_query == 'HO1'] <- 'HMOX1' #heme oxygenase-1

Gprofiler analysis with annotation corrections

result_35d49d_FF_down_iea <- gost(query = list_query,
                         organism = "hsapiens",
                         ordered_query = TRUE,
                         significant = TRUE,
                         exclude_iea = TRUE,
                         correction_method = "g_SCS",
                         sources = c("GO"),
                         evcodes = T)

Summary result

## -log10 of p-values
neg_log10_adj_p_value <- -log10(result_35d49d_FF_down_iea$result$p_value)
fract_gene_func <- result_35d49d_FF_down_iea$result$intersection_size / result_35d49d_FF_down_iea$result$term_size

table_result_35d49d_FF_down <- cbind(result_35d49d_FF_down_iea$result[, c("source", "term_name", "term_id", "p_value")],
                            neg_log10_adj_p_value,
                            result_35d49d_FF_down_iea$result[, c("term_size", "query_size", "intersection_size")],
                            fract_gene_func,
                            "intersection" = result_35d49d_FF_down_iea$result[, c("intersection")])
## p-values are already adjusted
#colnames(table_result_35d49d_FF_up)[colnames(table_result_35d49d_FF_up) == "p_value"] <- "adjusted_p_value"

Download results

publish_gosttable(result_35d49d_FF_down_iea, 
                  highlight_terms = result_35d49d_FF_down_iea$result,
                  use_colors = TRUE, 
                  show_columns = c("source", "term_name", "term_size", "intersection_size"),
                  filename = NULL)