Data

Import and format data

pacman::p_load(dplyr,
               xlsx,
               tibble,
               ggplot2,
               dendextend,
               settings,
               ggpubr,
               flextable)

Microarray

Import array

Import raw data of the microarray. The microarray data have been submitted to NCBI GEO (Gene Expression Omnibus; accession number: GSE104838 https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE104838 ).

working_set_raw <- read.csv("data/GPS_NED_MA0199_All_WorkingSet.csv", 
                            sep = ";", 
                            dec = ",", 
                            row.names = 1) %>% 
  as.matrix
dim(working_set_raw)
[1] 33284    72

Format working_set_raw

## add oligo_ to spot number rownames
rownames(working_set_raw) <- paste0("oligo_", rownames(working_set_raw))
## transpose
working_set_raw <- t(working_set_raw)
## sort on rows by sample name
working_set_raw <- working_set_raw[order(rownames(working_set_raw)),]

Glance at working_set_raw after formatting

working_set_raw[1:10,1:5] %>% 
  data.frame %>% 
  rownames_to_column(var = "Oligo") %>% 
  flextable %>% 
  autofit

Oligo

oligo_4

oligo_5

oligo_7

oligo_8

oligo_9

GPS24871

4.768867

10.56514

9.966930

7.712398

10.65519

GPS24872

5.123744

11.05010

10.175272

7.772364

10.28265

GPS24873

4.869991

10.68362

9.930170

7.962007

10.31245

GPS24874

5.424320

11.08182

9.761626

7.852183

10.55690

GPS24875

5.385345

10.97871

10.455120

8.092517

10.50853

GPS24876

4.721992

10.80676

9.905375

8.607926

10.23137

GPS24877

5.096335

10.77506

9.727906

8.346339

10.55745

GPS24878

5.021907

10.93837

9.878364

7.966467

10.56664

GPS24879

4.976638

10.72094

9.567397

8.185599

10.48794

GPS24880

6.157873

11.01759

9.741641

8.191910

10.16702

Metadata

Import metadata

Import table with environment variables

xp_design <- read.xlsx(file = "data/Envoi_St_Martin_SC1001_29102014.xlsx", 
                       sheetIndex = 1, 
                       colIndex = c(1:4), 
                       as.data.frame = TRUE, 
                       rowIndex = c(1:73), 
                       header = TRUE)

Format metadata

## remove fist column and use it as rownames
xp_design <- data.frame(xp_design[,-1], row.names = xp_design[,1])
## Rename colnames
colnames(xp_design) <- c("id", "treatment", "age")
## sort by id
xp_design <- xp_design[order(xp_design$id),]
## Convert id in character
xp_design$id <- as.character(xp_design$id)
## Rename treatment variable
xp_design$treatment[xp_design$treatment == "CAB"] <- "FFab"
xp_design$treatment[xp_design$treatment == "CC"] <- "FF"
xp_design$treatment[xp_design$treatment == "PP"] <- "NF"
levels(xp_design$treatment) <- list(FFab="FFab", FF="FF", NF="NF", TT = "TT")
## add "J" to each level of variable day and convert to factor
xp_design$age <- factor(paste0("d", xp_design$age))
## Create variable `age_treatment`, convert to factor
xp_design$contrast <- factor(paste0(xp_design$age, "_", xp_design$treatment))

Glance at metadata after formatting

xp_design[1:10,] %>% 
  data.frame %>% 
  rownames_to_column(var = "Sample") %>% 
  flextable %>% 
  autofit

Sample

id

treatment

age

contrast

GPS24894

77

NF

d35

d35_NF

GPS24914

78

NF

d35

d35_NF

GPS24932

79

NF

d35

d35_NF

GPS24913

80

NF

d35

d35_NF

GPS24911

81

FF

d35

d35_FF

GPS24930

82

FF

d35

d35_FF

GPS24877

83

NF

d35

d35_NF

GPS24895

84

NF

d35

d35_NF

GPS24879

85

TT

d35

d35_TT

GPS24897

86

TT

d35

d35_TT

Parameters

Import qualitative measures

param <- read.xlsx(file = "data/Identifiants_SC1001_35j-49j.xlsx", 
                   sheetIndex = 2, 
                   colIndex = c(1,4,8,10,13), 
                   as.data.frame = TRUE, 
                   rowIndex = c(1:73))

Format param

## Rename first column with "id"
colnames(param)[1] <- "id"
## Convert `id` to character
param[,1] <- as.character(param[,1])
## Convert cage number to factor
param[,2] <- as.factor(param[,2])

Create metadata

Create table with all data describing samples

## Merge xp_design and param without first column of param
meta_data_raw <- cbind(xp_design, param[,-1])
## sort by sample name (rows)
meta_data_raw <- meta_data_raw[order(rownames(meta_data_raw)),]

Rename colnames

meta_data_raw %>% 
  rename(day_2_w = pds_j2,
         day_35_w = pds_j35,
         slaughter_w = pds_abat) -> meta_data_raw

Select variables

meta_data_raw %>% 
  select(treatment,
         age,
         contrast,
         day_2_w,
         day_35_w,
         slaughter_w) -> meta_data_raw

Cleaning data

Remove TT treatment

Remove samples from TT treatment in working_set and metadata objects

## Remove samples from TT in working_set
working_set_TT <- working_set_raw[meta_data_raw$treatment != "TT", ]
meta_data_TT <- meta_data_raw[meta_data_raw$treatment != "TT", ]
meta_data_TT$treatment <- factor(meta_data_TT$treatment)
meta_data_TT$contrast <- factor(meta_data_TT$contrast)

Glance at meta_data_raw

meta_data_TT %>% 
  head(20) %>% 
  flextable %>% 
  autofit

treatment

age

contrast

day_2_w

day_35_w

slaughter_w

FFab

d35

d35_FFab

73

721

721

FFab

d35

d35_FFab

88

766

766

FF

d35

d35_FF

86

934

934

FF

d35

d35_FF

99

1,073

1,073

FF

d35

d35_FF

99

830

830

NF

d35

d35_NF

75

919

919

NF

d35

d35_NF

88

721

721

FFab

d49

d49_FFab

77

883

1,800

FFab

d49

d49_FFab

92

996

1,836

FFab

d49

d49_FFab

117

1,164

2,184

FF

d49

d49_FF

77

753

1,350

FF

d49

d49_FF

82

796

1,776

NF

d49

d49_NF

60

868

1,775

NF

d49

d49_NF

91

886

1,722

FFab

d35

d35_FFab

74

827

827

FFab

d35

d35_FFab

84

913

913

FF

d35

d35_FF

78

906

906

FF

d35

d35_FF

80

863

863

NF

d35

d35_NF

66

813

813

NF

d35

d35_NF

64

888

888

Outliers detection in microarray

Hierarchical classification with 1-corr distance matrix

L <- "GPS24914"
hc <- hclust(as.dist(1 - cor(t(working_set_TT))), method = "ward.D2")
clust_corr <- as.dendrogram(hc)
par(mar = c(10,4,3,1))
clust_corr %>% 
  set("by_lists_branches_col", L, TF_value = "red") %>% 
  set("leaves_cex", 2) %>%
  color_labels(col = c("red"), labels = labels(.)[labels(.)=="GPS24914"]) %>% 
  ladderize(TRUE) %>% 
  plot(axes = F,
       cex = 2,
       main = "Hierarchical clustering with (1-corr) distance and Ward algorithm")
## reset par options
reset_par()
## nMDS with "1-cor" distance
set.seed(123)
tmp <- vegan::metaMDS(as.dist(1-cor(t(working_set_TT))), k = 2, maxit = 100)
Run 0 stress 0.1030165 
Run 1 stress 0.106524 
Run 2 stress 0.1074274 
Run 3 stress 0.1074881 
Run 4 stress 0.1089342 
Run 5 stress 0.1106128 
Run 6 stress 0.104248 
Run 7 stress 0.103058 
... Procrustes: rmse 0.006226324  max resid 0.02790997 
Run 8 stress 0.1085403 
Run 9 stress 0.1038872 
Run 10 stress 0.1047714 
Run 11 stress 0.1029835 
... New best solution
... Procrustes: rmse 0.006748053  max resid 0.02406572 
Run 12 stress 0.1036047 
Run 13 stress 0.1033931 
... Procrustes: rmse 0.02963116  max resid 0.08950939 
Run 14 stress 0.1032798 
... Procrustes: rmse 0.02395133  max resid 0.07011736 
Run 15 stress 0.103278 
... Procrustes: rmse 0.02433447  max resid 0.07724788 
Run 16 stress 0.1074629 
Run 17 stress 0.1085151 
Run 18 stress 0.1030281 
... Procrustes: rmse 0.0132038  max resid 0.0468257 
Run 19 stress 0.1029785 
... New best solution
... Procrustes: rmse 0.002907277  max resid 0.00843367 
... Similar to previous best
Run 20 stress 0.1033395 
... Procrustes: rmse 0.03002822  max resid 0.08943733 
*** Best solution repeated 1 times
tmp2 <- rownames_to_column(data.frame(tmp$points[,1:2]))
ggplot(data.frame(tmp2), aes(x = MDS1, y = MDS2)) +
  geom_point(aes(col = meta_data_TT$age), size = 4.5) + 
  geom_text(aes(label = ifelse(rownames(meta_data_TT) == "GPS24914", "GPS24914", '')),
            hjust = -.3,
            vjust = .6,
            colour = "black",
            cex = 3) +
  labs(title = "nMDS with (1-corr) distance", subtitle = paste0("Stress = ", round(tmp$stress, 3))) +
  labs(colour = "Age") +
  theme_bw() + 
  theme(axis.ticks = element_blank(),
        axis.text = element_blank(),
        axis.title = element_text(size = 12, color = "black"),
        axis.title.x = element_blank(),  
        axis.title.y = element_blank()) +
 # scale_color_grey() +
  theme(aspect.ratio = 1) +   ## square shape
  ggpubr::rremove("grid") -> p_nMDS_corr
p_nMDS_corr
## ggsave(filename = fig/nMDS_corr.png", plot = p_nMDS_corr, dpi=600, width = 6, height = 5)

Remove sample GPS24914 in working_set and metadata

working_set <- working_set_TT[rownames(working_set_TT) != "GPS24914", ]
meta_data <- meta_data_TT[rownames(meta_data_TT) != "GPS24914", ]

Summary of qualitatives variables

meta_data %>% 
  select(where(is.factor)) %>% 
  summary
 treatment  age         contrast 
 FF  :19   d35:29   d35_FF  :10  
 FFab:19   d49:27   d35_FFab:10  
 NF  :18            d35_NF  : 9  
                    d49_FF  : 9  
                    d49_FFab: 9  
                    d49_NF  : 9  
save(working_set_raw, meta_data_raw, file = "RData/data_raw.RData")
save(working_set_TT, meta_data_TT, file = "RData/data_TT.RData")
save(working_set, meta_data, file = "RData/data.RData")