J. Taroni 2018
E-MTAB-2452: Sorted peripheral blood cells (CD4+ T cells, CD14+ monocytes, CD16+ neutrophils) profiled on microarray from several autoimmune diseases. This dataset is a good control for further investigating recount2 PLIER model (cross-platform) transfer learning.
`%>%` <- dplyr::`%>%`
library(AnnotationDbi)
Loading required package: stats4
Loading required package: BiocGenerics
Loading required package: parallel
Attaching package: ‘BiocGenerics’
The following objects are masked from ‘package:parallel’:
clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, clusterExport,
clusterMap, parApply, parCapply, parLapply, parLapplyLB, parRapply,
parSapply, parSapplyLB
The following objects are masked from ‘package:stats’:
IQR, mad, sd, var, xtabs
The following objects are masked from ‘package:base’:
anyDuplicated, append, as.data.frame, cbind, colMeans, colnames,
colSums, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
grepl, intersect, is.unsorted, lapply, lengths, Map, mapply, match,
mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
rbind, Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort,
table, tapply, union, unique, unsplit, which, which.max, which.min
Loading required package: Biobase
Welcome to Bioconductor
Vignettes contain introductory material; view with 'browseVignettes()'.
To cite Bioconductor, see 'citation("Biobase")', and for packages
'citation("pkgname")'.
Loading required package: IRanges
Loading required package: S4Vectors
Attaching package: ‘S4Vectors’
The following object is masked from ‘package:base’:
expand.grid
# custom functions
source(file.path("util", "plier_util.R"))
# plot and result directory setup for this notebook
plot.dir <- file.path("plots", "03")
dir.create(plot.dir, recursive = TRUE, showWarnings = FALSE)
results.dir <- file.path("results", "03")
dir.create(results.dir, recursive = TRUE, showWarnings = FALSE)
recount2
PLIER modelplier.results <- readRDS(file.path("data", "recount2_PLIER_data",
"recount_PLIER_model.RDS"))
# read in E-MTAB-2452 expression data that has been processed with SCANfast
exprs.df <- readr::read_tsv(file.path("data", "expression_data",
"E-MTAB-2452_hugene11st_SCANfast.pcl"))
Missing column names filled in: 'X1' [1]Parsed with column specification:
cols(
.default = col_double(),
X1 = col_character()
)
See spec(...) for full column specifications.
|=== | 4%
|==== | 6%
|==== | 7% 1 MB
|===== | 8% 1 MB
|====== | 9% 1 MB
|======= | 10% 1 MB
|======= | 11% 1 MB
|======== | 12% 1 MB
|========= | 13% 2 MB
|========= | 14% 2 MB
|========== | 15% 2 MB
|=========== | 16% 2 MB
|============ | 17% 2 MB
|============ | 18% 2 MB
|============= | 19% 2 MB
|============== | 20% 3 MB
|============== | 21% 3 MB
|=============== | 22% 3 MB
|================ | 23% 3 MB
|================ | 24% 3 MB
|================= | 25% 3 MB
|================== | 26% 4 MB
|=================== | 27% 4 MB
|=================== | 28% 4 MB
|==================== | 29% 4 MB
|===================== | 30% 4 MB
|===================== | 31% 4 MB
|====================== | 32% 5 MB
|======================= | 33% 5 MB
|======================== | 34% 5 MB
|======================== | 35% 5 MB
|========================= | 36% 5 MB
|========================== | 37% 5 MB
|========================== | 38% 5 MB
|=========================== | 39% 6 MB
|============================ | 40% 6 MB
|============================= | 41% 6 MB
|============================= | 42% 6 MB
|============================== | 43% 6 MB
|=============================== | 44% 6 MB
|=============================== | 45% 7 MB
|================================ | 46% 7 MB
|================================= | 47% 7 MB
|================================== | 48% 7 MB
|================================== | 49% 7 MB
|=================================== | 50% 7 MB
|==================================== | 51% 8 MB
|==================================== | 52% 8 MB
|===================================== | 53% 8 MB
|====================================== | 54% 8 MB
|======================================= | 55% 8 MB
|======================================= | 56% 8 MB
|======================================== | 57% 9 MB
|========================================= | 58% 9 MB
|========================================= | 59% 9 MB
|========================================== | 60% 9 MB
|=========================================== | 61% 9 MB
|=========================================== | 62% 9 MB
|============================================ | 63% 9 MB
|============================================= | 64% 10 MB
|============================================= | 65% 10 MB
|============================================== | 66% 10 MB
|=============================================== | 67% 10 MB
|================================================ | 68% 10 MB
|================================================ | 69% 10 MB
|================================================= | 70% 11 MB
|================================================== | 71% 11 MB
|================================================== | 72% 11 MB
|=================================================== | 73% 11 MB
|==================================================== | 74% 11 MB
|===================================================== | 75% 11 MB
|===================================================== | 76% 11 MB
|====================================================== | 77% 12 MB
|======================================================= | 78% 12 MB
|======================================================= | 79% 12 MB
|======================================================== | 80% 12 MB
|========================================================= | 81% 12 MB
|========================================================= | 82% 12 MB
|========================================================== | 83% 13 MB
|=========================================================== | 84% 13 MB
|============================================================ | 85% 13 MB
|============================================================ | 86% 13 MB
|============================================================= | 87% 13 MB
|============================================================== | 88% 13 MB
|============================================================== | 89% 14 MB
|=============================================================== | 90% 14 MB
|================================================================ | 91% 14 MB
|================================================================ | 92% 14 MB
|================================================================= | 93% 14 MB
|================================================================== | 94% 14 MB
|=================================================================== | 95% 14 MB
|=================================================================== | 96% 15 MB
|==================================================================== | 97% 15 MB
|=====================================================================| 98% 15 MB
|=====================================================================| 99% 15 MB
|======================================================================| 100% 15 MB
colnames(exprs.df)[1] <- "EntrezID"
# remove trailing "_at" (result of using BrainArray) to get EntrezIDs
exprs.df$EntrezID <- gsub("_at", "", exprs.df$EntrezID)
# conversion to gene symbol
symbol.obj <- org.Hs.eg.db::org.Hs.egSYMBOL
mapped.genes <- AnnotationDbi::mappedkeys(symbol.obj)
symbol.list <- as.list(symbol.obj[mapped.genes])
symbol.df <- as.data.frame(cbind(names(symbol.list), unlist(symbol.list)))
colnames(symbol.df) <- c("EntrezID", "GeneSymbol")
# inner join
annot.data <- dplyr::inner_join(symbol.df, exprs.df, by = "EntrezID")
Column `EntrezID` joining factor and character vector, coercing into character vector
# remove objects only necessary for annotation
rm(mapped.genes, symbol.list, symbol.obj, symbol.df)
# write to file
gs.file <- file.path("data", "expression_data",
"E-MTAB-2452_hugene11st_SCANfast_with_GeneSymbol.pcl")
readr::write_tsv(annot.data, path = gs.file)
# get as a matrix
exprs.mat <- as.matrix(annot.data[, 3:ncol(annot.data)])
rownames(exprs.mat) <- annot.data$GeneSymbol
iso.b.matrix <- GetNewDataB(exprs.mat = exprs.mat,
plier.model = plier.results)
Loading required package: PLIER
Loading required package: RColorBrewer
Loading required package: gplots
Attaching package: ‘gplots’
The following object is masked from ‘package:IRanges’:
space
The following object is masked from ‘package:S4Vectors’:
space
The following object is masked from ‘package:stats’:
lowess
Loading required package: pheatmap
Loading required package: glmnet
Loading required package: Matrix
Attaching package: ‘Matrix’
The following object is masked from ‘package:S4Vectors’:
expand
Loading required package: foreach
Loaded glmnet 2.0-13
Loading required package: knitr
Loading required package: rsvd
Loading required package: qvalue
head(iso.b.matrix[, 1:10])
CD14_triad0058_1.CEL
1,REACTOME_MRNA_SPLICING 0.067106412
2,SVM Monocytes 0.232957061
3,REACTOME_TRANSMISSION_ACROSS_CHEMICAL_SYNAPSES -0.011655091
4,REACTOME_NEURONAL_SYSTEM -0.067716284
LV 5 0.010245267
LV 6 0.001491663
CD14_triad0058_2.CEL
1,REACTOME_MRNA_SPLICING 0.04966645
2,SVM Monocytes 0.25400019
3,REACTOME_TRANSMISSION_ACROSS_CHEMICAL_SYNAPSES -0.01469445
4,REACTOME_NEURONAL_SYSTEM -0.09668289
LV 5 0.01236375
LV 6 -0.02168559
CD14_triad0058_3.CEL
1,REACTOME_MRNA_SPLICING 0.082299659
2,SVM Monocytes 0.212679441
3,REACTOME_TRANSMISSION_ACROSS_CHEMICAL_SYNAPSES -0.009044731
4,REACTOME_NEURONAL_SYSTEM -0.063541877
LV 5 0.003987257
LV 6 0.017852705
CD14_triad0058_4.CEL
1,REACTOME_MRNA_SPLICING 0.078970621
2,SVM Monocytes 0.257875145
3,REACTOME_TRANSMISSION_ACROSS_CHEMICAL_SYNAPSES -0.043604428
4,REACTOME_NEURONAL_SYSTEM -0.076126418
LV 5 -0.007174178
LV 6 -0.019207945
CD14_triad0058_5.CEL
1,REACTOME_MRNA_SPLICING 0.066609286
2,SVM Monocytes 0.222294145
3,REACTOME_TRANSMISSION_ACROSS_CHEMICAL_SYNAPSES -0.008187733
4,REACTOME_NEURONAL_SYSTEM -0.032182088
LV 5 -0.005368010
LV 6 -0.002825871
CD14_triad0058_6.CEL
1,REACTOME_MRNA_SPLICING 0.113726151
2,SVM Monocytes 0.264977049
3,REACTOME_TRANSMISSION_ACROSS_CHEMICAL_SYNAPSES -0.035042814
4,REACTOME_NEURONAL_SYSTEM -0.072400980
LV 5 -0.029497357
LV 6 -0.001938951
CD14_triad0058_7.CEL
1,REACTOME_MRNA_SPLICING 0.06027642
2,SVM Monocytes 0.28752454
3,REACTOME_TRANSMISSION_ACROSS_CHEMICAL_SYNAPSES -0.01073898
4,REACTOME_NEURONAL_SYSTEM -0.04019075
LV 5 0.01692032
LV 6 -0.03281626
CD14_triad0058_8.CEL
1,REACTOME_MRNA_SPLICING 0.074128734
2,SVM Monocytes 0.220520916
3,REACTOME_TRANSMISSION_ACROSS_CHEMICAL_SYNAPSES -0.006925421
4,REACTOME_NEURONAL_SYSTEM -0.068050390
LV 5 -0.011935084
LV 6 0.006903267
CD14_triad0062_10.CEL
1,REACTOME_MRNA_SPLICING -0.007857945
2,SVM Monocytes 0.314180137
3,REACTOME_TRANSMISSION_ACROSS_CHEMICAL_SYNAPSES -0.047472878
4,REACTOME_NEURONAL_SYSTEM 0.013726798
LV 5 -0.006147173
LV 6 -0.058201924
CD14_triad0062_11.CEL
1,REACTOME_MRNA_SPLICING 0.06326480
2,SVM Monocytes 0.25731682
3,REACTOME_TRANSMISSION_ACROSS_CHEMICAL_SYNAPSES -0.01583182
4,REACTOME_NEURONAL_SYSTEM -0.04384992
LV 5 -0.00290343
LV 6 -0.02864669
# write to file
iso.b.file <- file.path(results.dir, "E-MTAB-2452_B_matrix_recount2_model.txt")
write.table(iso.b.matrix, file = iso.b.file, sep = "\t", quote = FALSE)
# differences in cell type LVs?
indx.relevant.lv <- c(grep("CD4", rownames(iso.b.matrix)),
grep("Monocyte", rownames(iso.b.matrix)),
grep("Neutrophil", rownames(iso.b.matrix)))
# get row side color vector matching relevant cell type LVs
row.colors.vec <- c(rep("#1E90EF", sum(grepl("CD4", rownames(iso.b.matrix)))),
rep("#000000",
sum(grepl("Monocyte", rownames(iso.b.matrix)))),
rep("#32CD32",
sum(grepl("Neutrophil", rownames(iso.b.matrix)))))
# get col side color vector based on what cell type each sample is
col.colors.vec <- rep("white", ncol(iso.b.matrix))
col.colors.vec[grep("CD4", colnames(iso.b.matrix))] <- "#1E90EF"
col.colors.vec[grep("CD14", colnames(iso.b.matrix))] <- "#000000"
col.colors.vec[grep("CD16", colnames(iso.b.matrix))] <- "#32CD32"
# heatmap
gplots::heatmap.2(iso.b.matrix[indx.relevant.lv, ],
trace = "none",
col = colorRampPalette(c("blue", "white", "red")),
labCol = FALSE, cexRow = 0.75,
RowSideColors = row.colors.vec,
ColSideColors = col.colors.vec,
density.info = "none",
margins = c(2.5, 14),
main = "Isolated cell populations\nmicroarray",
ylab = "recount2 LVs",
xlab = "Samples",
colsep = 0:ncol(iso.b.matrix),
rowsep = 0:length(indx.relevant.lv),
sepcolor = "#666666", sepwidth = c(0.001, 0.001))
legend("topright",
legend = c("CD4 T cell", "CD14, monocyte", "CD16, neutrophil"),
col = c("#1E90EF", "#000000", "#32CD32"), cex = 0.5,
lty = 1, lwd = 6)
# save heatmap as pdf -- can not figure out another way around this, hm just
# returns a list
pdf(file.path(plot.dir, "E-MTAB-2452_recount_PLIER_cell_type_LVs_B.pdf"),
width = 7, height = 5)
gplots::heatmap.2(iso.b.matrix[indx.relevant.lv, ],
trace = "none",
col = colorRampPalette(c("blue", "white", "red")),
labCol = FALSE, cexRow = 0.75,
RowSideColors = row.colors.vec,
ColSideColors = col.colors.vec,
density.info = "none",
margins = c(2.5, 14),
main = "Isolated cell populations\nmicroarray",
ylab = "recount2 LVs",
xlab = "Samples",
colsep = 0:ncol(iso.b.matrix),
rowsep = 0:length(indx.relevant.lv),
sepcolor = "#666666", sepwidth = c(0.001, 0.001))
legend("topright",
legend = c("CD4 T cell", "CD14, monocyte", "CD16, neutrophil"),
col = c("#1E90EF", "#000000", "#32CD32"), cex = 0.5,
lty = 1, lwd = 6)
dev.off()
null device
1