J. Taroni 2018
Explore the top differentially expressed LVs in the GPA blood dataset (PBMCs). Are they pathways we could have captured with a PLIER model fit to this dataset?
`%>%` <- dplyr::`%>%`
source(file.path("util", "plier_util.R"))
# plot and result directory setup for this notebook
plot.dir <- file.path("plots", "22")
dir.create(plot.dir, recursive = TRUE, showWarnings = FALSE)
results.dir <- file.path("results", "22")
dir.create(results.dir, recursive = TRUE, showWarnings = FALSE)
limma.file <- file.path("results", "19",
"GPA_blood_recount2_model_LV_limma_results.tsv")
limma.df <- readr::read_tsv(limma.file)
Parsed with column specification:
cols(
LV = col_character(),
Control...GPAneg = col_double(),
Control...GPApos = col_double(),
GPAneg...GPApos = col_double(),
AveExpr = col_double(),
F = col_double(),
P.Value = col_double(),
adj.P.Val = col_double()
)
Let’s take a look at the top differentially expressed latent variables. We’ll sort by FDR.
limma.df %>%
dplyr::filter(adj.P.Val < 0.05) %>%
dplyr::arrange(adj.P.Val)
Let’s explore LV 599
a little further.
recount.b.file <- file.path("results", "19",
"GPA_blood_recount2_model_B_long_sample_info.tsv")
recount.b.df <- readr::read_tsv(recount.b.file) %>%
dplyr::mutate(GPA_signature =
dplyr::case_when(
GPA_signature == "GPApos" ~ "GPA-positive",
GPA_signature == "GPAneg" ~ "GPA-negative",
TRUE ~ "Control"
))
Parsed with column specification:
cols(
LV = col_character(),
Sample = col_character(),
Value = col_double(),
Name = col_character(),
GPA_signature = col_character(),
Cell_type = col_character()
)
recount.b.df %>%
dplyr::filter(LV == "599,DMAP_ERY2") %>%
ggplot2::ggplot(ggplot2::aes(x = GPA_signature, y = Value)) +
ggplot2::geom_boxplot() +
ggplot2::geom_jitter(position = ggplot2::position_jitter(0.2)) +
ggplot2::theme_bw() +
ggplot2::labs(x = "GPA signature", y = "LV 599",
title = "Cheadle, et al. top LV") +
ggplot2::theme(legend.position = "none",
text = ggplot2::element_text(size = 15),
plot.title = ggplot2::element_text(hjust = 0.5, face = "bold"))
plot.file <- file.path(plot.dir, "recount2_LV599_boxplot.pdf")
ggplot2::ggsave(plot.file, plot = ggplot2::last_plot())
Saving 7 x 7 in image
recount.plier <- readRDS(file.path("data", "recount2_PLIER_data",
"recount_PLIER_model.RDS"))
LV 599
?recount.plier$summary %>%
dplyr::filter(`LV index` == 599)
Let’s look at the top 50.
head(sort(recount.plier$Z[, 599], decreasing = TRUE), 50)
MPO ELANE AZU1 PRTN3 MS4A3 CPA3 RNASE2 CTSG CST7 GFI1 RNASE3 PRG2 SLC22A16 CALR HGF SRGN CYTL1 KIT
7.6287989 6.2953905 5.8638362 5.2984213 5.1141258 3.3066014 3.2086603 3.1774040 2.8926804 2.7491070 2.6836885 2.6471986 2.4480860 2.4046115 2.1717571 2.1278734 1.9043058 1.8300078
NTNG2 P4HB MEST P2RY2 BPI CITED2 EMILIN2 LYZ F13A1 SLC28A3 IGLL1 MYCN PRAM1 STAB1 CLC CCNA1 DEFA4 CFD
1.6251605 1.6066455 1.5609102 1.5177552 1.4498729 1.4286242 1.3158223 1.2705287 1.2486816 1.2135985 1.1392354 1.1310816 1.0429996 1.0369828 1.0200808 0.9918391 0.9905232 0.9404843
IRS2 CEACAM4 CAT PNP IRAK3 SLC1A3 SAP30 CLEC5A SLC22A15 SLC22A4 RAB5B TESC CEACAM8 HCST
0.9340689 0.9048998 0.8877785 0.8471992 0.8457302 0.8006944 0.7867686 0.7589677 0.7544687 0.7486729 0.7355585 0.7346467 0.7325653 0.7274270
Some of these are major or minor autoantigens in ANCA-associated vasculitis. This is consistent with the original publication (Cheadle, et al. A&R. 2010.): the authors found that “neutrophil granule constituent genes” were highly expressed in the PBMC fraction from patients with GPA that could not be attributed to contamination with mature granulocytes.
We’ll use the list of autoantigens in Cheadle, et al.
autoantigens <- c("MPO", "ELANE", "BPI", "CTSG", "LCN2", "AZU1", "PRTN3")
# get into appropriate data.frame for bar plot
top.z.df <- as.data.frame(sort(recount.plier$Z[, 599],
decreasing = TRUE)[1:50])
top.z.df <- tibble::rownames_to_column(top.z.df, var = "Gene")
colnames(top.z.df)[2] <- "Z"
# add in autoantigen information
top.z.df <- top.z.df %>%
dplyr::mutate(Autoantigen =
dplyr::case_when(
Gene %in% autoantigens ~ "Yes",
TRUE ~ "No"
))
# reorder for plotting
top.z.df$Gene <- factor(top.z.df$Gene,
levels = top.z.df$Gene[50:1])
p <- ggplot2::ggplot(top.z.df,
ggplot2::aes(x = Gene, y = Z, fill = Autoantigen)) +
ggplot2::geom_bar(stat = "identity") +
ggplot2::theme_bw() +
ggplot2::scale_fill_manual(values = c("black", "red")) +
ggplot2::coord_flip() +
ggplot2::ggtitle("recount2 LV 599 Loadings") +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5,
face = "bold"))
p + ggplot2::theme(axis.text = ggplot2::element_text(size = 6))
plot.file <- file.path(plot.dir, "recount2_LV599_top_Z_bar.pdf")
ggplot2::ggsave(plot.file,
plot = p +
ggplot2::theme(text = ggplot2::element_text(size = 15)),
width = 6, height = 8)
Would we have detected this “autoantigen signature” using a PLIER model specifically trained on this GPA blood data?
# remove objects we won't need for this next set of analyses
rm(limma.df, recount.b.df, limma.file, plot.file, recount.b.file,
top.z.df)
exprs.file <- file.path("data", "expression_data",
"GSE18885_annotated_mean.pcl")
agg.ma.df <- readr::read_tsv(exprs.file)
Parsed with column specification:
cols(
.default = col_double(),
Gene = col_character()
)
See spec(...) for full column specifications.
# get expression matrix
exprs.mat <- as.matrix(dplyr::select(agg.ma.df, -Gene))
rownames(exprs.mat) <- agg.ma.df$Gene
gpa.plier <- PLIERNewData(exprs.mat)
Loading required package: PLIER
Loading required package: RColorBrewer
Loading required package: gplots
Attaching package: ‘gplots’
The following object is masked from ‘package:stats’:
lowess
Loading required package: pheatmap
Loading required package: glmnet
Loading required package: Matrix
Loading required package: foreach
Loaded glmnet 2.0-13
Loading required package: knitr
Loading required package: rsvd
Loading required package: qvalue
Removing 1 pathways with too few genesComputing SVD
Done
[1] 46.73248
[1] "L2 is set to 46.732480853389"
[1] "L1 is set to 23.3662404266945"
errorY (SVD based:best possible) = 0.1276
iter1 errorY= 0.9821, Bdiff= 8041, Bkappa=44.38
iter2 errorY= 0.4221, Bdiff= 0.733, Bkappa=66.01
iter3 errorY= 0.3073, Bdiff= 0.117, Bkappa=50.32
iter4 errorY= 0.3223, Bdiff= 0.03125, Bkappa=39.82
iter5 errorY= 0.3362, Bdiff= 0.01719, Bkappa=35.87
iter6 errorY= 0.3345, Bdiff= 0.0119, Bkappa=35.73
iter7 errorY= 0.3302, Bdiff= 0.008442, Bkappa=34.87
iter8 errorY= 0.3266, Bdiff= 0.006143, Bkappa=35.33
iter9 errorY= 0.3239, Bdiff= 0.004628, Bkappa=37.33
iter10 errorY= 0.3221, Bdiff= 0.003596, Bkappa=34.31
iter11 errorY= 0.3208, Bdiff= 0.002837, Bkappa=33.81
iter12 errorY= 0.32, Bdiff= 0.002287, Bkappa=35.92
iter13 errorY= 0.3196, Bdiff= 0.001885, Bkappa=37.48
iter14 errorY= 0.3194, Bdiff= 0.001611, Bkappa=37.42
iter15 errorY= 0.3194, Bdiff= 0.001431, Bkappa=35.62
iter16 errorY= 0.3195, Bdiff= 0.001309, Bkappa=44.48
iter17 errorY= 0.3195, Bdiff= 0.001214, Bkappa=44.1
iter18 errorY= 0.3195, Bdiff= 0.001136, Bkappa=38.53
iter19 errorY= 0.3195, Bdiff= 0.001065, Bkappa=35.94
Updating L3, current fraction= 0, target=0.7
L3 is set to 0.00586 in 9 iterations
iter20 errorY= 0.3168, prior information ratio= 0.01, Bdiff= 0.001051, Bkappa=35.96;pos. col. U=22
iter21 errorY= 0.3157, prior information ratio= 0.01, Bdiff= 0.001004, Bkappa=36.17;pos. col. U=22
iter22 errorY= 0.3151, prior information ratio= 0.01, Bdiff= 0.0009502, Bkappa=36.4;pos. col. U=21
iter23 errorY= 0.3146, prior information ratio= 0.01, Bdiff= 0.000896, Bkappa=36.59;pos. col. U=21
iter24 errorY= 0.3142, prior information ratio= 0.01, Bdiff= 0.0008584, Bkappa=31.44;pos. col. U=21
iter25 errorY= 0.3138, prior information ratio= 0.01, Bdiff= 0.000828, Bkappa=30.63;pos. col. U=21
iter26 errorY= 0.3134, prior information ratio= 0.02, Bdiff= 0.0007959, Bkappa=34.27;pos. col. U=21
iter27 errorY= 0.313, prior information ratio= 0.02, Bdiff= 0.0007541, Bkappa=34.52;pos. col. U=20
iter28 errorY= 0.3126, prior information ratio= 0.02, Bdiff= 0.0006987, Bkappa=36.15;pos. col. U=20
iter29 errorY= 0.3122, prior information ratio= 0.02, Bdiff= 0.0006339, Bkappa=34.4;pos. col. U=20
iter30 errorY= 0.3118, prior information ratio= 0.02, Bdiff= 0.0005679, Bkappa=34.83;pos. col. U=20
iter31 errorY= 0.3114, prior information ratio= 0.02, Bdiff= 0.000508, Bkappa=39.52;pos. col. U=20
iter32 errorY= 0.3112, prior information ratio= 0.02, Bdiff= 0.0004353, Bkappa=39.73;pos. col. U=20
iter33 errorY= 0.3109, prior information ratio= 0.02, Bdiff= 0.0003727, Bkappa=39.95;pos. col. U=20
iter34 errorY= 0.3106, prior information ratio= 0.02, Bdiff= 0.0003163, Bkappa=40.13;pos. col. U=20
iter35 errorY= 0.3105, prior information ratio= 0.02, Bdiff= 0.0002706, Bkappa=40.26;pos. col. U=20
iter36 errorY= 0.3103, prior information ratio= 0.02, Bdiff= 0.0002322, Bkappa=39.78;pos. col. U=21
iter37 errorY= 0.3102, prior information ratio= 0.02, Bdiff= 0.0002027, Bkappa=40.35;pos. col. U=21
iter38 errorY= 0.3101, prior information ratio= 0.02, Bdiff= 0.0001801, Bkappa=40.82;pos. col. U=21
iter39 errorY= 0.31, prior information ratio= 0.02, Bdiff= 0.0001604, Bkappa=41.19;pos. col. U=21
Updating L3, current fraction= 0.7, target=0.7
L3 not changed
iter40 errorY= 0.3099, prior information ratio= 0.02, Bdiff= 0.0001438, Bkappa=41.47;pos. col. U=21
iter41 errorY= 0.3099, prior information ratio= 0.02, Bdiff= 0.0001304, Bkappa=41.67;pos. col. U=21
iter42 errorY= 0.3098, prior information ratio= 0.02, Bdiff= 0.0001194, Bkappa=41.79;pos. col. U=21
iter43 errorY= 0.3098, prior information ratio= 0.02, Bdiff= 0.0001103, Bkappa=41.85;pos. col. U=22
iter44 errorY= 0.3098, prior information ratio= 0.02, Bdiff= 0.000103, Bkappa=41.86;pos. col. U=22
iter45 errorY= 0.3098, prior information ratio= 0.02, Bdiff= 9.801e-05, Bkappa=41.82;pos. col. U=23
iter46 errorY= 0.3098, prior information ratio= 0.02, Bdiff= 9.463e-05, Bkappa=41.73;pos. col. U=23
iter47 errorY= 0.3099, prior information ratio= 0.02, Bdiff= 9.25e-05, Bkappa=41.6;pos. col. U=23
iter48 errorY= 0.3099, prior information ratio= 0.02, Bdiff= 9.162e-05, Bkappa=41.43;pos. col. U=23
iter49 errorY= 0.3099, prior information ratio= 0.02, Bdiff= 9.179e-05, Bkappa=41.22;pos. col. U=23
iter50 errorY= 0.3099, prior information ratio= 0.02, Bdiff= 9.26e-05, Bkappa=40.98;pos. col. U=23
iter51 errorY= 0.31, prior information ratio= 0.02, Bdiff= 9.444e-05, Bkappa=39.24;pos. col. U=23
iter52 errorY= 0.31, prior information ratio= 0.02, Bdiff= 9.625e-05, Bkappa=38.58;pos. col. U=23
iter53 errorY= 0.3101, prior information ratio= 0.02, Bdiff= 0.0001004, Bkappa=38.22;pos. col. U=23
iter54 errorY= 0.3101, prior information ratio= 0.02, Bdiff= 0.0001028, Bkappa=37.86;pos. col. U=23
iter55 errorY= 0.3102, prior information ratio= 0.02, Bdiff= 0.0001046, Bkappa=37.52;pos. col. U=23
iter56 errorY= 0.3102, prior information ratio= 0.02, Bdiff= 0.0001044, Bkappa=37.21;pos. col. U=23
iter57 errorY= 0.3102, prior information ratio= 0.02, Bdiff= 0.0001018, Bkappa=38.14;pos. col. U=22
iter58 errorY= 0.3103, prior information ratio= 0.02, Bdiff= 9.819e-05, Bkappa=38.01;pos. col. U=22
iter59 errorY= 0.3103, prior information ratio= 0.02, Bdiff= 9.416e-05, Bkappa=37.92;pos. col. U=22
Updating L3, current fraction= 0.7333, target=0.7
L3 not changed
iter60 errorY= 0.3104, prior information ratio= 0.02, Bdiff= 8.968e-05, Bkappa=37.87;pos. col. U=22
iter61 errorY= 0.3105, prior information ratio= 0.02, Bdiff= 8.503e-05, Bkappa=37.84;pos. col. U=22
iter62 errorY= 0.3105, prior information ratio= 0.02, Bdiff= 8.244e-05, Bkappa=36.87;pos. col. U=22
iter63 errorY= 0.3106, prior information ratio= 0.02, Bdiff= 7.941e-05, Bkappa=36.85;pos. col. U=22
iter64 errorY= 0.3106, prior information ratio= 0.02, Bdiff= 7.61e-05, Bkappa=36.84;pos. col. U=22
iter65 errorY= 0.3107, prior information ratio= 0.02, Bdiff= 7.305e-05, Bkappa=36.84;pos. col. U=22
iter66 errorY= 0.3107, prior information ratio= 0.02, Bdiff= 7.039e-05, Bkappa=36.86;pos. col. U=22
iter67 errorY= 0.3108, prior information ratio= 0.02, Bdiff= 6.807e-05, Bkappa=39.25;pos. col. U=22
iter68 errorY= 0.3109, prior information ratio= 0.02, Bdiff= 6.615e-05, Bkappa=39.27;pos. col. U=22
iter69 errorY= 0.3109, prior information ratio= 0.02, Bdiff= 6.473e-05, Bkappa=39.28;pos. col. U=22
iter70 errorY= 0.311, prior information ratio= 0.02, Bdiff= 6.412e-05, Bkappa=40.31;pos. col. U=22
iter71 errorY= 0.311, prior information ratio= 0.02, Bdiff= 6.308e-05, Bkappa=40.45;pos. col. U=22
iter72 errorY= 0.3111, prior information ratio= 0.02, Bdiff= 6.223e-05, Bkappa=40.62;pos. col. U=22
iter73 errorY= 0.3112, prior information ratio= 0.02, Bdiff= 6.159e-05, Bkappa=40.79;pos. col. U=22
iter74 errorY= 0.3112, prior information ratio= 0.02, Bdiff= 6.098e-05, Bkappa=43.79;pos. col. U=22
iter75 errorY= 0.3113, prior information ratio= 0.02, Bdiff= 6.053e-05, Bkappa=43.91;pos. col. U=22
iter76 errorY= 0.3113, prior information ratio= 0.02, Bdiff= 6.015e-05, Bkappa=44.02;pos. col. U=22
iter77 errorY= 0.3114, prior information ratio= 0.02, Bdiff= 5.976e-05, Bkappa=44.12;pos. col. U=22
iter78 errorY= 0.3115, prior information ratio= 0.02, Bdiff= 5.927e-05, Bkappa=44.22;pos. col. U=22
iter79 errorY= 0.3115, prior information ratio= 0.02, Bdiff= 5.86e-05, Bkappa=44.32;pos. col. U=22
Updating L3, current fraction= 0.7333, target=0.7
L3 not changed
iter80 errorY= 0.3116, prior information ratio= 0.02, Bdiff= 5.87e-05, Bkappa=44.4;pos. col. U=21
iter81 errorY= 0.3116, prior information ratio= 0.02, Bdiff= 5.744e-05, Bkappa=44.49;pos. col. U=21
iter82 errorY= 0.3116, prior information ratio= 0.02, Bdiff= 5.614e-05, Bkappa=44.6;pos. col. U=21
iter83 errorY= 0.3117, prior information ratio= 0.02, Bdiff= 5.506e-05, Bkappa=44.13;pos. col. U=21
iter84 errorY= 0.3117, prior information ratio= 0.02, Bdiff= 5.399e-05, Bkappa=44.26;pos. col. U=21
iter85 errorY= 0.3117, prior information ratio= 0.02, Bdiff= 5.288e-05, Bkappa=44.4;pos. col. U=21
iter86 errorY= 0.3117, prior information ratio= 0.02, Bdiff= 5.182e-05, Bkappa=44.57;pos. col. U=21
iter87 errorY= 0.3118, prior information ratio= 0.02, Bdiff= 5.094e-05, Bkappa=44.76;pos. col. U=21
iter88 errorY= 0.3118, prior information ratio= 0.02, Bdiff= 5.051e-05, Bkappa=45.04;pos. col. U=21
iter89 errorY= 0.3118, prior information ratio= 0.02, Bdiff= 5.046e-05, Bkappa=30.03;pos. col. U=21
iter90 errorY= 0.3117, prior information ratio= 0.02, Bdiff= 5.08e-05, Bkappa=30.44;pos. col. U=21
iter91 errorY= 0.3117, prior information ratio= 0.02, Bdiff= 5.142e-05, Bkappa=30.86;pos. col. U=21
iter92 errorY= 0.3117, prior information ratio= 0.02, Bdiff= 5.206e-05, Bkappa=31.29;pos. col. U=21
iter93 errorY= 0.3117, prior information ratio= 0.02, Bdiff= 5.275e-05, Bkappa=31.73;pos. col. U=21
iter94 errorY= 0.3116, prior information ratio= 0.02, Bdiff= 5.39e-05, Bkappa=32.18;pos. col. U=21
iter95 errorY= 0.3116, prior information ratio= 0.02, Bdiff= 5.55e-05, Bkappa=32.64;pos. col. U=21
iter96 errorY= 0.3116, prior information ratio= 0.02, Bdiff= 5.702e-05, Bkappa=33.11;pos. col. U=21
iter97 errorY= 0.3115, prior information ratio= 0.02, Bdiff= 5.854e-05, Bkappa=33.56;pos. col. U=21
iter98 errorY= 0.3114, prior information ratio= 0.02, Bdiff= 6.047e-05, Bkappa=33.98;pos. col. U=20
iter99 errorY= 0.3114, prior information ratio= 0.02, Bdiff= 6.167e-05, Bkappa=34.38;pos. col. U=20
Updating L3, current fraction= 0.6667, target=0.7
L3 not changed
iter100 errorY= 0.3113, prior information ratio= 0.02, Bdiff= 6.249e-05, Bkappa=34.76;pos. col. U=20
iter101 errorY= 0.3112, prior information ratio= 0.02, Bdiff= 6.307e-05, Bkappa=35.1;pos. col. U=20
iter102 errorY= 0.3111, prior information ratio= 0.02, Bdiff= 6.351e-05, Bkappa=35.37;pos. col. U=20
iter103 errorY= 0.311, prior information ratio= 0.02, Bdiff= 6.37e-05, Bkappa=35.64;pos. col. U=20
iter104 errorY= 0.3109, prior information ratio= 0.02, Bdiff= 6.362e-05, Bkappa=35.86;pos. col. U=20
iter105 errorY= 0.3108, prior information ratio= 0.02, Bdiff= 6.338e-05, Bkappa=36.05;pos. col. U=20
iter106 errorY= 0.3106, prior information ratio= 0.02, Bdiff= 6.316e-05, Bkappa=36.17;pos. col. U=20
iter107 errorY= 0.3105, prior information ratio= 0.02, Bdiff= 6.238e-05, Bkappa=36.3;pos. col. U=20
iter108 errorY= 0.3104, prior information ratio= 0.02, Bdiff= 6.162e-05, Bkappa=36.41;pos. col. U=20
iter109 errorY= 0.3103, prior information ratio= 0.02, Bdiff= 6.061e-05, Bkappa=36.47;pos. col. U=20
iter110 errorY= 0.3102, prior information ratio= 0.02, Bdiff= 5.965e-05, Bkappa=37.2;pos. col. U=20
iter111 errorY= 0.3101, prior information ratio= 0.02, Bdiff= 5.854e-05, Bkappa=37.11;pos. col. U=20
iter112 errorY= 0.3099, prior information ratio= 0.02, Bdiff= 5.774e-05, Bkappa=36.97;pos. col. U=20
iter113 errorY= 0.3098, prior information ratio= 0.02, Bdiff= 5.679e-05, Bkappa=36.77;pos. col. U=20
iter114 errorY= 0.3097, prior information ratio= 0.02, Bdiff= 5.579e-05, Bkappa=36.53;pos. col. U=20
iter115 errorY= 0.3096, prior information ratio= 0.02, Bdiff= 5.457e-05, Bkappa=35.16;pos. col. U=20
iter116 errorY= 0.3095, prior information ratio= 0.02, Bdiff= 5.336e-05, Bkappa=34.85;pos. col. U=20
iter117 errorY= 0.3094, prior information ratio= 0.02, Bdiff= 5.211e-05, Bkappa=34.5;pos. col. U=20
iter118 errorY= 0.3093, prior information ratio= 0.02, Bdiff= 5.093e-05, Bkappa=34.12;pos. col. U=20
iter119 errorY= 0.3092, prior information ratio= 0.02, Bdiff= 4.981e-05, Bkappa=33.62;pos. col. U=20
Updating L3, current fraction= 0.6667, target=0.7
L3 not changed
iter120 errorY= 0.3091, prior information ratio= 0.02, Bdiff= 4.967e-05, Bkappa=33.24;pos. col. U=20
iter121 errorY= 0.309, prior information ratio= 0.02, Bdiff= 4.853e-05, Bkappa=30.18;pos. col. U=20
iter122 errorY= 0.309, prior information ratio= 0.02, Bdiff= 4.726e-05, Bkappa=30.33;pos. col. U=20
iter123 errorY= 0.3089, prior information ratio= 0.02, Bdiff= 4.577e-05, Bkappa=30.49;pos. col. U=21
iter124 errorY= 0.3088, prior information ratio= 0.02, Bdiff= 4.5e-05, Bkappa=30.64;pos. col. U=21
iter125 errorY= 0.3087, prior information ratio= 0.02, Bdiff= 4.367e-05, Bkappa=29.16;pos. col. U=21
iter126 errorY= 0.3087, prior information ratio= 0.02, Bdiff= 4.244e-05, Bkappa=29.26;pos. col. U=21
iter127 errorY= 0.3086, prior information ratio= 0.02, Bdiff= 4.109e-05, Bkappa=29.37;pos. col. U=21
iter128 errorY= 0.3085, prior information ratio= 0.02, Bdiff= 3.965e-05, Bkappa=29.49;pos. col. U=21
iter129 errorY= 0.3085, prior information ratio= 0.02, Bdiff= 3.824e-05, Bkappa=29.61;pos. col. U=21
iter130 errorY= 0.3084, prior information ratio= 0.02, Bdiff= 3.709e-05, Bkappa=29.74;pos. col. U=21
iter131 errorY= 0.3084, prior information ratio= 0.02, Bdiff= 3.635e-05, Bkappa=29.87;pos. col. U=21
iter132 errorY= 0.3084, prior information ratio= 0.02, Bdiff= 3.551e-05, Bkappa=30;pos. col. U=21
iter133 errorY= 0.3083, prior information ratio= 0.02, Bdiff= 3.452e-05, Bkappa=37.73;pos. col. U=21
iter134 errorY= 0.3083, prior information ratio= 0.02, Bdiff= 3.348e-05, Bkappa=38.1;pos. col. U=21
iter135 errorY= 0.3083, prior information ratio= 0.02, Bdiff= 3.267e-05, Bkappa=38.49;pos. col. U=21
iter136 errorY= 0.3083, prior information ratio= 0.02, Bdiff= 3.202e-05, Bkappa=38.9;pos. col. U=21
iter137 errorY= 0.3082, prior information ratio= 0.02, Bdiff= 3.146e-05, Bkappa=37.37;pos. col. U=21
iter138 errorY= 0.3082, prior information ratio= 0.02, Bdiff= 3.107e-05, Bkappa=37.74;pos. col. U=21
iter139 errorY= 0.3082, prior information ratio= 0.02, Bdiff= 3.091e-05, Bkappa=38.14;pos. col. U=21
Updating L3, current fraction= 0.7, target=0.7
L3 not changed
iter140 errorY= 0.3082, prior information ratio= 0.02, Bdiff= 3.09e-05, Bkappa=38.56;pos. col. U=21
iter141 errorY= 0.3082, prior information ratio= 0.02, Bdiff= 3.098e-05, Bkappa=38.98;pos. col. U=21
iter142 errorY= 0.3082, prior information ratio= 0.02, Bdiff= 3.121e-05, Bkappa=39.41;pos. col. U=21
iter143 errorY= 0.3082, prior information ratio= 0.02, Bdiff= 3.164e-05, Bkappa=39.86;pos. col. U=21
iter144 errorY= 0.3082, prior information ratio= 0.02, Bdiff= 3.227e-05, Bkappa=40.31;pos. col. U=21
iter145 errorY= 0.3082, prior information ratio= 0.02, Bdiff= 3.3e-05, Bkappa=40.78;pos. col. U=21
iter146 errorY= 0.3082, prior information ratio= 0.02, Bdiff= 3.381e-05, Bkappa=37.58;pos. col. U=21
iter147 errorY= 0.3082, prior information ratio= 0.02, Bdiff= 3.462e-05, Bkappa=37.73;pos. col. U=21
iter148 errorY= 0.3082, prior information ratio= 0.02, Bdiff= 3.523e-05, Bkappa=38.53;pos. col. U=21
iter149 errorY= 0.3083, prior information ratio= 0.02, Bdiff= 3.555e-05, Bkappa=38.7;pos. col. U=21
iter150 errorY= 0.3083, prior information ratio= 0.02, Bdiff= 3.557e-05, Bkappa=38.89;pos. col. U=21
iter151 errorY= 0.3083, prior information ratio= 0.02, Bdiff= 3.543e-05, Bkappa=39.07;pos. col. U=21
iter152 errorY= 0.3084, prior information ratio= 0.02, Bdiff= 3.502e-05, Bkappa=48.3;pos. col. U=21
iter153 errorY= 0.3084, prior information ratio= 0.02, Bdiff= 3.449e-05, Bkappa=48.77;pos. col. U=21
iter154 errorY= 0.3085, prior information ratio= 0.02, Bdiff= 3.384e-05, Bkappa=49.22;pos. col. U=21
iter155 errorY= 0.3085, prior information ratio= 0.02, Bdiff= 3.302e-05, Bkappa=49.65;pos. col. U=21
iter156 errorY= 0.3086, prior information ratio= 0.02, Bdiff= 3.203e-05, Bkappa=50.05;pos. col. U=21
iter157 errorY= 0.3086, prior information ratio= 0.02, Bdiff= 3.086e-05, Bkappa=50.44;pos. col. U=21
iter158 errorY= 0.3087, prior information ratio= 0.02, Bdiff= 3.019e-05, Bkappa=50.79;pos. col. U=21
iter159 errorY= 0.3088, prior information ratio= 0.02, Bdiff= 2.871e-05, Bkappa=51.1;pos. col. U=21
Updating L3, current fraction= 0.7, target=0.7
L3 not changed
iter160 errorY= 0.3088, prior information ratio= 0.02, Bdiff= 2.725e-05, Bkappa=51.36;pos. col. U=21
iter161 errorY= 0.3089, prior information ratio= 0.02, Bdiff= 2.587e-05, Bkappa=51.59;pos. col. U=22
iter162 errorY= 0.3089, prior information ratio= 0.02, Bdiff= 2.45e-05, Bkappa=51.79;pos. col. U=22
iter163 errorY= 0.309, prior information ratio= 0.02, Bdiff= 2.31e-05, Bkappa=51.95;pos. col. U=22
iter164 errorY= 0.3091, prior information ratio= 0.02, Bdiff= 2.177e-05, Bkappa=52.07;pos. col. U=22
iter165 errorY= 0.3091, prior information ratio= 0.02, Bdiff= 2.048e-05, Bkappa=52.17;pos. col. U=22
iter166 errorY= 0.3092, prior information ratio= 0.02, Bdiff= 1.924e-05, Bkappa=52.23;pos. col. U=22
iter167 errorY= 0.3092, prior information ratio= 0.02, Bdiff= 1.804e-05, Bkappa=52.25;pos. col. U=22
iter168 errorY= 0.3093, prior information ratio= 0.02, Bdiff= 1.688e-05, Bkappa=52.25;pos. col. U=22
iter169 errorY= 0.3093, prior information ratio= 0.02, Bdiff= 1.576e-05, Bkappa=52.23;pos. col. U=22
iter170 errorY= 0.3094, prior information ratio= 0.02, Bdiff= 1.468e-05, Bkappa=52.18;pos. col. U=22
iter171 errorY= 0.3094, prior information ratio= 0.02, Bdiff= 1.373e-05, Bkappa=52.12;pos. col. U=22
iter172 errorY= 0.3095, prior information ratio= 0.02, Bdiff= 1.275e-05, Bkappa=52.06;pos. col. U=22
iter173 errorY= 0.3095, prior information ratio= 0.02, Bdiff= 1.184e-05, Bkappa=51.96;pos. col. U=22
iter174 errorY= 0.3096, prior information ratio= 0.02, Bdiff= 1.099e-05, Bkappa=51.83;pos. col. U=22
iter175 errorY= 0.3096, prior information ratio= 0.02, Bdiff= 1.021e-05, Bkappa=51.67;pos. col. U=22
iter176 errorY= 0.3096, prior information ratio= 0.02, Bdiff= 9.472e-06, Bkappa=51.49;pos. col. U=22
converged at iteration 176
There are 16 LVs with AUC>0.70
plier.file <- file.path(results.dir, "GPA_blood_PLIER_model.RDS")
saveRDS(gpa.plier, plier.file)
U
matrixTaking a look at the U
(prior information coefficient) matrix will tell us what kind of pathways were identified with this model.
PLIER::plotU(gpa.plier, fontsize_row = 5, fontsize_col = 7)
pdf(file.path(plot.dir, "GPA_blood_U_plot.pdf"))
PLIER::plotU(gpa.plier, fontsize_row = 6, fontsize_col = 7)
dev.off()
null device
1
LV13
and LV20
look like the most likely candidates, but we’ll look across all 30 LVs.
GetTopNGenes <- function(z.vector, num.genes = 50){
names(head(sort(z.vector, decreasing = TRUE), num.genes))
}
top.genes <- apply(gpa.plier$Z, 2, GetTopNGenes)
ag.logical <- apply(top.genes, 2, function(x) any(x %in% autoantigens))
which(ag.logical)
[1] 8
head(sort(gpa.plier$Z[, 8], decreasing = TRUE), 50)
BPI MS4A3 LTF CEACAM8 CRISP3 MPO DEFA4 CTSG RNASE3 AZU1 TCN1 LCN2 MMP8 OLFM4 PCOLCE2 SLC2A5 OLR1 ABCA13
0.9707064 0.9598092 0.9155214 0.8608877 0.8515797 0.8488347 0.8485207 0.8324136 0.8207816 0.7919270 0.7917710 0.7556873 0.7221172 0.7180812 0.7051191 0.6897629 0.6747644 0.6720423
COL17A1 CAMP CD24 CLEC5A SLC27A2 RAB13 MYB CHIT1 FZD9 IQGAP3 ATP8B4 PRTN3 ANXA3 UGCG ITGA9 CEACAM1 CPA3 TFF3
0.6650740 0.6411719 0.6063798 0.5667845 0.5408962 0.5173488 0.5107821 0.4932357 0.4925844 0.4906760 0.4532180 0.4408022 0.4116064 0.4034772 0.4016386 0.3982147 0.3774761 0.3647995
CCNA1 SLPI S100A9 SLC28A3 RNASE2 ANLN MLF1IP BUB1B GATA2 POLE SCD POLE2 GPR84 ARG1
0.3644886 0.3601955 0.3562953 0.3509590 0.3489931 0.3414786 0.3402832 0.3372137 0.3315916 0.3287814 0.3194240 0.3157867 0.3126786 0.3098263
# get into appropriate data.frame for bar plot
top.z.df <- as.data.frame(sort(gpa.plier$Z[, 8],
decreasing = TRUE)[1:50])
top.z.df <- tibble::rownames_to_column(top.z.df, var = "Gene")
colnames(top.z.df)[2] <- "Z"
# add in autoantigen information
top.z.df <- top.z.df %>%
dplyr::mutate(Autoantigen =
dplyr::case_when(
Gene %in% autoantigens ~ "Yes",
TRUE ~ "No"
))
# reorder for plotting
top.z.df$Gene <- factor(top.z.df$Gene,
levels = top.z.df$Gene[50:1])
p <- ggplot2::ggplot(top.z.df,
ggplot2::aes(x = Gene, y = Z, fill = Autoantigen)) +
ggplot2::geom_bar(stat = "identity") +
ggplot2::theme_bw() +
ggplot2::scale_fill_manual(values = c("black", "red")) +
ggplot2::coord_flip() +
ggplot2::ggtitle("GPA Blood LV 8 Loadings") +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, face = "bold"))
p + ggplot2::theme(axis.text = ggplot2::element_text(size = 6))
plot.file <- file.path(plot.dir, "GPA_blood_LV8_top_Z_bar.pdf")
ggplot2::ggsave(plot.file,
plot = p +
ggplot2::theme(text = ggplot2::element_text(size = 15)),
width = 6, height = 8)
GPA blood LV 8
may be most similar to another recount2 LV like recount2 LV 68
, which is also differentially expressed in this dataset. Let’s take a quick look at the top 50 genes for recount2 LV 68
.
head(sort(recount.plier$Z[, 68], decreasing = TRUE), 50)
CEACAM8 BPI DEFA4 CAMP MMP8 PGLYRP1 MS4A3 LTF RNASE3 PADI4 CTSG CRISP3 ABCA13 AZU1 ELANE RNASE2 FCAR PRG2 S100A9 RGL4
7.887587 6.853320 6.817395 6.184177 5.051100 4.875414 4.569362 4.197822 4.136600 3.748623 3.221135 3.205522 2.930908 2.852379 2.545959 2.397721 2.305586 2.226229 2.222850 2.214754
S100A8 CA1 SRGN TRIM10 ITGAM CST7 PLBD1 ARG1 CLC SLCO4C1 LYZ CEACAM1 BST1 ANXA3 SLC22A16 LCN2 PRTN3 PRAM1 EPB42 GP5
2.042006 1.985721 1.982392 1.942998 1.941000 1.921686 1.920328 1.843082 1.765571 1.685429 1.650977 1.572818 1.548932 1.534389 1.515435 1.509318 1.497583 1.344475 1.295891 1.280706
OLFM4 CHIT1 CYP4F3 KEL SLC22A15 SLC2A5 CLTCL1 NLRC4 FCN1 GFI1
1.231628 1.148422 1.141577 1.133186 1.129168 1.057860 1.040496 1.035317 1.011179 0.994169
By eye, this looks more similar to GPA blood LV 8
, but let’s do a more formal analysis.
# get Z matrices from both models
recount.z <- as.data.frame(recount.plier$Z)
gpa.z <- as.data.frame(gpa.plier$Z)
# we'll need to add the gene identifiers (symbols in this case) to a column
# rather than as rownames -- this will facilitate joining
recount.z <- tibble::rownames_to_column(recount.z, var = "Gene")
colnames(recount.z)[2:ncol(recount.z)] <- paste0("recountLV",
1:(ncol(recount.z) - 1))
gpa.z <- tibble::rownames_to_column(gpa.z, var = "Gene")
colnames(gpa.z)[2:ncol(gpa.z)] <- paste0("gpaLV", 1:(ncol(gpa.z) - 1))
# join -- only genes present in both models
z.df <- dplyr::inner_join(recount.z, gpa.z, by = "Gene")
# need matrix to calculate correlation
z.matrix <- as.matrix(z.df[, 2:ncol(z.df)])
rownames(z.matrix) <- z.df$Gene
# calculate pearson correlation between LVs -- can we map between models using
# this distance metric?
cor.z.mat <- cor(z.matrix, method = "pearson")
# set diagonal to 0 to help find max correlation between LVs
diag(cor.z.mat) <- 0
# indices for each model
gpa.indx <- grep("gpa", rownames(cor.z.mat))
recount.indx <- grep("recount", rownames(cor.z.mat))
# pertinent indices
impt.cor.z.mat <- cor.z.mat[recount.indx, gpa.indx]
# for each GPA blood model LV, want the highest correlated LV from recount
mapping.df <- reshape2::melt(impt.cor.z.mat,
varnames = c("recount_LV", "GPA_blood_LV"),
value.name = "pearson_Z") %>%
dplyr::group_by(GPA_blood_LV) %>%
dplyr::top_n(1, pearson_Z)
mapping.df
It looks like GPA blood LV 8
does in fact map to recount2 LV 68
as determined by taking the Pearson correlation between loadings.
We’ll select the top mapping LVs from the recount2 model as well as recount2 LV 599
from impt.cor.z.mat
for plotting as a heatmap.
plot.index <- which(rownames(impt.cor.z.mat) %in%
c(as.character(mapping.df$recount_LV), "recountLV599"))
heatmap.mat <- impt.cor.z.mat[plot.index, ]
pheatmap::pheatmap(heatmap.mat, main = "Z matrix mapping")
plot.file <- file.path(plot.dir, "selected_cor_z_mat_heatmap.pdf")
pdf(plot.file)
pheatmap::pheatmap(heatmap.mat, main = "Z matrix mapping")
dev.off()
null device
1
Despite the lack of AAV expression data in the training compendium, the recount2 PLIER model learns a latent variable highly relevant to AAV. Specifically, recount2 LV 599
captures a granulocyte progenitor signature. The top genes (as determined by the loadings, Z
) are major (PRTN3, MPO) and minor (e.g., AZU1, BPI) autoantigens in ANCA-associated vasculitis.
We find an LV in the model specifically trained on the GPA blood dataset that also captures the activity autoantigens (GPA blood LV 8
). However, the major autoantigens are not as highly ranked and this maps to another LV in the recount2 model, recount2 LV 68
. This suggests that the recount2 model (e.g., the multiPLIER approach) can identify additional, highly relevant LVs.