Functions and directory setup
`%>%` <- dplyr::`%>%`
# plot and result directory setup for this notebook
plot.dir <- file.path("plots", "17")
dir.create(plot.dir, recursive = TRUE, showWarnings = FALSE)
results.dir <- file.path("results", "17")
dir.create(results.dir, recursive = TRUE, showWarnings = FALSE)
Read in data
Read in the three data.frame
from each set of experiments: sparsity, pathway coverage, and number of LVs.
SLE WB repeats
sle.sparsity <- readr::read_tsv(file.path("results", "16",
"sle-wb_repeated_sparsity.tsv")) %>%
dplyr::mutate(training_set = "SLE")
Parsed with column specification:
cols(
model = col_character(),
sparsity_type = col_character(),
value = col_double()
)
sle.num.lvs <- readr::read_tsv(file.path("results", "16",
"sle-wb_repeated_num_lvs.tsv")) %>%
dplyr::mutate(training_set = "SLE")
Parsed with column specification:
cols(
model = col_character(),
num_lvs = col_integer()
)
sle.pathway <- readr::read_tsv(file.path("results", "16",
"sle-wb_repeated_pathway.tsv")) %>%
dplyr::mutate(training_set = "SLE")
Parsed with column specification:
cols(
model = col_character(),
pathway_coverage_type = col_character(),
value = col_double()
)
Subsampled recount2
recount.sparsity <- readr::read_tsv(file.path("results", "15",
"subsampled_sparsity.tsv")) %>%
dplyr::mutate(training_set = "recount2 subsampled")
Parsed with column specification:
cols(
model = col_character(),
sparsity_type = col_character(),
value = col_double()
)
recount.num.lvs <- readr::read_tsv(file.path("results", "15",
"subsampled_num_lvs.tsv")) %>%
dplyr::mutate(training_set = "recount2 subsampled")
Parsed with column specification:
cols(
model = col_character(),
num_lvs = col_integer()
)
recount.pathway <- readr::read_tsv(file.path("results", "15",
"subsampled_pathway.tsv")) %>%
dplyr::mutate(training_set = "recount2 subsampled")
Parsed with column specification:
cols(
model = col_character(),
pathway_coverage_type = col_character(),
value = col_double()
)
Bind two experiments
num.lvs.df <- dplyr::bind_rows(sle.num.lvs, recount.num.lvs)
pathway.df <- dplyr::bind_rows(sle.pathway, recount.pathway)
sparsity.df <- dplyr::bind_rows(sle.sparsity, recount.sparsity)
rm(sle.num.lvs, sle.pathway, sle.sparsity, recount.num.lvs, recount.pathway,
recount.sparsity)
Write number of LVs data.frame
to file
# number of latent variables
num.lvs.file <- file.path(results.dir, "number_of_LVs.tsv")
readr::write_tsv(num.lvs.df, num.lvs.file)
Plotting
Number of latent variables
num.lvs.df %>%
ggplot2::ggplot(ggplot2::aes(x = training_set, y = num_lvs,
group = training_set)) +
ggplot2::geom_boxplot() +
ggplot2::geom_point(position = ggplot2::position_jitter(0.2),
alpha = 0.5) +
ggplot2::theme_bw() +
ggplot2::labs(x = "training set",
y = "number of latent variables",
title = "PLIER model n = 1640")

plot.file <- file.path(plot.dir, "number_of_lvs.pdf")
ggplot2::ggsave(plot.file, plot = ggplot2::last_plot())
Saving 7 x 7 in image
Pathway coverage
pathway.df <- pathway.df %>%
dplyr::filter(pathway_coverage_type != "sig.pathway.by.lv") %>%
dplyr::mutate(pathway_coverage_type =
dplyr::case_when(
(pathway_coverage_type == "lv") ~
"LV associated with pathways",
(pathway_coverage_type =="pathway") ~ "pathway coverage"
))
# pathway coverage
pathway.file <- file.path(results.dir, "pathway_coverage.tsv")
readr::write_tsv(pathway.df, pathway.file)
pathway.df %>%
ggplot2::ggplot(ggplot2::aes(x = training_set, y = value)) +
ggplot2::geom_boxplot() +
ggplot2::geom_point(position = ggplot2::position_jitter(0.2),
alpha = 0.5) +
ggplot2::facet_grid(~ pathway_coverage_type) +
ggplot2::theme_bw() +
ggplot2::labs(x = "training set",
y = "proportion",
title = "PLIER model n = 1640")

plot.file <- file.path(plot.dir, "pathway_coverage.pdf")
ggplot2::ggsave(plot.file, plot = ggplot2::last_plot())
Saving 7 x 7 in image
U
sparsity
sparsity.df %>%
dplyr::mutate(sparsity_type =
dplyr::case_when(
(sparsity_type == "all.sparsity") ~ "All",
(sparsity_type == "sig.sparsity") ~ "Significant association only"
)) %>%
ggplot2::ggplot(ggplot2::aes(x = training_set, y = value)) +
ggplot2::geom_violin() +
ggplot2::stat_summary(fun.y = median, geom = "point", shape = 18,
size = 4) +
ggplot2::facet_grid(~ sparsity_type) +
ggplot2::theme_bw() +
ggplot2::labs(x = "training set",
y = "proportion of positive entries in U",
title = "PLIER model n = 1640")

plot.file <- file.path(plot.dir, "u_sparsity.pdf")
ggplot2::ggsave(plot.file, plot = ggplot2::last_plot())
Saving 7 x 7 in image
LS0tCnRpdGxlOiAiUGxvdHRpbmcgZm9yIHRoZSByZXBlYXRlZCBTTEUgV0IgYW5kIHN1YnNhbXBsZWQgcmVjb3VudDIgbW9kZWwgZXZhbHVhdGlvbnMiCm91dHB1dDogICAKICBodG1sX25vdGVib29rOiAKICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OiB0cnVlCi0tLQoKKipKLiBUYXJvbmkgMjAxOCoqCgpJbiBgMTUtZXZhbHVhdGVfc3Vic2FtcGxpbmdgIGFuZCBgMTYtcmVwZWF0X3NsZV93Yl9QTElFUi5SYCwgd2UgY2FsY3VsYXRlZCBzb21lCm1lYXN1cmVzIG9mIFBMSUVSIG1vZGVsIHBlcmZvcm1hbmNlIChlLmcuLCBzcGFyc2l0eSBvZiBgVWApLiAKV2UnbGwgcGxvdCB0aGUgcmVzdWx0cyBmb3IgdGhvc2UgdHdvIHNldHMgb2YgdHJhaW5pbmcgZGF0YSBpbiB0aGlzIG5vdGVib29rLgoKIyMgRnVuY3Rpb25zIGFuZCBkaXJlY3Rvcnkgc2V0dXAKCmBgYHtyfQpgJT4lYCA8LSBkcGx5cjo6YCU+JWAKYGBgCgpgYGB7cn0KIyBwbG90IGFuZCByZXN1bHQgZGlyZWN0b3J5IHNldHVwIGZvciB0aGlzIG5vdGVib29rCnBsb3QuZGlyIDwtIGZpbGUucGF0aCgicGxvdHMiLCAiMTciKQpkaXIuY3JlYXRlKHBsb3QuZGlyLCByZWN1cnNpdmUgPSBUUlVFLCBzaG93V2FybmluZ3MgPSBGQUxTRSkKcmVzdWx0cy5kaXIgPC0gZmlsZS5wYXRoKCJyZXN1bHRzIiwgIjE3IikKZGlyLmNyZWF0ZShyZXN1bHRzLmRpciwgcmVjdXJzaXZlID0gVFJVRSwgc2hvd1dhcm5pbmdzID0gRkFMU0UpCmBgYAoKIyMgUmVhZCBpbiBkYXRhCgpSZWFkIGluIHRoZSB0aHJlZSBgZGF0YS5mcmFtZWAgZnJvbSBlYWNoIHNldCBvZiBleHBlcmltZW50czogc3BhcnNpdHksIApwYXRod2F5IGNvdmVyYWdlLCBhbmQgbnVtYmVyIG9mIExWcy4KCiMjIyBTTEUgV0IgcmVwZWF0cwoKYGBge3J9CnNsZS5zcGFyc2l0eSA8LSByZWFkcjo6cmVhZF90c3YoZmlsZS5wYXRoKCJyZXN1bHRzIiwgIjE2IiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgInNsZS13Yl9yZXBlYXRlZF9zcGFyc2l0eS50c3YiKSkgJT4lCiAgICAgICAgICAgICAgICAgIGRwbHlyOjptdXRhdGUodHJhaW5pbmdfc2V0ID0gIlNMRSIpCnNsZS5udW0ubHZzIDwtIHJlYWRyOjpyZWFkX3RzdihmaWxlLnBhdGgoInJlc3VsdHMiLCAiMTYiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAic2xlLXdiX3JlcGVhdGVkX251bV9sdnMudHN2IikpICU+JQogICAgICAgICAgICAgICAgICBkcGx5cjo6bXV0YXRlKHRyYWluaW5nX3NldCA9ICJTTEUiKQpzbGUucGF0aHdheSA8LSByZWFkcjo6cmVhZF90c3YoZmlsZS5wYXRoKCJyZXN1bHRzIiwgIjE2IiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgInNsZS13Yl9yZXBlYXRlZF9wYXRod2F5LnRzdiIpKSAlPiUKICAgICAgICAgICAgICAgICAgZHBseXI6Om11dGF0ZSh0cmFpbmluZ19zZXQgPSAiU0xFIikKYGBgCgojIyMgU3Vic2FtcGxlZCByZWNvdW50MgoKYGBge3J9CnJlY291bnQuc3BhcnNpdHkgPC0gcmVhZHI6OnJlYWRfdHN2KGZpbGUucGF0aCgicmVzdWx0cyIsICIxNSIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICJzdWJzYW1wbGVkX3NwYXJzaXR5LnRzdiIpKSAlPiUKICAgICAgICAgICAgICAgICAgZHBseXI6Om11dGF0ZSh0cmFpbmluZ19zZXQgPSAicmVjb3VudDIgc3Vic2FtcGxlZCIpCnJlY291bnQubnVtLmx2cyA8LSByZWFkcjo6cmVhZF90c3YoZmlsZS5wYXRoKCJyZXN1bHRzIiwgIjE1IiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgInN1YnNhbXBsZWRfbnVtX2x2cy50c3YiKSkgJT4lCiAgICAgICAgICAgICAgICAgIGRwbHlyOjptdXRhdGUodHJhaW5pbmdfc2V0ID0gInJlY291bnQyIHN1YnNhbXBsZWQiKQpyZWNvdW50LnBhdGh3YXkgPC0gcmVhZHI6OnJlYWRfdHN2KGZpbGUucGF0aCgicmVzdWx0cyIsICIxNSIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICJzdWJzYW1wbGVkX3BhdGh3YXkudHN2IikpICU+JQogICAgICAgICAgICAgICAgICBkcGx5cjo6bXV0YXRlKHRyYWluaW5nX3NldCA9ICJyZWNvdW50MiBzdWJzYW1wbGVkIikKYGBgCgojIyMgQmluZCB0d28gZXhwZXJpbWVudHMKCmBgYHtyfQpudW0ubHZzLmRmIDwtIGRwbHlyOjpiaW5kX3Jvd3Moc2xlLm51bS5sdnMsIHJlY291bnQubnVtLmx2cykKcGF0aHdheS5kZiA8LSBkcGx5cjo6YmluZF9yb3dzKHNsZS5wYXRod2F5LCByZWNvdW50LnBhdGh3YXkpCnNwYXJzaXR5LmRmIDwtIGRwbHlyOjpiaW5kX3Jvd3Moc2xlLnNwYXJzaXR5LCByZWNvdW50LnNwYXJzaXR5KQpybShzbGUubnVtLmx2cywgc2xlLnBhdGh3YXksIHNsZS5zcGFyc2l0eSwgcmVjb3VudC5udW0ubHZzLCByZWNvdW50LnBhdGh3YXksCiAgICAgcmVjb3VudC5zcGFyc2l0eSkKYGBgCgpXcml0ZSBudW1iZXIgb2YgTFZzIGBkYXRhLmZyYW1lYCB0byBmaWxlCgpgYGB7cn0KIyBudW1iZXIgb2YgbGF0ZW50IHZhcmlhYmxlcwpudW0ubHZzLmZpbGUgPC0gZmlsZS5wYXRoKHJlc3VsdHMuZGlyLCAibnVtYmVyX29mX0xWcy50c3YiKQpyZWFkcjo6d3JpdGVfdHN2KG51bS5sdnMuZGYsIG51bS5sdnMuZmlsZSkKYGBgCgojIyBQbG90dGluZwoKIyMjIyBOdW1iZXIgb2YgbGF0ZW50IHZhcmlhYmxlcwoKYGBge3J9Cm51bS5sdnMuZGYgJT4lCiAgZ2dwbG90Mjo6Z2dwbG90KGdncGxvdDI6OmFlcyh4ID0gdHJhaW5pbmdfc2V0LCB5ID0gbnVtX2x2cywKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGdyb3VwID0gdHJhaW5pbmdfc2V0KSkgKwogIGdncGxvdDI6Omdlb21fYm94cGxvdCgpICsKICBnZ3Bsb3QyOjpnZW9tX3BvaW50KHBvc2l0aW9uID0gZ2dwbG90Mjo6cG9zaXRpb25faml0dGVyKDAuMiksCiAgICAgICAgICAgICAgICAgICAgICBhbHBoYSA9IDAuNSkgKwogIGdncGxvdDI6OnRoZW1lX2J3KCkgKwogIGdncGxvdDI6OmxhYnMoeCA9ICJ0cmFpbmluZyBzZXQiLCAKICAgICAgICAgICAgICAgIHkgPSAibnVtYmVyIG9mIGxhdGVudCB2YXJpYWJsZXMiLAogICAgICAgICAgICAgICAgdGl0bGUgPSAiUExJRVIgbW9kZWwgbiA9IDE2NDAiKQpgYGAKCmBgYHtyfQpwbG90LmZpbGUgPC0gZmlsZS5wYXRoKHBsb3QuZGlyLCAibnVtYmVyX29mX2x2cy5wZGYiKQpnZ3Bsb3QyOjpnZ3NhdmUocGxvdC5maWxlLCBwbG90ID0gZ2dwbG90Mjo6bGFzdF9wbG90KCkpCmBgYAoKIyMjIyBQYXRod2F5IGNvdmVyYWdlCgpgYGB7cn0KcGF0aHdheS5kZiA8LSBwYXRod2F5LmRmICU+JQogIGRwbHlyOjpmaWx0ZXIocGF0aHdheV9jb3ZlcmFnZV90eXBlICE9ICJzaWcucGF0aHdheS5ieS5sdiIpICU+JQogIGRwbHlyOjptdXRhdGUocGF0aHdheV9jb3ZlcmFnZV90eXBlID0gCiAgICAgICAgICAgICAgICAgIGRwbHlyOjpjYXNlX3doZW4oCiAgICAgICAgICAgICAgICAgICAgKHBhdGh3YXlfY292ZXJhZ2VfdHlwZSA9PSAibHYiKSB+IAogICAgICAgICAgICAgICAgICAgICAgIkxWIGFzc29jaWF0ZWQgd2l0aCBwYXRod2F5cyIsCiAgICAgICAgICAgICAgICAgICAgKHBhdGh3YXlfY292ZXJhZ2VfdHlwZSA9PSJwYXRod2F5IikgfiAicGF0aHdheSBjb3ZlcmFnZSIKICAgICAgICAgICAgICAgICAgKSkKCiMgcGF0aHdheSBjb3ZlcmFnZQpwYXRod2F5LmZpbGUgPC0gZmlsZS5wYXRoKHJlc3VsdHMuZGlyLCAicGF0aHdheV9jb3ZlcmFnZS50c3YiKQpyZWFkcjo6d3JpdGVfdHN2KHBhdGh3YXkuZGYsIHBhdGh3YXkuZmlsZSkKCnBhdGh3YXkuZGYgJT4lCiAgZ2dwbG90Mjo6Z2dwbG90KGdncGxvdDI6OmFlcyh4ID0gdHJhaW5pbmdfc2V0LCB5ID0gdmFsdWUpKSArCiAgZ2dwbG90Mjo6Z2VvbV9ib3hwbG90KCkgKwogIGdncGxvdDI6Omdlb21fcG9pbnQocG9zaXRpb24gPSBnZ3Bsb3QyOjpwb3NpdGlvbl9qaXR0ZXIoMC4yKSwKICAgICAgICAgICAgICAgICAgICAgIGFscGhhID0gMC41KSArCiAgZ2dwbG90Mjo6ZmFjZXRfZ3JpZCh+IHBhdGh3YXlfY292ZXJhZ2VfdHlwZSkgKwogIGdncGxvdDI6OnRoZW1lX2J3KCkgKwogIGdncGxvdDI6OmxhYnMoeCA9ICJ0cmFpbmluZyBzZXQiLCAKICAgICAgICAgICAgICAgIHkgPSAicHJvcG9ydGlvbiIsCiAgICAgICAgICAgICAgICB0aXRsZSA9ICJQTElFUiBtb2RlbCBuID0gMTY0MCIpCmBgYAoKYGBge3J9CnBsb3QuZmlsZSA8LSBmaWxlLnBhdGgocGxvdC5kaXIsICJwYXRod2F5X2NvdmVyYWdlLnBkZiIpCmdncGxvdDI6Omdnc2F2ZShwbG90LmZpbGUsIHBsb3QgPSBnZ3Bsb3QyOjpsYXN0X3Bsb3QoKSkKYGBgCgojIyMjIGBVYCBzcGFyc2l0eQoKYGBge3J9CnNwYXJzaXR5LmRmICU+JQogIGRwbHlyOjptdXRhdGUoc3BhcnNpdHlfdHlwZSA9IAogICAgICAgICAgICAgICAgZHBseXI6OmNhc2Vfd2hlbigKICAgICAgICAgICAgICAgICAgKHNwYXJzaXR5X3R5cGUgPT0gImFsbC5zcGFyc2l0eSIpIH4gIkFsbCIsCiAgICAgICAgICAgICAgICAgIChzcGFyc2l0eV90eXBlID09ICJzaWcuc3BhcnNpdHkiKSB+ICJTaWduaWZpY2FudCBhc3NvY2lhdGlvbiBvbmx5IgogICAgICAgICAgICAgICAgKSkgJT4lCiAgZ2dwbG90Mjo6Z2dwbG90KGdncGxvdDI6OmFlcyh4ID0gdHJhaW5pbmdfc2V0LCB5ID0gdmFsdWUpKSArCiAgZ2dwbG90Mjo6Z2VvbV92aW9saW4oKSArCiAgZ2dwbG90Mjo6c3RhdF9zdW1tYXJ5KGZ1bi55ID0gbWVkaWFuLCBnZW9tID0gInBvaW50Iiwgc2hhcGUgPSAxOCwKICAgICAgICAgICAgICAgICAgICAgICAgc2l6ZSA9IDQpICsKICBnZ3Bsb3QyOjpmYWNldF9ncmlkKH4gc3BhcnNpdHlfdHlwZSkgKwogIGdncGxvdDI6OnRoZW1lX2J3KCkgKwogIGdncGxvdDI6OmxhYnMoeCA9ICJ0cmFpbmluZyBzZXQiLCAKICAgICAgICAgICAgICAgIHkgPSAicHJvcG9ydGlvbiBvZiBwb3NpdGl2ZSBlbnRyaWVzIGluIFUiLAogICAgICAgICAgICAgICAgdGl0bGUgPSAiUExJRVIgbW9kZWwgbiA9IDE2NDAiKQpgYGAKCgpgYGB7cn0KcGxvdC5maWxlIDwtIGZpbGUucGF0aChwbG90LmRpciwgInVfc3BhcnNpdHkucGRmIikKZ2dwbG90Mjo6Z2dzYXZlKHBsb3QuZmlsZSwgcGxvdCA9IGdncGxvdDI6Omxhc3RfcGxvdCgpKQpgYGA=