J. Taroni 2018

In 15-evaluate_subsampling and 16-repeat_sle_wb_PLIER.R, we calculated some measures of PLIER model performance (e.g., sparsity of U). We’ll plot the results for those two sets of training data in this notebook.

Functions and directory setup

`%>%` <- dplyr::`%>%`
# plot and result directory setup for this notebook
plot.dir <- file.path("plots", "17")
dir.create(plot.dir, recursive = TRUE, showWarnings = FALSE)
results.dir <- file.path("results", "17")
dir.create(results.dir, recursive = TRUE, showWarnings = FALSE)

Read in data

Read in the three data.frame from each set of experiments: sparsity, pathway coverage, and number of LVs.

SLE WB repeats

sle.sparsity <- readr::read_tsv(file.path("results", "16",
                                          "sle-wb_repeated_sparsity.tsv")) %>%
                  dplyr::mutate(training_set = "SLE")
Parsed with column specification:
cols(
  model = col_character(),
  sparsity_type = col_character(),
  value = col_double()
)
sle.num.lvs <- readr::read_tsv(file.path("results", "16",
                                          "sle-wb_repeated_num_lvs.tsv")) %>%
                  dplyr::mutate(training_set = "SLE")
Parsed with column specification:
cols(
  model = col_character(),
  num_lvs = col_integer()
)
sle.pathway <- readr::read_tsv(file.path("results", "16",
                                          "sle-wb_repeated_pathway.tsv")) %>%
                  dplyr::mutate(training_set = "SLE")
Parsed with column specification:
cols(
  model = col_character(),
  pathway_coverage_type = col_character(),
  value = col_double()
)

Subsampled recount2

recount.sparsity <- readr::read_tsv(file.path("results", "15",
                                          "subsampled_sparsity.tsv")) %>%
                  dplyr::mutate(training_set = "recount2 subsampled")
Parsed with column specification:
cols(
  model = col_character(),
  sparsity_type = col_character(),
  value = col_double()
)
recount.num.lvs <- readr::read_tsv(file.path("results", "15",
                                          "subsampled_num_lvs.tsv")) %>%
                  dplyr::mutate(training_set = "recount2 subsampled")
Parsed with column specification:
cols(
  model = col_character(),
  num_lvs = col_integer()
)
recount.pathway <- readr::read_tsv(file.path("results", "15",
                                          "subsampled_pathway.tsv")) %>%
                  dplyr::mutate(training_set = "recount2 subsampled")
Parsed with column specification:
cols(
  model = col_character(),
  pathway_coverage_type = col_character(),
  value = col_double()
)

Bind two experiments

num.lvs.df <- dplyr::bind_rows(sle.num.lvs, recount.num.lvs)
pathway.df <- dplyr::bind_rows(sle.pathway, recount.pathway)
sparsity.df <- dplyr::bind_rows(sle.sparsity, recount.sparsity)
rm(sle.num.lvs, sle.pathway, sle.sparsity, recount.num.lvs, recount.pathway,
     recount.sparsity)

Write number of LVs data.frame to file

# number of latent variables
num.lvs.file <- file.path(results.dir, "number_of_LVs.tsv")
readr::write_tsv(num.lvs.df, num.lvs.file)

Plotting

Number of latent variables

num.lvs.df %>%
  ggplot2::ggplot(ggplot2::aes(x = training_set, y = num_lvs,
                               group = training_set)) +
  ggplot2::geom_boxplot() +
  ggplot2::geom_point(position = ggplot2::position_jitter(0.2),
                      alpha = 0.5) +
  ggplot2::theme_bw() +
  ggplot2::labs(x = "training set", 
                y = "number of latent variables",
                title = "PLIER model n = 1640")

plot.file <- file.path(plot.dir, "number_of_lvs.pdf")
ggplot2::ggsave(plot.file, plot = ggplot2::last_plot())
Saving 7 x 7 in image

Pathway coverage

pathway.df <- pathway.df %>%
  dplyr::filter(pathway_coverage_type != "sig.pathway.by.lv") %>%
  dplyr::mutate(pathway_coverage_type = 
                  dplyr::case_when(
                    (pathway_coverage_type == "lv") ~ 
                      "LV associated with pathways",
                    (pathway_coverage_type =="pathway") ~ "pathway coverage"
                  ))
# pathway coverage
pathway.file <- file.path(results.dir, "pathway_coverage.tsv")
readr::write_tsv(pathway.df, pathway.file)
pathway.df %>%
  ggplot2::ggplot(ggplot2::aes(x = training_set, y = value)) +
  ggplot2::geom_boxplot() +
  ggplot2::geom_point(position = ggplot2::position_jitter(0.2),
                      alpha = 0.5) +
  ggplot2::facet_grid(~ pathway_coverage_type) +
  ggplot2::theme_bw() +
  ggplot2::labs(x = "training set", 
                y = "proportion",
                title = "PLIER model n = 1640")

plot.file <- file.path(plot.dir, "pathway_coverage.pdf")
ggplot2::ggsave(plot.file, plot = ggplot2::last_plot())
Saving 7 x 7 in image

U sparsity

sparsity.df %>%
  dplyr::mutate(sparsity_type = 
                dplyr::case_when(
                  (sparsity_type == "all.sparsity") ~ "All",
                  (sparsity_type == "sig.sparsity") ~ "Significant association only"
                )) %>%
  ggplot2::ggplot(ggplot2::aes(x = training_set, y = value)) +
  ggplot2::geom_violin() +
  ggplot2::stat_summary(fun.y = median, geom = "point", shape = 18,
                        size = 4) +
  ggplot2::facet_grid(~ sparsity_type) +
  ggplot2::theme_bw() +
  ggplot2::labs(x = "training set", 
                y = "proportion of positive entries in U",
                title = "PLIER model n = 1640")

plot.file <- file.path(plot.dir, "u_sparsity.pdf")
ggplot2::ggsave(plot.file, plot = ggplot2::last_plot())
Saving 7 x 7 in image
LS0tCnRpdGxlOiAiUGxvdHRpbmcgZm9yIHRoZSByZXBlYXRlZCBTTEUgV0IgYW5kIHN1YnNhbXBsZWQgcmVjb3VudDIgbW9kZWwgZXZhbHVhdGlvbnMiCm91dHB1dDogICAKICBodG1sX25vdGVib29rOiAKICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OiB0cnVlCi0tLQoKKipKLiBUYXJvbmkgMjAxOCoqCgpJbiBgMTUtZXZhbHVhdGVfc3Vic2FtcGxpbmdgIGFuZCBgMTYtcmVwZWF0X3NsZV93Yl9QTElFUi5SYCwgd2UgY2FsY3VsYXRlZCBzb21lCm1lYXN1cmVzIG9mIFBMSUVSIG1vZGVsIHBlcmZvcm1hbmNlIChlLmcuLCBzcGFyc2l0eSBvZiBgVWApLiAKV2UnbGwgcGxvdCB0aGUgcmVzdWx0cyBmb3IgdGhvc2UgdHdvIHNldHMgb2YgdHJhaW5pbmcgZGF0YSBpbiB0aGlzIG5vdGVib29rLgoKIyMgRnVuY3Rpb25zIGFuZCBkaXJlY3Rvcnkgc2V0dXAKCmBgYHtyfQpgJT4lYCA8LSBkcGx5cjo6YCU+JWAKYGBgCgpgYGB7cn0KIyBwbG90IGFuZCByZXN1bHQgZGlyZWN0b3J5IHNldHVwIGZvciB0aGlzIG5vdGVib29rCnBsb3QuZGlyIDwtIGZpbGUucGF0aCgicGxvdHMiLCAiMTciKQpkaXIuY3JlYXRlKHBsb3QuZGlyLCByZWN1cnNpdmUgPSBUUlVFLCBzaG93V2FybmluZ3MgPSBGQUxTRSkKcmVzdWx0cy5kaXIgPC0gZmlsZS5wYXRoKCJyZXN1bHRzIiwgIjE3IikKZGlyLmNyZWF0ZShyZXN1bHRzLmRpciwgcmVjdXJzaXZlID0gVFJVRSwgc2hvd1dhcm5pbmdzID0gRkFMU0UpCmBgYAoKIyMgUmVhZCBpbiBkYXRhCgpSZWFkIGluIHRoZSB0aHJlZSBgZGF0YS5mcmFtZWAgZnJvbSBlYWNoIHNldCBvZiBleHBlcmltZW50czogc3BhcnNpdHksIApwYXRod2F5IGNvdmVyYWdlLCBhbmQgbnVtYmVyIG9mIExWcy4KCiMjIyBTTEUgV0IgcmVwZWF0cwoKYGBge3J9CnNsZS5zcGFyc2l0eSA8LSByZWFkcjo6cmVhZF90c3YoZmlsZS5wYXRoKCJyZXN1bHRzIiwgIjE2IiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgInNsZS13Yl9yZXBlYXRlZF9zcGFyc2l0eS50c3YiKSkgJT4lCiAgICAgICAgICAgICAgICAgIGRwbHlyOjptdXRhdGUodHJhaW5pbmdfc2V0ID0gIlNMRSIpCnNsZS5udW0ubHZzIDwtIHJlYWRyOjpyZWFkX3RzdihmaWxlLnBhdGgoInJlc3VsdHMiLCAiMTYiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAic2xlLXdiX3JlcGVhdGVkX251bV9sdnMudHN2IikpICU+JQogICAgICAgICAgICAgICAgICBkcGx5cjo6bXV0YXRlKHRyYWluaW5nX3NldCA9ICJTTEUiKQpzbGUucGF0aHdheSA8LSByZWFkcjo6cmVhZF90c3YoZmlsZS5wYXRoKCJyZXN1bHRzIiwgIjE2IiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgInNsZS13Yl9yZXBlYXRlZF9wYXRod2F5LnRzdiIpKSAlPiUKICAgICAgICAgICAgICAgICAgZHBseXI6Om11dGF0ZSh0cmFpbmluZ19zZXQgPSAiU0xFIikKYGBgCgojIyMgU3Vic2FtcGxlZCByZWNvdW50MgoKYGBge3J9CnJlY291bnQuc3BhcnNpdHkgPC0gcmVhZHI6OnJlYWRfdHN2KGZpbGUucGF0aCgicmVzdWx0cyIsICIxNSIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICJzdWJzYW1wbGVkX3NwYXJzaXR5LnRzdiIpKSAlPiUKICAgICAgICAgICAgICAgICAgZHBseXI6Om11dGF0ZSh0cmFpbmluZ19zZXQgPSAicmVjb3VudDIgc3Vic2FtcGxlZCIpCnJlY291bnQubnVtLmx2cyA8LSByZWFkcjo6cmVhZF90c3YoZmlsZS5wYXRoKCJyZXN1bHRzIiwgIjE1IiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgInN1YnNhbXBsZWRfbnVtX2x2cy50c3YiKSkgJT4lCiAgICAgICAgICAgICAgICAgIGRwbHlyOjptdXRhdGUodHJhaW5pbmdfc2V0ID0gInJlY291bnQyIHN1YnNhbXBsZWQiKQpyZWNvdW50LnBhdGh3YXkgPC0gcmVhZHI6OnJlYWRfdHN2KGZpbGUucGF0aCgicmVzdWx0cyIsICIxNSIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICJzdWJzYW1wbGVkX3BhdGh3YXkudHN2IikpICU+JQogICAgICAgICAgICAgICAgICBkcGx5cjo6bXV0YXRlKHRyYWluaW5nX3NldCA9ICJyZWNvdW50MiBzdWJzYW1wbGVkIikKYGBgCgojIyMgQmluZCB0d28gZXhwZXJpbWVudHMKCmBgYHtyfQpudW0ubHZzLmRmIDwtIGRwbHlyOjpiaW5kX3Jvd3Moc2xlLm51bS5sdnMsIHJlY291bnQubnVtLmx2cykKcGF0aHdheS5kZiA8LSBkcGx5cjo6YmluZF9yb3dzKHNsZS5wYXRod2F5LCByZWNvdW50LnBhdGh3YXkpCnNwYXJzaXR5LmRmIDwtIGRwbHlyOjpiaW5kX3Jvd3Moc2xlLnNwYXJzaXR5LCByZWNvdW50LnNwYXJzaXR5KQpybShzbGUubnVtLmx2cywgc2xlLnBhdGh3YXksIHNsZS5zcGFyc2l0eSwgcmVjb3VudC5udW0ubHZzLCByZWNvdW50LnBhdGh3YXksCiAgICAgcmVjb3VudC5zcGFyc2l0eSkKYGBgCgpXcml0ZSBudW1iZXIgb2YgTFZzIGBkYXRhLmZyYW1lYCB0byBmaWxlCgpgYGB7cn0KIyBudW1iZXIgb2YgbGF0ZW50IHZhcmlhYmxlcwpudW0ubHZzLmZpbGUgPC0gZmlsZS5wYXRoKHJlc3VsdHMuZGlyLCAibnVtYmVyX29mX0xWcy50c3YiKQpyZWFkcjo6d3JpdGVfdHN2KG51bS5sdnMuZGYsIG51bS5sdnMuZmlsZSkKYGBgCgojIyBQbG90dGluZwoKIyMjIyBOdW1iZXIgb2YgbGF0ZW50IHZhcmlhYmxlcwoKYGBge3J9Cm51bS5sdnMuZGYgJT4lCiAgZ2dwbG90Mjo6Z2dwbG90KGdncGxvdDI6OmFlcyh4ID0gdHJhaW5pbmdfc2V0LCB5ID0gbnVtX2x2cywKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGdyb3VwID0gdHJhaW5pbmdfc2V0KSkgKwogIGdncGxvdDI6Omdlb21fYm94cGxvdCgpICsKICBnZ3Bsb3QyOjpnZW9tX3BvaW50KHBvc2l0aW9uID0gZ2dwbG90Mjo6cG9zaXRpb25faml0dGVyKDAuMiksCiAgICAgICAgICAgICAgICAgICAgICBhbHBoYSA9IDAuNSkgKwogIGdncGxvdDI6OnRoZW1lX2J3KCkgKwogIGdncGxvdDI6OmxhYnMoeCA9ICJ0cmFpbmluZyBzZXQiLCAKICAgICAgICAgICAgICAgIHkgPSAibnVtYmVyIG9mIGxhdGVudCB2YXJpYWJsZXMiLAogICAgICAgICAgICAgICAgdGl0bGUgPSAiUExJRVIgbW9kZWwgbiA9IDE2NDAiKQpgYGAKCmBgYHtyfQpwbG90LmZpbGUgPC0gZmlsZS5wYXRoKHBsb3QuZGlyLCAibnVtYmVyX29mX2x2cy5wZGYiKQpnZ3Bsb3QyOjpnZ3NhdmUocGxvdC5maWxlLCBwbG90ID0gZ2dwbG90Mjo6bGFzdF9wbG90KCkpCmBgYAoKIyMjIyBQYXRod2F5IGNvdmVyYWdlCgpgYGB7cn0KcGF0aHdheS5kZiA8LSBwYXRod2F5LmRmICU+JQogIGRwbHlyOjpmaWx0ZXIocGF0aHdheV9jb3ZlcmFnZV90eXBlICE9ICJzaWcucGF0aHdheS5ieS5sdiIpICU+JQogIGRwbHlyOjptdXRhdGUocGF0aHdheV9jb3ZlcmFnZV90eXBlID0gCiAgICAgICAgICAgICAgICAgIGRwbHlyOjpjYXNlX3doZW4oCiAgICAgICAgICAgICAgICAgICAgKHBhdGh3YXlfY292ZXJhZ2VfdHlwZSA9PSAibHYiKSB+IAogICAgICAgICAgICAgICAgICAgICAgIkxWIGFzc29jaWF0ZWQgd2l0aCBwYXRod2F5cyIsCiAgICAgICAgICAgICAgICAgICAgKHBhdGh3YXlfY292ZXJhZ2VfdHlwZSA9PSJwYXRod2F5IikgfiAicGF0aHdheSBjb3ZlcmFnZSIKICAgICAgICAgICAgICAgICAgKSkKCiMgcGF0aHdheSBjb3ZlcmFnZQpwYXRod2F5LmZpbGUgPC0gZmlsZS5wYXRoKHJlc3VsdHMuZGlyLCAicGF0aHdheV9jb3ZlcmFnZS50c3YiKQpyZWFkcjo6d3JpdGVfdHN2KHBhdGh3YXkuZGYsIHBhdGh3YXkuZmlsZSkKCnBhdGh3YXkuZGYgJT4lCiAgZ2dwbG90Mjo6Z2dwbG90KGdncGxvdDI6OmFlcyh4ID0gdHJhaW5pbmdfc2V0LCB5ID0gdmFsdWUpKSArCiAgZ2dwbG90Mjo6Z2VvbV9ib3hwbG90KCkgKwogIGdncGxvdDI6Omdlb21fcG9pbnQocG9zaXRpb24gPSBnZ3Bsb3QyOjpwb3NpdGlvbl9qaXR0ZXIoMC4yKSwKICAgICAgICAgICAgICAgICAgICAgIGFscGhhID0gMC41KSArCiAgZ2dwbG90Mjo6ZmFjZXRfZ3JpZCh+IHBhdGh3YXlfY292ZXJhZ2VfdHlwZSkgKwogIGdncGxvdDI6OnRoZW1lX2J3KCkgKwogIGdncGxvdDI6OmxhYnMoeCA9ICJ0cmFpbmluZyBzZXQiLCAKICAgICAgICAgICAgICAgIHkgPSAicHJvcG9ydGlvbiIsCiAgICAgICAgICAgICAgICB0aXRsZSA9ICJQTElFUiBtb2RlbCBuID0gMTY0MCIpCmBgYAoKYGBge3J9CnBsb3QuZmlsZSA8LSBmaWxlLnBhdGgocGxvdC5kaXIsICJwYXRod2F5X2NvdmVyYWdlLnBkZiIpCmdncGxvdDI6Omdnc2F2ZShwbG90LmZpbGUsIHBsb3QgPSBnZ3Bsb3QyOjpsYXN0X3Bsb3QoKSkKYGBgCgojIyMjIGBVYCBzcGFyc2l0eQoKYGBge3J9CnNwYXJzaXR5LmRmICU+JQogIGRwbHlyOjptdXRhdGUoc3BhcnNpdHlfdHlwZSA9IAogICAgICAgICAgICAgICAgZHBseXI6OmNhc2Vfd2hlbigKICAgICAgICAgICAgICAgICAgKHNwYXJzaXR5X3R5cGUgPT0gImFsbC5zcGFyc2l0eSIpIH4gIkFsbCIsCiAgICAgICAgICAgICAgICAgIChzcGFyc2l0eV90eXBlID09ICJzaWcuc3BhcnNpdHkiKSB+ICJTaWduaWZpY2FudCBhc3NvY2lhdGlvbiBvbmx5IgogICAgICAgICAgICAgICAgKSkgJT4lCiAgZ2dwbG90Mjo6Z2dwbG90KGdncGxvdDI6OmFlcyh4ID0gdHJhaW5pbmdfc2V0LCB5ID0gdmFsdWUpKSArCiAgZ2dwbG90Mjo6Z2VvbV92aW9saW4oKSArCiAgZ2dwbG90Mjo6c3RhdF9zdW1tYXJ5KGZ1bi55ID0gbWVkaWFuLCBnZW9tID0gInBvaW50Iiwgc2hhcGUgPSAxOCwKICAgICAgICAgICAgICAgICAgICAgICAgc2l6ZSA9IDQpICsKICBnZ3Bsb3QyOjpmYWNldF9ncmlkKH4gc3BhcnNpdHlfdHlwZSkgKwogIGdncGxvdDI6OnRoZW1lX2J3KCkgKwogIGdncGxvdDI6OmxhYnMoeCA9ICJ0cmFpbmluZyBzZXQiLCAKICAgICAgICAgICAgICAgIHkgPSAicHJvcG9ydGlvbiBvZiBwb3NpdGl2ZSBlbnRyaWVzIGluIFUiLAogICAgICAgICAgICAgICAgdGl0bGUgPSAiUExJRVIgbW9kZWwgbiA9IDE2NDAiKQpgYGAKCgpgYGB7cn0KcGxvdC5maWxlIDwtIGZpbGUucGF0aChwbG90LmRpciwgInVfc3BhcnNpdHkucGRmIikKZ2dwbG90Mjo6Z2dzYXZlKHBsb3QuZmlsZSwgcGxvdCA9IGdncGxvdDI6Omxhc3RfcGxvdCgpKQpgYGA=