J. Taroni 2018

In `11-subsample_recount_PLIER.R`, we subsampled the recount2 dataset ten times, each time randomly selecting the same number of samples as the SLE WB compendium (n = 1640). We then trained a PLIER model on each of the ten subsampled datasets.

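Here, we’ll evaluate the ten models in the following ways:

* Sparsity of `U` (prior information coefficient matrix; proxy for "ease of interpretation")
* Number of latent variables
* Pathway coverage (e.g., what percentage of pathways are associated with an LV, how many LVs have a pathway significantly associated with them)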

Functions and directory set up

# make the magrittr pipe available without attaching all of dplyr
`%>%` <- dplyr::`%>%`
# load project helper functions (presumably where EvalWrapper, used below, is
# defined -- confirm in util/plier_util.R)
source(file.path("util", "plier_util.R"))
# output locations for this notebook's plots and results
plot.dir <- file.path("plots", "15")
results.dir <- file.path("results", "15")
# create both directories (and any missing parents); stay quiet if they
# already exist
invisible(lapply(c(plot.dir, results.dir), dir.create,
                 recursive = TRUE, showWarnings = FALSE))

Main evaluation

# directory where the subsampled model RDS files were saved
subsampled.dir <- file.path("results", "11")
# list files in the directory -- we'll use lapply to generate a list of models
plier.files <- list.files(subsampled.dir, full.names = TRUE)
# fail early with a clear message rather than an obscure error further down
# when there is nothing to evaluate
if (length(plier.files) == 0) {
  stop("No model files found in ", subsampled.dir, call. = FALSE)
}
# read in models -- each of the files contains a list where PLIER corresponds
# to the PLIER model
model.list <- lapply(plier.files, function(x) readRDS(x)$PLIER)
# name each model after its file: drop the directory, then strip only a
# literal, trailing ".RDS" (the dot is escaped and the match is anchored so
# nothing in the middle of a file name is removed by accident)
names(model.list) <- sub("\\.RDS$", "", basename(plier.files))
# evaluate models with wrapper function; the result keeps the model names
eval.list <- lapply(model.list, EvalWrapper)
# flatten the nested evaluation list into one long data.frame; the four
# columns returned by melt are renamed positionally (this relies on melt's
# column order -- value first, then list levels from innermost to outermost;
# TODO confirm if EvalWrapper's output structure changes)
eval.df <- reshape2::melt(eval.list)
colnames(eval.df) <- c("value", "pathway_coverage_type", "metric", "model")

# U sparsity -- all LVs and significant-only are kept together, distinguished
# by the sparsity_type column
sparsity.df <- eval.df %>%
  dplyr::filter(metric %in% c("all.sparsity", "sig.sparsity")) %>%
  dplyr::select(model, sparsity_type = metric, value)

# number of latent variables per model
num.lvs.df <- eval.df %>%
  dplyr::filter(metric == "num.lvs") %>%
  dplyr::select(model, num_lvs = value)

# pathway coverage -- one row per model and coverage type
pathway.df <- eval.df %>%
  dplyr::filter(metric == "pathway.coverage") %>%
  dplyr::select(model, pathway_coverage_type, value)
# output paths -- one TSV per summary table, all under the results directory
sparsity.file <- file.path(results.dir, "subsampled_sparsity.tsv")
num.file <- file.path(results.dir, "subsampled_num_lvs.tsv")
pathway.file <- file.path(results.dir, "subsampled_pathway.tsv")
# write each table to its file
readr::write_tsv(sparsity.df, sparsity.file)
readr::write_tsv(num.lvs.df, num.file)
readr::write_tsv(pathway.df, pathway.file)
LS0tCnRpdGxlOiAiRXZhbHVhdGluZyB0aGUgcmVjb3VudDIgc3Vic2FtcGxlZCBtb2RlbHMiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCioqSi4gVGFyb25pIDIwMTgqKgoKSW4gYDExLXN1YnNhbXBsZV9yZWNvdW50X1BMSUVSLlJgLCB3ZSBzdWJzYW1wbGVkIHRoZSByZWNvdW50MiBkYXRhc2V0IHN1Y2ggdGhhdAppdCBjb250YWluZWQgdGhlIHNhbWUgbnVtYmVyIG9mIHNhbXBsZXMgYXMgdGhlIFNMRSBXQiBjb21wZW5kaXVtIChgbiA9IDE2NDBgKSAKdGVuIHRpbWVzLiAKV2UgdHJhaW5lZCBhIFBMSUVSIG1vZGVsIG9uIGVhY2ggb2YgdGhlIHRlbiByYW5kb21seSBzZWxlY3RlZCBkYXRhc2V0cy4gCgpIZXJlLCB3ZSdsbCBldmFsdWF0ZSB0aGUgdGVuIG1vZGVscyBpbiB0aGUgZm9sbG93aW5nIHdheXM6CgoqIFNwYXJzaXR5IG9mIGBVYCAocHJpb3IgaW5mb3JtYXRpb24gY29lZmZpY2llbnQgbWF0cml4OyBwcm94eSBmb3IgImVhc2UgCm9mIGludGVycHJldGF0aW9uIikKKiBOdW1iZXIgb2YgbGF0ZW50IHZhcmlhYmxlcwoqIFBhdGh3YXkgY292ZXJhZ2UgKGUuZy4sIHdoYXQgcGVyY2VudGFnZSBvZiBwYXRod2F5cyBhcmUgYXNzb2NpYXRlZCB3aXRoIGFuIApMViwgaG93IG1hbnkgTFZzIGhhdmUgYSBwYXRod2F5IHNpZ25pZmljYW50bHkgYXNzb2NpYXRlZCB3aXRoIHRoZW0pCgojIyBGdW5jdGlvbnMgYW5kIGRpcmVjdG9yeSBzZXQgdXAKCmBgYHtyfQpgJT4lYCA8LSBkcGx5cjo6YCU+JWAKc291cmNlKGZpbGUucGF0aCgidXRpbCIsICJwbGllcl91dGlsLlIiKSkKYGBgCgpgYGB7cn0KIyBwbG90IGFuZCByZXN1bHQgZGlyZWN0b3J5IHNldHVwIGZvciB0aGlzIG5vdGVib29rCnBsb3QuZGlyIDwtIGZpbGUucGF0aCgicGxvdHMiLCAiMTUiKQpkaXIuY3JlYXRlKHBsb3QuZGlyLCByZWN1cnNpdmUgPSBUUlVFLCBzaG93V2FybmluZ3MgPSBGQUxTRSkKcmVzdWx0cy5kaXIgPC0gZmlsZS5wYXRoKCJyZXN1bHRzIiwgIjE1IikKZGlyLmNyZWF0ZShyZXN1bHRzLmRpciwgcmVjdXJzaXZlID0gVFJVRSwgc2hvd1dhcm5pbmdzID0gRkFMU0UpCmBgYAoKIyMgTWFpbiBldmFsdWF0aW9uCgpgYGB7cn0KIyBkaXJlY3Rvcnkgd2hlcmUgdGhlIG1vZGVscyBSRFMgd2VyZSBzYXZlZApzdWJzYW1wbGVkLmRpciA8LSBmaWxlLnBhdGgoInJlc3VsdHMiLCAiMTEiKQoKIyBsaXN0IGZpbGVzIGluIHRoZSBkaXJlY3RvcnkgLS0gd2UnbGwgdXMgbGFwcGx5IHRvIGdlbmVyYXRlIGEgbGlzdCBvZiBsaXN0CnBsaWVyLmZpbGVzIDwtIGxpc3QuZmlsZXMoc3Vic2FtcGxlZC5kaXIsIGZ1bGwubmFtZXMgPSBUUlVFKQpgYGAKCmBgYHtyfQojIHJlYWQgaW4gbW9kZWxzIC0tIGVhY2ggb2YgdGhlIGZpbGVzIGNvbnRhaW5zIGEgbGlzdCB3aGVyZSBQTElFUiBjb3JyZXNwb25kcwojIHRvIHRoZSBQTElFUiBtb2RlbAptb2RlbC5saXN0IDwtIGxhcHBseShwbGllci5maWxlcywgZnVuY3Rpb24oeCkgcmVhZFJEUyh4KSRQTElFUikKbmFtZXMobW9kZWwubGlz
dCkgPC0gc3ViKCIuUkRTIiwgIiIsIHN1YigiLipcXC8iLCAiIiwgcGxpZXIuZmlsZXMpKQojIGV2YWx1YXRlIG1vZGVscyB3aXRoIHdyYXBwZXIgZnVuY3Rpb24gCmV2YWwubGlzdCA8LSBsYXBwbHkobW9kZWwubGlzdCwgRXZhbFdyYXBwZXIpCmBgYAoKYGBge3J9CiMgcmVzaGFwZSBsaXN0IHRvIGRhdGEuZnJhbWUgZm9yIHdyYW5nbGluZwpldmFsLmRmIDwtIHJlc2hhcGUyOjptZWx0KGV2YWwubGlzdCkKY29sbmFtZXMoZXZhbC5kZikgPC0gYygidmFsdWUiLCAicGF0aHdheV9jb3ZlcmFnZV90eXBlIiwgIm1ldHJpYyIsICJtb2RlbCIpCgojIFUgc3BhcnNpdHkgLS0gd2UnbGwga2VlcCBhbGwgYW5kIHNpZ25pZmljYW50IG9ubHkgaW4gdGhlIHNhbWUgZGF0YS5mcmFtZQpzcGFyc2l0eS5kZiA8LSBldmFsLmRmICU+JQogICAgICAgICAgICAgICAgZHBseXI6OmZpbHRlcihtZXRyaWMgJWluJSBjKCJhbGwuc3BhcnNpdHkiLCAic2lnLnNwYXJzaXR5IikpICU+JQogICAgICAgICAgICAgICAgZHBseXI6Om11dGF0ZShzcGFyc2l0eV90eXBlID0gbWV0cmljKSAlPiUKICAgICAgICAgICAgICAgIGRwbHlyOjpzZWxlY3QoYyhtb2RlbCwgc3BhcnNpdHlfdHlwZSwgdmFsdWUpKQoKIyBudW1iZXIgb2YgbHZzCm51bS5sdnMuZGYgPC0gZXZhbC5kZiAlPiUKICAgICAgICAgICAgICAgIGRwbHlyOjpmaWx0ZXIobWV0cmljID09ICJudW0ubHZzIikgJT4lCiAgICAgICAgICAgICAgICBkcGx5cjo6bXV0YXRlKG51bV9sdnMgPSB2YWx1ZSkgJT4lCiAgICAgICAgICAgICAgICBkcGx5cjo6c2VsZWN0KGMobW9kZWwsIG51bV9sdnMpKQoKIyBwYXRod2F5IGNvdmVyYWdlCnBhdGh3YXkuZGYgPC0gZXZhbC5kZiAlPiUKICAgICAgICAgICAgICAgIGRwbHlyOjpmaWx0ZXIobWV0cmljID09ICJwYXRod2F5LmNvdmVyYWdlIikgJT4lIAogICAgICAgICAgICAgICAgZHBseXI6OnNlbGVjdChjKG1vZGVsLCBwYXRod2F5X2NvdmVyYWdlX3R5cGUsIHZhbHVlKSkKYGBgCgpgYGB7cn0KIyB3cml0ZSB0byBmaWxlCnNwYXJzaXR5LmZpbGUgPC0gZmlsZS5wYXRoKHJlc3VsdHMuZGlyLCAic3Vic2FtcGxlZF9zcGFyc2l0eS50c3YiKQpyZWFkcjo6d3JpdGVfdHN2KHNwYXJzaXR5LmRmLCBzcGFyc2l0eS5maWxlKQoKbnVtLmZpbGUgPC0gZmlsZS5wYXRoKHJlc3VsdHMuZGlyLCAic3Vic2FtcGxlZF9udW1fbHZzLnRzdiIpCnJlYWRyOjp3cml0ZV90c3YobnVtLmx2cy5kZiwgbnVtLmZpbGUpCgpwYXRod2F5LmZpbGUgPC0gZmlsZS5wYXRoKHJlc3VsdHMuZGlyLCAic3Vic2FtcGxlZF9wYXRod2F5LnRzdiIpCnJlYWRyOjp3cml0ZV90c3YocGF0aHdheS5kZiwgcGF0aHdheS5maWxlKQpgYGAKCg==