J. Taroni 2018

Here, we’re testing for latent variable differential expression between medulloblastoma subgroups (Group 3, Group 4, and SHH).

As in 37-medulloblastoma_recount2_model, where we prepped the data we are analyzing here, this is using Northcott, et al. and Robinson, et al. data.

Set up

# pipe is required for LVTestWrapper
`%>%` <- dplyr::`%>%`

We have several custom functions that we’ve written and previously used in our ANCA-associated vasculitis analyses. They are general enough that we can use them again here.

source(file.path("util", "test_LV_differences.R"))

Directories

# directories specific to this notebook
plot.dir <- file.path("plots", "38")
dir.create(plot.dir, recursive = TRUE, showWarnings = FALSE)
results.dir <- file.path("results", "38")
dir.create(results.dir, recursive = TRUE, showWarnings = FALSE)

Read in files

We need two things to do this analysis: a B matrix (latent variable values) and a data.frame that contains the sample labels for the groups that we’d like to test for differences between.

GSE37382 (Northcott, et al.)

northcott.b.file <- file.path("results", "37", "GSE37382_recount2_B.RDS")
northcott.sample.file <- file.path("data", "sample_info", 
                                   "GSE37382_cleaned_metadata.tsv")

Read in B (MultiPLIER)

northcott.b.matrix <- readRDS(northcott.b.file)

Read in the data.frame with the subgroup information and check that the sample names match between these two files.

northcott.sample.df <- readr::read_tsv(northcott.sample.file)
Parsed with column specification:
cols(
  source_name = col_character(),
  sample_identifier = col_character(),
  tissue_type = col_character(),
  age = col_character(),
  histology = col_character(),
  sex = col_character(),
  subgroup = col_character()
)
all(northcott.sample.df$source_name %in% colnames(northcott.b.matrix))
[1] TRUE

LVTestWrapper requires that the sample names/identifiers are in a column named Sample, so we’ll change source_name to Sample.

colnames(northcott.sample.df)[1] <- "Sample"

GSE37418 (Robinson, et al.)

We’ll repeat that process for the Robinson, et al. data

robinson.b.file <- file.path("results", "37", "GSE37418_recount2_B.RDS")
robinson.sample.file <- file.path("data", "sample_info", 
                                  "metadata_GSE37418.tsv")
robinson.b.matrix <- readRDS(robinson.b.file)
robinson.sample.df <- readr::read_tsv(robinson.sample.file)
Parsed with column specification:
cols(
  .default = col_character(),
  channel_count = col_integer(),
  contact_phone = col_double(),
  `contact_zip/postal_code` = col_integer(),
  data_row_count = col_integer(),
  taxid_ch1 = col_integer()
)
See spec(...) for full column specifications.

The subgroup information in this data set is a bit different, let’s look at the counts

robinson.sample.df %>% 
  dplyr::group_by(subgroup) %>%
  dplyr::tally()

To do differential expression analysis, we’ll remove the SHH OUTLIER and U samples

sample.index <- which(robinson.sample.df$subgroup %in% c("U",
                                                         "SHH OUTLIER"))
samples.to.remove <- robinson.sample.df$refinebio_accession_code[sample.index]
samples.to.remove
[1] "GSM918628" "GSM918618" "GSM918644"
remove.column.index <- which(colnames(robinson.b.matrix) %in% samples.to.remove)

Do the filtering

robinson.b.matrix <- robinson.b.matrix[, -remove.column.index]
robinson.sample.df <- robinson.sample.df %>%
  dplyr::filter(!(refinebio_accession_code %in% samples.to.remove)) %>%
  dplyr::select(c("refinebio_accession_code", "subgroup", "Sex", "age",
                  "title"))
all(robinson.sample.df$refinebio_accession_code %in% 
      colnames(robinson.b.matrix))
[1] TRUE
colnames(robinson.sample.df)[1] <- "Sample"

Test for differential expression

LVTestWrapper gives us 3 things: 1) differential expression results from limma, 2) boxplot + jitter plots of the latent variable expression and 3) the B matrix in “long” format – this is what is used for plotting.

We’ll use "BH" correction for multiple hypotheses testing (the default).

northcott.results <- 
  LVTestWrapper(b.matrix = northcott.b.matrix,
                sample.info.df = northcott.sample.df,
                phenotype.col = "subgroup",
                # the boxplot output, the "long" format B data.frame (useful for 
                # plotting), and the limma differential expression results
                # will be output in files that begin with this string
                file.lead = "GSE37382_subgroup_recount2_model",
                plot.dir = plot.dir,
                results.dir = results.dir)
Column `Sample` joining factor and character vector, coercing into character vector
robinson.results <- 
  LVTestWrapper(b.matrix = robinson.b.matrix,
                sample.info.df = robinson.sample.df,
                phenotype.col = "subgroup",
                file.lead = "GSE37418_subgroup_recount2_model",
                plot.dir = plot.dir,
                results.dir = results.dir)
Column `Sample` joining factor and character vector, coercing into character vector

Any overlap at all?

northcott.delvs <- 
  northcott.results$limma$LV[which(northcott.results$limma$adj.P.Val < 0.05)]
robinson.delvs <- 
  robinson.results$limma$LV[which(robinson.results$limma$adj.P.Val < 0.05)]
VennDiagram::venn.diagram(list(Robinson = robinson.delvs,
                               Northcott = northcott.delvs),
                          file.path(plot.dir, "Medulloblastoma_DELV_Venn.tiff"))
[1] 1
LS0tCnRpdGxlOiAiSWRlbnRpZnlpbmcgZGlmZmVyZW50aWFsbHkgZXhwcmVzc2VkIGxhdGVudCB2YXJpYWJsZXMgaW4gCm1lZHVsbG9ibGFzdG9tYSIKb3V0cHV0OiAgIAogIGh0bWxfbm90ZWJvb2s6IAogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKLS0tCgoqKkouIFRhcm9uaSAyMDE4KioKCkhlcmUsIHdlJ3JlIHRlc3RpbmcgZm9yIGxhdGVudCB2YXJpYWJsZSBkaWZmZXJlbnRpYWwgZXhwcmVzc2lvbiBiZXR3ZWVuCm1lZHVsbG9ibGFzdG9tYSBzdWJncm91cHMgKEdyb3VwIDMsIEdyb3VwIDQsIGFuZCBTSEgpLgoKQXMgaW4gYDM3LW1lZHVsbG9ibGFzdG9tYV9yZWNvdW50Ml9tb2RlbGAsIHdoZXJlIHdlIHByZXBwZWQgdGhlIGRhdGEgd2UgYXJlCmFuYWx5emluZyBoZXJlLCB0aGlzIGlzIHVzaW5nIApbTm9ydGhjb3R0LCBldCBhbC5dKGh0dHBzOi8vZHguZG9pLm9yZy8xMC4xMDM4L25hdHVyZTExMzI3KSAgYW5kCltSb2JpbnNvbiwgZXQgYWwuXShodHRwczovL2R4LmRvaS5vcmcvMTAuMTAzOC9uYXR1cmUxMTIxMykgZGF0YS4KCiMjIFNldCB1cAoKYGBge3J9CiMgcGlwZSBpcyByZXF1aXJlZCBmb3IgTFZUZXN0V3JhcHBlcgpgJT4lYCA8LSBkcGx5cjo6YCU+JWAKYGBgCgpXZSBoYXZlIHNldmVyYWwgY3VzdG9tIGZ1bmN0aW9ucyB0aGF0IHdlJ3ZlIHdyaXR0ZW4gYW5kIHByZXZpb3VzbHkgdXNlZCBpbiBvdXIgCkFOQ0EtYXNzb2NpYXRlZCB2YXNjdWxpdGlzIGFuYWx5c2VzLgpUaGV5IGFyZSBnZW5lcmFsIGVub3VnaCB0aGF0IHdlIGNhbiB1c2UgdGhlbSBhZ2FpbiBoZXJlLgoKYGBge3J9CnNvdXJjZShmaWxlLnBhdGgoInV0aWwiLCAidGVzdF9MVl9kaWZmZXJlbmNlcy5SIikpCmBgYAoKIyMjIyBEaXJlY3RvcmllcwoKYGBge3J9CiMgZGlyZWN0b3JpZXMgc3BlY2lmaWMgdG8gdGhpcyBub3RlYm9vawpwbG90LmRpciA8LSBmaWxlLnBhdGgoInBsb3RzIiwgIjM4IikKZGlyLmNyZWF0ZShwbG90LmRpciwgcmVjdXJzaXZlID0gVFJVRSwgc2hvd1dhcm5pbmdzID0gRkFMU0UpCnJlc3VsdHMuZGlyIDwtIGZpbGUucGF0aCgicmVzdWx0cyIsICIzOCIpCmRpci5jcmVhdGUocmVzdWx0cy5kaXIsIHJlY3Vyc2l2ZSA9IFRSVUUsIHNob3dXYXJuaW5ncyA9IEZBTFNFKQpgYGAKCiMjIFJlYWQgaW4gZmlsZXMKCldlIG5lZWQgdHdvIHRoaW5ncyB0byBkbyB0aGlzIGFuYWx5c2lzOiBhIEIgbWF0cml4IChsYXRlbnQgdmFyaWFibGUgdmFsdWVzKSBhbmQKYSBgZGF0YS5mcmFtZWAgdGhhdCBjb250YWlucyB0aGUgc2FtcGxlIGxhYmVscyBmb3IgdGhlIGdyb3VwcyB0aGF0IHdlJ2QgbGlrZQp0byB0ZXN0IGZvciBkaWZmZXJlbmNlcyBiZXR3ZWVuLgoKIyMjIEdTRTM3MzgyIChOb3J0aGNvdHQsIGV0IGFsLikKCmBgYHtyfQpub3J0aGNvdHQuYi5maWxlIDwtIGZpbGUucGF0aCgicmVzdWx0cyIsICIzNyIsICJHU0UzNzM4Ml9yZWNvdW50Ml9CLlJEUyIpCm5vcnRoY290dC5zYW1wbGUuZmlsZSA8LSBmaWxlLnBhdGgoImRhdGEiLCAic2FtcGxlX2luZm8iLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiR1NFMzczODJfY2xlYW5lZF9tZXRhZGF0YS50c3YiKQpgYGAKClJlYWQgaW4gYEJgIChNdWx0aVBMSUVSKQoKYGBge3J9Cm5vcnRoY290dC5iLm1hdHJpeCA8LSByZWFkUkRTKG5vcnRoY290dC5iLmZpbGUpCmBgYAoKUmVhZCBpbiB0aGUgYGRhdGEuZnJhbWVgIHdpdGggdGhlIHN1Ymdyb3VwIGluZm9ybWF0aW9uIGFuZCBjaGVjayB0aGF0IHRoZQpzYW1wbGUgbmFtZXMgbWF0Y2ggYmV0d2VlbiB0aGVzZSB0d28gZmlsZXMuCgpgYGB7cn0Kbm9ydGhjb3R0LnNhbXBsZS5kZiA8LSByZWFkcjo6cmVhZF90c3Yobm9ydGhjb3R0LnNhbXBsZS5maWxlKQphbGwobm9ydGhjb3R0LnNhbXBsZS5kZiRzb3VyY2VfbmFtZSAlaW4lIGNvbG5hbWVzKG5vcnRoY290dC5iLm1hdHJpeCkpCmBgYAoKYExWVGVzdFdyYXBwZXJgIHJlcXVpcmVzIHRoYXQgdGhlIHNhbXBsZSBuYW1lcy9pZGVudGlmaWVycyBhcmUgaW4gYSBjb2x1bW4gbmFtZWQKYFNhbXBsZWAsIHNvIHdlJ2xsIGNoYW5nZSBgc291cmNlX25hbWVgIHRvIGBTYW1wbGVgLgoKYGBge3J9CmNvbG5hbWVzKG5vcnRoY290dC5zYW1wbGUuZGYpWzFdIDwtICJTYW1wbGUiCmBgYAoKIyMjIEdTRTM3NDE4IChSb2JpbnNvbiwgZXQgYWwuKQoKV2UnbGwgcmVwZWF0IHRoYXQgcHJvY2VzcyBmb3IgdGhlIFJvYmluc29uLCBldCBhbC4gZGF0YQoKYGBge3J9CnJvYmluc29uLmIuZmlsZSA8LSBmaWxlLnBhdGgoInJlc3VsdHMiLCAiMzciLCAiR1NFMzc0MThfcmVjb3VudDJfQi5SRFMiKQpyb2JpbnNvbi5zYW1wbGUuZmlsZSA8LSBmaWxlLnBhdGgoImRhdGEiLCAic2FtcGxlX2luZm8iLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICJtZXRhZGF0YV9HU0UzNzQxOC50c3YiKQpgYGAKCmBgYHtyfQpyb2JpbnNvbi5iLm1hdHJpeCA8LSByZWFkUkRTKHJvYmluc29uLmIuZmlsZSkKcm9iaW5zb24uc2FtcGxlLmRmIDwtIHJlYWRyOjpyZWFkX3Rzdihyb2JpbnNvbi5zYW1wbGUuZmlsZSkKYGBgCgpUaGUgc3ViZ3JvdXAgaW5mb3JtYXRpb24gaW4gdGhpcyBkYXRhIHNldCBpcyBhIGJpdCBkaWZmZXJlbnQsIGxldCdzIGxvb2sgYXQKdGhlIGNvdW50cwoKYGBge3J9CnJvYmluc29uLnNhbXBsZS5kZiAlPiUgCiAgZHBseXI6Omdyb3VwX2J5KHN1Ymdyb3VwKSAlPiUKICBkcGx5cjo6dGFsbHkoKQpgYGAKClRvIGRvIGRpZmZlcmVudGlhbCBleHByZXNzaW9uIGFuYWx5c2lzLCB3ZSdsbCByZW1vdmUgdGhlIGBTSEggT1VUTElFUmAgYW5kCmBVYCBzYW1wbGVzCgpgYGB7cn0Kc2FtcGxlLmluZGV4IDwtIHdoaWNoKHJvYmluc29uLnNhbXBsZS5kZiRzdWJncm91cCAlaW4lIGMoIlUiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiU0hIIE9VVExJRVIiKSkKc2FtcGxlcy50by5yZW1vdmUgPC0gcm9iaW5zb24uc2FtcGxlLmRmJHJlZmluZWJpb19hY2Nlc3Npb25fY29kZVtzYW1wbGUuaW5kZXhdCnNhbXBsZXMudG8ucmVtb3ZlCmBgYAoKYGBge3J9CnJlbW92ZS5jb2x1bW4uaW5kZXggPC0gd2hpY2goY29sbmFtZXMocm9iaW5zb24uYi5tYXRyaXgpICVpbiUgc2FtcGxlcy50by5yZW1vdmUpCmBgYAoKRG8gdGhlIGZpbHRlcmluZwoKYGBge3J9CnJvYmluc29uLmIubWF0cml4IDwtIHJvYmluc29uLmIubWF0cml4WywgLXJlbW92ZS5jb2x1bW4uaW5kZXhdCnJvYmluc29uLnNhbXBsZS5kZiA8LSByb2JpbnNvbi5zYW1wbGUuZGYgJT4lCiAgZHBseXI6OmZpbHRlcighKHJlZmluZWJpb19hY2Nlc3Npb25fY29kZSAlaW4lIHNhbXBsZXMudG8ucmVtb3ZlKSkgJT4lCiAgZHBseXI6OnNlbGVjdChjKCJyZWZpbmViaW9fYWNjZXNzaW9uX2NvZGUiLCAic3ViZ3JvdXAiLCAiU2V4IiwgImFnZSIsCiAgICAgICAgICAgICAgICAgICJ0aXRsZSIpKQpgYGAKCmBgYHtyfQphbGwocm9iaW5zb24uc2FtcGxlLmRmJHJlZmluZWJpb19hY2Nlc3Npb25fY29kZSAlaW4lIAogICAgICBjb2xuYW1lcyhyb2JpbnNvbi5iLm1hdHJpeCkpCmBgYAoKYGBge3J9CmNvbG5hbWVzKHJvYmluc29uLnNhbXBsZS5kZilbMV0gPC0gIlNhbXBsZSIKYGBgCgojIyBUZXN0IGZvciBkaWZmZXJlbnRpYWwgZXhwcmVzc2lvbgoKYExWVGVzdFdyYXBwZXJgIGdpdmVzIHVzIDMgdGhpbmdzOiAxKSBkaWZmZXJlbnRpYWwgZXhwcmVzc2lvbiByZXN1bHRzIGZyb20KYGxpbW1hYCwgMikgYm94cGxvdCArIGppdHRlciBwbG90cyBvZiB0aGUgbGF0ZW50IHZhcmlhYmxlIGV4cHJlc3Npb24gYW5kIDMpCnRoZSBgQmAgbWF0cml4IGluICJsb25nIiBmb3JtYXQgLS0gdGhpcyBpcyB3aGF0IGlzIHVzZWQgZm9yIHBsb3R0aW5nLgoKV2UnbGwgdXNlIGAiQkgiYCBjb3JyZWN0aW9uIGZvciBtdWx0aXBsZSBoeXBvdGhlc2VzIHRlc3RpbmcgKHRoZSBkZWZhdWx0KS4KCmBgYHtyfQpub3J0aGNvdHQucmVzdWx0cyA8LSAKICBMVlRlc3RXcmFwcGVyKGIubWF0cml4ID0gbm9ydGhjb3R0LmIubWF0cml4LAogICAgICAgICAgICAgICAgc2FtcGxlLmluZm8uZGYgPSBub3J0aGNvdHQuc2FtcGxlLmRmLAogICAgICAgICAgICAgICAgcGhlbm90eXBlLmNvbCA9ICJzdWJncm91cCIsCiAgICAgICAgICAgICAgICAjIHRoZSBib3hwbG90IG91dHB1dCwgdGhlICJsb25nIiBmb3JtYXQgQiBkYXRhLmZyYW1lICh1c2VmdWwgZm9yIAogICAgICAgICAgICAgICAgIyBwbG90dGluZyksIGFuZCB0aGUgbGltbWEgZGlmZmVyZW50aWFsIGV4cHJlc3Npb24gcmVzdWx0cwogICAgICAgICAgICAgICAgIyB3aWxsIGJlIG91dHB1dCBpbiBmaWxlcyB0aGF0IGJlZ2luIHdpdGggdGhpcyBzdHJpbmcKICAgICAgICAgICAgICAgIGZpbGUubGVhZCA9ICJHU0UzNzM4Ml9zdWJncm91cF9yZWNvdW50Ml9tb2RlbCIsCiAgICAgICAgICAgICAgICBwbG90LmRpciA9IHBsb3QuZGlyLAogICAgICAgICAgICAgICAgcmVzdWx0cy5kaXIgPSByZXN1bHRzLmRpcikKYGBgCgpgYGB7cn0Kcm9iaW5zb24ucmVzdWx0cyA8LSAKICBMVlRlc3RXcmFwcGVyKGIubWF0cml4ID0gcm9iaW5zb24uYi5tYXRyaXgsCiAgICAgICAgICAgICAgICBzYW1wbGUuaW5mby5kZiA9IHJvYmluc29uLnNhbXBsZS5kZiwKICAgICAgICAgICAgICAgIHBoZW5vdHlwZS5jb2wgPSAic3ViZ3JvdXAiLAogICAgICAgICAgICAgICAgZmlsZS5sZWFkID0gIkdTRTM3NDE4X3N1Ymdyb3VwX3JlY291bnQyX21vZGVsIiwKICAgICAgICAgICAgICAgIHBsb3QuZGlyID0gcGxvdC5kaXIsCiAgICAgICAgICAgICAgICByZXN1bHRzLmRpciA9IHJlc3VsdHMuZGlyKQpgYGAKCiMjIyBBbnkgb3ZlcmxhcCBhdCBhbGw/CgpgYGB7cn0Kbm9ydGhjb3R0LmRlbHZzIDwtIAogIG5vcnRoY290dC5yZXN1bHRzJGxpbW1hJExWW3doaWNoKG5vcnRoY290dC5yZXN1bHRzJGxpbW1hJGFkai5QLlZhbCA8IDAuMDUpXQpgYGAKCmBgYHtyfQpyb2JpbnNvbi5kZWx2cyA8LSAKICByb2JpbnNvbi5yZXN1bHRzJGxpbW1hJExWW3doaWNoKHJvYmluc29uLnJlc3VsdHMkbGltbWEkYWRqLlAuVmFsIDwgMC4wNSldCmBgYAoKYGBge3J9ClZlbm5EaWFncmFtOjp2ZW5uLmRpYWdyYW0obGlzdChSb2JpbnNvbiA9IHJvYmluc29uLmRlbHZzLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgTm9ydGhjb3R0ID0gbm9ydGhjb3R0LmRlbHZzKSwKICAgICAgICAgICAgICAgICAgICAgICAgICBmaWxlLnBhdGgocGxvdC5kaXIsICJNZWR1bGxvYmxhc3RvbWFfREVMVl9WZW5uLnRpZmYiKSkKYGBgCgo=