Main
# Attach the array attributes to the module summaries. The right join keeps
# every row of array.att, so only arrays in this data set are retained
mod.meta.df <- dplyr::right_join(x = mod.summary.df,
                                 y = array.att,
                                 by = "Source Name")
Column `Source Name` joining factor and character vector, coercing into character vector
# Keep two sets of rows, stacked in this order:
#   1. samples whose Day is "day: 0" (baseline)
#   2. healthy samples whose Treatment does not contain "unstimulated"
baseline.mod.df <- dplyr::bind_rows(
  dplyr::filter(mod.meta.df, Day == "day: 0"),
  dplyr::filter(mod.meta.df,
                `Disease state` == "healthy",
                !grepl("unstimulated", Treatment))
)
# the full joined table is not needed beyond this point
rm(mod.meta.df)
# Baseline module scores by disease state: one panel per module, jittered
# points for the individual samples and a black diamond at each group median
p <- ggplot2::ggplot(data = baseline.mod.df,
                     mapping = ggplot2::aes(x = `Disease state`,
                                            y = Summary))
# layers: samples first, then the median marker drawn on top
p <- p +
  ggplot2::geom_jitter(mapping = ggplot2::aes(colour = `Disease state`),
                       width = 0.2) +
  ggplot2::stat_summary(geom = "point", fun.y = "median",
                        color = "black", shape = 18, size = 4)
# panels, colours and labels
p <- p +
  ggplot2::facet_grid(~ Module) +
  ggplot2::scale_color_manual(values = c("seagreen3", "#3182bd")) +
  ggplot2::labs(title = "IFN Modular Framework Expression - Baseline",
                subtitle = "Lauwerys, et al.",
                y = "mean expression of genes in module\n(per sample)")
# the x axis already identifies the disease state, so hide the legend
p <- p +
  ggplot2::theme_bw() +
  ggplot2::theme(legend.position = "none")
p
Note that M1.2 expression was shown to be more strongly induced by IFN-beta
than by IFN-alpha in Chiche, et al.
# Write the baseline plot to a PDF under plot.dir. The larger text size
# (15) is applied only to the saved copy; p itself is left unchanged
plot.file <- file.path(plot.dir, "E-GEOD-39088_Chiche_et_al_baseline.pdf")
ggplot2::ggsave(
  filename = plot.file,
  plot = p + ggplot2::theme(text = ggplot2::element_text(size = 15))
)
Saving 7 x 7 in image
# We don't have the IFN-negative labels from the original publication, so
# approximate them: within each module, take the SLE samples with the 9
# lowest summary values (top_n on the negated score), which are likely the
# 9 IFN-negative patients
low.ifn.sle <- baseline.mod.df %>%
  dplyr::filter(`Disease state` == "SLE") %>%
  dplyr::group_by(Module) %>%
  dplyr::top_n(n = 9, wt = Summary * -1)
# call low samples 9 lowest M1.2 scores -- TYPE I INTERFERON
low.ifn.samples <-
  low.ifn.sle[["Source Name"]][low.ifn.sle[["Module"]] == "M1.2"]
low.ifn.samples
[1] "GSM955819_DNA11091-067.CEL" "GSM955807_DNA11091-061.CEL" "GSM955804_DNA10204-307.CEL"
[4] "GSM955786_DNA10204-285.CEL" "GSM955783_DNA10204-281.CEL" "GSM955774_DNA10204-333.CEL"
[7] "GSM955771_DNA10204-337.CEL" "GSM955762_DNA10204-341.CEL" "GSM955759_DNA10204-309.CEL"
# Remove low IFN samples that are placebo; placebo will be its own category.
# Each sample's Treatment is looked up by name with match(), so the logical
# mask is always the same length and order as low.ifn.samples, regardless of
# how the rows of array.att are ordered. A sample that is absent from
# array.att yields an NA treatment, which grepl() reports as no match, so
# that sample is kept
low.ifn.samples <-
  low.ifn.samples[!grepl("Placebo",
                         array.att$Treatment[match(low.ifn.samples,
                                                   array.att$`Source Name`)])]
# Derive the timepoint and the patient identifier: in both the Treatment and
# Patient columns, keep only the text after the last comma and strip any
# leading whitespace. Day is a new column; Patient is overwritten in place
array.att <- dplyr::mutate(
  array.att,
  Day = sub("^\\s+", "", sub(".*[,]", "", Treatment)),
  Patient = sub("^\\s+", "", sub(".*[,]", "", Patient))
)
# Healthy controls have no timepoint information: any Day value that mentions
# neither "day" nor "baseline" is set to NA
array.att$Day <- replace(
  array.att$Day,
  !grepl(paste(c("day", "baseline"), collapse = "|"), array.att$Day),
  NA
)
# Add a Group column derived from the Treatment text (IFN-K treated, placebo,
# untreated control, stimulated control). Conditions are tried in order and
# the first match wins; rows matching none of the patterns get NA.
# Treatment is referenced through mutate's data mask rather than as
# array.att$Treatment, so the conditions always have one value per row of the
# data being mutated (and keep working if that data is grouped or modified
# earlier in the pipe).
# NOTE(review): the pattern "unstimulated = " is labelled
# "Control, stimulated" -- confirm this matches the Treatment strings
array.att <- array.att %>%
  dplyr::mutate(
    Group = dplyr::case_when(
      grepl("IFN-K", Treatment) ~ "IFN-K",
      grepl("Placebo", Treatment) ~ "Placebo",
      grepl("absence", Treatment) ~ "Control, no treatment",
      grepl("unstimulated = ", Treatment) ~ "Control, stimulated"
    )
  )
# Assign patients to one of three groups: IFN-negative, placebo, IFN-positive
# IFN-negative: patients that own one of the low IFN samples
low.ifn.pat <- array.att$Patient[array.att$`Source Name` %in% low.ifn.samples]
# placebo: patients with any array in the Placebo group
placebo.pat <- unique(array.att$Patient[array.att$Group %in% "Placebo"])
# IFN-positive: every remaining identifier that contains "patient"
hi.ifn.pat <- grep("patient",
                   setdiff(array.att$Patient, c(low.ifn.pat, placebo.pat)),
                   value = TRUE)
# label every array with its patient's group; the first matching condition
# wins and arrays whose patient is in none of the groups get NA
array.att <- array.att %>%
  dplyr::mutate(
    `IFN-level` = dplyr::case_when(
      Patient %in% low.ifn.pat ~ "IFN-negative",
      Patient %in% placebo.pat ~ "Placebo",
      Patient %in% hi.ifn.pat ~ "IFN-positive"
    )
  )
# Join the module summaries to the array attributes. The right join keeps
# every row of array.att, so only arrays in this data set are retained
mod.meta.df <- mod.summary.df %>%
  dplyr::right_join(array.att, by = "Source Name")
Column `Source Name` joining factor and character vector, coercing into character vector
# Save the joined module summaries as a tab-separated file under results.dir
readr::write_tsv(
  x = mod.meta.df,
  path = file.path(results.dir, "E-GEOD-39088_Chiche_et_al_module.tsv")
)
# Clear the workspace except for the objects named here (keep.objects is not
# in its own list, so it is removed along with everything else)
keep.objects <- c("%>%", "mod.summary.df", "array.att",
                  "results.dir", "plot.dir")
rm(list = setdiff(ls(), keep.objects))
# drop the IFN-level column from the array attributes
array.att <- array.att %>%
  dplyr::select(-`IFN-level`)