# Install the required packages 
if (!require("SSPA")) remotes::install_github("mvaniterson/SSPA") 

# Load necessary libraries 
library(SSPA)
library(genefilter)
library(lattice)

# Access documentation for the "Nutrigenomics" dataset 
# help("Nutrigenomics", package = "SSPA") 
 
# Load the example data set shipped with SSPA and show its dimensions
data(Nutrigenomics)
dim(Nutrigenomics)

# Build one pilot-data object per column of `Nutrigenomics`: within each
# column the first entry is passed as the sample size and all remaining
# entries as the test statistics, using a normal null distribution.
# NOTE(review): the first entry is passed to `samplesize` unchanged, whereas
# the metabolomics case later in this script passes a square-root effective
# sample size -- confirm against the SSPA documentation which scale the
# first row of `Nutrigenomics` is stored on.
pilot_data <- apply(
  Nutrigenomics,
  2,
  function(experiment) {
    SSPA::pilotData(
      statistics = experiment[-1],
      samplesize = experiment[1],
      distribution = "norm"
    )
  }
)

# Visualization of the test statistics and p-values distribution
# (only the first four pilot-data objects are drawn)
par(mfrow = c(2, 2))
for (experiment_pilot in pilot_data[1:4]) {
  plot(experiment_pilot)
}

# Estimate the effect-size density of every pilot-data object with the
# "congrad" method, evaluated on a grid of 2^10 points between -10 and 10.
sample_size <- lapply(
  pilot_data,
  SSPA::sampleSize,
  method = "congrad",
  control = list(verbose = FALSE, resolution = 2^10, from = -10, to = 10)
)

# Labels of the experiments, in the same order as the elements of
# `sample_size`.
compounds_vector <- c(
  "Wy14,643", "fenofibrate", "trilinolenin (C18:3)", "Wy14,643", "fenofibrate"
)
exposure_vector <- c("5_Days", "6_Hours")
# The first two experiments get the first exposure label, the last three the
# second one.
exposure_by_experiment <- rep(exposure_vector, c(2, 3))
stopifnot(
  length(sample_size) == length(compounds_vector),
  length(sample_size) == length(exposure_by_experiment)
)

# Pull the effect-size grid (theta) and the estimated density (lambda) out of
# each fit. The number of grid points is taken from the fits themselves
# instead of repeating the resolution as a literal.
theta_by_experiment <- lapply(sample_size, function(fit) fit@theta)
lambda_by_experiment <- lapply(sample_size, function(fit) fit@lambda)
points_per_experiment <- lengths(theta_by_experiment)
stopifnot(all(points_per_experiment == lengths(lambda_by_experiment)))

compounds <- factor(rep(compounds_vector, times = points_per_experiment))
exposure <- factor(rep(exposure_by_experiment, times = points_per_experiment))

lambda <- unlist(lambda_by_experiment, use.names = FALSE)
theta <- unlist(theta_by_experiment, use.names = FALSE)

effect_size <- data.frame(
  exposure = exposure,
  compounds = compounds,
  lambda = lambda,
  theta = theta
)

# One panel per exposure, one line per compound. `groups` is spelled out in
# full rather than relying on partial argument matching.
lattice::xyplot(
  lambda ~ theta | exposure,
  groups = compounds,
  data = effect_size,
  type = c("g", "l"),
  layout = c(1, 2),
  lwd = 2,
  xlab = "effect size",
  ylab = "",
  auto.key = list(columns = 3, lines = TRUE, points = FALSE, cex = 0.7)
) |>
  print()

# Define the expected sample size (number of samples per group)
sample_size_per_group <- seq(2, 20)

# predictpower() is given an effective sample size. For two groups of equal
# size n this is sqrt(1 / (1/n + 1/n)) = sqrt(n / 2), the same formula used
# for the metabolomics case later in this script, so that the x axis really
# is "per group".
# NOTE(review): assumes each experiment is a balanced two-group comparison.
effective_sample_size_per_group <- sqrt(sample_size_per_group / 2)

n_experiments <- length(sample_size)
stopifnot(n_experiments == length(compounds_vector))

# Predicted average power of every experiment at every sample size,
# concatenated experiment by experiment.
power <- lapply(
  sample_size,
  function(fit) {
    as.numeric(
      SSPA::predictpower(fit, samplesize = effective_sample_size_per_group)
    )
  }
) |>
  unlist(use.names = FALSE)

# Labels repeated once per evaluated sample size so they line up with `power`.
exposure <- rep(exposure_vector, c(2, 3)) |>
  rep(each = length(sample_size_per_group)) |>
  factor()

compounds <- rep(compounds_vector, each = length(sample_size_per_group)) |>
  factor()

samplesize <- rep(sample_size_per_group, times = n_experiments)
average_power <- data.frame(
  power = power,
  exposure = exposure,
  compounds = compounds,
  samplesize = samplesize
)

lattice::xyplot(
  power ~ samplesize | exposure,
  groups = compounds,
  data = average_power,
  type = c("g", "b"), # "g" adds a reference grid, "b" draws points and lines
  layout = c(1, 2),
  lwd = 2,
  pch = 16,
  xlab = "sample size (per group)",
  ylab = "",
  auto.key = list(columns = 3, lines = TRUE, points = FALSE, cex = 0.7)
) |>
  print()

# Load the required packages
# library() stops with an error if limma is missing, instead of returning
# FALSE and failing later at the first limma call.
library(limma)

# Read in the pilot metabolomics data (one row per sample).
# check.names = FALSE keeps the column names exactly as written in the file.
df_metabo_data <- read.csv("human_cachexia.csv", check.names = FALSE)

# The first two columns are not metabolites: column 1 supplies the sample
# names and column 2 the group label further down in this script.
n_samples <- nrow(df_metabo_data)
n_metabolites <- ncol(df_metabo_data) - 2
cat(
  "The pilot data set contains", n_samples, "samples and",
  n_metabolites, "metabolites.\n"
)
# Output recorded in the original notebook, which subtracted only one of the
# two non-metabolite columns (so the metabolite count here is 63, not 64):
# The pilot data set contains  77  sample and  64  metabolites.

# Reshape the measurements to metabolites (rows) x samples (columns).
# Columns 1 and 2 of the raw table (sample name, group label) are dropped
# before transposing. Transposing the mixed-type table directly would turn
# every number into formatted text (7 significant digits by default) and
# lose precision when converting back to numeric.
metabolite_columns <- df_metabo_data[, -(1:2), drop = FALSE]
# as.numeric() leaves numeric columns untouched and turns unparsable text
# into NA with a warning.
metabolite_columns[] <- lapply(metabolite_columns, as.numeric)

# Row names of the result are the metabolite (column) names of the raw table.
df_metabo_data4 <- metabolite_columns |>
  as.matrix() |>
  t() |>
  as.data.frame()
# Columns are named after the first column of the raw table (sample names).
colnames(df_metabo_data4) <- as.character(df_metabo_data[[1]])

# Define the experimental design: the group label of every sample is taken
# from the second column of the raw table.
treatment_group <- factor(df_metabo_data[, 2])

# Design matrix with an intercept and one group-contrast column
exp_design <- model.matrix(~treatment_group, data = df_metabo_data4)

# Fit one linear model per row of `df_metabo_data4`, then apply empirical
# Bayes moderation.
fit_metabo_data <- limma::lmFit(df_metabo_data4, exp_design)
fit_metabo_data_ebayes <- limma::eBayes(fit_metabo_data)

# Extract the full results table for the second coefficient (the group
# effect); number = Inf keeps every row.
test_statistics <- limma::topTable(
  fit_metabo_data_ebayes,
  coef = 2,
  number = Inf
)

# Extract the t statistics as a plain numeric vector
t_statistics <- as.numeric(test_statistics[["t"]])

# Calculate the effective sample size from the number of samples in each
# level of `treatment_group` (first level, then second level).
group_counts <- as.numeric(table(treatment_group))
sample_size_treatment <- group_counts[1]
sample_size_control <- group_counts[2]

# Effective sample size of the two-group comparison: sqrt(1 / (1/n1 + 1/n2))
effective_sample_size <- sqrt(
  1 / (1 / sample_size_treatment + 1 / sample_size_control)
)

# Wrap the t statistics in a pilot-data object (normal null distribution)
# and plot it.
pilot_data_metabo <- SSPA::pilotData(
  statistics = t_statistics,
  samplesize = effective_sample_size,
  distribution = "norm"
)
plot(pilot_data_metabo)

# Estimate the effect-size density with the "congrad" method on a grid of
# 2^10 points between -10 and 10, then plot the estimate.
congrad_control <- list(
  verbose = FALSE,
  resolution = 2^10,
  from = -10,
  to = 10
)
sample_size_distribution <- SSPA::sampleSize(
  pilot_data_metabo,
  method = "congrad",
  control = congrad_control
)
plot(sample_size_distribution)

# Candidate sample sizes per group for the follow-up study
sample_size_expected_metabo <- seq(2, 20, by = 2)
# Matching effective sample sizes for two equal groups: sqrt(n / 2)
effective_sample_size_expected <- sqrt(sample_size_expected_metabo / 2)

# Predicted power at each candidate sample size
power_metabo <- SSPA::predictpower(
  sample_size_distribution,
  samplesize = effective_sample_size_expected
)

# Power curve: points joined by lines, y axis fixed to [0, 1], with a
# background grid added afterwards.
matplot(
  x = sample_size_expected_metabo,
  y = power_metabo,
  type = "b",
  pch = 16,
  ylim = c(0, 1),
  xlab = "sample size (per group)",
  ylab = "predicted power"
)
grid()

# Background

# Minimal Statistical Concepts

# Special Consideration to Complex Omics Data

# Introduction of SSPA

# Installing R Packages

# Case Study

# Case Study 1 - Nutrigenomics and Intestinal Transcription Factor

# 1. Microarray Data Overview

# 2. Initializing PilotData Object

# 3. Estimating Effect Sizes

# 4. Calculating the Average Power

# Case Study 2 - Metabolomics in Cancer Study