library(ggplot2)
library(dplyr)

# 2 Create Example Data
# 2.1 Get random data
# In this simulated experiment, a group of researchers is studying the effects
# of two different treatments (A and B) on a biological response (e.g., gene
# expression level) in three cell lines ("HEK293", "MCF7", "A549"). For each
# treatment, 50 cells are tested in each of 3 biological replicates, and a
# control (no treatment) group is included as a baseline. The researchers aim
# to evaluate how each treatment affects gene expression across the different
# cell lines.
# Fix the RNG seed so that every run simulates the same data set
set.seed(123)

# Simulation design
n <- 50 # Number of cells per replicate
i <- 3 # Number of replicates
cell_lines <- c("HEK293", "MCF7", "A549")
treatments <- c("Control", "TreatmentA", "TreatmentB")

# Build the design: one row per CellLine x Treatment x CellID x Replicate
# combination. seq_len() is used instead of 1:n / 1:i so that a count of zero
# gives an empty design rather than the sequence c(1, 0).
data <- expand.grid(
  CellLine = cell_lines,
  Treatment = treatments,
  CellID = seq_len(n),
  Replicate = seq_len(i)
)
# Add simulated measurements to the design.
# GeneExpression:
#   Control: Baseline gene expression
#   TreatmentA: Increased gene expression with some variance
#   TreatmentB: Variable effect depending on cell line
# ProteinLevel:
#   GeneExpression multiplied by a treatment / cell-line specific factor,
#   plus normally distributed noise (mean 100, sd 10)
# Each rnorm() call draws n() values, i.e. one per row; case_when() then keeps,
# for every row, the value belonging to the first condition that matches.
# The formulas are kept in their original order because the order of the
# rnorm() calls determines how the seeded random stream is consumed.
data <- data %>%
  mutate(
    GeneExpression = case_when(
      Treatment == "Control" ~ rnorm(n(), mean = 50, sd = 5),
      Treatment == "TreatmentA" ~ rnorm(n(), mean = 70, sd = 8),
      Treatment == "TreatmentB" & CellLine == "HEK293" ~
        rnorm(n(), mean = 60, sd = 7),
      Treatment == "TreatmentB" & CellLine == "MCF7" ~
        rnorm(n(), mean = 55, sd = 6),
      Treatment == "TreatmentB" & CellLine == "A549" ~
        rnorm(n(), mean = 65, sd = 9)
    ),
    # ProteinLevel reads the GeneExpression column created just above
    ProteinLevel = case_when(
      Treatment == "Control" ~
        GeneExpression * 1 + rnorm(n(), mean = 100, sd = 10),
      Treatment == "TreatmentA" & CellLine == "HEK293" ~
        GeneExpression * 0.2 + rnorm(n(), mean = 100, sd = 10),
      Treatment == "TreatmentA" & CellLine == "MCF7" ~
        GeneExpression * 1.1 + rnorm(n(), mean = 100, sd = 10),
      Treatment == "TreatmentA" & CellLine == "A549" ~
        GeneExpression * 0.9 + rnorm(n(), mean = 100, sd = 10),
      Treatment == "TreatmentB" ~
        GeneExpression * 1.1 + rnorm(n(), mean = 100, sd = 10)
    )
  )

# 2.2 Write Data
# Create folder for data; showWarnings = FALSE keeps this quiet when the
# folder already exists
dir.create("./data", showWarnings = FALSE)
# Save the data as CSV (without a row-name column)
write.csv(data, "./data/biological_lab_data.csv", row.names = FALSE)
# Remove the variables created by the simulation so that the data used from
# here on provably comes from the CSV file. Only these objects are dropped;
# rm(list = ls()) would also wipe unrelated objects from the user's workspace.
# intersect() skips names that do not exist, so rm() never warns.
rm(list = intersect(c("data", "n", "i", "cell_lines", "treatments"), ls()))

# 2.3 Load Data
data <- read.csv("./data/biological_lab_data.csv")