4  Plotting

library(ggplot2)
library(dplyr)
# load the lab measurements that the plots below are built from
data <- read.csv(file = "./data/biological_lab_data.csv")

4.1 Scatter plot (two numeric variables)

Here, we plot the protein level against gene expression to see how the two are related. A scatter plot is the standard way to inspect the correlation between two numeric variables; a fitted trend line can be added with geom_smooth().

# keep a single condition: HEK293 cells, replicate 1, control treatment
data.plot1 <- filter(
  data,
  CellLine == "HEK293",
  Replicate == 1,
  Treatment == "Control"
)

# first scatter plot: pipe the data into ggplot() and add a point layer
data.plot1 %>%
  ggplot(mapping = aes(x = GeneExpression, y = ProteinLevel)) +
  geom_point()

# the same scatter plot without the pipe: the data is passed to ggplot()
# as its first argument
ggplot(
  data = data.plot1,
  mapping = aes(x = GeneExpression, y = ProteinLevel)
) +
  geom_point()

4.1.1 Changing color

# draw every point in red (a fixed colour, set outside aes())
data.plot1 %>%
  ggplot(aes(x = GeneExpression, y = ProteinLevel)) +
  geom_point(colour = "red")

4.1.2 Changing symbols

# change the plotting symbol of the points (shape 17)
data.plot1 %>%
  ggplot(aes(x = GeneExpression, y = ProteinLevel)) +
  geom_point(shape = 17)

# formatting: axis labels, plot title and the black-and-white theme
data.plot1 %>%
  ggplot(aes(x = GeneExpression, y = ProteinLevel)) +
  geom_point() +
  labs(
    x = "Gene Expression",
    y = "Protein Level",
    title = "Gene Expression Influence Protein Level"
  ) +
  theme_bw()

# same formatted scatter plot, now with a linear-model fit (y ~ x) on top
data.plot1 %>%
  ggplot(aes(x = GeneExpression, y = ProteinLevel)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x) +
  labs(
    x = "Gene Expression",
    y = "Protein Level",
    title = "Gene Expression Influence Protein Level"
  ) +
  theme_bw()

# subset used to introduce facets: HEK293 cells, replicate 1,
# every treatment except "TreatmentB"
data.plot2 <- filter(
  data,
  CellLine == "HEK293",
  Replicate == 1,
  Treatment != "TreatmentB"
)

# one panel per treatment; scales = "free" gives each panel its own axes
data.plot2 %>%
  ggplot(aes(x = GeneExpression, y = ProteinLevel)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x) +
  facet_wrap(vars(Treatment), scales = "free") +
  labs(
    x = "Gene Expression",
    y = "Protein Level",
    title = "Gene Expression Influence Protein Level"
  ) +
  theme_bw()

# keep replicate 1 only (all cell lines and treatments);
# used for facets over two variables
data.plot3 <- filter(data, Replicate == 1)

# grid of panels: cell lines in rows, treatments in columns
data.plot3 %>%
  ggplot(aes(x = GeneExpression, y = ProteinLevel)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x) +
  facet_grid(
    rows = vars(CellLine),
    cols = vars(Treatment),
    scales = "free"
  ) +
  labs(
    x = "Gene Expression",
    y = "Protein Level",
    title = "Gene Expression Influence Protein Level"
  ) +
  theme_bw()

4.2 Density/Histogram

# control-treated HEK293 cells, all replicates
data.plot4 <- filter(
  data,
  CellLine == "HEK293",
  Treatment == "Control"
)


# histogram of gene expression, binned into 10 bins
data.plot4 %>%
  ggplot(aes(x = GeneExpression)) +
  geom_histogram(bins = 10) +
  theme_bw()

# smoothed density estimate of the same gene expression values
data.plot4 %>%
  ggplot(aes(x = GeneExpression)) +
  geom_density() +
  theme_bw()

# one semi-transparent density curve per cell line (control treatment only);
# outline and fill colour both encode the cell line
data %>%
  filter(Treatment == "Control") %>%
  ggplot(aes(x = GeneExpression, colour = CellLine, fill = CellLine)) +
  geom_density(alpha = 0.3) +
  theme_bw()

4.3 Boxplot

# protein level per cell line, one panel per treatment
data.plot3 %>%
  ggplot(aes(x = CellLine, y = ProteinLevel)) +
  geom_boxplot() +
  facet_wrap(vars(Treatment)) +
  labs(x = "Cell Line", y = "Protein Level", title = "Protein Level") +
  theme_bw()

# same comparison in a single panel: treatments are told apart by fill colour
data.plot3 %>%
  ggplot(aes(x = CellLine, y = ProteinLevel, fill = Treatment)) +
  geom_boxplot() +
  labs(x = "Cell Line", y = "Protein Level", title = "Protein Level") +
  theme_bw()

4.3.1 Violin Plot

# violin plot: distribution of protein level per cell line,
# filled by treatment
data.plot3 %>%
  ggplot(aes(x = CellLine, y = ProteinLevel, fill = Treatment)) +
  geom_violin() +
  labs(x = "Cell Line", y = "Protein Level", title = "Protein Level") +
  theme_bw()

4.3.2 Combined Box/Violin Plot

# violins with narrow boxplots on top; the boxplots are dodged by 0.9 so
# that each one sits inside the violin of its own treatment
data.plot3 %>%
  ggplot(aes(x = CellLine, y = ProteinLevel, fill = Treatment)) +
  geom_violin() +
  geom_boxplot(
    width = 0.2,
    position = position_dodge(width = 0.9),
    colour = "black"
  ) +
  labs(x = "Cell Line", y = "Protein Level", title = "Protein Level") +
  theme_bw()

4.4 Barplot

# mean gene expression and protein level per cell line and treatment.
# Note: this overwrites the data.plot4 defined in the histogram section,
# and the result stays grouped by CellLine (see the message printed below).
data.plot4 <- summarise(
  group_by(data.plot3, CellLine, Treatment),
  MeanGeneExpression = mean(GeneExpression),
  MeanProteinLevel = mean(ProteinLevel)
)
`summarise()` has grouped output by 'CellLine'. You can override using the
`.groups` argument.
# bar heights are taken directly from the pre-computed means
# (stat = "identity"), coloured by cell line
data.plot4 %>%
  ggplot(aes(x = CellLine, y = MeanProteinLevel, fill = CellLine)) +
  geom_bar(stat = "identity") +
  labs(x = "Cell Line", y = "Protein Level", title = "Mean Protein Level") +
  theme_bw()

4.4.1 Set custom colors

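Ready-made color palettes are available through the RColorBrewer package; here we define the colors manually instead.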

# custom fill colours as hex codes
cols <- c("#1e3963", "#75051f", "#a66908")

# bars filled with the custom colours; the legend is removed
data.plot4 %>%
  ggplot(aes(x = CellLine, y = MeanProteinLevel, fill = CellLine)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = cols) +
  labs(x = "Cell Line", y = "Protein Level", title = "Mean Protein Level") +
  theme_bw() +
  theme(legend.position = "none")

# custom fill colours as hex codes
cols <- c("#1e3963", "#75051f", "#a66908")

# fill by treatment: with the default position the treatment bars of each
# cell line are stacked on top of each other
data.plot4 %>%
  ggplot(aes(x = CellLine, y = MeanProteinLevel, fill = Treatment)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = cols) +
  labs(x = "Cell Line", y = "Protein Level", title = "Mean Protein Level") +
  theme_bw()

# custom fill colours as hex codes
cols <- c("#1e3963", "#75051f", "#a66908")

# fill by treatment, with the bars placed side by side (dodged)
# instead of stacked
data.plot4 %>%
  ggplot(aes(x = CellLine, y = MeanProteinLevel, fill = Treatment)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  scale_fill_manual(values = cols) +
  labs(x = "Cell Line", y = "Protein Level", title = "Mean Protein Level") +
  theme_bw()

4.5 Heatmap

# A heatmap needs exactly one value per tile. data.plot3 can hold several
# measurements per CellLine x Treatment combination (it feeds the box and
# violin plots), and geom_tile() would draw those tiles on top of each
# other so that only the last one is visible. Aggregate to the mean first.
data.plot5 <- data.plot3 %>%
  group_by(CellLine, Treatment) %>%
  summarize(MeanProteinLevel = mean(ProteinLevel), .groups = "drop") %>%
  # scale() returns a one-column matrix; as.numeric() turns the z-scores
  # back into a plain numeric vector for the fill aesthetic
  mutate(ScaledProteinLevel = as.numeric(scale(MeanProteinLevel)))

# heatmap with the default continuous colour gradient
data.plot5 %>%
  ggplot(aes(CellLine, Treatment, fill = ScaledProteinLevel)) +
  geom_tile()

# diverging colour scale: blue below, white at, and red above the midpoint
# (0 by default, i.e. the mean of the scaled values)
data.plot5 %>%
  ggplot(aes(CellLine, Treatment, fill = ScaledProteinLevel)) +
  geom_tile() +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red")

4.6 UMAP

UMAP Code
library(umap)
# UMAP input: the two numeric measurements as a matrix
umap_data <- as.matrix(
  dplyr::select(data, GeneExpression, ProteinLevel)
)

# fix the random seed before running UMAP so the result is reproducible
set.seed(123)
umap_result <- umap(umap_data)

# UMAP coordinates as a data frame, with the sample annotations bound on
# as extra columns (relies on the layout rows being in the same order as
# the rows of `data`)
umap_df <- bind_cols(
  as.data.frame(umap_result$layout),
  dplyr::select(data, CellLine, Treatment)
)

# scatter plot of the two UMAP dimensions: colour encodes the treatment,
# point shape the cell line
ggplot(
  data = umap_df,
  mapping = aes(x = V1, y = V2, colour = Treatment, shape = CellLine)
) +
  geom_point(size = 3, alpha = 0.8) +
  ggtitle("UMAP of Gene Expression and Protein Levels") +
  xlab("UMAP Dimension 1") +
  ylab("UMAP Dimension 2") +
  theme_minimal() +
  scale_colour_brewer(palette = "Set2")