# 火山图的教程已经很多了,可以参考我之前写的《R绘图:ggplot2绘制火山图》。不过从美学角度来看,对称火山图更令人赏心悦目,有种流式细胞图的风格。
##########################################################################
#2022-11-24
#R绘图:ggplot2绘制火山图
# NOTE: the former `rm(list = ls())` was dropped on purpose. It silently wipes
# the caller's workspace and does not reset loaded packages or options, so it
# gives no real guarantee of a clean session.
library(ggplot2)
library(dplyr)
library(readxl)

# Working directory holding the input workbooks. The relative paths passed to
# read_excel() in this script are resolved against it.
setwd("D:/01_科研数据/miR-8-3 data/bio_information_analysis/")

# Import the differential-expression table used for plotting.
matrix <- read_excel(
  path = "mir8vswt_deg.xlsx",
  sheet = "mir8vswt_deg",
  col_names = TRUE,
  na = "NA"
)

# Keep the contiguous column range from `mir8` through `gene_name`.
matrix2 <- matrix %>%
  select(mir8:gene_name)

# Thresholds used to classify genes.
cut_off_pvalue <- 0.05
cut_off_FDR <- 0.0000001 # statistical-significance cut-off (FDR)
cut_off_logFC <- 1 # fold-change cut-off, in log2 units

# Classify every gene as "up", "down" or "none" and store it in `diff`.
# * Rows whose p-value or fold change is NA are treated as "none"; otherwise
#   they would become NA, sort last and be drawn on top of the real hits.
# * The direction is taken from the sign of log2FoldChange. The previous inner
#   test (`> cut_off_logFC`) labelled a gene sitting exactly on the positive
#   threshold as "down" although the outer test (`>=`) had accepted it.
is_significant <- !is.na(matrix2$pvalue) &
  !is.na(matrix2$log2FoldChange) &
  matrix2$pvalue < cut_off_pvalue &
  abs(matrix2$log2FoldChange) >= cut_off_logFC
matrix2$diff <- ifelse(
  is_significant,
  ifelse(matrix2$log2FoldChange > 0, "up", "down"),
  "none"
)

# log2(x + 1) transform of the two expression columns. Base 2 (rather than the
# natural log used before) keeps the axes in the same units as log2FoldChange,
# so the y = x +/- 1 reference lines drawn by the plots below correspond to a
# two-fold difference between the two columns.
matrix2$mir8 <- log2(matrix2$mir8 + 1)
matrix2$wt <- log2(matrix2$wt + 1)

# Order rows so that "none" comes first and "up" last: ggplot2 draws points in
# row order, so the significant genes end up on the top layer.
matrix2$diff <- factor(matrix2$diff, levels = c("up", "down", "none"))
matrix2 <- matrix2[order(matrix2$diff, decreasing = TRUE), ]
# 绘图
# - 1st -----------------------------------------------------------------------
# Scatter plot of the two transformed expression columns (x = wt, y = mir8);
# points are colored by the up / none / down class stored in `diff`.
# ggplot2 is already attached at the top of the script, so it is not loaded
# again here.
ggplot(matrix2, aes(x = wt, y = mir8)) +
  geom_point(aes(color = diff), size = 1) +
  # Colors are matched to `limits` by position: up = red, none = gray,
  # down = green4.
  scale_color_manual(
    values = c("red", "gray", "green4"),
    limits = c("up", "none", "down")
  ) +
  theme_bw() +
  labs(x = "wild type", y = "miR-8-3p-/-", color = "") +
  # Dashed reference lines y = x + 1, y = x - 1 and y = x, drawn by a single
  # layer. `linewidth` replaces `size`, which is deprecated for line geoms
  # since ggplot2 3.4.0.
  geom_abline(
    intercept = c(1, -1, 0),
    slope = 1,
    colour = "black",
    linetype = "dashed",
    linewidth = 0.5
  )
# - 2nd -----------------------------------------------------------------------
# Same scatter plot, but points are colored by a continuous gradient of the
# raw p-value instead of the discrete up / none / down class.
ggplot(matrix2, aes(x = wt, y = mir8)) +
  geom_point(aes(color = pvalue), size = 0.8) +
  # Diverging gradient centred on p = 0.5: red (low) -> goldenrod -> blue (high).
  scale_color_gradient2(
    low = "red",
    mid = "darkgoldenrod2",
    high = "royalblue2",
    midpoint = 0.5
  ) +
  theme_bw() +
  labs(x = "wild type", y = "miR-8-3p-/-", color = "p-value") +
  # Dashed reference lines y = x + 1, y = x - 1 and y = x, drawn by a single
  # layer. `linewidth` replaces `size`, which is deprecated for line geoms
  # since ggplot2 3.4.0.
  geom_abline(
    intercept = c(1, -1, 0),
    slope = 1,
    colour = "black",
    linetype = "dashed",
    linewidth = 0.5
  )
# - 3rd -----------------------------------------------------------------------
# Class-colored scatter plot with smaller points, a different palette and
# customised axis / legend text.
ggplot(matrix2, aes(x = wt, y = mir8)) +
  geom_point(aes(color = diff), size = 0.65) +
  # Colors are matched to `limits` by position: up = red, none = RoyalBlue,
  # down = green3.
  scale_color_manual(
    values = c("red", "RoyalBlue", "green3"),
    limits = c("up", "none", "down")
  ) +
  theme_bw() +
  labs(x = "wild type", y = "miR-8-3p-/-", color = "") +
  # Dashed reference lines y = x + 1, y = x - 1 and y = x, drawn by a single
  # layer. `linewidth` replaces `size`, which is deprecated for line geoms
  # since ggplot2 3.4.0.
  geom_abline(
    intercept = c(1, -1, 0),
    slope = 1,
    colour = "black",
    linetype = "dashed",
    linewidth = 0.5
  ) +
  # Text styling: bold-italic axis titles, black tick labels, italic legend
  # title. (For rotated x labels add `angle = 45, hjust = 1` to axis.text.x.)
  theme(
    strip.text = element_text(face = "bold"),
    axis.title.x = element_text(face = "bold.italic", size = 12),
    axis.title.y = element_text(face = "bold.italic", size = 12),
    axis.text.x = element_text(size = 10, color = "black"),
    axis.text.y = element_text(size = 10, color = "black"),
    legend.title = element_text(size = 12, face = "italic"),
    legend.text = element_text(size = 10)
  )
# - WJJ data ------------------------------------------------------------------
# Second data set: read from sheet "Sheet2" of the workbook below (path is
# relative to the working directory set earlier in the script).
wjj <- read_excel(
  path = "larval_pupa_gene_express_matrix_from_wjj.xlsx",
  sheet = "Sheet2",
  col_names = TRUE,
  na = "NA"
)

# Classify every gene as "up", "down" or "none" and store it in `diff`, using
# the thresholds cut_off_pvalue / cut_off_logFC defined earlier in the script.
# * Rows whose p-value or fold change is NA are treated as "none"; otherwise
#   they would become NA, sort last and be drawn on top of the real hits.
# * The direction is taken from the sign of log2FoldChange. The previous inner
#   test (`> cut_off_logFC`) labelled a gene sitting exactly on the positive
#   threshold as "down" although the outer test (`>=`) had accepted it.
wjj_is_significant <- !is.na(wjj$pvalue) &
  !is.na(wjj$log2FoldChange) &
  wjj$pvalue < cut_off_pvalue &
  abs(wjj$log2FoldChange) >= cut_off_logFC
wjj$diff <- ifelse(
  wjj_is_significant,
  ifelse(wjj$log2FoldChange > 0, "up", "down"),
  "none"
)

# log2(x + 1) transform of the two expression columns. Base 2 (rather than the
# natural log used before) keeps the axes in the same units as log2FoldChange,
# so the y = x +/- 1 reference lines of the plot below correspond to a
# two-fold difference between the two columns.
wjj$baseMeanA <- log2(wjj$baseMeanA + 1)
wjj$baseMeanB <- log2(wjj$baseMeanB + 1)

# Order rows so that "none" comes first and "up" last: ggplot2 draws points in
# row order, so the significant genes end up on the top layer.
wjj$diff <- factor(wjj$diff, levels = c("up", "down", "none"))
wjj <- wjj[order(wjj$diff, decreasing = TRUE), ]
# - WJJ plot ------------------------------------------------------------------
# Class-colored scatter plot of the WJJ data (x = baseMeanB, y = baseMeanA),
# with customised axis / legend text.
ggplot(wjj, aes(x = baseMeanB, y = baseMeanA)) +
  geom_point(aes(color = diff), size = 0.65) +
  # Colors are matched to `limits` by position: up = red, none = RoyalBlue,
  # down = green3.
  scale_color_manual(
    values = c("red", "RoyalBlue", "green3"),
    limits = c("up", "none", "down")
  ) +
  theme_bw() +
  labs(x = "white puparium stage", y = "wandering stage", color = "") +
  # Dashed reference lines y = x + 1, y = x - 1 and y = x, drawn by a single
  # layer. `linewidth` replaces `size`, which is deprecated for line geoms
  # since ggplot2 3.4.0.
  geom_abline(
    intercept = c(1, -1, 0),
    slope = 1,
    colour = "black",
    linetype = "dashed",
    linewidth = 0.5
  ) +
  # Text styling: bold-italic axis titles, black tick labels, italic legend
  # title. (For rotated x labels add `angle = 45, hjust = 1` to axis.text.x.)
  theme(
    strip.text = element_text(face = "bold"),
    axis.title.x = element_text(face = "bold.italic", size = 12),
    axis.title.y = element_text(face = "bold.italic", size = 12),
    axis.text.x = element_text(size = 10, color = "black"),
    axis.text.y = element_text(size = 10, color = "black"),
    legend.title = element_text(size = 12, face = "italic"),
    legend.text = element_text(size = 10)
  )

# Number of genes in each class of `diff`.
summary(wjj$diff)
# 参考资料:
# 《除了火山图,差异表达基因还可以这样展示》