据说,多种R包可以绘制雷达图,对ggplot系列函数熟悉的小伙伴可以选择ggradar
和echarts4r
。fmsb
和radarchart
也同样能实现你的需求
下面,从单细胞差异分析入手,直到GO富集分析
1.单细胞差异分析
FindAllMarkers
# 6. FindAllMarkers celltype----
sce
# An object of class Seurat
# 25288 features across 13560 samples within 1 assay
# Active assay: RNA (25288 features, 2000 variable features)
# 4 dimensional reductions calculated: pca, tsne, umap, harmony
table(sce$celltype)
Idents(sce) <- "celltype"
Idents(sce)
sce.markers <- FindAllMarkers(sce)
save(sce.markers,file = "harmony_celltype.markers.Rdata")
write.csv(sce.markers,file = "harmony_celltype.markers.csv")
2. 挑top100
head(harmony_celltype_markers)
# p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
# FCER1A 0.000000e+00 2.016015 0.734 0.126 0.000000e+00 Inflammatory_DC FCER1A
# CD1C 0.000000e+00 1.647918 0.503 0.064 0.000000e+00 Inflammatory_DC CD1C
# IL1R2 1.776186e-291 1.655928 0.567 0.099 4.491619e-287 Inflammatory_DC IL1R2
# CD1E 3.616531e-250 1.093357 0.333 0.036 9.145483e-246 Inflammatory_DC CD1E
# NAPSB 3.350806e-242 1.358380 0.521 0.097 8.473517e-238 Inflammatory_DC NAPSB
# NR4A3 5.633118e-232 1.374079 0.625 0.145 1.424503e-227 Inflammatory_DC NR4A3
sce.markers <- harmony_celltype_markers
DT::datatable(sce.markers)
# 对每一个cluster,根据 avg_log2FC,取前100个基因
top100 <- sce.markers %>% group_by(cluster) %>% top_n(100, avg_log2FC)
head(top100)
dim(top100)
#1000 7
3. GO富集分析
-
bitr
: 转换ID
# GO富集分析
# https://mp.weixin.qq.com/s/z-Kk02I2g8vAESgcGuncUg
library(clusterProfiler)
# 挑选p_val<0.05
de_genes <- subset(top100, p_val<0.05)
length(de_genes$gene) # 1000
head(de_genes)
# 转化ID
entrez_genes <- bitr(de_genes$gene, fromType="SYMBOL",
toType="ENTREZID",
OrgDb="org.Hs.eg.db") # 人
# 5.45% of input gene IDs are fail to map...
head(entrez_genes)
# SYMBOL ENTREZID
# 1 FCER1A 2205
# 2 CD1C 911
# 3 IL1R2 7850
# 4 CD1E 913
# 5 NAPSB 256236
# 6 NR4A3 8013
# 将原来的SYMBOL 和 ENTREZID 对应起来
mix <- merge(de_genes,entrez_genes,by.x="gene",by.y="SYMBOL")
dim(mix) #950 8
head(mix)
# gene p_val avg_log2FC pct.1 pct.2 p_val_adj cluster ENTREZID
# 1 A2M 4.322616e-68 0.4559287 0.402 0.183 1.093103e-63 TREM2_Mf 2
# 2 A2M 2.036888e-05 0.5190426 0.210 0.200 5.150881e-01 CCR2- HLA-DRhi C1 2
# 3 ABCA6 7.529550e-58 0.7018329 0.127 0.052 1.904073e-53 CCR2- HLA-DRhi C1 23460
# 4 ABI3 1.146337e-237 1.3451063 0.489 0.106 2.898856e-233 CD16+ Mono 51225
# 5 ABL2 0.000000e+00 1.3000718 0.501 0.180 0.000000e+00 CCR2+ HLA-DRhi C2 27
# 6 ACAP1 2.340444e-179 0.9188265 0.333 0.047 5.918515e-175 Mast cell 9744
-
merge
: 整合 -
split
: 拆分 -
compareCluster
: 富集分析(多个clusters)
# 整合起来
de_gene_clusters <- data.frame(ENTREZID=mix$ENTREZID,
cluster=mix$cluster)
table(de_gene_clusters$cluster)
# 只挑选其中3个cluster作富集
de_gene_clusters <- de_gene_clusters[(de_gene_clusters$cluster == 'CCR2- HLA-DRhi C1'|
de_gene_clusters$cluster=='CCR2+ HLA-DRhi C2'|
de_gene_clusters$cluster== 'CCR2+ HLA-DRhi C3'),]
table(de_gene_clusters$cluster)
# CCR2- HLA-DRhi C1 CCR2+ HLA-DRhi C2 CCR2+ HLA-DRhi C3
# 96 98 95
# 拆分成list
list_de_gene_clusters <- split(de_gene_clusters$ENTREZID,de_gene_clusters$cluster)
因为我下面可视化是针对某些通路,所以这里pvalueCutoff
,qvalueCutoff
都调的很大,尽可能富集出感兴趣的通路出来
# 多个clusters富集分析
formula_res <- compareCluster(
ENTREZID~cluster,
data=de_gene_clusters,
fun="enrichGO",
OrgDb="org.Hs.eg.db",
ont = "ALL", # GO富集类型,One of "BP", "MF", and "CC" or "ALL"
pAdjustMethod = "BH",
pvalueCutoff = 1,
qvalueCutoff =1,
minGSSize = 5, # 最小的基因富集数量
maxGSSize = 1000) # 最大的基因富集数量
head(formula_res)
f=as.data.frame(formula_res)
dim(f) #9805 12
- 挑选感兴趣的通路
enrich <- c("response to oxidative stress",
"response to endoplasmic reticulum stress",
"interleukin-10 production",
'antigen processing and presentation',
'endocytosis',
'response to tumor necrosis factor',
'I-kappaB kinase/NF-kappaB signaling',
'response to interferon-gamma',
'cytokine production',
'leukocyte chemotaxis')
choose_f <- f[(f$Description %in% enrich),]
table(choose_f$Description)
df=choose_f
4. 气泡图
p=ggplot(df,aes(x = cluster,
y = Description, # 按照富集度大小排序
size = Count,
colour=p.adjust)) +
geom_point(shape = 16)+ # 设置点的形状
labs(x = "Ratio", y = "Pathway")+ # 设置x,y轴的名称
scale_colour_continuous( # 设置颜色图例
name="Enrichment", # 图例名称
low="#7fcdbb", # 设置颜色范围
high="#fc9272")+
scale_radius( # 设置点大小图例
range=c(5,9), # 设置点大小的范围
name="Size")+ # 图例名称
guides(
color = guide_colorbar(order = 1), # 决定图例的位置顺序
size = guide_legend(order = 2)
)+theme_bw() # 设置主题
p
- 如果名字太长,可以换行显示
p + scale_y_discrete(labels=function(x) str_wrap(x, width=12)) +
scale_x_discrete(labels=function(x) str_wrap(x, width=10)) # 名字换行
5. 雷达图
-
dcast
: 先换成短矩阵
ff <- choose_f[,c("Cluster","Description","Count")]
head(ff)
# Cluster Description Count
# 2 CCR2- HLA-DRhi C1 endocytosis 17
# 51 CCR2- HLA-DRhi C1 response to tumor necrosis factor 7
# 74 CCR2- HLA-DRhi C1 response to interferon-gamma 5
# 93 CCR2- HLA-DRhi C1 leukocyte chemotaxis 6
# 128 CCR2- HLA-DRhi C1 antigen processing and presentation 4
# 785 CCR2- HLA-DRhi C1 response to oxidative stress 5
# 长数据换成短数据形式
dd=dcast(ff,Cluster~Description,value.var = 'Count')
- 调整格式
rownames(dd) <- dd[,1]
dd <- dd[,-1]
# 第一行包含每个变量的最大值
# 第二行包含每个变量的最小值
dd=rbind(rep(20,5) , rep(0,5) , dd)
# 变量需要换成数值形式
for (i in 1:10) {
#i=1
dd[,i] <- as.numeric(dd[,i])
}
str(dd)
# 'data.frame': 5 obs. of 10 variables:
# $ antigen processing and presentation : num 20 0 4 1 3
# $ cytokine production : num 20 0 7 15 17
# $ endocytosis : num 20 0 17 4 4
# $ I-kappaB kinase/NF-kappaB signaling : num 20 0 1 6 4
# $ interleukin-10 production : num 20 0 1 1 NA
# $ leukocyte chemotaxis : num 20 0 6 11 10
# $ response to endoplasmic reticulum stress: num 20 0 2 6 2
# $ response to interferon-gamma : num 20 0 5 4 3
# $ response to oxidative stress : num 20 0 5 12 8
# $ response to tumor necrosis factor : num 20 0 7 14 3
-
radarchart
: 终于到画图了
library(fmsb)
colnames(dd)
# 有些名字太长了,需要调整下距离
# https://stackoverflow.com/questions/69306607/how-to-move-radar-chart-spider-chart-labels-in-r-fmsb-for-r-shiny-so-labels-do
f=c("antigen processing and presentation" ,"cytokine production" ,
"endocytosis" ," NF-kappaB
signaling",
"interleukin-10
production" ,"leukocyte chemotaxis" ,
" response to
endoplasmic reticulum stress" ,
" response to
interferon-gamma",
" response to
oxidative stress" ,
" response to
tumor necrosis factor" )
# https://zhuanlan.zhihu.com/p/363992240
{cairo_pdf(file = "mouse-heart/fig8_radarchart.pdf",
width = 20,
height = 10,
family = "STSong")
#?radarchart
radarchart(dd,
axistype=0, # 坐标轴的类型
vlcex=2, # 标签大小
palcex=5, # 轴四周字体大小缩放比例
pcol=colors_border , #设置颜色
pfcol=colors_in , # 内部填充色
plwd=1.3 , #线条粗线
plty=1,#虚线,实线
vlabels = c(f), # 标签
pty=32, # 点的形状
cglcol="black", # 雷达图网络格颜色
cglty=3, #背景线条虚线,实线
cglwd=0.6 #背景线条粗细
)
legend(x=1.5, y=0.90, legend = rownames(dd[-c(1,2),]),
bty = "n",
pch=20 ,
col=colors_border,
text.col = "black",
cex=1.5, pt.cex=2)
dev.off()
}