1.小提琴图标注p值
(接lesson4)
仍以基因Rbp1的表达为例
#提取Rbp1的表达量并与metadata中其他信息合并,使metadata中增加一列表示每个细胞的Rbp1表达量
scFURIN =subset(scRNA1,Rbp1>0)
FURIN_exp=scFURIN@assays$RNA@data["Rbp1",]
FURIN_exp.m =scFURIN@meta.data
FURIN_exp.m$exp=FURIN_exp
FURIN_exp.m$Cohort=factor(FURIN_exp.m$orig.ident,levels=c("dapi1","dapi2","CAF1","CAF2"))
#计算p值
library(ggsignif)
#两两计算的分组
my_comparisons=list(c("DapiNeg1","CAF1"),c("DapiNeg2",'CAF2'),
                    c("DapiNeg2" ,"CAF1"))
#可视化
ggplot(FURIN_exp.m,aes(Cohort,exp,fill=Cohort))+
  geom_violin()+theme_classic()+
  theme(plot.title = element_text(size=15,face='bold'),
        axis.text.x = element_text(size=15,face='bold',angle=25,hjust=1,vjust=1),
        axis.text.y = element_text(size=18,face='bold'),
        axis.title.x =element_text(size=0,vjust=1,face='bold'),
        axis.title.y = element_text(size=15,face='bold')) +
  labs(y='Expression',x="Group")+
  geom_signif(comparisons=my_comparisons,step_increase=0.1,map_signif_level=F,test=t.test,size=1,textsize=6)+
  geom_jitter(shape=16,position=position_jitter(0.2))+
  ggtitle("Rbp1")+theme(plot.title=element_text(size=18,hjust=0.5))+
  guides(fill=guide_legend(title=' '))+
  theme(axis.line.x=element_line(linetype=1,color="black",size=1))+
  theme(axis.line.y=element_line(linetype=1,color="black",size=1))+
  theme(legend.text=element_text(size=15))

violin plot with p value
2.比例图标注p值
画比例图
加载数据与r package
##系统报错改为英文
Sys.setenv(LANGUAGE = "en")
##禁止转化为因子
options(stringsAsFactors = FALSE)
##清空环境
rm(list=ls())
setwd("C:/Users/86269/Desktop/shun.C/single_cell")
library(Seurat)
library(tidyverse)
library(dplyr)
library(patchwork)
load("sc.combined.rdata")
为画图挑选颜色
library(RColorBrewer)
qual_col_pals=brewer.pal.info[brewer.pal.info$category=='qual',]
col_vector = unlist(mapply(brewer.pal,qual_col_pals$maxcolors,rownames(qual_col_pals)))
提取细胞的样品来源、cluster以及数量
library(reshape2)
pB2_df <- table(sc.combined@meta.data$new.celltype.2,sc.combined@meta.data$type) %>% melt()
colnames(pB2_df) <- c("Cluster","Sample","Number")
sample_color <- col_vector[1:9]
可视化
pB2 <- ggplot(data = pB2_df,aes(x=Cluster,y=Number,fill=Sample))+
  geom_bar(stat="identity",width=0.8,position="fill")+
  scale_fill_manual(values=sample_color)+
  theme_bw()+
  theme(panel.grid=element_blank())+
  labs(x="",y="Ratio")+
  theme(axis.text.y=element_text(size=12,colour="black"))+
  theme(axis.text.x=element_text(hjust=1,vjust=1,angle=45))
pB2

比例图
该图存在的问题是,三种sample的细胞总数不同,因此正常情况下必须从不同sample中随机抽取同等数量的细胞作比例图才有意义。但是此处三种sample中细胞总数分别为

image.png
数量差别不大,因此可以省去随机抽样这一步
对换横纵坐标
pB3 <- ggplot(data=pB2_df,aes(x=Number,y=Cluster,fill=Sample))+
  geom_bar(stat='identity',width=0.8,position='fill')+
  scale_fill_manual(values=sample_color)+
  theme_bw()+
  theme(panel.grid=element_blank())+
  labs(x="",y="Ratio")+
  theme(axis.text.y=element_text(size=12,colour="black"))+
  theme(axis.text.x=element_text(size=12,colour="black"))+
  theme(
    axis.text.x.bottom =element_text(hjust=1,vjust=1,angle=45)
    
  )
pB3

比例图
将填充颜色对应为细胞种类(即cluster)
pB4 <- ggplot(data=pB2_df,aes(x=Number,y=Sample,fill=Cluster))+
  geom_bar(stat='identity',width=0.8,position='fill')+
  scale_fill_manual(values=col_vector[1:20])+
  theme_bw()+
  theme(panel.grid=element_blank())+
  labs(x="",y="Ratio")+
  theme(axis.text.y=element_text(size=12,colour="black"))+
  theme(axis.text.x=element_text(size=12,colour="black"))+
  theme(
    axis.text.x.bottom =element_text(hjust=1,vjust=1,angle=45)
    
  )
pB4

比例图
在图中标注误差线
#计算细胞比例
x <- table(sc.combined@meta.data$new.celltype.2,sc.combined@meta.data$orig.ident)
#计算每个sample中每种细胞的占比
x3<-t(t(x)/rowSums(t(x)))
x4<-as.data.frame(as.table(t(x3)))
colnames(x4)=c("sample","celltype","Freq")
x4$group =x4$sample %>%str_replace("_.","")
#计算误差线上下值
top<-function(x){
  return(mean(x)+sd(x)/sqrt(length(x)))
}
bottom <- function(x){
  return(mean(x)-sd(x)/sqrt(length(x)))
}
#每个细胞类型在各分组中的情况,以上皮细胞为例
dose_epi <- x4[which(x4$celltype=="Epithelial"),]
 
ggplot(data=dose_epi,aes(x=group,y=Freq,fill=group))+
  stat_summary(geom="bar",fun='mean',
               position=position_dodge(0.9))+
  stat_summary(geom='errorbar',fun.min = bottom,fun.max = top,position=position_dodge(0.9),width=0.2)+
  scale_y_continuous(expand=expansion(mult = c(0.01)))+
  theme_bw()+
  theme(panel.grid=element_blank())+
  labs(x="Group",y="Proportion")+
  geom_point(data=dose_epi,aes(group,Freq),size=1,pch=19)+
  theme(
    axis.text.x.bottom=element_text(hjust=1,vjust=1,angle=45)
    
  )+ggtitle("Epithelial")

带误差线的柱状图,points表示初始数据中单个样本上皮细胞的占比

dose_epi
3.聚类图上加标签
加载数据
##系统报错改为英文
Sys.setenv(LANGUAGE = "en")
##禁止转化为因子
options(stringsAsFactors = FALSE)
##清空环境
rm(list=ls())
setwd("C:/Users/86269/Desktop/shun.C/single_cell")
library(Seurat)
library(tidyverse)
library(dplyr)
library(patchwork)
library(harmony)
library(cowplot)
load("scRNA1.Rdata")
将聚类结果与降维结果整合
x=as.data.frame(scRNA1@active.ident)
df<-scRNA1@reductions$tsne@cell.embeddings
df <- cbind(df,x)
colnames(df)<-c("x","y","ident")

df
初步画出聚类图
p<- ggplot(df,aes(x=x,y=y))+geom_point(aes(colour=ident),size=1)+
  labs(x="t-SNE 1",y="t-SNE 2")
p

p
删去分组信息
p1 <- p+NoLegend()
p1

p1
#加上metadata中的其他信息
meta=scRNA1@meta.data
meta=meta[,c(1:6)]
df.m=merge(df,meta,by=0)

df.m
在图中加上celltype信息
df.m <- df.m %>% group_by(celltype) %>%summarise(x=median(x),y=median(y))
ggplot(df,aes(x,y))+
  geom_point(aes(colour=ident))+
  ggrepel::geom_text_repel(aes(label=celltype),
                           data=df.m,
                           size=5,
                           label.size=1,
                           segment.color=NA)+
  theme(legend.position = "none")+theme_bw()

在图中加上cluster信息
df.m <- df.m %>% group_by(seurat_cluster) %>%summarise(x=median(x),y=median(y))
ggplot(df,aes(x,y))+
  geom_point(aes(colour=ident))+
  ggrepel::geom_text_repel(aes(label=seurat_clusters),
                           data=df.m,
                           size=5,
                           label.size=1,
                           segment.color=NA)+
  theme(legend.position = "none")+theme_bw()

4.在聚类图上画椭圆线(暂时没搞懂)
以p为例

p
obs <- p$data[,c("x","y","ident")]
ellipse_pro <- 0.98
theta <- c(seq(-pi,pi,length=50),seq(pi,-pi,length=50))
circle <- cbind(cos(theta),sin(theta))
ell <- ddply(obs,'ident',function(x){
  if(nrow(x)<=2){
    return(NULL)
  }
  sigma<-var(cbind(x$x,x$y))#求方差
  mu<-c(mean(x$x),mean(x$y))#求平均数
  ed <-sqrt(qchisq(ellipse_pro,df=2))
  data.frame(sweep(circle %*% chol(sigma)*ed,2,mu,FUN="+"))#chol是求一个正定矩阵的上三角矩阵,%*%是矩阵相乘
})
names(ell)[2:3]<- c('x','y')
ell <-ddply(ell,.(ident),function(x)x[chull(x$x,x$y),])
p1 <- p+geom_polygon(data=ell,aes(group=ident),
                     colour= 'black',
                       alpha=1,fill=NA,
                       linetype="dashed")
p1

5.在聚类图中将某些点进行highlight
取某些细胞在聚类图中用不同的颜色标注
#以scRNA1前100个细胞为例
mutlist <- colnames(scRNA1)[1:100]
DimPlot(scRNA1,reduction="tsne",
        cells.highlight = mutlist,
        cols.highlight='#11AA4D',
        sizes.highlight=1.5,
        group.by='orig.ident')

6.3d PCA聚类图
加载数据与package
##系统报错改为英文
Sys.setenv(LANGUAGE = "en")
##禁止转化为因子
options(stringsAsFactors = FALSE)
##清空环境
rm(list=ls())
library(Seurat)
library(scatterplot3d)
setwd("C:/Users/86269/Desktop/shun.C/single_cell")
load("sc.combined.rdata")
数据预处理
#提取三种不同类型的细胞
sc.1=subset(sc.combined,idents = c("Epithelial","Endothelial","Stem_cell"))
#将分组依据换为sample
Idents(sc.1)="orig.ident"
#用取平均值的方式将每个sample的每种细胞合并成一个表达矩阵(将每个sample的每个细胞种类的所有细胞的基因表达量加和,再除以细胞数)
x=AverageExpression(sc.1 ,add.ident ="new.celltype.2")
x=x$RNA

x
进行PCA降维
#PCA降维
pca <- prcomp(t(x))
#提取每个细胞类型的降维结果
pca.data=pca$x
#提取画图需要的PC1,PC2,PC3
pca.data=as.data.frame(pca.data)
pca.data=pca.data[,1:3]
s1 <- strsplit(rownames(pca.data),split = "_")
s2 <- sapply(s1,function(x){x[1]})
s3 <- sapply(s1,function(x){x[3]})
pca.data$Type <- s2
pca.data$celltype = s3

pca.data
可视化
###不同type用形状区分 不同celltype用颜色区分
Type.p=c(rep(11,9),rep(16,9),rep(17,9))
cell.type.p = c(rep(c("blue","red","orange"),9 ))
colors.lib <- c("blue","red","orange")
shapes.lib = c(11,16,17)
###画图
source('huage.3D.plot.R')
s3d <- scatterplot3d(pca.data[,c("PC1","PC2","PC3")],
                     pch = Type.p,color = cell.type.p,                   
                     cex.symbols = 1,grid=FALSE, box=FALSE,                     
                     main = "3D PCA plot")
#加入celltype信息
legend("topright",
       c("Epithelial","Endothelial","Stem_cell"),
       fill=c('blue',"red","orange"),
       box.col=NA,
       cex=0.6)
#加入type信息
legend("topleft",
       c('GF','SPF','FMT'),
       pch = shapes.lib,
       box.col=NA,
       cex =0.6,
       y.intersp = 0.5)
#加上平面
addgrids3d(pca.data[,c("PC1","PC2","PC3")], grid = c("xy", "xz", "yz"))

3D PCA
plot
从图中可看出不同分组里哪些细胞类型差异较大,分散越明显差异越大
