导读
pheatmap默认会对输入矩阵数据的行和列同时进行聚类,但是也可以通过布尔型参数cluster_rows和cluster_cols设置是否对行或列进行聚类,具体看分析需求。利用display_numbers参数可以在热图中的每个cell中填入想要的信息,例如相对丰度信息。利用cutree_rows和cutree_cols参数可以根据聚类产生的tree信息对热图进行分割。利用annotation_col和annotation_row参数可以给列或行添加分组信息。本文将先模拟输入矩阵数据,然后再展示这些参数的具体使用方法。
一、模拟输入矩阵
# Fix the RNG seed so the simulated matrix is reproducible.
set.seed(1995)
# Simulate 200 non-negative decimals (|N(0.5, 0.25)| rounded to 2 digits)
# arranged as 20 rows x 10 columns.
data <- matrix(abs(round(rnorm(200, mean = 0.5, sd = 0.25), 2)), 20, 10)
# Column names: species (bacteria).
colnames(data) <- paste("Species", seq_len(ncol(data)), sep = ".")
# Row names: samples.
rownames(data) <- paste("Sample", seq_len(nrow(data)), sep = ".")
# Normalise every sample (row) to relative abundance. Dividing a matrix by
# a vector of length nrow() recycles down the columns, so cell [i, j] is
# divided by the i-th row sum; the row sums are computed only once.
data_norm <- data / rowSums(data)
# Print the normalised matrix.
data_norm
# 模拟完成的标准化矩阵数据如下:
Species.1 Species.2 Species.3 Species.4 ... Species.10
Sample.1 0.14032835 0.076767862 0.12225993 0.08713198
Sample.2 0.08434712 0.116281427 0.14405921 0.12976480
Sample.3 0.09997205 0.026460449 0.11571788 0.10006522
Sample.4 0.10753751 0.102236996 0.03449825 0.12766149
...
Sample.20
二、聚类分析和热图
1. 基础热图
# Attach the pheatmap package.
library("pheatmap")
# Heatmap of the normalised matrix with all default settings.
pheatmap(mat = data_norm)
# Same heatmap, with the cell border colour set to NA.
pheatmap(mat = data_norm, border_color = NA)
2. colorRampPalette渐变色、cell尺寸调整
cellheight=15 # 设置单元格高度
cellwidth=20 # 设置单元格宽度
color=colorRampPalette(colors = c("blue","white","red"))(10) # 渐变取色方案
# Heatmap with a 10-step blue-white-red gradient and a fixed cell size
# (cell width 20, cell height 15).
pheatmap(
  mat = data_norm,
  color = colorRampPalette(colors = c("blue", "white", "red"))(10),
  cellwidth = 20,
  cellheight = 15
)
3. 在cell中添加丰度
display_numbers=TRUE:使用默认矩阵数据
# Heatmap on a 10-step purple-white-green gradient; display_numbers = TRUE
# writes the plotted matrix values into the cells.
pheatmap(
  mat = data_norm,
  color = colorRampPalette(colors = c("purple", "white", "green"))(10),
  cellwidth = 20,
  cellheight = 15,
  display_numbers = TRUE
)
4. 在cell中添加mark
display_numbers=matrix:使用自定义矩阵数据
fontsize_number=18:mark大小
filename="name.png/pdf": 保存
# Build the character matrix of marks shown inside the heatmap cells.
# It has the same shape and dimnames as data_norm and starts out empty.
data_mark <- matrix(
  "",
  nrow = nrow(data_norm),
  ncol = ncol(data_norm),
  dimnames = dimnames(data_norm)
)
# Apply the thresholds from loosest to strictest so that a stricter mark
# overwrites a looser one:
#   "*"   0.05  >= value > 0.01
#   "**"  0.01  >= value > 0.001
#   "***" 0.001 >= value
# The cut-offs follow the usual p-value star convention; in this tutorial
# they are applied to the values of data_norm.
data_mark[data_norm <= 0.05] <- "*"
data_mark[data_norm <= 0.01] <- "**"
data_mark[data_norm <= 0.001] <- "***"
# Heatmap with the custom marks from data_mark drawn in the cells
# (mark font size 18), written to "mark.pdf".
pheatmap(
  mat = data_norm,
  color = colorRampPalette(colors = c("purple", "white", "green"))(10),
  cellwidth = 25,
  cellheight = 20,
  display_numbers = data_mark,
  fontsize_number = 18,
  filename = "mark.pdf"
)
5. 根据tree将热图分割成2行3列
cutree_rows=num:分割行
cutree_cols=num:分割列
# Marked heatmap split along the clustering trees into 2 row groups and
# 3 column groups, written to "mark_cut.pdf".
pheatmap(
  mat = data_norm,
  color = colorRampPalette(colors = c("purple", "white", "green"))(10),
  cellwidth = 25,
  cellheight = 20,
  display_numbers = data_mark,
  fontsize_number = 18,
  cutree_rows = 2,
  cutree_cols = 3,
  filename = "mark_cut.pdf"
)
6. 添加样品和物种的分组信息
annotation_col:列分组
annotation_row:行分组
annotation_colors:分组颜色
# Simulated sample grouping: ten "A" labels followed by ten "B" labels,
# stored in a one-column data frame keyed by the row names of data_norm.
Group <- rep(c("A", "B"), each = 10)
group_sample <- data.frame(Group, row.names = rownames(data_norm))
# Print the sample grouping table.
group_sample
# 查看:
Group
Sample.1 A
Sample.2 A
Sample.3 A
Sample.4 A
Sample.5 A
Sample.6 A
Sample.7 A
Sample.8 A
Sample.9 A
Sample.10 A
Sample.11 B
Sample.12 B
Sample.13 B
Sample.14 B
Sample.15 B
Sample.16 B
Sample.17 B
Sample.18 B
Sample.19 B
Sample.20 B
# Simulated species grouping: five "G1" labels followed by five "G2"
# labels, stored in a one-column data frame keyed by the column names of
# data_norm.
Genus <- rep(c("G1", "G2"), each = 5)
group_genus <- data.frame(Genus, row.names = colnames(data_norm))
# Print the species grouping table.
group_genus
# 查看:
Genus
Species.1 G1
Species.2 G1
Species.3 G1
Species.4 G1
Species.5 G1
Species.6 G2
Species.7 G2
Species.8 G2
Species.9 G2
Species.10 G2
# Annotation colours: one named colour vector per annotation column.
# Both groupings get explicit colours here (Group: A/B, Genus: G1/G2).
colors <- list(
  Group = c(A = "#1B9E77", B = "#D95F02"),
  Genus = c(G1 = "pink", G2 = "lightgreen")
)
# Marked, tree-split heatmap with genus annotation on the columns and
# sample-group annotation on the rows, written to "mark_group.pdf".
pheatmap(
  mat = data_norm,
  color = colorRampPalette(colors = c("purple", "white", "green"))(10),
  cellwidth = 25,
  cellheight = 20,
  display_numbers = data_mark,
  fontsize_number = 18,
  cutree_rows = 2,
  cutree_cols = 3,
  annotation_row = group_sample,
  annotation_col = group_genus,
  annotation_colors = colors,
  filename = "mark_group.pdf"
)
仅行标签斜体
library(pheatmap)
# NOTE(review): `rose` and `input` are not defined in the visible code;
# they are presumably a strain table and a CAZyme count matrix - confirm.
## Combine the species name and the strain name (row name) into one label.
name <- paste(rose$Species, rownames(rose), sep = " ")
## Reorder the CAZyme columns.
input <- input[, c("GH29", "GH33", "GH95", "GH136", "GH112",
                   "GH2", "GH42", "GH20", "CBM32", "CBM51")]
# Wrap every label in italic() so that only the row labels are italicised.
newnames <- lapply(
  name,
  function(x) bquote(italic(.(x)))
)
# The palette is passed by name and the clustering switches use their full
# argument names, so the call no longer depends on positional or partial
# argument matching.
pheatmap(
  input,
  filename = "rose_hmo_number_num_sp_2.pdf",
  cluster_rows = FALSE, cluster_cols = FALSE,
  cellheight = 20, cellwidth = 20,
  fontsize_col = 15, fontsize_row = 18, fontsize = 12,
  fontfamily = "serif",
  color = colorRampPalette(c("snow", "red"))(50),
  legend = TRUE, annotation_legend = FALSE,
  labels_row = as.expression(newnames)
)
标签旋转:
# Heatmap with column clustering and column labels rotated by 45 degrees,
# written to "pan_pav.pdf". The clustering switch uses its full argument
# name (cluster_cols) and TRUE instead of the partial name and T.
pheatmap(
  input,
  cluster_cols = TRUE,
  color = colorRampPalette(colors = c("white", "deepskyblue1", "indianred1"))(3),
  # legend = FALSE,
  fontsize_col = 11,
  fontsize_row = 13,
  cellwidth = 16,
  cellheight = 16,
  angle_col = 45,
  filename = "pan_pav.pdf"
)
pheatmap常用参数汇总:
display_numbers=TRUE # 使用默认矩阵数据
display_numbers=matrix # 使用自定义矩阵数据
cutree_rows=num # 分割行
cutree_cols=num # 分割列
scale="column" # 列标准化
scale="row" # 行标准化
cellwidth=20 # cell宽度
cellheight=20 # cell高度
fontsize_number=18 # mark大小
filename="name.pdf/png" # 保存,自动调整纸张大小
cluster_rows = F # 行不聚类
cluster_cols = F # 列不聚类
legend = F # 去除颜色图例(色阶legend)
annotation_legend = F # 去除legend注释
border = F # 去除cell边框
border_color = "blue" # cell边框颜色
border_color = NA # cell边框无色
annotation_names_col = F # 不展示列legend的名称
labels_row=""
show_rownames = F # 去除row标签
fontsize = 10 # legend整体大小
fontsize_col = 13 # col标签大小
fontsize_row = 13 # row标签大小
fontsize_number=18 # mark大小
fontfamily="serif" # 新罗马字体
fontface="italic" # 斜体
newnames <- lapply(
current_name,
function(x) bquote(italic(.(x))))
labels_row = as.expression(newnames) # 仅行标签斜体
color=colorRampPalette(colors = c("purple", "snow", "green"))(10) # 渐变的10种颜色
color=colorRampPalette(colors = c("snow", "green", "red"))(3) # 只取三种颜色,与matrix值对应
## 下方高级颜色分组
names(colors) <- c("strings")
colors = list(
group = colors, # group名统一
) # 配置颜色
annotation_row # 行分组
annotation_col = data.frame(group = c()) # 列分组,group名与配色统一
annotation_colors = colors # 使用配置色,group名保持一致
## Colour library
# Read the colour list from a header-less, tab-separated file.
# comment.char="" disables comment parsing, so "#" is read as data
# (presumably because the file holds hex colour codes - TODO confirm).
col = read.table("C:/Users/hutongyuan/Desktop/group_color.list", header=F, sep="\t", check.names=F, comment.char="")
# Keep the first k rows, where k is the number of distinct values in
# group$CAZyme.
# NOTE(review): `group` is not defined in the visible code - confirm.
colors = col[1:length(unique(group$CAZyme)),]
# Name the selected colours after the distinct CAZyme values.
names(colors) <- unique(group$CAZyme)
## Retrieve the matrix in clustered order
# Draw the column-scaled heatmap to "heat_column.pdf" and keep the object
# returned by pheatmap(), which carries the row and column trees.
out <- pheatmap(
  mat = data,
  scale = "column",
  color = colorRampPalette(c("black", "yellow"))(30),
  fontsize_row = 3,
  fontsize_col = 3,
  filename = "heat_column.pdf"
)
# Inspect the top two levels of the returned object.
str(out, max.level = 2)
# Reorder the input matrix by the row and column order of the trees,
# then write it out as a tab-separated table without quoting.
cluster <- data[out$tree_row$order, out$tree_col$order]
write.table(
  cluster,
  file = "data_cluster.txt",
  sep = "\t",
  quote = FALSE
)
参考:
R语言绘制热图——pheatmap
用R包中heatmap画热图
使用pheatmap包绘制热图
更多R语言分析和绘图:
[1] R语言UPGMA聚类分析和树状图
[2] R语言菌群组成分析和Stackplot堆叠图
[3] R语言菌群Alpha多样性分析和Boxplot箱形图
[4] Is it possible to italicize row names with pheatmap()?