##Time:2017-10-8
##Author:Feng Shengyu
#----------------------------------------------------
#一、安装必须的R包(推荐使用的R版本3.2.2)
#必须要安装的包:
# 1、clusterprofilter
# source("http://bioconductor.org/biocLite.R")
# biocLite("clusterprofilter")
# 2、org.Mm.eg.db/org.Hs.eg.db(对应需要研究的物种-小鼠/人)
# biocLite("org.Mm.eg.db")/biocLite("org.Hs.eg.db")
# 3、DOSE
# biocLite(DOSE)
library(clusterProfiler)
library(DOSE)
library(org.Mm.eg.db)
#二 change the type of gene
#使用的上游数据是RNA-seq做完的差异表达的基因列表
#example:
# 15431
# 244091
# 15430
# 319158
# 13871
# 109663
# 735269
# 378431
# 21384
# 105247262
#读取gene list
gene <- read.table("C:\\Users\\Feng\\Desktop\\up_regulate_symbolGene.txt")
geneSymbol <- gene[,1]
geneSymbol
#转化基因类型,一般用cufflinks做的结果是symbol,此时需要转化为entrzid
geneEntrezID <- bitr(geneSymbol, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Mm.eg.db")
#可以同时转为多个类型的基因
#geneEntrezID <- bitr(geneSymbol, fromType="SYMBOL", toType=c("ENTREZID","UNIPROT"), OrgDb="org.Mm.eg.db",)
#三、enrichment analysis
#GO富集分析
ego_cc <- enrichGO(gene = geneEntrezID[,2], #使用entrezID作为输入
OrgDb=org.Mm.eg.db,
ont = "CC",
pAdjustMethod = "BH",
minGSSize = 1,
pvalueCutoff = 0.05,
qvalueCutoff = 0.05,
readable = TRUE
)
setwd("F:\\生信工具大全\\R")
write.table(as.data.frame(ego_cc@result),file="test_CC.txt",sep="\t")
#KEGG富集分析
kk <- enrichKEGG(gene = geneEntrezID[,2],
organism ="mouse",
pvalueCutoff = 0.05,
qvalueCutoff = 0.01,
minGSSize = 1,
use_internal_data =FALSE
)
write.table(as.data.frame(kk@result), file="test_kk.txt",sep="\t")
#作图展示结果
barplot(ego_cc, showCategory=15, title="EnrichmentGO_CC") #条状图,按p从小到大排的
dotplot(ego_BP,title="EnrichmentGO_CC_dot") #点图,按富集的数从大到小的
#--------------------核心代码-----------------------
setwd("F:\\硕士生\\GO和KEGG富集分析")
library(clusterProfiler)
library(DOSE)
library(org.Mm.eg.db)
gene <- read.table("C:\\Users\\Feng\\Desktop\\up_regulate.gene")
geneSymbol <- gene[,1]
geneEntrezID <- bitr(geneSymbol, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Mm.eg.db")
ego_cc <- enrichGO(gene = geneEntrezID[,2], #使用entrezID作为输入
OrgDb=org.Mm.eg.db,
ont = "CC",
pAdjustMethod = "BH",
minGSSize = 1,
pvalueCutoff = 0.01,
qvalueCutoff = 0.01,
readable = TRUE
)
write.table(as.data.frame(ego_cc@result),file="haimati_M_up_enrich_GO.txt",sep="\t")
barplot(ego_cc, showCategory=15, title="GO_Enrichment") #条状图,按p从小到大排的
ego_BP <- enrichKEGG(gene = geneEntrezID[,2],
organism ="mouse", #http://www.genome.jp/kegg/catalog/org_list.html(species names)
pvalueCutoff = 0.05,
qvalueCutoff = 0.01,
minGSSize = 1,
use_internal_data =FALSE
)
write.table(as.data.frame(ego_BP@result), file="haimati_M_up_enrich_KEGG.txt",sep="\t")
dotplot(ego_BP,title="EnrichmentGO_CC_dot") #点图,按富集的数从大到小的