对于更新版的Affymetrix芯片,如Affymetrix Human Gene 1.1 ST Array,affy就显得无能为力了。这时候oligo包就粉末登场了。 下面以GSE48452为例
核心函数:
- read.celfiles()
- oligo::rma()
1. 数据下载及读取
rm(list = ls())
library(GEOquery)
library(oligo)
library(affy)
gse="GSE43332"
#rawdata <- getGEOSuppFiles(gse) #下载原始数据
#setwd(gse)
gset <- getGEO(gse,getGPL = F,destdir = ".")
untar("GSE43332_RAW.tar",exdir = ".")
celfiles <- list.files(pattern = "*CEL.gz$") #批量查找并列出后缀为.gz的文件
#sapply(celfiles, gunzip) #批量解压
data.raw <- read.celfiles(celfiles)
## Reading in : GSM1060627_1A_ML_Cindy_9-8-10_2.CEL.gz
## Reading in : GSM1060628_1B_ML_Cindy_9-8-10.CEL.gz
## Reading in : GSM1060629_2A_N_Cindy_9-8-10.CEL.gz
## Reading in : GSM1060630_2B_N_Cindy_9-8-10.CEL.gz
## Reading in : GSM1060631_3A_NRa_Cindy_9-8-10.CEL.gz
## Reading in : GSM1060632_3B_NRa_Cindy_9-8-10.CEL.gz
## Reading in : GSM1060633_5A_DU145_Cindy_9-8-10.CEL.gz
## Reading in : GSM1060634_5B_DU145_Cindy_9-8-10.CEL.gz
## Reading in : GSM1060635_6A_DU145Ra_Cindy_09-08-10.CEL.gz
## Reading in : GSM1060636_6B_DU145Ra_Cindy_9-8-10.CEL.gz
## Reading in : GSM1060637_1Clone1-P09_Cindy_8-30-11_HuGene-1_0-st-v1_.CEL.gz
## Reading in : GSM1060638_2Clone1-P10_Cindy_8-30-11_HuGene-1_0-st-v1_.CEL.gz
## Reading in : GSM1060639_3Clone3-P05_Cindy_8-30-11_HuGene-1_0-st-v1_.CEL.gz
## Reading in : GSM1060640_4Clone3-P06_Cindy_8-30-11_HuGene-1_0-st-v1_.CEL.gz
sampleNames(data.raw) <- sapply(strsplit(sampleNames(data.raw),"_",fixed=T), "[",1)
#group
pd <- pData(gset[[1]])
pd <- pd %>%
select(geo_accession,'bone metastatic potential:ch1')
colnames(pd) <- c("id","type")
pd$type <- case_when(pd$type=="non-bone metastatic"~"nbm",
T~"bm")
pd$type <- factor(pd$type,levels = c("nbm","bm"))
pd <- pd[order(pd$type),]
group_list <- pd$type
names(group_list) <- pd$id
2.数据预处理
oligo读取的CEL文件质量控制时,也需要数据拟合fitProbeLevelModel
,类似affyPLM的功能。
fit1 <- fitProbeLevelModel(data.raw)
image(fit1,type="weights",which=1,main="weights")
image(fit1,type="residuals",which=1,main="Residuals")
image(fit1,type="sign.residuals",which=1,main="Residuals.sign")
data.eset <- oligo::rma(data.raw)
## Background correcting
## Normalizing
## Calculating Expression
# featureData(data.eset) <- getNetAffx(gene.eset, "probeset") #有时需自己把ID赋值给表达矩阵
data.exprs <- exprs(data.eset)
library(RColorBrewer)
colors <- brewer.pal(12,"Set2")
boxplot(data.exprs,col=colors,main="afterRMA")
有一点很疑惑,为什么oligoPLM对象不能计算RLE、NUSE和RNA降解情况?
其余的与affy处理类似。
参考链接: