RBP = read.table("RBPexp.txt", row.names=1 ,header=T,sep="\t",check.names=F)
AS = read.table("AS.txt", row.names=1 ,header=T,sep="\t",check.names=F)
rownames(AS)=gsub("\\|","\\-",rownames(AS))
corFilter=0.6
pvalueFilter=0.05
outTab=data.frame()
for(i in row.names(RBP)){
if(sd(RBP[i,])>1){
for(j in row.names(AS)){
x=as.numeric(RBP[i,])
y=as.numeric(AS[j,])
corT=cor.test(x,y)
cor=corT$estimate
pvalue=corT$p.value
if((cor>corFilter) & (pvalue<pvalueFilter)){
outTab=rbind(outTab,cbind(SF=i,AS=j,cor,pvalue,Regulation="postive"))
}
if((cor< -corFilter) & (pvalue<pvalueFilter)){
outTab=rbind(outTab,cbind(RBP=i,AS=j,cor,pvalue,Regulation="negative"))
}
}
}
}
head(outTab)
write.table(file="corResult.txt",outTab,sep="\t",quote=F,row.names=F)
library(openxlsx) #读取xlsx文件
library(tidyr) #separate函数
#第一步:整理出 基因|ID|剪接类型
RI <- read.xlsx("RI.xlsx")
head(RI[,21])
RI$splicetype <- paste(RI$geneSymbol,RI$ID,"RI",sep = "|")
head(RI[,24])
RI2 <- RI[,c(21,22,24)]
RI2 <- RI2[,c(3,1,2)]
head(RI2)
RI2 <- separate(RI2,IncLevel1,into = c("HF1","HF2","HF3","HF4","HF5",
"HF6","HF7","HF8","HF9","HF10","HF11", "HF12","HF13","HF14","HF15"),
sep = ",",
remove =TRUE)
RI2 <- separate(RI2,IncLevel2,into = c("normal1","normal2","normal3","normal4",
"normal5","normal6","normal7","normal8"),
sep = ",",remove =TRUE)
head(RI2)
rownames(RI2) <- RI2$splicetype
RI2 <- RI2[,-1]
head(RI2)
SE <- read.xlsx("SE.xlsx")
head(SE[,21])
SE$splicetype <- paste(SE$geneSymbol,SE$ID,"SE",sep = "|")
head(SE[,24])
SE2 <- SE[,c(21,22,24)]
SE2 <- SE2[,c(3,1,2)]
head(SE2)
SE2 <- separate(SE2,IncLevel1,into = c("HF1","HF2","HF3","HF4","HF5",
"HF6","HF7","HF8","HF9","HF10","HF11", "HF12","HF13","HF14","HF15"),
sep = ",",
remove =TRUE)
SE2 <- separate(SE2,IncLevel2,into = c("normal1","normal2","normal3","normal4",
"normal5","normal6","normal7","normal8"),
sep = ",",remove =TRUE)
head(SE2)
rownames(SE2) <- SE2$splicetype
SE2 <- SE2[,-1]
head(SE2)
A3SS <- read.xlsx("A3SS.xlsx")
head(A3SS[,21])
A3SS$splicetype <- paste(A3SS$geneSymbol,A3SS$ID,"A3SS",sep = "|")
head(A3SS[,24])
A3SS2 <- A3SS[,c(21,22,24)]
A3SS2 <- A3SS2[,c(3,1,2)]
head(A3SS2)
A3SS2 <- separate(A3SS2,IncLevel1,into = c("HF1","HF2","HF3","HF4","HF5",
"HF6","HF7","HF8","HF9","HF10","HF11", "HF12","HF13","HF14","HF15"),
sep = ",",
remove =TRUE)
A3SS2 <- separate(A3SS2,IncLevel2,into = c("normal1","normal2","normal3","normal4",
"normal5","normal6","normal7","normal8"),
sep = ",",remove =TRUE)
head(A3SS2)
rownames(A3SS2) <- A3SS2$splicetype
A3SS2 <- A3SS2[,-1]
head(A3SS2)
A5SS <- read.xlsx("A5SS.xlsx")
head(A5SS[,21])
A5SS$splicetype <- paste(A5SS$geneSymbol,A5SS$ID,"A5SS",sep = "|")
head(A5SS[,24])
A5SS2 <- A5SS[,c(21,22,24)]
A5SS2 <- A5SS2[,c(3,1,2)]
head(A5SS2)
A5SS2 <- separate(A5SS2,IncLevel1,into = c("HF1","HF2","HF3","HF4","HF5",
"HF6","HF7","HF8","HF9","HF10","HF11", "HF12","HF13","HF14","HF15"),
sep = ",",
remove =TRUE)
A5SS2 <- separate(A5SS2,IncLevel2,into = c("normal1","normal2","normal3","normal4",
"normal5","normal6","normal7","normal8"),
sep = ",",remove =TRUE)
head(A5SS2)
rownames(A5SS2) <- A5SS2$splicetype
A5SS2 <- A5SS2[,-1]
head(A5SS2)
MXE <- read.xlsx("MXE.xlsx")
MXE$splicetype <- paste(MXE$geneSymbol,MXE$ID,"MXE",sep = "|")
head(MXE[,26])
MXE2 <- MXE[,c(23,24,26)]
MXE2 <- MXE2[,c(3,1,2)]
head(MXE2)
MXE2 <- separate(MXE2,IncLevel1,into = c("HF1","HF2","HF3","HF4",
"HF5","HF6","HF7","HF8","HF9","HF10","HF11", "HF12","HF13","HF14","HF15"),
sep = ",",
remove =TRUE)
MXE2 <- separate(MXE2,IncLevel2,into = c("normal1","normal2","normal3","normal4",
"normal5","normal6","normal7","normal8"),
sep = ",",remove =TRUE)
head(MXE2)
rownames(MXE2) <- MXE2$splicetype
MXE2 <- MXE2[,-1]
head(MXE2)
allsplice <- rbind(RI2,SE2,A3SS2,A5SS2,MXE2)
#第二步:过滤掉在大于四分之一的样本中PSI值为缺失值的可变剪接事件
allsplice <- allsplice[apply(allsplice,
1,
function(x)sum(x!="NA")>17),]
#第三步:过滤波动太小的可变剪接事件
exp=as.matrix(allsplice)
mat=impute.knn(exp)
data=mat$data
data=data[rowMeans(data)>0.05,]
genes=c()
for(i in row.names(data)){
if(sd(data[i,])>0.2){
genes=c(genes,i)
}
}
all=data[genes,]
all <- cbind(id=row.names(all),all)
write.table(all,file="AS.txt",sep="\t",row.names=F,quote=F)