setwd("D:\\GWAS\\QTL")
library(BGLR)
library(plyr)
library(data.table)
wheat.Y <- read.table("wheat_pheno_imputed.txt", header = T,row.names="id",sep = "")
Y <- wheat.Y
MAS<-fread("MAS.txt", sep = "\t")+1
MAS <- scale(MAS,center = TRUE,scale = TRUE)
M<-tcrossprod(MAS )/ncol(MAS )
#参数设置:
nIter<-12000
burnIn<-2000
# 10-fold cross validation
folds<-10
for (p in 1:ncol(Y)) {
y <- Y[, p]
set.seed(2023)
n <- length(y)
sets <- rep(1:10, round(length(y) / folds, digits = 0))
sets<-sets[order(runif(nrow(M)))]
# 定义评价指标
COR.CV<-rep(NA,times=(folds+1))
MSE.CV<-rep(NA,times=(folds+1))
MAE.CV<-rep(NA,times=(folds+1))
RMSE.CV<-rep(NA,times=(folds+1))
yHatCV<-numeric() #定义一个变量来存储交叉验证结果
result_list <- list() # 创建一个空的列表来存储每一折的结果
for(fold in 1:folds)
{
yNa<-y # 创建一个变量,将回归变量赋值给yNA
whichNa<-which(sets==fold) # 创建一个变量,用于存储交叉验证中当前折的索引
yNa[whichNa]<-NA # 将当前索引位置的值设置为缺失
ETA = list(
list(X=M, model="FIXED") # Random effects
)
#创建一个列表
file_path <- paste('MAS_BLUP_', colnames(Y)[p], sep='')
fm <- BGLR(y = yNa,
ETA = ETA,
nIter = nIter,
burnIn = burnIn,
saveAt = file_path)#利用BGLR函数进行回归分析,yNA作为目标值,ETA作为特征矩阵
yHatCV[whichNa]<-fm$yHat[fm$whichNa] #将交叉验证中当前折的预测值存储在定义好的变量中
COR.CV[fold]<-cor(fm$yHat[fm$whichNa],y[whichNa])
MAE.CV[fold]= mean(abs(fm$yHat[fm$whichNa]-y[whichNa]))
MSE.CV[fold]<-mean((fm$yHat[fm$whichNa]-y[whichNa])^2)
RMSE.CV[fold]<-sqrt(mean((fm$yHat[fm$whichNa]-y[whichNa])^2))
# 将每一折的结果存储到结果列表中
result_list[[fold]] <- list(
COR = COR.CV[fold],
MAE = MAE.CV[fold],
MSE = MSE.CV[fold],
RMSE = RMSE.CV[fold],
real_value = y[whichNa],
pred_value = fm$yHat[fm$whichNa]
)
}
combined_data <- data.frame()
for (fold in 1:folds) {
real_value <- result_list[[fold]]$real_value
pred_value <- result_list[[fold]]$pred_value
fold_data <- data.frame(Fold = fold, RealValue = real_value, PredValue = pred_value)
combined_data <- rbind(combined_data, fold_data)
}
# 为每个表型保存单独的CSV文件
file_name <- paste("MAS_results_", colnames(Y)[p], ".csv", sep = "")
write.csv(combined_data, file_name, row.names = FALSE)
}