下载mature.fa文件
1.打开miRBase网站的下载页面:miRBase - Downloads
2.查看mature.fa文件,发现里面包含很多物种的miRNA序列,人的miRNA序列是以hsa开头,按照这个特征提取人的miRNA序列
使用R语言从mature.fa提取人的miRNA序列
# Extract the human miRNA records (names starting with "hsa-") from miRBase's
# mature.fa and write them to human_miRNA.fasta as exactly two lines per
# record: the ">name ..." header line followed by the sequence line.
library(tidyverse)
library(dplyr)
library(Biostrings)

# Each line of the file is expected to land in the single column V1
# (assumes the file contains no tab characters).
data = read.delim("mature.fa", header = FALSE, stringsAsFactors = FALSE)

# View() only makes sense in an interactive session.
if (interactive()) View(data)

# The file is assumed to alternate header line / sequence line; fail early
# instead of silently pairing names with the wrong sequences.
stopifnot(nrow(data) >= 2, nrow(data) %% 2 == 0)
header_idx = seq(1, nrow(data), by = 2)
sequence_idx = seq(2, nrow(data), by = 2)
stopifnot(all(grepl("^>", data$V1[header_idx])))

# Quick look at the first few sequences (not the whole vector).
head(data$V1[sequence_idx])

df = data.frame("miRNA" = data$V1[header_idx],
                "sequence" = data$V1[sequence_idx],
                stringsAsFactors = FALSE)

# Keep only the human entries.
df = df[grepl("^>hsa-", df$miRNA), ]
head(df)

# paste0() joins without a separator; plain paste() would insert spaces
# around "\n". No trailing "\n" either: writeLines() already terminates each
# element with a newline, so an explicit one would add a blank line after
# every record.
fasta_content = paste0(df$miRNA, "\n", df$sequence)
writeLines(fasta_content, "human_miRNA.fasta")
使用targetscan软件预测miRNA的靶基因
进行这一步操作时,需要获取每条miRNA第2-9位的核苷酸序列(种子区),可以通过任何方式获取这段序列,闲来无事用C语言写了一个小程序来获取这段序列。
输出的文件为:human_miRNA_seed.fasta(虽然扩展名是.fasta,内容实际是以制表符分隔的三列表格:miRNA、sequence、ID)
#include <stdio.h>
#include <string.h>
/*
 * One row of the tab-separated output table: the miRNA name, the extracted
 * sub-sequence, and a numeric ID.
 */
struct targetscan
{
    char miRNA[1024];    /* name token taken from a FASTA header line */
    char sequence[1024]; /* extracted sub-sequence, NUL-terminated */
    int ID;              /* value printed in the third output column */
};

/* Single file-scope record that main() fills in and prints for each entry. */
struct targetscan data;
/*
 * Read human_miRNA.fasta and write human_miRNA_seed.fasta, a tab-separated
 * table with one row per miRNA: name, characters 2-9 of its sequence, and
 * the ID 9606.
 *
 * A line whose first non-blank character is '>' is a header; the first
 * whitespace-delimited token of that line (including the '>') is the name.
 * The next non-blank line is taken as its sequence. Records are recognised
 * by the '>' marker rather than by line-number parity, so blank lines and
 * leading/trailing whitespace do not shift every following record.
 *
 * Returns 0 on success, 1 if a file cannot be opened or an I/O error occurs.
 */
int main(void)
{
    /* Characters treated as padding or line terminators (handles CRLF too). */
    static const char blanks[] = " \t\r\n";
    /*
     * Zero-based offset and length of the extracted window: 8 characters
     * starting at the second one.
     * NOTE(review): TargetScan's own input format may expect the 7-nt seed
     * (positions 2-8) rather than 8 nt -- confirm against its documentation.
     */
    enum { SEED_OFFSET = 1, SEED_LENGTH = 8 };

    FILE *res_file;
    FILE *new_file;
    char line[1024];
    int have_header = 0; /* set once a name is waiting for its sequence */
    int status = 0;
    int write_failed;

    res_file = fopen("human_miRNA.fasta", "rt");
    if (res_file == NULL)
    {
        perror("human_miRNA.fasta");
        return 1;
    }

    new_file = fopen("human_miRNA_seed.fasta", "wt");
    if (new_file == NULL)
    {
        perror("human_miRNA_seed.fasta");
        fclose(res_file);
        return 1;
    }

    fprintf(new_file, "miRNA\tsequence\tID\n");

    while (fgets(line, sizeof line, res_file) != NULL)
    {
        char *text = line + strspn(line, blanks); /* skip leading blanks */
        size_t len = strcspn(text, blanks);       /* length of first token */

        if (len == 0)
        {
            continue; /* blank line */
        }

        if (text[0] == '>')
        {
            /* Header line: remember the name token. */
            if (len >= sizeof data.miRNA)
            {
                len = sizeof data.miRNA - 1;
            }
            memcpy(data.miRNA, text, len);
            data.miRNA[len] = '\0';
            have_header = 1;
            continue;
        }

        if (!have_header)
        {
            continue; /* sequence text with no preceding header */
        }
        have_header = 0;

        if (len < (size_t)(SEED_OFFSET + SEED_LENGTH))
        {
            /* Too short to contain the full window; do not emit a
             * truncated entry. */
            fprintf(stderr, "Skipping %s: sequence shorter than %d characters\n",
                    data.miRNA, SEED_OFFSET + SEED_LENGTH);
            continue;
        }

        memcpy(data.sequence, text + SEED_OFFSET, SEED_LENGTH);
        data.sequence[SEED_LENGTH] = '\0';
        data.ID = 9606; /* presumably the NCBI taxonomy ID for human -- confirm */

        fprintf(new_file, "%s\t%s\t%d\n", data.miRNA, data.sequence, data.ID);
    }

    if (ferror(res_file))
    {
        perror("human_miRNA.fasta");
        status = 1;
    }
    fclose(res_file);

    /* Buffered output is flushed by fclose(), so a failed write may only
     * show up here. */
    write_failed = ferror(new_file);
    if (fclose(new_file) != 0)
    {
        write_failed = 1;
    }
    if (write_failed)
    {
        fprintf(stderr, "human_miRNA_seed.fasta: write error\n");
        status = 1;
    }

    return status;
}