Salmon 主要用于从RNA-seq数据中计算转录本的丰度。
# 安装
conda config --add channels bioconda
conda config --add channels conda-forge  # added last = highest priority; Bioconda packages pull dependencies from conda-forge
conda install salmon
salmon -h
salmon v0.14.1
Usage: salmon -h|--help or
salmon -v|--version or
salmon -c|--cite or
salmon [--no-version-check] <COMMAND> [-h | options]
Commands:
index Create a salmon index
quant Quantify a sample
alevin single cell analysis
swim Perform super-secret operation
quantmerge Merge multiple quantifications into a single file
# 基于Salmon分析RNAseq数据
## 获取转录组和建立索引
- 使用Arabidopsis thaliana数据的实例
mkdir -p salmon_tutorial  # -p: no error if the directory already exists (safe to re-run)
cd salmon_tutorial
### 下载转录组
- 可以使用物种的参考转录组,也可以使用StringTie和Trinity组装的转录组
wget -O athal.fa.gz "ftp://ftp.ensemblgenomes.org/pub/plants/release-28/fasta/arabidopsis_thaliana/cdna/Arabidopsis_thaliana.TAIR10.28.cdna.all.fa.gz"  # -O sets the saved file name; lowercase -o would only write wget's log to that path
### 建立转录组索引
salmon index -t athal.fa.gz -i athal_index  # -t: the transcriptome FASTA downloaded above; -i: index name, passed later to `salmon quant -i`
## 下载RNAseq数据
- 示例数据来自一个包含4种处理条件的实验 (accession: PRJDB2508)
- 基于dl_tut_reads.sh下载和整理数据
#!/bin/bash
# dl_tut_reads.sh: download the paired-end FASTQ files for runs
# DRR016125 .. DRR016140 into data/<run>/<run>_{1,2}.fastq.gz.
#
# The script never changes directory, so a failed step cannot cause later
# downloads to land in the wrong place. It is safe to re-run: existing
# directories are reused and wget resumes/skips files already present.
# Exit status is 1 if any download failed, 0 otherwise.
set -u

base_url="ftp://ftp.sra.ebi.ac.uk/vol1/fastq/DRR016"
failed=0

for i in {25..40}; do
  run="DRR0161${i}"
  out_dir="data/${run}"

  if ! mkdir -p "${out_dir}"; then
    printf 'Cannot create directory: %s\n' "${out_dir}" >&2
    failed=1
    continue
  fi

  # Mates 1 and 2 of each paired-end run.
  for mate in 1 2; do
    url="${base_url}/${run}/${run}_${mate}.fastq.gz"
    # -c: continue a partial download instead of saving a ".1" duplicate
    # -P: directory to save the file into
    if ! wget -c -P "${out_dir}" "${url}"; then
      printf 'Download failed: %s\n' "${url}" >&2
      failed=1
    fi
  done
done

exit "${failed}"
### 运行
bash dl_tut_reads.sh
## 定量转录本
- 基于quant_tut_samples.sh批量执行这一步分析
#!/bin/bash
# quant_tut_samples.sh: run `salmon quant` once for each sample directory
# data/DRR016125 .. data/DRR016140, using the athal_index index and the
# sample's two mate files; results go to quants/<sample>_quant.
for sample_dir in data/DRR0161{25..40}; do
  # The last path component is the sample (run) name.
  sample_id="${sample_dir##*/}"
  echo "Processing sample ${sample_id}"
  salmon quant \
    -i athal_index \
    -l A \
    -1 "${sample_dir}/${sample_id}_1.fastq.gz" \
    -2 "${sample_dir}/${sample_id}_2.fastq.gz" \
    -p 8 \
    --validateMappings \
    -o "quants/${sample_id}_quant"
done
- 程序运行成功后,会生成 `quants` 文件夹,其下包含各个样本的结果。
- 主要结果保存在 `quant.sf` 文件中。
Name Length EffectiveLength TPM NumReads
ATMG00010.1 462 268.686 0.000000 0.000
ATMG00030.1 324 139.978 3.600408 7.000
ATMG00040.1 948 753.398 1.051199 11.000
ATMG00050.1 396 205.168 0.000000 0.000
ATMG00060.1 542 347.707 5.590678 27.000
ATMG00070.1 573 378.573 16.165297 85.000
ATMG00080.1 540 345.719 43.941438 211.000