参考:https://tiramisutes.github.io/2018/12/04/ref-RNA-seq.html
一、质控
1. fastqc
# Run FastQC on every raw gzipped FASTQ file under rawdata/ using 3 threads;
# the reports are written into ./fastqc.
# NOTE(review): FastQC does not create the output directory itself, so
# ./fastqc presumably has to exist before this is run - confirm.
fastqc \
  --threads 3 \
  --outdir fastqc \
  rawdata/*.fq.gz
2.1 trim_galore
# Single sample: trim adapters and low-quality bases from the B_1 read pair.
# Settings: quality cutoff 25, Phred+33 scores, adapter overlap of at least
# 3 bp, reads shorter than 25 bp are discarded, output is gzipped.
trim_galore -q 25 --phred33 --stringency 3 --length 25 \
  --paired B_1_R1.fq.gz B_1_R2.fq.gz \
  --gzip -o ../cleandata_2

# All samples: apply the same trimming to every <id>_R1.fq.gz / <id>_R2.fq.gz
# pair in the current directory. The sample id is derived by stripping the
# "_R1.fq.gz" suffix from the glob match instead of parsing `ls` output, so
# ids with any number of underscores (or unusual characters) work.
for r1 in *_R1.fq.gz; do
  # With no matching files the glob stays literal; skip that case.
  [[ -e "$r1" ]] || continue
  id=${r1%_R1.fq.gz}
  trim_galore -q 25 --phred33 --stringency 3 --length 25 \
    --paired "${id}_R1.fq.gz" "${id}_R2.fq.gz" \
    --gzip \
    -o ../cleandata
done
2.2 trimmomatic
# Paired-end trimming of sample B_1 with Trimmomatic.
# Argument order matters: options (-threads, -phred33) must come BEFORE the
# input/output files, and everything after the four output files is parsed
# as a trimming step. An option placed after ILLUMINACLIP is rejected as an
# unknown trimmer, so all options are given up front and only once.
# Outputs (in order): paired R1, unpaired R1, paired R2, unpaired R2.
# Steps: clip TruSeq3 PE adapters (2 seed mismatches, palindrome threshold 30,
# simple threshold 10), drop leading/trailing bases below Q5, cut when the
# 5-base window average drops below Q15, discard reads shorter than 25 bp.
trimmomatic PE -threads 3 -phred33 \
  B_1_R1.fq.gz B_1_R2.fq.gz \
  ../cleandata_1/B_1.cleandata_paired_R1.fq.gz \
  ../cleandata_1/B_1.cleandata_unpaired_R1.fq.gz \
  ../cleandata_1/B_1.cleandata_paired_R2.fq.gz \
  ../cleandata_1/B_1.cleandata_unpaired_R2.fq.gz \
  ILLUMINACLIP:/apps/users/user01/wanghhh/software/anaconda/anaconda3/envs/RNASeq/share/trimmomatic-0.39-1/adapters/TruSeq3-PE.fa:2:30:10 \
  LEADING:5 TRAILING:5 \
  SLIDINGWINDOW:5:15 MINLEN:25
3. fastqc
# Re-run FastQC on the trimmed, still-paired reads (3 threads); reports are
# written into ./fastq_trim.
# NOTE(review): the "_paired_" pattern matches the Trimmomatic output naming;
# confirm that those files actually live under cleandata/ and that
# ./fastq_trim exists before running.
fastqc \
  --threads 3 \
  --outdir fastq_trim \
  cleandata/*_paired_*.fq.gz
二、比对
1. hisat2
1.1 构建索引
# Build HISAT2 indexes in the current directory.
# Arguments: <reference FASTA> <index basename>; -p sets the build threads.

# Index with basename "esteya_genome" from the CNU scaffolds (10 threads).
hisat2-build -p 10 \
  CNU.scaffolds.fasta \
  esteya_genome

# Index with basename "Pseudomonas_ref" from Pseudomonas.fna (20 threads).
hisat2-build -p 20 \
  Pseudomonas.fna \
  Pseudomonas_ref
1.2 比对
# Align the trimmed B_1 read pair against the esteya_genome index with
# 10 threads, in the background (trailing &). -t prints timing information.
# Besides the SAM file, reads are split into gzipped FASTQ files by outcome:
#   --un-gz / --al-gz            unpaired reads that failed / managed to align
#   --un-conc-gz / --al-conc-gz  pairs that failed / managed to align concordantly
# The alignment summary and metrics are written to their own files.
hisat2 -t -p 10 \
  -x /apps/users/user01/wanghhh/RNASeq/esteya_resin/reference/esteya_genome \
  -1 B_1_R1_val_1.fq.gz \
  -2 B_1_R2_val_2.fq.gz \
  -S ../alignment/B_1.sam \
  --summary-file ../alignment/B_1_summary.txt \
  --met-file ../alignment/B_1_met.txt \
  --al-gz ../alignment/B_1_unpaired_aligned.fastq.gz \
  --un-gz ../alignment/B_1_unpaired_unaligned.fastq.gz \
  --al-conc-gz ../alignment/B_1_paired_aligned.fastq.gz \
  --un-conc-gz ../alignment/B_1_paired_unaligned.fastq.gz &
# Multiple samples: align every trimmed read pair in the current directory
# against the esteya_genome index, one sample after another, in the background.
#
# - nohup can only wrap a single command, not a shell loop, so the whole loop
#   runs inside `nohup bash -c '...'`; that way the remaining samples are
#   still processed after the terminal is closed.
# - Sample ids come from the "<id>_R1_val_1.fq.gz" glob rather than from
#   parsing `ls` output.
# - Every option line ends with a backslash so that all options belong to the
#   same hisat2 invocation.
# - The *-gz variants are used because the output names end in .gz; the plain
#   --un/--al/--un-conc/--al-conc options would write uncompressed FASTQ.
nohup bash -c '
  for r1 in *_R1_val_1.fq.gz; do
    # With no matching files the glob stays literal; skip that case.
    [ -e "$r1" ] || continue
    id=${r1%_R1_val_1.fq.gz}
    hisat2 -t -p 6 \
      -x /apps/users/user01/wanghhh/RNASeq/esteya_resin/reference/esteya_genome \
      -1 "${id}_R1_val_1.fq.gz" \
      -2 "${id}_R2_val_2.fq.gz" \
      -S "../alignment/${id}.sam" \
      --un-gz "../alignment/${id}_unpaired_unaligned.fastq.gz" \
      --al-gz "../alignment/${id}_unpaired_aligned.fastq.gz" \
      --un-conc-gz "../alignment/${id}_paired_unaligned.fastq.gz" \
      --al-conc-gz "../alignment/${id}_paired_aligned.fastq.gz"
  done
' &
# Align the trimmed B_1 read pair against the Pseudomonas index with
# 10 threads, in the background (trailing &).
# Every line ends with a backslash so that all options are passed to the same
# hisat2 invocation; without them each option line is run as its own command.
# NOTE(review): the -x basename here is ".../database/Pseudomonas"; confirm it
# matches the basename the index was actually built with.
# NOTE(review): the output paths are the same ../alignment/B_1* names used for
# the esteya_genome alignment of B_1, so running both overwrites results.
hisat2 -t -p 10 \
  -x /apps/users/user01/wanghhh/RNASeq/database/Pseudomonas \
  -1 B_1_R1_val_1.fq.gz \
  -2 B_1_R2_val_2.fq.gz \
  -S ../alignment/B_1.sam \
  --un-gz ../alignment/B_1_unpaired_unaligned.fastq.gz \
  --al-gz ../alignment/B_1_unpaired_aligned.fastq.gz \
  --un-conc-gz ../alignment/B_1_paired_unaligned.fastq.gz \
  --al-conc-gz ../alignment/B_1_paired_aligned.fastq.gz \
  --summary-file ../alignment/B_1_summary.txt \
  --met-file ../alignment/B_1_met.txt &
2. STAR
2.1 构建索引
# Build the STAR genome index for Pseudomonas (15 threads) into
# database/star_ref, using the genome FASTA plus the annotation file for
# splice-junction information.
# NOTE(review): --sjdbOverhang 149 presumably corresponds to 150 bp reads
# (read length - 1) - confirm.
# NOTE(review): the annotation passed to --sjdbGTFfile is a .gff file; STAR's
# manual asks for --sjdbGTFtagExonParentTranscript Parent with GFF3 - confirm.
STAR \
  --runThreadN 15 \
  --runMode genomeGenerate \
  --genomeDir /apps/users/user01/wanghhh/RNASeq/database/star_ref \
  --genomeFastaFiles /apps/users/user01/wanghhh/RNASeq/database/Pseudomonas.fna \
  --sjdbGTFfile /apps/users/user01/wanghhh/RNASeq/database/Pseudomonas.gff \
  --sjdbOverhang 149
2.2 star alignment
# Align the trimmed B_1 read pair with STAR (5 threads) against the index in
# esteya_resin/reference_star. Inputs are gzipped, hence --readFilesCommand zcat.
# Output: coordinate-sorted BAM, a transcriptome BAM and per-gene read counts
# (--quantMode), all named with the prefix "alignment_star/B_1".
#
# Unmapped reads: both options take a mode keyword, not a file path.
#   --outReadsUnmapped Fastx   writes them to <prefix>Unmapped.out.mate1/2
#   --outSAMunmapped Within    also keeps them in the main alignment file
# Every option line except the last ends with a backslash so the command is
# passed to STAR as a whole and does not run into the following text.
STAR --runThreadN 5 \
  --genomeDir /apps/users/user01/wanghhh/RNASeq/esteya_resin/reference_star \
  --readFilesCommand zcat \
  --readFilesIn cleandata/B_1_R1_val_1.fq.gz \
                cleandata/B_1_R2_val_2.fq.gz \
  --outFileNamePrefix alignment_star/B_1 \
  --outReadsUnmapped Fastx \
  --outSAMtype BAM SortedByCoordinate \
  --outBAMsortingThreadN 5 \
  --quantMode TranscriptomeSAM GeneCounts \
  --outSAMunmapped Within
3. salmon
3.1 构建索引
# Build a salmon index named salmon_ref/Pseudomonas from Pseudomonas.fna.
# NOTE(review): salmon indexes transcript sequences; confirm Pseudomonas.fna
# holds transcripts rather than the genome assembly.
salmon index \
  --transcripts Pseudomonas.fna \
  --index salmon_ref/Pseudomonas
3.2 比对
# Alignment-based quantification: quantify the reads already aligned in
# aln.bam against the transcripts in transcripts.fa, letting salmon detect
# the library type automatically (A); results go to ./salmon_quant.
salmon quant \
  --targets transcripts.fa \
  --libType A \
  --alignments aln.bam \
  --output salmon_quant
# Multiple samples: mapping-based quantification of the sample directories
# data/DRR016125 ... data/DRR016140 (brace expansion) against athal_index.
# Each directory is expected to hold <sample>_1.fastq.gz and
# <sample>_2.fastq.gz; results are written to quants/<sample>_quant.
for sample_dir in data/DRR0161{25..40}; do
  # The sample name is the last path component of the directory.
  sample=${sample_dir##*/}
  echo "Processing sample ${sample}"
  salmon quant \
    -i athal_index \
    -l A \
    -p 8 \
    -1 "${sample_dir}/${sample}_1.fastq.gz" \
    -2 "${sample_dir}/${sample}_2.fastq.gz" \
    -o "quants/${sample}_quant"
done
4. bowtie2:http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml
4.1 构建索引
# Build a Bowtie 2 index from Pseudomonas.fna with 10 threads; the index
# files are written with the basename bowtie2_ref/Pseudomonas.fna.
# NOTE(review): the bowtie2_ref/ directory presumably has to exist already.
bowtie2-build \
  --threads 10 \
  Pseudomonas.fna \
  bowtie2_ref/Pseudomonas.fna