# Local BLAST commands.
# Legacy BLAST: format sequence.fasta as a nucleotide database (-p F) with
# parsed sequence IDs (-o T) and the title "name".
formatdb -t "name" -i sequence.fasta -p F -o T
# blastn search; -d takes ONE database name, so the name must not contain an
# unquoted space (it would be split into two arguments).
blastall -i query.fasta -d database_prefix -o blast.out -p blastn -e 1e-10
# blastx for testseq1..5.fasta in the background, one report per query.
# `for` works in both bash and zsh (the former `foreach` is zsh-only) and the
# option dash is a plain ASCII '-'.
for j in testseq[1-5].fasta; do
  [ -e "$j" ] || continue
  blastx -db blastdb/sprot -query "$j" -evalue 0.01 -out "$j.blastx"
done &
#eg:
# blastn every file in the current directory against the chlocp database
# (background loop).
for j in *; do
  [ -f "$j" ] || continue
  blastn -db /home/zhenhua/blastdb/chlorophyta/chlocp -query "$j" \
    -out "$j.blastn" -evalue 1e-5
done &
# Count the regular, non-hidden files in the current directory.
find . -maxdepth 1 -type f ! -name '.*' | wc -l
# Count the files below the current directory that contain 'No hits found'.
# -type f keeps directories away from grep; -print0/-0 survive odd file names.
find . -type f -print0 | xargs -0 grep -l -- 'No hits found' | wc -l
# Same list, saved to the file 9b.
find . -type f -print0 | xargs -0 grep -l -- 'No hits found' > 9b
# Count the files that contain a literal '+'.
find . -type f -print0 | xargs -0 grep -l -- '+' | wc -l
# bowtie2 alignment commands.
# NOTE(review): underscores in yizao_pt_rf.fa and yizao_dna were restored on
# the assumption that they were lost in transcription (an unquoted space
# splits the name into several arguments) -- confirm the real file names.
# Build the index YIZAO from a FASTA reference (-f).
bowtie2-build -f yizao_pt_rf.fa YIZAO
# Paired-end alignment with 2 threads, SAM written to out0.sam.
bowtie2 -p 2 -x YIZAO -1 /home1/zhangzh/yizao_dna/1.fq \
  -2 /home1/zhangzh/yizao_dna/2.fq -S out0.sam
# SAM -> BAM.
samtools view -b -S abc.sam -o abc.bam
# SAM -> BAM, excluding reads with flag 4 set (-F 4).
samtools view -bF 4 -S /home1/zhangzh/bowtie/bowtie2-2.2.6/out0.sam > /home1/zhangzh/bowtie/bowtie2-2.2.6/out0.bam
# BAM -> FASTQ; in.bam is a placeholder (written without <> so the shell does
# not parse it as redirections).
bam2fastq in.bam
# Template: index/reads are placeholders; --al-conc needs a path argument
# (al_conc.fq here is a placeholder output name).
bowtie2 -p 2 -x index -1 reads -2 reads --al-conc al_conc.fq
#soap软件使用命令
soap -a <reads_a> -D <index.files> -o <output>
soap -a <reads_a> -b <reads_b> -D <index.files> -o <PE_output> -2 <SE_output> -m <min_insert_size> -x <max_insert_size>
GapCloser -b <config_file> -a <scaffold_file> -o <output_file>
# PRICE (PriceTI) run seeded with one contig, detached with nohup.
# NOTE(review): underscores in yizao_ylt, origin_data and the NODE_... seed
# name were restored on the assumption that they were lost in transcription;
# with the spaces each path was split into several arguments. Confirm the
# real paths.
nohup /home/zhenhua/price/PriceSource140408/PriceTI \
  -fp /home/zhenhua/yizao/yizao_ylt/origin_data/Cleandata/Chlamy01/clean1.fq \
      /home/zhenhua/yizao/yizao_ylt/origin_data/Cleandata/Chlamy01/clean2.fq 700 \
  -picf 1 NODE_1_length_646_cov_45.659443.fasta 1 1 1 \
  -nc 30 -a 2 -o 1out.fasta &
# Delete every directory named BSM at depth >= 2 below /home/zhenhua/147/.
# The former "-o -name BSM" alternatives bypassed -type d, so plain files
# named BSM were matched too. -prune keeps find from descending into a
# directory that is about to be removed; -print0 / xargs -0 keep odd path
# names intact; -r (GNU xargs) skips rm when nothing matched.
find /home/zhenhua/147/ -mindepth 2 -type d -name "BSM" -prune -print0 \
  | xargs -0 -r rm -r --
# Delete regular *.txt files below the current directory, printing each path
# as it is removed.
find . -name "*.txt" -type f -print -exec rm -f -- {} \;
# Batch codeml: run autocodeml_ctrl.pl on every .phy file.
# Globbing directly (instead of parsing ls) keeps file names intact.
for a in *.phy; do
  [ -e "$a" ] || continue
  perl autocodeml_ctrl.pl "$a"
done
# Same with codeml_free.pl, detached via nohup under zsh; stdout is appended
# to out.txt.
nohup zsh -c 'for a in *.phy; do perl codeml_free.pl "$a"; done' >> out.txt &
# RAxML phylogeny: -f a with 1000 replicates (-#1000), fixed seeds (-x, -p).
# NOTE(review): underscores in the -s alignment names and -n run names were
# restored on the assumption that they were lost in transcription (with
# spaces, -s and -n each received only the first word) -- confirm the names.
nohup raxmlHPC-PTHREADS-SSE3 -f a -m PROTGAMMAAUTO -x 37893 -p 43455 -#1000 \
  -T 8 -s 53genes_43taxa_unsorted_aa.phy -n 53genes_43taxa_unsorted_aao2 &
# Amino acids.
/home/zhenhua/RAxML/raxmlHPC-PTHREADS-SSE3 -f a -m PROTGAMMAAUTO -x 37893 \
  -p 43455 -#1000 -T 16 -s 60_gene_aa.fasta -n 60_gene_aa
# Nucleotides.
/home/zhenhua/RAxML/raxmlHPC-PTHREADS-SSE3 -f a -m GTRGAMMA -x 37121 \
  -p 44521 -#1000 -T 16 -s 57_gene.fasta -n 57_gene
# Find the lines containing a given string and save them.
# -r searches recursively and -n prefixes each hit with its line number; the
# hits are written to the file "try".
# NOTE(review): "Tre spp" may originally have been "Tre_spp" -- confirm.
grep "(((Tre spp" -r -n > try
# Merge all *chlN.fasta files into chlN.fasta.
# The output name itself matches the glob, so a copy left over from an
# earlier run is removed first; otherwise it would be picked up as an input.
rm -f -- chlN.fasta
cat -- *chlN.fasta > chlN.fasta
# PRICE: extend a seed contig using paired reads (detached with nohup).
# NOTE(review): underscores in origin_data and contig_1.fasta were restored
# on the assumption that they were lost in transcription; with the spaces
# each path was split into several arguments. Confirm the real names.
nohup /home/zhenhua/price/PriceSource140408/PriceTI \
  -fp /home/zhenhua/yizao/chloroplast/origin_data/Cleandata/Chlamy01/clean1.fq \
      /home/zhenhua/yizao/chloroplast/origin_data/Cleandata/Chlamy01/clean2.fq 700 \
  -picf 1 contig_1.fasta 1 1 1 \
  -nc 5 -a 2 -o contig_1.fasta &
# Amino-acid alignment with a partition file (-q par): maximum-likelihood
# tree with 1000 replicates.
# NOTE(review): 15_aa.phy / 15_aa had their underscore restored (assumed lost
# in transcription). Also confirm that -m GTRGAMMA is intended for protein
# data; the other amino-acid runs in this file use PROTGAMMAAUTO.
nohup raxmlHPC -f a -q par -m GTRGAMMA -x 87000 -p 65993 -#1000 -s 15_aa.phy -n 15_aa &
# BLAST+: build a database and search it.
# The commands below are the protein variant (-dbtype prot with blastp); a
# nucleotide database would use -dbtype nucl with blastn.
makeblastdb \
  -in db.fasta \
  -dbtype prot \
  -parse_seqids \
  -out dbname
# Tabular (outfmt 6) search with 8 threads.
blastp \
  -query seq.fasta \
  -out seq.blast \
  -db dbname \
  -outfmt 6 \
  -evalue 1e-5 \
  -num_threads 8 \
  -best_hit_overhang 0.1 \
  -best_hit_score_edge 0.1
#outfmt-6 每列的含义
# [00] Query id [01] Subject id [02] % identity [03] alignment length [04] mismatches [05] gap openings [06] q. start [07] q. end [08] s. start [09] s. end [10] e-value [11] bit score
# Remove duplicate hits: for each query (column 0) keep only the row(s) with
# the highest score in column 11; ties are all kept and queries are printed
# in first-seen order. Input is tab-separated; a trailing CR/LF is stripped.
# The Perl variables need their underscores ($name_col, $score_col, $_) --
# with spaces the one-liner does not compile.
# NOTE(review): the input/output names A_B and A_B.best are reconstructed
# from "A B > A B.best" -- confirm.
perl -e '
  $name_col = 0; $score_col = 11;
  while (<>) {
    s/\r?\n//;
    @F = split /\t/, $_;
    ($n, $s) = @F[$name_col, $score_col];
    if (! exists($max{$n})) { push @names, $n }
    if (! exists($max{$n}) || $s > $max{$n}) { $max{$n} = $s; $best{$n} = "" }
    if ($s == $max{$n}) { $best{$n} .= "$_\n" }
  }
  for $n (@names) { print $best{$n} }
' A_B > A_B.best
# Loop: blastn every file in the current directory against the chlocp
# database, tabular output (outfmt 6), one target per query, in the
# background.
# Option names need their underscores. -num_descriptions is dropped: it only
# applies to the pairwise report formats and BLAST+ rejects it when combined
# with -max_target_seqs.
for j in *; do
  [ -f "$j" ] || continue
  blastn -db /home/zhenhua/blastdb/chlorophyta/chlocp -query "$j" \
    -out "$j.blastn" -outfmt 6 -num_threads 10 -evalue 1e-10 \
    -best_hit_overhang 0.1 -best_hit_score_edge 0.1 -max_target_seqs 1
done &
# R: write the first column of data frame `a` to the file "ice2" -- no
# quoting, space as separator, no row names, no column names.
write.table(a[1], "ice2", quote = FALSE, sep = " ", row.names = FALSE, col.names = FALSE)
# Merge two tables on the columns they share; all=FALSE keeps only rows that
# have a match in both.
merge(a,b,all=FALSE)
# Extract sequences: pass an ID list and FASTA files to a script, output to
# the file "out".
# NOTE(review): `$0` looks like a placeholder copied from a script's usage
# text; in a shell it expands to the shell/script name, not to the Perl
# script -- substitute the real script path.
perl $0 gene.list *fa > out
# Merge fq/fas read files into one (per the original note, this script ships
# with velvet). `$0` is again a placeholder for the script path.
perl $0 1.fq 2.fq all.fq
# Rename: replace line 9 of every regular file in the current directory with
# ">Chl ice" (background loop). `for` replaces the zsh-only `foreach`, and
# directories are skipped because sed -i cannot edit them.
for j in *; do
  [ -f "$j" ] || continue
  sed -i '9c \>Chl ice' "$j"
done &
# Remove the last four characters of every line (one character matched four
# times, anchored at end of line) in every regular file (background loop).
for j in *; do
  [ -f "$j" ] || continue
  sed -i 's/\(.\)\{4\}$//' "$j"
done &
# PRANK alignment.
# Codon alignment of every .fasta file (background loop); globbing directly
# instead of parsing ls keeps file names intact.
for i in *.fasta; do
  [ -e "$i" ] || continue
  prank -d="$i" -codon -F -o="$i.ali"
done &
# Translated alignment of every .fasta file (background loop).
for i in *.fasta; do
  [ -e "$i" ] || continue
  prank -d="$i" -translate -F -o="$i.tran.ali"
done &
# Convert a protein alignment to a DNA alignment using the input DNA.
prank -convert -d=alignment_pep.fas -dna=input_dna.fas -o=alignment_dna -keep
# Find a file by name and copy it into the directory 12.
# -exec replaces the deprecated `xargs -i` and is safe for odd path names.
# NOTE(review): the name was "c26676 g1 orf1.fasta" with spaces, which find
# parses as three arguments; underscores are assumed -- confirm.
find . -name 'c26676_g1_orf1.fasta' -exec cp -- {} 12 \;
# Find entries (pruning /tmp/mnt/disk1/ignore, excluding *.tmp names) and
# copy them to a target directory; "目的目录" is a placeholder meaning
# "destination directory".
# NOTE(review): as written this cannot work. xargs is invoked without -I/-i,
# so "{}" and ";" reach cp as literal arguments, and the destination is
# listed before the sources. Something like
#   find ... -exec cp -r {} DEST \;
# was probably intended -- confirm before use.
find -path '/tmp/mnt/disk1/ignore' -prune -o \( -name '*' ! -name "*.tmp" \) | xargs cp "目的目录" "{}" \;
# Case conversion: lower case to upper case.
# Redirecting the input drops the extra cat process and makes a missing
# input file show up in the command's exit status.
tr 'a-z' 'A-Z' < file > newfile
# MySQL: basic database operations.
# Log in as user `name`; -p makes the client prompt for the password.
mysql -u name -p
# Create a user.
CREATE USER 'username'@'hostname' IDENTIFIED BY 'password';
# Grant privileges (the statement needs its terminating semicolon).
GRANT privileges ON databasename.tablename TO 'username'@'host';
# e.g. GRANT SELECT, INSERT ON test.user TO 'pig'@'%';
# e.g. GRANT ALL ON *.* TO 'pig'@'%';
# Set a user's password.
# NOTE: the PASSWORD() function was removed in MySQL 8.0; on those servers use
#   ALTER USER 'username'@'host' IDENTIFIED BY 'newpassword';
SET PASSWORD FOR 'username'@'host' = PASSWORD('newpassword');
# Set the password of the currently logged-in user.
SET PASSWORD = PASSWORD("newpassword");
# e.g. SET PASSWORD FOR 'pig'@'%' = PASSWORD("123456");
# Revoke privileges from a user.
REVOKE privilege ON databasename.tablename FROM 'username'@'host';
# Show a user's privileges.
SHOW GRANTS FOR 'username'@'host';
# Drop a user.
DROP USER 'username'@'host';
#常用操作
show databases; # list databases
show tables; # list tables
create database name; # create a database
drop database orthomcltest; # drop a database
use databasename; # select a database
show global variables like 'port'; # show the server port
# List every PID owned by user ttlsa with pgrep -u, then kill them all.
# xargs -r: do not run kill at all when pgrep finds no process (otherwise
# kill is invoked without a PID and fails).
pgrep -u ttlsa | sudo xargs -r kill -9
# Character-replacement commands, applied in place to every regular file in
# the current directory (globbing directly instead of parsing ls).
# Remove the first "<four word characters>|" on each line.
for i in *; do
  [ -f "$i" ] || continue
  sed -i 's/\w\{4\}|//' "$i"
done
# Remove the first "|<1-50 word characters>" on each line.
for i in *; do
  [ -f "$i" ] || continue
  sed -i 's/|\w\{1,50\}//' "$i"
done
# Show the lines of the .fasta files that contain a digit.
grep "[0-9]" *.fasta
# Remove the first '-' on each line.
for i in *; do
  [ -f "$i" ] || continue
  sed -i 's/-//' "$i"
done
# Reads stdin: print the lines after the first "Bayes Empirical Bayes" line,
# stopping at the first line that contains "The grid".
awk '/Bayes Empirical Bayes/{while(getline)if($0!~/The grid/)print;else exit}'
# Read test1 line by line: print the line, then search every *.codeml file
# for it (the line is used as a grep pattern).
# read -r keeps backslashes literal; printf prints the line unchanged, where
# an unquoted echo would re-split it and expand globs; `--` protects
# patterns that start with '-'.
while read -r line; do
  printf '%s\n' "$line"
  grep -- "$line" *.codeml
done < test1
# Select the lines in which "Apro" occurs at least twice: splitting on "Apro"
# yields occurrences+1 fields, so NF>2 means two or more occurrences (use
# NF>3 for strictly more than two).
# NOTE(review): the input name was "8 group1" with a space, which awk reads
# as two files; an underscore is assumed -- confirm.
awk -F "Apro" 'NF>2' 8_group1 > new
# Print the content between two marker lines.
# Reads stdin: print what follows the first line matching /3/, stopping at
# the first line matching /7/.
awk '/3/{while(getline)if($0!~/7/)print;else exit}'
# Range-pattern variant on `file`: each line in the range is printed one step
# late, so for the first range the /3/ and /7/ marker lines are not printed.
awk '/3/,/7/{if(i>1)print x;x=$0;i++}' file
# For every .codeml file: print the lines after "Bayes Empirical Bayes" up to
# the first line containing "The".
for i in *.codeml; do
  [ -e "$i" ] || continue
  awk '/Bayes Empirical Bayes/{while(getline)if($0!~/The/)print;else exit}' "$i"
done
# For every .codeml file: save the lines after "dN & dS for each branch" up
# to "tree length for dN:" into <file>.ds.
for file in *.codeml; do
  [ -e "$file" ] || continue
  awk '/dN & dS for each branch/{while(getline)if($0!~/tree length for dN:/)print;else exit}' "$file" > "${file}.ds"
done
# Quick test on one file: cut -d " " -f 8 my_prefix4797.phy.codeml.ds
# Squeeze every run of spaces to a single space so that cut's one-character
# delimiter gives stable field numbers. The former 's/ / /g' replaced a
# space with a space, i.e. did nothing.
# NOTE(review): the squeeze is the assumed intent (a doubled space apparently
# lost in transcription) -- confirm field 8 is still the wanted column.
for i in *.ds; do
  [ -e "$i" ] || continue
  sed -i 's/  */ /g' "$i"
done
# Save field 8 of every .ds file to <file>.s.
for i in *.ds; do
  [ -e "$i" ] || continue
  cut -d " " -f 8 "$i" > "${i}.s"
done
# Delete lines 1..n of a file (template: replace n with a number before
# running).
sed -i '1,nd' filename
# Delete the first three lines of every .s file.
for i in *.s; do
  [ -e "$i" ] || continue
  sed -i '1,3d' "$i"
done
# Delete everything from line 2 to the end (options placed before the
# script and file operands).
sed -i '2,$d' aa.txt
# Delete the last line of every .s file.
for i in *.s; do
  [ -e "$i" ] || continue
  sed -i '$d' "$i"
done
# Collect the summary lines.
grep "Max" *.s > max.txt
grep "Min" *.s > min.txt
grep "Median" *.s > median.txt
# Maximum of column 1. The first record always seeds max, so data that is
# entirely negative is handled; empty input still prints the 0 default.
awk 'BEGIN {max = 0} NR == 1 || $1+0 > max+0 {max = $1} END {print "Max=", max}' test.txt
for i in *.s; do
  [ -e "$i" ] || continue
  awk 'BEGIN {max = 0} NR == 1 || $1+0 > max+0 {max = $1} END {print "Max=", max}' "$i" > "${i}.max"
done
# Minimum of column 1. The first record always seeds min, so values above
# the 65536 default are handled; empty input still prints 65536.
awk 'BEGIN {min = 65536} NR == 1 || $1+0 < min+0 {min = $1} END {print "Min=", min}' test.txt
for i in *.s; do
  [ -e "$i" ] || continue
  awk 'BEGIN {min = 65536} NR == 1 || $1+0 < min+0 {min = $1} END {print "Min=", min}' "$i" > "${i}.min"
done
# Sum of column 1 (awk reads the file directly; no cat needed).
awk '{sum+=$1} END {print "Sum= ", sum}' test.txt
# Mean of column 1; nothing is printed for empty input instead of dividing
# by zero.
awk '{sum+=$1} END {if (NR) print "Avg= ", sum/NR}' test.txt
for i in *.s; do
  [ -e "$i" ] || continue
  awk '{sum+=$1} END {if (NR) print "Avg= ", sum/NR}' "$i" > "${i}.mean"
done
# R: read 2317.txt.bak into one character vector `tt` with scan(): fields are
# separated by a single space, and text after '#' is treated as a comment.
tt <- scan("2317.txt.bak",what=character(),sep=" ",fill=F,comment.char="#")
# Run codeml_null.pl and then codeml_alt.pl on every .phy file (all null
# runs first, then all alt runs). Globbing directly instead of parsing ls
# keeps file names intact.
for i in *.phy; do
  [ -e "$i" ] || continue
  perl codeml_null.pl "$i"
done
for i in *.phy; do
  [ -e "$i" ] || continue
  perl codeml_alt.pl "$i"
done
# Clean every .xls table in place, then write a copy sorted on field 2 to
# <file>.sort.xls.
for i in *.xls; do
  [ -e "$i" ] || continue
  # Drop "[translate(x)][NNN aa]" annotations and "[revcomp]:" tags.
  sed -i 's/\[translate(\w)\]\[\w\{0,99\} aa\]//g' "$i"
  sed -i 's/\[revcomp\]://g' "$i"
  # Tabs become spaces, then runs of spaces are squeezed to one so the
  # single-space sort delimiter is unambiguous. The former 's/ / /g' was a
  # no-op. NOTE(review): squeeze is the assumed intent -- confirm.
  sed -i 's/\t/ /g' "$i"
  sed -i 's/  */ /g' "$i"
  sort -t ' ' -k 2 "$i" > "$i.sort.xls"
done
# Turn the separators of the sorted tables back into tabs.
for i in *sort.xls; do
  [ -e "$i" ] || continue
  sed -i 's/ /\t/g' "$i"
done
# Find rows whose columns 3-11 all agree with column 2; append column 1 and 2
# of those rows to <file>.f.xls.
# NOTE(review): `~` is a regex match ($2 matched against $3 as a pattern),
# not string equality; if exact equality is meant, `==` is the operator --
# confirm before changing.
for i in *sort.xls; do
  [ -e "$i" ] || continue
  awk -F " " '{if ($2 ~ $3 && $2 ~ $4 && $2 ~ $5 && $2 ~ $6 && $2 ~ $7 && $2 ~ $8 && $2 ~ $9 && $2 ~ $10 && $2 ~ $11) print $1,$2 }' "$i" >> "$i.f.xls"
done
# Convergence analysis: trim the .nodes files in place.
# Per file: drop the first three lines, drop the last two lines, then remove
# every space. Globbing directly instead of parsing ls keeps names intact.
# NOTE(review): 's/ //g' removes all spaces, which leaves nothing for the
# space-to-tab step below to convert in these files; the pattern may have
# lost a doubled space in transcription -- confirm.
for i in *.nodes; do
  [ -e "$i" ] || continue
  sed -i '1,3d' "$i"
  sed -i '$d' "$i"
  sed -i '$d' "$i"
  sed -i 's/ //g' "$i"
done
# In every file of the current directory: remove '#' and turn spaces into
# tabs.
sed -i 's/#//g' ./*
sed -i 's/ /\t/g' ./*
# .rates files: drop the first ten and the last five lines, then squeeze
# runs of spaces to a single space.
# The four former 's/ / /g' passes replaced a space with a space (no-ops).
# NOTE(review): squeezing is the assumed intent (doubled spaces apparently
# lost in transcription) -- confirm field 5 below is still the wanted column.
for i in *.rates; do
  [ -e "$i" ] || continue
  sed -i '1,10d' "$i"
  for k in 1 2 3 4 5; do
    sed -i '$d' "$i"
  done
  sed -i 's/  */ /g' "$i"
done
# Check: any line printed here still contains a doubled space.
grep "  " *rates
# Save field 5 of every .rates file to <file>.s.
for i in *.rates; do
  [ -e "$i" ] || continue
  cut -d " " -f 5 "$i" > "${i}.s"
done
# Strip a trailing ".s" from file names (Perl-style rename). The pattern is
# anchored with $ so names that merely contain ".s" somewhere (for example
# "x.sort") are left alone.
rename 's/\.s$//' *
# .tree files: drop the first line and the last eight lines of each file.
# Globbing directly instead of parsing ls keeps file names intact.
for i in *.tree; do
  [ -e "$i" ] || continue
  sed -i '1d' "$i"
  for k in 1 2 3 4 5 6 7 8; do
    sed -i '$d' "$i"
  done
done
# Insert the label 18 after the first "):" on each line, then remove all
# spaces.
sed -i 's/):/)18:/' *tree
sed -i 's/ //g' *tree
# pb run that takes an alignment (moc.ali), a tree (moc.tree), an outgroup
# file and a calibration file; the last argument (mocln1) is presumably the
# chain name.
# NOTE(review): the meaning of the individual flags is not verifiable from
# this file -- check them against the pb manual.
pb -d moc.ali -T moc.tree -r bikont.outgroup -cal calib -ln -rp 750 750 -cat -gtr mocln1
# Turn rows into columns (transpose).
# Sort every .phy file into <file>.sort.
for i in *.phy; do
  [ -e "$i" ] || continue
  sort "$i" > "${i}.sort"
done
# Two alternative awk transposes follow. Both write <file>.s, so when both
# are run the second result replaces the first.
# Variant 1: store every cell, then print column by column. The cell is
# passed as a printf argument, not as the format, so '%' in the data is safe.
for i in *.sort; do
  [ -e "$i" ] || continue
  awk '{for(i=1;i<=NF;i++)a[NR,i]=$i}END{for(j=1;j<=NF;j++)for(k=1;k<=NR;k++)printf "%s%s", a[k,j], (k==NR ? RS : FS)}' "$i" > "${i}.s"
done
# Variant 2: build each output line incrementally while reading.
for i in *.sort; do
  [ -e "$i" ] || continue
  awk '{for(i=0;++i<=NF;)a[i]=a[i]?a[i] FS $i:$i}END{for(i=0;i++<NF;)print a[i]}' "$i" > "${i}.s"
done
# R: chi-squared test on a 2x2 table built from two rows of counts.
data1<-rbind(c(9800,200),c(8810,190))
chisq.test(data1)
# Work around "argument list too long" from rm: let find produce the names
# and xargs batch them.
# -print0 / -0 keep names with spaces or newlines intact, `--` protects names
# starting with '-', and the stray literal "*.log" argument formerly passed
# to rm is gone.
find . -name "*.log" -print0 | xargs -0 rm -rf --
# MAFFT (L-INS-i) amino-acid alignment of every .fasta file, 2 threads each;
# the result goes to mafft_<file>. Expansions are quoted so names with
# spaces survive.
for i in *.fasta; do
  [ -e "$i" ] || continue
  mafft-linsi --thread 2 "$i" > "mafft_$i"
done
# pal2nal: takes the protein alignment and the nucleotide sequences and
# writes FASTA output to align_nt_431143.fasta (detached with nohup).
nohup perl /home/lingxiao/software/pal2nal/pal2nal.pl mafft_aa_431143.fasta nt_431143.fasta -output fasta -nomismatch > align_nt_431143.fasta &
# IQ-TREE tree building. Per the original note, -m TEST searches for the
# best-fitting model; the two runs below use fixed models instead (WAG+G for
# the amino-acid alignment, GTR+G for the nucleotide alignment), 16 threads,
# -bb 1000.
nohup /home/lingxiao/software/iqtree/bin/iqtree-omp -s /home/zhenhua/genome_project/species_tree/13_aa/13_trim_aa.phy -nt 16 -m WAG+G -bb 1000 &
nohup /home/lingxiao/software/iqtree/bin/iqtree-omp -s /home/zhenhua/genome_project/species_tree/12_nt/12_trim_nt.phy -nt 16 -m GTR+G -bb 1000 &
# trimAl: trim an alignment (template: inputfile/output are placeholders);
# the -colnumbering report on stdout is saved to outputfile_info.
/home/lingxiao/software/trimal-trimAl/source/trimal -in inputfile -out output -fasta -automated1 -colnumbering > outputfile_info
# Trim every ./mafft*aa.fasta alignment; globbing directly instead of
# parsing ls keeps file names intact.
for i in ./mafft*aa.fasta; do
  [ -e "$i" ] || continue
  /home/lingxiao/software/trimal-trimAl/source/trimal -in "$i" \
    -out "$i.trim.fasta" -fasta -automated1 -colnumbering > "$i.info"
done
# Apply the saved column information to the nucleotide alignment.
/home/lingxiao/software/trim4nal outputfile_info nt_align > nt_trim_out
# Run Phylip2Fasta.pl on every .fasta file, writing <file>.fas.
for i in *.fasta; do
  [ -e "$i" ] || continue
  perl Phylip2Fasta.pl "$i" > "$i.fas"
done
# ASTRAL species tree: input trees from bipartitions_1696, result written to
# bipartitions_1696_BS20_speciestree.tre, stderr saved to the .log file;
# detached with nohup.
nohup java -jar /home/lingxiao/software/ASTRAL-master/astral.5.5.9.jar -i bipartitions_1696 -o bipartitions_1696_BS20_speciestree.tre -g -r 500 2>bipartitions_1696_BS20.log &
# Newick Utilities nw_ed: for internal nodes (i) whose support is <= 20
# (b<=20), apply the action `o`.
# NOTE(review): per the Newick Utilities documentation `o` splices the node
# out (collapses the branch) rather than dropping whole trees as the
# original comment said, and the threshold is <= 20, not < 20 -- confirm.
/home/lingxiao/software/newick-utils-1.6/src/nw_ed bipartitions_2778 'i & b<=20' o > bipartitions_2778_BS20
#批量下载序列
https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&rettype=fasta&retmode=text&id=6701965,6701969,6702094,6702105,6702160
# Group by the first column: one output line per distinct key, holding the
# key, a space, and all of that key's second-column values joined with "_".
# Output order is whatever awk's for-in iteration yields.
awk '
  {
    key = $1
    if (key in merged) {
      merged[key] = merged[key] "_" $2
    } else {
      merged[key] = key " " $2
    }
  }
  END {
    for (key in merged) {
      print merged[key]
    }
  }
' file.txt