shell

#本地blast命令

formatdb -t "name" -i sequence.fasta -p F -o T

blastall -i query.fasta -d database_prefix -o blast.out -p blastn -e 1e-10

foreach j in testseq[1-5].fasta ; do blastx -db blastdb/sprot -query $j -evalue 0.01 -out $j.blastx;done&

eg:

foreach j in * ; do blastn -db /home/zhenhua/blastdb/chlorophyta/chlocp -query $j -out $j.blastn -evalue 1e-5 ; done&

ls -l | grep '^-' | wc -l

find | xargs grep 'No hits found' -l | wc -l

find | xargs grep 'No hits found' -l > 9b

find | xargs grep '+' -l | wc -l

#bowtie2比对命令

bowtie2-build -f yizao_pt_rf.fa YIZAO

bowtie2 -p 2 -x YIZAO -1 /home1/zhangzh/yizao_dna/1.fq -2 /home1/zhangzh/yizao_dna/2.fq -S out0.sam

samtools view -b -S abc.sam -o abc.bam

samtools view -bF 4 -S /home1/zhangzh/bowtie/bowtie2-2.2.6/out0.sam > /home1/zhangzh/bowtie/bowtie2-2.2.6/out0.bam

bam2fastq <in.bam>

bowtie2 -p 2 -x index -1 reads -2 reads --al-conc

#soap软件使用命令

soap -a <reads_a> -D <index.files> -o <output>

soap -a <reads_a> -b <reads_b> -D <index.files> -o <PE_output> -2 <SE_output> -m <min_insert_size> -x <max_insert_size>

GapCloser -b config_file -a scaffold_file -o output_file

nohup /home/zhenhua/price/PriceSource140408/PriceTI -fp /home/zhenhua/yizao/yizao_ylt/origin_data/Cleandata/Chlamy01/clean1.fq /home/zhenhua/yizao/yizao_ylt/origin_data/Cleandata/Chlamy01/clean2.fq 700 -picf 1 NODE_1_length_646_cov_45.659443.fasta 1 1 1 -nc 30 -a 2 -o 1out.fasta &

#删除 BSM 文件夹

find /home/zhenhua/147/ -mindepth 2 -type d -name "BSM" -o -name "BSM" -o -name "BSM"|xargs \rm -r

#删除 .txt 的文件

find . -name "*.txt" -type f -print -exec rm -rf {} \;

#批量codeml

for a in $(ls *.phy); do perl autocodeml_ctrl.pl $a ;done

nohup zsh -c 'for a in $(ls *.phy); do perl codeml_free.pl $a ;done' >> out.txt &

#raxml法构建系统发生树

nohup raxmlHPC-PTHREADS-SSE3 -f a -m PROTGAMMAAUTO -x 37893 -p 43455 -#1000 -T 8 -s 53genes_43taxa_unsorted_aa.phy -n 53genes_43taxa_unsorted_aao2 &

#氨基酸

/home/zhenhua/RAxML/raxmlHPC-PTHREADS-SSE3 -f a -m PROTGAMMAAUTO -x 37893 -p 43455 -#1000 -T 16 -s 60_gene_aa.fasta -n 60_gene_aa

#核苷酸

/home/zhenhua/RAxML/raxmlHPC-PTHREADS-SSE3 -f a -m GTRGAMMA -x 37121 -p 44521 -#1000 -T 16 -s 57_gene.fasta -n 57_gene

#查找文件中的某一行并输出

grep "(((Tre spp" -r -n > try

#合并文件

cat *chlN.fasta >chlN.fasta

#price 序列延长

nohup /home/zhenhua/price/PriceSource140408/PriceTI -fp /home/zhenhua/yizao/chloroplast/origin_data/Cleandata/Chlamy01/clean1.fq /home/zhenhua/yizao/chloroplast/origin_data/Cleandata/Chlamy01/clean2.fq 700 -picf 1 contig_1.fasta 1 1 1 -nc 5 -a 2 -o contig_1.fasta &

#氨基酸序列、partition后最大似然法建树

nohup raxmlHPC -f a -q par -m GTRGAMMA -x 87000 -p 65993 -#1000 -s 15_aa.phy -n 15_aa &

#blast+ 建库以及运行 nucl

makeblastdb -in db.fasta -dbtype prot -parse_seqids -out dbname

blastp -query seq.fasta -out seq.blast -db dbname -outfmt 6 -evalue 1e-5 -num_threads 8 -best_hit_overhang 0.1 -best_hit_score_edge 0.1

#outfmt-6 每列的含义

[00] Query id [01] Subject id [02] % identity [03] alignment length [04] mismatches [05] gap openings [06] q. start [07] q. end [08] s. start [09] s. end [10] e-value [11] bit score

#删除重复项

perl -e '$name_col=0; $score_col=11; while(<>) {s/\r?\n//; @F=split /\t/, $_; ($n, $s) = @F[$name_col, $score_col]; if (! exists($max{$n})) {push @names, $n}; if (! exists($max{$n}) || $s > $max{$n}) {$max{$n} = $s; $best{$n} = ()}; if ($s == $max{$n}) {$best{$n} .= "$_\n"};} for $n (@names) {print $best{$n}}' A_B > A_B.best

#foreach 循环

foreach j in * ; do blastn -db /home/zhenhua/blastdb/chlorophyta/chlocp -query $j -out $j.blastn -outfmt 6 -num_descriptions 5 -num_threads 10 -evalue 1e-10 -best_hit_overhang 0.1 -best_hit_score_edge 0.1 -max_target_seqs 1 ; done &

#R中输出table格式将数据框第一列输出、输出名字ice2 没有双引号分隔符为空格没有行名没有列名

write.table(a[1], "ice2", quote = FALSE, sep = " ", row.names = FALSE, col.names = FALSE)

#将两个table按照共有的元素合并

merge(a,b,all=FALSE)

#提取序列

perl $0 gene.list *fa > out

# 将fq、fas测序文件合并成一个(velvet软件里有这个脚本)

perl $0 1.fq 2.fq all.fq

#改名字将第九行改成>chl ice 循环

foreach j in * ; do sed -i '9c \>Chl ice' $j ;done &

#从末尾开始匹配一个字符 4次循环

foreach j in * ; do sed -i 's/\(.\)\{4\}$//' $j ;done &

#prank 比对

for i in $(ls *.fasta) ;do prank -d=$i -codon -F -o=$i.ali ;done &

for i in $(ls *.fasta) ;do prank -d=$i -translate -F -o=$i.tran.ali ;done &

prank -convert -d=alignment_pep.fas -dna=input_dna.fas -o=alignment_dna -keep

# find找到文件并将文件复制到指定的目录

find -name c26676_g1_orf1.fasta | xargs -i cp {} 12

#find找到目录并将目录整个复制到指定的目录

find -path '/tmp/mnt/disk1/ignore' -prune -o \( -name '*' ! -name "*.tmp" \) | xargs cp "目的目录" "{}" \;

# 大小写转换

cat file | tr a-z A-Z > newfile

# mysql 数据库简单操作

#登录

mysql -u name -p

#创建用户

CREATE USER 'username'@'hostname' IDENTIFIED BY 'password';

#授予权限

GRANT privileges ON databasename.tablename TO 'username'@'host';

#eg:GRANT SELECT, INSERT ON test.user TO 'pig'@'%';

#eg:GRANT ALL ON *.* TO 'pig'@'%';

#设置密码

SET PASSWORD FOR 'username'@'host' = PASSWORD('newpassword');

#当前登陆用户设置密码

SET PASSWORD = PASSWORD("newpassword");

#eg：SET PASSWORD FOR 'pig'@'%' = PASSWORD("123456");

#撤销用户权限

REVOKE privilege ON databasename.tablename FROM 'username'@'host';

#显示用户权限

SHOW GRANTS FOR 'username'@'host';

#删除用户

DROP USER 'username'@'host';

#常用操作

show databases; 显示数据库

show tables; 显示表

create database name; 创建数据库

drop database orthomcltest;删除数据库

use databasename; 选择数据库

show global variables like 'port'; 查看端口号

#pgrep -u参数查出用户的所有pid，然后依次kill

pgrep -u ttlsa | sudo xargs kill -9

#替换字符的命令

for i in $(ls );do sed -i 's/\w\{4\}|//' $i ;done

for i in $(ls );do sed -i 's/|\w\{1,50\}//' $i ;done

grep "[0-9]" *.fasta

for i in $(ls );do sed -i 's/-//' $i;done

awk '/Bayes Empirical Bayes/{while(getline)if($0!~/The grid/)print;else exit}'

#读取test1的每一行

while read line; do

echo $line

grep "$line" *.codeml

done <test1

#找出Apro出现大于两次的行

awk -F "Apro" 'NF>2' 8_group1 > new

#查找两行之间的内容

awk '/3/{while(getline)if($0!~/7/)print;else exit}'

awk '/3/,/7/{if(i>1)print x;x=$0;i++}' file

for i in $(ls *.codeml); do awk '/Bayes Empirical Bayes/{while(getline)if($0!~/The/)print;else exit}' $i ;done

for file in $(ls *.codeml);do awk '/dN & dS for each branch/{while(getline)if($0!~/tree length for dN:/)print;else exit}' $file > ${file}.ds;done

# test cut -d " " -f 8 my_prefix4797.phy.codeml.ds

for i in $(ls *.ds);do sed -i 's/ \{1,\}/ /g' $i;done

for i in $(ls *.ds);do cut -d " " -f 8 $i > ${i}.s;done

#删除文件1-n行

sed -i '1,nd' filename

for i in $(ls *.s);do sed -i '1,3d' $i;done

#删除从第二行到末尾行

sed '2,$d' -i aa.txt

for i in $(ls *.s);do sed -i '$d' $i;done

#提取值

grep "Max" *.s > max.txt

grep "Min" *.s > min.txt

grep "Median" *.s > median.txt

#求最大值：

awk 'BEGIN {max = 0} {if ($1+0 > max+0) max=$1} END {print "Max=", max}' test.txt

for i in $(ls *.s);do awk 'BEGIN {max = 0} {if ($1+0 > max+0) max=$1} END {print "Max=", max}' $i > ${i}.max;done

#求最小值：

awk 'BEGIN {min = 65536} {if ($1+0 < min+0) min=$1} END {print "Min=", min}' test.txt

for i in $(ls *.s); do awk 'BEGIN {min = 65536} {if ($1+0 < min+0) min=$1} END {print "Min=", min}' $i > ${i}.min;done

#求和：

cat test.txt|awk '{sum+=$1} END {print "Sum= ", sum}'

#求平均值：

cat test.txt|awk '{sum+=$1} END {print "Avg= ", sum/NR}'

for i in $(ls *.s);do awk '{sum+=$1} END {print "Avg= ", sum/NR}' $i > ${i}.mean;done

#将文件读为一行

tt <- scan("2317.txt.bak",what=character(),sep=" ",fill=F,comment.char="#")

for i in $(ls *.phy);do perl codeml_null.pl $i;done

for i in $(ls *.phy);do perl codeml_alt.pl $i;done

for i in $(ls *.xls);do sed -i 's/\[translate(\w)\]\[\w\{0,99\} aa\]//g' $i;done

for i in $(ls *.xls);do sed -i 's/\[revcomp\]://g' $i;done

for i in $(ls *.xls);do sed -i 's/\t/ /g' $i;done

for i in $(ls *.xls);do sed -i 's/ \{1,\}/ /g' $i;done

for i in $(ls *.xls);do sort -t ' ' -k 2 $i > $i.sort.xls;done

for i in $(ls *sort.xls);do sed -i 's/ /\t/g' $i;done

#求相同

for i in $(ls *sort.xls);do awk -F " " '{if ($2 ~ $3 && $2 ~ $4 && $2 ~ $5 && $2 ~ $6 && $2 ~ $7 && $2 ~ $8 && $2 ~ $9 && $2 ~ $10 && $2 ~ $11) print $1,$2 }' $i >> $i.f.xls;done

#convergent 处理文件nodes

for i in $(ls *.nodes);do sed -i '1,3d' $i;done

for i in $(ls *.nodes);do sed -i '$d' $i;done

for i in $(ls *.nodes);do sed -i 's/ //g' $i;done

sed -i 's/#//g' ./*

sed -i 's/ /\t/g' ./*

#rates 文件

for i in $(ls *.rates);do sed -i '1,10d' $i;done

for i in $(ls *.rates);do sed -i '$d' $i;done

for i in $(ls *.rates);do sed -i 's/ \{1,\}/ /g' $i;done

grep " " *rates

for i in $(ls *.rates);do cut -d " " -f 5 $i > ${i}.s;done

rename 's/\.s//' *

#tree 文件

for i in $(ls *.tree);do sed -i '1d' $i;done

for i in $(ls *.tree);do sed -i '$d' $i;done

sed -i 's/):/)18:/' *tree

sed -i 's/ //g' *tree

#pb

pb -d moc.ali -T moc.tree -r bikont.outgroup -cal calib -ln -rp 750 750 -cat -gtr mocln1

# 将行转换成列

for i in $(ls *.phy);do sort $i > ${i}.sort;done

for i in $(ls *.sort);do awk '{for(i=1;i<=NF;i++)a[NR,i]=$i}END{for(j=1;j<=NF;j++)for(k=1;k<=NR;k++)printf k==NR?a[k,j] RS:a[k,j] FS}' $i > ${i}.s;done

for i in $(ls *.sort);do awk '{for(i=0;++i<=NF;)a[i]=a[i]?a[i] FS $i:$i}END{for(i=0;i++<NF;)print a[i]}' $i > ${i}.s;done

#R 卡方检验

data1<-rbind(c(9800,200),c(8810,190))

chisq.test(data1)

#rm 参数列表过长

find . -name "*.log"|xargs rm -rf "*.log"

#mafft 氨基酸比对

for i in *.fasta; do mafft-linsi --thread 2 $i > mafft_$i; done

#pal2nal

nohup perl /home/lingxiao/software/pal2nal/pal2nal.pl mafft_aa_431143.fasta nt_431143.fasta -output fasta -nomismatch > align_nt_431143.fasta &

#iq_tree构树 -m TEST 寻找最适的模型

nohup /home/lingxiao/software/iqtree/bin/iqtree-omp -s /home/zhenhua/genome_project/species_tree/13_aa/13_trim_aa.phy -nt 16 -m WAG+G -bb 1000 &

nohup /home/lingxiao/software/iqtree/bin/iqtree-omp -s /home/zhenhua/genome_project/species_tree/12_nt/12_trim_nt.phy -nt 16 -m GTR+G -bb 1000 &

# trimal

/home/lingxiao/software/trimal-trimAl/source/trimal -in inputfile -out output -fasta -automated1 -colnumbering > outputfile_info

for i in $(ls ./mafft*aa.fasta); do /home/lingxiao/software/trimal-trimAl/source/trimal -in $i -out $i.trim.fasta -fasta -automated1 -colnumbering > $i.info;done

/home/lingxiao/software/trim4nal outputfile_info nt_align > nt_trim_out

for i in *.fasta; do perl Phylip2Fasta.pl $i > $i.fas; done

#ASTRAL构建物种树#

nohup java -jar /home/lingxiao/software/ASTRAL-master/astral.5.5.9.jar -i bipartitions_1696 -o bipartitions_1696_BS20_speciestree.tre -g -r 500 2>bipartitions_1696_BS20.log &

#去除bs小于20的树

/home/lingxiao/software/newick-utils-1.6/src/nw_ed bipartitions_2778 'i & b<=20' o > bipartitions_2778_BS20

#批量下载序列

http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&rettype=fasta&retmode=text&id=6701965,6701969,6702094,6702105,6702160

#按照第一列合并第二列

awk '{if(!a[$1]){a[$1]=$1" "$2;}else{a[$1]=a[$1]"_"$2}}END{for(i in a){print a[i]}}' file.txt

shell

推荐阅读更多精彩内容