# 文件读取写出 (reading a file and writing the result out)
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys, os
def extract_gene_id(line):
    """Return the part of the second tab-separated field before its first '.'.

    The line terminator is removed before splitting, so a second field that
    is last on the line and contains no '.' does not carry a stray newline
    into the result.

    Args:
        line: one raw line of tab-separated text.

    Returns:
        The text of field 2 up to (not including) its first '.'.

    Raises:
        IndexError: if *line* has fewer than two tab-separated fields.
    """
    fields = line.rstrip("\r\n").split("\t")
    return fields[1].split(".")[0]


if __name__ == "__main__":
    # Open both files once. The output stays in append mode, so repeated runs
    # keep adding to the same file exactly as before.
    with open("./huyangzxz", "r") as src, open("./7.30test.txt", "a") as dst:
        for line in src:
            if not line.strip():
                continue  # a blank line has no second field to read
            dst.write(extract_gene_id(line) + "\n")
# 用pandas处理 按列名 合并 (processing with pandas: merging on a column name)
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys, os
import pandas as pd
import numpy as np
def merge_in_reference_order(map_df, table_df):
    """Merge *table_df* onto the 'Female ref' column of *map_df*.

    The 'Female ref' column is taken on its own and renamed to 'SP', then
    merged with *table_df* using pandas' defaults (inner join on the columns
    both frames share), which keeps the key order of the left frame. The
    column labelled '1' is dropped from the result.

    Args:
        map_df: DataFrame containing a 'Female ref' column.
        table_df: DataFrame to merge; it must share the 'SP' column and
            contain a column labelled '1'.

    Returns:
        A new DataFrame; neither input is modified.

    Raises:
        KeyError: if 'Female ref' is missing from *map_df* or '1' is missing
            from the merged frame.
    """
    # Select the single column (not a row) and give it the shared key name.
    keys = map_df.loc[:, ['Female ref']].rename(columns={'Female ref': 'SP'})
    merged = pd.merge(keys, table_df)  # rows follow the order of `keys`
    # axis=1 drops a column (axis=0 would drop a row).
    return merged.drop(labels=['1'], axis=1)


if __name__ == "__main__":
    df1 = pd.read_excel('/home/zjp/1/aaxabPsimoniiMap.xlsx', sheet_name='SLG1')
    df2 = pd.read_csv('/home/zjp/1/aaxab.txt', sep='\t')
    a = merge_in_reference_order(df1, df2)
    a.to_csv("aaxabSLG1", sep='\t')  # save the merged table
# 对列重命名 (renaming columns)
# Labels of the columns removed from the table: "55".."60" and "101".."106".
_DROPPED_COLUMNS = tuple(
    str(n) for n in list(range(55, 61)) + list(range(101, 107))
)


def drop_numbered_columns(table, dropped=_DROPPED_COLUMNS):
    """Label the columns "1", "2", ... and drop the ones listed in *dropped*.

    The columns are numbered from the table's actual width instead of a fixed
    145, and the frame is used directly rather than being rebuilt from
    ``values.tolist()``, so column dtypes are kept as they were read.

    Args:
        table: DataFrame whose columns are relabelled by 1-based position.
        dropped: iterable of string labels to remove after relabelling.

    Returns:
        A new DataFrame without the dropped columns; *table* is not modified.

    Raises:
        KeyError: if a label in *dropped* does not exist, i.e. the table has
            fewer columns than the largest label.
    """
    numbered = table.copy()
    numbered.columns = [str(n) for n in range(1, numbered.shape[1] + 1)]
    return numbered.drop(list(dropped), axis=1)


if __name__ == "__main__":
    x = sys.argv[1]    # input table, read without a header row
    out = sys.argv[2]  # output path
    df1 = pd.read_table(x, header=None)
    data = drop_numbered_columns(df1)
    data.to_csv(out, sep='\t', header=False, index=False)
# 字典 (dictionaries)
#!/usr/bin/env python
def clip_interval(start, end, length):
    """Clamp an interval to the range [0, length].

    Args:
        start: interval start, as an int or a decimal string.
        end: interval end, as an int or a decimal string.
        length: upper bound applied to *end*.

    Returns:
        ``(start, end)`` as ints, with a negative start raised to 0 and an
        end beyond *length* lowered to *length*.

    Raises:
        ValueError: if *start* or *end* is not an integer literal.
    """
    start, end = int(start), int(end)
    return max(start, 0), min(end, length)


if __name__ == "__main__":
    # Build a dictionary from two columns: column 1 -> int(column 2).
    chr_length = {}
    with open('./p.simonii.fasta.fai') as f:
        for line in f:
            if not line.strip():
                continue  # blank lines carry no columns
            fields = line.strip().split('\t')
            chr_length[fields[0]] = int(fields[1])

    # Print each interval clipped to the length recorded for its first column.
    with open('./p.simonii.fasta.ssr.bed') as f:
        for line in f:
            if not line.strip():
                continue
            fields = line.strip().split('\t')
            chr_name = fields[0]
            start, end = clip_interval(
                fields[1], fields[2], chr_length[chr_name]
            )
            print(chr_name, start, end, sep='\t')
import os,sys
def fastq_row(sample):
    """Return the tab-separated output row for one sample name.

    The row holds the name itself followed by the name with
    '.R1.fastq_filtered' and '.R2.fastq_filtered' appended.
    """
    return "\t".join(
        (sample, sample + '.R1.fastq_filtered', sample + '.R2.fastq_filtered')
    )


if __name__ == "__main__":
    x = sys.argv[1]  # input path; only the first tab-separated column is used
    with open(x) as f:
        for line in f:
            if not line.strip():
                continue  # a blank line would yield a row with an empty name
            print(fastq_row(line.strip().split('\t')[0]))
import re,os,sys
# Command-line arguments used by the filtering loop further down:
x=sys.argv[1]  # path of the tab-separated input that is read line by line
y=sys.argv[2]  # path of the file that selected lines are appended to
def mufun(a, max_diff=4):
    """Return 1 if the two unit/count pairs in *a* differ in total length by more than *max_diff*.

    *a* is split into runs of word characters. The first four runs are read
    as ``unit, count, unit, count`` and each pair's length is
    ``len(unit) * count``.
    NOTE(review): presumably *a* is repeat notation such as "(AT)10(GA)5",
    which yields AT, 10, GA, 5 — confirm against the input files.

    Args:
        a: text containing at least four word-character runs.
        max_diff: largest length difference that still returns 0.

    Returns:
        1 when the absolute length difference exceeds *max_diff*, else 0.

    Raises:
        IndexError: if *a* contains fewer than four word-character runs.
        ValueError: if the second or fourth run is not an integer.
    """
    tokens = re.findall(r'\w+', a)
    # Same value as len(unit * count), without building the repeated string.
    first = len(tokens[0]) * int(tokens[1])
    second = len(tokens[2]) * int(tokens[3])
    return 1 if abs(first - second) > max_diff else 0
if __name__ == "__main__":
    # Select every input line whose columns 4-6 all contain "(" and for which
    # mufun() returns 1 on at least one of those columns.
    kept = []
    with open(x) as f:
        for line in f:
            if not line.strip():
                continue  # blank lines have no columns to inspect
            tmp = line.strip().split('\t')
            cols = (tmp[3], tmp[4], tmp[5])
            if not all("(" in col for col in cols):
                continue
            # mufun is evaluated on all three columns, as before.
            hits = mufun(cols[0]) + mufun(cols[1]) + mufun(cols[2])
            if hits > 0:
                kept.append(line)

    # Append once instead of reopening the output for every selected line.
    # The file is still only touched when something was selected.
    if kept:
        with open(y, "a+") as f2:
            f2.writelines(kept)