一:对起始,终止密码子计数
# Codons of interest mapped to their role; any other codon is ignored.
# NOTE(review): the keys use T (not U), so a sequence written with U will
# never match -- confirm the alphabet used in the input file.
codon_table = {'TAA': 'STOP', 'TAG': 'STOP', 'TGA': 'STOP', 'ATG': 'START'}


def count_codons(sequence, frame, table=codon_table):
    """Count START and STOP codons of *sequence* in one reading frame.

    The sequence is cut into consecutive triplets beginning at index
    *frame* (0, 1 or 2).  A trailing fragment shorter than three letters
    is never present in *table*, so it is ignored.

    Args:
        sequence: Nucleotide string to scan.
        frame: Zero-based offset of the first codon.
        table: Mapping from codon to 'START' or 'STOP'.

    Returns:
        A ``(start_count, stop_count)`` tuple.
    """
    count_START = 0
    count_STOP = 0
    for i in range(frame, len(sequence), 3):
        role = table.get(sequence[i:i + 3])
        if role == 'STOP':
            count_STOP += 1
        elif role == 'START':
            count_START += 1
    return count_START, count_STOP


if __name__ == "__main__":
    # Read the FASTA file: header lines start with '>' and are skipped, all
    # other lines are stripped and joined.  The ``with`` block guarantees
    # the file handle is closed.
    with open('C:\\shiyan\\honghua\\rna_test.fa', 'r') as fasta:
        rna = ''.join(
            line.strip() for line in fasta if not line.startswith('>')
        )

    # Scan the sequence three times, starting at the first, second and
    # third position respectively.
    for frame in range(3):
        print('Reading frame' + str(frame + 1))
        count_START, count_STOP = count_codons(rna, frame)
        print('START', count_START)
        print('STOP', count_STOP)
Reading frame1
START 81
STOP 234
Reading frame2
START 82
STOP 222
Reading frame3
START 83
STOP 204
二:写一个基于序列的二级结构元素预测的程序。
提示:使用如下偏好表,其中pref_H是α螺旋,pref_E是β折叠:
# Alpha-helix preference value for each one-letter amino-acid code.
pref_H = {
    'A': 1.45, 'C': 0.77, 'D': 0.98, 'E': 1.53, 'F': 1.12,
    'G': 0.53, 'H': 1.24, 'I': 1.00, 'K': 1.07, 'L': 1.34,
    'M': 1.20, 'N': 0.73, 'P': 0.59, 'Q': 1.17, 'R': 0.79,
    'S': 0.79, 'T': 0.82, 'V': 1.14, 'W': 1.14, 'Y': 0.61,
}
# Beta-strand preference value for each one-letter amino-acid code.
pref_E = {
    'A': 0.97, 'C': 1.30, 'D': 0.80, 'E': 0.26, 'F': 1.28,
    'G': 0.81, 'H': 0.71, 'I': 1.60, 'K': 0.74, 'L': 1.22,
    'M': 1.67, 'N': 0.65, 'P': 0.62, 'Q': 1.23, 'R': 0.90,
    'S': 0.72, 'T': 1.20, 'V': 1.65, 'W': 1.19, 'Y': 1.29,
}
def predict_structure(sequence, helix_pref, strand_pref):
    """Label every residue of *sequence* with a secondary-structure code.

    For each residue the helix and strand preferences are compared:

    * 'H' when the helix preference is at least 1 and strictly greater
      than the strand preference;
    * 'E' when the strand preference is at least 1 and not smaller than
      the helix preference (so a tie at or above 1 yields 'E');
    * 'l' otherwise.

    NOTE(review): the sample output recorded after this script in the notes
    shows an uppercase 'L' for the third class, while the code emits a
    lowercase 'l' -- confirm which one is intended.

    Args:
        sequence: String of one-letter residue codes.
        helix_pref: Mapping from residue code to helix preference.
        strand_pref: Mapping from residue code to strand preference.

    Returns:
        A string of the same length as *sequence* made of 'H', 'E', 'l'.

    Raises:
        KeyError: If a residue is missing from either preference table.
    """
    labels = []
    for residue in sequence:
        helix = helix_pref[residue]
        strand = strand_pref[residue]
        if helix >= 1 and strand < helix:
            labels.append('H')
        elif strand >= 1 and helix <= strand:
            labels.append('E')
        else:
            labels.append('l')
    # Join once at the end instead of growing a string inside the loop.
    return ''.join(labels)


if __name__ == "__main__":
    # Read the FASTA file: header lines start with '>' and are skipped, all
    # other lines are stripped and joined.  The ``with`` block guarantees
    # the file handle is closed.
    with open('C:\\shiyan\\honghua\\Tempeture_reverse\\sequence.fasta', 'r') as fasta:
        rna = ''.join(
            line.strip() for line in fasta if not line.startswith('>')
        )

    struc_rna = predict_structure(rna, pref_H, pref_E)
    print(struc_rna)
EHEEEEEEEEEEEEEEEEEEEEHEEEHEELEHHHLHELHLLEEEELEEEEHEELEEEHLHEHEEEEEEEEEEEEEEEEEEEEHEEEHEELEHHHLHELHLLEEEELEEEEHEELEEEHLHEELHEEEEHHHEEEEELLLEEEHHLEEHEEEEEEELEEEELLEEEEEEHHHLELEEHLLHEHEEEEEEEEEEEEEEEEEEEEHEEEHEELEHHHLHELHLLEEEELEEEEHEELEEEHLHEELHEEEEHHHEEEEELLLEEEHHLEEHEEEEEEELEEEELLEEEEEEHHHLELEEHLLHEEHEHLLELELHLEEHEELELEEEHLEEEEHEEEEELEHHEEEEHLEHEEHLEHHEEHEEEHEEEEEEEEEEEEEEEEEEEEHEEEHEELEHHHLHELHLLEEEELEEEEHEELEEEHLHEELHEEEEHHHEEEEELLLEEEHHLEEHEEEEEEELEEEELLEEEEEEHHHLELEEHLLHEEHEHLLELELHLEEHEELELEEEHLEEEEHEEEEELEHHEEEEHLEHEEHLEHHEEHEEEEHELEHEHHLEEHEEHEEHLHHEHHEHHHEEEEEEHEHEHEHELEHEHEHELEHEHEEEEHEEEEEEEEEEEEEEEEEEEEHEEEHEELEHHHLHELHLLEEEELEEEEHEELEEEHLHEELHEEEEHHHEEEEELLLEEEHHLEEHEEEEEEELEEEELLEEEEEEHHHLELEEHLLHEEHEHLLELELHLEEHEELELEEEHLEEEEHEEEEELEHHEEEEHLEHEEHLEHHEEHEEEEHELEHEHHLEEHEEHEEHLHHEHHEHHHEEEEEEHEHEHEHELEHEHEHELEHEHEEEEEHEEHHHHHHEHHEEHLEELEHHHELEEEEHLHHHELHELLHEEHHLHEHELEHHEEEEEHEEEEEEEEEEEEEEEEEEEEHEEEHEELEHHHLHELHLLEEEELEEEEHEELEEEHLHEELHEEEEHHHEEEEELLLEEEHHLEEHEEEEEEELEEEELLEEEEEEHHHLELEEHLLHEEHEHLLELELHLEEHEELELEEEHLEEEEHEEEEELEHHEEEEHLEHEEHLEHHEEHEEEEHELEHEHHLEEHEEHEEHLHHEHHEHHHEEEEEEHEHEHEHELEHEHEHELEHEHEEEEEHEEHHHHHHEHHEEHLEELEHHHELEEEEHLHHHELHELLHEEHHLHEHELEHHEEEEHLHEHEHEEEEEHLEHHEHHEHHEEEEEHEHEHHLEEHEEHHLHHHHHHLEHLLL