def reverse_complement(seq):
ntComplement = {'A':'T','T':'A','G':'C','C':'G','N':'N'}
RevSeqList = list(reversed(seq))
RevComSeqList = [ntComplement[k] for k in RevSeqList]
RevComSeq = ''.join(RevComSeqList)
return RevComSeq
gff_file = open('Sitalica_312_v2.2.gene.gff3')
genome_file = open('Sitalica_312_v2.fa')
out_file = open('Si.full.code.sequences.fasta','w')
chl_fasta = {}
for row in genome_file:
if ">" in row:
fsy = row.split()
chl_gene = fsy[0].replace('>', '')
fsy_sequence = ''
else:
row.strip()
sequences = row.split()
store = sequences[0]
fsy_sequence = fsy_sequence + store
chl_fasta[chl_gene] = fsy_sequence
genome_file.seek(0,0)
for line in gff_file:
fsy1 = line.split()
sign = fsy1[2]
if sign == 'mRNA':
if fsy1[6] == '+':
site_number = int(fsy1[3])-1000
if fsy1[0] in chl_fasta:
result = ">" + fsy1[8].split(';')[0].replace('ID=', '') + "\n" + chl_fasta[fsy1[0]][site_number:int(fsy1[3])] + "\n"
out_file.write(result)
# print(">" + fsy1[8] + "\n" + chl_fasta[fsy1[0]][site_number:fsy1[3]])
# genome_file.seek(0,0)
if fsy1[6] == '-':
site_number = int(fsy1[4])+1000
if fsy1[0] in chl_fasta:
word = chl_fasta[fsy1[0]][site_number:int(fsy1[3])]
reverse_complement(word)
result = ">" + fsy1[8].split(':')[0].replace('ID=', '') + "\n" + word + "\n"
out_file.write(result)
# print(">" + fsy1[8] + "\n" + chl_fasta[fsy1[0]][fsy1[4]:site_number])
这个程序写的不是很好,运行起来很慢,哎!