一、在给定字符串(string)中,找出长度为k的子串里出现次数最多的字符串
# Copy your updated FrequentWords function (along with all required subroutines) below this line
def FrequentWords(Text, k):
    """Return every substring of length k that occurs most often in Text.

    Args:
        Text: The string to scan.
        k: Length of the substrings to count.

    Returns:
        A list of the keys of FrequencyMap(Text, k) whose count equals the
        maximum count, in the iteration order of that map. The list is
        empty when the map is empty.
    """
    freq = FrequencyMap(Text, k)
    if not freq:
        # max() raises ValueError on an empty sequence, so stop early.
        return []
    m = max(freq.values())
    # Keep each key whose corresponding frequency value is equal to m.
    return [pattern for pattern, count in freq.items() if count == m]
# your code here
# Copy your FrequencyMap() function here.
def FrequencyMap(Text, k):
    """Count how many times each substring of length k occurs in Text.

    Args:
        Text: The string to scan.
        k: Length of the sliding window.

    Returns:
        A dict mapping each length-k substring of Text to its number of
        (possibly overlapping) occurrences. The dict is empty when Text is
        shorter than k.
    """
    freq = {}
    # A window of length k can start at len(Text) - k + 1 positions.
    for i in range(len(Text) - k + 1):
        pattern = Text[i:i + k]
        freq[pattern] = freq.get(pattern, 0) + 1
    return freq
# Vibrio cholerae oriC sequence to analyse, together with the substring length k.
Text = "ATCAATGATCAACGTAAGCTTCTAAGCATGATCAAGGTGCTCACACAGTTTATCCACAACCTGAGTGGATGACATCAAGATAGGTCGTTGTATCTCCTTCCTCTCGTACTCTCATGACCACGGAAAGATGATCAAGAGAGGATGATTTCTTGGCCATATCGCAATGAATACTTGTGACTTGTGCTTCCAATTGACATCTTCAGCGCCATATTGCGCTGGCCAAGGTGACGGAGCGGGATTACGAAAGCATGATCATGGCTGTTGTTCTGTTTATCTTGTTTTGACTGAGACTTGTTAGGATAGACGGTTTTTCATCACTGACTAGCCAAAGCCTTACTCTGCCTGACATCGACCGTAAATTGATAATGAATTTACATGCTTCCGCGACGATTTACCTCTTGATCATCGATCCGATTGAAGATCTTCAATTGTTAATTCTCTTGCCTCGACTCATAGCCATGATGAGCTCTTGATCATGTTTCCTTAACCCTCTATTTTTTACGGAAGAATGATCAAGCTGCTGCTCTTGATCATCGTTTC"
k = 10
# Print the most frequent length-k substrings of Text.
print(FrequentWords(Text, k))
二、产生DNA互补链
1.将string形式的DNA链反转
def Reverse(Pattern):
    """Return Pattern with its characters in reverse order.

    Args:
        Pattern: The string to reverse.

    Returns:
        A new string holding the characters of Pattern from last to first.
    """
    # A slice with step -1 walks the string backwards in a single pass.
    return Pattern[::-1]
2.互补链
# Input: A DNA string Pattern
# Output: The complementary string of Pattern (with every nucleotide replaced by its complement).
def Complement(Pattern):
    """Return the complementary string of Pattern.

    Each nucleotide is replaced by its complement: A <-> T and C <-> G.
    Characters other than A, T, C and G are left unchanged rather than
    being silently converted to "C".

    Args:
        Pattern: A DNA string.

    Returns:
        A string of the same length as Pattern with every A, T, C and G
        replaced by its complement.
    """
    # One translation table performs all four substitutions in one pass.
    return Pattern.translate(str.maketrans("ATCG", "TAGC"))
3.利用以上两个函数,由DNA单链生成对应的5'→3'方向的反向互补链
# Output: The reverse complement of Pattern
def ReverseComplement(Pattern):
    """Return the reverse complement of Pattern.

    Args:
        Pattern: A DNA string.

    Returns:
        The string obtained by reversing Pattern with Reverse and then
        passing the reversed string through Complement.
    """
    return Complement(Reverse(Pattern))
# Copy your Reverse() function here.
def Reverse(Pattern):
    """Return Pattern with its characters in reverse order.

    Args:
        Pattern: The string to reverse.

    Returns:
        A new string holding the characters of Pattern from last to first.
    """
    # A slice with step -1 walks the string backwards in a single pass.
    return Pattern[::-1]
# Copy your Complement() function here.
def Complement(Pattern):
    """Return the complementary string of Pattern.

    Each nucleotide is replaced by its complement: A <-> T and C <-> G.
    Characters other than A, T, C and G are left unchanged rather than
    being silently converted to "C".

    Args:
        Pattern: A DNA string.

    Returns:
        A string of the same length as Pattern with every A, T, C and G
        replaced by its complement.
    """
    # One translation table performs all four substitutions in one pass.
    return Pattern.translate(str.maketrans("ATCG", "TAGC"))
三、从基因组中查找计算得出的Pattern的位置
def PatternMatching(Pattern, Genome):
    """Find every position at which Pattern occurs in Genome.

    Args:
        Pattern: The substring to search for.
        Genome: The string to search in.

    Returns:
        A list of the 0-based start indices, in increasing order, at which
        Pattern appears in Genome. Overlapping occurrences are all
        reported. The list is empty when Pattern is longer than Genome.
    """
    # Only start positions that leave room for the whole pattern are tried.
    last_start = len(Genome) - len(Pattern)
    return [
        i for i in range(last_start + 1)
        # startswith(Pattern, i) compares in place without building a slice.
        if Genome.startswith(Pattern, i)
    ]
四、实际应用——包括读取基因组数据
# Copy your PatternMatching function below this line.
def PatternMatching(Pattern, Genome):
    """Find every position at which Pattern occurs in Genome.

    Args:
        Pattern: The substring to search for.
        Genome: The string to search in.

    Returns:
        A list of the 0-based start indices, in increasing order, at which
        Pattern appears in Genome. Overlapping occurrences are all
        reported. The list is empty when Pattern is longer than Genome.
    """
    # Only start positions that leave room for the whole pattern are tried.
    last_start = len(Genome) - len(Pattern)
    return [
        i for i in range(last_start + 1)
        # startswith(Pattern, i) compares in place without building a slice.
        if Genome.startswith(Pattern, i)
    ]
# The following lines will automatically read in the Vibrio cholerae genome for you and store it in a variable named v_cholerae
import sys # needed to read the genome
# NOTE(review): the name `input` shadows the built-in input() from here on.
input = sys.stdin.read().splitlines()  # all of standard input, split into lines
# The genome is taken from the second input line (index 1).
v_cholerae = input[1]
# Search the genome for every occurrence of the pattern below.
Pattern = "CTTGATCAT"
positions = PatternMatching(Pattern, v_cholerae)
# Print the list of matching start positions.
print(positions)