def fmmseg(sen, max, strs):
    '''
    Segment a sentence with forward maximum matching (FMM).

    Scanning left to right, the longest prefix of the remaining text
    (at most ``max`` characters) that is found in ``strs`` is emitted as
    a word; if no multi-character prefix matches, a single character is
    emitted instead.

    sen: sentence to segment
    max: maximum word length to try (must be >= 1 for non-empty input)
    strs: dictionary of known words (any container supporting ``in``)

    Returns the list of segments in reading order.
    Raises ValueError if ``sen`` is non-empty and ``max`` is less than 1
    (such a call could never consume any input).
    '''
    if not sen:
        return []
    if max < 1:
        # Without this guard the scan below could never advance.
        raise ValueError("max must be at least 1, got %r" % (max,))

    returnlist = []
    total = len(sen)
    pos = 0
    while pos < total:
        # Longest candidate first; never probe past the end of the sentence,
        # since those windows would all be the same truncated slice.
        for size in range(min(max, total - pos), 1, -1):
            if sen[pos:pos + size] in strs:
                break
        else:
            # No dictionary word starts here: fall back to one character.
            size = 1
        returnlist.append(sen[pos:pos + size])
        pos += size
    return returnlist
def bmmseg(sen, max, strs):
    '''
    Segment a sentence with backward maximum matching (BMM).

    Scanning right to left, the longest suffix of the remaining text
    (at most ``max`` characters) that is found in ``strs`` is emitted as
    a word; if no multi-character suffix matches, a single character is
    emitted instead.

    sen: sentence to segment
    max: maximum word length to try (must be >= 1 for non-empty input)
    strs: dictionary of known words (any container supporting ``in``)

    Returns the list of segments in the order they were found, i.e. from
    the END of the sentence to the start; reverse it for reading order.
    Raises ValueError if ``sen`` is non-empty and ``max`` is less than 1
    (such a call could never consume any input).
    '''
    if not sen:
        return []
    if max < 1:
        # Without this guard the scan below could never advance.
        raise ValueError("max must be at least 1, got %r" % (max,))

    returnlist = []
    end = len(sen)
    while end > 0:
        # Longest candidate first; never probe past the start of the
        # sentence, since those windows would all be the same slice.
        for size in range(min(max, end), 1, -1):
            if sen[end - size:end] in strs:
                break
        else:
            # No dictionary word ends here: fall back to one character.
            size = 1
        returnlist.append(sen[end - size:end])
        end -= size
    return returnlist
import time
def bmmseg(sen, max, strs):
    '''
    Segment a sentence with backward maximum matching (BMM).

    Scanning right to left, the longest suffix of the remaining text
    (at most ``max`` characters) that is found in ``strs`` is emitted as
    a word; if no multi-character suffix matches, a single character is
    emitted instead.

    sen: sentence to segment
    max: maximum word length to try (must be >= 1 for non-empty input)
    strs: dictionary of known words (any container supporting ``in``)

    Returns the list of segments in the order they were found, i.e. from
    the END of the sentence to the start; reverse it for reading order.
    Raises ValueError if ``sen`` is non-empty and ``max`` is less than 1
    (such a call could never consume any input).
    '''
    if not sen:
        return []
    if max < 1:
        # Without this guard the scan below could never advance.
        raise ValueError("max must be at least 1, got %r" % (max,))

    returnlist = []
    end = len(sen)
    while end > 0:
        # Longest candidate first; never probe past the start of the
        # sentence, since those windows would all be the same slice.
        for size in range(min(max, end), 1, -1):
            if sen[end - size:end] in strs:
                break
        else:
            # No dictionary word ends here: fall back to one character.
            size = 1
        returnlist.append(sen[end - size:end])
        end -= size
    return returnlist
def fmmseg(sen, max, strs):
    '''
    Segment a sentence with forward maximum matching (FMM).

    Scanning left to right, the longest prefix of the remaining text
    (at most ``max`` characters) that is found in ``strs`` is emitted as
    a word; if no multi-character prefix matches, a single character is
    emitted instead.

    sen: sentence to segment
    max: maximum word length to try (must be >= 1 for non-empty input)
    strs: dictionary of known words (any container supporting ``in``)

    Returns the list of segments in reading order.
    Raises ValueError if ``sen`` is non-empty and ``max`` is less than 1
    (such a call could never consume any input).
    '''
    if not sen:
        return []
    if max < 1:
        # Without this guard the scan below could never advance.
        raise ValueError("max must be at least 1, got %r" % (max,))

    returnlist = []
    total = len(sen)
    pos = 0
    while pos < total:
        # Longest candidate first; never probe past the end of the sentence,
        # since those windows would all be the same truncated slice.
        for size in range(min(max, total - pos), 1, -1):
            if sen[pos:pos + size] in strs:
                break
        else:
            # No dictionary word starts here: fall back to one character.
            size = 1
        returnlist.append(sen[pos:pos + size])
        pos += size
    return returnlist
if __name__ == "__main__":
    # Demo: segment one sample sentence with both strategies and time the run.
    # perf_counter() is the high-resolution clock intended for measuring
    # short durations; time.time() can be too coarse for a run this quick.
    start = time.perf_counter()
    print("=============go=============")
    # Read the dictionary (one word per line) and close the file promptly.
    # A set gives constant-time membership tests for the segmenters, and
    # avoids shadowing the builtin ``str`` with a local variable.
    with open("dict.txt", 'r', encoding='utf-8') as dict_file:
        strs = {line.strip() for line in dict_file}
    sen = "你昨天晚上吃饭团了吗"
    r = fmmseg(sen, 4, strs)
    print(r)
    r = bmmseg(sen, 4, strs)
    # bmmseg collects words from the end of the sentence backwards, so the
    # list is reversed to print it in reading order.
    print(r[::-1])
    end = time.perf_counter()
    print(end - start)
# Dictionary words, one per line (apparently the contents of dict.txt):
#   吃饭
#   昨天
#   晚上
#   饭团
# Sample console output from running the script:
#   =============go=============
#   ['你', '昨天', '晚上', '吃饭', '团', '了', '吗']
#   ['你', '昨天', '晚上', '吃', '饭团', '了', '吗']
#   0.0
#   [Finished in 0.2s]