NSLinguisticTagger是iOS的自然语言处理工具,支持简体中文、繁体中文、日文、英文等多种语言。使用其做了一个分词的功能,结果很准确,连“南京市长江大桥”这样易混淆的句子都能够分得很准确。
// Splits the text currently held by self.text into word tokens with
// NSLinguisticTagger and builds a newline-separated listing of the tokens.
//
// Results:
//   array    - the tokens, in document order.
//   printStr - every token followed by "\n" (including the last one).
//
// NOTE(review): NSLinguisticTagger is deprecated in recent SDKs in favour of
// the NaturalLanguage framework (NLTokenizer) - consider migrating.
NSString *string = self.text.text;

// Skip whitespace and punctuation tokens, and keep multi-word names together.
NSLinguisticTaggerOptions options = NSLinguisticTaggerOmitWhitespace |
                                    NSLinguisticTaggerOmitPunctuation |
                                    NSLinguisticTaggerJoinNames;

// NSLinguisticTaggerUnit is a plain enumeration, not an option mask, so units
// must not be OR-ed together (Word | Document silently evaluates to Document).
// Word segmentation needs the word-level schemes.
NSArray<NSLinguisticTagScheme> *arr =
    [NSLinguisticTagger availableTagSchemesForUnit:NSLinguisticTaggerUnitWord
                                          language:@"zh-Hans"];
NSLinguisticTagger *tagger = [[NSLinguisticTagger alloc] initWithTagSchemes:arr
                                                                    options:options];
tagger.string = string;

NSMutableArray<NSString *> *array = [[NSMutableArray alloc] init];

// Only the token ranges are needed here, so enumerate with the token-type
// scheme; the tag value itself is ignored.
[tagger enumerateTagsInRange:NSMakeRange(0, string.length)
                      scheme:NSLinguisticTagSchemeTokenType
                     options:options
                  usingBlock:^(NSLinguisticTag _Nullable tag,
                               NSRange tokenRange,
                               NSRange sentenceRange,
                               BOOL * _Nonnull stop) {
    [array addObject:[string substringWithRange:tokenRange]];
}];

// Accumulate into a mutable string so the output is built in linear time
// rather than re-copying the whole string for every token.
NSMutableString *joinedTokens = [NSMutableString string];
for (NSString *token in array) {
    [joinedTokens appendFormat:@"%@\n", token];
}
NSString *printStr = [joinedTokens copy];
效果如图