[nlp]edit distance with trie

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.nio.Buffer;
import java.util.*;

class Solution{

    public static void main(String[] args){
        try{

            Solution s = new Solution();
            FileReader fr = new FileReader("/Users/kangyue/Downloads/dictionary.txt");
            BufferedReader br = new BufferedReader(fr);
            List<String> dic = new LinkedList<>();
            String line = "";

            while((line = br.readLine()) != null){
                dic.add(line);
            }

            FileReader raw_fr = new FileReader("/Users/kangyue/Downloads/raw.txt");
            BufferedReader raw_br = new BufferedReader(raw_fr);


            FileWriter fw = new FileWriter("output_task1_cost1");
            BufferedWriter bw = new BufferedWriter(fw);





            s.construct(dic);
            int i = 0;

//            line = "rentan";
//            Result r = s.traverse(line);
//
//            for(String word : r.words){
//                System.out.println(word +  " ");
//
//
//                }

            while((line = raw_br.readLine()) != null){
                Result r = s.traverse(line);

                for(String word : r.words){
                    bw.write(word +  " ");

                }
                bw.write(r.step + "\n");


                bw.flush();



            }
            bw.close();


        }catch(Exception e){
            e.printStackTrace();
        }


    }

    public String getWord(TrieNode t){
        StringBuilder sb = new StringBuilder();


        while(t.c != '#'){
            sb.insert(0,t.c);
            t = t.parent;
        }

        return sb.toString();
    }





    Trie trie;
    class TrieNode{
        boolean isWord;
        TrieNode parent;
        char c;
        Map<Character,TrieNode> next;
        int[] dp;
        TrieNode(char c){
            next = new HashMap<>();
            this.c = c;
        }
        TrieNode(char c,TrieNode parent){
            this(c);
            this.parent = parent;
        }

    }
    class Trie{
        TrieNode root;
        Trie(){
            root = new TrieNode('#');
        }

        public void insert(String s){
            TrieNode cur = root;
            for(int i = 0; i < s.length(); i++){
                char c = s.charAt(i);
                if(!cur.next.containsKey(c)){
                    cur.next.put(c,new TrieNode(c,cur));

                }
                cur = cur.next.get(c);

            }
            cur.isWord = true;
        }
    }

    public void construct(List<String> dic){
        trie = new Trie();

        for(String s : dic){
            trie.insert(s);
        }



    }
    class Result{
        List<String> words;
        int step;
        Result(List<String> words,int step){
            this.words = words;
            this.step = step;
        }
    }

    public Result traverse(String misWord){
        List<String> minDisWord = new LinkedList<>();
        int minDis = Integer.MAX_VALUE;


        Queue<TrieNode> q = new LinkedList<>();
        q.offer(trie.root);


        int len = 0;

        while(!q.isEmpty()){
            int size = q.size();
            for(int k = 0; k < size; k++){
                TrieNode cur = q.poll();

                cur.dp = new int[misWord.length() + 1];
                char c = cur.c;




                if(c == '#'){
                    for(int j = 0; j < cur.dp.length; j++){
                        cur.dp[j] = j;
                    }

                }else {
                    cur.dp[0] = len;

                    for (int j = 1; j < cur.dp.length; j++) {
                        cur.dp[j] = c == misWord.charAt(j - 1) ? cur.parent.dp[j - 1] : Math.min(cur.parent.dp[j - 1] + 1, Math.min(cur.parent.dp[j], cur.dp[j - 1]) + 1);

                    }
                }

                if(cur.isWord && cur.dp[misWord.length()] <= minDis){


                    if(cur.dp[misWord.length()] == minDis){
                        minDisWord.add(getWord(cur));
                    }
                    else{
                        minDis = cur.dp[misWord.length()];
                        minDisWord = new LinkedList<>();
                        minDisWord.add(getWord(cur));
                    }


                }

                for(TrieNode t : cur.next.values()){
                    q.offer(t);
                }



            }
            len++;
        }

        return new Result(minDisWord,minDis);





    }

}
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容