20190724工作进展

-- Upload the local Python script hs_utdf_7.py as a resource, then register its
-- Processor class as the function hs_delete_album_7.
add py /home/hengsong/query_co_video/src/hs_utdf_7.py;
CREATE FUNCTION hs_delete_album_7 AS hs_utdf_7.Processor USING hs_utdf_7.py;

-- Smoke-test the newly registered function: run hs_delete_album_7 over
-- hs_tmp_dssm_3 and keep at most 10 output rows in hs_tmp_47.
-- The function emits three columns, named by the alias list below.
CREATE TABLE hs_tmp_47 AS
SELECT
    graph_embedding:hs_delete_album_7(se_keyword_ws, title_ws, label)
        AS (se_keyword_ws, title_ws, label)
FROM hs_tmp_dssm_3
LIMIT 10;

-- Build hs_tmp_48 from hs_tmp_dssm_3, passing both segmented text columns
-- (se_keyword_ws and title_ws) through hs_delete_album_1 while carrying
-- index and item_id through unchanged.
CREATE TABLE hs_tmp_48 AS
SELECT
    index,
    graph_embedding:hs_delete_album_1(se_keyword_ws) AS se_keyword_ws,
    item_id,
    graph_embedding:hs_delete_album_1(title_ws) AS title_ws
FROM hs_tmp_dssm_3;

  1. 使用主搜(MAINSE)分词器对 se_keyword 和 title 进行分词

-- Create hs_tmp_dssm_14 (LIFECYCLE 20) from hs_tmp_dssm_3, keeping the existing
-- columns and adding two new ones produced by alinlp_segment in "MAINSE" mode:
-- se_keyword_mainse_ws (from se_keyword) and title_mainse_ws (from title).
CREATE TABLE IF NOT EXISTS hs_tmp_dssm_14 LIFECYCLE 20 AS
SELECT
    index,
    se_keyword,
    se_keyword_ws,
    search_kg:alinlp_segment(se_keyword, "MAINSE", "0", "1") AS se_keyword_mainse_ws,
    item_id,
    title,
    title_ws,
    search_kg:alinlp_segment(title, "MAINSE", "0", "1") AS title_mainse_ws,
    pict_url
FROM hs_tmp_dssm_3;

统计主搜分词结果的词库大小(去重后的词数):

-- Vocabulary size of the MAINSE segmentation: split se_keyword_mainse_ws on a
-- single space via bi_split_value, then count the distinct resulting words.
SELECT COUNT(DISTINCT word)
FROM (
    SELECT bi_udf:bi_split_value(se_keyword_mainse_ws, " ") AS word
    FROM graph_embedding.hs_tmp_dssm_14
) split_words;

©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容