-- Upload the local Python file hs_utdf_7.py as a resource.
add py /home/hengsong/query_co_video/src/hs_utdf_7.py;
-- Register the class Processor from hs_utdf_7.py under the function name
-- hs_delete_album_7 (the hs_tmp_47 query below calls it with a three-column
-- output alias).
CREATE FUNCTION hs_delete_album_7 AS hs_utdf_7.Processor USING hs_utdf_7.py;
-- Materialize a 10-row sample of hs_delete_album_7 applied to
-- (se_keyword_ws, title_ws, label) from hs_tmp_dssm_3.
-- The function's three output columns keep the input column names.
create table hs_tmp_47 as
select
    graph_embedding:hs_delete_album_7(se_keyword_ws, title_ws, label)
        as (se_keyword_ws, title_ws, label)
from hs_tmp_dssm_3
limit 10;
-- Copy index and item_id from hs_tmp_dssm_3, replacing se_keyword_ws and
-- title_ws with the output of graph_embedding:hs_delete_album_1.
-- NOTE(review): hs_delete_album_1 is not registered in this section of the
-- script; presumably it already exists in the graph_embedding project.
create table hs_tmp_48 as
select
    index,
    graph_embedding:hs_delete_album_1(se_keyword_ws) as se_keyword_ws,
    item_id,
    graph_embedding:hs_delete_album_1(title_ws) as title_ws
from hs_tmp_dssm_3;
-- Segment the keyword and title text with the main-search (MAINSE) tokenizer.
-- Build hs_tmp_dssm_14 (skipped if it already exists; table lifecycle 20)
-- from hs_tmp_dssm_3, adding two columns produced by search_kg:alinlp_segment
-- in "MAINSE" mode: one for se_keyword and one for title.
-- NOTE(review): the meaning of the trailing "0" and "1" arguments is not
-- visible here; confirm against the alinlp_segment documentation.
create table if not exists hs_tmp_dssm_14 lifecycle 20 as
select
    index,
    se_keyword,
    se_keyword_ws,
    search_kg:alinlp_segment(se_keyword, "MAINSE", "0", "1") as se_keyword_mainse_ws,
    item_id,
    title,
    title_ws,
    search_kg:alinlp_segment(title, "MAINSE", "0", "1") as title_mainse_ws,
    pict_url
from hs_tmp_dssm_3;
-- Determine the vocabulary size of the main-search segmentation results:
-- Count the distinct values of `word`, where `word` is the output of
-- bi_udf:bi_split_value over se_keyword_mainse_ws with a single-space
-- separator.
-- NOTE(review): presumably bi_split_value emits one row per token; confirm.
select count(distinct word)
from (
    select bi_udf:bi_split_value(se_keyword_mainse_ws, " ") as word
    from graph_embedding.hs_tmp_dssm_14
) split_words;