-- Commonly used DSSM tables
-- Query dictionary: one row per distinct (query_id, query) pair in hs_tmp_247.
create table hs_dssm_dic_query_7 as
select distinct
    query_id,
    query
from hs_tmp_247;
-- Title dictionary: one row per distinct (item_id, title) pair in hs_tmp_247.
create table hs_dssm_dic_title_11 as
select distinct
    item_id,
    title
from hs_tmp_247;
-- Expands every non-empty title into (item_id, word, weight) rows in three steps:
--   1. tagged: run search_kg:alinlp_termweight_ecom over each title with a
--      byte length above zero and keep its output as tag_result.
--   2. pairs:  bi_udf:bi_split_value turns (item_id, tag_result) into
--      (item_id, pair) rows -- presumably one row per "%"-separated piece, TODO confirm.
--   3. outer:  graph_embedding:hs_split_1 turns each non-empty pair into
--      (item_id, word, weight) -- presumably by splitting on "|", TODO confirm.
-- NOTE(review): the meaning of the "{weight}", 1, 0 arguments is not visible here.
create table graph_embedding.hs_dssm_dic_title_12 as
select graph_embedding:hs_split_1(item_id, pair, "|") as (item_id, word, weight)
from (
    select bi_udf:bi_split_value(item_id, tag_result, "%") as (item_id, pair)
    from (
        select
            item_id,
            search_kg:alinlp_termweight_ecom(title, "%", "{weight}", 1, 0) as tag_result
        from graph_embedding.hs_dssm_dic_title_11
        where lengthb(title) > 0
    ) tagged
) pairs
where lengthb(pairs.pair) > 0;
-- Expands every non-empty query into (query_id, word, weight) rows in three steps:
--   1. tagged: run search_kg:alinlp_termweight_ecom over each query with a
--      byte length above zero and keep its output as tag_result.
--   2. pairs:  bi_udf:bi_split_value turns (query_id, tag_result) into
--      (query_id, pair) rows -- presumably one row per "%"-separated piece, TODO confirm.
--   3. outer:  graph_embedding:hs_split_1 turns each non-empty pair into
--      (query_id, word, weight) -- presumably by splitting on "|", TODO confirm.
-- NOTE(review): the meaning of the "{weight}", 1, 0 arguments is not visible here.
create table graph_embedding.hs_dssm_dic_query_8 as
select graph_embedding:hs_split_1(query_id, pair, "|") as (query_id, word, weight)
from (
    select bi_udf:bi_split_value(query_id, tag_result, "%") as (query_id, pair)
    from (
        select
            query_id,
            search_kg:alinlp_termweight_ecom(query, "%", "{weight}", 1, 0) as tag_result
        from graph_embedding.hs_dssm_dic_query_7
        where lengthb(query) > 0
    ) tagged
) pairs
where lengthb(pairs.pair) > 0;
-- Attaches a word embedding to every (query_id, word) row of hs_dssm_dic_query_8.
-- The word is passed through hs_return_clean before the embedding lookup.
-- NOTE(review): "100" is presumably the embedding dimension and "CONTENT_SEARCH"
-- the model/domain name -- confirm against the alinlp_word_embedding docs.
create table hs_dssm_dic_query_9 as
select
    query_id as id,
    word,
    search_kg:alinlp_word_embedding(hs_return_clean(word), "100", "CONTENT_SEARCH") as emb
from hs_dssm_dic_query_8;
-- Joins each query word's weight (hs_dssm_dic_query_8) with its embedding
-- (hs_dssm_dic_query_9) and adds new_weight, the output of
-- graph_embedding:change_weight_query_key_1(word, weight).
-- NOTE(review): hs_dssm_dic_query_9 is built row-for-row from hs_dssm_dic_query_8,
-- so a word that occurs more than once in the same query would match several
-- rows here and be multiplied by this join -- confirm whether that is intended.
create table hs_dssm_dic_query_10 as
select
    word_emb.id,
    word_weight.word,
    word_emb.emb,
    word_weight.weight,
    graph_embedding:change_weight_query_key_1(word_weight.word, word_weight.weight) as new_weight
from hs_dssm_dic_query_8 word_weight
inner join hs_dssm_dic_query_9 word_emb
    on  word_weight.query_id = word_emb.id
    and word_weight.word = word_emb.word;
-- Collapses the per-word rows of hs_dssm_dic_query_10 into one row per query id.
-- return_concat_1 aggregates (new_weight, emb) over the group into query_emb.
-- NOTE(review): presumably a weighted combination of the word embeddings -- TODO confirm.
create table hs_dssm_dic_query_11 as
select
    id,
    return_concat_1(new_weight, emb) as query_emb
from hs_dssm_dic_query_10
group by id;
-- Attaches a word embedding to every (item_id, word) row of hs_dssm_dic_title_12.
-- The word is passed through hs_return_clean before the embedding lookup.
-- NOTE(review): "100" is presumably the embedding dimension and "CONTENT_SEARCH"
-- the model/domain name -- confirm against the alinlp_word_embedding docs.
create table hs_dssm_dic_title_13 as
select
    item_id as id,
    word,
    search_kg:alinlp_word_embedding(hs_return_clean(word), "100", "CONTENT_SEARCH") as emb
from hs_dssm_dic_title_12;
-- Joins each title word's weight (hs_dssm_dic_title_12) with its embedding
-- (hs_dssm_dic_title_13) and adds new_weight, the output of
-- graph_embedding:change_weight_query_key_1(word, weight).
-- NOTE(review): hs_dssm_dic_title_13 is built row-for-row from hs_dssm_dic_title_12,
-- so a word that occurs more than once in the same title would match several
-- rows here and be multiplied by this join -- confirm whether that is intended.
create table hs_dssm_dic_title_14 as
select
    word_emb.id,
    word_weight.word,
    word_emb.emb,
    word_weight.weight,
    graph_embedding:change_weight_query_key_1(word_weight.word, word_weight.weight) as new_weight
from hs_dssm_dic_title_12 word_weight
inner join hs_dssm_dic_title_13 word_emb
    on  word_weight.item_id = word_emb.id
    and word_weight.word = word_emb.word;
-- Collapses the per-word rows of hs_dssm_dic_title_14 into one row per title id.
-- return_concat_1 aggregates (new_weight, emb) over the group into title_emb.
-- NOTE(review): presumably a weighted combination of the word embeddings -- TODO confirm.
create table hs_dssm_dic_title_15 as
select
    id,
    return_concat_1(new_weight, emb) as title_emb
from hs_dssm_dic_title_14
group by id;
-- Build the ground-truth table
-- Ground-truth table: every column of hs_query_title_inference_gt plus the ids
-- resolved by exact text match:
--   query_id  from hs_dssm_dic_query_7      (matched on query text)
--   title_id  from hs_dssm_dic_title_inf_10 (matched on title text, item_id renamed)
-- Rows whose query or title has no dictionary entry are dropped by the inner joins.
-- "select distinct" removes exact duplicate output rows.
-- The wildcard projections are written as c.* and a.* -- a bare "c." or "a." is
-- not valid SQL (the asterisks had been lost from the script).
-- NOTE(review): hs_dssm_dic_title_inf_10 is not created in this part of the
-- script -- confirm it exists before running.
-- NOTE(review): assumes hs_query_title_inference_gt has no query_id or title_id
-- column of its own, otherwise the added columns would clash -- TODO confirm.
create table hs_query_title_inference_gt_3 as
select distinct
    c.*,
    d.item_id as title_id
from (
    select
        a.*,
        b.query_id as query_id
    from hs_query_title_inference_gt a
    inner join hs_dssm_dic_query_7 b
        on a.query = b.query
) c
inner join hs_dssm_dic_title_inf_10 d
    on c.title = d.title;
-- Rewrites the ground-truth table in place, keeping only de-duplicated rows
-- whose label is 0 or 1. Rows with any other label, or a NULL label, are discarded.
insert overwrite table hs_query_title_inference_gt_3
select distinct *
from hs_query_title_inference_gt_3
where label = 0
   or label = 1;
-- Pairs each ground-truth (query_id, title_id) with its embeddings:
--   query_ws  = query_emb from hs_dssm_dic_query_inf_11 (joined on query_id = id)
--   video_ws  = title_emb from hs_dssm_dic_title_inf_14 (joined on title_id = id)
-- title_id is exposed as video_id. Pairs missing either embedding are dropped by
-- the inner joins, and exact duplicate rows are removed by "select distinct".
-- NOTE(review): the two *_inf_* embedding tables are not created in this part of
-- the script -- confirm they exist before running.
create table hs_tmp_267 as
select distinct
    gt_with_query.query_id,
    gt_with_query.query_ws,
    gt_with_query.title_id as video_id,
    title_vec.title_emb as video_ws
from (
    select
        gt.*,
        query_vec.query_emb as query_ws
    from hs_query_title_inference_gt_3 gt
    inner join hs_dssm_dic_query_inf_11 query_vec
        on gt.query_id = query_vec.id
) gt_with_query
inner join hs_dssm_dic_title_inf_14 title_vec
    on gt_with_query.title_id = title_vec.id;
-- (Reader comments: heading left over from the source web page, not part of the script.)