- 现在的结果在hs_tmp_79
# Boolean flags infer_query (default True) and infer_doc (default False).
# The pai commands in this file set them explicitly through
# --infer_query / --infer_doc inside -DuserDefinedParameters.
# NOTE(review): presumably copied from the entry script inference_v5.py - confirm.
tf.flags.DEFINE_boolean("infer_query", True, "infer query")
tf.flags.DEFINE_boolean("infer_doc", False, "infer doc")
-- Query-side inference.
-- Clears hs_dssm_result_query_0, then runs inference_v5.py on PAI (tensorflow140)
-- with hs_tmp_dssm_inf_querys as input and hs_dssm_result_query_0 as output,
-- using checkpoint hs_ugc_video.ckpt-3 with infer_query=True / infer_doc=False.
-- One argument per line; no line may start with "--" inside the command.
truncate table hs_dssm_result_query_0;
pai -name tensorflow140
    -Dscript="file:///home/hengsong/origin_deep_cluster_odps_8.tar.gz"
    -DentryFile="inference_v5.py"
    -Dcluster='{"worker":{"count":1, "cpu":200, "memory":4000}, "ps":{"count":1, "cpu":200, "memory":5000}}'
    -Dtables="odps://graph_embedding/tables/hs_tmp_dssm_inf_querys"
    -Doutputs="odps://graph_embedding/tables/hs_dssm_result_query_0"
    -DcheckpointDir="oss://bucket-automl/hengsong/?role_arn=acs:ram::1293303983251548:role/graph2018&host=cn-hangzhou.oss-internal.aliyun-inc.com"
    -DuserDefinedParameters="--learning_rate=3e-4 --batch_size=5000 --is_save_model=True --attention_type=1 --num_epochs=1 --ckpt=hs_ugc_video.ckpt-3 --infer_query=True --infer_doc=False"
    -DuseSparseClusterSchema=True;
-- Title-side inference.
-- Clears hs_dssm_result_title_0, then runs inference_v5.py on PAI (tensorflow140)
-- with hs_tmp_dssm_inf_titles as input and hs_dssm_result_title_0 as output,
-- using checkpoint hs_ugc_video.ckpt-3 with infer_query=False / infer_doc=True.
-- Only the title output table is truncated here: this run does not write
-- hs_dssm_result_query_0, and truncating it at this point would erase the query
-- embeddings that the nearest-neighbour step later reads as its -Dquery table.
truncate table hs_dssm_result_title_0;
pai -name tensorflow140
    -Dscript="file:///home/hengsong/origin_deep_cluster_odps_8.tar.gz"
    -DentryFile="inference_v5.py"
    -Dcluster='{"worker":{"count":10, "cpu":200, "memory":4000}, "ps":{"count":1, "cpu":200, "memory":5000}}'
    -Dtables="odps://graph_embedding/tables/hs_tmp_dssm_inf_titles"
    -Doutputs="odps://graph_embedding/tables/hs_dssm_result_title_0"
    -DcheckpointDir="oss://bucket-automl/hengsong/?role_arn=acs:ram::1293303983251548:role/graph2018&host=cn-hangzhou.oss-internal.aliyun-inc.com"
    -DuserDefinedParameters="--learning_rate=3e-4 --batch_size=5000 --is_save_model=True --attention_type=1 --num_epochs=1 --ckpt=hs_ugc_video.ckpt-3 --infer_query=False --infer_doc=True"
    -DuseSparseClusterSchema=True;
- 结果
hs_tmp_dssm_inf_querys
hs_tmp_dssm_inf_titles
hs_dssm_result_0
-- hs_tmp_77: (video, query) pairs from hs_dssm_result_0 with score > 0.5,
-- each joined to the `words` of its query (hs_tmp_dssm_inf_querys.id = query_id)
-- and of its video title (hs_tmp_dssm_inf_titles.id = video_id).
-- Output columns: video_id, query_id, score, query_words, video_words.
-- The column list is explicit; the original `c.` / `a.` had lost the `*` of
-- `c.*` / `a.*` and did not parse.
create table hs_tmp_77 as
select
    r.video_id,
    r.query_id,
    r.score,
    q.words as query_words,
    t.words as video_words
from hs_dssm_result_0 r
inner join hs_tmp_dssm_inf_querys q
    on r.query_id = q.id
inner join hs_tmp_dssm_inf_titles t
    on r.video_id = t.id
where r.score > 0.5;
hs_dssm_result_2
-- Rebuild hs_tmp_79: (video, query) pairs from hs_dssm_result_2 with score < 0.3,
-- each joined to the `words` of its query (hs_tmp_dssm_inf_querys.id = query_id)
-- and of its video title (hs_tmp_dssm_inf_titles.id = video_id).
-- Output columns: video_id, query_id, score, query_words, video_words.
-- The column list is explicit; the original `c.` / `a.` had lost the `*` of
-- `c.*` / `a.*` and did not parse.
-- NOTE(review): the bare "yes" line looks like the typed answer to the console's
-- drop confirmation prompt; it is not SQL - confirm before running as a script.
drop table hs_tmp_79;
yes
create table hs_tmp_79 as
select
    r.video_id,
    r.query_id,
    r.score,
    q.words as query_words,
    t.words as video_words
from hs_dssm_result_2 r
inner join hs_tmp_dssm_inf_querys q
    on r.query_id = q.id
inner join hs_tmp_dssm_inf_titles t
    on r.video_id = t.id
where r.score < 0.3;
- 测试knn
-- Embedding tables derived from hs_dssm_result_0.
-- hs_tmp_80: distinct (video_id, video_emb) rows.
create table hs_tmp_80 as
select distinct
    video_id,
    video_emb
from hs_dssm_result_0;
-- hs_tmp_81: distinct (query_id, query_emb) rows; the table is dropped first.
-- NOTE(review): the bare "yes" line looks like the typed answer to the console's
-- drop confirmation prompt; it is not SQL - confirm before running as a script.
drop table hs_tmp_81;
yes
create table hs_tmp_81 as
select distinct
    query_id,
    query_emb
from hs_dssm_result_0;
-- hs_tmp_82 / hs_tmp_83: the same rows with columns renamed to (id, emb).
create table hs_tmp_82 as
select
    video_id as id,
    video_emb as emb
from hs_tmp_80;
create table hs_tmp_83 as
select
    query_id as id,
    query_emb as emb
from hs_tmp_81;
-- Nearest-neighbour run: algo_market component am_vsearch_nearest_neighbor_014
-- with hs_tmp_82 as -Dinput, hs_dssm_result_query_0 as -Dquery and hs_tmp_84 as
-- -Doutputs (dim=100, topk=50, nprob=1024, metric=l2, id column "id", vector
-- column "emb").
-- The unterminated `create table` fragment that preceded this command was
-- dropped: with no `;` of its own it merged into the PAI statement.
-- The -Dcluster JSON is single-quoted, as in the tensorflow140 commands in this
-- file, so its inner double quotes do not end the argument early.
-- NOTE(review): hs_tmp_83 (query id/emb) is built earlier in this file but is not
-- used here; confirm that hs_dssm_result_query_0 is the intended -Dquery table
-- and that it exposes the columns named by -Did_col / -Dvector_col.
PAI -name am_vsearch_nearest_neighbor_014 -project algo_market
-Dcluster='{"worker":{"count":1,"gpu":100}}'
-Ddim=100
-Did_col="id"
-Dvector_col="emb"
-Dinput_slice=1
-Dtopk=50
-Dnprob=1024
-Dmetric="l2"
-Dinput="odps://graph_embedding/tables/hs_tmp_82"
-Dquery="odps://graph_embedding/tables/hs_dssm_result_query_0"
-Doutputs="odps://graph_embedding/tables/hs_tmp_84"
-DenableDynamicCluster=true -DmaxTrainingTimeInHour=60;
得到title的emb
网友评论