上周五工作:
- 加入商品的图片信息,训练mvdssm,当前训练和测试已完成,inference结果:acc:0.84; auc:0.52; precision:0.87
- dssm在使用hard sample数据进行训练的过程中出现欠拟合现象,因此尝试修改模型进行改善,当前网络正在修改中
今天计划:
尝试使用attention等方式来改善dssm模型在hard sample数据上的效果
- 准备新的训练数据
hs_dssm_dic_query_11 : | id | query_emb |
hs_dssm_dic_title_15 : | id | title_emb |
hs_tmp_242 : | query_id | item_id | label | query | title |
-- Build the DSSM training input: for every labelled (query_id, item_id) pair in
-- hs_tmp_242, look up the query embedding and the title embedding by id.
-- Inner joins: pairs whose query_id or item_id has no embedding row are dropped.
-- The output column names (se_keyword_mainse_ws, title_mainse_ws, label) are kept
-- as-is; presumably the training script reads them by these names -- confirm
-- before renaming.
create table hs_tmp_264 as
select
    q.query_emb as se_keyword_mainse_ws,
    t.title_emb as title_mainse_ws,
    p.label
from hs_tmp_242 p
inner join hs_dssm_dic_query_11 q
    on p.query_id = q.id
inner join hs_dssm_dic_title_15 t
    on p.item_id = t.id;
-- Remove the previous train/test tables so the split job that follows can write
-- them again. `if exists` keeps this cleanup re-runnable when a table is already gone.
-- The bare `yes` lines are kept from the original log; presumably they answer the
-- console's drop-confirmation prompt -- confirm when running non-interactively.
drop table if exists hs_train_data_dssm_v2_5;
yes
drop table if exists hs_test_data_dssm_v2_5;
yes
-- Run the PAI `split` component on graph_embedding.hs_tmp_264, writing
-- output1 to hs_train_data_dssm_v2_5 and output2 to hs_test_data_dssm_v2_5.
-- fraction=0.9 is expected to send about 90% of the rows to output1 (train) and
-- the rest to output2 (test) -- confirm against the PAI split documentation.
-- NOTE(review): no seed parameter is passed, so the split may differ between runs.
PAI -name split -project algo_public
-DinputTableName=graph_embedding.hs_tmp_264
-Doutput1TableName=graph_embedding.hs_train_data_dssm_v2_5
-Doutput2TableName=graph_embedding.hs_test_data_dssm_v2_5
-Dfraction=0.9
-DmemSizePerCore=4096
-DcoreNum=100
;
- 开始训练
-- Launch the PAI `tensorflow140` job: entry file train_inference_v8.py from the
-- uploaded tarball, with one worker and one ps as given in -Dcluster.
-- Input tables, in order: the train split, the test split, and hs_tmp_207
-- (not described in this note -- presumably the inference input; confirm).
-- Results are written to hs_dssm_result_3; -DcheckpointDir points at the OSS bucket.
-- NOTE(review): `--ckpt=hs_ugc_video_4e_ .ckpt` contains a space, so the value is
-- likely split into `--ckpt=hs_ugc_video_4e_` plus a stray `.ckpt` argument --
-- confirm the intended checkpoint file name.
pai -name tensorflow140 -Dscript="file:///home/hengsong/origin_deep_cluster_odps_8.tar.gz" -DentryFile="train_inference_v8.py" -Dcluster='{"worker":{"count":1, "cpu":200, "memory":4000}, "ps":{"count":1, "cpu":200, "memory":5000}}' -Dtables="odps://graph_embedding/tables/hs_train_data_dssm_v2_5,odps://graph_embedding/tables/hs_test_data_dssm_v2_5,odps://graph_embedding/tables/hs_tmp_207" -Doutputs="odps://graph_embedding/tables/hs_dssm_result_3" -DcheckpointDir="oss://bucket-automl/hengsong/?role_arn=acs:ram::1293303983251548:role/graph2018&host=cn-hangzhou.oss-internal.aliyun-inc.com" -DuserDefinedParameters="--learning_rate=3e-4 --batch_size=1024 --is_save_model=True --attention_type=1 --num_epochs=1 --ckpt=hs_ugc_video_4e_ .ckpt" -DuseSparseClusterSchema=True;
将维度设置为1024,25,100,可能会有问题
网友评论