美文网首页
2019-08-16工作进展

2019-08-16工作进展

作者: Songger | 来源:发表于2019-08-16 20:53 被阅读0次
    1. mvdssm训练集生成

    -- Submit a PAI tensorflow140 job running train_mv_dssm_v2.py with 30 workers and 10 parameter servers.
    -- Input tables (in order): hs_tmp_250, hs_tmp_251, hs_tmp_221; output table: hs_tmp_211.
    -- NOTE(review): --ckpt names hs_ugc_video_4e_5.ckpt under the OSS checkpointDir; whether it is loaded or written is decided by the script — confirm.
    pai -name tensorflow140 -Dscript="file:///home/hengsong/origin_deep_cluster_odps_8.tar.gz" -DentryFile="train_mv_dssm_v2.py" -Dcluster='{"worker":{"count":30, "cpu":200, "memory":4000}, "ps":{"count":10, "cpu":200, "memory":5000}}' -Dtables="odps://graph_embedding/tables/hs_tmp_250,odps://graph_embedding/tables/hs_tmp_251,odps://graph_embedding/tables/hs_tmp_221" -Doutputs="odps://graph_embedding/tables/hs_tmp_211" -DcheckpointDir="oss://bucket-automl/hengsong/?role_arn=acs:ram::1293303983251548:role/graph2018&host=cn-hangzhou.oss-internal.aliyun-inc.com" -DuserDefinedParameters="--learning_rate=3e-4 --batch_size=1024 --is_save_model=True --attention_type=1 --num_epochs=1 --ckpt=hs_ugc_video_4e_5.ckpt" -DuseSparseClusterSchema=True;

    hs_dssm_train_v2_0

    1. 构造训练测试验证集

    训练集测试集构造 : se_keyword_mainse_ws,title_mainse_ws,pic_mainse_ws

    -- Rebuild hs_tmp_220: every column of hs_dssm_train_v2_0 plus
    --   se_keyword_mainse_ws = hs_tmp_202.emb (matched on query_id = id)
    --   title_mainse_ws      = hs_tmp_203.emb (matched on item_id = id)
    -- Both joins are inner joins, so rows without a match in either table are dropped.
    -- NOTE(review): the bare `yes` line appears to be the answer to the client's
    -- drop-table confirmation prompt rather than SQL — confirm before scripting this.
    drop table hs_tmp_220;
    yes
    create table hs_tmp_220 as
    select
        c.*,
        d.emb as title_mainse_ws
    from (
        select
            a.*,
            b.emb as se_keyword_mainse_ws
        from hs_dssm_train_v2_0 a
        join hs_tmp_202 b
            on a.query_id = b.id
    ) c
    join hs_tmp_203 d
        on c.item_id = d.id;

    -- Rebuild hs_tmp_223: all columns of hs_dssm_train_v2_0 plus pic_ws taken
    -- from hs_tmp_214, matched on item_id (inner join).
    drop table hs_tmp_223;
    yes
    create table hs_tmp_223 as
    select
        src.*,
        pic.pic_ws
    from hs_dssm_train_v2_0 src
    inner join hs_tmp_214 pic
        on src.item_id = pic.item_id;

    -- Rebuild hs_tmp_224, the table with columns
    -- se_keyword_mainse_ws, title_mainse_ws, label, pic_mainse_ws:
    -- the first three come from hs_tmp_220, pic_mainse_ws is hs_tmp_223.pic_ws.
    -- NOTE(review): the join key is item_id only; if an item_id occurs on several
    -- rows of both inputs the join multiplies rows and `distinct` collapses them
    -- again — confirm that is the intent.
    drop table hs_tmp_224;
    yes
    create table hs_tmp_224 as
    select distinct
        txt.se_keyword_mainse_ws,
        txt.title_mainse_ws,
        txt.label,
        pic.pic_ws as pic_mainse_ws
    from hs_tmp_220 txt
    inner join hs_tmp_223 pic
        on txt.item_id = pic.item_id;

    验证集构造 : query_id, query_ws, video_id, video_ws, pic_ws

    -- Rebuild hs_tmp_221 with columns query_id, video_id, query_ws, video_ws, pic_ws:
    --   query_ws = hs_tmp_204.emb (matched on hs_tmp_157.query_id = id)
    --   video_ws = hs_tmp_205.emb (matched on hs_tmp_157.title_id = id)
    --   pic_ws   = hs_tmp_217.pic_ws (matched on title_id = item_id)
    -- title_id is exposed as video_id; duplicate output rows are removed by `distinct`.
    drop table hs_tmp_221;
    yes
    create table hs_tmp_221 as
    select distinct
        e.query_id,
        e.title_id as video_id,
        e.query_ws,
        e.video_ws,
        f.pic_ws
    from (
        select
            c.*,
            d.emb as video_ws
        from (
            select
                a.*,
                b.emb as query_ws
            from hs_tmp_157 a
            join hs_tmp_204 b
                on a.query_id = b.id
        ) c
        join hs_tmp_205 d
            on c.title_id = d.id
    ) e
    join hs_tmp_217 f
        on e.title_id = f.item_id;

    train & inference

    -- Copy at most 1,000,000 rows of hs_tmp_224 into hs_tmp_231.
    -- There is no ORDER BY, so which rows are kept is not specified.
    create table hs_tmp_231 as select * from hs_tmp_224 limit 1000000;
    -- NOTE(review): each statement below overwrites hs_tmp_224 with its own current
    -- rows, so the row set is unchanged, and the same statement is repeated six times.
    -- If the goal was to duplicate the rows, `insert into` would be required — confirm intent.
    insert overwrite table hs_tmp_224 select * from hs_tmp_224;
    insert overwrite table hs_tmp_224 select * from hs_tmp_224;
    insert overwrite table hs_tmp_224 select * from hs_tmp_224;
    insert overwrite table hs_tmp_224 select * from hs_tmp_224;
    insert overwrite table hs_tmp_224 select * from hs_tmp_224;
    insert overwrite table hs_tmp_224 select * from hs_tmp_224;
    se_keyword_mainse_ws,title_mainse_ws, pic_mainse_ws, label
    query_id, query_ws, video_id, video_ws, pic_ws

    -- Submit a PAI tensorflow140 job running train_mv_dssm_v2.py with 10 workers and 10 parameter servers.
    -- Input tables (in order): hs_tmp_224, hs_tmp_231, hs_tmp_221; output table: hs_tmp_211.
    pai -name tensorflow140 -Dscript="file:///home/hengsong/origin_deep_cluster_odps_8.tar.gz" -DentryFile="train_mv_dssm_v2.py" -Dcluster='{"worker":{"count":10, "cpu":200, "memory":4000}, "ps":{"count":10, "cpu":200, "memory":5000}}' -Dtables="odps://graph_embedding/tables/hs_tmp_224,odps://graph_embedding/tables/hs_tmp_231,odps://graph_embedding/tables/hs_tmp_221" -Doutputs="odps://graph_embedding/tables/hs_tmp_211" -DcheckpointDir="oss://bucket-automl/hengsong/?role_arn=acs:ram::1293303983251548:role/graph2018&host=cn-hangzhou.oss-internal.aliyun-inc.com" -DuserDefinedParameters="--learning_rate=3e-4 --batch_size=1024 --is_save_model=True --attention_type=1 --num_epochs=1 --ckpt=hs_ugc_video_4e_5.ckpt" -DuseSparseClusterSchema=True;

    -- Same job as the previous command with the worker count raised from 10 to 50; every other argument is identical.
    pai -name tensorflow140 -Dscript="file:///home/hengsong/origin_deep_cluster_odps_8.tar.gz" -DentryFile="train_mv_dssm_v2.py" -Dcluster='{"worker":{"count":50, "cpu":200, "memory":4000}, "ps":{"count":10, "cpu":200, "memory":5000}}' -Dtables="odps://graph_embedding/tables/hs_tmp_224,odps://graph_embedding/tables/hs_tmp_231,odps://graph_embedding/tables/hs_tmp_221" -Doutputs="odps://graph_embedding/tables/hs_tmp_211" -DcheckpointDir="oss://bucket-automl/hengsong/?role_arn=acs:ram::1293303983251548:role/graph2018&host=cn-hangzhou.oss-internal.aliyun-inc.com" -DuserDefinedParameters="--learning_rate=3e-4 --batch_size=1024 --is_save_model=True --attention_type=1 --num_epochs=1 --ckpt=hs_ugc_video_4e_5.ckpt" -DuseSparseClusterSchema=True;

    20190816124945153g7e6vyvj2

    hs_tmp_252 : | query_id | video_id | score | active_view |
    hs_tmp_215: | item_id | title | pic_url |
    hs_dssm_dic_query_inf_1 : | id | words_mainse_ids | query |

    -- Rebuild hs_tmp_248: every column of hs_tmp_211 plus
    --   title, pic_url from hs_tmp_215 (matched on video_id = item_id)
    --   query          from hs_dssm_dic_query_inf_1 (matched on query_id = id)
    -- Both joins are inner joins, so rows without a match in either table are dropped.
    drop table hs_tmp_248;
    yes
    create table hs_tmp_248 as
    select
        c.*,
        d.query
    from (
        select
            a.*,
            b.title,
            b.pic_url
        from hs_tmp_211 a
        join hs_tmp_215 b
            on a.video_id = b.item_id
    ) c
    join hs_dssm_dic_query_inf_1 d
        on c.query_id = d.id;

    相关文章

      网友评论

          本文标题:2019-08-16工作进展

          本文链接:https://www.haomeiwen.com/subject/pulfsctx.html