美文网首页数据分析
python执行mysql 计算复购率+pyechart+Exc

python执行mysql 计算复购率+pyechart+Exc

作者: DeepWindLee | 来源:发表于2019-10-26 12:32 被阅读0次

    现有某超市的订单数据,内容如下:

    demo库订单表.png

    先求每个会员在每个月的订单数

    sql = ''' -- 
    select MemberID, count(SheetID) AS 每个会员的订单数,substring(STime,1,7) as YearMonth
    from OrderList 
    where MemberID IS NOT NULL 
    group by MemberID ,YearMonth
    order by YearMonth
    ;
    ''' 
    df = pd.read_sql_query(sql, engine)
    df.head(8)
    
    每个会员每个月的订单数.png

    给上表新增一列,判断其是否在本月多次购买

    select temp.YearMonth,temp.MemberID, temp.每个会员的订单数,
    case when temp.每个会员的订单数>1 then 1 else null 
    end as 是否在本月多次购买 
    from
    (
    select MemberID, count(SheetID) AS 每个会员的订单数,substring(STime,1,7) as YearMonth
    from OrderList 
    where MemberID IS NOT NULL 
    group by MemberID ,YearMonth
    order by YearMonth
    ) temp
    ;
    
    新增一列.png

    再对上表进行分组统计多少会员是复购会员

    sql = ''' -- 使用 count(temp_out.是否在本月多次购买) 也可以
    select temp_out.YearMonth ,count(temp_out.MemberID) AS 本月购物的会员数, sum(temp_out.是否在本月多次购买) as 复购会员人数,
    sum(temp_out.是否在本月多次购买)/count(temp_out.MemberID) as 占比
    from (
        select temp.YearMonth,temp.MemberID, temp.每个会员的订单数,
        case when temp.每个会员的订单数>1 then 1 else null 
        end as 是否在本月多次购买 
        from
            (
            select MemberID, count(SheetID) AS 每个会员的订单数,substring(STime,1,7) as YearMonth
            from OrderList 
            where MemberID IS NOT NULL 
            group by MemberID ,YearMonth
            order by YearMonth
            ) temp
    
        ) temp_out
    group by temp_out.YearMonth
    ;
    ''' 
    df = pd.read_sql_query(sql, engine)
    df
    
    统计复购人数.png
    上面的方法嵌套了三层子表:orderlist本身, temp , temp_out,实际上可以不用产生 是否在本月多次购买的子表,即上表的temp表,可以使用 sum(case when 每个会员的订单数 >1 else 0 end )统计,这样就少产生了一个子表。
    sql = ''' 
    select temp.YearMonth ,count(temp.MemberID) AS 本月购物的会员数,
    sum(case when temp.每个会员的订单数 >1 then 1 else 0 end) as 复购会员人数,
    sum(case when temp.每个会员的订单数 >1 then 1 else 0 end)/count(temp.MemberID) as 占比
    from
        (
        select MemberID, count(SheetID) AS 每个会员的订单数,substring(STime,1,7) as YearMonth
        from OrderList 
        where MemberID IS NOT NULL 
        group by MemberID ,YearMonth
        order by YearMonth
        ) temp
    
    group by temp.YearMonth
    ;
    ''' 
    df = pd.read_sql_query(sql, engine)
    df.tail(24).to_csv('每月的复购率.csv',encoding='utf_8_sig')
    df.head(8)
    
    sumcase统计复购率.png

    使用pyecharts 0.5画图

    #0.5版本
    import pyecharts
    from pyecharts import Overlap, Bar, Line, Grid, EffectScatter
    grid = Grid()
    v1 = list( df.tail(24)['本月购物的会员数'].values)        
    v2 =  list(df.tail(24)['复购会员人数'].values)           
    v3 = list( df.tail(24)['占比'].values*100) #
    my_attr = list(df.tail(24)['YearMonth'].values) # ["{}号".format(i) for i in range(1, len(v1)+1)]  #attr =
    bar = Bar(title="DeepWind超市(南沙区)", title_pos="20%")
    bar.add("会员人数", my_attr, v1)
    bar.add("复购人数",my_attr,v2,yaxis_formatter=" 人",
        yaxis_max=5200,
        legend_pos="25%",
        legend_orient="horizontal",
        legend_top="15%",
    )
    overlap = Overlap(width=1200, height=600)
    overlap.add(bar)
    line = Line()
    line.add("复购人数占比", my_attr, v3, yaxis_formatter=" %",yaxis_max=100)
    es = EffectScatter()
    #overlap = Overlap(width=1200, height=600)
    overlap.add(line, is_add_yaxis=True, yaxis_index=1)
    #es.add("", my_attr, v3, effect_scale=8,is_add_yaxis=True, yaxis_index=1,yaxis_max=4000)
    #overlap.add(es)
    grid.add(overlap, grid_right="20%")
    grid.render()
    overlap.render()
    #bar
    grid
    
    pyechart画复购率.png

    使用pyecharts1.6 画图

    import pyecharts.options as opts
    from pyecharts.charts import Bar, Line
    from pyecharts.globals import ThemeType
    v1 = list( df.tail(24)['本月购物的会员数'].values)        
    v2 =  list(df.tail(24)['复购会员人数'].values)           
    v3 = list( df.tail(24)['占比'].values*100) #
    x_data= list(df.tail(24)['YearMonth'].values) 
    
    v1 = [int(each) for each in v1]
    v2 = [int(each) for each in v2]
    v3 = [int(each) for each in v3]
    x_data= [str(each) for each in x_data]
    bar = (
        Bar(init_opts=opts.InitOpts(width="800px", height="400px",theme=ThemeType.DARK))
        .add_xaxis(xaxis_data=x_data)
        .add_yaxis(
            series_name="会员人数",
            yaxis_data= v1,
            label_opts=opts.LabelOpts(is_show=False),
            
            markpoint_opts=opts.MarkPointOpts(
                data=[
                    opts.MarkPointItem(type_="max", name="最大值"),
                    opts.MarkPointItem(type_="min", name="最小值"),
                ]
            ),
        )
        .add_yaxis(
            series_name="复购人数",
            yaxis_data= v2,
            label_opts=opts.LabelOpts(is_show=False),
            
            markpoint_opts=opts.MarkPointOpts(
                data=[
                    opts.MarkPointItem(type_="max", name="最大值"),
                    opts.MarkPointItem(type_="min", name="最小值"),
                ]
            ),
        )
        .extend_axis(
            yaxis=opts.AxisOpts(
                name="占比",
                type_="value",
                min_=50,
                max_=100,
                interval=10,
                axislabel_opts=opts.LabelOpts(formatter="{value} %"),
            )
        )
        .set_global_opts(
            tooltip_opts=opts.TooltipOpts(
                is_show=True, trigger="axis", axis_pointer_type="cross"
            ),
            xaxis_opts=opts.AxisOpts(
                type_="category",
                axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
            ),
            yaxis_opts=opts.AxisOpts(
                name="人数",
                type_="value",
                min_=0,
                max_=5500,
                interval=500,
                axislabel_opts=opts.LabelOpts(formatter="{value}人"),
                axistick_opts=opts.AxisTickOpts(is_show=True),
                splitline_opts=opts.SplitLineOpts(is_show=True),
            ),
        )
    )
    
    line = (
        Line()
        .add_xaxis(xaxis_data=x_data)
        .add_yaxis(
            series_name="占比",
            yaxis_index=1,
            y_axis=v3,
            label_opts=opts.LabelOpts(is_show=False),
            
            markpoint_opts=opts.MarkPointOpts(
                data=[
                    opts.MarkPointItem(type_="max", name="最大值"),
                    opts.MarkPointItem(type_="min", name="最小值"),
                ]
            ),
        )
    )
    bar.overlap(line).render("复购人数.html")
    bar.overlap(line).render_notebook()#render("mixed_bar_and_line.html")
    
    复购1.6.png

    使用Excel画双Y轴图

    excel画图11.png

    此时只有一个Y轴,需要对占比重新生成一个列。

    设置数据系列格式22.jpg
    EXCEL 次坐标.png

    得到下图


    y轴1.png
    更改次y轴.png 更改y轴为线型.png

    最终得到


    最终结果.png

    使用Tableau画双轴图

    占比y轴.png

    将得到


    占比y轴2.png

    相关文章

      网友评论

        本文标题:python执行mysql 计算复购率+pyechart+Exc

        本文链接:https://www.haomeiwen.com/subject/ytbyvctx.html