Flink-10. Flink dual-stream join for order reconciliation

Author: 笨鸡 | Published 2022-03-13 10:31
package com.ctgu.flink.project;

import com.ctgu.flink.entity.OrderEvent;
import com.ctgu.flink.entity.ReceiptEvent;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.CoProcessFunction;
import org.apache.flink.streaming.api.functions.co.ProcessJoinFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

import java.time.Duration;


public class Flink_Sql_Join {

    static OutputTag<OrderEvent> orderOutput = new OutputTag<OrderEvent>("unCompare Pay") {};
    static OutputTag<ReceiptEvent> receiptOutput = new OutputTag<ReceiptEvent>("unReceipt") {};

    public static void main(String[] args) throws Exception {
        long start = System.currentTimeMillis();
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

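        // Pay events: each OrderLog.csv line is orderId,eventType,txId,timestamp(seconds);
        // timestamps are scaled to milliseconds and records without a txId are dropped.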
        DataStream<OrderEvent> orderStream = env.readTextFile("data/OrderLog.csv")
                .filter(line -> line.split(",").length >= 4)
                .map(line -> {
                    String[] s = line.split(",");
                    return new OrderEvent(Long.valueOf(s[0]), s[1], s[2], Long.valueOf(s[3]) * 1000);
                })
                .assignTimestampsAndWatermarks(
                        WatermarkStrategy.<OrderEvent>forBoundedOutOfOrderness(Duration.ZERO)
                                .withTimestampAssigner((event, timestamp) -> event.getTimestamp()))
                .filter(data -> !data.getTxId().equals(""));

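        // Receipt events: each ReceiptLog.csv line is txId,payChannel,timestamp(seconds).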
        DataStream<ReceiptEvent> receiptStream = env.readTextFile("data/ReceiptLog.csv")
                .filter(line -> line.split(",").length >= 3)
                .map(line -> {
                    String[] s = line.split(",");
                    return new ReceiptEvent(s[0], s[1], Long.valueOf(s[2]) * 1000);
                })
                .assignTimestampsAndWatermarks(
                        WatermarkStrategy.<ReceiptEvent>forBoundedOutOfOrderness(Duration.ZERO)
                                .withTimestampAssigner((event, timestamp) -> event.getTimestamp()));

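        // Connect the two streams keyed by txId; the CoProcessFunction emits matched
        // (pay, receipt) pairs and routes unmatched records to the side outputs.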
        SingleOutputStreamOperator<Tuple2<OrderEvent, ReceiptEvent>> resultStream
                = orderStream.keyBy(OrderEvent::getTxId)
                .connect(receiptStream.keyBy(ReceiptEvent::getTxId))
                .process(new MyCoProcessFunction());

        resultStream.print("success");
        resultStream.getSideOutput(orderOutput).print("unPay");
        resultStream.getSideOutput(receiptOutput).print("unReceipt");

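        // Alternative: an event-time interval join that pairs a pay with any receipt
        // whose timestamp lies within [pay.timestamp - 30s, pay.timestamp + 50s].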
        orderStream.keyBy(OrderEvent::getTxId)
                .intervalJoin(receiptStream.keyBy(ReceiptEvent::getTxId))
                .between(Time.seconds(-30), Time.seconds(50))
                .process(new MyProcessJoinFunction()).print("success join");

        env.execute("Order Reconciliation");

        System.out.println("Elapsed (s): " + (System.currentTimeMillis() - start) / 1000);

    }

    public static class MyCoProcessFunction
            extends CoProcessFunction<OrderEvent, ReceiptEvent, Tuple2<OrderEvent, ReceiptEvent>> {

        ValueState<OrderEvent> payState;

        ValueState<ReceiptEvent> receiptState;

        @Override
        public void open(Configuration parameters) throws Exception {
            payState = getRuntimeContext().getState(new ValueStateDescriptor<>("pay", OrderEvent.class));
            receiptState = getRuntimeContext().getState(new ValueStateDescriptor<>("receipt", ReceiptEvent.class));
        }

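        // A pay arrives: emit the pair if the receipt is already buffered; otherwise
        // buffer the pay and allow up to 5s of event time for the receipt to show up.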
        @Override
        public void processElement1(OrderEvent pay, Context context,
                                    Collector<Tuple2<OrderEvent, ReceiptEvent>> out) throws Exception {
            ReceiptEvent receipt = receiptState.value();
            if (receipt != null) {
                out.collect(new Tuple2<>(pay, receipt));
                payState.clear();
                receiptState.clear();
            } else {
                context.timerService().registerEventTimeTimer(pay.getTimestamp() + 5000);
                payState.update(pay);
            }
        }

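        // A receipt arrives: emit the pair if the pay is already buffered; otherwise
        // buffer the receipt and allow up to 3s of event time for the pay to show up.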
        @Override
        public void processElement2(ReceiptEvent receipt, Context context,
                                    Collector<Tuple2<OrderEvent, ReceiptEvent>> out) throws Exception {
            OrderEvent pay = payState.value();
            if (pay != null) {
                out.collect(new Tuple2<>(pay, receipt));
                payState.clear();
                receiptState.clear();
            } else {
                context.timerService().registerEventTimeTimer(receipt.getTimestamp() + 3000);
                receiptState.update(receipt);
            }
        }

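        // The timer fires only if no match arrived in time: whatever is still buffered
        // is reported on its side output.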
        @Override
        public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple2<OrderEvent, ReceiptEvent>> out) throws Exception {
            if(payState.value() != null) {
                ctx.output(orderOutput, payState.value());
            }
            if(receiptState.value() != null) {
                ctx.output(receiptOutput, receiptState.value());
            }
            payState.clear();
            receiptState.clear();
        }
    }

    public static class MyProcessJoinFunction
            extends ProcessJoinFunction<OrderEvent, ReceiptEvent, Tuple2<OrderEvent, ReceiptEvent>> {

        @Override
        public void processElement(OrderEvent orderEvent, ReceiptEvent receiptEvent,
                                   Context context, Collector<Tuple2<OrderEvent, ReceiptEvent>> collector) throws Exception {
            collector.collect(new Tuple2<>(orderEvent, receiptEvent));
        }
    }

}
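
The OrderEvent and ReceiptEvent classes are imported from com.ctgu.flink.entity but are not shown in the post. Below is a minimal sketch of what they might look like, inferred from the constructor calls and getters used above; the field names are assumptions, setters are omitted for brevity, and each class would live in its own file.

// Hypothetical sketch of com.ctgu.flink.entity.OrderEvent (fields inferred from usage above)
public class OrderEvent {
    private Long orderId;
    private String eventType;   // e.g. "create" or "pay"
    private String txId;        // transaction id, empty for non-pay events
    private Long timestamp;     // event time in milliseconds

    public OrderEvent() {}      // no-arg constructor, as commonly required for Flink POJOs

    public OrderEvent(Long orderId, String eventType, String txId, Long timestamp) {
        this.orderId = orderId;
        this.eventType = eventType;
        this.txId = txId;
        this.timestamp = timestamp;
    }

    public Long getOrderId() { return orderId; }
    public String getEventType() { return eventType; }
    public String getTxId() { return txId; }
    public Long getTimestamp() { return timestamp; }
}

// Hypothetical sketch of com.ctgu.flink.entity.ReceiptEvent
public class ReceiptEvent {
    private String txId;
    private String payChannel;  // e.g. "wechat" or "alipay"
    private Long timestamp;     // event time in milliseconds

    public ReceiptEvent() {}

    public ReceiptEvent(String txId, String payChannel, Long timestamp) {
        this.txId = txId;
        this.payChannel = payChannel;
        this.timestamp = timestamp;
    }

    public String getTxId() { return txId; }
    public String getPayChannel() { return payChannel; }
    public Long getTimestamp() { return timestamp; }
}

In practice these classes probably also override toString() so that the print() sinks produce readable output.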

Test data (matches the ReceiptLog.csv format: txId, pay channel, timestamp in seconds)

ewr342as4,wechat,1558430845
sd76f87d6,wechat,1558430847
3hu3k2432,alipay,1558430848
8fdsfae83,alipay,1558430850
32h3h4b4t,wechat,1558430852
766lk5nk4,wechat,1558430855
435kjb45d,alipay,1558430859
5k432k4n,wechat,1558430862
435kjb45s,wechat,1558430866
324jnd45s,wechat,1558430868
43jhin3k4,wechat,1558430871
98x0f8asd,alipay,1558430874
392094j32,wechat,1558430877
88df0wn92,alipay,1558430882
435kjb4432,alipay,1558430884
3hefw8jf,alipay,1558430885
499dfano2,wechat,1558430886
8xz09ddsaf,wechat,1558430889
3243hr9h9,wechat,1558430892
329d09f9f,alipay,1558430893
809saf0ff,wechat,1558430895
324n0239,wechat,1558430899
sad90df3,alipay,1558430901
24309dsf,alipay,1558430902
rnp435rk,wechat,1558430905
8c6vs8dd,wechat,1558430906
3245nbo7,alipay,1558430908
8x0zvy8w3,alipay,1558430911
9032n4fd2,wechat,1558430913
d8938034,wechat,1558430915
32499fd9w,alipay,1558430921
9203kmfn,alipay,1558430922
390mf2398,alipay,1558430926
902dsqw45,wechat,1558430927
84309dw31r,alipay,1558430933
sddf9809ew,alipay,1558430936
832jksmd9,wechat,1558430938
m23sare32e,wechat,1558430940
92nr903msa,wechat,1558430944
sdafen9932,alipay,1558430949
