Librdkafka用于kafka topic-partition

作者: 扫帚的影子 | 来源:发表于2018-01-14 11:56 被阅读822次
    • topic-partition是kafka分布式的精华, 也是针对kafka进行生产或消费的最小单元;
    • 在这篇里我们开始介绍相关的数据结构
    • 内容如下:
      1. rd_kafka_topic_partition_t
      2. rd_kafka_topic_partition_list_t
      3. rd_kafka_toppar_s

    rd_kafka_topic_partition_t
    • 所在文件: src/rdkafka.h
    • 定义了一个partition的相关数据结构, 简单定义, 占位符
    • 定义:
    /**
     * Place holder for a single topic+partition together with the
     * per-partition values (offset, metadata, error code) that travel with it.
     * Stored as the element type of rd_kafka_topic_partition_list_t.
     */
    typedef struct rd_kafka_topic_partition_s {
            char        *topic;             /**< Topic name */
            int32_t      partition;         /**< Partition */
        int64_t      offset;            /**< Offset */
            void        *metadata;          /**< Metadata.
                                             *   NOTE(review): the article describes this
                                             *   as leader/replicas/isr information, but
                                             *   rd_kafka_toppar_offset_commit() stores the
                                             *   offset-commit metadata string here. */
            size_t       metadata_size;     /**< Metadata size */
            void        *opaque;            /**< Application opaque */
            rd_kafka_resp_err_t err;        /**< Error code, depending on use. */
            void       *_private;           /**< INTERNAL USE ONLY,
                                             *   INITIALIZE TO ZERO, DO NOT TOUCH */
    } rd_kafka_topic_partition_t;
    
    
    rd_kafka_topic_partition_list_t
    • 所在文件: src/rdkafka.h
    • 用来存储 rd_kafka_topic_partition_t的可动态扩容的数组
    • 定义:
    typedef struct rd_kafka_topic_partition_list_s {
            int cnt;               /**< Current number of elements */ 当前数组中放入的element数量
            int size;              /**< Current allocated size */ // 当前数组的容量
            rd_kafka_topic_partition_t *elems; /**< Element array[] */ 动态数组指针
    } rd_kafka_topic_partition_list_t;
    
    • 扩容操作 rd_kafka_topic_partition_list_grow:
    /**
     * Enlarge the element array of \p rktparlist.
     *
     * When \p add_size is smaller than the current capacity the growth step
     * is raised to max(current capacity, 32), so small requests at least
     * double the array. The array is reallocated in place with rd_realloc();
     * existing elements are kept, new slots are left uninitialised.
     *
     * @param rktparlist  list whose capacity is increased
     * @param add_size    minimum number of additional element slots
     */
    void
    rd_kafka_topic_partition_list_grow (rd_kafka_topic_partition_list_t *rktparlist,
                                        int add_size) {
            if (add_size < rktparlist->size)
                    add_size = RD_MAX(rktparlist->size, 32);

            rktparlist->size += add_size;

            /* Re-allocate the backing storage for the new capacity. */
            rktparlist->elems = rd_realloc(rktparlist->elems,
                                           sizeof(*rktparlist->elems) *
                                           rktparlist->size);
    }
    
    • 创建操作 rd_kafka_topic_partition_list_new:
    /**
     * Create a new, empty topic+partition list.
     *
     * The capacity starts at zero and is raised through
     * rd_kafka_topic_partition_list_grow(), which adds to the current
     * capacity. Pre-setting the capacity to \p size before growing (as the
     * previous code did) recorded twice the requested capacity, and left a
     * negative capacity with a NULL element array when \p size was negative.
     *
     * @param size  initial number of element slots to allocate; values <= 0
     *              create a list without pre-allocated storage
     * @returns the newly allocated list
     */
    rd_kafka_topic_partition_list_t *rd_kafka_topic_partition_list_new (int size) {
            rd_kafka_topic_partition_list_t *rktparlist;

            rktparlist = rd_calloc(1, sizeof(*rktparlist));
            rktparlist->size = 0;
            rktparlist->cnt  = 0;

            if (size > 0)
                    rd_kafka_topic_partition_list_grow(rktparlist, size);

            return rktparlist;
    }
    
    • 查找操作 rd_kafka_topic_partition_list_find:
      topic和partition都相等才算是相等
    rd_kafka_topic_partition_list_find (rd_kafka_topic_partition_list_t *rktparlist,
                         const char *topic, int32_t partition) {
        int i = rd_kafka_topic_partition_list_find0(rktparlist,
                                topic, partition);
        if (i == -1)
            return NULL;
        else
            return &rktparlist->elems[i];
    }
    
    • 按索引删除 rd_kafka_topic_partition_list_del_by_idx
    /**
     * Remove the element at position \p idx from \p rktparlist.
     *
     * @returns 1 if an element was removed, 0 if \p idx is out of range
     */
    int
    rd_kafka_topic_partition_list_del_by_idx (rd_kafka_topic_partition_list_t *rktparlist,
                                              int idx) {
            if (unlikely(idx < 0 || idx >= rktparlist->cnt))
                    return 0;

            /* One element less in the list. */
            rktparlist->cnt--;

            /* Destroy the element that is being removed. */
            rd_kafka_topic_partition_destroy0(&rktparlist->elems[idx], 0);

            /* Close the gap by shifting the tail down one slot; the
             * allocated capacity is left untouched. */
            memmove(&rktparlist->elems[idx], &rktparlist->elems[idx+1],
                    (rktparlist->cnt - idx) * sizeof(rktparlist->elems[idx]));

            return 1;
    }
    
    • 排序rd_kafka_topic_partition_list_sort_by_topic
      topic名字不同按topic名字排,topic名字相同按partition排
    /**
     * Sort \p rktparlist in place using rd_kafka_topic_partition_cmp as the
     * comparator and no comparator opaque.
     */
    void
    rd_kafka_topic_partition_list_sort_by_topic (rd_kafka_topic_partition_list_t *rktparlist) {
            rd_kafka_topic_partition_list_sort(rktparlist, rd_kafka_topic_partition_cmp, NULL);
    }
    
    rd_kafka_toppar_s
    • 所在文件: src/rdkafka_partition.h
    • 重量级数据结构, topic, partition, leader, 生产, 消费, 各种定时timer都在里面
    • 定义, 这个结构体巨庞大
    /**
     * Topic+partition state object (rd_kafka_toppar_t): list linkage, leader
     * tracking, fetch state, offsets, timers and counters for one partition.
     */
    struct rd_kafka_toppar_s { /* rd_kafka_toppar_t */
            TAILQ_ENTRY(rd_kafka_toppar_s) rktp_rklink;  /* rd_kafka_t link */
            TAILQ_ENTRY(rd_kafka_toppar_s) rktp_rkblink; /* rd_kafka_broker_t link*/
            CIRCLEQ_ENTRY(rd_kafka_toppar_s) rktp_fetchlink; /* rkb_fetch_toppars */
            TAILQ_ENTRY(rd_kafka_toppar_s) rktp_rktlink; /* rd_kafka_itopic_t link*/
            TAILQ_ENTRY(rd_kafka_toppar_s) rktp_cgrplink;/* rd_kafka_cgrp_t link */
            rd_kafka_itopic_t       *rktp_rkt;
            shptr_rd_kafka_itopic_t *rktp_s_rkt;  /* shared pointer for rktp_rkt */
            int32_t            rktp_partition;
            //LOCK: toppar_lock() + topic_wrlock()
            //LOCK: .. in partition_available()
            int32_t            rktp_leader_id;   /**< Current leader broker id.
                                                  *   This is updated directly
                                                  *   from metadata. */
            rd_kafka_broker_t *rktp_leader;      /**< Current leader broker
                                                  *   This updated asynchronously
                                                  *   by issuing JOIN op to
                                                  *   broker thread, so be careful
                                                  *   in using this since it
                                                  *   may lag. */
            rd_kafka_broker_t *rktp_next_leader; /**< Next leader broker after
                                                  *   async migration op. */
            rd_refcnt_t        rktp_refcnt;
            mtx_t              rktp_lock;
            rd_atomic32_t      rktp_version;         /* Latest op version.
                                                      * Authoritative (app thread)*/
            int32_t            rktp_op_version;      /* Op version of curr command
                                                      * state from.
                                                      * (broker thread) */
            int32_t            rktp_fetch_version;   /* Op version of curr fetch.
                                                        (broker thread) */

            enum {
                    RD_KAFKA_TOPPAR_FETCH_NONE = 0,
                    RD_KAFKA_TOPPAR_FETCH_STOPPING,
                    RD_KAFKA_TOPPAR_FETCH_STOPPED,
                    RD_KAFKA_TOPPAR_FETCH_OFFSET_QUERY,
                    RD_KAFKA_TOPPAR_FETCH_OFFSET_WAIT,
                    RD_KAFKA_TOPPAR_FETCH_ACTIVE,
            } rktp_fetch_state;
            int32_t            rktp_fetch_msg_max_bytes; /* Max number of bytes to
                                                          * fetch.
                                                          * Locality: broker thread
                                                          */

            rd_ts_t            rktp_ts_fetch_backoff; /* Back off fetcher for
                                                       * this partition until this
                                                       * absolute timestamp
                                                       * expires. */

            int64_t            rktp_query_offset;    /* Offset to query broker for*/
            int64_t            rktp_next_offset;     /* Next offset to start
                                                      * fetching from.
                                                      * Locality: toppar thread */
            int64_t            rktp_last_next_offset; /* Last next_offset handled
                                                       * by fetch_decide().
                                                       * Locality: broker thread */
            int64_t            rktp_app_offset;      /* Last offset delivered to
                                                      * application + 1 */
            int64_t            rktp_stored_offset;   /* Last stored offset, but
                                                      * maybe not committed yet. */
            int64_t            rktp_committing_offset; /* Offset currently being
                                                        * committed */
            int64_t            rktp_committed_offset; /* Last committed offset */
            rd_ts_t            rktp_ts_committed_offset; /* Timestamp of last
                                                          * commit */

            struct offset_stats rktp_offsets; /* Current offsets.
                                               * Locality: broker thread*/
            struct offset_stats rktp_offsets_fin; /* Finalized offset for stats.
                                                   * Updated periodically
                                                   * by broker thread.
                                                   * Locks: toppar_lock */

            int64_t rktp_hi_offset;              /* Current high offset.
                                                  * Locks: toppar_lock */
            int64_t rktp_lo_offset;
            rd_ts_t            rktp_ts_offset_lag;

            char              *rktp_offset_path;     /* Path to offset file */
            FILE              *rktp_offset_fp;       /* Offset file pointer */
            rd_kafka_cgrp_t   *rktp_cgrp;            /* Belongs to this cgrp */

            int                rktp_assigned;   /* Partition in cgrp assignment */

            rd_kafka_replyq_t  rktp_replyq; /* Current replyq+version
                                             * for propagating
                                             * major operations, e.g.,
                                             * FETCH_STOP. */
            int                rktp_flags;

            shptr_rd_kafka_toppar_t *rktp_s_for_desp; /* Shared pointer for
                                                       * rkt_desp list */
            shptr_rd_kafka_toppar_t *rktp_s_for_cgrp; /* Shared pointer for
                                                       * rkcg_toppars list */
            shptr_rd_kafka_toppar_t *rktp_s_for_rkb;  /* Shared pointer for
                                                       * rkb_toppars list */

            /*
             * Timers
             */
            rd_kafka_timer_t rktp_offset_query_tmr;  /* Offset query timer */
            rd_kafka_timer_t rktp_offset_commit_tmr; /* Offset commit timer */
            rd_kafka_timer_t rktp_offset_sync_tmr;   /* Offset file sync timer */
            rd_kafka_timer_t rktp_consumer_lag_tmr;  /* Consumer lag monitoring
                                                      * timer */

            int rktp_wait_consumer_lag_resp;         /* Waiting for consumer lag
                                                      * response. */

            struct {
                    rd_atomic64_t tx_msgs;
                    rd_atomic64_t tx_bytes;
                    rd_atomic64_t msgs;
                    rd_atomic64_t rx_ver_drops;
            } rktp_c;
    };
    
    • 创建一个 rd_kafka_toppar_t对象 rd_kafka_toppar_new0:
    /**
     * Allocate and initialise a toppar object for \p partition of topic \p rkt.
     *
     * @param rkt        topic the partition belongs to
     * @param partition  partition id
     * @param func,line  call site, used for the debug log and passed on to
     *                   rd_kafka_toppar_keep_src()
     * @returns the value of rd_kafka_toppar_keep_src() for the new object
     */
    shptr_rd_kafka_toppar_t *rd_kafka_toppar_new0 (rd_kafka_itopic_t *rkt,
                                                   int32_t partition,
                                                   const char *func, int line) {
            /* Allocate the object itself. */
            rd_kafka_toppar_t *rktp = rd_calloc(1, sizeof(*rktp));

            /* Identity: partition id and owning topic. */
            rktp->rktp_partition = partition;
            rktp->rktp_rkt = rkt;

            /* Leader and fetch state defaults. */
            rktp->rktp_leader_id = -1;
            rktp->rktp_fetch_state = RD_KAFKA_TOPPAR_FETCH_NONE;
            rktp->rktp_fetch_msg_max_bytes =
                    rkt->rkt_rk->rk_conf.fetch_msg_max_bytes;
            rktp->rktp_offset_fp = NULL;

            /* Offset bookkeeping starts out as "invalid". */
            rd_kafka_offset_stats_reset(&rktp->rktp_offsets);
            rd_kafka_offset_stats_reset(&rktp->rktp_offsets_fin);
            rktp->rktp_hi_offset        = RD_KAFKA_OFFSET_INVALID;
            rktp->rktp_lo_offset        = RD_KAFKA_OFFSET_INVALID;
            rktp->rktp_app_offset       = RD_KAFKA_OFFSET_INVALID;
            rktp->rktp_stored_offset    = RD_KAFKA_OFFSET_INVALID;
            rktp->rktp_committed_offset = RD_KAFKA_OFFSET_INVALID;

            /* Message queues, lock and reference count. */
            rd_kafka_msgq_init(&rktp->rktp_msgq);
            rktp->rktp_msgq_wakeup_fd = -1;
            rd_kafka_msgq_init(&rktp->rktp_xmit_msgq);
            mtx_init(&rktp->rktp_lock, mtx_plain);

            rd_refcnt_init(&rktp->rktp_refcnt, 0);

            /* Fetch queue and op queue; ops are served by
             * rd_kafka_toppar_op_serve with this toppar as opaque. */
            rktp->rktp_fetchq = rd_kafka_q_new(rkt->rkt_rk);
            rktp->rktp_ops    = rd_kafka_q_new(rkt->rkt_rk);
            rktp->rktp_ops->rkq_serve  = rd_kafka_toppar_op_serve;
            rktp->rktp_ops->rkq_opaque = rktp;

            rd_atomic32_init(&rktp->rktp_version, 1);
            rktp->rktp_op_version = rd_atomic32_get(&rktp->rktp_version);

            /* Start the periodic consumer-lag timer. Only done when
             * statistics are enabled, the client is a consumer and this is
             * not the UA partition. The interval is at least 10 seconds.
             * Article author's note: this is one timer per toppar, which is
             * a lot; a timing wheel could serve all partitions instead. */
            if (rkt->rkt_rk->rk_conf.stats_interval_ms > 0 &&
                rkt->rkt_rk->rk_type == RD_KAFKA_CONSUMER &&
                partition != RD_KAFKA_PARTITION_UA) {
                    int intvl =
                            rkt->rkt_rk->rk_conf.stats_interval_ms <
                                    10 * 1000 /* 10s */
                            ? 10 * 1000
                            : rkt->rkt_rk->rk_conf.stats_interval_ms;

                    rd_kafka_timer_start(&rkt->rkt_rk->rk_timers,
                                         &rktp->rktp_consumer_lag_tmr,
                                         intvl * 1000ll,
                                         rd_kafka_toppar_consumer_lag_tmr_cb,
                                         rktp);
            }

            rktp->rktp_s_rkt = rd_kafka_topic_keep(rkt);

            /* Forward this toppar's op queue to the rk_ops queue of the
             * rd_kafka_t handle, so its ops end up on the handle's queue. */
            rd_kafka_q_fwd_set(rktp->rktp_ops, rkt->rkt_rk->rk_ops);

            rd_kafka_dbg(rkt->rkt_rk, TOPIC, "TOPPARNEW", "NEW %s [%"PRId32"] %p (at %s:%d)",
                         rkt->rkt_topic->str, rktp->rktp_partition, rktp,
                         func, line);

            return rd_kafka_toppar_keep_src(func, line, rktp);
    }
    
    • 销毁一个rd_kafka_toppar_t对象rd_kafka_toppar_destroy_final
    /**
     * Final teardown of a toppar: releases the queues, reply queue, topic
     * reference, lock and refcount owned by \p rktp and then frees the
     * object itself. \p rktp must not be used after this call.
     */
    void rd_kafka_toppar_destroy_final (rd_kafka_toppar_t *rktp) {
            /* Per the article: stops the toppar's timers and purges its
             * ops queue (rd_kafka_toppar_remove is not shown here). */
            rd_kafka_toppar_remove(rktp);
    
            /* Hand the messages still in rktp_msgq back to the application
             * (delivery report) with error __DESTROY, emptying the queue. */
        rd_kafka_dr_msgq(rktp->rktp_rkt, &rktp->rktp_msgq,
                 RD_KAFKA_RESP_ERR__DESTROY);
        rd_kafka_q_destroy_owner(rktp->rktp_fetchq);
            rd_kafka_q_destroy_owner(rktp->rktp_ops);
    
        rd_kafka_replyq_destroy(&rktp->rktp_replyq);
    
        /* Drop the topic reference taken with rd_kafka_topic_keep(). */
        rd_kafka_topic_destroy0(rktp->rktp_s_rkt);
    
        mtx_destroy(&rktp->rktp_lock);
    
            rd_refcnt_destroy(&rktp->rktp_refcnt);
    
        rd_free(rktp);
    }
    
    • 从一个rd_kafka_itopic_t(这个我们后面会有专门篇章来介绍, 这里只需要知道它表示topic即可, 里面包括属于它的partition列表)获取指定partition:
    /**
     * Fetch the toppar for \p partition from topic \p rkt.
     *
     * A partition id inside [0, rkt_partition_cnt) is looked up by index in
     * rkt_p[]. Otherwise, if \p partition is RD_KAFKA_PARTITION_UA or
     * \p ua_on_miss is set, the topic's rkt_ua toppar is used.
     *
     * @returns rd_kafka_toppar_keep_src() applied to the selected toppar
     *          (taking a reference), or NULL if nothing was selected or the
     *          selected slot is empty
     */
    shptr_rd_kafka_toppar_t *rd_kafka_toppar_get0 (const char *func, int line,
                                                   const rd_kafka_itopic_t *rkt,
                                                   int32_t partition,
                                                   int ua_on_miss) {
            shptr_rd_kafka_toppar_t *s_rktp = NULL;
            int known_partition = partition >= 0 &&
                                  partition < rkt->rkt_partition_cnt;

            if (known_partition) {
                    /* Direct array index lookup. */
                    s_rktp = rkt->rkt_p[partition];
            } else if (partition == RD_KAFKA_PARTITION_UA || ua_on_miss) {
                    s_rktp = rkt->rkt_ua;
            }

            if (!s_rktp)
                    return NULL;

            /* Increase the reference count for the caller. */
            return rd_kafka_toppar_keep_src(func, line,
                                            rd_kafka_toppar_s2i(s_rktp));
    }
    
    • 按topic名字和partition来获取一个rd_kafka_toppar_t对象, 没有找到topic, 就先创建这个 rd_kafka_itopic_t对象
    /**
     * Get a toppar by topic name and partition id.
     *
     * The topic object is looked up under the rd_kafka_t write lock; if it
     * does not exist it is created when \p create_on_miss is set. The toppar
     * itself is obtained through rd_kafka_toppar_desired_add() under the
     * topic write lock.
     *
     * @returns the toppar, or NULL if the topic is unknown and
     *          \p create_on_miss is 0, or if creating the topic failed
     */
    shptr_rd_kafka_toppar_t *rd_kafka_toppar_get2 (rd_kafka_t *rk,
                                                   const char *topic,
                                                   int32_t partition,
                                                   int ua_on_miss,
                                                   int create_on_miss) {
            shptr_rd_kafka_itopic_t *s_rkt;
            rd_kafka_itopic_t *rkt;
            shptr_rd_kafka_toppar_t *s_rktp;

            rd_kafka_wrlock(rk);

            /* Find or create topic: look for an existing topic object
             * first (the lock is already held, hence no-lock). */
            s_rkt = rd_kafka_topic_find(rk, topic, 0/*no-lock*/);

            if (unlikely(!s_rkt)) {
                    /* Not found: create the topic object if allowed. */
                    if (create_on_miss)
                            s_rkt = rd_kafka_topic_new0(rk, topic, NULL,
                                                        NULL, 0/*no-lock*/);

                    if (!s_rkt) {
                            rd_kafka_wrunlock(rk);

                            /* Only a failed creation is logged. */
                            if (create_on_miss)
                                    rd_kafka_log(rk, LOG_ERR, "TOPIC",
                                                 "Failed to create local topic \"%s\": %s",
                                                 topic, rd_strerror(errno));
                            return NULL;
                    }
            }

            rd_kafka_wrunlock(rk);

            rkt = rd_kafka_topic_s2i(s_rkt);

            rd_kafka_topic_wrlock(rkt);
            s_rktp = rd_kafka_toppar_desired_add(rkt, partition);
            rd_kafka_topic_wrunlock(rkt);

            /* Release the topic reference obtained above. */
            rd_kafka_topic_destroy0(s_rkt);

            return s_rktp;
    }
    
    • desired partition: 处于desired状态的partition, 源码中的解释如下:

    The desired partition list is the list of partitions that are desired
    (e.g., by the consumer) but not yet seen on a broker.
    As soon as the partition is seen on a broker the toppar is moved from
    the desired list and onto the normal rkt_p array.
    When the partition on the broker goes away a desired partition is put
    back on the desired list

    简单说就是需要某一个partition, 但是这个partition的具体信息还没从broker拿到, 这样的partition就是desired partition; 在rd_kafka_itopic_t中有一个rkt_desp的list, 专门用来存这样的partition, 针对其有如下几个操作, 都比较简单:

    rd_kafka_toppar_desired_get
    rd_kafka_toppar_desired_link
    rd_kafka_toppar_desired_unlink
    rd_kafka_toppar_desired_add0
    rd_kafka_toppar_desired_add
    rd_kafka_toppar_desired_del
    
    • partition在broker间迁移rd_kafka_toppar_broker_migrate:
    /**
     * Start migrating \p rktp from broker \p old_rkb to \p new_rkb.
     *
     * \p new_rkb is recorded as the next leader. If a migration was already
     * pending (a next leader was set on entry) nothing more is done.
     * Otherwise a PARTITION_LEAVE op is enqueued on \p old_rkb, or, when
     * there is no old broker, a PARTITION_JOIN op directly on \p new_rkb.
     */
    static void rd_kafka_toppar_broker_migrate (rd_kafka_toppar_t *rktp,
                                                rd_kafka_broker_t *old_rkb,
                                                rd_kafka_broker_t *new_rkb) {
            rd_kafka_op_t *rko;
            rd_kafka_broker_t *dest_rkb;
            int had_next_leader = rktp->rktp_next_leader != NULL;

            /* Update next leader: take the new reference before dropping
             * the reference to the previous next leader. */
            if (new_rkb)
                    rd_kafka_broker_keep(new_rkb);
            if (rktp->rktp_next_leader)
                    rd_kafka_broker_destroy(rktp->rktp_next_leader);
            rktp->rktp_next_leader = new_rkb;

            /* Article author's open question: another migration may be
             * requested before the previous one has completed; is locking
             * needed here? */
            if (had_next_leader)
                    return;

            if (rktp->rktp_fetch_state == RD_KAFKA_TOPPAR_FETCH_OFFSET_WAIT) {
                    rd_kafka_toppar_set_fetch_state(
                            rktp, RD_KAFKA_TOPPAR_FETCH_OFFSET_QUERY);
                    rd_kafka_timer_start(&rktp->rktp_rkt->rkt_rk->rk_timers,
                                         &rktp->rktp_offset_query_tmr,
                                         500*1000,
                                         rd_kafka_offset_query_tmr_cb,
                                         rktp);
            }

            if (!old_rkb) {
                    /* No existing broker, send join op directly to new leader. */
                    rko = rd_kafka_op_new(RD_KAFKA_OP_PARTITION_JOIN);
                    dest_rkb = new_rkb;
            } else {
                    /* The broker being left gets a LEAVE op. */
                    rko = rd_kafka_op_new(RD_KAFKA_OP_PARTITION_LEAVE);
                    dest_rkb = old_rkb;
            }

            rko->rko_rktp = rd_kafka_toppar_keep(rktp);

            rd_kafka_q_enq(dest_rkb->rkb_ops, rko);
    }
    
    • broker的delegate操作:
    /**
     * Delegate \p rktp to broker \p rkb.
     *
     * When \p rkb is NULL, the toppar is not being removed (\p for_removal
     * is 0) and the client is not terminating, the internal broker is used
     * instead. If the toppar already sits on the target broker with no
     * migration pending, only a debug line is emitted; otherwise the
     * migration is started through rd_kafka_toppar_broker_migrate().
     */
    void rd_kafka_toppar_broker_delegate (rd_kafka_toppar_t *rktp,
                                          rd_kafka_broker_t *rkb,
                                          int for_removal) {
            rd_kafka_t *rk = rktp->rktp_rkt->rkt_rk;
            int internal_fallback = 0;

            /* Delegate toppars with no leader to the
             * internal broker for bookkeeping. */
            if (!rkb && !for_removal && !rd_kafka_terminating(rk)) {
                    rkb = rd_kafka_broker_internal(rk);
                    internal_fallback = 1;
            }

            if (rktp->rktp_leader == rkb && !rktp->rktp_next_leader) {
                    rd_kafka_dbg(rk, TOPIC, "BRKDELGT",
                                 "%.*s [%"PRId32"]: not updating broker: "
                                 "already on correct broker %s",
                                 RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic),
                                 rktp->rktp_partition,
                                 rkb ? rd_kafka_broker_name(rkb) : "(none)");
            } else if (rktp->rktp_leader || rkb) {
                    /* The actual migration. */
                    rd_kafka_toppar_broker_migrate(rktp, rktp->rktp_leader, rkb);
            }

            /* Release the reference obtained from
             * rd_kafka_broker_internal(), on every path. */
            if (internal_fallback)
                    rd_kafka_broker_destroy(rkb);
    }
    
    • 提交offset到broker rd_kafka_toppar_offset_commit
    /**
     * Asynchronously commit \p offset for the partition \p rktp.
     *
     * @param rktp      partition to commit for
     * @param offset    offset to commit; also recorded in
     *                  rktp_committing_offset
     * @param metadata  optional NUL-terminated commit metadata (may be NULL);
     *                  it is duplicated into the commit list element
     */
    void rd_kafka_toppar_offset_commit (rd_kafka_toppar_t *rktp, int64_t offset,
                                        const char *metadata) {
            rd_kafka_topic_partition_list_t *commit_list;
            rd_kafka_topic_partition_t *entry;

            /* Build a one-element list holding this topic+partition and
             * the offset to commit. */
            commit_list = rd_kafka_topic_partition_list_new(1);
            entry = rd_kafka_topic_partition_list_add(
                    commit_list,
                    rktp->rktp_rkt->rkt_topic->str,
                    rktp->rktp_partition);
            entry->offset = offset;

            if (metadata != NULL) {
                    entry->metadata      = rd_strdup(metadata);
                    entry->metadata_size = strlen(metadata);
            }

            /* Remember which offset is currently being committed. */
            rktp->rktp_committing_offset = offset;

            /* Asynchronous commit. */
            rd_kafka_commit(rktp->rktp_rkt->rkt_rk, commit_list, 1/*async*/);

            rd_kafka_topic_partition_list_destroy(commit_list);
    }
    
    • 设置下一次拉取数据时开始的offset位置, 即rd_kafka_toppar_next_offset_handle
    /**
     * Apply \p Offset as the next offset to fetch from for \p rktp and
     * switch the partition to the active fetch state.
     *
     * A logical offset is not applied directly but handed to
     * rd_kafka_offset_reset(). If the queried offset (rktp_query_offset) is
     * a TAIL offset, \p Offset is moved back by the tail count, clamped at 0.
     */
    void rd_kafka_toppar_next_offset_handle (rd_kafka_toppar_t *rktp,
                                             int64_t Offset) {
            /* Per the article: for BEGINNING/END the reset triggers an
             * offset request to the broker; for RD_KAFKA_OFFSET_INVALID an
             * error op is enqueued and the fetch state becomes
             * RD_KAFKA_TOPPAR_FETCH_NONE (rd_kafka_offset_reset is not
             * shown here). */
            if (RD_KAFKA_OFFSET_IS_LOGICAL(Offset)) {
                    /* Offset storage returned logical offset (e.g. "end"),
                     * look it up. */
                    rd_kafka_offset_reset(rktp, Offset, RD_KAFKA_RESP_ERR_NO_ERROR,
                                          "update");
                    return;
            }

            /* Adjust by TAIL count, if wanted: step back tail_cnt offsets
             * from the given position. */
            if (rktp->rktp_query_offset <=
                RD_KAFKA_OFFSET_TAIL_BASE) {
                    int64_t tail_cnt =
                            llabs(rktp->rktp_query_offset -
                                  RD_KAFKA_OFFSET_TAIL_BASE);

                    if (tail_cnt > Offset)
                            Offset = 0;
                    else
                            Offset -= tail_cnt;
            }

            /* Record the next offset to fetch. */
            rktp->rktp_next_offset = Offset;

            rd_kafka_toppar_set_fetch_state(rktp, RD_KAFKA_TOPPAR_FETCH_ACTIVE);

            /* Wake-up broker thread which might be idling on IO */
            if (rktp->rktp_leader)
                    rd_kafka_broker_wakeup(rktp->rktp_leader);
    }
    
    • 从coordinator获取已提交的offset(OffsetFetchRequest) rd_kafka_toppar_offset_fetch:
    /**
     * Request the committed offset for \p rktp by enqueuing an OFFSET_FETCH
     * op on the op queue of the partition's consumer group (rktp_cgrp).
     *
     * @param rktp    partition to fetch the committed offset for
     * @param replyq  reply queue attached to the op
     */
    void rd_kafka_toppar_offset_fetch (rd_kafka_toppar_t *rktp,
                                       rd_kafka_replyq_t replyq) {
            rd_kafka_topic_partition_list_t *part;
            rd_kafka_op_t *rko;

            /* One-element list; the element is added together with its
             * own toppar reference. */
            part = rd_kafka_topic_partition_list_new(1);
            rd_kafka_topic_partition_list_add0(part,
                                               rktp->rktp_rkt->rkt_topic->str,
                                               rktp->rktp_partition,
                                               rd_kafka_toppar_keep(rktp));

            /* Build the OffsetFetch op. */
            rko = rd_kafka_op_new(RD_KAFKA_OP_OFFSET_FETCH);
            rko->rko_rktp = rd_kafka_toppar_keep(rktp);
            rko->rko_replyq = replyq;

            rko->rko_u.offset_fetch.partitions = part;
            rko->rko_u.offset_fetch.do_free = 1;

            /* OffsetFetch is consumer related: it goes on the cgrp's
             * op queue. */
            rd_kafka_q_enq(rktp->rktp_cgrp->rkcg_ops, rko);
    }
    
    • 获取用于消费的有效的offset
    /**
     * Resolve \p query_offset into a usable fetch offset for \p rktp.
     *
     * With a non-zero \p backoff_ms (or when there is no usable leader
     * broker, in which case 500 ms is used) the query is deferred through
     * the offset query timer. Otherwise the stored offset is fetched via
     * rd_kafka_toppar_offset_fetch() when broker-based offset storage is
     * configured and the stored offset was asked for; all other cases issue
     * an OffsetRequest to the leader. The fetch state ends up as OFFSET_WAIT.
     *
     * @param rktp          partition to query for
     * @param query_offset  offset to resolve (stored/logical/tail)
     * @param backoff_ms    delay before querying, 0 for none
     */
    void rd_kafka_toppar_offset_request (rd_kafka_toppar_t *rktp,
                                         int64_t query_offset, int backoff_ms) {
            rd_kafka_broker_t *rkb = rktp->rktp_leader;
            int from_broker_store;

            /* Without a real leader broker, retry later from a timer. */
            if (!backoff_ms && (!rkb || rkb->rkb_source == RD_KAFKA_INTERNAL))
                    backoff_ms = 500;

            if (backoff_ms) {
                    rd_kafka_toppar_set_fetch_state(
                            rktp, RD_KAFKA_TOPPAR_FETCH_OFFSET_QUERY);
                    /* Arm the query timer with
                     * rd_kafka_offset_query_tmr_cb as its callback; per the
                     * article that callback calls back into this function. */
                    rd_kafka_timer_start(&rktp->rktp_rkt->rkt_rk->rk_timers,
                                         &rktp->rktp_offset_query_tmr,
                                         backoff_ms*1000ll,
                                         rd_kafka_offset_query_tmr_cb, rktp);
                    return;
            }

            /* Querying right now: stop the retry timer. */
            rd_kafka_timer_stop(&rktp->rktp_rkt->rkt_rk->rk_timers,
                                &rktp->rktp_offset_query_tmr, 1/*lock*/);

            from_broker_store =
                    query_offset == RD_KAFKA_OFFSET_STORED &&
                    rktp->rktp_rkt->rkt_conf.offset_store_method ==
                    RD_KAFKA_OFFSET_METHOD_BROKER;

            if (!from_broker_store) {
                    shptr_rd_kafka_toppar_t *s_rktp;
                    rd_kafka_topic_partition_list_t *offsets;

                    /*
                     * Look up logical offset (end,beginning,tail,..)
                     */
                    s_rktp = rd_kafka_toppar_keep(rktp);

                    if (query_offset <= RD_KAFKA_OFFSET_TAIL_BASE)
                            query_offset = RD_KAFKA_OFFSET_END;

                    offsets = rd_kafka_topic_partition_list_new(1);
                    rd_kafka_topic_partition_list_add(
                            offsets,
                            rktp->rktp_rkt->rkt_topic->str,
                            rktp->rktp_partition)->offset = query_offset;

                    /* Mostly used for offset reset: ask the leader for the
                     * partition's oldest or newest offset. */
                    rd_kafka_OffsetRequest(rkb, offsets, 0,
                                           RD_KAFKA_REPLYQ(rktp->rktp_ops,
                                                           rktp->rktp_op_version),
                                           rd_kafka_toppar_handle_Offset,
                                           s_rktp);

                    rd_kafka_topic_partition_list_destroy(offsets);
            } else {
                    /*
                     * Get stored offset from broker based storage:
                     * ask cgrp manager for offsets
                     */
                    rd_kafka_toppar_offset_fetch(
                            rktp,
                            RD_KAFKA_REPLYQ(rktp->rktp_ops,
                                            rktp->rktp_op_version));
            }

            rd_kafka_toppar_set_fetch_state(rktp,
                                            RD_KAFKA_TOPPAR_FETCH_OFFSET_WAIT);
    }
    

    Librdkafka源码分析-Content Table

    相关文章

      网友评论

        本文标题:Librdkafka用于kafka topic-partition

        本文链接:https://www.haomeiwen.com/subject/zmlznxtx.html