美文网首页
MySQL死锁分析

MySQL死锁分析

作者: 王白告龙 | 来源:发表于2019-05-02 12:43 被阅读0次

    死锁场景1:并发插入重复key

    场景重现

    表结构如下:

    CREATE TABLE t1 (i INT, PRIMARY KEY (i)) ENGINE = InnoDB;
    

    三个session按顺序执行下面的操作

    Session 1:

    START TRANSACTION;
    INSERT INTO t1 VALUES(1);
    

    Session 2:

    START TRANSACTION;
    INSERT INTO t1 VALUES(1);
    

    Session 3:

     START TRANSACTION;
    INSERT INTO t1 VALUES(1);
    

    Session 1:

    ROLLBACK; 
    

    然后Session2 和 Session3发生死锁

    原因分析

    分析之前,我们要知道:

    1. 隐式锁:事务发现当前记录没有锁竞争,则暂时不加锁,直接操作
    2. 隐式锁转换为显示锁:后续发现锁竞争的事务,会为之前的事务加锁
    3. 发生"duplicate-key"错误,则会在"duplicate index record"上设置共享GAP锁
    4. 删除记录会导致锁继承
    5. mysql表有2个伪记录:infimum表示最小记录,supremum表示最大记录
    6. Session1,Session2,Session3简称:S1,S2,S3。主键为1的记录,简称:row1

    基于以上事实,我们梳理下发生死锁的原因

    1. S1插入row1时,由于没有锁竞争,不加锁直接插入

      image.png
    1. S2/S3插入row1时,发现row1上有活动的事务S1,帮S1在row1上加记录锁
    image.png
    1. S2/S3插入row1时,发现主键重复, 对row1请求"共享next-key锁"
    image.png
    1. "共享next-key锁"与"记录锁"冲突,S2/S2进入等待队列
    image.png
    1. S1回滚:删除row1,将row1上的锁继承给supremum,然后授予给S2/S3
    image.png
    1. row1被删除后,S2/S3重新定位到infinum,对其下一条记录supremum加"插入意向锁"

    ​ S2对supremum加"插入意向锁",而S3持有supremum的共享GAP锁

    ​ S3对supremum加"插入意向锁",而S2持有supremum的共享GAP锁

    ​ 于是S2与S3发生死锁

    image.png

    关键代码分析

    "............"表示省略部分代码

    1 S2/S3 插入流程代码分析

    1.1 分析入口:row0ins.cc类的row_ins_clust_index_entry_low方法
    /**
    调用路径是:row_insert_for_mysql -> row_insert_for_mysql_using_ins_graph
              -> row_ins_step -> row_ins -> row_ins_index_entry_step
              -> row_ins_index_entry -> row_ins_clust_index_entry
              -> row_ins_clust_index_entry_low
    */
    dberr_t row_ins_clust_index_entry_low(...........){
       //如果发生"duplicate-key"错误
      if (!index->allow_duplicates && n_uniq &&
          (cursor->up_match >= n_uniq || cursor->low_match >= n_uniq)) {
          ............  
         
          // 此方法会请求共享锁,并加入等待锁的队列
          err = row_ins_duplicate_error_in_clust(flags, cursor, entry, thr, &mtr);
          if (err != DB_SUCCESS) {
           err_exit:
            mtr.commit();
            //如果共享锁无法立即获取,直接返回,上层方法会将此线程休眠
            //Session2和Session3无法立即获取共享锁后,会从这里返回后休眠
            goto func_exit;
          }
          ............
      }
      ............ 
      /**
        处理完"duplicate-key"的情况后,开始执行插入流程
        获取了共享锁后的Session2和Session3,会在这个方法里面申请“插入意向锁”,从而发生死锁
        
        后续调用路径:-> btr_cur_ins_lock_and_undo -> lock_rec_insert_check_and_lock
                -> lock_rec_insert_check_and_lock 
                -> rec_lock.add_to_waitq(此方法会检测死锁)
      */
      err = btr_cur_optimistic_insert(flags, cursor, &offsets, &offsets_heap,
                                          entry, &insert_rec, &big_rec, n_ext, thr,
                                          &mtr);
      
      ............ 
    }
    
    1.2 请求"共享GAP锁"的相关代码分析
    /**
     为重复记录,设置共享锁,如果锁冲突,则加入等待队列
     Checks if a unique key violation error would occur at an index entry
     insert. Sets shared locks on possible duplicate records. Works only
     for a clustered index!
     
     */
    static  row_ins_duplicate_error_in_clust(...........)
    {
      ...........
      ...........
          //隔离级别>RC 且 不是特殊表,则加LOCK_ORDINARY锁(LOCK_ORDINARY就是next-key锁)
          lock_type = ((trx->isolation_level <= TRX_ISO_READ_COMMITTED) ||
                       (cursor->index->table->skip_gap_locks()))
                          ? LOCK_REC_NOT_GAP
                          : LOCK_ORDINARY;
    
          /* We set a lock on the possible duplicate: this
          is needed in logical logging of MySQL to make
          sure that in roll-forward we get the same duplicate
          errors as in original execution */
    
          if (flags & BTR_NO_LOCKING_FLAG) {
            /* Do nothing if no-locking is set */
            err = DB_SUCCESS;
          } else if (trx->duplicates) {
            /* If the SQL-query will update or replace
            duplicate key we will take X-lock for
            duplicates ( REPLACE, LOAD DATAFILE REPLACE,
            INSERT ON DUPLICATE KEY UPDATE). */
            
                    // INSERT ON DUPLICATE KEY UPDATE 等语句需要加”排他锁“
            err =
                row_ins_set_exclusive_rec_lock(lock_type, btr_cur_get_block(cursor),
                                               rec, cursor->index, offsets, thr);
          } else {
            // 普通insert 加”共享锁“
            // 后续调用路径  -> lock_clust_rec_read_check_and_lock
            //             -> lock_rec_lock -> lock_rec_lock_slow -> rec_lock.add_to_waitq
            err = row_ins_set_shared_rec_lock(lock_type, btr_cur_get_block(cursor),
                                              rec, cursor->index, offsets, thr);
          }
      
      ...........
      ...........
     
    }
    // S2/S3通过此方法请求"共享GAP锁"
    static dberr_t lock_rec_lock_slow(...........) {
      ...........
        
      if (lock_rec_has_expl(mode, block, heap_no, trx)) {
        /* The trx already has a strong enough lock on rec: do
        nothing */
        err = DB_SUCCESS;
      } else {
        //判断“mysql行”上是否有和当前锁模式冲突的锁
        const lock_t *wait_for =
            lock_rec_other_has_conflicting(mode, block, heap_no, trx);
    
        if (wait_for != NULL) {
          //Session1已经获取了row1的记录锁,因此与当前锁模式冲突
          switch (sel_mode) {
            ...........
            case SELECT_ORDINARY:
                        //创建锁对象
              RecLock rec_lock(thr, index, block, heap_no, mode);
              //进入等待队列
              err = rec_lock.add_to_waitq(wait_for);
              break;
          }
        }else if (!impl) {
                //显示锁
          lock_rec_add_to_queue(LOCK_REC | mode, block, heap_no, index, trx);
          err = DB_SUCCESS_LOCKED_REC;
        } else {
          //隐式锁
          err = DB_SUCCESS;
        }
        ............
        ............
    }
    
    1.3 请求"插入意向锁"的相关代码分析
    dberr_t lock_rec_insert_check_and_lock(..........)  
    {
        //获取当前定位记录的下一条记录
      //对应我们的例子,回滚后row1已经被删除
      //因此s2/s3重新定位获取的:rec就是infimum,  next_rec就是supremum
      const rec_t *next_rec = page_rec_get_next_const(rec);
        //next_rec的heap_no
      ulint heap_no = page_rec_get_heap_no(next_rec);
    
        ...........
        
      //获取的锁模式:插入意向锁
      const ulint type_mode = LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION;
      //判断next_rec上是否有和“插入意向锁”冲突的锁
      //对应我们的例子:S1回滚后,supremum列继承了row1的“共享GAP锁”
      //此时S2/S3定位到的next_rec就是supremum,因此发生“锁冲突”
      const lock_t *wait_for =
          lock_rec_other_has_conflicting(type_mode, block, heap_no, trx);
    
      if (wait_for != NULL) {
        RecLock rec_lock(thr, index, block, heap_no, type_mode);
    
        trx_mutex_enter(trx);
    
        trx->owns_mutex = true;
            //创建锁,进入等待队列,此方法内部会进行死锁检测
        err = rec_lock.add_to_waitq(wait_for);
    
        trx->owns_mutex = false;
    
        trx_mutex_exit(trx);
    
      } else {
        //没有冲突,不加锁(隐式锁)
        err = DB_SUCCESS;
      }
    
      ...........
    }
    

    2 S1 回滚流程代码分析

    调用路径:trx_rollback_for_mysql —> trx_rollback_to_savepoint_low

    static void trx_rollback_to_savepoint_low(...........)
    {
        ............
          
        //此方法会执行”锁继承“
        //调用路径:btr_cur_optimistic_delete_func -> lock_rec_inherit_to_gap
        //对应我们的例子:row1删除后,row1的gap锁继承给了supremum
        //由(infimum,row1] 变成了 (infimum,supremum]
        que_run_threads(thr);
      
        .............
      }
      if (savept == NULL) {
        //此方法会执行”锁授予“
        //调用路径:lock_rec_dequeue_from_page —> lock_rec_grant
        //对应我们的例子:将共享GAP锁(infimum,supremum],授予给了S2/S3
        trx_rollback_finish(trx);
        MONITOR_INC(MONITOR_TRX_ROLLBACK);
      } else {
        trx->lock.que_state = TRX_QUE_RUNNING;
        MONITOR_INC(MONITOR_TRX_ROLLBACK_SAVEPOINT);
      }
      ............
    }
    

    参考

    死锁分析

    MySQL · 特性分析 · innodb 锁分裂继承与迁移

    相关文章

      网友评论

          本文标题:MySQL死锁分析

          本文链接:https://www.haomeiwen.com/subject/eetnnqtx.html