美文网首页
PHP数组源码解析与GDB实战分析

PHP数组源码解析与GDB实战分析

作者: Atman666 | 来源:发表于2019-09-26 21:50 被阅读0次

    一、源码

    一、PHP数组源码初步分析
    1、基本类型定义(php7.3.9)

    typedef union _zend_value {
        zend_long         lval;             /* long value */
        double            dval;             /* double value */
        zend_refcounted  *counted;
        zend_string      *str;
        zend_array       *arr;
        zend_object      *obj;
        zend_resource    *res;
        zend_reference   *ref;
        zend_ast_ref     *ast;
        zval             *zv;
        void             *ptr;
        zend_class_entry *ce;
        zend_function    *func;
        struct {
            uint32_t w1;
            uint32_t w2;
        } ww;
    } zend_value;
    
    struct _zval_struct {
        zend_value        value;            /* value */
        union {
            struct {
                ZEND_ENDIAN_LOHI_3(
                    zend_uchar    type,         /* active type */
                    zend_uchar    type_flags,
                    union {
                        uint16_t  call_info;    /* call info for EX(This) */
                        uint16_t  extra;        /* not further specified */
                    } u)
            } v;
            uint32_t type_info;
        } u1;
        union {
            uint32_t     next;                 /* hash collision chain */
            uint32_t     cache_slot;           /* cache slot (for RECV_INIT) */
            uint32_t     opline_num;           /* opline number (for FAST_CALL) */
            uint32_t     lineno;               /* line number (for ast nodes) */
            uint32_t     num_args;             /* arguments number for EX(This) */
            uint32_t     fe_pos;               /* foreach position */
            uint32_t     fe_iter_idx;          /* foreach iterator index */
            uint32_t     access_flags;         /* class constant access flags */
            uint32_t     property_guard;       /* single property guard */
            uint32_t     constant_flags;       /* constant flags */
            uint32_t     extra;                /* not further specified */
        } u2;
    };
    
    
    /* regular data types */
    #define IS_UNDEF                    0
    #define IS_NULL                     1
    #define IS_FALSE                    2
    #define IS_TRUE                     3
    #define IS_LONG                     4
    #define IS_DOUBLE                   5
    #define IS_STRING                   6
    #define IS_ARRAY                    7
    #define IS_OBJECT                   8
    #define IS_RESOURCE                 9
    #define IS_REFERENCE                10
    
    /* constant expressions */
    #define IS_CONSTANT_AST             11
    
    /* internal types */
    #define IS_INDIRECT                 13
    #define IS_PTR                      14
    #define _IS_ERROR                   15
    
    /* fake types used only for type hinting (Z_TYPE(zv) can not use them) */
    #define _IS_BOOL                    16
    #define IS_CALLABLE                 17
    #define IS_ITERABLE                 18
    #define IS_VOID                     19
    #define _IS_NUMBER                  20
    
    typedef struct _Bucket {
        zval              val;
        zend_ulong        h;                /* hash value (or numeric index)   */
        zend_string      *key;              /* string key or NULL for numerics */
    } Bucket;
    
    typedef struct _zend_array HashTable;
    
    struct _zend_array {
        zend_refcounted_h gc;
        union {
            struct {
                ZEND_ENDIAN_LOHI_4(
                    zend_uchar    flags,
                    zend_uchar    _unused,
                    zend_uchar    nIteratorsCount,
                    zend_uchar    _unused2)
            } v;
            uint32_t flags;
        } u;
        uint32_t          nTableMask;
        Bucket           *arData;
        uint32_t          nNumUsed;
        uint32_t          nNumOfElements;
        uint32_t          nTableSize;
        uint32_t          nInternalPointer;
        zend_long         nNextFreeElement;
        dtor_func_t       pDestructor;
    };
    

    2、常见宏

    #define HT_MIN_MASK ((uint32_t) -2)
    #define HT_MIN_SIZE 8
    #define HT_HASH_EX(data, idx)  ((uint32_t*)(data))[(int32_t)(idx)] 
    #define HT_SET_DATA_ADDR(ht, ptr) do { \
            (ht)->arData = (Bucket*)(((char*)(ptr)) + HT_HASH_SIZE((ht)->nTableMask)); \
        } while (0)
    
    

    3、几个GDB技巧

    #根据Bucket的h值推算槽位  p (int32_t)($54.h|-8) ,$54为Bucket
    p (int32_t)(9223372036854953484|-8)  
    -4
    #根据槽位推算存储位置 p ((uint32_t*)($14.arData))[(int32_t)(-3)]
    p ((uint32_t*)($14.arData))[(int32_t)(-1)]
    1
    #带符号数字转换
    p (int32_t )4294967288
     -8
    #查看-8的二进制
    p /t -8
    11111111111111111111111111111000
    

    4、HashTable的API

    a、初始化

    static zend_always_inline void _zend_hash_init_int(HashTable *ht, uint32_t nSize, dtor_func_t pDestructor, zend_bool persistent)
    {
        GC_SET_REFCOUNT(ht, 1);
        GC_TYPE_INFO(ht) = IS_ARRAY | (persistent ? (GC_PERSISTENT << GC_FLAGS_SHIFT) : (GC_COLLECTABLE << GC_FLAGS_SHIFT));
        HT_FLAGS(ht) = HASH_FLAG_STATIC_KEYS;
        ht->nTableMask = HT_MIN_MASK;
        HT_SET_DATA_ADDR(ht, &uninitialized_bucket);
        ht->nNumUsed = 0;
        ht->nNumOfElements = 0;
        ht->nInternalPointer = 0;
        ht->nNextFreeElement = 0;
        ht->pDestructor = pDestructor;
        ht->nTableSize = zend_hash_check_size(nSize);
    }
    

    注意宏 HT_SET_DATA_ADDR

    HashTable结构体arData地址获取宏
    #define HT_SET_DATA_ADDR(ht, ptr) do { \  /*ptr为申请内存地址 先转化为char* 再加nTableMask */
            (ht)->arData = (Bucket*)(((char*)(ptr)) + HT_HASH_SIZE((ht)->nTableMask)); \ 
        } while (0)
    static const uint32_t uninitialized_bucket[-HT_MIN_MASK] =
        {HT_INVALID_IDX, HT_INVALID_IDX};
    

    c、查找 zend_hash_find_bucket

    static zend_always_inline Bucket *zend_hash_find_bucket(const HashTable *ht, zend_string *key, zend_bool known_hash) { 
        zend_ulong h; 
        uint32_t nIndex; 
        uint32_t idx; 
        Bucket *p, *arData; 
        if (known_hash) { 
            h = ZSTR_H(key); //直接读取
        } else { 
            h = zend_string_hash_val(key); //计算得到hash
         }
         arData = ht->arData; 
        nIndex = h | ht->nTableMask; //计算搜索的key应该存储到的nIndex
        idx = HT_HASH_EX(arData, nIndex); //实际nIndex存储的idx
        if (UNEXPECTED(idx == HT_INVALID_IDX)) { //如果idx为空 不存在 HT_INVALID_IDX=-1
            return NULL; 
        } 
        p = HT_HASH_TO_BUCKET_EX(arData, idx);//获取idx存储位置对应的Bucket
         if (EXPECTED(p->key == key)) { /* check for the same interned string 即同一个zend_string实例*/ 
            return p; 
        } 
        while (1) { 
            if (p->h == ZSTR_H(key) && EXPECTED(p->key) && zend_string_equal_content(p->key, key)) {//hash及内容比较 
            return p; 
            } 
            idx = Z_NEXT(p->val); 
            if (idx == HT_INVALID_IDX) { //不存在拉链
                return NULL; 
            }
             p = HT_HASH_TO_BUCKET_EX(arData, idx); 
            if (p->key == key) { /* check for the same interned string */ 
                return p;
             }
         }
     }
    

    d、扩容

        static void ZEND_FASTCALL zend_hash_do_resize(HashTable *ht) { 
        IS_CONSISTENT(ht); 
        HT_ASSERT_RC1(ht);
        if (ht->nNumUsed > ht->nNumOfElements + (ht->nNumOfElements >> 5)) { //已使用的Bucket > 有效Bucket+(有效Bucket /32)
            /* additional term is there to amortize the cost of compaction */ 
            zend_hash_rehash(ht); 
        } else if (ht->nTableSize < HT_MAX_SIZE) {// 小于最大数组限制
            /* Let's double the table size */ 
            void *new_data, *old_data = HT_GET_DATA_ADDR(ht); 
            uint32_t nSize = ht->nTableSize + ht->nTableSize; 
            Bucket *old_buckets = ht->arData; 
            ht->nTableSize = nSize; 
            new_data = pemalloc(HT_SIZE_EX(nSize, HT_SIZE_TO_MASK(nSize)), GC_FLAGS(ht) & IS_ARRAY_PERSISTENT); 
            ht->nTableMask = HT_SIZE_TO_MASK(ht->nTableSize); 
            HT_SET_DATA_ADDR(ht, new_data); //设置新arData地址
            memcpy(ht->arData, old_buckets, sizeof(Bucket) * ht->nNumUsed); //直接内存拷贝
            pefree(old_data, GC_FLAGS(ht) & IS_ARRAY_PERSISTENT); //释放原内存
            zend_hash_rehash(ht); //rehash
        } else { 
            zend_error_noreturn(E_ERROR, “Possible integer overflow in memory allocation (%u * %zu + %zu)”, ht->nTableSize * 2, \
                sizeof(Bucket) + sizeof(uint32_t), sizeof(Bucket));
         } 
    }
    
    

    e、Hash冲突

    static zend_always_inline void _zend_hash_append_ind(HashTable *ht, zend_string *key, zval *ptr) { 
        uint32_t idx = ht->nNumUsed++; 
        uint32_t nIndex; 
        Bucket *p = ht->arData + idx; //获取追加新位置
        ZVAL_INDIRECT(&p->val, ptr); 
        if (!ZSTR_IS_INTERNED(key)) { 
            HT_FLAGS(ht) &= ~HASH_FLAG_STATIC_KEYS; 
            zend_string_addref(key); 
            zend_string_hash_val(key); 
        }
        p->key = key; 
        p->h = ZSTR_H(key); 
        nIndex = (uint32_t)p->h | ht->nTableMask; 
        Z_NEXT(p->val) = HT_HASH(ht, nIndex); //将原存idx存储到p.u2.next
        HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(idx); //将新idx存储到nIndex
        ht->nNumOfElements++; 
    }
    
    #define Z_NEXT(zval) (zval).u2.next
    #define HT_HASH_EX(data, idx) \
         ((uint32_t*)(data))[(int32_t)(idx)] 
    #define HT_HASH(ht, idx) \ 
        HT_HASH_EX((ht)->arData, idx)
    # define HT_IDX_TO_HASH(idx) \ 
        (idx)
    
    

    f、rehash

    ZEND_API int ZEND_FASTCALL zend_hash_rehash(HashTable *ht)
    {
        Bucket *p;
        uint32_t nIndex, i;
    
        IS_CONSISTENT(ht);
    
        if (UNEXPECTED(ht->nNumOfElements == 0)) {//空数组
            if (HT_FLAGS(ht) & HASH_FLAG_INITIALIZED) {//如果已初始化
                ht->nNumUsed = 0;
                HT_HASH_RESET(ht);
            }
            return SUCCESS;
        }
    
        HT_HASH_RESET(ht);
        i = 0;
        p = ht->arData;
        if (HT_IS_WITHOUT_HOLES(ht)) {//不存在删除元素
            do {
                nIndex = p->h | ht->nTableMask;//逐个计算索引位置
                Z_NEXT(p->val) = HT_HASH(ht, nIndex);//原索引位置存储的槽位到当前Bucket的next
                HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(i);//将现Bucket的槽位存储到索引空间
                p++;
            } while (++i < ht->nNumUsed);
        } else {//存在已删除情况
            uint32_t old_num_used = ht->nNumUsed;
            do {
                if (UNEXPECTED(Z_TYPE(p->val) == IS_UNDEF)) {//已删除元素
                    uint32_t j = i;
                    Bucket *q = p;
    
                    if (EXPECTED(!HT_HAS_ITERATORS(ht))) {
                        while (++i < ht->nNumUsed) {
                            p++;//下一个Bucket
                            if (EXPECTED(Z_TYPE_INFO(p->val) != IS_UNDEF)) {//正常Bucket
                                ZVAL_COPY_VALUE(&q->val, &p->val);//拷贝
                                q->h = p->h;
                                nIndex = q->h | ht->nTableMask;
                                q->key = p->key;
                                Z_NEXT(q->val) = HT_HASH(ht, nIndex);
                                HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(j);
                                if (UNEXPECTED(ht->nInternalPointer == i)) {
                                    ht->nInternalPointer = j;
                                }
                                q++;
                                j++;
                            }
                        }
                    } else {
                        uint32_t iter_pos = zend_hash_iterators_lower_pos(ht, 0);
    
                        while (++i < ht->nNumUsed) {
                            p++;
                            if (EXPECTED(Z_TYPE_INFO(p->val) != IS_UNDEF)) {
                                ZVAL_COPY_VALUE(&q->val, &p->val);
                                q->h = p->h;
                                nIndex = q->h | ht->nTableMask;
                                q->key = p->key;
                                Z_NEXT(q->val) = HT_HASH(ht, nIndex);
                                HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(j);
                                if (UNEXPECTED(ht->nInternalPointer == i)) {
                                    ht->nInternalPointer = j;
                                }
                                if (UNEXPECTED(i >= iter_pos)) {
                                    do {
                                        zend_hash_iterators_update(ht, iter_pos, j);
                                        iter_pos = zend_hash_iterators_lower_pos(ht, iter_pos + 1);
                                    } while (iter_pos < i);
                                }
                                q++;
                                j++;
                            }
                        }
                    }
                    ht->nNumUsed = j;
                    break;
                }
                nIndex = p->h | ht->nTableMask;
                Z_NEXT(p->val) = HT_HASH(ht, nIndex);
                HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(i);
                p++;
            } while (++i < ht->nNumUsed);
    
            /* Migrate pointer to one past the end of the array to the new one past the end, so that
             * newly inserted elements are picked up correctly. */
            if (UNEXPECTED(HT_HAS_ITERATORS(ht))) {
                _zend_hash_iterators_update(ht, old_num_used, ht->nNumUsed);
            }
        }
        return SUCCESS;
    }
    

    二、实战

    1、脚本ht.php

    <?php
    $ar = ['a'=>'abc','b'=>'bcd','c'=>'cde','d'=>'def','e'=>'fgh','q'=>'ghi','i'=>'gkl','y'=>'glz'];
    var_export($ar);
    unset($ar['q']);
    var_export($ar);
    $ar['g'] = 'glz';
    var_export($ar);
    $ar['h'] = 'hello';
    var_export($ar);
    

    2、GDB运行,执行如下

    b zend_execute
    r ht.php
    p op_array
    p *op_array
    p $2.opcodes
    p *$2.opcodes
    p $3+1
    p *$5
    ...
    p $3+10
    p *$23
    

    3、获取opcode_handler如下,附猜测可能对应的php代码:

    1、ZEND_ASSIGN_SPEC_CV_CONST_RETVAL_UNUSED_HANDLER  $ar = ['a'=>'abc','b'=>'bcd','c'=>'cde','d'=>'def','e'=>'fgh','q'=>'ghi','i'=>'gkl','y'=>'glz'];
    2、ZEND_INIT_FCALL_SPEC_CONST_HANDLER var_export
    3、ZEND_SEND_VAR_SPEC_CV_HANDLER $ar
    4、ZEND_DO_ICALL_SPEC_RETVAL_UNUSED_HANDLER var_export
    5、ZEND_UNSET_DIM_SPEC_CV_CONST_HANDLER unset
    6、ZEND_INIT_FCALL_SPEC_CONST_HANDLER var_export
    7、ZEND_SEND_VAR_SPEC_CV_HANDLER $ar
    8、ZEND_DO_ICALL_SPEC_RETVAL_UNUSED_HANDLER var_export
    9、ZEND_ASSIGN_DIM_SPEC_CV_CONST_OP_DATA_CONST_HANDLER $ar['g'] = 'glz';
    10、ZEND_NULL_HANDLER
    11、ZEND_INIT_FCALL_SPEC_CONST_HANDLER var_export
    12、ZEND_SEND_VAR_SPEC_CV_HANDLER  $ar
    13、ZEND_DO_ICALL_SPEC_RETVAL_UNUSED_HANDLER var_export
    14、ZEND_ASSIGN_DIM_SPEC_CV_CONST_OP_DATA_CONST_HANDLER $ar['h'] = 'hello';
    15、ZEND_NULL_HANDLER
    16、ZEND_INIT_FCALL_SPEC_CONST_HANDLER var_export
    17、ZEND_SEND_VAR_SPEC_CV_HANDLER $ar
    18、ZEND_DO_ICALL_SPEC_RETVAL_UNUSED_HANDLER var_export
    19、ZEND_RETURN_SPEC_CONST_HANDLER 1
    

    4、打若干断点,以便查看不同阶段HashTable内容

    (gdb) b ZEND_ASSIGN_SPEC_CV_CONST_RETVAL_UNUSED_HANDLER #value
    (gdb) b ZEND_SEND_VAR_SPEC_CV_HANDLER #varptr
    (gdb) b ZEND_ASSIGN_DIM_SPEC_CV_CONST_OP_DATA_CONST_HANDLER # 执行到最后p object_ptr
    

    5、ZEND_ASSIGN_SPEC_CV_CONST_RETVAL_UNUSED_HANDLER

    39441       value = EX_CONSTANT(opline->op2);
    n
    p value
    p *value
    p $28.value.arr
    p *$28.value.arr
    
    #$30 = {gc = {refcount = 1, u = {v = {type = 7 '\a', flags = 0 '\000', gc_info = 0}, type_info = 7}}, u = {v = {flags = 10 '\n', nApplyCount = 0 '\000', nIteratorsCount = 0 '\000', consistency = 0 '\000'}, 
    #   flags = 10}, nTableMask = 4294967288, arData = 0x7ffff5a5ea20, nNumUsed = 8, nNumOfElements = 8, nTableSize = 8, nInternalPointer = 0, nNextFreeElement = 0, 
    #  pDestructor = 0x84c8af <_zval_ptr_dtor_wrapper>}
    
    p $30.arData
    p *$30.arData
    ...
    p $31+7
    p *$43
    #查看next的具体指
    p (int32_t)4294967295 # -1
    #查看key值
    p *$32.key.val@1
    #根据h查看槽位
    p (int32_t)($32.h|-8)
    #根据槽位查看arDataIdex
    p ((uint32_t*)($30.arData))[(int32_t)(-1)]
    ##观察next和key
    arDataIdx      next  key  h                      h|-8 
    0                -1    a  9223372036854953478   -2
    1                -1    b  9223372036854953479   -1
    2                -1    c  9223372036854953480   -8
    3                -1    d  9223372036854953481   -7
    4                -1    e  9223372036854953482   -6
    5                0     q  9223372036854953494   -2
    6                5     i  9223372036854953486   -2
    7                6     y  9223372036854953502   -2
    ##查看-2槽位
    p ((uint32_t*)($30.arData))[(int32_t)(-2)] # 7
    ##观察个槽位存储情况
    slot  idx
    -1    1
    -2    7
    -3    -1#p (int32_t)4294967295 -> -1
    -4    -1
    -5    -1
    -6    4
    -7    3
    -8    2
    #总结:拉链在-2槽位形成 7->6->5->0
    

    6、ZEND_SEND_VAR_SPEC_CV_HANDLER

    p varptr
    p *varptr.value.arr
    #$27 = {gc = {refcount = 1, u = {v = {type = 7 '\a', flags = 0 '\000', gc_info = 0}, type_info = 7}}, u = {v = {flags = 10 '\n', nApplyCount = 0 '\000', nIteratorsCount = 0 '\000', consistency = 0 '\000'}, 
    #    flags = 10}, nTableMask = 4294967288, arData = 0x7ffff5a5eb60, nNumUsed = 8, nNumOfElements = 7, nTableSize = 8, nInternalPointer = 0, nNextFreeElement = 0, 
    #  pDestructor = 0x84c8af <_zval_ptr_dtor_wrapper>}
    p $29+5
    next = 0
    p $29+6
    next = 0
    (gdb) p *$30.key.val@1
    $35 = "q"
    (gdb) p *$32.key.val@1
    $36 = "i"
    #总结 拉链:7->6->0, 5已被跳过
    #注意5的内容:type=0 如下:
    $31 = {val = {value = {lval = 140737314303296, dval = 6.9533472085220443e-310, counted = 0x7ffff5a02d40, str = 0x7ffff5a02d40, arr = 0x7ffff5a02d40, obj = 0x7ffff5a02d40, res = 0x7ffff5a02d40, 
          ref = 0x7ffff5a02d40, ast = 0x7ffff5a02d40, zv = 0x7ffff5a02d40, ptr = 0x7ffff5a02d40, ce = 0x7ffff5a02d40, func = 0x7ffff5a02d40, ww = {w1 = 4120915264, w2 = 32767}}, u1 = {v = {type = 0 '\000', 
            type_flags = 0 '\000', const_flags = 0 '\000', reserved = 0 '\000'}, type_info = 0}, u2 = {next = 0, cache_slot = 0, lineno = 0, num_args = 0, fe_pos = 0, fe_iter_idx = 0, access_flags = 0, 
          property_guard = 0}}, h = 9223372036854953494, key = 0x7ffff5a02d00}
    

    7、ZEND_ASSIGN_DIM_SPEC_CV_CONST_OP_DATA_CONST_HANDLER

    # value = zend_assign_to_variable(variable_ptr, value, IS_CONST);
    p  *object_ptr
    p *$47.value.arr
    #$48 = {gc = {refcount = 1, u = {v = {type = 7 '\a', flags = 0 '\000', gc_info = 0}, type_info = 7}}, u = {v = {flags = 10 '\n', nApplyCount = 0 '\000', nIteratorsCount = 0 '\000', consistency = 0 '\000'}, 
    #    flags = 10}, nTableMask = 4294967288, arData = 0x7ffff5a5eb60, nNumUsed = 8, nNumOfElements = 8, nTableSize = 8, nInternalPointer = 0, nNextFreeElement = 0, 
    #  pDestructor = 0x84c8af <_zval_ptr_dtor_wrapper>}
    p $57+7
    p *$64.key.val@1 # g
    ##总结:HashTable被rehash
    

    8、最后一个 ZEND_SEND_VAR_SPEC_CV_HANDLER

    p *varptr
    #$11 = {gc = {refcount = 1, u = {v = {type = 7 '\a', flags = 0 '\000', gc_info = 0}, type_info = 7}}, u = {v = {flags = 10 '\n', nApplyCount = 0 '\000', nIteratorsCount = 0 '\000', consistency = 0 '\000'}, 
    #    flags = 10}, nTableMask = 4294967280, arData = 0x7ffff5a67540, nNumUsed = 9, nNumOfElements = 9, nTableSize = 16, nInternalPointer = 0, nNextFreeElement = 0, 
    #  pDestructor = 0x84c8af <_zval_ptr_dtor_wrapper>}
    # p (int32_t)4294967280 -> -16
    #总结:数组进行了扩容
    

    相关文章

      网友评论

          本文标题:PHP数组源码解析与GDB实战分析

          本文链接:https://www.haomeiwen.com/subject/jfunuctx.html