美文网首页
PHP数组源码解析与GDB实战分析

PHP数组源码解析与GDB实战分析

作者: Atman666 | 来源:发表于2019-09-26 21:50 被阅读0次

一、源码

一、PHP数组源码初步分析
1、基本类型定义(php7.3.9)

/*
 * Value storage of a zval. Every member overlays the same memory (the GDB
 * dump later in this article prints all pointer members with one address).
 * Which member is meaningful is presumably selected by the type tag kept
 * next to this union in struct _zval_struct (u1.v.type, "active type").
 */
typedef union _zend_value {
    zend_long         lval;             /* long value */
    double            dval;             /* double value */
    zend_refcounted  *counted;          /* storage viewed as a zend_refcounted pointer */
    zend_string      *str;              /* storage viewed as a zend_string pointer */
    zend_array       *arr;              /* storage viewed as a zend_array (HashTable) pointer */
    zend_object      *obj;
    zend_resource    *res;
    zend_reference   *ref;
    zend_ast_ref     *ast;
    zval             *zv;               /* pointer to another zval */
    void             *ptr;              /* untyped pointer */
    zend_class_entry *ce;
    zend_function    *func;
    struct {
        uint32_t w1;
        uint32_t w2;
    } ww;                               /* the same storage viewed as two 32-bit words */
} zend_value;

/*
 * A zval: one zend_value followed by two 32-bit unions.
 *   u1 - type information, readable either field by field through `v`
 *        or as a single word through `type_info`.
 *   u2 - one spare 32-bit slot whose meaning depends on where the zval is
 *        used; inside a hash Bucket it is `next`, the collision chain link.
 * ZEND_ENDIAN_LOHI_3 is defined elsewhere; presumably it orders the three
 * members according to host endianness.
 */
struct _zval_struct {
    zend_value        value;            /* value */
    union {
        struct {
            ZEND_ENDIAN_LOHI_3(
                zend_uchar    type,         /* active type */
                zend_uchar    type_flags,
                union {
                    uint16_t  call_info;    /* call info for EX(This) */
                    uint16_t  extra;        /* not further specified */
                } u)
        } v;
        uint32_t type_info;                /* the fields of `v` read as one 32-bit word */
    } u1;
    union {
        uint32_t     next;                 /* hash collision chain */
        uint32_t     cache_slot;           /* cache slot (for RECV_INIT) */
        uint32_t     opline_num;           /* opline number (for FAST_CALL) */
        uint32_t     lineno;               /* line number (for ast nodes) */
        uint32_t     num_args;             /* arguments number for EX(This) */
        uint32_t     fe_pos;               /* foreach position */
        uint32_t     fe_iter_idx;          /* foreach iterator index */
        uint32_t     access_flags;         /* class constant access flags */
        uint32_t     property_guard;       /* single property guard */
        uint32_t     constant_flags;       /* constant flags */
        uint32_t     extra;                /* not further specified */
    } u2;
};


/*
 * Type tags. The hash table code in this article compares a bucket's zval
 * type against IS_UNDEF to detect deleted elements, and IS_ARRAY (7) is the
 * value that shows up as `type = 7` in the GDB dumps.
 * Note: the value 12 is not assigned in this list.
 */

/* regular data types */
#define IS_UNDEF                    0
#define IS_NULL                     1
#define IS_FALSE                    2
#define IS_TRUE                     3
#define IS_LONG                     4
#define IS_DOUBLE                   5
#define IS_STRING                   6
#define IS_ARRAY                    7
#define IS_OBJECT                   8
#define IS_RESOURCE                 9
#define IS_REFERENCE                10

/* constant expressions */
#define IS_CONSTANT_AST             11

/* internal types */
#define IS_INDIRECT                 13
#define IS_PTR                      14
#define _IS_ERROR                   15

/* fake types used only for type hinting (Z_TYPE(zv) can not use them) */
#define _IS_BOOL                    16
#define IS_CALLABLE                 17
#define IS_ITERABLE                 18
#define IS_VOID                     19
#define _IS_NUMBER                  20

/*
 * One element of a hash table: the stored value, its hash and its key.
 * The link to the next bucket in the same collision chain is not a separate
 * field here; it is kept inside the value as val.u2.next (see Z_NEXT).
 */
typedef struct _Bucket {
    zval              val;
    zend_ulong        h;                /* hash value (or numeric index)   */
    zend_string      *key;              /* string key or NULL for numerics */
} Bucket;

/* HashTable is an alias for struct _zend_array. */
typedef struct _zend_array HashTable;

/*
 * Header of a PHP array / hash table. The buckets live in the array that
 * arData points to; the uint32_t hash slots are read at negative offsets
 * from arData (see HT_HASH_EX and the GDB session in this article).
 * ZEND_ENDIAN_LOHI_4 is defined elsewhere; presumably it orders the four
 * bytes according to host endianness.
 */
struct _zend_array {
    zend_refcounted_h gc;               /* refcount header; initialised through GC_SET_REFCOUNT / GC_TYPE_INFO */
    union {
        struct {
            ZEND_ENDIAN_LOHI_4(
                zend_uchar    flags,
                zend_uchar    _unused,
                zend_uchar    nIteratorsCount,
                zend_uchar    _unused2)
        } v;
        uint32_t flags;                 /* the four bytes of `v` read as one word */
    } u;
    uint32_t          nTableMask;       /* OR-ed with a hash to obtain the slot index; (uint32_t)-8 for size 8 in the GDB dump */
    Bucket           *arData;           /* bucket array */
    uint32_t          nNumUsed;         /* buckets consumed so far, deleted ones included (8 after one unset in the GDB dump) */
    uint32_t          nNumOfElements;   /* live elements (7 after that same unset) */
    uint32_t          nTableSize;       /* bucket capacity; doubled by zend_hash_do_resize */
    uint32_t          nInternalPointer; /* a bucket index, remapped by zend_hash_rehash when buckets move; NOTE(review): presumably the array's internal cursor */
    zend_long         nNextFreeElement; /* NOTE(review): presumably the next automatic integer key - not exercised in this article */
    dtor_func_t       pDestructor;      /* destructor callback supplied at init time */
};

2、常见宏

/* Mask given to a freshly initialised table (see _zend_hash_init_int): (uint32_t)-2. */
#define HT_MIN_MASK ((uint32_t) -2)
/* NOTE(review): by its name the smallest table size; the 8-element array in the GDB session has nTableSize = 8. */
#define HT_MIN_SIZE 8
/* Read/write one hash slot: `data` is treated as a uint32_t array and `idx`
 * is reinterpreted as a signed 32-bit index, so masked hashes such as
 * (uint32_t)-2 address the words stored just before `data`. */
#define HT_HASH_EX(data, idx)  ((uint32_t*)(data))[(int32_t)(idx)] 
/* Point ht->arData HT_HASH_SIZE(ht->nTableMask) bytes past `ptr`.
 * ht->nTableMask must already hold its final value when this is used. */
#define HT_SET_DATA_ADDR(ht, ptr) do { \
        (ht)->arData = (Bucket*)(((char*)(ptr)) + HT_HASH_SIZE((ht)->nTableMask)); \
    } while (0)

3、几个GDB技巧

#根据Bucket的h值推算槽位  p (int32_t)($54.h|-8) ,$54为Bucket
p (int32_t)(9223372036854953484|-8)  
-4
#根据槽位推算存储位置 p ((uint32_t*)($14.arData))[(int32_t)(-3)]
p ((uint32_t*)($14.arData))[(int32_t)(-1)]
1
#带符号数字转换
p (int32_t )4294967288
 -8
#查看-8的二进制
p /t -8
11111111111111111111111111111000

4、HashTable的API

a、初始化

/*
 * Initialise the header of a hash table.
 *
 * ht          - table to initialise
 * nSize       - requested size; stored after passing through zend_hash_check_size()
 * pDestructor - destructor callback stored in ht->pDestructor
 * persistent  - selects GC_PERSISTENT instead of GC_COLLECTABLE in the GC type info
 *
 * No allocation call appears in this function: arData is derived from the
 * static uninitialized_bucket array, and the mask is set to HT_MIN_MASK.
 */
static zend_always_inline void _zend_hash_init_int(HashTable *ht, uint32_t nSize, dtor_func_t pDestructor, zend_bool persistent)
{
    GC_SET_REFCOUNT(ht, 1);
    GC_TYPE_INFO(ht) = IS_ARRAY | (persistent ? (GC_PERSISTENT << GC_FLAGS_SHIFT) : (GC_COLLECTABLE << GC_FLAGS_SHIFT));
    HT_FLAGS(ht) = HASH_FLAG_STATIC_KEYS;
    /* The mask must be in place before HT_SET_DATA_ADDR, which reads it. */
    ht->nTableMask = HT_MIN_MASK;
    HT_SET_DATA_ADDR(ht, &uninitialized_bucket);
    ht->nNumUsed = 0;
    ht->nNumOfElements = 0;
    ht->nInternalPointer = 0;
    ht->nNextFreeElement = 0;
    ht->pDestructor = pDestructor;
    ht->nTableSize = zend_hash_check_size(nSize);
}

注意宏 HT_SET_DATA_ADDR

HashTable结构体arData地址设置宏
/*
 * Set ht->arData from the start address of the table's memory block.
 * `ptr` is cast to char* and advanced by HT_HASH_SIZE((ht)->nTableMask)
 * bytes (not by nTableMask itself); the result is stored as a Bucket*.
 * HT_HASH_SIZE is defined elsewhere - presumably the byte size of the hash
 * slot area, which would make arData point just past the slots.
 * (ht)->nTableMask must be set before this macro is used.
 * Note: nothing may follow a line-continuation backslash, so the explanation
 * lives here instead of inside the macro.
 */
#define HT_SET_DATA_ADDR(ht, ptr) do { \
        (ht)->arData = (Bucket*)(((char*)(ptr)) + HT_HASH_SIZE((ht)->nTableMask)); \
    } while (0)
/* Shared read-only hash slot area used by tables initialised through
 * _zend_hash_init_int: -HT_MIN_MASK (i.e. 2) slots, both HT_INVALID_IDX. */
static const uint32_t uninitialized_bucket[-HT_MIN_MASK] =
    {HT_INVALID_IDX, HT_INVALID_IDX};

c、查找 zend_hash_find_bucket

/*
 * Find the bucket stored under the string key `key`.
 *
 * ht         - table to search
 * key        - string key to look for
 * known_hash - non-zero: read the hash with ZSTR_H(key);
 *              zero: obtain it through zend_string_hash_val(key)
 *
 * Returns the matching Bucket, or NULL when the slot is empty or the
 * collision chain ends without a match.
 */
static zend_always_inline Bucket *zend_hash_find_bucket(const HashTable *ht, zend_string *key, zend_bool known_hash) { 
    zend_ulong h; 
    uint32_t nIndex; 
    uint32_t idx; 
    Bucket *p, *arData; 
    if (known_hash) { 
        h = ZSTR_H(key); /* read the hash directly */
    } else { 
        h = zend_string_hash_val(key); /* obtain the hash through zend_string_hash_val */
     }
     arData = ht->arData; 
    nIndex = h | ht->nTableMask; /* slot index this key maps to */
    idx = HT_HASH_EX(arData, nIndex); /* bucket index currently stored in that slot */
    if (UNEXPECTED(idx == HT_INVALID_IDX)) { /* empty slot: key is not present (HT_INVALID_IDX prints as -1 when viewed as int32_t) */
        return NULL; 
    } 
    p = HT_HASH_TO_BUCKET_EX(arData, idx); /* bucket that idx refers to */
     if (EXPECTED(p->key == key)) { /* check for the same interned string, i.e. the very same zend_string instance */ 
        return p; 
    } 
    while (1) { 
        if (p->h == ZSTR_H(key) && EXPECTED(p->key) && zend_string_equal_content(p->key, key)) { /* same hash and same content */
        return p; 
        } 
        idx = Z_NEXT(p->val); 
        if (idx == HT_INVALID_IDX) { /* end of the collision chain */
            return NULL; 
        }
         p = HT_HASH_TO_BUCKET_EX(arData, idx); 
        if (p->key == key) { /* check for the same interned string */ 
            return p;
         }
     }
 }

d、扩容

    /*
     * Make room in a hash table, using one of three strategies:
     *   1. nNumUsed > nNumOfElements + nNumOfElements/32: enough buckets are
     *      dead, so only zend_hash_rehash() is called and the size is kept.
     *   2. nTableSize < HT_MAX_SIZE: allocate a block for twice the size,
     *      copy the used buckets over, free the old block and rehash.
     *   3. otherwise: raise a fatal error through zend_error_noreturn().
     *
     * The error message must be an ordinary C string literal delimited by
     * ASCII double quotes; typographic quotes are not valid C.
     */
    static void ZEND_FASTCALL zend_hash_do_resize(HashTable *ht) {
        IS_CONSISTENT(ht);
        HT_ASSERT_RC1(ht);
        if (ht->nNumUsed > ht->nNumOfElements + (ht->nNumOfElements >> 5)) { /* used buckets > live buckets + live buckets / 32 */
            /* additional term is there to amortize the cost of compaction */
            zend_hash_rehash(ht);
        } else if (ht->nTableSize < HT_MAX_SIZE) { /* still below the maximum table size */
            /* Let's double the table size */
            void *new_data, *old_data = HT_GET_DATA_ADDR(ht);
            uint32_t nSize = ht->nTableSize + ht->nTableSize;
            Bucket *old_buckets = ht->arData;
            ht->nTableSize = nSize;
            new_data = pemalloc(HT_SIZE_EX(nSize, HT_SIZE_TO_MASK(nSize)), GC_FLAGS(ht) & IS_ARRAY_PERSISTENT);
            /* The new mask must be stored before HT_SET_DATA_ADDR, which reads it. */
            ht->nTableMask = HT_SIZE_TO_MASK(ht->nTableSize);
            HT_SET_DATA_ADDR(ht, new_data); /* arData now points into the new block */
            memcpy(ht->arData, old_buckets, sizeof(Bucket) * ht->nNumUsed); /* plain memory copy of the used buckets */
            pefree(old_data, GC_FLAGS(ht) & IS_ARRAY_PERSISTENT); /* release the old block */
            zend_hash_rehash(ht); /* rebuild the hash slots for the new mask */
        } else {
            zend_error_noreturn(E_ERROR, "Possible integer overflow in memory allocation (%u * %zu + %zu)", ht->nTableSize * 2,
                sizeof(Bucket) + sizeof(uint32_t), sizeof(Bucket));
        }
    }

e、Hash冲突

/*
 * Append an IS_INDIRECT element that refers to `ptr` under string key `key`.
 *
 * The new bucket takes the next unused position (nNumUsed) and is pushed at
 * the head of its slot's collision chain: the index previously stored in the
 * slot becomes the bucket's `next`, and the slot then holds the new index.
 * No capacity check and no lookup for an existing key are visible in this
 * function. NOTE(review): callers presumably guarantee both - confirm.
 */
static zend_always_inline void _zend_hash_append_ind(HashTable *ht, zend_string *key, zval *ptr) { 
    uint32_t idx = ht->nNumUsed++; 
    uint32_t nIndex; 
    Bucket *p = ht->arData + idx; /* bucket position for the appended element */
    ZVAL_INDIRECT(&p->val, ptr); 
    if (!ZSTR_IS_INTERNED(key)) { 
        /* non-interned key: clear the static-keys flag, add a reference to the key, run zend_string_hash_val on it */
        HT_FLAGS(ht) &= ~HASH_FLAG_STATIC_KEYS; 
        zend_string_addref(key); 
        zend_string_hash_val(key); 
    }
    p->key = key; 
    p->h = ZSTR_H(key); 
    nIndex = (uint32_t)p->h | ht->nTableMask; 
    Z_NEXT(p->val) = HT_HASH(ht, nIndex); /* previous slot content becomes this bucket's u2.next */
    HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(idx); /* the slot now refers to the new bucket */
    ht->nNumOfElements++; 
}

/* Collision-chain link of a bucket value: the zval's u2.next field. */
#define Z_NEXT(zval) (zval).u2.next
/* Hash slot access: `data` is treated as a uint32_t array and `idx` is
 * reinterpreted as a signed 32-bit index, so masked hashes such as
 * (uint32_t)-2 address the words stored immediately before `data`. */
#define HT_HASH_EX(data, idx) ((uint32_t*)(data))[(int32_t)(idx)]
/* Same access, taking the table and using its arData pointer. */
#define HT_HASH(ht, idx) HT_HASH_EX((ht)->arData, idx)
/* Bucket index -> value stored in a hash slot; the identity in this variant.
 * Each macro is kept on one line: whitespace after a continuation backslash
 * is not a portable line splice. */
#define HT_IDX_TO_HASH(idx) (idx)

f、rehash

/*
 * Rebuild the hash slots of `ht` from its bucket array, closing any gaps.
 *
 * - Empty table: resets nNumUsed and the slots (only when the table is
 *   flagged as initialised) and returns.
 * - No holes: every bucket is re-linked into its slot in index order.
 * - Holes present: buckets are re-linked until the first IS_UNDEF bucket is
 *   met; from there on, live buckets are copied down over the gaps (q/j is
 *   the write position, p/i the read position), nInternalPointer and, when
 *   the table has iterators, iterator positions are remapped, and nNumUsed
 *   is set to the compacted count.
 *
 * Always returns SUCCESS.
 */
ZEND_API int ZEND_FASTCALL zend_hash_rehash(HashTable *ht)
{
    Bucket *p;
    uint32_t nIndex, i;

    IS_CONSISTENT(ht);

    if (UNEXPECTED(ht->nNumOfElements == 0)) { /* no live elements */
        if (HT_FLAGS(ht) & HASH_FLAG_INITIALIZED) { /* only when the table has been initialised */
            ht->nNumUsed = 0;
            HT_HASH_RESET(ht);
        }
        return SUCCESS;
    }

    HT_HASH_RESET(ht);
    i = 0;
    p = ht->arData;
    if (HT_IS_WITHOUT_HOLES(ht)) { /* no deleted elements */
        do {
            nIndex = p->h | ht->nTableMask; /* slot index of this bucket */
            Z_NEXT(p->val) = HT_HASH(ht, nIndex); /* previous slot content becomes this bucket's next */
            HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(i); /* slot now refers to this bucket */
            p++;
        } while (++i < ht->nNumUsed);
    } else { /* at least one deleted element */
        uint32_t old_num_used = ht->nNumUsed;
        do {
            if (UNEXPECTED(Z_TYPE(p->val) == IS_UNDEF)) { /* first deleted bucket: start compacting */
                uint32_t j = i;
                Bucket *q = p;

                if (EXPECTED(!HT_HAS_ITERATORS(ht))) {
                    while (++i < ht->nNumUsed) {
                        p++; /* next bucket to read */
                        if (EXPECTED(Z_TYPE_INFO(p->val) != IS_UNDEF)) { /* live bucket */
                            ZVAL_COPY_VALUE(&q->val, &p->val); /* move it down to the write position */
                            q->h = p->h;
                            nIndex = q->h | ht->nTableMask;
                            q->key = p->key;
                            Z_NEXT(q->val) = HT_HASH(ht, nIndex);
                            HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(j);
                            if (UNEXPECTED(ht->nInternalPointer == i)) {
                                ht->nInternalPointer = j;
                            }
                            q++;
                            j++;
                        }
                    }
                } else {
                    /* Same compaction, additionally remapping iterator positions. */
                    uint32_t iter_pos = zend_hash_iterators_lower_pos(ht, 0);

                    while (++i < ht->nNumUsed) {
                        p++;
                        if (EXPECTED(Z_TYPE_INFO(p->val) != IS_UNDEF)) {
                            ZVAL_COPY_VALUE(&q->val, &p->val);
                            q->h = p->h;
                            nIndex = q->h | ht->nTableMask;
                            q->key = p->key;
                            Z_NEXT(q->val) = HT_HASH(ht, nIndex);
                            HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(j);
                            if (UNEXPECTED(ht->nInternalPointer == i)) {
                                ht->nInternalPointer = j;
                            }
                            if (UNEXPECTED(i >= iter_pos)) {
                                do {
                                    zend_hash_iterators_update(ht, iter_pos, j);
                                    iter_pos = zend_hash_iterators_lower_pos(ht, iter_pos + 1);
                                } while (iter_pos < i);
                            }
                            q++;
                            j++;
                        }
                    }
                }
                ht->nNumUsed = j; /* compacted bucket count */
                break;
            }
            /* Buckets before the first hole stay in place and are only re-linked. */
            nIndex = p->h | ht->nTableMask;
            Z_NEXT(p->val) = HT_HASH(ht, nIndex);
            HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(i);
            p++;
        } while (++i < ht->nNumUsed);

        /* Migrate pointer to one past the end of the array to the new one past the end, so that
         * newly inserted elements are picked up correctly. */
        if (UNEXPECTED(HT_HAS_ITERATORS(ht))) {
            _zend_hash_iterators_update(ht, old_num_used, ht->nNumUsed);
        }
    }
    return SUCCESS;
}

二、实战

1、脚本ht.php

<?php
// Eight string keys; the GDB dump of this array shows nNumUsed = 8 and nTableSize = 8.
$ar = ['a'=>'abc','b'=>'bcd','c'=>'cde','d'=>'def','e'=>'fgh','q'=>'ghi','i'=>'gkl','y'=>'glz'];
var_export($ar);
// Remove one element: in the dump nNumOfElements drops to 7 while nNumUsed stays 8.
unset($ar['q']);
var_export($ar);
// Insert while one of the 8 used buckets is dead: the dump afterwards still shows nTableSize = 8.
$ar['g'] = 'glz';
var_export($ar);
// Insert while all 8 buckets are live: the dump afterwards shows nTableSize = 16.
$ar['h'] = 'hello';
var_export($ar);

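2、GDB运行,执行如下(注:下文GDB输出中的 nApplyCount、consistency、const_flags 等字段以及 EX_CONSTANT 宏与上文引用的7.3.9结构体定义不一致,调试所用的PHP版本可能并非7.3.9,对照时请留意)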

b zend_execute
r ht.php
p op_array
p *op_array
p $2.opcodes
p *$2.opcodes
p $3+1
p *$5
...
p $3+10
p *$23

3、获取opcode_handler如下,附猜测可能对应的php代码:

1、ZEND_ASSIGN_SPEC_CV_CONST_RETVAL_UNUSED_HANDLER  $ar = ['a'=>'abc','b'=>'bcd','c'=>'cde','d'=>'def','e'=>'fgh','q'=>'ghi','i'=>'gkl','y'=>'glz'];
2、ZEND_INIT_FCALL_SPEC_CONST_HANDLER var_export
3、ZEND_SEND_VAR_SPEC_CV_HANDLER $ar
4、ZEND_DO_ICALL_SPEC_RETVAL_UNUSED_HANDLER var_export
5、ZEND_UNSET_DIM_SPEC_CV_CONST_HANDLER unset
6、ZEND_INIT_FCALL_SPEC_CONST_HANDLER var_export
7、ZEND_SEND_VAR_SPEC_CV_HANDLER $ar
8、ZEND_DO_ICALL_SPEC_RETVAL_UNUSED_HANDLER var_export
9、ZEND_ASSIGN_DIM_SPEC_CV_CONST_OP_DATA_CONST_HANDLER $ar['g'] = 'glz';
10、ZEND_NULL_HANDLER
11、ZEND_INIT_FCALL_SPEC_CONST_HANDLER var_export
12、ZEND_SEND_VAR_SPEC_CV_HANDLER  $ar
13、ZEND_DO_ICALL_SPEC_RETVAL_UNUSED_HANDLER var_export
14、ZEND_ASSIGN_DIM_SPEC_CV_CONST_OP_DATA_CONST_HANDLER $ar['h'] = 'hello';
15、ZEND_NULL_HANDLER
16、ZEND_INIT_FCALL_SPEC_CONST_HANDLER var_export
17、ZEND_SEND_VAR_SPEC_CV_HANDLER $ar
18、ZEND_DO_ICALL_SPEC_RETVAL_UNUSED_HANDLER var_export
19、ZEND_RETURN_SPEC_CONST_HANDLER 1

4、打若干断点,以便查看不同阶段HashTable内容

(gdb) b ZEND_ASSIGN_SPEC_CV_CONST_RETVAL_UNUSED_HANDLER #value
(gdb) b ZEND_SEND_VAR_SPEC_CV_HANDLER #varptr
(gdb) b ZEND_ASSIGN_DIM_SPEC_CV_CONST_OP_DATA_CONST_HANDLER # 执行到最后p object_ptr

5、ZEND_ASSIGN_SPEC_CV_CONST_RETVAL_UNUSED_HANDLER

39441       value = EX_CONSTANT(opline->op2);
n
p value
p *value
p $28.value.arr
p *$28.value.arr

#$30 = {gc = {refcount = 1, u = {v = {type = 7 '\a', flags = 0 '\000', gc_info = 0}, type_info = 7}}, u = {v = {flags = 10 '\n', nApplyCount = 0 '\000', nIteratorsCount = 0 '\000', consistency = 0 '\000'}, 
#   flags = 10}, nTableMask = 4294967288, arData = 0x7ffff5a5ea20, nNumUsed = 8, nNumOfElements = 8, nTableSize = 8, nInternalPointer = 0, nNextFreeElement = 0, 
#  pDestructor = 0x84c8af <_zval_ptr_dtor_wrapper>}

p $30.arData
p *$30.arData
...
p $31+7
p *$43
#查看next的具体值
p (int32_t)4294967295 # -1
#查看key值
p *$32.key.val@1
#根据h查看槽位
p (int32_t)($32.h|-8)
#根据槽位查看arDataIdx
p ((uint32_t*)($30.arData))[(int32_t)(-1)]
##观察next和key
arDataIdx      next  key  h                      h|-8 
0                -1    a  9223372036854953478   -2
1                -1    b  9223372036854953479   -1
2                -1    c  9223372036854953480   -8
3                -1    d  9223372036854953481   -7
4                -1    e  9223372036854953482   -6
5                0     q  9223372036854953494   -2
6                5     i  9223372036854953486   -2
7                6     y  9223372036854953502   -2
##查看-2槽位
p ((uint32_t*)($30.arData))[(int32_t)(-2)] # 7
##观察各槽位存储情况
slot  idx
-1    1
-2    7
-3    -1#p (int32_t)4294967295 -> -1
-4    -1
-5    -1
-6    4
-7    3
-8    2
#总结:拉链在-2槽位形成 7->6->5->0

6、ZEND_SEND_VAR_SPEC_CV_HANDLER

p varptr
p *varptr.value.arr
#$27 = {gc = {refcount = 1, u = {v = {type = 7 '\a', flags = 0 '\000', gc_info = 0}, type_info = 7}}, u = {v = {flags = 10 '\n', nApplyCount = 0 '\000', nIteratorsCount = 0 '\000', consistency = 0 '\000'}, 
#    flags = 10}, nTableMask = 4294967288, arData = 0x7ffff5a5eb60, nNumUsed = 8, nNumOfElements = 7, nTableSize = 8, nInternalPointer = 0, nNextFreeElement = 0, 
#  pDestructor = 0x84c8af <_zval_ptr_dtor_wrapper>}
p $29+5
next = 0
p $29+6
next = 0
(gdb) p *$30.key.val@1
$35 = "q"
(gdb) p *$32.key.val@1
$36 = "i"
#总结 拉链:7->6->0, 5已被跳过
#注意5的内容:type=0 如下:
$31 = {val = {value = {lval = 140737314303296, dval = 6.9533472085220443e-310, counted = 0x7ffff5a02d40, str = 0x7ffff5a02d40, arr = 0x7ffff5a02d40, obj = 0x7ffff5a02d40, res = 0x7ffff5a02d40, 
      ref = 0x7ffff5a02d40, ast = 0x7ffff5a02d40, zv = 0x7ffff5a02d40, ptr = 0x7ffff5a02d40, ce = 0x7ffff5a02d40, func = 0x7ffff5a02d40, ww = {w1 = 4120915264, w2 = 32767}}, u1 = {v = {type = 0 '\000', 
        type_flags = 0 '\000', const_flags = 0 '\000', reserved = 0 '\000'}, type_info = 0}, u2 = {next = 0, cache_slot = 0, lineno = 0, num_args = 0, fe_pos = 0, fe_iter_idx = 0, access_flags = 0, 
      property_guard = 0}}, h = 9223372036854953494, key = 0x7ffff5a02d00}

7、ZEND_ASSIGN_DIM_SPEC_CV_CONST_OP_DATA_CONST_HANDLER

# value = zend_assign_to_variable(variable_ptr, value, IS_CONST);
p  *object_ptr
p *$47.value.arr
#$48 = {gc = {refcount = 1, u = {v = {type = 7 '\a', flags = 0 '\000', gc_info = 0}, type_info = 7}}, u = {v = {flags = 10 '\n', nApplyCount = 0 '\000', nIteratorsCount = 0 '\000', consistency = 0 '\000'}, 
#    flags = 10}, nTableMask = 4294967288, arData = 0x7ffff5a5eb60, nNumUsed = 8, nNumOfElements = 8, nTableSize = 8, nInternalPointer = 0, nNextFreeElement = 0, 
#  pDestructor = 0x84c8af <_zval_ptr_dtor_wrapper>}
p $57+7
p *$64.key.val@1 # g
##总结:HashTable被rehash

8、最后一个 ZEND_SEND_VAR_SPEC_CV_HANDLER

p *varptr
#$11 = {gc = {refcount = 1, u = {v = {type = 7 '\a', flags = 0 '\000', gc_info = 0}, type_info = 7}}, u = {v = {flags = 10 '\n', nApplyCount = 0 '\000', nIteratorsCount = 0 '\000', consistency = 0 '\000'}, 
#    flags = 10}, nTableMask = 4294967280, arData = 0x7ffff5a67540, nNumUsed = 9, nNumOfElements = 9, nTableSize = 16, nInternalPointer = 0, nNextFreeElement = 0, 
#  pDestructor = 0x84c8af <_zval_ptr_dtor_wrapper>}
# p (int32_t)4294967280 -> -16
#总结:数组进行了扩容

相关文章

网友评论

      本文标题:PHP数组源码解析与GDB实战分析

      本文链接:https://www.haomeiwen.com/subject/jfunuctx.html