背景
前面我们在分析iOS类结构的过程中,在oc类结构那点事(四)中了解到class有几个关键属性,其中bits信息已经分析过了,它里面存储了类的属性、方法、协议、ivars列表等信息,superclass就不用说了,那cache是用来干什么的呢?今天就来深入研究一下吧
一、cache中有什么
先看一下cache_t的定义:
// Supporting definitions: the three possible layouts for storing the cache mask.
// CACHE_MASK_STORAGE is set to exactly one of them at compile time, and
// struct cache_t declares different fields depending on that choice.
#define CACHE_MASK_STORAGE_OUTLINED 1
#define CACHE_MASK_STORAGE_HIGH_16 2
#define CACHE_MASK_STORAGE_LOW_4 3
// arm64 architecture with a 64-bit (LP64) data model.
#if defined(__arm64__) && __LP64__
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_HIGH_16
// arm64 architecture without the LP64 data model.
#elif defined(__arm64__) && !__LP64__
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_LOW_4
#else
// Everything else, e.g. the macOS machine used for the lldb sessions in this article.
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_OUTLINED
#endif
// Per-class method cache header (excerpt: further members are elided at the "..." line).
// Which fields hold the bucket array and the mask depends on CACHE_MASK_STORAGE.
struct cache_t {
// OUTLINED layout (the macOS case seen in this article's lldb output):
// the buckets pointer and the mask are two separate fields.
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED
explicit_atomic<struct bucket_t *> _buckets;
explicit_atomic<mask_t> _mask;
// HIGH_16 layout (arm64 with LP64): mask and buckets pointer share one word.
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
explicit_atomic<uintptr_t> _maskAndBuckets;
mask_t _mask_unused;
// How much the mask is shifted by.
static constexpr uintptr_t maskShift = 48;
// Additional bits after the mask which must be zero. msgSend
// takes advantage of these additional bits to construct the value
// `mask << 4` from `_maskAndBuckets` in a single instruction.
static constexpr uintptr_t maskZeroBits = 4;
// The largest mask value we can store.
static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1;
// The mask applied to `_maskAndBuckets` to retrieve the buckets pointer.
static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;
// Ensure we have enough bits for the buckets pointer.
static_assert(bucketsMask >= MACH_VM_MAX_ADDRESS, "Bucket field doesn't have enough bits for arbitrary pointers.");
// LOW_4 layout (arm64 without LP64).
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
// _maskAndBuckets stores the mask shift in the low 4 bits, and
// the buckets pointer in the remainder of the value. The mask
// shift is the value where (0xffff >> shift) produces the correct
// mask. This is equal to 16 - log2(cache_size).
explicit_atomic<uintptr_t> _maskAndBuckets;
mask_t _mask_unused;
static constexpr uintptr_t maskBits = 4;
static constexpr uintptr_t maskMask = (1 << maskBits) - 1;
static constexpr uintptr_t bucketsMask = ~maskMask;
#else
#error Unknown cache mask storage type.
#endif
// _flags only exists under the LP64 data model.
#if __LP64__
uint16_t _flags;
#endif
// Number of buckets currently in use; incremented by insert() and reset
// to 0 by setBucketsAndMask().
uint16_t _occupied;
...
}
这里说明一下为什么在mac上是定义成
_buckets
和_mask
,而在手机上是_maskAndBuckets
,这是苹果为了节约内存,把两个属性合并成一个了,如果是2个属性需要占用8+4=12字节,但放在一个属性上只需要占用8字节,这里跟isa结构的设计思路一样。
当然看到这里还是无法确定cache是干什么的,所以继续往下看,在上面的定义中不管在什么架构下都有buckets这个东西,并且它都跟bucket_t
有关,可以先看一下它的定义:
truct bucket_t {
private:
// IMP-first is better for arm64e ptrauth and no worse for arm64.
// SEL-first is better for armv7* and i386 and x86_64.
#if __arm64__
explicit_atomic<uintptr_t> _imp;
explicit_atomic<SEL> _sel;
#else
explicit_atomic<SEL> _sel;
explicit_atomic<uintptr_t> _imp;
#endif
}
所以这里不就是SEL和IMP的存储吗,那就好理解了,cache中存储了方法编号以及其实现,写个demo验证一下:
// LGPerson test class: declares seven no-argument instance methods that the
// demo calls one by one to populate the class's method cache.
// NOTE: the matching @implementation is not shown in this excerpt.
@interface LGPerson : NSObject
- (void)testFunc1;
- (void)testFunc2;
- (void)testFunc3;
- (void)testFunc4;
- (void)testFunc5;
- (void)testFunc6;
- (void)testFunc7;
@end
// main: deliberately calls none of the testFunc methods yet, so LGPerson's
// method cache can be inspected in its initial state.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        // insert code here...
        // Set a breakpoint on the next line.
        LGPerson *p = [LGPerson alloc];
    }
    return 0;
}
//lldb调试,先查看LGPerson类的地址
(lldb) p/x LGPerson.class
(Class) $0 = 0x0000000100008358 LGPerson
//偏移16字节并强转,前面分析过类的存储结构,isa和superclass各8字节,cache紧跟它们后面
(lldb) p/x (cache_t *)(0x0000000100008358 + 0x10)
(cache_t *) $1 = 0x0000000100008368
//打印cache_t中信息
(lldb) p *$1
(cache_t) $2 = {
_buckets = {
std::__1::atomic<bucket_t *> = {
Value = 0x0000000100346430
}
}
_mask = {
std::__1::atomic<unsigned int> = {
Value = 0
}
}
_flags = 32804
_occupied = 0
}
//获取其中buckets列表
(lldb) p $2.buckets()
(bucket_t *) $3 = 0x0000000100346430
(lldb) p *$3
(bucket_t) $4 = {
_sel = {
std::__1::atomic<objc_selector *> = (null) {
Value = (null)
}
}
_imp = {
std::__1::atomic<unsigned long> = {
Value = 0
}
}
}
至此看到_occupied
为0,下面打印的sel和imp都是默认值。继续调用[p testFunc1];
方法后调试信息如下:
//再次查看cache_t中信息,发现_occupied变成了1,mask和buckets也变了
(lldb) p *$1
(cache_t) $6 = {
_buckets = {
std::__1::atomic<bucket_t *> = {
Value = 0x0000000100793160
}
}
_mask = {
std::__1::atomic<unsigned int> = {
Value = 3
}
}
_flags = 32804
_occupied = 1
}
//取出buckets列表,what,sel和imp还是默认值null和0
(lldb) p $6.buckets()
(bucket_t *) $7 = 0x0000000100793160
(lldb) p *$7
(bucket_t) $8 = {
_sel = {
std::__1::atomic<objc_selector *> = (null) {
Value = (null)
}
}
_imp = {
std::__1::atomic<unsigned long> = {
Value = 0
}
}
}
//既然_buckets是个指针,上面直接*$7只是打印了首地址的信息,后面的其他信息如何访问呢,假设_buckets指向的是一块连续多个大小为bucket_t的内存区域,所以可以通过指针偏移的方法访问,后面再来分析为什么可以这样做
(lldb) p $7[0].sel()
(SEL) $9 = <no value available>
(lldb) p $7[1].sel()
(SEL) $10 = <no value available>
//在第3块bucket_t中看到了我们调用的testFunc1
(lldb) p $7[2].sel()
(SEL) $11 = "testFunc1"
所以cache中存储的sel和imp并不是按顺序存储的
二、cache的实现
我们再来看多调用几个方法会发生什么:
// Test code placed inside main: allocate an LGPerson and call testFunc1
// through testFunc7 in order, inspecting the class's cache_t after each call.
LGPerson *p = [LGPerson alloc];
[p testFunc1];
[p testFunc2];
[p testFunc3];
[p testFunc4];
[p testFunc5];
[p testFunc6];
[p testFunc7];
//先通过指针偏移指向LGPerson类中cache变量
(lldb) p/x LGPerson.class
(Class) $0 = 0x0000000100008358 LGPerson
(lldb) p/x (cache_t *)0x0000000100008368
(cache_t *) $1 = 0x0000000100008368
//什么方法都不调用时,查看cache信息如下
(lldb) p *$1
(cache_t) $2 = {
_buckets = {
std::__1::atomic<bucket_t *> = {
Value = 0x0000000100346430
}
}
_mask = {
std::__1::atomic<unsigned int> = {
Value = 0
}
}
_flags = 32804
_occupied = 0
}
//调用[p testFunc1];方法后查看cache信息如下
(lldb) p *$1
(cache_t) $3 = {
_buckets = {
std::__1::atomic<bucket_t *> = {
Value = 0x0000000101107480
}
}
_mask = {
std::__1::atomic<unsigned int> = {
Value = 3
}
}
_flags = 32804
_occupied = 1
}
//调用[p testFunc2];方法后查看cache信息如下
(lldb) p *$1
(cache_t) $4 = {
_buckets = {
std::__1::atomic<bucket_t *> = {
Value = 0x0000000101107480
}
}
_mask = {
std::__1::atomic<unsigned int> = {
Value = 3
}
}
_flags = 32804
_occupied = 2
}
//调用[p testFunc3];方法后查看cache信息如下
(lldb) p *$1
(cache_t) $3 = {
_buckets = {
std::__1::atomic<bucket_t *> = {
Value = 0x000000010067b4c0
}
}
_mask = {
std::__1::atomic<unsigned int> = {
Value = 7
}
}
_flags = 32804
_occupied = 1
}
//调用[p testFunc4];方法后查看cache信息如下
(lldb) p *$1
(cache_t) $4 = {
_buckets = {
std::__1::atomic<bucket_t *> = {
Value = 0x000000010067b4c0
}
}
_mask = {
std::__1::atomic<unsigned int> = {
Value = 7
}
}
_flags = 32804
_occupied = 2
}
//调用[p testFunc5];方法后查看cache信息如下
(lldb) p *$1
(cache_t) $5 = {
_buckets = {
std::__1::atomic<bucket_t *> = {
Value = 0x000000010067b4c0
}
}
_mask = {
std::__1::atomic<unsigned int> = {
Value = 7
}
}
_flags = 32804
_occupied = 3
}
//调用[p testFunc6];方法后查看cache信息如下
(lldb) p *$1
(cache_t) $6 = {
_buckets = {
std::__1::atomic<bucket_t *> = {
Value = 0x000000010067b4c0
}
}
_mask = {
std::__1::atomic<unsigned int> = {
Value = 7
}
}
_flags = 32804
_occupied = 4
}
//调用[p testFunc7];方法后查看cache信息如下
(lldb) p *$1
(cache_t) $7 = {
_buckets = {
std::__1::atomic<bucket_t *> = {
Value = 0x000000010067b4c0
}
}
_mask = {
std::__1::atomic<unsigned int> = {
Value = 7
}
}
_flags = 32804
_occupied = 5
}
可以发现,_occupied和_mask的值有一定增长规律,那如何分析呢?从源码入手,看有谁可以改变_occupied的值,我们发现cache_t的结构体定义中有个方法incrementOccupied()
,只有它会增加_occupied的值(另外setBucketsAndMask会把_occupied重置为0),然后查看该方法的使用情况,全局只有一处:
// Inserts the (sel, imp) pair into this cache (excerpt: the start of the body
// is elided at the "..." line).
// Steps visible here: make sure there is room (allocating or doubling the
// bucket array when needed), then probe from the selector's hash index for a
// free bucket and store the pair there. If every bucket has been probed without
// finding a free slot or the selector itself, bad_cache() is called.
void cache_t::insert(Class cls, SEL sel, IMP imp, id receiver)
{
...
// 1. Occupancy the cache would have after this insert (occupied() is 0 while
//    nothing has been cached yet).
mask_t newOccupied = occupied() + 1;
unsigned oldCapacity = capacity(), capacity = oldCapacity;
if (slowpath(isConstantEmptyCache())) {
// INIT_CACHE_SIZE is 4 according to the article (consistent with _mask == 3
// after the first insert in the lldb output).
if (!capacity) capacity = INIT_CACHE_SIZE;
// 2. The cache is still the constant empty cache: allocate a fresh bucket
//    array of `capacity` entries; the old buckets are not freed.
reallocate(oldCapacity, capacity, /* freeOld */false);
}
// 3. newOccupied plus CACHE_END_MARKER still fits within 3/4 of the capacity
//    (integer arithmetic: capacity / 4 * 3): keep using the current buckets.
else if (fastpath(newOccupied + CACHE_END_MARKER <= capacity / 4 * 3)) {
}
else {
// Not enough room: double the capacity, clamped to MAX_CACHE_SIZE.
capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
if (capacity > MAX_CACHE_SIZE) {
capacity = MAX_CACHE_SIZE;
}
// Allocate a new array of `capacity` buckets and hand the old one over to be freed.
reallocate(oldCapacity, capacity, true); // capacity expansion done
}
// Current bucket array (the newly allocated one if reallocate() ran above).
bucket_t *b = buckets();
// m = capacity - 1 is the mask passed to cache_hash(); presumably the hash
// combines the selector's address with m so the index is always < capacity
// (cache_hash is not shown here -- confirm).
mask_t m = capacity - 1;
mask_t begin = cache_hash(sel, m);
mask_t i = begin;
do {
// Bucket i holds no selector yet: store sel/imp here.
if (fastpath(b[i].sel() == 0)) {
// Bump _occupied before filling the bucket.
incrementOccupied();
b[i].set<Atomic, Encoded>(sel, imp, cls);
return;
}
// The selector is already cached (presumably added by another thread in the
// meantime): nothing to do.
if (b[i].sel() == sel) {
return;
}
// Otherwise move on to the next index given by cache_next() and retry.
} while (fastpath((i = cache_next(i, m)) != begin));
cache_t::bad_cache(receiver, (SEL)sel, cls);
}
整个流程已经非常清楚了,cache中存放了sel和imp,初始化容量为4个bucket_t,如果插入后的占用数(加上CACHE_END_MARKER)超过总容量的3/4则扩容到原来的2倍,扩容后之前缓存的方法不会被保留,并且sel和imp的存储位置是根据sel内存指针计算出的hash值决定的。接下来看一下申请内存的逻辑:
// Replaces the bucket array with a freshly allocated one of newCapacity
// entries. The contents of the old array are not copied over. When freeOld
// is true the old array is passed to cache_collect_free().
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
bucket_t *oldBuckets = buckets();
// Allocate the new bucket array.
bucket_t *newBuckets = allocateBuckets(newCapacity);
// Install the new buckets together with the new mask (newCapacity - 1).
setBucketsAndMask(newBuckets, newCapacity - 1);
if (freeOld) {
// Hand the old bucket array over to the deferred-free (garbage) list.
cache_collect_free(oldBuckets, oldCapacity);
}
}
// Allocates a zero-filled bucket array for newCapacity entries, writes the
// end-marker bucket, and returns a pointer to the first bucket.
bucket_t *allocateBuckets(mask_t newCapacity)
{
// calloc the number of bytes that bytesForCapacity() reports for newCapacity.
bucket_t *newBuckets = (bucket_t *)
calloc(cache_t::bytesForCapacity(newCapacity), 1);
// Locate the end-marker bucket of the new array.
bucket_t *end = cache_t::endMarker(newBuckets, newCapacity);
#if __arm__
// On __arm__ the end marker gets sel = 1 and imp = the address just before the first bucket.
end->set<NotAtomic, Raw>((SEL)(uintptr_t)1, (IMP)(newBuckets - 1), nil);
#else
// Elsewhere the end marker gets sel = 1 and imp = the address of the first
// bucket (presumably so a scan can wrap around to the start -- TODO confirm).
end->set<NotAtomic, Raw>((SEL)(uintptr_t)1, (IMP)newBuckets, nil);
#endif
if (PrintCaches) recordNewCache(newCapacity);
return newBuckets;
}
// Stores the new buckets pointer and mask into the cache and resets
// _occupied to 0. The ordering of the stores differs per architecture, so the
// statement order below must not be changed.
void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask)
{
#ifdef __arm__
// __arm__: relaxed stores, with a mega_barrier() before each of them.
mega_barrier();
_buckets.store(newBuckets, memory_order::memory_order_relaxed);
mega_barrier();
_mask.store(newMask, memory_order::memory_order_relaxed);
_occupied = 0;
#elif __x86_64__ || i386
// x86_64 / i386: release stores, buckets first and then the mask.
_buckets.store(newBuckets, memory_order::memory_order_release);
_mask.store(newMask, memory_order::memory_order_release);
// Note: _occupied is reset to 0 here. Since reallocate() does not copy the
// old entries, installing new buckets effectively re-initialises the cache
// and the previously cached methods are lost.
_occupied = 0;
#else
#error Don't know how to do setBucketsAndMask on this architecture.
#endif
}
其实cache中内存的释放也不是实时的,看一下cache_collect_free
的实现:
// Queues an old bucket array for deferred freeing: records it in the garbage
// list and then gives cache_collect() a chance to actually free the queued
// memory. Asserts that the relevant lock is held.
static void cache_collect_free(bucket_t *data, mask_t capacity)
{
#if CONFIG_USE_CACHE_LOCK
cacheUpdateLock.assertLocked();
#else
runtimeLock.assertLocked();
#endif
if (PrintCaches) recordDeadCache(capacity);
// Make sure the garbage list has room for one more entry (allocated on
// first use, doubled when full).
_garbage_make_room ();
// Add this array's size to the running total of bytes awaiting release.
garbage_byte_size += cache_t::bytesForCapacity(capacity);
// Remember the pointer that is waiting to be freed.
garbage_refs[garbage_count++] = data;
// Try to free for real; cache_collect(false) returns early when it decides not to collect yet.
cache_collect(false);
}
// Ensures garbage_refs can take at least one more pointer: allocates the
// list on the first call and doubles it whenever it is full.
static void _garbage_make_room(void)
{
static int first = 1;
if (first)
{
first = 0;
// First call: allocate room for INIT_GARBAGE_COUNT pointers
// (128 according to the article; the constant is defined elsewhere).
garbage_refs = (bucket_t**)
malloc(INIT_GARBAGE_COUNT * sizeof(void *));
garbage_max = INIT_GARBAGE_COUNT;
}
else if (garbage_count == garbage_max)
{
// The list is full (garbage_count has reached garbage_max): double its capacity.
garbage_refs = (bucket_t**)
realloc(garbage_refs, garbage_max * 2 * sizeof(void *));
garbage_max *= 2;
}
}
// Frees the bucket arrays queued in garbage_refs (excerpt: parts of the body
// are elided at the "..." lines). With collectALot == false it only collects
// once enough garbage has accumulated and _collecting_in_critical() is false;
// with collectALot == true it waits until collecting is possible.
void cache_collect(bool collectALot)
{
#if CONFIG_USE_CACHE_LOCK
cacheUpdateLock.assertLocked();
#else
runtimeLock.assertLocked();
#endif
// Return early only when BOTH hold: the pending garbage is still below
// garbage_threshold AND the caller did not force a collection.
if (garbage_byte_size < garbage_threshold && !collectALot) {
return;
}
if (!collectALot) {
// Not forced: give up for now if _collecting_in_critical() is true
// (per the log message below, an objc_msgSend is in progress).
if (_collecting_in_critical ()) {
if (PrintCaches) {
_objc_inform ("CACHES: not collecting; "
"objc_msgSend in progress");
}
return;
}
}
else {
// Forced: busy-wait until _collecting_in_critical() returns false.
while (_collecting_in_critical())
;
}
...
// Actually free every queued bucket array.
while (garbage_count--) {
auto dead = garbage_refs[garbage_count];
garbage_refs[garbage_count] = nil;
free(dead);
}
garbage_count = 0;
garbage_byte_size = 0;
...
}
最后
通过分析cache底层的代码实现,我们发现苹果在内存和性能这一块确实做了很多,有很多值得开发者借鉴的地方,当然这只是oc底层实现的冰山一角,还有很多技术点待我们去探究,今天就先到这里吧~
未完待续...
网友评论