在iOS中,cache是类结构中很重要的一个成员,其作用是缓存已调用过的方法(sel和imp),以加快后续的方法查找;那么你知道它缓存实现的底层原理么?本文就来分析一下cache_t的相关内容。
下面附上iOS类存在的内容代码:
struct objc_class : objc_object {
// Class ISA;
Class superclass; //8+8 = 16 平移16
cache_t cache; // formerly cache pointer and vtable
class_data_bits_t bits; // class_rw_t * plus custom rr/alloc flags
以下省略;
在上面的代码中,我们可以得知iOS中一个类存在的内容有:ISA,superclass,cache和bits。
我们着重来看一下cache_t的结构。
首先附上cache_t的部分源码:
struct cache_t {
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED
explicit_atomic<struct bucket_t *> _buckets;
explicit_atomic<mask_t> _mask;
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
explicit_atomic<uintptr_t> _maskAndBuckets;
mask_t _mask_unused;
// How much the mask is shifted by.
static constexpr uintptr_t maskShift = 48;
// Additional bits after the mask which must be zero. msgSend
// takes advantage of these additional bits to construct the value
// `mask << 4` from `_maskAndBuckets` in a single instruction.
static constexpr uintptr_t maskZeroBits = 4;
// The largest mask value we can store.
static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1;
// The mask applied to `_maskAndBuckets` to retrieve the buckets pointer.
static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;
// Ensure we have enough bits for the buckets pointer.
static_assert(bucketsMask >= MACH_VM_MAX_ADDRESS, "Bucket field doesn't have enough bits for arbitrary pointers.");
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
// _maskAndBuckets stores the mask shift in the low 4 bits, and
// the buckets pointer in the remainder of the value. The mask
// shift is the value where (0xffff >> shift) produces the correct
// mask. This is equal to 16 - log2(cache_size).
explicit_atomic<uintptr_t> _maskAndBuckets;
mask_t _mask_unused;
static constexpr uintptr_t maskBits = 4;
static constexpr uintptr_t maskMask = (1 << maskBits) - 1;
static constexpr uintptr_t bucketsMask = ~maskMask;
#else
#error Unknown cache mask storage type.
#endif
#if __LP64__
uint16_t _flags;
#endif
uint16_t _occupied;
public:
static bucket_t *emptyBuckets();
struct bucket_t *buckets();
mask_t mask();
mask_t occupied();
void incrementOccupied();
void setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask);
void initializeToEmpty();
unsigned capacity();
bool isConstantEmptyCache();
bool canBeFreed();
……
}
在看了cache_t的部分源码之后,我们首先观察_buckets
和_mask
,为什么他们需要将类型外放置一个explicit_atomic
,这边我们了解到,原子性是为了安全,对于cache缓存来说,他需要对缓存的数据进行增删改查,需要确保数据的安全。
在了解完explicit_atomic
的作用后,我们来了解一下_buckets
的结构,首先来看一下bucket_t
的源码,它的源码很多,省略了一部分,其中最重要的只有imp和sel:
#if __arm64__
explicit_atomic<uintptr_t> _imp;
explicit_atomic<SEL> _sel;
#else
explicit_atomic<SEL> _sel;
explicit_atomic<uintptr_t> _imp;
#endif
SEL:方法编号;
IMP:函数指针地址;
在看完源码之后,我们可以得到cache_t的宏观结构:
(图片:16003424460394.png)
但是,我们还是不知道cache_t是如何缓存、存储了什么数据,下面继续探索:
首先,在从苹果下载并调试成可运行的objc4-781源码工程中创建一个Person类,在类中创建两个属性和几个方法:
我们在main函数中初始化Person类,并调用其中的几个方法:
// Demo entry point: allocates a Person with +alloc only (no -init is sent),
// then sends four instance messages so the class's method cache can be
// inspected from lldb between the calls.
int main(int argc, const char * argv[]) {
@autoreleasepool {
// insert code here...
Person *p = [Person alloc];
Class pClass = [Person class];
// p.name = @"objective-c";
// p.nickName = @"Swift";
// Cache the sayHello method once
// 4 (author's note; presumably refers to the four calls below -- confirm)
[p sayHello];
[p sayCode];
[p sayMaster];
[p sayNB];
NSLog(@"%@",pClass);
}
return 0;
}
在调用方法前打上几个断点,在程序运行之后,在控制台进行调试:
(lldb) p/x pClass
(Class) $0 = 0x0000000100002298 Person
(lldb) p (cache_t *)0x00000001000022a8
(指针进行偏移,因为类的结构中第三个才是cache_t,因此偏移字节为16字节)
(cache_t *) $1 = 0x00000001000022a8
(lldb) p *$1
(cache_t) $2 = {
_buckets = {
std::__1::atomic<bucket_t *> = 0x000000010032e410 {
_sel = {
std::__1::atomic<objc_selector *> = (null)
}
_imp = {
std::__1::atomic<unsigned long> = 0
}
}
}
_mask = {
std::__1::atomic<unsigned int> = 0
}
_flags = 32804
_occupied = 0
}
2020-09-17 19:38:25.304726+0800 KCObjc[28258:4358972] LGPerson say : -[Person sayHello]
(lldb) p *$1
(cache_t) $3 = {
_buckets = {
std::__1::atomic<bucket_t *> = 0x0000000101056c10 {
_sel = {
std::__1::atomic<objc_selector *> = ""
}
_imp = {
std::__1::atomic<unsigned long> = 11912
}
}
}
_mask = {
std::__1::atomic<unsigned int> = 3
}
_flags = 32804
_occupied = 1
}
2020-09-17 19:39:29.686646+0800 KCObjc[28258:4358972] LGPerson say : -[Person sayCode]
(lldb) p *$1
(cache_t) $4 = {
_buckets = {
std::__1::atomic<bucket_t *> = 0x0000000101056c10 {
_sel = {
std::__1::atomic<objc_selector *> = ""
}
_imp = {
std::__1::atomic<unsigned long> = 11912
}
}
}
_mask = {
std::__1::atomic<unsigned int> = 3
}
_flags = 32804
_occupied = 2
}
(lldb) p $3._buckets
(explicit_atomic<bucket_t *>) $5 = {
std::__1::atomic<bucket_t *> = 0x0000000101056c10 {
_sel = {
std::__1::atomic<objc_selector *> = ""
}
_imp = {
std::__1::atomic<unsigned long> = 11912
}
}
}
(lldb) p *$5
error: Couldn't lookup symbols:
__ZNK15explicit_atomicIP8bucket_tEcvS1_Ev
在经过一番测试之后,我们得知每执行一个尚未缓存的方法,_occupied会+1;而_buckets
获取的是_sel
和_imp
,我们对_buckets
进行打印,得到了(explicit_atomic<bucket_t *>)
类型的$5
;但是,当我们对$5
进行打印时,却报错了(找不到符号,无法对其解引用),探索的思路中断了。
那我们转换思路,既然cache_t中有_buckets
存在,那么再查看结构体cache_t
中是否有获取_buckets
的方法,在经过对cache_t
源码查找之后,找到了一个struct bucket_t *buckets();
的方法;那么继续探索:
(lldb) p *$1
(cache_t) $6 = {
_buckets = {
std::__1::atomic<bucket_t *> = 0x0000000101056c10 {
_sel = {
std::__1::atomic<objc_selector *> = ""
}
_imp = {
std::__1::atomic<unsigned long> = 11912
}
}
}
_mask = {
std::__1::atomic<unsigned int> = 3
}
_flags = 32804
_occupied = 2
}
(lldb) p $6.buckets()
(bucket_t *) $7 = 0x0000000101056c10
(lldb) p *$7
(bucket_t) $8 = {
_sel = {
std::__1::atomic<objc_selector *> = ""
}
_imp = {
std::__1::atomic<unsigned long> = 11912
}
}
在调用buckets()
方法后,我们成功地获取到了bucket_t
的值,那么我们如何获取缓存的方法呢?
既然是获取buckets
中的内容,那么我们应该去bucket_t
中查找我们需要的方法,请看源码:
public:
// Returns the selector stored in this bucket, read with a relaxed atomic load of _sel.
inline SEL sel() const { return _sel.load(memory_order::memory_order_relaxed); }
// Returns the IMP stored in this bucket, decoded for the given class.
// The raw _imp value is read with a relaxed atomic load; a zero value yields nil.
// How the raw value is turned back into an IMP depends on CACHE_IMP_ENCODING:
//   - PTRAUTH: authenticated and re-signed using a modifier built from (sel, cls)
//   - ISA_XOR: the stored value is XOR-ed with the class pointer
//   - NONE:    the stored value is used as-is
// `cls` is the class used to decode the stored value (it feeds the modifier / XOR).
inline IMP imp(Class cls) const {
uintptr_t imp = _imp.load(memory_order::memory_order_relaxed);
// Empty bucket: nothing to decode.
if (!imp) return nil;
#if CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_PTRAUTH
// The selector is needed here because it is part of the ptrauth modifier.
SEL sel = _sel.load(memory_order::memory_order_relaxed);
return (IMP)
ptrauth_auth_and_resign((const void *)imp,
ptrauth_key_process_dependent_code,
modifierForSEL(sel, cls),
ptrauth_key_function_pointer, 0);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_ISA_XOR
// Undo the XOR with the class pointer to recover the IMP.
return (IMP)(imp ^ (uintptr_t)cls);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_NONE
return (IMP)imp;
#else
#error Unknown method cache IMP encoding.
#endif
}
找到了bucket_t
中获取_sel
和_imp
的两个方法,那么继续打印:
(lldb) p $8.sel()
(SEL) $9 = "sayHello"
(lldb) p $8.imp(pClass)
(IMP) $10 = 0x0000000100000c10 (KCObjc`-[Person sayHello])
通过打印,我们得到了sayHello
方法存储的指针地址,那么我们用MachOView打开项目的可执行文件,点击function
,得到了各个方法缓存的指针地址:
其中sayHello
方法的指针地址与我们通过imp(pClass)获取到的sayHello的地址一模一样。
那么继续探索,我们在main函数中只调用两个方法(sayHello和sayCode),继续执行:
(lldb) p/x pClass
(Class) $0 = 0x0000000100002290 Person
(lldb) p (cache_t *)0x00000001000022a0
(cache_t *) $1 = 0x00000001000022a0
(lldb) p *$1
(cache_t) $2 = {
_buckets = {
std::__1::atomic<bucket_t *> = 0x0000000100744690 {
_sel = {
std::__1::atomic<objc_selector *> = ""
}
_imp = {
std::__1::atomic<unsigned long> = 11904
}
}
}
_mask = {
std::__1::atomic<unsigned int> = 3
}
_flags = 32804
_occupied = 2
}
(lldb) p $2.buckets()
(bucket_t *) $3 = 0x0000000100744690
(lldb) p *$3
(bucket_t) $4 = {
_sel = {
std::__1::atomic<objc_selector *> = ""
}
_imp = {
std::__1::atomic<unsigned long> = 11904
}
}
(lldb) p $4.sel()
(SEL) $5 = "sayHello"
(lldb) p $4.imp(pClass)
(IMP) $6 = 0x0000000100000c10 (KCObjc`-[Person sayHello])
(lldb)
在上面的调试中,我们只获得了一个sayHello
方法,那么另一个方法哪里去了?
我们在查看cache_t
的源码时,发现有很多static
属性,他们的值类似于一种偏移的东西,那么我们就以偏移来探索:
(lldb) p *($3 + 1) //(c语言内容)
(bucket_t) $7 = {
_sel = {
std::__1::atomic<objc_selector *> = ""
}
_imp = {
std::__1::atomic<unsigned long> = 11984
}
}
(lldb) p $7.sel()
(SEL) $8 = "sayCode"
(lldb) p $7.imp(pClass)
(IMP) $9 = 0x0000000100000c40 (KCObjc`-[Person sayCode])
(lldb)
经过$3
地址偏移,真的找到了sayCode
方法的信息。
那么,我们如果没有想到地址偏移,那么我们如何去找到我们想要的内容呢?
下面换一种方式去探索上面获取的信息,我们知道方法是在buckets
里面获取的,那么我们有没有可能在buckets()
方法后面通过下标来获取呢?代码实现:
(lldb) p $2.buckets()[0]
(bucket_t) $10 = {
_sel = {
std::__1::atomic<objc_selector *> = ""
}
_imp = {
std::__1::atomic<unsigned long> = 11904
}
}
(lldb) p $2.buckets()[1]
(bucket_t) $11 = {
_sel = {
std::__1::atomic<objc_selector *> = ""
}
_imp = {
std::__1::atomic<unsigned long> = 11984
}
}
(lldb) p $10.sel()
(SEL) $12 = "sayHello"
(lldb) p $11.sel()
(SEL) $13 = "sayCode"
结果是可以通过buckets()
函数返回值的下标来获取,它有点类似数组,其中每个元素都是一个bucket_t;
上面的调试是有源码环境才能实现,那么如果不通过源码环境,直接在项目中如何查找呢?
那还是需要上帝视角,在项目中模拟源码的环境,去探索,这个方法我就不介绍了,实现起来比较繁琐。
那么我们在之前的探索中,知道_occupied
值会改变,那么他是如何改变的呢?
我们在cache_t
中找到有一个void incrementOccupied();
方法;他的源码实现很简单,就一个_occupied++
;那我们需要知道它何时调用这个方法,继续查找源码:
// Inserts the (sel, imp) pair for `cls` into this method cache.
//
// Steps, in order:
//   1. Assert that the relevant lock is held (cacheUpdateLock or runtimeLock,
//      depending on CONFIG_USE_CACHE_LOCK).
//   2. Decide whether the current buckets can be used as-is, must be replaced
//      (constant empty cache), or must grow (doubling, capped at MAX_CACHE_SIZE).
//   3. Probe from cache_hash(sel, m), stepping with cache_next, until an empty
//      slot is found (store there and bump the occupied count) or the same
//      selector is found (nothing to do).
//   4. If the probe wraps all the way around without finding a slot, report a
//      bad cache via bad_cache().
//
// cls      - class whose cache is being filled; must already be initialized.
// sel      - selector to cache; must be non-zero.
// imp      - implementation to store for sel.
// receiver - passed only to bad_cache() on failure.
void cache_t::insert(Class cls, SEL sel, IMP imp, id receiver)
{
#if CONFIG_USE_CACHE_LOCK
cacheUpdateLock.assertLocked();
#else
runtimeLock.assertLocked();
#endif
ASSERT(sel != 0 && cls->isInitialized());
// Use the cache as-is if it is less than 3/4 full
mask_t newOccupied = occupied() + 1;
unsigned oldCapacity = capacity(), capacity = oldCapacity;
if (slowpath(isConstantEmptyCache())) {
// Cache is read-only. Replace it.
if (!capacity) capacity = INIT_CACHE_SIZE;
reallocate(oldCapacity, capacity, /* freeOld */false);
}
else if (fastpath(newOccupied + CACHE_END_MARKER <= capacity / 4 * 3)) { // e.g. capacity 4 gives a threshold of 4 / 4 * 3 = 3; CACHE_END_MARKER (defined elsewhere) is added to the new count
// Cache is less than 3/4 full. Use it as-is.
}
else {
capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE; // double the capacity, or start at INIT_CACHE_SIZE (presumably 4 -- confirm) when capacity is 0
if (capacity > MAX_CACHE_SIZE) {
capacity = MAX_CACHE_SIZE;
}
reallocate(oldCapacity, capacity, true); // growth done: buckets reallocated at the new capacity, third argument (freeOld) is true
}
bucket_t *b = buckets();
// capacity - 1 is used as the hash mask for probing.
mask_t m = capacity - 1;
mask_t begin = cache_hash(sel, m);
mask_t i = begin;
// Scan for the first unused slot and insert there.
// There is guaranteed to be an empty slot because the
// minimum size is 4 and we resized at 3/4 full.
do {
if (fastpath(b[i].sel() == 0)) {
// Empty slot: this is the only place the occupied count is incremented.
incrementOccupied();
b[i].set<Atomic, Encoded>(sel, imp, cls);
return;
}
if (b[i].sel() == sel) {
// The entry was added to the cache by some other thread
// before we grabbed the cacheUpdateLock.
return;
}
} while (fastpath((i = cache_next(i, m)) != begin));
// Probed every slot without success: the cache is in an unexpected state.
cache_t::bad_cache(receiver, (SEL)sel, cls);
}
发现了是在插入的时候进行++
的。
下面我直接给出我探索的结果:在类中添加的属性,会生成setter
方法,如果对属性进行赋值,那么setter
方法会自动调用,那么incrementOccupied()
方法就会调用,而方法的调用,incrementOccupied()
方法也会调用。
在insert
函数的源码中,他的作用是进行一系列的初始化、在缓存占用将超过容量的3/4时进行扩容,以及通过哈希算法来存储sel和imp;至于他是如何用哈希算法去实现sel和imp的存储,这边就不详细讲解。
网友评论