runtime
在研究objc_msgSend时先来了解一下runtime。
- 1.什么是runtime
我们的oc代码转化成可执行的程序,大致需要进过3个阶段:编译、链接、运行。oc是一门动态语言,它在编译阶段并不知道变量具体的数据类型,一节函数要怎么调用,只有在运行时才会去确定这些东西。而实现oc的运行时就是runtime。
- 2.runtime的调用
如上图所示电泳runtime的方法有三种:
- 通过Objective-C Code调用;如[person say];
- 通过Framework&Service调用;如isKindofClass;
- 通过Runtime API调用。如:class_getInstanceSize。
而上图中的Compiler是编译层,也就是我们常说的llvm;Runtime System Library是runtime的底层API。
objc_msgSend
方法在底层的调用方式
- 首先我们创建一个person类,任意实现两个方法,然后在mian中调用一下。
- 然后通过clang去编译一下main,得到mian.cpp,找到它的mian函数,可以看到代码如下:
LDQPerson *person = ((LDQPerson *(*)(id, SEL))(void *)objc_msgSend)((id)((LDQPerson *(*)(id, SEL))(void *)objc_msgSend)((id)objc_getClass("LDQPerson"), sel_registerName("alloc")), sel_registerName("init"));
((void (*)(id, SEL))(void *)objc_msgSend)((id)person, sel_registerName("say666"));
((void (*)(id, SEL))(void *)objc_msgSend)((id)person, sel_registerName("say777"));
看到main函数中的代码,我们可以发现代码的底层实现并不是通过调用oc的上层方法,而是调用了一些objc_msgSend
、sel_registerName
这些函数来实现的。
我们的方法在底层都叫做消息,而消息里面又包含消息接受者
和消息主体
。
- 所以我们在main函数中调用方法除了用oc去调用还可直接用底层方法去调用。调用方法如下:
LDQPerson *person = [LDQPerson alloc];
objc_msgSend(person,sel_registerName("say666"));
运行时可能会报错Too many arguments to function call, expected 0, have 2
,解决办法:arget > Build Settings > Enable Strict Checking of objc_msgSend Calls 修改为 No 即可
。
- 除了上面使用objc_msgSend来调用方法,我们还可以去调用它的父类。
步骤:创建FatherClass类,实现方法say666
,LDQPerson类继承FatherClass,将之前LDQPerson的say666方法删掉,然后在mian中去调用:
LDQPerson *person = [LDQPerson alloc];
objc_msgSend(person,sel_registerName("say666"));
struct objc_super ldqsuper;
ldqsuper.receiver = person;
ldqsuper.super_class = [FatherClass class];
运行结果如下
以上是我们使用runtime底层api调用方法的两种方式,那么在底层编译之后,我们的程序是怎么去找到这些方法的呢?
消息查找流程
objc_megSend里面会有一个消息接收者,在查找方法是首先回去找到对象,然后通过对象找到isa,之后找到类,然后找cache_t,如果cache里面不存在,那么就回去找methodlist。
- 快速查找(从cache_t里面找)
通过对objc_msgSend的探索发现它的底层是用汇编实现的。用汇编实现的好处是:1.速度快;2.动态性
。
下面开始探索objc_msgSend的汇编底层实现:
1. 首先打开objc源码,全局搜索objc_msgSend,找到arm64.s这个文件,找到下面这段代码:
//ENTRY进入,很显然这里是objc_msgSend的入口,我们的探索就从这里开始
ENTRY _objc_msgSend
UNWIND _objc_msgSend, NoFrame
// 对比p0和空,p0使我们寄存器的第一号位置,存的是消息接收者传过来的参数,receiver对象,所以这句汇编的意思就是判断当前receiver对象是否存在
cmp p0, #0 // nil check and tagged pointer check
#if SUPPORT_TAGGED_POINTERS
b.le LNilOrTagged // (MSB tagged pointer looks negative)
#else
b.eq LReturnZero
#endif
//根据对象拿出isa ,即从x0寄存器指向的地址 取出 isa,存入 p13寄存器
ldr p13, [x0] // p13 = isa
//p16 根据isa p13获取到Class
GetClassFromIsa_p16 p13 // p16 = class
LGetIsaDone:
// calls imp or objc_msgSend_uncached
CacheLookup NORMAL, _objc_msgSend
以上只是一个简单的判断对象是否存在,并且获取到isa的过程,接下来看到CacheLookup流程
2. CacheLookup
.macro CacheLookup
//
// Restart protocol:
//
// As soon as we're past the LLookupStart$1 label we may have loaded
// an invalid cache pointer or mask.
//
// When task_restartable_ranges_synchronize() is called,
// (or when a signal hits us) before we're past LLookupEnd$1,
// then our PC will be reset to LLookupRecover$1 which forcefully
// jumps to the cache-miss codepath which have the following
// requirements:
//
// GETIMP:
// The cache-miss is just returning NULL (setting x0 to 0)
//
// NORMAL and LOOKUP:
// - x0 contains the receiver
// - x1 contains the selector
// - x16 contains the isa
// - other registers are set as per calling conventions
//
LLookupStart$1:
// p1 = SEL, p16 = isa
//将p16也就是isa平移16个字节(x16),得到cache放入p11
ldr p11, [x16, #CACHE] // p11 = mask|buckets
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
//p11 & 0x0000ffffffffffff 相当于高16位抹0,低48位 置1
and p10, p11, #0x0000ffffffffffff // p10 = buckets
//p11逻辑右移48位,获取到mask,然后与上p1(也就是sel),然后放入p12,得到一个搜索方法的下标,因为我们在存方法的时候在计算下标的时候正好是sel & mask
and p12, p1, p11, LSR #48 // x12 = _cmd & mask
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
and p10, p11, #~0xf // p10 = buckets
and p11, p11, #0xf // p11 = maskShift
mov p12, #0xffff
lsr p11, p12, p11 // p11 = mask = 0xffff >> p11
and p12, p1, p11 // x12 = _cmd & mask
#else
#error Unsupported cache mask storage for ARM64.
#endif
//全局搜索PTRSHIFT 可以找到PTRSHIFT的定义为
//#define PTRSHIFT 3 // 1<<PTRSHIFT == PTRSIZE
//将p12(当前index)逻辑左移4位,得到当前index所对应的bucket
//为什么平移4位,因为一个bucket里包含一个sel和imp,它们都是8个字节,加起来16个字节
//这里的操作总的来说就是用buckets的首地址加上一个偏移量得到当前index所对应的bucket
add p12, p10, p12, LSL #(1+PTRSHIFT)
// p12 = buckets + ((_cmd & mask) << (1+PTRSHIFT))
//根据bucket的指针地址得到imp和sel
ldp p17, p9, [x12] // {imp, sel} = *bucket
//接下来开始进行循环,判断传进来的cmd(也就是sel)和我们当前查询的bucket的sel是否相同
1: cmp p9, p1 // if (bucket->sel != _cmd)
//如果不同就跳转到下面的2:
b.ne 2f // scan more
//相同,所以缓存命中,直接返回改sel的imp
CacheHit $0 // call or return imp
2: // not hit: p12 = not-hit bucket
CheckMiss $0 // miss if bucket->sel == 0
//不同的情况1:
//如果当前要查找的bucket-p12 等于 buckets(p10)的第一个元素
cmp p12, p10 // wrap if bucket == buckets
//那么就找后面第3个元素,也就是buckets中的最后一个,以为开辟的缓存空间为4,为它的元素下标只能取到0、1、2、3,所以之前移到最后一个元素去查找,然后逐步向前查找,然后继续进行递归循环
b.eq 3f
ldp p17, p9, [x12, #-BUCKET_SIZE]! // {imp, sel} = *--bucket
b 1b // loop
3: // wrap: p12 = first bucket, w11 = mask
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
add p12, p12, p11, LSR #(48 - (1+PTRSHIFT))
// p12 = buckets + (mask << 1+PTRSHIFT)
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
add p12, p12, p11, LSL #(1+PTRSHIFT)
// p12 = buckets + (mask << 1+PTRSHIFT)
#else
#error Unsupported cache mask storage for ARM64.
#endif
// Clone scanning loop to miss instead of hang when cache is corrupt.
// The slow path may detect any corruption and halt later.
ldp p17, p9, [x12] // {imp, sel} = *bucket
1: cmp p9, p1 // if (bucket->sel != _cmd)
b.ne 2f // scan more
CacheHit $0 // call or return imp
2: // not hit: p12 = not-hit bucket
CheckMiss $0 // miss if bucket->sel == 0
cmp p12, p10 // wrap if bucket == buckets
b.eq 3f
ldp p17, p9, [x12, #-BUCKET_SIZE]! // {imp, sel} = *--bucket
b 1b // loop
LLookupEnd$1:
LLookupRecover$1:
3: // double wrap
JumpMiss $0
.endmacro
以上就是快速查找流程,那么如果cache里面找不到则会去方法列表里面查找,进入慢速查找流程。
- 慢速查找(从methodlist里面找)
在快速查找流程中如果没有找到方法,不论是CheckMiss
还是JumpMiss
它都会去执行__objc_msgSend_uncached,在查看源码时会发现以下代码:
.macro MethodTableLookup
//代码省略
// lookUpImpOrForward(obj, sel, cls, LOOKUP_INITIALIZE | LOOKUP_RESOLVER)
// receiver and selector already in x0 and x1
mov x2, x16
mov x3, #3
bl _lookUpImpOrForward
//代码省略
.endmacro
最终会发现程序会跳转到lookUpImpOrForward这样一个函数去,而当我们搜索时发现当前汇编源码文件中并没有这个方法的实现,而当我们全局搜索时最终会发现在objc-runtime-new.mm
文件中有这个方法的实现,oc层代码实现如下:
IMP lookUpImpOrForward(id inst, SEL sel, Class cls, int behavior)
{
const IMP forward_imp = (IMP)_objc_msgForward_impcache;
IMP imp = nil;
Class curClass;
runtimeLock.assertUnlocked();
// Optimistic cache lookup
//这个if里面从cache里面查找是因为有可能在进行慢速查找是有一个线程缓存了方法,找到了就直接返回一个imp,从而不需要继续查找
if (fastpath(behavior & LOOKUP_CACHE)) {
imp = cache_getImp(cls, sel);
if (imp) goto done_nolock;
}
// runtimeLock is held during isRealized and isInitialized checking
// to prevent races against concurrent realization.
// runtimeLock is held during method search to make
// method-lookup + cache-fill atomic with respect to method addition.
// Otherwise, a category could be added but ignored indefinitely because
// the cache was re-filled with the old value after the cache flush on
// behalf of the category.
runtimeLock.lock();
// We don't want people to be able to craft a binary blob that looks like
// a class but really isn't one and do a CFI attack.
//
// To make these harder we want to make sure this is a class that was
// either built into the binary or legitimately registered through
// objc_duplicateClass, objc_initializeClassPair or objc_allocateClassPair.
//
// TODO: this check is quite costly during process startup.
//所有的方法都会以一种缓存的形式加载到内存中去,判断是否是已知类,从而去获取到类的信息
checkIsKnownClass(cls);
if (slowpath(!cls->isRealized())) {
//去实现这个类
cls = realizeClassMaybeSwiftAndLeaveLocked(cls, runtimeLock);
// runtimeLock may have been dropped but is now locked again
}
if (slowpath((behavior & LOOKUP_INITIALIZE) && !cls->isInitialized())) {
cls = initializeAndLeaveLocked(cls, inst, runtimeLock);
// runtimeLock may have been dropped but is now locked again
// If sel == initialize, class_initialize will send +initialize and
// then the messenger will send +initialize again after this
// procedure finishes. Of course, if this is not being called
// from the messenger then it won't happen. 2778172
}
runtimeLock.assertLocked();
curClass = cls;
// The code used to lookpu the class's cache again right after
// we take the lock but for the vast majority of the cases
// evidence shows this is a miss most of the time, hence a time loss.
//
// The only codepath calling into this without having performed some
// kind of cache lookup is class_getInstanceMethod().
//重点
for (unsigned attempts = unreasonableClassCount();;) {
// curClass method list.
//首先第一步:从当前类的方法里面查找,如果能找到则不需要去父类查找了
//这里是使用二分查找的方法来找的
Method meth = getMethodNoSuper_nolock(curClass, sel);
if (meth) {
//找到了,返回imp,找到之后会进行一次cache_fill操作,去写入缓存,这是为了下一次查找的时候能直接通过objc_msgSend进行快速查找,从而提高性能
//写入操作log_and_fill_cache(cls, imp, sel, inst, curClass); -> cache_fill(cls, sel, imp, receiver);
imp = meth->imp;
goto done;
}
//如果本类里面没有找到,则去父类里面查找
//curClass = curClass->superclass一个赋值操作,将当前类的父类赋给当前类
if (slowpath((curClass = curClass->superclass) == nil)) {
// No implementation found, and method resolver didn't help.
// Use forwarding.
imp = forward_imp;
break;
}
// Halt if there is a cycle in the superclass chain.
if (slowpath(--attempts == 0)) {
_objc_fatal("Memory corruption in class list.");
}
//这从这里先从父类的缓存进行快速查找
// Superclass cache.
imp = cache_getImp(curClass, sel); //这里开始进行递归查找,进行汇编的快速查找,如果找不到,再次来到lookUpImpOrForward这个方法,但是现在是找父类了,直到找完当前类的继承链
if (slowpath(imp == forward_imp)) {
// Found a forward:: entry in a superclass.
// Stop searching, but don't cache yet; call method
// resolver for this class first.
//直到找完都没有找到,由于递归到最后一层,它已经没有父类了,所以会走上面的if来对imp进行赋值 imp = forward_imp;从而进入到这个break,结束递归
break;
}
if (fastpath(imp)) {
// Found the method in a superclass. Cache it in this class.
goto done;
}
}
// No implementation found. Try method resolver once.
//在一直没有找到的情况下:从上面跳出递归之后就回来到这里,进行动态方法决议。
if (slowpath(behavior & LOOKUP_RESOLVER)) {
behavior ^= LOOKUP_RESOLVER;
return resolveMethod_locked(inst, sel, cls, behavior);
}
done:
log_and_fill_cache(cls, imp, sel, inst, curClass);
runtimeLock.unlock();
done_nolock:
if (slowpath((behavior & LOOKUP_NIL) && imp == forward_imp)) {
return nil;
}
return imp;
}
从上面的流程不难看出慢速查找是从本类->父类->NSObject->nil.
- 二分法查找
findMethodInSortedMethodList(SEL key, const method_list_t *list)
{
ASSERT(list);
const method_t * const first = &list->first;
const method_t *base = first;
const method_t *probe;
uintptr_t keyValue = (uintptr_t)key;
uint32_t count;
//base相当于low,count是max,probe是middle,这就是二分
for (count = list->count; count != 0; count >>= 1) {
//从首地址+下标 --> 移动到中间位置(count >> 1 左移1位即 count/2 = 4)
probe = base + (count >> 1);
uintptr_t probeValue = (uintptr_t)probe->name;
//如果查找的key的keyvalue等于中间位置(probe)的probeValue,则直接返回中间位置
if (keyValue == probeValue) {
// `probe` is a match.
// Rewind looking for the *first* occurrence of this value.
// This is required for correct category overrides.
// -- while 平移 -- 排除分类重名方法
while (probe > first && keyValue == (uintptr_t)probe[-1].name) {
//排除分类重名方法(方法的存储是先存储类方法,在存储分类---按照先进后出的原则,分类方法最先出,而我们要取的类方法,所以需要先排除分类方法)
//如果是两个分类,就看谁先进行加载
//分类方法在前
probe--;
}
return (method_t *)probe;
}
//如果keyValue 大于 probeValue,就往probe即中间位置的右边查找
if (keyValue > probeValue) {
base = probe + 1;
count--;
}
}
return nil;
}
二分查找要求查找内容必须是有序的,例如升序,首先查找中间的,进行比较,如果要查找的元素比它大,那么再查找中间元素之后的那一部分,再查找这一部分中间元素,进行比较,如此不断查找,知道找到要查找的元素,这种方式大大减少了查找次数,提高了效率。