十、dyld流程分析

作者: Mjs | 来源:发表于2020-10-09 16:14 被阅读0次

十、dyld流程分析
Objective-C 类的加载原理（上）
dyld 流程分析
dyld流程分析
dyld加载流程
dyld 调用流程分析
iOS dyld流程分析
类的加载（上）-- _objc_init&read_image
iOS dyld与objc的关联
dyld加载流程

dyld

dyld（the dynamic link editor）是苹果的动态链接器，是苹果操作系统一个重要组成部分，在系统内核做好程序准备工作之后，交由dyld负责余下的工作。而且它是开源的，任何人可以通过苹果官网下载它的源码来阅读理解它的运作方式，了解系统加载动态库的细节。

我们可以在这里下载源码

dyld流程

新建一个application,我们在ViewController中添加+ (void)load方法,打上断点调试,用bt打印出堆栈信息,最后一行显示

    frame #12: 0x0000000105dd0025 dyld`_dyld_start + 37

我们在dyld源码中全局搜索_dyld_start找到入口,通过其中的注释进行分析,可以找到

# call dyldbootstrap::start(app_mh, argc, argv, dyld_mh, &startGlue)

在dyldbootstrap命名空间中找到start方法

// Bootstrap entry for dyld (the assembly comment above shows it being called as
// dyldbootstrap::start(app_mh, argc, argv, dyld_mh, &startGlue)).
// Rebases dyld, derives the envp and apple pointers from argv, sets up the stack
// canary, optionally runs dyld's own C++ initializers, then calls dyld::_main.
//   appsMachHeader  - mach header of the app; used to obtain the app's slide
//   argc / argv     - argument count and vector as laid out by the kernel
//   dyldsMachHeader - mach header of dyld itself; passed to rebaseDyld()
//   startGlue       - forwarded unchanged to dyld::_main
// Returns the value returned by dyld::_main.
uintptr_t start(const dyld3::MachOLoaded* appsMachHeader, int argc, const char* argv[],
                const dyld3::MachOLoaded* dyldsMachHeader, uintptr_t* startGlue)
{
    // Emit kdebug tracepoint to indicate dyld bootstrap has started <rdar://46878536>
    dyld3::kdebug_trace_dyld_marker(DBG_DYLD_TIMING_BOOTSTRAP_START, 0, 0, 0, 0);

    // if kernel had to slide dyld, we need to fix up load sensitive locations
    // we have to do this before using any global variables
    // (this is the rebase of dyld itself)
    rebaseDyld(dyldsMachHeader);

    // kernel sets up env pointer to be just past end of argv array
    const char** envp = &argv[argc+1];
    
    // kernel sets up apple pointer to be just past end of envp array
    // (walk to the NULL terminator of envp, then step over it)
    const char** apple = envp;
    while(*apple != NULL) { ++apple; }
    ++apple;

    // set up random value for stack canary
    // (stack overflow protection)
    __guard_setup(apple);

#if DYLD_INITIALIZER_SUPPORT
    // run all C++ initializers inside dyld
    runDyldInitializers(argc, argv, envp, apple);
#endif

    // now that we are done bootstrapping dyld, call dyld's main
    uintptr_t appsSlide = appsMachHeader->getSlide();
    return dyld::_main((macho_header*)appsMachHeader, appsSlide, argc, argv, envp, apple, startGlue);
}

初始化dyld后,调用dyld::_main()
dyld::_main()代码有600+行,我们一步步分析

1. 环境变量配置


    ///从环境中获取主可执行文件的cdHash
    uint8_t mainExecutableCDHashBuffer[20];
    const uint8_t* mainExecutableCDHash = nullptr;
    if ( hexToBytes(_simple_getenv(apple, "executable_cdhash"), 40, mainExecutableCDHashBuffer) )
        mainExecutableCDHash = mainExecutableCDHashBuffer;

    // 获取主程序的macho_header
    sMainExecutableMachHeader = mainExecutableMH;
    // 获取主程序的slide值
    sMainExecutableSlide = mainExecutableSlide;
        // 设置上下文信息
    setContext(mainExecutableMH, argc, argv, envp, apple);

    // Pickup the pointer to the exec path.
    //获取主程序路径
    sExecPath = _simple_getenv(apple, "executable_path");
    //配置进程的限制条件(process restrictions)
    configureProcessRestrictions(mainExecutableMH, envp);
    //检测环境变量
    checkEnvironmentVariables(envp);
    defaultUninitializedFallbackPaths(envp);
    // 获取当前运行环境架构的信息
    getHostInfo(mainExecutableMH, mainExecutableSlide);

2.共享缓存

检查是否开启了共享缓存，以及共享缓存是否映射到共享区域，例如UIKit、CoreFoundation等


    // load shared cache
    // 检测共享缓存是否开启,在iOS中必须开启
    checkSharedRegionDisable((dyld3::MachOLoaded*)mainExecutableMH, mainExecutableSlide);
    if ( gLinkContext.sharedRegionMode != ImageLoader::kDontUseSharedRegion ) {
#if TARGET_OS_SIMULATOR
        if ( sSharedCacheOverrideDir)
            ///检测共享缓存是否映射到了共享区域
            mapSharedCache();
#else
        mapSharedCache();
#endif


static void mapSharedCache()
{
    dyld3::SharedCacheOptions opts;
    opts.cacheDirOverride   = sSharedCacheOverrideDir;
    opts.forcePrivate       = (gLinkContext.sharedRegionMode == ImageLoader::kUsePrivateSharedRegion);


#if __x86_64__ && !TARGET_OS_SIMULATOR
    opts.useHaswell         = sHaswell;
#else
    opts.useHaswell         = false;
#endif
    opts.verbose            = gLinkContext.verboseMapping;
    // 加载dyld缓存
    loadDyldCache(opts, &sSharedCacheLoadInfo);

    // update global state
    if ( sSharedCacheLoadInfo.loadAddress != nullptr ) {
        gLinkContext.dyldCache                              = sSharedCacheLoadInfo.loadAddress;
        dyld::gProcessInfo->processDetachedFromSharedRegion = opts.forcePrivate;
        dyld::gProcessInfo->sharedCacheSlide                = sSharedCacheLoadInfo.slide;
        dyld::gProcessInfo->sharedCacheBaseAddress          = (unsigned long)sSharedCacheLoadInfo.loadAddress;
        sSharedCacheLoadInfo.loadAddress->getUUID(dyld::gProcessInfo->sharedCacheUUID);
        dyld3::kdebug_trace_dyld_image(DBG_DYLD_UUID_SHARED_CACHE_A, sSharedCacheLoadInfo.path, (const uuid_t *)&dyld::gProcessInfo->sharedCacheUUID[0], {0,0}, {{ 0, 0 }}, (const mach_header *)sSharedCacheLoadInfo.loadAddress);
    }

}

// Resets `results`, then maps the dyld shared cache using one of three routes:
//   - simulator builds: always a private mapping (mapCachePrivate)
//   - options.forcePrivate: a private mapping for this process only
//   - otherwise: reuse a cache already in the shared region, falling back to
//     mapCacheSystemWide() when none can be reused
// Returns the value of the chosen mapping routine; on the reuse path it
// returns whether results->errorMessage was set.
bool loadDyldCache(const SharedCacheOptions& options, SharedCacheLoadInfo* results)
{
    results->loadAddress        = nullptr;
    results->slide              = 0;
    results->errorMessage       = nullptr;

#if TARGET_OS_SIMULATOR
    // simulator only supports mmap()ing cache privately into process
    return mapCachePrivate(options, results);
#else
    // mmap cache into this process only
    if ( options.forcePrivate )
        return mapCachePrivate(options, results);

    // fast path: when cache is already mapped into shared region
    if ( reuseExistingCache(options, results) )
        return (results->errorMessage != nullptr);

    // slow path: this is first process to load cache
    return mapCacheSystemWide(options, results);
#endif
}

3. 主程序的初始化

        /// 实例化主程序
        /// 加载可执行文件,并生成一个ImageLoader
        sMainExecutable = instantiateFromLoadedImage(mainExecutableMH, mainExecutableSlide, sExecPath);
        gLinkContext.mainExecutable = sMainExecutable;
        gLinkContext.mainExecutableCodeSigned = hasCodeSignatureLoadCommand(mainExecutableMH);

4. 插入动态库


        //加载所有DYLD_INSERT_LIBRARIES指定的库
        if  ( sEnv.DYLD_INSERT_LIBRARIES != NULL ) {
            for (const char* const* lib = sEnv.DYLD_INSERT_LIBRARIES; *lib != NULL; ++lib) 
                loadInsertedDylib(*lib);
        }
        // record count of inserted libraries so that a flat search will look at
        //记录插入的库的数量,以便进行统一搜索
        // inserted libraries, then main, then others.
        //插入库,然后是main,然后是其他
        sInsertedDylibCount = sAllImages.size()-1;

5. link 主程序

        // 链接主程序
        link(sMainExecutable, sEnv.DYLD_BIND_AT_LAUNCH, true, ImageLoader::RPathChain(NULL, NULL), -1);

6. link 动态库

        // link any inserted libraries
        // do this after linking main executable so that any dylibs pulled in by inserted 
        // dylibs (e.g. libSystem) will not be in front of dylibs the program uses
        // 链接插入动态库
        // 在链接主可执行文件之后执行此操作,这样由插入的dylib引入的dylib(例如libSystem)不会排在程序使用的dylib前面
        if ( sInsertedDylibCount > 0 ) {
            for(unsigned int i=0; i < sInsertedDylibCount; ++i) {
                ImageLoader* image = sAllImages[i+1];
                link(image, sEnv.DYLD_BIND_AT_LAUNCH, true, ImageLoader::RPathChain(NULL, NULL), -1);
                image->setNeverUnloadRecursive();
            }
            if ( gLinkContext.allowInterposing ) {
                // only INSERTED libraries can interpose 只有插入的库可以插入
                // register interposing info after all inserted libraries are bound so chaining works
                //绑定所有插入的库之后,再注册符号插入(interposing)信息,以便链式插入能够生效
                for(unsigned int i=0; i < sInsertedDylibCount; ++i) {
                    ImageLoader* image = sAllImages[i+1];
                    // 注册符号插入
                    image->registerInterposing(gLinkContext);
                }
            }
        }

7. 弱符号绑定


        // Bind and notify for the main executable now that interposing has been registered
        //现在已经注册了插入操作，为主可执行文件绑定和通知
        uint64_t bindMainExecutableStartTime = mach_absolute_time();
        sMainExecutable->recursiveBindWithAccounting(gLinkContext, sEnv.DYLD_BIND_AT_LAUNCH, true);
        uint64_t bindMainExecutableEndTime = mach_absolute_time();
        ImageLoaderMachO::fgTotalBindTime += bindMainExecutableEndTime - bindMainExecutableStartTime;
        gLinkContext.notifyBatch(dyld_image_state_bound, false);

        // Bind and notify for the inserted images now interposing has been registered
        if ( sInsertedDylibCount > 0 ) {
            for(unsigned int i=0; i < sInsertedDylibCount; ++i) {
                ImageLoader* image = sAllImages[i+1];
                image->recursiveBind(gLinkContext, sEnv.DYLD_BIND_AT_LAUNCH, true);
            }
        }
        
        // <rdar://problem/12186933> do weak binding only after all inserted images linked
        //弱符号绑定
        sMainExecutable->weakBind(gLinkContext);
        gLinkContext.linkingMainExecutable = false;

8. 执行初始化方法


    #if SUPPORT_OLD_CRT_INITIALIZATION
        // Old way is to run initializers via a callback from crt1.o
        //旧的方法是通过crt1.o的回调来运行初始化器
        if ( ! gRunInitializersOldWay ) 
            initializeMainExecutable(); 
    #else
        // run all initializers
        //运行所有初始化项
        initializeMainExecutable(); 
    #endif


void initializeMainExecutable()
{
    // record that we've reached this step
    gLinkContext.startedInitializingMainExecutable = true;

    // run initialzers for any inserted dylibs
    ImageLoader::InitializerTimingList initializerTimes[allImagesCount()];
    initializerTimes[0].count = 0;
    const size_t rootCount = sImageRoots.size();
    if ( rootCount > 1 ) {
        for(size_t i=1; i < rootCount; ++i) {
            sImageRoots[i]->runInitializers(gLinkContext, initializerTimes[0]);
        }
    }
    
    // run initializers for main executable and everything it brings up 
    sMainExecutable->runInitializers(gLinkContext, initializerTimes[0]);
    
    // register cxa_atexit() handler to run static terminators in all loaded images when this process exits
    if ( gLibSystemHelpers != NULL ) 
        (*gLibSystemHelpers->cxa_atexit)(&runAllStaticTerminators, NULL, NULL);

    // dump info if requested
    if ( sEnv.DYLD_PRINT_STATISTICS )
        ImageLoader::printStatistics((unsigned int)allImagesCount(), initializerTimes[0]);
    if ( sEnv.DYLD_PRINT_STATISTICS_DETAILS )
        ImageLoaderMachO::printStatisticsDetails((unsigned int)allImagesCount(), initializerTimes[0]);
}

全局搜索runInitializers(const

void ImageLoader::runInitializers(const LinkContext& context, InitializerTimingList& timingInfo)
{
    uint64_t t1 = mach_absolute_time();
    mach_port_t thisThread = mach_thread_self();
    ImageLoader::UninitedUpwards up;
    up.count = 1;
    up.imagesAndPaths[0] = { this, this->getPath() };
    processInitializers(context, thisThread, timingInfo, up);
    context.notifyBatch(dyld_image_state_initialized, false);
    mach_port_deallocate(mach_task_self(), thisThread);
    uint64_t t2 = mach_absolute_time();
    fgTotalInitTime += (t2 - t1);
}

查看processInitializers

void ImageLoader::processInitializers(const LinkContext& context, mach_port_t thisThread,
                                     InitializerTimingList& timingInfo, ImageLoader::UninitedUpwards& images)
{
    uint32_t maxImageCount = context.imageCount()+2;
    ImageLoader::UninitedUpwards upsBuffer[maxImageCount];
    ImageLoader::UninitedUpwards& ups = upsBuffer[0];
    ups.count = 0;
    // Calling recursive init on all images in images list, building a new list of
    // uninitialized upward dependencies.
    for (uintptr_t i=0; i < images.count; ++i) {
        images.imagesAndPaths[i].first->recursiveInitialization(context, thisThread, images.imagesAndPaths[i].second, timingInfo, ups);
    }
    // If any upward dependencies remain, init them.
    if ( ups.count > 0 )
        processInitializers(context, thisThread, timingInfo, ups);
}

// Initializes this image under a recursive lock (excerpt; elided parts are marked "...").
// When the image has not yet reached the state just below
// dyld_image_state_dependents_initialized, it: marks itself to break cycles,
// records termination order if needed, posts
// dyld_image_state_dependents_initialized via notifySingle, runs
// doInitialization(), then posts dyld_image_state_initialized.
void ImageLoader::recursiveInitialization(const LinkContext& context, mach_port_t this_thread, const char* pathToInitialize,
                                          InitializerTimingList& timingInfo, UninitedUpwards& uninitUps)
{
    recursive_lock lock_info(this_thread);
    recursiveSpinLock(lock_info);// take the recursive lock

    if ( fState < dyld_image_state_dependents_initialized-1 ) {
        uint8_t oldState = fState;
        // break cycles (stops the recursion from looping)
        fState = dyld_image_state_dependents_initialized-1;
        try {
            // initialize lower level libraries first
            ...
            
            // record termination order
            if ( this->needsTermination() )
                context.terminationRecorder(this);

            // let objc know we are about to initialize this image
            uint64_t t1 = mach_absolute_time();
            fState = dyld_image_state_dependents_initialized;
            oldState = fState;
            context.notifySingle(dyld_image_state_dependents_initialized, this, &timingInfo);
            
            // initialize this image
            bool hasInitializers = this->doInitialization(context);

            // let anyone know we finished initializing this image
            fState = dyld_image_state_initialized;
            oldState = fState;
            context.notifySingle(dyld_image_state_initialized, this, NULL);
            
            ...
        }
        ...
    }
    
    recursiveSpinUnLock();
}

我们先来看notifySingle


// Delivers a single-image state change.
//   state      - the state the image has just reached
//   image      - the image whose state changed
//   timingInfo - optional; receives the time spent in the ObjC init callback
// Steps: (1) call every handler registered for `state`; a non-NULL result for
// dyld_image_state_mapped is thrown as a strdup'ed string, (2) for mapped
// images outside the shared cache, record load address + UUID, (3) for
// dyld_image_state_dependents_initialized, call the registered ObjC init
// callback (sNotifyObjCInit), (4) for terminated images, notify the kernel
// and the monitoring side.
static void notifySingle(dyld_image_states state, const ImageLoader* image, ImageLoader::InitializerTimingList* timingInfo)
{
    //dyld::log("notifySingle(state=%d, image=%s)\n", state, image->getPath());
    std::vector<dyld_image_state_change_handler>* handlers = stateToHandlers(state, sSingleHandlers);
    if ( handlers != NULL ) {
        dyld_image_info info;
        info.imageLoadAddress   = image->machHeader();
        info.imageFilePath      = image->getRealPath();
        info.imageFileModDate   = image->lastModified();
        for (std::vector<dyld_image_state_change_handler>::iterator it = handlers->begin(); it != handlers->end(); ++it) {
            const char* result = (*it)(state, 1, &info);
            if ( (result != NULL) && (state == dyld_image_state_mapped) ) {
                //fprintf(stderr, "  image rejected by handler=%p\n", *it);
                // make copy of thrown string so that later catch clauses can free it
                const char* str = strdup(result);
                throw str;
            }
        }
    }
    // the image has just been mapped
    if ( state == dyld_image_state_mapped ) {
        // <rdar://problem/7008875> Save load addr + UUID for images from outside the shared cache
        if ( !image->inSharedCache() ) {
            dyld_uuid_info info;
            if ( image->getUUID(info.imageUUID) ) {
                info.imageLoadAddress = image->machHeader();// record the mach header as the load address
                addNonSharedCacheImageUUID(info);
            }
        }
    }
    // call the ObjC init callback, if one is registered and the image asks for it
    if ( (state == dyld_image_state_dependents_initialized) && (sNotifyObjCInit != NULL) && image->notifyObjC() ) {
        uint64_t t0 = mach_absolute_time();
        dyld3::ScopedTimer timer(DBG_DYLD_TIMING_OBJC_INIT, (uint64_t)image->machHeader(), 0, 0);
        (*sNotifyObjCInit)(image->getRealPath(), image->machHeader());
        uint64_t t1 = mach_absolute_time();
        // t1 and t2 are taken back to back: their difference is the cost of an empty measurement
        uint64_t t2 = mach_absolute_time();
        uint64_t timeInObjC = t1-t0;
        uint64_t emptyTime = (t2-t1)*100;
        // only record the time when it exceeds 100x the empty measurement
        if ( (timeInObjC > emptyTime) && (timingInfo != NULL) ) {
            timingInfo->addTime(image->getShortName(), timeInObjC);
        }
    }
    // mach message csdlc about dynamically unloaded images
    if ( image->addFuncNotified() && (state == dyld_image_state_terminated) ) {
        notifyKernel(*image, false);
        const struct mach_header* loadAddress[] = { image->machHeader() };
        const char* loadPath[] = { image->getPath() };
        notifyMonitoringDyld(true, 1, loadAddress, loadPath);
    }
}

最后我们找到了(*sNotifyObjCInit),全局搜索,在registerObjCNotifiers中找到了对sNotifyObjCInit的赋值

// Stores the three ObjC callbacks in dyld's globals (excerpt; the rest of the
// function is elided as "...").
//   mapped   - saved to sNotifyObjCMapped
//   init     - saved to sNotifyObjCInit, the callback notifySingle() invokes
//   unmapped - saved to sNotifyObjCUnmapped
void registerObjCNotifiers(_dyld_objc_notify_mapped mapped, _dyld_objc_notify_init init, _dyld_objc_notify_unmapped unmapped)
{
    // record functions to call
    sNotifyObjCMapped   = mapped;
    sNotifyObjCInit     = init;
    sNotifyObjCUnmapped = unmapped;
...
}

// Registration entry point called by the ObjC runtime (see _objc_init).
// Forwards the three callbacks unchanged to dyld::registerObjCNotifiers().
void _dyld_objc_notify_register(_dyld_objc_notify_mapped    mapped,
                                _dyld_objc_notify_init      init,
                                _dyld_objc_notify_unmapped  unmapped)
{
    dyld::registerObjCNotifiers(mapped, init, unmapped);
}

这个时候我们就要去objc4-781源码中找到_dyld_objc_notify_register


// ObjC runtime bootstrap. Runs its body at most once, performs the runtime's
// init steps in order, then registers map_images / load_images / unmap_image
// with dyld through _dyld_objc_notify_register().
void _objc_init(void)
{
    // one-time guard: later calls return immediately
    static bool initialized = false;
    if (initialized) return;
    initialized = true;
    
    // fixme defer initialization until an objc-using image is found?
    environ_init();
    tls_init();
    static_init();
    runtime_init();
    exception_init();
    cache_init();
    _imp_implementationWithBlock_init();

    // load_images is the second argument, i.e. the `init` callback
    _dyld_objc_notify_register(&map_images, load_images, unmap_image);

#if __OBJC2__
    // remember that the registration call above has been made
    didCallDyldNotifyRegister = true;
#endif
}

在这里我们看到是把load_images赋给sNotifyObjCInit,而load_images会调用所有的+load方法,所以notifySingle中的(*sNotifyObjCInit)(...)实际回调的就是load_images

doInitialization 函数

// Runs this image's initializers: doImageInit() first, then doModInitFunctions().
// The image path is set as the crash-log message for the duration and cleared
// afterwards.
// Returns true when the image has a -init routine or static initializers
// (fHasDashInit || fHasInitializers).
bool ImageLoaderMachO::doInitialization(const LinkContext& context)
{
    CRSetCrashLogMessage2(this->getPath());

    // mach-o has -init and static initializers
    doImageInit(context);
    doModInitFunctions(context);
    
    CRSetCrashLogMessage2(NULL);
    
    return (fHasDashInit || fHasInitializers);
}

doImageInit for循环加载所有方法调用, libSystem的初始化必须先执行

// Runs this image's -init routine, if it has one (fHasDashInit).
// Walks the load commands; for each LC_ROUTINES_COMMAND it computes the
// initializer address (init_address + fSlide), verifies it lies inside this
// image, requires that libSystem has already been initialized, and then calls
// it with (argc, argv, envp, apple, &programVars) taken from `context`.
// Throws via dyld::throwf when the initializer is outside the mapped image or
// when libSystem is not yet initialized.
void ImageLoaderMachO::doImageInit(const LinkContext& context)
{
    if ( fHasDashInit ) {
        const uint32_t cmd_count = ((macho_header*)fMachOData)->ncmds;
        const struct load_command* const cmds = (struct load_command*)&fMachOData[sizeof(macho_header)];
        const struct load_command* cmd = cmds;
        for (uint32_t i = 0; i < cmd_count; ++i) {
            switch (cmd->cmd) {
                // braces give the declaration of `func` its own scope inside the case
                case LC_ROUTINES_COMMAND: {
                    // slide the recorded init address to get the callable function
                    Initializer func = (Initializer)(((struct macho_routines_command*)cmd)->init_address + fSlide);
#if __has_feature(ptrauth_calls)
                    func = (Initializer)__builtin_ptrauth_sign_unauthenticated((void*)func, ptrauth_key_asia, 0);
#endif
                    // <rdar://problem/8543820&9228031> verify initializers are in image
                    if ( ! this->containsAddress(stripPointer((void*)func)) ) {
                        dyld::throwf("initializer function %p not in mapped image for %s\n", func, this->getPath());
                    }
                    if ( ! dyld::gProcessInfo->libSystemInitialized ) {
                        // <rdar://problem/17973316> libSystem initializer must run first
                        dyld::throwf("-init function in image (%s) that does not link with libSystem.dylib\n", this->getPath());
                    }
                    if ( context.verboseInit )
                        dyld::log("dyld: calling -init function %p in %s\n", func, this->getPath());
                    {
                        // The timer must be a named object: an unnamed temporary is destroyed
                        // at the end of its own statement and would not span the call below.
                        dyld3::ScopedTimer timer(DBG_DYLD_TIMING_STATIC_INITIALIZER, (uint64_t)fMachOData, (uint64_t)func, 0);
                        func(context.argc, context.argv, context.envp, context.apple, &context.programVars);
                    }
                    break;
                }
            }
            // advance to the next load command
            cmd = (const struct load_command*)(((char*)cmd)+cmd->cmdsize);
        }
    }
}

doModInitFunctions 调用所有的cxx方法

我们验证一下在c++方法中加上断点

C++验证.png

我们在_objc_init 方法中加上断点,可以看到

_objc_init 流程.png

结合上面的分析，_objc_init调用_dyld_objc_notify_register时传入的参数2（即load_images）被保存到sNotifyObjCInit，之后notifySingle --> (*sNotifyObjCInit)() 实际调用的就是load_images，形成了一个闭环
所以可以简单地理解为：_objc_init中调用_dyld_objc_notify_register相当于注册通知，即addObserver；notifySingle相当于发送通知，即post；而sNotifyObjCInit（即load_images）相当于通知的处理函数，即selector

_dyld_start --> dyldbootstrap::start --> dyld::_main --> dyld::initializeMainExecutable --> ImageLoader::runInitializers --> ImageLoader::processInitializers --> ImageLoader::recursiveInitialization --> doInitialization -->libSystem_initializer（libSystem.B.dylib） --> _os_object_init（libdispatch.dylib） --> _objc_init(libobjc.A.dylib)

9.找到主程序入口`main`,从`Load Command`读取`LC_MAIN`入口，如果没有，就读取`LC_UNIXTHREAD`


            // find entry point for main executable
            // 找到main入口并执行
            result = (uintptr_t)sMainExecutable->getEntryFromLC_MAIN();
            if ( result != 0 ) {
                // main executable uses LC_MAIN, we need to use helper in libdyld to call into main()
                // main可执行文件使用LC_MAIN，我们需要使用libdyld中的helper函数来调用main()
                if ( (gLibSystemHelpers != NULL) && (gLibSystemHelpers->version >= 9) )
                    *startGlue = (uintptr_t)gLibSystemHelpers->startGlueToCallExit;
                else
                    halt("libdyld.dylib support not present for LC_MAIN");
            }
            else {
                // main executable uses LC_UNIXTHREAD, dyld needs to let "start" in program set up for main()
                //主可执行文件使用LC_UNIXTHREAD, dyld需要让“start”在程序设置为main()
                result = (uintptr_t)sMainExecutable->getEntryFromLC_UNIXTHREAD();
                *startGlue = 0;
            }

十、dyld流程分析
dyld dyld（the dynamic link editor）是苹果的动态链接器，是苹果操作系统一个重要组成...
Objective-C 类的加载原理（上）
上篇文章中分析了dyld整个流程以及dyld与objc的交互。这篇文章将继续分析dyld调用map_images究...
dyld 流程分析
前言在编写一个应用程序时候，我们看到的入口函数都是main.m 里面的 main函数，曾以为这是程序的入口，其实...
dyld流程分析
编译流程在开始分析dyld之前，我们先看下分析下可执行文件的整个编译流程：如上图所示，我们编写的源文件，会在预...
dyld加载流程
dyld加载流程配置环境变量依赖DYLD（dyld）dyld（the dynamic link editor）是...
dyld 调用流程分析
dyld 简介 dyld（the dynamic link editor）是苹果的动态链接器，用来加载所有的库和可...
iOS dyld流程分析
本文的目的主要是分析dyld的加载流程，了解在main函数之前，底层还做了什么引子创建一个project，在V...
类的加载（上）-- _objc_init&read_image
前言上一篇文章主要分析dyld的整个流程以及dyld与_objc_init之间的交互，_objc_init向dy...
iOS dyld与objc的关联
本文的主要目的是理解dyld与objc是如何关联的在上一篇文章iOS dyld流程分析[https://www....
dyld加载流程
dyld加载流程结合最新的DYLD开源库dyld-832.7.3。程序开始于_dyld_start汇编函数--...