fishhook

作者: JerrySi | 来源:发表于2021-12-13 22:07 被阅读0次

    背景

    最近在使用 KSCrash 时,遇到了动态库 C 不能捕获的问题。官方给出的解决方法是使用 fishhook,但随后又出现了在 perform_rebinding_with_section 中崩溃的问题。不了解 fishhook 的实现,心里没底,于是最近完整地读了一遍 fishhook 的源码。

    源码注释

    #include "fishhook.h"
    
    #include <dlfcn.h>
    #include <stdbool.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <sys/types.h>
    #include <mach/mach.h>
    #include <mach/vm_map.h>
    #include <mach/vm_region.h>
    #include <mach-o/dyld.h>
    #include <mach-o/loader.h>
    #include <mach-o/nlist.h>
    
    // Select the 64-bit or 32-bit variants of the Mach-O structures (and the
    // matching segment load-command constant) based on __LP64__, so the rest of
    // the file can use a single set of names on either pointer width.
    #ifdef __LP64__
    typedef struct mach_header_64 mach_header_t;
    typedef struct segment_command_64 segment_command_t;
    typedef struct section_64 section_t;
    typedef struct nlist_64 nlist_t;
    #define LC_SEGMENT_ARCH_DEPENDENT LC_SEGMENT_64
    #else
    typedef struct mach_header mach_header_t;
    typedef struct segment_command segment_command_t;
    typedef struct section section_t;
    typedef struct nlist nlist_t;
    #define LC_SEGMENT_ARCH_DEPENDENT LC_SEGMENT
    #endif
    
    // Fallback name of the "__DATA_CONST" segment, defined here only when no
    // previously included header already provides SEG_DATA_CONST.
    #ifndef SEG_DATA_CONST
    #define SEG_DATA_CONST  "__DATA_CONST"
    #endif
    
    // One node of a singly linked list; each node holds one batch of rebindings.
    struct rebindings_entry {
      struct rebinding *rebindings;   // array of rebinding records in this batch
      size_t rebindings_nel;          // number of elements in `rebindings`
      struct rebindings_entry *next;  // next (older) node, or NULL at the end of the list
    };
    
    // Head of the file-wide list of rebinding batches: rebind_symbols() prepends
    // to it and _rebind_symbols_for_image() applies it to an image.
    static struct rebindings_entry *_rebindings_head;
    
    /**
     * Copies `rebindings[0..nel)` into a newly allocated list node and pushes
     * that node onto the front of `*rebindings_head`.
     *
     * The caller's array is copied, so it does not have to outlive the call.
     * On failure nothing stays allocated and `*rebindings_head` is untouched.
     *
     * @param rebindings_head  in/out: head of the list to prepend to
     * @param rebindings       array of `nel` rebinding records to copy
     * @param nel              number of records in `rebindings`
     * @return 0 on success; -1 if `nel` is too large for its byte size to fit
     *         in a size_t, or if an allocation fails
     */
    static int prepend_rebindings(struct rebindings_entry **rebindings_head,
                                  struct rebinding rebindings[],
                                  size_t nel) {
      // Reject counts whose byte size would wrap around size_t. A wrapped size
      // would produce a short buffer while rebindings_nel still recorded `nel`
      // elements, so later scans of the list would read past the allocation.
      if (nel > (size_t)-1 / sizeof(struct rebinding)) {
        return -1;
      }
      const size_t bytes = sizeof(struct rebinding) * nel;

      struct rebindings_entry *new_entry = (struct rebindings_entry *) malloc(sizeof *new_entry);
      if (!new_entry) {
        return -1;
      }
      new_entry->rebindings = (struct rebinding *) malloc(bytes);
      if (!new_entry->rebindings) {
        free(new_entry);
        return -1;
      }
      // memcpy requires valid pointers even for a zero length, so skip it for an
      // empty batch (where the caller may legitimately pass a NULL array).
      if (nel != 0) {
        memcpy(new_entry->rebindings, rebindings, bytes);
      }
      new_entry->rebindings_nel = nel;
      // Link the new node in front of the current head.
      new_entry->next = *rebindings_head;
      *rebindings_head = new_entry;
      return 0;
    }
    
    // Symbol Table: 符号表,存储符号信息;每个条目(nlist)的 n_strx 字段是该符号名在 String Table 中的偏移值
    // String Table: 符号表所引用的符号名字符串都存储在 String Table 中
    // Dynamic Symbol Table(此处指由 LC_DYSYMTAB 定位的 Indirect Symbol Table): 仅存储符号位于 Symbol Table 中的下标
    
    
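    // rebindings: 透传下来的链表,保存需要替换的函数信息(struct rebinding)
    // section: 存放符号指针(例如 C 函数指针)的 section
    // slide: ASLR 产生的偏移量
    // symtab: Symbol Table
    // strtab: String Table 的起始地址;符号名 = strtab + 该符号在 Symbol Table 条目中的 n_strx
    // indirect_symtab: Dynamic Symbol Table(即 Indirect Symbol Table)
    
    // Indirect Symbol Table 中从 section->reserved1 开始的条目,与该 section(例如 __la_symbol_ptr)中的指针槽位按下标一一对应
    // 例如:section 中第 100 个指针槽位对应 indirect_symtab[reserved1 + 100],该条目的值是这个符号在 Symbol Table 中的下标
    // https://www.jianshu.com/p/fa31e76549de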
    /**
     * Rewrites the pointer slots of one symbol-pointer section so that each slot
     * whose symbol matches a registered rebinding points at the replacement.
     *
     * For slot i, the symbol-table index is read from
     * indirect_symtab[section->reserved1 + i]; the symbol's name is looked up in
     * strtab and compared, without its first character, against every rebinding
     * name in list order. Only the first match is applied to a slot.
     *
     * @param rebindings       head of the list of rebinding batches to apply
     * @param section          section that holds the pointer slots
     * @param slide            added to section->addr to get the slots' runtime address
     * @param symtab           symbol table of the image
     * @param strtab           string table of the image
     * @param indirect_symtab  indirect symbol table of the image
     */
    static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
                                               section_t *section,
                                               intptr_t slide,
                                               nlist_t *symtab,
                                               char *strtab,
                                               uint32_t *indirect_symtab) {
      // section->reserved1 is used as the index of this section's first entry
      // in the indirect symbol table.
      uint32_t *slot_symbol_indices = indirect_symtab + section->reserved1;
      // Runtime address of the section's array of pointers.
      void **slots = (void **)((uintptr_t)slide + section->addr);

      for (unsigned int slot = 0; slot < section->size / sizeof(void *); slot++) {
        const uint32_t sym_index = slot_symbol_indices[slot];
        // Absolute/local entries carry no symbol-table index; skip them.
        if (sym_index == INDIRECT_SYMBOL_ABS || sym_index == INDIRECT_SYMBOL_LOCAL ||
            sym_index == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) {
          continue;
        }

        // n_strx is the offset of the symbol's name inside the string table.
        char *name = strtab + symtab[sym_index].n_un.n_strx;

        // The comparison below skips the first character of the stored name
        // (presumably the leading '_' the toolchain prepends to C symbols), so
        // a name shorter than two characters can never match.
        if (!name[0] || !name[1]) {
          continue;
        }

        bool rebound = false;
        struct rebindings_entry *entry = rebindings;
        while (entry && !rebound) {
          for (unsigned int k = 0; k < entry->rebindings_nel; k++) {
            if (strcmp(&name[1], entry->rebindings[k].name) != 0) {
              continue;
            }

            // Hand the slot's current value back through `replaced`, unless the
            // slot already holds the replacement (presumably so a repeated pass
            // does not overwrite the saved original with the hook itself).
            if (entry->rebindings[k].replaced != NULL &&
                slots[slot] != entry->rebindings[k].replacement) {
              *(entry->rebindings[k].replaced) = slots[slot];
            }

            /**
             * 1. The vm protection change lives here to keep its scope small.
             * 2. VM_PROT_WRITE is added unconditionally because the vm_region
             *    API on some iOS/Mac versions reports mismatching protection
             *    attributes.
             **/
            kern_return_t err = vm_protect (mach_task_self (), (uintptr_t)slots, section->size, 0, VM_PROT_READ | VM_PROT_WRITE | VM_PROT_COPY);
            if (err == KERN_SUCCESS) {
              /**
               * If changing the vm protection failed we MUST NOT perform the
               * write below. iOS 15 has corrected the const segments prot.
               **/
              // Install the hook: the slot now points at the replacement.
              slots[slot] = entry->rebindings[k].replacement;
            }

            // First match wins; move on to the next slot.
            rebound = true;
            break;
          }
          // Try the next batch of rebindings.
          entry = entry->next;
        }
      }
    }
    
    static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
                                         const struct mach_header *header,
                                         intptr_t slide) {
      Dl_info info;
      // 获取某个地址的符号信息
      if (dladdr(header, &info) == 0) {
        return;
      }
    
      // 记录指向当前这个load_commond的首地址
      segment_command_t *cur_seg_cmd;
      // 段虚拟内存的起始地址
      segment_command_t *linkedit_segment = NULL;
    
      // 为了求出需要hook的函数的偏移地址
      struct symtab_command* symtab_cmd = NULL;
      struct dysymtab_command* dysymtab_cmd = NULL;
    
      // machO文件最开始的元素是mach_header
      // sizeof(mach_header_t) 就是移动到load_commond的起始位置
      // mach_header_64有8个变量,每一个变量都是占用4个字节,那么就是占用32个字节
      uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);
    
      // 遍历machO里面所有的load_commond元素
      for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
        cur_seg_cmd = (segment_command_t *)cur;
        if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
          if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
            linkedit_segment = cur_seg_cmd;
          }
        } else if (cur_seg_cmd->cmd == LC_SYMTAB) {
          symtab_cmd = (struct symtab_command*)cur_seg_cmd;
        } else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
          dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd;
        }
      }
    
      if (!symtab_cmd || !dysymtab_cmd || !linkedit_segment ||
          !dysymtab_cmd->nindirectsyms) {
        return;
      }
    
      // Find base symbol/string table addresses
      // linkedit_base:MachO的真实内存地址
      // slide + 0x0000000100000000(arm64虚拟内存起始的地址)
      uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
      // symtab_cmd->symoff就是Symbol Table的偏移地址
      nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
      // symtab_cmd->stroff对应的String Table的偏移地址
      // strtab是我们最终的需要hook的符号位置
      char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);
    
      // Get indirect symbol table (array of uint32_t indices into symbol table)
      // dysymtab_cmd->indirectsymoff是Dnamic Symbol Table的偏移地址
      uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);
    
      cur = (uintptr_t)header + sizeof(mach_header_t);
      // 再次遍历找到SEG_DATA 和 SEG_DATA_CONST
      for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
        cur_seg_cmd = (segment_command_t *)cur;
        if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
          if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
              strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
            continue;
          }
          // 遍历command下面的section
          for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
            // sizeof(segment_command_t) 是72个字节, 一个section_t 占用80个字节
            section_t *sect =
              (section_t *)(cur + sizeof(segment_command_t)) + j;
    
            // 系统的C函数会在lz_symbol_ptr里面
            if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
              // sect存放c函数的section
              // slide: 偏移地址ASLR
              // symtab: Symbol Table
              // strtab: 我们最终的需要hook的符号位置
              // indirect_symtab: Dnamic Symbol Table
              perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
            }
            if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
              perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
            }
          }
        }
      }
    }
    
    /**
     * Callback handed to dyld by rebind_symbols(): applies the file-wide
     * rebinding list (_rebindings_head) to the given image.
     *
     * @param header  Mach-O header of the image
     * @param slide   slide of the image
     */
    static void _rebind_symbols_for_image(const struct mach_header *header, intptr_t slide) {
      rebind_symbols_for_image(_rebindings_head, header, slide);
    }
    
    // 在知道确定的MachO,可以使用该方法
    /**
     * Applies `rebindings` to one specific image only; use it when the target
     * Mach-O image is already known.
     *
     * The rebindings are copied into a temporary one-node list that is freed
     * before returning, so they are not added to the file-wide list.
     *
     * @param header          Mach-O header of the image to patch
     * @param slide           slide of that image
     * @param rebindings      array of rebinding records
     * @param rebindings_nel  number of records in `rebindings`
     * @return 0 on success, or the negative value returned by
     *         prepend_rebindings() when the temporary list cannot be built
     */
    int rebind_symbols_image(void *header,
                             intptr_t slide,
                             struct rebinding rebindings[],
                             size_t rebindings_nel) {
        struct rebindings_entry *rebindings_head = NULL;

        int retval = prepend_rebindings(&rebindings_head, rebindings, rebindings_nel);
        if (retval < 0) {
          // Nothing was allocated and there is nothing to apply; do not walk the
          // image with an empty list.
          return retval;
        }

        rebind_symbols_for_image(rebindings_head, (const struct mach_header *) header, slide);

        // Release the temporary node and its copy of the rebindings.
        free(rebindings_head->rebindings);
        free(rebindings_head);
        return retval;
    }
    
    /**
     * Registers `rebindings` on the file-wide list and applies them.
     *
     * Every call prepends one node holding a copy of the array to
     * _rebindings_head. On the first call the dyld add-image callback is
     * registered; on later calls the images dyld currently reports are patched
     * directly.
     *
     * @param rebindings      array of rebinding records
     * @param rebindings_nel  number of records in `rebindings`
     * @return 0 on success, or the negative value returned by
     *         prepend_rebindings() on failure
     */
    int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
      const int status = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
      if (status < 0) {
        return status;
      }

      // A node with a successor means the list was not empty before this call.
      const bool callback_already_registered = (_rebindings_head->next != NULL);

      if (callback_already_registered) {
        // Later calls: run over the images that are already loaded.
        const uint32_t image_count = _dyld_image_count();
        for (uint32_t idx = 0; idx < image_count; idx++) {
          _rebind_symbols_for_image(_dyld_get_image_header(idx), _dyld_get_image_vmaddr_slide(idx));
        }
      } else {
        // First call: register the callback for image additions. Per the
        // original note, dyld also invokes it for images that are already
        // loaded, and again for each image loaded afterwards.
        _dyld_register_func_for_add_image(_rebind_symbols_for_image);
      }
      return status;
    }
    
    

    相关文章

      网友评论

          本文标题:fishhook

          本文链接:https://www.haomeiwen.com/subject/ffvsfrtx.html