美文网首页
FishHook 源码解读

FishHook 源码解读

作者: 子木易 | 来源:发表于2017-09-05 15:56 被阅读0次

    结合官方的例子和MachOView对源码进行理解

    // Slots for the original close/open implementations. Their addresses are
    // passed to fishhook through the `replaced` field of each rebinding so the
    // hooks can still forward to the real functions.
    static int (*orig_close)(int);
    static int (*orig_open)(const char *, int, ...);
    
    // Hook installed in place of close(): logs the descriptor, then forwards the
    // call to the implementation stored in orig_close and returns its result.
    int my_close(int fd) {
        int result;

        printf("Calling real close(%d)\n", fd);
        result = orig_close(fd);
        return result;
    }
    
    // Hook installed in place of open(): logs the call, then forwards it to the
    // implementation stored in orig_open and returns its result.
    // The optional third argument (mode) is only read from the variadic list
    // when O_CREAT is set in oflag; otherwise mode is forwarded as 0.
    int my_open(const char *path, int oflag, ...) {
        mode_t mode = 0;

        // No O_CREAT: there is no mode argument to fetch.
        if ((oflag & O_CREAT) == 0) {
            printf("Calling real open('%s', %d)\n", path, oflag);
            return orig_open(path, oflag, mode);
        }

        // mode_t is promoted when passed through "...", so it is read back as int.
        va_list args;
        va_start(args, oflag);
        mode = va_arg(args, int);
        va_end(args);

        printf("Calling real open('%s', %d, %d)\n", path, oflag, mode);
        return orig_open(path, oflag, mode);
    }
    
    struct rebinding reb[2];
    reb[0].name = "close";
    reb[0].replacement = my_close;
    reb[0].replaced = (void **)&orig_close;
            
    reb[1].name = "open";
    reb[1].replacement = my_open;
    reb[1].replaced = (void **)&orig_open;
          
    rebind_symbols(reb, 2);
    

    官方的例子是对标准库中的open和close进行hook:reb数组中有两个rebinding结构体,my_close和my_open分别是新写的两个替换函数。

    进入rebind_symbols

    // Records a new batch of rebindings and applies it to the loaded images.
    //
    // rebindings:     array of rebinding descriptions supplied by the caller.
    // rebindings_nel: number of elements in `rebindings`.
    // Returns the negative status from prepend_rebindings if recording the batch
    // fails; otherwise returns the status prepend_rebindings reported.
    int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
      const int status = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
      if (status < 0) {
        return status;
      }

      // If this was the first call (the list holds only the node just added),
      // register the callback for image additions; it is also invoked for
      // existing images, so nothing else is needed here.
      if (!_rebindings_head->next) {
        _dyld_register_func_for_add_image(_rebind_symbols_for_image);
        return status;
      }

      // Otherwise the callback is already registered: just run on the images
      // that are currently loaded.
      const uint32_t image_count = _dyld_image_count();
      uint32_t idx = 0;
      while (idx < image_count) {
        _rebind_symbols_for_image(_dyld_get_image_header(idx), _dyld_get_image_vmaddr_slide(idx));
        idx++;
      }
      return status;
    }
    

    再进入prepend_rebindings函数

    // One node of the singly linked list of registered rebinding batches.
    struct rebindings_entry {
      struct rebinding *rebindings;   // heap-allocated copy of the caller's rebinding array
      size_t rebindings_nel;          // number of elements in `rebindings`
      struct rebindings_entry *next;  // previously registered batch, or NULL at the end of the list
    };
    // Copies the caller's rebinding array into a freshly allocated list node and
    // pushes that node onto the front of *rebindings_head.
    //
    // rebindings_head: address of the list head pointer; updated on success.
    // rebindings:      array to copy (the caller keeps ownership of it).
    // nel:             number of elements in `rebindings`.
    // Returns 0 on success, or -1 if either allocation fails; in that case the
    // list is left untouched and nothing allocated here is kept.
    static int prepend_rebindings(struct rebindings_entry **rebindings_head,
                                  struct rebinding rebindings[],
                                  size_t nel) {
      const size_t copy_bytes = sizeof(struct rebinding) * nel;

      struct rebindings_entry *node = (struct rebindings_entry *) malloc(sizeof(struct rebindings_entry));
      if (node == NULL) {
        return -1;
      }

      node->rebindings = (struct rebinding *) malloc(copy_bytes);
      if (node->rebindings == NULL) {
        free(node);
        return -1;
      }
      memcpy(node->rebindings, rebindings, copy_bytes);
      node->rebindings_nel = nel;

      // Link in front of the current head so the newest batch is visited first.
      node->next = *rebindings_head;
      *rebindings_head = node;
      return 0;
    }
    

    也就是说,fishhook会维护一个链表,每次调用rebind_symbols新添加的一组hook都会被插入到链表头部;如果你hook了同一个函数多次,那么后添加的会先被匹配到(匹配成功后就不再继续匹配更早添加的)。

    // The head node has no successor only when this is the first registered
    // batch, so the dyld callback is registered exactly once.
    if (!_rebindings_head->next) {
          _dyld_register_func_for_add_image(_rebind_symbols_for_image);
      }
    

    _dyld_register_func_for_add_image 注册回调:注册之后,对已经加载的镜像(image,例如动态库)以及之后每一个新加载的镜像,动态链接器都会触发该回调函数;在回调函数中,会对需要hook的函数进行替换,具体看源码:

    // Scans one loaded Mach-O image and applies `rebindings` to its lazy and
    // non-lazy symbol pointer sections.
    //
    // rebindings: head of the linked list of rebinding batches to apply.
    // header:     start of the image's Mach-O header; the load commands are
    //             read from the bytes directly after it.
    // slide:      value added to the addresses stored in the load commands to
    //             obtain in-memory addresses.
    //
    // Returns silently (no rebinding) if dladdr fails for the header, or if the
    // image lacks a linkedit segment, LC_SYMTAB, LC_DYSYMTAB, or has no
    // indirect symbols.
    static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
                                         const struct mach_header *header,
                                         intptr_t slide) {
      Dl_info info;
       
      // Skip this image if dladdr cannot resolve the header address.
      if (dladdr(header, &info) == 0) {
        return;
      }
    
      segment_command_t *cur_seg_cmd;
      segment_command_t *linkedit_segment = NULL;
      struct symtab_command* symtab_cmd = NULL;
      struct dysymtab_command* dysymtab_cmd = NULL;
    
      // First pass over the load commands: remember the SEG_LINKEDIT segment
      // command and the LC_SYMTAB / LC_DYSYMTAB commands.
      uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);
      for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
        cur_seg_cmd = (segment_command_t *)cur;
        if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
          if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
            linkedit_segment = cur_seg_cmd;
          }
        } else if (cur_seg_cmd->cmd == LC_SYMTAB) {
          symtab_cmd = (struct symtab_command*)cur_seg_cmd;
        } else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
          dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd;
        }
      }
    
      // Nothing to do unless all three commands were found and the image
      // actually has indirect symbols.
      if (!symtab_cmd || !dysymtab_cmd || !linkedit_segment ||
          !dysymtab_cmd->nindirectsyms) {
        return;
      }
    
      // Find base symbol/string table addresses
      // symoff/stroff/indirectsymoff are file offsets; subtracting the linkedit
      // segment's fileoff and adding its slid vmaddr turns them into addresses.
      uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
      nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
      char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);
    
      // Get indirect symbol table (array of uint32_t indices into symbol table)
      uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);
    
      // Second pass over the load commands: visit only the SEG_DATA and
      // SEG_DATA_CONST segments and rebind their symbol pointer sections.
      // NOTE(review): the rebinding step writes into these sections; if
      // SEG_DATA_CONST can be mapped read-only on the target OS, that write
      // would fault -- confirm against the supported OS versions.
      cur = (uintptr_t)header + sizeof(mach_header_t);
      for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
        cur_seg_cmd = (segment_command_t *)cur;
        if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
          if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
              strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
            continue;
          }
          for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
            // The section headers are read from the bytes immediately after
            // the segment command; j selects the j-th one.
            section_t *sect =
              (section_t *)(cur + sizeof(segment_command_t)) + j;
            if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
              perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
            }
            if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
              perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
            }
          }
        }
      }
    }
    

    接下来会通过三部分对该代码进行分析:
    1、dladdr(header, &info) == 0 用于检测header是否合法,这里的header指的是Mach-O文件的header部分。先来看下用MachOView查看的可执行文件:

    Mach-O

    分为三部分:
    头部(header structure)
    加载命令(load command)
    段(segment)

    这里的检测就是对header。

    2、获取LC结构体的地址

      segment_command_t *cur_seg_cmd;
      segment_command_t *linkedit_segment = NULL;
      struct symtab_command* symtab_cmd = NULL;
      struct dysymtab_command* dysymtab_cmd = NULL;
    
      // The load commands are read starting right after the Mach-O header;
      // each iteration advances by the current command's cmdsize.
      uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);
      for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
        cur_seg_cmd = (segment_command_t *)cur;
        if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
          // Among segment commands, only the SEG_LINKEDIT one is remembered.
          if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
            linkedit_segment = cur_seg_cmd;
          }
        } else if (cur_seg_cmd->cmd == LC_SYMTAB) {
          symtab_cmd = (struct symtab_command*)cur_seg_cmd;
        } else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
          dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd;
        }
      }
    

    这里通过for循环获取Load Commands中的
    LC_SEGMENT_64(_LINKEDIT)
    LC_SYMTAB
    LC_DYSYMTAB
    三个结构体在虚拟内存中的地址。

    LC_SEGMENT_64(_LINKEDIT) 包含给动态链接器的原始数据的段,包括符号和字符串表,压缩动态链接信息,以及动态符号表等。
    LC_SYMTAB 符号表。
    LC_DYSYMTAB 表示动态符号表。
    在xcode中进行断点调试和MachOView中的结果进行比较:
    LC_SEGMENT_64(_LINKEDIT)地址:

    linkedit_segment指针 LC_SEGMENT_64(_LINKEDIT)

    LC_SYMTAB 符号表地址:

    symtab_cmd LC_SYMTAB

    LC_DYSYMTAB 动态符号表地址:

    dysymtab_cmd LC_DYSYMTAB

    3、得到延迟加载和非延迟加载的Section

    // symoff/stroff/indirectsymoff are file offsets; subtracting the linkedit
    // segment's fileoff and adding its slid vmaddr turns them into addresses.
    uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
      nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
      char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);
    
      // Get indirect symbol table (array of uint32_t indices into symbol table)
      uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);
    
      // Walk the load commands again, this time visiting only the SEG_DATA and
      // SEG_DATA_CONST segments.
      cur = (uintptr_t)header + sizeof(mach_header_t);
      for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
        cur_seg_cmd = (segment_command_t *)cur;
        if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
          if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
              strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
            continue;
          }
          for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
            // The section headers are read from the bytes immediately after
            // the segment command; j selects the j-th one.
            section_t *sect =
              (section_t *)(cur + sizeof(segment_command_t)) + j;
            // Only lazy and non-lazy symbol pointer sections are rebound.
            if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
              perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
            }
            if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
              perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
            }
          }
        }
      }
    

    symtab 符号表地址,作用是通过其中每一项的n_strx找到函数名称在字符串表中的偏移。
    indirect_symtab 间接符号表地址(由LC_DYSYMTAB中的indirectsymoff定位),其中每一项是函数在符号表symtab中的下标。
    strtab 字符串表地址,也就是函数名称所在的地址,作用是在hook的时候用其中的字符串与要hook的函数名进行比较。

    通过for循环,可以拿到延迟加载和非延迟加载的Section,通过Section中的偏移量,可以找到在indirect_symtab中的地址。

    非延迟加载的section WeChat_1504596261.jpeg

    得到Section后,调用perform_rebinding_with_section。
    源码如下:

    // Applies the registered rebindings to every pointer slot of one symbol
    // pointer section.
    //
    // rebindings:      head of the linked list of rebinding batches.
    // section:         the symbol pointer section to process.
    // slide:           added to section->addr to locate the slots in memory.
    // symtab:          symbol table of the image.
    // strtab:          string table holding the symbol names.
    // indirect_symtab: indirect symbol table (indices into symtab).
    //
    // For each slot whose symbol name matches a rebinding, the slot's current
    // value is optionally saved through `replaced` and the slot is overwritten
    // with `replacement`.
    // NOTE(review): the slots are written directly; this assumes the section's
    // memory is writable -- confirm for SEG_DATA_CONST sections.
    static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
                                               section_t *section,
                                               intptr_t slide,
                                               nlist_t *symtab,
                                               char *strtab,
                                               uint32_t *indirect_symtab) {
      // reserved1 is used as this section's starting offset into the indirect
      // symbol table; entry i there corresponds to pointer slot i below.
      uint32_t *indirect_symbol_indices = indirect_symtab + section->reserved1;
      void **indirect_symbol_bindings = (void **)((uintptr_t)slide + section->addr);
      for (uint i = 0; i < section->size / sizeof(void *); i++) {
        uint32_t symtab_index = indirect_symbol_indices[i];
        // Entries flagged absolute and/or local are not symbol table indices;
        // skip them.
        if (symtab_index == INDIRECT_SYMBOL_ABS || symtab_index == INDIRECT_SYMBOL_LOCAL ||
            symtab_index == (INDIRECT_SYMBOL_LOCAL   | INDIRECT_SYMBOL_ABS)) {
          continue;
        }
        uint32_t strtab_offset = symtab[symtab_index].n_un.n_strx;
        char *symbol_name = strtab + strtab_offset;
        // The comparison below starts at symbol_name[1] (presumably skipping a
        // leading underscore -- confirm), so names shorter than 2 characters
        // cannot match and are skipped.
        if (strnlen(symbol_name, 2) < 2) {
          continue;
        }
        struct rebindings_entry *cur = rebindings;
        while (cur) {
          for (uint j = 0; j < cur->rebindings_nel; j++) {
            if (strcmp(&symbol_name[1], cur->rebindings[j].name) == 0) {
              // Save the current slot value for the caller, unless the slot
              // already holds the replacement (which would lose the original).
              if (cur->rebindings[j].replaced != NULL &&
                  indirect_symbol_bindings[i] != cur->rebindings[j].replacement) {
                *(cur->rebindings[j].replaced) = indirect_symbol_bindings[i];
              }
              indirect_symbol_bindings[i] = cur->rebindings[j].replacement;
              // First match wins: stop searching and move on to the next slot.
              goto symbol_loop;
            }
          }
          cur = cur->next;
        }
      symbol_loop:;
      }
    }
    

    uint32_t *indirect_symbol_indices = indirect_symtab + section->reserved1; 通过Section的reserved1偏移量得到该Section在间接符号表中的起始地址。

    uint32_t symtab_index = indirect_symbol_indices[i]; 得到该符号在符号表中的下标。

    uint32_t strtab_offset = symtab[symtab_index].n_un.n_strx; 得到函数符号在字符串表中的偏移量,从而得到函数名称字符串symbol_name,再通过while循环和链表中的每个节点进行函数名称的比较。

    // Walk every registered batch, newest first, looking for a matching name.
    while (cur) {
          for (uint j = 0; j < cur->rebindings_nel; j++) {
            if (strcmp(&symbol_name[1], cur->rebindings[j].name) == 0) {
              // Save the current slot value for the caller, unless the slot
              // already holds the replacement.
              if (cur->rebindings[j].replaced != NULL &&
                  indirect_symbol_bindings[i] != cur->rebindings[j].replacement) {
                *(cur->rebindings[j].replaced) = indirect_symbol_bindings[i];
              }
              indirect_symbol_bindings[i] = cur->rebindings[j].replacement;
              // First match wins: stop searching for this slot.
              goto symbol_loop;
            }
          }
          cur = cur->next;
        }
    

    如果相同的话,会在具体的函数地址indirect_symbol_bindings中进行替换。indirect_symbol_bindings[i] = cur->rebindings[j].replacement;

    *(cur->rebindings[j].replaced) = indirect_symbol_bindings[i];将原来的函数保存。

    到此,hook动作就完成了。

    结论:
    初始化时
    my_close的函数地址:
    Printing description of reb[0].replacement:
    (void *) replacement = 0x00000001000019b0

    my_open的函数地址:
    Printing description of reb[1].replacement:
    (void *) replacement = 0x00000001000019f0

    当替换完成后查看虚拟内存地址:

    已经hook完成

    相关文章

      网友评论

          本文标题:FishHook 源码解读

          本文链接:https://www.haomeiwen.com/subject/mfrujxtx.html