Facebook Redex分析(一)

作者: LSteven | 来源:发表于2020-04-03 16:58 被阅读0次

    拖了好久,想开始分析下redex系列

    redex_frontend

    这一part会分析redex_frontend,redex_frontend是在开始各个pass之前做的预处理,包括加载classes.dex, 收集类,分析混淆关系,分析代码可达性。

    // Create the root store, named "classes".
    DexStore root_store("classes");
    // Only set dex magic to root DexStore since all dex magic
    // should be consistent within one APK.
    root_store.set_dex_magic(get_dex_magic(args.dex_files));
    

    这里get_dex_magic其实就会去加载dex文件,然后从dex头部获取magic。

    loadClass

    // Load the classes of one dex file; dex_stats receives that file's stats.
    dex_stats_t dex_stats;
    DexClasses classes =
        load_classes_from_dex(filename.c_str(), &dex_stats);
    // Add this file's stats to the running total and keep a per-dex copy.
    input_totals += dex_stats;
    input_dexes_stats.push_back(dex_stats);
    // Move the loaded classes into the first store.
    stores[0].add_classes(std::move(classes));
    

    找到文件里的dex_files开始遍历加载类

    DexLoader

    // Path-based entry point: obtain the dex header for `location`, validate it
    // against m_file's size (honouring support_dex_v37), then delegate to the
    // header-based overload, which fills `stats`.
    DexClasses DexLoader::load_dex(const char* location,
                                   dex_stats_t* stats,
                                   bool support_dex_v37) {
      const dex_header* header = get_dex_header(location);
      validate_dex_header(header, m_file.size(), support_dex_v37);
      return load_dex(header, stats);
    }
    
    // Header-based overload: loads the classes listed in the class_defs table of
    // the dex whose header is `dh` and records input statistics into `stats`.
    // Returns an empty DexClasses when the header declares zero class defs.
    // NOTE: the worker loop below is abbreviated article pseudo-code, not valid
    // C++.
    DexClasses DexLoader::load_dex(const dex_header* dh, dex_stats_t* stats) {
      if (dh->class_defs_size == 0) {
        return DexClasses(0);
      }
      m_idx = new DexIdx(dh);
      // The class_def array starts class_defs_off bytes after the header address.
      auto off = (uint64_t)dh->class_defs_off;
      m_class_defs =
          reinterpret_cast<const dex_class_def*>((const uint8_t*)dh + off);
      // One slot per class def; m_classes points at this local list.
      DexClasses classes(dh->class_defs_size);
      m_classes = &classes;
    
      // One work item per class def.
      auto lwork = new class_load_work[dh->class_defs_size];
      for(work in class_load_work):
        clw->dl->load_dex_class(clw->num); // simplified a bit here by the article author
      ...
      gather_input_stats(stats, dh);
    
      // Remove nulls from the classes list. They may have been introduced by benign
      // duplicate classes.
      classes.erase(std::remove(classes.begin(), classes.end(), nullptr),
                    classes.end());
    
      return classes;
    }
    

    每个DexClass都有自己的dex_class_def结构;这些结构以数组形式存放,用类的index作为数组下标(偏移量)即可定位到对应的dex_class_def。

    DexClass Create

    开始为每个类构建DexClass

    // Factory for a DexClass described by `cdef`.
    // Returns the newly published class, or nullptr when g_redex reports that
    // the class has already been loaded (the fresh instance is deleted then).
    DexClass* DexClass::create(DexIdx* idx,
                               const dex_class_def* cdef,
                               const std::string& location) {
      DexClass* klass = new DexClass(idx, cdef, location);
      if (!g_redex->class_already_loaded(klass)) {
        // Three loading steps: annotations, static values, class data.
        klass->load_class_annotations(idx, cdef->annotations_off);
        std::unique_ptr<DexEncodedValueArray> static_values(
            load_static_values(idx, cdef->static_values_off));
        klass->load_class_data_item(idx, cdef->class_data_offset,
                                    static_values.get());
        g_redex->publish_class(klass);
        return klass;
      }
      // FIXME: This isn't deterministic. Whichever class was loaded first is
      // kept, and that may not always come from the same dex (for example when
      // dexes are loaded in parallel).
      delete klass;
      return nullptr;
    }
    

    分为三块:

    load_class_annotation

    代码看着很复杂,其实就是分别找到class&field&method绑定的annotation以及方法内部参数对应的annotation

    // Reads the annotations directory at `anno_off` and attaches annotation sets
    // to this class (m_anno), to its fields, to its methods, and to method
    // parameters. Does nothing when `anno_off` is 0.
    void DexClass::load_class_annotations(DexIdx* idx, uint32_t anno_off) {
      if (anno_off == 0) return;
      const dex_annotations_directory_item* annodir =
          (const dex_annotations_directory_item*)idx->get_uint_data(anno_off);
      // Class-level annotations.
      m_anno =
          DexAnnotationSet::get_annotation_set(idx, annodir->class_annotations_off);
      // The entries are read starting right after the directory header; annodata
      // is a cursor that advances two uint32 values per entry.
      const uint32_t* annodata = (uint32_t*)(annodir + 1);
      // Field entries: (field index, annotation-set offset).
      for (uint32_t i = 0; i < annodir->fields_size; i++) {
        uint32_t fidx = *annodata++;
        uint32_t off = *annodata++;
        DexField* field = static_cast<DexField*>(idx->get_fieldidx(fidx));
        DexAnnotationSet* aset = DexAnnotationSet::get_annotation_set(idx, off);
        field->attach_annotation_set(aset);
      }
      // Method entries: (method index, annotation-set offset).
      for (uint32_t i = 0; i < annodir->methods_size; i++) {
        uint32_t midx = *annodata++;
        uint32_t off = *annodata++;
        DexMethod* method = static_cast<DexMethod*>(idx->get_methodidx(midx));
        DexAnnotationSet* aset = DexAnnotationSet::get_annotation_set(idx, off);
        method->attach_annotation_set(aset);
      }
      // Parameter entries: (method index, offset of a reference list). Entries
      // with a zero offset are skipped.
      for (uint32_t i = 0; i < annodir->parameters_size; i++) {
        uint32_t midx = *annodata++;
        uint32_t xrefoff = *annodata++;
        if (xrefoff != 0) {
          DexMethod* method = static_cast<DexMethod*>(idx->get_methodidx(midx));
          // The reference list is a count followed by that many set offsets.
          const uint32_t* annoxref = idx->get_uint_data(xrefoff);
          uint32_t count = *annoxref++;
          for (uint32_t j = 0; j < count; j++) {
            uint32_t off = annoxref[j];
            DexAnnotationSet* aset = DexAnnotationSet::get_annotation_set(idx, off);
            // j is passed on as the parameter position; null sets are skipped.
            if (aset != nullptr) {
              method->attach_param_annotation_set(j, aset);
              redex_assert(method->get_param_anno());
            }
          }
        }
      }
    }
    
    
    load_static_value
    // Decodes an encoded-value array from `encdata`: a ULEB128 element count
    // followed by that many encoded values. `encdata` is taken by reference and
    // handed to the decoders, so the caller's cursor moves as data is consumed.
    // Returns a newly allocated DexEncodedValueArray built from the decoded
    // values, in stream order.
    DexEncodedValueArray* get_encoded_value_array(DexIdx* idx,
                                                  const uint8_t*& encdata) {
      const uint32_t value_count = read_uleb128(&encdata);
      auto* values = new std::deque<DexEncodedValue*>();
      for (uint32_t remaining = value_count; remaining != 0; --remaining) {
        values->push_back(DexEncodedValue::get_encoded_value(idx, encdata));
      }
      return new DexEncodedValueArray(values);
    }
    

    获取静态变量背后的值,比如static int a = 1,那就是1,后面会把这个1和变量a绑定在一起

    load_class_data_item

    重点函数,拿到sfield&ifield&dmethod&vmethod的个数,然后加载进来,构造出DexField&DexMethod

    
    // Decodes the class data at `cdi_off`: reads the four member counts, then
    // the static fields, instance fields, direct methods and virtual methods, in
    // that order, making each one concrete and appending it to m_sfields,
    // m_ifields, m_dmethods or m_vmethods. `svalues` (may be null) supplies the
    // values for static fields. Does nothing when `cdi_off` is 0.
    void DexClass::load_class_data_item(DexIdx* idx,
                                        uint32_t cdi_off,
                                        DexEncodedValueArray* svalues) {
      if (cdi_off == 0) return;
      const uint8_t* encd = idx->get_uleb_data(cdi_off);
      uint32_t sfield_count = read_uleb128(&encd);
      uint32_t ifield_count = read_uleb128(&encd);
      uint32_t dmethod_count = read_uleb128(&encd);
      uint32_t vmethod_count = read_uleb128(&encd);
      // Each decoded index value is added to the running ndex; ndex is reset to
      // 0 before every section.
      uint32_t ndex = 0;
      for (uint32_t i = 0; i < sfield_count; i++) { // read the static fields
        ndex += read_uleb128(&encd); // accumulate the field index
        auto access_flags = (DexAccessFlags)read_uleb128(&encd); // read the access flags
        DexField* df = static_cast<DexField*>(idx->get_fieldidx(ndex));
        DexEncodedValue* ev = nullptr; // value belonging to this static field, if any
        if (svalues != nullptr) {
          ev = svalues->pop_next();
        }
        df->make_concrete(access_flags, ev);
        m_sfields.push_back(df);
      }
      ndex = 0;
      for (uint32_t i = 0; i < ifield_count; i++) {  
        ndex += read_uleb128(&encd);
        auto access_flags = (DexAccessFlags)read_uleb128(&encd);
        // Look up the DexField by index. Per the article author, a DexField is
        // made of three parts: the DexType of the class, the DexType of the
        // field's type, and the DexString of its name.
        DexField* df = static_cast<DexField*>(idx->get_fieldidx(ndex));
        df->make_concrete(access_flags);
        m_ifields.push_back(df);
      }
    
      // Methods already seen in this class, used to reject duplicates across
      // both the direct and the virtual section.
      std::unordered_set<DexMethod*> method_pointer_cache;
    
      ndex = 0;
      for (uint32_t i = 0; i < dmethod_count; i++) {
        ndex += read_uleb128(&encd);
        auto access_flags = (DexAccessFlags)read_uleb128(&encd);
        uint32_t code_off = read_uleb128(&encd);
        // Find method in method index, returns same pointer for same method.
        DexMethod* dm = static_cast<DexMethod*>(idx->get_methodidx(ndex));//class DexType, proto DexProto, name DexString
        std::unique_ptr<DexCode> dc = DexCode::get_dex_code(idx, code_off);
        if (dc && dc->get_debug_item()) {
          dc->get_debug_item()->bind_positions(dm, m_source_file);
        }
        // Last argument is false here and true in the virtual-method loop
        // (presumably an is-virtual flag; confirm against make_concrete).
        dm->make_concrete(access_flags, std::move(dc), false);
    
        assert_or_throw(
            method_pointer_cache.count(dm) == 0, RedexError::DUPLICATE_METHODS,
            "Found duplicate methods in the same class.", {{"method", SHOW(dm)}});
    
        method_pointer_cache.insert(dm);
        m_dmethods.push_back(dm);
      }
      ndex = 0;
      for (uint32_t i = 0; i < vmethod_count; i++) {
        ndex += read_uleb128(&encd);
        auto access_flags = (DexAccessFlags)read_uleb128(&encd);
        uint32_t code_off = read_uleb128(&encd);
        // Find method in method index, returns same pointer for same method.
        DexMethod* dm = static_cast<DexMethod*>(idx->get_methodidx(ndex));
        auto dc = DexCode::get_dex_code(idx, code_off);
        if (dc && dc->get_debug_item()) {
          dc->get_debug_item()->bind_positions(dm, m_source_file);
        }
        dm->make_concrete(access_flags, std::move(dc), true);
    
        assert_or_throw(
            method_pointer_cache.count(dm) == 0, RedexError::DUPLICATE_METHODS,
            "Found duplicate methods in the same class.", {{"method", SHOW(dm)}});
    
        method_pointer_cache.insert(dm);
        m_vmethods.push_back(dm);
      }
    }
    

    我们以DexField为例:

    // Excerpt: handling of one static field.
    // The decoded value is added to the running field index ndex.
    ndex += read_uleb128(&encd);
    auto access_flags = (DexAccessFlags)read_uleb128(&encd);
    DexField* df = static_cast<DexField*>(idx->get_fieldidx(ndex));
      // Take the next static value when a value array was supplied.
      DexEncodedValue* ev = nullptr;
      if (svalues != nullptr) {
          ev = svalues->pop_next();
      }
      df->make_concrete(access_flags, ev);
    m_sfields.push_back(df);
    

    首先获得该fieldindex,然后读取出accessFlag, 然后根据idx构造DexField:

    // Resolves entry `fidx` of the field-id table into a field reference by
    // looking up its class type, field type and name, then passing the three to
    // DexField::make_field. Asserts that `fidx` is below m_field_ids_size.
    DexFieldRef* DexIdx::get_fieldidx_fromdex(uint32_t fidx) {
      redex_assert(fidx < m_field_ids_size);
      const auto& field_id = m_field_ids[fidx];
      DexType* owner_type = get_typeidx(field_id.classidx);
      DexType* field_type = get_typeidx(field_id.typeidx);
      DexString* field_name = get_stringidx(field_id.nameidx);
      return DexField::make_field(owner_type, field_name, field_type);
    }
    

    其实就是把field所属类的类型,field的类型和名字找到拼起来。

    get_dex_code 组织dexcode

    构建DexMethod时比变量多一步dexcode,会将这个dexCode和dexMethod绑定。

    // Builds a DexCode from the code item at `offset`: copies the register/ins/
    // outs sizes, decodes the instruction stream, then the try items with their
    // catch handlers, and finally the debug item. Returns an empty unique_ptr
    // when `offset` is 0.
    std::unique_ptr<DexCode> DexCode::get_dex_code(DexIdx* idx, uint32_t offset) {
      if (offset == 0) return std::unique_ptr<DexCode>();
      const dex_code_item* code = (const dex_code_item*)idx->get_uint_data(offset);
      std::unique_ptr<DexCode> dc(new DexCode());
      dc->m_registers_size = code->registers_size;
      dc->m_ins_size = code->ins_size;
      dc->m_outs_size = code->outs_size;
      dc->m_insns.reset(new std::vector<DexInstruction*>());
      // cdata starts right after the fixed code-item header, as 16-bit units.
      const uint16_t* cdata = (const uint16_t*)(code + 1);
      uint32_t tries = code->tries_size;
      if (code->insns_size) { // collect every instruction of the code item
        const uint16_t* end = cdata + code->insns_size;
        // make_instruction receives &cdata; the loop ends once cdata reaches end.
        while (cdata < end) {
          DexInstruction* dop = DexInstruction::make_instruction(idx, &cdata);
          always_assert_log(dop != nullptr,
                            "Failed to parse method at offset 0x%08x", offset);
          dc->m_insns->push_back(dop);
        }
        /*
         * Padding, see dex-spec.
         * Per my memory, there are dex-files where the padding is
         * implemented not according to spec.  Just FYI in case
         * something weird happens in the future.
         */
        // Skip one 16-bit unit when insns_size is odd and try items follow.
        if (code->insns_size & 1 && tries) cdata++;
      }
    
      if (tries) { // try/catch structures; not analyzed in detail in this article
        const dex_tries_item* dti = (const dex_tries_item*)cdata;
        // Handler data begins right after the array of `tries` try items.
        const uint8_t* handlers = (const uint8_t*)(dti + tries);
        for (uint32_t i = 0; i < tries; i++) {
          DexTryItem* dextry = new DexTryItem(dti[i].start_addr, dti[i].insn_count);
          const uint8_t* handler = handlers + dti[i].handler_off;
          // A count <= 0 means |count| typed handlers followed by one catch-all.
          int32_t count = read_sleb128(&handler);
          bool has_catchall = false;
          if (count <= 0) {
            count = -count;
            has_catchall = true;
          }
          // Typed handlers: (type index, handler offset) pairs.
          while (count--) {
            uint32_t tidx = read_uleb128(&handler);
            uint32_t hoff = read_uleb128(&handler);
            DexType* dt = idx->get_typeidx(tidx);
            dextry->m_catches.push_back(std::make_pair(dt, hoff));
          }
          // The catch-all handler is stored with a null type.
          if (has_catchall) {
            auto hoff = read_uleb128(&handler);
            dextry->m_catches.push_back(std::make_pair(nullptr, hoff));
          }
          dc->m_tries.emplace_back(dextry);
        }
      }
      dc->m_dbg = DexDebugItem::get_dex_debug(idx, code->debug_info_off); // left for the later debug-info analysis
      return dc;
    }
    

    debug信息

    这里处理dexFile内的调试信息,后面分析

    // Only when the method has code and that code has a debug item: bind the
    // debug item's positions to the method dm and the class's source file.
    if (dc && dc->get_debug_item()) {
      dc->get_debug_item()->bind_positions(dm, m_source_file);
    }
    

    解混淆

    这里处理混淆关系,后面分析

    // Parse every ProGuard config file listed in args into pg_config; each
    // iteration creates its own Timer.
    for (const auto& pg_config_path : args.proguard_config_paths) {
        Timer time_pg_parsing("Parsed ProGuard config file");
        keep_rules::proguard_parser::parse_file(pg_config_path, &pg_config);
    }
    keep_rules::proguard_parser::remove_blacklisted_rules(&pg_config);
    
    // Apply deobfuscated names to the dexes of every store, using the ProGuard
    // map from conf.
    for (auto& store : stores) {
      apply_deobfuscated_names(store.get_dexen(), conf.get_proguard_map());
    }
    

    build_class_scope

    很简单,收集DexClasses而已

    // Build the Scope from an iterator constructed over all stores.
    DexStoreClassesIterator it(stores);
    Scope scope = build_class_scope(it);
    

    optimization

    这里还是和混淆有关,后面分析

    {
        Timer t("Processing proguard rules");
    
        // Read the "keep_all_annotation_classes" option from the JSON config
        // (the literal `true` is presumably the default; confirm against
        // json_config.get) and pass it on to the rule processing.
        bool keep_all_annotation_classes;
        json_config.get("keep_all_annotation_classes", true,
                        keep_all_annotation_classes);
        process_proguard_rules(conf.get_proguard_map(), scope, external_classes,
                               pg_config, keep_all_annotation_classes);
      }
      {
        Timer t("No Optimizations Rules");
        // this will change rstate of methods
        keep_rules::process_no_optimizations_rules(
            conf.get_no_optimizations_annos(), scope);
        monitor_count::mark_sketchy_methods_with_no_optimize(scope);
      }
    

    分析可达性

    分析反射&代码调用中可达的类,后面分析

    /*
     * Initializes the list of classes that are reachable via reflection or via
     * calls from code.
     *
     * These include:
     *  - Classes used in the manifest (e.g. activities, services, etc)
     *  - View or Fragment classes used in layouts
     *  - Classes that are in certain packages (specified in the reflected_packages
     *    section of the config) and classes that extend from them
     *  - Classes marked with special annotations (keep_annotations in config)
     *  - Classes reachable from native libraries
     */
    
      {
        Timer t("Initializing reachable classes");
        // init reachable will change rstate of classes, methods and fields
        init_reachable_classes(scope, json_config,
                               conf.get_no_optimizations_annos());
      }
    

    相关文章

      网友评论

        本文标题:Facebook Redex分析(一)

        本文链接:https://www.haomeiwen.com/subject/allkphtx.html