美文网首页Android安全-源码分析
彻底弄懂dalvik字节码【二】

彻底弄懂dalvik字节码【二】

作者: difcareer | 来源:发表于2016-10-23 23:28 被阅读233次

    【一】中讲到了最重要的dvmInterpret,继续跟:

    /*
     * Interpret "method" on thread "self" and copy the interpreter's return
     * value into *pResult.
     *
     * The thread's current interpSave state is copied to a local and linked
     * through interpSave.prev, the working state (method, pc) is set up for the
     * new activation, an interpreter is chosen from gDvm.executionMode and
     * invoked, and finally the saved state is restored.
     */
    void dvmInterpret(Thread* self, const Method* method, JValue* pResult)
    {
        InterpSaveState interpSaveState;
        ExecutionSubModes savedSubModes;
    
    #if defined(WITH_JIT)
        /* Target-specific save/restore */
        double calleeSave[JIT_CALLEE_SAVE_DOUBLE_COUNT];
        /*
         * If the previous VM left the code cache through single-stepping the
         * inJitCodeCache flag will be set when the VM is re-entered (for example,
         * in self-verification mode we single-step NEW_INSTANCE which may re-enter
         * the VM through findClassFromLoaderNoInit). Because of that, we cannot
         * assert that self->inJitCodeCache is NULL here.
         */
    #endif
    
        /*
         * Save interpreter state from previous activation, linking
         * new to last.
         */
        interpSaveState = self->interpSave;
        self->interpSave.prev = &interpSaveState;
        /*
         * Strip out and save any flags that should not be inherited by
         * nested interpreter activation.
         */
        savedSubModes = (ExecutionSubModes)(
                  self->interpBreak.ctl.subMode & LOCAL_SUBMODE);
        if (savedSubModes != kSubModeNormal) {
            dvmDisableSubMode(self, savedSubModes);
        }
    #if defined(WITH_JIT)
        dvmJitCalleeSave(calleeSave);
    #endif
    
    
    #if defined(WITH_TRACKREF_CHECKS)
        self->interpSave.debugTrackedRefStart =
            dvmReferenceTableEntries(&self->internalLocalRefTable);
    #endif
        self->debugIsMethodEntry = true;
    #if defined(WITH_JIT)
        /* Initialize the state to kJitNot */
        self->jitState = kJitNot;
    #endif
    
        /*
         * Initialize working state.
         *
         * No need to initialize "retval".
         */
        self->interpSave.method = method;
        /*
         * NOTE(review): this assigns curFrame to itself through a cast, so the
         * frame pointer value is left as it was on entry.
         */
        self->interpSave.curFrame = (u4*) self->interpSave.curFrame;
        /* Start executing at the first code unit of the method. */
        self->interpSave.pc = method->insns;
    
        assert(!dvmIsNativeMethod(method));
    
        /*
         * Make sure the class is ready to go.  Shouldn't be possible to get
         * here otherwise.
         */
        if (method->clazz->status < CLASS_INITIALIZING ||
            method->clazz->status == CLASS_ERROR)
        {
            ALOGE("ERROR: tried to execute code in unprepared class '%s' (%d)",
                method->clazz->descriptor, method->clazz->status);
            dvmDumpThread(self, false);
            dvmAbort();
        }
    
        /*
         * Select the interpreter entry point from the VM execution mode:
         * dvmMterpStd for the fast mode (and, when built WITH_JIT, for the JIT
         * and NCG modes), dvmInterpretPortable for everything else.
         */
        typedef void (*Interpreter)(Thread*);
        Interpreter stdInterp;
        if (gDvm.executionMode == kExecutionModeInterpFast)
            stdInterp = dvmMterpStd;
    #if defined(WITH_JIT)
        else if (gDvm.executionMode == kExecutionModeJit ||
                 gDvm.executionMode == kExecutionModeNcgO0 ||
                 gDvm.executionMode == kExecutionModeNcgO1)
            stdInterp = dvmMterpStd;
    #endif
        else
            stdInterp = dvmInterpretPortable;
    
        // Call the interpreter
        (*stdInterp)(self);
    
        /* Hand the value left in interpSave.retval back to the caller. */
        *pResult = self->interpSave.retval;
    
        /* Restore interpreter state from previous activation */
        self->interpSave = interpSaveState;
    #if defined(WITH_JIT)
        dvmJitCalleeRestore(calleeSave);
    #endif
        /* Re-enable the sub-mode flags that were stripped on entry. */
        if (savedSubModes != kSubModeNormal) {
            dvmEnableSubMode(self, savedSubModes);
        }
    }
    

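    这个方法中先保存了前一个方法的状态,然后初始化当前方法的状态,比如设置pc指向方法的字节码开始处等。接着根据gDvm.executionMode选择解释器:fast模式(以及开启JIT时的几种模式)使用dvmMterpStd,其余情况使用dvmInterpretPortable。解释器执行完毕后,把返回值写入pResult,并恢复前一个方法的状态。本文以可移植版的dvmInterpretPortable为例继续分析。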

    继续跟dvmInterpretPortable:

    void dvmInterpretPortable(Thread* self)
    {
    #if defined(EASY_GDB)
        StackSaveArea* debugSaveArea = SAVEAREA_FROM_FP(self->interpSave.curFrame);
    #endif
        DvmDex* methodClassDex;     // curMethod->clazz->pDvmDex
        JValue retval;
    
        /* core state */
        const Method* curMethod;    // method we're interpreting
        const u2* pc;               // program counter
        u4* fp;                     // frame pointer
        u2 inst;                    // current instruction
        /* instruction decoding */
        u4 ref;                     // 16 or 32-bit quantity fetched directly
        u2 vsrc1, vsrc2, vdst;      // usually used for register indexes
        /* method call setup */
        const Method* methodToCall;
        bool methodCallRange;
    
        /* static computed goto table */
        DEFINE_GOTO_TABLE(handlerTable);
    
        /* copy state in */
        curMethod = self->interpSave.method;
        pc = self->interpSave.pc;
        fp = self->interpSave.curFrame;
        retval = self->interpSave.retval;   /* only need for kInterpEntryReturn? */
    
        methodClassDex = curMethod->clazz->pDvmDex;
    
        LOGVV("threadid=%d: %s.%s pc=%#x fp=%p",
            self->threadId, curMethod->clazz->descriptor, curMethod->name,
            pc - curMethod->insns, fp);
    
        /*
         * Handle any ongoing profiling and prep for debugging.
         */
        if (self->interpBreak.ctl.subMode != 0) {
            TRACE_METHOD_ENTER(self, curMethod);
            self->debugIsMethodEntry = true;   // Always true on startup
        }
        /*
         * DEBUG: scramble this to ensure we're not relying on it.
         */
        methodToCall = (const Method*) -1;
    
    #if 0
        if (self->debugIsMethodEntry) {
            ILOGD("|-- Now interpreting %s.%s", curMethod->clazz->descriptor,
                    curMethod->name);
            DUMP_REGS(curMethod, self->interpSave.curFrame, false);
        }
    #endif
    
        FINISH(0);                  /* fetch and execute first instruction */
    
    /*--- start of opcodes ---*/
    
    

    细心的朋友在阅读源码的时候,可能会发现这个方法的方法体括号居然没有闭合,这是有原因的,因为这里面有很多的宏定义,宏定义展开后,才是完整的方法体。

    我们可以看到,这个方法中,直接从self->interpSave中取出dvmInterpret里已经设置好的各类信息,比如当前执行的method、pc、栈帧指针fp等,同时声明了若干变量:pc、fp、inst等,这些变量在后面分析的宏中被直接赋值和使用,所以在后面分析宏的时候,留意这些变量。

    第一个宏DEFINE_GOTO_TABLE:

    /*
     * Declares "_name" as a static const array of kNumPackedOpcodes handler
     * label addresses, one H(<opcode>) entry per opcode.  FINISH() indexes
     * this table with the opcode byte of the current instruction and jumps to
     * the selected entry.  The entry list is marked as generated by opcode-gen.
     */
    #define DEFINE_GOTO_TABLE(_name) \
        static const void* _name[kNumPackedOpcodes] = {                      \
            /* BEGIN(libdex-goto-table); GENERATED AUTOMATICALLY BY opcode-gen */ \
            H(OP_NOP),                                                            \
            H(OP_MOVE),                                                           \
            H(OP_MOVE_FROM16),                                                    \
            H(OP_MOVE_16),                                                        \
            H(OP_MOVE_WIDE),                                                      \
            H(OP_MOVE_WIDE_FROM16),                                               \
            H(OP_MOVE_WIDE_16),                                                   \
            H(OP_MOVE_OBJECT),                                                    \
            H(OP_MOVE_OBJECT_FROM16),                                             \
            H(OP_MOVE_OBJECT_16),                                                 \
            H(OP_MOVE_RESULT),                                                    \
            H(OP_MOVE_RESULT_WIDE),                                               \
            H(OP_MOVE_RESULT_OBJECT),                                             \
            H(OP_MOVE_EXCEPTION),                                                 \
            H(OP_RETURN_VOID),                                                    \
            H(OP_RETURN),                                                         \
            H(OP_RETURN_WIDE),                                                    \
            H(OP_RETURN_OBJECT),                                                  \
            H(OP_CONST_4),                                                        \
            H(OP_CONST_16),                                                       \
            H(OP_CONST),                                                          \
            H(OP_CONST_HIGH16),                                                   \
            H(OP_CONST_WIDE_16),                                                  \
            H(OP_CONST_WIDE_32),                                                  \
            H(OP_CONST_WIDE),                                                     \
            H(OP_CONST_WIDE_HIGH16),                                              \
            H(OP_CONST_STRING),                                                   \
            H(OP_CONST_STRING_JUMBO),                                             \
            H(OP_CONST_CLASS),                                                    \
            H(OP_MONITOR_ENTER),                                                  \
            H(OP_MONITOR_EXIT),                                                   \
            H(OP_CHECK_CAST),                                                     \
            H(OP_INSTANCE_OF),                                                    \
            H(OP_ARRAY_LENGTH),                                                   \
            H(OP_NEW_INSTANCE),                                                   \
            H(OP_NEW_ARRAY),                                                      \
            H(OP_FILLED_NEW_ARRAY),                                               \
            H(OP_FILLED_NEW_ARRAY_RANGE),                                         \
            H(OP_FILL_ARRAY_DATA),                                                \
            H(OP_THROW),                                                          \
            H(OP_GOTO),                                                           \
            H(OP_GOTO_16),                                                        \
            H(OP_GOTO_32),                                                        \
            H(OP_PACKED_SWITCH),                                                  \
            H(OP_SPARSE_SWITCH),                                                  \
            H(OP_CMPL_FLOAT),                                                     \
            H(OP_CMPG_FLOAT),                                                     \
            H(OP_CMPL_DOUBLE),                                                    \
            H(OP_CMPG_DOUBLE),                                                    \
            H(OP_CMP_LONG),                                                       \
            H(OP_IF_EQ),                                                          \
            H(OP_IF_NE),                                                          \
            H(OP_IF_LT),                                                          \
            H(OP_IF_GE),                                                          \
            H(OP_IF_GT),                                                          \
            H(OP_IF_LE),                                                          \
            H(OP_IF_EQZ),                                                         \
            H(OP_IF_NEZ),                                                         \
            H(OP_IF_LTZ),                                                         \
            H(OP_IF_GEZ),                                                         \
            H(OP_IF_GTZ),                                                         \
            H(OP_IF_LEZ),                                                         \
            H(OP_UNUSED_3E),                                                      \
            H(OP_UNUSED_3F),                                                      \
            H(OP_UNUSED_40),                                                      \
            H(OP_UNUSED_41),                                                      \
            H(OP_UNUSED_42),                                                      \
            H(OP_UNUSED_43),                                                      \
            H(OP_AGET),                                                           \
            H(OP_AGET_WIDE),                                                      \
            H(OP_AGET_OBJECT),                                                    \
            H(OP_AGET_BOOLEAN),                                                   \
            H(OP_AGET_BYTE),                                                      \
            H(OP_AGET_CHAR),                                                      \
            H(OP_AGET_SHORT),                                                     \
            H(OP_APUT),                                                           \
            H(OP_APUT_WIDE),                                                      \
            H(OP_APUT_OBJECT),                                                    \
            H(OP_APUT_BOOLEAN),                                                   \
            H(OP_APUT_BYTE),                                                      \
            H(OP_APUT_CHAR),                                                      \
            H(OP_APUT_SHORT),                                                     \
            H(OP_IGET),                                                           \
            H(OP_IGET_WIDE),                                                      \
            H(OP_IGET_OBJECT),                                                    \
            H(OP_IGET_BOOLEAN),                                                   \
            H(OP_IGET_BYTE),                                                      \
            H(OP_IGET_CHAR),                                                      \
            H(OP_IGET_SHORT),                                                     \
            H(OP_IPUT),                                                           \
            H(OP_IPUT_WIDE),                                                      \
            H(OP_IPUT_OBJECT),                                                    \
            H(OP_IPUT_BOOLEAN),                                                   \
            H(OP_IPUT_BYTE),                                                      \
            H(OP_IPUT_CHAR),                                                      \
            H(OP_IPUT_SHORT),                                                     \
            H(OP_SGET),                                                           \
            H(OP_SGET_WIDE),                                                      \
            H(OP_SGET_OBJECT),                                                    \
            H(OP_SGET_BOOLEAN),                                                   \
            H(OP_SGET_BYTE),                                                      \
            H(OP_SGET_CHAR),                                                      \
            H(OP_SGET_SHORT),                                                     \
            H(OP_SPUT),                                                           \
            H(OP_SPUT_WIDE),                                                      \
            H(OP_SPUT_OBJECT),                                                    \
            H(OP_SPUT_BOOLEAN),                                                   \
            H(OP_SPUT_BYTE),                                                      \
            H(OP_SPUT_CHAR),                                                      \
            H(OP_SPUT_SHORT),                                                     \
            H(OP_INVOKE_VIRTUAL),                                                 \
            H(OP_INVOKE_SUPER),                                                   \
            H(OP_INVOKE_DIRECT),                                                  \
            H(OP_INVOKE_STATIC),                                                  \
            H(OP_INVOKE_INTERFACE),                                               \
            H(OP_UNUSED_73),                                                      \
            H(OP_INVOKE_VIRTUAL_RANGE),                                           \
            H(OP_INVOKE_SUPER_RANGE),                                             \
            H(OP_INVOKE_DIRECT_RANGE),                                            \
            H(OP_INVOKE_STATIC_RANGE),                                            \
            H(OP_INVOKE_INTERFACE_RANGE),                                         \
            H(OP_UNUSED_79),                                                      \
            H(OP_UNUSED_7A),                                                      \
            H(OP_NEG_INT),                                                        \
            H(OP_NOT_INT),                                                        \
            H(OP_NEG_LONG),                                                       \
            H(OP_NOT_LONG),                                                       \
            H(OP_NEG_FLOAT),                                                      \
            H(OP_NEG_DOUBLE),                                                     \
            H(OP_INT_TO_LONG),                                                    \
            H(OP_INT_TO_FLOAT),                                                   \
            H(OP_INT_TO_DOUBLE),                                                  \
            H(OP_LONG_TO_INT),                                                    \
            H(OP_LONG_TO_FLOAT),                                                  \
            H(OP_LONG_TO_DOUBLE),                                                 \
            H(OP_FLOAT_TO_INT),                                                   \
            H(OP_FLOAT_TO_LONG),                                                  \
            H(OP_FLOAT_TO_DOUBLE),                                                \
            H(OP_DOUBLE_TO_INT),                                                  \
            H(OP_DOUBLE_TO_LONG),                                                 \
            H(OP_DOUBLE_TO_FLOAT),                                                \
            H(OP_INT_TO_BYTE),                                                    \
            H(OP_INT_TO_CHAR),                                                    \
            H(OP_INT_TO_SHORT),                                                   \
            H(OP_ADD_INT),                                                        \
            H(OP_SUB_INT),                                                        \
            H(OP_MUL_INT),                                                        \
            H(OP_DIV_INT),                                                        \
            H(OP_REM_INT),                                                        \
            H(OP_AND_INT),                                                        \
            H(OP_OR_INT),                                                         \
            H(OP_XOR_INT),                                                        \
            H(OP_SHL_INT),                                                        \
            H(OP_SHR_INT),                                                        \
            H(OP_USHR_INT),                                                       \
            H(OP_ADD_LONG),                                                       \
            H(OP_SUB_LONG),                                                       \
            H(OP_MUL_LONG),                                                       \
            H(OP_DIV_LONG),                                                       \
            H(OP_REM_LONG),                                                       \
            H(OP_AND_LONG),                                                       \
            H(OP_OR_LONG),                                                        \
            H(OP_XOR_LONG),                                                       \
            H(OP_SHL_LONG),                                                       \
            H(OP_SHR_LONG),                                                       \
            H(OP_USHR_LONG),                                                      \
            H(OP_ADD_FLOAT),                                                      \
            H(OP_SUB_FLOAT),                                                      \
            H(OP_MUL_FLOAT),                                                      \
            H(OP_DIV_FLOAT),                                                      \
            H(OP_REM_FLOAT),                                                      \
            H(OP_ADD_DOUBLE),                                                     \
            H(OP_SUB_DOUBLE),                                                     \
            H(OP_MUL_DOUBLE),                                                     \
            H(OP_DIV_DOUBLE),                                                     \
            H(OP_REM_DOUBLE),                                                     \
            H(OP_ADD_INT_2ADDR),                                                  \
            H(OP_SUB_INT_2ADDR),                                                  \
            H(OP_MUL_INT_2ADDR),                                                  \
            H(OP_DIV_INT_2ADDR),                                                  \
            H(OP_REM_INT_2ADDR),                                                  \
            H(OP_AND_INT_2ADDR),                                                  \
            H(OP_OR_INT_2ADDR),                                                   \
            H(OP_XOR_INT_2ADDR),                                                  \
            H(OP_SHL_INT_2ADDR),                                                  \
            H(OP_SHR_INT_2ADDR),                                                  \
            H(OP_USHR_INT_2ADDR),                                                 \
            H(OP_ADD_LONG_2ADDR),                                                 \
            H(OP_SUB_LONG_2ADDR),                                                 \
            H(OP_MUL_LONG_2ADDR),                                                 \
            H(OP_DIV_LONG_2ADDR),                                                 \
            H(OP_REM_LONG_2ADDR),                                                 \
            H(OP_AND_LONG_2ADDR),                                                 \
            H(OP_OR_LONG_2ADDR),                                                  \
            H(OP_XOR_LONG_2ADDR),                                                 \
            H(OP_SHL_LONG_2ADDR),                                                 \
            H(OP_SHR_LONG_2ADDR),                                                 \
            H(OP_USHR_LONG_2ADDR),                                                \
            H(OP_ADD_FLOAT_2ADDR),                                                \
            H(OP_SUB_FLOAT_2ADDR),                                                \
            H(OP_MUL_FLOAT_2ADDR),                                                \
            H(OP_DIV_FLOAT_2ADDR),                                                \
            H(OP_REM_FLOAT_2ADDR),                                                \
            H(OP_ADD_DOUBLE_2ADDR),                                               \
            H(OP_SUB_DOUBLE_2ADDR),                                               \
            H(OP_MUL_DOUBLE_2ADDR),                                               \
            H(OP_DIV_DOUBLE_2ADDR),                                               \
            H(OP_REM_DOUBLE_2ADDR),                                               \
            H(OP_ADD_INT_LIT16),                                                  \
            H(OP_RSUB_INT),                                                       \
            H(OP_MUL_INT_LIT16),                                                  \
            H(OP_DIV_INT_LIT16),                                                  \
            H(OP_REM_INT_LIT16),                                                  \
            H(OP_AND_INT_LIT16),                                                  \
            H(OP_OR_INT_LIT16),                                                   \
            H(OP_XOR_INT_LIT16),                                                  \
            H(OP_ADD_INT_LIT8),                                                   \
            H(OP_RSUB_INT_LIT8),                                                  \
            H(OP_MUL_INT_LIT8),                                                   \
            H(OP_DIV_INT_LIT8),                                                   \
            H(OP_REM_INT_LIT8),                                                   \
            H(OP_AND_INT_LIT8),                                                   \
            H(OP_OR_INT_LIT8),                                                    \
            H(OP_XOR_INT_LIT8),                                                   \
            H(OP_SHL_INT_LIT8),                                                   \
            H(OP_SHR_INT_LIT8),                                                   \
            H(OP_USHR_INT_LIT8),                                                  \
            H(OP_IGET_VOLATILE),                                                  \
            H(OP_IPUT_VOLATILE),                                                  \
            H(OP_SGET_VOLATILE),                                                  \
            H(OP_SPUT_VOLATILE),                                                  \
            H(OP_IGET_OBJECT_VOLATILE),                                           \
            H(OP_IGET_WIDE_VOLATILE),                                             \
            H(OP_IPUT_WIDE_VOLATILE),                                             \
            H(OP_SGET_WIDE_VOLATILE),                                             \
            H(OP_SPUT_WIDE_VOLATILE),                                             \
            H(OP_BREAKPOINT),                                                     \
            H(OP_THROW_VERIFICATION_ERROR),                                       \
            H(OP_EXECUTE_INLINE),                                                 \
            H(OP_EXECUTE_INLINE_RANGE),                                           \
            H(OP_INVOKE_OBJECT_INIT_RANGE),                                       \
            H(OP_RETURN_VOID_BARRIER),                                            \
            H(OP_IGET_QUICK),                                                     \
            H(OP_IGET_WIDE_QUICK),                                                \
            H(OP_IGET_OBJECT_QUICK),                                              \
            H(OP_IPUT_QUICK),                                                     \
            H(OP_IPUT_WIDE_QUICK),                                                \
            H(OP_IPUT_OBJECT_QUICK),                                              \
            H(OP_INVOKE_VIRTUAL_QUICK),                                           \
            H(OP_INVOKE_VIRTUAL_QUICK_RANGE),                                     \
            H(OP_INVOKE_SUPER_QUICK),                                             \
            H(OP_INVOKE_SUPER_QUICK_RANGE),                                       \
            H(OP_IPUT_OBJECT_VOLATILE),                                           \
            H(OP_SGET_OBJECT_VOLATILE),                                           \
            H(OP_SPUT_OBJECT_VOLATILE),                                           \
            H(OP_UNUSED_FF),                                                      \
            /* END(libdex-goto-table) */                                          \
        };
    

    这个宏展开了就是定义了一个指针数组handlerTable,共256项,每一项对应dalvik的一个操作码。
    这个指针数组是在dvmInterpretPortable的函数体内展开的,不过它带有static const修饰,所以并不是普通的局部变量,而是作用域限于该函数、具有静态存储期的常量表。指令的分发,就是通过这张表用goto直接跳到对应的标签处;与为每条指令做一次函数调用相比,省去了方法调用的栈构造,执行效率得到提升。但是这对编码的要求就很高,其中用到大量的宏就可以看出他们的深厚功底。

    继续分析宏H:

    /*
     * Expands to the address of the handler label op_<_op>.  "&&label" is the
     * GCC labels-as-values extension; "##" pastes "op_" and _op into one token.
     */
    # define H(_op)             &&op_##_op
    

    其中&&是GCC的“标签作为值”(Labels as Values)扩展,作用是取一个标签的地址;##是预处理器的记号粘贴(token pasting)运算符,把前后两个记号拼成一个标识符,而不是字符串拼接。比如说H(OP_NOP)展开就是:&&op_OP_NOP,也就是标签op_OP_NOP的地址(类型为void*)。

    op_OP_NOP又是通过另外一个宏HANDLE_OPCODE来定义的:

    # define HANDLE_OPCODE(_op) op_##_op:
    

    HANDLE_OPCODE(OP_NOP)展开就是:op_OP_NOP:
    注意最后的冒号,这表示它其实是一个位置标签。

    所以handlerTable就是由这些标签的地址组成的数组。

    回到dvmInterpretPortable,继续分析宏FINISH

    /*
     * Advance pc by _offset, load the code unit now at pc into "inst", call
     * dvmCheckBefore() when any subMode bit is set, then jump to the handler
     * label selected by the opcode byte of "inst".
     *
     * Relies on pc, fp, inst, self and handlerTable being in scope where the
     * macro is expanded.
     */
    # define FINISH(_offset) {                                                  \
            ADJUST_PC(_offset);                                                 \
            inst = FETCH(0);                                                    \
            if (self->interpBreak.ctl.subMode) {                                \
                dvmCheckBefore(pc, fp, self);                                   \
            }                                                                   \
            goto *handlerTable[INST_INST(inst)];                                \
        }
    /* Jump straight to the handler for _opcode; pc and inst are left untouched. */
    # define FINISH_BKPT(_opcode) {                                             \
            goto *handlerTable[_opcode];                                        \
        }
    
    /* Expands to nothing; written after a handler body (see the OP_NOP handler). */
    #define OP_END
    

    其中的宏ADJUST_PC:

    /*
     * Move pc by _offset and then invoke EXPORT_EXTRA_PC().
     *
     * When CHECK_BRANCH_OFFSETS is defined, the target is first checked against
     * the range [curMethod->insns, curMethod->insns + dvmGetMethodInsnsSize());
     * an out-of-range target is logged with ALOGE and the VM is aborted.
     * Relies on pc and curMethod being in scope where the macro is expanded.
     */
    #ifdef CHECK_BRANCH_OFFSETS
    # define ADJUST_PC(_offset) do {                                            \
            int myoff = _offset;        /* deref only once */                   \
            if (pc + myoff < curMethod->insns ||                                \
                pc + myoff >= curMethod->insns + dvmGetMethodInsnsSize(curMethod)) \
            {                                                                   \
                char* desc;                                                     \
                desc = dexProtoCopyMethodDescriptor(&curMethod->prototype);     \
                ALOGE("Invalid branch %d at 0x%04x in %s.%s %s",                 \
                    myoff, (int) (pc - curMethod->insns),                       \
                    curMethod->clazz->descriptor, curMethod->name, desc);       \
                free(desc);                                                     \
                dvmAbort();                                                     \
            }                                                                   \
            pc += myoff;                                                        \
            EXPORT_EXTRA_PC();                                                  \
        } while (false)
    #else
    # define ADJUST_PC(_offset) do {                                            \
            pc += _offset;                                                      \
            EXPORT_EXTRA_PC();                                                  \
        } while (false)
    #endif
    

    其实就是将pc调整_offset个偏移量。

    接下来就是宏FETCH:

    /* Read the code unit at pc[_offset]; pc itself is not modified. */
    #define FETCH(_offset)     (pc[(_offset)])
    

    inst = FETCH(0);就是从pc的0偏移处取出一个指令单元(两个字节,见前面的声明:u2 inst)存放到inst中。

    然后通过宏INST_INST,得到该指令在handlerTable中的索引:

    /* Low 8 bits of an instruction unit; FINISH() uses this as the handler-table index. */
    #define INST_INST(_inst)    ((_inst) & 0xff)
    

    也就是说,指令单元的低字节就是操作码,同时也是它在handlerTable中的索引号。获取到索引号之后,就通过handlerTable跳转到对应的代码处开始执行。

    前面我们知道,通过宏HANDLE_OPCODE对标签进行定义,在dalvik/vm/mterp/c目录下,对每一个操作码都有个文件,里面对应就是其HANDLE_OPCODE标签的定义,也就是其实现细节:

    我们以OP_NOP为例分析一下:

    /* OP_NOP handler: does no work; FINISH(1) advances pc by one and dispatches the next instruction. */
    HANDLE_OPCODE(OP_NOP)
        FINISH(1);
    OP_END
    

    其逻辑就是啥也没干:FINISH(1)把pc向前移动1个单位(pc是u2指针,即2个字节),然后取出下一条指令并跳转到它的处理代码继续执行。

    ok,先到这里,下一篇以一个实际的例子来说明具体的解析过程。

    相关文章

      网友评论

        本文标题:彻底弄懂dalvik字节码【二】

        本文链接:https://www.haomeiwen.com/subject/cibkuttx.html