美文网首页
第二章 中间表示

第二章 中间表示

作者: JAVA孙剑 | 来源:发表于2019-08-08 16:57 被阅读0次
    1. java字节码作为中间表示(使用javap -v命令查看,-v会输出常量池、栈深度和局部变量数等详细信息)
    Classfile /home/jiansun/mydemo/sootdemo/Add.class
      Last modified 2019-8-7; size 488 bytes
      MD5 checksum 74331c11abb06b528003d93bae6e9d83
      Compiled from "Add.java"
    public class Add
      minor version: 0
      major version: 52
      flags: ACC_PUBLIC, ACC_SUPER
    Constant pool:
       #1 = Methodref          #6.#17         // java/lang/Object."<init>":()V
       #2 = Methodref          #5.#18         // Add.addTwoNum:(II)I
       #3 = Fieldref           #19.#20        // java/lang/System.out:Ljava/io/PrintStream;
       #4 = Methodref          #21.#22        // java/io/PrintStream.println:(I)V
       #5 = Class              #23            // Add
       #6 = Class              #24            // java/lang/Object
       #7 = Utf8               <init>
       #8 = Utf8               ()V
       #9 = Utf8               Code
      #10 = Utf8               LineNumberTable
      #11 = Utf8               main
      #12 = Utf8               ([Ljava/lang/String;)V
      #13 = Utf8               addTwoNum
      #14 = Utf8               (II)I
      #15 = Utf8               SourceFile
      #16 = Utf8               Add.java
      #17 = NameAndType        #7:#8          // "<init>":()V
      #18 = NameAndType        #13:#14        // addTwoNum:(II)I
      #19 = Class              #25            // java/lang/System
      #20 = NameAndType        #26:#27        // out:Ljava/io/PrintStream;
      #21 = Class              #28            // java/io/PrintStream
      #22 = NameAndType        #29:#30        // println:(I)V
      #23 = Utf8               Add
      #24 = Utf8               java/lang/Object
      #25 = Utf8               java/lang/System
      #26 = Utf8               out
      #27 = Utf8               Ljava/io/PrintStream;
      #28 = Utf8               java/io/PrintStream
      #29 = Utf8               println
      #30 = Utf8               (I)V
    {
      public Add();
        descriptor: ()V
        flags: ACC_PUBLIC
        Code:
          stack=1, locals=1, args_size=1
             0: aload_0
             1: invokespecial #1                  // Method java/lang/Object."<init>":()V
             4: return
          LineNumberTable:
            line 1: 0
    
      public static void main(java.lang.String[]);
        descriptor: ([Ljava/lang/String;)V
        flags: ACC_PUBLIC, ACC_STATIC
        Code:
          stack=2, locals=4, args_size=1
             0: bipush        13
             2: istore_2
             3: bipush        17
             5: istore_3
             6: iload_2
             7: iload_3
             8: invokestatic  #2                  // Method addTwoNum:(II)I
            11: istore_1
            12: getstatic     #3                  // Field java/lang/System.out:Ljava/io/PrintStream;
            15: iload_1
            16: invokevirtual #4                  // Method java/io/PrintStream.println:(I)V
            19: return
          LineNumberTable:
            line 4: 0
            line 5: 3
            line 6: 6
            line 7: 12
            line 8: 19
    
      public static int addTwoNum(int, int);
        descriptor: (II)I
        flags: ACC_PUBLIC, ACC_STATIC
        Code:
          stack=2, locals=4, args_size=2
             0: bipush        7
             2: istore_2
             3: bipush        8
             5: istore_3
             6: iload_0
             7: iload_1
             8: iadd
             9: iconst_2
            10: imul
            11: ireturn
          LineNumberTable:
            line 11: 0
            line 12: 3
            line 13: 6
    }
    SourceFile: "Add.java"
    
    2. baf中间表示
      • 把200多种不同的字节码精简为60种
      • 没有常量池,直接通过函数名和字段名访问方法和字段
      • 使用word/dword显式声明局部变量(word为32位,dword为64位)
      • 无需过多关注变量类型,例如int、short、long等相加的指令,在baf中统称为相加指令
    public class Add extends java.lang.Object
    {
    
        public void <init>()
        {
            word r0;
    
            r0 := @this: Add;
            load.r r0;
            specialinvoke <java.lang.Object: void <init>()>;
            return;
        }
    
        public static void main(java.lang.String[])
        {
            word r0;
    
            r0 := @parameter0: java.lang.String[];
            push 13;
            push 17;
            staticinvoke <Add: int addTwoNum(int,int)>;
            store.i r0;
            staticget <java.lang.System: java.io.PrintStream out>;
            load.i r0;
            virtualinvoke <java.io.PrintStream: void println(int)>;
            return;
        }
    
        public static int addTwoNum(int, int)
        {
            word i0, i1, b2;
    
            i0 := @parameter0: int;
            i1 := @parameter1: int;
            push 7;
            store.b b2;
            push 8;
            store.b b2;
            load.i i0;
            load.i i1;
            add.i;
            push 2;
            mul.i;
            return.i;
        }
    }
    
    3. jimple作为中间表示

      soot中Jimple语句类型

      • 负责过程内控制流的语句:IfStmt , GotoStmt , TableSwitchStmt , LookupSwitchStmt

      • 负责过程间的控制流语句:InvokeStmt , ReturnStmt , ReturnVoidStmt.

      • 监控语句:EnterMonitorStmt , ExitMonitorStmt.

      • ThrowStmt , RetStmt

    • jimple是类型化的、三地址的、基于语句的中间代码

    • Java字节码有大约200个不同的字节码指令,BAF大约有60个,而JIMPLE有15个。

    • 因为其紧凑性,利于分析和优化。

    • 有类型和命名的局部变量可以改进分析

    • jimple的运算符是无类型的,而baf的运算符是有类型的(add.i,add.f,add.d或add.l)。在转换成jimple之后,不再需要有类型的运算符(因为局部变量本身带有类型)。

    Map m = new HashMap();
    m.get("key");
    
    //becomes the following JIMPLE code:
    
    java.util.HashMap $r1, r2;
    $r1 = new java.util.HashMap;
    specialinvoke $r1.<java.util.HashMap: void <init>()>();
    r2 = $r1;
    //这里已经知道r2的类型是HashMap;如果不知道的话,interfaceinvoke可能分派到任何实现了Map接口的类的get方法。
    interfaceinvoke r2.<java.util.Map:     
    java.lang.Object get(java.lang.Object)>("key");
    
    • 局部变量都在方法的顶部声明,包括引用数据类型和基本数据类型。
    • identity语句定义了使用特殊值预加载的局部变量,如this或方法参数
    • jimple类似于简单的Java代码(所以叫做jimple)
    • 赋值语句占主导地位


      1565142816842.png

    jimple代码优化之后,可以重新转化成字节码文件,在虚拟机中运行。

    public class Add extends java.lang.Object
    {
    
        public void <init>()
        {
            Add r0;
    
            r0 := @this: Add;
            specialinvoke r0.<java.lang.Object: void <init>()>();
            return;
        }
    
        public static void main(java.lang.String[])
        {
            java.lang.String[] r0;
            byte b0, b1;
            int i2;
            java.io.PrintStream $r1;
    
            r0 := @parameter0: java.lang.String[];
            b0 = 13;
            b1 = 17;
            i2 = staticinvoke <Add: int addTwoNum(int,int)>(b0, b1);
            $r1 = <java.lang.System: java.io.PrintStream out>;
            virtualinvoke $r1.<java.io.PrintStream: void println(int)>(i2);
            return;
        }
    
        public static int addTwoNum(int, int)
        {
            int i0, i1, $i4, $i5;
            byte b2, b3;
    
            i0 := @parameter0: int;
            i1 := @parameter1: int;
            b2 = 7;
            b3 = 8;
            $i4 = i0 + i1;
            $i5 = $i4 * 2;
            return $i5;
        }
    }
    
    4. Grimp是更易阅读的中间表示(更像是反编译的java代码,grimp作为反编译器的基础)


      1565145414041.png

      grimp允许使用new操作,因此更接近java源代码

    public class Add extends java.lang.Object
    {
    
        public void <init>()
        {
            Add r0;
    
            r0 := @this: Add;
            specialinvoke r0.<java.lang.Object: void <init>()>();
            return;
        }
    
        public static void main(java.lang.String[])
        {
            java.lang.String[] r0;
            byte b0, b1;
            int i2;
            java.io.PrintStream $r1;
    
            r0 := @parameter0: java.lang.String[];
            b0 = 13;
            b1 = 17;
            i2 = staticinvoke <Add: int addTwoNum(int,int)>(b0, b1);
            $r1 = <java.lang.System: java.io.PrintStream out>;
            virtualinvoke $r1.<java.io.PrintStream: void println(int)>(i2);
            return;
        }
    
        public static int addTwoNum(int, int)
        {
            int i0, i1, $i4, $i5;
            byte b2, b3;
    
            i0 := @parameter0: int;
            i1 := @parameter1: int;
            b2 = 7;
            b3 = 8;
            $i4 = i0 + i1;
            $i5 = $i4 * 2;
            return $i5;
        }
    }
    

    相关文章

      网友评论

          本文标题:第二章 中间表示

          本文链接:https://www.haomeiwen.com/subject/vzaxjctx.html