在阅读开源库源码时,经常会看到 static inline 关键字,它表示该函数被声明为内联函数。据称使用后 App 的运行效率会提升,其中最有说服力的理由是:内联函数省去了函数调用时的跳转(调用与返回)开销。真的是这样吗?
使用 iOS 中的单元测试,分别设置如下 3 组函数,对调用时间进行对比。
//对比组1
// Expression macro: expands to the sum of a and b. Each argument is
// parenthesized so that an operand containing a lower-precedence operator
// (e.g. `1 << 1` or `c ? x : y`) is added as a single unit.
#define add_define(a,b) ((a) + (b))
// Statement macro: computes the sum into a block-local `res` and discards it.
// The do { ... } while(0) wrapper makes the expansion act as one statement;
// `(void)res` marks the local as intentionally unused.
#define add_define2(a,b) do { int res = (a) + (b); (void)res; } while(0)
// Inline variant of the "add and discard" benchmark function: sums its two
// arguments into a local and returns nothing.
static inline void add_inline(int x, int y)
{
    int sum = x + y;
}
// Non-inline baseline of the "add and discard" benchmark function: sums its
// two arguments into a local and returns nothing.
void add_normal(int x, int y)
{
    int sum = x + y;
}
// Inline variant that returns the sum of its two arguments.
static inline int add_inline2(int x, int y)
{
    int sum = x + y;
    return sum;
}
// Non-inline baseline that returns the sum of its two arguments.
int add_normal2(int x, int y)
{
    int sum = x + y;
    return sum;
}
//对比组2
/// Returns the main screen's scale factor.
///
/// The value is read from `[UIScreen mainScreen].scale` inside `dispatch_once`
/// and stored in a function-local static; every call returns that stored value.
///
/// Defined ahead of its callers and given a `(void)` prototype so that the
/// calls below refer to an already-declared function instead of relying on an
/// implicit declaration.
CGFloat YYScreenScale(void) {
    static CGFloat scale;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        scale = [UIScreen mainScreen].scale;
    });
    return scale;
}

/// Inline variant: returns `value` divided by `YYScreenScale()`.
static inline CGFloat CGFloatFromPixel(CGFloat value) {
    return value / YYScreenScale();
}

/// Non-inline variant with the same body as `CGFloatFromPixel`, used as the
/// comparison baseline.
CGFloat CGFloatFromPixel2(CGFloat value) {
    return value / YYScreenScale();
}
//对比组3
//SDWebImage 中 static inline CGRect SDCGRectFitWithScaleMode
(1)分别使用宏定义、内联函数、普通函数三种方式实现两个数字的加法;(2)选取YYImage中的代码段;(3)选取 SDWebImage 中的代码段。在 iPhone 8 真机上,每组函数各循环调用 100 万次;XCTest 的 measureBlock: 会把被测代码块执行 10 次,然后取平均值。
/// Performance test: times one million iterations of the macro, inline and
/// non-inline variants inside a single `measureBlock:`.
///
/// NOTE(review): every variant is called in the same measured block, so the
/// reported time is their combined cost; presumably individual variants are
/// compared by enabling one call at a time — confirm.
- (void)testPerformanceExample {
    // Loop-invariant input for the SDCGRectFitWithScaleMode calls; built once
    // so that constructing it is not repeated on every iteration.
    CGRect rect = CGRectMake(0, 0, 300, 300);
    [self measureBlock:^{
        for (int i = 0; i < 1000000; i++) {
            // Group 1: plain addition via macros and functions. Results are
            // deliberately discarded, hence the (void) casts.
            (void)add_define(10000, 10000);
            add_define2(10000, 10000);
            add_normal(10000, 10000);
            add_inline(10000, 10000);
            (void)add_normal2(100000, 100000);
            (void)add_inline2(100000, 100000);
            // Group 2: inline vs. non-inline CGFloatFromPixel.
            (void)CGFloatFromPixel(100);
            (void)CGFloatFromPixel2(100);
            // Group 3: inline vs. non-inline SDCGRectFitWithScaleMode.
            (void)SDCGRectFitWithScaleMode(rect, rect.size, 0);
            (void)SDCGRectFitWithScaleMode2(rect, rect.size, 0);
        }
    }];
}
每组实验均以非内联函数为基准。结果显示,3 组实验中内联版本都没有带来效率提升,有时 Xcode 还会给出 1%~2% worse 的提示。
修改 main.m 文件如下:
// Returns the sum of x and y. Declared without `static`, i.e. as an ordinary
// external function; this is the baseline whose assembly is listed next.
int add(int x, int y) {
return x + y;
}
选中 main.m 文件,通过 Xcode -> Product -> Perform Action -> Assemble 查看生成的汇编代码:
.section __TEXT,__text,regular,pure_instructions
.build_version macos, 10, 15 sdk_version 10, 15, 4
## Assembly for `add`: the symbol is exported with .globl, so code is emitted
## for it in this object file.
.globl _add ## -- Begin function add
.p2align 4, 0x90
_add: ## @add
Lfunc_begin0:
.file 1 "/Users/zhudongdong/Desktop/iOS/58/TestInLine" "TestInLine/main.m"
.loc 1 11 0 ## TestInLine/main.m:11:0
.cfi_startproc
## %bb.0:
## Prologue: save the caller's frame pointer and set up this frame.
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
## Store the two incoming register values (%edi, %esi) into stack slots.
movl %edi, -4(%rbp)
movl %esi, -8(%rbp)
Ltmp0:
.loc 1 12 13 prologue_end ## TestInLine/main.m:12:13
## Reload the first value from the stack into %eax ...
movl -4(%rbp), %eax
.loc 1 12 15 is_stmt 0 ## TestInLine/main.m:12:15
## ... and add the second one to it; the sum stays in %eax.
addl -8(%rbp), %eax
.loc 1 12 6 ## TestInLine/main.m:12:6
## Epilogue: restore the caller's frame pointer and return.
popq %rbp
retq
Ltmp1:
Lfunc_end0:
.cfi_endproc
## -- End function
.section __DATA,__objc_imageinfo,regular,no_dead_strip
L_OBJC_IMAGE_INFO:
.long 0
.long 64
.section __DWARF,__debug_str,regular,debug
Linfo_string:
.asciz "Apple clang version 11.0.3 (clang-1103.0.32.62)" ## string offset=0
.asciz "/Users/zhudongdong/Desktop/iOS/58/TestInLine/TestInLine/main.m" ## string offset=48
.asciz "/Users/zhudongdong/Desktop/iOS/58/TestInLine" ## string offset=111
.asciz "add" ## string offset=156
.asciz "int" ## string offset=160
.asciz "x" ## string offset=164
.asciz "y" ## string offset=166
.section __DWARF,__debug_abbrev,regular,debug
Lsection_abbrev:
.byte 1 ## Abbreviation Code
.byte 17 ## DW_TAG_compile_unit
.byte 1 ## DW_CHILDREN_yes
.byte 37 ## DW_AT_producer
.byte 14 ## DW_FORM_strp
.byte 19 ## DW_AT_language
.byte 5 ## DW_FORM_data2
.byte 3 ## DW_AT_name
.byte 14 ## DW_FORM_strp
.byte 16 ## DW_AT_stmt_list
.byte 23 ## DW_FORM_sec_offset
.byte 27 ## DW_AT_comp_dir
.byte 14 ## DW_FORM_strp
.ascii "\345\177" ## DW_AT_APPLE_major_runtime_vers
.byte 11 ## DW_FORM_data1
.byte 17 ## DW_AT_low_pc
.byte 1 ## DW_FORM_addr
.byte 18 ## DW_AT_high_pc
.byte 6 ## DW_FORM_data4
.byte 0 ## EOM(1)
.byte 0 ## EOM(2)
.byte 2 ## Abbreviation Code
.byte 46 ## DW_TAG_subprogram
.byte 1 ## DW_CHILDREN_yes
.byte 17 ## DW_AT_low_pc
.byte 1 ## DW_FORM_addr
.byte 18 ## DW_AT_high_pc
.byte 6 ## DW_FORM_data4
.byte 64 ## DW_AT_frame_base
.byte 24 ## DW_FORM_exprloc
.byte 3 ## DW_AT_name
.byte 14 ## DW_FORM_strp
.byte 58 ## DW_AT_decl_file
.byte 11 ## DW_FORM_data1
.byte 59 ## DW_AT_decl_line
.byte 11 ## DW_FORM_data1
.byte 39 ## DW_AT_prototyped
.byte 25 ## DW_FORM_flag_present
.byte 73 ## DW_AT_type
.byte 19 ## DW_FORM_ref4
.byte 63 ## DW_AT_external
.byte 25 ## DW_FORM_flag_present
.byte 0 ## EOM(1)
.byte 0 ## EOM(2)
.byte 3 ## Abbreviation Code
.byte 5 ## DW_TAG_formal_parameter
.byte 0 ## DW_CHILDREN_no
.byte 2 ## DW_AT_location
.byte 24 ## DW_FORM_exprloc
.byte 3 ## DW_AT_name
.byte 14 ## DW_FORM_strp
.byte 58 ## DW_AT_decl_file
.byte 11 ## DW_FORM_data1
.byte 59 ## DW_AT_decl_line
.byte 11 ## DW_FORM_data1
.byte 73 ## DW_AT_type
.byte 19 ## DW_FORM_ref4
.byte 0 ## EOM(1)
.byte 0 ## EOM(2)
.byte 4 ## Abbreviation Code
.byte 36 ## DW_TAG_base_type
.byte 0 ## DW_CHILDREN_no
.byte 3 ## DW_AT_name
.byte 14 ## DW_FORM_strp
.byte 62 ## DW_AT_encoding
.byte 11 ## DW_FORM_data1
.byte 11 ## DW_AT_byte_size
.byte 11 ## DW_FORM_data1
.byte 0 ## EOM(1)
.byte 0 ## EOM(2)
.byte 0 ## EOM(3)
.section __DWARF,__debug_info,regular,debug
Lsection_info:
Lcu_begin0:
.set Lset0, Ldebug_info_end0-Ldebug_info_start0 ## Length of Unit
.long Lset0
Ldebug_info_start0:
.short 4 ## DWARF version number
.set Lset1, Lsection_abbrev-Lsection_abbrev ## Offset Into Abbrev. Section
.long Lset1
.byte 8 ## Address Size (in bytes)
.byte 1 ## Abbrev [1] 0xb:0x5e DW_TAG_compile_unit
.long 0 ## DW_AT_producer
.short 16 ## DW_AT_language
.long 48 ## DW_AT_name
.set Lset2, Lline_table_start0-Lsection_line ## DW_AT_stmt_list
.long Lset2
.long 111 ## DW_AT_comp_dir
.byte 2 ## DW_AT_APPLE_major_runtime_vers
.quad Lfunc_begin0 ## DW_AT_low_pc
.set Lset3, Lfunc_end0-Lfunc_begin0 ## DW_AT_high_pc
.long Lset3
.byte 2 ## Abbrev [2] 0x2b:0x36 DW_TAG_subprogram
.quad Lfunc_begin0 ## DW_AT_low_pc
.set Lset4, Lfunc_end0-Lfunc_begin0 ## DW_AT_high_pc
.long Lset4
.byte 1 ## DW_AT_frame_base
.byte 86
.long 156 ## DW_AT_name
.byte 1 ## DW_AT_decl_file
.byte 11 ## DW_AT_decl_line
## DW_AT_prototyped
.long 97 ## DW_AT_type
## DW_AT_external
.byte 3 ## Abbrev [3] 0x44:0xe DW_TAG_formal_parameter
.byte 2 ## DW_AT_location
.byte 145
.byte 124
.long 164 ## DW_AT_name
.byte 1 ## DW_AT_decl_file
.byte 11 ## DW_AT_decl_line
.long 97 ## DW_AT_type
.byte 3 ## Abbrev [3] 0x52:0xe DW_TAG_formal_parameter
.byte 2 ## DW_AT_location
.byte 145
.byte 120
.long 166 ## DW_AT_name
.byte 1 ## DW_AT_decl_file
.byte 11 ## DW_AT_decl_line
.long 97 ## DW_AT_type
.byte 0 ## End Of Children Mark
.byte 4 ## Abbrev [4] 0x61:0x7 DW_TAG_base_type
.long 160 ## DW_AT_name
.byte 5 ## DW_AT_encoding
.byte 4 ## DW_AT_byte_size
.byte 0 ## End Of Children Mark
Ldebug_info_end0:
.section __DWARF,__debug_macinfo,regular,debug
Ldebug_macinfo:
.byte 0 ## End Of Macro List Mark
.section __DWARF,__apple_names,regular,debug
Lnames_begin:
.long 1212240712 ## Header Magic
.short 1 ## Header Version
.short 0 ## Header Hash Function
.long 1 ## Header Bucket Count
.long 1 ## Header Hash Count
.long 12 ## Header Data Length
.long 0 ## HeaderData Die Offset Base
.long 1 ## HeaderData Atom Count
.short 1 ## DW_ATOM_die_offset
.short 6 ## DW_FORM_data4
.long 0 ## Bucket 0
.long 193486030 ## Hash in Bucket 0
.set Lset5, LNames0-Lnames_begin ## Offset in Bucket 0
.long Lset5
LNames0:
.long 156 ## add
.long 1 ## Num DIEs
.long 43
.long 0
.section __DWARF,__apple_objc,regular,debug
Lobjc_begin:
.long 1212240712 ## Header Magic
.short 1 ## Header Version
.short 0 ## Header Hash Function
.long 1 ## Header Bucket Count
.long 0 ## Header Hash Count
.long 12 ## Header Data Length
.long 0 ## HeaderData Die Offset Base
.long 1 ## HeaderData Atom Count
.short 1 ## DW_ATOM_die_offset
.short 6 ## DW_FORM_data4
.long -1 ## Bucket 0
.section __DWARF,__apple_namespac,regular,debug
Lnamespac_begin:
.long 1212240712 ## Header Magic
.short 1 ## Header Version
.short 0 ## Header Hash Function
.long 1 ## Header Bucket Count
.long 0 ## Header Hash Count
.long 12 ## Header Data Length
.long 0 ## HeaderData Die Offset Base
.long 1 ## HeaderData Atom Count
.short 1 ## DW_ATOM_die_offset
.short 6 ## DW_FORM_data4
.long -1 ## Bucket 0
.section __DWARF,__apple_types,regular,debug
Ltypes_begin:
.long 1212240712 ## Header Magic
.short 1 ## Header Version
.short 0 ## Header Hash Function
.long 1 ## Header Bucket Count
.long 1 ## Header Hash Count
.long 20 ## Header Data Length
.long 0 ## HeaderData Die Offset Base
.long 3 ## HeaderData Atom Count
.short 1 ## DW_ATOM_die_offset
.short 6 ## DW_FORM_data4
.short 3 ## DW_ATOM_die_tag
.short 5 ## DW_FORM_data2
.short 4 ## DW_ATOM_type_flags
.short 11 ## DW_FORM_data1
.long 0 ## Bucket 0
.long 193495088 ## Hash in Bucket 0
.set Lset6, Ltypes0-Ltypes_begin ## Offset in Bucket 0
.long Lset6
Ltypes0:
.long 160 ## int
.long 1 ## Num DIEs
.long 97
.short 36
.byte 0
.long 0
.subsections_via_symbols
.section __DWARF,__debug_line,regular,debug
Lsection_line:
Lline_table_start0:
将其改写为内联函数的形式:
// Same body as the non-inline `add`, now declared `static inline`.
// NOTE(review): the assembly listing that follows contains no `_add` symbol —
// presumably because this function is not called anywhere in main.m, so no
// code is emitted for it; confirm against the full file.
static inline int add(int x, int y) {
return x + y;
}
再次查看汇编代码:
.section __TEXT,__text,regular,pure_instructions
.build_version macos, 10, 15 sdk_version 10, 15, 4
.section __DATA,__objc_imageinfo,regular,no_dead_strip
L_OBJC_IMAGE_INFO:
.long 0
.long 64
.section __DWARF,__apple_names,regular,debug
Lnames_begin:
.long 1212240712 ## Header Magic
.short 1 ## Header Version
.short 0 ## Header Hash Function
.long 1 ## Header Bucket Count
.long 0 ## Header Hash Count
.long 12 ## Header Data Length
.long 0 ## HeaderData Die Offset Base
.long 1 ## HeaderData Atom Count
.short 1 ## DW_ATOM_die_offset
.short 6 ## DW_FORM_data4
.long -1 ## Bucket 0
.section __DWARF,__apple_objc,regular,debug
Lobjc_begin:
.long 1212240712 ## Header Magic
.short 1 ## Header Version
.short 0 ## Header Hash Function
.long 1 ## Header Bucket Count
.long 0 ## Header Hash Count
.long 12 ## Header Data Length
.long 0 ## HeaderData Die Offset Base
.long 1 ## HeaderData Atom Count
.short 1 ## DW_ATOM_die_offset
.short 6 ## DW_FORM_data4
.long -1 ## Bucket 0
.section __DWARF,__apple_namespac,regular,debug
Lnamespac_begin:
.long 1212240712 ## Header Magic
.short 1 ## Header Version
.short 0 ## Header Hash Function
.long 1 ## Header Bucket Count
.long 0 ## Header Hash Count
.long 12 ## Header Data Length
.long 0 ## HeaderData Die Offset Base
.long 1 ## HeaderData Atom Count
.short 1 ## DW_ATOM_die_offset
.short 6 ## DW_FORM_data4
.long -1 ## Bucket 0
.section __DWARF,__apple_types,regular,debug
Ltypes_begin:
.long 1212240712 ## Header Magic
.short 1 ## Header Version
.short 0 ## Header Hash Function
.long 1 ## Header Bucket Count
.long 0 ## Header Hash Count
.long 20 ## Header Data Length
.long 0 ## HeaderData Die Offset Base
.long 3 ## HeaderData Atom Count
.short 1 ## DW_ATOM_die_offset
.short 6 ## DW_FORM_data4
.short 3 ## DW_ATOM_die_tag
.short 5 ## DW_FORM_data2
.short 4 ## DW_ATOM_type_flags
.short 11 ## DW_FORM_data1
.long -1 ## Bucket 0
.subsections_via_symbols
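发现汇编后的指令确实少了一些:add 函数对应的代码(_add 符号)完全没有出现在输出中。需要注意的是,这更可能是因为 static 函数在本文件中没有被调用,编译器因此不再为它生成代码,并不能直接说明内联让调用变快了。但是从 Unit 测试结果上来看,执行效率并没有提升,个人猜测 CPU 执行时统一进行了优化,和是否使用内联函数无关。(补充:更可能的原因在编译器一侧——inline 只是对编译器的建议,未开启优化时通常不会真正内联,而开启优化后编译器又会自动内联这类短小函数,所以是否手写 inline 对结果影响不大。)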
总结
根据测试,得到如下结论:
1. 在 iOS 设备上,内联函数并不能大幅提高函数执行效率;
2. 内联函数相比宏的优势:和普通函数一样,调用时会对传入的参数进行类型检查;不会出现宏修改导致 Xcode 重新编译缓慢的问题。
网友评论