对象的hash
oc对象的hash,就是对象地址本身
Person *p1 = [Person new];
NSLog(@"p1.hash:%lu\np1:%ld",p1.hash, (long)p1);
//打印结果
p1.hash:4333796016
p1:4333796016
NSString中的hash函数
NSString 中hash是如何处理的呢?我在CFString中找到了如下部分代码:
翻译如下
/ *字符串散列:无论编码如何,都应该给出相同的结果;所以我们哈希UniChars。
如果长度小于或等于96,则哈希函数就是
以下(n是第n个UniChar字符,从0开始):
hash(-1)= length
hash(n)= hash(n-1)* 257 + unichar(n);
Hash = hash(length-1)*((length&31)+ 1)
如果长度大于96,则上述算法适用于
字符0..31,(长度/ 2)-16 ..(长度/ 2)+15,长度-32..length-1,包括端点;
因此,第一个,中间和最后32个字符。
请注意,下面的循环已展开;并且:257 ^ 2 = 66049; 257 ^ 3 = 16974593; 257 ^ 4 = 4362470401; 67503105是257 ^ 4 - 256 ^ 4
如果哈希码从UInt32更改为其他内容,则需要重新调整最后一块。
!我们尚未更新LP64
注意:哈希算法曾经在CF和Foundation中重复;但现在它应该只在下面的四个函数中。
Hash功能在Panther和Tiger,Tiger和Leopard之间发生了变化。
* /
源码中的实现
#define HashEverythingLimit 96
#define HashNextFourUniChars(accessStart, accessEnd, pointer) \
{result = result * 67503105 + (accessStart 0 accessEnd) * 16974593 + (accessStart 1 accessEnd) * 66049 + (accessStart 2 accessEnd) * 257 + (accessStart 3 accessEnd); pointer += 4;}
#define HashNextUniChar(accessStart, accessEnd, pointer) \
{result = result * 257 + (accessStart 0 accessEnd); pointer++;}
/* In this function, actualLen is the length of the original string; but len is the number of characters in buffer. The buffer is expected to contain the parts of the string relevant to hashing.
*/
CF_INLINE CFHashCode __CFStrHashCharacters(const UniChar *uContents, CFIndex len, CFIndex actualLen) {
CFHashCode result = actualLen;
if (len <= HashEverythingLimit) { //如果长度小于96
const UniChar *end4 = uContents + (len & ~3);
const UniChar *end = uContents + len;
while (uContents < end4) HashNextFourUniChars(uContents[, ], uContents); // First count in fours
while (uContents < end) HashNextUniChar(uContents[, ], uContents); // Then for the last <4 chars, count in ones...
} else { //字符串长度
const UniChar *contents, *end;
contents = uContents;
end = contents + 32;
while (contents < end) HashNextFourUniChars(contents[, ], contents);
contents = uContents + (len >> 1) - 16;
end = contents + 32;
while (contents < end) HashNextFourUniChars(contents[, ], contents);
end = uContents + len;
contents = end - 32;
while (contents < end) HashNextFourUniChars(contents[, ], contents);
}
return result + (result << (actualLen & 31));
}
如果讲宏的方法写入代码中
//96:0000 0000 0110 0000
//UInt16 UniChar 16位无符号整型
CFHashCode wj_CFStrHashCharacters(const UniChar *uContents, CFIndex len, CFIndex actualLen) {
// 3:0000 0000 0000 0011
//~3:1111 1111 1111 1100
CFHashCode result = actualLen;
if (len <= HashEverythingLimit) {
// 3:0000 0000 0000 0011
//~3:1111 1111 1111 1100
const UniChar *end4 = uContents + (len & ~3); //字符串是否大于4位
const UniChar *end = uContents + len;
while (uContents < end4) {
//257 ^ 2 = 66049; 257 ^ 3 = 16974593; 257 ^ 4 = 4362470401; 67503105是257 ^ 4 - 256 ^ 4
result = result * 67503105 + (uContents[ 0 ]) * 16974593 + (uContents[ 1 ]) * 66049 + (uContents[ 2 ]) * 257 + (uContents[ 3 ]);
uContents += 4;
} // First count in fours
while (uContents < end){
//
result = result * 257 + (uContents[ 0 ]);
uContents++;
}
} else {
const UniChar *contents, *end;
contents = uContents;
end = contents + 32;
while (contents < end){
result = result * 67503105 + (contents[ 0 ]) * 16974593 + (contents[ 1 ]) * 66049 + (contents[ 2 ]) * 257 + (contents[ 3 ]);
contents += 4;
}
contents = uContents + (len >> 1) - 16;
end = contents + 32;
while (contents < end){
result = result * 67503105 + (contents[ 0 ]) * 16974593 + (contents[ 1 ]) * 66049 + (contents[ 2 ]) * 257 + (contents[ 3 ]);
uContents += 4;
}
end = uContents + len;
contents = end - 32;
while (contents < end) {
result = result * 67503105 + (contents[ 0 ]) * 16974593 + (contents[ 1 ]) * 66049 + (contents[ 2 ]) * 257 + (contents[ 3 ]);
uContents += 4;
}
}
//31: 0001 1111
//
return result + (result << (actualLen & 31));
}
如果字符串小于4位,
会执行
while (uContents < end){
//
result = result * 257 + (uContents[ 0 ]);
uContents++;
}
return result + (result << (actualLen & 31));
我们以字符串“ abc”举例
NSString *string = @"abc";
UniChar buffer[string.length];
// [string getCharacters:buffer range:NSMakeRange(0, string.length)];
CFStringGetCharacters((CFStringRef)string, CFRangeMake(0, string.length), buffer);
//buffer[0] = 97
CFHashCode code = __CFStrHashCharacters(buffer,string.length,string.length);
NSLog(@"hash:%lu,code:%lu", string.hash,code);
字符"a"的ACISS码是97,字符"b"的ACISS码是97,字符"c"的ACISS码是99,字符串的长度是3,会执行
len = 3
CFHashCode result = 3;
while (uContents < end){
//
result = result * 257 + (uContents[ 0 ]);
uContents++;
}
return result + (result << (actualLen & 31));
以上可以写成如下公式:
假设长度为3
len = 3;
(((len*257 + content[0])*257 + content[0])*257 + content[0]
(((3 * 257 + 97) * 257 + 98) * 257 +99
3 * 257^3 + 97 * 257^2 + 98 * 257^1 + 99 * 257^0 = 57355817
公式为:result = len * 257^len + content[len-1] * 257^len-1……content[0] * 257^0
31 转换成二进制为 0001 1111
(actualLen & 31) 取actualLen二进制的后五位
result + (result << (actualLen & 31)); //左移(actualLen & 31)位,最多31
由此可见最终执行的结果是
result = 3 * 257^3 + 97 * 257^2 + 96 * 257^1 + 95 * 257^0 = 57355817
result + (result << (3 & 31))
31可以写成2^5-1,转换为二进制0001 1111
所以(1 & 31)的结果是 1
0000 0011
& 0001 1111
-------------
0000 0011
abc的hash简化为
57355817 + (57355817 <<3)
最终字符串a的hash结果是516202353
如果字符串长度大于等于4小于96,
会执行
const UniChar *end4 = uContents + (len & ~3); //字符串是否大于4位
while (uContents < end4) {
result = result * 67503105 + (uContents[ 0 ]) * 16974593 + (uContents[ 1 ]) * 66049 + (uContents[ 2 ]) * 257 + (uContents[ 3 ]);
uContents += 4;
}
return result + (result << (actualLen & 31));
根据源码的注释我们可以看到,字符串是以4个为一组进行运算的,取上一次的结果乘以67503105,源码注释中有写到
257 ^ 2 = 66049; 257 ^ 3 = 16974593; 257 ^ 4 = 4362470401; 67503105是257 ^ 4 - 256 ^ 4
所以上述代码可以总结为以下公式
字符串长度为len
第一次while循环
result = len * (257 ^ 4 - 256 ^ 4) + (uContents[ 0 ]) * 257 ^ 3 + (uContents[ 1 ]) * 257 ^ 2 + (uContents[ 2 ]) * 257 + (uContents[ 3 ] * 257^0);
第n次
result = result * (257 ^ 4 - 256 ^ 4) + (uContents[ 0 + n*4]) * 257 ^ 3 + (uContents[ 1 + n*4 ]) * 257 ^ 2 + (uContents[ 2 + n*4 ]) * 257 + (uContents[ 3 + n*4] * 257^0);
以字符串“ abcd”举例
字符"a"的ACISS码是97,字符"b"的ACISS码是98,字符"c"的ACISS码是99,字符"c"的ACISS码是100,字符串的长度是4,会执行
result = 4;
result = result * 67503105 + (uContents[ 0 ]) * 16974593 + (uContents[ 1 ]) * 66049 + (uContents[ 2 ]) * 257 + (uContents[ 3 ]);
// result = 4 * 67503105 + (97) * 16974593 + (98) * 66049 + (99) * 257 + (100) = 1923046286;
return result + (result << (actualLen & 31)); //32691786862
总结
- 当字符串长度小于4时:result = len * 257^len + content[len-1] * 257^len-1……content[0] * 257^0
- 当字符串长度大于等于4小于96时:
第一次while循环
result = len * (257 ^ 4 - 256 ^ 4) + (uContents[ 0 ]) * 257 ^ 3 + (uContents[ 1 ]) * 257 ^ 2 + (uContents[ 2 ]) * 257 + (uContents[ 3 ] * 257^0);
第n次while循环
result = result * (257 ^ 4 - 256 ^ 4) + (uContents[ 0 + n4]) * 257 ^ 3 + (uContents[ 1 + n4 ]) * 257 ^ 2 + (uContents[ 2 + n4 ]) * 257 + (uContents[ 3 + n4] * 257^0); - 如果长度大于96,则上述算法适用于
字符0..31,(长度/ 2)-16 ..(长度/ 2)+15,长度-32..length-1,包括端点;
网友评论