特殊的集合结构

平时开发中,大部分使用NSSetNSMutableSet。下面介绍个特殊的。

  • NSHashTable
  • NXHashTable(objc源码)

NSHashTable

NSHashTable是更广泛意义的NSSet,区别于NSSet/NSMutableSetNSHashTable有如下特性:

  • NSHashTable 是可变的;
  • NSHashTable 可以持有 weak 类型的成员变量;
  • NSHashTable 可以在添加成员变量的时候复制成员;
  • NSHashTable 可以随意的存储指针并且利用指针的唯一性来进行 hash 同一性检查(检查成员变量是否有重复)和对比操作(equal);

参考: https://nshipster.com/nshashtable-and-nsmaptable/

Hash冲突解决方案:拉链法

使用方法

1
2
3
4
NSString *str = @"10";
NSHashTable *table = [[NSHashTable alloc] initWithOptions:NSHashTableCopyIn capacity:0];
[table addObject:str];
[table removeObject:str];

NSHashTableOptions介绍(源码中是使用静态常量做关联)

1
2
3
4
5
6
7
8
9
10
11
12
enum {
// 默认行为,强引用集合中的对象,等同于NSMutableSet
NSHashTableStrongMemory = 0,
// 在将对象添加到集合之前,会拷贝对象
NSHashTableCopyIn = NSPointerFunctionsCopyIn,
// 使用移位指针(shifted pointer)来做hash检测和确定两个对象是否相等;
// 同时使用description方法来做描述字符串
NSHashTableObjectPointerPersonality = NSPointerFunctionsObjectPointerPersonality,
// 弱引用集合中的对象,且在对象被释放后,会被正确的移除。
NSHashTableWeakMemory = NSPointerFunctionsWeakMemory
};
typedef NSUInteger NSHashTableOptions;

NXHashTable

在objc源码中,找到了关于NSHashTable的类似实现。可以参考下

NXHashTable存储结构(精简过)

1
2
3
4
5
6
7
8
9
10
11
12
typedef struct {
// 附属实体
const NXHashTablePrototype *prototype;
// 真实的长度(存储了多少有效数据)
unsigned count;
// buckets实际所占用的空间大小(比理论高,2^n,不够会扩容)
unsigned nbBuckets;
// 数据[HashBucket?]
void *buckets;
// 信息
const void *info;
} NXHashTable;

NXHashTablePrototype存储结构(精简过)

1
2
3
4
5
6
7
8
9
10
11
typedef struct {
// hash方法的函数指针
uintptr_t (* hash)(const void * info,
const void * data);
// isEqual方法的函数指针
int (* isEqual)(const void * info, const void * data1, const void * data2);
// free方法的函数指针
void (* free)(const void * info, void * data);
// 保留字段
int style;
} NXHashTablePrototype;

HashBucket存储结构

1
2
3
4
5
6
7
8
9
10
11
12
typedef union {
// 存储的数据
const void *one;
// 存储数据的集合[x, x],新数据在前面
const void **many;
} oneOrMany;
typedef struct {
unsigned count;
// 如果count == 1,则取elements.one
// 如果count > 1,则取elements.many
oneOrMany elements;
} HashBucket;

NXHashTable的构造器NXCreateHashTableNXCreateHashTableFromZone

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
NXHashTable *NXCreateHashTable (NXHashTablePrototype prototype, unsigned capacity, const void *info) {
return NXCreateHashTableFromZone(prototype, capacity, info, DEFAULT_ZONE);
}

NXHashTable *NXCreateHashTableFromZone (NXHashTablePrototype prototype, unsigned capacity, const void *info, void *z) {
NXHashTable *table;
NXHashTablePrototype *proto;

table = ALLOCTABLE(z);
if (! prototypes) bootstrap ();
if (! prototype.hash) prototype.hash = NXPtrHash;
if (! prototype.isEqual) prototype.isEqual = NXPtrIsEqual;
if (! prototype.free) prototype.free = NXNoEffectFree;
if (prototype.style) {
_objc_inform ("*** NXCreateHashTable: invalid style\n");
return NULL;
};
proto = (NXHashTablePrototype *)NXHashGet (prototypes, &prototype);
if (! proto) {
proto
= (NXHashTablePrototype *) malloc(sizeof (NXHashTablePrototype));
bcopy ((const char*)&prototype, (char*)proto, sizeof (NXHashTablePrototype));
(void) NXHashInsert (prototypes, proto);
proto = (NXHashTablePrototype *)NXHashGet (prototypes, &prototype);
if (! proto) {
_objc_inform ("*** NXCreateHashTable: bug\n");
return NULL;
};
};
table->prototype = proto; table->count = 0; table->info = info;
table->nbBuckets = GOOD_CAPACITY(capacity);
table->buckets = ALLOCBUCKETS(z, table->nbBuckets);
return table;
}

取数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
// 通过hash函数(data指针位运算得到的散列数)->取模(得到下标)-> 加上头指针(得到数据的具体地址)
#define BUCKETOF(table, data) (((HashBucket *)table->buckets)+((*table->prototype->hash)(table->info, data) % table->nbBuckets))
// 判断数据指针是否一致
#define ISEQUAL(table, data1, data2) ((data1 == data2) || (*table->prototype->isEqual)(table->info, data1, data2))
void *NXHashGet (NXHashTable *table, const void *data) {
HashBucket *bucket = BUCKETOF(table, data);
unsigned j = bucket->count;
const void **pairs;

if (! j) return NULL;
if (j == 1) {
return ISEQUAL(table, data, bucket->elements.one)
? (void *) bucket->elements.one : NULL;
};
pairs = bucket->elements.many;
// 从列表中遍历寻找元素
while (j--) {
/* we don't cache isEqual because lists are short */
if (ISEQUAL(table, data, *pairs)) return (void *) *pairs;
pairs ++;
};
return NULL;
}

插入数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
void *NXHashInsert (NXHashTable *table, const void *data) {
HashBucket *bucket = BUCKETOF(table, data);
unsigned j = bucket->count;
const void **pairs;
const void **newt;
__unused void *z = ZONE_FROM_PTR(table);

if (! j) {
bucket->count++; bucket->elements.one = data;
table->count++;
return NULL;
};
if (j == 1) {
if (ISEQUAL(table, data, bucket->elements.one)) {
const void *old = bucket->elements.one;
bucket->elements.one = data;
return (void *) old;
};
// 数据迁移one->many
newt = ALLOCPAIRS(z, 2);
newt[1] = bucket->elements.one;
*newt = data;
bucket->count++; bucket->elements.many = newt;
table->count++;
// 如果列表的实际长度大于物理长度,就扩容buckets
if (table->count > table->nbBuckets) _NXHashRehash (table);
return NULL;
};
pairs = bucket->elements.many;
// 遍历先有的列表,如果数据已经存在,就返回
while (j--) {
/* we don't cache isEqual because lists are short */
if (ISEQUAL(table, data, *pairs)) {
const void *old = *pairs;
*pairs = data;
return (void *) old;
};
pairs ++;
};
/* 我们扩大这个buckets里面的many;并将新数据放在前面 */
newt = ALLOCPAIRS(z, bucket->count+1);
if (bucket->count) bcopy ((const char*)bucket->elements.many, (char*)(newt+1), bucket->count * PTRSIZE);
*newt = data;
FREEPAIRS (bucket->elements.many);
bucket->count++; bucket->elements.many = newt;
table->count++;
if (table->count > table->nbBuckets) _NXHashRehash (table);
return NULL;
}

扩容

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
// 思路:
// 将table的信息缓存到一个临时的table中
// 将table内的信息更新成最新
// 将table内的buckets数据仓重新分配大的内存
// 遍历将临时table中的数据依次再插入到新的buckets中
#define MORE_CAPACITY(b) (b*2+1)
static void _NXHashRehash (NXHashTable *table) {
_NXHashRehashToCapacity (table, MORE_CAPACITY(table->nbBuckets));
}
void _NXHashRehashToCapacity (NXHashTable *table, unsigned newCapacity) {
/* Rehash: we create a pseudo table pointing really to the old guys,
extend self, copy the old pairs, and free the pseudo table */
NXHashTable *old;
NXHashState state;
void *aux;
__unused void *z = ZONE_FROM_PTR(table);

old = ALLOCTABLE(z);
old->prototype = table->prototype; old->count = table->count;
old->nbBuckets = table->nbBuckets; old->buckets = table->buckets;
table->nbBuckets = newCapacity;
table->count = 0; table->buckets = ALLOCBUCKETS(z, table->nbBuckets);
state = NXInitHashState (old);
while (NXNextHashState (old, &state, &aux))
(void) NXHashInsert (table, aux);
freeBuckets (old, NO);
if (old->count != table->count)
_objc_inform("*** hashtable: count differs after rehashing; probably indicates a broken invariant: there are x and y such as isEqual(x, y) is TRUE but hash(x) != hash (y)\n");
free (old->buckets);
free (old);
}

删除某个元素

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
void *NXHashRemove (NXHashTable *table, const void *data) {
HashBucket *bucket = BUCKETOF(table, data);
unsigned j = bucket->count;
const void **pairs;
const void **newt;
__unused void *z = ZONE_FROM_PTR(table);

if (! j) return NULL;
if (j == 1) {
if (! ISEQUAL(table, data, bucket->elements.one)) return NULL;
data = bucket->elements.one;
table->count--; bucket->count--; bucket->elements.one = NULL;
return (void *) data;
};
pairs = bucket->elements.many;
if (j == 2) {
if (ISEQUAL(table, data, pairs[0])) {
bucket->elements.one = pairs[1]; data = pairs[0];
}
else if (ISEQUAL(table, data, pairs[1])) {
bucket->elements.one = pairs[0]; data = pairs[1];
}
else return NULL;
FREEPAIRS (pairs);
table->count--; bucket->count--;
return (void *) data;
};
while (j--) {
if (ISEQUAL(table, data, *pairs)) {
data = *pairs;
/* we shrink this bucket */
newt = (bucket->count-1)
? ALLOCPAIRS(z, bucket->count-1) : NULL;
if (bucket->count-1 != j)
bcopy ((const char*)bucket->elements.many, (char*)newt, PTRSIZE*(bucket->count-j-1));
if (j)
bcopy ((const char*)(bucket->elements.many + bucket->count-j), (char*)(newt+bucket->count-j-1), PTRSIZE*j);
FREEPAIRS (bucket->elements.many);
table->count--; bucket->count--; bucket->elements.many = newt;
return (void *) data;
};
pairs ++;
};
return NULL;
}