特殊的集合结构

发表于 2020-01-15

本文字数： 7.9k

平时开发中，大部分使用NSSet和NSMutableSet。下面介绍个特殊的。

NSHashTable
NXHashTable（objc源码）

NSHashTable

NSHashTable是更广泛意义的NSSet，区别于NSSet/NSMutableSet，NSHashTable有如下特性：

NSHashTable 是可变的；
NSHashTable 可以持有 weak 类型的成员变量；
NSHashTable 可以在添加成员变量的时候复制成员；
NSHashTable 可以随意的存储指针并且利用指针的唯一性来进行 hash 同一性检查（检查成员变量是否有重复）和对比操作（equal）；

参考： https://nshipster.com/nshashtable-and-nsmaptable/

Hash冲突解决方案：拉链法

使用方法

NSString *str = @"10";
NSHashTable *table = [[NSHashTable alloc] initWithOptions:NSHashTableCopyIn capacity:0];
[table addObject:str];
[table removeObject:str];

NSHashTableOptions介绍（源码中是使用静态常量做关联）

enum {
    // 默认行为，强引用集合中的对象，等同于NSMutableSet
    NSHashTableStrongMemory             = 0,
    // 在将对象添加到集合之前，会拷贝对象
    NSHashTableCopyIn                   = NSPointerFunctionsCopyIn,
    // 使用移位指针(shifted pointer)来做hash检测和确定两个对象是否相等；
    // 同时使用description方法来做描述字符串
    NSHashTableObjectPointerPersonality = NSPointerFunctionsObjectPointerPersonality,
    // 弱引用集合中的对象，且在对象被释放后，会被正确的移除。
    NSHashTableWeakMemory               = NSPointerFunctionsWeakMemory
};
typedef NSUInteger NSHashTableOptions;

NXHashTable

在objc源码中，找到了关于NSHashTable的类似实现。可以参考下

NXHashTable存储结构（精简过）

typedef struct {
    // 附属实体
    const NXHashTablePrototype	*prototype;
    // 真实的长度（存储了多少有效数据）
    unsigned		  count;
    // buckets实际所占用的空间大小（比理论高，2^n，不够会扩容）
    unsigned		  nbBuckets;
    // 数据[HashBucket?]
    void			 *buckets;
    // 信息
    const void	     *info;
} NXHashTable;

NXHashTablePrototype存储结构（精简过）

typedef struct {
    // hash方法的函数指针
    uintptr_t	(* hash)(const void * info,
                                  const void * data);
    // isEqual方法的函数指针
    int		(* isEqual)(const void * info, const void * data1, const void * data2);
    // free方法的函数指针
    void	(* free)(const void * info, void * data);
    // 保留字段
    int		style;
} NXHashTablePrototype;

HashBucket存储结构

typedef union {
    // 存储的数据
    const void	*one;
    // 存储数据的集合[x, x]，新数据在前面
    const void	**many;
} oneOrMany;
typedef struct	{
    unsigned 	count; 
    // 如果count == 1，则取elements.one
    // 如果count >  1，则取elements.many
    oneOrMany	elements;
} HashBucket;

NXHashTable的构造器NXCreateHashTable和NXCreateHashTableFromZone

NXHashTable *NXCreateHashTable (NXHashTablePrototype prototype, unsigned capacity, const void *info) {
    return NXCreateHashTableFromZone(prototype, capacity, info, DEFAULT_ZONE);
}

NXHashTable *NXCreateHashTableFromZone (NXHashTablePrototype prototype, unsigned capacity, const void *info, void *z) {
    NXHashTable			*table;
    NXHashTablePrototype	*proto;
    
    table = ALLOCTABLE(z);
    if (! prototypes) bootstrap ();
    if (! prototype.hash) prototype.hash = NXPtrHash;
    if (! prototype.isEqual) prototype.isEqual = NXPtrIsEqual;
    if (! prototype.free) prototype.free = NXNoEffectFree;
    if (prototype.style) {
	_objc_inform ("*** NXCreateHashTable: invalid style\n");
	return NULL;
	};
    proto = (NXHashTablePrototype *)NXHashGet (prototypes, &prototype); 
    if (! proto) {
	proto
            = (NXHashTablePrototype *) malloc(sizeof (NXHashTablePrototype));
	bcopy ((const char*)&prototype, (char*)proto, sizeof (NXHashTablePrototype));
    	(void) NXHashInsert (prototypes, proto);
	proto = (NXHashTablePrototype *)NXHashGet (prototypes, &prototype);
	if (! proto) {
	    _objc_inform ("*** NXCreateHashTable: bug\n");
	    return NULL;
	    };
	};
    table->prototype = proto; table->count = 0; table->info = info;
    table->nbBuckets = GOOD_CAPACITY(capacity);
    table->buckets = ALLOCBUCKETS(z, table->nbBuckets);
    return table;
}

取数据

// 通过hash函数（data指针位运算得到的散列数）->取模（得到下标）-> 加上头指针（得到数据的具体地址）
#define	BUCKETOF(table, data) (((HashBucket *)table->buckets)+((*table->prototype->hash)(table->info, data) % table->nbBuckets))
// 判断数据指针是否一致
#define ISEQUAL(table, data1, data2) ((data1 == data2) || (*table->prototype->isEqual)(table->info, data1, data2))
void *NXHashGet (NXHashTable *table, const void *data) {
    HashBucket	*bucket = BUCKETOF(table, data);
    unsigned	j = bucket->count;
    const void	**pairs;
    
    if (! j) return NULL;
    if (j == 1) {
    	return ISEQUAL(table, data, bucket->elements.one)
	    ? (void *) bucket->elements.one : NULL; 
	};
    pairs = bucket->elements.many;
    // 从列表中遍历寻找元素
    while (j--) {
	/* we don't cache isEqual because lists are short */
    	if (ISEQUAL(table, data, *pairs)) return (void *) *pairs; 
	pairs ++;
	};
    return NULL;
    }

插入数据

void *NXHashInsert (NXHashTable *table, const void *data) {
    HashBucket	*bucket = BUCKETOF(table, data);
    unsigned	j = bucket->count;
    const void	**pairs;
    const void	**newt;
    __unused void *z = ZONE_FROM_PTR(table);
    
    if (! j) {
	bucket->count++; bucket->elements.one = data; 
	table->count++; 
	return NULL;
	};
    if (j == 1) {
    	if (ISEQUAL(table, data, bucket->elements.one)) {
	       const void	*old = bucket->elements.one;
	       bucket->elements.one = data;
	       return (void *) old;
	   };
        // 数据迁移one->many
	   newt = ALLOCPAIRS(z, 2);
	   newt[1] = bucket->elements.one;
	   *newt = data;
	   bucket->count++; bucket->elements.many = newt; 
	   table->count++; 
	   // 如果列表的实际长度大于物理长度，就扩容buckets
	   if (table->count > table->nbBuckets) _NXHashRehash (table);
	   return NULL;
	};
    pairs = bucket->elements.many;
    // 遍历先有的列表，如果数据已经存在，就返回
    while (j--) {
	/* we don't cache isEqual because lists are short */
    	if (ISEQUAL(table, data, *pairs)) {
	       const void	*old = *pairs;
	       *pairs = data;
	       return (void *) old;
	    };
	   pairs ++;
	};
    /* 我们扩大这个buckets里面的many；并将新数据放在前面 */
    newt = ALLOCPAIRS(z, bucket->count+1);
    if (bucket->count) bcopy ((const char*)bucket->elements.many, (char*)(newt+1), bucket->count * PTRSIZE);
    *newt = data;
    FREEPAIRS (bucket->elements.many);
    bucket->count++; bucket->elements.many = newt; 
    table->count++; 
    if (table->count > table->nbBuckets) _NXHashRehash (table);
    return NULL;
    }

扩容

// 思路：
// 将table的信息缓存到一个临时的table中
// 将table内的信息更新成最新
// 将table内的buckets数据仓重新分配大的内存
// 遍历将临时table中的数据依次再插入到新的buckets中
#define MORE_CAPACITY(b) (b*2+1)
static void _NXHashRehash (NXHashTable *table) {
    _NXHashRehashToCapacity (table, MORE_CAPACITY(table->nbBuckets));
}
void _NXHashRehashToCapacity (NXHashTable *table, unsigned newCapacity) {
    /* Rehash: we create a pseudo table pointing really to the old guys,
    extend self, copy the old pairs, and free the pseudo table */
    NXHashTable	*old;
    NXHashState	state;
    void	*aux;
    __unused void *z = ZONE_FROM_PTR(table);
    
    old = ALLOCTABLE(z);
    old->prototype = table->prototype; old->count = table->count; 
    old->nbBuckets = table->nbBuckets; old->buckets = table->buckets;
    table->nbBuckets = newCapacity;
    table->count = 0; table->buckets = ALLOCBUCKETS(z, table->nbBuckets);
    state = NXInitHashState (old);
    while (NXNextHashState (old, &state, &aux))
	(void) NXHashInsert (table, aux);
    freeBuckets (old, NO);
    if (old->count != table->count)
	_objc_inform("*** hashtable: count differs after rehashing; probably indicates a broken invariant: there are x and y such as isEqual(x, y) is TRUE but hash(x) != hash (y)\n");
    free (old->buckets); 
    free (old);
}

删除某个元素

void *NXHashRemove (NXHashTable *table, const void *data) {
    HashBucket	*bucket = BUCKETOF(table, data);
    unsigned	j = bucket->count;
    const void	**pairs;
    const void	**newt;
    __unused void *z = ZONE_FROM_PTR(table);
    
    if (! j) return NULL;
    if (j == 1) {
	if (! ISEQUAL(table, data, bucket->elements.one)) return NULL;
	data = bucket->elements.one;
	table->count--; bucket->count--; bucket->elements.one = NULL;
	return (void *) data;
	};
    pairs = bucket->elements.many;
    if (j == 2) {
    	if (ISEQUAL(table, data, pairs[0])) {
	    bucket->elements.one = pairs[1]; data = pairs[0];
	    }
	else if (ISEQUAL(table, data, pairs[1])) {
	    bucket->elements.one = pairs[0]; data = pairs[1];
	    }
	else return NULL;
	FREEPAIRS (pairs);
	table->count--; bucket->count--;
	return (void *) data;
	};
    while (j--) {
    	if (ISEQUAL(table, data, *pairs)) {
	    data = *pairs;
	    /* we shrink this bucket */
	    newt = (bucket->count-1) 
		? ALLOCPAIRS(z, bucket->count-1) : NULL;
	    if (bucket->count-1 != j)
		    bcopy ((const char*)bucket->elements.many, (char*)newt, PTRSIZE*(bucket->count-j-1));
	    if (j)
		    bcopy ((const char*)(bucket->elements.many + bucket->count-j), (char*)(newt+bucket->count-j-1), PTRSIZE*j);
	    FREEPAIRS (bucket->elements.many);
	    table->count--; bucket->count--; bucket->elements.many = newt;
	    return (void *) data;
	    };
	pairs ++;
	};
    return NULL;
}