//
//  SieveHash.h
//  iLegal
//
//  Created by Ben Reeves on 07/02/2010.
//  Copyright 2010 Ben Reeves. All rights reserved.
//

#ifndef CHASHINCLUDE
#define CHASHINCLUDE

#ifdef __cplusplus
extern "C" {
#endif
	
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>

#define DEBUG(x, ...)//  printf(x, __VA_ARGS__)
	
static const double CHashRehashBound = 2.0 - 0.8;

		
#define HASH_FUNC BKDRHash
#define FREE_KEYS
#undef CHGROUP_LENGTHS
#define INCREMENTAL_RESIZE
#define NODE_SIZE sizeof(SieveHashNode)
#define SieveHashNodeSize sizeof(SieveHashNode)

typedef struct
{
	const char * key;
	void * value;
} SieveHashNode;

typedef struct
{
	SieveHashNode * data;
	unsigned int count;
	unsigned int capacity;
} CHashArray;
	
typedef enum
{
	IterInvalid,
	IterExists,
	IterPartialLoc,
	IterPartialHash,
} CHashIterStatus;
	
typedef struct 
{
	int status;
	const char * key;
	void * value;
	unsigned int hash;
	unsigned int nArray;
	unsigned int bucket;
} CHashIter;
	
static const CHashIter CHashNULLIter = {0};
static const SieveHashNode CHashNULLINode = {0};

typedef struct
{
	CHashArray * arrays;
	unsigned int nArrays;
	unsigned int defaultCapacity;
} SieveHash;
	

static inline unsigned int _hash_char(const char *s)
{
	unsigned int h = *s;
	if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s;
	return h;
}

	
static inline unsigned int BKDRHash(char* str, unsigned int len)
{
	unsigned int seed = 131; /* 31 131 1313 13131 131313 etc.. */
	unsigned int hash = 0;
	unsigned int i    = 0;
	
	for(i = 0; i < len; str++, i++)
	{
		hash = (hash * seed) + (*str);
	}
	
	return hash;
}
/* End Of BKDR Hash Function */


static inline unsigned int MurmurHash2 (char * key, unsigned int len)
{
	// 'm' and 'r' are mixing constants generated offline.
	// They're not really 'magic', they just happen to work well.
	
	const unsigned int m = 0x5bd1e995;
	const int r = 24;
	
	// Initialize the hash to a 'random' value
	
	unsigned int h = 39402 ^ len;
	
	// Mix 4 bytes at a time into the hash
	
	const unsigned char * data = (const unsigned char *)key;
	
	while(len >= 4)
	{
		unsigned int k = *(unsigned int *)data;
		
		k *= m; 
		k ^= k >> r; 
		k *= m; 
		
		h *= m; 
		h ^= k;
		
		data += 4;
		len -= 4;
	}
	
	// Handle the last few bytes of the input array
	
	switch(len)
	{
		case 3: h ^= data[2] << 16;
		case 2: h ^= data[1] << 8;
		case 1: h ^= data[0];
			h *= m;
	};
	
	// Do a few final mixes of the hash to ensure the last few
	// bytes are well-incorporated.
	
	h ^= h >> 13;
	h *= m;
	h ^= h >> 15;
	
	return h;
} 

SieveHash * SieveHashInit(unsigned int capacity);

unsigned int SieveHashCount(SieveHash * table);

void SieveHashPrintCapactities(SieveHash * table);

static inline void SieveHashAdd(SieveHash * table, const char * key,  void * value) __attribute__((always_inline));

static inline void SieveHashAddNode(SieveHash * table, SieveHashNode * node);
	
static inline void SieveHashEmpty(SieveHash * table);
		
static inline void SieveHashSetCapacity(SieveHash * table, int nArray, unsigned int newCapacity);
	
static inline CHashIter SieveHashLookUp(SieveHash * table, const char * key) __attribute__((always_inline));

static inline void SieveHashSetIterValue(SieveHash * table, CHashIter * iter, void * value) __attribute__((always_inline));

static inline void SieveHashDestroyNode(SieveHashNode * node) __attribute__((always_inline));

static inline int SieveHashNodeIsEqualToKey(SieveHashNode * nodeOne, const char * key) __attribute__((always_inline)); 

static inline unsigned int SieveHashArrayHash(SieveHash * table, const char * key) __attribute__((always_inline));

static void inline SieveHashSetNArrays(SieveHash * table, unsigned int nArrays) __attribute__((always_inline));

static inline void SieveHashDoubleCapacity(SieveHash * table, unsigned int nArray) __attribute__((always_inline));

static inline int SieveHashShouldRehash(SieveHash * table, unsigned int nArray) __attribute__((always_inline));
	
static inline void SieveHashAddUniqueNode(SieveHash * table, SieveHashNode * node) __attribute__((always_inline));

static inline void SieveHashAddUniqueNodePreHash(SieveHash * table, SieveHashNode * node, unsigned int hash);

static inline void SieveHashAddUniqueKey(SieveHash * table, const char * key,  void * value) __attribute__((always_inline));

static inline void SieveHashRemoveIter(SieveHash * table, CHashIter * iter) __attribute__((always_inline));

void SieveHashDestroy(SieveHash * table);
	
float SieveHashLoadFactor(SieveHash * table);
	
unsigned int SieveHashSize(SieveHash * table);
	
void SieveHashTestRoutine();
	
	
/** Funcs **/

static inline void SieveHashSetCapacity(SieveHash * table, int nArray, unsigned int newCapacity)
{
	
	//Array to save the old arays
	
	CHashArray oldArray = table->arrays[nArray];
	
	table->arrays[nArray].data = (SieveHashNode*)calloc(newCapacity, SieveHashNodeSize);	
	table->arrays[nArray].count = 0;
	table->arrays[nArray].capacity = newCapacity;
	
	int ii;
	for (ii = 0; ii < oldArray.capacity; ++ii)
	{
		SieveHashNode * node = &oldArray.data[ii];
		
		if (node->key)
		{		
			SieveHashAddUniqueNode(table, node);
		}
	}
	
	free(oldArray.data);
	
}
	
static inline void SieveHashSetIterValue(SieveHash * table, CHashIter * iter, void * value)
{
	
	if (iter->status == IterPartialLoc)
	{
		table->arrays[iter->nArray].data[iter->bucket].key = iter->key;
		table->arrays[iter->nArray].data[iter->bucket].value = value;

		++table->arrays[iter->nArray].count;
	}
	else if (iter->status == IterPartialHash)
	{
		SieveHashNode node;
		node.key = iter->key;
		node.value = value;
		
		SieveHashAddUniqueNodePreHash(table, &node, iter->hash);
	}
	
}

static inline void SieveHashRemoveIter(SieveHash * table, CHashIter * iter)
{
	table->arrays[iter->nArray].data[iter->bucket] = CHashNULLINode;
	--table->arrays[iter->nArray].count;
}
	
		
static inline void SieveHashDestroyNode(SieveHashNode * node)
{	
	#ifdef FREE_KEYS
	free((char*)node->key);
	#endif
	
	free(node);
}
	
static inline int SieveHashNodeIsEqualToKey(SieveHashNode * nodeOne, const char * key) 
{	
	if (strcmp(nodeOne->key, key) == 0)
		return 1;
	
	return 0;
}

static inline unsigned int SieveHashArrayHash(SieveHash * table, const char * key)
{
	unsigned int bucket = BKDRHash((char*)key, strlen(key));
	
	return bucket;
}
	
static inline CHashIter SieveHashLookUp(SieveHash * table, const char * key)
{
	unsigned int hash = SieveHashArrayHash(table, key);
	int ii;
	CHashIter iter;
	iter.key = key;
	iter.hash = hash;
	iter.status = IterPartialHash;
		
	for (ii = 0; ii < table->nArrays; ++ii)
	{	
		unsigned int bucket = hash % table->arrays[ii].capacity;
		
		if (table->arrays[ii].data[bucket].key)
		{			
			if (SieveHashNodeIsEqualToKey(&table->arrays[ii].data[bucket], key))
			{
				iter.value =  table->arrays[ii].data[bucket].value;
				iter.nArray = ii;
				iter.bucket = bucket;
				iter.status = IterExists;
				
				return iter;
			}
		}
		else if (iter.status == IterPartialHash) 
		{
			iter.status = IterPartialLoc;
			iter.nArray = ii;
			iter.bucket = bucket;

		}
	
	}
	
	return iter;
}

static inline void SieveHashEmpty(SieveHash * table)
{
	int i;
	for (i = 0; i < table->nArrays; ++i)
	{		
		memset(table->arrays[i].data, 0, table->arrays[i].capacity);
		table->arrays[i].count = 0;
	}

}

void SieveHashDestroy(SieveHash * table)
{
	int i;
	for (i = 0; i < table->nArrays; ++i)
	{
		free(table->arrays[i].data);
	}
	
	free(table->arrays);
	free(table);
}


static void inline SieveHashSetNArrays(SieveHash * table, unsigned int nArrays)
{
	
	table->arrays = (CHashArray*)realloc(table->arrays, sizeof(CHashArray)*nArrays);
	
	int ii;
	for (ii = table->nArrays; ii < nArrays; ++ii)
	{		
		unsigned int capacity = table->defaultCapacity / 2;
		
		table->arrays[ii].data = (SieveHashNode *)calloc(2, SieveHashNodeSize);		
		table->arrays[ii].capacity = capacity;
		table->arrays[ii].count = 0;
		
	}
	
	table->nArrays = nArrays;
}

SieveHash * SieveHashInit(unsigned int capacity)
{
	SieveHash * table = (SieveHash*)malloc(sizeof(SieveHash));
	table->nArrays = 0;
	table->arrays = NULL;
	
	if (capacity < 4) capacity = 4;
	
	table->defaultCapacity = capacity;
	
	SieveHashSetNArrays(table, 1);
		
	return table;
}

unsigned int SieveHashCount(SieveHash * table)
{
	unsigned int total = 0;
	int ii;
	for (ii = 0; ii < table->nArrays; ++ii)
	{	
		total += table->arrays[ii].count;
	}
	
	return total;
}

static inline void SieveHashDoubleCapacity(SieveHash * table, unsigned int nArray)
{
	unsigned int half = table->arrays[nArray].capacity / 2;
	SieveHashSetCapacity(table, nArray, table->arrays[nArray].capacity + half + 10);
}

static inline int SieveHashShouldRehash(SieveHash * table, unsigned int nArray)
{	
	if (table->arrays[nArray].count *  CHashRehashBound > table->arrays[nArray].capacity)
		return 1;
	else 
		return 0;
	
}

float SieveHashLoadFactor(SieveHash * table)
{
	return (float)SieveHashCount(table) / (float)SieveHashSize(table);
}


unsigned int SieveHashSize(SieveHash * table)
{
	unsigned int total = 0;
	int ii;
	for (ii = 0; ii < table->nArrays; ++ii)
	{	
		total += table->arrays[ii].capacity;
	}
	
	return total;
}

void SieveHashPrintCapactities(SieveHash * table)
{
	printf("Capacities:\n");
	unsigned int total = 0;
	int ii;
	for (ii = 0; ii < table->nArrays; ++ii)
	{	
		total += table->arrays[ii].capacity;
		printf("Table %d capacity: %d\n", ii, table->arrays[ii].capacity);
	}
	
	printf("Total: %u-- Percent Full: %f\n", total, (float)SieveHashCount(table) / total * 100.0f);
}

static inline void SieveHashAdd(SieveHash * table, const char * key,  void * value)
{
	SieveHashNode node;
	node.key = key;
	node.value = value;
	
	SieveHashAddNode(table, &node);
}

static inline void SieveHashAddUniqueKey(SieveHash * table, const char * key,  void * value)
{
	SieveHashNode node;
	node.key = key;
	node.value = value;
	
	SieveHashAddUniqueNode(table, &node);
}

static inline void SieveHashAddUniqueNode(SieveHash * table, SieveHashNode * node)
{
	unsigned int hash = SieveHashArrayHash(table, node->key);
	
	SieveHashAddUniqueNodePreHash(table, node, hash);
}


static inline void SieveHashAddUniqueNodePreHash(SieveHash * table, SieveHashNode * node, unsigned int hash)
{
	int ii;
	for (ii = 0; ii < table->nArrays; ++ii)
	{
		CHashArray * curArray = &table->arrays[ii];
		
		unsigned int bucket = hash  % curArray->capacity;
		
		SieveHashNode * onode = &curArray->data[bucket];
		
		if (onode->key)
		{	
			//If Capacity is more than LOAD_FACTOR resize
			if (SieveHashShouldRehash(table, ii))
			{
				SieveHashDoubleCapacity(table, ii);
				SieveHashAddUniqueNodePreHash(table, node, hash);
				return;
			}
			//If were in the last table and none are near capacity then we create a new one
			else if (ii == table->nArrays-1)
			{
				SieveHashSetNArrays(table, table->nArrays+1);
			}
		}
		//No colission, simple insert it
		else
		{	
			
			++curArray->count;
			*onode = *node;
			return;
		}
	}
	
}


static inline void SieveHashAddNode(SieveHash * table, SieveHashNode * node)
{
	unsigned int hash = SieveHashArrayHash(table, node->key);
	
	int ii;
	for (ii = 0; ii < table->nArrays; ++ii)
	{		
		CHashArray * curArray = &table->arrays[ii];
		
		unsigned int bucket = hash  % curArray->capacity;
		
		SieveHashNode * onode = &curArray->data[bucket];
		
		//A collission occured
		if (onode->key)
		{			
			//If the node is equal then do nothing
			if (SieveHashNodeIsEqualToKey(onode, node->key))
			{
				return;
			}
		}
	}
	
	SieveHashAddUniqueNodePreHash(table, node, hash);
}

#ifdef __cplusplus
}
#endif

#endif
