/* %Z%%M% %I% %E% */

/*
 * Copyright (c) 1990-2005 by Wayne C. Gramlich.
 * All rights reserved.
 */

/* An implementation of a hash table (set) data abstraction: */

#ifndef HASH_DEFS_H
#include "hash_defs.h"
#endif

#ifndef HEAP_EXPORTS_H
#include "heap_exports.h"
#endif

#ifndef STR_EXPORTS_H
#include "str_exports.h"
#endif

#ifndef UNIX_MEMORY_H
#include "unix_memory.h"
#endif

#ifndef VECTOR_DEFS_H
#include "vector_defs.h"
#endif

extern int	hsh_insert_replace(Hsh, Hsh_value, int /*replace*/);
extern void	hsh_rehash(Hsh, Hsh_value);
extern void	hsh_bucket_delete(Hsh_header, int);
extern void	hsh_bucket_append(Hsh_header, int, Hsh_value, Heap);

/* LINTLIBRARY */

/*
 * hsh_client_data(hash)
 *	This routine will return the client data (i.e. handle) for "hash".
 */
Hsh_value
hsh_client_data(
	Hsh		hash)
{
	return hash->handle;
}

/*
 * hsh_create(size, routine_equal, routine_hash, handle, heap)
 *	This routine will create and return a new hash object.  "size"
 *	specifies the initial minimum number of hash slots.  "routine_hash"
 *	is a routine that given an object will return a hash value.
 *	"routine_equal" is a routine that given two objects will return
 *	1 if they are equal and 0 otherwise.  "handle" is passed as the
 *	last argument to "routine_hash" and "routine_equal".  In addition,
 *	for "handle" is returned by hsh_lookup whenever an object can
 *	not be found.  "heap" is used to allocate the new hash object.
 */
Hsh
hsh_create(
	int			size,
	Hsh_routine_equal	routine_equal,
	Hsh_routine_hash	routine_hash,
	Hsh_value		handle,
	Heap			heap)
{
	Hsh			hash;
	int			power;
	int			length;
	Hsh_header		headers;

	for (power = 0, length = 1; length < size; power++, length <<= 1)
		;

	hash = heap_allocate(heap, Hsh);
	hash->handle = handle;
	headers = (Hsh_header)heap_alloc_zeroed(heap,
				    sizeof(Hsh_header_rec) * length);
	hash->headers = headers;
	hash->heap = heap;
	hash->length = length;
	hash->limit = (length >> 1) + (length >> 2);	/* 75% */
	hash->mask = length - 1;
	hash->min_power = power;
	hash->power = power;
	hash->routine_equal = routine_equal;
	hash->routine_hash = routine_hash;
	hsh_erase(hash);
	return hash;
}

/*
 * hsh_delete(hash, value)
 *	This routine will remove "value" from "hash".  1 is returned if
 *	"value" was not present in "hash" and 0 otherwise.
 */
int
hsh_delete(
	Hsh			hash,
	Hsh_value		value)
{
	Hsh_bucket		bucket;
	int			count;
	Hsh_value		handle;
	Hsh_header		header;
	int			index;
	Hsh_routine_equal	routine_equal;

	handle = hash->handle;
	index = hash->routine_hash(value, handle);
	header = hash->headers + (index & hash->mask);
	if (header->rehash != hash->power) {
		hsh_rehash(hash, index);
	}
	routine_equal = hash->routine_equal;
	for (count = header->size, bucket = header->buckets;
	     count > 0; count--, bucket++) {
		if ((bucket->index == index) &&
		    (routine_equal(value, bucket->value, handle))) {
			hsh_bucket_delete(header, (int)(header->size - count));
			hash->size--;
			return 0;
		}
	}
	return 1;
}

/*
 * hsh_destroy(hash)
 *	This routine will release all of the storage associated with "hash".
 */
void
hsh_destroy(
	Hsh		hash)
{
	hsh_erase(hash);
	heap_free(hash->heap, (Pointer)hash->headers);
	heap_free(hash->heap, (Pointer)hash);
}

/*
 * hsh_erase(hash)
 *	This routine will remove all entries from "hash".
 */
void
hsh_erase(
	Hsh		hash)
{
	int		count;
	Hsh_header	header;
	Hsh_header_rec	header_zero;
	Heap		heap;

	header_zero.buckets = (Hsh_bucket)0;
	header_zero.limit = 0;
	header_zero.size = 0;
	header_zero.rehash = hash->power;
	heap = hash->heap;
	for (count = hash->length, header = hash->headers;
	     count > 0; count--, header++) {
		if (header->buckets != (Hsh_bucket)0) {
			heap_free(heap, (Pointer)header->buckets);
		}
		*header = header_zero; /* Copy entire contents */
	}
	hash->size = 0;
}

/*
 * hsh_exists(hash, value)
 *	This routine will return 1 if "value" is in "hash" and 0 otherwise.
 */
int
hsh_exists(
	Hsh			hash,
	Hsh_value		value)
{
	Hsh_bucket		bucket;
	int			count;
	Hsh_value		handle;
	Hsh_header		header;
	int			index;
	Hsh_routine_equal	routine_equal;

	handle = hash->handle;
	index = hash->routine_hash(value, handle);
	header = hash->headers + (index & hash->mask);
	if (header->rehash != hash->power) {
		hsh_rehash(hash, index);
	}
	routine_equal = hash->routine_equal;
	for (count = header->size, bucket = header->buckets;
	     count > 0; count--, bucket++) {
		if ((bucket->index == index) &&
		    routine_equal(value, bucket->value, handle)) {
			return 1;
		}
	}
	return 0;
}

/*
 * hsh_iterate(hash, routine, handle)
 *	This routine will iterate over each value in "hash" calling
 *	"routine(value, handle, hash->handle)=>int" for each value.
 *	The sum of the values returned from "routine" is returned. O(n)
 */
int
hsh_iterate(
	Hsh			hash,
	Hsh_routine_iterate	routine,
	Hsh_value		handle)
{
	Hsh_bucket		bucket;
	int			count;
	int			counter;
	Hsh_header		header;
	int			sum;

	sum = 0;
	for (count = hash->size, header = hash->headers;
	     count > 0; header++) {
		for (counter = header->size, bucket = header->buckets;
		     counter > 0; count--, counter--, bucket++) {
			sum += routine(bucket->value, handle);
		}
	}
	return sum;
}

/*
 * hsh_insert(hash, value)
 *	This routine will insert "value" into "hash" provided that "value
 *	is not already in "hash".  1 is returned if "value" was already in
 *	"hash" and 0 otherwise. O(k)
 */
int
hsh_insert(
	Hsh		hash,
	Hsh_value	value)
{
	return hsh_insert_replace(hash, value, 0);
}

/*
 * hsh_insert_replace(hash, value, replace)
 *	If "replace" is 1, this routine will replace "value" in "hash";
 *	otherwise, "value" will be inserted into "hash".
 */
int
hsh_insert_replace(
	Hsh			hash,
	Hsh_value		value,
	int			replace)
{
	int			bytes;
	Hsh_bucket		bucket;
	int			count;
	Hsh_value		handle;
	Hsh_header		header;
	int			index;
	Hsh_routine_equal	routine_equal;

	/* Grow header table if necessary: */
	if (hash->size >= hash->limit) {
		bytes = hash->length * sizeof(Hsh_header_rec);
		hash->limit += hash->length;
		hash->length <<= 1;
		hash->power++;
		hash->mask = hash->length - 1;
		hash->headers = (Hsh_header)heap_realloc(hash->heap,
					(Pointer)hash->headers,
					bytes + bytes);
		memset(((Str)hash->headers) + bytes, (char)0, bytes);
	}
	handle = hash->handle;
	index = hash->routine_hash(value, handle);
	header = hash->headers + (index & hash->mask);
	if (header->rehash != hash->power) {
		hsh_rehash(hash, index);
	}
	routine_equal = hash->routine_equal;
	for (count = header->size, bucket = header->buckets;
	     count > 0; count--, bucket++) {
		if ((bucket->index == index) &&
		    routine_equal(value, bucket->value, handle)) {
			if (replace) {
				bucket->value = value;
				return 0;
			} else  {
				return 1;
			}
		}
	}
	/* Not found! */
	hsh_bucket_append(header, index, value, hash->heap);
	hash->size++;
	return replace;
}

/*
 * hsh_lookup(hash, value)
 *	This routine will return the "value" from "hash".  If "value" is
 *	not in "hash", the default handle (specified as an argument to
 *	hsh_create) will be returned. O(k)
 */
Hsh_value
hsh_lookup(
	Hsh			hash,
	Hsh_value		value)
{
	Hsh_bucket		bucket;
	int			count;
	Hsh_value		handle;
	Hsh_header		header;
	int			index;
	Hsh_routine_equal	routine_equal;

	handle = hash->handle;
	index = hash->routine_hash(value, handle);
	header = hash->headers + (index & hash->mask);
	if (header->rehash != hash->power) {
		hsh_rehash(hash, index);
	}
	routine_equal = hash->routine_equal;
	for (count = header->size, bucket = header->buckets;
	     count > 0; count--, bucket++) {
		if ((bucket->index == index) &&
		    routine_equal(value, bucket->value, handle)) {
			return bucket->value;
		}
	}
	return handle;
}

/*
 * hsh_rehash(hash, index)
 *	This routine will ensure that all of the buckets at "index"
 *	in "hash" are up-to-date.
 */
void
hsh_rehash(
	Hsh		hash,
	int		index)
{
	Hsh_bucket	bucket;
	int		count;
	Hsh_header	header;
	Hsh_header	headers;
	Heap		heap;
	int		mask;
	int		max_mask;
	int		min_power;
	int		power;
	Hsh_header	previous;

	headers = hash->headers;
	heap = hash->heap;
	index &= hash->mask;
	max_mask = hash->mask;
	min_power = hash->min_power;
	previous = (Hsh_header)0;
	for (power = hash->power, mask = max_mask;
	     power >= min_power; power--, mask >>= 1) {
		header = headers + (index & mask);
		if (header == previous) {
			continue;
		}
		previous = header;
		bucket = header->buckets;
		for (count = header->size - 1; count >= 0; count--) {
			if ((bucket->index & max_mask) != (index & mask)) {
				/* Bucket is on wrong chain -- move it */
				hsh_bucket_append(
					headers + (bucket->index & max_mask),
					bucket->index,
					bucket->value,
					heap);
				/* Copy last chain bucket over moved bucket */
				if (count > 0) {
					*bucket = bucket[count];
				}
				header->size--;
			} else {
				bucket++;
			}
		}
	}
	header = headers + index;
	header->rehash = hash->power;
}

/*
 * hsh_replace(hash, value)
 *	This routine will store "value" into "hash" replacing any previous
 *	value.  1 is returned if "value" was not already in "hash" and 0
 *	otherwise. O(k)
 */
int
hsh_replace(
	Hsh		hash,
	Hsh_value	value)
{
	return hsh_insert_replace(hash, value, 1);
}

/*
 * hsh_size(hash)
 *	This routine will return the size of "hash".
 */
int
hsh_size(
	Hsh		hash)
{
	return hash->size;
}

/*
 * hsh_vector_extract(hash, vector)
 *	This routine will take each value from "hash" and append it to
 *	the end of "vector".  The number of values appended to "vector"
 *	is returned.
 */
int
hsh_vector_extract(
	Hsh		hash,
	Vektor		vector)
{
	Hsh_bucket	bucket;
	int		count;
	int		counter;
	Hsh_header	header;
	int		total;

	total = 0;
	for (count = hash->size, header = hash->headers;
	     count > 0; header++) {
		for (counter = header->size, bucket = header->buckets;
		     counter > 0; count--, counter--, bucket++) {
			vector_append(vector, (Vector_datum)bucket->value);
			total++;
		}
	}
	return total;
}

/*
 * hsh_vector_insert(hash, vector)
 *	This routine will take each element from "vector" and insert it
 *	into "hash".  The number of inserted elements is returned. O(n)
 */
int
hsh_vector_insert(
	Hsh		hash,
	Vektor		vector)
{
	int		count;
	Hsh_value	value;

	count = 0;
	VEC_LOOP(Hsh_value, vector, value) {
		if (!hsh_insert(hash, value)) {
			count++;
		}
	}
	return count;
}

/*
 * hsh_vector_replace(hash, vector)
 *	This routine will take each element from "vector" and replace it
 *	in "hash".  The number of replaced elements is returned. O(n)
 */
int
hsh_vector_replace(
	Hsh		hash,
	Vektor		vector)
{
	int		count;
	Hsh_value	value;

	count = 0;
	VEC_LOOP(Hsh_value, vector, value) {
		if (!hsh_replace(hash, value)) {
			count++;
		}
	}
	return count;
}


/*
 * hsh_bucket_delete(header, offset)
 *	This routine will delete the "offset"'th bucket from "header"
 */
void
hsh_bucket_delete(
	Hsh_header	header,
	int		offset)
{
	header->size--;
	if (offset < (int)header->size) {
		*(header->buckets + offset) = /* Copy contents */
			*(header->buckets + header->size);
	}
}

/*
 * hsh_bucket_append(header, index, value, heap)
 *	This routine will append a bucket containing "index" and "value"
 *	to the end of "header"->buckets.
 */
void
hsh_bucket_append(
	Hsh_header	header,
	int		index,
	Hsh_value	value,
	Heap		heap)
{
	Hsh_bucket	buckets;

	if (header->size == 0) {
		buckets = (Hsh_bucket)heap_alloc(heap,
						 (int)sizeof(Hsh_bucket_rec));
		header->limit = 1;
		header->buckets = buckets;
	} else if ((int)header->size >= (int)(1 << (header->limit-1))) {
		buckets = (Hsh_bucket)heap_realloc(heap,
			(Pointer)header->buckets,
			(int)((1 << header->limit) * sizeof(Hsh_bucket_rec)));
		header->limit++;
		header->buckets = buckets;
	} else {
		buckets = header->buckets;
	}
	buckets += header->size;
	header->size++;
	buckets->index = index;
	buckets->value = value;
}

