/*	set.c

	The following is a general-purpose set library originally developed
	by Hank Dietz and enhanced by Terence Parr to allow dynamic sets.
	
	Sets are now structs containing the #words in the set and
	a pointer to the actual set words.
	
	Generally, sets need not be explicitly allocated.  They are
	created/extended/shrunk when appropriate (e.g. in set_of()).
	HOWEVER, sets need to be destroyed (free()ed) when they go out of scope
	or are otherwise no longer needed.  A routine is provided to
	free a set.
	
	Sets can be explicitly created with set_new(s, max_elem).
	
	Sets can be declared to have minimum size to reduce realloc traffic.
	Default minimum size = 1.
	
	Sets can be explicitly initialized to have no elements (set.n == 0)
	by using the 'empty' initializer:
	
	Examples:
		set a = empty;	-- set_deg(a) == 0
		
		return( empty );
	
	Example set creation and destruction:
	
	set
	set_of2(e,g)
	unsigned e,g;
	{
		set a,b,c;
		
		b = set_of(e);		-- Creates space for b and sticks in e
		set_new(c, g);		-- set_new(); set_orel() ==> set_of()
		set_orel(g, &c);
		a = set_or(b, c);
		.
		.
		.
		set_free(b);
		set_free(c);
		return( a );
	}

	1987 by Hank Dietz
	
	Modified by:
		Terence Parr
		Purdue University
		October 1989
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef MEMCHK
#include "trax.h"
#endif
#include "set.h"

/* elems can be a maximum of 32 bits */
static unsigned bitmask[] = {
	0x00000001, 0x00000002, 0x00000004, 0x00000008,
	0x00000010, 0x00000020, 0x00000040, 0x00000080,
	0x00000100, 0x00000200, 0x00000400, 0x00000800,
	0x00001000, 0x00002000, 0x00004000, 0x00008000,
	0x00010000, 0x00020000, 0x00040000, 0x00080000,
	0x00100000, 0x00200000, 0x00400000, 0x00800000,
	0x01000000, 0x02000000, 0x04000000, 0x08000000,
	0x10000000, 0x20000000, 0x40000000, 0x80000000
};

set empty = set_init;
static unsigned min=1;

#define StrSize		200

#ifdef MEMCHK
#define CHK(a)					\
	if ( a.setword != NULL )	\
	  if ( !valid(a.setword) )	\
		{fprintf(stderr, "%s(%d): invalid set\n",__FILE__,__LINE__); exit(-1);}
#else
#define CHK(a)
#endif

/*
 * Set the minimum size (in words) of a set to reduce realloc calls
 */
void
set_size(n)
unsigned n;
{
	min = n;
}

unsigned int
set_deg(a)
set a;
{
	/* Fast compute degree of a set... the number
	   of elements present in the set.  Assumes
	   that all word bits are used in the set
	   and that SETSIZE(a) is a multiple of WORDSIZE.
	*/
	register unsigned *p = &(a.setword[0]);
	register unsigned *endp = &(a.setword[a.n]);
	register unsigned degree = 0;

	CHK(a);
	if ( a.n == 0 ) return(0);
	while ( p < endp )
	{
		register unsigned t = *p;
		register unsigned *b = &(bitmask[0]);
		do {
			if (t & *b) ++degree;
		} while (++b < &(bitmask[WORDSIZE]));
		p++;
	}

	return(degree);
}

set
set_or(b,c)
set b, c;
{
	/* Fast set union operation */
	/* resultant set size is max(b, c); */
	set *big;
	set t;
	unsigned int m,n;
	register unsigned *r, *p, *q, *endp;

	CHK(b); CHK(c);
	t = empty;
	if (b.n > c.n) {big= &b; m=b.n; n=c.n;} else {big= &c; m=c.n; n=b.n;}
	set_ext(&t, m);
	r = t.setword;

	/* Or b,c until max of smaller set */
	q = c.setword;
	p = b.setword;
	endp = &(b.setword[n]);
	while ( p < endp ) *r++ = *p++ | *q++;	

	/* Copy rest of bigger set into result */
	p = &(big->setword[n]);
	endp = &(big->setword[m]);
	while ( p < endp ) *r++ = *p++;

	return(t);
}

set
set_and(b,c)
set b, c;
{
	/* Fast set intersection operation */
	/* resultant set size is min(b, c); */
	set t;
	unsigned int n;
	register unsigned *r, *p, *q, *endp;

	CHK(b); CHK(c);
	t = empty;
	n = (b.n > c.n) ? c.n : b.n;
	set_ext(&t, n);
	r = t.setword;

	/* & b,c until max of smaller set */
	q = c.setword;
	p = b.setword;
	endp = &(b.setword[n]);
	while ( p < endp ) *r++ = *p++ & *q++;	

	return(t);
}

set
set_dif(b,c)
set b, c;
{
	/* Fast set difference operation b - c */
	/* resultant set size is size(b) */
	set t;
	unsigned int n;
	register unsigned *r, *p, *q, *endp;

	CHK(b); CHK(c);
	t = empty;
	n = (b.n <= c.n) ? b.n : c.n ;
	set_ext(&t, b.n);
	r = t.setword;

	/* Dif b,c until smaller set size */
	q = c.setword;
	p = b.setword;
	endp = &(b.setword[n]);
	while ( p < endp ) *r++ = *p++ & (~ *q++);	

	/* Copy rest of b into result if size(b) > c */
	if ( b.n > n )
	{
		p = &(b.setword[n]);
		endp = &(b.setword[b.n]);
		while ( p < endp ) *r++ = *p++;
	}

	return(t);
}

set
set_of(b)
unsigned b;
{
	/* Fast singleton set constructor operation */
	static set a;

	if ( b == nil ) return( empty );
	set_new(a, b);
	a.setword[DIVWORD(b)] = bitmask[MODWORD(b)];

	return(a);
}

/*
 * Extend (or shrink) the set passed in to have n words.
 *
 * if n is smaller than the minimum, boost n to have the minimum.
 * if the new set size is the same as the old one, do nothing.
 */
void
set_ext(a, n)
set *a;
unsigned int n;
{
	register unsigned *p;
	register unsigned *endp;
	unsigned int size;
	
	CHK((*a));
    if ( a->n == 0 )
    {
        a->setword = (unsigned *) calloc(n, BytesPerWord);
        if ( a->setword == NULL )
        {
            fprintf(stderr, "set_ext(%d words): cannot allocate set\n", n);
			*((char *)5) = 1;
            exit(-1);
        }
        a->n = n;
        return;
    }
	if ( n < min ) n = min;
	if ( a->n == n || n == 0 ) return;
	size = a->n;
	a->n = n;
	a->setword = (unsigned *) realloc( a->setword, (n*BytesPerWord) );
	if ( a->setword == NULL )
	{
		fprintf(stderr, "set_ext(%d words): cannot allocate set\n", n);
		exit(-1);
	}

	p    = &(a->setword[size]);		/* clear from old size to new size */
	endp = &(a->setword[a->n]);
	do {
		*p++ = 0;
	} while ( p < endp );
}

set
set_not(a)
set a;
{
	/* Fast not of set a (assumes all bits used) */
	/* size of resultant set is size(a) */
	/* ~empty = empty cause we don't know how bit to make set */
	set t;
	register unsigned *r;
	register unsigned *p = a.setword;
	register unsigned *endp = &(a.setword[a.n]);

	CHK(a);
	t = empty;
	if ( a.n == 0 ) return( empty );
	set_ext(&t, a.n);
	r = t.setword;
	
	do {
		*r++ = (~ *p++);
	} while ( p < endp );

	return(t);
}

int
set_equ(a,b)
set a, b;
{
	/* Fast set equality comparison operation */
	register unsigned *p = a.setword;
	register unsigned *q = b.setword;
	register unsigned *endp = &(a.setword[a.n]);

	CHK(a); CHK(b);
	if ( a.n != b.n ) return(0);
	else if ( a.n==0 ) return(1);	/* empty == empty */
	
	do {
		if (*p != *q) return(0);
		++q;
	} while ( ++p < endp );

	return(1);
}

int
set_sub(a,b)
set a, b;
{
	/* Fast check for a is a proper subset of b (alias a < b) */
	register unsigned *p = a.setword;
	register unsigned *q = b.setword;
	register unsigned *endp = &(a.setword[a.n]);
	register int asubset = 0;

	CHK(a); CHK(b);
	if ( a.n > b.n ) return(0);
	if ( a.n == 0 ) return(1);		/* empty is sub of everything */
	if (a.n==0 && b.n==0) return(1);/* empty prop sub of empty */

	do {
		/* Prune tests based on guess that most set words
		   will match, particularly if a is a subset of b.
		*/
		if (*p != *q) {
			if (*p & ~(*q)) {
				/* Fail -- a contains something b does not */
				return(0);
			}
			/* At least this word was a proper subset, hence
			   even if all other words are equal, a is a
			   proper subset of b.
			*/
			asubset = 1;
		}
		++q;
	} while (++p < endp);

	/* at this point, a,b are equ or a subset */
	if ( asubset || b.n == a.n ) return(asubset);
	
	/* if !asubset, size(b) > size(a), then a=b and must check rest of b */
	p = q;
	endp = &(a.setword[b.n]);
	do
	{
		if ( *p++ ) return(1);
	} while ( p < endp );

	return(0);
}

unsigned
set_int(b)
set b;
{
	/* Fast pick any element of the set b */
	register unsigned *p = b.setword;
	register unsigned *endp = &(b.setword[b.n]);

	CHK(b);
	if ( b.n == 0 ) return( nil );

	do {
		if (*p) {
			/* Found a non-empty word of the set */
			register unsigned i = ((p - b.setword) << LogWordSize);
			register unsigned t = *p;
			p = &(bitmask[0]);
			while (!(*p & t)) {
				++i; ++p;
			}
			return(i);
		}
	} while (++p < endp);

	/* Empty -- only element it contains is nil */
	return(nil);
}

int
set_el(b,a)
unsigned b;
set a;
{
	CHK(a);
	/* nil is an element of every set */
	if (b == nil) return(1);
	if ( a.n == 0 || NumWords(b) > a.n ) return(0);
	
	/* Otherwise, we have to check */
	return( a.setword[DIVWORD(b)] & bitmask[MODWORD(b)] );
}

int set_nil(a)
set a;
{
	/* Fast check for nil set */
	register unsigned *p = a.setword;
	register unsigned *endp = &(a.setword[a.n]);

	CHK(a);
	if ( a.n == 0 ) return(1);
	/* The set is not empty if any word used to store
	   the set is non-zero.  This means one must be a
	   bit careful about doing things like negation.
	*/
	do {
		if (*p) return(0);
	} while (++p < endp);
	
	return(1);
}

char *
set_str(a)
set a;
{
	/* Fast convert set a into ASCII char string...
	   assumes that all word bits are used in the set
	   and that SETSIZE is a multiple of WORDSIZE.
	   Trailing 0 bits are removed from the string.
	   if no bits are on or set is empty, "" is returned.
	*/
	register unsigned *p = a.setword;
	register unsigned *endp = &(a.setword[a.n]);
	static char str_tmp[StrSize+1];
	register char *q = &(str_tmp[0]);

	CHK(a);
	if ( a.n==0 ) {*q=0; return( &(str_tmp[0]) );}
	do {
		register unsigned t = *p;
		register unsigned *b = &(bitmask[0]);
		do {
			*(q++) = ((t & *b) ? '1' : '0');
		} while (++b < &(bitmask[WORDSIZE]));
	} while (++p < endp);

	/* Trim trailing 0s & NULL terminate the string */
	while ((q > &(str_tmp[0])) && (*(q-1) != '1')) --q;
	*q = 0;

	return(&(str_tmp[0]));
}

set
set_val(s)
register char *s;
{
	/* Fast convert set ASCII char string into a set.
	   If the string ends early, the remaining set bits
	   are all made zero.
	   The resulting set size is just big enough to hold all elements.
	*/
	static set a;
	register unsigned *p, *endp;

	set_new(a, strlen(s));
	p = a.setword;
	endp = &(a.setword[a.n]);
	do {
		register unsigned *b = &(bitmask[0]);
		/* Start with a word with no bits on */
		*p = 0;
		do {
			if (*s) {
				if (*s == '1') {
					/* Turn-on this bit */
					*p |= *b;
				}
				++s;
			}
		} while (++b < &(bitmask[WORDSIZE]));
	} while (++p < endp);

	return(a);
}

/*
 * Or element e into set a.  a can be empty.
 */
void
set_orel(e, a)
unsigned e;
set *a;
{
	CHK((*a));
	if ( e == nil ) return;
	if ( NumWords(e) > a->n ) set_ext(a, NumWords(e));
	a->setword[DIVWORD(e)] |= bitmask[MODWORD(e)];
}

/*
 * Or set b into set a.  a can be empty. does nothing if b empty.
 */
void
set_orin(a, b)
set *a, b;
{
	/* Fast set union operation */
	/* size(a) is max(a, b); */
	unsigned int m;
	register unsigned *p,
					  *q    = b.setword,
					  *endq = &(b.setword[b.n]);

	CHK((*a)); CHK(b);
	if ( b.n == 0 ) return;
	m = (a->n > b.n) ? a->n : b.n;
	set_ext(a, m);
	p = a->setword;
	do {
		*p++ |= *q++;
	} while ( q < endq );
}

void
set_rm(e,a)		/* removes elem arg1 from set arg2 */
unsigned e;
set a;
{
	/* Does not effect size of set */
	CHK(a);
	if ( (e == nil) || (NumWords(e) > a.n) ) return;
	a.setword[DIVWORD(e)] ^= (a.setword[DIVWORD(e)]&bitmask[MODWORD(e)]);
}

void
set_clr(a)		/* clears all elems of set arg1 */
set a;
{
	/* Does not effect size of set */
	register unsigned *p = a.setword;
	register unsigned *endp = &(a.setword[a.n]);
	
	CHK(a);
	if ( a.n == 0 ) return;
	do {
		*p++ = 0;
	} while ( p < endp );
}

set
set_dup(a)
set a;
{
	set b;
	register unsigned *p,
					  *q    = a.setword,
					  *endq = &(a.setword[a.n]);
	
	CHK(a);
	b = empty;
	if ( a.n == 0 ) return( empty );
	set_ext(&b, a.n);
	p = b.setword;
	do {
		*p++ = *q++;
	} while ( q < endq );
	
	return(b);
}

/*
 * Return a nil terminated list of unsigned ints that represents all
 * "on" bits in the bit set.
 *
 * e.g. {011011} --> {1, 2, 4, 5, nil}
 *
 * set_PDQ and set_pdq are useful when an operation is required on each element
 * of a set.  Normally, the sequence is:
 *
 *		while ( set_deg(a) > 0 ) {
 *			e = set_int(a);
 *			set_rm(e, a);
 *			...process e...
 *		}
 * Now,
 *
 *		t = e = set_pdq(a);
 *		while ( *e != nil ) {
 *			...process *e...
 *			e++;
 *		}
 *		free( t );
 *
 * We have saved many set calls and have not destroyed set a.
 */
void set_PDQ(a, q)
set a;
register unsigned *q;
{
	register unsigned *p = a.setword,
					  *endp = &(a.setword[a.n]);
	register unsigned e=0;

	CHK(a);
	/* are there any space (possibility of elements)? */
	if ( a.n == 0 ) return;
	do {
		register unsigned t = *p;
		register unsigned *b = &(bitmask[0]);
		do {
			if ( t & *b ) *q++ = e;
			e++;
		} while (++b < &(bitmask[WORDSIZE]));
	} while (++p < endp);
	*q = nil;
}

/*
 * Same as set_PDQ except allocate memory.  set_pdq is the natural function
 * to use.
 */
unsigned *
set_pdq(a)
set a;
{
	unsigned *q;
	int max_deg;
	
	CHK(a);
	max_deg = WORDSIZE*a.n;
	/* assume a.n!=0 & no elements is rare, but still ok */
	if ( a.n == 0 ) return(NULL);
	q = (unsigned *) malloc((max_deg+1)*BytesPerWord);
	if ( q == NULL ) return( NULL );
	set_PDQ(a, q);
	return( q );
}

/* a function that produces a hash number for the set
 */
int set_hash(a,mod)
set a;
register int mod;
{
	/* Fast hash of set a (assumes all bits used) */
	register unsigned *p = &(a.setword[0]);
	register unsigned *endp = &(a.setword[a.n]);
	register int i= 0;

	CHK(a);
	while (p<endp){
		i = ((i + i + (i<0)) ^ (~ *p));
		p++;
	}

	return(((i< 0) ? -i : i) % mod);
}
