// Copyright (C)  2000 Intel Corporation.  All rights reserved.
//
// $Header: /usr/development/orp/orp/common/gc/hash_table.cpp,v 1.1.1.1 2001/07/23 07:25:39 xli18 Exp $
//

#include "platform.h"
#include <iostream.h>
#include "hash_table.h"
// Pick up InterlockedCompareExchangePointer.
#include "orp_synch.h"
 
#include "orp_for_gc.h"

#include <stdlib.h>
#include <string.h>
 
//
// Hash codes are unsigned ints which are 32 bit quantities on both ia32 and ia64.
// This naturally limits the size of hash tables to sizes that will fit in 32 bits.
//

// Table capacities (in slots) used, in order, as a hash table grows.
// RLH-TRAIN: these are intended to be primes; that has not been verified for
// every entry. Once the list is exhausted, _extend() falls back to doubling
// the current capacity instead.
unsigned primes [] = {2017,
                      5501, 
                      10091, 
                      20021, 
                      40009, 
                      80021, 
                      160001, 
                      320009, 
                      640007, 
                      1280023,
                      2560037,
                      5120053    };

// Number of entries in primes[]. Derived from the array itself so the two
// cannot drift apart when a capacity is added or removed.
const int NUMBER_OF_PRIMES     = (int)(sizeof(primes) / sizeof(primes[0]));

// Fraction of the capacity used to compute _threshold_entries; add_entry()
// grows the table once the resident count exceeds that threshold.
const double HASH_TABLE_THRESHOLD = 0.6;

//
// Construct an empty hash table whose capacity is the first entry of
// primes[].
//
// Every slot is zeroed (a zero slot means "empty"), the resident count is
// reset and the growth threshold is set to HASH_TABLE_THRESHOLD of the
// capacity. The scan cursor is initialized as well, because delete_entry()
// and next() read _save_pointer even when rewind() has never been called.
//
// If the slot array cannot be allocated an error is printed and the process
// is aborted; continuing would only dereference a NULL table.
//
Hash_Table::Hash_Table()
{
    _prime_index = 0;

    _size_in_entries = primes[_prime_index++];

    _resident_count       = 0;
    _save_pointer         = 0;
#if (GC_DEBUG>3)
    _maximum_resident_count = 0;
#endif
    _size_in_bytes        = sizeof(void *) * _size_in_entries;
    _threshold_entries    = (unsigned int)(_size_in_entries * HASH_TABLE_THRESHOLD);

    _table = (volatile void **)malloc(_size_in_bytes);

    if (_table == NULL) {
        DWORD LastError = GetLastError();
        // Casts make the arguments match the %d / %x conversions.
        fprintf(stderr,
                "Error: malloc failed when creating hash table "
                "%d (0x%x)\n",
                (int)LastError, (unsigned int)LastError);
        // Stop unconditionally: assert() alone disappears under NDEBUG and
        // the memset below would then write through a NULL pointer.
        assert(0);
        abort();
    }

    memset(_table, 0, _size_in_bytes);

#if (GC_DEBUG>2)
    //
    // This state will be used to ensure that nested operations
    // on this hash table don't subvert each other.
    //
    _table_state = empty;
#endif // _DEBUG
    return;
}

//
// Discard this hash table, releasing its slot array.
// Builds with GC_DEBUG>2 assert that no scan is in progress.
//
Hash_Table::~Hash_Table()
{
#if (GC_DEBUG>2)
    // A table must not be destroyed in the middle of a scan.
    assert(_table_state != scanning);
#endif // _DEBUG

#if 0 //(GC_DEBUG>3)
    // Disabled diagnostic: report the peak residency of the dying table.
    cout << "Deleted a hash table with residency ";
    cout << _maximum_resident_count << endl;
#endif

    free(_table);
}

//
// Insert `address` into this hash table unless it is already present, and
// return the index of the slot that holds it.
//
// Collisions are resolved by linear probing: starting at the slot chosen by
// _do_rs_hash(), successive slots (wrapping at the end of the table) are
// examined until `address` or an empty slot is found.
//
// `address` must not be NULL, since NULL is how an empty slot is represented.
//
unsigned
Hash_Table::add_entry(void *address)
{
    // Grow up front. The resident count is then at most the threshold, so
    // the probing below always reaches a free slot.
    if (_resident_count > _threshold_entries) {
        _extend();
    }

    // A null entry could not be told apart from an empty slot.
    assert(address != NULL);

    unsigned int slot = _do_rs_hash((POINTER_SIZE_INT)address,
                                    _size_in_entries);
    volatile void *occupant = _table[slot];

    if (occupant == address) {
        // Already resident in its home slot.
        return slot;
    }

#if (GC_DEBUG>2)
    // From here on the table may be modified: that must not happen while a
    // scan is in progress, and the modification is recorded.
    assert(_table_state != scanning);
    _table_state = modified;
#endif // _DEBUG

    if (occupant == NULL) {
#ifdef GC_SAPPHIRE
        // Claim the empty home slot atomically; if the exchange fails the
        // slot was taken in the meantime and we fall through to probing.
        if (InterlockedCompareExchangePointer((void **)&(_table[slot]), address, NULL) == NULL) {
            _resident_count++;
            return slot;
        }
#else
        // Not thread safe, but putting things in remsets is only done
        // while holding the gc_lock.
        _table[slot] = address;
        _resident_count++;
        return slot;
#endif // GC_SAPPHIRE
    }

    // Home slot is occupied by something else: probe the following slots.
    for (;;) {
        slot = (slot + 1) % _size_in_entries;
        occupant = _table[slot];

        if (occupant == address) {
            // Found an earlier insertion of the same address.
            return slot;
        }
        if (occupant != NULL) {
            // Occupied by a different entry; keep probing.
            continue;
        }

#ifdef GC_SAPPHIRE
        // Empty for now: try to claim it atomically, otherwise move on.
        if (InterlockedCompareExchangePointer((void **)&(_table[slot]), address, NULL) == NULL) {
            _resident_count++;
            return slot;
        }
#else
        // Not thread safe, but putting things in remsets is only done
        // while holding the gc_lock.
        _table[slot] = address;
        _resident_count++;
        return slot;
#endif
    }
}

//
// Re-seat the run of occupied slots that starts at `hash_code`.
//
// Called right after the entry at `hash_code` has been removed. Every entry
// between that slot and the next empty slot is taken out of the table and
// then re-inserted with add_entry(), so that the hole left by the removal
// cannot hide an entry from later probes or cause duplicates.
//
// The removal happens on the way down the recursion and the re-insertion on
// the way back up, so all entries of the run are out of the table before the
// first one is put back. Recursion depth equals the length of the run.
//
void
Hash_Table::_rehash(unsigned int hash_code)
{
    // The slot we start at was just deleted, so it is normally already zero;
    // slots reached through the recursion are occupied.
    volatile void *displaced = _table[hash_code];
    const bool slot_was_occupied = (displaced != 0);

    if (slot_was_occupied) {
        _resident_count--;
        _table[hash_code] = 0;
    }

    // Keep walking until the slot after this one is empty.
    const unsigned int successor = (hash_code + 1) % _size_in_entries;
    if (_table[successor] != 0) {
        _rehash(successor);
    }

    // Unwinding: put back the entry taken out of this slot, if any.
    if (slot_was_occupied) {
        add_entry((void *)displaced);
    }
}

//
// Remove `address` from this hash table.
//
// Returns
//    -1     if the table holds no entries at all,
//    false  if `address` is NULL or is not in the table,
//    true   if `address` was found and removed.
//
// After a removal the occupied slots that follow are re-seated with
// _rehash(), and the scan cursor is backed up by one slot so that an entry
// moved into the vacated position is not skipped by a scan in progress.
//
int
Hash_Table::delete_entry(void *address)
{
#if (GC_DEBUG>2)
    assert(address);
#endif // _DEBUG

    if (_resident_count == 0) {
        return - 1;
    }

    //
    // NULL marks an empty slot and can never be a resident entry. Without
    // this guard a NULL argument would compare equal to an empty home slot
    // and the resident count would be decremented for an entry that was
    // never stored.
    //
    if (address == NULL) {
        return false;
    }

    unsigned int hash_code = _do_rs_hash((POINTER_SIZE_INT)address,
                                         _size_in_entries);

    if (address == _table[hash_code]) { // hit in the home slot
        _table[hash_code] = 0;
        _resident_count--;
        //
        // Now need to rehash everything to the next zero so
        // that subsequent add_entries don't create duplicates.
        //
        if (_resident_count > 0) {
            _rehash(hash_code);
            // The _save_pointer might need to be updated since it might
            // have consumed the object that we just deleted. If it did
            // then another object might have taken its place and that
            // object would have been skipped if we don't update the save
            // pointer.
            if (_save_pointer > 0) {
                _save_pointer = _save_pointer - 1;
            }
        }
        return true;
    }

    unsigned int hash_idx = hash_code;
    while (TRUE) {
        // Probe the following slots; the walk ends at the first empty slot
        // or once it has come full circle back to the home slot.
        hash_idx = (hash_idx + 1) % _size_in_entries;

        if (_table[hash_idx] == 0) {
            return false;
        }

        if (hash_idx == hash_code) {
            return false;
        }

        if (address == _table[hash_idx]) { // hit
            _table[hash_idx] = 0;
            _resident_count--;
            //
            // Now need to rehash everything to the next zero so
            // that subsequent add_entries don't create duplicates.
            //
            _rehash(hash_idx);

            if (_save_pointer > 0) {
                _save_pointer = _save_pointer - 1;
            }
            return true;
        }
    }
}

#if (GC_DEBUG>3)
//
// Debug aid: print the resident count followed by every entry in the table.
// The table is rewound first and scanned to the end with next(), so any
// scan position held before the call is lost.
//
void
Hash_Table::dump()
{
	void *pp_obj;

	printf(">>>>>>>>>>>>>>>>>>\n");
	printf("%d entries>>>>>>>>>>\n", (int)_resident_count);
	rewind();

	while ((pp_obj = next()) != NULL) {
		// %p is the only conversion that is valid for a pointer; %x would
		// truncate (or misread) the value where pointers are wider than int.
		printf("\t==> [%p]\n", pp_obj);
	}
	printf("<<<<<<<<<<<<<<<<<<<\n");
	return;
}
#endif // _DEBUG

//
// Remove every entry: reset the resident count and zero the whole slot
// array in one memset (this replaced an earlier slot-by-slot loop).
// The table keeps its current capacity and _save_pointer is not changed.
//
void
Hash_Table::empty_all()
{
    _resident_count = 0;
    memset (_table, 0, _size_in_bytes);
}

//
// Merge two remembered sets: add every entry of `p_Hash_Table` to this
// table and return this table. Typical uses include merging the remembered
// sets of all the cars of a train, or merging the remembered set of a
// region with the relevant portion of the root set.
//
// `p_Hash_Table` is rewound and scanned with next(); its contents are not
// changed.
//
Hash_Table *
Hash_Table::merge(Hash_Table *p_Hash_Table)
{
#if (GC_DEBUG>4)
	this->scan_for_bogus_entries();
	p_Hash_Table->scan_for_bogus_entries();
#endif 

    p_Hash_Table->rewind();

    for (void *p_entry = p_Hash_Table->next();
         p_entry != NULL;
         p_entry = p_Hash_Table->next()) {
        this->add_entry(p_entry);
#if (GC_DEBUG>4)
		// Re-validate both tables after every insertion.
		p_Hash_Table->scan_for_bogus_entries();
		this->scan_for_bogus_entries();
#endif // _DEBUG
    }

#if (GC_DEBUG>4)
	p_Hash_Table->scan_for_bogus_entries();
	this->scan_for_bogus_entries();
#endif 
    return this;
}

//
// Map `address` to a slot index in the range [0, table_size).
//
// A deliberately simple hash: the address is multiplied by 42283, the
// product is truncated to unsigned int, and the result is reduced modulo
// `table_size`. Note that the assert examines the remainder of the
// untruncated product, not the value that is returned.
//
unsigned int
Hash_Table::_do_rs_hash(POINTER_SIZE_INT address, unsigned int table_size)
{
    const POINTER_SIZE_INT scrambled = address * 42283;
    assert((POINTER_SIZE_INT)(scrambled % table_size) <= (POINTER_SIZE_INT)0xFFFFFFFF);

    const unsigned int low_bits = (unsigned int)scrambled;
    return low_bits % table_size;
}

//
// Grow the table because its residency has passed the threshold.
//
// A larger slot array is allocated (the next capacity in primes[], or twice
// the current capacity once primes[] is exhausted), every entry of the old
// array is re-inserted with add_entry(), and the old array is freed.
//
// Slot indices obtained before the call are stale afterwards, so callers
// must always rehash after an extend. _save_pointer is not adjusted here.
//
// If the new array cannot be allocated an error is printed and the process
// is aborted.
//
void
Hash_Table::_extend()
{
#if (GC_DEBUG>4)
	scan_for_bogus_entries();
#endif // _DEBUG

    volatile void **p_save_table       = _table;
    unsigned int saved_size_in_entries = _size_in_entries;

    if (_prime_index >= NUMBER_OF_PRIMES) {
        _size_in_entries = _size_in_entries * 2; // Not a prime but really big.
    } else {
        _size_in_entries   = primes[_prime_index++];
    }
    _size_in_bytes     = sizeof(void *) * _size_in_entries;
    _threshold_entries = (unsigned int)(_size_in_entries * HASH_TABLE_THRESHOLD);

    // add_entry() counts the entries again as they are re-inserted.
    _resident_count     = 0;
    _table = (volatile void **)malloc(_size_in_bytes);

    if (_table == NULL) {
        fprintf(stderr,"Error: malloc failed when extending remembered set\n");
        // Stop unconditionally: assert() alone disappears under NDEBUG and
        // the memset below would then write through a NULL pointer.
        assert(0);
        abort();
    }

    memset(_table, 0, _size_in_bytes);

    // Move every resident entry of the old array into the new one.
    for (unsigned int index = 0; index < saved_size_in_entries; index++) {
        if (p_save_table[index] != NULL) {
            this->add_entry((void *)(p_save_table[index]));
        }
    }

    free(p_save_table);

#if (GC_DEBUG>4)
	this->scan_for_bogus_entries();
#endif // _DEBUG
}

//
// Report whether `address` is currently stored in this hash table.
//
// Returns false for NULL. Otherwise the table is probed linearly from the
// slot chosen by _do_rs_hash(): the search succeeds when `address` is seen
// and fails at the first empty slot or after one full trip around the table.
//
// The lookup never extends the table (only add_entry() does), so that
// readers do not cause writes.
//
bool
Hash_Table::is_present(void *address)
{
    if (address == NULL) 
        return false;

    const unsigned int home_slot = _do_rs_hash((POINTER_SIZE_INT)address,
                                               _size_in_entries);
    unsigned int probe = home_slot;

    do {
        volatile void *occupant = _table[probe];

        if (occupant == address)   // hit
            return true;

        if (occupant == NULL)      // empty slot ends the run: absent
            return false;

        probe = (probe + 1) % _size_in_entries;
    } while (probe != home_slot);

    //
    // We have traversed a full circle and are back
    // where we started, so we are sure it isn't there.
    //
    return false;
}

//
// Return the index of the slot that holds `address`, or -1 if `address` is
// NULL or is not stored in this hash table.
//
// The table is probed linearly from the slot chosen by _do_rs_hash(): the
// search fails at the first empty slot or after one full trip around the
// table.
//
// The lookup never extends the table; add_entry() is counted on to do the
// extend, because otherwise the to_from_table logic doesn't have the
// appropriate locks.
//
int
Hash_Table::_get_offset(void *address)
{
    if (address == NULL) 
        return -1;

    const unsigned int home_slot = _do_rs_hash((POINTER_SIZE_INT)address,
                                               _size_in_entries);
    unsigned int probe = home_slot;

    do {
        volatile void *occupant = _table[probe];

        if (occupant == address)   // hit
            return probe;

        if (occupant == NULL)      // empty slot ends the run: absent
            return -1;

        probe = (probe + 1) % _size_in_entries;
    } while (probe != home_slot);

    //
    // We have traversed a full circle and are back
    // where we started, so we are sure it isn't there.
    //
    return -1;
}


//
// Return the next entry of the scan started by rewind(), or NULL when the
// table is empty or the scan has run past the last slot.
//
// _save_pointer is the scan cursor: empty slots are skipped and the cursor
// is left just past the slot whose entry is returned.
//
void *
Hash_Table::next()
{
    if (_resident_count == 0) {
        // Nothing stored at all - bail out.
#if (GC_DEBUG>2)
        _table_state = scanned;
#endif 
        return NULL;
    }

#if (GC_DEBUG>2)
    // The first call after a rewind marks the table as being scanned.
    // Modification during a scan is tolerated here because the delete
    // logic can cause the table to be modified.
    if (_table_state == rewound) {
        _table_state = scanning;
    }
#endif 

    for (; _save_pointer < _size_in_entries; _save_pointer++) {
        void *p_found = (void *)_table[_save_pointer];

        if (p_found != NULL) {
            // Step past the slot so the next call resumes after it.
            _save_pointer++;
            return p_found;
        }
    }

    // Ran off the end of the table: the scan is complete.
#if (GC_DEBUG>2)
    _table_state = scanned;
#endif
    return NULL;
}

//
// Remove one entry from the table and return it, or return NULL when the
// table is empty.
//
// The search starts at the scan cursor (_save_pointer) and wraps to slot 0
// at the end of the table. After the removal the occupied slots that follow
// are re-seated with _rehash(); the cursor stays on the vacated slot.
//
void *
Hash_Table::next_delete()
{
#if (GC_DEBUG>2)
    assert(_save_pointer >= 0);
    assert(_resident_count >= 0);
#endif // _DEBUG

    if (_resident_count == 0) {
        // Hash table is empty.
#if (GC_DEBUG>2)
        _table_state = scanned;
#endif // _DEBUG
        return NULL;
    }

    // Entries remain, so a cursor past the end restarts at the beginning.
    if (_save_pointer >= _size_in_entries) {
        _save_pointer = 0;
    }

    // Skip empty slots. With at least one resident entry the walk needs to
    // wrap around at most once, which the assert checks.
    bool wrapped = false;
    while (_table[_save_pointer] == NULL)  {
        _save_pointer++;
        if (_save_pointer >= _size_in_entries) {
            assert (!wrapped);
            wrapped = true;
            _save_pointer = 0;
        }
    }

    // Found an occupied slot: take the entry out and account for it.
    void *p_removed = (void *)_table[_save_pointer];
    _table[_save_pointer] = 0;
    _resident_count--;

    _rehash(_save_pointer);

#if (GC_DEBUG>2)
    assert(_resident_count >= 0);
#endif // _DEBUG
    return p_removed;
}

//
// Reset the scan cursor so that the next call to next() starts at slot 0.
// Builds with GC_DEBUG>2 assert that no scan is currently in progress and
// record the table as rewound.
//
void
Hash_Table::rewind()
{
    _save_pointer = 0;
#if (GC_DEBUG>2)
    assert(_table_state != scanning);
    _table_state  = rewound;
#endif // _DEBUG
}

#if (GC_DEBUG>4)
//
// Heuristic sanity check over every slot of this hash table.
// A non-zero entry is reported as bogus, and the process exits through
// orp_exit(1), when its value has bit 31 set or is below 0x2000.
//
void Hash_Table::scan_for_bogus_entries()
{
	for (int slot = 0; slot < _size_in_entries; slot++) {
		const unsigned entry = (unsigned)_table[slot];
		if (entry == 0) {
			// Empty slot - nothing to check.
			continue;
		}

		const bool top_bit_set = (entry & 0x80000000) != 0;
		const bool below_8k    = (entry & 0xFFFFE000) == 0;

		if (top_bit_set || below_8k) {
			cout << "Error: bogus entry " << entry;
			cout << " in hash table" << endl;
			orp_exit(1);
		}
	}
}
#endif // _DEBUG

// end file gc\hash_table.cpp




