/*------------------------------------------------------------------------------
** Name: hashtbl.cpp
**
** Title: Hash Table
**
** Purpose:
**  Create and manage the by-name hash table. Provides:
**
**
**  Base class = table
**
** Status: PRELIMINARY | CODED
**
** $Log:   S:/tbird/mt2_68k/symbol/hashtbl.cpv  $
** 
**    Rev 1.0   13 Feb 1997 09:06:08   gene
** Initial revision.
** 
**    Rev 1.0   07 Sep 1995 11:16:32   gene
** Initial revision.
** 
**    Rev 1.7   06 Nov 1992 07:11:54   brucea
** Fixed: ObliterateTable now sets initialSize and expandSize so that they 
**    override the inherited lower level initial values.
** Fixed: ExtendHashTable - it wasn't passing the pointer to the symbol name
**    to HashFunc; instead it was passing pointer to the entire symbol (which
**    holds offset to name).
** Resetting totalPutMisses and totalGetMisses when extending the table then
**    incrementing totalPutMisses when hashing symbols into new table.  This
**    corrects problem with the extention algorithm when in the MISSES_STATE.
** 
** 
**    Rev 1.6   19 Jul 1992 21:51:00   brucea
** Modified: init and expansion size of table to 4*4096
** Cleanup
** 
**    Rev 1.5   15 Jun 1992 15:33:32   brucea
** Added: return SUCCESS in ::ExtendHashTable
** 
**    Rev 1.4   09 Dec 1991 09:16:38   john
** Added member function to free the allocated memory for the 
** name hash table.
** 
**    Rev 1.3   06 Nov 1991 14:41:00   brucea
** Removed local declarations that were no longer being used
** 
**    Rev 1.2   31 Oct 1991 16:55:58   john
** Fixed errors from previous put.
** 
**    Rev 1.1   31 Oct 1991 14:47:16   brucea
** Added #ifdef DEBUG around _WINIO_
** Changed LookupName call to use symOffset instead of nameOffset and accesses
** name through the symbol rather than the name directly
** Changed PutName for the same reason - symbol offsets rather than name offsets
** 
** 
**    Rev 1.0   11 Sep 1991 09:08:46   brucea
** Initial revision.
**
** $Header:   S:/tbird/mt2_68k/symbol/hashtbl.cpv   1.0   13 Feb 1997 09:06:08   gene  $
**
** Copyright (C) 1991 Microtek International.  All rights reserved.
**
**--------------------------------------------------------------------------*/

                       /****************************
                        *                          *
                        *       INCLUDE FILES      *
                        *                          *
                        ****************************/
#ifndef _HASHTBL_
#include "hashtbl.h"
#endif

#ifndef _MEMPOOL_
#include "mempool.h"
#endif

#ifndef _SYMMGR_
#include "symmgr.h"
#endif

#ifdef DEBUG
#ifndef _WINIO_
#include "winio.h"
#endif
#endif
                       /****************************
                        *                          *
                        *        DEFINITIONS       *
                        *                          *
                        ****************************/


// Default amount the table grows by, and its default starting size.  Both
// evaluate to 16384; table sizes in this file are stepped through in units
// of sizeof(TABLE_OFFSET), so these are byte counts, not entry counts.
#define HASHTABLE_EXPANSION_SIZE (4L * 0x1000L)
#define HASHTABLE_INIT_SIZE      (4L * 0x1000L)

// Number of table entries skipped on each collision re-probe.
#define SEARCH_INCREMENT         7
#define HASH_TABLE_FULL_MISSES   896   // 3.5 * 256: the ratio of Put misses
                                       // to table entries (scaled by 256)
                                       // above which the table is extended
#define HASH_TABLE_FULL_RATIO    171   // .67 * 256: table 2/3 full

// Memory pool, defined elsewhere, through which symbol offsets and symbol
// name offsets are turned into pointers.
extern MemPool st;

                       /****************************
                        *                          *
                        *     EXECUTABLE CODE      *
                        *                          *
                        ****************************/

//------------------------------------------------------------------------
// HashTable::HashTable
//
// member functions for HashTable
//------------------------------------------------------------------------
HashTable::HashTable():Table() {
   // This variant takes no arguments, so the table and expansion sizes
   // come from the compiled-in defaults.
   initialSize = HASHTABLE_INIT_SIZE;
   expandSize  = HASHTABLE_EXPANSION_SIZE;

   // Nothing has been stored yet: clear the entry count and every
   // miss statistic.
   entryCount     = 0L;
   totalGetMisses = 0L;
   totalPutMisses = 0L;
   maxGetMisses   = 0;
   maxPutMisses   = 0;

   // The first decision to extend the table is made on the Put-miss ratio.
   extendHashState = MISSES_STATE;
}  // end of HashTable constructor


//------------------------------------------------------------------------
// HashTable::HashTable
//
// constructor with initial parameters for table and expansion sizes
//------------------------------------------------------------------------
HashTable::HashTable(U32 initSize, U32 expSize):Table() {
   // Caller-supplied sizes replace the compiled-in defaults.
   //   initSize: size the table starts with
   //   expSize:  amount the table grows by on each extension
   initialSize = initSize;
   expandSize  = expSize;

   // Nothing has been stored yet: clear the entry count and every
   // miss statistic.
   entryCount     = 0L;
   totalGetMisses = 0L;
   totalPutMisses = 0L;
   maxGetMisses   = 0;
   maxPutMisses   = 0;

   // The first decision to extend the table is made on the Put-miss ratio.
   extendHashState = MISSES_STATE;
}  // end of HashTable constructor with parameters


//------------------------------------------------------------------------
// HashTable::ObliterateTable
//------------------------------------------------------------------------
VOID HashTable::ObliterateTable(VOID) {
   // The parent class removes the table itself.  This must run first,
   // because the sizes assigned below are meant to override whatever
   // values the parent leaves behind.
   Table::ObliterateTable();

   // Put the hash-table defaults back into the inherited size members.
   expandSize  = HASHTABLE_EXPANSION_SIZE;
   initialSize = HASHTABLE_INIT_SIZE;

   // Return the hash-specific bookkeeping to its freshly-constructed state.
   extendHashState = MISSES_STATE;
   entryCount      = 0L;
   totalGetMisses  = 0L;
   totalPutMisses  = 0L;
   maxGetMisses    = 0;
   maxPutMisses    = 0;
}  // end of HashTable::ObliterateTable


//------------------------------------------------------------------------
// HashTable::ExtendHashTable
//
// Pseudo-code
//    Alloc a new memory block equal to the present table size +
//       expandSize
//    Copy majority of new object member vars to old, esp. hashTableSize
//       so that the hash function will work with new table size
//    Rehash all names into bigger table and copy each symbol offset as the
//       index is calculated.
//    Update new table handle.
//    GlobalFree the old handle.
//------------------------------------------------------------------------
// Grows the hash table by expandSize and rehashes every stored symbol
// offset into the larger table.
//
// Returns:
//    ER_SYM_OUT_OF_MEMORY if the larger table could not be allocated (the
//       existing table is left in place and unchanged);
//    SUCCESS otherwise.
//
// Side effects: tableSize, initialSize, constRatio, hashTableSize,
// tableHandle and hpTableObject are replaced with those of the new table;
// totalPutMisses and totalGetMisses are reset and totalPutMisses is then
// re-accumulated from the collisions met while rehashing; the old memory
// block is unlocked and freed.
RETCODE HashTable::ExtendHashTable(VOID)  {

   RETCODE         retCode;
   TABLE_OFFSET    symOffset;
   U32             i;
   HP_TABLE_OFFSET hashTblPtr;
   COMMON_SYMBOL_HEADER *symPtr;
   U32             oldTableSize; // used to keep track of previous hash table
                                 // size
   HANDLE          oldTableHandle;  // and handle
   U32             newIndex;
   U16             totalNamesWithMisses;  // symbols needing >= 1 re-probe;
                                          // only reported in DEBUG builds
   BOOLEAN         noMissesState;

   oldTableSize = tableSize;      // save present member vars for later use
   oldTableHandle = tableHandle;

   // allocate a temporary table object, to be made into the permanent
   // larger hash table
   // NOTE(review): tmpTable goes out of scope at return while this object
   // keeps its handle; this assumes the Table/HashTable destructor does not
   // release the memory - confirm against the class declaration.
   HashTable tmpTable((tableSize + expandSize), expandSize);

   // allocate the memory for new table; zero-initialized memory is
   // requested because a slot holding 0 is what marks it as empty below
   // NOTE(review): only ER_SYM_OUT_OF_MEMORY is treated as failure; any
   // other non-SUCCESS code from InitOrExtendTable is ignored - confirm
   // that no other failure code is possible.
   if ((retCode =
        tmpTable.InitOrExtendTable(GMEM_ZEROINIT)) == ER_SYM_OUT_OF_MEMORY) {
      return retCode;
   }
   tmpTable.SetHashTableSize();       // adjust size for hash function fit

#ifdef DEBUG
   printf("\nextending hash table to size %lu\n", tmpTable.hashTableSize);
   PrintStats();
#endif

   // copy member vars of new hash object to "old" existing table
   // (expandSize is not copied: tmpTable was built with the same value)
   tableSize = tmpTable.tableSize;
   initialSize = tmpTable.initialSize;
   constRatio = tmpTable.constRatio;  // new size gets new constant ratio;
                                      // must save
   hashTableSize = tmpTable.hashTableSize;  // required so the re-probe
                                            // below mods (%) with the
                                            // larger table

   // reset put and get misses so that the algorithm to extend table will
   // only look at number associated with new table, includes misses generated
   // to load the new table
   totalPutMisses = totalGetMisses = 0L;
   totalNamesWithMisses = 0;

   // Walk every slot of the old table (still reachable through this
   // object's own GetHugeDataPtr because tableHandle/hpTableObject are not
   // switched over until after the loop).  Each non-empty slot holds a
   // symbol offset; the symbol's name is rehashed and the offset stored in
   // the new table using the same probe sequence as PutName.
   for (i = 0; i < oldTableSize; i+= sizeof(TABLE_OFFSET)) {
      hashTblPtr = (HP_TABLE_OFFSET)GetHugeDataPtr(i);  // ptr to sym offset
      symOffset = *hashTblPtr;
      if (symOffset) {               // pass by if NULL

         // get ptr to symbol structure; extract name, then rehash
         symPtr = (COMMON_SYMBOL_HEADER *)st.GetHugeDataPtr(symOffset);
         newIndex =
            tmpTable.HashFunc(st.GetHugeDataPtr(symPtr->symbolNameOffset));
         noMissesState = TRUE;

         while (TRUE) {
            // find slot in new hash table for symbol
            hashTblPtr = (HP_TABLE_OFFSET)tmpTable.GetHugeDataPtr(newIndex);
            if (*hashTblPtr == 0L)  {
               *hashTblPtr = symOffset; // put offset to symbol into new table
               break;
            }
            else  {
               newIndex = (newIndex +
                           (SEARCH_INCREMENT * sizeof(TABLE_OFFSET)))
                           % hashTableSize;
               noMissesState = FALSE;
               totalPutMisses++;    // record misses during transfer
            }
         }  // end infinite while

         // Count this symbol only here, inside the occupied-slot branch.
         // Testing the flag for empty slots as well would read it before
         // it was ever assigned, or re-count the previous symbol's miss.
         if (!noMissesState)
            totalNamesWithMisses++;
      }  // end of if (symOffset)
   }  // end of table loop "for"

   // now save the rest of the data from temporary table to the "old"
   tableHandle = tmpTable.tableHandle;      // save new table handle
   hpTableObject = tmpTable.hpTableObject;  // save new table base ptr

   GlobalUnlock(oldTableHandle);            // free up old table memory
   GlobalFree(oldTableHandle);

#ifdef DEBUG
   printf("words without perfect rehash = %u\n", totalNamesWithMisses);
#endif

   return SUCCESS;
}   // end of HashTable::ExtendHashTable


//------------------------------------------------------------------------
// HashTable::HashFunc
//
// Input parameter:
//    name: made of a byte length followed by the characters
//
// The hash function multiplies the previous hash value by 261, then adds
// in the new character.
//------------------------------------------------------------------------
U32 HashTable::HashFunc(HPU8 name) {

   // The leading byte is the character count; it is also the seed value
   // of the hash.
   U8  remaining = *name++;
   U32 hash = (U32)remaining;

   while (remaining != 0) {

#ifdef DEBUG
   if ((U32)*name > 0x7FL) {printf("casting error in HashFunc\n");}
#endif

      // hash*256 + hash*4 + hash == hash*261, then fold in the next char
      hash = (hash<<8) + (hash<<2) + hash + (U32)(*name++);
      remaining--;
   }

   // Reduce modulo the number of entries, then scale back up so the
   // result is the offset of an entry within the table.
   const U32 entrySlots = hashTableSize / sizeof(TABLE_OFFSET);
   return (hash % entrySlots) * sizeof(TABLE_OFFSET);
}  // end of HashFunc


//------------------------------------------------------------------------
// HashTable::HashFuncString
//
// Input parameter:
//    name: long ptr to string
//    length: length of string
//
// The hash function multiplies the previous hash value by 261, then adds
// in the new character.
//------------------------------------------------------------------------
// Hashes `length` characters starting at `name` and returns the offset of
// the corresponding entry within the hash table (a multiple of
// sizeof(TABLE_OFFSET), less than hashTableSize).
//
// Must produce exactly the same value as HashFunc does for a
// length-prefixed name holding the same bytes, otherwise a name stored via
// one function cannot be located via the other.
U32 HashTable::HashFuncString(LPSTR name, U8 length)  {

   register U32 hash;
   register U8 i;

   hash = (U32)length;     // set the initial hash value
   for (i = 0; i < length; i++) {
      // Convert each character through U8 before widening.  `name` points
      // at plain char, which may be signed; widening a signed char above
      // 0x7F straight to U32 would sign-extend it and yield a different
      // hash than HashFunc computes from the same byte read as U8.
      // (The former DEBUG "casting error" warning flagged exactly those
      // characters; it is no longer needed now that they hash correctly.)
      hash = (hash<<8) + (hash<<2) + hash + (U32)(U8)(*name++);
   }
   // reduce table size to number of elements; find remainder; multiply
   // back up to physical size for the index
   return((hash % (hashTableSize / sizeof(TABLE_OFFSET)))
           * sizeof(TABLE_OFFSET));
}  // end of HashTable::HashFuncString


//------------------------------------------------------------------------
// HashTable::LookupName
//
// pseudo-code
//
// hash name
// go to location in hash table
// if location empty, name is not in the table: return SUCCESS with
//    symOffset set to 0
// else
//    compare name of entry to requested name
//    if same return SUCCESS with symOffset set to the symbol's offset
//    else
//       advance hash table index by 7 entries, mod to length of table,
//       count a miss, repeat loop
//------------------------------------------------------------------------
// Searches the table for the NUL-terminated string lpName.
//
//    lpName:    name to find; only its first 255 characters are used
//    symOffset: (out) offset of the matching symbol, or 0 if no entry
//               has this name
//    misses:    (out) number of re-probes made during this search
//
// Returns ER_STRING_LENGTH_ZERO for an empty string, otherwise SUCCESS
// whether or not the name was found.  Also folds `misses` into the
// totalGetMisses / maxGetMisses statistics.
RETCODE HashTable::LookupName(LPSTR lpName, TABLE_OFFSET& symOffset,
                              U8& misses)  {

   misses = 0;

   U16 nameLen = (U16)lstrlen(lpName);
   if (0 == nameLen)
      return ER_STRING_LENGTH_ZERO;
   nameLen = min(nameLen, 255);    // stored names carry a one-byte length

   TABLE_OFFSET slot = HashFuncString(lpName, (U8)nameLen);

   for (;;) {
      // Each table entry is the offset of a symbol, or 0 when unused.
      symOffset = *(HP_TABLE_OFFSET)GetHugeDataPtr(slot);
      if (symOffset == 0L)
         break;                    // unused entry ends the search: not found

      // Follow the symbol to its stored name (length byte, then chars).
      COMMON_SYMBOL_HEADER *entry =
         (COMMON_SYMBOL_HEADER *) st.GetHugeDataPtr(symOffset);
      HPU8 storedName = st.GetHugeDataPtr(entry->symbolNameOffset);

      BOOLEAN sameName = FALSE;
      if (nameLen == *storedName++) {
         // Lengths agree; compare character by character from the start
         // of the requested name.
         sameName = TRUE;
         LPSTR wanted = lpName;
         for (U16 left = nameLen; left > 0; left--) {
            if ((U8)*wanted++ != *storedName++) {
               sameName = FALSE;
               break;
            }
         }
      }
      if (sameName)
         break;                    // symOffset already names the match

      // Different name in this entry: step to the next probe position.
      slot = (slot + (SEARCH_INCREMENT * sizeof(TABLE_OFFSET)))
               % hashTableSize;
#ifdef DEBUG
      if ((slot & 0x00000003L) > 0)
         printf("index not on even U32 boundary\n");
#endif
      misses++;
   }

   // Found or not, the search cost goes into the Get statistics.
   totalGetMisses += misses;
   maxGetMisses = max(maxGetMisses, misses);
   return SUCCESS;
}      // end of HashTable::LookupName


//------------------------------------------------------------------------
// HashTable::PrintStats
//------------------------------------------------------------------------
#ifdef DEBUG
// DEBUG-only: prints the Put-miss total, the entry count, and two derived
// ratios.
// NOTE(review): both ratios are floating-point divisions with no check for
// a zero entry count or zero table size.
VOID HashTable::PrintStats() {

   // Raw counters first.
   printf("total Put misses=%lu\n", GetTotalPutMisses());
   printf("entries=%lu\n", GetEntryCount());

   // Average number of Put misses per stored entry.
   const double missesPerEntry =
      double(GetTotalPutMisses()) / double(GetEntryCount());
   printf("ratio of Put misses to entries=%7.6f\n", missesPerEntry);

   // Fraction of the table in use; the size is divided by 4 to turn it
   // into a number of entries.
   const double fillFraction =
      double(GetEntryCount()) / double((GetHashTableSize()/4));
   printf("ratio of entries to total table size=%7.6f\n", fillFraction);
}  // end of HashTable::PrintStats
#endif


//------------------------------------------------------------------------
// HashTable::PutName
//
// pseudo-code
//
// hash name
// go to location in hash table
// if empty
//    insert symbol offset
//    increment entryCount
//    if (MISSES_STATE and Put misses per entry exceed the limit) or
//       (RATIO_STATE and table used > 2/3)
//       switch to the other state and extend table
// else
//    compare name pointed to by offset with name to be inserted
//    if same, set duplicateName = TRUE and return SUCCESS; caller can then
//      generate "duplicate string" error or, if precedence name, change the
//      name and re-enter name
//    else
//       advance hash table index by 7 entries, mod to length of table,
//       repeat loop
//------------------------------------------------------------------------
// Enters the symbol at symOffset into the table, keyed by the symbol's
// name (length byte followed by characters, reached through the symbol's
// symbolNameOffset).
//
//    symOffset:     offset of the symbol in the symbol pool `st`
//    duplicateName: (out) TRUE if an entry with the identical name already
//                   exists, in which case nothing is inserted
//
// Returns:
//    ER_STRING_LENGTH_ZERO if the symbol's name has length 0;
//    SUCCESS if the name was a duplicate, or was inserted without the
//       table needing to grow;
//    otherwise whatever ExtendHashTable returns (the symbol has already
//       been inserted at that point).
RETCODE HashTable::PutName(TABLE_OFFSET symOffset,
                           BOOLEAN& duplicateName) {

   U32      index;
   HPU8     hashPtr, newNamePtr, newNamePtrStart, compareNamePtr;
   COMMON_SYMBOL_HEADER *symPtr;
   COMMON_SYMBOL_HEADER *occupantPtr;   // symbol already holding a slot
   RETCODE  retCode;
   U8       tempMisses;
   U8       newNameLength;
   BOOLEAN  equalFlag;

   tempMisses = 0;
   duplicateName = FALSE;

   symPtr = (COMMON_SYMBOL_HEADER *) st.GetHugeDataPtr(symOffset);
   // get pointer to name
   newNamePtr = st.GetHugeDataPtr(symPtr->symbolNameOffset);
   newNameLength = *newNamePtr;     // doesn't change inside loop, so bring
                                    // it outside the compare loop
   if (0 == newNameLength)
      return ER_STRING_LENGTH_ZERO;
   newNamePtrStart = newNamePtr;    // must save start of name so that
                                    // newNamePtr can be incremented to
                                    // compare new name with hit name
   index = HashFunc(newNamePtr);

   for (;;) {
      hashPtr = GetHugeDataPtr(index);   // calculate huge pointer in hash tbl
      if (*((HP_TABLE_OFFSET)hashPtr) == 0L) { // see if entry is empty (NULL)
         *((HP_TABLE_OFFSET)hashPtr) = symOffset;  // entry empty; put in
                                                   // new symbol offset
         entryCount++;
         totalPutMisses += tempMisses;
         maxPutMisses = max(maxPutMisses, tempMisses);
         duplicateName = FALSE;
         retCode = SUCCESS;

         // See if the table is filled to the level requiring extension.
         // The test alternates between two criteria; each extension
         // switches to the other one.

         // MISSES_STATE
         // Scale totalPutMisses by 256 (left shift by 8), divide by
         // entryCount and compare to the desired ratio * 256.
         // totalPutMisses is U32, so with 1 million symbols (20 bits)
         // there are still 12 bits left; the shift uses only 8.
         // entryCount was just incremented, so it is never 0 here.
         if (extendHashState == MISSES_STATE)  {
            if (((totalPutMisses << 8) / entryCount)
                  > HASH_TABLE_FULL_MISSES) {
               extendHashState = RATIO_STATE;
               retCode = ExtendHashTable();
            }
         } else  {  // RATIO_STATE
            // constRatio is the entry count at which the table counts as
            // full; it is precomputed by SetHashTableSize.
            if (entryCount > constRatio)  {
               extendHashState = MISSES_STATE;
               retCode = ExtendHashTable();
            }
         }
         return retCode;
      } else {  // compare name already in hash table to name to be inserted

         // The slot holds the offset of a SYMBOL, not of a name.  Go
         // through the symbol header to reach the stored name, exactly as
         // is done for the new symbol above; comparing against the header
         // bytes themselves would never recognise a duplicate.
         occupantPtr = (COMMON_SYMBOL_HEADER *)
            st.GetHugeDataPtr(*((HP_TABLE_OFFSET)hashPtr));
         compareNamePtr = st.GetHugeDataPtr(occupantPtr->symbolNameOffset);
         equalFlag = TRUE;
         newNamePtr = newNamePtrStart;    // init pointer to start of new name

         // Compare the length byte plus newNameLength characters.  The
         // counter is U16 because the bound is inclusive: with a U8
         // counter and a 255-character name, `i <= 255` could never
         // become false and the compare would run past both names.
         for (U16 i = 0; i <= newNameLength; i++) {
            if (*newNamePtr++ != *compareNamePtr++) {
               equalFlag = FALSE;
               break;
            }
         } // end of for
         if (equalFlag) {  // names are the same
            duplicateName = TRUE;
            return(SUCCESS);        // do not enter name; already exists
         }
         // names not equal; index to next slot and try again
         index = (index + (SEARCH_INCREMENT * sizeof(TABLE_OFFSET)))
                  % hashTableSize;
#ifdef DEBUG
            if ((index & 0x00000003L) > 0)
               printf("index not on even U32 boundary\n");
#endif
         tempMisses++;
      }
   }  // end of infinite for
}  // end of HashTable::PutName


//------------------------------------------------------------------------
// HashTable::SetHashTableSize
//
// Purpose:
//    Adjust size of hash table to guarantee that misses will eventually
//    access all locations in the table.
//
//    Calculate the ratio value to be compared with the entryCount when
//    a new entry is added to hash table.  If greater than value,
//    ExtendHashTable is called.
//
// Pseudo-code
//    comparison is:
//                    ratio * 256 * (hashTableSize / sizeof(TABLE_OFFSET))
//       entryCount > ----------------------------------------------------
//                                         256
//
//       ratio * 256 = HASH_TABLE_FULL_RATIO
//
//       divide by 256 = >> 8
//------------------------------------------------------------------------
VOID HashTable::SetHashTableSize(VOID) {

   // The hash functions normally work over the whole allocated table.
   hashTableSize = tableSize;

   // Collisions are resolved by stepping SEARCH_INCREMENT entries at a
   // time.  If the table length were an exact multiple of that step, the
   // probe sequence would keep revisiting the same subset of entries, so
   // in that case the last entry is left out of the hashed range.
   if (0 == (hashTableSize % (SEARCH_INCREMENT * sizeof(TABLE_OFFSET))))
      hashTableSize -= sizeof(TABLE_OFFSET);

   // Entry count above which the table is treated as full:
   // (number of entries * HASH_TABLE_FULL_RATIO) / 256.
   constRatio = ((hashTableSize / (sizeof(TABLE_OFFSET)))
                   * HASH_TABLE_FULL_RATIO) >> 8;
}  // end of SetHashTableSize

/******************************** E O F *************************************/