// Copyright (C)  2000 Intel Corporation.  All rights reserved.
//
// $Header: /usr/development/orp/orp/common/gc_v2/include/gc_header.h,v 1.15 2002/01/11 15:47:38 weldon Exp $
//

#ifndef _gc_header_H_
#define _gc_header_H_


// The structures of the GC header prepended to each object and v-table.
// Prototypes of the routines that exclusively manage them.
// This is for the exclusive use of the GC and Thread Manager components
// of the ORP.



#include <string.h>
#include "platform.h"
#include "orp_types.h"
// The GC header word of an object: a single pointer-sized integer.
// NOTE(review): declared between the includes, presumably because the headers
// included below refer to it -- confirm before moving.
typedef POINTER_SIZE_INT Object_Gc_Header;  
#include "object_layout.h"
#include "remembered_set.h"
#include "gc_header_format.h"
#include "Class.h" 
#include "gc_for_orp.h"
#include "orp_for_gc.h"
 

#ifdef __cplusplus
extern "C" {
#endif

// Pointer-width sentinel patterns. When POINTER64 is defined each pattern is
// the 32-bit value repeated twice so that it fills a 64-bit word.
#ifndef POINTER64
#define FREE_BLOCK_HEADER 0xADDEADDE
#define UNIQUE_HEADER_PATTERN 0xDAEDECAF
#else
#define FREE_BLOCK_HEADER 0xADDEADDEADDEADDE
#define UNIQUE_HEADER_PATTERN 0xDAEDECAFDAEDECAF
#endif

// Mask selecting the forwarding bit in an object's GC header word.
// The original locking code uses the low bit regardless of pointer width;
// OBJECT_LOCK_V2 uses the high bit of the pointer-sized word instead.
#ifndef OBJECT_LOCK_V2
#define FORWARDING_BIT_MASK 0x01
#elif defined(POINTER64)
#define FORWARDING_BIT_MASK 0x8000000000000000
#else
#define FORWARDING_BIT_MASK 0x80000000
#endif

/**************
 * 
 * These are macros that can be used to access the important fields in a block.
 *
 * The structure of a block is simple. Blocks fall on 65536 byte boundaries.
 * They are divided into 4096 byte cards. That means that each block holds 16 cards.
 * A card is the same size as a page so that we can use the windows interface to
 * get at the PTE dirty bits.
 * 
 **************/

// These are the basic data structures used by the core of the GC. Once one understands how
// these are used they should be able to understand what the code must do.
//

// This is what is held in the mark field for fixed objects and card table for moveable objects.
typedef bool MARK;

#ifdef GC_SAPPHIRE
// Sapphire tri-color values. We can fit a tri-color into 2 bits.
#define SAPPHIRE_WHITE 0
#define SAPPHIRE_GREY 1
#define SAPPHIRE_BLACK 2
#define SAPPHIRE_BAD_COLOR 3

// A tri-color is stored in one byte. (The typedef operands were previously
// reversed, which tried to redefine 'byte' in terms of an undeclared type.)
typedef byte tri_color;

// Tri-color table entry for ADDR in the block that contains ADDR.
// (The closing parenthesis of the expansion was previously missing.)
// NOTE(review): the index is ADDR shifted by TRI_COLOR_SHIFT without first
// masking ADDR down to its offset within the block -- confirm that callers
// pass a block-relative value, or mask with GC_BLOCK_LOW_MASK here.
#define SAPPHIRE_TRI_COLOR(ADDR) (GC_BLOCK_INFO(ADDR)->tri_color_table[(ADDR) >> TRI_COLOR_SHIFT])
#endif

// Bytes reserved in the block information area for the tri-color table.
// The same amount is reserved whether or not GC_SAPPHIRE is defined.
#define TRI_COLOR_SIZE_BYTES 8192

// The GC heap is divided into blocks that are sized based on several constraints.
// They must be small enough to allow unused blocks not to be a big problem.
// They must be large enough so that we avoid spending a lot of time requesting additional
// blocks.
// They must be small enough to allow them to be scanned without a lot of problem.
// They must be aligned so that clearing lower bits in any address in the block will get 
// the start of the block.
// There are several tables that must be held in the first page (hardware specific, for IA32 4096 bytes).
// These tables have a single entry for every page in the block. This limits the size of the block.
// 
// Currently we use a block size of 64K (1 << 16). If we want 128K use 17, 256K use 18. All should work well.
#define GC_BLOCK_SHIFT_COUNT 16
#define GC_BLOCK_SIZE_BYTES (1 << GC_BLOCK_SHIFT_COUNT)
// Since the JIT needs to know the card size GC_CARD_SHIFT_COUNT is defined in gc_for_orp.h
#define GC_CARD_SIZE_BYTES (1 << GC_CARD_SHIFT_COUNT)
// Bytes reserved at the start of each block for the block information:
// one card plus the tri-color area.
#define GC_BLOCK_INFO_SIZE_BYTES (GC_CARD_SIZE_BYTES + TRI_COLOR_SIZE_BYTES)
// The number of cards. This is used to determine the size of the block information tables.
#define GC_CARDS_IN_BLOCK (GC_BLOCK_SIZE_BYTES/GC_CARD_SIZE_BYTES)
// Mask of 1's that when ANDed in will give the offset into the block
#define GC_BLOCK_LOW_MASK ((POINTER_SIZE_INT)(GC_BLOCK_SIZE_BYTES - 1))
// Mask that gives the base of the block.
#define GC_BLOCK_HIGH_MASK (~GC_BLOCK_LOW_MASK)

// All address-taking macros below parenthesize their argument before casting,
// so that an expression argument (e.g. p + 1) is converted as a whole rather
// than having the cast bind to its first operand only.

// Used to go from an object reference to the block information.
#define GC_BLOCK_INFO(ADDR) ((block_info *)((POINTER_SIZE_INT)(ADDR) & GC_BLOCK_HIGH_MASK))
// The start of the allocation area for this block: the first byte after the block information.
#define GC_BLOCK_ALLOC_START(BLOCK_ADDR) ( (void *)((POINTER_SIZE_INT)(BLOCK_ADDR) + GC_BLOCK_INFO_SIZE_BYTES) )
// The card index of an object ref. The address is converted through
// POINTER_SIZE_INT (not unsigned int) so the cast is not narrowing on
// 64-bit targets; the value is unchanged because only the low bits survive the mask.
#define GC_CARD_INDEX(ADDR) (( (POINTER_SIZE_INT)(ADDR) & GC_BLOCK_LOW_MASK ) >> GC_CARD_SHIFT_COUNT)
// The maximum size of an object that can be allocated in this block.
#define GC_BLOCK_ALLOC_SIZE (GC_BLOCK_SIZE_BYTES - GC_BLOCK_INFO_SIZE_BYTES)
// If we have an LOS block use this to get the index into the mark table in the block info.
// This limits the size of a fixed object to be greater than 2**GC_LOS_MARK_SHIFT_COUNT (2**6 == 64)
#define GC_LOS_MARK_SHIFT_COUNT 6
// Use this to calculate the mark index.
#define GC_LOS_MARK_INDEX(P_OBJ) (((POINTER_SIZE_INT)(P_OBJ) & GC_BLOCK_LOW_MASK) >> GC_LOS_MARK_SHIFT_COUNT)
// The largest mark index for objects in LOS bucket objects.
#define GC_LOS_MAX_MARK_INDEX (GC_LOS_MARK_INDEX(GC_BLOCK_SIZE_BYTES - 1))
// Use this to get or set a mark.
#define GC_LOS_MARK(P_OBJ) (GC_BLOCK_INFO(P_OBJ)->mark_table[GC_LOS_MARK_INDEX(P_OBJ)])
// The last byte in a block.
#define GC_BLOCK_CEILING(P_OBJ) ( ((char *)(GC_BLOCK_INFO(P_OBJ)) + GC_BLOCK_SIZE_BYTES) - 1 )

// Specify the maximum number of blocks in a car before we create a new car.
// For now a GC can cause the number of blocks in a car to increase but if it does then the next
// collection will create a new car. If we have 65536 blocks this gives us 1Mb cars which seems 
// about right but we need to build and measure.
// Basically we want the size to be large enough to reduce dead object float but not so large as to 
// cause problems with latency. 

// Upper bound on the number of blocks in a car.
// With GC_TRAIN_V5 the cap is 512 blocks. Without trains the cap is 16384
// blocks, which at 64K per block gives 1 gig cars.
#ifdef GC_TRAIN_V5
#define GC_MAX_BLOCKS_PER_CAR 512
#else
#define GC_MAX_BLOCKS_PER_CAR 16384
#endif 

/************
 * The first page consists of several important pieces of data. 
 * Container - the nursery, step, or car, LOS that this block is part of.
 * Scan - the cheney scan pointer splitting the difference between scanned and unscanned
 *        objects.
 * LastObject - the last object in this block. - This might not be needed since the
 *              last object is followed by an object with a zero vtable.
 * Card Table - a 16 byte card table indicating which cards potentially hold interesting pointers
 *               (an interesting pointer is one that points to a younger generation or older train/car).
 *
 ***************/

// Places that need to know the block information.
// Slots - Given a slot we need to know what generation it is in. 
// Large objects can overlap into multiple blocks but since the base of the object will always be
// in the first block we never have to go from a pointer in an overlapped block to the container.
//
// Forward reference.
//
struct block_info;  // Forward ref: per-block header, defined later in this file.
struct train_info;  // Forward ref: train descriptor, defined later in this file.

//
// Steps can have multiple blocks. This links them together. 
//
// A NULL-terminated chain of blocks that together form one step or one car,
// plus the bookkeeping needed to allocate into and scan that chain.
// Instances are themselves chained through 'next'.
struct block_list_info {
    block_info *alloc_block;   // The block currently allocated into; usually the last block of the list.
    block_info *blocks;        // Head of the list of blocks
    block_info *scan_block;    // Block with the scan pointer. Scanning is done when this is alloc_block and block info's free==scan
    train_info *my_train;      // If this is a car then this is the train info structure associated with this car.
    POINTER_SIZE_INT birthday; // First car is 0.
    block_list_info *next;     // Next car or step, NULL terminated list.
};

// Steps are just block_list_info structures; the alias only documents intent.
typedef block_list_info step_info;
// Cars are just block_list_info structures; the alias only documents intent.
typedef block_list_info car_info;

//
// The basic train data structure.
// Trains are lists of cars. The head of the trains list is the oldest train.
// The last car field is just an optimization.
//
// One train: a list of cars, linked into a doubly linked list of trains
// ordered by age.
struct train_info {
    car_info    *cars;  // Linked list of cars (block_list_info *)
    car_info    *last_car; // Tail of 'cars'; kept only as an optimization.
    POINTER_SIZE_INT birthday; // NOTE(review): presumably orders trains by age, as car birthdays do for cars -- confirm.
    train_info  *next; // linked list from oldest (focus train) to youngest.
    train_info  *previous; // Linked from the youngest to the oldest train.
};

// Node of a singly linked list.
// NOTE(review): presumably overlaid on free large-object-space chunks to
// thread them into a free list -- confirm against the LOS allocator.
struct los_free_link {
    los_free_link *next;   // Next node in the list.
};
//
// As we allocate new groups of blocks we link them together with this structure
// This allows sequential scanning of the heap when we want to coalesce adjacent
// blocks.
//
// One entry per allocated group of blocks; the entries form a singly
// linked list so the heap can be walked group by group.
struct block_group_link {
    block_info *block_list;   // The blocks belonging to this group.
    block_group_link *next;   // Next group in the list.
};

// Nurseries can be in one of five states and the transition rules are as follows.
// Nurseries start off in a free_uncleared_nursery state. The zeroing thread atomically
// moves it to the thread_clearing_nursery state. 
// Once the thread is cleared a nonatomic write can make the nursery a free_nursery.
// The cycle_nursery routine can atomically move a nursery from the free_uncleared_nursery 
// to active_nursery and then clear the thread.
// If cycle_nursery sees a free_nursery it can atomically move it to active_nursery without
// having to clear it.
// Once the thread is active it can be filled up by only the owning thread. This thread
// will be responsible for using a write to make it a spent_nursery.
// The GC can move the threads from spent_nursery to free_uncleared_nursery using a simple 
// write.

// Life-cycle state of a nursery. The numeric values are spelled out so the
// encoding is visible at a glance; they match the implicit numbering.
enum nursery_state {
    free_uncleared_nursery  = 0,   // Free, but its memory has not been cleared yet.
    thread_clearing_nursery = 1,   // Claimed by the zeroing thread, which is clearing it.
    free_nursery            = 2,   // Free and already cleared.
    active_nursery          = 3,   // Owned by a thread that is allocating in it.
    spent_nursery           = 4,   // Filled up by its owning thread.
    bogus_nursery           = 5    // Not one of the five states described above.
};

//
// Blocks can be free or associated with a nursery or a step in yos, los, or mos.
// The gc_block_status_table is kept up to date each time a block goes from one state to 
// another. To index into the table one can either iterate (and this will be racy) or if
// a thread owns a block it can change the status.
// We maintain this table so that we don't have to chain through all the blocks to get
// information about the state of all the blocks, a very expensive thing to do on the P4 
// since block info tables are 64K apart.
//
// Where a block currently lives. The numeric values are spelled out
// explicitly; they match the implicit numbering.
enum gc_block_status {
    block_in_free    = 0,   // On the free list.
    block_in_nursery = 1,   // Part of a nursery.
    block_in_step    = 2,   // Part of a step.
    block_in_los     = 3,   // Part of large object space.
    block_in_mos     = 4    // Part of mos.
};

// The block status table: one gc_block_status entry per block, kept so that
// block states can be inspected without chaining through the blocks.
// Declared here only; the definition lives outside this header.
extern gc_block_status *gc_block_status_table;


//
// block_info - This is what resides at the start of each block holding the start of an object.
//              This is all blocks except for sequential blocks holding objects that will not
//              fit into a single block.
// 
// Header that sits at the base of a block (see GC_BLOCK_INFO). It records
// which space the block belongs to, its allocation and scan pointers, its
// list linkage, and the per-card and per-object tables.
// Field order is significant (cache-line placement is discussed below), so
// do not reorder members.
struct block_info {

    // Put c_area_p at the top to avoid a dereference.
    // If train birthday is 0 then these are of interest to determine which part of the YOS is being referenced.
    // in_free_p is the other possibility but we had better not be checking if a free block is in YOS.
    bool c_area_p; // true if this area is being collected (a C area), false if a U (uncollected) area
    bool in_nursery_p; // true if the block is in a nursery.
    bool in_los_p;     // true if the block is in large object (fixed) space.
    bool in_step_p;    // true if the object is in a step.

    // These are used to determine if we have an interesting pointer. By convention steps and nurseries have
    // a train_birthday and a car_birthday of 0. This means that if slot birthday > object birthday we need
    // to mark the slot's card.

    POINTER_SIZE_INT train_birthday;
    POINTER_SIZE_INT car_birthday;

    // Allocation needs to look at the free pointer and the scanner needs to match free when Cheney scanning
    void *free;                                 // Alloc pointer, for los the free list.
    void *scan;                                 // Used by cheney scanning.
    void *ceiling;                              // Do I need this.

    // Things like steps, cars and the free list use this to link up blocks. It is NULL terminated

    block_info *next;
    // ------------------------ For IA32 the above should fit into a single 32 byte cache line.

    // If this block is part of a car or a step then this is the list_info related to the
    // list it is part of.
    block_list_info *list_info;

    // Each block has an index into the block status table. When in_nursery_p, in_los_p, in_step_p, in_free_p is changed
    // the block status table will be updated.
    unsigned int block_status_table_index;

#ifdef GC_THREAD_LOCAL_GC
    bool private_area_p;    
    // Experimental - true if this area is only reachable by a single thread.
    // We need to be able to link all the local blocks belonging to one thread into a list. The
    // cool thing about this list is that since the block is only owned by one thread we do not
    // have to use atomic compare and exchange instruction to deal with it. 
    // This is not to say that when we get global blocks we don't have to use atomic instructions.
    // We can link spent nurseries, step blocks, and so forth through this 
    // The only question I now have is why we can't use just next field for this????
    block_info *next_local_block;
    // In a thread local gc situation there is a step for each thread. When the nursery is 
    // about to be collected then target_step should hold the thread local step
    // that private objects need to be moved into.
    step_info *target_step;
#endif

    // LOS and the free list need to know how many blocks are represented by this block.
    unsigned int number_of_blocks;
    
    // Nursery blocks can be active, which means that a thread is using them to 
    // allocate objects in. Once such a nursery is collected it must be recleared and reset.
    // The hope is that the same nursery will have some affinity to the thread.
    
    nursery_state   nursery_status; 
    //
    // We hold the following null terminated lists, each holds the obvious list of weak/soft/phantom/finalizer
    // objects. At the start of a GC the referent object will be in this block.
    // If the referent objects on the list are moved by the GC then the object is transferred to the
    // block associated with where the object was moved. If the referent object was not moved then
    // the only thing keeping this object alive is the weak/soft/phantom/finalizer. 
    // We do the appropriate thing at that time.
    void *weak_ref_list;
    void *soft_ref_list;
    void *phantom_ref_list;
    void *finalizer_ref_list;

    // If this block is free, this should be set. This allows us to sequentially order and coalesce blocks
    // simply by sequentially scanning all the blocks.
    bool in_free_p;
    block_info *all_blocks_next;

    // This block holds one or more los objects of this size.

    POINTER_SIZE_INT los_object_size;               // If this is an los block this is the size of the objects in the block
                                                    // If 0 the object takes up the entire block or many blocks.
    
    // ------------------------ For IA32 the above should fit into a single 32 byte cache line.
    // ------------------------ This means that the following should start at byte 64 in a new cache line.
    // ------------------------ Hopefully, the P4 will be off prefetching this stuff for us.
    // Marks the *base* of an object that is modified.

    byte card_table[GC_BLOCK_SIZE_BYTES/GC_CARD_SIZE_BYTES];
    // We have two choices here either force all objects to start on a card boundary
    // or maintain a last object table.
    Java_java_lang_Object *card_last_object_table[GC_BLOCK_SIZE_BYTES/GC_CARD_SIZE_BYTES]; 
    
    // The fact that we mark the base means that objects that are large enough to require 2 or more blocks will
    // always have the property that the base of the object is in the first block. This means that we don't need
    // to worry about this table overflowing the block.
    // LOS space is a bipob scheme so the header is marked in the mark table.
    MARK mark_table[GC_BLOCK_SIZE_BYTES >> GC_LOS_MARK_SHIFT_COUNT]; // mark bits for objects falling on each 64 byte boundary. 

#ifdef GC_SAPPHIRE
    // This is a table giving the color (white, grey or black) needed by sapphire. We use a byte each 8 bytes
    // of object. Since only 1 object can start every 8 bytes we are fine.
    // So a 64K block can hold 8K of objects and if a card is 4K we need 2 of them.
    // magic number 8 is minimum number of bytes in an object.
    // (The terminating semicolon was previously missing, so the struct did
    // not compile when GC_SAPPHIRE was defined.)
    tri_color tri_color_table[GC_BLOCK_SIZE_BYTES/8];
#endif
    char *unused;
};

// 
// The following header is prepended to every VTable:
//
// GC header stored in front of a VTable.
typedef struct _VTABLE_GC_HEADER_ {
    ULONG object_size_bytes;       // Size in bytes of the object (per the field name; NOTE(review): confirm which objects this covers).
    ULONG object_pointer_bitmap;   // NOTE(review): presumably a bitmap of the pointer-holding slots of the object -- confirm.
} VTable_Gc_Header;


#ifdef __cplusplus
}
#endif

//
// gc_header.h/cpp: report whether instances of the class described by
// p_vtable require double (8 byte) alignment, i.e. whether the alignment
// bits of its class_properties are equal to 8.
//

inline bool is_double_alignment_required(Partial_Reveal_VTable *p_vtable)
{
    // This check used to be conditional on GC_ALIGNMENT; it is always compiled in.
    const bool needs_double_alignment =
        ((p_vtable->class_properties & CL_PROP_ALIGNMENT_MASK) == 8);

    return needs_double_alignment;
}

 
//
// gc_header.h/cpp: check to see if the specified address
// is aligned on a double word boundary.
// INLINE THIS WHEN DONE DEBUGGING.
//
inline bool is_double_aligned(void *p_address)
{

    // NOTE: SINCE WE ARE TRYING TO ALIGN THE VTABLE,
    // WHICH WILL BE THE ADDRESS FOLLOWING THIS HEADER,
    // THE LOGIC IS INVERTED
    if (((POINTER_SIZE_INT)p_address) & 0x7)
        return true;                  //false;

    return false; // true;
}
    
// INLINE THIS WHEN DONE DEBUGGING
inline bool is_not_double_aligned(void *p_address)
{
    if (((POINTER_SIZE_INT)p_address) & 0x7)
        return false;  // true;

    return true; // false;	
}

// Accessors for a free block; the implementations live outside this header.
// NOTE(review): presumably these read back the values stored by
// set_free_block_link / set_free_block_size -- confirm.
void *get_free_block_link(void *p_block_ptr);
unsigned int get_free_block_size(void *p_block_ptr);

// Returns the gc_information field of p_obj's vtable.
// NOTE(review): going by the function name this is the offset map of the
// object's reference fields -- confirm against the vtable layout.
inline unsigned int *
get_object_descendents_offset_map(Java_java_lang_Object *p_obj)
{
    unsigned int *p_offset_map = p_obj->vt->gc_information;
    return p_offset_map;
}

// Returns true when the forwarding bit (FORWARDING_BIT_MASK) is set in the
// GC header word of p_obj, false otherwise.
inline bool is_object_forwarded(IN Java_java_lang_Object *p_obj)
{
    Object_Gc_Header *p_header = P_OBJ_INFO(p_obj);
    const bool forwarded = ((*p_header & FORWARDING_BIT_MASK) != 0);

    return forwarded;
}

// Returns the address stored in the GC header word of an object that has
// already been forwarded: the header word with the forwarding bit cleared.
// Preconditions (asserted): p_obj is forwarded and does not live in a large
// object space block.
inline Java_java_lang_Object *
p_get_already_forwarded_object(Java_java_lang_Object *p_obj)
{
    assert(is_object_forwarded(p_obj));
    assert(!(GC_BLOCK_INFO(p_obj)->in_los_p));

    Object_Gc_Header *p_header = P_OBJ_INFO(p_obj);
    // Strip the forwarding bit; what remains is the new location.
    POINTER_SIZE_INT new_address = (POINTER_SIZE_INT)*p_header & ~FORWARDING_BIT_MASK;

    return (Java_java_lang_Object *)new_address;
}
 
// Returns true when the class properties in p_obj's vtable have any of the
// CL_PROP_ARRAY_MASK bits set.
inline bool
is_array(Java_java_lang_Object *p_obj)
{
    return (p_obj->vt->class_properties & CL_PROP_ARRAY_MASK) != 0;
}


// Returns true when the class properties in p_obj's vtable have any of the
// CL_PROP_NON_REF_ARRAY_MASK bits set.
inline bool
is_array_of_primitives(Java_java_lang_Object *p_obj)
{
    return (p_obj->vt->class_properties & CL_PROP_NON_REF_ARRAY_MASK) != 0;
}

 

// Returns the clss field of p_vtable viewed as a Partial_Reveal_Class.
// p_vtable must not be NULL (asserted).
inline Partial_Reveal_Class *
get_object_class(Partial_Reveal_VTable *p_vtable)
{
    assert(p_vtable);

    Partial_Reveal_Class *p_class = (Partial_Reveal_Class *)p_vtable->clss;
    return p_class;
}

// Size computation for an array of class vector_class with the given length;
// implemented outside this header. get_object_size_bytes below treats the
// result as the array's size in bytes.
extern unsigned orp_array_size(Class *vector_class, int length);

#ifdef EIGHT_BYTE_ALIGN_ARRAY

// Returns the size in bytes occupied by p_obj (EIGHT_BYTE_ALIGN_ARRAY build).
//
// Non-arrays: the allocated_size recorded in the vtable.
// Arrays: sizeof(JavaArray) + OBJECT_HEADER_SIZE + length * element size,
// rounded up to GC_OBJECT_ALIGNMENT. The rounded size is cached in the
// array's 'padding' field the first time it is computed, so this function
// writes to the object.
//
// Under OBJECT_SPLITTING an array outside large object space occupies twice
// that size. Previously the doubling was applied only on the call that
// computed the size; later calls returned the cached, undoubled value, so
// the same object reported two different sizes. The doubling is now applied
// on both paths.
inline unsigned int
get_object_size_bytes(Java_java_lang_Object *p_obj)
{
    if (!is_array (p_obj)) {
        return p_obj->vt->allocated_size;
    }

    JavaArray *p_array = (JavaArray *)p_obj;
    unsigned int sz;

    if (p_array->padding) {
        // Size was computed by an earlier call and cached (unsplit).
        sz = p_array->padding;
    } else {
        unsigned int array_length = p_array->length;
        // NO: zero is an OK length for null strings! assert(array_length > 0);
        unsigned int element_size = p_obj->vt->array_element_size;
        assert (element_size <= 8);
        assert (element_size > 0); // Do we have any primitive sizes <0 and >8?
        sz = sizeof (JavaArray) + OBJECT_HEADER_SIZE + array_length * element_size;
        // Make it aligned on GC_OBJECT_ALIGNMENT
        sz = ( ((sz + (GC_OBJECT_ALIGNMENT - 1)) & (~(GC_OBJECT_ALIGNMENT - 1))) );

        // Cache the unsplit size for subsequent calls.
        p_array->padding = sz;
    }

#ifdef OBJECT_SPLITTING
    // Objects allocated in the Large Object Space (LOS) are not split.
    if (!(GC_BLOCK_INFO(p_obj)->in_los_p)) {
        return 2 * sz;
    }
#endif // OBJECT_SPLITTING

    return sz;
}

#else // EIGHT_BYTE_ALIGN_ARRAY

// Returns the size in bytes occupied by p_obj (build without
// EIGHT_BYTE_ALIGN_ARRAY).
//
// Non-arrays: the allocated_size recorded in the vtable.
// Arrays with 8 byte elements: whatever orp_array_size reports.
// Other arrays: sizeof(JavaArray) + OBJECT_HEADER_SIZE + length * element
// size, rounded up to GC_OBJECT_ALIGNMENT. In both array cases the result is
// asserted to agree with orp_array_size.
// Under OBJECT_SPLITTING an array outside large object space reports twice
// that size.
inline unsigned int
get_object_size_bytes(Java_java_lang_Object *p_obj)
{
    if (!is_array (p_obj)) {
        return p_obj->vt->allocated_size;
    }

    // A length of zero is legitimate (null strings), so it is not asserted on.
    unsigned int array_length = ((JavaArray *)p_obj)->length;
    unsigned int element_size = p_obj->vt->array_element_size;
    assert (element_size <= 8);
    assert (element_size > 0); // Do we have any primitive sizes <0 and >8?

    unsigned int sz;
    if (element_size == 8) {
        sz = orp_array_size(p_obj->vt->clss, array_length);
    } else {
        sz = sizeof (JavaArray) + OBJECT_HEADER_SIZE + array_length * element_size;
        // Make it aligned on GC_OBJECT_ALIGNMENT
        sz = ( ((sz + (GC_OBJECT_ALIGNMENT - 1)) & (~(GC_OBJECT_ALIGNMENT - 1))) );
    }
    assert (sz == orp_array_size(p_obj->vt->clss, array_length));

#ifdef OBJECT_SPLITTING
    // Objects allocated in the Large Object Space (LOS) are not split.
    if (GC_BLOCK_INFO(p_obj)->in_los_p) {
        return sz;
    }
    return 2 * sz;
#else
    return sz;
#endif // OBJECT_SPLITTING
}
#endif // else EIGHT_BYTE_ALIGN_ARRAY

// Thin wrapper: returns exactly what get_object_size_bytes returns for p_obj.
inline unsigned int
get_real_object_size_bytes(Java_java_lang_Object *p_obj)
{
    return get_object_size_bytes(p_obj);
}

// Predicates implemented outside this header.
// NOTE(review): going by the names, these test whether the argument is a
// Java object / a vtable -- confirm the exact checks in the implementation.
bool is_java_object(void *p_thing);
bool is_java_object(Java_java_lang_Object *p_obj);
bool is_vtable(VTable *p_vtable);

// Mark-state queries for an object; implemented outside this header.
// NOTE(review): presumably is_object_unmarked is the negation of
// is_object_marked -- confirm.
bool
is_object_marked(Java_java_lang_Object *p_obj);

bool
is_object_unmarked(Java_java_lang_Object *p_obj);

// Mutators for a free block; implemented outside this header.
// NOTE(review): presumably set_free_block_header stamps FREE_BLOCK_HEADER
// into the block and the other two store the link and size read back by
// get_free_block_link / get_free_block_size -- confirm.
void set_free_block_header(void *p_block_ptr);
void set_free_block_link(void *p_block_ptr, void *p_next_block);
void set_free_block_size(void *p_block_ptr, unsigned int size);

// Set or clear an object's mark; implemented outside this header.
// Each operation is overloaded to accept either the object's GC header
// or the object itself.
void
set_object_marked(Object_Gc_Header *p_gc_hdr);
void
set_object_marked(Java_java_lang_Object *p_obj);

void
set_object_unmarked(Object_Gc_Header *p_gc_hdr);
void
set_object_unmarked(Java_java_lang_Object *p_obj);

// Implemented outside this header; takes the address of a reference slot.
// NOTE(review): presumably rewrites *pp_obj to the forwarded location of the
// object it refers to -- confirm.
void
update_reference_forwarded(Java_java_lang_Object **pp_obj);

#endif // _gc_header_H_
