// Copyright (C)  2000 Intel Corporation.  All rights reserved.
//
// $Header: /usr/development/orp/orp/arch/ia32/ia32_o3_jit/flow_graph_dom.cpp,v 1.6 2001/10/15 06:50:50 xhshi Exp $
//


#include "defines.h"
#include <stdio.h>
#include <string.h>
#include "flow_graph.h"
#include "expression.h"

#define LOOP_NORMALIZATION
//#define LOOP_LINEARIZATION
#define LOOP_PEELING
//#define LOOP_UNROLLING

#define ALLOW_MULTI_TAIL_EDGE
#define DOM_INCLUDES_EH_NODES

/////////////////////////////////////////////////////////////////
// helper types and function declarations
/////////////////////////////////////////////////////////////////
#define MAX_DOM_HASH 32
//
// Dom_ID_Map: small chained hash table that maps a node pointer to a dense
// integer id.  Ids are handed out sequentially (0, 1, 2, ...) in the order
// in which insert() is called, so next_dom_id() is also the number of
// pointers inserted so far.
//
class Dom_ID_Map
{
public:
    Dom_ID_Map() : _next_dom_id(0) {
        int i;
        for (i = 0; i < MAX_DOM_HASH; i++) _table[i] = NULL;
    }
    //
    // return the id previously assigned to o, or -1 if o was never inserted
    //
    int lookup(void *o)
    {
        // walk the chain of the bucket that o hashes to
        ID_Link *l = _table[bucket_of(o)];
        for (; l != NULL; l = l->next)
            if (l->opnd == o) return l->id;
        return -1;
    }
    //
    // assign the next free id to o and return it; o must not be present yet.
    // The link is allocated from mem.
    //
    int insert(Mem_Manager& mem, void *o)
    {
        assert(lookup(o)==-1);
        // create ID link to hold operand o and push it on the bucket's chain
        unsigned entry = bucket_of(o);
        _table[entry] = new (mem) ID_Link(o,_next_dom_id,_table[entry]);
        _next_dom_id++;
        return _next_dom_id-1;
    }
    // id that the next insert() will hand out
    int next_dom_id() { return _next_dom_id; }
private:
    //
    // bucket index for pointer o.  The pointer is converted through size_t
    // rather than unsigned so that no address bits are dropped by the cast
    // itself on targets where pointers are wider than unsigned; the two low
    // bits are shifted out before taking the modulus.
    //
    static unsigned bucket_of(void *o)
    {
        return (unsigned)((((size_t)o) >> 2) % MAX_DOM_HASH);
    }
    int _next_dom_id;
    ID_Link *_table[MAX_DOM_HASH];
};
//
// Dom_Node: one entry of the depth-first ordered node table used while
// building the dominator tree.
//
struct Dom_Node
{
    void *node;         // the graph node; cast to Eh_Node* when is_eh_node is
                        // true and to Cfg_Node* otherwise
    bool is_eh_node;    // selects which of the two node types `node` points to
};

// recursive depth-first numbering (kept as an alternative to
// do_depth_first_non_recursive); returns the id assigned to root
int     do_depth_first(void* root,  Dom_Node* node,
						Dom_ID_Map &vertex,	int* semi, int* label,  
						int* ancestor, int* parent, 
						unsigned short traversal_number, Mem_Manager &mm, bool is_eh_node=false);


// append add_id to the growable int array array[this_id]
void	add_element(Mem_Manager &mm, 
						int** array, int* array_size,	int* array_capacity,
						int this_id, int add_id);

// helpers used by build_dom_tree; defined outside this part of the file
void	compress(int b, int* ancestor, int* semi, int* label);
int		eval(int b, int* ancestor, int* semi, int* label);
// loop traversal helpers; defined outside this part of the file
int mark_blocks_in_loop(Cfg_Node *header, unsigned marker);
int traverse_loop_mark_block(Cfg_Node *node, Cfg_Node *loop_header, unsigned short traversal_number) ;
void	traverse_loop(Cfg_Node *node, 
					   Cfg_Node *loop_header, 
					   unsigned short traversal_number,
                                           bool update_inc);

static void traverse_doms_to_header(Cfg_Node *node, Cfg_Node *original, Back_Edge *be);
Cfg_Node *duplicate_header(Cfg_Node* header, unsigned short block_marker, Mem_Manager &mm);

// loop transformations driven by loop_transformation()
void loop_unrolling(Flow_Graph *fg, unsigned n);
bool loop_peeling(Flow_Graph *fg, unsigned n);
void linearize_loop(Cfg_Node *header, unsigned short block_marker, Mem_Manager &mm, Cfg_Node *start_node);
#ifdef _DEBUG
// debug builds only: used by loop_peeling and loop_unrolling to obtain a
// NULL-terminated array of loop headers
Cfg_Node **get_loop_headers(Back_Edge &be, Mem_Manager &mm);
#endif

/////////////////////////////////////////////////////////////////
// loop transformation driver
//
// Runs, in this order, whichever of the loop passes were selected
// at compile time: linearization, entry/exit normalization,
// peeling and unrolling.  did_peeling receives the result of
// loop_peeling(); it is left untouched when LOOP_PEELING is off.
/////////////////////////////////////////////////////////////////

// With TRACE_O3 the dominator tree and loop information are rebuilt and the
// graph is dumped under the given tag; without it the macro does nothing.
#ifdef TRACE_O3
#define TRACE_LOOP_CFG(graph, tag)                  \
    do {                                            \
        (graph)->build_dom_tree();                  \
        (graph)->find_loop_depths_and_headers();    \
        (graph)->print_cfg(tag);                    \
    } while (0)
#else
#define TRACE_LOOP_CFG(graph, tag) do { } while (0)
#endif // TRACE_O3

void loop_transformation(Flow_Graph *fg, bool &did_peeling)
{
    // dump of the untouched graph
    TRACE_LOOP_CFG(fg, "0a");

#ifdef LOOP_LINEARIZATION
    fg->loop_linearization();
    TRACE_LOOP_CFG(fg, "0b");
#endif // LOOP_LINEARIZATION

#ifdef LOOP_NORMALIZATION
    fg->loop_entry_normalization();
    fg->loop_exit_normalization();
    TRACE_LOOP_CFG(fg, "0c");
#endif // LOOP_NORMALIZATION

#ifdef LOOP_PEELING
    did_peeling = loop_peeling(fg, 3);
#endif // LOOP_PEELING

#ifdef LOOP_UNROLLING
    loop_unrolling(fg, 3);
    TRACE_LOOP_CFG(fg, "0d");
#endif // LOOP_UNROLLING

    // peeling and unrolling shouldn't produce any multiple entry loop
    //assert(fg->loop_normalization()==0);
}

#undef TRACE_LOOP_CFG

/////////////////////////////////////////////////////////////////
// utility routine
//
// is_in_loop: true when check_header is the loop header of node
// or of one of the loops found by repeatedly stepping from a
// header to the loop header of that header's immediate dominator.
// check_header must be its own loop header.
/////////////////////////////////////////////////////////////////
bool is_in_loop(Cfg_Node *node, Cfg_Node *check_header)
{
    assert(check_header->loop_header == check_header);

    // climb one nesting level per step; a header without an immediate
    // dominator is the prolog, which ends the search
    for (Cfg_Node *enclosing = node->loop_header;
         enclosing != NULL && enclosing->idom != NULL;
         enclosing = enclosing->idom->loop_header)
    {
        if (enclosing == check_header)
            return true;
    }
    return false;
}

/////////////////////////////////////////////////////////////////
// loop transformation routines
/////////////////////////////////////////////////////////////////

unsigned Flow_Graph::loop_linearization()
{
    build_dom_tree();
    find_loop_depths_and_headers();
    Mem_Manager mm(num_fg_nodes * sizeof(void *));
    Back_Edge be(mm);
    apply(find_back_edges, &be);

    // we rotate until the header is not the exit node
    traversal_number++;
    unsigned num_rotate = 0;
    int i;
    for (i = be.back_edge_count-1; i >=0 ; i --) 
    {
        Cfg_Node *header = be.back_edge_heads[i];
        if (be.back_edge_tails[i]==header) continue; // skip single block loop
        // look at the successors of the header
        linearize_loop(header, traversal_number, mem_manager, header);
    }
    return num_rotate;
}

// this is a very loose policy
//
// peel_policy: decide how many iterations of the loop headed by `header`
// should be peeled.  Returns 0 when the loop should not be peeled.
//
// A loop is only accepted when
//   1. it is a single block loop (header == tail), and
//   2. the block's bytecode ends with exactly this 8 byte sequence:
//          iinc        <local> <delta>    (0x84, 3 bytes)
//          iload_<n>                      (0x1a..0x1d)
//          iconst_<X>                     (0x03..0x08)
//          if_icmp<cond> <back to block>  (0x9f..0xa4, 3 bytes)
//      where the iinc operates on the local loaded by iload_<n>.
// The value X of the iconst is returned as the peel count.
//
// In _DEBUG builds the tail is not passed in; it is searched for among the
// in-edges of header instead.
//
#ifdef _DEBUG
unsigned peel_policy(Flow_Graph *fg, Cfg_Node *header)
{
    // NULL keeps the header != tail test below well defined when header has
    // no in-edges at all
    Cfg_Node *tail = NULL;
    Cfg_Int j;
    for (j = 0; j < header->in_edge_size(); j++)
    {
        tail = header->in_edges(j);
        if (tail->loop_header == header) break;
    }
#else
unsigned peel_policy(Flow_Graph *fg, Cfg_Node *header, Cfg_Node *tail)
{
#endif
    // policy #1 -- head = tail which means a single block loop
    if (header != tail) return 0;

    // the pattern needs 3 + 1 + 1 + 3 bytes; a shorter block cannot match and
    // scanning it would read bytes in front of the block
    if (header->bc_length() < 8) return 0;

    // policy #2 -- bytecode contains constant trip count
    const unsigned char *bytecode = fg->bytecodes();
    // start from last byte
    unsigned curr_index = header->first_bc_idx()+header->bc_length();
    int trip_count = -1;
    int induc_pos = -1;

    // the last two bytes must be the target to first_bc_idx
    // because header == tail
    curr_index -= 3;
    short target_addr = (short)(bytecode[curr_index+1]<<8|bytecode[curr_index+2]);
    if (target_addr!=(short)(0-header->bc_length()+3))
        return 0;

    // the branch itself must be one of if_icmp<cond>
    if (bytecode[curr_index]<0x9f || bytecode[curr_index]>0xa4)
        return 0;

    // iconst_0 .. iconst_5 supplies the trip count (maybe check for bipush)
    curr_index --;
    if (bytecode[curr_index]<0x3 || bytecode[curr_index]>0x8)
        return 0;
    trip_count = bytecode[curr_index] - 0x3;

    // iload_0 .. iload_3 names the induction variable (maybe check for load)
    curr_index --;
    if (bytecode[curr_index]<0x1a || bytecode[curr_index]>0x1d)
        return 0;
    induc_pos = bytecode[curr_index] - 0x1a;

    // iinc must update that same local
    curr_index -=3;
    if (bytecode[curr_index]!=0x84)
        return 0;
    if (bytecode[curr_index+1]!=induc_pos) return 0;

#ifdef _DEBUG
    fprintf(stderr,"***** Loop block %d peel %d times.....\n",header->label,trip_count);
#endif
    // this is very dangerous, but for now, we'll just return trip_count
    return (unsigned) trip_count;
}

//
// loop_peeling: asks peel_policy() about every loop of fg and peels the ones
// it accepts, using the count returned by the policy.  The incoming n only
// acts as a switch: 0 disables the pass.  Returns true when at least one
// loop was peeled.
//
bool loop_peeling(Flow_Graph *fg, unsigned n=0)
{
    if (n==0) return false;

    // refresh dominators / loop info and collect the back edges
    fg->build_dom_tree();
    fg->find_loop_depths_and_headers();
    Mem_Manager mm(fg->num_fg_nodes * sizeof(void *));
    Back_Edge be(mm);
    fg->apply(find_back_edges, &be);

    bool peeled_any = false;

#ifdef _DEBUG
    // debug builds walk the NULL-terminated list of loop headers
    Cfg_Node **cursor = get_loop_headers(be, mm);
    if (cursor==NULL) return false;

    for (; *cursor != NULL; ++cursor)
    {
        n = peel_policy(fg, *cursor);
        if (n == 0) continue;
        peeled_any = true;
        fg->peel_loop(*cursor, n, fg->mem_manager, be, 100);
    }
#else
    // release builds walk the back edges from last to first
    int edge = be.back_edge_count;
    while (--edge >= 0)
    {
        n = peel_policy(fg, be.back_edge_heads[edge], be.back_edge_tails[edge]);
        if (n == 0) continue;
        peeled_any = true;
        fg->peel_loop(be.back_edge_heads[edge], n, fg->mem_manager, be, 50);
    }
#endif
    return peeled_any;
}

//
// loop_unrolling: calls unroll_loop() with factor n on every loop of fg.
// n == 0 disables the pass.
//
void loop_unrolling(Flow_Graph *fg, unsigned n=0)
{
    if (n==0) return;

    // refresh dominators / loop info and collect the back edges
    fg->build_dom_tree();
    fg->find_loop_depths_and_headers();
    Mem_Manager mm(fg->num_fg_nodes * sizeof(void *));
    Back_Edge be(mm);
    fg->apply(find_back_edges, &be);

#ifdef _DEBUG
    // debug builds walk the NULL-terminated list of loop headers
    Cfg_Node **cursor = get_loop_headers(be, mm);
    if (cursor==NULL) return;
    for (; *cursor != NULL; ++cursor)
        fg->unroll_loop(*cursor, n, fg->mem_manager, be, 100);
#else
    // release builds walk the back edges from last to first
    int edge = be.back_edge_count;
    while (--edge >= 0)
        fg->unroll_loop(be.back_edge_heads[edge], n, fg->mem_manager, be, 50);
#endif
}

//
// loop_entry_normalization: finds loop headers that are the target of more
// than one back edge and splits them, so that each back edge ends up with a
// header of its own (see the splitting loop below).  Returns the number of
// duplicate back edges counted in the first pass; 0 means nothing was
// changed.
//
unsigned Flow_Graph::loop_entry_normalization()
{
    build_dom_tree();
    find_loop_depths_and_headers();

    Mem_Manager mm(num_fg_nodes * sizeof(void *));
    Back_Edge be(mm);
    apply(find_back_edges, &be);

    // now normalize loops
    unsigned number_dups = 0;
    traversal_number ++;
    // first count the dup backedges
    // a header seen for the first time gets traversing==true; seeing it again
    // flips the flag to false, so after this pass traversing()==false marks
    // exactly the headers that have several back edges
    int i;
    for (i = 0; i < be.back_edge_count; i++) 
    {
        if (be.back_edge_heads[i]->latest_traversal < traversal_number)
        {
            be.back_edge_heads[i]->latest_traversal = traversal_number;
            be.back_edge_heads[i]->set_traversing(true);
        }
        else if (be.back_edge_heads[i]->latest_traversal == traversal_number)
        {
            be.back_edge_heads[i]->set_traversing(false);
            number_dups ++;
        }
    }
    if (number_dups==0) return number_dups; // early return

    for (i = 0; i < be.back_edge_count; i++) 
    {
        Cfg_Node *header = be.back_edge_heads[i];
        // headers with a single back edge (or already processed) are skipped
        if (be.back_edge_heads[i]->traversing()) continue;
        // set to true so that we don't come back the second time
        be.back_edge_heads[i]->set_traversing(true);
        // mark the tail nodes
        // two fresh traversal numbers are taken: the first tags preheaders,
        // the second tags the tails of this header's back edges
        traversal_number++;
        unsigned short preheader_number = traversal_number; // reserved one number
        traversal_number++;
#ifdef _DEBUG
        // number of back edges into this header; only used by the assert below
        int remain_count = 0;
#endif
        int j;
        for (j = 0; j < be.back_edge_count; j++) 
        {
            if (be.back_edge_heads[j]==header)
            {
                be.back_edge_tails[j]->latest_traversal = traversal_number;
#ifdef _DEBUG
                remain_count++;
#endif
            }
        }
        // calculate max of preheader depth
        // every predecessor that was not tagged as a tail is a preheader
        int preheader_depth = 0;
        int tail_count = 0;
        Cfg_Node *pred_node;
        for (j = 0; j < header->in_edge_size(); j++)
        {
            pred_node = header->in_edges(j);
            if (pred_node->latest_traversal!=traversal_number)
            {   // preheader
                pred_node->latest_traversal = preheader_number;
                if (pred_node->loop_depth() > preheader_depth) 
                    preheader_depth = pred_node->loop_depth();
            }
            else
                tail_count ++;
        }
        assert(remain_count==tail_count);
        assert(tail_count>1);
        assert(preheader_depth <= header->loop_depth());
        Cfg_Node *outer_header = NULL;
        Cfg_Node *candidate = NULL;
        int k;
        // one round per tail; k may additionally be decremented inside the
        // body when the chosen tail has several edges to the header
        for (k = tail_count-1; k >= 0; k--) // iterate one less than the total count
        {   // let's see, who's the lucky one that will be the inner header?
            // the one with highest loop depth
#ifdef _DEBUG
            candidate = NULL;
#endif
            int candidate_depth = -1;
            bool inc_depth = false;
            int p;
            for (p = 0; p < header->in_edge_size(); p++)
            {
                pred_node = header->in_edges(p);
                if (pred_node->latest_traversal==preheader_number) continue;
                if (pred_node->loop_depth() > candidate_depth)
                {
                    candidate = pred_node;
                    candidate_depth = pred_node->loop_depth();
                    inc_depth = false;
                }
                else if (pred_node->loop_depth() == candidate_depth)
                {   // compare the bc index of the nodes
                    // the one with smaller bc index gets to be in the inner loop
                    if (pred_node->first_bc_idx() < candidate->first_bc_idx())
                        candidate = pred_node;
                    // second one with same depth, so need to increment depth
                    inc_depth = true;
                }
            }
            assert(candidate!=NULL);
#ifdef _DEBUG
            if (candidate->loop_header==header)
                assert(candidate_depth <= header->loop_depth());
#endif
            // split
            // no split is made in the last round (k == 0)
            if (k > 0)
            {
                outer_header = split_cfg_node_pred(header);
                outer_header->add_edge(mem_manager,header); // add_edge isn't in split_cfg_node_pred
                // now candidate holds the highest loop depth, so change the backedge
                // NOTE(review): the meaning of the two node arguments of
                // replace_edge is not visible in this file -- confirm which
                // one is the old target and which one the new
                candidate->replace_edge(mem_manager,outer_header,header);
#ifdef ALLOW_MULTI_TAIL_EDGE
                // every remaining out-edge of candidate that points at
                // outer_header is replaced as well and uses up one round
                for (p = 0; p < candidate->out_edge_size(); p++)
                {
                    if (candidate->out_edges(p)==outer_header)
                    {
                        candidate->replace_edge(mem_manager,outer_header,header);
                        k--;
                    }
                }
#endif
            }
            // re-traverse the loop body from candidate up to header under a
            // fresh traversal number; inc_depth is passed as update_inc
            traversal_number++;
            traverse_loop(candidate,header,traversal_number,inc_depth);
            // need to set loop_depth now, after fixing the depth of original header
            if (k > 0)
            {
                outer_header->set_loop_depth(header->loop_depth()-1);
                header = outer_header; // for next iteration
            }
        }
    }
    return number_dups;
}

unsigned Flow_Graph::loop_exit_normalization()
{
    build_dom_tree();
    find_loop_depths_and_headers();

    Mem_Manager mm(num_fg_nodes * sizeof(void *));
    Back_Edge be(mm);
    apply(find_back_edges, &be);
	
    unsigned num_splices = 0;
    Cfg_Node *new_node = NULL;
    int i;
    for (i = 0; i < be.back_edge_count; i++) 
    {
        if (be.back_edge_tails[i]->loop_header != be.back_edge_heads[i])
        {
            new_node = splice_cfg_nodes(be.back_edge_tails[i],be.back_edge_heads[i]);
            num_splices++;
        }
    }
    return num_splices;
}

/////////////////////////////////////////////////////////////////
// count_nodes: apply() callback that bumps the counter of the
//      Num_Nodes closure once for every node it is called on
/////////////////////////////////////////////////////////////////
static void count_nodes(Cfg_Node *node, Closure *c) {
    (void) node;    // only the number of calls matters
    ((Num_Nodes *) c)->count++;
}

////////////////////////////////////////////////////////
// reset_depth: apply() callback that clears the loop
//      information of a node: depth 0, no loop header,
//      traversing flag off
////////////////////////////////////////////////////////
static void reset_depth(Cfg_Node *node, Closure *c) {
    (void) c;                       // no closure data is needed
    node->set_loop_depth(0);
    node->loop_header = NULL;
    node->set_traversing(false);    // temporary flag
}

////////////////////////////////////////////////////////////////////////
// find_back_edges: apply() callback.  Every in-edge of node whose
//      source is dominated by node is a back edge; node is appended to
//      the heads array and the source to the tails array of the
//      Back_Edge closure.
////////////////////////////////////////////////////////////////////////
extern void find_back_edges(Cfg_Node *node, Closure *c) 
{
    Back_Edge *edges = (Back_Edge *) c;
    int in;
    for (in = 0; in < node->in_edge_size(); in++) 
    {
        Cfg_Node *source = node->in_edges(in);
        if (!node->dominates(source)) continue;

        // both arrays share one capacity field, so each resize works on its
        // own copy and the result is written back once
        int heads_cap = edges->back_edge_capacity;
        int tails_cap = edges->back_edge_capacity;

        // record the head
        RESIZE_ARRAY(Cfg_Node *, edges->back_edge_heads, heads_cap, 
                     edges->back_edge_count, 4, edges->mm);
        edges->back_edge_heads[edges->back_edge_count] = node;

        // record the tail
        RESIZE_ARRAY(Cfg_Node *, edges->back_edge_tails, tails_cap, 
                     edges->back_edge_count, 4, edges->mm);
        edges->back_edge_tails[edges->back_edge_count] = source;

        assert(heads_cap==tails_cap); // make sure they grow evenly
        edges->back_edge_capacity = heads_cap;
        edges->back_edge_count++;
    }
}

#ifdef _DEBUG

////////////////////////////////////////////////////////////////////////
// find_loop_header: apply() callback that starts the walk up the
//       dominator tree at the node itself; the closure is the
//       Back_Edge collection.
////////////////////////////////////////////////////////////////////////
static void find_loop_header(Cfg_Node *node, Closure *c) 
{
    traverse_doms_to_header(node, node, (Back_Edge *) c);
}

////////////////////////////////////////////////////////////////////////
// traverse_doms_to_header : walks from node up the immediate dominator
//       chain until it reaches a back edge head that either has the
//       same loop depth as original, or whose idom has a smaller loop
//       depth than original (header of multiply nested loops), or
//       until the prolog (no idom) is reached.
//       Apart from the HACK_ww diagnostics nothing is recorded.
////////////////////////////////////////////////////////////////////////
static void traverse_doms_to_header(Cfg_Node *node, Cfg_Node *original, Back_Edge *be) 
{
    Cfg_Node *cur = node;
    for (;;)
    {
        bool reached_header = false;

        int idx;
        for (idx = 0; idx < be->back_edge_count; idx++) 
        {
            // only heads of back edges qualify
            if (cur != be->back_edge_heads[idx]) continue;
            // depth must match, or the idom must sit in a shallower loop
            if (cur->loop_depth() != original->loop_depth() &&
                !(cur->idom->loop_depth() < original->loop_depth())) continue;

            // assign loop_header to current node.   
            //original->loop_header = cur;
#ifdef HACK_ww
            if (original->loop_header==NULL)
                fprintf(stderr,"***** node: %d loop header NULL old header %d\n",original->unique_label,cur->unique_label);
            else if (original->loop_header != cur)
                fprintf(stderr,"***** node: %d loop header %d old header %d\n",original->unique_label,original->loop_header->unique_label,cur->unique_label);
#endif
            reached_header = true;
            break;
        }

        // stop at the prolog or once a header was found, otherwise climb
        if (cur->idom == NULL || reached_header)
            return;
        cur = cur->idom;
    }
}

#endif


//
// DFS_INSERT_NODE: mark nd as visited in traversal tn and queue it on the
// work list right after element wl, remembering parent_id and the is_eh flag.
// A work-list node is recycled from the work set's free nodes when one is
// available and allocated otherwise.
// Besides its arguments the macro uses the names `ws` (the Work_Set) and
// `mm` (the Mem_Manager) from the scope in which it is expanded.
//
#define DFS_INSERT_NODE(wl, parent_id, nd, tn, is_eh) { \
    assert(nd->latest_traversal < tn);  \
    nd->latest_traversal = tn;  \
    WL_DFS_Node *wn = (WL_DFS_Node*)ws.get_free_node(); \
    if (wn == NULL) \
        wn = new (mm) WL_DFS_Node(); \
    wn->set(parent_id, nd, is_eh); \
    wn->insert_after(wl); \
}

//
// DFS_INIT_NODE: give nd the next id from the id map (stored in `id`) and
// initialize all per-node arrays for that id: parent, the node table entry,
// semi and label (both start as the id itself) and ancestor (-1).
// Besides its arguments the macro uses the names `vertex`, `mm`, `parent`,
// `node_list`, `semi`, `label` and `ancestor` from the scope in which it is
// expanded.
//
#define DFS_INIT_NODE(id, parent_id, nd, is_eh) { \
    id                      = vertex.insert(mm, nd); \
    parent[id]              = parent_id; \
    node_list[id].node      = nd; \
    node_list[id].is_eh_node= is_eh;  \
    semi[id]                = id; \
    label[id]               = id; \
    ancestor[id]            = -1; \
}

//
// do_depth_first_non_recursive: work-list driven numbering of all nodes
// reachable from root, following both normal edges and exception handler
// edges.  Nodes receive their id, and have their entries in node_list, semi,
// label, ancestor and parent initialized, at the moment they are taken off
// the work list (see DFS_INIT_NODE); they are marked as visited with
// traversal_number at the moment they are put on it (see DFS_INSERT_NODE).
//
//   mm               allocator for work-list nodes and id links
//   node_list        out: id -> node table
//   vertex           out: node -> id map
//   root             start node; it is queued with parent id 0
//   semi, label, ancestor, parent
//                    out: per-id arrays, indexed by the ids handed out here
//   num_of_nodes     not used by this function
//   traversal_number visit marker; every reachable node must currently carry
//                    a smaller latest_traversal
//
void  do_depth_first_non_recursive(Mem_Manager &mm,
                                   Dom_Node*   node_list,
                                   Dom_ID_Map& vertex, 
                                   Cfg_Node*   root,
                                   int*        semi, 
                                   int*        label,
                                   int*        ancestor, 
                                   int*        parent,
                                   int         num_of_nodes,
                                   unsigned short traversal_number) 
{
    //
    // initialize working set
    //
    Work_Set ws;
    DFS_INSERT_NODE(&ws.work_list, 0, root, traversal_number, false);
    //
    // iterate until working set is empty
    //
    while (!ws.is_empty()) // not empty
    {
        int id;
        WL_DFS_Node *w = (WL_DFS_Node *)ws.work_list.get_next();
        // number the node now that it is being processed
        DFS_INIT_NODE(id, w->parent_id, w->node, w->is_eh);

        if (node_list[id].is_eh_node) // Eh_Node
        {
            Eh_Node *nodd = (Eh_Node *)w->node;
            // loop for all successors of this node.
            // the successors of an Eh_Node are the handler blocks of its
            // out edges; they are queued right after w, last edge first
            int s;
            for (s = nodd->out_edge_size()-1; s >= 0 ; s--) 
            {
                Cfg_Node *succ_bb = nodd->out_edges(s)->handler;
                if (succ_bb->latest_traversal < traversal_number) 
                    DFS_INSERT_NODE(w, id, succ_bb, traversal_number, false);
            }
        }
        else // Cfg_Node
        {
            Cfg_Node *nodd = (Cfg_Node *)w->node;
            // the exception handler node, if any, is queued before the
            // normal successors
            if (nodd->eh_out_edge())
            {
                Eh_Node *succ_bb = nodd->eh_out_edge();
                if (succ_bb->latest_traversal < traversal_number) 
                    DFS_INSERT_NODE(w, id, succ_bb, traversal_number, true);
            }
            // loop for all successors of this node.
            int s;
            for (s = nodd->out_edge_size()-1; s >= 0 ; s--) 
            {
                Cfg_Node *succ_bb = nodd->out_edges(s);
                // check if NOT visited yet this traversal. if not, then recurse.
                if (succ_bb->latest_traversal < traversal_number) 
                    DFS_INSERT_NODE(w, id, succ_bb, traversal_number, false);
            }
        }
        // hand the work-list node back to the work set
        ws.free(w);
    } // while
}

////////////////////////////////////////////////////////
// build_dom_tree: uses Tarjan's algorithm 
//      to find immediate dominator of each cfg_node.
//
//      Exception handler nodes (Eh_Node) take part in
//      the computation as ordinary graph nodes, but an
//      idom that turns out to be an Eh_Node is replaced
//      by that Eh_Node's own idom, so every idom field
//      ends up pointing at a Cfg_Node.
//
//      Side effects: sets num_fg_nodes, advances
//      traversal_number twice, writes the idom field of
//      every reachable node and sets the prolog's idom
//      to NULL.
////////////////////////////////////////////////////////
void Flow_Graph::build_dom_tree() 
{
    // all arrays below are indexed by the id a node receives during the
    // depth-first numbering
    int     *semi,      // contains df order # for each node.
            *parent,    // contains a df predecessor for each node.
            *dom,       // contains updated dom info for each node.
            *label,     // used for compress.
            *ancestor;
    Dom_ID_Map vertex;    // df ordered array of node dom_id's
    Dom_Node *node;
    int     **bucket, 
            *bucket_size,
            *bucket_capacity;


    ////////////////////////////////////////////////////////
    // determine node count
    ////////////////////////////////////////////////////////

    Num_Nodes nn;
    //traversal_number++;
    //nn.count = traversal_number;
    apply(count_nodes, &nn);
    // save fg counts for later use
    num_fg_nodes = nn.count;
    int number_nodes = num_fg_nodes;
    // add exception handler nodes
    Eh_Node *eh;
    for (eh = _handlers.next(); eh != &_handlers; eh = eh->next())
    {
        //eh->latest_traversal = traversal_number;
        number_nodes++;
    }
    //if (_handlers.next() != &_handlers) return;

    // Eight integer arrays, two pointer arrays, estimate size of 4 for buckets
    // NOTE(review): the allocations below are seven int arrays, one Dom_Node
    // array and one int* array; the figure passed to Mem_Manager is only an
    // estimate -- confirm that Mem_Manager can grow beyond it
    Mem_Manager mm(number_nodes * (sizeof(int) * 12 + sizeof(void *) * 3));

    ////////////////////////////////////////////////////////
    // allocate memory for arrays
    ////////////////////////////////////////////////////////
    semi            = (int *) mm.alloc(sizeof(int)*number_nodes);
    ancestor        = (int *) mm.alloc(sizeof(int)*number_nodes);
    label           = (int *) mm.alloc(sizeof(int)*number_nodes);
    parent          = (int *) mm.alloc(sizeof(int)*number_nodes);
    dom             = (int *) mm.alloc(sizeof(int)*number_nodes);
    //vertex          = (int *) mm.alloc(sizeof(int)*number_nodes);
		
    node            = (Dom_Node *) mm.alloc(sizeof(Dom_Node)*number_nodes);

    bucket          = (int **) mm.alloc(sizeof(int *)*number_nodes);
    bucket_size     = (int *)  mm.alloc(sizeof(int)*number_nodes);
    bucket_capacity = (int *)  mm.alloc(sizeof(int)*number_nodes);

    // initialize size & capacity of bucket array.
    int i;
    for (i = 0; i < number_nodes; i++) 
    {
        bucket_size[i]     = 0;
        bucket_capacity[i] = 0;
        node[i].node = NULL;
    }

    // do depth-first ordering
    traversal_number++; // global?
	
    ///////////////////////////////////////////////////
    // step 1:depth first ordering into vertex & semi
    ///////////////////////////////////////////////////
#if 1
    do_depth_first_non_recursive(mm, node, vertex, prolog(), semi, label,
                                 ancestor, parent, number_nodes, 
                                 traversal_number);
#else // KEN keep do_depth_first for the time being. Remove it later

    int prolog_id = do_depth_first(prolog(), node, vertex, semi, label, ancestor,
                                   parent, traversal_number, mm);
    parent[prolog_id] = prolog_id;
#endif
    // every counted node must have been reached and numbered
    assert(number_nodes==vertex.next_dom_id());
    //number_nodes = vertex.next_dom_id();
    ////////////////////////////////////////////////////////////
    // create dominator tree
    //		use tarjan's algorithm from gil_block code
    //      combined with cfg_node struct from flow_graph
    ////////////////////////////////////////////////////////////
    traversal_number++;

    // Iterate through node[]. start at top of DF Ordering and
    // stop before zero since node[0] is always the prolog.
    int w,p,b;
    for (w = number_nodes-1; w > 0; w--) 
    {
        void *this_node = node[w].node;
        bool this_eh_type = (node[w].is_eh_node);
        // this may look complicated, but it's not
        // we're getting both types of in_edge_nodes
        // (for a Cfg_Node: its normal in-edges followed by the in-edges of
        // the Eh_Node it is a handler of, if there is one)
        int in_edge_size =  this_eh_type ? 
            ((Eh_Node *)this_node)->in_edge_size() : 
            ((Cfg_Node *)this_node)->in_edge_size() + 
                (((Cfg_Node *)this_node)->eh_in_edge() ? 
                    ((Cfg_Node *)this_node)->eh_in_edge()->eh_in_edge_size() : 0);
        /////////////////////////////////////////////////////////
        // step 2: iterate through predecessors of passed node
        /////////////////////////////////////////////////////////
        for (p = 0; p < in_edge_size; p++) 
        {
            // this again may look complicated, but we're just getting the pred node
            void *this_pred = (this_eh_type) ?
                (void *)((Eh_Node *)this_node)->in_edges(p) :
                ((p < ((Cfg_Node *)this_node)->in_edge_size()) ?
                    (void *)((Cfg_Node *)this_node)->in_edges(p) :
                    (void *)((Cfg_Node *)this_node)->eh_in_edge()->eh_in_edges(p-((Cfg_Node *)this_node)->in_edge_size()));
            int v = vertex.lookup(this_pred); //int v = this_pred->dom_id;
			assert(v != -1);
            int u = eval(v, ancestor, semi, label);

            // keep the smallest semi value seen over all predecessors
            if (semi[u] < semi[w]) 
                semi[w] = semi[u];
        }
		int this_id = vertex.lookup(this_node);
		assert(this_id != -1);
        //add node from bucket to this.. type = 1 for bucket
        // i.e. this node is appended to the bucket of its semi value
        add_element(mm, bucket, bucket_size, bucket_capacity,
                    semi[w], this_id);
                    //vertex[semi[w]], node[w]->dom_id);
		
        // make parent of w also ancestor of w.		
        ancestor[w] = parent[w]; 

        ////////////////////////////////////////////////////////////////
        // step 3: traverse through all nodes in bucket of parent of w.
        ////////////////////////////////////////////////////////////////
        // NOTE(review): the bucket is not emptied after this loop, so its
        // entries are evaluated again for every further child of the same
        // parent -- confirm this is intended
        for (b = 0; b < bucket_size[parent[w]]; b++) 
        {
            void *thisBucket = node[bucket[parent[w]][b]].node;
            int v = vertex.lookup(thisBucket); //int v = thisBucket->dom_id;
			assert(v != -1);
            int u = eval(v, ancestor, semi, label);
            if (semi[u] < semi[v]) 
                dom[v] = u;
            else
                dom[v] = parent[w];
        }
    }
    ///////////////////////////////////////////////////////////////////
    // step 4: final pass to assign true immediate dominators.
    ///////////////////////////////////////////////////////////////////
    for (w = 1; w < number_nodes ; w ++) 
    { // again for all nodes but prolog = node[0].
        //if (dom[w] != vertex[semi[w]])
        if (dom[w] != semi[w])
            dom[w] = dom[dom[w]];
        // if idom is an eh_node, use eh_node's idom
        if (node[dom[w]].is_eh_node) dom[w] = dom[dom[w]];
        assert(node[dom[w]].is_eh_node==false);
        // do for all nodes but prolog.
        if (node[w].is_eh_node) //put in every idom field except prolog
            ((Eh_Node *)node[w].node)->idom = (Cfg_Node *)node[dom[w]].node;
        else
            ((Cfg_Node *)node[w].node)->idom = (Cfg_Node *)node[dom[w]].node;
    }
    // finally assign prolog immediate dominator to NULL.
    prolog()->idom = NULL;
}

//////////////////////////////////////////////////
// do_depth_first: especially for tarjan's 
//		create a depth first ordered 
//      list of id's from nodeList.
//      also sets up semidominator array & others.
//      required for tarjan's algorithm.
//////////////////////////////////////////////////
int do_depth_first(void* tN, Dom_Node* node_list,
				  Dom_ID_Map &vertex, int* semi, int* label,
				  int* ancestor, int* parent,
				  unsigned short traversal_number, Mem_Manager &mm, bool is_eh_node)
{
    //int id = tN->dom_id;    // get id
    int id = vertex.insert(mm,tN);
		
    node_list[id].node = tN;      // set node array id pointer to tN.
    semi[id] = id;       // enter ordering number into DForder array at id.
    label[id] = id;
    ancestor[id] = -1;  // initialize ancestors to -1.

    if (is_eh_node) // Eh_Node
    {
        node_list[id].is_eh_node = true;
        Eh_Node *nodd = (Eh_Node *)tN;
        assert(nodd->latest_traversal<traversal_number);
        nodd->latest_traversal = traversal_number;    // mark visited without resetting.
        // loop for all successors of this node.
        int s;
        for (s = 0; s < nodd->out_edge_size(); s++) 
        {
            Cfg_Node *succ_bb = nodd->out_edges(s)->handler;
            if (succ_bb->latest_traversal < traversal_number) 
            { 
                int succ_id = do_depth_first(succ_bb, node_list, vertex, semi, label, ancestor, parent, traversal_number, mm);
                parent[succ_id] = id;
            }
            assert(succ_bb->latest_traversal==traversal_number);
        }
    }
    else // Cfg_Node
    {
        node_list[id].is_eh_node = false;
        Cfg_Node *nodd = (Cfg_Node *)tN;
        assert(nodd->latest_traversal<traversal_number);
        nodd->latest_traversal = traversal_number;    // mark visited without resetting.
        // loop for all successors of this node.
        int s;
        for (s = 0; s < nodd->out_edge_size(); s++) 
        {
            Cfg_Node *succ_bb = nodd->out_edges(s);
            // check if NOT visited yet this traversal. if not, then recurse.
            if (succ_bb->latest_traversal < traversal_number) 
            { 
                int succ_id = do_depth_first(succ_bb, node_list, vertex, semi, label, ancestor, parent, traversal_number, mm);
                parent[succ_id] = id;
            }
            assert(succ_bb->latest_traversal==traversal_number);
        }
        if (nodd->eh_out_edge())
        {
            Eh_Node *succ_bb = nodd->eh_out_edge();
            if (succ_bb->latest_traversal < traversal_number) 
            { 
                int succ_id = do_depth_first(succ_bb, node_list, vertex, semi, label, ancestor, parent, traversal_number, mm, true);
                parent[succ_id] = id;
            }
            assert(succ_bb->latest_traversal==traversal_number);
        }
    }
    return id;
}


///////////////////////////////////////////////
// add_element: append add_id to the int array
//      stored in slot this_id of an int*[].
//      array_size / array_capacity hold the
//      per-slot element count and capacity.
///////////////////////////////////////////////
void add_element(Mem_Manager &mm, int** array, int* array_size, int* array_capacity,
                 int this_id, int add_id) 
{
    int *&slot     = array[this_id];
    int  &capacity = array_capacity[this_id];
    int  &used     = array_size[this_id];

    // make room for one more entry, then store it behind the last one
    RESIZE_ARRAY(int, slot, capacity, used, 4, mm);
    slot[used] = add_id;
    used++;
}

////////////////////////////////////////////////////////////////////////
// find_loop_depths_and_headers: recomputes loop information for the
//        whole flow graph. Every back edge found is walked from its
//        tail up to its head with traverse_loop(), which bumps the
//        loop depth and records the loop header of each node it visits.
////////////////////////////////////////////////////////////////////////
void Flow_Graph::find_loop_depths_and_headers() 
{
    // scratch arena for the back-edge list; it lives only for this call
    Mem_Manager mm(num_fg_nodes * sizeof(void *));

    // start from a clean slate (reset_depth is applied to the graph)
    apply(reset_depth, NULL);

    // gather the back edges of the graph into 'be'
    Back_Edge be(mm);
    apply(find_back_edges, &be);

    int edge;
    // pass 1: flag every tail before any loop is walked, because
    // traverse_loop() consults the traversing() flag of predecessors
    for (edge = 0; edge < be.back_edge_count; edge++)
        be.back_edge_tails[edge]->set_traversing(true);

    // pass 2: walk each loop body with a fresh traversal number
    edge = 0;
    while (edge < be.back_edge_count)
    {
        ++traversal_number;
        traverse_loop(be.back_edge_tails[edge], be.back_edge_heads[edge],
                      traversal_number, true);
        edge++;
    }
}

#ifdef _DEBUG

// get_loop_headers: builds the list of distinct back-edge heads in 'be'.
//   be - back edges to scan (scanned from the last edge to the first)
//   mm - memory manager the result array is allocated from
// Returns an array of the unique heads, in the order they are first met
// during the scan, always terminated by a NULL entry (an empty back-edge
// list yields an array holding just the terminator).
Cfg_Node **get_loop_headers(Back_Edge &be, Mem_Manager &mm)
{
    // Worst case every head is distinct, so reserve one pointer per back
    // edge plus one slot for the NULL terminator.
    Cfg_Node **headers =
        (Cfg_Node **) mm.alloc(sizeof(Cfg_Node *) * (be.back_edge_count + 1));
    int num_headers = 0;
    int i,j;

    for (i = be.back_edge_count-1; i >= 0; i --) 
    {
        Cfg_Node *head = be.back_edge_heads[i];

        // skip heads that have already been recorded
        bool already_listed = false;
        for (j = 0; j < num_headers; j ++) 
        {
            if (headers[j] == head)
            {
                already_listed = true;
                break;
            }
        }
        if (!already_listed)
        {
            headers[num_headers] = head;
            num_headers++;
        }
    }
    // end it with a NULL, even when no header was found
    headers[num_headers] = NULL;
    return headers;
}

#endif

///////////////////////////////////////////////
// supporting routines for loop transformations
///////////////////////////////////////////////
// linearize_loop: starting at 'header', rotates blocks of the loop that
// contains start_node. A block is rotated (via duplicate_header) only when
// exactly one of its successors stays inside the loop and it has at least
// one other out edge; the walk then continues with that in-loop successor.
//   header       - block to examine
//   block_marker - traversal stamp marking blocks handled in this pass
//   mm           - memory manager used for cloned blocks
//   start_node   - node identifying the loop (passed to is_in_loop)
void linearize_loop(Cfg_Node *header, unsigned short block_marker, Mem_Manager &mm, Cfg_Node *start_node)
{
    // nothing to do for blocks already handled or lying outside the loop
    if (header->latest_traversal == block_marker || !is_in_loop(header, start_node))
        return;

    // count the successors that stay in the loop; remember the last one
    Cfg_Node *next_in_loop = NULL;
    int in_loop_succs = 0;
    int e;
    for (e = 0; e < header->out_edge_size(); e++)
    {
        Cfg_Node *succ = header->out_edges(e);
        if (!is_in_loop(succ, start_node))
            continue;
        next_in_loop = succ;
        in_loop_succs++;
    }
    assert(in_loop_succs > 0);

    // several in-loop successors: do not rotate;
    // a single out edge overall: rotation is not needed
    if (in_loop_succs > 1 || header->out_edge_size() == 1)
        return;

    // duplicate_header() only acts on blocks stamped with block_marker
    header->latest_traversal = block_marker;
    duplicate_header(header, block_marker, mm);

    // continue with the single in-loop successor unless it is itself a loop header
    if (next_in_loop->loop_header != next_in_loop)
        linearize_loop(next_in_loop, block_marker, mm, start_node);

    // duplicate_header() decremented the stamp; restore it so the block is
    // recognized as traversed (needed for loops with multiple entries)
    header->latest_traversal = block_marker;
}

// unroll_loop: peels the loop headed by 'header' (see peel_loop) and then
// redirects the loop's back edges from the old header to the header
// returned by peel_loop.
//   header        - header of the loop to unroll
//   unroll_factor - number of copies requested, forwarded to peel_loop
//   mm            - memory manager used for cloned blocks and edge updates
//   be            - back-edge list; heads of redirected edges are updated
//   block_limit   - block budget forwarded to peel_loop
// Returns NULL on every path.
// NOTE(review): the final 'return NULL' looks like it may have been meant
// to return new_header -- confirm against callers before changing it.
Cfg_Node *Flow_Graph::unroll_loop(Cfg_Node *header, unsigned unroll_factor, Mem_Manager &mm, Back_Edge &be, int block_limit)
{
    // dominator and loop information must be up to date before peeling
    build_dom_tree();
    find_loop_depths_and_headers();

    Cfg_Node *new_header = peel_loop(header,unroll_factor,mm,be,block_limit);
    if (new_header==NULL) return NULL;
    // now, point tails of the loop to the topmost header
    // pred of new_header have backedges
#ifdef _DEBUG
    Cfg_Node *one_tail = NULL;  // last tail that was redirected
    int check_tail = 0;         // number of times the redirected tail changed
#endif
    int b,i;
    for (b = 0; b < be.back_edge_count; b++) 
    {
        for (i = 0; i < header->in_edge_size(); i ++) 
        {
            if(header->in_edges(i) == be.back_edge_tails[b])
            {
                // this back edge still enters the old header: retarget it
                be.back_edge_tails[b]->replace_edge(mm,header,new_header);
                be.back_edge_heads[b] = new_header;
#ifdef _DEBUG
                if (one_tail != be.back_edge_tails[b])
                {
                    check_tail++;
                    one_tail = be.back_edge_tails[b];
                }
#endif
                // the in-edge list of header was just modified; stop scanning it
                break;
            }
        }
    }
    // check_tail exists only in _DEBUG builds, so the check must be guarded
    // the same way instead of relying on assert() being compiled out.
#ifdef _DEBUG
    assert(check_tail==1);
#endif
    return NULL;
}
// peel_loop: peels the loop headed by 'header' up to peel_factor times.
//   header      - header of the loop to peel (its loop depth must be > 0)
//   peel_factor - requested number of peeled copies (must be > 0)
//   mm          - memory manager used for the cloned blocks
//   be          - back-edge list (not used by this routine)
//   block_limit - upper bound on the total number of blocks added
// Each pass marks the blocks of the loop and duplicates them with
// duplicate_header(). Peeling stops early once another copy would exceed
// block_limit.
// Returns the clone produced by the first pass, or NULL when the loop
// contains an exception node or when not even one copy fits the limit.
Cfg_Node *Flow_Graph::peel_loop(Cfg_Node *header, unsigned peel_factor, Mem_Manager &mm, Back_Edge &be, int block_limit)
{
    assert(header->loop_depth() > 0);
    assert(peel_factor > 0);
#ifdef _DEBUG
    // node count before peeling, used for the sanity check at the end
    Num_Nodes nodes_before;
    apply(count_nodes, &nodes_before);
#endif
    Cfg_Node *first_clone = NULL;
    int blocks_added = 0;
    unsigned pass;
    for (pass = 0; pass < peel_factor; pass++)
    {
        // stamp the loop body with a fresh traversal number
        traversal_number++;
        const int loop_size = mark_blocks_in_loop(header, traversal_number);
        if (loop_size == -1)
            return NULL;    // the loop contains an exception node
        if (blocks_added + loop_size > block_limit)
            break;          // one more copy would exceed the budget

        // peel the entire loop once
        Cfg_Node *clone = duplicate_header(header, traversal_number, mm);
        blocks_added += loop_size;
        if (pass == 0)
            first_clone = clone;
    }
#ifdef _DEBUG
    // every peeled copy must have added exactly one loop's worth of blocks
    Num_Nodes nodes_after;
    apply(count_nodes, &nodes_after);
    assert(nodes_after.count == nodes_before.count + blocks_added);
#endif
    return first_clone;
}

//////////////////////////////////////////////////////////////////////
// _duplicate_header: recursive worker used by duplicate_header().
//   Clones one block of the loop being peeled and recurses into its
//   successors.
//     header       - block to clone; must carry block_marker
//     start_node   - block the peeling started from; its loop_header
//                    identifies the loop being processed
//     block_marker - traversal stamp of the blocks still to be cloned
//     mm           - memory manager used for the clone and edge updates
//   Returns the clone, or NULL if header is not stamped with
//   block_marker.
//////////////////////////////////////////////////////////////////////
Cfg_Node *_duplicate_header(Cfg_Node* header, Cfg_Node *start_node, unsigned short block_marker, Mem_Manager &mm) 
{
    // cloned is moved to outside of loop
    // header becomes the tail

    // only blocks still stamped with block_marker are processed
    if (header->latest_traversal != block_marker) return NULL;
    // make copy of header (clone() presumably keeps the same out_edges -- confirm).
    Cfg_Node* this_clone = header->clone(mm);
    this_clone->loop_header = NULL; // initialize to NULL
    // un-stamp header so it is not cloned a second time in this pass
    header->latest_traversal--; // decrement for stopping
    int i,l;
    for (i = 0; i < header->out_edge_size(); i ++) 
    {
        // do this in topological order
        Cfg_Node *succ = header->out_edges(i);
        bool do_duplicate = true;
        // hold back while some predecessor of succ is still stamped (i.e. not
        // yet cloned), unless succ is a loop header and that predecessor has
        // the same loop depth as succ
        for (l = 0; l < succ->in_edge_size(); l ++)
        {
            if (succ->in_edges(l)->latest_traversal == block_marker && // this ensures breadth-first order
                !(succ == succ->loop_header && // this ensures we go into headers of inner loops
                  succ->in_edges(l)->loop_depth() == succ->loop_depth()))
                do_duplicate = false;
        }
        if (do_duplicate)
            _duplicate_header(header->out_edges(i), start_node, block_marker, mm);
    }
    // fix incoming edges
    // this function works only if loops are normalized
    i = 0;
    while (i < header->in_edge_size())
    {
        // predecessors outside the loop are redirected to the clone; i is not
        // advanced here, presumably because replace_edge() removes the entry
        // from header's in-edge list -- confirm
        if (!is_in_loop(header->in_edges(i),start_node->loop_header))
            header->in_edges(i)->replace_edge(mm,header,this_clone);
        else
        {
            // keep tail status: the flag moves from the predecessor to header
            header->in_edges(i)->set_traversing(false);
            header->set_traversing(true);
            i++;
        }
    }
    // set loop information as accurate as we can
    if (header->loop_header == header) // inner loop header
    {
        // the clone heads its own copy of the inner loop
        this_clone->loop_header = this_clone;
        for (l = 0; l < this_clone->in_edge_size(); l ++)
            if (this_clone->in_edges(l)->loop_header != NULL) // already set, so reset it
                this_clone->in_edges(l)->loop_header = this_clone;
    }
    else
        // otherwise take the loop header recorded on the idom of the
        // processed loop's header
        this_clone->loop_header = start_node->loop_header->idom->loop_header;
    // depth and idom are copied from the idom of the processed loop's header
    this_clone->set_loop_depth(start_node->loop_header->idom->loop_depth());
    this_clone->idom = start_node->loop_header->idom;

    return this_clone;
}

// duplicate_header: entry point for peeling one copy of a loop.
//   header       - block to start from; must carry block_marker
//   block_marker - traversal stamp of the blocks to be cloned
//   mm           - memory manager used for clones and edge updates
// Clones header, redirects header's predecessors that lie outside the loop
// to the clone, then clones the remaining stamped blocks through
// _duplicate_header().
// Returns the clone of header, or NULL if header is not stamped with
// block_marker.
Cfg_Node *duplicate_header(Cfg_Node* header, unsigned short block_marker, Mem_Manager &mm) 
{
    if (header->latest_traversal != block_marker) return NULL;
    Cfg_Node* this_clone = header->clone(mm);
    // un-stamp header so it is not cloned a second time in this pass
    header->latest_traversal--; // decrement for stopping

#ifdef _DEBUG
    bool check_tail = false;    // set once a predecessor flagged as tail is seen
#endif
    // the clone takes idom, loop header and depth from the idom of the
    // loop's header
    assert(header->loop_header->idom!=NULL);
    this_clone->idom = header->loop_header->idom;
    this_clone->loop_header = header->loop_header->idom->loop_header;
    this_clone->set_loop_depth(header->loop_header->idom->loop_depth());

    int i = 0;
    while (i < header->in_edge_size())
    {
#ifdef _DEBUG
        if (header->in_edges(i)->traversing()) check_tail = true;
#endif
        // predecessors outside the loop are redirected to the clone; i is not
        // advanced here, presumably because replace_edge() removes the entry
        // from header's in-edge list -- confirm
        if (!is_in_loop(header->in_edges(i),header->loop_header) &&
            header->in_edges(i) != this_clone) // avoid self loop
            header->in_edges(i)->replace_edge(mm,header,this_clone);
        else
        {
            // keep tail status: the flag moves from the predecessor to header
            header->in_edges(i)->set_traversing(false);
            header->set_traversing(true);
            i++;
        }
    }
    // check_tail exists only in _DEBUG builds, so the check must be guarded
    // the same way instead of relying on assert() being compiled out.
#ifdef _DEBUG
    assert(check_tail);
#endif

    for (i = 0; i < header->out_edge_size(); i ++) 
    {
        // do this in topological order
        Cfg_Node *succ = header->out_edges(i);
        bool do_duplicate = true;
        int l;
        // hold back while some predecessor of succ is still stamped (i.e. not
        // yet cloned), unless succ is a loop header and that predecessor has
        // the same loop depth as succ
        for (l = 0; l < succ->in_edge_size(); l ++)
        {
            if (succ->in_edges(l)->latest_traversal == block_marker && // this ensures breadth-first
                !(succ == succ->loop_header && // this ensures we go into headers of inner loops
                  succ->in_edges(l)->loop_depth() == succ->loop_depth()))
                do_duplicate = false;
        }
        if (do_duplicate)
            _duplicate_header(header->out_edges(i), header, block_marker, mm);
    }
    return this_clone;
}

////////////////////////////////////////////////////////////////////////
// traverse_loop: walks backwards (over predecessors) from 'node' until
//                loop_header is reached, following back edges into
//                nested loops as well.
//   node             - block to visit
//   loop_header      - header of the loop being walked; ends the walk
//   traversal_number - stamp marking blocks visited in this walk
//   update_inc       - when true, each visited block gets its loop
//                      depth incremented
////////////////////////////////////////////////////////////////////////
void traverse_loop(Cfg_Node *node, Cfg_Node *loop_header, unsigned short traversal_number, bool update_inc) 
{
    // each block is handled once per walk
    if (node->latest_traversal == traversal_number)
        return;
    node->latest_traversal = traversal_number;

    // record the header unless the block is already nested deeper than it
    if (node->loop_depth() <= loop_header->loop_depth())
        node->loop_header = loop_header;

    // one more level of nesting for every walk that reaches this block
    if (update_inc)
        node->inc_loop_depth();

    // the header itself ends the walk
    if (node == loop_header)
        return;

    // Visit the predecessors. Nested loops are entered through their back
    // edges; they may already have been, or will again be, walked for their
    // own back edges, which is what gives them one extra depth increment per
    // level of nesting.
    Cfg_Int p,q;
    for (p = 0; p < node->in_edge_size(); p++) 
    {
        Cfg_Node *pred = node->in_edges(p);
        // A predecessor is skipped only when it already belongs to this
        // loop, is at least as deep as 'node', and is not a tail node.
        if (pred->loop_header != loop_header ||
            pred->loop_depth() < node->loop_depth() ||
            pred->traversing())
            traverse_loop(pred, loop_header, traversal_number, update_inc);
    }

    // Blocks reached through exception-handler nodes have predecessors too:
    // apply the same rule to the blocks feeding those eh nodes.
    if (node->eh_in_edge() != NULL)
    {
        for (p = 0; p < node->eh_in_edge()->eh_in_edge_size(); p++)
        {
            Eh_Node *eh_pred = node->eh_in_edge()->eh_in_edges(p);
            for (q = 0; q < eh_pred->in_edge_size(); q++)
            {
                Cfg_Node *pred = eh_pred->in_edges(q);
                if (pred->loop_header != loop_header ||
                    pred->loop_depth() < node->loop_depth() ||
                    pred->traversing())
                    traverse_loop(pred, loop_header, traversal_number, update_inc);
            }
        }
    }
}

// mark_blocks_in_loop: stamps every block of the loop headed by 'header'
// with 'marker' and counts them.
//   header - header of the loop
//   marker - traversal stamp to write into the blocks
// Returns the number of blocks stamped, or -1 as soon as
// traverse_loop_mark_block() reports -1 (a block with an exception out edge).
int mark_blocks_in_loop(Cfg_Node *header, unsigned marker) 
{
    int total = 0;
    int e;
    for (e = 0; e < header->in_edge_size(); e++) 
    {
        Cfg_Node *pred = header->in_edges(e);
        // only predecessors inside the loop start a backward walk
        if (!is_in_loop(pred, header))
            continue;

        const int marked = traverse_loop_mark_block(pred, header, marker);
        if (marked < 0)
            return -1;      // propagate -1 upward
        total += marked;
#ifdef _DEBUG
        // debug builds flag the predecessor as a tail node
        pred->set_traversing(true);
#endif
    }
    return total;
}

// traverse_loop_mark_block: walks backwards from 'node' to loop_header,
// stamping each block with traversal_number.
//   node             - block to visit
//   loop_header      - header of the loop; ends the walk
//   traversal_number - stamp to write into visited blocks
// Returns the number of blocks newly stamped by this call (0 if node was
// already stamped), or -1 if a block with an exception out edge is met.
int traverse_loop_mark_block(Cfg_Node *node, Cfg_Node *loop_header, unsigned short traversal_number) 
{
    // already counted during this walk
    if (node->latest_traversal == traversal_number)
        return 0;
    node->latest_traversal = traversal_number;
    assert(is_in_loop(node,loop_header));

    // a block with an exception out edge aborts the count
    if (node->eh_out_edge() != NULL)
        return -1;
    // the header counts as one block and ends the walk
    if (node == loop_header)
        return 1;

    unsigned total = 1;     // this block itself
    int p = 0;
    while (p < node->in_edge_size())
    {
        const int below = traverse_loop_mark_block(node->in_edges(p), loop_header, traversal_number);
        if (below < 0)
            return -1;      // propagate -1 upward
        total += below;     // plus the blocks found behind this predecessor
        p++;
    }
    return total;
}

////////////////////////////////////////
// dominates: returns 1 if 'this' node
//            is 'node' itself or appears
//            on node's idom chain,
//            0 otherwise (also for NULL).
////////////////////////////////////////
int Cfg_Node::dominates(Cfg_Node *node) {
    // climb the immediate-dominator chain until we meet 'this' or run out
    Cfg_Node *cursor;
    for (cursor = node; cursor != NULL; cursor = cursor->idom)
    {
        if (cursor == this)
            return 1;
    }
    return 0;
}

//////////////////////////////////////////////////////////////////////
// basic functions taken directly from tarjan's dominator algorithm
//////////////////////////////////////////////////////////////////////
// compress: path compression step used by eval().
//   b        - vertex whose ancestor path is compressed
//   ancestor - ancestor[v] is the recorded ancestor of v, or -1 if none
//   semi     - values compared to pick the better label
//   label    - label[v] is updated to the label with the smallest semi
//              value found on the compressed part of the path
// After the call ancestor[b] points to the topmost vertex of its path (the
// one whose own ancestor is -1), and the same holds for every vertex that
// was on the path. A vertex with no ancestor has nothing to compress and
// is left untouched.
void compress(int b, int* ancestor, int* semi, int* label) {
	// Guard: without it a vertex with ancestor[b] == -1 would make the
	// test below read ancestor[-1].
	if (ancestor[b] == -1) return;
	if (ancestor[ancestor[b]] != -1) { // ancestor of b is not the top of the path yet.
		compress(ancestor[b], ancestor, semi, label);
		// if the ancestor's label has a smaller semi value, adopt that label.
		const int up = ancestor[b];
		if (semi[label[up]] < semi[label[b]]) {
			label[b] = label[up];
		}
		// skip over the (now compressed) ancestor.
		ancestor[b] = ancestor[up];
	}
}

// eval: returns b itself when b has no recorded ancestor (ancestor[b] == -1);
// otherwise compresses b's ancestor path and returns the resulting label[b].
int eval(int b, int* ancestor, int* semi, int* label) {
	if (ancestor[b] != -1) {
		// b has an ancestor: compress its path, then report its label.
		compress(b, ancestor, semi, label);
		return label[b];
	}
	return b;	// no ancestor recorded, leave alone.
}
