// Copyright (C)  2000 Intel Corporation.  All rights reserved.
//
// $Header: /usr/development/orp/orp/arch/ia32/ia32_o1_jit/jcfg.cpp,v 1.2 2001/08/13 09:59:58 xhshi Exp $
//


#include "defines.h"
#ifndef MAX_VARIABLES_TO_ALLOCATE
// Set to -1 to disable the limit.
#define MAX_VARIABLES_TO_ALLOCATE 16
#endif // MAX_VARIABLES_TO_ALLOCATE

#include "regalloc.h"

#if (1 || REG_ALLOC_METHOD == REG_ALLOC_CHOW)

#include <iostream.h>
#include <assert.h>
#include "jcfg.h"
#include "Mem_Manager.h"
#include "jit_intf.h"
#include "bit_vector.h"
#include "jit.h"

#include "x86.h"

// Maps a register number (the bit position returned by lowest_bit_set()
// for a register mask) to an index into Jcfg_node::allocation[], which
// has four slots (0..3).  A value of -1 means the register has no slot.
// NOTE(review): presumably entries 3, 5, 6 and 7 correspond to ebx, ebp,
// esi and edi -- confirm against the register enum in x86.h.
static const int reg_mapping[] = { -1, -1, -1, 0, -1, 1, 2, 3 };

// Returns the number of bits set in a byte.  The byte is split into
// its high and low nibbles; each nibble's bit count comes from a
// 16-element lookup table and the two counts are added.
inline static int num_bits_set(unsigned char c)
{
  static unsigned char nibble_bits[16] =
  {
    0, 1, 1, 2, 1, 2, 2, 3,
    1, 2, 2, 3, 2, 3, 3, 4
  };

  const unsigned high_nibble = c >> 4;
  const unsigned low_nibble = c & 0x0f;

  return nibble_bits[high_nibble] + nibble_bits[low_nibble];
}

// Returns the index (0..7) of the least significant set bit of a byte,
// or -1 if no bit is set, using a 256-element lookup table.
//
// The table is explicitly "signed char": plain "char" has
// implementation-defined signedness, and on a target where it is
// unsigned the -1 sentinel for c==0 would be stored as 255 (or rejected
// as a narrowing conversion by newer compilers).
inline static int lowest_bit_set(unsigned char c)
{
  static const signed char table[256] =
  {
    -1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
     4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
     5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
     4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
     6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
     4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
     5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
     4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
     7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
     4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
     5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
     4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
     6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
     4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
     5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
     4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
  };

  return table[c];
}

// Counts one edge from the statement at bytecode offset src to the
// statement at bytecode offset dest, updating the per-offset edge
// counters in summary[] and the running edge_array_size.  The target
// is marked as the start of a basic block if it is not one already.
// When the target lies at or before the source (a backward branch,
// which also covers the unexpected src==dest case) and was not yet a
// block start, an already-scanned block is being split, so one more
// edge is counted from the statement preceding dest.
void Jcfg::count_edge(unsigned src, unsigned dest)
{
  // Duplicate edges are not detected; every call counts.
  edge_array_size ++;
  assert (src < bc_length);
  assert (dest < bc_length);
#if CFG_FORWARD_EDGES
  summary[src].num_out_edges ++;
#endif // CFG_FORWARD_EDGES
  summary[dest].num_in_edges ++;

  // Nothing more to do when the target already begins a basic block.
  if (summary[dest].is_basic_block_start)
    return;

  summary[dest].is_basic_block_start = 1;
  basic_block_count ++;

  // A forward branch only needs the target marked.
  if (src < dest)
    return;

  // Backward branch into the middle of a block: count the edge that
  // now connects the preceding statement to the new block.
  summary[dest].num_in_edges ++;
  unsigned preceding = dest - 1;
  while (!summary[preceding].is_stmt_start)
    preceding --;
#if CFG_FORWARD_EDGES
  summary[preceding].num_out_edges ++;
#endif // CFG_FORWARD_EDGES
  edge_array_size ++;
}

// Add an edge from the src node to the dest node in the CFG.
void Jcfg::add_edge(unsigned src, unsigned dest)
{
  assert (src < basic_block_count);
  assert (dest < basic_block_count);
#if CFG_FORWARD_EDGES
  forward_edge_array[cfg_node_array[src].cur_out_edge_idx++] = dest;
#endif // CFG_FORWARD_EDGES
  backward_edge_array[cfg_node_array[dest].cur_in_edge_idx++] = src;
}

// Returns the index of the CFG node whose bytecode range
// [bc_start, bc_start+bc_length) contains the address `where`, found
// by binary search over cfg_node_array.
int Jcfg::find_node(const unsigned char *where)
{
  assert(where >= bytecodes);
  assert(where < bytecodes+bc_length);

  int lo = 0;
  int hi = basic_block_count - 1;
  int guess = basic_block_count / 2;

  for (;;)
    {
      const int guess_too_high = cfg_node_array[guess].bc_start > where;

      // Found it: the node starts at or before `where` and ends after it.
      if (!guess_too_high &&
          cfg_node_array[guess].bc_start+cfg_node_array[guess].bc_length > where)
        return guess;

      // The search interval must still be able to shrink.
      assert(lo!=hi && lo!=guess);

      if (guess_too_high)
        {
          hi = guess;
          guess = (lo + hi) / 2;      // round down, toward lo
        }
      else
        {
          lo = guess;
          guess = (lo + hi + 1) / 2;  // round up, toward hi
        }
    }
}

void Jcfg::add_edge(unsigned src, const unsigned char *dest)
{
  add_edge(src, (unsigned)find_node(dest));
}

// Walks the bytecode array bc[0..code_length) one instruction at a time
// and fills in summary[]:
//   - is_stmt_start for every offset where an instruction begins,
//   - is_basic_block_start for every offset that begins a basic block,
//   - is_load_or_store for local-variable loads/stores and iinc,
//   - edge counts, via count_edge(), for branches and for fall-through
//     into a statement that begins a basic block.
// basic_block_count and edge_array_size are updated along the way.
//
// If a jsr or jsr_w is encountered, is_jsr_or_ret is set and the scan
// stops immediately; the summary is then incomplete.
void Jcfg::create_summary(const unsigned char *bc,
                          unsigned code_length)
{
  int previous_statement_is_unconditional_branch = 0;
  int previous_statement_falls_through = 0;
  unsigned idx;
  unsigned prev_idx = 0;  // only read once a statement has been scanned
  int offset;
  int i;
  int new_idx, default_offset, low, high, n_entries, npairs;

  // Mark the very first statement as the beginning of a new basic block.
  summary[0].is_basic_block_start = 1;
  basic_block_count ++;
  idx = 0;

  while (idx < code_length)
    {
      summary[idx].is_stmt_start = 1;

      // A statement begins a new basic block if the previous statement
      // was an unconditional branch, or if it is a reference store
      // (astore in any form), for the purposes of garbage collection.
      int starts_new_block = previous_statement_is_unconditional_branch;
      switch (bc[idx])
        {
        case 0x3a:  // astore
        case 0x4b: case 0x4c: case 0x4d: case 0x4e:  // astore_*
          starts_new_block = 1;
          break;
        case 0xc4:  // wide
          // The wide prefix modifies astore (0x3a); the one-byte
          // astore_<n> forms are never prefixed by wide.
          if (bc[idx+1] == 0x3a)  // wide astore
            starts_new_block = 1;
          break;
        }
      if (starts_new_block && !summary[idx].is_basic_block_start)
        {
          summary[idx].is_basic_block_start = 1;
          basic_block_count ++;
        }
      previous_statement_is_unconditional_branch = 0;

      // If previous statement doesn't affect the control flow, and
      // the current statement is already marked as the beginning of a
      // basic block, then add an edge.
      if (previous_statement_falls_through &&
          summary[idx].is_basic_block_start)
        {
          count_edge(prev_idx,idx);
        }
      previous_statement_falls_through = 0;
      prev_idx = idx;

      // Advance past the instruction; if it is some kind of branch,
      // count its edges.
      switch (bc[idx])
        {
        case 0x15:      // iload
        case 0x19:      // aload
        case 0x16:      // lload
        case 0x36:      // istore
        case 0x3a:      // astore
        case 0x37:      // lstore
          summary[idx].is_load_or_store = 1;
          previous_statement_falls_through = 1;
          idx += 2;
          break;
        case 0x10:      // bipush
        case 0x12:      // ldc
        case 0x17:      // fload
        case 0x18:      // dload
        case 0x38:      // fstore
        case 0x39:      // dstore
        case 0xbc:      // newarray
        case 0xcb:      // ldc_quick
          previous_statement_falls_through = 1;
          idx += 2;
          break;
        case 0x11:      // sipush
        case 0x13:      // ldc_w
        case 0x14:      // ldc2_w
        case 0xcc:      // ldc_w_quick
        case 0xcd:      // ldc2_w_quick
        case 0xce:      // getfield_quick
        case 0xcf:      // putfield_quick
        case 0xd0:      // getfield2_quick
        case 0xd1:      // putfield2_quick
        case 0xd2:      // getstatic_quick
        case 0xd3:      // putstatic_quick
        case 0xd4:      // getstatic2_quick
        case 0xd5:      // putstatic2_quick
        case 0xb2:      // getstatic
        case 0xb3:      // putstatic
        case 0xb4:      // getfield
        case 0xe3:      // getfield_quick_w
        case 0xb5:      // putfield
        case 0xe4:      // putfield_quick_w
        case 0xc0:      // checkcast
        case 0xe0:      // checkcast_quick
        case 0xc1:      // instanceof
        case 0xe1:      // instanceof_quick
        case 0xbd:      // anewarray
        case 0xde:      // anewarray_quick
        case 0xbb:      // new
        case 0xdd:      // new_quick
          previous_statement_falls_through = 1;
          idx += 3;
          break;
        case 0xd8:      // invokesuper_quick
        case 0xd6:      // invokevirtual_quick
        case 0xe2:      // invokevirtual_quick_w
        case 0xd7:      // invokenonvirtual_quick
        case 0xdb:      // invokevirtualobject_quick
        case 0xb6:      // invokevirtual
        case 0xb7:      // invokespecial
        case 0xb8:      // invokestatic
        case 0xd9:      // invokestatic_quick
          previous_statement_falls_through = 1;
          idx += 3;
          break;
        case 0xb9:      // invokeinterface
        case 0xda:      // invokeinterface_quick
          previous_statement_falls_through = 1;
          idx += 5;
          break;
        case 0xc4:      // wide
          if (bc[idx+1] == 0x84)  // iinc
            {
              previous_statement_falls_through = 1;
              summary[idx].is_load_or_store = 1;
              idx += 6;
            }
          else
            {
              if (bc[idx+1] == 0xa9)  // ret
                previous_statement_is_unconditional_branch = 1;
              else // load/store
                {
                  summary[idx].is_load_or_store = 1;
                  previous_statement_falls_through = 1;
                }
              idx += 4;
            }
          break;
        case 0x99: case 0x9a:   // if{eq,ne,lt,ge,gt,le} int comparisons against zero
        case 0x9b: case 0x9c:
        case 0x9d: case 0x9e:
        case 0x9f: case 0xa0:   // if_icmp{eq,ne,lt,ge,gt,le}
        case 0xa1: case 0xa2:   // integer conditional branch
        case 0xa3: case 0xa4:
        case 0xa5: case 0xa6:   // if_acmp{eq,ne}
        case 0xc6: case 0xc7:   // if(non)null
          offset = JCFG_OFFSET2(bc,idx+1);
          count_edge(idx,idx+offset);  // add an edge to the branch target
          count_edge(idx,idx+3);  // add an edge to the next statement
          idx += 3;
          break;
        case 0xa7:      // goto
          offset = JCFG_OFFSET2(bc, idx+1);
          count_edge(idx,idx+offset);
          previous_statement_is_unconditional_branch = 1;
          idx += 3;
          break;
        case 0xc8:      // goto_w
          offset = JCFG_OFFSET4(bc, idx+1);
          count_edge(idx,idx+offset);
          previous_statement_is_unconditional_branch = 1;
          idx += 5;
          break;
        case 0xa8:      // jsr
        case 0xc9:      // jsr_w
          is_jsr_or_ret = 1;
          return;  // Failure -- there shouldn't be any jsr's here.
        case 0xaa: // tableswitch
          previous_statement_is_unconditional_branch = 1;
          // Operands start at the next 4-byte boundary after the opcode.
          new_idx = ((idx + 4) & (~3));
          default_offset = JCFG_OFFSET4(bc,new_idx);
          low = JCFG_OFFSET4(bc,new_idx+4);
          high = JCFG_OFFSET4(bc,new_idx+8);
          new_idx += 12;
          n_entries = high - low + 1;
          count_edge(idx,idx+default_offset);
          for (i=0; i<n_entries; i++)
            {
              offset = JCFG_OFFSET4(bc,new_idx);
              count_edge(idx,idx+offset);
              new_idx += 4;
            }
          idx = new_idx;
          break;
        case 0xab:      // lookupswitch (key match and jump)
          previous_statement_is_unconditional_branch = 1;
          // Operands start at the next 4-byte boundary after the opcode.
          new_idx = ((idx + 4) & (~3));
          default_offset = JCFG_OFFSET4(bc,new_idx);
          count_edge(idx,idx+default_offset);
          npairs = JCFG_OFFSET4(bc,new_idx+4);
          new_idx += 8;
          for (i=0; i<npairs; i++)
            {
              // Each pair is (match, offset); only the offset matters here.
              offset = JCFG_OFFSET4(bc,new_idx+4);
              count_edge(idx,idx+offset);
              new_idx += 8;
            }
          idx = new_idx;
          break;
        case 0xc5:      // multianewarray
        case 0xdf:      // multianewarray_quick
          previous_statement_falls_through = 1;
          idx += 4;
          break;
        case 0xa9:      // ret
          // ret carries a one-byte local variable index, so it is two
          // bytes long; stepping by one would decode the operand as an
          // opcode.
          previous_statement_is_unconditional_branch = 1;
          idx += 2;
          break;
        case 0xac:      // ireturn
        case 0xae:      // freturn
        case 0xb0:      // areturn
        case 0xad:      // lreturn
        case 0xaf:      // dreturn
        case 0xb1:      // return
        case 0xbf:      // athrow
          previous_statement_is_unconditional_branch = 1;
          idx ++;
          break;
        case 0x1a: case 0x1b: case 0x1c: case 0x1d:  // iload_*
        case 0x1e: case 0x1f: case 0x20: case 0x21:  // lload_*
        case 0x2a: case 0x2b: case 0x2c: case 0x2d:  // aload_*
        case 0x3b: case 0x3c: case 0x3d: case 0x3e:  // istore_*
        case 0x3f: case 0x40: case 0x41: case 0x42:  // lstore_*
        case 0x4b: case 0x4c: case 0x4d: case 0x4e:  // astore_*
          summary[idx].is_load_or_store = 1;
          previous_statement_falls_through = 1;
          idx ++;
          break;
        case 0x84:  // iinc
          summary[idx].is_load_or_store = 1;
          previous_statement_falls_through = 1;
          idx += 3;
          break;
        default:
          // Every remaining opcode is treated as a one-byte instruction
          // that falls through.
          previous_statement_falls_through = 1;
          idx ++;
          break;
        }
    }
}

Jcfg::Jcfg(const unsigned char *bc, unsigned code_length, unsigned maxLocals,
           Mem_Manager &m, void *methodHandle, void *classHandle)
{
  unsigned i;
  int j;
  int offset;
  const unsigned char *tbc;
  int new_idx, default_offset, low, high, n_entries, npairs;

  success = 1;  // assume that all will succeed
  mem_manager = &m;
  num_locals = maxLocals;
  bc_length = code_length;
  bytecodes = bc;
  m_handle = methodHandle;
  c_handle = classHandle;

  if (num_locals == 0)
    {
      success = 0;
      return;  // don't bother if there are no locals
    }
  if (method_get_num_handlers(methodHandle) != 0)
    {
      success = 0;
      return;  // don't globally register allocate if there are exceptions
    }
  if (method_get_flags(methodHandle) & ACC_SYNCHRONIZED)
    {
      success = 0;
      return;  // don't globally register allocate if (who knows why)
    }

  edge_array_size = 0;
  is_jsr_or_ret = 0;
  basic_block_count = 0;

  is_long = new(m) Bit_Vector(maxLocals,m);

  // allocate the summary array
  summary = (struct Jcfg_bytecode_info *)
    m.alloc(code_length * sizeof(*summary));

  // clear summary -- should I use a bzero() or something instead?
  for (i=0; i<code_length; i++)
    {
      summary[i].num_in_edges = 0;
#if CFG_FORWARD_EDGES
      summary[i].num_out_edges = 0;
#endif // CFG_FORWARD_EDGES
      summary[i].is_stmt_start = 0;
      summary[i].is_basic_block_start = 0;
      summary[i].is_load_or_store = 0;
    }

  // walk through the byte codes and create the summary information
  create_summary(bc, code_length);
  if (is_jsr_or_ret)
    {
      success = 0;
      return;
    }

  // Allocate the array of flow graph nodes.
  cfg_node_array = (struct Jcfg_node *)
    m.alloc(basic_block_count * sizeof(*cfg_node_array));
  // Allocate the arrays of edges.
#if CFG_FORWARD_EDGES
  forward_edge_array = (int *) m.alloc(edge_array_size * sizeof(int));
#endif // CFG_FORWARD_EDGES
  backward_edge_array = (int *) m.alloc(edge_array_size * sizeof(int));
  // Allocate the reference count array.
  refcount = (int *) m.alloc(maxLocals * sizeof(int));
  for (i=0; i<maxLocals; i++)
    refcount[i] = 0;
  // Allocate the bit vectors for the variables.
  for (i=0; i<basic_block_count; i++)
    {
      cfg_node_array[i].loaded_vars = new(m) Bit_Vector(maxLocals,m);
      cfg_node_array[i].stored_vars = new(m) Bit_Vector(maxLocals,m);
    }

  // Record beginning/end of BBs, count in/out edges for each BB.  The
  // way this works is the following.  Loop through all the byte
  // codes.  When we see the start of a basic block, add all the edges
  // for the last statement encountered.  Then increment the current
  // BB counter.  This assumes that all arrows go from the last
  // statement of the BB to the first statement of the target BB.
  int current_node = -1;
  unsigned last_stmt = 0;
  unsigned current_out_edge_total = 0;
  unsigned current_in_edge_total = 0;
  for (i=0; i<code_length; i++)
    {
      // If we reach the start of a new BB, then last_stmt points to
      // the outgoing information for the previous BB.
      if (summary[i].is_basic_block_start)
        {
          // Don't update "previous BB" info for the first BB.
          if (current_node >= 0)
            {
              cfg_node_array[current_node].bc_end = &bc[last_stmt];
              cfg_node_array[current_node].bc_length =
                &bc[i] - cfg_node_array[current_node].bc_start;
#if CFG_FORWARD_EDGES
              cfg_node_array[current_node].num_out_edges =
                summary[last_stmt].num_out_edges;
              cfg_node_array[current_node].out_edges =
                &forward_edge_array[current_out_edge_total];
              cfg_node_array[current_node].cur_out_edge_idx =
                current_out_edge_total;
              current_out_edge_total +=
                cfg_node_array[current_node].num_out_edges;
#endif // CFG_FORWARD_EDGES
            }
          current_node ++;
          cfg_node_array[current_node].bc_start = &bc[i];
          cfg_node_array[current_node].num_in_edges = summary[i].num_in_edges;
          cfg_node_array[current_node].in_edges =
            &backward_edge_array[current_in_edge_total];
          cfg_node_array[current_node].cur_in_edge_idx =
            current_in_edge_total;
          current_in_edge_total += cfg_node_array[current_node].num_in_edges;
        }

      // Update the load/store bit vectors.  Also, for the variable,
      // update the list of CFG nodes where it is read (this list
      // represents the root nodes for the DFS).
      if (summary[i].is_load_or_store)
        {
          unsigned varnum;
          switch (bc[i])
            {
            case 0x84:  // iinc
              varnum = bc[i+1];
              refcount[varnum] += 2;
              cfg_node_array[current_node].loaded_vars->set(varnum);
              break;
            case 0xc4:  // wide
              switch (bc[i+1])
                {
                case 0x84:  // wide iinc
                  varnum = JCFG_OFFSET2(bc,i+2);
                  refcount[varnum] += 2;
                  cfg_node_array[current_node].loaded_vars->set(varnum);
                  break;
                case 0x15:      // wide iload
                  varnum = JCFG_OFFSET2(bc,i+2);
                  refcount[varnum] ++;
                  cfg_node_array[current_node].loaded_vars->set(varnum);
                  break;
                case 0x16:      // wide lload
                  varnum = JCFG_OFFSET2(bc,i+2);
                  refcount[varnum] ++;
                  cfg_node_array[current_node].loaded_vars->set(varnum);
                  is_long->set(varnum);
                  break;
                case 0x19:      // wide aload
                  varnum = JCFG_OFFSET2(bc,i+2);
                  refcount[varnum] ++;
                  cfg_node_array[current_node].loaded_vars->set(varnum);
                  break;
                case 0x36:      // wide istore
                  varnum = bc[i+1];
                  refcount[varnum] ++;
                  // If the write comes after the first read in the
                  // basic block, then we should treat it as though
                  // the write never happened.  This is because when
                  // we do the DFS for register allocation, we don't
                  // want to stop the DFS in such a node, because the
                  // variable is *not* killed in this basic block.
                  if (!cfg_node_array[current_node].loaded_vars->is_set(varnum))
                    {
                      cfg_node_array[current_node].stored_vars->set(varnum);
                    }
                  break;
                case 0x37:      // wide lstore
                  varnum = bc[i+1];
                  refcount[varnum] ++;
                  if (!cfg_node_array[current_node].loaded_vars->is_set(varnum))
                    {
                      cfg_node_array[current_node].stored_vars->set(varnum);
                    }
                  is_long->set(varnum);
                  break;
                case 0x3a:      // wide astore
                  varnum = bc[i+1];
                  refcount[varnum] ++;
                  if (!cfg_node_array[current_node].loaded_vars->is_set(varnum))
                    {
                      cfg_node_array[current_node].stored_vars->set(varnum);
                    }
                  break;
                }
              break;
            case 0x15:      // iload
              varnum = bc[i+1];
              refcount[varnum] ++;
              cfg_node_array[current_node].loaded_vars->set(varnum);
              break;
            case 0x16:      // lload
              varnum = bc[i+1];
              refcount[varnum] ++;
              cfg_node_array[current_node].loaded_vars->set(varnum);
              is_long->set(varnum);
              break;
            case 0x19:      // aload
              varnum = bc[i+1];
              refcount[varnum] ++;
              cfg_node_array[current_node].loaded_vars->set(varnum);
              break;
            case 0x1a: case 0x1b: case 0x1c: case 0x1d:  // iload_*
              varnum = bc[i] - 0x1a;
              refcount[varnum] ++;
              cfg_node_array[current_node].loaded_vars->set(varnum);
              break;
            case 0x1e: case 0x1f: case 0x20: case 0x21:  // lload_*
              varnum = bc[i] - 0x1e;
              refcount[varnum] ++;
              cfg_node_array[current_node].loaded_vars->set(varnum);
              is_long->set(varnum);
              break;
            case 0x2a: case 0x2b: case 0x2c: case 0x2d:  // aload_*
              varnum = bc[i] - 0x2a;
              refcount[varnum] ++;
              cfg_node_array[current_node].loaded_vars->set(varnum);
              break;
            case 0x36:      // istore
              varnum = bc[i+1];
              refcount[varnum] ++;
              if (!cfg_node_array[current_node].loaded_vars->is_set(varnum))
                {
                  cfg_node_array[current_node].stored_vars->set(varnum);
                }
              break;
            case 0x37:      // lstore
              varnum = bc[i+1];
              refcount[varnum] ++;
              if (!cfg_node_array[current_node].loaded_vars->is_set(varnum))
                {
                  cfg_node_array[current_node].stored_vars->set(varnum);
                }
              is_long->set(varnum);
              break;
            case 0x3a:      // astore
              varnum = bc[i+1];
              refcount[varnum] ++;
              if (!cfg_node_array[current_node].loaded_vars->is_set(varnum))
                {
                  cfg_node_array[current_node].stored_vars->set(varnum);
                }
              break;
            case 0x3b: case 0x3c: case 0x3d: case 0x3e:  // istore_*
              varnum = bc[i] - 0x3b;
              refcount[varnum] ++;
              // See explanation above.
              if (!cfg_node_array[current_node].loaded_vars->is_set(varnum))
                {
                  cfg_node_array[current_node].stored_vars->set(varnum);
                }
              break;
            case 0x3f: case 0x40: case 0x41: case 0x42:  // lstore_*
              varnum = bc[i] - 0x3f;
              refcount[varnum] ++;
              // See explanation above.
              if (!cfg_node_array[current_node].loaded_vars->is_set(varnum))
                {
                  cfg_node_array[current_node].stored_vars->set(varnum);
                }
              is_long->set(varnum);
              break;
            case 0x4b: case 0x4c: case 0x4d: case 0x4e:  // astore_*
              varnum = bc[i] - 0x4b;
              refcount[varnum] ++;
              // See explanation above.
              if (!cfg_node_array[current_node].loaded_vars->is_set(varnum))
                {
                  cfg_node_array[current_node].stored_vars->set(varnum);
                }
              break;
            }
        }

      if (summary[i].is_stmt_start)
        last_stmt = i;
    }

  if (current_node >= 0)
    {
      cfg_node_array[current_node].bc_end = &bc[last_stmt];
      cfg_node_array[current_node].bc_length =
        &bc[i] - cfg_node_array[current_node].bc_start;
#if CFG_FORWARD_EDGES
      cfg_node_array[current_node].num_out_edges =
        summary[last_stmt].num_out_edges;
      cfg_node_array[current_node].out_edges =
        &forward_edge_array[current_out_edge_total];
      cfg_node_array[current_node].cur_out_edge_idx = current_out_edge_total;
      current_out_edge_total += cfg_node_array[current_node].num_out_edges;
#endif // CFG_FORWARD_EDGES
    }

  // Final pass: look at the last statement of each basic block, and
  // record the actual edges.  If the last statement is an INVOKE
  // statement, record the live references at the call site for the
  // purposes of garbage collection.
  for (i=0; i<basic_block_count; i++)
    {
      cfg_node_array[i].allocation[0] = -1;
      cfg_node_array[i].allocation[1] = -1;
      cfg_node_array[i].allocation[2] = -1;
      cfg_node_array[i].allocation[3] = -1;
      tbc = cfg_node_array[i].bc_end;
      switch (tbc[0])
        {
        case 0x99: case 0x9a:   // if{eq,ne,lt,ge,gt,le}
        case 0x9b: case 0x9c:
        case 0x9d: case 0x9e:
        case 0x9f: case 0xa0:   // if_icmp{eq,ne,lt,ge,gt,le}
        case 0xa1: case 0xa2:   // integer conditional branch
        case 0xa3: case 0xa4:
        case 0xa5: case 0xa6:   // if_acmp{eq,ne}
        case 0xc6: case 0xc7:   // if(non)null
          offset = JCFG_OFFSET2(tbc,1);
          add_edge(i,tbc+offset);
          add_edge(i,i+1);
          break;
        case 0xa7:      // goto
          offset = JCFG_OFFSET2(tbc,1);
          add_edge(i,tbc+offset);
          break;
        case 0xc8:      // goto_w
          offset = JCFG_OFFSET4(tbc,1);
          add_edge(i,tbc+offset);
          break;
        case 0xaa: // tableswitch
          new_idx = ((tbc - bc + 4) & (~3));
          default_offset = JCFG_OFFSET4(bc,new_idx);
          low = JCFG_OFFSET4(bc,new_idx+4);
          high = JCFG_OFFSET4(bc,new_idx+8);
          new_idx += 12;
          n_entries = high - low + 1;
          add_edge(i,tbc+default_offset);
          for (j=0; j<n_entries; j++)
            {
              offset = JCFG_OFFSET4(bc,new_idx);
              add_edge(i,tbc+offset);
              new_idx += 4;
            }
          break;
        case 0xab:      // lookupswitch (key match and jump)
          new_idx = ((tbc - bc + 4) & (~3));
          default_offset = JCFG_OFFSET4(bc,new_idx);
          add_edge(i,tbc+default_offset);
          npairs = JCFG_OFFSET4(bc,new_idx+4);
          new_idx += 8;
          for (j=0; j<npairs; j++)
            {
              offset = JCFG_OFFSET4(bc,new_idx+4);
              add_edge(i,tbc+offset);
              new_idx += 8;
            }
          break;
        case 0xc4:  // wide
          if (tbc[1] != 0xa9)  // ret
            add_edge(i,i+1);
          break;
        case 0xa9:      // ret
        case 0xac:      // ireturn
        case 0xae:      // freturn
        case 0xb0:      // areturn
        case 0xad:      // lreturn
        case 0xaf:      // dreturn
        case 0xb1:      // return
        case 0xbf:      // athrow
          break;
        default:
          // add a normal edge to the next basic block
          add_edge(i,i+1);
          break;
        }
    }
}

// Orders valarray[0..size-1] from largest to smallest with an O(n^2)
// selection sort, applying the same swaps to idxarray so the two
// arrays stay paired.  Among equal values, the earliest one in the
// unsorted remainder is selected.
static void sort_refcount(int *idxarray, int *valarray, int size)
{
  const int last = size - 1;
  int head = 0;

  while (head < last)
    {
      // Locate the largest value in valarray[head..last].
      int best = head;
      for (int probe = head + 1; probe <= last; probe++)
        {
          if (valarray[best] < valarray[probe])
            best = probe;
        }

      // Move it (and its paired index) to the head position.
      const int saved_idx = idxarray[best];
      idxarray[best] = idxarray[head];
      idxarray[head] = saved_idx;

      const int saved_val = valarray[best];
      valarray[best] = valarray[head];
      valarray[head] = saved_val;

      head++;
    }
}

// Prepares the CFG for the DFS of all variables.
static void init_dfs_overall(unsigned basic_block_count,
                             struct Jcfg_node *cfg_node_array)
{
  unsigned i;

  for (i=0; i<basic_block_count; i++)
    cfg_node_array[i].registers_available = (1<<esi_reg) | (1<<edi_reg) | (1<<ebp_reg) | (1<<ebx_reg);
}

// Prepares the CFG for the DFS of a single variable by clearing the
// visited flag of every node.
static void init_dfs_onevar(unsigned basic_block_count,
                            struct Jcfg_node *cfg_node_array)
{
  unsigned remaining = basic_block_count;

  while (remaining > 0)
    {
      remaining--;
      cfg_node_array[remaining].visited = 0;
    }
}

// Recursive step of the register-allocation DFS for variable varnum,
// walking backward along in-edges from `node`.
//
// `registers` is narrowed (in place) to the registers still available
// in every node visited so far.  Returns 0 as soon as that set becomes
// empty, 1 otherwise.  The walk does not continue past a node that was
// already visited or one in which the variable is recorded as stored.
static int ra_dfs_1(unsigned varnum, unsigned basic_block_count,
                    struct Jcfg_node *cfg_node_array, unsigned node,
                    unsigned char &registers)
{
  struct Jcfg_node &here = cfg_node_array[node];

  if (here.visited)
    return 1;
  here.visited = 1;

  registers &= here.registers_available;
  if (registers == 0x0)
    return 0;

  // The variable is stored here, so there is no need to look further
  // back along this path.
  if (here.stored_vars->is_set(varnum))
    return 1;

  int pred = 0;
  while (pred < here.num_in_edges)
    {
      const int ok = ra_dfs_1(varnum, basic_block_count, cfg_node_array,
                              here.in_edges[pred], registers);
      if (!ok)
        return 0;
      pred++;
    }
  return 1;
}

// Runs the register-allocation DFS for variable varnum.  Every
// not-yet-visited node that loads or stores the variable is used as a
// root.  Returns the bit mask of registers that remained available in
// all visited nodes; the search stops early once a root's DFS fails.
static unsigned int ra_dfs(unsigned varnum, unsigned basic_block_count,
                           struct Jcfg_node *cfg_node_array)
{
  unsigned char registers = 0xff;

  for (unsigned node = 0; node < basic_block_count; node++)
    {
      struct Jcfg_node &candidate = cfg_node_array[node];

      if (candidate.visited)
        continue;

      const int is_root = candidate.loaded_vars->is_set(varnum) ||
                          candidate.stored_vars->is_set(varnum);
      if (is_root &&
          !ra_dfs_1(varnum, basic_block_count, cfg_node_array,
                    node, registers))
        break;
    }

  return registers;
}

// Assigns to variable varnum the lowest-numbered register present in
// the mask `result`.  In every node currently flagged as visited, that
// register is removed from registers_available and varnum is written
// into the node's allocation slot for it.  The register's bit is also
// added to registers_used and to rbitvec, and varnum is flagged in
// var_is_register_allocated.  Returns `result` with the chosen
// register's bit cleared.
//
// NOTE(review): only the low byte of `result` is examined, and
// lowest_bit_set() yields -1 when that byte is zero -- callers
// presumably guarantee a non-empty mask; confirm.
unsigned Jcfg::assign_register(unsigned result, unsigned varnum,
                               unsigned char &rbitvec,
                               unsigned basic_block_count,
                               struct Jcfg_node *cfg_node_array)
{
  const unsigned chosen = lowest_bit_set((unsigned char)result);
  const unsigned chosen_bit = 1 << chosen;
  const unsigned other_bits = ~chosen_bit;

  for (unsigned n = 0; n < basic_block_count; n++)
    {
      struct Jcfg_node &node = cfg_node_array[n];

      if (!node.visited)
        continue;

      node.registers_available &= (unsigned char) other_bits;
      node.allocation[reg_mapping[chosen]] = (short) varnum;
    }

  registers_used |= chosen_bit;
  rbitvec |= chosen_bit;
  var_is_register_allocated->set(varnum);

  return result & other_bits;
}

// Normalizes register_allocations[varnum] with respect to two-word
// (long) variables:
//
//  - A long that has registers keeps only the lowest-numbered register
//    of its pair.
//  - A single-word variable with no register of its own, whose
//    predecessor (varnum-1) is a long with registers, receives the
//    predecessor's mask with the lowest set bit removed, i.e. the
//    higher-numbered register of the pair.
//  - Everything else is left as it was.
void Jcfg::fixup_allocation(int varnum)
{
  unsigned char regs = register_allocations[varnum];

  if (regs != 0)
    {
      if (is_long->is_set(varnum))
        {
          // Isolate the least significant set bit.
          regs = (regs & ~(regs-1));
        }
    }
  else if (varnum > 0 && !is_long->is_set(varnum) &&
           is_long->is_set(varnum-1))
    {
      regs = register_allocations[varnum-1];
      if (regs != 0)
        {
          // Drop the least significant set bit.
          regs = (regs & (regs-1));
        }
    }

  register_allocations[varnum] = regs;
}

/* Register allocation algorithm:

   1. Sort the variables with sort_refcount(), so that candidates are
   considered in priority order.  The loop stops at the first variable
   whose reference count is below 2.

   2. For each candidate, run ra_dfs() over the CFG.  The roots of the
   search are the nodes where the variable is loaded or stored.  A
   result of zero means no register is available and the variable is
   skipped.

   3. Otherwise give the variable the lowest numbered register in the
   result (or the two lowest, for a double-width variable), in case
   there's a chance to decrease the number of callee saved registers
   we need to use.

   4. Finally run fixup_allocation() over all variables, from the
   highest-numbered down to the lowest.

 */
void Jcfg::register_allocate()
{
  unsigned idx;

  /* Start from a clean slate: no registers used, nothing allocated. */
  registers_used = 0;
  register_allocations = (unsigned char *)mem_manager->alloc(num_locals);
  for (idx=0; idx<num_locals; idx++)
    register_allocations[idx] = 0;
  var_is_register_allocated =
    new(*mem_manager) Bit_Vector(num_locals,*mem_manager);

  if (!success)
    return;

  /* One buffer, two halves: variable numbers, then their refcounts. */
  int *sorted_vars = (int *)mem_manager->alloc(2 * num_locals * sizeof(int));
  int *sorted_counts = sorted_vars + num_locals;
  for (idx=0; idx<num_locals; idx++)
    {
      sorted_vars[idx] = idx;
      sorted_counts[idx] = refcount[idx];
    }
  sort_refcount(sorted_vars,sorted_counts,num_locals);

  init_dfs_overall(basic_block_count,cfg_node_array);
  for (idx=0; idx<num_locals; idx++)
    {
      // Some variables actually have no explicit references.  Once
      // we reach one of those there is nothing left worth allocating.
      if (sorted_counts[idx] < 2)
        break;

      const unsigned varnum = sorted_vars[idx];

      init_dfs_onevar(basic_block_count,cfg_node_array);
      unsigned candidates = ra_dfs(varnum,basic_block_count,cfg_node_array);
      if (candidates == 0)  // no register is free for this variable
        continue;

      if (is_long->is_set(varnum))
        {
          // A double-width variable needs two registers.  A mask with
          // a single bit set is a power of 2, and for such X we have
          // (X & (X-1)) == 0.
          const int has_two = (candidates & (candidates-1)) != 0;
          if (!has_two)
            continue;  // only 1 register available, so fail

          // Take the first register of the pair; the second is taken
          // by the common call below.
          candidates = assign_register(candidates,varnum,
                                       register_allocations[varnum],
                                       basic_block_count,cfg_node_array);
        }
      assign_register(candidates,varnum,register_allocations[varnum],
                      basic_block_count,cfg_node_array);
    }

  // Final pass to fix things up with respect to longs, in reverse
  // order.
  for (int var=num_locals-1; var>=0; var--)
    fixup_allocation(var);
}

// Returns the registers_used bit mask, or 0 when "success" is not set.
unsigned char Jcfg::callee_save_registers_used()
{
  if (!success)
    return 0;

  return registers_used;
}

// Returns the registers_available mask of the CFG node whose bytecode
// range [bc_start, bc_start+bc_length) contains "addr".
//
// Returns 0 when "success" is not set, and also when no node contains
// "addr".  The lookup is a binary search, so it relies on
// cfg_node_array being sorted by bytecode location.
unsigned char Jcfg::available_registers_in_BB(const unsigned char *addr)
{
  if (!success)
    return 0;

  int lower = 0;
  int upper = (int)basic_block_count - 1;

  // The search interval shrinks on every iteration, so the loop always
  // terminates, even for an address outside every node.
  while (lower <= upper)
    {
      int idx = lower + (upper - lower) / 2;

      if (cfg_node_array[idx].bc_start > addr)  // guess is too high
        {
          upper = idx - 1;
        }
      else if (cfg_node_array[idx].bc_start+cfg_node_array[idx].bc_length
               <= addr)  // guess is too low
        {
          lower = idx + 1;
        }
      else
        {
          return cfg_node_array[idx].registers_available;
        }
    }

  // No node contains addr: report no registers, as for a failed CFG.
  return 0;
}

// Returns register_allocations[varnum], or 0 when "success" is not
// set.
unsigned char Jcfg::register_assigned_to_var(int varnum)
{
  if (!success)
    return 0;

  return register_allocations[varnum];
}

// Fills in the register allocation information stored in "mi".
//
// The last (num_locals+7)/8 bytes of the "size"-byte block starting at
// "mi" become a bitmap, reachable through mi->vars_register_allocated,
// with one bit set for every local that has a register assigned.
//
// Each call site's register_mapping[] is then copied from the
// allocation table of the CFG node containing the call site; call
// sites with a NULL bc are skipped.  If "success" is not set, every
// mapping entry is set to -1 instead.
//
// We assume that both the call sites and the control flow graph nodes
// are sorted by bytecode location.
void Jcfg::set_call_site_info(Jit_Method_Info *mi, unsigned size)
{
  Call_Site_Info *sites = mi->cs_info;
  int site_count = mi->num_call_sites;
  int bitmap_start = size - ((num_locals + 7) / 8);
  unsigned u;
  int site;
  int slot;

  assert(num_locals == mi->num_in_args + mi->num_vars);

  // The bitmap lives in the space at the end of mi.
  mi->vars_register_allocated = ((char *) mi) + bitmap_start;

  // Zero the bitmap first...
  for (u=bitmap_start; u<size; u++)
    ((char *)mi)[u] = 0;

  // ...then set one bit per register-allocated local.
  for (u=0; u<num_locals; u++)
    {
      if (register_assigned_to_var(u))
        mi->vars_register_allocated[u/8] |= (1 << (u%8));
    }

  if (!success)
    {
      // No allocation was done: mark every slot of every call site
      // with -1.
      for (site=0; site<site_count; site++)
        {
          for (slot=0; slot<4; slot++)
            sites[site].register_mapping[slot] = -1;
        }
      return;
    }

  // This walk relies on the call sites being sorted by increasing bc:
  // "bb" only ever moves forward.  It would have to be reset to 0 for
  // each call site to drop that assumption.
  unsigned bb = 0;
  for (site=0; site<site_count; site++)
    {
      if (sites[site].bc == NULL)
        continue;

      while (cfg_node_array[bb].bc_start+cfg_node_array[bb].bc_length
             <= sites[site].bc)
        bb ++;

      for (slot=0; slot<4; slot++)
        sites[site].register_mapping[slot] = cfg_node_array[bb].allocation[slot];
    }
}

// Returns the allocation table entry for register "reg_no" in the CFG
// node that find_node() reports for bytecode address "where".  The
// register number is translated to a table slot through reg_mapping[].
unsigned Jcfg::var_in_register(unsigned reg_no, const unsigned char *where)
{
  const int node = find_node(where);
  const int slot = reg_mapping[reg_no];

  return cfg_node_array[node].allocation[slot];
}

unsigned Jcfg::var_in_register(unsigned reg_no, int where)
{
  return var_in_register(reg_no,where+bytecodes);
}

// Finds the intersection of all callee-save registers available
// within the bytecode range [bc, bc+size).  The resulting registers
// can be used within that range for local register allocation, for
// free.
//
// "hint" is the caller's guess at the CFG index of the node containing
// "bc"; if the guess is wrong the node is looked up with find_node().
// On return "hint" holds a guess for the instruction at bc+size.
//
// Returns 0 (leaving "hint" untouched) when "success" is not set.
unsigned char Jcfg::registers_available_in_range(const unsigned char *bc,
                                                 unsigned size, int &hint)
{
  if (!success)
    return 0;

  // Trust the hint only if it names a real node that contains "bc".
  int first = hint;
  if (hint < 0 || (unsigned)hint >= basic_block_count ||
      bc < cfg_node_array[hint].bc_start ||
      bc >= cfg_node_array[hint].bc_start+cfg_node_array[hint].bc_length)
    {
      first = find_node(bc);
    }

  const unsigned char *range_end = bc+size;
  unsigned char avail =
    (registers_used & cfg_node_array[first].registers_available);

  // Fold in each following node until one reaches the end of the
  // range, never stepping past the last node.
  int last = first;
  while (last < (int)basic_block_count-1 &&
         cfg_node_array[last].bc_start+cfg_node_array[last].bc_length < range_end)
    {
      last ++;
      avail &= cfg_node_array[last].registers_available;
    }

  // Assuming that (next_bc == current_bc + size): if the range ends
  // exactly where the last node ends, the next instruction starts the
  // following node.
  hint = (cfg_node_array[last].bc_start+cfg_node_array[last].bc_length == range_end)
    ? last+1
    : last;

  return avail;
}

// Same as registers_available_in_range(), except that it searches from
// "bc" until the end of the "classical" basic block.
//
// Starting from the CFG node containing "bc", the following node is
// merged into the range as long as ALL of these hold:
//   - it begins with an astore instruction (plain, short form, or
//     wide),
//   - it has exactly one incoming edge, which comes from the current
//     node,
//   - the current node does not end in a branch instruction.
//
// "hint" is the caller's guess at the CFG index of the node containing
// "bc"; on return it is set to the index just past the last node
// examined.
//
// Returns the intersection of registers_used with registers_available
// of every node in the range, or 0 (leaving "hint" untouched) when
// "success" is not set.
unsigned char Jcfg::registers_available_in_codegen_bb(const unsigned char *bc, int &hint)
{
  if (!success) return 0;
  unsigned start, end;
  unsigned char result;

  // First, check whether the hint is correct.  The hint is supposed
  // to be the CFG index for the "bc" instruction.
  if (hint >= 0 && (unsigned)hint < basic_block_count &&
      bc >= cfg_node_array[hint].bc_start &&
      bc < cfg_node_array[hint].bc_start+cfg_node_array[hint].bc_length)
    {
      start = hint;
    }
  else
    {
      start = find_node(bc);
    }

  for (end=start; end<basic_block_count-1; end++)
  {
      // Check whether the next basic block is an artificially split one.
      int stop = 0;
      const unsigned char *next_bc = cfg_node_array[end+1].bc_start;

      // This switch statement tests whether the first instruction of
      // the next basic block is an astore.
      switch (next_bc[0])
      {
      case 0x3a:  // astore
      case 0x4b: case 0x4c: case 0x4d: case 0x4e:  // astore_<n>
          break;
      case 0xc4:  // wide
          // The wide prefix modifies the general astore opcode (0x3a);
          // the short astore_<n> forms never follow a wide prefix.
          if (next_bc[1] != 0x3a)
              stop = 1;
          break;
      default:
          stop = 1;
          break;
      }
      if (stop)
          break;

      // Check whether the next basic block has exactly one incoming edge.
      if (cfg_node_array[end+1].num_in_edges != 1)
          break;

      // Check whether the next basic block's incoming edge comes from the
      // current basic block.
      if (cfg_node_array[end+1].in_edges[0] != (int)end)
          break;

      // Check whether the last statement of the current basic block is some sort
      // of a branch.  If it is, then we can stop.
      switch (*cfg_node_array[end].bc_end)
      {
        case 0x99: case 0x9a:   // if{eq,ne,lt,ge,gt,le}: int compare against zero
        case 0x9b: case 0x9c:
        case 0x9d: case 0x9e:
        case 0x9f: case 0xa0:   // if_icmp{eq,ne,lt,ge,gt,le}
        case 0xa1: case 0xa2:
        case 0xa3: case 0xa4:
        case 0xa5: case 0xa6:   // if_acmp{eq,ne}
        case 0xc6: case 0xc7:   // if(non)null
        case 0xa7:      // goto
        case 0xc8:      // goto_w
        case 0xa8:      // jsr
        case 0xc9:      // jsr_w
        case 0xaa:      // tableswitch
        case 0xab:      // lookupswitch
            stop = 1;
            break;
        default:
            break;
      }
      if (stop)
          break;
  }

  // Intersect the availability of every node in [start, end].
  result = registers_used;
  for (; start<=end; start++)
  {
      result &= cfg_node_array[start].registers_available;
  }

  hint = end+1;
  return result;
}

// Returns 1 if the allocation table of CFG node 0 shows "varnum" in
// the register assigned to it, and 0 otherwise.
//
// A variable that is not itself recorded there may be the second word
// of a long: in that case the same test is repeated for the preceding
// variable (varnum-1), provided that variable is a long.
//
// A negative result from lowest_bit_set() is treated as "no register
// assigned".
int Jcfg::var_is_live_on_entry(int varnum)
{
  int reg = lowest_bit_set(register_assigned_to_var(varnum));
  if (reg < 0)
    return 0;

  if (cfg_node_array[0].allocation[reg_mapping[reg]] == varnum)
    return 1;

  // Variable 0 can't be the second half of a long, and neither can a
  // variable whose predecessor isn't a long.
  if (varnum == 0 || !is_long->is_set(varnum-1))
    return 0;

  const int first_half = varnum-1;

  reg = lowest_bit_set(register_assigned_to_var(first_half));
  if (reg < 0)
    return 0;

  return (cfg_node_array[0].allocation[reg_mapping[reg]] == first_half) ? 1 : 0;
}

#else // REG_ALLOC_METHOD
#ifdef LOCAL_CALLEE
#error Check project settings -- LOCAL_CALLEE should only be defined if REG_ALLOC_METHOD is set to REG_ALLOC_CHOW in RegAlloc.h
#endif // LOCAL_CALLEE

#endif // REG_ALLOC_METHOD
