// Copyright (C)  2000 Intel Corporation.  All rights reserved.
//
// $Header: /usr/development/orp/orp/arch/ia32/ia32_o3_jit/bytecode_opt.cpp,v 1.2 2001/08/13 09:54:55 xhshi Exp $
//


#include "defines.h"
#include "flow_graph.h"
#include "bytecode_opt.h"
#include "bit_vector.h"
#include "jit_common.h"

//
// Scan backward from curr_pos for the three-instruction sequence
//
//      <load of local #load_value> ; iconst_1 ; ixor
//
// where the load is either "iload <index>" (0x15) or iload_0..iload_3
// (0x1a..0x1d).
//
//   bytecode    bytecode array being scanned
//   curr_pos    offset at which the backward scan starts; offset 0 itself
//               is never examined
//   load_value  index of the local variable the sequence must load; a
//               negative value means "no local" and yields 0 immediately
//   bv          only offsets whose bit is set are examined (the caller in
//               this file sets one bit per instruction start); all other
//               offsets are skipped
//
// Returns the offset of the iconst_1 of the matched sequence, or 0 when no
// match is found before the scan reaches offset 0 or a barrier opcode.
//
size_t search_backward_for_xor_1(Byte *bytecode, size_t curr_pos, int load_value, 
                                 Bit_Vector &bv)
{
    if (load_value < 0) return 0;
    // Matcher state while walking backward:
    //   0 - nothing matched yet
    //   1 - ixor seen
    //   2 - ixor and the iconst_1 just before it seen; a load is expected next
    unsigned curr_state = 0;
    size_t return_pos = 0;
    while (curr_pos > 0)
    {
        if (!bv.is_set(curr_pos))
        {
            curr_pos--;
            continue;
        }
        const Byte opcode = bytecode[curr_pos];
        if (opcode>=0x99 && opcode<=0xc9 && opcode!=0xc4)
            break; // reach a control flow instruction (0xc4, wide, is exempt)
        if (curr_state==0 && opcode==0x82) // ixor
        {
            curr_state = 1;
        }
        else if (curr_state==1 && opcode==0x04) // iconst_1
        {
            curr_state = 2;
            return_pos = curr_pos;
        }
        else
        {
            if (curr_state==2)
            {
                // Work out which local, if any, this instruction loads.
                int loaded_local = -1;
                if (opcode==0x15) // iload <index>
                    loaded_local = bytecode[curr_pos+1];
                else if (opcode>=0x1a && opcode<=0x1d) // iload_0 .. iload_3
                    loaded_local = opcode - 0x1a;
                if (loaded_local==load_value)
                    return return_pos;
            }
            // Anything else -- including a load of a different local --
            // breaks the sequence.  Without this reset the matcher would stay
            // in state 2 and could later pair an unrelated load with the
            // remembered iconst_1/ixor.
            return_pos = 0;
            curr_state = 0;
        }
        curr_pos--;
    }
    return 0;
}

extern bool O3_bc_opt;
void bytecode_optimization(Byte *bytecode_addr, size_t bytecode_size, size_t &max_stack, size_t max_locals)
{
    if (!O3_bc_opt)
        return;
    bool inc_stk_size = false;
    unsigned curr_state = 0;
    size_t curr_pos = 0;
    size_t mark_addr = 0;
    size_t jump1_addr = 0;
    size_t jump2_addr = 0;
    Mem_Manager mem((bytecode_size+max_locals)<<1); // should be large enough
    Bit_Vector bytecode_mark(bytecode_size,mem,false);
    Bit_Vector local_zero_one(max_locals, mem, false);
    Bit_Vector local_dirty(max_locals, mem, false);
    while (curr_pos < bytecode_size)
    {
        bytecode_mark.set(curr_pos);
        unsigned inst_len = instruction_length(bytecode_addr,curr_pos);
        if (curr_state==0 && bytecode_addr[curr_pos]==0x99)  // ifeq <branch bytes>
        {
            mark_addr = curr_pos;
            assert(inst_len==3);
            jump1_addr = curr_pos + (((char *)bytecode_addr)[curr_pos+1]<<8 | bytecode_addr[curr_pos+2]);
            curr_state=1;
        }
        else if (curr_state==1 && bytecode_addr[curr_pos]==0x03) // iconst_0
        {
            assert(inst_len==1);
            curr_state=2;
        }
        else if (curr_state==2 && bytecode_addr[curr_pos]==0xa7) // goto <branch bytes>
        {
            assert(inst_len==3);
            jump2_addr = curr_pos + (((char *)bytecode_addr)[curr_pos+1]<<8 | bytecode_addr[curr_pos+2]);
            curr_state=3;
        }
        else if (curr_state==3 && curr_pos==jump1_addr && bytecode_addr[curr_pos]==0x04) // iconst_1
        {
            curr_state=4;
        }
        else if (curr_state==4 && curr_pos==jump2_addr)
        {
#ifdef TRACE_O3
            cout << "Detecting XOR opportunity between " <<
                mark_addr << " and " << jump2_addr << "..." << endl;
#endif // TRACE_O3
            size_t old_xor_addr = 0;
            int search_value = -1;
            if (bytecode_addr[mark_addr-2]==0x15 &&
                bytecode_mark.is_set(mark_addr-2))
                search_value = bytecode_addr[mark_addr-1];
            else if (bytecode_addr[mark_addr-1]>=0x1a && bytecode_addr[mark_addr-1]<=0x1d &&
                bytecode_mark.is_set(mark_addr-1))
                search_value = bytecode_addr[mark_addr-1] - 0x1a;
            if (search_value >=0 && 
                local_zero_one.is_set(search_value) &&
                !local_dirty.is_set(search_value)) // init value is 0 or 1
            {
                if ((old_xor_addr = search_backward_for_xor_1(bytecode_addr,mark_addr-2,search_value,bytecode_mark)) != 0)
                {
                    bytecode_addr[old_xor_addr] = 0x0;
                    bytecode_addr[old_xor_addr+1] = 0x0;
                    bytecode_mark.reset(old_xor_addr);
                    bytecode_mark.reset(old_xor_addr+1);
                    bytecode_addr[mark_addr] = 0x0;
                    bytecode_addr[mark_addr+1] = 0x0;
                    bytecode_mark.reset(mark_addr);
                    bytecode_mark.reset(mark_addr+1);
                }
                else
                {
                    bytecode_addr[mark_addr] = 0x04; // iconst_1
                    bytecode_addr[mark_addr+1] = 0x82; // ixor
                    bytecode_mark.set(mark_addr);
                    bytecode_mark.set(mark_addr+1);
                    inc_stk_size = true;
                }
                size_t i;
                for (i = mark_addr+2; i < curr_pos; i++)
                {
                    bytecode_addr[i] = 0x0; // nop
                    bytecode_mark.reset(i);
                }
#ifdef TRACE_O3
                cout << "Bytecodes modified" << endl;
#endif // TRACE_O3
            }
        }
        else if (bytecode_addr[curr_pos]==0x36 || // istore
                 (bytecode_addr[curr_pos]>=0x3b && bytecode_addr[curr_pos]<=0x3e))
        {
            unsigned store_index;
            if (bytecode_addr[curr_pos]==0x36)
            {
                store_index = bytecode_addr[curr_pos+1];
                assert(!bytecode_mark.is_set(curr_pos+1));
            }
            else
                store_index = bytecode_addr[curr_pos] - 0x3b;
            if (bytecode_mark.is_set(curr_pos-1) &&
                (bytecode_addr[curr_pos-1]==0x3 || bytecode_addr[curr_pos-1]==0x4))
                local_zero_one.set(store_index);
            else
                local_dirty.set(store_index);
            curr_state = 0;
        }
        else
            curr_state = 0;

        curr_pos += inst_len;
    }
    if (inc_stk_size)
        max_stack ++;
}
