issue #169 Update to latest MIR codebase and set default opt level to 2

arrays
Dibyendu Majumdar 4 years ago
parent 6500e11c01
commit 23141174d4

@ -3,4 +3,5 @@ mkdir buildllvm
cd buildllvm
#cmake -DCMAKE_BUILD_TYPE=Release -DLLVM_JIT=ON -DCMAKE_INSTALL_PREFIX=$HOME/ravi -DLLVM_DIR=$HOME/LLVM/share/llvm/cmake ..
#cmake -DCMAKE_BUILD_TYPE=Release -DLLVM_JIT=ON -DCMAKE_INSTALL_PREFIX=$HOME/ravi -DLLVM_DIR=$HOME/LLVM5/lib/cmake/llvm ..
cmake -DCMAKE_BUILD_TYPE=Release -DSTATIC_BUILD=ON -DLLVM_JIT=ON -DCMAKE_INSTALL_PREFIX=$HOME/Software/ravi -DLLVM_DIR=$HOME/Software/llvm801/lib/cmake/llvm ..
#cmake -DCMAKE_BUILD_TYPE=Release -DSTATIC_BUILD=ON -DLLVM_JIT=ON -DCMAKE_INSTALL_PREFIX=$HOME/Software/ravi -DLLVM_DIR=$HOME/Software/llvm801/lib/cmake/llvm ..
cmake3 -DCMAKE_BUILD_TYPE=Release -DSTATIC_BUILD=ON -DLLVM_JIT=ON -DCMAKE_INSTALL_PREFIX=$HOME/Software/ravi -DLLVM_DIR=$HOME/Software/llvm10/lib/cmake/llvm ..

@ -12179,6 +12179,12 @@ static void init_include_dirs (MIR_context_t ctx) {
VARR_PUSH (char_ptr_t, system_headers, "/usr/include/x86_64-linux-gnu");
#elif defined(__linux__) && defined(__aarch64__)
VARR_PUSH (char_ptr_t, system_headers, "/usr/include/aarch64-linux-gnu");
#elif defined(__linux__) && defined(__PPC64__)
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
VARR_PUSH (char_ptr_t, system_headers, "/usr/include/powerpc64le-linux-gnu");
#else
VARR_PUSH (char_ptr_t, system_headers, "/usr/include/powerpc64-linux-gnu");
#endif
#endif
#if defined(__APPLE__) || defined(__unix__)
VARR_PUSH (char_ptr_t, system_headers, "/usr/include");

@ -7,6 +7,13 @@ static char ppc64_mirc[]
"#define _ARCH_PPC64 1\n"
"#define _LP64 1\n"
"#define __LP64__ 1\n"
"#define __powerpc64__ 1\n"
"#define __powerpc__ 1\n"
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
"#define _CALL_ELF 1\n"
#else
"#define _CALL_ELF 2\n"
#endif
"\n"
"#define __LONG_DOUBLE_128__ 1\n" // ???
"#define __SIZEOF_DOUBLE__ 8\n"
@ -20,10 +27,15 @@ static char ppc64_mirc[]
"#define __SIZEOF_SHORT__ 2\n"
"#define __SIZEOF_SIZE_T__ 8\n"
"\n"
"#define _BIG_ENDIAN 1\n" // ??? Implement LE too
"#define __ORDER_LITTLE_ENDIAN__ 1234\n"
"#define __ORDER_BIG_ENDIAN__ 4321\n"
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
"#define _BIG_ENDIAN 1\n"
"#define __BYTE_ORDER__ __ORDER_BIG_ENDIAN__\n"
#else
"#define _LITTLE_ENDIAN 1\n"
"#define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__\n"
#endif
"\n"
"/* Some GCC predefined macros: */\n"
"#define __SIZE_TYPE__ unsigned long\n"

File diff suppressed because it is too large

@ -72,12 +72,21 @@ static inline int target_call_used_hard_reg_p (MIR_reg_t hard_reg) {
|---------------|
| slots for | dynamically allocated/deallocated by caller
| passing args |
|---------------|
| spill space | WIN64 only, 32 bytes spill space for register args
|---------------|
size of slots and saved regs is a multiple of 16 bytes
*/
#ifndef _WIN64
static const int reg_save_area_size = 176;
static const int spill_space_size = 0;
#else
static const int reg_save_area_size = 0;
static const int spill_space_size = 32;
#endif
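/* A minimal sketch of where the two constants above come from (names here are
   illustrative, not MIR identifiers): the SysV va_start register save area holds
   all 6 integer and 8 vector argument registers, while Win64 instead relies on the
   caller-allocated 32-byte shadow ("spill") space for rcx/rdx/r8/r9. */
enum { SYSV_GP_ARG_REGS = 6, SYSV_XMM_ARG_REGS = 8, WIN64_REG_ARGS = 4 };
static const int sysv_reg_save_area = SYSV_GP_ARG_REGS * 8 + SYSV_XMM_ARG_REGS * 16; /* 176 */
static const int win64_spill_space = WIN64_REG_ARGS * 8;                             /* 32 */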
static MIR_disp_t target_get_stack_slot_offset (MIR_context_t ctx, MIR_type_t type,
MIR_reg_t slot) {
@ -156,14 +165,14 @@ static MIR_reg_t get_arg_reg (MIR_type_t arg_type, size_t *int_arg_num, size_t *
arg_reg = MIR_NON_HARD_REG;
*mov_code = MIR_LDMOV;
} else if (arg_type == MIR_T_F || arg_type == MIR_T_D) {
arg_reg = get_fp_arg_reg(*fp_arg_num);
arg_reg = get_fp_arg_reg (*fp_arg_num);
(*fp_arg_num)++;
#ifdef _WIN64
(*int_arg_num)++; /* arg slot used by fp, skip int register */
#endif
*mov_code = arg_type == MIR_T_F ? MIR_FMOV : MIR_DMOV;
} else {
arg_reg = get_int_arg_reg(*int_arg_num);
arg_reg = get_int_arg_reg (*int_arg_num);
#ifdef _WIN64
(*fp_arg_num)++; /* arg slot used by int, skip fp register */
#endif
@ -178,7 +187,7 @@ static void machinize_call (MIR_context_t ctx, MIR_insn_t call_insn) {
MIR_func_t func = curr_func_item->u.func;
MIR_proto_t proto = call_insn->ops[0].u.ref->u.proto;
size_t nargs, nops = MIR_insn_nops (ctx, call_insn), start = proto->nres + 2;
size_t int_arg_num = 0, fp_arg_num = 0, mem_size = 0, xmm_args = 0;
size_t int_arg_num = 0, fp_arg_num = 0, xmm_args = 0, mem_size = spill_space_size;
MIR_type_t type, mem_type;
MIR_op_mode_t mode;
MIR_var_t *arg_vars = NULL;
@ -198,9 +207,6 @@ static void machinize_call (MIR_context_t ctx, MIR_insn_t call_insn) {
nargs = VARR_LENGTH (MIR_var_t, proto->args);
arg_vars = VARR_ADDR (MIR_var_t, proto->args);
}
#ifdef _WIN64
if (nargs > 4 || proto->vararg_p) mem_size = 32; /* spill space for register args */
#endif
if (call_insn->ops[1].mode != MIR_OP_REG && call_insn->ops[1].mode != MIR_OP_HARD_REG) {
temp_op = MIR_new_reg_op (ctx, gen_new_temp_reg (ctx, MIR_T_I64, func));
new_insn = MIR_new_insn (ctx, MIR_MOV, temp_op, call_insn->ops[1]);
@ -275,6 +281,10 @@ static void machinize_call (MIR_context_t ctx, MIR_insn_t call_insn) {
MIR_new_int_op (ctx, xmm_args));
gen_add_insn_before (ctx, call_insn, new_insn);
}
#else
if (proto->nres > 1)
(*MIR_get_error_func (ctx)) (MIR_ret_error,
"Windows x86-64 doesn't support multiple return values");
#endif
n_iregs = n_xregs = n_fregs = 0;
for (size_t i = 0; i < proto->nres; i++) {
@ -458,13 +468,13 @@ static void target_machinize (MIR_context_t ctx) {
MIR_insn_t insn, next_insn, new_insn;
MIR_reg_t ret_reg, arg_reg;
MIR_op_t ret_reg_op, arg_reg_op, mem_op;
size_t i, int_arg_num, fp_arg_num, mem_size;
size_t i, int_arg_num = 0, fp_arg_num = 0, mem_size = spill_space_size;
assert (curr_func_item->item_type == MIR_func_item);
func = curr_func_item->u.func;
stack_arg_func_p = FALSE;
start_sp_from_bp_offset = 8;
for (i = int_arg_num = fp_arg_num = mem_size = 0; i < func->nargs; i++) {
for (i = 0; i < func->nargs; i++) {
/* Argument extensions are already done in simplify */
/* Prologue: generate arg_var = hard_reg|stack mem ... */
type = VARR_GET (MIR_var_t, func->vars, i).type;
@ -520,6 +530,7 @@ static void target_machinize (MIR_context_t ctx) {
= MIR_new_reg_op (ctx, gen_new_temp_reg (ctx, MIR_T_I64, curr_func_item->u.func));
MIR_op_t va_op = insn->ops[0];
MIR_reg_t va_reg;
#ifndef _WIN64
int gp_offset = 0, fp_offset = 48;
MIR_var_t var;
@ -549,10 +560,30 @@ static void target_machinize (MIR_context_t ctx) {
MIR_new_int_op (ctx, -reg_save_area_size));
gen_add_insn_before (ctx, insn, new_insn);
gen_mov (ctx, insn, MIR_MOV, MIR_new_mem_op (ctx, MIR_T_I64, 16, va_reg, 0, 1), treg_op);
#else
stack_arg_func_p = TRUE;
/* spill reg args */
mem_size = 8 /*ret*/ + start_sp_from_bp_offset;
for (int i = 0; i < 4; i++) {
arg_reg = get_int_arg_reg (i);
mem_op = _MIR_new_hard_reg_mem_op (ctx, MIR_T_I64, mem_size, FP_HARD_REG, MIR_NON_HARD_REG, 1);
new_insn = MIR_new_insn (ctx, MIR_MOV, mem_op, _MIR_new_hard_reg_op (ctx, arg_reg));
gen_add_insn_before (ctx, insn, new_insn);
mem_size += 8;
}
/* init va_list */
mem_size = 8 /*ret*/ + start_sp_from_bp_offset + func->nargs * 8;
new_insn = MIR_new_insn (ctx, MIR_ADD, treg_op, _MIR_new_hard_reg_op (ctx, FP_HARD_REG),
MIR_new_int_op (ctx, mem_size));
gen_add_insn_before (ctx, insn, new_insn);
va_reg = va_op.mode == MIR_OP_REG ? va_op.u.reg : va_op.u.hard_reg;
gen_mov (ctx, insn, MIR_MOV, MIR_new_mem_op (ctx, MIR_T_I64, 0, va_reg, 0, 1), treg_op);
#endif
gen_delete_insn (ctx, insn);
} else if (code == MIR_VA_END) { /* do nothing */
gen_delete_insn (ctx, insn);
} else if (code == MIR_VA_ARG) { /* do nothing */
#ifndef _WIN64
/* Use a builtin func call:
mov func_reg, func ref; mov flag_reg, <0|1>; call proto, func_reg, res_reg, va_reg,
flag_reg */
@ -578,6 +609,19 @@ static void target_machinize (MIR_context_t ctx) {
ops[4] = flag_reg_op;
new_insn = MIR_new_insn_arr (ctx, MIR_CALL, 5, ops);
gen_add_insn_before (ctx, insn, new_insn);
#else
MIR_op_t res_reg_op = insn->ops[0], va_reg_op = insn->ops[1], mem_op = insn->ops[2], treg_op;
assert (res_reg_op.mode == MIR_OP_REG && va_reg_op.mode == MIR_OP_REG
&& mem_op.mode == MIR_OP_MEM);
/* load and increment va pointer */
treg_op = MIR_new_reg_op (ctx, gen_new_temp_reg (ctx, MIR_T_I64, curr_func_item->u.func));
gen_mov (ctx, insn, MIR_MOV, treg_op, MIR_new_mem_op (ctx, MIR_T_I64, 0, va_reg_op.u.reg, 0, 1));
new_insn = MIR_new_insn (ctx, MIR_MOV, res_reg_op, treg_op);
gen_add_insn_before (ctx, insn, new_insn);
new_insn = MIR_new_insn (ctx, MIR_ADD, treg_op, treg_op, MIR_new_int_op (ctx, 8));
gen_add_insn_before (ctx, insn, new_insn);
gen_mov (ctx, insn, MIR_MOV, MIR_new_mem_op (ctx, MIR_T_I64, 0, va_reg_op.u.reg, 0, 1), treg_op);
#endif
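/* A C model of the Win64 MIR_VA_ARG expansion emitted just above (a sketch; the
   helper name is illustrative).  Every Win64 vararg occupies one 8-byte stack slot,
   so va_arg reduces to "return the current pointer, then bump it by 8". */
static inline void *win64_va_arg_model (uint64_t **arg_area) {
  void *addr = *arg_area; /* address of the current argument slot (the insn result) */
  *arg_area += 1;         /* advance the va_list by one 8-byte slot */
  return addr;
}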
gen_delete_insn (ctx, insn);
} else if (MIR_call_code_p (code)) {
machinize_call (ctx, insn);
@ -589,6 +633,11 @@ static void target_machinize (MIR_context_t ctx) {
and added extension in return (if any). */
uint32_t n_iregs = 0, n_xregs = 0, n_fregs = 0;
#ifdef _WIN64
if (curr_func_item->u.func->nres > 1)
(*MIR_get_error_func (ctx)) (MIR_ret_error,
"Windows x86-64 doesn't support multiple return values");
#endif
assert (curr_func_item->u.func->nres == MIR_insn_nops (ctx, insn));
for (size_t i = 0; i < curr_func_item->u.func->nres; i++) {
assert (insn->ops[i].mode == MIR_OP_REG);
@ -683,7 +732,8 @@ static void target_make_prolog_epilog (MIR_context_t ctx, bitmap_t used_hard_reg
func = curr_func_item->u.func;
for (i = saved_hard_regs_num = 0; i <= MAX_HARD_REG; i++)
if (!target_call_used_hard_reg_p (i) && bitmap_bit_p (used_hard_regs, i)) saved_hard_regs_num++;
if (leaf_p && !alloca_p && saved_hard_regs_num == 0 && !func->vararg_p && stack_slots_num == 0)
if (leaf_p && !alloca_p && !stack_arg_func_p && saved_hard_regs_num == 0 && !func->vararg_p
&& stack_slots_num == 0)
return;
sp_reg_op.mode = fp_reg_op.mode = MIR_OP_HARD_REG;
sp_reg_op.u.hard_reg = SP_HARD_REG;
@ -702,6 +752,7 @@ static void target_make_prolog_epilog (MIR_context_t ctx, bitmap_t used_hard_reg
service_area_size = 8;
} else {
service_area_size = reg_save_area_size + 8;
#ifndef _WIN64
start = -(int64_t) service_area_size;
isave (ctx, anchor, start, DI_HARD_REG);
isave (ctx, anchor, start + 8, SI_HARD_REG);
@ -717,6 +768,7 @@ static void target_make_prolog_epilog (MIR_context_t ctx, bitmap_t used_hard_reg
dsave (ctx, anchor, start + 128, XMM5_HARD_REG);
dsave (ctx, anchor, start + 144, XMM6_HARD_REG);
dsave (ctx, anchor, start + 160, XMM7_HARD_REG);
#endif
}
stack_slots_size = stack_slots_num * 8;
/* stack slots, and saved regs as multiple of 16 bytes: */

@ -4455,6 +4455,7 @@ static void assign (MIR_context_t ctx) {
for (loc = 0; loc <= func_stack_slots_num + MAX_HARD_REG; loc++) {
if (loc <= MAX_HARD_REG && !target_hard_reg_type_ok_p (loc, type)) continue;
slots_num = target_locs_num (loc, type);
if (loc + slots_num - 1 > func_stack_slots_num + MAX_HARD_REG) break;
for (k = 0; k < slots_num; k++)
if ((loc + k <= MAX_HARD_REG
&& (target_fixed_hard_reg_p (loc + k)
@ -4462,7 +4463,7 @@ static void assign (MIR_context_t ctx) {
|| bitmap_bit_p (conflict_locs, loc + k))
break;
if (k < slots_num) continue;
if (loc > MAX_HARD_REG && loc % slots_num != 0)
if (loc > MAX_HARD_REG && (loc - MAX_HARD_REG - 1) % slots_num != 0)
continue; /* we align stack slots according to the type size */
profit = (VARR_GET (size_t, loc_profit_ages, loc) != curr_age
? 0
@ -4478,12 +4479,12 @@ static void assign (MIR_context_t ctx) {
for (k = 0; k < slots_num; k++) bitmap_set_bit_p (func_assigned_hard_regs, best_loc + k);
} else if (best_loc == MIR_NON_HARD_REG) { /* Add stack slot ??? */
for (k = 0; k < slots_num; k++) {
best_loc = VARR_LENGTH (size_t, loc_profits);
if (k == 0) best_loc = VARR_LENGTH (size_t, loc_profits);
VARR_PUSH (size_t, loc_profits, 0);
VARR_PUSH (size_t, loc_profit_ages, 0);
if (k == 0 && (best_loc - MAX_HARD_REG - 1) % slots_num != 0) k--; /* align */
}
func_stack_slots_num = best_loc - MAX_HARD_REG;
best_loc -= slots_num - 1;
func_stack_slots_num = VARR_LENGTH (size_t, loc_profits) - MAX_HARD_REG - 1;
}
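/* The alignment rule enforced above, as a standalone sketch (the helper is
   illustrative, not part of the allocator): stack locations are numbered from
   MAX_HARD_REG + 1, and a value occupying slots_num slots (2 for long double)
   must start on a slot index that is a multiple of slots_num. */
static int stack_loc_aligned_p (MIR_reg_t loc, int slots_num) {
  return loc <= MAX_HARD_REG || (loc - MAX_HARD_REG - 1) % slots_num == 0;
}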
#if !MIR_NO_GEN_DEBUG
if (debug_file != NULL) {
@ -5259,6 +5260,7 @@ static void combine (MIR_context_t ctx) {
}
}
if (change_p) block_change_p = TRUE;
if (code == MIR_BSTART || code == MIR_BEND) last_mem_ref_insn_num = curr_insn_num;
}
for (iter = 0; iter < 2; iter++) { /* update hreg ref info: */

@ -5,25 +5,38 @@
// _MIR_get_thunk, _MIR_redirect_thunk, _MIR_get_interp_shim, _MIR_get_ff_call, _MIR_get_wrapper
#define VA_LIST_IS_ARRAY_P 1 /* one element which is a pointer to args */
#define FUNC_DESC_LEN 24
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define PPC64_STACK_HEADER_SIZE 32
#define PPC64_TOC_OFFSET 24
#define PPC64_FUNC_DESC_LEN 0
#else
#define PPC64_STACK_HEADER_SIZE 48
#define PPC64_TOC_OFFSET 40
#define PPC64_FUNC_DESC_LEN 24
#endif
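/* Background for the values above, as a sketch (the struct name is illustrative):
   on big-endian ELFv1 a function pointer addresses a 3-doubleword descriptor rather
   than code, and the 48-byte stack header saves the TOC at offset 40; little-endian
   ELFv2 drops descriptors (PPC64_FUNC_DESC_LEN == 0) and shrinks the header to
   32 bytes with the TOC save at offset 24. */
typedef struct {
  void *entry; /* address of the first instruction */
  void *toc;   /* TOC (r2) value to establish for the callee */
  void *env;   /* environment pointer, unused by C */
} ppc64_elfv1_func_desc_t; /* 3 * 8 == 24 == PPC64_FUNC_DESC_LEN on ELFv1 */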
static void ppc64_push_func_desc (MIR_context_t ctx);
void (*ppc64_func_desc) (MIR_context_t ctx) = ppc64_push_func_desc;
static void ppc64_push_func_desc (MIR_context_t ctx) {
VARR_TRUNC (uint8_t, machine_insns, 0);
for (int i = 0; i < FUNC_DESC_LEN; i++)
for (int i = 0; i < PPC64_FUNC_DESC_LEN; i++)
VARR_PUSH (uint8_t, machine_insns, ((uint8_t *) ppc64_func_desc)[i]);
}
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
static void ppc64_redirect_func_desc (MIR_context_t ctx, void *desc, void *to) {
mir_assert (((uint64_t) desc & 0x3) == 0 && ((uint64_t) to & 0x3) == 0); /* alignment */
_MIR_change_code (ctx, desc, (uint8_t *) &to, sizeof (to));
}
#endif
static void *ppc64_publish_func_and_redirect (MIR_context_t ctx) {
void *res = _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
ppc64_redirect_func_desc (ctx, res, (uint8_t *) res + FUNC_DESC_LEN);
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
ppc64_redirect_func_desc (ctx, res, (uint8_t *) res + PPC64_FUNC_DESC_LEN);
#endif
return res;
}
@ -37,70 +50,6 @@ static void push_insns (MIR_context_t ctx, const uint32_t *pat, size_t pat_len)
for (size_t i = 0; i < pat_len; i++) VARR_PUSH (uint8_t, machine_insns, p[i]);
}
void *_MIR_get_bstart_builtin (MIR_context_t ctx) {
static const uint32_t bstart_code[] = {
0x7c230b78, /* mr 3,1 */
0x4e800020, /* blr */
};
ppc64_push_func_desc (ctx);
push_insns (ctx, bstart_code, sizeof (bstart_code));
return ppc64_publish_func_and_redirect (ctx);
}
void *_MIR_get_bend_builtin (MIR_context_t ctx) {
static const uint32_t bend_code[] = {
0xe8010000, /* ld r0,0(r1) */
0xf8030000, /* std r0,0(r3) */
0xe8010028, /* ld r0,40(r1) */
0xf8030028, /* std r0,40(r3) */
0x7c611b78, /* mr r1,r3 */
0x4e800020, /* blr */
};
ppc64_push_func_desc (ctx);
push_insns (ctx, bend_code, sizeof (bend_code));
return ppc64_publish_func_and_redirect (ctx);
}
void *_MIR_get_thunk (MIR_context_t ctx) { /* emit 3 doublewords for func descriptor: */
ppc64_push_func_desc (ctx);
return ppc64_publish_func_and_redirect (ctx);
}
void _MIR_redirect_thunk (MIR_context_t ctx, void *thunk, void *to) {
ppc64_redirect_func_desc (ctx, thunk, to);
}
struct ppc64_va_list {
uint64_t *arg_area;
};
void *va_arg_builtin (void *p, uint64_t t) {
struct ppc64_va_list *va = p;
MIR_type_t type = t;
int fp_p = type == MIR_T_F || type == MIR_T_D;
void *a = va->arg_area;
if (type == MIR_T_F || type == MIR_T_I32) {
a = (char *) a + 4; /* 2nd word of doubleword */
va->arg_area = (uint64_t *) ((char *) a + 4);
} else if (type == MIR_T_LD) {
va->arg_area += 2;
} else {
va->arg_area++;
}
return a;
}
void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a) {
struct ppc64_va_list **va = p;
va_list *vap = a;
assert (sizeof (struct ppc64_va_list) == sizeof (va_list));
*va = (struct ppc64_va_list *) vap;
}
void va_end_interp_builtin (MIR_context_t ctx, void *p) {}
static void ppc64_gen_mov (MIR_context_t ctx, unsigned to, unsigned from) {
/* or to,from,from: */
push_insn (ctx, (31 << 26) | (444 << 1) | (from << 21) | (to << 16) | (from << 11));
@ -155,12 +104,103 @@ static void ppc64_gen_address (MIR_context_t ctx, unsigned int reg, void *p) {
}
static void ppc64_gen_jump (MIR_context_t ctx, unsigned int reg, int call_p) {
ppc64_gen_ld (ctx, 0, reg, 0, MIR_T_I64); /* 0 = func addr */
ppc64_gen_ld (ctx, 2, reg, 8, MIR_T_I64); /* r2 = TOC */
push_insn (ctx, (31 << 26) | (467 << 1) | (0 << 21) | (9 << 16)); /* mctr 0 */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
assert (reg != 0);
ppc64_gen_ld (ctx, 0, reg, 0, MIR_T_I64); /* 0 = func addr */
ppc64_gen_ld (ctx, 2, reg, 8, MIR_T_I64); /* r2 = TOC */
push_insn (ctx, (31 << 26) | (467 << 1) | (0 << 21) | (9 << 16)); /* mctr 0 */
#else
if (reg != 12) ppc64_gen_mov (ctx, 12, reg); /* 12 = func addr */
push_insn (ctx, (31 << 26) | (467 << 1) | (12 << 21) | (9 << 16)); /* mctr 12 */
#endif
push_insn (ctx, (19 << 26) | (528 << 1) | (20 << 21) | (call_p ? 1 : 0)); /* bcctr[l] */
}
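/* Note on the little-endian (ELFv2) branch above: with no function descriptors, an
   indirect callee expects its own entry address in r12 so that its global entry
   point can derive the TOC (r2) from it; hence the sequence is effectively
   "r12 = target; mtctr r12; bcctr[l]" instead of loading entry and TOC from a
   descriptor as on ELFv1. */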
void *_MIR_get_bstart_builtin (MIR_context_t ctx) {
static const uint32_t bstart_code[] = {
0x7c230b78, /* mr 3,1 */
0x4e800020, /* blr */
};
ppc64_push_func_desc (ctx);
push_insns (ctx, bstart_code, sizeof (bstart_code));
return ppc64_publish_func_and_redirect (ctx);
}
void *_MIR_get_bend_builtin (MIR_context_t ctx) {
static const uint32_t bend_finish_code[] = {
0x7c611b78, /* mr r1,r3 */
0x4e800020, /* blr */
};
ppc64_push_func_desc (ctx);
ppc64_gen_ld (ctx, 0, 1, 0, MIR_T_I64); /* r0 = 0(r1) */
ppc64_gen_st (ctx, 0, 3, 0, MIR_T_I64); /* 0(r3) = r0 */
ppc64_gen_ld (ctx, 0, 1, PPC64_TOC_OFFSET, MIR_T_I64); /* r0 = toc_offset(r1) */
ppc64_gen_st (ctx, 0, 3, PPC64_TOC_OFFSET, MIR_T_I64); /* toc_offset(r3) = r0 */
push_insns (ctx, bend_finish_code, sizeof (bend_finish_code));
return ppc64_publish_func_and_redirect (ctx);
}
void *_MIR_get_thunk (MIR_context_t ctx) { /* emit 3 doublewords for func descriptor: */
ppc64_push_func_desc (ctx);
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return ppc64_publish_func_and_redirect (ctx);
#else
const uint32_t nop_insn = 24 << (32 - 6); /* ori 0,0,0 */
const int max_thunk_len = (7 * 8);
VARR_TRUNC (uint8_t, machine_insns, 0);
for (int i = 0; i < max_thunk_len; i++) push_insn (ctx, nop_insn);
return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
#endif
}
void _MIR_redirect_thunk (MIR_context_t ctx, void *thunk, void *to) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
ppc64_redirect_func_desc (ctx, thunk, to);
#else
static const uint32_t global_entry_end[] = {
0x7d8903a6, /* mtctr r12 */
0x4e800420, /* bctr */
};
VARR_TRUNC (uint8_t, machine_insns, 0);
ppc64_gen_address (ctx, 12, to);
push_insns (ctx, global_entry_end, sizeof (global_entry_end));
_MIR_change_code (ctx, thunk, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
#endif
}
struct ppc64_va_list {
uint64_t *arg_area;
};
void *va_arg_builtin (void *p, uint64_t t) {
struct ppc64_va_list *va = p;
MIR_type_t type = t;
int fp_p = type == MIR_T_F || type == MIR_T_D;
void *a = va->arg_area;
if (type == MIR_T_LD) {
va->arg_area += 2;
} else {
va->arg_area++;
}
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
if (type == MIR_T_F || type == MIR_T_I32) a = (char *) a + 4; /* 2nd word of doubleword */
#endif
return a;
}
void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a) {
struct ppc64_va_list **va = p;
va_list *vap = a;
assert (sizeof (struct ppc64_va_list) == sizeof (va_list));
*va = (struct ppc64_va_list *) vap;
}
void va_end_interp_builtin (MIR_context_t ctx, void *p) {}
/* Generation: fun (fun_addr, res_arg_addresses):
save lr (r1 + 16); allocate and form minimal stack frame (with necessary param area); save r14;
r12=fun_addr (r3); r14 = res_arg_addresses (r4);
@ -186,18 +226,19 @@ void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, s
ppc64_push_func_desc (ctx);
for (uint32_t i = 0; i < nargs; i++) param_size += arg_types[i] == MIR_T_LD ? 16 : 8;
if (param_size < 64) param_size = 64;
frame_size = 48 + param_size + 8; /* +local var to save res_reg */
if (frame_size % 8 != 0) frame_size += 8; /* align */
ppc64_gen_st (ctx, 2, 1, 40, MIR_T_I64);
frame_size = PPC64_STACK_HEADER_SIZE + param_size + 8; /* +local var to save res_reg */
if (frame_size % 16 != 0) frame_size += 8; /* align */
ppc64_gen_st (ctx, 2, 1, PPC64_TOC_OFFSET, MIR_T_I64);
push_insns (ctx, start_pattern, sizeof (start_pattern));
ppc64_gen_stdu (ctx, -frame_size);
ppc64_gen_st (ctx, res_reg, 1, 48 + param_size, MIR_T_I64); /* save res_reg */
ppc64_gen_st (ctx, res_reg, 1, PPC64_STACK_HEADER_SIZE + param_size,
MIR_T_I64); /* save res_reg */
mir_assert (sizeof (long double) == 16);
ppc64_gen_mov (ctx, res_reg, 4); /* results & args */
ppc64_gen_mov (ctx, 12, 3); /* func addr */
n_gpregs = n_fpregs = 0;
param_offset = nres * 16; /* args start */
disp = 48; /* param area start */
disp = PPC64_STACK_HEADER_SIZE; /* param area start */
for (uint32_t i = 0; i < nargs; i++) { /* load args: */
type = arg_types[i];
if ((type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD) && n_fpregs < 13) {
@ -205,7 +246,7 @@ void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, s
if (vararg_p) {
if (n_gpregs >= 8) {
ppc64_gen_st (ctx, 1 + n_fpregs, 1, disp, MIR_T_D);
} else { /* load gp reg to */
} else { /* load into gp reg too */
ppc64_gen_st (ctx, 1 + n_fpregs, 1, -8, MIR_T_D);
ppc64_gen_ld (ctx, 3 + n_gpregs, 1, -8, MIR_T_I64);
}
@ -228,8 +269,6 @@ void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, s
ppc64_gen_st (ctx, 0, 1, disp + 8, MIR_T_D);
}
}
} else if (n_gpregs < 8) {
ppc64_gen_ld (ctx, n_gpregs + 3, res_reg, param_offset, MIR_T_I64);
} else if (type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD) {
ppc64_gen_ld (ctx, 0, res_reg, param_offset, type);
ppc64_gen_st (ctx, 0, 1, disp, MIR_T_D);
@ -237,6 +276,8 @@ void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, s
ppc64_gen_ld (ctx, 0, res_reg, param_offset + 8, type);
ppc64_gen_st (ctx, 0, 1, disp + 8, MIR_T_D);
}
} else if (n_gpregs < 8) {
ppc64_gen_ld (ctx, n_gpregs + 3, res_reg, param_offset, MIR_T_I64);
} else {
ppc64_gen_ld (ctx, 0, res_reg, param_offset, MIR_T_I64);
ppc64_gen_st (ctx, 0, 1, disp, MIR_T_I64);
@ -267,7 +308,8 @@ void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, s
}
disp += 16;
}
ppc64_gen_ld (ctx, res_reg, 1, 48 + param_size, MIR_T_I64); /* restore res_reg */
ppc64_gen_ld (ctx, res_reg, 1, PPC64_STACK_HEADER_SIZE + param_size,
MIR_T_I64); /* restore res_reg */
ppc64_gen_addi (ctx, 1, 1, frame_size);
push_insns (ctx, finish_pattern, sizeof (finish_pattern));
return ppc64_publish_func_and_redirect (ctx);
@ -296,23 +338,14 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
0x7c0803a6, /* mtlr r0 */
0x4e800020, /* blr */
};
static uint32_t save_gp_regs_pattern[] = {
0xf8610030, /* std r3,48(r1) */
0xf8810038, /* std r4,56(r1) */
0xf8a10040, /* std r5,64(r1) */
0xf8c10048, /* std r6,72(r1) */
0xf8e10050, /* std r7,80(r1) */
0xf9010058, /* std r8,88(r1) */
0xf9210060, /* std r9,96(r1) */
0xf9410068, /* std r10,104(r1) */
};
VARR_TRUNC (uint8_t, machine_insns, 0);
frame_size = 112; /* 6(frame start) + 8(param area) */
local_var_size = nres * 16 + 8; /* saved r14, results */
frame_size = PPC64_STACK_HEADER_SIZE + 64; /* header + 8(param area) */
local_var_size = nres * 16 + 8; /* saved r14, results */
if (vararg_p) {
push_insns (ctx, save_gp_regs_pattern, sizeof (save_gp_regs_pattern));
ppc64_gen_addi (ctx, va_reg, 1, 48);
for (unsigned reg = 3; reg <= 10; reg++) /* std rn,dispn(r1) : */
ppc64_gen_st (ctx, reg, 1, PPC64_STACK_HEADER_SIZE + (reg - 3) * 8, MIR_T_I64);
ppc64_gen_addi (ctx, va_reg, 1, PPC64_STACK_HEADER_SIZE);
} else {
ppc64_gen_mov (ctx, caller_r1, 1); /* caller frame r1 */
for (uint32_t i = 0; i < nargs; i++) {
@ -321,14 +354,15 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
}
}
frame_size += local_var_size;
if (frame_size % 8 != 0) frame_size += 8; /* align */
if (frame_size % 16 != 0) frame_size += 8; /* align */
push_insns (ctx, start_pattern, sizeof (start_pattern));
ppc64_gen_stdu (ctx, -frame_size);
ppc64_gen_st (ctx, res_reg, 1, 48 + 64, MIR_T_I64); /* save res_reg */
if (!vararg_p) { /* save args in local vars: */
disp = 112 + nres * 16 + 8; /* 48 + 64 + nres * 16 + 8: start of local vars to keep args */
ppc64_gen_st (ctx, res_reg, 1, PPC64_STACK_HEADER_SIZE + 64, MIR_T_I64); /* save res_reg */
if (!vararg_p) { /* save args in local vars: */
/* header_size + 64 + nres * 16 + 8 -- start of stack memory to keep args: */
disp = PPC64_STACK_HEADER_SIZE + 64 + nres * 16 + 8;
ppc64_gen_addi (ctx, va_reg, 1, disp);
param_offset = 48;
param_offset = PPC64_STACK_HEADER_SIZE;
n_gpregs = n_fpregs = 0;
for (uint32_t i = 0; i < nargs; i++) {
type = arg_vars[i].type;
@ -363,13 +397,13 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
n_gpregs += type == MIR_T_LD ? 2 : 1;
}
}
ppc64_gen_addi (ctx, res_reg, 1, 64 + 48 + 8);
ppc64_gen_addi (ctx, res_reg, 1, 64 + PPC64_STACK_HEADER_SIZE + 8);
ppc64_gen_address (ctx, 3, ctx);
ppc64_gen_address (ctx, 4, func_item);
ppc64_gen_mov (ctx, 5, va_reg);
ppc64_gen_mov (ctx, 6, res_reg);
ppc64_gen_address (ctx, 7, handler);
ppc64_gen_jump (ctx, 7, TRUE);
ppc64_gen_address (ctx, 12, handler);
ppc64_gen_jump (ctx, 12, TRUE);
disp = n_gpregs = n_fpregs = 0;
for (uint32_t i = 0; i < nres; i++) {
type = res_types[i];
@ -390,78 +424,50 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
}
disp += 16;
}
ppc64_gen_ld (ctx, res_reg, 1, 48 + 64, MIR_T_I64); /* restore res_reg */
ppc64_gen_ld (ctx, res_reg, 1, PPC64_STACK_HEADER_SIZE + 64, MIR_T_I64); /* restore res_reg */
ppc64_gen_addi (ctx, 1, 1, frame_size);
push_insns (ctx, finish_pattern, sizeof (finish_pattern));
return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
}
/* Brief: save lr (r1+16); update r1, save all param regs (r1+112);
/* Brief: save lr (r1+16); update r1, save all param regs (r1+header+64);
allocate and form minimal wrapper stack frame (param area = 8*8);
r3 = call hook_address (ctx, called_func);
restore params regs (r1+112), r1, lr (r1+16); ctr=r11; b *ctr */
r3 = call hook_address (ctx, called_func); r12=r3
restore params regs (r1+header+64), r1, lr (r1+16); ctr=r12; b *ctr */
void *_MIR_get_wrapper (MIR_context_t ctx, MIR_item_t called_func, void *hook_address) {
static uint32_t prologue[] = {
0x7c0802a6, /* mflr r0 */
0xf8010010, /* std r0,16(r1) */
0xf821fee9, /* stdu r1,-280(r1): 6(frame start) + 8(gp args) + 13(fp args) + 8(param area) */
0xf8610070, /* std r3,112(r1) */
0xf8810078, /* std r4,120(r1) */
0xf8a10080, /* std r5,128(r1) */
0xf8c10088, /* std r6,136(r1) */
0xf8e10090, /* std r7,144(r1) */
0xf9010098, /* std r8,152(r1) */
0xf92100a0, /* std r9,160(r1) */
0xf94100a8, /* std r10,168(r1) */
0xd82100b0, /* stfd f1,176(r1) */
0xd84100b8, /* stfd f2,184(r1) */
0xd86100c0, /* stfd f3,192(r1) */
0xd88100c8, /* stfd f4,200(r1) */
0xd8a100d0, /* stfd f5,208(r1) */
0xd8c100d8, /* stfd f6,216(r1) */
0xd8e100e0, /* stfd f7,224(r1) */
0xd90100e8, /* stfd f8,232(r1) */
0xd92100f0, /* stfd f9,240(r1) */
0xd94100f8, /* stfd f10,248(r1) */
0xd9610100, /* stfd f11,256(r1) */
0xd9810108, /* stfd f12,264(r1) */
0xd9a10110, /* stfd f13,272(r1) */
};
static uint32_t epilogue[] = {
0xe8610070, /* ld r3,112(r1) */
0xe8810078, /* ld r4,120(r1) */
0xe8a10080, /* ld r5,128(r1) */
0xe8c10088, /* ld r6,136(r1) */
0xe8e10090, /* ld r7,144(r1) */
0xe9010098, /* ld r8,152(r1) */
0xe92100a0, /* ld r9,160(r1) */
0xe94100a8, /* ld r10,168(r1) */
0xc82100b0, /* lfd f1,176(r1) */
0xc84100b8, /* lfd f2,184(r1) */
0xc86100c0, /* lfd f3,192(r1) */
0xc88100c8, /* lfd f4,200(r1) */
0xc8a100d0, /* lfd f5,208(r1) */
0xc8c100d8, /* lfd f6,216(r1) */
0xc8e100e0, /* lfd f7,224(r1) */
0xc90100e8, /* lfd f8,232(r1) */
0xc92100f0, /* lfd f9,240(r1) */
0xc94100f8, /* lfd f10,248(r1) */
0xc9610100, /* lfd f11,256(r1) */
0xc9810108, /* lfd f12,264(r1) */
0xc9a10110, /* lfd f13,272(r1) */
0x38210118, /* addi r1,r1,280 */
0xe8010010, /* ld r0,16(r1) */
0x7c0803a6, /* mtlr r0 */
};
int frame_size = PPC64_STACK_HEADER_SIZE + 8 * 8 + 13 * 8 + 8 * 8;
VARR_TRUNC (uint8_t, machine_insns, 0);
push_insns (ctx, prologue, sizeof (prologue));
/* stdu r1,n(r1): header + 8(gp args) + 13(fp args) + 8(param area): */
if (frame_size % 16 != 0) frame_size += 8;
ppc64_gen_stdu (ctx, -frame_size);
for (unsigned reg = 3; reg <= 10; reg++) /* std rn,dispn(r1) : */
ppc64_gen_st (ctx, reg, 1, PPC64_STACK_HEADER_SIZE + (reg - 3) * 8 + 64, MIR_T_I64);
for (unsigned reg = 1; reg <= 13; reg++) /* stfd fn,dispn(r1) : */
ppc64_gen_st (ctx, reg, 1, PPC64_STACK_HEADER_SIZE + (reg - 1 + 8) * 8 + 64, MIR_T_D);
ppc64_gen_address (ctx, 3, ctx);
ppc64_gen_address (ctx, 4, called_func);
ppc64_gen_address (ctx, 5, hook_address);
ppc64_gen_jump (ctx, 5, TRUE);
ppc64_gen_mov (ctx, 11, 3);
ppc64_gen_address (ctx, 12, hook_address);
ppc64_gen_jump (ctx, 12, TRUE);
ppc64_gen_mov (ctx, 12, 3);
for (unsigned reg = 3; reg <= 10; reg++) /* ld rn,dispn(r1) : */
ppc64_gen_ld (ctx, reg, 1, PPC64_STACK_HEADER_SIZE + (reg - 3) * 8 + 64, MIR_T_I64);
for (unsigned reg = 1; reg <= 13; reg++) /* lfd fn,dispn(r1) : */
ppc64_gen_ld (ctx, reg, 1, PPC64_STACK_HEADER_SIZE + (reg - 1 + 8) * 8 + 64, MIR_T_D);
ppc64_gen_addi (ctx, 1, 1, frame_size);
push_insns (ctx, epilogue, sizeof (epilogue));
ppc64_gen_jump (ctx, 11, FALSE);
push_insn (ctx, (31 << 26) | (467 << 1) | (12 << 21) | (9 << 16)); /* mctr 12 */
push_insn (ctx, (19 << 26) | (528 << 1) | (20 << 21)); /* bcctr */
return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
}

@ -13,13 +13,20 @@ void *_MIR_get_bstart_builtin (MIR_context_t ctx) {
}
void *_MIR_get_bend_builtin (MIR_context_t ctx) {
static const uint8_t bend_code[] = {
#ifndef _WIN64
0x48, 0x8b, 0x04, 0x24, /* rax = (rsp) */
0x48, 0x89, 0xfc, /* rsp = rdi */
0xff, 0xe0, /* jmp *rax */
#else
0x48, 0x8b, 0x04, 0x24, /* rax = (rsp) */
0x48, 0x89, 0xcc, /* rsp = rcx */
0xff, 0xe0, /* jmp *rax */
#endif
};
return _MIR_publish_code (ctx, bend_code, sizeof (bend_code));
}
#ifndef _WIN64
struct x86_64_va_list {
uint32_t gp_offset, fp_offset;
uint64_t *overflow_arg_area, *reg_save_area;
@ -51,6 +58,26 @@ void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a) {
assert (sizeof (struct x86_64_va_list) == sizeof (va_list));
*va = *(struct x86_64_va_list *) vap;
}
#else
struct x86_64_va_list {
uint64_t *arg_area;
};
void *va_arg_builtin (void *p, uint64_t t) {
struct x86_64_va_list *va = p;
void *a = va->arg_area;
va->arg_area++;
return a;
}
void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a) {
struct x86_64_va_list **va = p;
va_list *vap = a;
assert (sizeof (struct x86_64_va_list) == sizeof (va_list));
*va = (struct x86_64_va_list *) vap;
}
#endif
void va_end_interp_builtin (MIR_context_t ctx, void *p) {}
@ -70,6 +97,7 @@ void _MIR_redirect_thunk (MIR_context_t ctx, void *thunk, void *to) {
}
static const uint8_t save_pat[] = {
#ifndef _WIN64
0x48, 0x81, 0xec, 0x80, 0, 0, 0, /*sub $0x80,%rsp */
0xf3, 0x0f, 0x7f, 0x04, 0x24, /*movdqu %xmm0,(%rsp) */
0xf3, 0x0f, 0x7f, 0x4c, 0x24, 0x10, /*movdqu %xmm1,0x10(%rsp) */
@ -85,9 +113,16 @@ static const uint8_t save_pat[] = {
0x52, /*push %rdx */
0x56, /*push %rsi */
0x57, /*push %rdi */
#else
0x48, 0x89, 0x4c, 0x24, 0x08, /*mov %rcx,0x08(%rsp) */
0x48, 0x89, 0x54, 0x24, 0x10, /*mov %rdx,0x10(%rsp) */
0x4c, 0x89, 0x44, 0x24, 0x18, /*mov %r8, 0x18(%rsp) */
0x4c, 0x89, 0x4c, 0x24, 0x20, /*mov %r9, 0x20(%rsp) */
#endif
};
static const uint8_t restore_pat[] = {
#ifndef _WIN64
0x5f, /*pop %rdi */
0x5e, /*pop %rsi */
0x5a, /*pop %rdx */
@ -103,6 +138,16 @@ static const uint8_t restore_pat[] = {
0xf3, 0x0f, 0x6f, 0x74, 0x24, 0x60, /*movdqu 0x60(%rsp),%xmm6 */
0xf3, 0x0f, 0x6f, 0x7c, 0x24, 0x70, /*movdqu 0x70(%rsp),%xmm7 */
0x48, 0x81, 0xc4, 0x80, 0, 0, 0, /*add $0x80,%rsp */
#else
0x48, 0x8b, 0x4c, 0x24, 0x08, /*mov 0x08(%rsp),%rcx */
0x48, 0x8b, 0x54, 0x24, 0x10, /*mov 0x10(%rsp),%rdx */
0x4c, 0x8b, 0x44, 0x24, 0x18, /*mov 0x18(%rsp),%r8 */
0x4c, 0x8b, 0x4c, 0x24, 0x20, /*mov 0x20(%rsp),%r9 */
0xf3, 0x0f, 0x7e, 0x44, 0x24, 0x08, /*movq 0x08(%rsp),%xmm0*/
0xf3, 0x0f, 0x7e, 0x4c, 0x24, 0x10, /*movq 0x10(%rsp),%xmm1*/
0xf3, 0x0f, 0x7e, 0x54, 0x24, 0x18, /*movq 0x18(%rsp),%xmm2*/
0xf3, 0x0f, 0x7e, 0x5c, 0x24, 0x20, /*movq 0x20(%rsp),%xmm3*/
#endif
};
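/* How the WIN64 branches above address the register arguments, as a sketch (the
   enum is illustrative): the four argument registers are homed into the caller's
   32-byte shadow space, and because each Win64 argument position owns either a GPR
   or an XMM register, the restore reloads both register files from the same slots. */
enum win64_home_slot { /* %rsp-relative offsets used in the patterns above */
  WIN64_HOME_ARG1 = 0x08, /* rcx / xmm0 */
  WIN64_HOME_ARG2 = 0x10, /* rdx / xmm1 */
  WIN64_HOME_ARG3 = 0x18, /* r8  / xmm2 */
  WIN64_HOME_ARG4 = 0x20  /* r9  / xmm3 */
};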
static uint8_t *push_insns (MIR_context_t ctx, const uint8_t *pat, size_t pat_len) {
@ -174,13 +219,22 @@ static void gen_st80 (MIR_context_t ctx, uint32_t src_offset) {
void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, size_t nargs,
MIR_type_t *arg_types, int vararg_p) {
static const uint8_t prolog[] = {
#ifndef _WIN64
0x53, /* pushq %rbx */
0x48, 0x81, 0xec, 0, 0, 0, 0, /* subq <sp_offset>, %rsp */
0x49, 0x89, 0xfb, /* mov $rdi, $r11 -- fun addr */
0x48, 0x89, 0xf3, /* mov $rsi, $rbx -- result/arg addresses */
#else
0x53, /* pushq %rbx */
0x48, 0x81, 0xec, 0, 0, 0, 0, /* subq <sp_offset>, %rsp */
0x49, 0x89, 0xcb, /* mov $rcx, $r11 -- fun addr */
0x48, 0x89, 0xd3, /* mov $rdx, $rbx -- result/arg addresses */
#endif
};
static const uint8_t call_end[] = {
#ifndef _WIN64
0x48, 0xc7, 0xc0, 0x08, 0, 0, 0, /* mov $8, rax -- to save xmm varargs */
#endif
0x41, 0xff, 0xd3, /* callq *%r11 */
0x48, 0x81, 0xc4, 0, 0, 0, 0, /* addq <sp_offset>, %rsp */
};
@ -188,24 +242,38 @@ void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, s
0x5b, /* pop %rbx */
0xc3, /* ret */
};
#ifndef _WIN64
static const uint8_t iregs[] = {7, 6, 2, 1, 8, 9}; /* rdi, rsi, rdx, rcx, r8, r9 */
uint32_t n_iregs = 0, n_xregs = 0, n_fregs, sp_offset = 0;
static const uint32_t max_iregs = 6, max_xregs = 8;
uint32_t sp_offset = 0;
#else
static const uint8_t iregs[] = {1, 2, 8, 9}; /* rcx, rdx, r8, r9 */
static const uint32_t max_iregs = 4, max_xregs = 4;
uint32_t sp_offset = 32;
#endif
uint32_t n_iregs = 0, n_xregs = 0, n_fregs;
uint8_t *addr;
VARR_TRUNC (uint8_t, machine_insns, 0);
push_insns (ctx, prolog, sizeof (prolog));
for (size_t i = 0; i < nargs; i++) {
if ((MIR_T_I8 <= arg_types[i] && arg_types[i] <= MIR_T_U64) || arg_types[i] == MIR_T_P) {
if (n_iregs < 6) {
if (n_iregs < max_iregs) {
gen_mov (ctx, (i + nres) * sizeof (long double), iregs[n_iregs++], TRUE);
#ifdef _WIN64
n_xregs++;
#endif
} else {
gen_ldst (ctx, sp_offset, (i + nres) * sizeof (long double), TRUE);
sp_offset += 8;
}
} else if (arg_types[i] == MIR_T_F || arg_types[i] == MIR_T_D) {
if (n_xregs < 8) {
if (n_xregs < max_xregs) {
gen_movxmm (ctx, (i + nres) * sizeof (long double), n_xregs++, arg_types[i] == MIR_T_F,
TRUE);
#ifdef _WIN64
gen_mov (ctx, (i + nres) * sizeof (long double), iregs[n_iregs++], TRUE);
#endif
} else {
gen_ldst (ctx, sp_offset, (i + nres) * sizeof (long double), arg_types[i] == MIR_T_D);
sp_offset += 8;
@ -221,7 +289,11 @@ void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, s
addr = VARR_ADDR (uint8_t, machine_insns);
memcpy (addr + 4, &sp_offset, sizeof (uint32_t));
addr = push_insns (ctx, call_end, sizeof (call_end));
memcpy (addr + 13, &sp_offset, sizeof (uint32_t));
memcpy (addr + sizeof (call_end) - 4, &sp_offset, sizeof (uint32_t));
#ifdef _WIN64
if (nres > 1)
(*error_func) (MIR_call_op_error, "Windows x86-64 doesn't support multiple return values");
#endif
n_iregs = n_xregs = n_fregs = 0;
for (size_t i = 0; i < nres; i++) {
if (((MIR_T_I8 <= res_types[i] && res_types[i] <= MIR_T_U64) || res_types[i] == MIR_T_P)
@ -245,6 +317,7 @@ void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, s
void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handler) {
static const uint8_t push_rbx[] = {0x53, /*push %rbx */};
static const uint8_t prepare_pat[] = {
#ifndef _WIN64
/* 0: */ 0x48, 0x83, 0xec, 0x20, /* sub 32,%rsp */
/* 4: */ 0x48, 0x89, 0xe2, /* mov %rsp,%rdx */
/* 7: */ 0xc7, 0x02, 0, 0, 0, 0, /* movl 0,(%rdx) */
@ -261,10 +334,33 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
/* 4a: */ 0x48, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, /* movabs <handler>,%rax */
/* 54: */ 0xff, 0xd0, /* callq *%rax */
};
static const uint32_t nres_offset = 0x2c;
static const uint32_t ctx_offset = 0x38;
static const uint32_t func_offset = 0x42;
static const uint32_t hndl_offset = 0x4c;
static const uint32_t prep_stack_size = 208;
#else
/* 0: */ 0x4c, 0x8d, 0x44, 0x24, 0x08, /* lea 8(%rsp),%r8 */
/* 5: */ 0x53, /* push %rbx */
/* 6: */ 0x48, 0x81, 0xec, 0, 0, 0, 0, /* sub <n>,%rsp */
/* d: */ 0x48, 0x89, 0xe3, /* mov %rsp,%rbx */
/* 10: */ 0x49, 0x89, 0xe1, /* mov %rsp,%r9 */
/* 13: */ 0x48, 0x83, 0xec, 0x20, /* sub 32,%rsp */
/* 17: */ 0x48, 0xb9, 0, 0, 0, 0, 0, 0, 0, 0, /* movabs <ctx>,%rcx */
/* 21: */ 0x48, 0xba, 0, 0, 0, 0, 0, 0, 0, 0, /* movabs <func_item>,%rdx*/
/* 2b: */ 0x48, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, /* movabs <handler>,%rax */
/* 35: */ 0xff, 0xd0, /* callq *%rax */
};
static const uint32_t nres_offset = 0x09;
static const uint32_t ctx_offset = 0x19;
static const uint32_t func_offset = 0x23;
static const uint32_t hndl_offset = 0x2d;
static const uint32_t prep_stack_size = 32;
#endif
static const uint8_t shim_end[] = {
/* 0: */ 0x48, 0x81, 0xc4, 0, 0, 0, 0, /*add 208+n,%rsp*/
/* 7: */ 0x5b, /*pop %rbx*/
/* 8: */ 0xc3, /*retq */
/* 0: */ 0x48, 0x81, 0xc4, 0, 0, 0, 0, /*add prep_stack_size+n,%rsp*/
/* 7: */ 0x5b, /*pop %rbx*/
/* 8: */ 0xc3, /*retq */
};
static const uint8_t ld_pat[] = {0x48, 0x8b, 0x83, 0, 0, 0, 0}; /* mov <offset>(%rbx), %reg */
static const uint8_t movss_pat[]
@ -279,15 +375,21 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
MIR_type_t *results = func_item->u.func->res_types;
VARR_TRUNC (uint8_t, machine_insns, 0);
#ifndef _WIN64
push_insns (ctx, push_rbx, sizeof (push_rbx));
#endif
push_insns (ctx, save_pat, sizeof (save_pat));
addr = push_insns (ctx, prepare_pat, sizeof (prepare_pat));
imm = nres * 16;
memcpy (addr + 0x2c, &imm, sizeof (uint32_t));
memcpy (addr + 0x38, &ctx, sizeof (void *));
memcpy (addr + 0x42, &func_item, sizeof (void *));
memcpy (addr + 0x4c, &handler, sizeof (void *));
memcpy (addr + nres_offset, &imm, sizeof (uint32_t));
memcpy (addr + ctx_offset, &ctx, sizeof (void *));
memcpy (addr + func_offset, &func_item, sizeof (void *));
memcpy (addr + hndl_offset, &handler, sizeof (void *));
/* move results: */
#ifdef _WIN64
if (nres > 1)
(*error_func) (MIR_call_op_error, "Windows x86-64 doesn't support multiple return values");
#endif
n_iregs = n_xregs = n_fregs = offset = 0;
for (uint32_t i = 0; i < nres; i++) {
if (results[i] == MIR_T_F && n_xregs < 2) {
@ -316,7 +418,7 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
offset += 16;
}
addr = push_insns (ctx, shim_end, sizeof (shim_end));
imm = 208 + nres * 16;
imm = prep_stack_size + nres * 16;
memcpy (addr + 3, &imm, sizeof (uint32_t));
return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
@ -327,20 +429,36 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
void *_MIR_get_wrapper (MIR_context_t ctx, MIR_item_t called_func, void *hook_address) {
static const uint8_t push_rax[] = {0x50, /*push %rax */};
static const uint8_t wrap_end[] = {
#ifndef _WIN64
0x58, /*pop %rax */
#endif
0x41, 0xff, 0xe2, /*jmpq *%r10 */
};
static const uint8_t call_pat[] = {
#ifndef _WIN64
0x48, 0xbe, 0, 0, 0, 0, 0, 0, 0, 0, /*movabs called_func,%rsi */
0x48, 0xbf, 0, 0, 0, 0, 0, 0, 0, 0, /*movabs ctx,%rdi */
0x49, 0xba, 0, 0, 0, 0, 0, 0, 0, 0, /*movabs <hook_address>,%r10 */
0x41, 0xff, 0xd2, /*callq *%r10 */
0x49, 0x89, 0xc2, /*mov %rax,%r10 */
#else
0x48, 0xba, 0, 0, 0, 0, 0, 0, 0, 0, /*movabs called_func,%rdx */
0x48, 0xb9, 0, 0, 0, 0, 0, 0, 0, 0, /*movabs ctx,%rcx */
0x49, 0xba, 0, 0, 0, 0, 0, 0, 0, 0, /*movabs <hook_address>,%r10*/
0x50, /*push %rax */
0x48, 0x83, 0xec, 0x20, /*sub 32,%rsp */
0x41, 0xff, 0xd2, /*callq *%r10 */
0x49, 0x89, 0xc2, /*mov %rax,%r10 */
0x48, 0x83, 0xc4, 0x20, /*add 32,%rsp */
0x58, /*pop %rax */
#endif
};
uint8_t *addr;
VARR_TRUNC (uint8_t, machine_insns, 0);
#ifndef _WIN64
push_insns (ctx, push_rax, sizeof (push_rax));
#endif
push_insns (ctx, save_pat, sizeof (save_pat));
addr = push_insns (ctx, call_pat, sizeof (call_pat));
memcpy (addr + 2, &called_func, sizeof (void *));

@ -947,6 +947,13 @@ MIR_item_t MIR_new_bss (MIR_context_t ctx, const char *name, size_t len) {
return item;
}
static MIR_type_t canon_type (MIR_type_t type) {
#if __SIZEOF_LONG_DOUBLE__ == 8
if (type == MIR_T_LD) type = MIR_T_D;
#endif
return type;
}
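/* A minimal usage sketch of canon_type (the helper below is illustrative): on a
   target where long double is 8 bytes (e.g. MSVC), MIR_T_LD collapses to MIR_T_D
   before it is stored in data items, function results, vars, or memory operands. */
static int ld_collapses_to_d_p (void) {
#if __SIZEOF_LONG_DOUBLE__ == 8
  return canon_type (MIR_T_LD) == MIR_T_D;
#else
  return canon_type (MIR_T_LD) == MIR_T_LD;
#endif
}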
size_t _MIR_type_size (MIR_context_t ctx, MIR_type_t type) {
switch (type) {
case MIR_T_I8: return sizeof (int8_t);
@ -986,7 +993,7 @@ MIR_item_t MIR_new_data (MIR_context_t ctx, const char *name, MIR_type_t el_type
free (item);
item = tab_item;
}
data->el_type = el_type;
data->el_type = canon_type (el_type);
data->nel = nel;
memcpy (data->u.els, els, el_len * nel);
return item;
@ -1148,7 +1155,7 @@ static MIR_item_t new_func_arr (MIR_context_t ctx, const char *name, size_t nres
= string_store (ctx, &strings, &string_tab, (MIR_str_t){strlen (name) + 1, name}).str.s;
func->nres = nres;
func->res_types = (MIR_type_t *) ((char *) func + sizeof (struct MIR_func));
memcpy (func->res_types, res_types, nres * sizeof (MIR_type_t));
for (size_t i = 0; i < nres; i++) func->res_types[i] = canon_type (res_types[i]);
tab_item = add_item (ctx, func_item);
mir_assert (tab_item == func_item);
DLIST_INIT (MIR_insn_t, func->insns);
@ -1161,7 +1168,7 @@ static MIR_item_t new_func_arr (MIR_context_t ctx, const char *name, size_t nres
func->n_inlines = 0;
func->machine_code = func->call_addr = NULL;
for (size_t i = 0; i < nargs; i++) {
MIR_type_t type = vars[i].type;
MIR_type_t type = canon_type (vars[i].type);
VARR_PUSH (MIR_var_t, func->vars, vars[i]);
create_func_reg (ctx, func, vars[i].name, i + 1,
@ -1694,6 +1701,35 @@ static MIR_insn_t create_insn (MIR_context_t ctx, size_t nops, MIR_insn_code_t c
if (nops == 0) nops = 1;
insn = malloc (sizeof (struct MIR_insn) + sizeof (MIR_op_t) * (nops - 1));
if (insn == NULL) (*error_func) (MIR_alloc_error, "Not enough memory for insn creation");
#if __SIZEOF_LONG_DOUBLE__ == 8
switch (code) {
case MIR_LDMOV: code = MIR_DMOV; break;
case MIR_I2LD: code = MIR_I2D; break;
case MIR_UI2LD: code = MIR_UI2D; break;
case MIR_LD2I: code = MIR_D2I; break;
case MIR_F2LD: code = MIR_F2D; break;
case MIR_D2LD: code = MIR_DMOV; break;
case MIR_LD2F: code = MIR_D2F; break;
case MIR_LD2D: code = MIR_DMOV; break;
case MIR_LDNEG: code = MIR_DNEG; break;
case MIR_LDADD: code = MIR_DADD; break;
case MIR_LDSUB: code = MIR_DSUB; break;
case MIR_LDMUL: code = MIR_DMUL; break;
case MIR_LDDIV: code = MIR_DDIV; break;
case MIR_LDEQ: code = MIR_DEQ; break;
case MIR_LDNE: code = MIR_DNE; break;
case MIR_LDLT: code = MIR_DLT; break;
case MIR_LDLE: code = MIR_DLE; break;
case MIR_LDGT: code = MIR_DGT; break;
case MIR_LDGE: code = MIR_DGE; break;
case MIR_LDBEQ: code = MIR_DBEQ; break;
case MIR_LDBNE: code = MIR_DBNE; break;
case MIR_LDBLT: code = MIR_DBLT; break;
case MIR_LDBLE: code = MIR_DBLE; break;
case MIR_LDBGT: code = MIR_DBGT; break;
case MIR_LDBGE: code = MIR_DBGE; break;
}
#endif
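/* Consequence of the mapping above, as an example sketch: on an 8-byte long-double
   target, MIR_new_insn (ctx, MIR_LDADD, dst, op1, op2) yields an insn whose code is
   MIR_DADD, so the interpreter and generator never see long-double opcodes there. */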
insn->code = code;
insn->data = NULL;
return insn;
@ -1901,6 +1937,9 @@ MIR_op_t MIR_new_double_op (MIR_context_t ctx, double d) {
MIR_op_t MIR_new_ldouble_op (MIR_context_t ctx, long double ld) {
MIR_op_t op;
#if __SIZEOF_LONG_DOUBLE__ == 8
return MIR_new_double_op (ctx, ld);
#endif
mir_assert (sizeof (long double) == 16); /* machine-defined 80- or 128-bit FP */
init_op (&op, MIR_OP_LDOUBLE);
op.u.ld = ld;
@ -1928,7 +1967,7 @@ MIR_op_t MIR_new_mem_op (MIR_context_t ctx, MIR_type_t type, MIR_disp_t disp, MI
MIR_op_t op;
init_op (&op, MIR_OP_MEM);
op.u.mem.type = type;
op.u.mem.type = canon_type (type);
op.u.mem.disp = disp;
op.u.mem.base = base;
op.u.mem.index = index;
@ -3244,6 +3283,14 @@ static code_holder_t *get_last_code_holder (MIR_context_t ctx, size_t size) {
return VARR_ADDR (code_holder_t, code_holders) + len - 1;
}
#ifndef __MIRC__
void _MIR_flush_code_cache (void *start, void *bound) {
#ifdef __GNUC__
__clear_cache (start, bound);
#endif
}
#endif
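/* A usage sketch motivating the _MIR_flush_code_cache calls added below (the
   function name here is hypothetical): after writing freshly generated machine
   code, the instruction cache must be invalidated on ppc64/aarch64 before executing
   it; GCC/Clang provide __clear_cache for this, and on x86-64 it is essentially free. */
static void publish_and_run_sketch (uint8_t *dst, const uint8_t *code, size_t len) {
  memcpy (dst, code, len);                /* dst assumed already PROT_WRITE | PROT_EXEC */
  _MIR_flush_code_cache (dst, dst + len); /* make the new bytes visible to the icache */
  ((void (*) (void)) dst) ();             /* now safe to execute */
}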
static uint8_t *add_code (MIR_context_t ctx, code_holder_t *ch_ptr, const uint8_t *code,
size_t code_len) {
uint8_t *mem = ch_ptr->free;
@ -3253,6 +3300,7 @@ static uint8_t *add_code (MIR_context_t ctx, code_holder_t *ch_ptr, const uint8_
mprotect (ch_ptr->start, ch_ptr->bound - ch_ptr->start, PROT_WRITE | PROT_EXEC);
memcpy (mem, code, code_len);
mprotect (ch_ptr->start, ch_ptr->bound - ch_ptr->start, PROT_READ | PROT_EXEC);
_MIR_flush_code_cache (mem, ch_ptr->free);
return mem;
}
@ -3286,6 +3334,7 @@ void _MIR_change_code (MIR_context_t ctx, uint8_t *addr, const uint8_t *code, si
mprotect ((uint8_t *) start, len, PROT_WRITE | PROT_EXEC);
memcpy (addr, code, code_len);
mprotect ((uint8_t *) start, len, PROT_READ | PROT_EXEC);
_MIR_flush_code_cache (addr, addr + code_len);
}
void _MIR_update_code_arr (MIR_context_t ctx, uint8_t *base, size_t nloc,
@ -3300,6 +3349,7 @@ void _MIR_update_code_arr (MIR_context_t ctx, uint8_t *base, size_t nloc,
mprotect ((uint8_t *) start, len, PROT_WRITE | PROT_EXEC);
for (i = 0; i < nloc; i++) memcpy (base + relocs[i].offset, &relocs[i].value, sizeof (void *));
mprotect ((uint8_t *) start, len, PROT_READ | PROT_EXEC);
_MIR_flush_code_cache (base, base + max_offset + sizeof (void *));
}
void _MIR_update_code (MIR_context_t ctx, uint8_t *base, size_t nloc, ...) {
@ -3324,6 +3374,7 @@ void _MIR_update_code (MIR_context_t ctx, uint8_t *base, size_t nloc, ...) {
memcpy (base + offset, &value, sizeof (void *));
}
mprotect ((uint8_t *) start, len, PROT_READ | PROT_EXEC);
_MIR_flush_code_cache (base, base + max_offset + sizeof (void *));
va_end (args);
}
@ -4689,8 +4740,10 @@ static void scan_number (MIR_context_t ctx, int ch, int get_char (MIR_context_t)
*double_p = FALSE;
ch = get_char (ctx);
} else if (ch == 'l' || ch == 'L') {
#if __SIZEOF_LONG_DOUBLE__ != 8
*ldouble_p = TRUE;
*double_p = FALSE;
#endif
ch = get_char (ctx);
}
} else if (*base == 8 && dec_p)
@ -5165,14 +5218,14 @@ void MIR_scan_string (MIR_context_t ctx, const char *str) {
op.mode = MIR_OP_FLOAT;
op.u.f = t.u.f;
break;
case TC_LDOUBLE: op.mode = MIR_OP_LDOUBLE; op.u.ld = t.u.ld;
#if __SIZEOF_LONG_DOUBLE__ != 8
break;
#endif
case TC_DOUBLE:
op.mode = MIR_OP_DOUBLE;
op.u.d = t.u.d;
break;
case TC_LDOUBLE:
op.mode = MIR_OP_LDOUBLE;
op.u.ld = t.u.ld;
break;
case TC_STR:
op.mode = MIR_OP_STR;
op.u.str = t.u.str;

@ -564,6 +564,7 @@ extern MIR_item_t _MIR_builtin_proto (MIR_context_t ctx, MIR_module_t module, co
extern MIR_item_t _MIR_builtin_func (MIR_context_t ctx, MIR_module_t module, const char *name,
void *addr);
extern void _MIR_flush_code_cache (void *start, void *bound);
extern uint8_t *_MIR_publish_code (MIR_context_t ctx, const uint8_t *code, size_t code_len);
extern uint8_t *_MIR_get_new_code_addr (MIR_context_t ctx, size_t size);
extern uint8_t *_MIR_publish_code_by_addr (MIR_context_t ctx, void *addr, const uint8_t *code,

@ -406,7 +406,7 @@ void *MIR_compile_C_module(
}
MIR_load_module (ctx, module);
MIR_gen_init (ctx);
MIR_gen_set_optimize_level(ctx, 3);
MIR_gen_set_optimize_level(ctx, 2);
MIR_link (ctx, MIR_set_gen_interface, Import_resolver_func);
fun_addr = MIR_gen (ctx, main_func);
MIR_gen_finish (ctx);
