You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
728 lines
30 KiB
728 lines
30 KiB
/* This file is a part of MIR project.
|
|
Copyright (C) 2018-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
|
|
*/
|
|
|
|
/* RBLK args are always passed by address.
|
|
BLK0 first is copied on the caller stack and passed implicitly.
|
|
BLK1 is passed in general regs
|
|
BLK2 is passed in fp regs
|
|
BLK3 is passed in gpr and then fpr
|
|
BLK4 is passed in fpr and then gpr
|
|
If there are not enough regs, they work as BLK.
|
|
Windows: small BLKs (<= 8 bytes) are passed by value;
|
|
all other BLKs are always passed by pointer as a regular int arg. */
|
|
|
|
/* NOTE(review): presumably tells the code using this target file that va_list is an array type
   here; the consumers are outside this file -- confirm. */
#define VA_LIST_IS_ARRAY_P 1
|
|
|
|
/* Publish the machine code of the "block start" builtin and return its address.  The published
   code returns rsp + 8 in rax, i.e. the stack pointer value just above its own return address. */
void *_MIR_get_bstart_builtin (MIR_context_t ctx) {
  static const uint8_t bstart_code[] = {
    0x48, 0x8d, 0x44, 0x24, 0x08, /* lea 8(%rsp),%rax */
    0xc3,                         /* ret */
  };
  const size_t code_len = sizeof (bstart_code);

  return _MIR_publish_code (ctx, bstart_code, code_len);
}
|
|
/* Publish the machine code of the "block end" builtin and return its address.  The published
   code loads its return address into rax, sets rsp from its first argument register (rdi, or
   rcx on Windows) and jumps to the saved return address. */
void *_MIR_get_bend_builtin (MIR_context_t ctx) {
  static const uint8_t bend_code[] = {
    0x48, 0x8b, 0x04, 0x24, /* rax = (rsp) */
#ifdef _WIN32
    0x48, 0x89, 0xcc, /* rsp = rcx */
#else
    0x48, 0x89, 0xfc, /* rsp = rdi */
#endif
    0xff, 0xe0, /* jmp *rax */
  };
  const size_t code_len = sizeof (bend_code);

  return _MIR_publish_code (ctx, bend_code, code_len);
}
|
|
|
|
#ifndef _WIN32
|
|
/* View of a va_list element used by the non-Windows vararg builtins of this file. */
struct x86_64_va_list {
  uint32_t gp_offset;          /* byte offset of the next general reg slot in reg_save_area */
  uint32_t fp_offset;          /* byte offset of the next fp reg slot in reg_save_area */
  uint64_t *overflow_arg_area; /* next argument passed in memory */
  uint64_t *reg_save_area;     /* base of the saved argument registers */
};
|
|
|
|
void *va_arg_builtin (void *p, uint64_t t) {
|
|
struct x86_64_va_list *va = p;
|
|
MIR_type_t type = t;
|
|
int fp_p = type == MIR_T_F || type == MIR_T_D;
|
|
void *a;
|
|
|
|
if (fp_p && va->fp_offset <= 160) {
|
|
a = (char *) va->reg_save_area + va->fp_offset;
|
|
va->fp_offset += 16;
|
|
} else if (!fp_p && type != MIR_T_LD && va->gp_offset <= 40) {
|
|
a = (char *) va->reg_save_area + va->gp_offset;
|
|
va->gp_offset += 8;
|
|
} else {
|
|
a = va->overflow_arg_area;
|
|
va->overflow_arg_area += type == MIR_T_LD ? 2 : 1;
|
|
}
|
|
return a;
|
|
}
|
|
|
|
void va_block_arg_builtin (void *res, void *p, size_t s, uint64_t ncase) {
|
|
struct x86_64_va_list *va = p;
|
|
size_t size = ((s + 7) / 8) * 8;
|
|
void *a = va->overflow_arg_area;
|
|
union {
|
|
uint64_t i;
|
|
double d;
|
|
} u[2];
|
|
|
|
switch (ncase) {
|
|
case 1:
|
|
if (va->gp_offset + size > 48) break;
|
|
u[0].i = *(uint64_t *) ((char *) va->reg_save_area + va->gp_offset);
|
|
va->gp_offset += 8;
|
|
if (size > 8) {
|
|
u[1].i = *(uint64_t *) ((char *) va->reg_save_area + va->gp_offset);
|
|
va->gp_offset += 8;
|
|
}
|
|
memcpy (res, &u, s);
|
|
return;
|
|
case 2:
|
|
u[0].d = *(double *) ((char *) va->reg_save_area + va->fp_offset);
|
|
va->fp_offset += 16;
|
|
if (size > 8) {
|
|
u[1].d = *(double *) ((char *) va->reg_save_area + va->fp_offset);
|
|
va->fp_offset += 16;
|
|
}
|
|
memcpy (res, &u, s);
|
|
return;
|
|
case 3:
|
|
case 4:
|
|
if (va->fp_offset > 160 || va->gp_offset > 40) break;
|
|
if (ncase == 3) {
|
|
u[0].i = *(uint64_t *) ((char *) va->reg_save_area + va->gp_offset);
|
|
u[1].d = *(double *) ((char *) va->reg_save_area + va->fp_offset);
|
|
} else {
|
|
u[0].d = *(double *) ((char *) va->reg_save_area + va->fp_offset);
|
|
u[1].i = *(uint64_t *) ((char *) va->reg_save_area + va->gp_offset);
|
|
}
|
|
va->fp_offset += 8;
|
|
va->gp_offset += 8;
|
|
memcpy (res, &u, s);
|
|
return;
|
|
default: break;
|
|
}
|
|
memcpy (res, a, s);
|
|
va->overflow_arg_area += size / 8;
|
|
}
|
|
|
|
/* Initialize the va_list object at P by copying the whole va_list object that A points to. */
void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a) {
  struct x86_64_va_list *dst = p;
  const struct x86_64_va_list *src = (struct x86_64_va_list *) (va_list *) a;

  /* The structure view above must cover the native va_list exactly. */
  assert (sizeof (struct x86_64_va_list) == sizeof (va_list));
  *dst = *src;
}
|
|
|
|
#else
|
|
|
|
/* View of a va_list used by the Windows vararg builtins of this file: a single cursor into the
   argument area, advanced one qword per argument. */
struct x86_64_va_list {
  uint64_t *arg_area; /* next argument slot */
};
|
|
|
|
void *va_arg_builtin (void *p, uint64_t t) {
|
|
struct x86_64_va_list *va = p;
|
|
void *a = va->arg_area;
|
|
va->arg_area++;
|
|
return a;
|
|
}
|
|
|
|
void va_block_arg_builtin (void *res, void *p, size_t s, uint64_t ncase) {
|
|
struct x86_64_va_list *va = p;
|
|
void *a = s <= 8 ? va->arg_area : *(void **) va->arg_area; /* pass by pointer */
|
|
memcpy (res, a, s);
|
|
va->arg_area++;
|
|
}
|
|
|
|
/* Initialize the va_list at P: the pointer stored at P is set to A. */
void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a) {
  struct x86_64_va_list **slot = p;

  /* The structure view must have the same size as the native va_list. */
  assert (sizeof (struct x86_64_va_list) == sizeof (va_list));
  *slot = (struct x86_64_va_list *) (va_list *) a;
}
|
|
|
|
#endif
|
|
|
|
/* va_end for the interpreter: nothing has to be released on this target. */
void va_end_interp_builtin (MIR_context_t ctx, void *p) {
}
|
|
|
|
/* r11=<address to go to>; jump *r11 */
|
|
void *_MIR_get_thunk (MIR_context_t ctx) {
|
|
void *res;
|
|
static const uint8_t pattern[] = {
|
|
0x49, 0xbb, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0: movabsq 0, r11 */
|
|
0x41, 0xff, 0xe3, /* 0x14: jmpq *%r11 */
|
|
};
|
|
res = _MIR_publish_code (ctx, pattern, sizeof (pattern));
|
|
return res;
|
|
}
|
|
|
|
/* Make THUNK jump to TO by updating its code.  NOTE(review): the arguments 1 and 2 presumably
   mean "one patch at byte offset 2", i.e. the movabs immediate -- confirm against
   _MIR_update_code. */
void _MIR_redirect_thunk (MIR_context_t ctx, void *thunk, void *to) {
  void *new_target = to;

  _MIR_update_code (ctx, thunk, 1, 2, new_target);
}
|
|
|
|
/* Code saving the argument registers.  Non-Windows: reserve 0x80 bytes, store xmm0-xmm7 there,
   then push r9, r8, rcx, rdx, rsi, rdi.  Windows: store rcx, rdx, r8, r9 at 0x08-0x20(%rsp). */
static const uint8_t save_pat[] = {
#ifndef _WIN32
  0x48, 0x81, 0xec, 0x80, 0,    0, 0, /* sub    $0x80,%rsp */
  0xf3, 0x0f, 0x7f, 0x04, 0x24,       /* movdqu %xmm0,(%rsp) */
  0xf3, 0x0f, 0x7f, 0x4c, 0x24, 0x10, /* movdqu %xmm1,0x10(%rsp) */
  0xf3, 0x0f, 0x7f, 0x54, 0x24, 0x20, /* movdqu %xmm2,0x20(%rsp) */
  0xf3, 0x0f, 0x7f, 0x5c, 0x24, 0x30, /* movdqu %xmm3,0x30(%rsp) */
  0xf3, 0x0f, 0x7f, 0x64, 0x24, 0x40, /* movdqu %xmm4,0x40(%rsp) */
  0xf3, 0x0f, 0x7f, 0x6c, 0x24, 0x50, /* movdqu %xmm5,0x50(%rsp) */
  0xf3, 0x0f, 0x7f, 0x74, 0x24, 0x60, /* movdqu %xmm6,0x60(%rsp) */
  0xf3, 0x0f, 0x7f, 0x7c, 0x24, 0x70, /* movdqu %xmm7,0x70(%rsp) */
  0x41, 0x51,                         /* push   %r9 */
  0x41, 0x50,                         /* push   %r8 */
  0x51,                               /* push   %rcx */
  0x52,                               /* push   %rdx */
  0x56,                               /* push   %rsi */
  0x57,                               /* push   %rdi */
#else
  0x48, 0x89, 0x4c, 0x24, 0x08, /* mov %rcx,0x08(%rsp) */
  0x48, 0x89, 0x54, 0x24, 0x10, /* mov %rdx,0x10(%rsp) */
  0x4c, 0x89, 0x44, 0x24, 0x18, /* mov %r8, 0x18(%rsp) */
  0x4c, 0x89, 0x4c, 0x24, 0x20, /* mov %r9, 0x20(%rsp) */
#endif
};
|
|
|
|
/* Code restoring the argument registers saved by save_pat.  Non-Windows: pop rdi, rsi, rdx, rcx,
   r8, r9, reload xmm0-xmm7 and release the 0x80 bytes.  Windows: reload rcx, rdx, r8, r9 from
   0x08-0x20(%rsp) and load the same slots into xmm0-xmm3 too. */
static const uint8_t restore_pat[] = {
#ifndef _WIN32
  0x5f,                               /* pop    %rdi */
  0x5e,                               /* pop    %rsi */
  0x5a,                               /* pop    %rdx */
  0x59,                               /* pop    %rcx */
  0x41, 0x58,                         /* pop    %r8 */
  0x41, 0x59,                         /* pop    %r9 */
  0xf3, 0x0f, 0x6f, 0x04, 0x24,       /* movdqu (%rsp),%xmm0 */
  0xf3, 0x0f, 0x6f, 0x4c, 0x24, 0x10, /* movdqu 0x10(%rsp),%xmm1 */
  0xf3, 0x0f, 0x6f, 0x54, 0x24, 0x20, /* movdqu 0x20(%rsp),%xmm2 */
  0xf3, 0x0f, 0x6f, 0x5c, 0x24, 0x30, /* movdqu 0x30(%rsp),%xmm3 */
  0xf3, 0x0f, 0x6f, 0x64, 0x24, 0x40, /* movdqu 0x40(%rsp),%xmm4 */
  0xf3, 0x0f, 0x6f, 0x6c, 0x24, 0x50, /* movdqu 0x50(%rsp),%xmm5 */
  0xf3, 0x0f, 0x6f, 0x74, 0x24, 0x60, /* movdqu 0x60(%rsp),%xmm6 */
  0xf3, 0x0f, 0x6f, 0x7c, 0x24, 0x70, /* movdqu 0x70(%rsp),%xmm7 */
  0x48, 0x81, 0xc4, 0x80, 0,    0, 0, /* add    $0x80,%rsp */
#else
  0x48, 0x8b, 0x4c, 0x24, 0x08,       /* mov  0x08(%rsp),%rcx */
  0x48, 0x8b, 0x54, 0x24, 0x10,       /* mov  0x10(%rsp),%rdx */
  0x4c, 0x8b, 0x44, 0x24, 0x18,       /* mov  0x18(%rsp),%r8 */
  0x4c, 0x8b, 0x4c, 0x24, 0x20,       /* mov  0x20(%rsp),%r9 */
  0xf3, 0x0f, 0x7e, 0x44, 0x24, 0x08, /* movq 0x08(%rsp),%xmm0 */
  0xf3, 0x0f, 0x7e, 0x4c, 0x24, 0x10, /* movq 0x10(%rsp),%xmm1 */
  0xf3, 0x0f, 0x7e, 0x54, 0x24, 0x18, /* movq 0x18(%rsp),%xmm2 */
  0xf3, 0x0f, 0x7e, 0x5c, 0x24, 0x20, /* movq 0x20(%rsp),%xmm3 */
#endif
};
|
|
|
|
/* Append the PAT_LEN bytes of PAT to INSN_VARR and return the address of the appended copy
   inside the array's current storage. */
static uint8_t *push_insns (VARR (uint8_t) * insn_varr, const uint8_t *pat, size_t pat_len) {
  const uint8_t *pat_end = pat + pat_len;

  for (const uint8_t *bp = pat; bp != pat_end; bp++) VARR_PUSH (uint8_t, insn_varr, *bp);
  return VARR_ADDR (uint8_t, insn_varr) + (VARR_LENGTH (uint8_t, insn_varr) - pat_len);
}
|
|
|
|
/* Append a 64-bit move between general register REG (0..15) and memory at OFFSET(%rbx):
   a load into REG when LD_P is nonzero, otherwise a store of REG. */
static void gen_mov (VARR (uint8_t) * insn_varr, uint32_t offset, uint32_t reg, int ld_p) {
  static const uint8_t ld_gp_reg[] = {0x48, 0x8b, 0x83, 0, 0, 0, 0 /* mov <offset>(%rbx),%reg */};
  static const uint8_t st_gp_reg[] = {0x48, 0x89, 0x83, 0, 0, 0, 0 /* mov %reg,<offset>(%rbx) */};
  uint8_t *insn;

  if (ld_p)
    insn = push_insns (insn_varr, ld_gp_reg, sizeof (ld_gp_reg));
  else
    insn = push_insns (insn_varr, st_gp_reg, sizeof (st_gp_reg));
  memcpy (insn + 3, &offset, sizeof (uint32_t)); /* 32-bit displacement */
  assert (reg <= 15);
  if (reg & 8) insn[0] |= 4;  /* high bit of the register number goes into the prefix byte */
  insn[2] |= (reg & 7) << 3; /* low three bits go into the reg field */
}
|
|
|
|
/* Append a 64-bit move between general register REG (0..15) and memory at OFFSET(%r12):
   a load into REG when LD_P is nonzero, otherwise a store of REG.  OFFSET is encoded as a
   single byte. */
static void gen_mov2 (VARR (uint8_t) * insn_varr, uint32_t offset, uint32_t reg, int ld_p) {
  static const uint8_t ld_gp_reg[] = {0x49, 0x8b, 0x44, 0x24, 0 /* mov <offset>(%r12),%reg */};
  static const uint8_t st_gp_reg[] = {0x49, 0x89, 0x44, 0x24, 0 /* mov %reg,<offset>(%r12) */};
  uint8_t *insn;

  if (ld_p)
    insn = push_insns (insn_varr, ld_gp_reg, sizeof (ld_gp_reg));
  else
    insn = push_insns (insn_varr, st_gp_reg, sizeof (st_gp_reg));
  insn[4] = (uint8_t) offset; /* 8-bit displacement */
  assert (reg <= 15);
  if (reg & 8) insn[0] |= 4;  /* high bit of the register number goes into the prefix byte */
  insn[2] |= (reg & 7) << 3; /* low three bits go into the reg field */
}
|
|
|
|
/* Append code copying QWORDS 8-byte words from the block whose address is stored at
   ADDR_OFFSET(%rbx) to OFFSET(%rsp).  The generated loop copies from the last qword down to the
   first one and uses r12, rax and r10.  Nothing is generated for an empty block: the loop body
   runs before its exit test, so with a zero count it would still copy the qword located just
   before the block into the stack slot just below OFFSET(%rsp). */
static void gen_blk_mov (VARR (uint8_t) * insn_varr, uint32_t offset, uint32_t addr_offset,
                         uint32_t qwords) {
  static const uint8_t blk_mov_pat[] = {
    /*0:*/ 0x4c,  0x8b, 0xa3, 0,    0, 0, 0,    /*mov <addr_offset>(%rbx),%r12*/
    /*7:*/ 0x48,  0xc7, 0xc0, 0,    0, 0, 0,    /*mov <qwords>,%rax*/
    /*e:*/ 0x48,  0x83, 0xe8, 0x01,             /*sub $0x1,%rax*/
    /*12:*/ 0x4d, 0x8b, 0x14, 0xc4,             /*mov (%r12,%rax,8),%r10*/
    /*16:*/ 0x4c, 0x89, 0x94, 0xc4, 0, 0, 0, 0, /*mov %r10,<offset>(%rsp,%rax,8)*/
    /*1e:*/ 0x48, 0x85, 0xc0,                   /*test %rax,%rax*/
    /*21:*/ 0x7f, 0xeb,                         /*jg e <L0>*/
  };
  uint8_t *addr;

  if (qwords == 0) return; /* nothing to copy */
  addr = push_insns (insn_varr, blk_mov_pat, sizeof (blk_mov_pat));
  memcpy (addr + 3, &addr_offset, sizeof (uint32_t)); /* displacement of the first mov */
  memcpy (addr + 10, &qwords, sizeof (uint32_t));     /* immediate of the counter load */
  memcpy (addr + 26, &offset, sizeof (uint32_t));     /* displacement of the store */
}
|
|
|
|
/* Append a scalar move between xmm register REG (0..7) and memory at OFFSET(%rbx): a load when
   LD_P is nonzero, otherwise a store.  The single-precision form (movss) is used when B32_P is
   nonzero, the double-precision form (movsd) otherwise. */
static void gen_movxmm (VARR (uint8_t) * insn_varr, uint32_t offset, uint32_t reg, int b32_p,
                        int ld_p) {
  static const uint8_t ld_xmm_reg_pat[] = {
    0xf2, 0x0f, 0x10, 0x83, 0, 0, 0, 0 /* movs[sd] <offset>(%rbx),%xmm */
  };
  static const uint8_t st_xmm_reg_pat[] = {
    0xf2, 0x0f, 0x11, 0x83, 0, 0, 0, 0 /* movs[sd] %xmm, <offset>(%rbx) */
  };
  uint8_t *insn;

  if (ld_p)
    insn = push_insns (insn_varr, ld_xmm_reg_pat, sizeof (ld_xmm_reg_pat));
  else
    insn = push_insns (insn_varr, st_xmm_reg_pat, sizeof (st_xmm_reg_pat));
  memcpy (insn + 4, &offset, sizeof (uint32_t)); /* 32-bit displacement */
  assert (reg <= 7);
  insn[3] |= reg << 3;
  if (b32_p) insn[0] |= 1; /* prefix 0xf2 (movsd) becomes 0xf3 (movss) */
}
|
|
|
|
/* Append a movsd between xmm register REG (0..7) and memory at OFFSET(%r12): a load when LD_P
   is nonzero, otherwise a store.  OFFSET is encoded as a single byte. */
static void gen_movxmm2 (VARR (uint8_t) * insn_varr, uint32_t offset, uint32_t reg, int ld_p) {
  static const uint8_t ld_xmm_reg_pat[] = {
    0xf2, 0x41, 0x0f, 0x10, 0x44, 0x24, 0 /* movsd <offset>(%r12),%xmm */
  };
  static const uint8_t st_xmm_reg_pat[] = {
    0xf2, 0x41, 0x0f, 0x11, 0x44, 0x24, 0 /* movsd %xmm, <offset>(%r12) */
  };
  uint8_t *insn;

  if (ld_p)
    insn = push_insns (insn_varr, ld_xmm_reg_pat, sizeof (ld_xmm_reg_pat));
  else
    insn = push_insns (insn_varr, st_xmm_reg_pat, sizeof (st_xmm_reg_pat));
  insn[6] = (uint8_t) offset; /* 8-bit displacement */
  assert (reg <= 7);
  insn[4] |= reg << 3;
}
|
|
|
|
#ifdef _WIN32
|
|
/* Append "lea <sp_offset>(%rsp),reg", i.e. REG = rsp + SP_OFFSET. */
static void gen_add (VARR (uint8_t) * insn_varr, uint32_t sp_offset, int reg) {
  static const uint8_t lea_pat[] = {
    0x48, 0x8d, 0x84, 0x24, 0, 0, 0, 0, /* lea    <sp_offset>(%sp),reg */
  };
  uint8_t *insn = push_insns (insn_varr, lea_pat, sizeof (lea_pat));

  memcpy (insn + 4, &sp_offset, sizeof (uint32_t)); /* 32-bit displacement */
  if (reg > 7) insn[0] |= 4;                        /* r8..r15 need the prefix bit */
  insn[2] |= (reg & 7) << 3;
}
|
|
#endif
|
|
|
|
/* Append a store of r10 to SP_OFFSET(%rsp): 64-bit when B64_P is nonzero, else 32-bit. */
static void gen_st (VARR (uint8_t) * insn_varr, uint32_t sp_offset, int b64_p) {
  static const uint8_t st_pat[] = {
    0x44, 0x89, 0x94, 0x24, 0, 0, 0, 0, /* mov    %r10,<sp_offset>(%sp) */
  };
  uint8_t *insn = push_insns (insn_varr, st_pat, sizeof (st_pat));

  if (b64_p) insn[0] |= 8; /* widen the operand to 64 bits */
  memcpy (insn + 4, &sp_offset, sizeof (uint32_t));
}
|
|
|
|
/* Append code copying a value from SRC_OFFSET(%rbx) to SP_OFFSET(%rsp) through r10.  The value
   is 64-bit when B64_P is nonzero, else 32-bit. */
static void gen_ldst (VARR (uint8_t) * insn_varr, uint32_t sp_offset, uint32_t src_offset,
                      int b64_p) {
  static const uint8_t ld_pat[] = {
    0x44, 0x8b, 0x93, 0, 0, 0, 0, /* mov    <src_offset>(%rbx),%r10 */
  };
  uint8_t *insn = push_insns (insn_varr, ld_pat, sizeof (ld_pat));

  if (b64_p) insn[0] |= 8; /* widen the operand to 64 bits */
  memcpy (insn + 3, &src_offset, sizeof (uint32_t));
  gen_st (insn_varr, sp_offset, b64_p);
}
|
|
|
|
/* Append code copying an 80-bit long double from SRC_OFFSET(%rbx) to SP_OFFSET(%rsp) through
   the x87 stack (fldt followed by fstpt). */
static void gen_ldst80 (VARR (uint8_t) * insn_varr, uint32_t sp_offset, uint32_t src_offset) {
  static uint8_t const ldst80_pat[] = {
    0xdb, 0xab, 0,    0, 0, 0,    /* fldt   <src_offset>(%rbx) */
    0xdb, 0xbc, 0x24, 0, 0, 0, 0, /* fstpt  <sp_offset>(%sp) */
  };
  uint8_t *insn = push_insns (insn_varr, ldst80_pat, sizeof (ldst80_pat));
  uint8_t *fld_disp = insn + 2;  /* displacement of fldt */
  uint8_t *fstp_disp = insn + 9; /* displacement of fstpt */

  memcpy (fld_disp, &src_offset, sizeof (uint32_t));
  memcpy (fstp_disp, &sp_offset, sizeof (uint32_t));
}
|
|
|
|
/* Append "fstpt <src_offset>(%rbx)": pop the x87 stack top into the 80-bit slot at
   SRC_OFFSET(%rbx). */
static void gen_st80 (VARR (uint8_t) * insn_varr, uint32_t src_offset) {
  static const uint8_t st80_pat[] = {0xdb, 0xbb, 0, 0, 0, 0 /* fstpt   <src_offset>(%rbx) */};
  uint8_t *insn = push_insns (insn_varr, st80_pat, sizeof (st80_pat));

  memcpy (insn + 2, &src_offset, sizeof (uint32_t));
}
|
|
|
|
/* Generation: fun (fun_addr, res_arg_addresses):
|
|
push r12, push rbx; sp-=sp_offset; r11=fun_addr; rbx=res/arg_addrs
|
|
r10=mem[rbx,<offset>]; (arg_reg=mem[r10] or r10=mem[r10];mem[sp,sp_offset]=r10
|
|
or r12=mem[rbx,arg_offset]; arg_reg=mem[r12]
|
|
[;(arg_reg + 1)=mem[r12 + 8]]
|
|
...
|
|
or r12=mem[rbx,arg_offset];rax=qwords;
|
|
L:rax-=1;r10=mem[r12,rax]; mem[sp,sp_offset,rax]=r10;
|
|
goto L if rax > 0) ...
|
|
rax=8; call *r11; sp+=offset
|
|
r10=mem[rbx,<offset>]; res_reg=mem[r10]; ...
|
|
pop rbx; pop r12; ret. */
|
|
void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, size_t nargs,
|
|
_MIR_arg_desc_t *arg_descs, int vararg_p) {
|
|
static const uint8_t prolog[] = {
|
|
#ifndef _WIN32
|
|
0x41, 0x54, /* pushq %r12 */
|
|
0x53, /* pushq %rbx */
|
|
0x48, 0x81, 0xec, 0, 0, 0, 0, /* subq <sp_offset>, %rsp */
|
|
0x49, 0x89, 0xfb, /* mov $rdi, $r11 -- fun addr */
|
|
0x48, 0x89, 0xf3, /* mov $rsi, $rbx -- result/arg addresses */
|
|
#else
|
|
/* 0x0: */ 0x41, 0x54, /* pushq %r12 */
|
|
/* 0x2: */ 0x53, /* pushq %rbx */
|
|
/* 0x3: */ 0x55, /* push %rbp */
|
|
/* 0x4: */ 0x48, 0x89, 0xe5, /* mov %rsp,%rbp */
|
|
/* 0x7: */ 0x48, 0x81, 0xec, 0, 0, 0, 0, /* subq <sp_offset>, %rsp */
|
|
/* 0xe: */ 0x49, 0x89, 0xcb, /* mov $rcx, $r11 -- fun addr */
|
|
/* 0x11: */ 0x48, 0x89, 0xd3, /* mov $rdx, $rbx -- result/arg addresses */
|
|
#endif
|
|
};
|
|
static const uint8_t call_end[] = {
|
|
#ifndef _WIN32
|
|
0x48, 0xc7, 0xc0, 0x08, 0, 0, 0, /* mov $8, rax -- to save xmm varargs */
|
|
#endif
|
|
0x41, 0xff, 0xd3, /* callq *%r11 */
|
|
#ifndef _WIN32
|
|
0x48, 0x81, 0xc4, 0, 0, 0, 0, /* addq <sp_offset>, %rsp */
|
|
#endif
|
|
};
|
|
static const uint8_t epilog[] = {
|
|
#ifdef _WIN32 /* Strict form of windows epilogue for unwinding: */
|
|
0x48, 0x8d, 0x65, 0x0, /* lea 0x0(%rbp),%rsp */
|
|
0x5d, /* pop %rbp */
|
|
#endif
|
|
0x5b, /* pop %rbx */
|
|
0x41, 0x5c, /* pop %r12 */
|
|
0xc3, /* ret */
|
|
};
|
|
#ifndef _WIN32
|
|
static const uint8_t iregs[] = {7, 6, 2, 1, 8, 9}; /* rdi, rsi, rdx, rcx, r8, r9 */
|
|
static const uint32_t max_iregs = 6, max_xregs = 8;
|
|
uint32_t sp_offset = 0;
|
|
#else
|
|
static const uint8_t iregs[] = {1, 2, 8, 9}; /* rcx, rdx, r8, r9 */
|
|
static const uint32_t max_iregs = 4, max_xregs = 4;
|
|
uint32_t blk_offset = nargs < 4 ? 32 : nargs * 8, sp_offset = 32; /* spill area */
|
|
#endif
|
|
uint32_t n_iregs = 0, n_xregs = 0, n_fregs, qwords;
|
|
uint8_t *addr;
|
|
VARR (uint8_t) * code;
|
|
void *res;
|
|
|
|
VARR_CREATE (uint8_t, code, 128);
|
|
push_insns (code, prolog, sizeof (prolog));
|
|
for (size_t i = 0; i < nargs; i++) {
|
|
MIR_type_t type = arg_descs[i].type;
|
|
|
|
if ((MIR_T_I8 <= type && type <= MIR_T_U64) || type == MIR_T_P || type == MIR_T_RBLK) {
|
|
if (n_iregs < max_iregs) {
|
|
gen_mov (code, (i + nres) * sizeof (long double), iregs[n_iregs++], TRUE);
|
|
#ifdef _WIN32
|
|
n_xregs++;
|
|
#endif
|
|
} else {
|
|
gen_ldst (code, sp_offset, (i + nres) * sizeof (long double), TRUE);
|
|
sp_offset += 8;
|
|
}
|
|
} else if (type == MIR_T_F || type == MIR_T_D) {
|
|
if (n_xregs < max_xregs) {
|
|
gen_movxmm (code, (i + nres) * sizeof (long double), n_xregs++, type == MIR_T_F, TRUE);
|
|
#ifdef _WIN32
|
|
gen_mov (code, (i + nres) * sizeof (long double), iregs[n_iregs++], TRUE);
|
|
#endif
|
|
} else {
|
|
gen_ldst (code, sp_offset, (i + nres) * sizeof (long double), type == MIR_T_D);
|
|
sp_offset += 8;
|
|
}
|
|
} else if (type == MIR_T_LD) {
|
|
gen_ldst80 (code, sp_offset, (i + nres) * sizeof (long double));
|
|
sp_offset += 16;
|
|
} else if (MIR_blk_type_p (type)) {
|
|
qwords = (arg_descs[i].size + 7) / 8;
|
|
#ifndef _WIN32
|
|
if (type == MIR_T_BLK + 1 && n_iregs + qwords <= max_iregs) {
|
|
assert (qwords <= 2);
|
|
gen_mov (code, (i + nres) * sizeof (long double), 12, TRUE); /* r12 = block addr */
|
|
gen_mov2 (code, 0, iregs[n_iregs], TRUE); /* arg_reg = mem[r12] */
|
|
if (qwords == 2) gen_mov2 (code, 8, iregs[n_iregs + 1], TRUE); /* arg_reg = mem[r12 + 8] */
|
|
n_iregs += qwords;
|
|
n_xregs += qwords;
|
|
continue;
|
|
} else if (type == MIR_T_BLK + 2 && n_xregs + qwords <= max_xregs) {
|
|
assert (qwords <= 2);
|
|
gen_mov (code, (i + nres) * sizeof (long double), 12, TRUE); /* r12 = block addr */
|
|
gen_movxmm2 (code, 0, n_xregs, TRUE); /* xmm = mem[r12] */
|
|
if (qwords == 2) gen_movxmm2 (code, 8, n_xregs + 1, TRUE); /* xmm = mem[r12 + 8] */
|
|
n_xregs += qwords;
|
|
continue;
|
|
} else if (type == MIR_T_BLK + 3 && n_iregs < max_iregs && n_xregs < max_xregs) {
|
|
assert (qwords == 2);
|
|
gen_mov (code, (i + nres) * sizeof (long double), 12, TRUE); /* r12 = block addr */
|
|
gen_mov2 (code, 0, iregs[n_iregs], TRUE); /* arg_reg = mem[r12] */
|
|
n_iregs++;
|
|
n_xregs++;
|
|
gen_movxmm2 (code, 8, n_xregs, TRUE); /* xmm = mem[r12 + 8] */
|
|
n_xregs++;
|
|
continue;
|
|
} else if (type == MIR_T_BLK + 4 && n_iregs < max_iregs && n_xregs < max_xregs) {
|
|
assert (qwords == 2);
|
|
gen_mov (code, (i + nres) * sizeof (long double), 12, TRUE); /* r12 = block addr */
|
|
gen_movxmm2 (code, 0, n_xregs, TRUE); /* xmm = mem[r12] */
|
|
n_xregs++;
|
|
gen_mov2 (code, 8, iregs[n_iregs], TRUE); /* arg_reg = mem[r12 + 8] */
|
|
n_iregs++;
|
|
n_xregs++;
|
|
continue;
|
|
}
|
|
gen_blk_mov (code, sp_offset, (i + nres) * sizeof (long double), qwords);
|
|
sp_offset += qwords * 8;
|
|
#else
|
|
if (qwords <= 1) {
|
|
gen_mov (code, (i + nres) * sizeof (long double), 12, TRUE); /* r12 = mem[disp + rbx] */
|
|
if (n_iregs < max_iregs) {
|
|
gen_mov2 (code, 0, iregs[n_iregs++], TRUE); /* arg_reg = mem[r12] */
|
|
n_xregs++;
|
|
} else {
|
|
gen_mov2 (code, 0, 10, TRUE); /* r10 = mem[r12] */
|
|
gen_st (code, sp_offset, TRUE); /* mem[sp+sp_offset] = r10; */
|
|
sp_offset += 8;
|
|
}
|
|
} else {
|
|
/* r12 = mem[disp + rbx]; mem[rsp+blk_offset + nw] = r10 = mem[r12 + nw]; */
|
|
gen_blk_mov (code, blk_offset, (i + nres) * sizeof (long double), qwords);
|
|
if (n_iregs < max_iregs) {
|
|
gen_add (code, blk_offset, iregs[n_iregs++]); /* arg_reg = sp + blk_offset */
|
|
n_xregs++;
|
|
} else {
|
|
gen_add (code, blk_offset, 10); /* r10 = sp + blk_offset */
|
|
gen_st (code, sp_offset, TRUE); /* mem[sp+sp_offset] = r10; */
|
|
sp_offset += 8;
|
|
}
|
|
blk_offset += qwords * 8;
|
|
}
|
|
#endif
|
|
} else {
|
|
MIR_get_error_func (ctx) (MIR_call_op_error, "wrong type of arg value");
|
|
}
|
|
}
|
|
#ifdef _WIN32
|
|
if (blk_offset > sp_offset) sp_offset = blk_offset;
|
|
#endif
|
|
sp_offset = (sp_offset + 15) / 16 * 16;
|
|
#ifndef _WIN32
|
|
sp_offset += 8; /* align */
|
|
#endif
|
|
addr = VARR_ADDR (uint8_t, code);
|
|
#ifndef _WIN32
|
|
memcpy (addr + 6, &sp_offset, sizeof (uint32_t));
|
|
#else
|
|
memcpy (addr + 10, &sp_offset, sizeof (uint32_t));
|
|
#endif
|
|
addr = push_insns (code, call_end, sizeof (call_end));
|
|
#ifndef _WIN32
|
|
memcpy (addr + sizeof (call_end) - 4, &sp_offset, sizeof (uint32_t));
|
|
#else
|
|
if (nres > 1)
|
|
MIR_get_error_func (ctx) (MIR_call_op_error,
|
|
"Windows x86-64 doesn't support multiple return values");
|
|
#endif
|
|
n_iregs = n_xregs = n_fregs = 0;
|
|
for (size_t i = 0; i < nres; i++) {
|
|
if (((MIR_T_I8 <= res_types[i] && res_types[i] <= MIR_T_U64) || res_types[i] == MIR_T_P)
|
|
&& n_iregs < 2) {
|
|
gen_mov (code, i * sizeof (long double), n_iregs++ == 0 ? 0 : 2, FALSE); /* rax or rdx */
|
|
} else if ((res_types[i] == MIR_T_F || res_types[i] == MIR_T_D) && n_xregs < 2) {
|
|
gen_movxmm (code, i * sizeof (long double), n_xregs++, res_types[i] == MIR_T_F, FALSE);
|
|
} else if (res_types[i] == MIR_T_LD && n_fregs < 2) {
|
|
gen_st80 (code, i * sizeof (long double));
|
|
} else {
|
|
MIR_get_error_func (ctx) (MIR_ret_error,
|
|
"x86-64 can not handle this combination of return values");
|
|
}
|
|
}
|
|
push_insns (code, epilog, sizeof (epilog));
|
|
res = _MIR_publish_code (ctx, VARR_ADDR (uint8_t, code), VARR_LENGTH (uint8_t, code));
|
|
VARR_DESTROY (uint8_t, code);
|
|
return res;
|
|
}
|
|
|
|
/* Transform C call to call of void handler (MIR_context_t ctx, MIR_item_t func_item,
|
|
va_list va, MIR_val_t *results) */
|
|
/* Generate and publish a shim for FUNC_ITEM and return its address.  The shim saves the
   argument registers, builds the va_list and a results area of 16 bytes per result on the
   stack, calls HANDLER (CTX, FUNC_ITEM, va, results), and finally loads the results from the
   results area into the return registers.  Unsupported result combinations are reported
   through the error function of CTX. */
void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handler) {
  static const uint8_t push_rbx[] = {0x53, /*push   %rbx  */};
#ifndef _WIN32
  static const uint8_t prepare_pat[] = {
    /*  0: */ 0x48, 0x83, 0xec, 0x20,                      /* sub    32,%rsp */
    /*  4: */ 0x48, 0x89, 0xe2,                            /* mov    %rsp,%rdx */
    /*  7: */ 0xc7, 0x02, 0,    0,    0,    0,             /* movl   0,(%rdx) */
    /*  d: */ 0xc7, 0x42, 0x04, 0x30, 0,    0, 0,          /* movl   48, 4(%rdx) */
    /* 14: */ 0x48, 0x8d, 0x44, 0x24, 0x20,                /* lea    32(%rsp),%rax */
    /* 19: */ 0x48, 0x89, 0x42, 0x10,                      /* mov    %rax,16(%rdx) */
    /* 1d: */ 0x48, 0x8d, 0x84, 0x24, 0xe0, 0, 0, 0,       /* lea    224(%rsp),%rax */
    /* 25: */ 0x48, 0x89, 0x42, 0x08,                      /* mov    %rax,8(%rdx) */
    /* 29: */ 0x48, 0x81, 0xec, 0,    0,    0, 0,          /* sub    <n>,%rsp */
    /* 30: */ 0x48, 0x89, 0xe3,                            /* mov    %rsp,%rbx */
    /* 33: */ 0x48, 0x89, 0xe1,                            /* mov    %rsp,%rcx */
    /* 36: */ 0x48, 0xbf, 0,    0,    0,    0, 0, 0, 0, 0, /* movabs <ctx>,%rdi */
    /* 40: */ 0x48, 0xbe, 0,    0,    0,    0, 0, 0, 0, 0, /* movabs <func_item>,%rsi */
    /* 4a: */ 0x48, 0xb8, 0,    0,    0,    0, 0, 0, 0, 0, /* movabs <handler>,%rax */
    /* 54: */ 0xff, 0xd0,                                  /* callq  *%rax */
  };
  static const uint32_t nres_offset = 0x2c;
  static const uint32_t ctx_offset = 0x38;
  static const uint32_t func_offset = 0x42;
  static const uint32_t hndl_offset = 0x4c;
  static const uint32_t prep_stack_size = 208;
#else
  static const uint8_t prepare_pat[] = {
    /*  0: */ 0x53,                                  /* push   %rbx */
    /*  1: */ 0x55,                                  /* push   %rbp */
    /*  2: */ 0x48, 0x89, 0xe5,                      /* mov    %rsp,%rbp */
    /*  5: */ 0x4c, 0x8d, 0x44, 0x24, 0x18,          /* lea    24(%rsp),%r8 */
    /*  a: */ 0x48, 0x81, 0xec, 0, 0, 0, 0,          /* sub    <n>,%rsp */
    /* 11: */ 0x48, 0x89, 0xe3,                      /* mov    %rsp,%rbx */
    /* 14: */ 0x49, 0x89, 0xe1,                      /* mov    %rsp,%r9 */
    /* 17: */ 0x48, 0x83, 0xec, 0x20,                /* sub    32,%rsp */
    /* 1b: */ 0x48, 0xb9, 0, 0, 0, 0, 0, 0, 0, 0,    /* movabs <ctx>,%rcx */
    /* 25: */ 0x48, 0xba, 0, 0, 0, 0, 0, 0, 0, 0,    /* movabs <func_item>,%rdx */
    /* 2f: */ 0x48, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0,    /* movabs <handler>,%rax */
    /* 39: */ 0xff, 0xd0,                            /* callq  *%rax */
  };
  static const uint32_t nres_offset = 0x0d;
  static const uint32_t ctx_offset = 0x1d;
  static const uint32_t func_offset = 0x27;
  static const uint32_t hndl_offset = 0x31;
  static const uint32_t prep_stack_size = 32;
#endif
  static const uint8_t shim_end[] = {
#ifndef _WIN32
    /* 0: */ 0x48, 0x81, 0xc4, 0, 0, 0, 0, /*add    prep_stack_size+n,%rsp*/
#else                                      /* Strict form of windows epilogue for unwinding: */
    /* 0: */ 0x48, 0x8d, 0x65, 0x0, /* lea  0x0(%rbp),%rsp */
    /* 4: */ 0x5d,                  /* pop %rbp */
#endif
    0x5b, /*pop                      %rbx*/
    0xc3, /*retq */
  };
  static const uint8_t ld_pat[] = {0x48, 0x8b, 0x83, 0, 0, 0, 0}; /* mov <offset>(%rbx), %reg */
  static const uint8_t movss_pat[]
    = {0xf3, 0x0f, 0x10, 0x83, 0, 0, 0, 0}; /* movss <offset>(%rbx), %xmm[01] */
  static const uint8_t movsd_pat[]
    = {0xf2, 0x0f, 0x10, 0x83, 0, 0, 0, 0}; /* movsd <offset>(%rbx), %xmm[01] */
  static const uint8_t fldt_pat[] = {0xdb, 0xab, 0, 0, 0, 0}; /* fldt <offset>(%rbx) */
  static const uint8_t fxch_pat[] = {0xd9, 0xc9};             /* fxch */
  uint8_t *insn;
  uint32_t imm, n_iregs, n_xregs, n_fregs, res_offset;
  uint32_t nres = func_item->u.func->nres;
  MIR_type_t *results = func_item->u.func->res_types;
  VARR (uint8_t) * code;
  void *shim;

  VARR_CREATE (uint8_t, code, 128);
#ifndef _WIN32
  push_insns (code, push_rbx, sizeof (push_rbx));
#endif
  push_insns (code, save_pat, sizeof (save_pat));
  insn = push_insns (code, prepare_pat, sizeof (prepare_pat));
  /* Size of the results area: 16 bytes per result. */
  imm = nres * 16;
#ifdef _WIN32
  imm += 8; /*align */
#endif
  /* Fill in the immediates of the preparation code. */
  memcpy (insn + nres_offset, &imm, sizeof (uint32_t));
  memcpy (insn + ctx_offset, &ctx, sizeof (void *));
  memcpy (insn + func_offset, &func_item, sizeof (void *));
  memcpy (insn + hndl_offset, &handler, sizeof (void *));
  /* move results: */
#ifdef _WIN32
  if (nres > 1)
    MIR_get_error_func (ctx) (MIR_call_op_error,
                              "Windows x86-64 doesn't support multiple return values");
#endif
  n_iregs = n_xregs = n_fregs = res_offset = 0;
  for (uint32_t i = 0; i < nres; i++, res_offset += 16) {
    MIR_type_t res_type = results[i];

    if ((res_type == MIR_T_F || res_type == MIR_T_D) && n_xregs < 2) {
      /* Float/double results go to xmm0 and then xmm1. */
      if (res_type == MIR_T_F)
        insn = push_insns (code, movss_pat, sizeof (movss_pat));
      else
        insn = push_insns (code, movsd_pat, sizeof (movsd_pat));
      insn[3] |= n_xregs << 3;
      memcpy (insn + 4, &res_offset, sizeof (uint32_t));
      n_xregs++;
    } else if (res_type == MIR_T_LD && n_fregs < 2) {
      /* Long double results are loaded on the x87 stack; after the second load the two are
         exchanged so that the first result is on the stack top. */
      insn = push_insns (code, fldt_pat, sizeof (fldt_pat));
      memcpy (insn + 2, &res_offset, sizeof (uint32_t));
      if (n_fregs == 1) push_insns (code, fxch_pat, sizeof (fxch_pat));
      n_fregs++;
    } else if (n_iregs < 2) {
      /* Everything else goes to rax and then rdx (register numbers 0 and 2). */
      insn = push_insns (code, ld_pat, sizeof (ld_pat));
      insn[2] |= n_iregs << 4;
      memcpy (insn + 3, &res_offset, sizeof (uint32_t));
      n_iregs++;
    } else {
      MIR_get_error_func (ctx) (MIR_ret_error,
                                "x86-64 can not handle this combination of return values");
    }
  }
  insn = push_insns (code, shim_end, sizeof (shim_end));
#ifndef _WIN32
  /* Release the results area together with everything allocated by the preparation code. */
  imm = prep_stack_size + nres * 16;
  memcpy (insn + 3, &imm, sizeof (uint32_t));
#endif
  shim = _MIR_publish_code (ctx, VARR_ADDR (uint8_t, code), VARR_LENGTH (uint8_t, code));
  VARR_DESTROY (uint8_t, code);
  return shim;
}
|
|
|
|
/* save regs; r10 = call hook_address (ctx, called_func); restore regs; jmp *r10
|
|
*/
|
|
/* Generate and publish a wrapper for CALLED_FUNC and return its address.  The wrapper saves the
   argument registers, calls HOOK_ADDRESS (CTX, CALLED_FUNC), keeps the returned address in r10,
   restores the argument registers and jumps to that address. */
void *_MIR_get_wrapper (MIR_context_t ctx, MIR_item_t called_func, void *hook_address) {
  static const uint8_t push_rax[] = {0x50, /*push   %rax */};
  static const uint8_t wrap_end[] = {
#ifndef _WIN32
    0x58, /*pop    %rax */
#endif
    0x41, 0xff, 0xe2, /*jmpq   *%r10			   */
  };
#ifndef _WIN32
  static const uint8_t call_pat[] = {
    0x48, 0xbe, 0,    0, 0, 0, 0, 0, 0, 0, /* movabs called_func,%rsi */
    0x48, 0xbf, 0,    0, 0, 0, 0, 0, 0, 0, /* movabs ctx,%rdi */
    0x49, 0xba, 0,    0, 0, 0, 0, 0, 0, 0, /* movabs <hook_address>,%r10 */
    0x41, 0xff, 0xd2,                      /* callq  *%r10 */
    0x49, 0x89, 0xc2,                      /* mov    %rax,%r10 */
  };
  size_t call_func_offset = 2, ctx_offset = 12, hook_offset = 22;
#else
  static const uint8_t call_pat[] = {
    0x55,                                  /* push   %rbp */
    0x48, 0x89, 0xe5,                      /* mov    %rsp,%rbp */
    0x48, 0xba, 0,    0,    0, 0, 0, 0, 0, 0, /* movabs called_func,%rdx */
    0x48, 0xb9, 0,    0,    0, 0, 0, 0, 0, 0, /* movabs ctx,%rcx */
    0x49, 0xba, 0,    0,    0, 0, 0, 0, 0, 0, /* movabs <hook_address>,%r10 */
    0x50,                                  /* push   %rax */
    0x48, 0x83, 0xec, 0x28,                /* sub    40,%rsp */
    0x41, 0xff, 0xd2,                      /* callq  *%r10 */
    0x49, 0x89, 0xc2,                      /* mov    %rax,%r10 */
    0x48, 0x83, 0xc4, 0x28,                /* add    40,%rsp */
    0x58,                                  /* pop    %rax */
    0x5d,                                  /* pop    %rbp */
  };
  size_t call_func_offset = 6, ctx_offset = 16, hook_offset = 26;
#endif
  uint8_t *call_insns;
  VARR (uint8_t) * code;
  void *wrapper;

  VARR_CREATE (uint8_t, code, 128);
#ifndef _WIN32
  push_insns (code, push_rax, sizeof (push_rax));
#endif
  push_insns (code, save_pat, sizeof (save_pat));
  call_insns = push_insns (code, call_pat, sizeof (call_pat));
  /* Fill in the three 64-bit immediates of the call sequence. */
  memcpy (call_insns + call_func_offset, &called_func, sizeof (void *));
  memcpy (call_insns + ctx_offset, &ctx, sizeof (void *));
  memcpy (call_insns + hook_offset, &hook_address, sizeof (void *));
  push_insns (code, restore_pat, sizeof (restore_pat));
  push_insns (code, wrap_end, sizeof (wrap_end));
  wrapper = _MIR_publish_code (ctx, VARR_ADDR (uint8_t, code), VARR_LENGTH (uint8_t, code));
  VARR_DESTROY (uint8_t, code);
  return wrapper;
}
|