Dibyendu Majumdar 4 years ago
commit eb3d9ca72d

@ -0,0 +1,15 @@
# Run this on LLVM 10 source dir
# Configures and installs a minimal LLVM 10 (X86 backend only; no tools,
# examples, or tests) under $HOME/Software/llvm10.
mkdir build
cd build
# NOTE(review): `cmake3` is the CMake 3.x binary name on RHEL/CentOS 7;
# use plain `cmake` on other distributions.
cmake3 -DCMAKE_INSTALL_PREFIX=$HOME/Software/llvm10 \
-DLLVM_TARGETS_TO_BUILD="X86" \
-DLLVM_BUILD_TOOLS=OFF \
-DLLVM_INCLUDE_TOOLS=OFF \
-DLLVM_BUILD_EXAMPLES=OFF \
-DLLVM_INCLUDE_EXAMPLES=OFF \
-DLLVM_BUILD_TESTS=OFF \
-DLLVM_INCLUDE_TESTS=OFF \
..
cmake3 --build . --config Release --target install

@ -3,4 +3,5 @@ mkdir buildllvm
cd buildllvm
# Earlier configuration variants, kept commented for reference:
#cmake -DCMAKE_BUILD_TYPE=Release -DLLVM_JIT=ON -DCMAKE_INSTALL_PREFIX=$HOME/ravi -DLLVM_DIR=$HOME/LLVM/share/llvm/cmake ..
#cmake -DCMAKE_BUILD_TYPE=Release -DLLVM_JIT=ON -DCMAKE_INSTALL_PREFIX=$HOME/ravi -DLLVM_DIR=$HOME/LLVM5/lib/cmake/llvm ..
cmake -DCMAKE_BUILD_TYPE=Release -DSTATIC_BUILD=ON -DLLVM_JIT=ON -DCMAKE_INSTALL_PREFIX=$HOME/Software/ravi -DLLVM_DIR=$HOME/Software/llvm801/lib/cmake/llvm ..
#cmake -DCMAKE_BUILD_TYPE=Release -DSTATIC_BUILD=ON -DLLVM_JIT=ON -DCMAKE_INSTALL_PREFIX=$HOME/Software/ravi -DLLVM_DIR=$HOME/Software/llvm801/lib/cmake/llvm ..
# NOTE(review): both the llvm801 line above and the llvm10 line below run;
# the later configure wins. The llvm801 line was probably meant to be
# commented out as well -- confirm.
# Current configuration: build Ravi against the LLVM 10 install created above.
cmake3 -DCMAKE_BUILD_TYPE=Release -DSTATIC_BUILD=ON -DLLVM_JIT=ON -DCMAKE_INSTALL_PREFIX=$HOME/Software/ravi -DLLVM_DIR=$HOME/Software/llvm10/lib/cmake/llvm ..

@ -31,6 +31,8 @@
#include "aarch64/caarch64.h"
#elif defined(__PPC64__)
#include "ppc64/cppc64.h"
#elif defined(__s390x__)
#include "s390x/cs390x.h"
#else
#error "undefined or unsupported generation target for C"
#endif
@ -317,6 +319,8 @@ static mir_size_t raw_type_size (c2m_ctx_t c2m_ctx, struct type *type) {
#include "aarch64/caarch64-code.c"
#elif defined(__PPC64__)
#include "ppc64/cppc64-code.c"
#elif defined(__s390x__)
#include "s390x/cs390x-code.c"
#else
#error "undefined or unsupported generation target for C"
#endif
@ -10237,7 +10241,8 @@ static void gen_memcpy (MIR_context_t ctx, MIR_disp_t disp, MIR_reg_t base, op_t
MIR_op_t treg_op, args[6];
MIR_module_t module;
if (val.mir_op.u.mem.index == 0 && val.mir_op.u.mem.disp == disp && val.mir_op.u.mem.base == base)
if (val.mir_op.mode == MIR_OP_MEM && val.mir_op.u.mem.index == 0 && val.mir_op.u.mem.disp == disp
&& val.mir_op.u.mem.base == base)
return;
if (memcpy_item == NULL) {
ret_type = get_int_mir_type (sizeof (mir_size_t));
@ -11435,6 +11440,7 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
signed_p ? MIR_new_int_op (ctx, e->u.i_val) : MIR_new_uint_op (ctx, e->u.u_val));
emit3 (ctx, short_p ? MIR_UBGTS : MIR_UBGT, MIR_new_label_op (ctx, label), index.mir_op,
MIR_new_uint_op (ctx, range));
if (short_p) emit2 (ctx, MIR_UEXT32, index.mir_op, index.mir_op);
VARR_TRUNC (case_t, switch_cases, 0);
for (c = DLIST_HEAD (case_t, switch_attr->case_labels);
c != NULL && c->case_node->code != N_DEFAULT; c = DLIST_NEXT (case_t, c))
@ -12179,6 +12185,14 @@ static void init_include_dirs (MIR_context_t ctx) {
VARR_PUSH (char_ptr_t, system_headers, "/usr/include/x86_64-linux-gnu");
#elif defined(__linux__) && defined(__aarch64__)
VARR_PUSH (char_ptr_t, system_headers, "/usr/include/aarch64-linux-gnu");
#elif defined(__linux__) && defined(__PPC64__)
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
VARR_PUSH (char_ptr_t, system_headers, "/usr/include/powerpc64le-linux-gnu");
#else
VARR_PUSH (char_ptr_t, system_headers, "/usr/include/powerpc64-linux-gnu");
#endif
#elif defined(__linux__) && defined(__s390x__)
VARR_PUSH (char_ptr_t, system_headers, "/usr/include/s390x-linux-gnu");
#endif
#if defined(__APPLE__) || defined(__unix__)
VARR_PUSH (char_ptr_t, system_headers, "/usr/include");

@ -7,6 +7,13 @@ static char ppc64_mirc[]
"#define _ARCH_PPC64 1\n"
"#define _LP64 1\n"
"#define __LP64__ 1\n"
"#define __powerpc64__ 1\n"
"#define __powerpc__ 1\n"
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
"#define _CALL_ELF 1\n"
#else
"#define _CALL_ELF 2\n"
#endif
"\n"
"#define __LONG_DOUBLE_128__ 1\n" // ???
"#define __SIZEOF_DOUBLE__ 8\n"
@ -20,10 +27,15 @@ static char ppc64_mirc[]
"#define __SIZEOF_SHORT__ 2\n"
"#define __SIZEOF_SIZE_T__ 8\n"
"\n"
"#define _BIG_ENDIAN 1\n" // ??? Implement LE too
"#define __ORDER_LITTLE_ENDIAN__ 1234\n"
"#define __ORDER_BIG_ENDIAN__ 4321\n"
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
"#define _BIG_ENDIAN 1\n"
"#define __BYTE_ORDER__ __ORDER_BIG_ENDIAN__\n"
#else
"#define _LITTLE_ENDIAN 1\n"
"#define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__\n"
#endif
"\n"
"/* Some GCC predefined macros: */\n"
"#define __SIZE_TYPE__ unsigned long\n"

@ -0,0 +1,23 @@
/* This file is a part of MIR project.
Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
#include "../mirc.h"
#include "mirc-s390x-linux.h"
static const char *standard_includes[] = {mirc, s390x_mirc};
static const char *standard_include_dirs[] = {"include/mirc/", "include/mirc/s390x/"};
#define MAX_ALIGNMENT 16
#define ADJUST_VAR_ALIGNMENT(c2m_ctx, align, type) \
s390x_adjust_var_alignment (c2m_ctx, align, type)
/* Target hook used through ADJUST_VAR_ALIGNMENT: s390x imposes no extra
   alignment constraints on variables, so the alignment computed by the
   front end is returned unchanged. */
static int s390x_adjust_var_alignment (c2m_ctx_t c2m_ctx, int align, struct type *type) {
return align;
}
/* Return non-zero iff ALIGN is not an acceptable explicit alignment for this
   target.  Accepted values are 0 (no explicit alignment) and the powers of
   two 1, 2, 4, 8, and 16. */
static int invalid_alignment (mir_llong align) {
  return align < 0 || align > 16 || (align & (align - 1)) != 0;
}

@ -0,0 +1,50 @@
/* This file is a part of MIR project.
Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
#include <stdint.h>

/* Bit width of the target char type. */
#define MIR_CHAR_BIT 8

/* Host-side representations of the target's signed integer types
   (LP64 model: int is 32 bits; long, long long, and pointers are 64 bits). */
typedef int8_t mir_schar;
typedef int16_t mir_short;
typedef int32_t mir_int;
typedef int64_t mir_long;
typedef int64_t mir_llong;

/* Limits of the signed types above. */
#define MIR_SCHAR_MIN INT8_MIN
#define MIR_SCHAR_MAX INT8_MAX
#define MIR_SHORT_MIN INT16_MIN
#define MIR_SHORT_MAX INT16_MAX
#define MIR_INT_MIN INT32_MIN
#define MIR_INT_MAX INT32_MAX
#define MIR_LONG_MIN INT64_MIN
#define MIR_LONG_MAX INT64_MAX
#define MIR_LLONG_MIN INT64_MIN
#define MIR_LLONG_MAX INT64_MAX

/* Unsigned counterparts and their limits. */
typedef uint8_t mir_uchar;
typedef uint16_t mir_ushort;
typedef uint32_t mir_uint;
typedef uint64_t mir_ulong;
typedef uint64_t mir_ullong;
#define MIR_UCHAR_MAX UINT8_MAX
#define MIR_USHORT_MAX UINT16_MAX
#define MIR_UINT_MAX UINT32_MAX
#define MIR_ULONG_MAX UINT64_MAX
#define MIR_ULLONG_MAX UINT64_MAX

/* Plain char is signed on this target. */
typedef mir_schar mir_char;
#define MIR_CHAR_MIN MIR_SCHAR_MIN
#define MIR_CHAR_MAX MIR_SCHAR_MAX

/* Floating-point and auxiliary types. */
typedef float mir_float;
typedef double mir_double;
typedef long double mir_ldouble;
typedef uint8_t mir_bool;
typedef int64_t mir_ptrdiff_t;
typedef uint64_t mir_size_t;
#define MIR_SIZE_MAX UINT64_MAX

@ -0,0 +1,102 @@
/* This file is a part of MIR project.
Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* Predefined macro set injected by c2mir for the s390x (z/Architecture,
   64-bit, big-endian) Linux target.  The string is preprocessed as if it
   were a built-in header.  The host preprocessor conditionals below are
   evaluated when MIR itself is built; they mirror the host's long-double
   configuration into the target macros.
   Fix: __SIZEOF_LONG_DOUBLE__ was previously defined a second time,
   unconditionally, after the conditional block, so the generated string
   always contained two (and when the host long double is 8 bytes,
   contradictory) definitions.  The duplicate is removed. */
static char s390x_mirc[]
  = "#define __zarch__ 1\n"
    "#define _LP64 1\n"
    "#define __LP64__ 1\n"
    "#define __s390__ 1\n"
    "#define __s390x__ 1\n"
    "#define __ELF__ 1\n"
    "\n"
#if __SIZEOF_LONG_DOUBLE__ == 16
    "#define __LONG_DOUBLE_128__ 1\n" /* host uses a 128-bit long double */
    "#define __SIZEOF_LONG_DOUBLE__ 16\n"
#else
    "#define __SIZEOF_LONG_DOUBLE__ 8\n"
#endif
    "#define __SIZEOF_FLOAT__ 4\n"
    "#define __SIZEOF_INT__ 4\n"
    "#define __SIZEOF_LONG_LONG__ 8\n"
    "#define __SIZEOF_LONG__ 8\n"
    "#define __SIZEOF_POINTER__ 8\n"
    "#define __SIZEOF_PTRDIFF_T__ 8\n"
    "#define __SIZEOF_SHORT__ 2\n"
    "#define __SIZEOF_SIZE_T__ 8\n"
    "\n"
    /* s390x is always big-endian. */
    "#define __ORDER_LITTLE_ENDIAN__ 1234\n"
    "#define __ORDER_BIG_ENDIAN__ 4321\n"
    "#define _BIG_ENDIAN 1\n"
    "#define __BYTE_ORDER__ __ORDER_BIG_ENDIAN__\n"
    "\n"
    "/* Some GCC predefined macros: */\n"
    "#define __SIZE_TYPE__ unsigned long\n"
    "#define __PTRDIFF_TYPE__ long\n"
    "#define __INTMAX_TYPE__ long\n"
    "#define __UINTMAX_TYPE__ unsigned long\n"
    "#define __INT8_TYPE__ signed char\n"
    "#define __INT16_TYPE__ short\n"
    "#define __INT32_TYPE__ int\n"
    "#define __INT64_TYPE__ long\n"
    "#define __UINT8_TYPE__ unsigned char\n"
    "#define __UINT16_TYPE__ unsigned short\n"
    "#define __UINT32_TYPE__ unsigned int\n"
    "#define __UINT64_TYPE__ unsigned long\n"
    "#define __INTPTR_TYPE__ long\n"
    "#define __UINTPTR_TYPE__ unsigned long\n"
    "\n"
    "#define __CHAR_BIT__ 8\n"
    "#define __INT8_MAX__ 127\n"
    "#define __INT16_MAX__ 32767\n"
    "#define __INT32_MAX__ 2147483647\n"
    "#define __INT64_MAX__ 9223372036854775807l\n"
    "#define __UINT8_MAX__ (__INT8_MAX__ * 2u + 1u)\n"
    "#define __UINT16_MAX__ (__INT16_MAX__ * 2u + 1u)\n"
    "#define __UINT32_MAX__ (__INT32_MAX__ * 2u + 1u)\n"
    "#define __UINT64_MAX__ (__INT64_MAX__ * 2u + 1u)\n"
    "#define __SCHAR_MAX__ __INT8_MAX__\n"
    "#define __SHRT_MAX__ __INT16_MAX__\n"
    "#define __INT_MAX__ __INT32_MAX__\n"
    "#define __LONG_MAX__ __INT64_MAX__\n"
    "#define __LONG_LONG_MAX__ __INT64_MAX__\n"
    "#define __SIZE_MAX__ __UINT64_MAX__\n"
    "#define __PTRDIFF_MAX__ __INT64_MAX__\n"
    "#define __INTMAX_MAX__ __INT64_MAX__\n"
    "#define __UINTMAX_MAX__ __UINT64_MAX__\n"
    "#define __INTPTR_MAX__ __INT64_MAX__\n"
    "#define __UINTPTR_MAX__ __UINT64_MAX__\n"
    "\n"
    "#define __FLT_MIN_EXP__ (-125)\n"
    "#define __FLT_MAX_EXP__ 128\n"
    "#define __FLT_DIG__ 6\n"
    "#define __FLT_DECIMAL_DIG__ 9\n"
    "#define __FLT_MANT_DIG__ 24\n"
    "#define __FLT_MIN__ 1.17549435082228750796873653722224568e-38F\n"
    "#define __FLT_MAX__ 3.40282346638528859811704183484516925e+38F\n"
    "#define __FLT_EPSILON__ 1.19209289550781250000000000000000000e-7F\n"
    "\n"
    "#define __DBL_MIN_EXP__ (-1021)\n"
    "#define __DBL_MAX_EXP__ 1024\n"
    "#define __DBL_DIG__ 15\n"
    "#define __DBL_DECIMAL_DIG__ 17\n"
    "#define __DBL_MANT_DIG__ 53\n"
    "#define __DBL_MAX__ ((double) 1.79769313486231570814527423731704357e+308L)\n"
    "#define __DBL_MIN__ ((double) 2.22507385850720138309023271733240406e-308L)\n"
    "#define __DBL_EPSILON__ ((double) 2.22044604925031308084726333618164062e-16L)\n"
    "\n"
    "typedef unsigned short char16_t;\n"
    "typedef unsigned int char32_t;\n"
    "\n"
#if defined(__linux__)
    "#define __gnu_linux__ 1\n"
    "#define __linux 1\n"
    "#define __linux__ 1\n"
    "#define linux 1\n"
    "#define unix 1\n"
    "#define __unix 1\n"
    "#define __unix__ 1\n"
#endif
    "\n"
    "void *alloca (unsigned long);\n";

@ -33,6 +33,8 @@ static int target_locs_num (MIR_reg_t loc, MIR_type_t type) {
return loc > MAX_HARD_REG && type == MIR_T_LD ? 2 : 1;
}
/* Return the location holding the N-th consecutive slot of a multi-slot
   value that starts at LOC (slots are numbered from 0). */
static inline MIR_reg_t target_nth_loc (MIR_reg_t loc, MIR_type_t type, int n) { return loc + n; }
/* Hard regs not used in machinized code, preferably call used ones. */
const MIR_reg_t TEMP_INT_HARD_REG1 = R9_HARD_REG, TEMP_INT_HARD_REG2 = R10_HARD_REG;
const MIR_reg_t TEMP_FLOAT_HARD_REG1 = V16_HARD_REG, TEMP_FLOAT_HARD_REG2 = V17_HARD_REG;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -22,6 +22,8 @@ static int target_locs_num (MIR_reg_t loc, MIR_type_t type) {
return loc > MAX_HARD_REG && type == MIR_T_LD ? 2 : 1;
}
/* Return the location holding the N-th consecutive slot of a multi-slot
   value that starts at LOC (slots are numbered from 0). */
static inline MIR_reg_t target_nth_loc (MIR_reg_t loc, MIR_type_t type, int n) { return loc + n; }
/* Hard regs not used in machinized code, preferably call used ones. */
const MIR_reg_t TEMP_INT_HARD_REG1 = R10_HARD_REG, TEMP_INT_HARD_REG2 = R11_HARD_REG;
const MIR_reg_t TEMP_FLOAT_HARD_REG1 = XMM8_HARD_REG, TEMP_FLOAT_HARD_REG2 = XMM9_HARD_REG;
@ -72,12 +74,21 @@ static inline int target_call_used_hard_reg_p (MIR_reg_t hard_reg) {
|---------------|
| slots for | dynamically allocated/deallocated by caller
| passing args |
|---------------|
| spill space | WIN64 only, 32 bytes spill space for register args
|---------------|
size of slots and saved regs is multiple of 16 bytes
*/
#ifndef _WIN64
static const int reg_save_area_size = 176;
static const int spill_space_size = 0;
#else
static const int reg_save_area_size = 0;
static const int spill_space_size = 32;
#endif
static MIR_disp_t target_get_stack_slot_offset (MIR_context_t ctx, MIR_type_t type,
MIR_reg_t slot) {
@ -156,14 +167,14 @@ static MIR_reg_t get_arg_reg (MIR_type_t arg_type, size_t *int_arg_num, size_t *
arg_reg = MIR_NON_HARD_REG;
*mov_code = MIR_LDMOV;
} else if (arg_type == MIR_T_F || arg_type == MIR_T_D) {
arg_reg = get_fp_arg_reg(*fp_arg_num);
arg_reg = get_fp_arg_reg (*fp_arg_num);
(*fp_arg_num)++;
#ifdef _WIN64
(*int_arg_num)++; /* arg slot used by fp, skip int register */
#endif
*mov_code = arg_type == MIR_T_F ? MIR_FMOV : MIR_DMOV;
} else {
arg_reg = get_int_arg_reg(*int_arg_num);
arg_reg = get_int_arg_reg (*int_arg_num);
#ifdef _WIN64
(*fp_arg_num)++; /* arg slot used by int, skip fp register */
#endif
@ -178,7 +189,7 @@ static void machinize_call (MIR_context_t ctx, MIR_insn_t call_insn) {
MIR_func_t func = curr_func_item->u.func;
MIR_proto_t proto = call_insn->ops[0].u.ref->u.proto;
size_t nargs, nops = MIR_insn_nops (ctx, call_insn), start = proto->nres + 2;
size_t int_arg_num = 0, fp_arg_num = 0, mem_size = 0, xmm_args = 0;
size_t int_arg_num = 0, fp_arg_num = 0, xmm_args = 0, mem_size = spill_space_size;
MIR_type_t type, mem_type;
MIR_op_mode_t mode;
MIR_var_t *arg_vars = NULL;
@ -198,9 +209,6 @@ static void machinize_call (MIR_context_t ctx, MIR_insn_t call_insn) {
nargs = VARR_LENGTH (MIR_var_t, proto->args);
arg_vars = VARR_ADDR (MIR_var_t, proto->args);
}
#ifdef _WIN64
if (nargs > 4 || proto->vararg_p) mem_size = 32; /* spill space for register args */
#endif
if (call_insn->ops[1].mode != MIR_OP_REG && call_insn->ops[1].mode != MIR_OP_HARD_REG) {
temp_op = MIR_new_reg_op (ctx, gen_new_temp_reg (ctx, MIR_T_I64, func));
new_insn = MIR_new_insn (ctx, MIR_MOV, temp_op, call_insn->ops[1]);
@ -275,6 +283,10 @@ static void machinize_call (MIR_context_t ctx, MIR_insn_t call_insn) {
MIR_new_int_op (ctx, xmm_args));
gen_add_insn_before (ctx, call_insn, new_insn);
}
#else
if (proto->nres > 1)
(*MIR_get_error_func (ctx)) (MIR_ret_error,
"Windows x86-64 doesn't support multiple return values");
#endif
n_iregs = n_xregs = n_fregs = 0;
for (size_t i = 0; i < proto->nres; i++) {
@ -458,13 +470,13 @@ static void target_machinize (MIR_context_t ctx) {
MIR_insn_t insn, next_insn, new_insn;
MIR_reg_t ret_reg, arg_reg;
MIR_op_t ret_reg_op, arg_reg_op, mem_op;
size_t i, int_arg_num, fp_arg_num, mem_size;
size_t i, int_arg_num = 0, fp_arg_num = 0, mem_size = spill_space_size;
assert (curr_func_item->item_type == MIR_func_item);
func = curr_func_item->u.func;
stack_arg_func_p = FALSE;
start_sp_from_bp_offset = 8;
for (i = int_arg_num = fp_arg_num = mem_size = 0; i < func->nargs; i++) {
for (i = 0; i < func->nargs; i++) {
/* Argument extensions is already done in simplify */
/* Prologue: generate arg_var = hard_reg|stack mem ... */
type = VARR_GET (MIR_var_t, func->vars, i).type;
@ -520,6 +532,7 @@ static void target_machinize (MIR_context_t ctx) {
= MIR_new_reg_op (ctx, gen_new_temp_reg (ctx, MIR_T_I64, curr_func_item->u.func));
MIR_op_t va_op = insn->ops[0];
MIR_reg_t va_reg;
#ifndef _WIN64
int gp_offset = 0, fp_offset = 48;
MIR_var_t var;
@ -549,10 +562,30 @@ static void target_machinize (MIR_context_t ctx) {
MIR_new_int_op (ctx, -reg_save_area_size));
gen_add_insn_before (ctx, insn, new_insn);
gen_mov (ctx, insn, MIR_MOV, MIR_new_mem_op (ctx, MIR_T_I64, 16, va_reg, 0, 1), treg_op);
#else
stack_arg_func_p = TRUE;
/* spill reg args */
mem_size = 8 /*ret*/ + start_sp_from_bp_offset;
for (int i = 0; i < 4; i++) {
arg_reg = get_int_arg_reg (i);
mem_op = _MIR_new_hard_reg_mem_op (ctx, MIR_T_I64, mem_size, FP_HARD_REG, MIR_NON_HARD_REG, 1);
new_insn = MIR_new_insn (ctx, MIR_MOV, mem_op, _MIR_new_hard_reg_op (ctx, arg_reg));
gen_add_insn_before (ctx, insn, new_insn);
mem_size += 8;
}
/* init va_list */
mem_size = 8 /*ret*/ + start_sp_from_bp_offset + func->nargs * 8;
new_insn = MIR_new_insn (ctx, MIR_ADD, treg_op, _MIR_new_hard_reg_op (ctx, FP_HARD_REG),
MIR_new_int_op (ctx, mem_size));
gen_add_insn_before (ctx, insn, new_insn);
va_reg = va_op.mode == MIR_OP_REG ? va_op.u.reg : va_op.u.hard_reg;
gen_mov (ctx, insn, MIR_MOV, MIR_new_mem_op (ctx, MIR_T_I64, 0, va_reg, 0, 1), treg_op);
#endif
gen_delete_insn (ctx, insn);
} else if (code == MIR_VA_END) { /* do nothing */
gen_delete_insn (ctx, insn);
} else if (code == MIR_VA_ARG) { /* do nothing */
#ifndef _WIN64
/* Use a builtin func call:
mov func_reg, func ref; mov flag_reg, <0|1>; call proto, func_reg, res_reg, va_reg,
flag_reg */
@ -578,6 +611,19 @@ static void target_machinize (MIR_context_t ctx) {
ops[4] = flag_reg_op;
new_insn = MIR_new_insn_arr (ctx, MIR_CALL, 5, ops);
gen_add_insn_before (ctx, insn, new_insn);
#else
MIR_op_t res_reg_op = insn->ops[0], va_reg_op = insn->ops[1], mem_op = insn->ops[2], treg_op;
assert (res_reg_op.mode == MIR_OP_REG && va_reg_op.mode == MIR_OP_REG
&& mem_op.mode == MIR_OP_MEM);
/* load and increment va pointer */
treg_op = MIR_new_reg_op (ctx, gen_new_temp_reg (ctx, MIR_T_I64, curr_func_item->u.func));
gen_mov (ctx, insn, MIR_MOV, treg_op, MIR_new_mem_op (ctx, MIR_T_I64, 0, va_reg_op.u.reg, 0, 1));
new_insn = MIR_new_insn (ctx, MIR_MOV, res_reg_op, treg_op);
gen_add_insn_before (ctx, insn, new_insn);
new_insn = MIR_new_insn (ctx, MIR_ADD, treg_op, treg_op, MIR_new_int_op (ctx, 8));
gen_add_insn_before (ctx, insn, new_insn);
gen_mov (ctx, insn, MIR_MOV, MIR_new_mem_op (ctx, MIR_T_I64, 0, va_reg_op.u.reg, 0, 1), treg_op);
#endif
gen_delete_insn (ctx, insn);
} else if (MIR_call_code_p (code)) {
machinize_call (ctx, insn);
@ -589,6 +635,11 @@ static void target_machinize (MIR_context_t ctx) {
and added extension in return (if any). */
uint32_t n_iregs = 0, n_xregs = 0, n_fregs = 0;
#ifdef _WIN64
if (curr_func_item->u.func->nres > 1)
(*MIR_get_error_func (ctx)) (MIR_ret_error,
"Windows x86-64 doesn't support multiple return values");
#endif
assert (curr_func_item->u.func->nres == MIR_insn_nops (ctx, insn));
for (size_t i = 0; i < curr_func_item->u.func->nres; i++) {
assert (insn->ops[i].mode == MIR_OP_REG);
@ -644,7 +695,7 @@ static void target_machinize (MIR_context_t ctx) {
|| code == MIR_LE || code == MIR_ULE || code == MIR_GT || code == MIR_UGT
|| code == MIR_GE || code == MIR_UGE || code == MIR_EQS || code == MIR_NES
|| code == MIR_LTS || code == MIR_ULTS || code == MIR_LES || code == MIR_ULES
|| code == MIR_GTS || code == MIR_UGT || code == MIR_GES || code == MIR_UGES
|| code == MIR_GTS || code == MIR_UGTS || code == MIR_GES || code == MIR_UGES
|| code == MIR_FEQ || code == MIR_FNE || code == MIR_FLT || code == MIR_FLE
|| code == MIR_FGT || code == MIR_FGE || code == MIR_DEQ || code == MIR_DNE
|| code == MIR_DLT || code == MIR_DLE || code == MIR_DGT || code == MIR_DGE) {
@ -683,7 +734,8 @@ static void target_make_prolog_epilog (MIR_context_t ctx, bitmap_t used_hard_reg
func = curr_func_item->u.func;
for (i = saved_hard_regs_num = 0; i <= MAX_HARD_REG; i++)
if (!target_call_used_hard_reg_p (i) && bitmap_bit_p (used_hard_regs, i)) saved_hard_regs_num++;
if (leaf_p && !alloca_p && saved_hard_regs_num == 0 && !func->vararg_p && stack_slots_num == 0)
if (leaf_p && !alloca_p && !stack_arg_func_p && saved_hard_regs_num == 0 && !func->vararg_p
&& stack_slots_num == 0)
return;
sp_reg_op.mode = fp_reg_op.mode = MIR_OP_HARD_REG;
sp_reg_op.u.hard_reg = SP_HARD_REG;
@ -702,6 +754,7 @@ static void target_make_prolog_epilog (MIR_context_t ctx, bitmap_t used_hard_reg
service_area_size = 8;
} else {
service_area_size = reg_save_area_size + 8;
#ifndef _WIN64
start = -(int64_t) service_area_size;
isave (ctx, anchor, start, DI_HARD_REG);
isave (ctx, anchor, start + 8, SI_HARD_REG);
@ -717,6 +770,7 @@ static void target_make_prolog_epilog (MIR_context_t ctx, bitmap_t used_hard_reg
dsave (ctx, anchor, start + 128, XMM5_HARD_REG);
dsave (ctx, anchor, start + 144, XMM6_HARD_REG);
dsave (ctx, anchor, start + 160, XMM7_HARD_REG);
#endif
}
stack_slots_size = stack_slots_num * 8;
/* stack slots, and saved regs as multiple of 16 bytes: */

@ -172,12 +172,14 @@ static inline struct gen_ctx **gen_ctx_loc (MIR_context_t ctx) { return (struct
#define max_int_hard_regs gen_ctx->max_int_hard_regs
#define max_fp_hard_regs gen_ctx->max_fp_hard_regs
#ifdef __x86_64__
#if defined(__x86_64__)
#include "mir-gen-x86_64.c"
#elif defined(__aarch64__)
#include "mir-gen-aarch64.c"
#elif defined(__PPC64__)
#include "mir-gen-ppc64.c"
#elif defined(__s390x__)
#include "mir-gen-s390x.c"
#else
#error "undefined or unsupported generation target"
#endif
@ -4302,7 +4304,7 @@ struct ra_ctx {
size_t curr_age;
/* Slots num for variables. Some variable can take several slots. */
size_t func_stack_slots_num;
bitmap_t func_assigned_hard_regs;
bitmap_t func_used_hard_regs;
};
#define breg_renumber gen_ctx->ra_ctx->breg_renumber
@ -4314,7 +4316,7 @@ struct ra_ctx {
#define loc_profit_ages gen_ctx->ra_ctx->loc_profit_ages
#define curr_age gen_ctx->ra_ctx->curr_age
#define func_stack_slots_num gen_ctx->ra_ctx->func_stack_slots_num
#define func_assigned_hard_regs gen_ctx->ra_ctx->func_assigned_hard_regs
#define func_used_hard_regs gen_ctx->ra_ctx->func_used_hard_regs
static void process_move_to_form_thread (MIR_context_t ctx, mv_t mv) {
struct gen_ctx *gen_ctx = *gen_ctx_loc (ctx);
@ -4385,9 +4387,19 @@ static void setup_loc_profits (MIR_context_t ctx, MIR_reg_t breg) {
setup_loc_profit_from_op (ctx, mv->bb_insn->insn->ops[1], mv->freq);
}
/* Record in func_used_hard_regs every hard register occupied by a value of
   TYPE placed at HARD_REG.  A value may span several consecutive locations
   (target_locs_num gives the count, target_nth_loc each constituent
   location); locations beyond MAX_HARD_REG are stack slots and are not
   recorded. */
static void setup_used_hard_regs (MIR_context_t ctx, MIR_type_t type, MIR_reg_t hard_reg) {
struct gen_ctx *gen_ctx = *gen_ctx_loc (ctx);
MIR_reg_t curr_hard_reg;
int i, slots_num = target_locs_num (hard_reg, type);
for (i = 0; i < slots_num; i++)
if ((curr_hard_reg = target_nth_loc (hard_reg, type, i)) <= MAX_HARD_REG)
bitmap_set_bit_p (func_used_hard_regs, curr_hard_reg);
}
static void assign (MIR_context_t ctx) {
struct gen_ctx *gen_ctx = *gen_ctx_loc (ctx);
MIR_reg_t loc, best_loc, i, reg, breg, var, nregs = get_nregs (ctx);
MIR_reg_t loc, curr_loc, best_loc, i, reg, breg, var, nregs = get_nregs (ctx);
MIR_type_t type;
int slots_num;
int j, k;
@ -4437,7 +4449,7 @@ static void assign (MIR_context_t ctx) {
for (lr = VARR_GET (live_range_t, var_live_ranges, i); lr != NULL; lr = lr->next)
for (j = lr->start; j <= lr->finish; j++) bitmap_set_bit_p (point_used_locs_addr[j], i);
}
bitmap_clear (func_assigned_hard_regs);
bitmap_clear (func_used_hard_regs);
for (i = 0; i < nregs; i++) { /* hard reg and stack slot assignment */
breg = VARR_GET (breg_info_t, sorted_bregs, i).breg;
if (VARR_GET (MIR_reg_t, breg_renumber, breg) != MIR_NON_HARD_REG) continue;
@ -4455,14 +4467,18 @@ static void assign (MIR_context_t ctx) {
for (loc = 0; loc <= func_stack_slots_num + MAX_HARD_REG; loc++) {
if (loc <= MAX_HARD_REG && !target_hard_reg_type_ok_p (loc, type)) continue;
slots_num = target_locs_num (loc, type);
for (k = 0; k < slots_num; k++)
if ((loc + k <= MAX_HARD_REG
&& (target_fixed_hard_reg_p (loc + k)
|| (target_call_used_hard_reg_p (loc + k) && curr_breg_infos[breg].calls_num > 0)))
|| bitmap_bit_p (conflict_locs, loc + k))
if (target_nth_loc (loc, type, slots_num - 1) > func_stack_slots_num + MAX_HARD_REG) break;
for (k = 0; k < slots_num; k++) {
curr_loc = target_nth_loc (loc, type, k);
if ((curr_loc <= MAX_HARD_REG
&& (target_fixed_hard_reg_p (curr_loc)
|| (target_call_used_hard_reg_p (curr_loc)
&& curr_breg_infos[breg].calls_num > 0)))
|| bitmap_bit_p (conflict_locs, curr_loc))
break;
}
if (k < slots_num) continue;
if (loc > MAX_HARD_REG && loc % slots_num != 0)
if (loc > MAX_HARD_REG && (loc - MAX_HARD_REG - 1) % slots_num != 0)
continue; /* we align stack slots according to the type size */
profit = (VARR_GET (size_t, loc_profit_ages, loc) != curr_age
? 0
@ -4475,15 +4491,15 @@ static void assign (MIR_context_t ctx) {
}
slots_num = target_locs_num (best_loc, type);
if (best_loc <= MAX_HARD_REG) {
for (k = 0; k < slots_num; k++) bitmap_set_bit_p (func_assigned_hard_regs, best_loc + k);
setup_used_hard_regs (ctx, type, best_loc);
} else if (best_loc == MIR_NON_HARD_REG) { /* Add stack slot ??? */
for (k = 0; k < slots_num; k++) {
best_loc = VARR_LENGTH (size_t, loc_profits);
if (k == 0) best_loc = VARR_LENGTH (size_t, loc_profits);
VARR_PUSH (size_t, loc_profits, 0);
VARR_PUSH (size_t, loc_profit_ages, 0);
if (k == 0 && (best_loc - MAX_HARD_REG - 1) % slots_num != 0) k--; /* align */
}
func_stack_slots_num = best_loc - MAX_HARD_REG;
best_loc -= slots_num - 1;
func_stack_slots_num = VARR_LENGTH (size_t, loc_profits) - MAX_HARD_REG - 1;
}
#if !MIR_NO_GEN_DEBUG
if (debug_file != NULL) {
@ -4498,9 +4514,11 @@ static void assign (MIR_context_t ctx) {
}
#endif
VARR_SET (MIR_reg_t, breg_renumber, breg, best_loc);
slots_num = target_locs_num (best_loc, type);
for (lr = VARR_GET (live_range_t, var_live_ranges, var); lr != NULL; lr = lr->next)
for (j = lr->start; j <= lr->finish; j++)
for (k = 0; k < slots_num; k++) bitmap_set_bit_p (point_used_locs_addr[j], best_loc + k);
for (k = 0; k < slots_num; k++)
bitmap_set_bit_p (point_used_locs_addr[j], target_nth_loc (best_loc, type, k));
}
for (i = 0; i <= curr_point; i++) bitmap_destroy (VARR_POP (bitmap_t, point_used_locs));
#if !MIR_NO_GEN_DEBUG
@ -4550,6 +4568,7 @@ static MIR_reg_t change_reg (MIR_context_t ctx, MIR_op_t *mem_op, MIR_reg_t reg,
code = MIR_LDMOV;
hard_reg = first_p ? TEMP_LDOUBLE_HARD_REG1 : TEMP_LDOUBLE_HARD_REG2;
}
setup_used_hard_regs (ctx, type, hard_reg);
offset = target_get_stack_slot_offset (ctx, type, loc - MAX_HARD_REG - 1);
*mem_op = _MIR_new_hard_reg_mem_op (ctx, type, offset, FP_HARD_REG, MIR_NON_HARD_REG, 0);
if (hard_reg == MIR_NON_HARD_REG) return hard_reg;
@ -4601,6 +4620,13 @@ static void rewrite (MIR_context_t ctx) {
in_op = *op;
#endif
switch (op->mode) {
case MIR_OP_HARD_REG: bitmap_set_bit_p (func_used_hard_regs, op->u.hard_reg); break;
case MIR_OP_HARD_REG_MEM:
if (op->u.hard_reg_mem.base != MIR_NON_HARD_REG)
bitmap_set_bit_p (func_used_hard_regs, op->u.hard_reg_mem.base);
if (op->u.hard_reg_mem.index != MIR_NON_HARD_REG)
bitmap_set_bit_p (func_used_hard_regs, op->u.hard_reg_mem.index);
break;
case MIR_OP_REG:
hard_reg
= change_reg (ctx, &mem_op, op->u.reg, data_mode, out_p || first_in_p, bb_insn, out_p);
@ -4672,7 +4698,7 @@ static void init_ra (MIR_context_t ctx) {
VARR_CREATE (size_t, loc_profits, 0);
VARR_CREATE (size_t, loc_profit_ages, 0);
conflict_locs = bitmap_create2 (3 * MAX_HARD_REG / 2);
func_assigned_hard_regs = bitmap_create2 (MAX_HARD_REG + 1);
func_used_hard_regs = bitmap_create2 (MAX_HARD_REG + 1);
}
static void finish_ra (MIR_context_t ctx) {
@ -4684,7 +4710,7 @@ static void finish_ra (MIR_context_t ctx) {
VARR_DESTROY (size_t, loc_profits);
VARR_DESTROY (size_t, loc_profit_ages);
bitmap_destroy (conflict_locs);
bitmap_destroy (func_assigned_hard_regs);
bitmap_destroy (func_used_hard_regs);
free (gen_ctx->ra_ctx);
gen_ctx->ra_ctx = NULL;
}
@ -5259,6 +5285,7 @@ static void combine (MIR_context_t ctx) {
}
}
if (change_p) block_change_p = TRUE;
if (code == MIR_BSTART || code == MIR_BEND) last_mem_ref_insn_num = curr_insn_num;
}
for (iter = 0; iter < 2; iter++) { /* update hreg ref info: */
@ -5634,7 +5661,7 @@ void *MIR_gen (MIR_context_t ctx, MIR_item_t func_item) {
#endif
}
#endif /* #ifndef NO_COMBINE */
target_make_prolog_epilog (ctx, func_assigned_hard_regs, func_stack_slots_num);
target_make_prolog_epilog (ctx, func_used_hard_regs, func_stack_slots_num);
#if !MIR_NO_GEN_DEBUG
if (debug_file != NULL) {
fprintf (debug_file, "+++++++++++++MIR after forming prolog/epilog:\n");

@ -5,25 +5,38 @@
// _MIR_get_thunk, _MIR_redirect_thunk, _MIR_get_interp_shim, _MIR_get_ff_call, _MIR_get_wrapper
#define VA_LIST_IS_ARRAY_P 1 /* one element which is a pointer to args */
#define FUNC_DESC_LEN 24
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define PPC64_STACK_HEADER_SIZE 32
#define PPC64_TOC_OFFSET 24
#define PPC64_FUNC_DESC_LEN 0
#else
#define PPC64_STACK_HEADER_SIZE 48
#define PPC64_TOC_OFFSET 40
#define PPC64_FUNC_DESC_LEN 24
#endif
static void ppc64_push_func_desc (MIR_context_t ctx);
void (*ppc64_func_desc) (MIR_context_t ctx) = ppc64_push_func_desc;
/* Start a new code buffer.  PPC64_FUNC_DESC_LEN bytes copied from the
   ppc64_func_desc function pointer are pushed first; on ELFv2
   (little-endian) the length is 0, so nothing is prepended. */
static void ppc64_push_func_desc (MIR_context_t ctx) {
VARR_TRUNC (uint8_t, machine_insns, 0);
for (int i = 0; i < PPC64_FUNC_DESC_LEN; i++)
VARR_PUSH (uint8_t, machine_insns, ((uint8_t *) ppc64_func_desc)[i]);
}
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* Big-endian (ELFv1) only: make function descriptor DESC refer to code
   address TO by overwriting its first doubleword.  Both pointers must be
   at least 4-byte aligned. */
static void ppc64_redirect_func_desc (MIR_context_t ctx, void *desc, void *to) {
mir_assert (((uint64_t) desc & 0x3) == 0 && ((uint64_t) to & 0x3) == 0); /* alignment */
_MIR_change_code (ctx, desc, (uint8_t *) &to, sizeof (to));
}
#endif
/* Publish the code accumulated in machine_insns and return its address.
   On big-endian (ELFv1) the buffer begins with a function descriptor,
   which is patched to point just past itself, i.e. at the actual code. */
static void *ppc64_publish_func_and_redirect (MIR_context_t ctx) {
void *res = _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
ppc64_redirect_func_desc (ctx, res, (uint8_t *) res + PPC64_FUNC_DESC_LEN);
#endif
return res;
}
@ -37,70 +50,6 @@ static void push_insns (MIR_context_t ctx, const uint32_t *pat, size_t pat_len)
for (size_t i = 0; i < pat_len; i++) VARR_PUSH (uint8_t, machine_insns, p[i]);
}
void *_MIR_get_bstart_builtin (MIR_context_t ctx) {
static const uint32_t bstart_code[] = {
0x7c230b78, /* mr 3,1 */
0x4e800020, /* blr */
};
ppc64_push_func_desc (ctx);
push_insns (ctx, bstart_code, sizeof (bstart_code));
return ppc64_publish_func_and_redirect (ctx);
}
void *_MIR_get_bend_builtin (MIR_context_t ctx) {
static const uint32_t bend_code[] = {
0xe8010000, /* ld r0,0(r1) */
0xf8030000, /* std r0,0(r3) */
0xe8010028, /* ld r0,40(r1) */
0xf8030028, /* std r0,40(r3) */
0x7c611b78, /* mr r1,r3 */
0x4e800020, /* blr */
};
ppc64_push_func_desc (ctx);
push_insns (ctx, bend_code, sizeof (bend_code));
return ppc64_publish_func_and_redirect (ctx);
}
void *_MIR_get_thunk (MIR_context_t ctx) { /* emit 3 doublewords for func descriptor: */
ppc64_push_func_desc (ctx);
return ppc64_publish_func_and_redirect (ctx);
}
void _MIR_redirect_thunk (MIR_context_t ctx, void *thunk, void *to) {
ppc64_redirect_func_desc (ctx, thunk, to);
}
struct ppc64_va_list {
uint64_t *arg_area;
};
void *va_arg_builtin (void *p, uint64_t t) {
struct ppc64_va_list *va = p;
MIR_type_t type = t;
int fp_p = type == MIR_T_F || type == MIR_T_D;
void *a = va->arg_area;
if (type == MIR_T_F || type == MIR_T_I32) {
a = (char *) a + 4; /* 2nd word of doubleword */
va->arg_area = (uint64_t *) ((char *) a + 4);
} else if (type == MIR_T_LD) {
va->arg_area += 2;
} else {
va->arg_area++;
}
return a;
}
void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a) {
struct ppc64_va_list **va = p;
va_list *vap = a;
assert (sizeof (struct ppc64_va_list) == sizeof (va_list));
*va = (struct ppc64_va_list *) vap;
}
void va_end_interp_builtin (MIR_context_t ctx, void *p) {}
static void ppc64_gen_mov (MIR_context_t ctx, unsigned to, unsigned from) {
/* or to,from,from: */
push_insn (ctx, (31 << 26) | (444 << 1) | (from << 21) | (to << 16) | (from << 11));
@ -155,12 +104,103 @@ static void ppc64_gen_address (MIR_context_t ctx, unsigned int reg, void *p) {
}
static void ppc64_gen_jump (MIR_context_t ctx, unsigned int reg, int call_p) {
ppc64_gen_ld (ctx, 0, reg, 0, MIR_T_I64); /* 0 = func addr */
ppc64_gen_ld (ctx, 2, reg, 8, MIR_T_I64); /* r2 = TOC */
push_insn (ctx, (31 << 26) | (467 << 1) | (0 << 21) | (9 << 16)); /* mctr 0 */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
assert (reg != 0);
ppc64_gen_ld (ctx, 0, reg, 0, MIR_T_I64); /* 0 = func addr */
ppc64_gen_ld (ctx, 2, reg, 8, MIR_T_I64); /* r2 = TOC */
push_insn (ctx, (31 << 26) | (467 << 1) | (0 << 21) | (9 << 16)); /* mctr 0 */
#else
if (reg != 12) ppc64_gen_mov (ctx, 12, reg); /* 12 = func addr */
push_insn (ctx, (31 << 26) | (467 << 1) | (12 << 21) | (9 << 16)); /* mctr 12 */
#endif
push_insn (ctx, (19 << 26) | (528 << 1) | (20 << 21) | (call_p ? 1 : 0)); /* bcctr[l] */
}
/* Built-in for MIR_BSTART: generated code returns the current stack
   pointer (r1) in the result register r3. */
void *_MIR_get_bstart_builtin (MIR_context_t ctx) {
static const uint32_t bstart_code[] = {
0x7c230b78, /* mr 3,1 */
0x4e800020, /* blr */
};
ppc64_push_func_desc (ctx);
push_insns (ctx, bstart_code, sizeof (bstart_code));
return ppc64_publish_func_and_redirect (ctx);
}
/* Built-in for MIR_BEND: restore the stack pointer to the value in r3.
   Before switching, the back-chain word (offset 0) and the saved TOC word
   (PPC64_TOC_OFFSET, which differs between ELFv1 and ELFv2) are copied
   from the current frame to the new stack top. */
void *_MIR_get_bend_builtin (MIR_context_t ctx) {
static const uint32_t bend_finish_code[] = {
0x7c611b78, /* mr r1,r3 */
0x4e800020, /* blr */
};
ppc64_push_func_desc (ctx);
ppc64_gen_ld (ctx, 0, 1, 0, MIR_T_I64);                 /* r0 = 0(r1) */
ppc64_gen_st (ctx, 0, 3, 0, MIR_T_I64);                 /* 0(r3) = r0 */
ppc64_gen_ld (ctx, 0, 1, PPC64_TOC_OFFSET, MIR_T_I64);  /* r0 = toc_offset(r1) */
ppc64_gen_st (ctx, 0, 3, PPC64_TOC_OFFSET, MIR_T_I64);  /* toc_offset(r3) = r0 */
push_insns (ctx, bend_finish_code, sizeof (bend_finish_code));
return ppc64_publish_func_and_redirect (ctx);
}
/* Create an (initially unset) thunk.  BE (ELF v1): the thunk is just a
   function descriptor, so redirection only rewrites the descriptor.
   LE (ELF v2): reserve a fixed-size nop-filled code area that
   _MIR_redirect_thunk overwrites later. */
void *_MIR_get_thunk (MIR_context_t ctx) { /* emit 3 doublewords for func descriptor: */
  ppc64_push_func_desc (ctx);
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return ppc64_publish_func_and_redirect (ctx);
#else
  const uint32_t nop_insn = 24 << (32 - 6); /* ori 0,0,0 */
  const int max_thunk_len = (7 * 8);
  VARR_TRUNC (uint8_t, machine_insns, 0);
  /* NOTE(review): push_insn presumably emits one 4-byte insn per call, so this
     reserves max_thunk_len instructions rather than bytes -- oversized but
     safe; confirm the intended unit. */
  for (int i = 0; i < max_thunk_len; i++) push_insn (ctx, nop_insn);
  return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
                            VARR_LENGTH (uint8_t, machine_insns));
#endif
}
/* Redirect THUNK so that calling it transfers control to TO.
   BE: rewrite the thunk's function descriptor in place.
   LE: overwrite the thunk code with "r12 = TO; mtctr r12; bctr". */
void _MIR_redirect_thunk (MIR_context_t ctx, void *thunk, void *to) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  ppc64_redirect_func_desc (ctx, thunk, to);
#else
  static const uint32_t global_entry_end[] = {
    0x7d8903a6, /* mtctr r12 */
    0x4e800420, /* bctr */
  };
  VARR_TRUNC (uint8_t, machine_insns, 0);
  /* ELF v2 convention: the entry address goes through r12 so the callee can
     establish its TOC. */
  ppc64_gen_address (ctx, 12, to);
  push_insns (ctx, global_entry_end, sizeof (global_entry_end));
  _MIR_change_code (ctx, thunk, VARR_ADDR (uint8_t, machine_insns),
                    VARR_LENGTH (uint8_t, machine_insns));
#endif
}
/* ppc64 va_list: a single pointer walking the doubleword argument slots. */
struct ppc64_va_list {
  uint64_t *arg_area; /* next argument slot to read */
};
/* Interpreter va_arg: return the address of the next variadic argument of
   type T and advance the arg pointer.  A long double occupies two
   doubleword slots, every other type one.
   Fix: removed the unused local fp_p (computed but never read). */
void *va_arg_builtin (void *p, uint64_t t) {
  struct ppc64_va_list *va = p;
  MIR_type_t type = t;
  void *a = va->arg_area;
  if (type == MIR_T_LD) {
    va->arg_area += 2; /* long double takes two doublewords */
  } else {
    va->arg_area++;
  }
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  /* In BE mode sub-doubleword values are right-justified in their slot. */
  if (type == MIR_T_F || type == MIR_T_I32) a = (char *) a + 4; /* 2nd word of doubleword */
#endif
  return a;
}
/* Interpreter va_start: store the C va_list A, reinterpreted as our ppc64
   representation, into the interpreter's va slot P. */
void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a) {
  assert (sizeof (struct ppc64_va_list) == sizeof (va_list));
  *(struct ppc64_va_list **) p = (struct ppc64_va_list *) (va_list *) a;
}
/* Interpreter va_end: no cleanup needed for the ppc64 va_list. */
void va_end_interp_builtin (MIR_context_t ctx, void *p) {}
/* Generation: fun (fun_addr, res_arg_addresses):
save lr (r1 + 16); allocate and form minimal stack frame (with necessary param area); save r14;
r12=fun_addr (r3); r14 = res_arg_addresses (r4);
@ -186,18 +226,19 @@ void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, s
ppc64_push_func_desc (ctx);
for (uint32_t i = 0; i < nargs; i++) param_size += arg_types[i] == MIR_T_LD ? 16 : 8;
if (param_size < 64) param_size = 64;
frame_size = 48 + param_size + 8; /* +local var to save res_reg */
if (frame_size % 8 != 0) frame_size += 8; /* align */
ppc64_gen_st (ctx, 2, 1, 40, MIR_T_I64);
frame_size = PPC64_STACK_HEADER_SIZE + param_size + 8; /* +local var to save res_reg */
if (frame_size % 16 != 0) frame_size += 8; /* align */
ppc64_gen_st (ctx, 2, 1, PPC64_TOC_OFFSET, MIR_T_I64);
push_insns (ctx, start_pattern, sizeof (start_pattern));
ppc64_gen_stdu (ctx, -frame_size);
ppc64_gen_st (ctx, res_reg, 1, 48 + param_size, MIR_T_I64); /* save res_reg */
ppc64_gen_st (ctx, res_reg, 1, PPC64_STACK_HEADER_SIZE + param_size,
MIR_T_I64); /* save res_reg */
mir_assert (sizeof (long double) == 16);
ppc64_gen_mov (ctx, res_reg, 4); /* results & args */
ppc64_gen_mov (ctx, 12, 3); /* func addr */
n_gpregs = n_fpregs = 0;
param_offset = nres * 16; /* args start */
disp = 48; /* param area start */
disp = PPC64_STACK_HEADER_SIZE; /* param area start */
for (uint32_t i = 0; i < nargs; i++) { /* load args: */
type = arg_types[i];
if ((type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD) && n_fpregs < 13) {
@ -205,7 +246,7 @@ void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, s
if (vararg_p) {
if (n_gpregs >= 8) {
ppc64_gen_st (ctx, 1 + n_fpregs, 1, disp, MIR_T_D);
} else { /* load gp reg to */
} else { /* load into gp reg too */
ppc64_gen_st (ctx, 1 + n_fpregs, 1, -8, MIR_T_D);
ppc64_gen_ld (ctx, 3 + n_gpregs, 1, -8, MIR_T_I64);
}
@ -228,8 +269,6 @@ void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, s
ppc64_gen_st (ctx, 0, 1, disp + 8, MIR_T_D);
}
}
} else if (n_gpregs < 8) {
ppc64_gen_ld (ctx, n_gpregs + 3, res_reg, param_offset, MIR_T_I64);
} else if (type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD) {
ppc64_gen_ld (ctx, 0, res_reg, param_offset, type);
ppc64_gen_st (ctx, 0, 1, disp, MIR_T_D);
@ -237,6 +276,8 @@ void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, s
ppc64_gen_ld (ctx, 0, res_reg, param_offset + 8, type);
ppc64_gen_st (ctx, 0, 1, disp + 8, MIR_T_D);
}
} else if (n_gpregs < 8) {
ppc64_gen_ld (ctx, n_gpregs + 3, res_reg, param_offset, MIR_T_I64);
} else {
ppc64_gen_ld (ctx, 0, res_reg, param_offset, MIR_T_I64);
ppc64_gen_st (ctx, 0, 1, disp, MIR_T_I64);
@ -267,7 +308,8 @@ void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, s
}
disp += 16;
}
ppc64_gen_ld (ctx, res_reg, 1, 48 + param_size, MIR_T_I64); /* restore res_reg */
ppc64_gen_ld (ctx, res_reg, 1, PPC64_STACK_HEADER_SIZE + param_size,
MIR_T_I64); /* restore res_reg */
ppc64_gen_addi (ctx, 1, 1, frame_size);
push_insns (ctx, finish_pattern, sizeof (finish_pattern));
return ppc64_publish_func_and_redirect (ctx);
@ -296,23 +338,14 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
0x7c0803a6, /* mtlr r0 */
0x4e800020, /* blr */
};
static uint32_t save_gp_regs_pattern[] = {
0xf8610030, /* std r3,48(r1) */
0xf8810038, /* std r4,56(r1) */
0xf8a10040, /* std r5,64(r1) */
0xf8c10048, /* std r6,72(r1) */
0xf8e10050, /* std r7,80(r1) */
0xf9010058, /* std r8,88(r1) */
0xf9210060, /* std r9,96(r1) */
0xf9410068, /* std r10,104(r1) */
};
VARR_TRUNC (uint8_t, machine_insns, 0);
frame_size = 112; /* 6(frame start) + 8(param area) */
local_var_size = nres * 16 + 8; /* saved r14, results */
frame_size = PPC64_STACK_HEADER_SIZE + 64; /* header + 8(param area) */
local_var_size = nres * 16 + 8; /* saved r14, results */
if (vararg_p) {
push_insns (ctx, save_gp_regs_pattern, sizeof (save_gp_regs_pattern));
ppc64_gen_addi (ctx, va_reg, 1, 48);
for (unsigned reg = 3; reg <= 10; reg++) /* std rn,dispn(r1) : */
ppc64_gen_st (ctx, reg, 1, PPC64_STACK_HEADER_SIZE + (reg - 3) * 8, MIR_T_I64);
ppc64_gen_addi (ctx, va_reg, 1, PPC64_STACK_HEADER_SIZE);
} else {
ppc64_gen_mov (ctx, caller_r1, 1); /* caller frame r1 */
for (uint32_t i = 0; i < nargs; i++) {
@ -321,14 +354,15 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
}
}
frame_size += local_var_size;
if (frame_size % 8 != 0) frame_size += 8; /* align */
if (frame_size % 16 != 0) frame_size += 8; /* align */
push_insns (ctx, start_pattern, sizeof (start_pattern));
ppc64_gen_stdu (ctx, -frame_size);
ppc64_gen_st (ctx, res_reg, 1, 48 + 64, MIR_T_I64); /* save res_reg */
if (!vararg_p) { /* save args in local vars: */
disp = 112 + nres * 16 + 8; /* 48 + 64 + nres * 16 + 8: start of local vars to keep args */
ppc64_gen_st (ctx, res_reg, 1, PPC64_STACK_HEADER_SIZE + 64, MIR_T_I64); /* save res_reg */
if (!vararg_p) { /* save args in local vars: */
/* header_size + 64 + nres * 16 + 8 -- start of stack memory to keep args: */
disp = PPC64_STACK_HEADER_SIZE + 64 + nres * 16 + 8;
ppc64_gen_addi (ctx, va_reg, 1, disp);
param_offset = 48;
param_offset = PPC64_STACK_HEADER_SIZE;
n_gpregs = n_fpregs = 0;
for (uint32_t i = 0; i < nargs; i++) {
type = arg_vars[i].type;
@ -363,13 +397,13 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
n_gpregs += type == MIR_T_LD ? 2 : 1;
}
}
ppc64_gen_addi (ctx, res_reg, 1, 64 + 48 + 8);
ppc64_gen_addi (ctx, res_reg, 1, 64 + PPC64_STACK_HEADER_SIZE + 8);
ppc64_gen_address (ctx, 3, ctx);
ppc64_gen_address (ctx, 4, func_item);
ppc64_gen_mov (ctx, 5, va_reg);
ppc64_gen_mov (ctx, 6, res_reg);
ppc64_gen_address (ctx, 7, handler);
ppc64_gen_jump (ctx, 7, TRUE);
ppc64_gen_address (ctx, 12, handler);
ppc64_gen_jump (ctx, 12, TRUE);
disp = n_gpregs = n_fpregs = 0;
for (uint32_t i = 0; i < nres; i++) {
type = res_types[i];
@ -390,78 +424,50 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
}
disp += 16;
}
ppc64_gen_ld (ctx, res_reg, 1, 48 + 64, MIR_T_I64); /* restore res_reg */
ppc64_gen_ld (ctx, res_reg, 1, PPC64_STACK_HEADER_SIZE + 64, MIR_T_I64); /* restore res_reg */
ppc64_gen_addi (ctx, 1, 1, frame_size);
push_insns (ctx, finish_pattern, sizeof (finish_pattern));
return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
}
/* Brief: save lr (r1+16); update r1, save all param regs (r1+header+64);
   allocate and form minimal wrapper stack frame (param area = 8*8);
   r3 = call hook_address (ctx, called_func); r12=r3;
   restore params regs (r1+header+64), r1, lr (r1+16); ctr=r12; b *ctr */
/* Build a wrapper that calls hook_address (ctx, called_func) and jumps to the
   code address it returns.  All possible argument registers (r3-r10, f1-f13)
   are saved around the hook call so the final target sees the original
   arguments.  Fix: the merged diff view left the pre-commit hard-coded
   prologue/epilogue save arrays (std/stfd at fixed 112..272 offsets, stdu
   -280, addi 280) alongside the new PPC64_STACK_HEADER_SIZE-based code, so
   the frame was allocated/freed twice and the hook was called twice (via r5
   and r12).  Keep only the header-relative version. */
void *_MIR_get_wrapper (MIR_context_t ctx, MIR_item_t called_func, void *hook_address) {
  static uint32_t prologue[] = {
    0x7c0802a6, /* mflr r0 */
    0xf8010010, /* std r0,16(r1) */
  };
  static uint32_t epilogue[] = {
    0xe8010010, /* ld r0,16(r1) */
    0x7c0803a6, /* mtlr r0 */
  };
  int frame_size = PPC64_STACK_HEADER_SIZE + 8 * 8 + 13 * 8 + 8 * 8;
  VARR_TRUNC (uint8_t, machine_insns, 0);
  push_insns (ctx, prologue, sizeof (prologue));
  /* stdu r1,n(r1): header + 8(gp args) + 13(fp args) + 8(param area): */
  if (frame_size % 16 != 0) frame_size += 8; /* 16-byte stack alignment */
  ppc64_gen_stdu (ctx, -frame_size);
  for (unsigned reg = 3; reg <= 10; reg++) /* std rn,dispn(r1) : */
    ppc64_gen_st (ctx, reg, 1, PPC64_STACK_HEADER_SIZE + (reg - 3) * 8 + 64, MIR_T_I64);
  for (unsigned reg = 1; reg <= 13; reg++) /* stfd fn,dispn(r1) : */
    ppc64_gen_st (ctx, reg, 1, PPC64_STACK_HEADER_SIZE + (reg - 1 + 8) * 8 + 64, MIR_T_D);
  ppc64_gen_address (ctx, 3, ctx);
  ppc64_gen_address (ctx, 4, called_func);
  ppc64_gen_address (ctx, 12, hook_address);
  ppc64_gen_jump (ctx, 12, TRUE);
  ppc64_gen_mov (ctx, 12, 3); /* r12 = hook result (target code address) */
  for (unsigned reg = 3; reg <= 10; reg++) /* ld rn,dispn(r1) : */
    ppc64_gen_ld (ctx, reg, 1, PPC64_STACK_HEADER_SIZE + (reg - 3) * 8 + 64, MIR_T_I64);
  for (unsigned reg = 1; reg <= 13; reg++) /* lfd fn,dispn(r1) : */
    ppc64_gen_ld (ctx, reg, 1, PPC64_STACK_HEADER_SIZE + (reg - 1 + 8) * 8 + 64, MIR_T_D);
  ppc64_gen_addi (ctx, 1, 1, frame_size);
  push_insns (ctx, epilogue, sizeof (epilogue));
  push_insn (ctx, (31 << 26) | (467 << 1) | (12 << 21) | (9 << 16)); /* mtctr r12 */
  push_insn (ctx, (19 << 26) | (528 << 1) | (20 << 21));             /* bcctr */
  return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
                            VARR_LENGTH (uint8_t, machine_insns));
}

@ -0,0 +1,392 @@
/* This file is a part of MIR project.
Copyright (C) 2018-2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* Long doubles (-mlong-double=128) are always passed by its address (for args and results) */
#if 0 && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#error "s390x works only in BE mode"
#endif
#define VA_LIST_IS_ARRAY_P 1 /* one element array of struct s390x_va_list */
#define S390X_STACK_HEADER_SIZE 160
/* Append PAT_LEN bytes of machine code from PAT to the insn buffer. */
static void push_insns (MIR_context_t ctx, const uint8_t *pat, size_t pat_len) {
  const uint8_t *end = pat + pat_len;
  while (pat != end) {
    VARR_PUSH (uint8_t, machine_insns, *pat);
    pat++;
  }
}
/* Emit "lgr TO,FROM" -- 64-bit register-to-register move (4-byte RRE form). */
static void s390x_gen_mov (MIR_context_t ctx, unsigned to, unsigned from) {
  uint32_t lgr = (0xb904 << 16) | (to << 4) | from; /* lgr to,from: */
  assert (to < 16 && from < 16);
  push_insns (ctx, (uint8_t *) &lgr, 4);
}
/* Emit "mvghi DISP(BASE),VAL" -- store the 16-bit signed immediate VAL as a
   doubleword at DISP(BASE) (6-byte SIL form).  DISP must fit in an unsigned
   12-bit field.  The trailing << 16 left-justifies the 6 insn bytes in the
   uint64_t so the first 6 bytes pushed (s390x is big-endian) are the insn. */
static void s390x_gen_mvi (MIR_context_t ctx, int val, unsigned base, int disp) {
  uint64_t mvghi /* mvghi disp(base), val: */
    = ((0xe548l << 32) | ((uint64_t) base << 28) | ((disp & 0xfff) << 16) | (val & 0xffff)) << 16;
  assert (base < 16 && 0 <= disp && disp < (1 << 12) && -(1 << 15) < val && val < (1 << 15));
  push_insns (ctx, (uint8_t *) &mvghi, 6);
}
/* Emit a 6-byte RXY-form load (LD_P) or store of REG at DISP(BASE).
   Integer types use the 0xe3 opcode family, F/D use the 0xed FP family;
   the signed 20-bit DISP is split into low 12 (dl) and high 8 (dh) bits.
   MIR_T_LD is never passed here (always handled by address). */
static void s390x_gen_ld_st (MIR_context_t ctx, unsigned reg, unsigned base, int disp,
                             MIR_type_t type, int ld_p) {
  int single_p = type == MIR_T_F;
  int double_p = type == MIR_T_D;
  uint64_t dl = disp & 0xfff, dh = (disp >> 12) & 0xff;
  uint64_t common = ((uint64_t) reg << 36) | ((uint64_t) base << 28) | (dl << 16) | (dh << 8);
  /* Pick the low opcode byte for the integer (0xe3-family) insn: */
  uint64_t lgopcode
    = (type == MIR_T_I8
         ? (ld_p ? 0x77 : 0x72)
         : type == MIR_T_U8
             ? (ld_p ? 0x90 : 0x72)
             : type == MIR_T_I16
                 ? (ld_p ? 0x78 : 0x70)
                 : type == MIR_T_U16
                     ? (ld_p ? 0x91 : 0x70)
                     : type == MIR_T_I32
                         ? (ld_p ? 0x14 : 0x50)
                         : type == MIR_T_U32 ? (ld_p ? 0x16 : 0x50) : (ld_p ? 0x04 : 0x24));
  uint64_t g = ((0xe3l << 40) | common | lgopcode) << 16;
  uint64_t ey = ((0xedl << 40) | common | (ld_p ? 0x64 : 0x66)) << 16; /* ley/stey */
  uint64_t dy = ((0xedl << 40) | common | (ld_p ? 0x65 : 0x67)) << 16; /* ldy/stdy */
  /* (lg|lgf|llgf|lgb|llgc|lhy|llgh|ley|ldy|stg|sty|sthy|stcy|stey|stdy) reg, disp(base): */
  assert (type != MIR_T_LD && reg < 16 && base < 16 && -(1 << 19) < disp && disp < (1 << 19));
  push_insns (ctx, (uint8_t *) (single_p ? &ey : double_p ? &dy : &g), 6);
}
/* Emit a load of TYPE into register TO from DISP(BASE). */
static void s390x_gen_ld (MIR_context_t ctx, unsigned to, unsigned base, int disp,
                          MIR_type_t type) {
  s390x_gen_ld_st (ctx, to, base, disp, type, TRUE);
}
/* Emit a store of register FROM (of TYPE) to DISP(BASE). */
static void s390x_gen_st (MIR_context_t ctx, unsigned from, unsigned base, int disp,
                          MIR_type_t type) {
  s390x_gen_ld_st (ctx, from, base, disp, type, FALSE);
}
/* Emit "lmg FROM,TO,DISP(BASE)" (LD_P) or "stmg FROM,TO,DISP(BASE)" --
   load/store multiple 64-bit registers (6-byte RSY form). */
static void s390x_gen_ldstm (MIR_context_t ctx, unsigned from, unsigned to, unsigned base, int disp,
                             int ld_p) {
  uint64_t dl = disp & 0xfff, dh = (disp >> 12) & 0xff;
  uint64_t common = ((uint64_t) from << 36) | ((uint64_t) to << 32) | ((uint64_t) base << 28)
                    | (dl << 16) | (dh << 8);
  uint64_t g = ((0xebl << 40) | common | (ld_p ? 0x4 : 0x24)) << 16;
  /* (lmg|stmg) from,to,disp(base): */
  assert (from < 16 && to < 16 && base < 16 && -(1 << 19) < disp && disp < (1 << 19));
  push_insns (ctx, (uint8_t *) &g, 6);
}
/* Emit an unconditional branch through REG: "balr 14,REG" when CALL_P
   (return address captured in r14), otherwise "bcr 15,REG". */
static void s390x_gen_jump (MIR_context_t ctx, unsigned int reg, int call_p) {
  uint16_t bcr = (0x7 << 8) | (15 << 4) | reg;  /* bcr 15,reg: */
  uint16_t balr = (0x5 << 8) | (14 << 4) | reg; /* balr 14,reg: */
  assert (reg < 16);
  push_insns (ctx, (uint8_t *) (call_p ? &balr : &bcr), 2);
}
/* Emit "lay DST,DISP(SRC)": DST = SRC + DISP for a signed 20-bit DISP.
   Used for address arithmetic and for adjusting the stack pointer r15. */
static void s390x_gen_addi (MIR_context_t ctx, unsigned dst, unsigned src, int disp) {
  uint64_t dl = disp & 0xfff, dh = (disp >> 12) & 0xff;
  uint64_t ops = ((uint64_t) dst << 36) | ((uint64_t) src << 28) | (dl << 16) | (dh << 8);
  uint64_t lay = ((0xe3l << 40) | ops | 0x71) << 16; /* lay dst,disp(src) */
  assert (dst < 16 && src < 16 && -(1 << 19) < disp && disp < (1 << 19));
  push_insns (ctx, (uint8_t *) &lay, 6);
}
/* Load the three 64-bit constants A1, A2, A3 into registers R1, R2, R3.
   The constants are embedded right after the code: the PC-relative "lalr"
   puts the address of the 8-byte-aligned data block into R1, three lg's
   read the values (R1 loaded last since it is the base), and brc jumps
   over the data.  PADDING aligns the data block to 8 bytes. */
static void s390x_gen_3addrs (MIR_context_t ctx, unsigned int r1, void *a1, unsigned int r2,
                              void *a2, unsigned int r3, void *a3) {
  /* 6b:lalr r3,22+align;6b:lg r1,0(r3);6b:lg r2,8(r3);6b:lg r3,16(r3);4b:bc m15,28;align;a1-a3 */
  size_t rem = (VARR_LENGTH (uint8_t, machine_insns) + 28) % 8;
  size_t padding = rem == 0 ? 0 : 8 - rem;
  uint64_t lalr = ((0xc0l << 40) | ((uint64_t) r1 << 36) | (28 + padding) / 2) << 16;
  uint32_t brc = (0xa7 << 24) | (15 << 20) | (4 << 16) | (28 + padding) / 2; /* brc m15,28: */
  assert (r1 != 0);
  push_insns (ctx, (uint8_t *) &lalr, 6);
  s390x_gen_ld (ctx, r3, r1, 16, MIR_T_I64); /* lg r3,16(r1) */
  s390x_gen_ld (ctx, r2, r1, 8, MIR_T_I64);  /* lg r2,8(r1) */
  s390x_gen_ld (ctx, r1, r1, 0, MIR_T_I64);  /* lg r1,0(r1) */
  push_insns (ctx, (uint8_t *) &brc, 4);
  for (size_t i = 0; i < padding; i++) VARR_PUSH (uint8_t, machine_insns, 0);
  push_insns (ctx, (uint8_t *) &a1, 8);
  push_insns (ctx, (uint8_t *) &a2, 8);
  push_insns (ctx, (uint8_t *) &a3, 8);
}
/* Build the bstart builtin: return the current stack pointer (r15) in r2. */
void *_MIR_get_bstart_builtin (MIR_context_t ctx) {
  VARR_TRUNC (uint8_t, machine_insns, 0);
  s390x_gen_mov (ctx, 2, 15);      /* lgr r2,15 */
  s390x_gen_jump (ctx, 14, FALSE); /* bcr m15,r14 */
  return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
                            VARR_LENGTH (uint8_t, machine_insns));
}
/* Build the bend builtin: r2 = stack pointer saved by bstart.  Copy the
   back chain from the current frame into the target frame, switch r15 to
   it and return. */
void *_MIR_get_bend_builtin (MIR_context_t ctx) {
  VARR_TRUNC (uint8_t, machine_insns, 0);
  s390x_gen_ld (ctx, 0, 15, 0, MIR_T_I64); /* r0 = 0(r15) */
  s390x_gen_st (ctx, 0, 2, 0, MIR_T_I64);  /* 0(r2) = r0 */
  s390x_gen_mov (ctx, 15, 2);              /* lgr r15,2 */
  s390x_gen_jump (ctx, 14, FALSE);         /* bcr m15,r14 */
  return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
                            VARR_LENGTH (uint8_t, machine_insns));
}
/* Reserve a zero-filled code area large enough for any code that
   _MIR_redirect_thunk may later write into it. */
void *_MIR_get_thunk (MIR_context_t ctx) {
  static const uint8_t thunk_placeholder[4 * 8] = {0}; /* max len, see _MIR_redirect_thunk */
  VARR_TRUNC (uint8_t, machine_insns, 0);
  push_insns (ctx, thunk_placeholder, sizeof (thunk_placeholder));
  return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
                            VARR_LENGTH (uint8_t, machine_insns));
}
/* Redirect THUNK so that calling it transfers control to TO.  When TO is
   within brcl's signed 32-bit halfword-relative range, a single
   "brcl 15,offset" suffices; otherwise embed the absolute 64-bit address
   after the code, load it PC-relatively and branch through r1. */
void _MIR_redirect_thunk (MIR_context_t ctx, void *thunk, void *to) {
  int64_t offset = (uint8_t *) to - (uint8_t *) thunk;
  VARR_TRUNC (uint8_t, machine_insns, 0);
  assert (offset % 2 == 0); /* branch offsets are in halfwords */
  offset /= 2;
  if (-(1l << 31) < offset && offset < (1l << 31)) { /* brcl m15,offset: */
    uint64_t brcl = ((0xc0l << 40) | (15l << 36) | (4l << 32) | offset & 0xffffffff) << 16;
    push_insns (ctx, (uint8_t *) &brcl, 6);
  } else { /* 6b:lalr r1,8+padding; 6b:lg r1,0(r1); 2b:bcr m15,r1;padding; 64-bit address: */
    size_t rem = (VARR_LENGTH (uint8_t, machine_insns) + 14) % 8;
    size_t padding = rem == 0 ? 0 : 8 - rem; /* align the embedded address to 8 */
    uint64_t lalr = ((0xc0l << 40) | (1l << 36) | (14 + padding) / 2) << 16;
    uint64_t lg = ((0xe3l << 40) | (1l << 36) | (1l << 28) | 0x4) << 16;
    uint16_t bcr = (0x7 << 8) | (15 << 4) | 1; /* bcr 15,r1: */
    push_insns (ctx, (uint8_t *) &lalr, 6);
    push_insns (ctx, (uint8_t *) &lg, 6);
    push_insns (ctx, (uint8_t *) &bcr, 2);
    for (size_t i = 0; i < padding; i++) VARR_PUSH (uint8_t, machine_insns, 0);
    push_insns (ctx, (uint8_t *) &to, 8);
  }
  _MIR_change_code (ctx, thunk, VARR_ADDR (uint8_t, machine_insns),
                    VARR_LENGTH (uint8_t, machine_insns));
}
/* s390x ELF ABI va_list layout -- must match the C library's definition
   (checked by the assert in va_start_interp_builtin). */
struct s390x_va_list {
  long __gpr, __fpr;         /* number of general/FP register args read until now */
  void *__overflow_arg_area; /* argument on the stack to read next */
  void *__reg_save_area;     /* curr func frame start */
};
/* Interpreter va_arg: return the address of the next variadic argument of
   type T and advance the va_list state.  The first 5 integer args live in
   the register save area at offset 16, the first 4 FP args at offset 128;
   later args come from the overflow (stack) area, 8 bytes each.  MIR_T_LD
   values are always passed by address, so the fetched doubleword is
   dereferenced once more. */
void *va_arg_builtin (void *p, uint64_t t) {
  struct s390x_va_list *va = p;
  MIR_type_t type = t;
  int fp_p = type == MIR_T_F || type == MIR_T_D;
  void *a;
  if (!fp_p) { /* integer argument (or LD passed by address): */
    if (va->__gpr < 5) {
      a = (char *) va->__reg_save_area + 16 + 8 * va->__gpr;
    } else {
      a = va->__overflow_arg_area;
      va->__overflow_arg_area = (char *) va->__overflow_arg_area + 8;
    }
    va->__gpr++;
    if (type == MIR_T_LD) a = *(void **) a; /* always passed by address */
  } else { /* floating-point argument: */
    if (va->__fpr < 4) {
      a = (char *) va->__reg_save_area + 128 + 8 * va->__fpr;
    } else {
      a = va->__overflow_arg_area;
      va->__overflow_arg_area = (char *) va->__overflow_arg_area + 8;
    }
    va->__fpr++;
  }
  /* s390x is big-endian: sub-doubleword values are right-justified. */
  if (type == MIR_T_F || type == MIR_T_I32) a = (char *) a + 4; /* 2nd word of doubleword */
  return a;
}
/* Interpreter va_start: copy the C va_list state from A into the
   interpreter's va slot P. */
void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a) {
  assert (sizeof (struct s390x_va_list) == sizeof (va_list));
  *(struct s390x_va_list *) p = *(struct s390x_va_list *) (va_list *) a;
}
/* Interpreter va_end: no cleanup needed for the s390x va_list. */
void va_end_interp_builtin (MIR_context_t ctx, void *p) {}
/* Generation: fun (fun_addr, res_arg_addresses):
save r6, r7, r14 (r15 + 48,112);
allocate and stack frame (S390X_STACK_HEADER_SIZE + param area size + ld arg values size);
r1=r2 (fun_addr);
r7=r3 (res_arg_addresses);
(arg_reg=mem[r7,arg_offset] or (f1,r0)=mem[r7,arg_offset];mem[r15,S390X_STACK_HEADER_SIZE+offset]=(f1,r0)) ...
call *r1;
r0=mem[r7,<res_offset>]; res_reg=mem[r0]; ...
restore r15; restore r6, r7, r14; return. */
/* Build a foreign-function call trampoline: generated code has the C
   signature fun (fun_addr, res_arg_addresses).  Results and arguments are
   exchanged through the res_arg_addresses array, one 16-byte slot each
   (results first, then args).  See the outline comment above. */
void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, size_t nargs,
                        MIR_type_t *arg_types, int vararg_p) {
  MIR_type_t type;
  int n_gpregs = 0, n_fpregs = 0, res_reg = 7, frame_size, disp, param_offset, param_size = 0;
  VARR_TRUNC (uint8_t, machine_insns, 0);
  frame_size = S390X_STACK_HEADER_SIZE;
  if (nres > 0 && res_types[0] == MIR_T_LD) n_gpregs++; /* ld address */
  for (uint32_t i = 0; i < nargs; i++) { /* calculate param area size: */
    type = arg_types[i];
    if (type == MIR_T_LD) frame_size += 16; /* address for ld value */
    if ((type == MIR_T_F || type == MIR_T_D) && n_fpregs < 4) {
      n_fpregs++;
    } else if (type != MIR_T_F && type != MIR_T_D && n_gpregs < 5) {
      n_gpregs++;
    } else { /* argument passed on the stack: */
      frame_size += 8;
      param_size += 8;
    }
  }
  /* Save callee-saved r6/r7 and the return address in the caller's frame: */
  s390x_gen_ldstm (ctx, 6, 7, 15, 48, FALSE); /* stmg 6,7,48(r15) : */
  s390x_gen_st (ctx, 14, 15, 112, MIR_T_I64); /* stg r14,112(r15) */
  s390x_gen_addi (ctx, 15, 15, -frame_size);  /* lay r15,-frame_size(r15) */
  s390x_gen_mov (ctx, 1, 2);                  /* fun_addr */
  s390x_gen_mov (ctx, res_reg, 3);            /* results & args */
  n_gpregs = n_fpregs = 0;
  param_offset = nres * 16;                   /* args start */
  disp = S390X_STACK_HEADER_SIZE;             /* param area start */
  if (nres > 0 && res_types[0] == MIR_T_LD) { /* ld address: */
    s390x_gen_mov (ctx, 2, res_reg);          /* lgr r2,r7 */
    n_gpregs++;
  }
  for (uint32_t i = 0; i < nargs; i++) { /* load args: */
    type = arg_types[i];
    if ((type == MIR_T_F || type == MIR_T_D) && n_fpregs < 4) {
      /* (le,ld) (f0,f2,f4,f6),param_ofset(r7) */
      s390x_gen_ld (ctx, n_fpregs * 2, res_reg, param_offset, type);
      n_fpregs++;
    } else if (type == MIR_T_F || type == MIR_T_D) { /* FP arg on the stack: */
      s390x_gen_ld (ctx, 1, res_reg, param_offset, type); /* (le,ld) f1,param_offset(r7) */
      s390x_gen_st (ctx, 1, 15, disp, type);              /* (ste,std) f1,disp(r15) */
      disp += 8;
    } else if (type == MIR_T_LD && n_gpregs < 5) { /* ld address */
      s390x_gen_addi (ctx, n_gpregs + 2, res_reg, param_offset); /* lay rn,param_offset(r7) */
      n_gpregs++;
    } else if (type == MIR_T_LD) { /* pass address of location in the result: */
      s390x_gen_addi (ctx, 0, res_reg, param_offset); /* lay r0,param_offset(r7) */
      s390x_gen_st (ctx, 0, 15, disp, MIR_T_I64);     /* stg r0,disp(r15) */
      disp += 8;
    } else if (n_gpregs < 5) {
      s390x_gen_ld (ctx, n_gpregs + 2, res_reg, param_offset, MIR_T_I64); /* lg* rn,param_offset(r7) */
      n_gpregs++;
    } else { /* integer arg on the stack: */
      s390x_gen_ld (ctx, 0, res_reg, param_offset, MIR_T_I64); /* lg* r0,param_offset(r7) */
      s390x_gen_st (ctx, 0, 15, disp, MIR_T_I64);              /* stg* r0,disp(r15) */
      disp += 8;
    }
    param_offset += 16;
  }
  s390x_gen_jump (ctx, 1, TRUE); /* call *r1 */
  /* Store results back through the res_arg_addresses array: */
  n_gpregs = n_fpregs = 0;
  disp = 0;
  for (uint32_t i = 0; i < nres; i++) {
    type = res_types[i];
    if (type == MIR_T_LD) continue; /* do nothing: the result value is already in results */
    if ((type == MIR_T_F || type == MIR_T_D) && n_fpregs < 4) {
      s390x_gen_st (ctx, n_fpregs * 2, res_reg, disp, type);
      n_fpregs++;
    } else if (type != MIR_T_F && type != MIR_T_D && n_gpregs < 1) { // just one gp reg
      s390x_gen_st (ctx, n_gpregs + 2, res_reg, disp, MIR_T_I64);
      n_gpregs++;
    } else {
      (*error_func) (MIR_ret_error, "s390x can not handle this combination of return values");
    }
    disp += 16;
  }
  /* Epilogue: pop the frame and restore r6, r7, r14: */
  s390x_gen_addi (ctx, 15, 15, frame_size);  /* lay 15,frame_size(15) */
  s390x_gen_ldstm (ctx, 6, 7, 15, 48, TRUE); /* lmg 6,7,48(r15) : */
  s390x_gen_ld (ctx, 14, 15, 112, MIR_T_I64); /* lg 14,112(r15) */
  s390x_gen_jump (ctx, 14, FALSE);            /* bcr m15,r14 */
  return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
                            VARR_LENGTH (uint8_t, machine_insns));
}
/* Transform C call to call of void handler (MIR_context_t ctx, MIR_item_t func_item,
va_list va, MIR_val_t *results):
Brief: save all C call args to register save area; save r7, r14;
allocate shim stack frame (S390X_STACK_HEADER_SIZE + space for results and va);
call handler with args; move results to return regs; restore r7,r14,r15; return */
/* Build a shim turning a C call of FUNC_ITEM into a call of
   handler (ctx, func_item, va, results); see the outline comment above.
   The incoming argument registers are spilled into the caller-provided
   register save area so a va_list over them can be materialized. */
void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handler) {
  MIR_func_t func = func_item->u.func;
  uint32_t nres = func->nres, nargs = func->nargs;
  MIR_type_t type, *res_types = func->res_types;
  int disp, frame_size, local_var_size, n_gpregs, n_fpregs, va_list_disp, results_disp;
  VARR_TRUNC (uint8_t, machine_insns, 0);
  frame_size = S390X_STACK_HEADER_SIZE; /* register save area */
  /* Spill return address and all arg registers into the caller's frame: */
  s390x_gen_st (ctx, 14, 15, 112, MIR_T_I64); /* stg 14,112(r15) */
  s390x_gen_ldstm (ctx, 2, 6, 15, 16, FALSE); /* stmg 2,6,16(r15) : */
  for (unsigned reg = 0; reg <= 6; reg += 2)  /* stdy f0,f2,f4,f6,128(r15) : */
    s390x_gen_st (ctx, reg, 15, reg * 4 + 128, MIR_T_D);
  local_var_size = sizeof (struct s390x_va_list) + nres * 16; /* allocate va and results */
  va_list_disp = frame_size;
  results_disp = va_list_disp + sizeof (struct s390x_va_list);
  frame_size += local_var_size;
  assert (frame_size % 8 == 0);
  s390x_gen_addi (ctx, 15, 15, -frame_size);
  /* setup va: mvghi va(15),(0,1): __gpr -- r2 is consumed when the first
     result is a long double passed by address: */
  s390x_gen_mvi (ctx, nres > 0 && res_types[0] == MIR_T_LD ? 1 : 0, 15, va_list_disp);
  s390x_gen_mvi (ctx, 0, 15, va_list_disp + 8); /* mvghi va+8(15),0: __fpr */
  s390x_gen_addi (ctx, 1, 15, frame_size);      /* lay 1,frame_size(15) */
  s390x_gen_st (ctx, 1, 15, va_list_disp + 24, MIR_T_I64); /* stg 1,va+24(r15): __reg_save_area */
  s390x_gen_addi (ctx, 1, 1, S390X_STACK_HEADER_SIZE); /* lay 1,S390X_STACK_HEADER_SIZE(1) */
  /* stg 1,va+16(r15):__overflow_arg_area: */
  s390x_gen_st (ctx, 1, 15, va_list_disp + 16, MIR_T_I64);
  /* call handler: */
  s390x_gen_3addrs (ctx, 2, ctx, 3, func_item, 1, handler);
  s390x_gen_addi (ctx, 4, 15, va_list_disp);
  s390x_gen_addi (ctx, 5, 15, results_disp);
  s390x_gen_jump (ctx, 1, TRUE);
  /* setup result regs: */
  disp = results_disp;
  n_gpregs = n_fpregs = 0;
  for (uint32_t i = 0; i < nres; i++) {
    type = res_types[i];
    if ((type == MIR_T_F || type == MIR_T_D) && n_fpregs < 4) {
      s390x_gen_ld (ctx, n_fpregs * 2, 15, disp, type);
      n_fpregs++;
    } else if (type != MIR_T_F && type != MIR_T_D && n_gpregs < 1) { // just one gp reg
      if (type != MIR_T_LD) {
        s390x_gen_ld (ctx, n_gpregs + 2, 15, disp, MIR_T_I64);
      } else {
        /* ld result: copy the 16-byte value from the results area into the
           caller-supplied location whose address came in as the original r2.
           ld address: lg r2,16+frame_size(r15) */
        s390x_gen_ld (ctx, 2, 15, 16 + frame_size, MIR_T_I64);
        s390x_gen_ld (ctx, 0, 15, disp, MIR_T_D);     /* ld f0,disp(r15) */
        s390x_gen_ld (ctx, 2, 15, disp + 8, MIR_T_D); /* ld f2,disp + 8(r15) */
        s390x_gen_st (ctx, 0, 2, 0, MIR_T_D);         /* st f0,0(r2) */
        s390x_gen_st (ctx, 2, 2, 8, MIR_T_D);         /* st f2,8(r2) */
      }
      n_gpregs++;
    } else {
      (*error_func) (MIR_ret_error, "s390x can not handle this combination of return values");
    }
    disp += 16;
  }
  /* Epilogue: pop the frame and restore callee-saved r6 and r14: */
  s390x_gen_addi (ctx, 15, 15, frame_size);   /* lay 15,frame_size(15) */
  s390x_gen_ld (ctx, 6, 15, 48, MIR_T_I64);   /* lg 6,48(r15) : */
  s390x_gen_ld (ctx, 14, 15, 112, MIR_T_I64); /* lg 14,112(r15) */
  s390x_gen_jump (ctx, 14, FALSE);            /* bcr m15,r14 */
  return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
                            VARR_LENGTH (uint8_t, machine_insns));
}
/* Brief: save r14 (r15+120); save all param regs r2-r6 (r15+16),f0,f2,f4,f6 (r15+128);
update r15; allocate and form minimal wrapper stack frame (S390X_STACK_HEADER_SIZE);
r2 = call hook_address (ctx, called_func); r1=r2; restore all params regs, r15, r14; bcr r1 */
/* Build a wrapper that calls hook_address (ctx, called_func) and then jumps
   to the code address it returns.  All argument registers (r2-r6 and
   f0,f2,f4,f6) are saved in the caller's register save area around the hook
   call so the final target sees the original arguments. */
void *_MIR_get_wrapper (MIR_context_t ctx, MIR_item_t called_func, void *hook_address) {
  int frame_size = S390X_STACK_HEADER_SIZE;
  VARR_TRUNC (uint8_t, machine_insns, 0);
  /* Saves go into the caller's frame, before r15 is moved: */
  s390x_gen_st (ctx, 14, 15, 112, MIR_T_I64); /* stg 14,112(r15) */
  s390x_gen_ldstm (ctx, 2, 6, 15, 16, FALSE); /* stmg 2,6,16(r15) : */
  for (unsigned reg = 0; reg <= 6; reg += 2)  /* stdy f0,f2,f4,f6,128(r15) : */
    s390x_gen_st (ctx, reg, 15, reg * 4 + 128, MIR_T_D);
  /* r15 -= frame_size: */
  s390x_gen_addi (ctx, 15, 15, -frame_size);
  s390x_gen_3addrs (ctx, 2, ctx, 3, called_func, 4, hook_address);
  s390x_gen_jump (ctx, 4, TRUE);
  s390x_gen_mov (ctx, 1, 2); /* r1 = hook result (target code address) */
  s390x_gen_addi (ctx, 15, 15, frame_size);
  /* Restore the saved argument registers and r14, then tail-jump: */
  for (unsigned reg = 0; reg <= 6; reg += 2) /* ldy fn,disp(r15) : */
    s390x_gen_ld (ctx, reg, 15, reg * 4 + 128, MIR_T_D);
  s390x_gen_ldstm (ctx, 2, 6, 15, 16, TRUE);  /* lmg 2,6,16(r15) : */
  s390x_gen_ld (ctx, 14, 15, 112, MIR_T_I64); /* lg 14,112(r15) */
  s390x_gen_jump (ctx, 1, FALSE);
  return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
                            VARR_LENGTH (uint8_t, machine_insns));
}

@ -13,13 +13,20 @@ void *_MIR_get_bstart_builtin (MIR_context_t ctx) {
}
/* Build the bend builtin for x86-64: restore the stack pointer from the
   first argument (SysV: rdi, Win64: rcx) and continue at the return
   address found at the new top of stack. */
void *_MIR_get_bend_builtin (MIR_context_t ctx) {
  static const uint8_t bend_code[] = {
#ifndef _WIN64
    0x48, 0x8b, 0x04, 0x24, /* rax = (rsp) */
    0x48, 0x89, 0xfc, /* rsp = rdi */
    0xff, 0xe0, /* jmp *rax */
#else
    0x48, 0x8b, 0x04, 0x24, /* rax = (rsp) */
    0x48, 0x89, 0xcc, /* rsp = rcx */
    0xff, 0xe0, /* jmp *rax */
#endif
  };
  return _MIR_publish_code (ctx, bend_code, sizeof (bend_code));
}
#ifndef _WIN64
struct x86_64_va_list {
uint32_t gp_offset, fp_offset;
uint64_t *overflow_arg_area, *reg_save_area;
@ -51,6 +58,26 @@ void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a) {
assert (sizeof (struct x86_64_va_list) == sizeof (va_list));
*va = *(struct x86_64_va_list *) vap;
}
#else
/* Win64 va_list: a single pointer walking the 8-byte argument slots. */
struct x86_64_va_list {
  uint64_t *arg_area; /* next argument slot to read */
};
/* Interpreter va_arg (Win64): T is unused here -- every argument occupies
   exactly one 8-byte slot. */
void *va_arg_builtin (void *p, uint64_t t) {
  struct x86_64_va_list *va = p;
  void *a = va->arg_area;
  va->arg_area++;
  return a;
}
/* Interpreter va_start (Win64): treat the C va_list A as a pointer to the
   argument area and store it into the interpreter's va slot P. */
void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a) {
  struct x86_64_va_list **va = p;
  va_list *vap = a;
  assert (sizeof (struct x86_64_va_list) == sizeof (va_list));
  *va = (struct x86_64_va_list *) vap;
}
#endif
/* Interpreter va_end: no cleanup needed on x86-64. */
void va_end_interp_builtin (MIR_context_t ctx, void *p) {}
@ -70,6 +97,7 @@ void _MIR_redirect_thunk (MIR_context_t ctx, void *thunk, void *to) {
}
static const uint8_t save_pat[] = {
#ifndef _WIN64
0x48, 0x81, 0xec, 0x80, 0, 0, 0, /*sub $0x80,%rsp */
0xf3, 0x0f, 0x7f, 0x04, 0x24, /*movdqu %xmm0,(%rsp) */
0xf3, 0x0f, 0x7f, 0x4c, 0x24, 0x10, /*movdqu %xmm1,0x10(%rsp) */
@ -85,9 +113,16 @@ static const uint8_t save_pat[] = {
0x52, /*push %rdx */
0x56, /*push %rsi */
0x57, /*push %rdi */
#else
0x48, 0x89, 0x4c, 0x24, 0x08, /*mov %rcx,0x08(%rsp) */
0x48, 0x89, 0x54, 0x24, 0x10, /*mov %rdx,0x10(%rsp) */
0x4c, 0x89, 0x44, 0x24, 0x18, /*mov %r8, 0x18(%rsp) */
0x4c, 0x89, 0x4c, 0x24, 0x20, /*mov %r9, 0x20(%rsp) */
#endif
};
static const uint8_t restore_pat[] = {
#ifndef _WIN64
0x5f, /*pop %rdi */
0x5e, /*pop %rsi */
0x5a, /*pop %rdx */
@ -103,6 +138,16 @@ static const uint8_t restore_pat[] = {
0xf3, 0x0f, 0x6f, 0x74, 0x24, 0x60, /*movdqu 0x60(%rsp),%xmm6 */
0xf3, 0x0f, 0x6f, 0x7c, 0x24, 0x70, /*movdqu 0x70(%rsp),%xmm7 */
0x48, 0x81, 0xc4, 0x80, 0, 0, 0, /*add $0x80,%rsp */
#else
0x48, 0x8b, 0x4c, 0x24, 0x08, /*mov 0x08(%rsp),%rcx */
0x48, 0x8b, 0x54, 0x24, 0x10, /*mov 0x10(%rsp),%rdx */
0x4c, 0x8b, 0x44, 0x24, 0x18, /*mov 0x18(%rsp),%r8 */
0x4c, 0x8b, 0x4c, 0x24, 0x20, /*mov 0x20(%rsp),%r9 */
0xf3, 0x0f, 0x7e, 0x44, 0x24, 0x08, /*movq 0x08(%rsp),%xmm0*/
0xf3, 0x0f, 0x7e, 0x4c, 0x24, 0x10, /*movq 0x10(%rsp),%xmm1*/
0xf3, 0x0f, 0x7e, 0x54, 0x24, 0x18, /*movq 0x18(%rsp),%xmm2*/
0xf3, 0x0f, 0x7e, 0x5c, 0x24, 0x20, /*movq 0x20(%rsp),%xmm3*/
#endif
};
static uint8_t *push_insns (MIR_context_t ctx, const uint8_t *pat, size_t pat_len) {
@ -174,13 +219,22 @@ static void gen_st80 (MIR_context_t ctx, uint32_t src_offset) {
void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, size_t nargs,
MIR_type_t *arg_types, int vararg_p) {
static const uint8_t prolog[] = {
#ifndef _WIN64
0x53, /* pushq %rbx */
0x48, 0x81, 0xec, 0, 0, 0, 0, /* subq <sp_offset>, %rsp */
0x49, 0x89, 0xfb, /* mov $rdi, $r11 -- fun addr */
0x48, 0x89, 0xf3, /* mov $rsi, $rbx -- result/arg addresses */
#else
0x53, /* pushq %rbx */
0x48, 0x81, 0xec, 0, 0, 0, 0, /* subq <sp_offset>, %rsp */
0x49, 0x89, 0xcb, /* mov $rcx, $r11 -- fun addr */
0x48, 0x89, 0xd3, /* mov $rdx, $rbx -- result/arg addresses */
#endif
};
static const uint8_t call_end[] = {
#ifndef _WIN64
0x48, 0xc7, 0xc0, 0x08, 0, 0, 0, /* mov $8, rax -- to save xmm varargs */
#endif
0x41, 0xff, 0xd3, /* callq *%r11 */
0x48, 0x81, 0xc4, 0, 0, 0, 0, /* addq <sp_offset>, %rsp */
};
@ -188,24 +242,38 @@ void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, s
0x5b, /* pop %rbx */
0xc3, /* ret */
};
#ifndef _WIN64
static const uint8_t iregs[] = {7, 6, 2, 1, 8, 9}; /* rdi, rsi, rdx, rcx, r8, r9 */
uint32_t n_iregs = 0, n_xregs = 0, n_fregs, sp_offset = 0;
static const uint32_t max_iregs = 6, max_xregs = 8;
uint32_t sp_offset = 0;
#else
static const uint8_t iregs[] = {1, 2, 8, 9}; /* rcx, rdx, r8, r9 */
static const uint32_t max_iregs = 4, max_xregs = 4;
uint32_t sp_offset = 32;
#endif
uint32_t n_iregs = 0, n_xregs = 0, n_fregs;
uint8_t *addr;
VARR_TRUNC (uint8_t, machine_insns, 0);
push_insns (ctx, prolog, sizeof (prolog));
for (size_t i = 0; i < nargs; i++) {
if ((MIR_T_I8 <= arg_types[i] && arg_types[i] <= MIR_T_U64) || arg_types[i] == MIR_T_P) {
if (n_iregs < 6) {
if (n_iregs < max_iregs) {
gen_mov (ctx, (i + nres) * sizeof (long double), iregs[n_iregs++], TRUE);
#ifdef _WIN64
n_xregs++;
#endif
} else {
gen_ldst (ctx, sp_offset, (i + nres) * sizeof (long double), TRUE);
sp_offset += 8;
}
} else if (arg_types[i] == MIR_T_F || arg_types[i] == MIR_T_D) {
if (n_xregs < 8) {
if (n_xregs < max_xregs) {
gen_movxmm (ctx, (i + nres) * sizeof (long double), n_xregs++, arg_types[i] == MIR_T_F,
TRUE);
#ifdef _WIN64
gen_mov (ctx, (i + nres) * sizeof (long double), iregs[n_iregs++], TRUE);
#endif
} else {
gen_ldst (ctx, sp_offset, (i + nres) * sizeof (long double), arg_types[i] == MIR_T_D);
sp_offset += 8;
@ -221,7 +289,11 @@ void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, s
addr = VARR_ADDR (uint8_t, machine_insns);
memcpy (addr + 4, &sp_offset, sizeof (uint32_t));
addr = push_insns (ctx, call_end, sizeof (call_end));
memcpy (addr + 13, &sp_offset, sizeof (uint32_t));
memcpy (addr + sizeof (call_end) - 4, &sp_offset, sizeof (uint32_t));
#ifdef _WIN64
if (nres > 1)
(*error_func) (MIR_call_op_error, "Windows x86-64 doesn't support multiple return values");
#endif
n_iregs = n_xregs = n_fregs = 0;
for (size_t i = 0; i < nres; i++) {
if (((MIR_T_I8 <= res_types[i] && res_types[i] <= MIR_T_U64) || res_types[i] == MIR_T_P)
@ -245,6 +317,7 @@ void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, s
void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handler) {
static const uint8_t push_rbx[] = {0x53, /*push %rbx */};
static const uint8_t prepare_pat[] = {
#ifndef _WIN64
/* 0: */ 0x48, 0x83, 0xec, 0x20, /* sub 32,%rsp */
/* 4: */ 0x48, 0x89, 0xe2, /* mov %rsp,%rdx */
/* 7: */ 0xc7, 0x02, 0, 0, 0, 0, /* movl 0,(%rdx) */
@ -261,10 +334,33 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
/* 4a: */ 0x48, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, /* movabs <handler>,%rax */
/* 54: */ 0xff, 0xd0, /* callq *%rax */
};
static const uint32_t nres_offset = 0x2c;
static const uint32_t ctx_offset = 0x38;
static const uint32_t func_offset = 0x42;
static const uint32_t hndl_offset = 0x4c;
static const uint32_t prep_stack_size = 208;
#else
/* 0: */ 0x4c, 0x8d, 0x44, 0x24, 0x08, /* lea 8(%rsp),%r8 */
/* 5: */ 0x53, /* push %rbx */
/* 6: */ 0x48, 0x81, 0xec, 0, 0, 0, 0, /* sub <n>,%rsp */
/* d: */ 0x48, 0x89, 0xe3, /* mov %rsp,%rbx */
/* 10: */ 0x49, 0x89, 0xe1, /* mov %rsp,%r9 */
/* 13: */ 0x48, 0x83, 0xec, 0x20, /* sub 32,%rsp */
/* 17: */ 0x48, 0xb9, 0, 0, 0, 0, 0, 0, 0, 0, /* movabs <ctx>,%rcx */
/* 21: */ 0x48, 0xba, 0, 0, 0, 0, 0, 0, 0, 0, /* movabs <func_item>,%rdx*/
/* 2b: */ 0x48, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, /* movabs <handler>,%rax */
/* 35: */ 0xff, 0xd0, /* callq *%rax */
};
static const uint32_t nres_offset = 0x09;
static const uint32_t ctx_offset = 0x19;
static const uint32_t func_offset = 0x23;
static const uint32_t hndl_offset = 0x2d;
static const uint32_t prep_stack_size = 32;
#endif
static const uint8_t shim_end[] = {
/* 0: */ 0x48, 0x81, 0xc4, 0, 0, 0, 0, /*add 208+n,%rsp*/
/* 7: */ 0x5b, /*pop %rbx*/
/* 8: */ 0xc3, /*retq */
/* 0: */ 0x48, 0x81, 0xc4, 0, 0, 0, 0, /*add prep_stack_size+n,%rsp*/
/* 7: */ 0x5b, /*pop %rbx*/
/* 8: */ 0xc3, /*retq */
};
static const uint8_t ld_pat[] = {0x48, 0x8b, 0x83, 0, 0, 0, 0}; /* mov <offset>(%rbx), %reg */
static const uint8_t movss_pat[]
@ -279,15 +375,21 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
MIR_type_t *results = func_item->u.func->res_types;
VARR_TRUNC (uint8_t, machine_insns, 0);
#ifndef _WIN64
push_insns (ctx, push_rbx, sizeof (push_rbx));
#endif
push_insns (ctx, save_pat, sizeof (save_pat));
addr = push_insns (ctx, prepare_pat, sizeof (prepare_pat));
imm = nres * 16;
memcpy (addr + 0x2c, &imm, sizeof (uint32_t));
memcpy (addr + 0x38, &ctx, sizeof (void *));
memcpy (addr + 0x42, &func_item, sizeof (void *));
memcpy (addr + 0x4c, &handler, sizeof (void *));
memcpy (addr + nres_offset, &imm, sizeof (uint32_t));
memcpy (addr + ctx_offset, &ctx, sizeof (void *));
memcpy (addr + func_offset, &func_item, sizeof (void *));
memcpy (addr + hndl_offset, &handler, sizeof (void *));
/* move results: */
#ifdef _WIN64
if (nres > 1)
(*error_func) (MIR_call_op_error, "Windows x86-64 doesn't support multiple return values");
#endif
n_iregs = n_xregs = n_fregs = offset = 0;
for (uint32_t i = 0; i < nres; i++) {
if (results[i] == MIR_T_F && n_xregs < 2) {
@ -316,7 +418,7 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
offset += 16;
}
addr = push_insns (ctx, shim_end, sizeof (shim_end));
imm = 208 + nres * 16;
imm = prep_stack_size + nres * 16;
memcpy (addr + 3, &imm, sizeof (uint32_t));
return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
@ -327,20 +429,36 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
void *_MIR_get_wrapper (MIR_context_t ctx, MIR_item_t called_func, void *hook_address) {
static const uint8_t push_rax[] = {0x50, /*push %rax */};
static const uint8_t wrap_end[] = {
#ifndef _WIN64
0x58, /*pop %rax */
#endif
0x41, 0xff, 0xe2, /*jmpq *%r10 */
};
static const uint8_t call_pat[] = {
#ifndef _WIN64
0x48, 0xbe, 0, 0, 0, 0, 0, 0, 0, 0, /*movabs called_func,%rsi */
0x48, 0xbf, 0, 0, 0, 0, 0, 0, 0, 0, /*movabs ctx,%rdi */
0x49, 0xba, 0, 0, 0, 0, 0, 0, 0, 0, /*movabs <hook_address>,%r10 */
0x41, 0xff, 0xd2, /*callq *%r10 */
0x49, 0x89, 0xc2, /*mov %rax,%r10 */
#else
0x48, 0xba, 0, 0, 0, 0, 0, 0, 0, 0, /*movabs called_func,%rdx */
0x48, 0xb9, 0, 0, 0, 0, 0, 0, 0, 0, /*movabs ctx,%rcx */
0x49, 0xba, 0, 0, 0, 0, 0, 0, 0, 0, /*movabs <hook_address>,%r10*/
0x50, /*push %rax */
0x48, 0x83, 0xec, 0x20, /*sub 32,%rsp */
0x41, 0xff, 0xd2, /*callq *%r10 */
0x49, 0x89, 0xc2, /*mov %rax,%r10 */
0x48, 0x83, 0xc4, 0x20, /*add 32,%rsp */
0x58, /*pop %rax */
#endif
};
uint8_t *addr;
VARR_TRUNC (uint8_t, machine_insns, 0);
#ifndef _WIN64
push_insns (ctx, push_rax, sizeof (push_rax));
#endif
push_insns (ctx, save_pat, sizeof (save_pat));
addr = push_insns (ctx, call_pat, sizeof (call_pat));
memcpy (addr + 2, &called_func, sizeof (void *));

@ -947,6 +947,13 @@ MIR_item_t MIR_new_bss (MIR_context_t ctx, const char *name, size_t len) {
return item;
}
/* Canonicalize a MIR type for the host: on platforms where long double
   is only 8 bytes wide, MIR_T_LD degenerates to MIR_T_D so long-double
   data and results are handled through the plain double machinery.
   On all other platforms the type is returned unchanged. */
static MIR_type_t canon_type (MIR_type_t type) {
#if __SIZEOF_LONG_DOUBLE__ == 8
  return type == MIR_T_LD ? MIR_T_D : type;
#else
  return type;
#endif
}
size_t _MIR_type_size (MIR_context_t ctx, MIR_type_t type) {
switch (type) {
case MIR_T_I8: return sizeof (int8_t);
@ -986,7 +993,7 @@ MIR_item_t MIR_new_data (MIR_context_t ctx, const char *name, MIR_type_t el_type
free (item);
item = tab_item;
}
data->el_type = el_type;
data->el_type = canon_type (el_type);
data->nel = nel;
memcpy (data->u.els, els, el_len * nel);
return item;
@ -1148,7 +1155,7 @@ static MIR_item_t new_func_arr (MIR_context_t ctx, const char *name, size_t nres
= string_store (ctx, &strings, &string_tab, (MIR_str_t){strlen (name) + 1, name}).str.s;
func->nres = nres;
func->res_types = (MIR_type_t *) ((char *) func + sizeof (struct MIR_func));
memcpy (func->res_types, res_types, nres * sizeof (MIR_type_t));
for (size_t i = 0; i < nres; i++) func->res_types[i] = canon_type (res_types[i]);
tab_item = add_item (ctx, func_item);
mir_assert (tab_item == func_item);
DLIST_INIT (MIR_insn_t, func->insns);
@ -1161,7 +1168,7 @@ static MIR_item_t new_func_arr (MIR_context_t ctx, const char *name, size_t nres
func->n_inlines = 0;
func->machine_code = func->call_addr = NULL;
for (size_t i = 0; i < nargs; i++) {
MIR_type_t type = vars[i].type;
MIR_type_t type = canon_type (vars[i].type);
VARR_PUSH (MIR_var_t, func->vars, vars[i]);
create_func_reg (ctx, func, vars[i].name, i + 1,
@ -1694,6 +1701,35 @@ static MIR_insn_t create_insn (MIR_context_t ctx, size_t nops, MIR_insn_code_t c
if (nops == 0) nops = 1;
insn = malloc (sizeof (struct MIR_insn) + sizeof (MIR_op_t) * (nops - 1));
if (insn == NULL) (*error_func) (MIR_alloc_error, "Not enough memory for insn creation");
#if __SIZEOF_LONG_DOUBLE__ == 8
switch (code) {
case MIR_LDMOV: code = MIR_DMOV; break;
case MIR_I2LD: code = MIR_I2D; break;
case MIR_UI2LD: code = MIR_UI2D; break;
case MIR_LD2I: code = MIR_D2I; break;
case MIR_F2LD: code = MIR_F2D; break;
case MIR_D2LD: code = MIR_DMOV; break;
case MIR_LD2F: code = MIR_D2F; break;
case MIR_LD2D: code = MIR_DMOV; break;
case MIR_LDNEG: code = MIR_DNEG; break;
case MIR_LDADD: code = MIR_DADD; break;
case MIR_LDSUB: code = MIR_DSUB; break;
case MIR_LDMUL: code = MIR_DMUL; break;
case MIR_LDDIV: code = MIR_DDIV; break;
case MIR_LDEQ: code = MIR_DEQ; break;
case MIR_LDNE: code = MIR_DNE; break;
case MIR_LDLT: code = MIR_DLT; break;
case MIR_LDLE: code = MIR_DLE; break;
case MIR_LDGT: code = MIR_DGT; break;
case MIR_LDGE: code = MIR_DGE; break;
case MIR_LDBEQ: code = MIR_DBEQ; break;
case MIR_LDBNE: code = MIR_DBNE; break;
case MIR_LDBLT: code = MIR_DBLT; break;
case MIR_LDBLE: code = MIR_DBLE; break;
case MIR_LDBGT: code = MIR_DBGT; break;
case MIR_LDBGE: code = MIR_DBGE; break;
}
#endif
insn->code = code;
insn->data = NULL;
return insn;
@ -1901,6 +1937,9 @@ MIR_op_t MIR_new_double_op (MIR_context_t ctx, double d) {
MIR_op_t MIR_new_ldouble_op (MIR_context_t ctx, long double ld) {
MIR_op_t op;
#if __SIZEOF_LONG_DOUBLE__ == 8
return MIR_new_double_op (ctx, ld);
#endif
mir_assert (sizeof (long double) == 16); /* machine-defined 80- or 128-bit FP */
init_op (&op, MIR_OP_LDOUBLE);
op.u.ld = ld;
@ -1928,7 +1967,7 @@ MIR_op_t MIR_new_mem_op (MIR_context_t ctx, MIR_type_t type, MIR_disp_t disp, MI
MIR_op_t op;
init_op (&op, MIR_OP_MEM);
op.u.mem.type = type;
op.u.mem.type = canon_type (type);
op.u.mem.disp = disp;
op.u.mem.base = base;
op.u.mem.index = index;
@ -3244,6 +3283,14 @@ static code_holder_t *get_last_code_holder (MIR_context_t ctx, size_t size) {
return VARR_ADDR (code_holder_t, code_holders) + len - 1;
}
#ifndef __MIRC__
/* Make machine code just written into [start, bound) visible to
   instruction fetch.  Called immediately after generated code is
   copied or patched (see add_code, _MIR_change_code,
   _MIR_update_code_arr, _MIR_update_code below). */
void _MIR_flush_code_cache (void *start, void *bound) {
#ifdef __GNUC__
  __clear_cache (start, bound); /* GCC/Clang builtin i-cache flush */
#endif
  /* NOTE(review): with a non-GNUC compiler this function is a no-op --
     confirm targets needing an explicit i-cache flush (e.g. the newly
     added s390x port) are only built with GCC-compatible compilers. */
}
#endif
static uint8_t *add_code (MIR_context_t ctx, code_holder_t *ch_ptr, const uint8_t *code,
size_t code_len) {
uint8_t *mem = ch_ptr->free;
@ -3253,6 +3300,7 @@ static uint8_t *add_code (MIR_context_t ctx, code_holder_t *ch_ptr, const uint8_
mprotect (ch_ptr->start, ch_ptr->bound - ch_ptr->start, PROT_WRITE | PROT_EXEC);
memcpy (mem, code, code_len);
mprotect (ch_ptr->start, ch_ptr->bound - ch_ptr->start, PROT_READ | PROT_EXEC);
_MIR_flush_code_cache (mem, ch_ptr->free);
return mem;
}
@ -3286,6 +3334,7 @@ void _MIR_change_code (MIR_context_t ctx, uint8_t *addr, const uint8_t *code, si
mprotect ((uint8_t *) start, len, PROT_WRITE | PROT_EXEC);
memcpy (addr, code, code_len);
mprotect ((uint8_t *) start, len, PROT_READ | PROT_EXEC);
_MIR_flush_code_cache (addr, addr + code_len);
}
void _MIR_update_code_arr (MIR_context_t ctx, uint8_t *base, size_t nloc,
@ -3300,6 +3349,7 @@ void _MIR_update_code_arr (MIR_context_t ctx, uint8_t *base, size_t nloc,
mprotect ((uint8_t *) start, len, PROT_WRITE | PROT_EXEC);
for (i = 0; i < nloc; i++) memcpy (base + relocs[i].offset, &relocs[i].value, sizeof (void *));
mprotect ((uint8_t *) start, len, PROT_READ | PROT_EXEC);
_MIR_flush_code_cache (base, base + max_offset + sizeof (void *));
}
void _MIR_update_code (MIR_context_t ctx, uint8_t *base, size_t nloc, ...) {
@ -3324,6 +3374,7 @@ void _MIR_update_code (MIR_context_t ctx, uint8_t *base, size_t nloc, ...) {
memcpy (base + offset, &value, sizeof (void *));
}
mprotect ((uint8_t *) start, len, PROT_READ | PROT_EXEC);
_MIR_flush_code_cache (base, base + max_offset + sizeof (void *));
va_end (args);
}
@ -4689,8 +4740,10 @@ static void scan_number (MIR_context_t ctx, int ch, int get_char (MIR_context_t)
*double_p = FALSE;
ch = get_char (ctx);
} else if (ch == 'l' || ch == 'L') {
#if __SIZEOF_LONG_DOUBLE__ != 8
*ldouble_p = TRUE;
*double_p = FALSE;
#endif
ch = get_char (ctx);
}
} else if (*base == 8 && dec_p)
@ -5165,14 +5218,14 @@ void MIR_scan_string (MIR_context_t ctx, const char *str) {
op.mode = MIR_OP_FLOAT;
op.u.f = t.u.f;
break;
case TC_LDOUBLE: op.mode = MIR_OP_LDOUBLE; op.u.ld = t.u.ld;
#if __SIZEOF_LONG_DOUBLE__ != 8
break;
#endif
case TC_DOUBLE:
op.mode = MIR_OP_DOUBLE;
op.u.d = t.u.d;
break;
case TC_LDOUBLE:
op.mode = MIR_OP_LDOUBLE;
op.u.ld = t.u.ld;
break;
case TC_STR:
op.mode = MIR_OP_STR;
op.u.str = t.u.str;
@ -5398,6 +5451,8 @@ static void scan_finish (MIR_context_t ctx) {
#include "mir-aarch64.c"
#elif defined(__PPC64__)
#include "mir-ppc64.c"
#elif defined(__s390x__)
#include "mir-s390x.c"
#else
#error "undefined or unsupported generation target"
#endif

@ -564,6 +564,7 @@ extern MIR_item_t _MIR_builtin_proto (MIR_context_t ctx, MIR_module_t module, co
extern MIR_item_t _MIR_builtin_func (MIR_context_t ctx, MIR_module_t module, const char *name,
void *addr);
extern void _MIR_flush_code_cache (void *start, void *bound);
extern uint8_t *_MIR_publish_code (MIR_context_t ctx, const uint8_t *code, size_t code_len);
extern uint8_t *_MIR_get_new_code_addr (MIR_context_t ctx, size_t size);
extern uint8_t *_MIR_publish_code_by_addr (MIR_context_t ctx, void *addr, const uint8_t *code,

@ -938,8 +938,8 @@ Syntax
::
TEST A C if (boolean(R(A)) != C) then pc++
TESTSET A B C if (boolean(R(B)) == C) then R(A) := R(B) else pc++
TEST A C if (boolean(R(A)) != C) then PC++
TESTSET A B C if (boolean(R(B)) != C) then PC++ else R(A) := R(B)
where boolean(x) => ((x == nil || x == false) ? 0 : 1)

@ -23,7 +23,7 @@ LLVM JIT Backend
================
Following versions of LLVM work with Ravi.
* LLVM 3.7, 3.8, 3.9, 4.0, 5.0, 6.0, 8.0.1
* LLVM 3.7, 3.8, 3.9, 4.0, 5.0, 6.0, 8.0.1, 9.0.1, 10.0
* LLVM 7.0 was skipped because of unstable ORC api changes
* LLVM 3.5 and 3.6 should also work but have not been recently tested
@ -40,7 +40,7 @@ I built LLVM from source. I used the following sequence from the VS2017 command
mkdir build
cd build
cmake -DCMAKE_INSTALL_PREFIX=c:\Software\llvm801 -DLLVM_TARGETS_TO_BUILD="X86" -G "Visual Studio 15 2017 Win64" ..
cmake -DCMAKE_INSTALL_PREFIX=c:\Software\llvm801 -DLLVM_TARGETS_TO_BUILD="X86" -G "Visual Studio 15 2017 Win64" ..
I then opened the generated solution in VS2017 and performed a INSTALL build from there. Above will build the 64-bit version of LLVM libraries. To build a 32-bit version omit the ``Win64`` parameter.

@ -129,48 +129,49 @@ static struct {
{"lua_concat", reinterpret_cast<void *>(lua_concat)},
{"lua_len", reinterpret_cast<void *>(lua_len)},
{"lua_stringtonumber", reinterpret_cast<void *>(lua_stringtonumber)},
{"luaC_upvalbarrier_", reinterpret_cast<void *>(luaC_upvalbarrier_)},
{"luaD_call", reinterpret_cast<void *>(luaD_call)},
{"luaD_poscall", reinterpret_cast<void *>(luaD_poscall)},
{"luaD_precall", reinterpret_cast<void *>(luaD_precall)},
{"luaF_close", reinterpret_cast<void *>(luaF_close)},
{"raise_error", reinterpret_cast<void *>(raise_error)},
{"luaG_runerror", reinterpret_cast<void *>(luaG_runerror)},
{"luaH_getstr", reinterpret_cast<void *>(luaH_getstr)},
{"luaO_arith", reinterpret_cast<void *>(luaO_arith)},
{"luaT_trybinTM", reinterpret_cast<void *>(luaT_trybinTM)},
{"luaV_tonumber_", reinterpret_cast<void *>(luaV_tonumber_)},
{"luaV_tointeger", reinterpret_cast<void *>(luaV_tointeger)},
{"luaV_tointeger_", reinterpret_cast<void *>(luaV_tointeger_)},
{"luaD_poscall", reinterpret_cast<void *>(luaD_poscall)},
{"luaV_equalobj", reinterpret_cast<void *>(luaV_equalobj)},
{"luaV_lessthan", reinterpret_cast<void *>(luaV_lessthan)},
{"luaV_lessequal", reinterpret_cast<void *>(luaV_lessequal)},
{"luaV_execute", reinterpret_cast<void *>(luaV_execute)},
{"luaV_gettable", reinterpret_cast<void *>(luaV_gettable)},
{"luaV_settable", reinterpret_cast<void *>(luaV_settable)},
{"luaD_precall", reinterpret_cast<void *>(luaD_precall)},
{"luaV_objlen", reinterpret_cast<void *>(luaV_objlen)},
{"luaV_forlimit", reinterpret_cast<void *>(luaV_forlimit)},
{"luaV_finishget", reinterpret_cast<void *>(luaV_finishget)},
{"luaV_mod", reinterpret_cast<void *>(luaV_mod)},
{"luaV_div", reinterpret_cast<void *>(luaV_div)},
{"raviV_op_newtable", reinterpret_cast<void *>(raviV_op_newtable)},
{"luaO_arith", reinterpret_cast<void *>(luaO_arith)},
{"raviV_op_newarrayint", reinterpret_cast<void *>(raviV_op_newarrayint)},
{"raviV_op_newarrayfloat", reinterpret_cast<void *>(raviV_op_newarrayfloat)},
{"raviV_op_setlist", reinterpret_cast<void *>(raviV_op_setlist)},
{"raviV_op_concat", reinterpret_cast<void *>(raviV_op_concat)},
{"raviV_op_closure", reinterpret_cast<void *>(raviV_op_closure)},
{"raviV_op_vararg", reinterpret_cast<void *>(raviV_op_vararg)},
{"luaV_objlen", reinterpret_cast<void *>(luaV_objlen)},
{"luaV_forlimit", reinterpret_cast<void *>(luaV_forlimit)},
{"raviV_op_setupval", reinterpret_cast<void *>(raviV_op_setupval)},
{"raviV_op_setupvali", reinterpret_cast<void *>(raviV_op_setupvali)},
{"raviV_op_setupvalf", reinterpret_cast<void *>(raviV_op_setupvalf)},
{"raviV_op_setupvalai", reinterpret_cast<void *>(raviV_op_setupvalai)},
{"raviV_op_setupvalaf", reinterpret_cast<void *>(raviV_op_setupvalaf)},
{"raviV_op_setupvalt", reinterpret_cast<void *>(raviV_op_setupvalt)},
{"luaD_call", reinterpret_cast<void *>(luaD_call)},
{"raviH_set_int", reinterpret_cast<void *>(raviH_set_int)},
{"raviH_set_float", reinterpret_cast<void *>(raviH_set_float)},
{"raviV_check_usertype", reinterpret_cast<void *>(raviV_check_usertype)},
{"luaT_trybinTM", reinterpret_cast<void *>(luaT_trybinTM)},
{"raviV_gettable_sskey", reinterpret_cast<void *>(raviV_gettable_sskey)},
{"raviV_settable_sskey", reinterpret_cast<void *>(raviV_settable_sskey)},
{"raviV_gettable_i", reinterpret_cast<void *>(raviV_gettable_i)},
{"raviV_settable_i", reinterpret_cast<void *>(raviV_settable_i)},
{"luaG_runerror", reinterpret_cast<void *>(luaG_runerror)},
{"luaC_upvalbarrier_", reinterpret_cast<void *>(luaC_upvalbarrier_)},
{"luaV_finishget", reinterpret_cast<void *>(luaV_finishget)},
{"luaH_getstr", reinterpret_cast<void *>(luaH_getstr)},
{"raviV_op_add", reinterpret_cast<void *>(raviV_op_add)},
{"raviV_op_mul", reinterpret_cast<void *>(raviV_op_mul)},
{"raviV_op_sub", reinterpret_cast<void *>(raviV_op_sub)},
@ -181,12 +182,12 @@ static struct {
{"raviV_op_band", reinterpret_cast<void *>(raviV_op_band)},
{"raviV_op_bor", reinterpret_cast<void *>(raviV_op_bor)},
{"raviV_op_bxor", reinterpret_cast<void *>(raviV_op_bxor)},
{"luaV_mod", reinterpret_cast<void *>(luaV_mod)},
{"luaV_div", reinterpret_cast<void *>(luaV_div)},
{"raviV_op_totype", reinterpret_cast<void *>(raviV_op_totype)},
#ifdef RAVI_DEFER_STATEMENT
{"raviV_op_defer", reinterpret_cast<void *>(raviV_op_defer)},
#endif
{"raviV_debug_trace", reinterpret_cast<void *>(raviV_debug_trace)},
{"raise_error", reinterpret_cast<void *>(raise_error)},
{"printf", reinterpret_cast<void *>(printf)},
{"puts", reinterpret_cast<void *>(puts)},
{nullptr, nullptr}};

@ -406,7 +406,7 @@ void *MIR_compile_C_module(
}
MIR_load_module (ctx, module);
MIR_gen_init (ctx);
MIR_gen_set_optimize_level(ctx, 3);
MIR_gen_set_optimize_level(ctx, 2);
MIR_link (ctx, MIR_set_gen_interface, Import_resolver_func);
fun_addr = MIR_gen (ctx, main_func);
MIR_gen_finish (ctx);

Loading…
Cancel
Save