From 6ccca76ff11404fa6c88711e8d4f0783ee22050e Mon Sep 17 00:00:00 2001 From: Dibyendu Majumdar Date: Sun, 16 Feb 2020 23:15:58 +0000 Subject: [PATCH 01/13] Update README.rst --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 72325a6..ee035eb 100644 --- a/README.rst +++ b/README.rst @@ -102,7 +102,7 @@ History - New JIT backend `MIR `_. * 2020 (Plan) - - New optimizing byte code generator based on new parser / type checker + - `New optimizing byte code generator based on new parser / type checker `_ - Ravi 1.0 release License From ab9fb1f8fb1a9cb5ad8d68ab690ea139b6779246 Mon Sep 17 00:00:00 2001 From: Dibyendu Majumdar Date: Sun, 8 Mar 2020 16:14:23 +0000 Subject: [PATCH 02/13] issue #98 fix scoping of local vars --- src/ravi_ast_parse.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/ravi_ast_parse.c b/src/ravi_ast_parse.c index 2e3661a..bdd6c4f 100644 --- a/src/ravi_ast_parse.c +++ b/src/ravi_ast_parse.c @@ -24,6 +24,7 @@ static void end_scope(struct parser_state *parser); static struct ast_node *new_literal_expression(struct parser_state *parser, ravitype_t type); static struct ast_node *generate_label(struct parser_state *parser, TString *label); static struct ast_container *new_ast_container(lua_State *L); +static void add_local_symbol_to_current_scope(struct parser_state *parser, struct lua_symbol *sym); static void add_symbol(struct ast_container *container, struct lua_symbol_list **list, struct lua_symbol *sym) { ptrlist_add((struct ptr_list **)list, sym, &container->ptrlist_allocator); @@ -118,10 +119,6 @@ static struct lua_symbol *new_local_symbol(struct parser_state *parser, TString symbol->symbol_type = SYM_LOCAL; symbol->var.block = scope; symbol->var.var_name = name; - add_symbol(parser->container, &scope->symbol_list, symbol); // Add to the end of the symbol list - add_symbol(parser->container, &scope->function->function_expr.locals, 
symbol); - // Note that Lua allows multiple local declarations of the same name - // so a new instance just gets added to the end return symbol; } @@ -540,6 +537,7 @@ static bool parse_parameter_list(struct parser_state *parser, struct lua_symbol_ /* RAVI change - add type */ struct lua_symbol *symbol = declare_local_variable(parser); add_symbol(parser->container, list, symbol); + add_local_symbol_to_current_scope(parser, symbol); nparams++; break; } @@ -933,6 +931,13 @@ static struct ast_node *parse_expression(struct parser_state *parser) { ** ======================================================================= */ +static void add_local_symbol_to_current_scope(struct parser_state *parser, struct lua_symbol *sym) { + // Note that Lua allows multiple local declarations of the same name + // so a new instance just gets added to the end + add_symbol(parser->container, &parser->current_scope->symbol_list, sym); + add_symbol(parser->container, &parser->current_scope->function->function_expr.locals, sym); +} + static struct block_scope *parse_block(struct parser_state *parser, struct ast_node_list **statement_list) { /* block -> statlist */ struct block_scope *scope = new_scope(parser); @@ -1038,7 +1043,9 @@ static void parse_forbody(struct parser_state *parser, struct ast_node *stmt, in static void parse_fornum_statement(struct parser_state *parser, struct ast_node *stmt, TString *varname, int line) { LexState *ls = parser->ls; /* fornum -> NAME = exp1,exp1[,exp1] forbody */ - add_symbol(parser->container, &stmt->for_stmt.symbols, new_local_symbol(parser, varname, RAVI_TANY, NULL)); + struct lua_symbol *local = new_local_symbol(parser, varname, RAVI_TANY, NULL); + add_symbol(parser->container, &stmt->for_stmt.symbols, local); + add_local_symbol_to_current_scope(parser, local); checknext(ls, '='); /* get the type of each expression */ add_ast_node(parser->container, &stmt->for_stmt.expr_list, parse_expression(parser)); /* initial value */ @@ -1056,10 +1063,13 @@ 
static void parse_for_list(struct parser_state *parser, struct ast_node *stmt, T /* forlist -> NAME {,NAME} IN explist forbody */ int nvars = 4; /* gen, state, control, plus at least one declared var */ /* create declared variables */ - add_symbol(parser->container, &stmt->for_stmt.symbols, new_local_symbol(parser, indexname, RAVI_TANY, NULL)); + struct lua_symbol *local = new_local_symbol(parser, indexname, RAVI_TANY, NULL); + add_symbol(parser->container, &stmt->for_stmt.symbols, local); + add_local_symbol_to_current_scope(parser, local); while (testnext(ls, ',')) { - add_symbol(parser->container, &stmt->for_stmt.symbols, - new_local_symbol(parser, check_name_and_next(ls), RAVI_TANY, NULL)); + local = new_local_symbol(parser, check_name_and_next(ls), RAVI_TANY, NULL); + add_symbol(parser->container, &stmt->for_stmt.symbols, local); + add_local_symbol_to_current_scope(parser, local); nvars++; } checknext(ls, TK_IN); @@ -1156,6 +1166,8 @@ static struct ast_node *parse_local_function_statement(struct parser_state *pars LexState *ls = parser->ls; struct lua_symbol *symbol = new_local_symbol(parser, check_name_and_next(ls), RAVI_TFUNCTION, NULL); /* new local variable */ + /* local function f ... is parsed as local f; f = function ... 
*/ + add_local_symbol_to_current_scope(parser, symbol); struct ast_node *function_ast = new_function(parser); parse_function_body(parser, function_ast, 0, ls->linenumber); /* function created in next register */ end_function(parser); @@ -1190,6 +1202,10 @@ static struct ast_node *parse_local_statement(struct parser_state *parser) { /* nexps = 0; */ ; } + /* local symbols are only added to scope at the end of the local statement */ + struct lua_symbol *sym = NULL; + FOR_EACH_PTR(node->local_stmt.var_list, sym) { add_local_symbol_to_current_scope(parser, sym); } + END_FOR_EACH_PTR(sym); return node; } From 788cd0d92de2ee2bc2de50cc408c7fa330c20296 Mon Sep 17 00:00:00 2001 From: Dibyendu Majumdar Date: Sat, 28 Mar 2020 16:09:41 +0000 Subject: [PATCH 03/13] issue #169 Update MIR to latest version --- .gitignore | 11 + mir/CMakeLists.txt | 6 +- mir/MIR.md | 585 +++++++ mir/c2mir/aarch64/caarch64-code.c | 23 + mir/c2mir/aarch64/caarch64.h | 50 + mir/c2mir/aarch64/mirc-aarch64-linux.h | 93 ++ mir/c2mir/c2mir.c | 242 ++- mir/c2mir/ppc64/cppc64-code.c | 23 + mir/c2mir/ppc64/cppc64.h | 50 + mir/c2mir/ppc64/mirc-ppc64-linux.h | 95 ++ mir/c2mir/x86_64/mirc-x86_64-linux.h | 6 +- mir/mir-aarch64.c | 375 +++++ mir/mir-bitmap.h | 69 +- mir/mir-gen-aarch64.c | 2132 ++++++++++++++++++++++++ mir/mir-gen-ppc64.c | 1 + mir/mir-gen-stub.c | 95 ++ mir/mir-gen-x86_64.c | 155 +- mir/mir-gen.c | 2107 +++++++++++++++++------ mir/mir-gen.h | 17 +- mir/mir-htab.h | 282 ++-- mir/mir-interp.c | 66 +- mir/mir-ppc64.c | 467 ++++++ mir/mir-x86_64.c | 27 +- mir/mir.c | 365 ++-- mir/mir.h | 22 +- src/ravi_mirjit.c | 1 + 26 files changed, 6358 insertions(+), 1007 deletions(-) create mode 100644 mir/MIR.md create mode 100644 mir/c2mir/aarch64/caarch64-code.c create mode 100644 mir/c2mir/aarch64/caarch64.h create mode 100644 mir/c2mir/aarch64/mirc-aarch64-linux.h create mode 100644 mir/c2mir/ppc64/cppc64-code.c create mode 100644 mir/c2mir/ppc64/cppc64.h create mode 100644 
mir/c2mir/ppc64/mirc-ppc64-linux.h create mode 100644 mir/mir-aarch64.c create mode 100644 mir/mir-gen-aarch64.c create mode 100644 mir/mir-gen-ppc64.c create mode 100644 mir/mir-gen-stub.c create mode 100644 mir/mir-ppc64.c diff --git a/.gitignore b/.gitignore index 951d953..0182915 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,14 @@ CMakeScripts cmake_install.cmake install_manifest.txt CTestTestfile.cmake +build +buildmir +omrjit +buildllvm +.vscode +.idea +cmake-build-debug +cmake-build-release +buildnojit +nojit +nojita diff --git a/mir/CMakeLists.txt b/mir/CMakeLists.txt index e3790a5..3c48e0b 100644 --- a/mir/CMakeLists.txt +++ b/mir/CMakeLists.txt @@ -6,7 +6,11 @@ message(STATUS "OS type is ${CMAKE_SYSTEM_NAME}") message(STATUS "System processor is ${CMAKE_HOST_SYSTEM_PROCESSOR}") message(STATUS "Build type is ${CMAKE_BUILD_TYPE}") -set(TARGET x86_64) +if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") + set(TARGET x86_64) +else() + message(FATAL "Unsupported platform") +endif() set(MIR_HEADERS mir.h diff --git a/mir/MIR.md b/mir/MIR.md new file mode 100644 index 0000000..dc9d551 --- /dev/null +++ b/mir/MIR.md @@ -0,0 +1,585 @@ +# Medium Intermediate Representation (file mir.h) + * This document describes MIR itself, API for its creation, and MIR textual representation + * MIR textual representation is assembler like. 
Each directive or insn should be put on a separate line + * In MIR textual syntax we use + * `[]` for optional construction + * `{}` for repeating zero or more times + * `<>` for some informal construction description or a construction already described or to be described + +## MIR context + * MIR API code has an implicit state called the MIR context + * MIR context is represented by data of `MIR_context_t` + * MIR context is created by function `MIR_context_t MIR_init (void)` + * Every MIR API function (except for `MIR_init`) requires MIR context passed through the first argument of type `MIR_context_t` + * You can use MIR functions in different threads without any synchronization + if they work with different contexts in each thread + +## MIR program + * MIR program consists of MIR **modules** + * To start work with MIR program, you should first call API function `MIR_init` + * API function `MIR_finish (MIR_context_t ctx)` should be called last. It frees all internal data used to work with MIR program and all IR (insns, functions, items, and modules) created in this context + * API function `MIR_output (MIR_context_t ctx, FILE *f)` outputs MIR textual representation of the program into given file + * API function `MIR_scan_string (MIR_context_t ctx, const char *str)` reads textual MIR representation given by a string + * API functions `MIR_write (MIR_context_t ctx, FILE *f)` and + `MIR_read (MIR_context_t ctx, FILE *f)` output and read + **binary MIR representation** to/from given file. There are also + functions `MIR_write_with_func (MIR_context_t ctx, const int + (*writer_func) (MIR_context_t, uint8_t))` and `MIR_read_with_func + (MIR_context_t ctx, const int (*reader_func) (MIR_context_t))` to + output and read **binary MIR representation** through a function + given as an argument.
The reader function should return EOF as + the end of the binary MIR representation, the writer function + should return the number of successfully output bytes + * Binary MIR representation is much more compact and faster to read than the textual one + +## MIR data type + * MIR program works with the following **data types**: + * `MIR_T_I8` and `MIR_T_U8` -- signed and unsigned 8-bit integer values + * `MIR_T_I16` and `MIR_T_U16` -- signed and unsigned 16-bit integer values + * `MIR_T_I32` and `MIR_T_U32` -- signed and unsigned 32-bit integer values + * `MIR_T_I64` and `MIR_T_U64` -- signed and unsigned 64-bit integer values + * ??? signed and unsigned 64-bit integer types in most cases + are interchangeable as insns themselves decide how to treat + their value + * `MIR_T_F` and `MIR_T_D` -- IEEE single and double precision floating point values + * `MIR_T_LD` -- long double values. It is machine-dependent and can be IEEE double, x86 80-bit FP, + or IEEE quad precision FP values + * `MIR_T_P` -- pointer values. Depending on the target, a pointer value is actually a 32-bit or 64-bit integer value + * MIR textual representation of the types are correspondingly `i8`, + `u8`, `i16`, `u16`, `i32`, `u32`, `i64`, `u64`, `f`, `d`, `p`, + and `v` + * Function `int MIR_int_type_p (MIR_type_t t)` returns TRUE if given type is an integer one (it includes pointer type too) + * Function `int MIR_fp_type_p (MIR_type_t t)` returns TRUE if given type is a floating point type + +## MIR module + * Module is a high level entity of MIR program + * Module is created through API function `MIR_module_t MIR_new_module (const char *name)` + * Module creation is finished by calling API function `MIR_finish_module` + * You can create only one module at any given time + * List of all created modules can be gotten by function `DLIST (MIR_module_t) *MIR_get_module_list (MIR_context_t ctx)` + * MIR module consists of **items**.
There are following **item types** (and function for their creation): + * **Function**: `MIR_func_item` + * **Import**: `MIR_import_item` (`MIR_item_t MIR_new_import (MIR_context_t ctx, const char *name)`) + * **Export**: `MIR_export_item` (`MIR_item_t MIR_new_export (MIR_context_t ctx, const char *name)`) + * **Forward declaration**: `MIR_forward_item` (`MIR_item_t MIR_new_forward (MIR_context_t ctx, const char *name)`) + * **Prototype**: `MIR_proto_item` (`MIR_new_proto_arr`, `MIR_new_proto`, `MIR_new_vararg_proto_arr`, + `MIR_new_vararg_proto` analogous to `MIR_new_func_arr`, `MIR_new_func`, `MIR_new_vararg_func_arr` and + `MIR_new_vararg_func` -- see below). The only difference is that + two or more prototype argument names can be the same + * **Data**: `MIR_data_item` with optional name + (`MIR_item_t MIR_new_data (MIR_context_t ctx, const char *name, MIR_type_t el_type, size_t nel, const void *els)` + or `MIR_item_t MIR_new_string_data (MIR_context_t ctx, const char *name, MIR_str_t str)`) + * **Reference data**: `MIR_ref_data_item` with optional name + (`MIR_item_t MIR_new_ref_data (MIR_context_t ctx, const char *name, MIR_item_t item, int64_t disp)` + * The address of the item after linking plus `disp` is used to initialize the data + * **Expression Data**: `MIR_expr_data_item` with optional name + (`MIR_item_t MIR_new_expr_data (MIR_context_t ctx, const char *name, MIR_item_func_item)`) + * Not all MIR functions can be used for expression data. 
The expression function should have + only one result, have no arguments, not use any call or any instruction with memory + * The expression function is called during linking and its result is used to initialize the data + * **Memory segment**: `MIR_bss_item` with optional name (`MIR_item_t MIR_new_bss (MIR_context_t ctx, const char *name, size_t len)`) + * Names of MIR functions, imports, and prototypes should be unique in a module + * API functions `MIR_output_item (MIR_context_t ctx, FILE *f, MIR_item_t item)` + and `MIR_output_module (MIR_context_t ctx, FILE *f, MIR_module_t module)` output item or module + textual representation into given file + * MIR text module syntax looks like the following: +``` + <module name>: module + {<module item>} + endmodule +``` + +## MIR function + * Function is a module item + * Function has a **frame**, a stack memory reserved for each function invocation + * Function has **local variables** (sometimes called **registers**), a part of which are **arguments** + * A variable should have a unique name in the function + * A variable is represented by a structure of type `MIR_var_t` + * The structure contains variable name and its type + * MIR function with its arguments is created through API function `MIR_item_t MIR_new_func (MIR_context_t ctx, const + char *name, size_t nres, MIR_type_t *res_types, size_t nargs, ...)` + or function `MIR_item_t MIR_new_func_arr (MIR_context_t ctx, const char *name, size_t nres, MIR_type_t *res_types, size_t nargs, MIR_var_t *arg_vars)` + * Argument variables can be any type + * This type only denotes how the argument value is passed + * Any integer type argument variable actually has type `MIR_T_I64` + * MIR functions with variable number of arguments are created through API functions + `MIR_item_t MIR_new_vararg_func (MIR_context_t ctx, const char *name, size_t nres, MIR_type_t *res_types, size_t nargs, ...)` + or function `MIR_item_t MIR_new_vararg_func_arr (MIR_context_t ctx, const char *name, size_t nres, MIR_type_t
*res_types, size_t nargs, MIR_var_t *arg_vars)` + * `nargs` and `arg_vars` define only fixed arguments + * MIR functions can have more than one result but the possible number of results + and combination of their types are machine-defined. For example, for x86-64 + the function can have up to six results and return two integer + values, two float or double values, and two long double values + in any combination + * MIR function creation is finished by calling API function `MIR_finish_func (MIR_context_t ctx)` + * You can create only one MIR function at any given time + * MIR text function syntax looks like the following (arg-var always has a name besides type): +``` + <function name>: func {<result type>, } [ arg-var {, <arg-var> } [, ...]] + {<insn>} + endfun +``` + * Non-argument function variables are created through API function + `MIR_reg_t MIR_new_func_reg (MIR_context_t ctx, MIR_func_t func, MIR_type_t type, const char *name)` + * The only permitted integer type for the variable is `MIR_T_I64` (or MIR_T_U64???) + * Names in form `t<number>` can not be used as they are fixed for internal purposes + * You can create function variables even after finishing the + function creation. This can be used to modify function insns, + e.g. for optimizations + * Non-argument variable declaration syntax in MIR textual representation looks like the following: +``` + local [ <var type>:<var name> {, <var type>:<var name>} ] +``` + * In MIR textual representation a variable should be defined through `local` before its use + +## MIR insn operands + * MIR insns work with operands + * There are the following operands: + * Signed or unsigned **64-bit integer value operands** created through API functions + `MIR_op_t MIR_new_int_op (MIR_context_t ctx, int64_t v)` and `MIR_op_t MIR_new_uint_op (MIR_context_t ctx, uint64_t v)` + * In MIR text they are represented the same way as C integer numbers (e.g.
octal, decimal, hexadecimal ones) + * **Float, double or long double value operands** created through API functions `MIR_op_t MIR_new_float_op (MIR_context_t ctx, float v)`, + `MIR_op_t MIR_new_double_op (MIR_context_t ctx, double v)`, and `MIR_op_t MIR_new_ldouble_op (MIR_context_t ctx, long double v)` + * In MIR text they are represented the same way as C floating point numbers + * **String operands** created through API functions `MIR_op_t MIR_new_str_op (MIR_context_t ctx, MIR_str_t str)` + * In MIR text they are represented by `typedef struct MIR_str {size_t len; const char *s;} MIR_str_t` + * Strings for each operand are put into memory (which can be modified) and the memory address actually presents the string + * **Label operand** created through API function `MIR_op_t MIR_new_label_op (MIR_context_t ctx, MIR_label_t label)` + * Here `label` is a special insn created by API function `MIR_insn_t MIR_new_label (MIR_context_t ctx)` + * In MIR text, they are represented by unique label name + * **Reference operands** created through API function `MIR_op_t MIR_new_ref_op (MIR_context_t ctx, MIR_item_t item)` + * In MIR text, they are represented by the corresponding item name + * **Register (variable) operands** created through API function `MIR_op_t MIR_new_reg_op (MIR_context_t ctx, MIR_reg_t reg)` + * In MIR text they are represented by the corresponding variable name + * Value of type `MIR_reg_t` is returned by function `MIR_new_func_reg` + or can be gotten by function `MIR_reg_t MIR_reg (MIR_context_t ctx, const char *reg_name, MIR_func_t func)`, e.g. for argument-variables + * **Memory operands** consists of type, displacement, base + register, index register and index scale. 
Memory operand is + created through API function `MIR_op_t MIR_new_mem_op (MIR_context_t ctx, MIR_type_t type, + MIR_disp_t disp, MIR_reg_t base, MIR_reg_t index, MIR_scale_t + scale)` + * The arguments define address of memory as `disp + base + index * scale` + * Integer type input memory is transformed to 64-bit integer value with sign or zero extension + depending on signedness of the type + * Result 64-bit integer value is truncated to integer memory type + * Memory operand has the following syntax in MIR text (an absent displacement means zero, + an absent scale means one, scale should be 1, 2, 4, or 8): + +``` + <type>: <disp> + <type>: [<disp>] (<base reg> [, <index reg> [, <scale> ]]) +``` + * API function `MIR_output_op (MIR_context_t ctx, FILE *f, MIR_op_t op, MIR_func_t func)` outputs the operand + textual representation into given file + + +## MIR insns + * All MIR insns (except call or ret ones) expect a fixed number of operands + * Most MIR insns are 3-operand insns: two inputs and one output + * In the majority of cases **the first insn operand** describes where the insn result (if any) will be placed + * Only register or memory operand can be insn output (result) operand + * MIR insn can be created through API functions `MIR_insn_t MIR_new_insn (MIR_context_t ctx, MIR_insn_code_t code, ...)` + and `MIR_insn_t MIR_new_insn_arr (MIR_context_t ctx, MIR_insn_code_t code, size_t nops, MIR_op_t *ops)` + * Number of operands and their types should be what is expected by the insn being created + * You can not use `MIR_new_insn` for the creation of call and ret insns as these insns have a variable number of operands.
+ To create such insns you should use `MIR_new_insn_arr` or special functions + `MIR_insn_t MIR_new_call_insn (MIR_context_t ctx, size_t nops, ...)` and `MIR_insn_t MIR_new_ret_insn (MIR_context_t ctx, size_t nops, ...)` + * You can get insn name and number of insn operands through API functions + `const char *MIR_insn_name (MIR_context_t ctx, MIR_insn_code_t code)` and `size_t MIR_insn_nops (MIR_context_t ctx, MIR_insn_t insn)` + * You can add a created insn at the beginning or end of function insn list through API functions + `MIR_prepend_insn (MIR_context_t ctx, MIR_item_t func, MIR_insn_t insn)` and `MIR_append_insn (MIR_context_t ctx, MIR_item_t func, MIR_insn_t insn)` + * You can insert a created insn in the middle of function insn list through API functions + `MIR_insert_insn_after (MIR_context_t ctx, MIR_item_t func, MIR_insn_t after, MIR_insn_t insn)` and + `MIR_insert_insn_before (MIR_context_t ctx, MIR_item_t func, MIR_insn_t before, MIR_insn_t insn)` + * The insn `after` and `before` should be already in the list + * You can remove insn from the function list through API function `MIR_remove_insn (MIR_context_t ctx, MIR_item_t func, MIR_insn_t insn)` + * The insn should be not inserted in the list if it is already there + * The insn should be not removed form the list if it is not there + * API function `MIR_output_insn (MIR_context_t ctx, FILE *f, MIR_insn_t insn, MIR_func_t func, int newline_p)` outputs the insn + textual representation into given file with a newline at the end depending on value of `newline_p` + * Insn has the following syntax in MIR text: +``` + {