diff --git a/.gitignore b/.gitignore index 951d953..0182915 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,14 @@ CMakeScripts cmake_install.cmake install_manifest.txt CTestTestfile.cmake +build +buildmir +omrjit +buildllvm +.vscode +.idea +cmake-build-debug +cmake-build-release +buildnojit +nojit +nojita diff --git a/.travis.yml b/.travis.yml index 341281e..543d229 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,7 +14,7 @@ addons: - gcc - ccache install: - - curl http://releases.llvm.org/6.0.1/clang+llvm-6.0.1-x86_64-linux-gnu-ubuntu-16.04.tar.xz | tar -xJf - + - curl https://releases.llvm.org/6.0.1/clang+llvm-6.0.1-x86_64-linux-gnu-ubuntu-16.04.tar.xz | tar -xJf - script: - mkdir $TRAVIS_BUILD_DIR/build - cd $TRAVIS_BUILD_DIR/build && cmake -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=$TRAVIS_BUILD_DIR/clang+llvm-6.0.1-x86_64-linux-gnu-ubuntu-16.04/lib/cmake/llvm -G "Unix Makefiles" -DLLVM_JIT=ON .. diff --git a/CMakeLists.txt b/CMakeLists.txt index 8abfbfd..da66e92 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -108,6 +108,21 @@ if (COMPUTED_GOTO AND NOT MSVC) endif () endif () +include(CheckCCompilerFlag) +check_c_compiler_flag("-march=native" COMPILER_OPT_ARCH_NATIVE_SUPPORTED) +if (COMPILER_OPT_ARCH_NATIVE_SUPPORTED AND NOT CMAKE_C_FLAGS MATCHES "-march=") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native") +endif() +check_c_compiler_flag("-fno-common" COMPILER_OPT_NO_COMMON_SUPPORTED) +if (COMPILER_OPT_NO_COMMON_SUPPORTED AND NOT CMAKE_C_FLAGS MATCHES "-fno-common") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-common") +endif() +check_c_compiler_flag("-fno-stack-protector" COMPILER_OPT_NO_STACK_PROTECTOR_SUPPORTED) +if (COMPILER_OPT_NO_STACK_PROTECTOR_SUPPORTED AND NOT CMAKE_C_FLAGS MATCHES "-fno-stack-protector") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-stack-protector") +endif() + + set(EMBEDDED_DMRC ON) if (LLVM_JIT) find_package(LLVM REQUIRED CONFIG) diff --git a/README.rst b/README.rst index 72325a6..009ee93 100644 --- a/README.rst +++ b/README.rst 
@@ -5,7 +5,7 @@ Ravi Programming Language :target: https://travis-ci.org/dibyendumajumdar/ravi Ravi is a derivative/dialect of `Lua 5.3 `_ with limited optional static typing and -features `LLVM `_ and `Eclipse OMR `_ +features `MIR `_, `LLVM `_ and `Eclipse OMR `_ powered JIT compilers. The name Ravi comes from the Sanskrit word for the Sun. Interestingly a precursor to Lua was `Sol `_ which had support for static types; Sol means the Sun in Portugese. @@ -33,10 +33,9 @@ Features * Optional static typing - for details `see the reference manual `_. * Type specific bytecodes to improve performance * Compatibility with Lua 5.3 (see Compatibility section below) +* New! JIT backend `MIR `_; only Linux and x86-64 supported for now. * `LLVM `_ powered JIT compiler * `Eclipse OMR `_ powered JIT compiler -* New (wip) small JIT backend based on `MIR `_; only Linux and x86-64 supported. -* Built-in C pre-processor, parser and JIT compiler * A `distribution with batteries `_. Documentation @@ -102,7 +101,7 @@ History - New JIT backend `MIR `_. 
* 2020 (Plan) - - New optimizing byte code generator based on new parser / type checker + - `New optimizing byte code generator based on new parser / type checker `_ - Ravi 1.0 release License diff --git a/include/lfunc.h b/include/lfunc.h index 7d42b79..8910e5c 100644 --- a/include/lfunc.h +++ b/include/lfunc.h @@ -35,8 +35,12 @@ */ struct UpVal { TValue *v; /* points to stack or to its own value */ +#ifdef RAVI_DEFER_STATEMENT unsigned int refcount; /* reference counter */ unsigned int flags; /* Used to mark deferred values */ +#else + lu_mem refcount; /* reference counter */ +#endif union { struct { /* (when open) */ UpVal *next; /* linked list */ @@ -54,7 +58,11 @@ LUAI_FUNC CClosure *luaF_newCclosure (lua_State *L, int nelems); LUAI_FUNC LClosure *luaF_newLclosure (lua_State *L, int nelems); LUAI_FUNC void luaF_initupvals (lua_State *L, LClosure *cl); LUAI_FUNC UpVal *luaF_findupval (lua_State *L, StkId level); +#ifdef RAVI_DEFER_STATEMENT LUAI_FUNC int luaF_close (lua_State *L, StkId level, int status); +#else +LUAI_FUNC void luaF_close (lua_State *L, StkId level); +#endif LUAI_FUNC void luaF_freeproto (lua_State *L, Proto *f); /* The additional type argument is a Ravi extension */ LUAI_FUNC const char *luaF_getlocalname (const Proto *func, int local_number, diff --git a/include/llex.h b/include/llex.h index 4dc619f..6b66fa9 100644 --- a/include/llex.h +++ b/include/llex.h @@ -27,7 +27,11 @@ enum RESERVED { /* terminal symbols denoted by reserved words */ TK_AND = FIRST_RESERVED, TK_BREAK, TK_DO, TK_ELSE, TK_ELSEIF, TK_END, TK_FALSE, TK_FOR, TK_FUNCTION, +#ifdef RAVI_DEFER_STATEMENT TK_GOTO, TK_IF, TK_IN, TK_LOCAL, TK_DEFER, TK_NIL, TK_NOT, TK_OR, TK_REPEAT, +#else + TK_GOTO, TK_IF, TK_IN, TK_LOCAL, TK_NIL, TK_NOT, TK_OR, TK_REPEAT, +#endif TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE, /* other terminal symbols */ TK_IDIV, TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE, diff --git a/include/lopcodes.h b/include/lopcodes.h index 3bc624f..e5381eb 100644 --- 
a/include/lopcodes.h +++ b/include/lopcodes.h @@ -280,13 +280,16 @@ OP_RAVI_GETFIELD, /* A B C R(A) := R(B)[RK(C)], string key */ OP_RAVI_SELF_SK, /* A B C R(A+1) := R(B); R(A) := R(B)[RK(C)], string key */ OP_RAVI_SETFIELD, /* A B C R(A)[RK(B)] := RK(C), string key */ OP_RAVI_GETTABUP_SK, /* A B C R(A) := UpValue[B][RK(C)], string key */ - +#ifdef RAVI_DEFER_STATEMENT OP_RAVI_DEFER, /* A mark variable A "deferred" */ - +#endif } OpCode; - +#ifdef RAVI_DEFER_STATEMENT #define NUM_OPCODES (cast(int, OP_RAVI_DEFER) + 1) +#else +#define NUM_OPCODES (cast(int, OP_RAVI_GETTABUP_SK) + 1) +#endif /*=========================================================================== Notes: diff --git a/include/luaconf.h b/include/luaconf.h index c73e1cb..5dd6d23 100644 --- a/include/luaconf.h +++ b/include/luaconf.h @@ -834,6 +834,8 @@ /* If following is defined as true then LLVM instructions emitted for arithmetic ops priority floating point ops, else default is to prioritise integer ops */ #define RAVI_USE_LLVM_ARITH_FLOATPRIORITY 1 +/* Enables the 'defer' statement - RAVI extension */ +#define RAVI_DEFER_STATEMENT #endif diff --git a/include/lvm.h b/include/lvm.h index 924db5d..2d378b0 100644 --- a/include/lvm.h +++ b/include/lvm.h @@ -161,7 +161,9 @@ LUAI_FUNC void raviV_gettable_i(lua_State *L, const TValue *t, TValue *key, StkI LUAI_FUNC void raviV_settable_i(lua_State *L, const TValue *t, TValue *key, StkId val); LUAI_FUNC void raviV_op_totype(lua_State *L, TValue *ra, TValue *rb); LUAI_FUNC int raviV_check_usertype(lua_State *L, TString *name, const TValue *o); +#ifdef RAVI_DEFER_STATEMENT LUAI_FUNC void raviV_op_defer(lua_State *L, TValue *ra); +#endif LUAI_FUNC void raviV_debug_trace(lua_State *L, int opCode, int pc); #endif diff --git a/include/ravi_llvmcodegen.h b/include/ravi_llvmcodegen.h index 7d4c7fe..c49c569 100644 --- a/include/ravi_llvmcodegen.h +++ b/include/ravi_llvmcodegen.h @@ -288,7 +288,9 @@ struct LuaLLVMTypes { llvm::FunctionType *raviV_gettable_iT; 
llvm::FunctionType *raviV_settable_iT; llvm::FunctionType *raviV_op_totypeT; +#ifdef RAVI_DEFER_STATEMENT llvm::FunctionType *raviV_op_deferT; +#endif llvm::FunctionType *raviH_set_intT; llvm::FunctionType *raviH_set_floatT; @@ -830,7 +832,9 @@ struct RaviFunctionDef { llvm::Function *raviV_gettable_iF; llvm::Function *raviV_settable_iF; llvm::Function *raviV_op_totypeF; +#ifdef RAVI_DEFER_STATEMENT llvm::Function *raviV_op_deferF; +#endif // array setters llvm::Function *raviH_set_intF; @@ -1373,7 +1377,9 @@ class RaviCodeGenerator { void emit_BNOT(RaviFunctionDef *def, int A, int B, int pc); +#ifdef RAVI_DEFER_STATEMENT void emit_DEFER(RaviFunctionDef *def, int A, int pc); +#endif void emit_bitwise_shiftl(RaviFunctionDef *def, llvm::Value *ra, int B, lua_Integer y); diff --git a/mir/CMakeLists.txt b/mir/CMakeLists.txt index e3790a5..3c48e0b 100644 --- a/mir/CMakeLists.txt +++ b/mir/CMakeLists.txt @@ -6,7 +6,11 @@ message(STATUS "OS type is ${CMAKE_SYSTEM_NAME}") message(STATUS "System processor is ${CMAKE_HOST_SYSTEM_PROCESSOR}") message(STATUS "Build type is ${CMAKE_BUILD_TYPE}") -set(TARGET x86_64) +if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") + set(TARGET x86_64) +else() + message(FATAL_ERROR "Unsupported platform") +endif() set(MIR_HEADERS mir.h diff --git a/mir/MIR.md b/mir/MIR.md new file mode 100644 index 0000000..dc9d551 --- /dev/null +++ b/mir/MIR.md @@ -0,0 +1,585 @@ +# Medium Intermediate Representation (file mir.h) + * This document describes MIR itself, API for its creation, and MIR textual representation + * MIR textual representation is assembler like. 
Each directive or insn should be put on a separate line + * In MIR textual syntax we use + * `[]` for optional construction + * `{}` for repeating zero or more times + * `<>` for some informal construction description or construction already described or will be described + +## MIR context + * MIR API code has an implicit state called the MIR context + * MIR context is represented by data of `MIR_context_t` + * MIR context is created by function `MIR_context_t MIR_init (void)` + * Every MIR API function (except for `MIR_init`) requires MIR context passed through the first argument of type `MIR_context_t` + * You can use MIR functions in different threads without any synchronization + if they work with different contexts in each thread + +## MIR program + * MIR program consists of MIR **modules** + * To start work with MIR program, you should first call API function `MIR_init` + * API function `MIR_finish (MIR_context_t ctx)` should be called last. It frees all internal data used to work with MIR program and all IR (insns, functions, items, and modules) created in this context + * API function `MIR_output (MIR_context_t ctx, FILE *f)` outputs MIR textual representation of the program into given file + * API function `MIR_scan_string (MIR_context_t ctx, const char *str)` reads textual MIR representation given by a string + * API functions `MIR_write (MIR_context_t ctx, FILE *f)` and + `MIR_read (MIR_context_t ctx, FILE *f)` output and read + **binary MIR representation** to/from given file. There are also + functions `MIR_write_with_func (MIR_context_t ctx, const int + (*writer_func) (MIR_context_t, uint8_t))` and `MIR_read_with_func + (MIR_context_t ctx, const int (*reader_func) (MIR_context_t))` to + output and read **binary MIR representation** through a function + given as an argument. 
The reader function should return EOF as + the end of the binary MIR representation, the writer function + should return the number of successfully output bytes + * Binary MIR representation is much more compact and faster to read than textual one + +## MIR data type + * MIR program works with the following **data types**: + * `MIR_T_I8` and `MIR_T_U8` -- signed and unsigned 8-bit integer values + * `MIR_T_I16` and `MIR_T_U16` -- signed and unsigned 16-bit integer values + * `MIR_T_I32` and `MIR_T_U32` -- signed and unsigned 32-bit integer values + * `MIR_T_I64` and `MIR_T_U64` -- signed and unsigned 64-bit integer values + * ??? signed and unsigned 64-bit integer types in most cases + are interchangeable as insns themselves decide how to treat + their value + * `MIR_T_F` and `MIR_T_D` -- IEEE single and double precision floating point values + * `MIR_T_LD` - long double values. It is machine-dependent and can be IEEE double, x86 80-bit FP, + or IEEE quad precision FP values + * `MIR_T_P` -- pointer values. Depending on the target pointer value is actually 32-bit or 64-bit integer value + * MIR textual representation of the types are correspondingly `i8`, + `u8`, `i16`, `u16`, `i32`, `u32`, `i64`, `u64`, `f`, `d`, `p`, + and `v` + * Function `int MIR_int_type_p (MIR_type_t t)` returns TRUE if given type is an integer one (it includes pointer type too) + * Function `int MIR_fp_type_p (MIR_type_t t)` returns TRUE if given type is a floating point type + +## MIR module + * Module is a high level entity of MIR program + * Module is created through API function `MIR_module_t MIR_new_module (MIR_context_t ctx, const char *name)` + * Module creation is finished by calling API function `MIR_finish_module` + * You can create only one module at any given time + * List of all created modules can be gotten by function `DLIST (MIR_module_t) *MIR_get_module_list (MIR_context_t ctx)` + * MIR module consists of **items**. 
There are the following **item types** (and functions for their creation): + * **Function**: `MIR_func_item` + * **Import**: `MIR_import_item` (`MIR_item_t MIR_new_import (MIR_context_t ctx, const char *name)`) + * **Export**: `MIR_export_item` (`MIR_item_t MIR_new_export (MIR_context_t ctx, const char *name)`) + * **Forward declaration**: `MIR_forward_item` (`MIR_item_t MIR_new_forward (MIR_context_t ctx, const char *name)`) + * **Prototype**: `MIR_proto_item` (`MIR_new_proto_arr`, `MIR_new_proto`, `MIR_new_vararg_proto_arr`, + `MIR_new_vararg_proto` analogous to `MIR_new_func_arr`, `MIR_new_func`, `MIR_new_vararg_func_arr` and + `MIR_new_vararg_func` -- see below). The only difference is that + two or more prototype argument names can be the same + * **Data**: `MIR_data_item` with optional name + (`MIR_item_t MIR_new_data (MIR_context_t ctx, const char *name, MIR_type_t el_type, size_t nel, const void *els)` + or `MIR_item_t MIR_new_string_data (MIR_context_t ctx, const char *name, MIR_str_t str)`) + * **Reference data**: `MIR_ref_data_item` with optional name + (`MIR_item_t MIR_new_ref_data (MIR_context_t ctx, const char *name, MIR_item_t item, int64_t disp)`) + * The address of the item after linking plus `disp` is used to initialize the data + * **Expression Data**: `MIR_expr_data_item` with optional name + (`MIR_item_t MIR_new_expr_data (MIR_context_t ctx, const char *name, MIR_item_t func_item)`) + * Not all MIR functions can be used for expression data. 
The expression function should have + only one result, have no arguments, not use any call or any instruction with memory + * The expression function is called during linking and its result is used to initialize the data + * **Memory segment**: `MIR_bss_item` with optional name (`MIR_item_t MIR_new_bss (MIR_context_t ctx, const char *name, size_t len)`) + * Names of MIR functions, imports, and prototypes should be unique in a module + * API functions `MIR_output_item (MIR_context_t ctx, FILE *f, MIR_item_t item)` + and `MIR_output_module (MIR_context_t ctx, FILE *f, MIR_module_t module)` output item or module + textual representation into given file + * MIR text module syntax looks like the following: +``` + <module name>: module + {<module item>} + endmodule +``` + +## MIR function + * Function is a module item + * Function has a **frame**, a stack memory reserved for each function invocation + * Function has **local variables** (sometimes called **registers**), a part of which are **arguments** + * A variable should have a unique name in the function + * A variable is represented by a structure of type `MIR_var_t` + * The structure contains variable name and its type + * MIR function with its arguments is created through API function `MIR_item_t MIR_new_func (MIR_context_t ctx, const + char *name, size_t nres, MIR_type_t *res_types, size_t nargs, ...)` + or function `MIR_item_t MIR_new_func_arr (MIR_context_t ctx, const char *name, size_t nres, MIR_type_t *res_types, size_t nargs, MIR_var_t *arg_vars)` + * Argument variables can be any type + * This type only denotes how the argument value is passed + * Any integer type argument variable has actually type `MIR_T_I64` + * MIR functions with variable number of arguments are created through API functions + `MIR_item_t MIR_new_vararg_func (MIR_context_t ctx, const char *name, size_t nres, MIR_type_t *res_types, size_t nargs, ...)` + or function `MIR_item_t MIR_new_vararg_func_arr (MIR_context_t ctx, const char *name, size_t nres, MIR_type_t 
*res_types, size_t nargs, MIR_var_t *arg_vars)` + * `nargs` and `arg_vars` define only fixed arguments + * MIR functions can have more than one result but possible number of results + and combination of their types are machine-defined. For example, for x86-64 + the function can have up to six results and return two integer + values, two float or double values, and two long double values + in any combination + * MIR function creation is finished by calling API function `MIR_finish_func (MIR_context_t ctx)` + * You can create only one MIR function at any given time + * MIR text function syntax looks like the following (arg-var always has a name besides type): +``` + <function name>: func {<result type>, } [ arg-var {, <arg-var> } [, ...]] + {<insn>} + endfun +``` + * Non-argument function variables are created through API function + `MIR_reg_t MIR_new_func_reg (MIR_context_t ctx, MIR_func_t func, MIR_type_t type, const char *name)` + * The only permitted integer type for the variable is `MIR_T_I64` (or MIR_T_U64???) + * Names in form `t<number>` can not be used as they are fixed for internal purposes + * You can create function variables even after finishing the + function creation. This can be used to modify function insns, + e.g. for optimizations + * Non-argument variable declaration syntax in MIR textual representation looks like the following: +``` + local [ <var type>:<var name> {, <var type>:<var name>} ] +``` + * In MIR textual representation variable should be defined through `local` before its use + +## MIR insn operands + * MIR insns work with operands + * There are the following operands: + * Signed or unsigned **64-bit integer value operands** created through API functions + `MIR_op_t MIR_new_int_op (MIR_context_t ctx, int64_t v)` and `MIR_op_t MIR_new_uint_op (MIR_context_t ctx, uint64_t v)` + * In MIR text they are represented the same way as C integer numbers (e.g. 
octal, decimal, hexadecimal ones) + * **Float, double or long double value operands** created through API functions `MIR_op_t MIR_new_float_op (MIR_context_t ctx, float v)`, + `MIR_op_t MIR_new_double_op (MIR_context_t ctx, double v)`, and `MIR_op_t MIR_new_ldouble_op (MIR_context_t ctx, long double v)` + * In MIR text they are represented the same way as C floating point numbers + * **String operands** created through API functions `MIR_op_t MIR_new_str_op (MIR_context_t ctx, MIR_str_t str)` + * In MIR text they are represented by `typedef struct MIR_str {size_t len; const char *s;} MIR_str_t` + * Strings for each operand are put into memory (which can be modified) and the memory address actually presents the string + * **Label operand** created through API function `MIR_op_t MIR_new_label_op (MIR_context_t ctx, MIR_label_t label)` + * Here `label` is a special insn created by API function `MIR_insn_t MIR_new_label (MIR_context_t ctx)` + * In MIR text, they are represented by unique label name + * **Reference operands** created through API function `MIR_op_t MIR_new_ref_op (MIR_context_t ctx, MIR_item_t item)` + * In MIR text, they are represented by the corresponding item name + * **Register (variable) operands** created through API function `MIR_op_t MIR_new_reg_op (MIR_context_t ctx, MIR_reg_t reg)` + * In MIR text they are represented by the corresponding variable name + * Value of type `MIR_reg_t` is returned by function `MIR_new_func_reg` + or can be gotten by function `MIR_reg_t MIR_reg (MIR_context_t ctx, const char *reg_name, MIR_func_t func)`, e.g. for argument-variables + * **Memory operands** consist of type, displacement, base + register, index register and index scale. 
Memory operand is + created through API function `MIR_op_t MIR_new_mem_op (MIR_context_t ctx, MIR_type_t type, + MIR_disp_t disp, MIR_reg_t base, MIR_reg_t index, MIR_scale_t + scale)` + * The arguments define address of memory as `disp + base + index * scale` + * Integer type input memory is transformed to 64-bit integer value with sign or zero extension + depending on signedness of the type + * result 64-bit integer value is truncated to integer memory type + * Memory operand has the following syntax in MIR text (absent displacement means zero one, + absent scale means one, scale should be 1, 2, 4, or 8): + +``` + <type>: <disp> + <type>: [<disp>] (<base reg> [, <index reg> [, <scale> ]]) +``` + * API function `MIR_output_op (MIR_context_t ctx, FILE *f, MIR_op_t op, MIR_func_t func)` outputs the operand + textual representation into given file + + +## MIR insns + * All MIR insns (but call or ret one) expect a fixed number of operands + * Most MIR insns are 3-operand insns: two inputs and one output + * In the majority of cases **the first insn operand** describes where the insn result (if any) will be placed + * Only register or memory operand can be insn output (result) operand + * MIR insn can be created through API functions `MIR_insn_t MIR_new_insn (MIR_context_t ctx, MIR_insn_code_t code, ...)` + and `MIR_insn_t MIR_new_insn_arr (MIR_context_t ctx, MIR_insn_code_t code, size_t nops, MIR_op_t *ops)` + * Number of operands and their types should be what is expected by the insn being created + * You can not use `MIR_new_insn` for the creation of call and ret insns as these insns have a variable number of operands. 
+ To create such insns you should use `MIR_new_insn_arr` or special functions + `MIR_insn_t MIR_new_call_insn (MIR_context_t ctx, size_t nops, ...)` and `MIR_insn_t MIR_new_ret_insn (MIR_context_t ctx, size_t nops, ...)` + * You can get insn name and number of insn operands through API functions + `const char *MIR_insn_name (MIR_context_t ctx, MIR_insn_code_t code)` and `size_t MIR_insn_nops (MIR_context_t ctx, MIR_insn_t insn)` + * You can add a created insn at the beginning or end of function insn list through API functions + `MIR_prepend_insn (MIR_context_t ctx, MIR_item_t func, MIR_insn_t insn)` and `MIR_append_insn (MIR_context_t ctx, MIR_item_t func, MIR_insn_t insn)` + * You can insert a created insn in the middle of function insn list through API functions + `MIR_insert_insn_after (MIR_context_t ctx, MIR_item_t func, MIR_insn_t after, MIR_insn_t insn)` and + `MIR_insert_insn_before (MIR_context_t ctx, MIR_item_t func, MIR_insn_t before, MIR_insn_t insn)` + * The insn `after` and `before` should be already in the list + * You can remove insn from the function list through API function `MIR_remove_insn (MIR_context_t ctx, MIR_item_t func, MIR_insn_t insn)` + * The insn should not be inserted in the list if it is already there + * The insn should not be removed from the list if it is not there + * API function `MIR_output_insn (MIR_context_t ctx, FILE *f, MIR_insn_t insn, MIR_func_t func, int newline_p)` outputs the insn + textual representation into given file with a newline at the end depending on value of `newline_p` + * Insn has the following syntax in MIR text: +``` + {