issue #98 save current state in parser branch

4 years ago · 93670a4bb9
parent 804c7a61a7 bfaf2a3cef
commit 93670a4bb9
54 changed files with 6764 additions and 1082 deletions
--- a/.gitignore
+++ b/.gitignore
@ -4,3 +4,14 @@ CMakeScripts
 cmake_install.cmake
 install_manifest.txt
 CTestTestfile.cmake
+build
+buildmir
+omrjit
+buildllvm
+.vscode
+.idea
+cmake-build-debug
+cmake-build-release
+buildnojit
+nojit
+nojita
--- a/.travis.yml
+++ b/.travis.yml
@ -14,7 +14,7 @@ addons:
      - gcc
      - ccache
 install:
- - curl http://releases.llvm.org/6.0.1/clang+llvm-6.0.1-x86_64-linux-gnu-ubuntu-16.04.tar.xz | tar -xJf -
+ - curl https://releases.llvm.org/6.0.1/clang+llvm-6.0.1-x86_64-linux-gnu-ubuntu-16.04.tar.xz | tar -xJf -
 script:
 - mkdir $TRAVIS_BUILD_DIR/build
 - cd $TRAVIS_BUILD_DIR/build && cmake -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=$TRAVIS_BUILD_DIR/clang+llvm-6.0.1-x86_64-linux-gnu-ubuntu-16.04/lib/cmake/llvm -G "Unix Makefiles" -DLLVM_JIT=ON ..
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -108,6 +108,21 @@ if (COMPUTED_GOTO AND NOT MSVC)
    endif ()
 endif ()

+# Probe the C compiler for a few optional flags.  Each supported flag is
+# appended to CMAKE_C_FLAGS unless a matching setting is already present.
+include(CheckCCompilerFlag)
+
+# -march=native: skipped when any -march= value is already in CMAKE_C_FLAGS
+check_c_compiler_flag("-march=native" COMPILER_OPT_ARCH_NATIVE_SUPPORTED)
+if (NOT CMAKE_C_FLAGS MATCHES "-march=")
+    if (COMPILER_OPT_ARCH_NATIVE_SUPPORTED)
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native")
+    endif ()
+endif ()
+
+# -fno-common: skipped when the flag is already in CMAKE_C_FLAGS
+check_c_compiler_flag("-fno-common" COMPILER_OPT_NO_COMMON_SUPPORTED)
+if (NOT CMAKE_C_FLAGS MATCHES "-fno-common")
+    if (COMPILER_OPT_NO_COMMON_SUPPORTED)
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-common")
+    endif ()
+endif ()
+
+# -fno-stack-protector: skipped when the flag is already in CMAKE_C_FLAGS
+check_c_compiler_flag("-fno-stack-protector" COMPILER_OPT_NO_STACK_PROTECTOR_SUPPORTED)
+if (NOT CMAKE_C_FLAGS MATCHES "-fno-stack-protector")
+    if (COMPILER_OPT_NO_STACK_PROTECTOR_SUPPORTED)
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-stack-protector")
+    endif ()
+endif ()
+
+
 set(EMBEDDED_DMRC ON)
 if (LLVM_JIT)
    find_package(LLVM REQUIRED CONFIG)
--- a/README.rst
+++ b/README.rst
@ -5,7 +5,7 @@ Ravi Programming Language
    :target: https://travis-ci.org/dibyendumajumdar/ravi

 Ravi is a derivative/dialect of `Lua 5.3 <http://www.lua.org/>`_ with limited optional static typing and 
-features `LLVM <http://www.llvm.org/>`_ and `Eclipse OMR <https://github.com/dibyendumajumdar/nj>`_ 
+features `MIR <https://github.com/vnmakarov/mir>`_, `LLVM <http://www.llvm.org/>`_ and `Eclipse OMR <https://github.com/dibyendumajumdar/nj>`_ 
 powered JIT compilers. The name Ravi comes from the Sanskrit word for the Sun. 
 Interestingly a precursor to Lua was `Sol <http://www.lua.org/history.html>`_ which had support for 
 static types; Sol means the Sun in Portuguese.
@ -33,10 +33,9 @@ Features
 * Optional static typing - for details `see the reference manual <https://the-ravi-programming-language.readthedocs.io/en/latest/ravi-reference.html>`_.
 * Type specific bytecodes to improve performance
 * Compatibility with Lua 5.3 (see Compatibility section below)
+* New! JIT backend `MIR <https://github.com/vnmakarov/mir>`_; only Linux and x86-64 supported for now.
 * `LLVM <http://www.llvm.org/>`_ powered JIT compiler
 * `Eclipse OMR <https://github.com/dibyendumajumdar/nj>`_ powered JIT compiler
-* New (wip) small JIT backend based on `MIR <https://github.com/vnmakarov/mir>`_; only Linux and x86-64 supported.
-* Built-in C pre-processor, parser and JIT compiler
 * A `distribution with batteries <https://github.com/dibyendumajumdar/Suravi>`_.

 Documentation
@ -102,7 +101,7 @@ History
       - New JIT backend `MIR <https://github.com/vnmakarov/mir>`_. 

 * 2020 (Plan)
-       - New optimizing byte code generator based on new parser / type checker
+       - `New optimizing byte code generator based on new parser / type checker <https://github.com/dibyendumajumdar/ravi-compiler>`_
       - Ravi 1.0 release

 License
--- a/include/lfunc.h
+++ b/include/lfunc.h
@ -35,8 +35,12 @@
 */
 struct UpVal {
  TValue *v;  /* points to stack or to its own value */
+#ifdef RAVI_DEFER_STATEMENT
  unsigned int refcount;  /* reference counter */
  unsigned int flags; /* Used to mark deferred values */
+#else
+  lu_mem refcount;  /* reference counter */
+#endif
  union {
    struct {  /* (when open) */
      UpVal *next;  /* linked list */
@ -54,7 +58,11 @@ LUAI_FUNC CClosure *luaF_newCclosure (lua_State *L, int nelems);
 LUAI_FUNC LClosure *luaF_newLclosure (lua_State *L, int nelems);
 LUAI_FUNC void luaF_initupvals (lua_State *L, LClosure *cl);
 LUAI_FUNC UpVal *luaF_findupval (lua_State *L, StkId level);
+#ifdef RAVI_DEFER_STATEMENT
 LUAI_FUNC int luaF_close (lua_State *L, StkId level, int status);
+#else
+LUAI_FUNC void luaF_close (lua_State *L, StkId level);
+#endif
 LUAI_FUNC void luaF_freeproto (lua_State *L, Proto *f);
 /* The additional type argument is a Ravi extension */
 LUAI_FUNC const char *luaF_getlocalname (const Proto *func, int local_number,
--- a/include/llex.h
+++ b/include/llex.h
@ -27,7 +27,11 @@ enum RESERVED {
  /* terminal symbols denoted by reserved words */
  TK_AND = FIRST_RESERVED, TK_BREAK,
  TK_DO, TK_ELSE, TK_ELSEIF, TK_END, TK_FALSE, TK_FOR, TK_FUNCTION,
+#ifdef RAVI_DEFER_STATEMENT
  TK_GOTO, TK_IF, TK_IN, TK_LOCAL, TK_DEFER, TK_NIL, TK_NOT, TK_OR, TK_REPEAT,
+#else
+  TK_GOTO, TK_IF, TK_IN, TK_LOCAL, TK_NIL, TK_NOT, TK_OR, TK_REPEAT,
+#endif
  TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE,
  /* other terminal symbols */
  TK_IDIV, TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE,
--- a/include/lopcodes.h
+++ b/include/lopcodes.h
@ -280,13 +280,16 @@ OP_RAVI_GETFIELD,     /*	A B C	R(A) := R(B)[RK(C)], string key  */
 OP_RAVI_SELF_SK,      /*	A B C	R(A+1) := R(B); R(A) := R(B)[RK(C)], string key */
 OP_RAVI_SETFIELD,     /*	A B C	R(A)[RK(B)] := RK(C), string key */
 OP_RAVI_GETTABUP_SK,  /*	A B C	R(A) := UpValue[B][RK(C)], string key */
-
+#ifdef RAVI_DEFER_STATEMENT
 OP_RAVI_DEFER,        /*        A	mark variable A "deferred"	  */
-
+#endif
 } OpCode;

-
+#ifdef RAVI_DEFER_STATEMENT
 #define NUM_OPCODES	(cast(int, OP_RAVI_DEFER) + 1)
+#else
+#define NUM_OPCODES	(cast(int, OP_RAVI_GETTABUP_SK) + 1)
+#endif

 /*===========================================================================
  Notes:
--- a/include/luaconf.h
+++ b/include/luaconf.h
@ -834,6 +834,8 @@
 /* If following is defined as true then LLVM instructions emitted for arithmetic ops 
   priority floating point ops, else default is to prioritise integer ops */
 #define RAVI_USE_LLVM_ARITH_FLOATPRIORITY 1
+/* Enables the 'defer' statement - RAVI extension */
+#define RAVI_DEFER_STATEMENT

 #endif

--- a/include/lvm.h
+++ b/include/lvm.h
@ -161,7 +161,9 @@ LUAI_FUNC void raviV_gettable_i(lua_State *L, const TValue *t, TValue *key, StkI
 LUAI_FUNC void raviV_settable_i(lua_State *L, const TValue *t, TValue *key, StkId val);
 LUAI_FUNC void raviV_op_totype(lua_State *L, TValue *ra, TValue *rb);
 LUAI_FUNC int raviV_check_usertype(lua_State *L, TString *name, const TValue *o);
+#ifdef RAVI_DEFER_STATEMENT
 LUAI_FUNC void raviV_op_defer(lua_State *L, TValue *ra);
+#endif
 LUAI_FUNC void raviV_debug_trace(lua_State *L, int opCode, int pc);

 #endif
--- a/include/ravi_llvmcodegen.h
+++ b/include/ravi_llvmcodegen.h
@ -288,7 +288,9 @@ struct LuaLLVMTypes {
  llvm::FunctionType *raviV_gettable_iT;
  llvm::FunctionType *raviV_settable_iT;
  llvm::FunctionType *raviV_op_totypeT;
+#ifdef RAVI_DEFER_STATEMENT
  llvm::FunctionType *raviV_op_deferT;
+#endif

  llvm::FunctionType *raviH_set_intT;
  llvm::FunctionType *raviH_set_floatT;
@ -830,7 +832,9 @@ struct RaviFunctionDef {
  llvm::Function *raviV_gettable_iF;
  llvm::Function *raviV_settable_iF;
  llvm::Function *raviV_op_totypeF;
+#ifdef RAVI_DEFER_STATEMENT
  llvm::Function *raviV_op_deferF;
+#endif

  // array setters
  llvm::Function *raviH_set_intF;
@ -1373,7 +1377,9 @@ class RaviCodeGenerator {

  void emit_BNOT(RaviFunctionDef *def, int A, int B, int pc);

+#ifdef RAVI_DEFER_STATEMENT
  void emit_DEFER(RaviFunctionDef *def, int A, int pc);
+#endif

  void emit_bitwise_shiftl(RaviFunctionDef *def, llvm::Value *ra, int B, lua_Integer y);

--- a/mir/CMakeLists.txt
+++ b/mir/CMakeLists.txt
@ -6,7 +6,11 @@ message(STATUS "OS type is ${CMAKE_SYSTEM_NAME}")
 message(STATUS "System processor is ${CMAKE_HOST_SYSTEM_PROCESSOR}")
 message(STATUS "Build type is ${CMAKE_BUILD_TYPE}")

-set(TARGET x86_64)
+# Select the MIR code-generation target from the processor name.
+# x86_64 is the only target this file knows how to build.
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
+    set(TARGET x86_64)
+else()
+    # FATAL_ERROR is the message() mode that stops configuration; a bare
+    # "FATAL" is not a mode keyword and would only print a notice and continue
+    # with TARGET unset.
+    message(FATAL_ERROR "Unsupported platform: ${CMAKE_SYSTEM_PROCESSOR}")
+endif()

 set(MIR_HEADERS 
    mir.h
--- a/mir/MIR.md
+++ b/mir/MIR.md
@ -0,0 +1,585 @@
+# Medium Intermediate Representation (file mir.h)
+  * This document describes MIR itself, API for its creation, and MIR textual representation
+  * MIR textual representation is assembler like.  Each directive or insn should be put on a separate line
+  * In MIR textual syntax we use
+    * `[]` for optional construction
+    * `{}` for repeating zero or more times
+    * `<>` for some informal construction description or construction already described or will be described
+  
+## MIR context
+  * MIR API code has an implicit state called the MIR context
+  * MIR context is represented by data of `MIR_context_t`
+  * MIR context is created by function `MIR_context_t MIR_init (void)`
+  * Every MIR API function (except for `MIR_init`) requires MIR context passed through the first argument of type `MIR_context_t`
+  * You can use MIR functions in different threads without any synchronization
+    if they work with different contexts in each thread
+
+## MIR program
+   * MIR program consists of MIR **modules**
+   * To start work with MIR program, you should first call API function `MIR_init`
+   * API function `MIR_finish (MIR_context_t ctx)` should be called last.  It frees all internal data used to work with MIR program and all IR (insns, functions, items, and modules) created in this context
+   * API function `MIR_output (MIR_context_t ctx, FILE *f)` outputs MIR textual representation of the program into given file
+   * API function `MIR_scan_string (MIR_context_t ctx, const char *str)` reads textual MIR representation given by a string
+   * API functions `MIR_write (MIR_context_t ctx, FILE *f)` and
+     `MIR_read (MIR_context_t ctx, FILE *f)` outputs and reads
+     **binary MIR representation** to/from given file.  There are also
+     functions `MIR_write_with_func (MIR_context_t ctx, const int
+     (*writer_func) (MIR_context_t, uint8_t))` and `MIR_read_with_func
+     (MIR_context_t ctx, const int (*reader_func) (MIR_context_t))` to
+     output and read **binary MIR representation** through a function
+     given as an argument.  The reader function should return EOF at
+     the end of the binary MIR representation; the writer function
+     should return the number of successfully output bytes
+     * Binary MIR representation is much more compact and faster to read than the textual one
+
+## MIR data type
+   * MIR program works with the following **data types**:
+     * `MIR_T_I8` and `MIR_T_U8` -- signed and unsigned 8-bit integer values
+     * `MIR_T_I16` and `MIR_T_U16` -- signed and unsigned 16-bit integer values
+     * `MIR_T_I32` and `MIR_T_U32` -- signed and unsigned 32-bit integer values
+     * `MIR_T_I64` and `MIR_T_U64` -- signed and unsigned 64-bit integer values
+       * ??? signed and unsigned 64-bit integer types in most cases
+         are interchangeable as insns themselves decide how to treat
+         their value
+     * `MIR_T_F` and `MIR_T_D` -- IEEE single and double precision floating point values
+     * `MIR_T_LD` - long double values.  It is machine-dependent and can be IEEE double, x86 80-bit FP,
+       or IEEE quad precision FP values
+     * `MIR_T_P` -- pointer values.  Depending on the target pointer value is actually 32-bit or 64-bit integer value
+   * MIR textual representation of the types are correspondingly `i8`,
+     `u8`, `i16`, `u16`, `i32`, `u32`, `i64`, `u64`, `f`, `d`, `p`,
+     and `v`
+   * Function `int MIR_int_type_p (MIR_type_t t)` returns TRUE if given type is an integer one (it includes pointer type too)
+   * Function `int MIR_fp_type_p (MIR_type_t t)` returns TRUE if given type is a floating point type
+   
+## MIR module
+  * Module is a high level entity of MIR program
+  * Module is created through API function `MIR_module_t MIR_new_module (const char *name)`
+  * Module creation is finished by calling API function `MIR_finish_module`
+  * You can create only one module at any given time
+  * List of all created modules can be gotten by function `DLIST (MIR_module_t) *MIR_get_module_list (MIR_context_t ctx)`
+  * MIR module consists of **items**.  There are following **item types** (and function for their creation):
+    * **Function**: `MIR_func_item`
+    * **Import**: `MIR_import_item` (`MIR_item_t MIR_new_import (MIR_context_t ctx, const char *name)`)
+    * **Export**: `MIR_export_item` (`MIR_item_t MIR_new_export (MIR_context_t ctx, const char *name)`)
+    * **Forward declaration**: `MIR_forward_item` (`MIR_item_t MIR_new_forward (MIR_context_t ctx, const char *name)`)
+    * **Prototype**: `MIR_proto_item` (`MIR_new_proto_arr`, `MIR_new_proto`, `MIR_new_vararg_proto_arr`,
+      `MIR_new_vararg_proto` analogous to `MIR_new_func_arr`, `MIR_new_func`, `MIR_new_vararg_func_arr` and
+      `MIR_new_vararg_func` -- see below).  The only difference is that
+      two or more prototype argument names can be the same
+    * **Data**: `MIR_data_item` with optional name
+      (`MIR_item_t MIR_new_data (MIR_context_t ctx, const char *name, MIR_type_t el_type, size_t nel, const void *els)`
+       or `MIR_item_t MIR_new_string_data (MIR_context_t ctx, const char *name, MIR_str_t str)`)
+    * **Reference data**: `MIR_ref_data_item` with optional name
+      (`MIR_item_t MIR_new_ref_data (MIR_context_t ctx, const char *name, MIR_item_t item, int64_t disp)`)
+      * The address of the item after linking plus `disp` is used to initialize the data
+    * **Expression Data**: `MIR_expr_data_item` with optional name
+      (`MIR_item_t MIR_new_expr_data (MIR_context_t ctx, const char *name, MIR_item_func_item)`)
+      * Not all MIR functions can be used for expression data.  The expression function should have
+        only one result, have no arguments, not use any call or any instruction with memory
+      * The expression function is called during linking and its result is used to initialize the data
+    * **Memory segment**: `MIR_bss_item` with optional name (`MIR_item_t MIR_new_bss (MIR_context_t ctx, const char *name, size_t len)`)
+  * Names of MIR functions, imports, and prototypes should be unique in a module
+  * API functions `MIR_output_item (MIR_context_t ctx, FILE *f, MIR_item_t item)`
+    and `MIR_output_module (MIR_context_t ctx, FILE *f, MIR_module_t module)` output item or module
+    textual representation into given file
+  * MIR text module syntax looks the following:
+```
+    <module name>: module
+                   {<module item>}
+                   endmodule
+```
+
+## MIR function
+  * Function is a module item
+  * Function has a **frame**, a stack memory reserved for each function invocation
+  * Function has **local variables** (sometimes called **registers**), a part of which are **arguments**
+    * A variable should have a unique name in the function
+    * A variable is represented by a structure of type `MIR_var_t`
+      * The structure contains variable name and its type
+  * MIR function with its arguments is created through API function `MIR_item_t MIR_new_func (MIR_context_t ctx, const
+    char *name, size_t nres, MIR_type_t *res_types, size_t nargs, ...)`
+    or function `MIR_item_t MIR_new_func_arr (MIR_context_t ctx, const char *name, size_t nres, MIR_type_t *res_types, size_t nargs, MIR_var_t *arg_vars)`
+    * Argument variables can be any type
+      * This type only denotes how the argument value is passed
+      * Any integer type argument variable has actually type `MIR_T_I64`
+  * MIR functions with variable number of arguments are created through API functions
+    `MIR_item_t MIR_new_vararg_func (MIR_context_t ctx, const char *name, size_t nres, MIR_type_t *res_types, size_t nargs, ...)`
+    or function `MIR_item_t MIR_new_vararg_func_arr (MIR_context_t ctx, const char *name, size_t nres, MIR_type_t *res_types, size_t nargs, MIR_var_t *arg_vars)`
+    * `nargs` and `arg_vars` define only fixed arguments
+    * MIR functions can have more than one result, but the possible number of results
+      and combinations of their types are machine-defined.  For example, for x86-64
+      the function can have up to six results and return two integer
+      values, two float or double values, and two long double values
+      in any combination
+  * MIR function creation is finished by calling API function `MIR_finish_func (MIR_context_t ctx)`
+  * You can create only one MIR function at any given time
+  * MIR text function syntax looks the following (arg-var always has a name besides type):
+```
+    <function name>: func {<result type>, } [ arg-var {, <arg-var> } [, ...]]
+                     {<insn>}
+                     endfunc
+```
+  * Non-argument function variables are created through API function
+    `MIR_reg_t MIR_new_func_reg (MIR_context_t ctx, MIR_func_t func, MIR_type_t type, const char *name)`
+    * The only permitted integer type for the variable is `MIR_T_I64` (or MIR_T_U64???)
+    * Names in form `t<number>` can not be used as they are fixed for internal purposes
+    * You can create function variables even after finishing the
+      function creation.  This can be used to modify function insns,
+      e.g. for optimizations
+  * Non-argument variable declaration syntax in MIR textual representation looks the following:
+```
+    local [ <var type>:<var name> {, <var type>:<var name>} ]
+```
+  * In MIR textual representation variable should be defined through `local` before its use
+    
+## MIR insn operands
+  * MIR insns work with operands
+  * There are following operands:
+    * Signed or unsigned **64-bit integer value operands** created through API functions
+      `MIR_op_t MIR_new_int_op (MIR_context_t ctx, int64_t v)` and `MIR_op_t MIR_new_uint_op (MIR_context_t ctx, uint64_t v)`
+      * In MIR text they are represented the same way as C integer numbers (e.g. octal, decimal, hexadecimal ones)
+    * **Float, double or long double value operands** created through API functions `MIR_op_t MIR_new_float_op (MIR_context_t ctx, float v)`,
+      `MIR_op_t MIR_new_double_op (MIR_context_t ctx, double v)`, and `MIR_op_t MIR_new_ldouble_op (MIR_context_t ctx, long double v)`
+      * In MIR text they are represented the same way as C floating point numbers
+    * **String operands** created through API functions `MIR_op_t MIR_new_str_op (MIR_context_t ctx, MIR_str_t str)`
+      * In MIR text they are represented by `typedef struct MIR_str {size_t len; const char *s;} MIR_str_t`
+      * Strings for each operand are put into memory (which can be modified) and the memory address actually presents the string
+    * **Label operand** created through API function `MIR_op_t MIR_new_label_op (MIR_context_t ctx, MIR_label_t label)`
+      * Here `label` is a special insn created by API function `MIR_insn_t MIR_new_label (MIR_context_t ctx)`
+      * In MIR text, they are represented by unique label name
+    * **Reference operands** created through API function `MIR_op_t MIR_new_ref_op (MIR_context_t ctx, MIR_item_t item)`
+      * In MIR text, they are represented by the corresponding item name
+    * **Register (variable) operands** created through API function `MIR_op_t MIR_new_reg_op (MIR_context_t ctx, MIR_reg_t reg)`
+      * In MIR text they are represented by the corresponding variable name
+      * Value of type `MIR_reg_t` is returned by function `MIR_new_func_reg`
+        or can be gotten by function `MIR_reg_t MIR_reg (MIR_context_t ctx, const char *reg_name, MIR_func_t func)`, e.g. for argument-variables
+    * **Memory operands** consist of type, displacement, base
+      register, index register and index scale.  Memory operand is
+      created through API function `MIR_op_t MIR_new_mem_op (MIR_context_t ctx, MIR_type_t type,
+      MIR_disp_t disp, MIR_reg_t base, MIR_reg_t index, MIR_scale_t
+      scale)`
+      * The arguments define address of memory as `disp + base + index * scale`
+      * Integer type input memory is transformed to 64-bit integer value with sign or zero extension
+        depending on signedness of the type
+      * result 64-bit integer value is truncated to integer memory type
+      * Memory operand has the following syntax in MIR text (absent displacement means zero one,
+        absent scale means one, scale should be 1, 2, 4, or 8):
+      
+```
+	  <type>: <disp>
+	  <type>: [<disp>] (<base reg> [, <index reg> [, <scale> ]])
+```
+  * API function `MIR_output_op (MIR_context_t ctx, FILE *f, MIR_op_t op, MIR_func_t func)` outputs the operand
+    textual representation into given file
+        
+
+## MIR insns
+  * All MIR insns (except call and ret insns) expect a fixed number of operands
+  * Most MIR insns are 3-operand insns: two inputs and one output
+  * In the majority of cases **the first insn operand** describes where the insn result (if any) will be placed
+  * Only register or memory operand can be insn output (result) operand
+  * MIR insn can be created through API functions `MIR_insn_t MIR_new_insn (MIR_context_t ctx, MIR_insn_code_t code, ...)`
+    and `MIR_insn_t MIR_new_insn_arr (MIR_context_t ctx, MIR_insn_code_t code, size_t nops, MIR_op_t *ops)`
+    * Number of operands and their types should be what is expected by the insn being created
+    * You can not use `MIR_new_insn` for the creation of call and ret insns as these insns have a variable number of operands.
+      To create such insns you should use `MIR_new_insn_arr` or special functions
+      `MIR_insn_t MIR_new_call_insn (MIR_context_t ctx, size_t nops, ...)` and `MIR_insn_t MIR_new_ret_insn (MIR_context_t ctx, size_t nops, ...)`
+  * You can get insn name and number of insn operands through API functions
+    `const char *MIR_insn_name (MIR_context_t ctx, MIR_insn_code_t code)` and `size_t MIR_insn_nops (MIR_context_t ctx, MIR_insn_t insn)`
+  * You can add a created insn at the beginning or end of function insn list through API functions
+    `MIR_prepend_insn (MIR_context_t ctx, MIR_item_t func, MIR_insn_t insn)` and `MIR_append_insn (MIR_context_t ctx, MIR_item_t func, MIR_insn_t insn)`
+  * You can insert a created insn in the middle of function insn list through API functions
+    `MIR_insert_insn_after (MIR_context_t ctx, MIR_item_t func, MIR_insn_t after, MIR_insn_t insn)` and
+    `MIR_insert_insn_before (MIR_context_t ctx, MIR_item_t func, MIR_insn_t before, MIR_insn_t insn)`
+    * The insn `after` and `before` should be already in the list
+  * You can remove insn from the function list through API function `MIR_remove_insn (MIR_context_t ctx, MIR_item_t func, MIR_insn_t insn)`
+  * The insn should not be inserted in the list if it is already there
+  * The insn should not be removed from the list if it is not there
+  * API function `MIR_output_insn (MIR_context_t ctx, FILE *f, MIR_insn_t insn, MIR_func_t func, int newline_p)` outputs the insn
+    textual representation into given file with a newline at the end depending on value of `newline_p`
+  * Insn has the following syntax in MIR text:
+```
+	  {<label name>:} [<insn name> <operand> {, <operand>}]
+```
+  * More than one insn can be put on the same line by separating the insns by `;`
+
+### MIR move insns
+  * There are following MIR move insns:
+
+    | Insn Code               | Nops |   Description                                          |
+    |-------------------------|-----:|--------------------------------------------------------|
+    | `MIR_MOV`               | 2    | move 64-bit integer values                             |
+    | `MIR_FMOV`              | 2    | move **single precision** floating point values        |
+    | `MIR_DMOV`              | 2    | move **double precision** floating point values        |
+    | `MIR_LDMOV`             | 2    | move **long double** floating point values             |
+
+### MIR integer insns
+  * If insn has suffix `S` in insn name, the insn works with lower 32-bit part of 64-bit integer value
+  * The higher part of 32-bit insn result is undefined
+  * If insn has prefix `U` in insn name, the insn treats integer as unsigned integers
+  * Some insns have no unsigned variant as MIR is oriented to CPUs with two's complement integer arithmetic
+    (the huge majority of all CPUs)
+  
+    | Insn Code               | Nops |   Description                                          |
+    |-------------------------|-----:|--------------------------------------------------------|
+    | `MIR_EXT8`              | 2    | **sign** extension of lower **8 bit** input part       |
+    | `MIR_UEXT8`             | 2    | **zero** extension of lower **8 bit** input part       |
+    | `MIR_EXT16`             | 2    | **sign** extension of lower **16 bit** input part      |
+    | `MIR_UEXT16`            | 2    | **zero** extension of lower **16 bit** input part      |
+    | `MIR_EXT32`             | 2    | **sign** extension of lower **32 bit** input part      |
+    | `MIR_UEXT32`            | 2    | **zero** extension of lower **32 bit** input part      |
+    |                         |      |                                                        |
+    | `MIR_NEG`               | 2    | changing sign of **64-bit** integer value              |
+    | `MIR_NEGS`              | 2    | changing sign of **32-bit** integer value              |
+    |                         |      |                                                        |
+    | `MIR_ADD`, `MIR_SUB`    | 3    | **64-bit** integer addition and subtraction            |
+    | `MIR_ADDS`, `MIR_SUBS`  | 3    | **32-bit** integer addition and subtraction            |
+    | `MIR_MUL`, `MIR_DIV`    | 3    | **64-bit signed**  multiplication and division         |
+    | `MIR_UMUL`, `MIR_UDIV`  | 3    | **64-bit unsigned** integer multiplication and division |
+    | `MIR_MULS`, `MIR_DIVS`  | 3    | **32-bit signed**  multiplication and division         |
+    | `MIR_UMULS`, `MIR_UDIVS`| 3    | **32-bit unsigned** integer multiplication and division |
+    | `MIR_MOD`               | 3    | **64-bit signed**  modulo operation                    |
+    | `MIR_UMOD`              | 3    | **64-bit unsigned** integer modulo operation           |
+    | `MIR_MODS`              | 3    | **32-bit signed**  modulo operation                    |
+    | `MIR_UMODS`             | 3    | **32-bit unsigned** integer modulo operation           |
+    |                         |      |                                                        |
+    | `MIR_AND`, `MIR_OR`     | 3    | **64-bit** integer bitwise AND and OR                  |
+    | `MIR_ANDS`, `MIR_ORS`   | 3    | **32-bit** integer bitwise AND and OR                  |
+    | `MIR_XOR`               | 3    | **64-bit** integer bitwise XOR                         |
+    | `MIR_XORS`              | 3    | **32-bit** integer bitwise XOR                         |
+    |                         |      |                                                        |
+    | `MIR_LSH`               | 3    | **64-bit** integer left shift                          |
+    | `MIR_LSHS`              | 3    | **32-bit** integer left shift                          |
+    | `MIR_RSH`               | 3    | **64-bit** integer right shift with **sign** extension |
+    | `MIR_RSHS`              | 3    | **32-bit** integer right shift with **sign** extension |
+    | `MIR_URSH`              | 3    | **64-bit** integer right shift with **zero** extension |
+    | `MIR_URSHS`             | 3    | **32-bit** integer right shift with **zero** extension |
+    |                         |      |                                                        |
+    | `MIR_EQ`, `MIR_NE`      | 3    | equality/inequality of **64-bit** integers             |
+    | `MIR_EQS`, `MIR_NES`    | 3    | equality/inequality of **32-bit** integers             |
+    | `MIR_LT`, `MIR_LE`      | 3    | **64-bit signed** less than/less than or equal         |
+    | `MIR_ULT`, `MIR_ULE`    | 3    | **64-bit unsigned** less than/less than or equal       |
+    | `MIR_LTS`, `MIR_LES`    | 3    | **32-bit signed** less than/less than or equal         |
+    | `MIR_ULTS`, `MIR_ULES`  | 3    | **32-bit unsigned** less than/less than or equal       |
+    | `MIR_GT`, `MIR_GE`      | 3    | **64-bit signed** greater than/greater than or equal   |
+    | `MIR_UGT`, `MIR_UGE`    | 3    | **64-bit unsigned** greater than/greater than or equal |
+    | `MIR_GTS`, `MIR_GES`    | 3    | **32-bit signed** greater than/greater than or equal   |
+    | `MIR_UGTS`, `MIR_UGES`  | 3    | **32-bit unsigned** greater than/greater than or equal |
+
+### MIR floating point insns
+  * If insn has prefix `F` in insn name, the insn is single precision float point insn.  Its operands should have `MIR_T_F` type
+  * If insn has prefix `D` in insn name, the insn is double precision float point insn.  Its operands should have `MIR_T_D` type
+  * Otherwise, insn has prefix `LD` in insn name and the insn is a long double insn.
+    Its operands should have `MIR_T_LD` type.
+  * The result of comparison insn is a 64-bit integer value, so the result operand should be of integer type
+  
+    | Insn Code                            | Nops |   Description                                                   |
+    |--------------------------------------|-----:|-----------------------------------------------------------------|
+    | `MIR_F2I`, `MIR_D2I`, `MIR_LD2I`     | 2    | transforming floating point value into 64-bit integer           |
+    | `MIR_F2D`                            | 2    | transforming single to double precision FP value                |
+    | `MIR_F2LD`                           | 2    | transforming single precision to long double FP value           |
+    | `MIR_D2F`                            | 2    | transforming double to single precision FP value                |
+    | `MIR_D2LD`                           | 2    | transforming double precision to long double FP value           |
+    | `MIR_LD2F`                           | 2    | transforming long double to single precision FP value           |
+    | `MIR_LD2D`                           | 2    | transforming long double to double precision FP value           |
+    | `MIR_I2F`, `MIR_I2D`, `MIR_I2LD`     | 2    | transforming 64-bit integer into a floating point value         |
+    | `MIR_UI2F`, `MIR_UI2D`, `MIR_UI2LD`  | 2    | transforming unsigned 64-bit integer into a floating point value|
+    | `MIR_FNEG`, `MIR_DNEG`, `MIR_LDNEG`  | 2    | changing sign of floating point value                           |
+    | `MIR_FADD`, `MIR_FSUB`               | 3    | **single** precision addition and subtraction                   |
+    | `MIR_DADD`, `MIR_DSUB`               | 3    | **double** precision addition and subtraction                   |
+    | `MIR_LDADD`, `MIR_LDSUB`             | 3    | **long double** addition and subtraction                        |
+    | `MIR_FMUL`, `MIR_FDIV`               | 3    | **single** precision multiplication and division                |
+    | `MIR_DMUL`, `MIR_DDIV`               | 3    | **double** precision multiplication and division                |
+    | `MIR_LDMUL`, `MIR_LDDIV`             | 3    | **long double** multiplication and division                     |
+    | `MIR_FEQ`, `MIR_FNE`                 | 3    | equality/inequality of **single** precision values              |
+    | `MIR_DEQ`, `MIR_DNE`                 | 3    | equality/inequality of **double** precision values              |
+    | `MIR_LDEQ`, `MIR_LDNE`               | 3    | equality/inequality of **long double** values                   |
+    | `MIR_FLT`, `MIR_FLE`                 | 3    | **single** precision less than/less than or equal               |
+    | `MIR_DLT`, `MIR_DLE`                 | 3    | **double** precision less than/less than or equal               |
+    | `MIR_LDLT`, `MIR_LDLE`               | 3    | **long double** less than/less than or equal                    |
+    | `MIR_FGT`, `MIR_FGE`                 | 3    | **single** precision greater than/greater than or equal         |
+    | `MIR_DGT`, `MIR_DGE`                 | 3    | **double** precision greater than/greater than or equal         |
+    | `MIR_LDGT`, `MIR_LDGE`               | 3    | **long double** greater than/greater than or equal              |
+
+### MIR branch insns
+  * The first operand of the insn should be label
+
+    | Insn Code               | Nops |   Description                                                 |
+    |-------------------------|-----:|---------------------------------------------------------------|
+    | `MIR_JMP`               | 1    | unconditional jump to the label                               |
+    | `MIR_BT`                | 2    | jump to the label when 2nd **64-bit** operand is **nonzero**  |
+    | `MIR_BTS`               | 2    | jump to the label when 2nd **32-bit** operand is **nonzero**  |
+    | `MIR_BF`                | 2    | jump to the label when 2nd **64-bit** operand is **zero**     |
+    | `MIR_BFS`               | 2    | jump to the label when 2nd **32-bit** operand is **zero**     |
+
+### MIR switch insn
+  * The first operand of `MIR_SWITCH` insn should have an integer value from 0 to `N - 1` inclusive
+  * The remaining operands should be `N` labels, where `N > 0`
+  * Execution of the insn will be a jump to the label corresponding to the first operand value
+  * If the first operand value is out of the range of permitted values, the execution result is undefined
+
+### MIR integer comparison and branch insn
+  * The first operand of the insn should be label.  Label will be the next executed insn if the result of comparison is non-zero
+
+    | Insn Code               | Nops |   Description                                                 |
+    |-------------------------|-----:|---------------------------------------------------------------|
+    | `MIR_BEQ`, `MIR_BNE`    | 3    | jump on **64-bit** equality/inequality                        |
+    | `MIR_BEQS`, `MIR_BNES`  | 3    | jump on **32-bit** equality/inequality                        |
+    | `MIR_BLT`, `MIR_BLE`    | 3    | jump on **signed 64-bit** less than/less than or equal        |
+    | `MIR_UBLT`, `MIR_UBLE`  | 3    | jump on **unsigned 64-bit** less than/less than or equal      |
+    | `MIR_BLTS`, `MIR_BLES`  | 3    | jump on **signed 32-bit** less than/less than or equal        |
+    | `MIR_UBLTS`, `MIR_UBLES`| 3    | jump on **unsigned 32-bit** less than/less than or equal      |
+    | `MIR_BGT`, `MIR_BGE`    | 3    | jump on **signed 64-bit** greater than/greater than or equal  |
+    | `MIR_UBGT`, `MIR_UBGE`  | 3    | jump on **unsigned 64-bit** greater than/greater than or equal|
+    | `MIR_BGTS`, `MIR_BGES`  | 3    | jump on **signed 32-bit** greater than/greater than or equal  |
+    | `MIR_UBGTS`, `MIR_UBGES`| 3    | jump on **unsigned 32-bit** greater than/greater than or equal|
+
+### MIR floating point comparison and branch insn
+  * The first operand of the insn should be label.  Label will be the next executed insn if the result of comparison is non-zero
+  * See comparison semantics in the corresponding comparison insns
+
+    | Insn Code                 | Nops |   Description                                                  |
+    |---------------------------|-----:|----------------------------------------------------------------|
+    | `MIR_FBEQ`, `MIR_FBNE`    | 3    | jump on **single** precision equality/inequality               |
+    | `MIR_DBEQ`, `MIR_DBNE`    | 3    | jump on **double** precision equality/inequality               |
+    | `MIR_LDBEQ`, `MIR_LDBNE`  | 3    | jump on **long double** equality/inequality                    |
+    | `MIR_FBLT`, `MIR_FBLE`    | 3    | jump on **single** precision less than/less than or equal      |
+    | `MIR_DBLT`, `MIR_DBLE`    | 3    | jump on **double** precision less than/less than or equal      |
+    | `MIR_LDBLT`, `MIR_LDBLE`  | 3    | jump on **long double** less than/less than or equal           |
+    | `MIR_FBGT`, `MIR_FBGE`    | 3    | jump on **single** precision greater than/greater than or equal|
+    | `MIR_DBGT`, `MIR_DBGE`    | 3    | jump on **double** precision greater than/greater than or equal|
+    | `MIR_LDBGT`, `MIR_LDBGE`  | 3    | jump on **long double** greater than/greater than or equal     |
+
+### MIR return insn
+  * Return insn has zero or more operands
+  * Return insn operands should correspond to return types of the function
+  * 64-bit integer value is truncated to the corresponding function return type first
+  * The return values will be the function call values
+
+### MIR_CALL insn
+  * The insn has variable number of operands
+  * The first operand is a prototype reference operand
+  * The second operand is a called function address
+    * The prototype should correspond to the MIR function definition if the function address represents a MIR function
+    * The prototype should correspond to the C function definition if the address is a C function address
+  * If the prototype has *N* return types, the next *N* operands are
+    output operands which will contain the result values of the function
+    call
+  * The subsequent operands are arguments.  Their types and number should be the same as in the prototype
+    * Integer arguments are truncated according to integer prototype argument type
+  
+### MIR_INLINE insn
+  * This insn is analogous to `MIR_CALL` but after linking this insn
+    will be changed by inlined function body if it is possible
+  * Calls of vararg functions are never inlined
+  
+### MIR_ALLOCA insn
+  * Reserve memory on the stack whose size is given as the 2nd operand and assign the memory address to the 1st operand
+  * The reserved memory will be aligned according to the target ABI
+
+### MIR_BSTART and MIR_BEND insns
+  * MIR users can use them to implement blocks with automatic
+    deallocation of memory allocated by `MIR_ALLOCA` inside the
+    blocks.  But mostly these insns are used to implement call
+    inlining of functions using alloca
+  * Both insns use one operand
+  * The first insn saves the stack pointer in the operand
+  * The second insn restores stack pointer from the operand
+  
+### MIR_VA_START, MIR_VA_ARG, and MIR_VA_END insns
+  * These insns are only for variable number arguments functions
+  * `MIR_VA_START` and `MIR_VA_END` have one input operand, an address
+    of va_list structure (see C stdarg.h for more details).  Unlike C
+    va_start, MIR_VA_START just takes one parameter
+  * `MIR_VA_ARG` takes va_list and any memory operand and returns
+    address of the next argument in the 1st insn operand.  The memory
+    operand type defines the type of the argument
+  * va_list operand can be memory with undefined type.  In this case
+    address of the va_list is not in the memory but is the
+    memory address
+  
+## MIR API example
+  * The following code in C creates a MIR analog of the C code
+    `int64_t loop (int64_t arg1) {int64_t count = 0; while (count < arg1) count++; return count;}`
+```
+  MIR_module_t m = MIR_new_module (ctx, "m");
+  MIR_item_t func = MIR_new_func (ctx, "loop", MIR_T_I64, 1, MIR_T_I64, "arg1");
+  MIR_reg_t COUNT = MIR_new_func_reg (ctx, func->u.func, MIR_T_I64, "count");
+  MIR_reg_t ARG1 = MIR_reg (ctx, "arg1", func->u.func);
+  MIR_label_t fin = MIR_new_label (ctx), cont = MIR_new_label (ctx);
+
+  MIR_append_insn (ctx, func, MIR_new_insn (ctx, MIR_MOV, MIR_new_reg_op (ctx, COUNT),
+                                            MIR_new_int_op (ctx, 0)));
+  MIR_append_insn (ctx, func, MIR_new_insn (ctx, MIR_BGE, MIR_new_label_op (ctx, fin),
+                                            MIR_new_reg_op (ctx, COUNT), MIR_new_reg_op (ctx, ARG1)));
+  MIR_append_insn (ctx, func, cont);
+  MIR_append_insn (ctx, func, MIR_new_insn (ctx, MIR_ADD, MIR_new_reg_op (ctx, COUNT),
+                                            MIR_new_reg_op (ctx, COUNT), MIR_new_int_op (ctx, 1)));
+  MIR_append_insn (ctx, func, MIR_new_insn (ctx, MIR_BLT, MIR_new_label_op (ctx, cont),
+                                            MIR_new_reg_op (ctx, COUNT), MIR_new_reg_op (ctx, ARG1)));
+  MIR_append_insn (ctx, func, fin);
+  MIR_append_insn (ctx, func, MIR_new_ret_insn (ctx, 1, MIR_new_reg_op (ctx, COUNT)));
+  MIR_finish_func (ctx);
+  MIR_finish_module (ctx);
+```
+
+## MIR text example
+
+```
+m_sieve:  module
+          export sieve
+sieve:    func i32, i32:N
+          local i64:iter, i64:count, i64:i, i64:k, i64:prime, i64:temp, i64:flags
+          alloca flags, 819000
+          mov iter, 0
+loop:     bge fin, iter, N
+          mov count, 0;  mov i, 0
+loop2:    bge fin2, i, 819000
+          mov u8:(flags, i), 1;  add i, i, 1
+          jmp loop2
+fin2:     mov i, 0
+loop3:    bge fin3, i, 819000
+          beq cont3, u8:(flags,i), 0
+          add temp, i, i;  add prime, temp, 3;  add k, i, prime
+loop4:    bge fin4, k, 819000
+          mov u8:(flags, k), 0;  add k, k, prime
+          jmp loop4
+fin4:     add count, count, 1
+cont3:    add i, i, 1
+          jmp loop3
+fin3:     add iter, iter, 1
+          jmp loop
+fin:      rets count
+          endfunc
+          endmodule
+m_ex100:  module
+format:   string "sieve (10) = %d\n"
+p_printf: proto p:fmt, i32:v
+p_sieve:  proto i32, i32:iter
+          export ex100
+          import sieve, printf
+ex100:    func v
+          local i64:r
+          call p_sieve, sieve, r, 100
+          call p_printf, printf, format, r
+          endfunc
+          endmodule
+```
+
+## Other MIR API functions
+  * MIR API can find a lot of errors.  They are reported through an
+    error function of type `void (*MIR_error_func_t) (MIR_context ctx, MIR_error_type_t
+    error_type, const char *message)`.  The function is considered to
+    never return.  To see all error types, please look at the
+    definition of error type `MIR_error_type_t` in file mir.h
+  * You can get and set up the current error function through API
+    functions `MIR_error_func_t MIR_get_error_func (MIR_context ctx)` and `MIR_set_error_func
+    (MIR_context ctx, MIR_error_func_t func)`.
+    * The default error function prints the message to stderr and calls `exit (1)`
+  * MIR is pretty flexible and can describe complex insns, e.g. insns
+    whose all operands are memory.  Sometimes you need a very simple
+    form of MIR representation.  During load of module all its functions are simplified as much
+    as possible by adding new insns and registers resulting in a form in which:
+    * immediate, memory, reference operands can be used only in move insns
+    * memory have only base register (no displacement and index register)
+    * string and float immediate operands (if `mem_float_p`) are changed onto
+      references for new string and data items
+  * Before execution of MIR code (through interpreter or machine code generated by JIT),
+    you need to load and link it
+    * You can load MIR module through API function `MIR_load_module
+      (MIR_context ctx, MIR_module_t m)`.  The function simplifies module code.
+      It also allocates the module data/bss
+      and makes visible the exported module items to other module
+      during subsequent linking.  There is a guarantee that the
+      different data/bss items will be in adjacent memory if the
+      data/bss items go one after another and all the data/bss items
+      except the first one are anonymous (it means they have no name).
+      Such adjacent data/bss items are called a **section**.
+      Alignment of the section is malloc alignment.  There is no
+      memory space between data/bss in the section.  If you need to
+      provide necessary alignment of a data/bss in the section you
+      should do it yourself by putting additional anonymous data/bss
+      before given data/bss if it is necessary.  BSS memory is
+      initialized by zero and data memory is initialized by the
+      corresponding data.  If there is already an exported item with
+      the same name, it will no longer be visible for linking.  Such
+      visibility mechanism permits usage of different versions of the
+      same function
+    * Reference data are initialized not during loading but during linking after
+      the referenced item address is known.  The address is used for the data
+      initialization
+    * Expression data are also initialized not during loading but during linking after
+      all addresses are known.  The expression function is evaluated by the interpreter
+      and its evaluation result is used for the data initialization.  For example, if
+      you need to initialize data by item address plus offset you should use
+      an expression data
+    * MIR permits to use imported items not implemented in MIR, for
+      example to use C standard function `strcmp`.  You need to inform
+      MIR about it.  API function `MIR_load_external (MIR_context ctx, const char
+      *name, void *addr)` informs that imported items with given name
+      have given address (e.g. C function address or data)
+    * Imports/exports of modules loaded since the last link can be
+      linked through API function `MIR_link (MIR_context ctx, void (*set_interface) (MIR_item_t item),
+      void * (*import_resolver) (const char *))`
+    * `MIR_link` function inlines most `MIR_INLINE` calls
+    * `MIR_link` function also sets up call interface
+      * If you pass `MIR_set_interp_interface` to `MIR_link`, then
+        called functions from MIR code will be interpreted
+      * If you pass `MIR_set_gen_interface` to `MIR_link`, then
+        MIR-generator will generate machine code for all loaded MIR
+        functions and called functions from MIR code will execute the
+        machine code
+      * If you pass `MIR_set_lazy_gen_interface` to `MIR_link`, then
+        MIR-generator will generate machine code only on the first
+        function call and called functions from MIR code will execute
+        the machine code
+      * If you pass non-null `import_resolver` function, it will be
+        called for defining address for import without definition.
+        The function gets the import name and returns the address which
+        will be used for the import item.  This function can be useful
+        for searching `dlopen` library symbols when use of
+        MIR_load_external is not convenient
+
+# MIR code execution
+  * Linked MIR code can be executed by an **interpreter** or machine code generated by **MIR generator**
+
+# MIR code interpretation
+  * The interpreter is an obligatory part of MIR API because it can be used during linking
+  * The interpreter is automatically initialized and finished with MIR API initialization and finishing
+  * The interpreter works with values represented by type `MIR_val_t` which is union
+    `union {..., int64_t i; uint64_t u; float f; double d; long double ld;}`
+  * You can execute a MIR function code by API functions `void
+    MIR_interp (MIR_context ctx, MIR_item_t func_item, MIR_val_t *results, size_t nargs, ...)` and
+    `void MIR_interp_arr (MIR_context ctx, MIR_item_t func_item, MIR_val_t *results, size_t nargs,
+    MIR_val_t *vals)`
+    * The function results are returned through parameter `results`.  You should pass
+      a container of enough size to return all function results.
+  * You can execute a MIR function code also through C function call
+    mechanism.  First you need to setup the C function interface
+    through API function `MIR_set_interp_interface (MIR_context ctx, MIR_item_t
+    func_item)`.  After that you can use `func_item->addr` to call the
+    MIR function as usual C function
+    * C function interface is implemented by generation of machine
+      code specialized for MIR function.  Therefore the interface
+      works only on the same targets as MIR generator
+
+# MIR generator (file mir-gen.h)
+  * Before use of MIR generator you should initialize it by API function `MIR_gen_init (MIR_context ctx)`
+  * API function `MIR_gen_finish (MIR_context ctx)` should be called last after any generator usage.
+    It frees all internal generator data
+  * API function `void *MIR_gen (MIR_context ctx, MIR_item_t func_item)` generates machine code of given MIR function
+    and returns an address to call it.  You can call the code as usual C function by using this address
+    as the called function address
+  * API function `void MIR_gen_set_debug_file (MIR_context_t ctx, FILE *f)` sets up MIR generator debug file to `f`.
+    If it is not NULL a lot of debugging and optimization information will be output to the file.  It is useful mostly
+    for MIR developers
+  * API function `void MIR_gen_set_optimize_level (MIR_context_t ctx, unsigned int level)` sets up optimization
+    level for MIR generator:
+    * `0` means only register allocator and machine code generator work
+    * `1` means additional code selection task.  On this level MIR generator creates more compact and faster
+      code than on zero level at practically the same speed
+    * `2` means additionally common sub-expression elimination and sparse conditional constant propagation.
+       This is the default level.  This level is valuable if you generate bad input MIR code with a lot of redundancy
+       and constants.  The generation speed on level `1` is about 50% faster than on level `2`
+    * `3` means additionally register renaming and loop invariant code motion.  The generation speed
+      on level `2` is about 50% faster than on level `3`
--- a/mir/c2mir/aarch64/caarch64-code.c
+++ b/mir/c2mir/aarch64/caarch64-code.c
@ -0,0 +1,23 @@
+/* This file is a part of MIR project.
+   Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
+*/
+
+#include "../mirc.h"
+#include "mirc-aarch64-linux.h"
+
+static const char *standard_includes[] = {mirc, aarch64_mirc};
+
+static const char *standard_include_dirs[] = {"include/mirc/", "include/mirc/aarch64/"};
+
+#define MAX_ALIGNMENT 16
+
+#define ADJUST_VAR_ALIGNMENT(c2m_ctx, align, type) \
+  aarch64_adjust_var_alignment (c2m_ctx, align, type)
+
+static int aarch64_adjust_var_alignment (c2m_ctx_t c2m_ctx, int align, struct type *type) {
+  return align;
+}
+
+static int invalid_alignment (mir_llong align) {
+  return align != 0 && align != 1 && align != 2 && align != 4 && align != 8 && align != 16;
+}
--- a/mir/c2mir/aarch64/caarch64.h
+++ b/mir/c2mir/aarch64/caarch64.h
@ -0,0 +1,50 @@
+/* This file is a part of MIR project.
+   Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
+*/
+
+#include <stdint.h>
+
+#define MIR_CHAR_BIT 8
+
+typedef int8_t mir_schar;
+typedef int16_t mir_short;
+typedef int32_t mir_int;
+typedef int64_t mir_long;
+typedef int64_t mir_llong;
+
+#define MIR_SCHAR_MIN INT8_MIN
+#define MIR_SCHAR_MAX INT8_MAX
+#define MIR_SHORT_MIN INT16_MIN
+#define MIR_SHORT_MAX INT16_MAX
+#define MIR_INT_MIN INT32_MIN
+#define MIR_INT_MAX INT32_MAX
+#define MIR_LONG_MIN INT64_MIN
+#define MIR_LONG_MAX INT64_MAX
+#define MIR_LLONG_MIN INT64_MIN
+#define MIR_LLONG_MAX INT64_MAX
+
+typedef uint8_t mir_uchar;
+typedef uint16_t mir_ushort;
+typedef uint32_t mir_uint;
+typedef uint64_t mir_ulong;
+typedef uint64_t mir_ullong;
+
+#define MIR_UCHAR_MAX UINT8_MAX
+#define MIR_USHORT_MAX UINT16_MAX
+#define MIR_UINT_MAX UINT32_MAX
+#define MIR_ULONG_MAX UINT64_MAX
+#define MIR_ULLONG_MAX UINT64_MAX
+
+typedef mir_schar mir_char;
+#define MIR_CHAR_MIN MIR_SCHAR_MIN
+#define MIR_CHAR_MAX MIR_SCHAR_MAX
+
+typedef float mir_float;
+typedef double mir_double;
+typedef long double mir_ldouble;
+
+typedef uint8_t mir_bool;
+typedef int64_t mir_ptrdiff_t;
+typedef uint64_t mir_size_t;
+
+#define MIR_SIZE_MAX UINT64_MAX
--- a/mir/c2mir/aarch64/mirc-aarch64-linux.h
+++ b/mir/c2mir/aarch64/mirc-aarch64-linux.h
@ -0,0 +1,93 @@
+/* This file is a part of MIR project.
+   Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
+*/
+
+static char aarch64_mirc[]
+  = "#define __aarch64__ 1\n"
+    "#define _LP64 1\n"
+    "#define __LP64__ 1\n"
+    "#define __ARM_ARCH 8\n"
+    "\n"
+    "#define __SIZEOF_DOUBLE__ 8\n"
+    "#define __SIZEOF_FLOAT__ 4\n"
+    "#define __SIZEOF_INT__ 4\n"
+    "#define __SIZEOF_LONG_DOUBLE__ 16\n"
+    "#define __SIZEOF_LONG_LONG__ 8\n"
+    "#define __SIZEOF_LONG__ 8\n"
+    "#define __SIZEOF_POINTER__ 8\n"
+    "#define __SIZEOF_PTRDIFF_T__ 8\n"
+    "#define __SIZEOF_SHORT__ 2\n"
+    "#define __SIZEOF_SIZE_T__ 8\n"
+    "\n"
+    "#define __BYTE_ORDER__ 1234\n"
+    "#define __ORDER_LITTLE_ENDIAN__ 1234\n"
+    "#define __ORDER_BIG_ENDIAN__ 4321\n"
+    "\n"
+    "/* Some GCC predefined macros: */\n"
+    "#define __SIZE_TYPE__ unsigned long\n"
+    "#define __PTRDIFF_TYPE__ long\n"
+    "#define __INTMAX_TYPE__ long\n"
+    "#define __UINTMAX_TYPE__ unsigned long\n"
+    "#define __INT8_TYPE__ signed char\n"
+    "#define __INT16_TYPE__ short\n"
+    "#define __INT32_TYPE__ int\n"
+    "#define __INT64_TYPE__ long\n"
+    "#define __UINT8_TYPE__ unsigned char\n"
+    "#define __UINT16_TYPE__ unsigned short\n"
+    "#define __UINT32_TYPE__ unsigned int\n"
+    "#define __UINT64_TYPE__ unsigned long\n"
+    "#define __INTPTR_TYPE__ long\n"
+    "#define __UINTPTR_TYPE__ unsigned long\n"
+    "\n"
+    "#define __CHAR_BIT__ 8\n"
+    "#define __INT8_MAX__ 127\n"
+    "#define __INT16_MAX__ 32767\n"
+    "#define __INT32_MAX__ 2147483647\n"
+    "#define __INT64_MAX__ 9223372036854775807l\n"
+    "#define __UINT8_MAX__ (__INT8_MAX__ * 2u + 1u)\n"
+    "#define __UINT16_MAX__ (__INT16_MAX__ * 2u + 1u)\n"
+    "#define __UINT32_MAX__ (__INT32_MAX__ * 2u + 1u)\n"
+    "#define __UINT64_MAX__ (__INT64_MAX__ * 2u + 1u)\n"
+    "#define __SCHAR_MAX__ __INT8_MAX__\n"
+    "#define __SHRT_MAX__ __INT16_MAX__\n"
+    "#define __INT_MAX__ __INT32_MAX__\n"
+    "#define __LONG_MAX__ __INT64_MAX__\n"
+    "#define __LONG_LONG_MAX__ __INT64_MAX__\n"
+    "#define __SIZE_MAX__ __UINT64_MAX__\n"
+    "#define __PTRDIFF_MAX__ __INT64_MAX__\n"
+    "#define __INTMAX_MAX__ __INT64_MAX__\n"
+    "#define __UINTMAX_MAX__ __UINT64_MAX__\n"
+    "#define __INTPTR_MAX__ __INT64_MAX__\n"
+    "#define __UINTPTR_MAX__ __UINT64_MAX__\n"
+    "\n"
+    "#define __FLT_MIN_EXP__ (-125)\n"
+    "#define __FLT_MAX_EXP__ 128\n"
+    "#define __FLT_DIG__ 6\n"
+    "#define __FLT_DECIMAL_DIG__ 9\n"
+    "#define __FLT_MANT_DIG__ 24\n"
+    "#define __FLT_MIN__ 1.17549435082228750796873653722224568e-38F\n"
+    "#define __FLT_MAX__ 3.40282346638528859811704183484516925e+38F\n"
+    "#define __FLT_EPSILON__ 1.19209289550781250000000000000000000e-7F\n"
+    "\n"
+    "#define __DBL_MIN_EXP__ (-1021)\n"
+    "#define __DBL_MAX_EXP__ 1024\n"
+    "#define __DBL_DIG__ 15\n"
+    "#define __DBL_DECIMAL_DIG__ 17\n"
+    "#define __DBL_MANT_DIG__ 53\n"
+    "#define __DBL_MAX__ ((double) 1.79769313486231570814527423731704357e+308L)\n"
+    "#define __DBL_MIN__ ((double) 2.22507385850720138309023271733240406e-308L)\n"
+    "#define __DBL_EPSILON__ ((double) 2.22044604925031308084726333618164062e-16L)\n"
+    "\n"
+    "typedef unsigned short char16_t;\n"
+    "typedef unsigned int char32_t;\n"
+    "\n"
+#if defined(__linux__)
+    "#define __gnu_linux__ 1\n"
+    "#define __linux 1\n"
+    "#define __linux__ 1\n"
+    "#define linux 1\n"
+    "#define __unix 1\n"
+    "#define __unix__ 1\n"
+#endif
+    "\n"
+    "void *alloca (unsigned long);\n";
--- a/mir/c2mir/c2mir.c
+++ b/mir/c2mir/c2mir.c
@ -25,12 +25,23 @@

 #include "c2mir.h"

-#ifdef __x86_64__
+#if defined(__x86_64__)
 #include "x86_64/cx86_64.h"
+#elif defined(__aarch64__)
+#include "aarch64/caarch64.h"
+#elif defined(__PPC64__)
+#include "ppc64/cppc64.h"
 #else
 #error "undefined or unsupported generation target for C"
 #endif

+#define SWAP(a1, a2, t) \
+  do {                  \
+    t = a1;             \
+    a1 = a2;            \
+    a2 = t;             \
+  } while (0)
+
 typedef enum {
  C_alloc_error,
  C_unfinished_comment,
@ -300,8 +311,12 @@ static mir_size_t raw_type_size (c2m_ctx_t c2m_ctx, struct type *type) {
  return type->raw_size;
 }

-#ifdef __x86_64__
+#if defined(__x86_64__)
 #include "x86_64/cx86_64-code.c"
+#elif defined(__aarch64__)
+#include "aarch64/caarch64-code.c"
+#elif defined(__PPC64__)
+#include "ppc64/cppc64-code.c"
 #else
 #error "undefined or unsupported generation target for C"
 #endif
@ -332,18 +347,18 @@ static int char_is_signed_p (void) { return MIR_CHAR_MAX == MIR_SCHAR_MAX; }

 enum str_flag { FLAG_EXT = 1, FLAG_C89, FLAG_EXT89 };

-static int str_eq (str_t str1, str_t str2) {
+static int str_eq (str_t str1, str_t str2, void *arg) {
  return str1.len == str2.len && memcmp (str1.s, str2.s, str1.len) == 0;
 }
-static htab_hash_t str_hash (str_t str) { return mir_hash (str.s, str.len, 0x42); }
-static int str_key_eq (str_t str1, str_t str2) { return str1.key == str2.key; }
-static htab_hash_t str_key_hash (str_t str) { return mir_hash64 (str.key, 0x24); }
+static htab_hash_t str_hash (str_t str, void *arg) { return mir_hash (str.s, str.len, 0x42); }
+static int str_key_eq (str_t str1, str_t str2, void *arg) { return str1.key == str2.key; }
+static htab_hash_t str_key_hash (str_t str, void *arg) { return mir_hash64 (str.key, 0x24); }

 static str_t uniq_cstr (c2m_ctx_t c2m_ctx, const char *str);

 static void str_init (c2m_ctx_t c2m_ctx) {
-  HTAB_CREATE (str_t, str_tab, 1000, str_hash, str_eq);
-  HTAB_CREATE (str_t, str_key_tab, 200, str_key_hash, str_key_eq);
+  HTAB_CREATE (str_t, str_tab, 1000, str_hash, str_eq, NULL);
+  HTAB_CREATE (str_t, str_key_tab, 200, str_key_hash, str_key_eq, NULL);
  empty_str = uniq_cstr (c2m_ctx, "");
 }

@ -927,11 +942,7 @@ static char *reverse (VARR (char) * v) {
  int i, j, temp, last = (int) VARR_LENGTH (char, v) - 1;

  if (last >= 0 && addr[last] == '\0') last--;
-  for (i = last, j = 0; i > j; i--, j++) {
-    temp = addr[i];
-    addr[i] = addr[j];
-    addr[j] = temp;
-  }
+  for (i = last, j = 0; i > j; i--, j++) SWAP (addr[i], addr[j], temp);
  return addr;
 }

@ -1804,11 +1815,11 @@ static void add_to_temp_string (c2m_ctx_t c2m_ctx, const char *str) {
  VARR_PUSH (char, temp_string, '\0');
 }

-static int macro_eq (macro_t macro1, macro_t macro2) {
+static int macro_eq (macro_t macro1, macro_t macro2, void *arg) {
  return macro1->id->repr == macro2->id->repr;
 }

-static htab_hash_t macro_hash (macro_t macro) {
+static htab_hash_t macro_hash (macro_t macro, void *arg) {
  return mir_hash (macro->id->repr, strlen (macro->id->repr), 0x42);
 }

@ -1823,7 +1834,7 @@ static void init_macros (c2m_ctx_t c2m_ctx) {
  VARR (token_t) * params;

  VARR_CREATE (macro_t, macros, 2048);
-  HTAB_CREATE (macro_t, macro_tab, 2048, macro_hash, macro_eq);
+  HTAB_CREATE (macro_t, macro_tab, 2048, macro_hash, macro_eq, NULL);
  /* Standard macros : */
  new_std_macro (c2m_ctx, "__DATE__");
  new_std_macro (c2m_ctx, "__TIME__");
@ -3529,17 +3540,19 @@ typedef struct {
 DEF_HTAB (tpname_t);
 static HTAB (tpname_t) * tpname_tab;

-static int tpname_eq (tpname_t tpname1, tpname_t tpname2) {
+static int tpname_eq (tpname_t tpname1, tpname_t tpname2, void *arg) {
  return tpname1.id->u.s.s == tpname2.id->u.s.s && tpname1.scope == tpname2.scope;
 }

-static htab_hash_t tpname_hash (tpname_t tpname) {
+static htab_hash_t tpname_hash (tpname_t tpname, void *arg) {
  return (mir_hash_finish (
    mir_hash_step (mir_hash_step (mir_hash_init (0x42), (uint64_t) tpname.id->u.s.s),
                   (uint64_t) tpname.scope)));
 }

-static void tpname_init (void) { HTAB_CREATE (tpname_t, tpname_tab, 1000, tpname_hash, tpname_eq); }
+static void tpname_init (void) {
+  HTAB_CREATE (tpname_t, tpname_tab, 1000, tpname_hash, tpname_eq, NULL);
+}

 static int tpname_find (node_t id, node_t scope, tpname_t *res) {
  int found_p;
@ -5013,21 +5026,22 @@ struct check_ctx {

 static int supported_alignment_p (mir_llong align) { return TRUE; }  // ???

-static int symbol_eq (symbol_t s1, symbol_t s2) {
+static int symbol_eq (symbol_t s1, symbol_t s2, void *arg) {
  return s1.mode == s2.mode && s1.id->u.s.s == s2.id->u.s.s && s1.scope == s2.scope;
 }

-static htab_hash_t symbol_hash (symbol_t s) {
+static htab_hash_t symbol_hash (symbol_t s, void *arg) {
  return (mir_hash_finish (
    mir_hash_step (mir_hash_step (mir_hash_step (mir_hash_init (0x42), (uint64_t) s.mode),
                                  (uint64_t) s.id->u.s.s),
                   (uint64_t) s.scope)));
 }

-static void symbol_clear (symbol_t sym) { VARR_DESTROY (node_t, sym.defs); }
+static void symbol_clear (symbol_t sym, void *arg) { VARR_DESTROY (node_t, sym.defs); }

 static void symbol_init (c2m_ctx_t c2m_ctx) {
-  HTAB_CREATE_WITH_FREE_FUNC (symbol_t, symbol_tab, 5000, symbol_hash, symbol_eq, symbol_clear);
+  HTAB_CREATE_WITH_FREE_FUNC (symbol_t, symbol_tab, 5000, symbol_hash, symbol_eq, symbol_clear,
+                              NULL);
 }

 static int symbol_find (c2m_ctx_t c2m_ctx, enum symbol_mode mode, node_t id, node_t scope,
@ -5183,13 +5197,6 @@ static struct type integer_promotion (const struct type *type) {
  return res;
 }

-#define SWAP(a1, a2, t) \
-  do {                  \
-    t = a1;             \
-    a1 = a2;            \
-    a2 = t;             \
-  } while (0)
-
 static struct type arithmetic_conversion (const struct type *type1, const struct type *type2) {
  struct type res, t1, t2;

@ -5884,10 +5891,8 @@ static node_t process_tag (c2m_ctx_t c2m_ctx, node_t r, node_t id, node_t decl_l
    error (c2m_ctx, id->pos, "tag %s redeclaration", id->u.s.s);
  } else {
    if (decl_list->code != N_IGNORE) { /* swap decl lists */
-      DLIST (node_t) temp = r->ops;
-
-      r->ops = sym.def_node->ops;
-      sym.def_node->ops = temp;
+      DLIST (node_t) temp;
+      SWAP (r->ops, sym.def_node->ops, temp);
    }
    r = sym.def_node;
  }
@ -6302,9 +6307,13 @@ static void adjust_param_type (c2m_ctx_t c2m_ctx, struct type **type_ptr) {

  if (type->mode == TM_ARR) {  // ??? static, old type qual
    arr_type = type->u.arr_type;
-    type->mode = TM_PTR;
-    type->u.ptr_type = arr_type->el_type;
-    type->type_qual = arr_type->ind_type_qual;
+    par_type = create_type (c2m_ctx, NULL);
+    par_type->mode = TM_PTR;
+    par_type->pos_node = type->pos_node;
+    par_type->u.ptr_type = arr_type->el_type;
+    par_type->type_qual = arr_type->ind_type_qual;
+    par_type->arr_type = type;
+    *type_ptr = type = par_type;
    make_type_complete (c2m_ctx, type);
  } else if (type->mode == TM_FUNC) {
    par_type = create_type (c2m_ctx, NULL);
@ -7486,7 +7495,7 @@ static struct expr *check_assign_op (c2m_ctx_t c2m_ctx, node_t r, node_t op1, no
  return e;
 }

-static unsigned case_hash (case_t el) {
+static unsigned case_hash (case_t el, void *arg) {
  node_t case_expr = NL_HEAD (el->case_node->ops);
  struct expr *expr;

@ -7498,7 +7507,7 @@ static unsigned case_hash (case_t el) {
  return mir_hash (&expr->u.u_val, sizeof (expr->u.u_val), 0x42);
 }

-static int case_eq (case_t el1, case_t el2) {
+static int case_eq (case_t el1, case_t el2, void *arg) {
  node_t case_expr1 = NL_HEAD (el1->case_node->ops);
  node_t case_expr2 = NL_HEAD (el2->case_node->ops);
  struct expr *expr1, *expr2;
@ -9050,7 +9059,7 @@ static void context_init (MIR_context_t ctx) {
  symbol_init (c2m_ctx);
  in_params_p = FALSE;
  curr_unnamed_anon_struct_union_member = NULL;
-  HTAB_CREATE (case_t, case_tab, 100, case_hash, case_eq);
+  HTAB_CREATE (case_t, case_tab, 100, case_hash, case_eq, NULL);
  VARR_CREATE (decl_t, func_decls_for_allocation, 1024);
 }

@ -9162,14 +9171,18 @@ static op_t new_op (decl_t decl, MIR_op_t mir_op) {
  return res;
 }

-static htab_hash_t reg_var_hash (reg_var_t r) { return mir_hash (r.name, strlen (r.name), 0x42); }
-static int reg_var_eq (reg_var_t r1, reg_var_t r2) { return strcmp (r1.name, r2.name) == 0; }
+static htab_hash_t reg_var_hash (reg_var_t r, void *arg) {
+  return mir_hash (r.name, strlen (r.name), 0x42);
+}
+static int reg_var_eq (reg_var_t r1, reg_var_t r2, void *arg) {
+  return strcmp (r1.name, r2.name) == 0;
+}

 static void init_reg_vars (MIR_context_t ctx) {
  c2m_ctx_t c2m_ctx = *c2m_ctx_loc (ctx);

  reg_free_mark = 0;
-  HTAB_CREATE (reg_var_t, reg_var_tab, 128, reg_var_hash, reg_var_eq);
+  HTAB_CREATE (reg_var_t, reg_var_tab, 128, reg_var_hash, reg_var_eq, NULL);
 }

 static void finish_curr_func_reg_vars (MIR_context_t ctx) {
@ -9328,6 +9341,29 @@ static void emit_insn (MIR_context_t ctx, MIR_insn_t insn) {
  MIR_append_insn (ctx, curr_func, insn);
 }

+/* Append label INSN to the current function, first applying two peephole simplifications to
+   the insns already at the end of the function:
+     BCOND T, L1; JMP L2; L1: => BNCOND T, L2; L1:
+     JMP L; L: => L:
+   INSN must be a MIR_LABEL insn (asserted).  */
+static void emit_label_insn_opt (MIR_context_t ctx, MIR_insn_t insn) {
+  c2m_ctx_t c2m_ctx = *c2m_ctx_loc (ctx);
+  MIR_insn_code_t rev_code;
+  MIR_insn_t last, prev;
+
+  assert (insn->code == MIR_LABEL);
+  /* Tail is "<branch> ..., INSN; JMP L2": retarget the branch to L2 with the reversed code and
+     drop the JMP.  A MIR_INSN_BOUND result is treated as "no reversed code for PREV".  */
+  if ((last = DLIST_TAIL (MIR_insn_t, curr_func->u.func->insns)) != NULL
+      && (prev = DLIST_PREV (MIR_insn_t, last)) != NULL && last->code == MIR_JMP
+      && (rev_code = MIR_reverse_branch_code (prev->code)) != MIR_INSN_BOUND
+      && prev->ops[0].mode == MIR_OP_LABEL && prev->ops[0].u.label == insn) {
+    prev->ops[0] = last->ops[0];
+    prev->code = rev_code;
+    MIR_remove_insn (ctx, curr_func, last);
+  }
+  /* Re-read the tail (it may have changed above): a JMP to the label being emitted right after
+     it is redundant.  */
+  if ((last = DLIST_TAIL (MIR_insn_t, curr_func->u.func->insns)) != NULL && last->code == MIR_JMP
+      && last->ops[0].mode == MIR_OP_LABEL && last->ops[0].u.label == insn) {
+    MIR_remove_insn (ctx, curr_func, last);
+  }
+  MIR_append_insn (ctx, curr_func, insn);
+}
+
 /* Change t1 = expr; v = t1 to v = expr */
 static void emit_insn_opt (MIR_context_t ctx, MIR_insn_t insn) {
  c2m_ctx_t c2m_ctx = *c2m_ctx_loc (ctx);
@ -9589,16 +9625,22 @@ static op_t mem_to_address (MIR_context_t ctx, op_t mem) {
 static op_t force_val (MIR_context_t ctx, op_t op, int arr_p) {
  op_t temp_op;
  int sh;
+  c2m_ctx_t c2m_ctx;

  if (arr_p && op.mir_op.mode == MIR_OP_MEM) {
    /* an array -- use a pointer: */
    return mem_to_address (ctx, op);
  }
  if (op.decl == NULL || op.decl->bit_offset < 0) return op;
+  c2m_ctx = *c2m_ctx_loc (ctx);
  assert (op.mir_op.mode == MIR_OP_MEM);
  temp_op = get_new_temp (ctx, MIR_T_I64);
  emit2 (ctx, MIR_MOV, temp_op.mir_op, op.mir_op);
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  sh = 64 - op.decl->bit_offset - op.decl->width;
+#else
+  sh = op.decl->bit_offset + (64 - type_size (c2m_ctx, op.decl->decl_spec.type) * MIR_CHAR_BIT);
+#endif
  if (sh != 0) emit3 (ctx, MIR_LSH, temp_op.mir_op, temp_op.mir_op, MIR_new_int_op (ctx, sh));
  emit3 (ctx,
         signed_integer_type_p (op.decl->decl_spec.type)
@ -9820,7 +9862,7 @@ static void emit_label (MIR_context_t ctx, node_t r) {
  assert (labels->code == N_LIST);
  if (NL_HEAD (labels->ops) == NULL) return;
  if (labels->attr == NULL) labels->attr = MIR_new_label (ctx);
-  emit_insn (ctx, labels->attr);
+  emit_label_insn_opt (ctx, labels->attr);
 }

 static MIR_label_t get_label (MIR_context_t ctx, node_t target) {
@ -9877,7 +9919,7 @@ static void block_move (MIR_context_t ctx, op_t var, op_t val, mir_size_t size)
    emit2 (ctx, MIR_MOV, index.mir_op, MIR_new_int_op (ctx, size));
    val = modify_for_block_move (ctx, val, index);
    var = modify_for_block_move (ctx, var, index);
-    emit_insn (ctx, repeat_label);
+    emit_label_insn_opt (ctx, repeat_label);
    emit3 (ctx, MIR_SUB, index.mir_op, index.mir_op, one_op.mir_op);
    assert (var.mir_op.mode == MIR_OP_MEM && val.mir_op.mode == MIR_OP_MEM);
    val.mir_op.u.mem.type = var.mir_op.u.mem.type = MIR_T_I8;
@ -10234,10 +10276,16 @@ static void emit_scalar_assign (MIR_context_t ctx, op_t var, op_t *val, MIR_type
    int width = var.decl->width;
    uint64_t mask, mask2;
    op_t temp_op1, temp_op2, temp_op3, temp_op4;
+    c2m_ctx_t c2m_ctx = *c2m_ctx_loc (ctx);
+    size_t size = type_size (c2m_ctx, var.decl->decl_spec.type) * MIR_CHAR_BIT;

    assert (var.mir_op.mode == MIR_OP_MEM);
    mask = 0xffffffffffffffff >> (64 - width);
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    mask2 = ~(mask << var.decl->bit_offset);
+#else
+    mask2 = ~(mask << (size - var.decl->bit_offset - width));
+#endif
    temp_op1 = get_new_temp (ctx, MIR_T_I64);
    temp_op2 = get_new_temp (ctx, MIR_T_I64);
    temp_op3 = get_new_temp (ctx, MIR_T_I64);
@ -10255,12 +10303,21 @@ static void emit_scalar_assign (MIR_context_t ctx, op_t var, op_t *val, MIR_type
    }
    emit3 (ctx, MIR_AND, temp_op3.mir_op, temp_op1.mir_op, MIR_new_uint_op (ctx, mask));
    temp_op4 = get_new_temp (ctx, MIR_T_I64);
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    if (var.decl->bit_offset == 0) {
      temp_op4 = temp_op3;
    } else {
      emit3 (ctx, MIR_LSH, temp_op4.mir_op, temp_op3.mir_op,
             MIR_new_int_op (ctx, var.decl->bit_offset));
    }
+#else
+    if (size - var.decl->bit_offset - width == 0) {
+      temp_op4 = temp_op3;
+    } else {
+      emit3 (ctx, MIR_LSH, temp_op4.mir_op, temp_op3.mir_op,
+             MIR_new_int_op (ctx, size - var.decl->bit_offset - width));
+    }
+#endif
    if (!ignore_others_p) {
      emit3 (ctx, MIR_OR, temp_op4.mir_op, temp_op4.mir_op, temp_op2.mir_op);
    }
@ -10268,19 +10325,29 @@ static void emit_scalar_assign (MIR_context_t ctx, op_t var, op_t *val, MIR_type
  }
 }

-static void add_bit_field (uint64_t *u, uint64_t v, decl_t member_decl) {
+static void add_bit_field (MIR_context_t ctx, uint64_t *u, uint64_t v, decl_t member_decl) {
  uint64_t mask, mask2;
  int bit_offset = member_decl->bit_offset, width = member_decl->width;
+  c2m_ctx_t c2m_ctx = *c2m_ctx_loc (ctx);
+  size_t size = type_size (c2m_ctx, member_decl->decl_spec.type) * MIR_CHAR_BIT;

  mask = 0xffffffffffffffff >> (64 - width);
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  mask2 = ~(mask << bit_offset);
+#else
+  mask2 = ~(mask << (size - bit_offset - width));
+#endif
  *u &= mask2;
-  v &= mask;
  if (signed_integer_type_p (member_decl->decl_spec.type)) {
    v <<= (64 - width);
    v = (int64_t) v >> (64 - width);
  }
+  v &= mask;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  v <<= bit_offset;
+#else
+  v <<= size - bit_offset - width;
+#endif
  *u |= v;
 }

@ -10400,14 +10467,14 @@ static void gen_initializer (MIR_context_t ctx, size_t init_start, op_t var,
          uint64_t u = 0;

          assert (val.mir_op.mode == MIR_OP_INT || val.mir_op.mode == MIR_OP_UINT);
-          add_bit_field (&u, val.mir_op.u.u, init_el.member_decl);
+          add_bit_field (ctx, &u, val.mir_op.u.u, init_el.member_decl);
          for (; i + 1 < VARR_LENGTH (init_el_t, init_els); i++, init_el = next_init_el) {
            next_init_el = VARR_GET (init_el_t, init_els, i + 1);
            if (next_init_el.offset != init_el.offset) break;
            if (next_init_el.member_decl->bit_offset == init_el.member_decl->bit_offset) continue;
            val = gen (ctx, next_init_el.init, NULL, NULL, TRUE, NULL);
            assert (val.mir_op.mode == MIR_OP_INT || val.mir_op.mode == MIR_OP_UINT);
-            add_bit_field (&u, val.mir_op.u.u, next_init_el.member_decl);
+            add_bit_field (ctx, &u, val.mir_op.u.u, next_init_el.member_decl);
          }
          val.mir_op.u.u = u;
        }
@ -10589,19 +10656,19 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
      assert (t_label != NULL && f_label != NULL);
      gen (ctx, NL_HEAD (r->ops), r->code == N_ANDAND ? temp_label : t_label,
           r->code == N_ANDAND ? f_label : temp_label, FALSE, NULL);
-      emit_insn (ctx, temp_label);
+      emit_label_insn_opt (ctx, temp_label);
      gen (ctx, NL_EL (r->ops, 1), t_label, f_label, FALSE, NULL);
      if (make_val_p) {
        MIR_label_t end_label = MIR_new_label (ctx);

        type = ((struct expr *) r->attr)->type;
        res = get_new_temp (ctx, get_mir_type (ctx, type));
-        emit_insn (ctx, t_label);
+        emit_label_insn_opt (ctx, t_label);
        emit2 (ctx, MIR_MOV, res.mir_op, one_op.mir_op);
        emit1 (ctx, MIR_JMP, MIR_new_label_op (ctx, end_label));
-        emit_insn (ctx, f_label);
+        emit_label_insn_opt (ctx, f_label);
        emit2 (ctx, MIR_MOV, res.mir_op, zero_op.mir_op);
-        emit_insn (ctx, end_label);
+        emit_label_insn_opt (ctx, end_label);
      }
      true_label = false_label = NULL;
    } else if (true_label != NULL) {
@ -10633,12 +10700,12 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_

      res = get_new_temp (ctx, MIR_T_I64);
      gen (ctx, NL_HEAD (r->ops), t_label, f_label, FALSE, NULL);
-      emit_insn (ctx, t_label);
+      emit_label_insn_opt (ctx, t_label);
      emit2 (ctx, MIR_MOV, res.mir_op, zero_op.mir_op);
      emit1 (ctx, MIR_JMP, MIR_new_label_op (ctx, end_label));
-      emit_insn (ctx, f_label);
+      emit_label_insn_opt (ctx, f_label);
      emit2 (ctx, MIR_MOV, res.mir_op, one_op.mir_op);
-      emit_insn (ctx, end_label);
+      emit_label_insn_opt (ctx, end_label);
    }
    break;
  case N_ADD:
@ -10705,8 +10772,10 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
    t = get_mir_type (ctx, type);
    var = gen (ctx, NL_HEAD (r->ops), NULL, NULL, FALSE, NULL);
    op1 = force_val (ctx, var, FALSE);
-    res = get_new_temp (ctx, t);
-    emit2 (ctx, tp_mov (t), res.mir_op, op1.mir_op);
+    if (val_p || true_label != NULL) {
+      res = get_new_temp (ctx, t);
+      emit2 (ctx, tp_mov (t), res.mir_op, op1.mir_op);
+    }
    val = promote (ctx, op1, t, TRUE);
    op2 = promote (ctx,
                   type->mode != TM_PTR
@ -10756,7 +10825,7 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
    t = get_op_type (ctx, var);
    op2
      = gen (ctx, NL_EL (r->ops, 1), NULL, NULL, t != MIR_T_UNDEF, t != MIR_T_UNDEF ? NULL : &var);
-    if (t == MIR_T_UNDEF) {
+    if ((!val_p && true_label == NULL) || t == MIR_T_UNDEF) {
      res = var;
      val = op2;
    } else {
@ -10769,7 +10838,7 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
      assert (t != MIR_T_UNDEF);
      val = cast (ctx, val, get_mir_type (ctx, ((struct expr *) r->attr)->type), FALSE);
      emit_scalar_assign (ctx, var, &val, t, FALSE);
-      if (r->code != N_POST_INC && r->code != N_POST_DEC)
+      if ((val_p || true_label != NULL) && r->code != N_POST_INC && r->code != N_POST_DEC)
        emit2_noopt (ctx, tp_mov (t), res.mir_op, val.mir_op);
    } else { /* block move */
      mir_size_t size = type_size (c2m_ctx, ((struct expr *) r->attr)->type);
@ -10929,7 +10998,7 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_

    if (!void_p) t = get_mir_type (ctx, type);
    gen (ctx, cond, true_label, false_label, FALSE, NULL);
-    emit_insn (ctx, true_label);
+    emit_label_insn_opt (ctx, true_label);
    op1 = gen (ctx, true_expr, NULL, NULL, !void_p && t != MIR_T_UNDEF, NULL);
    if (!void_p) {
      if (t != MIR_T_UNDEF) {
@ -10945,7 +11014,7 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
      }
    }
    emit1 (ctx, MIR_JMP, MIR_new_label_op (ctx, end_label));
-    emit_insn (ctx, false_label);
+    emit_label_insn_opt (ctx, false_label);
    op1 = gen (ctx, false_expr, NULL, NULL, !void_p && t != MIR_T_UNDEF, NULL);
    if (!void_p) {
      if (t != MIR_T_UNDEF) {
@ -10958,7 +11027,7 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
        block_move (ctx, res, op1, size);
      }
    }
-    emit_insn (ctx, end_label);
+    emit_label_insn_opt (ctx, end_label);
    break;
  }
  case N_ALIGNOF:
@ -11317,12 +11386,12 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
    assert (false_label == NULL && true_label == NULL);
    emit_label (ctx, r);
    top_gen (ctx, expr, if_label, else_label);
-    emit_insn (ctx, if_label);
+    emit_label_insn_opt (ctx, if_label);
    gen (ctx, if_stmt, NULL, NULL, FALSE, NULL);
    emit1 (ctx, MIR_JMP, MIR_new_label_op (ctx, end_label));
-    emit_insn (ctx, else_label);
+    emit_label_insn_opt (ctx, else_label);
    gen (ctx, else_stmt, NULL, NULL, FALSE, NULL);
-    emit_insn (ctx, end_label);
+    emit_label_insn_opt (ctx, end_label);
    break;
  }
  case N_SWITCH: {
@ -11421,14 +11490,14 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
            emit3 (ctx, short_p ? MIR_UBLES : MIR_UBLE, MIR_new_label_op (ctx, label),
                   case_reg_op.mir_op, MIR_new_int_op (ctx, e2->u.i_val));
          }
-          emit_insn (ctx, cont_label);
+          emit_label_insn_opt (ctx, cont_label);
        }
      }
      if (c == NULL) /* no default: */
        emit1 (ctx, MIR_JMP, MIR_new_label_op (ctx, break_label));
    }
    top_gen (ctx, stmt, NULL, NULL);
-    emit_insn (ctx, break_label);
+    emit_label_insn_opt (ctx, break_label);
    break_label = saved_break_label;
    break;
  }
@ -11442,11 +11511,11 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
    continue_label = MIR_new_label (ctx);
    break_label = MIR_new_label (ctx);
    emit_label (ctx, r);
-    emit_insn (ctx, start_label);
+    emit_label_insn_opt (ctx, start_label);
    gen (ctx, stmt, NULL, NULL, FALSE, NULL);
-    emit_insn (ctx, continue_label);
+    emit_label_insn_opt (ctx, continue_label);
    top_gen (ctx, expr, start_label, break_label);
-    emit_insn (ctx, break_label);
+    emit_label_insn_opt (ctx, break_label);
    continue_label = saved_continue_label;
    break_label = saved_break_label;
    break;
@ -11461,12 +11530,12 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
    continue_label = MIR_new_label (ctx);
    break_label = MIR_new_label (ctx);
    emit_label (ctx, r);
-    emit_insn (ctx, continue_label);
+    emit_label_insn_opt (ctx, continue_label);
    top_gen (ctx, expr, stmt_label, break_label);
-    emit_insn (ctx, stmt_label);
+    emit_label_insn_opt (ctx, stmt_label);
    gen (ctx, stmt, NULL, NULL, FALSE, NULL);
-    emit1 (ctx, MIR_JMP, MIR_new_label_op (ctx, continue_label));
-    emit_insn (ctx, break_label);
+    top_gen (ctx, expr, stmt_label, break_label);
+    emit_label_insn_opt (ctx, break_label);
    continue_label = saved_continue_label;
    break_label = saved_break_label;
    break;
@ -11476,7 +11545,7 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
    node_t cond = NL_NEXT (init);
    node_t iter = NL_NEXT (cond);
    node_t stmt = NL_NEXT (iter);
-    MIR_label_t start_label = MIR_new_label (ctx), stmt_label = MIR_new_label (ctx);
+    MIR_label_t stmt_label = MIR_new_label (ctx);
    MIR_label_t saved_continue_label = continue_label, saved_break_label = break_label;

    assert (false_label == NULL && true_label == NULL);
@ -11484,15 +11553,18 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
    break_label = MIR_new_label (ctx);
    emit_label (ctx, r);
    top_gen (ctx, init, NULL, NULL);
-    emit_insn (ctx, start_label);
    if (cond->code != N_IGNORE) /* non-empty condition: */
      top_gen (ctx, cond, stmt_label, break_label);
-    emit_insn (ctx, stmt_label);
+    emit_label_insn_opt (ctx, stmt_label);
    gen (ctx, stmt, NULL, NULL, FALSE, NULL);
-    emit_insn (ctx, continue_label);
+    emit_label_insn_opt (ctx, continue_label);
    top_gen (ctx, iter, NULL, NULL);
-    emit1 (ctx, MIR_JMP, MIR_new_label_op (ctx, start_label));
-    emit_insn (ctx, break_label);
+    if (cond->code == N_IGNORE) { /* empty condition: */
+      emit1 (ctx, MIR_JMP, MIR_new_label_op (ctx, stmt_label));
+    } else {
+      top_gen (ctx, cond, stmt_label, break_label);
+    }
+    emit_label_insn_opt (ctx, break_label);
    continue_label = saved_continue_label;
    break_label = saved_break_label;
    break;
@ -11579,7 +11651,7 @@ finish:
 DEF_HTAB (MIR_item_t);
 static HTAB (MIR_item_t) * proto_tab;

-static htab_hash_t proto_hash (MIR_item_t pi) {
+static htab_hash_t proto_hash (MIR_item_t pi, void *arg) {
  MIR_proto_t p = pi->u.proto;
  MIR_var_t *args = VARR_ADDR (MIR_var_t, p->args);
  uint64_t h = mir_hash_init (42);
@ -11594,7 +11666,7 @@ static htab_hash_t proto_hash (MIR_item_t pi) {
  return mir_hash_finish (h);
 }

-static int proto_eq (MIR_item_t pi1, MIR_item_t pi2) {
+static int proto_eq (MIR_item_t pi1, MIR_item_t pi2, void *arg) {
  MIR_proto_t p1 = pi1->u.proto, p2 = pi2->u.proto;

  if (p1->nres != p2->nres || p1->vararg_p != p2->vararg_p
@ -11639,7 +11711,7 @@ static void gen_mir_protos (MIR_context_t ctx) {
  MIR_type_t ret_type;

  curr_mir_proto_num = 0;
-  HTAB_CREATE (MIR_item_t, proto_tab, 512, proto_hash, proto_eq);
+  HTAB_CREATE (MIR_item_t, proto_tab, 512, proto_hash, proto_eq, NULL);
  for (size_t i = 0; i < VARR_LENGTH (node_t, call_nodes); i++) {
    call = VARR_GET (node_t, call_nodes, i);
    assert (call->code == N_CALL);
@ -12105,6 +12177,8 @@ static void init_include_dirs (MIR_context_t ctx) {
 #endif
 #if defined(__linux__) && defined(__x86_64__)
  VARR_PUSH (char_ptr_t, system_headers, "/usr/include/x86_64-linux-gnu");
+#elif defined(__linux__) && defined(__aarch64__)
+  VARR_PUSH (char_ptr_t, system_headers, "/usr/include/aarch64-linux-gnu");
 #endif
 #if defined(__APPLE__) || defined(__unix__)
  VARR_PUSH (char_ptr_t, system_headers, "/usr/include");
--- a/mir/c2mir/ppc64/cppc64-code.c
+++ b/mir/c2mir/ppc64/cppc64-code.c
@ -0,0 +1,23 @@
+/* This file is a part of MIR project.
+   Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
+*/
+
+#include "../mirc.h"
+#include "mirc-ppc64-linux.h"
+
+static const char *standard_includes[] = {mirc, ppc64_mirc};
+
+static const char *standard_include_dirs[] = {"include/mirc/", "include/mirc/ppc64/"};
+
+#define MAX_ALIGNMENT 16
+
+/* Target hook for adjusting the alignment of a variable; forwards to the ppc64 function below. */
+#define ADJUST_VAR_ALIGNMENT(c2m_ctx, align, type) \
+  ppc64_adjust_var_alignment (c2m_ctx, align, type)
+
+/* Return ALIGN unchanged: no ppc64-specific adjustment is made.  C2M_CTX and TYPE are unused. */
+static int ppc64_adjust_var_alignment (c2m_ctx_t c2m_ctx, int align, struct type *type) {
+  return align;
+}
+
+/* Return non-zero if ALIGN is not an accepted alignment value.  Accepted values are 0 and the
+   powers of two from 1 up to 16.  */
+static int invalid_alignment (mir_llong align) {
+  switch (align) {
+  case 0:
+  case 1:
+  case 2:
+  case 4:
+  case 8:
+  case 16: return 0;
+  default: return 1;
+  }
+}
--- a/mir/c2mir/ppc64/cppc64.h
+++ b/mir/c2mir/ppc64/cppc64.h
@ -0,0 +1,50 @@
+/* This file is a part of MIR project.
+   Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
+*/
+
+#include <stdint.h>
+
+/* Number of bits in a char: */
+#define MIR_CHAR_BIT 8
+
+/* Signed integer types, mapped onto fixed-width host types (long and long long are 64-bit): */
+typedef int8_t mir_schar;
+typedef int16_t mir_short;
+typedef int32_t mir_int;
+typedef int64_t mir_long;
+typedef int64_t mir_llong;
+
+/* Limits of the signed types above: */
+#define MIR_SCHAR_MIN INT8_MIN
+#define MIR_SCHAR_MAX INT8_MAX
+#define MIR_SHORT_MIN INT16_MIN
+#define MIR_SHORT_MAX INT16_MAX
+#define MIR_INT_MIN INT32_MIN
+#define MIR_INT_MAX INT32_MAX
+#define MIR_LONG_MIN INT64_MIN
+#define MIR_LONG_MAX INT64_MAX
+#define MIR_LLONG_MIN INT64_MIN
+#define MIR_LLONG_MAX INT64_MAX
+
+/* Unsigned counterparts: */
+typedef uint8_t mir_uchar;
+typedef uint16_t mir_ushort;
+typedef uint32_t mir_uint;
+typedef uint64_t mir_ulong;
+typedef uint64_t mir_ullong;
+
+/* Limits of the unsigned types above: */
+#define MIR_UCHAR_MAX UINT8_MAX
+#define MIR_USHORT_MAX UINT16_MAX
+#define MIR_UINT_MAX UINT32_MAX
+#define MIR_ULONG_MAX UINT64_MAX
+#define MIR_ULLONG_MAX UINT64_MAX
+
+/* Plain char is given the signed char type and limits here.
+   NOTE(review): PowerPC Linux ABIs are usually described as making plain char unsigned --
+   confirm that signed is what is intended for this target.  */
+typedef mir_schar mir_char;
+#define MIR_CHAR_MIN MIR_SCHAR_MIN
+#define MIR_CHAR_MAX MIR_SCHAR_MAX
+
+/* Floating point types are the host compiler's types of the same name: */
+typedef float mir_float;
+typedef double mir_double;
+typedef long double mir_ldouble;
+
+/* Boolean, pointer difference and size types: */
+typedef uint8_t mir_bool;
+typedef int64_t mir_ptrdiff_t;
+typedef uint64_t mir_size_t;
+
+#define MIR_SIZE_MAX UINT64_MAX
--- a/mir/c2mir/ppc64/mirc-ppc64-linux.h
+++ b/mir/c2mir/ppc64/mirc-ppc64-linux.h
@ -0,0 +1,95 @@
+/* This file is a part of MIR project.
+   Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
+*/
+
+/* C source text with the ppc64-specific predefined macros and declarations.  It is listed in
+   standard_includes of cppc64-code.c.  The integer limit macros must have the type of the
+   corresponding integer type: the 64-bit unsigned maximum is therefore computed with unsigned
+   long constants, as `long * 2u` would be evaluated in signed long and overflow.  */
+static char ppc64_mirc[]
+  = "#define __PPC64__ 1\n"
+    "#define _ARCH_PPC64 1\n"
+    "#define _LP64 1\n"
+    "#define __LP64__ 1\n"
+    "\n"
+    "#define __LONG_DOUBLE_128__ 1\n" // ???
+    "#define __SIZEOF_DOUBLE__ 8\n"
+    "#define __SIZEOF_FLOAT__ 4\n"
+    "#define __SIZEOF_INT__ 4\n"
+    "#define __SIZEOF_LONG_DOUBLE__ 16\n"
+    "#define __SIZEOF_LONG_LONG__ 8\n"
+    "#define __SIZEOF_LONG__ 8\n"
+    "#define __SIZEOF_POINTER__ 8\n"
+    "#define __SIZEOF_PTRDIFF_T__ 8\n"
+    "#define __SIZEOF_SHORT__ 2\n"
+    "#define __SIZEOF_SIZE_T__ 8\n"
+    "\n"
+    "#define _BIG_ENDIAN 1\n" // ??? Implement LE too
+    "#define __ORDER_LITTLE_ENDIAN__ 1234\n"
+    "#define __ORDER_BIG_ENDIAN__ 4321\n"
+    "#define __BYTE_ORDER__ __ORDER_BIG_ENDIAN__\n"
+    "\n"
+    "/* Some GCC predefined macros: */\n"
+    "#define __SIZE_TYPE__ unsigned long\n"
+    "#define __PTRDIFF_TYPE__ long\n"
+    "#define __INTMAX_TYPE__ long\n"
+    "#define __UINTMAX_TYPE__ unsigned long\n"
+    "#define __INT8_TYPE__ signed char\n"
+    "#define __INT16_TYPE__ short\n"
+    "#define __INT32_TYPE__ int\n"
+    "#define __INT64_TYPE__ long\n"
+    "#define __UINT8_TYPE__ unsigned char\n"
+    "#define __UINT16_TYPE__ unsigned short\n"
+    "#define __UINT32_TYPE__ unsigned int\n"
+    "#define __UINT64_TYPE__ unsigned long\n"
+    "#define __INTPTR_TYPE__ long\n"
+    "#define __UINTPTR_TYPE__ unsigned long\n"
+    "\n"
+    "#define __CHAR_BIT__ 8\n"
+    "#define __INT8_MAX__ 127\n"
+    "#define __INT16_MAX__ 32767\n"
+    "#define __INT32_MAX__ 2147483647\n"
+    "#define __INT64_MAX__ 9223372036854775807l\n"
+    "#define __UINT8_MAX__ (__INT8_MAX__ * 2u + 1u)\n"
+    "#define __UINT16_MAX__ (__INT16_MAX__ * 2u + 1u)\n"
+    "#define __UINT32_MAX__ (__INT32_MAX__ * 2u + 1u)\n"
+    "#define __UINT64_MAX__ (__INT64_MAX__ * 2ul + 1ul)\n"
+    "#define __SCHAR_MAX__ __INT8_MAX__\n"
+    "#define __SHRT_MAX__ __INT16_MAX__\n"
+    "#define __INT_MAX__ __INT32_MAX__\n"
+    "#define __LONG_MAX__ __INT64_MAX__\n"
+    "#define __LONG_LONG_MAX__ __INT64_MAX__\n"
+    "#define __SIZE_MAX__ __UINT64_MAX__\n"
+    "#define __PTRDIFF_MAX__ __INT64_MAX__\n"
+    "#define __INTMAX_MAX__ __INT64_MAX__\n"
+    "#define __UINTMAX_MAX__ __UINT64_MAX__\n"
+    "#define __INTPTR_MAX__ __INT64_MAX__\n"
+    "#define __UINTPTR_MAX__ __UINT64_MAX__\n"
+    "\n"
+    "#define __FLT_MIN_EXP__ (-125)\n"
+    "#define __FLT_MAX_EXP__ 128\n"
+    "#define __FLT_DIG__ 6\n"
+    "#define __FLT_DECIMAL_DIG__ 9\n"
+    "#define __FLT_MANT_DIG__ 24\n"
+    "#define __FLT_MIN__ 1.17549435082228750796873653722224568e-38F\n"
+    "#define __FLT_MAX__ 3.40282346638528859811704183484516925e+38F\n"
+    "#define __FLT_EPSILON__ 1.19209289550781250000000000000000000e-7F\n"
+    "\n"
+    "#define __DBL_MIN_EXP__ (-1021)\n"
+    "#define __DBL_MAX_EXP__ 1024\n"
+    "#define __DBL_DIG__ 15\n"
+    "#define __DBL_DECIMAL_DIG__ 17\n"
+    "#define __DBL_MANT_DIG__ 53\n"
+    "#define __DBL_MAX__ ((double) 1.79769313486231570814527423731704357e+308L)\n"
+    "#define __DBL_MIN__ ((double) 2.22507385850720138309023271733240406e-308L)\n"
+    "#define __DBL_EPSILON__ ((double) 2.22044604925031308084726333618164062e-16L)\n"
+    "\n"
+    "typedef unsigned short char16_t;\n"
+    "typedef unsigned int char32_t;\n"
+    "\n"
+#if defined(__linux__)
+    "#define __gnu_linux__ 1\n"
+    "#define __linux 1\n"
+    "#define __linux__ 1\n"
+    "#define linux 1\n"
+    "#define __unix 1\n"
+    "#define __unix__ 1\n"
+#endif
+    "\n"
+    "void *alloca (unsigned long);\n";
--- a/mir/c2mir/x86_64/mirc-x86_64-linux.h
+++ b/mir/c2mir/x86_64/mirc-x86_64-linux.h
@ -1,3 +1,7 @@
+/* This file is a part of MIR project.
+   Copyright (C) 2019-2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
+*/
+
 static char x86_64_mirc[]
  = "#define __amd64 1\n"
    "#define __amd64__ 1\n"
@ -9,7 +13,7 @@ static char x86_64_mirc[]
    "#define __SIZEOF_DOUBLE__ 8\n"
    "#define __SIZEOF_FLOAT__ 4\n"
    "#define __SIZEOF_INT__ 4\n"
-    "#define __SIZEOF_LONG_DOUBLE__ 8\n"
+    "#define __SIZEOF_LONG_DOUBLE__ 16\n"
    "#define __SIZEOF_LONG_LONG__ 8\n"
    "#define __SIZEOF_LONG__ 8\n"
    "#define __SIZEOF_POINTER__ 8\n"
--- a/mir/mir-aarch64.c
+++ b/mir/mir-aarch64.c
@ -0,0 +1,375 @@
+/* This file is a part of MIR project.
+   Copyright (C) 2018-2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
+*/
+
+#define VA_LIST_IS_ARRAY_P 0
+
+// _MIR_get_thunk, _MIR_redirect_thunk, _MIR_get_interp_shim, _MIR_get_ff_call, _MIR_get_wrapper
+
+/* Publish the two-insn code of the bstart builtin and return its address.  Per the insn
+   comments the code copies the stack pointer into r0 and returns.  */
+void *_MIR_get_bstart_builtin (MIR_context_t ctx) {
+  static const uint32_t bstart_code[] = {
+    0x910003e0, /* r0 = rsp */
+    0xd65f03c0, /* ret r30 */
+  };
+  return _MIR_publish_code (ctx, (uint8_t *) bstart_code, sizeof (bstart_code));
+}
+
+/* Publish the two-insn code of the bend builtin and return its address.  Per the insn comments
+   the code sets the stack pointer from r0 and returns.  */
+void *_MIR_get_bend_builtin (MIR_context_t ctx) {
+  static const uint32_t bend_code[] = {
+    0x9100001f, /* rsp = r0 */
+    0xd65f03c0, /* ret r30 */
+  };
+  return _MIR_publish_code (ctx, (uint8_t *) bend_code, sizeof (bend_code));
+}
+
+/* Explicit layout of the aarch64 va_list used by the va_* builtins below
+   (va_start_interp_builtin asserts that its size equals sizeof (va_list)).  */
+struct aarch64_va_list {
+  /* address following the last (highest addressed) named incoming
+     argument on the stack, rounded upwards to a multiple of 8 bytes,
+     or if there are no named arguments on the stack, then the value
+     of the stack pointer when the function was entered. */
+  void *__stack;
+  /* the address of the byte immediately following the general
+     register argument save area, the end of the save area being
+     aligned to a 16 byte boundary. */
+  void *__gr_top;
+  /* the address of the byte immediately following the FP/SIMD
+     register argument save area, the end of the save area being
+     aligned to a 16 byte boundary. */
+  void *__vr_top;
+  int __gr_offs; /* set to 0 - ((8 - named_gr) * 8) */
+  int __vr_offs; /* set to 0 - ((8 - named_vr) * 16) */
+};
+
+/* Return the address of the next variadic argument of MIR type T in the aarch64 va_list P and
+   advance the list past it.  FP arguments (MIR_T_F, MIR_T_D, MIR_T_LD) are taken from the
+   FP/SIMD register save area and all other arguments from the general register save area
+   while the corresponding offset is still negative; otherwise the argument is taken from the
+   stack area, where a long double is first aligned up to a 16-byte boundary.  */
+void *va_arg_builtin (void *p, uint64_t t) {
+  struct aarch64_va_list *va = p;
+  MIR_type_t type = t;
+  int fp_p = type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD;
+  void *a;
+
+  if (fp_p && va->__vr_offs < 0) {
+    a = (char *) va->__vr_top + va->__vr_offs;
+    va->__vr_offs += 16; /* each FP/SIMD save slot is 16 bytes */
+  } else if (!fp_p && va->__gr_offs < 0) {
+    a = (char *) va->__gr_top + va->__gr_offs;
+    va->__gr_offs += 8; /* each general register save slot is 8 bytes */
+  } else {
+    /* Round the stack address up to a multiple of 16.  Taking the remainder (% 16) instead
+       would replace the address by a value in the range 0..15.  */
+    if (type == MIR_T_LD) va->__stack = (void *) (((uint64_t) va->__stack + 15) / 16 * 16);
+    a = va->__stack;
+    va->__stack = (char *) va->__stack + (type == MIR_T_LD ? 16 : 8);
+  }
+  return a;
+}
+
+/* Initialize the va_list at P by copying the va_list that A points to.  CTX is unused.  */
+void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a) {
+  struct aarch64_va_list *va = p;
+  va_list *vap = a;
+
+  /* The copy below relies on the explicit struct covering the whole host va_list: */
+  assert (sizeof (struct aarch64_va_list) == sizeof (va_list));
+  *va = *(struct aarch64_va_list *) vap;
+}
+
+void va_end_interp_builtin (MIR_context_t ctx, void *p) {}
+
+/* Store into TO[0..3] a mov/movk sequence loading the 64-bit constant IMM64 into register REG
+   (0..31, asserted), one 16-bit chunk per insn.  Return the number of insns written (4).
+   CTX is unused.  */
+static int setup_imm64_insns (MIR_context_t ctx, uint32_t *to, int reg, uint64_t imm64) {
+  /* xd=imm64 */
+  static const uint32_t imm64_pat[] = {
+    0xd2800000, /*  0: mov xd, xxxx(0-15) */
+    0xf2a00000, /*  4: movk xd, xxxx(16-31) */
+    0xf2c00000, /*  8: movk xd, xxxx(32-47) */
+    0xf2e00000, /* 12: movk xd, xxxx(48-63) */
+  };
+  uint32_t mask = ~(0xffff << 5); /* clears the 16-bit immediate field at bits 5..20 */
+
+  mir_assert (0 <= reg && reg <= 31);
+  /* Each insn: pattern | 16-bit chunk of IMM64 in bits 5..20 | register number in the low bits. */
+  to[0] = (imm64_pat[0] & mask) | ((uint32_t) (imm64 & 0xffff) << 5) | reg;
+  to[1] = (imm64_pat[1] & mask) | (((uint32_t) (imm64 >> 16) & 0xffff) << 5) | reg;
+  to[2] = (imm64_pat[2] & mask) | (((uint32_t) (imm64 >> 32) & 0xffff) << 5) | reg;
+  to[3] = (imm64_pat[3] & mask) | (((uint32_t) (imm64 >> 48) & 0xffff) << 5) | reg;
+  return sizeof (imm64_pat) / sizeof (uint32_t);
+}
+
+/* Append PAT_LEN bytes starting at PAT to machine_insns.  Note that PAT_LEN is a size in
+   bytes, not a number of 32-bit insns.  */
+static void push_insns (MIR_context_t ctx, const uint32_t *pat, size_t pat_len) {
+  uint8_t *p = (uint8_t *) pat;
+
+  for (size_t i = 0; i < pat_len; i++) VARR_PUSH (uint8_t, machine_insns, p[i]);
+}
+
+/* Append to machine_insns the insns loading address ADDR into register REG.  Return the size
+   of the appended code in bytes.  */
+static size_t gen_mov_addr (MIR_context_t ctx, int reg, void *addr) {
+  uint32_t insns[4];
+  int insns_num = setup_imm64_insns (ctx, insns, reg, (uint64_t) addr);
+
+  mir_assert (insns_num == 4 && sizeof (insns) == insns_num * sizeof (uint32_t));
+  push_insns (ctx, insns, insns_num * sizeof (uint32_t));
+  return insns_num * sizeof (uint32_t);
+}
+
+/* Width in bits of the offset field of the b/bl insn patterns used in this file.  The offset is
+   signed and counted in 32-bit insns.  */
+#define BR_OFFSET_BITS 26
+#define MAX_BR_OFFSET (1 << (BR_OFFSET_BITS - 1)) /* 1 for sign */
+/* Mask of the low BR_OFFSET_BITS bits.  It is built from an unsigned constant because
+   left-shifting the negative value -1 is undefined behavior in C.  */
+#define BR_OFFSET_MASK ((1u << BR_OFFSET_BITS) - 1)
+
+/* Append to machine_insns a call of CALL_ADDR.  If BASE_ADDR is not NULL and the distance from
+   BASE_ADDR to CALL_ADDR, counted in 32-bit insns, fits into the signed branch offset field, a
+   single pc-relative bl is generated.  Otherwise CALL_ADDR is loaded into register TEMP_REG
+   (0..31, asserted) and blr through it is generated.
+   NOTE(review): the bl offset is computed relative to BASE_ADDR, so BASE_ADDR is presumably
+   the address where the bl insn itself will be placed -- confirm against callers.  */
+static void gen_call_addr (MIR_context_t ctx, void *base_addr, int temp_reg, void *call_addr) {
+  static const uint32_t call_pat1 = 0x94000000; /* bl x */
+  static const uint32_t call_pat2 = 0xd63f0000; /* blr x */
+  uint32_t insn;
+  int64_t offset = (uint32_t *) call_addr - (uint32_t *) base_addr; /* in insns, not bytes */
+
+  mir_assert (0 <= temp_reg && temp_reg <= 31);
+  if (base_addr != NULL && -(int64_t) MAX_BR_OFFSET <= offset && offset < (int64_t) MAX_BR_OFFSET) {
+    insn = call_pat1 | ((uint32_t) offset & BR_OFFSET_MASK);
+  } else {
+    gen_mov_addr (ctx, temp_reg, call_addr);
+    insn = call_pat2 | (temp_reg << 5); /* register number goes into the field at bit 5 */
+  }
+  push_insns (ctx, &insn, sizeof (insn));
+}
+
+/* Insn code used to fill a fresh thunk: */
+#define NOP 0xd503201f
+
+/* Publish a new thunk consisting of five NOP insns and return its address.  Five insns is the
+   largest code _MIR_redirect_thunk writes into a thunk (four insns loading the target address
+   plus a register branch).  */
+void *_MIR_get_thunk (MIR_context_t ctx) {
+  /* uint32_t rather than int: the NOP code does not fit into a signed 32-bit int.  */
+  static const uint32_t pat[5] = {NOP, NOP, NOP, NOP, NOP}; /* maximal size thunk -- see _MIR_redirect_thunk */
+
+  return _MIR_publish_code (ctx, (uint8_t *) pat, sizeof (pat));
+}
+
+/* Rewrite the code of THUNK so that it transfers control to address TO.  Both addresses must
+   be 4-byte aligned (asserted).  If the distance from THUNK to TO, counted in 32-bit insns,
+   fits into the signed branch offset field, only the first thunk insn is replaced by a
+   pc-relative branch.  Otherwise all five thunk insns are replaced by a load of TO into x9
+   followed by a branch through x9.  */
+void _MIR_redirect_thunk (MIR_context_t ctx, void *thunk, void *to) {
+  static const uint32_t branch_pat1 = 0xd61f0120; /* br x9 */
+  static const uint32_t branch_pat2 = 0x14000000; /* b x */
+  int64_t offset = (uint32_t *) to - (uint32_t *) thunk; /* in insns, not bytes */
+  uint32_t code[5];
+
+  mir_assert (((uint64_t) thunk & 0x3) == 0 && ((uint64_t) to & 0x3) == 0); /* alignment */
+  if (-(int64_t) MAX_BR_OFFSET <= offset && offset < (int64_t) MAX_BR_OFFSET) {
+    code[0] = branch_pat2 | ((uint32_t) offset & BR_OFFSET_MASK);
+    _MIR_change_code (ctx, thunk, (uint8_t *) &code[0], sizeof (code[0]));
+  } else {
+    int n = setup_imm64_insns (ctx, code, 9, (uint64_t) to); /* x9 = to */
+
+    mir_assert (n == 4);
+    code[4] = branch_pat1;
+    _MIR_change_code (ctx, thunk, (uint8_t *) code, sizeof (code));
+  }
+}
+
+/* save r0-r7, v0-v7: insns pushing the register pairs on the stack, pre-decrementing SP by
+   16 bytes per general register pair and by 32 bytes per Q register pair */
+static const uint32_t save_insns[] = {
+  0xa9bf1fe6, /* stp R6, R7, [SP, #-16]! */
+  0xa9bf17e4, /* stp R4, R5, [SP, #-16]! */
+  0xa9bf0fe2, /* stp R2, R3, [SP, #-16]! */
+  0xa9bf07e0, /* stp R0, R1, [SP, #-16]! */
+  0xadbf1fe6, /* stp Q6, Q7, [SP, #-32]! */
+  0xadbf17e4, /* stp Q4, Q5, [SP, #-32]! */
+  0xadbf0fe2, /* stp Q2, Q3, [SP, #-32]! */
+  0xadbf07e0, /* stp Q0, Q1, [SP, #-32]! */
+};
+
+/* restore v0-v7, r0-r7: insns popping the register pairs in the reverse order of save_insns,
+   incrementing SP after each load */
+static const uint32_t restore_insns[] = {
+  0xacc107e0, /* ldp Q0, Q1, SP, #32 */
+  0xacc10fe2, /* ldp Q2, Q3, SP, #32 */
+  0xacc117e4, /* ldp Q4, Q5, SP, #32 */
+  0xacc11fe6, /* ldp Q6, Q7, SP, #32 */
+  0xa8c107e0, /* ldp R0, R1, SP, #16 */
+  0xa8c10fe2, /* ldp R2, R3, SP, #16 */
+  0xa8c117e4, /* ldp R4, R5, SP, #16 */
+  0xa8c11fe6, /* ldp R6, R7, SP, #16 */
+};
+
+static const uint32_t ld_pat = 0xf9400260;   /* ldr x, [x19], offset */
+static const uint32_t lds_pat = 0xbd400260;  /* ldr s, [x19], offset */
+static const uint32_t ldd_pat = 0xfd400260;  /* ldr d, [x19], offset */
+static const uint32_t ldld_pat = 0x3dc00260; /* ldr q, [x19], offset */
+
+/* Generation: fun (fun_addr, res_arg_addresses):
+   push x19, x30; sp-=sp_offset; x9=fun_addr; x19=res/arg_addrs
+   (arg_reg=mem[x19,<offset>] or x8=mem[x19,<offset>];mem[sp,sp_offset]=x8) ...
+   call fun_addr; sp+=sp_offset
+   mem[x19,<offset>]=res_reg; ...
+   pop x19, x30; ret x30.
+
+   Values are kept in consecutive 16-byte slots addressed by x19: result I uses slot I and
+   argument I uses slot NRES + I.  The first 8 integer/pointer args are loaded into x0-x7 and
+   the first 8 fp args into v0-v7; the other args are copied through x8/v8 into the outgoing
+   stack area.  VARARG_P is not used by this implementation.  Return the address of the
+   published code.  */
+void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, size_t nargs,
+                        MIR_type_t *arg_types, int vararg_p) {
+  static const uint32_t prolog[] = {
+    0xa9bf7bf3, /* stp x19,x30,[sp, -16]! */
+    0xd10003ff, /* sub sp,sp,<sp_offset> */
+    0xaa0003e9, /* mov x9,x0   # fun addr */
+    0xaa0103f3, /* mov x19, x1 # result/arg addresses */
+  };
+  static const uint32_t call_end[] = {
+    0xd63f0120, /* blr x9 */
+    0x910003ff, /* add sp,sp,<sp_offset> */
+  };
+  static const uint32_t epilog[] = {
+    0xa8c17bf3, /* ldp x19,x30,[sp],16 */
+    0xd65f03c0, /* ret x30 */
+  };
+  static const uint32_t st_pat = 0xf9000000;   /* str x, [xn|sp, offset] */
+  static const uint32_t sts_pat = 0xbd000000;  /* str s, [xn|sp, offset] */
+  static const uint32_t std_pat = 0xfd000000;  /* str d, [xn|sp, offset] */
+  static const uint32_t stld_pat = 0x3d800000; /* str q, [xn|sp, offset] */
+  uint32_t n_xregs = 0, n_vregs = 0, sp_offset = 0, pat, offset_imm, scale, sp = 31;
+  const uint32_t temp_reg = 8; /* x8 or v8 */
+
+  VARR_TRUNC (uint8_t, machine_insns, 0);
+  push_insns (ctx, prolog, sizeof (prolog));
+  mir_assert (sizeof (long double) == 16);
+  for (size_t i = 0; i < nargs; i++) { /* args */
+    /* log2 of the access size: load/store immediates are scaled by it */
+    scale = arg_types[i] == MIR_T_F ? 2 : arg_types[i] == MIR_T_LD ? 4 : 3;
+    offset_imm = (((i + nres) * sizeof (long double) << 10)) >> scale;
+    if ((MIR_T_I8 <= arg_types[i] && arg_types[i] <= MIR_T_U64) || arg_types[i] == MIR_T_P) {
+      if (n_xregs < 8) {
+        pat = ld_pat | offset_imm | n_xregs++;
+      } else { /* no free arg reg: load into x8 and store it into the stack area */
+        pat = ld_pat | offset_imm | temp_reg;
+        push_insns (ctx, &pat, sizeof (pat));
+        pat = st_pat | ((sp_offset >> scale) << 10) | temp_reg | (sp << 5);
+        sp_offset += 8;
+      }
+      push_insns (ctx, &pat, sizeof (pat));
+    } else if (arg_types[i] == MIR_T_F || arg_types[i] == MIR_T_D || arg_types[i] == MIR_T_LD) {
+      pat = arg_types[i] == MIR_T_F ? lds_pat : arg_types[i] == MIR_T_D ? ldd_pat : ldld_pat;
+      if (n_vregs < 8) {
+        pat |= offset_imm | n_vregs++;
+      } else { /* no free arg reg: load into v8 and store it into the stack area */
+        /* A 16-byte store needs an offset which is a multiple of 16 (it is encoded as
+           sp_offset >> 4), so round the offset up to the next 16-byte boundary: */
+        if (arg_types[i] == MIR_T_LD) sp_offset = (sp_offset + 15) / 16 * 16;
+        pat |= offset_imm | temp_reg;
+        push_insns (ctx, &pat, sizeof (pat));
+        pat = arg_types[i] == MIR_T_F ? sts_pat : arg_types[i] == MIR_T_D ? std_pat : stld_pat;
+        pat |= ((sp_offset >> scale) << 10) | temp_reg | (sp << 5);
+        sp_offset += arg_types[i] == MIR_T_LD ? 16 : 8;
+      }
+      push_insns (ctx, &pat, sizeof (pat));
+    } else {
+      (*error_func) (MIR_call_op_error, "wrong type of arg value");
+    }
+  }
+  sp_offset = (sp_offset + 15) / 16 * 16; /* keep sp aligned to 16 bytes */
+  mir_assert (sp_offset < (1 << 12));
+  ((uint32_t *) VARR_ADDR (uint8_t, machine_insns))[1] |= sp_offset << 10; /* sub sp,sp,<offset> */
+  push_insns (ctx, call_end, sizeof (call_end));
+  /* patch the just added add sp,sp,<offset>: */
+  ((uint32_t *) (VARR_ADDR (uint8_t, machine_insns) + VARR_LENGTH (uint8_t, machine_insns)))[-1]
+    |= sp_offset << 10;
+  n_xregs = n_vregs = 0;
+  for (size_t i = 0; i < nres; i++) { /* results */
+    /* The store immediate is scaled by the access size: 4 bytes for float, 16 bytes for
+       long double, and 8 bytes for double and for integers stored by st_pat.  */
+    offset_imm = i * sizeof (long double) << 10;
+    offset_imm >>= res_types[i] == MIR_T_F ? 2 : res_types[i] == MIR_T_LD ? 4 : 3;
+    if (((MIR_T_I8 <= res_types[i] && res_types[i] <= MIR_T_U64) || res_types[i] == MIR_T_P)
+        && n_xregs < 8) {
+      pat = st_pat | offset_imm | n_xregs++ | (19 << 5);
+      push_insns (ctx, &pat, sizeof (pat));
+    } else if ((res_types[i] == MIR_T_F || res_types[i] == MIR_T_D || res_types[i] == MIR_T_LD)
+               && n_vregs < 8) {
+      pat = res_types[i] == MIR_T_F ? sts_pat : res_types[i] == MIR_T_D ? std_pat : stld_pat;
+      pat |= offset_imm | n_vregs++ | (19 << 5);
+      push_insns (ctx, &pat, sizeof (pat));
+    } else {
+      (*error_func) (MIR_ret_error, "aarch64 can not handle this combination of return values");
+    }
+  }
+  push_insns (ctx, epilog, sizeof (epilog));
+  return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
+                            VARR_LENGTH (uint8_t, machine_insns));
+}
+
+/* Transform C call to call of void handler (MIR_context_t ctx, MIR_item_t func_item,
+                                             va_list va, MIR_val_t *results).
+   The generated shim saves x19 and the argument registers x0-x7, q0-q7, builds a va_list
+   on the stack referring to the saved registers, reserves (nres+1)*16 bytes for the saved
+   x30 and the results, calls HANDLER, loads the results into return registers, releases
+   the frame, and returns.  Return the address of the published code.  */
+void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handler) {
+  static const uint32_t save_x19_pat = 0xf81f0ff3; /* str x19, [sp,-16]! */
+  static const uint32_t prepare_pat[] = {
+    0xd10083ff, /* sub sp, sp, 32 # allocate va_list */
+    0x910003e8, /* mov x8, sp # va_list addr         */
+    0x128007e9, /* mov w9, #-64 # gr_offs */
+    0xb9001909, /* str w9,[x8, 24] # va_list.gr_offs */
+    0x12800fe9, /* mov w9, #-128 # vr_offs */
+    0xb9001d09, /* str w9,[x8, 28]  #va_list.vr_offs */
+    0x910383e9, /* add x9, sp, #224 # gr_top */
+    0xf9000509, /* str x9,[x8, 8] # va_list.gr_top */
+    0x91004129, /* add x9, x9, #16 # stack */
+    0xf9000109, /* str x9,[x8] # va_list.stack */
+    0x910283e9, /* add x9, sp, #160 # vr_top*/
+    0xf9000909, /* str x9,[x8, 16] # va_list.vr_top */
+    0xaa0803e2, /* mov x2, x8 # va arg  */
+    0xd2800009, /* mov x9, <(nres+1)*16> */
+    0xcb2963ff, /* sub sp, sp, x9 */
+    0x910043e3, /* add x3, sp, 16 # results arg */
+    0xaa0303f3, /* mov x19, x3 # results */
+    0xf90003fe, /* str x30, [sp] # save lr */
+  };
+  static const uint32_t shim_end[] = {
+    0xf94003fe, /* ldr x30, [sp] */
+    0xd2800009, /* mov x9, 224+(nres+1)*16 */
+    0x8b2963ff, /* add sp, sp, x9 */
+    0xf84107f3, /* ldr x19, [sp], 16 */
+    0xd65f03c0, /* ret x30 */
+  };
+  uint32_t pat, imm, n_xregs, n_vregs, offset, offset_imm;
+  uint32_t nres = func_item->u.func->nres;
+  MIR_type_t *results = func_item->u.func->res_types;
+
+  VARR_TRUNC (uint8_t, machine_insns, 0);
+  push_insns (ctx, &save_x19_pat, sizeof (save_x19_pat));
+  push_insns (ctx, save_insns, sizeof (save_insns));
+  push_insns (ctx, prepare_pat, sizeof (prepare_pat));
+  imm = (nres + 1) * 16; /* 16 bytes holding x30 plus a 16-byte slot per result */
+  mir_assert (imm < (1 << 16));
+  ((uint32_t *) (VARR_ADDR (uint8_t, machine_insns) + VARR_LENGTH (uint8_t, machine_insns)))[-5]
+    |= imm << 5; /* patch the immediate of "mov x9, <(nres+1)*16>", 5th insn from the end */
+  gen_mov_addr (ctx, 0, ctx);       /* mov x0, ctx */
+  gen_mov_addr (ctx, 1, func_item); /* mov x1, func_item */
+  gen_call_addr (ctx, NULL, 9, handler); /* call handler, x9 as temp */
+  /* move results: */
+  n_xregs = n_vregs = offset = 0;
+  mir_assert (sizeof (long double) == 16);
+  for (uint32_t i = 0; i < nres; i++) {
+    if ((results[i] == MIR_T_F || results[i] == MIR_T_D || results[i] == MIR_T_LD) && n_vregs < 8) {
+      pat = results[i] == MIR_T_F ? lds_pat : results[i] == MIR_T_D ? ldd_pat : ldld_pat;
+      pat |= n_vregs;
+      n_vregs++;
+    } else if (n_xregs < 8) {  // ??? ltp use
+      pat = ld_pat | n_xregs;
+      n_xregs++;
+    } else {
+      (*error_func) (MIR_ret_error, "aarch64 can not handle this combination of return values");
+    }
+    offset_imm = offset >> (results[i] == MIR_T_F ? 2 : results[i] == MIR_T_LD ? 4 : 3);
+    mir_assert (offset_imm < (1 << 12));
+    pat |= offset_imm << 10; /* the load immediate is scaled by the access size */
+    push_insns (ctx, &pat, sizeof (pat));
+    offset += 16; /* each result occupies a 16-byte slot */
+  }
+  push_insns (ctx, shim_end, sizeof (shim_end));
+  imm = 224 + (nres + 1) * 16; /* 224 = 192 (save_insns) + 32 (va_list) */
+  mir_assert (imm < (1 << 16));
+  ((uint32_t *) (VARR_ADDR (uint8_t, machine_insns) + VARR_LENGTH (uint8_t, machine_insns)))[-4]
+    |= imm << 5; /* patch the immediate of "mov x9, ..." in shim_end, 4th insn from the end */
+  return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
+                            VARR_LENGTH (uint8_t, machine_insns));
+}
+
+/* Save regs x0-x7, q0-q7; x9 = call hook_address (ctx, called_func); restore regs; br x9.
+   x29 and x30 are saved and restored around the whole sequence too.  The code is generated
+   for the address returned by _MIR_get_new_code_addr; when it can not be published at that
+   address, generation is repeated with LEN set to the real code length.  Return NULL if no
+   code address is available.  */
+void *_MIR_get_wrapper (MIR_context_t ctx, MIR_item_t called_func, void *hook_address) {
+  static const uint32_t jmp_insn = 0xd61f0120;     /* br x9 */
+  static const uint32_t move_insn = 0xaa0003e9;    /* mov x9, x0 */
+  static const uint32_t save_fplr = 0xa9bf7bfd;    /* stp R29, R30, [SP, #-16]! */
+  static const uint32_t restore_fplr = 0xa8c17bfd; /* ldp R29, R30, [SP], #16 */
+  uint8_t *base_addr, *curr_addr, *code;
+  size_t len = sizeof (save_insns) + sizeof (restore_insns); /* initial code length */
+
+  for (;;) {
+    curr_addr = base_addr = _MIR_get_new_code_addr (ctx, len);
+    if (curr_addr == NULL) return NULL;
+    VARR_TRUNC (uint8_t, machine_insns, 0);
+    push_insns (ctx, &save_fplr, sizeof (save_fplr));
+    curr_addr += 4; /* size of the save_fplr insn */
+    push_insns (ctx, save_insns, sizeof (save_insns));
+    curr_addr += sizeof (save_insns);
+    curr_addr += gen_mov_addr (ctx, 0, ctx);          /* mov x0,ctx */
+    curr_addr += gen_mov_addr (ctx, 1, called_func);  /* mov x1,called_func */
+    gen_call_addr (ctx, curr_addr, 10, hook_address); /* call <hook_address>, use x10 as temp */
+    push_insns (ctx, &move_insn, sizeof (move_insn)); /* keep the hook result (x0) in x9 */
+    push_insns (ctx, restore_insns, sizeof (restore_insns));
+    push_insns (ctx, &restore_fplr, sizeof (restore_fplr));
+    push_insns (ctx, &jmp_insn, sizeof (jmp_insn));
+    len = VARR_LENGTH (uint8_t, machine_insns);
+    code = _MIR_publish_code_by_addr (ctx, base_addr, VARR_ADDR (uint8_t, machine_insns), len);
+    if (code != NULL) return code; /* otherwise retry with the updated LEN */
+  }
+}
--- a/mir/mir-bitmap.h
+++ b/mir/mir-bitmap.h
@ -11,6 +11,7 @@
 #include <string.h>
 #include <assert.h>
 #include <stdint.h>
+#include <limits.h>
 #include "mir-varr.h"

 #define FALSE 0
@ -99,6 +100,40 @@ static inline int bitmap_clear_bit_p (bitmap_t bm, size_t nb) {
  return res;
 }

+/* Set (if SET_P) or clear LEN bits of BM starting with bit NB.  Return nonzero iff the value
+   of at least one bit of the range has been changed.  */
+static inline int bitmap_set_or_clear_bit_range_p (bitmap_t bm, size_t nb, size_t len, int set_p) {
+  size_t low_skip, high_skip, chunk_len;
+  bitmap_el_t *words, *word, chunk_mask;
+  int changed_p = 0;
+
+  /* bitmap_expand presumably makes room for bits up to nb + len; the words address is
+     taken only after it.  */
+  bitmap_expand (bm, nb + len);
+  words = VARR_ADDR (bitmap_el_t, bm);
+  /* Each iteration processes the part of the range lying in one bitmap word.  */
+  for (; len > 0; len -= chunk_len, nb += chunk_len) {
+    word = words + nb / BITMAP_WORD_BITS;
+    low_skip = nb % BITMAP_WORD_BITS; /* bits of the word below the range */
+    if (len >= BITMAP_WORD_BITS - low_skip)
+      high_skip = 0; /* the range reaches the end of the word */
+    else
+      high_skip = BITMAP_WORD_BITS - (nb + len) % BITMAP_WORD_BITS;
+    chunk_mask = ((~(bitmap_el_t) 0) >> (high_skip + low_skip)) << low_skip;
+    if (set_p) {
+      if ((~*word & chunk_mask) != 0) changed_p = 1;
+      *word |= chunk_mask;
+    } else {
+      if ((*word & chunk_mask) != 0) changed_p = 1;
+      *word &= ~chunk_mask;
+    }
+    chunk_len = BITMAP_WORD_BITS - high_skip - low_skip;
+  }
+  return changed_p;
+}
+
+static inline int bitmap_set_bit_range_p (bitmap_t bm, size_t nb, size_t len) {
+  return bitmap_set_or_clear_bit_range_p (bm, nb, len, TRUE); /* nonzero iff a bit changed */
+}
+
+static inline int bitmap_clear_bit_range_p (bitmap_t bm, size_t nb, size_t len) {
+  return bitmap_set_or_clear_bit_range_p (bm, nb, len, FALSE); /* nonzero iff a bit changed */
+}
+
 static inline void bitmap_copy (bitmap_t dst, const_bitmap_t src) {
  size_t dst_len = VARR_LENGTH (bitmap_el_t, dst);
  size_t src_len = VARR_LENGTH (bitmap_el_t, src);
@ -271,16 +306,32 @@ static inline int bitmap_ior_and_compl (bitmap_t dst, bitmap_t src1, bitmap_t sr
  return bitmap_op3 (dst, src1, src2, src3, bitmap_el_ior_and_compl);
 }

-static inline void bitmap_for_each (bitmap_t bm, void (*func) (size_t, void *), void *data) {
-  size_t i, nb, len = VARR_LENGTH (bitmap_el_t, bm);
-  bitmap_el_t el, *addr = VARR_ADDR (bitmap_el_t, bm);
+typedef struct {
+  bitmap_t bitmap; /* bitmap being traversed */
+  size_t nbit;     /* number of the bit from which the next search starts */
+} bitmap_iterator_t;

-  for (i = 0; i < len; i++) {
-    if ((el = addr[i]) != 0) {
-      for (nb = 0; el != 0; el >>= 1, nb++)
-        if (el & 1) func (i * BITMAP_WORD_BITS + nb, data);
-    }
-  }
+static inline void bitmap_iterator_init (bitmap_iterator_t *iter, bitmap_t bitmap) {
+  iter->bitmap = bitmap;
+  iter->nbit = 0; /* the search starts from the very first bit */
 }

+static inline int bitmap_iterator_next (bitmap_iterator_t *iter, size_t *nbit) {
+  const size_t el_bits_num = sizeof (bitmap_el_t) * CHAR_BIT; /* bits in a bitmap word */
+  size_t curr_nel = iter->nbit / el_bits_num, len = VARR_LENGTH (bitmap_el_t, iter->bitmap);
+  bitmap_el_t el, *addr = VARR_ADDR (bitmap_el_t, iter->bitmap);
+
+  for (; curr_nel < len; curr_nel++, iter->nbit = curr_nel * el_bits_num)
+    if ((el = addr[curr_nel]) != 0) /* words without set bits are skipped at once */
+      for (el >>= iter->nbit % el_bits_num; el != 0; el >>= 1, iter->nbit++)
+        if (el & 1) { /* found: report the bit and continue after it on the next call */
+          *nbit = iter->nbit++;
+          return TRUE;
+        }
+  return FALSE; /* no set bit at or after the current position; *nbit is not changed */
+}
+
+#define FOREACH_BITMAP_BIT(iter, bitmap, nbit) /* NBIT gets each set bit number in turn */ \
+  for (bitmap_iterator_init (&iter, bitmap); bitmap_iterator_next (&iter, &nbit);)
+
 #endif /* #ifndef MIR_BITMAP_H */
--- a/mir/mir-gen-aarch64.c
+++ b/mir/mir-gen-aarch64.c
--- a/mir/mir-gen-ppc64.c
+++ b/mir/mir-gen-ppc64.c
@ -0,0 +1 @@
+#include "mir-gen-stub.c"
--- a/mir/mir-gen-stub.c
+++ b/mir/mir-gen-stub.c
@ -0,0 +1,95 @@
+/* This file is a part of MIR project.
+   Copyright (C) 2018-2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
+
+   Stub for MIR generator machine dependent file.  It contains
+   definitions used by MIR generator.  You can use this file for
+   successful compilation of mir-gen.c.
+
+   See HOW-TO-PORT-MIR.md document for the definitions description.
+*/
+
+enum { /* hard registers of the stub target: R0-R7 are followed by F0-F7 */
+  R0_HARD_REG,
+  R1_HARD_REG,
+  R2_HARD_REG,
+  R3_HARD_REG,
+  R4_HARD_REG,
+  R5_HARD_REG,
+  R6_HARD_REG,
+  R7_HARD_REG,
+  F0_HARD_REG, /* regs starting with this one are used for F, D, and LD values */
+  F1_HARD_REG,
+  F2_HARD_REG,
+  F3_HARD_REG,
+  F4_HARD_REG,
+  F5_HARD_REG,
+  F6_HARD_REG,
+  F7_HARD_REG
+};
+
+static const MIR_reg_t MAX_HARD_REG = F7_HARD_REG; /* max value for the previous regs */
+static const MIR_reg_t FP_HARD_REG = R6_HARD_REG;  /* stack frame pointer according to ABI */
+static const MIR_reg_t SP_HARD_REG = R7_HARD_REG;  /* stack pointer according to ABI */
+
+const MIR_reg_t TEMP_INT_HARD_REG1 = R2_HARD_REG, TEMP_INT_HARD_REG2 = R3_HARD_REG;
+const MIR_reg_t TEMP_FLOAT_HARD_REG1 = F2_HARD_REG, TEMP_FLOAT_HARD_REG2 = F3_HARD_REG;
+const MIR_reg_t TEMP_DOUBLE_HARD_REG1 = F2_HARD_REG, TEMP_DOUBLE_HARD_REG2 = F3_HARD_REG;
+const MIR_reg_t TEMP_LDOUBLE_HARD_REG1 = F2_HARD_REG; /* float, double, and long double */
+const MIR_reg_t TEMP_LDOUBLE_HARD_REG2 = F3_HARD_REG; /* temps share the same two regs */
+
+/* Return number of locations occupied by a value of TYPE placed in LOC: it is two only for
+   a long double whose location number is above MAX_HARD_REG.  */
+static int target_locs_num (MIR_reg_t loc, MIR_type_t type) {
+  if (loc <= MAX_HARD_REG || type != MIR_T_LD) return 1;
+  return 2;
+}
+
+/* Return nonzero iff HARD_REG can hold a value of TYPE: F, D, and LD values need a register
+   starting from F0_HARD_REG, all other values need a register below it.  */
+static inline int target_hard_reg_type_ok_p (MIR_reg_t hard_reg, MIR_type_t type) {
+  int fp_type_p;
+
+  assert (hard_reg <= MAX_HARD_REG);
+  fp_type_p = type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD;
+  if (fp_type_p) return hard_reg >= F0_HARD_REG;
+  return hard_reg < F0_HARD_REG;
+}
+
+/* Return nonzero iff HARD_REG is the frame pointer, the stack pointer, or one of the
+   temporary registers.  */
+static inline int target_fixed_hard_reg_p (MIR_reg_t hard_reg) {
+  assert (hard_reg <= MAX_HARD_REG);
+  if (hard_reg == FP_HARD_REG || hard_reg == SP_HARD_REG) return 1;
+  if (hard_reg == TEMP_INT_HARD_REG1 || hard_reg == TEMP_INT_HARD_REG2) return 1;
+  if (hard_reg == TEMP_FLOAT_HARD_REG1 || hard_reg == TEMP_FLOAT_HARD_REG2) return 1;
+  if (hard_reg == TEMP_DOUBLE_HARD_REG1 || hard_reg == TEMP_DOUBLE_HARD_REG2) return 1;
+  return hard_reg == TEMP_LDOUBLE_HARD_REG1 || hard_reg == TEMP_LDOUBLE_HARD_REG2;
+}
+
+/* Return nonzero iff HARD_REG is call used, i.e. it is neither in R4-R5 nor in F2-F7.  */
+static inline int target_call_used_hard_reg_p (MIR_reg_t hard_reg) {
+  int in_r4_r5_p, in_f2_f7_p;
+
+  assert (hard_reg <= MAX_HARD_REG);
+  in_r4_r5_p = R4_HARD_REG <= hard_reg && hard_reg <= R5_HARD_REG;
+  in_f2_f7_p = F2_HARD_REG <= hard_reg && hard_reg <= F7_HARD_REG;
+  return !in_r4_r5_p && !in_f2_f7_p;
+}
+
+static const int slots_offset = 176; /* It is used in this file but not in MIR generator */
+
+static MIR_disp_t target_get_stack_slot_offset (MIR_context_t ctx, MIR_type_t type,
+                                                MIR_reg_t slot) {
+  /* slot is 0, 1, ...  Slots are 8 bytes each and follow the area of slots_offset bytes,
+     the result is a negative displacement.  A long double takes two slots and the
+     displacement of the second (lower) one is returned.  */
+  struct gen_ctx *gen_ctx = *gen_ctx_loc (ctx); /* not used below in the stub */
+
+  return -((MIR_disp_t) (slot + (type == MIR_T_LD ? 2 : 1)) * 8 + slots_offset);
+}
+
+static const MIR_insn_code_t target_io_dup_op_insn_codes[] = {MIR_INSN_BOUND}; /* no insns */
+
+static void target_machinize (MIR_context_t ctx) {} /* the stub does nothing here */
+
+static void target_make_prolog_epilog (MIR_context_t ctx, bitmap_t used_hard_regs,
+                                       size_t stack_slots_num) {} /* the stub adds nothing */
+
+static void target_get_early_clobbered_hard_regs (MIR_insn_t insn, MIR_reg_t *hr1, MIR_reg_t *hr2) {
+  *hr1 = *hr2 = MIR_NON_HARD_REG; /* the stub reports no early clobbered regs for any insn */
+}
+
+static int target_insn_ok_p (MIR_context_t ctx, MIR_insn_t insn) { return FALSE; } /* stub */
+static uint8_t *target_translate (MIR_context_t ctx, size_t *len) { return NULL; } /* stub */
+static void target_rebase (MIR_context_t ctx, uint8_t *base) {} /* stub: nothing to rebase */
+
+static void target_init (MIR_context_t ctx) {
+  fprintf (stderr, "Your generator target dependent file is just a stub!\n");
+  fprintf (stderr, "MIR generator can not use it -- good bye.\n");
+  exit (1); /* the stub always terminates the program with status 1 */
+}
+static void target_finish (MIR_context_t ctx) {} /* stub: nothing to release */
--- a/mir/mir-gen-x86_64.c
+++ b/mir/mir-gen-x86_64.c
@ -16,9 +16,9 @@ enum {
 #undef REP_SEP

 static const MIR_reg_t MAX_HARD_REG = ST1_HARD_REG;
-static const MIR_reg_t HARD_REG_FRAME_POINTER = BP_HARD_REG;
+static const MIR_reg_t FP_HARD_REG = BP_HARD_REG;

-static int locs_num (MIR_reg_t loc, MIR_type_t type) {
+static int target_locs_num (MIR_reg_t loc, MIR_type_t type) {
  return loc > MAX_HARD_REG && type == MIR_T_LD ? 2 : 1;
 }

@ -29,15 +29,15 @@ const MIR_reg_t TEMP_DOUBLE_HARD_REG1 = XMM8_HARD_REG, TEMP_DOUBLE_HARD_REG2 = X
 const MIR_reg_t TEMP_LDOUBLE_HARD_REG1 = MIR_NON_HARD_REG;
 const MIR_reg_t TEMP_LDOUBLE_HARD_REG2 = MIR_NON_HARD_REG;

-static inline int hard_reg_type_ok_p (MIR_reg_t hard_reg, MIR_type_t type) {
+static inline int target_hard_reg_type_ok_p (MIR_reg_t hard_reg, MIR_type_t type) {
  assert (hard_reg <= MAX_HARD_REG);
  /* For LD we need x87 stack regs and it is too complicated so no
     hard register allocation for LD: */
  if (type == MIR_T_LD) return FALSE;
-  return type == MIR_T_F || type == MIR_T_D ? hard_reg >= XMM0_HARD_REG : hard_reg < XMM0_HARD_REG;
+  return MIR_int_type_p (type) ? hard_reg < XMM0_HARD_REG : hard_reg >= XMM0_HARD_REG;
 }

-static inline int fixed_hard_reg_p (MIR_reg_t hard_reg) {
+static inline int target_fixed_hard_reg_p (MIR_reg_t hard_reg) {
  assert (hard_reg <= MAX_HARD_REG);
  return (hard_reg == BP_HARD_REG || hard_reg == SP_HARD_REG || hard_reg == TEMP_INT_HARD_REG1
          || hard_reg == TEMP_INT_HARD_REG2 || hard_reg == TEMP_FLOAT_HARD_REG1
@ -46,7 +46,7 @@ static inline int fixed_hard_reg_p (MIR_reg_t hard_reg) {
          || hard_reg == ST1_HARD_REG);
 }

-static inline int call_used_hard_reg_p (MIR_reg_t hard_reg) {
+static inline int target_call_used_hard_reg_p (MIR_reg_t hard_reg) {
  assert (hard_reg <= MAX_HARD_REG);
  return !(hard_reg == BX_HARD_REG || (hard_reg >= R12_HARD_REG && hard_reg <= R15_HARD_REG));
 }
@ -79,7 +79,8 @@ static inline int call_used_hard_reg_p (MIR_reg_t hard_reg) {

 static const int reg_save_area_size = 176;

-static MIR_disp_t get_stack_slot_offset (MIR_context_t ctx, MIR_type_t type, MIR_reg_t slot) {
+static MIR_disp_t target_get_stack_slot_offset (MIR_context_t ctx, MIR_type_t type,
+                                                MIR_reg_t slot) {
  /* slot is 0, 1, ... */
  struct gen_ctx *gen_ctx = *gen_ctx_loc (ctx);

@ -87,17 +88,16 @@ static MIR_disp_t get_stack_slot_offset (MIR_context_t ctx, MIR_type_t type, MIR
           + (curr_func_item->u.func->vararg_p ? reg_save_area_size : 0));
 }

-static const MIR_insn_code_t io_dup_op_insn_codes[] = {
+static const MIR_insn_code_t target_io_dup_op_insn_codes[] = {
  /* see possible patterns */
-  MIR_FADD, MIR_DADD, MIR_LDADD, MIR_SUB,  MIR_SUBS,  MIR_FSUB, MIR_DSUB,  MIR_LDSUB,
-  MIR_MUL,  MIR_MULS, MIR_FMUL,  MIR_DMUL, MIR_LDMUL, MIR_DIV,  MIR_DIVS,  MIR_UDIV,
-  MIR_FDIV, MIR_DDIV, MIR_LDDIV, MIR_MOD,  MIR_MODS,  MIR_UMOD, MIR_UMODS, MIR_AND,
-  MIR_ANDS, MIR_OR,   MIR_ORS,   MIR_XOR,  MIR_XORS,  MIR_LSH,  MIR_LSHS,  MIR_RSH,
-  MIR_RSHS, MIR_URSH, MIR_URSHS, MIR_NEG,  MIR_NEGS,  MIR_FNEG, MIR_DNEG,  MIR_LDNEG,
+  MIR_FADD,  MIR_DADD,  MIR_LDADD, MIR_SUB,  MIR_SUBS,  MIR_FSUB,       MIR_DSUB,
+  MIR_LDSUB, MIR_MUL,   MIR_MULS,  MIR_FMUL, MIR_DMUL,  MIR_LDMUL,      MIR_DIV,
+  MIR_DIVS,  MIR_UDIV,  MIR_FDIV,  MIR_DDIV, MIR_LDDIV, MIR_MOD,        MIR_MODS,
+  MIR_UMOD,  MIR_UMODS, MIR_AND,   MIR_ANDS, MIR_OR,    MIR_ORS,        MIR_XOR,
+  MIR_XORS,  MIR_LSH,   MIR_LSHS,  MIR_RSH,  MIR_RSHS,  MIR_URSH,       MIR_URSHS,
+  MIR_NEG,   MIR_NEGS,  MIR_FNEG,  MIR_DNEG, MIR_LDNEG, MIR_INSN_BOUND,
 };

-typedef enum { GC_INSN_PUSH = MIR_INSN_BOUND, GC_INSN_BOUND } MIR_full_insn_code_t;
-
 static MIR_insn_code_t get_ext_code (MIR_type_t type) {
  switch (type) {
  case MIR_T_I8: return MIR_EXT8;
@ -110,6 +110,44 @@ static MIR_insn_code_t get_ext_code (MIR_type_t type) {
  }
 }

+static MIR_reg_t get_fp_arg_reg (size_t fp_arg_num) {
+  switch (fp_arg_num) { /* fp arg 0 is mapped to XMM0, fp arg 1 to the next reg, etc. */
+  case 0:
+  case 1:
+  case 2:
+  case 3:
+#ifndef _WIN64 /* four more fp arg regs when _WIN64 is not defined */
+  case 4:
+  case 5:
+  case 6:
+  case 7:
+#endif
+    return XMM0_HARD_REG + fp_arg_num;
+  default: return MIR_NON_HARD_REG; /* no hard reg for this arg */
+  }
+}
+
+static MIR_reg_t get_int_arg_reg (size_t int_arg_num) {
+  switch (int_arg_num
+#ifdef _WIN64
+          + 2 /* skip the DI and SI cases: with _WIN64 arg 0 is mapped to case 2 (CX) */
+#endif
+  ) {
+  case 0: return DI_HARD_REG;
+  case 1: return SI_HARD_REG;
+#ifdef _WIN64
+  case 2: return CX_HARD_REG;
+  case 3: return DX_HARD_REG;
+#else
+  case 2: return DX_HARD_REG;
+  case 3: return CX_HARD_REG;
+#endif
+  case 4: return R8_HARD_REG;
+  case 5: return R9_HARD_REG;
+  default: return MIR_NON_HARD_REG; /* no hard reg for this arg */
+  }
+}
+
 static MIR_reg_t get_arg_reg (MIR_type_t arg_type, size_t *int_arg_num, size_t *fp_arg_num,
                              MIR_insn_code_t *mov_code) {
  MIR_reg_t arg_reg;
@ -118,42 +156,17 @@ static MIR_reg_t get_arg_reg (MIR_type_t arg_type, size_t *int_arg_num, size_t *
    arg_reg = MIR_NON_HARD_REG;
    *mov_code = MIR_LDMOV;
  } else if (arg_type == MIR_T_F || arg_type == MIR_T_D) {
-    switch (*fp_arg_num) {
-    case 0:
-    case 1:
-    case 2:
-    case 3:
-#ifndef _WIN64
-    case 4:
-    case 5:
-    case 6:
-    case 7:
-#endif
-      arg_reg = XMM0_HARD_REG + *fp_arg_num;
-      break;
-    default: arg_reg = MIR_NON_HARD_REG; break;
-    }
+    arg_reg = get_fp_arg_reg(*fp_arg_num);
    (*fp_arg_num)++;
-    *mov_code = arg_type == MIR_T_F ? MIR_FMOV : MIR_DMOV;
-  } else {
-    switch (*int_arg_num
 #ifdef _WIN64
-            + 2
+    (*int_arg_num)++; /* arg slot used by fp, skip int register */
 #endif
-    ) {
-    case 0: arg_reg = DI_HARD_REG; break;
-    case 1: arg_reg = SI_HARD_REG; break;
+    *mov_code = arg_type == MIR_T_F ? MIR_FMOV : MIR_DMOV;
+  } else {
+    arg_reg = get_int_arg_reg(*int_arg_num);
 #ifdef _WIN64
-    case 2: arg_reg = CX_HARD_REG; break;
-    case 3: arg_reg = DX_HARD_REG; break;
-#else
-    case 2: arg_reg = DX_HARD_REG; break;
-    case 3: arg_reg = CX_HARD_REG; break;
+    (*fp_arg_num)++; /* arg slot used by int, skip fp register */
 #endif
-    case 4: arg_reg = R8_HARD_REG; break;
-    case 5: arg_reg = R9_HARD_REG; break;
-    default: arg_reg = MIR_NON_HARD_REG; break;
-    }
    (*int_arg_num)++;
    *mov_code = MIR_MOV;
  }
@ -185,6 +198,9 @@ static void machinize_call (MIR_context_t ctx, MIR_insn_t call_insn) {
    nargs = VARR_LENGTH (MIR_var_t, proto->args);
    arg_vars = VARR_ADDR (MIR_var_t, proto->args);
  }
+#ifdef _WIN64
+  if (nargs > 4 || proto->vararg_p) mem_size = 32; /* spill space for register args */
+#endif
  if (call_insn->ops[1].mode != MIR_OP_REG && call_insn->ops[1].mode != MIR_OP_HARD_REG) {
    temp_op = MIR_new_reg_op (ctx, gen_new_temp_reg (ctx, MIR_T_I64, func));
    new_insn = MIR_new_insn (ctx, MIR_MOV, temp_op, call_insn->ops[1]);
@ -220,6 +236,21 @@ static void machinize_call (MIR_context_t ctx, MIR_insn_t call_insn) {
      new_insn = MIR_new_insn (ctx, new_insn_code, arg_reg_op, arg_op);
      gen_add_insn_before (ctx, call_insn, new_insn);
      call_insn->ops[i] = arg_reg_op;
+#ifdef _WIN64
+      /* copy fp reg varargs into corresponding int regs */
+      if (proto->vararg_p && type == MIR_T_D) {
+        gen_assert (int_arg_num > 0 && int_arg_num <= 4);
+        arg_reg = get_int_arg_reg (int_arg_num - 1);
+        setup_call_hard_reg_args (call_insn, arg_reg);
+        /* mir does not support moving fp to int regs directly, spill and load them instead */
+        mem_op = _MIR_new_hard_reg_mem_op (ctx, MIR_T_D, 8, SP_HARD_REG, MIR_NON_HARD_REG, 1);
+        new_insn = MIR_new_insn (ctx, MIR_DMOV, mem_op, arg_op);
+        gen_add_insn_before (ctx, call_insn, new_insn);
+        mem_op = _MIR_new_hard_reg_mem_op (ctx, MIR_T_I64, 8, SP_HARD_REG, MIR_NON_HARD_REG, 1);
+        new_insn = MIR_new_insn (ctx, MIR_MOV, _MIR_new_hard_reg_op (ctx, arg_reg), mem_op);
+        gen_add_insn_before (ctx, call_insn, new_insn);
+      }
+#endif
    } else { /* put arguments on the stack */
      mem_type = type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD ? type : MIR_T_I64;
      new_insn_code
@ -237,12 +268,14 @@ static void machinize_call (MIR_context_t ctx, MIR_insn_t call_insn) {
      if (ext_insn != NULL) gen_add_insn_after (ctx, prev_call_insn, ext_insn);
    }
  }
+#ifndef _WIN64
  if (proto->vararg_p) {
    setup_call_hard_reg_args (call_insn, AX_HARD_REG);
    new_insn = MIR_new_insn (ctx, MIR_MOV, _MIR_new_hard_reg_op (ctx, AX_HARD_REG),
                             MIR_new_int_op (ctx, xmm_args));
    gen_add_insn_before (ctx, call_insn, new_insn);
  }
+#endif
  n_iregs = n_xregs = n_fregs = 0;
  for (size_t i = 0; i < proto->nres; i++) {
    ret_reg_op = call_insn->ops[i + 2];
@ -417,7 +450,7 @@ struct target_ctx {
 #define abs_address_locs gen_ctx->target_ctx->abs_address_locs
 #define relocs gen_ctx->target_ctx->relocs

-static void machinize (MIR_context_t ctx) {
+static void target_machinize (MIR_context_t ctx) {
  struct gen_ctx *gen_ctx = *gen_ctx_loc (ctx);
  MIR_func_t func;
  MIR_type_t type, mem_type, res_type;
@ -451,7 +484,7 @@ static void machinize (MIR_context_t ctx) {
      mem_op = _MIR_new_hard_reg_mem_op (ctx, mem_type,
                                         mem_size + 8 /* ret */
                                           + start_sp_from_bp_offset,
-                                         BP_HARD_REG, MIR_NON_HARD_REG, 1);
+                                         FP_HARD_REG, MIR_NON_HARD_REG, 1);
      new_insn = MIR_new_insn (ctx, new_insn_code, MIR_new_reg_op (ctx, i + 1), mem_op);
      MIR_prepend_insn (ctx, curr_func_item, new_insn);
      next_insn = DLIST_NEXT (MIR_insn_t, new_insn);
@ -507,12 +540,12 @@ static void machinize (MIR_context_t ctx) {
      gen_mov (ctx, insn, MIR_MOV, MIR_new_mem_op (ctx, MIR_T_U32, 4, va_reg, 0, 1),
               MIR_new_int_op (ctx, fp_offset));
      /* overflow_arg_area_reg: treg = start sp + 8; mem64[va_reg + 8] = treg */
-      new_insn = MIR_new_insn (ctx, MIR_ADD, treg_op, _MIR_new_hard_reg_op (ctx, BP_HARD_REG),
+      new_insn = MIR_new_insn (ctx, MIR_ADD, treg_op, _MIR_new_hard_reg_op (ctx, FP_HARD_REG),
                               MIR_new_int_op (ctx, 8 /*ret*/ + start_sp_from_bp_offset));
      gen_add_insn_before (ctx, insn, new_insn);
      gen_mov (ctx, insn, MIR_MOV, MIR_new_mem_op (ctx, MIR_T_I64, 8, va_reg, 0, 1), treg_op);
      /* reg_save_area: treg = start sp - reg_save_area_size; mem64[va_reg + 16] = treg */
-      new_insn = MIR_new_insn (ctx, MIR_ADD, treg_op, _MIR_new_hard_reg_op (ctx, BP_HARD_REG),
+      new_insn = MIR_new_insn (ctx, MIR_ADD, treg_op, _MIR_new_hard_reg_op (ctx, FP_HARD_REG),
                               MIR_new_int_op (ctx, -reg_save_area_size));
      gen_add_insn_before (ctx, insn, new_insn);
      gen_mov (ctx, insn, MIR_MOV, MIR_new_mem_op (ctx, MIR_T_I64, 16, va_reg, 0, 1), treg_op);
@ -637,8 +670,8 @@ static void dsave (MIR_context_t ctx, MIR_insn_t anchor, int disp, MIR_reg_t har
           _MIR_new_hard_reg_op (ctx, hard_reg));
 }

-static void make_prolog_epilog (MIR_context_t ctx, bitmap_t used_hard_regs,
-                                size_t stack_slots_num) {
+static void target_make_prolog_epilog (MIR_context_t ctx, bitmap_t used_hard_regs,
+                                       size_t stack_slots_num) {
  struct gen_ctx *gen_ctx = *gen_ctx_loc (ctx);
  MIR_func_t func;
  MIR_insn_t anchor, new_insn;
@ -649,12 +682,12 @@ static void make_prolog_epilog (MIR_context_t ctx, bitmap_t used_hard_regs,
  assert (curr_func_item->item_type == MIR_func_item);
  func = curr_func_item->u.func;
  for (i = saved_hard_regs_num = 0; i <= MAX_HARD_REG; i++)
-    if (!call_used_hard_reg_p (i) && bitmap_bit_p (used_hard_regs, i)) saved_hard_regs_num++;
+    if (!target_call_used_hard_reg_p (i) && bitmap_bit_p (used_hard_regs, i)) saved_hard_regs_num++;
  if (leaf_p && !alloca_p && saved_hard_regs_num == 0 && !func->vararg_p && stack_slots_num == 0)
    return;
  sp_reg_op.mode = fp_reg_op.mode = MIR_OP_HARD_REG;
  sp_reg_op.u.hard_reg = SP_HARD_REG;
-  fp_reg_op.u.hard_reg = BP_HARD_REG;
+  fp_reg_op.u.hard_reg = FP_HARD_REG;
  /* Prologue: */
  anchor = DLIST_HEAD (MIR_insn_t, func->insns);
  new_insn
@ -694,12 +727,12 @@ static void make_prolog_epilog (MIR_context_t ctx, bitmap_t used_hard_regs,
  bp_saved_reg_offset = block_size + (func->vararg_p ? reg_save_area_size : 0);
  /* Saving callee saved hard registers: */
  for (i = n = 0; i <= MAX_HARD_REG; i++)
-    if (!call_used_hard_reg_p (i) && bitmap_bit_p (used_hard_regs, i)) {
+    if (!target_call_used_hard_reg_p (i) && bitmap_bit_p (used_hard_regs, i)) {
      assert (i <= R15_HARD_REG); /* xmm regs are always callee-clobbered */
      new_insn = MIR_new_insn (ctx, MIR_MOV,
                               _MIR_new_hard_reg_mem_op (ctx, MIR_T_I64,
                                                         (int64_t) (n++ * 8) - bp_saved_reg_offset,
-                                                         BP_HARD_REG, MIR_NON_HARD_REG, 1),
+                                                         FP_HARD_REG, MIR_NON_HARD_REG, 1),
                               _MIR_new_hard_reg_op (ctx, i));
      gen_add_insn_before (ctx, anchor, new_insn); /* disp(sp) = saved hard reg */
    }
@ -707,11 +740,11 @@ static void make_prolog_epilog (MIR_context_t ctx, bitmap_t used_hard_regs,
  anchor = DLIST_TAIL (MIR_insn_t, func->insns);
  /* Restoring hard registers: */
  for (i = n = 0; i <= MAX_HARD_REG; i++)
-    if (!call_used_hard_reg_p (i) && bitmap_bit_p (used_hard_regs, i)) {
+    if (!target_call_used_hard_reg_p (i) && bitmap_bit_p (used_hard_regs, i)) {
      new_insn = MIR_new_insn (ctx, MIR_MOV, _MIR_new_hard_reg_op (ctx, i),
                               _MIR_new_hard_reg_mem_op (ctx, MIR_T_I64,
                                                         (int64_t) (n++ * 8) - bp_saved_reg_offset,
-                                                         BP_HARD_REG, MIR_NON_HARD_REG, 1));
+                                                         FP_HARD_REG, MIR_NON_HARD_REG, 1));
      gen_add_insn_before (ctx, anchor, new_insn); /* hard reg = disp(sp) */
    }
  new_insn = MIR_new_insn (ctx, MIR_ADD, sp_reg_op, fp_reg_op, MIR_new_int_op (ctx, 8));
@ -743,7 +776,7 @@ struct pattern {
     l - label which can be present by 32-bit
     [0-9] - an operand matching n-th operand (n should be less than given operand number)

-     Remmeber we have no float or (long) double immediate at this stage. They are represented by
+     Remember we have no float or (long) double immediate at this stage. They are represented by
     a reference to data item.  */
  const char *pattern;
  /* Replacement elements:
@ -1092,7 +1125,7 @@ static const struct pattern patterns[] = {
  {MIR_RET, "$", "C3"}, /* ret ax, dx, xmm0, xmm1, st0, st1  */
 };

-static void get_early_clobbered_hard_reg (MIR_insn_t insn, MIR_reg_t *hr1, MIR_reg_t *hr2) {
+static void target_get_early_clobbered_hard_regs (MIR_insn_t insn, MIR_reg_t *hr1, MIR_reg_t *hr2) {
  MIR_insn_code_t code = insn->code;

  *hr1 = *hr2 = MIR_NON_HARD_REG;
@ -1807,7 +1840,7 @@ static uint8_t MIR_UNUSED get_short_jump_opcode (uint8_t *long_jump_opcode) {
  return long_jump_opcode[1] - 0x10;
 }

-static int insn_ok_p (MIR_context_t ctx, MIR_insn_t insn) {
+static int target_insn_ok_p (MIR_context_t ctx, MIR_insn_t insn) {
  return find_insn_pattern_replacement (ctx, insn) != NULL;
 }

--- a/mir/mir-gen.c
+++ b/mir/mir-gen.c
--- a/mir/mir-gen.h
+++ b/mir/mir-gen.h
@ -8,15 +8,16 @@

 #include "mir.h"

-#ifndef MIR_GEN_DEBUG
-#define MIR_GEN_DEBUG 0
+#ifndef MIR_NO_GEN_DEBUG
+#define MIR_NO_GEN_DEBUG 0
 #endif

-extern void MIR_gen_init (MIR_context_t context);
-extern void MIR_gen_set_debug_file (MIR_context_t context, FILE *f);
-extern void *MIR_gen (MIR_context_t context, MIR_item_t func_item);
-extern void MIR_set_gen_interface (MIR_context_t context, MIR_item_t func_item);
-extern void MIR_set_lazy_gen_interface (MIR_context_t context, MIR_item_t func_item);
-extern void MIR_gen_finish (MIR_context_t context);
+extern void MIR_gen_init (MIR_context_t ctx);
+extern void MIR_gen_set_debug_file (MIR_context_t ctx, FILE *f);
+extern void MIR_gen_set_optimize_level (MIR_context_t ctx, unsigned int level);
+extern void *MIR_gen (MIR_context_t ctx, MIR_item_t func_item);
+extern void MIR_set_gen_interface (MIR_context_t ctx, MIR_item_t func_item);
+extern void MIR_set_lazy_gen_interface (MIR_context_t ctx, MIR_item_t func_item);
+extern void MIR_gen_finish (MIR_context_t ctx);

 #endif /* #ifndef MIR_GEN_H */
--- a/mir/mir-htab.h
+++ b/mir/mir-htab.h
@ -72,149 +72,157 @@ DEF_VARR (htab_ind_t)
  DEF_VARR (HTAB_EL (T))                                   \
  typedef struct {                                         \
    htab_size_t els_num, els_start, els_bound, collisions; \
-    htab_hash_t (*hash_func) (T el);                       \
-    int (*eq_func) (T el1, T el2);                         \
-    void (*free_func) (T el);                              \
+    void *arg; /* passed to all three callbacks */         \
+    htab_hash_t (*hash_func) (T el, void *arg);            \
+    int (*eq_func) (T el1, T el2, void *arg);              \
+    void (*free_func) (T el, void *arg);                   \
    VARR (HTAB_EL (T)) * els; /* el and hash pairs */      \
    VARR (htab_ind_t) * entries; /* indices into els */    \
  } HTAB (T);

-#define DEF_HTAB(T)                                                                               \
-  HTAB_T (T)                                                                                      \
-                                                                                                  \
-  static inline void HTAB_OP_DEF (T, create) (HTAB (T) * *htab, htab_size_t min_size,             \
-                                              htab_hash_t (*hash_func) (T el),                    \
-                                              int (*eq_func) (T el1, T el2),                      \
-                                              void (*free_func) (T el)) {                         \
-    HTAB (T) * ht;                                                                                \
-    htab_size_t i, size;                                                                          \
-                                                                                                  \
-    for (size = 2; min_size > size; size *= 2)                                                    \
-      ;                                                                                           \
-    ht = malloc (sizeof (*ht));                                                                   \
-    if (ht == NULL) mir_htab_error ("htab: no memory");                                           \
-    VARR_CREATE (HTAB_EL (T), ht->els, size);                                                     \
-    VARR_TAILOR (HTAB_EL (T), ht->els, size);                                                     \
-    VARR_CREATE (htab_ind_t, ht->entries, 2 * size);                                              \
-    ht->hash_func = hash_func;                                                                    \
-    ht->eq_func = eq_func;                                                                        \
-    ht->free_func = free_func;                                                                    \
-    ht->els_num = ht->els_start = ht->els_bound = ht->collisions = 0;                             \
-    for (i = 0; i < 2 * size; i++) VARR_PUSH (htab_ind_t, ht->entries, HTAB_EMPTY_IND);           \
-    *htab = ht;                                                                                   \
-  }                                                                                               \
-                                                                                                  \
-  static inline void HTAB_OP_DEF (T, clear) (HTAB (T) * htab) {                                   \
-    htab_ind_t *addr;                                                                             \
-    htab_size_t i, size;                                                                          \
-    HTAB_EL (T) * els_addr;                                                                       \
-                                                                                                  \
-    HTAB_ASSERT (htab != NULL, "clear", T);                                                       \
-    if (htab->free_func != NULL) {                                                                \
-      els_addr = VARR_ADDR (HTAB_EL (T), htab->els);                                              \
-      size = VARR_LENGTH (HTAB_EL (T), htab->els);                                                \
-      for (i = 0; i < htab->els_bound; i++)                                                       \
-        if (els_addr[i].hash != HTAB_DELETED_HASH) htab->free_func (els_addr[i].el);              \
-    }                                                                                             \
-    htab->els_num = htab->els_start = htab->els_bound = 0;                                        \
-    addr = VARR_ADDR (htab_ind_t, htab->entries);                                                 \
-    size = VARR_LENGTH (htab_ind_t, htab->entries);                                               \
-    for (i = 0; i < size; i++) addr[i] = HTAB_EMPTY_IND;                                          \
-  }                                                                                               \
-                                                                                                  \
-  static inline void HTAB_OP_DEF (T, destroy) (HTAB (T) * *htab) {                                \
-    HTAB_ASSERT (*htab != NULL, "destroy", T);                                                    \
-    if ((*htab)->free_func != NULL) HTAB_OP (T, clear) (*htab);                                   \
-    VARR_DESTROY (HTAB_EL (T), (*htab)->els);                                                     \
-    VARR_DESTROY (htab_ind_t, (*htab)->entries);                                                  \
-    free (*htab);                                                                                 \
-    *htab = NULL;                                                                                 \
-  }                                                                                               \
-                                                                                                  \
-  static inline int HTAB_OP_DEF (T, do) (HTAB (T) * htab, T el, enum htab_action action,          \
-                                         T * res) {                                               \
-    htab_ind_t ind, el_ind, *entry, *first_deleted_entry = NULL;                                  \
-    htab_hash_t hash, peterb;                                                                     \
-    htab_size_t els_size, size, mask, start, bound, i;                                            \
-    htab_ind_t *addr;                                                                             \
-    HTAB_EL (T) * els_addr;                                                                       \
-                                                                                                  \
-    HTAB_ASSERT (htab != NULL, "do htab", T);                                                     \
-    size = VARR_LENGTH (htab_ind_t, htab->entries);                                               \
-    els_size = VARR_LENGTH (HTAB_EL (T), htab->els);                                              \
-    HTAB_ASSERT (els_size * 2 == size, "do size", T);                                             \
-    if ((action == HTAB_INSERT || action == HTAB_REPLACE) && htab->els_bound == els_size) {       \
-      size *= 2;                                                                                  \
-      VARR_TAILOR (htab_ind_t, htab->entries, size);                                              \
-      addr = VARR_ADDR (htab_ind_t, htab->entries);                                               \
-      for (i = 0; i < size; i++) addr[i] = HTAB_EMPTY_IND;                                        \
-      VARR_TAILOR (HTAB_EL (T), htab->els, els_size * 2);                                         \
-      els_addr = VARR_ADDR (HTAB_EL (T), htab->els);                                              \
-      start = htab->els_start;                                                                    \
-      bound = htab->els_bound;                                                                    \
-      htab->els_start = htab->els_bound = htab->els_num = 0;                                      \
-      for (i = start; i < bound; i++)                                                             \
-        if (els_addr[i].hash != HTAB_DELETED_HASH) {                                              \
-          HTAB_OP (T, do) (htab, els_addr[i].el, HTAB_INSERT, res);                               \
-          HTAB_ASSERT ((*htab->eq_func) (*res, els_addr[i].el), "do expand", T);                  \
-        }                                                                                         \
-      HTAB_ASSERT (bound - start >= htab->els_bound, "do bound", T);                              \
-    }                                                                                             \
-    mask = size - 1;                                                                              \
-    hash = (*htab->hash_func) (el);                                                               \
-    if (hash == HTAB_DELETED_HASH) hash += 1;                                                     \
-    peterb = hash;                                                                                \
-    ind = hash & mask;                                                                            \
-    addr = VARR_ADDR (htab_ind_t, htab->entries);                                                 \
-    els_addr = VARR_ADDR (HTAB_EL (T), htab->els);                                                \
-    for (;; htab->collisions++) {                                                                 \
-      entry = addr + ind;                                                                         \
-      el_ind = *entry;                                                                            \
-      if (el_ind != HTAB_EMPTY_IND) {                                                             \
-        if (el_ind == HTAB_DELETED_IND) {                                                         \
-          first_deleted_entry = entry;                                                            \
-        } else if (els_addr[el_ind].hash == hash && (*htab->eq_func) (els_addr[el_ind].el, el)) { \
-          if (action == HTAB_REPLACE) {                                                           \
-            if (htab->free_func != NULL) htab->free_func (els_addr[el_ind].el);                   \
-            els_addr[el_ind].el = el;                                                             \
-          }                                                                                       \
-          if (action != HTAB_DELETE) {                                                            \
-            *res = els_addr[el_ind].el;                                                           \
-          } else {                                                                                \
-            htab->els_num--;                                                                      \
-            *entry = HTAB_DELETED_IND;                                                            \
-            if (htab->free_func != NULL) htab->free_func (els_addr[el_ind].el);                   \
-            els_addr[el_ind].hash = HTAB_DELETED_HASH;                                            \
-          }                                                                                       \
-          return TRUE;                                                                            \
-        }                                                                                         \
-      } else {                                                                                    \
-        if (action == HTAB_INSERT || action == HTAB_REPLACE) {                                    \
-          htab->els_num++;                                                                        \
-          if (first_deleted_entry != NULL) entry = first_deleted_entry;                           \
-          els_addr[htab->els_bound].hash = hash;                                                  \
-          els_addr[htab->els_bound].el = el;                                                      \
-          *entry = htab->els_bound++;                                                             \
-          *res = el;                                                                              \
-        }                                                                                         \
-        return FALSE;                                                                             \
-      }                                                                                           \
-      peterb >>= 11;                                                                              \
-      ind = (5 * ind + peterb + 1) & mask;                                                        \
-    }                                                                                             \
-  }                                                                                               \
-                                                                                                  \
-  static inline htab_size_t HTAB_OP_DEF (T, els_num) (HTAB (T) * htab) {                          \
-    HTAB_ASSERT (htab != NULL, "els_num", T);                                                     \
-    return htab->els_num;                                                                         \
-  }                                                                                               \
-  static inline htab_size_t HTAB_OP_DEF (T, collisions) (HTAB (T) * htab) {                       \
-    HTAB_ASSERT (htab != NULL, "collisions", T);                                                  \
-    return htab->collisions;                                                                      \
+#define DEF_HTAB(T)                                                                             \
+  HTAB_T (T)                                                                                    \
+                                                                                                \
+  static inline void HTAB_OP_DEF (T, create) (HTAB (T) * *htab, htab_size_t min_size,           \
+                                              htab_hash_t (*hash_func) (T el, void *arg),       \
+                                              int (*eq_func) (T el1, T el2, void *arg),         \
+                                              void (*free_func) (T el, void *arg), void *arg) { \
+    HTAB (T) * ht;                                                                              \
+    htab_size_t i, size;                                                                        \
+                                                                                                \
+    for (size = 2; min_size > size; size *= 2)                                                  \
+      ; /* round min_size up to a power of two, at least 2 */                                   \
+    ht = malloc (sizeof (*ht));                                                                 \
+    if (ht == NULL) mir_htab_error ("htab: no memory");                                         \
+    VARR_CREATE (HTAB_EL (T), ht->els, size);                                                   \
+    VARR_TAILOR (HTAB_EL (T), ht->els, size);                                                   \
+    VARR_CREATE (htab_ind_t, ht->entries, 2 * size); /* two index slots per element slot */     \
+    ht->arg = arg; /* opaque pointer passed to every callback */                                \
+    ht->hash_func = hash_func;                                                                  \
+    ht->eq_func = eq_func;                                                                      \
+    ht->free_func = free_func; /* may be NULL */                                                \
+    ht->els_num = ht->els_start = ht->els_bound = ht->collisions = 0;                           \
+    for (i = 0; i < 2 * size; i++) VARR_PUSH (htab_ind_t, ht->entries, HTAB_EMPTY_IND);         \
+    *htab = ht;                                                                                 \
+  }                                                                                             \
+                                                                                                \
+  static inline void HTAB_OP_DEF (T, clear) (HTAB (T) * htab) {                                 \
+    htab_ind_t *addr;                                                                           \
+    htab_size_t i, size;                                                                        \
+    HTAB_EL (T) * els_addr;                                                                     \
+    void *arg;                                                                                  \
+                                                                                                \
+    HTAB_ASSERT (htab != NULL, "clear", T);                                                     \
+    arg = htab->arg;                                                                            \
+    if (htab->free_func != NULL) {                                                              \
+      els_addr = VARR_ADDR (HTAB_EL (T), htab->els);                                            \
+      size = VARR_LENGTH (HTAB_EL (T), htab->els); /* NOTE(review): dead store, never read */   \
+      for (i = 0; i < htab->els_bound; i++)                                                     \
+        if (els_addr[i].hash != HTAB_DELETED_HASH) htab->free_func (els_addr[i].el, arg);       \
+    }                                                                                           \
+    htab->els_num = htab->els_start = htab->els_bound = 0; /* collisions is kept */             \
+    addr = VARR_ADDR (htab_ind_t, htab->entries);                                               \
+    size = VARR_LENGTH (htab_ind_t, htab->entries);                                             \
+    for (i = 0; i < size; i++) addr[i] = HTAB_EMPTY_IND;                                        \
+  }                                                                                             \
+                                                                                                \
+  static inline void HTAB_OP_DEF (T, destroy) (HTAB (T) * *htab) {                              \
+    HTAB_ASSERT (*htab != NULL, "destroy", T);                                                  \
+    if ((*htab)->free_func != NULL) HTAB_OP (T, clear) (*htab); /* free elements first */       \
+    VARR_DESTROY (HTAB_EL (T), (*htab)->els);                                                   \
+    VARR_DESTROY (htab_ind_t, (*htab)->entries);                                                \
+    free (*htab);                                                                               \
+    *htab = NULL; /* caller's handle is reset */                                                \
+  }                                                                                             \
+                                                                                                \
+  static inline int HTAB_OP_DEF (T, do) (HTAB (T) * htab, T el, enum htab_action action,        \
+                                         T * res) { /* returns TRUE iff an equal el existed */  \
+    htab_ind_t ind, el_ind, *entry, *first_deleted_entry = NULL;                                \
+    htab_hash_t hash, peterb;                                                                   \
+    htab_size_t els_size, size, mask, start, bound, i;                                          \
+    htab_ind_t *addr;                                                                           \
+    HTAB_EL (T) * els_addr;                                                                     \
+    void *arg;                                                                                  \
+                                                                                                \
+    HTAB_ASSERT (htab != NULL, "do htab", T);                                                   \
+    size = VARR_LENGTH (htab_ind_t, htab->entries);                                             \
+    els_size = VARR_LENGTH (HTAB_EL (T), htab->els);                                            \
+    arg = htab->arg;                                                                            \
+    HTAB_ASSERT (els_size * 2 == size, "do size", T);                                           \
+    if ((action == HTAB_INSERT || action == HTAB_REPLACE) && htab->els_bound == els_size) {     \
+      size *= 2; /* element array full: double both arrays and rehash */                        \
+      VARR_TAILOR (htab_ind_t, htab->entries, size);                                            \
+      addr = VARR_ADDR (htab_ind_t, htab->entries);                                             \
+      for (i = 0; i < size; i++) addr[i] = HTAB_EMPTY_IND;                                      \
+      VARR_TAILOR (HTAB_EL (T), htab->els, els_size * 2);                                       \
+      els_addr = VARR_ADDR (HTAB_EL (T), htab->els);                                            \
+      start = htab->els_start;                                                                  \
+      bound = htab->els_bound;                                                                  \
+      htab->els_start = htab->els_bound = htab->els_num = 0;                                    \
+      for (i = start; i < bound; i++)                                                           \
+        if (els_addr[i].hash != HTAB_DELETED_HASH) { /* skip deleted elements */                \
+          HTAB_OP (T, do) (htab, els_addr[i].el, HTAB_INSERT, res);                             \
+          HTAB_ASSERT ((*htab->eq_func) (*res, els_addr[i].el, arg), "do expand", T);           \
+        }                                                                                       \
+      HTAB_ASSERT (bound - start >= htab->els_bound, "do bound", T);                            \
+    }                                                                                           \
+    mask = size - 1; /* size is a power of two */                                               \
+    hash = (*htab->hash_func) (el, arg);                                                        \
+    if (hash == HTAB_DELETED_HASH) hash += 1; /* value reserved for deleted elements */         \
+    peterb = hash;                                                                              \
+    ind = hash & mask;                                                                          \
+    addr = VARR_ADDR (htab_ind_t, htab->entries);                                               \
+    els_addr = VARR_ADDR (HTAB_EL (T), htab->els);                                              \
+    for (;; htab->collisions++) { /* counts every extra probe */                                \
+      entry = addr + ind;                                                                       \
+      el_ind = *entry;                                                                          \
+      if (el_ind != HTAB_EMPTY_IND) {                                                           \
+        if (el_ind == HTAB_DELETED_IND) {                                                       \
+          first_deleted_entry = entry; /* NOTE(review): keeps the last one seen */              \
+        } else if (els_addr[el_ind].hash == hash                                                \
+                   && (*htab->eq_func) (els_addr[el_ind].el, el, arg)) {                        \
+          if (action == HTAB_REPLACE) {                                                         \
+            if (htab->free_func != NULL) htab->free_func (els_addr[el_ind].el, arg);            \
+            els_addr[el_ind].el = el;                                                           \
+          }                                                                                     \
+          if (action != HTAB_DELETE) {                                                          \
+            *res = els_addr[el_ind].el;                                                         \
+          } else {                                                                              \
+            htab->els_num--;                                                                    \
+            *entry = HTAB_DELETED_IND;                                                          \
+            if (htab->free_func != NULL) htab->free_func (els_addr[el_ind].el, arg);            \
+            els_addr[el_ind].hash = HTAB_DELETED_HASH;                                          \
+          }                                                                                     \
+          return TRUE; /* an equal element was already present */                               \
+        }                                                                                       \
+      } else { /* empty slot: el is not in the table */                                         \
+        if (action == HTAB_INSERT || action == HTAB_REPLACE) {                                  \
+          htab->els_num++;                                                                      \
+          if (first_deleted_entry != NULL) entry = first_deleted_entry;                         \
+          els_addr[htab->els_bound].hash = hash;                                                \
+          els_addr[htab->els_bound].el = el;                                                    \
+          *entry = htab->els_bound++;                                                           \
+          *res = el;                                                                            \
+        }                                                                                       \
+        return FALSE;                                                                           \
+      }                                                                                         \
+      peterb >>= 11; /* feed higher hash bits into the next probe */                            \
+      ind = (5 * ind + peterb + 1) & mask;                                                      \
+    }                                                                                           \
+  }                                                                                             \
+                                                                                                \
+  static inline htab_size_t HTAB_OP_DEF (T, els_num) (HTAB (T) * htab) {                        \
+    HTAB_ASSERT (htab != NULL, "els_num", T);                                                   \
+    return htab->els_num; /* incremented on insert, decremented on delete */                    \
+  }                                                                                             \
+  static inline htab_size_t HTAB_OP_DEF (T, collisions) (HTAB (T) * htab) {                     \
+    HTAB_ASSERT (htab != NULL, "collisions", T);                                                \
+    return htab->collisions; /* extra probes counted since creation */                          \
  }

-#define HTAB_CREATE(T, V, S, H, EQ) (HTAB_OP (T, create) (&(V), S, H, EQ, NULL))
-#define HTAB_CREATE_WITH_FREE_FUNC(T, V, S, H, EQ, F) (HTAB_OP (T, create) (&(V), S, H, EQ, F))
+#define HTAB_CREATE(T, V, S, H, EQ, A) (HTAB_OP (T, create) (&(V), S, H, EQ, NULL, A))
+#define HTAB_CREATE_WITH_FREE_FUNC(T, V, S, H, EQ, F, A) \
+  (HTAB_OP (T, create) (&(V), S, H, EQ, F, A)) /* A: arg handed to H, EQ and F */
 #define HTAB_CLEAR(T, V) (HTAB_OP (T, clear) (V))
 #define HTAB_DESTROY(T, V) (HTAB_OP (T, destroy) (&(V)))
 /* It returns TRUE if the element existed in the table.  */
--- a/mir/mir-interp.c
+++ b/mir/mir-interp.c
@ -51,6 +51,7 @@ DEF_VARR (MIR_val_t);

 struct ff_interface {
  size_t nres, nargs; /* element counts of res_types and arg_types */
+  int vararg_p; /* stored as 0 or 1; part of the hash/equality key */
  MIR_type_t *res_types, *arg_types;
  void *interface_addr; /* value returned by _MIR_get_ff_call */
 };
@ -731,7 +732,7 @@ static ALWAYS_INLINE int64_t get_mem_addr (MIR_val_t *bp, code_t c) { return bp[
    *((mem_type *) a) = v;                  \
  } while (0)

-#if defined(__GNUC__) && !defined(__clang__)
+#if !MIR_INTERP_TRACE && defined(__GNUC__) && !defined(__clang__)
 #define OPTIMIZE \
  __attribute__ ((__optimize__ ("O2"))) __attribute__ ((__optimize__ ("-fno-ipa-cp-clone")))
 #else
@ -806,7 +807,7 @@ static void finish_insn_trace (MIR_context_t ctx, MIR_full_insn_code_t code, cod
  case MIR_OP_LDOUBLE:
    fprintf (stderr, "\t# res = %.*Le", LDBL_DECIMAL_DIG, bp[ops[0].i].ld);
    break;
-  default: assert (FALSE);
+  default: assert (op_mode == MIR_OP_UNDEF);
  }
  fprintf (stderr, "\n");
 }
@ -1336,23 +1337,25 @@ static inline func_desc_t get_func_desc (MIR_item_t func_item) {
  return func_item->data;
 }

-static htab_hash_t ff_interface_hash (ff_interface_t i) {
-  return mir_hash_finish (
-    mir_hash_step (mir_hash_step (mir_hash_step (mir_hash_init (0), i->nres), i->nargs),
-                   mir_hash (i->res_types, sizeof (MIR_type_t) * i->nres,
-                             mir_hash (i->arg_types, sizeof (MIR_type_t) * i->nargs, 42))));
+static htab_hash_t ff_interface_hash (ff_interface_t i, void *arg) { /* arg unused */
+  htab_hash_t h = mir_hash_step (mir_hash_init (0), i->nres);
+  h = mir_hash_step (h, i->nargs);
+  h = mir_hash_step (h, i->vararg_p); /* hashed fields match those ff_interface_eq compares */
+  h = mir_hash (i->res_types, sizeof (MIR_type_t) * i->nres, h);
+  h = mir_hash (i->arg_types, sizeof (MIR_type_t) * i->nargs, h);
+  return mir_hash_finish (h);
 }

-static int ff_interface_eq (ff_interface_t i1, ff_interface_t i2) {
-  return (i1->nres == i2->nres && i1->nargs == i2->nargs
+static int ff_interface_eq (ff_interface_t i1, ff_interface_t i2, void *arg) { /* arg unused */
+  return (i1->nres == i2->nres && i1->nargs == i2->nargs && i1->vararg_p == i2->vararg_p
          && memcmp (i1->res_types, i2->res_types, sizeof (MIR_type_t) * i1->nres) == 0
          && memcmp (i1->arg_types, i2->arg_types, sizeof (MIR_type_t) * i1->nargs) == 0);
 }

-static void ff_interface_clear (ff_interface_t ffi) { free (ffi); }
+static void ff_interface_clear (ff_interface_t ffi, void *arg) { free (ffi); } /* arg unused */

 static void *get_ff_interface (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, size_t nargs,
-                               MIR_type_t *arg_types) {
+                               MIR_type_t *arg_types, int vararg_p) {
  struct interp_ctx *interp_ctx = ctx->interp_ctx;
  struct ff_interface ffi_s;
  ff_interface_t tab_ffi, ffi;
@ -1360,6 +1363,7 @@ static void *get_ff_interface (MIR_context_t ctx, size_t nres, MIR_type_t *res_t

  ffi_s.nres = nres;
  ffi_s.nargs = nargs;
+  ffi_s.vararg_p = !!vararg_p;
  ffi_s.res_types = res_types;
  ffi_s.arg_types = arg_types;
  if (HTAB_DO (ff_interface_t, ff_interface_tab, &ffi_s, HTAB_FIND, tab_ffi))
@ -1367,11 +1371,12 @@ static void *get_ff_interface (MIR_context_t ctx, size_t nres, MIR_type_t *res_t
  ffi = malloc (sizeof (struct ff_interface) + sizeof (MIR_type_t) * (nres + nargs));
  ffi->nres = nres;
  ffi->nargs = nargs;
+  ffi->vararg_p = !!vararg_p;
  ffi->res_types = (MIR_type_t *) ((char *) ffi + sizeof (struct ff_interface));
  ffi->arg_types = ffi->res_types + nres;
  memcpy (ffi->res_types, res_types, sizeof (MIR_type_t) * nres);
  memcpy (ffi->arg_types, arg_types, sizeof (MIR_type_t) * nargs);
-  ffi->interface_addr = _MIR_get_ff_call (ctx, nres, res_types, nargs, call_arg_types);
+  ffi->interface_addr = _MIR_get_ff_call (ctx, nres, res_types, nargs, call_arg_types, vararg_p);
  htab_res = HTAB_DO (ff_interface_t, ff_interface_tab, ffi, HTAB_INSERT, tab_ffi);
  mir_assert (!htab_res && ffi == tab_ffi);
  return ffi->interface_addr;
@ -1419,7 +1424,7 @@ static void call (MIR_context_t ctx, MIR_val_t *bp, MIR_op_t *insn_arg_ops, code
        = (mode == MIR_OP_DOUBLE ? MIR_T_D : mode == MIR_OP_LDOUBLE ? MIR_T_LD : MIR_T_I64);
    }
    ff_interface_addr = ffi_address_ptr->a
-      = get_ff_interface (ctx, nres, proto->res_types, nargs, call_arg_types);
+      = get_ff_interface (ctx, nres, proto->res_types, nargs, call_arg_types, proto->vararg_p);
  }

  for (i = 0; i < nargs; i++) {
@ -1482,7 +1487,7 @@ static void interp_init (MIR_context_t ctx) {
  call_res_args = VARR_ADDR (MIR_val_t, call_res_args_varr);
  call_arg_types = VARR_ADDR (MIR_type_t, call_arg_types_varr);
  HTAB_CREATE_WITH_FREE_FUNC (ff_interface_t, ff_interface_tab, 1000, ff_interface_hash,
-                              ff_interface_eq, ff_interface_clear);
+                              ff_interface_eq, ff_interface_clear, NULL);
 #if MIR_INTERP_TRACE
  trace_insn_ident = 0;
 #endif
@ -1504,8 +1509,14 @@ static void interp_finish (MIR_context_t ctx) {
  ctx->interp_ctx = NULL;
 }

+#if VA_LIST_IS_ARRAY_P
+typedef va_list va_t;
+#else
+    typedef va_list *va_t;
+#endif
+
 static void interp_arr_varg (MIR_context_t ctx, MIR_item_t func_item, MIR_val_t *results,
-                             size_t nargs, MIR_val_t *vals, va_list va) {
+                             size_t nargs, MIR_val_t *vals, va_t va) {
  func_desc_t func_desc;
  MIR_val_t *bp;

@ -1519,7 +1530,12 @@ static void interp_arr_varg (MIR_context_t ctx, MIR_item_t func_item, MIR_val_t
  bp[0].i = 0;
  memcpy (&bp[1], vals, sizeof (MIR_val_t) * nargs);
  eval (ctx, func_desc, bp, results);
-  if (va != NULL) va_end (va);
+  if (va != NULL)
+#if VA_LIST_IS_ARRAY_P
+    va_end (va);
+#else
+        va_end (*va);
+#endif
 }

 void MIR_interp (MIR_context_t ctx, MIR_item_t func_item, MIR_val_t *results, size_t nargs, ...) {
@ -1531,7 +1547,11 @@ void MIR_interp (MIR_context_t ctx, MIR_item_t func_item, MIR_val_t *results, si
    arg_vals = VARR_ADDR (MIR_val_t, arg_vals_varr);
  va_start (argp, nargs);
  for (i = 0; i < nargs; i++) arg_vals[i] = va_arg (argp, MIR_val_t);
+#if VA_LIST_IS_ARRAY_P
  interp_arr_varg (ctx, func_item, results, nargs, arg_vals, argp);
+#else
+      interp_arr_varg (ctx, func_item, results, nargs, arg_vals, (va_t) &argp);
+#endif
 }

 void MIR_interp_arr_varg (MIR_context_t ctx, MIR_item_t func_item, MIR_val_t *results, size_t nargs,
@ -1543,7 +1563,11 @@ void MIR_interp_arr_varg (MIR_context_t ctx, MIR_item_t func_item, MIR_val_t *re
  if (func_item->data == NULL) generate_icode (ctx, func_item);
  func_desc = get_func_desc (func_item);
  bp = alloca ((func_desc->nregs + 1) * sizeof (MIR_val_t));
+#if VA_LIST_IS_ARRAY_P
  bp[0].a = va;
+#else
+      bp[0].a = &va;
+#endif
  bp++;
  if (func_desc->nregs < nargs + 1) nargs = func_desc->nregs - 1;
  bp[0].i = 0;
@ -1586,7 +1610,11 @@ static void interp (MIR_context_t ctx, MIR_item_t func_item, va_list va, MIR_val
        float f;
      } u;
      u.d = va_arg (va, double);
-      arg_vals[i].f = u.f;
+#if defined(__PPC64__)
+      arg_vals[i].f = u.d;
+#else
+          arg_vals[i].f = u.f;
+#endif
      break;
    }
    case MIR_T_D: arg_vals[i].d = va_arg (va, double); break;
@ -1595,7 +1623,11 @@ static void interp (MIR_context_t ctx, MIR_item_t func_item, va_list va, MIR_val
    default: mir_assert (FALSE);
    }
  }
+#if VA_LIST_IS_ARRAY_P
  interp_arr_varg (ctx, func_item, results, nargs, arg_vals, va);
+#else
+      interp_arr_varg (ctx, func_item, results, nargs, arg_vals, (va_t) &va);
+#endif
 }

 static void redirect_interface_to_interp (MIR_context_t ctx, MIR_item_t func_item) {
--- a/mir/mir-ppc64.c
+++ b/mir/mir-ppc64.c
@ -0,0 +1,467 @@
+/* This file is a part of MIR project.
+   Copyright (C) 2018-2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
+*/
+
+// _MIR_get_thunk, _MIR_redirect_thunk, _MIR_get_interp_shim, _MIR_get_ff_call, _MIR_get_wrapper
+#define VA_LIST_IS_ARRAY_P 1 /* one element which is a pointer to args */
+
+#define FUNC_DESC_LEN 24
+static void ppc64_push_func_desc (MIR_context_t ctx);
+void (*ppc64_func_desc) (MIR_context_t ctx) = ppc64_push_func_desc;
+
+/* Start a fresh code sequence: empty machine_insns and seed it with FUNC_DESC_LEN bytes read
+   through ppc64_func_desc.  These bytes reserve the place of the function descriptor which
+   ppc64_publish_func_and_redirect patches afterwards. */
+static void ppc64_push_func_desc (MIR_context_t ctx) {
+  const uint8_t *desc_bytes = (const uint8_t *) ppc64_func_desc;
+
+  VARR_TRUNC (uint8_t, machine_insns, 0);
+  for (int n = 0; n < FUNC_DESC_LEN; n++) VARR_PUSH (uint8_t, machine_insns, desc_bytes[n]);
+}
+
+/* Make the function descriptor at DESC refer to TO: the first sizeof (void *) bytes of the
+   descriptor are overwritten with the value of TO.  Both addresses must be 4-byte aligned. */
+static void ppc64_redirect_func_desc (MIR_context_t ctx, void *desc, void *to) {
+  mir_assert ((((uint64_t) desc | (uint64_t) to) & 0x3) == 0); /* both are aligned */
+  _MIR_change_code (ctx, desc, (uint8_t *) &to, sizeof (to));
+}
+
+/* Publish the bytes accumulated in machine_insns and point the leading function descriptor at
+   the code which follows it (FUNC_DESC_LEN bytes further).  Returns the published address,
+   i.e. the address of the descriptor. */
+static void *ppc64_publish_func_and_redirect (MIR_context_t ctx) {
+  uint8_t *insns = VARR_ADDR (uint8_t, machine_insns);
+  size_t insns_len = VARR_LENGTH (uint8_t, machine_insns);
+  uint8_t *published = _MIR_publish_code (ctx, insns, insns_len);
+
+  ppc64_redirect_func_desc (ctx, published, published + FUNC_DESC_LEN);
+  return published;
+}
+
+/* Append the bytes of one 32-bit instruction INSN to machine_insns, in the order they are
+   stored in the host memory. */
+static void push_insn (MIR_context_t ctx, uint32_t insn) {
+  const uint8_t *bytes = (const uint8_t *) &insn;
+
+  for (size_t k = 0; k < sizeof (insn); k++) VARR_PUSH (uint8_t, machine_insns, bytes[k]);
+}
+
+/* Append PAT_LEN bytes starting at PAT to machine_insns.  Note that PAT_LEN is a byte count
+   (the callers pass sizeof of the pattern array), not a number of instructions. */
+static void push_insns (MIR_context_t ctx, const uint32_t *pat, size_t pat_len) {
+  const uint8_t *cur = (const uint8_t *) pat;
+  const uint8_t *end = cur + pat_len;
+
+  for (; cur != end; cur++) VARR_PUSH (uint8_t, machine_insns, *cur);
+}
+
+/* Generate and publish the bstart builtin: the code copies r1 into r3 and returns. */
+void *_MIR_get_bstart_builtin (MIR_context_t ctx) {
+  static const uint32_t code[] = {
+    0x7c230b78, /* mr 3,1 */
+    0x4e800020, /* blr */
+  };
+
+  ppc64_push_func_desc (ctx);
+  push_insns (ctx, code, sizeof (code));
+  return ppc64_publish_func_and_redirect (ctx);
+}
+
+/* Generate and publish the bend builtin.  The code copies the doublewords at offsets 0 and 40
+   from the memory addressed by r1 to the same offsets of the memory addressed by r3, then
+   makes r3 the new r1 and returns. */
+void *_MIR_get_bend_builtin (MIR_context_t ctx) {
+  static const uint32_t code[] = {
+    0xe8010000, /* ld      r0,0(r1) */
+    0xf8030000, /* std     r0,0(r3) */
+    0xe8010028, /* ld      r0,40(r1) */
+    0xf8030028, /* std     r0,40(r3) */
+    0x7c611b78, /* mr      r1,r3 */
+    0x4e800020, /* blr */
+  };
+
+  ppc64_push_func_desc (ctx);
+  push_insns (ctx, code, sizeof (code));
+  return ppc64_publish_func_and_redirect (ctx);
+}
+
+/* Create a thunk: published code consisting only of a function descriptor, which can be
+   retargeted later by _MIR_redirect_thunk. */
+void *_MIR_get_thunk (MIR_context_t ctx) {
+  ppc64_push_func_desc (ctx); /* emit 3 doublewords for func descriptor */
+  return ppc64_publish_func_and_redirect (ctx);
+}
+
+/* Retarget THUNK (a function descriptor) so that it refers to TO. */
+void _MIR_redirect_thunk (MIR_context_t ctx, void *thunk, void *to) {
+  void *desc = thunk;
+
+  ppc64_redirect_func_desc (ctx, desc, to);
+}
+
+struct ppc64_va_list {
+  uint64_t *arg_area; /* next argument to fetch; va_arg_builtin reads and advances it */
+};
+
+/* Fetch the next variadic argument of MIR type T from the ppc64 va_list P.
+   Returns the address of the argument inside the argument area and advances the area pointer
+   past it: by 16 bytes for MIR_T_LD and by 8 bytes for every other type.  For MIR_T_F and
+   MIR_T_I32 the returned address is the 2nd word of the doubleword slot. */
+void *va_arg_builtin (void *p, uint64_t t) {
+  struct ppc64_va_list *va = p;
+  MIR_type_t type = t;
+  void *a = va->arg_area;
+
+  if (type == MIR_T_F || type == MIR_T_I32) {
+    a = (char *) a + 4; /* 2nd word of doubleword */
+    va->arg_area = (uint64_t *) ((char *) a + 4);
+  } else if (type == MIR_T_LD) {
+    va->arg_area += 2;
+  } else {
+    va->arg_area++;
+  }
+  return a;
+}
+
+/* Store in *P the address A (a pointer to va_list) reinterpreted as a pointer to
+   struct ppc64_va_list.  Both types are required to have the same size. */
+void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a) {
+  assert (sizeof (struct ppc64_va_list) == sizeof (va_list));
+  *(struct ppc64_va_list **) p = (struct ppc64_va_list *) a;
+}
+
+void va_end_interp_builtin (MIR_context_t ctx, void *p) {} /* intentionally empty: no-op here */
+
+/* Emit a register move TO = FROM, encoded as "or to,from,from". */
+static void ppc64_gen_mov (MIR_context_t ctx, unsigned to, unsigned from) {
+  uint32_t or_opcode = (31 << 26) | (444 << 1);
+
+  push_insn (ctx, or_opcode | (from << 21) | (to << 16) | (from << 11));
+}
+
+/* Emit "addi rt_reg,ra_reg,disp".  Only the low 16 bits of DISP are encoded. */
+static void ppc64_gen_addi (MIR_context_t ctx, unsigned rt_reg, unsigned ra_reg, int disp) {
+  uint32_t imm16 = disp & 0xffff;
+
+  push_insn (ctx, (14 << 26) | (rt_reg << 21) | (ra_reg << 16) | imm16);
+}
+
+/* Emit a load of register TO from memory DISP(BASE).  TYPE selects the instruction: the
+   single-precision fp load for MIR_T_F, the double-precision fp load for MIR_T_D and MIR_T_LD,
+   and ld for everything else.  Only the low 16 bits of DISP are encoded; for ld DISP must be
+   a multiple of 4.  BASE must not be register 0. */
+static void ppc64_gen_ld (MIR_context_t ctx, unsigned to, unsigned base, int disp,
+                          MIR_type_t type) {
+  int single_p = type == MIR_T_F;
+  int double_p = type == MIR_T_D || type == MIR_T_LD;
+  /* Primary opcode is kept unsigned: 48, 50, and 58 shifted left by 26 do not fit into int. */
+  uint32_t opcode = single_p ? 48 : double_p ? 50 : 58;
+
+  /* (ld | lf[sd]) to, disp(base): */
+  assert (base != 0 && base < 32 && to < 32 && (single_p || double_p || (disp & 0x3) == 0));
+  push_insn (ctx, (opcode << 26) | (to << 21) | (base << 16) | (disp & 0xffff));
+}
+
+/* Emit a store of register FROM to memory DISP(BASE).  TYPE selects the instruction: the
+   single-precision fp store for MIR_T_F, the double-precision fp store for MIR_T_D and
+   MIR_T_LD, and std for everything else.  Only the low 16 bits of DISP are encoded; for std
+   DISP must be a multiple of 4.  BASE must not be register 0. */
+static void ppc64_gen_st (MIR_context_t ctx, unsigned from, unsigned base, int disp,
+                          MIR_type_t type) {
+  int single_p = type == MIR_T_F;
+  int double_p = type == MIR_T_D || type == MIR_T_LD;
+  /* Primary opcode is kept unsigned: 52, 54, and 62 shifted left by 26 do not fit into int. */
+  uint32_t opcode = single_p ? 52 : double_p ? 54 : 62;
+
+  /* std|stf[sd] from, disp(base): */
+  assert (base != 0 && base < 32 && from < 32 && (single_p || double_p || (disp & 0x3) == 0));
+  push_insn (ctx, (opcode << 26) | (from << 21) | (base << 16) | (disp & 0xffff));
+}
+
+/* Emit "stdu r1,disp(r1)".  DISP must be a multiple of 4; its bits 2..15 are encoded. */
+static void ppc64_gen_stdu (MIR_context_t ctx, int disp) {
+  uint32_t ds_field = disp & 0xfffc;
+
+  assert ((disp & 0x3) == 0);
+  push_insn (ctx, 0xf8210001 | ds_field); /* stdu 1, disp (1) */
+}
+
+/* Emit code loading the 64-bit address P into register REG.  The address is handled as four
+   halfwords Z0 (most significant) .. Z3 (least significant); a shorter sequence is used when
+   the upper 32 bits are zero. */
+static void ppc64_gen_address (MIR_context_t ctx, unsigned int reg, void *p) {
+  uint64_t addr = (uint64_t) p;
+  uint32_t z0 = addr >> 48, z1 = (addr >> 32) & 0xffff;
+  uint32_t z2 = (addr >> 16) & 0xffff, z3 = addr & 0xffff;
+  uint32_t rs_ra = (reg << 21) | (reg << 16); /* REG as both source and target */
+
+  if ((addr >> 32) != 0) {
+    /* lis r,Z0; ori r,r,Z1; rldicr r,r,32,31; oris r,r,Z2: */
+    push_insn (ctx, (15 << 26) | (reg << 21) | (0 << 16) | z0);
+    push_insn (ctx, (24 << 26) | rs_ra | z1);
+    push_insn (ctx, (30 << 26) | rs_ra | 0x07c6);
+    push_insn (ctx, (25 << 26) | rs_ra | z2);
+  } else if (((addr >> 31) & 1) != 0) {
+    /* bit 31 is set: xor r,r,r; oris r,r,Z2 */
+    push_insn (ctx, (31 << 26) | (316 << 1) | rs_ra | (reg << 11));
+    push_insn (ctx, (25 << 26) | rs_ra | z2);
+  } else {
+    /* lis r,Z2 */
+    push_insn (ctx, (15 << 26) | (reg << 21) | (0 << 16) | z2);
+  }
+  push_insn (ctx, (24 << 26) | rs_ra | z3); /* ori r,r,Z3 */
+}
+
+/* Emit a jump (or a call when CALL_P is nonzero) through the function descriptor whose
+   address is in register REG: the doubleword at 0(REG) is used as the code address and the
+   doubleword at 8(REG) is loaded into r2.  Registers r0 and ctr are overwritten. */
+static void ppc64_gen_jump (MIR_context_t ctx, unsigned int reg, int call_p) {
+  uint32_t link_bit = call_p ? 1 : 0;
+
+  ppc64_gen_ld (ctx, 0, reg, 0, MIR_T_I64); /* r0 = func addr */
+  ppc64_gen_ld (ctx, 2, reg, 8, MIR_T_I64); /* r2 = TOC */
+  push_insn (ctx, (31 << 26) | (467 << 1) | (0 << 21) | (9 << 16)); /* mtctr 0 */
+  push_insn (ctx, (19 << 26) | (528 << 1) | (20 << 21) | link_bit);  /* bcctr[l] */
+}
+
+/* Generation: fun (fun_addr, res_arg_addresses):
+   save lr (r1 + 16); allocate and form minimal stack frame (with necessary param area); save r14;
+   r12=fun_addr (r3); r14 = res_arg_addresses (r4);
+   r0=mem[r14,<args_offset>]; (arg_reg=mem[r0] or r0=mem[r0];mem[r1,r1_offset]=r0) ...
+   if func is vararg: put fp args also in gp regs
+   call *r12;
+   r0=mem[r14,<offset>]; res_reg=mem[r0]; ...
+   restore r14, r1, lr; return.
+
+   NRES/RES_TYPES and NARGS/ARG_TYPES describe the results and the arguments of the function
+   to call; VARARG_P is nonzero when that function is variadic.  The generated code addresses
+   16-byte slots off r14: results first (slot i at offset i * 16), then arguments (starting at
+   offset nres * 16).  Returns the published code, which starts with a function descriptor. */
+void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, size_t nargs,
+                        MIR_type_t *arg_types, int vararg_p) {
+  static uint32_t start_pattern[] = {
+    0x7c0802a6, /* mflr r0 */
+    0xf8010010, /* std  r0,16(r1) */
+  };
+  static uint32_t finish_pattern[] = {
+    0xe8010010, /* ld   r0,16(r1) */
+    0x7c0803a6, /* mtlr r0 */
+    0x4e800020, /* blr */
+  };
+  MIR_type_t type;
+  int n_gpregs = 0, n_fpregs = 0, res_reg = 14, frame_size, disp, param_offset, param_size = 0;
+
+  ppc64_push_func_desc (ctx);
+  for (uint32_t i = 0; i < nargs; i++) param_size += arg_types[i] == MIR_T_LD ? 16 : 8;
+  if (param_size < 64) param_size = 64;
+  frame_size = 48 + param_size + 8; /* +local var to save res_reg */
+  /* All the summands are multiples of 8, so the size can only be 0 or 8 modulo 16.  Pad it to
+     a multiple of 16 to keep the stack pointer quadword aligned: */
+  if (frame_size % 16 != 0) frame_size += 8; /* align */
+  ppc64_gen_st (ctx, 2, 1, 40, MIR_T_I64);
+  push_insns (ctx, start_pattern, sizeof (start_pattern));
+  ppc64_gen_stdu (ctx, -frame_size);
+  ppc64_gen_st (ctx, res_reg, 1, 48 + param_size, MIR_T_I64); /* save res_reg */
+  mir_assert (sizeof (long double) == 16);
+  ppc64_gen_mov (ctx, res_reg, 4); /* results & args */
+  ppc64_gen_mov (ctx, 12, 3);      /* func addr */
+  n_gpregs = n_fpregs = 0;
+  param_offset = nres * 16;              /* args start */
+  disp = 48;                             /* param area start */
+  for (uint32_t i = 0; i < nargs; i++) { /* load args: */
+    type = arg_types[i];
+    if ((type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD) && n_fpregs < 13) {
+      ppc64_gen_ld (ctx, 1 + n_fpregs, res_reg, param_offset, type);
+      if (vararg_p) {
+        if (n_gpregs >= 8) {
+          ppc64_gen_st (ctx, 1 + n_fpregs, 1, disp, MIR_T_D);
+        } else { /* load gp reg to */
+          ppc64_gen_st (ctx, 1 + n_fpregs, 1, -8, MIR_T_D);
+          ppc64_gen_ld (ctx, 3 + n_gpregs, 1, -8, MIR_T_I64);
+        }
+      }
+      n_fpregs++;
+      if (type == MIR_T_LD) {
+        if (n_fpregs < 13) {
+          ppc64_gen_ld (ctx, 1 + n_fpregs, res_reg, param_offset + 8, type);
+          if (vararg_p) {
+            if (n_gpregs + 1 >= 8) {
+              ppc64_gen_st (ctx, 1 + n_fpregs, 1, disp + 8, MIR_T_D);
+            } else { /* load gp reg to */
+              ppc64_gen_st (ctx, 1 + n_fpregs, 1, -8, MIR_T_D);
+              ppc64_gen_ld (ctx, 4 + n_gpregs, 1, -8, MIR_T_I64);
+            }
+          }
+          n_fpregs++;
+        } else {
+          ppc64_gen_ld (ctx, 0, res_reg, param_offset + 8, type);
+          ppc64_gen_st (ctx, 0, 1, disp + 8, MIR_T_D);
+        }
+      }
+    } else if (n_gpregs < 8) {
+      ppc64_gen_ld (ctx, n_gpregs + 3, res_reg, param_offset, MIR_T_I64);
+    } else if (type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD) {
+      ppc64_gen_ld (ctx, 0, res_reg, param_offset, type);
+      ppc64_gen_st (ctx, 0, 1, disp, MIR_T_D);
+      if (type == MIR_T_LD) {
+        ppc64_gen_ld (ctx, 0, res_reg, param_offset + 8, type);
+        ppc64_gen_st (ctx, 0, 1, disp + 8, MIR_T_D);
+      }
+    } else {
+      ppc64_gen_ld (ctx, 0, res_reg, param_offset, MIR_T_I64);
+      ppc64_gen_st (ctx, 0, 1, disp, MIR_T_I64);
+    }
+    disp += type == MIR_T_LD ? 16 : 8;
+    param_offset += 16;
+    n_gpregs += type == MIR_T_LD ? 2 : 1;
+  }
+  ppc64_gen_jump (ctx, 12, TRUE); /* call func_addr */
+  n_gpregs = n_fpregs = 0;
+  disp = 0;
+  for (uint32_t i = 0; i < nres; i++) { /* store results: */
+    type = res_types[i];
+    if ((type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD) && n_fpregs < 4) {
+      ppc64_gen_st (ctx, n_fpregs + 1, res_reg, disp, type);
+      n_fpregs++;
+      if (type == MIR_T_LD) {
+        if (n_fpregs >= 4)
+          (*error_func) (MIR_ret_error, "ppc64 can not handle this combination of return values");
+        ppc64_gen_st (ctx, n_fpregs + 1, res_reg, disp + 8, type);
+        n_fpregs++;
+      }
+    } else if (n_gpregs < 1) {  // just one gp reg
+      ppc64_gen_st (ctx, n_gpregs + 3, res_reg, disp, MIR_T_I64);
+      n_gpregs++;
+    } else {
+      (*error_func) (MIR_ret_error, "ppc64 can not handle this combination of return values");
+    }
+    disp += 16;
+  }
+  ppc64_gen_ld (ctx, res_reg, 1, 48 + param_size, MIR_T_I64); /* restore res_reg */
+  ppc64_gen_addi (ctx, 1, 1, frame_size);
+  push_insns (ctx, finish_pattern, sizeof (finish_pattern));
+  return ppc64_publish_func_and_redirect (ctx);
+}
+
+/* Transform C call to call of void handler (MIR_context_t ctx, MIR_item_t func_item,
+                                             va_list va, MIR_val_t *results):
+   Brief: put all C call args to local vars (or if va_arg do nothing); save lr (r1+16), r14;
+          allocate and form minimal shim stack frame (param area = 8 * 8);
+          call handler with args; move results(r14) to return regs; restore lr,r14,r1; return.
+
+   FUNC_ITEM is the function whose calls are redirected and HANDLER is the address through
+   which the generated code makes the call (see ppc64_gen_jump).  Returns the address of the
+   published code; no function descriptor is put in front of it. */
+void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handler) {
+  MIR_func_t func = func_item->u.func;
+  uint32_t nres = func->nres, nargs = func->nargs;
+  int vararg_p = func->vararg_p;
+  MIR_type_t type, *res_types = func->res_types;
+  MIR_var_t *arg_vars = VARR_ADDR (MIR_var_t, func->vars);
+  int disp, size, frame_size, local_var_size, param_offset;
+  int va_reg = 11, caller_r1 = 12, res_reg = 14;
+  int n_gpregs, n_fpregs;
+  static uint32_t start_pattern[] = {
+    0x7c0802a6, /* mflr r0 */
+    0xf8010010, /* std  r0,16(r1) */
+  };
+  static uint32_t finish_pattern[] = {
+    0xe8010010, /* ld   r0,16(r1) */
+    0x7c0803a6, /* mtlr r0 */
+    0x4e800020, /* blr */
+  };
+  static uint32_t save_gp_regs_pattern[] = {
+    0xf8610030, /* std r3,48(r1) */
+    0xf8810038, /* std r4,56(r1) */
+    0xf8a10040, /* std r5,64(r1) */
+    0xf8c10048, /* std r6,72(r1) */
+    0xf8e10050, /* std r7,80(r1) */
+    0xf9010058, /* std r8,88(r1) */
+    0xf9210060, /* std r9,96(r1) */
+    0xf9410068, /* std r10,104(r1) */
+  };
+
+  VARR_TRUNC (uint8_t, machine_insns, 0);
+  frame_size = 112;               /* 6(frame start) + 8(param area) */
+  local_var_size = nres * 16 + 8; /* saved r14, results */
+  if (vararg_p) {
+    push_insns (ctx, save_gp_regs_pattern, sizeof (save_gp_regs_pattern));
+    ppc64_gen_addi (ctx, va_reg, 1, 48);
+  } else {
+    ppc64_gen_mov (ctx, caller_r1, 1); /* caller frame r1 */
+    for (uint32_t i = 0; i < nargs; i++) {
+      type = arg_vars[i].type;
+      local_var_size += type == MIR_T_LD ? 16 : 8;
+    }
+  }
+  frame_size += local_var_size;
+  /* All the summands are multiples of 8, so the size can only be 0 or 8 modulo 16.  Pad it to
+     a multiple of 16 to keep the stack pointer quadword aligned: */
+  if (frame_size % 16 != 0) frame_size += 8; /* align */
+  push_insns (ctx, start_pattern, sizeof (start_pattern));
+  ppc64_gen_stdu (ctx, -frame_size);
+  ppc64_gen_st (ctx, res_reg, 1, 48 + 64, MIR_T_I64); /* save res_reg */
+  if (!vararg_p) {                                    /* save args in local vars: */
+    disp = 112 + nres * 16 + 8; /* 48 + 64 + nres * 16 + 8: start of local vars to keep args */
+    ppc64_gen_addi (ctx, va_reg, 1, disp);
+    param_offset = 48;
+    n_gpregs = n_fpregs = 0;
+    for (uint32_t i = 0; i < nargs; i++) {
+      type = arg_vars[i].type;
+      if ((type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD) && n_fpregs < 13) {
+        ppc64_gen_st (ctx, n_fpregs + 1, 1, disp, MIR_T_D);
+        n_fpregs++;
+        if (type == MIR_T_LD) {
+          if (n_fpregs < 13) {
+            ppc64_gen_st (ctx, n_fpregs + 1, 1, disp + 8, MIR_T_D);
+            n_fpregs++;
+          } else {
+            ppc64_gen_ld (ctx, 0, caller_r1, param_offset + 8, MIR_T_D);
+            ppc64_gen_st (ctx, 0, 1, disp + 8, MIR_T_D);
+          }
+        }
+      } else if (n_gpregs < 8) {
+        ppc64_gen_st (ctx, n_gpregs + 3, 1, disp, MIR_T_I64);
+      } else if (type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD) {
+        ppc64_gen_ld (ctx, 0, caller_r1, param_offset + (type == MIR_T_F ? 4 : 0), type);
+        ppc64_gen_st (ctx, 0, 1, disp, MIR_T_D);
+        if (type == MIR_T_LD) {
+          ppc64_gen_ld (ctx, 0, caller_r1, param_offset + 8, MIR_T_D);
+          ppc64_gen_st (ctx, 0, 1, disp + 8, MIR_T_D);
+        }
+      } else {
+        ppc64_gen_ld (ctx, 0, caller_r1, param_offset, MIR_T_I64);
+        ppc64_gen_st (ctx, 0, 1, disp, MIR_T_I64);
+      }
+      size = type == MIR_T_LD ? 16 : 8;
+      disp += size;
+      param_offset += size;
+      n_gpregs += type == MIR_T_LD ? 2 : 1;
+    }
+  }
+  ppc64_gen_addi (ctx, res_reg, 1, 64 + 48 + 8); /* results start right after saved res_reg */
+  ppc64_gen_address (ctx, 3, ctx);
+  ppc64_gen_address (ctx, 4, func_item);
+  ppc64_gen_mov (ctx, 5, va_reg);
+  ppc64_gen_mov (ctx, 6, res_reg);
+  ppc64_gen_address (ctx, 7, handler);
+  ppc64_gen_jump (ctx, 7, TRUE);
+  disp = n_gpregs = n_fpregs = 0;
+  for (uint32_t i = 0; i < nres; i++) { /* move results to return regs: */
+    type = res_types[i];
+    if ((type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD) && n_fpregs < 4) {
+      ppc64_gen_ld (ctx, n_fpregs + 1, res_reg, disp, type);
+      n_fpregs++;
+      if (type == MIR_T_LD) {
+        if (n_fpregs >= 4)
+          (*error_func) (MIR_ret_error, "ppc64 can not handle this combination of return values");
+        ppc64_gen_ld (ctx, n_fpregs + 1, res_reg, disp + 8, type);
+        n_fpregs++;
+      }
+    } else if (n_gpregs < 1) {  // just one gp reg
+      ppc64_gen_ld (ctx, n_gpregs + 3, res_reg, disp, MIR_T_I64);
+      n_gpregs++;
+    } else {
+      (*error_func) (MIR_ret_error, "ppc64 can not handle this combination of return values");
+    }
+    disp += 16;
+  }
+  ppc64_gen_ld (ctx, res_reg, 1, 48 + 64, MIR_T_I64); /* restore res_reg */
+  ppc64_gen_addi (ctx, 1, 1, frame_size);
+  push_insns (ctx, finish_pattern, sizeof (finish_pattern));
+  return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
+                            VARR_LENGTH (uint8_t, machine_insns));
+}
+
+/* Brief: save lr (r1+16); update r1, save all param regs (r1+112);
+          allocate and form minimal wrapper stack frame (param area = 8*8);
+          r3 = call hook_address (ctx, called_func);
+          restore params regs (r1+112),  r1, lr (r1+16); ctr=r11; b *ctr
+
+   CALLED_FUNC is passed to the hook as its second argument; the value the hook returns is
+   used as the target of the final jump (see ppc64_gen_jump).  Returns the address of the
+   published wrapper code. */
+void *_MIR_get_wrapper (MIR_context_t ctx, MIR_item_t called_func, void *hook_address) {
+  static uint32_t prologue[] = {
+    0x7c0802a6, /* mflr r0 */
+    0xf8010010, /* std  r0,16(r1) */
+    0xf821fee9, /* stdu r1,-280(r1): 6(frame start) + 8(gp args) + 13(fp args) + 8(param area) */
+    0xf8610070, /* std  r3,112(r1) */
+    0xf8810078, /* std  r4,120(r1) */
+    0xf8a10080, /* std  r5,128(r1) */
+    0xf8c10088, /* std  r6,136(r1) */
+    0xf8e10090, /* std  r7,144(r1) */
+    0xf9010098, /* std  r8,152(r1) */
+    0xf92100a0, /* std  r9,160(r1) */
+    0xf94100a8, /* std  r10,168(r1) */
+    0xd82100b0, /* stfd f1,176(r1) */
+    0xd84100b8, /* stfd f2,184(r1) */
+    0xd86100c0, /* stfd f3,192(r1) */
+    0xd88100c8, /* stfd f4,200(r1) */
+    0xd8a100d0, /* stfd f5,208(r1) */
+    0xd8c100d8, /* stfd f6,216(r1) */
+    0xd8e100e0, /* stfd f7,224(r1) */
+    0xd90100e8, /* stfd f8,232(r1) */
+    0xd92100f0, /* stfd f9,240(r1) */
+    0xd94100f8, /* stfd f10,248(r1) */
+    0xd9610100, /* stfd f11,256(r1) */
+    0xd9810108, /* stfd f12,264(r1) */
+    0xd9a10110, /* stfd f13,272(r1) */
+  };
+  static uint32_t epilogue[] = {
+    0xe8610070, /* ld   r3,112(r1) */
+    0xe8810078, /* ld   r4,120(r1) */
+    0xe8a10080, /* ld   r5,128(r1) */
+    0xe8c10088, /* ld   r6,136(r1) */
+    0xe8e10090, /* ld   r7,144(r1) */
+    0xe9010098, /* ld   r8,152(r1) */
+    0xe92100a0, /* ld   r9,160(r1) */
+    0xe94100a8, /* ld   r10,168(r1) */
+    0xc82100b0, /* lfd  f1,176(r1) */
+    0xc84100b8, /* lfd  f2,184(r1) */
+    0xc86100c0, /* lfd  f3,192(r1) */
+    0xc88100c8, /* lfd  f4,200(r1) */
+    0xc8a100d0, /* lfd  f5,208(r1) */
+    0xc8c100d8, /* lfd  f6,216(r1) */
+    0xc8e100e0, /* lfd  f7,224(r1) */
+    0xc90100e8, /* lfd  f8,232(r1) */
+    0xc92100f0, /* lfd  f9,240(r1) */
+    0xc94100f8, /* lfd  f10,248(r1) */
+    0xc9610100, /* lfd  f11,256(r1) */
+    0xc9810108, /* lfd  f12,264(r1) */
+    0xc9a10110, /* lfd  f13,272(r1) */
+    0x38210118, /* addi r1,r1,280 */
+    0xe8010010, /* ld   r0,16(r1) */
+    0x7c0803a6, /* mtlr r0 */
+  };
+
+  VARR_TRUNC (uint8_t, machine_insns, 0);
+  push_insns (ctx, prologue, sizeof (prologue));
+  ppc64_gen_address (ctx, 3, ctx);
+  ppc64_gen_address (ctx, 4, called_func);
+  ppc64_gen_address (ctx, 5, hook_address);
+  ppc64_gen_jump (ctx, 5, TRUE);
+  ppc64_gen_mov (ctx, 11, 3); /* keep the hook result: the epilogue reloads r3 */
+  push_insns (ctx, epilogue, sizeof (epilogue));
+  ppc64_gen_jump (ctx, 11, FALSE);
+  /* The function is declared to return the wrapper address, so publish the code and return
+     it the same way as _MIR_get_interp_shim does: */
+  return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
+                            VARR_LENGTH (uint8_t, machine_insns));
+}
--- a/mir/mir-x86_64.c
+++ b/mir/mir-x86_64.c
@ -2,6 +2,8 @@
   Copyright (C) 2018-2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
 */

+#define VA_LIST_IS_ARRAY_P 1
+
 void *_MIR_get_bstart_builtin (MIR_context_t ctx) {
  static const uint8_t bstart_code[] = {
    0x48, 0x8d, 0x44, 0x24, 0x08, /* rax = rsp + 8 (lea) */
@ -170,7 +172,7 @@ static void gen_st80 (MIR_context_t ctx, uint32_t src_offset) {
   r10=mem[rbx,<offset>]; res_reg=mem[r10]; ...
   pop rbx; ret. */
 void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, size_t nargs,
-                        MIR_type_t *arg_types) {
+                        MIR_type_t *arg_types, int vararg_p) {
  static const uint8_t prolog[] = {
    0x53,                         /* pushq %rbx */
    0x48, 0x81, 0xec, 0, 0, 0, 0, /* subq <sp_offset>, %rsp */
@ -238,32 +240,33 @@ void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, s
                            VARR_LENGTH (uint8_t, machine_insns));
 }

+/* Transform C call to call of void handler (MIR_context_t ctx, MIR_item_t func_item,
+                                             va_list va, MIR_val_t *results) */
 void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handler) {
  static const uint8_t push_rbx[] = {0x53, /*push   %rbx  */};
  static const uint8_t prepare_pat[] = {
    /*  0: */ 0x48, 0x83, 0xec, 0x20,                      /* sub    32,%rsp	     */
    /*  4: */ 0x48, 0x89, 0xe2,                            /* mov    %rsp,%rdx	     */
    /*  7: */ 0xc7, 0x02, 0,    0,    0,    0,             /* movl   0,(%rdx)	     */
-    /*  d: */ 0xc7, 0x42, 0x04, 0x30, 0,    0, 0,          /* movl   48, 4(%rdx)	     */
+    /*  d: */ 0xc7, 0x42, 0x04, 0x30, 0,    0, 0,          /* movl   48, 4(%rdx)     */
    /* 14: */ 0x48, 0x8d, 0x44, 0x24, 0x20,                /* lea    32(%rsp),%rax   */
-    /* 19: */ 0x48, 0x89, 0x42, 0x10,                      /* mov    %rax,16(%rdx)     */
-    /* 1d: */ 0x48, 0x8d, 0x84, 0x24, 0xe0, 0, 0, 0,       /* lea    224(%rsp),%rax   */
+    /* 19: */ 0x48, 0x89, 0x42, 0x10,                      /* mov    %rax,16(%rdx)   */
+    /* 1d: */ 0x48, 0x8d, 0x84, 0x24, 0xe0, 0, 0, 0,       /* lea    224(%rsp),%rax  */
    /* 25: */ 0x48, 0x89, 0x42, 0x08,                      /* mov    %rax,8(%rdx)    */
    /* 29: */ 0x48, 0x81, 0xec, 0,    0,    0, 0,          /* sub    <n>,%rsp	     */
    /* 30: */ 0x48, 0x89, 0xe3,                            /* mov    %rsp,%rbx	     */
    /* 33: */ 0x48, 0x89, 0xe1,                            /* mov    %rsp,%rcx	     */
-    /* 36: */ 0x48, 0xbf, 0,    0,    0,    0, 0, 0, 0, 0, /* movabs <ctx>,%rdi */
-    /* 40: */ 0x48, 0xbe, 0,    0,    0,    0, 0, 0, 0, 0, /* movabs <func_item>,%rsi */
-    /* 4a: */ 0x48, 0xb8, 0,    0,    0,    0, 0, 0, 0, 0, /* movabs <handler>,%rax    */
-    /* 54: */ 0xff, 0xd0,                                  /* callq  *%rax            */
+    /* 36: */ 0x48, 0xbf, 0,    0,    0,    0, 0, 0, 0, 0, /* movabs <ctx>,%rdi      */
+    /* 40: */ 0x48, 0xbe, 0,    0,    0,    0, 0, 0, 0, 0, /* movabs <func_item>,%rsi*/
+    /* 4a: */ 0x48, 0xb8, 0,    0,    0,    0, 0, 0, 0, 0, /* movabs <handler>,%rax  */
+    /* 54: */ 0xff, 0xd0,                                  /* callq  *%rax           */
  };
  static const uint8_t shim_end[] = {
    /* 0: */ 0x48, 0x81, 0xc4, 0, 0, 0, 0, /*add    208+n,%rsp*/
    /* 7: */ 0x5b,                         /*pop          %rbx*/
    /* 8: */ 0xc3,                         /*retq             */
  };
-  static const uint8_t ld_pat[]
-    = {0x48, 0x8b, 0x83, 0, 0, 0, 0}; /* movss <offset>(%rbx), %xmm[01] */
+  static const uint8_t ld_pat[] = {0x48, 0x8b, 0x83, 0, 0, 0, 0}; /* mov <offset>(%rbx), %reg */
  static const uint8_t movss_pat[]
    = {0xf3, 0x0f, 0x10, 0x83, 0, 0, 0, 0}; /* movss <offset>(%rbx), %xmm[01] */
  static const uint8_t movsd_pat[]
@ -348,7 +351,3 @@ void *_MIR_get_wrapper (MIR_context_t ctx, MIR_item_t called_func, void *hook_ad
  return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
                            VARR_LENGTH (uint8_t, machine_insns));
 }
-
-static void machine_init (MIR_context_t ctx) { VARR_CREATE (uint8_t, machine_insns, 1024); }
-
-static void machine_finish (MIR_context_t ctx) { VARR_DESTROY (uint8_t, machine_insns); }
--- a/mir/mir.c
+++ b/mir/mir.c
@ -328,9 +328,11 @@ static void check_and_prepare_insn_descs (MIR_context_t ctx) {
 }

 static MIR_op_mode_t type2mode (MIR_type_t type) {
-  return (type == MIR_T_F
-            ? MIR_OP_FLOAT
-            : type == MIR_T_D ? MIR_OP_DOUBLE : type == MIR_T_LD ? MIR_OP_LDOUBLE : MIR_OP_INT);
+  return (type == MIR_T_UNDEF
+            ? MIR_OP_UNDEF
+            : type == MIR_T_F
+                ? MIR_OP_FLOAT
+                : type == MIR_T_D ? MIR_OP_DOUBLE : type == MIR_T_LD ? MIR_OP_LDOUBLE : MIR_OP_INT);
 }

 /* New Page */
@ -351,8 +353,10 @@ struct string_ctx {
 #define strings ctx->string_ctx->strings
 #define string_tab ctx->string_ctx->string_tab

-static htab_hash_t str_hash (string_t str) { return mir_hash (str.str.s, str.str.len, 0); }
-static int str_eq (string_t str1, string_t str2) {
+static htab_hash_t str_hash (string_t str, void *arg) {
+  return mir_hash (str.str.s, str.str.len, 0);
+}
+static int str_eq (string_t str1, string_t str2, void *arg) {
  return str1.str.len == str2.str.len && memcmp (str1.str.s, str2.str.s, str1.str.len) == 0;
 }

@ -361,7 +365,7 @@ static void string_init (VARR (string_t) * *strs, HTAB (string_t) * *str_tab) {

  VARR_CREATE (string_t, *strs, 0);
  VARR_PUSH (string_t, *strs, string); /* don't use 0th string */
-  HTAB_CREATE (string_t, *str_tab, 1000, str_hash, str_eq);
+  HTAB_CREATE (string_t, *str_tab, 1000, str_hash, str_eq, NULL);
 }

 static int string_find (VARR (string_t) * *strs, HTAB (string_t) * *str_tab, MIR_str_t str,
@ -409,56 +413,47 @@ typedef struct reg_desc {

 DEF_VARR (reg_desc_t);

-typedef struct size_ctx {
-  size_t rdn;
-  MIR_context_t ctx;
-} size_ctx_t;
-
-DEF_HTAB (size_ctx_t);
+DEF_HTAB (size_t);

 struct reg_ctx {
  VARR (reg_desc_t) * reg_descs;
-  HTAB (size_ctx_t) * namenum2rdn_tab;
-  HTAB (size_ctx_t) * reg2rdn_tab;
+  HTAB (size_t) * namenum2rdn_tab;
+  HTAB (size_t) * reg2rdn_tab;
 };

 #define reg_descs ctx->reg_ctx->reg_descs
 #define namenum2rdn_tab ctx->reg_ctx->namenum2rdn_tab
 #define reg2rdn_tab ctx->reg_ctx->reg2rdn_tab

-static int namenum2rdn_eq (size_ctx_t sc1, size_ctx_t sc2) {
-  MIR_context_t ctx = sc1.ctx;
+static int namenum2rdn_eq (size_t rdn1, size_t rdn2, void *arg) {
+  MIR_context_t ctx = arg;
  reg_desc_t *addr = VARR_ADDR (reg_desc_t, reg_descs);

-  mir_assert (ctx == sc2.ctx);
-  return (addr[sc1.rdn].name_num == addr[sc2.rdn].name_num
-          && addr[sc1.rdn].func == addr[sc2.rdn].func);
+  return (addr[rdn1].name_num == addr[rdn2].name_num && addr[rdn1].func == addr[rdn2].func);
 }

-static htab_hash_t namenum2rdn_hash (size_ctx_t sc) {
-  MIR_context_t ctx = sc.ctx;
+static htab_hash_t namenum2rdn_hash (size_t rdn, void *arg) {
+  MIR_context_t ctx = arg;
  reg_desc_t *addr = VARR_ADDR (reg_desc_t, reg_descs);

  return mir_hash_finish (
-    mir_hash_step (mir_hash_step (mir_hash_init (0), (uint64_t) addr[sc.rdn].func),
-                   (uint64_t) addr[sc.rdn].name_num));
+    mir_hash_step (mir_hash_step (mir_hash_init (0), (uint64_t) addr[rdn].func),
+                   (uint64_t) addr[rdn].name_num));
 }

-static int reg2rdn_eq (size_ctx_t sc1, size_ctx_t sc2) {
-  MIR_context_t ctx = sc1.ctx;
+static int reg2rdn_eq (size_t rdn1, size_t rdn2, void *arg) {
+  MIR_context_t ctx = arg;
  reg_desc_t *addr = VARR_ADDR (reg_desc_t, reg_descs);

-  mir_assert (ctx == sc2.ctx);
-  return addr[sc1.rdn].reg == addr[sc2.rdn].reg && addr[sc1.rdn].func == addr[sc2.rdn].func;
+  return addr[rdn1].reg == addr[rdn2].reg && addr[rdn1].func == addr[rdn2].func;
 }

-static htab_hash_t reg2rdn_hash (size_ctx_t sc) {
-  MIR_context_t ctx = sc.ctx;
+static htab_hash_t reg2rdn_hash (size_t rdn, void *arg) {
+  MIR_context_t ctx = arg;
  reg_desc_t *addr = VARR_ADDR (reg_desc_t, reg_descs);

  return mir_hash_finish (
-    mir_hash_step (mir_hash_step (mir_hash_init (0), (uint64_t) addr[sc.rdn].func),
-                   addr[sc.rdn].reg));
+    mir_hash_step (mir_hash_step (mir_hash_init (0), (uint64_t) addr[rdn].func), addr[rdn].reg));
 }

 static void reg_init (MIR_context_t ctx) {
@ -468,14 +463,14 @@ static void reg_init (MIR_context_t ctx) {
    (*error_func) (MIR_alloc_error, "Not enough memory for ctx");
  VARR_CREATE (reg_desc_t, reg_descs, 300);
  VARR_PUSH (reg_desc_t, reg_descs, rd); /* for 0 reg */
-  HTAB_CREATE (size_ctx_t, namenum2rdn_tab, 300, namenum2rdn_hash, namenum2rdn_eq);
-  HTAB_CREATE (size_ctx_t, reg2rdn_tab, 300, reg2rdn_hash, reg2rdn_eq);
+  HTAB_CREATE (size_t, namenum2rdn_tab, 300, namenum2rdn_hash, namenum2rdn_eq, ctx);
+  HTAB_CREATE (size_t, reg2rdn_tab, 300, reg2rdn_hash, reg2rdn_eq, ctx);
 }

 static MIR_reg_t create_func_reg (MIR_context_t ctx, MIR_func_t func, const char *name,
                                  MIR_reg_t reg, MIR_type_t type, int any_p) {
  reg_desc_t rd;
-  size_ctx_t sc, tab_sc;
+  size_t rdn, tab_rdn;
  int htab_res;

  if (!any_p && _MIR_reserved_name_p (ctx, name))
@ -484,24 +479,23 @@ static MIR_reg_t create_func_reg (MIR_context_t ctx, MIR_func_t func, const char
  rd.func = func;
  rd.type = type;
  rd.reg = reg; /* 0 is reserved */
-  sc.rdn = VARR_LENGTH (reg_desc_t, reg_descs);
-  sc.ctx = ctx;
+  rdn = VARR_LENGTH (reg_desc_t, reg_descs);
  VARR_PUSH (reg_desc_t, reg_descs, rd);
-  if (HTAB_DO (size_ctx_t, namenum2rdn_tab, sc, HTAB_FIND, tab_sc)) {
+  if (HTAB_DO (size_t, namenum2rdn_tab, rdn, HTAB_FIND, tab_rdn)) {
    VARR_POP (reg_desc_t, reg_descs);
    (*error_func) (MIR_repeated_decl_error, "Repeated reg declaration %s", name);
  }
-  htab_res = HTAB_DO (size_ctx_t, namenum2rdn_tab, sc, HTAB_INSERT, tab_sc);
+  htab_res = HTAB_DO (size_t, namenum2rdn_tab, rdn, HTAB_INSERT, tab_rdn);
  mir_assert (!htab_res);
-  htab_res = HTAB_DO (size_ctx_t, reg2rdn_tab, sc, HTAB_INSERT, tab_sc);
+  htab_res = HTAB_DO (size_t, reg2rdn_tab, rdn, HTAB_INSERT, tab_rdn);
  mir_assert (!htab_res);
  return reg;
 }

 static void reg_finish (MIR_context_t ctx) { /* tears down what reg_init created */
  VARR_DESTROY (reg_desc_t, reg_descs);
-  HTAB_DESTROY (size_ctx_t, namenum2rdn_tab);
-  HTAB_DESTROY (size_ctx_t, reg2rdn_tab);
+  HTAB_DESTROY (size_t, namenum2rdn_tab); /* element type must match the HTAB_CREATE in reg_init */
+  HTAB_DESTROY (size_t, reg2rdn_tab);
  free (ctx->reg_ctx);
  ctx->reg_ctx = NULL; /* do not leave a dangling pointer in ctx */
 }
@ -519,11 +513,11 @@ const char *MIR_item_name (MIR_context_t ctx, MIR_item_t item) {
            : item->item_type == MIR_proto_item
                ? item->u.proto->name
                : item->item_type == MIR_import_item
-                    ? item->u.import
+                    ? item->u.import_id
                    : item->item_type == MIR_export_item
-                        ? item->u.export
+                        ? item->u.export_id
                        : item->item_type == MIR_forward_item
-                            ? item->u.forward
+                            ? item->u.forward_id
                            : item->item_type == MIR_bss_item
                                ? item->u.bss->name
                                : item->item_type == MIR_data_item
@ -550,12 +544,12 @@ MIR_error_func_t MIR_get_error_func (MIR_context_t ctx) { return error_func; }

 void MIR_set_error_func (MIR_context_t ctx, MIR_error_func_t func) { error_func = func; }

-static htab_hash_t item_hash (MIR_item_t it) {
+static htab_hash_t item_hash (MIR_item_t it, void *arg) { /* arg unused: NULL is given to HTAB_CREATE */
  return mir_hash_finish ( /* hashes the name and module pointer values, not the name characters */
    mir_hash_step (mir_hash_step (mir_hash_init (28), (uint64_t) MIR_item_name (NULL, it)),
                   (uint64_t) it->module));
 }
-static int item_eq (MIR_item_t it1, MIR_item_t it2) {
+static int item_eq (MIR_item_t it1, MIR_item_t it2, void *arg) { /* arg unused: NULL is given to HTAB_CREATE */
  return it1->module == it2->module && MIR_item_name (NULL, it1) == MIR_item_name (NULL, it2); /* names compared by pointer; presumably relies on names being interned -- TODO confirm */
 }

@ -624,7 +618,7 @@ MIR_context_t MIR_init (void) {
 #endif
  VARR_CREATE (MIR_module_t, modules_to_link, 0);
  init_module (ctx, &environment_module, ".environment");
-  HTAB_CREATE (MIR_item_t, module_item_tab, 512, item_hash, item_eq);
+  HTAB_CREATE (MIR_item_t, module_item_tab, 512, item_hash, item_eq, NULL);
  code_init (ctx);
  interp_init (ctx);
  inlined_calls = inline_insns_before = inline_insns_after = 0;
@ -775,6 +769,7 @@ static const char *type_str (MIR_type_t tp) {
  case MIR_T_D: return "d";
  case MIR_T_LD: return "ld";
  case MIR_T_P: return "p";
+  case MIR_T_UNDEF: return "undef";
  default: return "";
  }
 }
@ -802,6 +797,7 @@ static const char *mode_str (MIR_op_mode_t mode) {
  case MIR_OP_HARD_REG_MEM: return "hard_reg_mem";
  case MIR_OP_LABEL: return "label";
  case MIR_OP_BOUND: return "bound";
+  case MIR_OP_UNDEF: return "undef";
  default: return "";
  }
 }
@ -819,14 +815,15 @@ static MIR_item_t add_item (MIR_context_t ctx, MIR_item_t item) {
  case MIR_import_item:
    if (item->item_type != MIR_import_item)
      (*error_func) (MIR_import_export_error,
-                     "existing module definition %s already defined as import", tab_item->u.import);
+                     "existing module definition %s already defined as import",
+                     tab_item->u.import_id);
    item = tab_item;
    break;
  case MIR_export_item:
  case MIR_forward_item:
    replace_p = FALSE;
    if (item->item_type == MIR_import_item) {
-      (*error_func) (MIR_import_export_error, "export/forward of import %s", item->u.import);
+      (*error_func) (MIR_import_export_error, "export/forward of import %s", item->u.import_id);
    } else if (item->item_type != MIR_export_item && item->item_type != MIR_forward_item) {
      replace_p = TRUE;
      DLIST_APPEND (MIR_item_t, curr_module->items, item);
@ -867,7 +864,7 @@ static MIR_item_t add_item (MIR_context_t ctx, MIR_item_t item) {
      DLIST_APPEND (MIR_item_t, curr_module->items, item);
      item->ref_def = tab_item;
    } else if (item->item_type == MIR_import_item) {
-      (*error_func) (MIR_import_export_error, "import of local definition %s", item->u.import);
+      (*error_func) (MIR_import_export_error, "import of local definition %s", item->u.import_id);
    } else {
      (*error_func) (MIR_repeated_decl_error, "Repeated item declaration %s",
                     MIR_item_name (ctx, item));
@ -904,11 +901,11 @@ static MIR_item_t new_export_import_forward (MIR_context_t ctx, const char *name
  item = create_item (ctx, item_type, item_name);
  uniq_name = string_store (ctx, &strings, &string_tab, (MIR_str_t){strlen (name) + 1, name}).str.s;
  if (item_type == MIR_export_item)
-    item->u.export = uniq_name;
+    item->u.export_id = uniq_name;
  else if (item_type == MIR_import_item)
-    item->u.import = uniq_name;
+    item->u.import_id = uniq_name;
  else
-    item->u.forward = uniq_name;
+    item->u.forward_id = uniq_name;
  if (create_only_p) return item;
  if ((tab_item = add_item (ctx, item)) != item) {
    free (item);
@ -1235,41 +1232,39 @@ MIR_reg_t MIR_new_func_reg (MIR_context_t ctx, MIR_func_t func, MIR_type_t type,
 }

 static reg_desc_t *find_rd_by_name_num (MIR_context_t ctx, size_t name_num, MIR_func_t func) {
-  size_ctx_t sc, temp_sc;
+  size_t rdn, temp_rdn; /* rdn: index found in the table; temp_rdn: index of the probe entry */
  reg_desc_t rd; /* probe descriptor carrying the lookup keys */

  rd.name_num = name_num;
  rd.func = func; /* keys */
  rd.type = MIR_T_I64;
  rd.reg = 0; /* to eliminate warnings */
-  temp_sc.rdn = VARR_LENGTH (reg_desc_t, reg_descs);
-  temp_sc.ctx = ctx;
+  temp_rdn = VARR_LENGTH (reg_desc_t, reg_descs); /* index the probe gets once pushed below */
  VARR_PUSH (reg_desc_t, reg_descs, rd); /* the table stores indexes, so the probe must live in reg_descs */
-  if (!HTAB_DO (size_ctx_t, namenum2rdn_tab, temp_sc, HTAB_FIND, sc)) {
+  if (!HTAB_DO (size_t, namenum2rdn_tab, temp_rdn, HTAB_FIND, rdn)) {
    VARR_POP (reg_desc_t, reg_descs);
    return NULL; /* undeclared */
  }
  VARR_POP (reg_desc_t, reg_descs); /* drop the probe entry again */
-  return &VARR_ADDR (reg_desc_t, reg_descs)[sc.rdn];
+  return &VARR_ADDR (reg_desc_t, reg_descs)[rdn]; /* points into reg_descs storage */
 }

 static reg_desc_t *find_rd_by_reg (MIR_context_t ctx, MIR_reg_t reg, MIR_func_t func) {
-  size_ctx_t sc, temp_sc;
+  size_t rdn, temp_rdn; /* rdn: index found in the table; temp_rdn: index of the probe entry */
  reg_desc_t rd; /* probe descriptor carrying the lookup keys */

  rd.reg = reg;
  rd.func = func; /* keys */
  rd.name_num = 0;
  rd.type = MIR_T_I64; /* to eliminate warnings */
-  temp_sc.rdn = VARR_LENGTH (reg_desc_t, reg_descs);
-  temp_sc.ctx = ctx;
+  temp_rdn = VARR_LENGTH (reg_desc_t, reg_descs); /* index the probe gets once pushed below */
  VARR_PUSH (reg_desc_t, reg_descs, rd); /* the table stores indexes, so the probe must live in reg_descs */
-  if (!HTAB_DO (size_ctx_t, reg2rdn_tab, temp_sc, HTAB_FIND, sc)) {
+  if (!HTAB_DO (size_t, reg2rdn_tab, temp_rdn, HTAB_FIND, rdn)) {
    VARR_POP (reg_desc_t, reg_descs);
    (*error_func) (MIR_undeclared_func_reg_error, "undeclared reg %u of func %s", reg, func->name); /* NOTE(review): presumably does not return; otherwise the pop below would run a second time -- confirm */
  }
  VARR_POP (reg_desc_t, reg_descs); /* drop the probe entry again */
-  return &VARR_ADDR (reg_desc_t, reg_descs)[sc.rdn];
+  return &VARR_ADDR (reg_desc_t, reg_descs)[rdn]; /* points into reg_descs storage */
 }

 void MIR_finish_func (MIR_context_t ctx) {
@ -1369,8 +1364,12 @@ void MIR_finish_func (MIR_context_t ctx) {
        break;
      }
      insn->ops[i].value_mode = mode;
-      if (expected_mode != MIR_OP_UNDEF
-          && (mode == MIR_OP_UINT ? MIR_OP_INT : mode) != expected_mode) {
+      if (mode == MIR_OP_UNDEF && insn->ops[i].mode == MIR_OP_MEM
+          && ((code == MIR_VA_START && i == 0) || (code == MIR_VA_ARG && i == 1)
+              || (code == MIR_VA_END && i == 1))) { /* a special case: va_list as undef type mem */
+        insn->ops[i].value_mode = expected_mode;
+      } else if (expected_mode != MIR_OP_UNDEF
+                 && (mode == MIR_OP_UINT ? MIR_OP_INT : mode) != expected_mode) {
        curr_func = NULL;
        (*error_func) (MIR_op_mode_error,
                       "in instruction '%s': unexpected operand mode for operand #%d. Got '%s', "
@ -1543,28 +1542,28 @@ void MIR_link (MIR_context_t ctx, void (*set_interface) (MIR_context_t ctx, MIR_
         item = DLIST_NEXT (MIR_item_t, item))
      if (item->item_type == MIR_func_item) {
        assert (item->data == NULL);
-        if (simplify_func (ctx, item, TRUE)) item->data = (void *) 1;
+        if (simplify_func (ctx, item, TRUE)) item->data = (void *) 1; /* flag inlining */
      } else if (item->item_type == MIR_import_item) {
-        if ((tab_item = find_item (ctx, item->u.import, &environment_module)) == NULL) {
-          if (import_resolver == NULL || (addr = import_resolver (item->u.import)) == NULL)
+        if ((tab_item = find_item (ctx, item->u.import_id, &environment_module)) == NULL) {
+          if (import_resolver == NULL || (addr = import_resolver (item->u.import_id)) == NULL)
            (*error_func) (MIR_undeclared_op_ref_error, "import of undefined item %s",
-                           item->u.import);
-          MIR_load_external (ctx, item->u.import, addr);
-          tab_item = find_item (ctx, item->u.import, &environment_module);
+                           item->u.import_id);
+          MIR_load_external (ctx, item->u.import_id, addr);
+          tab_item = find_item (ctx, item->u.import_id, &environment_module);
          mir_assert (tab_item != NULL);
        }
        item->addr = tab_item->addr;
        item->ref_def = tab_item;
      } else if (item->item_type == MIR_export_item) {
-        if ((tab_item = find_item (ctx, item->u.export, m)) == NULL)
+        if ((tab_item = find_item (ctx, item->u.export_id, m)) == NULL)
          (*error_func) (MIR_undeclared_op_ref_error, "export of undefined item %s",
-                         item->u.export);
+                         item->u.export_id);
        item->addr = tab_item->addr;
        item->ref_def = tab_item;
      } else if (item->item_type == MIR_forward_item) {
-        if ((tab_item = find_item (ctx, item->u.forward, m)) == NULL)
+        if ((tab_item = find_item (ctx, item->u.forward_id, m)) == NULL)
          (*error_func) (MIR_undeclared_op_ref_error, "forward of undefined item %s",
-                         item->u.forward);
+                         item->u.forward_id);
        item->addr = tab_item->addr;
        item->ref_def = tab_item;
      }
@ -1576,6 +1575,10 @@ void MIR_link (MIR_context_t ctx, void (*set_interface) (MIR_context_t ctx, MIR_
      if (item->item_type == MIR_func_item && item->data != NULL) {
        process_inlines (ctx, item);
        item->data = NULL;
+#if 0
+	/* Debug aid: dump the function just handled by process_inlines (the loop variable is item). */
+	fprintf (stderr, "+++++ Function after inlining:\n");
+	MIR_output_item (ctx, stderr, item);
+#endif
      } else if (item->item_type == MIR_ref_data_item) {
        assert (item->u.ref_data->ref_item->addr != NULL);
        addr = (char *) item->u.ref_data->ref_item->addr + item->u.ref_data->disp;
@ -2174,7 +2177,7 @@ void MIR_output_op (MIR_context_t ctx, FILE *f, MIR_op_t op, MIR_func_t func) {
  case MIR_OP_UINT: fprintf (f, "%" PRIu64, op.u.u); break;
  case MIR_OP_FLOAT: fprintf (f, "%.*ef", FLT_MANT_DIG, op.u.f); break;
  case MIR_OP_DOUBLE: fprintf (f, "%.*e", DBL_MANT_DIG, op.u.d); break;
-  case MIR_OP_LDOUBLE: fprintf (f, "%.*Le", LDBL_MANT_DIG, op.u.ld); break;
+  case MIR_OP_LDOUBLE: fprintf (f, "%.*LeL", LDBL_MANT_DIG, op.u.ld); break;
  case MIR_OP_MEM:
  case MIR_OP_HARD_REG_MEM: {
    MIR_reg_t no_reg = op.mode == MIR_OP_MEM ? 0 : MIR_NON_HARD_REG;
@ -2267,15 +2270,15 @@ void MIR_output_item (MIR_context_t ctx, FILE *f, MIR_item_t item) {

  mir_assert (f != NULL && item != NULL);
  if (item->item_type == MIR_export_item) {
-    fprintf (f, "\texport\t%s\n", item->u.export);
+    fprintf (f, "\texport\t%s\n", item->u.export_id);
    return;
  }
  if (item->item_type == MIR_import_item) {
-    fprintf (f, "\timport\t%s\n", item->u.import);
+    fprintf (f, "\timport\t%s\n", item->u.import_id);
    return;
  }
  if (item->item_type == MIR_forward_item) {
-    fprintf (f, "\tforward\t%s\n", item->u.forward);
+    fprintf (f, "\tforward\t%s\n", item->u.forward_id);
    return;
  }
  if (item->item_type == MIR_bss_item) {
@ -2312,7 +2315,7 @@ void MIR_output_item (MIR_context_t ctx, FILE *f, MIR_item_t item) {
      case MIR_T_F: fprintf (f, "%.*ef", FLT_MANT_DIG, ((float *) data->u.els)[i]); break;
      case MIR_T_D: fprintf (f, "%.*e", DBL_MANT_DIG, ((double *) data->u.els)[i]); break;
      case MIR_T_LD:
-        fprintf (f, "%.*Le", LDBL_MANT_DIG, ((long double *) data->u.els)[i]);
+        fprintf (f, "%.*LeL", LDBL_MANT_DIG, ((long double *) data->u.els)[i]);
        break;
        /* only ptr as ref ??? */
      case MIR_T_P: fprintf (f, "0x%" PRIxPTR, ((uintptr_t *) data->u.els)[i]); break;
@ -2385,7 +2388,6 @@ typedef struct {
  MIR_type_t type;
  MIR_op_t op1, op2;
  MIR_reg_t reg;
-  MIR_context_t ctx;
 } val_t;

 DEF_HTAB (val_t);
@ -2396,27 +2398,28 @@ struct simplify_ctx {

 #define val_tab ctx->simplify_ctx->val_tab

-static htab_hash_t val_hash (val_t v) {
+static htab_hash_t val_hash (val_t v, void *arg) { /* hash callback of val_tab */
+  MIR_context_t ctx = arg; /* arg is the ctx given to HTAB_CREATE in vn_init */
  htab_hash_t h;

  h = mir_hash_step (mir_hash_init (0), (uint64_t) v.code);
  h = mir_hash_step (h, (uint64_t) v.type);
-  h = MIR_op_hash_step (v.ctx, h, v.op1);
-  if (v.code != MIR_INSN_BOUND) h = MIR_op_hash_step (v.ctx, h, v.op2);
+  h = MIR_op_hash_step (ctx, h, v.op1);
+  if (v.code != MIR_INSN_BOUND) h = MIR_op_hash_step (ctx, h, v.op2); /* op2 ignored for MIR_INSN_BOUND, as in val_eq */
  return mir_hash_finish (h);
 }

-static int val_eq (val_t v1, val_t v2) {
-  assert (v1.ctx == v2.ctx);
-  if (v1.code != v2.code || v1.type != v2.type || !MIR_op_eq_p (v1.ctx, v1.op1, v2.op1))
-    return FALSE;
-  return v1.code == MIR_INSN_BOUND || MIR_op_eq_p (v1.ctx, v1.op2, v2.op2);
+static int val_eq (val_t v1, val_t v2, void *arg) { /* eq callback of val_tab; the reg field is not compared */
+  MIR_context_t ctx = arg; /* arg is the ctx given to HTAB_CREATE in vn_init */
+
+  if (v1.code != v2.code || v1.type != v2.type || !MIR_op_eq_p (ctx, v1.op1, v2.op1)) return FALSE;
+  return v1.code == MIR_INSN_BOUND || MIR_op_eq_p (ctx, v1.op2, v2.op2); /* op2 ignored for MIR_INSN_BOUND */
 }

 static void vn_init (MIR_context_t ctx) { /* allocates ctx->simplify_ctx and creates val_tab */
  if ((ctx->simplify_ctx = malloc (sizeof (struct simplify_ctx))) == NULL)
    (*error_func) (MIR_alloc_error, "Not enough memory for ctx");
-  HTAB_CREATE (val_t, val_tab, 512, val_hash, val_eq);
+  HTAB_CREATE (val_t, val_tab, 512, val_hash, val_eq, ctx); /* ctx is handed back to val_hash/val_eq as arg */
 }

 static void vn_finish (MIR_context_t ctx) {
@ -2435,7 +2438,6 @@ static MIR_reg_t vn_add_val (MIR_context_t ctx, MIR_func_t func, MIR_type_t type
  val.code = code;
  val.op1 = op1;
  val.op2 = op2;
-  val.ctx = ctx;
  if (HTAB_DO (val_t, val_tab, val, HTAB_FIND, tab_val)) return tab_val.reg;
  val.reg = _MIR_new_temp_reg (ctx, type, func);
  HTAB_DO (val_t, val_tab, val, HTAB_INSERT, tab_val);
@ -2604,6 +2606,10 @@ void MIR_simplify_op (MIR_context_t ctx, MIR_item_t func_item, MIR_insn_t insn,
    mem_op.u.mem.scale = 0;
    if (move_p && (nop == 1 || insn->ops[1].mode == MIR_OP_REG)) {
      *op = mem_op;
+    } else if (((code == MIR_VA_START && nop == 0) || (code == MIR_VA_ARG && nop == 1)
+                || (code == MIR_VA_END && nop == 0))
+               && mem_op.u.mem.type == MIR_T_UNDEF) {
+      *op = MIR_new_reg_op (ctx, addr_reg);
    } else {
      type = (mem_op.u.mem.type == MIR_T_F || mem_op.u.mem.type == MIR_T_D
                  || mem_op.u.mem.type == MIR_T_LD
@ -2730,7 +2736,7 @@ static void remove_unused_labels (MIR_context_t ctx, MIR_item_t func_item) {
  }
 }

-static MIR_insn_code_t reverse_branch_code (MIR_insn_code_t code) {
+MIR_insn_code_t MIR_reverse_branch_code (MIR_insn_code_t code) {
  switch (code) {
  case MIR_BT: return MIR_BF;
  case MIR_BTS: return MIR_BFS;
@ -2756,7 +2762,7 @@ static MIR_insn_code_t reverse_branch_code (MIR_insn_code_t code) {
  case MIR_BGES: return MIR_BLTS;
  case MIR_UBGE: return MIR_UBLT;
  case MIR_UBGES: return MIR_UBLTS;
-  default: assert (FALSE); return code;
+  default: return MIR_INSN_BOUND;
  }
 }

@ -2772,7 +2778,7 @@ static const int MAX_JUMP_CHAIN_LEN = 32;
 static int simplify_func (MIR_context_t ctx, MIR_item_t func_item, int mem_float_p) {
  MIR_func_t func = func_item->u.func;
  MIR_insn_t insn, next_insn, next_next_insn, jmp_insn, new_insn;
-  MIR_insn_code_t ext_code;
+  MIR_insn_code_t ext_code, rev_code;
  int jmps_num = 0, inline_p = FALSE;

  if (func_item->item_type != MIR_func_item)
@ -2890,13 +2896,14 @@ static int simplify_func (MIR_context_t ctx, MIR_item_t func_item, int mem_float
      }
      MIR_remove_insn (ctx, func_item, insn);
      // ??? make imm always second,  what is about mem?
-    } else if (MIR_int_branch_code_p (code) && next_insn != NULL && next_insn->code == MIR_JMP
+    } else if ((rev_code = MIR_reverse_branch_code (insn->code)) != MIR_INSN_BOUND
+               && next_insn != NULL && next_insn->code == MIR_JMP
               && (next_next_insn = DLIST_NEXT (MIR_insn_t, next_insn)) != NULL
               && next_next_insn->code == MIR_LABEL && insn->ops[0].mode == MIR_OP_LABEL
               && skip_labels (next_next_insn, insn->ops[0].u.label) == insn->ops[0].u.label) {
      /* BCond L;JMP L2;<lables>L: => BNCond L2;<labels>L: */
      insn->ops[0] = next_insn->ops[0];
-      insn->code = reverse_branch_code (insn->code);
+      insn->code = rev_code;
      MIR_remove_insn (ctx, func_item, next_insn);
      next_insn = insn;
    } else if (MIR_branch_code_p (code) && insn->ops[0].mode == MIR_OP_LABEL
@ -2928,6 +2935,10 @@ static int simplify_func (MIR_context_t ctx, MIR_item_t func_item, int mem_float
  }
  make_one_ret (ctx, func_item);
  remove_unused_labels (ctx, func_item);
+#if 0
+  fprintf (stderr, "+++++ Function after simplification:\n");
+  MIR_output_item (ctx, stderr, func_item);
+#endif
  return inline_p;
 }

@ -3210,42 +3221,73 @@ struct machine_code_ctx {
 #define page_size ctx->machine_code_ctx->page_size
 #define machine_insns ctx->machine_code_ctx->machine_insns

-uint8_t *_MIR_publish_code (MIR_context_t ctx, const uint8_t *code, size_t code_len) {
-  uint8_t *start, *mem;
-  size_t len;
-  code_holder_t ch;
+/* Return the last code holder if it has at least SIZE bytes left past its 16-byte aligned free
+   pointer; otherwise map a new holder, append it to code_holders and return that one.  Return
+   NULL if mmap fails.  The free pointer of an existing last holder is aligned in place even
+   when that holder turns out to be too small.  The result points into code_holders storage.  */
+static code_holder_t *get_last_code_holder (MIR_context_t ctx, size_t size) {
+  uint8_t *mem;
+  size_t len, npages;
+  code_holder_t ch, *ch_ptr;
-  int new_p = TRUE;

  if ((len = VARR_LENGTH (code_holder_t, code_holders)) > 0) {
-    code_holder_t *ch_ptr = VARR_ADDR (code_holder_t, code_holders) + len - 1;
-    uint8_t *free_addr = (uint8_t *) ((uint64_t) (ch_ptr->free + 15) / 16 * 16); /* align */
-
-    if (free_addr + code_len < ch_ptr->bound) {
-      mem = free_addr;
-      ch_ptr->free = free_addr + code_len;
-      new_p = FALSE;
-      start = ch_ptr->start;
-      len = ch_ptr->bound - start;
-      ch = *ch_ptr;
-    }
+    ch_ptr = VARR_ADDR (code_holder_t, code_holders) + len - 1;
+    ch_ptr->free = (uint8_t *) ((uint64_t) (ch_ptr->free + 15) / 16 * 16); /* align */
+    if (ch_ptr->free + size <= ch_ptr->bound) return ch_ptr;
  }
-  if (new_p) {
-    size_t npages = (code_len + page_size - 1) / page_size;
-
-    len = page_size * npages;
-    mem = (uint8_t *) mmap (NULL, len, PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-    if (mem == MAP_FAILED) return NULL;
-    start = ch.start = mem;
-    ch.free = mem + code_len;
-    ch.bound = mem + len;
-    VARR_PUSH (code_holder_t, code_holders, ch);
-  }
-  mprotect (ch.start, ch.bound - ch.start, PROT_WRITE | PROT_EXEC);
+  npages = (size + page_size) / page_size; /* at least one page, also for size == 0 */
+  len = page_size * npages;
+  mem = (uint8_t *) mmap (NULL, len, PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (mem == MAP_FAILED) return NULL;
+  ch.start = mem;
+  ch.free = mem; /* nothing is reserved yet: add_code advances free */
+  ch.bound = mem + len;
+  VARR_PUSH (code_holder_t, code_holders, ch);
+  len = VARR_LENGTH (code_holder_t, code_holders);
+  return VARR_ADDR (code_holder_t, code_holders) + len - 1;
+}
+
+static uint8_t *add_code (MIR_context_t ctx, code_holder_t *ch_ptr, const uint8_t *code,
+                          size_t code_len) { /* copies code to ch_ptr->free and returns that address */
+  uint8_t *mem = ch_ptr->free;
+
+  ch_ptr->free += code_len; /* reserve the bytes before copying */
+  mir_assert (ch_ptr->free <= ch_ptr->bound); /* callers must have checked that the code fits */
+  mprotect (ch_ptr->start, ch_ptr->bound - ch_ptr->start, PROT_WRITE | PROT_EXEC); /* make the whole holder writable */
  memcpy (mem, code, code_len);
-  mprotect (ch.start, ch.bound - ch.start, PROT_EXEC);
+  mprotect (ch_ptr->start, ch_ptr->bound - ch_ptr->start, PROT_READ | PROT_EXEC); /* drop write access again */
  return mem;
 }

+uint8_t *_MIR_publish_code (MIR_context_t ctx, const uint8_t *code, size_t code_len) { /* returns the address of the copy */
+  code_holder_t *ch_ptr;
+
+  if ((ch_ptr = get_last_code_holder (ctx, code_len)) == NULL) return NULL; /* no holder could be mapped */
+  return add_code (ctx, ch_ptr, code, code_len);
+}
+
+uint8_t *_MIR_get_new_code_addr (MIR_context_t ctx, size_t size) { /* address where size bytes of code would be placed next */
+  code_holder_t *ch_ptr = get_last_code_holder (ctx, size);
+
+  return ch_ptr == NULL ? NULL : ch_ptr->free; /* free is not advanced here, so nothing is reserved */
+}
+
+/* Copy CODE of CODE_LEN bytes to ADDR, which must be the current free address of the last code
+   holder (e.g. as returned by _MIR_get_new_code_addr).  Return ADDR, or NULL if there is no
+   holder, ADDR is not its free address, or the code does not fit.  The bound check matches
+   get_last_code_holder and add_code: code ending exactly at the holder bound is accepted.  */
+uint8_t *_MIR_publish_code_by_addr (MIR_context_t ctx, void *addr, const uint8_t *code,
+                                    size_t code_len) {
+  code_holder_t *ch_ptr = get_last_code_holder (ctx, 0);
+
+  if (ch_ptr == NULL || ch_ptr->free != addr || ch_ptr->free + code_len > ch_ptr->bound)
+    return NULL;
+  return add_code (ctx, ch_ptr, code, code_len);
+}
+
+void _MIR_change_code (MIR_context_t ctx, uint8_t *addr, const uint8_t *code, size_t code_len) { /* overwrites code_len bytes at addr */
+  size_t len, start;
+
+  start = (size_t) addr / page_size * page_size; /* addr rounded down to a page boundary */
+  len = (size_t) addr + code_len - start; /* from that boundary to the end of the patched bytes */
+  mprotect ((uint8_t *) start, len, PROT_WRITE | PROT_EXEC);
+  memcpy (addr, code, code_len);
+  mprotect ((uint8_t *) start, len, PROT_READ | PROT_EXEC); /* drop write access again */
+}
+
 void _MIR_update_code_arr (MIR_context_t ctx, uint8_t *base, size_t nloc,
                           const MIR_code_reloc_t *relocs) {
  size_t i, len, start, max_offset = 0;
@ -3285,15 +3327,12 @@ void _MIR_update_code (MIR_context_t ctx, uint8_t *base, size_t nloc, ...) {
  va_end (args);
 }

-static void machine_init (MIR_context_t ctx);
-static void machine_finish (MIR_context_t ctx);
-
 static void code_init (MIR_context_t ctx) { /* allocates ctx->machine_code_ctx and its arrays */
  if ((ctx->machine_code_ctx = malloc (sizeof (struct machine_code_ctx))) == NULL)
    (*error_func) (MIR_alloc_error, "Not enough memory for ctx");
  page_size = sysconf (_SC_PAGE_SIZE); /* page_size is a macro for a machine_code_ctx field */
  VARR_CREATE (code_holder_t, code_holders, 128);
-  machine_init (ctx);
+  VARR_CREATE (uint8_t, machine_insns, 1024); /* destroyed in code_finish */
 }

 static void code_finish (MIR_context_t ctx) {
@ -3302,7 +3341,7 @@ static void code_finish (MIR_context_t ctx) {
    munmap (ch.start, ch.bound - ch.start);
  }
  VARR_DESTROY (code_holder_t, code_holders);
-  machine_finish (ctx);
+  VARR_DESTROY (uint8_t, machine_insns);
  free (ctx->machine_code_ctx);
  ctx->machine_code_ctx = NULL;
 }
@ -3651,17 +3690,17 @@ static size_t write_item (MIR_context_t ctx, writer_func_t writer, MIR_item_t it

  if (item->item_type == MIR_import_item) {
    len += write_name (ctx, writer, "import");
-    len += write_name (ctx, writer, item->u.import);
+    len += write_name (ctx, writer, item->u.import_id);
    return len;
  }
  if (item->item_type == MIR_export_item) {
    len += write_name (ctx, writer, "export");
-    len += write_name (ctx, writer, item->u.export);
+    len += write_name (ctx, writer, item->u.export_id);
    return len;
  }
  if (item->item_type == MIR_forward_item) {
    len += write_name (ctx, writer, "forward");
-    len += write_name (ctx, writer, item->u.forward);
+    len += write_name (ctx, writer, item->u.forward_id);
    return len;
  }
  if (item->item_type == MIR_bss_item) {
@ -4508,10 +4547,10 @@ typedef struct insn_name {
  MIR_insn_code_t code;
 } insn_name_t;

-static int insn_name_eq (insn_name_t in1, insn_name_t in2) {
+static int insn_name_eq (insn_name_t in1, insn_name_t in2, void *arg) { /* arg unused: NULL is given to HTAB_CREATE */
  return strcmp (in1.name, in2.name) == 0; /* equal when the names match as strings; code is not compared */
 }
-static htab_hash_t insn_name_hash (insn_name_t in) {
+static htab_hash_t insn_name_hash (insn_name_t in, void *arg) { /* arg unused: NULL is given to HTAB_CREATE */
  return mir_hash (in.name, strlen (in.name), 0); /* hashes the name characters, without the terminating NUL */
 }

@ -4566,7 +4605,13 @@ struct scan_ctx {

 static void MIR_NO_RETURN MIR_UNUSED process_error (MIR_context_t ctx,
                                                    enum MIR_error_type error_type,
-                                                    const char *message) {
+                                                    const char *format, ...) {
+/* Report a scanner error: format the printf-style message (truncated to MAX_MESSAGE_LEN - 1
+   characters), pass it to error_func prefixed with the current line number, then longjmp to
+   error_jmp_buf.  Never returns.  */
+#define MAX_MESSAGE_LEN 300
+  char message[MAX_MESSAGE_LEN];
+  va_list va;
+
+  va_start (va, format);
+  vsnprintf (message, MAX_MESSAGE_LEN, format, va);
+  va_end (va); /* every va_start needs a matching va_end before the function is left */
  (*error_func) (error_type, "ln %lu: %s", (unsigned long) curr_lno, message);
  longjmp (error_jmp_buf, TRUE);
 }
@ -4661,8 +4706,11 @@ static void scan_string (MIR_context_t ctx, token_t *t, int c, int get_char (MIR
  mir_assert (c == '\"');
  VARR_TRUNC (char, temp_string, 0);
  for (;;) {
-    if ((c = get_char (ctx)) == EOF || c == '\n')
-      process_error (ctx, MIR_syntax_error, "unfinished string");
+    if ((c = get_char (ctx)) == EOF || c == '\n') {
+      VARR_PUSH (char, temp_string, '\0');
+      process_error (ctx, MIR_syntax_error, "unfinished string \"%s",
+                     VARR_ADDR (char, temp_string));
+    }
    if (c == '"') break;
    if (c == '\\') {
      if ((c = get_char (ctx)) == 'n')
@ -4703,7 +4751,11 @@ static void scan_string (MIR_context_t ctx, token_t *t, int c, int get_char (MIR
        ch_code = 0;
        for (int i = 2; i > 0; i--) {
          c = get_char (ctx);
-          if (!isxdigit (c)) process_error (ctx, MIR_syntax_error, "wrong hexadecimal escape");
+          if (!isxdigit (c)) {
+            VARR_PUSH (char, temp_string, '\0');
+            process_error (ctx, MIR_syntax_error, "wrong hexadecimal escape in %s",
+                           VARR_ADDR (char, temp_string));
+          }
          c = '0' <= c && c <= '9' ? c - '0' : 'a' <= c && c <= 'f' ? c - 'a' + 10 : c - 'A' + 10;
          ch_code = (ch_code << 4) | c;
        }
@ -4777,7 +4829,8 @@ static void scan_token (MIR_context_t ctx, token_t *token, int (*get_char) (MIR_

        if (ch == '+' || ch == '-') {
          next_ch = get_char (ctx);
-          if (!isdigit (next_ch)) process_error (ctx, MIR_syntax_error, "no number after a sign");
+          if (!isdigit (next_ch))
+            process_error (ctx, MIR_syntax_error, "no number after a sign %c", ch);
          unget_char (ctx, next_ch);
        }
        scan_number (ctx, ch, get_char, unget_char, &base, &float_p, &double_p, &ldouble_p);
@ -4802,14 +4855,19 @@ static void scan_token (MIR_context_t ctx, token_t *token, int (*get_char) (MIR_
          ;
        return;
      } else {
-        process_error (ctx, MIR_syntax_error, "wrong char");
+        VARR_PUSH (char, temp_string, '\0');
+        process_error (ctx, MIR_syntax_error, "wrong char after %s", VARR_ADDR (char, temp_string));
      }
    }
  }
 }

-static int label_eq (label_desc_t l1, label_desc_t l2) { return strcmp (l1.name, l2.name) == 0; }
-static htab_hash_t label_hash (label_desc_t l) { return mir_hash (l.name, strlen (l.name), 0); }
+static int label_eq (label_desc_t l1, label_desc_t l2, void *arg) { /* arg unused: NULL is given to HTAB_CREATE */
+  return strcmp (l1.name, l2.name) == 0; /* label descriptors are equal when their names match as strings */
+}
+static htab_hash_t label_hash (label_desc_t l, void *arg) { /* arg unused: NULL is given to HTAB_CREATE */
+  return mir_hash (l.name, strlen (l.name), 0); /* hashes the name characters, without the terminating NUL */
+}

 static MIR_label_t create_label_desc (MIR_context_t ctx, const char *name) {
  MIR_label_t label;
@ -4842,16 +4900,15 @@ MIR_type_t MIR_str2type (MIR_context_t ctx, const char *type_name) {
 }

 static int func_reg_p (MIR_context_t ctx, MIR_func_t func, const char *name) { /* result of looking name up in namenum2rdn_tab for func */
-  size_ctx_t sc, tab_sc;
+  size_t rdn, tab_rdn; /* rdn: index of the probe entry; tab_rdn: index found, unused here */
  reg_desc_t rd; /* probe descriptor; only the key fields name_num and func are set */
  int res;

  rd.name_num = string_store (ctx, &strings, &string_tab, (MIR_str_t){strlen (name) + 1, name}).num;
  rd.func = func;
-  sc.rdn = VARR_LENGTH (reg_desc_t, reg_descs);
-  sc.ctx = ctx;
+  rdn = VARR_LENGTH (reg_desc_t, reg_descs); /* index the probe gets once pushed below */
  VARR_PUSH (reg_desc_t, reg_descs, rd); /* the table stores indexes, so the probe must live in reg_descs */
-  res = HTAB_DO (size_ctx_t, namenum2rdn_tab, sc, HTAB_FIND, tab_sc);
+  res = HTAB_DO (size_t, namenum2rdn_tab, rdn, HTAB_FIND, tab_rdn);
  VARR_POP (reg_desc_t, reg_descs); /* drop the probe entry again */
  return res;
 }
@ -4997,7 +5054,7 @@ void MIR_scan_string (MIR_context_t ctx, const char *str) {
    } else {
      in.name = name;
      if (!HTAB_DO (insn_name_t, insn_name_tab, in, HTAB_FIND, el))
-        process_error (ctx, MIR_syntax_error, "Unknown insn");
+        process_error (ctx, MIR_syntax_error, "Unknown insn %s", name);
      insn_code = el.code;
      for (n = 0; n < VARR_LENGTH (label_name_t, label_names); n++) {
        label = create_label_desc (ctx, VARR_GET (label_name_t, label_names, n));
@ -5043,17 +5100,17 @@ void MIR_scan_string (MIR_context_t ctx, const char *str) {
          } else if ((item = find_item (ctx, name, module)) != NULL) {
            op = MIR_new_ref_op (ctx, item);
          } else {
-            process_error (ctx, MIR_syntax_error, "undeclared name");
+            process_error (ctx, MIR_syntax_error, "undeclared name %s", name);
          }
          break;
        }
        /* Memory, type only, arg, or var */
        type = MIR_str2type (ctx, name);
        if (type == MIR_T_BOUND)
-          process_error (ctx, MIR_syntax_error, "Unknown type");
+          process_error (ctx, MIR_syntax_error, "Unknown type %s", name);
        else if (local_p && type != MIR_T_I64 && type != MIR_T_F && type != MIR_T_D
                 && type != MIR_T_LD)
-          process_error (ctx, MIR_syntax_error, "wrong type for local var");
+          process_error (ctx, MIR_syntax_error, "wrong type %s for local var", name);
        op = MIR_new_mem_op (ctx, type, 0, 0, 0, 1);
        if (proto_p || func_p || local_p) {
          if (t.code == TC_COL) {
@ -5314,8 +5371,8 @@ static void scan_init (MIR_context_t ctx) {
  if ((ctx->scan_ctx = malloc (sizeof (struct scan_ctx))) == NULL)
    (*error_func) (MIR_alloc_error, "Not enough memory for ctx");
  VARR_CREATE (label_name_t, label_names, 0);
-  HTAB_CREATE (label_desc_t, label_desc_tab, 100, label_hash, label_eq);
-  HTAB_CREATE (insn_name_t, insn_name_tab, MIR_INSN_BOUND, insn_name_hash, insn_name_eq);
+  HTAB_CREATE (label_desc_t, label_desc_tab, 100, label_hash, label_eq, NULL);
+  HTAB_CREATE (insn_name_t, insn_name_tab, MIR_INSN_BOUND, insn_name_hash, insn_name_eq, NULL);
  for (i = 0; i < MIR_INSN_BOUND; i++) {
    in.code = i;
    in.name = MIR_insn_name (ctx, i);
@ -5337,10 +5394,10 @@ static void scan_finish (MIR_context_t ctx) {

 #if defined(__x86_64__)
 #include "mir-x86_64.c"
-#elif defined(__PPC64__)
-#include "mir-ppc64.c"
 #elif defined(__aarch64__)
 #include "mir-aarch64.c"
+#elif defined(__PPC64__)
+#include "mir-ppc64.c"
 #else
 #error "undefined or unsupported generation target"
 #endif
--- a/mir/mir.h
+++ b/mir/mir.h
@ -143,6 +143,12 @@ typedef enum {
  REP2 (TYPE_EL, UNDEF, BOUND),
 } MIR_type_t;

+static inline int MIR_int_type_p (MIR_type_t t) {
+  return (MIR_T_I8 <= t && t <= MIR_T_U64) || t == MIR_T_P;
+}
+
+static inline int MIR_fp_type_p (MIR_type_t t) { return MIR_T_F <= t && t <= MIR_T_LD; }
+
 #if UINTPTR_MAX == 0xffffffff
 #define MIR_PTR32 1
 #define MIR_PTR64 0
@ -342,9 +348,9 @@ struct MIR_item {
  union {
    MIR_func_t func;
    MIR_proto_t proto;
-    MIR_name_t import;
-    MIR_name_t export;
-    MIR_name_t forward;
+    MIR_name_t import_id;
+    MIR_name_t export_id;
+    MIR_name_t forward_id;
    MIR_data_t data;
    MIR_ref_data_t ref_data;
    MIR_expr_data_t expr_data;
@ -481,6 +487,8 @@ extern void MIR_insert_insn_before (MIR_context_t ctx, MIR_item_t func, MIR_insn
                                    MIR_insn_t insn);
 extern void MIR_remove_insn (MIR_context_t ctx, MIR_item_t func, MIR_insn_t insn);

+extern MIR_insn_code_t MIR_reverse_branch_code (MIR_insn_code_t code);
+
 extern const char *MIR_type_str (MIR_context_t ctx, MIR_type_t tp);
 extern void MIR_output_op (MIR_context_t ctx, FILE *f, MIR_op_t op, MIR_func_t func);
 extern void MIR_output_insn (MIR_context_t ctx, FILE *f, MIR_insn_t insn, MIR_func_t func,
@ -557,7 +565,9 @@ extern MIR_item_t _MIR_builtin_func (MIR_context_t ctx, MIR_module_t module, con
                                     void *addr);

 extern uint8_t *_MIR_publish_code (MIR_context_t ctx, const uint8_t *code, size_t code_len);
-
+extern uint8_t *_MIR_get_new_code_addr (MIR_context_t ctx, size_t size);
+extern uint8_t *_MIR_publish_code_by_addr (MIR_context_t ctx, void *addr, const uint8_t *code,
+                                           size_t code_len);
 struct MIR_code_reloc {
  size_t offset;
  void *value;
@ -565,6 +575,8 @@ struct MIR_code_reloc {

 typedef struct MIR_code_reloc MIR_code_reloc_t;

+extern void _MIR_change_code (MIR_context_t ctx, uint8_t *addr, const uint8_t *code,
+                              size_t code_len);
 extern void _MIR_update_code_arr (MIR_context_t ctx, uint8_t *base, size_t nloc,
                                  const MIR_code_reloc_t *relocs);
 extern void _MIR_update_code (MIR_context_t ctx, uint8_t *base, size_t nloc, ...);
@ -577,7 +589,7 @@ extern void *_MIR_get_bstart_builtin (MIR_context_t ctx);
 extern void *_MIR_get_bend_builtin (MIR_context_t ctx);

 extern void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, size_t nargs,
-                               MIR_type_t *arg_types);
+                               MIR_type_t *arg_types, int vararg_p);
 extern void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handler);
 extern void *_MIR_get_thunk (MIR_context_t ctx);
 extern void _MIR_redirect_thunk (MIR_context_t ctx, void *thunk, void *to);
--- a/ravi-tests/ravi_tests1.ravi
+++ b/ravi-tests/ravi_tests1.ravi
@ -1736,6 +1736,8 @@ compile(x)
 assert(x(2,3) == 2^3)
 print 'Test 77 OK'

+--
+
 -- Test defer statement
 y = 0
 function x()
@ -1836,6 +1838,8 @@ assert(z.count == 0)
 assert(y == 1)
 print 'Test 82 OK'

+--
+
 for k,v in pairs(opcodes_coverage)
 do
  print(k, v)
--- a/ravi-tests/sieve.lua
+++ b/ravi-tests/sieve.lua
@ -1,7 +1,7 @@
 -- Copyright vnmakarov see https://github.com/vnmakarov/mir/issues/2
 local function sieve()
-	local i: integer, k: integer, prime: integer, count: integer
-	local flags: integer[] = table.intarray(8190)
+	local i, k, prime, count
+	local flags = {}

 	for iter=0,100000  do
 		count = 0
@ -21,7 +21,7 @@ local function sieve()
 	return count
 end

-if ravi.jit() then
+if ravi and ravi.jit() then
 	print('JIT ON')
 	ravi.optlevel(2)
 	ravi.compile(sieve, {omitArrayGetRangeCheck=1})
--- a/ravi-tests/sieve.ravi
+++ b/ravi-tests/sieve.ravi
@ -0,0 +1,37 @@
+-- Copyright vnmakarov see https://github.com/vnmakarov/mir/issues/2
+local function sieve()
+	local i: integer, k: integer, prime: integer, count: integer
+	local flags: integer[] = table.intarray(8190)
+
+	for iter=0,100000  do
+		count = 0
+		for i=0,8190 do 
+			flags[i] = 1
+		end
+		for i=0,8190 do
+			if flags[i] == 1 then
+				prime = i + i + 3;
+				for k = i + prime, 8190, prime do
+					flags[k] = 0
+				end
+				count = count + 1
+			end
+		end
+	end
+	return count
+end
+
+if ravi.jit() then
+	print('JIT ON')
+	ravi.optlevel(2)
+	ravi.compile(sieve, {omitArrayGetRangeCheck=1})
+end
+-- ravi.dumplua(sieve)
+-- ravi.dumpir(sieve)
+
+local t1 = os.clock()
+local count = sieve()
+local t2 = os.clock()
+print("time taken ", t2-t1)
+print(count)
+assert(count == 1899)
--- a/readthedocs/ravi-overview.rst
+++ b/readthedocs/ravi-overview.rst
@ -1,9 +1,11 @@
 =========================
 Ravi Programming Language
 =========================
+.. image:: https://travis-ci.org/dibyendumajumdar/ravi.svg?branch=master
+    :target: https://travis-ci.org/dibyendumajumdar/ravi

 Ravi is a derivative/dialect of `Lua 5.3 <http://www.lua.org/>`_ with limited optional static typing and 
-features `LLVM <http://www.llvm.org/>`_ and `Eclipse OMR <https://github.com/dibyendumajumdar/nj>`_ 
+features `MIR <https://github.com/vnmakarov/mir>`_, `LLVM <http://www.llvm.org/>`_ and `Eclipse OMR <https://github.com/dibyendumajumdar/nj>`_ 
 powered JIT compilers. The name Ravi comes from the Sanskrit word for the Sun. 
 Interestingly a precursor to Lua was `Sol <http://www.lua.org/history.html>`_ which had support for 
 static types; Sol means the Sun in Portuguese.
@ -31,10 +33,9 @@ Features
 * Optional static typing - for details `see the reference manual <https://the-ravi-programming-language.readthedocs.io/en/latest/ravi-reference.html>`_.
 * Type specific bytecodes to improve performance
 * Compatibility with Lua 5.3 (see Compatibility section below)
+* New! JIT backend `MIR <https://github.com/vnmakarov/mir>`_; only Linux and x86-64 supported for now.
 * `LLVM <http://www.llvm.org/>`_ powered JIT compiler
 * `Eclipse OMR <https://github.com/dibyendumajumdar/nj>`_ powered JIT compiler
-* New (wip) small JIT backend based on `MIR <https://github.com/vnmakarov/mir>`_; only Linux and x86-64 supported.
-* Built-in C pre-processor, parser and JIT compiler
 * A `distribution with batteries <https://github.com/dibyendumajumdar/Suravi>`_.

 Documentation
@ -50,6 +51,7 @@ Lua Goodies
 ===========
 * `An Introduction to Lua <http://the-ravi-programming-language.readthedocs.io/en/latest/lua-introduction.html>`_ attempts to provide a quick overview of Lua for folks coming from other languages.
 * `Lua 5.3 Bytecode Reference <http://the-ravi-programming-language.readthedocs.io/en/latest/lua_bytecode_reference.html>`_ is my attempt to bring up to date the `Lua 5.1 Bytecode Reference <http://luaforge.net/docman/83/98/ANoFrillsIntroToLua51VMInstructions.pdf>`_.
+* A `patch for Lua 5.3 <http://lua-users.org/lists/lua-l/2020-01/msg00004.html>`_ implements the 'defer' statement.

 Compatibility with Lua
 ======================
@ -99,7 +101,7 @@ History
       - New JIT backend `MIR <https://github.com/vnmakarov/mir>`_. 

 * 2020 (Plan)
-       - New optimizing byte code generator based on new parser / type checker
+       - `New optimizing byte code generator based on new parser / type checker <https://github.com/dibyendumajumdar/ravi-compiler>`_
       - Ravi 1.0 release

 License
--- a/src/ldo.c
+++ b/src/ldo.c
@ -681,7 +681,11 @@ static int recover (lua_State *L, int status) {
  if (ci == NULL) return 0;  /* no recovery point */
  /* "finish" luaD_pcall */
  oldtop = restorestack(L, ci->extra);
+#ifdef RAVI_DEFER_STATEMENT
  luaF_close(L, oldtop, status);
+#else
+  luaF_close(L, oldtop);
+#endif
  luaD_seterrorobj(L, status, oldtop);
  L->ci = ci;
  L->allowhook = getoah(ci->callstatus);  /* restore original 'allowhook' */
@ -826,12 +830,18 @@ int luaD_pcall (lua_State *L, Pfunc func, void *u,
  status = luaD_rawrunprotected(L, func, u);
  if (status != LUA_OK) {  /* an error occurred? */
    StkId oldtop = restorestack(L, old_top);
+#ifndef RAVI_DEFER_STATEMENT
+    luaF_close(L, oldtop);  /* close possible pending closures */
+    luaD_seterrorobj(L, status, oldtop);
+#endif
    L->ci = old_ci;
    L->allowhook = old_allowhooks;
    L->nny = old_nny;
+#ifdef RAVI_DEFER_STATEMENT
    status = luaF_close(L, oldtop, status);  /* close possible pending closures */
    oldtop = restorestack(L, old_top);
    luaD_seterrorobj(L, status, oldtop);
+#endif
    luaD_shrinkstack(L);
  }
  L->errfunc = old_errfunc;
--- a/src/lfunc.c
+++ b/src/lfunc.c
@ -68,15 +68,22 @@ UpVal *luaF_findupval (lua_State *L, StkId level) {
  lua_assert(isintwups(L) || L->openupval == NULL);
  while (*pp != NULL && (p = *pp)->v >= level) {
    lua_assert(upisopen(p));
+#ifdef RAVI_DEFER_STATEMENT
    if (p->v == level && !p->flags)  /* found a corresponding upvalue that is not a deferred value? */ {
      return p; /* return it */
    }
+#else
+    if (p->v == level)  /* found a corresponding upvalue? */
+      return p;  /* return it */
+#endif
    pp = &p->u.open.next;
  }
  /* not found: create a new upvalue */
  uv = luaM_new(L, UpVal);
  uv->refcount = 0;
+#ifdef RAVI_DEFER_STATEMENT
  uv->flags = 0;
+#endif
  uv->u.open.next = *pp;  /* link it to list of open upvalues */
  uv->u.open.touched = 1;
  *pp = uv;
@ -88,6 +95,7 @@ UpVal *luaF_findupval (lua_State *L, StkId level) {
  return uv;
 }

+#ifdef RAVI_DEFER_STATEMENT
 static void calldeferred(lua_State *L, void *ud) {
  UNUSED(ud);
  luaD_callnoyield(L, L->top - 2, 0);
@ -167,6 +175,22 @@ int luaF_close (lua_State *L, StkId level, int status) {
  }
  return status;
 }
+#else
+void luaF_close (lua_State *L, StkId level) {
+  UpVal *uv;
+  while (L->openupval != NULL && (uv = L->openupval)->v >= level) {
+    lua_assert(upisopen(uv));
+    L->openupval = uv->u.open.next;  /* remove from 'open' list */
+    if (uv->refcount == 0)  /* no references? */
+      luaM_free(L, uv);  /* free upvalue */
+    else {
+      setobj(L, &uv->u.value, uv->v);  /* move value to upvalue slot */
+      uv->v = &uv->u.value;  /* now current value lives here */
+      luaC_upvalbarrier(L, uv);
+    }
+  }
+}
+#endif


 Proto *luaF_newproto (lua_State *L) {
--- a/src/llex.c
+++ b/src/llex.c
@ -40,7 +40,11 @@
 static const char *const luaX_tokens [] = {
    "and", "break", "do", "else", "elseif",
    "end", "false", "for", "function", "goto", "if",
+#ifdef RAVI_DEFER_STATEMENT
    "in", "local", "defer", "nil", "not", "or", "repeat",
+#else
+    "in", "local", "nil", "not", "or", "repeat",
+#endif
    "return", "then", "true", "until", "while",
    "//", "..", "...", "==", ">=", "<=", "~=",
    "<<", ">>", "::", "<eof>",
--- a/src/lopcodes.c
+++ b/src/lopcodes.c
@ -164,7 +164,9 @@ LUAI_DDEF const char *const luaP_opnames[NUM_OPCODES+1] = {
  "SELF_SK",    /* _SK*/ /* A B C	R(A+1) := R(B); R(A) := R(B)[RK(C)]		*/
  "SETFIELD", /*_SK */ /*	A B C	R(A)[RK(B)] := RK(C), string key  */
  "GETTABUP_SK",
+#ifdef RAVI_DEFER_STATEMENT
  "DEFER",
+#endif
   NULL
 };

@ -306,9 +308,9 @@ LUAI_DDEF const lu_byte luaP_opmodes[NUM_OPCODES] = {
 ,opmode(0, 1, OpArgR, OpArgK, iABC)		/* OP_RAVI_SELF_SK */
 ,opmode(0, 0, OpArgK, OpArgK, iABC)		/* OP_RAVI_SETFIELD */
 ,opmode(0, 1, OpArgU, OpArgK, iABC)		/* OP_RAVI_GETTABUP_SK */
-
+#ifdef RAVI_DEFER_STATEMENT
 ,opmode(0, 1, OpArgN, OpArgN, iABC)		/* OP_RAVI_DEFER */
-
+#endif
 };


--- a/src/lparser.c
+++ b/src/lparser.c
@ -920,6 +920,7 @@ static Proto *addprototype (LexState *ls) {
 ** so that, if it invokes the GC, the GC knows which registers
 ** are in use at that time.
 */
+#ifdef RAVI_DEFER_STATEMENT
 static void codeclosure (LexState *ls, expdesc *v, int deferred) {
  FuncState *fs = ls->fs->prev;
  int pc = -1;
@ -933,6 +934,14 @@ static void codeclosure (LexState *ls, expdesc *v, int deferred) {
  }
  DEBUG_VARS(raviY_printf(ls->fs, "codeclosure -> closure created %e\n", v));
 }
+#else
+static void codeclosure (LexState *ls, expdesc *v) {
+  FuncState *fs = ls->fs->prev;
+  init_exp(v, VRELOCABLE, luaK_codeABx(fs, OP_CLOSURE, 0, fs->np - 1), RAVI_TFUNCTION, NULL);
+  luaK_exp2nextreg(fs, v);  /* fix it at the last register */
+  DEBUG_VARS(raviY_printf(ls->fs, "codeclosure -> closure created %e\n", v));
+}
+#endif


 static void open_func (LexState *ls, FuncState *fs, BlockCnt *bl) {
@ -1290,7 +1299,7 @@ static void parlist (LexState *ls) {
  }
 }

-
+#ifdef RAVI_DEFER_STATEMENT
 static void body (LexState *ls, expdesc *e, int ismethod, int line, int deferred) {
  /* body ->  '(' parlist ')' block END */
  FuncState new_fs;
@ -1313,6 +1322,28 @@ static void body (LexState *ls, expdesc *e, int ismethod, int line, int deferred
  codeclosure(ls, e, deferred);
  close_func(ls);
 }
+#else
+static void body (LexState *ls, expdesc *e, int ismethod, int line) {
+  /* body ->  '(' parlist ')' block END */
+  FuncState new_fs;
+  BlockCnt bl;
+  new_fs.f = addprototype(ls);
+  new_fs.f->linedefined = line;
+  open_func(ls, &new_fs, &bl);
+  checknext(ls, '(');
+  if (ismethod) {
+    new_localvarliteral(ls, "self");  /* create 'self' parameter */
+    adjustlocalvars(ls, 1);
+  }
+  parlist(ls);
+  checknext(ls, ')');
+  statlist(ls);
+  new_fs.f->lastlinedefined = ls->linenumber;
+  check_match(ls, TK_END, TK_FUNCTION, line);
+  codeclosure(ls, e);
+  close_func(ls);
+}
+#endif

 /* parse expression list */
 static int explist (LexState *ls, expdesc *v) {
@ -1602,7 +1633,11 @@ static void simpleexp (LexState *ls, expdesc *v) {
    }
    case TK_FUNCTION: {
      luaX_next(ls);
+#ifdef RAVI_DEFER_STATEMENT
      body(ls, v, 0, ls->linenumber, 0);
+#else
+      body(ls, v, 0, ls->linenumber);
+#endif
      return;
    }
    default: {
@ -2187,6 +2222,7 @@ static void ifstat (LexState *ls, int line) {
 }

 /* parse a local function statement - called from statement() */
+#ifdef RAVI_DEFER_STATEMENT
 static void localfunc (LexState *ls, int defer) {
  expdesc b = {.ravi_type = RAVI_TANY, .pc = -1};
  FuncState *fs = ls->fs;
@ -2203,6 +2239,18 @@ static void localfunc (LexState *ls, int defer) {
  /* debug information will only see the variable after this point! */
  getlocvar(fs, b.u.info)->startpc = fs->pc;
 }
+#else
+static void localfunc (LexState *ls) {
+  expdesc b = {.ravi_type = RAVI_TANY, .pc = -1};
+  FuncState *fs = ls->fs;
+  /* RAVI change - add type */
+  new_localvar(ls, str_checkname(ls), RAVI_TFUNCTION, NULL);  /* new local variable */
+  adjustlocalvars(ls, 1);  /* enter its scope */
+  body(ls, &b, 0, ls->linenumber);  /* function created in next register */
+  /* debug information will only see the variable after this point! */
+  getlocvar(fs, b.u.info)->startpc = fs->pc;
+}
+#endif

 /* parse a local variable declaration statement - called from statement() */
 static void localstat (LexState *ls) {
@ -2262,7 +2310,11 @@ static void funcstat (LexState *ls, int line) {
  luaX_next(ls); /* skip FUNCTION */
  ismethod = funcname(ls, &v);
  DEBUG_VARS(raviY_printf(ls->fs, "funcstat -> declaring function %e\n", &v));
+#ifdef RAVI_DEFER_STATEMENT
  body(ls, &b, ismethod, line, 0);
+#else
+  body(ls, &b, ismethod, line);
+#endif
  luaK_storevar(ls->fs, &v, &b);
  luaK_fixline(ls->fs, line);  /* definition "happens" in the first line */
 }
@ -2355,16 +2407,22 @@ static void statement (LexState *ls) {
    case TK_LOCAL: {  /* stat -> localstat */
      luaX_next(ls);  /* skip LOCAL */
      if (testnext(ls, TK_FUNCTION))  /* local function? */
+#ifdef RAVI_DEFER_STATEMENT
        localfunc(ls, 0);
+#else
+        localfunc(ls);
+#endif
      else
        localstat(ls);
      break;
    }
+#ifdef RAVI_DEFER_STATEMENT
    case TK_DEFER: {  /* stat -> deferstat */
      luaX_next(ls);  /* skip DEFER */
      localfunc(ls, 1);
      break;
    }
+#endif
    case TK_DBCOLON: {  /* stat -> label */
      luaX_next(ls);  /* skip double colon */
      labelstat(ls, str_checkname(ls), line);
--- a/src/lstate.c
+++ b/src/lstate.c
@ -266,7 +266,11 @@ void *ravi_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize)

 static void close_state (lua_State *L) {
  global_State *g = G(L);
+#ifdef RAVI_DEFER_STATEMENT
  luaF_close(L, L->stack, -1);  /* close all upvalues for this thread */
+#else
+  luaF_close(L, L->stack);  /* close all upvalues for this thread */
+#endif
  luaC_freeallobjects(L);  /* collect all objects */
  if (g->version)  /* closing a fully built state? */
    luai_userstateclose(L);
@ -313,7 +317,11 @@ LUA_API lua_State *lua_newthread (lua_State *L) {

 void luaE_freethread (lua_State *L, lua_State *L1) {
  LX *l = fromstate(L1);
+#ifdef RAVI_DEFER_STATEMENT
  luaF_close(L1, L1->stack, -1);  /* close all upvalues for this thread */
+#else
+  luaF_close(L1, L1->stack);  /* close all upvalues for this thread */
+#endif
  lua_assert(L1->openupval == NULL);
  luai_userstatefree(L, L1);
  freestack(L1);
--- a/src/lvm.c
+++ b/src/lvm.c
@ -1025,6 +1025,7 @@ void luaV_finishOp (lua_State *L) {
 ** Execute a jump instruction. The 'updatemask' allows signals to stop
 ** tight loops. (Without it, the local copy of 'mask' could never change.)
 */
+#ifdef RAVI_DEFER_STATEMENT
 #define dojump(ci, i, e)                                         \
  {                                                              \
    int a = GETARG_A(i);                                         \
@ -1033,6 +1034,12 @@ void luaV_finishOp (lua_State *L) {
    pc += GETARG_sBx(i) + e;                                     \
    updatemask(L);                                               \
  }
+#else
+#define dojump(ci,i,e) \
+  { int a = GETARG_A(i); \
+    if (a != 0) luaF_close(L, ci->u.l.base + a - 1); \
+    pc += GETARG_sBx(i) + e; updatemask(L); }
+#endif

 /* for test instructions, execute the jump instruction that follows it */
 #define donextjump(ci)	{ i = *pc; dojump(ci, i, 1); }
@ -1274,7 +1281,9 @@ int luaV_execute (lua_State *L) {
    &&vmlabel(OP_RAVI_SELF_SK),
    &&vmlabel(OP_RAVI_SETFIELD),
    &&vmlabel(OP_RAVI_GETTABUP_SK),
+#ifdef RAVI_DEFER_STATEMENT
    &&vmlabel(OP_RAVI_DEFER),
+#endif
  };
 #endif
  
@ -1717,8 +1726,12 @@ int luaV_execute (lua_State *L) {
          StkId lim = nci->u.l.base + getproto(nfunc)->numparams;
          int aux;
          /* close all upvalues from previous call */
+#ifdef RAVI_DEFER_STATEMENT
          if (cl->p->sizep > 0)
            Protect_base(luaF_close(L, oci->u.l.base, LUA_OK));
+#else
+          if (cl->p->sizep > 0) luaF_close(L, oci->u.l.base);
+#endif
          /* move new frame into old one */
          for (aux = 0; nfunc + aux < lim; aux++)
            setobjs2s(L, ofunc + aux, nfunc + aux);
@ -1735,8 +1748,12 @@ int luaV_execute (lua_State *L) {
      }
      vmcase(OP_RETURN) {
        int b = GETARG_B(i);
+#ifdef RAVI_DEFER_STATEMENT
        if (cl->p->sizep > 0)
          Protect_base(luaF_close(L, base, LUA_OK));
+#else
+        if (cl->p->sizep > 0) luaF_close(L, base);
+#endif
        savepc(L);
        int nres = (b != 0 ? b - 1 : cast_int(L->top - ra));
        b = luaD_poscall(L, ci, ra, nres);
@ -2494,12 +2511,14 @@ int luaV_execute (lua_State *L) {
        }
        vmbreak;
      }
+#ifdef RAVI_DEFER_STATEMENT
      vmcase(OP_RAVI_DEFER) {
        UpVal *up = luaF_findupval(L, ra); /* create new upvalue */
        up->flags = 1;                     /* mark it as deferred */
        setnilvalue(ra);                   /* initialize it with nil */
        vmbreak;
      }
+#endif
    }
  }
 }
@ -3032,6 +3051,7 @@ void raviV_op_totype(lua_State *L, TValue *ra, TValue *rb) {
    luaG_runerror(L, "type mismatch: expected %s", getstr(key));
 }

+#ifdef RAVI_DEFER_STATEMENT
 /*
 ** OP_RAVI_DEFER 
 */
@ -3040,6 +3060,7 @@ void raviV_op_defer(lua_State *L, TValue *ra) {
  up->flags = 1;                     /* mark it as deferred */
  setnilvalue(ra);                   /* initialize it with nil */
 }
+#endif

 /* }================================================================== */

--- a/src/ravi_ast_parse.c
+++ b/src/ravi_ast_parse.c
@ -11,7 +11,7 @@ Copyright (C) 2018-2020 Dibyendu Majumdar

 */

-#include <ravi_ast.h>
+#include "ravi_ast.h"

 /* forward declarations */
 static struct ast_node *parse_expression(struct parser_state *);
@ -24,6 +24,7 @@ static void end_scope(struct parser_state *parser);
 static struct ast_node *new_literal_expression(struct parser_state *parser, ravitype_t type);
 static struct ast_node *generate_label(struct parser_state *parser, TString *label);
 static struct ast_container *new_ast_container(lua_State *L);
+static void add_local_symbol_to_current_scope(struct parser_state *parser, struct lua_symbol *sym);

 static void add_symbol(struct ast_container *container, struct lua_symbol_list **list, struct lua_symbol *sym) {
  ptrlist_add((struct ptr_list **)list, sym, &container->ptrlist_allocator);
@ -118,11 +119,6 @@ static struct lua_symbol *new_local_symbol(struct parser_state *parser, TString
  symbol->symbol_type = SYM_LOCAL;
  symbol->var.block = scope;
  symbol->var.var_name = name;
-  symbol->var.pseudo = NULL;
-  add_symbol(parser->container, &scope->symbol_list, symbol);  // Add to the end of the symbol list
-  add_symbol(parser->container, &scope->function->function_expr.locals, symbol);
-  // Note that Lua allows multiple local declarations of the same name
-  // so a new instance just gets added to the end
  return symbol;
 }

@ -541,6 +537,7 @@ static bool parse_parameter_list(struct parser_state *parser, struct lua_symbol_
                        /* RAVI change - add type */
          struct lua_symbol *symbol = declare_local_variable(parser);
          add_symbol(parser->container, list, symbol);
+          add_local_symbol_to_current_scope(parser, symbol);
          nparams++;
          break;
        }
@ -934,6 +931,13 @@ static struct ast_node *parse_expression(struct parser_state *parser) {
 ** =======================================================================
 */

+static void add_local_symbol_to_current_scope(struct parser_state *parser, struct lua_symbol *sym) {
+  // Note that Lua allows multiple local declarations of the same name
+  // so a new instance just gets added to the end
+  add_symbol(parser->container, &parser->current_scope->symbol_list, sym);
+  add_symbol(parser->container, &parser->current_scope->function->function_expr.locals, sym);
+}
+
 static struct block_scope *parse_block(struct parser_state *parser, struct ast_node_list **statement_list) {
  /* block -> statlist */
  struct block_scope *scope = new_scope(parser);
@ -1039,7 +1043,9 @@ static void parse_forbody(struct parser_state *parser, struct ast_node *stmt, in
 static void parse_fornum_statement(struct parser_state *parser, struct ast_node *stmt, TString *varname, int line) {
  LexState *ls = parser->ls;
  /* fornum -> NAME = exp1,exp1[,exp1] forbody */
-  add_symbol(parser->container, &stmt->for_stmt.symbols, new_local_symbol(parser, varname, RAVI_TANY, NULL));
+  struct lua_symbol *local = new_local_symbol(parser, varname, RAVI_TANY, NULL);
+  add_symbol(parser->container, &stmt->for_stmt.symbols, local);
+  add_local_symbol_to_current_scope(parser, local);
  checknext(ls, '=');
  /* get the type of each expression */
  add_ast_node(parser->container, &stmt->for_stmt.expr_list, parse_expression(parser)); /* initial value */
@ -1057,10 +1063,13 @@ static void parse_for_list(struct parser_state *parser, struct ast_node *stmt, T
  /* forlist -> NAME {,NAME} IN explist forbody */
  int nvars = 4; /* gen, state, control, plus at least one declared var */
  /* create declared variables */
-  add_symbol(parser->container, &stmt->for_stmt.symbols, new_local_symbol(parser, indexname, RAVI_TANY, NULL));
+  struct lua_symbol *local = new_local_symbol(parser, indexname, RAVI_TANY, NULL);
+  add_symbol(parser->container, &stmt->for_stmt.symbols, local);
+  add_local_symbol_to_current_scope(parser, local);
  while (testnext(ls, ',')) {
-    add_symbol(parser->container, &stmt->for_stmt.symbols,
-               new_local_symbol(parser, check_name_and_next(ls), RAVI_TANY, NULL));
+    local = new_local_symbol(parser, check_name_and_next(ls), RAVI_TANY, NULL);
+    add_symbol(parser->container, &stmt->for_stmt.symbols, local);
+    add_local_symbol_to_current_scope(parser, local);
    nvars++;
  }
  checknext(ls, TK_IN);
@ -1157,6 +1166,8 @@ static struct ast_node *parse_local_function_statement(struct parser_state *pars
  LexState *ls = parser->ls;
  struct lua_symbol *symbol =
      new_local_symbol(parser, check_name_and_next(ls), RAVI_TFUNCTION, NULL); /* new local variable */
+  /* local function f ... is parsed as local f; f = function ... */
+  add_local_symbol_to_current_scope(parser, symbol);
  struct ast_node *function_ast = new_function(parser);
  parse_function_body(parser, function_ast, 0, ls->linenumber); /* function created in next register */
  end_function(parser);
@ -1191,6 +1202,10 @@ static struct ast_node *parse_local_statement(struct parser_state *parser) {
    /* nexps = 0; */
    ;
  }
+  /* local symbols are only added to scope at the end of the local statement */
+  struct lua_symbol *sym = NULL;
+  FOR_EACH_PTR(node->local_stmt.var_list, sym) { add_local_symbol_to_current_scope(parser, sym); }
+  END_FOR_EACH_PTR(sym);
  return node;
 }

@ -1651,7 +1666,6 @@ static struct ast_container *new_ast_container(lua_State *L) {
  dmrC_allocator_init(&container->symbol_allocator, "symbols", sizeof(struct lua_symbol), sizeof(double), CHUNK);
  container->main_function = NULL;
  container->killed = false;
-  container->linearizer = NULL;
  luaL_getmetatable(L, AST_type);
  lua_setmetatable(L, -2);
  return container;
@ -1661,10 +1675,6 @@ static struct ast_container *new_ast_container(lua_State *L) {
 static int collect_ast_container(lua_State *L) {
  struct ast_container *container = check_Ravi_AST(L, 1);
  if (!container->killed) {
-    if (container->linearizer) {
-      raviA_destroy_linearizer(container->linearizer);
-      free(container->linearizer);
-    }
    dmrC_allocator_destroy(&container->symbol_allocator);
    dmrC_allocator_destroy(&container->block_scope_allocator);
    dmrC_allocator_destroy(&container->ast_node_allocator);
@ -1674,37 +1684,8 @@ static int collect_ast_container(lua_State *L) {
  return 0;
 }

-static int ast_linearize(lua_State *L) {
-  struct ast_container *container = check_Ravi_AST(L, 1);
-  if (container->linearizer) {
-    luaL_error(L, "Already linearized");
-  }
-  struct linearizer *linearizer = (struct linearizer *)calloc(1, sizeof(struct linearizer));
-  raviA_init_linearizer(linearizer, container);
-  container->linearizer = linearizer;
-  raviA_ast_linearize(container->linearizer);
-  return 0;
-}
-
-static int ast_show_linearized(lua_State *L) {
-  struct ast_container *container = check_Ravi_AST(L, 1);
-  if (!container->linearizer) {
-    luaL_error(L, "Not yet linearized");
-    return 0;
-  }
-  membuff_t mb;
-  membuff_init(&mb, 1024);
-  raviA_show_linearizer(container->linearizer, &mb);
-  lua_pushstring(L, mb.buf);
-  membuff_free(&mb);
-  return 1;
-}
-
-static const luaL_Reg container_methods[] = {{"tostring", ast_container_to_string},
-                                             {"release", collect_ast_container},
-                                             {"linearize", ast_linearize},
-                                             {"showlinear", ast_show_linearized},
-                                             {NULL, NULL}};
+static const luaL_Reg container_methods[] = {
+    {"tostring", ast_container_to_string}, {"release", collect_ast_container}, {NULL, NULL}};

 static const luaL_Reg astlib[] = {
    /* Entrypoint for new AST */
--- a/src/ravi_jitshared.c
+++ b/src/ravi_jitshared.c
@ -517,8 +517,12 @@ static const char Lua_header[] =
    "};\n"
    "struct UpVal {\n"
    "	TValue *v;\n"
+#ifdef RAVI_DEFER_STATEMENT
    "       unsigned int refcount;\n"
    "       unsigned int flags;\n"
+#else
+    "	lu_mem refcount;\n"
+#endif
    "	union {\n"
    "		struct {\n"
    "			UpVal *next;\n"
@ -547,7 +551,11 @@ static const char Lua_header[] =
    "  (ttisinteger(o) ? (*(i) = ivalue(o), 1) : luaV_tointeger(o,i,LUA_FLOORN2I))\n"
    "extern int luaV_tonumber_(const TValue *obj, lua_Number *n);\n"
    "extern int luaV_tointeger(const TValue *obj, lua_Integer *p, int mode);\n"
+#ifdef RAVI_DEFER_STATEMENT
    "extern int luaF_close (lua_State *L, StkId level, int status);\n"
+#else
+    "extern void luaF_close (lua_State *L, StkId level);\n"
+#endif
    "extern int luaD_poscall (lua_State *L, CallInfo *ci, StkId firstResult, int nres);\n"
    "extern int luaV_equalobj(lua_State *L, const TValue *t1, const TValue *t2);\n"
    "extern int luaV_lessthan(lua_State *L, const TValue *l, const TValue *r);\n"
@ -583,7 +591,9 @@ static const char Lua_header[] =
    "extern void raviV_settable_sskey(lua_State *L, const TValue *t, TValue *key, TValue *val);\n"
    "extern void raviV_gettable_i(lua_State *L, const TValue *t, TValue *key, TValue *val);\n"
    "extern void raviV_settable_i(lua_State *L, const TValue *t, TValue *key, TValue *val);\n"
+#ifdef RAVI_DEFER_STATEMENT
    "extern void raviV_op_defer(lua_State *L, TValue *ra);\n"
+#endif
    "extern lua_Integer luaV_shiftl(lua_Integer x, lua_Integer y);\n"
    "extern void ravi_dump_value(lua_State *L, const struct lua_TValue *v);\n"
    "extern void raviV_op_bnot(lua_State *L, TValue *ra, TValue *rb);\n"
@ -967,6 +977,25 @@ static void emit_comparison(struct function *fn, int A, int B, int C, int j, int
        membuff_add_fstring(&fn->body, "result = (fltvalue(rb) %s fltvalue(rc));\n", oper);
      }
      break;
+    case OP_LT:
+      oper = "<";
+      goto Lemit;
+    case OP_LE:
+      oper = "<=";
+    Lemit:
+      emit_reg_or_k(fn, "rb", B);
+      emit_reg_or_k(fn, "rc", C);
+      membuff_add_string(&fn->body, "if (ttisinteger(rb) && ttisinteger(rc))\n");
+      membuff_add_fstring(&fn->body, "  result = (ivalue(rb) %s ivalue(rc));\n", oper);
+      membuff_add_string(&fn->body, "else {\n");
+      emit_update_savedpc(fn, pc);
+      membuff_add_fstring(&fn->body, "  result = %s(L, rb, rc);\n", compfunc);
+      // Reload pointer to base as the call to the comparison function
+      // (compfunc) may have invoked a Lua function and as a result the stack
+      // may have been reallocated - so the previous base pointer could be stale
+      membuff_add_string(&fn->body, "  base = ci->u.l.base;\n");
+      membuff_add_string(&fn->body, "}\n");
+      break;
    default:
      emit_reg_or_k(fn, "rb", B);
      emit_reg_or_k(fn, "rc", C);
@ -981,8 +1010,12 @@ static void emit_comparison(struct function *fn, int A, int B, int C, int j, int
  membuff_add_fstring(&fn->body, "if (result == %d) {\n", A);
  if (jA > 0) {
    membuff_add_fstring(&fn->body, " ra = R(%d);\n", jA - 1);
+#ifdef RAVI_DEFER_STATEMENT
    membuff_add_string(&fn->body, " luaF_close(L, ra, LUA_OK);\n");
    membuff_add_string(&fn->body, " base = ci->u.l.base;\n");
+#else
+    membuff_add_string(&fn->body, " luaF_close(L, ra);\n");
+#endif
  }
  membuff_add_fstring(&fn->body, "  goto Lbc_%d;\n", j);
  membuff_add_string(&fn->body, "}\n");
@ -1021,9 +1054,13 @@ static void emit_op_loadk(struct function *fn, int A, int Bx, int pc) {
 static void emit_op_return(struct function *fn, int A, int B, int pc) {
  (void)pc;
  emit_reg(fn, "ra", A);
+#ifdef RAVI_DEFER_STATEMENT
  membuff_add_string(&fn->body, "if (cl->p->sizep > 0) {\n luaF_close(L, base, LUA_OK);\n");
  membuff_add_string(&fn->body, " base = ci->u.l.base;\n");
  membuff_add_string(&fn->body, "}\n");
+#else
+  membuff_add_string(&fn->body, "if (cl->p->sizep > 0) luaF_close(L, base);\n");
+#endif
  membuff_add_fstring(&fn->body, "result = (%d != 0 ? %d - 1 : cast_int(L->top - ra));\n", B, B);
  membuff_add_string(&fn->body, "return luaD_poscall(L, ci, ra, result);\n");
 }
@ -1048,8 +1085,12 @@ static void emit_op_jmp(struct function *fn, int A, int sBx, int pc) {
  (void)pc;
  if (A > 0) {
    membuff_add_fstring(&fn->body, "ra = R(%d);\n", A - 1);
+#ifdef RAVI_DEFER_STATEMENT
    membuff_add_string(&fn->body, "luaF_close(L, ra, LUA_OK);\n");
    membuff_add_string(&fn->body, "base = ci->u.l.base;\n");
+#else
+    membuff_add_string(&fn->body, "luaF_close(L, ra);\n");
+#endif
  }
  membuff_add_fstring(&fn->body, "goto Lbc_%d;\n", sBx);
 }
@ -1077,8 +1118,12 @@ static void emit_op_test(struct function *fn, int A, int B, int C, int j, int jA
  membuff_add_fstring(&fn->body, "if (!result) {\n", A);
  if (jA > 0) {
    membuff_add_fstring(&fn->body, " ra = R(%d);\n", jA - 1);
+#ifdef RAVI_DEFER_STATEMENT
    membuff_add_string(&fn->body, " luaF_close(L, ra, LUA_OK);\n");
    membuff_add_string(&fn->body, " base = ci->u.l.base;\n");
+#else
+    membuff_add_string(&fn->body, " luaF_close(L, ra);\n");
+#endif
  }
  membuff_add_fstring(&fn->body, "  goto Lbc_%d;\n", j);
  membuff_add_string(&fn->body, " }\n");
@ -1098,8 +1143,12 @@ static void emit_op_testset(struct function *fn, int A, int B, int C, int j, int
  membuff_add_string(&fn->body, "  setobjs2s(L, ra, rb);");
  if (jA > 0) {
    membuff_add_fstring(&fn->body, " ra = R(%d);\n", jA - 1);
+#ifdef RAVI_DEFER_STATEMENT
    membuff_add_string(&fn->body, " luaF_close(L, ra, LUA_OK);\n");
    membuff_add_string(&fn->body, " base = ci->u.l.base;\n");
+#else
+    membuff_add_string(&fn->body, " luaF_close(L, ra);\n");
+#endif
  }
  membuff_add_fstring(&fn->body, "  goto Lbc_%d;\n", j);
  membuff_add_string(&fn->body, " }\n");
@ -1303,9 +1352,21 @@ static void emit_binary_op(struct function *fn, int A, int B, int C, OpCode op,
 void emit_ff_op(struct function *fn, int A, int B, int C, int pc, const char *op) {
  (void)pc;
  emit_reg(fn, "ra", A);
-  emit_reg_or_k(fn, "rb", B);
-  emit_reg_or_k(fn, "rc", C);
-  membuff_add_fstring(&fn->body, "setfltvalue(ra, fltvalue(rb) %s fltvalue(rc));\n", op);
+  /* Emits "ra = rb <op> rc" using fltvalue() on both operands (no type checks
+   * are generated, so the operands are assumed to be floats already).
+   * A constant operand is written into the generated C as a literal, but only
+   * when it is finite: "%.17g" renders infinities and NaN as "inf"/"nan", which
+   * are not valid C tokens. Non-finite constants therefore take the general
+   * path that loads the operand through emit_reg_or_k(). */
+  if (ISK(B)) {
+    lua_Number kb = fn->p->k[INDEXK(B)].value_.n;
+    if (kb - kb == 0) { /* holds only for finite values: inf - inf and NaN - NaN are NaN */
+      emit_reg_or_k(fn, "rc", C);
+      membuff_add_fstring(&fn->body, "setfltvalue(ra, %.17g %s fltvalue(rc));\n", (double)kb, op);
+      return;
+    }
+  }
+  else if (ISK(C)) {
+    lua_Number kc = fn->p->k[INDEXK(C)].value_.n;
+    if (kc - kc == 0) { /* finite constant: safe to inline as a literal */
+      emit_reg_or_k(fn, "rb", B);
+      membuff_add_fstring(&fn->body, "setfltvalue(ra, fltvalue(rb) %s %.17g);\n", op, (double)kc);
+      return;
+    }
+  }
+  /* General case: both operands are fetched at run time. */
+  emit_reg_or_k(fn, "rb", B);
+  emit_reg_or_k(fn, "rc", C);
+  membuff_add_fstring(&fn->body, "setfltvalue(ra, fltvalue(rb) %s fltvalue(rc));\n", op);
 }

 static void emit_fi_op(struct function *fn, int A, int B, int C, int pc, const char *op) {
@ -1327,9 +1388,21 @@ static void emit_if_op(struct function *fn, int A, int B, int C, int pc, const c
 static void emit_ii_op(struct function *fn, int A, int B, int C, int pc, const char *op) {
  (void)pc;
  emit_reg(fn, "ra", A);
-  emit_reg_or_k(fn, "rb", B);
-  emit_reg_or_k(fn, "rc", C);
-  membuff_add_fstring(&fn->body, "setivalue(ra, ivalue(rb) %s ivalue(rc));\n", op);
+  /* Emits "ra = rb <op> rc" using ivalue() on both operands (no type checks
+   * are generated, so the operands are assumed to be integers already).
+   * When one operand is a constant its value is inlined as a literal. The
+   * value is cast to long long so that it matches the "%lld" conversion
+   * whatever lua_Integer is defined as, and the literal carries an LL suffix
+   * so that its type in the generated C does not depend on its magnitude. */
+  if (ISK(B)) {
+    TValue *Konst1 = &fn->p->k[INDEXK(B)];
+    emit_reg_or_k(fn, "rc", C);
+    membuff_add_fstring(&fn->body, "setivalue(ra, %lldLL %s ivalue(rc));\n", (long long)Konst1->value_.i, op);
+  }
+  else if (ISK(C)) {
+    TValue *Konst1 = &fn->p->k[INDEXK(C)];
+    emit_reg_or_k(fn, "rb", B);
+    membuff_add_fstring(&fn->body, "setivalue(ra, ivalue(rb) %s %lldLL);\n", op, (long long)Konst1->value_.i);
+  }
+  else {
+    /* Neither operand is a constant: fetch both at run time. */
+    emit_reg_or_k(fn, "rb", B);
+    emit_reg_or_k(fn, "rc", C);
+    membuff_add_fstring(&fn->body, "setivalue(ra, ivalue(rb) %s ivalue(rc));\n", op);
+  }
 }

 static void emit_op_divii(struct function *fn, int A, int B, int C, int pc) {
@ -1342,11 +1415,13 @@ static void emit_op_divii(struct function *fn, int A, int B, int C, int pc) {
                     "(lua_Number)(ivalue(rc)));\n");
 }

+#ifdef RAVI_DEFER_STATEMENT
+/* Emits the C code for OP_RAVI_DEFER: loads register A into ra and calls
+ * raviV_op_defer(L, ra). Only compiled when RAVI_DEFER_STATEMENT is defined;
+ * the matching case in raviJ_codegen() is guarded by the same macro. */
 static void emit_op_defer(struct function *fn, int A, int pc) {
  (void)pc;
  emit_reg(fn, "ra", A);
  membuff_add_string(&fn->body, "raviV_op_defer(L, ra);\n");
 }
+#endif

 static void emit_op_loadfz(struct function *fn, int A, int pc) {
  (void)pc;
@ -2210,9 +2285,11 @@ bool raviJ_codegen(struct lua_State *L, struct Proto *p, struct ravi_compile_opt
        int B = GETARG_B(i);
        emit_op_len(&fn, A, B, pc);
      } break;
+#ifdef RAVI_DEFER_STATEMENT
      case OP_RAVI_DEFER: {
        emit_op_defer(&fn, A, pc);
      } break;
+#endif
      case OP_RAVI_SHR_II:
      case OP_RAVI_SHL_II:
      case OP_RAVI_BXOR_II:
--- a/src/ravi_llvmcall.cpp
+++ b/src/ravi_llvmcall.cpp
@ -51,10 +51,14 @@ void RaviCodeGenerator::emit_JMP(RaviFunctionDef *def, int A, int sBx, int pc) {
    emit_load_base(def);
    // base + a - 1
    llvm::Value *val = emit_gep_register(def, A - 1);
+#ifdef RAVI_DEFER_STATEMENT
    if (!traced)
      emit_update_savedpc(def, pc);
    // Call luaF_close
    CreateCall3(def->builder, def->luaF_closeF, def->L, val, def->types->kInt[LUA_OK]);
+#else
+    CreateCall2(def->builder, def->luaF_closeF, def->L, val);
+#endif
  }

  // Do the actual jump
@ -173,11 +177,13 @@ void RaviCodeGenerator::emit_CALL(RaviFunctionDef *def, int A, int B, int C,
  def->builder->SetInsertPoint(end_block);
 }

+#ifdef RAVI_DEFER_STATEMENT
+// Generates LLVM IR for OP_RAVI_DEFER: emits the debug trace hook, reloads
+// base, takes the address of register A and calls raviV_op_defer(L, ra).
+// Only compiled when RAVI_DEFER_STATEMENT is defined, as is the extern
+// declaration of raviV_op_deferF that this call relies on.
 void RaviCodeGenerator::emit_DEFER(RaviFunctionDef *def, int A, int pc) {
  emit_debug_trace(def, OP_RAVI_DEFER, pc);
  emit_load_base(def);
  llvm::Value *ra = emit_gep_register(def, A);
  CreateCall2(def->builder, def->raviV_op_deferF, def->L, ra);
 }
+#endif

 }
--- a/src/ravi_llvmcodegen.cpp
+++ b/src/ravi_llvmcodegen.cpp
@ -1204,10 +1204,11 @@ void RaviCodeGenerator::emit_extern_declarations(RaviFunctionDef *def) {
  def->raviV_op_totypeF = def->raviF->addExternFunction(
      def->types->raviV_op_totypeT, reinterpret_cast<void *>(&raviV_op_totype),
      "raviV_op_totype");
+#ifdef RAVI_DEFER_STATEMENT
  def->raviV_op_deferF = def->raviF->addExternFunction(
 	  def->types->raviV_op_deferT, reinterpret_cast<void *>(&raviV_op_defer), 
 	  "raviV_op_defer");
-
+#endif
 #if 0
  // DEBUG routines
  def->ravi_dump_valueF = def->raviF->addExternFunction(
@ -1337,7 +1338,11 @@ llvm::Value *RaviCodeGenerator::emit_gep_upval_v(RaviFunctionDef *def,
 // Get &upval->value -> result is TValue *
+// The final argument differs by build because the UpVal layout declared in
+// LuaLLVMTypes differs: with RAVI_DEFER_STATEMENT the struct is
+// { v, refcount, flags, value }, otherwise it is { v, refcount, value }.
+// NOTE(review): this assumes the last emit_gep() argument is the struct field
+// index - it must stay in sync with the UpValT element list.
 llvm::Value *RaviCodeGenerator::emit_gep_upval_value(
    RaviFunctionDef *def, llvm::Instruction *pupval) {
+#ifdef RAVI_DEFER_STATEMENT
  return emit_gep(def, "value", pupval, 0, 3);
+#else
+  return emit_gep(def, "value", pupval, 0, 2);
+#endif
 }

 // Alternative code generator uses dmrC based C front-end
@ -2022,9 +2027,11 @@ bool RaviCodeGenerator::compile(lua_State *L, Proto *p,
        int B = GETARG_B(i);
        emit_UNM(def, A, B, pc);
      } break;
+#ifdef RAVI_DEFER_STATEMENT
      case OP_RAVI_DEFER: {
        emit_DEFER(def, A, pc);
      } break;
+#endif
      default: {
        fprintf(stderr, "Unexpected bytecode %d\n", op);
        abort();
--- a/src/ravi_llvmcomp.cpp
+++ b/src/ravi_llvmcomp.cpp
@ -131,10 +131,14 @@ void RaviCodeGenerator::emit_EQ(RaviFunctionDef *def, int A, int B, int C,

    // base + a - 1
    llvm::Value *val = emit_gep_register(def, jA - 1);
+#ifdef RAVI_DEFER_STATEMENT
    if (!traced)
      emit_update_savedpc(def, pc);
    // Call luaF_close
    CreateCall3(def->builder, def->luaF_closeF, def->L, val, def->types->kInt[LUA_OK]);
+#else
+    CreateCall2(def->builder, def->luaF_closeF, def->L, val);
+#endif
  }
  // Do the jump
  def->builder->CreateBr(def->jmp_targets[j].jmp1);
@ -238,10 +242,14 @@ void RaviCodeGenerator::emit_TEST(RaviFunctionDef *def, int A, int B, int C,

    // base + a - 1
    llvm::Value *val = emit_gep_register(def, jA - 1);
+#ifdef RAVI_DEFER_STATEMENT
    if (!traced)
      emit_update_savedpc(def, pc);
    // Call luaF_close
    CreateCall3(def->builder, def->luaF_closeF, def->L, val, def->types->kInt[LUA_OK]);
+#else
+    CreateCall2(def->builder, def->luaF_closeF, def->L, val);
+#endif
  }
  // Do the jump
  def->builder->CreateBr(def->jmp_targets[j].jmp1);
@ -311,10 +319,14 @@ void RaviCodeGenerator::emit_TESTSET(RaviFunctionDef *def, int A, int B, int C,

    // base + a - 1
    llvm::Value *val = emit_gep_register(def, jA - 1);
+#ifdef RAVI_DEFER_STATEMENT
    if (!traced)
      emit_update_savedpc(def, pc);
    // Call luaF_close
    CreateCall3(def->builder, def->luaF_closeF, def->L, val, def->types->kInt[LUA_OK]);
+#else
+    CreateCall2(def->builder, def->luaF_closeF, def->L, val);
+#endif
  }
  // Do the jump
  def->builder->CreateBr(def->jmp_targets[j].jmp1);
--- a/src/ravi_llvmreturn.cpp
+++ b/src/ravi_llvmreturn.cpp
@ -56,6 +56,14 @@ void RaviCodeGenerator::emit_RETURN(RaviFunctionDef *def, int A, int B,

  bool traced = emit_debug_trace(def, OP_RETURN, pc);

+#ifndef RAVI_DEFER_STATEMENT
+  // Load pointer to base
+  emit_load_base(def);
+
+  // Get pointer to register A
+  llvm::Value *ra_ptr = emit_gep_register(def, A);
+#endif
+
  // if (cl->p->sizep > 0) luaF_close(L, base);
  // Get pointer to Proto->sizep
  llvm::Instruction *psize = emit_load_proto_sizep(def);
@ -70,22 +78,28 @@ void RaviCodeGenerator::emit_RETURN(RaviFunctionDef *def, int A, int B,
  def->builder->CreateCondBr(psize_gt_0, then_block, else_block);
  def->builder->SetInsertPoint(then_block);

+#ifdef RAVI_DEFER_STATEMENT
  // Load pointer to base
  emit_load_base(def);
  // Get pointer to register A
  llvm::Value *ra_ptr = emit_gep_register(def, A);
  if (!traced)
    emit_update_savedpc(def, pc);
+#endif
  // Call luaF_close
+#ifdef RAVI_DEFER_STATEMENT
  CreateCall3(def->builder, def->luaF_closeF, def->L, def->base_ptr, def->types->kInt[LUA_OK]);
+#else
+  CreateCall2(def->builder, def->luaF_closeF, def->L, def->base_ptr);
+#endif
  def->builder->CreateBr(else_block);

  def->f->getBasicBlockList().push_back(else_block);
  def->builder->SetInsertPoint(else_block);
-
+#ifdef RAVI_DEFER_STATEMENT
  emit_load_base(def);                 // As luaF_close() may have changed the stack
  ra_ptr = emit_gep_register(def, A);  // load RA
-
+#endif
  //*  b = luaD_poscall(L, ra, (b != 0 ? b - 1 : L->top - ra));
  llvm::Value *nresults = NULL;
  if (B != 0)
--- a/src/ravi_llvmtypes.cpp
+++ b/src/ravi_llvmtypes.cpp
@ -694,8 +694,12 @@ LuaLLVMTypes::LuaLLVMTypes(llvm::LLVMContext &context) : mdbuilder(context) {

  // struct UpVal {
  //  struct TValue *v;  /* points to stack or to its own value */
+#ifdef RAVI_DEFER_STATEMENT
  //  unsigned int refcount;  /* reference counter */
  //  unsigned int flags; /* Used to mark deferred values */
+#else
+  //  unsigned long long refcount;  /* reference counter */
+#endif
  //  union {
  //    struct {  /* (when open) */
  //      struct UpVal *next;  /* linked list */
@ -706,8 +710,12 @@ LuaLLVMTypes::LuaLLVMTypes(llvm::LLVMContext &context) : mdbuilder(context) {
  //};
  elements.clear();
  elements.push_back(pTValueT);
+#ifdef RAVI_DEFER_STATEMENT
  elements.push_back(C_intT);
  elements.push_back(C_intT);
+#else
+  elements.push_back(C_size_t);
+#endif
  elements.push_back(TValueT);
  UpValT->setBody(elements);

@ -747,22 +755,31 @@ LuaLLVMTypes::LuaLLVMTypes(llvm::LLVMContext &context) : mdbuilder(context) {
  elements.push_back(plua_StateT);
  luaV_executeT = llvm::FunctionType::get(C_intT, elements, false);

+#ifdef RAVI_DEFER_STATEMENT
  // int luaF_close (lua_State *L, StkId level, int status)
+#else
+  // void luaF_close (lua_State *L, StkId level)
+#endif
  elements.clear();
  elements.push_back(plua_StateT);
  elements.push_back(StkIdT);
+#ifdef RAVI_DEFER_STATEMENT
  elements.push_back(C_intT);
  luaF_closeT =
      llvm::FunctionType::get(C_intT, elements, false);
+#else
+  luaF_closeT =
+      llvm::FunctionType::get(llvm::Type::getVoidTy(context), elements, false);
+#endif

  // int luaV_equalobj (lua_State *L, const TValue *t1, const TValue *t2)
  elements.clear();
  elements.push_back(plua_StateT);
  elements.push_back(pTValueT);
-
+#ifdef RAVI_DEFER_STATEMENT
  // void raviV_op_defer(lua_State *L, TValue *ra);
  raviV_op_deferT = llvm::FunctionType::get(llvm::Type::getVoidTy(context), elements, false);
-
+#endif
  elements.push_back(pTValueT);
  luaV_equalobjT = llvm::FunctionType::get(C_intT, elements, false);

@ -1266,8 +1283,12 @@ LuaLLVMTypes::LuaLLVMTypes(llvm::LLVMContext &context) : mdbuilder(context) {

  nodes.clear();
  nodes.push_back(std::pair<llvm::MDNode *, uint64_t>(tbaa_pointerT, 0));
+#ifdef RAVI_DEFER_STATEMENT
  nodes.push_back(std::pair<llvm::MDNode *, uint64_t>(tbaa_intT, 4));
  nodes.push_back(std::pair<llvm::MDNode *, uint64_t>(tbaa_intT, 4));
+#else
+  nodes.push_back(std::pair<llvm::MDNode *, uint64_t>(tbaa_longlongT, 8));
+#endif
  nodes.push_back(std::pair<llvm::MDNode *, uint64_t>(tbaa_TValueT, 16));
  tbaa_UpValT = mdbuilder.createTBAAStructTypeNode("UpVal", nodes);
  tbaa_UpVal_vT =
--- a/src/ravi_mirjit.c
+++ b/src/ravi_mirjit.c
@ -111,7 +111,9 @@ static LuaFunc Lua_functions[] = {
    { "raviV_settable_sskey", raviV_settable_sskey },
    { "raviV_gettable_i", raviV_gettable_i },
    { "raviV_settable_i", raviV_settable_i },
+#ifdef RAVI_DEFER_STATEMENT
    { "raviV_op_defer", raviV_op_defer },
+#endif
    { "raviV_op_bnot", raviV_op_bnot},

    { "lua_absindex", lua_absindex },
@ -404,6 +406,7 @@ void *MIR_compile_C_module(
    }
    MIR_load_module (ctx, module);
    MIR_gen_init (ctx);
+    MIR_gen_set_optimize_level(ctx, 3);
    MIR_link (ctx, MIR_set_gen_interface, Import_resolver_func);
    fun_addr = MIR_gen (ctx, main_func);
    MIR_gen_finish (ctx);
--- a/src/ravi_omrjit.c
+++ b/src/ravi_omrjit.c
@ -319,8 +319,10 @@ int raviV_initjit(struct lua_State *L) {
  register_builtin_arg2(jit->jit, "lua_setmetatable", lua_setmetatable, JIT_Int32, JIT_Address, JIT_Int32);
  //LUA_API void  (lua_setuservalue)(lua_State *L, int idx);
  register_builtin_arg2(jit->jit, "lua_setuservalue", lua_setuservalue, JIT_NoType, JIT_Address, JIT_Int32);
+#ifdef RAVI_DEFER_STATEMENT
  //LUA_API void raviV_op_defer(lua_State *L, TValue *ra);
  register_builtin_arg2(jit->jit, "raviV_op_defer", raviV_op_defer, JIT_NoType, JIT_Address, JIT_Address);
+#endif
  //LUAI_FUNC lua_Integer luaV_shiftl (lua_Integer x, lua_Integer y);
  register_builtin_arg2(jit->jit, "luaV_shiftl", luaV_shiftl, JIT_Int64, JIT_Int64, JIT_Int64);
  // extern void raviV_op_bnot(lua_State *L, TValue *ra, TValue *rb);