Compare commits

...

197 Commits

Author SHA1 Message Date
Pavel R. 7adbc171f6 changed types to strict (nil disallowed) in lcode.c
3 years ago
Pavel R. 072d2c37f0 Fixed possibility to initialize empty strings (and booleans)
3 years ago
Pavel R. 21fd437e89 added missing optional types opcodes to JIT interface
3 years ago
annelin 73de201165 added new (optional) types to lparser.c (no idea what it affects)
3 years ago
annelin be65484feb Removed RAVI_DEFER_STATEMENT define (there are no reasons to disable it)
3 years ago
annelin 7d376acf62 Added `debug.settype(typ, metatable)` function
3 years ago
Pavel R. 3995761bf1 Add README.md
3 years ago
Pavel R. 5359ca1d5f - Added boolean type
3 years ago
Dibyendu Majumdar d022074ae6 Update Lua 5.4.3 defer patch - minor change to remove some leftover stuff
3 years ago
Dibyendu Majumdar cf3be2e688
Update README.rst
3 years ago
Dibyendu Majumdar bbf2c29077 Lua 5.4.3 defer statement patch
3 years ago
Dibyendu Majumdar 2c81edafa0 issue #169 Sync with upstream project
3 years ago
Dibyendu Majumdar b073242254 issue #198 Sync with upstream project
3 years ago
Dibyendu Majumdar 6eb43324dd issue #169 Add GNUInstallDirs
3 years ago
Dibyendu Majumdar 4bfc061a2a issue #169 Apply upstream fixes
3 years ago
Dibyendu Majumdar 3efaddee6b issue #222 Lua 5.4 change for bitwise ops
3 years ago
Dibyendu Majumdar d5e324fd53 issue #217 fix the ravicomp compiler to use the same typecodes as Ravi
3 years ago
Dibyendu Majumdar e3d8203bd9 Refactor
3 years ago
Dibyendu Majumdar 53ec0c19b3
Merge pull request #218 from XmiliaH/bump-binary-version
3 years ago
XmiliaH 5ca801596a Bump binary version & smaller sizes for stripped binaries
3 years ago
Dibyendu Majumdar 58980db5cb issue #217 define ravitype_t in terms of the new values
3 years ago
Dibyendu Majumdar 347ae985bc
Merge pull request #213 from XmiliaH/type-maps
3 years ago
Dibyendu Majumdar 063a55604e
Merge branch 'new_type_info' into type-maps
3 years ago
Dibyendu Majumdar 8e815bd67a issue #215 adapt fix by XmiliaH from pull request
3 years ago
Dibyendu Majumdar 658f04c3d8 issue #215 adapt fix by XmiliaH from pull request
3 years ago
Dibyendu Majumdar 8fd3a1bbab issue #214 since we haven't implemented boolean type annotation fully, the parser will no longer recognize the :boolean annotation.
3 years ago
Dibyendu Majumdar b5afdfaa46 issue #215 The top level check v->ravi_type != vartype is not correct as it means we miss out checking scenario where v->k == VINDEXED.
3 years ago
Dibyendu Majumdar 88ccaf34a0 Reformat
3 years ago
Dibyendu Majumdar 5c30d255c7 some improvements to type checking via XmiliaH
3 years ago
Dibyendu Majumdar 248c730c43 refactoring via XmiliaH
3 years ago
Dibyendu Majumdar 4a20693671 refactoring via XmiliaH
3 years ago
Dibyendu Majumdar cb6943a1eb tests updated
3 years ago
XmiliaH 837dc959cf
Merge branch 'master' into type-maps
3 years ago
Dibyendu Majumdar 754fcefc26
Merge pull request #212 from XmiliaH/some-improvements
3 years ago
XmiliaH b4359b9391 Cleanup and Fixes
3 years ago
XmiliaH a9537957d5 Remove old ravi_type_t from jit
3 years ago
XmiliaH 435480f4db Better deduction for not
3 years ago
XmiliaH 62ab591343 Fix concat
3 years ago
XmiliaH f80e4d5442 Fix concat
3 years ago
XmiliaH 7789f4f32d Finish type maps
3 years ago
XmiliaH c0122cd816 Fix tests
3 years ago
XmiliaH 58cdc8d7dc RAVI_TSTRINGs can be nil
3 years ago
XmiliaH 6af3d804a4 First part for type maps
3 years ago
XmiliaH 871b76fea2 Format changed code
3 years ago
XmiliaH 0b43c94a4d Use the right enum for opcodes
3 years ago
XmiliaH e85634270f Add back that len of table is int and fix tests.
3 years ago
XmiliaH 4cce67ec13 Improve type deduction
3 years ago
Dibyendu Majumdar a2ec53624d issue #211 since the up-value carries the type of the local variable, we can safely infer that the result of a get on array is a primitive type
3 years ago
Dibyendu Majumdar 9bceadd099 issue #211 since the up-value carries the type of the local variable, we can safely infer that the result of a get on array is a primitive type
3 years ago
Dibyendu Majumdar 25dbc31393 issue #211 since the up-value carries the type of the local variable, we can safely infer that the result of a get on array is a primitive type
3 years ago
Dibyendu Majumdar 2c5b958d04 issue #211 since the up-value carries the type of the local variable, we can safely infer that the result of a get on array is a primitive type
3 years ago
Dibyendu Majumdar b0a5b01142 issue #211 The type of update index op is not known.
3 years ago
Dibyendu Majumdar 170fd797a2 It seems that we don't really need this - must be the bug was elsewhere
3 years ago
Dibyendu Majumdar b2cc7c30c5 issue #210 tests
3 years ago
Dibyendu Majumdar 2decef927a issue #210 tests
3 years ago
Dibyendu Majumdar 486145900b issue #208 check for overflow when resizing array
3 years ago
Dibyendu Majumdar a3b933aa43 issue #208 check for overflow when resizing array
3 years ago
Dibyendu Majumdar 1e8597de7d issue #210 fix a test
3 years ago
Dibyendu Majumdar 9ced7e2bc0 issue #210 bug in recognizing the type change when an expression of VINDEXED type gets resolved
3 years ago
Dibyendu Majumdar fe7c76fff5 issue #207 add test
3 years ago
Dibyendu Majumdar ad4fc4e2d2 issue #207 check size more carefully when creating a slice
3 years ago
Dibyendu Majumdar 2c44bdd68b issue #208 add test
3 years ago
Dibyendu Majumdar 8bd404f62e issue #208 check for size overflow when creating integer/number array
3 years ago
Dibyendu Majumdar 4342fc7630 issue #210 #209 tests
3 years ago
Dibyendu Majumdar 927ddbf6a0 issue #210 LEN on any value other than array or table should be any type
3 years ago
Dibyendu Majumdar b549302d70 issue #209 BNOT on any value other than integer should result in any type
3 years ago
Dibyendu Majumdar 56a59a1f31 issue #169 apply latest upstream changes
3 years ago
Dibyendu Majumdar 773ebd9d32 issue #169 Update MIR sources - includes a memory leak bug fix
3 years ago
Dibyendu Majumdar def4d76ac9 Remove cmake branch from build.yml
3 years ago
Dibyendu Majumdar ef24ca67cb Merge branch 'master' of https://github.com/dibyendumajumdar/ravi
3 years ago
Dibyendu Majumdar 77cd6b9527 Update copyright notices
3 years ago
Dibyendu Majumdar 61a8ac889f Update docs
3 years ago
Dibyendu Majumdar 102c8bee8b Update copyright notices
3 years ago
Dibyendu Majumdar 3e05644f51 issue #198 Cleanup
3 years ago
Dibyendu Majumdar f7f59eed0f issue #198 Cleanup
3 years ago
Dibyendu Majumdar 2aeeea4dd3 issue #198 Include Ravi Compiler
3 years ago
Dibyendu Majumdar 31774723ff Merge branch 'cmake'
3 years ago
Dibyendu Majumdar cc0c098011 CMake refactoring
3 years ago
Dibyendu Majumdar f072326ea4 CMake build refactoring
3 years ago
Dibyendu Majumdar 66615703a6 Merge branch 'master' of https://github.com/dibyendumajumdar/ravi
3 years ago
Dibyendu Majumdar ef30fb2065
Update build.yml
3 years ago
Dibyendu Majumdar 2335973259
Update build.yml
3 years ago
Dibyendu Majumdar fae2a2bf77 issue #186 leftover cleanup
3 years ago
Dibyendu Majumdar 08c05100cf issue #203 leftover cleanup
3 years ago
Dibyendu Majumdar 3b09a22e96 issue #204 Code reorg
3 years ago
Dibyendu Majumdar 1f1d8cb428 issue #198 Add tests
3 years ago
Dibyendu Majumdar 9083492dcb issue #198 Add a test
3 years ago
Dibyendu Majumdar e6890743f0 issue #169 Update MIR
3 years ago
Dibyendu Majumdar 9cc59144ad issue #195 lstrlib
3 years ago
Dibyendu Majumdar f27792f050 issue #195 strings tests part 7
3 years ago
Dibyendu Majumdar df748ee844 issue #195 strings tests part 6
3 years ago
Dibyendu Majumdar ed90926c8d issue #195 strings tests part 5
3 years ago
Dibyendu Majumdar 1fb5b08491 issue #195 strings tests part 4
3 years ago
Dibyendu Majumdar facb56e0c1 issue #195 strings tests part 3
3 years ago
Dibyendu Majumdar 9a86f11bc3 issue #195 strings tests part 2
3 years ago
Dibyendu Majumdar 92fd0e3faf issue #195 strings tests part 1
3 years ago
Dibyendu Majumdar 54fc5277d6 issue #195 vararg tests part 2
3 years ago
Dibyendu Majumdar c11122376e issue #195 vararg tests part 1
3 years ago
Dibyendu Majumdar 36639d863a issue #196 reformat
3 years ago
Dibyendu Majumdar 71644f5450 issue #196 use a macro for slices
3 years ago
Dibyendu Majumdar b717be20e0 Fix a missing check
3 years ago
Dibyendu Majumdar 249f22c215 issue #196 Another test restored to 5.3 version.
3 years ago
Dibyendu Majumdar 2f76a1e869 issue #196 GC upvalue/thread cycle test reverted to 5.3 version due to difference in behaviour in Lua 5.4 (as up-values are not reference counted anymore).
3 years ago
Dibyendu Majumdar 4acc325c97 issue #196 Fix test case
3 years ago
Dibyendu Majumdar ec2d27a7dc issue #196 Fix test case
3 years ago
Dibyendu Majumdar 7320131275 Merge branch 'master' of https://github.com/dibyendumajumdar/ravi
3 years ago
Dibyendu Majumdar f9c4b5dbec issue #203 Removing LLVM support, see llvm branch for archived version
3 years ago
Dibyendu Majumdar 0c3277ad4a
issue #203 Doc update
3 years ago
Dibyendu Majumdar d6422fe2e3
Update .travis.yml
3 years ago
Dibyendu Majumdar 66511033fd issue #203 Removing LLVM support, see llvm branch for archived version
3 years ago
Dibyendu Majumdar 05f365352c issue #169 Fix issue on ARM64
3 years ago
Dibyendu Majumdar 219d44c2ba issue #196 Revise upval barrier code - TBC
3 years ago
Dibyendu Majumdar e4a240be77 add assertions
3 years ago
Dibyendu Majumdar 2ace67282d issue #196 Undo a change that was not needed
3 years ago
Dibyendu Majumdar 6909d79f61 Merge branch 'master' of https://github.com/dibyendumajumdar/ravi
3 years ago
Dibyendu Majumdar 16ea0643cd issue #202 Prepare for testing on different archs
3 years ago
Dibyendu Majumdar a83afe2d02
Update build.yml
3 years ago
Dibyendu Majumdar d5668379b1
Update build.yml
3 years ago
Dibyendu Majumdar 849819e891
Update build.yml
3 years ago
Dibyendu Majumdar 53ff35455a issue #198 Updated interface to the compiler lib
3 years ago
Dibyendu Majumdar f9acba8142 issue #169 MIR update
3 years ago
Dibyendu Majumdar 389c98ee35
Merge pull request #200 from dibyendumajumdar/lua542-gc
3 years ago
Dibyendu Majumdar e9cb48e15c issue #196 Update GC code from Lua 5.4.2
3 years ago
Dibyendu Majumdar f346122073 issue #196 Update GC code from Lua 5.4.2
3 years ago
Dibyendu Majumdar 001dceb996 issue #198 Initial support for AOT compiled code
3 years ago
Dibyendu Majumdar 0f6a4084ae issue #198 Refactor
3 years ago
Dibyendu Majumdar 95ac6123a9 issue #198 Refactored api that puts all of the codegen at the compiler end, making the way for an AOT solution
3 years ago
Dibyendu Majumdar 8d6e403f30 issue #198 Refactored api that puts all of the codegen at the compiler end, making the way for an AOT solution
3 years ago
Dibyendu Majumdar bb218051ba Add support for codespaces
4 years ago
Dibyendu Majumdar bc4142428c issue #198 add expected results for the tests
4 years ago
Dibyendu Majumdar 86c2020411 issue #198 more tests
4 years ago
Dibyendu Majumdar 5a0c3600fc issue #198 add test
4 years ago
Dibyendu Majumdar a54f156304 issue #198 add test
4 years ago
Dibyendu Majumdar 8cec4cc827 issue #198 More tests
4 years ago
Dibyendu Majumdar a82d42b847 issue #169 MIR update
4 years ago
Dibyendu Majumdar a3bf9dc10e issue #198 add another test for sieve - but using while loop
4 years ago
Dibyendu Majumdar ea6c408c97 issue #198 tests for farray opcodes
4 years ago
Dibyendu Majumdar 487624cee9 issue #198 tests for iarray opcodes
4 years ago
Dibyendu Majumdar e361bd7387 issue #198 add luaD_growstack to the list of linked functions in the JIT
4 years ago
Dibyendu Majumdar 69840eacdb issue #198 Tests for op_call instruction - couple fail due to missing support for multiret in return
4 years ago
Dibyendu Majumdar d0b3ed989b issue #169 Enable MIR on Windows
4 years ago
Dibyendu Majumdar f6378c97b4 issue #169 Apply upstream changes
4 years ago
Dibyendu Majumdar 7f94078e8e issue #169 update MIR sources from upstream
4 years ago
Dibyendu Majumdar 174cfa0168 issue #198 add tests for op_loadglobal, op_storeglobal
4 years ago
Dibyendu Majumdar 523f07ba57 issue #198 initialize the _ENV upvalue
4 years ago
Dibyendu Majumdar 71bb74f773 Merge branch 'ravi-compiler'
4 years ago
Dibyendu Majumdar 6b273f7468 issue #198 Make the MIR portion of the compiler optional so that we can just get the C codegen
4 years ago
Dibyendu Majumdar a3940b0120 Add link to the VSCode debugger
4 years ago
Dibyendu Majumdar b1f36a56e1 Merge branch 'ravi-compiler' of https://github.com/dibyendumajumdar/ravi into ravi-compiler
4 years ago
Dibyendu Majumdar 6daf1a5529 Add link to the VSCode debugger
4 years ago
Dibyendu Majumdar 4819f38a80 issue #198 Test cases for op_mov, op_br, op_cbr
4 years ago
Dibyendu Majumdar 4f7cab694c issue #198 Test case for op_mov
4 years ago
Dibyendu Majumdar d65beddb75 issue #198 Test case for op_ret
4 years ago
Dibyendu Majumdar 45f823b604 issue #198 Missing initializer for proto->upvalues[i].usertype when usertype is NULL
4 years ago
Dibyendu Majumdar a45eaa8a24 issue #198 Cmake build failing to find c2mir header
4 years ago
Dibyendu Majumdar 902f5c1653 Comments
4 years ago
Dibyendu Majumdar 48617fda44 issue #198 WIP add api functions for setting some proto attributes
4 years ago
Dibyendu Majumdar fba87f24f1 issue #198 WIP add api for error reporting and generating debug messages
4 years ago
Dibyendu Majumdar 90a26a4b88 issue #198 WIP add api for marking a proto as var arg
4 years ago
Dibyendu Majumdar 0f698f354c issue #198 WIP add api for adding an upvalue. Hook up the compiler library.
4 years ago
Dibyendu Majumdar 7515ef9d89 Merge branch 'master' into ravi-compiler
4 years ago
Dibyendu Majumdar 2a6c5575e3 issue #198 WIP revise the api for compiling C code using MIR
4 years ago
Dibyendu Majumdar 98c96c11eb issue #198 WIP flesh out some more api functions
4 years ago
Dibyendu Majumdar 2cab1f104a issue #198 WIP flesh out some more api functions
4 years ago
Dibyendu Majumdar be67d117c2 Merge branch 'master' into ravi-compiler
4 years ago
Dibyendu Majumdar 430f9a532b issue #169 preparation for ravicomp integration
4 years ago
Dibyendu Majumdar e20df095df issue #169 Fix builds failure
4 years ago
Dibyendu Majumdar 90f54987eb issue #169 Update MIR to latest version
4 years ago
Dibyendu Majumdar cde0a39bc2 issue #169 Refactor the c2mir invocation so that we can reuse this when integrating with ravicomp library.
4 years ago
Dibyendu Majumdar 16d59f65ee issue #169 Add include directories of MIR to help CLion find them
4 years ago
Dibyendu Majumdar 534abc7525 issue #169 fix memory leak
4 years ago
Dibyendu Majumdar 02a7796f77 issue #198 WIP
4 years ago
Dibyendu Majumdar 34c6b33a2f issue #198 WIP add interface to RaviCompiler project
4 years ago
Dibyendu Majumdar 0d22f81b2a Merge branch 'master' of https://github.com/dibyendumajumdar/ravi
4 years ago
Dibyendu Majumdar ee056a1bd6 Comment added about upvalues
4 years ago
Dibyendu Majumdar 40e020916a
Update README.rst
4 years ago
Dibyendu Majumdar 57f6fc82f7
Update README.rst
4 years ago
Dibyendu Majumdar c4535d5d9f
Update README.rst
4 years ago
Dibyendu Majumdar 940a80d9bb On returning from a function the results are copied to the registers starting at the function's reference in CallInfo->func
4 years ago
Dibyendu Majumdar 4d348e0609 Update readme / intro
4 years ago
Dibyendu Majumdar f76b4690f0 Initial patch to implement 'defer' statement in Lua 5.4
4 years ago
Dibyendu Majumdar 42cfa54e9e Initial patch to implement 'defer' statement in Lua 5.4
4 years ago
Dibyendu Majumdar 81555acc72 Update docs to correctly state the default garbage collector.
4 years ago
Dibyendu Majumdar 7e219f76bd Lua 5.3 patch that implements 'defer' statement
4 years ago
Dibyendu Majumdar 8b854b0a02 Lua 5.3 patch that implements 'defer' statement
4 years ago
Dibyendu Majumdar 920cd2dab8 issue #196 test case for the gen GC bug
4 years ago
Dibyendu Majumdar b73322ad16 issue #157 Detail
4 years ago
Dibyendu Majumdar 844b15683b issue #196 When an object aged OLD1 is finalized, it is moved from the list 'finobj' to the *beginning* of the list 'allgc'. So, this part of the
4 years ago
Dibyendu Majumdar 63bf14b43c issue #169 Merge latest MIR code from upstream
4 years ago
Dibyendu Majumdar 73d6bc1c07 issue #195 Some improvements to Makefile build - based on Lua 5.4 build improvements
4 years ago
Dibyendu Majumdar c42ac0e9fa issue #196 don't make generational GC the default yet - various issues reported on Lua mailing list.
4 years ago
Dibyendu Majumdar d26e3626b9 issue #182 ASAN support must be explicitly enabled as not all installations have asan installed
4 years ago
Dibyendu Majumdar f8a680ed82 Merge branch 'master' of https://github.com/dibyendumajumdar/ravi
4 years ago
Dibyendu Majumdar 4bbcb908ec issue #157 fix build error on windows
4 years ago
Dibyendu Majumdar 7f63b9fae7 issue #182 add asan support if available
4 years ago
Dibyendu Majumdar 3a75ee9eb7 Merge branch 'master' of https://github.com/dibyendumajumdar/ravi
4 years ago
Dibyendu Majumdar cdd6fbfeaa issue #182: remove commented lines
4 years ago

@ -0,0 +1,7 @@
# [Choice] Debian / Ubuntu version: debian-10, debian-9, ubuntu-20.04, ubuntu-18.04
ARG VARIANT=buster
FROM mcr.microsoft.com/vscode/devcontainers/cpp:dev-${VARIANT}
# [Optional] Uncomment this section to install additional packages.
# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
# && apt-get -y install --no-install-recommends <your-package-list-here>

@ -0,0 +1,12 @@
# [Choice] Debian / Ubuntu version: debian-10, debian-9, ubuntu-20.04, ubuntu-18.04
ARG VARIANT=buster
FROM mcr.microsoft.com/vscode/devcontainers/base:${VARIANT}
# Install needed packages. Use a separate RUN statement to add your own dependencies.
RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
&& apt-get -y install build-essential cmake cppcheck valgrind clang lldb llvm gdb \
&& apt-get autoremove -y && apt-get clean -y && rm -rf /var/lib/apt/lists/*
# [Optional] Uncomment this section to install additional OS packages.
# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
# && apt-get -y install --no-install-recommends <your-package-list-here>

@ -0,0 +1,28 @@
{
"name": "C++",
"build": {
"dockerfile": "Dockerfile",
// Update 'VARIANT' to pick an Debian / Ubuntu OS version: debian-10, debian-9, ubuntu-20.04, ubuntu-18.04
"args": { "VARIANT": "debian-10" }
},
"runArgs": [ "--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined"],
// Set *default* container specific settings.json values on container create.
"settings": {
"terminal.integrated.shell.linux": "/bin/bash"
},
// Add the IDs of extensions you want installed when the container is created.
"extensions": [
"ms-vscode.cpptools"
],
// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],
// Use 'postCreateCommand' to run commands after the container is created.
// "postCreateCommand": "gcc -v",
// Comment out connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
"remoteUser": "vscode"
}

@ -12,8 +12,11 @@ env:
jobs:
build:
runs-on: ubuntu-latest
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
steps:
- uses: actions/checkout@v2

@ -3,23 +3,17 @@ os:
- linux
arch:
- amd64
- arm64
compiler:
- gcc
cache: ccache
dist: bionic
dist: focal
addons:
apt:
packages:
- g++
- gcc
- ccache
install:
- curl https://releases.llvm.org/6.0.1/clang+llvm-6.0.1-x86_64-linux-gnu-ubuntu-16.04.tar.xz | tar -xJf -
script:
- mkdir $TRAVIS_BUILD_DIR/build
- cd $TRAVIS_BUILD_DIR/build && cmake -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=$TRAVIS_BUILD_DIR/clang+llvm-6.0.1-x86_64-linux-gnu-ubuntu-16.04/lib/cmake/llvm -G "Unix Makefiles" -DLLVM_JIT=ON ..
- cd $TRAVIS_BUILD_DIR/build && make
- cd $TRAVIS_BUILD_DIR/tests && sh ./run_tests.sh $TRAVIS_BUILD_DIR/build/ravi
- mkdir $TRAVIS_BUILD_DIR/buildmir
- cd $TRAVIS_BUILD_DIR/buildmir && cmake -DCMAKE_BUILD_TYPE=Debug -DLTESTS=ON -DMIR_JIT=ON -G "Unix Makefiles" ..
- cd $TRAVIS_BUILD_DIR/buildmir && make

@ -1,29 +1,22 @@
cmake_minimum_required(VERSION 3.12)
project(Ravi VERSION 1.0.4 LANGUAGES C CXX)
project(Ravi VERSION 1.0.4 LANGUAGES C)
# By default on platforms where MIR is supported (right now Linux X86_64)
# MIR JIT backend is automatically enabled. To disable user must specify
# By default MIR JIT backend is automatically enabled. To disable user must specify
# NO_JIT=ON
# LLVM has to be explicitly specified using LLVM_JIT=ON
option(LLVM_JIT "Controls whether LLVM JIT compilation will be enabled, default is OFF" OFF)
option(NO_JIT "Controls whether JIT should be disabled, default is OFF" OFF)
option(STATIC_BUILD "Build static version of Ravi, default is OFF" OFF)
option(COMPUTED_GOTO "Controls whether the interpreter switch will use computed gotos on gcc/clang, default is ON" ON)
option(LTESTS "Controls whether ltests are enabled in Debug mode; note requires Debug build" ON)
option(ASAN "Controls whether address sanitizer should be enabled" OFF)
option(RAVICOMP "Controls whether to link in RaviComp" ON)
# By default on non-Windows platforms we enable MIR JIT
if (NOT WIN32
AND NOT LLVM_JIT
AND NOT NO_JIT)
# By default we enable MIR JIT
if (NOT NO_JIT)
set(MIR_JIT ON)
endif ()
if (MIR_JIT)
if (MSVC OR WIN32)
message(FATAL_ERROR "MIR_JIT is not supported when using MSVC and/or WIN32")
endif ()
set(LLVM_JIT OFF)
set(STATIC_BUILD OFF) # Because we need to expose the symbols in the library
endif ()
@ -51,25 +44,10 @@ set(LUA_CORE_SRCS src/lapi.c src/lcode.c src/lctype.c src/ldebug.c src/ldo.c src
set(LUA_LIB_SRCS src/lauxlib.c src/lbaselib.c src/lbitlib.c src/lcorolib.c src/ldblib.c src/liolib.c
src/lmathlib.c src/loslib.c src/ltablib.c src/lstrlib.c src/loadlib.c src/linit.c src/lutf8lib.c)
set(LUA_HEADERS include/lua.h include/luaconf.h include/lualib.h include/lauxlib.h)
# LLVM code gen
set(LLVM_JIT_SRCS src/ravi_llvmjit.cpp src/ravi_llvmtypes.cpp
src/ravi_llvmcodegen.cpp src/ravi_llvmforprep.cpp src/ravi_llvmcomp.cpp
src/ravi_llvmreturn.cpp src/ravi_llvmload.cpp src/ravi_llvmforloop.cpp
src/ravi_llvmarith1.cpp src/ravi_llvmcall.cpp src/ravi_llvmtable.cpp
src/ravi_llvmarith2.cpp src/ravi_llvmtforcall.cpp src/ravi_llvmrest.cpp)
# MIR sources
#if (MIR_JIT)
# add_subdirectory(mir)
# set(MIRJIT_LIBRARIES c2mir)
#endif()
set(MIR_HEADERS mir/mir.h mir/mir-gen.h mir/mir-varr.h mir/mir-dlist.h mir/mir-htab.h
mir/mir-hash.h mir/mir-bitmap.h)
set(MIR_SRCS mir/mir.c mir/mir-gen.c)
set(C2MIR_SRCS mir/c2mir/c2mir.c)
# MIR code gen
set(MIR_JIT_SRCS src/ravi_mirjit.c)
set(NO_JIT_SRCS src/ravi_nojit.c)
set(LUA_CMD_SRCS src/lua.c)
set(RAVICOMP_SRCS src/ravi_complib.c)
file(GLOB RAVI_HEADERS "${PROJECT_SOURCE_DIR}/include/*.h")
if (COMPUTED_GOTO AND NOT MSVC)
@ -83,60 +61,35 @@ endif ()
include(CheckCCompilerFlag)
check_c_compiler_flag("-march=native" COMPILER_OPT_ARCH_NATIVE_SUPPORTED)
if (COMPILER_OPT_ARCH_NATIVE_SUPPORTED AND NOT CMAKE_C_FLAGS MATCHES "-march=")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native")
endif()
if (LLVM_JIT)
find_package(LLVM REQUIRED CONFIG)
message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
message(STATUS "LLVM Definitions ${LLVM_DEFINITIONS}")
message(STATUS "LLVMJIT enabled")
set(JIT_SRCS ${LLVM_JIT_SRCS})
if (NOT MSVC)
set_source_files_properties(${LLVM_JIT_SRCS} PROPERTIES
COMPILE_FLAGS "-fno-rtti -fno-exceptions ${LLVM_DEFINITIONS}")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native")
endif ()
if (ASAN)
set(CMAKE_REQUIRED_FLAGS "-fsanitize=address")
check_c_compiler_flag("-fsanitize=address" COMPILER_ASAN_SUPPORTED)
if (COMPILER_ASAN_SUPPORTED AND NOT CMAKE_C_FLAGS_DEBUG MATCHES "-fsanitize=address")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -fsanitize=address")
endif ()
set_property(
SOURCE ${LLVM_JIT_SRCS}
APPEND
PROPERTY INCLUDE_DIRECTORIES ${LLVM_INCLUDE_DIRS}
)
# FIXME get rid of this dependency
set_property(
SOURCE ${LUA_CMD_SRCS}
APPEND
PROPERTY INCLUDE_DIRECTORIES ${LLVM_INCLUDE_DIRS})
endif ()
if ($ENV{CLION_IDE})
# CLion seems unable to handle include paths set on sources
include_directories(${LLVM_INCLUDE_DIRS})
endif ()
if (MIR_JIT)
message(STATUS "MIRJIT enabled")
add_subdirectory(mir)
set(MIRJIT_LIBRARIES c2mir)
set(JIT_SRCS ${MIR_JIT_SRCS})
else ()
if (MIR_JIT)
message(STATUS "MIRJIT enabled")
# set(JIT_SRCS ${MIR_JIT_SRCS})
# set_property(SOURCE ${MIR_JIT_SRCS}
# APPEND
# PROPERTY INCLUDE_DIRECTORIES "${CMAKE_SOURCE_DIR}/mir;${CMAKE_SOURCE_DIR}/mir/c2mir")
set(JIT_SRCS ${MIR_SRCS} ${C2MIR_SRCS} ${MIR_JIT_SRCS})
set_property(SOURCE ${MIR_SRCS} ${C2MIR_SRCS} ${MIR_JIT_SRCS}
APPEND
PROPERTY INCLUDE_DIRECTORIES "${CMAKE_SOURCE_DIR}/mir;${CMAKE_SOURCE_DIR}/mir/c2mir")
set_property(SOURCE ${MIR_SRCS} ${C2MIR_SRCS} ${MIR_JIT_SRCS}
APPEND
PROPERTY COMPILE_DEFINITIONS "MIR_NO_IO=0;MIR_NO_SCAN=1")
else()
set(JIT_SRCS ${NO_JIT_SRCS})
endif ()
set(JIT_SRCS ${NO_JIT_SRCS})
endif ()
if (RAVICOMP)
set(ADDON_SRCS ${RAVICOMP_SRCS})
add_subdirectory(ravicomp)
set(RAVICOMP_LIBRARIES ravicomp)
endif ()
# IDE stuff
if (MSVC OR APPLE)
source_group("Ravi Headers" FILES ${RAVI_HEADERS})
source_group("Ravi Source Files" FILES ${LUA_CORE_SRCS} ${LUA_LIB_SRCS} ${JIT_SRCS})
source_group("Ravi Source Files" FILES ${LUA_CORE_SRCS} ${LUA_LIB_SRCS} ${JIT_SRCS} ${ADDON_SRCS})
endif ()
# Misc setup
@ -151,63 +104,6 @@ elseif (NOT WIN32)
set(EXTRA_LIBRARIES m dl readline)
endif ()
# Enable minimal required LLVM components so that the
# the size of the resulting binary is manageable
if (LLVM_JIT)
if (${LLVM_PACKAGE_VERSION} VERSION_LESS "3.8")
set(LLVM_EXTRA_LIBS ipa)
endif ()
if (NOT ${LLVM_PACKAGE_VERSION} VERSION_LESS "5.0.0")
set(LLVM_EXTRA_LIBS orcjit)
endif ()
message(STATUS "SYSTEM_PROCESSOR ${CMAKE_SYSTEM_PROCESSOR}")
if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
set(LLVM_LIBS_PROCESSOR
X86CodeGen
X86AsmParser
X86Disassembler
X86Desc
X86Info
X86Utils
)
if (${LLVM_PACKAGE_VERSION} VERSION_LESS "9.0.0")
list(APPEND LLVM_LIBS_PROCESSOR X86AsmPrinter)
endif ()
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
set(LLVM_LIBS_PROCESSOR
ARMCodeGen
ARMAsmParser
ARMDisassembler
ARMAsmPrinter
ARMDesc
ARMInfo
)
endif ()
llvm_map_components_to_libnames(LLVM_LIBS
Analysis
AsmParser
AsmPrinter
BitReader
Core
CodeGen
ExecutionEngine
InstCombine
${LLVM_EXTRA_LIBS}
ipo
MC
MCJIT
MCParser
Object
RuntimeDyld
ScalarOpts
Support
Target
TransformUtils
${LLVM_LIBS_PROCESSOR}
)
message(STATUS "LLVM_LIBS ${LLVM_LIBS}")
endif ()
set(LIBRAVI_NAME libravi)
#Main library
@ -215,8 +111,9 @@ add_library(${LIBRAVI_NAME} ${LIBRAVI_BUILD_TYPE}
${RAVI_HEADERS}
${LUA_LIB_SRCS}
${LUA_CORE_SRCS}
${JIT_SRCS})
target_link_libraries(${LIBRAVI_NAME} ${EXTRA_LIBRARIES} ${LLVM_LIBS} ${MIRJIT_LIBRARIES})
${JIT_SRCS}
${ADDON_SRCS})
target_link_libraries(${LIBRAVI_NAME} LINK_PUBLIC ${EXTRA_LIBRARIES} ${MIRJIT_LIBRARIES} ${RAVICOMP_LIBRARIES})
# Main Ravi executable
add_executable(ravi ${LUA_CMD_SRCS})
@ -253,13 +150,7 @@ if (NOT LTESTS)
PROPERTY COMPILE_DEFINITIONS NO_LUA_DEBUG)
set(NO_LUA_DEBUG 1)
endif ()
if (LLVM_JIT)
set_property(
TARGET ${LIBRAVI_NAME} ravi
APPEND
PROPERTY COMPILE_DEFINITIONS "USE_LLVM=1")
set(USE_LLVM 1)
elseif (MIR_JIT)
if (MIR_JIT)
set_property(
TARGET ${LIBRAVI_NAME} ravi
APPEND
@ -279,6 +170,13 @@ if (NOT STATIC_BUILD)
else ()
set_target_properties(${LIBRAVI_NAME} PROPERTIES PREFIX "")
endif ()
if (RAVICOMP)
set_property(
TARGET ${LIBRAVI_NAME}
APPEND
PROPERTY COMPILE_DEFINITIONS "USE_RAVICOMP=1")
set(USE_RAVICOMP 1)
endif ()
if (APPLE)
set_property(
TARGET ${LIBRAVI_NAME} libravinojit_static
@ -290,8 +188,6 @@ elseif (UNIX)
APPEND
PROPERTY COMPILE_DEFINITIONS "LUA_USE_LINUX=1")
endif ()
set_property(TARGET ${LIBRAVI_NAME} PROPERTY CXX_STANDARD 14)
set_property(TARGET ${LIBRAVI_NAME} PROPERTY CXX_EXTENSIONS OFF)
include(GNUInstallDirs)
configure_file(ravi-config.h.in ravi-config.h @ONLY)

@ -6,7 +6,7 @@
# == CHANGE THE SETTINGS BELOW TO SUIT YOUR ENVIRONMENT =======================
# Your platform. See PLATS for possible values.
PLAT= none
PLAT= guess
# Where to install. The installation starts in the src and doc directories,
# so take care if INSTALL_TOP is not an absolute path. See the local target.
@ -38,7 +38,7 @@ RM= rm -f
# == END OF USER SETTINGS -- NO NEED TO CHANGE ANYTHING BELOW THIS LINE =======
# Convenience platforms targets.
PLATS= aix bsd c89 freebsd generic linux macosx mingw posix solaris
PLATS= guess aix bsd c89 freebsd generic linux linux-noreadline macosx mingw posix solaris
# What to install.
TO_BIN= ravi

@ -0,0 +1,195 @@
Ravi Programming Language
=========================
[![Build Status](https://travis-ci.org/dibyendumajumdar/ravi.svg?branch=master)](https://travis-ci.org/dibyendumajumdar/ravi)
Ravi is a dialect of [Lua](http://www.lua.org/) with limited optional
static typing and a [MIR](https://github.com/vnmakarov/mir)-powered
JIT compiler. The name Ravi comes from the Sanskrit word for the Sun.
Interestingly, a precursor to Lua was
[Sol](http://www.lua.org/history.html), which had support for static
types; Sol means the Sun in Portuguese.
Lua is perfect as a small embeddable dynamic language, so why a
derivative? Ravi extends Lua with static typing for improved performance
when JIT compilation is enabled. However, the static typing is optional,
and therefore Lua programs are also valid Ravi programs.
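To make this concrete, here is a minimal sketch of what the optional annotations look like. It is an illustrative example written for this note, not code taken from the repository or its test suite; remove the annotations and it is plain Lua.

```lua
-- Illustrative sketch of Ravi's optional static typing (not from the repo's tests).
-- Typed locals and parameters let the VM/JIT use type-specific bytecodes.
local function sum_first(n: integer)
  local total: integer = 0       -- only integer values may be assigned here
  local values: integer[] = {}   -- integer array, a Ravi-specific type
  for i = 1, n do
    values[i] = i
    total = total + values[i]
  end
  return total
end

print(sum_first(10))  --> 55
```

Dropping the `: integer` and `: integer[]` annotations yields an equivalent plain Lua program, which is the compatibility property described below.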
There are other attempts to add static typing to Lua - e.g. [Typed
Lua](https://github.com/andremm/typedlua) but these efforts are mostly
about adding static type checks in the language while leaving the VM
unmodified. The Typed Lua effort is very similar to the approach taken
by Typescript in the JavaScript world. The static typing is to aid
programming in the large - the code is eventually translated to standard
Lua and executed in the unmodified Lua VM.
My motivation is somewhat different - I want to enhance the VM to
support more efficient operations when types are known. Type information
can be exploited by JIT compilation technology to improve performance.
At the same time, I want to keep the language safe and therefore usable
by non-expert programmers.
Of course there is the fantastic [LuaJIT](http://luajit.org)
implementation. Ravi has a different goal compared to LuaJIT. Ravi
prioritizes ease of maintenance and support, language safety, and
compatibility with Lua 5.3, over maximum performance. For a more detailed
comparison, please refer to the documentation links below.
Features
--------
- Optional static typing - for details [see the reference
manual](https://the-ravi-programming-language.readthedocs.io/en/latest/ravi-reference.html).
- Type specific bytecodes to improve performance
- Compatibility with Lua 5.3 (see Compatibility section below)
- Generational GC from Lua 5.4
- `defer` statement for releasing resources
- Compact JIT backend [MIR](https://github.com/vnmakarov/mir).
- A [distribution with
batteries](https://github.com/dibyendumajumdar/Suravi).
- A [Visual Studio Code debugger extension](https://marketplace.visualstudio.com/items?itemName=ravilang.ravi-debug) (interpreted mode debugger).
Documentation
-------------
- For the Lua extensions in Ravi see the [Reference
Manual](https://the-ravi-programming-language.readthedocs.io/en/latest/ravi-reference.html).
- [MIR JIT Build
instructions](https://the-ravi-programming-language.readthedocs.io/en/latest/ravi-mir-instructions.html).
- Also see [Ravi
Documentation](http://the-ravi-programming-language.readthedocs.org/en/latest/index.html).
- and the slides I presented at the [Lua 2015
Workshop](http://www.lua.org/wshop15.html).
Lua Goodies
-----------
- [An Introduction to
Lua](http://the-ravi-programming-language.readthedocs.io/en/latest/lua-introduction.html)
attempts to provide a quick overview of Lua for folks coming from
other languages.
- [Lua 5.3 Bytecode
Reference](http://the-ravi-programming-language.readthedocs.io/en/latest/lua_bytecode_reference.html)
is my attempt to bring up to date the [Lua 5.1 Bytecode
Reference](http://luaforge.net/docman/83/98/ANoFrillsIntroToLua51VMInstructions.pdf).
- A [patch for Lua
5.3](https://github.com/dibyendumajumdar/ravi/blob/master/patches/defer_statement_for_Lua_5_3.patch)
implements the 'defer' statement.
- A [patch for Lua
5.4.[0-2]](https://github.com/dibyendumajumdar/ravi/blob/master/patches/defer_statement_for_Lua_5_4.patch)
implements the 'defer' statement.
- Updated [patch for Lua
5.4.3](https://github.com/dibyendumajumdar/ravi/blob/master/patches/defer_statement_patch_for_Lua_5_4_3.patch)
implements the 'defer' statement.
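As a quick illustration of the statement these patches add, the sketch below shows a deferred close that runs even when the function exits via an error; it is a made-up example (the file name and simulated failure are hypothetical, and the exact error text is not shown).

```lua
-- Hedged sketch of the defer statement; file name and failure are made up.
local function write_greeting(path)
  local f = assert(io.open(path, "w"))
  defer f:close() end            -- runs when the function exits, normally or on error
  f:write("hello\n")
  error("simulated failure")     -- f is still closed before the error propagates
end

print(pcall(write_greeting, "greeting.txt"))  --> false, followed by the error message
```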
Lua 5.4 Position Statement
--------------------------
The relationship of Lua 5.4 features to Ravi is as follows:
- Generational GC - back-ported to Ravi.
- New random number generator - back-ported to Ravi.
- Multiple user values can be associated with userdata - under
consideration.
- `<const>` variables - not planned.
- `<close>` variables - Ravi has the `defer` statement, which is the
    better option in my opinion, hence no plans to support `<close>`
    variables.
- Interpreter performance improvements - these are beneficial to the Lua
    interpreter but not to the JIT backends, hence there is not much point
    in back-porting them.
- Table implementation changes - under consideration.
- String to number coercion is now part of string library metamethods
- back-ported to Ravi.
- utf8 library accepts codepoints up to 2\^31 - back-ported to Ravi.
- Removal of compatibility layers for 5.1, and 5.2 - not implemented
as Ravi continues to provide these layers as per Lua 5.3.
Compatibility with Lua 5.3
--------------------------
Ravi should be able to run all Lua 5.3 programs in interpreted mode, but
the following should be noted:
- Ravi supports optional typing and enhanced types such as arrays (see
the documentation). Programs using these features cannot be run by
standard Lua. However all types in Ravi can be passed to Lua
functions; operations on Ravi arrays within Lua code will be subject
to restrictions as described in the section above on arrays.
- Values crossing from Lua to Ravi will be subjected to typechecks
    should these values be assigned to typed variables (see the sketch
    after this list).
- Upvalues cannot subvert the static typing of local variables (issue
\#26) when types are annotated.
- Certain Lua limits are reduced due to changed byte code structure.
These are described below.
- Ravi uses an extended bytecode which means it is not compatible with
Lua 5.x bytecode.
- Ravi incorporates the new Generational GC from Lua 5.4, hence the GC
interface has changed.
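The point above about values crossing from Lua to Ravi can be illustrated with a small sketch (a hypothetical example, not from the repository): arguments bound to typed parameters are checked at the call boundary, so untyped Lua code cannot silently violate a Ravi function's annotations.

```lua
-- Sketch of the Lua-to-Ravi boundary check; the exact error text is not shown here.
local function scale(x: number, factor: number)
  return x * factor
end

print(scale(2.0, 1.5))                    --> 3.0
print(pcall(scale, "not a number", 1.5))  --> false, followed by a type error message
```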
| Limit name      | Lua value | Ravi value |
| --------------- | --------- | ---------- |
| MAXUPVAL        | 255       | 125        |
| LUAI\_MAXCCALLS | 200       | 125        |
| MAXREGS         | 255       | 125        |
| MAXVARS         | 200       | 125        |
| MAXARGLINE      | 250       | 120        |
When JIT compilation is enabled, the following additional constraints
apply:
- Ravi will only execute JITed code from the main Lua thread; any
secondary threads (coroutines) execute in interpreter mode.
- In JITed code, tail calls are implemented as regular calls, so unlike
    the interpreter VM, which supports infinite tail recursion,
    JIT-compiled code only supports tail recursion to a depth of about 110
    (issue \#17).
- The debug API and hooks are not supported in JIT mode.
History
-------
- 2015
  - Implemented JIT compilation using LLVM
  - Implemented [libgccjit based alternative JIT](https://github.com/dibyendumajumdar/ravi/tree/gccjit-ravi534) (now discontinued)
- 2016
  - Implemented debugger for Ravi and Lua 5.3 for [Visual Studio Code](https://github.com/dibyendumajumdar/ravi/tree/master/vscode-debugger)
- 2017
  - Embedded C compiler using dmrC project (C JIT compiler) (now discontinued)
  - Additional type-annotations
- 2018
  - Implemented [Eclipse OMR JIT backend](https://github.com/dibyendumajumdar/ravi/tree/omrjit) (now discontinued)
  - Created [Ravi with batteries](https://github.com/dibyendumajumdar/Suravi).
- 2019
  - New language feature - defer statement
  - New JIT backend [MIR](https://github.com/vnmakarov/mir).
- 2020
  - [New parser / type checker / compiler](https://github.com/dibyendumajumdar/ravi-compiler)
  - Generational GC back-ported from Lua 5.4
  - Support for [LLVM backend](https://github.com/dibyendumajumdar/ravi/tree/llvm) archived
- 2021 (Plan)
  - Integrated AOT and JIT compilation support
  - Ravi 1.0 release
License
-------
MIT License

@ -1,125 +0,0 @@
=========================
Ravi Programming Language
=========================
.. image:: https://travis-ci.org/dibyendumajumdar/ravi.svg?branch=master
:target: https://travis-ci.org/dibyendumajumdar/ravi
Ravi is a derivative/dialect of `Lua 5.3 <http://www.lua.org/>`_ with limited optional static typing and
features `MIR <https://github.com/vnmakarov/mir>`_ and `LLVM <http://www.llvm.org/>`_ powered JIT compilers.
The name Ravi comes from the Sanskrit word for the Sun.
Interestingly a precursor to Lua was `Sol <http://www.lua.org/history.html>`_ which had support for
static types; Sol means the Sun in Portugese.
Lua is perfect as a small embeddable dynamic language so why a derivative? Ravi extends Lua with
static typing for improved performance when JIT compilation is enabled. However, the static typing is
optional and therefore Lua programs are also valid Ravi programs.
There are other attempts to add static typing to Lua - e.g. `Typed Lua <https://github.com/andremm/typedlua>`_ but
these efforts are mostly about adding static type checks in the language while leaving the VM unmodified.
The Typed Lua effort is very similar to the approach taken by Typescript in the JavaScript world.
The static typing is to aid programming in the large - the code is eventually translated to standard Lua
and executed in the unmodified Lua VM.
My motivation is somewhat different - I want to enhance the VM to support more efficient operations when types are
known. Type information can be exploited by JIT compilation technology to improve performance. At the same time,
I want to keep the language safe and therefore usable by non-expert programmers.
Of course there is the fantastic `LuaJIT <http://luajit.org>`_ implementation. Ravi has a different goal compared to
LuaJIT. Ravi prioritizes ease of maintenance and support, language safety, and compatibility with Lua 5.3,
over maximum performance. For more detailed comparison please refer to the documentation links below.
Features
========
* Optional static typing - for details `see the reference manual <https://the-ravi-programming-language.readthedocs.io/en/latest/ravi-reference.html>`_.
* Type specific bytecodes to improve performance
* Compatibility with Lua 5.3 (see Compatibility section below)
* Generational GC from Lua 5.4
* ``defer`` statement for releasing resources
* Compact JIT backend `MIR <https://github.com/vnmakarov/mir>`_; only Linux and x86-64 supported for now.
* `LLVM <http://www.llvm.org/>`_ supported as alternative JIT backend.
* A `distribution with batteries <https://github.com/dibyendumajumdar/Suravi>`_.
Documentation
=============
* For the Lua extensions in Ravi see the `Reference Manual <https://the-ravi-programming-language.readthedocs.io/en/latest/ravi-reference.html>`_.
* `MIR JIT Build instructions <https://the-ravi-programming-language.readthedocs.io/en/latest/ravi-mir-instructions.html>`_.
* `LLVM JIT Build instructions <https://the-ravi-programming-language.readthedocs.io/en/latest/ravi-llvm-instructions.html>`_.
* Also see `Ravi Documentation <http://the-ravi-programming-language.readthedocs.org/en/latest/index.html>`_.
* and the slides I presented at the `Lua 2015 Workshop <http://www.lua.org/wshop15.html>`_.
Lua Goodies
===========
* `An Introduction to Lua <http://the-ravi-programming-language.readthedocs.io/en/latest/lua-introduction.html>`_ attempts to provide a quick overview of Lua for folks coming from other languages.
* `Lua 5.3 Bytecode Reference <http://the-ravi-programming-language.readthedocs.io/en/latest/lua_bytecode_reference.html>`_ is my attempt to bring up to date the `Lua 5.1 Bytecode Reference <http://luaforge.net/docman/83/98/ANoFrillsIntroToLua51VMInstructions.pdf>`_.
* A `patch for Lua 5.3 <http://lua-users.org/lists/lua-l/2020-01/msg00004.html>`_ implements the 'defer' statement.
Lua 5.4 Position Statement
==========================
Lua 5.4 relationship to Ravi is as follows:
* Generational GC - back-ported to Ravi.
* New random number generator - back-ported to Ravi.
* Multiple user values can be associated with userdata - under consideration.
* ``<const>`` variables - not planned.
* ``<close>`` variables - Ravi has ``'defer'`` statement which is better option in my opinion, hence no plans to support ``<close>`` variables.
* Interpreter performance improvements - these are beneficial to Lua interpreter but not to the JIT backends, hence not much point in back-porting.
* Table implementation changes - under consideration.
* String to number coercion is now part of string library metamethods - back-ported to Ravi.
* utf8 library accepts codepoints up to 2^31 - back-ported to Ravi.
* Removal of compatibility layers for 5.1, and 5.2 - not implemented as Ravi continues to provide these layers as per Lua 5.3.
Compatibility with Lua 5.3
==========================
Ravi should be able to run all Lua 5.3 programs in interpreted mode, but following should be noted:
* Ravi supports optional typing and enhanced types such as arrays (described above). Programs using these features cannot be run by standard Lua. However all types in Ravi can be passed to Lua functions; operations on Ravi arrays within Lua code will be subject to restrictions as described in the section above on arrays.
* Values crossing from Lua to Ravi will be subjected to typechecks should these values be assigned to typed variables.
* Upvalues cannot subvert the static typing of local variables (issue #26) when types are annotated.
* Certain Lua limits are reduced due to changed byte code structure. These are described below.
* Ravi uses an extended bytecode which means it is not compatible with Lua 5.3 bytecode.
* Ravi incorporates the new Generational GC from Lua 5.4, hence the GC interface has changed.
+-----------------+-------------+-------------+
| Limit name | Lua value | Ravi value |
+=================+=============+=============+
| MAXUPVAL | 255 | 125 |
+-----------------+-------------+-------------+
| LUAI_MAXCCALLS | 200 | 125 |
+-----------------+-------------+-------------+
| MAXREGS | 255 | 125 |
+-----------------+-------------+-------------+
| MAXVARS | 200 | 125 |
+-----------------+-------------+-------------+
| MAXARGLINE | 250 | 120 |
+-----------------+-------------+-------------+
When JIT compilation is enabled there are following additional constraints:
* Ravi will only execute JITed code from the main Lua thread; any secondary threads (coroutines) execute in interpreter mode.
* In JITed code tailcalls are implemented as regular calls so unlike the interpreter VM which supports infinite tail recursion JIT compiled code only supports tail recursion to a depth of about 110 (issue #17)
History
=======
* 2015
- Implemented JIT compilation using LLVM
- Implemented libgccjit based alternative JIT (now discontinued)
* 2016
- Implemented debugger for Ravi and Lua 5.3 for `Visual Studio Code <https://github.com/dibyendumajumdar/ravi/tree/master/vscode-debugger>`_
* 2017
- Embedded C compiler using dmrC project (C JIT compiler) (now discontinued)
- Additional type-annotations
* 2018
- Implemented Eclipse OMR JIT backend (now discontinued)
- Created `Ravi with batteries <https://github.com/dibyendumajumdar/Suravi>`_.
* 2019
- New language feature - `defer` statement
- New JIT backend `MIR <https://github.com/vnmakarov/mir>`_.
* 2020 (Plan)
- `New optimizing byte code generator based on new parser / type checker <https://github.com/dibyendumajumdar/ravi-compiler>`_
- Generational GC back-ported from Lua 5.4
- Ravi 1.0 release
License
=======
MIT License

@ -5,4 +5,4 @@ The scripts here are unsupported - these are just my personal
build scripts.
The unix LLVM debug builds enable 'ltests' and address sanitizer.
The debug builds enable 'ltests' and address sanitizer.

@ -1,16 +0,0 @@
# Run this on LLVM 10 source dir
mkdir build
cd build
cmake3 -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$HOME/Software/llvm10 \
-DLLVM_TARGETS_TO_BUILD="X86" \
-DLLVM_BUILD_TOOLS=OFF \
-DLLVM_INCLUDE_TOOLS=OFF \
-DLLVM_BUILD_EXAMPLES=OFF \
-DLLVM_INCLUDE_EXAMPLES=OFF \
-DLLVM_BUILD_TESTS=OFF \
-DLLVM_INCLUDE_TESTS=OFF \
-DLLVM_OPTIMIZED_TABLEGEN=ON \
..
make install

@ -1,5 +0,0 @@
mkdir llvm32
cd llvm32
rem cmake -DCMAKE_INSTALL_PREFIX=\d\ravi32 -G "Visual Studio 14" -DLLVM_JIT=ON -DLLVM_DIR=\d\LLVM37_32\share\llvm\cmake -DBUILD_STATIC=OFF ..
cmake -DCMAKE_INSTALL_PREFIX=\d\ravi32 -G "Visual Studio 14" -DLLVM_JIT=ON -DLLVM_DIR=\d\LLVM39_32\lib\cmake\llvm -DSTATIC_BUILD=OFF ..
cd ..

@ -1,5 +0,0 @@
mkdir llvm32
cd llvm32
rem cmake -DCMAKE_INSTALL_PREFIX=\d\ravi32 -G "Visual Studio 14" -DLLVM_JIT=ON -DLLVM_DIR=\d\LLVM37_32\share\llvm\cmake ..
cmake -DCMAKE_INSTALL_PREFIX=\d\ravi32 -G "Visual Studio 14" -DLLVM_JIT=ON -DLLVM_DIR=\d\LLVM39\lib\cmake\llvm -DSTATIC_BUILD=ON ..
cd ..

@ -1,5 +0,0 @@
mkdir llvm32d
cd llvm32d
rem cmake -DCMAKE_INSTALL_PREFIX=\d\ravi32 -G "Visual Studio 14" -DLLVM_JIT=ON -DLLVM_DIR=\d\LLVM37_32\share\llvm\cmake ..
cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=\d\ravi32 -G "Visual Studio 14" -DLLVM_JIT=ON -DLLVM_DIR=\d\LLVM39D_32\lib\cmake\llvm -DSTATIC_BUILD=ON ..
cd ..

@ -1,6 +0,0 @@
mkdir llvm64d
cd llvm64d
rem cmake -DCMAKE_INSTALL_PREFIX=c:\ravi64llvmd -G "Visual Studio 14 Win64" -DLLVM_JIT=ON -DLLVM_DIR=c:\LLVM37debug\share\llvm\cmake ..
rem cmake -DSTATIC_BUILD=ON -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=c:\d\ravi64llvmd -G "Visual Studio 15 2017 Win64" -DLLVM_JIT=ON -DEMBEDDED_DMRC=ON -DLLVM_DIR=c:\d\LLVM39D64\lib\cmake\llvm ..
cmake -DCMAKE_INSTALL_PREFIX=c:\Software\ravi -G "Visual Studio 15 2017 Win64" -DCMAKE_BUILD_TYPE=Debug -DLLVM_JIT=ON -DLLVM_DIR=c:\Software\llvm501d\lib\cmake\llvm ..
cd ..

@ -1,7 +0,0 @@
mkdir llvm64
cd llvm64
rem pre LLVM 3.9
rem cmake -DCMAKE_INSTALL_PREFIX=c:\ravi -G "Visual Studio 14 Win64" -DLLVM_JIT=ON -DLLVM_DIR=c:\LLVM37\share\llvm\cmake ..
rem cmake -DCMAKE_INSTALL_PREFIX=c:\ravi -G "Visual Studio 15 2017 Win64" -DLLVM_JIT=ON -DLLVM_DIR=c:\d\LLVM40_64\lib\cmake\llvm ..
cmake -DCMAKE_INSTALL_PREFIX=c:\Software\ravi -G "Visual Studio 15 2017 Win64" -DSTATIC_BUILD=ON -DLLVM_JIT=ON -DLLVM_DIR=c:\Software\llvm601r\lib\cmake\llvm ..
cd ..

@ -1,5 +0,0 @@
rmdir /s llvm8
mkdir llvm8
cd llvm8
cmake -DCMAKE_INSTALL_PREFIX=c:\Software\ravi -G "Visual Studio 15 2017 Win64" -DCMAKE_BUILD_TYPE=Release -DLLVM_JIT=ON -DLLVM_DIR=c:\Software\llvm801\lib\cmake\llvm ..
cd ..

@ -1,5 +0,0 @@
rmdir /s llvm10d
mkdir llvm10d
cd llvm10d
cmake -DCMAKE_INSTALL_PREFIX=c:\Software\ravi -G "Visual Studio 16 2019" -DLLVM_JIT=ON -DLLVM_DIR=c:\Software\llvm10d\lib\cmake\llvm ..
cd ..

@ -1,6 +0,0 @@
rmdir /s llvm9r
mkdir llvm9r
cd llvm9r
rem cmake -DCMAKE_INSTALL_PREFIX=c:\Software\ravi -G "Visual Studio 15 2017 Win64" -DLLVM_JIT=ON -DLLVM_DIR=c:\Software\llvm900r\lib\cmake\llvm ..
cmake -DCMAKE_INSTALL_PREFIX=c:\Software\ravi -G "Visual Studio 16 2019" -DLLVM_JIT=ON -DLLVM_DIR=c:\Software\llvm900r\lib\cmake\llvm ..
cd ..

@ -1,4 +0,0 @@
mkdir omrjit
cd omrjit
cmake -DCMAKE_INSTALL_PREFIX=c:\Software\ravi -G "Visual Studio 15 2017 Win64" -DCMAKE_BUILD_TYPE=Debug -DOMR_JIT=ON ..
cd ..

@ -1,4 +0,0 @@
mkdir xcodellvm
cd xcodellvm
#cmake -DCMAKE_BUILD_TYPE=Debug -G Xcode -DLLVM_JIT=ON -DCMAKE_INSTALL_PREFIX=$HOME/ravi -DLLVM_DIR=$HOME/LLVM/share/llvm/cmake ..
cmake -DCMAKE_BUILD_TYPE=Debug -G Xcode -DLLVM_JIT=ON -DCMAKE_INSTALL_PREFIX=$HOME/ravi -DLLVM_DIR=$HOME/LLVM5/lib/cmake/llvm ..

@ -1,5 +0,0 @@
mkdir buildllvmd
cd buildllvmd
#cmake -DCMAKE_BUILD_TYPE=Debug -DLLVM_JIT=ON -DCMAKE_INSTALL_PREFIX=$HOME/ravi -DLLVM_DIR=$HOME/LLVM/share/llvm/cmake ..
#cmake -DCMAKE_BUILD_TYPE=Debug -DLLVM_JIT=ON -DLTESTS=ON -DCMAKE_INSTALL_PREFIX=$HOME/ravi -DLLVM_DIR=$HOME/LLVM5/lib/cmake/llvm ..
cmake -DCMAKE_BUILD_TYPE=Debug -DLLVM_JIT=ON -DLTESTS=ON -DCMAKE_INSTALL_PREFIX=$HOME/ravillvm -DLLVM_DIR=$HOME/Software/llvm600/lib/cmake/llvm ..

@ -1,7 +0,0 @@
rm -rf buildllvm
mkdir buildllvm
cd buildllvm
#cmake -DCMAKE_BUILD_TYPE=Release -DLLVM_JIT=ON -DCMAKE_INSTALL_PREFIX=$HOME/ravi -DLLVM_DIR=$HOME/LLVM/share/llvm/cmake ..
#cmake -DCMAKE_BUILD_TYPE=Release -DLLVM_JIT=ON -DCMAKE_INSTALL_PREFIX=$HOME/ravi -DLLVM_DIR=$HOME/LLVM5/lib/cmake/llvm ..
#cmake -DCMAKE_BUILD_TYPE=Release -DSTATIC_BUILD=ON -DLLVM_JIT=ON -DCMAKE_INSTALL_PREFIX=$HOME/Software/ravi -DLLVM_DIR=$HOME/Software/llvm801/lib/cmake/llvm ..
cmake3 -DCMAKE_BUILD_TYPE=Release -DSTATIC_BUILD=ON -DLLVM_JIT=ON -DCMAKE_INSTALL_PREFIX=$HOME/Software/ravi -DLLVM_DIR=$HOME/Software/llvm10/lib/cmake/llvm ..

@ -1,3 +0,0 @@
mkdir omrjit
cd omrjit
cmake -DCMAKE_BUILD_TYPE=Release -DOMR_JIT=ON -DCMAKE_INSTALL_PREFIX=$HOME/ravi ..

@ -12,20 +12,15 @@ RUN set -x \
&& cd /Software \
&& tar xvf "cmake-3.14.5-Linux-x86_64.tar.gz" \
&& rm -rf "/Software/cmake-3.14.5-Linux-x86_64.tar.gz" \
&& wget -O "/Software/clang+llvm-8.0.0-x86_64-linux-gnu-ubuntu-18.04.tar.xz" "http://releases.llvm.org/8.0.0/clang+llvm-8.0.0-x86_64-linux-gnu-ubuntu-18.04.tar.xz" \
&& cd /Software \
&& tar xvf "clang+llvm-8.0.0-x86_64-linux-gnu-ubuntu-18.04.tar.xz" \
&& rm -rf "/Software/clang+llvm-8.0.0-x86_64-linux-gnu-ubuntu-18.04.tar.xz" \
&& mkdir -p /sources \
&& cd /sources \
&& git clone https://github.com/dibyendumajumdar/ravi.git \
&& cd /sources/ravi \
&& mkdir build \
&& cd build \
&& /Software/cmake-3.14.5-Linux-x86_64/bin/cmake -DSTATIC_BUILD=ON -DCMAKE_INSTALL_PREFIX=/Software/ravi -DLLVM_JIT=ON -DCMAKE_BUILD_TYPE=Release -DLLVM_DIR=/Software/clang+llvm-8.0.0-x86_64-linux-gnu-ubuntu-18.04/lib/cmake/llvm .. \
&& /Software/cmake-3.14.5-Linux-x86_64/bin/cmake -DCMAKE_INSTALL_PREFIX=/Software/ravi -DCMAKE_BUILD_TYPE=Release .. \
&& make install \
&& rm -rf /Software/cmake-3.14.5-Linux-x86_64 \
&& rm -rf /Software/clang+llvm-8.0.0-x86_64-linux-gnu-ubuntu-18.04 \
&& rm -rf /sources \
&& apt-get autoremove \
&& apt-get remove -y --purge git wget build-essential \

@ -226,10 +226,7 @@ extern const char lua_ident[];
LUA_API lua_State *(lua_newstate) (lua_Alloc f, void *ud);
LUA_API void (lua_close) (lua_State *L);
LUA_API lua_State *(lua_newthread) (lua_State *L);
#ifdef RAVI_DEFER_STATEMENT
LUA_API int (lua_resetthread) (lua_State *L);
#endif
LUA_API lua_CFunction (lua_atpanic) (lua_State *L, lua_CFunction panicf);
LUA_API const lua_Number *(lua_version) (lua_State *L);
@ -366,11 +363,14 @@ LUA_API int (lua_pcallk) (lua_State *L, int nargs, int nresults, int errfunc,
lua_KContext ctx, lua_KFunction k);
#define lua_pcall(L,n,r,f) lua_pcallk(L, (n), (r), (f), 0, NULL)
/* A Lua Closure must be on top of the stack. This will set _ENV upvalue */
LUA_API void (ravi_closure_setenv) (lua_State* L);
LUA_API int (lua_load) (lua_State *L, lua_Reader reader, void *dt,
const char *chunkname, const char *mode);
LUA_API int (lua_dump) (lua_State *L, lua_Writer writer, void *data, int strip);
LUA_API void (raviV_raise_error) (lua_State *L, int errorcode);
/*
** coroutine functions
@ -639,6 +639,9 @@ LUA_API int ravi_list_code(lua_State *L);
/* Returns a table with various system limits */
LUA_API int ravi_get_limits(lua_State *L);
/* Options */
LUA_API const char *raviV_options(struct lua_State *L);
/* Following are for debugging purposes only */
LUAI_DDEC int ravi_parser_debug;
LUA_API void ravi_set_debuglevel(int level);

@ -61,15 +61,19 @@
#if defined(LUA_USE_LINUX)
#define LUA_USE_POSIX
#define LUA_USE_DLOPEN /* needs an extra library: -ldl */
#ifndef LUA_USE_READLINE
#define LUA_USE_READLINE /* needs some extra libraries */
#endif
#endif
#if defined(LUA_USE_MACOSX)
#define LUA_USE_POSIX
#define LUA_USE_DLOPEN /* MacOS does not need -ldl */
#ifndef LUA_USE_READLINE
#define LUA_USE_READLINE /* needs an extra library: -lreadline */
#endif
#endif
/*
@ -256,9 +260,11 @@
#endif /* } */
/* more often than not the libs go together with the core */
/*
** More often than not the libs go together with the core.
*/
#define LUALIB_API LUA_API
#define LUAMOD_API LUALIB_API
#define LUAMOD_API LUA_API
/*
@ -275,15 +281,17 @@
** give a warning about it. To avoid these warnings, change to the
** default definition.
*/
#if 0
#if defined(__GNUC__) && ((__GNUC__*100 + __GNUC_MINOR__) >= 302) && \
defined(__ELF__) /* { */
/** RAVI change **/
#define LUAI_FUNC /* __attribute__((visibility("hidden")))*/ extern
#define LUAI_FUNC __attribute__((visibility("internal"))) extern
#else /* }{ */
#define LUAI_FUNC extern
#endif /* } */
#endif
#define LUAI_FUNC LUA_API /* AOT code needs to access symbols */
#define LUAI_DDEC LUAI_FUNC
#define LUAI_DDEC extern
#define LUAI_DDEF /* empty */
/* }================================================================== */
@ -844,12 +852,6 @@
/* Following cause the first hash lookup to be inlined,
and if value is 2 then the second hash lookup is also inlined. */
#define RAVI_USE_INLINE_SHORTSTR_TGET 1
#define RAVI_USE_LLVM_BRANCH_WEIGHTS 1
/* If following is defined as true then LLVM instructions emitted for arithmetic ops
priority floating point ops, else default is to prioritise integer ops */
#define RAVI_USE_LLVM_ARITH_FLOATPRIORITY 1
/* Enables the 'defer' statement - RAVI extension */
#define RAVI_DEFER_STATEMENT
#endif

@ -54,8 +54,10 @@ LUAMOD_API int (luaopen_package) (lua_State *L);
#define LUA_RAVILIBNAME "ravi"
LUAMOD_API int (raviopen_jit)(lua_State *L);
#define LUA_ASTLIBNAME "ast"
LUAMOD_API int (raviopen_ast_library)(lua_State *L);
#define LUA_RAVICOMPLIBNAME "compiler"
LUAMOD_API int (raviopen_compiler)(lua_State *L);
/* open all previous libraries */
LUALIB_API void (luaL_openlibs) (lua_State *L);

@ -1,121 +0,0 @@
/******************************************************************************
* Copyright (C) 2015 Dibyendu Majumdar
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
******************************************************************************/
#ifndef RAVI_LLVM_H
#define RAVI_LLVM_H
#ifdef USE_LLVM
#include "llvm/Config/llvm-config.h"
#if (LLVM_VERSION_MAJOR < 3 || LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5 || LLVM_VERSION_MAJOR == 7)
#error Unsupported LLVM version
#endif
#if LLVM_VERSION_MAJOR >= 5
#define USE_ORC_JIT 1
#else
#define USE_ORC_JIT 0
#endif
#if LLVM_VERSION_MAJOR >= 8 && !defined(_WIN32)
#define USE_ORCv2_JIT 0
#else
#define USE_ORCv2_JIT 0
#endif
#if LLVM_VERSION_MAJOR >= 10
#undef USE_ORCv2_JIT
#define USE_ORCv2_JIT 1
#endif
// In lua.c we include this just to get version numbers
// We cannot have C++ headers in that case
#ifdef __cplusplus
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/MCJIT.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IR/Metadata.h"
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 7
#include "llvm/PassManager.h"
#else
#include "llvm/IR/LegacyPassManager.h"
#endif
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Support/FormattedStream.h"
#if USE_ORC_JIT || USE_ORCv2_JIT
#include "llvm/ADT/STLExtras.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h"
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/IR/Mangler.h"
#include "llvm/Support/Error.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar/GVN.h"
#if LLVM_VERSION_MAJOR >= 8
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/ExecutionEngine/Orc/Legacy.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#endif
#endif
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <memory>
#include <string>
#include <cstdio>
#include <vector>
#endif //__cplusplus
#endif //USE_LLVM
#endif

File diff suppressed because it is too large

@ -1,6 +1,5 @@
project(mir)
enable_language(C)
cmake_minimum_required(VERSION 3.12)
project(mir LANGUAGES C)
message(STATUS "OS type is ${CMAKE_SYSTEM_NAME}")
message(STATUS "System processor is ${CMAKE_HOST_SYSTEM_PROCESSOR}")
@ -25,13 +24,23 @@ set(C2MIR_SRCS
c2mir/c2mir.c
)
set(LIBS dl)
set(LIBS ${CMAKE_DL_LIBS})
if (WIN32)
set(MIR_BUILD_TYPE STATIC)
else()
set(MIR_BUILD_TYPE SHARED)
endif()
add_definitions(-DMIR_NO_IO=0)
add_definitions(-DMIR_NO_SCAN=1)
#add_definitions(-DMIR_NO_IO=0)
#add_definitions(-DMIR_NO_SCAN=1)
set_property(SOURCE ${MIR_SRCS} ${C2MIR_SRCS} ${MIR_JIT_SRCS}
APPEND
PROPERTY COMPILE_DEFINITIONS "MIR_NO_IO=0;MIR_NO_SCAN=1;MIR_NO_INTERP=1")
include_directories(".")
include_directories("./c2mir")
if ($ENV{CLION_IDE})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/c2mir)
endif()
include(CheckCCompilerFlag)
check_c_compiler_flag("-fsigned-char" COMPILER_OPT_SIGNED_CHAR_SUPPORTED)
@ -47,10 +56,16 @@ if (COMPILER_OPT_NO_IPA_CP_CLONE_SUPPORTED AND NOT CMAKE_C_FLAGS MATCHES "-fno-i
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-ipa-cp-clone")
endif()
add_library(c2mir
include(GNUInstallDirs)
add_library(c2mir ${MIR_BUILD_TYPE}
${MIR_HEADERS}
${MIR_SRCS}
${C2MIR_SRCS})
target_link_libraries(c2mir ${LIBS})
target_include_directories(c2mir PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/c2mir)
set_property(TARGET c2mir PROPERTY C_STANDARD 11)
install(TARGETS c2mir
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT mir_Runtime
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT mir_Development
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT mir_Runtime)

@ -43,11 +43,17 @@
their value
* `MIR_T_F` and `MIR_T_D` -- IEEE single and double precision floating point values
* `MIR_T_LD` - long double values. It is machine-dependent and can be IEEE double, x86 80-bit FP,
or IEEE quad precision FP values
or IEEE quad precision FP values. If it is the same as double, the double type will be used instead.
So do not expect machine independence of MIR code that works with long double values
* `MIR_T_P` -- pointer values. Depending on the target, a pointer value is actually a 32-bit or 64-bit integer value
* `MIR_T_BLK` .. `MIR_T_BLK + MIR_BLK_NUM - 1` -- block data with a given case. This type can be used only
for an argument of a function. Different case numbers can denote different ways to pass the block data
on a particular target to implement the target call ABI. Currently there are 5 block
types (`MIR_BLK_NUM = 5`)
* `MIR_T_RBLK` -- return block data. This type can be used only for an argument of a function
* MIR textual representation of the types are correspondingly `i8`,
`u8`, `i16`, `u16`, `i32`, `u32`, `i64`, `u64`, `f`, `d`, `p`,
and `v`
`u8`, `i16`, `u16`, `i32`, `u32`, `i64`, `u64`, `f`, `d`, `ld`, `p`,
and `blk`
* Function `int MIR_int_type_p (MIR_type_t t)` returns TRUE if the given type is an integer one (this includes the pointer type too)
* Function `int MIR_fp_type_p (MIR_type_t t)` returns TRUE if the given type is a floating point type
@ -78,6 +84,7 @@
only one result, have no arguments, not use any call or any instruction with memory
* The expression function is called during linking and its result is used to initialize the data
* **Memory segment**: `MIR_bss_item` with optional name (`MIR_item_t MIR_new_bss (MIR_context_t ctx, const char *name, size_t len)`)
* A long double data item is changed to a double one if long double coincides with double for the given target or ABI
* Names of MIR functions, imports, and prototypes should be unique in a module
* API functions `MIR_output_item (MIR_context_t ctx, FILE *f, MIR_item_t item)`
and `MIR_output_module (MIR_context_t ctx, FILE *f, MIR_module_t module)` output item or module
@ -96,6 +103,8 @@
* A variable should have a unique name in the function
* A variable is represented by a structure of type `MIR_var_t`
* The structure contains variable name and its type
* The structure also contains the type size for variables of block types (`MIR_T_BLK`..`MIR_T_BLK + MIR_BLK_NUM - 1`)
or of `MIR_T_RBLK` type
* MIR function with its arguments is created through API function `MIR_item_t MIR_new_func (MIR_context_t ctx, const
char *name, size_t nres, MIR_type_t *res_types, size_t nargs, ...)`
or function `MIR_item_t MIR_new_func_arr (MIR_context_t ctx, const char *name, size_t nres, MIR_type_t *res_types, size_t nargs, MIR_var_t *arg_vars)`
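A hedged sketch of the array-based creation function described above; the field names (`name`, `type`, `size`) follow the `MIR_var_t` usage elsewhere in this diff, and the concrete function name, argument names, and sizes are illustrative only.
```c
#include "mir.h"

/* Hedged sketch: create i64 f(i64 n, blk:16 b) via MIR_new_func_arr.
   Only block (and RBLK) arguments need the size field set. */
static MIR_item_t make_func (MIR_context_t ctx) {
  MIR_type_t res_type = MIR_T_I64;
  MIR_var_t args[2];
  args[0].name = "n";         /* ordinary integer argument */
  args[0].type = MIR_T_I64;
  args[1].name = "b";         /* block argument passed by value */
  args[1].type = MIR_T_BLK;
  args[1].size = 16;          /* size of the block data */
  return MIR_new_func_arr (ctx, "f", 1, &res_type, 2, args);
}
```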
@ -119,6 +128,12 @@
{<insn>}
endfun
```
* Textual presentation of a block type argument in `func` has the form `blk:<size>(<var_name>)`.
The corresponding argument in a `call` insn should have the analogous form
`blk:<the same size>(<local var name containing address of passed block data>)`
* Block data are passed by value. How exactly they are passed is machine-defined:
* they are always passed on the stack for x86-64, aarch64, and s390x
* they can be (partially) passed through registers and on the stack for ppc64
* Non-argument function variables are created through API function
`MIR_reg_t MIR_new_func_reg (MIR_context_t ctx, MIR_func_t func, MIR_type_t type, const char *name)`
* The only permitted integer type for the variable is `MIR_T_I64` (or MIR_T_U64???)
@ -139,7 +154,9 @@
`MIR_op_t MIR_new_int_op (MIR_context_t ctx, int64_t v)` and `MIR_op_t MIR_new_uint_op (MIR_context_t ctx, uint64_t v)`
* In MIR text they are represented the same way as C integer numbers (e.g. octal, decimal, hexadecimal ones)
* **Float, double or long double value operands** created through API functions `MIR_op_t MIR_new_float_op (MIR_context_t ctx, float v)`,
`MIR_op_t MIR_new_double_op (MIR_context_t ctx, double v)`, and `MIR_op_t MIR_new_ldouble_op (MIR_context_t ctx, long double v)`
`MIR_op_t MIR_new_double_op (MIR_context_t ctx, double v)`,
and `MIR_op_t MIR_new_ldouble_op (MIR_context_t ctx, long double v)`.
A long double operand is changed to a double one when long double coincides with double for the given target or ABI
* In MIR text they are represented the same way as C floating point numbers
* **String operands** created through API functions `MIR_op_t MIR_new_str_op (MIR_context_t ctx, MIR_str_t str)`
* In MIR text they are represented by `typedef struct MIR_str {size_t len; const char *s;} MIR_str_t`
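A short hedged sketch of creating the constant operand kinds listed above; `ctx` is assumed to be an existing `MIR_context_t`, and the values are illustrative.
```c
#include "mir.h"

/* Hedged sketch: build a few constant operands using the constructors above. */
static void make_const_ops (MIR_context_t ctx) {
  MIR_op_t i  = MIR_new_int_op (ctx, -42);       /* signed integer constant */
  MIR_op_t u  = MIR_new_uint_op (ctx, 42);       /* unsigned integer constant */
  MIR_op_t d  = MIR_new_double_op (ctx, 3.5);    /* double constant */
  MIR_op_t ld = MIR_new_ldouble_op (ctx, 3.5L);  /* may become a double op on targets where long double == double */
  (void) i; (void) u; (void) d; (void) ld;
}
```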
@ -184,6 +201,7 @@
* You cannot use `MIR_new_insn` to create call and ret insns, as these insns have a variable number of operands.
To create such insns you should use `MIR_new_insn_arr` or the special functions
`MIR_insn_t MIR_new_call_insn (MIR_context_t ctx, size_t nops, ...)` and `MIR_insn_t MIR_new_ret_insn (MIR_context_t ctx, size_t nops, ...)` (see the sketch after this list)
* Long double insns are changed to double ones if long double coincides with double for the given target or ABI
* You can get insn name and number of insn operands through API functions
`const char *MIR_insn_name (MIR_context_t ctx, MIR_insn_code_t code)` and `size_t MIR_insn_nops (MIR_context_t ctx, MIR_insn_t insn)`
* You can add a created insn at the beginning or end of function insn list through API functions
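A hedged sketch of the variable-operand insn constructors, continuing the API example shown below; `ctx`, `func`, and `COUNT` are assumed to exist as in that example.
```c
/* Hedged sketch: ret insns take a variable number of operands, so they are
   built with MIR_new_ret_insn instead of MIR_new_insn. */
MIR_insn_t ret = MIR_new_ret_insn (ctx, 1, MIR_new_reg_op (ctx, COUNT));
MIR_append_insn (ctx, func, ret);   /* append at the end of the function insn list */
```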
@ -386,7 +404,7 @@
* The first insn saves the stack pointer in the operand
* The second insn restores stack pointer from the operand
### MIR_VA_START, MIR_VA_ARG, and MIR_VA_END insns
### MIR_VA_START, MIR_VA_ARG, MIR_VA_BLOCK_ARG, and MIR_VA_END insns
* These insns are only for functions with a variable number of arguments
* `MIR_VA_START` and `MIR_VA_END` have one input operand, an address
of va_list structure (see C stdarg.h for more details). Unlike C
@ -394,6 +412,9 @@
* `MIR_VA_ARG` takes a va_list and any memory operand and returns the
address of the next argument in the 1st insn operand. The memory
operand type defines the type of the argument
* `MIR_VA_BLOCK_ARG` takes a result address, a va_list address, an integer operand (size),
and a block type (case) number, and moves the next argument, passed as a block of the given
size and type, to the result address
* The va_list operand can be memory with undefined type. In this case
the address of the va_list is not in the memory but is the
memory address itself
@ -401,7 +422,7 @@
## MIR API example
* The following C code creates the MIR analog of the C code
`int64_t loop (int64_t arg1) {int64_t count = 0; while (count < arg1) count++; return count;}`
```
```c
MIR_module_t m = MIR_new_module (ctx, "m");
MIR_item_t func = MIR_new_func (ctx, "loop", MIR_T_I64, 1, MIR_T_I64, "arg1");
MIR_reg_t COUNT = MIR_new_func_reg (ctx, func->u.func, MIR_T_I64, "count");
@ -423,9 +444,11 @@
MIR_finish_module (ctx);
```
## MIR text example
## MIR text examples
```
* Sieve of Eratosthenes:
```mir
m_sieve: module
export sieve
sieve: func i32, i32:N
@ -466,6 +489,29 @@ ex100: func v
endmodule
```
* Example of block arguments and `va_stack_arg`
```mir
m0: module
f_p: proto i64, 16:blk(a), ...
f: func i64, 16:blk(a), ...
local i64:r, i64:va, i64:a2
alloca va, 32 # allocate enough space for a va_list
va_start va
va_stack_arg a2, va, 16 # get address of the 2nd blk arg
add r, i64:0(a), i64:8(a2)
ret r
main: func
local i64:a, i64:r
alloca a, 16
mov i64:0(a), 42
mov i64:8(a), 24
call f_p, f, r, blk:16(a), blk:16(a)
ret r
endfunc
endmodule
```
## Other MIR API functions
* MIR API can find a lot of errors. They are reported through an
error function of type `void (*MIR_error_func_t) (MIR_context ctx, MIR_error_type_t
@ -564,17 +610,23 @@ ex100: func v
works only on the same targets as MIR generator
# MIR generator (file mir-gen.h)
* Before use of MIR generator you should initialize it by API function `MIR_gen_init (MIR_context ctx)`
* API function `MIR_gen_finish (MIR_context ctx)` should be called last after any generator usage.
It frees all internal generator data
* API function `void *MIR_gen (MIR_context ctx, MIR_item_t func_item)` generates machine code of given MIR function
and returns an address to call it. You can call the code as usual C function by using this address
as the called function address
* API function `void MIR_gen_set_debug_file (MIR_context_t ctx, FILE *f)` sets up MIR generator debug file to `f`.
* Before using the MIR generator for a given context you should initialize it by the API function
`MIR_gen_init (MIR_context ctx, int gens_num)`. `gens_num` defines how many generator instances you need.
Each generator instance can be used in a different thread to compile different MIR functions from the same context.
Passing a negative or zero `gens_num` has the same effect as the value `1`
* API function `MIR_gen_finish (MIR_context ctx)` frees all internal generator data (and its instances) for the context.
If you want to generate code for the context again after the `MIR_gen_finish` call, you should call
`MIR_gen_init` again first
* API function `void *MIR_gen (MIR_context ctx, int gen_num, MIR_item_t func_item)` generates machine code
of the given MIR function in generator instance `gen_num` and returns an address to call it. You can call
the code as a usual C function by using this address as the called function address.
`gen_num` should be a number in the range `0` .. `gens_num - 1` from the corresponding `MIR_gen_init`
* API function `void MIR_gen_set_debug_file (MIR_context_t ctx, int gen_num, FILE *f)` sets up MIR generator
debug file to `f` for generator instance `gen_num`.
If it is not NULL, a lot of debugging and optimization information will be output to the file. It is mostly useful
for MIR developers
* API function `void MIR_gen_set_optimize_level (MIR_context_t ctx, unsigned int level)` sets up optimization
level for MIR generator:
* API function `void MIR_gen_set_optimize_level (MIR_context_t ctx, int gen_num, unsigned int level)` sets up optimization
level for MIR generator instance `gen_num`:
* `0` means only register allocator and machine code generator work
* `1` means an additional code selection task. On this level the MIR generator creates more compact and faster
code than on the zero level at practically the same speed
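Putting the generator API described above together, here is a hedged C sketch of compiling and calling one function with a single generator instance; it assumes `func_item` comes from a module that has already been loaded and linked (not shown) and that the function has the `int64_t (int64_t)` signature of the earlier `loop` example.
```c
#include <stdint.h>
#include "mir-gen.h"

typedef int64_t (*loop_fn_t) (int64_t);

/* Hedged sketch: one generator instance (index 0), optimization level 1. */
static int64_t gen_and_run (MIR_context_t ctx, MIR_item_t func_item) {
  MIR_gen_init (ctx, 1);                      /* gens_num = 1 */
  MIR_gen_set_optimize_level (ctx, 0, 1);     /* instance 0, level 1 */
  void *code = MIR_gen (ctx, 0, func_item);   /* compile in instance 0 */
  int64_t res = ((loop_fn_t) code) (1000);    /* call as a usual C function */
  MIR_gen_finish (ctx);                       /* frees internal generator data */
  return res;
}
```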

@ -3,3 +3,5 @@ project. Following changes have been made:
* A CMake build script has been added to create a library
* Unused files / tests have been removed to avoid clutter
Last update: 7th Jan 2021

@ -0,0 +1,98 @@
/* This file is a part of MIR project.
Copyright (C) 2018-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
aarch64 call ABI target specific code.
*/
typedef int target_arg_info_t;
static void target_init_arg_vars (c2m_ctx_t c2m_ctx, target_arg_info_t *arg_info) {}
static int target_return_by_addr_p (c2m_ctx_t c2m_ctx, struct type *ret_type) {
return ((ret_type->mode == TM_STRUCT || ret_type->mode == TM_UNION)
&& type_size (c2m_ctx, ret_type) > 2 * 8);
}
static int reg_aggregate_size (c2m_ctx_t c2m_ctx, struct type *type) {
int size;
if (type->mode != TM_STRUCT && type->mode != TM_UNION) return -1;
return (size = type_size (c2m_ctx, type)) <= 2 * 8 ? size : -1;
}
static void target_add_res_proto (c2m_ctx_t c2m_ctx, struct type *ret_type,
target_arg_info_t *arg_info, VARR (MIR_type_t) * res_types,
VARR (MIR_var_t) * arg_vars) {
MIR_var_t var;
int size;
if ((size = reg_aggregate_size (c2m_ctx, ret_type)) < 0) {
simple_add_res_proto (c2m_ctx, ret_type, arg_info, res_types, arg_vars);
return;
}
if (size == 0) return;
VARR_PUSH (MIR_type_t, res_types, MIR_T_I64);
if (size > 8) VARR_PUSH (MIR_type_t, res_types, MIR_T_I64);
}
static int target_add_call_res_op (c2m_ctx_t c2m_ctx, struct type *ret_type,
target_arg_info_t *arg_info, size_t call_arg_area_offset) {
gen_ctx_t gen_ctx = c2m_ctx->gen_ctx;
MIR_context_t ctx = c2m_ctx->ctx;
int size;
if ((size = reg_aggregate_size (c2m_ctx, ret_type)) < 0)
return simple_add_call_res_op (c2m_ctx, ret_type, arg_info, call_arg_area_offset);
if (size == 0) return -1;
VARR_PUSH (MIR_op_t, call_ops,
MIR_new_reg_op (ctx, get_new_temp (c2m_ctx, MIR_T_I64).mir_op.u.reg));
if (size > 8)
VARR_PUSH (MIR_op_t, call_ops,
MIR_new_reg_op (ctx, get_new_temp (c2m_ctx, MIR_T_I64).mir_op.u.reg));
return size <= 8 ? 1 : 2;
}
static op_t target_gen_post_call_res_code (c2m_ctx_t c2m_ctx, struct type *ret_type, op_t res,
MIR_insn_t call, size_t call_ops_start) {
gen_ctx_t gen_ctx = c2m_ctx->gen_ctx;
int size;
if ((size = reg_aggregate_size (c2m_ctx, ret_type)) < 0)
return simple_gen_post_call_res_code (c2m_ctx, ret_type, res, call, call_ops_start);
if (size != 0)
gen_multiple_load_store (c2m_ctx, ret_type, &VARR_ADDR (MIR_op_t, call_ops)[call_ops_start + 2],
res.mir_op, FALSE);
return res;
}
static void target_add_ret_ops (c2m_ctx_t c2m_ctx, struct type *ret_type, op_t res) {
gen_ctx_t gen_ctx = c2m_ctx->gen_ctx;
int i, size;
if ((size = reg_aggregate_size (c2m_ctx, ret_type)) < 0) {
simple_add_ret_ops (c2m_ctx, ret_type, res);
return;
}
assert (res.mir_op.mode == MIR_OP_MEM && VARR_LENGTH (MIR_op_t, ret_ops) == 0 && size <= 2 * 8);
for (i = 0; size > 0; size -= 8, i++)
VARR_PUSH (MIR_op_t, ret_ops, get_new_temp (c2m_ctx, MIR_T_I64).mir_op);
gen_multiple_load_store (c2m_ctx, ret_type, VARR_ADDR (MIR_op_t, ret_ops), res.mir_op, TRUE);
}
static MIR_type_t target_get_blk_type (c2m_ctx_t c2m_ctx, struct type *arg_type) {
return MIR_T_BLK; /* one BLK is enough */
}
static void target_add_arg_proto (c2m_ctx_t c2m_ctx, const char *name, struct type *arg_type,
target_arg_info_t *arg_info, VARR (MIR_var_t) * arg_vars) {
simple_add_arg_proto (c2m_ctx, name, arg_type, arg_info, arg_vars);
}
static void target_add_call_arg_op (c2m_ctx_t c2m_ctx, struct type *arg_type,
target_arg_info_t *arg_info, op_t arg) {
simple_add_call_arg_op (c2m_ctx, arg_type, arg_info, arg);
}
static int target_gen_gather_arg (c2m_ctx_t c2m_ctx, const char *name, struct type *arg_type,
decl_t param_decl, target_arg_info_t *arg_info) {
return FALSE;
}

@ -1,13 +1,18 @@
/* This file is a part of MIR project.
Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
#include "../mirc.h"
#include "mirc-aarch64-linux.h"
#include "mirc_aarch64_linux.h"
static const char *standard_includes[] = {mirc, aarch64_mirc};
#include "mirc_aarch64_float.h"
#include "mirc_aarch64_limits.h"
#include "mirc_aarch64_stdarg.h"
#include "mirc_aarch64_stdint.h"
#include "mirc_aarch64_stddef.h"
static const char *standard_include_dirs[] = {"include/mirc/", "include/mirc/aarch64/"};
static string_include_t standard_includes[]
= {{NULL, mirc}, {NULL, aarch64_mirc}, TARGET_STD_INCLUDES};
#define MAX_ALIGNMENT 16

@ -1,5 +1,5 @@
/* This file is a part of MIR project.
Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
#include <stdint.h>
@ -28,12 +28,17 @@ typedef uint16_t mir_ushort;
typedef uint32_t mir_uint;
typedef uint64_t mir_ulong;
typedef uint64_t mir_ullong;
typedef uint32_t mir_wchar;
typedef uint16_t mir_char16;
typedef uint32_t mir_char32;
#define MIR_UCHAR_MAX UINT8_MAX
#define MIR_USHORT_MAX UINT16_MAX
#define MIR_UINT_MAX UINT32_MAX
#define MIR_ULONG_MAX UINT64_MAX
#define MIR_ULLONG_MAX UINT64_MAX
#define MIR_WCHAR_MIN 0
#define MIR_WCHAR_MAX UINT32_MAX
typedef mir_schar mir_char;
#define MIR_CHAR_MIN MIR_SCHAR_MIN

@ -0,0 +1,60 @@
/* This file is a part of MIR project.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* See C11 5.2.4.2.2 */
static char float_str[]
= "#ifndef __FLOAT_H\n"
"#define __FLOAT_H\n"
"\n"
"#define FLT_RADIX 2\n"
"\n"
"#define FLT_MANT_DIG 24\n"
"#define DBL_MANT_DIG 53\n"
"#define LDBL_MANT_DIG DBL_MANT_DIG\n"
"\n"
"#define FLT_DECIMAL_DIG 9\n"
"#define DBL_DECIMAL_DIG 17\n"
"#define LDBL_DECIMAL_DIG DBL_DECIMAL_DIG\n"
"#define FLT_DIG FLT_DECIMAL_DIG\n"
"#define DBL_DIG DBL_DECIMAL_DIG\n"
"#define LDBL_DIG LDBL_DECIMAL_DIG\n"
"\n"
"#define DECIMAL_DIG LDBL_DECIMAL_DIG\n"
"\n"
"#define FLT_MIN_EXP -125\n"
"#define DBL_MIN_EXP -1021\n"
"#define LDBL_MIN_EXP DBL_MIN_EXP\n"
"\n"
"#define FLT_MIN_10_EXP -37\n"
"#define DBL_MIN_10_EXP -307\n"
"#define LDBL_MIN_10_EXP DBL_MIN_10_EXP\n"
"\n"
"#define FLT_MAX_EXP 128\n"
"#define DBL_MAX_EXP 1024\n"
"#define LDBL_MAX_EXP DBL_MAX_EXP\n"
"\n"
"#define FLT_MAX_10_EXP 38\n"
"#define DBL_MAX_10_EXP 308\n"
"#define LDBL_MAX_10_EXP DBL_MAX_10_EXP\n"
"\n"
"#define FLT_MAX 0x1.fffffep+127\n"
"#define DBL_MAX 0x1.fffffffffffffp+1023\n"
"#define LDBL_MAX DBL_MAX\n"
"\n"
"#define FLT_EPSILON 0x1p-23\n"
"#define DBL_EPSILON 0x1p-52\n"
"#define LDBL_EPSILON DBL_EPSILON\n"
"\n"
"#define FLT_MIN 0x1p-126\n"
"#define DBL_MIN 0x1p-1022\n"
"#define LDBL_MIN DBL_MIN\n"
"\n"
"#define FLT_TRUE_MIN 0x1p-149\n"
"#define DBL_TRUE_MIN 0x0.0000000000001p-1022\n"
"#define LDBL_TRUE_MIN DBL_TRUE_MIN\n"
"\n"
"#define FLT_EVAL_METHOD 0\n"
"#define FLT_ROUNDS 1 /* round to the nearest */\n"
"\n"
"#endif /* #ifndef __FLOAT_H */\n";

@ -0,0 +1,38 @@
/* This file is a part of MIR project.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* See 5.2.4.2 */
static char limits_str[]
= "#ifndef __LIMITS_H\n"
"#define __LIMITS_H\n"
"\n"
"#define CHAR_BIT 8\n"
"\n"
"#define SCHAR_MIN (-SCHAR_MAX - 1)\n"
"#define SCHAR_MAX 127\n"
"#define UCHAR_MAX (SCHAR_MAX * 2 + 1)\n"
"\n"
"#define MB_LEN_MAX 1\n"
"\n"
"#define SHRT_MIN (-SHRT_MAX - 1)\n"
"#define SHRT_MAX 32767\n"
"#define USHRT_MAX (SHRT_MAX * 2 + 1)\n"
"\n"
"#define INT_MIN (-INT_MAX - 1)\n"
"#define INT_MAX 2147483647\n"
"#define UINT_MAX (INT_MAX * 2u + 1u)\n"
"\n"
"#define LONG_MIN (-LONG_MAX - 1l)\n"
"#define LONG_MAX 9223372036854775807l\n"
"#define ULONG_MAX (LONG_MAX * 2ul + 1ul)\n"
"\n"
"#define LLONG_MIN LONG_MIN\n"
"#define LLONG_MAX LONG_MAX\n"
"#define ULLONG_MAX ULONG_MAX\n"
"\n"
"/* signed char by default */\n"
"#define CHAR_MIN SCHAR_MIN\n"
"#define CHAR_MAX SCHAR_MAX\n"
"\n"
"#endif /* #ifndef __LIMITS_H */\n";

@ -1,5 +1,5 @@
/* This file is a part of MIR project.
Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
static char aarch64_mirc[]

@ -0,0 +1,27 @@
/* This file is a part of MIR project.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
static char stdarg_str[]
= "#ifndef __STDARG_H\n"
"#define __STDARG_H\n"
"\n"
"typedef struct {\n"
" void *__stack;\n"
" void *__gr_top;\n"
" void *__vr_top;\n"
" int __gr_offs;\n"
" int __vr_offs;\n"
"} va_list;\n"
"\n"
"#define va_start(ap, param) __builtin_va_start (ap)\n"
"#define va_arg(ap, type) __builtin_va_arg(ap, (type *) 0)\n"
"#define va_end(ap) 0\n"
"#define va_copy(dest, src) ((dest)[0] = (src)[0])\n"
"\n"
"/* For standard headers of a GNU system: */\n"
"#ifndef __GNUC_VA_LIST\n"
"#define __GNUC_VA_LIST 1\n"
"#endif\n"
"typedef va_list __gnuc_va_list;\n"
"#endif /* #ifndef __STDARG_H */\n";

@ -0,0 +1,19 @@
/* This file is a part of MIR project.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* See C11 7.19 */
static char stddef_str[]
= "#ifndef __STDDEF_H\n"
"#define __STDDEF_H\n"
"\n"
"typedef long ptrdiff_t;\n"
"typedef unsigned long size_t;\n"
"typedef long double max_align_t;\n"
"typedef unsigned int wchar_t;\n"
"\n"
"#define NULL ((void *) 0)\n"
"\n"
"#define offsetof(type, member_designator) ((size_t) & ((type *) 0)->member_designator)\n"
"\n"
"#endif /* #ifndef __STDDEF_H */\n";

@ -0,0 +1,130 @@
/* This file is a part of MIR project.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* See C11 7.20 */
static char stdint_str[]
= "#ifndef _STDINT_H\n"
"#define _STDINT_H 1\n"
"\n"
"#ifndef __int8_t_defined\n"
"#define __int8_t_defined\n"
"typedef signed char int8_t;\n"
"#endif\n"
"typedef short int int16_t;\n"
"typedef int int32_t;\n"
"typedef long int int64_t;\n"
"\n"
"typedef unsigned char uint8_t;\n"
"typedef unsigned short int uint16_t;\n"
"typedef unsigned int uint32_t;\n"
"typedef unsigned long int uint64_t;\n"
"\n"
"typedef signed char int_least8_t;\n"
"typedef short int int_least16_t;\n"
"typedef int int_least32_t;\n"
"typedef long int int_least64_t;\n"
"\n"
"typedef unsigned char uint_least8_t;\n"
"typedef unsigned short int uint_least16_t;\n"
"typedef unsigned int uint_least32_t;\n"
"typedef unsigned long int uint_least64_t;\n"
"\n"
"typedef signed char int_fast8_t;\n"
"typedef long int int_fast16_t;\n"
"typedef long int int_fast32_t;\n"
"typedef long int int_fast64_t;\n"
"\n"
"typedef unsigned char uint_fast8_t;\n"
"typedef unsigned long int uint_fast16_t;\n"
"typedef unsigned long int uint_fast32_t;\n"
"typedef unsigned long int uint_fast64_t;\n"
"\n"
"#define __intptr_t_defined\n"
"typedef long int intptr_t;\n"
"typedef unsigned long int uintptr_t;\n"
"\n"
"typedef long int intmax_t;\n"
"typedef unsigned long int uintmax_t;\n"
"\n"
"#define __INT64_C(c) c##L\n"
"#define __UINT64_C(c) c##UL\n"
"\n"
"#define INT8_MIN (-128)\n"
"#define INT16_MIN (-32768)\n"
"#define INT32_MIN (-2147483648)\n"
"#define INT64_MIN (-9223372036854775808l)\n"
"\n"
"#define INT8_MAX (127)\n"
"#define INT16_MAX (32767)\n"
"#define INT32_MAX (2147483647)\n"
"#define INT64_MAX (9223372036854775807l)\n"
"\n"
"#define UINT8_MAX (255)\n"
"#define UINT16_MAX (65535)\n"
"#define UINT32_MAX (4294967295u)\n"
"#define UINT64_MAX (18446744073709551615ul)\n"
"\n"
"#define INT_LEAST8_MIN (-128)\n"
"#define INT_LEAST16_MIN (-32768)\n"
"#define INT_LEAST32_MIN (-2147483648)\n"
"#define INT_LEAST64_MIN (-9223372036854775808L)\n"
"\n"
"#define INT_LEAST8_MAX (127)\n"
"#define INT_LEAST16_MAX (32767)\n"
"#define INT_LEAST32_MAX (2147483647)\n"
"#define INT_LEAST64_MAX (9223372036854775807L)\n"
"\n"
"#define UINT_LEAST8_MAX (255)\n"
"#define UINT_LEAST16_MAX (65535)\n"
"#define UINT_LEAST32_MAX (4294967295U)\n"
"#define UINT_LEAST64_MAX (18446744073709551615UL)\n"
"\n"
"#define INT_FAST8_MIN (-128)\n"
"#define INT_FAST16_MIN (-9223372036854775808L)\n"
"#define INT_FAST32_MIN (-9223372036854775808L)\n"
"#define INT_FAST64_MIN (-9223372036854775808L)\n"
"\n"
"#define INT_FAST8_MAX (127)\n"
"#define INT_FAST16_MAX (9223372036854775807L)\n"
"#define INT_FAST32_MAX (9223372036854775807L)\n"
"#define INT_FAST64_MAX (9223372036854775807L)\n"
"\n"
"#define UINT_FAST8_MAX (255)\n"
"#define UINT_FAST16_MAX (18446744073709551615UL)\n"
"#define UINT_FAST32_MAX (18446744073709551615UL)\n"
"#define UINT_FAST64_MAX (18446744073709551615UL)\n"
"\n"
"#define INTPTR_MIN (-9223372036854775808L)\n"
"#define INTPTR_MAX (9223372036854775807L)\n"
"#define UINTPTR_MAX (18446744073709551615UL)\n"
"\n"
"#define INTMAX_MIN (-9223372036854775808L)\n"
"#define INTMAX_MAX (9223372036854775807L)\n"
"#define UINTMAX_MAX (18446744073709551615UL)\n"
"\n"
"#define PTRDIFF_MIN (-9223372036854775808L)\n"
"#define PTRDIFF_MAX (9223372036854775807L)\n"
"\n"
"#define SIZE_MAX (18446744073709551615UL)\n"
"\n"
"/* For signed wchar_t and wint_t: */\n"
"#define WCHAR_MIN INT32_MIN\n"
"#define WCHAR_MAX INT32_MAX\n"
"#define WINT_MIN WCHAR_MIN\n"
"#define WINT_MAX WCHAR_MAX\n"
"\n"
"#define INT8_C(value) value\n"
"#define INT16_C(value) value\n"
"#define INT32_C(value) value\n"
"#define INT64_C(value) value##L\n"
"\n"
"#define UINT8_C(value) value\n"
"#define UINT16_C(value) value\n"
"#define UINT32_C(value) value##U\n"
"#define UINT64_C(value) value##UL\n"
"\n"
"#define INTMAX_C(value) value##L\n"
"#define UINTMAX_C(value) value##UL\n"
"\n"
"#endif /* #ifndef _STDINT_H */\n";

File diff suppressed because it is too large

@ -1,3 +1,11 @@
/* This file is a part of MIR project.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
#ifndef C2MIR_H
#define C2MIR_H
#include "mir.h"
#define COMMAND_LINE_SOURCE_NAME "<command-line>"
@ -10,7 +18,8 @@ struct c2mir_macro_command {
struct c2mir_options {
FILE *message_file;
int debug_p, verbose_p, no_prepro_p, prepro_only_p, syntax_only_p, pedantic_p, asm_p, object_p;
int debug_p, verbose_p, ignore_warnings_p, no_prepro_p, prepro_only_p;
int syntax_only_p, pedantic_p, asm_p, object_p;
size_t module_num;
FILE *prepro_output_file; /* non-null for prepro_only_p */
const char *output_file_name;
@ -23,3 +32,5 @@ void c2mir_init (MIR_context_t ctx);
void c2mir_finish (MIR_context_t ctx);
int c2mir_compile (MIR_context_t ctx, struct c2mir_options *ops, int (*getc_func) (void *),
void *getc_data, const char *source_name, FILE *output_file);
#endif
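To make the declarations above concrete, here is a hedged sketch of driving `c2mir_compile` with a `getc`-style reader over an in-memory string; the reader struct, helper names, and option choices are illustrative, and only the `c2mir_*` functions and `struct c2mir_options` fields come from this header.
```c
#include <stdio.h>
#include <string.h>
#include "c2mir.h"

/* Illustrative reader state: a NUL-terminated C source held in memory. */
struct str_reader { const char *s; size_t pos; };

static int str_getc (void *data) {
  struct str_reader *r = data;
  return r->s[r->pos] == '\0' ? EOF : (unsigned char) r->s[r->pos++];
}

/* Hedged sketch: compile one source string into MIR inside `ctx`;
   no assembler/object output is requested, so output_file is left NULL. */
static int compile_string (MIR_context_t ctx, const char *source) {
  struct c2mir_options ops;
  struct str_reader reader = {source, 0};
  memset (&ops, 0, sizeof (ops));
  ops.message_file = stderr;          /* where diagnostics are printed */
  c2mir_init (ctx);
  int ok = c2mir_compile (ctx, &ops, str_getc, &reader, "<string>", NULL);
  c2mir_finish (ctx);
  return ok;                          /* success flag returned by c2mir_compile */
}
```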

@ -1,3 +1,7 @@
/* This file is a part of MIR project.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
static const char mirc[]
= "#define __mirc__ 1\n"
"#define __MIRC__ 1\n"
@ -21,3 +25,15 @@ static const char mirc[]
"#define __signed__ signed\n"
"#define __volatile volatile\n"
"#define __volatile__ volatile\n";
#include "mirc_iso646.h"
#include "mirc_stdalign.h"
#include "mirc_stdbool.h"
#include "mirc_stdnoreturn.h"
#define TARGET_STD_INCLUDES \
{"iso646.h", iso646_str}, {"stdalign.h", stdalign_str}, {"stdbool.h", stdbool_str}, \
{"stdnoreturn.h", stdnoreturn_str}, {"float.h", float_str}, {"limits.h", limits_str}, \
{"stdarg.h", stdarg_str}, {"stdint.h", stdint_str}, { \
"stddef.h", stddef_str \
}

@ -0,0 +1,21 @@
/* This file is a part of MIR project.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* See C11 7.9 */
static char iso646_str[]
= "#ifndef __ISO646_H\n"
"#define __ISO646_H\n"
"\n"
"#define and &&\n"
"#define and_eq &=\n"
"#define bitand &\n"
"#define bitor |\n"
"#define compl ~\n"
"#define not !\n"
"#define not_eq !=\n"
"#define or ||\n"
"#define or_eq |=\n"
"#define xor ^\n"
"#define xor_eq ^=\n"
"#endif /* #ifndef __ISO646_H */\n";

@ -0,0 +1,14 @@
/* This file is a part of MIR project.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* See C11 7.15 */
static char stdalign_str[]
= "#ifndef __STDALIGN_H\n"
"#define __STDALIGN_H\n"
"\n"
"#define alignas _Alignas\n"
"#define alignof _Alignof\n"
"#define __alignas_is_defined 1\n"
"#define __alignof_is_defined 1\n"
"#endif /* #ifndef __STDALIGN_H */\n";

@ -0,0 +1,14 @@
/* This file is a part of MIR project.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* See C11 7.18 */
static char stdbool_str[]
= "#ifndef __STDBOOL_H\n"
"#define __STDBOOL_H\n"
"\n"
"#define bool _Bool\n"
"#define true 1\n"
"#define false 0\n"
"#define __bool_true_false_are_defined 1\n"
"#endif /* #ifndef __STDBOOL_H */\n";

@ -0,0 +1,11 @@
/* This file is a part of MIR project.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* See C11 7.23 */
static char stdnoreturn_str[]
= "#ifndef __STDNORETURN_H\n"
"#define __STDNORETURN_H\n"
"\n"
"#define noreturn _Noreturn\n"
"#endif /* #ifndef __STDNORETURN_H */\n";

@ -0,0 +1,310 @@
/* This file is a part of MIR project.
Copyright (C) 2018-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
ppc64 call ABI target specific code.
*/
typedef int target_arg_info_t;
static void target_init_arg_vars (c2m_ctx_t c2m_ctx, target_arg_info_t *arg_info) {}
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
static MIR_type_t fp_homogeneous_type (c2m_ctx_t c2m_ctx, struct type *param_type, int *num) {
return MIR_T_UNDEF;
}
#else
static MIR_type_t fp_homogeneous_type_1 (c2m_ctx_t c2m_ctx, MIR_type_t curr_type, struct type *type,
int *num) {
gen_ctx_t gen_ctx = c2m_ctx->gen_ctx;
int n;
MIR_type_t t;
if (type->mode == TM_STRUCT || type->mode == TM_UNION || type->mode == TM_ARR) {
switch (type->mode) {
case TM_ARR: { /* Arrays are handled as small records. */
struct arr_type *arr_type = type->u.arr_type;
struct expr *cexpr = arr_type->size->attr;
if ((t = fp_homogeneous_type_1 (c2m_ctx, curr_type, type->u.arr_type->el_type, &n))
== MIR_T_UNDEF)
return MIR_T_UNDEF;
*num = arr_type->size->code == N_IGNORE || !cexpr->const_p ? 1 : cexpr->u.i_val;
return t;
}
case TM_STRUCT:
case TM_UNION:
t = curr_type;
*num = 0;
for (node_t el = NL_HEAD (NL_EL (type->u.tag_type->u.ops, 1)->u.ops); el != NULL;
el = NL_NEXT (el))
if (el->code == N_MEMBER) {
decl_t decl = el->attr;
if ((t = fp_homogeneous_type_1 (c2m_ctx, t, decl->decl_spec.type, &n)) == MIR_T_UNDEF)
return MIR_T_UNDEF;
if (type->mode == TM_STRUCT)
*num += n;
else if (*num < n)
*num = n;
}
return t;
default: assert (FALSE);
}
}
assert (scalar_type_p (type));
if ((t = get_mir_type (c2m_ctx, type)) != MIR_T_F && t != MIR_T_D) return MIR_T_UNDEF;
if (curr_type != t && curr_type != MIR_T_UNDEF) return MIR_T_UNDEF;
*num = 1;
return t;
}
static MIR_type_t fp_homogeneous_type (c2m_ctx_t c2m_ctx, struct type *param_type, int *num) {
if (param_type->mode != TM_STRUCT && param_type->mode != TM_UNION) return MIR_T_UNDEF;
return fp_homogeneous_type_1 (c2m_ctx, MIR_T_UNDEF, param_type, num);
}
#endif
static int reg_aggregate_p (c2m_ctx_t c2m_ctx, struct type *ret_type) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return FALSE;
#else
return type_size (c2m_ctx, ret_type) <= 2 * 8;
#endif
}
static int target_return_by_addr_p (c2m_ctx_t c2m_ctx, struct type *ret_type) {
MIR_type_t type;
int n;
if (ret_type->mode != TM_STRUCT && ret_type->mode != TM_UNION) return FALSE;
if (((type = fp_homogeneous_type (c2m_ctx, ret_type, &n)) == MIR_T_F || type == MIR_T_D)
&& n <= 8)
return FALSE;
return !reg_aggregate_p (c2m_ctx, ret_type);
}
static void target_add_res_proto (c2m_ctx_t c2m_ctx, struct type *ret_type,
target_arg_info_t *arg_info, VARR (MIR_type_t) * res_types,
VARR (MIR_var_t) * arg_vars) {
MIR_var_t var;
MIR_type_t type;
int i, n, size;
if (void_type_p (ret_type)) return;
if (((type = fp_homogeneous_type (c2m_ctx, ret_type, &n)) == MIR_T_F || type == MIR_T_D)
&& n <= 8) {
for (i = 0; i < n; i++) VARR_PUSH (MIR_type_t, res_types, type);
} else if (ret_type->mode != TM_STRUCT && ret_type->mode != TM_UNION) {
VARR_PUSH (MIR_type_t, res_types, get_mir_type (c2m_ctx, ret_type));
} else if (reg_aggregate_p (c2m_ctx, ret_type)) {
size = type_size (c2m_ctx, ret_type);
for (; size > 0; size -= 8) VARR_PUSH (MIR_type_t, res_types, MIR_T_I64);
} else {
var.name = RET_ADDR_NAME;
var.type = MIR_T_RBLK;
var.size = type_size (c2m_ctx, ret_type);
VARR_PUSH (MIR_var_t, arg_vars, var);
}
}
static int target_add_call_res_op (c2m_ctx_t c2m_ctx, struct type *ret_type,
target_arg_info_t *arg_info, size_t call_arg_area_offset) {
gen_ctx_t gen_ctx = c2m_ctx->gen_ctx;
MIR_context_t ctx = c2m_ctx->ctx;
MIR_type_t type;
op_t temp;
int i, n, size;
if (void_type_p (ret_type)) return -1;
if (((type = fp_homogeneous_type (c2m_ctx, ret_type, &n)) == MIR_T_F || type == MIR_T_D)
&& n <= 8) {
for (i = 0; i < n; i++) {
temp = get_new_temp (c2m_ctx, type);
VARR_PUSH (MIR_op_t, call_ops, temp.mir_op);
}
return n;
} else if (ret_type->mode != TM_STRUCT && ret_type->mode != TM_UNION) {
type = get_mir_type (c2m_ctx, ret_type);
type = promote_mir_int_type (type);
temp = get_new_temp (c2m_ctx, type);
VARR_PUSH (MIR_op_t, call_ops, temp.mir_op);
return 1;
} else if (reg_aggregate_p (c2m_ctx, ret_type)) {
size = type_size (c2m_ctx, ret_type);
if (size == 0) return -1;
for (int s = size; s > 0; s -= 8) {
temp = get_new_temp (c2m_ctx, MIR_T_I64);
VARR_PUSH (MIR_op_t, call_ops, temp.mir_op);
}
return (size + 7) / 8;
} else {
temp = get_new_temp (c2m_ctx, MIR_T_I64);
emit3 (c2m_ctx, MIR_ADD, temp.mir_op,
MIR_new_reg_op (ctx, MIR_reg (ctx, FP_NAME, curr_func->u.func)),
MIR_new_int_op (ctx, call_arg_area_offset));
temp.mir_op
= MIR_new_mem_op (ctx, MIR_T_RBLK, type_size (c2m_ctx, ret_type), temp.mir_op.u.reg, 0, 1);
VARR_PUSH (MIR_op_t, call_ops, temp.mir_op);
return 0;
}
}
static op_t target_gen_post_call_res_code (c2m_ctx_t c2m_ctx, struct type *ret_type, op_t res,
MIR_insn_t call, size_t call_ops_start) {
gen_ctx_t gen_ctx = c2m_ctx->gen_ctx;
MIR_context_t ctx = c2m_ctx->ctx;
MIR_type_t type;
MIR_insn_t insn;
int i, n;
if (void_type_p (ret_type)) return res;
if (((type = fp_homogeneous_type (c2m_ctx, ret_type, &n)) == MIR_T_F || type == MIR_T_D)
&& n <= 8) {
assert (res.mir_op.mode == MIR_OP_MEM);
for (i = 0; i < n; i++) {
insn = MIR_new_insn (ctx, tp_mov (type),
MIR_new_mem_op (ctx, type,
res.mir_op.u.mem.disp + (type == MIR_T_F ? 4 : 8) * i,
res.mir_op.u.mem.base, res.mir_op.u.mem.index,
res.mir_op.u.mem.scale),
VARR_GET (MIR_op_t, call_ops, i + call_ops_start + 2));
MIR_append_insn (ctx, curr_func, insn);
}
} else if ((ret_type->mode == TM_STRUCT || ret_type->mode == TM_UNION)
&& reg_aggregate_p (c2m_ctx, ret_type)) {
assert (res.mir_op.mode == MIR_OP_MEM); /* addr */
gen_multiple_load_store (c2m_ctx, ret_type, &VARR_ADDR (MIR_op_t, call_ops)[call_ops_start + 2],
res.mir_op, FALSE);
}
return res;
}
static void target_add_ret_ops (c2m_ctx_t c2m_ctx, struct type *ret_type, op_t res) {
gen_ctx_t gen_ctx = c2m_ctx->gen_ctx;
MIR_context_t ctx = c2m_ctx->ctx;
MIR_type_t type;
MIR_insn_t insn;
MIR_reg_t ret_addr_reg;
op_t temp, var;
int i, n, size;
if (void_type_p (ret_type)) return;
if (((type = fp_homogeneous_type (c2m_ctx, ret_type, &n)) == MIR_T_F || type == MIR_T_D)
&& n <= 8) {
assert (res.mir_op.mode == MIR_OP_MEM);
for (int i = 0; i < n; i++) {
temp = get_new_temp (c2m_ctx, type);
insn = MIR_new_insn (ctx, tp_mov (type), temp.mir_op,
MIR_new_mem_op (ctx, type,
res.mir_op.u.mem.disp + (type == MIR_T_F ? 4 : 8) * i,
res.mir_op.u.mem.base, res.mir_op.u.mem.index,
res.mir_op.u.mem.scale));
MIR_append_insn (ctx, curr_func, insn);
VARR_PUSH (MIR_op_t, ret_ops, temp.mir_op);
}
} else if (ret_type->mode != TM_STRUCT && ret_type->mode != TM_UNION) {
VARR_PUSH (MIR_op_t, ret_ops, res.mir_op);
} else if (reg_aggregate_p (c2m_ctx, ret_type)) {
size = type_size (c2m_ctx, ret_type);
assert (res.mir_op.mode == MIR_OP_MEM && VARR_LENGTH (MIR_op_t, ret_ops) == 0);
for (int i = 0; size > 0; size -= 8, i++)
VARR_PUSH (MIR_op_t, ret_ops, get_new_temp (c2m_ctx, MIR_T_I64).mir_op);
gen_multiple_load_store (c2m_ctx, ret_type, &VARR_ADDR (MIR_op_t, ret_ops)[0], res.mir_op,
TRUE);
} else {
ret_addr_reg = MIR_reg (ctx, RET_ADDR_NAME, curr_func->u.func);
var = new_op (NULL, MIR_new_mem_op (ctx, MIR_T_I8, 0, ret_addr_reg, 0, 1));
size = type_size (c2m_ctx, ret_type);
block_move (c2m_ctx, var, res, size);
}
}
static MIR_type_t target_get_blk_type (c2m_ctx_t c2m_ctx, struct type *arg_type) {
return MIR_T_BLK; /* one BLK is enough */
}
static void target_add_arg_proto (c2m_ctx_t c2m_ctx, const char *name, struct type *arg_type,
target_arg_info_t *arg_info, VARR (MIR_var_t) * arg_vars) {
MIR_var_t var;
MIR_type_t type;
int n;
if (((type = fp_homogeneous_type (c2m_ctx, arg_type, &n)) == MIR_T_F || type == MIR_T_D)
&& n <= 8) {
for (int i = 0; i < n; i++) {
var.name = gen_get_indexed_name (c2m_ctx, name, i);
var.type = type;
VARR_PUSH (MIR_var_t, arg_vars, var);
}
return;
}
type = (arg_type->mode == TM_STRUCT || arg_type->mode == TM_UNION
? MIR_T_BLK
: get_mir_type (c2m_ctx, arg_type));
var.name = name;
var.type = type;
if (type == MIR_T_BLK) var.size = type_size (c2m_ctx, arg_type);
VARR_PUSH (MIR_var_t, arg_vars, var);
}
static void target_add_call_arg_op (c2m_ctx_t c2m_ctx, struct type *arg_type,
target_arg_info_t *arg_info, op_t arg) {
gen_ctx_t gen_ctx = c2m_ctx->gen_ctx;
MIR_context_t ctx = c2m_ctx->ctx;
MIR_var_t var;
MIR_type_t type;
op_t temp;
int n;
if (((type = fp_homogeneous_type (c2m_ctx, arg_type, &n)) == MIR_T_F || type == MIR_T_D)
&& n <= 8) {
assert (arg.mir_op.mode == MIR_OP_MEM);
arg = mem_to_address (c2m_ctx, arg, TRUE);
for (int i = 0; i < n; i++) {
temp = get_new_temp (c2m_ctx, type);
MIR_append_insn (ctx, curr_func,
MIR_new_insn (ctx, tp_mov (type), temp.mir_op,
MIR_new_mem_op (ctx, type, (type == MIR_T_F ? 4 : 8) * i,
arg.mir_op.u.reg, 0, 1)));
VARR_PUSH (MIR_op_t, call_ops, temp.mir_op);
}
return;
}
if (arg_type->mode != TM_STRUCT && arg_type->mode != TM_UNION) {
VARR_PUSH (MIR_op_t, call_ops, arg.mir_op);
} else {
assert (arg.mir_op.mode == MIR_OP_MEM);
arg = mem_to_address (c2m_ctx, arg, TRUE);
VARR_PUSH (MIR_op_t, call_ops,
MIR_new_mem_op (ctx, MIR_T_BLK, type_size (c2m_ctx, arg_type), arg.mir_op.u.reg, 0,
1));
}
}
static int target_gen_gather_arg (c2m_ctx_t c2m_ctx, const char *name, struct type *arg_type,
decl_t param_decl, target_arg_info_t *arg_info) {
gen_ctx_t gen_ctx = c2m_ctx->gen_ctx;
MIR_context_t ctx = c2m_ctx->ctx;
MIR_var_t var;
MIR_type_t type;
reg_var_t reg_var;
int i, n;
if (((type = fp_homogeneous_type (c2m_ctx, arg_type, &n)) == MIR_T_F || type == MIR_T_D)
&& n <= 8) {
for (i = 0; i < n; i++) {
assert (!param_decl->reg_p);
reg_var = get_reg_var (c2m_ctx, type, gen_get_indexed_name (c2m_ctx, name, i));
MIR_append_insn (ctx, curr_func,
MIR_new_insn (ctx, tp_mov (type),
MIR_new_mem_op (ctx, type,
param_decl->offset
+ (type == MIR_T_F ? 4 : 8) * i,
MIR_reg (ctx, FP_NAME, curr_func->u.func), 0,
1),
MIR_new_reg_op (ctx, reg_var.reg)));
}
return TRUE;
}
return FALSE;
}

@ -1,18 +1,22 @@
/* This file is a part of MIR project.
Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
#include "../mirc.h"
#include "mirc-ppc64-linux.h"
#include "mirc_ppc64_linux.h"
static const char *standard_includes[] = {mirc, ppc64_mirc};
#include "mirc_ppc64_float.h"
#include "mirc_ppc64_limits.h"
#include "mirc_ppc64_stdarg.h"
#include "mirc_ppc64_stdint.h"
#include "mirc_ppc64_stddef.h"
static const char *standard_include_dirs[] = {"include/mirc/", "include/mirc/ppc64/"};
static string_include_t standard_includes[]
= {{NULL, mirc}, {NULL, ppc64_mirc}, TARGET_STD_INCLUDES};
#define MAX_ALIGNMENT 16
#define ADJUST_VAR_ALIGNMENT(c2m_ctx, align, type) \
ppc64_adjust_var_alignment (c2m_ctx, align, type)
#define ADJUST_VAR_ALIGNMENT(c2m_ctx, align, type) ppc64_adjust_var_alignment (c2m_ctx, align, type)
static int ppc64_adjust_var_alignment (c2m_ctx_t c2m_ctx, int align, struct type *type) {
return align;

@ -1,5 +1,5 @@
/* This file is a part of MIR project.
Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
#include <stdint.h>
@ -28,12 +28,17 @@ typedef uint16_t mir_ushort;
typedef uint32_t mir_uint;
typedef uint64_t mir_ulong;
typedef uint64_t mir_ullong;
typedef uint32_t mir_wchar;
typedef uint16_t mir_char16;
typedef uint32_t mir_char32;
#define MIR_UCHAR_MAX UINT8_MAX
#define MIR_USHORT_MAX UINT16_MAX
#define MIR_UINT_MAX UINT32_MAX
#define MIR_ULONG_MAX UINT64_MAX
#define MIR_ULLONG_MAX UINT64_MAX
#define MIR_WCHAR_MIN 0
#define MIR_WCHAR_MAX UINT32_MAX
typedef mir_schar mir_char;
#define MIR_CHAR_MIN MIR_SCHAR_MIN

@ -0,0 +1,60 @@
/* This file is a part of MIR project.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* See C11 5.2.4.2.2 */
static char float_str[]
= "#ifndef __FLOAT_H\n"
"#define __FLOAT_H\n"
"\n"
"#define FLT_RADIX 2\n"
"\n"
"#define FLT_MANT_DIG 24\n"
"#define DBL_MANT_DIG 53\n"
"#define LDBL_MANT_DIG DBL_MANT_DIG\n"
"\n"
"#define FLT_DECIMAL_DIG 9\n"
"#define DBL_DECIMAL_DIG 17\n"
"#define LDBL_DECIMAL_DIG DBL_DECIMAL_DIG\n"
"#define FLT_DIG FLT_DECIMAL_DIG\n"
"#define DBL_DIG DBL_DECIMAL_DIG\n"
"#define LDBL_DIG LDBL_DECIMAL_DIG\n"
"\n"
"#define DECIMAL_DIG LDBL_DECIMAL_DIG\n"
"\n"
"#define FLT_MIN_EXP -125\n"
"#define DBL_MIN_EXP -1021\n"
"#define LDBL_MIN_EXP DBL_MIN_EXP\n"
"\n"
"#define FLT_MIN_10_EXP -37\n"
"#define DBL_MIN_10_EXP -307\n"
"#define LDBL_MIN_10_EXP DBL_MIN_10_EXP\n"
"\n"
"#define FLT_MAX_EXP 128\n"
"#define DBL_MAX_EXP 1024\n"
"#define LDBL_MAX_EXP DBL_MAX_EXP\n"
"\n"
"#define FLT_MAX_10_EXP 38\n"
"#define DBL_MAX_10_EXP 308\n"
"#define LDBL_MAX_10_EXP DBL_MAX_10_EXP\n"
"\n"
"#define FLT_MAX 0x1.fffffep+127\n"
"#define DBL_MAX 0x1.fffffffffffffp+1023\n"
"#define LDBL_MAX DBL_MAX\n"
"\n"
"#define FLT_EPSILON 0x1p-23\n"
"#define DBL_EPSILON 0x1p-52\n"
"#define LDBL_EPSILON DBL_EPSILON\n"
"\n"
"#define FLT_MIN 0x1p-126\n"
"#define DBL_MIN 0x1p-1022\n"
"#define LDBL_MIN DBL_MIN\n"
"\n"
"#define FLT_TRUE_MIN 0x1p-149\n"
"#define DBL_TRUE_MIN 0x0.0000000000001p-1022\n"
"#define LDBL_TRUE_MIN DBL_TRUE_MIN\n"
"\n"
"#define FLT_EVAL_METHOD 0\n"
"#define FLT_ROUNDS 1 /* round to the nearest */\n"
"\n"
"#endif /* #ifndef __FLOAT_H */\n";

@ -0,0 +1,38 @@
/* This file is a part of MIR project.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* See 5.2.4.2 */
static char limits_str[]
= "#ifndef __LIMITS_H\n"
"#define __LIMITS_H\n"
"\n"
"#define CHAR_BIT 8\n"
"\n"
"#define SCHAR_MIN (-SCHAR_MAX - 1)\n"
"#define SCHAR_MAX 127\n"
"#define UCHAR_MAX (SCHAR_MAX * 2 + 1)\n"
"\n"
"#define MB_LEN_MAX 1\n"
"\n"
"#define SHRT_MIN (-SHRT_MAX - 1)\n"
"#define SHRT_MAX 32767\n"
"#define USHRT_MAX (SHRT_MAX * 2 + 1)\n"
"\n"
"#define INT_MIN (-INT_MAX - 1)\n"
"#define INT_MAX 2147483647\n"
"#define UINT_MAX (INT_MAX * 2u + 1u)\n"
"\n"
"#define LONG_MIN (-LONG_MAX - 1l)\n"
"#define LONG_MAX 9223372036854775807l\n"
"#define ULONG_MAX (LONG_MAX * 2ul + 1ul)\n"
"\n"
"#define LLONG_MIN LONG_MIN\n"
"#define LLONG_MAX LONG_MAX\n"
"#define ULLONG_MAX ULONG_MAX\n"
"\n"
"/* unsigned char by default */\n"
"#define CHAR_MIN 0\n"
"#define CHAR_MAX UCHAR_MAX\n"
"\n"
"#endif /* #ifndef __LIMITS_H */\n";

@ -1,5 +1,5 @@
/* This file is a part of MIR project.
Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
static char ppc64_mirc[]
@ -15,7 +15,7 @@ static char ppc64_mirc[]
"#define _CALL_ELF 2\n"
#endif
"\n"
"#define __LONG_DOUBLE_128__ 1\n" // ???
"#define __LONG_DOUBLE_128__ 1\n" // ???
"#define __SIZEOF_DOUBLE__ 8\n"
"#define __SIZEOF_FLOAT__ 4\n"
"#define __SIZEOF_INT__ 4\n"

@ -0,0 +1,21 @@
/* This file is a part of MIR project.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
static char stdarg_str[]
= "#ifndef __STDARG_H\n"
"#define __STDARG_H\n"
"\n"
"typedef void *va_list[1];\n"
"\n"
"#define va_start(ap, param) __builtin_va_start (ap)\n"
"#define va_arg(ap, type) __builtin_va_arg(ap, (type *) 0)\n"
"#define va_end(ap) 0\n"
"#define va_copy(dest, src) ((dest) = (src))\n"
"\n"
"/* For standard headers of a GNU system: */\n"
"#ifndef __GNUC_VA_LIST\n"
"#define __GNUC_VA_LIST 1\n"
"#endif\n"
"typedef va_list __gnuc_va_list;\n"
"#endif /* #ifndef __STDARG_H */\n";

@ -0,0 +1,19 @@
/* This file is a part of MIR project.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* See C11 7.19 */
static char stddef_str[]
= "#ifndef __STDDEF_H\n"
"#define __STDDEF_H\n"
"\n"
"typedef long ptrdiff_t;\n"
"typedef unsigned long size_t;\n"
"typedef long double max_align_t;\n"
"typedef unsigned int wchar_t;\n"
"\n"
"#define NULL ((void *) 0)\n"
"\n"
"#define offsetof(type, member_designator) ((size_t) & ((type *) 0)->member_designator)\n"
"\n"
"#endif /* #ifndef __STDDEF_H */\n";

@ -0,0 +1,130 @@
/* This file is a part of MIR project.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* See C11 7.20 */
static char stdint_str[]
= "#ifndef _STDINT_H\n"
"#define _STDINT_H 1\n"
"\n"
"#ifndef __int8_t_defined\n"
"#define __int8_t_defined\n"
"typedef signed char int8_t;\n"
"#endif\n"
"typedef short int int16_t;\n"
"typedef int int32_t;\n"
"typedef long int int64_t;\n"
"\n"
"typedef unsigned char uint8_t;\n"
"typedef unsigned short int uint16_t;\n"
"typedef unsigned int uint32_t;\n"
"typedef unsigned long int uint64_t;\n"
"\n"
"typedef signed char int_least8_t;\n"
"typedef short int int_least16_t;\n"
"typedef int int_least32_t;\n"
"typedef long int int_least64_t;\n"
"\n"
"typedef unsigned char uint_least8_t;\n"
"typedef unsigned short int uint_least16_t;\n"
"typedef unsigned int uint_least32_t;\n"
"typedef unsigned long int uint_least64_t;\n"
"\n"
"typedef signed char int_fast8_t;\n"
"typedef long int int_fast16_t;\n"
"typedef long int int_fast32_t;\n"
"typedef long int int_fast64_t;\n"
"\n"
"typedef unsigned char uint_fast8_t;\n"
"typedef unsigned long int uint_fast16_t;\n"
"typedef unsigned long int uint_fast32_t;\n"
"typedef unsigned long int uint_fast64_t;\n"
"\n"
"#define __intptr_t_defined\n"
"typedef long int intptr_t;\n"
"typedef unsigned long int uintptr_t;\n"
"\n"
"typedef long int intmax_t;\n"
"typedef unsigned long int uintmax_t;\n"
"\n"
"#define __INT64_C(c) c##L\n"
"#define __UINT64_C(c) c##UL\n"
"\n"
"#define INT8_MIN (-128)\n"
"#define INT16_MIN (-32768)\n"
"#define INT32_MIN (-2147483648)\n"
"#define INT64_MIN (-9223372036854775808l)\n"
"\n"
"#define INT8_MAX (127)\n"
"#define INT16_MAX (32767)\n"
"#define INT32_MAX (2147483647)\n"
"#define INT64_MAX (9223372036854775807l)\n"
"\n"
"#define UINT8_MAX (255)\n"
"#define UINT16_MAX (65535)\n"
"#define UINT32_MAX (4294967295u)\n"
"#define UINT64_MAX (18446744073709551615ul)\n"
"\n"
"#define INT_LEAST8_MIN (-128)\n"
"#define INT_LEAST16_MIN (-32768)\n"
"#define INT_LEAST32_MIN (-2147483648)\n"
"#define INT_LEAST64_MIN (-9223372036854775808L)\n"
"\n"
"#define INT_LEAST8_MAX (127)\n"
"#define INT_LEAST16_MAX (32767)\n"
"#define INT_LEAST32_MAX (2147483647)\n"
"#define INT_LEAST64_MAX (9223372036854775807L)\n"
"\n"
"#define UINT_LEAST8_MAX (255)\n"
"#define UINT_LEAST16_MAX (65535)\n"
"#define UINT_LEAST32_MAX (4294967295U)\n"
"#define UINT_LEAST64_MAX (18446744073709551615UL)\n"
"\n"
"#define INT_FAST8_MIN (-128)\n"
"#define INT_FAST16_MIN (-9223372036854775808L)\n"
"#define INT_FAST32_MIN (-9223372036854775808L)\n"
"#define INT_FAST64_MIN (-9223372036854775808L)\n"
"\n"
"#define INT_FAST8_MAX (127)\n"
"#define INT_FAST16_MAX (9223372036854775807L)\n"
"#define INT_FAST32_MAX (9223372036854775807L)\n"
"#define INT_FAST64_MAX (9223372036854775807L)\n"
"\n"
"#define UINT_FAST8_MAX (255)\n"
"#define UINT_FAST16_MAX (18446744073709551615UL)\n"
"#define UINT_FAST32_MAX (18446744073709551615UL)\n"
"#define UINT_FAST64_MAX (18446744073709551615UL)\n"
"\n"
"#define INTPTR_MIN (-9223372036854775808L)\n"
"#define INTPTR_MAX (9223372036854775807L)\n"
"#define UINTPTR_MAX (18446744073709551615UL)\n"
"\n"
"#define INTMAX_MIN (-9223372036854775808L)\n"
"#define INTMAX_MAX (9223372036854775807L)\n"
"#define UINTMAX_MAX (18446744073709551615UL)\n"
"\n"
"#define PTRDIFF_MIN (-9223372036854775808L)\n"
"#define PTRDIFF_MAX (9223372036854775807L)\n"
"\n"
"#define SIZE_MAX (18446744073709551615UL)\n"
"\n"
"/* For signed wchar_t and wint_t: */\n"
"#define WCHAR_MIN INT32_MIN\n"
"#define WCHAR_MAX INT32_MAX\n"
"#define WINT_MIN WCHAR_MIN\n"
"#define WINT_MAX WCHAR_MAX\n"
"\n"
"#define INT8_C(value) value\n"
"#define INT16_C(value) value\n"
"#define INT32_C(value) value\n"
"#define INT64_C(value) value##L\n"
"\n"
"#define UINT8_C(value) value\n"
"#define UINT16_C(value) value\n"
"#define UINT32_C(value) value##U\n"
"#define UINT64_C(value) value##UL\n"
"\n"
"#define INTMAX_C(value) value##L\n"
"#define UINTMAX_C(value) value##UL\n"
"\n"
"#endif /* #ifndef _STDINT_H */\n";

@ -0,0 +1,100 @@
/* This file is a part of MIR project.
Copyright (C) 2018-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
s390x call ABI target specific code.
*/
typedef int target_arg_info_t;
static void target_init_arg_vars (c2m_ctx_t c2m_ctx, target_arg_info_t *arg_info) {}
static int target_return_by_addr_p (c2m_ctx_t c2m_ctx, struct type *ret_type) {
return simple_return_by_addr_p (c2m_ctx, ret_type);
}
static void target_add_res_proto (c2m_ctx_t c2m_ctx, struct type *ret_type,
target_arg_info_t *arg_info, VARR (MIR_type_t) * res_types,
VARR (MIR_var_t) * arg_vars) {
simple_add_res_proto (c2m_ctx, ret_type, arg_info, res_types, arg_vars);
}
static int target_add_call_res_op (c2m_ctx_t c2m_ctx, struct type *ret_type,
target_arg_info_t *arg_info, size_t call_arg_area_offset) {
return simple_add_call_res_op (c2m_ctx, ret_type, arg_info, call_arg_area_offset);
}
static op_t target_gen_post_call_res_code (c2m_ctx_t c2m_ctx, struct type *ret_type, op_t res,
MIR_insn_t call, size_t call_ops_start) {
return simple_gen_post_call_res_code (c2m_ctx, ret_type, res, call, call_ops_start);
}
static void target_add_ret_ops (c2m_ctx_t c2m_ctx, struct type *ret_type, op_t res) {
simple_add_ret_ops (c2m_ctx, ret_type, res);
}
static int reg_aggregate_p (c2m_ctx_t c2m_ctx, struct type *arg_type) {
size_t size = type_size (c2m_ctx, arg_type);
return size == 1 || size == 2 || size == 4 || size == 8;
}
static MIR_type_t target_get_blk_type (c2m_ctx_t c2m_ctx, struct type *arg_type) {
return MIR_T_BLK; /* one BLK is enough */
}
static void target_add_arg_proto (c2m_ctx_t c2m_ctx, const char *name, struct type *arg_type,
target_arg_info_t *arg_info, VARR (MIR_var_t) * arg_vars) {
MIR_var_t var;
MIR_type_t type;
if (arg_type->mode != TM_STRUCT && arg_type->mode != TM_UNION)
type = get_mir_type (c2m_ctx, arg_type);
else if (reg_aggregate_p (c2m_ctx, arg_type))
type = MIR_T_I64;
else
type = MIR_T_BLK;
var.name = name;
var.type = type;
if (type == MIR_T_BLK) var.size = type_size (c2m_ctx, arg_type);
VARR_PUSH (MIR_var_t, arg_vars, var);
}
static void target_add_call_arg_op (c2m_ctx_t c2m_ctx, struct type *arg_type,
target_arg_info_t *arg_info, op_t arg) {
gen_ctx_t gen_ctx = c2m_ctx->gen_ctx;
op_t temp;
if (arg_type->mode != TM_STRUCT && arg_type->mode != TM_UNION) {
VARR_PUSH (MIR_op_t, call_ops, arg.mir_op);
} else if (reg_aggregate_p (c2m_ctx, arg_type)) {
assert (arg.mir_op.mode == MIR_OP_MEM);
temp = get_new_temp (c2m_ctx, MIR_T_I64);
gen_multiple_load_store (c2m_ctx, arg_type, &temp.mir_op, arg.mir_op, TRUE);
VARR_PUSH (MIR_op_t, call_ops, temp.mir_op);
} else {
assert (arg.mir_op.mode == MIR_OP_MEM);
arg = mem_to_address (c2m_ctx, arg, TRUE);
VARR_PUSH (MIR_op_t, call_ops,
MIR_new_mem_op (c2m_ctx->ctx, MIR_T_BLK, type_size (c2m_ctx, arg_type),
arg.mir_op.u.reg, 0, 1));
}
}
static int target_gen_gather_arg (c2m_ctx_t c2m_ctx, const char *name, struct type *arg_type,
decl_t param_decl, target_arg_info_t *arg_info) {
gen_ctx_t gen_ctx = c2m_ctx->gen_ctx;
MIR_context_t ctx = c2m_ctx->ctx;
MIR_type_t type;
MIR_op_t param_op;
reg_var_t reg_var;
if ((arg_type->mode != TM_STRUCT && arg_type->mode != TM_UNION)
|| !reg_aggregate_p (c2m_ctx, arg_type))
return FALSE;
assert (!param_decl->reg_p);
reg_var = get_reg_var (c2m_ctx, MIR_T_I64, name);
param_op = MIR_new_reg_op (ctx, reg_var.reg);
gen_multiple_load_store (c2m_ctx, arg_type, &param_op,
MIR_new_mem_op (ctx, MIR_T_UNDEF, param_decl->offset,
MIR_reg (ctx, FP_NAME, curr_func->u.func), 0, 1),
FALSE);
return TRUE;
}

@ -1,18 +1,22 @@
/* This file is a part of MIR project.
Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
#include "../mirc.h"
#include "mirc-s390x-linux.h"
#include "mirc_s390x_linux.h"
static const char *standard_includes[] = {mirc, s390x_mirc};
#include "mirc_s390x_float.h"
#include "mirc_s390x_limits.h"
#include "mirc_s390x_stdarg.h"
#include "mirc_s390x_stdint.h"
#include "mirc_s390x_stddef.h"
static const char *standard_include_dirs[] = {"include/mirc/", "include/mirc/s390x/"};
static string_include_t standard_includes[]
= {{NULL, mirc}, {NULL, s390x_mirc}, TARGET_STD_INCLUDES};
#define MAX_ALIGNMENT 16
#define ADJUST_VAR_ALIGNMENT(c2m_ctx, align, type) \
s390x_adjust_var_alignment (c2m_ctx, align, type)
#define ADJUST_VAR_ALIGNMENT(c2m_ctx, align, type) s390x_adjust_var_alignment (c2m_ctx, align, type)
static int s390x_adjust_var_alignment (c2m_ctx_t c2m_ctx, int align, struct type *type) {
return align;

@ -1,5 +1,5 @@
/* This file is a part of MIR project.
Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
#include <stdint.h>
@ -28,12 +28,17 @@ typedef uint16_t mir_ushort;
typedef uint32_t mir_uint;
typedef uint64_t mir_ulong;
typedef uint64_t mir_ullong;
typedef uint32_t mir_wchar;
typedef uint16_t mir_char16;
typedef uint32_t mir_char32;
#define MIR_UCHAR_MAX UINT8_MAX
#define MIR_USHORT_MAX UINT16_MAX
#define MIR_UINT_MAX UINT32_MAX
#define MIR_ULONG_MAX UINT64_MAX
#define MIR_ULLONG_MAX UINT64_MAX
#define MIR_WCHAR_MIN 0
#define MIR_WCHAR_MAX UINT32_MAX
typedef mir_schar mir_char;
#define MIR_CHAR_MIN MIR_SCHAR_MIN

@ -0,0 +1,60 @@
/* This file is a part of MIR project.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* See C11 5.2.4.2.2 */
static char float_str[]
= "#ifndef __FLOAT_H\n"
"#define __FLOAT_H\n"
"\n"
"#define FLT_RADIX 2\n"
"\n"
"#define FLT_MANT_DIG 24\n"
"#define DBL_MANT_DIG 53\n"
"#define LDBL_MANT_DIG DBL_MANT_DIG\n"
"\n"
"#define FLT_DECIMAL_DIG 9\n"
"#define DBL_DECIMAL_DIG 17\n"
"#define LDBL_DECIMAL_DIG DBL_DECIMAL_DIG\n"
"#define FLT_DIG FLT_DECIMAL_DIG\n"
"#define DBL_DIG DBL_DECIMAL_DIG\n"
"#define LDBL_DIG LDBL_DECIMAL_DIG\n"
"\n"
"#define DECIMAL_DIG LDBL_DECIMAL_DIG\n"
"\n"
"#define FLT_MIN_EXP -125\n"
"#define DBL_MIN_EXP -1021\n"
"#define LDBL_MIN_EXP DBL_MIN_EXP\n"
"\n"
"#define FLT_MIN_10_EXP -37\n"
"#define DBL_MIN_10_EXP -307\n"
"#define LDBL_MIN_10_EXP DBL_MIN_10_EXP\n"
"\n"
"#define FLT_MAX_EXP 128\n"
"#define DBL_MAX_EXP 1024\n"
"#define LDBL_MAX_EXP DBL_MAX_EXP\n"
"\n"
"#define FLT_MAX_10_EXP 38\n"
"#define DBL_MAX_10_EXP 308\n"
"#define LDBL_MAX_10_EXP DBL_MAX_10_EXP\n"
"\n"
"#define FLT_MAX 0x1.fffffep+127\n"
"#define DBL_MAX 0x1.fffffffffffffp+1023\n"
"#define LDBL_MAX DBL_MAX\n"
"\n"
"#define FLT_EPSILON 0x1p-23\n"
"#define DBL_EPSILON 0x1p-52\n"
"#define LDBL_EPSILON DBL_EPSILON\n"
"\n"
"#define FLT_MIN 0x1p-126\n"
"#define DBL_MIN 0x1p-1022\n"
"#define LDBL_MIN DBL_MIN\n"
"\n"
"#define FLT_TRUE_MIN 0x1p-149\n"
"#define DBL_TRUE_MIN 0x0.0000000000001p-1022\n"
"#define LDBL_TRUE_MIN DBL_TRUE_MIN\n"
"\n"
"#define FLT_EVAL_METHOD 0\n"
"#define FLT_ROUNDS 1 /* round to the nearest */\n"
"\n"
"#endif /* #ifndef __FLOAT_H */\n";

@ -0,0 +1,38 @@
/* This file is a part of MIR project.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* See 5.2.4.2 */
static char limits_str[]
= "#ifndef __LIMITS_H\n"
"#define __LIMITS_H\n"
"\n"
"#define CHAR_BIT 8\n"
"\n"
"#define SCHAR_MIN (-SCHAR_MAX - 1)\n"
"#define SCHAR_MAX 127\n"
"#define UCHAR_MAX (SCHAR_MAX * 2 + 1)\n"
"\n"
"#define MB_LEN_MAX 1\n"
"\n"
"#define SHRT_MIN (-SHRT_MAX - 1)\n"
"#define SHRT_MAX 32767\n"
"#define USHRT_MAX (SHRT_MAX * 2 + 1)\n"
"\n"
"#define INT_MIN (-INT_MAX - 1)\n"
"#define INT_MAX 2147483647\n"
"#define UINT_MAX (INT_MAX * 2u + 1u)\n"
"\n"
"#define LONG_MIN (-LONG_MAX - 1l)\n"
"#define LONG_MAX 9223372036854775807l\n"
"#define ULONG_MAX (LONG_MAX * 2ul + 1ul)\n"
"\n"
"#define LLONG_MIN LONG_MIN\n"
"#define LLONG_MAX LONG_MAX\n"
"#define ULLONG_MAX ULONG_MAX\n"
"\n"
"/* unsigned char by default */\n"
"#define CHAR_MIN 0\n"
"#define CHAR_MAX UCHAR_MAX\n"
"\n"
"#endif /* #ifndef __LIMITS_H */\n";

@ -1,5 +1,5 @@
/* This file is a part of MIR project.
Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
static char s390x_mirc[]
@ -11,7 +11,7 @@ static char s390x_mirc[]
"#define __ELF__ 1\n"
"\n"
#if __SIZEOF_LONG_DOUBLE__ == 16
"#define __LONG_DOUBLE_128__ 1\n" // ???
"#define __LONG_DOUBLE_128__ 1\n" // ???
"#define __SIZEOF_LONG_DOUBLE__ 16\n"
#else
"#define __SIZEOF_LONG_DOUBLE__ 8\n"

@ -0,0 +1,25 @@
/* This file is a part of MIR project.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
static char stdarg_str[]
= "#ifndef __STDARG_H\n"
"#define __STDARG_H\n"
"\n"
"typedef struct {\n"
" long __gpr, __fpr;\n"
" void *__overflow_arg_area;\n"
" void *__reg_save_area;\n"
"} va_list[1];\n"
"\n"
"#define va_start(ap, param) __builtin_va_start (ap)\n"
"#define va_arg(ap, type) __builtin_va_arg(ap, (type *) 0)\n"
"#define va_end(ap) 0\n"
"#define va_copy(dest, src) ((dest) = (src))\n"
"\n"
"/* For standard headers of a GNU system: */\n"
"#ifndef __GNUC_VA_LIST\n"
"#define __GNUC_VA_LIST 1\n"
"#endif\n"
"typedef va_list __gnuc_va_list;\n"
"#endif /* #ifndef __STDARG_H */\n";

@ -0,0 +1,19 @@
/* This file is a part of MIR project.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* See C11 7.19 */
static char stddef_str[]
= "#ifndef __STDDEF_H\n"
"#define __STDDEF_H\n"
"\n"
"typedef long ptrdiff_t;\n"
"typedef unsigned long size_t;\n"
"typedef long double max_align_t;\n"
"typedef unsigned int wchar_t;\n"
"\n"
"#define NULL ((void *) 0)\n"
"\n"
"#define offsetof(type, member_designator) ((size_t) & ((type *) 0)->member_designator)\n"
"\n"
"#endif /* #ifndef __STDDEF_H */\n";

@ -0,0 +1,130 @@
/* This file is a part of MIR project.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* See C11 7.20 */
static char stdint_str[]
= "#ifndef _STDINT_H\n"
"#define _STDINT_H 1\n"
"\n"
"#ifndef __int8_t_defined\n"
"#define __int8_t_defined\n"
"typedef signed char int8_t;\n"
"#endif\n"
"typedef short int int16_t;\n"
"typedef int int32_t;\n"
"typedef long int int64_t;\n"
"\n"
"typedef unsigned char uint8_t;\n"
"typedef unsigned short int uint16_t;\n"
"typedef unsigned int uint32_t;\n"
"typedef unsigned long int uint64_t;\n"
"\n"
"typedef signed char int_least8_t;\n"
"typedef short int int_least16_t;\n"
"typedef int int_least32_t;\n"
"typedef long int int_least64_t;\n"
"\n"
"typedef unsigned char uint_least8_t;\n"
"typedef unsigned short int uint_least16_t;\n"
"typedef unsigned int uint_least32_t;\n"
"typedef unsigned long int uint_least64_t;\n"
"\n"
"typedef signed char int_fast8_t;\n"
"typedef long int int_fast16_t;\n"
"typedef long int int_fast32_t;\n"
"typedef long int int_fast64_t;\n"
"\n"
"typedef unsigned char uint_fast8_t;\n"
"typedef unsigned long int uint_fast16_t;\n"
"typedef unsigned long int uint_fast32_t;\n"
"typedef unsigned long int uint_fast64_t;\n"
"\n"
"#define __intptr_t_defined\n"
"typedef long int intptr_t;\n"
"typedef unsigned long int uintptr_t;\n"
"\n"
"typedef long int intmax_t;\n"
"typedef unsigned long int uintmax_t;\n"
"\n"
"#define __INT64_C(c) c##L\n"
"#define __UINT64_C(c) c##UL\n"
"\n"
"#define INT8_MIN (-128)\n"
"#define INT16_MIN (-32768)\n"
"#define INT32_MIN (-2147483648)\n"
"#define INT64_MIN (-9223372036854775808l)\n"
"\n"
"#define INT8_MAX (127)\n"
"#define INT16_MAX (32767)\n"
"#define INT32_MAX (2147483647)\n"
"#define INT64_MAX (9223372036854775807l)\n"
"\n"
"#define UINT8_MAX (255)\n"
"#define UINT16_MAX (65535)\n"
"#define UINT32_MAX (4294967295u)\n"
"#define UINT64_MAX (18446744073709551615ul)\n"
"\n"
"#define INT_LEAST8_MIN (-128)\n"
"#define INT_LEAST16_MIN (-32768)\n"
"#define INT_LEAST32_MIN (-2147483648)\n"
"#define INT_LEAST64_MIN (-9223372036854775808L)\n"
"\n"
"#define INT_LEAST8_MAX (127)\n"
"#define INT_LEAST16_MAX (32767)\n"
"#define INT_LEAST32_MAX (2147483647)\n"
"#define INT_LEAST64_MAX (9223372036854775807L)\n"
"\n"
"#define UINT_LEAST8_MAX (255)\n"
"#define UINT_LEAST16_MAX (65535)\n"
"#define UINT_LEAST32_MAX (4294967295U)\n"
"#define UINT_LEAST64_MAX (18446744073709551615UL)\n"
"\n"
"#define INT_FAST8_MIN (-128)\n"
"#define INT_FAST16_MIN (-9223372036854775808L)\n"
"#define INT_FAST32_MIN (-9223372036854775808L)\n"
"#define INT_FAST64_MIN (-9223372036854775808L)\n"
"\n"
"#define INT_FAST8_MAX (127)\n"
"#define INT_FAST16_MAX (9223372036854775807L)\n"
"#define INT_FAST32_MAX (9223372036854775807L)\n"
"#define INT_FAST64_MAX (9223372036854775807L)\n"
"\n"
"#define UINT_FAST8_MAX (255)\n"
"#define UINT_FAST16_MAX (18446744073709551615UL)\n"
"#define UINT_FAST32_MAX (18446744073709551615UL)\n"
"#define UINT_FAST64_MAX (18446744073709551615UL)\n"
"\n"
"#define INTPTR_MIN (-9223372036854775808L)\n"
"#define INTPTR_MAX (9223372036854775807L)\n"
"#define UINTPTR_MAX (18446744073709551615UL)\n"
"\n"
"#define INTMAX_MIN (-9223372036854775808L)\n"
"#define INTMAX_MAX (9223372036854775807L)\n"
"#define UINTMAX_MAX (18446744073709551615UL)\n"
"\n"
"#define PTRDIFF_MIN (-9223372036854775808L)\n"
"#define PTRDIFF_MAX (9223372036854775807L)\n"
"\n"
"#define SIZE_MAX (18446744073709551615UL)\n"
"\n"
"/* For signed wchar_t and wint_t: */\n"
"#define WCHAR_MIN INT32_MIN\n"
"#define WCHAR_MAX INT32_MAX\n"
"#define WINT_MIN WCHAR_MIN\n"
"#define WINT_MAX WCHAR_MAX\n"
"\n"
"#define INT8_C(value) value\n"
"#define INT16_C(value) value\n"
"#define INT32_C(value) value\n"
"#define INT64_C(value) value##L\n"
"\n"
"#define UINT8_C(value) value\n"
"#define UINT16_C(value) value\n"
"#define UINT32_C(value) value##U\n"
"#define UINT64_C(value) value##UL\n"
"\n"
"#define INTMAX_C(value) value##L\n"
"#define UINTMAX_C(value) value##UL\n"
"\n"
"#endif /* #ifndef _STDINT_H */\n";

@ -0,0 +1,401 @@
/* This file is a part of MIR project.
Copyright (C) 2018-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
x86_64 ABI target specific code.
*/
/* See https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf. We use MIR_T_UNDEF for
MEMORY. */
enum add_arg_class { NO_CLASS = MIR_T_BOUND + 1, X87UP_CLASS };
#ifndef _WIN32
#define MAX_QWORDS 2
#else
#define MAX_QWORDS 1
#endif
static MIR_type_t get_result_type (MIR_type_t arg_type1, MIR_type_t arg_type2) {
if (arg_type1 == arg_type2) return arg_type1;
if ((enum add_arg_class) arg_type1 == NO_CLASS) return arg_type2;
if ((enum add_arg_class) arg_type2 == NO_CLASS) return arg_type1;
if (arg_type1 == MIR_T_UNDEF || arg_type2 == MIR_T_UNDEF) return MIR_T_UNDEF;
if (arg_type1 == MIR_T_I64 || arg_type1 == MIR_T_I32 || arg_type2 == MIR_T_I64
|| arg_type2 == MIR_T_I32)
return MIR_T_I64;
if (arg_type1 == MIR_T_LD || arg_type2 == MIR_T_LD
|| (enum add_arg_class) arg_type1 == X87UP_CLASS
|| (enum add_arg_class) arg_type2 == X87UP_CLASS)
return MIR_T_UNDEF;
return MIR_T_D;
}
static int classify_arg (c2m_ctx_t c2m_ctx, struct type *type, MIR_type_t types[MAX_QWORDS],
int bit_field_p) {
size_t size = type_size (c2m_ctx, type);
int i, n_el_qwords, n_qwords = (size + 7) / 8;
MIR_type_t mir_type;
if (type->mode == TM_STRUCT || type->mode == TM_UNION || type->mode == TM_ARR) {
MIR_type_t subtypes[MAX_QWORDS];
if (n_qwords > MAX_QWORDS) return 0; /* too big aggregate */
#ifndef _WIN32
for (i = 0; i < n_qwords; i++) types[i] = (MIR_type_t) NO_CLASS;
switch (type->mode) {
case TM_ARR: { /* Arrays are handled as small records. */
n_el_qwords = classify_arg (c2m_ctx, type->u.arr_type->el_type, subtypes, FALSE);
if (n_el_qwords == 0) return 0;
/* make full types: */
for (i = 0; i < n_qwords; i++)
types[i] = get_result_type (types[i], subtypes[i % n_el_qwords]);
break;
}
case TM_STRUCT:
case TM_UNION:
for (node_t el = NL_HEAD (NL_EL (type->u.tag_type->u.ops, 1)->u.ops); el != NULL;
el = NL_NEXT (el))
if (el->code == N_MEMBER) {
decl_t decl = el->attr;
int start_qword = decl->offset / 8;
if (decl->bit_offset >= 0) {
types[start_qword] = get_result_type (MIR_T_I64, types[start_qword]);
} else {
n_el_qwords
= classify_arg (c2m_ctx, decl->decl_spec.type, subtypes, decl->bit_offset >= 0);
if (n_el_qwords == 0) return 0;
for (i = 0; i < n_el_qwords && (i + start_qword) < n_qwords; i++)
types[i + start_qword] = get_result_type (subtypes[i], types[i + start_qword]);
}
}
break;
default: assert (FALSE);
}
if (n_qwords > 2) return 0; /* as we don't have vector values (see SSEUP_CLASS) */
for (i = 0; i < n_qwords; i++) {
if (types[i] == MIR_T_UNDEF) return 0; /* pass in memory if a word class is memory. */
if ((enum add_arg_class) types[i] == X87UP_CLASS && (i == 0 || types[i - 1] != MIR_T_LD))
return 0;
}
return n_qwords;
#else
types[0] = MIR_T_I64;
return 1;
#endif
}
assert (scalar_type_p (type));
switch (mir_type = get_mir_type (c2m_ctx, type)) {
case MIR_T_F:
case MIR_T_D: types[0] = MIR_T_D; return 1;
case MIR_T_LD:
types[0] = MIR_T_LD;
types[1] = (MIR_type_t) X87UP_CLASS;
return 2;
default: types[0] = MIR_T_I64; return 1;
}
}
typedef struct target_arg_info {
int n_iregs, n_fregs;
} target_arg_info_t;
static void target_init_arg_vars (c2m_ctx_t c2m_ctx, target_arg_info_t *arg_info) {
arg_info->n_iregs = arg_info->n_fregs = 0;
}
static void update_last_qword_type (c2m_ctx_t c2m_ctx, struct type *type,
MIR_type_t qword_types[MAX_QWORDS], int n) {
size_t last_size, size = type_size (c2m_ctx, type);
MIR_type_t mir_type;
assert (n != 0);
if ((last_size = size % 8) == 0 || n > 1) return;
mir_type = qword_types[n - 1];
if (last_size <= 4 && mir_type == MIR_T_D) qword_types[n - 1] = MIR_T_F;
if (last_size <= 4 && mir_type == MIR_T_I64)
qword_types[n - 1] = last_size <= 1 ? MIR_T_I8 : last_size <= 2 ? MIR_T_I16 : MIR_T_I32;
}
static int process_ret_type (c2m_ctx_t c2m_ctx, struct type *ret_type,
MIR_type_t qword_types[MAX_QWORDS]) {
MIR_type_t type;
int n, n_iregs, n_fregs, n_stregs, curr;
int n_qwords = classify_arg (c2m_ctx, ret_type, qword_types, FALSE);
if (ret_type->mode != TM_STRUCT && ret_type->mode != TM_UNION) return 0;
if (n_qwords != 0) {
update_last_qword_type (c2m_ctx, ret_type, qword_types, n_qwords);
n_iregs = n_fregs = n_stregs = curr = 0;
for (n = 0; n < n_qwords; n++) { /* start from the last qword */
type = qword_types[n];
qword_types[curr++] = type;
switch ((int) type) {
case MIR_T_I8:
case MIR_T_I16:
case MIR_T_I32:
case MIR_T_I64: n_iregs++; break;
case MIR_T_F:
case MIR_T_D: n_fregs++; break;
case MIR_T_LD: n_stregs++; break;
case X87UP_CLASS:
n_qwords--;
curr--;
break;
default: assert (FALSE);
}
}
if (n_iregs > 2 || n_fregs > 2 || n_stregs > 1) n_qwords = 0;
}
return n_qwords;
}
static int target_return_by_addr_p (c2m_ctx_t c2m_ctx, struct type *ret_type) {
MIR_type_t qword_types[MAX_QWORDS];
int n_qwords;
if (void_type_p (ret_type)) return FALSE;
n_qwords = process_ret_type (c2m_ctx, ret_type, qword_types);
return n_qwords == 0 && (ret_type->mode == TM_STRUCT || ret_type->mode == TM_UNION);
}
static void target_add_res_proto (c2m_ctx_t c2m_ctx, struct type *ret_type,
target_arg_info_t *arg_info, VARR (MIR_type_t) * res_types,
VARR (MIR_var_t) * arg_vars) {
MIR_var_t var;
MIR_type_t type;
MIR_type_t qword_types[MAX_QWORDS];
int n, n_qwords;
if (void_type_p (ret_type)) return;
n_qwords = process_ret_type (c2m_ctx, ret_type, qword_types);
if (n_qwords != 0) {
for (n = 0; n < n_qwords; n++)
VARR_PUSH (MIR_type_t, res_types, promote_mir_int_type (qword_types[n]));
} else if (ret_type->mode != TM_STRUCT && ret_type->mode != TM_UNION) {
type = get_mir_type (c2m_ctx, ret_type);
VARR_PUSH (MIR_type_t, res_types, type);
} else { /* return by reference */
var.name = RET_ADDR_NAME;
var.type = MIR_T_RBLK;
var.size = type_size (c2m_ctx, ret_type);
VARR_PUSH (MIR_var_t, arg_vars, var);
arg_info->n_iregs++;
}
}
static int target_add_call_res_op (c2m_ctx_t c2m_ctx, struct type *ret_type,
target_arg_info_t *arg_info, size_t call_arg_area_offset) {
gen_ctx_t gen_ctx = c2m_ctx->gen_ctx;
MIR_context_t ctx = c2m_ctx->ctx;
MIR_type_t type;
MIR_type_t qword_types[MAX_QWORDS];
op_t temp;
int i, n_qwords;
if (void_type_p (ret_type)) return -1;
n_qwords = process_ret_type (c2m_ctx, ret_type, qword_types);
if (n_qwords != 0) {
for (i = 0; i < n_qwords; i++) {
temp = get_new_temp (c2m_ctx, promote_mir_int_type (qword_types[i]));
VARR_PUSH (MIR_op_t, call_ops, temp.mir_op);
}
return n_qwords;
} else if (ret_type->mode == TM_STRUCT || ret_type->mode == TM_UNION) { /* return by reference */
arg_info->n_iregs++;
temp = get_new_temp (c2m_ctx, MIR_T_I64);
emit3 (c2m_ctx, MIR_ADD, temp.mir_op,
MIR_new_reg_op (ctx, MIR_reg (ctx, FP_NAME, curr_func->u.func)),
MIR_new_int_op (ctx, call_arg_area_offset));
temp.mir_op
= MIR_new_mem_op (ctx, MIR_T_RBLK, type_size (c2m_ctx, ret_type), temp.mir_op.u.reg, 0, 1);
VARR_PUSH (MIR_op_t, call_ops, temp.mir_op);
return 0;
} else {
type = get_mir_type (c2m_ctx, ret_type);
type = promote_mir_int_type (type);
temp = get_new_temp (c2m_ctx, type);
VARR_PUSH (MIR_op_t, call_ops, temp.mir_op);
return 1;
}
}
static op_t target_gen_post_call_res_code (c2m_ctx_t c2m_ctx, struct type *ret_type, op_t res,
MIR_insn_t call, size_t call_ops_start) {
gen_ctx_t gen_ctx = c2m_ctx->gen_ctx;
MIR_context_t ctx = c2m_ctx->ctx;
MIR_type_t type;
MIR_insn_t insn;
MIR_type_t qword_types[MAX_QWORDS];
int i, n_qwords;
if (void_type_p (ret_type)) return res;
n_qwords = process_ret_type (c2m_ctx, ret_type, qword_types);
if (n_qwords != 0) {
assert (res.mir_op.mode == MIR_OP_MEM);
for (i = 0; i < n_qwords; i++) {
type = qword_types[i];
insn = MIR_new_insn (ctx, tp_mov (type),
MIR_new_mem_op (ctx, type, res.mir_op.u.mem.disp + 8 * i,
res.mir_op.u.mem.base, res.mir_op.u.mem.index,
res.mir_op.u.mem.scale),
VARR_GET (MIR_op_t, call_ops, i + call_ops_start + 2));
MIR_append_insn (ctx, curr_func, insn);
}
}
return res;
}
static void target_add_ret_ops (c2m_ctx_t c2m_ctx, struct type *ret_type, op_t res) {
gen_ctx_t gen_ctx = c2m_ctx->gen_ctx;
MIR_context_t ctx = c2m_ctx->ctx;
MIR_type_t type;
MIR_type_t qword_types[MAX_QWORDS];
MIR_insn_t insn;
MIR_reg_t ret_addr_reg;
op_t temp, var;
int i, size, n_qwords;
if (void_type_p (ret_type)) return;
n_qwords = process_ret_type (c2m_ctx, ret_type, qword_types);
if (n_qwords != 0) {
for (i = 0; i < n_qwords; i++) {
type = qword_types[i];
temp = get_new_temp (c2m_ctx, promote_mir_int_type (type));
insn = MIR_new_insn (ctx, tp_mov (type), temp.mir_op,
MIR_new_mem_op (ctx, type, res.mir_op.u.mem.disp + 8 * i,
res.mir_op.u.mem.base, res.mir_op.u.mem.index,
res.mir_op.u.mem.scale));
MIR_append_insn (ctx, curr_func, insn);
VARR_PUSH (MIR_op_t, ret_ops, temp.mir_op);
}
} else if (ret_type->mode != TM_STRUCT && ret_type->mode != TM_UNION) {
VARR_PUSH (MIR_op_t, ret_ops, res.mir_op);
} else {
ret_addr_reg = MIR_reg (ctx, RET_ADDR_NAME, curr_func->u.func);
var = new_op (NULL, MIR_new_mem_op (ctx, MIR_T_I8, 0, ret_addr_reg, 0, 1));
size = type_size (c2m_ctx, ret_type);
block_move (c2m_ctx, var, res, size);
}
}
static int process_aggregate_arg (c2m_ctx_t c2m_ctx, struct type *arg_type,
target_arg_info_t *arg_info, MIR_type_t qword_types[MAX_QWORDS]) {
MIR_type_t type;
int n, n_iregs, n_fregs, n_qwords = classify_arg (c2m_ctx, arg_type, qword_types, FALSE);
if (n_qwords == 0) return 0;
if (arg_type->mode != TM_STRUCT && arg_type->mode != TM_UNION) return 0;
update_last_qword_type (c2m_ctx, arg_type, qword_types, n_qwords);
n_iregs = n_fregs = 0;
for (n = 0; n < n_qwords; n++) { /* start from the last qword */
switch ((int) (type = qword_types[n])) {
case MIR_T_I8:
case MIR_T_I16:
case MIR_T_I32:
case MIR_T_I64: n_iregs++; break;
case MIR_T_F:
case MIR_T_D: n_fregs++; break;
case X87UP_CLASS:
case MIR_T_LD: return 0;
default: assert (FALSE);
}
}
if (arg_info->n_iregs + n_iregs > 6 || arg_info->n_fregs + n_fregs > 8) return 0;
/* aggregate passed by value: update arg_info */
arg_info->n_iregs += n_iregs;
arg_info->n_fregs += n_fregs;
return n_qwords;
}
static MIR_type_t get_blk_type (int n_qwords, MIR_type_t *qword_types) {
int n, n_iregs = 0, n_fregs = 0;
assert (n_qwords <= 2);
if (n_qwords == 0) return MIR_T_BLK;
for (n = 0; n < n_qwords; n++) { /* start from the last qword */
switch ((int) qword_types[n]) {
case MIR_T_I8:
case MIR_T_I16:
case MIR_T_I32:
case MIR_T_I64: n_iregs++; break;
case MIR_T_F:
case MIR_T_D: n_fregs++; break;
case X87UP_CLASS:
case MIR_T_LD: return MIR_T_BLK;
default: assert (FALSE);
}
}
if (n_iregs == n_qwords) return MIR_T_BLK + 1;
if (n_fregs == n_qwords) return MIR_T_BLK + 2;
if (qword_types[0] == MIR_T_F || qword_types[0] == MIR_T_D) return MIR_T_BLK + 4;
return MIR_T_BLK + 3;
}
static MIR_type_t target_get_blk_type (c2m_ctx_t c2m_ctx, struct type *arg_type) {
MIR_type_t qword_types[MAX_QWORDS];
int n_qwords = classify_arg (c2m_ctx, arg_type, qword_types, FALSE);
assert (arg_type->mode == TM_STRUCT || arg_type->mode == TM_UNION);
return get_blk_type (n_qwords, qword_types);
}
static void target_add_arg_proto (c2m_ctx_t c2m_ctx, const char *name, struct type *arg_type,
target_arg_info_t *arg_info, VARR (MIR_var_t) * arg_vars) {
MIR_var_t var;
MIR_type_t type;
MIR_type_t qword_types[MAX_QWORDS];
int n_qwords = process_aggregate_arg (c2m_ctx, arg_type, arg_info, qword_types);
/* pass aggregates on the stack and pass by value for others: */
var.name = name;
if (arg_type->mode != TM_STRUCT && arg_type->mode != TM_UNION) {
type = get_mir_type (c2m_ctx, arg_type);
var.type = type;
if (type == MIR_T_F || type == MIR_T_D)
arg_info->n_fregs++;
else if (type != MIR_T_LD)
arg_info->n_iregs++;
} else {
var.type = get_blk_type (n_qwords, qword_types);
var.size = type_size (c2m_ctx, arg_type);
}
VARR_PUSH (MIR_var_t, arg_vars, var);
}
static void target_add_call_arg_op (c2m_ctx_t c2m_ctx, struct type *arg_type,
target_arg_info_t *arg_info, op_t arg) {
gen_ctx_t gen_ctx = c2m_ctx->gen_ctx;
MIR_context_t ctx = c2m_ctx->ctx;
MIR_type_t type;
MIR_type_t qword_types[MAX_QWORDS];
int n_qwords = process_aggregate_arg (c2m_ctx, arg_type, arg_info, qword_types);
/* pass aggregates on the stack and pass by value for others: */
if (arg_type->mode != TM_STRUCT && arg_type->mode != TM_UNION) {
type = get_mir_type (c2m_ctx, arg_type);
VARR_PUSH (MIR_op_t, call_ops, arg.mir_op);
if (type == MIR_T_F || type == MIR_T_D)
arg_info->n_fregs++;
else if (type != MIR_T_LD)
arg_info->n_iregs++;
} else {
assert (arg.mir_op.mode == MIR_OP_MEM);
arg = mem_to_address (c2m_ctx, arg, TRUE);
type = get_blk_type (n_qwords, qword_types);
VARR_PUSH (MIR_op_t, call_ops,
MIR_new_mem_op (ctx, type, type_size (c2m_ctx, arg_type), arg.mir_op.u.reg, 0, 1));
}
}
static int target_gen_gather_arg (c2m_ctx_t c2m_ctx, const char *name, struct type *arg_type,
decl_t param_decl, target_arg_info_t *arg_info) {
return FALSE;
}
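The classification code above follows the System V x86-64 psABI eightbyte merge rules: NO_CLASS is neutral, MEMORY (represented here by MIR_T_UNDEF) absorbs everything, an integer class dominates SSE, and any X87/X87UP mix falls back to memory. A minimal standalone sketch of just that merge lattice, using hypothetical names independent of the MIR type codes:

#include <assert.h>

enum cls { NO_CLASS, INTEGER, SSE, X87, X87UP, MEMORY };

/* Merge the classes of two fields sharing one eightbyte (cf. get_result_type above). */
static enum cls merge (enum cls a, enum cls b) {
  if (a == b) return a;
  if (a == NO_CLASS) return b;
  if (b == NO_CLASS) return a;
  if (a == MEMORY || b == MEMORY) return MEMORY;
  if (a == INTEGER || b == INTEGER) return INTEGER;
  if (a == X87 || a == X87UP || b == X87 || b == X87UP) return MEMORY;
  return SSE;
}

int main (void) {
  assert (merge (INTEGER, SSE) == INTEGER); /* e.g. struct { int i; float f; } -> one GPR */
  assert (merge (NO_CLASS, SSE) == SSE);    /* a lone double in an eightbyte -> XMM */
  assert (merge (X87, SSE) == MEMORY);      /* long double mixes force memory */
  return 0;
}

Once both eightbytes of a small aggregate are classified this way, get_blk_type above selects the corresponding BLK variant (all-integer, all-SSE, or one of the mixed orders) so the later call code knows which register classes to use.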

@ -1,13 +1,23 @@
/* This file is a part of MIR project.
Copyright (C) 2018-2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2018-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
#include "../mirc.h"
#include "mirc-x86_64-linux.h"
static const char *standard_includes[] = {mirc, x86_64_mirc};
#ifndef _WIN32
#include "mirc_x86_64_linux.h"
#else
#include "mirc_x86_64_win.h"
#endif
static const char *standard_include_dirs[] = {"include/mirc/", "include/mirc/x86-64/"};
#include "mirc_x86_64_float.h"
#include "mirc_x86_64_limits.h"
#include "mirc_x86_64_stdarg.h"
#include "mirc_x86_64_stdint.h"
#include "mirc_x86_64_stddef.h"
static string_include_t standard_includes[]
= {{NULL, mirc}, {NULL, x86_64_mirc}, TARGET_STD_INCLUDES};
#define MAX_ALIGNMENT 16

@ -1,5 +1,5 @@
/* This file is a part of MIR project.
Copyright (C) 2018-2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2018-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
#include <stdint.h>
@ -9,7 +9,11 @@
typedef int8_t mir_schar;
typedef int16_t mir_short;
typedef int32_t mir_int;
#ifdef _WIN32
typedef int32_t mir_long;
#else
typedef int64_t mir_long;
#endif
typedef int64_t mir_llong;
#define MIR_SCHAR_MIN INT8_MIN
@ -18,21 +22,41 @@ typedef int64_t mir_llong;
#define MIR_SHORT_MAX INT16_MAX
#define MIR_INT_MIN INT32_MIN
#define MIR_INT_MAX INT32_MAX
#ifdef _WIN32
#define MIR_LONG_MIN INT32_MIN
#define MIR_LONG_MAX INT32_MAX
#else
#define MIR_LONG_MIN INT64_MIN
#define MIR_LONG_MAX INT64_MAX
#endif
#define MIR_LLONG_MIN INT64_MIN
#define MIR_LLONG_MAX INT64_MAX
typedef uint8_t mir_uchar;
typedef uint16_t mir_ushort;
typedef uint32_t mir_uint;
typedef uint64_t mir_ulong;
typedef uint64_t mir_ullong;
#ifdef _WIN32
typedef uint32_t mir_ulong;
typedef uint16_t mir_wchar;
#else
typedef uint64_t mir_ulong;
typedef uint32_t mir_wchar;
#endif
typedef uint16_t mir_char16;
typedef uint32_t mir_char32;
#define MIR_UCHAR_MAX UINT8_MAX
#define MIR_USHORT_MAX UINT16_MAX
#define MIR_UINT_MAX UINT32_MAX
#ifdef _WIN32
#define MIR_ULONG_MAX UINT32_MAX
#define MIR_WCHAR_MAX UINT16_MAX
#else
#define MIR_ULONG_MAX UINT64_MAX
#define MIR_WCHAR_MAX UINT32_MAX
#endif
#define MIR_WCHAR_MIN 0
#define MIR_ULLONG_MAX UINT64_MAX
typedef mir_schar mir_char;
@ -41,7 +65,11 @@ typedef mir_schar mir_char;
typedef float mir_float;
typedef double mir_double;
#ifdef _WIN32
typedef double mir_ldouble;
#else
typedef long double mir_ldouble;
#endif
typedef uint8_t mir_bool;
typedef int64_t mir_ptrdiff_t;

@ -0,0 +1,60 @@
/* This file is a part of MIR project.
Copyright (C) 2019-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* See C11 5.2.4.2.2 */
static char float_str[]
= "#ifndef __FLOAT_H\n"
"#define __FLOAT_H\n"
"\n"
"#define FLT_RADIX 2\n"
"\n"
"#define FLT_MANT_DIG 24\n"
"#define DBL_MANT_DIG 53\n"
"#define LDBL_MANT_DIG DBL_MANT_DIG\n"
"\n"
"#define FLT_DECIMAL_DIG 9\n"
"#define DBL_DECIMAL_DIG 17\n"
"#define LDBL_DECIMAL_DIG DBL_DECIMAL_DIG\n"
"#define FLT_DIG FLT_DECIMAL_DIG\n"
"#define DBL_DIG DBL_DECIMAL_DIG\n"
"#define LDBL_DIG LDBL_DECIMAL_DIG\n"
"\n"
"#define DECIMAL_DIG LDBL_DECIMAL_DIG\n"
"\n"
"#define FLT_MIN_EXP -125\n"
"#define DBL_MIN_EXP -1021\n"
"#define LDBL_MIN_EXP DBL_MIN_EXP\n"
"\n"
"#define FLT_MIN_10_EXP -37\n"
"#define DBL_MIN_10_EXP -307\n"
"#define LDBL_MIN_10_EXP DBL_MIN_10_EXP\n"
"\n"
"#define FLT_MAX_EXP 128\n"
"#define DBL_MAX_EXP 1024\n"
"#define LDBL_MAX_EXP DBL_MAX_EXP\n"
"\n"
"#define FLT_MAX_10_EXP 38\n"
"#define DBL_MAX_10_EXP 308\n"
"#define LDBL_MAX_10_EXP DBL_MAX_10_EXP\n"
"\n"
"#define FLT_MAX 0x1.fffffep+127\n"
"#define DBL_MAX 0x1.fffffffffffffp+1023\n"
"#define LDBL_MAX DBL_MAX\n"
"\n"
"#define FLT_EPSILON 0x1p-23\n"
"#define DBL_EPSILON 0x1p-52\n"
"#define LDBL_EPSILON DBL_EPSILON\n"
"\n"
"#define FLT_MIN 0x1p-126\n"
"#define DBL_MIN 0x1p-1022\n"
"#define LDBL_MIN DBL_MIN\n"
"\n"
"#define FLT_TRUE_MIN 0x1p-149\n"
"#define DBL_TRUE_MIN 0x0.0000000000001p-1022\n"
"#define LDBL_TRUE_MIN DBL_TRUE_MIN\n"
"\n"
"#define FLT_EVAL_METHOD 0\n"
"#define FLT_ROUNDS 1 /* round to the nearest */\n"
"\n"
"#endif /* #ifndef __FLOAT_H */\n";

@ -0,0 +1,48 @@
/* This file is a part of MIR project.
Copyright (C) 2019-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* See 5.2.4.2 */
static char limits_str[]
= "#ifndef __LIMITS_H\n"
"#define __LIMITS_H\n"
"\n"
"#define CHAR_BIT 8\n"
"\n"
"#define SCHAR_MIN (-SCHAR_MAX - 1)\n"
"#define SCHAR_MAX 127\n"
"#define UCHAR_MAX (SCHAR_MAX * 2 + 1)\n"
"\n"
"#define MB_LEN_MAX 1\n"
"\n"
"#define SHRT_MIN (-SHRT_MAX - 1)\n"
"#define SHRT_MAX 32767\n"
"#define USHRT_MAX (SHRT_MAX * 2 + 1)\n"
"\n"
"#define INT_MIN (-INT_MAX - 1)\n"
"#define INT_MAX 2147483647\n"
"#define UINT_MAX (INT_MAX * 2u + 1u)\n"
"\n"
#if defined(_WIN32)
"#define LONG_MIN (-LONG_MAX - 1l)\n"
"#define LONG_MAX 2147483647l\n"
"#define ULONG_MAX (LONG_MAX * 2ul + 1ul)\n"
"\n"
"#define LLONG_MIN (-LLONG_MAX - 1ll)\n"
"#define LLONG_MAX 9223372036854775807ll\n"
"#define ULLONG_MAX (LLONG_MAX * 2ull + 1ull)\n"
#else
"#define LONG_MIN (-LONG_MAX - 1l)\n"
"#define LONG_MAX 9223372036854775807l\n"
"#define ULONG_MAX (LONG_MAX * 2ul + 1ul)\n"
"\n"
"#define LLONG_MIN (-LLONG_MAX - 1ll)\n"
"#define LLONG_MAX 9223372036854775807ll\n"
"#define ULLONG_MAX (LLONG_MAX * 2ull + 1ull)\n"
#endif
"\n"
"/* signed char by default */\n"
"#define CHAR_MIN SCHAR_MIN\n"
"#define CHAR_MAX SCHAR_MAX\n"
"\n"
"#endif /* #ifndef __LIMITS_H */\n";

@ -1,5 +1,5 @@
/* This file is a part of MIR project.
Copyright (C) 2019-2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2019-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
static char x86_64_mirc[]
@ -92,6 +92,7 @@ static char x86_64_mirc[]
"#define __unix__ 1\n"
#elif defined(__APPLE__)
"#define __APPLE__ 1\n"
"#define __DARWIN_OS_INLINE static inline\n"
"typedef struct {\n"
" unsigned int gp_offset;\n"
" unsigned int fp_offset;\n"

@ -0,0 +1,41 @@
/* This file is a part of MIR project.
Copyright (C) 2019-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* See C11 7.16 and https://www.uclibc.org/docs/psABI-x86_64.pdf */
static char stdarg_str[]
= "#ifndef __STDARG_H\n"
"#define __STDARG_H\n"
"\n"
#if defined(__APPLE__)
"typedef __darwin_va_list va_list;\n"
#elif defined(__WIN32)
"typedef char *va_list;\n"
#else
"typedef struct {\n"
" unsigned int gp_offset;\n"
" unsigned int fp_offset;\n"
" void *overflow_arg_area;\n"
" void *reg_save_area;\n"
"} va_list[1];\n"
#endif
"\n"
#if defined(__WIN32)
"#define va_start(ap, param) __va_start (ap, param)\n"
#else
"#define va_start(ap, param) __builtin_va_start (ap)\n"
#endif
"#define va_arg(ap, type) __builtin_va_arg(ap, (type *) 0)\n"
"#define va_end(ap) 0\n"
#if defined(__APPLE__) || defined(__WIN32)
"#define va_copy(dest, src) ((dest) = (src))\n"
#else
"#define va_copy(dest, src) ((dest)[0] = (src)[0])\n"
#endif
"\n"
"/* For standard headers of a GNU system: */\n"
"#ifndef __GNUC_VA_LIST\n"
"#define __GNUC_VA_LIST 1\n"
"#endif\n"
"typedef va_list __gnuc_va_list;\n"
"#endif /* #ifndef __STDARG_H */\n";

@ -0,0 +1,32 @@
/* This file is a part of MIR project.
Copyright (C) 2019-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* See C11 7.19 */
static char stddef_str[]
= "#ifndef __STDDEF_H\n"
"#define __STDDEF_H\n"
"\n"
#ifdef _WIN32
"typedef long long int ptrdiff_t;\n"
"typedef unsigned long long size_t;\n"
#else
"typedef long ptrdiff_t;\n"
"typedef unsigned long size_t;\n"
#endif
"typedef long double max_align_t;\n"
#if defined(__APPLE__)
"typedef int wchar_t;\n"
#elif defined(_WIN32)
"typedef unsigned short wchar_t;\n"
#else
"typedef unsigned int wchar_t;\n"
#endif
"\n"
#if !defined(__APPLE__) && !defined(_WIN32)
"#define NULL ((void *) 0)\n"
#endif
"\n"
"#define offsetof(type, member_designator) ((size_t) & ((type *) 0)->member_designator)\n"
"\n"
"#endif /* #ifndef __STDDEF_H */\n";

@ -0,0 +1,175 @@
/* This file is a part of MIR project.
Copyright (C) 2019-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* See C11 7.20 */
static char stdint_str[]
= "#ifndef _STDINT_H\n"
"#define _STDINT_H 1\n"
"\n"
"typedef signed char int8_t;\n"
"typedef short int int16_t;\n"
"typedef int int32_t;\n"
#if defined(__APPLE__) || defined(_WIN32)
"typedef long long int int64_t;\n"
#else
"typedef long int int64_t;\n"
#endif
"\n"
"typedef unsigned char uint8_t;\n"
"typedef unsigned short int uint16_t;\n"
"typedef unsigned int uint32_t;\n"
#if defined(__APPLE__) || defined(_WIN32)
"typedef unsigned long long int uint64_t;\n"
#else
"typedef unsigned long int uint64_t;\n"
#endif
"\n"
"typedef signed char int_least8_t;\n"
"typedef short int int_least16_t;\n"
"typedef int int_least32_t;\n"
#if defined(_WIN32)
"typedef long long int int_least64_t;\n"
#else
"typedef long int int_least64_t;\n"
#endif
"\n"
"typedef unsigned char uint_least8_t;\n"
"typedef unsigned short int uint_least16_t;\n"
"typedef unsigned int uint_least32_t;\n"
#if defined(_WIN32)
"typedef unsigned long long int uint_least64_t;\n"
#else
"typedef unsigned long int uint_least64_t;\n"
#endif
"\n"
"typedef signed char int_fast8_t;\n"
"typedef long int int_fast16_t;\n"
"typedef long int int_fast32_t;\n"
#if defined(_WIN32)
"typedef long long int int_fast64_t;\n"
#else
"typedef long int int_fast64_t;\n"
#endif
"\n"
"typedef unsigned char uint_fast8_t;\n"
"typedef unsigned long int uint_fast16_t;\n"
"typedef unsigned long int uint_fast32_t;\n"
#if defined(_WIN32)
"typedef unsigned long long int uint_fast64_t;\n"
#else
"typedef unsigned long int uint_fast64_t;\n"
#endif
"\n"
"#define __intptr_t_defined\n"
#if defined(_WIN32)
"typedef long long int intptr_t;\n"
"typedef unsigned long long int uintptr_t;\n"
"\n"
"typedef long long int intmax_t;\n"
"typedef unsigned long long int uintmax_t;\n"
"\n"
"#define __INT64_C(c) c##LL\n"
"#define __UINT64_C(c) c##ULL\n"
#else
"typedef long int intptr_t;\n"
"typedef unsigned long int uintptr_t;\n"
"\n"
"typedef long int intmax_t;\n"
"typedef unsigned long int uintmax_t;\n"
"\n"
"#define __INT64_C(c) c##L\n"
"#define __UINT64_C(c) c##UL\n"
#endif
"\n"
"#define INT8_MIN (-128)\n"
"#define INT16_MIN (-32768)\n"
"#define INT32_MIN (-2147483648)\n"
"#define INT64_MIN (-9223372036854775808l)\n"
"\n"
"#define INT8_MAX (127)\n"
"#define INT16_MAX (32767)\n"
"#define INT32_MAX (2147483647)\n"
"#define INT64_MAX (9223372036854775807l)\n"
"\n"
"#define UINT8_MAX (255)\n"
"#define UINT16_MAX (65535)\n"
"#define UINT32_MAX (4294967295u)\n"
"#define UINT64_MAX (18446744073709551615ul)\n"
"\n"
"#define INT_LEAST8_MIN (-128)\n"
"#define INT_LEAST16_MIN (-32768)\n"
"#define INT_LEAST32_MIN (-2147483648)\n"
"#define INT_LEAST64_MIN (-9223372036854775808L)\n"
"\n"
"#define INT_LEAST8_MAX (127)\n"
"#define INT_LEAST16_MAX (32767)\n"
"#define INT_LEAST32_MAX (2147483647)\n"
"#define INT_LEAST64_MAX (9223372036854775807L)\n"
"\n"
"#define UINT_LEAST8_MAX (255)\n"
"#define UINT_LEAST16_MAX (65535)\n"
"#define UINT_LEAST32_MAX (4294967295U)\n"
"#define UINT_LEAST64_MAX (18446744073709551615UL)\n"
"\n"
"#define INT_FAST8_MIN (-128)\n"
"#define INT_FAST16_MIN (-9223372036854775808L)\n"
"#define INT_FAST32_MIN (-9223372036854775808L)\n"
"#define INT_FAST64_MIN (-9223372036854775808L)\n"
"\n"
"#define INT_FAST8_MAX (127)\n"
"#define INT_FAST16_MAX (9223372036854775807L)\n"
"#define INT_FAST32_MAX (9223372036854775807L)\n"
"#define INT_FAST64_MAX (9223372036854775807L)\n"
"\n"
"#define UINT_FAST8_MAX (255)\n"
"#define UINT_FAST16_MAX (18446744073709551615UL)\n"
"#define UINT_FAST32_MAX (18446744073709551615UL)\n"
"#define UINT_FAST64_MAX (18446744073709551615UL)\n"
"\n"
"#define INTPTR_MIN (-9223372036854775808L)\n"
"#define INTPTR_MAX (9223372036854775807L)\n"
"#define UINTPTR_MAX (18446744073709551615UL)\n"
"\n"
"#define INTMAX_MIN (-9223372036854775808L)\n"
"#define INTMAX_MAX (9223372036854775807L)\n"
"#define UINTMAX_MAX (18446744073709551615UL)\n"
"\n"
"#define PTRDIFF_MIN (-9223372036854775808L)\n"
"#define PTRDIFF_MAX (9223372036854775807L)\n"
"\n"
"#define SIZE_MAX (18446744073709551615UL)\n"
"\n"
"/* For signed wchar_t and wint_t: */\n"
"#define WCHAR_MIN INT32_MIN\n"
"#define WCHAR_MAX INT32_MAX\n"
"#define WINT_MIN WCHAR_MIN\n"
"#define WINT_MAX WCHAR_MAX\n"
"\n"
"#define INT8_C(value) value\n"
"#define INT16_C(value) value\n"
"#define INT32_C(value) value\n"
#if defined(_WIN32)
"#define INT64_C(value) value##LL\n"
#else
"#define INT64_C(value) value##L\n"
#endif
"\n"
"#define UINT8_C(value) value\n"
"#define UINT16_C(value) value\n"
"#define UINT32_C(value) value##U\n"
#if defined(_WIN32)
"#define UINT64_C(value) value##ULL\n"
#else
"#define UINT64_C(value) value##UL\n"
#endif
"\n"
#if defined(_WIN32)
"#define INTMAX_C(value) value##LL\n"
"#define UINTMAX_C(value) value##ULL\n"
#else
"#define INTMAX_C(value) value##L\n"
"#define UINTMAX_C(value) value##UL\n"
#endif
"\n"
"#endif /* #ifndef _STDINT_H */\n";

@ -0,0 +1,116 @@
/* This file is a part of MIR project.
Copyright (C) 2019-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
static char x86_64_mirc[]
= "#define __amd64 1\n"
"#define __amd64__ 1\n"
"#define __x86_64 1\n"
"#define __x86_64__ 1\n"
"#define _M_AMD64 1\n"
"#define _M_X64 1\n"
"\n"
"#define __SIZEOF_DOUBLE__ 8\n"
"#define __SIZEOF_FLOAT__ 4\n"
"#define __SIZEOF_INT__ 4\n"
#if !defined(_WIN32) && __SIZEOF_LONG_DOUBLE__ == 16
"#define __SIZEOF_LONG_DOUBLE__ 16\n"
#else
"#define __SIZEOF_LONG_DOUBLE__ 8\n"
#endif
"#define __SIZEOF_LONG_LONG__ 8\n"
"#define __SIZEOF_LONG__ 4\n"
"#define __SIZEOF_POINTER__ 8\n"
"#define __SIZEOF_PTRDIFF_T__ 8\n"
"#define __SIZEOF_SHORT__ 2\n"
"#define __SIZEOF_SIZE_T__ 8\n"
"\n"
"#define __BYTE_ORDER__ 1234\n"
"#define __ORDER_LITTLE_ENDIAN__ 1234\n"
"#define __ORDER_BIG_ENDIAN__ 4321\n"
"\n"
"/* Some type macros: */\n"
"#define __SIZE_TYPE__ long long unsigned int\n"
"#define __PTRDIFF_TYPE__ long long int\n"
"#define __INTMAX_TYPE__ long long int\n"
"#define __UINTMAX_TYPE__ long long unsigned int\n"
"#define __INT8_TYPE__ signed char\n"
"#define __INT16_TYPE__ short\n"
"#define __INT32_TYPE__ int\n"
"#define __INT64_TYPE__ long long int\n"
"#define __UINT8_TYPE__ unsigned char\n"
"#define __UINT16_TYPE__ unsigned short\n"
"#define __UINT32_TYPE__ unsigned int\n"
"#define __UINT64_TYPE__ long long unsigned int\n"
"#define __INTPTR_TYPE__ long long int\n"
"#define __UINTPTR_TYPE__ long long unsigned int\n"
"\n"
"#define __int8 __INT8_TYPE__\n"
"#define __int16 __INT16_TYPE__\n"
"#define __int32 __INT32_TYPE__\n"
"#define __int64 __INT64_TYPE__\n"
"\n"
"#define __ptr32\n"
"#define __ptr64\n"
"#define __forceinline inline\n"
"#define __cdecl\n"
"#define __pragma(p)\n"
"#define __declspec(attr)\n"
"#define __unaligned\n"
"\n"
"#define __CHAR_BIT__ 8\n"
"#define __INT8_MAX__ 127\n"
"#define __INT16_MAX__ 32767\n"
"#define __INT32_MAX__ 2147483647\n"
"#define __INT64_MAX__ 9223372036854775807LL\n"
"#define __UINT8_MAX__ (__INT8_MAX__ * 2u + 1u)\n"
"#define __UINT16_MAX__ (__INT16_MAX__ * 2u + 1u)\n"
"#define __UINT32_MAX__ (__INT32_MAX__ * 2u + 1u)\n"
"#define __UINT64_MAX__ (__INT64_MAX__ * 2u + 1u)\n"
"#define __SCHAR_MAX__ __INT8_MAX__\n"
"#define __SHRT_MAX__ __INT16_MAX__\n"
"#define __INT_MAX__ __INT32_MAX__\n"
"#define __LONG_MAX__ __INT32_MAX__\n"
"#define __LONG_LONG_MAX__ __INT64_MAX__\n"
"#define __SIZE_MAX__ __UINT64_MAX__\n"
"#define __PTRDIFF_MAX__ __INT64_MAX__\n"
"#define __INTMAX_MAX__ __INT64_MAX__\n"
"#define __UINTMAX_MAX__ __UINT64_MAX__\n"
"#define __INTPTR_MAX__ __INT64_MAX__\n"
"#define __UINTPTR_MAX__ __UINT64_MAX__\n"
"\n"
"#define __FLT_MIN_EXP__ (-125)\n"
"#define __FLT_MAX_EXP__ 128\n"
"#define __FLT_DIG__ 6\n"
"#define __FLT_DECIMAL_DIG__ 9\n"
"#define __FLT_MANT_DIG__ 24\n"
"#define __FLT_MIN__ 1.17549435082228750796873653722224568e-38F\n"
"#define __FLT_MAX__ 3.40282346638528859811704183484516925e+38F\n"
"#define __FLT_EPSILON__ 1.19209289550781250000000000000000000e-7F\n"
"\n"
"#define __DBL_MIN_EXP__ (-1021)\n"
"#define __DBL_MAX_EXP__ 1024\n"
"#define __DBL_DIG__ 15\n"
"#define __DBL_DECIMAL_DIG__ 17\n"
"#define __DBL_MANT_DIG__ 53\n"
"#define __DBL_MAX__ ((double) 1.79769313486231570814527423731704357e+308L)\n"
"#define __DBL_MIN__ ((double) 2.22507385850720138309023271733240406e-308L)\n"
"#define __DBL_EPSILON__ ((double) 2.22044604925031308084726333618164062e-16L)\n"
"\n"
"typedef unsigned short char16_t;\n"
"typedef unsigned int char32_t;\n"
"\n"
"#define WIN32 1\n"
"#define _WIN32 1\n"
"#define __WIN32 1\n"
"#define __WIN32__ 1\n"
"#define WIN64 1\n"
"#define _WIN64 1\n"
"#define __WIN64 1\n"
"#define __WIN64__ 1\n"
"#define WINNT 1\n"
"#define __WINNT 1\n"
"#define __WINNT__ 1\n"
"#define __MSVCRT__ 1\n"
"\n"
"void *alloca (long long unsigned);\n";

@ -1,10 +1,14 @@
/* This file is a part of MIR project.
Copyright (C) 2018-2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2018-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
#define VA_LIST_IS_ARRAY_P 0
// _MIR_get_thunk, _MIR_redirect_thunk, _MIR_get_interp_shim, _MIR_get_ff_call, _MIR_get_wrapper
/* Any small BLK type arg (less than or equal to two quadwords) is passed
   *fully* in regs or on the stack (w/o address); otherwise it is put
   somewhere on the stack and its address is passed instead. The first RBLK
   arg is passed in r8. Any other RBLK arg, independently of size, is always
   passed by address as a usual argument. */
void *_MIR_get_bstart_builtin (MIR_context_t ctx) {
static const uint32_t bstart_code[] = {
@ -60,6 +64,30 @@ void *va_arg_builtin (void *p, uint64_t t) {
return a;
}
void va_block_arg_builtin (void *res, void *p, size_t s, uint64_t ncase) {
struct aarch64_va_list *va = p;
void *a;
long size = (s + 7) / 8 * 8;
if (size <= 2 * 8 && va->__gr_offs + size > 0) { /* not enough regs to pass: */
a = va->__stack;
va->__stack = (char *) va->__stack + size;
va->__gr_offs += size;
memcpy (res, a, s);
return;
}
if (size > 2 * 8) size = 8;
if (va->__gr_offs < 0) {
a = (char *) va->__gr_top + va->__gr_offs;
va->__gr_offs += size;
} else {
a = va->__stack;
va->__stack = (char *) va->__stack + size;
}
if (s > 2 * 8) a = *(void **) a; /* address */
memcpy (res, a, s);
}
void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a) {
struct aarch64_va_list *va = p;
va_list *vap = a;
@ -70,7 +98,7 @@ void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a) {
void va_end_interp_builtin (MIR_context_t ctx, void *p) {}
static int setup_imm64_insns (MIR_context_t ctx, uint32_t *to, int reg, uint64_t imm64) {
static int setup_imm64_insns (uint32_t *to, int reg, uint64_t imm64) {
/* xd=imm64 */
static const uint32_t imm64_pat[] = {
0xd2800000, /* 0: mov xd, xxxx(0-15) */
@ -88,18 +116,19 @@ static int setup_imm64_insns (MIR_context_t ctx, uint32_t *to, int reg, uint64_t
return sizeof (imm64_pat) / sizeof (uint32_t);
}
static void push_insns (MIR_context_t ctx, const uint32_t *pat, size_t pat_len) {
static uint8_t *push_insns (VARR (uint8_t) * insn_varr, const uint32_t *pat, size_t pat_len) {
uint8_t *p = (uint8_t *) pat;
for (size_t i = 0; i < pat_len; i++) VARR_PUSH (uint8_t, machine_insns, p[i]);
for (size_t i = 0; i < pat_len; i++) VARR_PUSH (uint8_t, insn_varr, p[i]);
return VARR_ADDR (uint8_t, insn_varr) + VARR_LENGTH (uint8_t, insn_varr) - pat_len;
}
static size_t gen_mov_addr (MIR_context_t ctx, int reg, void *addr) {
static size_t gen_mov_addr (VARR (uint8_t) * insn_varr, int reg, void *addr) {
uint32_t insns[4];
int insns_num = setup_imm64_insns (ctx, insns, reg, (uint64_t) addr);
int insns_num = setup_imm64_insns (insns, reg, (uint64_t) addr);
mir_assert (insns_num == 4 && sizeof (insns) == insns_num * sizeof (uint32_t));
push_insns (ctx, insns, insns_num * sizeof (uint32_t));
push_insns (insn_varr, insns, insns_num * sizeof (uint32_t));
return insns_num * sizeof (uint32_t);
}
@ -107,7 +136,8 @@ static size_t gen_mov_addr (MIR_context_t ctx, int reg, void *addr) {
#define MAX_BR_OFFSET (1 << (BR_OFFSET_BITS - 1)) /* 1 for sign */
#define BR_OFFSET_MASK (~(-1 << BR_OFFSET_BITS))
static void gen_call_addr (MIR_context_t ctx, void *base_addr, int temp_reg, void *call_addr) {
static void gen_call_addr (VARR (uint8_t) * insn_varr, void *base_addr, int temp_reg,
void *call_addr) {
static const uint32_t call_pat1 = 0x94000000; /* bl x */
static const uint32_t call_pat2 = 0xd63f0000; /* blr x */
uint32_t insn;
@ -117,10 +147,10 @@ static void gen_call_addr (MIR_context_t ctx, void *base_addr, int temp_reg, voi
if (base_addr != NULL && -(int64_t) MAX_BR_OFFSET <= offset && offset < (int64_t) MAX_BR_OFFSET) {
insn = call_pat1 | ((uint32_t) offset & BR_OFFSET_MASK);
} else {
gen_mov_addr (ctx, temp_reg, call_addr);
gen_mov_addr (insn_varr, temp_reg, call_addr);
insn = call_pat2 | (temp_reg << 5);
}
push_insns (ctx, &insn, sizeof (insn));
push_insns (insn_varr, &insn, sizeof (insn));
}
#define NOP 0xd503201f
@ -142,7 +172,7 @@ void _MIR_redirect_thunk (MIR_context_t ctx, void *thunk, void *to) {
code[0] = branch_pat2 | ((uint32_t) offset & BR_OFFSET_MASK);
_MIR_change_code (ctx, thunk, (uint8_t *) &code[0], sizeof (code[0]));
} else {
int n = setup_imm64_insns (ctx, code, 9, (uint64_t) to);
int n = setup_imm64_insns (code, 9, (uint64_t) to);
mir_assert (n == 4);
code[4] = branch_pat1;
@ -150,23 +180,55 @@ void _MIR_redirect_thunk (MIR_context_t ctx, void *thunk, void *to) {
}
}
/* save r0-r7, v0-v7 */
static void gen_blk_mov (VARR (uint8_t) * insn_varr, uint32_t offset, uint32_t addr_offset,
uint32_t qwords, uint32_t addr_reg) {
static const uint32_t blk_mov_pat[] = {
/* 0:*/ 0xf940026c, /* ldr x12, [x19,<addr_offset>]*/
/* 4:*/ 0x910003e0, /* add <addr_reg>, sp, <offset>*/
/* 8:*/ 0xd280000b, /* mov x11, 0*/
/* c:*/ 0xd280000e, /* mov x14, <qwords>*/
/* 10:*/ 0xf86c696a, /* ldr x10, [x11,x12]*/
/* 14:*/ 0xd10005ce, /* sub x14, x14, #0x1*/
/* 18:*/ 0xf820696a, /* str x10, [x11,<addr_reg>x13]*/
/* 1c:*/ 0xf10001df, /* cmp x14, 0*/
/* 20:*/ 0x9100216b, /* add x11, x11, 8*/
/* 24:*/ 0x54ffff61, /* b.ne 10 */
};
if (qwords == 0) {
uint32_t pat = 0x910003e0 | addr_reg | (offset << 10); /* add <addr_reg>, sp, <offset> */
push_insns (insn_varr, &pat, sizeof (pat));
} else {
uint32_t *addr = (uint32_t *) push_insns (insn_varr, blk_mov_pat, sizeof (blk_mov_pat));
mir_assert (offset < (1 << 12) && addr_offset % 8 == 0 && (addr_offset >> 3) < (1 << 12));
mir_assert (addr_reg < 32 && qwords < (1 << 16));
addr[0] |= (addr_offset >> 3) << 10;
addr[1] |= addr_reg | (offset << 10);
addr[3] |= qwords << 5;
addr[6] |= addr_reg << 16;
}
}
static const uint32_t save_insns[] = {
/* save r0-r8,v0-v7 */
0xa9bf1fe6, /* stp R6, R7, [SP, #-16]! */
0xa9bf17e4, /* stp R4, R5, [SP, #-16]! */
0xa9bf0fe2, /* stp R2, R3, [SP, #-16]! */
0xa9bf07e0, /* stp R0, R1, [SP, #-16]! */
0xd10043ff, /* sub SP, SP, #16 */
0xf90007e8, /* str x8, [SP, #8] */
0xadbf1fe6, /* stp Q6, Q7, [SP, #-32]! */
0xadbf17e4, /* stp Q4, Q5, [SP, #-32]! */
0xadbf0fe2, /* stp Q2, Q3, [SP, #-32]! */
0xadbf07e0, /* stp Q0, Q1, [SP, #-32]! */
};
static const uint32_t restore_insns[] = {
/* restore r0-r8,v0-v7 */
0xacc107e0, /* ldp Q0, Q1, SP, #32 */
0xacc10fe2, /* ldp Q2, Q3, SP, #32 */
0xacc117e4, /* ldp Q4, Q5, SP, #32 */
0xacc11fe6, /* ldp Q6, Q7, SP, #32 */
0xf94007e8, /* ldr x8, [SP, #8] */
0x910043ff, /* add SP, SP, #16 */
0xa8c107e0, /* ldp R0, R1, SP, #16 */
0xa8c10fe2, /* ldp R2, R3, SP, #16 */
0xa8c117e4, /* ldp R4, R5, SP, #16 */
@ -180,12 +242,14 @@ static const uint32_t ldld_pat = 0x3dc00260; /* ldr q, [x19], offset */
/* Generation: fun (fun_addr, res_arg_addresses):
push x19, x30; sp-=sp_offset; x9=fun_addr; x19=res/arg_addrs
x8=mem[x19,<offset>]; (arg_reg=mem[x8] or x8=mem[x8];mem[sp,sp_offset]=x8) ...
x10=mem[x19,<offset>]; (arg_reg=mem[x10](or addr of blk copy on the stack)
or x10=mem[x10] or x13=addr of blk copy on the stack;
mem[sp,sp_offset]=x10|x13) ...
call fun_addr; sp+=offset
x8=mem[x19,<offset>]; res_reg=mem[x8]; ...
x10=mem[x19,<offset>]; res_reg=mem[x10]; ...
pop x19, x30; ret x30. */
void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, size_t nargs,
MIR_type_t *arg_types, int vararg_p) {
_MIR_arg_desc_t *arg_descs, int vararg_p) {
static const uint32_t prolog[] = {
0xa9bf7bf3, /* stp x19,x30,[sp, -16]! */
0xd10003ff, /* sub sp,sp,<sp_offset> */
@ -200,93 +264,154 @@ void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, s
0xa8c17bf3, /* ldp x19,x30,[sp],16 */
0xd65f03c0, /* ret x30 */
};
static const uint32_t st_pat = 0xf9000000; /* str x, [xn|sp], offset */
static const uint32_t sts_pat = 0xbd000000; /* str s, [xn|sp], offset */
static const uint32_t std_pat = 0xfd000000; /* str d, [xn|sp], offset */
static const uint32_t stld_pat = 0x3d800000; /* str q, [xn|sp], offset */
uint32_t n_xregs = 0, n_vregs = 0, sp_offset = 0, pat, offset_imm, scale, sp = 31;
static const uint32_t gen_ld_pat = 0xf9400000; /* ldr x, [xn|sp], offset */
static const uint32_t st_pat = 0xf9000000; /* str x, [xn|sp], offset */
static const uint32_t sts_pat = 0xbd000000; /* str s, [xn|sp], offset */
static const uint32_t std_pat = 0xfd000000; /* str d, [xn|sp], offset */
static const uint32_t stld_pat = 0x3d800000; /* str q, [xn|sp], offset */
MIR_type_t type;
uint32_t n_xregs = 0, n_vregs = 0, sp_offset = 0, blk_offset = 0, pat, offset_imm, scale;
uint32_t sp = 31, addr_reg, qwords;
uint32_t *addr;
const uint32_t temp_reg = 8; /* x8 or v9 */
const uint32_t temp_reg = 10; /* x10 */
VARR (uint8_t) * code;
void *res;
VARR_TRUNC (uint8_t, machine_insns, 0);
push_insns (ctx, prolog, sizeof (prolog));
VARR_CREATE (uint8_t, code, 128);
mir_assert (sizeof (long double) == 16);
for (size_t i = 0; i < nargs; i++) { /* calculate offset for blk params */
type = arg_descs[i].type;
if ((MIR_T_I8 <= type && type <= MIR_T_U64) || type == MIR_T_P || MIR_all_blk_type_p (type)) {
if (MIR_blk_type_p (type) && (qwords = (arg_descs[i].size + 7) / 8) <= 2) {
if (n_xregs + qwords > 8) blk_offset += qwords * 8;
n_xregs += qwords;
} else {
if (n_xregs++ >= 8) blk_offset += 8;
}
} else if (type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD) {
if (n_vregs++ >= 8) blk_offset += type == MIR_T_LD ? 16 : 8;
} else {
MIR_get_error_func (ctx) (MIR_call_op_error, "wrong type of arg value");
}
}
blk_offset = (blk_offset + 15) / 16 * 16;
push_insns (code, prolog, sizeof (prolog));
n_xregs = n_vregs = 0;
for (size_t i = 0; i < nargs; i++) { /* args */
scale = arg_types[i] == MIR_T_F ? 2 : arg_types[i] == MIR_T_LD ? 4 : 3;
type = arg_descs[i].type;
scale = type == MIR_T_F ? 2 : type == MIR_T_LD ? 4 : 3;
offset_imm = (((i + nres) * sizeof (long double) << 10)) >> scale;
if ((MIR_T_I8 <= arg_types[i] && arg_types[i] <= MIR_T_U64) || arg_types[i] == MIR_T_P) {
if (n_xregs < 8) {
if (MIR_blk_type_p (type)) {
qwords = (arg_descs[i].size + 7) / 8;
if (qwords <= 2) {
addr_reg = 13;
pat = ld_pat | offset_imm | addr_reg;
push_insns (code, &pat, sizeof (pat));
if (n_xregs + qwords <= 8) {
for (int n = 0; n < qwords; n++) {
pat = gen_ld_pat | (((n * 8) >> scale) << 10) | (n_xregs + n) | (addr_reg << 5);
push_insns (code, &pat, sizeof (pat));
}
} else {
for (int n = 0; n < qwords; n++) {
pat = gen_ld_pat | (((n * 8) >> scale) << 10) | temp_reg | (addr_reg << 5);
push_insns (code, &pat, sizeof (pat));
pat = st_pat | ((sp_offset >> scale) << 10) | temp_reg | (sp << 5);
push_insns (code, &pat, sizeof (pat));
sp_offset += 8;
}
}
n_xregs += qwords;
} else {
addr_reg = n_xregs < 8 ? n_xregs : 13;
gen_blk_mov (code, blk_offset, (i + nres) * sizeof (long double), qwords, addr_reg);
blk_offset += qwords * 8;
if (n_xregs++ >= 8) {
pat = st_pat | ((sp_offset >> scale) << 10) | addr_reg | (sp << 5);
push_insns (code, &pat, sizeof (pat));
sp_offset += 8;
}
}
} else if ((MIR_T_I8 <= type && type <= MIR_T_U64) || type == MIR_T_P || type == MIR_T_RBLK) {
if (type == MIR_T_RBLK && i == 0) {
pat = ld_pat | offset_imm | 8; /* x8 - hidden result address */
} else if (n_xregs < 8) {
pat = ld_pat | offset_imm | n_xregs++;
} else {
pat = ld_pat | offset_imm | temp_reg;
push_insns (ctx, &pat, sizeof (pat));
push_insns (code, &pat, sizeof (pat));
pat = st_pat | ((sp_offset >> scale) << 10) | temp_reg | (sp << 5);
sp_offset += 8;
}
push_insns (ctx, &pat, sizeof (pat));
} else if (arg_types[i] == MIR_T_F || arg_types[i] == MIR_T_D || arg_types[i] == MIR_T_LD) {
pat = arg_types[i] == MIR_T_F ? lds_pat : arg_types[i] == MIR_T_D ? ldd_pat : ldld_pat;
push_insns (code, &pat, sizeof (pat));
} else if (type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD) {
pat = type == MIR_T_F ? lds_pat : type == MIR_T_D ? ldd_pat : ldld_pat;
if (n_vregs < 8) {
pat |= offset_imm | n_vregs++;
} else {
if (arg_types[i] == MIR_T_LD) sp_offset = (sp_offset + 15) % 16;
if (type == MIR_T_LD) sp_offset = (sp_offset + 15) % 16;
pat |= offset_imm | temp_reg;
push_insns (ctx, &pat, sizeof (pat));
pat = arg_types[i] == MIR_T_F ? sts_pat : arg_types[i] == MIR_T_D ? std_pat : stld_pat;
push_insns (code, &pat, sizeof (pat));
pat = type == MIR_T_F ? sts_pat : type == MIR_T_D ? std_pat : stld_pat;
pat |= ((sp_offset >> scale) << 10) | temp_reg | (sp << 5);
sp_offset += arg_types[i] == MIR_T_LD ? 16 : 8;
sp_offset += type == MIR_T_LD ? 16 : 8;
}
push_insns (ctx, &pat, sizeof (pat));
push_insns (code, &pat, sizeof (pat));
} else {
(*error_func) (MIR_call_op_error, "wrong type of arg value");
MIR_get_error_func (ctx) (MIR_call_op_error, "wrong type of arg value");
}
}
sp_offset = (sp_offset + 15) / 16 * 16;
blk_offset = (blk_offset + 15) / 16 * 16;
if (blk_offset != 0) sp_offset = blk_offset;
mir_assert (sp_offset < (1 << 12));
((uint32_t *) VARR_ADDR (uint8_t, machine_insns))[1] |= sp_offset << 10; /* sub sp,sp,<offset> */
push_insns (ctx, call_end, sizeof (call_end));
((uint32_t *) (VARR_ADDR (uint8_t, machine_insns) + VARR_LENGTH (uint8_t, machine_insns)))[-1]
|= sp_offset << 10;
((uint32_t *) VARR_ADDR (uint8_t, code))[1] |= sp_offset << 10; /* sub sp,sp,<offset> */
push_insns (code, call_end, sizeof (call_end));
((uint32_t *) (VARR_ADDR (uint8_t, code) + VARR_LENGTH (uint8_t, code)))[-1] |= sp_offset << 10;
n_xregs = n_vregs = 0;
for (size_t i = 0; i < nres; i++) { /* results */
offset_imm = i * sizeof (long double) << 10;
offset_imm >>= res_types[i] == MIR_T_F ? 2 : res_types[i] == MIR_T_D ? 3 : 4;
if (((MIR_T_I8 <= res_types[i] && res_types[i] <= MIR_T_U64) || res_types[i] == MIR_T_P)
&& n_xregs < 8) {
offset_imm >>= 3;
pat = st_pat | offset_imm | n_xregs++ | (19 << 5);
push_insns (ctx, &pat, sizeof (pat));
push_insns (code, &pat, sizeof (pat));
} else if ((res_types[i] == MIR_T_F || res_types[i] == MIR_T_D || res_types[i] == MIR_T_LD)
&& n_vregs < 8) {
offset_imm >>= res_types[i] == MIR_T_F ? 2 : res_types[i] == MIR_T_D ? 3 : 4;
pat = res_types[i] == MIR_T_F ? sts_pat : res_types[i] == MIR_T_D ? std_pat : stld_pat;
pat |= offset_imm | n_vregs++ | (19 << 5);
push_insns (ctx, &pat, sizeof (pat));
push_insns (code, &pat, sizeof (pat));
} else {
(*error_func) (MIR_ret_error, "x86-64 can not handle this combination of return values");
MIR_get_error_func (ctx) (MIR_ret_error,
"aarch64 can not handle this combination of return values");
}
}
push_insns (ctx, epilog, sizeof (epilog));
return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
push_insns (code, epilog, sizeof (epilog));
res = _MIR_publish_code (ctx, VARR_ADDR (uint8_t, code), VARR_LENGTH (uint8_t, code));
VARR_DESTROY (uint8_t, code);
return res;
}
/* Transform C call to call of void handler (MIR_context_t ctx, MIR_item_t func_item,
va_list va, MIR_val_t *results) */
void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handler) {
static const uint32_t save_x19_pat = 0xf81f0ff3; /* str x19, [sp,-16]! */
static const uint32_t save_x19_pat = 0xf81f0ff3; /* str x19, [sp,-16]! */
static const uint32_t set_gr_offs = 0x128007e9; /* mov w9, #-64 # gr_offs */
static const uint32_t set_x8_gr_offs = 0x128008e9; /* mov w9, #-72 # gr_offs */
static const uint32_t prepare_pat[] = {
0xd10083ff, /* sub sp, sp, 32 # allocate va_list */
0x910003e8, /* mov x8, sp # va_list addr */
0x128007e9, /* mov w9, #-64 # gr_offs */
0xb9001909, /* str w9,[x8, 24] # va_list.gr_offs */
0x910003ea, /* mov x10, sp # va_list addr */
0xb9001949, /* str w9,[x10, 24] # va_list.gr_offs */
0x12800fe9, /* mov w9, #-128 # vr_offs */
0xb9001d09, /* str w9,[x8, 28] #va_list.vr_offs */
0x910383e9, /* add x9, sp, #224 # gr_top */
0xf9000509, /* str x9,[x8, 8] # va_list.gr_top */
0xb9001d49, /* str w9,[x10, 28] #va_list.vr_offs */
0x9103c3e9, /* add x9, sp, #240 # gr_top */
0xf9000549, /* str x9,[x10, 8] # va_list.gr_top */
0x91004129, /* add x9, x9, #16 # stack */
0xf9000109, /* str x9,[x8] # valist.stack */
0xf9000149, /* str x9,[x10] # valist.stack */
0x910283e9, /* add x9, sp, #160 # vr_top*/
0xf9000909, /* str x9,[x8, 16] # va_list.vr_top */
0xaa0803e2, /* mov x2, x8 # va arg */
0xf9000949, /* str x9,[x10, 16] # va_list.vr_top */
0xaa0a03e2, /* mov x2, x10 # va arg */
0xd2800009, /* mov x9, <(nres+1)*16> */
0xcb2963ff, /* sub sp, sp, x9 */
0x910043e3, /* add x3, sp, 16 # results arg */
@ -295,26 +420,33 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
};
static const uint32_t shim_end[] = {
0xf94003fe, /* ldr x30, [sp] */
0xd2800009, /* mov x9, 224+(nres+1)*16 */
0xd2800009, /* mov x9, 240+(nres+1)*16 */
0x8b2963ff, /* add sp, sp, x9 */
0xf84107f3, /* ldr x19, sp, 16 */
0xd65f03c0, /* ret x30 */
};
uint32_t pat, imm, n_xregs, n_vregs, offset, offset_imm;
uint32_t nres = func_item->u.func->nres;
MIR_type_t *results = func_item->u.func->res_types;
VARR_TRUNC (uint8_t, machine_insns, 0);
push_insns (ctx, &save_x19_pat, sizeof (save_x19_pat));
push_insns (ctx, save_insns, sizeof (save_insns));
push_insns (ctx, prepare_pat, sizeof (prepare_pat));
MIR_func_t func = func_item->u.func;
uint32_t nres = func->nres;
int x8_res_p = func->nargs != 0 && VARR_GET (MIR_var_t, func->vars, 0).type == MIR_T_RBLK;
MIR_type_t *results = func->res_types;
VARR (uint8_t) * code;
void *res;
VARR_CREATE (uint8_t, code, 128);
push_insns (code, &save_x19_pat, sizeof (save_x19_pat));
push_insns (code, save_insns, sizeof (save_insns));
if (x8_res_p)
push_insns (code, &set_x8_gr_offs, sizeof (set_x8_gr_offs));
else
push_insns (code, &set_gr_offs, sizeof (set_gr_offs));
push_insns (code, prepare_pat, sizeof (prepare_pat));
imm = (nres + 1) * 16;
mir_assert (imm < (1 << 16));
((uint32_t *) (VARR_ADDR (uint8_t, machine_insns) + VARR_LENGTH (uint8_t, machine_insns)))[-5]
|= imm << 5;
gen_mov_addr (ctx, 0, ctx); /* mov x0, ctx */
gen_mov_addr (ctx, 1, func_item); /* mov x1, func_item */
gen_call_addr (ctx, NULL, 9, handler);
((uint32_t *) (VARR_ADDR (uint8_t, code) + VARR_LENGTH (uint8_t, code)))[-5] |= imm << 5;
gen_mov_addr (code, 0, ctx); /* mov x0, ctx */
gen_mov_addr (code, 1, func_item); /* mov x1, func_item */
gen_call_addr (code, NULL, 9, handler);
/* move results: */
n_xregs = n_vregs = offset = 0;
mir_assert (sizeof (long double) == 16);
@ -327,49 +459,56 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
pat = ld_pat | n_xregs;
n_xregs++;
} else {
(*error_func) (MIR_ret_error, "aarch64 can not handle this combination of return values");
MIR_get_error_func (ctx) (MIR_ret_error,
"aarch64 can not handle this combination of return values");
}
offset_imm = offset >> (results[i] == MIR_T_F ? 2 : results[i] == MIR_T_LD ? 4 : 3);
mir_assert (offset_imm < (1 << 12));
pat |= offset_imm << 10;
push_insns (ctx, &pat, sizeof (pat));
push_insns (code, &pat, sizeof (pat));
offset += 16;
}
push_insns (ctx, shim_end, sizeof (shim_end));
imm = 224 + (nres + 1) * 16;
push_insns (code, shim_end, sizeof (shim_end));
imm = 240 + (nres + 1) * 16;
mir_assert (imm < (1 << 16));
((uint32_t *) (VARR_ADDR (uint8_t, machine_insns) + VARR_LENGTH (uint8_t, machine_insns)))[-4]
|= imm << 5;
return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
((uint32_t *) (VARR_ADDR (uint8_t, code) + VARR_LENGTH (uint8_t, code)))[-4] |= imm << 5;
res = _MIR_publish_code (ctx, VARR_ADDR (uint8_t, code), VARR_LENGTH (uint8_t, code));
VARR_DESTROY (uint8_t, code);
return res;
}
/* Save regs x0-x7, q0-q7; x9 = call hook_address (ctx, called_func); restore regs; br x9 */
/* Save regs x8, x0-x7, q0-q7; x9 = call hook_address (ctx, called_func); restore regs; br x9 */
void *_MIR_get_wrapper (MIR_context_t ctx, MIR_item_t called_func, void *hook_address) {
static const uint32_t jmp_insn = 0xd61f0120; /* br x9 */
static const uint32_t move_insn = 0xaa0003e9; /* mov x9, x0 */
static const uint32_t save_fplr = 0xa9bf7bfd; /* stp R29, R30, [SP, #-16]! */
static const uint32_t restore_fplr = 0xa8c17bfd; /* ldp R29, R30, SP, #16 */
uint8_t *base_addr, *curr_addr, *code;
uint8_t *base_addr, *curr_addr, *res_code = NULL;
size_t len = sizeof (save_insns) + sizeof (restore_insns); /* initial code length */
VARR (uint8_t) * code;
for (;;) {
mir_mutex_lock (&code_mutex);
VARR_CREATE (uint8_t, code, 128);
for (;;) { /* dealing with moving code to another page */
curr_addr = base_addr = _MIR_get_new_code_addr (ctx, len);
if (curr_addr == NULL) return NULL;
VARR_TRUNC (uint8_t, machine_insns, 0);
push_insns (ctx, &save_fplr, sizeof (save_fplr));
if (curr_addr == NULL) break;
VARR_TRUNC (uint8_t, code, 0);
push_insns (code, &save_fplr, sizeof (save_fplr));
curr_addr += 4;
push_insns (ctx, save_insns, sizeof (save_insns));
push_insns (code, save_insns, sizeof (save_insns));
curr_addr += sizeof (save_insns);
curr_addr += gen_mov_addr (ctx, 0, ctx); /*mov x0,ctx */
curr_addr += gen_mov_addr (ctx, 1, called_func); /*mov x1,called_func */
gen_call_addr (ctx, curr_addr, 10, hook_address); /*call <hook_address>, use x10 as temp */
push_insns (ctx, &move_insn, sizeof (move_insn));
push_insns (ctx, restore_insns, sizeof (restore_insns));
push_insns (ctx, &restore_fplr, sizeof (restore_fplr));
push_insns (ctx, &jmp_insn, sizeof (jmp_insn));
len = VARR_LENGTH (uint8_t, machine_insns);
code = _MIR_publish_code_by_addr (ctx, base_addr, VARR_ADDR (uint8_t, machine_insns), len);
if (code != NULL) return code;
curr_addr += gen_mov_addr (code, 0, ctx); /*mov x0,ctx */
curr_addr += gen_mov_addr (code, 1, called_func); /*mov x1,called_func */
gen_call_addr (code, curr_addr, 10, hook_address); /*call <hook_address>, use x10 as temp */
push_insns (code, &move_insn, sizeof (move_insn));
push_insns (code, restore_insns, sizeof (restore_insns));
push_insns (code, &restore_fplr, sizeof (restore_fplr));
push_insns (code, &jmp_insn, sizeof (jmp_insn));
len = VARR_LENGTH (uint8_t, code);
res_code = _MIR_publish_code_by_addr (ctx, base_addr, VARR_ADDR (uint8_t, code), len);
if (res_code != NULL) break;
}
VARR_DESTROY (uint8_t, code);
mir_mutex_unlock (&code_mutex);
return res_code;
}
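Tying this back to the comment at the top of this file: a block argument of at most two quadwords is loaded fully into x-registers (or the outgoing argument area), while anything larger is first copied to the small-aggregate save area and only its address is passed. A tiny standalone illustration of that size split (blk_path is a hypothetical helper, not project code), using the same (size + 7) / 8 rounding as the thunk generator:

#include <stdio.h>

static const char *blk_path (size_t size) {
  size_t qwords = (size + 7) / 8;     /* same rounding as _MIR_get_ff_call above */
  return qwords <= 2 ? "loaded fully into x-registers / outgoing stack"
                     : "copied to the save area, address passed instead";
}

int main (void) {
  printf ("12 bytes -> %s\n", blk_path (12)); /* 2 qwords */
  printf ("24 bytes -> %s\n", blk_path (24)); /* 3 qwords */
  return 0;
}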

@ -1,5 +1,5 @@
/* This file is a part of MIR project.
Copyright (C) 2018-2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2018-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
#ifndef MIR_BITMAP_H

@ -1,5 +1,5 @@
/* This file is part of MIR project.
Copyright (C) 2018-2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2018-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* Typed doubly linked lists. */

@ -1,5 +1,5 @@
/* This file is a part of MIR project.
Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
static void fancy_abort (int code) {
@ -56,10 +56,10 @@ static inline int target_fixed_hard_reg_p (MIR_reg_t hard_reg) {
|| hard_reg == TEMP_LDOUBLE_HARD_REG2);
}
static inline int target_call_used_hard_reg_p (MIR_reg_t hard_reg) {
static inline int target_call_used_hard_reg_p (MIR_reg_t hard_reg, MIR_type_t type) {
assert (hard_reg <= MAX_HARD_REG);
return !((hard_reg >= R19_HARD_REG && hard_reg <= R28_HARD_REG)
|| (hard_reg >= V8_HARD_REG && hard_reg <= V15_HARD_REG));
if (hard_reg <= SP_HARD_REG) return !(hard_reg >= R19_HARD_REG && hard_reg <= R28_HARD_REG);
return type == MIR_T_LD || !(hard_reg >= V8_HARD_REG && hard_reg <= V15_HARD_REG);
}
/* Stack layout (sp refers to the last reserved stack slot address)
@ -84,6 +84,9 @@ static inline int target_call_used_hard_reg_p (MIR_reg_t hard_reg) {
| old FP | frame pointer for previous func stack frame; new FP refers to here
| | it has lowest address as 12-bit offsets are only positive
|---------------|
| small aggr |
| save area | optional
|---------------|
| alloca areas | optional
|---------------|
| slots for | dynamically allocated/deallocated by caller
@ -128,7 +131,7 @@ static MIR_reg_t get_arg_reg (MIR_type_t arg_type, size_t *int_arg_num, size_t *
}
(*fp_arg_num)++;
*mov_code = arg_type == MIR_T_F ? MIR_FMOV : arg_type == MIR_T_D ? MIR_DMOV : MIR_LDMOV;
} else {
} else { /* including BLK, RBLK: */
switch (*int_arg_num) {
case 0:
case 1:
@ -146,12 +149,87 @@ static MIR_reg_t get_arg_reg (MIR_type_t arg_type, size_t *int_arg_num, size_t *
return arg_reg;
}
static void mir_blk_mov (uint64_t *to, uint64_t *from, uint64_t nwords) {
for (; nwords > 0; nwords--) *to++ = *from++;
}
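mir_blk_mov above is registered further down as the mir.blk_mov builtin and is what large block arguments are ultimately copied with. The following is a self-contained illustration of the same qword-wise copy, with the byte size rounded up to 8-byte words the way the generator does; the union and names are invented for the example and are not part of MIR.

#include <stdint.h>
#include <stdio.h>

/* Same qword-wise copy as mir_blk_mov, exercised on a plain C object. */
static void blk_mov (uint64_t *to, uint64_t *from, uint64_t nwords) {
  for (; nwords > 0; nwords--) *to++ = *from++;
}

int main (void) {
  union blk { struct { uint64_t a, b; char tag[8]; } s; uint64_t w[3]; }
    src = {{1, 2, "blk"}}, dst = {{0, 0, ""}};
  uint64_t qwords = (sizeof (src.s) + 7) / 8; /* 24 bytes -> 3 qwords, same rounding the generator uses */
  blk_mov (dst.w, src.w, qwords);
  printf ("%llu %llu %s\n", (unsigned long long) dst.s.a, (unsigned long long) dst.s.b, dst.s.tag);
  return 0; /* prints: 1 2 blk */
}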
static void gen_mov (gen_ctx_t gen_ctx, MIR_insn_t anchor, MIR_insn_code_t code, MIR_op_t dst_op,
MIR_op_t src_op) {
gen_add_insn_before (gen_ctx, anchor, MIR_new_insn (gen_ctx->ctx, code, dst_op, src_op));
}
static const char *BLK_MOV = "mir.blk_mov";
static const char *BLK_MOV_P = "mir.blk_mov.p";
static void gen_blk_mov (gen_ctx_t gen_ctx, MIR_insn_t anchor, size_t to_disp,
MIR_reg_t to_base_hard_reg, size_t from_disp, MIR_reg_t from_base_reg,
size_t qwords, int save_regs) {
MIR_context_t ctx = gen_ctx->ctx;
MIR_func_t func = curr_func_item->u.func;
MIR_item_t proto_item, func_import_item;
MIR_insn_t new_insn;
MIR_op_t ops[5], freg_op, treg_op, treg_op2, treg_op3;
treg_op = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
if (qwords <= 16) {
for (; qwords > 0; qwords--, to_disp += 8, from_disp += 8) {
gen_mov (gen_ctx, anchor, MIR_MOV, treg_op,
MIR_new_mem_op (ctx, MIR_T_I64, from_disp, from_base_reg, 0, 1));
gen_mov (gen_ctx, anchor, MIR_MOV,
_MIR_new_hard_reg_mem_op (ctx, MIR_T_I64, to_disp, to_base_hard_reg,
MIR_NON_HARD_REG, 1),
treg_op);
}
return;
}
treg_op2 = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
treg_op3 = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
/* Save arg regs: */
if (save_regs > 0)
gen_mov (gen_ctx, anchor, MIR_MOV, treg_op, _MIR_new_hard_reg_op (ctx, R0_HARD_REG));
if (save_regs > 1)
gen_mov (gen_ctx, anchor, MIR_MOV, treg_op2, _MIR_new_hard_reg_op (ctx, R1_HARD_REG));
if (save_regs > 2)
gen_mov (gen_ctx, anchor, MIR_MOV, treg_op3, _MIR_new_hard_reg_op (ctx, R2_HARD_REG));
/* call blk move: */
proto_item = _MIR_builtin_proto (ctx, curr_func_item->module, BLK_MOV_P, 0, NULL, 3, MIR_T_I64,
"to", MIR_T_I64, "from", MIR_T_I64, "nwords");
func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, BLK_MOV, mir_blk_mov);
freg_op = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
new_insn = MIR_new_insn (ctx, MIR_MOV, freg_op, MIR_new_ref_op (ctx, func_import_item));
gen_add_insn_before (gen_ctx, anchor, new_insn);
gen_add_insn_before (gen_ctx, anchor,
MIR_new_insn (gen_ctx->ctx, MIR_ADD, _MIR_new_hard_reg_op (ctx, R0_HARD_REG),
_MIR_new_hard_reg_op (ctx, to_base_hard_reg),
MIR_new_int_op (ctx, to_disp)));
gen_add_insn_before (gen_ctx, anchor,
MIR_new_insn (gen_ctx->ctx, MIR_ADD, _MIR_new_hard_reg_op (ctx, R1_HARD_REG),
MIR_new_reg_op (ctx, from_base_reg),
MIR_new_int_op (ctx, from_disp)));
gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_hard_reg_op (ctx, R2_HARD_REG),
MIR_new_int_op (ctx, qwords));
ops[0] = MIR_new_ref_op (ctx, proto_item);
ops[1] = freg_op;
ops[2] = _MIR_new_hard_reg_op (ctx, R0_HARD_REG);
ops[3] = _MIR_new_hard_reg_op (ctx, R1_HARD_REG);
ops[4] = _MIR_new_hard_reg_op (ctx, R2_HARD_REG);
new_insn = MIR_new_insn_arr (ctx, MIR_CALL, 5, ops);
gen_add_insn_before (gen_ctx, anchor, new_insn);
/* Restore arg regs: */
if (save_regs > 0)
gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_hard_reg_op (ctx, R0_HARD_REG), treg_op);
if (save_regs > 1)
gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_hard_reg_op (ctx, R1_HARD_REG), treg_op2);
if (save_regs > 2)
gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_hard_reg_op (ctx, R2_HARD_REG), treg_op3);
}
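The caller-side code below (machinize_call) decides where each block argument travels on this port: up to two qwords can ride in the remaining integer argument registers, a small block that no longer fits in registers is stored directly into the outgoing argument area, and anything larger is copied to a stack buffer whose address is then passed. The standalone sketch below only illustrates that classification; the sizes and the fixed register count are invented for the example.

#include <stdio.h>

int main (void) {
  size_t sizes[] = {8, 12, 16, 24, 64}; /* block sizes in bytes (illustrative) */
  size_t int_arg_num = 7;               /* pretend 7 of the 8 integer arg regs are already taken */
  for (size_t i = 0; i < sizeof (sizes) / sizeof (sizes[0]); i++) {
    size_t qwords = (sizes[i] + 7) / 8; /* round up to 8-byte words */
    const char *where;
    if (qwords <= 2)
      where = int_arg_num + qwords <= 8 ? "in integer argument registers"
                                        : "directly in the outgoing stack area";
    else
      where = "copied to a stack buffer, address passed";
    printf ("%zu-byte block -> %zu qwords, passed %s\n", sizes[i], qwords, where);
  }
  return 0;
}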
static void machinize_call (gen_ctx_t gen_ctx, MIR_insn_t call_insn) {
MIR_context_t ctx = gen_ctx->ctx;
MIR_func_t func = curr_func_item->u.func;
MIR_proto_t proto = call_insn->ops[0].u.ref->u.proto;
size_t nargs, nops = MIR_insn_nops (ctx, call_insn), start = proto->nres + 2;
size_t int_arg_num = 0, fp_arg_num = 0, mem_size = 0;
size_t int_arg_num = 0, fp_arg_num = 0, mem_size = 0, blk_offset = 0, qwords;
MIR_type_t type, mem_type;
MIR_op_mode_t mode;
MIR_var_t *arg_vars = NULL;
@ -178,11 +256,12 @@ static void machinize_call (gen_ctx_t gen_ctx, MIR_insn_t call_insn) {
call_insn->ops[1] = temp_op;
gen_add_insn_before (gen_ctx, call_insn, new_insn);
}
for (size_t i = start; i < nops; i++) {
arg_op = call_insn->ops[i];
gen_assert (arg_op.mode == MIR_OP_REG || arg_op.mode == MIR_OP_HARD_REG);
for (size_t i = start; i < nops; i++) { /* calculate offset for blk params */
if (i - start < nargs) {
type = arg_vars[i - start].type;
} else if (call_insn->ops[i].mode == MIR_OP_MEM) {
type = call_insn->ops[i].u.mem.type;
gen_assert (MIR_all_blk_type_p (type));
} else {
mode = call_insn->ops[i].value_mode; // ??? smaller ints
gen_assert (mode == MIR_OP_INT || mode == MIR_OP_UINT || mode == MIR_OP_FLOAT
@ -192,18 +271,98 @@ static void machinize_call (gen_ctx_t gen_ctx, MIR_insn_t call_insn) {
"passing float variadic arg (should be passed as double)");
type = mode == MIR_OP_DOUBLE ? MIR_T_D : mode == MIR_OP_LDOUBLE ? MIR_T_LD : MIR_T_I64;
}
gen_assert (!MIR_all_blk_type_p (type) || call_insn->ops[i].mode == MIR_OP_MEM);
if (type == MIR_T_RBLK && i == start) continue; /* hidden arg */
if (MIR_blk_type_p (type) && (qwords = (call_insn->ops[i].u.mem.disp + 7) / 8) <= 2) {
if (int_arg_num + qwords > 8) blk_offset += qwords * 8;
int_arg_num += qwords;
} else if (get_arg_reg (type, &int_arg_num, &fp_arg_num, &new_insn_code) == MIR_NON_HARD_REG) {
if (type == MIR_T_LD && blk_offset % 16 != 0) blk_offset = (blk_offset + 15) / 16 * 16;
blk_offset += type == MIR_T_LD ? 16 : 8;
}
}
blk_offset = (blk_offset + 15) / 16 * 16;
int_arg_num = fp_arg_num = 0;
for (size_t i = start; i < nops; i++) {
arg_op = call_insn->ops[i];
gen_assert (arg_op.mode == MIR_OP_REG || arg_op.mode == MIR_OP_HARD_REG
|| (arg_op.mode == MIR_OP_MEM && MIR_all_blk_type_p (arg_op.u.mem.type)));
if (i - start < nargs) {
type = arg_vars[i - start].type;
} else if (call_insn->ops[i].mode == MIR_OP_MEM) {
type = call_insn->ops[i].u.mem.type;
gen_assert (MIR_all_blk_type_p (type));
} else {
mode = call_insn->ops[i].value_mode; // ??? smaller ints
type = mode == MIR_OP_DOUBLE ? MIR_T_D : mode == MIR_OP_LDOUBLE ? MIR_T_LD : MIR_T_I64;
}
ext_insn = NULL;
if ((ext_code = get_ext_code (type)) != MIR_INVALID_INSN) { /* extend arg if necessary */
temp_op = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
ext_insn = MIR_new_insn (ctx, ext_code, temp_op, arg_op);
call_insn->ops[i] = arg_op = temp_op;
}
gen_assert (
!MIR_all_blk_type_p (type)
|| (arg_op.mode == MIR_OP_MEM && arg_op.u.mem.disp >= 0 && arg_op.u.mem.index == 0));
if (type == MIR_T_RBLK && i == start) { /* hidden arg */
arg_reg_op = _MIR_new_hard_reg_op (ctx, R8_HARD_REG);
gen_mov (gen_ctx, call_insn, MIR_MOV, arg_reg_op, MIR_new_reg_op (ctx, arg_op.u.mem.base));
call_insn->ops[i] = arg_reg_op;
continue;
} else if (MIR_blk_type_p (type)) {
qwords = (arg_op.u.mem.disp + 7) / 8;
if (qwords <= 2) {
arg_reg = R0_HARD_REG + int_arg_num;
if (int_arg_num + qwords <= 8) {
/* A trick to keep arg regs live: */
call_insn->ops[i]
= _MIR_new_hard_reg_mem_op (ctx, MIR_T_UNDEF, 0, int_arg_num,
qwords < 2 ? MIR_NON_HARD_REG : int_arg_num + 1, 1);
if (qwords == 0) continue;
new_insn
= MIR_new_insn (ctx, MIR_MOV, _MIR_new_hard_reg_op (ctx, R0_HARD_REG + int_arg_num++),
MIR_new_mem_op (ctx, MIR_T_I64, 0, arg_op.u.mem.base, 0, 1));
gen_add_insn_before (gen_ctx, call_insn, new_insn);
if (qwords == 2) {
new_insn
= MIR_new_insn (ctx, MIR_MOV, _MIR_new_hard_reg_op (ctx, R0_HARD_REG + int_arg_num++),
MIR_new_mem_op (ctx, MIR_T_I64, 8, arg_op.u.mem.base, 0, 1));
gen_add_insn_before (gen_ctx, call_insn, new_insn);
}
} else { /* pass on stack w/o address: */
gen_blk_mov (gen_ctx, call_insn, mem_size, SP_HARD_REG, 0, arg_op.u.mem.base, qwords,
int_arg_num);
call_insn->ops[i] = _MIR_new_hard_reg_mem_op (ctx, MIR_T_UNDEF, mem_size, SP_HARD_REG,
MIR_NON_HARD_REG, 1);
mem_size += qwords * 8;
blk_offset += qwords * 8;
int_arg_num += qwords;
}
continue;
}
gen_blk_mov (gen_ctx, call_insn, blk_offset, SP_HARD_REG, 0, arg_op.u.mem.base, qwords,
int_arg_num);
arg_op = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
gen_add_insn_before (gen_ctx, call_insn,
MIR_new_insn (gen_ctx->ctx, MIR_ADD, arg_op,
_MIR_new_hard_reg_op (ctx, SP_HARD_REG),
MIR_new_int_op (ctx, blk_offset)));
blk_offset += qwords * 8;
}
if ((arg_reg = get_arg_reg (type, &int_arg_num, &fp_arg_num, &new_insn_code))
!= MIR_NON_HARD_REG) {
/* put arguments to argument hard regs */
if (ext_insn != NULL) gen_add_insn_before (gen_ctx, call_insn, ext_insn);
arg_reg_op = _MIR_new_hard_reg_op (ctx, arg_reg);
new_insn = MIR_new_insn (ctx, new_insn_code, arg_reg_op, arg_op);
if (type != MIR_T_RBLK) {
new_insn = MIR_new_insn (ctx, new_insn_code, arg_reg_op, arg_op);
} else {
assert (arg_op.mode == MIR_OP_MEM);
new_insn = MIR_new_insn (ctx, MIR_MOV, arg_reg_op, MIR_new_reg_op (ctx, arg_op.u.mem.base));
arg_reg_op = _MIR_new_hard_reg_mem_op (ctx, MIR_T_RBLK, arg_op.u.mem.disp, arg_reg,
MIR_NON_HARD_REG, 1);
}
gen_add_insn_before (gen_ctx, call_insn, new_insn);
call_insn->ops[i] = arg_reg_op;
} else { /* put arguments on the stack */
@ -213,7 +372,13 @@ static void machinize_call (gen_ctx_t gen_ctx, MIR_insn_t call_insn) {
= (type == MIR_T_F ? MIR_FMOV
: type == MIR_T_D ? MIR_DMOV : type == MIR_T_LD ? MIR_LDMOV : MIR_MOV);
mem_op = _MIR_new_hard_reg_mem_op (ctx, mem_type, mem_size, SP_HARD_REG, MIR_NON_HARD_REG, 1);
new_insn = MIR_new_insn (ctx, new_insn_code, mem_op, arg_op);
if (type != MIR_T_RBLK) {
new_insn = MIR_new_insn (ctx, new_insn_code, mem_op, arg_op);
} else {
assert (arg_op.mode == MIR_OP_MEM);
new_insn
= MIR_new_insn (ctx, new_insn_code, mem_op, MIR_new_reg_op (ctx, arg_op.u.mem.base));
}
gen_assert (prev_call_insn != NULL); /* call_insn should not be 1st after simplification */
MIR_insert_insn_after (ctx, curr_func_item, prev_call_insn, new_insn);
prev_insn = DLIST_PREV (MIR_insn_t, new_insn);
@ -224,6 +389,8 @@ static void machinize_call (gen_ctx_t gen_ctx, MIR_insn_t call_insn) {
if (ext_insn != NULL) gen_add_insn_after (gen_ctx, prev_call_insn, ext_insn);
}
}
blk_offset = (blk_offset + 15) / 16 * 16;
if (blk_offset != 0) mem_size = blk_offset;
n_iregs = n_vregs = 0;
for (size_t i = 0; i < proto->nres; i++) {
int float_p;
@ -321,6 +488,8 @@ static const char *LDNEG_P = "mir.ldneg.p";
static const char *VA_ARG_P = "mir.va_arg.p";
static const char *VA_ARG = "mir.va_arg";
static const char *VA_BLOCK_ARG_P = "mir.va_block_arg.p";
static const char *VA_BLOCK_ARG = "mir.va_block_arg";
static int64_t mir_ldeq (long double d1, long double d2) { return d1 == d2; }
static const char *LDEQ = "mir.ldeq";
@ -467,15 +636,17 @@ static int get_builtin (gen_ctx_t gen_ctx, MIR_insn_code_t code, MIR_item_t *pro
MIR_T_I64, "va", MIR_T_I64, "type");
*func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, VA_ARG, va_arg_builtin);
return 2;
case MIR_VA_BLOCK_ARG:
*proto_item
= _MIR_builtin_proto (ctx, curr_func_item->module, VA_BLOCK_ARG_P, 0, NULL, 4, MIR_T_I64,
"res", MIR_T_I64, "va", MIR_T_I64, "size", MIR_T_I64, "ncase");
*func_import_item
= _MIR_builtin_func (ctx, curr_func_item->module, VA_BLOCK_ARG, va_block_arg_builtin);
return 4;
default: return 0;
}
}
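The new MIR_VA_BLOCK_ARG case routes va-list access to a runtime helper whose prototype takes the result address, the va list, the block size and a case number. At the C level the job is conceptually the one sketched below: pull the next aggregate out of a va_list and copy it into caller-provided storage. Everything here (blk_t, get_block_arg, sum_blocks) is an illustration built on plain ISO C va_arg, not the actual va_block_arg_builtin, which must decode the target-specific va_list layout and the extra ncase value.

#include <stdarg.h>
#include <stdio.h>
#include <string.h>

typedef struct { long a, b, c; } blk_t;

/* Copy `size` bytes of the next aggregate argument from the va list into res. */
static void get_block_arg (void *res, va_list *va, size_t size) {
  blk_t tmp = va_arg (*va, blk_t);
  memcpy (res, &tmp, size);
}

static long sum_blocks (int n, ...) {
  va_list va;
  long s = 0;
  va_start (va, n);
  for (int i = 0; i < n; i++) {
    blk_t b;
    get_block_arg (&b, &va, sizeof (b));
    s += b.a + b.b + b.c;
  }
  va_end (va);
  return s;
}

int main (void) {
  blk_t x = {1, 2, 3}, y = {4, 5, 6};
  printf ("%ld\n", sum_blocks (2, x, y)); /* prints 21 */
  return 0;
}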
static void gen_mov (gen_ctx_t gen_ctx, MIR_insn_t anchor, MIR_insn_code_t code, MIR_op_t dst_op,
MIR_op_t src_op) {
gen_add_insn_before (gen_ctx, anchor, MIR_new_insn (gen_ctx->ctx, code, dst_op, src_op));
}
DEF_VARR (int);
DEF_VARR (uint8_t);
DEF_VARR (uint64_t);
@ -499,7 +670,8 @@ DEF_VARR (label_ref_t);
DEF_VARR (MIR_code_reloc_t);
struct target_ctx {
unsigned char alloca_p, stack_arg_func_p, leaf_p;
unsigned char alloca_p, block_arg_func_p, leaf_p;
size_t small_aggregate_save_area;
VARR (int) * pattern_indexes;
VARR (insn_pattern_info_t) * insn_pattern_info;
VARR (uint8_t) * result_code;
@ -509,8 +681,9 @@ struct target_ctx {
};
#define alloca_p gen_ctx->target_ctx->alloca_p
#define stack_arg_func_p gen_ctx->target_ctx->stack_arg_func_p
#define block_arg_func_p gen_ctx->target_ctx->block_arg_func_p
#define leaf_p gen_ctx->target_ctx->leaf_p
#define small_aggregate_save_area gen_ctx->target_ctx->small_aggregate_save_area
#define pattern_indexes gen_ctx->target_ctx->pattern_indexes
#define insn_pattern_info gen_ctx->target_ctx->insn_pattern_info
#define result_code gen_ctx->target_ctx->result_code
@ -521,7 +694,7 @@ struct target_ctx {
static MIR_disp_t target_get_stack_slot_offset (gen_ctx_t gen_ctx, MIR_type_t type,
MIR_reg_t slot) {
/* slot is 0, 1, ... */
size_t offset = curr_func_item->u.func->vararg_p || stack_arg_func_p ? 32 : 16;
size_t offset = curr_func_item->u.func->vararg_p || block_arg_func_p ? 32 : 16;
return ((MIR_disp_t) slot * 8 + offset);
}
@ -532,28 +705,62 @@ static void target_machinize (gen_ctx_t gen_ctx) {
MIR_type_t type, mem_type, res_type;
MIR_insn_code_t code, new_insn_code;
MIR_insn_t insn, next_insn, new_insn, anchor;
MIR_var_t var;
MIR_reg_t ret_reg, arg_reg;
MIR_op_t ret_reg_op, arg_reg_op, mem_op, prev_sp_op, temp_op;
size_t i, int_arg_num, fp_arg_num, mem_size;
MIR_op_t ret_reg_op, arg_reg_op, mem_op, temp_op;
size_t i, int_arg_num, fp_arg_num, mem_size, qwords;
assert (curr_func_item->item_type == MIR_func_item);
func = curr_func_item->u.func;
stack_arg_func_p = FALSE;
block_arg_func_p = FALSE;
anchor = DLIST_HEAD (MIR_insn_t, func->insns);
small_aggregate_save_area = 0;
for (i = int_arg_num = fp_arg_num = mem_size = 0; i < func->nargs; i++) {
/* Argument extensions are already done in simplify */

/* Prologue: generate arg_var = hard_reg|stack mem ... */
type = VARR_GET (MIR_var_t, func->vars, i).type;
/* Prologue: generate arg_var = hard_reg|stack mem|stack addr ... */
var = VARR_GET (MIR_var_t, func->vars, i);
type = var.type;
if (type == MIR_T_RBLK && i == 0) { /* hidden arg */
arg_reg_op = _MIR_new_hard_reg_op (ctx, R8_HARD_REG);
gen_mov (gen_ctx, anchor, MIR_MOV, MIR_new_reg_op (ctx, i + 1), arg_reg_op);
continue;
} else if (MIR_blk_type_p (type) && (qwords = (var.size + 7) / 8) <= 2) {
if (int_arg_num + qwords <= 8) {
small_aggregate_save_area += qwords * 8;
new_insn = MIR_new_insn (ctx, MIR_SUB, MIR_new_reg_op (ctx, i + 1),
_MIR_new_hard_reg_op (ctx, FP_HARD_REG),
MIR_new_int_op (ctx, small_aggregate_save_area));
gen_add_insn_before (gen_ctx, anchor, new_insn);
if (qwords == 0) continue;
gen_mov (gen_ctx, anchor, MIR_MOV, MIR_new_mem_op (ctx, MIR_T_I64, 0, i + 1, 0, 1),
_MIR_new_hard_reg_op (ctx, int_arg_num));
if (qwords == 2)
gen_mov (gen_ctx, anchor, MIR_MOV, MIR_new_mem_op (ctx, MIR_T_I64, 8, i + 1, 0, 1),
_MIR_new_hard_reg_op (ctx, int_arg_num + 1));
} else { /* pass on stack w/o address: */
if (!block_arg_func_p) {
block_arg_func_p = TRUE;
gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_hard_reg_op (ctx, R8_HARD_REG),
_MIR_new_hard_reg_mem_op (ctx, MIR_T_I64, 16, FP_HARD_REG, MIR_NON_HARD_REG, 1));
}
gen_add_insn_before (gen_ctx, anchor,
MIR_new_insn (ctx, MIR_ADD, MIR_new_reg_op (ctx, i + 1),
_MIR_new_hard_reg_op (ctx, R8_HARD_REG),
MIR_new_int_op (ctx, mem_size)));
mem_size += qwords * 8;
}
int_arg_num += qwords;
continue;
}
arg_reg = get_arg_reg (type, &int_arg_num, &fp_arg_num, &new_insn_code);
if (arg_reg != MIR_NON_HARD_REG) {
arg_reg_op = _MIR_new_hard_reg_op (ctx, arg_reg);
gen_mov (gen_ctx, anchor, new_insn_code, MIR_new_reg_op (ctx, i + 1), arg_reg_op);
} else {
/* arg is on the stack */
if (!stack_arg_func_p) {
stack_arg_func_p = TRUE;
prev_sp_op = _MIR_new_hard_reg_op (ctx, R8_HARD_REG);
gen_mov (gen_ctx, anchor, MIR_MOV, prev_sp_op,
if (!block_arg_func_p) {
block_arg_func_p = TRUE;
gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_hard_reg_op (ctx, R8_HARD_REG),
_MIR_new_hard_reg_mem_op (ctx, MIR_T_I64, 16, FP_HARD_REG, MIR_NON_HARD_REG, 1));
}
mem_type = type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD ? type : MIR_T_I64;
@ -592,29 +799,33 @@ static void target_machinize (gen_ctx_t gen_ctx) {
insn = new_insn;
}
if ((nargs = get_builtin (gen_ctx, code, &proto_item, &func_import_item)) > 0) {
if (code == MIR_VA_ARG) { /* do nothing */
if (code == MIR_VA_ARG || code == MIR_VA_BLOCK_ARG) {
/* Use a builtin func call:
mov func_reg, func ref; mov flag_reg, <type>; call proto, func_reg, res_reg, va_reg,
flag_reg */
MIR_op_t ops[5], func_reg_op, flag_reg_op;
MIR_op_t res_reg_op = insn->ops[0], va_reg_op = insn->ops[1], mem_op = insn->ops[2];
mov func_reg, func ref; [mov reg3, type;] call proto, func_reg, res_reg, va_reg,
reg3 */
MIR_op_t ops[6], func_reg_op, reg_op3;
MIR_op_t res_reg_op = insn->ops[0], va_reg_op = insn->ops[1], op3 = insn->ops[2];
assert (res_reg_op.mode == MIR_OP_REG && va_reg_op.mode == MIR_OP_REG
&& mem_op.mode == MIR_OP_MEM);
&& op3.mode == (code == MIR_VA_ARG ? MIR_OP_MEM : MIR_OP_REG));
func_reg_op = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
flag_reg_op = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
reg_op3 = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
next_insn = new_insn
= MIR_new_insn (ctx, MIR_MOV, func_reg_op, MIR_new_ref_op (ctx, func_import_item));
gen_add_insn_before (gen_ctx, insn, new_insn);
new_insn = MIR_new_insn (ctx, MIR_MOV, flag_reg_op,
MIR_new_int_op (ctx, (int64_t) mem_op.u.mem.type));
gen_add_insn_before (gen_ctx, insn, new_insn);
if (code == MIR_VA_ARG) {
new_insn
= MIR_new_insn (ctx, MIR_MOV, reg_op3, MIR_new_int_op (ctx, (int64_t) op3.u.mem.type));
op3 = reg_op3;
gen_add_insn_before (gen_ctx, insn, new_insn);
}
ops[0] = MIR_new_ref_op (ctx, proto_item);
ops[1] = func_reg_op;
ops[2] = res_reg_op;
ops[3] = va_reg_op;
ops[4] = flag_reg_op;
new_insn = MIR_new_insn_arr (ctx, MIR_CALL, 5, ops);
ops[4] = op3;
if (code == MIR_VA_BLOCK_ARG) ops[5] = insn->ops[3];
new_insn = MIR_new_insn_arr (ctx, MIR_CALL, code == MIR_VA_ARG ? 5 : 6, ops);
gen_add_insn_before (gen_ctx, insn, new_insn);
gen_delete_insn (gen_ctx, insn);
} else { /* Use builtin: mov freg, func ref; call proto, freg, res_reg, op_reg[, op_reg2] */
@ -728,14 +939,14 @@ static void target_make_prolog_epilog (gen_ctx_t gen_ctx, bitmap_t used_hard_reg
assert (curr_func_item->item_type == MIR_func_item);
func = curr_func_item->u.func;
for (i = saved_iregs_num = saved_fregs_num = 0; i <= MAX_HARD_REG; i++)
if (!target_call_used_hard_reg_p (i) && bitmap_bit_p (used_hard_regs, i)) {
if (!target_call_used_hard_reg_p (i, MIR_T_UNDEF) && bitmap_bit_p (used_hard_regs, i)) {
if (i < V0_HARD_REG)
saved_iregs_num++;
else
saved_fregs_num++;
}
if (leaf_p && !alloca_p && saved_iregs_num == 0 && saved_fregs_num == 0 && !func->vararg_p
&& stack_slots_num == 0)
&& stack_slots_num == 0 && !block_arg_func_p && small_aggregate_save_area == 0)
return;
sp_reg_op = _MIR_new_hard_reg_op (ctx, SP_HARD_REG);
fp_reg_op = _MIR_new_hard_reg_op (ctx, FP_HARD_REG);
@ -743,7 +954,7 @@ static void target_make_prolog_epilog (gen_ctx_t gen_ctx, bitmap_t used_hard_reg
anchor = DLIST_HEAD (MIR_insn_t, func->insns);
frame_size = func->vararg_p ? reg_save_area_size : 0;
for (i = 0; i <= MAX_HARD_REG; i++)
if (!target_call_used_hard_reg_p (i) && bitmap_bit_p (used_hard_regs, i)) {
if (!target_call_used_hard_reg_p (i, MIR_T_UNDEF) && bitmap_bit_p (used_hard_regs, i)) {
if (i < V0_HARD_REG) {
frame_size += 8;
} else {
@ -755,7 +966,7 @@ static void target_make_prolog_epilog (gen_ctx_t gen_ctx, bitmap_t used_hard_reg
frame_size_after_saved_regs = frame_size;
frame_size += stack_slots_num * 8;
if (frame_size % 16 != 0) frame_size = (frame_size + 15) / 16 * 16;
save_prev_stack_p = func->vararg_p || stack_arg_func_p;
save_prev_stack_p = func->vararg_p || block_arg_func_p;
treg_op = _MIR_new_hard_reg_op (ctx, R9_HARD_REG);
if (save_prev_stack_p) { /* prev stack pointer */
gen_mov (gen_ctx, anchor, MIR_MOV, treg_op, sp_reg_op);
@ -782,7 +993,7 @@ static void target_make_prolog_epilog (gen_ctx_t gen_ctx, bitmap_t used_hard_reg
_MIR_new_hard_reg_mem_op (ctx, MIR_T_I64, 0, SP_HARD_REG, MIR_NON_HARD_REG, 1),
_MIR_new_hard_reg_op (ctx, FP_HARD_REG)); /* mem[sp] = fp */
gen_mov (gen_ctx, anchor, MIR_MOV, fp_reg_op, sp_reg_op); /* fp = sp */
if (func->vararg_p) {
if (func->vararg_p) { // ??? saving only regs corresponding to ...
MIR_reg_t base = SP_HARD_REG;
start = (int64_t) frame_size - reg_save_area_size;
@ -812,7 +1023,7 @@ static void target_make_prolog_epilog (gen_ctx_t gen_ctx, bitmap_t used_hard_reg
/* Saving callee saved hard registers: */
offset = frame_size - frame_size_after_saved_regs;
for (i = 0; i <= MAX_HARD_REG; i++)
if (!target_call_used_hard_reg_p (i) && bitmap_bit_p (used_hard_regs, i)) {
if (!target_call_used_hard_reg_p (i, MIR_T_UNDEF) && bitmap_bit_p (used_hard_regs, i)) {
if (i < V0_HARD_REG) {
gen_mov (gen_ctx, anchor, MIR_MOV,
_MIR_new_hard_reg_mem_op (ctx, MIR_T_I64, offset, FP_HARD_REG, MIR_NON_HARD_REG,
@ -827,13 +1038,20 @@ static void target_make_prolog_epilog (gen_ctx_t gen_ctx, bitmap_t used_hard_reg
offset += 16;
}
}
if (small_aggregate_save_area != 0) { // ??? duplication with vararg saved regs
if (small_aggregate_save_area % 16 != 0)
small_aggregate_save_area = (small_aggregate_save_area + 15) / 16 * 16;
new_insn = MIR_new_insn (ctx, MIR_SUB, sp_reg_op, sp_reg_op,
MIR_new_int_op (ctx, small_aggregate_save_area));
gen_add_insn_before (gen_ctx, anchor, new_insn); /* sp -= <small aggr save area size> */
}
/* Epilogue: */
anchor = DLIST_TAIL (MIR_insn_t, func->insns);
assert (anchor->code == MIR_RET);
/* Restoring hard registers: */
offset = frame_size - frame_size_after_saved_regs;
for (i = 0; i <= MAX_HARD_REG; i++)
if (!target_call_used_hard_reg_p (i) && bitmap_bit_p (used_hard_regs, i)) {
if (!target_call_used_hard_reg_p (i, MIR_T_UNDEF) && bitmap_bit_p (used_hard_regs, i)) {
if (i < V0_HARD_REG) {
gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_hard_reg_op (ctx, i),
_MIR_new_hard_reg_mem_op (ctx, MIR_T_I64, offset, FP_HARD_REG, MIR_NON_HARD_REG,
@ -1435,9 +1653,10 @@ static const struct pattern patterns[] = {
{MIR_BSTART, "r", "91000000:fffffc00 rd0 hn1f"}, /* Rd = sp */
{MIR_BEND, "r", "91000000:fffffc00 hd1f rn0"}, /* sp = Rn */
/* adr r9,PC-relative TableAddress; ldr r9,(r9,r,8);br r9; TableContent */
/* adr r10,PC-relative TableAddress; ldr r10,(r10,r,8);br r10; TableContent
We use r10 as r9 can be used if switch operand is memory. */
{MIR_SWITCH, "r $",
"10000000:ff000000 hd9 T; f8607800:ffe0fc00 hd9 hn9 rm0; d61f0000:fffffc00 hn9;"},
"10000000:ff000000 hda T; f8607800:ffe0fc00 hda hna rm0; d61f0000:fffffc00 hna;"},
};

@ -1,5 +1,5 @@
/* This file is a part of MIR project.
Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* We don't use TOC. So r2 is not necessary for the generated code. */
@ -63,7 +63,7 @@ static inline int target_fixed_hard_reg_p (MIR_reg_t hard_reg) {
|| hard_reg == TEMP_LDOUBLE_HARD_REG1 || hard_reg == TEMP_LDOUBLE_HARD_REG2);
}
static inline int target_call_used_hard_reg_p (MIR_reg_t hard_reg) {
static inline int target_call_used_hard_reg_p (MIR_reg_t hard_reg, MIR_type_t type) {
assert (hard_reg <= MAX_HARD_REG);
return ((R0_HARD_REG <= hard_reg && hard_reg <= R13_HARD_REG)
|| (F0_HARD_REG <= hard_reg && hard_reg <= F13_HARD_REG));
@ -138,7 +138,7 @@ DEF_VARR (label_ref_t);
DEF_VARR (MIR_code_reloc_t);
struct target_ctx {
unsigned char alloca_p, stack_arg_func_p, leaf_p, switch_p;
unsigned char alloca_p, block_arg_func_p, leaf_p, switch_p;
size_t param_save_area_size;
VARR (int) * pattern_indexes;
VARR (insn_pattern_info_t) * insn_pattern_info;
@ -149,7 +149,7 @@ struct target_ctx {
};
#define alloca_p gen_ctx->target_ctx->alloca_p
#define stack_arg_func_p gen_ctx->target_ctx->stack_arg_func_p
#define block_arg_func_p gen_ctx->target_ctx->block_arg_func_p
#define leaf_p gen_ctx->target_ctx->leaf_p
#define switch_p gen_ctx->target_ctx->switch_p
#define param_save_area_size gen_ctx->target_ctx->param_save_area_size
@ -165,12 +165,75 @@ static void gen_mov (gen_ctx_t gen_ctx, MIR_insn_t anchor, MIR_insn_code_t code,
gen_add_insn_before (gen_ctx, anchor, MIR_new_insn (gen_ctx->ctx, code, dst_op, src_op));
}
static void mir_blk_mov (uint64_t *to, uint64_t *from, uint64_t nwords) {
for (; nwords > 0; nwords--) *to++ = *from++;
}
static const char *BLK_MOV = "mir.blk_mov";
static const char *BLK_MOV_P = "mir.blk_mov.p";
static void gen_blk_mov (gen_ctx_t gen_ctx, MIR_insn_t anchor, size_t to_disp,
MIR_reg_t to_base_hard_reg, size_t from_disp, MIR_reg_t from_base_reg,
size_t qwords, int save_regs) {
MIR_context_t ctx = gen_ctx->ctx;
MIR_func_t func = curr_func_item->u.func;
MIR_item_t proto_item, func_import_item;
MIR_insn_t new_insn;
MIR_op_t ops[5], freg_op, treg_op, treg_op2, treg_op3;
treg_op = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
if (qwords <= 16) {
for (; qwords > 0; qwords--, to_disp += 8, from_disp += 8) {
gen_mov (gen_ctx, anchor, MIR_MOV, treg_op,
MIR_new_mem_op (ctx, MIR_T_I64, from_disp, from_base_reg, 0, 1));
gen_mov (gen_ctx, anchor, MIR_MOV,
_MIR_new_hard_reg_mem_op (ctx, MIR_T_I64, to_disp, to_base_hard_reg,
MIR_NON_HARD_REG, 1),
treg_op);
}
return;
}
treg_op2 = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
treg_op3 = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
/* Save arg regs: */
if (save_regs > 0) gen_mov (gen_ctx, anchor, MIR_MOV, treg_op, _MIR_new_hard_reg_op (ctx, 3));
if (save_regs > 1) gen_mov (gen_ctx, anchor, MIR_MOV, treg_op2, _MIR_new_hard_reg_op (ctx, 4));
if (save_regs > 2) gen_mov (gen_ctx, anchor, MIR_MOV, treg_op3, _MIR_new_hard_reg_op (ctx, 5));
/* call blk move: */
proto_item = _MIR_builtin_proto (ctx, curr_func_item->module, BLK_MOV_P, 0, NULL, 3, MIR_T_I64,
"to", MIR_T_I64, "from", MIR_T_I64, "nwords");
func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, BLK_MOV, mir_blk_mov);
freg_op = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
new_insn = MIR_new_insn (ctx, MIR_MOV, freg_op, MIR_new_ref_op (ctx, func_import_item));
gen_add_insn_before (gen_ctx, anchor, new_insn);
gen_add_insn_before (gen_ctx, anchor,
MIR_new_insn (gen_ctx->ctx, MIR_ADD, _MIR_new_hard_reg_op (ctx, 3),
_MIR_new_hard_reg_op (ctx, to_base_hard_reg),
MIR_new_int_op (ctx, to_disp)));
gen_add_insn_before (gen_ctx, anchor,
MIR_new_insn (gen_ctx->ctx, MIR_ADD, _MIR_new_hard_reg_op (ctx, 4),
MIR_new_reg_op (ctx, from_base_reg),
MIR_new_int_op (ctx, from_disp)));
gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_hard_reg_op (ctx, 5), MIR_new_int_op (ctx, qwords));
ops[0] = MIR_new_ref_op (ctx, proto_item);
ops[1] = freg_op;
ops[2] = _MIR_new_hard_reg_op (ctx, 3);
ops[3] = _MIR_new_hard_reg_op (ctx, 4);
ops[4] = _MIR_new_hard_reg_op (ctx, 5);
new_insn = MIR_new_insn_arr (ctx, MIR_CALL, 5, ops);
gen_add_insn_before (gen_ctx, anchor, new_insn);
/* Restore arg regs: */
if (save_regs > 0) gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_hard_reg_op (ctx, 3), treg_op);
if (save_regs > 1) gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_hard_reg_op (ctx, 4), treg_op2);
if (save_regs > 2) gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_hard_reg_op (ctx, 5), treg_op3);
}
static void machinize_call (gen_ctx_t gen_ctx, MIR_insn_t call_insn) {
MIR_context_t ctx = gen_ctx->ctx;
MIR_func_t func = curr_func_item->u.func;
MIR_proto_t proto = call_insn->ops[0].u.ref->u.proto;
int vararg_p = proto->vararg_p;
size_t nargs, nops = MIR_insn_nops (ctx, call_insn), start = proto->nres + 2;
size_t qwords, disp, nargs, nops = MIR_insn_nops (ctx, call_insn), start = proto->nres + 2;
size_t mem_size = 0, n_iregs = 0, n_fregs = 0;
MIR_type_t type, mem_type;
MIR_op_mode_t mode;
@ -198,9 +261,13 @@ static void machinize_call (gen_ctx_t gen_ctx, MIR_insn_t call_insn) {
}
for (size_t i = start; i < nops; i++) {
arg_op = call_insn->ops[i];
gen_assert (arg_op.mode == MIR_OP_REG || arg_op.mode == MIR_OP_HARD_REG);
gen_assert (arg_op.mode == MIR_OP_REG || arg_op.mode == MIR_OP_HARD_REG
|| (arg_op.mode == MIR_OP_MEM && MIR_all_blk_type_p (arg_op.u.mem.type)));
if (i - start < nargs) {
type = arg_vars[i - start].type;
} else if (call_insn->ops[i].mode == MIR_OP_MEM) {
type = arg_op.u.mem.type;
gen_assert (MIR_all_blk_type_p (type));
} else {
mode = call_insn->ops[i].value_mode; // ??? smaller ints
gen_assert (mode == MIR_OP_INT || mode == MIR_OP_UINT || mode == MIR_OP_FLOAT
@ -248,10 +315,32 @@ static void machinize_call (gen_ctx_t gen_ctx, MIR_insn_t call_insn) {
}
}
n_fregs += type == MIR_T_LD ? 2 : 1;
} else if (MIR_blk_type_p (type)) {
gen_assert (arg_op.mode == MIR_OP_MEM && arg_op.u.mem.disp >= 0 && arg_op.u.mem.index == 0);
qwords = (arg_op.u.mem.disp + 7) / 8;
for (disp = 0; qwords > 0 && n_iregs < 8; qwords--, n_iregs++, mem_size += 8, disp += 8) {
arg_reg_op = _MIR_new_hard_reg_op (ctx, R3_HARD_REG + n_iregs);
gen_mov (gen_ctx, call_insn, MIR_MOV, arg_reg_op,
MIR_new_mem_op (ctx, MIR_T_I64, disp, arg_op.u.mem.base, 0, 1));
setup_call_hard_reg_args (gen_ctx, call_insn, R3_HARD_REG + n_iregs);
}
if (qwords > 0)
gen_blk_mov (gen_ctx, call_insn, mem_size + PPC64_STACK_HEADER_SIZE, SP_HARD_REG, disp,
arg_op.u.mem.base, qwords, n_iregs);
mem_size += qwords * 8;
n_iregs += qwords;
continue;
} else if (type != MIR_T_F && type != MIR_T_D && type != MIR_T_LD && n_iregs < 8) {
if (ext_insn != NULL) gen_add_insn_before (gen_ctx, call_insn, ext_insn);
arg_reg_op = _MIR_new_hard_reg_op (ctx, R3_HARD_REG + n_iregs);
gen_mov (gen_ctx, call_insn, MIR_MOV, arg_reg_op, arg_op);
if (type != MIR_T_RBLK) {
gen_mov (gen_ctx, call_insn, MIR_MOV, arg_reg_op, arg_op);
} else {
assert (arg_op.mode == MIR_OP_MEM);
gen_mov (gen_ctx, call_insn, MIR_MOV, arg_reg_op, MIR_new_reg_op (ctx, arg_op.u.mem.base));
arg_reg_op = _MIR_new_hard_reg_mem_op (ctx, MIR_T_RBLK, arg_op.u.mem.disp,
R3_HARD_REG + n_iregs, MIR_NON_HARD_REG, 1);
}
call_insn->ops[i] = arg_reg_op;
} else { /* put arguments on the stack */
if (ext_insn != NULL) gen_add_insn_before (gen_ctx, call_insn, ext_insn);
@ -260,7 +349,13 @@ static void machinize_call (gen_ctx_t gen_ctx, MIR_insn_t call_insn) {
: type == MIR_T_D ? MIR_DMOV : type == MIR_T_LD ? MIR_LDMOV : MIR_MOV);
mem_op = _MIR_new_hard_reg_mem_op (ctx, mem_type, mem_size + PPC64_STACK_HEADER_SIZE,
SP_HARD_REG, MIR_NON_HARD_REG, 1);
gen_mov (gen_ctx, call_insn, new_insn_code, mem_op, arg_op);
if (type != MIR_T_RBLK) {
gen_mov (gen_ctx, call_insn, new_insn_code, mem_op, arg_op);
} else {
assert (arg_op.mode == MIR_OP_MEM);
gen_mov (gen_ctx, call_insn, new_insn_code, mem_op,
MIR_new_reg_op (ctx, arg_op.u.mem.base));
}
call_insn->ops[i] = mem_op;
}
mem_size += type == MIR_T_LD ? 16 : 8;
@ -350,6 +445,8 @@ static const char *LDNEG_P = "mir.ldneg.p";
static const char *VA_ARG_P = "mir.va_arg.p";
static const char *VA_ARG = "mir.va_arg";
static const char *VA_BLOCK_ARG_P = "mir.va_block_arg.p";
static const char *VA_BLOCK_ARG = "mir.va_block_arg";
static int64_t mir_ldeq (long double d1, long double d2) { return d1 == d2; }
static const char *LDEQ = "mir.ldeq";
@ -496,6 +593,13 @@ static int get_builtin (gen_ctx_t gen_ctx, MIR_insn_code_t code, MIR_item_t *pro
MIR_T_I64, "va", MIR_T_I64, "type");
*func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, VA_ARG, va_arg_builtin);
return 2;
case MIR_VA_BLOCK_ARG:
*proto_item
= _MIR_builtin_proto (ctx, curr_func_item->module, VA_BLOCK_ARG_P, 0, NULL, 4, MIR_T_I64,
"res", MIR_T_I64, "va", MIR_T_I64, "size", MIR_T_I64, "ncase");
*func_import_item
= _MIR_builtin_func (ctx, curr_func_item->module, VA_BLOCK_ARG, va_block_arg_builtin);
return 4;
default: return 0;
}
}
@ -507,9 +611,10 @@ static MIR_disp_t target_get_stack_slot_offset (gen_ctx_t gen_ctx, MIR_type_t ty
}
static void set_prev_sp_op (gen_ctx_t gen_ctx, MIR_insn_t anchor, MIR_op_t *prev_sp_op) {
if (!stack_arg_func_p) {
stack_arg_func_p = TRUE;
*prev_sp_op = _MIR_new_hard_reg_op (gen_ctx->ctx, R11_HARD_REG);
if (!block_arg_func_p) {
/* don't use r11: the param setup can contain a spilled param<-mem move which needs r11 as a temp */
block_arg_func_p = TRUE;
*prev_sp_op = _MIR_new_hard_reg_op (gen_ctx->ctx, R12_HARD_REG);
gen_mov (gen_ctx, anchor, MIR_MOV, *prev_sp_op,
_MIR_new_hard_reg_mem_op (gen_ctx->ctx, MIR_T_I64, 0, SP_HARD_REG, MIR_NON_HARD_REG,
1));
@ -524,11 +629,11 @@ static void target_machinize (gen_ctx_t gen_ctx) {
MIR_insn_t insn, next_insn, new_insn, anchor;
MIR_reg_t ret_reg;
MIR_op_t ret_reg_op, arg_reg_op, prev_sp_op, temp_op, arg_var_op;
size_t i, int_arg_num, fp_arg_num, disp, var_args_start;
size_t i, int_arg_num, fp_arg_num, disp, var_args_start, qwords, offset;
assert (curr_func_item->item_type == MIR_func_item);
func = curr_func_item->u.func;
stack_arg_func_p = FALSE;
block_arg_func_p = FALSE;
param_save_area_size = 0;
anchor = DLIST_HEAD (MIR_insn_t, func->insns);
if (func->vararg_p)
@ -544,7 +649,7 @@ static void target_machinize (gen_ctx_t gen_ctx) {
set_prev_sp_op (gen_ctx, anchor, &prev_sp_op);
arg_reg_op = _MIR_new_hard_reg_op (ctx, F14_HARD_REG);
gen_mov (gen_ctx, anchor, MIR_DMOV, arg_reg_op,
_MIR_new_hard_reg_mem_op (ctx, MIR_T_D, disp + 8, R11_HARD_REG, MIR_NON_HARD_REG,
_MIR_new_hard_reg_mem_op (ctx, MIR_T_D, disp + 8, R12_HARD_REG, MIR_NON_HARD_REG,
1));
}
arg_reg_op = _MIR_new_hard_reg_op (ctx, F1_HARD_REG + fp_arg_num);
@ -556,14 +661,33 @@ static void target_machinize (gen_ctx_t gen_ctx) {
set_prev_sp_op (gen_ctx, anchor, &prev_sp_op);
gen_mov (gen_ctx, anchor, type == MIR_T_F ? MIR_FMOV : type == MIR_T_D ? MIR_DMOV : MIR_LDMOV,
arg_var_op,
_MIR_new_hard_reg_mem_op (ctx, type, disp, R11_HARD_REG, MIR_NON_HARD_REG, 1));
_MIR_new_hard_reg_mem_op (ctx, type, disp, R12_HARD_REG, MIR_NON_HARD_REG, 1));
} else if (MIR_blk_type_p (type)) {
qwords = (VARR_GET (MIR_var_t, func->vars, i).size + 7) / 8;
offset = int_arg_num < 8 ? PPC64_STACK_HEADER_SIZE + int_arg_num * 8 : disp;
set_prev_sp_op (gen_ctx, anchor, &prev_sp_op);
for (; qwords > 0 && int_arg_num < 8; qwords--, int_arg_num++, disp += 8) {
if (!func->vararg_p)
gen_mov (gen_ctx, anchor, MIR_MOV,
_MIR_new_hard_reg_mem_op (ctx, MIR_T_I64,
PPC64_STACK_HEADER_SIZE + int_arg_num * 8,
R12_HARD_REG, MIR_NON_HARD_REG, 1),
_MIR_new_hard_reg_op (ctx, R3_HARD_REG + int_arg_num));
}
gen_add_insn_before (gen_ctx, anchor,
MIR_new_insn (ctx, MIR_ADD, arg_var_op,
_MIR_new_hard_reg_op (ctx, R12_HARD_REG),
MIR_new_int_op (ctx, offset)));
disp += qwords * 8;
int_arg_num += qwords;
continue;
} else if (int_arg_num < 8) { /* mov arg, arg_hard_reg */
arg_reg_op = _MIR_new_hard_reg_op (ctx, R3_HARD_REG + int_arg_num);
gen_mov (gen_ctx, anchor, MIR_MOV, arg_var_op, arg_reg_op);
} else { /* mov arg, arg_memory */
set_prev_sp_op (gen_ctx, anchor, &prev_sp_op);
gen_mov (gen_ctx, anchor, MIR_MOV, arg_var_op,
_MIR_new_hard_reg_mem_op (ctx, MIR_T_I64, disp, R11_HARD_REG, MIR_NON_HARD_REG, 1));
_MIR_new_hard_reg_mem_op (ctx, MIR_T_I64, disp, R12_HARD_REG, MIR_NON_HARD_REG, 1));
}
disp += type == MIR_T_LD ? 16 : 8;
int_arg_num += type == MIR_T_LD ? 2 : 1;
@ -595,29 +719,33 @@ static void target_machinize (gen_ctx_t gen_ctx) {
insn = new_insn;
}
if ((nargs = get_builtin (gen_ctx, code, &proto_item, &func_import_item)) > 0) {
if (code == MIR_VA_ARG) {
if (code == MIR_VA_ARG || code == MIR_VA_BLOCK_ARG) {
/* Use a builtin func call:
mov func_reg, func ref; mov flag_reg, <type>; call proto, func_reg, res_reg, va_reg,
flag_reg */
MIR_op_t ops[5], func_reg_op, flag_reg_op;
MIR_op_t res_reg_op = insn->ops[0], va_reg_op = insn->ops[1], mem_op = insn->ops[2];
mov func_reg, func ref; [mov reg3, type;] call proto, func_reg, res_reg, va_reg,
reg3 */
MIR_op_t ops[6], func_reg_op, reg_op3;
MIR_op_t res_reg_op = insn->ops[0], va_reg_op = insn->ops[1], op3 = insn->ops[2];
assert (res_reg_op.mode == MIR_OP_REG && va_reg_op.mode == MIR_OP_REG
&& mem_op.mode == MIR_OP_MEM);
&& op3.mode == (code == MIR_VA_ARG ? MIR_OP_MEM : MIR_OP_REG));
func_reg_op = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
flag_reg_op = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
reg_op3 = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
next_insn = new_insn
= MIR_new_insn (ctx, MIR_MOV, func_reg_op, MIR_new_ref_op (ctx, func_import_item));
gen_add_insn_before (gen_ctx, insn, new_insn);
new_insn = MIR_new_insn (ctx, MIR_MOV, flag_reg_op,
MIR_new_int_op (ctx, (int64_t) mem_op.u.mem.type));
gen_add_insn_before (gen_ctx, insn, new_insn);
if (code == MIR_VA_ARG) {
new_insn
= MIR_new_insn (ctx, MIR_MOV, reg_op3, MIR_new_int_op (ctx, (int64_t) op3.u.mem.type));
op3 = reg_op3;
gen_add_insn_before (gen_ctx, insn, new_insn);
}
ops[0] = MIR_new_ref_op (ctx, proto_item);
ops[1] = func_reg_op;
ops[2] = res_reg_op;
ops[3] = va_reg_op;
ops[4] = flag_reg_op;
new_insn = MIR_new_insn_arr (ctx, MIR_CALL, 5, ops);
ops[4] = op3;
if (code == MIR_VA_BLOCK_ARG) ops[5] = insn->ops[3];
new_insn = MIR_new_insn_arr (ctx, MIR_CALL, code == MIR_VA_ARG ? 5 : 6, ops);
gen_add_insn_before (gen_ctx, insn, new_insn);
gen_delete_insn (gen_ctx, insn);
} else { /* Use builtin: mov freg, func ref; call proto, freg, res_reg, op_reg[, op_reg2] */
@ -723,7 +851,7 @@ static void target_make_prolog_epilog (gen_ctx_t gen_ctx, bitmap_t used_hard_reg
isave (gen_ctx, anchor, PPC64_STACK_HEADER_SIZE + i * 8, i + R3_HARD_REG);
}
for (i = saved_iregs_num = saved_fregs_num = 0; i <= MAX_HARD_REG; i++)
if (!target_call_used_hard_reg_p (i) && bitmap_bit_p (used_hard_regs, i)) {
if (!target_call_used_hard_reg_p (i, MIR_T_UNDEF) && bitmap_bit_p (used_hard_regs, i)) {
if (i < F0_HARD_REG)
saved_iregs_num++;
else
@ -757,7 +885,7 @@ static void target_make_prolog_epilog (gen_ctx_t gen_ctx, bitmap_t used_hard_reg
MIR_NON_HARD_REG, 1),
_MIR_new_hard_reg_op (ctx, R2_HARD_REG)); /* mem[r1+toc_off] = r2 */
for (n = i = 0; i <= MAX_HARD_REG; i++)
if (!target_call_used_hard_reg_p (i) && bitmap_bit_p (used_hard_regs, i)) {
if (!target_call_used_hard_reg_p (i, MIR_T_UNDEF) && bitmap_bit_p (used_hard_regs, i)) {
if (i < F0_HARD_REG)
isave (gen_ctx, anchor, start_save_regs_offset + (n++) * 8, i);
else
@ -770,7 +898,7 @@ static void target_make_prolog_epilog (gen_ctx_t gen_ctx, bitmap_t used_hard_reg
assert (anchor->code == MIR_RET);
/* Restoring hard registers: */
for (i = n = 0; i <= MAX_HARD_REG; i++)
if (!target_call_used_hard_reg_p (i) && bitmap_bit_p (used_hard_regs, i)) {
if (!target_call_used_hard_reg_p (i, MIR_T_UNDEF) && bitmap_bit_p (used_hard_regs, i)) {
if (i < F0_HARD_REG) {
gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_hard_reg_op (ctx, i),
_MIR_new_hard_reg_mem_op (ctx, MIR_T_I64, start_save_regs_offset + (n++) * 8,
@ -972,11 +1100,11 @@ static const struct pattern patterns[] = {
{MIR_LDMOV, "r mld", "o50 rt0 m; o50 nt0 mn"}, /* lfd rt,disp-mem; lfd rt+1,disp+8-mem */
{MIR_LDMOV, "mld r", "o54 rt1 m; o54 nt1 mn"}, /* stfd rt,disp-mem; stfdx rt+1,disp+8-mem */
{MIR_LDMOV, "r mld0",
"o31 O444 ha10 hs0 hb0; o50 rt0 ha10; o50 nt0 ha10 i8"}, /* mr r10,r0; lfd rt,(r10); lfd
rt+1,8(r10) */
"o31 O444 ha11 hs0 hb0; o50 rt0 ha11; o50 nt0 ha11 i8"}, /* mr r11,r0; lfd rt,(r11); lfd
rt+1,8(r11) */
{MIR_LDMOV, "mld0 r",
"o31 O444 ha10 hs0 hb0; o54 rt1 ha10; o54 nt1 ha10 i8"}, /* mr r10,r0; stfd rt,(r10); stfdx
rt+1,8(r10) */
"o31 O444 ha11 hs0 hb0; o54 rt1 ha11; o54 nt1 ha11 i8"}, /* mr r11,r0; stfd rt,(r11); stfdx
rt+1,8(r11) */
{MIR_EXT8, "r r", "o31 O954 ra0 rs1"}, /* extsb ra,rs */
{MIR_EXT16, "r r", "o31 O922 ra0 rs1"}, /* extsh ra,rs */
@ -1336,7 +1464,7 @@ static void target_get_early_clobbered_hard_regs (MIR_insn_t insn, MIR_reg_t *hr
|| code == MIR_F2I || code == MIR_D2I) {
*hr1 = F0_HARD_REG;
} else if (code == MIR_LDMOV) { /* if mem base reg is R0 */
*hr1 = R10_HARD_REG;
*hr1 = R11_HARD_REG; /* don't use arg regs as ldmov can be used in param passing part */
} else if (code == MIR_CALL || code == MIR_INLINE) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
*hr1 = R12_HARD_REG;
@ -2218,7 +2346,7 @@ static uint8_t *target_translate (gen_ctx_t gen_ctx, size_t *len) {
if (insn->ops[2].u.i == 0) {
gen_mov (gen_ctx, insn, MIR_MOV, insn->ops[0], insn->ops[1]);
old_insn = insn;
insn = DLIST_NEXT (MIR_insn_t, insn);
insn = DLIST_PREV (MIR_insn_t, insn);
gen_delete_insn (gen_ctx, old_insn);
} else {
if (insn->ops[2].mode == MIR_OP_INT && insn->ops[2].u.i < 0) {
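The one-line iterator change above (DLIST_NEXT to DLIST_PREV after the rewritten insn is deleted) follows a general doubly-linked-list pattern: when a replacement has been linked in front of the current node and the current node removed, the walk resumes from a node before the deletion point so that the loop's normal advance still visits the replacement instead of skipping past it. The toy walk below only illustrates that pattern; node_t and the raw pointer surgery are stand-ins, not the MIR DLIST macros.

#include <stdio.h>

typedef struct node { struct node *prev, *next; int val; } node_t;

int main (void) {
  node_t a = {NULL, NULL, 1}, b = {NULL, NULL, 0}, c = {NULL, NULL, 3}, repl = {NULL, NULL, 7};
  a.next = &b; b.prev = &a; b.next = &c; c.prev = &b;
  for (node_t *cur = &a; cur != NULL; cur = cur->next) {
    if (cur->val == 0) {          /* node to be rewritten */
      repl.prev = cur->prev;      /* link the replacement in front of cur */
      repl.next = cur;
      cur->prev->next = &repl;
      cur->prev = &repl;
      node_t *old = cur;
      cur = repl.prev;            /* step back so cur->next lands on the replacement */
      old->prev->next = old->next; /* unlink the old node */
      old->next->prev = old->prev;
      continue;
    }
    printf ("%d ", cur->val);     /* "process" the node */
  }
  printf ("\n");                  /* prints: 1 7 3 */
  return 0;
}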

@ -1,5 +1,5 @@
/* This file is a part of MIR project.
Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2020-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
// ??? More patterns (ult, ugt, ule, uge w/o branches, multi-insn combining).
@ -62,7 +62,7 @@ static inline int target_fixed_hard_reg_p (MIR_reg_t hard_reg) {
|| hard_reg == TEMP_LDOUBLE_HARD_REG2);
}
static inline int target_call_used_hard_reg_p (MIR_reg_t hard_reg) {
static inline int target_call_used_hard_reg_p (MIR_reg_t hard_reg, MIR_type_t type) {
gen_assert (hard_reg <= MAX_HARD_REG);
return ((R0_HARD_REG <= hard_reg && hard_reg <= R5_HARD_REG) || hard_reg == R14_HARD_REG
|| (F0_HARD_REG <= hard_reg && hard_reg <= F7_HARD_REG));
@ -143,7 +143,7 @@ DEF_VARR (MIR_code_reloc_t);
struct target_ctx {
unsigned char alloca_p, leaf_p, stack_param_p, switch_p;
size_t param_save_area_size, ld_value_save_area_size;
size_t param_save_area_size, blk_ld_value_save_area_size;
VARR (int) * pattern_indexes;
VARR (insn_pattern_info_t) * insn_pattern_info;
VARR (uint8_t) * result_code;
@ -160,7 +160,7 @@ struct target_ctx {
#define stack_param_p gen_ctx->target_ctx->stack_param_p
#define switch_p gen_ctx->target_ctx->switch_p
#define param_save_area_size gen_ctx->target_ctx->param_save_area_size
#define ld_value_save_area_size gen_ctx->target_ctx->ld_value_save_area_size
#define blk_ld_value_save_area_size gen_ctx->target_ctx->blk_ld_value_save_area_size
#define pattern_indexes gen_ctx->target_ctx->pattern_indexes
#define insn_pattern_info gen_ctx->target_ctx->insn_pattern_info
#define result_code gen_ctx->target_ctx->result_code
@ -176,13 +176,84 @@ static void gen_mov (gen_ctx_t gen_ctx, MIR_insn_t anchor, MIR_insn_code_t code,
gen_add_insn_before (gen_ctx, anchor, MIR_new_insn (gen_ctx->ctx, code, dst_op, src_op));
}
static void mir_blk_mov (uint64_t *to, uint64_t *from, uint64_t nwords) {
for (; nwords > 0; nwords--) *to++ = *from++;
}
static const char *BLK_MOV = "mir.blk_mov";
static const char *BLK_MOV_P = "mir.blk_mov.p";
static void gen_blk_mov (gen_ctx_t gen_ctx, MIR_insn_t anchor, size_t to_disp,
MIR_reg_t to_base_hard_reg, size_t from_disp, MIR_reg_t from_base_reg,
size_t qwords, int save_regs) {
MIR_context_t ctx = gen_ctx->ctx;
MIR_func_t func = curr_func_item->u.func;
MIR_item_t proto_item, func_import_item;
MIR_insn_t new_insn;
MIR_op_t ops[5], freg_op, treg_op, treg_op2, treg_op3;
treg_op = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
if (qwords <= 16) {
for (; qwords > 0; qwords--, to_disp += 8, from_disp += 8) {
gen_mov (gen_ctx, anchor, MIR_MOV, treg_op,
MIR_new_mem_op (ctx, MIR_T_I64, from_disp, from_base_reg, 0, 1));
gen_mov (gen_ctx, anchor, MIR_MOV,
_MIR_new_hard_reg_mem_op (ctx, MIR_T_I64, to_disp, to_base_hard_reg,
MIR_NON_HARD_REG, 1),
treg_op);
}
return;
}
treg_op2 = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
treg_op3 = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
/* Save arg regs: */
if (save_regs > 0)
gen_mov (gen_ctx, anchor, MIR_MOV, treg_op, _MIR_new_hard_reg_op (ctx, R2_HARD_REG));
if (save_regs > 1)
gen_mov (gen_ctx, anchor, MIR_MOV, treg_op2, _MIR_new_hard_reg_op (ctx, R3_HARD_REG));
if (save_regs > 2)
gen_mov (gen_ctx, anchor, MIR_MOV, treg_op3, _MIR_new_hard_reg_op (ctx, R4_HARD_REG));
/* call blk move: */
proto_item = _MIR_builtin_proto (ctx, curr_func_item->module, BLK_MOV_P, 0, NULL, 3, MIR_T_I64,
"to", MIR_T_I64, "from", MIR_T_I64, "nwords");
func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, BLK_MOV, mir_blk_mov);
freg_op = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
new_insn = MIR_new_insn (ctx, MIR_MOV, freg_op, MIR_new_ref_op (ctx, func_import_item));
gen_add_insn_before (gen_ctx, anchor, new_insn);
gen_add_insn_before (gen_ctx, anchor,
MIR_new_insn (gen_ctx->ctx, MIR_ADD, _MIR_new_hard_reg_op (ctx, R2_HARD_REG),
_MIR_new_hard_reg_op (ctx, to_base_hard_reg),
MIR_new_int_op (ctx, to_disp)));
gen_add_insn_before (gen_ctx, anchor,
MIR_new_insn (gen_ctx->ctx, MIR_ADD, _MIR_new_hard_reg_op (ctx, R3_HARD_REG),
MIR_new_reg_op (ctx, from_base_reg),
MIR_new_int_op (ctx, from_disp)));
gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_hard_reg_op (ctx, R4_HARD_REG),
MIR_new_int_op (ctx, qwords));
ops[0] = MIR_new_ref_op (ctx, proto_item);
ops[1] = freg_op;
ops[2] = _MIR_new_hard_reg_op (ctx, R2_HARD_REG);
ops[3] = _MIR_new_hard_reg_op (ctx, R3_HARD_REG);
ops[4] = _MIR_new_hard_reg_op (ctx, R4_HARD_REG);
new_insn = MIR_new_insn_arr (ctx, MIR_CALL, 5, ops);
gen_add_insn_before (gen_ctx, anchor, new_insn);
/* Restore arg regs: */
if (save_regs > 0)
gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_hard_reg_op (ctx, R2_HARD_REG), treg_op);
if (save_regs > 1)
gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_hard_reg_op (ctx, R3_HARD_REG), treg_op2);
if (save_regs > 2)
gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_hard_reg_op (ctx, R4_HARD_REG), treg_op3);
}
static void machinize_call (gen_ctx_t gen_ctx, MIR_insn_t call_insn) {
MIR_context_t ctx = gen_ctx->ctx;
MIR_func_t func = curr_func_item->u.func;
MIR_proto_t proto = call_insn->ops[0].u.ref->u.proto;
int vararg_p = proto->vararg_p;
size_t nargs, nops = MIR_insn_nops (ctx, call_insn), start = proto->nres + 2;
size_t param_mem_size, call_ld_value_area_size, ld_n_iregs, n_iregs, n_fregs, ld_value_disp;
size_t param_mem_size, call_blk_ld_value_area_size, ld_n_iregs, n_iregs, n_fregs;
size_t qwords, blk_ld_value_disp;
MIR_type_t type, mem_type;
MIR_op_mode_t mode;
MIR_var_t *arg_vars = NULL;
@ -206,15 +277,19 @@ static void machinize_call (gen_ctx_t gen_ctx, MIR_insn_t call_insn) {
call_insn->ops[1] = temp_op;
gen_add_insn_before (gen_ctx, call_insn, new_insn);
}
n_iregs = n_fregs = param_mem_size = call_ld_value_area_size = 0;
n_iregs = n_fregs = param_mem_size = call_blk_ld_value_area_size = 0;
for (size_t i = 2; i < nops; i++) {
/* process long double results and args to calculate memory for them: */
arg_op = call_insn->ops[i];
/* process long double results and ld and block args to calculate memory for them: */
if (i < start) {
type = proto->res_types[i - 2];
} else if (i - start < nargs) {
type = arg_vars[i - start].type;
} else if (arg_op.mode == MIR_OP_MEM) {
type = arg_op.u.mem.type;
gen_assert (MIR_all_blk_type_p (type));
} else {
mode = call_insn->ops[i].value_mode; // ??? smaller ints
mode = arg_op.value_mode; // ??? smaller ints
gen_assert (mode == MIR_OP_INT || mode == MIR_OP_UINT || mode == MIR_OP_FLOAT
|| mode == MIR_OP_DOUBLE || mode == MIR_OP_LDOUBLE);
if (mode == MIR_OP_FLOAT)
@ -223,28 +298,37 @@ static void machinize_call (gen_ctx_t gen_ctx, MIR_insn_t call_insn) {
type = mode == MIR_OP_DOUBLE ? MIR_T_D : mode == MIR_OP_LDOUBLE ? MIR_T_LD : MIR_T_I64;
}
if (type != MIR_T_LD && i < start) continue;
if (type == MIR_T_LD) call_ld_value_area_size += 16;
if (type == MIR_T_LD)
call_blk_ld_value_area_size += 16;
else if (MIR_blk_type_p (type)) {
gen_assert (arg_op.mode == MIR_OP_MEM && arg_op.u.mem.disp >= 0 && arg_op.u.mem.index == 0);
call_blk_ld_value_area_size += (arg_op.u.mem.disp + 7) / 8 * 8;
}
if ((type == MIR_T_F || type == MIR_T_D) && n_fregs < 4) {
/* put arguments to argument hard regs: */
n_fregs++;
} else if (type != MIR_T_F && type != MIR_T_D && n_iregs < 5) {
} else if (type != MIR_T_F && type != MIR_T_D && n_iregs < 5) { /* RBLK too */
n_iregs++;
} else { /* put arguments on the stack */
param_mem_size += 8;
}
}
if (param_save_area_size < param_mem_size) param_save_area_size = param_mem_size;
if (ld_value_save_area_size < call_ld_value_area_size)
ld_value_save_area_size = call_ld_value_area_size;
ld_value_disp = param_mem_size;
if (blk_ld_value_save_area_size < call_blk_ld_value_area_size)
blk_ld_value_save_area_size = call_blk_ld_value_area_size;
blk_ld_value_disp = param_mem_size;
param_mem_size = n_fregs = n_iregs = 0;
for (size_t i = 2; i < nops; i++) { /* process args and ???long double results: */
arg_op = call_insn->ops[i];
gen_assert (arg_op.mode == MIR_OP_REG || arg_op.mode == MIR_OP_HARD_REG);
gen_assert (arg_op.mode == MIR_OP_REG || arg_op.mode == MIR_OP_HARD_REG
|| (arg_op.mode == MIR_OP_MEM && MIR_all_blk_type_p (arg_op.u.mem.type)));
if (i < start) {
type = proto->res_types[i - 2];
} else if (i - start < nargs) {
type = arg_vars[i - start].type;
} else if (call_insn->ops[i].mode == MIR_OP_MEM) {
type = call_insn->ops[i].u.mem.type;
gen_assert (MIR_all_blk_type_p (type));
} else {
mode = call_insn->ops[i].value_mode; // ??? smaller ints
gen_assert (mode == MIR_OP_INT || mode == MIR_OP_UINT || mode == MIR_OP_DOUBLE
@ -258,17 +342,24 @@ static void machinize_call (gen_ctx_t gen_ctx, MIR_insn_t call_insn) {
ext_insn = MIR_new_insn (ctx, ext_code, temp_op, arg_op);
call_insn->ops[i] = arg_op = temp_op;
}
if (type == MIR_T_LD) {
if (i >= start) { /* put arg value in saved ld value area: */
mem_op = _MIR_new_hard_reg_mem_op (ctx, MIR_T_LD, ld_value_disp + S390X_STACK_HEADER_SIZE,
FP_HARD_REG, MIR_NON_HARD_REG, 1);
gen_mov (gen_ctx, call_insn, MIR_LDMOV, mem_op, arg_op);
if (type == MIR_T_LD || MIR_blk_type_p (type)) {
if (i >= start) { /* put arg value in saved blk/ld value area: */
if (type == MIR_T_LD) {
mem_op
= _MIR_new_hard_reg_mem_op (ctx, MIR_T_LD, blk_ld_value_disp + S390X_STACK_HEADER_SIZE,
FP_HARD_REG, MIR_NON_HARD_REG, 1);
gen_mov (gen_ctx, call_insn, MIR_LDMOV, mem_op, arg_op);
} else {
qwords = (arg_op.u.mem.disp + 7) / 8;
gen_blk_mov (gen_ctx, call_insn, S390X_STACK_HEADER_SIZE + blk_ld_value_disp, FP_HARD_REG,
0, arg_op.u.mem.base, qwords, n_iregs);
}
}
arg_op = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
new_insn = MIR_new_insn (ctx, MIR_ADD, arg_op, _MIR_new_hard_reg_op (ctx, FP_HARD_REG),
MIR_new_int_op (ctx, S390X_STACK_HEADER_SIZE + ld_value_disp));
MIR_new_int_op (ctx, S390X_STACK_HEADER_SIZE + blk_ld_value_disp));
gen_add_insn_before (gen_ctx, call_insn, new_insn);
ld_value_disp += 16;
blk_ld_value_disp += type == MIR_T_LD ? 16 : qwords * 8;
}
mem_type = type == MIR_T_F || type == MIR_T_D ? type : MIR_T_I64;
if ((type == MIR_T_F || type == MIR_T_D) && n_fregs < 4) {
@ -281,7 +372,14 @@ static void machinize_call (gen_ctx_t gen_ctx, MIR_insn_t call_insn) {
} else if (type != MIR_T_F && type != MIR_T_D && n_iregs < 5) {
if (ext_insn != NULL) gen_add_insn_before (gen_ctx, call_insn, ext_insn);
arg_reg_op = _MIR_new_hard_reg_op (ctx, R2_HARD_REG + n_iregs);
gen_mov (gen_ctx, call_insn, MIR_MOV, arg_reg_op, arg_op);
if (type != MIR_T_RBLK) {
gen_mov (gen_ctx, call_insn, MIR_MOV, arg_reg_op, arg_op);
} else {
assert (arg_op.mode == MIR_OP_MEM);
gen_mov (gen_ctx, call_insn, MIR_MOV, arg_reg_op, MIR_new_reg_op (ctx, arg_op.u.mem.base));
arg_reg_op = _MIR_new_hard_reg_mem_op (ctx, MIR_T_RBLK, arg_op.u.mem.disp,
R2_HARD_REG + n_iregs, MIR_NON_HARD_REG, 1);
}
if (i >= start) call_insn->ops[i] = arg_reg_op; /* don't change LD return yet */
n_iregs++;
} else { /* put arguments on the stack: */
@ -289,13 +387,19 @@ static void machinize_call (gen_ctx_t gen_ctx, MIR_insn_t call_insn) {
new_insn_code = (type == MIR_T_F ? MIR_FMOV : type == MIR_T_D ? MIR_DMOV : MIR_MOV);
mem_op = _MIR_new_hard_reg_mem_op (ctx, mem_type, param_mem_size + S390X_STACK_HEADER_SIZE,
SP_HARD_REG, MIR_NON_HARD_REG, 1);
gen_mov (gen_ctx, call_insn, new_insn_code, mem_op, arg_op);
if (type != MIR_T_RBLK) {
gen_mov (gen_ctx, call_insn, new_insn_code, mem_op, arg_op);
} else {
assert (arg_op.mode == MIR_OP_MEM);
gen_mov (gen_ctx, call_insn, new_insn_code, mem_op,
MIR_new_reg_op (ctx, arg_op.u.mem.base));
}
if (i >= start) call_insn->ops[i] = mem_op;
param_mem_size += 8;
}
}
ld_n_iregs = n_iregs = n_fregs = 0;
ld_value_disp = param_mem_size;
blk_ld_value_disp = param_mem_size;
for (size_t i = 0; i < proto->nres; i++) {
ret_op = call_insn->ops[i + 2];
gen_assert (ret_op.mode == MIR_OP_REG || ret_op.mode == MIR_OP_HARD_REG);
@ -303,14 +407,14 @@ static void machinize_call (gen_ctx_t gen_ctx, MIR_insn_t call_insn) {
if (type == MIR_T_LD) { /* returned by address */
new_insn_code = MIR_LDMOV;
call_res_op = ret_val_op
= _MIR_new_hard_reg_mem_op (ctx, MIR_T_LD, S390X_STACK_HEADER_SIZE + ld_value_disp,
= _MIR_new_hard_reg_mem_op (ctx, MIR_T_LD, S390X_STACK_HEADER_SIZE + blk_ld_value_disp,
FP_HARD_REG, MIR_NON_HARD_REG, 1);
if (n_iregs < 5) { /* use it as a call result to keep assignment to ld_n_iregs: */
call_res_op = _MIR_new_hard_reg_mem_op (ctx, MIR_T_LD, 0, R2_HARD_REG + ld_n_iregs,
MIR_NON_HARD_REG, 1);
ld_n_iregs++;
}
ld_value_disp += 16;
blk_ld_value_disp += 16;
} else if ((type == MIR_T_F || type == MIR_T_D) && n_fregs < 4) {
new_insn_code = type == MIR_T_F ? MIR_FMOV : MIR_DMOV;
call_res_op = ret_val_op = _MIR_new_hard_reg_op (ctx, F0_HARD_REG + n_fregs * 2);
@ -386,6 +490,8 @@ static const char *LDNEG_P = "mir.ldneg.p";
static const char *VA_ARG_P = "mir.va_arg.p";
static const char *VA_ARG = "mir.va_arg";
static const char *VA_BLOCK_ARG_P = "mir.va_block_arg.p";
static const char *VA_BLOCK_ARG = "mir.va_block_arg";
static int64_t mir_ldeq (long double d1, long double d2) { return d1 == d2; }
static const char *LDEQ = "mir.ldeq";
@ -532,6 +638,13 @@ static int get_builtin (gen_ctx_t gen_ctx, MIR_insn_code_t code, MIR_item_t *pro
MIR_T_I64, "va", MIR_T_I64, "type");
*func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, VA_ARG, va_arg_builtin);
return 2;
case MIR_VA_BLOCK_ARG:
*proto_item
= _MIR_builtin_proto (ctx, curr_func_item->module, VA_BLOCK_ARG_P, 0, NULL, 4, MIR_T_I64,
"res", MIR_T_I64, "va", MIR_T_I64, "size", MIR_T_I64, "ncase");
*func_import_item
= _MIR_builtin_func (ctx, curr_func_item->module, VA_BLOCK_ARG, va_block_arg_builtin);
return 4;
default: return 0;
}
}
@ -540,7 +653,7 @@ static MIR_disp_t target_get_stack_slot_offset (gen_ctx_t gen_ctx, MIR_type_t ty
MIR_reg_t slot) {
/* slot is 0, 1, ... */
return ((MIR_disp_t) slot * 8 + S390X_STACK_HEADER_SIZE + param_save_area_size
+ ld_value_save_area_size);
+ blk_ld_value_save_area_size);
}
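A quick numeric check of the slot-offset formula above; the concrete sizes stand in for S390X_STACK_HEADER_SIZE and the two save areas and are invented for illustration, only the shape slot * 8 + header + param save area + blk/ld save area mirrors the code.

#include <stdio.h>

int main (void) {
  long header = 160, param_save = 32, blk_ld_save = 48; /* illustrative sizes only */
  for (long slot = 0; slot < 3; slot++)
    printf ("slot %ld -> byte offset %ld\n", slot,
            slot * 8 + header + param_save + blk_ld_save);
  return 0; /* prints 240, 248, 256 with these numbers */
}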
static void set_prev_sp_reg (gen_ctx_t gen_ctx, MIR_insn_t anchor, int *prev_sp_set_p,
@ -627,7 +740,7 @@ static void target_machinize (gen_ctx_t gen_ctx) {
stack_param_p = disp != 0;
switch_p = alloca_p = FALSE;
leaf_p = TRUE;
param_save_area_size = ld_value_save_area_size = 0;
param_save_area_size = blk_ld_value_save_area_size = 0;
for (insn = DLIST_HEAD (MIR_insn_t, func->insns); insn != NULL; insn = next_insn) {
MIR_item_t proto_item, func_import_item;
int nargs;
@ -652,29 +765,33 @@ static void target_machinize (gen_ctx_t gen_ctx) {
insn = new_insn;
}
if ((nargs = get_builtin (gen_ctx, code, &proto_item, &func_import_item)) > 0) {
if (code == MIR_VA_ARG) {
if (code == MIR_VA_ARG || code == MIR_VA_BLOCK_ARG) {
/* Use a builtin func call:
mov func_reg, func ref; mov flag_reg, <type>; call proto, func_reg, res_reg, va_reg,
flag_reg */
MIR_op_t ops[5], func_reg_op, flag_reg_op;
MIR_op_t res_reg_op = insn->ops[0], va_reg_op = insn->ops[1], mem_op = insn->ops[2];
mov func_reg, func ref; [mov reg3, type;] call proto, func_reg, res_reg, va_reg,
reg3 */
MIR_op_t ops[6], func_reg_op, reg_op3;
MIR_op_t res_reg_op = insn->ops[0], va_reg_op = insn->ops[1], op3 = insn->ops[2];
gen_assert (res_reg_op.mode == MIR_OP_REG && va_reg_op.mode == MIR_OP_REG
&& mem_op.mode == MIR_OP_MEM);
assert (res_reg_op.mode == MIR_OP_REG && va_reg_op.mode == MIR_OP_REG
&& op3.mode == (code == MIR_VA_ARG ? MIR_OP_MEM : MIR_OP_REG));
func_reg_op = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
flag_reg_op = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
reg_op3 = MIR_new_reg_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
next_insn = new_insn
= MIR_new_insn (ctx, MIR_MOV, func_reg_op, MIR_new_ref_op (ctx, func_import_item));
gen_add_insn_before (gen_ctx, insn, new_insn);
new_insn = MIR_new_insn (ctx, MIR_MOV, flag_reg_op,
MIR_new_int_op (ctx, (int64_t) mem_op.u.mem.type));
gen_add_insn_before (gen_ctx, insn, new_insn);
if (code == MIR_VA_ARG) {
new_insn
= MIR_new_insn (ctx, MIR_MOV, reg_op3, MIR_new_int_op (ctx, (int64_t) op3.u.mem.type));
op3 = reg_op3;
gen_add_insn_before (gen_ctx, insn, new_insn);
}
ops[0] = MIR_new_ref_op (ctx, proto_item);
ops[1] = func_reg_op;
ops[2] = res_reg_op;
ops[3] = va_reg_op;
ops[4] = flag_reg_op;
new_insn = MIR_new_insn_arr (ctx, MIR_CALL, 5, ops);
ops[4] = op3;
if (code == MIR_VA_BLOCK_ARG) ops[5] = insn->ops[3];
new_insn = MIR_new_insn_arr (ctx, MIR_CALL, code == MIR_VA_ARG ? 5 : 6, ops);
gen_add_insn_before (gen_ctx, insn, new_insn);
gen_delete_insn (gen_ctx, insn);
} else { /* Use builtin: mov freg, func ref; call proto, freg, res_reg, op_reg[, op_reg2] */
@ -815,7 +932,7 @@ static void target_make_prolog_epilog (gen_ctx_t gen_ctx, bitmap_t used_hard_reg
fsave (gen_ctx, anchor, S390X_FP_REG_ARG_SAVE_AREA_START + i * 8, i * 2 + F0_HARD_REG);
}
for (i = saved_fregs_num = 0; i <= MAX_HARD_REG; i++)
if (!target_call_used_hard_reg_p (i) && bitmap_bit_p (used_hard_regs, i)) {
if (!target_call_used_hard_reg_p (i, MIR_T_UNDEF) && bitmap_bit_p (used_hard_regs, i)) {
saved_regs_p = TRUE;
if (i >= F0_HARD_REG) saved_fregs_num++;
}
@ -825,7 +942,7 @@ static void target_make_prolog_epilog (gen_ctx_t gen_ctx, bitmap_t used_hard_reg
r14_reg_op = _MIR_new_hard_reg_op (ctx, R14_HARD_REG);
r15_reg_op = _MIR_new_hard_reg_op (ctx, R15_HARD_REG);
/* Prologue: */
frame_size = (param_save_area_size + S390X_STACK_HEADER_SIZE + ld_value_save_area_size
frame_size = (param_save_area_size + S390X_STACK_HEADER_SIZE + blk_ld_value_save_area_size
+ stack_slots_num * 8);
start_saved_fregs_offset = frame_size;
frame_size += saved_fregs_num * 8;
@ -839,7 +956,7 @@ static void target_make_prolog_epilog (gen_ctx_t gen_ctx, bitmap_t used_hard_reg
R15_HARD_REG, MIR_NON_HARD_REG, 1),
r11_reg_op); /* mem[r15+76] = r11 */
for (i = R2_HARD_REG; i < R15_HARD_REG; i++) /* exclude r15 */
if (!target_call_used_hard_reg_p (i) && bitmap_bit_p (used_hard_regs, i)
if (!target_call_used_hard_reg_p (i, MIR_T_UNDEF) && bitmap_bit_p (used_hard_regs, i)
&& (i != 6 || !func->vararg_p))
isave (gen_ctx, anchor, S390X_GP_REG_RSAVE_AREA_START + (i - R2_HARD_REG) * 8, i);
gen_mov (gen_ctx, anchor, MIR_MOV, r0_reg_op, r15_reg_op); /* r0 = r15 */
@ -849,7 +966,7 @@ static void target_make_prolog_epilog (gen_ctx_t gen_ctx, bitmap_t used_hard_reg
_MIR_new_hard_reg_mem_op (ctx, MIR_T_I64, 0, R15_HARD_REG, MIR_NON_HARD_REG, 1),
r0_reg_op); /* mem[r15] = r0 */
for (n = 0, i = F0_HARD_REG; i <= MAX_HARD_REG; i++)
if (!target_call_used_hard_reg_p (i) && bitmap_bit_p (used_hard_regs, i))
if (!target_call_used_hard_reg_p (i, MIR_T_UNDEF) && bitmap_bit_p (used_hard_regs, i))
fsave (gen_ctx, anchor, start_saved_fregs_offset + (n++) * 8, i);
gen_mov (gen_ctx, anchor, MIR_MOV, r11_reg_op, r15_reg_op); /* r11 = r15 */
/* Epilogue: */
@ -857,7 +974,7 @@ static void target_make_prolog_epilog (gen_ctx_t gen_ctx, bitmap_t used_hard_reg
gen_assert (anchor->code == MIR_RET || anchor->code == MIR_JMP);
/* Restoring fp hard registers: */
for (n = 0, i = F0_HARD_REG; i <= MAX_HARD_REG; i++)
if (!target_call_used_hard_reg_p (i) && bitmap_bit_p (used_hard_regs, i))
if (!target_call_used_hard_reg_p (i, MIR_T_UNDEF) && bitmap_bit_p (used_hard_regs, i))
gen_mov (gen_ctx, anchor, MIR_DMOV, _MIR_new_hard_reg_op (ctx, i),
_MIR_new_hard_reg_mem_op (ctx, MIR_T_D, start_saved_fregs_offset + (n++) * 8,
R11_HARD_REG, MIR_NON_HARD_REG, 1));
@ -865,7 +982,7 @@ static void target_make_prolog_epilog (gen_ctx_t gen_ctx, bitmap_t used_hard_reg
gen_add_insn_before (gen_ctx, anchor, new_insn); /* r15 = r11 + frame_size */
/* Restore saved gp regs (including r11 and excluding r15) and r14 */
for (i = R2_HARD_REG; i < R15_HARD_REG; i++)
if (!target_call_used_hard_reg_p (i) && bitmap_bit_p (used_hard_regs, i))
if (!target_call_used_hard_reg_p (i, MIR_T_UNDEF) && bitmap_bit_p (used_hard_regs, i))
gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_hard_reg_op (ctx, i),
_MIR_new_hard_reg_mem_op (ctx, MIR_T_I64,
S390X_GP_REG_RSAVE_AREA_START + (i - R2_HARD_REG) * 8,

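Editor's note: the machinize changes above lower MIR_VA_BLOCK_ARG into a call to a new mir.va_block_arg builtin whose prototype (no results; res, va, size, ncase as I64 args) is registered in get_builtin. The sketch below shows what such a builtin amounts to; it mirrors the ppc64 va_block_arg_builtin that appears later in this diff, and the struct name and arg_area field are placeholders standing in for the real target-specific va_list layout.

#include <stdint.h>
#include <string.h>

/* Placeholder va_list layout; real targets use their ABI-specific one. */
struct va_list_sketch { char *arg_area; };

void va_block_arg_sketch (void *res, void *p, size_t s, uint64_t ncase) {
  struct va_list_sketch *va = p;
  (void) ncase;                    /* BLK kind; ignored in this sketch */
  memcpy (res, va->arg_area, s);   /* copy the block argument out of the arg area */
  va->arg_area += (s + 7) / 8 * 8; /* advance by whole 8-byte slots */
}
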
@ -1,5 +1,5 @@
/* This file is a part of MIR project.
Copyright (C) 2018-2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2018-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Stub for MIR generator machine dependent file. It contains
definitions used by MIR generator. You can use this file for
@ -27,9 +27,9 @@ enum {
F7_HARD_REG
};
static const MIR_reg_t MAX_HARD_REG = F7_HARD_REG; /* max value for the previous regs */
static const MIR_reg_t FP_HARD_REG = R6_HARD_REG; /* stack frame pointer according ABI */
static const MIR_reg_t SP_HARD_REG = R7_HARD_REG; /* stack pointer according ABI */
static const MIR_reg_t MAX_HARD_REG = F7_HARD_REG; /* max value for the previous regs */
static const MIR_reg_t FP_HARD_REG = R6_HARD_REG; /* stack frame pointer according ABI */
static const MIR_reg_t SP_HARD_REG = R7_HARD_REG; /* stack pointer according ABI */
const MIR_reg_t TEMP_INT_HARD_REG1 = R2_HARD_REG, TEMP_INT_HARD_REG2 = R3_HARD_REG;
const MIR_reg_t TEMP_FLOAT_HARD_REG1 = F2_HARD_REG, TEMP_FLOAT_HARD_REG2 = F3_HARD_REG;
@ -49,11 +49,11 @@ static inline int target_hard_reg_type_ok_p (MIR_reg_t hard_reg, MIR_type_t type
static inline int target_fixed_hard_reg_p (MIR_reg_t hard_reg) {
assert (hard_reg <= MAX_HARD_REG);
return (hard_reg == FP_HARD_REG || hard_reg == SP_HARD_REG
|| hard_reg == TEMP_INT_HARD_REG1 || hard_reg == TEMP_INT_HARD_REG2
|| hard_reg == TEMP_FLOAT_HARD_REG1 || hard_reg == TEMP_FLOAT_HARD_REG2
|| hard_reg == TEMP_DOUBLE_HARD_REG1 || hard_reg == TEMP_DOUBLE_HARD_REG2
|| hard_reg == TEMP_LDOUBLE_HARD_REG1 || hard_reg == TEMP_LDOUBLE_HARD_REG2);
return (hard_reg == FP_HARD_REG || hard_reg == SP_HARD_REG || hard_reg == TEMP_INT_HARD_REG1
|| hard_reg == TEMP_INT_HARD_REG2 || hard_reg == TEMP_FLOAT_HARD_REG1
|| hard_reg == TEMP_FLOAT_HARD_REG2 || hard_reg == TEMP_DOUBLE_HARD_REG1
|| hard_reg == TEMP_DOUBLE_HARD_REG2 || hard_reg == TEMP_LDOUBLE_HARD_REG1
|| hard_reg == TEMP_LDOUBLE_HARD_REG2);
}
static inline int target_call_used_hard_reg_p (MIR_reg_t hard_reg) {

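Editor's note: the stub above still shows the one-argument target_call_used_hard_reg_p, while the s390x prologue/epilogue hunks earlier now pass a second MIR_type_t argument (MIR_T_UNDEF). A stub consistent with those call sites could plausibly look like the following; the body is a placeholder assumption, since the updated stub is not visible in this diff, and it relies on the surrounding stub definitions (MIR_reg_t, MIR_type_t, MAX_HARD_REG, TRUE).

static inline int target_call_used_hard_reg_p (MIR_reg_t hard_reg, MIR_type_t type) {
  (void) type; /* lets a target treat a register as call-clobbered only for some value types */
  assert (hard_reg <= MAX_HARD_REG);
  return TRUE; /* placeholder policy for the stub target; an assumption, not upstream code */
}
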
File diff suppressed because it is too large

File diff suppressed because it is too large

@ -1,5 +1,5 @@
/* This file is a part of MIR project.
Copyright (C) 2018-2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2018-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
#ifndef MIR_GEN_H
@ -12,11 +12,12 @@
#define MIR_NO_GEN_DEBUG 0
#endif
extern void MIR_gen_init (MIR_context_t ctx);
extern void MIR_gen_set_debug_file (MIR_context_t ctx, FILE *f);
extern void MIR_gen_set_optimize_level (MIR_context_t ctx, unsigned int level);
extern void *MIR_gen (MIR_context_t ctx, MIR_item_t func_item);
extern void MIR_gen_init (MIR_context_t ctx, int gens_num);
extern void MIR_gen_set_debug_file (MIR_context_t ctx, int gen_num, FILE *f);
extern void MIR_gen_set_optimize_level (MIR_context_t ctx, int gen_num, unsigned int level);
extern void *MIR_gen (MIR_context_t ctx, int gen_num, MIR_item_t func_item);
extern void MIR_set_gen_interface (MIR_context_t ctx, MIR_item_t func_item);
extern void MIR_set_parallel_gen_interface (MIR_context_t ctx, MIR_item_t func_item);
extern void MIR_set_lazy_gen_interface (MIR_context_t ctx, MIR_item_t func_item);
extern void MIR_gen_finish (MIR_context_t ctx);

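Editor's note: the mir-gen.h change above threads a generator index through the codegen API (MIR_gen_init now takes gens_num; MIR_gen and the MIR_gen_set_* functions take gen_num) and adds MIR_set_parallel_gen_interface. Below is a minimal caller sketch against the new prototypes; reading gen_num as "which generator instance" is inferred from the signatures, and optimization level 2 is only an example value.

#include "mir-gen.h"

/* Compile one function with a single generator instance (index 0). */
static void *compile_one (MIR_context_t ctx, MIR_item_t func_item) {
  void *code;

  MIR_gen_init (ctx, 1);                  /* one generator */
  MIR_gen_set_optimize_level (ctx, 0, 2); /* generator 0, level 2 (example value) */
  MIR_gen_set_debug_file (ctx, 0, NULL);  /* no debug dump for generator 0 */
  code = MIR_gen (ctx, 0, func_item);     /* generate code and get its address */
  MIR_gen_finish (ctx);
  return code;
}

Callers written against the old API need the extra index argument at every call site.
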
@ -1,6 +1,6 @@
/* This file is a part of MIR project.
Copyright (C) 2018-2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2018-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* Simple high-quality multiplicative hash passing demerphq-smhsher,
@ -22,11 +22,17 @@
#define MIR_HASH_UNALIGNED_ACCESS 0
#endif
#if (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || defined(_MSC_VER)
#define MIR_LITTLE_ENDIAN 1
#else
#define MIR_LITTLE_ENDIAN 0
#endif
static inline uint64_t mir_get_key_part (const uint8_t *v, size_t len, int relax_p) {
size_t i, start = 0;
uint64_t tail = 0;
if (relax_p || __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) {
if (relax_p || MIR_LITTLE_ENDIAN) {
#if MIR_HASH_UNALIGNED_ACCESS
if (len == sizeof (uint64_t)) return *(uint64_t *) v;
if (len >= sizeof (uint32_t)) {

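Editor's note: the mir-hash.h hunk above replaces the direct __BYTE_ORDER__ test with a MIR_LITTLE_ENDIAN macro so that MSVC, which does not predefine __BYTE_ORDER__, is treated as little-endian. Where no such predefined macro exists at all, a runtime check is one possible fallback; this generic sketch is not part of mir-hash.h.

static int little_endian_p (void) {
  const unsigned one = 1;
  return *(const unsigned char *) &one == 1; /* low-order byte stored first */
}
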
@ -1,5 +1,5 @@
/* This file is a part of MIR project.
Copyright (C) 2018-2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2018-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
#ifndef MIR_HTAB_H
@ -116,13 +116,13 @@ DEF_VARR (htab_ind_t)
arg = htab->arg; \
if (htab->free_func != NULL) { \
els_addr = VARR_ADDR (HTAB_EL (T), htab->els); \
size = VARR_LENGTH (HTAB_EL (T), htab->els); \
size = (htab_size_t) VARR_LENGTH (HTAB_EL (T), htab->els); \
for (i = 0; i < htab->els_bound; i++) \
if (els_addr[i].hash != HTAB_DELETED_HASH) htab->free_func (els_addr[i].el, arg); \
} \
htab->els_num = htab->els_start = htab->els_bound = 0; \
addr = VARR_ADDR (htab_ind_t, htab->entries); \
size = VARR_LENGTH (htab_ind_t, htab->entries); \
size = (htab_size_t) VARR_LENGTH (htab_ind_t, htab->entries); \
for (i = 0; i < size; i++) addr[i] = HTAB_EMPTY_IND; \
} \
\
@ -145,8 +145,8 @@ DEF_VARR (htab_ind_t)
void *arg; \
\
HTAB_ASSERT (htab != NULL, "do htab", T); \
size = VARR_LENGTH (htab_ind_t, htab->entries); \
els_size = VARR_LENGTH (HTAB_EL (T), htab->els); \
size = (htab_size_t) VARR_LENGTH (htab_ind_t, htab->entries); \
els_size = (htab_size_t) VARR_LENGTH (HTAB_EL (T), htab->els); \
arg = htab->arg; \
HTAB_ASSERT (els_size * 2 == size, "do size", T); \
if ((action == HTAB_INSERT || action == HTAB_REPLACE) && htab->els_bound == els_size) { \
@ -222,13 +222,11 @@ DEF_VARR (htab_ind_t)
\
static inline void HTAB_OP_DEF (T, foreach_elem) (HTAB (T) * htab, \
void (*func) (T el, void *arg), void *arg) { \
htab_ind_t *addr; \
htab_size_t i, size; \
htab_size_t i; \
HTAB_EL (T) * els_addr; \
\
HTAB_ASSERT (htab != NULL, "foreach_elem", T); \
els_addr = VARR_ADDR (HTAB_EL (T), htab->els); \
size = VARR_LENGTH (HTAB_EL (T), htab->els); \
for (i = 0; i < htab->els_bound; i++) \
if (els_addr[i].hash != HTAB_DELETED_HASH) func (els_addr[i].el, arg); \
}

@ -1,9 +1,21 @@
/* This file is a part of MIR project.
Copyright (C) 2018-2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2018-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
File contains MIR interpreter which is an obligatory part of MIR API.
*/
#ifdef MIR_NO_INTERP
static void interp_init (MIR_context_t ctx) {}
static void finish_func_interpretation (MIR_item_t func_item) {}
static void interp_finish (MIR_context_t ctx) {}
void MIR_interp (MIR_context_t ctx, MIR_item_t func_item, MIR_val_t *results, size_t nargs, ...) {}
void MIR_interp_arr_varg (MIR_context_t ctx, MIR_item_t func_item, MIR_val_t *results, size_t nargs,
MIR_val_t *vals, va_list va) {}
void MIR_interp_arr (MIR_context_t ctx, MIR_item_t func_item, MIR_val_t *results, size_t nargs,
MIR_val_t *vals) {}
void MIR_set_interp_interface (MIR_context_t ctx, MIR_item_t func_item) {}
#else
#ifndef MIR_INTERP_TRACE
#define MIR_INTERP_TRACE 0
#endif
@ -20,6 +32,10 @@
#define ALWAYS_INLINE inline
#endif
#if defined(_MSC_VER)
#define alloca _alloca
#endif
typedef MIR_val_t *code_t;
typedef struct func_desc {
@ -28,10 +44,14 @@ typedef struct func_desc {
MIR_val_t code[1];
} * func_desc_t;
static void update_max_nreg (MIR_reg_t reg, MIR_reg_t *max_nreg) {
if (*max_nreg < reg) *max_nreg = reg;
}
static MIR_reg_t get_reg (MIR_op_t op, MIR_reg_t *max_nreg) {
/* We do not interpret code with hard regs */
mir_assert (op.mode == MIR_OP_REG);
if (*max_nreg < op.u.reg) *max_nreg = op.u.reg;
update_max_nreg (op.u.reg, max_nreg);
return op.u.reg;
}
@ -52,13 +72,16 @@ DEF_VARR (MIR_val_t);
struct ff_interface {
size_t nres, nargs;
int vararg_p;
MIR_type_t *res_types, *arg_types;
MIR_type_t *res_types;
_MIR_arg_desc_t *arg_descs;
void *interface_addr;
};
typedef struct ff_interface *ff_interface_t;
DEF_HTAB (ff_interface_t);
DEF_VARR (_MIR_arg_desc_t);
struct interp_ctx {
#if DIRECT_THREADED_DISPATCH
void *dispatch_label_tab[IC_INSN_BOUND];
@ -74,8 +97,8 @@ struct interp_ctx {
void (*bend_builtin) (void *);
VARR (MIR_val_t) * call_res_args_varr;
MIR_val_t *call_res_args;
VARR (MIR_type_t) * call_arg_types_varr;
MIR_type_t *call_arg_types;
VARR (_MIR_arg_desc_t) * call_arg_descs_varr;
_MIR_arg_desc_t *call_arg_descs;
HTAB (ff_interface_t) * ff_interface_tab;
};
@ -90,8 +113,8 @@ struct interp_ctx {
#define bend_builtin interp_ctx->bend_builtin
#define call_res_args_varr interp_ctx->call_res_args_varr
#define call_res_args interp_ctx->call_res_args
#define call_arg_types_varr interp_ctx->call_arg_types_varr
#define call_arg_types interp_ctx->call_arg_types
#define call_arg_descs_varr interp_ctx->call_arg_descs_varr
#define call_arg_descs interp_ctx->call_arg_descs
#define ff_interface_tab interp_ctx->ff_interface_tab
static void get_icode (struct interp_ctx *interp_ctx, MIR_val_t *v, int code) {
@ -378,17 +401,21 @@ static void generate_icode (MIR_context_t ctx, MIR_item_t func_item) {
mir_assert (ops[i].mode == MIR_OP_REF && ops[i].u.ref->item_type == MIR_proto_item);
v.a = ops[i].u.ref;
} else if (i == 1 && imm_call_p) {
mir_assert (ops[i].u.ref->item_type == MIR_import_item
|| ops[i].u.ref->item_type == MIR_export_item
|| ops[i].u.ref->item_type == MIR_forward_item
|| ops[i].u.ref->item_type == MIR_func_item);
v.a = ops[i].u.ref->addr;
MIR_item_t item = ops[i].u.ref;
mir_assert (item->item_type == MIR_import_item || item->item_type == MIR_export_item
|| item->item_type == MIR_forward_item || item->item_type == MIR_func_item);
v.a = item->addr;
} else if (code == MIR_VA_ARG && i == 2) { /* type */
mir_assert (ops[i].mode == MIR_OP_MEM);
v.i = ops[i].u.mem.type;
} else if (code == MIR_SWITCH && i > 0) {
mir_assert (ops[i].mode == MIR_OP_LABEL);
v.i = 0;
} else if (MIR_call_code_p (code) && ops[i].mode == MIR_OP_MEM) {
mir_assert (MIR_all_blk_type_p (ops[i].u.mem.type));
v.i = ops[i].u.mem.base;
update_max_nreg (v.i, &max_nreg);
} else {
mir_assert (ops[i].mode == MIR_OP_REG);
v.i = get_reg (ops[i], &max_nreg);
@ -803,10 +830,12 @@ static void finish_insn_trace (MIR_context_t ctx, MIR_full_insn_code_t code, cod
bp[ops[0].i].u, bp[ops[0].i].u);
break;
case MIR_OP_FLOAT: fprintf (stderr, "\t# res = %.*ef", FLT_DECIMAL_DIG, bp[ops[0].i].f); break;
case MIR_OP_DOUBLE: fprintf (stderr, "\t# res = %.*e", DBL_DECIMAL_DIG, bp[ops[0].i].d); break;
case MIR_OP_LDOUBLE:
#ifndef _WIN32
fprintf (stderr, "\t# res = %.*Le", LDBL_DECIMAL_DIG, bp[ops[0].i].ld);
break;
#endif
case MIR_OP_DOUBLE: fprintf (stderr, "\t# res = %.*e", DBL_DECIMAL_DIG, bp[ops[0].i].d); break;
default: assert (op_mode == MIR_OP_UNDEF);
}
fprintf (stderr, "\n");
@ -888,7 +917,8 @@ static void OPTIMIZE eval (MIR_context_t ctx, func_desc_t func_desc, MIR_val_t *
REP8 (LAB_EL, MIR_BGTS, MIR_UBGT, MIR_UBGTS, MIR_FBGT, MIR_DBGT, MIR_LDBGT, MIR_BGE, MIR_BGES);
REP5 (LAB_EL, MIR_UBGE, MIR_UBGES, MIR_FBGE, MIR_DBGE, MIR_LDBGE);
REP4 (LAB_EL, MIR_CALL, MIR_INLINE, MIR_SWITCH, MIR_RET);
REP6 (LAB_EL, MIR_ALLOCA, MIR_BSTART, MIR_BEND, MIR_VA_ARG, MIR_VA_START, MIR_VA_END);
REP3 (LAB_EL, MIR_ALLOCA, MIR_BSTART, MIR_BEND);
REP4 (LAB_EL, MIR_VA_ARG, MIR_VA_BLOCK_ARG, MIR_VA_START, MIR_VA_END);
REP8 (LAB_EL, IC_LDI8, IC_LDU8, IC_LDI16, IC_LDU16, IC_LDI32, IC_LDU32, IC_LDI64, IC_LDF);
REP8 (LAB_EL, IC_LDD, IC_LDLD, IC_STI8, IC_STU8, IC_STI16, IC_STU16, IC_STI32, IC_STU32);
REP8 (LAB_EL, IC_STI64, IC_STF, IC_STD, IC_STLD, IC_MOVI, IC_MOVP, IC_MOVF, IC_MOVD);
@ -1233,8 +1263,37 @@ static void OPTIMIZE eval (MIR_context_t ctx, func_desc_t func_desc, MIR_val_t *
SCASE (MIR_DBGE, 3, BDCMP (>=));
SCASE (MIR_LDBGE, 3, BLDCMP (>=));
SCASE (MIR_CALL, 0, pc = call_insn_execute (ctx, pc, bp, ops, FALSE));
SCASE (IC_IMM_CALL, 0, pc = call_insn_execute (ctx, pc, bp, ops, TRUE));
CASE (MIR_CALL, 0) {
int (*func_addr) (void *buf) = *get_aop (bp, ops + 4);
if (func_addr != setjmp_addr) {
pc = call_insn_execute (ctx, pc, bp, ops, FALSE);
} else {
int64_t nops = get_i (ops); /* #args w/o nop, insn, and ff interface address */
MIR_item_t proto_item = get_a (ops + 3);
size_t start = proto_item->u.proto->nres + 5;
bp[get_i (ops + 5)].i = (*func_addr) (*get_aop (bp, ops + start));
pc += nops + 3; /* nops itself, the call insn, add ff interface address */
}
END_INSN;
}
CASE (IC_IMM_CALL, 0) {
int (*func_addr) (void *buf) = get_a (ops + 4);
if (func_addr != setjmp_addr) {
pc = call_insn_execute (ctx, pc, bp, ops, TRUE);
} else {
int64_t nops = get_i (ops); /* #args w/o nop, insn, and ff interface address */
MIR_item_t proto_item = get_a (ops + 3);
size_t start = proto_item->u.proto->nres + 5;
bp[get_i (ops + 5)].i = (*func_addr) (*get_aop (bp, ops + start));
pc += nops + 3; /* nops itself, the call insn, add ff interface address */
}
END_INSN;
}
SCASE (MIR_INLINE, 0, mir_assert (FALSE));
CASE (MIR_SWITCH, 0) {
@ -1277,6 +1336,13 @@ static void OPTIMIZE eval (MIR_context_t ctx, func_desc_t func_desc, MIR_val_t *
*r = (uint64_t) va_arg_builtin ((void *) va, tp);
END_INSN;
}
CASE (MIR_VA_BLOCK_ARG, 4) {
int64_t *r, va, size;
r = get_3iops (bp, ops, &va, &size);
va_block_arg_builtin ((void *) *r, (void *) va, size, *get_iop (bp, ops + 3));
END_INSN;
}
SCASE (MIR_VA_START, 1, va_start_interp_builtin (ctx, bp[get_i (ops)].a, bp[-1].a));
SCASE (MIR_VA_END, 1, va_end_interp_builtin (ctx, bp[get_i (ops)].a));
@ -1342,20 +1408,29 @@ static htab_hash_t ff_interface_hash (ff_interface_t i, void *arg) {
h = mir_hash_step (h, i->nargs);
h = mir_hash_step (h, i->vararg_p);
h = mir_hash (i->res_types, sizeof (MIR_type_t) * i->nres, h);
h = mir_hash (i->arg_types, sizeof (MIR_type_t) * i->nargs, h);
for (size_t n = 0; n < i->nargs; n++) {
h = mir_hash_step (h, i->arg_descs[n].type);
if (MIR_all_blk_type_p (i->arg_descs[n].type)) h = mir_hash_step (h, i->arg_descs[n].size);
}
return mir_hash_finish (h);
}
static int ff_interface_eq (ff_interface_t i1, ff_interface_t i2, void *arg) {
return (i1->nres == i2->nres && i1->nargs == i2->nargs && i1->vararg_p == i2->vararg_p
&& memcmp (i1->res_types, i2->res_types, sizeof (MIR_type_t) * i1->nres) == 0
&& memcmp (i1->arg_types, i2->arg_types, sizeof (MIR_type_t) * i1->nargs) == 0);
if (i1->nres != i2->nres || i1->nargs != i2->nargs || i1->vararg_p != i2->vararg_p) return FALSE;
if (memcmp (i1->res_types, i2->res_types, sizeof (MIR_type_t) * i1->nres) != 0) return FALSE;
for (size_t n = 0; n < i1->nargs; n++) {
if (i1->arg_descs[n].type != i2->arg_descs[n].type) return FALSE;
if (MIR_all_blk_type_p (i1->arg_descs[n].type)
&& i1->arg_descs[n].size != i2->arg_descs[n].size)
return FALSE;
}
return TRUE;
}
static void ff_interface_clear (ff_interface_t ffi, void *arg) { free (ffi); }
static void *get_ff_interface (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, size_t nargs,
MIR_type_t *arg_types, int vararg_p) {
_MIR_arg_desc_t *arg_descs, int vararg_p) {
struct interp_ctx *interp_ctx = ctx->interp_ctx;
struct ff_interface ffi_s;
ff_interface_t tab_ffi, ffi;
@ -1365,18 +1440,19 @@ static void *get_ff_interface (MIR_context_t ctx, size_t nres, MIR_type_t *res_t
ffi_s.nargs = nargs;
ffi_s.vararg_p = !!vararg_p;
ffi_s.res_types = res_types;
ffi_s.arg_types = arg_types;
ffi_s.arg_descs = arg_descs;
if (HTAB_DO (ff_interface_t, ff_interface_tab, &ffi_s, HTAB_FIND, tab_ffi))
return tab_ffi->interface_addr;
ffi = malloc (sizeof (struct ff_interface) + sizeof (MIR_type_t) * (nres + nargs));
ffi = malloc (sizeof (struct ff_interface) + sizeof (_MIR_arg_desc_t) * nargs
+ sizeof (MIR_type_t) * nres);
ffi->nres = nres;
ffi->nargs = nargs;
ffi->vararg_p = !!vararg_p;
ffi->res_types = (MIR_type_t *) ((char *) ffi + sizeof (struct ff_interface));
ffi->arg_types = ffi->res_types + nres;
ffi->arg_descs = (_MIR_arg_desc_t *) ((char *) ffi + sizeof (struct ff_interface));
ffi->res_types = (MIR_type_t *) ((char *) ffi->arg_descs + nargs * sizeof (_MIR_arg_desc_t));
memcpy (ffi->res_types, res_types, sizeof (MIR_type_t) * nres);
memcpy (ffi->arg_types, arg_types, sizeof (MIR_type_t) * nargs);
ffi->interface_addr = _MIR_get_ff_call (ctx, nres, res_types, nargs, call_arg_types, vararg_p);
memcpy (ffi->arg_descs, arg_descs, sizeof (_MIR_arg_desc_t) * nargs);
ffi->interface_addr = _MIR_get_ff_call (ctx, nres, res_types, nargs, call_arg_descs, vararg_p);
htab_res = HTAB_DO (ff_interface_t, ff_interface_tab, ffi, HTAB_INSERT, tab_ffi);
mir_assert (!htab_res && ffi == tab_ffi);
return ffi->interface_addr;
@ -1404,27 +1480,34 @@ static void call (MIR_context_t ctx, MIR_val_t *bp, MIR_op_t *insn_arg_ops, code
}
nres = proto->nres;
if (VARR_EXPAND (MIR_val_t, call_res_args_varr, nargs + nres)
|| VARR_EXPAND (MIR_type_t, call_arg_types_varr, nargs + nres)) {
|| VARR_EXPAND (_MIR_arg_desc_t, call_arg_descs_varr, nargs)) {
call_res_args = VARR_ADDR (MIR_val_t, call_res_args_varr);
call_arg_types = VARR_ADDR (MIR_type_t, call_arg_types_varr);
call_arg_descs = VARR_ADDR (_MIR_arg_desc_t, call_arg_descs_varr);
}
if ((ff_interface_addr = ffi_address_ptr->a) == NULL) {
for (i = 0; i < nargs; i++) {
if (i < arg_vars_num) {
call_arg_types[i] = arg_vars[i].type;
call_arg_descs[i].type = arg_vars[i].type;
if (MIR_all_blk_type_p (arg_vars[i].type)) call_arg_descs[i].size = arg_vars[i].size;
continue;
}
mode = insn_arg_ops[i].value_mode;
mir_assert (mode == MIR_OP_INT || mode == MIR_OP_UINT || mode == MIR_OP_FLOAT
|| mode == MIR_OP_DOUBLE || mode == MIR_OP_LDOUBLE);
if (mode == MIR_OP_FLOAT)
(*MIR_get_error_func (ctx)) (MIR_call_op_error,
"passing float variadic arg (should be passed as double)");
call_arg_types[i]
= (mode == MIR_OP_DOUBLE ? MIR_T_D : mode == MIR_OP_LDOUBLE ? MIR_T_LD : MIR_T_I64);
if (insn_arg_ops[i].mode == MIR_OP_MEM) { /* (r)block arg */
mir_assert (MIR_all_blk_type_p (insn_arg_ops[i].u.mem.type));
call_arg_descs[i].type = insn_arg_ops[i].u.mem.type;
call_arg_descs[i].size = insn_arg_ops[i].u.mem.disp;
} else {
mode = insn_arg_ops[i].value_mode;
mir_assert (mode == MIR_OP_INT || mode == MIR_OP_UINT || mode == MIR_OP_FLOAT
|| mode == MIR_OP_DOUBLE || mode == MIR_OP_LDOUBLE);
if (mode == MIR_OP_FLOAT)
(*MIR_get_error_func (ctx)) (MIR_call_op_error,
"passing float variadic arg (should be passed as double)");
call_arg_descs[i].type
= (mode == MIR_OP_DOUBLE ? MIR_T_D : mode == MIR_OP_LDOUBLE ? MIR_T_LD : MIR_T_I64);
}
}
ff_interface_addr = ffi_address_ptr->a
= get_ff_interface (ctx, nres, proto->res_types, nargs, call_arg_types, proto->vararg_p);
= get_ff_interface (ctx, nres, proto->res_types, nargs, call_arg_descs, proto->vararg_p);
}
for (i = 0; i < nargs; i++) {
@ -1446,7 +1529,10 @@ static void call (MIR_context_t ctx, MIR_val_t *bp, MIR_op_t *insn_arg_ops, code
case MIR_T_D: call_res_args[i + nres].d = arg_vals[i].d; break;
case MIR_T_LD: call_res_args[i + nres].ld = arg_vals[i].ld; break;
case MIR_T_P: call_res_args[i + nres].u = (uint64_t) arg_vals[i].a; break;
default: mir_assert (FALSE);
default:
mir_assert (MIR_all_blk_type_p (type));
call_res_args[i + nres].u = (uint64_t) arg_vals[i].a;
break;
}
}
((void (*) (void *, void *)) ff_interface_addr) (addr, call_res_args); /* call */
@ -1474,7 +1560,7 @@ static void interp_init (MIR_context_t ctx) {
struct interp_ctx *interp_ctx;
if ((interp_ctx = ctx->interp_ctx = malloc (sizeof (struct interp_ctx))) == NULL)
(*error_func) (MIR_alloc_error, "Not enough memory for ctx");
MIR_get_error_func (ctx) (MIR_alloc_error, "Not enough memory for ctx");
#if DIRECT_THREADED_DISPATCH
eval (ctx, NULL, NULL, NULL);
#endif
@ -1483,9 +1569,9 @@ static void interp_init (MIR_context_t ctx) {
VARR_CREATE (MIR_val_t, arg_vals_varr, 0);
arg_vals = VARR_ADDR (MIR_val_t, arg_vals_varr);
VARR_CREATE (MIR_val_t, call_res_args_varr, 0);
VARR_CREATE (MIR_type_t, call_arg_types_varr, 0);
VARR_CREATE (_MIR_arg_desc_t, call_arg_descs_varr, 0);
call_res_args = VARR_ADDR (MIR_val_t, call_res_args_varr);
call_arg_types = VARR_ADDR (MIR_type_t, call_arg_types_varr);
call_arg_descs = VARR_ADDR (_MIR_arg_desc_t, call_arg_descs_varr);
HTAB_CREATE_WITH_FREE_FUNC (ff_interface_t, ff_interface_tab, 1000, ff_interface_hash,
ff_interface_eq, ff_interface_clear, NULL);
#if MIR_INTERP_TRACE
@ -1502,7 +1588,7 @@ static void interp_finish (MIR_context_t ctx) {
VARR_DESTROY (MIR_val_t, code_varr);
VARR_DESTROY (MIR_val_t, arg_vals_varr);
VARR_DESTROY (MIR_val_t, call_res_args_varr);
VARR_DESTROY (MIR_type_t, call_arg_types_varr);
VARR_DESTROY (_MIR_arg_desc_t, call_arg_descs_varr);
HTAB_DESTROY (ff_interface_t, ff_interface_tab);
/* Clear func descs??? */
free (ctx->interp_ctx);
@ -1619,8 +1705,15 @@ static void interp (MIR_context_t ctx, MIR_item_t func_item, va_list va, MIR_val
}
case MIR_T_D: arg_vals[i].d = va_arg (va, double); break;
case MIR_T_LD: arg_vals[i].ld = va_arg (va, long double); break;
case MIR_T_P: arg_vals[i].a = va_arg (va, void *); break;
default: mir_assert (FALSE);
case MIR_T_P:
case MIR_T_RBLK: arg_vals[i].a = va_arg (va, void *); break;
default: mir_assert (MIR_blk_type_p (type)); arg_vals[i].a = alloca (arg_vars[i].size);
#if defined(__PPC64__) || defined(__aarch64__) || defined(_WIN32)
va_block_arg_builtin (arg_vals[i].a, &va, arg_vars[i].size, type - MIR_T_BLK);
#else
va_block_arg_builtin (arg_vals[i].a, va, arg_vars[i].size, type - MIR_T_BLK);
#endif
break;
}
}
#if VA_LIST_IS_ARRAY_P
@ -1635,5 +1728,7 @@ static void redirect_interface_to_interp (MIR_context_t ctx, MIR_item_t func_ite
}
void MIR_set_interp_interface (MIR_context_t ctx, MIR_item_t func_item) {
redirect_interface_to_interp (ctx, func_item);
if (func_item != NULL) redirect_interface_to_interp (ctx, func_item);
}
#endif /* #ifdef MIR_NO_INTERP */

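Editor's note: in the interpreter changes above, the FFI cache key switches from a plain MIR_type_t array to _MIR_arg_desc_t entries so that block arguments are distinguished by size as well as type (see ff_interface_hash and ff_interface_eq). The rule they implement, restated as a standalone sketch that assumes MIR's internal _MIR_arg_desc_t (with the type and size fields used above) and MIR_all_blk_type_p are in scope:

/* Two argument lists describe the same foreign interface only if every type matches
   and, for (r)block arguments, the sizes match as well. */
static int arg_descs_equal_sketch (size_t n, const _MIR_arg_desc_t *a, const _MIR_arg_desc_t *b) {
  for (size_t i = 0; i < n; i++) {
    if (a[i].type != b[i].type) return 0;
    if (MIR_all_blk_type_p (a[i].type) && a[i].size != b[i].size) return 0;
  }
  return 1;
}
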
@ -1,8 +1,10 @@
/* This file is a part of MIR project.
Copyright (C) 2018-2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2018-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
// _MIR_get_thunk, _MIR_redirect_thunk, _MIR_get_interp_shim, _MIR_get_ff_call, _MIR_get_wrapper
/* All BLK type values are passed in int regs, and if the regs are not enough, the rest is passed
   on the stack.  RBLK args are always passed by address. */
#define VA_LIST_IS_ARRAY_P 1 /* one element which is a pointer to args */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
@ -15,13 +17,13 @@
#define PPC64_FUNC_DESC_LEN 24
#endif
static void ppc64_push_func_desc (MIR_context_t ctx);
void (*ppc64_func_desc) (MIR_context_t ctx) = ppc64_push_func_desc;
static void ppc64_push_func_desc (VARR (uint8_t) * *insn_varr);
void (*ppc64_func_desc) (VARR (uint8_t) * *insn_varr) = ppc64_push_func_desc;
static void ppc64_push_func_desc (MIR_context_t ctx) {
VARR_TRUNC (uint8_t, machine_insns, 0);
static void ppc64_push_func_desc (VARR (uint8_t) * *insn_varr) {
VARR_CREATE (uint8_t, *insn_varr, 128);
for (int i = 0; i < PPC64_FUNC_DESC_LEN; i++)
VARR_PUSH (uint8_t, machine_insns, ((uint8_t *) ppc64_func_desc)[i]);
VARR_PUSH (uint8_t, *insn_varr, ((uint8_t *) ppc64_func_desc)[i]);
}
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
@ -31,89 +33,120 @@ static void ppc64_redirect_func_desc (MIR_context_t ctx, void *desc, void *to) {
}
#endif
static void *ppc64_publish_func_and_redirect (MIR_context_t ctx) {
void *res = _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
static void *ppc64_publish_func_and_redirect (MIR_context_t ctx, VARR (uint8_t) * insn_varr) {
void *res
= _MIR_publish_code (ctx, VARR_ADDR (uint8_t, insn_varr), VARR_LENGTH (uint8_t, insn_varr));
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
ppc64_redirect_func_desc (ctx, res, (uint8_t *) res + PPC64_FUNC_DESC_LEN);
#endif
VARR_DESTROY (uint8_t, insn_varr);
return res;
}
static void push_insn (MIR_context_t ctx, uint32_t insn) {
static void push_insn (VARR (uint8_t) * insn_varr, uint32_t insn) {
uint8_t *p = (uint8_t *) &insn;
for (size_t i = 0; i < 4; i++) VARR_PUSH (uint8_t, machine_insns, p[i]);
for (size_t i = 0; i < 4; i++) VARR_PUSH (uint8_t, insn_varr, p[i]);
}
static void push_insns (MIR_context_t ctx, const uint32_t *pat, size_t pat_len) {
static void push_insns (VARR (uint8_t) * insn_varr, const uint32_t *pat, size_t pat_len) {
uint8_t *p = (uint8_t *) pat;
for (size_t i = 0; i < pat_len; i++) VARR_PUSH (uint8_t, machine_insns, p[i]);
for (size_t i = 0; i < pat_len; i++) VARR_PUSH (uint8_t, insn_varr, p[i]);
}
static void ppc64_gen_mov (MIR_context_t ctx, unsigned to, unsigned from) {
static void ppc64_gen_mov (VARR (uint8_t) * insn_varr, unsigned to, unsigned from) {
/* or to,from,from: */
push_insn (ctx, (31 << 26) | (444 << 1) | (from << 21) | (to << 16) | (from << 11));
push_insn (insn_varr, (31 << 26) | (444 << 1) | (from << 21) | (to << 16) | (from << 11));
}
static void ppc64_gen_addi (VARR (uint8_t) * insn_varr, unsigned rt_reg, unsigned ra_reg,
int disp) {
push_insn (insn_varr, (14 << 26) | (rt_reg << 21) | (ra_reg << 16) | (disp & 0xffff));
}
static void ppc64_gen_addi (MIR_context_t ctx, unsigned rt_reg, unsigned ra_reg, int disp) {
push_insn (ctx, (14 << 26) | (rt_reg << 21) | (ra_reg << 16) | (disp & 0xffff));
static void ppc64_gen_add (VARR (uint8_t) * insn_varr, unsigned rt_reg, unsigned ra_reg,
unsigned rb_reg) {
push_insn (insn_varr, (31 << 26) | (266 << 1) | (rt_reg << 21) | (ra_reg << 16) | (rb_reg << 11));
}
static void ppc64_gen_ld (MIR_context_t ctx, unsigned to, unsigned base, int disp,
static void ppc64_gen_ld (VARR (uint8_t) * insn_varr, unsigned to, unsigned base, int disp,
MIR_type_t type) {
int single_p = type == MIR_T_F;
int double_p = type == MIR_T_D || type == MIR_T_LD;
/* (ld | lf[sd]) to, disp(base): */
assert (base != 0 && base < 32 && to < 32 && (single_p || double_p || (disp & 0x3) == 0));
push_insn (ctx, ((single_p ? 48 : double_p ? 50 : 58) << 26) | (to << 21) | (base << 16)
| (disp & 0xffff));
push_insn (insn_varr, ((single_p ? 48 : double_p ? 50 : 58) << 26) | (to << 21) | (base << 16)
| (disp & 0xffff));
}
static void ppc64_gen_st (MIR_context_t ctx, unsigned from, unsigned base, int disp,
static void ppc64_gen_st (VARR (uint8_t) * insn_varr, unsigned from, unsigned base, int disp,
MIR_type_t type) {
int single_p = type == MIR_T_F;
int double_p = type == MIR_T_D || type == MIR_T_LD;
/* std|stf[sd] from, disp(base): */
assert (base != 0 && base < 32 && from < 32 && (single_p || double_p || (disp & 0x3) == 0));
push_insn (ctx, ((single_p ? 52 : double_p ? 54 : 62) << 26) | (from << 21) | (base << 16)
| (disp & 0xffff));
push_insn (insn_varr, ((single_p ? 52 : double_p ? 54 : 62) << 26) | (from << 21) | (base << 16)
| (disp & 0xffff));
}
static void ppc64_gen_stdu (MIR_context_t ctx, int disp) {
static void ppc64_gen_stdu (VARR (uint8_t) * insn_varr, int disp) {
assert ((disp & 0x3) == 0);
push_insn (ctx, 0xf8210001 | disp & 0xfffc); /* stdu 1, disp (1) */
push_insn (insn_varr, 0xf8210001 | disp & 0xfffc); /* stdu 1, disp (1) */
}
static void ppc64_gen_address (MIR_context_t ctx, unsigned int reg, void *p) {
static void ppc64_gen_address (VARR (uint8_t) * insn_varr, unsigned int reg, void *p) {
uint64_t a = (uint64_t) p;
if ((a >> 32) == 0) {
if (((a >> 31) & 1) == 0) { /* lis r,0,Z2 */
push_insn (ctx, (15 << 26) | (reg << 21) | (0 << 16) | (a >> 16) & 0xffff);
push_insn (insn_varr, (15 << 26) | (reg << 21) | (0 << 16) | (a >> 16) & 0xffff);
} else { /* xor r,r,r; oris r,r,Z2 */
push_insn (ctx, (31 << 26) | (316 << 1) | (reg << 21) | (reg << 16) | (reg << 11));
push_insn (ctx, (25 << 26) | (reg << 21) | (reg << 16) | (a >> 16) & 0xffff);
push_insn (insn_varr, (31 << 26) | (316 << 1) | (reg << 21) | (reg << 16) | (reg << 11));
push_insn (insn_varr, (25 << 26) | (reg << 21) | (reg << 16) | (a >> 16) & 0xffff);
}
} else {
/* lis r,0,Z0; ori r,r,Z1; rldicr r,r,32,31; oris r,r,Z2; ori r,r,Z3: */
push_insn (ctx, (15 << 26) | (reg << 21) | (0 << 16) | (a >> 48));
push_insn (ctx, (24 << 26) | (reg << 21) | (reg << 16) | (a >> 32) & 0xffff);
push_insn (ctx, (30 << 26) | (reg << 21) | (reg << 16) | 0x07c6);
push_insn (ctx, (25 << 26) | (reg << 21) | (reg << 16) | (a >> 16) & 0xffff);
push_insn (insn_varr, (15 << 26) | (reg << 21) | (0 << 16) | (a >> 48));
push_insn (insn_varr, (24 << 26) | (reg << 21) | (reg << 16) | (a >> 32) & 0xffff);
push_insn (insn_varr, (30 << 26) | (reg << 21) | (reg << 16) | 0x07c6);
push_insn (insn_varr, (25 << 26) | (reg << 21) | (reg << 16) | (a >> 16) & 0xffff);
}
push_insn (ctx, (24 << 26) | (reg << 21) | (reg << 16) | a & 0xffff);
push_insn (insn_varr, (24 << 26) | (reg << 21) | (reg << 16) | a & 0xffff);
}
static void ppc64_gen_jump (MIR_context_t ctx, unsigned int reg, int call_p) {
static void ppc64_gen_jump (VARR (uint8_t) * insn_varr, unsigned int reg, int call_p) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
assert (reg != 0);
ppc64_gen_ld (ctx, 0, reg, 0, MIR_T_I64); /* 0 = func addr */
ppc64_gen_ld (ctx, 2, reg, 8, MIR_T_I64); /* r2 = TOC */
push_insn (ctx, (31 << 26) | (467 << 1) | (0 << 21) | (9 << 16)); /* mctr 0 */
ppc64_gen_ld (insn_varr, 0, reg, 0, MIR_T_I64); /* 0 = func addr */
ppc64_gen_ld (insn_varr, 2, reg, 8, MIR_T_I64); /* r2 = TOC */
push_insn (insn_varr, (31 << 26) | (467 << 1) | (0 << 21) | (9 << 16)); /* mctr 0 */
#else
if (reg != 12) ppc64_gen_mov (ctx, 12, reg); /* 12 = func addr */
push_insn (ctx, (31 << 26) | (467 << 1) | (12 << 21) | (9 << 16)); /* mctr 12 */
if (reg != 12) ppc64_gen_mov (insn_varr, 12, reg); /* 12 = func addr */
push_insn (insn_varr, (31 << 26) | (467 << 1) | (12 << 21) | (9 << 16)); /* mctr 12 */
#endif
push_insn (ctx, (19 << 26) | (528 << 1) | (20 << 21) | (call_p ? 1 : 0)); /* bcctr[l] */
push_insn (insn_varr, (19 << 26) | (528 << 1) | (20 << 21) | (call_p ? 1 : 0)); /* bcctr[l] */
}
/* r11=addr_reg+addr_disp; r15=r1(sp)+sp_offset; r0=qwords-1;
ctr=r0; L: r0=mem[r11]; r11+=8; mem[r15]=r0; r15+=8; bdnz L; */
static void gen_blk_mov (VARR (uint8_t) * insn_varr, size_t sp_offset, unsigned int addr_reg,
int addr_disp, size_t qwords) {
static const uint32_t blk_mov_loop[] = {
/*0:*/ 0x7c0903a6, /*mctr r0*/
/*4:*/ 0xe80b0000, /*ld r0,0(r11)*/
/*8:*/ 0x396b0008, /*addi r11,r11,8*/
/*12:*/ 0xf80f0000, /*std r0,0(r15)*/
/*16:*/ 0x39ef0008, /*addi r15,r15,8*/
/*20:*/ 0x4200fff0, /*bdnz 4*/
};
/* r11=addr_reg+addr_disp: */
if (addr_reg != 11 || addr_disp != 0) ppc64_gen_addi (insn_varr, 11, addr_reg, addr_disp);
if (sp_offset < 0x10000) {
ppc64_gen_addi (insn_varr, 15, 1, sp_offset);
} else {
ppc64_gen_address (insn_varr, 15, (void *) sp_offset);
ppc64_gen_add (insn_varr, 15, 15, 1);
}
ppc64_gen_address (insn_varr, 0, (void *) qwords); /*r0 = qwords*/
push_insns (insn_varr, blk_mov_loop, sizeof (blk_mov_loop));
}
void *_MIR_get_bstart_builtin (MIR_context_t ctx) {
@ -121,9 +154,11 @@ void *_MIR_get_bstart_builtin (MIR_context_t ctx) {
0x7c230b78, /* mr 3,1 */
0x4e800020, /* blr */
};
ppc64_push_func_desc (ctx);
push_insns (ctx, bstart_code, sizeof (bstart_code));
return ppc64_publish_func_and_redirect (ctx);
VARR (uint8_t) * code;
ppc64_push_func_desc (&code);
push_insns (code, bstart_code, sizeof (bstart_code));
return ppc64_publish_func_and_redirect (ctx, code);
}
void *_MIR_get_bend_builtin (MIR_context_t ctx) {
@ -131,26 +166,33 @@ void *_MIR_get_bend_builtin (MIR_context_t ctx) {
0x7c611b78, /* mr r1,r3 */
0x4e800020, /* blr */
};
ppc64_push_func_desc (ctx);
ppc64_gen_ld (ctx, 0, 1, 0, MIR_T_I64); /* r0 = 0(r1) */
ppc64_gen_st (ctx, 0, 3, 0, MIR_T_I64); /* 0(r3) = r0 */
ppc64_gen_ld (ctx, 0, 1, PPC64_TOC_OFFSET, MIR_T_I64); /* r0 = toc_offset(r1) */
ppc64_gen_st (ctx, 0, 3, PPC64_TOC_OFFSET, MIR_T_I64); /* toc_offset(r3) = r0 */
push_insns (ctx, bend_finish_code, sizeof (bend_finish_code));
return ppc64_publish_func_and_redirect (ctx);
VARR (uint8_t) * code;
ppc64_push_func_desc (&code);
ppc64_gen_ld (code, 0, 1, 0, MIR_T_I64); /* r0 = 0(r1) */
ppc64_gen_st (code, 0, 3, 0, MIR_T_I64); /* 0(r3) = r0 */
ppc64_gen_ld (code, 0, 1, PPC64_TOC_OFFSET, MIR_T_I64); /* r0 = toc_offset(r1) */
ppc64_gen_st (code, 0, 3, PPC64_TOC_OFFSET, MIR_T_I64); /* toc_offset(r3) = r0 */
push_insns (code, bend_finish_code, sizeof (bend_finish_code));
return ppc64_publish_func_and_redirect (ctx, code);
}
void *_MIR_get_thunk (MIR_context_t ctx) { /* emit 3 doublewords for func descriptor: */
ppc64_push_func_desc (ctx);
VARR (uint8_t) * code;
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return ppc64_publish_func_and_redirect (ctx);
ppc64_push_func_desc (&code);
return ppc64_publish_func_and_redirect (ctx, code);
#else
const uint32_t nop_insn = 24 << (32 - 6); /* ori 0,0,0 */
const uint32_t nop_insn = 24 << (32 - 6); /* ori 0,0,0 */
const int max_thunk_len = (7 * 8);
VARR_TRUNC (uint8_t, machine_insns, 0);
for (int i = 0; i < max_thunk_len; i++) push_insn (ctx, nop_insn);
return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
void *res;
VARR_CREATE (uint8_t, code, 128);
for (int i = 0; i < max_thunk_len; i++) push_insn (code, nop_insn);
res = _MIR_publish_code (ctx, VARR_ADDR (uint8_t, code), VARR_LENGTH (uint8_t, code));
VARR_DESTROY (uint8_t, code);
return res;
#endif
}
@ -162,11 +204,13 @@ void _MIR_redirect_thunk (MIR_context_t ctx, void *thunk, void *to) {
0x7d8903a6, /* mtctr r12 */
0x4e800420, /* bctr */
};
VARR_TRUNC (uint8_t, machine_insns, 0);
ppc64_gen_address (ctx, 12, to);
push_insns (ctx, global_entry_end, sizeof (global_entry_end));
_MIR_change_code (ctx, thunk, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
VARR (uint8_t) * code;
VARR_CREATE (uint8_t, code, 256);
ppc64_gen_address (code, 12, to);
push_insns (code, global_entry_end, sizeof (global_entry_end));
_MIR_change_code (ctx, thunk, VARR_ADDR (uint8_t, code), VARR_LENGTH (uint8_t, code));
VARR_DESTROY (uint8_t, code);
#endif
}
@ -191,6 +235,13 @@ void *va_arg_builtin (void *p, uint64_t t) {
return a;
}
void va_block_arg_builtin (void *res, void *p, size_t s, uint64_t ncase) {
struct ppc64_va_list *va = p;
void *a = va->arg_area;
memcpy (res, a, s);
va->arg_area += (s + sizeof (uint64_t) - 1) / sizeof (uint64_t);
}
void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a) {
struct ppc64_va_list **va = p;
va_list *vap = a;
@ -202,15 +253,13 @@ void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a) {
void va_end_interp_builtin (MIR_context_t ctx, void *p) {}
/* Generation: fun (fun_addr, res_arg_addresses):
save lr (r1 + 16); allocate and form minimal stack frame (with necessary param area); save r14;
r12=fun_addr (r3); r14 = res_arg_addresses (r4);
r0=mem[r14,<args_offset>]; (arg_reg=mem[r0] or r0=mem[r0];mem[r1,r1_offset]=r0) ...
if func is vararg: put fp args also in gp regs
call *r12;
r0=mem[r14,<offset>]; res_reg=mem[r0]; ...
restore r14, r1, lr; return. */
save lr (r1 + 16); allocate and form minimal stack frame (with necessary param area); save
r14,r15; r12=fun_addr (r3); r14 = res_arg_addresses (r4); r0=mem[r14,<args_offset>];
(arg_reg=mem[r0] or r0=mem[r0];mem[r1,r1_offset]=r0) ... if func is vararg: put fp args also in
gp regs call *r12; r0=mem[r14,<offset>]; res_reg=mem[r0]; ... restore r15, r14, r1, lr; return.
*/
void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, size_t nargs,
MIR_type_t *arg_types, int vararg_p) {
_MIR_arg_desc_t *arg_descs, int vararg_p) {
static uint32_t start_pattern[] = {
0x7c0802a6, /* mflr r0 */
0xf8010010, /* std r0,16(r1) */
@ -221,98 +270,119 @@ void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, s
0x4e800020, /* blr */
};
MIR_type_t type;
int n_gpregs = 0, n_fpregs = 0, res_reg = 14, frame_size, disp, param_offset, param_size = 0;
int n_gpregs = 0, n_fpregs = 0, res_reg = 14, qwords, frame_size;
int disp, blk_disp, param_offset, param_size = 0;
VARR (uint8_t) * code;
ppc64_push_func_desc (ctx);
for (uint32_t i = 0; i < nargs; i++) param_size += arg_types[i] == MIR_T_LD ? 16 : 8;
ppc64_push_func_desc (&code);
for (uint32_t i = 0; i < nargs; i++) {
type = arg_descs[i].type;
if (MIR_blk_type_p (type))
param_size += (arg_descs[i].size + 7) / 8 * 8;
else
param_size += type == MIR_T_LD ? 16 : 8;
}
if (param_size < 64) param_size = 64;
frame_size = PPC64_STACK_HEADER_SIZE + param_size + 8; /* +local var to save res_reg */
if (frame_size % 16 != 0) frame_size += 8; /* align */
ppc64_gen_st (ctx, 2, 1, PPC64_TOC_OFFSET, MIR_T_I64);
push_insns (ctx, start_pattern, sizeof (start_pattern));
ppc64_gen_stdu (ctx, -frame_size);
ppc64_gen_st (ctx, res_reg, 1, PPC64_STACK_HEADER_SIZE + param_size,
frame_size = PPC64_STACK_HEADER_SIZE + param_size + 16; /* +local var to save res_reg and 15 */
if (frame_size % 16 != 0) frame_size += 8; /* align */
ppc64_gen_st (code, 2, 1, PPC64_TOC_OFFSET, MIR_T_I64);
push_insns (code, start_pattern, sizeof (start_pattern));
ppc64_gen_stdu (code, -frame_size);
ppc64_gen_st (code, res_reg, 1, PPC64_STACK_HEADER_SIZE + param_size,
MIR_T_I64); /* save res_reg */
ppc64_gen_st (code, 15, 1, PPC64_STACK_HEADER_SIZE + param_size + 8, MIR_T_I64); /* save 15 */
mir_assert (sizeof (long double) == 16);
ppc64_gen_mov (ctx, res_reg, 4); /* results & args */
ppc64_gen_mov (ctx, 12, 3); /* func addr */
ppc64_gen_mov (code, res_reg, 4); /* results & args */
ppc64_gen_mov (code, 12, 3); /* func addr */
n_gpregs = n_fpregs = 0;
param_offset = nres * 16; /* args start */
disp = PPC64_STACK_HEADER_SIZE; /* param area start */
for (uint32_t i = 0; i < nargs; i++) { /* load args: */
type = arg_types[i];
type = arg_descs[i].type;
if ((type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD) && n_fpregs < 13) {
ppc64_gen_ld (ctx, 1 + n_fpregs, res_reg, param_offset, type);
ppc64_gen_ld (code, 1 + n_fpregs, res_reg, param_offset, type);
if (vararg_p) {
if (n_gpregs >= 8) {
ppc64_gen_st (ctx, 1 + n_fpregs, 1, disp, MIR_T_D);
ppc64_gen_st (code, 1 + n_fpregs, 1, disp, MIR_T_D);
} else { /* load into gp reg too */
ppc64_gen_st (ctx, 1 + n_fpregs, 1, -8, MIR_T_D);
ppc64_gen_ld (ctx, 3 + n_gpregs, 1, -8, MIR_T_I64);
ppc64_gen_st (code, 1 + n_fpregs, 1, -8, MIR_T_D);
ppc64_gen_ld (code, 3 + n_gpregs, 1, -8, MIR_T_I64);
}
}
n_fpregs++;
if (type == MIR_T_LD) {
if (n_fpregs < 13) {
ppc64_gen_ld (ctx, 1 + n_fpregs, res_reg, param_offset + 8, type);
ppc64_gen_ld (code, 1 + n_fpregs, res_reg, param_offset + 8, type);
if (vararg_p) {
if (n_gpregs + 1 >= 8) {
ppc64_gen_st (ctx, 1 + n_fpregs, 1, disp + 8, MIR_T_D);
ppc64_gen_st (code, 1 + n_fpregs, 1, disp + 8, MIR_T_D);
} else { /* load into gp reg too */
ppc64_gen_st (ctx, 1 + n_fpregs, 1, -8, MIR_T_D);
ppc64_gen_ld (ctx, 4 + n_gpregs, 1, -8, MIR_T_I64);
ppc64_gen_st (code, 1 + n_fpregs, 1, -8, MIR_T_D);
ppc64_gen_ld (code, 4 + n_gpregs, 1, -8, MIR_T_I64);
}
}
n_fpregs++;
} else {
ppc64_gen_ld (ctx, 0, res_reg, param_offset + 8, type);
ppc64_gen_st (ctx, 0, 1, disp + 8, MIR_T_D);
ppc64_gen_ld (code, 0, res_reg, param_offset + 8, type);
ppc64_gen_st (code, 0, 1, disp + 8, MIR_T_D);
}
}
} else if (type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD) {
ppc64_gen_ld (ctx, 0, res_reg, param_offset, type);
ppc64_gen_st (ctx, 0, 1, disp, MIR_T_D);
ppc64_gen_ld (code, 0, res_reg, param_offset, type);
ppc64_gen_st (code, 0, 1, disp, MIR_T_D);
if (type == MIR_T_LD) {
ppc64_gen_ld (ctx, 0, res_reg, param_offset + 8, type);
ppc64_gen_st (ctx, 0, 1, disp + 8, MIR_T_D);
ppc64_gen_ld (code, 0, res_reg, param_offset + 8, type);
ppc64_gen_st (code, 0, 1, disp + 8, MIR_T_D);
}
} else if (n_gpregs < 8) {
ppc64_gen_ld (ctx, n_gpregs + 3, res_reg, param_offset, MIR_T_I64);
} else if (MIR_blk_type_p (type)) {
qwords = (arg_descs[i].size + 7) / 8;
if (qwords > 0) ppc64_gen_ld (code, 11, res_reg, param_offset, MIR_T_I64);
for (blk_disp = 0; qwords > 0 && n_gpregs < 8; qwords--, n_gpregs++, blk_disp += 8, disp += 8)
ppc64_gen_ld (code, n_gpregs + 3, 11, blk_disp, MIR_T_I64);
if (qwords > 0) gen_blk_mov (code, disp, 11, blk_disp, qwords);
disp += qwords * 8;
param_offset += 16;
continue;
} else if (n_gpregs < 8) { /* including RBLK */
ppc64_gen_ld (code, n_gpregs + 3, res_reg, param_offset, MIR_T_I64);
} else {
ppc64_gen_ld (ctx, 0, res_reg, param_offset, MIR_T_I64);
ppc64_gen_st (ctx, 0, 1, disp, MIR_T_I64);
ppc64_gen_ld (code, 0, res_reg, param_offset, MIR_T_I64);
ppc64_gen_st (code, 0, 1, disp, MIR_T_I64);
}
disp += type == MIR_T_LD ? 16 : 8;
param_offset += 16;
n_gpregs += type == MIR_T_LD ? 2 : 1;
}
ppc64_gen_jump (ctx, 12, TRUE); /* call func_addr */
ppc64_gen_jump (code, 12, TRUE); /* call func_addr */
n_gpregs = n_fpregs = 0;
disp = 0;
for (uint32_t i = 0; i < nres; i++) {
type = res_types[i];
if ((type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD) && n_fpregs < 4) {
ppc64_gen_st (ctx, n_fpregs + 1, res_reg, disp, type);
if ((type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD) && n_fpregs < 8) {
ppc64_gen_st (code, n_fpregs + 1, res_reg, disp, type);
n_fpregs++;
if (type == MIR_T_LD) {
if (n_fpregs >= 4)
(*error_func) (MIR_ret_error, "ppc64 can not handle this combination of return values");
ppc64_gen_st (ctx, n_fpregs + 1, res_reg, disp + 8, type);
if (n_fpregs >= 8)
MIR_get_error_func (ctx) (MIR_ret_error,
"ppc64 can not handle this combination of return values");
ppc64_gen_st (code, n_fpregs + 1, res_reg, disp + 8, type);
n_fpregs++;
}
} else if (n_gpregs < 1) { // just one gp reg
ppc64_gen_st (ctx, n_gpregs + 3, res_reg, disp, MIR_T_I64);
} else if (n_gpregs < 2) { // just one-two gp reg
ppc64_gen_st (code, n_gpregs + 3, res_reg, disp, MIR_T_I64);
n_gpregs++;
} else {
(*error_func) (MIR_ret_error, "ppc64 can not handle this combination of return values");
MIR_get_error_func (ctx) (MIR_ret_error,
"ppc64 can not handle this combination of return values");
}
disp += 16;
}
ppc64_gen_ld (ctx, res_reg, 1, PPC64_STACK_HEADER_SIZE + param_size,
ppc64_gen_ld (code, res_reg, 1, PPC64_STACK_HEADER_SIZE + param_size,
MIR_T_I64); /* restore res_reg */
ppc64_gen_addi (ctx, 1, 1, frame_size);
push_insns (ctx, finish_pattern, sizeof (finish_pattern));
return ppc64_publish_func_and_redirect (ctx);
ppc64_gen_ld (code, 15, 1, PPC64_STACK_HEADER_SIZE + param_size + 8, MIR_T_I64); /* restore r15 */
ppc64_gen_addi (code, 1, 1, frame_size);
push_insns (code, finish_pattern, sizeof (finish_pattern));
return ppc64_publish_func_and_redirect (ctx, code);
}
/* Transform C call to call of void handler (MIR_context_t ctx, MIR_item_t func_item,
@ -326,8 +396,8 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
int vararg_p = func->vararg_p;
MIR_type_t type, *res_types = func->res_types;
MIR_var_t *arg_vars = VARR_ADDR (MIR_var_t, func->vars);
int disp, size, frame_size, local_var_size, param_offset, va_reg = 11, caller_r1 = 12,
res_reg = 14;
int disp, start_disp, qwords, size, frame_size, local_var_size, param_offset;
int va_reg = 11, caller_r1 = 12, res_reg = 14;
int n_gpregs, n_fpregs;
static uint32_t start_pattern[] = {
0x7c0802a6, /* mflr r0 */
@ -338,97 +408,117 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
0x7c0803a6, /* mtlr r0 */
0x4e800020, /* blr */
};
VARR (uint8_t) * code;
void *res;
VARR_TRUNC (uint8_t, machine_insns, 0);
VARR_CREATE (uint8_t, code, 256);
frame_size = PPC64_STACK_HEADER_SIZE + 64; /* header + 8(param area) */
local_var_size = nres * 16 + 8; /* saved r14, results */
local_var_size = nres * 16 + 16; /* saved r14, r15, results */
if (vararg_p) {
for (unsigned reg = 3; reg <= 10; reg++) /* std rn,dispn(r1) : */
ppc64_gen_st (ctx, reg, 1, PPC64_STACK_HEADER_SIZE + (reg - 3) * 8, MIR_T_I64);
ppc64_gen_addi (ctx, va_reg, 1, PPC64_STACK_HEADER_SIZE);
ppc64_gen_st (code, reg, 1, PPC64_STACK_HEADER_SIZE + (reg - 3) * 8, MIR_T_I64);
ppc64_gen_addi (code, va_reg, 1, PPC64_STACK_HEADER_SIZE);
} else {
ppc64_gen_mov (ctx, caller_r1, 1); /* caller frame r1 */
ppc64_gen_mov (code, caller_r1, 1); /* caller frame r1 */
for (uint32_t i = 0; i < nargs; i++) {
type = arg_vars[i].type;
local_var_size += type == MIR_T_LD ? 16 : 8;
if (MIR_blk_type_p (type))
local_var_size += (arg_vars[i].size + 7) / 8 * 8;
else
local_var_size += type == MIR_T_LD ? 16 : 8;
}
}
frame_size += local_var_size;
if (frame_size % 16 != 0) frame_size += 8; /* align */
push_insns (ctx, start_pattern, sizeof (start_pattern));
ppc64_gen_stdu (ctx, -frame_size);
ppc64_gen_st (ctx, res_reg, 1, PPC64_STACK_HEADER_SIZE + 64, MIR_T_I64); /* save res_reg */
push_insns (code, start_pattern, sizeof (start_pattern));
ppc64_gen_stdu (code, -frame_size);
ppc64_gen_st (code, res_reg, 1, PPC64_STACK_HEADER_SIZE + 64, MIR_T_I64); /* save res_reg */
ppc64_gen_st (code, 15, 1, PPC64_STACK_HEADER_SIZE + 72, MIR_T_I64); /* save r15 */
if (!vararg_p) { /* save args in local vars: */
/* header_size + 64 + nres * 16 + 8 -- start of stack memory to keep args: */
disp = PPC64_STACK_HEADER_SIZE + 64 + nres * 16 + 8;
ppc64_gen_addi (ctx, va_reg, 1, disp);
/* header_size + 64 + nres * 16 + 16 -- start of stack memory to keep args: */
start_disp = disp = PPC64_STACK_HEADER_SIZE + 64 + nres * 16 + 16;
param_offset = PPC64_STACK_HEADER_SIZE;
n_gpregs = n_fpregs = 0;
for (uint32_t i = 0; i < nargs; i++) {
type = arg_vars[i].type;
if ((type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD) && n_fpregs < 13) {
ppc64_gen_st (ctx, n_fpregs + 1, 1, disp, MIR_T_D);
ppc64_gen_st (code, n_fpregs + 1, 1, disp, MIR_T_D);
n_fpregs++;
if (type == MIR_T_LD) {
if (n_fpregs < 13) {
ppc64_gen_st (ctx, n_fpregs + 1, 1, disp + 8, MIR_T_D);
ppc64_gen_st (code, n_fpregs + 1, 1, disp + 8, MIR_T_D);
n_fpregs++;
} else {
ppc64_gen_ld (ctx, 0, caller_r1, param_offset + 8, MIR_T_D);
ppc64_gen_st (ctx, 0, 1, disp + 8, MIR_T_D);
ppc64_gen_ld (code, 0, caller_r1, param_offset + 8, MIR_T_D);
ppc64_gen_st (code, 0, 1, disp + 8, MIR_T_D);
}
}
} else if (MIR_blk_type_p (type)) {
qwords = (arg_vars[i].size + 7) / 8;
for (; qwords > 0 && n_gpregs < 8; qwords--, n_gpregs++, disp += 8, param_offset += 8)
ppc64_gen_st (code, n_gpregs + 3, 1, disp, MIR_T_I64);
if (qwords > 0) {
gen_blk_mov (code, disp, caller_r1, param_offset, qwords);
disp += qwords * 8;
param_offset += qwords * 8;
}
continue;
} else if (n_gpregs < 8) {
ppc64_gen_st (ctx, n_gpregs + 3, 1, disp, MIR_T_I64);
ppc64_gen_st (code, n_gpregs + 3, 1, disp, MIR_T_I64);
} else if (type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD) {
ppc64_gen_ld (ctx, 0, caller_r1, param_offset + (type == MIR_T_F ? 4 : 0), type);
ppc64_gen_st (ctx, 0, 1, disp, MIR_T_D);
ppc64_gen_ld (code, 0, caller_r1, param_offset + (type == MIR_T_F ? 4 : 0), type);
ppc64_gen_st (code, 0, 1, disp, MIR_T_D);
if (type == MIR_T_LD) {
ppc64_gen_ld (ctx, 0, caller_r1, param_offset + 8, MIR_T_D);
ppc64_gen_st (ctx, 0, 1, disp + 8, MIR_T_D);
ppc64_gen_ld (code, 0, caller_r1, param_offset + 8, MIR_T_D);
ppc64_gen_st (code, 0, 1, disp + 8, MIR_T_D);
}
} else {
ppc64_gen_ld (ctx, 0, caller_r1, param_offset, MIR_T_I64);
ppc64_gen_st (ctx, 0, 1, disp, MIR_T_I64);
ppc64_gen_ld (code, 0, caller_r1, param_offset, MIR_T_I64);
ppc64_gen_st (code, 0, 1, disp, MIR_T_I64);
}
size = type == MIR_T_LD ? 16 : 8;
disp += size;
param_offset += size;
n_gpregs += type == MIR_T_LD ? 2 : 1;
}
ppc64_gen_addi (code, va_reg, 1, start_disp);
}
ppc64_gen_addi (ctx, res_reg, 1, 64 + PPC64_STACK_HEADER_SIZE + 8);
ppc64_gen_address (ctx, 3, ctx);
ppc64_gen_address (ctx, 4, func_item);
ppc64_gen_mov (ctx, 5, va_reg);
ppc64_gen_mov (ctx, 6, res_reg);
ppc64_gen_address (ctx, 12, handler);
ppc64_gen_jump (ctx, 12, TRUE);
ppc64_gen_addi (code, res_reg, 1, 64 + PPC64_STACK_HEADER_SIZE + 16);
ppc64_gen_address (code, 3, ctx);
ppc64_gen_address (code, 4, func_item);
ppc64_gen_mov (code, 5, va_reg);
ppc64_gen_mov (code, 6, res_reg);
ppc64_gen_address (code, 12, handler);
ppc64_gen_jump (code, 12, TRUE);
disp = n_gpregs = n_fpregs = 0;
for (uint32_t i = 0; i < nres; i++) {
type = res_types[i];
if ((type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD) && n_fpregs < 4) {
ppc64_gen_ld (ctx, n_fpregs + 1, res_reg, disp, type);
if ((type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD) && n_fpregs < 8) {
ppc64_gen_ld (code, n_fpregs + 1, res_reg, disp, type);
n_fpregs++;
if (type == MIR_T_LD) {
if (n_fpregs >= 4)
(*error_func) (MIR_ret_error, "ppc64 can not handle this combination of return values");
ppc64_gen_ld (ctx, n_fpregs + 1, res_reg, disp + 8, type);
if (n_fpregs >= 8)
MIR_get_error_func (ctx) (MIR_ret_error,
"ppc64 can not handle this combination of return values");
ppc64_gen_ld (code, n_fpregs + 1, res_reg, disp + 8, type);
n_fpregs++;
}
} else if (n_gpregs < 1) { // just one gp reg
ppc64_gen_ld (ctx, n_gpregs + 3, res_reg, disp, MIR_T_I64);
} else if (n_gpregs < 2) { // just one-two gp reg
ppc64_gen_ld (code, n_gpregs + 3, res_reg, disp, MIR_T_I64);
n_gpregs++;
} else {
(*error_func) (MIR_ret_error, "ppc64 can not handle this combination of return values");
MIR_get_error_func (ctx) (MIR_ret_error,
"ppc64 can not handle this combination of return values");
}
disp += 16;
}
ppc64_gen_ld (ctx, res_reg, 1, PPC64_STACK_HEADER_SIZE + 64, MIR_T_I64); /* restore res_reg */
ppc64_gen_addi (ctx, 1, 1, frame_size);
push_insns (ctx, finish_pattern, sizeof (finish_pattern));
return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
ppc64_gen_ld (code, res_reg, 1, PPC64_STACK_HEADER_SIZE + 64, MIR_T_I64); /* restore res_reg */
ppc64_gen_ld (code, 15, 1, PPC64_STACK_HEADER_SIZE + 72, MIR_T_I64); /* restore r15 */
ppc64_gen_addi (code, 1, 1, frame_size);
push_insns (code, finish_pattern, sizeof (finish_pattern));
res = _MIR_publish_code (ctx, VARR_ADDR (uint8_t, code), VARR_LENGTH (uint8_t, code));
VARR_DESTROY (uint8_t, code);
return res;
}
/* Brief: save lr (r1+16); update r1, save all param regs (r1+header+64);
@ -445,29 +535,32 @@ void *_MIR_get_wrapper (MIR_context_t ctx, MIR_item_t called_func, void *hook_ad
0x7c0803a6, /* mtlr r0 */
};
int frame_size = PPC64_STACK_HEADER_SIZE + 8 * 8 + 13 * 8 + 8 * 8;
VARR (uint8_t) * code;
void *res;
VARR_TRUNC (uint8_t, machine_insns, 0);
push_insns (ctx, prologue, sizeof (prologue));
VARR_CREATE (uint8_t, code, 256);
push_insns (code, prologue, sizeof (prologue));
/* stdu r1,n(r1): header + 8(gp args) + 13(fp args) + 8(param area): */
if (frame_size % 16 != 0) frame_size += 8;
ppc64_gen_stdu (ctx, -frame_size);
ppc64_gen_stdu (code, -frame_size);
for (unsigned reg = 3; reg <= 10; reg++) /* std rn,dispn(r1) : */
ppc64_gen_st (ctx, reg, 1, PPC64_STACK_HEADER_SIZE + (reg - 3) * 8 + 64, MIR_T_I64);
ppc64_gen_st (code, reg, 1, PPC64_STACK_HEADER_SIZE + (reg - 3) * 8 + 64, MIR_T_I64);
for (unsigned reg = 1; reg <= 13; reg++) /* stfd fn,dispn(r1) : */
ppc64_gen_st (ctx, reg, 1, PPC64_STACK_HEADER_SIZE + (reg - 1 + 8) * 8 + 64, MIR_T_D);
ppc64_gen_address (ctx, 3, ctx);
ppc64_gen_address (ctx, 4, called_func);
ppc64_gen_address (ctx, 12, hook_address);
ppc64_gen_jump (ctx, 12, TRUE);
ppc64_gen_mov (ctx, 12, 3);
ppc64_gen_st (code, reg, 1, PPC64_STACK_HEADER_SIZE + (reg - 1 + 8) * 8 + 64, MIR_T_D);
ppc64_gen_address (code, 3, ctx);
ppc64_gen_address (code, 4, called_func);
ppc64_gen_address (code, 12, hook_address);
ppc64_gen_jump (code, 12, TRUE);
ppc64_gen_mov (code, 12, 3);
for (unsigned reg = 3; reg <= 10; reg++) /* ld rn,dispn(r1) : */
ppc64_gen_ld (ctx, reg, 1, PPC64_STACK_HEADER_SIZE + (reg - 3) * 8 + 64, MIR_T_I64);
ppc64_gen_ld (code, reg, 1, PPC64_STACK_HEADER_SIZE + (reg - 3) * 8 + 64, MIR_T_I64);
for (unsigned reg = 1; reg <= 13; reg++) /* lfd fn,dispn(r1) : */
ppc64_gen_ld (ctx, reg, 1, PPC64_STACK_HEADER_SIZE + (reg - 1 + 8) * 8 + 64, MIR_T_D);
ppc64_gen_addi (ctx, 1, 1, frame_size);
push_insns (ctx, epilogue, sizeof (epilogue));
push_insn (ctx, (31 << 26) | (467 << 1) | (12 << 21) | (9 << 16)); /* mctr 12 */
push_insn (ctx, (19 << 26) | (528 << 1) | (20 << 21)); /* bcctr */
return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
ppc64_gen_ld (code, reg, 1, PPC64_STACK_HEADER_SIZE + (reg - 1 + 8) * 8 + 64, MIR_T_D);
ppc64_gen_addi (code, 1, 1, frame_size);
push_insns (code, epilogue, sizeof (epilogue));
push_insn (code, (31 << 26) | (467 << 1) | (12 << 21) | (9 << 16)); /* mctr 12 */
push_insn (code, (19 << 26) | (528 << 1) | (20 << 21)); /* bcctr */
res = _MIR_publish_code (ctx, VARR_ADDR (uint8_t, code), VARR_LENGTH (uint8_t, code));
VARR_DESTROY (uint8_t, code);
return res;
}

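Editor's note: gen_blk_mov above emits a small copy loop (mtctr/ld/addi/std/addi/bdnz) to spill the part of a block argument that does not fit in r3..r10 onto the stack parameter area. Its C-level equivalent, shown only for readability:

#include <stdint.h>
#include <stddef.h>

/* dst plays the role of r15 (stack parameter area), src of r11 (block address),
   and qwords is the count loaded into ctr; each bdnz iteration moves 8 bytes. */
static void blk_mov_sketch (uint64_t *dst, const uint64_t *src, size_t qwords) {
  for (size_t i = 0; i < qwords; i++) dst[i] = src[i];
}
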
@ -1,5 +1,5 @@
/* This file is a part of MIR project.
Copyright (C) 2018-2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2018-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
#ifndef MIR_REDUCE_H

@ -1,9 +1,11 @@
/* This file is a part of MIR project.
Copyright (C) 2018-2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2018-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* Long doubles (-mlong-double=128) are always passed by address (for args and results) */
/* All BLK type values and RBLK args are always passed by address. */
#if 0 && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#error "s390x works only in BE mode"
#endif
@ -12,24 +14,25 @@
#define S390X_STACK_HEADER_SIZE 160
static void push_insns (MIR_context_t ctx, const uint8_t *pat, size_t pat_len) {
for (size_t i = 0; i < pat_len; i++) VARR_PUSH (uint8_t, machine_insns, pat[i]);
static uint8_t *push_insns (VARR (uint8_t) * insn_varr, const uint8_t *pat, size_t pat_len) {
for (size_t i = 0; i < pat_len; i++) VARR_PUSH (uint8_t, insn_varr, pat[i]);
return VARR_ADDR (uint8_t, insn_varr) + VARR_LENGTH (uint8_t, insn_varr) - pat_len;
}
static void s390x_gen_mov (MIR_context_t ctx, unsigned to, unsigned from) {
static void s390x_gen_mov (VARR (uint8_t) * insn_varr, unsigned to, unsigned from) {
uint32_t lgr = (0xb904 << 16) | (to << 4) | from; /* lgr to,from: */
assert (to < 16 && from < 16);
push_insns (ctx, (uint8_t *) &lgr, 4);
push_insns (insn_varr, (uint8_t *) &lgr, 4);
}
static void s390x_gen_mvi (MIR_context_t ctx, int val, unsigned base, int disp) {
static void s390x_gen_mvi (VARR (uint8_t) * insn_varr, int val, unsigned base, int disp) {
uint64_t mvghi /* mvghi disp(base), val: */
= ((0xe548l << 32) | ((uint64_t) base << 28) | ((disp & 0xfff) << 16) | (val & 0xffff)) << 16;
assert (base < 16 && 0 <= disp && disp < (1 << 12) && -(1 << 15) < val && val < (1 << 15));
push_insns (ctx, (uint8_t *) &mvghi, 6);
push_insns (insn_varr, (uint8_t *) &mvghi, 6);
}
static void s390x_gen_ld_st (MIR_context_t ctx, unsigned reg, unsigned base, int disp,
static void s390x_gen_ld_st (VARR (uint8_t) * insn_varr, unsigned reg, unsigned base, int disp,
MIR_type_t type, int ld_p) {
int single_p = type == MIR_T_F;
int double_p = type == MIR_T_D;
@ -51,112 +54,146 @@ static void s390x_gen_ld_st (MIR_context_t ctx, unsigned reg, unsigned base, int
uint64_t dy = ((0xedl << 40) | common | (ld_p ? 0x65 : 0x67)) << 16;
/* (lg|lgf|llgf|lgb|llgc|lhy|llgh|ley|ldy|stg|sty|sthy|stcy|stey|stdy) reg, disp(base): */
assert (type != MIR_T_LD && reg < 16 && base < 16 && -(1 << 19) < disp && disp < (1 << 19));
push_insns (ctx, (uint8_t *) (single_p ? &ey : double_p ? &dy : &g), 6);
push_insns (insn_varr, (uint8_t *) (single_p ? &ey : double_p ? &dy : &g), 6);
}
static void s390x_gen_ld (MIR_context_t ctx, unsigned to, unsigned base, int disp,
static void s390x_gen_ld (VARR (uint8_t) * insn_varr, unsigned to, unsigned base, int disp,
MIR_type_t type) {
s390x_gen_ld_st (ctx, to, base, disp, type, TRUE);
s390x_gen_ld_st (insn_varr, to, base, disp, type, TRUE);
}
static void s390x_gen_st (MIR_context_t ctx, unsigned from, unsigned base, int disp,
static void s390x_gen_st (VARR (uint8_t) * insn_varr, unsigned from, unsigned base, int disp,
MIR_type_t type) {
s390x_gen_ld_st (ctx, from, base, disp, type, FALSE);
s390x_gen_ld_st (insn_varr, from, base, disp, type, FALSE);
}
static void s390x_gen_ldstm (MIR_context_t ctx, unsigned from, unsigned to, unsigned base, int disp,
int ld_p) {
static void s390x_gen_ldstm (VARR (uint8_t) * insn_varr, unsigned from, unsigned to, unsigned base,
int disp, int ld_p) {
uint64_t dl = disp & 0xfff, dh = (disp >> 12) & 0xff;
uint64_t common = ((uint64_t) from << 36) | ((uint64_t) to << 32) | ((uint64_t) base << 28)
| (dl << 16) | (dh << 8);
uint64_t g = ((0xebl << 40) | common | (ld_p ? 0x4 : 0x24)) << 16;
/* (lmg|stmg) from,to,disp(base): */
assert (from < 16 && to < 16 && base < 16 && -(1 << 19) < disp && disp < (1 << 19));
push_insns (ctx, (uint8_t *) &g, 6);
push_insns (insn_varr, (uint8_t *) &g, 6);
}
static void s390x_gen_jump (MIR_context_t ctx, unsigned int reg, int call_p) {
static void s390x_gen_jump (VARR (uint8_t) * insn_varr, unsigned int reg, int call_p) {
uint16_t bcr = (0x7 << 8) | (15 << 4) | reg; /* bcr 15,reg: */
uint16_t balr = (0x5 << 8) | (14 << 4) | reg; /* balr 14,reg: */
assert (reg < 16);
push_insns (ctx, (uint8_t *) (call_p ? &balr : &bcr), 2);
push_insns (insn_varr, (uint8_t *) (call_p ? &balr : &bcr), 2);
}
static void s390x_gen_addi (MIR_context_t ctx, unsigned dst, unsigned src, int disp) {
static void s390x_gen_addi (VARR (uint8_t) * insn_varr, unsigned dst, unsigned src, int disp) {
uint64_t dl = disp & 0xfff, dh = (disp >> 12) & 0xff;
uint64_t ops = ((uint64_t) dst << 36) | ((uint64_t) src << 28) | (dl << 16) | (dh << 8);
uint64_t lay = ((0xe3l << 40) | ops | 0x71) << 16; /* lay dst,disp(src) */
assert (dst < 16 && src < 16 && -(1 << 19) < disp && disp < (1 << 19));
push_insns (ctx, (uint8_t *) &lay, 6);
push_insns (insn_varr, (uint8_t *) &lay, 6);
}
static void s390x_gen_3addrs (MIR_context_t ctx, unsigned int r1, void *a1, unsigned int r2,
void *a2, unsigned int r3, void *a3) {
static void s390x_gen_3addrs (VARR (uint8_t) * insn_varr, unsigned int r1, void *a1,
unsigned int r2, void *a2, unsigned int r3, void *a3) {
/* 6b:lalr r3,22+align;6b:lg r1,0(r3);6b:lg r2,8(r3);6b:lg r3,16(r3);4b:bc m15,28;align;a1-a3 */
size_t rem = (VARR_LENGTH (uint8_t, machine_insns) + 28) % 8;
size_t rem = (VARR_LENGTH (uint8_t, insn_varr) + 28) % 8;
size_t padding = rem == 0 ? 0 : 8 - rem;
uint64_t lalr = ((0xc0l << 40) | ((uint64_t) r1 << 36) | (28 + padding) / 2) << 16;
uint32_t brc = (0xa7 << 24) | (15 << 20) | (4 << 16) | (28 + padding) / 2; /* brc m15,28: */
assert (r1 != 0);
push_insns (ctx, (uint8_t *) &lalr, 6);
s390x_gen_ld (ctx, r3, r1, 16, MIR_T_I64); /* lg r3,16(r1) */
s390x_gen_ld (ctx, r2, r1, 8, MIR_T_I64); /* lg r2,8(r1) */
s390x_gen_ld (ctx, r1, r1, 0, MIR_T_I64); /* lg r1,0(r1) */
push_insns (ctx, (uint8_t *) &brc, 4);
for (size_t i = 0; i < padding; i++) VARR_PUSH (uint8_t, machine_insns, 0);
push_insns (ctx, (uint8_t *) &a1, 8);
push_insns (ctx, (uint8_t *) &a2, 8);
push_insns (ctx, (uint8_t *) &a3, 8);
push_insns (insn_varr, (uint8_t *) &lalr, 6);
s390x_gen_ld (insn_varr, r3, r1, 16, MIR_T_I64); /* lg r3,16(r1) */
s390x_gen_ld (insn_varr, r2, r1, 8, MIR_T_I64); /* lg r2,8(r1) */
s390x_gen_ld (insn_varr, r1, r1, 0, MIR_T_I64); /* lg r1,0(r1) */
push_insns (insn_varr, (uint8_t *) &brc, 4);
for (size_t i = 0; i < padding; i++) VARR_PUSH (uint8_t, insn_varr, 0);
push_insns (insn_varr, (uint8_t *) &a1, 8);
push_insns (insn_varr, (uint8_t *) &a2, 8);
push_insns (insn_varr, (uint8_t *) &a3, 8);
}
static void s390x_gen_blk_mov (VARR (uint8_t) * insn_varr, uint32_t param_offset,
uint32_t addr_offset, uint32_t qwords, uint32_t addr_reg) {
uint16_t *addr;
static const uint16_t blk_mov_pat[] = {
/*0:*/ 0xa7a9, 0x0000, /* lghi %r10,<size> */
/*4:*/ 0xa7ab, 0xfff8, /* aghi %r10,-8 */
/*8:*/ 0xe30a, 0x9000, 0x0004, /* lg %r0,0(%r10,%r9) */
/*14:*/ 0xe30a, 0x0000, 0x0024, /* stg %r0,0(%r10,<addr_reg:2-6,8>) */
/*20:*/ 0xb902, 0x00aa, /* ltgr %r10,%r10 */
/*24:*/ 0xa724, 0xfff6, /* jh 4 */
};
s390x_gen_addi (insn_varr, addr_reg, 15, addr_offset); /* lay <addr_reg>,addr_offset(r15) */
if (qwords == 0) return;
assert (qwords * 8 < (1 << 15) && addr_reg < 16 && addr_offset % 8 == 0);
s390x_gen_ld (insn_varr, 9, 7, param_offset, MIR_T_I64); /* lg* 9,param_offset(r7) */
addr = (uint16_t *) push_insns (insn_varr, (uint8_t *) blk_mov_pat, sizeof (blk_mov_pat));
addr[1] |= qwords * 8; /* lghi */
addr[8] |= addr_reg << 12; /* stg */
}
void *_MIR_get_bstart_builtin (MIR_context_t ctx) {
VARR_TRUNC (uint8_t, machine_insns, 0);
s390x_gen_mov (ctx, 2, 15); /* lgr r2,15 */
s390x_gen_jump (ctx, 14, FALSE); /* bcr m15,r14 */
return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
VARR (uint8_t) * code;
void *res;
VARR_CREATE (uint8_t, code, 128);
s390x_gen_mov (code, 2, 15); /* lgr r2,15 */
s390x_gen_jump (code, 14, FALSE); /* bcr m15,r14 */
res = _MIR_publish_code (ctx, VARR_ADDR (uint8_t, code), VARR_LENGTH (uint8_t, code));
VARR_DESTROY (uint8_t, code);
return res;
}
void *_MIR_get_bend_builtin (MIR_context_t ctx) {
VARR_TRUNC (uint8_t, machine_insns, 0);
s390x_gen_ld (ctx, 0, 15, 0, MIR_T_I64); /* r0 = 0(r15) */
s390x_gen_st (ctx, 0, 2, 0, MIR_T_I64); /* 0(r2) = r0 */
s390x_gen_mov (ctx, 15, 2); /* lgr r15,2 */
s390x_gen_jump (ctx, 14, FALSE); /* bcr m15,r14 */
return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
VARR (uint8_t) * code;
void *res;
VARR_CREATE (uint8_t, code, 128);
s390x_gen_ld (code, 0, 15, 0, MIR_T_I64); /* r0 = 0(r15) */
s390x_gen_st (code, 0, 2, 0, MIR_T_I64); /* 0(r2) = r0 */
s390x_gen_mov (code, 15, 2); /* lgr r15,2 */
s390x_gen_jump (code, 14, FALSE); /* bcr m15,r14 */
res = _MIR_publish_code (ctx, VARR_ADDR (uint8_t, code), VARR_LENGTH (uint8_t, code));
VARR_DESTROY (uint8_t, code);
return res;
}
void *_MIR_get_thunk (MIR_context_t ctx) {
const int max_thunk_len = (4 * 8); /* see _MIR_redirect_thunk */
VARR_TRUNC (uint8_t, machine_insns, 0);
for (int i = 0; i < max_thunk_len; i++) VARR_PUSH (uint8_t, machine_insns, 0);
return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
VARR (uint8_t) * code;
void *res;
VARR_CREATE (uint8_t, code, 128);
for (int i = 0; i < max_thunk_len; i++) VARR_PUSH (uint8_t, code, 0);
res = _MIR_publish_code (ctx, VARR_ADDR (uint8_t, code), VARR_LENGTH (uint8_t, code));
VARR_DESTROY (uint8_t, code);
return res;
}
void _MIR_redirect_thunk (MIR_context_t ctx, void *thunk, void *to) {
int64_t offset = (uint8_t *) to - (uint8_t *) thunk;
VARR_TRUNC (uint8_t, machine_insns, 0);
VARR (uint8_t) * code;
VARR_CREATE (uint8_t, code, 128);
assert (offset % 2 == 0);
offset /= 2;
if (-(1l << 31) < offset && offset < (1l << 31)) { /* brcl m15,offset: */
uint64_t brcl = ((0xc0l << 40) | (15l << 36) | (4l << 32) | offset & 0xffffffff) << 16;
push_insns (ctx, (uint8_t *) &brcl, 6);
push_insns (code, (uint8_t *) &brcl, 6);
} else { /* 6b:lalr r1,8+padding; 6b:lg r1,0(r1); 2b:bcr m15,r1;padding; 64-bit address: */
size_t rem = (VARR_LENGTH (uint8_t, machine_insns) + 14) % 8;
size_t rem = (VARR_LENGTH (uint8_t, code) + 14) % 8;
size_t padding = rem == 0 ? 0 : 8 - rem;
uint64_t lalr = ((0xc0l << 40) | (1l << 36) | (14 + padding) / 2) << 16;
uint64_t lg = ((0xe3l << 40) | (1l << 36) | (1l << 28) | 0x4) << 16;
uint16_t bcr = (0x7 << 8) | (15 << 4) | 1; /* bcr 15,r1: */
push_insns (ctx, (uint8_t *) &lalr, 6);
push_insns (ctx, (uint8_t *) &lg, 6);
push_insns (ctx, (uint8_t *) &bcr, 2);
for (size_t i = 0; i < padding; i++) VARR_PUSH (uint8_t, machine_insns, 0);
push_insns (ctx, (uint8_t *) &to, 8);
push_insns (code, (uint8_t *) &lalr, 6);
push_insns (code, (uint8_t *) &lg, 6);
push_insns (code, (uint8_t *) &bcr, 2);
for (size_t i = 0; i < padding; i++) VARR_PUSH (uint8_t, code, 0);
push_insns (code, (uint8_t *) &to, 8);
}
_MIR_change_code (ctx, thunk, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
_MIR_change_code (ctx, thunk, VARR_ADDR (uint8_t, code), VARR_LENGTH (uint8_t, code));
VARR_DESTROY (uint8_t, code);
}
struct s390x_va_list {
@ -193,6 +230,10 @@ void *va_arg_builtin (void *p, uint64_t t) {
return a;
}
void va_block_arg_builtin (void *res, void *p, size_t s, uint64_t ncase) {
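/* On s390x every BLK/RBLK value reaches the callee by address (see the comment near the
   top of this file), so the va slot is read below as an I64 pointer and the block is
   copied from the memory it points to; the ncase classification is not needed here. */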
memcpy (res, *(void **) va_arg_builtin (p, MIR_T_I64), s);
}
void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a) {
struct s390x_va_list *va = p;
va_list *vap = a;
@ -208,92 +249,113 @@ void va_end_interp_builtin (MIR_context_t ctx, void *p) {}
allocate a stack frame (S390X_STACK_HEADER_SIZE + param area size + ld arg values size);
r1=r2 (fun_addr);
r7=r3 (res_arg_addresses);
(arg_reg=mem[r7,arg_offset] or (f1,r0)=mem[r7,arg_offset];mem[r15,S390X_STACK_HEADER_SIZE+offset]=(f1,r0)) ...
call *r1;
(arg_reg=mem[r7,arg_offset] or
(f1,r0)=mem[r7,arg_offset];mem[r15,S390X_STACK_HEADER_SIZE+offset]=(f1,r0)) ... call *r1;
r0=mem[r7,<res_offset>]; res_reg=mem[r0]; ...
restore r15; restore r6, r7, r14; return. */
void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, size_t nargs,
MIR_type_t *arg_types, int vararg_p) {
_MIR_arg_desc_t *arg_descs, int vararg_p) {
MIR_type_t type;
int n_gpregs = 0, n_fpregs = 0, res_reg = 7, frame_size, disp, param_offset, param_size = 0;
int n_gpregs = 0, n_fpregs = 0, res_reg = 7, frame_size, disp, param_offset, blk_offset;
uint32_t qwords, addr_reg;
VARR (uint8_t) * code;
void *res;
VARR_TRUNC (uint8_t, machine_insns, 0);
frame_size = S390X_STACK_HEADER_SIZE;
VARR_CREATE (uint8_t, code, 128);
blk_offset = frame_size = S390X_STACK_HEADER_SIZE;
if (nres > 0 && res_types[0] == MIR_T_LD) n_gpregs++; /* ld address */
for (uint32_t i = 0; i < nargs; i++) { /* calculate param area size: */
type = arg_types[i];
if (type == MIR_T_LD) frame_size += 16; /* address for ld value */
type = arg_descs[i].type;
if (MIR_blk_type_p (type)) frame_size += (arg_descs[i].size + 7) / 8; /* blk value space */
if ((type == MIR_T_F || type == MIR_T_D) && n_fpregs < 4) {
n_fpregs++;
} else if (type != MIR_T_F && type != MIR_T_D && n_gpregs < 5) {
} else if (type != MIR_T_F && type != MIR_T_D && n_gpregs < 5) { /* RBLK too */
n_gpregs++;
} else {
frame_size += 8;
param_size += 8;
blk_offset += 8;
}
}
s390x_gen_ldstm (ctx, 6, 7, 15, 48, FALSE); /* stmg 6,7,48(r15) : */
s390x_gen_st (ctx, 14, 15, 112, MIR_T_I64); /* stg r14,112(r15) */
s390x_gen_addi (ctx, 15, 15, -frame_size); /* lay r15,-frame_size(r15) */
s390x_gen_mov (ctx, 1, 2); /* fun_addr */
s390x_gen_mov (ctx, res_reg, 3); /* results & args */
s390x_gen_ldstm (code, 6, 7, 15, 48, FALSE); /* stmg 6,7,48(r15) : */
s390x_gen_ldstm (code, 8, 9, 15, 64, FALSE); /* stmg 8,9,64(r15) : */
s390x_gen_st (code, 10, 15, 80, MIR_T_I64); /* stg r10,80(r15) */
s390x_gen_st (code, 14, 15, 112, MIR_T_I64); /* stg r14,112(r15) */
s390x_gen_addi (code, 15, 15, -frame_size); /* lay r15,-frame_size(r15) */
s390x_gen_mov (code, 1, 2); /* fun_addr */
s390x_gen_mov (code, res_reg, 3); /* results & args */
n_gpregs = n_fpregs = 0;
param_offset = nres * 16; /* args start */
disp = S390X_STACK_HEADER_SIZE; /* param area start */
if (nres > 0 && res_types[0] == MIR_T_LD) { /* ld address: */
s390x_gen_mov (ctx, 2, res_reg); /* lgr r2,r7 */
s390x_gen_mov (code, 2, res_reg); /* lgr r2,r7 */
n_gpregs++;
}
for (uint32_t i = 0; i < nargs; i++) { /* load args: */
type = arg_types[i];
type = arg_descs[i].type;
if ((type == MIR_T_F || type == MIR_T_D) && n_fpregs < 4) {
/* (le,ld) (f0,f2,f4,f6),param_offset(r7) */
s390x_gen_ld (ctx, n_fpregs * 2, res_reg, param_offset, type);
s390x_gen_ld (code, n_fpregs * 2, res_reg, param_offset, type);
n_fpregs++;
} else if (type == MIR_T_F || type == MIR_T_D) {
s390x_gen_ld (ctx, 1, res_reg, param_offset, type); /* (le,ld) f1,param_offset(r7) */
s390x_gen_st (ctx, 1, 15, disp, type); /* (ste,std) f1,disp(r15) */
s390x_gen_ld (code, 1, res_reg, param_offset, type); /* (le,ld) f1,param_offset(r7) */
s390x_gen_st (code, 1, 15, disp, type); /* (ste,std) f1,disp(r15) */
disp += 8;
} else if (type == MIR_T_LD && n_gpregs < 5) { /* ld address */
s390x_gen_addi (ctx, n_gpregs + 2, res_reg, param_offset); /* lay rn,param_offset(r7) */
} else if (type == MIR_T_LD && n_gpregs < 5) { /* ld address */
s390x_gen_addi (code, n_gpregs + 2, res_reg, param_offset); /* lay rn,param_offset(r7) */
n_gpregs++;
} else if (type == MIR_T_LD) { /* pass address of location in the result: */
s390x_gen_addi (ctx, 0, res_reg, param_offset); /* lay r0,param_offset(r7) */
s390x_gen_st (ctx, 0, 15, disp, MIR_T_I64); /* stg r0,disp(r15) */
} else if (type == MIR_T_LD) { /* pass address of location in the result: */
s390x_gen_addi (code, 0, res_reg, param_offset); /* lay r0,param_offset(r7) */
s390x_gen_st (code, 0, 15, disp, MIR_T_I64); /* stg r0,disp(r15) */
disp += 8;
} else if (n_gpregs < 5) {
s390x_gen_ld (ctx, n_gpregs + 2, res_reg, param_offset, MIR_T_I64); /* lg* rn,param_offset(r7) */
} else if (MIR_blk_type_p (type)) {
qwords = (arg_descs[i].size + 7) / 8;
addr_reg = n_gpregs < 5 ? n_gpregs + 2 : 8;
s390x_gen_blk_mov (code, param_offset, blk_offset, qwords, addr_reg);
blk_offset += qwords * 8;
if (n_gpregs < 5) {
n_gpregs++;
} else {
s390x_gen_st (code, 8, 15, disp, MIR_T_I64); /* stg r8,disp(r15) */
disp += 8;
}
} else if (n_gpregs < 5) { /* RBLK too */
s390x_gen_ld (code, n_gpregs + 2, res_reg, param_offset,
MIR_T_I64); /* lg* rn,param_offset(r7) */
n_gpregs++;
} else {
s390x_gen_ld (ctx, 0, res_reg, param_offset, MIR_T_I64); /* lg* r0,param_offset(r7) */
s390x_gen_st (ctx, 0, 15, disp, MIR_T_I64); /* stg* r0,disp(r15) */
s390x_gen_ld (code, 0, res_reg, param_offset, MIR_T_I64); /* lg* r0,param_offset(r7) */
s390x_gen_st (code, 0, 15, disp, MIR_T_I64); /* stg* r0,disp(r15) */
disp += 8;
}
param_offset += 16;
}
s390x_gen_jump (ctx, 1, TRUE); /* call *r1 */
s390x_gen_jump (code, 1, TRUE); /* call *r1 */
n_gpregs = n_fpregs = 0;
disp = 0;
for (uint32_t i = 0; i < nres; i++) {
type = res_types[i];
if (type == MIR_T_LD) continue; /* do nothing: the result value is already in results */
if ((type == MIR_T_F || type == MIR_T_D) && n_fpregs < 4) {
s390x_gen_st (ctx, n_fpregs * 2, res_reg, disp, type);
s390x_gen_st (code, n_fpregs * 2, res_reg, disp, type);
n_fpregs++;
} else if (type != MIR_T_F && type != MIR_T_D && n_gpregs < 1) { // just one gp reg
s390x_gen_st (ctx, n_gpregs + 2, res_reg, disp, MIR_T_I64);
s390x_gen_st (code, n_gpregs + 2, res_reg, disp, MIR_T_I64);
n_gpregs++;
} else {
(*error_func) (MIR_ret_error, "s390x can not handle this combination of return values");
MIR_get_error_func (ctx) (MIR_ret_error,
"s390x can not handle this combination of return values");
}
disp += 16;
}
s390x_gen_addi (ctx, 15, 15, frame_size); /* lay 15,frame_size(15) */
s390x_gen_ldstm (ctx, 6, 7, 15, 48, TRUE); /* lmg 6,7,48(r15) : */
s390x_gen_ld (ctx, 14, 15, 112, MIR_T_I64); /* lg 14,112(r15) */
s390x_gen_jump (ctx, 14, FALSE); /* bcr m15,r14 */
return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
s390x_gen_addi (code, 15, 15, frame_size); /* lay 15,frame_size(15) */
s390x_gen_ldstm (code, 6, 7, 15, 48, TRUE); /* lmg 6,7,48(r15) : */
s390x_gen_ldstm (code, 8, 9, 15, 64, TRUE); /* lmg 8,9,64(r15) : */
s390x_gen_ld (code, 10, 15, 80, MIR_T_I64); /* lg 10,80(r15) */
s390x_gen_ld (code, 14, 15, 112, MIR_T_I64); /* lg 14,112(r15) */
s390x_gen_jump (code, 14, FALSE); /* bcr m15,r14 */
res = _MIR_publish_code (ctx, VARR_ADDR (uint8_t, code), VARR_LENGTH (uint8_t, code));
VARR_DESTROY (uint8_t, code);
return res;
}
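/* Illustrative usage sketch for the published ff-call thunk (the typedef, union and helper
   names below are assumptions, not code from this patch): per the brief above, the thunk
   takes the target C function address and a buffer of 16-byte value slots -- results first,
   then the argument values (param_offset starts at nres * 16).  For a call with two integer
   arguments and one integer result it could be driven roughly like this: */
typedef union {
  int64_t i;
  double d;
  long double ld; /* forces the assumed 16-byte slot size on this target */
} ff_slot_t;
typedef void (*ff_call_t) (void *fun_addr, ff_slot_t *res_arg_slots);

static int64_t example_ff_call_i64 (void *ff_thunk, void *fun_addr, int64_t a, int64_t b) {
  ff_slot_t slots[3]; /* slot 0: result; slots 1-2: the two arguments */
  slots[1].i = a;
  slots[2].i = b;
  ((ff_call_t) ff_thunk) (fun_addr, slots);
  return slots[0].i; /* the generated code stored the callee's return value here */
}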
/* Transform C call to call of void handler (MIR_context_t ctx, MIR_item_t func_item,
@ -306,63 +368,67 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
uint32_t nres = func->nres, nargs = func->nargs;
MIR_type_t type, *res_types = func->res_types;
int disp, frame_size, local_var_size, n_gpregs, n_fpregs, va_list_disp, results_disp;
VARR (uint8_t) * code;
void *res;
VARR_TRUNC (uint8_t, machine_insns, 0);
frame_size = S390X_STACK_HEADER_SIZE; /* register save area */
s390x_gen_st (ctx, 14, 15, 112, MIR_T_I64); /* stg 14,112(r15) */
s390x_gen_ldstm (ctx, 2, 6, 15, 16, FALSE); /* stmg 2,6,16(r15) : */
for (unsigned reg = 0; reg <= 6; reg += 2) /* stdy f0,f2,f4,f6,128(r15) : */
s390x_gen_st (ctx, reg, 15, reg * 4 + 128, MIR_T_D);
VARR_CREATE (uint8_t, code, 128);
frame_size = S390X_STACK_HEADER_SIZE; /* register save area */
s390x_gen_st (code, 14, 15, 112, MIR_T_I64); /* stg 14,112(r15) */
s390x_gen_ldstm (code, 2, 6, 15, 16, FALSE); /* stmg 2,6,16(r15) : */
for (unsigned reg = 0; reg <= 6; reg += 2) /* stdy f0,f2,f4,f6,128(r15) : */
s390x_gen_st (code, reg, 15, reg * 4 + 128, MIR_T_D);
local_var_size = sizeof (struct s390x_va_list) + nres * 16; /* allocate va and results */
va_list_disp = frame_size;
results_disp = va_list_disp + sizeof (struct s390x_va_list);
frame_size += local_var_size;
assert (frame_size % 8 == 0);
s390x_gen_addi (ctx, 15, 15, -frame_size);
s390x_gen_addi (code, 15, 15, -frame_size);
/* setup va: mvghi va(15),(0,1): __gpr */
s390x_gen_mvi (ctx, nres > 0 && res_types[0] == MIR_T_LD ? 1 : 0, 15, va_list_disp);
s390x_gen_mvi (ctx, 0, 15, va_list_disp + 8); /* mvghi va+8(15),0: __fpr */
s390x_gen_addi (ctx, 1, 15, frame_size); /* lay 1,frame_size(15) */
s390x_gen_st (ctx, 1, 15, va_list_disp + 24, MIR_T_I64); /* stg 1,va+24(r15): __reg_save_area */
s390x_gen_addi (ctx, 1, 1, S390X_STACK_HEADER_SIZE); /* lay 1,S390X_STACK_HEADER_SIZE(1) */
s390x_gen_mvi (code, nres > 0 && res_types[0] == MIR_T_LD ? 1 : 0, 15, va_list_disp);
s390x_gen_mvi (code, 0, 15, va_list_disp + 8); /* mvghi va+8(15),0: __fpr */
s390x_gen_addi (code, 1, 15, frame_size); /* lay 1,frame_size(15) */
s390x_gen_st (code, 1, 15, va_list_disp + 24, MIR_T_I64); /* stg 1,va+24(r15): __reg_save_area */
s390x_gen_addi (code, 1, 1, S390X_STACK_HEADER_SIZE); /* lay 1,S390X_STACK_HEADER_SIZE(1) */
/* stg 1,va+16(r15):__overflow_arg_area: */
s390x_gen_st (ctx, 1, 15, va_list_disp + 16, MIR_T_I64);
s390x_gen_st (code, 1, 15, va_list_disp + 16, MIR_T_I64);
/* call handler: */
s390x_gen_3addrs (ctx, 2, ctx, 3, func_item, 1, handler);
s390x_gen_addi (ctx, 4, 15, va_list_disp);
s390x_gen_addi (ctx, 5, 15, results_disp);
s390x_gen_jump (ctx, 1, TRUE);
s390x_gen_3addrs (code, 2, ctx, 3, func_item, 1, handler);
s390x_gen_addi (code, 4, 15, va_list_disp);
s390x_gen_addi (code, 5, 15, results_disp);
s390x_gen_jump (code, 1, TRUE);
/* setup result regs: */
disp = results_disp;
n_gpregs = n_fpregs = 0;
for (uint32_t i = 0; i < nres; i++) {
type = res_types[i];
if ((type == MIR_T_F || type == MIR_T_D) && n_fpregs < 4) {
s390x_gen_ld (ctx, n_fpregs * 2, 15, disp, type);
s390x_gen_ld (code, n_fpregs * 2, 15, disp, type);
n_fpregs++;
} else if (type != MIR_T_F && type != MIR_T_D && n_gpregs < 1) { // just one gp reg
if (type != MIR_T_LD) {
s390x_gen_ld (ctx, n_gpregs + 2, 15, disp, MIR_T_I64);
s390x_gen_ld (code, n_gpregs + 2, 15, disp, MIR_T_I64);
} else {
/* ld address: lg r2,16+frame_size(r15) */
s390x_gen_ld (ctx, 2, 15, 16 + frame_size, MIR_T_I64);
s390x_gen_ld (ctx, 0, 15, disp, MIR_T_D); /* ld f0,disp(r15) */
s390x_gen_ld (ctx, 2, 15, disp + 8, MIR_T_D); /* ld f2,disp + 8(r15) */
s390x_gen_st (ctx, 0, 2, 0, MIR_T_D); /* st f0,0(r2) */
s390x_gen_st (ctx, 2, 2, 8, MIR_T_D); /* st f2,8(r2) */
s390x_gen_ld (code, 2, 15, 16 + frame_size, MIR_T_I64);
s390x_gen_ld (code, 0, 15, disp, MIR_T_D); /* ld f0,disp(r15) */
s390x_gen_ld (code, 2, 15, disp + 8, MIR_T_D); /* ld f2,disp + 8(r15) */
s390x_gen_st (code, 0, 2, 0, MIR_T_D); /* st f0,0(r2) */
s390x_gen_st (code, 2, 2, 8, MIR_T_D); /* st f2,8(r2) */
}
n_gpregs++;
} else {
(*error_func) (MIR_ret_error, "s390x can not handle this combination of return values");
MIR_get_error_func (ctx) (MIR_ret_error,
"s390x can not handle this combination of return values");
}
disp += 16;
}
s390x_gen_addi (ctx, 15, 15, frame_size); /* lay 15,frame_size(15) */
s390x_gen_ld (ctx, 6, 15, 48, MIR_T_I64); /* lg 6,48(r15) : */
s390x_gen_ld (ctx, 14, 15, 112, MIR_T_I64); /* lg 14,112(r15) */
s390x_gen_jump (ctx, 14, FALSE); /* bcr m15,r14 */
return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
s390x_gen_addi (code, 15, 15, frame_size); /* lay 15,frame_size(15) */
s390x_gen_ld (code, 6, 15, 48, MIR_T_I64); /* lg 6,48(r15) : */
s390x_gen_ld (code, 14, 15, 112, MIR_T_I64); /* lg 14,112(r15) */
s390x_gen_jump (code, 14, FALSE); /* bcr m15,r14 */
res = _MIR_publish_code (ctx, VARR_ADDR (uint8_t, code), VARR_LENGTH (uint8_t, code));
VARR_DESTROY (uint8_t, code);
return res;
}
/* Brief: save r14 (r15+120); save all param regs r2-r6 (r15+16),f0,f2,f4,f6 (r15+128);
@ -370,23 +436,26 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
r2 = call hook_address (ctx, called_func); r1=r2; restore all param regs, r15, r14; bcr r1 */
void *_MIR_get_wrapper (MIR_context_t ctx, MIR_item_t called_func, void *hook_address) {
int frame_size = S390X_STACK_HEADER_SIZE;
VARR (uint8_t) * code;
void *res;
VARR_TRUNC (uint8_t, machine_insns, 0);
s390x_gen_st (ctx, 14, 15, 112, MIR_T_I64); /* stg 14,112(r15) */
s390x_gen_ldstm (ctx, 2, 6, 15, 16, FALSE); /* stmg 2,6,16(r15) : */
for (unsigned reg = 0; reg <= 6; reg += 2) /* stdy f0,f2,f4,f6,128(r15) : */
s390x_gen_st (ctx, reg, 15, reg * 4 + 128, MIR_T_D);
VARR_CREATE (uint8_t, code, 128);
s390x_gen_st (code, 14, 15, 112, MIR_T_I64); /* stg 14,112(r15) */
s390x_gen_ldstm (code, 2, 6, 15, 16, FALSE); /* stmg 2,6,16(r15) : */
for (unsigned reg = 0; reg <= 6; reg += 2) /* stdy f0,f2,f4,f6,128(r15) : */
s390x_gen_st (code, reg, 15, reg * 4 + 128, MIR_T_D);
/* r15 -= frame_size: */
s390x_gen_addi (ctx, 15, 15, -frame_size);
s390x_gen_3addrs (ctx, 2, ctx, 3, called_func, 4, hook_address);
s390x_gen_jump (ctx, 4, TRUE);
s390x_gen_mov (ctx, 1, 2);
s390x_gen_addi (ctx, 15, 15, frame_size);
s390x_gen_addi (code, 15, 15, -frame_size);
s390x_gen_3addrs (code, 2, ctx, 3, called_func, 4, hook_address);
s390x_gen_jump (code, 4, TRUE);
s390x_gen_mov (code, 1, 2);
s390x_gen_addi (code, 15, 15, frame_size);
for (unsigned reg = 0; reg <= 6; reg += 2) /* ldy fn,disp(r15) : */
s390x_gen_ld (ctx, reg, 15, reg * 4 + 128, MIR_T_D);
s390x_gen_ldstm (ctx, 2, 6, 15, 16, TRUE); /* lmg 2,6,16(r15) : */
s390x_gen_ld (ctx, 14, 15, 112, MIR_T_I64); /* lg 14,112(r15) */
s390x_gen_jump (ctx, 1, FALSE);
return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
s390x_gen_ld (code, reg, 15, reg * 4 + 128, MIR_T_D);
s390x_gen_ldstm (code, 2, 6, 15, 16, TRUE); /* lmg 2,6,16(r15) : */
s390x_gen_ld (code, 14, 15, 112, MIR_T_I64); /* lg 14,112(r15) */
s390x_gen_jump (code, 1, FALSE);
res = _MIR_publish_code (ctx, VARR_ADDR (uint8_t, code), VARR_LENGTH (uint8_t, code));
VARR_DESTROY (uint8_t, code);
return res;
}

@ -1,5 +1,5 @@
/* This file is a part of MIR project.
Copyright (C) 2018-2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2018-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
#ifndef MIR_VARR_H
@ -17,12 +17,12 @@
#define VARR_ASSERT(EXPR, OP, T) ((void) (EXPR))
#else
static inline void mir_var_assert_fail (const char *op, const char *var) {
static inline void mir_varr_assert_fail (const char *op, const char *var) {
fprintf (stderr, "wrong %s for %s", op, var);
assert (0);
}
#define VARR_ASSERT(EXPR, OP, T) (void) ((EXPR) ? 0 : (mir_var_assert_fail (OP, #T), 0))
#define VARR_ASSERT(EXPR, OP, T) (void) ((EXPR) ? 0 : (mir_varr_assert_fail (OP, #T), 0))
#endif
@ -97,17 +97,14 @@ static inline void MIR_VARR_NO_RETURN mir_varr_error (const char *message) {
return varr->varr[varr->els_num - 1]; \
} \
\
static inline T VARR_OP_DEF (T, get) (const VARR (T) * varr, unsigned ix) { \
static inline T VARR_OP_DEF (T, get) (const VARR (T) * varr, size_t ix) { \
VARR_ASSERT (varr && varr->varr && ix < varr->els_num, "get", T); \
return varr->varr[ix]; \
} \
\
static inline T VARR_OP_DEF (T, set) (const VARR (T) * varr, unsigned ix, T obj) { \
T old_obj; \
static inline void VARR_OP_DEF (T, set) (const VARR (T) * varr, size_t ix, T obj) { \
VARR_ASSERT (varr && varr->varr && ix < varr->els_num, "set", T); \
old_obj = varr->varr[ix]; \
varr->varr[ix] = obj; \
return old_obj; \
} \
\
static inline void VARR_OP_DEF (T, trunc) (VARR (T) * varr, size_t size) { \

@ -1,7 +1,17 @@
/* This file is a part of MIR project.
Copyright (C) 2018-2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2018-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* RBLK args are always passed by address.
BLK0 is first copied onto the caller stack and passed implicitly.
BLK1 is passed in general regs,
BLK2 is passed in fp regs,
BLK3 is passed in a gpr and then an fpr,
BLK4 is passed in an fpr and then a gpr.
If there are not enough regs, they are passed as BLK.
Windows: small BLKs (<= 8 bytes) are passed by value;
all other BLKs are always passed by pointer as a regular int arg. */
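/* Illustration of the classification above (the structs are examples only; each is two
   quadwords, so it can take the BLK1-BLK4 register paths when enough registers of the
   right kind are free, and otherwise degrades to a plain stack-copied BLK): */
struct blk1_example { int64_t a, b; };        /* integer, integer -> BLK1: two general regs   */
struct blk2_example { double x, y; };         /* fp, fp           -> BLK2: two fp regs        */
struct blk3_example { int64_t a; double x; }; /* integer, fp      -> BLK3: a gpr, then an fpr */
struct blk4_example { double x; int64_t a; }; /* fp, integer      -> BLK4: an fpr, then a gpr */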
#define VA_LIST_IS_ARRAY_P 1
void *_MIR_get_bstart_builtin (MIR_context_t ctx) {
@ -13,7 +23,7 @@ void *_MIR_get_bstart_builtin (MIR_context_t ctx) {
}
void *_MIR_get_bend_builtin (MIR_context_t ctx) {
static const uint8_t bend_code[] = {
#ifndef _WIN64
#ifndef _WIN32
0x48, 0x8b, 0x04, 0x24, /* rax = (rsp) */
0x48, 0x89, 0xfc, /* rsp = rdi */
0xff, 0xe0, /* jmp *rax */
@ -26,7 +36,7 @@ void *_MIR_get_bend_builtin (MIR_context_t ctx) {
return _MIR_publish_code (ctx, bend_code, sizeof (bend_code));
}
#ifndef _WIN64
#ifndef _WIN32
struct x86_64_va_list {
uint32_t gp_offset, fp_offset;
uint64_t *overflow_arg_area, *reg_save_area;
@ -51,6 +61,55 @@ void *va_arg_builtin (void *p, uint64_t t) {
return a;
}
void va_block_arg_builtin (void *res, void *p, size_t s, uint64_t ncase) {
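/* ncase carries the block class from the comment near the top of this file
   (1: general regs, 2: fp regs, 3: gpr then fpr, 4: fpr then gpr); when the register
   save area cannot supply the block (the caller passed it on the stack), the copy
   falls back to overflow_arg_area at the end of the function. */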
struct x86_64_va_list *va = p;
size_t size = ((s + 7) / 8) * 8;
void *a = va->overflow_arg_area;
union {
uint64_t i;
double d;
} u[2];
switch (ncase) {
case 1:
if (va->gp_offset + size > 48) break;
u[0].i = *(uint64_t *) ((char *) va->reg_save_area + va->gp_offset);
va->gp_offset += 8;
if (size > 8) {
u[1].i = *(uint64_t *) ((char *) va->reg_save_area + va->gp_offset);
va->gp_offset += 8;
}
memcpy (res, &u, s);
return;
case 2:
u[0].d = *(double *) ((char *) va->reg_save_area + va->fp_offset);
va->fp_offset += 16;
if (size > 8) {
u[1].d = *(double *) ((char *) va->reg_save_area + va->fp_offset);
va->fp_offset += 16;
}
memcpy (res, &u, s);
return;
case 3:
case 4:
if (va->fp_offset > 160 || va->gp_offset > 40) break;
if (ncase == 3) {
u[0].i = *(uint64_t *) ((char *) va->reg_save_area + va->gp_offset);
u[1].d = *(double *) ((char *) va->reg_save_area + va->fp_offset);
} else {
u[0].d = *(double *) ((char *) va->reg_save_area + va->fp_offset);
u[1].i = *(uint64_t *) ((char *) va->reg_save_area + va->gp_offset);
}
va->fp_offset += 8;
va->gp_offset += 8;
memcpy (res, &u, s);
return;
default: break;
}
memcpy (res, a, s);
va->overflow_arg_area += size / 8;
}
void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a) {
struct x86_64_va_list *va = p;
va_list *vap = a;
@ -58,7 +117,9 @@ void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a) {
assert (sizeof (struct x86_64_va_list) == sizeof (va_list));
*va = *(struct x86_64_va_list *) vap;
}
#else
struct x86_64_va_list {
uint64_t *arg_area;
};
@ -70,6 +131,13 @@ void *va_arg_builtin (void *p, uint64_t t) {
return a;
}
void va_block_arg_builtin (void *res, void *p, size_t s, uint64_t ncase) {
struct x86_64_va_list *va = p;
void *a = s <= 8 ? va->arg_area : *(void **) va->arg_area; /* pass by pointer */
memcpy (res, a, s);
va->arg_area++;
}
void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a) {
struct x86_64_va_list **va = p;
va_list *vap = a;
@ -77,6 +145,7 @@ void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a) {
assert (sizeof (struct x86_64_va_list) == sizeof (va_list));
*va = (struct x86_64_va_list *) vap;
}
#endif
void va_end_interp_builtin (MIR_context_t ctx, void *p) {}
@ -97,7 +166,7 @@ void _MIR_redirect_thunk (MIR_context_t ctx, void *thunk, void *to) {
}
static const uint8_t save_pat[] = {
#ifndef _WIN64
#ifndef _WIN32
0x48, 0x81, 0xec, 0x80, 0, 0, 0, /*sub $0x80,%rsp */
0xf3, 0x0f, 0x7f, 0x04, 0x24, /*movdqu %xmm0,(%rsp) */
0xf3, 0x0f, 0x7f, 0x4c, 0x24, 0x10, /*movdqu %xmm1,0x10(%rsp) */
@ -114,15 +183,15 @@ static const uint8_t save_pat[] = {
0x56, /*push %rsi */
0x57, /*push %rdi */
#else
0x48, 0x89, 0x4c, 0x24, 0x08, /*mov %rcx,0x08(%rsp) */
0x48, 0x89, 0x54, 0x24, 0x10, /*mov %rdx,0x10(%rsp) */
0x4c, 0x89, 0x44, 0x24, 0x18, /*mov %r8, 0x18(%rsp) */
0x4c, 0x89, 0x4c, 0x24, 0x20, /*mov %r9, 0x20(%rsp) */
0x48, 0x89, 0x4c, 0x24, 0x08, /*mov %rcx,0x08(%rsp) */
0x48, 0x89, 0x54, 0x24, 0x10, /*mov %rdx,0x10(%rsp) */
0x4c, 0x89, 0x44, 0x24, 0x18, /*mov %r8, 0x18(%rsp) */
0x4c, 0x89, 0x4c, 0x24, 0x20, /*mov %r9, 0x20(%rsp) */
#endif
};
static const uint8_t restore_pat[] = {
#ifndef _WIN64
#ifndef _WIN32
0x5f, /*pop %rdi */
0x5e, /*pop %rsi */
0x5a, /*pop %rdx */
@ -139,26 +208,26 @@ static const uint8_t restore_pat[] = {
0xf3, 0x0f, 0x6f, 0x7c, 0x24, 0x70, /*movdqu 0x70(%rsp),%xmm7 */
0x48, 0x81, 0xc4, 0x80, 0, 0, 0, /*add $0x80,%rsp */
#else
0x48, 0x8b, 0x4c, 0x24, 0x08, /*mov 0x08(%rsp),%rcx */
0x48, 0x8b, 0x54, 0x24, 0x10, /*mov 0x10(%rsp),%rdx */
0x4c, 0x8b, 0x44, 0x24, 0x18, /*mov 0x18(%rsp),%r8 */
0x4c, 0x8b, 0x4c, 0x24, 0x20, /*mov 0x20(%rsp),%r9 */
0xf3, 0x0f, 0x7e, 0x44, 0x24, 0x08, /*movq 0x08(%rsp),%xmm0*/
0xf3, 0x0f, 0x7e, 0x4c, 0x24, 0x10, /*movq 0x10(%rsp),%xmm1*/
0xf3, 0x0f, 0x7e, 0x54, 0x24, 0x18, /*movq 0x18(%rsp),%xmm2*/
0xf3, 0x0f, 0x7e, 0x5c, 0x24, 0x20, /*movq 0x20(%rsp),%xmm3*/
0x48, 0x8b, 0x4c, 0x24, 0x08, /*mov 0x08(%rsp),%rcx */
0x48, 0x8b, 0x54, 0x24, 0x10, /*mov 0x10(%rsp),%rdx */
0x4c, 0x8b, 0x44, 0x24, 0x18, /*mov 0x18(%rsp),%r8 */
0x4c, 0x8b, 0x4c, 0x24, 0x20, /*mov 0x20(%rsp),%r9 */
0xf3, 0x0f, 0x7e, 0x44, 0x24, 0x08, /*movq 0x08(%rsp),%xmm0*/
0xf3, 0x0f, 0x7e, 0x4c, 0x24, 0x10, /*movq 0x10(%rsp),%xmm1*/
0xf3, 0x0f, 0x7e, 0x54, 0x24, 0x18, /*movq 0x18(%rsp),%xmm2*/
0xf3, 0x0f, 0x7e, 0x5c, 0x24, 0x20, /*movq 0x20(%rsp),%xmm3*/
#endif
};
static uint8_t *push_insns (MIR_context_t ctx, const uint8_t *pat, size_t pat_len) {
for (size_t i = 0; i < pat_len; i++) VARR_PUSH (uint8_t, machine_insns, pat[i]);
return VARR_ADDR (uint8_t, machine_insns) + VARR_LENGTH (uint8_t, machine_insns) - pat_len;
static uint8_t *push_insns (VARR (uint8_t) * insn_varr, const uint8_t *pat, size_t pat_len) {
for (size_t i = 0; i < pat_len; i++) VARR_PUSH (uint8_t, insn_varr, pat[i]);
return VARR_ADDR (uint8_t, insn_varr) + VARR_LENGTH (uint8_t, insn_varr) - pat_len;
}
static void gen_mov (MIR_context_t ctx, uint32_t offset, uint32_t reg, int ld_p) {
static void gen_mov (VARR (uint8_t) * insn_varr, uint32_t offset, uint32_t reg, int ld_p) {
static const uint8_t ld_gp_reg[] = {0x48, 0x8b, 0x83, 0, 0, 0, 0 /* mov <offset>(%rbx),%reg */};
static const uint8_t st_gp_reg[] = {0x48, 0x89, 0x83, 0, 0, 0, 0 /* mov %reg,<offset>(%rbx) */};
uint8_t *addr = push_insns (ctx, ld_p ? ld_gp_reg : st_gp_reg,
uint8_t *addr = push_insns (insn_varr, ld_p ? ld_gp_reg : st_gp_reg,
ld_p ? sizeof (ld_gp_reg) : sizeof (st_gp_reg));
memcpy (addr + 3, &offset, sizeof (uint32_t));
assert (reg <= 15);
@ -166,14 +235,43 @@ static void gen_mov (MIR_context_t ctx, uint32_t offset, uint32_t reg, int ld_p)
addr[2] |= (reg & 7) << 3;
}
static void gen_movxmm (MIR_context_t ctx, uint32_t offset, uint32_t reg, int b32_p, int ld_p) {
static void gen_mov2 (VARR (uint8_t) * insn_varr, uint32_t offset, uint32_t reg, int ld_p) {
static const uint8_t ld_gp_reg[] = {0x49, 0x8b, 0x44, 0x24, 0 /* mov <offset>(%r12),%reg */};
static const uint8_t st_gp_reg[] = {0x49, 0x89, 0x44, 0x24, 0 /* mov %reg,<offset>(%r12) */};
uint8_t *addr = push_insns (insn_varr, ld_p ? ld_gp_reg : st_gp_reg,
ld_p ? sizeof (ld_gp_reg) : sizeof (st_gp_reg));
addr[4] = offset;
assert (reg <= 15);
addr[0] |= (reg >> 1) & 4;
addr[2] |= (reg & 7) << 3;
}
static void gen_blk_mov (VARR (uint8_t) * insn_varr, uint32_t offset, uint32_t addr_offset,
uint32_t qwords) {
static const uint8_t blk_mov_pat[] = {
/*0:*/ 0x4c, 0x8b, 0xa3, 0, 0, 0, 0, /*mov <addr_offset>(%rbx),%r12*/
/*7:*/ 0x48, 0xc7, 0xc0, 0, 0, 0, 0, /*mov <qwords>,%rax*/
/*e:*/ 0x48, 0x83, 0xe8, 0x01, /*sub $0x1,%rax*/
/*12:*/ 0x4d, 0x8b, 0x14, 0xc4, /*mov (%r12,%rax,8),%r10*/
/*16:*/ 0x4c, 0x89, 0x94, 0xc4, 0, 0, 0, 0, /*mov %r10,<offset>(%rsp,%rax,8)*/
/*1e:*/ 0x48, 0x85, 0xc0, /*test %rax,%rax*/
/*21:*/ 0x7f, 0xeb, /*jg e <L0>*/
};
uint8_t *addr = push_insns (insn_varr, blk_mov_pat, sizeof (blk_mov_pat));
memcpy (addr + 3, &addr_offset, sizeof (uint32_t));
memcpy (addr + 10, &qwords, sizeof (uint32_t));
memcpy (addr + 26, &offset, sizeof (uint32_t));
}
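/* Rough C equivalent of the loop emitted above (helper name and types are illustrative):
   the block whose address sits in the <addr_offset> slot of the res/arg buffer is copied
   one quadword at a time, highest index first, into the stack area at <offset>: */
static void gen_blk_mov_equivalent (uint64_t *stack_area, const uint64_t *blk_addr,
                                    uint32_t qwords) {
  for (int64_t i = (int64_t) qwords - 1; i >= 0; i--) /* rax counts down from qwords */
    stack_area[i] = blk_addr[i];
}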
static void gen_movxmm (VARR (uint8_t) * insn_varr, uint32_t offset, uint32_t reg, int b32_p,
int ld_p) {
static const uint8_t ld_xmm_reg_pat[] = {
0xf2, 0x0f, 0x10, 0x83, 0, 0, 0, 0 /* movs[sd] <offset>(%rbx),%xmm */
};
static const uint8_t st_xmm_reg_pat[] = {
0xf2, 0x0f, 0x11, 0x83, 0, 0, 0, 0 /* movs[sd] %xmm, <offset>(%rbx) */
};
uint8_t *addr = push_insns (ctx, ld_p ? ld_xmm_reg_pat : st_xmm_reg_pat,
uint8_t *addr = push_insns (insn_varr, ld_p ? ld_xmm_reg_pat : st_xmm_reg_pat,
ld_p ? sizeof (ld_xmm_reg_pat) : sizeof (st_xmm_reg_pat));
memcpy (addr + 4, &offset, sizeof (uint32_t));
assert (reg <= 7);
@ -181,135 +279,265 @@ static void gen_movxmm (MIR_context_t ctx, uint32_t offset, uint32_t reg, int b3
if (b32_p) addr[0] |= 1;
}
static void gen_ldst (MIR_context_t ctx, uint32_t sp_offset, uint32_t src_offset, int b64_p) {
static const uint8_t ldst_pat[] = {
0x44, 0x8b, 0x93, 0, 0, 0, 0, /* mov <src_offset>(%rbx),%r10 */
static void gen_movxmm2 (VARR (uint8_t) * insn_varr, uint32_t offset, uint32_t reg, int ld_p) {
static const uint8_t ld_xmm_reg_pat[] = {
0xf2, 0x41, 0x0f, 0x10, 0x44, 0x24, 0 /* movsd <offset>(%r12),%xmm */
};
static const uint8_t st_xmm_reg_pat[] = {
0xf2, 0x41, 0x0f, 0x11, 0x44, 0x24, 0 /* movsd %xmm, <offset>(%r12) */
};
uint8_t *addr = push_insns (insn_varr, ld_p ? ld_xmm_reg_pat : st_xmm_reg_pat,
ld_p ? sizeof (ld_xmm_reg_pat) : sizeof (st_xmm_reg_pat));
addr[6] = offset;
assert (reg <= 7);
addr[4] |= reg << 3;
}
#ifdef _WIN32
static void gen_add (VARR (uint8_t) * insn_varr, uint32_t sp_offset, int reg) {
static const uint8_t lea_pat[] = {
0x48, 0x8d, 0x84, 0x24, 0, 0, 0, 0, /* lea <sp_offset>(%sp),reg */
};
uint8_t *addr = push_insns (insn_varr, lea_pat, sizeof (lea_pat));
memcpy (addr + 4, &sp_offset, sizeof (uint32_t));
addr[2] |= (reg & 7) << 3;
if (reg > 7) addr[0] |= 4;
}
#endif
static void gen_st (VARR (uint8_t) * insn_varr, uint32_t sp_offset, int b64_p) {
static const uint8_t st_pat[] = {
0x44, 0x89, 0x94, 0x24, 0, 0, 0, 0, /* mov %r10,<sp_offset>(%sp) */
};
uint8_t *addr = push_insns (ctx, ldst_pat, sizeof (ldst_pat));
uint8_t *addr = push_insns (insn_varr, st_pat, sizeof (st_pat));
memcpy (addr + 4, &sp_offset, sizeof (uint32_t));
if (b64_p) addr[0] |= 8;
}
static void gen_ldst (VARR (uint8_t) * insn_varr, uint32_t sp_offset, uint32_t src_offset,
int b64_p) {
static const uint8_t ld_pat[] = {
0x44, 0x8b, 0x93, 0, 0, 0, 0, /* mov <src_offset>(%rbx),%r10 */
};
uint8_t *addr = push_insns (insn_varr, ld_pat, sizeof (ld_pat));
memcpy (addr + 3, &src_offset, sizeof (uint32_t));
memcpy (addr + 11, &sp_offset, sizeof (uint32_t));
if (b64_p) {
addr[0] |= 8;
addr[7] |= 8;
}
if (b64_p) addr[0] |= 8;
gen_st (insn_varr, sp_offset, b64_p);
}
static void gen_ldst80 (MIR_context_t ctx, uint32_t sp_offset, uint32_t src_offset) {
static void gen_ldst80 (VARR (uint8_t) * insn_varr, uint32_t sp_offset, uint32_t src_offset) {
static uint8_t const ldst80_pat[] = {
0xdb, 0xab, 0, 0, 0, 0, /* fldt <src_offset>(%rbx) */
0xdb, 0xbc, 0x24, 0, 0, 0, 0, /* fstpt <sp_offset>(%sp) */
};
uint8_t *addr = push_insns (ctx, ldst80_pat, sizeof (ldst80_pat));
uint8_t *addr = push_insns (insn_varr, ldst80_pat, sizeof (ldst80_pat));
memcpy (addr + 2, &src_offset, sizeof (uint32_t));
memcpy (addr + 9, &sp_offset, sizeof (uint32_t));
}
static void gen_st80 (MIR_context_t ctx, uint32_t src_offset) {
static void gen_st80 (VARR (uint8_t) * insn_varr, uint32_t src_offset) {
static const uint8_t st80_pat[] = {0xdb, 0xbb, 0, 0, 0, 0 /* fstpt <src_offset>(%rbx) */};
memcpy (push_insns (ctx, st80_pat, sizeof (st80_pat)) + 2, &src_offset, sizeof (uint32_t));
memcpy (push_insns (insn_varr, st80_pat, sizeof (st80_pat)) + 2, &src_offset, sizeof (uint32_t));
}
/* Generation: fun (fun_addr, res_arg_addresses):
push rbx; sp-=sp_offset; r11=fun_addr; rbx=res/arg_addrs
r10=mem[rbx,<offset>]; (arg_reg=mem[r10] or r10=mem[r10];mem[sp,sp_offset]=r10) ...
push r12, push rbx; sp-=sp_offset; r11=fun_addr; rbx=res/arg_addrs
r10=mem[rbx,<offset>]; (arg_reg=mem[r10] or r10=mem[r10];mem[sp,sp_offset]=r10
or r12=mem[rbx,arg_offset]; arg_reg=mem[r12]
[;(arg_reg + 1)=mem[r12 + 8]]
...
or r12=mem[rbx,arg_offset];rax=qwords;
L:rax-=1;r10=mem[r12,rax]; mem[sp,sp_offset,rax]=r10;
goto L if rax > 0) ...
rax=8; call *r11; sp+=offset
r10=mem[rbx,<offset>]; res_reg=mem[r10]; ...
pop rbx; ret. */
pop rbx; pop r12; ret. */
void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, size_t nargs,
MIR_type_t *arg_types, int vararg_p) {
_MIR_arg_desc_t *arg_descs, int vararg_p) {
static const uint8_t prolog[] = {
#ifndef _WIN64
#ifndef _WIN32
0x41, 0x54, /* pushq %r12 */
0x53, /* pushq %rbx */
0x48, 0x81, 0xec, 0, 0, 0, 0, /* subq <sp_offset>, %rsp */
0x49, 0x89, 0xfb, /* mov $rdi, $r11 -- fun addr */
0x48, 0x89, 0xf3, /* mov $rsi, $rbx -- result/arg addresses */
#else
0x53, /* pushq %rbx */
0x48, 0x81, 0xec, 0, 0, 0, 0, /* subq <sp_offset>, %rsp */
0x49, 0x89, 0xcb, /* mov $rcx, $r11 -- fun addr */
0x48, 0x89, 0xd3, /* mov $rdx, $rbx -- result/arg addresses */
/* 0x0: */ 0x41, 0x54, /* pushq %r12 */
/* 0x2: */ 0x53, /* pushq %rbx */
/* 0x3: */ 0x55, /* push %rbp */
/* 0x4: */ 0x48, 0x89, 0xe5, /* mov %rsp,%rbp */
/* 0x7: */ 0x48, 0x81, 0xec, 0, 0, 0, 0, /* subq <sp_offset>, %rsp */
/* 0xe: */ 0x49, 0x89, 0xcb, /* mov $rcx, $r11 -- fun addr */
/* 0x11: */ 0x48, 0x89, 0xd3, /* mov $rdx, $rbx -- result/arg addresses */
#endif
};
static const uint8_t call_end[] = {
#ifndef _WIN64
#ifndef _WIN32
0x48, 0xc7, 0xc0, 0x08, 0, 0, 0, /* mov $8, rax -- to save xmm varargs */
#endif
0x41, 0xff, 0xd3, /* callq *%r11 */
0x41, 0xff, 0xd3, /* callq *%r11 */
#ifndef _WIN32
0x48, 0x81, 0xc4, 0, 0, 0, 0, /* addq <sp_offset>, %rsp */
#endif
};
static const uint8_t epilog[] = {
0x5b, /* pop %rbx */
0xc3, /* ret */
#ifdef _WIN32 /* Strict form of windows epilogue for unwinding: */
0x48, 0x8d, 0x65, 0x0, /* lea 0x0(%rbp),%rsp */
0x5d, /* pop %rbp */
#endif
0x5b, /* pop %rbx */
0x41, 0x5c, /* pop %r12 */
0xc3, /* ret */
};
#ifndef _WIN64
#ifndef _WIN32
static const uint8_t iregs[] = {7, 6, 2, 1, 8, 9}; /* rdi, rsi, rdx, rcx, r8, r9 */
static const uint32_t max_iregs = 6, max_xregs = 8;
uint32_t sp_offset = 0;
#else
static const uint8_t iregs[] = {1, 2, 8, 9}; /* rcx, rdx, r8, r9 */
static const uint32_t max_iregs = 4, max_xregs = 4;
uint32_t sp_offset = 32;
uint32_t blk_offset = nargs < 4 ? 32 : nargs * 8, sp_offset = 32; /* spill area */
#endif
uint32_t n_iregs = 0, n_xregs = 0, n_fregs;
uint32_t n_iregs = 0, n_xregs = 0, n_fregs, qwords;
uint8_t *addr;
VARR (uint8_t) * code;
void *res;
VARR_TRUNC (uint8_t, machine_insns, 0);
push_insns (ctx, prolog, sizeof (prolog));
VARR_CREATE (uint8_t, code, 128);
push_insns (code, prolog, sizeof (prolog));
for (size_t i = 0; i < nargs; i++) {
if ((MIR_T_I8 <= arg_types[i] && arg_types[i] <= MIR_T_U64) || arg_types[i] == MIR_T_P) {
MIR_type_t type = arg_descs[i].type;
if ((MIR_T_I8 <= type && type <= MIR_T_U64) || type == MIR_T_P || type == MIR_T_RBLK) {
if (n_iregs < max_iregs) {
gen_mov (ctx, (i + nres) * sizeof (long double), iregs[n_iregs++], TRUE);
#ifdef _WIN64
gen_mov (code, (i + nres) * sizeof (long double), iregs[n_iregs++], TRUE);
#ifdef _WIN32
n_xregs++;
#endif
} else {
gen_ldst (ctx, sp_offset, (i + nres) * sizeof (long double), TRUE);
gen_ldst (code, sp_offset, (i + nres) * sizeof (long double), TRUE);
sp_offset += 8;
}
} else if (arg_types[i] == MIR_T_F || arg_types[i] == MIR_T_D) {
} else if (type == MIR_T_F || type == MIR_T_D) {
if (n_xregs < max_xregs) {
gen_movxmm (ctx, (i + nres) * sizeof (long double), n_xregs++, arg_types[i] == MIR_T_F,
TRUE);
#ifdef _WIN64
gen_mov (ctx, (i + nres) * sizeof (long double), iregs[n_iregs++], TRUE);
gen_movxmm (code, (i + nres) * sizeof (long double), n_xregs++, type == MIR_T_F, TRUE);
#ifdef _WIN32
gen_mov (code, (i + nres) * sizeof (long double), iregs[n_iregs++], TRUE);
#endif
} else {
gen_ldst (ctx, sp_offset, (i + nres) * sizeof (long double), arg_types[i] == MIR_T_D);
gen_ldst (code, sp_offset, (i + nres) * sizeof (long double), type == MIR_T_D);
sp_offset += 8;
}
} else if (arg_types[i] == MIR_T_LD) {
gen_ldst80 (ctx, sp_offset, (i + nres) * sizeof (long double));
} else if (type == MIR_T_LD) {
gen_ldst80 (code, sp_offset, (i + nres) * sizeof (long double));
sp_offset += 16;
} else if (MIR_blk_type_p (type)) {
qwords = (arg_descs[i].size + 7) / 8;
#ifndef _WIN32
if (type == MIR_T_BLK + 1 && n_iregs + qwords <= max_iregs) {
assert (qwords <= 2);
gen_mov (code, (i + nres) * sizeof (long double), 12, TRUE); /* r12 = block addr */
gen_mov2 (code, 0, iregs[n_iregs], TRUE); /* arg_reg = mem[r12] */
if (qwords == 2) gen_mov2 (code, 8, iregs[n_iregs + 1], TRUE); /* arg_reg = mem[r12 + 8] */
n_iregs += qwords;
n_xregs += qwords;
continue;
} else if (type == MIR_T_BLK + 2 && n_xregs + qwords <= max_xregs) {
assert (qwords <= 2);
gen_mov (code, (i + nres) * sizeof (long double), 12, TRUE); /* r12 = block addr */
gen_movxmm2 (code, 0, n_xregs, TRUE); /* xmm = mem[r12] */
if (qwords == 2) gen_movxmm2 (code, 8, n_xregs + 1, TRUE); /* xmm = mem[r12 + 8] */
n_xregs += qwords;
continue;
} else if (type == MIR_T_BLK + 3 && n_iregs < max_iregs && n_xregs < max_xregs) {
assert (qwords == 2);
gen_mov (code, (i + nres) * sizeof (long double), 12, TRUE); /* r12 = block addr */
gen_mov2 (code, 0, iregs[n_iregs], TRUE); /* arg_reg = mem[r12] */
n_iregs++;
n_xregs++;
gen_movxmm2 (code, 8, n_xregs, TRUE); /* xmm = mem[r12 + 8] */
n_xregs++;
continue;
} else if (type == MIR_T_BLK + 4 && n_iregs < max_iregs && n_xregs < max_xregs) {
assert (qwords == 2);
gen_mov (code, (i + nres) * sizeof (long double), 12, TRUE); /* r12 = block addr */
gen_movxmm2 (code, 0, n_xregs, TRUE); /* xmm = mem[r12] */
n_xregs++;
gen_mov2 (code, 8, iregs[n_iregs], TRUE); /* arg_reg = mem[r12 + 8] */
n_iregs++;
n_xregs++;
continue;
}
gen_blk_mov (code, sp_offset, (i + nres) * sizeof (long double), qwords);
sp_offset += qwords * 8;
#else
if (qwords <= 1) {
gen_mov (code, (i + nres) * sizeof (long double), 12, TRUE); /* r12 = mem[disp + rbx] */
if (n_iregs < max_iregs) {
gen_mov2 (code, 0, iregs[n_iregs++], TRUE); /* arg_reg = mem[r12] */
n_xregs++;
} else {
gen_mov2 (code, 0, 10, TRUE); /* r10 = mem[r12] */
gen_st (code, sp_offset, TRUE); /* mem[sp+sp_offset] = r10; */
sp_offset += 8;
}
} else {
/* r12 = mem[disp + rbx]; mem[rsp+blk_offset + nw] = r10 = mem[r12 + nw]; */
gen_blk_mov (code, blk_offset, (i + nres) * sizeof (long double), qwords);
if (n_iregs < max_iregs) {
gen_add (code, blk_offset, iregs[n_iregs++]); /* arg_reg = sp + blk_offset */
n_xregs++;
} else {
gen_add (code, blk_offset, 10); /* r10 = sp + blk_offset */
gen_st (code, sp_offset, TRUE); /* mem[sp+sp_offset] = r10; */
sp_offset += 8;
}
blk_offset += qwords * 8;
}
#endif
} else {
(*error_func) (MIR_call_op_error, "wrong type of arg value");
MIR_get_error_func (ctx) (MIR_call_op_error, "wrong type of arg value");
}
}
#ifdef _WIN32
if (blk_offset > sp_offset) sp_offset = blk_offset;
#endif
sp_offset = (sp_offset + 15) / 16 * 16;
addr = VARR_ADDR (uint8_t, machine_insns);
memcpy (addr + 4, &sp_offset, sizeof (uint32_t));
addr = push_insns (ctx, call_end, sizeof (call_end));
#ifndef _WIN32
sp_offset += 8; /* align */
#endif
addr = VARR_ADDR (uint8_t, code);
#ifndef _WIN32
memcpy (addr + 6, &sp_offset, sizeof (uint32_t));
#else
memcpy (addr + 10, &sp_offset, sizeof (uint32_t));
#endif
addr = push_insns (code, call_end, sizeof (call_end));
#ifndef _WIN32
memcpy (addr + sizeof (call_end) - 4, &sp_offset, sizeof (uint32_t));
#ifdef _WIN64
#else
if (nres > 1)
(*error_func) (MIR_call_op_error, "Windows x86-64 doesn't support multiple return values");
MIR_get_error_func (ctx) (MIR_call_op_error,
"Windows x86-64 doesn't support multiple return values");
#endif
n_iregs = n_xregs = n_fregs = 0;
for (size_t i = 0; i < nres; i++) {
if (((MIR_T_I8 <= res_types[i] && res_types[i] <= MIR_T_U64) || res_types[i] == MIR_T_P)
&& n_iregs < 2) {
gen_mov (ctx, i * sizeof (long double), n_iregs++ == 0 ? 0 : 2, FALSE); /* rax or rdx */
gen_mov (code, i * sizeof (long double), n_iregs++ == 0 ? 0 : 2, FALSE); /* rax or rdx */
} else if ((res_types[i] == MIR_T_F || res_types[i] == MIR_T_D) && n_xregs < 2) {
gen_movxmm (ctx, i * sizeof (long double), n_xregs++, res_types[i] == MIR_T_F, FALSE);
gen_movxmm (code, i * sizeof (long double), n_xregs++, res_types[i] == MIR_T_F, FALSE);
} else if (res_types[i] == MIR_T_LD && n_fregs < 2) {
gen_st80 (ctx, i * sizeof (long double));
gen_st80 (code, i * sizeof (long double));
} else {
(*error_func) (MIR_ret_error, "x86-64 can not handle this combination of return values");
MIR_get_error_func (ctx) (MIR_ret_error,
"x86-64 can not handle this combination of return values");
}
}
push_insns (ctx, epilog, sizeof (epilog));
return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
push_insns (code, epilog, sizeof (epilog));
res = _MIR_publish_code (ctx, VARR_ADDR (uint8_t, code), VARR_LENGTH (uint8_t, code));
VARR_DESTROY (uint8_t, code);
return res;
}
/* Transform C call to call of void handler (MIR_context_t ctx, MIR_item_t func_item,
@ -317,7 +545,7 @@ void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, s
void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handler) {
static const uint8_t push_rbx[] = {0x53, /*push %rbx */};
static const uint8_t prepare_pat[] = {
#ifndef _WIN64
#ifndef _WIN32
/* 0: */ 0x48, 0x83, 0xec, 0x20, /* sub 32,%rsp */
/* 4: */ 0x48, 0x89, 0xe2, /* mov %rsp,%rdx */
/* 7: */ 0xc7, 0x02, 0, 0, 0, 0, /* movl 0,(%rdx) */
@ -340,27 +568,34 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
static const uint32_t hndl_offset = 0x4c;
static const uint32_t prep_stack_size = 208;
#else
/* 0: */ 0x4c, 0x8d, 0x44, 0x24, 0x08, /* lea 8(%rsp),%r8 */
/* 5: */ 0x53, /* push %rbx */
/* 6: */ 0x48, 0x81, 0xec, 0, 0, 0, 0, /* sub <n>,%rsp */
/* d: */ 0x48, 0x89, 0xe3, /* mov %rsp,%rbx */
/* 10: */ 0x49, 0x89, 0xe1, /* mov %rsp,%r9 */
/* 13: */ 0x48, 0x83, 0xec, 0x20, /* sub 32,%rsp */
/* 17: */ 0x48, 0xb9, 0, 0, 0, 0, 0, 0, 0, 0, /* movabs <ctx>,%rcx */
/* 21: */ 0x48, 0xba, 0, 0, 0, 0, 0, 0, 0, 0, /* movabs <func_item>,%rdx*/
/* 2b: */ 0x48, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, /* movabs <handler>,%rax */
/* 35: */ 0xff, 0xd0, /* callq *%rax */
/* 0: */ 0x53, /* push %rbx */
/* 1: */ 0x55, /* push %rbp */
/* 2: */ 0x48, 0x89, 0xe5, /* mov %rsp,%rbp */
/* 5: */ 0x4c, 0x8d, 0x44, 0x24, 0x18, /* lea 24(%rsp),%r8 */
/* a: */ 0x48, 0x81, 0xec, 0, 0, 0, 0, /* sub <n>,%rsp */
/* 11: */ 0x48, 0x89, 0xe3, /* mov %rsp,%rbx */
/* 14: */ 0x49, 0x89, 0xe1, /* mov %rsp,%r9 */
/* 17: */ 0x48, 0x83, 0xec, 0x20, /* sub 32,%rsp */
/* 1b: */ 0x48, 0xb9, 0, 0, 0, 0, 0, 0, 0, 0, /* movabs <ctx>,%rcx */
/* 25: */ 0x48, 0xba, 0, 0, 0, 0, 0, 0, 0, 0, /* movabs <func_item>,%rdx*/
/* 2f: */ 0x48, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, /* movabs <handler>,%rax */
/* 39: */ 0xff, 0xd0, /* callq *%rax */
};
static const uint32_t nres_offset = 0x09;
static const uint32_t ctx_offset = 0x19;
static const uint32_t func_offset = 0x23;
static const uint32_t hndl_offset = 0x2d;
static const uint32_t nres_offset = 0x0d;
static const uint32_t ctx_offset = 0x1d;
static const uint32_t func_offset = 0x27;
static const uint32_t hndl_offset = 0x31;
static const uint32_t prep_stack_size = 32;
#endif
static const uint8_t shim_end[] = {
#ifndef _WIN32
/* 0: */ 0x48, 0x81, 0xc4, 0, 0, 0, 0, /*add prep_stack_size+n,%rsp*/
/* 7: */ 0x5b, /*pop %rbx*/
/* 8: */ 0xc3, /*retq */
#else /* Strict form of windows epilogue for unwinding: */
/* 0 */ 0x48, 0x8d, 0x65, 0x0, /* lea 0x0(%rbp),%rsp */
/* 4: */ 0x5d, /* pop %rbp */
#endif
0x5b, /*pop %rbx*/
0xc3, /*retq */
};
static const uint8_t ld_pat[] = {0x48, 0x8b, 0x83, 0, 0, 0, 0}; /* mov <offset>(%rbx), %reg */
static const uint8_t movss_pat[]
@ -373,55 +608,65 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
uint32_t imm, n_iregs, n_xregs, n_fregs, offset;
uint32_t nres = func_item->u.func->nres;
MIR_type_t *results = func_item->u.func->res_types;
VARR (uint8_t) * code;
void *res;
VARR_TRUNC (uint8_t, machine_insns, 0);
#ifndef _WIN64
push_insns (ctx, push_rbx, sizeof (push_rbx));
VARR_CREATE (uint8_t, code, 128);
#ifndef _WIN32
push_insns (code, push_rbx, sizeof (push_rbx));
#endif
push_insns (ctx, save_pat, sizeof (save_pat));
addr = push_insns (ctx, prepare_pat, sizeof (prepare_pat));
push_insns (code, save_pat, sizeof (save_pat));
addr = push_insns (code, prepare_pat, sizeof (prepare_pat));
imm = nres * 16;
#ifdef _WIN32
imm += 8; /*align */
#endif
memcpy (addr + nres_offset, &imm, sizeof (uint32_t));
memcpy (addr + ctx_offset, &ctx, sizeof (void *));
memcpy (addr + func_offset, &func_item, sizeof (void *));
memcpy (addr + hndl_offset, &handler, sizeof (void *));
/* move results: */
#ifdef _WIN64
#ifdef _WIN32
if (nres > 1)
(*error_func) (MIR_call_op_error, "Windows x86-64 doesn't support multiple return values");
MIR_get_error_func (ctx) (MIR_call_op_error,
"Windows x86-64 doesn't support multiple return values");
#endif
n_iregs = n_xregs = n_fregs = offset = 0;
for (uint32_t i = 0; i < nres; i++) {
if (results[i] == MIR_T_F && n_xregs < 2) {
addr = push_insns (ctx, movss_pat, sizeof (movss_pat));
addr = push_insns (code, movss_pat, sizeof (movss_pat));
addr[3] |= n_xregs << 3;
memcpy (addr + 4, &offset, sizeof (uint32_t));
n_xregs++;
} else if (results[i] == MIR_T_D && n_xregs < 2) {
addr = push_insns (ctx, movsd_pat, sizeof (movsd_pat));
addr = push_insns (code, movsd_pat, sizeof (movsd_pat));
addr[3] |= n_xregs << 3;
memcpy (addr + 4, &offset, sizeof (uint32_t));
n_xregs++;
} else if (results[i] == MIR_T_LD && n_fregs < 2) {
addr = push_insns (ctx, fldt_pat, sizeof (fldt_pat));
addr = push_insns (code, fldt_pat, sizeof (fldt_pat));
memcpy (addr + 2, &offset, sizeof (uint32_t));
if (n_fregs == 1) push_insns (ctx, fxch_pat, sizeof (fxch_pat));
if (n_fregs == 1) push_insns (code, fxch_pat, sizeof (fxch_pat));
n_fregs++;
} else if (n_iregs < 2) {
addr = push_insns (ctx, ld_pat, sizeof (ld_pat));
addr = push_insns (code, ld_pat, sizeof (ld_pat));
addr[2] |= n_iregs << 4;
memcpy (addr + 3, &offset, sizeof (uint32_t));
n_iregs++;
} else {
(*error_func) (MIR_ret_error, "x86-64 can not handle this combination of return values");
MIR_get_error_func (ctx) (MIR_ret_error,
"x86-64 can not handle this combination of return values");
}
offset += 16;
}
addr = push_insns (ctx, shim_end, sizeof (shim_end));
addr = push_insns (code, shim_end, sizeof (shim_end));
#ifndef _WIN32
imm = prep_stack_size + nres * 16;
memcpy (addr + 3, &imm, sizeof (uint32_t));
return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
#endif
res = _MIR_publish_code (ctx, VARR_ADDR (uint8_t, code), VARR_LENGTH (uint8_t, code));
VARR_DESTROY (uint8_t, code);
return res;
}
/* save regs; r10 = call hook_address (ctx, called_func); restore regs; jmp *r10
@ -429,43 +674,54 @@ void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handl
void *_MIR_get_wrapper (MIR_context_t ctx, MIR_item_t called_func, void *hook_address) {
static const uint8_t push_rax[] = {0x50, /*push %rax */};
static const uint8_t wrap_end[] = {
#ifndef _WIN64
0x58, /*pop %rax */
#ifndef _WIN32
0x58, /*pop %rax */
#endif
0x41, 0xff, 0xe2, /*jmpq *%r10 */
};
static const uint8_t call_pat[] = {
#ifndef _WIN64
0x48, 0xbe, 0, 0, 0, 0, 0, 0, 0, 0, /*movabs called_func,%rsi */
0x48, 0xbf, 0, 0, 0, 0, 0, 0, 0, 0, /*movabs ctx,%rdi */
0x49, 0xba, 0, 0, 0, 0, 0, 0, 0, 0, /*movabs <hook_address>,%r10 */
0x41, 0xff, 0xd2, /*callq *%r10 */
0x49, 0x89, 0xc2, /*mov %rax,%r10 */
static const uint8_t call_pat[] =
#ifndef _WIN32
{
0x48, 0xbe, 0, 0, 0, 0, 0, 0, 0, 0, /* movabs called_func,%rsi */
0x48, 0xbf, 0, 0, 0, 0, 0, 0, 0, 0, /* movabs ctx,%rdi */
0x49, 0xba, 0, 0, 0, 0, 0, 0, 0, 0, /* movabs <hook_address>,%r10 */
0x41, 0xff, 0xd2, /* callq *%r10 */
0x49, 0x89, 0xc2, /* mov %rax,%r10 */
};
size_t call_func_offset = 2, ctx_offset = 12, hook_offset = 22;
#else
0x48, 0xba, 0, 0, 0, 0, 0, 0, 0, 0, /*movabs called_func,%rdx */
0x48, 0xb9, 0, 0, 0, 0, 0, 0, 0, 0, /*movabs ctx,%rcx */
0x49, 0xba, 0, 0, 0, 0, 0, 0, 0, 0, /*movabs <hook_address>,%r10*/
0x50, /*push %rax */
0x48, 0x83, 0xec, 0x20, /*sub 32,%rsp */
0x41, 0xff, 0xd2, /*callq *%r10 */
0x49, 0x89, 0xc2, /*mov %rax,%r10 */
0x48, 0x83, 0xc4, 0x20, /*add 32,%rsp */
0x58, /*pop %rax */
{
0x55, /* push %rbp */
0x48, 0x89, 0xe5, /* mov %rsp,%rbp */
0x48, 0xba, 0, 0, 0, 0, 0, 0, 0, 0, /* movabs called_func,%rdx */
0x48, 0xb9, 0, 0, 0, 0, 0, 0, 0, 0, /* movabs ctx,%rcx */
0x49, 0xba, 0, 0, 0, 0, 0, 0, 0, 0, /* movabs <hook_address>,%r10*/
0x50, /* push %rax */
0x48, 0x83, 0xec, 0x28, /* sub 40,%rsp */
0x41, 0xff, 0xd2, /* callq *%r10 */
0x49, 0x89, 0xc2, /* mov %rax,%r10 */
0x48, 0x83, 0xc4, 0x28, /* add 40,%rsp */
0x58, /* pop %rax */
0x5d, /* pop %rbp */
};
size_t call_func_offset = 6, ctx_offset = 16, hook_offset = 26;
#endif
};
uint8_t *addr;
VARR (uint8_t) * code;
void *res;
VARR_TRUNC (uint8_t, machine_insns, 0);
#ifndef _WIN64
push_insns (ctx, push_rax, sizeof (push_rax));
VARR_CREATE (uint8_t, code, 128);
#ifndef _WIN32
push_insns (code, push_rax, sizeof (push_rax));
#endif
push_insns (ctx, save_pat, sizeof (save_pat));
addr = push_insns (ctx, call_pat, sizeof (call_pat));
memcpy (addr + 2, &called_func, sizeof (void *));
memcpy (addr + 12, &ctx, sizeof (void *));
memcpy (addr + 22, &hook_address, sizeof (void *));
push_insns (ctx, restore_pat, sizeof (restore_pat));
push_insns (ctx, wrap_end, sizeof (wrap_end));
return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
push_insns (code, save_pat, sizeof (save_pat));
addr = push_insns (code, call_pat, sizeof (call_pat));
memcpy (addr + call_func_offset, &called_func, sizeof (void *));
memcpy (addr + ctx_offset, &ctx, sizeof (void *));
memcpy (addr + hook_offset, &hook_address, sizeof (void *));
push_insns (code, restore_pat, sizeof (restore_pat));
push_insns (code, wrap_end, sizeof (wrap_end));
res = _MIR_publish_code (ctx, VARR_ADDR (uint8_t, code), VARR_LENGTH (uint8_t, code));
VARR_DESTROY (uint8_t, code);
return res;
}
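For context, the comment at the top of this function documents the contract the generated code follows: save the registers, call hook_address (ctx, called_func), restore the registers, and jump to the address the hook returned (left in %r10). Below is a minimal sketch, not taken from the MIR sources, of how such a wrapper might be wired to a thunk; resolve_hook, install_wrapper and generated_code_addr are hypothetical names, while _MIR_get_thunk, _MIR_get_wrapper and _MIR_redirect_thunk are the declarations visible in this diff.

#include "mir.h"

static void *generated_code_addr; /* assumed to be set elsewhere once machine code exists */

/* Hypothetical hook: the wrapper calls it as hook_address (ctx, called_func)
   and jumps to whatever address it returns. */
static void *resolve_hook (MIR_context_t ctx, MIR_item_t called_func) {
  (void) ctx; (void) called_func;
  /* A real hook would generate or look up machine code for called_func here. */
  return generated_code_addr;
}

/* Route a function's thunk through the hook on its first call. */
static void install_wrapper (MIR_context_t ctx, MIR_item_t func_item) {
  void *thunk = _MIR_get_thunk (ctx);
  void *wrapper = _MIR_get_wrapper (ctx, func_item, (void *) resolve_hook);
  _MIR_redirect_thunk (ctx, thunk, wrapper);
}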

File diff suppressed because it is too large

@ -1,11 +1,15 @@
/* This file is a part of MIR project.
Copyright (C) 2018-2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
Copyright (C) 2018-2021 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
#ifndef MIR_H
#define MIR_H
#if defined(_WIN32) && !defined(_WIN64)
#error "MIR does not work on 32-bit Windows"
#endif
#include <stdio.h>
#include <stdint.h>
#include <assert.h>
@ -31,6 +35,15 @@ static inline int mir_assert (int cond) { return 0 && cond; }
#define MIR_NO_SCAN 0
#endif
#ifndef MIR_PARALLEL_GEN
#define MIR_PARALLEL_GEN 0
#endif
#if MIR_PARALLEL_GEN && defined(_WIN32) /* TODO: Win thread primitives ??? */
#undef MIR_PARALLEL_GEN
#define MIR_PARALLEL_GEN 0
#endif
#ifdef __GNUC__
#define MIR_UNUSED __attribute__ ((unused))
#else
@ -51,9 +64,9 @@ static inline int mir_assert (int cond) { return 0 && cond; }
typedef enum MIR_error_type {
REP8 (ERR_EL, no, syntax, binary_io, alloc, finish, no_module, nested_module, no_func),
REP4 (ERR_EL, func, vararg_func, nested_func, wrong_param_value),
REP4 (ERR_EL, reserved_name, import_export, undeclared_func_reg, repeated_decl),
REP8 (ERR_EL, reg_type, unique_reg, undeclared_op_ref, ops_num, call_op, ret, op_mode, out_op),
ERR_EL (invalid_insn)
REP5 (ERR_EL, reserved_name, import_export, undeclared_func_reg, repeated_decl, reg_type),
REP6 (ERR_EL, wrong_type, unique_reg, undeclared_op_ref, ops_num, call_op, unspec_op),
REP6 (ERR_EL, ret, op_mode, out_op, invalid_insn, ctx_change, parallel)
} MIR_error_type_t;
#ifdef __GNUC__
@ -65,6 +78,31 @@ typedef enum MIR_error_type {
typedef void MIR_NO_RETURN (*MIR_error_func_t) (MIR_error_type_t error_type, const char *format,
...);
#if MIR_PARALLEL_GEN
#include <pthread.h>
typedef pthread_mutex_t mir_mutex_t;
typedef pthread_cond_t mir_cond_t;
typedef pthread_attr_t mir_thread_attr_t;
#define mir_thread_create(m, attr, f, arg) pthread_create (m, attr, f, arg)
#define mir_thread_join(t, r) pthread_join (t, r)
#define mir_mutex_init(m, a) pthread_mutex_init (m, a)
#define mir_mutex_destroy(m) pthread_mutex_destroy (m)
#define mir_mutex_lock(m) pthread_mutex_lock (m)
#define mir_mutex_unlock(m) pthread_mutex_unlock (m)
#define mir_cond_init(m, a) pthread_cond_init (m, a)
#define mir_cond_destroy(m) pthread_cond_destroy (m)
#define mir_cond_wait(c, m) pthread_cond_wait (c, m)
#define mir_cond_signal(c) pthread_cond_signal (c)
#define mir_cond_broadcast(c) pthread_cond_broadcast (c)
#define mir_thread_attr_init(a) pthread_attr_init (a)
#define mir_thread_attr_setstacksize(a, s) pthread_attr_setstacksize (a, s)
#else
#define mir_mutex_init(m, a) 0
#define mir_mutex_destroy(m) 0
#define mir_mutex_lock(m) 0
#define mir_mutex_unlock(m) 0
#endif
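The block above maps the mir_* threading wrappers onto pthreads when MIR_PARALLEL_GEN is enabled; in the single-threaded build only the mutex macros are defined and they expand to the constant 0, discarding their arguments. A minimal sketch of the intended usage, with a hypothetical mutex and counter, assuming mir.h is included:

#include "mir.h"

#if MIR_PARALLEL_GEN
static mir_mutex_t queue_mutex; /* a real pthread mutex only in the parallel build */
#endif
static size_t queued_funcs;     /* hypothetical shared state, for illustration only */

static void queue_init (void) { mir_mutex_init (&queue_mutex, NULL); } /* no-op (0) otherwise */

static void enqueue_func (void) {
  mir_mutex_lock (&queue_mutex); /* expands to 0 in the single-threaded build */
  queued_funcs++;
  mir_mutex_unlock (&queue_mutex);
}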
#define INSN_EL(i) MIR_##i
/* Most MIR insns have a destination operand and one or two source
@ -125,21 +163,26 @@ typedef enum {
INSN_EL (ALLOCA), /* 2 operands: result address and size */
REP2 (INSN_EL, BSTART, BEND), /* block start: result addr; block end: addr from block start */
/* Special insns: */
INSN_EL (VA_ARG), /* result is arg address, operands: va_list addr and memory */
INSN_EL (VA_ARG), /* result is arg address, operands: va_list addr and memory */
INSN_EL (VA_BLOCK_ARG), /* result is arg address, operands: va_list addr and integer (size) */
INSN_EL (VA_START),
INSN_EL (VA_END), /* operand is va_list */
INSN_EL (LABEL), /* One immediate operand is unique label number */
INSN_EL (UNSPEC), /* First operand unspec code and the rest are args */
INSN_EL (PHI), /* Used only internally in the generator, the first operand is output */
INSN_EL (INVALID_INSN),
INSN_EL (INSN_BOUND), /* Should be the last */
} MIR_insn_code_t;
#define TYPE_EL(t) MIR_T_##t
#define MIR_BLK_NUM 5
/* Data types: */
typedef enum {
REP8 (TYPE_EL, I8, U8, I16, U16, I32, U32, I64, U64), /* Integer types of different size: */
REP3 (TYPE_EL, F, D, LD), /* Float or (long) double type */
TYPE_EL (P), /* Pointer */
REP2 (TYPE_EL, P, BLK), /* Pointer, memory blocks */
TYPE_EL (RBLK) = TYPE_EL (BLK) + MIR_BLK_NUM, /* return block */
REP2 (TYPE_EL, UNDEF, BOUND),
} MIR_type_t;
@ -149,6 +192,9 @@ static inline int MIR_int_type_p (MIR_type_t t) {
static inline int MIR_fp_type_p (MIR_type_t t) { return MIR_T_F <= t && t <= MIR_T_LD; }
static inline int MIR_blk_type_p (MIR_type_t t) { return MIR_T_BLK <= t && t < MIR_T_RBLK; }
static inline int MIR_all_blk_type_p (MIR_type_t t) { return MIR_T_BLK <= t && t <= MIR_T_RBLK; }
#if UINTPTR_MAX == 0xffffffff
#define MIR_PTR32 1
#define MIR_PTR64 0
@ -256,8 +302,9 @@ struct MIR_insn {
DEF_DLIST (MIR_insn_t, insn_link);
typedef struct MIR_var {
MIR_type_t type;
MIR_type_t type; /* MIR_T_BLK .. MIR_T_RBLK can be used only for args */
const char *name;
size_t size; /* ignored for type != [MIR_T_BLK .. MIR_T_RBLK] */
} MIR_var_t;
DEF_VARR (MIR_var_t);
@ -273,6 +320,7 @@ typedef struct MIR_func {
VARR (MIR_var_t) * vars; /* args and locals but temps */
void *machine_code; /* address of generated machine code or NULL */
void *call_addr; /* address to call the function, it can be the same as machine_code */
void *internal; /* internal data structure */
} * MIR_func_t;
typedef struct MIR_proto {
@ -487,6 +535,8 @@ extern void MIR_insert_insn_before (MIR_context_t ctx, MIR_item_t func, MIR_insn
MIR_insn_t insn);
extern void MIR_remove_insn (MIR_context_t ctx, MIR_item_t func, MIR_insn_t insn);
extern void MIR_change_module_ctx (MIR_context_t old_ctx, MIR_module_t m, MIR_context_t new_ctx);
extern MIR_insn_code_t MIR_reverse_branch_code (MIR_insn_code_t code);
extern const char *MIR_type_str (MIR_context_t ctx, MIR_type_t tp);
@ -547,12 +597,17 @@ extern MIR_reg_t _MIR_new_temp_reg (MIR_context_t ctx, MIR_type_t type,
extern size_t _MIR_type_size (MIR_context_t ctx, MIR_type_t type);
extern MIR_op_mode_t _MIR_insn_code_op_mode (MIR_context_t ctx, MIR_insn_code_t code, size_t nop,
int *out_p);
extern MIR_insn_t _MIR_new_unspec_insn (MIR_context_t ctx, size_t nops, ...);
extern void _MIR_register_unspec_insn (MIR_context_t ctx, uint64_t code, const char *name,
size_t nres, MIR_type_t *res_types, size_t nargs,
int vararg_p, MIR_var_t *args);
extern void _MIR_duplicate_func_insns (MIR_context_t ctx, MIR_item_t func_item);
extern void _MIR_restore_func_insns (MIR_context_t ctx, MIR_item_t func_item);
extern void _MIR_simplify_insn (MIR_context_t ctx, MIR_item_t func_item, MIR_insn_t insn,
int keep_ref_p, int mem_float_p);
extern const char *_MIR_get_temp_item_name (MIR_context_t ctx, MIR_module_t module);
extern void _MIR_get_temp_item_name (MIR_context_t ctx, MIR_module_t module, char *buff,
size_t buff_len);
extern MIR_op_t _MIR_new_hard_reg_op (MIR_context_t ctx, MIR_reg_t hard_reg);
@ -583,14 +638,20 @@ extern void _MIR_update_code_arr (MIR_context_t ctx, uint8_t *base, size_t nloc,
extern void _MIR_update_code (MIR_context_t ctx, uint8_t *base, size_t nloc, ...);
extern void *va_arg_builtin (void *p, uint64_t t);
extern void va_block_arg_builtin (void *res, void *p, size_t s, uint64_t t);
extern void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a);
extern void va_end_interp_builtin (MIR_context_t ctx, void *p);
extern void *_MIR_get_bstart_builtin (MIR_context_t ctx);
extern void *_MIR_get_bend_builtin (MIR_context_t ctx);
typedef struct {
MIR_type_t type;
size_t size; /* used only for block arg (type == [MIR_T_BLK .. MIR_T_RBLK]) */
} _MIR_arg_desc_t;
extern void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, size_t nargs,
MIR_type_t *arg_types, int vararg_p);
_MIR_arg_desc_t *arg_descs, int vararg_p);
extern void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handler);
extern void *_MIR_get_thunk (MIR_context_t ctx);
extern void _MIR_redirect_thunk (MIR_context_t ctx, void *thunk, void *to);
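A short, hedged sketch of how a caller is expected to fill the new _MIR_arg_desc_t array for _MIR_get_ff_call, based only on the declarations above; the particular signature (one I64 result, one I64 argument and one 16-byte BLK argument) is invented for illustration.

#include "mir.h"

/* Illustrative only: build a foreign-function call interface for
   a function returning I64 and taking (I64, 16-byte BLK). */
static void *make_ff_call (MIR_context_t ctx) {
  MIR_type_t res_type = MIR_T_I64;
  _MIR_arg_desc_t arg_descs[2];

  arg_descs[0].type = MIR_T_I64;
  arg_descs[0].size = 0;         /* size is ignored for non-block types */
  arg_descs[1].type = MIR_T_BLK; /* block argument passed by value */
  arg_descs[1].size = 16;        /* block size in bytes */
  return _MIR_get_ff_call (ctx, 1, &res_type, 2, arg_descs, 0 /* vararg_p */);
}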

@ -1,5 +1,5 @@
/* This file is a part of MIR project.
Copyright (C) 2018-2020 Vladimir Makarov <vmakarov.gcc@gmail.com> and logzero <core13@gmx.net>
Copyright (C) 2018-2021 Vladimir Makarov <vmakarov.gcc@gmail.com> and logzero <core13@gmx.net>
*/
#ifndef _WIN32
@ -19,7 +19,8 @@ static double __attribute__ ((unused)) real_usec_time (void) {
return tv.tv_usec + tv.tv_sec * 1000000.0;
}
#else
#include <profileapi.h>
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
// does not return actual time, use as a stopwatch only
static double real_sec_time (void) {

@ -0,0 +1,11 @@
These patches are for Lua 5.3 and 5.4.
The 'defer' patch adds a defer statement to Lua.
Note that in Lua 5.4 versions prior to 5.4.3, a deferred closure may be called more than once when exiting a scope, just as the __close metamethod of a to-be-closed variable may be called more than once. I think this is fixed in Lua 5.4.3.
The original patch for 5.4 is applicable only to versions prior to 5.4.3.
Lua 5.4.3 takes a new approach to the implementation of to-be-closed values, hence a separate patch had to be created for it.

@ -0,0 +1,974 @@
Index: testes/all.lua
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- testes/all.lua (revision e7411fab800e2cfa810a1ba296356532eabdde40)
+++ testes/all.lua (date 1594850137246)
@@ -184,6 +184,7 @@
dofile('bitwise.lua')
assert(dofile('verybig.lua', true) == 10); collectgarbage()
dofile('files.lua')
+dofile('defer.lua')
if #msgs > 0 then
print("\ntests not performed:")
Index: ldo.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- ldo.h (revision e7411fab800e2cfa810a1ba296356532eabdde40)
+++ ldo.h (date 1594850124117)
@@ -53,6 +53,7 @@
LUAI_FUNC l_noret luaD_throw (lua_State *L, int errcode);
LUAI_FUNC int luaD_rawrunprotected (lua_State *L, Pfunc f, void *ud);
+LUAI_FUNC void luaD_seterrorobj (lua_State *L, int errcode, StkId oldtop);
#endif
Index: ldo.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- ldo.c (revision e7411fab800e2cfa810a1ba296356532eabdde40)
+++ ldo.c (date 1594850137277)
@@ -88,7 +88,7 @@
};
-static void seterrorobj (lua_State *L, int errcode, StkId oldtop) {
+void luaD_seterrorobj (lua_State *L, int errcode, StkId oldtop) {
switch (errcode) {
case LUA_ERRMEM: { /* memory error? */
setsvalue2s(L, oldtop, G(L)->memerrmsg); /* reuse preregistered msg. */
@@ -98,6 +98,10 @@
setsvalue2s(L, oldtop, luaS_newliteral(L, "error in error handling"));
break;
}
+ case CLOSEPROTECT: {
+ setnilvalue(oldtop); /* no error message */
+ break;
+ }
default: {
setobjs2s(L, oldtop, L->top - 1); /* error message on current top */
break;
@@ -114,6 +118,7 @@
}
else { /* thread has no error handler */
global_State *g = G(L);
+ errcode = luaF_close(L, L->stack, errcode); /* close all upvalues */
L->status = cast_byte(errcode); /* mark it as dead */
if (g->mainthread->errorJmp) { /* main thread has a handler? */
setobjs2s(L, g->mainthread->top++, L->top - 1); /* copy error obj. */
@@ -121,7 +126,7 @@
}
else { /* no handler at all; abort */
if (g->panic) { /* panic function? */
- seterrorobj(L, errcode, L->top); /* assume EXTRA_STACK */
+ luaD_seterrorobj(L, errcode, L->top); /* assume EXTRA_STACK */
if (L->ci->top < L->top)
L->ci->top = L->top; /* pushing msg. can break this invariant */
lua_unlock(L);
@@ -584,8 +589,8 @@
if (ci == NULL) return 0; /* no recovery point */
/* "finish" luaD_pcall */
oldtop = restorestack(L, ci->extra);
- luaF_close(L, oldtop);
- seterrorobj(L, status, oldtop);
+ luaF_close(L, oldtop, status);
+ luaD_seterrorobj(L, status, oldtop);
L->ci = ci;
L->allowhook = getoah(ci->callstatus); /* restore original 'allowhook' */
L->nny = 0; /* should be zero to be yieldable */
@@ -662,19 +667,17 @@
L->nny = 0; /* allow yields */
api_checknelems(L, (L->status == LUA_OK) ? nargs + 1 : nargs);
status = luaD_rawrunprotected(L, resume, &nargs);
- if (status == -1) /* error calling 'lua_resume'? */
- status = LUA_ERRRUN;
- else { /* continue running after recoverable errors */
- while (errorstatus(status) && recover(L, status)) {
- /* unroll continuation */
- status = luaD_rawrunprotected(L, unroll, &status);
- }
- if (errorstatus(status)) { /* unrecoverable error? */
- L->status = cast_byte(status); /* mark thread as 'dead' */
- seterrorobj(L, status, L->top); /* push error message */
- L->ci->top = L->top;
- }
- else lua_assert(status == L->status); /* normal end or yield */
+ /* continue running after recoverable errors */
+ while (errorstatus(status) && recover(L, status)) {
+ /* unroll continuation */
+ status = luaD_rawrunprotected(L, unroll, &status);
+ }
+ if (!errorstatus(status))
+ lua_assert(status == L->status); /* normal end or yield */
+ else { /* unrecoverable error */
+ L->status = cast_byte(status); /* mark thread as 'dead' */
+ luaD_seterrorobj(L, status, L->top); /* push error message */
+ L->ci->top = L->top;
}
L->nny = oldnny; /* restore 'nny' */
L->nCcalls--;
@@ -729,11 +732,12 @@
status = luaD_rawrunprotected(L, func, u);
if (status != LUA_OK) { /* an error occurred? */
StkId oldtop = restorestack(L, old_top);
- luaF_close(L, oldtop); /* close possible pending closures */
- seterrorobj(L, status, oldtop);
L->ci = old_ci;
L->allowhook = old_allowhooks;
L->nny = old_nny;
+ status = luaF_close(L, oldtop, status); /* close possible pending closures */
+ oldtop = restorestack(L, old_top);
+ luaD_seterrorobj(L, status, oldtop);
luaD_shrinkstack(L);
}
L->errfunc = old_errfunc;
Index: lfunc.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lfunc.h (revision e7411fab800e2cfa810a1ba296356532eabdde40)
+++ lfunc.h (date 1594850137293)
@@ -34,7 +34,8 @@
*/
struct UpVal {
TValue *v; /* points to stack or to its own value */
- lu_mem refcount; /* reference counter */
+ unsigned int refcount; /* reference counter */
+ unsigned int flags; /* Used to mark deferred values */
union {
struct { /* (when open) */
UpVal *next; /* linked list */
@@ -46,13 +47,22 @@
#define upisopen(up) ((up)->v != &(up)->u.value)
+/*
+** Special "status" for 'luaF_close'
+*/
+
+/* close upvalues without running their closing methods */
+#define NOCLOSINGMETH (-1)
+
+/* close upvalues running all closing methods in protected mode */
+#define CLOSEPROTECT (-2)
LUAI_FUNC Proto *luaF_newproto (lua_State *L);
LUAI_FUNC CClosure *luaF_newCclosure (lua_State *L, int nelems);
LUAI_FUNC LClosure *luaF_newLclosure (lua_State *L, int nelems);
LUAI_FUNC void luaF_initupvals (lua_State *L, LClosure *cl);
LUAI_FUNC UpVal *luaF_findupval (lua_State *L, StkId level);
-LUAI_FUNC void luaF_close (lua_State *L, StkId level);
+LUAI_FUNC int luaF_close (lua_State *L, StkId level, int status);
LUAI_FUNC void luaF_freeproto (lua_State *L, Proto *f);
LUAI_FUNC const char *luaF_getlocalname (const Proto *func, int local_number,
int pc);
Index: lstate.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lstate.c (revision e7411fab800e2cfa810a1ba296356532eabdde40)
+++ lstate.c (date 1594850137347)
@@ -241,7 +241,7 @@
static void close_state (lua_State *L) {
global_State *g = G(L);
- luaF_close(L, L->stack); /* close all upvalues for this thread */
+ luaF_close(L, L->stack, CLOSEPROTECT); /* close all upvalues for this thread */
luaC_freeallobjects(L); /* collect all objects */
if (g->version) /* closing a fully built state? */
luai_userstateclose(L);
@@ -284,13 +284,33 @@
void luaE_freethread (lua_State *L, lua_State *L1) {
LX *l = fromstate(L1);
- luaF_close(L1, L1->stack); /* close all upvalues for this thread */
+ luaF_close(L1, L1->stack, NOCLOSINGMETH); /* close all upvalues for this thread */
lua_assert(L1->openupval == NULL);
luai_userstatefree(L, L1);
freestack(L1);
luaM_free(L, l);
}
+int lua_resetthread (lua_State *L) {
+ CallInfo *ci;
+ int status;
+ lua_lock(L);
+ L->ci = ci = &L->base_ci; /* unwind CallInfo list */
+ setnilvalue(L->stack); /* 'function' entry for basic 'ci' */
+ ci->func = L->stack;
+ ci->callstatus = 0;
+ status = luaF_close(L, L->stack, CLOSEPROTECT);
+ if (status != CLOSEPROTECT) /* real errors? */
+ luaD_seterrorobj(L, status, L->stack + 1);
+ else {
+ status = LUA_OK;
+ L->top = L->stack + 1;
+ }
+ ci->top = L->top + LUA_MINSTACK;
+ L->status = status;
+ lua_unlock(L);
+ return status;
+}
LUA_API lua_State *lua_newstate (lua_Alloc f, void *ud) {
int i;
Index: lfunc.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lfunc.c (revision e7411fab800e2cfa810a1ba296356532eabdde40)
+++ lfunc.c (date 1594850137293)
@@ -14,6 +14,7 @@
#include "lua.h"
+#include "ldo.h"
#include "lfunc.h"
#include "lgc.h"
#include "lmem.h"
@@ -61,13 +62,15 @@
lua_assert(isintwups(L) || L->openupval == NULL);
while (*pp != NULL && (p = *pp)->v >= level) {
lua_assert(upisopen(p));
- if (p->v == level) /* found a corresponding upvalue? */
- return p; /* return it */
+ if (p->v == level && !p->flags) /* found a corresponding upvalue that is not a deferred value? */ {
+ return p; /* return it */
+ }
pp = &p->u.open.next;
}
/* not found: create a new upvalue */
uv = luaM_new(L, UpVal);
uv->refcount = 0;
+ uv->flags = 0;
uv->u.open.next = *pp; /* link it to list of open upvalues */
uv->u.open.touched = 1;
*pp = uv;
@@ -79,20 +82,84 @@
return uv;
}
+static void calldeferred(lua_State *L, void *ud) {
+ UNUSED(ud);
+ luaD_callnoyield(L, L->top - 2, 0);
+}
+
+/*
+** Prepare deferred function plus its arguments for object 'obj' with
+** error message 'err'. (This function assumes EXTRA_STACK.)
+*/
+static int preparetocall(lua_State *L, TValue *func, TValue *err) {
+ StkId top = L->top;
+ setobj2s(L, top, func); /* will call deferred function */
+ if (err) {
+ setobj2s(L, top + 1, err); /* and error msg. as 1st argument */
+ }
+ else {
+ setnilvalue(top + 1);
+ }
+ L->top = top + 2; /* add function and arguments */
+ return 1;
+}
-void luaF_close (lua_State *L, StkId level) {
+/*
+** Prepare and call a deferred function. If status is OK, code is still
+** inside the original protected call, and so any error will be handled
+** there. Otherwise, a previous error already activated the original
+** protected call, and so the call to the deferred method must be
+** protected here. (A status == -1 behaves like a previous
+** error, to also run the closing method in protected mode).
+** If status is OK, the call to the deferred method will be pushed
+** at the top of the stack. Otherwise, values are pushed after
+** the 'level' of the upvalue containing deferred function, as everything after
+** that won't be used again.
+*/
+static int calldeferredfunction(lua_State *L, StkId level, int status) {
+ TValue *uv = level; /* value being closed */
+ if (status == LUA_OK) {
+ preparetocall(L, uv, NULL); /* something to call? */
+ calldeferred(L, NULL); /* call closing method */
+ }
+ else { /* must close the object in protected mode */
+ ptrdiff_t oldtop;
+ level++; /* space for error message */
+ oldtop = savestack(L, level + 1); /* top will be after that */
+ luaD_seterrorobj(L, status, level); /* set error message */
+ preparetocall(L, uv, level);
+ int newstatus = luaD_pcall(L, calldeferred, NULL, oldtop, 0);
+ if (newstatus != LUA_OK && status == CLOSEPROTECT) /* first error? */
+ status = newstatus; /* this will be the new error */
+ else {
+ /* leave original error (or nil) on top */
+ L->top = restorestack(L, oldtop);
+ }
+ }
+ return status;
+}
+
+int luaF_close (lua_State *L, StkId level, int status) {
UpVal *uv;
while (L->openupval != NULL && (uv = L->openupval)->v >= level) {
lua_assert(upisopen(uv));
L->openupval = uv->u.open.next; /* remove from 'open' list */
- if (uv->refcount == 0) /* no references? */
- luaM_free(L, uv); /* free upvalue */
+ if (uv->refcount == 0) { /* no references? */
+ UpVal uv1 = *uv; /* copy the upvalue as we will free it below */
+ luaM_free(L, uv); /* free upvalue before invoking any deferred functions */
+ if (status != NOCLOSINGMETH && uv1.flags && ttisfunction(uv1.v)) {
+ ptrdiff_t levelrel = savestack(L, level);
+ status = calldeferredfunction(L, uv1.v, status);
+ level = restorestack(L, levelrel);
+ }
+ }
else {
setobj(L, &uv->u.value, uv->v); /* move value to upvalue slot */
uv->v = &uv->u.value; /* now current value lives here */
luaC_upvalbarrier(L, uv);
}
}
+ return status;
}
Index: llex.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- llex.h (revision e7411fab800e2cfa810a1ba296356532eabdde40)
+++ llex.h (date 1594850124176)
@@ -27,7 +27,7 @@
/* terminal symbols denoted by reserved words */
TK_AND = FIRST_RESERVED, TK_BREAK,
TK_DO, TK_ELSE, TK_ELSEIF, TK_END, TK_FALSE, TK_FOR, TK_FUNCTION,
- TK_GOTO, TK_IF, TK_IN, TK_LOCAL, TK_NIL, TK_NOT, TK_OR, TK_REPEAT,
+ TK_GOTO, TK_IF, TK_IN, TK_LOCAL, TK_DEFER, TK_NIL, TK_NOT, TK_OR, TK_REPEAT,
TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE,
/* other terminal symbols */
TK_IDIV, TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE,
Index: lparser.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lparser.c (revision e7411fab800e2cfa810a1ba296356532eabdde40)
+++ lparser.c (date 1594850137347)
@@ -52,6 +52,7 @@
lu_byte nactvar; /* # active locals outside the block */
lu_byte upval; /* true if some variable in the block is an upvalue */
lu_byte isloop; /* true if 'block' is a loop */
+ lu_byte insidetbc; /* true if inside the scope of a defer stmt (i.e. defer closure var) */
} BlockCnt;
@@ -442,6 +443,7 @@
bl->firstlabel = fs->ls->dyd->label.n;
bl->firstgoto = fs->ls->dyd->gt.n;
bl->upval = 0;
+ bl->insidetbc = (fs->bl != NULL && fs->bl->insidetbc);
bl->previous = fs->bl;
fs->bl = bl;
lua_assert(fs->freereg == fs->nactvar);
@@ -519,10 +521,17 @@
** so that, if it invokes the GC, the GC knows which registers
** are in use at that time.
*/
-static void codeclosure (LexState *ls, expdesc *v) {
+static void codeclosure (LexState *ls, expdesc *v, int deferred) {
FuncState *fs = ls->fs->prev;
+ int pc = -1;
+ if (deferred) {
+ pc = luaK_codeABC(fs, OP_DEFER, 0, 0, 0);
+ }
init_exp(v, VRELOCABLE, luaK_codeABx(fs, OP_CLOSURE, 0, fs->np - 1));
luaK_exp2nextreg(fs, v); /* fix it at the last register */
+ if (deferred) {
+ SETARG_A(fs->f->code[pc], v->u.info);
+ }
}
@@ -780,24 +789,26 @@
}
-static void body (LexState *ls, expdesc *e, int ismethod, int line) {
+static void body (LexState *ls, expdesc *e, int ismethod, int line, int deferred) {
/* body -> '(' parlist ')' block END */
FuncState new_fs;
BlockCnt bl;
new_fs.f = addprototype(ls);
new_fs.f->linedefined = line;
open_func(ls, &new_fs, &bl);
- checknext(ls, '(');
- if (ismethod) {
- new_localvarliteral(ls, "self"); /* create 'self' parameter */
- adjustlocalvars(ls, 1);
- }
- parlist(ls);
- checknext(ls, ')');
+ if (!deferred) {
+ checknext(ls, '(');
+ if (ismethod) {
+ new_localvarliteral(ls, "self"); /* create 'self' parameter */
+ adjustlocalvars(ls, 1);
+ }
+ parlist(ls);
+ checknext(ls, ')');
+ }
statlist(ls);
new_fs.f->lastlinedefined = ls->linenumber;
check_match(ls, TK_END, TK_FUNCTION, line);
- codeclosure(ls, e);
+ codeclosure(ls, e, deferred);
close_func(ls);
}
@@ -972,7 +983,7 @@
}
case TK_FUNCTION: {
luaX_next(ls);
- body(ls, v, 0, ls->linenumber);
+ body(ls, v, 0, ls->linenumber, 0);
return;
}
default: {
@@ -1429,12 +1440,19 @@
}
-static void localfunc (LexState *ls) {
+static void localfunc (LexState *ls, int defer) {
expdesc b;
FuncState *fs = ls->fs;
- new_localvar(ls, str_checkname(ls)); /* new local variable */
+ if (defer) {
+ static const char funcname[] = "(deferred function)";
+ new_localvar(ls, luaX_newstring(ls, funcname, sizeof funcname-1)); /* new local variable */
+ markupval(fs, fs->nactvar);
+ fs->bl->insidetbc = 1; /* in the scope of a defer closure variable */
+ } else {
+ new_localvar(ls, str_checkname(ls)); /* new local variable */
+ }
adjustlocalvars(ls, 1); /* enter its scope */
- body(ls, &b, 0, ls->linenumber); /* function created in next register */
+ body(ls, &b, 0, ls->linenumber, defer); /* function created in next register */
/* debug information will only see the variable after this point! */
getlocvar(fs, b.u.info)->startpc = fs->pc;
}
@@ -1480,7 +1498,7 @@
expdesc v, b;
luaX_next(ls); /* skip FUNCTION */
ismethod = funcname(ls, &v);
- body(ls, &b, ismethod, line);
+ body(ls, &b, ismethod, line, 0);
luaK_storevar(ls->fs, &v, &b);
luaK_fixline(ls->fs, line); /* definition "happens" in the first line */
}
@@ -1513,7 +1531,7 @@
nret = explist(ls, &e); /* optional return values */
if (hasmultret(e.k)) {
luaK_setmultret(fs, &e);
- if (e.k == VCALL && nret == 1) { /* tail call? */
+ if (e.k == VCALL && nret == 1 && !fs->bl->insidetbc) { /* tail call? */
SET_OPCODE(getinstruction(fs,&e), OP_TAILCALL);
lua_assert(GETARG_A(getinstruction(fs,&e)) == fs->nactvar);
}
@@ -1572,10 +1590,15 @@
case TK_LOCAL: { /* stat -> localstat */
luaX_next(ls); /* skip LOCAL */
if (testnext(ls, TK_FUNCTION)) /* local function? */
- localfunc(ls);
+ localfunc(ls, 0);
else
localstat(ls);
break;
+ }
+ case TK_DEFER: { /* stat -> deferstat */
+ luaX_next(ls); /* skip DEFER */
+ localfunc(ls, 1);
+ break;
}
case TK_DBCOLON: { /* stat -> label */
luaX_next(ls); /* skip double colon */
Index: llex.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- llex.c (revision e7411fab800e2cfa810a1ba296356532eabdde40)
+++ llex.c (date 1594850124157)
@@ -40,7 +40,7 @@
static const char *const luaX_tokens [] = {
"and", "break", "do", "else", "elseif",
"end", "false", "for", "function", "goto", "if",
- "in", "local", "nil", "not", "or", "repeat",
+ "in", "local", "defer", "nil", "not", "or", "repeat",
"return", "then", "true", "until", "while",
"//", "..", "...", "==", ">=", "<=", "~=",
"<<", ">>", "::", "<eof>",
Index: testes/defer.lua
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- testes/defer.lua (date 1594849634130)
+++ testes/defer.lua (date 1594849634130)
@@ -0,0 +1,313 @@
+-- ================================================================
+-- Following section is an extract from the code.lua test
+-- These functions test bytecode generation, and also provide
+-- helper routines that we use later on in other test cases
+
+-- testing opcodes
+function check (f, ...)
+ if not T then
+ return true
+ end
+ local arg = {...}
+ local c = T.listcode(f)
+ for i=1, #arg do
+ --print(arg[i], c[i])
+ opcodes_coverage[arg[i]] = opcodes_coverage[arg[i]]+1
+ assert(string.find(c[i], '- '..arg[i]..' *[AB][xs]?=%d'))
+ end
+ assert(c[#arg+2] == nil)
+end
+
+-- Test defer statement
+do
+ local y = 0
+ local function x()
+ defer y = y + 1 end
+ defer y = y + 1 end
+ end
+ check(x, 'DEFER', 'CLOSURE', 'DEFER', 'CLOSURE', 'RETURN')
+ x()
+ assert(y == 2)
+ print 'Test 1 OK'
+end
+
+-- Test defer statement
+do
+ local y = 0
+ local function x()
+ defer y = y + 1 end
+ error('raise error')
+ defer y = y + 2 end -- will not be called
+ end
+ pcall(x)
+ assert(y == 1)
+ print 'Test 2 OK'
+end
+
+-- Test defer statement
+do
+ local y = 0
+ local function x()
+ defer y = y + 1 end
+ defer y = y + 2; error('err') end
+ defer y = y + 3 end
+ end
+ pcall(x)
+ assert(y == 6)
+ print 'Test 3 OK'
+end
+
+-- Test defer statement in tailcalls
+do
+ local y = 0
+ local function x (n)
+ defer y = y + 1 end
+ if n > 0 then return x(n - 1) end
+ end
+ pcall(x, 3)
+ assert(y == 4)
+ print 'Test 4 OK'
+end
+
+-- Simulate a test of resource closure with defer
+do
+ local y = 0
+ local z = { count = 0 }
+ z.__index = z;
+ function z:new()
+ local object = {}
+ setmetatable(object, z)
+ return object
+ end
+ function z:open(arg)
+ if (arg) then
+ z.count = z.count + 1
+ return
+ end
+ y = 1
+ error('error opening')
+ end
+ function z.close()
+ z.count = z.count - 1
+ end
+ local function x(arg)
+ local f = z:new()
+ f:open(arg)
+ assert(z.count == 1)
+ defer f:close() end
+ end
+ x('filename')
+ assert(y == 0)
+ assert(z.count == 0)
+ pcall(x, false)
+ assert(z.count == 0)
+ assert(y == 1)
+ print 'Test 5 OK'
+end
+
+--- Test stack reallocation in defer statement
+do
+ local function x(a) if a <= 0 then return else x(a-1) end end
+ local y = 1000
+ local function z(...)
+ -- recursive call to make stack
+ defer x(y) end
+ return ...
+ end
+ do
+ local a,b,c = z(1,2,3)
+ assert(a == 1 and b == 2 and c == 3)
+ a,b,c = z(3,2,1)
+ assert(a == 3 and b == 2 and c == 1)
+ end
+ print 'Test 6 OK'
+end
+
+-- Adapted from Lua 5.4
+local function stack(n) n = ((n == 0) or stack(n - 1)) end
+
+local function func2close (f, x, y)
+ local obj = setmetatable({}, {__close = f})
+ if x then
+ return x, obj, y
+ else
+ return obj
+ end
+end
+
+do
+ local function t()
+ local a = {}
+ do
+ local b = false -- not to be closed
+ -- x is <close>
+ local x = setmetatable({"x"}, {__close = function (self)
+ a[#a + 1] = self[1] end})
+ defer getmetatable(x).__close(x) end
+ -- y is <close>
+ local w, y, z = func2close(function (self, err)
+ assert(err == nil); a[#a + 1] = "y"
+ end, 10, 20)
+ defer getmetatable(y).__close(y) end
+ local c = nil -- not to be closed
+ a[#a + 1] = "in"
+ assert(w == 10 and z == 20)
+ end
+ a[#a + 1] = "out"
+ assert(a[1] == "in" and a[2] == "y" and a[3] == "x" and a[4] == "out")
+ end
+ t()
+ print 'Test 7 OK'
+end
+
+do
+ local function t()
+ local X = false
+
+ local x, closescope = func2close(function () stack(10); X = true end, 100)
+ assert(x == 100); x = 101; -- 'x' is not read-only
+
+ -- closing functions do not corrupt returning values
+ local function foo (x)
+ local _ = closescope
+ defer getmetatable(_).__close(_) end
+ return x, X, 23
+ end
+
+ local a, b, c = foo(1.5)
+ assert(a == 1.5 and b == false and c == 23 and X == true)
+
+ X = false
+ foo = function (x)
+ local _ = closescope
+ defer getmetatable(_).__close(_) end
+ local y = 15
+ return y
+ end
+
+ assert(foo() == 15 and X == true)
+
+ X = false
+ foo = function ()
+ local x = closescope
+ defer getmetatable(x).__close(x) end
+ return x
+ end
+
+ assert(foo() == closescope and X == true)
+ end
+ t()
+ print 'Test 8 OK'
+end
+
+do
+ local function t()
+ -- calls cannot be tail in the scope of to-be-closed variables
+ local X, Y
+ local function foo ()
+ local _ = func2close(function () Y = 10 end)
+ defer getmetatable(_).__close(_) end
+ assert(X == true and Y == nil) -- 'X' not closed yet
+ return 1,2,3
+ end
+
+ local function bar ()
+ local _ = func2close(function () X = false end)
+ defer getmetatable(_).__close(_) end
+ X = true
+ do
+ return foo() -- not a tail call!
+ end
+ end
+
+ local a, b, c, d = bar()
+ assert(a == 1 and b == 2 and c == 3 and X == false and Y == 10 and d == nil)
+ return foo, bar
+ end
+ local f,b = t()
+ print 'Test 9 OK'
+end
+
+do
+ local function t()
+ -- an error in a wrapped coroutine closes variables
+ local x = false
+ local y = false
+ local co = coroutine.wrap(function ()
+ local xv = func2close(function () x = true end)
+ defer getmetatable(xv).__close(xv) end
+ do
+ local yv = func2close(function () y = true end)
+ defer getmetatable(yv).__close(yv) end
+ coroutine.yield(100) -- yield doesn't close variable
+ end
+ coroutine.yield(200) -- yield doesn't close variable
+ error(23) -- error does
+ end)
+
+ local b = co()
+ assert(b == 100 and not x and not y)
+ b = co()
+ assert(b == 200 and not x and y)
+ local a, b = pcall(co)
+ assert(not a and b == 23 and x and y)
+ end
+ t()
+ print 'Test 10 OK'
+end
+
+-- a suspended coroutine should not close its variables when collected
+do
+ function t()
+ local co
+ co = coroutine.wrap(function()
+ -- should not run
+ local x = func2close(function () os.exit(false) end)
+ defer getmetatable(x).__close(x) end
+ co = nil
+ coroutine.yield()
+ end)
+ co() -- start coroutine
+ assert(co == nil) -- eventually it will be collected
+ collectgarbage()
+ end
+ t()
+ print 'Test 11 OK'
+end
+
+do
+ local function t()
+ -- error in a wrapped coroutine raising errors when closing a variable
+ local x = 0
+ local co = coroutine.wrap(function ()
+ local xx = func2close(function () x = x + 1; error("@YYY") end)
+ defer getmetatable(xx).__close(xx) end
+ local xv = func2close(function () x = x + 1; error("@XXX") end)
+ defer getmetatable(xv).__close(xv) end
+ coroutine.yield(100)
+ error(200)
+ end)
+ assert(co() == 100); assert(x == 0)
+ local st, msg = pcall(co); assert(x == 2)
+ assert(not st and msg == 200) -- should get first error raised
+
+ local x = 0
+ local y = 0
+ co = coroutine.wrap(function ()
+ local xx = func2close(function () y = y + 1; error("YYY") end)
+ defer getmetatable(xx).__close(xx) end
+ local xv = func2close(function () x = x + 1; error("XXX") end)
+ defer getmetatable(xv).__close(xv) end
+ coroutine.yield(100)
+ return 200
+ end)
+ assert(co() == 100); assert(x == 0)
+ local st, msg = pcall(co)
+ assert(not st and string.find(msg, "%w+%.%w+:%d+: XXX"))
+ assert(x == 1 and y == 1)
+ end
+ t()
+ print 'Test 12 OK'
+end
+
+print 'OK'
Index: lopcodes.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lopcodes.h (revision e7411fab800e2cfa810a1ba296356532eabdde40)
+++ lopcodes.h (date 1594850124198)
@@ -230,11 +230,13 @@
OP_VARARG,/* A B R(A), R(A+1), ..., R(A+B-2) = vararg */
-OP_EXTRAARG/* Ax extra (larger) argument for previous opcode */
+OP_EXTRAARG,/* Ax extra (larger) argument for previous opcode */
+OP_DEFER /* A mark variable A "deferred" */
+
} OpCode;
-#define NUM_OPCODES (cast(int, OP_EXTRAARG) + 1)
+#define NUM_OPCODES (cast(int, OP_DEFER) + 1)
Index: lvm.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lvm.c (revision e7411fab800e2cfa810a1ba296356532eabdde40)
+++ lvm.c (date 1594850137378)
@@ -737,7 +737,7 @@
/* execute a jump instruction */
#define dojump(ci,i,e) \
{ int a = GETARG_A(i); \
- if (a != 0) luaF_close(L, ci->u.l.base + a - 1); \
+ if (a != 0) Protect(luaF_close(L, ci->u.l.base + a - 1, LUA_OK)); \
ci->u.l.savedpc += GETARG_sBx(i) + e; }
/* for test instructions, execute the jump instruction that follows it */
@@ -1159,7 +1159,7 @@
StkId lim = nci->u.l.base + getproto(nfunc)->numparams;
int aux;
/* close all upvalues from previous call */
- if (cl->p->sizep > 0) luaF_close(L, oci->u.l.base);
+ if (cl->p->sizep > 0) Protect(luaF_close(L, oci->u.l.base, NOCLOSINGMETH));
/* move new frame into old one */
for (aux = 0; nfunc + aux < lim; aux++)
setobjs2s(L, ofunc + aux, nfunc + aux);
@@ -1175,7 +1175,10 @@
}
vmcase(OP_RETURN) {
int b = GETARG_B(i);
- if (cl->p->sizep > 0) luaF_close(L, base);
+ if (cl->p->sizep > 0) {
+ Protect(luaF_close(L, base, LUA_OK));
+ ra = RA(i);
+ }
b = luaD_poscall(L, ci, ra, (b != 0 ? b - 1 : cast_int(L->top - ra)));
if (ci->callstatus & CIST_FRESH) /* local 'ci' still from callee */
return; /* external invocation: return */
@@ -1313,6 +1316,12 @@
vmcase(OP_EXTRAARG) {
lua_assert(0);
vmbreak;
+ }
+ vmcase(OP_DEFER) {
+ UpVal *up = luaF_findupval(L, ra); /* create new upvalue */
+ up->flags = 1; /* mark it as deferred */
+ setnilvalue(ra); /* initialize it with nil */
+ vmbreak;
}
}
}
Index: lcorolib.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lcorolib.c (revision e7411fab800e2cfa810a1ba296356532eabdde40)
+++ lcorolib.c (date 1594850137262)
@@ -75,8 +75,11 @@
lua_State *co = lua_tothread(L, lua_upvalueindex(1));
int r = auxresume(L, co, lua_gettop(L));
if (r < 0) {
+ int stat = lua_status(co);
+ if (stat != LUA_OK && stat != LUA_YIELD)
+ lua_resetthread(co); /* close variables in case of errors */
if (lua_type(L, -1) == LUA_TSTRING) { /* error object is a string? */
- luaL_where(L, 1); /* add extra info */
+ luaL_where(L, 1); /* add extra info, if available */
lua_insert(L, -2);
lua_concat(L, 2);
}
Index: lua.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lua.h (revision e7411fab800e2cfa810a1ba296356532eabdde40)
+++ lua.h (date 1594850137362)
@@ -144,6 +144,7 @@
LUA_API lua_State *(lua_newstate) (lua_Alloc f, void *ud);
LUA_API void (lua_close) (lua_State *L);
LUA_API lua_State *(lua_newthread) (lua_State *L);
+LUA_API int (lua_resetthread) (lua_State *L);
LUA_API lua_CFunction (lua_atpanic) (lua_State *L, lua_CFunction panicf);
Index: lopcodes.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lopcodes.c (revision e7411fab800e2cfa810a1ba296356532eabdde40)
+++ lopcodes.c (date 1594850124178)
@@ -65,6 +65,7 @@
"CLOSURE",
"VARARG",
"EXTRAARG",
+ "DEFER",
NULL
};
@@ -119,6 +120,7 @@
,opmode(0, 0, OpArgU, OpArgU, iABC) /* OP_SETLIST */
,opmode(0, 1, OpArgU, OpArgN, iABx) /* OP_CLOSURE */
,opmode(0, 1, OpArgU, OpArgN, iABC) /* OP_VARARG */
- ,opmode(0, 0, OpArgU, OpArgU, iAx) /* OP_EXTRAARG */
+ ,opmode(0, 0, OpArgU, OpArgU, iAx) /* OP_EXTRAARG */
+ ,opmode(0, 1, OpArgN, OpArgN, iABC) /* OP_DEFER */
};

Some files were not shown because too many files have changed in this diff