issue #169 Update MIR to latest version

nometajit
Dibyendu Majumdar 4 years ago
parent ab9fb1f8fb
commit 788cd0d92d

11
.gitignore vendored

@ -4,3 +4,14 @@ CMakeScripts
cmake_install.cmake
install_manifest.txt
CTestTestfile.cmake
build
buildmir
omrjit
buildllvm
.vscode
.idea
cmake-build-debug
cmake-build-release
buildnojit
nojit
nojita

@ -6,7 +6,11 @@ message(STATUS "OS type is ${CMAKE_SYSTEM_NAME}")
message(STATUS "System processor is ${CMAKE_HOST_SYSTEM_PROCESSOR}")
message(STATUS "Build type is ${CMAKE_BUILD_TYPE}")
set(TARGET x86_64)
if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
set(TARGET x86_64)
else()
message(FATAL "Unsupported platform")
endif()
set(MIR_HEADERS
mir.h

@ -0,0 +1,585 @@
# Medium Intermediate Representation (file mir.h)
* This document describes MIR itself, API for its creation, and MIR textual representation
* MIR textual representation is assembler like. Each directive or insn should be put on a separate line
* In MIR textual syntax we use
* `[]` for optional construction
* `{}` for repeating zero or more times
* `<>` for some informal construction description or construction already described or will be described
## MIR context
* MIR API code has an implicit state called the MIR context
* MIR context is represented by data of `MIR_context_t`
* MIR context is created by function `MIR_context_t MIR_init (void)`
* Every MIR API function (except for `MIR_init`) requires MIR context passed through the first argument of type `MIR_context_t`
* You can use MIR functions in different threads without any synchronization
if they work with different contexts in each thread
## MIR program
* MIR program consists of MIR **modules**
* To start work with MIR program, you should first call API function `MIR_init`
* API function `MIR_finish (MIR_context_t ctx)` should be called last. It frees all internal data used to work with MIR program and all IR (insns, functions, items, and modules) created in this context
* API function `MIR_output (MIR_context_t ctx, FILE *f)` outputs MIR textual representation of the program into given file
* API function `MIR_scan_string (MIR_context_t ctx, const char *str)` reads textual MIR representation given by a string
* API functions `MIR_write (MIR_context_t ctx, FILE *f)` and
`MIR_read (MIR_context_t ctx, FILE *f)` output and read
**binary MIR representation** to/from given file. There are also
functions `MIR_write_with_func (MIR_context_t ctx, const int
(*writer_func) (MIR_context_t, uint8_t))` and `MIR_read_with_func
(MIR_context_t ctx, const int (*reader_func) (MIR_context_t))` to
output and read **binary MIR representation** through a function
given as an argument. The reader function should return EOF at
the end of the binary MIR representation, the writer function
should return the number of successfully output bytes
* Binary MIR representation is much more compact and faster to read than the textual one
## MIR data type
* MIR program works with the following **data types**:
* `MIR_T_I8` and `MIR_T_U8` -- signed and unsigned 8-bit integer values
* `MIR_T_I16` and `MIR_T_U16` -- signed and unsigned 16-bit integer values
* `MIR_T_I32` and `MIR_T_U32` -- signed and unsigned 32-bit integer values
* `MIR_T_I64` and `MIR_T_U64` -- signed and unsigned 64-bit integer values
* ??? signed and unsigned 64-bit integer types in most cases
are interchangeable as insns themselves decide how to treat
their value
* `MIR_T_F` and `MIR_T_D` -- IEEE single and double precision floating point values
* `MIR_T_LD` - long double values. It is machine-dependent and can be IEEE double, x86 80-bit FP,
or IEEE quad precision FP values
* `MIR_T_P` -- pointer values. Depending on the target pointer value is actually 32-bit or 64-bit integer value
* MIR textual representation of the types are correspondingly `i8`,
`u8`, `i16`, `u16`, `i32`, `u32`, `i64`, `u64`, `f`, `d`, `ld`, `p`,
and `v`
* Function `int MIR_int_type_p (MIR_type_t t)` returns TRUE if given type is an integer one (it includes pointer type too)
* Function `int MIR_fp_type_p (MIR_type_t t)` returns TRUE if given type is a floating point type
## MIR module
* Module is a high level entity of MIR program
* Module is created through API function `MIR_module_t MIR_new_module (MIR_context_t ctx, const char *name)`
* Module creation is finished by calling API function `MIR_finish_module`
* You can create only one module at any given time
* List of all created modules can be gotten by function `DLIST (MIR_module_t) *MIR_get_module_list (MIR_context_t ctx)`
* MIR module consists of **items**. There are following **item types** (and function for their creation):
* **Function**: `MIR_func_item`
* **Import**: `MIR_import_item` (`MIR_item_t MIR_new_import (MIR_context_t ctx, const char *name)`)
* **Export**: `MIR_export_item` (`MIR_item_t MIR_new_export (MIR_context_t ctx, const char *name)`)
* **Forward declaration**: `MIR_forward_item` (`MIR_item_t MIR_new_forward (MIR_context_t ctx, const char *name)`)
* **Prototype**: `MIR_proto_item` (`MIR_new_proto_arr`, `MIR_new_proto`, `MIR_new_vararg_proto_arr`,
`MIR_new_vararg_proto` analogous to `MIR_new_func_arr`, `MIR_new_func`, `MIR_new_vararg_func_arr` and
`MIR_new_vararg_func` -- see below). The only difference is that
two or more prototype argument names can be the same
* **Data**: `MIR_data_item` with optional name
(`MIR_item_t MIR_new_data (MIR_context_t ctx, const char *name, MIR_type_t el_type, size_t nel, const void *els)`
or `MIR_item_t MIR_new_string_data (MIR_context_t ctx, const char *name, MIR_str_t str)`)
* **Reference data**: `MIR_ref_data_item` with optional name
(`MIR_item_t MIR_new_ref_data (MIR_context_t ctx, const char *name, MIR_item_t item, int64_t disp)`)
* The address of the item after linking plus `disp` is used to initialize the data
* **Expression Data**: `MIR_expr_data_item` with optional name
(`MIR_item_t MIR_new_expr_data (MIR_context_t ctx, const char *name, MIR_item_t expr_item)`)
* Not all MIR functions can be used for expression data. The expression function should have
only one result, have no arguments, not use any call or any instruction with memory
* The expression function is called during linking and its result is used to initialize the data
* **Memory segment**: `MIR_bss_item` with optional name (`MIR_item_t MIR_new_bss (MIR_context_t ctx, const char *name, size_t len)`)
* Names of MIR functions, imports, and prototypes should be unique in a module
* API functions `MIR_output_item (MIR_context_t ctx, FILE *f, MIR_item_t item)`
and `MIR_output_module (MIR_context_t ctx, FILE *f, MIR_module_t module)` output item or module
textual representation into given file
* MIR text module syntax looks the following:
```
<module name>: module
{<module item>}
endmodule
```
## MIR function
* Function is a module item
* Function has a **frame**, a stack memory reserved for each function invocation
* Function has **local variables** (sometimes called **registers**), a part of which are **arguments**
* A variable should have a unique name in the function
* A variable is represented by a structure of type `MIR_var_t`
* The structure contains variable name and its type
* MIR function with its arguments is created through API function `MIR_item_t MIR_new_func (MIR_context_t ctx, const
char *name, size_t nres, MIR_type_t *res_types, size_t nargs, ...)`
or function `MIR_item_t MIR_new_func_arr (MIR_context_t ctx, const char *name, size_t nres, MIR_type_t *res_types, size_t nargs, MIR_var_t *arg_vars)`
* Argument variables can be any type
* This type only denotes how the argument value is passed
* Any integer type argument variable has actually type `MIR_T_I64`
* MIR functions with variable number of arguments are created through API functions
`MIR_item_t MIR_new_vararg_func (MIR_context_t ctx, const char *name, size_t nres, MIR_type_t *res_types, size_t nargs, ...)`
or function `MIR_item_t MIR_new_vararg_func_arr (MIR_context_t ctx, const char *name, size_t nres, MIR_type_t *res_types, size_t nargs, MIR_var_t *arg_vars)`
* `nargs` and `arg_vars` define only fixed arguments
* MIR functions can have more than one result but the possible number of results
and combination of their types are machine-defined. For example, for x86-64
the function can have up to six results and return two integer
values, two float or double values, and two long double values
in any combination
* MIR function creation is finished by calling API function `MIR_finish_func (MIR_context_t ctx)`
* You can create only one MIR function at any given time
* MIR text function syntax looks the following (arg-var always has a name besides type):
```
<function name>: func {<result type>, } [ arg-var {, <arg-var> } [, ...]]
{<insn>}
endfunc
```
* Non-argument function variables are created through API function
`MIR_reg_t MIR_new_func_reg (MIR_context_t ctx, MIR_func_t func, MIR_type_t type, const char *name)`
* The only permitted integer type for the variable is `MIR_T_I64` (or MIR_T_U64???)
* Names in form `t<number>` can not be used as they are fixed for internal purposes
* You can create function variables even after finishing the
function creation. This can be used to modify function insns,
e.g. for optimizations
* Non-argument variable declaration syntax in MIR textual representation looks the following:
```
local [ <var type>:<var name> {, <var type>:<var name>} ]
```
* In MIR textual representation variable should be defined through `local` before its use
## MIR insn operands
* MIR insns work with operands
* There are following operands:
* Signed or unsigned **64-bit integer value operands** created through API functions
`MIR_op_t MIR_new_int_op (MIR_context_t ctx, int64_t v)` and `MIR_op_t MIR_new_uint_op (MIR_context_t ctx, uint64_t v)`
* In MIR text they are represented the same way as C integer numbers (e.g. octal, decimal, hexadecimal ones)
* **Float, double or long double value operands** created through API functions `MIR_op_t MIR_new_float_op (MIR_context_t ctx, float v)`,
`MIR_op_t MIR_new_double_op (MIR_context_t ctx, double v)`, and `MIR_op_t MIR_new_ldouble_op (MIR_context_t ctx, long double v)`
* In MIR text they are represented the same way as C floating point numbers
* **String operands** created through API functions `MIR_op_t MIR_new_str_op (MIR_context_t ctx, MIR_str_t str)`
* In MIR text they are represented by `typedef struct MIR_str {size_t len; const char *s;} MIR_str_t`
* Strings for each operand are put into memory (which can be modified) and the memory address actually presents the string
* **Label operand** created through API function `MIR_op_t MIR_new_label_op (MIR_context_t ctx, MIR_label_t label)`
* Here `label` is a special insn created by API function `MIR_insn_t MIR_new_label (MIR_context_t ctx)`
* In MIR text, they are represented by unique label name
* **Reference operands** created through API function `MIR_op_t MIR_new_ref_op (MIR_context_t ctx, MIR_item_t item)`
* In MIR text, they are represented by the corresponding item name
* **Register (variable) operands** created through API function `MIR_op_t MIR_new_reg_op (MIR_context_t ctx, MIR_reg_t reg)`
* In MIR text they are represented by the corresponding variable name
* Value of type `MIR_reg_t` is returned by function `MIR_new_func_reg`
or can be gotten by function `MIR_reg_t MIR_reg (MIR_context_t ctx, const char *reg_name, MIR_func_t func)`, e.g. for argument-variables
* **Memory operands** consist of type, displacement, base
register, index register and index scale. Memory operand is
created through API function `MIR_op_t MIR_new_mem_op (MIR_context_t ctx, MIR_type_t type,
MIR_disp_t disp, MIR_reg_t base, MIR_reg_t index, MIR_scale_t
scale)`
* The arguments define address of memory as `disp + base + index * scale`
* Integer type input memory is transformed to 64-bit integer value with sign or zero extension
depending on signedness of the type
* result 64-bit integer value is truncated to integer memory type
* Memory operand has the following syntax in MIR text (absent displacement means zero one,
absent scale means one, scale should be 1, 2, 4, or 8):
```
<type>: <disp>
<type>: [<disp>] (<base reg> [, <index reg> [, <scale> ]])
```
* API function `MIR_output_op (MIR_context_t ctx, FILE *f, MIR_op_t op, MIR_func_t func)` outputs the operand
textual representation into given file
## MIR insns
* All MIR insns (except call and ret ones) expect a fixed number of operands
* Most MIR insns are 3-operand insns: two inputs and one output
* In majority cases **the first insn operand** describes where the insn result (if any) will be placed
* Only register or memory operand can be insn output (result) operand
* MIR insn can be created through API functions `MIR_insn_t MIR_new_insn (MIR_context_t ctx, MIR_insn_code_t code, ...)`
and `MIR_insn_t MIR_new_insn_arr (MIR_context_t ctx, MIR_insn_code_t code, size_t nops, MIR_op_t *ops)`
* Number of operands and their types should be what is expected by the insn being created
* You can not use `MIR_new_insn` for the creation of call and ret insns as these insns have a variable number of operands.
To create such insns you should use `MIR_new_insn_arr` or special functions
`MIR_insn_t MIR_new_call_insn (MIR_context_t ctx, size_t nops, ...)` and `MIR_insn_t MIR_new_ret_insn (MIR_context_t ctx, size_t nops, ...)`
* You can get insn name and number of insn operands through API functions
`const char *MIR_insn_name (MIR_context_t ctx, MIR_insn_code_t code)` and `size_t MIR_insn_nops (MIR_context_t ctx, MIR_insn_t insn)`
* You can add a created insn at the beginning or end of function insn list through API functions
`MIR_prepend_insn (MIR_context_t ctx, MIR_item_t func, MIR_insn_t insn)` and `MIR_append_insn (MIR_context_t ctx, MIR_item_t func, MIR_insn_t insn)`
* You can insert a created insn in the middle of function insn list through API functions
`MIR_insert_insn_after (MIR_context_t ctx, MIR_item_t func, MIR_insn_t after, MIR_insn_t insn)` and
`MIR_insert_insn_before (MIR_context_t ctx, MIR_item_t func, MIR_insn_t before, MIR_insn_t insn)`
* The insn `after` and `before` should be already in the list
* You can remove insn from the function list through API function `MIR_remove_insn (MIR_context_t ctx, MIR_item_t func, MIR_insn_t insn)`
* The insn should not be inserted in the list if it is already there
* The insn should not be removed from the list if it is not there
* API function `MIR_output_insn (MIR_context_t ctx, FILE *f, MIR_insn_t insn, MIR_func_t func, int newline_p)` outputs the insn
textual representation into given file with a newline at the end depending on value of `newline_p`
* Insn has the following syntax in MIR text:
```
{<label name>:} [<insn name> <operand> {, <operand>}]
```
* More than one insn can be put on the same line by separating the insns by `;`
### MIR move insns
* There are following MIR move insns:
| Insn Code | Nops | Description |
|-------------------------|-----:|--------------------------------------------------------|
| `MIR_MOV` | 2 | move 64-bit integer values |
| `MIR_FMOV` | 2 | move **single precision** floating point values |
| `MIR_DMOV` | 2 | move **double precision** floating point values |
| `MIR_LDMOV` | 2 | move **long double** floating point values |
### MIR integer insns
* If insn has suffix `S` in insn name, the insn works with lower 32-bit part of 64-bit integer value
* The higher part of 32-bit insn result is undefined
* If insn has prefix `U` in insn name, the insn treats integer as unsigned integers
* Some insns have no unsigned variant as MIR is oriented to CPUs with two's complement integer arithmetic
(the huge majority of all CPUs)
| Insn Code | Nops | Description |
|-------------------------|-----:|--------------------------------------------------------|
| `MIR_EXT8` | 2 | **sign** extension of lower **8 bit** input part |
| `MIR_UEXT8` | 2 | **zero** extension of lower **8 bit** input part |
| `MIR_EXT16` | 2 | **sign** extension of lower **16 bit** input part |
| `MIR_UEXT16` | 2 | **zero** extension of lower **16 bit** input part |
| `MIR_EXT32` | 2 | **sign** extension of lower **32 bit** input part |
| `MIR_UEXT32` | 2 | **zero** extension of lower **32 bit** input part |
| | | |
| `MIR_NEG` | 2 | changing sign of **64-bit** integer value |
| `MIR_NEGS` | 2 | changing sign of **32-bit** integer value |
| | | |
| `MIR_ADD`, `MIR_SUB` | 3 | **64-bit** integer addition and subtraction |
| `MIR_ADDS`, `MIR_SUBS` | 3 | **32-bit** integer addition and subtraction |
| `MIR_MUL`, `MIR_DIV` | 3 | **64-bit signed** multiplication and division |
| `MIR_UMUL`, `MIR_UDIV` | 3 | **64-bit unsigned** integer multiplication and division |
| `MIR_MULS`, `MIR_DIVS` | 3 | **32-bit signed** multiplication and division |
| `MIR_UMULS`, `MIR_UDIVS`| 3 | **32-bit unsigned** integer multiplication and division |
| `MIR_MOD` | 3 | **64-bit signed** modulo operation |
| `MIR_UMOD` | 3 | **64-bit unsigned** integer modulo operation |
| `MIR_MODS` | 3 | **32-bit signed** modulo operation |
| `MIR_UMODS` | 3 | **32-bit unsigned** integer modulo operation |
| | | |
| `MIR_AND`, `MIR_OR` | 3 | **64-bit** integer bitwise AND and OR |
| `MIR_ANDS`, `MIR_ORS` | 3 | **32-bit** integer bitwise AND and OR |
| `MIR_XOR` | 3 | **64-bit** integer bitwise XOR |
| `MIR_XORS` | 3 | **32-bit** integer bitwise XOR |
| | | |
| `MIR_LSH` | 3 | **64-bit** integer left shift |
| `MIR_LSHS` | 3 | **32-bit** integer left shift |
| `MIR_RSH` | 3 | **64-bit** integer right shift with **sign** extension |
| `MIR_RSHS` | 3 | **32-bit** integer right shift with **sign** extension |
| `MIR_URSH` | 3 | **64-bit** integer right shift with **zero** extension |
| `MIR_URSHS` | 3 | **32-bit** integer right shift with **zero** extension |
| | | |
| `MIR_EQ`, `MIR_NE` | 3 | equality/inequality of **64-bit** integers |
| `MIR_EQS`, `MIR_NES` | 3 | equality/inequality of **32-bit** integers |
| `MIR_LT`, `MIR_LE` | 3 | **64-bit signed** less than/less than or equal |
| `MIR_ULT`, `MIR_ULE` | 3 | **64-bit unsigned** less than/less than or equal |
| `MIR_LTS`, `MIR_LES` | 3 | **32-bit signed** less than/less than or equal |
| `MIR_ULTS`, `MIR_ULES` | 3 | **32-bit unsigned** less than/less than or equal |
| `MIR_GT`, `MIR_GE` | 3 | **64-bit signed** greater than/greater than or equal |
| `MIR_UGT`, `MIR_UGE` | 3 | **64-bit unsigned** greater than/greater than or equal |
| `MIR_GTS`, `MIR_GES` | 3 | **32-bit signed** greater than/greater than or equal |
| `MIR_UGTS`, `MIR_UGES` | 3 | **32-bit unsigned** greater than/greater than or equal |
### MIR floating point insns
* If insn has prefix `F` in insn name, the insn is single precision float point insn. Its operands should have `MIR_T_F` type
* If insn has prefix `D` in insn name, the insn is double precision float point insn. Its operands should have `MIR_T_D` type
* Otherwise, insn has prefix `LD` in insn name and the insn is a long double insn.
Its operands should have `MIR_T_LD` type.
* The result of comparison insn is a 64-bit integer value, so the result operand should be of integer type
| Insn Code | Nops | Description |
|--------------------------------------|-----:|-----------------------------------------------------------------|
| `MIR_F2I`, `MIR_D2I`, `MIR_LD2I` | 2 | transforming floating point value into 64-bit integer |
| `MIR_F2D` | 2 | transforming single to double precision FP value |
| `MIR_F2LD` | 2 | transforming single precision to long double FP value |
| `MIR_D2F` | 2 | transforming double to single precision FP value |
| `MIR_D2LD` | 2 | transforming double precision to long double FP value |
| `MIR_LD2F` | 2 | transforming long double to single precision FP value |
| `MIR_LD2D` | 2 | transforming long double to double precision FP value |
| `MIR_I2F`, `MIR_I2D`, `MIR_I2LD` | 2 | transforming 64-bit integer into a floating point value |
| `MIR_UI2F`, `MIR_UI2D`, `MIR_UI2LD` | 2 | transforming unsigned 64-bit integer into a floating point value|
| `MIR_FNEG`, `MIR_DNEG`, `MIR_LDNEG` | 2 | changing sign of floating point value |
| `MIR_FADD`, `MIR_FSUB` | 3 | **single** precision addition and subtraction |
| `MIR_DADD`, `MIR_DSUB` | 3 | **double** precision addition and subtraction |
| `MIR_LDADD`, `MIR_LDSUB` | 3 | **long double** addition and subtraction |
| `MIR_FMUL`, `MIR_FDIV` | 3 | **single** precision multiplication and division |
| `MIR_DMUL`, `MIR_DDIV` | 3 | **double** precision multiplication and division |
| `MIR_LDMUL`, `MIR_LDDIV` | 3 | **long double** multiplication and division |
| `MIR_FEQ`, `MIR_FNE` | 3 | equality/inequality of **single** precision values |
| `MIR_DEQ`, `MIR_DNE` | 3 | equality/inequality of **double** precision values |
| `MIR_LDEQ`, `MIR_LDNE` | 3 | equality/inequality of **long double** values |
| `MIR_FLT`, `MIR_FLE` | 3 | **single** precision less than/less than or equal |
| `MIR_DLT`, `MIR_DLE` | 3 | **double** precision less than/less than or equal |
| `MIR_LDLT`, `MIR_LDLE` | 3 | **long double** less than/less than or equal |
| `MIR_FGT`, `MIR_FGE` | 3 | **single** precision greater than/greater than or equal |
| `MIR_DGT`, `MIR_DGE` | 3 | **double** precision greater than/greater than or equal |
| `MIR_LDGT`, `MIR_LDGE` | 3 | **long double** greater than/greater than or equal |
### MIR branch insns
* The first operand of the insn should be label
| Insn Code | Nops | Description |
|-------------------------|-----:|---------------------------------------------------------------|
| `MIR_JMP` | 1 | unconditional jump to the label |
| `MIR_BT` | 2 | jump to the label when 2nd **64-bit** operand is **nonzero** |
| `MIR_BTS` | 2 | jump to the label when 2nd **32-bit** operand is **nonzero** |
| `MIR_BF` | 2 | jump to the label when 2nd **64-bit** operand is **zero** |
| `MIR_BFS` | 2 | jump to the label when 2nd **32-bit** operand is **zero** |
### MIR switch insn
* The first operand of `MIR_SWITCH` insn should have an integer value from 0 to `N - 1` inclusive
* The rest operands should be `N` labels, where `N > 0`
* Execution of the insn will be a jump to the label corresponding to the first operand value
* If the first operand value is out of the range of permitted values, the execution result is undefined
### MIR integer comparison and branch insn
* The first operand of the insn should be label. Label will be the next executed insn if the result of comparison is non-zero
| Insn Code | Nops | Description |
|-------------------------|-----:|---------------------------------------------------------------|
| `MIR_BEQ`, `MIR_BNE` | 3 | jump on **64-bit** equality/inequality |
| `MIR_BEQS`, `MIR_BNES` | 3 | jump on **32-bit** equality/inequality |
| `MIR_BLT`, `MIR_BLE` | 3 | jump on **signed 64-bit** less than/less than or equal |
| `MIR_UBLT`, `MIR_UBLE` | 3 | jump on **unsigned 64-bit** less than/less than or equal |
| `MIR_BLTS`, `MIR_BLES` | 3 | jump on **signed 32-bit** less than/less than or equal |
| `MIR_UBLTS`, `MIR_UBLES`| 3 | jump on **unsigned 32-bit** less than/less than or equal |
| `MIR_BGT`, `MIR_BGE` | 3 | jump on **signed 64-bit** greater than/greater than or equal |
| `MIR_UBGT`, `MIR_UBGE` | 3 | jump on **unsigned 64-bit** greater than/greater than or equal|
| `MIR_BGTS`, `MIR_BGES` | 3 | jump on **signed 32-bit** greater than/greater than or equal |
| `MIR_UBGTS`, `MIR_UBGES`| 3 | jump on **unsigned 32-bit** greater than/greater than or equal|
### MIR floating point comparison and branch insn
* The first operand of the insn should be label. Label will be the next executed insn if the result of comparison is non-zero
* See comparison semantics in the corresponding comparison insns
| Insn Code | Nops | Description |
|---------------------------|-----:|----------------------------------------------------------------|
| `MIR_FBEQ`, `MIR_FBNE` | 3 | jump on **single** precision equality/inequality |
| `MIR_DBEQ`, `MIR_DBNE` | 3 | jump on **double** precision equality/inequality |
| `MIR_LDBEQ`, `MIR_LDBNE` | 3 | jump on **long double** equality/inequality |
| `MIR_FBLT`, `MIR_FBLE` | 3 | jump on **single** precision less than/less than or equal |
| `MIR_DBLT`, `MIR_DBLE` | 3 | jump on **double** precision less than/less than or equal |
| `MIR_LDBLT`, `MIR_LDBLE` | 3 | jump on **long double** less than/less than or equal |
| `MIR_FBGT`, `MIR_FBGE` | 3 | jump on **single** precision greater than/greater than or equal|
| `MIR_DBGT`, `MIR_DBGE` | 3 | jump on **double** precision greater than/greater than or equal|
| `MIR_LDBGT`, `MIR_LDBGE` | 3 | jump on **long double** greater than/greater than or equal |
### MIR return insn
* Return insn has zero or more operands
* Return insn operands should correspond to return types of the function
* 64-bit integer value is truncated to the corresponding function return type first
* The return values will be the function call values
### MIR_CALL insn
* The insn has variable number of operands
* The first operand is a prototype reference operand
* The second operand is a called function address
* The prototype should correspond to the MIR function definition if the function address represents a MIR function
* The prototype should correspond to the C function definition if the address is a C function address
* If the prototype has *N* return types, the next *N* operands are
output operands which will contain the result values of the function
call
* The subsequent operands are arguments. Their types and number should be the same as in the prototype
* Integer arguments are truncated according to integer prototype argument type
### MIR_INLINE insn
* This insn is analogous to `MIR_CALL` but after linking this insn
will be changed by inlined function body if it is possible
* Calls of vararg functions are never inlined
### MIR_ALLOCA insn
* Reserve memory on the stack whose size is given as the 2nd operand and assign the memory address to the 1st operand
* The reserved memory will be aligned according to the target ABI
### MIR_BSTART and MIR_BEND insns
* MIR users can use them to implement blocks with automatic
deallocation of memory allocated by `MIR_ALLOCA` inside the
blocks. But mostly these insns are used to implement call
inlining of functions using alloca
* Both insns use one operand
* The first insn saves the stack pointer in the operand
* The second insn restores stack pointer from the operand
### MIR_VA_START, MIR_VA_ARG, and MIR_VA_END insns
* These insns are only for variable number arguments functions
* `MIR_VA_START` and `MIR_VA_END` have one input operand, an address
of va_list structure (see C stdarg.h for more details). Unlike C
va_start, MIR_VA_START just takes one parameter
* `MIR_VA_ARG` takes va_list and any memory operand and returns
address of the next argument in the 1st insn operand. The memory
operand type defines the type of the argument
* va_list operand can be memory with undefined type. In this case
address of the va_list is not in the memory but is the
memory address
## MIR API example
* The following code in C creates a MIR analog of the C code
`int64_t loop (int64_t arg1) {int64_t count = 0; while (count < arg1) count++; return count;}`
```
MIR_module_t m = MIR_new_module (ctx, "m");
MIR_type_t res_type = MIR_T_I64;
MIR_item_t func = MIR_new_func (ctx, "loop", 1, &res_type, 1, MIR_T_I64, "arg1");
MIR_reg_t COUNT = MIR_new_func_reg (ctx, func->u.func, MIR_T_I64, "count");
MIR_reg_t ARG1 = MIR_reg (ctx, "arg1", func->u.func);
MIR_label_t fin = MIR_new_label (ctx), cont = MIR_new_label (ctx);
MIR_append_insn (ctx, func, MIR_new_insn (ctx, MIR_MOV, MIR_new_reg_op (ctx, COUNT),
MIR_new_int_op (ctx, 0)));
MIR_append_insn (ctx, func, MIR_new_insn (ctx, MIR_BGE, MIR_new_label_op (ctx, fin),
MIR_new_reg_op (ctx, COUNT), MIR_new_reg_op (ctx, ARG1)));
MIR_append_insn (ctx, func, cont);
MIR_append_insn (ctx, func, MIR_new_insn (ctx, MIR_ADD, MIR_new_reg_op (ctx, COUNT),
MIR_new_reg_op (ctx, COUNT), MIR_new_int_op (ctx, 1)));
MIR_append_insn (ctx, func, MIR_new_insn (ctx, MIR_BLT, MIR_new_label_op (ctx, cont),
MIR_new_reg_op (ctx, COUNT), MIR_new_reg_op (ctx, ARG1)));
MIR_append_insn (ctx, func, fin);
MIR_append_insn (ctx, func, MIR_new_ret_insn (ctx, 1, MIR_new_reg_op (ctx, COUNT)));
MIR_finish_func (ctx);
MIR_finish_module (ctx);
```
## MIR text example
```
m_sieve: module
export sieve
sieve: func i32, i32:N
local i64:iter, i64:count, i64:i, i64:k, i64:prime, i64:temp, i64:flags
alloca flags, 819000
mov iter, 0
loop: bge fin, iter, N
mov count, 0; mov i, 0
loop2: bge fin2, i, 819000
mov u8:(flags, i), 1; add i, i, 1
jmp loop2
fin2: mov i, 0
loop3: bge fin3, i, 819000
beq cont3, u8:(flags,i), 0
add temp, i, i; add prime, temp, 3; add k, i, prime
loop4: bge fin4, k, 819000
mov u8:(flags, k), 0; add k, k, prime
jmp loop4
fin4: add count, count, 1
cont3: add i, i, 1
jmp loop3
fin3: add iter, iter, 1
jmp loop
fin: rets count
endfunc
endmodule
m_ex100: module
format: string "sieve (10) = %d\n"
p_printf: proto p:fmt, i32:v
p_sieve: proto i32, i32:iter
export ex100
import sieve, printf
ex100: func v
local i64:r
call p_sieve, sieve, r, 100
call p_printf, printf, format, r
endfunc
endmodule
```
## Other MIR API functions
* MIR API can find a lot of errors. They are reported through an
error function of type `void (*MIR_error_func_t) (MIR_context_t ctx, MIR_error_type_t
error_type, const char *message)`. The function is considered to
never return. To see all error types, please look at the
definition of error type `MIR_error_type_t` in file mir.h
* You can get and set up the current error function through API
functions `MIR_error_func_t MIR_get_error_func (MIR_context_t ctx)` and `MIR_set_error_func
(MIR_context_t ctx, MIR_error_func_t func)`.
* The default error function prints the message into stderr and calls `exit (1)`
* MIR is pretty flexible and can describe complex insns, e.g. insns
whose all operands are memory. Sometimes you need a very simple
form of MIR representation. During load of module all its functions are simplified as much
as possible by adding new insns and registers resulting in a form in which:
* immediate, memory, reference operands can be used only in move insns
* memory have only base register (no displacement and index register)
* string and float immediate operands (if `mem_float_p`) are changed onto
references for new string and data items
* Before execution of MIR code (through interpreter or machine code generated by JIT),
you need to load and link it
* You can load MIR module through API function `MIR_load_module
(MIR_context_t ctx, MIR_module_t m)`. The function simplifies module code.
It also allocates the module data/bss
and makes visible the exported module items to other module
during subsequent linking. There is a guarantee that the
different data/bss items will be in adjacent memory if the
data/bss items go one after another and all the data/bss items
except the first one are anonymous (it means they have no name).
Such adjacent data/bss items are called a **section**.
Alignment of the section is malloc alignment. There is no
memory space between data/bss in the section. If you need to
provide necessary alignment of a data/bss in the section you
should do it yourself by putting additional anonymous data/bss
before given data/bss if it is necessary. BSS memory is
initialized by zero and data memory is initialized by the
corresponding data. If there is already an exported item with
the same name, it will be not visible for linking anymore. Such
visibility mechanism permits usage of different versions of the
same function
* Reference data are initialized not during loading but during linking after
the referenced item address is known. The address is used for the data
initialization
* Expression data are also initialized not during loading but during linking after
all addresses are known. The expression function is evaluated by the interpreter
and its evaluation result is used for the data initialization. For example, if
you need to initialize data by item address plus offset you should use
an expression data
* MIR permits to use imported items not implemented in MIR, for
example to use C standard function `strcmp`. You need to inform
MIR about it. API function `MIR_load_external (MIR_context_t ctx, const char
*name, void *addr)` informs that imported items with given name
have given address (e.g. C function address or data)
* Imports/exports of modules loaded since the last link can be
linked through API function `MIR_link (MIR_context_t ctx, void (*set_interface) (MIR_item_t item),
void * (*import_resolver) (const char *))`
* `MIR_link` function inlines most `MIR_INLINE` calls
* `MIR_link` function also sets up call interface
* If you pass `MIR_set_interp_interface` to `MIR_link`, then
called functions from MIR code will be interpreted
* If you pass `MIR_set_gen_interface` to `MIR_link`, then
MIR-generator will generate machine code for all loaded MIR
functions and called functions from MIR code will execute the
machine code
* If you pass `MIR_set_lazy_gen_interface` to `MIR_link`, then
MIR-generator will generate machine code only on the first
function call and called functions from MIR code will execute
the machine code
* If you pass a non-null `import_resolver` function, it will be
called to define the address of an import without a definition.
The function gets the import name and returns the address which
will be used for the import item. This function can be useful
for searching `dlopen` library symbols when use of
`MIR_load_external` is not convenient
# MIR code execution
* Linked MIR code can be executed by an **interpreter** or machine code generated by **MIR generator**
# MIR code interpretation
* The interpreter is an obligatory part of MIR API because it can be used during linking
* The interpreter is automatically initialized and finished with MIR API initialization and finishing
* The interpreter works with values represented by type `MIR_val_t` which is a union
`union {..., int64_t i; uint64_t u; float f; double d; long double ld;}`
* You can execute MIR function code with the API functions `void
MIR_interp (MIR_context_t ctx, MIR_item_t func_item, MIR_val_t *results, size_t nargs, ...)` and
`void MIR_interp_arr (MIR_context_t ctx, MIR_item_t func_item, MIR_val_t *results, size_t nargs,
MIR_val_t *vals)`
* The function results are returned through parameter `results`. You should pass
a container of enough size to return all function results.
* You can also execute MIR function code through the C function call
mechanism. First you need to set up the C function interface
through API function `MIR_set_interp_interface (MIR_context_t ctx, MIR_item_t
func_item)`. After that you can use `func_item->addr` to call the
MIR function as a usual C function
* C function interface is implemented by generation of machine
code specialized for MIR function. Therefore the interface
works only on the same targets as MIR generator
# MIR generator (file mir-gen.h)
* Before use of MIR generator you should initialize it by API function `MIR_gen_init (MIR_context_t ctx)`
* API function `MIR_gen_finish (MIR_context_t ctx)` should be called last after any generator usage.
It frees all internal generator data
* API function `void *MIR_gen (MIR_context_t ctx, MIR_item_t func_item)` generates machine code of given MIR function
and returns an address to call it. You can call the code as a usual C function by using this address
as the called function address
* API function `void MIR_gen_set_debug_file (MIR_context_t ctx, FILE *f)` sets up MIR generator debug file to `f`.
If it is not NULL a lot of debugging and optimization information will be output to the file. It is useful mostly
for MIR developers
* API function `void MIR_gen_set_optimize_level (MIR_context_t ctx, unsigned int level)` sets up optimization
level for MIR generator:
* `0` means only register allocator and machine code generator work
* `1` means additional code selection task. On this level MIR generator creates more compact and faster
code than on zero level with practically the same generation speed
* `2` means additionally common sub-expression elimination and sparse conditional constant propagation.
This is the default level. This level is valuable if you generate bad input MIR code with a lot of redundancy
and constants. The generation speed on level `1` is about 50% faster than on level `2`
* `3` means additionally register renaming and loop invariant code motion. The generation speed
on level `2` is about 50% faster than on level `3`

@ -0,0 +1,23 @@
/* This file is a part of MIR project.
Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
#include "../mirc.h"
#include "mirc-aarch64-linux.h"
/* Texts registered as the standard includes of this target: the `mirc` text (presumably
   defined by ../mirc.h -- confirm) followed by the aarch64-specific `aarch64_mirc` text. */
static const char *standard_includes[] = {mirc, aarch64_mirc};
/* Directory names registered as the standard include directories of this target. */
static const char *standard_include_dirs[] = {"include/mirc/", "include/mirc/aarch64/"};
/* Maximal alignment for this target (presumably in bytes -- confirm against users). */
#define MAX_ALIGNMENT 16
/* Target hook for adjusting the alignment of a variable; it simply forwards its arguments
   to aarch64_adjust_var_alignment. */
#define ADJUST_VAR_ALIGNMENT(c2m_ctx, align, type) \
aarch64_adjust_var_alignment (c2m_ctx, align, type)
/* aarch64 implementation behind ADJUST_VAR_ALIGNMENT.  No target-specific adjustment is
   done: ALIGN is handed back unchanged and neither C2M_CTX nor TYPE is inspected. */
static int aarch64_adjust_var_alignment (c2m_ctx_t c2m_ctx, int align, struct type *type) {
  (void) c2m_ctx; /* intentionally unused */
  (void) type;    /* intentionally unused */
  return align;
}
/* Return non-zero iff ALIGN is not one of the alignments accepted on this target,
   i.e. anything other than 0, 1, 2, 4, 8 or 16. */
static int invalid_alignment (mir_llong align) {
  switch (align) {
  case 0:
  case 1:
  case 2:
  case 4:
  case 8:
  case 16: return 0; /* accepted alignment */
  default: return 1; /* everything else is rejected */
  }
}

@ -0,0 +1,50 @@
/* This file is a part of MIR project.
Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
#include <stdint.h>
/* Number of bits in a char of the compiled C program. */
#define MIR_CHAR_BIT 8
/* Signed integer types of the compiled C program expressed through fixed-width types:
   char is 8 bits, short 16, int 32, long and long long 64. */
typedef int8_t mir_schar;
typedef int16_t mir_short;
typedef int32_t mir_int;
typedef int64_t mir_long;
typedef int64_t mir_llong;
/* Value ranges of the signed types above. */
#define MIR_SCHAR_MIN INT8_MIN
#define MIR_SCHAR_MAX INT8_MAX
#define MIR_SHORT_MIN INT16_MIN
#define MIR_SHORT_MAX INT16_MAX
#define MIR_INT_MIN INT32_MIN
#define MIR_INT_MAX INT32_MAX
#define MIR_LONG_MIN INT64_MIN
#define MIR_LONG_MAX INT64_MAX
#define MIR_LLONG_MIN INT64_MIN
#define MIR_LLONG_MAX INT64_MAX
/* Unsigned counterparts of the integer types and their maximal values. */
typedef uint8_t mir_uchar;
typedef uint16_t mir_ushort;
typedef uint32_t mir_uint;
typedef uint64_t mir_ulong;
typedef uint64_t mir_ullong;
#define MIR_UCHAR_MAX UINT8_MAX
#define MIR_USHORT_MAX UINT16_MAX
#define MIR_UINT_MAX UINT32_MAX
#define MIR_ULONG_MAX UINT64_MAX
#define MIR_ULLONG_MAX UINT64_MAX
/* Plain char is given the representation and range of signed char here.
   NOTE(review): the AAPCS64 Linux ABI defines plain char as unsigned -- confirm that the
   signed mapping is intended for this target. */
typedef mir_schar mir_char;
#define MIR_CHAR_MIN MIR_SCHAR_MIN
#define MIR_CHAR_MAX MIR_SCHAR_MAX
/* Floating point types are represented by the host types of the same name. */
typedef float mir_float;
typedef double mir_double;
typedef long double mir_ldouble;
/* Presumably the representations of _Bool, ptrdiff_t and size_t of the compiled
   program (inferred from the names -- confirm against their uses). */
typedef uint8_t mir_bool;
typedef int64_t mir_ptrdiff_t;
typedef uint64_t mir_size_t;
#define MIR_SIZE_MAX UINT64_MAX

@ -0,0 +1,93 @@
/* This file is a part of MIR project.
Copyright (C) 2020 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/
/* C source text with the aarch64-specific predefined macros, typedefs and declarations.
   The text is one string built from adjacent literals; the Linux/Unix identification
   macros are included only when this file itself is compiled on Linux. */
static char aarch64_mirc[]
  = "#define __aarch64__ 1\n"
    "#define _LP64 1\n"
    "#define __LP64__ 1\n"
    "#define __ARM_ARCH 8\n"
    "\n"
    "#define __SIZEOF_DOUBLE__ 8\n"
    "#define __SIZEOF_FLOAT__ 4\n"
    "#define __SIZEOF_INT__ 4\n"
    "#define __SIZEOF_LONG_DOUBLE__ 16\n"
    "#define __SIZEOF_LONG_LONG__ 8\n"
    "#define __SIZEOF_LONG__ 8\n"
    "#define __SIZEOF_POINTER__ 8\n"
    "#define __SIZEOF_PTRDIFF_T__ 8\n"
    "#define __SIZEOF_SHORT__ 2\n"
    "#define __SIZEOF_SIZE_T__ 8\n"
    "\n"
    "#define __BYTE_ORDER__ 1234\n"
    "#define __ORDER_LITTLE_ENDIAN__ 1234\n"
    "#define __ORDER_BIG_ENDIAN__ 4321\n"
    "\n"
    "/* Some GCC predefined macros: */\n"
    "#define __SIZE_TYPE__ unsigned long\n"
    "#define __PTRDIFF_TYPE__ long\n"
    "#define __INTMAX_TYPE__ long\n"
    "#define __UINTMAX_TYPE__ unsigned long\n"
    "#define __INT8_TYPE__ signed char\n"
    "#define __INT16_TYPE__ short\n"
    "#define __INT32_TYPE__ int\n"
    "#define __INT64_TYPE__ long\n"
    "#define __UINT8_TYPE__ unsigned char\n"
    "#define __UINT16_TYPE__ unsigned short\n"
    "#define __UINT32_TYPE__ unsigned int\n"
    "#define __UINT64_TYPE__ unsigned long\n"
    "#define __INTPTR_TYPE__ long\n"
    "#define __UINTPTR_TYPE__ unsigned long\n"
    "\n"
    "#define __CHAR_BIT__ 8\n"
    "#define __INT8_MAX__ 127\n"
    "#define __INT16_MAX__ 32767\n"
    "#define __INT32_MAX__ 2147483647\n"
    "#define __INT64_MAX__ 9223372036854775807l\n"
    "#define __UINT8_MAX__ (__INT8_MAX__ * 2u + 1u)\n"
    "#define __UINT16_MAX__ (__INT16_MAX__ * 2u + 1u)\n"
    "#define __UINT32_MAX__ (__INT32_MAX__ * 2u + 1u)\n"
    /* The 64-bit maximum needs `ul' suffixes: with plain `u' the operands are `long' and
       `unsigned int', the usual arithmetic conversions select (signed) `long' and the
       multiplication overflows instead of producing an unsigned 64-bit value. */
    "#define __UINT64_MAX__ (__INT64_MAX__ * 2ul + 1ul)\n"
    "#define __SCHAR_MAX__ __INT8_MAX__\n"
    "#define __SHRT_MAX__ __INT16_MAX__\n"
    "#define __INT_MAX__ __INT32_MAX__\n"
    "#define __LONG_MAX__ __INT64_MAX__\n"
    "#define __LONG_LONG_MAX__ __INT64_MAX__\n"
    "#define __SIZE_MAX__ __UINT64_MAX__\n"
    "#define __PTRDIFF_MAX__ __INT64_MAX__\n"
    "#define __INTMAX_MAX__ __INT64_MAX__\n"
    "#define __UINTMAX_MAX__ __UINT64_MAX__\n"
    "#define __INTPTR_MAX__ __INT64_MAX__\n"
    "#define __UINTPTR_MAX__ __UINT64_MAX__\n"
    "\n"
    "#define __FLT_MIN_EXP__ (-125)\n"
    "#define __FLT_MAX_EXP__ 128\n"
    "#define __FLT_DIG__ 6\n"
    "#define __FLT_DECIMAL_DIG__ 9\n"
    "#define __FLT_MANT_DIG__ 24\n"
    "#define __FLT_MIN__ 1.17549435082228750796873653722224568e-38F\n"
    "#define __FLT_MAX__ 3.40282346638528859811704183484516925e+38F\n"
    "#define __FLT_EPSILON__ 1.19209289550781250000000000000000000e-7F\n"
    "\n"
    "#define __DBL_MIN_EXP__ (-1021)\n"
    "#define __DBL_MAX_EXP__ 1024\n"
    "#define __DBL_DIG__ 15\n"
    "#define __DBL_DECIMAL_DIG__ 17\n"
    "#define __DBL_MANT_DIG__ 53\n"
    "#define __DBL_MAX__ ((double) 1.79769313486231570814527423731704357e+308L)\n"
    "#define __DBL_MIN__ ((double) 2.22507385850720138309023271733240406e-308L)\n"
    "#define __DBL_EPSILON__ ((double) 2.22044604925031308084726333618164062e-16L)\n"
    "\n"
    "typedef unsigned short char16_t;\n"
    "typedef unsigned int char32_t;\n"
    "\n"
#if defined(__linux__)
    "#define __gnu_linux__ 1\n"
    "#define __linux 1\n"
    "#define __linux__ 1\n"
    "#define linux 1\n"
    "#define __unix 1\n"
    "#define __unix__ 1\n"
#endif
    "\n"
    "void *alloca (unsigned long);\n";

@ -25,12 +25,23 @@
#include "c2mir.h"
#ifdef __x86_64__
#if defined(__x86_64__)
#include "x86_64/cx86_64.h"
#elif defined(__aarch64__)
#include "aarch64/caarch64.h"
#elif defined(__PPC64__)
#include "ppc64/cppc64.h"
#else
#error "undefined or unsupported generation target for C"
#endif
/* Exchange the values of the lvalues A1 and A2, using the lvalue T as scratch storage.
   Every argument is evaluated more than once, so arguments must be free of side effects.
   Arguments are parenthesized so that any lvalue expression can be passed safely, and the
   do/while (0) wrapper makes the expansion behave as a single statement. */
#define SWAP(a1, a2, t) \
  do {                  \
    (t) = (a1);         \
    (a1) = (a2);        \
    (a2) = (t);         \
  } while (0)
typedef enum {
C_alloc_error,
C_unfinished_comment,
@ -300,8 +311,12 @@ static mir_size_t raw_type_size (c2m_ctx_t c2m_ctx, struct type *type) {
return type->raw_size;
}
#ifdef __x86_64__
#if defined(__x86_64__)
#include "x86_64/cx86_64-code.c"
#elif defined(__aarch64__)
#include "aarch64/caarch64-code.c"
#elif defined(__PPC64__)
#include "ppc64/cppc64-code.c"
#else
#error "undefined or unsupported generation target for C"
#endif
@ -332,18 +347,18 @@ static int char_is_signed_p (void) { return MIR_CHAR_MAX == MIR_SCHAR_MAX; }
enum str_flag { FLAG_EXT = 1, FLAG_C89, FLAG_EXT89 };
static int str_eq (str_t str1, str_t str2) {
/* Hash table equality callback for strings: STR1 and STR2 are equal when they have the
   same length and the same LEN bytes.  ARG is not used. */
static int str_eq (str_t str1, str_t str2, void *arg) {
  if (str1.len != str2.len) return 0; /* different lengths can never match */
  return memcmp (str1.s, str2.s, str1.len) == 0;
}
static htab_hash_t str_hash (str_t str) { return mir_hash (str.s, str.len, 0x42); }
static int str_key_eq (str_t str1, str_t str2) { return str1.key == str2.key; }
static htab_hash_t str_key_hash (str_t str) { return mir_hash64 (str.key, 0x24); }
/* Hash table hash callback for strings: hashes the LEN bytes of STR.  ARG is not used. */
static htab_hash_t str_hash (str_t str, void *arg) {
  return mir_hash (str.s, str.len, 0x42);
}

/* Equality callback for the key-indexed string table: only the KEY members are
   compared.  ARG is not used. */
static int str_key_eq (str_t str1, str_t str2, void *arg) {
  return str1.key == str2.key;
}

/* Hash callback for the key-indexed string table: hashes the KEY member of STR.
   ARG is not used. */
static htab_hash_t str_key_hash (str_t str, void *arg) {
  return mir_hash64 (str.key, 0x24);
}
static str_t uniq_cstr (c2m_ctx_t c2m_ctx, const char *str);
static void str_init (c2m_ctx_t c2m_ctx) {
HTAB_CREATE (str_t, str_tab, 1000, str_hash, str_eq);
HTAB_CREATE (str_t, str_key_tab, 200, str_key_hash, str_key_eq);
HTAB_CREATE (str_t, str_tab, 1000, str_hash, str_eq, NULL);
HTAB_CREATE (str_t, str_key_tab, 200, str_key_hash, str_key_eq, NULL);
empty_str = uniq_cstr (c2m_ctx, "");
}
@ -927,11 +942,7 @@ static char *reverse (VARR (char) * v) {
int i, j, temp, last = (int) VARR_LENGTH (char, v) - 1;
if (last >= 0 && addr[last] == '\0') last--;
for (i = last, j = 0; i > j; i--, j++) {
temp = addr[i];
addr[i] = addr[j];
addr[j] = temp;
}
for (i = last, j = 0; i > j; i--, j++) SWAP (addr[i], addr[j], temp);
return addr;
}
@ -1804,11 +1815,11 @@ static void add_to_temp_string (c2m_ctx_t c2m_ctx, const char *str) {
VARR_PUSH (char, temp_string, '\0');
}
static int macro_eq (macro_t macro1, macro_t macro2) {
/* Hash table equality callback for macros: two macros are equal when their identifier
   tokens refer to the very same REPR string (the pointers are compared, not the
   characters).  ARG is not used. */
static int macro_eq (macro_t macro1, macro_t macro2, void *arg) {
  const char *repr1 = macro1->id->repr;
  const char *repr2 = macro2->id->repr;

  return repr1 == repr2;
}
static htab_hash_t macro_hash (macro_t macro) {
/* Hash table hash callback for macros: hashes the characters of the REPR string of the
   macro's identifier token.  ARG is not used. */
static htab_hash_t macro_hash (macro_t macro, void *arg) {
  const char *repr = macro->id->repr;

  return mir_hash (repr, strlen (repr), 0x42);
}
@ -1823,7 +1834,7 @@ static void init_macros (c2m_ctx_t c2m_ctx) {
VARR (token_t) * params;
VARR_CREATE (macro_t, macros, 2048);
HTAB_CREATE (macro_t, macro_tab, 2048, macro_hash, macro_eq);
HTAB_CREATE (macro_t, macro_tab, 2048, macro_hash, macro_eq, NULL);
/* Standard macros : */
new_std_macro (c2m_ctx, "__DATE__");
new_std_macro (c2m_ctx, "__TIME__");
@ -3529,17 +3540,19 @@ typedef struct {
DEF_HTAB (tpname_t);
static HTAB (tpname_t) * tpname_tab;
static int tpname_eq (tpname_t tpname1, tpname_t tpname2) {
/* Hash table equality callback for typedef names: equal when both entries refer to the
   same identifier string pointer and to the same scope.  ARG is not used. */
static int tpname_eq (tpname_t tpname1, tpname_t tpname2, void *arg) {
  if (tpname1.id->u.s.s != tpname2.id->u.s.s) return 0;
  return tpname1.scope == tpname2.scope;
}
static htab_hash_t tpname_hash (tpname_t tpname) {
/* Hash table hash callback for typedef names: mixes the identifier string pointer and
   the scope pointer, in that order, starting from seed 0x42.  ARG is not used. */
static htab_hash_t tpname_hash (tpname_t tpname, void *arg) {
  uint64_t h = mir_hash_init (0x42);

  h = mir_hash_step (h, (uint64_t) tpname.id->u.s.s);
  h = mir_hash_step (h, (uint64_t) tpname.scope);
  return mir_hash_finish (h);
}
static void tpname_init (void) { HTAB_CREATE (tpname_t, tpname_tab, 1000, tpname_hash, tpname_eq); }
/* Create the typedef name hash table tpname_tab with an initial size argument of 1000,
   tpname_hash/tpname_eq as callbacks and NULL as the extra argument given to HTAB_CREATE
   (presumably the value passed to the callbacks as their ARG parameter -- confirm). */
static void tpname_init (void) {
HTAB_CREATE (tpname_t, tpname_tab, 1000, tpname_hash, tpname_eq, NULL);
}
static int tpname_find (node_t id, node_t scope, tpname_t *res) {
int found_p;
@ -5013,21 +5026,22 @@ struct check_ctx {
static int supported_alignment_p (mir_llong align) { return TRUE; } // ???
static int symbol_eq (symbol_t s1, symbol_t s2) {
/* Hash table equality callback for symbols: equal when the mode, the identifier string
   pointer and the scope all coincide.  ARG is not used. */
static int symbol_eq (symbol_t s1, symbol_t s2, void *arg) {
  if (s1.mode != s2.mode) return 0;
  if (s1.id->u.s.s != s2.id->u.s.s) return 0;
  return s1.scope == s2.scope;
}
static htab_hash_t symbol_hash (symbol_t s) {
/* Hash table hash callback for symbols: mixes the mode, the identifier string pointer
   and the scope pointer, in that order, starting from seed 0x42.  ARG is not used. */
static htab_hash_t symbol_hash (symbol_t s, void *arg) {
  uint64_t h = mir_hash_init (0x42);

  h = mir_hash_step (h, (uint64_t) s.mode);
  h = mir_hash_step (h, (uint64_t) s.id->u.s.s);
  h = mir_hash_step (h, (uint64_t) s.scope);
  return mir_hash_finish (h);
}
static void symbol_clear (symbol_t sym) { VARR_DESTROY (node_t, sym.defs); }
/* Hash table free callback for symbols: destroys the DEFS array of SYM.
   ARG is not used. */
static void symbol_clear (symbol_t sym, void *arg) {
  VARR_DESTROY (node_t, sym.defs);
}
static void symbol_init (c2m_ctx_t c2m_ctx) {
HTAB_CREATE_WITH_FREE_FUNC (symbol_t, symbol_tab, 5000, symbol_hash, symbol_eq, symbol_clear);
HTAB_CREATE_WITH_FREE_FUNC (symbol_t, symbol_tab, 5000, symbol_hash, symbol_eq, symbol_clear,
NULL);
}
static int symbol_find (c2m_ctx_t c2m_ctx, enum symbol_mode mode, node_t id, node_t scope,
@ -5183,13 +5197,6 @@ static struct type integer_promotion (const struct type *type) {
return res;
}
#define SWAP(a1, a2, t) \
do { \
t = a1; \
a1 = a2; \
a2 = t; \
} while (0)
static struct type arithmetic_conversion (const struct type *type1, const struct type *type2) {
struct type res, t1, t2;
@ -5884,10 +5891,8 @@ static node_t process_tag (c2m_ctx_t c2m_ctx, node_t r, node_t id, node_t decl_l
error (c2m_ctx, id->pos, "tag %s redeclaration", id->u.s.s);
} else {
if (decl_list->code != N_IGNORE) { /* swap decl lists */
DLIST (node_t) temp = r->ops;
r->ops = sym.def_node->ops;
sym.def_node->ops = temp;
DLIST (node_t) temp;
SWAP (r->ops, sym.def_node->ops, temp);
}
r = sym.def_node;
}
@ -6302,9 +6307,13 @@ static void adjust_param_type (c2m_ctx_t c2m_ctx, struct type **type_ptr) {
if (type->mode == TM_ARR) { // ??? static, old type qual
arr_type = type->u.arr_type;
type->mode = TM_PTR;
type->u.ptr_type = arr_type->el_type;
type->type_qual = arr_type->ind_type_qual;
par_type = create_type (c2m_ctx, NULL);
par_type->mode = TM_PTR;
par_type->pos_node = type->pos_node;
par_type->u.ptr_type = arr_type->el_type;
par_type->type_qual = arr_type->ind_type_qual;
par_type->arr_type = type;
*type_ptr = type = par_type;
make_type_complete (c2m_ctx, type);
} else if (type->mode == TM_FUNC) {
par_type = create_type (c2m_ctx, NULL);
@ -7486,7 +7495,7 @@ static struct expr *check_assign_op (c2m_ctx_t c2m_ctx, node_t r, node_t op1, no
return e;
}
static unsigned case_hash (case_t el) {
static unsigned case_hash (case_t el, void *arg) {
node_t case_expr = NL_HEAD (el->case_node->ops);
struct expr *expr;
@ -7498,7 +7507,7 @@ static unsigned case_hash (case_t el) {
return mir_hash (&expr->u.u_val, sizeof (expr->u.u_val), 0x42);
}
static int case_eq (case_t el1, case_t el2) {
static int case_eq (case_t el1, case_t el2, void *arg) {
node_t case_expr1 = NL_HEAD (el1->case_node->ops);
node_t case_expr2 = NL_HEAD (el2->case_node->ops);
struct expr *expr1, *expr2;
@ -9050,7 +9059,7 @@ static void context_init (MIR_context_t ctx) {
symbol_init (c2m_ctx);
in_params_p = FALSE;
curr_unnamed_anon_struct_union_member = NULL;
HTAB_CREATE (case_t, case_tab, 100, case_hash, case_eq);
HTAB_CREATE (case_t, case_tab, 100, case_hash, case_eq, NULL);
VARR_CREATE (decl_t, func_decls_for_allocation, 1024);
}
@ -9162,14 +9171,18 @@ static op_t new_op (decl_t decl, MIR_op_t mir_op) {
return res;
}
static htab_hash_t reg_var_hash (reg_var_t r) { return mir_hash (r.name, strlen (r.name), 0x42); }
static int reg_var_eq (reg_var_t r1, reg_var_t r2) { return strcmp (r1.name, r2.name) == 0; }
/* Hash table hash callback for register variables: hashes the characters of the NAME
   string of R.  ARG is not used. */
static htab_hash_t reg_var_hash (reg_var_t r, void *arg) {
  size_t name_len = strlen (r.name);

  return mir_hash (r.name, name_len, 0x42);
}
/* Hash table equality callback for register variables: equal when the NAME strings
   have the same contents.  ARG is not used. */
static int reg_var_eq (reg_var_t r1, reg_var_t r2, void *arg) {
  return !strcmp (r1.name, r2.name);
}
static void init_reg_vars (MIR_context_t ctx) {
c2m_ctx_t c2m_ctx = *c2m_ctx_loc (ctx);
reg_free_mark = 0;
HTAB_CREATE (reg_var_t, reg_var_tab, 128, reg_var_hash, reg_var_eq);
HTAB_CREATE (reg_var_t, reg_var_tab, 128, reg_var_hash, reg_var_eq, NULL);
}
static void finish_curr_func_reg_vars (MIR_context_t ctx) {
@ -9328,6 +9341,29 @@ static void emit_insn (MIR_context_t ctx, MIR_insn_t insn) {
MIR_append_insn (ctx, curr_func, insn);
}
/* Append label INSN to the current function, first simplifying the jumps that end the
   insn list so far:
     BCOND T, L1; JMP L2; L1:  =>  BNCOND T, L2; L1:
     JMP L; L:                 =>  L:
   where L1 and L stand for INSN itself. */
static void emit_label_insn_opt (MIR_context_t ctx, MIR_insn_t insn) {
  c2m_ctx_t c2m_ctx = *c2m_ctx_loc (ctx);
  MIR_insn_t tail, before_tail;
  MIR_insn_code_t inverted;

  assert (insn->code == MIR_LABEL);
  /* First pattern: a reversible branch to INSN directly followed by a jump. */
  tail = DLIST_TAIL (MIR_insn_t, curr_func->u.func->insns);
  if (tail != NULL) {
    before_tail = DLIST_PREV (MIR_insn_t, tail);
    if (before_tail != NULL && tail->code == MIR_JMP) {
      inverted = MIR_reverse_branch_code (before_tail->code);
      if (inverted != MIR_INSN_BOUND && before_tail->ops[0].mode == MIR_OP_LABEL
          && before_tail->ops[0].u.label == insn) {
        /* Retarget the branch to the jump destination before the jump is removed. */
        before_tail->ops[0] = tail->ops[0];
        before_tail->code = inverted;
        MIR_remove_insn (ctx, curr_func, tail);
      }
    }
  }
  /* Second pattern: the list (possibly just changed above) ends with a jump to INSN. */
  tail = DLIST_TAIL (MIR_insn_t, curr_func->u.func->insns);
  if (tail != NULL && tail->code == MIR_JMP && tail->ops[0].mode == MIR_OP_LABEL
      && tail->ops[0].u.label == insn)
    MIR_remove_insn (ctx, curr_func, tail);
  MIR_append_insn (ctx, curr_func, insn);
}
/* Change t1 = expr; v = t1 to v = expr */
static void emit_insn_opt (MIR_context_t ctx, MIR_insn_t insn) {
c2m_ctx_t c2m_ctx = *c2m_ctx_loc (ctx);
@ -9589,16 +9625,22 @@ static op_t mem_to_address (MIR_context_t ctx, op_t mem) {
static op_t force_val (MIR_context_t ctx, op_t op, int arr_p) {
op_t temp_op;
int sh;
c2m_ctx_t c2m_ctx;
if (arr_p && op.mir_op.mode == MIR_OP_MEM) {
/* an array -- use a pointer: */
return mem_to_address (ctx, op);
}
if (op.decl == NULL || op.decl->bit_offset < 0) return op;
c2m_ctx = *c2m_ctx_loc (ctx);
assert (op.mir_op.mode == MIR_OP_MEM);
temp_op = get_new_temp (ctx, MIR_T_I64);
emit2 (ctx, MIR_MOV, temp_op.mir_op, op.mir_op);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
sh = 64 - op.decl->bit_offset - op.decl->width;
#else
sh = op.decl->bit_offset + (64 - type_size (c2m_ctx, op.decl->decl_spec.type) * MIR_CHAR_BIT);
#endif
if (sh != 0) emit3 (ctx, MIR_LSH, temp_op.mir_op, temp_op.mir_op, MIR_new_int_op (ctx, sh));
emit3 (ctx,
signed_integer_type_p (op.decl->decl_spec.type)
@ -9820,7 +9862,7 @@ static void emit_label (MIR_context_t ctx, node_t r) {
assert (labels->code == N_LIST);
if (NL_HEAD (labels->ops) == NULL) return;
if (labels->attr == NULL) labels->attr = MIR_new_label (ctx);
emit_insn (ctx, labels->attr);
emit_label_insn_opt (ctx, labels->attr);
}
static MIR_label_t get_label (MIR_context_t ctx, node_t target) {
@ -9877,7 +9919,7 @@ static void block_move (MIR_context_t ctx, op_t var, op_t val, mir_size_t size)
emit2 (ctx, MIR_MOV, index.mir_op, MIR_new_int_op (ctx, size));
val = modify_for_block_move (ctx, val, index);
var = modify_for_block_move (ctx, var, index);
emit_insn (ctx, repeat_label);
emit_label_insn_opt (ctx, repeat_label);
emit3 (ctx, MIR_SUB, index.mir_op, index.mir_op, one_op.mir_op);
assert (var.mir_op.mode == MIR_OP_MEM && val.mir_op.mode == MIR_OP_MEM);
val.mir_op.u.mem.type = var.mir_op.u.mem.type = MIR_T_I8;
@ -10234,10 +10276,16 @@ static void emit_scalar_assign (MIR_context_t ctx, op_t var, op_t *val, MIR_type
int width = var.decl->width;
uint64_t mask, mask2;
op_t temp_op1, temp_op2, temp_op3, temp_op4;
c2m_ctx_t c2m_ctx = *c2m_ctx_loc (ctx);
size_t size = type_size (c2m_ctx, var.decl->decl_spec.type) * MIR_CHAR_BIT;
assert (var.mir_op.mode == MIR_OP_MEM);
mask = 0xffffffffffffffff >> (64 - width);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
mask2 = ~(mask << var.decl->bit_offset);
#else
mask2 = ~(mask << (size - var.decl->bit_offset - width));
#endif
temp_op1 = get_new_temp (ctx, MIR_T_I64);
temp_op2 = get_new_temp (ctx, MIR_T_I64);
temp_op3 = get_new_temp (ctx, MIR_T_I64);
@ -10255,12 +10303,21 @@ static void emit_scalar_assign (MIR_context_t ctx, op_t var, op_t *val, MIR_type
}
emit3 (ctx, MIR_AND, temp_op3.mir_op, temp_op1.mir_op, MIR_new_uint_op (ctx, mask));
temp_op4 = get_new_temp (ctx, MIR_T_I64);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
if (var.decl->bit_offset == 0) {
temp_op4 = temp_op3;
} else {
emit3 (ctx, MIR_LSH, temp_op4.mir_op, temp_op3.mir_op,
MIR_new_int_op (ctx, var.decl->bit_offset));
}
#else
if (size - var.decl->bit_offset - width == 0) {
temp_op4 = temp_op3;
} else {
emit3 (ctx, MIR_LSH, temp_op4.mir_op, temp_op3.mir_op,
MIR_new_int_op (ctx, size - var.decl->bit_offset - width));
}
#endif
if (!ignore_others_p) {
emit3 (ctx, MIR_OR, temp_op4.mir_op, temp_op4.mir_op, temp_op2.mir_op);
}
@ -10268,19 +10325,29 @@ static void emit_scalar_assign (MIR_context_t ctx, op_t var, op_t *val, MIR_type
}
}
static void add_bit_field (uint64_t *u, uint64_t v, decl_t member_decl) {
/* Put value V of the bit-field described by MEMBER_DECL into the 64-bit image *U: the
   bits of the field are cleared in *U and then replaced by the low WIDTH bits of V.
   The position of the field inside *U depends on the byte order this file is compiled
   for: on little endian it is BIT_OFFSET, otherwise it is computed from the bit size of
   the member type. */
static void add_bit_field (MIR_context_t ctx, uint64_t *u, uint64_t v, decl_t member_decl) {
  uint64_t field_mask, clear_mask;
  int bit_offset = member_decl->bit_offset, width = member_decl->width;
  c2m_ctx_t c2m_ctx = *c2m_ctx_loc (ctx);
  size_t size = type_size (c2m_ctx, member_decl->decl_spec.type) * MIR_CHAR_BIT;
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  const int shift = bit_offset;
#else
  const size_t shift = size - bit_offset - width;
#endif

  field_mask = 0xffffffffffffffff >> (64 - width); /* WIDTH low bits set */
  clear_mask = ~(field_mask << shift);
  *u &= clear_mask;
  v &= field_mask;
  if (signed_integer_type_p (member_decl->decl_spec.type)) {
    /* Sign-extend the field value; the masking below keeps only WIDTH bits again. */
    v <<= (64 - width);
    v = (int64_t) v >> (64 - width);
  }
  v &= field_mask;
  v <<= shift;
  *u |= v;
}
@ -10400,14 +10467,14 @@ static void gen_initializer (MIR_context_t ctx, size_t init_start, op_t var,
uint64_t u = 0;
assert (val.mir_op.mode == MIR_OP_INT || val.mir_op.mode == MIR_OP_UINT);
add_bit_field (&u, val.mir_op.u.u, init_el.member_decl);
add_bit_field (ctx, &u, val.mir_op.u.u, init_el.member_decl);
for (; i + 1 < VARR_LENGTH (init_el_t, init_els); i++, init_el = next_init_el) {
next_init_el = VARR_GET (init_el_t, init_els, i + 1);
if (next_init_el.offset != init_el.offset) break;
if (next_init_el.member_decl->bit_offset == init_el.member_decl->bit_offset) continue;
val = gen (ctx, next_init_el.init, NULL, NULL, TRUE, NULL);
assert (val.mir_op.mode == MIR_OP_INT || val.mir_op.mode == MIR_OP_UINT);
add_bit_field (&u, val.mir_op.u.u, next_init_el.member_decl);
add_bit_field (ctx, &u, val.mir_op.u.u, next_init_el.member_decl);
}
val.mir_op.u.u = u;
}
@ -10589,19 +10656,19 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
assert (t_label != NULL && f_label != NULL);
gen (ctx, NL_HEAD (r->ops), r->code == N_ANDAND ? temp_label : t_label,
r->code == N_ANDAND ? f_label : temp_label, FALSE, NULL);
emit_insn (ctx, temp_label);
emit_label_insn_opt (ctx, temp_label);
gen (ctx, NL_EL (r->ops, 1), t_label, f_label, FALSE, NULL);
if (make_val_p) {
MIR_label_t end_label = MIR_new_label (ctx);
type = ((struct expr *) r->attr)->type;
res = get_new_temp (ctx, get_mir_type (ctx, type));
emit_insn (ctx, t_label);
emit_label_insn_opt (ctx, t_label);
emit2 (ctx, MIR_MOV, res.mir_op, one_op.mir_op);
emit1 (ctx, MIR_JMP, MIR_new_label_op (ctx, end_label));
emit_insn (ctx, f_label);
emit_label_insn_opt (ctx, f_label);
emit2 (ctx, MIR_MOV, res.mir_op, zero_op.mir_op);
emit_insn (ctx, end_label);
emit_label_insn_opt (ctx, end_label);
}
true_label = false_label = NULL;
} else if (true_label != NULL) {
@ -10633,12 +10700,12 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
res = get_new_temp (ctx, MIR_T_I64);
gen (ctx, NL_HEAD (r->ops), t_label, f_label, FALSE, NULL);
emit_insn (ctx, t_label);
emit_label_insn_opt (ctx, t_label);
emit2 (ctx, MIR_MOV, res.mir_op, zero_op.mir_op);
emit1 (ctx, MIR_JMP, MIR_new_label_op (ctx, end_label));
emit_insn (ctx, f_label);
emit_label_insn_opt (ctx, f_label);
emit2 (ctx, MIR_MOV, res.mir_op, one_op.mir_op);
emit_insn (ctx, end_label);
emit_label_insn_opt (ctx, end_label);
}
break;
case N_ADD:
@ -10705,8 +10772,10 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
t = get_mir_type (ctx, type);
var = gen (ctx, NL_HEAD (r->ops), NULL, NULL, FALSE, NULL);
op1 = force_val (ctx, var, FALSE);
res = get_new_temp (ctx, t);
emit2 (ctx, tp_mov (t), res.mir_op, op1.mir_op);
if (val_p || true_label != NULL) {
res = get_new_temp (ctx, t);
emit2 (ctx, tp_mov (t), res.mir_op, op1.mir_op);
}
val = promote (ctx, op1, t, TRUE);
op2 = promote (ctx,
type->mode != TM_PTR
@ -10756,7 +10825,7 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
t = get_op_type (ctx, var);
op2
= gen (ctx, NL_EL (r->ops, 1), NULL, NULL, t != MIR_T_UNDEF, t != MIR_T_UNDEF ? NULL : &var);
if (t == MIR_T_UNDEF) {
if ((!val_p && true_label == NULL) || t == MIR_T_UNDEF) {
res = var;
val = op2;
} else {
@ -10769,7 +10838,7 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
assert (t != MIR_T_UNDEF);
val = cast (ctx, val, get_mir_type (ctx, ((struct expr *) r->attr)->type), FALSE);
emit_scalar_assign (ctx, var, &val, t, FALSE);
if (r->code != N_POST_INC && r->code != N_POST_DEC)
if ((val_p || true_label != NULL) && r->code != N_POST_INC && r->code != N_POST_DEC)
emit2_noopt (ctx, tp_mov (t), res.mir_op, val.mir_op);
} else { /* block move */
mir_size_t size = type_size (c2m_ctx, ((struct expr *) r->attr)->type);
@ -10929,7 +10998,7 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
if (!void_p) t = get_mir_type (ctx, type);
gen (ctx, cond, true_label, false_label, FALSE, NULL);
emit_insn (ctx, true_label);
emit_label_insn_opt (ctx, true_label);
op1 = gen (ctx, true_expr, NULL, NULL, !void_p && t != MIR_T_UNDEF, NULL);
if (!void_p) {
if (t != MIR_T_UNDEF) {
@ -10945,7 +11014,7 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
}
}
emit1 (ctx, MIR_JMP, MIR_new_label_op (ctx, end_label));
emit_insn (ctx, false_label);
emit_label_insn_opt (ctx, false_label);
op1 = gen (ctx, false_expr, NULL, NULL, !void_p && t != MIR_T_UNDEF, NULL);
if (!void_p) {
if (t != MIR_T_UNDEF) {
@ -10958,7 +11027,7 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
block_move (ctx, res, op1, size);
}
}
emit_insn (ctx, end_label);
emit_label_insn_opt (ctx, end_label);
break;
}
case N_ALIGNOF:
@ -11317,12 +11386,12 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
assert (false_label == NULL && true_label == NULL);
emit_label (ctx, r);
top_gen (ctx, expr, if_label, else_label);
emit_insn (ctx, if_label);
emit_label_insn_opt (ctx, if_label);
gen (ctx, if_stmt, NULL, NULL, FALSE, NULL);
emit1 (ctx, MIR_JMP, MIR_new_label_op (ctx, end_label));
emit_insn (ctx, else_label);
emit_label_insn_opt (ctx, else_label);
gen (ctx, else_stmt, NULL, NULL, FALSE, NULL);
emit_insn (ctx, end_label);
emit_label_insn_opt (ctx, end_label);
break;
}
case N_SWITCH: {
@ -11421,14 +11490,14 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
emit3 (ctx, short_p ? MIR_UBLES : MIR_UBLE, MIR_new_label_op (ctx, label),
case_reg_op.mir_op, MIR_new_int_op (ctx, e2->u.i_val));
}
emit_insn (ctx, cont_label);
emit_label_insn_opt (ctx, cont_label);
}
}
if (c == NULL) /* no default: */
emit1 (ctx, MIR_JMP, MIR_new_label_op (ctx, break_label));
}
top_gen (ctx, stmt, NULL, NULL);
emit_insn (ctx, break_label);
emit_label_insn_opt (ctx, break_label);
break_label = saved_break_label;
break;
}
@ -11442,11 +11511,11 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
continue_label = MIR_new_label (ctx);
break_label = MIR_new_label (ctx);
emit_label (ctx, r);
emit_insn (ctx, start_label);
emit_label_insn_opt (ctx, start_label);
gen (ctx, stmt, NULL, NULL, FALSE, NULL);
emit_insn (ctx, continue_label);
emit_label_insn_opt (ctx, continue_label);
top_gen (ctx, expr, start_label, break_label);
emit_insn (ctx, break_label);
emit_label_insn_opt (ctx, break_label);
continue_label = saved_continue_label;
break_label = saved_break_label;
break;
@ -11461,12 +11530,12 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
continue_label = MIR_new_label (ctx);
break_label = MIR_new_label (ctx);
emit_label (ctx, r);
emit_insn (ctx, continue_label);
emit_label_insn_opt (ctx, continue_label);
top_gen (ctx, expr, stmt_label, break_label);
emit_insn (ctx, stmt_label);
emit_label_insn_opt (ctx, stmt_label);
gen (ctx, stmt, NULL, NULL, FALSE, NULL);
emit1 (ctx, MIR_JMP, MIR_new_label_op (ctx, continue_label));
emit_insn (ctx, break_label);
top_gen (ctx, expr, stmt_label, break_label);
emit_label_insn_opt (ctx, break_label);
continue_label = saved_continue_label;
break_label = saved_break_label;
break;
@ -11476,7 +11545,7 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
node_t cond = NL_NEXT (init);
node_t iter = NL_NEXT (cond);
node_t stmt = NL_NEXT (iter);
MIR_label_t start_label = MIR_new_label (ctx), stmt_label = MIR_new_label (ctx);
MIR_label_t stmt_label = MIR_new_label (ctx);
MIR_label_t saved_continue_label = continue_label, saved_break_label = break_label;
assert (false_label == NULL && true_label == NULL);
@ -11484,15 +11553,18 @@ static op_t gen (MIR_context_t ctx, node_t r, MIR_label_t true_label, MIR_label_
break_label = MIR_new_label (ctx);
emit_label (ctx, r);
top_gen (ctx, init, NULL, NULL);
emit_insn (ctx, start_label);
if (cond->code != N_IGNORE) /* non-empty condition: */
top_gen (ctx, cond, stmt_label, break_label);
emit_insn (ctx, stmt_label);
emit_label_insn_opt (ctx, stmt_label);
gen (ctx, stmt, NULL, NULL, FALSE, NULL);
emit_insn (ctx, continue_label);
emit_label_insn_opt (ctx, continue_label);
top_gen (ctx, iter, NULL, NULL);
emit1 (ctx, MIR_JMP, MIR_new_label_op (ctx, start_label));
emit_insn (ctx, break_label);
if (cond->code == N_IGNORE) { /* empty condition: */
emit1 (ctx, MIR_JMP, MIR_new_label_op (ctx, stmt_label));