You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ravi/ravicomp/src/linearizer.c

2655 lines
83 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

/******************************************************************************
* Copyright (C) 2020-2021 Dibyendu Majumdar
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
******************************************************************************/
/*
This file contains the Linearizer. The goal of the Linearizer is
generate a linear intermediate representation (IR) from the AST
suitable for further analysis.
The linear IR is organized in basic blocks.
Each proc has an entry block and exit block. Additional blocks
are created as necessary.
Each basic block contains a sequence of instructions.
The final instruction of a block must always be a branch instruction.
*/
#include "parser.h"
#include "linearizer.h"
#include "fnv_hash.h"
#include "ptrlist.h"
#include "graph.h"
#include <assert.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#ifndef _WIN32
#include <alloca.h>
#include <unistd.h>
#else
#include <io.h>
#include <malloc.h>
#endif
static void handle_error(CompilerState *container, const char *msg)
{
// TODO source and line number
raviX_buffer_add_string(&container->error_message, msg);
longjmp(container->env, 1);
}
static Pseudo *linearize_expression(Proc *proc, AstNode *expr);
static BasicBlock *create_block(Proc *proc);
static void start_block(Proc *proc, BasicBlock *bb);
static void linearize_statement(Proc *proc, AstNode *node);
static void linearize_statement_list(Proc *proc, AstNodeList *list);
static void start_scope(LinearizerState *linearizer, Proc *proc, Scope *scope);
static void end_scope(LinearizerState *linearizer, Proc *proc);
static void instruct_br(Proc *proc, Pseudo *pseudo);
static bool is_block_terminated(BasicBlock *block);
static Pseudo *instruct_move(Proc *proc, enum opcode op, Pseudo *target, Pseudo *src);
static void linearize_function(LinearizerState *linearizer);
static Instruction *allocate_instruction(Proc *proc, enum opcode op);
static void free_temp_pseudo(Proc *proc, Pseudo *pseudo, bool free_temp_pseudo);
/**
* Allocates a register by reusing a free'd register if possible otherwise
* allocating a new one
*/
static inline unsigned allocate_register(PseudoGenerator *generator)
{
if (generator->free_pos > 0) {
return generator->free_regs[--generator->free_pos];
}
return generator->next_reg++;
}
/**
* Puts a register in the free list (must not already have been put there).
*/
static inline void free_register(Proc *proc, PseudoGenerator *generator, unsigned reg)
{
if (generator->free_pos == (sizeof generator->free_regs / sizeof generator->free_regs[0])) {
/* TODO proper error handling */
handle_error(proc->linearizer->ast_container, "Out of register space\n");
return;
}
// Debug check - ensure register being freed hasn't already been freed
for (int i = 0; i < generator->free_pos; i++) {
assert(generator->free_regs[i] != reg);
}
generator->free_regs[generator->free_pos++] = (uint8_t)reg;
}
/* Linearizer initialization */
LinearizerState *raviX_init_linearizer(CompilerState *container)
{
LinearizerState *linearizer = (LinearizerState *)raviX_calloc(1, sizeof(LinearizerState));
linearizer->ast_container = container;
raviX_allocator_init(&linearizer->instruction_allocator, "instruction_allocator", sizeof(Instruction),
sizeof(double), sizeof(Instruction) * 128);
raviX_allocator_init(&linearizer->ptrlist_allocator, "ptrlist_allocator", sizeof(PtrList),
sizeof(double), sizeof(PtrList) * 64);
raviX_allocator_init(&linearizer->pseudo_allocator, "pseudo_allocator", sizeof(Pseudo), sizeof(double),
sizeof(Pseudo) * 128);
raviX_allocator_init(&linearizer->basic_block_allocator, "basic_block_allocator", sizeof(BasicBlock),
sizeof(double), sizeof(BasicBlock) * 32);
raviX_allocator_init(&linearizer->proc_allocator, "proc_allocator", sizeof(Proc), sizeof(double),
sizeof(Proc) * 32);
raviX_allocator_init(&linearizer->unsized_allocator, "unsized_allocator", 0, sizeof(double), CHUNK);
raviX_allocator_init(&linearizer->constant_allocator, "constant_allocator", sizeof(Constant),
sizeof(double), sizeof(Constant) * 64);
linearizer->proc_id = 0;
return linearizer;
}
void raviX_destroy_linearizer(LinearizerState *linearizer)
{
if (linearizer == NULL)
return;
Proc *proc;
FOR_EACH_PTR(linearizer->all_procs, Proc, proc)
{
if (proc->constants)
raviX_set_destroy(proc->constants, NULL);
if (proc->cfg)
raviX_destroy_graph(proc->cfg);
}
END_FOR_EACH_PTR(proc)
raviX_allocator_destroy(&linearizer->instruction_allocator);
raviX_allocator_destroy(&linearizer->ptrlist_allocator);
raviX_allocator_destroy(&linearizer->pseudo_allocator);
raviX_allocator_destroy(&linearizer->basic_block_allocator);
raviX_allocator_destroy(&linearizer->proc_allocator);
raviX_allocator_destroy(&linearizer->unsized_allocator);
raviX_allocator_destroy(&linearizer->constant_allocator);
raviX_free(linearizer);
}
/**
* We assume strings are all interned and can be compared by
* address. Return true if values match else false.
*/
static int compare_constants(const void *a, const void *b)
{
const Constant *c1 = (const Constant *)a;
const Constant *c2 = (const Constant *)b;
if (c1->type != c2->type)
return 0;
if (c1->type == RAVI_TNUMINT)
return c1->i == c2->i;
else if (c1->type == RAVI_TNUMFLT)
return c1->n == c2->n;
else
return c1->s == c2->s;
}
/**
* Hashes a constant
*/
static uint32_t hash_constant(const void *c)
{
const Constant *c1 = (const Constant *)c;
if (c1->type == RAVI_TNUMINT)
return (uint32_t)c1->i;
else if (c1->type == RAVI_TNUMFLT)
return (uint32_t)c1->n; // FIXME maybe use Lua's hash gen
else
return (uint32_t)c1->s->hash;
}
/**
* Adds a constant to the proc's constant table. The constant is also assigned a
* pseudo register.
*/
static const Constant *add_constant(Proc *proc, const Constant *c)
{
SetEntry *entry = raviX_set_search(proc->constants, c);
if (entry == NULL) {
int reg = 0;
/* Assign each type of constant a different range so that if backend
* doesn't need to emit the regnum for a particular type it can do so.
* If backend needs to emit all constants then 2 of the 3 ranges can
* easily adjusted.
*/
switch(c->type) {
case RAVI_TNUMINT:
reg = proc->num_intconstants++;
break;
case RAVI_TNUMFLT:
reg = proc->num_fltconstants++;
break;
default:
assert(c->type == RAVI_TSTRING);
reg = proc->num_strconstants++;
break;
}
Constant *c1 = (Constant *) raviX_allocator_allocate(&proc->linearizer->constant_allocator, 0);
assert(c1); // FIXME
memcpy(c1, c, sizeof(Constant));
c1->index = reg;
raviX_set_add(proc->constants, c1);
// printf("Created new constant of type %d and assigned reg %d\n", c->type, reg);
return c1;
} else {
const Constant *c1 = (Constant *) entry->key;
// printf("Found constant at reg %d\n", c1->index);
return c1;
}
}
/**
* Allocates and adds a constant to the Proc's constants table.
* Input is expected to be EXPR_LITERAL
*/
static const Constant *allocate_constant(Proc *proc, AstNode *node)
{
assert(node->type == EXPR_LITERAL);
Constant c = {.type = (uint16_t) node->literal_expr.type.type_code};
assert(c.type == node->literal_expr.type.type_code);
if (c.type == RAVI_TNUMINT)
c.i = node->literal_expr.u.i;
else if (c.type == RAVI_TNUMFLT)
c.n = node->literal_expr.u.r;
else
c.s = node->literal_expr.u.ts;
return add_constant(proc, &c);
}
static const Constant *allocate_integer_constant(Proc *proc, int i)
{
Constant c = {.type = RAVI_TNUMINT, .i = i};
assert(c.type == RAVI_TNUMINT);
return add_constant(proc, &c);
}
static inline void add_instruction_operand(Proc *proc, Instruction *insn, Pseudo *pseudo)
{
raviX_ptrlist_add((PtrList **)&insn->operands, pseudo, &proc->linearizer->ptrlist_allocator);
}
static inline void add_instruction_target(Proc *proc, Instruction *insn, Pseudo *pseudo)
{
raviX_ptrlist_add((PtrList **)&insn->targets, pseudo, &proc->linearizer->ptrlist_allocator);
}
static Instruction *allocate_instruction(Proc *proc, enum opcode op)
{
Instruction *insn = (Instruction *) raviX_allocator_allocate(&proc->linearizer->instruction_allocator, 0);
insn->opcode = op;
return insn;
}
static void free_instruction_operand_pseudos(Proc *proc, Instruction *insn)
{
Pseudo *operand;
FOR_EACH_PTR_REVERSE(insn->operands, Pseudo, operand) { free_temp_pseudo(proc, operand, false); }
END_FOR_EACH_PTR_REVERSE(operand)
}
static inline void add_instruction(Proc *proc, Instruction *insn)
{
assert(insn->block == NULL || insn->block == proc->current_bb);
raviX_ptrlist_add((PtrList **)&proc->current_bb->insns, insn, &proc->linearizer->ptrlist_allocator);
insn->block = proc->current_bb;
}
static inline void remove_instruction(BasicBlock *block, Instruction *insn)
{
raviX_ptrlist_remove((PtrList **)&block->insns, insn, 1);
insn->block = NULL;
}
Instruction *raviX_last_instruction(BasicBlock *block)
{
if (raviX_ptrlist_size((PtrList *)block->insns) == 0)
return NULL;
return (Instruction *)raviX_ptrlist_last((PtrList *)block->insns);
}
static const Constant *allocate_string_constant(Proc *proc, const StringObject *s)
{
Constant c = {.type = RAVI_TSTRING, .s = s};
assert(c.type == RAVI_TSTRING);
return add_constant(proc, &c);
}
Pseudo* raviX_allocate_stack_pseudo(Proc* proc, unsigned reg)
{
Pseudo* pseudo = (Pseudo *) raviX_allocator_allocate(&proc->linearizer->pseudo_allocator, 0);
pseudo->type = PSEUDO_LUASTACK;
pseudo->regnum = reg;
return pseudo;
}
static Pseudo *allocate_symbol_pseudo(Proc *proc, LuaSymbol *sym, unsigned reg)
{
Pseudo *pseudo = (Pseudo *) raviX_allocator_allocate(&proc->linearizer->pseudo_allocator, 0);
pseudo->type = PSEUDO_SYMBOL;
pseudo->symbol = sym;
pseudo->regnum = reg;
if (sym->symbol_type == SYM_LOCAL) {
assert(sym->variable.pseudo == NULL);
sym->variable.pseudo = pseudo;
}
return pseudo;
}
static Pseudo *allocate_constant_pseudo(Proc *proc, const Constant *constant)
{
Pseudo *pseudo = (Pseudo *) raviX_allocator_allocate(&proc->linearizer->pseudo_allocator, 0);
pseudo->type = PSEUDO_CONSTANT;
pseudo->constant = constant;
pseudo->regnum = constant->index;
return pseudo;
}
static Pseudo *allocate_closure_pseudo(Proc *proc)
{
Pseudo *pseudo = (Pseudo *) raviX_allocator_allocate(&proc->linearizer->pseudo_allocator, 0);
pseudo->type = PSEUDO_PROC;
pseudo->proc = proc;
return pseudo;
}
static Pseudo *allocate_nil_pseudo(Proc *proc)
{
Pseudo *pseudo = (Pseudo *) raviX_allocator_allocate(&proc->linearizer->pseudo_allocator, 0);
pseudo->type = PSEUDO_NIL;
pseudo->proc = proc;
return pseudo;
}
static Pseudo *allocate_boolean_pseudo(Proc *proc, bool is_true)
{
Pseudo *pseudo = (Pseudo *) raviX_allocator_allocate(&proc->linearizer->pseudo_allocator, 0);
pseudo->type = is_true ? PSEUDO_TRUE : PSEUDO_FALSE;
pseudo->proc = proc;
return pseudo;
}
static Pseudo *allocate_block_pseudo(Proc *proc, BasicBlock *block)
{
Pseudo *pseudo = (Pseudo *) raviX_allocator_allocate(&proc->linearizer->pseudo_allocator, 0);
pseudo->type = PSEUDO_BLOCK;
pseudo->block = block;
return pseudo;
}
/*
We have several types of temp pseudos.
Specific types for floating and integer values so that we can
localise the assignment of these to registers.
The generic 'any' type is used for other types
but has variant called PSEUDO_RANGE. This is used in function calls
to represent multiple return values. Most of the time these get converted
back to normal temp pseudo, but in some cases we need to reference
a particular value in the range and for that we use PSEUDO_RANGE_SELECT.
*/
static Pseudo *allocate_temp_pseudo(Proc *proc, ravitype_t type)
{
PseudoGenerator *gen;
enum PseudoType pseudo_type;
switch (type) {
case RAVI_TNUMFLT:
gen = &proc->temp_flt_pseudos;
pseudo_type = PSEUDO_TEMP_FLT;
break;
case RAVI_TNUMINT:
case RAVI_TBOOLEAN:
gen = &proc->temp_int_pseudos;
pseudo_type = type == RAVI_TNUMINT ? PSEUDO_TEMP_INT: PSEUDO_TEMP_BOOL;
break;
default:
gen = &proc->temp_pseudos;
pseudo_type = PSEUDO_TEMP_ANY;
break;
}
unsigned reg = allocate_register(gen);
Pseudo *pseudo = (Pseudo *) raviX_allocator_allocate(&proc->linearizer->pseudo_allocator, 0);
pseudo->type = pseudo_type;
pseudo->regnum = reg;
pseudo->temp_for_local = NULL;
return pseudo;
}
static Pseudo *allocate_range_pseudo(Proc *proc, Pseudo *orig_pseudo)
{
Pseudo *pseudo = (Pseudo *) raviX_allocator_allocate(&proc->linearizer->pseudo_allocator, 0);
pseudo->type = PSEUDO_RANGE;
pseudo->regnum = orig_pseudo->regnum;
if (orig_pseudo->type == PSEUDO_TEMP_ANY) {
orig_pseudo->freed = 1;
}
return pseudo;
}
/*
A PSEUDO_RANGE_SELECT picks or selects a particular offset in the range
specified by a PSEUDO_RANGE. Pick of 0 means pick first value from the range.
*/
static Pseudo *allocate_range_select_pseudo(Proc *proc, Pseudo *range_pseudo, int pick)
{
assert(range_pseudo->type == PSEUDO_RANGE);
Pseudo *pseudo = (Pseudo *) raviX_allocator_allocate(&proc->linearizer->pseudo_allocator, 0);
pseudo->type = PSEUDO_RANGE_SELECT;
pseudo->regnum = range_pseudo->regnum + pick;
pseudo->range_pseudo = range_pseudo;
return pseudo;
}
static void free_temp_pseudo(Proc *proc, Pseudo *pseudo, bool free_local)
{
if (pseudo->freed)
return;
if (!free_local && pseudo->temp_for_local) {
return;
}
PseudoGenerator *gen;
switch (pseudo->type) {
case PSEUDO_TEMP_FLT:
gen = &proc->temp_flt_pseudos;
break;
case PSEUDO_TEMP_INT:
case PSEUDO_TEMP_BOOL:
gen = &proc->temp_int_pseudos;
break;
case PSEUDO_RANGE:
case PSEUDO_TEMP_ANY:
gen = &proc->temp_pseudos;
break;
default:
// Not a temp, so no need to do anything
return;
}
free_register(proc, gen, pseudo->regnum);
}
/**
* Allocate a new Proc. If there is a current Proc, then the new Proc gets added to the
* current Proc's children.
*/
static Proc *allocate_proc(LinearizerState *linearizer, AstNode *function_expr)
{
assert(function_expr->type == EXPR_FUNCTION);
Proc *proc = (Proc *) raviX_allocator_allocate(&linearizer->proc_allocator, 0);
proc->function_expr = function_expr;
proc->id = raviX_ptrlist_size((PtrList *)linearizer->all_procs)+1; // so that 0 is not assigned
function_expr->function_expr.proc_id = proc->id;
raviX_ptrlist_add((PtrList **)&linearizer->all_procs, proc, &linearizer->ptrlist_allocator);
if (linearizer->current_proc) {
proc->parent = linearizer->current_proc;
raviX_ptrlist_add((PtrList **)&linearizer->current_proc->procs, proc,
&linearizer->ptrlist_allocator);
}
proc->constants = raviX_set_create(hash_constant, compare_constants);
proc->linearizer = linearizer;
proc->cfg = NULL;
return proc;
}
static void set_main_proc(LinearizerState *linearizer, Proc *proc)
{
assert(linearizer->main_proc == NULL);
assert(linearizer->current_proc == NULL);
linearizer->main_proc = proc;
assert(proc->function_expr->function_expr.parent_function == NULL);
}
static inline void set_current_proc(LinearizerState *linearizer, Proc *proc)
{
linearizer->current_proc = proc;
}
static void instruct_totype(Proc *proc, Pseudo *target, const VariableType *vtype)
{
enum opcode targetop = op_nop;
switch (vtype->type_code) {
case RAVI_TNUMFLT:
targetop = op_toflt;
break;
case RAVI_TNUMINT:
targetop = op_toint;
break;
case RAVI_TSTRING:
targetop = op_tostring;
break;
case RAVI_TFUNCTION:
targetop = op_toclosure;
break;
case RAVI_TTABLE:
targetop = op_totable;
break;
case RAVI_TARRAYFLT:
targetop = op_tofarray;
break;
case RAVI_TARRAYINT:
targetop = op_toiarray;
break;
case RAVI_TUSERDATA:
targetop = op_totype;
break;
default:
return;
}
Instruction *insn = allocate_instruction(proc, targetop);
if (targetop == op_totype) {
assert(vtype->type_name);
const Constant *tname_constant = allocate_string_constant(proc, vtype->type_name);
Pseudo *tname_pseudo = allocate_constant_pseudo(proc, tname_constant);
add_instruction_operand(proc, insn, tname_pseudo);
}
add_instruction_target(proc, insn, target);
add_instruction(proc, insn);
}
static void linearize_function_args(LinearizerState *linearizer)
{
Proc *proc = linearizer->current_proc;
AstNode *func_expr = proc->function_expr;
LuaSymbol *sym;
FOR_EACH_PTR(func_expr->function_expr.args, LuaSymbol, sym)
{
/* The arg symbols already have register assigned by the local scope */
assert(sym->variable.pseudo); // We should already have a register assigned
instruct_totype(proc, sym->variable.pseudo, &sym->variable.value_type);
}
END_FOR_EACH_PTR(sym)
}
static void linearize_statement_list(Proc *proc, AstNodeList *list)
{
AstNode *node;
FOR_EACH_PTR(list, AstNode, node) { linearize_statement(proc, node); }
END_FOR_EACH_PTR(node)
}
static inline Pseudo *convert_range_to_temp(Pseudo *pseudo)
{
assert(pseudo->type == PSEUDO_RANGE);
pseudo->type = PSEUDO_TEMP_ANY;
return pseudo;
}
static Pseudo *linearize_literal(Proc *proc, AstNode *expr)
{
assert(expr->type == EXPR_LITERAL);
ravitype_t type = expr->literal_expr.type.type_code;
Pseudo *pseudo = NULL;
switch (type) {
case RAVI_TNUMFLT:
case RAVI_TNUMINT:
case RAVI_TSTRING:
pseudo = allocate_constant_pseudo(proc, allocate_constant(proc, expr));
break;
case RAVI_TNIL:
pseudo = allocate_nil_pseudo(proc);
break;
case RAVI_TBOOLEAN:
pseudo = allocate_boolean_pseudo(proc, expr->literal_expr.u.i);
break;
case RAVI_TVARARGS:
handle_error(proc->linearizer->ast_container, "Var args not supported");
break;
default:
handle_error(proc->linearizer->ast_container, "feature not yet implemented");
break;
}
return pseudo;
}
static Pseudo *linearize_unary_operator(Proc *proc, AstNode *node)
{
// TODO if any expr is range we need to convert to temp?
UnaryOperatorType op = node->unary_expr.unary_op;
Pseudo *subexpr = linearize_expression(proc, node->unary_expr.expr);
ravitype_t subexpr_type = node->unary_expr.expr->common_expr.type.type_code;
enum opcode targetop = op_nop;
switch (op) {
case UNOPR_MINUS:
if (subexpr_type == RAVI_TNUMINT)
targetop = op_unmi;
else if (subexpr_type == RAVI_TNUMFLT)
targetop = op_unmf;
else
targetop = op_unm;
break;
case UNOPR_LEN:
if (subexpr_type == RAVI_TARRAYINT || subexpr_type == RAVI_TARRAYFLT)
targetop = op_leni;
else
targetop = op_len;
subexpr_type = node->unary_expr.type.type_code;
break;
case UNOPR_TO_INTEGER:
targetop = subexpr_type != RAVI_TNUMINT ? op_toint : op_nop;
break;
case UNOPR_TO_NUMBER:
targetop = subexpr_type != RAVI_TNUMFLT ? op_toflt : op_nop;
break;
case UNOPR_TO_CLOSURE:
targetop = subexpr_type != RAVI_TFUNCTION ? op_toclosure : op_nop;
break;
case UNOPR_TO_STRING:
targetop = subexpr_type != RAVI_TSTRING ? op_tostring : op_nop;
break;
case UNOPR_TO_INTARRAY:
targetop = subexpr_type != RAVI_TARRAYINT ? op_toiarray : op_nop;
break;
case UNOPR_TO_NUMARRAY:
targetop = subexpr_type != RAVI_TARRAYFLT ? op_tofarray : op_nop;
break;
case UNOPR_TO_TABLE:
targetop = subexpr_type != RAVI_TTABLE ? op_totable : op_nop;
break;
case UNOPR_TO_TYPE:
targetop = op_totype;
break;
case UNOPR_NOT:
targetop = op_not;
break;
case UNOPR_BNOT:
targetop = op_bnot;
break;
default: {
char err[100];
snprintf(err, sizeof err, "unexpected unary op %s", raviX_get_unary_opr_str(op));
handle_error(proc->linearizer->ast_container, err);
break;
}
}
if (targetop == op_nop) {
return subexpr;
}
Instruction *insn = allocate_instruction(proc, targetop);
Pseudo *target = subexpr;
if (op == UNOPR_TO_TYPE) {
const Constant *tname_constant = allocate_string_constant(proc, node->unary_expr.type.type_name);
Pseudo *tname_pseudo = allocate_constant_pseudo(proc, tname_constant);
add_instruction_operand(proc, insn, tname_pseudo);
} else if (op == UNOPR_NOT || op == UNOPR_BNOT) {
add_instruction_operand(proc, insn, target);
target = allocate_temp_pseudo(proc, RAVI_TANY);
} else if (op == UNOPR_MINUS || op == UNOPR_LEN) {
add_instruction_operand(proc, insn, target);
target = allocate_temp_pseudo(proc, subexpr_type);
}
add_instruction_target(proc, insn, target);
add_instruction(proc, insn);
return target;
}
static Pseudo *instruct_move(Proc *proc, enum opcode op, Pseudo *target, Pseudo *src)
{
// TODO we should use type specific MOVE instructions
Instruction *mov = allocate_instruction(proc, op);
add_instruction_operand(proc, mov, src);
add_instruction_target(proc, mov, target);
add_instruction(proc, mov);
return target;
}
static void instruct_cbr(Proc *proc, Pseudo *condition_pseudo, BasicBlock *true_block,
BasicBlock *false_block)
{
Pseudo *true_pseudo = allocate_block_pseudo(proc, true_block);
Pseudo *false_pseudo = allocate_block_pseudo(proc, false_block);
Instruction *insn = allocate_instruction(proc, op_cbr);
add_instruction_operand(proc, insn, condition_pseudo);
add_instruction_target(proc, insn, true_pseudo);
add_instruction_target(proc, insn, false_pseudo);
add_instruction(proc, insn);
}
static void instruct_br(Proc *proc, Pseudo *pseudo)
{
assert(pseudo->type == PSEUDO_BLOCK);
if (is_block_terminated(proc->current_bb)) {
start_block(proc, create_block(proc));
}
Instruction *insn = allocate_instruction(proc, op_br);
add_instruction_target(proc, insn, pseudo);
add_instruction(proc, insn);
}
// clang-format off
/*
Lua and/or operators are processed so that with 'and' the result is the final
true value, and with 'or' it is the first true value.
and IR
result = eval(expr_left);
if (result)
goto Lnext:
else
goto Ldone;
Lnext:
result = eval(expr_right);
goto Ldone;
Ldone:
or IR
result = eval(expr_left);
if (result)
goto Ldone:
else
goto Lnext;
Lnext:
result = eval(expr_right);
goto Ldone;
Ldone:
*/
// clang-format on
static Pseudo *linearize_bool(Proc *proc, AstNode *node, bool is_and)
{
AstNode *e1 = node->binary_expr.expr_left;
AstNode *e2 = node->binary_expr.expr_right;
BasicBlock *first_block = create_block(proc);
BasicBlock *end_block = create_block(proc);
Pseudo *result = allocate_temp_pseudo(proc, RAVI_TANY);
Pseudo *operand1 = linearize_expression(proc, e1);
instruct_move(proc, op_mov, result, operand1);
free_temp_pseudo(proc, operand1, false);
if (is_and)
instruct_cbr(proc, result, first_block, end_block); // If first value is true then evaluate the second
else
instruct_cbr(proc, result, end_block, first_block);
start_block(proc, first_block);
Pseudo *operand2 = linearize_expression(proc, e2);
instruct_move(proc, op_mov, result, operand2);
free_temp_pseudo(proc, operand2, false);
instruct_br(proc, allocate_block_pseudo(proc, end_block));
start_block(proc, end_block);
return result;
}
/* Utility to create a binary instruction where operands and target pseudo is known */
static void create_binary_instruction(Proc *proc, enum opcode targetop, Pseudo *operand1,
Pseudo *operand2, Pseudo *target)
{
Instruction *insn = allocate_instruction(proc, targetop);
add_instruction_operand(proc, insn, operand1);
add_instruction_operand(proc, insn, operand2);
add_instruction_target(proc, insn, target);
add_instruction(proc, insn);
}
static Pseudo *linearize_binary_operator(Proc *proc, AstNode *node)
{
// TODO if any expr is range we need to convert to temp?
BinaryOperatorType op = node->binary_expr.binary_op;
if (op == BINOPR_AND) {
return linearize_bool(proc, node, true);
} else if (op == BINOPR_OR) {
return linearize_bool(proc, node, false);
}
AstNode *e1 = node->binary_expr.expr_left;
AstNode *e2 = node->binary_expr.expr_right;
Pseudo *operand1 = linearize_expression(proc, e1);
Pseudo *operand2 = linearize_expression(proc, e2);
int targetop;
switch (op) {
case BINOPR_ADD:
targetop = op_add;
break;
case BINOPR_SUB:
targetop = op_sub;
break;
case BINOPR_MUL:
targetop = op_mul;
break;
case BINOPR_DIV:
targetop = op_div;
break;
case BINOPR_IDIV:
targetop = op_idiv;
break;
case BINOPR_BAND:
targetop = op_band;
break;
case BINOPR_BOR:
targetop = op_bor;
break;
case BINOPR_BXOR:
targetop = op_bxor;
break;
case BINOPR_SHL:
targetop = op_shl;
break;
case BINOPR_SHR:
targetop = op_shr;
break;
case BINOPR_EQ:
case BINOPR_NE:
targetop = op_eq;
break;
case BINOPR_LT:
case BINOPR_GT:
targetop = op_lt;
break;
case BINOPR_LE:
case BINOPR_GE:
targetop = op_le;
break;
case BINOPR_MOD:
targetop = op_mod;
break;
case BINOPR_POW:
targetop = op_pow;
break;
case BINOPR_CONCAT:
targetop = op_string_concat;
break;
default: {
char err[100];
snprintf(err, sizeof err, "unexpected binary op %s", raviX_get_binary_opr_str(op));
handle_error(proc->linearizer->ast_container, err);
targetop = op_nop;
break;
}
}
ravitype_t t1 = e1->common_expr.type.type_code;
ravitype_t t2 = e2->common_expr.type.type_code;
bool swap = false;
switch (targetop) {
case op_add:
case op_mul:
swap = t1 == RAVI_TNUMINT && t2 == RAVI_TNUMFLT;
break;
case op_eq:
case op_lt:
case op_le:
swap = op == BINOPR_NE || op == BINOPR_GT || op == BINOPR_GE;
break;
default:
break;
}
if (swap) {
Pseudo *temp;
AstNode *ntemp;
temp = operand1;
operand1 = operand2;
operand2 = temp;
ntemp = e1;
e1 = e2;
e2 = ntemp;
t1 = e1->common_expr.type.type_code;
t2 = e2->common_expr.type.type_code;
}
switch (targetop) {
case op_add:
case op_mul:
if (t1 == RAVI_TNUMFLT && t2 == RAVI_TNUMFLT)
targetop += 1;
else if (t1 == RAVI_TNUMFLT && t2 == RAVI_TNUMINT)
targetop += 2;
else if (t1 == RAVI_TNUMINT && t2 == RAVI_TNUMINT)
targetop += 3;
break;
case op_div:
case op_sub:
if (t1 == RAVI_TNUMFLT && t2 == RAVI_TNUMFLT)
targetop += 1;
else if (t1 == RAVI_TNUMFLT && t2 == RAVI_TNUMINT)
targetop += 2;
else if (t1 == RAVI_TNUMINT && t2 == RAVI_TNUMFLT)
targetop += 3;
else if (t1 == RAVI_TNUMINT && t2 == RAVI_TNUMINT)
targetop += 4;
break;
case op_band:
case op_bor:
case op_bxor:
case op_shl:
case op_shr:
if (t1 == RAVI_TNUMINT && t2 == RAVI_TNUMINT)
targetop += 1;
break;
case op_eq:
case op_le:
case op_lt:
if (t1 == RAVI_TNUMINT && t2 == RAVI_TNUMINT)
targetop += 1;
else if (t1 == RAVI_TNUMFLT && t2 == RAVI_TNUMFLT)
targetop += 2;
break;
default:
break;
}
ravitype_t target_type = node->binary_expr.type.type_code;
Pseudo *target = allocate_temp_pseudo(proc, target_type);
create_binary_instruction(proc, (enum opcode) targetop, operand1, operand2, target);
free_temp_pseudo(proc, operand1, false);
free_temp_pseudo(proc, operand2, false);
return target;
}
/* generates closure instruction - linearizes a Proc, and then adds instruction to create closure from it */
static Pseudo *linearize_function_expr(Proc *proc, AstNode *expr)
{
Proc *curproc = proc->linearizer->current_proc;
Proc *newproc = allocate_proc(proc->linearizer, expr);
set_current_proc(proc->linearizer, newproc);
linearize_function(proc->linearizer);
set_current_proc(proc->linearizer, curproc); // restore the proc
ravitype_t target_type = expr->function_expr.type.type_code;
Pseudo *target = allocate_temp_pseudo(proc, target_type);
Pseudo *operand = allocate_closure_pseudo(newproc);
Instruction *insn = allocate_instruction(proc, op_closure);
add_instruction_operand(proc, insn, operand);
add_instruction_target(proc, insn, target);
add_instruction(proc, insn);
return target;
}
static Pseudo *linearize_symbol_expression(Proc *proc, AstNode *expr)
{
LuaSymbol *sym = expr->symbol_expr.var;
if (sym->symbol_type == SYM_GLOBAL) {
assert(sym->variable.env);
Pseudo *target = allocate_temp_pseudo(proc, RAVI_TANY);
const Constant *constant = allocate_string_constant(proc, sym->variable.var_name);
Pseudo *operand_varname = allocate_constant_pseudo(proc, constant);
Pseudo* operand_env = allocate_symbol_pseudo(proc, sym->variable.env, 0); // no register
Instruction *insn = allocate_instruction(proc, op_loadglobal);
target->insn = insn;
add_instruction_operand(proc, insn, operand_env);
add_instruction_operand(proc, insn, operand_varname);
add_instruction_target(proc, insn, target);
add_instruction(proc, insn);
return target;
} else if (sym->symbol_type == SYM_LOCAL) {
return sym->variable.pseudo;
} else if (sym->symbol_type == SYM_UPVALUE) {
/* upvalue index is the position of upvalue in the function, we treat this as the pseudo register for
* the upvalue */
/* TODO maybe the pseudo be pre-created when we start linearizing the funcon and stored in the symbol
* like we do for locals? */
return allocate_symbol_pseudo(proc, sym, sym->upvalue.upvalue_index);
} else {
handle_error(proc->linearizer->ast_container, "feature not yet implemented");
return NULL;
}
}
static Pseudo *instruct_indexed_load(Proc *proc, ravitype_t container_type,
Pseudo *container_pseudo, ravitype_t key_type,
Pseudo *key_pseudo, ravitype_t target_type)
{
int op = op_get;
switch (container_type) {
case RAVI_TTABLE:
op = op_tget;
break;
case RAVI_TARRAYINT:
op = op_iaget;
break;
case RAVI_TARRAYFLT:
op = op_faget;
break;
default:
break;
}
/* Note we rely upon ordering of enums here */
switch (key_type) {
case RAVI_TNUMINT:
op++;
break;
case RAVI_TSTRING:
assert(container_type != RAVI_TARRAYINT && container_type != RAVI_TARRAYFLT);
op += 2;
break;
default:
break;
}
Pseudo *target_pseudo = allocate_temp_pseudo(proc, target_type);
Instruction *insn = allocate_instruction(proc, (enum opcode)op);
add_instruction_operand(proc, insn, container_pseudo);
add_instruction_operand(proc, insn, key_pseudo);
add_instruction_target(proc, insn, target_pseudo);
add_instruction(proc, insn);
target_pseudo->insn = insn;
return target_pseudo;
}
static void instruct_indexed_store(Proc *proc, ravitype_t table_type, Pseudo *table,
Pseudo *index_pseudo, ravitype_t index_type, Pseudo *value_pseudo,
ravitype_t value_type)
{
// TODO validate the type of assignment
// Insert type assertions if needed
int op;
switch (table_type) {
case RAVI_TARRAYINT:
op = op_iaput;
if (value_type == RAVI_TNUMINT) {
op = op_iaput_ival;
}
break;
case RAVI_TARRAYFLT:
op = op_faput;
if (value_type == RAVI_TNUMFLT) {
op = op_faput_fval;
}
break;
default:
op = table_type == RAVI_TTABLE ? op_tput : op_put;
if (index_type == RAVI_TNUMINT) {
op += 1;
} else if (index_type == RAVI_TSTRING) {
op += 2;
}
break;
}
Instruction *insn = allocate_instruction(proc, (enum opcode) op);
add_instruction_target(proc, insn, table);
add_instruction_target(proc, insn, index_pseudo);
add_instruction_operand(proc, insn, value_pseudo);
add_instruction(proc, insn);
}
static void convert_loadglobal_to_store(Proc *proc, Instruction *insn, Pseudo *value_pseudo,
ravitype_t value_type)
{
assert(insn->opcode == op_loadglobal);
remove_instruction(insn->block, insn); // remove the instruction from its original block
insn->opcode = op_storeglobal;
// Remove the targets
Pseudo *get_target = (Pseudo *) raviX_ptrlist_delete_last((PtrList **)&insn->targets);
free_temp_pseudo(proc, get_target, false);
Pseudo *pseudo;
// Move the loadglobal operands to target
FOR_EACH_PTR(insn->operands, Pseudo, pseudo) { add_instruction_target(proc, insn, pseudo); }
END_FOR_EACH_PTR(pseudo);
raviX_ptrlist_remove_all((PtrList **)&insn->operands);
// Add new operand
add_instruction_operand(proc, insn, value_pseudo);
add_instruction(proc, insn);
}
static void convert_indexed_load_to_store(Proc *proc, Instruction *insn, Pseudo *value_pseudo,
ravitype_t value_type)
{
enum opcode putop;
switch (insn->opcode) {
case op_iaget:
case op_iaget_ikey:
putop = value_type == RAVI_TNUMINT ? op_iaput_ival : op_iaput;
break;
case op_faget:
case op_faget_ikey:
putop = value_type == RAVI_TNUMFLT ? op_faput_fval : op_faput;
break;
case op_tget:
putop = op_tput;
break;
case op_tget_ikey:
putop = op_tput_ikey;
break;
case op_tget_skey:
putop = op_tput_skey;
break;
case op_get:
putop = op_put;
break;
case op_get_ikey:
putop = op_put_ikey;
break;
case op_get_skey:
putop = op_put_skey;
break;
default:
return;
}
remove_instruction(insn->block, insn);
insn->opcode = putop;
// Remove target
Pseudo *get_target = (Pseudo *) raviX_ptrlist_delete_last((PtrList **)&insn->targets);
free_temp_pseudo(proc, get_target, false);
Pseudo *pseudo;
// Move the get operands to put target (table, key)
FOR_EACH_PTR(insn->operands, Pseudo, pseudo) { add_instruction_target(proc, insn, pseudo); }
END_FOR_EACH_PTR(pseudo);
raviX_ptrlist_remove_all((PtrList **)&insn->operands);
// Add new operand
add_instruction_operand(proc, insn, value_pseudo);
add_instruction(proc, insn);
}
/**
* Lua function calls can return multiple values, and the caller decides how many values to accept.
* We indicate multiple values using a PSEUDO_RANGE.
* We also handle method call:
* <pseudo>:name(...) -> is translated to <pseudo>.name(<pseudo>, ...)
*/
static Pseudo *linearize_function_call_expression(Proc *proc, AstNode *expr,
AstNode *callsite_expr, Pseudo *callsite_pseudo)
{
Instruction *insn = allocate_instruction(proc, op_call);
Pseudo *self_arg = NULL; /* For method call */
if (expr->function_call_expr.method_name) {
const Constant *name_constant =
allocate_string_constant(proc, expr->function_call_expr.method_name);
Pseudo *name_pseudo = allocate_constant_pseudo(proc, name_constant);
self_arg = callsite_pseudo; /* The original callsite must be passed as 'self' */
/* create new call site as callsite[name] */
callsite_pseudo = instruct_indexed_load(proc, callsite_expr->common_expr.type.type_code,
callsite_pseudo, RAVI_TSTRING, name_pseudo, RAVI_TANY);
}
add_instruction_operand(proc, insn, callsite_pseudo);
if (self_arg) {
add_instruction_operand(proc, insn, self_arg);
}
AstNode *arg;
int argc = raviX_ptrlist_size((const PtrList *)expr->function_call_expr.arg_list);
FOR_EACH_PTR(expr->function_call_expr.arg_list, AstNode, arg)
{
argc -= 1;
Pseudo *arg_pseudo = linearize_expression(proc, arg);
if (argc != 0 && arg_pseudo->type == PSEUDO_RANGE) {
// Not last one, so range can only be 1
convert_range_to_temp(arg_pseudo);
}
add_instruction_operand(proc, insn, arg_pseudo);
}
END_FOR_EACH_PTR(arg)
Pseudo *return_pseudo = allocate_range_pseudo(
proc, callsite_pseudo); /* Base reg for function call - where return values will be placed */
add_instruction_target(proc, insn, return_pseudo);
add_instruction_target(proc, insn, allocate_constant_pseudo(proc, allocate_integer_constant(proc, expr->function_call_expr.num_results)));
add_instruction(proc, insn);
free_instruction_operand_pseudos(proc, insn);
return return_pseudo;
}
/*
* Suffixed expression examples:
* f()[1]
* x[1][2]
* x.y[1]
*
* The result type of a suffixed expression may initially be an indexed load, but when used in the context of
* an assignment statement the load will be converted to a store.
* Lua parser does this by creating a VINDEXED node which is only converted to load/store
* when the VINDEXED node is used.
*/
static Pseudo *linearize_suffixedexpr(Proc *proc, AstNode *node)
{
/* suffixedexp -> primaryexp { '.' NAME | '[' exp ']' | ':' NAME funcargs | funcargs } */
Pseudo *prev_pseudo = linearize_expression(proc, node->suffixed_expr.primary_expr);
AstNode *prev_node = node->suffixed_expr.primary_expr;
AstNode *this_node;
FOR_EACH_PTR(node->suffixed_expr.suffix_list, AstNode, this_node)
{
Pseudo *next;
if (prev_pseudo->type == PSEUDO_RANGE)
convert_range_to_temp(prev_pseudo);
if (this_node->type == EXPR_Y_INDEX || this_node->type == EXPR_FIELD_SELECTOR) {
Pseudo *key_pseudo = linearize_expression(proc, this_node->index_expr.expr);
ravitype_t key_type = this_node->index_expr.expr->common_expr.type.type_code;
next = instruct_indexed_load(proc, prev_node->common_expr.type.type_code, prev_pseudo, key_type,
key_pseudo, this_node->common_expr.type.type_code);
} else if (this_node->type == EXPR_FUNCTION_CALL) {
next = linearize_function_call_expression(proc, this_node, prev_node, prev_pseudo);
} else {
next = NULL;
handle_error(proc->linearizer->ast_container, "Unexpected expr type in suffix list");
}
prev_node = this_node;
prev_pseudo = next;
}
END_FOR_EACH_PTR(node)
return prev_pseudo;
}
static int linearize_indexed_assign(Proc *proc, Pseudo *table, ravitype_t table_type,
AstNode *expr, int next)
{
Pseudo *index_pseudo;
ravitype_t index_type;
if (expr->table_elem_assign_expr.key_expr) {
index_pseudo = linearize_expression(proc, expr->table_elem_assign_expr.key_expr);
index_type = expr->table_elem_assign_expr.key_expr->index_expr.expr->common_expr.type.type_code;
// TODO check valid index
} else {
const Constant *constant = allocate_integer_constant(proc, next++);
index_pseudo = allocate_constant_pseudo(proc, constant);
index_type = RAVI_TNUMINT;
}
Pseudo *value_pseudo = linearize_expression(proc, expr->table_elem_assign_expr.value_expr);
ravitype_t value_type = expr->table_elem_assign_expr.value_expr->common_expr.type.type_code;
instruct_indexed_store(proc, table_type, table, index_pseudo, index_type, value_pseudo, value_type);
free_temp_pseudo(proc, index_pseudo, false);
free_temp_pseudo(proc, value_pseudo, false);
return next;
}
static Pseudo *linearize_table_constructor(Proc *proc, AstNode *expr)
{
/* constructor -> '{' [ field { sep field } [sep] ] '}' where sep -> ',' | ';' */
Pseudo *target = allocate_temp_pseudo(proc, expr->table_expr.type.type_code);
enum opcode op = op_newtable;
if (expr->table_expr.type.type_code == RAVI_TARRAYINT)
op = op_newiarray;
else if (expr->table_expr.type.type_code == RAVI_TARRAYFLT)
op = op_newfarray;
Instruction *insn = allocate_instruction(proc, op);
add_instruction_target(proc, insn, target);
add_instruction(proc, insn);
/*TODO process constructor elements */
AstNode *ia;
int i = 1;
FOR_EACH_PTR(expr->table_expr.expr_list, AstNode, ia)
{
i = linearize_indexed_assign(proc, target, expr->table_expr.type.type_code, ia, i);
}
END_FOR_EACH_PTR(ia)
return target;
}
/** Is the type NIL-able */
static bool is_nillable(const VariableType *var_type)
{
return var_type->type_code != RAVI_TARRAYFLT && var_type->type_code != RAVI_TARRAYINT &&
var_type->type_code != RAVI_TNUMFLT && var_type->type_code != RAVI_TNUMINT;
}
/* Check if we can assign value to variable */
static bool is_compatible(const VariableType *var_type, const VariableType *val_type)
{
if (var_type->type_code == RAVI_TANY)
return true;
if (is_nillable(var_type) && val_type->type_code == RAVI_TNIL)
return true;
if (val_type->type_code == var_type->type_code && val_type->type_name == var_type->type_name)
return true;
if ((var_type->type_code == RAVI_TNUMFLT && val_type->type_code == RAVI_TNUMINT) ||
(var_type->type_code == RAVI_TNUMINT && val_type->type_code == RAVI_TNUMFLT))
/* Maybe conversion is possible so allow */
return true;
return false;
}
static void linearize_store_var(Proc *proc, const VariableType *var_type, Pseudo *var_pseudo,
const VariableType *val_type, Pseudo *val_pseudo)
{
if (var_pseudo->insn && var_pseudo->insn->opcode >= op_get && var_pseudo->insn->opcode <= op_faget_ikey) {
convert_indexed_load_to_store(proc, var_pseudo->insn, val_pseudo, val_type->type_code);
} else if (var_pseudo->insn && var_pseudo->insn->opcode == op_loadglobal) {
convert_loadglobal_to_store(proc, var_pseudo->insn, val_pseudo, val_type->type_code);
} else {
assert(!var_pseudo->insn);
assert(var_type->type_code != RAVI_TVARARGS && var_type->type_code != RAVI_TNIL);
if (!is_compatible(var_type, val_type)) {
instruct_totype(proc, val_pseudo, var_type);
val_type = var_type; // Because of the type assertion!
}
enum opcode op = op_mov;
if (var_type->type_code == RAVI_TNUMINT) {
op = val_type->type_code == RAVI_TNUMINT ? op_movi : op_movfi;
} else if (var_type->type_code == RAVI_TNUMFLT) {
op = val_type->type_code == RAVI_TNUMFLT ? op_movf : op_movif;
}
instruct_move(proc, op, var_pseudo, val_pseudo);
}
}
struct node_info {
const VariableType *vartype;
Pseudo *pseudo;
};
static void linearize_assignment(Proc *proc, AstNodeList *expr_list, struct node_info *varinfo, int nv)
{
AstNode *expr;
int ne = raviX_ptrlist_size((const PtrList *)expr_list);
struct node_info *valinfo = (struct node_info *)alloca(ne * sizeof(struct node_info));
Pseudo *last_val_pseudo = NULL;
int i = 0;
FOR_EACH_PTR(expr_list, AstNode, expr)
{
Pseudo *val_pseudo = last_val_pseudo = linearize_expression(proc, expr);
valinfo[i].vartype = &expr->common_expr.type;
valinfo[i].pseudo = val_pseudo;
i++;
if (i < ne && val_pseudo->type == PSEUDO_RANGE) {
convert_range_to_temp(val_pseudo);
}
}
END_FOR_EACH_PTR(expr)
/* TODO do we need to insert type assertions in some cases such as function return values ? */
int note_ne = ne;
while (nv > 0) {
if (nv > ne) {
if (last_val_pseudo != NULL && last_val_pseudo->type == PSEUDO_RANGE) {
int pick = nv - ne;
linearize_store_var(proc, varinfo[nv - 1].vartype, varinfo[nv - 1].pseudo,
valinfo[ne - 1].vartype,
allocate_range_select_pseudo(proc, last_val_pseudo, pick));
} else {
// TODO store NIL
}
nv--;
} else {
if (valinfo[ne - 1].pseudo->type == PSEUDO_RANGE) {
/* Only the topmost expression can be a range ... assert */
assert(ne == note_ne);
valinfo[ne - 1].pseudo = allocate_range_select_pseudo(proc, valinfo[ne - 1].pseudo, 0);
}
linearize_store_var(proc, varinfo[nv - 1].vartype, varinfo[nv - 1].pseudo,
valinfo[ne - 1].vartype, valinfo[ne - 1].pseudo);
free_temp_pseudo(proc, valinfo[ne - 1].pseudo, false);
nv--;
ne--;
}
}
}
/*
Expression or assignment statement is of the form:
<LHS exp list...> = <RHS exp list...>
Lua requires some special handling of this statement. Firstly
the LHS expressions are evaluated left to right.
The RHS is processed right to left. If there is a corresponding LHS expr
then we need to assign the value of the RHS expr to the LHS expr.
Excess RHS expression results are discarded.
Excess LHS expressions have to be set to the default value.
So for example if we had:
expr1, expr2 = expr3, expr4, expr5
Then following needs to be generated
result1 = eval(expr1)
result2 = eval(expr2)
eval(expr5)
*result2 = eval(expr4)
*result1 = eval(expr3)
Our code generation has an issue:
We initially generate load instructions for LHS expressions.
Subsequently we convert these to store instructions (marked above with asterisk)
The handling of 'local' and expression statements can be partially combined
because the main difference is the LHS side of it. The rest of the processing has to be
the same.
*/
static void linearize_expression_statement(Proc *proc, AstNode *node)
{
AstNode *var;
int nv = raviX_ptrlist_size((const PtrList *)node->expression_stmt.var_expr_list);
struct node_info *varinfo = (struct node_info *)alloca(nv * sizeof(struct node_info));
int i = 0;
FOR_EACH_PTR(node->expression_stmt.var_expr_list, AstNode, var)
{
Pseudo *var_pseudo = linearize_expression(proc, var);
varinfo[i].vartype = &var->common_expr.type;
varinfo[i].pseudo = var_pseudo;
i++;
}
END_FOR_EACH_PTR(var)
linearize_assignment(proc, node->expression_stmt.expr_list, varinfo, nv);
}
static void linearize_local_statement(Proc *proc, AstNode *stmt)
{
LuaSymbol *sym;
int nv = raviX_ptrlist_size((const PtrList *)stmt->local_stmt.var_list);
struct node_info *varinfo = (struct node_info *)alloca(nv * sizeof(struct node_info));
int i = 0;
FOR_EACH_PTR(stmt->local_stmt.var_list, LuaSymbol, sym)
{
Pseudo *var_pseudo = sym->variable.pseudo;
assert(var_pseudo);
varinfo[i].vartype = &sym->variable.value_type;
varinfo[i].pseudo = var_pseudo;
i++;
}
END_FOR_EACH_PTR(var)
linearize_assignment(proc, stmt->local_stmt.expr_list, varinfo, nv);
}
static Pseudo *linearize_expression(Proc *proc, AstNode *expr)
{
Pseudo *result = NULL;
switch (expr->type) {
case EXPR_LITERAL: {
result = linearize_literal(proc, expr);
} break;
case EXPR_BINARY: {
result = linearize_binary_operator(proc, expr);
} break;
case EXPR_FUNCTION: {
result = linearize_function_expr(proc, expr);
} break;
case EXPR_UNARY: {
result = linearize_unary_operator(proc, expr);
} break;
case EXPR_SUFFIXED: {
result = linearize_suffixedexpr(proc, expr);
} break;
case EXPR_SYMBOL: {
result = linearize_symbol_expression(proc, expr);
} break;
case EXPR_TABLE_LITERAL: {
result = linearize_table_constructor(proc, expr);
} break;
case EXPR_Y_INDEX:
case EXPR_FIELD_SELECTOR: {
result = linearize_expression(proc, expr->index_expr.expr);
} break;
default:
handle_error(proc->linearizer->ast_container, "feature not yet implemented");
break;
}
assert(result);
if (result->type == PSEUDO_RANGE && expr->common_expr.truncate_results) {
// Need to truncate the results to 1
return allocate_range_select_pseudo(proc, result, 0);
}
return result;
}
static void linearize_expr_list(Proc *proc, AstNodeList *expr_list, Instruction *insn,
PseudoList **pseudo_list)
{
AstNode *expr;
int ne = raviX_ptrlist_size((const PtrList *)expr_list);
FOR_EACH_PTR(expr_list, AstNode, expr)
{
ne -= 1;
Pseudo *pseudo = linearize_expression(proc, expr);
if (ne != 0 && pseudo->type == PSEUDO_RANGE) {
convert_range_to_temp(pseudo); // Only accept one result unless it is the last expr
}
raviX_ptrlist_add((PtrList **)pseudo_list, pseudo, &proc->linearizer->ptrlist_allocator);
}
END_FOR_EACH_PTR(expr)
}
static void linearize_return(Proc *proc, AstNode *node)
{
assert(node->type == STMT_RETURN);
Instruction *insn = allocate_instruction(proc, op_ret);
linearize_expr_list(proc, node->return_stmt.expr_list, insn, &insn->operands);
add_instruction_target(proc, insn, allocate_block_pseudo(proc, proc->nodes[EXIT_BLOCK]));
add_instruction(proc, insn);
}
/* A block is considered terminated if the last instruction is
a return or a branch */
static bool is_block_terminated(BasicBlock *block)
{
Instruction *last_insn = raviX_last_instruction(block);
if (last_insn == NULL)
return false;
if (last_insn->opcode == op_ret || last_insn->opcode == op_cbr || last_insn->opcode == op_br)
return true;
return false;
}
static void linearize_test_cond(Proc *proc, AstNode *node, BasicBlock *true_block,
BasicBlock *false_block)
{
Pseudo *condition_pseudo = linearize_expression(proc, node->test_then_block.condition);
instruct_cbr(proc, condition_pseudo, true_block, false_block);
}
/* linearize the 'else if' block */
static void linearize_test_then(Proc *proc, AstNode *node, BasicBlock *true_block,
BasicBlock *end_block)
{
start_block(proc, true_block);
start_scope(proc->linearizer, proc, node->test_then_block.test_then_scope);
linearize_statement_list(proc, node->test_then_block.test_then_statement_list);
end_scope(proc->linearizer, proc);
if (!is_block_terminated(proc->current_bb))
instruct_br(proc, allocate_block_pseudo(proc, end_block));
}
// clang-format off
/*
The Lua if statement has a complex structure as it is somewhat like
a combination of case and if statement. The if block is followed by
1 or more elseif blocks. Finally we have an optinal else block.
The elseif blocks are like case statements.
Given
if cond1 then
block for cond1
elseif cond2 then
block for cond2
else
block for else
end
We linearize the statement as follows.
B0:
if cond1 goto Bcond1 else B2; // Initial if condition
B2:
if cond2 goto Bcond2 else B3: // This is an elseif condition
B3:
<if AST has else>
goto Belse;
<else>
goto Bend;
Bcond1:
start scope
block for cond1
end scope
goto Bend;
Bcond2:
start scope
block for cond2
end scope
goto Bend;
Belse:
start scope
block for else
end scope
goto Bend;
Bend:
*/
// clang-format on
static void linearize_if_statement(Proc *proc, AstNode *ifnode)
{
BasicBlock *end_block = NULL;
BasicBlock *else_block = NULL;
BasicBlockList *if_blocks = NULL;
BasicBlockList *if_true_blocks = NULL;
AstNodeList *if_else_stmts = ifnode->if_stmt.if_condition_list;
AstNodeList *else_stmts = ifnode->if_stmt.else_statement_list;
Scope *else_scope = ifnode->if_stmt.else_block;
AstNode *this_node;
FOR_EACH_PTR(if_else_stmts, AstNode, this_node)
{
BasicBlock *block = create_block(proc);
raviX_ptrlist_add((PtrList **)&if_blocks, block, &proc->linearizer->ptrlist_allocator);
}
END_FOR_EACH_PTR(this_node)
FOR_EACH_PTR(if_else_stmts, AstNode, this_node)
{
BasicBlock *block = create_block(proc);
raviX_ptrlist_add((PtrList **)&if_true_blocks, block, &proc->linearizer->ptrlist_allocator);
}
END_FOR_EACH_PTR(this_node)
if (ifnode->if_stmt.else_statement_list) {
else_block = create_block(proc);
}
end_block = create_block(proc);
BasicBlock *true_block = NULL;
BasicBlock *false_block = NULL;
BasicBlock *block = NULL;
{
PREPARE_PTR_LIST(if_blocks, BasicBlock, block);
PREPARE_PTR_LIST(if_true_blocks, BasicBlock, true_block);
FOR_EACH_PTR(if_else_stmts, AstNode, this_node)
{
start_block(proc, block);
NEXT_PTR_LIST(BasicBlock, block);
if (!block) {
// last one
if (else_block)
false_block = else_block;
else
false_block = end_block;
} else {
false_block = block;
}
linearize_test_cond(proc, this_node, true_block, false_block);
NEXT_PTR_LIST(BasicBlock, true_block);
}
END_FOR_EACH_PTR(node)
FINISH_PTR_LIST(block);
FINISH_PTR_LIST(true_block);
}
{
PREPARE_PTR_LIST(if_true_blocks, BasicBlock, true_block);
FOR_EACH_PTR(if_else_stmts, AstNode, this_node)
{
linearize_test_then(proc, this_node, true_block, end_block);
NEXT_PTR_LIST(BasicBlock, true_block);
}
END_FOR_EACH_PTR(node)
FINISH_PTR_LIST(true_block);
}
if (else_block) {
start_block(proc, else_block);
start_scope(proc->linearizer, proc, else_scope);
linearize_statement_list(proc, else_stmts);
end_scope(proc->linearizer, proc);
if (!is_block_terminated(proc->current_bb))
instruct_br(proc, allocate_block_pseudo(proc, end_block));
}
start_block(proc, end_block);
}
/*
handle label statement.
We start a new block which will get associated with the label.
We have to handle the situation where the label pseudo was already created when we
encountered a goto statement but we did not know the block then.
*/
static void linearize_label_statement(Proc *proc, AstNode *node)
{
BasicBlock* block;
if (node->label_stmt.symbol->label.pseudo != NULL) {
/* This means the block got created when we saw the goto statement, so we just need to make it current */
assert(node->label_stmt.symbol->label.pseudo->block != NULL);
block = node->label_stmt.symbol->label.pseudo->block;
start_block(proc, block);
}
else {
block = proc->current_bb;
/* If the current block is empty then we can use it as the label target */
if (raviX_ptrlist_size((const PtrList *)block->insns) > 0) {
/* Create new block as label target */
block = create_block(proc);
start_block(proc, block);
}
node->label_stmt.symbol->label.pseudo = allocate_block_pseudo(proc, block);
}
}
/* TODO move this logic to parser? */
/* Search for a label going up scopes starting from the scope where the goto statement appeared.
* Also return via min_closing_block the ancestor scope that is greater than or equal to the
* label scope, and where a local variable escaped.
*/
static LuaSymbol *find_label(Proc *proc, Scope *block,
const StringObject *label_name, Scope **min_closing_block)
{
AstNode *function = block->function; /* We need to stay inside the function when lookng for the label */
*min_closing_block = NULL;
while (block != NULL && block->function == function) {
LuaSymbol *symbol;
if (block->need_close) {
*min_closing_block = block;
}
FOR_EACH_PTR_REVERSE(block->symbol_list, LuaSymbol, symbol)
{
if (symbol->symbol_type == SYM_LABEL && symbol->label.label_name == label_name) {
return symbol;
}
}
END_FOR_EACH_PTR_REVERSE(symbol)
block = block->parent;
}
return NULL;
}
/*
* Starting from block, go up the hierarchy until target_block and determine the oldest
* ancestor block that has escaped variables and thus needs to be closed.
*/
static Scope *find_min_closing_block(Scope *block, Scope *target_block)
{
AstNode *function = block->function; /* We need to stay inside the function when lookng for the label */
Scope *min_closing_block = NULL;
while (block != NULL && block->function == function) {
if (block->need_close) {
min_closing_block = block;
}
if (block == target_block)
break;
block = block->parent;
}
return min_closing_block;
}
/*
* Checks if a basic block is already closed - for now we check if the last
* instruction in the block is op_ret, which also handles closing of up-values.
*/
static bool is_already_closed(Proc *proc, BasicBlock *block)
{
Instruction *last_insn = raviX_last_instruction(block);
if (last_insn == NULL)
return false;
if (last_insn->opcode == op_ret)
return true;
if (last_insn->opcode == op_close) {
// hmmm
assert(false);
}
return false;
}
/* Adds a OP_CLOSE instruction at the specified basic block, but only if any local variables in the given
* scope escaped, i.e. were referenced as upvalues.
* Note that the proc's current_bb remains unchanged after this call. Normally we would expect
* the current basic block to be where we insert instructions but in this case there are scenarios
* such as when processing goto or break statemnt where the close instruction must be added to the
* the goto / break target block.
*/
static void instruct_close(Proc *proc, BasicBlock *block, Scope *scope)
{
if (is_already_closed(proc, block))
return;
/* temporarily make block current */
BasicBlock *prev_current = proc->current_bb;
proc->current_bb = block;
LuaSymbol *symbol;
FOR_EACH_PTR(scope->symbol_list, LuaSymbol, symbol)
{
/* We add the first escaping variable as the operand to op_close.
* op_close is meant to scan the stack from that point and close
* any open upvalues
*/
if (symbol->symbol_type == SYM_LOCAL && symbol->variable.escaped) {
assert(symbol->variable.pseudo);
Instruction *insn = allocate_instruction(proc, op_close);
add_instruction_operand(proc, insn, symbol->variable.pseudo);
add_instruction(proc, insn);
break;
}
}
END_FOR_EACH_PTR(symbol)
/* restore current basic block */
proc->current_bb = prev_current;
}
/*
When linearizing the goto statement we create a pseudo for the label if it hasn't been already created.
But at this point we may not know the target basic block to goto, which we expect to be filled when the label is
encountered. Of course if the label was linearized before we got to the goto statement then the target block
would already be known and specified in the pseudo.
*/
static void linearize_goto_statement(Proc *proc, const AstNode *node)
{
if (node->goto_stmt.is_break) {
if (proc->current_break_target == NULL) {
handle_error(proc->linearizer->ast_container, "no current break target");
}
/* Find the oldest ancestor scope that may need to be closed */
Scope *min_closing_block = find_min_closing_block(node->goto_stmt.goto_scope, proc->current_break_scope);
instruct_br(proc, allocate_block_pseudo(proc, proc->current_break_target));
start_block(proc, create_block(proc));
if (min_closing_block) {
/* Note that the close instruction goes to the target block of the goto */
instruct_close(proc, proc->current_break_target, min_closing_block);
}
return;
}
/* The AST does not provide link to the label so we have to search for the label in the goto scope
and above */
if (node->goto_stmt.goto_scope) {
Scope *min_closing_block = NULL;
LuaSymbol *symbol = find_label(proc, node->goto_stmt.goto_scope, node->goto_stmt.name, &min_closing_block);
if (symbol) {
/* label found */
if (symbol->label.pseudo == NULL) {
/* No pseudo? create with target block to be processed later when label is encountered */
symbol->label.pseudo = allocate_block_pseudo(proc, create_block(proc));
}
else {
assert(symbol->label.pseudo->block != NULL);
}
instruct_br(proc, symbol->label.pseudo);
start_block(proc, create_block(proc));
if (min_closing_block) {
/* Note that the close instruction goes to the target block of the goto */
instruct_close(proc, symbol->label.pseudo->block, min_closing_block);
}
return;
}
}
handle_error(proc->linearizer->ast_container, "goto label not found");
}
static void linearize_do_statement(Proc *proc, AstNode *node)
{
assert(node->type == STMT_DO);
start_scope(proc->linearizer, proc, node->do_stmt.scope);
linearize_statement_list(proc, node->do_stmt.do_statement_list);
end_scope(proc->linearizer, proc);
}
//clang-format off
/*
Lua manual states:
for v = e1, e2, e3 do block end
is equivalent to the code:
do
local var, limit, step = tonumber(e1), tonumber(e2), tonumber(e3)
if not (var and limit and step) then error() end
var = var - step
while true do
var = var + step
if (step >= 0 and var > limit) or (step < 0 and var < limit) then
break
end
local v = var
block
end
end
We do not need local vars to hold var, limit, step as these can be
temporaries.
step_positive = 0 < step
var = var - step
goto L1
L1:
var = var + step;
if step_positive goto L2;
else goto L3;
L2:
stop = var > limit
if stop goto Lend
else goto Lbody
L3:
stop = var < limit
if stop goto Lend
else goto Lbody
Lbody:
set local symbol in for loop to var
do body
goto L1;
Lend:
Above is the general case
When we know the increment to be negative or positive we can simplify.
Example for positive case
var = var - step
goto L1
L1:
var = var + step;
goto L2
L2:
stop = var > limit
if stop goto Lend
else goto Lbody
Lbody:
set local symbol in for loop to var
do body
goto L1;
Lend:
Negative case
var = var - step
goto L1
L1:
var = var + step;
goto L3;
L3:
stop = var < limit
if stop goto Lend
else goto Lbody
Lbody:
set local symbol in for loop to var
do body
goto L1;
Lend:
*/
//clang-format on
static void linearize_for_num_statement_positivestep(Proc *proc, AstNode *node)
{
start_scope(proc->linearizer, proc, node->for_stmt.for_scope);
AstNode *index_var_expr = (AstNode *) raviX_ptrlist_nth_entry((PtrList *)node->for_stmt.expr_list, 0);
AstNode *limit_expr = (AstNode *) raviX_ptrlist_nth_entry((PtrList *)node->for_stmt.expr_list, 1);
AstNode *step_expr = (AstNode *) raviX_ptrlist_nth_entry((PtrList *)node->for_stmt.expr_list, 2);
LuaSymbol *var_sym = (LuaSymbol *) raviX_ptrlist_nth_entry((PtrList *)node->for_stmt.symbols, 0);
if (index_var_expr == NULL || limit_expr == NULL) {
handle_error(proc->linearizer->ast_container, "A least index and limit must be supplied");
}
Pseudo *t = linearize_expression(proc, index_var_expr);
if (t->type == PSEUDO_RANGE) {
convert_range_to_temp(t); // Only accept one result
}
Pseudo *index_var_pseudo = allocate_temp_pseudo(proc, RAVI_TNUMINT);
instruct_move(proc, op_mov, index_var_pseudo, t);
t = linearize_expression(proc, limit_expr);
if (t->type == PSEUDO_RANGE) {
convert_range_to_temp(t); // Only accept one result
}
Pseudo *limit_pseudo = allocate_temp_pseudo(proc, RAVI_TNUMINT);
instruct_move(proc, op_mov, limit_pseudo, t);
if (step_expr == NULL)
t = allocate_constant_pseudo(proc, allocate_integer_constant(proc, 1));
else {
t = linearize_expression(proc, step_expr);
if (t->type == PSEUDO_RANGE) {
convert_range_to_temp(t); // Only accept one result
}
}
Pseudo *step_pseudo = allocate_temp_pseudo(proc, RAVI_TNUMINT);
instruct_move(proc, op_mov, step_pseudo, t);
Pseudo *stop_pseudo = allocate_temp_pseudo(proc, RAVI_TBOOLEAN);
create_binary_instruction(proc, op_subii, index_var_pseudo, step_pseudo, index_var_pseudo);
BasicBlock *L1 = create_block(proc);
BasicBlock *L2 = create_block(proc);
BasicBlock *Lbody = create_block(proc);
BasicBlock *Lend = create_block(proc);
BasicBlock *previous_break_target = proc->current_break_target;
Scope *previous_break_scope = proc->current_break_scope;
proc->current_break_target = Lend;
proc->current_break_scope = proc->current_scope;
start_block(proc, L1);
create_binary_instruction(proc, op_addii, index_var_pseudo, step_pseudo, index_var_pseudo);
instruct_br(proc, allocate_block_pseudo(proc, L2));
start_block(proc, L2);
create_binary_instruction(proc, op_ltii, limit_pseudo, index_var_pseudo, stop_pseudo);
instruct_cbr(proc, stop_pseudo, Lend, Lbody);
start_block(proc, Lbody);
instruct_move(proc, op_mov, var_sym->variable.pseudo, index_var_pseudo);
start_scope(proc->linearizer, proc, node->for_stmt.for_body);
linearize_statement_list(proc, node->for_stmt.for_statement_list);
end_scope(proc->linearizer, proc);
/* If the fornum block has escaped local vars then we need to close */
if (proc->current_break_scope->need_close) {
/* Note we put close instruction in current basic block */
instruct_close(proc, proc->current_bb, proc->current_break_scope);
}
instruct_br(proc, allocate_block_pseudo(proc, L1));
end_scope(proc->linearizer, proc);
free_temp_pseudo(proc, stop_pseudo, false);
free_temp_pseudo(proc, step_pseudo, false);
free_temp_pseudo(proc, limit_pseudo, false);
free_temp_pseudo(proc, index_var_pseudo, false);
start_block(proc, Lend);
proc->current_break_target = previous_break_target;
proc->current_break_scope = previous_break_scope;
}
static void linearize_for_num_statement(Proc *proc, AstNode *node)
{
assert(node->type == STMT_FOR_NUM);
/* For now we only allow integer expressions */
AstNode *expr;
FOR_EACH_PTR(node->for_stmt.expr_list, AstNode, expr)
{
if (expr->common_expr.type.type_code != RAVI_TNUMINT) {
handle_error(proc->linearizer->ast_container,
"Only for loops with integer expressions currently supported");
}
}
END_FOR_EACH_PTR(expr)
/* Check if we can optimize */
AstNode *step_expr = (AstNode *) raviX_ptrlist_nth_entry((PtrList *)node->for_stmt.expr_list, 2);
{
bool step_known_positive = false;
// bool step_known_negative = false;
if (step_expr == NULL) {
step_known_positive = true;
} else if (step_expr->type == EXPR_LITERAL) {
if (step_expr->literal_expr.type.type_code == RAVI_TNUMINT) {
if (step_expr->literal_expr.u.i > 0)
step_known_positive = true;
// else if (step_expr->literal_expr.u.i < 0)
// step_known_negative = true;
}
}
if (step_known_positive) {
linearize_for_num_statement_positivestep(proc, node);
return;
}
}
/* Default case where we do not know if step is negative or positive */
start_scope(proc->linearizer, proc, node->for_stmt.for_scope);
AstNode *index_var_expr = (AstNode *) raviX_ptrlist_nth_entry((PtrList *)node->for_stmt.expr_list, 0);
AstNode *limit_expr = (AstNode *) raviX_ptrlist_nth_entry((PtrList *)node->for_stmt.expr_list, 1);
LuaSymbol *var_sym = (LuaSymbol *) raviX_ptrlist_nth_entry((PtrList *)node->for_stmt.symbols, 0);
if (index_var_expr == NULL || limit_expr == NULL) {
handle_error(proc->linearizer->ast_container, "A least index and limit must be supplied");
}
Pseudo *t = linearize_expression(proc, index_var_expr);
if (t->type == PSEUDO_RANGE) {
convert_range_to_temp(t); // Only accept one result
}
Pseudo *index_var_pseudo = allocate_temp_pseudo(proc, RAVI_TNUMINT);
instruct_move(proc, op_mov, index_var_pseudo, t);
t = linearize_expression(proc, limit_expr);
if (t->type == PSEUDO_RANGE) {
convert_range_to_temp(t); // Only accept one result
}
Pseudo *limit_pseudo = allocate_temp_pseudo(proc, RAVI_TNUMINT);
instruct_move(proc, op_mov, limit_pseudo, t);
if (step_expr == NULL)
t = allocate_constant_pseudo(proc, allocate_integer_constant(proc, 1));
else {
t = linearize_expression(proc, step_expr);
if (t->type == PSEUDO_RANGE) {
convert_range_to_temp(t); // Only accept one result
}
}
Pseudo *step_pseudo = allocate_temp_pseudo(proc, RAVI_TNUMINT);
instruct_move(proc, op_mov, step_pseudo, t);
Pseudo *step_positive = allocate_temp_pseudo(proc, RAVI_TBOOLEAN);
create_binary_instruction(proc, op_ltii, allocate_constant_pseudo(proc, allocate_integer_constant(proc, 0)),
step_pseudo, step_positive);
Pseudo *stop_pseudo = allocate_temp_pseudo(proc, RAVI_TBOOLEAN);
create_binary_instruction(proc, op_subii, index_var_pseudo, step_pseudo, index_var_pseudo);
BasicBlock *L1 = create_block(proc);
BasicBlock *L2 = create_block(proc);
BasicBlock *L3 = create_block(proc);
BasicBlock *Lbody = create_block(proc);
BasicBlock *Lend = create_block(proc);
BasicBlock *previous_break_target = proc->current_break_target;
Scope *previous_break_scope = proc->current_break_scope;
proc->current_break_target = Lend;
proc->current_break_scope = proc->current_scope;
start_block(proc, L1);
create_binary_instruction(proc, op_addii, index_var_pseudo, step_pseudo, index_var_pseudo);
instruct_cbr(proc, step_positive, L2, L3);
start_block(proc, L2);
create_binary_instruction(proc, op_ltii, limit_pseudo, index_var_pseudo, stop_pseudo);
instruct_cbr(proc, stop_pseudo, Lend, Lbody);
start_block(proc, L3);
create_binary_instruction(proc, op_ltii, index_var_pseudo, limit_pseudo, stop_pseudo);
instruct_cbr(proc, stop_pseudo, Lend, Lbody);
start_block(proc, Lbody);
instruct_move(proc, op_mov, var_sym->variable.pseudo, index_var_pseudo);
start_scope(proc->linearizer, proc, node->for_stmt.for_body);
linearize_statement_list(proc, node->for_stmt.for_statement_list);
end_scope(proc->linearizer, proc);
/* If the fornum block has escaped local vars then we need to close */
if (proc->current_break_scope->need_close) {
/* Note we put close instruction in current basic block */
instruct_close(proc, proc->current_bb, proc->current_break_scope);
}
instruct_br(proc, allocate_block_pseudo(proc, L1));
end_scope(proc->linearizer, proc);
free_temp_pseudo(proc, stop_pseudo, false);
free_temp_pseudo(proc, step_positive, false);
free_temp_pseudo(proc, step_pseudo, false);
free_temp_pseudo(proc, limit_pseudo, false);
free_temp_pseudo(proc, index_var_pseudo, false);
start_block(proc, Lend);
proc->current_break_target = previous_break_target;
proc->current_break_scope = previous_break_scope;
}
static void linearize_while_statment(Proc *proc, AstNode *node)
{
BasicBlock *test_block = create_block(proc);
BasicBlock *body_block = create_block(proc);
BasicBlock *end_block = create_block(proc);
BasicBlock *previous_break_target = proc->current_break_target;
Scope *previous_break_scope = proc->current_break_scope;
proc->current_break_target = end_block;
proc->current_break_scope = node->while_or_repeat_stmt.loop_scope;
if (node->type == STMT_REPEAT) {
instruct_br(proc, allocate_block_pseudo(proc, body_block));
}
start_block(proc, test_block);
Pseudo *condition_pseudo = linearize_expression(proc, node->while_or_repeat_stmt.condition);
instruct_cbr(proc, condition_pseudo, body_block, end_block);
free_temp_pseudo(proc, condition_pseudo, false);
start_block(proc, body_block);
start_scope(proc->linearizer, proc, node->while_or_repeat_stmt.loop_scope);
linearize_statement_list(proc, node->while_or_repeat_stmt.loop_statement_list);
end_scope(proc->linearizer, proc);
/* If the while/repeat block has escaped local vars then we need to close */
if (proc->current_break_scope->need_close) {
instruct_close(proc, proc->current_bb, proc->current_break_scope);
}
instruct_br(proc, allocate_block_pseudo(proc, test_block));
start_block(proc, end_block);
proc->current_break_target = previous_break_target;
proc->current_break_scope = previous_break_scope;
}
static void linearize_function_statement(Proc *proc, AstNode *node)
{
/* function funcname funcbody */
/* funcname ::= Name {. Name} [: Name] */
// Note the similarity of following to the handling of suffixed expressions and assignment expressions
// In truth we could translate this to an expression statement - the only benefit here is that we
// do not allow selectors to be arbitrary expressions
Pseudo *prev_pseudo = linearize_symbol_expression(proc, node->function_stmt.name);
AstNode *prev_node = node->function_stmt.name;
AstNode *this_node;
FOR_EACH_PTR(node->function_stmt.selectors, AstNode, this_node)
{
Pseudo *next;
if (this_node->type == EXPR_FIELD_SELECTOR) {
Pseudo *key_pseudo = linearize_expression(proc, this_node->index_expr.expr);
ravitype_t key_type = this_node->index_expr.expr->common_expr.type.type_code;
next = instruct_indexed_load(proc, prev_node->common_expr.type.type_code, prev_pseudo, key_type,
key_pseudo, this_node->common_expr.type.type_code);
} else {
next = NULL;
handle_error(proc->linearizer->ast_container,
"Unexpected expr type in function name selector list");
}
prev_node = this_node;
prev_pseudo = next;
}
END_FOR_EACH_PTR(node)
// FIXME maybe better to add the method name to the selector list above in the parser
// then we could have just handled it above rather than as a special case
if (node->function_stmt.method_name) {
this_node = node->function_stmt.method_name;
if (this_node->type == EXPR_FIELD_SELECTOR) {
Pseudo *key_pseudo = linearize_expression(proc, this_node->index_expr.expr);
ravitype_t key_type = this_node->index_expr.expr->common_expr.type.type_code;
prev_pseudo =
instruct_indexed_load(proc, prev_node->common_expr.type.type_code, prev_pseudo, key_type,
key_pseudo, this_node->common_expr.type.type_code);
} else {
handle_error(proc->linearizer->ast_container,
"Unexpected expr type in function name selector list");
}
prev_node = this_node;
}
Pseudo *function_pseudo = linearize_function_expr(proc, node->function_stmt.function_expr);
/* Following will potentially convert load to store */
linearize_store_var(proc, &prev_node->common_expr.type, prev_pseudo,
&node->function_stmt.function_expr->common_expr.type, function_pseudo);
}
static void linearize_statement(Proc *proc, AstNode *node)
{
switch (node->type) {
case AST_NONE: {
break;
}
case STMT_RETURN: {
linearize_return(proc, node);
break;
}
case STMT_LOCAL: {
linearize_local_statement(proc, node);
break;
}
case STMT_FUNCTION: {
linearize_function_statement(proc, node);
break;
}
case STMT_LABEL: {
linearize_label_statement(proc, node);
break;
}
case STMT_GOTO: {
linearize_goto_statement(proc, node);
break;
}
case STMT_DO: {
linearize_do_statement(proc, node);
break;
}
case STMT_EXPR: {
linearize_expression_statement(proc, node);
break;
}
case STMT_IF: {
linearize_if_statement(proc, node);
break;
}
case STMT_WHILE:
case STMT_REPEAT: {
linearize_while_statment(proc, node);
break;
}
case STMT_FOR_IN: {
handle_error(proc->linearizer->ast_container, "STMT_FOR_IN not yet implemented");
break;
}
case STMT_FOR_NUM: {
linearize_for_num_statement(proc, node);
break;
}
default:
handle_error(proc->linearizer->ast_container, "unknown statement type");
break;
}
}
/**
* Creates and initializes a basic block to be an empty block. Returns the new basic block.
*/
static BasicBlock *create_block(Proc *proc)
{
if (proc->node_count >= proc->allocated) {
unsigned new_size = proc->allocated + 25;
BasicBlock **new_data = (BasicBlock **)
raviX_allocator_allocate(&proc->linearizer->unsized_allocator, new_size * sizeof(BasicBlock *));
assert(new_data != NULL);
if (proc->node_count > 0) {
memcpy(new_data, proc->nodes, proc->allocated * sizeof(BasicBlock *));
}
proc->allocated = new_size;
proc->nodes = new_data;
}
assert(proc->node_count < proc->allocated);
BasicBlock *new_block = (BasicBlock *) raviX_allocator_allocate(&proc->linearizer->basic_block_allocator, 0);
/* note that each block must have an index that can be used to access the block as nodes[index] */
new_block->index = proc->node_count;
proc->nodes[proc->node_count++] = new_block;
return new_block;
}
/**
* Takes a basic block as an argument and makes it the current block.
*
* If the old current block is unterminated then this will terminate that
* block by adding an unconditional branch to the new current block.
*
* All future instructions will be added to the end of the new current block
*/
static void start_block(Proc *proc, BasicBlock *bb_to_start)
{
// printf("Starting block %d\n", bb_to_start->index);
if (proc->current_bb && !is_block_terminated(proc->current_bb)) {
instruct_br(proc, allocate_block_pseudo(proc, bb_to_start));
}
proc->current_bb = bb_to_start;
}
/**
* Create the initial blocks entry and exit for the proc.
* sets current block to entry block.
*/
static void initialize_graph(Proc *proc)
{
assert(proc != NULL);
BasicBlock *entry = create_block(proc);
assert(entry->index == ENTRY_BLOCK);
BasicBlock *exit = create_block(proc);
assert(exit->index == EXIT_BLOCK);
start_block(proc, entry);
}
/**
* Makes given scope the current scope, and allocates registers for locals.
*/
static void start_scope(LinearizerState *linearizer, Proc *proc, Scope *scope)
{
proc->current_scope = scope;
LuaSymbol *sym;
FOR_EACH_PTR(scope->symbol_list, LuaSymbol, sym)
{
if (sym->symbol_type == SYM_LOCAL) {
uint8_t reg;
if (!sym->variable.escaped && !sym->variable.function_parameter &&
(sym->variable.value_type.type_code == RAVI_TNUMFLT ||
sym->variable.value_type.type_code == RAVI_TNUMINT)) {
Pseudo *pseudo;
if (sym->variable.value_type.type_code == RAVI_TNUMFLT)
pseudo = allocate_temp_pseudo(proc, RAVI_TNUMFLT);
else
pseudo = allocate_temp_pseudo(proc, RAVI_TNUMINT);
sym->variable.pseudo = pseudo;
pseudo->temp_for_local = sym; /* Note that this temp is for a local */
}
else {
reg = allocate_register(&proc->local_pseudos);
allocate_symbol_pseudo(proc, sym, reg);
}
// printf("Assigning register %d to local %s\n", (int)reg, getstr(sym->var.var_name));
}
}
END_FOR_EACH_PTR(sym)
}
/**
* Deallocate local registers when the scope ends, in reverse order
* so that we have a stack discipline, and then changes current scope to be the
* parent scope.
*/
static void end_scope(LinearizerState *linearizer, Proc *proc)
{
Scope *scope = proc->current_scope;
LuaSymbol *sym;
if (scope->need_close) {
instruct_close(proc, proc->current_bb, scope);
}
FOR_EACH_PTR_REVERSE(scope->symbol_list, LuaSymbol, sym)
{
if (sym->symbol_type == SYM_LOCAL) {
Pseudo *pseudo = sym->variable.pseudo;
if (pseudo->type == PSEUDO_SYMBOL) {
assert(pseudo && pseudo->type == PSEUDO_SYMBOL && pseudo->symbol == sym);
// printf("Free register %d for local %s\n", (int)pseudo->regnum, getstr(sym->var.var_name));
free_register(proc, &proc->local_pseudos, pseudo->regnum);
}
else if (pseudo->type == PSEUDO_TEMP_INT || pseudo->type == PSEUDO_TEMP_FLT || pseudo->type == PSEUDO_TEMP_BOOL) {
assert(sym == pseudo->temp_for_local);
free_temp_pseudo(proc, sym->variable.pseudo, true);
}
else {
assert(false);
}
}
}
END_FOR_EACH_PTR_REVERSE(sym)
proc->current_scope = scope->parent;
}
static void linearize_function(LinearizerState *linearizer)
{
Proc *proc = linearizer->current_proc;
assert(proc != NULL);
AstNode *func_expr = proc->function_expr;
assert(func_expr->type == EXPR_FUNCTION);
initialize_graph(proc);
assert(proc->node_count >= 2);
assert(proc->nodes[ENTRY_BLOCK] != NULL);
assert(proc->nodes[EXIT_BLOCK] != NULL);
start_scope(linearizer, proc, func_expr->function_expr.main_block);
linearize_function_args(linearizer);
linearize_statement_list(proc, func_expr->function_expr.function_statement_list);
end_scope(linearizer, proc);
if (!is_block_terminated(proc->current_bb)) {
//instruct_br(proc, allocate_block_pseudo(proc, proc->nodes[EXIT_BLOCK]));
Instruction *insn = allocate_instruction(proc, op_ret);
add_instruction_target(proc, insn, allocate_block_pseudo(proc, proc->nodes[EXIT_BLOCK]));
add_instruction(proc, insn);
}
}
static void output_pseudo(Pseudo *pseudo, TextBuffer *mb)
{
switch (pseudo->type) {
case PSEUDO_CONSTANT: {
const Constant *constant = pseudo->constant;
const char *tc = "";
if (constant->type == RAVI_TNUMFLT) {
raviX_buffer_add_fstring(mb, "%.12f", constant->n);
tc = "flt";
} else if (constant->type == RAVI_TNUMINT) {
raviX_buffer_add_fstring(mb, "%lld", (long long)constant->i);
tc = "int";
} else {
raviX_buffer_add_fstring(mb, "'%s'", constant->s->str);
tc = "s";
}
raviX_buffer_add_fstring(mb, " K%s(%d)", tc, pseudo->regnum);
} break;
case PSEUDO_TEMP_INT:
raviX_buffer_add_fstring(mb, "Tint(%d)", pseudo->regnum);
break;
case PSEUDO_TEMP_BOOL:
raviX_buffer_add_fstring(mb, "Tbool(%d)", pseudo->regnum);
break;
case PSEUDO_TEMP_FLT:
raviX_buffer_add_fstring(mb, "Tflt(%d)", pseudo->regnum);
break;
case PSEUDO_TEMP_ANY:
raviX_buffer_add_fstring(mb, "T(%d)", pseudo->regnum);
break;
case PSEUDO_RANGE_SELECT:
raviX_buffer_add_fstring(mb, "T(%d[%d..])", pseudo->regnum, pseudo->range_pseudo->regnum);
break;
case PSEUDO_PROC:
raviX_buffer_add_fstring(mb, "Proc%%%d", pseudo->proc->id);
break;
case PSEUDO_NIL:
raviX_buffer_add_string(mb, "nil");
break;
case PSEUDO_FALSE:
raviX_buffer_add_string(mb, "false");
break;
case PSEUDO_TRUE:
raviX_buffer_add_string(mb, "true");
break;
case PSEUDO_SYMBOL:
switch (pseudo->symbol->symbol_type) {
case SYM_LOCAL: {
raviX_buffer_add_fstring(mb, "local(%s, %d)", pseudo->symbol->variable.var_name->str,
pseudo->regnum);
break;
}
case SYM_UPVALUE: {
if (pseudo->symbol->upvalue.target_variable->symbol_type == SYM_LOCAL) {
raviX_buffer_add_fstring(mb, "Upval(%u, Proc%%%d, %s)", pseudo->regnum,
pseudo->symbol->upvalue.target_variable->variable.block->function->function_expr.proc_id,
pseudo->symbol->upvalue.target_variable->variable.var_name->str);
}
else if (pseudo->symbol->upvalue.target_variable->symbol_type == SYM_ENV) {
raviX_buffer_add_fstring(mb, "Upval(%s)",
pseudo->symbol->upvalue.target_variable->variable.var_name->str);
}
break;
}
case SYM_GLOBAL: {
raviX_buffer_add_string(mb, pseudo->symbol->variable.var_name->str);
break;
}
default:
// handle_error(proc->linearizer->ast_container, "feature not yet implemented");
abort();
}
break;
case PSEUDO_BLOCK: {
raviX_buffer_add_fstring(mb, "L%d", pseudo->block ? (int)pseudo->block->index : -1);
break;
}
case PSEUDO_RANGE: {
raviX_buffer_add_fstring(mb, "T(%d..)", pseudo->regnum);
break;
}
}
}
static const char *op_codenames[] = {
"NOOP", "RET", "ADD", "ADDff", "ADDfi", "ADDii", "SUB", "SUBff", "SUBfi",
"SUBif", "SUBii", "MUL", "MULff", "MULfi", "MULii", "DIV", "DIVff", "DIVfi",
"DIVif", "DIVii", "IDIV", "BAND", "BANDii", "BOR", "BORii", "BXOR", "BXORii",
"SHL", "SHLii", "SHR", "SHRii", "EQ", "EQii", "EQff", "LT", "LIii",
"LTff", "LE", "LEii", "LEff", "MOD", "POW", "CLOSURE", "UNM", "UNMi",
"UNMf", "LEN", "LENi", "TOINT", "TOFLT", "TOCLOSURE", "TOSTRING", "TOIARRAY", "TOFARRAY",
"TOTABLE", "TOTYPE", "NOT", "BNOT", "LOADGLOBAL", "NEWTABLE", "NEWIARRAY", "NEWFARRAY", "PUT",
"PUTik", "PUTsk", "TPUT", "TPUTik", "TPUTsk", "IAPUT", "IAPUTiv", "FAPUT", "FAPUTfv",
"CBR", "BR", "MOV", "MOVi", "MOVif", "MOVf", "MOVfi", "CALL", "GET",
"GETik", "GETsk", "TGET", "TGETik", "TGETsk", "IAGET", "IAGETik", "FAGET", "FAGETik",
"STOREGLOBAL", "CLOSE", "STRCONCAT"};
static void output_pseudo_list(PseudoList *list, TextBuffer *mb)
{
Pseudo *pseudo;
raviX_buffer_add_string(mb, " {");
int i = 0;
FOR_EACH_PTR(list, Pseudo, pseudo)
{
if (i > 0)
raviX_buffer_add_string(mb, ", ");
output_pseudo(pseudo, mb);
i++;
}
END_FOR_EACH_PTR(pseudo)
raviX_buffer_add_string(mb, "}");
}
const char *raviX_opcode_name(unsigned int opcode) {
return op_codenames[opcode];
}
/* Outputs a single instruction. A prefix and suffix can be prepended/appended for formatting reasons.
Each instruction is output in following format.
instruction_name { operands } { targets }
*/
static void output_instruction(Instruction *insn, TextBuffer *mb, const char *prefix, const char *suffix)
{
raviX_buffer_add_fstring(mb, "%s%s", prefix, op_codenames[insn->opcode]);
if (insn->operands) {
output_pseudo_list(insn->operands, mb);
}
if (insn->targets) {
output_pseudo_list(insn->targets, mb);
}
raviX_buffer_add_string(mb, suffix);
}
/* Outputs linear IR instructions in text format, each instruction is prefixed and suffixed by given strings */
static void output_instructions(InstructionList *list, TextBuffer *mb, const char *prefix, const char *suffix)
{
Instruction *insn;
FOR_EACH_PTR(list, Instruction, insn) { output_instruction(insn, mb, prefix, suffix); }
END_FOR_EACH_PTR(insn)
}
/* Outputs the linear IR instructions within a basic block
in textual format.
*/
static void output_basic_block(Proc *proc, BasicBlock *bb, TextBuffer *mb)
{
raviX_buffer_add_fstring(mb, "L%d", bb->index);
if (bb->index == ENTRY_BLOCK) {
raviX_buffer_add_string(mb, " (entry)\n");
} else if (bb->index == EXIT_BLOCK) {
raviX_buffer_add_string(mb, " (exit)\n");
} else {
raviX_buffer_add_string(mb, "\n");
}
output_instructions(bb->insns, mb, "\t", "\n");
}
/* Outputs the basic block and instructions inside it as an HTML table. This is
meant to be used in .dot files, hence the table is formatted in a way that works
with graphviz tool */
void raviX_output_basic_block_as_table(Proc *proc, BasicBlock *bb, TextBuffer *mb)
{
raviX_buffer_add_string(mb, "<TABLE BORDER=\"1\" CELLBORDER=\"0\">\n");
raviX_buffer_add_fstring(mb, "<TR><TD><B>L%d</B></TD></TR>\n", bb->index);
output_instructions(bb->insns, mb, "<TR><TD>", "</TD></TR>\n");
raviX_buffer_add_string(mb, "</TABLE>");
}
/* Outputs the linear IR for the given proc in a text format. */
static void output_proc(Proc *proc, TextBuffer *mb)
{
BasicBlock *bb;
raviX_buffer_add_fstring(mb, "define Proc%%%d\n", proc->id);
for (int i = 0; i < (int)proc->node_count; i++) {
bb = proc->nodes[i];
output_basic_block(proc, bb, mb);
}
}
/*
Generates the linear IR for the current parse tree (AST).
Returns 0 on success.
*/
int raviX_ast_linearize(LinearizerState *linearizer)
{
Proc *proc = allocate_proc(linearizer, linearizer->ast_container->main_function);
set_main_proc(linearizer, proc);
set_current_proc(linearizer, proc);
int rc = setjmp(linearizer->ast_container->env);
if (rc == 0) {
linearize_function(linearizer);
}
return rc;
}
/* Outputs the linear IR in text format to the given text buffer. Procs are output
recursively starting at the main proc representing the Lua chunk.
*/
void raviX_show_linearizer(LinearizerState *linearizer, TextBuffer *mb)
{
output_proc(linearizer->main_proc, mb);
Proc *proc;
FOR_EACH_PTR(linearizer->all_procs, Proc, proc)
{
if (proc == linearizer->main_proc)
continue;
output_proc(proc, mb);
}
END_FOR_EACH_PTR(proc)
}
/* Outputs the linear IR in text format to the given file */
void raviX_output_linearizer(LinearizerState *linearizer, FILE *fp)
{
TextBuffer mb;
raviX_buffer_init(&mb, 4096);
raviX_show_linearizer(linearizer, &mb);
fputs(mb.buf, fp);
raviX_buffer_free(&mb);
}