You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
505 lines
15 KiB
505 lines
15 KiB
#ifndef ravi_ast_h
|
|
#define ravi_ast_h
|
|
|
|
/*
|
|
A parser and syntax tree builder for Ravi. This is work in progress.
|
|
Once ready it will be used to create a new byte code generator for Ravi.
|
|
|
|
The parser will perform the following actions:
|
|
|
|
a) Generate syntax tree
|
|
b) Perform type checking (Ravi enhancement)
|
|
*/
|
|
|
|
#define LUA_CORE
|
|
#include "lprefix.h"
|
|
#include "lua.h"
|
|
|
|
#include "lcode.h"
|
|
#include "ldo.h"
|
|
#include "lstring.h"
|
|
#include "ltable.h"
|
|
#include "lauxlib.h"
|
|
|
|
#include "ravi_ast.h"
|
|
#include "ravi_membuf.h"
|
|
#include "ravi_set.h"
|
|
|
|
#include "allocate.h"
|
|
#include "ptrlist.h"
|
|
|
|
#include <assert.h>
|
|
#include <stdarg.h>
|
|
#include <stddef.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <stdbool.h>
|
|
#include <stdint.h>
|
|
|
|
/* NOTE(review): this limit is not referenced anywhere in this header; presumably it caps the
   number of local variables per function, as in the Lua parser - confirm at the use site. */
#define MAXVARS 125
|
|
|
|
//////////////////////////
|
|
|
|
struct lua_symbol_list;
|
|
struct linearizer;
|
|
|
|
/*
|
|
* Userdata object to hold the abstract syntax tree;
|
|
* All memory is held by this object. Memory is freed when
|
|
* the object is GC collected; or when
|
|
* ast_container:release() method is called
|
|
* by user.
|
|
*/
|
|
struct ast_container {
|
|
struct allocator ast_node_allocator;
|
|
struct allocator ptrlist_allocator;
|
|
struct allocator block_scope_allocator;
|
|
struct allocator symbol_allocator;
|
|
struct ast_node *main_function;
|
|
struct linearizer *linearizer;
|
|
bool killed; /* flag to check if this is already destroyed */
|
|
};
|
|
|
|
struct ast_node;
|
|
DECLARE_PTR_LIST(ast_node_list, struct ast_node);
|
|
|
|
struct var_type;
|
|
DECLARE_PTR_LIST(var_type_list, struct var_type);
|
|
|
|
/* Lua type info. We need to support user defined types too which are known by name */
|
|
struct var_type {
|
|
ravitype_t type_code;
|
|
const TString *type_name; /* type name for user defined types; used to lookup metatable in registry, only set when
|
|
type_code is RAVI_TUSERDATA */
|
|
};
|
|
|
|
struct pseudo;
|
|
struct lua_symbol;
|
|
DECLARE_PTR_LIST(lua_symbol_list, struct lua_symbol);
|
|
|
|
struct block_scope;
|
|
|
|
/* Types of symbols; selects which member of the union in struct lua_symbol is in use. */
enum symbol_type {
  SYM_LOCAL,
  SYM_UPVALUE,
  SYM_GLOBAL, /* Global symbols are never added to a scope so they are always looked up */
  SYM_LABEL
};
|
|
|
|
/* A symbol is a name recognised in Ravi/Lua code*/
|
|
struct lua_symbol {
|
|
enum symbol_type symbol_type;
|
|
struct var_type value_type;
|
|
union {
|
|
struct {
|
|
const TString *var_name; /* name of the variable */
|
|
struct block_scope *block; /* NULL if global symbol, as globals are never added to a scope */
|
|
struct pseudo *pseudo; /* backend data for the symbol */
|
|
} var;
|
|
struct {
|
|
const TString *label_name;
|
|
struct block_scope *block;
|
|
} label;
|
|
struct {
|
|
struct lua_symbol *var; /* variable reference */
|
|
struct ast_node *function; /* Where the upvalue lives */
|
|
} upvalue;
|
|
};
|
|
};
|
|
|
|
/* A lexical block; blocks are chained to their enclosing block through `parent`. */
struct block_scope {
  struct ast_node *function;           /* function owning this block - of type FUNCTION_EXPR */
  struct block_scope *parent;          /* parent block, may belong to parent function */
  struct lua_symbol_list *symbol_list; /* symbols defined in this block */
};
|
|
|
|
/* Discriminator stored in ast_node.type. */
enum ast_node_type {
  AST_NONE, /* Used when the node doesn't represent an AST such as test_then_block. */
  AST_RETURN_STMT,
  AST_GOTO_STMT,
  AST_LABEL_STMT,
  AST_DO_STMT,
  AST_LOCAL_STMT,
  AST_FUNCTION_STMT,
  AST_IF_STMT,
  AST_WHILE_STMT,
  AST_FORIN_STMT,
  AST_FORNUM_STMT,
  AST_REPEAT_STMT,
  AST_EXPR_STMT, /* Also used for assignment statements */
  AST_LITERAL_EXPR,
  AST_SYMBOL_EXPR,
  AST_Y_INDEX_EXPR,        /* [] operator */
  AST_FIELD_SELECTOR_EXPR, /* table field access - '.' or ':' operator */
  AST_INDEXED_ASSIGN_EXPR, /* table value assign in table constructor */
  AST_SUFFIXED_EXPR,
  AST_UNARY_EXPR,
  AST_BINARY_EXPR,
  AST_FUNCTION_EXPR, /* function literal */
  AST_TABLE_EXPR,    /* table constructor */
  AST_FUNCTION_CALL_EXPR
};
|
|
|
|
/* The parse tree is made up of ast_node objects. Some of the ast_nodes reference the appropriate block
|
|
scopes but not all scopes may be referenced. The tree captures Lua syntax tree - i.e. statements such as
|
|
while, repeat, and for are captured in the way user uses them and not the way Lua generates code. Potentially
|
|
we can have a transformation step to convert to a tree that is more like the code generation */
|
|
struct ast_node {
|
|
enum ast_node_type type;
|
|
union {
|
|
struct {
|
|
struct ast_node_list *expr_list;
|
|
} return_stmt;
|
|
struct {
|
|
struct lua_symbol *symbol;
|
|
} label_stmt;
|
|
struct {
|
|
const TString *name; /* target label, used to resolve the goto destination */
|
|
struct ast_node *label_stmt; /* Initially this will be NULL; set by a separate pass */
|
|
} goto_stmt;
|
|
struct {
|
|
struct lua_symbol_list *var_list;
|
|
struct ast_node_list *expr_list;
|
|
} local_stmt; /* local declarations */
|
|
struct {
|
|
struct ast_node_list *var_expr_list; /* Optional var expressions, comma separated */
|
|
struct ast_node_list *expr_list; /* Comma separated expressions */
|
|
} expression_stmt; /* Also covers assignments */
|
|
struct {
|
|
struct ast_node *name; /* base symbol to be looked up */
|
|
struct ast_node_list *selectors; /* Optional */
|
|
struct ast_node *method_name; /* Optional */
|
|
struct ast_node *function_expr; /* Function's AST */
|
|
} function_stmt;
|
|
struct {
|
|
struct block_scope *scope; /* The do statement only creates a new scope */
|
|
struct ast_node_list *do_statement_list; /* statements in this block */
|
|
} do_stmt;
|
|
struct {
|
|
struct ast_node *condition;
|
|
struct block_scope *test_then_scope;
|
|
struct ast_node_list *test_then_statement_list; /* statements in this block */
|
|
} test_then_block; /* Used internally in if_stmt, not an independent AST node */
|
|
struct {
|
|
struct ast_node_list *if_condition_list; /* Actually a list of test_then_blocks */
|
|
struct block_scope *else_block;
|
|
struct ast_node_list *else_statement_list; /* statements in this block */
|
|
} if_stmt;
|
|
struct {
|
|
struct ast_node *condition;
|
|
struct block_scope *loop_scope;
|
|
struct ast_node_list *loop_statement_list; /* statements in this block */
|
|
} while_or_repeat_stmt;
|
|
struct {
|
|
struct lua_symbol_list *symbols;
|
|
struct ast_node_list *expr_list;
|
|
struct block_scope *for_body;
|
|
struct ast_node_list *for_statement_list; /* statements in this block */
|
|
} for_stmt; /* Used for both generic and numeric for loops */
|
|
struct {
|
|
struct var_type type;
|
|
} common_expr; /* To access the type field common to all expr objects */
|
|
/* all expr types must be compatible with common_expr */
|
|
struct {
|
|
struct var_type type;
|
|
union {
|
|
lua_Integer i;
|
|
lua_Number n;
|
|
const TString *s;
|
|
} u;
|
|
} literal_expr;
|
|
struct { /* primaryexp -> NAME | '(' expr ')', NAME is parsed as AST_SYMBOL_EXPR */
|
|
struct var_type type;
|
|
struct lua_symbol *var;
|
|
} symbol_expr;
|
|
struct { /* AST_Y_INDEX_EXPR or AST_FIELD_SELECTOR_EXPR */
|
|
struct var_type type;
|
|
struct ast_node *expr; /* '[' expr ']' */
|
|
} index_expr;
|
|
struct { /* AST_UNARY_EXPR */
|
|
struct var_type type;
|
|
UnOpr unary_op;
|
|
struct ast_node *expr;
|
|
} unary_expr;
|
|
struct {
|
|
struct var_type type;
|
|
BinOpr binary_op;
|
|
struct ast_node *expr_left;
|
|
struct ast_node *expr_right;
|
|
} binary_expr;
|
|
struct {
|
|
struct var_type type;
|
|
unsigned int is_vararg : 1;
|
|
unsigned int is_method : 1;
|
|
struct ast_node *parent_function; /* parent function or NULL if main chunk */
|
|
struct block_scope *main_block; /* the function's main block */
|
|
struct ast_node_list *function_statement_list; /* statements in this block */
|
|
struct lua_symbol_list *args; /* arguments, also must be part of the function block's symbol list */
|
|
struct ast_node_list *child_functions; /* child functions declared in this function */
|
|
struct lua_symbol_list *upvalues; /* List of upvalues */
|
|
struct lua_symbol_list *locals; /* List of locals */
|
|
} function_expr; /* a literal expression whose result is a value of type function */
|
|
struct { /* AST_INDEXED_ASSIGN_EXPR - used in table constructor */
|
|
struct var_type type;
|
|
struct ast_node *index_expr; /* If NULL means this is a list field with next available index, else specifies index
|
|
expression */
|
|
struct ast_node *value_expr;
|
|
} indexed_assign_expr; /* Assign values in table constructor */
|
|
struct { /* constructor -> '{' [ field { sep field } [sep] ] '}' where sep -> ',' | ';' */
|
|
struct var_type type;
|
|
struct ast_node_list *expr_list;
|
|
} table_expr; /* table constructor expression AST_TABLE_EXPR occurs in function call and simple expr */
|
|
struct {
|
|
/* suffixedexp -> primaryexp { '.' NAME | '[' exp ']' | ':' NAME funcargs | funcargs } */
|
|
/* suffix_list may have AST_FIELD_SELECTOR_EXPR, AST_Y_INDEX_EXPR, AST_FUNCTION_CALL_EXPR */
|
|
struct var_type type;
|
|
struct ast_node *primary_expr;
|
|
struct ast_node_list *suffix_list;
|
|
} suffixed_expr;
|
|
struct {
|
|
/* Note that in Ravi the results from a function call must be type asserted during assignment to variables.
|
|
* This is not explicit in the AST but is required to ensure that function return values do not
|
|
* overwrite the type of the variables in an inconsistent way.
|
|
*/
|
|
struct var_type type;
|
|
TString *method_name; /* Optional method_name */
|
|
struct ast_node_list *arg_list; /* Call arguments */
|
|
} function_call_expr;
|
|
};
|
|
};
|
|
|
|
/*
 * Helpers for values that have `type_code` and `type_name` members
 * (struct var_type).
 *
 * Each expansion is fully parenthesised, as are the value arguments, so the
 * macros can be used safely inside larger expressions (for example as the
 * operands of ?:) as well as in statement position.
 * Note that `vt`, `a` and `b` are evaluated more than once; do not pass
 * expressions with side effects.
 *
 *   set_typecode(vt, t)       - set only the type code
 *   set_type(vt, t)           - set the type code and clear the type name
 *   set_typename(vt, t, name) - set both the type code and the type name
 *   is_type_same(a, b)        - true if code and name (by pointer identity) match
 *   copy_type(a, b)           - copy code and name from b into a
 */
#define set_typecode(vt, t) ((vt).type_code = (t))
#define set_type(vt, t) ((vt).type_code = (t), (vt).type_name = NULL)
#define set_typename(vt, t, name) ((vt).type_code = (t), (vt).type_name = (name))
#define is_type_same(a, b) ((a).type_code == (b).type_code && (a).type_name == (b).type_name)
#define copy_type(a, b) ((a).type_code = (b).type_code, (a).type_name = (b).type_name)
|
|
|
|
struct parser_state {
|
|
LexState *ls;
|
|
struct ast_container *container;
|
|
struct ast_node *current_function;
|
|
struct block_scope *current_scope;
|
|
};
|
|
|
|
/* NOTE(review): presumably registers the AST library with the Lua state L - confirm in the implementation. */
LUAMOD_API int raviopen_ast_library(lua_State *L);
|
|
|
|
void raviA_print_ast_node(membuff_t *buf, struct ast_node *node, int level); /* output the AST structure recursively */
|
|
void raviA_ast_typecheck(struct ast_container *container); /* Perform type checks and assign types to the AST held by container */
|
|
|
|
/*
|
|
Linearizer
|
|
*/
|
|
struct instruction;
|
|
struct node;
|
|
struct basic_block;
|
|
struct edge;
|
|
struct cfg;
|
|
struct proc;
|
|
struct constant;
|
|
|
|
DECLARE_PTR_LIST(instruction_list, struct instruction);
|
|
DECLARE_PTR_LIST(edge_list, struct edge);
|
|
DECLARE_PTR_LIST(pseudo_list, struct pseudo);
|
|
DECLARE_PTR_LIST(proc_list, struct proc);
|
|
|
|
/*
 * Given member_ptr, a pointer to the field named `field` inside an object of
 * type container_type, yield a pointer to that enclosing object. The address
 * is computed by stepping back offsetof(container_type, field) bytes.
 */
#define container_of(member_ptr, container_type, field) \
  ((container_type *)((char *)(member_ptr) - offsetof(container_type, field)))
|
|
|
|
/*
 * Opcodes of the linear IR.
 * order is important here ! Do not reorder, and do not insert in the middle.
 * NOTE(review): the suffixes presumably encode operand types (f = number,
 * i = integer, e.g. op_addfi) - confirm against the linearizer.
 */
enum opcode {
  op_nop,
  op_ret,
  op_loadk,
  op_loadnil,
  op_loadbool,
  op_add,
  op_addff,
  op_addfi,
  op_addii,
  op_sub,
  op_subff,
  op_subfi,
  op_subif,
  op_subii,
  op_mul,
  op_mulff,
  op_mulfi,
  op_mulii,
  op_div,
  op_divff,
  op_divfi,
  op_divif,
  op_divii,
  op_idiv,
  op_band,
  op_bandii,
  op_bor,
  op_borii,
  op_bxor,
  op_bxorii,
  op_shl,
  op_shlii,
  op_shr,
  op_shrii,
  op_eq,
  op_eqii,
  op_eqff,
  op_lt,
  op_ltii,
  op_ltff,
  op_le,
  op_leii,
  op_leff,
  op_mod,
  op_pow,
  op_closure,
  op_unm,
  op_unmi,
  op_unmf,
  op_len,
  op_leni,
  op_toint,
  op_toflt,
  op_toclosure,
  op_tostring,
  op_toiarray,
  op_tofarray,
  op_totable,
  op_totype,
  op_not,
  op_bnot,
  op_loadglobal,
  op_newtable,
  op_newiarray,
  op_newfarray,
  op_put, /* target is any */
  op_put_ikey,
  op_put_skey,
  op_tput, /* target is table */
  op_tput_ikey,
  op_tput_skey,
  op_iaput, /* target is integer[] */
  op_iaput_ival,
  op_faput, /* target is number[] */
  op_faput_fval
};
|
|
|
|
/* Kinds of pseudo; stored in the 4-bit `type` field of struct pseudo. */
enum pseudo_type {
  PSEUDO_SYMBOL,
  PSEUDO_TEMP_FLT,
  PSEUDO_TEMP_INT,
  PSEUDO_TEMP_ANY,
  PSEUDO_CONSTANT,
  PSEUDO_PROC,
  PSEUDO_NIL,
  PSEUDO_TRUE,
  PSEUDO_FALSE
};
|
|
|
|
/* pseudo represents a pseudo (virtual) register */
|
|
struct pseudo {
|
|
unsigned type : 4, regnum : 16;
|
|
union {
|
|
struct lua_symbol *symbol; /* PSEUDO_SYMBOL */
|
|
const struct constant *constant; /* PSEUDO_CONSTANT */
|
|
ravitype_t temp_type; /* PSEUDO_TEMP* */
|
|
struct proc *proc; /* PSEUDO_PROC */
|
|
};
|
|
};
|
|
|
|
/* single instruction: an 8-bit opcode plus lists of operand and target pseudos */
struct instruction {
  unsigned opcode : 8; /* NOTE(review): presumably an enum opcode value - confirm */
  struct pseudo_list *operands;
  struct pseudo_list *targets;
};
|
|
|
|
/* Directed edge between two nodes. */
struct edge {
  struct node *from;
  struct node *to;
};
|
|
|
|
/*
 * Member declarations shared by struct node and the structs that specialise
 * it. The expansion has no trailing semicolon; write `NODE_FIELDS;`.
 */
#define NODE_FIELDS        \
  uint32_t index;          \
  struct edge_list *pred;  \
  struct edge_list *succ
|
|
|
|
struct node {
|
|
NODE_FIELDS;
|
|
};
|
|
|
|
/* Basic block is a specialization of node */
|
|
struct basic_block {
|
|
NODE_FIELDS;
|
|
struct instruction_list *insns;
|
|
};
|
|
|
|
/*
 * Member declarations shared by struct cfg and the structs that specialise it.
 * The expansion has no trailing semicolon; write `CFG_FIELDS;`.
 */
#define CFG_FIELDS       \
  unsigned node_count;   \
  unsigned allocated;    \
  struct node **nodes;   \
  struct node *entry;    \
  struct node *exit
|
|
|
|
struct cfg {
|
|
CFG_FIELDS;
|
|
};
|
|
|
|
/*
 * Bookkeeping for handing out register numbers.
 * NOTE(review): looks like next_reg is the next unused register and free_regs
 * is a stack of released registers indexed by free_pos - confirm against the
 * linearizer implementation.
 */
struct pseudo_generator {
  uint8_t next_reg;
  int16_t free_pos;
  uint8_t free_regs[256];
};
|
|
|
|
struct constant {
|
|
uint8_t type;
|
|
uint16_t index; /* index number starting from 0 assigned to each constant - acts like a reg num */
|
|
union {
|
|
lua_Integer i;
|
|
lua_Number n;
|
|
const TString *s;
|
|
};
|
|
};
|
|
|
|
/* proc is a type of cfg */
|
|
struct proc {
|
|
CFG_FIELDS;
|
|
struct linearizer *linearizer;
|
|
struct proc_list *procs; /* procs defined in this proc */
|
|
struct proc *parent; /* enclosing proc */
|
|
struct ast_node *function_expr; /* function ast that we are compiling */
|
|
struct block_scope *current_scope;
|
|
struct basic_block *current_bb;
|
|
struct pseudo_generator local_pseudos; /* locals */
|
|
struct pseudo_generator temp_int_pseudos; /* temporaries known to be integer type */
|
|
struct pseudo_generator temp_flt_pseudos; /* temporaries known to be number type */
|
|
struct pseudo_generator temp_pseudos; /* All other temporaries */
|
|
struct set *constants;
|
|
unsigned num_constants;
|
|
};
|
|
|
|
/* View a node pointer as a basic_block pointer; a plain pointer cast, no checks are performed. */
static inline struct basic_block *n2bb(struct node *n)
{
  struct basic_block *as_block = (struct basic_block *)n;
  return as_block;
}
|
|
/* View a basic_block pointer as a node pointer; a plain pointer cast, no checks are performed. */
static inline struct node *bb2n(struct basic_block *bb)
{
  struct node *as_node = (struct node *)bb;
  return as_node;
}
|
|
|
|
struct linearizer {
|
|
struct allocator instruction_allocator;
|
|
struct allocator edge_allocator;
|
|
struct allocator pseudo_allocator;
|
|
struct allocator ptrlist_allocator;
|
|
struct allocator basic_block_allocator;
|
|
struct allocator proc_allocator;
|
|
struct allocator unsized_allocator;
|
|
struct allocator constant_allocator;
|
|
struct ast_container *ast_container;
|
|
struct proc *main_proc; /* The root of the compiled chunk of code */
|
|
struct proc_list *all_procs; /* All procs allocated by the linearizer */
|
|
struct proc *current_proc; /* proc being compiled */
|
|
};
|
|
|
|
/* NOTE(review): presumably prepares `linearizer` to work on the AST held by `container` - confirm in the implementation. */
void raviA_init_linearizer(struct linearizer *linearizer, struct ast_container *container);
|
|
/* NOTE(review): presumably releases the resources owned by `linearizer` - confirm in the implementation. */
void raviA_destroy_linearizer(struct linearizer *linearizer);
|
|
/* NOTE(review): presumably converts the AST attached to `linearizer` into procs/basic blocks/instructions - confirm. */
void raviA_ast_linearize(struct linearizer *linearizer);
|
|
/* NOTE(review): presumably writes a textual dump of the linearizer's output into `mb` - confirm. */
void raviA_show_linearizer(struct linearizer *linearizer, membuff_t *mb);
|
|
|
|
#endif
|