From 2aeeea4dd3d9a3875ad756b94662cb17b674ddc9 Mon Sep 17 00:00:00 2001 From: Dibyendu Majumdar Date: Sat, 2 Jan 2021 23:56:35 +0000 Subject: [PATCH] issue #198 Include Ravi Compiler --- CMakeLists.txt | 30 +- ravicomp/.clang-format | 7 + ravicomp/CMakeLists.txt | 110 ++ ravicomp/LICENSE | 22 + ravicomp/README.md | 6 + ravicomp/include/ravi_api.h | 36 + ravicomp/include/ravi_compiler.h | 589 +++++++ ravicomp/src/README.md | 25 + ravicomp/src/allocate.c | 275 +++ ravicomp/src/allocate.h | 130 ++ ravicomp/src/ast_printer.c | 543 ++++++ ravicomp/src/ast_simplify.c | 516 ++++++ ravicomp/src/ast_walker.c | 617 +++++++ ravicomp/src/bitset.c | 274 +++ ravicomp/src/bitset.h | 107 ++ ravicomp/src/cfg.c | 81 + ravicomp/src/cfg.h | 11 + ravicomp/src/codegen.c | 2688 +++++++++++++++++++++++++++++ ravicomp/src/codegen.h | 11 + ravicomp/src/common.h | 14 + ravicomp/src/dataflow_framework.c | 94 + ravicomp/src/dataflow_framework.h | 19 + ravicomp/src/df_liveness.c | 99 ++ ravicomp/src/dominator.c | 149 ++ ravicomp/src/dominator.h | 16 + ravicomp/src/fnv_hash.c | 72 + ravicomp/src/fnv_hash.h | 51 + ravicomp/src/graph.c | 389 +++++ ravicomp/src/graph.h | 99 ++ ravicomp/src/hash_table.c | 427 +++++ ravicomp/src/hash_table.h | 109 ++ ravicomp/src/lexer.c | 990 +++++++++++ ravicomp/src/linearizer.c | 2614 ++++++++++++++++++++++++++++ ravicomp/src/linearizer.h | 244 +++ ravicomp/src/membuf.c | 117 ++ ravicomp/src/membuf.h | 27 + ravicomp/src/opt_unusedcode.c | 80 + ravicomp/src/optimizer.h | 10 + ravicomp/src/parser.c | 1748 +++++++++++++++++++ ravicomp/src/parser.h | 376 ++++ ravicomp/src/ptrlist.c | 987 +++++++++++ ravicomp/src/ptrlist.h | 149 ++ ravicomp/src/ravi_binding.c | 66 + ravicomp/src/set.c | 409 +++++ ravicomp/src/set.h | 87 + ravicomp/src/typechecker.c | 522 ++++++ 46 files changed, 16028 insertions(+), 14 deletions(-) create mode 100644 ravicomp/.clang-format create mode 100644 ravicomp/CMakeLists.txt create mode 100644 ravicomp/LICENSE create mode 100644 
ravicomp/README.md create mode 100644 ravicomp/include/ravi_api.h create mode 100644 ravicomp/include/ravi_compiler.h create mode 100644 ravicomp/src/README.md create mode 100644 ravicomp/src/allocate.c create mode 100644 ravicomp/src/allocate.h create mode 100644 ravicomp/src/ast_printer.c create mode 100644 ravicomp/src/ast_simplify.c create mode 100644 ravicomp/src/ast_walker.c create mode 100644 ravicomp/src/bitset.c create mode 100644 ravicomp/src/bitset.h create mode 100644 ravicomp/src/cfg.c create mode 100644 ravicomp/src/cfg.h create mode 100644 ravicomp/src/codegen.c create mode 100644 ravicomp/src/codegen.h create mode 100644 ravicomp/src/common.h create mode 100644 ravicomp/src/dataflow_framework.c create mode 100644 ravicomp/src/dataflow_framework.h create mode 100644 ravicomp/src/df_liveness.c create mode 100644 ravicomp/src/dominator.c create mode 100644 ravicomp/src/dominator.h create mode 100644 ravicomp/src/fnv_hash.c create mode 100644 ravicomp/src/fnv_hash.h create mode 100644 ravicomp/src/graph.c create mode 100644 ravicomp/src/graph.h create mode 100644 ravicomp/src/hash_table.c create mode 100644 ravicomp/src/hash_table.h create mode 100644 ravicomp/src/lexer.c create mode 100644 ravicomp/src/linearizer.c create mode 100644 ravicomp/src/linearizer.h create mode 100644 ravicomp/src/membuf.c create mode 100644 ravicomp/src/membuf.h create mode 100644 ravicomp/src/opt_unusedcode.c create mode 100644 ravicomp/src/optimizer.h create mode 100644 ravicomp/src/parser.c create mode 100644 ravicomp/src/parser.h create mode 100644 ravicomp/src/ptrlist.c create mode 100644 ravicomp/src/ptrlist.h create mode 100644 ravicomp/src/ravi_binding.c create mode 100644 ravicomp/src/set.c create mode 100644 ravicomp/src/set.h create mode 100644 ravicomp/src/typechecker.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 7ca9d10..23dcf44 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,7 +9,7 @@ option(STATIC_BUILD "Build static version of Ravi, default is 
OFF" OFF) option(COMPUTED_GOTO "Controls whether the interpreter switch will use computed gotos on gcc/clang, default is ON" ON) option(LTESTS "Controls whether ltests are enabled in Debug mode; note requires Debug build" ON) option(ASAN "Controls whether address sanitizer should be enabled" OFF) -option(RAVICOMP "Controls whether to link in RaviComp" OFF) +option(RAVICOMP "Controls whether to link in RaviComp" ON) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") @@ -105,20 +105,22 @@ endif () if (RAVICOMP) # Need MIR_JIT for the compiler add-on - find_package(RaviComp REQUIRED) + #find_package(RaviComp REQUIRED) set(ADDON_SRCS ${RAVICOMP_SRCS}) - set_property(SOURCE ${RAVICOMP_SRCS} - APPEND - PROPERTY INCLUDE_DIRECTORIES ${RAVICOMP_INCLUDE_DIRS}) - if (MIR_JIT) - set_property(SOURCE ${RAVICOMP_SRCS} - APPEND - PROPERTY INCLUDE_DIRECTORIES "${CMAKE_SOURCE_DIR}/mir;${CMAKE_SOURCE_DIR}/mir/c2mir") - endif () - if ($ENV{CLION_IDE}) - # CLion seems unable to handle include paths set on sources - include_directories(${RAVICOMP_INCLUDE_DIRS}) - endif () + #set_property(SOURCE ${RAVICOMP_SRCS} + # APPEND + # PROPERTY INCLUDE_DIRECTORIES ${RAVICOMP_INCLUDE_DIRS}) + #if (MIR_JIT) + # set_property(SOURCE ${RAVICOMP_SRCS} + # APPEND + # PROPERTY INCLUDE_DIRECTORIES "${CMAKE_SOURCE_DIR}/mir;${CMAKE_SOURCE_DIR}/mir/c2mir") + #endif () + #if ($ENV{CLION_IDE}) + # # CLion seems unable to handle include paths set on sources + # include_directories(${RAVICOMP_INCLUDE_DIRS}) + #endif () + add_subdirectory(ravicomp) + set(RAVICOMP_LIBRARIES ravicomp) endif () # IDE stuff diff --git a/ravicomp/.clang-format b/ravicomp/.clang-format new file mode 100644 index 0000000..04bafc9 --- /dev/null +++ b/ravicomp/.clang-format @@ -0,0 +1,7 @@ +BasedOnStyle: LLVM +IndentWidth: 8 +UseTab: Always +BreakBeforeBraces: Linux +AllowShortIfStatementsOnASingleLine: false +IndentCaseLabels: false +ColumnLimit: 120 \ No newline at end of file diff --git a/ravicomp/CMakeLists.txt 
b/ravicomp/CMakeLists.txt new file mode 100644 index 0000000..8b0545a --- /dev/null +++ b/ravicomp/CMakeLists.txt @@ -0,0 +1,110 @@ +cmake_minimum_required(VERSION 3.12) +project(RaviCompiler VERSION 0.0.1 LANGUAGES C) + +option(ASAN "Controls whether address sanitizer should be enabled" OFF) + +set(PUBLIC_HEADERS + include/ravi_compiler.h + include/ravi_api.h) + +set(HEADERS + ${PUBLIC_HEADERS} + src/allocate.h + src/bitset.h + src/ptrlist.h + src/fnv_hash.h + src/graph.h + src/hash_table.h + src/set.h + src/membuf.h + src/cfg.h + src/dominator.h + src/linearizer.h + src/common.h + src/dataflow_framework.h + src/optimizer.h + src/parser.h + src/codegen.h) + +set(SRCS + src/allocate.c + src/ast_walker.c + src/ast_simplify.c + src/bitset.c + src/ptrlist.c + src/fnv_hash.c + src/graph.c + src/cfg.c + src/dominator.c + src/hash_table.c + src/set.c + src/lexer.c + src/parser.c + src/ast_printer.c + src/typechecker.c + src/linearizer.c + src/dataflow_framework.c + src/opt_unusedcode.c + src/membuf.c + src/df_liveness.c + src/codegen.c + src/ravi_binding.c + ) + +message("SOURCE dir is ${RaviCompiler_SOURCE_DIR}") + +if ($ENV{CLION_IDE}) + # CLion seems unable to handle include paths set on sources + include_directories("${RaviCompiler_SOURCE_DIR}/include") +endif () + +if (WIN32) + # disable warnings about C string functions + add_compile_definitions(_CRT_SECURE_NO_WARNINGS) +endif() + +include(CheckCCompilerFlag) +if (NOT MSVC) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wmissing-prototypes -Wstrict-prototypes -Werror=return-type") + if (ASAN) + set(CMAKE_REQUIRED_FLAGS "-fsanitize=address") + check_c_compiler_flag("-fsanitize=address" COMPILER_ASAN_SUPPORTED) + if (COMPILER_ASAN_SUPPORTED AND NOT CMAKE_C_FLAGS_DEBUG MATCHES "-fsanitize=address") + set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -fsanitize=address") + endif () + endif () +endif() + +include(GNUInstallDirs) + +set(CMAKE_VISIBILITY_INLINES_HIDDEN YES) + +if (NOT WIN32) + set(EXTRA_LIBRARIES m) +endif () 
+ +if (WIN32) + set(LIBTYPE STATIC) +else() + set(LIBTYPE SHARED) +endif() +add_library(ravicomp ${LIBTYPE} + ${HEADERS} + ${SRCS}) +target_include_directories(ravicomp + PUBLIC "${CMAKE_CURRENT_BINARY_DIR}" + PUBLIC "${RaviCompiler_SOURCE_DIR}/include" + PRIVATE "${RaviCompiler_SOURCE_DIR}/src") +target_link_libraries(ravicomp ${EXTRA_LIBRARIES}) +include(GenerateExportHeader) +generate_export_header(ravicomp) + +install(FILES ${PUBLIC_HEADERS} + DESTINATION include/ravicomp) +install(TARGETS ravicomp + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT RaviCompiler_Runtime + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT RaviCompiler_Development + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT RaviCompiler_Runtime) +install(FILES + ${PROJECT_BINARY_DIR}/ravicomp_export.h DESTINATION include/ravicomp + ) \ No newline at end of file diff --git a/ravicomp/LICENSE b/ravicomp/LICENSE new file mode 100644 index 0000000..89584aa --- /dev/null +++ b/ravicomp/LICENSE @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2019-2020 Dibyendu Majumdar +Portions Copyright (c) 1994–2019 Lua.org, PUC-Rio. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/ravicomp/README.md b/ravicomp/README.md new file mode 100644 index 0000000..8e10c94 --- /dev/null +++ b/ravicomp/README.md @@ -0,0 +1,6 @@ +# ravi-compiler +A compiler for Ravi and Lua that processes Lua/Ravi source code and generates C code. + +This is a mirror of the project at [https://github.com/dibyendumajumdar/ravi-compiler](https://github.com/dibyendumajumdar/ravi-compiler). +Note that the command line tools are not included here. + diff --git a/ravicomp/include/ravi_api.h b/ravicomp/include/ravi_api.h new file mode 100644 index 0000000..f542c59 --- /dev/null +++ b/ravicomp/include/ravi_api.h @@ -0,0 +1,36 @@ +#ifndef ravicomp_RAVIAPI_H +#define ravicomp_RAVIAPI_H + +#include "ravicomp_export.h" +#include "ravi_compiler.h" + +#include <stdlib.h> + +struct Ravi_CompilerInterface { + /* ------------------------ Inputs ------------------------------ */ + void *context; /* Ravi supplied context */ + + const char *source; /* Source code to be compiled - managed by Ravi */ + size_t source_len; /* Size of source code */ + const char *source_name; /* Name of the source */ + const char *compiler_options; /* flags to be passed to compiler */ + + char main_func_name[31]; /* Name of the generated function that when called will set up the Lua closure */ + + /* ------------------------- Outputs ------------------------------ */ + const char* generated_code; /* Output of the compiler, must be freed by caller. 
*/ + + /* ------------------------ Debugging and error handling ----------------------------------------- */ + void (*debug_message)(void *context, const char *filename, long long line, const char *message); + void (*error_message)(void *context, const char *message); +}; + +/** + * This is the API exposed by the Compiler itself. This function is invoked by + * Ravi when it is necessary to compile some Ravi code. + * @param compiler_interface The interface expected by the compiler must be set up + * @return 0 for success, non-zero for failure + */ +RAVICOMP_EXPORT int raviX_compile(struct Ravi_CompilerInterface *compiler_interface); + +#endif diff --git a/ravicomp/include/ravi_compiler.h b/ravicomp/include/ravi_compiler.h new file mode 100644 index 0000000..1485942 --- /dev/null +++ b/ravicomp/include/ravi_compiler.h @@ -0,0 +1,589 @@ +/* +A compiler for Ravi and Lua 5.3. This is work in progress. +Once ready it will be used to create a JIT compiler for Ravi. + +This header file defines the public api + +Copyright 2018-2020 Dibyendu Majumdar +*/ + +#ifndef ravicomp_COMPILER_H +#define ravicomp_COMPILER_H + +#include "ravicomp_export.h" + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> + +typedef struct CompilerState CompilerState; +typedef struct LexerState LexerState; +typedef struct LinearizerState LinearizerState; +typedef struct VariableType VariableType; + +typedef long long lua_Integer; +typedef double lua_Number; + +/* Initialize the compiler state */ +/* During compilation all data structures are stored in the compiler state */ +RAVICOMP_EXPORT CompilerState *raviX_init_compiler(void); +/* Destroy the compiler state */ +RAVICOMP_EXPORT void raviX_destroy_compiler(CompilerState *compiler); + +/* ------------------------ LEXICAL ANALYZER API -------------------------------*/ +/* Note: following enum was generated using utils/tokenenum.h */ +enum TokenType { + TOK_OFS = 256, + + TOK_and, + TOK_break, + TOK_do, + TOK_else, + TOK_elseif, + TOK_end, + TOK_false, + TOK_for, + 
TOK_function, + TOK_goto, + TOK_if, + TOK_in, + TOK_local, + TOK_defer, + TOK_nil, + TOK_not, + TOK_or, + TOK_repeat, + TOK_return, + TOK_then, + TOK_true, + TOK_until, + TOK_while, + TOK_IDIV, + TOK_CONCAT, + TOK_DOTS, + TOK_EQ, + TOK_GE, + TOK_LE, + TOK_NE, + TOK_SHL, + TOK_SHR, + TOK_DBCOLON, + TOK_TO_INTEGER, + TOK_TO_NUMBER, + TOK_TO_INTARRAY, + TOK_TO_NUMARRAY, + TOK_TO_TABLE, + TOK_TO_STRING, + TOK_TO_CLOSURE, + TOK_EOS, + TOK_FLT, + TOK_INT, + TOK_NAME, + TOK_STRING, + + FIRST_RESERVED = TOK_OFS + 1, + LAST_RESERVED = TOK_while - TOK_OFS +}; + +/* + * Lua strings can have embedded 0 bytes therefore we + * need a string type that has a length associated with it. + * + * The compiler stores a single copy of each string so that strings + * can be compared by equality. + */ +typedef struct StringObject { + uint32_t len; /* length of the string */ + int32_t reserved; /* if this is a keyword then token id else -1 */ + uint32_t hash; /* hash value of the string */ + const char *str; /* string data */ +} StringObject; + +/* + * Lua literals + */ +typedef union { + lua_Number r; + lua_Integer i; + const StringObject *ts; +} SemInfo; + +typedef struct Token { + int token; /* Token value or character value; token values start from FIRST_RESERVED which is 257, values < 256 + are characters */ + SemInfo seminfo; /* Literal associated with the token, only valid when token is a literal or an identifier, i.e. + token is > TOK_EOS */ +} Token; + +/* + * Everything below should be treated as readonly; for efficiency these fields are exposed, however treat them + * as fields managed by the lexer. 
+ */ +typedef struct { + int current; /* current character (char value as int) */ + int linenumber; /* current input line counter */ + int lastline; /* line number of the last token 'consumed' */ + Token t; /* current token, set after call to raviX_next() */ + Token lookahead; /* look ahead token, set after call to raviX_lookahead() */ +} LexerInfo; + +/* Following is a dynamic buffer implementation that is not strictly part of the + * compiler api but is relied upon by various compiler parts. We should perhaps avoid + * exposing it. + * + * The reason for exposing this is that we use it for getting the token string in one of the + * api calls. + */ +typedef struct { + char *buf; /* pointer to allocated memory, can be reallocated */ + size_t capacity; /* allocated size */ + size_t pos; /* current position in the buffer */ +} TextBuffer; + +/* all strings are interned and stored in a hash set, strings may have embedded + * 0 bytes therefore explicit length is necessary + */ +RAVICOMP_EXPORT const StringObject *raviX_create_string(CompilerState *compiler_state, const char *s, + uint32_t len); + +/* Initialize lexical analyser. Takes as input a buffer containing Lua/Ravi source and the source name */ +RAVICOMP_EXPORT LexerState *raviX_init_lexer(CompilerState *compiler_state, const char *buf, + size_t buflen, const char *source_name); +/* Gets the public part of the lexer data structure to allow access to the current token. Note that the returned + * value should be treated as a readonly data structure + */ +RAVICOMP_EXPORT const LexerInfo *raviX_get_lexer_info(LexerState *ls); +/* Retrieves the next token and saves it in the LexerState structure. If a lookahead was set then that is retrieved + * (and reset to EOS) else the next token is retrieved + */ +RAVICOMP_EXPORT void raviX_next(LexerState *ls); +/* Retrieves the next token and sets it as the lookahead. This means that a next call will get the lookahead. + * Returns the token id. 
+ */ +RAVICOMP_EXPORT int raviX_lookahead(LexerState *ls); +/* Convert a token to text format. The token will be written to the current position in mb. */ +RAVICOMP_EXPORT void raviX_token2str(int token, TextBuffer *mb); +/* Release all data structures used by the lexer */ +RAVICOMP_EXPORT void raviX_destroy_lexer(LexerState *); + +/* ---------------- PARSER API -------------------------- */ + +/* + * Parse a Lua chunk (i.e. script). + * The Lua chunk will be wrapped in an anonymous Lua function (the 'main' function), so all the code + * in the chunk will be part of that function. Any functions defined in the chunk will become child functions + * of the 'main' function. + * + * Each Lua chunk / script therefore has an anonymous 'main' function. The name 'main' is just to refer + * to this function as it has no name in reality. + * + * Note that at present a new compiler state should be created when processing a Lua chunk. + * + * Returns 0 on success, non-zero on failure. + */ +RAVICOMP_EXPORT int raviX_parse(CompilerState *compiler_state, const char *buffer, size_t buflen, + const char *source_name); +/* Prints out the AST to the file */ +RAVICOMP_EXPORT void raviX_output_ast(CompilerState *compiler_state, FILE *fp); +/* Performs type checks on the AST and annotates types of expressions and variables where possible. + * As a result the AST will be modified. + * + * Returns 0 on success, non-zero on failure. + */ +RAVICOMP_EXPORT int +raviX_ast_typecheck(CompilerState *compiler_state); /* Perform type checks and assign types to AST */ + +/* ---------------------------- LINEARIZER API --------------------------------------- */ +/* linear IR generator. + * The goal of this component is to convert the AST to a linear IR. + * This is work in progress, therefore the IR is not yet publicly exposed. + */ +RAVICOMP_EXPORT LinearizerState *raviX_init_linearizer(CompilerState *compiler_state); +/* Attempts to create linear IR for given AST. + * Returns 0 on success. 
+ */ +RAVICOMP_EXPORT int raviX_ast_linearize(LinearizerState *linearizer); +/* Prints out the content of the linear IR */ +RAVICOMP_EXPORT void raviX_output_linearizer(LinearizerState *linearizer, FILE *fp); +/* Cleanup the linearizer */ +RAVICOMP_EXPORT void raviX_destroy_linearizer(LinearizerState *linearizer); + +/* utilities */ +RAVICOMP_EXPORT const char *raviX_get_last_error(CompilerState *compiler_state); + +/* ----------------------- AST WALKING API ------------------------ */ + +/* Binary operators */ +typedef enum BinaryOperatorType { + BINOPR_ADD, + BINOPR_SUB, + BINOPR_MUL, + BINOPR_MOD, + BINOPR_POW, + BINOPR_DIV, + BINOPR_IDIV, + BINOPR_BAND, + BINOPR_BOR, + BINOPR_BXOR, + BINOPR_SHL, + BINOPR_SHR, + BINOPR_CONCAT, + BINOPR_EQ, + BINOPR_LT, + BINOPR_LE, + BINOPR_NE, + BINOPR_GT, + BINOPR_GE, + BINOPR_AND, + BINOPR_OR, + BINOPR_NOBINOPR +} BinaryOperatorType; + +RAVICOMP_EXPORT const char *raviX_get_binary_opr_str(BinaryOperatorType op); + +/* Unary operators */ +typedef enum UnaryOperatorType { + UNOPR_MINUS = BINOPR_NOBINOPR + 1, + UNOPR_BNOT, + UNOPR_NOT, + UNOPR_LEN, + UNOPR_TO_INTEGER, + UNOPR_TO_NUMBER, + UNOPR_TO_INTARRAY, + UNOPR_TO_NUMARRAY, + UNOPR_TO_TABLE, + UNOPR_TO_STRING, + UNOPR_TO_CLOSURE, + UNOPR_TO_TYPE, + UNOPR_NOUNOPR +} UnaryOperatorType; + +RAVICOMP_EXPORT const char *raviX_get_unary_opr_str(UnaryOperatorType op); + +/* Types of AST nodes */ +enum AstNodeType { + AST_NONE, /* Will never be set on a properly initialized node */ + STMT_RETURN, + STMT_GOTO, + STMT_LABEL, + STMT_DO, + STMT_LOCAL, + STMT_FUNCTION, + STMT_IF, + STMT_TEST_THEN, + STMT_WHILE, + STMT_FOR_IN, + STMT_FOR_NUM, + STMT_REPEAT, + STMT_EXPR, /* Also used for assignment statements */ + EXPR_LITERAL, + EXPR_SYMBOL, + EXPR_Y_INDEX, /* [] operator */ + EXPR_FIELD_SELECTOR, /* table field access - '.' 
or ':' operator */ + EXPR_TABLE_ELEMENT_ASSIGN, /* table element assignment in table constructor */ + EXPR_SUFFIXED, + EXPR_UNARY, + EXPR_BINARY, + EXPR_FUNCTION, /* function literal */ + EXPR_TABLE_LITERAL, /* table constructor */ + EXPR_FUNCTION_CALL +}; + +typedef struct Statement Statement; +typedef struct ReturnStatement ReturnStatement; +typedef struct LabelStatement LabelStatement; +typedef struct GotoStatement GotoStatement; +typedef struct LocalStatement LocalStatement; +typedef struct ExpressionStatement ExpressionStatement; +typedef struct FunctionStatement FunctionStatement; +typedef struct DoStatement DoStatement; +typedef struct TestThenStatement TestThenStatement; +typedef struct IfStatement IfStatement; +typedef struct WhileOrRepeatStatement WhileOrRepeatStatement; +typedef struct ForStatement ForStatement; + +typedef struct Expression Expression; +typedef struct LiteralExpression LiteralExpression; +typedef struct SymbolExpression SymbolExpression; +typedef struct IndexExpression IndexExpression; +typedef struct UnaryExpression UnaryExpression; +typedef struct BinaryExpression BinaryExpression; +typedef struct FunctionExpression FunctionExpression; +typedef struct TableElementAssignmentExpression TableElementAssignmentExpression; +typedef struct TableLiteralExpression TableLiteralExpression; +typedef struct SuffixedExpression SuffixedExpression; +typedef struct FunctionCallExpression FunctionCallExpression; + +typedef struct Scope Scope; + +/* Types of symbols */ +enum SymbolType { + SYM_LOCAL, /* lua_variable_symbol */ + SYM_UPVALUE, /* lua_upvalue_symbol */ + SYM_GLOBAL, /* lua_variable_symbol, Global symbols are never added to a scope so they are always looked up */ + SYM_LABEL, /* lua_label_symbol */ + SYM_ENV /* Special symbol type for _ENV */ +}; +typedef struct LuaSymbol LuaSymbol; +typedef struct LuaUpvalueSymbol LuaUpvalueSymbol; +typedef struct LuaVariableSymbol LuaVariableSymbol; +typedef struct LuaLabelSymbol LuaLabelSymbol; + +/* As 
described before each parsed Lua script or chunk is wrapped in an anonymous 'main' + * function hence the AST root is this function. + */ +RAVICOMP_EXPORT const FunctionExpression * +raviX_ast_get_main_function(const CompilerState *compiler_state); + +/* return statement walking */ +RAVICOMP_EXPORT void raviX_return_statement_foreach_expression(const ReturnStatement *statement, void *userdata, + void (*callback)(void *, const Expression *expr)); + +/* label statement walking */ +RAVICOMP_EXPORT const StringObject *raviX_label_statement_label_name(const LabelStatement *statement); +RAVICOMP_EXPORT const Scope *raviX_label_statement_label_scope(const LabelStatement *statement); + +/* goto statement walking */ +RAVICOMP_EXPORT const StringObject *raviX_goto_statement_label_name(const GotoStatement *statement); +RAVICOMP_EXPORT const Scope *raviX_goto_statement_scope(const GotoStatement *statement); +RAVICOMP_EXPORT bool raviX_goto_statement_is_break(const GotoStatement *statement); + +/* local statement walking */ +RAVICOMP_EXPORT void raviX_local_statement_foreach_expression(const LocalStatement *statement, void *userdata, + void (*callback)(void *, const Expression *expr)); +RAVICOMP_EXPORT void raviX_local_statement_foreach_symbol(const LocalStatement *statement, void *userdata, + void (*callback)(void *, + const LuaVariableSymbol *expr)); + +/* expression or assignment statement walking */ +RAVICOMP_EXPORT void +raviX_expression_statement_foreach_lhs_expression(const ExpressionStatement *statement, void *userdata, + void (*callback)(void *, const Expression *expr)); +RAVICOMP_EXPORT void +raviX_expression_statement_foreach_rhs_expression(const ExpressionStatement *statement, void *userdata, + void (*callback)(void *, const Expression *expr)); + +/* function statement walking */ +RAVICOMP_EXPORT const SymbolExpression * +raviX_function_statement_name(const FunctionStatement *statement); +RAVICOMP_EXPORT bool raviX_function_statement_is_method(const 
FunctionStatement *statement); +RAVICOMP_EXPORT const IndexExpression * +raviX_function_statement_method_name(const FunctionStatement *statement); +RAVICOMP_EXPORT bool raviX_function_statement_has_selectors(const FunctionStatement *statement); +RAVICOMP_EXPORT void +raviX_function_statement_foreach_selector(const FunctionStatement *statement, void *userdata, + void (*callback)(void *, const IndexExpression *expr)); +RAVICOMP_EXPORT const FunctionExpression *raviX_function_ast(const FunctionStatement *statement); + +/* do statement walking */ +RAVICOMP_EXPORT const Scope *raviX_do_statement_scope(const DoStatement *statement); +RAVICOMP_EXPORT void raviX_do_statement_foreach_statement(const DoStatement *statement, void *userdata, + void (*callback)(void *userdata, + const Statement *statement)); +/* if statement walking */ +/* Lua if statements are a mix of select/case and if/else statements in + * other languages. The AST represents the initial if condition block and all subsequent + * elseif blocks as test_then_statements. The final else block is treated as an optional + * else block. 
+ */ +RAVICOMP_EXPORT void +raviX_if_statement_foreach_test_then_statement(const IfStatement *statement, void *userdata, + void (*callback)(void *, const TestThenStatement *stmt)); +RAVICOMP_EXPORT const Scope *raviX_if_then_statement_else_scope(const IfStatement *statement); +RAVICOMP_EXPORT void raviX_if_statement_foreach_else_statement(const IfStatement *statement, void *userdata, + void (*callback)(void *userdata, + const Statement *statement)); +RAVICOMP_EXPORT const Scope *raviX_test_then_statement_scope(const TestThenStatement *statement); +RAVICOMP_EXPORT void +raviX_test_then_statement_foreach_statement(const TestThenStatement *statement, void *userdata, + void (*callback)(void *userdata, const Statement *statement)); +RAVICOMP_EXPORT const Expression * +raviX_test_then_statement_condition(const TestThenStatement *statement); + +/* while or repeat statement walking */ +RAVICOMP_EXPORT const Expression * +raviX_while_or_repeat_statement_condition(const WhileOrRepeatStatement *statement); +RAVICOMP_EXPORT const Scope * +raviX_while_or_repeat_statement_scope(const WhileOrRepeatStatement *statement); +RAVICOMP_EXPORT void +raviX_while_or_repeat_statement_foreach_statement(const WhileOrRepeatStatement *statement, void *userdata, + void (*callback)(void *userdata, const Statement *statement)); + +/* for statement walking */ +RAVICOMP_EXPORT const Scope *raviX_for_statement_scope(const ForStatement *statement); +RAVICOMP_EXPORT void raviX_for_statement_foreach_symbol(const ForStatement *statement, void *userdata, + void (*callback)(void *, + const LuaVariableSymbol *expr)); +RAVICOMP_EXPORT void raviX_for_statement_foreach_expression(const ForStatement *statement, void *userdata, + void (*callback)(void *, const Expression *expr)); +RAVICOMP_EXPORT const Scope *raviX_for_statement_body_scope(const ForStatement *statement); +RAVICOMP_EXPORT void raviX_for_statement_body_foreach_statement(const ForStatement *statement, void *userdata, + void (*callback)(void 
*userdata, + const Statement *statement)); + +/* literal expression */ +/* Note: '...' value has type RAVI_TVARARGS and no associated SemInfo. */ +RAVICOMP_EXPORT const VariableType *raviX_literal_expression_type(const LiteralExpression *expression); +RAVICOMP_EXPORT const SemInfo *raviX_literal_expression_literal(const LiteralExpression *expression); + +/* symbol expression */ +RAVICOMP_EXPORT const VariableType *raviX_symbol_expression_type(const SymbolExpression *expression); +RAVICOMP_EXPORT const LuaSymbol *raviX_symbol_expression_symbol(const SymbolExpression *expression); + +/* index expression */ +RAVICOMP_EXPORT const VariableType *raviX_index_expression_type(const IndexExpression *expression); +RAVICOMP_EXPORT const Expression *raviX_index_expression_expression(const IndexExpression *expression); + +/* unary expression */ +RAVICOMP_EXPORT const VariableType *raviX_unary_expression_type(const UnaryExpression *expression); +RAVICOMP_EXPORT const Expression *raviX_unary_expression_expression(const UnaryExpression *expression); +RAVICOMP_EXPORT UnaryOperatorType raviX_unary_expression_operator(const UnaryExpression *expression); + +/* binary expression */ +RAVICOMP_EXPORT const VariableType *raviX_binary_expression_type(const BinaryExpression *expression); +RAVICOMP_EXPORT const Expression * +raviX_binary_expression_left_expression(const BinaryExpression *expression); +RAVICOMP_EXPORT const Expression * +raviX_binary_expression_right_expression(const BinaryExpression *expression); +RAVICOMP_EXPORT BinaryOperatorType raviX_binary_expression_operator(const BinaryExpression *expression); + +/* function expression */ +RAVICOMP_EXPORT const VariableType *raviX_function_type(const FunctionExpression *function_expression); +RAVICOMP_EXPORT bool raviX_function_is_vararg(const FunctionExpression *function_expression); +RAVICOMP_EXPORT bool raviX_function_is_method(const FunctionExpression *function_expression); +RAVICOMP_EXPORT const FunctionExpression * 
+raviX_function_parent(const FunctionExpression *function_expression); +RAVICOMP_EXPORT void +raviX_function_foreach_child(const FunctionExpression *function_expression, void *userdata, + void (*callback)(void *userdata, const FunctionExpression *function_expression)); +RAVICOMP_EXPORT const Scope *raviX_function_scope(const FunctionExpression *function_expression); +RAVICOMP_EXPORT void +raviX_function_foreach_statement(const FunctionExpression *function_expression, void *userdata, + void (*callback)(void *userdata, const Statement *statement)); +RAVICOMP_EXPORT void +raviX_function_foreach_argument(const FunctionExpression *function_expression, void *userdata, + void (*callback)(void *userdata, const LuaVariableSymbol *symbol)); +RAVICOMP_EXPORT void raviX_function_foreach_local(const FunctionExpression *function_expression, void *userdata, + void (*callback)(void *userdata, + const LuaVariableSymbol *lua_local_symbol)); +RAVICOMP_EXPORT void +raviX_function_foreach_upvalue(const FunctionExpression *function_expression, void *userdata, + void (*callback)(void *userdata, const LuaUpvalueSymbol *symbol)); + +/* table element assignment expression */ +RAVICOMP_EXPORT const VariableType * +raviX_table_element_assignment_expression_type(const TableElementAssignmentExpression *expression); +RAVICOMP_EXPORT const Expression * +raviX_table_element_assignment_expression_key(const TableElementAssignmentExpression *expression); +RAVICOMP_EXPORT const Expression * +raviX_table_element_assignment_expression_value(const TableElementAssignmentExpression *expression); + +/* table_literal_expression */ +RAVICOMP_EXPORT const VariableType * +raviX_table_literal_expression_type(const TableLiteralExpression *expression); +RAVICOMP_EXPORT void raviX_table_literal_expression_foreach_element( + const TableLiteralExpression *expression, void *userdata, + void (*callback)(void *, const TableElementAssignmentExpression *expr)); + +/* suffixed_expression */ +RAVICOMP_EXPORT const 
VariableType *raviX_suffixed_expression_type(const SuffixedExpression *expression); +RAVICOMP_EXPORT const Expression * +raviX_suffixed_expression_primary(const SuffixedExpression *expression); +RAVICOMP_EXPORT void raviX_suffixed_expression_foreach_suffix(const SuffixedExpression *expression, + void *userdata, + void (*callback)(void *, const Expression *expr)); + +/* function call expression */ +RAVICOMP_EXPORT const VariableType * +raviX_function_call_expression_type(const FunctionCallExpression *expression); +// can return NULL +RAVICOMP_EXPORT const StringObject * +raviX_function_call_expression_method_name(const FunctionCallExpression *expression); +RAVICOMP_EXPORT void +raviX_function_call_expression_foreach_argument(const FunctionCallExpression *expression, void *userdata, + void (*callback)(void *, const Expression *expr)); + +/* Convert a statement to the correct type */ +RAVICOMP_EXPORT enum AstNodeType raviX_statement_type(const Statement *statement); +RAVICOMP_EXPORT const ReturnStatement *raviX_return_statement(const Statement *stmt); +RAVICOMP_EXPORT const LabelStatement *raviX_label_statement(const Statement *stmt); +RAVICOMP_EXPORT const GotoStatement *raviX_goto_statement(const Statement *stmt); +RAVICOMP_EXPORT const LocalStatement *raviX_local_statement(const Statement *stmt); +RAVICOMP_EXPORT const ExpressionStatement *raviX_expression_statement(const Statement *stmt); +RAVICOMP_EXPORT const FunctionStatement *raviX_function_statement(const Statement *stmt); +RAVICOMP_EXPORT const DoStatement *raviX_do_statement(const Statement *stmt); +RAVICOMP_EXPORT const TestThenStatement *raviX_test_then_statement(const Statement *stmt); +RAVICOMP_EXPORT const IfStatement *raviX_if_statement(const Statement *stmt); +RAVICOMP_EXPORT const WhileOrRepeatStatement *raviX_while_or_repeat_statement(const Statement *stmt); +RAVICOMP_EXPORT const ForStatement *raviX_for_statement(const Statement *stmt); + +/* Convert an expression to the correct type */ 
+RAVICOMP_EXPORT enum AstNodeType raviX_expression_type(const Expression *expression); +RAVICOMP_EXPORT const LiteralExpression *raviX_literal_expression(const Expression *expr); +RAVICOMP_EXPORT const SymbolExpression *raviX_symbol_expression(const Expression *expr); +RAVICOMP_EXPORT const IndexExpression *raviX_index_expression(const Expression *expr); +RAVICOMP_EXPORT const UnaryExpression *raviX_unary_expression(const Expression *expr); +RAVICOMP_EXPORT const BinaryExpression *raviX_binary_expression(const Expression *expr); +RAVICOMP_EXPORT const FunctionExpression *raviX_function_expression(const Expression *expr); +RAVICOMP_EXPORT const TableElementAssignmentExpression * +raviX_table_element_assignment_expression(const Expression *expr); +RAVICOMP_EXPORT const TableLiteralExpression *raviX_table_literal_expression(const Expression *expr); +RAVICOMP_EXPORT const SuffixedExpression *raviX_suffixed_expression(const Expression *expr); +RAVICOMP_EXPORT const FunctionCallExpression *raviX_function_call_expression(const Expression *expr); + +RAVICOMP_EXPORT const FunctionExpression *raviX_scope_owning_function(const Scope *scope); +RAVICOMP_EXPORT const Scope *raviX_scope_parent_scope(const Scope *scope); +RAVICOMP_EXPORT void raviX_scope_foreach_symbol(const Scope *scope, void *userdata, + void (*callback)(void *userdata, const LuaSymbol *symbol)); + +RAVICOMP_EXPORT enum SymbolType raviX_symbol_type(const LuaSymbol *symbol); +/* symbol downcast */ +RAVICOMP_EXPORT const LuaVariableSymbol *raviX_symbol_variable(const LuaSymbol *symbol); +RAVICOMP_EXPORT const LuaUpvalueSymbol *raviX_symbol_upvalue(const LuaSymbol *symbol); +RAVICOMP_EXPORT const LuaLabelSymbol *raviX_symbol_label(const LuaSymbol *symbol); + +/* variable symbol - local and global variables */ +RAVICOMP_EXPORT const StringObject * +raviX_variable_symbol_name(const LuaVariableSymbol *lua_local_symbol); +RAVICOMP_EXPORT const VariableType *raviX_variable_symbol_type(const LuaVariableSymbol 
*lua_local_symbol); +// NULL if global +RAVICOMP_EXPORT const Scope * +raviX_variable_symbol_scope(const LuaVariableSymbol *lua_local_symbol); + +/* label symbol */ +RAVICOMP_EXPORT const StringObject *raviX_label_name(const LuaLabelSymbol *symbol); +RAVICOMP_EXPORT const Scope *raviX_label_scope(const LuaLabelSymbol *symbol); + +/* upvalue symbol */ +RAVICOMP_EXPORT const VariableType *raviX_upvalue_symbol_type(const LuaUpvalueSymbol *symbol); +RAVICOMP_EXPORT const LuaVariableSymbol * +raviX_upvalue_target_variable(const LuaUpvalueSymbol *symbol); +RAVICOMP_EXPORT const FunctionExpression * +raviX_upvalue_target_function(const LuaUpvalueSymbol *symbol); +RAVICOMP_EXPORT unsigned raviX_upvalue_index(const LuaUpvalueSymbol *symbol); + +/* Utilities */ +#ifdef __GNUC__ +#define FORMAT_ATTR(pos) __attribute__((__format__(__printf__, pos, pos + 1))) +#else +#define FORMAT_ATTR(pos) +#endif + +RAVICOMP_EXPORT void raviX_buffer_init(TextBuffer *mb, size_t initial_size); +RAVICOMP_EXPORT void raviX_buffer_resize(TextBuffer *mb, size_t new_size); +RAVICOMP_EXPORT void raviX_buffer_reserve(TextBuffer *mb, size_t n); +RAVICOMP_EXPORT void raviX_buffer_free(TextBuffer *mb); +static inline char *raviX_buffer_data(const TextBuffer *mb) { return mb->buf; } +static inline size_t raviX_buffer_size(const TextBuffer *mb) { return mb->capacity; } +static inline size_t raviX_buffer_len(const TextBuffer *mb) { return mb->pos; } +static inline void raviX_buffer_reset(TextBuffer *mb) { mb->pos = 0; } + +/* following convert input to string before adding */ +RAVICOMP_EXPORT void raviX_buffer_add_string(TextBuffer *mb, const char *str); +RAVICOMP_EXPORT void raviX_buffer_add_bytes(TextBuffer *mb, const char *str, size_t len); +RAVICOMP_EXPORT void raviX_buffer_add_fstring(TextBuffer *mb, const char *str, ...) 
FORMAT_ATTR(2); + +/* strncpy() replacement with guaranteed 0 termination */ +RAVICOMP_EXPORT void raviX_string_copy(char *buf, const char *src, size_t buflen); + +#endif diff --git a/ravicomp/src/README.md b/ravicomp/src/README.md new file mode 100644 index 0000000..1bc37c2 --- /dev/null +++ b/ravicomp/src/README.md @@ -0,0 +1,25 @@ +# Sources + +* `lexer.c` - derived from Lua 5.3 lexer but modified to work as a standalone lexer +* `parser.c` - responsible for generating abstract syntax tree (AST) - consumes lexer output. +* `ast_printer.c` - responsible for printing out the AST +* `ast_walker.c` - API for walking the AST +* `ast_simplify.c` - responsible for simplifications done on AST such as constant folding +* `typechecker.c` - responsible for performing typechecking and assigning types to various things. Runs on the AST. +* `linearizer.c` (WIP) - responsible for generating linear intermediate code (linear IR). +* `cfg.c` - responsible for constructing a control flow graph from the output of the linearizer. +* `dominator.c` - implementation of dominator tree calculation - this is not used yet +* `dataflow_framework.c` - a framework for calculating dataflow equations - not used yet +* `opt_unusedcode.c` - a simple optimization pass that deletes unreachable basic blocks +* `codegen.c` - responsible for generating C code from the linear IR + +## Utilities + +* `allocate.c` - memory allocator +* `fnv_hash.c` - string hashing function +* `hash_table.c` - hash table +* `set.c` - set data structure +* `ptrlist.c` - a hybrid array/linked list data structure +* `membuf.c` - dynamic memory buffer that supports formatted input - used to build strings incrementally +* `graph.c` - simple graph data structure used to generate control flow graph. 
+* `bitset.c` - bitset data structure diff --git a/ravicomp/src/allocate.c b/ravicomp/src/allocate.c new file mode 100644 index 0000000..c96b461 --- /dev/null +++ b/ravicomp/src/allocate.c @@ -0,0 +1,275 @@ +/* + * allocate.c - simple space-efficient blob allocator. + * + * Copyright (C) 2003 Transmeta Corp. + * 2003-2004 Linus Torvalds + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * Simple allocator for data that doesn't get partially free'd. + * The tokenizer and parser allocate a _lot_ of small data structures + * (often just two-three bytes for things like small integers), + * and since they all depend on each other you can't free them + * individually _anyway_. So do something that is very space- + * efficient: allocate larger "blobs", and give out individual + * small bits and pieces of it with no maintenance overhead. + */ +/* + * This version is part of the Ravi Compiler project. 
+ * Copyright (C) 2017-2020 Dibyendu Majumdar + */ + +#include + +#include +#include +#include +#include + +static void *blob_alloc(size_t size) +{ + void *ptr; + ptr = malloc(size); + if (ptr != NULL) + memset(ptr, 0, size); + return ptr; +} + +static void blob_free(void *addr, size_t size) +{ + (void)size; + free(addr); +} + +void raviX_allocator_init(Allocator *A, const char *name, size_t size, unsigned int alignment, + unsigned int chunking) +{ + A->name_ = name; + A->blobs_ = NULL; + A->size_ = size; + A->alignment_ = alignment; + A->chunking_ = chunking; + A->freelist_ = NULL; + A->allocations = 0; + A->total_bytes = 0; + A->useful_bytes = 0; +} + +void *raviX_allocator_allocate(Allocator *A, size_t extra) +{ + size_t size = extra + A->size_; + size_t alignment = A->alignment_; + AllocationBlob *blob = A->blobs_; + void *retval; + + if (size > A->chunking_) { + fprintf(stderr, "allocation failure: requested size %lld is larger than maximum chunk size %lld\n", + (long long)size, (long long) A->chunking_); + exit(1); + } + /* + * NOTE! The freelist only works with things that are + * (a) sufficiently aligned + * (b) use a constant size + * Don't try to free allocators that don't follow + * these rules. 
+ */ + if (A->freelist_) { + void **p = (void **)A->freelist_; + retval = p; + A->freelist_ = *p; + memset(retval, 0, size); + return retval; + } + + A->allocations++; + A->useful_bytes += size; + size = (size + alignment - 1) & ~(alignment - 1); + if (!blob || blob->left < size) { + size_t offset, chunking = A->chunking_; + AllocationBlob *newblob = (AllocationBlob *)blob_alloc(chunking); + if (!newblob) { + fprintf(stderr, "out of memory\n"); + exit(1); + } + A->total_bytes += chunking; + newblob->next = blob; + blob = newblob; + A->blobs_ = newblob; + offset = offsetof(AllocationBlob, data); + offset = (offset + alignment - 1) & ~(alignment - 1); + blob->left = chunking - offset; + blob->offset = offset - offsetof(AllocationBlob, data); + } + retval = blob->data + blob->offset; + blob->offset += size; + blob->left -= size; + return retval; +} + +void raviX_allocator_free(Allocator *A, void *entry) +{ + void **p = (void **)entry; + *p = A->freelist_; + A->freelist_ = p; +} +void raviX_allocator_show_allocations(Allocator *A) +{ + fprintf(stderr, + "%s: %d allocations, %d bytes (%d total bytes, " + "%6.2f%% usage, %6.2f average size)\n", + A->name_, (int)A->allocations, (int)A->useful_bytes, (int)A->total_bytes, + 100 * (double)A->useful_bytes / A->total_bytes, (double)A->useful_bytes / A->allocations); +} +void raviX_allocator_drop_all_allocations(Allocator *A) +{ + AllocationBlob *blob = A->blobs_; + A->blobs_ = NULL; + A->allocations = 0; + A->total_bytes = 0; + A->useful_bytes = 0; + A->freelist_ = NULL; + while (blob) { + AllocationBlob *next = blob->next; + blob_free(blob, A->chunking_); + blob = next; + } +} +void raviX_allocator_destroy(Allocator *A) +{ + raviX_allocator_drop_all_allocations(A); + A->blobs_ = NULL; + A->allocations = 0; + A->total_bytes = 0; + A->useful_bytes = 0; + A->freelist_ = NULL; +} +void raviX_allocator_transfer(Allocator *A, Allocator *transfer_to) +{ + assert(transfer_to->blobs_ == NULL); + assert(transfer_to->freelist_ == NULL); 
/*
Reallocate an array from old_n to new_n elements of element_size bytes each.

If new_n is 0 the array memory is freed and NULL is returned.
If new_n is greater than old_n the old data is preserved and the additional
space is zeroed out, so the caller can rely on the extra elements being
initialized to zeros.
If new_n is not greater than old_n the array is resized and nothing is zeroed.

Running out of memory (including an element count whose byte size does not
fit in size_t) is fatal: a message is written to stderr and abort() is called.
*/
void *raviX_realloc_array(void *oldp, size_t element_size, size_t old_n, size_t new_n)
{
	if (new_n == 0) {
		free(oldp);
		return NULL;
	}
	/* Refuse requests where element_size * new_n would wrap around. */
	if (element_size != 0 && new_n > (size_t)-1 / element_size) {
		fprintf(stderr, "out of memory\n");
		abort();
	}
	size_t newsize = element_size * new_n;
	void *newp = realloc(oldp, newsize);
	if (!newp) {
		fprintf(stderr, "out of memory\n");
		abort();
	}
	/* Only a growing array has a new tail to clear; guarding here keeps
	   newsize - oldsize from wrapping when the array shrinks. */
	if (new_n > old_n) {
		size_t oldsize = old_n * element_size;
		memset((char *)newp + oldsize, 0, newsize - oldsize);
	}
	return newp;
}
/*
Delete n elements starting at index i from array a, which holds array_size
elements of element_size bytes each. The elements after the deleted range are
moved down and the n slots freed at the end of the array are zero initialized.
The allocation itself is not resized.

Requires i + n <= array_size. Returns the new element count, array_size - n.
*/
size_t raviX_del_array_element(void *a, size_t element_size, size_t array_size, size_t i, size_t n)
{
	/* Same condition as i + n <= array_size, written so that it cannot overflow. */
	assert(i <= array_size && n <= array_size - i);
	if (n == 0)
		return array_size; /* nothing to delete; also avoids touching a NULL/empty array */

	char *base = (char *)a;
	size_t remaining = array_size - n;
	size_t tail = remaining - i; /* elements that follow the deleted range */

	memmove(base + i * element_size, base + (i + n) * element_size, tail * element_size);
	memset(base + remaining * element_size, 0, n * element_size);
	return remaining;
}
mode 100644 index 0000000..3d9da88 --- /dev/null +++ b/ravicomp/src/allocate.h @@ -0,0 +1,130 @@ +#ifndef ravicomp_ALLOCATOR_H +#define ravicomp_ALLOCATOR_H + +/* + * allocate.c - simple space-efficient blob allocator. + * + * Copyright (C) 2003 Transmeta Corp. + * 2003-2004 Linus Torvalds + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * Simple allocator for data that doesn't get partially free'd. + * The tokenizer and parser allocate a _lot_ of small data structures + * (often just two-three bytes for things like small integers), + * and since they all depend on each other you can't free them + * individually _anyway_. So do something that is very space- + * efficient: allocate larger "blobs", and give out individual + * small bits and pieces of it with no maintenance overhead. 
+ */ +/* + * Portions Copyright (C) 2017-2020 Dibyendu Majumdar + */ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct AllocationBlob AllocationBlob; +struct AllocationBlob { + AllocationBlob *next; + size_t left, offset; + unsigned char data[]; +}; + +/* + * Our "blob" allocator works on chunks that are multiples + * of this size (the underlying allocator may be a mmap that + * cannot handle smaller chunks, for example, so trying to + * allocate blobs that aren't aligned is not going to work). + */ +#define CHUNK 32768 + +typedef struct Allocator { + const char *name_; + AllocationBlob *blobs_; + size_t size_; + unsigned int alignment_; + unsigned int chunking_; + void *freelist_; + size_t allocations, total_bytes, useful_bytes; +} Allocator; + +extern void raviX_allocator_init(Allocator *A, const char *name, size_t size, unsigned int alignment, + unsigned int chunking); + +extern void *raviX_allocator_allocate(Allocator *A, size_t extra); + +extern void raviX_allocator_free(Allocator *A, void *entry); + +extern void raviX_allocator_show_allocations(Allocator *A); + +extern void raviX_allocator_drop_all_allocations(Allocator *A); + +extern void raviX_allocator_destroy(Allocator *A); + +extern void raviX_allocator_transfer(Allocator *A, Allocator *transfer_to); + +/* +Reallocate array from old_n to new_n. If new_n is 0 then array memory is freed. +If new_n is greater than old_n then old data is copied across and the +additional allocated space is zeroed out so caller can rely on the extra space being +initialized to zeros. +*/ +extern void *raviX_realloc_array(void *oldp, size_t element_size, size_t old_n, size_t new_n); +/* +Delete n elements starting at i from array a of size array_size, where sizeof(each element) is element_size. +The freed up space will be zero initialized. Returns the new array_size. 
+*/ +extern size_t raviX_del_array_element(void *p, size_t element_size, size_t array_size, size_t i, size_t n); + +/* structure of a node */ +#define DECLARE_ARRAY(array_type, TYPE) \ + typedef struct array_type { \ + unsigned allocated; \ + unsigned count; \ + TYPE *data; \ + } array_type +#define array_push(A, value) \ + { \ + if ((A)->count == (A)->allocated) { \ + unsigned newsize = (A)->allocated += 10; \ + (A)->data = raviX_realloc_array((A)->data, sizeof((A)->data[0]), (A)->allocated, newsize); \ + (A)->allocated = newsize; \ + } \ + (A)->data[(A)->count++] = value; \ + } +#define array_clearmem(A) \ + { \ + raviX_realloc_array((A)->data, sizeof((A)->data[0]), (A)->allocated, 0); \ + (A)->data = NULL; \ + (A)->allocated = 0; \ + (A)->count = 0; \ + } + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/ravicomp/src/ast_printer.c b/ravicomp/src/ast_printer.c new file mode 100644 index 0000000..1b9c8e2 --- /dev/null +++ b/ravicomp/src/ast_printer.c @@ -0,0 +1,543 @@ +/* +Copyright (C) 2018-2020 Dibyendu Majumdar +*/ + +#include + +const char *raviX_get_type_name(ravitype_t tt) +{ + switch (tt) { + case RAVI_TANY: + return "any"; + case RAVI_TNIL: + return "nil"; + case RAVI_TBOOLEAN: + return "boolean"; + case RAVI_TNUMFLT: + return "number"; + case RAVI_TNUMINT: + return "integer"; + case RAVI_TTABLE: + return "table"; + case RAVI_TSTRING: + return "string"; + case RAVI_TARRAYINT: + return "integer[]"; + case RAVI_TARRAYFLT: + return "number[]"; + case RAVI_TFUNCTION: + return "closure"; + case RAVI_TUSERDATA: + return "userdata"; + default: + return "?"; + } +} + +static void printf_buf(TextBuffer *buf, const char *format, ...) 
+{ + static const char *PADDING = " "; + char tbuf[128] = {0}; + va_list ap; + const char *cp; + va_start(ap, format); + for (cp = format; *cp; cp++) { + if (cp[0] == '%' && cp[1] == 'p') { /* padding */ + int level = va_arg(ap, int); + snprintf(tbuf, sizeof tbuf, "%.*s", level, PADDING); + raviX_buffer_add_string(buf, tbuf); + cp++; + } else if (cp[0] == '%' && cp[1] == 't') { /* string_object */ + const StringObject *s = va_arg(ap, const StringObject *); + raviX_buffer_add_string(buf, s->str); + cp++; + } else if (cp[0] == '%' && cp[1] == 'T') { /* VariableType */ + const VariableType *type; + type = va_arg(ap, const VariableType *); + if (type->type_code == RAVI_TUSERDATA) { + const StringObject *s = type->type_name; + raviX_buffer_add_string(buf, s->str); + } else { + raviX_buffer_add_string(buf, raviX_get_type_name(type->type_code)); + } + cp++; + } else if (cp[0] == '%' && cp[1] == 's') { /* const char * */ + const char *s; + s = va_arg(ap, const char *); + raviX_buffer_add_string(buf, s); + cp++; + } else if (cp[0] == '%' && cp[1] == 'c') { /* comment */ + const char *s; + s = va_arg(ap, const char *); + raviX_buffer_add_fstring(buf, "--%s", s); + cp++; + } else if (cp[0] == '%' && cp[1] == 'i') { /* integer */ + lua_Integer i; + i = va_arg(ap, lua_Integer); + raviX_buffer_add_longlong(buf, i); + cp++; + } else if (cp[0] == '%' && cp[1] == 'f') { /* float */ + double d; + d = va_arg(ap, double); + raviX_buffer_add_fstring(buf, "%.16f", d); + cp++; + } else if (cp[0] == '%' && cp[1] == 'b') { /* boolean */ + lua_Integer i; + i = va_arg(ap, lua_Integer); + raviX_buffer_add_bool(buf, i != 0); + cp++; + } else { + raviX_buffer_add_char(buf, *cp); + } + } + va_end(ap); +} + +static void print_ast_node_list(TextBuffer *buf, AstNodeList *list, int level, const char *delimiter) +{ + AstNode *node; + bool is_first = true; + FOR_EACH_PTR(list, node) + { + if (is_first) + is_first = false; + else if (delimiter) + printf_buf(buf, "%p%s\n", level, delimiter); + 
raviX_print_ast_node(buf, node, level + 1); + } + END_FOR_EACH_PTR(node); +} + +static void print_statement_list(TextBuffer *buf, AstNodeList *statement_list, int level) +{ + print_ast_node_list(buf, statement_list, level + 1, NULL); +} + +static inline const char *get_as_str(const StringObject *ts) { return ts ? ts->str : ""; } + +static void print_symbol(TextBuffer *buf, LuaSymbol *sym, int level) +{ + switch (sym->symbol_type) { + case SYM_ENV: { + printf_buf(buf, "%p%t %c %s %s\n", level, sym->variable.var_name, "_ENV", + raviX_get_type_name(sym->variable.value_type.type_code), get_as_str(sym->variable.value_type.type_name)); + break; + } + case SYM_GLOBAL: { + printf_buf(buf, "%p%t %c %s %s\n", level, sym->variable.var_name, "global symbol", + raviX_get_type_name(sym->variable.value_type.type_code), get_as_str(sym->variable.value_type.type_name)); + break; + } + case SYM_LOCAL: { + printf_buf(buf, "%p%t %c %s %s\n", level, sym->variable.var_name, "local symbol", + raviX_get_type_name(sym->variable.value_type.type_code), get_as_str(sym->variable.value_type.type_name)); + break; + } + case SYM_UPVALUE: { + printf_buf(buf, "%p%t %c %s %s\n", level, sym->upvalue.target_variable->variable.var_name, "upvalue", + raviX_get_type_name(sym->upvalue.target_variable->variable.value_type.type_code), + get_as_str(sym->upvalue.target_variable->variable.value_type.type_name)); + break; + } + default: + assert(0); + } +} + +static void print_symbol_name(TextBuffer *buf, LuaSymbol *sym) +{ + switch (sym->symbol_type) { + case SYM_LOCAL: + case SYM_ENV: + case SYM_GLOBAL: { + printf_buf(buf, "%t", sym->variable.var_name); + break; + } + case SYM_UPVALUE: { + if (sym->upvalue.target_variable->symbol_type == SYM_ENV) { + printf_buf(buf, "%t*", sym->upvalue.target_variable->variable.var_name); + } + else { + printf_buf(buf, "%t", sym->upvalue.target_variable->variable.var_name); + } + break; + } + default: + assert(0); + } +} + +static void print_symbol_list(TextBuffer *buf, 
LuaSymbolList *list, int level, const char *delimiter) +{ + LuaSymbol *node; + bool is_first = true; + FOR_EACH_PTR(list, node) + { + if (is_first) + is_first = false; + else if (delimiter) + printf_buf(buf, "%p%s\n", level, delimiter); + print_symbol(buf, node, level + 1); + } + END_FOR_EACH_PTR(node); +} + +static void print_symbol_names(TextBuffer *buf, LuaSymbolList *list) +{ + LuaSymbol *node; + bool is_first = true; + FOR_EACH_PTR(list, node) + { + if (is_first) + is_first = false; + else + printf_buf(buf, ", "); + print_symbol_name(buf, node); + } + END_FOR_EACH_PTR(node); +} + +const char *raviX_get_unary_opr_str(UnaryOperatorType op) +{ + switch (op) { + case UNOPR_NOT: + return "not"; + case UNOPR_MINUS: + return "-"; + case UNOPR_BNOT: + return "~"; + case UNOPR_LEN: + return "#"; + case UNOPR_TO_INTEGER: + return "@integer"; + case UNOPR_TO_NUMBER: + return "@number"; + case UNOPR_TO_INTARRAY: + return "@integer[]"; + case UNOPR_TO_NUMARRAY: + return "@number[]"; + case UNOPR_TO_TABLE: + return "@table"; + case UNOPR_TO_CLOSURE: + return "@closure"; + case UNOPR_TO_STRING: + return "@string"; + case UNOPR_TO_TYPE: + return "@"; + default: + return ""; + } +} + +const char *raviX_get_binary_opr_str(BinaryOperatorType op) +{ + switch (op) { + case BINOPR_ADD: + return "+"; + case BINOPR_SUB: + return "-"; + case BINOPR_MUL: + return "*"; + case BINOPR_MOD: + return "%"; + case BINOPR_POW: + return "^"; + case BINOPR_DIV: + return "/"; + case BINOPR_IDIV: + return "//"; + case BINOPR_BAND: + return "&"; + case BINOPR_BOR: + return "|"; + case BINOPR_BXOR: + return "~"; + case BINOPR_SHL: + return "<<"; + case BINOPR_SHR: + return ">>"; + case BINOPR_CONCAT: + return ".."; + case BINOPR_NE: + return "~="; + case BINOPR_EQ: + return "=="; + case BINOPR_LT: + return "<"; + case BINOPR_LE: + return "<="; + case BINOPR_GT: + return ">"; + case BINOPR_GE: + return ">="; + case BINOPR_AND: + return "and"; + case BINOPR_OR: + return "or"; + default: + return ""; + 
} +} + +void raviX_print_ast_node(TextBuffer *buf, AstNode *node, int level) +{ + switch (node->type) { + case EXPR_FUNCTION: { + if (node->function_expr.args) { + printf_buf(buf, "%pfunction(\n", level); + print_symbol_list(buf, node->function_expr.args, level + 1, ","); + printf_buf(buf, "%p)\n", level); + } else { + printf_buf(buf, "%pfunction()\n", level); + } + if (node->function_expr.locals) { + printf_buf(buf, "%p%c ", level, "locals "); + print_symbol_names(buf, node->function_expr.locals); + printf_buf(buf, "\n"); + } + if (node->function_expr.upvalues) { + printf_buf(buf, "%p%c ", level, "upvalues "); + print_symbol_names(buf, node->function_expr.upvalues); + printf_buf(buf, "\n"); + } + print_statement_list(buf, node->function_expr.function_statement_list, level); + printf_buf(buf, "%pend\n", level); + break; + } + case AST_NONE: + break; + case STMT_RETURN: { + printf_buf(buf, "%preturn\n", level); + print_ast_node_list(buf, node->return_stmt.expr_list, level + 1, ","); + break; + } + case STMT_LOCAL: { + printf_buf(buf, "%plocal\n", level); + printf_buf(buf, "%p%c\n", level, "[symbols]"); + print_symbol_list(buf, node->local_stmt.var_list, level + 1, ","); + if (node->local_stmt.expr_list) { + printf_buf(buf, "%p%c\n", level, "[expressions]"); + print_ast_node_list(buf, node->local_stmt.expr_list, level + 1, ","); + } + break; + } + case STMT_FUNCTION: { + raviX_print_ast_node(buf, node->function_stmt.name, level); + if (node->function_stmt.selectors) { + printf_buf(buf, "%p%c\n", level + 1, "[selectors]"); + print_ast_node_list(buf, node->function_stmt.selectors, level + 2, NULL); + } + if (node->function_stmt.method_name) { + printf_buf(buf, "%p%c\n", level + 1, "[method name]"); + raviX_print_ast_node(buf, node->function_stmt.method_name, level + 2); + } + printf_buf(buf, "%p=\n", level + 1); + raviX_print_ast_node(buf, node->function_stmt.function_expr, level + 2); + break; + } + case STMT_LABEL: { + printf_buf(buf, "%p::%t::\n", level, 
node->label_stmt.symbol->label.label_name); + break; + } + case STMT_GOTO: { + printf_buf(buf, "%pgoto %t\n", level, node->goto_stmt.name); + break; + } + case STMT_DO: { + printf_buf(buf, "%pdo\n", level); + print_ast_node_list(buf, node->do_stmt.do_statement_list, level + 1, NULL); + printf_buf(buf, "%pend\n", level); + break; + } + case STMT_EXPR: { + printf_buf(buf, "%p%c\n", level, "[expression statement start]"); + if (node->expression_stmt.var_expr_list) { + printf_buf(buf, "%p%c\n", level + 1, "[var list start]"); + print_ast_node_list(buf, node->expression_stmt.var_expr_list, level + 2, ","); + printf_buf(buf, "%p= %c\n", level + 1, "[var list end]"); + } + printf_buf(buf, "%p%c\n", level + 1, "[expression list start]"); + print_ast_node_list(buf, node->expression_stmt.expr_list, level + 2, ","); + printf_buf(buf, "%p%c\n", level + 1, "[expression list end]"); + printf_buf(buf, "%p%c\n", level, "[expression statement end]"); + break; + } + case STMT_IF: { + AstNode *test_then_block; + bool is_first = true; + FOR_EACH_PTR(node->if_stmt.if_condition_list, test_then_block) + { + if (is_first) { + is_first = false; + printf_buf(buf, "%pif\n", level); + } else + printf_buf(buf, "%pelseif\n", level); + raviX_print_ast_node(buf, test_then_block->test_then_block.condition, level + 1); + printf_buf(buf, "%pthen\n", level); + print_ast_node_list(buf, test_then_block->test_then_block.test_then_statement_list, level + 1, + NULL); + } + END_FOR_EACH_PTR(node); + if (node->if_stmt.else_block) { + printf_buf(buf, "%pelse\n", level); + print_ast_node_list(buf, node->if_stmt.else_statement_list, level + 1, NULL); + } + printf_buf(buf, "%pend\n", level); + break; + } + case STMT_WHILE: { + printf_buf(buf, "%pwhile\n", level); + raviX_print_ast_node(buf, node->while_or_repeat_stmt.condition, level + 1); + printf_buf(buf, "%pdo\n", level); + print_ast_node_list(buf, node->while_or_repeat_stmt.loop_statement_list, level + 1, NULL); + printf_buf(buf, "%pend\n", level); + break; 
+ } + case STMT_REPEAT: { + printf_buf(buf, "%prepeat\n", level); + print_ast_node_list(buf, node->while_or_repeat_stmt.loop_statement_list, level + 1, NULL); + printf_buf(buf, "%puntil\n", level); + raviX_print_ast_node(buf, node->while_or_repeat_stmt.condition, level + 1); + printf_buf(buf, "%p%c\n", level, "[repeat end]"); + break; + } + case STMT_FOR_IN: { + printf_buf(buf, "%pfor\n", level); + print_symbol_list(buf, node->for_stmt.symbols, level + 1, ","); + printf_buf(buf, "%pin\n", level); + print_ast_node_list(buf, node->for_stmt.expr_list, level + 1, ","); + printf_buf(buf, "%pdo\n", level); + print_statement_list(buf, node->for_stmt.for_statement_list, level + 1); + printf_buf(buf, "%pend\n", level); + break; + } + case STMT_FOR_NUM: { + printf_buf(buf, "%pfor\n", level); + print_symbol_list(buf, node->for_stmt.symbols, level + 1, NULL); + printf_buf(buf, "%p=\n", level); + print_ast_node_list(buf, node->for_stmt.expr_list, level + 1, ","); + printf_buf(buf, "%pdo\n", level); + print_statement_list(buf, node->for_stmt.for_statement_list, level + 1); + printf_buf(buf, "%pend\n", level); + break; + } + case EXPR_SUFFIXED: { + printf_buf(buf, "%p%c %T\n", level, "[suffixed expr start]", &node->suffixed_expr.type); + printf_buf(buf, "%p%c %T\n", level + 1, "[primary start]", + &node->suffixed_expr.primary_expr->common_expr.type); + raviX_print_ast_node(buf, node->suffixed_expr.primary_expr, level + 2); + printf_buf(buf, "%p%c\n", level + 1, "[primary end]"); + if (node->suffixed_expr.suffix_list) { + printf_buf(buf, "%p%c\n", level + 1, "[suffix list start]"); + print_ast_node_list(buf, node->suffixed_expr.suffix_list, level + 2, NULL); + printf_buf(buf, "%p%c\n", level + 1, "[suffix list end]"); + } + printf_buf(buf, "%p%c\n", level, "[suffixed expr end]"); + break; + } + case EXPR_FUNCTION_CALL: { + printf_buf(buf, "%p%c %T\n", level, "[function call start]", &node->function_call_expr.type); + if (node->function_call_expr.method_name) { + printf_buf(buf, 
"%p: %t (\n", level + 1, node->function_call_expr.method_name); + } else { + printf_buf(buf, "%p(\n", level + 1); + } + print_ast_node_list(buf, node->function_call_expr.arg_list, level + 2, ","); + printf_buf(buf, "%p)\n", level + 1); + printf_buf(buf, "%p%c\n", level, "[function call end]"); + break; + } + case EXPR_SYMBOL: { + print_symbol(buf, node->symbol_expr.var, level + 1); + break; + } + case EXPR_BINARY: { + printf_buf(buf, "%p%c %T\n", level, "[binary expr start]", &node->binary_expr.type); + raviX_print_ast_node(buf, node->binary_expr.expr_left, level + 1); + printf_buf(buf, "%p%s\n", level, raviX_get_binary_opr_str(node->binary_expr.binary_op)); + raviX_print_ast_node(buf, node->binary_expr.expr_right, level + 1); + printf_buf(buf, "%p%c\n", level, "[binary expr end]"); + break; + } + case EXPR_UNARY: { + printf_buf(buf, "%p%c %T\n", level, "[unary expr start]", &node->unary_expr.type); + printf_buf(buf, "%p%s\n", level, raviX_get_unary_opr_str(node->unary_expr.unary_op)); + raviX_print_ast_node(buf, node->unary_expr.expr, level + 1); + printf_buf(buf, "%p%c\n", level, "[unary expr end]"); + break; + } + case EXPR_LITERAL: { + printf_buf(buf, "%p", level); + switch (node->literal_expr.type.type_code) { + case RAVI_TNIL: + printf_buf(buf, "nil"); + break; + case RAVI_TBOOLEAN: + printf_buf(buf, "%b", node->literal_expr.u.i); + break; + case RAVI_TNUMINT: + printf_buf(buf, "%i", node->literal_expr.u.i); + break; + case RAVI_TNUMFLT: + printf_buf(buf, "%f", node->literal_expr.u.r); + break; + case RAVI_TSTRING: + printf_buf(buf, "'%t'", node->literal_expr.u.ts); + break; + case RAVI_TVARARGS: + printf_buf(buf, "..."); + break; + default: + assert(0); + } + printf_buf(buf, "\n"); + break; + } + case EXPR_FIELD_SELECTOR: { + printf_buf(buf, "%p%c %T\n", level, "[field selector start]", &node->index_expr.type); + printf_buf(buf, "%p.\n", level + 1); + raviX_print_ast_node(buf, node->index_expr.expr, level + 2); + printf_buf(buf, "%p%c\n", level, "[field 
selector end]"); + break; + } + case EXPR_Y_INDEX: { + printf_buf(buf, "%p%c %T\n", level, "[Y index start]", &node->index_expr.type); + printf_buf(buf, "%p[\n", level + 1); + raviX_print_ast_node(buf, node->index_expr.expr, level + 2); + printf_buf(buf, "%p]\n", level + 1); + printf_buf(buf, "%p%c\n", level, "[Y index end]"); + break; + } + case EXPR_TABLE_ELEMENT_ASSIGN: { + printf_buf(buf, "%p%c %T\n", level, "[indexed assign start]", &node->table_elem_assign_expr.type); + if (node->table_elem_assign_expr.key_expr) { + printf_buf(buf, "%p%c\n", level, "[index start]"); + raviX_print_ast_node(buf, node->table_elem_assign_expr.key_expr, level + 1); + printf_buf(buf, "%p%c\n", level, "[index end]"); + } + printf_buf(buf, "%p%c\n", level, "[value start]"); + raviX_print_ast_node(buf, node->table_elem_assign_expr.value_expr, level + 1); + printf_buf(buf, "%p%c\n", level, "[value end]"); + printf_buf(buf, "%p%c\n", level, "[indexed assign end]"); + break; + } + case EXPR_TABLE_LITERAL: { + printf_buf(buf, "%p{ %c %T\n", level, "[table constructor start]", &node->table_expr.type); + print_ast_node_list(buf, node->table_expr.expr_list, level + 1, ","); + printf_buf(buf, "%p} %c\n", level, "[table constructor end]"); + break; + } + default: + printf_buf(buf, "%pUnsupported node type %d\n", level, node->type); + assert(0); + } +} + +void raviX_output_ast(CompilerState *container, FILE *fp) +{ + TextBuffer mbuf; + raviX_buffer_init(&mbuf, 1024); + raviX_print_ast_node(&mbuf, container->main_function, 0); + fputs(mbuf.buf, fp); + raviX_buffer_free(&mbuf); +} diff --git a/ravicomp/src/ast_simplify.c b/ravicomp/src/ast_simplify.c new file mode 100644 index 0000000..e3fe75d --- /dev/null +++ b/ravicomp/src/ast_simplify.c @@ -0,0 +1,516 @@ +/* Replace constant expressions with constants, and simply any other expressions if possible */ +/* Portions Copyright (C) 1994-2019 Lua.org, PUC-Rio.*/ + +#include + +#include +#include + +static void process_expression_list(CompilerState 
*container, AstNodeList *node); +static void process_statement_list(CompilerState *container, AstNodeList *node); +static void process_statement(CompilerState *container, AstNode *node); + +#define l_mathop(op) op + +#ifndef CHAR_BIT +#define CHAR_BIT 8 +#endif +/* number of bits in an integer */ +#define NBITS ((int)(sizeof(lua_Integer) * CHAR_BIT)) +/* +@@ lua_numbertointeger converts a float number with an integral value +** to an integer, or returns 0 if float is not within the range of +** a lua_Integer. (The range comparisons are tricky because of +** rounding. The tests here assume a two-complement representation, +** where MININTEGER always has an exact representation as a float; +** MAXINTEGER may not have one, and therefore its conversion to float +** may have an ill-defined value.) +*/ +#define lua_numbertointeger(n, p) \ + ((n) >= (lua_Number)(LUA_MININTEGER) && (n) < -(lua_Number)(LUA_MININTEGER) && (*(p) = (lua_Integer)(n), 1)) + +/* +** Rounding modes for float->integer coercion +*/ +typedef enum { + F2Ieq, /* no rounding; accepts only integral values */ + F2Ifloor, /* takes the floor of the number */ + F2Iceil /* takes the ceil of the number */ +} F2Imod; + +#if !defined(LUA_FLOORN2I) +#define LUA_FLOORN2I F2Ieq +#endif + +/* +** The luai_num* macros define the primitive operations over numbers. +*/ + +/* floor division (defined as 'floor(a/b)') */ +#if !defined(luai_numidiv) +#define luai_numidiv(a, b) (l_floor(luai_numdiv(a, b))) +#endif + +/* float division */ +#if !defined(luai_numdiv) +#define luai_numdiv(a, b) ((a) / (b)) +#endif + +/* +** modulo: defined as 'a - floor(a/b)*b'; this definition gives NaN when +** 'b' is huge, but the result should be 'a'. 'fmod' gives the result of +** 'a - trunc(a/b)*b', and therefore must be corrected when 'trunc(a/b) +** ~= floor(a/b)'. That happens when the division has a non-integer +** negative result, which is equivalent to the test below. 
+*/ +#if !defined(luai_nummod) +#define luai_nummod(a, b, m) \ + { \ + (m) = l_mathop(fmod)(a, b); \ + if ((m) * (b) < 0) \ + (m) += (b); \ + } +#endif +#define l_floor(x) (l_mathop(floor)(x)) + +/* exponentiation */ +#if !defined(luai_numpow) +#define luai_numpow(a, b) (l_mathop(pow)(a, b)) +#endif + +/* the others are quite standard operations */ +#if !defined(luai_numadd) +#define luai_numadd(a, b) ((a) + (b)) +#define luai_numsub(a, b) ((a) - (b)) +#define luai_nummul(a, b) ((a) * (b)) +#define luai_numunm(a) (-(a)) +#define luai_numeq(a, b) ((a) == (b)) +#define luai_numlt(a, b) ((a) < (b)) +#define luai_numle(a, b) ((a) <= (b)) +#define luai_numisnan(a) (!luai_numeq((a), (a))) +#endif + +/* cast a signed lua_Integer to lua_Unsigned */ +#if !defined(l_castS2U) +#define l_castS2U(i) ((lua_Unsigned)(i)) +#endif + +/* +** cast a lua_Unsigned to a signed lua_Integer; this cast is +** not strict ISO C, but two-complement architectures should +** work fine. +*/ +#if !defined(l_castU2S) +#define l_castU2S(i) ((lua_Integer)(i)) +#endif + +/* +** macros to improve jump prediction (used mainly for error handling) +*/ +#if !defined(likely) + +#if defined(__GNUC__) +#define likely(x) (__builtin_expect(((x) != 0), 1)) +#define unlikely(x) (__builtin_expect(((x) != 0), 0)) +#else +#define likely(x) (x) +#define unlikely(x) (x) +#endif + +#endif + +#define cast(t, exp) ((t)(exp)) +#define cast_num(i) cast(lua_Number, (i)) +#define cast_int(i) cast(int, (i)) + +#define ttisfloat(o) (o->type.type_code == RAVI_TNUMFLT) +#define ttisinteger(o) (o->type.type_code == RAVI_TNUMINT) +#define fltvalue(o) (o->u.r) +#define ivalue(o) (o->u.i) +#define setivalue(o, v) (o->type.type_code = RAVI_TNUMINT, o->u.i = (v)) +#define setfltvalue(o, v) (o->type.type_code = RAVI_TNUMFLT, o->u.r = (v)) + +/* convert an object to a float (without string coercion) */ +#define tonumberns(o, n) (ttisfloat(o) ? ((n) = fltvalue(o), 1) : (ttisinteger(o) ? 
((n) = cast_num(ivalue(o)), 1) : 0))
+
+/* convert an object to an integer (including string coercion) */
+#define tointeger(o, i) (RAVI_LIKELY(ttisinteger(o)) ? (*(i) = ivalue(o), 1) : luaV_tointeger(o, i, LUA_FLOORN2I))
+
+/* convert an object to an integer (without string coercion) */
+#define tointegerns(o, i) (ttisinteger(o) ? (*(i) = ivalue(o), 1) : luaV_tointegerns(o, i, LUA_FLOORN2I))
+
+/*
+** Integer operation performed on the unsigned representation and cast
+** back to lua_Integer, so that overflow wraps around instead of being
+** undefined behaviour on signed operands.
+*/
+#if !defined(intop)
+#define intop(op, v1, v2) l_castU2S(l_castS2U(v1) op l_castS2U(v2))
+#endif
+
+/*
+** Append 'msg' to container->error_message and abandon the current pass
+** by jumping to the setjmp point stored in container->env (value 1).
+** Does not return to the caller.
+*/
+static void handle_error(CompilerState *container, const char *msg)
+{
+	// TODO source and line number
+	raviX_buffer_add_string(&container->error_message, msg);
+	longjmp(container->env, 1);
+}
+
+/*
+** Integer division; return 'm // n', that is, floor(m/n).
+** C division truncates its result (rounds towards zero).
+** 'floor(q) == trunc(q)' when 'q >= 0' or when 'q' is integer,
+** otherwise 'floor(q) == trunc(q) - 1'.
+** A zero divisor is reported through handle_error() (no return).
+*/
+static lua_Integer luaV_idiv(CompilerState *compiler_state, lua_Integer m, lua_Integer n)
+{
+	if (unlikely(l_castS2U(n) + 1u <= 1u)) { /* special cases: -1 or 0 */
+		if (n == 0)
+			handle_error(compiler_state, "attempt to divide by zero");
+		/* n==-1; negate with wrap-around so that 0x80000...//-1 is defined */
+		return intop(-, 0, m);
+	} else {
+		lua_Integer q = m / n;	       /* perform C division */
+		if ((m ^ n) < 0 && m % n != 0) /* 'm/n' would be negative non-integer? */
+			q -= 1;		       /* correct result for different rounding */
+		return q;
+	}
+}
+
+/*
+** Integer modulus; return 'm % n'. (Assume that C '%' with
+** negative operands follows C99 behavior. See previous comment
+** about luaV_idiv.)
+** A zero divisor is reported through handle_error() (no return).
+*/
+static lua_Integer luaV_mod(CompilerState *compiler_state, lua_Integer m, lua_Integer n)
+{
+	if (unlikely(l_castS2U(n) + 1u <= 1u)) { /* special cases: -1 or 0 */
+		if (n == 0) {
+			/*
+			 * NOTE(review): handle_error() passes the text to
+			 * raviX_buffer_add_string(), which is presumed not to be
+			 * printf-style (confirm in membuf.h), so '%' is written
+			 * once rather than as the format escape "%%".
+			 */
+			handle_error(compiler_state, "attempt to perform 'n%0'");
+		}
+		return 0; /* m % -1 == 0; avoid overflow with 0x80000...%-1 */
+	} else {
+		lua_Integer r = m % n;
+		if (r != 0 && (r ^ n) < 0) /* 'm/n' would be non-integer negative? */
+			r += n;		   /* correct result for different rounding */
+		return r;
+	}
+}
+
+/*
+** Float modulus
+*/
+static lua_Number luaV_modf(lua_Number m, lua_Number n)
+{
+	lua_Number r;
+	luai_nummod(m, n, r);
+	return r;
+}
+
+/*
+** Shift left operation. (Shift right just negates 'y'.)
+** Both directions are logical shifts on the unsigned representation:
+** vacated bits are filled with zeros, and a shift count of NBITS or
+** more yields 0.
+*/
+static lua_Integer luaV_shiftl(lua_Integer x, lua_Integer y)
+{
+	if (y < 0) { /* shift right? */
+		if (y <= -NBITS)
+			return 0;
+		else
+			return intop(>>, x, -y); /* -y is in 1..NBITS-1 here */
+	} else { /* shift left */
+		if (y >= NBITS)
+			return 0;
+		else
+			return intop(<<, x, y);
+	}
+}
+
+/*
+** Evaluate integer operator 'op' on v1 and v2 ('v2' is ignored by the
+** unary operators). Arithmetic wraps around on overflow. Division and
+** modulus by zero are reported through handle_error() (no return).
+*/
+static lua_Integer intarith(CompilerState *compiler_state, int op, lua_Integer v1, lua_Integer v2)
+{
+	switch (op) {
+	case BINOPR_ADD:
+		return intop(+, v1, v2);
+	case BINOPR_SUB:
+		return intop(-, v1, v2);
+	case BINOPR_MUL:
+		return intop(*, v1, v2);
+	case BINOPR_MOD:
+		return luaV_mod(compiler_state, v1, v2);
+	case BINOPR_IDIV:
+		return luaV_idiv(compiler_state, v1, v2);
+	case BINOPR_BAND:
+		return intop(&, v1, v2);
+	case BINOPR_BOR:
+		return intop(|, v1, v2);
+	case BINOPR_BXOR:
+		return intop(^, v1, v2);
+	case BINOPR_SHL:
+		return luaV_shiftl(v1, v2);
+	case BINOPR_SHR:
+		/* negate with wrap-around so that the minimum integer is defined */
+		return luaV_shiftl(v1, intop(-, 0, v2));
+	case UNOPR_MINUS:
+		return intop(-, 0, v1);
+	case UNOPR_BNOT:
+		return intop(^, ~l_castS2U(0), v1);
+	default:
+		assert(0);
+		return 0;
+	}
+}
+
+/*
+** Evaluate floating point operator 'op' on v1 and v2 ('v2' is ignored
+** by unary minus). 'compiler_state' is not used.
+*/
+static lua_Number numarith(CompilerState *compiler_state, int op, lua_Number v1, lua_Number v2)
+{
+	switch (op) {
+	case BINOPR_ADD:
+		return luai_numadd(v1, v2);
+	case BINOPR_SUB:
+		return luai_numsub(v1, v2);
+	case BINOPR_MUL:
+		return luai_nummul(v1, v2);
+	case BINOPR_DIV:
+		return luai_numdiv(v1, v2);
+	case BINOPR_POW:
+		return luai_numpow(v1, v2);
+	case BINOPR_IDIV:
+		return luai_numidiv(v1, v2);
+	case UNOPR_MINUS:
+		return luai_numunm(v1);
+	case BINOPR_MOD:
+		return luaV_modf(v1, v2);
+	default:
+		assert(0);
+		return 0;
+	}
+}
+
+/*
+** try to convert a float to an integer, rounding according to 'mode'.
+** Returns 1 and stores the integer in '*p' on success, 0 otherwise.
+*/
+static int luaV_flttointeger(lua_Number n, lua_Integer *p, F2Imod mode)
+{
+	lua_Number f = l_floor(n);
+	if (n != f) { /* not an integral value? */
+		if (mode == F2Ieq)
+			return 0;	  /* fails if mode demands integral value */
+		else if (mode == F2Iceil) /* needs ceil? */
+			f += 1;		  /* convert floor to ceil (remember: n != f) */
+	}
+	return lua_numbertointeger(f, p);
+}
+
+/*
+** try to convert a value to an integer, rounding according to 'mode',
+** without string coercion.
+** ("Fast track" handled by macro 'tointegerns'.)
+*/
+static int luaV_tointegerns(const LiteralExpression *obj, lua_Integer *p, F2Imod mode)
+{
+	if (ttisfloat(obj))
+		return luaV_flttointeger(fltvalue(obj), p, mode);
+	else if (ttisinteger(obj)) {
+		*p = ivalue(obj);
+		return 1;
+	} else
+		return 0;
+}
+
+/*
+** Apply operator 'op' to the literals 'p1' and 'p2' and store the result
+** (type code and value) in 'res'. Returns 1 on success, or 0 when the
+** operands cannot be converted to the numeric type the operator needs,
+** in which case 'res' is left untouched.
+*/
+static int luaO_rawarith(CompilerState *compiler_state, int op, const LiteralExpression *p1,
+			 const LiteralExpression *p2, LiteralExpression *res)
+{
+	switch (op) {
+	case BINOPR_BAND:
+	case BINOPR_BOR:
+	case BINOPR_BXOR:
+	case BINOPR_SHL:
+	case BINOPR_SHR:
+	case UNOPR_BNOT: { /* operate only on integers */
+		lua_Integer i1;
+		lua_Integer i2;
+		if (tointegerns(p1, &i1) && tointegerns(p2, &i2)) {
+			setivalue(res, intarith(compiler_state, op, i1, i2));
+			return 1;
+		} else
+			return 0; /* fail */
+	}
+	case BINOPR_DIV:
+	case BINOPR_POW: { /* operate only on floats */
+		lua_Number n1;
+		lua_Number n2;
+		if (tonumberns(p1, n1) && tonumberns(p2, n2)) {
+			setfltvalue(res, numarith(compiler_state, op, n1, n2));
+			return 1;
+		} else
+			return 0; /* fail */
+	}
+	default: { /* other operations */
+		lua_Number n1;
+		lua_Number n2;
+		if (ttisinteger(p1) && ttisinteger(p2)) {
+			setivalue(res, intarith(compiler_state, op, ivalue(p1), ivalue(p2)));
+			return 1;
+		} else if (tonumberns(p1, n1) && tonumberns(p2, n2)) {
+			setfltvalue(res, numarith(compiler_state, op, n1, n2));
+			return 1;
+		} else
+			return 0; /* fail */
+	}
+	}
+}
+
+static void process_expression(CompilerState *container, AstNode
*node)
+{
+	/*
+	 * Simplify the expression rooted at 'node' in place: children are
+	 * processed first, then binary/unary operators whose operands are
+	 * literals are replaced by the literal result, and a suffixed
+	 * expression without suffixes is replaced by its primary expression.
+	 */
+	switch (node->type) {
+	case EXPR_FUNCTION:
+		process_statement_list(container, node->function_expr.function_statement_list);
+		break;
+	case EXPR_SUFFIXED:
+		process_expression(container, node->suffixed_expr.primary_expr);
+		if (node->suffixed_expr.suffix_list) {
+			process_expression_list(container, node->suffixed_expr.suffix_list);
+		} else {
+			// We can simplify and get rid of the suffixed expr
+			// TODO free primary_expr
+			memcpy(node, node->suffixed_expr.primary_expr, sizeof(AstNode));
+		}
+		break;
+	case EXPR_FUNCTION_CALL:
+		process_expression_list(container, node->function_call_expr.arg_list);
+		break;
+	case EXPR_SYMBOL:
+		break;
+	case EXPR_BINARY:
+		process_expression(container, node->binary_expr.expr_left);
+		process_expression(container, node->binary_expr.expr_right);
+		/*
+		 * NOTE(review): folding an integer '//' or '%' by a literal zero
+		 * ends the whole pass through handle_error(); stock Lua leaves
+		 * such expressions unfolded and raises at run time - confirm
+		 * this is intended.
+		 */
+		if (node->binary_expr.expr_left->type == EXPR_LITERAL &&
+		    node->binary_expr.expr_right->type == EXPR_LITERAL &&
+		    node->binary_expr.binary_op >= BINOPR_ADD &&
+		    node->binary_expr.binary_op <= BINOPR_SHR) {
+			LiteralExpression result = {.type.type_code = RAVI_TANY};
+			if (luaO_rawarith(container, node->binary_expr.binary_op,
+					  &node->binary_expr.expr_left->literal_expr,
+					  &node->binary_expr.expr_right->literal_expr, &result)) {
+				/* turn this node into the literal result */
+				node->type = EXPR_LITERAL;
+				node->literal_expr.type.type_code = result.type.type_code;
+				if (node->literal_expr.type.type_code == RAVI_TNUMFLT)
+					node->literal_expr.u.r = result.u.r;
+				else {
+					assert(node->literal_expr.type.type_code == RAVI_TNUMINT);
+					node->literal_expr.u.i = result.u.i;
+				}
+				// TODO free expr_left and expr_right
+			}
+		}
+		break;
+	case EXPR_UNARY:
+		process_expression(container, node->unary_expr.expr);
+		if (node->unary_expr.expr->type == EXPR_LITERAL &&
+		    (node->unary_expr.unary_op == UNOPR_BNOT || node->unary_expr.unary_op == UNOPR_MINUS)) {
+			LiteralExpression result = {.type.type_code = RAVI_TANY};
+			/* the single operand is passed as both arguments of the operator */
+			if (luaO_rawarith(container, node->unary_expr.unary_op, &node->unary_expr.expr->literal_expr,
+					  &node->unary_expr.expr->literal_expr, &result)) {
+				/* turn this node into the literal result */
+				node->type = EXPR_LITERAL;
+				node->literal_expr.type.type_code = result.type.type_code;
+				if (node->literal_expr.type.type_code == RAVI_TNUMFLT)
+					node->literal_expr.u.r = result.u.r;
+				else {
+					assert(node->literal_expr.type.type_code == RAVI_TNUMINT);
+					node->literal_expr.u.i = result.u.i;
+				}
+				// TODO free unary_expr.expr
+			}
+		}
+		break;
+	case EXPR_LITERAL:
+		break;
+	case EXPR_FIELD_SELECTOR:
+		process_expression(container, node->index_expr.expr);
+		break;
+	case EXPR_Y_INDEX:
+		process_expression(container, node->index_expr.expr);
+		break;
+	case EXPR_TABLE_ELEMENT_ASSIGN:
+		if (node->table_elem_assign_expr.key_expr) {
+			process_expression(container, node->table_elem_assign_expr.key_expr);
+		}
+		process_expression(container, node->table_elem_assign_expr.value_expr);
+		break;
+	case EXPR_TABLE_LITERAL:
+		process_expression_list(container, node->table_expr.expr_list);
+		break;
+	default:
+		assert(0);
+		break;
+	}
+}
+
+/* Simplify every expression in 'list'. */
+static void process_expression_list(CompilerState *container, AstNodeList *list)
+{
+	AstNode *node;
+	FOR_EACH_PTR(list, node) { process_expression(container, node); }
+	END_FOR_EACH_PTR(node);
+}
+
+/* Simplify the expressions of every statement in 'list'. */
+static void process_statement_list(CompilerState *container, AstNodeList *list)
+{
+	AstNode *node;
+	FOR_EACH_PTR(list, node) { process_statement(container, node); }
+	END_FOR_EACH_PTR(node);
+}
+
+/*
+ * Simplify the expressions and nested statement lists reachable from the
+ * statement 'node'. An unknown node type is printed to stderr and asserts.
+ */
+static void process_statement(CompilerState *container, AstNode *node)
+{
+	switch (node->type) {
+	case AST_NONE:
+		break;
+	case STMT_RETURN:
+		process_expression_list(container, node->return_stmt.expr_list);
+		break;
+	case STMT_LOCAL:
+		process_expression_list(container, node->local_stmt.expr_list);
+		break;
+	case STMT_FUNCTION:
+		process_expression(container, node->function_stmt.function_expr);
+		break;
+	case STMT_LABEL:
+	case STMT_GOTO:
+		break;
+	case STMT_DO:
+		process_statement_list(container, node->do_stmt.do_statement_list);
+		break;
+	case STMT_EXPR:
+		if (node->expression_stmt.var_expr_list) {
+			process_expression_list(container, node->expression_stmt.var_expr_list);
+		}
+		process_expression_list(container, node->expression_stmt.expr_list);
+		break;
+	case STMT_IF: {
+		AstNode *test_then_block;
+		FOR_EACH_PTR(node->if_stmt.if_condition_list, test_then_block)
+		{
+			process_expression(container, test_then_block->test_then_block.condition);
+			process_statement_list(container, test_then_block->test_then_block.test_then_statement_list);
+		}
+		END_FOR_EACH_PTR(test_then_block); /* close the loop with the variable it was opened on */
+		if (node->if_stmt.else_block) {
+			process_statement_list(container, node->if_stmt.else_statement_list);
+		}
+		break;
+	}
+	case STMT_WHILE:
+		process_expression(container, node->while_or_repeat_stmt.condition);
+		process_statement_list(container, node->while_or_repeat_stmt.loop_statement_list);
+		break;
+	case STMT_REPEAT:
+		process_statement_list(container, node->while_or_repeat_stmt.loop_statement_list);
+		process_expression(container, node->while_or_repeat_stmt.condition);
+		break;
+	case STMT_FOR_IN:
+	case STMT_FOR_NUM:
+		process_expression_list(container, node->for_stmt.expr_list);
+		process_statement_list(container, node->for_stmt.for_statement_list);
+		break;
+	default:
+		fprintf(stderr, "AST = %d\n", node->type);
+		assert(0);
+		break;
+	}
+}
+
+int raviX_ast_simplify(CompilerState *container)
+{
+	int rc = setjmp(container->env);
+	if (rc == 0) {
+		process_expression(container, container->main_function);
+	} else {
+		// dump it?
+ } + return rc; +} diff --git a/ravicomp/src/ast_walker.c b/ravicomp/src/ast_walker.c new file mode 100644 index 0000000..c143080 --- /dev/null +++ b/ravicomp/src/ast_walker.c @@ -0,0 +1,617 @@ +#include + +#include + +const FunctionExpression *raviX_ast_get_main_function(const CompilerState *compiler_state) +{ + return &compiler_state->main_function->function_expr; +} +const VariableType *raviX_function_type(const FunctionExpression *function_expression) +{ + return &function_expression->type; +} +bool raviX_function_is_vararg(const FunctionExpression *function_expression) +{ + return function_expression->is_vararg; +} +bool raviX_function_is_method(const FunctionExpression *function_expression) +{ + return function_expression->is_method; +} +const FunctionExpression *raviX_function_parent(const FunctionExpression *function_expression) +{ + if (function_expression->parent_function == NULL) + return NULL; + else + return &function_expression->parent_function->function_expr; +} +void raviX_function_foreach_child(const FunctionExpression *function_expression, void *userdata, + void (*callback)(void *userdata, + const FunctionExpression *function_expression)) +{ + AstNode *node; + FOR_EACH_PTR(function_expression->child_functions, node) { callback(userdata, &node->function_expr); } + END_FOR_EACH_PTR(node) +} +const Scope *raviX_function_scope(const FunctionExpression *function_expression) +{ + return function_expression->main_block; +} +void raviX_function_foreach_statement(const FunctionExpression *function_expression, void *userdata, + void (*callback)(void *userdata, const Statement *statement)) +{ + AstNode *node; + FOR_EACH_PTR(function_expression->function_statement_list, node) + { + assert(node->type <= STMT_EXPR); + callback(userdata, (Statement *)node); + } + END_FOR_EACH_PTR(node) +} +enum AstNodeType raviX_statement_type(const Statement *statement) { return statement->type; } +void raviX_function_foreach_argument(const FunctionExpression 
*function_expression, void *userdata, + void (*callback)(void *userdata, const LuaVariableSymbol *symbol)) +{ + LuaSymbol *symbol; + FOR_EACH_PTR(function_expression->args, symbol) { callback(userdata, &symbol->variable); } + END_FOR_EACH_PTR(symbol) +} +void raviX_function_foreach_local(const FunctionExpression *function_expression, void *userdata, + void (*callback)(void *userdata, const LuaVariableSymbol *lua_local_symbol)) +{ + LuaSymbol *symbol; + FOR_EACH_PTR(function_expression->locals, symbol) { callback(userdata, &symbol->variable); } + END_FOR_EACH_PTR(symbol) +} +void raviX_function_foreach_upvalue(const FunctionExpression *function_expression, void *userdata, + void (*callback)(void *userdata, const LuaUpvalueSymbol *symbol)) +{ + LuaSymbol *symbol; + FOR_EACH_PTR(function_expression->upvalues, symbol) { callback(userdata, &symbol->upvalue); } + END_FOR_EACH_PTR(symbol) +} + +const StringObject *raviX_variable_symbol_name(const LuaVariableSymbol *lua_local_symbol) +{ + return lua_local_symbol->var_name; +} + +const VariableType *raviX_variable_symbol_type(const LuaVariableSymbol *lua_local_symbol) +{ + return &lua_local_symbol->value_type; +} + +const Scope *raviX_variable_symbol_scope(const LuaVariableSymbol *lua_local_symbol) +{ + return lua_local_symbol->block; +} + +#define n(v) ((AstNode *)v) +const ReturnStatement *raviX_return_statement(const Statement *stmt) +{ + assert(stmt->type == STMT_RETURN); + return &n(stmt)->return_stmt; +} +const LabelStatement *raviX_label_statement(const Statement *stmt) +{ + assert(stmt->type == STMT_LABEL); + return &n(stmt)->label_stmt; +} +const GotoStatement *raviX_goto_statement(const Statement *stmt) +{ + assert(stmt->type == STMT_GOTO); + return &n(stmt)->goto_stmt; +} +const LocalStatement *raviX_local_statement(const Statement *stmt) +{ + assert(stmt->type == STMT_LOCAL); + return &n(stmt)->local_stmt; +} +const ExpressionStatement *raviX_expression_statement(const Statement *stmt) +{ + assert(stmt->type == 
STMT_EXPR); + return &n(stmt)->expression_stmt; +} +const FunctionStatement *raviX_function_statement(const Statement *stmt) +{ + assert(stmt->type == STMT_FUNCTION); + return &n(stmt)->function_stmt; +} +const DoStatement *raviX_do_statement(const Statement *stmt) +{ + assert(stmt->type == STMT_DO); + return &n(stmt)->do_stmt; +} +const TestThenStatement *raviX_test_then_statement(const Statement *stmt) +{ + assert(stmt->type == STMT_TEST_THEN); + return &n(stmt)->test_then_block; +} +const IfStatement *raviX_if_statement(const Statement *stmt) +{ + assert(stmt->type == STMT_IF); + return &n(stmt)->if_stmt; +} +const WhileOrRepeatStatement *raviX_while_or_repeat_statement(const Statement *stmt) +{ + assert(stmt->type == STMT_WHILE || stmt->type == STMT_REPEAT); + return &n(stmt)->while_or_repeat_stmt; +} +const ForStatement *raviX_for_statement(const Statement *stmt) +{ + assert(stmt->type == STMT_FOR_IN || stmt->type == STMT_FOR_NUM); + return &n(stmt)->for_stmt; +} +enum AstNodeType raviX_expression_type(const Expression *expression) { return expression->type; } +const LiteralExpression *raviX_literal_expression(const Expression *expr) +{ + assert(expr->type == EXPR_LITERAL); + return &n(expr)->literal_expr; +} +const SymbolExpression *raviX_symbol_expression(const Expression *expr) +{ + assert(expr->type == EXPR_SYMBOL); + return &n(expr)->symbol_expr; +} +const IndexExpression *raviX_index_expression(const Expression *expr) +{ + assert(expr->type == EXPR_Y_INDEX || expr->type == EXPR_FIELD_SELECTOR); + return &n(expr)->index_expr; +} +const UnaryExpression *raviX_unary_expression(const Expression *expr) +{ + assert(expr->type == EXPR_UNARY); + return &n(expr)->unary_expr; +} +const BinaryExpression *raviX_binary_expression(const Expression *expr) +{ + assert(expr->type == EXPR_BINARY); + return &n(expr)->binary_expr; +} +const FunctionExpression *raviX_function_expression(const Expression *expr) +{ + assert(expr->type == EXPR_FUNCTION); + return 
&n(expr)->function_expr; +} +const TableElementAssignmentExpression * +raviX_table_element_assignment_expression(const Expression *expr) +{ + assert(expr->type == EXPR_TABLE_ELEMENT_ASSIGN); + return &n(expr)->table_elem_assign_expr; +} +const TableLiteralExpression *raviX_table_literal_expression(const Expression *expr) +{ + assert(expr->type == EXPR_TABLE_LITERAL); + return &n(expr)->table_expr; +} +const SuffixedExpression *raviX_suffixed_expression(const Expression *expr) +{ + assert(expr->type == EXPR_SUFFIXED); + return &n(expr)->suffixed_expr; +} +const FunctionCallExpression *raviX_function_call_expression(const Expression *expr) +{ + assert(expr->type == EXPR_FUNCTION_CALL); + return &n(expr)->function_call_expr; +} +#undef n + +void raviX_return_statement_foreach_expression(const ReturnStatement *statement, void *userdata, + void (*callback)(void *, const Expression *expr)) +{ + AstNode *node; + FOR_EACH_PTR(statement->expr_list, node) + { + assert(node->type >= EXPR_LITERAL && node->type <= EXPR_FUNCTION_CALL); + callback(userdata, (Expression *)node); + } + END_FOR_EACH_PTR(node) +} + +const StringObject *raviX_label_statement_label_name(const LabelStatement *statement) +{ + return statement->symbol->label.label_name; +} +const Scope *raviX_label_statement_label_scope(const LabelStatement *statement) +{ + return statement->symbol->label.block; +} + +const StringObject *raviX_goto_statement_label_name(const GotoStatement *statement) +{ + return statement->name; +} +const Scope *raviX_goto_statement_scope(const GotoStatement *statement) +{ + return statement->goto_scope; +} +bool raviX_goto_statement_is_break(const GotoStatement *statement) { return statement->is_break; } + +void raviX_local_statement_foreach_expression(const LocalStatement *statement, void *userdata, + void (*callback)(void *, const Expression *expr)) +{ + AstNode *node; + FOR_EACH_PTR(statement->expr_list, node) + { + assert(node->type >= EXPR_LITERAL && node->type <= 
EXPR_FUNCTION_CALL);
+		callback(userdata, (Expression *)node);
+	}
+	END_FOR_EACH_PTR(node)
+}
+/* Invoke 'callback' for each variable symbol declared by the local statement. */
+void raviX_local_statement_foreach_symbol(const LocalStatement *statement, void *userdata,
+					  void (*callback)(void *, const LuaVariableSymbol *expr))
+{
+	LuaSymbol *symbol;
+	FOR_EACH_PTR(statement->var_list, symbol)
+	{
+		assert(symbol->symbol_type == SYM_LOCAL);
+		callback(userdata, &symbol->variable);
+	}
+	END_FOR_EACH_PTR(symbol) /* was 'node', which is not declared in this function */
+}
+/* Invoke 'callback' for each expression in the statement's var_expr_list (assignment targets). */
+void raviX_expression_statement_foreach_lhs_expression(const ExpressionStatement *statement, void *userdata,
+						       void (*callback)(void *, const Expression *expr))
+{
+	AstNode *node;
+	FOR_EACH_PTR(statement->var_expr_list, node)
+	{
+		assert(node->type >= EXPR_LITERAL && node->type <= EXPR_FUNCTION_CALL);
+		callback(userdata, (Expression *)node);
+	}
+	END_FOR_EACH_PTR(node)
+}
+/* Invoke 'callback' for each expression in the statement's expr_list. */
+void raviX_expression_statement_foreach_rhs_expression(const ExpressionStatement *statement, void *userdata,
+						       void (*callback)(void *, const Expression *expr))
+{
+	AstNode *node;
+	FOR_EACH_PTR(statement->expr_list, node)
+	{
+		assert(node->type >= EXPR_LITERAL && node->type <= EXPR_FUNCTION_CALL);
+		callback(userdata, (Expression *)node);
+	}
+	END_FOR_EACH_PTR(node)
+}
+/* Symbol expression naming the function; asserts that the name node is an EXPR_SYMBOL. */
+const SymbolExpression *raviX_function_statement_name(const FunctionStatement *statement)
+{
+	assert(statement->name->type == EXPR_SYMBOL);
+	return &statement->name->symbol_expr;
+}
+/* True when the statement has a method name. */
+bool raviX_function_statement_is_method(const FunctionStatement *statement)
+{
+	return statement->method_name != NULL;
+}
+/*
+ * Index expression holding the method name. Dereferences method_name
+ * unconditionally, so callers should check raviX_function_statement_is_method()
+ * first.
+ */
+const IndexExpression *raviX_function_statement_method_name(const FunctionStatement *statement)
+{
+	assert(statement->method_name->type == EXPR_Y_INDEX || statement->method_name->type == EXPR_FIELD_SELECTOR);
+	return &statement->method_name->index_expr;
+}
+/* True when the statement has a selector list. */
+bool raviX_function_statement_has_selectors(const FunctionStatement *statement)
+{
+	return statement->selectors != NULL;
+}
+void raviX_function_statement_foreach_selector(const FunctionStatement *statement, void *userdata,
+					       void
(*callback)(void *, const IndexExpression *expr)) +{ + AstNode *node; + FOR_EACH_PTR(statement->selectors, node) + { + assert(node->type == EXPR_Y_INDEX || node->type == EXPR_FIELD_SELECTOR); + callback(userdata, &node->index_expr); + } + END_FOR_EACH_PTR(node) +} +const FunctionExpression *raviX_function_ast(const FunctionStatement *statement) +{ + assert(statement->function_expr->type == EXPR_FUNCTION); + return &statement->function_expr->function_expr; +} +const Scope *raviX_do_statement_scope(const DoStatement *statement) { return statement->scope; } +void raviX_do_statement_foreach_statement(const DoStatement *statement, void *userdata, + void (*callback)(void *userdata, const Statement *statement)) +{ + AstNode *node; + FOR_EACH_PTR(statement->do_statement_list, node) + { + assert(node->type <= STMT_EXPR); + callback(userdata, (Statement *)node); + } + END_FOR_EACH_PTR(node) +} +const Scope *raviX_test_then_statement_scope(const TestThenStatement *statement) +{ + return statement->test_then_scope; +} +void raviX_test_then_statement_foreach_statement(const TestThenStatement *statement, void *userdata, + void (*callback)(void *userdata, const Statement *statement)) +{ + AstNode *node; + FOR_EACH_PTR(statement->test_then_statement_list, node) + { + assert(node->type <= STMT_EXPR); + callback(userdata, (Statement *)node); + } + END_FOR_EACH_PTR(node) +} +const Expression *raviX_test_then_statement_condition(const TestThenStatement *statement) +{ + assert(statement->condition->type >= EXPR_LITERAL && statement->condition->type <= EXPR_FUNCTION_CALL); + return (Expression *)statement->condition; +} +void raviX_if_statement_foreach_test_then_statement(const IfStatement *statement, void *userdata, + void (*callback)(void *, const TestThenStatement *stmt)) +{ + AstNode *node; + FOR_EACH_PTR(statement->if_condition_list, node) + { + assert(node->type == STMT_TEST_THEN); + callback(userdata, &node->test_then_block); + } + END_FOR_EACH_PTR(node) +} +const Scope 
*raviX_if_then_statement_else_scope(const IfStatement *statement)
+{
+	return statement->else_block;
+}
+/* Invoke 'callback' for each statement of the else branch. */
+void raviX_if_statement_foreach_else_statement(const IfStatement *statement, void *userdata,
+					       void (*callback)(void *userdata, const Statement *statement))
+{
+	AstNode *node;
+	FOR_EACH_PTR(statement->else_statement_list, node)
+	{
+		assert(node->type <= STMT_EXPR);
+		callback(userdata, (Statement *)node);
+	}
+	END_FOR_EACH_PTR(node)
+}
+
+/* Loop condition; asserts that the node is an expression. */
+const Expression *raviX_while_or_repeat_statement_condition(const WhileOrRepeatStatement *statement)
+{
+	assert(statement->condition->type >= EXPR_LITERAL && statement->condition->type <= EXPR_FUNCTION_CALL);
+	return (Expression *)statement->condition;
+}
+/* Scope of the loop (loop_scope). */
+const Scope *raviX_while_or_repeat_statement_scope(const WhileOrRepeatStatement *statement)
+{
+	return statement->loop_scope;
+}
+/* Invoke 'callback' for each statement of the loop body. */
+void raviX_while_or_repeat_statement_foreach_statement(const WhileOrRepeatStatement *statement,
+						       void *userdata,
+						       void (*callback)(void *userdata,
+									const Statement *statement))
+{
+	AstNode *node;
+	FOR_EACH_PTR(statement->loop_statement_list, node)
+	{
+		assert(node->type <= STMT_EXPR);
+		callback(userdata, (Statement *)node);
+	}
+	END_FOR_EACH_PTR(node)
+}
+/* Scope of the for statement (for_scope). */
+const Scope *raviX_for_statement_scope(const ForStatement *statement)
+{
+	return statement->for_scope;
+}
+/* Invoke 'callback' for each loop variable symbol of the for statement. */
+void raviX_for_statement_foreach_symbol(const ForStatement *statement, void *userdata,
+					void (*callback)(void *, const LuaVariableSymbol *expr))
+{
+	LuaSymbol *symbol;
+	FOR_EACH_PTR(statement->symbols, symbol)
+	{
+		assert(symbol->symbol_type == SYM_LOCAL);
+		callback(userdata, &symbol->variable);
+	}
+	END_FOR_EACH_PTR(symbol) /* was 'node', which is not declared in this function */
+}
+/* Invoke 'callback' for each expression in the for statement's expr_list. */
+void raviX_for_statement_foreach_expression(const ForStatement *statement, void *userdata,
+					    void (*callback)(void *, const Expression *expr))
+{
+	AstNode *node;
+	FOR_EACH_PTR(statement->expr_list, node)
+	{
+		assert(node->type >= EXPR_LITERAL && node->type <= EXPR_FUNCTION_CALL);
+		callback(userdata, (Expression *)node);
+	}
+	
END_FOR_EACH_PTR(node) +} +const Scope *raviX_for_statement_body_scope(const ForStatement *statement) +{ + return statement->for_body; +} +void raviX_for_statement_body_foreach_statement(const ForStatement *statement, void *userdata, + void (*callback)(void *userdata, const Statement *statement)) +{ + AstNode *node; + FOR_EACH_PTR(statement->for_statement_list, node) + { + assert(node->type <= STMT_EXPR); + callback(userdata, (Statement *)node); + } + END_FOR_EACH_PTR(node) +} +const VariableType *raviX_literal_expression_type(const LiteralExpression *expression) +{ + return &expression->type; +} +const SemInfo *raviX_literal_expression_literal(const LiteralExpression *expression) { return &expression->u; } +const VariableType *raviX_symbol_expression_type(const SymbolExpression *expression) +{ + return &expression->type; +} +const LuaSymbol *raviX_symbol_expression_symbol(const SymbolExpression *expression) +{ + return expression->var; +} +const VariableType *raviX_index_expression_type(const IndexExpression *expression) +{ + return &expression->type; +} +const Expression *raviX_index_expression_expression(const IndexExpression *expression) +{ + assert(expression->expr->type >= EXPR_LITERAL && expression->expr->type <= EXPR_FUNCTION_CALL); + return (const Expression *)expression->expr; +} +const VariableType *raviX_unary_expression_type(const UnaryExpression *expression) +{ + return &expression->type; +} +const Expression *raviX_unary_expression_expression(const UnaryExpression *expression) +{ + assert(expression->expr->type >= EXPR_LITERAL && expression->expr->type <= EXPR_FUNCTION_CALL); + return (const Expression *)expression->expr; +} +UnaryOperatorType raviX_unary_expression_operator(const UnaryExpression *expression) +{ + return expression->unary_op; +} +const VariableType *raviX_binary_expression_type(const BinaryExpression *expression) +{ + return &expression->type; +} +const Expression *raviX_binary_expression_left_expression(const BinaryExpression 
*expression) +{ + assert(expression->expr_left->type >= EXPR_LITERAL && expression->expr_left->type <= EXPR_FUNCTION_CALL); + return (const Expression *)expression->expr_left; +} +const Expression *raviX_binary_expression_right_expression(const BinaryExpression *expression) +{ + assert(expression->expr_right->type >= EXPR_LITERAL && expression->expr_right->type <= EXPR_FUNCTION_CALL); + return (const Expression *)expression->expr_right; +} +BinaryOperatorType raviX_binary_expression_operator(const BinaryExpression *expression) +{ + return expression->binary_op; +} +const VariableType * +raviX_table_element_assignment_expression_type(const TableElementAssignmentExpression *expression) +{ + return &expression->type; +} +const Expression * +raviX_table_element_assignment_expression_key(const TableElementAssignmentExpression *expression) +{ + if (!expression->key_expr) + return NULL; + assert(expression->key_expr->type >= EXPR_LITERAL && expression->key_expr->type <= EXPR_FUNCTION_CALL); + return (const Expression *)expression->key_expr; +} +const Expression * +raviX_table_element_assignment_expression_value(const TableElementAssignmentExpression *expression) +{ + assert(expression->value_expr->type >= EXPR_LITERAL && expression->value_expr->type <= EXPR_FUNCTION_CALL); + return (const Expression *)expression->value_expr; +} +const VariableType *raviX_table_literal_expression_type(const TableLiteralExpression *expression) +{ + return &expression->type; +} +void raviX_table_literal_expression_foreach_element( + const TableLiteralExpression *expression, void *userdata, + void (*callback)(void *, const TableElementAssignmentExpression *expr)) +{ + AstNode *node; + FOR_EACH_PTR(expression->expr_list, node) + { + assert(node->type == EXPR_TABLE_ELEMENT_ASSIGN); + callback(userdata, &node->table_elem_assign_expr); + } + END_FOR_EACH_PTR(node) +} + +const VariableType *raviX_suffixed_expression_type(const SuffixedExpression *expression) +{ + return &expression->type; +} 
+const Expression *raviX_suffixed_expression_primary(const SuffixedExpression *expression) +{ + assert(expression->primary_expr->type >= EXPR_LITERAL && expression->primary_expr->type <= EXPR_FUNCTION_CALL); + return (const Expression *)expression->primary_expr; +} +void raviX_suffixed_expression_foreach_suffix(const SuffixedExpression *expression, void *userdata, + void (*callback)(void *, const Expression *expr)) +{ + AstNode *node; + FOR_EACH_PTR(expression->suffix_list, node) + { + assert(node->type >= EXPR_LITERAL && node->type <= EXPR_FUNCTION_CALL); + callback(userdata, (Expression *)node); + } + END_FOR_EACH_PTR(node) +} + +const VariableType *raviX_function_call_expression_type(const FunctionCallExpression *expression) +{ + return &expression->type; +} +// Can return NULL +const StringObject * +raviX_function_call_expression_method_name(const FunctionCallExpression *expression) +{ + return expression->method_name; +} +void raviX_function_call_expression_foreach_argument(const FunctionCallExpression *expression, void *userdata, + void (*callback)(void *, const Expression *expr)) +{ + AstNode *node; + FOR_EACH_PTR(expression->arg_list, node) + { + assert(node->type >= EXPR_LITERAL && node->type <= EXPR_FUNCTION_CALL); + callback(userdata, (Expression *)node); + } + END_FOR_EACH_PTR(node) +} +const FunctionExpression *raviX_scope_owning_function(const Scope *scope) +{ + assert(scope->function->type == EXPR_FUNCTION); + return &scope->function->function_expr; +} +RAVICOMP_EXPORT const Scope *raviX_scope_parent_scope(const Scope *scope) +{ + return scope->parent; +} +RAVICOMP_EXPORT void raviX_scope_foreach_symbol(const Scope *scope, void *userdata, + void (*callback)(void *userdata, const LuaSymbol *symbol)) +{ + LuaSymbol *symbol; + FOR_EACH_PTR(scope->symbol_list, symbol) { callback(userdata, symbol); } + END_FOR_EACH_PTR(node) +} +enum SymbolType raviX_symbol_type(const LuaSymbol *symbol) { return symbol->symbol_type; } +const LuaVariableSymbol 
*raviX_symbol_variable(const LuaSymbol *symbol) +{ + assert(symbol->symbol_type == SYM_GLOBAL || symbol->symbol_type == SYM_LOCAL); + return &symbol->variable; +} +const LuaUpvalueSymbol *raviX_symbol_upvalue(const LuaSymbol *symbol) +{ + assert(symbol->symbol_type == SYM_UPVALUE); + return &symbol->upvalue; +} +const LuaLabelSymbol *raviX_symbol_label(const LuaSymbol *symbol) +{ + assert(symbol->symbol_type == SYM_LABEL); + return &symbol->label; +} +const StringObject *raviX_label_name(const LuaLabelSymbol *symbol) { return symbol->label_name; } +const Scope *raviX_label_scope(const LuaLabelSymbol *symbol) { return symbol->block; } +const VariableType *raviX_upvalue_symbol_type(const LuaUpvalueSymbol *symbol) +{ + return &symbol->value_type; +} +const LuaVariableSymbol *raviX_upvalue_target_variable(const LuaUpvalueSymbol *symbol) +{ + if (symbol->target_variable->symbol_type == SYM_ENV) { + assert(symbol->target_function == NULL); + return NULL; + } + assert(symbol->target_variable->symbol_type == SYM_LOCAL); + return &symbol->target_variable->variable; +} +const FunctionExpression *raviX_upvalue_target_function(const LuaUpvalueSymbol *symbol) +{ + if (symbol->target_variable->symbol_type == SYM_ENV) { + assert(symbol->target_function == NULL); + return NULL; + } + assert(symbol->target_function->type == EXPR_FUNCTION); + return &symbol->target_function->function_expr; +} +unsigned raviX_upvalue_index(const LuaUpvalueSymbol *symbol) { return symbol->upvalue_index; } diff --git a/ravicomp/src/bitset.c b/ravicomp/src/bitset.c new file mode 100644 index 0000000..11c49a9 --- /dev/null +++ b/ravicomp/src/bitset.c @@ -0,0 +1,274 @@ +/* This file is a part of MIR project. + Copyright (C) 2018-2020 Vladimir Makarov . 
+*/ +/* + * Adapted for Ravi Compiler project + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#if !defined(BITMAP_ENABLE_CHECKING) && !defined(NDEBUG) +#define BITMAP_ENABLE_CHECKING +#endif + +#ifndef BITMAP_ENABLE_CHECKING +#define BITMAP_ASSERT(EXPR, OP) ((void) (EXPR)) + +#else +static inline void mir_bitset_assert_fail (const char *op) { + fprintf (stderr, "wrong %s for a bitset", op); + assert (0); +} + +#define BITMAP_ASSERT(EXPR, OP) (void) ((EXPR) ? 0 : (mir_bitset_assert_fail (#OP), 0)) + +#endif + +#define BITMAP_WORD_BITS 64 + +void raviX_bitset_create2(BitSet *bm, size_t init_bits_num) { + bm->els_num = 0; + bm->size = (init_bits_num + BITMAP_WORD_BITS - 1) / BITMAP_WORD_BITS; + bm->varr = calloc(bm->size, sizeof(bitset_el_t)); +} + +void raviX_bitset_destroy(BitSet * bm) +{ + free(bm->varr); +} + +static void bitset_expand (BitSet * bm, size_t nb) { + size_t new_len = (nb + BITMAP_WORD_BITS - 1) / BITMAP_WORD_BITS; + if (new_len > bm->els_num) { + if (new_len > bm->size) { + bm->varr = raviX_realloc_array(bm->varr, sizeof(bitset_el_t), bm->size, new_len); + bm->size = new_len; + } + bm->els_num = new_len; + } +} + +int raviX_bitset_bit_p(const BitSet * bm, size_t nb) { + size_t nw, sh, len = bm->els_num; + bitset_el_t *addr = bm->varr; + + if (nb >= BITMAP_WORD_BITS * len) return 0; + nw = nb / BITMAP_WORD_BITS; + sh = nb % BITMAP_WORD_BITS; + return (addr[nw] >> sh) & 1; +} + +/* Set the given bit to 1, and return true if the bit was previously unset, i.e. + * this set caused bit to change from 0 to 1 + */ +int raviX_bitset_set_bit_p(BitSet * bm, size_t bit) { + size_t nw, sh; + bitset_el_t *addr; + int res; + + bitset_expand (bm, bit + 1); + addr = bm->varr; + nw = bit / BITMAP_WORD_BITS; + sh = bit % BITMAP_WORD_BITS; + res = ((addr[nw] >> sh) & 1) == 0; /* Was this bit previously unset? 
*/ + assert(nw < bm->els_num); + addr[nw] |= (bitset_el_t) 1 << sh; + return res; +} + +int raviX_bitset_clear_bit_p(BitSet * bm, size_t nb) { + size_t nw, sh, len = bm->els_num; + bitset_el_t *addr = bm->varr; + int res; + + if (nb >= BITMAP_WORD_BITS * len) return 0; + nw = nb / BITMAP_WORD_BITS; + sh = nb % BITMAP_WORD_BITS; + res = (addr[nw] >> sh) & 1; + addr[nw] &= ~((bitset_el_t) 1 << sh); + return res; +} + +int raviX_bitset_set_or_clear_bit_range_p(BitSet * bm, size_t nb, size_t len, int set_p) { + size_t nw, lsh, rsh, range_len; + bitset_el_t mask, *addr; + int res = 0; + + bitset_expand (bm, nb + len); + addr = bm->varr; + while (len > 0) { + nw = nb / BITMAP_WORD_BITS; + lsh = nb % BITMAP_WORD_BITS; + rsh = len >= BITMAP_WORD_BITS - lsh ? 0 : BITMAP_WORD_BITS - (nb + len) % BITMAP_WORD_BITS; + mask = ((~(bitset_el_t) 0) >> (rsh + lsh)) << lsh; + if (set_p) { + res |= (~addr[nw] & mask) != 0; + addr[nw] |= mask; + } else { + res |= (addr[nw] & mask) != 0; + addr[nw] &= ~mask; + } + range_len = BITMAP_WORD_BITS - rsh - lsh; + len -= range_len; + nb += range_len; + } + return res; +} + +void raviX_bitset_copy(BitSet * dst, const BitSet * src) { + + size_t dst_len = dst->els_num; + size_t src_len = src->els_num; + + if (dst_len >= src_len) + dst->els_num = src_len; + else + bitset_expand (dst, src_len * BITMAP_WORD_BITS); + memcpy (dst->varr, src->varr, + src_len * sizeof (bitset_el_t)); +} + +int raviX_bitset_equal_p(const BitSet * bm1, const BitSet * bm2) { + const BitSet * temp_bm; + size_t i, temp_len, bm1_len = bm1->els_num; + size_t bm2_len = bm2->els_num; + bitset_el_t *addr1, *addr2; + + if (bm1_len > bm2_len) { + temp_bm = bm1; + bm1 = bm2; + bm2 = temp_bm; + temp_len = bm1_len; + bm1_len = bm2_len; + bm2_len = temp_len; + } + addr1 = bm1->varr; + addr2 = bm2->varr; + if (memcmp (addr1, addr2, bm1_len * sizeof (bitset_el_t)) != 0) return false; + for (i = bm1_len; i < bm2_len; i++) + if (addr2[i] != 0) return false; + return true; +} + +int 
raviX_bitset_intersect_p(const BitSet * bm1, const BitSet * bm2) { + size_t i, min_len, bm1_len = bm1->els_num; + size_t bm2_len = bm2->els_num; + bitset_el_t *addr1 = bm1->varr; + bitset_el_t *addr2 = bm2->varr; + + min_len = bm1_len <= bm2_len ? bm1_len : bm2_len; + for (i = 0; i < min_len; i++) + if ((addr1[i] & addr2[i]) != 0) return true; + return false; +} + +int raviX_bitset_empty_p(const BitSet * bm) { + size_t i, len = bm->els_num; + bitset_el_t *addr = bm->varr; + + for (i = 0; i < len; i++) + if (addr[i] != 0) return false; + return true; +} + +static bitset_el_t bitset_el_max2 (bitset_el_t el1, bitset_el_t el2) { + return el1 < el2 ? el2 : el1; +} + +static bitset_el_t bitset_el_max3 (bitset_el_t el1, bitset_el_t el2, bitset_el_t el3) { + if (el1 <= el2) return el2 < el3 ? el3 : el2; + return el1 < el3 ? el3 : el1; +} + +/* Return the number of bits set in BM. */ +size_t raviX_bitset_bit_count(const BitSet * bm) { + size_t i, len = bm->els_num; + bitset_el_t el, *addr = bm->varr; + size_t count = 0; + + for (i = 0; i < len; i++) { + if ((el = addr[i]) != 0) { + for (; el != 0; el >>= 1) + if (el & 1) count++; + } + } + return count; +} + +int raviX_bitset_op2(BitSet * dst, const BitSet * src1, const BitSet * src2, + bitset_el_t (*op) (bitset_el_t, bitset_el_t)) { + size_t i, len, bound, src1_len, src2_len; + bitset_el_t old, *dst_addr, *src1_addr, *src2_addr; + int change_p = false; + + src1_len = src1->els_num; + src2_len = src2->els_num; + len = bitset_el_max2 (src1_len, src2_len); + bitset_expand (dst, len * BITMAP_WORD_BITS); + dst_addr = dst->varr; + src1_addr = src1->varr; + src2_addr = src2->varr; + for (bound = i = 0; i < len; i++) { + old = dst_addr[i]; + if ((dst_addr[i] = op (i >= src1_len ? 0 : src1_addr[i], i >= src2_len ? 
0 : src2_addr[i])) + != 0) + bound = i + 1; + if (old != dst_addr[i]) change_p = true; + } + dst->els_num = bound; + return change_p; +} + +int raviX_bitset_op3(BitSet * dst, const BitSet * src1, const BitSet * src2, + const BitSet * src3, bitset_el_t (*op) (bitset_el_t, bitset_el_t, bitset_el_t)) { + size_t i, len, bound, src1_len, src2_len, src3_len; + bitset_el_t old, *dst_addr, *src1_addr, *src2_addr, *src3_addr; + int change_p = false; + + src1_len = src1->els_num; + src2_len = src2->els_num; + src3_len = src3->els_num; + len = bitset_el_max3 (src1_len, src2_len, src3_len); + bitset_expand (dst, len * BITMAP_WORD_BITS); + dst_addr = dst->varr; + src1_addr = src1->varr; + src2_addr = src2->varr; + src3_addr = src3->varr; + for (bound = i = 0; i < len; i++) { + old = dst_addr[i]; + if ((dst_addr[i] = op (i >= src1_len ? 0 : src1_addr[i], i >= src2_len ? 0 : src2_addr[i], + i >= src3_len ? 0 : src3_addr[i])) + != 0) + bound = i + 1; + if (old != dst_addr[i]) change_p = true; + } + dst->els_num = bound; + return change_p; +} + +int raviX_bitset_iterator_next(BitSetIterator *iter, size_t *nbit) { + const size_t el_bits_num = sizeof (bitset_el_t) * CHAR_BIT; + size_t curr_nel = iter->nbit / el_bits_num, len = iter->bitset->els_num; + bitset_el_t el, *addr = iter->bitset->varr; + + for (; curr_nel < len; curr_nel++, iter->nbit = curr_nel * el_bits_num) + if ((el = addr[curr_nel]) != 0) + for (el >>= iter->nbit % el_bits_num; el != 0; el >>= 1, iter->nbit++) + if (el & 1) { + *nbit = iter->nbit++; + return true; + } + return false; +} + + + diff --git a/ravicomp/src/bitset.h b/ravicomp/src/bitset.h new file mode 100644 index 0000000..01ee10b --- /dev/null +++ b/ravicomp/src/bitset.h @@ -0,0 +1,107 @@ +/* This file is a part of MIR project. + Copyright (C) 2018-2020 Vladimir Makarov . 
+*/ +/* + * Adapted for Ravi Compiler project + */ + +#ifndef ravicomp_BITSET_H +#define ravicomp_BITSET_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef uint64_t bitset_el_t; + +typedef struct BitSet { + size_t els_num; + size_t size; + bitset_el_t *varr; +} BitSet; + +extern void raviX_bitset_create2(BitSet *, size_t init_bits_num); +static inline void raviX_bitset_create(BitSet *bm) +{ + raviX_bitset_create2(bm, 0); +} +extern void raviX_bitset_destroy(BitSet * bm); +static inline void raviX_bitset_clear(BitSet * bm) +{ + bm->els_num = 0; +} +extern int raviX_bitset_bit_p(const BitSet * bm, size_t nb); +/* Sets a bit ON and returns true if previously bit was not set */ +extern int raviX_bitset_set_bit_p(BitSet * bm, size_t bit); +extern int raviX_bitset_clear_bit_p(BitSet * bm, size_t nb); +extern int raviX_bitset_set_or_clear_bit_range_p(BitSet * bm, size_t nb, size_t len, int set_p); +static inline int raviX_bitset_set_bit_range_p(BitSet * bm, size_t nb, size_t len) { + return raviX_bitset_set_or_clear_bit_range_p(bm, nb, len, true); +} +static inline int raviX_bitset_clear_bit_range_p(BitSet * bm, size_t nb, size_t len) { + return raviX_bitset_set_or_clear_bit_range_p(bm, nb, len, false); +} +extern void raviX_bitset_copy(BitSet * dst, const BitSet * src); +extern int raviX_bitset_equal_p(const BitSet * bm1, const BitSet * bm2); +extern int raviX_bitset_intersect_p(const BitSet * bm1, const BitSet * bm2); +extern int raviX_bitset_empty_p(const BitSet * bm); +/* Return the number of bits set in BM. 
*/ +extern size_t raviX_bitset_bit_count(const BitSet * bm); +extern int raviX_bitset_op2(BitSet * dst, const BitSet * src1, const BitSet * src2, + bitset_el_t (*op) (bitset_el_t, bitset_el_t)); +static inline bitset_el_t raviX_bitset_el_and(bitset_el_t el1, bitset_el_t el2) { return el1 & el2; } +static inline int raviX_bitset_and(BitSet * dst, BitSet * src1, BitSet * src2) { + return raviX_bitset_op2(dst, src1, src2, raviX_bitset_el_and); +} +static inline bitset_el_t raviX_bitset_el_and_compl(bitset_el_t el1, bitset_el_t el2) { + return el1 & ~el2; +} +static inline int raviX_bitset_and_compl(BitSet * dst, BitSet * src1, BitSet * src2) { + return raviX_bitset_op2(dst, src1, src2, raviX_bitset_el_and_compl); +} +static inline bitset_el_t raviX_bitset_el_ior(bitset_el_t el1, bitset_el_t el2) { return el1 | el2; } +static inline int raviX_bitset_ior(BitSet * dst, BitSet * src1, BitSet * src2) { + return raviX_bitset_op2(dst, src1, src2, raviX_bitset_el_ior); +} +int raviX_bitset_op3(BitSet * dst, const BitSet * src1, const BitSet * src2, + const BitSet * src3, bitset_el_t (*op) (bitset_el_t, bitset_el_t, bitset_el_t)); +static inline bitset_el_t raviX_bitset_el_ior_and(bitset_el_t el1, bitset_el_t el2, bitset_el_t el3) { + return el1 | (el2 & el3); +} +/* DST = SRC1 | (SRC2 & SRC3). Return true if DST changed. */ +static inline int raviX_bitset_ior_and(BitSet * dst, BitSet * src1, BitSet * src2, BitSet * src3) { + return raviX_bitset_op3(dst, src1, src2, src3, raviX_bitset_el_ior_and); +} +static inline bitset_el_t raviX_bitset_el_ior_and_compl(bitset_el_t el1, bitset_el_t el2, bitset_el_t el3) { + return el1 | (el2 & ~el3); +} +/* DST = SRC1 | (SRC2 & ~SRC3). Return true if DST changed. 
*/ +static inline int raviX_bitset_ior_and_compl(BitSet * dst, BitSet * src1, BitSet * src2, BitSet * src3) { + return raviX_bitset_op3(dst, src1, src2, src3, raviX_bitset_el_ior_and_compl); +} + +typedef struct { + BitSet * bitset; + size_t nbit; +} BitSetIterator; +static inline void raviX_bitset_iterator_init(BitSetIterator *iter, BitSet * bitset) { + iter->bitset = bitset; + iter->nbit = 0; +} +extern int raviX_bitset_iterator_next(BitSetIterator *iter, size_t *nbit); +#define FOREACH_BITSET_BIT(iter, bitset, nbit) \ + for (raviX_bitset_iterator_init (&iter, bitset); raviX_bitset_iterator_next (&iter, &nbit);) + + + + +#ifdef __cplusplus +} /* extern C */ +#endif + +#endif + diff --git a/ravicomp/src/cfg.c b/ravicomp/src/cfg.c new file mode 100644 index 0000000..e06c13a --- /dev/null +++ b/ravicomp/src/cfg.c @@ -0,0 +1,81 @@ +/* Build CFG */ + + +#include "graph.h" +#include "cfg.h" + +#include + +/* Recursively create control flow graph for each proc + * Return 0 on success + */ +int raviX_construct_cfg(Proc *proc) +{ + Graph *g = raviX_init_graph(ENTRY_BLOCK, EXIT_BLOCK, proc); + for (unsigned i = 0; i < proc->node_count; i++) { + BasicBlock *block = proc->nodes[i]; + Instruction *insn = raviX_last_instruction(block); + if (insn == NULL) + continue; + if (insn->opcode == op_br || insn->opcode == op_cbr || insn->opcode == op_ret) { + Pseudo *pseudo; + FOR_EACH_PTR(insn->targets, pseudo) + { + assert(pseudo->type == PSEUDO_BLOCK); + raviX_add_edge(g, block->index, pseudo->block->index); + } + END_FOR_EACH_PTR(pseudo) + } else { + return 1; + } + } + proc->cfg = g; + Proc *childproc; + FOR_EACH_PTR(proc->procs, childproc) + { + if (raviX_construct_cfg(childproc) != 0) + return 1; + } + END_FOR_EACH_PTR(childproc) + return 0; +} + +struct CfgArg { + FILE *fp; + Proc *proc; +}; + +static void output_node(void *arg, Graph *g, uint32_t nodeid) +{ + struct CfgArg *myargs = (struct CfgArg *)arg; + FILE *fp = myargs->fp; + Proc *proc = myargs->proc; + GraphNodeList 
*successors = raviX_successors(raviX_graph_node(g, nodeid)); + if (!successors) + return; + BasicBlock *block = proc->nodes[nodeid]; + if (raviX_ptrlist_size((const struct ptr_list *)block->insns) > 0) { + TextBuffer buf; + raviX_buffer_init(&buf, 1024); + raviX_output_basic_block_as_table(proc, block, &buf); + fprintf(fp, "L%d [shape=none, margin=0, label=<%s>];\n", nodeid, raviX_buffer_data(&buf)); + raviX_buffer_free(&buf); + } + for (unsigned i = 0; i < raviX_node_list_size(successors); i++) { + fprintf(fp, "L%d -> L%d\n", nodeid, raviX_node_list_at(successors, i)); + } + Proc *childproc; + FOR_EACH_PTR(proc->procs, childproc) { raviX_output_cfg(childproc, fp); } + END_FOR_EACH_PTR(childproc) +} + +void raviX_output_cfg(Proc *proc, FILE *fp) +{ + Graph *g = proc->cfg; + if (!g) + return; + fprintf(fp, "digraph Proc%d {\n", proc->id); + struct CfgArg args = {.proc = proc, .fp = fp}; + raviX_for_each_node(g, output_node, &args); + fprintf(fp, "}\n"); +} \ No newline at end of file diff --git a/ravicomp/src/cfg.h b/ravicomp/src/cfg.h new file mode 100644 index 0000000..e93d478 --- /dev/null +++ b/ravicomp/src/cfg.h @@ -0,0 +1,11 @@ +#ifndef ravicomp_CFG_H +#define ravicomp_CFG_H + +#include "linearizer.h" + +#include + +int raviX_construct_cfg(Proc *proc); +void raviX_output_cfg(Proc *proc, FILE *fp); + +#endif diff --git a/ravicomp/src/codegen.c b/ravicomp/src/codegen.c new file mode 100644 index 0000000..12d3c12 --- /dev/null +++ b/ravicomp/src/codegen.c @@ -0,0 +1,2688 @@ +/* + * Convert the linear IR to C code that can be compiled and + * executed by Ravi VM. 
+ */ + +#include "codegen.h" +#include "ravi_api.h" + +#include +#include + +/* + * Only 64-bits supported right now + * Following must be kept in sync with changes in the actual header files + */ + +static const char Lua_header[] = + "#ifdef __MIRC__\n" + "typedef __SIZE_TYPE__ size_t;\n" + "typedef __PTRDIFF_TYPE__ ptrdiff_t;\n" + "typedef __INTPTR_TYPE__ intptr_t;\n" + "typedef __INT64_TYPE__ int64_t;\n" + "typedef __UINT64_TYPE__ uint64_t;\n" + "typedef __INT32_TYPE__ int32_t;\n" + "typedef __UINT32_TYPE__ uint32_t;\n" + "typedef __INT16_TYPE__ int16_t;\n" + "typedef __UINT16_TYPE__ uint16_t;\n" + "typedef __INT8_TYPE__ int8_t;\n" + "typedef __UINT8_TYPE__ uint8_t;\n" + "#define NULL ((void *)0)\n" + "#define EXPORT\n" + "#else\n" + "#include \n" + "#include \n" + "#ifdef _WIN32\n" + "#define EXPORT __declspec(dllexport)\n" + "#else\n" + "#define EXPORT\n" + "#endif\n" + "#endif\n" + "typedef size_t lu_mem;\n" + "typedef unsigned char lu_byte;\n" + "typedef uint16_t LuaType;\n" + "typedef struct lua_State lua_State;\n" + "#define LUA_TNONE (-1)\n" + "#define LUA_TNIL 0\n" + "#define LUA_TBOOLEAN 1\n" + "#define LUA_TLIGHTUSERDATA 2\n" + "#define LUA_TNUMBER 3\n" + "#define LUA_TSTRING 4\n" + "#define LUA_TTABLE 5\n" + "#define LUA_TFUNCTION 6\n" + "#define LUA_TUSERDATA 7\n" + "#define LUA_TTHREAD 8\n" + "#define LUA_OK 0\n" + "typedef enum {TM_INDEX,TM_NEWINDEX,TM_GC,\n" + " TM_MODE,TM_LEN,TM_EQ,TM_ADD,TM_SUB,TM_MUL,\n" + " TM_MOD,TM_POW,TM_DIV,TM_IDIV,TM_BAND,TM_BOR,\n" + " TM_BXOR,TM_SHL,TM_SHR,TM_UNM,TM_BNOT,TM_LT,\n" + " TM_LE,TM_CONCAT,TM_CALL,TM_N\n" + "} TMS;\n" + "typedef double lua_Number;\n" + "typedef int64_t lua_Integer;\n" + "typedef uint64_t lua_Unsigned;\n" + "typedef int (*lua_CFunction) (lua_State *L);\n" + "typedef union {\n" + " lua_Number n;\n" + " double u;\n" + " void *s;\n" + " lua_Integer i;\n" + " long l;\n" + "} L_Umaxalign;\n" + "#define lua_assert(c) ((void)0)\n" + "#define check_exp(c,e) (e)\n" + "#define lua_longassert(c) 
((void)0)\n" + "#define luai_apicheck(l,e) lua_assert(e)\n" + "#define api_check(l,e,msg) luai_apicheck(l,(e) && msg)\n" + "#define UNUSED(x) ((void)(x))\n" + "#define cast(t, exp) ((t)(exp))\n" + "#define cast_void(i) cast(void, (i))\n" + "#define cast_byte(i) cast(lu_byte, (i))\n" + "#define cast_num(i) cast(lua_Number, (i))\n" + "#define cast_int(i) cast(int, (i))\n" + "#define cast_uchar(i) cast(unsigned char, (i))\n" + "#define l_castS2U(i) ((lua_Unsigned)(i))\n" + "#define l_castU2S(i) ((lua_Integer)(i))\n" + "#define l_noret void\n" + "typedef unsigned int Instruction;\n" + "#define luai_numidiv(L,a,b) ((void)L, l_floor(luai_numdiv(L,a,b)))\n" + "#define luai_numdiv(L,a,b) ((a)/(b))\n" + "#define luai_nummod(L,a,b,m) \\\n" + " { (m) = l_mathop(fmod)(a,b); if ((m)*(b) < 0) (m) += (b); }\n" + "#define LUA_TLCL (LUA_TFUNCTION | (0 << 4))\n" + "#define LUA_TLCF (LUA_TFUNCTION | (1 << 4))\n" + "#define LUA_TCCL (LUA_TFUNCTION | (2 << 4))\n" + "#define RAVI_TFCF (LUA_TFUNCTION | (4 << 4))\n" + "#define LUA_TSHRSTR (LUA_TSTRING | (0 << 4))\n" + "#define LUA_TLNGSTR (LUA_TSTRING | (1 << 4))\n" + "#define LUA_TNUMFLT (LUA_TNUMBER | (0 << 4))\n" + "#define LUA_TNUMINT (LUA_TNUMBER | (1 << 4))\n" + "#define RAVI_TIARRAY (LUA_TTABLE | (1 << 4))\n" + "#define RAVI_TFARRAY (LUA_TTABLE | (2 << 4))\n" + "#define BIT_ISCOLLECTABLE (1 << 15)\n" + "#define ctb(t) ((t) | BIT_ISCOLLECTABLE)\n" + "typedef struct GCObject GCObject;\n" + "#define CommonHeader GCObject *next; lu_byte tt; lu_byte marked\n" + "struct GCObject {\n" + " CommonHeader;\n" + "};\n" + "typedef union Value {\n" + " GCObject *gc;\n" + " void *p;\n" + " int b;\n" + " lua_CFunction f;\n" + " lua_Integer i;\n" + " lua_Number n;\n" + "} Value;\n" + "#define TValuefields Value value_; LuaType tt_\n" + "typedef struct lua_TValue {\n" + " TValuefields;\n" + "} TValue;\n" + "#define NILCONSTANT {NULL}, LUA_TNIL\n" + "#define val_(o) ((o)->value_)\n" + "#define rttype(o) ((o)->tt_)\n" + "#define novariant(x) ((x) & 
0x0F)\n" + "#define ttype(o) (rttype(o) & 0x7F)\n" + "#define ttnov(o) (novariant(rttype(o)))\n" + "#define checktag(o,t) (rttype(o) == (t))\n" + "#define checktype(o,t) (ttnov(o) == (t))\n" + "#define ttisnumber(o) checktype((o), LUA_TNUMBER)\n" + "#define ttisfloat(o) checktag((o), LUA_TNUMFLT)\n" + "#define ttisinteger(o) checktag((o), LUA_TNUMINT)\n" + "#define ttisnil(o) checktag((o), LUA_TNIL)\n" + "#define ttisboolean(o) checktag((o), LUA_TBOOLEAN)\n" + "#define ttislightuserdata(o) checktag((o), LUA_TLIGHTUSERDATA)\n" + "#define ttisstring(o) checktype((o), LUA_TSTRING)\n" + "#define ttisshrstring(o) checktag((o), ctb(LUA_TSHRSTR))\n" + "#define ttislngstring(o) checktag((o), ctb(LUA_TLNGSTR))\n" + "#define ttistable(o) checktype((o), LUA_TTABLE)\n" + "#define ttisiarray(o) checktag((o), ctb(RAVI_TIARRAY))\n" + "#define ttisfarray(o) checktag((o), ctb(RAVI_TFARRAY))\n" + "#define ttisarray(o) (ttisiarray(o) || ttisfarray(o))\n" + "#define ttisLtable(o) checktag((o), ctb(LUA_TTABLE))\n" + "#define ttisfunction(o) checktype(o, LUA_TFUNCTION)\n" + "#define ttisclosure(o) ((rttype(o) & 0x1F) == LUA_TFUNCTION)\n" + "#define ttisCclosure(o) checktag((o), ctb(LUA_TCCL))\n" + "#define ttisLclosure(o) checktag((o), ctb(LUA_TLCL))\n" + "#define ttislcf(o) checktag((o), LUA_TLCF)\n" + "#define ttisfcf(o) (ttype(o) == RAVI_TFCF)\n" + "#define ttisfulluserdata(o) checktag((o), ctb(LUA_TUSERDATA))\n" + "#define ttisthread(o) checktag((o), ctb(LUA_TTHREAD))\n" + "#define ttisdeadkey(o) checktag((o), LUA_TDEADKEY)\n" + "#define ivalue(o) check_exp(ttisinteger(o), val_(o).i)\n" + "#define fltvalue(o) check_exp(ttisfloat(o), val_(o).n)\n" + "#define nvalue(o) check_exp(ttisnumber(o), \\\n" + " (ttisinteger(o) ? 
cast_num(ivalue(o)) : fltvalue(o)))\n" + "#define gcvalue(o) check_exp(iscollectable(o), val_(o).gc)\n" + "#define pvalue(o) check_exp(ttislightuserdata(o), val_(o).p)\n" + "#define tsvalue(o) check_exp(ttisstring(o), gco2ts(val_(o).gc))\n" + "#define uvalue(o) check_exp(ttisfulluserdata(o), gco2u(val_(o).gc))\n" + "#define clvalue(o) check_exp(ttisclosure(o), gco2cl(val_(o).gc))\n" + "#define clLvalue(o) check_exp(ttisLclosure(o), gco2lcl(val_(o).gc))\n" + "#define clCvalue(o) check_exp(ttisCclosure(o), gco2ccl(val_(o).gc))\n" + "#define fvalue(o) check_exp(ttislcf(o), val_(o).f)\n" + "#define fcfvalue(o) check_exp(ttisfcf(o), val_(o).p)\n" + "#define hvalue(o) check_exp(ttistable(o), gco2t(val_(o).gc))\n" + "#define arrvalue(o) check_exp(ttisarray(o), gco2array(val_(o).gc))\n" + "#define arrvalue(o) check_exp(ttisarray(o), gco2array(val_(o).gc))\n" + "#define bvalue(o) check_exp(ttisboolean(o), val_(o).b)\n" + "#define thvalue(o) check_exp(ttisthread(o), gco2th(val_(o).gc))\n" + "#define deadvalue(o) check_exp(ttisdeadkey(o), cast(void *, val_(o).gc))\n" + "#define l_isfalse(o) (ttisnil(o) || (ttisboolean(o) && bvalue(o) == 0))\n" + "#define iscollectable(o) (rttype(o) & BIT_ISCOLLECTABLE)\n" + "#define righttt(obj) (ttype(obj) == gcvalue(obj)->tt)\n" + "#define checkliveness(L,obj) \\\n" + " lua_longassert(!iscollectable(obj) || \\\n" + " (righttt(obj) && (L == NULL || !isdead(G(L),gcvalue(obj)))))\n" + "#define settt_(o,t) ((o)->tt_=(t))\n" + "#define setfltvalue(obj,x) \\\n" + " { TValue *io=(obj); val_(io).n=(x); settt_(io, LUA_TNUMFLT); }\n" + "#define chgfltvalue(obj,x) \\\n" + " { TValue *io=(obj); lua_assert(ttisfloat(io)); val_(io).n=(x); }\n" + "#define setivalue(obj,x) \\\n" + " { TValue *io=(obj); val_(io).i=(x); settt_(io, LUA_TNUMINT); }\n" + "#define chgivalue(obj,x) \\\n" + " { TValue *io=(obj); lua_assert(ttisinteger(io)); val_(io).i=(x); }\n" + "#define setnilvalue(obj) settt_(obj, LUA_TNIL)\n" + "#define setfvalue(obj,x) \\\n" + " { TValue 
*io=(obj); val_(io).f=(x); settt_(io, LUA_TLCF); }\n" + "#define setfvalue_fastcall(obj, x, tag) \\\n" + "{ \\\n" + " TValue *io = (obj); \\\n" + " lua_assert(tag >= 1 && tag < 0x80); \\\n" + " val_(io).p = (x); \\\n" + " settt_(io, ((tag << 8) | RAVI_TFCF)); \\\n" + "}\n" + "#define setpvalue(obj,x) \\\n" + " { TValue *io=(obj); val_(io).p=(x); settt_(io, LUA_TLIGHTUSERDATA); }\n" + "#define setbvalue(obj,x) \\\n" + " { TValue *io=(obj); val_(io).b=(x); settt_(io, LUA_TBOOLEAN); }\n" + "#define setgcovalue(L,obj,x) \\\n" + " { TValue *io = (obj); GCObject *i_g=(x); \\\n" + " val_(io).gc = i_g; settt_(io, ctb(i_g->tt)); }\n" + "#define setsvalue(L,obj,x) \\\n" + " { TValue *io = (obj); TString *x_ = (x); \\\n" + " val_(io).gc = obj2gco(x_); settt_(io, ctb(x_->tt)); \\\n" + " checkliveness(L,io); }\n" + "#define setuvalue(L,obj,x) \\\n" + " { TValue *io = (obj); Udata *x_ = (x); \\\n" + " val_(io).gc = obj2gco(x_); settt_(io, ctb(LUA_TUSERDATA)); \\\n" + " checkliveness(L,io); }\n" + "#define setthvalue(L,obj,x) \\\n" + " { TValue *io = (obj); lua_State *x_ = (x); \\\n" + " val_(io).gc = obj2gco(x_); settt_(io, ctb(LUA_TTHREAD)); \\\n" + " checkliveness(L,io); }\n" + "#define setclLvalue(L,obj,x) \\\n" + " { TValue *io = (obj); LClosure *x_ = (x); \\\n" + " val_(io).gc = obj2gco(x_); settt_(io, ctb(LUA_TLCL)); \\\n" + " checkliveness(L,io); }\n" + "#define setclCvalue(L,obj,x) \\\n" + " { TValue *io = (obj); CClosure *x_ = (x); \\\n" + " val_(io).gc = obj2gco(x_); settt_(io, ctb(LUA_TCCL)); \\\n" + " checkliveness(L,io); }\n" + "#define sethvalue(L,obj,x) \\\n" + " { TValue *io = (obj); Table *x_ = (x); \\\n" + " val_(io).gc = obj2gco(x_); settt_(io, ctb(LUA_TTABLE)); \\\n" + " checkliveness(L,io); }\n" + "#define setiarrayvalue(L,obj,x) \\\n" + " { TValue *io = (obj); Table *x_ = (x); \\\n" + " val_(io).gc = obj2gco(x_); settt_(io, ctb(RAVI_TIARRAY)); \\\n" + " checkliveness(L,io); }\n" + "#define setfarrayvalue(L,obj,x) \\\n" + " { TValue *io = (obj); Table *x_ = 
(x); \\\n" + " val_(io).gc = obj2gco(x_); settt_(io, ctb(RAVI_TFARRAY)); \\\n" + " checkliveness(L,io); }\n" + "#define setdeadvalue(obj) settt_(obj, LUA_TDEADKEY)\n" + "#define setobj(L,obj1,obj2) \\\n" + // NOTE we cannot use aggregate assign so following assigns by field but assumes + // n covers all value types + " { TValue *io1=(obj1); const TValue *io2=(obj2); io1->tt_ = io2->tt_; val_(io1).n = val_(io2).n; \\\n" + " (void)L; checkliveness(L,io1); }\n" + "#define setobjs2s setobj\n" + "#define setobj2s setobj\n" + "#define setsvalue2s setsvalue\n" + "#define sethvalue2s sethvalue\n" + "#define setptvalue2s setptvalue\n" + "#define setobjt2t setobj\n" + "#define setobj2n setobj\n" + "#define setsvalue2n setsvalue\n" + "#define setobj2t setobj\n" + "typedef TValue *StkId;\n" + "typedef struct TString {\n" + " CommonHeader;\n" + " lu_byte extra;\n" + " lu_byte shrlen;\n" + " unsigned int hash;\n" + " union {\n" + " size_t lnglen;\n" + " struct TString *hnext;\n" + " } u;\n" + "} TString;\n" + "typedef union UTString {\n" + " L_Umaxalign dummy;\n" + " TString tsv;\n" + "} UTString;\n" + "#define getstr(ts) \\\n" + " check_exp(sizeof((ts)->extra), cast(char *, (ts)) + sizeof(UTString))\n" + "#define svalue(o) getstr(tsvalue(o))\n" + "#define tsslen(s) ((s)->tt == LUA_TSHRSTR ? 
(s)->shrlen : (s)->u.lnglen)\n" + "#define vslen(o) tsslen(tsvalue(o))\n" + "typedef struct Udata {\n" + " CommonHeader;\n" + " LuaType ttuv_;\n" + " struct Table *metatable;\n" + " size_t len;\n" + " union Value user_;\n" + "} Udata;\n" + "typedef union UUdata {\n" + " L_Umaxalign dummy;\n" + " Udata uv;\n" + "} UUdata;\n" + "#define getudatamem(u) \\\n" + " check_exp(sizeof((u)->ttuv_), (cast(char*, (u)) + sizeof(UUdata)))\n" + "#define setuservalue(L,u,o) \\\n" + " { const TValue *io=(o); Udata *iu = (u); \\\n" + " iu->user_ = io->value_; iu->ttuv_ = rttype(io); \\\n" + " checkliveness(L,io); }\n" + "#define getuservalue(L,u,o) \\\n" + " { TValue *io=(o); const Udata *iu = (u); \\\n" + " io->value_ = iu->user_; settt_(io, iu->ttuv_); \\\n" + " checkliveness(L,io); }\n" + "typedef enum {\n" + " RAVI_TANY = 0,\n" + " RAVI_TNUMINT = 1,\n" + " RAVI_TNUMFLT,\n" + " RAVI_TARRAYINT,\n" + " RAVI_TARRAYFLT,\n" + " RAVI_TFUNCTION,\n" + " RAVI_TTABLE,\n" + " RAVI_TSTRING,\n" + " RAVI_TNIL,\n" + " RAVI_TBOOLEAN,\n" + " RAVI_TUSERDATA\n" + "} ravitype_t;\n" + "typedef struct Upvaldesc {\n" + " TString *name;\n" + " TString *usertype;\n" + " lu_byte ravi_type;\n" + " lu_byte instack;\n" + " lu_byte idx;\n" + "} Upvaldesc;\n" + "typedef struct LocVar {\n" + " TString *varname;\n" + " TString *usertype;\n" + " int startpc;\n" + " int endpc;\n" + " lu_byte ravi_type;\n" + "} LocVar;\n" + "typedef enum {\n" + " RAVI_JIT_NOT_COMPILED = 0,\n" + " RAVI_JIT_CANT_COMPILE = 1,\n" + " RAVI_JIT_COMPILED = 2\n" + "} ravi_jit_status_t;\n" + "typedef enum {\n" + " RAVI_JIT_FLAG_NONE = 0,\n" + " RAVI_JIT_FLAG_HASFORLOOP = 1\n" + "} ravi_jit_flag_t;\n" + "typedef struct RaviJITProto {\n" + " lu_byte jit_status;\n" + " lu_byte jit_flags;\n" + " unsigned short execution_count;\n" + " void *jit_data;\n" + " lua_CFunction jit_function;\n" + "} RaviJITProto;\n" + "typedef struct Proto {\n" + " CommonHeader;\n" + " lu_byte numparams;\n" + " lu_byte is_vararg;\n" + " lu_byte maxstacksize;\n" + " int 
sizeupvalues;\n" + " int sizek;\n" + " int sizecode;\n" + " int sizelineinfo;\n" + " int sizep;\n" + " int sizelocvars;\n" + " int linedefined;\n" + " int lastlinedefined;\n" + " TValue *k;\n" + " Instruction *code;\n" + " struct Proto **p;\n" + " int *lineinfo;\n" + " LocVar *locvars;\n" + " Upvaldesc *upvalues;\n" + " struct LClosure *cache;\n" + " TString *source;\n" + " GCObject *gclist;\n" + " RaviJITProto ravi_jit;\n" + "} Proto;\n" + "typedef struct UpVal UpVal;\n" + "#define ClosureHeader \\\n" + " CommonHeader; lu_byte nupvalues; GCObject *gclist\n" + "typedef struct CClosure {\n" + " ClosureHeader;\n" + " lua_CFunction f;\n" + " TValue upvalue[1];\n" + "} CClosure;\n" + "typedef struct LClosure {\n" + " ClosureHeader;\n" + " struct Proto *p;\n" + " UpVal *upvals[1];\n" + "} LClosure;\n" + "typedef union Closure {\n" + " CClosure c;\n" + " LClosure l;\n" + "} Closure;\n" + "#define isLfunction(o) ttisLclosure(o)\n" + "#define getproto(o) (clLvalue(o)->p)\n" + "typedef union TKey {\n" + " struct {\n" + " TValuefields;\n" + " int next;\n" + " } nk;\n" + " TValue tvk;\n" + "} TKey;\n" + "#define setnodekey(L,key,obj) \\\n" + " { TKey *k_=(key); const TValue *io_=(obj); \\\n" + " k_->nk.value_ = io_->value_; k_->nk.tt_ = io_->tt_; \\\n" + " (void)L; checkliveness(L,io_); }\n" + "typedef struct Node {\n" + " TValue i_val;\n" + " TKey i_key;\n" + "} Node;\n" + "typedef enum RaviArrayModifer {\n" + " RAVI_ARRAY_SLICE = 1,\n" + " RAVI_ARRAY_FIXEDSIZE = 2,\n" + " RAVI_ARRAY_ALLOCATED = 4,\n" + " RAVI_ARRAY_ISFLOAT = 8\n" + "} RaviArrayModifier;\n" + "enum {\n" + " RAVI_ARRAY_MAX_INLINE = 3,\n" + "};\n" + "typedef struct RaviArray {\n" + " CommonHeader;\n" + " lu_byte flags;\n" + " unsigned int len;\n" + " unsigned int size;\n" + " union {\n" + " lua_Number numarray[RAVI_ARRAY_MAX_INLINE];\n" + " lua_Integer intarray[RAVI_ARRAY_MAX_INLINE];\n" + " struct RaviArray* parent;\n" + " };\n" + " char *data;\n" + " struct Table *metatable;\n" + "} RaviArray;\n" + "typedef 
struct Table {\n" + " CommonHeader;\n" + " lu_byte flags;\n" + " lu_byte lsizenode;\n" + " unsigned int sizearray;\n" + " TValue *array;\n" + " Node *node;\n" + " Node *lastfree;\n" + " struct Table *metatable;\n" + " GCObject *gclist;\n" + " unsigned int hmask;\n" + "} Table;\n" + "typedef struct Mbuffer {\n" + " char *buffer;\n" + " size_t n;\n" + " size_t buffsize;\n" + "} Mbuffer;\n" + "typedef struct stringtable {\n" + " TString **hash;\n" + " int nuse;\n" + " int size;\n" + "} stringtable;\n" + "struct lua_Debug;\n" + "typedef intptr_t lua_KContext;\n" + "typedef int(*lua_KFunction)(struct lua_State *L, int status, lua_KContext ctx);\n" + "typedef void *(*lua_Alloc)(void *ud, void *ptr, size_t osize,\n" + " size_t nsize);\n" + "typedef void(*lua_Hook)(struct lua_State *L, struct lua_Debug *ar);\n" + "typedef struct CallInfo {\n" + " StkId func;\n" + " StkId top;\n" + " struct CallInfo *previous, *next;\n" + " union {\n" + " struct {\n" + " StkId base;\n" + " const Instruction *savedpc;\n" + " } l;\n" + " struct {\n" + " lua_KFunction k;\n" + " ptrdiff_t old_errfunc;\n" + " lua_KContext ctx;\n" + " } c;\n" + " } u;\n" + " ptrdiff_t extra;\n" + " short nresults;\n" + " unsigned short callstatus;\n" + " unsigned short stacklevel;\n" + " lu_byte jitstatus;\n" + " lu_byte magic;\n" + "} CallInfo;\n" + "#define CIST_OAH (1<<0)\n" + "#define CIST_LUA (1<<1)\n" + "#define CIST_HOOKED (1<<2)\n" + "#define CIST_FRESH (1<<3)\n" + "#define CIST_YPCALL (1<<4)\n" + "#define CIST_TAIL (1<<5)\n" + "#define CIST_HOOKYIELD (1<<6)\n" + "#define CIST_LEQ (1<<7)\n" + "#define CIST_FIN (1<<8)\n" + "#define isLua(ci) ((ci)->callstatus & CIST_LUA)\n" + "#define isJITed(ci) ((ci)->jitstatus)\n" + "#define setoah(st,v) ((st) = ((st) & ~CIST_OAH) | (v))\n" + "#define getoah(st) ((st) & CIST_OAH)\n" + "typedef struct global_State global_State;\n" + "struct lua_State {\n" + " CommonHeader;\n" + " lu_byte status;\n" + " StkId top;\n" + " global_State *l_G;\n" + " CallInfo *ci;\n" + " 
const Instruction *oldpc;\n" + " StkId stack_last;\n" + " StkId stack;\n" + " UpVal *openupval;\n" + " GCObject *gclist;\n" + " struct lua_State *twups;\n" + " struct lua_longjmp *errorJmp;\n" + " CallInfo base_ci;\n" + " volatile lua_Hook hook;\n" + " ptrdiff_t errfunc;\n" + " int stacksize;\n" + " int basehookcount;\n" + " int hookcount;\n" + " unsigned short nny;\n" + " unsigned short nCcalls;\n" + " lu_byte hookmask;\n" + " lu_byte allowhook;\n" + " unsigned short nci;\n" + " lu_byte magic;\n" + "};\n" + "#define G(L) (L->l_G)\n" + "union GCUnion {\n" + " GCObject gc;\n" + " struct TString ts;\n" + " struct Udata u;\n" + " union Closure cl;\n" + " struct Table h;\n" + " struct RaviArray arr;\n" + " struct Proto p;\n" + " struct lua_State th;\n" + "};\n" + "struct UpVal {\n" + " TValue *v;\n" +#ifdef RAVI_DEFER_STATEMENT + " unsigned int refcount;\n" + " unsigned int flags;\n" +#else + " lu_mem refcount;\n" +#endif + " union {\n" + " struct {\n" + " UpVal *next;\n" + " int touched;\n" + " } open;\n" + " TValue value;\n" + " } u;\n" + "};\n" + "#define cast_u(o) cast(union GCUnion *, (o))\n" + "#define gco2ts(o) \\\n" + " check_exp(novariant((o)->tt) == LUA_TSTRING, &((cast_u(o))->ts))\n" + "#define gco2u(o) check_exp((o)->tt == LUA_TUSERDATA, &((cast_u(o))->u))\n" + "#define gco2lcl(o) check_exp((o)->tt == LUA_TLCL, &((cast_u(o))->cl.l))\n" + "#define gco2ccl(o) check_exp((o)->tt == LUA_TCCL, &((cast_u(o))->cl.c))\n" + "#define gco2cl(o) \\\n" + " check_exp(novariant((o)->tt) == LUA_TFUNCTION, &((cast_u(o))->cl))\n" + "#define gco2t(o) check_exp((o)->tt == LUA_TTABLE, &((cast_u(o))->h))\n" + "#define gco2array(o) check_exp(((o)->tt == RAVI_TIARRAY || (o)->tt == RAVI_TFARRAY), &((cast_u(o))->arr))\n" + "#define gco2p(o) check_exp((o)->tt == LUA_TPROTO, &((cast_u(o))->p))\n" + "#define gco2th(o) check_exp((o)->tt == LUA_TTHREAD, &((cast_u(o))->th))\n" + "#define obj2gco(v) \\\n" + " check_exp(novariant((v)->tt) < LUA_TDEADKEY, (&(cast_u(v)->gc)))\n" + "#define 
LUA_FLOORN2I 0\n" + "#define tonumber(o,n) \\\n" + " (ttisfloat(o) ? (*(n) = fltvalue(o), 1) : luaV_tonumber_(o,n))\n" + "#define tointeger(o,i) \\\n" + " (ttisinteger(o) ? (*(i) = ivalue(o), 1) : luaV_tointeger(o,i,LUA_FLOORN2I))\n" + "extern int luaV_tonumber_(const TValue *obj, lua_Number *n);\n" + "extern int luaV_tointeger(const TValue *obj, lua_Integer *p, int mode);\n" +#ifdef RAVI_DEFER_STATEMENT + "extern int luaF_close (lua_State *L, StkId level, int status);\n" +#else + "extern void luaF_close (lua_State *L, StkId level);\n" +#endif + "extern int luaD_poscall (lua_State *L, CallInfo *ci, StkId firstResult, int nres);\n" + "extern void luaD_growstack (lua_State *L, int n);\n" + "extern int luaV_equalobj(lua_State *L, const TValue *t1, const TValue *t2);\n" + "extern int luaV_lessthan(lua_State *L, const TValue *l, const TValue *r);\n" + "extern int luaV_lessequal(lua_State *L, const TValue *l, const TValue *r);\n" + "extern void luaV_gettable (lua_State *L, const TValue *t, TValue *key, StkId val);\n" + "extern void luaV_settable (lua_State *L, const TValue *t, TValue *key, StkId val);\n" + "extern int luaV_execute(lua_State *L);\n" + "extern int luaD_precall (lua_State *L, StkId func, int nresults, int op_call);\n" + "extern void raviV_op_newtable(lua_State *L, CallInfo *ci, TValue *ra, int b, int c);\n" + "extern void raviV_op_newarrayint(lua_State *L, CallInfo *ci, TValue *ra);\n" + "extern void raviV_op_newarrayfloat(lua_State *L, CallInfo *ci, TValue *ra);\n" + "extern void luaO_arith (lua_State *L, int op, const TValue *p1, const TValue *p2, TValue *res);\n" + "extern void raviV_op_setlist(lua_State *L, CallInfo *ci, TValue *ra, int b, int c);\n" + "extern void raviV_op_concat(lua_State *L, CallInfo *ci, int a, int b, int c);\n" + "extern void raviV_op_closure(lua_State *L, CallInfo *ci, LClosure *cl, int a, int Bx);\n" + "extern void raviV_op_vararg(lua_State *L, CallInfo *ci, LClosure *cl, int a, int b);\n" + "extern void luaV_objlen (lua_State 
*L, StkId ra, const TValue *rb);\n" + "extern int luaV_forlimit(const TValue *obj, lua_Integer *p, lua_Integer step, int *stopnow);\n" + "extern void raviV_op_setupval(lua_State *L, LClosure *cl, TValue *ra, int b);\n" + "extern void raviV_op_setupvali(lua_State *L, LClosure *cl, TValue *ra, int b);\n" + "extern void raviV_op_setupvalf(lua_State *L, LClosure *cl, TValue *ra, int b);\n" + "extern void raviV_op_setupvalai(lua_State *L, LClosure *cl, TValue *ra, int b);\n" + "extern void raviV_op_setupvalaf(lua_State *L, LClosure *cl, TValue *ra, int b);\n" + "extern void raviV_op_setupvalt(lua_State *L, LClosure *cl, TValue *ra, int b);\n" + "extern void raviV_raise_error(lua_State *L, int errorcode);\n" + "extern void raviV_raise_error_with_info(lua_State *L, int errorcode, const char *info);\n" + "extern void luaD_call (lua_State *L, StkId func, int nResults);\n" + "extern void raviH_set_int(lua_State *L, RaviArray *t, lua_Unsigned key, lua_Integer value);\n" + "extern void raviH_set_float(lua_State *L, RaviArray *t, lua_Unsigned key, lua_Number value);\n" + "extern int raviV_check_usertype(lua_State *L, TString *name, const TValue *o);\n" + "extern void luaT_trybinTM (lua_State *L, const TValue *p1, const TValue *p2, TValue *res, TMS event);\n" + "extern void raviV_gettable_sskey(lua_State *L, const TValue *t, TValue *key, TValue *val);\n" + "extern void raviV_settable_sskey(lua_State *L, const TValue *t, TValue *key, TValue *val);\n" + "extern void raviV_gettable_i(lua_State *L, const TValue *t, TValue *key, TValue *val);\n" + "extern void raviV_settable_i(lua_State *L, const TValue *t, TValue *key, TValue *val);\n" +#ifdef RAVI_DEFER_STATEMENT + "extern void raviV_op_defer(lua_State *L, TValue *ra);\n" +#endif + "extern lua_Integer luaV_shiftl(lua_Integer x, lua_Integer y);\n" + "extern void ravi_dump_value(lua_State *L, const struct lua_TValue *v);\n" + "extern void raviV_op_bnot(lua_State *L, TValue *ra, TValue *rb);\n" + "extern void *luaM_realloc_ (lua_State 
*L, void *block, size_t osize, size_t nsize);\n" + "extern LClosure *luaF_newLclosure (lua_State *L, int n);\n" + "extern TString *luaS_newlstr (lua_State *L, const char *str, size_t l);\n" + "extern Proto *luaF_newproto (lua_State *L);\n" + "extern void luaD_inctop (lua_State *L);\n" + "#define luaM_reallocv(L,b,on,n,e) luaM_realloc_(L, (b), (on)*(e), (n)*(e))\n" + "#define luaM_newvector(L,n,t) cast(t *, luaM_reallocv(L, NULL, 0, n, sizeof(t)))\n" + "#define R(i) (base + i)\n" + "#define K(i) (k + i)\n" + "#define S(i) (stackbase + i)\n" + "#define stackoverflow(L, n) (((int)(L->top - L->stack) + (n) + 5) >= L->stacksize)\n" + "#define savestack(L,p) ((char *)(p) - (char *)L->stack)\n" + "#define restorestack(L,n) ((TValue *)((char *)L->stack + (n)))\n" + "#define tonumberns(o,n) \\\n" + " (ttisfloat(o) ? ((n) = fltvalue(o), 1) : \\\n" + " (ttisinteger(o) ? ((n) = cast_num(ivalue(o)), 1) : 0))\n" + "#define intop(op,v1,v2) l_castU2S(l_castS2U(v1) op l_castS2U(v2))\n" + "#define nan (0./0.)\n" + "#define inf (1./0.)\n" + "#define luai_numunm(L,a) (-(a))\n"; + +struct function { + Proc *proc; + TextBuffer prologue; + TextBuffer body; + struct Ravi_CompilerInterface *api; +}; + +/* readonly statics */ +static const char *int_var_prefix = "i_"; +static const char *flt_var_prefix = "f_"; +// static Pseudo NIL_pseudo = {.type = PSEUDO_NIL}; + +enum errorcode { + Error_integer_expected, + Error_number_expected, + Error_integer_array_expected, + Error_number_array_expected, + Error_table_expected, + Error_upval_needs_integer, + Error_upval_needs_number, + Error_upval_needs_integer_array, + Error_upval_needs_number_array, + Error_upval_needs_table, + Error_for_limit_must_be_number, + Error_for_step_must_be_number, + Error_for_initial_value_must_be_number, + Error_array_out_of_bounds, + Error_string_expected, + Error_closure_expected, + Error_type_mismatch, +}; + +// Opcodes used by luaO_arith +enum { + LUA_OPADD = 0, /* ORDER TM, ORDER OP */ + LUA_OPSUB = 1, + LUA_OPMUL = 
2, + LUA_OPMOD = 3, + LUA_OPPOW = 4, + LUA_OPDIV = 5, + LUA_OPIDIV = 6, + LUA_OPBAND = 7, + LUA_OPBOR = 8, + LUA_OPBXOR = 9, + LUA_OPSHL = 10, + LUA_OPSHR = 11, + LUA_OPUNM = 12, + LUA_OPBNOT = 13 +}; + +static inline Pseudo *get_operand(Instruction *insn, unsigned idx) +{ + return (Pseudo *)raviX_ptrlist_nth_entry((struct ptr_list *)insn->operands, idx); +} + +static inline Pseudo *get_first_operand(Instruction *insn) +{ + return (Pseudo *)raviX_ptrlist_first((struct ptr_list *)insn->operands); +} + +static inline Pseudo *get_last_operand(Instruction *insn) +{ + return (Pseudo *)raviX_ptrlist_last((struct ptr_list *)insn->operands); +} + +static inline Pseudo *get_target(Instruction *insn, unsigned idx) +{ + return (Pseudo *)raviX_ptrlist_nth_entry((struct ptr_list *)insn->targets, idx); +} + +static inline Pseudo *get_first_target(Instruction *insn) +{ + return (Pseudo *)raviX_ptrlist_first((struct ptr_list *)insn->targets); +} + +static inline Pseudo *get_last_target(Instruction *insn) +{ + return (Pseudo *)raviX_ptrlist_last((struct ptr_list *)insn->targets); +} + +static inline unsigned get_num_operands(Instruction *insn) +{ + return raviX_ptrlist_size((const struct ptr_list *)insn->operands); +} + +static inline unsigned get_num_targets(Instruction *insn) +{ + return raviX_ptrlist_size((const struct ptr_list *)insn->targets); +} + +static inline unsigned get_num_instructions(BasicBlock *bb) +{ + return raviX_ptrlist_size((const struct ptr_list *)bb->insns); +} + +static inline unsigned get_num_childprocs(Proc *proc) +{ + return raviX_ptrlist_size((const struct ptr_list *)proc->procs); +} + +/** + * Helper to generate a list of primitive C variables representing temp int/float values. 
+ */ +static void emit_vars(const char *type, const char *prefix, PseudoGenerator *gen, TextBuffer *mb) +{ + if (gen->next_reg == 0) + return; + for (unsigned i = 0; i < gen->next_reg; i++) { + if (i == 0) { + raviX_buffer_add_fstring(mb, "%s ", type); + } + if (i > 0) { + raviX_buffer_add_string(mb, " = 0, "); + } + raviX_buffer_add_fstring(mb, "%s%d", prefix, i); + } + raviX_buffer_add_string(mb, " = 0;\n"); +} + +static void emit_varname(const Pseudo *pseudo, TextBuffer *mb) +{ + if (pseudo->type == PSEUDO_TEMP_INT || pseudo->type == PSEUDO_TEMP_BOOL) { + raviX_buffer_add_fstring(mb, "%s%d", int_var_prefix, pseudo->regnum); + } else if (pseudo->type == PSEUDO_TEMP_FLT) { + raviX_buffer_add_fstring(mb, "%s%d", flt_var_prefix, pseudo->regnum); + } else { + fprintf(stderr, "Unexpected pseudo type %d\n", pseudo->type); + assert(0); + } +} + +static void initfn(struct function *fn, Proc *proc, struct Ravi_CompilerInterface *api) +{ + fn->proc = proc; + fn->api = api; + /* Set a name that can be used later to retrieve the compiled code */ + snprintf(proc->funcname, sizeof proc->funcname, "__ravifunc_%d", proc->id); + raviX_buffer_init(&fn->prologue, 4096); + raviX_buffer_init(&fn->body, 4096); + raviX_buffer_add_fstring(&fn->prologue, "static int %s(lua_State *L) {\n", proc->funcname); + raviX_buffer_add_string(&fn->prologue, "int error_code = 0;\n"); + raviX_buffer_add_string(&fn->prologue, "int result = 0;\n"); + raviX_buffer_add_string(&fn->prologue, "CallInfo *ci = L->ci;\n"); + raviX_buffer_add_string(&fn->prologue, "LClosure *cl = clLvalue(ci->func);\n"); + raviX_buffer_add_string(&fn->prologue, "TValue *k = cl->p->k;\n"); + raviX_buffer_add_string(&fn->prologue, "StkId base = ci->u.l.base;\n"); + emit_vars("lua_Integer", int_var_prefix, &proc->temp_int_pseudos, &fn->prologue); + emit_vars("lua_Number", flt_var_prefix, &proc->temp_flt_pseudos, &fn->prologue); + // Following are temp dummy regs + // In ops like luaV_settable we may use up to two variables + 
raviX_buffer_add_string(&fn->prologue, "TValue ival0; settt_(&ival0, LUA_TNUMINT);\n"); + raviX_buffer_add_string(&fn->prologue, "TValue fval0; settt_(&fval0, LUA_TNUMFLT);\n"); + raviX_buffer_add_string(&fn->prologue, "TValue bval0; settt_(&bval0, LUA_TBOOLEAN);\n"); + raviX_buffer_add_string(&fn->prologue, "TValue ival1; settt_(&ival1, LUA_TNUMINT);\n"); + raviX_buffer_add_string(&fn->prologue, "TValue fval1; settt_(&fval1, LUA_TNUMFLT);\n"); + raviX_buffer_add_string(&fn->prologue, "TValue bval1; settt_(&bval1, LUA_TBOOLEAN);\n"); + raviX_buffer_add_string(&fn->prologue, "TValue nilval; setnilvalue(&nilval);\n"); +} + +static void cleanup(struct function *fn) +{ + raviX_buffer_free(&fn->prologue); + raviX_buffer_free(&fn->body); +} + +static void emit_reload_base(struct function *fn) { raviX_buffer_add_string(&fn->body, "base = ci->u.l.base;\n"); } + +static inline unsigned num_locals(Proc *proc) { return proc->local_pseudos.next_reg; } + +static inline unsigned num_temps(Proc *proc) { return proc->temp_pseudos.next_reg; } + +/* + * Max stack size is number of Lua vars and any temps that live on Lua stack during execution. + * Note that this is the number of slots that is known to the compiler - at runtime additional + * stack space may be needed when making function calls - that is not accounted for here. + */ +static unsigned compute_max_stack_size(Proc *proc) { return num_locals(proc) + num_temps(proc); } + +/** + * Computes the register offset from base. 
Input pseudo must be a local variable, + * or temp register or range register (on Lua stack) + */ +static unsigned compute_register_from_base(struct function *fn, const Pseudo *pseudo) +{ + switch (pseudo->type) { + case PSEUDO_TEMP_ANY: + case PSEUDO_RANGE: // Compute starting register + case PSEUDO_RANGE_SELECT: + // All temps start after the locals + return pseudo->regnum + num_locals(fn->proc); + case PSEUDO_SYMBOL: + if (pseudo->symbol->symbol_type == SYM_LOCAL) { + return pseudo->regnum; + } + // fallthrough + default: + assert(false); + return (unsigned)-1; + } +} + +// Check if two pseudos point to the same register +// note we cannot easily check PSEUDO_LUASTACK type because there may +// be var args between CI->func and base. So stackbase may not be base-1 always. +static bool refers_to_same_register(struct function *fn, Pseudo *src, Pseudo *dst) +{ + static bool reg_pseudos[] = { + [PSEUDO_SYMBOL] = true, /* An object of type lua_symbol representing local var or upvalue */ + [PSEUDO_TEMP_FLT] = false, /* A floating point temp - may also be used for locals that don't escape */ + [PSEUDO_TEMP_INT] = false, /* An integer temp - may also be used for locals that don't escape */ + [PSEUDO_TEMP_BOOL] = false, /* An (bool) integer temp - may also be used for locals that don't escape */ + [PSEUDO_TEMP_ANY] = true, /* A temp of any type - will always be on Lua stack */ + [PSEUDO_CONSTANT] = false, /* A literal value */ + [PSEUDO_PROC] = false, /* A proc / function */ + [PSEUDO_NIL] = false, + [PSEUDO_TRUE] = false, + [PSEUDO_FALSE] = false, + [PSEUDO_BLOCK] = false, /* Points to a basic block, used as targets for jumps */ + [PSEUDO_RANGE] = true, /* Represents a range of registers from a certain starting register */ + [PSEUDO_RANGE_SELECT] = true, /* Picks a certain register from a range */ + /* TODO we need a type for var args */ + [PSEUDO_LUASTACK] = true /* Specifies a Lua stack position - not used by linearizer - for use by codegen */ + }; + if 
(!reg_pseudos[src->type] || !reg_pseudos[dst->type]) + return false; + if (src->type == PSEUDO_LUASTACK || dst->type == PSEUDO_LUASTACK) { + return src->type == dst->type && src->stackidx == dst->stackidx; + } + if (src->type == PSEUDO_SYMBOL && dst->type != PSEUDO_SYMBOL) + // a temp reg can never equate local reg + return false; + if (src->type == PSEUDO_SYMBOL && dst->type == PSEUDO_SYMBOL) { + // up-values are not registers + if (src->symbol->symbol_type != SYM_LOCAL || dst->symbol->symbol_type != SYM_LOCAL) { + return false; + } + } + return compute_register_from_base(fn, src) == compute_register_from_base(fn, dst); +} + +/* +Outputs accessor for a pseudo so that the accessor is always of type +TValue *. Thus for constants, we need to use a temp stack variable of type TValue. +The issue is what happens if we need two values at the same time and both are constants +of the same type. This is where the discriminator comes in - to help differentiate. +*/ +static int emit_reg_accessor(struct function *fn, const Pseudo *pseudo, unsigned discriminator) +{ + if (pseudo->type == PSEUDO_LUASTACK) { + // Note pseudo->stackidx is relative to ci->func + // But ci->func is not always base-1 because of var args + // Therefore we need a different way to compute these + raviX_buffer_add_fstring(&fn->body, "S(%d)", pseudo->stackidx); + } else if (pseudo->type == PSEUDO_TEMP_ANY || pseudo->type == PSEUDO_RANGE || + pseudo->type == PSEUDO_RANGE_SELECT) { + // we put all temps on Lua stack after the locals + raviX_buffer_add_fstring(&fn->body, "R(%d)", compute_register_from_base(fn, pseudo)); + } else if (pseudo->type == PSEUDO_SYMBOL) { + if (pseudo->symbol->symbol_type == SYM_LOCAL) { + raviX_buffer_add_fstring(&fn->body, "R(%d)", pseudo->regnum); + } else if (pseudo->symbol->symbol_type == SYM_UPVALUE) { + raviX_buffer_add_fstring(&fn->body, "cl->upvals[%d]->v", pseudo->regnum); + } else { + fn->api->error_message(fn->api->context, "Unexpected pseudo symbol type"); + assert(0); 
+ return -1; + } + } else if (pseudo->type == PSEUDO_CONSTANT) { + if (pseudo->constant->type == RAVI_TSTRING) { + unsigned k = pseudo->constant->index; + raviX_buffer_add_fstring(&fn->body, "K(%d)", k); + } else if (pseudo->constant->type == RAVI_TNUMINT) { + raviX_buffer_add_fstring(&fn->body, "&ival%u; ival%u.value_.i = %lld", discriminator, + discriminator, pseudo->constant->i); + } else if (pseudo->constant->type == RAVI_TNUMFLT) { + raviX_buffer_add_fstring(&fn->body, "&fval%u; fval%u.value_.n = %g", discriminator, + discriminator, pseudo->constant->n); + } else if (pseudo->constant->type == RAVI_TNIL) { + raviX_buffer_add_string(&fn->body, "&nilval"); + } else if (pseudo->constant->type == RAVI_TBOOLEAN) { + raviX_buffer_add_fstring(&fn->body, "&bval%u; bval%u.value_.b = %d", discriminator, + discriminator, (int)pseudo->constant->i); + } else { + fn->api->error_message(fn->api->context, "Unexpected pseudo constant type"); + assert(0); + return -1; + } + } else if (pseudo->type == PSEUDO_TEMP_FLT) { + raviX_buffer_add_fstring(&fn->body, "&fval%u; fval%u.value_.n = ", discriminator, discriminator); + emit_varname(pseudo, &fn->body); + } else if (pseudo->type == PSEUDO_TEMP_INT) { + raviX_buffer_add_fstring(&fn->body, "&ival%u; ival%u.value_.i = ", discriminator, discriminator); + emit_varname(pseudo, &fn->body); + } else if (pseudo->type == PSEUDO_NIL) { + raviX_buffer_add_string(&fn->body, "&nilval"); + } else if (pseudo->type == PSEUDO_TRUE) { + raviX_buffer_add_fstring(&fn->body, "&bval%u; bval%u.value_.b = 1", discriminator, discriminator); + } else if (pseudo->type == PSEUDO_FALSE) { + raviX_buffer_add_fstring(&fn->body, "&bval%u; bval%u.value_.b = 0", discriminator, discriminator); + } else if (pseudo->type == PSEUDO_TEMP_BOOL) { + raviX_buffer_add_fstring(&fn->body, "&bval%u; bval%u.value_.b = ", discriminator, discriminator); + emit_varname(pseudo, &fn->body); + } else { + fn->api->error_message(fn->api->context, "Unexpected pseudo type"); + assert(0); 
+ return -1; + } + return 0; +} + +/*copy floating point value to a temporary float */ +static int emit_move_flttemp(struct function *fn, Pseudo *src, Pseudo *dst) +{ + if (src->type == PSEUDO_CONSTANT) { + if (src->constant->type == RAVI_TNUMFLT) { + emit_varname(dst, &fn->body); + raviX_buffer_add_fstring(&fn->body, " = %.16g;\n", src->constant->n); + } else if (src->constant->type == RAVI_TNUMINT) { + emit_varname(dst, &fn->body); + raviX_buffer_add_fstring(&fn->body, " = (lua_Number)%lld;\n", src->constant->i); + } else { + assert(0); + return -1; + } + } else if (src->type == PSEUDO_TEMP_FLT) { + emit_varname(dst, &fn->body); + raviX_buffer_add_string(&fn->body, " = "); + emit_varname(src, &fn->body); + raviX_buffer_add_string(&fn->body, ";\n"); + } else if (src->type == PSEUDO_LUASTACK || src->type == PSEUDO_TEMP_ANY || src->type == PSEUDO_SYMBOL) { + raviX_buffer_add_string(&fn->body, "{\nTValue *reg = "); + emit_reg_accessor(fn, src, 0); + raviX_buffer_add_string(&fn->body, ";\n"); + emit_varname(dst, &fn->body); + raviX_buffer_add_string(&fn->body, " = fltvalue(reg);\n}\n"); + } else { + assert(0); + return -1; + } + return 0; +} + +/*copy integer value to temporary int */ +static int emit_move_inttemp(struct function *fn, Pseudo *src, Pseudo *dst) +{ + if (src->type == PSEUDO_CONSTANT) { + if (src->constant->type == RAVI_TNUMINT) { + emit_varname(dst, &fn->body); + raviX_buffer_add_fstring(&fn->body, " = %lld;\n", src->constant->i); + } else { + // FIXME can we have float value? 
+ assert(0); + return -1; + } + } else if (src->type == PSEUDO_TEMP_INT || src->type == PSEUDO_TEMP_BOOL) { + emit_varname(dst, &fn->body); + raviX_buffer_add_string(&fn->body, " = "); + emit_varname(src, &fn->body); + raviX_buffer_add_string(&fn->body, ";\n"); + } else if (src->type == PSEUDO_LUASTACK || src->type == PSEUDO_TEMP_ANY || src->type == PSEUDO_SYMBOL) { + raviX_buffer_add_string(&fn->body, "{\nTValue *reg = "); + emit_reg_accessor(fn, src, 0); + raviX_buffer_add_string(&fn->body, ";\n"); + emit_varname(dst, &fn->body); + raviX_buffer_add_string(&fn->body, " = ivalue(reg);\n}\n"); + } else { + assert(0); + return -1; + } + return 0; +} + +/* copy a value from source pseudo to destination pseudo.*/ +static int emit_move(struct function *fn, Pseudo *src, Pseudo *dst) +{ + if (dst->type == PSEUDO_TEMP_FLT) { + emit_move_flttemp(fn, src, dst); + } else if (dst->type == PSEUDO_TEMP_INT || dst->type == PSEUDO_TEMP_BOOL) { + emit_move_inttemp(fn, src, dst); + } else if (dst->type == PSEUDO_TEMP_ANY || dst->type == PSEUDO_SYMBOL || dst->type == PSEUDO_LUASTACK) { + if (src->type == PSEUDO_LUASTACK || src->type == PSEUDO_TEMP_ANY || src->type == PSEUDO_SYMBOL || + src->type == PSEUDO_RANGE_SELECT) { + // Only emit a move if we are not referencing the same register + if (!refers_to_same_register(fn, src, dst)) { + raviX_buffer_add_string(&fn->body, "{\nconst TValue *src_reg = "); + emit_reg_accessor(fn, src, 0); + raviX_buffer_add_string(&fn->body, ";\nTValue *dst_reg = "); + emit_reg_accessor(fn, dst, 0); + // FIXME - check value assignment approach + raviX_buffer_add_string( + &fn->body, + ";\ndst_reg->tt_ = src_reg->tt_;\ndst_reg->value_.n = src_reg->value_.n;\n}\n"); + } + } else if (src->type == PSEUDO_TEMP_INT) { + raviX_buffer_add_string(&fn->body, "{\nTValue *dst_reg = "); + emit_reg_accessor(fn, dst, 0); + raviX_buffer_add_string(&fn->body, ";\nsetivalue(dst_reg, "); + emit_varname(src, &fn->body); + raviX_buffer_add_string(&fn->body, ");\n}\n"); + } 
else if (src->type == PSEUDO_TEMP_FLT) { + raviX_buffer_add_string(&fn->body, "{\nTValue *dst_reg = "); + emit_reg_accessor(fn, dst, 0); + raviX_buffer_add_string(&fn->body, ";\nsetfltvalue(dst_reg, "); + emit_varname(src, &fn->body); + raviX_buffer_add_string(&fn->body, ");\n}\n"); + } else if (src->type == PSEUDO_TRUE || src->type == PSEUDO_FALSE) { + raviX_buffer_add_string(&fn->body, "{\nTValue *dst_reg = "); + emit_reg_accessor(fn, dst, 0); + raviX_buffer_add_fstring(&fn->body, ";\nsetbvalue(dst_reg, %d);\n}\n", + src->type == PSEUDO_TRUE ? 1 : 0); + } else if (src->type == PSEUDO_TEMP_BOOL) { + raviX_buffer_add_string(&fn->body, "{\nTValue *dst_reg = "); + emit_reg_accessor(fn, dst, 0); + raviX_buffer_add_string(&fn->body, ";\nsetbvalue(dst_reg, "); + emit_varname(src, &fn->body); + raviX_buffer_add_string(&fn->body, ");\n}\n"); + } else if (src->type == PSEUDO_NIL) { + raviX_buffer_add_string(&fn->body, "{\nTValue *dst_reg = "); + emit_reg_accessor(fn, dst, 0); + raviX_buffer_add_string(&fn->body, ";\nsetnilvalue(dst_reg);\n}\n"); + } else if (src->type == PSEUDO_CONSTANT) { + raviX_buffer_add_string(&fn->body, "{\nTValue *dst_reg = "); + emit_reg_accessor(fn, dst, 0); + raviX_buffer_add_string(&fn->body, ";\n"); + if (src->constant->type == RAVI_TNUMINT) { + raviX_buffer_add_fstring(&fn->body, "setivalue(dst_reg, %lld);\n", src->constant->i); + } else if (src->constant->type == RAVI_TNUMFLT) { + raviX_buffer_add_fstring(&fn->body, "setfltvalue(dst_reg, %g);\n", src->constant->n); + } else if (src->constant->type == RAVI_TBOOLEAN) { + raviX_buffer_add_fstring(&fn->body, "setbvalue(dst_reg, %i);\n", (int)src->constant->i); + } else if (src->constant->type == RAVI_TNIL) { + raviX_buffer_add_string(&fn->body, "setnilvalue(dst_reg);\n"); + } else if (src->constant->type == RAVI_TSTRING) { + raviX_buffer_add_string(&fn->body, "TValue *src_reg = "); + emit_reg_accessor(fn, src, 0); + raviX_buffer_add_string(&fn->body, ";\n"); + raviX_buffer_add_string( + 
&fn->body, + "dst_reg->tt_ = src_reg->tt_; dst_reg->value_.gc = src_reg->value_.gc;\n"); + } else { + assert(0); + return -1; + } + raviX_buffer_add_string(&fn->body, "}\n"); + } else { + /* range pseudos not supported yet */ + assert(0); + return -1; + } + } else { + assert(0); + return -1; + } + return 0; +} + +static int emit_jump(struct function *fn, Pseudo *pseudo) +{ + assert(pseudo->type == PSEUDO_BLOCK); + raviX_buffer_add_fstring(&fn->body, "goto L%d;\n", pseudo->block->index); + return 0; +} + +static int emit_op_cbr(struct function *fn, Instruction *insn) +{ + assert(insn->opcode == op_cbr); + Pseudo *cond_pseudo = get_operand(insn, 0); + if (cond_pseudo->type == PSEUDO_FALSE || cond_pseudo->type == PSEUDO_NIL) { + emit_jump(fn, get_target(insn, 1)); + } else if (cond_pseudo->type == PSEUDO_TRUE || cond_pseudo->type == PSEUDO_CONSTANT) { + emit_jump(fn, get_target(insn, 0)); + } else if (cond_pseudo->type == PSEUDO_TEMP_BOOL) { + raviX_buffer_add_string(&fn->body, "{"); + raviX_buffer_add_string(&fn->body, " if ("); + emit_varname(cond_pseudo, &fn->body); + raviX_buffer_add_fstring(&fn->body, " != 0) goto L%d;", get_target(insn, 0)->block->index); + raviX_buffer_add_fstring(&fn->body, " else goto L%d; ", get_target(insn, 1)->block->index); + raviX_buffer_add_string(&fn->body, "}\n"); + } else if (cond_pseudo->type == PSEUDO_TEMP_ANY || cond_pseudo->type == PSEUDO_SYMBOL) { + raviX_buffer_add_string(&fn->body, "{\nconst TValue *src_reg = "); + emit_reg_accessor(fn, cond_pseudo, 0); + raviX_buffer_add_fstring(&fn->body, ";\nif (!l_isfalse(src_reg)) goto L%d;\n", + get_target(insn, 0)->block->index); + raviX_buffer_add_fstring(&fn->body, "else goto L%d;\n", get_target(insn, 1)->block->index); + raviX_buffer_add_string(&fn->body, "}\n"); + } else { + assert(0); + return -1; + } + return 0; +} + +static int emit_op_br(struct function *fn, Instruction *insn) +{ + assert(insn->opcode == op_br); + return emit_jump(fn, get_target(insn, 0)); +} + +static int 
emit_op_mov(struct function *fn, Instruction *insn) +{ + assert(insn->opcode == op_mov || insn->opcode == op_movi || insn->opcode == op_movf); + return emit_move(fn, get_operand(insn, 0), get_target(insn, 0)); +} + +static int emit_op_ret(struct function *fn, Instruction *insn) +{ + // TODO Only call luaF_close if needed (i.e. some variable escaped) +#ifdef RAVI_DEFER_STATEMENT + if (raviX_ptrlist_size((const struct ptr_list *)fn->proc->procs) > 0) { + raviX_buffer_add_string(&fn->body, "{\nluaF_close(L, base, LUA_OK);\n"); + raviX_buffer_add_string(&fn->body, "base = ci->u.l.base;\n"); + raviX_buffer_add_string(&fn->body, "}\n"); + } +#else + if (raviX_ptrlist_size((const struct ptr_list *)fn->proc->procs) > 0) { + raviX_buffer_add_string(&fn->body, "luaF_close(L, base);\n"); + } +#endif + raviX_buffer_add_string(&fn->body, "{\n"); + /* Results are copied to stack position given by ci->func and above. + * stackbase is set here so S(n) refers to (stackbase+n) + */ + raviX_buffer_add_string(&fn->body, " TValue *stackbase = ci->func;\n"); + raviX_buffer_add_string(&fn->body, " int wanted = ci->nresults;\n"); + raviX_buffer_add_string(&fn->body, " result = wanted == -1 ? 
0 : 1;\n"); /* see OP_RETURN impl in JIT */ + int n = get_num_operands(insn); + if (n > 0) { + Pseudo *last_operand = get_operand(insn, n - 1); + /* the last operand might be a range pseudo */ + if (last_operand->type == PSEUDO_RANGE) { + raviX_buffer_add_string(&fn->body, " if (wanted == -1) {\n"); + raviX_buffer_add_string(&fn->body, " TValue *start_vararg = "); + Pseudo tmp = {.type = PSEUDO_TEMP_ANY, .regnum = last_operand->regnum}; + emit_reg_accessor(fn, &tmp, 0); + raviX_buffer_add_string(&fn->body, " ;\n"); + raviX_buffer_add_fstring(&fn->body, " wanted = (L->top - start_vararg) + %d;\n", n - 1); + raviX_buffer_add_string(&fn->body, " }\n"); + } else { + raviX_buffer_add_fstring(&fn->body, " if (wanted == -1) wanted = %d;\n", n); + } + } else { + raviX_buffer_add_string(&fn->body, " if (wanted == -1) wanted = 0;\n"); + } + Pseudo *pseudo; + int i = 0; + raviX_buffer_add_string(&fn->body, " int j = 0;\n"); + FOR_EACH_PTR(insn->operands, pseudo) + { + if (pseudo->type != PSEUDO_RANGE) { + Pseudo dummy_dest = {.type = PSEUDO_LUASTACK, + .stackidx = i}; /* will go to stackbase[i] */ + raviX_buffer_add_fstring(&fn->body, " if (%d < wanted) {\n", i); + /* FIXME last argument might be a range pseudo */ + emit_move(fn, pseudo, &dummy_dest); + raviX_buffer_add_string(&fn->body, " }\n"); + raviX_buffer_add_fstring(&fn->body, " j++;\n"); + i++; + } else { + /* copy values starting at the range to L->top */ + // raviX_buffer_add_fstring(&fn->body, " j = %d;\n", i); + raviX_buffer_add_fstring(&fn->body, " {\n int reg = %d;\n", pseudo->regnum); + raviX_buffer_add_string(&fn->body, " while (j < wanted) {\n"); + raviX_buffer_add_string(&fn->body, " TValue *dest_reg = S(j);\n"); + raviX_buffer_add_string(&fn->body, " TValue *src_reg = R(reg);\n"); + raviX_buffer_add_string( + &fn->body, " dest_reg->tt_ = src_reg->tt_; dest_reg->value_.gc = src_reg->value_.gc;\n"); + raviX_buffer_add_string(&fn->body, " j++, reg++;\n"); + raviX_buffer_add_string(&fn->body, " }\n"); + 
raviX_buffer_add_string(&fn->body, " }\n"); + } + } + END_FOR_EACH_PTR(pseudo); + /* Set any excess results to nil */ + raviX_buffer_add_string(&fn->body, " while (j < wanted) {\n"); + { + raviX_buffer_add_string(&fn->body, " setnilvalue(S(j));\n"); + raviX_buffer_add_string(&fn->body, " j++;\n"); + } + raviX_buffer_add_string(&fn->body, " }\n"); + /* FIXME the rule for L->top needs to be checked */ + raviX_buffer_add_string(&fn->body, " L->top = S(0) + wanted;\n"); + raviX_buffer_add_string(&fn->body, " L->ci = ci->previous;\n"); + raviX_buffer_add_string(&fn->body, "}\n"); + emit_jump(fn, get_target(insn, 0)); + return 0; +} + +/* Generate code for various types of load table operations */ +static int emit_op_load_table(struct function *fn, Instruction *insn) +{ + const char *fname = "luaV_gettable"; + if (insn->opcode == op_tget_ikey) { + fname = "raviV_gettable_i"; + } else if (insn->opcode == op_tget_skey) { + fname = "raviV_gettable_sskey"; + } + Pseudo *env = get_operand(insn, 0); + Pseudo *varname = get_operand(insn, 1); + Pseudo *dst = get_target(insn, 0); + if (varname->type == PSEUDO_CONSTANT && varname->constant->type == RAVI_TSTRING) { + if (varname->constant->s->len < 40) { + fname = "raviV_gettable_sskey"; + } + } + raviX_buffer_add_string(&fn->body, "{\n"); + raviX_buffer_add_string(&fn->body, " TValue *tab = "); + emit_reg_accessor(fn, env, 0); + raviX_buffer_add_string(&fn->body, ";\n TValue *key = "); + emit_reg_accessor(fn, varname, 0); + raviX_buffer_add_string(&fn->body, ";\n TValue *dst = "); + emit_reg_accessor(fn, dst, 1); + raviX_buffer_add_fstring(&fn->body, ";\n %s(L, tab, key, dst);\n ", fname); + emit_reload_base(fn); + raviX_buffer_add_string(&fn->body, "}\n"); + return 0; +} + +/* Emit code for a variety of store table operations */ +static int emit_op_store_table(struct function *fn, Instruction *insn) +{ + // FIXME what happens if key and value are both constants + // Our pseudo reg will break I think + const char *fname = 
"luaV_settable"; + if (insn->opcode == op_tput_ikey) { + fname = "raviV_settable_i"; + } else if (insn->opcode == op_tput_skey) { + fname = "raviV_settable_sskey"; + } + Pseudo *env = get_target(insn, 0); + Pseudo *varname = get_target(insn, 1); + Pseudo *src = get_operand(insn, 0); + if (varname->type == PSEUDO_CONSTANT && varname->constant->type == RAVI_TSTRING) { + if (varname->constant->s->len < 40) { + fname = "raviV_settable_sskey"; + } + } + raviX_buffer_add_string(&fn->body, "{\n"); + raviX_buffer_add_string(&fn->body, " TValue *tab = "); + emit_reg_accessor(fn, env, 0); + raviX_buffer_add_string(&fn->body, ";\n TValue *key = "); + emit_reg_accessor(fn, varname, 0); + raviX_buffer_add_string(&fn->body, ";\n TValue *src = "); + emit_reg_accessor(fn, src, 1); + raviX_buffer_add_fstring(&fn->body, ";\n %s(L, tab, key, src);\n ", fname); + emit_reload_base(fn); + raviX_buffer_add_string(&fn->body, "}\n"); + return 0; +} + +// From implementation point of view the main work is copy the registers to the +// right place. If we assume that at any time there is a 'fixed' stack size for the +// functions regular variables and temps and that when we call functions, we need +// to put the function followed by arguments on top of this 'fixed' stack. +// However the complication is that some of the arguments of the function may come +// from a previous function call and therefore may be occupying the same space! +// For example: +// local x = f() +// g(x, h()) +// Here the return values from h() will be on the stack above the fixed stack space +// and g() expects x, followed by all the return values from h(). +// But the nature of the byte code execution is that the return values of h() +// will be at the top of the fixed stack and will have offsets less than the +// parameter positions of g() because when we call g() we will at least have the +// function value at the position of the first result from h(). 
Suppose the h() return values +// are at stack[10], stack[11], stack[12], etc. +// Then when we call g() we will put stack[10] = g, stack[11] = x, +// and stack[12] = stack[10], etc. To do this correctly we need to copy the +// last argument first. +static int emit_op_call(struct function *fn, Instruction *insn) +{ + assert(get_num_targets(insn) == 2); + unsigned int n = get_num_operands(insn); + // target register is where results should end up after the call + // so it also tells us where we need to place the new frame + // Note that this is typically a range starting at a register + unsigned target_register = get_target(insn, 0)->regnum; + // Number of values expected by the caller + // If -1 it means all available values + int nresults = (int)get_target(insn, 1)->constant->i; + // I think it is okay to just use n as the check because if L->top was set + // then n will be on top of that + raviX_buffer_add_fstring( + &fn->body, " if (stackoverflow(L,%d)) { luaD_growstack(L, %d); base = ci->u.l.base; }\n", n + 1, n + 1); + if (n > 1) { + // We have function arguments (as n=0 is the function itself) + Pseudo *last_arg = get_operand(insn, n - 1); + if (last_arg->type == PSEUDO_RANGE) { + // If last argument is a range that tells us that we need + // to copy all available values from the register to L->top + // But first check whether copy is necessary + // suppose n = 2 + // then, + // target_register[0] will have function + // target_register[1] will have arg 1 + unsigned copy_to = target_register + n - 1; + if (last_arg->regnum != copy_to) { + raviX_buffer_add_string(&fn->body, "{\n"); + raviX_buffer_add_string(&fn->body, " TValue *src_base = "); + emit_reg_accessor(fn, last_arg, 0); + raviX_buffer_add_string(&fn->body, ";\n"); + raviX_buffer_add_string(&fn->body, " TValue *dest_base = "); + Pseudo tmp = {.type = PSEUDO_TEMP_ANY, .regnum = copy_to}; + emit_reg_accessor(fn, &tmp, 0); + raviX_buffer_add_string(&fn->body, ";\n TValue *src = L->top-1;\n"); + 
raviX_buffer_add_string(&fn->body, " L->top = dest_base + (L->top-src_base);\n"); + raviX_buffer_add_string(&fn->body, " TValue *dest = L->top-1;\n"); + raviX_buffer_add_string(&fn->body, " while (src >= src_base) {\n"); + raviX_buffer_add_string(&fn->body, + " dest->tt_ = src->tt_; dest->value_.gc = src->value_.gc;\n"); + raviX_buffer_add_string(&fn->body, " src--;\n"); + raviX_buffer_add_string(&fn->body, " dest--;\n"); + raviX_buffer_add_string(&fn->body, " }\n"); + raviX_buffer_add_string(&fn->body, "}\n"); + } else { + // L->top stays where it is ... + } + n--; // discard the last arg + } else { + // L->top must be just past the last arg + raviX_buffer_add_string(&fn->body, " L->top = "); + emit_reg_accessor(fn, get_target(insn, 0), 0); + raviX_buffer_add_fstring(&fn->body, " + %d;\n", n); + } + } + // Copy the rest of the args + for (int j = n - 1; j >= 0; j--) { + Pseudo tmp = {.type = PSEUDO_TEMP_ANY, .regnum = target_register + j}; + emit_move(fn, get_operand(insn, j), &tmp); + } + // Call the function + raviX_buffer_add_string(&fn->body, "{\n TValue *ra = "); + emit_reg_accessor(fn, get_target(insn, 0), 0); + raviX_buffer_add_fstring(&fn->body, ";\n int result = luaD_precall(L, ra, %d, 1);\n", nresults); + raviX_buffer_add_string(&fn->body, " if (result) {\n"); + raviX_buffer_add_fstring(&fn->body, " if (result == 1 && %d >= 0)\n", nresults); + raviX_buffer_add_string(&fn->body, " L->top = ci->top;\n"); + raviX_buffer_add_string(&fn->body, " }\n"); + raviX_buffer_add_string(&fn->body, " else { /* Lua function */\n"); + raviX_buffer_add_string(&fn->body, " result = luaV_execute(L);\n"); + raviX_buffer_add_string(&fn->body, " if (result) L->top = ci->top;\n"); + raviX_buffer_add_string(&fn->body, " }\n"); + raviX_buffer_add_string(&fn->body, " base = ci->u.l.base;\n"); + raviX_buffer_add_string(&fn->body, "}\n"); + return 0; +} + +/* + * Output a C stack variable representing int/float value or constant + */ +static void emit_varname_or_constant(struct 
function *fn, Pseudo *pseudo) +{ + if (pseudo->type == PSEUDO_CONSTANT) { + if (pseudo->constant->type == RAVI_TNUMINT) { + raviX_buffer_add_fstring(&fn->body, "%lld", pseudo->constant->i); + } else if (pseudo->constant->type == RAVI_TNUMFLT) { + raviX_buffer_add_fstring(&fn->body, "%.16g", pseudo->constant->n); + } else { + assert(0); + } + } else if (pseudo->type == PSEUDO_TEMP_INT || pseudo->type == PSEUDO_TEMP_BOOL || + pseudo->type == PSEUDO_TEMP_FLT) { + emit_varname(pseudo, &fn->body); + } else if (pseudo->type == PSEUDO_SYMBOL) { + ravitype_t typecode = RAVI_TANY; + if (pseudo->symbol->symbol_type == SYM_LOCAL) { + typecode = pseudo->symbol->variable.value_type.type_code; + } else if (pseudo->symbol->symbol_type == SYM_UPVALUE) { + typecode = pseudo->symbol->upvalue.value_type.type_code; + } + if (typecode == RAVI_TNUMFLT) { + raviX_buffer_add_string(&fn->body, "fltvalue("); + emit_reg_accessor(fn, pseudo, 0); + raviX_buffer_add_string(&fn->body, ")"); + } else if (typecode == RAVI_TNUMINT) { + raviX_buffer_add_string(&fn->body, "ivalue("); + emit_reg_accessor(fn, pseudo, 0); + raviX_buffer_add_string(&fn->body, ")"); + } else { + assert(0); + } + } else { + assert(0); + } +} + +static int emit_comp_ii(struct function *fn, Instruction *insn) +{ + raviX_buffer_add_string(&fn->body, "{ "); + Pseudo *target = get_target(insn, 0); + if (target->type == PSEUDO_TEMP_BOOL) { + emit_varname(target, &fn->body); + raviX_buffer_add_string(&fn->body, " = "); + } else { + raviX_buffer_add_string(&fn->body, "TValue *dst_reg = "); + emit_reg_accessor(fn, target, 0); + raviX_buffer_add_string(&fn->body, "; setbvalue(dst_reg, "); + } + const char *oper = NULL; + switch (insn->opcode) { + case op_eqii: + case op_eqff: + oper = "=="; + break; + case op_ltii: + case op_ltff: + oper = "<"; + break; + case op_leii: + case op_leff: + oper = "<="; + break; + default: + assert(0); + return -1; + } + emit_varname_or_constant(fn, get_operand(insn, 0)); + 
raviX_buffer_add_fstring(&fn->body, " %s ", oper); + emit_varname_or_constant(fn, get_operand(insn, 1)); + if (target->type == PSEUDO_TEMP_BOOL) { + raviX_buffer_add_string(&fn->body, "; }\n"); + } else { + raviX_buffer_add_string(&fn->body, "); }\n"); + } + return 0; +} + +static int emit_bin_ii(struct function *fn, Instruction *insn) +{ + // FIXME - needs to also work with typed function params + raviX_buffer_add_string(&fn->body, "{ "); + Pseudo *target = get_target(insn, 0); + if (target->type == PSEUDO_TEMP_FLT || target->type == PSEUDO_TEMP_INT || target->type == PSEUDO_TEMP_BOOL) { + emit_varname(target, &fn->body); + raviX_buffer_add_string(&fn->body, " = "); + } else { + raviX_buffer_add_string(&fn->body, "TValue *dst_reg = "); + emit_reg_accessor(fn, target, 0); + if (insn->opcode == op_addff || insn->opcode == op_subff || insn->opcode == op_mulff || + insn->opcode == op_divff) { + raviX_buffer_add_string(&fn->body, "; setfltvalue(dst_reg, "); + } else { + raviX_buffer_add_string(&fn->body, "; setivalue(dst_reg, "); + } + } + const char *oper = NULL; + switch (insn->opcode) { + case op_addff: + case op_addii: + oper = "+"; + break; + + case op_subff: + case op_subii: + oper = "-"; + break; + + case op_mulff: + case op_mulii: + oper = "*"; + break; + + case op_divff: + case op_divii: + oper = "/"; + break; + + case op_bandii: + oper = "&"; + break; + + case op_borii: + oper = "|"; + break; + + case op_bxorii: + oper = "^"; + break; + default: + assert(0); + return -1; + } + emit_varname_or_constant(fn, get_operand(insn, 0)); + raviX_buffer_add_fstring(&fn->body, " %s ", oper); + emit_varname_or_constant(fn, get_operand(insn, 1)); + if (target->type == PSEUDO_TEMP_FLT || target->type == PSEUDO_TEMP_INT || target->type == PSEUDO_TEMP_BOOL) { + raviX_buffer_add_string(&fn->body, "; }\n"); + } else { + raviX_buffer_add_string(&fn->body, "); }\n"); + } + return 0; +} + +static int emit_bitop_ii(struct function *fn, Instruction *insn) +{ + 
raviX_buffer_add_string(&fn->body, "{\n "); + Pseudo *target = get_target(insn, 0); + if (target->type == PSEUDO_TEMP_FLT || target->type == PSEUDO_TEMP_INT || target->type == PSEUDO_TEMP_BOOL) { + emit_varname(target, &fn->body); + raviX_buffer_add_string(&fn->body, " = "); + } else { + raviX_buffer_add_string(&fn->body, "TValue *dst_reg = "); + emit_reg_accessor(fn, target, 0); + raviX_buffer_add_string(&fn->body, "; setivalue(dst_reg, "); + } + raviX_buffer_add_string(&fn->body, "luaV_shiftl("); + emit_varname_or_constant(fn, get_operand(insn, 0)); + if (insn->opcode == op_shlii) + raviX_buffer_add_string(&fn->body, ", "); + else if (insn->opcode == op_shrii) + raviX_buffer_add_string(&fn->body, ", -"); + else { + assert(0); + return -1; + } + emit_varname_or_constant(fn, get_operand(insn, 1)); + raviX_buffer_add_string(&fn->body, ")"); + if (target->type == PSEUDO_TEMP_FLT || target->type == PSEUDO_TEMP_INT || target->type == PSEUDO_TEMP_BOOL) { + raviX_buffer_add_string(&fn->body, ";\n}\n"); + } else { + raviX_buffer_add_string(&fn->body, ");\n}\n"); + } + return 0; +} + +static int emit_bin_fi(struct function *fn, Instruction *insn) +{ + // FIXME - needs to also work with typed function params + raviX_buffer_add_string(&fn->body, "{ "); + Pseudo *target = get_target(insn, 0); + if (target->type == PSEUDO_TEMP_FLT) { + emit_varname(target, &fn->body); + raviX_buffer_add_string(&fn->body, " = "); + } else { + raviX_buffer_add_string(&fn->body, "TValue *dst_reg = "); + emit_reg_accessor(fn, target, 0); + raviX_buffer_add_string(&fn->body, "; setfltvalue(dst_reg, "); + } + const char *oper = NULL; + switch (insn->opcode) { + case op_addfi: + oper = "+"; + break; + + case op_subfi: + oper = "-"; + break; + + case op_mulfi: + oper = "*"; + break; + + case op_divfi: + oper = "/"; + break; + + default: + assert(0); + return -1; + } + emit_varname_or_constant(fn, get_operand(insn, 0)); + raviX_buffer_add_fstring(&fn->body, " %s ((lua_Number)(", oper); + 
emit_varname_or_constant(fn, get_operand(insn, 1)); + raviX_buffer_add_string(&fn->body, "))"); + if (target->type == PSEUDO_TEMP_FLT) { + raviX_buffer_add_string(&fn->body, "; }\n"); + } else { + raviX_buffer_add_string(&fn->body, "); }\n"); + } + return 0; +} + +static int emit_bin_if(struct function *fn, Instruction *insn) +{ + // FIXME - needs to also work with typed function params + raviX_buffer_add_string(&fn->body, "{ "); + Pseudo *target = get_target(insn, 0); + if (target->type == PSEUDO_TEMP_FLT) { + emit_varname(target, &fn->body); + raviX_buffer_add_string(&fn->body, " = "); + } else { + raviX_buffer_add_string(&fn->body, "TValue *dst_reg = "); + emit_reg_accessor(fn, target, 0); + raviX_buffer_add_string(&fn->body, "; setfltvalue(dst_reg, "); + } + const char *oper = NULL; + switch (insn->opcode) { + case op_subif: + oper = "-"; + break; + + case op_divif: + oper = "/"; + break; + + default: + assert(0); + return -1; + } + raviX_buffer_add_string(&fn->body, "((lua_Number)("); + emit_varname_or_constant(fn, get_operand(insn, 0)); + raviX_buffer_add_fstring(&fn->body, ")) %s ", oper); + emit_varname_or_constant(fn, get_operand(insn, 1)); + if (target->type == PSEUDO_TEMP_FLT) { + raviX_buffer_add_string(&fn->body, "; }\n"); + } else { + raviX_buffer_add_string(&fn->body, "); }\n"); + } + return 0; +} + +static int emit_op_arrayget_ikey(struct function *fn, Instruction *insn) +{ + const char *array_type = insn->opcode == op_iaget_ikey ? "lua_Integer *" : "lua_Number *"; + const char *setterfunc = insn->opcode == op_iaget_ikey ? "setivalue" : "setfltvalue"; + unsigned type = insn->opcode == op_iaget_ikey ? 
PSEUDO_TEMP_INT : PSEUDO_TEMP_FLT; + Pseudo *arr = get_operand(insn, 0); + Pseudo *key = get_operand(insn, 1); + Pseudo *dst = get_target(insn, 0); + raviX_buffer_add_string(&fn->body, "{\n"); + raviX_buffer_add_string(&fn->body, " RaviArray *arr = arrvalue("); + emit_reg_accessor(fn, arr, 0); + raviX_buffer_add_string(&fn->body, ");\n lua_Unsigned ukey = (lua_Unsigned) "); + if (key->type == PSEUDO_CONSTANT) { + raviX_buffer_add_fstring(&fn->body, "%lld", key->constant->i); + } else if (key->type == PSEUDO_TEMP_INT) { + emit_varname(key, &fn->body); + } else if (key->type == PSEUDO_SYMBOL) { + // this must be an integer + raviX_buffer_add_string(&fn->body, "ivalue("); + emit_reg_accessor(fn, key, 0); + raviX_buffer_add_string(&fn->body, ")"); + } else { + assert(0); + return -1; + } + raviX_buffer_add_string(&fn->body, ";\n"); + raviX_buffer_add_fstring(&fn->body, " %siptr = (%s)arr->data;\n ", array_type, array_type); + if (dst->type == type) { + emit_varname(dst, &fn->body); + raviX_buffer_add_string(&fn->body, " = iptr[ukey];\n"); + } else if (dst->type == PSEUDO_TEMP_ANY || dst->type == PSEUDO_SYMBOL || dst->type == PSEUDO_LUASTACK) { + raviX_buffer_add_string(&fn->body, "TValue *dest_reg = "); + emit_reg_accessor(fn, dst, 0); + raviX_buffer_add_fstring(&fn->body, "; %s(dest_reg, iptr[ukey]);\n", setterfunc); + } else { + assert(0); + return -1; + } + raviX_buffer_add_string(&fn->body, "}\n"); + return 0; +} + +static int emit_op_arrayput_val(struct function *fn, Instruction *insn) +{ + const char *array_type = insn->opcode == op_iaput_ival ? "lua_Integer *" : "lua_Number *"; + const char *getterfunc = insn->opcode == op_iaput_ival ? "ivalue" : "fltvalue"; + const char *setterfunc = insn->opcode == op_iaput_ival ? "raviH_set_int" : "raviH_set_float"; + unsigned type = insn->opcode == op_iaput_ival ? 
PSEUDO_TEMP_INT : PSEUDO_TEMP_FLT; + Pseudo *arr = get_target(insn, 0); + Pseudo *key = get_target(insn, 1); + Pseudo *src = get_operand(insn, 0); + raviX_buffer_add_string(&fn->body, "{\n"); + raviX_buffer_add_string(&fn->body, " RaviArray *arr = arrvalue("); + emit_reg_accessor(fn, arr, 0); + raviX_buffer_add_string(&fn->body, ");\n lua_Unsigned ukey = (lua_Unsigned) "); + if (key->type == PSEUDO_CONSTANT) { + raviX_buffer_add_fstring(&fn->body, "%lld", key->constant->i); + } else if (key->type == PSEUDO_TEMP_INT) { + emit_varname(key, &fn->body); + } else if (key->type == PSEUDO_SYMBOL) { + // this must be an integer + raviX_buffer_add_string(&fn->body, "ivalue("); + emit_reg_accessor(fn, key, 0); + raviX_buffer_add_string(&fn->body, ")"); + } else { + assert(0); + return -1; + } + raviX_buffer_add_string(&fn->body, ";\n"); + raviX_buffer_add_fstring(&fn->body, " %siptr = (%s)arr->data;\n ", array_type, array_type); + raviX_buffer_add_string(&fn->body, "if (ukey < (lua_Unsigned)(arr->len)) {\n"); + raviX_buffer_add_string(&fn->body, " iptr[ukey] = "); + if (src->type == type) { + emit_varname(src, &fn->body); + } else if (src->type == PSEUDO_TEMP_ANY || src->type == PSEUDO_SYMBOL || src->type == PSEUDO_LUASTACK) { + raviX_buffer_add_fstring(&fn->body, "%s(", getterfunc); + emit_reg_accessor(fn, src, 0); + raviX_buffer_add_string(&fn->body, ")"); + } else if (src->type == PSEUDO_CONSTANT) { + if (src->constant->type == RAVI_TNUMINT) { + raviX_buffer_add_fstring(&fn->body, "%lld", src->constant->i); + } else { + raviX_buffer_add_fstring(&fn->body, "%g", src->constant->n); + } + } else { + assert(0); + return -1; + } + raviX_buffer_add_string(&fn->body, ";\n} else {\n"); + raviX_buffer_add_fstring(&fn->body, " %s(L, arr, ukey, ", setterfunc); + if (src->type == type) { + emit_varname(src, &fn->body); + } else if (src->type == PSEUDO_TEMP_ANY || src->type == PSEUDO_SYMBOL || src->type == PSEUDO_LUASTACK) { + raviX_buffer_add_fstring(&fn->body, "%s(", getterfunc); + 
emit_reg_accessor(fn, src, 0); + raviX_buffer_add_string(&fn->body, ")"); + } else if (src->type == PSEUDO_CONSTANT) { + if (src->constant->type == RAVI_TNUMINT) { + raviX_buffer_add_fstring(&fn->body, "%lld", src->constant->i); + } else { + raviX_buffer_add_fstring(&fn->body, "%g", src->constant->n); + } + } + raviX_buffer_add_string(&fn->body, ");\n"); + raviX_buffer_add_string(&fn->body, "}\n}\n"); + return 0; +} + +static int emit_op_totype(struct function *fn, Instruction *insn) +{ + raviX_buffer_add_string(&fn->body, "{\n"); + raviX_buffer_add_string(&fn->body, " TValue *ra = "); + emit_reg_accessor(fn, get_first_target(insn), 0); + if (insn->opcode == op_toiarray) { + raviX_buffer_add_string(&fn->body, ";\n if (!ttisiarray(ra)) {\n"); + raviX_buffer_add_fstring(&fn->body, " error_code = %d;\n", Error_integer_array_expected); + } else if (insn->opcode == op_tofarray) { + raviX_buffer_add_string(&fn->body, ";\n if (!ttisfarray(ra)) {\n"); + raviX_buffer_add_fstring(&fn->body, " error_code = %d;\n", Error_number_array_expected); + } else if (insn->opcode == op_totable) { + raviX_buffer_add_string(&fn->body, ";\n if (!ttisLtable(ra)) {\n"); + raviX_buffer_add_fstring(&fn->body, " error_code = %d;\n", Error_table_expected); + } else if (insn->opcode == op_toclosure) { + raviX_buffer_add_string(&fn->body, ";\n if (!ttisclosure(ra)) {\n"); + raviX_buffer_add_fstring(&fn->body, " error_code = %d;\n", Error_closure_expected); + } else if (insn->opcode == op_tostring) { + raviX_buffer_add_string(&fn->body, ";\n if (!ttisstring(ra)) {\n"); + raviX_buffer_add_fstring(&fn->body, " error_code = %d;\n", Error_string_expected); + } else if (insn->opcode == op_toint) { + raviX_buffer_add_string(&fn->body, ";\n if (!ttisinteger(ra)) {\n"); + raviX_buffer_add_fstring(&fn->body, " error_code = %d;\n", Error_integer_expected); + } else { + assert(0); + return -1; + } + raviX_buffer_add_string(&fn->body, " goto Lraise_error;\n"); + raviX_buffer_add_string(&fn->body, " }\n}\n"); + 
return 0; +} + +static int emit_op_toflt(struct function *fn, Instruction *insn) +{ + raviX_buffer_add_string(&fn->body, "{\n"); + raviX_buffer_add_string(&fn->body, " TValue *ra = "); + emit_reg_accessor(fn, get_first_target(insn), 0); + raviX_buffer_add_string(&fn->body, ";\n lua_Number n = 0;\n"); + raviX_buffer_add_string(&fn->body, " if (ttisnumber(ra)) { n = (ttisinteger(ra) ? (double) ivalue(ra) : " + "fltvalue(ra)); setfltvalue(ra, n); }\n"); + raviX_buffer_add_string(&fn->body, " else {\n"); + raviX_buffer_add_fstring(&fn->body, " error_code = %d;\n", Error_number_expected); + raviX_buffer_add_string(&fn->body, " goto Lraise_error;\n"); + raviX_buffer_add_string(&fn->body, " }\n}\n"); + return 0; +} + +static int emit_op_tousertype(struct function *fn, Instruction *insn) +{ + Pseudo *typename = get_first_operand(insn); + raviX_buffer_add_string(&fn->body, "{\n"); + raviX_buffer_add_string(&fn->body, " TValue *ra = "); + emit_reg_accessor(fn, get_first_target(insn), 0); + raviX_buffer_add_string(&fn->body, ";\n if (!ttisnil(ra)) {\n"); + raviX_buffer_add_string(&fn->body, " TValue *rb = "); + emit_reg_accessor(fn, typename, 0); + raviX_buffer_add_string(&fn->body, ";\n"); + raviX_buffer_add_string(&fn->body, + " if (!ttisshrstring(rb) || !raviV_check_usertype(L, tsvalue(rb), ra)) {\n"); + raviX_buffer_add_fstring(&fn->body, " error_code = %d;\n", Error_type_mismatch); + raviX_buffer_add_string(&fn->body, " goto Lraise_error;\n"); + raviX_buffer_add_string(&fn->body, " }\n"); + raviX_buffer_add_string(&fn->body, " }\n"); + raviX_buffer_add_string(&fn->body, "}\n"); + return 0; +} + +static int emit_op_newtable(struct function *fn, Instruction *insn) +{ + Pseudo *target_pseudo = get_first_target(insn); + raviX_buffer_add_string(&fn->body, "{\n"); + raviX_buffer_add_string(&fn->body, " TValue *ra = "); + emit_reg_accessor(fn, target_pseudo, 0); + raviX_buffer_add_string(&fn->body, ";\n raviV_op_newtable(L, ci, ra, 0, 0);\n"); + emit_reload_base(fn); + 
raviX_buffer_add_string(&fn->body, "}\n"); + return 0; +} + +static int emit_op_newarray(struct function *fn, Instruction *insn) +{ + Pseudo *target_pseudo = get_first_target(insn); + raviX_buffer_add_string(&fn->body, "{\n"); + raviX_buffer_add_string(&fn->body, " TValue *ra = "); + emit_reg_accessor(fn, target_pseudo, 0); + raviX_buffer_add_fstring(&fn->body, ";\n %s(L, ci, ra);\n", + insn->opcode == op_newfarray ? "raviV_op_newarrayfloat" : "raviV_op_newarrayint"); + emit_reload_base(fn); + raviX_buffer_add_string(&fn->body, "}\n"); + return 0; +} + +static int emit_op_closure(struct function *fn, Instruction *insn) +{ + Pseudo *closure_pseudo = get_first_operand(insn); + Pseudo *target_pseudo = get_first_target(insn); + + assert(closure_pseudo->type == PSEUDO_PROC); + Proc *proc = closure_pseudo->proc; + Proc *parent_proc = proc->parent; + Proc *cursor; + int parent_index = -1; + int i = 0; + FOR_EACH_PTR(parent_proc->procs, cursor) + { + if (cursor->id == proc->id) { + assert(cursor == proc); + parent_index = i; + break; + } + i++; + } + END_FOR_EACH_PTR(cursor); + if (parent_index == -1) { + assert(0); + return -1; + } + unsigned reg = compute_register_from_base(fn, target_pseudo); + raviX_buffer_add_fstring(&fn->body, "raviV_op_closure(L, ci, cl, %d, %d);\n", reg, parent_index); + emit_reload_base(fn); + return 0; +} + +static int emit_op_close(struct function *fn, Instruction *insn) +{ + Pseudo *pseudo = get_first_operand(insn); + raviX_buffer_add_string(&fn->body, "{\n TValue *clsvar = "); + emit_reg_accessor(fn, pseudo, 0); + raviX_buffer_add_string(&fn->body, ";\n"); +#ifdef RAVI_DEFER_STATEMENT + raviX_buffer_add_string(&fn->body, " luaF_close(L, clsvar, LUA_OK);\n"); + emit_reload_base(fn); +#else + raviX_buffer_add_string(&fn->body, " luaF_close(L, clsvar);\n"); +#endif + raviX_buffer_add_string(&fn->body, "}\n"); + return 0; +} + +static int emit_op_len(struct function *fn, Instruction *insn) +{ + Pseudo *obj = get_first_operand(insn); + Pseudo 
*target = get_first_target(insn); + raviX_buffer_add_string(&fn->body, "{\n TValue *len = "); + emit_reg_accessor(fn, target, 0); + raviX_buffer_add_string(&fn->body, ";\n TValue *obj = "); + emit_reg_accessor(fn, obj, 0); + raviX_buffer_add_string(&fn->body, ";\n luaV_objlen(L, len, obj);\n"); + emit_reload_base(fn); + if (target->type == PSEUDO_TEMP_INT) { + raviX_buffer_add_string(&fn->body, " "); + emit_varname_or_constant(fn, target); + raviX_buffer_add_string(&fn->body, " = ival0.value_.i;\n"); // FIXME use some accessor + } + raviX_buffer_add_string(&fn->body, "}\n"); + return 0; +} + +static int emit_generic_comp(struct function *fn, Instruction *insn) +{ + const char *oper = "=="; + if (insn->opcode == op_lt) { + oper = "<"; + } else if (insn->opcode == op_le) { + oper = "<="; + } + const char *comparison_function = + (insn->opcode == op_eq) ? "luaV_equalobj" : ((insn->opcode == op_lt) ? "luaV_lessthan" : "luaV_lessequal"); + raviX_buffer_add_string(&fn->body, "{\n int result = 0;\n"); + raviX_buffer_add_string(&fn->body, " TValue *rb = "); + emit_reg_accessor(fn, get_operand(insn, 0), 0); + raviX_buffer_add_string(&fn->body, ";\n TValue *rc = "); + emit_reg_accessor(fn, get_operand(insn, 1), 1); + raviX_buffer_add_string(&fn->body, ";\n"); + raviX_buffer_add_string(&fn->body, " if (ttisinteger(rb) && ttisinteger(rc))\n"); + raviX_buffer_add_fstring(&fn->body, " result = (ivalue(rb) %s ivalue(rc));\n", oper); + raviX_buffer_add_string(&fn->body, " else {\n"); + raviX_buffer_add_fstring(&fn->body, " result = %s(L, rb, rc);\n ", comparison_function); + // Reload pointer to base as the call to luaV_equalobj() may + // have invoked a Lua function and as a result the stack may have + // been reallocated - so the previous base pointer could be stale + emit_reload_base(fn); + raviX_buffer_add_string(&fn->body, " }\n"); + Pseudo *target = get_first_target(insn); + if (target->type == PSEUDO_TEMP_ANY) { + raviX_buffer_add_string(&fn->body, " setbvalue("); + 
emit_reg_accessor(fn, target, 0); + raviX_buffer_add_string(&fn->body, ", result != 0);\n"); + } else if (target->type == PSEUDO_TEMP_BOOL) { + raviX_buffer_add_string(&fn->body, " "); + emit_varname(target, &fn->body); + raviX_buffer_add_string(&fn->body, " = result != 0;\n"); + } else { + assert(0); + return -1; + } + raviX_buffer_add_string(&fn->body, "}\n"); + return 0; +} + +static int emit_op_arith(struct function *fn, Instruction *insn) +{ + raviX_buffer_add_string(&fn->body, "{\n"); + Pseudo *target = get_target(insn, 0); + raviX_buffer_add_string(&fn->body, " TValue *ra = "); + emit_reg_accessor(fn, target, 0); + raviX_buffer_add_string(&fn->body, ";\n"); + const char *oper = NULL; + const char *tm = NULL; + switch (insn->opcode) { + case op_add: + oper = "+"; + tm = "TM_ADD"; + break; + + case op_sub: + oper = "-"; + tm = "TM_SUB"; + break; + + case op_mul: + oper = "*"; + tm = "TM_MUL"; + break; + + default: + assert(0); + return -1; + } + raviX_buffer_add_string(&fn->body, " TValue *rb = "); + emit_reg_accessor(fn, get_operand(insn, 0), 0); + raviX_buffer_add_string(&fn->body, ";\n"); + raviX_buffer_add_string(&fn->body, " TValue *rc = "); + emit_reg_accessor(fn, get_operand(insn, 1), 0); + raviX_buffer_add_string(&fn->body, ";\n"); + raviX_buffer_add_string(&fn->body, " lua_Integer i = 0;\n"); + raviX_buffer_add_string(&fn->body, " lua_Integer ic = 0;\n"); + raviX_buffer_add_string(&fn->body, " lua_Number n = 0.0;\n"); + raviX_buffer_add_string(&fn->body, " lua_Number nc = 0.0;\n"); + + raviX_buffer_add_string(&fn->body, " if (ttisinteger(rb) && ttisinteger(rc)) {\n"); + raviX_buffer_add_string(&fn->body, " i = ivalue(rb);\n"); + raviX_buffer_add_string(&fn->body, " ic = ivalue(rc);\n"); + raviX_buffer_add_fstring(&fn->body, " setivalue(ra, (i %s ic));\n", oper); + raviX_buffer_add_string(&fn->body, " } else if (tonumberns(rb, n) && tonumberns(rc, nc)) {\n"); + raviX_buffer_add_fstring(&fn->body, " setfltvalue(ra, (n %s nc));\n", oper); + 
raviX_buffer_add_string(&fn->body, " } else {\n"); + raviX_buffer_add_fstring(&fn->body, " luaT_trybinTM(L, rb, rc, ra, %s);\n", tm); + raviX_buffer_add_string(&fn->body, " base = ci->u.l.base;\n"); + raviX_buffer_add_string(&fn->body, " }\n"); + raviX_buffer_add_string(&fn->body, "}\n"); + return 0; +} + +static int emit_op_not(struct function *fn, Instruction *insn) +{ + raviX_buffer_add_string(&fn->body, "{\n"); + raviX_buffer_add_string(&fn->body, " TValue *ra = "); + emit_reg_accessor(fn, get_first_target(insn), 0); + raviX_buffer_add_string(&fn->body, ";\n TValue *rb = "); + emit_reg_accessor(fn, get_first_operand(insn), 0); + raviX_buffer_add_string(&fn->body, ";\n int result = l_isfalse(rb);\n"); + raviX_buffer_add_string(&fn->body, " setbvalue(ra, result);\n"); + raviX_buffer_add_string(&fn->body, "}\n"); + return 0; +} + +static int emit_op_bnot(struct function *fn, Instruction *insn) +{ + raviX_buffer_add_string(&fn->body, "{\n"); + raviX_buffer_add_string(&fn->body, " TValue *ra = "); + emit_reg_accessor(fn, get_first_target(insn), 0); + raviX_buffer_add_string(&fn->body, ";\n TValue *rb = "); + emit_reg_accessor(fn, get_first_operand(insn), 0); + raviX_buffer_add_string(&fn->body, ";\n raviV_op_bnot(L, ra, rb);\n"); + emit_reload_base(fn); + raviX_buffer_add_string(&fn->body, "}\n"); + return 0; +} + +/* + * Following generates code that uses luaO_arith() calls + * so not very efficient. 
+ */ +static int emit_op_binary(struct function *fn, Instruction *insn) +{ + int op = 0; + switch (insn->opcode) { + case op_div: + op = LUA_OPDIV; + break; + case op_idiv: + op = LUA_OPIDIV; + break; + case op_band: + op = LUA_OPBAND; + break; + case op_bor: + op = LUA_OPBOR; + break; + case op_bxor: + op = LUA_OPBXOR; + break; + case op_shl: + op = LUA_OPSHL; + break; + case op_shr: + op = LUA_OPSHR; + break; + case op_mod: + op = LUA_OPMOD; + break; + case op_pow: + op = LUA_OPPOW; + break; + default: + assert(0); + return -1; + } + raviX_buffer_add_string(&fn->body, "{\n"); + raviX_buffer_add_string(&fn->body, " TValue *ra = "); + emit_reg_accessor(fn, get_first_target(insn), 0); + raviX_buffer_add_string(&fn->body, ";\n TValue *rb = "); + emit_reg_accessor(fn, get_operand(insn, 0), 0); + raviX_buffer_add_string(&fn->body, ";\n TValue *rc = "); + emit_reg_accessor(fn, get_operand(insn, 1), 1); + raviX_buffer_add_fstring(&fn->body, ";\n luaO_arith(L, %d, rb, rc, ra);\n", op); + emit_reload_base(fn); + raviX_buffer_add_string(&fn->body, "}\n"); + return 0; +} + +static int emit_op_unmi_unmf(struct function *fn, Instruction *insn) +{ + Pseudo *target = get_first_target(insn); + Pseudo *operand = get_first_operand(insn); + int type = insn->opcode == op_unmi ? PSEUDO_TEMP_INT : PSEUDO_TEMP_FLT; + const char *setter = insn->opcode == op_unmi ? "setivalue" : "setfltvalue"; + const char *getter = insn->opcode == op_unmi ? 
"ivalue" : "fltvalue"; + raviX_buffer_add_string(&fn->body, "{\n"); + if (operand->type != type && operand->type != PSEUDO_CONSTANT) { + raviX_buffer_add_string(&fn->body, " TValue *rb = "); + emit_reg_accessor(fn, operand, 0); + raviX_buffer_add_string(&fn->body, ";\n"); + } + if (target->type == type) { + raviX_buffer_add_string(&fn->body, " "); + emit_varname(target, &fn->body); + raviX_buffer_add_string(&fn->body, " = -"); + if (operand->type != type && operand->type != PSEUDO_CONSTANT) { + raviX_buffer_add_fstring(&fn->body, "%s(rb)", getter); + } else { + emit_varname_or_constant(fn, operand); + } + raviX_buffer_add_string(&fn->body, ";\n"); + } else if (target->type == PSEUDO_TEMP_ANY || target->type == PSEUDO_SYMBOL || + target->type == PSEUDO_LUASTACK) { + raviX_buffer_add_string(&fn->body, " TValue *ra = "); + emit_reg_accessor(fn, target, 0); + raviX_buffer_add_fstring(&fn->body, ";\n %s(ra, ", setter); + if (operand->type != type && operand->type != PSEUDO_CONSTANT) { + raviX_buffer_add_fstring(&fn->body, "%s(rb)", getter); + } else { + emit_varname_or_constant(fn, operand); + } + raviX_buffer_add_string(&fn->body, ");\n"); + } else { + assert(0); + return -1; + } + raviX_buffer_add_string(&fn->body, "}\n"); + return 0; +} + +static int emit_op_unm(struct function *fn, Instruction *insn) +{ + Pseudo *target = get_first_target(insn); + Pseudo *operand = get_first_operand(insn); + raviX_buffer_add_string(&fn->body, "{\n"); + raviX_buffer_add_string(&fn->body, " lua_Number n = 0.0;\n"); + raviX_buffer_add_string(&fn->body, " TValue *rb = "); + emit_reg_accessor(fn, operand, 0); + raviX_buffer_add_string(&fn->body, ";\n"); + raviX_buffer_add_string(&fn->body, " TValue *ra = "); + emit_reg_accessor(fn, target, 0); + raviX_buffer_add_string(&fn->body, ";\n"); + raviX_buffer_add_string(&fn->body, " if (ttisinteger(rb)) {\n"); + raviX_buffer_add_string(&fn->body, " lua_Integer i = ivalue(rb);\n"); + raviX_buffer_add_string(&fn->body, " setivalue(ra, intop(-, 0, 
i));\n"); + raviX_buffer_add_string(&fn->body, " } else if (tonumberns(rb, n)) {\n"); + raviX_buffer_add_string(&fn->body, " setfltvalue(ra, luai_numunm(L, n));\n"); + raviX_buffer_add_string(&fn->body, " } else {\n"); + raviX_buffer_add_string(&fn->body, " luaT_trybinTM(L, rb, rb, ra, TM_UNM);\n"); + emit_reload_base(fn); + raviX_buffer_add_string(&fn->body, " }\n"); + raviX_buffer_add_string(&fn->body, "}\n"); + return 0; +} + +static int emit_op_movfi(struct function *fn, Instruction *insn) +{ + Pseudo *target = get_first_target(insn); + Pseudo *operand = get_first_operand(insn); + raviX_buffer_add_string(&fn->body, "{\n"); + raviX_buffer_add_string(&fn->body, " TValue *rb = "); + emit_reg_accessor(fn, operand, 0); + raviX_buffer_add_string(&fn->body, ";\n"); + raviX_buffer_add_string(&fn->body, " lua_Integer i = 0;\n"); + raviX_buffer_add_string(&fn->body, " if (!tointeger(rb, &i)) {\n"); + raviX_buffer_add_fstring(&fn->body, " error_code = %d;\n", Error_integer_expected); + raviX_buffer_add_string(&fn->body, " goto Lraise_error;\n"); + raviX_buffer_add_string(&fn->body, " }\n"); + if (target->type == PSEUDO_TEMP_INT) { + raviX_buffer_add_string(&fn->body, " "); + emit_varname(target, &fn->body); + raviX_buffer_add_string(&fn->body, " = i;\n"); + } else if (target->type == PSEUDO_TEMP_ANY || target->type == PSEUDO_SYMBOL || + target->type == PSEUDO_LUASTACK) { + raviX_buffer_add_string(&fn->body, " TValue *ra = "); + emit_reg_accessor(fn, target, 0); + raviX_buffer_add_string(&fn->body, ";\n setivalue(ra, i);\n"); + } else { + assert(0); + return -1; + } + raviX_buffer_add_string(&fn->body, "}\n"); + return 0; +} + +static int emit_op_movif(struct function *fn, Instruction *insn) +{ + Pseudo *target = get_first_target(insn); + Pseudo *operand = get_first_operand(insn); + raviX_buffer_add_string(&fn->body, "{\n"); + raviX_buffer_add_string(&fn->body, " TValue *rb = "); + emit_reg_accessor(fn, operand, 0); + raviX_buffer_add_string(&fn->body, ";\n"); + 
raviX_buffer_add_string(&fn->body, " lua_Number n = 0.0;\n"); + raviX_buffer_add_string(&fn->body, " if (!tonumber(rb, &n)) {\n"); + raviX_buffer_add_fstring(&fn->body, " error_code = %d;\n", Error_number_expected); + raviX_buffer_add_string(&fn->body, " goto Lraise_error;\n"); + raviX_buffer_add_string(&fn->body, " }\n"); + if (target->type == PSEUDO_TEMP_FLT) { + raviX_buffer_add_string(&fn->body, " "); + emit_varname(target, &fn->body); + raviX_buffer_add_string(&fn->body, " = n;\n"); + } else if (target->type == PSEUDO_TEMP_ANY || target->type == PSEUDO_SYMBOL || + target->type == PSEUDO_LUASTACK) { + raviX_buffer_add_string(&fn->body, " TValue *ra = "); + emit_reg_accessor(fn, target, 0); + raviX_buffer_add_string(&fn->body, ";\n setfltvalue(ra, n);\n"); + } else { + assert(0); + return -1; + } + raviX_buffer_add_string(&fn->body, "}\n"); + return 0; +} + +static int output_instruction(struct function *fn, Instruction *insn) +{ + int rc = 0; + switch (insn->opcode) { + case op_ret: + rc = emit_op_ret(fn, insn); + break; + case op_br: + rc = emit_op_br(fn, insn); + break; + case op_cbr: + rc = emit_op_cbr(fn, insn); + break; + case op_mov: + case op_movi: + case op_movf: + rc = emit_op_mov(fn, insn); + break; + case op_movfi: + rc = emit_op_movfi(fn, insn); + break; + case op_movif: + rc = emit_op_movif(fn, insn); + break; + case op_loadglobal: + case op_get: + case op_get_skey: + case op_get_ikey: + case op_tget_skey: + case op_tget_ikey: + case op_tget: /* TODO custom codegen */ + case op_iaget: + case op_faget: + rc = emit_op_load_table(fn, insn); + break; + case op_storeglobal: + case op_put: + case op_put_skey: + case op_put_ikey: + case op_tput_skey: + case op_tput_ikey: + case op_tput: /* TODO custom codegen */ + case op_iaput: + case op_faput: + rc = emit_op_store_table(fn, insn); + break; + case op_call: + rc = emit_op_call(fn, insn); + break; + + case op_addff: + case op_subff: + case op_mulff: + case op_divff: + + case op_addii: + case op_subii: + 
case op_mulii: + case op_divii: + case op_bandii: + case op_borii: + case op_bxorii: + rc = emit_bin_ii(fn, insn); + break; + + case op_shlii: + case op_shrii: + rc = emit_bitop_ii(fn, insn); + break; + + case op_eqii: + case op_ltii: + case op_leii: + case op_eqff: + case op_ltff: + case op_leff: + rc = emit_comp_ii(fn, insn); + break; + + case op_addfi: + case op_subfi: + case op_mulfi: + case op_divfi: + rc = emit_bin_fi(fn, insn); + break; + + case op_subif: + case op_divif: + rc = emit_bin_if(fn, insn); + break; + + case op_add: + case op_sub: + case op_mul: + rc = emit_op_arith(fn, insn); + break; + + case op_not: + rc = emit_op_not(fn, insn); + break; + + case op_bnot: + rc = emit_op_bnot(fn, insn); + break; + + case op_div: + case op_idiv: + case op_band: + case op_bor: + case op_bxor: + case op_shl: + case op_shr: + case op_mod: + case op_pow: + rc = emit_op_binary(fn, insn); + break; + + case op_unmi: + case op_unmf: + rc = emit_op_unmi_unmf(fn, insn); + break; + + case op_unm: + rc = emit_op_unm(fn, insn); + break; + + // case op_leni: + + // op_string_concat + + case op_eq: + case op_lt: + case op_le: + rc = emit_generic_comp(fn, insn); + break; + + case op_iaget_ikey: + case op_faget_ikey: + rc = emit_op_arrayget_ikey(fn, insn); + break; + + case op_iaput_ival: + case op_faput_fval: + rc = emit_op_arrayput_val(fn, insn); + break; + + case op_toiarray: + case op_tofarray: + case op_totable: + case op_tostring: + case op_toclosure: + case op_toint: + rc = emit_op_totype(fn, insn); + break; + + case op_toflt: + rc = emit_op_toflt(fn, insn); + break; + + case op_totype: + rc = emit_op_tousertype(fn, insn); + break; + + case op_closure: + rc = emit_op_closure(fn, insn); + break; + + case op_newtable: + rc = emit_op_newtable(fn, insn); + break; + + case op_newiarray: + rc = emit_op_newarray(fn, insn); + break; + + case op_newfarray: + rc = emit_op_newarray(fn, insn); + break; + + case op_close: + rc = emit_op_close(fn, insn); + break; + + case op_len: + case 
op_leni: + rc = emit_op_len(fn, insn); + break; + + default: + fprintf(stderr, "Unsupported opcode %s\n", raviX_opcode_name(insn->opcode)); + rc = -1; + } + return rc; +} + +static int output_instructions(struct function *fn, InstructionList *list) +{ + Instruction *insn; + int rc = 0; + FOR_EACH_PTR(list, insn) + { + rc = output_instruction(fn, insn); + if (rc != 0) + break; + } + END_FOR_EACH_PTR(insn) + return rc; +} + +static inline bool is_block_deleted(BasicBlock *bb) +{ + return bb->index != ENTRY_BLOCK && bb->index != EXIT_BLOCK && get_num_instructions(bb) == 0; + // block was logically deleted if it has got zero instructions and + // it isn't the entry/exit block. +} + +static int output_basic_block(struct function *fn, BasicBlock *bb) +{ + if (is_block_deleted(bb)) + return 0; + int rc = 0; + raviX_buffer_add_fstring(&fn->body, "L%d:\n", bb->index); + if (bb->index == ENTRY_BLOCK) { + } else if (bb->index == EXIT_BLOCK) { + } else { + } + rc = output_instructions(fn, bb->insns); + if (bb->index == EXIT_BLOCK) { + raviX_buffer_add_string(&fn->body, " return result;\n"); + raviX_buffer_add_string(&fn->body, "Lraise_error:\n"); + raviX_buffer_add_string(&fn->body, " raviV_raise_error(L, error_code); /* does not return */\n"); + raviX_buffer_add_string(&fn->body, " return result;\n"); + } + return rc; +} + +static inline unsigned get_num_params(Proc *proc) +{ + return raviX_ptrlist_size((const struct ptr_list *)proc->function_expr->function_expr.args); +} + +static inline unsigned get_num_upvalues(Proc *proc) +{ + return raviX_ptrlist_size((const struct ptr_list *)proc->function_expr->function_expr.upvalues); +} + +/* Generate code for setting up a Lua Proto structure (the emitted statements populate the Proto named 'f'), recursively for each child function; always returns 0 */ +static int generate_lua_proc(Proc *proc, TextBuffer *mb) +{ + raviX_buffer_add_fstring(mb, " f->ravi_jit.jit_function = %s;\n", proc->funcname); + raviX_buffer_add_string(mb, " f->ravi_jit.jit_status = RAVI_JIT_COMPILED;\n"); +
raviX_buffer_add_fstring(mb, " f->numparams = %u;\n", get_num_params(proc)); + raviX_buffer_add_fstring(mb, " f->is_vararg = 0;\n"); // FIXME Var arg not supported yet + raviX_buffer_add_fstring(mb, " f->maxstacksize = %u;\n", compute_max_stack_size(proc)); + + // Load constants - we only need to load string constants as integer/floats are coded in + raviX_buffer_add_fstring(mb, " f->k = luaM_newvector(L, %u, TValue);\n", proc->num_strconstants); + raviX_buffer_add_fstring(mb, " f->sizek = %u;\n", proc->num_strconstants); + raviX_buffer_add_fstring(mb, " for (int i = 0; i < %u; i++)\n", proc->num_strconstants); + raviX_buffer_add_string( + mb, " setnilvalue(&f->k[i]);\n"); // Do this in case there is a problem allocating the strings + SetEntry *entry; + set_foreach(proc->constants, entry) + { + const Constant *constant = (Constant *)entry->key; + // We only need to register string constants + if (constant->type == RAVI_TSTRING) { + raviX_buffer_add_fstring(mb, " {\n TValue *o = &f->k[%u];\n", constant->index); + if (constant->s->len == 0) { + raviX_buffer_add_string(mb, " setsvalue2n(L, o, luaS_newlstr(L, \"\", 0));\n"); // empty string constant: still store a string, otherwise the slot keeps the nil written above + } else { + // FIXME we need to escape chars?
+ raviX_buffer_add_fstring(mb, " setsvalue2n(L, o, luaS_newlstr(L, \"%.*s\", %u));\n", + constant->s->len, constant->s->str, constant->s->len); + } + raviX_buffer_add_string(mb, " }\n"); + } + } + + // Load up-values + raviX_buffer_add_fstring(mb, " f->upvalues = luaM_newvector(L, %u, Upvaldesc);\n", get_num_upvalues(proc)); + raviX_buffer_add_fstring(mb, " f->sizeupvalues = %u;\n", get_num_upvalues(proc)); + int i = 0; + LuaSymbol *sym; + FOR_EACH_PTR(proc->function_expr->function_expr.upvalues, sym) + { + raviX_buffer_add_fstring(mb, " f->upvalues[%u].instack = %u;\n", i, sym->upvalue.is_in_parent_stack); + raviX_buffer_add_fstring(mb, " f->upvalues[%u].idx = %u;\n", i, sym->upvalue.parent_upvalue_index); + raviX_buffer_add_fstring(mb, " f->upvalues[%u].name = NULL;\n", i); + raviX_buffer_add_fstring(mb, " f->upvalues[%u].usertype = NULL;\n", i); + raviX_buffer_add_fstring(mb, " f->upvalues[%u].ravi_type = %d;\n", i, + sym->upvalue.value_type.type_code); + i++; + } + END_FOR_EACH_PTR(sym); + + // Load child protos recursively + if (get_num_childprocs(proc) > 0) { + raviX_buffer_add_fstring(mb, " f->p = luaM_newvector(L, %u, Proto *);\n", get_num_childprocs(proc)); + raviX_buffer_add_fstring(mb, " f->sizep = %u;\n", get_num_childprocs(proc)); + raviX_buffer_add_fstring(mb, " for (int i = 0; i < %u; i++)\n", get_num_childprocs(proc)); + raviX_buffer_add_string(mb, " f->p[i] = NULL;\n"); + Proc *childproc; + i = 0; + FOR_EACH_PTR(proc->procs, childproc) + { + raviX_buffer_add_fstring(mb, " f->p[%u] = luaF_newproto(L);\n", i); + raviX_buffer_add_string(mb, "{ \n"); + raviX_buffer_add_fstring(mb, " Proto *parent = f; f = f->p[%u];\n", i); + generate_lua_proc(childproc, mb); + raviX_buffer_add_string(mb, " f = parent;\n"); + raviX_buffer_add_string(mb, "}\n"); + i++; + } + END_FOR_EACH_PTR(childproc); + } + return 0; +} + +/* Generate the equivalent of a luaU_undump such that when called from Lua/Ravi code + * it will build the closure encapsulating the Lua chunk.
+ */ +static int generate_lua_closure(Proc *proc, const char *funcname, TextBuffer *mb) +{ + raviX_buffer_add_fstring(mb, "EXPORT LClosure *%s(lua_State *L) {\n", funcname); + raviX_buffer_add_fstring(mb, " LClosure *cl = luaF_newLclosure(L, %u);\n", get_num_upvalues(proc)); + raviX_buffer_add_string(mb, " setclLvalue(L, L->top, cl);\n"); + raviX_buffer_add_string(mb, " luaD_inctop(L);\n"); + raviX_buffer_add_string(mb, " cl->p = luaF_newproto(L);\n"); + raviX_buffer_add_string(mb, " Proto *f = cl->p;\n"); + generate_lua_proc(proc, mb); + raviX_buffer_add_string(mb, " return cl;\n"); + raviX_buffer_add_string(mb, "}\n"); + return 0; +} + +/* Generate C code for each proc recursively */ +static int generate_C_code(struct Ravi_CompilerInterface *ravi_interface, Proc *proc, TextBuffer *mb) +{ + int rc = 0; + struct function fn; + + initfn(&fn, proc, ravi_interface); + + BasicBlock *bb; + for (int i = 0; i < (int)proc->node_count; i++) { + bb = proc->nodes[i]; + rc = output_basic_block(&fn, bb); + if (rc != 0) + break; + } + + raviX_buffer_add_string(&fn.body, "}\n"); + raviX_buffer_add_string(mb, fn.prologue.buf); + raviX_buffer_add_string(mb, fn.body.buf); + cleanup(&fn); + + if (rc != 0) + return rc; + + Proc *childproc; + FOR_EACH_PTR(proc->procs, childproc) + { + rc = generate_C_code(ravi_interface, childproc, mb); + if (rc != 0) + return rc; + } + END_FOR_EACH_PTR(childproc); + return 0; +} + +static inline AstNode *get_parent_function_of_upvalue(LuaSymbol *symbol) +{ + AstNode *upvalue_function = symbol->upvalue.target_function; + AstNode *parent_function = upvalue_function->function_expr.parent_function; + return parent_function; +} + +/* + * Returns an index for the up-value as required by Lua/Ravi runtime. + * If the upvalue refers to a local variable in parent proto then idx should contain + * the register for the local variable and instack should be true, else idx should have the index of + * upvalue in parent proto and instack should be false. 
+ */ +static unsigned get_upvalue_idx(Proc *proc, LuaSymbol *upvalue_symbol, bool *in_stack) +{ + *in_stack = false; + LuaSymbol *underlying = upvalue_symbol->upvalue.target_variable; + if (underlying->symbol_type == SYM_LOCAL) { + /* Upvalue is in the stack of parent ? */ + AstNode *function_containing_local = underlying->variable.block->function; + AstNode *parent_function = get_parent_function_of_upvalue(upvalue_symbol); + if (parent_function == function_containing_local) { + /* Upvalue is a local in parent function */ + *in_stack = true; + return underlying->variable.pseudo->regnum; + } + } + /* Search for the upvalue in parent function */ + LuaSymbol *sym; + AstNode *this_function = upvalue_symbol->upvalue.target_function; + FOR_EACH_PTR(this_function->function_expr.upvalues, sym) + { + if (sym->upvalue.target_variable == upvalue_symbol->upvalue.target_variable) { + // Same variable + return sym->upvalue.upvalue_index; + } + } + END_FOR_EACH_PTR(sym); + assert(0); + return 0; +} + +/** + * Computes upvalue attributes needed by the Lua side + */ +static void compute_upvalue_attributes(Proc *proc) +{ + LuaSymbol *sym; + AstNode *this_function = proc->function_expr; + FOR_EACH_PTR(this_function->function_expr.upvalues, sym) + { + bool in_stack = false; + unsigned idx = get_upvalue_idx(proc, sym, &in_stack); + sym->upvalue.is_in_parent_stack = in_stack ? 1 : 0; + sym->upvalue.parent_upvalue_index = idx; // TODO check overflow? 
+ } + END_FOR_EACH_PTR(sym); +} + +/* + * Preprocess upvalues by populating a couple of attributes needed by the Lua side + */ +static void preprocess_upvalues(Proc *proc) +{ + compute_upvalue_attributes(proc); + Proc *child_proc; + FOR_EACH_PTR(proc->procs, child_proc) { preprocess_upvalues(child_proc); } + END_FOR_EACH_PTR(childproc); +} + +static void debug_message(void *context, const char *filename, long long line, const char *message) +{ + fprintf(stdout, "%s:%lld: %s\n", filename, line, message); +} +static void error_message(void *context, const char *message) { fprintf(stdout, "ERROR: %s\n", message); } + +static struct Ravi_CompilerInterface stub_compilerInterface = {.context = NULL, + .source_name = "input", + .source = NULL, + .source_len = 0, + .generated_code = NULL, + .main_func_name = {"setup"}, + .error_message = error_message, + .debug_message = debug_message}; + +/* Generate and compile C code */ +int raviX_generate_C(LinearizerState *linearizer, TextBuffer *mb, struct Ravi_CompilerInterface *ravi_interface) +{ + if (ravi_interface == NULL) + ravi_interface = &stub_compilerInterface; + + // _ENV is the name of the Lua up-value that points to the globals table + raviX_create_string(linearizer->ast_container, "_ENV", 4); + + /* Add the common header portion */ + // FIXME we need a way to customise this for 32-bit vs 64-bit + raviX_buffer_add_string(mb, Lua_header); + + /* Preprocess upvalue attributes */ + preprocess_upvalues(linearizer->main_proc); + + /* Recursively generate C code for procs */ + if (generate_C_code(ravi_interface, linearizer->main_proc, mb) != 0) { + return -1; + } + generate_lua_closure(linearizer->main_proc, ravi_interface->main_func_name, mb); + return 0; +} + +void raviX_generate_C_tofile(LinearizerState *linearizer, const char *mainfunc, FILE *fp) +{ + struct Ravi_CompilerInterface *ravi_interface = &stub_compilerInterface; + raviX_string_copy(ravi_interface->main_func_name, (mainfunc != NULL ? 
mainfunc : "setup"), + sizeof ravi_interface->main_func_name); + TextBuffer mb; + raviX_buffer_init(&mb, 4096); + raviX_generate_C(linearizer, &mb, NULL); + fprintf(fp, "%s\n", mb.buf); + raviX_buffer_free(&mb); +} diff --git a/ravicomp/src/codegen.h b/ravicomp/src/codegen.h new file mode 100644 index 0000000..7dc14e9 --- /dev/null +++ b/ravicomp/src/codegen.h @@ -0,0 +1,11 @@ +#ifndef ravicomp_CODEGEN_H +#define ravicomp_CODEGEN_H + +#include "ravi_compiler.h" +#include "ravi_api.h" +#include "linearizer.h" + +RAVICOMP_EXPORT int raviX_generate_C(LinearizerState *linearizer, TextBuffer *mb, struct Ravi_CompilerInterface *ravi_interface); +RAVICOMP_EXPORT void raviX_generate_C_tofile(LinearizerState *linearizer, const char *mainfunc, FILE *fp); + +#endif \ No newline at end of file diff --git a/ravicomp/src/common.h b/ravicomp/src/common.h new file mode 100644 index 0000000..48be199 --- /dev/null +++ b/ravicomp/src/common.h @@ -0,0 +1,14 @@ +#ifndef ravicomp_COMMON_H +#define ravicomp_COMMON_H + +#include + +typedef uint32_t nodeId_t; /* The type used to identify nodes in CFG */ + +/* We have two distinguished basic blocks in every proc */ +enum { + ENTRY_BLOCK = 0, + EXIT_BLOCK = 1 +}; + +#endif diff --git a/ravicomp/src/dataflow_framework.c b/ravicomp/src/dataflow_framework.c new file mode 100644 index 0000000..8d5f229 --- /dev/null +++ b/ravicomp/src/dataflow_framework.c @@ -0,0 +1,94 @@ +/** + * A framework for performing data flow analysis. 
+ * The framework is based upon similar framework in MIR project (https://github.com/vnmakarov/mir) + */ + +#include "dataflow_framework.h" +#include "allocate.h" +#include "graph.h" +#include "bitset.h" + +#include <string.h> + +DECLARE_ARRAY(GraphNodeArray, GraphNode *); + +struct dataflow_context { + Graph *g; + GraphNodeArray worklist; + GraphNodeArray pending; + BitSet bb_to_consider; + void *userdata; +}; + +static void init_data_flow(struct dataflow_context *dataflow_context, Graph *g) +{ + memset(dataflow_context, 0, sizeof *dataflow_context); + raviX_bitset_create2(&dataflow_context->bb_to_consider, 512); + dataflow_context->g = g; +} + +static void finish_data_flow(struct dataflow_context *dataflow_context) +{ + array_clearmem(&dataflow_context->worklist); + array_clearmem(&dataflow_context->pending); + raviX_bitset_destroy(&dataflow_context->bb_to_consider); +} + +void raviX_solve_dataflow(Graph *g, bool forward_p, + int (*join_function)(void *, nodeId_t, bool), + int (*transfer_function)(void *, nodeId_t), void *userdata) +{ + unsigned iter; + struct dataflow_context ctx; + GraphNodeArray *worklist; + GraphNodeArray *pending; + + init_data_flow(&ctx, g); + ctx.userdata = userdata; /* init_data_flow() zeroes the context; without this the callbacks always received NULL */ + worklist = &ctx.worklist; + pending = &ctx.pending; + /* ensure that the graph has RPO calculated */ + raviX_classify_edges(ctx.g); + + worklist->count = 0; + /* Initially the basic blocks are added to the worklist */ + for (uint32_t i = 0; i < raviX_graph_size(ctx.g); i++) { + array_push(worklist, raviX_graph_node(ctx.g, i)); + } + iter = 0; + while (worklist->count != 0) { + GraphNode **addr = worklist->data; + raviX_sort_nodes_by_RPO(addr, worklist->count, forward_p); + raviX_bitset_clear(&ctx.bb_to_consider); + pending->count = 0; + for (unsigned i = 0; i < worklist->count; i++) { + int changed_p = iter == 0; + GraphNode *bb = addr[i]; + GraphNodeList *nodes = forward_p ? raviX_predecessors(bb) : raviX_successors(bb); + // TODO should we pass the nodes array to the join function?
+ if (raviX_node_list_size(nodes) == 0) + join_function(ctx.userdata, raviX_node_index(bb), true); + else + changed_p |= join_function(ctx.userdata, raviX_node_index(bb), false); + if (changed_p && transfer_function(ctx.userdata, raviX_node_index(bb))) { + GraphNodeList *list = forward_p ? raviX_successors(bb) : raviX_predecessors(bb); + for (unsigned i = 0; i < raviX_node_list_size(list); i++) { + nodeId_t index = raviX_node_list_at(list, i); + /* If this bb has not already been added to pending then add it */ + if (raviX_bitset_set_bit_p(&ctx.bb_to_consider, index)) { + array_push(pending, raviX_graph_node(ctx.g, index)); + } + } + } + } + iter++; + { + /* Swap worklist and pending */ + GraphNodeArray *t = worklist; + worklist = pending; + pending = t; + } + } + + finish_data_flow(&ctx); +} diff --git a/ravicomp/src/dataflow_framework.h b/ravicomp/src/dataflow_framework.h new file mode 100644 index 0000000..7e77ccf --- /dev/null +++ b/ravicomp/src/dataflow_framework.h @@ -0,0 +1,19 @@ +#ifndef ravicomp_DATAFLOW_FRAMEWORK_H +#define ravicomp_DATAFLOW_FRAMEWORK_H + +#include "graph.h" +#include <stdbool.h> + +/* + * Data Flow Analysis framework. + * The Join/Transfer functions should return 1 if they made any changes else 0. + */ +extern void raviX_solve_dataflow( + Graph *g, + bool forward_p, /* Set to true for forward data flow */ + int (*join_function)(void *userdata, nodeId_t, bool init), /* Join/Meet operator - if init is true reset the bitsets */ + int (*transfer_function)(void *userdata, nodeId_t), /* transfer function */ + void *userdata); /* pointer to user data, will be passed to join/transfer functions */ + + +#endif \ No newline at end of file diff --git a/ravicomp/src/df_liveness.c b/ravicomp/src/df_liveness.c new file mode 100644 index 0000000..687e04e --- /dev/null +++ b/ravicomp/src/df_liveness.c @@ -0,0 +1,99 @@ +/* + * Calculate variable liveness + * This will use the Dataflow Framework.
+ * Implementation inspired by one in MIR + */ + +#include "bitset.h" +#include "dataflow_framework.h" +#include "linearizer.h" + +struct liveness_info { + nodeId_t node_id; + BitSet in; + BitSet out; + BitSet use; + BitSet def; +}; + +DECLARE_ARRAY(liveness_info_array, struct liveness_info *); + +struct liveness_data { + Proc *proc; + struct liveness_info_array lives; +}; + +static void init_liveness_data(Proc *proc, struct liveness_data *liveness_data) +{ + memset(liveness_data, 0, sizeof(*liveness_data)); + liveness_data->proc = proc; /* live_join_func() dereferences proc->cfg */ + for (unsigned i = 0; i < proc->node_count; i++) { + struct liveness_info *liveness_info = (struct liveness_info *)calloc(1, sizeof(struct liveness_info)); + liveness_info->node_id = i; + raviX_bitset_create(&liveness_info->use); + raviX_bitset_create(&liveness_info->def); + raviX_bitset_create(&liveness_info->in); + raviX_bitset_create(&liveness_info->out); + array_push(&liveness_data->lives, liveness_info); + } +} + +static void destroy_liveness_data(struct liveness_data *liveness_data) +{ + for (unsigned i = 0; i < liveness_data->lives.count; i++) { + raviX_bitset_destroy(&liveness_data->lives.data[i]->use); + raviX_bitset_destroy(&liveness_data->lives.data[i]->def); + raviX_bitset_destroy(&liveness_data->lives.data[i]->in); + raviX_bitset_destroy(&liveness_data->lives.data[i]->out); + free(liveness_data->lives.data[i]); /* entry was allocated with calloc() in init_liveness_data() */ + } + array_clearmem(&liveness_data->lives); +} + +static inline struct liveness_info *get_liveness_info(struct liveness_data *liveness_data, nodeId_t id) +{ + return liveness_data->lives.data[id]; +} + +/* Life analysis */ +static int live_join_func(void *userdata, nodeId_t id, bool init) +{ + struct liveness_data *liveness_data = (struct liveness_data *)userdata; + struct liveness_info *liveness_info = get_liveness_info(liveness_data, id); + if (init) { + raviX_bitset_clear(&liveness_info->in); + return 0; + } else { + GraphNodeList *successors = raviX_successors(raviX_graph_node(liveness_data->proc->cfg, id)); + int changed = 0; + // out[n] = Union of in[s] where s in succ[n]
+ for (unsigned i = 0; i < raviX_node_list_size(successors); i++) { + nodeId_t succ_id = raviX_node_list_at(successors, i); + struct liveness_info *successor_liveness_info = get_liveness_info(liveness_data, succ_id); + changed |= + raviX_bitset_ior(&liveness_info->out, &liveness_info->out, &successor_liveness_info->in); + } + return changed; + } +} + +static int live_transfer_func(void *userdata, nodeId_t id) +{ + struct liveness_data *liveness_data = (struct liveness_data *)userdata; + struct liveness_info *liveness_info = get_liveness_info(liveness_data, id); + // in[n] = use[n] U (out[n] - def[n]) + // In bitset terms in[n] = use[n] | (out[n] & ~def[n]) + return raviX_bitset_ior_and_compl(&liveness_info->in, &liveness_info->use, &liveness_info->out, + &liveness_info->def); +} + +// TODO +// Compute use/def sets of each node +// If a reg appears as the target of an instruction that's a def +// If a reg is used as operand then it's a use +// Need to handle ranges / var args too +// Or should we restrict analysis to certain types of regs? + +// Right now we have disjoint sets for temps / locals - to do this efficiently we need a merged set of regs for each proc +// Liveness analysis is a backward data flow problem +// see calculate_func_cfg_live_info in mir_genc.c \ No newline at end of file diff --git a/ravicomp/src/dominator.c b/ravicomp/src/dominator.c new file mode 100644 index 0000000..7748dfd --- /dev/null +++ b/ravicomp/src/dominator.c @@ -0,0 +1,149 @@ +#include "dominator.h" + +#include "ravi_compiler.h" +#include "graph.h" + +#include <assert.h> + +/* + * The dominator tree construction algorithm is based on figure 9.24, + * chapter 9, p 532, of Engineering a Compiler. + * + * The algorithm is also described in the paper 'A Simple, Fast + * Dominance Algorithm' by Keith D. Cooper, Timothy J. Harvey and + * Ken Kennedy. + */ + +/* +Some terminology: + +DOM(b): A node n in the CFG dominates b if n lies on every path from the entry node of the CFG to b.
+ DOM9b) contains every node n that dominates b. + +IDOM(b): For a node b, the set IDOM(b) contains exactly one node, the immediate dominator of b. + If n is b's immediate dominator then every node in {DOM(b) - b} is also in DOM(n). + +The dominator tree algorithm is an optimised version of forward data flow solver. The +algorithm iterates until a fixed point is reached. The output of the algorithm is the IDOM +array that describes the dominator tree. +*/ + +struct dominator_tree { + Graph *g; + GraphNode **IDOM; /* IDOM[] - array of immediate dominators, one per node in the graph, indexed by node id */ + uint32_t N; /* sizeof IDOM */ +}; + +struct dominator_tree *raviX_new_dominator_tree(Graph *g) +{ + struct dominator_tree *state = (struct dominator_tree *)calloc(1, sizeof(struct dominator_tree)); + state->N = raviX_graph_size(g); + state->IDOM = (GraphNode **)calloc(state->N, sizeof(GraphNode *)); + state->g = g; + return state; +} + +void raviX_destroy_dominator_tree(struct dominator_tree *state) +{ + free(state->IDOM); + free(state); +} + +/* Finds nearest common ancestor */ +/* The algorithm starts at the two nodes whose sets are being intersected, and walks + * upward from each toward the root. By comparing the nodes with their RPO numbers + * the algorithm finds the common ancestor - the immediate dominator of i and j. + */ +static GraphNode *intersect(struct dominator_tree *state, GraphNode *i, GraphNode *j) +{ + GraphNode *finger1 = i; + GraphNode *finger2 = j; + while (finger1 != finger2) { + while (raviX_node_RPO(finger1) > raviX_node_RPO(finger2)) { + finger1 = state->IDOM[raviX_node_index(finger1)]; + assert(finger1); + } + while (raviX_node_RPO(finger2) > raviX_node_RPO(finger1)) { + finger2 = state->IDOM[raviX_node_index(finger2)]; + assert(finger2); + } + } + return finger1; +} + +/* Look for the first predecessor whose immediate dominator has been calculated. 
+ * Because of the order in which this search occurs, we will always find at least 1 + * such predecessor. + */ +static GraphNode *find_first_predecessor_with_idom(struct dominator_tree *state, GraphNodeList *predlist) +{ + for (uint32_t i = 0; i < raviX_node_list_size(predlist); i++) { + nodeId_t id = raviX_node_list_at(predlist, i); + if (state->IDOM[id]) + return raviX_graph_node(state->g, id); + } + return NULL; +} + +/** + * Calculates the dominator tree. + * Before this is called the graph links should have been numbered in + * reverse post order. + */ +void raviX_calculate_dominator_tree(struct dominator_tree *state) +{ + /* + Some implementation details: + The graph and links reference nodes by node ids. + However the IDOM array references the node objects - i.e. + pointers to 'GraphNode'. So we have some conversion from node id + to the node, and vice versa at various points. + */ + + uint32_t N = raviX_graph_size(state->g); + GraphNode **nodes_in_reverse_postorder = raviX_graph_nodes_sorted_by_RPO(state->g, false); + for (uint32_t i = 0; i < state->N; i++) { + state->IDOM[i] = NULL; /* undefined - set to a invalid value */ + } + // Set IDom entry for root to itself + state->IDOM[ENTRY_BLOCK] = raviX_graph_node(state->g, ENTRY_BLOCK); + bool changed = true; + while (changed) { + changed = false; + // for all nodes, b, in reverse postorder (except root) + for (uint32_t i = 0; i < N; i++) { + GraphNode *b = nodes_in_reverse_postorder[i]; + nodeId_t bid = raviX_node_index(b); + if (bid == ENTRY_BLOCK) // skip root + continue; + GraphNodeList *predecessors = raviX_predecessors(b); // Predecessors of b + // NewIDom = first (processed) predecessor of b, pick one + GraphNode *firstpred = find_first_predecessor_with_idom(state, predecessors); + assert(firstpred != NULL); + GraphNode *NewIDom = firstpred; + // for all other predecessors, p, of b + for (uint32_t k = 0; k < raviX_node_list_size(predecessors); k++) { + nodeId_t pid = raviX_node_list_at(predecessors, 
k); + GraphNode *p = raviX_graph_node(state->g, pid); + if (p == firstpred) + continue; // all other predecessors + if (state->IDOM[raviX_node_index(p)] != NULL) { + // i.e. IDoms[p] calculated + NewIDom = intersect(state, p, NewIDom); + } + } + if (state->IDOM[bid] != NewIDom) { + state->IDOM[bid] = NewIDom; + changed = true; + } + } + } + free(nodes_in_reverse_postorder); +} + +void raviX_dominator_tree_output(struct dominator_tree *tree, FILE *fp) +{ /* Writes one "IDOM[i] = id" line per IDOM entry to fp; raviX_node_index() asserts that each entry is non-NULL */ + for (uint32_t i = 0; i < tree->N; i++) { + fprintf(fp, "IDOM[%d] = %d\n", i, raviX_node_index(tree->IDOM[i])); /* honour the caller-supplied stream instead of stdout */ + } +} \ No newline at end of file diff --git a/ravicomp/src/dominator.h b/ravicomp/src/dominator.h new file mode 100644 index 0000000..3d6b880 --- /dev/null +++ b/ravicomp/src/dominator.h @@ -0,0 +1,16 @@ +#ifndef ravicomp_DOMINATOR_H +#define ravicomp_DOMINATOR_H + +#include "graph.h" + +#include + +struct dominator_tree; + +struct dominator_tree *raviX_new_dominator_tree(Graph *g); +void raviX_calculate_dominator_tree(struct dominator_tree *state); +void raviX_destroy_dominator_tree(struct dominator_tree *state); +void raviX_dominator_tree_output(struct dominator_tree *tree, FILE *fp); + + +#endif \ No newline at end of file diff --git a/ravicomp/src/fnv_hash.c b/ravicomp/src/fnv_hash.c new file mode 100644 index 0000000..0d6a5bb --- /dev/null +++ b/ravicomp/src/fnv_hash.c @@ -0,0 +1,72 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included
in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + */ + +/* Quick FNV-1a hash implementation based on: + * http://www.isthe.com/chongo/tech/comp/fnv/ + * + * FNV-1a may not be the best hash out there -- Jenkins's lookup3 is supposed + * to be quite good, and it may beat FNV. But FNV has the advantage that it + * involves almost no code. + */ +#include + +#include + +uint32_t +fnv1_hash_string(const char *key) +{ + uint32_t hash = 2166136261ul; + const uint8_t *bytes = (uint8_t *)key; + + while (*bytes != 0) { + hash ^= *bytes; + hash = hash * 0x01000193; + bytes++; + } + + return hash; +} + +uint32_t +fnv1_hash_data(const void *data, size_t size) +{ + uint32_t hash = 2166136261ul; + const uint8_t *bytes = (uint8_t *)data; + + while (size-- != 0) { + hash ^= *bytes; + hash = hash * 0x01000193; + bytes++; + } + + return hash; +} + +int +string_key_equals(const void *a, const void *b) +{ + return strcmp(a, b) == 0; +} diff --git a/ravicomp/src/fnv_hash.h b/ravicomp/src/fnv_hash.h new file mode 100644 index 0000000..d5c0e3b --- /dev/null +++ b/ravicomp/src/fnv_hash.h @@ -0,0 +1,51 @@ +/* + * Copyright © 2009 Intel Corporation + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, 
distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + */ + +/* Quick FNV-1a hash implementation (the functions keep the historical fnv1_ prefix) based on: + * http://www.isthe.com/chongo/tech/comp/fnv/ + */ + +#ifndef ravicomp_FNV_HASH_H +#define ravicomp_FNV_HASH_H + +#include +#include + +uint32_t fnv1_hash_string(const char *key); +uint32_t fnv1_hash_data(const void *data, size_t size); + +int string_key_equals(const void *a, const void *b); + +#define hash_table_create_for_string() \ + raviX_hash_table_create((uint32_t (*)(const void *key))fnv1_hash_string, \ + string_key_equals) + +#define set_create_for_string() \ + set_create((uint32_t (*)(const void *key))fnv1_hash_string, \ + string_key_equals) + +#endif diff --git a/ravicomp/src/graph.c b/ravicomp/src/graph.c new file mode 100644 index 0000000..dca5b93 --- /dev/null +++ b/ravicomp/src/graph.c @@ -0,0 +1,389 @@ +#include "graph.h" + +#include +#include + +struct Graph { + unsigned allocated; /* tracks allocated size of nodes */ + GraphNode **nodes; /* array[allocated] indexed by nodeId_t, note user must check if nodes[i] != NULL */ + Allocator node_allocator; + nodeId_t entry, exit; /* entry and exit nodes */ + void *userdata; +}; + +struct
GraphNodeLink { + nodeId_t node_index; + unsigned char edge_type; +}; +/* A node_list is simply a dynamic array/vector of node ids. + */ +struct GraphNodeList { + unsigned count : 16; /* in use */ + unsigned allocated : 16; /* tracks allocated size of links array */ + struct GraphNodeLink *links; /* array[allocated] of links, populated [0..count) */ +}; + +/* A node in the graph. For each node we maintain a list of predecessor nodes and successor nodes. + */ +struct GraphNode { + nodeId_t index; /* the id of the basic_block */ + uint32_t pre; /* preorder */ + uint32_t rpost; /* reverse postorder */ + GraphNodeList preds; /* predecessor nodes */ + GraphNodeList succs; /* successor nodes */ +}; + +static GraphNode *raviX_add_node(Graph *g, nodeId_t index); + +static void node_list_init(GraphNodeList *node_list) +{ + node_list->count = 0; + node_list->allocated = 0; + node_list->links = NULL; +} + +static void node_list_destroy(GraphNodeList *node_list) +{ + node_list->count = 0; + node_list->allocated = 0; + free(node_list->links); + node_list->links = NULL; +} + +/* Gets the offset of the node or -1 if not found */ +static int64_t node_list_search(const GraphNodeList *node_list, nodeId_t index) +{ + for (unsigned i = 0; i < node_list->count; i++) { + if (node_list->links[i].node_index == index) { + return i; + } + } + return -1; +} + +/* Gets the given node or NULL if node does not exist */ +static inline struct GraphNodeLink *node_list_get(const GraphNodeList *node_list, nodeId_t index) +{ + int64_t i = node_list_search(node_list, index); + if (i < 0) + return NULL; + return &node_list->links[i]; +} + +/* Grows the node list array */ +static void node_list_grow(GraphNodeList *node_list) +{ + unsigned new_size = node_list->allocated + 8u; + struct GraphNodeLink *edges = raviX_realloc_array(node_list->links, sizeof(struct GraphNodeLink), node_list->allocated, + new_size); + node_list->allocated = new_size; + node_list->links = edges; +} + +/* add an node to the 
node_list if not already added */ +static void node_list_add(GraphNodeList *node_list, nodeId_t index) +{ + if (node_list_search(node_list, index) != -1) + return; + if (node_list->count >= node_list->allocated) { + node_list_grow(node_list); + } + assert(node_list->count < node_list->allocated); + node_list->links[node_list->count].node_index = index; + assert(node_list->links[node_list->count].edge_type == 0); + + node_list->count++; +} + +/* delete an node from the node_list if it exists */ +static void node_list_delete(GraphNodeList *node_list, nodeId_t index) +{ + int64_t i = node_list_search(node_list, index); + if (i < 0) + return; + node_list->count = (unsigned) raviX_del_array_element(node_list->links, sizeof node_list->links[0], node_list->count, i, 1); +} + +uint32_t raviX_node_list_size(GraphNodeList *list) { return list->count; } + +nodeId_t raviX_node_list_at(GraphNodeList *list, uint32_t i) +{ + if (i < list->count) + return list->links[i].node_index; + assert(false); + return (nodeId_t)-1; +} + +Graph *raviX_init_graph(nodeId_t entry, nodeId_t exit, void *userdata) +{ + Graph *g = (Graph *)calloc(1, sizeof(Graph)); + g->allocated = 0; + g->nodes = NULL; + raviX_allocator_init(&g->node_allocator, "node_allocator", sizeof(GraphNode), sizeof(double), + sizeof(GraphNode) * 32); + raviX_add_node(g, entry); + raviX_add_node(g, exit); + g->entry = entry; + g->exit = exit; + g->userdata = userdata; + return g; +} + +static void raviX_destroy_node(GraphNode *n) +{ + if (n == NULL) + return; + node_list_destroy(&n->preds); + node_list_destroy(&n->succs); +} + +void raviX_destroy_graph(Graph *g) +{ + for (unsigned i = 0; i < g->allocated; i++) { + raviX_destroy_node(g->nodes[i]); + } + raviX_allocator_destroy(&g->node_allocator); + free(g->nodes); + free(g); +} + +static GraphNode *raviX_get_node(const Graph *g, nodeId_t index) +{ + if (index < g->allocated && g->nodes[index] != NULL) { + // already allocated + return g->nodes[index]; + } + return NULL; +} + 
+static void raviX_graph_grow(Graph *g, nodeId_t needed) +{ + unsigned new_size = needed + 8; /* leave headroom so consecutive ids do not each trigger a realloc */ + GraphNode **new_data = + raviX_realloc_array(g->nodes, sizeof(GraphNode*), g->allocated, new_size); + g->allocated = new_size; + g->nodes = new_data; +} + +static GraphNode *raviX_add_node(Graph *g, nodeId_t index) +{ + if (index < g->allocated && g->nodes[index] != NULL) { + // already allocated + return g->nodes[index]; + } + if (index >= g->allocated) { + raviX_graph_grow(g, index); + } + assert(index < g->allocated); + GraphNode *n = raviX_allocator_allocate(&g->node_allocator, 0); + assert(n->pre == 0); + assert(n->rpost == 0); + node_list_init(&n->preds); + node_list_init(&n->succs); + /* note that each node must have an index such that n = nodes[index] */ + n->index = index; + g->nodes[index] = n; + return n; +} + +void raviX_add_edge(Graph *g, nodeId_t from, nodeId_t to) +{ + GraphNode *prednode = raviX_add_node(g, from); + GraphNode *succnode = raviX_add_node(g, to); + + node_list_add(&prednode->succs, to); + node_list_add(&succnode->preds, from); +} + +void raviX_delete_edge(Graph *g, nodeId_t a, nodeId_t b) +{ /* Removes the edge a -> b from both adjacency lists; a missing endpoint asserts in debug builds and is a no-op otherwise */ + GraphNode *node_a = raviX_get_node(g, a); /* bounds- and NULL-checked lookup, returns NULL for an unknown id */ + GraphNode *node_b = raviX_get_node(g, b); + + assert(node_a); + assert(node_b); + + if (node_a == NULL || node_b == NULL) + return; + + node_list_delete(&node_a->succs, b); + node_list_delete(&node_b->preds, a); +} + +bool raviX_has_edge(Graph *g, nodeId_t from, nodeId_t to) +{ + GraphNode *prednode = raviX_get_node(g, from); + if (prednode == NULL) + return false; + return node_list_search(&prednode->succs, to) != -1; +} + +enum EdgeType raviX_get_edge_type(Graph *g, nodeId_t from, nodeId_t to) +{ + GraphNode *prednode = raviX_get_node(g, from); + if (prednode == NULL) + return EDGE_TYPE_UNCLASSIFIED; + struct GraphNodeLink *node_link = node_list_get(&prednode->succs, to); + if (node_link ==
NULL) + return EDGE_TYPE_UNCLASSIFIED; + return node_link->edge_type; +} + +void raviX_for_each_node(Graph *g, void (*callback)(void *arg, Graph *g, nodeId_t nodeid), void *arg) +{ + for (unsigned i = 0; i < g->allocated; i++) { + if (g->nodes[i] != NULL) { + callback(arg, g, g->nodes[i]->index); + } + } +} + +/* says how many nodes are in the graph */ +uint32_t raviX_graph_size(Graph *g) +{ + uint32_t count = 0; + for (unsigned i = 0; i < g->allocated; i++) { + if (g->nodes[i] != NULL) { + count++; + } + } + return count; +} + +uint32_t raviX_node_RPO(GraphNode *n) +{ + assert(n); + return n->rpost; +} +nodeId_t raviX_node_index(GraphNode *n) +{ + assert(n); + return n->index; +} +GraphNode *raviX_graph_node(Graph *g, nodeId_t index) +{ + assert(index < g->allocated); + return g->nodes[index]; +} +GraphNodeList *raviX_predecessors(GraphNode *n) +{ + assert(n); + return &n->preds; +} +GraphNodeList *raviX_successors(GraphNode *n) +{ + assert(n); + return &n->succs; +} + +struct classifier_state { + uint32_t preorder; + uint32_t rpostorder; +}; + +/* + * Do a recursive depth first search and mark nodes with pre/reverse post order sequence, as well + * as classify links. Algorithm from figure 3.2 in Building and Optimizing Compiler + */ +static void DFS_classify(Graph *g, GraphNode *n, struct classifier_state *state) +{ + assert(n); + + n->pre = state->preorder; + state->preorder++; + + /* For each successor node */ + for (unsigned i = 0; i < n->succs.count; i++) { + struct GraphNodeLink *E = &n->succs.links[i]; + GraphNode *S = g->nodes[E->node_index]; + if (S->pre == 0) { + E->edge_type = EDGE_TYPE_TREE; + DFS_classify(g, S, state); + } else if (S->rpost == 0) { + E->edge_type = EDGE_TYPE_BACKWARD; + } else if (n->pre < S->pre) { + E->edge_type = EDGE_TYPE_FORWARD; + } else { + E->edge_type = EDGE_TYPE_CROSS; + } + } + + n->rpost = state->rpostorder; + state->rpostorder--; +} + +/* +Classify links in the graph. 
+ Implements algorithm described in +figure 3.2 - Building an Optimizing Compiler. This algorithm is also implemented +in MIR. +*/ +void raviX_classify_edges(Graph *g) +{ + uint32_t N = raviX_graph_size(g); + if (N == 0) + return; + + struct classifier_state state = {.preorder = 1, .rpostorder = N}; /* pre counts up from 1, rpost counts down from N; 0 means "not visited/finished" */ + + /* reset all data we will be computing */ + for (unsigned i = 0; i < g->allocated; i++) { + if (g->nodes[i] != NULL) { + g->nodes[i]->pre = 0; + g->nodes[i]->rpost = 0; + for (unsigned j = 0; j < g->nodes[i]->succs.count; j++) { /* separate index: must not shadow the node index i */ + struct GraphNodeLink *E = &g->nodes[i]->succs.links[j]; + E->edge_type = 0; + } + } + } + + DFS_classify(g, g->nodes[g->entry], &state); +} + +static int rpost_cmp(const void *a1, const void *a2) +{ + const GraphNode *n1 = *((const GraphNode **)a1); + const GraphNode *n2 = *((const GraphNode **)a2); + int result = (n1->rpost > n2->rpost) - (n1->rpost < n2->rpost); /* three-way compare; avoids narrowing an unsigned difference to int */ + return result; +} + +static int post_cmp(const void *a1, const void *a2) { return -rpost_cmp(a1, a2); } + +void raviX_sort_nodes_by_RPO(GraphNode **nodes, size_t count, bool forward) +{ + qsort(nodes, count, sizeof(GraphNode *), forward ?
post_cmp : rpost_cmp); +} + +GraphNode **raviX_graph_nodes_sorted_by_RPO(Graph *g, bool forward) +{ + uint32_t N = raviX_graph_size(g); + GraphNode **nodes = calloc(N, sizeof(GraphNode *)); + unsigned j = 0; + for (unsigned i = 0; i < g->allocated; i++) { + if (g->nodes[i] == NULL) + continue; + nodes[j++] = g->nodes[i]; + } + assert(j == N); + raviX_sort_nodes_by_RPO(nodes, N, forward); + return nodes; +} + +static void draw_node(void *arg, Graph *g, uint32_t nodeid) +{ + FILE *fp = (FILE *)arg; + GraphNodeList *successors = raviX_successors(raviX_graph_node(g, nodeid)); + if (!successors) + return; + for (unsigned i = 0; i < raviX_node_list_size(successors); i++) { + fprintf(fp, "L%d -> L%d\n", nodeid, raviX_node_list_at(successors, i)); + } +} + +void raviX_draw_graph(Graph *g, FILE *fp) +{ + fprintf(fp, "digraph {\n"); + raviX_for_each_node(g, draw_node, fp); + fprintf(fp, "}\n"); +} diff --git a/ravicomp/src/graph.h b/ravicomp/src/graph.h new file mode 100644 index 0000000..26ca9ae --- /dev/null +++ b/ravicomp/src/graph.h @@ -0,0 +1,99 @@ +#ifndef ravicomp_GRAPH_H +#define ravicomp_GRAPH_H + +#include "allocate.h" +#include "common.h" + +#include +#include + +/* + * Various graph manipulation routines. + * The graph is designed to manage nodes that are just integer ids. + * Node ids range from [0..n) - hence one can simply represent nodes as arrays. + * + * The graph structure does not care what the node represents and + * knows nothing about it. The benefit of this approach is that we can make + * the graph algorithms reusable. There may be some performance cost as we + * need to map node ids to nodes. + * + * The assumption here is that each node corresponds to a basic block in + * the program intermediate code. And each basic block is identified by a node + * id which can be used to construct the control flow graph. 
+ */ + +/* nodeId_t is declared elsewhere */ +typedef struct Graph Graph; +typedef struct GraphNode GraphNode; +typedef struct GraphNodeList GraphNodeList; +enum EdgeType { + EDGE_TYPE_UNCLASSIFIED = 0, + EDGE_TYPE_TREE = 1, + EDGE_TYPE_BACKWARD = 2, + EDGE_TYPE_FORWARD = 4, + EDGE_TYPE_CROSS = 8 +}; + + +/* Initialize the graph data structure and associate some userdata with it. */ +Graph *raviX_init_graph(nodeId_t entry, nodeId_t exit, void *userdata); +/* Destroy the graph data structure */ +void raviX_destroy_graph(Graph *g); + +/* Add an edge from one node a to b. Both nodes a and b will be implicitly added + * to the graph if they do not already exist. + */ +void raviX_add_edge(Graph *g, nodeId_t a, nodeId_t b); +/* Check if an edge exists from one node a to b */ +bool raviX_has_edge(Graph *g, nodeId_t a, nodeId_t b); +/* Delete an edge from a to b */ +void raviX_delete_edge(Graph *g, nodeId_t a, nodeId_t b); +/* Get the edge classification for edge from a to b; this is only available if graph has been + * analyzed for edges. */ +enum EdgeType raviX_get_edge_type(Graph *g, nodeId_t a, nodeId_t b); + +/* Get node identified by index */ +GraphNode *raviX_graph_node(Graph *g, nodeId_t index); +/* Get the RPO - reverse post order index of the node */ +uint32_t raviX_node_RPO(GraphNode *n); +/* Get the node's id */ +nodeId_t raviX_node_index(GraphNode *n); +/* Get list of predecessors */ +GraphNodeList *raviX_predecessors(GraphNode *n); +/* Get list of successors */ +GraphNodeList *raviX_successors(GraphNode *n); + +/* Number of entries in the node_list */ +uint32_t raviX_node_list_size(GraphNodeList *list); +/* Get the nodeId at given node_link position */ +nodeId_t raviX_node_list_at(GraphNodeList *list, uint32_t i); + +void raviX_for_each_node(Graph *g, void (*callback)(void *arg, Graph *g, nodeId_t nodeid), void *arg); + +/* + * Classifies links in the graph and also computes the + * reverse post order value. 
+ */ +void raviX_classify_edges(Graph *g); +/* + * Returns a sorted array (allocated). + * Sorted by reverse postorder value. + * If forward=true then + * it will be the opposite direction, so to get reverse postorder, + * set forward=false. + * You must deallocate the array when done. + * The array size will be equal to raviX_graph_size(g). + * Before attempting to sort, you must have called + * raviX_classify_edges(g). + */ +GraphNode **raviX_graph_nodes_sorted_by_RPO(Graph *g, bool forward); + +void raviX_sort_nodes_by_RPO(GraphNode **nodes, size_t count, bool forward); + +/* says how many nodes are in the graph */ +uint32_t raviX_graph_size(Graph *g); +/* Generates GraphViz (dot) output */ +void raviX_draw_graph(Graph *g, FILE *fp); + + +#endif \ No newline at end of file diff --git a/ravicomp/src/hash_table.c b/ravicomp/src/hash_table.c new file mode 100644 index 0000000..192b5b5 --- /dev/null +++ b/ravicomp/src/hash_table.c @@ -0,0 +1,427 @@ +/* + * Copyright © 2009 Intel Corporation + * Copyright © 1988-2004 Keith Packard and Bart Massey. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Except as contained in this notice, the names of the authors + * or their institutions shall not be used in advertising or + * otherwise to promote the sale, use or other dealings in this + * Software without prior written authorization from the + * authors. + * + * Authors: + * Eric Anholt + * Keith Packard + */ + +#include +#include + +#include + +#define ARRAY_SIZE(array) ((int)(sizeof(array) / sizeof(array[0]))) + +/* + * From Knuth -- a good choice for hash/rehash values is p, p-2 where + * p and p-2 are both prime. These tables are sized to have an extra 10% + * free to avoid exponential performance degradation as the hash table fills + */ + +static const uint32_t deleted_key_value; +static const void *deleted_key = &deleted_key_value; + +static const struct { + uint32_t max_entries, size, rehash; +} hash_sizes[] = { + { 2, 5, 3 }, + { 4, 7, 5 }, + { 8, 13, 11 }, + { 16, 19, 17 }, + { 32, 43, 41 }, + { 64, 73, 71 }, + { 128, 151, 149 }, + { 256, 283, 281 }, + { 512, 571, 569 }, + { 1024, 1153, 1151 }, + { 2048, 2269, 2267 }, + { 4096, 4519, 4517 }, + { 8192, 9013, 9011 }, + { 16384, 18043, 18041 }, + { 32768, 36109, 36107 }, + { 65536, 72091, 72089 }, + { 131072, 144409, 144407 }, + { 262144, 288361, 288359 }, + { 524288, 576883, 576881 }, + { 1048576, 1153459, 1153457 }, + { 2097152, 2307163, 2307161 }, + { 4194304, 4613893, 4613891 }, + { 8388608, 9227641, 9227639 }, + { 16777216, 18455029, 18455027 }, + { 33554432, 36911011, 36911009 }, + { 67108864, 73819861, 73819859 }, + { 134217728, 147639589, 147639587 }, + { 268435456, 295279081, 295279079 }, + { 536870912, 590559793, 590559791 }, + { 1073741824, 1181116273, 1181116271}, + { 2147483648ul, 2362232233ul, 2362232231ul} +}; + 
+static int +entry_is_free(const HashEntry *entry) +{ + return entry->key == NULL; +} + +static int +entry_is_deleted(const HashEntry *entry) +{ + return entry->key == deleted_key; +} + +static int +entry_is_present(const HashEntry *entry) +{ + return entry->key != NULL && entry->key != deleted_key; +} + +HashTable * +raviX_hash_table_create(uint32_t (*hash_function)(const void *key), + int (*key_equals_function)(const void *a, + const void *b)) +{ + HashTable *ht; + + ht = malloc(sizeof(*ht)); + if (ht == NULL) + return NULL; + + ht->size_index = 0; + ht->size = hash_sizes[ht->size_index].size; + ht->rehash = hash_sizes[ht->size_index].rehash; + ht->max_entries = hash_sizes[ht->size_index].max_entries; + ht->hash_function = hash_function; + ht->key_equals_function = key_equals_function; + ht->table = calloc(ht->size, sizeof(*ht->table)); + ht->entries = 0; + ht->deleted_entries = 0; + + if (ht->table == NULL) { + free(ht); + return NULL; + } + + return ht; +} + +/** + * Frees the given hash table. + * + * If delete_function is passed, it gets called on each entry present before + * freeing. + */ +void +raviX_hash_table_destroy(HashTable *ht, + void (*delete_function)(HashEntry *entry)) +{ + if (!ht) + return; + + if (delete_function) { + HashEntry *entry; + + hash_table_foreach(ht, entry) { + delete_function(entry); + } + } + free(ht->table); + free(ht); +} + +/** + * Finds a hash table entry with the given key. + * + * Returns NULL if no entry is found. Note that the data pointer may be + * modified by the user. + */ +HashEntry * +raviX_hash_table_search(HashTable *ht, const void *key) +{ + uint32_t hash = ht->hash_function(key); + + return raviX_hash_table_search_pre_hashed(ht, hash, key); +} + +/** + * Finds a hash table entry with the given key and hash of that key. + * + * Returns NULL if no entry is found. Note that the data pointer may be + * modified by the user. 
+ */ +HashEntry * +raviX_hash_table_search_pre_hashed(HashTable *ht, uint32_t hash, + const void *key) +{ + uint32_t start_hash_address = hash % ht->size; + uint32_t hash_address = start_hash_address; + + do { + uint32_t double_hash; + + HashEntry *entry = ht->table + hash_address; + + if (entry_is_free(entry)) { + return NULL; + } else if (entry_is_present(entry) && entry->hash == hash) { + if (ht->key_equals_function(key, entry->key)) { + return entry; + } + } + + double_hash = 1 + hash % ht->rehash; + + hash_address = (hash_address + double_hash) % ht->size; + } while (hash_address != start_hash_address); + + return NULL; +} + +static void +hash_table_rehash(HashTable *ht, int new_size_index) +{ + HashTable old_ht; + HashEntry *table, *entry; + + if (new_size_index >= ARRAY_SIZE(hash_sizes)) + return; + + table = calloc(hash_sizes[new_size_index].size, sizeof(*ht->table)); + if (table == NULL) + return; + + old_ht = *ht; + + ht->table = table; + ht->size_index = new_size_index; + ht->size = hash_sizes[ht->size_index].size; + ht->rehash = hash_sizes[ht->size_index].rehash; + ht->max_entries = hash_sizes[ht->size_index].max_entries; + ht->entries = 0; + ht->deleted_entries = 0; + + hash_table_foreach(&old_ht, entry) { + raviX_hash_table_insert_pre_hashed(ht, entry->hash, + entry->key, entry->data); + } + + free(old_ht.table); +} + +/** + * Inserts the key into the table. + * + * Note that insertion may rearrange the table on a resize or rehash, + * so previously found hash_entries are no longer valid after this function. + */ +HashEntry * +raviX_hash_table_insert(HashTable *ht, const void *key, void *data) +{ + uint32_t hash = ht->hash_function(key); + + /* Make sure nobody tries to add one of the magic values as a + * key. If you need to do so, either do so in a wrapper, or + * store keys with the magic values separately in the struct + * hash_table. 
+ */ + assert(key != NULL); + + return raviX_hash_table_insert_pre_hashed(ht, hash, key, data); +} + +/** + * Inserts the key with the given hash into the table. + * + * Note that insertion may rearrange the table on a resize or rehash, + * so previously found hash_entries are no longer valid after this function. + */ +HashEntry * +raviX_hash_table_insert_pre_hashed(HashTable *ht, uint32_t hash, + const void *key, void *data) +{ + uint32_t start_hash_address, hash_address; + HashEntry *available_entry = NULL; + + if (ht->entries >= ht->max_entries) { + hash_table_rehash(ht, ht->size_index + 1); + } else if (ht->deleted_entries + ht->entries >= ht->max_entries) { + hash_table_rehash(ht, ht->size_index); + } + + start_hash_address = hash % ht->size; + hash_address = start_hash_address; + do { + HashEntry *entry = ht->table + hash_address; + uint32_t double_hash; + + if (!entry_is_present(entry)) { + /* Stash the first available entry we find */ + if (available_entry == NULL) + available_entry = entry; + if (entry_is_free(entry)) + break; + } + + /* Implement replacement when another insert happens + * with a matching key. This is a relatively common + * feature of hash tables, with the alternative + * generally being "insert the new value as well, and + * return it first when the key is searched for". + * + * Note that the hash table doesn't have a delete + * callback. If freeing of old data pointers is + * required to avoid memory leaks, perform a search + * before inserting. 
+ */ + if (!entry_is_deleted(entry) && + entry->hash == hash && + ht->key_equals_function(key, entry->key)) { + entry->key = key; + entry->data = data; + return entry; + } + + + double_hash = 1 + hash % ht->rehash; + + hash_address = (hash_address + double_hash) % ht->size; + } while (hash_address != start_hash_address); + + if (available_entry) { + if (entry_is_deleted(available_entry)) + ht->deleted_entries--; + available_entry->hash = hash; + available_entry->key = key; + available_entry->data = data; + ht->entries++; + return available_entry; + } + + /* We could hit here if a required resize failed. An unchecked-malloc + * application could ignore this result. + */ + return NULL; +} + +/** + * This function searches for, and removes an entry from the hash table. + * + * If the caller has previously found a HashEntry pointer, + * (from calling hash_table_search or remembering it from + * hash_table_insert), then hash_table_remove_entry can be called + * instead to avoid an extra search. + */ +void +raviX_hash_table_remove(HashTable *ht, const void *key) +{ + HashEntry *entry; + + entry = raviX_hash_table_search(ht, key); + + raviX_hash_table_remove_entry(ht, entry); +} + +/** + * This function deletes the given hash table entry. + * + * Note that deletion doesn't otherwise modify the table, so an iteration over + * the table deleting entries is safe. + */ +void +raviX_hash_table_remove_entry(HashTable *ht, HashEntry *entry) +{ + if (!entry) + return; + + entry->key = deleted_key; + ht->entries--; + ht->deleted_entries++; +} + +/** + * This function is an iterator over the hash table. + * + * Pass in NULL for the first entry, as in the start of a for loop. Note that + * an iteration over the table is O(table_size) not O(entries). 
+ */ +HashEntry * +raviX_hash_table_next_entry(HashTable *ht, HashEntry *entry) +{ + if (entry == NULL) + entry = ht->table; + else + entry = entry + 1; + + for (; entry != ht->table + ht->size; entry++) { + if (entry_is_present(entry)) { + return entry; + } + } + + return NULL; +} + +#if 0 +#ifndef _WIN32 +/** + * Returns a random entry from the hash table. + * + * This may be useful in implementing random replacement (as opposed + * to just removing everything) in caches based on this hash table + * implementation. @predicate may be used to filter entries, or may + * be set to NULL for no filtering. + */ +HashEntry * +hash_table_random_entry(HashTable *ht, + int (*predicate)(HashEntry *entry)) +{ + HashEntry *entry; + uint32_t i = random() % ht->size; + + if (ht->entries == 0) + return NULL; + + for (entry = ht->table + i; entry != ht->table + ht->size; entry++) { + if (entry_is_present(entry) && + (!predicate || predicate(entry))) { + return entry; + } + } + + for (entry = ht->table; entry != ht->table + i; entry++) { + if (entry_is_present(entry) && + (!predicate || predicate(entry))) { + return entry; + } + } + + return NULL; +} +#endif +#endif \ No newline at end of file diff --git a/ravicomp/src/hash_table.h b/ravicomp/src/hash_table.h new file mode 100644 index 0000000..0ed5810 --- /dev/null +++ b/ravicomp/src/hash_table.h @@ -0,0 +1,109 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all 
copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * + */ + +#ifndef ravicomp_HASH_TABLE_H +#define ravicomp_HASH_TABLE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +typedef struct HashEntry { + uint32_t hash; + const void *key; + void *data; +} HashEntry; + +typedef struct HashTable { + HashEntry *table; + uint32_t (*hash_function)(const void *key); + int (*key_equals_function)(const void *a, const void *b); + uint32_t size; + uint32_t rehash; + uint32_t max_entries; + uint32_t size_index; + uint32_t entries; + uint32_t deleted_entries; +} HashTable; + +HashTable * +raviX_hash_table_create(uint32_t (*hash_function)(const void *key), + int (*key_equals_function)(const void *a, + const void *b)); +void +raviX_hash_table_destroy(HashTable *ht, + void (*delete_function)(HashEntry *entry)); + +HashEntry * +raviX_hash_table_insert(HashTable *ht, const void *key, void *data); + +HashEntry * +raviX_hash_table_search(HashTable *ht, const void *key); + +void +raviX_hash_table_remove(HashTable *ht, const void *key); + +void +raviX_hash_table_remove_entry(HashTable *ht, HashEntry *entry); + +HashEntry * +raviX_hash_table_next_entry(HashTable *ht, + HashEntry *entry); + +//HashEntry * +//hash_table_random_entry(HashTable *ht, +// int (*predicate)(HashEntry *entry)); + +/** + * This foreach function is safe against deletion (which just replaces + * an entry's data with the deleted marker), but not against insertion + * (which may rehash the 
table, making entry a dangling pointer). + */ +#define hash_table_foreach(ht, entry) \ + for (entry = raviX_hash_table_next_entry(ht, NULL); \ + entry != NULL; \ + entry = raviX_hash_table_next_entry(ht, entry)) + +/* Alternate interfaces to reduce repeated calls to hash function. */ +HashEntry * +raviX_hash_table_search_pre_hashed(HashTable *ht, + uint32_t hash, + const void *key); + +HashEntry * +raviX_hash_table_insert_pre_hashed(HashTable *ht, + uint32_t hash, + const void *key, void *data); + + +#ifdef __cplusplus +} /* extern C */ +#endif + +#endif diff --git a/ravicomp/src/lexer.c b/ravicomp/src/lexer.c new file mode 100644 index 0000000..7d41f71 --- /dev/null +++ b/ravicomp/src/lexer.c @@ -0,0 +1,990 @@ +/** + * The lexer is basically a hacked version of Lua 5.3 lexer. + * Copyright (C) 1994-2019 Lua.org, PUC-Rio. + */ + +#include "fnv_hash.h" +#include "parser.h" + +#include +#include +#include + +enum { EOZ = -1 }; /* end of stream */ +#define cast(t, v) ((t)(v)) /* v is parenthesized so the cast applies to the whole argument expression, e.g. cast_int(a % b) */ +#define cast_int(v) cast(int, v) +#define cast_uchar(c) cast(unsigned char, c) +#define cast_num(n) cast(lua_Number, n) +#define l_castU2S(i) ((lua_Integer)(i)) +static inline int zgetc(LexerState *z) { return z->n-- > 0 ? 
cast_uchar(*z->p++) : EOZ; } +static inline void next(LexerState *ls) { ls->current = zgetc(ls); } +static inline bool currIsNewline(LexerState *ls) { return ls->current == '\n' || ls->current == '\r'; } + +static inline char lua_getlocaledecpoint(void) { return localeconv()->decimal_point[0]; } + +#define ARRAY_SIZE(array) ((int)(sizeof(array) / sizeof(array[0]))) +/*Note: Following array was generated using utils/tokenstr.h */ +static const char *const luaX_tokens[] = { + "and", + "break", + "do", + "else", + "elseif", + "end", + "false", + "for", + "function", + "goto", + "if", + "in", + "local", + "defer", + "nil", + "not", + "or", + "repeat", + "return", + "then", + "true", + "until", + "while", + "/" + "/", + "..", + "...", + "==", + ">=", + "<=", + "~=", + "<<", + ">>", + "::", + "@integer", + "@number", + "@integer[]", + "@number[]", + "@table", + "@string", + "@closure", + "", + "", + "", + "", + "", +}; +/* Says whether the given string represents a Lua/Ravi keyword i.e. reserved word */ +static inline int is_reserved(const StringObject *s) { return s->reserved; } + +enum { ALPHABIT = 0, DIGITBIT = 1, PRINTBIT = 2, SPACEBIT = 3, XDIGITBIT = 4 }; + +#define MASK(B) (1 << (B)) + +static const lu_byte luai_ctype_[UCHAR_MAX + 2] = { + 0x00, /* EOZ */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0. */ + 0x00, 0x08, 0x08, 0x08, 0x08, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 1. */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, /* 2. */ + 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, /* 3. */ + 0x16, 0x16, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x05, /* 4. */ + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, /* 5. */ + 0x05, 0x05, 0x05, 0x04, 0x04, 0x04, 0x04, 0x05, 0x04, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x05, /* 6. 
*/ + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, /* 7. */ + 0x05, 0x05, 0x05, 0x04, 0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 8. */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 9. */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* a. */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* b. */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* c. */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* d. */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* e. */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* f. */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +/* +** add 1 to char to allow index -1 (EOZ) +*/ +#define testprop(c, p) (luai_ctype_[(c) + 1] & (p)) + +/* +** 'lalpha' (Lua alphabetic) and 'lalnum' (Lua alphanumeric) both include '_' +*/ +static inline bool lislalpha(int c) { return testprop(c, MASK(ALPHABIT)); } +static inline bool lislalnum(int c) { return testprop(c, (MASK(ALPHABIT) | MASK(DIGITBIT))); } +static inline bool lisdigit(int c) { return testprop(c, MASK(DIGITBIT)); } +static inline bool lisspace(int c) { return testprop(c, MASK(SPACEBIT)); } +// static inline bool lisprint(int c) { return testprop(c, MASK(PRINTBIT)); } +static inline bool lisxdigit(int c) { return testprop(c, MASK(XDIGITBIT)); } + +/* +** this 'ltolower' only works for alphabetic characters +*/ +static inline int ltolower(int c) { return ((c) | ('A' ^ 'a')); } + +/* +Creates a new string object. string objects are interned in a hash set. 
+If the string matches a keyword then the reserved attribute will be set to the token id associated +with the keyword else this attribute will be -1. The hash value of the string is stored the 'hash' +attribute. Note that we need to allow strings that have embedded 0 character hence the length +is explicit. But all tokens and reserved keywords are expected to be standard C strings. +*/ +const StringObject *raviX_create_string(CompilerState *compiler_state, const char *input, uint32_t len) +{ + StringObject temp = {.len = len, .str = input, .hash = fnv1_hash_data(input, len), .reserved = -1}; + SetEntry *entry = raviX_set_search_pre_hashed(compiler_state->strings, temp.hash, &temp); + if (entry != NULL) + /* found the string */ + return (StringObject *)entry->key; + else { + StringObject *new_string = raviX_allocator_allocate(&compiler_state->string_object_allocator, 0); + char *s = raviX_allocator_allocate(&compiler_state->string_allocator, len + 1); /* allow for 0 terminator */ + memcpy(s, input, len); + s[len] = 0; /* 0 terminate string, however string may contain embedded 0 characters */ + new_string->str = s; + new_string->len = len; + new_string->hash = temp.hash; + new_string->reserved = -1; + /* Check if this is a keyword, linear search is okay as we do this only when we first + * encounter a keyword + */ + for (int i = 0; i < ARRAY_SIZE(luaX_tokens); i++) { + if (lislalpha(luaX_tokens[i][0]) || luaX_tokens[i][0] == '@') { + if (strcmp(luaX_tokens[i], s) == 0) { + new_string->reserved = i; /* save index of the keyword */ + break; + } + } + } + raviX_set_add_pre_hashed(compiler_state->strings, temp.hash, new_string); + return new_string; + } +} + +#define lua_str2number(s, p) ((lua_Number)strtod((s), (p))) +static void save(LexerState *ls, int c); + +void raviX_token2str(int token, TextBuffer *mb) +{ + if (token < FIRST_RESERVED) { /* single-byte symbols? 
*/ + assert(token == cast_uchar(token)); + raviX_buffer_add_fstring(mb, "'%c'", token); + } else { + const char *s = luaX_tokens[token - FIRST_RESERVED]; + if (token < TOK_EOS) /* fixed format - reserved keywords */ + raviX_buffer_add_fstring(mb, "'%s'", s); + else /* names, strings, and numerals - note that @ is covered here */ + raviX_buffer_add_string(mb, s); + } +} + +static void txtToken(LexerState *ls, int token) +{ + switch (token) { + case TOK_NAME: + case TOK_STRING: + case TOK_FLT: + case TOK_INT: + save(ls, '\0'); + raviX_buffer_add_fstring(&ls->container->error_message, "'%s'", raviX_buffer_data(ls->buff)); + break; + default: + raviX_token2str(token, &ls->container->error_message); + } +} +static void lexerror(LexerState *ls, const char *msg, int token) +{ + raviX_buffer_add_fstring(&ls->container->error_message, "%s(%d): %s", ls->source, ls->linenumber, msg); + if (token) { + raviX_buffer_add_string(&ls->container->error_message, " near "); + txtToken(ls, token); + } + longjmp(ls->container->env, 1); +} + +void raviX_syntaxerror(LexerState *ls, const char *msg) { lexerror(ls, msg, ls->t.token); } + +static void save(LexerState *ls, int c) +{ + TextBuffer *b = ls->buff; + if (raviX_buffer_len(b) + 1 > raviX_buffer_size(b)) { + size_t newsize; + if (raviX_buffer_size(b) >= INT_MAX / 2) + lexerror(ls, "lexical element too long", 0); + size_t oldsize = raviX_buffer_size(b); + if (oldsize == 0) + newsize = 32; + else + newsize = oldsize * 2; + raviX_buffer_resize(b, newsize); + } + raviX_buffer_addc(b, c); +} + +static inline void save_and_next(LexerState *ls) +{ + save(ls, ls->current); + next(ls); +} + +/* +** creates a new interned string. 
+*/ +static const StringObject *luaX_newstring(LexerState *ls, const char *str, uint32_t l) +{ + return raviX_create_string(ls->container, str, l); +} + +/* +** increment line number and skips newline sequence (any of +** \n, \r, \n\r, or \r\n) +*/ +static void inclinenumber(LexerState *ls) +{ + int old = ls->current; + assert(currIsNewline(ls)); + next(ls); /* skip '\n' or '\r' */ + if (currIsNewline(ls) && ls->current != old) + next(ls); /* skip '\n\r' or '\r\n' */ + if (++ls->linenumber >= INT_MAX) + lexerror(ls, "chunk has too many lines", 0); +} + +LexerState *raviX_init_lexer(CompilerState *container, const char *buf, size_t buflen, + const char *source) +{ + LexerState *ls = (LexerState *)calloc(1, sizeof(LexerState)); + ls->container = container; + ls->t.token = 0; + ls->buf = buf; + ls->bufsize = buflen; + ls->n = ls->bufsize; + ls->p = ls->buf; + ls->current = zgetc(ls); + ls->lookahead.token = TOK_EOS; /* no look-ahead token */ + ls->linenumber = 1; + ls->lastline = 1; + ls->source = source; + ls->envn = raviX_create_string(ls->container, LUA_ENV, (uint32_t)strlen(LUA_ENV))->str; /* get env name */ + ls->buff = &container->buff; + for (int i = 0; i < NUM_RESERVED; i++) { + raviX_create_string(ls->container, luaX_tokens[i], (uint32_t)strlen(luaX_tokens[i])); + } + return ls; +} + +void raviX_destroy_lexer(LexerState *ls) +{ + if (ls == NULL) + return; + free(ls); +} + +const LexerInfo *raviX_get_lexer_info(LexerState *ls) { return (LexerInfo *)ls; } + +/* +** ======================================================= +** LEXICAL ANALYZER +** ======================================================= +*/ + +static int check_next1(LexerState *ls, int c) +{ + if (ls->current == c) { + next(ls); + return 1; + } else + return 0; +} + +static int check_save_next1(LexerState *ls, int c) +{ + if (ls->current == c) { + save_and_next(ls); + return 1; + } else + return 0; +} + +/* +** Check whether current char is in set 'set' (with two chars) and +** saves it +*/ +static 
int check_next2(LexerState *ls, const char *set) +{ + assert(set[2] == '\0'); + if (ls->current == set[0] || ls->current == set[1]) { + save_and_next(ls); + return 1; + } else + return 0; +} + +static int luaO_hexavalue(int c) +{ + if (lisdigit(c)) + return c - '0'; + else + return (ltolower(c) - 'a') + 10; +} + +static int isneg(const char **s) +{ + if (**s == '-') { + (*s)++; + return 1; + } else if (**s == '+') + (*s)++; + return 0; +} + +/* +** {================================================================== +** Lua's implementation for 'lua_strx2number' +** =================================================================== +*/ + +#if !defined(lua_strx2number) + +/* maximum number of significant digits to read (to avoid overflows + even with single floats) */ +#define MAXSIGDIG 30 + +/* +** convert an hexadecimal numeric string to a number, following +** C99 specification for 'strtod' +*/ +static lua_Number lua_strx2number(const char *s, char **endptr) +{ + int dot = lua_getlocaledecpoint(); + lua_Number r = 0.0; /* result (accumulator) */ + int sigdig = 0; /* number of significant digits */ + int nosigdig = 0; /* number of non-significant digits */ + int e = 0; /* exponent correction */ + int neg; /* 1 if number is negative */ + int hasdot = 0; /* true after seen a dot */ + *endptr = (char *)s; /* nothing is valid yet */ + while (lisspace(cast_uchar(*s))) + s++; /* skip initial spaces */ + neg = isneg(&s); /* check signal */ + if (!(*s == '0' && (*(s + 1) == 'x' || *(s + 1) == 'X'))) /* check '0x' */ + return 0.0; /* invalid format (no '0x') */ + for (s += 2;; s++) { /* skip '0x' and read numeral */ + if (*s == dot) { + if (hasdot) + break; /* second dot? stop loop */ + else + hasdot = 1; + } else if (lisxdigit(cast_uchar(*s))) { + if (sigdig == 0 && *s == '0') /* non-significant digit (zero)? */ + nosigdig++; + else if (++sigdig <= MAXSIGDIG) /* can read it without overflow? 
*/ + r = (r * cast_num(16.0)) + luaO_hexavalue(*s); + else + e++; /* too many digits; ignore, but still count for exponent */ + if (hasdot) + e--; /* decimal digit? correct exponent */ + } else + break; /* neither a dot nor a digit */ + } + if (nosigdig + sigdig == 0) /* no digits? */ + return 0.0; /* invalid format */ + *endptr = (char *)s; /* valid up to here */ + e *= 4; /* each digit multiplies/divides value by 2^4 */ + if (*s == 'p' || *s == 'P') { /* exponent part? */ + int exp1 = 0; /* exponent value */ + int neg1; /* exponent signal */ + s++; /* skip 'p' */ + neg1 = isneg(&s); /* signal */ + if (!lisdigit(cast_uchar(*s))) + return 0.0; /* invalid; must have at least one digit */ + while (lisdigit(cast_uchar(*s))) /* read exponent */ + exp1 = exp1 * 10 + *(s++) - '0'; + if (neg1) + exp1 = -exp1; + e += exp1; + *endptr = (char *)s; /* valid up to here */ + } + if (neg) + r = -r; + return (lua_Number)ldexp(r, e); +} + +#endif +/* }====================================================== */ + +/* maximum length of a numeral */ +#if !defined(L_MAXLENNUM) +#define L_MAXLENNUM 200 +#endif + +static const char *l_str2dloc(const char *s, lua_Number *result, int mode) +{ + char *endptr; + *result = (mode == 'x') ? lua_strx2number(s, &endptr) /* try to convert */ + : lua_str2number(s, &endptr); + if (endptr == s) + return NULL; /* nothing recognized? */ + while (lisspace(cast_uchar(*endptr))) + endptr++; /* skip trailing spaces */ + return (*endptr == '\0') ? endptr : NULL; /* OK if no trailing characters */ +} + +/* +** Convert string 's' to a Lua number (put in 'result'). Return NULL +** on fail or the address of the ending '\0' on success. +** 'pmode' points to (and 'mode' contains) special things in the string: +** - 'x'/'X' means an hexadecimal numeral +** - 'n'/'N' means 'inf' or 'nan' (which should be rejected) +** - '.' 
just optimizes the search for the common case (nothing special) +** This function accepts both the current locale or a dot as the radix +** mark. If the convertion fails, it may mean number has a dot but +** locale accepts something else. In that case, the code copies 's' +** to a buffer (because 's' is read-only), changes the dot to the +** current locale radix mark, and tries to convert again. +*/ +static const char *l_str2d(const char *s, lua_Number *result) +{ + const char *endptr; + const char *pmode = strpbrk(s, ".xXnN"); + int mode = pmode ? ltolower(cast_uchar(*pmode)) : 0; + if (mode == 'n') /* reject 'inf' and 'nan' */ + return NULL; + endptr = l_str2dloc(s, result, mode); /* try to convert */ + if (endptr == NULL) { /* failed? may be a different locale */ + char buff[L_MAXLENNUM + 1]; + const char *pdot = strchr(s, '.'); + if (strlen(s) > L_MAXLENNUM || pdot == NULL) + return NULL; /* string too long or no dot; fail */ + strcpy(buff, s); /* copy string to buffer */ + buff[pdot - s] = lua_getlocaledecpoint(); /* correct decimal point */ + endptr = l_str2dloc(buff, result, mode); /* try again */ + if (endptr != NULL) + endptr = s + (endptr - buff); /* make relative to 's' */ + } + return endptr; +} + +#define MAXBY10 cast(lua_Unsigned, LUA_MAXINTEGER / 10) +#define MAXLASTD cast_int(LUA_MAXINTEGER % 10) + +static const char *l_str2int(const char *s, lua_Integer *result) +{ + lua_Unsigned a = 0; + int empty = 1; + int neg; + while (lisspace(cast_uchar(*s))) + s++; /* skip initial spaces */ + neg = isneg(&s); + if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { /* hex? */ + s += 2; /* skip '0x' */ + for (; lisxdigit(cast_uchar(*s)); s++) { + a = a * 16 + luaO_hexavalue(*s); + empty = 0; + } + } else { /* decimal */ + for (; lisdigit(cast_uchar(*s)); s++) { + int d = *s - '0'; + if (a >= MAXBY10 && (a > MAXBY10 || d > MAXLASTD + neg)) /* overflow? 
*/ + return NULL; /* do not accept it (as integer) */ + a = a * 10 + d; + empty = 0; + } + } + while (lisspace(cast_uchar(*s))) + s++; /* skip trailing spaces */ + if (empty || *s != '\0') + return NULL; /* something wrong in the numeral */ + else { + *result = l_castU2S((neg) ? 0u - a : a); + return s; + } +} + +struct konst { + uint8_t type; + union { + lua_Integer i; + lua_Number n; + }; +}; + +static size_t luaO_str2num(const char *s, struct konst *o) +{ + lua_Integer i; + lua_Number n; + const char *e; + if ((e = l_str2int(s, &i)) != NULL) { /* try as an integer */ + o->i = i; + o->type = 1; + } else if ((e = l_str2d(s, &n)) != NULL) { /* else try as a float */ + o->n = n; + o->type = 2; + } else + return 0; /* conversion failed */ + return (e - s) + 1; /* success; return string size */ +} + +/* LUA_NUMBER */ +/* +** this function is quite liberal in what it accepts, as 'luaO_str2num' +** will reject ill-formed numerals. +*/ +static int read_numeral(LexerState *ls, SemInfo *seminfo) +{ + struct konst obj; + const char *expo = "Ee"; + int first = ls->current; + assert(lisdigit(ls->current)); + save_and_next(ls); + if (first == '0' && check_next2(ls, "xX")) /* hexadecimal? */ + expo = "Pp"; + for (;;) { + if (check_next2(ls, expo)) /* exponent part? */ + check_next2(ls, "-+"); /* optional exponent sign */ + if (lisxdigit(ls->current)) + save_and_next(ls); + else if (ls->current == '.') + save_and_next(ls); + else + break; + } + save(ls, '\0'); + if (luaO_str2num(raviX_buffer_data(ls->buff), &obj) == 0) /* format error? 
*/ + lexerror(ls, "malformed number", TOK_FLT); + if (obj.type == 1) { + seminfo->i = obj.i; + return TOK_INT; + } else { + assert(obj.type == 2); + seminfo->r = obj.n; + return TOK_FLT; + } +} + +/* +** skip a sequence '[=*[' or ']=*]'; if sequence is well formed, return +** its number of '='s; otherwise, return a negative number (-1 iff there +** are no '='s after initial bracket) +*/ +static int skip_sep(LexerState *ls) +{ + int count = 0; + int s = ls->current; + assert(s == '[' || s == ']'); + save_and_next(ls); + while (ls->current == '=') { + save_and_next(ls); + count++; + } + return (ls->current == s) ? count : (-count) - 1; +} + +static void read_long_string(LexerState *ls, SemInfo *seminfo, int sep) /* reads a long string into seminfo->ts, or just skips a long comment when seminfo is NULL; sep is the number of '=' in the opening bracket */ +{ + int line = ls->linenumber; /* initial line (for error message) */ + (void)line; + save_and_next(ls); /* skip 2nd '[' */ + if (currIsNewline(ls)) /* string starts with a newline? */ + inclinenumber(ls); /* skip it */ + for (;;) { + switch (ls->current) { + case EOZ: { /* error */ + const char *what = (seminfo ? 
"string" : "comment"); + const char *msg = (seminfo ? "unfinished long string" : "unfinished long comment"); /* static text; the starting line is not reported */ + (void)what; + // luaO_pushfstring(ls->L, "unfinished long %s (starting at line %d)", + // what, line); + lexerror(ls, msg, TOK_EOS); + break; /* to avoid warnings */ + } + case ']': { + if (skip_sep(ls) == sep) { + save_and_next(ls); /* skip 2nd ']' */ + goto endloop; + } + break; + } + case '\n': + case '\r': { + save(ls, '\n'); + inclinenumber(ls); + if (!seminfo) + raviX_buffer_reset(ls->buff); /* avoid wasting space */ + break; + } + default: { + if (seminfo) + save_and_next(ls); + else + next(ls); + } + } + } +endloop: + if (seminfo) + seminfo->ts = luaX_newstring(ls, raviX_buffer_data(ls->buff) + (2 + sep), + (uint32_t)(raviX_buffer_len(ls->buff) - 2 * (2 + sep))); +} + +static void esccheck(LexerState *ls, int c, const char *msg) +{ + if (!c) { + if (ls->current != EOZ) + save_and_next(ls); /* add current to buffer for error message */ + lexerror(ls, msg, TOK_STRING); + } +} + +static int gethexa(LexerState *ls) +{ + save_and_next(ls); + esccheck(ls, lisxdigit(ls->current), "hexadecimal digit expected"); + return luaO_hexavalue(ls->current); +} + +static int readhexaesc(LexerState *ls) +{ + int r = gethexa(ls); + r = (r << 4) + gethexa(ls); + raviX_buffer_remove(ls->buff, 2); /* remove saved chars from buffer */ + return r; +} + +// static unsigned long readutf8esc (LexerState *ls) { +// unsigned long r; +// int i = 4; /* chars to be removed: '\', 'u', '{', and first digit */ +// save_and_next(ls); /* skip 'u' */ +// esccheck(ls, ls->current == '{', "missing '{'"); +// r = gethexa(ls); /* must have at least one digit */ +// while ((save_and_next(ls), lisxdigit(ls->current))) { +// i++; +// r = (r << 4) + luaO_hexavalue(ls->current); +// esccheck(ls, r <= 0x10FFFF, "UTF-8 value too large"); +// } +// esccheck(ls, ls->current == '}', "missing '}'"); +// next(ls); /* skip '}' */ +// 
raviX_buffer_remove(ls->buff, i); /* remove saved chars from buffer */ +// return r; +//} +// +// +// static void utf8esc 
(LexerState *ls) { +// char buff[UTF8BUFFSZ]; +// int n = luaO_utf8esc(buff, readutf8esc(ls)); +// for (; n > 0; n--) /* add 'buff' to string */ +// save(ls, buff[UTF8BUFFSZ - n]); +//} + +static int readdecesc(LexerState *ls) +{ + int i; + int r = 0; /* result accumulator */ + for (i = 0; i < 3 && lisdigit(ls->current); i++) { /* read up to 3 digits */ + r = 10 * r + ls->current - '0'; + save_and_next(ls); + } + esccheck(ls, r <= UCHAR_MAX, "decimal escape too large"); + raviX_buffer_remove(ls->buff, i); /* remove read digits from buffer */ + return r; +} + +static void read_string(LexerState *ls, int del, SemInfo *seminfo) +{ + save_and_next(ls); /* keep delimiter (for error messages) */ + while (ls->current != del) { + switch (ls->current) { + case EOZ: + lexerror(ls, "unfinished string", TOK_EOS); + break; /* to avoid warnings */ + case '\n': + case '\r': + lexerror(ls, "unfinished string", TOK_STRING); + break; /* to avoid warnings */ + case '\\': { /* escape sequences */ + int c; /* final character to be saved */ + save_and_next(ls); /* keep '\\' for error messages */ + switch (ls->current) { + case 'a': + c = '\a'; + goto read_save; + case 'b': + c = '\b'; + goto read_save; + case 'f': + c = '\f'; + goto read_save; + case 'n': + c = '\n'; + goto read_save; + case 'r': + c = '\r'; + goto read_save; + case 't': + c = '\t'; + goto read_save; + case 'v': + c = '\v'; + goto read_save; + case 'x': + c = readhexaesc(ls); + goto read_save; + // TODO - FIXME + // case 'u': utf8esc(ls); goto no_save; + case '\n': + case '\r': + inclinenumber(ls); + c = '\n'; + goto only_save; + case '\\': + case '\"': + case '\'': + c = ls->current; + goto read_save; + case EOZ: + goto no_save; /* will raise an error next loop */ + case 'z': { /* zap following span of spaces */ + raviX_buffer_remove(ls->buff, 1); /* remove '\\' */ + next(ls); /* skip the 'z' */ + while (lisspace(ls->current)) { + if (currIsNewline(ls)) + inclinenumber(ls); + else + next(ls); + } + goto no_save; + } + 
default: { + esccheck(ls, lisdigit(ls->current), "invalid escape sequence"); + c = readdecesc(ls); /* digital escape '\ddd' */ + goto only_save; + } + } + read_save: + next(ls); + /* go through */ + only_save: + raviX_buffer_remove(ls->buff, 1); /* remove '\\' */ + save(ls, c); + /* go through */ + no_save: + break; + } + default: + save_and_next(ls); + } + } + save_and_next(ls); /* skip delimiter */ + seminfo->ts = luaX_newstring(ls, raviX_buffer_data(ls->buff) + 1, (uint32_t)(raviX_buffer_len(ls->buff) - 2)); +} + +/* +** RAVI extension: generate a token for the cast operators - +** @number, @number[], @integer, @integer[], @table, @string, @closure. The text saved in the lexer buffer is not 0 terminated (hence the explicit length n) and must equal an operator exactly, not merely be a prefix of one; any other text yields the '@' token with the name following '@' interned into seminfo->ts. +*/ +static int casttoken(LexerState *ls, SemInfo *seminfo) +{ + size_t n = raviX_buffer_len(ls->buff); + const char *s = raviX_buffer_data(ls->buff); + int tok; + + /* @integer or @integer[] */ + if (n == sizeof("@integer") - 1 && strncmp(s, "@integer", n) == 0) + tok = TOK_TO_INTEGER; + else if (n == sizeof("@integer[]") - 1 && strncmp(s, "@integer[]", n) == 0) + tok = TOK_TO_INTARRAY; + /* @number or @number[] */ + else if (n == sizeof("@number") - 1 && strncmp(s, "@number", n) == 0) + tok = TOK_TO_NUMBER; + else if (n == sizeof("@number[]") - 1 && strncmp(s, "@number[]", n) == 0) + tok = TOK_TO_NUMARRAY; + /* @table */ + else if (n == sizeof("@table") - 1 && strncmp(s, "@table", n) == 0) + tok = TOK_TO_TABLE; + else if (n == sizeof("@string") - 1 && strncmp(s, "@string", n) == 0) + tok = TOK_TO_STRING; + else if (n == sizeof("@closure") - 1 && strncmp(s, "@closure", n) == 0) + tok = TOK_TO_CLOSURE; + else { + seminfo->ts = luaX_newstring(ls, s + 1, (uint32_t)(n - 1)); /* omit @ */ + tok = '@'; + } + raviX_buffer_remove(ls->buff, (int)n); /* rewind but buffer still holds the saved 
characters */ + return tok; +} + +static int llex(LexerState *ls, SemInfo *seminfo) +{ + raviX_buffer_reset(ls->buff); + for (;;) { + switch (ls->current) { + case '\n': + case '\r': { /* line breaks */ + inclinenumber(ls); + break; + } + case ' ': + case '\f': + case '\t': + case '\v': { /* spaces */ + next(ls); + break; + } + case '-': { /* '-' or '--' (comment) */ + next(ls); + if (ls->current != '-') + return '-'; + /* else is a comment */ + 
next(ls); + if (ls->current == '[') { /* long comment? */ + int sep = skip_sep(ls); + raviX_buffer_reset(ls->buff); /* 'skip_sep' may dirty the buffer */ + if (sep >= 0) { + read_long_string(ls, NULL, sep); /* skip long comment */ + raviX_buffer_reset(ls->buff); /* previous call may dirty the buff. */ + break; + } + } + /* else short comment */ + while (!currIsNewline(ls) && ls->current != EOZ) + next(ls); /* skip until end of line (or end of file) */ + break; + } + case '[': { /* long string or simply '[' */ + int sep = skip_sep(ls); + if (sep >= 0) { + read_long_string(ls, seminfo, sep); + return TOK_STRING; + } else if (sep != -1) /* '[=...' missing second bracket */ + lexerror(ls, "invalid long string delimiter", TOK_STRING); + return '['; + } + case '=': { + next(ls); + if (check_next1(ls, '=')) + return TOK_EQ; + else + return '='; + } + case '<': { + next(ls); + if (check_next1(ls, '=')) + return TOK_LE; + else if (check_next1(ls, '<')) + return TOK_SHL; + else + return '<'; + } + case '>': { + next(ls); + if (check_next1(ls, '=')) + return TOK_GE; + else if (check_next1(ls, '>')) + return TOK_SHR; + else + return '>'; + } + case '/': { + next(ls); + if (check_next1(ls, '/')) + return TOK_IDIV; + else + return '/'; + } + case '~': { + next(ls); + if (check_next1(ls, '=')) + return TOK_NE; + else + return '~'; + } + case ':': { + next(ls); + if (check_next1(ls, ':')) + return TOK_DBCOLON; + else + return ':'; + } + case '"': + case '\'': { /* short literal strings */ + read_string(ls, ls->current, seminfo); + return TOK_STRING; + } + case '.': { /* '.', '..', '...', or number */ + save_and_next(ls); + if (check_next1(ls, '.')) { + if (check_next1(ls, '.')) + return TOK_DOTS; /* '...' */ + else + return TOK_CONCAT; /* '..' 
*/ + } else if (!lisdigit(ls->current)) + return '.'; + else + return read_numeral(ls, seminfo); + } + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + return read_numeral(ls, seminfo); + } + case EOZ: { + return TOK_EOS; + } + case '@': { + /* RAVI change: @ introduces a type assertion operator */ + save_and_next(ls); + while (lislalnum(ls->current)) { + save_and_next(ls); + } + check_save_next1(ls, '['); + check_save_next1(ls, ']'); + return casttoken(ls, seminfo); + } + default: { + if (lislalpha(ls->current)) { /* identifier or reserved word? */ + const StringObject *ts; + do { + save_and_next(ls); + } while (lislalnum(ls->current)); + ts = raviX_create_string(ls->container, raviX_buffer_data(ls->buff), + (int32_t)raviX_buffer_len(ls->buff)); + seminfo->ts = ts; + int tok = is_reserved(ts); + if (tok != -1) /* reserved word? */ + return tok + FIRST_RESERVED; + else { + return TOK_NAME; + } + } else { /* single-char tokens (+ - / ...) */ + int c = ls->current; + next(ls); + return c; + } + } + } + } +} + +void raviX_next(LexerState *ls) +{ + ls->lastline = ls->linenumber; + if (ls->lookahead.token != TOK_EOS) { /* is there a look-ahead token? */ + ls->t = ls->lookahead; /* use this one */ + ls->lookahead.token = TOK_EOS; /* and discharge it */ + } else + ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */ +} + +int raviX_lookahead(LexerState *ls) +{ + assert(ls->lookahead.token == TOK_EOS); + ls->lookahead.token = llex(ls, &ls->lookahead.seminfo); + return ls->lookahead.token; +} + +const char *raviX_get_last_error(CompilerState *container) { return container->error_message.buf; } diff --git a/ravicomp/src/linearizer.c b/ravicomp/src/linearizer.c new file mode 100644 index 0000000..4b09ee1 --- /dev/null +++ b/ravicomp/src/linearizer.c @@ -0,0 +1,2614 @@ +/* +Copyright (C) 2018-2020 Dibyendu Majumdar + +This file contains the Linearizer. 
The goal of the Linearizer is +generate a linear intermediate representation (IR) from the AST +suitable for further analysis. + +The linear IR is organized in basic blocks. +Each proc has an entry block and exit block. Additional blocks +are created as necessary. + +Each basic block contains a sequence of instructions. +The final instruction of a block must always be a branch instruction. +*/ + +#include "parser.h" +#include "linearizer.h" +#include "fnv_hash.h" +#include "ptrlist.h" +#include "graph.h" + +#include +#include +#include +#include + +#ifndef _WIN32 +#include +#include +#else +#include +#include +#endif + +static void handle_error(CompilerState *container, const char *msg) +{ + // TODO source and line number + raviX_buffer_add_string(&container->error_message, msg); + longjmp(container->env, 1); +} + +static Pseudo *linearize_expression(Proc *proc, AstNode *expr); +static BasicBlock *create_block(Proc *proc); +static void start_block(Proc *proc, BasicBlock *bb); +static void linearize_statement(Proc *proc, AstNode *node); +static void linearize_statement_list(Proc *proc, AstNodeList *list); +static void start_scope(LinearizerState *linearizer, Proc *proc, Scope *scope); +static void end_scope(LinearizerState *linearizer, Proc *proc); +static void instruct_br(Proc *proc, Pseudo *pseudo); +static bool is_block_terminated(BasicBlock *block); +static Pseudo *instruct_move(Proc *proc, enum opcode op, Pseudo *target, Pseudo *src); +static void linearize_function(LinearizerState *linearizer); +static Instruction *allocate_instruction(Proc *proc, enum opcode op); +static void free_temp_pseudo(Proc *proc, Pseudo *pseudo, bool free_temp_pseudo); + +/** + * Allocates a register by reusing a free'd register if possible otherwise + * allocating a new one + */ +static inline unsigned allocate_register(PseudoGenerator *generator) +{ + if (generator->free_pos > 0) { + return generator->free_regs[--generator->free_pos]; + } + return generator->next_reg++; +} + +/** + 
* Puts a register in the free list (must not already have been put there). + */ +static inline void free_register(Proc *proc, PseudoGenerator *generator, unsigned reg) +{ + if (generator->free_pos == (sizeof generator->free_regs / sizeof generator->free_regs[0])) { + /* TODO proper error handling */ + handle_error(proc->linearizer->ast_container, "Out of register space\n"); + return; + } + // Debug check - ensure register being freed hasn't already been freed + for (int i = 0; i < generator->free_pos; i++) { + assert(generator->free_regs[i] != reg); + } + generator->free_regs[generator->free_pos++] = (uint8_t)reg; +} + +/* Linearizer initialization */ +LinearizerState *raviX_init_linearizer(CompilerState *container) +{ + LinearizerState *linearizer = (LinearizerState *)calloc(1, sizeof(LinearizerState)); + linearizer->ast_container = container; + raviX_allocator_init(&linearizer->instruction_allocator, "instruction_allocator", sizeof(Instruction), + sizeof(double), sizeof(Instruction) * 128); + raviX_allocator_init(&linearizer->ptrlist_allocator, "ptrlist_allocator", sizeof(struct ptr_list), + sizeof(double), sizeof(struct ptr_list) * 64); + raviX_allocator_init(&linearizer->pseudo_allocator, "pseudo_allocator", sizeof(Pseudo), sizeof(double), + sizeof(Pseudo) * 128); + raviX_allocator_init(&linearizer->basic_block_allocator, "basic_block_allocator", sizeof(BasicBlock), + sizeof(double), sizeof(BasicBlock) * 32); + raviX_allocator_init(&linearizer->proc_allocator, "proc_allocator", sizeof(Proc), sizeof(double), + sizeof(Proc) * 32); + raviX_allocator_init(&linearizer->unsized_allocator, "unsized_allocator", 0, sizeof(double), CHUNK); + raviX_allocator_init(&linearizer->constant_allocator, "constant_allocator", sizeof(Constant), + sizeof(double), sizeof(Constant) * 64); + linearizer->proc_id = 0; + return linearizer; +} + +void raviX_destroy_linearizer(LinearizerState *linearizer) +{ + if (linearizer == NULL) + return; + Proc *proc; + 
FOR_EACH_PTR(linearizer->all_procs, proc) + { + if (proc->constants) + raviX_set_destroy(proc->constants, NULL); + if (proc->cfg) + raviX_destroy_graph(proc->cfg); + } + END_FOR_EACH_PTR(proc) + raviX_allocator_destroy(&linearizer->instruction_allocator); + raviX_allocator_destroy(&linearizer->ptrlist_allocator); + raviX_allocator_destroy(&linearizer->pseudo_allocator); + raviX_allocator_destroy(&linearizer->basic_block_allocator); + raviX_allocator_destroy(&linearizer->proc_allocator); + raviX_allocator_destroy(&linearizer->unsized_allocator); + raviX_allocator_destroy(&linearizer->constant_allocator); + free(linearizer); +} + +/** + * We assume strings are all interned and can be compared by + * address. Return true if values match else false. + */ +static int compare_constants(const void *a, const void *b) +{ + const Constant *c1 = (const Constant *)a; + const Constant *c2 = (const Constant *)b; + if (c1->type != c2->type) + return 0; + if (c1->type == RAVI_TNUMINT) + return c1->i == c2->i; + else if (c1->type == RAVI_TNUMFLT) + return c1->n == c2->n; + else + return c1->s == c2->s; +} + +/** + * Hashes a constant + */ +static uint32_t hash_constant(const void *c) +{ + const Constant *c1 = (const Constant *)c; + if (c1->type == RAVI_TNUMINT) + return (uint32_t)c1->i; + else if (c1->type == RAVI_TNUMFLT) + return (uint32_t)c1->n; // FIXME maybe use Lua's hash gen + else + return (uint32_t)c1->s->hash; +} + +/** + * Adds a constant to the proc's constant table. The constant is also assigned a + * pseudo register. + */ +static const Constant *add_constant(Proc *proc, const Constant *c) +{ + SetEntry *entry = raviX_set_search(proc->constants, c); + if (entry == NULL) { + int reg = 0; + /* Assign each type of constant a different range so that if backend + * doesn't need to emit the regnum for a particular type it can do so. + * If backend needs to emit all constants then 2 of the 3 ranges can + * easily adjusted. 
+ */ + switch(c->type) { + case RAVI_TNUMINT: + reg = proc->num_intconstants++; + break; + case RAVI_TNUMFLT: + reg = proc->num_fltconstants++; + break; + default: + assert(c->type == RAVI_TSTRING); + reg = proc->num_strconstants++; + break; + } + Constant *c1 = raviX_allocator_allocate(&proc->linearizer->constant_allocator, 0); + assert(c1); // FIXME + memcpy(c1, c, sizeof(Constant)); + c1->index = reg; + raviX_set_add(proc->constants, c1); + // printf("Created new constant of type %d and assigned reg %d\n", c->type, reg); + return c1; + } else { + const Constant *c1 = entry->key; + // printf("Found constant at reg %d\n", c1->index); + return c1; + } +} + +/** + * Allocates and adds a constant to the Proc's constants table. + * Input is expected to be EXPR_LITERAL + */ +static const Constant *allocate_constant(Proc *proc, AstNode *node) +{ + assert(node->type == EXPR_LITERAL); + Constant c = {.type = node->literal_expr.type.type_code}; + if (c.type == RAVI_TNUMINT) + c.i = node->literal_expr.u.i; + else if (c.type == RAVI_TNUMFLT) + c.n = node->literal_expr.u.r; + else + c.s = node->literal_expr.u.ts; + return add_constant(proc, &c); +} + +static const Constant *allocate_integer_constant(Proc *proc, int i) +{ + Constant c = {.type = RAVI_TNUMINT, .i = i}; + return add_constant(proc, &c); +} + +static inline void add_instruction_operand(Proc *proc, Instruction *insn, Pseudo *pseudo) +{ + raviX_ptrlist_add((struct ptr_list **)&insn->operands, pseudo, &proc->linearizer->ptrlist_allocator); +} + +static inline void add_instruction_target(Proc *proc, Instruction *insn, Pseudo *pseudo) +{ + raviX_ptrlist_add((struct ptr_list **)&insn->targets, pseudo, &proc->linearizer->ptrlist_allocator); +} + +static Instruction *allocate_instruction(Proc *proc, enum opcode op) +{ + Instruction *insn = raviX_allocator_allocate(&proc->linearizer->instruction_allocator, 0); + insn->opcode = op; + return insn; +} + +static void free_instruction_operand_pseudos(Proc *proc, Instruction 
*insn) +{ + Pseudo *operand; + FOR_EACH_PTR_REVERSE(insn->operands, operand) { free_temp_pseudo(proc, operand, false); } + END_FOR_EACH_PTR_REVERSE(operand) +} + +static inline void add_instruction(Proc *proc, Instruction *insn) +{ + assert(insn->block == NULL || insn->block == proc->current_bb); + raviX_ptrlist_add((struct ptr_list **)&proc->current_bb->insns, insn, &proc->linearizer->ptrlist_allocator); + insn->block = proc->current_bb; +} + +static inline void remove_instruction(BasicBlock *block, Instruction *insn) +{ + raviX_ptrlist_remove((struct ptr_list **)&block->insns, insn, 1); + insn->block = NULL; +} + +Instruction *raviX_last_instruction(BasicBlock *block) +{ + if (raviX_ptrlist_size((struct ptr_list *)block->insns) == 0) + return NULL; + return (Instruction *)raviX_ptrlist_last((struct ptr_list *)block->insns); +} + +static const Constant *allocate_string_constant(Proc *proc, const StringObject *s) +{ + Constant c = {.type = RAVI_TSTRING, .s = s}; + return add_constant(proc, &c); +} + +Pseudo* raviX_allocate_stack_pseudo(Proc* proc, unsigned reg) +{ + Pseudo* pseudo = raviX_allocator_allocate(&proc->linearizer->pseudo_allocator, 0); + pseudo->type = PSEUDO_LUASTACK; + pseudo->regnum = reg; + return pseudo; +} + +static Pseudo *allocate_symbol_pseudo(Proc *proc, LuaSymbol *sym, unsigned reg) +{ + Pseudo *pseudo = raviX_allocator_allocate(&proc->linearizer->pseudo_allocator, 0); + pseudo->type = PSEUDO_SYMBOL; + pseudo->symbol = sym; + pseudo->regnum = reg; + if (sym->symbol_type == SYM_LOCAL) { + assert(sym->variable.pseudo == NULL); + sym->variable.pseudo = pseudo; + } + return pseudo; +} + +static Pseudo *allocate_constant_pseudo(Proc *proc, const Constant *constant) +{ + Pseudo *pseudo = raviX_allocator_allocate(&proc->linearizer->pseudo_allocator, 0); + pseudo->type = PSEUDO_CONSTANT; + pseudo->constant = constant; + pseudo->regnum = constant->index; + return pseudo; +} + +static Pseudo *allocate_closure_pseudo(Proc *proc) +{ + Pseudo *pseudo = 
raviX_allocator_allocate(&proc->linearizer->pseudo_allocator, 0); + pseudo->type = PSEUDO_PROC; + pseudo->proc = proc; + return pseudo; +} + +static Pseudo *allocate_nil_pseudo(Proc *proc) +{ + Pseudo *pseudo = raviX_allocator_allocate(&proc->linearizer->pseudo_allocator, 0); + pseudo->type = PSEUDO_NIL; + pseudo->proc = proc; + return pseudo; +} + +static Pseudo *allocate_boolean_pseudo(Proc *proc, bool is_true) +{ + Pseudo *pseudo = raviX_allocator_allocate(&proc->linearizer->pseudo_allocator, 0); + pseudo->type = is_true ? PSEUDO_TRUE : PSEUDO_FALSE; + pseudo->proc = proc; + return pseudo; +} + +static Pseudo *allocate_block_pseudo(Proc *proc, BasicBlock *block) +{ + Pseudo *pseudo = raviX_allocator_allocate(&proc->linearizer->pseudo_allocator, 0); + pseudo->type = PSEUDO_BLOCK; + pseudo->block = block; + return pseudo; +} + +/* +We have several types of temp pseudos. +Specific types for floating and integer values so that we can +localise the assignment of these to registers. +The generic 'any' type is used for other types +but has variant called PSEUDO_RANGE. This is used in function calls +to represent multiple return values. Most of the time these get converted +back to normal temp pseudo, but in some cases we need to reference +a particular value in the range and for that we use PSEUDO_RANGE_SELECT. +*/ +static Pseudo *allocate_temp_pseudo(Proc *proc, ravitype_t type) +{ + PseudoGenerator *gen; + enum PseudoType pseudo_type; + switch (type) { + case RAVI_TNUMFLT: + gen = &proc->temp_flt_pseudos; + pseudo_type = PSEUDO_TEMP_FLT; + break; + case RAVI_TNUMINT: + case RAVI_TBOOLEAN: + gen = &proc->temp_int_pseudos; + pseudo_type = type == RAVI_TNUMINT ? 
PSEUDO_TEMP_INT: PSEUDO_TEMP_BOOL; + break; + default: + gen = &proc->temp_pseudos; + pseudo_type = PSEUDO_TEMP_ANY; + break; + } + unsigned reg = allocate_register(gen); + Pseudo *pseudo = raviX_allocator_allocate(&proc->linearizer->pseudo_allocator, 0); + pseudo->type = pseudo_type; + pseudo->regnum = reg; + pseudo->temp_for_local = NULL; + return pseudo; +} + +static Pseudo *allocate_range_pseudo(Proc *proc, Pseudo *orig_pseudo) +{ + Pseudo *pseudo = raviX_allocator_allocate(&proc->linearizer->pseudo_allocator, 0); + pseudo->type = PSEUDO_RANGE; + pseudo->regnum = orig_pseudo->regnum; + if (orig_pseudo->type == PSEUDO_TEMP_ANY) { + orig_pseudo->freed = 1; + } + return pseudo; +} + +/* +A PSEUDO_RANGE_SELECT picks or selects a particular offset in the range +specified by a PSEUDO_RANGE. Pick of 0 means pick first value from the range. +*/ +static Pseudo *allocate_range_select_pseudo(Proc *proc, Pseudo *range_pseudo, int pick) +{ + assert(range_pseudo->type == PSEUDO_RANGE); + Pseudo *pseudo = raviX_allocator_allocate(&proc->linearizer->pseudo_allocator, 0); + pseudo->type = PSEUDO_RANGE_SELECT; + pseudo->regnum = range_pseudo->regnum + pick; + pseudo->range_pseudo = range_pseudo; + return pseudo; +} + +static void free_temp_pseudo(Proc *proc, Pseudo *pseudo, bool free_local) +{ + if (pseudo->freed) + return; + if (!free_local && pseudo->temp_for_local) { + return; + } + PseudoGenerator *gen; + switch (pseudo->type) { + case PSEUDO_TEMP_FLT: + gen = &proc->temp_flt_pseudos; + break; + case PSEUDO_TEMP_INT: + case PSEUDO_TEMP_BOOL: + gen = &proc->temp_int_pseudos; + break; + case PSEUDO_RANGE: + case PSEUDO_TEMP_ANY: + gen = &proc->temp_pseudos; + break; + default: + // Not a temp, so no need to do anything + return; + } + free_register(proc, gen, pseudo->regnum); +} + +/** + * Allocate a new Proc. If there is a current Proc, then the new Proc gets added to the + * current Proc's children. 
*/
static Proc *allocate_proc(LinearizerState *linearizer, AstNode *function_expr)
{
	assert(function_expr->type == EXPR_FUNCTION);
	Proc *proc = raviX_allocator_allocate(&linearizer->proc_allocator, 0);
	proc->function_expr = function_expr;
	proc->id = raviX_ptrlist_size((struct ptr_list *)linearizer->all_procs)+1; // so that 0 is not assigned
	/* The AST node remembers the id of the proc generated for it */
	function_expr->function_expr.proc_id = proc->id;
	raviX_ptrlist_add((struct ptr_list **)&linearizer->all_procs, proc, &linearizer->ptrlist_allocator);
	if (linearizer->current_proc) {
		proc->parent = linearizer->current_proc;
		raviX_ptrlist_add((struct ptr_list **)&linearizer->current_proc->procs, proc,
				  &linearizer->ptrlist_allocator);
	}
	proc->constants = raviX_set_create(hash_constant, compare_constants);
	proc->linearizer = linearizer;
	proc->cfg = NULL;
	return proc;
}

/**
 * Records proc as the main (top level) proc. May only be called once, before
 * any proc has been made current, and the proc's function must not have a
 * parent function.
 */
static void set_main_proc(LinearizerState *linearizer, Proc *proc)
{
	assert(linearizer->main_proc == NULL);
	assert(linearizer->current_proc == NULL);
	linearizer->main_proc = proc;
	assert(proc->function_expr->function_expr.parent_function == NULL);
}

/** Makes proc the proc that is currently being linearized */
static inline void set_current_proc(LinearizerState *linearizer, Proc *proc)
{
	linearizer->current_proc = proc;
}

/**
 * Emits a type conversion/assertion instruction for target according to the
 * declared type vtype. Types without a matching opcode emit nothing.
 * For user data types (op_totype) the type name is added as a string
 * constant operand. The instruction has target as its only target.
 */
static void instruct_totype(Proc *proc, Pseudo *target, const VariableType *vtype)
{
	enum opcode targetop = op_nop;
	switch (vtype->type_code) {
	case RAVI_TNUMFLT:
		targetop = op_toflt;
		break;
	case RAVI_TNUMINT:
		targetop = op_toint;
		break;
	case RAVI_TSTRING:
		targetop = op_tostring;
		break;
	case RAVI_TFUNCTION:
		targetop = op_toclosure;
		break;
	case RAVI_TTABLE:
		targetop = op_totable;
		break;
	case RAVI_TARRAYFLT:
		targetop = op_tofarray;
		break;
	case RAVI_TARRAYINT:
		targetop = op_toiarray;
		break;
	case RAVI_TUSERDATA:
		targetop = op_totype;
		break;
	default:
		/* no conversion instruction exists for this type */
		return;
	}
	Instruction *insn = allocate_instruction(proc, targetop);
	if (targetop == op_totype) {
		assert(vtype->type_name);
		const Constant *tname_constant = allocate_string_constant(proc, vtype->type_name);
		Pseudo *tname_pseudo = allocate_constant_pseudo(proc, tname_constant);
		add_instruction_operand(proc, insn, tname_pseudo);
	}
	add_instruction_target(proc, insn, target);
	add_instruction(proc, insn);
}

/**
 * Emits, for every argument of the current proc's function, the type
 * conversion instruction required by the argument's declared type.
 */
static void linearize_function_args(LinearizerState *linearizer)
{
	Proc *proc = linearizer->current_proc;
	AstNode *func_expr = proc->function_expr;
	LuaSymbol *sym;
	FOR_EACH_PTR(func_expr->function_expr.args, sym)
	{
		/* The arg symbols already have register assigned by the local scope */
		assert(sym->variable.pseudo); // We should already have a register assigned
		instruct_totype(proc, sym->variable.pseudo, &sym->variable.value_type);
	}
	END_FOR_EACH_PTR(sym)
}

/** Linearizes each statement of list in order */
static void linearize_statement_list(Proc *proc, AstNodeList *list)
{
	AstNode *node;
	FOR_EACH_PTR(list, node) { linearize_statement(proc, node); }
	END_FOR_EACH_PTR(node)
}

/**
 * Turns a PSEUDO_RANGE into a PSEUDO_TEMP_ANY in place (the register number is
 * unchanged) and returns the same pseudo.
 */
static inline Pseudo *convert_range_to_temp(Pseudo *pseudo)
{
	assert(pseudo->type == PSEUDO_RANGE);
	pseudo->type = PSEUDO_TEMP_ANY;
	return pseudo;
}

/**
 * Produces the pseudo for a literal expression: numbers and strings become
 * constant pseudos, nil and booleans get dedicated pseudos. Other literal
 * types are reported as errors through handle_error().
 */
static Pseudo *linearize_literal(Proc *proc, AstNode *expr)
{
	assert(expr->type == EXPR_LITERAL);
	ravitype_t type = expr->literal_expr.type.type_code;
	Pseudo *pseudo = NULL;
	switch (type) {
	case RAVI_TNUMFLT:
	case RAVI_TNUMINT:
	case RAVI_TSTRING:
		pseudo = allocate_constant_pseudo(proc, allocate_constant(proc, expr));
		break;
	case RAVI_TNIL:
		pseudo = allocate_nil_pseudo(proc);
		break;
	case RAVI_TBOOLEAN:
		pseudo = allocate_boolean_pseudo(proc, expr->literal_expr.u.i);
		break;
	case RAVI_TVARARGS:
		handle_error(proc->linearizer->ast_container, "Var args not supported");
		break;
	default:
		handle_error(proc->linearizer->ast_container, "feature not yet implemented");
		break;
	}
	return pseudo;
}

/**
 * Linearizes a unary expression and returns the pseudo holding its result.
 *
 * The opcode is chosen from the operator and, where a typed variant exists,
 * from the static type of the operand. Type conversion operators whose
 * operand already has the requested type emit nothing and return the operand
 * pseudo unchanged. Conversions that are emitted use the operand pseudo as
 * the target; 'not', '~', unary minus and '#' place the result in a new temp.
 */
static Pseudo *linearize_unary_operator(Proc *proc, AstNode *node)
{
	// TODO if any expr is range we need to convert to temp?
	UnaryOperatorType op = node->unary_expr.unary_op;
	Pseudo *subexpr = linearize_expression(proc, node->unary_expr.expr);
	ravitype_t subexpr_type = node->unary_expr.expr->common_expr.type.type_code;
	enum opcode targetop = op_nop;
	switch (op) {
	case UNOPR_MINUS:
		if (subexpr_type == RAVI_TNUMINT)
			targetop = op_unmi;
		else if (subexpr_type == RAVI_TNUMFLT)
			targetop = op_unmf;
		else
			targetop = op_unm;
		break;
	case UNOPR_LEN:
		if (subexpr_type == RAVI_TARRAYINT || subexpr_type == RAVI_TARRAYFLT)
			targetop = op_leni;
		else
			targetop = op_len;
		/* from here on subexpr_type holds the result type, used for the target temp */
		subexpr_type = node->unary_expr.type.type_code;
		break;
	case UNOPR_TO_INTEGER:
		targetop = subexpr_type != RAVI_TNUMINT ? op_toint : op_nop;
		break;
	case UNOPR_TO_NUMBER:
		targetop = subexpr_type != RAVI_TNUMFLT ? op_toflt : op_nop;
		break;
	case UNOPR_TO_CLOSURE:
		targetop = subexpr_type != RAVI_TFUNCTION ? op_toclosure : op_nop;
		break;
	case UNOPR_TO_STRING:
		targetop = subexpr_type != RAVI_TSTRING ? op_tostring : op_nop;
		break;
	case UNOPR_TO_INTARRAY:
		targetop = subexpr_type != RAVI_TARRAYINT ? op_toiarray : op_nop;
		break;
	case UNOPR_TO_NUMARRAY:
		targetop = subexpr_type != RAVI_TARRAYFLT ? op_tofarray : op_nop;
		break;
	case UNOPR_TO_TABLE:
		targetop = subexpr_type != RAVI_TTABLE ? op_totable : op_nop;
		break;
	case UNOPR_TO_TYPE:
		targetop = op_totype;
		break;
	case UNOPR_NOT:
		targetop = op_not;
		break;
	case UNOPR_BNOT:
		targetop = op_bnot;
		break;
	default: {
		char err[100];
		snprintf(err, sizeof err, "unexpected unary op %s", raviX_get_unary_opr_str(op));
		handle_error(proc->linearizer->ast_container, err);
		break;
	}
	}
	if (targetop == op_nop) {
		/* operand already has the requested type - nothing to emit */
		return subexpr;
	}
	Instruction *insn = allocate_instruction(proc, targetop);
	Pseudo *target = subexpr;
	if (op == UNOPR_TO_TYPE) {
		/* the only operand is the name of the user data type */
		const Constant *tname_constant = allocate_string_constant(proc, node->unary_expr.type.type_name);
		Pseudo *tname_pseudo = allocate_constant_pseudo(proc, tname_constant);
		add_instruction_operand(proc, insn, tname_pseudo);
	} else if (op == UNOPR_NOT || op == UNOPR_BNOT) {
		add_instruction_operand(proc, insn, target);
		target = allocate_temp_pseudo(proc, RAVI_TANY);
	} else if (op == UNOPR_MINUS || op == UNOPR_LEN) {
		add_instruction_operand(proc, insn, target);
		target = allocate_temp_pseudo(proc, subexpr_type);
	}
	add_instruction_target(proc, insn, target);
	add_instruction(proc, insn);
	return target;
}

/**
 * Emits a move style instruction with opcode op that has src as operand and
 * target as target. Returns target.
 */
static Pseudo *instruct_move(Proc *proc, enum opcode op, Pseudo *target, Pseudo *src)
{
	// TODO we should use type specific MOVE instructions
	Instruction *mov = allocate_instruction(proc, op);
	add_instruction_operand(proc, mov, src);
	add_instruction_target(proc, mov, target);
	add_instruction(proc, mov);
	return target;
}

/**
 * Emits a conditional branch (op_cbr). The condition is the operand; the
 * targets are, in order, the block taken when the condition is true and the
 * block taken when it is false.
 */
static void instruct_cbr(Proc *proc, Pseudo *condition_pseudo, BasicBlock *true_block,
			 BasicBlock *false_block)
{
	Pseudo *true_pseudo = allocate_block_pseudo(proc, true_block);
	Pseudo *false_pseudo = allocate_block_pseudo(proc, false_block);
	Instruction *insn = allocate_instruction(proc, op_cbr);
	add_instruction_operand(proc, insn, condition_pseudo);
	add_instruction_target(proc, insn, true_pseudo);
	add_instruction_target(proc, insn, false_pseudo);
	add_instruction(proc, insn);
}

/**
 * Emits an unconditional branch (op_br) to the block referenced by pseudo.
 * If the current block is already terminated a fresh block is started first,
 * so the branch is never appended after another terminator.
 */
static void instruct_br(Proc *proc, Pseudo *pseudo)
{
	assert(pseudo->type == PSEUDO_BLOCK);
	if (is_block_terminated(proc->current_bb)) {
		start_block(proc, create_block(proc));
	}
	Instruction *insn = allocate_instruction(proc, op_br);
	add_instruction_target(proc, insn, pseudo);
	add_instruction(proc, insn);
}

// clang-format off
/*
Lua and/or operators are processed so that with 'and' the result is the final
true value, and with 'or' it is the first true value.

and IR

	result = eval(expr_left);
	if (result)
		goto Lnext;
	else
		goto Ldone;
Lnext:
	result = eval(expr_right);
	goto Ldone;
Ldone:

or IR

	result = eval(expr_left);
	if (result)
		goto Ldone;
	else
		goto Lnext;
Lnext:
	result = eval(expr_right);
	goto Ldone;
Ldone:

*/
// clang-format on
static Pseudo *linearize_bool(Proc *proc, AstNode *node, bool is_and)
{
	AstNode *e1 = node->binary_expr.expr_left;
	AstNode *e2 = node->binary_expr.expr_right;

	/* first_block evaluates the right operand (Lnext above), end_block is Ldone */
	BasicBlock *first_block = create_block(proc);
	BasicBlock *end_block = create_block(proc);

	/* result is allocated before either operand is linearized and is written on both paths */
	Pseudo *result = allocate_temp_pseudo(proc, RAVI_TANY);
	Pseudo *operand1 = linearize_expression(proc, e1);
	instruct_move(proc, op_mov, result, operand1);
	free_temp_pseudo(proc, operand1, false);
	if (is_and)
		instruct_cbr(proc, result, first_block, end_block); // If first value is true then evaluate the second
	else
		instruct_cbr(proc, result, end_block, first_block);

	start_block(proc, first_block);
	Pseudo *operand2 = linearize_expression(proc, e2);
	instruct_move(proc, op_mov, result, operand2);
	free_temp_pseudo(proc, operand2, false);
	instruct_br(proc, allocate_block_pseudo(proc, end_block));

	start_block(proc, end_block);

	return result;
}

/* Utility to create a binary instruction where operands and target pseudo is known */
static void create_binary_instruction(Proc *proc, enum opcode targetop, Pseudo *operand1,
				      Pseudo *operand2, Pseudo *target)
{
	Instruction *insn = allocate_instruction(proc, targetop);
	add_instruction_operand(proc, insn, operand1);
	add_instruction_operand(proc, insn, operand2);
	add_instruction_target(proc, insn, target);
	add_instruction(proc, insn);
}

/**
 * Linearizes a binary expression and returns the temp pseudo holding the
 * result.
 *
 * 'and'/'or' are delegated to linearize_bool(). For all other operators both
 * operands are linearized (left first), a generic opcode is chosen, the
 * operands are optionally swapped, and the opcode is then specialised
 * according to the static operand types.
 */
static Pseudo *linearize_binary_operator(Proc *proc, AstNode *node)
{
	// TODO if any expr is range we need to convert to temp?

	BinaryOperatorType op = node->binary_expr.binary_op;

	if (op == BINOPR_AND) {
		return linearize_bool(proc, node, true);
	} else if (op == BINOPR_OR) {
		return linearize_bool(proc, node, false);
	}

	AstNode *e1 = node->binary_expr.expr_left;
	AstNode *e2 = node->binary_expr.expr_right;
	Pseudo *operand1 = linearize_expression(proc, e1);
	Pseudo *operand2 = linearize_expression(proc, e2);

	enum opcode targetop;
	switch (op) {
	case BINOPR_ADD:
		targetop = op_add;
		break;
	case BINOPR_SUB:
		targetop = op_sub;
		break;
	case BINOPR_MUL:
		targetop = op_mul;
		break;
	case BINOPR_DIV:
		targetop = op_div;
		break;
	case BINOPR_IDIV:
		targetop = op_idiv;
		break;
	case BINOPR_BAND:
		targetop = op_band;
		break;
	case BINOPR_BOR:
		targetop = op_bor;
		break;
	case BINOPR_BXOR:
		targetop = op_bxor;
		break;
	case BINOPR_SHL:
		targetop = op_shl;
		break;
	case BINOPR_SHR:
		targetop = op_shr;
		break;
	/* NOTE(review): BINOPR_NE is lowered to op_eq (with swapped operands, see
	 * below) and nothing in this function negates the result - confirm that
	 * '~=' is inverted elsewhere. */
	case BINOPR_EQ:
	case BINOPR_NE:
		targetop = op_eq;
		break;
	case BINOPR_LT:
	case BINOPR_GT:
		targetop = op_lt;
		break;
	case BINOPR_LE:
	case BINOPR_GE:
		targetop = op_le;
		break;
	case BINOPR_MOD:
		targetop = op_mod;
		break;
	case BINOPR_POW:
		targetop = op_pow;
		break;
	case BINOPR_CONCAT:
		targetop = op_string_concat;
		break;
	default: {
		char err[100];
		snprintf(err, sizeof err, "unexpected binary op %s", raviX_get_binary_opr_str(op));
		handle_error(proc->linearizer->ast_container, err);
		targetop = op_nop;
		break;
	}
	}

	ravitype_t t1 = e1->common_expr.type.type_code;
	ravitype_t t2 = e2->common_expr.type.type_code;

	/* Decide whether the operands should be exchanged:
	 * - add/mul: an (int, float) pair is turned into (float, int)
	 * - comparisons: '>' and '>=' are expressed as '<' and '<=' with the
	 *   operands exchanged ('~=' is exchanged as well)
	 */
	bool swap = false;
	switch (targetop) {
	case op_add:
	case op_mul:
		swap = t1 == RAVI_TNUMINT && t2 == RAVI_TNUMFLT;
		break;
	case op_eq:
	case op_lt:
	case op_le:
		swap = op == BINOPR_NE || op == BINOPR_GT || op == BINOPR_GE;
		break;
	default:
		break;
	}

	if (swap) {
		Pseudo *temp;
		AstNode *ntemp;
		temp = operand1;
		operand1 = operand2;
		operand2 = temp;
		ntemp = e1;
		e1 = e2;
		e2 = ntemp;
		t1 = e1->common_expr.type.type_code;
		t2 = e2->common_expr.type.type_code;
	}

	/* Select the type specialised opcode by offsetting the generic one.
	 * This relies on the specialised opcodes being declared directly after the
	 * generic opcode in the enum, in the order tested below - TODO confirm
	 * against the opcode enum declaration. */
	switch (targetop) {
	case op_add:
	case op_mul:
		if (t1 == RAVI_TNUMFLT && t2 == RAVI_TNUMFLT)
			targetop += 1;
		else if (t1 == RAVI_TNUMFLT && t2 == RAVI_TNUMINT)
			targetop += 2;
		else if (t1 == RAVI_TNUMINT && t2 == RAVI_TNUMINT)
			targetop += 3;
		break;
	case op_div:
	case op_sub:
		if (t1 == RAVI_TNUMFLT && t2 == RAVI_TNUMFLT)
			targetop += 1;
		else if (t1 == RAVI_TNUMFLT && t2 == RAVI_TNUMINT)
			targetop += 2;
		else if (t1 == RAVI_TNUMINT && t2 == RAVI_TNUMFLT)
			targetop += 3;
		else if (t1 == RAVI_TNUMINT && t2 == RAVI_TNUMINT)
			targetop += 4;
		break;
	case op_band:
	case op_bor:
	case op_bxor:
	case op_shl:
	case op_shr:
		if (t1 == RAVI_TNUMINT && t2 == RAVI_TNUMINT)
			targetop += 1;
		break;
	case op_eq:
	case op_le:
	case op_lt:
		if (t1 == RAVI_TNUMINT && t2 == RAVI_TNUMINT)
			targetop += 1;
		else if (t1 == RAVI_TNUMFLT && t2 == RAVI_TNUMFLT)
			targetop += 2;
		break;
	default:
		break;
	}

	/* The target temp is allocated before the operand temps are released */
	ravitype_t target_type = node->binary_expr.type.type_code;
	Pseudo *target = allocate_temp_pseudo(proc, target_type);
	create_binary_instruction(proc, targetop, operand1, operand2, target);
	free_temp_pseudo(proc, operand1, false);
	free_temp_pseudo(proc, operand2, false);

	return target;
}

/* generates closure instruction - linearizes a Proc, and then adds instruction to create closure from it */
static Pseudo *linearize_function_expr(Proc *proc, AstNode *expr)
{
	Proc *curproc = proc->linearizer->current_proc;
	Proc *newproc = allocate_proc(proc->linearizer, expr);
	/* linearize_function() works on the linearizer's current proc */
	set_current_proc(proc->linearizer, newproc);
	linearize_function(proc->linearizer);
	set_current_proc(proc->linearizer, curproc); // restore the proc
	ravitype_t target_type = expr->function_expr.type.type_code;
	Pseudo *target = allocate_temp_pseudo(proc, target_type);
	Pseudo *operand = allocate_closure_pseudo(newproc);
	Instruction *insn = allocate_instruction(proc, op_closure);
	add_instruction_operand(proc, insn, operand);
	add_instruction_target(proc, insn, target);
	add_instruction(proc, insn);

	return target;
}

/**
 * Returns the pseudo for a symbol reference.
 *
 * - global: emits op_loadglobal with the environment symbol and the variable
 *   name as operands and a new temp as target; the temp remembers the
 *   instruction so that it can later be converted into a store
 * - local: returns the pseudo already attached to the symbol
 * - upvalue: returns a new symbol pseudo numbered by the upvalue index
 */
static Pseudo *linearize_symbol_expression(Proc *proc, AstNode *expr)
{
	LuaSymbol *sym = expr->symbol_expr.var;
	if (sym->symbol_type == SYM_GLOBAL) {
		assert(sym->variable.env);
		Pseudo *target = allocate_temp_pseudo(proc, RAVI_TANY);
		const Constant *constant = allocate_string_constant(proc, sym->variable.var_name);
		Pseudo *operand_varname = allocate_constant_pseudo(proc, constant);
		Pseudo* operand_env = allocate_symbol_pseudo(proc, sym->variable.env, 0); // no register
		Instruction *insn = allocate_instruction(proc, op_loadglobal);
		target->insn = insn;
		add_instruction_operand(proc, insn, operand_env);
		add_instruction_operand(proc, insn, operand_varname);
		add_instruction_target(proc, insn, target);
		add_instruction(proc, insn);
		return target;
	} else if (sym->symbol_type == SYM_LOCAL) {
		return sym->variable.pseudo;
	} else if (sym->symbol_type == SYM_UPVALUE) {
		/* upvalue index is the position of upvalue in the function, we treat this as the pseudo register for
		 * the upvalue */
		/* TODO maybe the pseudo be pre-created when we start linearizing the function and stored in the symbol
		 * like we do for locals? */
		return allocate_symbol_pseudo(proc, sym, sym->upvalue.upvalue_index);
	} else {
		handle_error(proc->linearizer->ast_container, "feature not yet implemented");
		return NULL;
	}
}

/**
 * Emits an indexed load (container[key]) into a new temp of target_type and
 * returns that temp. The opcode family is selected by the container type and
 * the variant by the key type. The temp remembers the instruction so that the
 * load can later be converted into a store.
 */
static Pseudo *instruct_indexed_load(Proc *proc, ravitype_t container_type,
				     Pseudo *container_pseudo, ravitype_t key_type,
				     Pseudo *key_pseudo, ravitype_t target_type)
{
	enum opcode op = op_get;
	switch (container_type) {
	case RAVI_TTABLE:
		op = op_tget;
		break;
	case RAVI_TARRAYINT:
		op = op_iaget;
		break;
	case RAVI_TARRAYFLT:
		op = op_faget;
		break;
	default:
		break;
	}
	/* Note we rely upon ordering of enums here */
	switch (key_type) {
	case RAVI_TNUMINT:
		op++;
		break;
	case RAVI_TSTRING:
		assert(container_type != RAVI_TARRAYINT && container_type != RAVI_TARRAYFLT);
		op += 2;
		break;
	default:
		break;
	}
	Pseudo *target_pseudo = allocate_temp_pseudo(proc, target_type);
	Instruction *insn = allocate_instruction(proc, op);
	add_instruction_operand(proc, insn, container_pseudo);
	add_instruction_operand(proc, insn, key_pseudo);
	add_instruction_target(proc, insn, target_pseudo);
	add_instruction(proc, insn);
	target_pseudo->insn = insn;
	return target_pseudo;
}

/**
 * Emits an indexed store (table[index] = value). The table and the index are
 * the targets of the instruction, in that order, and the value is its operand.
 * For typed arrays the opcode depends on the value type; otherwise it depends
 * on the index type (again relying on the ordering of the opcode enum).
 */
static void instruct_indexed_store(Proc *proc, ravitype_t table_type, Pseudo *table,
				   Pseudo *index_pseudo, ravitype_t index_type, Pseudo *value_pseudo,
				   ravitype_t value_type)
{
	// TODO validate the type of assignment
	// Insert type assertions if needed
	enum opcode op;
	switch (table_type) {
	case RAVI_TARRAYINT:
		op = op_iaput;
		if (value_type == RAVI_TNUMINT) {
			op = op_iaput_ival;
		}
		break;
	case RAVI_TARRAYFLT:
		op = op_faput;
		if (value_type == RAVI_TNUMFLT) {
			op = op_faput_fval;
		}
		break;
	default:
		op = table_type == RAVI_TTABLE ? op_tput : op_put;
		if (index_type == RAVI_TNUMINT) {
			op += 1;
		} else if (index_type == RAVI_TSTRING) {
			op += 2;
		}
		break;
	}

	Instruction *insn = allocate_instruction(proc, op);
	add_instruction_target(proc, insn, table);
	add_instruction_target(proc, insn, index_pseudo);
	add_instruction_operand(proc, insn, value_pseudo);
	add_instruction(proc, insn);
}

/**
 * Rewrites an already emitted op_loadglobal into op_storeglobal.
 *
 * The instruction is taken out of its block, its target temp is released,
 * the former operands (environment, name) become the targets, value_pseudo
 * becomes the only operand and the instruction is appended to the current
 * block. value_type is not used.
 */
static void convert_loadglobal_to_store(Proc *proc, Instruction *insn, Pseudo *value_pseudo,
					ravitype_t value_type)
{
	assert(insn->opcode == op_loadglobal);
	remove_instruction(insn->block, insn); // remove the instruction from its original block
	insn->opcode = op_storeglobal;
	// Remove the targets
	Pseudo *get_target = raviX_ptrlist_delete_last((struct ptr_list **)&insn->targets);
	free_temp_pseudo(proc, get_target, false);
	Pseudo *pseudo;
	// Move the loadglobal operands to target
	FOR_EACH_PTR(insn->operands, pseudo) { add_instruction_target(proc, insn, pseudo); }
	END_FOR_EACH_PTR(pseudo);
	raviX_ptrlist_remove_all((struct ptr_list **)&insn->operands);
	// Add new operand
	add_instruction_operand(proc, insn, value_pseudo);
	add_instruction(proc, insn);
}

/**
 * Rewrites an already emitted indexed load into the corresponding indexed
 * store. Instructions that are not indexed loads are left untouched.
 *
 * The instruction is taken out of its block, its target temp is released,
 * the former operands (table, key) become the targets, value_pseudo becomes
 * the only operand and the instruction is appended to the current block.
 * For typed arrays the store opcode depends on value_type.
 */
static void convert_indexed_load_to_store(Proc *proc, Instruction *insn, Pseudo *value_pseudo,
					  ravitype_t value_type)
{
	enum opcode putop;
	switch (insn->opcode) {
	case op_iaget:
	case op_iaget_ikey:
		putop = value_type == RAVI_TNUMINT ? op_iaput_ival : op_iaput;
		break;
	case op_faget:
	case op_faget_ikey:
		putop = value_type == RAVI_TNUMFLT ? op_faput_fval : op_faput;
		break;
	case op_tget:
		putop = op_tput;
		break;
	case op_tget_ikey:
		putop = op_tput_ikey;
		break;
	case op_tget_skey:
		putop = op_tput_skey;
		break;
	case op_get:
		putop = op_put;
		break;
	case op_get_ikey:
		putop = op_put_ikey;
		break;
	case op_get_skey:
		putop = op_put_skey;
		break;
	default:
		return;
	}
	remove_instruction(insn->block, insn);
	insn->opcode = putop;
	// Remove target
	Pseudo *get_target = raviX_ptrlist_delete_last((struct ptr_list **)&insn->targets);
	free_temp_pseudo(proc, get_target, false);
	Pseudo *pseudo;
	// Move the get operands to put target (table, key)
	FOR_EACH_PTR(insn->operands, pseudo) { add_instruction_target(proc, insn, pseudo); }
	END_FOR_EACH_PTR(pseudo);
	raviX_ptrlist_remove_all((struct ptr_list **)&insn->operands);
	// Add new operand
	add_instruction_operand(proc, insn, value_pseudo);
	add_instruction(proc, insn);
}

/**
 * Lua function calls can return multiple values, and the caller decides how many values to accept.
 * We indicate multiple values using a PSEUDO_RANGE.
 * We also handle method call:
 * <obj>:name(...) -> is translated to <obj>.name(<obj>, ...)
+ */ +static Pseudo *linearize_function_call_expression(Proc *proc, AstNode *expr, + AstNode *callsite_expr, Pseudo *callsite_pseudo) +{ + Instruction *insn = allocate_instruction(proc, op_call); + Pseudo *self_arg = NULL; /* For method call */ + if (expr->function_call_expr.method_name) { + const Constant *name_constant = + allocate_string_constant(proc, expr->function_call_expr.method_name); + Pseudo *name_pseudo = allocate_constant_pseudo(proc, name_constant); + self_arg = callsite_pseudo; /* The original callsite must be passed as 'self' */ + /* create new call site as callsite[name] */ + callsite_pseudo = instruct_indexed_load(proc, callsite_expr->common_expr.type.type_code, + callsite_pseudo, RAVI_TSTRING, name_pseudo, RAVI_TANY); + } + + add_instruction_operand(proc, insn, callsite_pseudo); + if (self_arg) { + add_instruction_operand(proc, insn, self_arg); + } + + AstNode *arg; + int argc = raviX_ptrlist_size((const struct ptr_list *)expr->function_call_expr.arg_list); + FOR_EACH_PTR(expr->function_call_expr.arg_list, arg) + { + argc -= 1; + Pseudo *arg_pseudo = linearize_expression(proc, arg); + if (argc != 0 && arg_pseudo->type == PSEUDO_RANGE) { + // Not last one, so range can only be 1 + convert_range_to_temp(arg_pseudo); + } + add_instruction_operand(proc, insn, arg_pseudo); + } + END_FOR_EACH_PTR(arg) + + Pseudo *return_pseudo = allocate_range_pseudo( + proc, callsite_pseudo); /* Base reg for function call - where return values will be placed */ + add_instruction_target(proc, insn, return_pseudo); + add_instruction_target(proc, insn, allocate_constant_pseudo(proc, allocate_integer_constant(proc, expr->function_call_expr.num_results))); + add_instruction(proc, insn); + + free_instruction_operand_pseudos(proc, insn); + + return return_pseudo; +} + +/* + * Suffixed expression examples: + * f()[1] + * x[1][2] + * x.y[1] + * + * The result type of a suffixed expression may initially be an indexed load, but when used in the context of + * an assignment 
statement the load will be converted to a store. + * Lua parser does this by creating a VINDEXED node which is only converted to load/store + * when the VINDEXED node is used. + */ +static Pseudo *linearize_suffixedexpr(Proc *proc, AstNode *node) +{ + /* suffixedexp -> primaryexp { '.' NAME | '[' exp ']' | ':' NAME funcargs | funcargs } */ + Pseudo *prev_pseudo = linearize_expression(proc, node->suffixed_expr.primary_expr); + AstNode *prev_node = node->suffixed_expr.primary_expr; + AstNode *this_node; + FOR_EACH_PTR(node->suffixed_expr.suffix_list, this_node) + { + Pseudo *next; + if (prev_pseudo->type == PSEUDO_RANGE) + convert_range_to_temp(prev_pseudo); + if (this_node->type == EXPR_Y_INDEX || this_node->type == EXPR_FIELD_SELECTOR) { + Pseudo *key_pseudo = linearize_expression(proc, this_node->index_expr.expr); + ravitype_t key_type = this_node->index_expr.expr->common_expr.type.type_code; + next = instruct_indexed_load(proc, prev_node->common_expr.type.type_code, prev_pseudo, key_type, + key_pseudo, this_node->common_expr.type.type_code); + } else if (this_node->type == EXPR_FUNCTION_CALL) { + next = linearize_function_call_expression(proc, this_node, prev_node, prev_pseudo); + } else { + next = NULL; + handle_error(proc->linearizer->ast_container, "Unexpected expr type in suffix list"); + } + prev_node = this_node; + prev_pseudo = next; + } + END_FOR_EACH_PTR(node) + return prev_pseudo; +} + +static int linearize_indexed_assign(Proc *proc, Pseudo *table, ravitype_t table_type, + AstNode *expr, int next) +{ + Pseudo *index_pseudo; + ravitype_t index_type; + if (expr->table_elem_assign_expr.key_expr) { + index_pseudo = linearize_expression(proc, expr->table_elem_assign_expr.key_expr); + index_type = expr->table_elem_assign_expr.key_expr->index_expr.expr->common_expr.type.type_code; + // TODO check valid index + } else { + const Constant *constant = allocate_integer_constant(proc, next++); + index_pseudo = allocate_constant_pseudo(proc, constant); + index_type = 
RAVI_TNUMINT; + } + Pseudo *value_pseudo = linearize_expression(proc, expr->table_elem_assign_expr.value_expr); + ravitype_t value_type = expr->table_elem_assign_expr.value_expr->common_expr.type.type_code; + instruct_indexed_store(proc, table_type, table, index_pseudo, index_type, value_pseudo, value_type); + free_temp_pseudo(proc, index_pseudo, false); + free_temp_pseudo(proc, value_pseudo, false); + return next; +} + +static Pseudo *linearize_table_constructor(Proc *proc, AstNode *expr) +{ + /* constructor -> '{' [ field { sep field } [sep] ] '}' where sep -> ',' | ';' */ + Pseudo *target = allocate_temp_pseudo(proc, expr->table_expr.type.type_code); + enum opcode op = op_newtable; + if (expr->table_expr.type.type_code == RAVI_TARRAYINT) + op = op_newiarray; + else if (expr->table_expr.type.type_code == RAVI_TARRAYFLT) + op = op_newfarray; + Instruction *insn = allocate_instruction(proc, op); + add_instruction_target(proc, insn, target); + add_instruction(proc, insn); + + /*TODO process constructor elements */ + AstNode *ia; + int i = 1; + FOR_EACH_PTR(expr->table_expr.expr_list, ia) + { + i = linearize_indexed_assign(proc, target, expr->table_expr.type.type_code, ia, i); + } + END_FOR_EACH_PTR(ia) + + return target; +} + +/** Is the type NIL-able */ +static bool is_nillable(const VariableType *var_type) +{ + return var_type->type_code != RAVI_TARRAYFLT && var_type->type_code != RAVI_TARRAYINT && + var_type->type_code != RAVI_TNUMFLT && var_type->type_code != RAVI_TNUMINT; +} + +/* Check if we can assign value to variable */ +static bool is_compatible(const VariableType *var_type, const VariableType *val_type) +{ + if (var_type->type_code == RAVI_TANY) + return true; + if (is_nillable(var_type) && val_type->type_code == RAVI_TNIL) + return true; + if (val_type->type_code == var_type->type_code && val_type->type_name == var_type->type_name) + return true; + if ((var_type->type_code == RAVI_TNUMFLT && val_type->type_code == RAVI_TNUMINT) || + (var_type->type_code 
== RAVI_TNUMINT && val_type->type_code == RAVI_TNUMFLT)) + /* Maybe conversion is possible so allow */ + return true; + return false; +} + +static void linearize_store_var(Proc *proc, const VariableType *var_type, Pseudo *var_pseudo, + const VariableType *val_type, Pseudo *val_pseudo) +{ + if (var_pseudo->insn && var_pseudo->insn->opcode >= op_get && var_pseudo->insn->opcode <= op_faget_ikey) { + convert_indexed_load_to_store(proc, var_pseudo->insn, val_pseudo, val_type->type_code); + } else if (var_pseudo->insn && var_pseudo->insn->opcode == op_loadglobal) { + convert_loadglobal_to_store(proc, var_pseudo->insn, val_pseudo, val_type->type_code); + } else { + assert(!var_pseudo->insn); + assert(var_type->type_code != RAVI_TVARARGS && var_type->type_code != RAVI_TNIL); + if (!is_compatible(var_type, val_type)) { + instruct_totype(proc, val_pseudo, var_type); + val_type = var_type; // Because of the type assertion! + } + enum opcode op = op_mov; + if (var_type->type_code == RAVI_TNUMINT) { + op = val_type->type_code == RAVI_TNUMINT ? op_movi : op_movfi; + } else if (var_type->type_code == RAVI_TNUMFLT) { + op = val_type->type_code == RAVI_TNUMFLT ? 
op_movf : op_movif; + } + instruct_move(proc, op, var_pseudo, val_pseudo); + } +} + +struct node_info { + const VariableType *vartype; + Pseudo *pseudo; +}; + +static void linearize_assignment(Proc *proc, AstNodeList *expr_list, struct node_info *varinfo, int nv) +{ + AstNode *expr; + + int ne = raviX_ptrlist_size((const struct ptr_list *)expr_list); + struct node_info *valinfo = (struct node_info *)alloca(ne * sizeof(struct node_info)); + Pseudo *last_val_pseudo = NULL; + int i = 0; + FOR_EACH_PTR(expr_list, expr) + { + Pseudo *val_pseudo = last_val_pseudo = linearize_expression(proc, expr); + valinfo[i].vartype = &expr->common_expr.type; + valinfo[i].pseudo = val_pseudo; + i++; + if (i < ne && val_pseudo->type == PSEUDO_RANGE) { + convert_range_to_temp(val_pseudo); + } + } + END_FOR_EACH_PTR(expr) + + /* TODO do we need to insert type assertions in some cases such as function return values ? */ + + int note_ne = ne; + while (nv > 0) { + if (nv > ne) { + if (last_val_pseudo != NULL && last_val_pseudo->type == PSEUDO_RANGE) { + int pick = nv - ne; + linearize_store_var(proc, varinfo[nv - 1].vartype, varinfo[nv - 1].pseudo, + valinfo[ne - 1].vartype, + allocate_range_select_pseudo(proc, last_val_pseudo, pick)); + } else { + // TODO store NIL + } + nv--; + } else { + if (valinfo[ne - 1].pseudo->type == PSEUDO_RANGE) { + /* Only the topmost expression can be a range ... assert */ + assert(ne == note_ne); + valinfo[ne - 1].pseudo = allocate_range_select_pseudo(proc, valinfo[ne - 1].pseudo, 0); + } + linearize_store_var(proc, varinfo[nv - 1].vartype, varinfo[nv - 1].pseudo, + valinfo[ne - 1].vartype, valinfo[ne - 1].pseudo); + free_temp_pseudo(proc, valinfo[ne - 1].pseudo, false); + nv--; + ne--; + } + } +} + +/* +Expression or assignment statement is of the form: + + = + +Lua requires some special handling of this statement. Firstly +the LHS expressions are evaluated left to right. + +The RHS is processed right to left. 
If there is a corresponding LHS expr +then we need to assign the value of the RHS expr to the LHS expr. +Excess RHS expression results are discarded. +Excess LHS expressions have to be set to the default value. + +So for example if we had: + +expr1, expr2 = expr3, expr4, expr5 + +Then following needs to be generated + +result1 = eval(expr1) +result2 = eval(expr2) + +eval(expr5) +*result2 = eval(expr4) +*result1 = eval(expr3) + +Our code generation has an issue: +We initially generate load instructions for LHS expressions. +Subsequently we convert these to store instructions (marked above with asterisk) + +The handling of 'local' and expression statements can be partially combined +because the main difference is the LHS side of it. The rest of the processing has to be +the same. +*/ +static void linearize_expression_statement(Proc *proc, AstNode *node) +{ + AstNode *var; + + int nv = raviX_ptrlist_size((const struct ptr_list *)node->expression_stmt.var_expr_list); + struct node_info *varinfo = (struct node_info *)alloca(nv * sizeof(struct node_info)); + int i = 0; + FOR_EACH_PTR(node->expression_stmt.var_expr_list, var) + { + Pseudo *var_pseudo = linearize_expression(proc, var); + varinfo[i].vartype = &var->common_expr.type; + varinfo[i].pseudo = var_pseudo; + i++; + } + END_FOR_EACH_PTR(var) + + linearize_assignment(proc, node->expression_stmt.expr_list, varinfo, nv); +} + +static void linearize_local_statement(Proc *proc, AstNode *stmt) +{ + LuaSymbol *sym; + + int nv = raviX_ptrlist_size((const struct ptr_list *)stmt->local_stmt.var_list); + struct node_info *varinfo = (struct node_info *)alloca(nv * sizeof(struct node_info)); + int i = 0; + + FOR_EACH_PTR(stmt->local_stmt.var_list, sym) + { + Pseudo *var_pseudo = sym->variable.pseudo; + assert(var_pseudo); + varinfo[i].vartype = &sym->variable.value_type; + varinfo[i].pseudo = var_pseudo; + i++; + } + END_FOR_EACH_PTR(var) + + linearize_assignment(proc, stmt->local_stmt.expr_list, varinfo, nv); +} + +static 
Pseudo *linearize_expression(Proc *proc, AstNode *expr) +{ + Pseudo *result = NULL; + switch (expr->type) { + case EXPR_LITERAL: { + result = linearize_literal(proc, expr); + } break; + case EXPR_BINARY: { + result = linearize_binary_operator(proc, expr); + } break; + case EXPR_FUNCTION: { + result = linearize_function_expr(proc, expr); + } break; + case EXPR_UNARY: { + result = linearize_unary_operator(proc, expr); + } break; + case EXPR_SUFFIXED: { + result = linearize_suffixedexpr(proc, expr); + } break; + case EXPR_SYMBOL: { + result = linearize_symbol_expression(proc, expr); + } break; + case EXPR_TABLE_LITERAL: { + result = linearize_table_constructor(proc, expr); + } break; + case EXPR_Y_INDEX: + case EXPR_FIELD_SELECTOR: { + result = linearize_expression(proc, expr->index_expr.expr); + } break; + default: + handle_error(proc->linearizer->ast_container, "feature not yet implemented"); + break; + } + assert(result); + if (result->type == PSEUDO_RANGE && expr->common_expr.truncate_results) { + // Need to truncate the results to 1 + return allocate_range_select_pseudo(proc, result, 0); + } + return result; +} + +static void linearize_expr_list(Proc *proc, AstNodeList *expr_list, Instruction *insn, + PseudoList **pseudo_list) +{ + AstNode *expr; + int ne = raviX_ptrlist_size((const struct ptr_list *)expr_list); + FOR_EACH_PTR(expr_list, expr) + { + ne -= 1; + Pseudo *pseudo = linearize_expression(proc, expr); + if (ne != 0 && pseudo->type == PSEUDO_RANGE) { + convert_range_to_temp(pseudo); // Only accept one result unless it is the last expr + } + raviX_ptrlist_add((struct ptr_list **)pseudo_list, pseudo, &proc->linearizer->ptrlist_allocator); + } + END_FOR_EACH_PTR(expr) +} + +static void linearize_return(Proc *proc, AstNode *node) +{ + assert(node->type == STMT_RETURN); + Instruction *insn = allocate_instruction(proc, op_ret); + linearize_expr_list(proc, node->return_stmt.expr_list, insn, &insn->operands); + add_instruction_target(proc, insn, 
allocate_block_pseudo(proc, proc->nodes[EXIT_BLOCK])); + add_instruction(proc, insn); +} + +/* A block is considered terminated if the last instruction is + a return or a branch */ +static bool is_block_terminated(BasicBlock *block) +{ + Instruction *last_insn = raviX_last_instruction(block); + if (last_insn == NULL) + return false; + if (last_insn->opcode == op_ret || last_insn->opcode == op_cbr || last_insn->opcode == op_br) + return true; + return false; +} + +static void linearize_test_cond(Proc *proc, AstNode *node, BasicBlock *true_block, + BasicBlock *false_block) +{ + Pseudo *condition_pseudo = linearize_expression(proc, node->test_then_block.condition); + instruct_cbr(proc, condition_pseudo, true_block, false_block); +} + +/* linearize the 'else if' block */ +static void linearize_test_then(Proc *proc, AstNode *node, BasicBlock *true_block, + BasicBlock *end_block) +{ + start_block(proc, true_block); + start_scope(proc->linearizer, proc, node->test_then_block.test_then_scope); + linearize_statement_list(proc, node->test_then_block.test_then_statement_list); + end_scope(proc->linearizer, proc); + if (!is_block_terminated(proc->current_bb)) + instruct_br(proc, allocate_block_pseudo(proc, end_block)); +} + +// clang-format off +/* +The Lua if statement has a complex structure as it is somewhat like +a combination of case and if statement. The if block is followed by +1 or more elseif blocks. Finally we have an optinal else block. +The elseif blocks are like case statements. + +Given + +if cond1 then + block for cond1 +elseif cond2 then + block for cond2 +else + block for else +end + +We linearize the statement as follows. 
+ +B0: + if cond1 goto Bcond1 else B2; // Initial if condition + +B2: + if cond2 goto Bcond2 else B3: // This is an elseif condition + +B3: + + goto Belse; + + goto Bend; + +Bcond1: + start scope + block for cond1 + end scope + goto Bend; + +Bcond2: + start scope + block for cond2 + end scope + goto Bend; + +Belse: + start scope + block for else + end scope + goto Bend; + +Bend: +*/ +// clang-format on +static void linearize_if_statement(Proc *proc, AstNode *ifnode) +{ + BasicBlock *end_block = NULL; + BasicBlock *else_block = NULL; + BasicBlockList *if_blocks = NULL; + BasicBlockList *if_true_blocks = NULL; + AstNodeList *if_else_stmts = ifnode->if_stmt.if_condition_list; + AstNodeList *else_stmts = ifnode->if_stmt.else_statement_list; + Scope *else_scope = ifnode->if_stmt.else_block; + + AstNode *this_node; + FOR_EACH_PTR(if_else_stmts, this_node) + { + BasicBlock *block = create_block(proc); + raviX_ptrlist_add((struct ptr_list **)&if_blocks, block, &proc->linearizer->ptrlist_allocator); + } + END_FOR_EACH_PTR(this_node) + + FOR_EACH_PTR(if_else_stmts, this_node) + { + BasicBlock *block = create_block(proc); + raviX_ptrlist_add((struct ptr_list **)&if_true_blocks, block, &proc->linearizer->ptrlist_allocator); + } + END_FOR_EACH_PTR(this_node) + + if (ifnode->if_stmt.else_statement_list) { + else_block = create_block(proc); + } + + end_block = create_block(proc); + + BasicBlock *true_block = NULL; + BasicBlock *false_block = NULL; + BasicBlock *block = NULL; + + { + PREPARE_PTR_LIST(if_blocks, block); + PREPARE_PTR_LIST(if_true_blocks, true_block); + FOR_EACH_PTR(if_else_stmts, this_node) + { + start_block(proc, block); + NEXT_PTR_LIST(block); + if (!block) { + // last one + if (else_block) + false_block = else_block; + else + false_block = end_block; + } else { + false_block = block; + } + linearize_test_cond(proc, this_node, true_block, false_block); + NEXT_PTR_LIST(true_block); + } + END_FOR_EACH_PTR(node) + FINISH_PTR_LIST(block); + 
FINISH_PTR_LIST(true_block); + } + { + PREPARE_PTR_LIST(if_true_blocks, true_block); + FOR_EACH_PTR(if_else_stmts, this_node) + { + linearize_test_then(proc, this_node, true_block, end_block); + NEXT_PTR_LIST(true_block); + } + END_FOR_EACH_PTR(node) + FINISH_PTR_LIST(true_block); + } + + if (else_block) { + start_block(proc, else_block); + start_scope(proc->linearizer, proc, else_scope); + linearize_statement_list(proc, else_stmts); + end_scope(proc->linearizer, proc); + if (!is_block_terminated(proc->current_bb)) + instruct_br(proc, allocate_block_pseudo(proc, end_block)); + } + + start_block(proc, end_block); +} + +/* +handle label statement. +We start a new block which will get associated with the label. +We have to handle the situation where the label pseudo was already created when we +encountered a goto statement but we did not know the block then. +*/ +static void linearize_label_statement(Proc *proc, AstNode *node) +{ + BasicBlock* block; + if (node->label_stmt.symbol->label.pseudo != NULL) { + /* This means the block got created when we saw the goto statement, so we just need to make it current */ + assert(node->label_stmt.symbol->label.pseudo->block != NULL); + block = node->label_stmt.symbol->label.pseudo->block; + start_block(proc, block); + } + else { + block = proc->current_bb; + /* If the current block is empty then we can use it as the label target */ + if (raviX_ptrlist_size((const struct ptr_list *)block->insns) > 0) { + /* Create new block as label target */ + block = create_block(proc); + start_block(proc, block); + } + node->label_stmt.symbol->label.pseudo = allocate_block_pseudo(proc, block); + } +} + +/* TODO move this logic to parser? */ +/* Search for a label going up scopes starting from the scope where the goto statement appeared. + * Also return via min_closing_block the ancestor scope that is greater than or equal to the + * label scope, and where a local variable escaped. 
+ */ +static LuaSymbol *find_label(Proc *proc, Scope *block, + const StringObject *label_name, Scope **min_closing_block) +{ + AstNode *function = block->function; /* We need to stay inside the function when lookng for the label */ + *min_closing_block = NULL; + while (block != NULL && block->function == function) { + LuaSymbol *symbol; + if (block->need_close) { + *min_closing_block = block; + } + FOR_EACH_PTR_REVERSE(block->symbol_list, symbol) + { + if (symbol->symbol_type == SYM_LABEL && symbol->label.label_name == label_name) { + return symbol; + } + } + END_FOR_EACH_PTR_REVERSE(symbol) + block = block->parent; + } + return NULL; +} + +/* +* Starting from block, go up the hierarchy until target_block and determine the oldest +* ancestor block that has escaped variables and thus needs to be closed. +*/ +static Scope *find_min_closing_block(Scope *block, Scope *target_block) +{ + AstNode *function = block->function; /* We need to stay inside the function when lookng for the label */ + Scope *min_closing_block = NULL; + while (block != NULL && block->function == function) { + if (block->need_close) { + min_closing_block = block; + } + if (block == target_block) + break; + block = block->parent; + } + return min_closing_block; +} + +/* + * Checks if a basic block is already closed - for now we check if the last + * instruction in the block is op_ret, which also handles closing of up-values. + */ +static bool is_already_closed(Proc *proc, BasicBlock *block) +{ + Instruction *last_insn = raviX_last_instruction(block); + if (last_insn == NULL) + return false; + if (last_insn->opcode == op_ret) + return true; + if (last_insn->opcode == op_close) { + // hmmm + assert(false); + } + return false; +} + +/* Adds a OP_CLOSE instruction at the specified basic block, but only if any local variables in the given + * scope escaped, i.e. were referenced as upvalues. + * Note that the proc's current_bb remains unchanged after this call. 
Normally we would expect + * the current basic block to be where we insert instructions but in this case there are scenarios + * such as when processing goto or break statemnt where the close instruction must be added to the + * the goto / break target block. + */ +static void instruct_close(Proc *proc, BasicBlock *block, Scope *scope) +{ + if (is_already_closed(proc, block)) + return; + /* temporarily make block current */ + BasicBlock *prev_current = proc->current_bb; + proc->current_bb = block; + + LuaSymbol *symbol; + FOR_EACH_PTR(scope->symbol_list, symbol) + { + /* We add the first escaping variable as the operand to op_close. + * op_close is meant to scan the stack from that point and close + * any open upvalues + */ + if (symbol->symbol_type == SYM_LOCAL && symbol->variable.escaped) { + assert(symbol->variable.pseudo); + Instruction *insn = allocate_instruction(proc, op_close); + add_instruction_operand(proc, insn, symbol->variable.pseudo); + add_instruction(proc, insn); + break; + } + } + END_FOR_EACH_PTR(symbol) + + /* restore current basic block */ + proc->current_bb = prev_current; +} + +/* +When linearizing the goto statement we create a pseudo for the label if it hasn't been already created. +But at this point we may not know the target basic block to goto, which we expect to be filled when the label is +encountered. Of course if the label was linearized before we got to the goto statement then the target block +would already be known and specified in the pseudo. 
+*/ +static void linearize_goto_statement(Proc *proc, const AstNode *node) +{ + if (node->goto_stmt.is_break) { + if (proc->current_break_target == NULL) { + handle_error(proc->linearizer->ast_container, "no current break target"); + } + /* Find the oldest ancestor scope that may need to be closed */ + Scope *min_closing_block = find_min_closing_block(node->goto_stmt.goto_scope, proc->current_break_scope); + instruct_br(proc, allocate_block_pseudo(proc, proc->current_break_target)); + start_block(proc, create_block(proc)); + if (min_closing_block) { + /* Note that the close instruction goes to the target block of the goto */ + instruct_close(proc, proc->current_break_target, min_closing_block); + } + return; + } + /* The AST does not provide link to the label so we have to search for the label in the goto scope + and above */ + if (node->goto_stmt.goto_scope) { + Scope *min_closing_block = NULL; + LuaSymbol *symbol = find_label(proc, node->goto_stmt.goto_scope, node->goto_stmt.name, &min_closing_block); + if (symbol) { + /* label found */ + if (symbol->label.pseudo == NULL) { + /* No pseudo? 
create with target block to be processed later when label is encountered */ + symbol->label.pseudo = allocate_block_pseudo(proc, create_block(proc)); + } + else { + assert(symbol->label.pseudo->block != NULL); + } + instruct_br(proc, symbol->label.pseudo); + start_block(proc, create_block(proc)); + if (min_closing_block) { + /* Note that the close instruction goes to the target block of the goto */ + instruct_close(proc, symbol->label.pseudo->block, min_closing_block); + } + return; + } + } + handle_error(proc->linearizer->ast_container, "goto label not found"); +} + +static void linearize_do_statement(Proc *proc, AstNode *node) +{ + assert(node->type == STMT_DO); + start_scope(proc->linearizer, proc, node->do_stmt.scope); + linearize_statement_list(proc, node->do_stmt.do_statement_list); + end_scope(proc->linearizer, proc); +} + +//clang-format off +/* +Lua manual states: + + for v = e1, e2, e3 do block end + +is equivalent to the code: + + do + local var, limit, step = tonumber(e1), tonumber(e2), tonumber(e3) + if not (var and limit and step) then error() end + var = var - step + while true do + var = var + step + if (step >= 0 and var > limit) or (step < 0 and var < limit) then + break + end + local v = var + block + end + end + +We do not need local vars to hold var, limit, step as these can be +temporaries. + + step_positive = 0 < step + var = var - step + goto L1 +L1: + var = var + step; + if step_positive goto L2; + else goto L3; +L2: + stop = var > limit + if stop goto Lend + else goto Lbody +L3: + stop = var < limit + if stop goto Lend + else goto Lbody +Lbody: + set local symbol in for loop to var + do body + goto L1; + +Lend: + +Above is the general case + +When we know the increment to be negative or positive we can simplify. 
+Example for positive case + + var = var - step + goto L1 +L1: + var = var + step; + goto L2 +L2: + stop = var > limit + if stop goto Lend + else goto Lbody +Lbody: + set local symbol in for loop to var + do body + goto L1; +Lend: + +Negative case + + var = var - step + goto L1 +L1: + var = var + step; + goto L3; +L3: + stop = var < limit + if stop goto Lend + else goto Lbody +Lbody: + set local symbol in for loop to var + do body + goto L1; +Lend: + + +*/ +//clang-format on + +static void linearize_for_num_statement_positivestep(Proc *proc, AstNode *node) +{ + start_scope(proc->linearizer, proc, node->for_stmt.for_scope); + + AstNode *index_var_expr = raviX_ptrlist_nth_entry((struct ptr_list *)node->for_stmt.expr_list, 0); + AstNode *limit_expr = raviX_ptrlist_nth_entry((struct ptr_list *)node->for_stmt.expr_list, 1); + AstNode *step_expr = raviX_ptrlist_nth_entry((struct ptr_list *)node->for_stmt.expr_list, 2); + LuaSymbol *var_sym = raviX_ptrlist_nth_entry((struct ptr_list *)node->for_stmt.symbols, 0); + + if (index_var_expr == NULL || limit_expr == NULL) { + handle_error(proc->linearizer->ast_container, "A least index and limit must be supplied"); + } + Pseudo *t = linearize_expression(proc, index_var_expr); + if (t->type == PSEUDO_RANGE) { + convert_range_to_temp(t); // Only accept one result + } + Pseudo *index_var_pseudo = allocate_temp_pseudo(proc, RAVI_TNUMINT); + instruct_move(proc, op_mov, index_var_pseudo, t); + + t = linearize_expression(proc, limit_expr); + if (t->type == PSEUDO_RANGE) { + convert_range_to_temp(t); // Only accept one result + } + Pseudo *limit_pseudo = allocate_temp_pseudo(proc, RAVI_TNUMINT); + instruct_move(proc, op_mov, limit_pseudo, t); + + if (step_expr == NULL) + t = allocate_constant_pseudo(proc, allocate_integer_constant(proc, 1)); + else { + t = linearize_expression(proc, step_expr); + if (t->type == PSEUDO_RANGE) { + convert_range_to_temp(t); // Only accept one result + } + } + Pseudo *step_pseudo = 
allocate_temp_pseudo(proc, RAVI_TNUMINT); + instruct_move(proc, op_mov, step_pseudo, t); + + Pseudo *stop_pseudo = allocate_temp_pseudo(proc, RAVI_TBOOLEAN); + create_binary_instruction(proc, op_subii, index_var_pseudo, step_pseudo, index_var_pseudo); + + BasicBlock *L1 = create_block(proc); + BasicBlock *L2 = create_block(proc); + BasicBlock *Lbody = create_block(proc); + BasicBlock *Lend = create_block(proc); + BasicBlock *previous_break_target = proc->current_break_target; + Scope *previous_break_scope = proc->current_break_scope; + proc->current_break_target = Lend; + proc->current_break_scope = proc->current_scope; + + start_block(proc, L1); + create_binary_instruction(proc, op_addii, index_var_pseudo, step_pseudo, index_var_pseudo); + instruct_br(proc, allocate_block_pseudo(proc, L2)); + + start_block(proc, L2); + create_binary_instruction(proc, op_ltii, limit_pseudo, index_var_pseudo, stop_pseudo); + instruct_cbr(proc, stop_pseudo, Lend, Lbody); + + start_block(proc, Lbody); + instruct_move(proc, op_mov, var_sym->variable.pseudo, index_var_pseudo); + + start_scope(proc->linearizer, proc, node->for_stmt.for_body); + linearize_statement_list(proc, node->for_stmt.for_statement_list); + end_scope(proc->linearizer, proc); + + /* If the fornum block has escaped local vars then we need to close */ + if (proc->current_break_scope->need_close) { + /* Note we put close instruction in current basic block */ + instruct_close(proc, proc->current_bb, proc->current_break_scope); + } + instruct_br(proc, allocate_block_pseudo(proc, L1)); + + end_scope(proc->linearizer, proc); + + free_temp_pseudo(proc, stop_pseudo, false); + free_temp_pseudo(proc, step_pseudo, false); + free_temp_pseudo(proc, limit_pseudo, false); + free_temp_pseudo(proc, index_var_pseudo, false); + + start_block(proc, Lend); + + proc->current_break_target = previous_break_target; + proc->current_break_scope = previous_break_scope; +} + +static void linearize_for_num_statement(Proc *proc, AstNode *node) +{ + 
assert(node->type == STMT_FOR_NUM); + + /* For now we only allow integer expressions */ + AstNode *expr; + FOR_EACH_PTR(node->for_stmt.expr_list, expr) + { + if (expr->common_expr.type.type_code != RAVI_TNUMINT) { + handle_error(proc->linearizer->ast_container, + "Only for loops with integer expressions currently supported"); + } + } + END_FOR_EACH_PTR(expr) + + /* Check if we can optimize */ + AstNode *step_expr = raviX_ptrlist_nth_entry((struct ptr_list *)node->for_stmt.expr_list, 2); + { + bool step_known_positive = false; +// bool step_known_negative = false; + if (step_expr == NULL) { + step_known_positive = true; + } else if (step_expr->type == EXPR_LITERAL) { + if (step_expr->literal_expr.type.type_code == RAVI_TNUMINT) { + if (step_expr->literal_expr.u.i > 0) + step_known_positive = true; +// else if (step_expr->literal_expr.u.i < 0) +// step_known_negative = true; + } + } + if (step_known_positive) { + linearize_for_num_statement_positivestep(proc, node); + return; + } + } + + /* Default case where we do not know if step is negative or positive */ + start_scope(proc->linearizer, proc, node->for_stmt.for_scope); + + AstNode *index_var_expr = raviX_ptrlist_nth_entry((struct ptr_list *)node->for_stmt.expr_list, 0); + AstNode *limit_expr = raviX_ptrlist_nth_entry((struct ptr_list *)node->for_stmt.expr_list, 1); + LuaSymbol *var_sym = raviX_ptrlist_nth_entry((struct ptr_list *)node->for_stmt.symbols, 0); + + if (index_var_expr == NULL || limit_expr == NULL) { + handle_error(proc->linearizer->ast_container, "A least index and limit must be supplied"); + } + + Pseudo *t = linearize_expression(proc, index_var_expr); + if (t->type == PSEUDO_RANGE) { + convert_range_to_temp(t); // Only accept one result + } + Pseudo *index_var_pseudo = allocate_temp_pseudo(proc, RAVI_TNUMINT); + instruct_move(proc, op_mov, index_var_pseudo, t); + + t = linearize_expression(proc, limit_expr); + if (t->type == PSEUDO_RANGE) { + convert_range_to_temp(t); // Only accept one result + } + 
Pseudo *limit_pseudo = allocate_temp_pseudo(proc, RAVI_TNUMINT); + instruct_move(proc, op_mov, limit_pseudo, t); + + if (step_expr == NULL) + t = allocate_constant_pseudo(proc, allocate_integer_constant(proc, 1)); + else { + t = linearize_expression(proc, step_expr); + if (t->type == PSEUDO_RANGE) { + convert_range_to_temp(t); // Only accept one result + } + } + Pseudo *step_pseudo = allocate_temp_pseudo(proc, RAVI_TNUMINT); + instruct_move(proc, op_mov, step_pseudo, t); + + Pseudo *step_positive = allocate_temp_pseudo(proc, RAVI_TBOOLEAN); + create_binary_instruction(proc, op_ltii, allocate_constant_pseudo(proc, allocate_integer_constant(proc, 0)), + step_pseudo, step_positive); + + Pseudo *stop_pseudo = allocate_temp_pseudo(proc, RAVI_TBOOLEAN); + create_binary_instruction(proc, op_subii, index_var_pseudo, step_pseudo, index_var_pseudo); + + BasicBlock *L1 = create_block(proc); + BasicBlock *L2 = create_block(proc); + BasicBlock *L3 = create_block(proc); + BasicBlock *Lbody = create_block(proc); + BasicBlock *Lend = create_block(proc); + BasicBlock *previous_break_target = proc->current_break_target; + Scope *previous_break_scope = proc->current_break_scope; + proc->current_break_target = Lend; + proc->current_break_scope = proc->current_scope; + + start_block(proc, L1); + create_binary_instruction(proc, op_addii, index_var_pseudo, step_pseudo, index_var_pseudo); + instruct_cbr(proc, step_positive, L2, L3); + + start_block(proc, L2); + create_binary_instruction(proc, op_ltii, limit_pseudo, index_var_pseudo, stop_pseudo); + instruct_cbr(proc, stop_pseudo, Lend, Lbody); + + start_block(proc, L3); + create_binary_instruction(proc, op_ltii, index_var_pseudo, limit_pseudo, stop_pseudo); + instruct_cbr(proc, stop_pseudo, Lend, Lbody); + + start_block(proc, Lbody); + instruct_move(proc, op_mov, var_sym->variable.pseudo, index_var_pseudo); + + start_scope(proc->linearizer, proc, node->for_stmt.for_body); + linearize_statement_list(proc, 
node->for_stmt.for_statement_list); + end_scope(proc->linearizer, proc); + + /* If the fornum block has escaped local vars then we need to close */ + if (proc->current_break_scope->need_close) { + /* Note we put close instruction in current basic block */ + instruct_close(proc, proc->current_bb, proc->current_break_scope); + } + instruct_br(proc, allocate_block_pseudo(proc, L1)); + + end_scope(proc->linearizer, proc); + + free_temp_pseudo(proc, stop_pseudo, false); + free_temp_pseudo(proc, step_positive, false); + free_temp_pseudo(proc, step_pseudo, false); + free_temp_pseudo(proc, limit_pseudo, false); + free_temp_pseudo(proc, index_var_pseudo, false); + + start_block(proc, Lend); + + proc->current_break_target = previous_break_target; + proc->current_break_scope = previous_break_scope; +} + +static void linearize_while_statment(Proc *proc, AstNode *node) +{ + BasicBlock *test_block = create_block(proc); + BasicBlock *body_block = create_block(proc); + BasicBlock *end_block = create_block(proc); + BasicBlock *previous_break_target = proc->current_break_target; + Scope *previous_break_scope = proc->current_break_scope; + proc->current_break_target = end_block; + proc->current_break_scope = node->while_or_repeat_stmt.loop_scope; + + if (node->type == STMT_REPEAT) { + instruct_br(proc, allocate_block_pseudo(proc, body_block)); + } + + start_block(proc, test_block); + Pseudo *condition_pseudo = linearize_expression(proc, node->while_or_repeat_stmt.condition); + instruct_cbr(proc, condition_pseudo, body_block, end_block); + free_temp_pseudo(proc, condition_pseudo, false); + + start_block(proc, body_block); + start_scope(proc->linearizer, proc, node->while_or_repeat_stmt.loop_scope); + linearize_statement_list(proc, node->while_or_repeat_stmt.loop_statement_list); + end_scope(proc->linearizer, proc); + + /* If the while/repeat block has escaped local vars then we need to close */ + if (proc->current_break_scope->need_close) { + instruct_close(proc, proc->current_bb, 
proc->current_break_scope); + } + instruct_br(proc, allocate_block_pseudo(proc, test_block)); + + start_block(proc, end_block); + + proc->current_break_target = previous_break_target; + proc->current_break_scope = previous_break_scope; +} + +static void linearize_function_statement(Proc *proc, AstNode *node) +{ + /* function funcname funcbody */ + /* funcname ::= Name {‘.’ Name} [‘:’ Name] */ + + // Note the similarity of following to the handling of suffixed expressions and assignment expressions + // In truth we could translate this to an expression statement - the only benefit here is that we + // do not allow selectors to be arbitrary expressions + Pseudo *prev_pseudo = linearize_symbol_expression(proc, node->function_stmt.name); + AstNode *prev_node = node->function_stmt.name; + AstNode *this_node; + FOR_EACH_PTR(node->function_stmt.selectors, this_node) + { + Pseudo *next; + if (this_node->type == EXPR_FIELD_SELECTOR) { + Pseudo *key_pseudo = linearize_expression(proc, this_node->index_expr.expr); + ravitype_t key_type = this_node->index_expr.expr->common_expr.type.type_code; + next = instruct_indexed_load(proc, prev_node->common_expr.type.type_code, prev_pseudo, key_type, + key_pseudo, this_node->common_expr.type.type_code); + } else { + next = NULL; + handle_error(proc->linearizer->ast_container, + "Unexpected expr type in function name selector list"); + } + prev_node = this_node; + prev_pseudo = next; + } + END_FOR_EACH_PTR(node) + // FIXME maybe better to add the method name to the selector list above in the parser + // then we could have just handled it above rather than as a special case + if (node->function_stmt.method_name) { + this_node = node->function_stmt.method_name; + if (this_node->type == EXPR_FIELD_SELECTOR) { + Pseudo *key_pseudo = linearize_expression(proc, this_node->index_expr.expr); + ravitype_t key_type = this_node->index_expr.expr->common_expr.type.type_code; + prev_pseudo = + instruct_indexed_load(proc, 
prev_node->common_expr.type.type_code, prev_pseudo, key_type, + key_pseudo, this_node->common_expr.type.type_code); + } else { + handle_error(proc->linearizer->ast_container, + "Unexpected expr type in function name selector list"); + } + prev_node = this_node; + } + Pseudo *function_pseudo = linearize_function_expr(proc, node->function_stmt.function_expr); + /* Following will potentially convert load to store */ + linearize_store_var(proc, &prev_node->common_expr.type, prev_pseudo, + &node->function_stmt.function_expr->common_expr.type, function_pseudo); +} + +static void linearize_statement(Proc *proc, AstNode *node) +{ + switch (node->type) { + case AST_NONE: { + break; + } + case STMT_RETURN: { + linearize_return(proc, node); + break; + } + case STMT_LOCAL: { + linearize_local_statement(proc, node); + break; + } + case STMT_FUNCTION: { + linearize_function_statement(proc, node); + break; + } + case STMT_LABEL: { + linearize_label_statement(proc, node); + break; + } + case STMT_GOTO: { + linearize_goto_statement(proc, node); + break; + } + case STMT_DO: { + linearize_do_statement(proc, node); + break; + } + case STMT_EXPR: { + linearize_expression_statement(proc, node); + break; + } + case STMT_IF: { + linearize_if_statement(proc, node); + break; + } + case STMT_WHILE: + case STMT_REPEAT: { + linearize_while_statment(proc, node); + break; + } + case STMT_FOR_IN: { + handle_error(proc->linearizer->ast_container, "STMT_FOR_IN not yet implemented"); + break; + } + case STMT_FOR_NUM: { + linearize_for_num_statement(proc, node); + break; + } + default: + handle_error(proc->linearizer->ast_container, "unknown statement type"); + break; + } +} + +/** + * Creates and initializes a basic block to be an empty block. Returns the new basic block. 
+ */ +static BasicBlock *create_block(Proc *proc) +{ + if (proc->node_count >= proc->allocated) { + unsigned new_size = proc->allocated + 25; + BasicBlock **new_data = + raviX_allocator_allocate(&proc->linearizer->unsized_allocator, new_size * sizeof(BasicBlock *)); + assert(new_data != NULL); + if (proc->node_count > 0) { + memcpy(new_data, proc->nodes, proc->allocated * sizeof(BasicBlock *)); + } + proc->allocated = new_size; + proc->nodes = new_data; + } + assert(proc->node_count < proc->allocated); + BasicBlock *new_block = raviX_allocator_allocate(&proc->linearizer->basic_block_allocator, 0); + /* note that each block must have an index that can be used to access the block as nodes[index] */ + new_block->index = proc->node_count; + proc->nodes[proc->node_count++] = new_block; + return new_block; +} + +/** + * Takes a basic block as an argument and makes it the current block. + * + * If the old current block is unterminated then this will terminate that + * block by adding an unconditional branch to the new current block. + * + * All future instructions will be added to the end of the new current block + */ +static void start_block(Proc *proc, BasicBlock *bb_to_start) +{ + // printf("Starting block %d\n", bb_to_start->index); + if (proc->current_bb && !is_block_terminated(proc->current_bb)) { + instruct_br(proc, allocate_block_pseudo(proc, bb_to_start)); + } + proc->current_bb = bb_to_start; +} + +/** + * Create the initial blocks entry and exit for the proc. + * sets current block to entry block. + */ +static void initialize_graph(Proc *proc) +{ + assert(proc != NULL); + BasicBlock *entry = create_block(proc); + assert(entry->index == ENTRY_BLOCK); + BasicBlock *exit = create_block(proc); + assert(exit->index == EXIT_BLOCK); + start_block(proc, entry); +} + +/** + * Makes given scope the current scope, and allocates registers for locals. 
+ */ +static void start_scope(LinearizerState *linearizer, Proc *proc, Scope *scope) +{ + proc->current_scope = scope; + LuaSymbol *sym; + FOR_EACH_PTR(scope->symbol_list, sym) + { + if (sym->symbol_type == SYM_LOCAL) { + uint8_t reg; + if (!sym->variable.escaped && !sym->variable.function_parameter && + (sym->variable.value_type.type_code == RAVI_TNUMFLT || + sym->variable.value_type.type_code == RAVI_TNUMINT)) { + Pseudo *pseudo; + if (sym->variable.value_type.type_code == RAVI_TNUMFLT) + pseudo = allocate_temp_pseudo(proc, RAVI_TNUMFLT); + else + pseudo = allocate_temp_pseudo(proc, RAVI_TNUMINT); + sym->variable.pseudo = pseudo; + pseudo->temp_for_local = sym; /* Note that this temp is for a local */ + } + else { + reg = allocate_register(&proc->local_pseudos); + allocate_symbol_pseudo(proc, sym, reg); + } + // printf("Assigning register %d to local %s\n", (int)reg, getstr(sym->var.var_name)); + } + } + END_FOR_EACH_PTR(sym) +} + +/** + * Deallocate local registers when the scope ends, in reverse order + * so that we have a stack discipline, and then changes current scope to be the + * parent scope. 
+ */ +static void end_scope(LinearizerState *linearizer, Proc *proc) +{ + Scope *scope = proc->current_scope; + LuaSymbol *sym; + if (scope->need_close) { + instruct_close(proc, proc->current_bb, scope); + } + FOR_EACH_PTR_REVERSE(scope->symbol_list, sym) + { + if (sym->symbol_type == SYM_LOCAL) { + Pseudo *pseudo = sym->variable.pseudo; + if (pseudo->type == PSEUDO_SYMBOL) { + assert(pseudo && pseudo->type == PSEUDO_SYMBOL && pseudo->symbol == sym); + // printf("Free register %d for local %s\n", (int)pseudo->regnum, getstr(sym->var.var_name)); + free_register(proc, &proc->local_pseudos, pseudo->regnum); + } + else if (pseudo->type == PSEUDO_TEMP_INT || pseudo->type == PSEUDO_TEMP_FLT || pseudo->type == PSEUDO_TEMP_BOOL) { + assert(sym == pseudo->temp_for_local); + free_temp_pseudo(proc, sym->variable.pseudo, true); + } + else { + assert(false); + } + } + } + END_FOR_EACH_PTR_REVERSE(sym) + proc->current_scope = scope->parent; +} + +static void linearize_function(LinearizerState *linearizer) +{ + Proc *proc = linearizer->current_proc; + assert(proc != NULL); + AstNode *func_expr = proc->function_expr; + assert(func_expr->type == EXPR_FUNCTION); + initialize_graph(proc); + assert(proc->node_count >= 2); + assert(proc->nodes[ENTRY_BLOCK] != NULL); + assert(proc->nodes[EXIT_BLOCK] != NULL); + start_scope(linearizer, proc, func_expr->function_expr.main_block); + linearize_function_args(linearizer); + linearize_statement_list(proc, func_expr->function_expr.function_statement_list); + end_scope(linearizer, proc); + if (!is_block_terminated(proc->current_bb)) { + //instruct_br(proc, allocate_block_pseudo(proc, proc->nodes[EXIT_BLOCK])); + Instruction *insn = allocate_instruction(proc, op_ret); + add_instruction_target(proc, insn, allocate_block_pseudo(proc, proc->nodes[EXIT_BLOCK])); + add_instruction(proc, insn); + } +} + +static void output_pseudo(Pseudo *pseudo, TextBuffer *mb) +{ + switch (pseudo->type) { + case PSEUDO_CONSTANT: { + const Constant *constant = 
pseudo->constant; + const char *tc = ""; + if (constant->type == RAVI_TNUMFLT) { + raviX_buffer_add_fstring(mb, "%.12f", constant->n); + tc = "flt"; + } else if (constant->type == RAVI_TNUMINT) { + raviX_buffer_add_fstring(mb, "%lld", (long long)constant->i); + tc = "int"; + } else { + raviX_buffer_add_fstring(mb, "'%s'", constant->s->str); + tc = "s"; + } + raviX_buffer_add_fstring(mb, " K%s(%d)", tc, pseudo->regnum); + } break; + case PSEUDO_TEMP_INT: + raviX_buffer_add_fstring(mb, "Tint(%d)", pseudo->regnum); + break; + case PSEUDO_TEMP_BOOL: + raviX_buffer_add_fstring(mb, "Tbool(%d)", pseudo->regnum); + break; + case PSEUDO_TEMP_FLT: + raviX_buffer_add_fstring(mb, "Tflt(%d)", pseudo->regnum); + break; + case PSEUDO_TEMP_ANY: + raviX_buffer_add_fstring(mb, "T(%d)", pseudo->regnum); + break; + case PSEUDO_RANGE_SELECT: + raviX_buffer_add_fstring(mb, "T(%d[%d..])", pseudo->regnum, pseudo->range_pseudo->regnum); + break; + case PSEUDO_PROC: + raviX_buffer_add_fstring(mb, "Proc%%%d", pseudo->proc->id); + break; + case PSEUDO_NIL: + raviX_buffer_add_string(mb, "nil"); + break; + case PSEUDO_FALSE: + raviX_buffer_add_string(mb, "false"); + break; + case PSEUDO_TRUE: + raviX_buffer_add_string(mb, "true"); + break; + case PSEUDO_SYMBOL: + switch (pseudo->symbol->symbol_type) { + case SYM_LOCAL: { + raviX_buffer_add_fstring(mb, "local(%s, %d)", pseudo->symbol->variable.var_name->str, + pseudo->regnum); + break; + } + case SYM_UPVALUE: { + if (pseudo->symbol->upvalue.target_variable->symbol_type == SYM_LOCAL) { + raviX_buffer_add_fstring(mb, "Upval(%u, Proc%%%d, %s)", pseudo->regnum, + pseudo->symbol->upvalue.target_variable->variable.block->function->function_expr.proc_id, + pseudo->symbol->upvalue.target_variable->variable.var_name->str); + } + else if (pseudo->symbol->upvalue.target_variable->symbol_type == SYM_ENV) { + raviX_buffer_add_fstring(mb, "Upval(%s)", + pseudo->symbol->upvalue.target_variable->variable.var_name->str); + } + break; + } + case SYM_GLOBAL: { + 
raviX_buffer_add_string(mb, pseudo->symbol->variable.var_name->str);
+			break;
+		}
+		default:
+			// handle_error(proc->linearizer->ast_container, "feature not yet implemented");
+			abort();
+		}
+		break;
+	case PSEUDO_BLOCK: {
+		raviX_buffer_add_fstring(mb, "L%d", pseudo->block ? (int)pseudo->block->index : -1);
+		break;
+	}
+	case PSEUDO_RANGE: {
+		raviX_buffer_add_fstring(mb, "T(%d..)", pseudo->regnum);
+		break;
+	}
+	}
+}
+
+/*
+ * Printable names of the IR opcodes. The table is indexed by the opcode value,
+ * so the entries must stay in exactly the same order as 'enum opcode' in
+ * linearizer.h (op_nop ... op_string_concat).
+ * Entry 35 (op_ltii) used to be misspelt "LIii"; it now reads "LTii", which
+ * changes the text of IR dumps accordingly.
+ */
+static const char *op_codenames[] = {
+    "NOOP",	   "RET",     "ADD",	"ADDff",  "ADDfi",	"ADDii",     "SUB",	  "SUBff",    "SUBfi",
+    "SUBif",	   "SUBii",   "MUL",	"MULff",  "MULfi",	"MULii",     "DIV",	  "DIVff",    "DIVfi",
+    "DIVif",	   "DIVii",   "IDIV",	"BAND",	  "BANDii",	"BOR",	     "BORii",	  "BXOR",     "BXORii",
+    "SHL",	   "SHLii",   "SHR",	"SHRii",  "EQ",		"EQii",	     "EQff",	  "LT",	      "LTii",
+    "LTff",	   "LE",      "LEii",	"LEff",	  "MOD",	"POW",	     "CLOSURE",	  "UNM",      "UNMi",
+    "UNMf",	   "LEN",     "LENi",	"TOINT",  "TOFLT",	"TOCLOSURE", "TOSTRING",  "TOIARRAY", "TOFARRAY",
+    "TOTABLE",	   "TOTYPE",  "NOT",	"BNOT",	  "LOADGLOBAL", "NEWTABLE",  "NEWIARRAY", "NEWFARRAY", "PUT",
+    "PUTik",	   "PUTsk",   "TPUT",	"TPUTik", "TPUTsk",	"IAPUT",     "IAPUTiv",	  "FAPUT",    "FAPUTfv",
+    "CBR",	   "BR",      "MOV",	"MOVi",	  "MOVif",	"MOVf",	     "MOVfi",	  "CALL",     "GET",
+    "GETik",	   "GETsk",   "TGET",	"TGETik", "TGETsk",	"IAGET",     "IAGETik",	  "FAGET",    "FAGETik",
+    "STOREGLOBAL", "CLOSE",   "STRCONCAT"};
+
+/*
+ * Appends " {p1, p2, ...}" to mb, where each p is the textual form of one
+ * pseudo in the list as produced by output_pseudo().
+ */
+static void output_pseudo_list(PseudoList *list, TextBuffer *mb)
+{
+	Pseudo *pseudo;
+	raviX_buffer_add_string(mb, " {");
+	int i = 0;
+	FOR_EACH_PTR(list, pseudo)
+	{
+		if (i > 0)
+			raviX_buffer_add_string(mb, ", ");
+		output_pseudo(pseudo, mb);
+		i++;
+	}
+	END_FOR_EACH_PTR(pseudo)
+	raviX_buffer_add_string(mb, "}");
+}
+
+/*
+ * Returns the printable name of an opcode.
+ * A value that has no entry in op_codenames yields "UNKNOWN" instead of
+ * reading past the end of the table.
+ */
+const char *raviX_opcode_name(unsigned int opcode)
+{
+	if (opcode >= sizeof op_codenames / sizeof op_codenames[0])
+		return "UNKNOWN";
+	return op_codenames[opcode];
+}
+
+/*
+ * Appends one instruction to mb: prefix, opcode name, the operand list and
+ * the target list (each only when present), then suffix.
+ */
+static void output_instruction(Instruction *insn, TextBuffer *mb, const char *prefix, const char *suffix)
+{
+	/* insn->opcode is an 8-bit field, so go through the bounds-checked lookup */
+	raviX_buffer_add_fstring(mb, "%s%s", prefix, raviX_opcode_name(insn->opcode));
+	if (insn->operands) {
+		output_pseudo_list(insn->operands, mb);
+	}
+	if 
(insn->targets) { + output_pseudo_list(insn->targets, mb); + } + raviX_buffer_add_string(mb, suffix); +} + +static void output_instructions(InstructionList *list, TextBuffer *mb, const char *prefix, const char *suffix) +{ + Instruction *insn; + FOR_EACH_PTR(list, insn) { output_instruction(insn, mb, prefix, suffix); } + END_FOR_EACH_PTR(insn) +} + +static void output_basic_block(Proc *proc, BasicBlock *bb, TextBuffer *mb) +{ + raviX_buffer_add_fstring(mb, "L%d", bb->index); + if (bb->index == ENTRY_BLOCK) { + raviX_buffer_add_string(mb, " (entry)\n"); + } else if (bb->index == EXIT_BLOCK) { + raviX_buffer_add_string(mb, " (exit)\n"); + } else { + raviX_buffer_add_string(mb, "\n"); + } + output_instructions(bb->insns, mb, "\t", "\n"); +} + +void raviX_output_basic_block_as_table(Proc *proc, BasicBlock *bb, TextBuffer *mb) +{ + raviX_buffer_add_string(mb, "\n"); + raviX_buffer_add_fstring(mb, "\n", bb->index); + output_instructions(bb->insns, mb, "\n"); + raviX_buffer_add_string(mb, "
L%d
", "
"); +} + + +static void output_proc(Proc *proc, TextBuffer *mb) +{ + BasicBlock *bb; + raviX_buffer_add_fstring(mb, "define Proc%%%d\n", proc->id); + for (int i = 0; i < (int)proc->node_count; i++) { + bb = proc->nodes[i]; + output_basic_block(proc, bb, mb); + } +} + +int raviX_ast_linearize(LinearizerState *linearizer) +{ + Proc *proc = allocate_proc(linearizer, linearizer->ast_container->main_function); + set_main_proc(linearizer, proc); + set_current_proc(linearizer, proc); + int rc = setjmp(linearizer->ast_container->env); + if (rc == 0) { + linearize_function(linearizer); + } else { + // dump it + // raviX_output_linearizer(linearizer, stderr); + } + return rc; +} + +void raviX_show_linearizer(LinearizerState *linearizer, TextBuffer *mb) +{ + output_proc(linearizer->main_proc, mb); + Proc *proc; + FOR_EACH_PTR(linearizer->all_procs, proc) + { + if (proc == linearizer->main_proc) + continue; + output_proc(proc, mb); + } + END_FOR_EACH_PTR(proc) +} + +void raviX_output_linearizer(LinearizerState *linearizer, FILE *fp) +{ + TextBuffer mb; + raviX_buffer_init(&mb, 4096); + raviX_show_linearizer(linearizer, &mb); + fputs(mb.buf, fp); + raviX_buffer_free(&mb); +} diff --git a/ravicomp/src/linearizer.h b/ravicomp/src/linearizer.h new file mode 100644 index 0000000..ee357f6 --- /dev/null +++ b/ravicomp/src/linearizer.h @@ -0,0 +1,244 @@ +#ifndef ravicomp_LINEARIZER_H +#define ravicomp_LINEARIZER_H + +#include "ravi_compiler.h" + +#include "common.h" +#include "parser.h" +#include "allocate.h" +#include "membuf.h" +#include "ptrlist.h" + +/* +Linearizer component is responsible for translating the abstract syntax tree to +a Linear intermediate representation (IR). 
+*/
+/* Forward declarations so that the types below can refer to each other */
+typedef struct Instruction Instruction;
+typedef struct BasicBlock BasicBlock;
+typedef struct Proc Proc;
+typedef struct Constant Constant;
+typedef struct Graph Graph;
+
+/* Pointer list types for instructions, pseudos and procs */
+DECLARE_PTR_LIST(InstructionList, Instruction);
+DECLARE_PTR_LIST(PseudoList, Pseudo);
+DECLARE_PTR_LIST(ProcList, Proc);
+
+/* Given a pointer to 'member' inside an object of 'type', yields a pointer to that
+ * enclosing object by subtracting the member's offset */
+#define container_of(ptr, type, member) ((type *)((char *)(ptr)-offsetof(type, member)))
+
+/*
+ * IR opcodes.
+ * The order is important here: the opcode value is used as an index into the
+ * table of printable names (op_codenames in linearizer.c), so adding, removing
+ * or reordering an enumerator requires the same change in that table.
+ */
+enum opcode {
+	op_nop,
+	op_ret,
+	op_add,
+	op_addff,
+	op_addfi,
+	op_addii,
+	op_sub,
+	op_subff,
+	op_subfi,
+	op_subif,
+	op_subii,
+	op_mul,
+	op_mulff,
+	op_mulfi,
+	op_mulii,
+	op_div,
+	op_divff,
+	op_divfi,
+	op_divif,
+	op_divii,
+	op_idiv,
+	op_band,
+	op_bandii,
+	op_bor,
+	op_borii,
+	op_bxor,
+	op_bxorii,
+	op_shl,
+	op_shlii,
+	op_shr,
+	op_shrii,
+	op_eq,
+	op_eqii,
+	op_eqff,
+	op_lt,
+	op_ltii,
+	op_ltff,
+	op_le,
+	op_leii,
+	op_leff,
+	op_mod,
+	op_pow,
+	op_closure,
+	op_unm,
+	op_unmi,
+	op_unmf,
+	op_len,
+	op_leni,
+	op_toint,
+	op_toflt,
+	op_toclosure,
+	op_tostring,
+	op_toiarray,
+	op_tofarray,
+	op_totable,
+	op_totype,
+	op_not,
+	op_bnot,
+	op_loadglobal,
+	op_newtable,
+	op_newiarray,
+	op_newfarray,
+	op_put, /* target is any */
+	op_put_ikey,
+	op_put_skey,
+	op_tput, /* target is table */
+	op_tput_ikey,
+	op_tput_skey,
+	op_iaput, /* target is integer[]*/
+	op_iaput_ival,
+	op_faput, /* target is number[] */
+	op_faput_fval,
+	op_cbr,
+	op_br,
+	op_mov,
+	op_movi,
+	op_movif, /* int to float if compatible else error */
+	op_movf,
+	op_movfi, /* float to int if compatible else error */
+	op_call,
+	op_get,
+	op_get_ikey,
+	op_get_skey,
+	op_tget,
+	op_tget_ikey,
+	op_tget_skey,
+	op_iaget,
+	op_iaget_ikey,
+	op_faget,
+	op_faget_ikey,
+	op_storeglobal,
+	op_close,
+	op_string_concat
+};
+
+/*
+* The IR instructions use operands and targets of type pseudo, which
+* is a way of referencing several different types of objects. 
+*/ +enum PseudoType { + PSEUDO_SYMBOL, /* An object of type lua_symbol representing local var or upvalue, always refers to Lua stack relative to 'base' */ + PSEUDO_TEMP_FLT, /* A floating point temp - may also be used for locals that don't escape - refers to C var */ + PSEUDO_TEMP_INT, /* An integer temp - may also be used for locals that don't escape - refers to C var */ + PSEUDO_TEMP_BOOL, /* An integer temp but restricted to 1 and 0 - refers to C var, shares the virtual C stack with PSEUDO_TEMP_INT */ + PSEUDO_TEMP_ANY, /* A temp of any type - will always be on Lua stack relative to 'base' */ + PSEUDO_CONSTANT, /* A literal value */ + PSEUDO_PROC, /* A proc / function */ + PSEUDO_NIL, /* Literal */ + PSEUDO_TRUE, /* Literal */ + PSEUDO_FALSE, /* Literal */ + PSEUDO_BLOCK, /* Points to a basic block, used as targets for jumps */ + PSEUDO_RANGE, /* Represents a range of registers from a certain starting register on Lua stack relative to 'base' */ + PSEUDO_RANGE_SELECT, /* Picks a certain register from a range, resolves to register on Lua stack, relative to 'base' */ + /* TODO we need a type for var args */ + PSEUDO_LUASTACK /* Specifies a Lua stack position - not used by linearizer - for use by codegen. 
This is relative to CI->func rather than 'base' */ +}; + +/* pseudo represents a pseudo (virtual) register */ +struct Pseudo { + unsigned type : 4, regnum : 16, freed : 1; + Instruction *insn; /* instruction that created this pseudo */ + union { + LuaSymbol *symbol; /* PSEUDO_SYMBOL */ + const Constant *constant; /* PSEUDO_CONSTANT */ + LuaSymbol *temp_for_local; /* PSEUDO_TEMP - if the temp represents a local */ + Proc *proc; /* PSEUDO_PROC */ + BasicBlock *block; /* PSEUDO_BLOCK */ + Pseudo *range_pseudo; /* PSEUDO_RANGE_SELECT */ + int stackidx; /* PSEUDO_LUASTACK */ + }; +}; + +/* single instruction */ +struct Instruction { + unsigned opcode : 8; + PseudoList *operands; + PseudoList *targets; + BasicBlock *block; /* owning block */ +}; + +/* Basic block */ +struct BasicBlock { + nodeId_t index; /* The index of the block is a key to enable retrieving the block from its container */ + InstructionList *insns; /* Note that if number of instructions is 0 then the block was logically deleted */ +}; +DECLARE_PTR_LIST(BasicBlockList, BasicBlock); + +typedef struct PseudoGenerator { + uint8_t next_reg; /* Next register if no free registers, initially 0 */ + int16_t free_pos; /* number of values in free_regs */ + uint8_t free_regs[256]; /* list of free registers */ +} PseudoGenerator; + +struct Constant { + uint8_t type; /* ravitype_t RAVI_TNUMINT, RAVI_TNUMFLT or RAVI_TSTRING */ + uint16_t index; /* index number starting from 0 assigned to each constant - acts like a reg num. 
+ * Each type will be assigned separate range */ + union { + lua_Integer i; + lua_Number n; + const StringObject *s; + }; +}; + +/* proc is a type of cfg */ +struct Proc { + unsigned node_count; + unsigned allocated; + BasicBlock **nodes; + uint32_t id; /* ID for the proc */ + LinearizerState *linearizer; + ProcList *procs; /* procs defined in this proc */ + Proc *parent; /* enclosing proc */ + AstNode *function_expr; /* function ast that we are compiling */ + Scope *current_scope; + BasicBlock *current_bb; + BasicBlock *current_break_target; /* track the current break target, previous target must be saved / + restored in stack discipline */ + Scope *current_break_scope; /* as above track the block scope */ + PseudoGenerator local_pseudos; /* locals */ + PseudoGenerator temp_int_pseudos; /* temporaries known to be integer type */ + PseudoGenerator temp_flt_pseudos; /* temporaries known to be number type */ + PseudoGenerator temp_pseudos; /* All other temporaries */ + Set *constants; /* constants used by this proc */ + uint16_t num_intconstants; + uint16_t num_fltconstants; + uint16_t num_strconstants; + Graph *cfg; /* place holder for control flow graph; the linearizer does not create this */ + char funcname[30]; /* Each proc needs a name inside a module - name is a short string */ + void *userdata; /* For use by code generator */ +}; + +struct LinearizerState { + Allocator instruction_allocator; + Allocator pseudo_allocator; + Allocator ptrlist_allocator; + Allocator basic_block_allocator; + Allocator proc_allocator; + Allocator unsized_allocator; + Allocator constant_allocator; + CompilerState *ast_container; + Proc *main_proc; /* The root of the compiled chunk of code */ + ProcList *all_procs; /* All procs allocated by the linearizer */ + Proc *current_proc; /* proc being compiled */ + uint32_t proc_id; +}; + +void raviX_show_linearizer(LinearizerState *linearizer, TextBuffer *mb); +void raviX_output_basic_block_as_table(Proc *proc, BasicBlock *bb, TextBuffer 
*mb); + +Instruction *raviX_last_instruction(BasicBlock *block); +Pseudo* raviX_allocate_stack_pseudo(Proc* proc, unsigned reg); +const char *raviX_opcode_name(unsigned int opcode); + +#endif \ No newline at end of file diff --git a/ravicomp/src/membuf.c b/ravicomp/src/membuf.c new file mode 100644 index 0000000..b159ba7 --- /dev/null +++ b/ravicomp/src/membuf.c @@ -0,0 +1,117 @@ +/* +Copyright (C) 2018-2020 Dibyendu Majumdar +*/ + +#include "membuf.h" + +#include +#include +#include +#include +#include +#include +#include + +void raviX_string_copy(char *buf, const char *src, size_t buflen) +{ + if (buflen == 0) + return; + strncpy(buf, src, buflen); + buf[buflen - 1] = 0; +} + +void raviX_buffer_init(TextBuffer *mb, size_t initial_size) +{ + if (initial_size > 0) { + mb->buf = (char *)calloc(1, initial_size); + if (mb->buf == NULL) { + fprintf(stderr, "out of memory\n"); + exit(1); + } + } else + mb->buf = NULL; + mb->pos = 0; + mb->capacity = initial_size; +} +void raviX_buffer_resize(TextBuffer *mb, size_t new_size) +{ + if (new_size <= mb->capacity) + return; + char *newmem = (char *)realloc(mb->buf, new_size); + if (newmem == NULL) { + fprintf(stderr, "Out of memory\n"); + exit(1); + } + mb->buf = newmem; + mb->capacity = new_size; +} +void raviX_buffer_reserve(TextBuffer *mb, size_t n) +{ + if (mb->capacity < mb->pos + n) { + size_t new_size = (((mb->pos + n) * 3 + 30) / 2) & ~15; + raviX_buffer_resize(mb, new_size); + assert(mb->capacity > mb->pos + n); + } +} +void raviX_buffer_free(TextBuffer *mb) { free(mb->buf); } +void raviX_buffer_add_bytes(TextBuffer *mb, const char *str, size_t len) +{ + size_t required_size = mb->pos + len + 1; /* extra byte for NULL terminator */ + raviX_buffer_resize(mb, required_size); + assert(mb->capacity - mb->pos > len); + raviX_string_copy(&mb->buf[mb->pos], str, mb->capacity - mb->pos); + mb->pos += len; +} +void raviX_buffer_add_string(TextBuffer *mb, const char *str) +{ + size_t len = strlen(str); + 
raviX_buffer_add_bytes(mb, str, len); +} + +void raviX_buffer_add_fstring(TextBuffer *mb, const char *fmt, ...) +{ + va_list args; + int estimated_size = 128; + + for (int i = 0; i < 2; i++) { + raviX_buffer_reserve(mb, estimated_size); // ensure we have at least estimated_size free space + va_start(args, fmt); + int n = vsnprintf(mb->buf + mb->pos, estimated_size, fmt, args); + va_end(args); + if (n > estimated_size) { + estimated_size = n + 1; // allow for 0 byte + } else if (n < 0) { + fprintf(stderr, "Buffer conversion error\n"); + assert(false); + break; + } else { + mb->pos += n; + break; + } + } +} + +void raviX_buffer_add_bool(TextBuffer *mb, bool value) +{ + if (value) + raviX_buffer_add_string(mb, "true"); + else + raviX_buffer_add_string(mb, "false"); +} +void raviX_buffer_add_int(TextBuffer *mb, int value) +{ + char temp[100]; + snprintf(temp, sizeof temp, "%d", value); + raviX_buffer_add_string(mb, temp); +} +void raviX_buffer_add_longlong(TextBuffer *mb, int64_t value) +{ + char temp[100]; + snprintf(temp, sizeof temp, "%" PRId64 "", value); + raviX_buffer_add_string(mb, temp); +} +void raviX_buffer_add_char(TextBuffer *mb, char c) +{ + char temp[2] = {c, '\0'}; + raviX_buffer_add_string(mb, temp); +} diff --git a/ravicomp/src/membuf.h b/ravicomp/src/membuf.h new file mode 100644 index 0000000..d1abf8b --- /dev/null +++ b/ravicomp/src/membuf.h @@ -0,0 +1,27 @@ +#ifndef ravicomp_MEMBUF_H +#define ravicomp_MEMBUF_H + +#include "ravi_compiler.h" + +#include +#include +#include +#include +#include + +extern void raviX_buffer_add_bool(TextBuffer *mb, bool value); +extern void raviX_buffer_add_int(TextBuffer *mb, int value); +extern void raviX_buffer_add_longlong(TextBuffer *mb, int64_t value); +extern void raviX_buffer_add_char(TextBuffer *mb, char c); + +/* Following add and remove raw bytes */ + +/* Unchecked - user must first resize */ +static inline void raviX_buffer_addc(TextBuffer *mb, int c) +{ + mb->buf[mb->pos++] = (char)c; + assert(mb->pos < 
mb->capacity); +} +static inline void raviX_buffer_remove(TextBuffer *mb, int i) { mb->pos -= i; } + +#endif diff --git a/ravicomp/src/opt_unusedcode.c b/ravicomp/src/opt_unusedcode.c new file mode 100644 index 0000000..43f304a --- /dev/null +++ b/ravicomp/src/opt_unusedcode.c @@ -0,0 +1,80 @@ +/* A pass over linearized code to eliminate unused code. + * Initially only tackle unreachable basic blocks + */ + +#include "linearizer.h" +#include "cfg.h" +#include "graph.h" +#include "allocate.h" +#include "optimizer.h" + +/** + * Check if a basic block has 0 predecessors. If so we can remove it from the CFG. + * We also remove all the instructions in the basic block + */ +static int process_block(LinearizerState *linearizer, Proc *proc, BasicBlock *bb) +{ + GraphNode *node = raviX_graph_node(proc->cfg, bb->index); + GraphNodeList *predecessors = raviX_predecessors(node); + if (raviX_node_list_size(predecessors) != 0) { + // Has predecessors so nothing to do + return 0; + } + // No predecessor blocks, so we can remove this block + GraphNodeList *successors = raviX_successors(node); + uint32_t count = raviX_node_list_size(successors); + if (count == 0) { + // Nothing to do, but odd? + // FIXME maybe assert? + return 0; + } + // Make a copy of the successor node list as we need to change the CFG + nodeId_t *nodes = raviX_realloc_array(NULL, sizeof(nodeId_t), 0, count); + for (uint32_t i = 0; i < count; i++) { + nodes[i] = raviX_node_list_at(successors, i); + } + for (uint32_t i = 0; i < count; i++) { + // Remove edge from bb to the successor node + raviX_delete_edge(proc->cfg, bb->index, nodes[i]); + } + free(nodes); + assert(raviX_node_list_size(successors) == 0); // All should be gone + // Now clear out this bb + // FIXME deallocate instructions + raviX_ptrlist_remove_all((struct ptr_list **)&bb->insns); + // FIXME do we deallocate bb? 
+	return 1; // We changed something
+}
+
+/**
+ * Removes the unreachable blocks of one proc.
+ * The control flow graph is constructed first when the proc does not have one.
+ * All blocks other than the entry and exit blocks are then processed
+ * repeatedly until a complete pass changes nothing - removing the outgoing
+ * edges of one block can leave its successors without predecessors.
+ * Returns 1 when the control flow graph could not be constructed, else 0.
+ */
+static int process_proc(LinearizerState *linearizer, Proc *proc)
+{
+	if (proc->cfg == NULL) {
+		if (raviX_construct_cfg(proc) != 0) {
+			return 1;
+		}
+	}
+	int changed = 1;
+	while (changed) {
+		changed = 0;
+		BasicBlock *bb;
+		for (int i = 0; i < (int)proc->node_count; i++) {
+			bb = proc->nodes[i];
+			/* the entry and exit blocks are never removed */
+			if (bb->index == ENTRY_BLOCK || bb->index == EXIT_BLOCK)
+				continue;
+			changed |= process_block(linearizer, proc, bb);
+		}
+	}
+	return 0;
+}
+
+/**
+ * Runs the unreachable block removal over every proc known to the linearizer.
+ * Stops at the first proc that fails and returns 1; returns 0 on success.
+ */
+int raviX_remove_unreachable_blocks(LinearizerState *linearizer)
+{
+	Proc *proc;
+	FOR_EACH_PTR(linearizer->all_procs, proc)
+	{
+		if (process_proc(linearizer, proc) != 0)
+			return 1;
+	}
+	END_FOR_EACH_PTR(proc)
+	return 0;
+}
\ No newline at end of file
diff --git a/ravicomp/src/optimizer.h b/ravicomp/src/optimizer.h
new file mode 100644
index 0000000..7cf9314
--- /dev/null
+++ b/ravicomp/src/optimizer.h
@@ -0,0 +1,10 @@
+#ifndef ravicomp_OPTIMIZER_H
+#define ravicomp_OPTIMIZER_H
+
+/**
+ * Remove blocks that are unreachable. Blocks are logically deleted by removing
+ * all instructions, rather than being physically removed.
+ * Returns 0 on success and 1 on failure.
+ */
+extern int raviX_remove_unreachable_blocks(LinearizerState *linearizer);
+
+#endif
\ No newline at end of file
diff --git a/ravicomp/src/parser.c b/ravicomp/src/parser.c
new file mode 100644
index 0000000..33a5c12
--- /dev/null
+++ b/ravicomp/src/parser.c
@@ -0,0 +1,1748 @@
+/*
+A parser and syntax tree builder for Ravi.
+Note that the overall structure of the parser is loosely based on the Lua 5.3 parser.
+
+The parser retains the syntactic structure - including constant expressions and some redundant
+syntax nodes because these are useful for testing and understanding. 
+ +A later pass simplifies the AST - see ast_simplify.c + +Copyright (C) 2018-2020 Dibyendu Majumdar +*/ +/* Portions Copyright (C) 1994-2019 Lua.org, PUC-Rio.*/ + +#include "fnv_hash.h" +#include + +/* forward declarations */ +static AstNode *parse_expression(struct parser_state *); +static void parse_statement_list(struct parser_state *, AstNodeList **list); +static AstNode *parse_statement(struct parser_state *); +static AstNode *new_function(struct parser_state *parser); +static AstNode *end_function(struct parser_state *parser); +static Scope *new_scope(struct parser_state *parser); +static void end_scope(struct parser_state *parser); +static AstNode *new_literal_expression(struct parser_state *parser, ravitype_t type); +static AstNode *generate_label(struct parser_state *parser, const StringObject *label); +static void add_local_symbol_to_current_scope(struct parser_state *parser, LuaSymbol *sym); + +static void add_symbol(CompilerState *container, LuaSymbolList **list, LuaSymbol *sym) +{ + raviX_ptrlist_add((struct ptr_list **)list, sym, &container->ptrlist_allocator); +} + +static void add_ast_node(CompilerState *container, AstNodeList **list, AstNode *node) +{ + raviX_ptrlist_add((struct ptr_list **)list, node, &container->ptrlist_allocator); +} + +static AstNode *allocate_ast_node(struct parser_state *parser, enum AstNodeType type) +{ + AstNode *node = (AstNode *)raviX_allocator_allocate(&parser->container->ast_node_allocator, 0); + node->type = type; + node->line_number = parser->ls->lastline; + return node; +} + +static AstNode *allocate_expr_ast_node(struct parser_state *parser, enum AstNodeType type) +{ + AstNode *node = allocate_ast_node(parser, type); + node->common_expr.truncate_results = 0; + set_typecode(&node->common_expr.type, RAVI_TANY); + return node; +} + +static void error_expected(LexerState *ls, int token) +{ + raviX_token2str(token, &ls->container->error_message); + raviX_buffer_add_string(&ls->container->error_message, " expected"); + 
longjmp(ls->container->env, 1);
+}
+
+/* If the current token equals 'c', consume it and return 1;
+ * otherwise leave the lexer where it is and return 0. */
+static int testnext(LexerState *ls, int c)
+{
+	if (ls->t.token != c)
+		return 0;
+	raviX_next(ls);
+	return 1;
+}
+
+/* Calls error_expected() unless the current token is 'c'. Does not advance. */
+static void check(LexerState *ls, int c)
+{
+	if (ls->t.token == c)
+		return;
+	error_expected(ls, c);
+}
+
+/* Same test as check(), followed by an advance to the next token. */
+static void checknext(LexerState *ls, int c)
+{
+	if (ls->t.token != c)
+		error_expected(ls, c);
+	raviX_next(ls);
+}
+
+/*============================================================*/
+/* GRAMMAR RULES */
+/*============================================================*/
+
+/*
+** Tells whether the current token belongs to the follow set of a block.
+** 'until' closes a syntactical block but does not close its scope,
+** so the caller decides (through 'withuntil') whether it counts.
+*/
+static int block_follow(LexerState *ls, int withuntil)
+{
+	const int tok = ls->t.token;
+	if (tok == TOK_until)
+		return withuntil;
+	return tok == TOK_else || tok == TOK_elseif || tok == TOK_end || tok == TOK_EOS;
+}
+
+/* Expects the closing token 'what' that matches the opening token 'who'
+ * seen at line 'where'. When the opener is on a different line the error
+ * message also names the opener and its line. */
+static void check_match(LexerState *ls, int what, int who, int where)
+{
+	if (testnext(ls, what))
+		return; /* closing token found and consumed */
+	if (where == ls->linenumber) {
+		/* opener is on the current line: the short message is enough */
+		error_expected(ls, what);
+		return;
+	}
+	TextBuffer text;
+	raviX_buffer_init(&text, 256);
+	raviX_token2str(what, &text);
+	raviX_buffer_add_string(&text, " expected (to close ");
+	raviX_token2str(who, &text);
+	raviX_buffer_add_fstring(&text, " at line %d)", where);
+	/* copy into a stack array so that the buffer can be freed before the
+	 * error is reported (raviX_syntaxerror presumably does not return) */
+	char message[1024];
+	raviX_string_copy(message, raviX_buffer_data(&text), sizeof message);
+	raviX_buffer_free(&text);
+	raviX_syntaxerror(ls, message);
+}
+
+/* Requires the current token to be a name, returns its string and advances */
+static const StringObject *check_name_and_next(LexerState *ls)
+{
+	check(ls, TOK_NAME);
+	const StringObject *name = ls->t.seminfo.ts; /* read before the token is replaced */
+	raviX_next(ls);
+	return name;
+}
+
+/* Allocates a SYM_LOCAL symbol named 'name' that belongs to the current scope
+ * and has type 'tt' / 'usertype' (RAVI - added type tt).
+ * The symbol is only created here; it is not added to any symbol list. */
+static LuaSymbol *new_local_symbol(struct parser_state *parser, const StringObject *name, ravitype_t tt,
+				   const StringObject *usertype)
+{
+	LuaSymbol *local = raviX_allocator_allocate(&parser->container->symbol_allocator, 0);
+	set_typename(&local->variable.value_type, tt, usertype);
+	local->symbol_type = SYM_LOCAL;
+	local->variable.block = parser->current_scope;
+	local->variable.var_name = name;
+	local->variable.pseudo = NULL;
+	local->variable.escaped = 0;
+	return local;
+}
+
+/* Allocates a SYM_LABEL symbol and appends it to the current scope's symbol list */
+static LuaSymbol *new_label(struct parser_state *parser, const StringObject *name)
+{
+	Scope *owner = parser->current_scope;
+	assert(owner);
+	LuaSymbol *label = raviX_allocator_allocate(&parser->container->symbol_allocator, 0);
+	label->symbol_type = SYM_LABEL;
+	label->label.block = owner;
+	label->label.label_name = name;
+	// Appended at the end of the scope's symbol list; Lua allows the
+	// same name to be declared more than once, so no duplicate check is made
+	add_symbol(parser->container, &owner->symbol_list, label);
+	return label;
+}
+
+/* Creates an untyped (RAVI_TANY) local symbol from a character array of length 'sz'
+ */
+static LuaSymbol *new_localvarliteral_(struct parser_state *parser, const char *name, size_t sz)
+{
+	const StringObject *interned = raviX_create_string(parser->container, name, (uint32_t)sz);
+	return new_local_symbol(parser, interned, RAVI_TANY, NULL);
+}
+
+/* Creates an untyped local symbol from a string literal; the length excludes the terminating NUL
+ */
+#define new_localvarliteral(parser, name) new_localvarliteral_(parser, "" name, (sizeof(name) / sizeof(char)) - 1)
+
+/* Looks for a local variable called 'varname' in a single scope; NULL if absent.
+ * Names are compared by pointer. */
+static LuaSymbol *search_for_variable_in_block(Scope *scope, const StringObject *varname)
+{
+	LuaSymbol *candidate;
+	// Walk the list backwards so that the most recently declared local
+	// wins - Lua allows the same name to be declared local more than
+	// once in a scope.
+	// Should also work with nesting as the function when parsed
+	// will only know about vars declared in parent function until
+	// now.
+	FOR_EACH_PTR_REVERSE(scope->symbol_list, candidate)
+	{
+		if (candidate->symbol_type == SYM_LOCAL && varname == candidate->variable.var_name)
+			return candidate;
+	}
+	END_FOR_EACH_PTR_REVERSE(candidate);
+	return NULL;
+}
+
+/* Scans the upvalue list of 'function' for an upvalue whose target variable is called 'name'
+ */
+static LuaSymbol *search_upvalue_in_function(AstNode *function, const StringObject *name)
+{
+	LuaSymbol *candidate;
+	FOR_EACH_PTR(function->function_expr.upvalues, candidate)
+	{
+		if (candidate->symbol_type == SYM_UPVALUE) {
+			assert(candidate->upvalue.target_variable->symbol_type == SYM_LOCAL ||
+			       candidate->upvalue.target_variable->symbol_type == SYM_ENV);
+			if (name == candidate->upvalue.target_variable->variable.var_name)
+				return candidate;
+		}
+	}
+	END_FOR_EACH_PTR(candidate);
+	return NULL;
+}
+
+/* Each function has a list of upvalues, searches this list for given name, and adds it if not found.
+ * Returns true if added, false means the function already has the upvalue.
+ */
+static bool add_upvalue_in_function(struct parser_state *parser, AstNode *function, LuaSymbol *sym)
+{
+	assert(sym->symbol_type == SYM_LOCAL || sym->symbol_type == SYM_ENV);
+	/* first pass: is 'sym' already the target of one of this function's upvalues? */
+	LuaSymbol *existing;
+	FOR_EACH_PTR(function->function_expr.upvalues, existing)
+	{
+		if (existing->symbol_type == SYM_UPVALUE) {
+			assert(existing->upvalue.target_variable->symbol_type == SYM_LOCAL ||
+			       existing->upvalue.target_variable->symbol_type == SYM_ENV);
+			if (existing->upvalue.target_variable == sym)
+				return false;
+		}
+	}
+	END_FOR_EACH_PTR(existing);
+	/* not present: create the upvalue symbol and append it */
+	LuaSymbol *upvalue = raviX_allocator_allocate(&parser->container->symbol_allocator, 0);
+	upvalue->symbol_type = SYM_UPVALUE;
+	upvalue->upvalue.target_variable = sym;
+	upvalue->upvalue.target_function = function;
+	/* the index is the list size before the append, i.e. the position of the upvalue in the function */
+	upvalue->upvalue.upvalue_index =
+	    raviX_ptrlist_size((const struct ptr_list *)function->function_expr.upvalues);
+	copy_type(&upvalue->upvalue.value_type, &sym->variable.value_type);
+	add_symbol(parser->container, &function->function_expr.upvalues, upvalue);
+	if (sym->symbol_type == SYM_LOCAL) {
+		Scope *home = sym->variable.block;
+		sym->variable.escaped = 1;			 /* the original variable has escaped */
+		home->need_close = 1;				 /* its block needs a close operation */
+		home->function->function_expr.need_close = 1; /* and so does the function owning that block */
+	}
+	return true;
+}
+
+/* Searches for a variable starting from current scope, going up the
+ * scope chain within the current function. If the variable is not found in any scope of the function, then
+ * search the function's upvalue list. Repeat the exercise in parent function until either
+ * the symbol is found or we exhaust the search. NULL is returned if search was
+ * exhausted.
+ */
+static LuaSymbol *search_for_variable(struct parser_state *parser, const StringObject *varname,
+				      bool *is_local)
+{
+	*is_local = false;
+	Scope *scope = parser->current_scope;
+	AstNode *start_function = parser->current_function;
+	assert(scope && scope->function == parser->current_function);
+	while (scope) {
+		AstNode *fn = scope->function;
+		/* every scope that belongs to 'fn', innermost first */
+		for (; scope && scope->function == fn; scope = scope->parent) {
+			LuaSymbol *local = search_for_variable_in_block(scope, varname);
+			if (local) {
+				/* local only from the point of view of the function we started in */
+				*is_local = (fn == start_function);
+				return local;
+			}
+		}
+		/* no scope of 'fn' declares it: try the upvalues of 'fn' */
+		LuaSymbol *upvalue = search_upvalue_in_function(fn, varname);
+		if (upvalue)
+			return upvalue;
+		/* 'scope' now refers to the parent function's scope (or is NULL) */
+	}
+	return NULL;
+}
+
+/* Adds an upvalue for 'symbol' to current_function and to each of its parents, stopping before
+ * var_function, the function where the symbol exists as a local or an upvalue. As soon as a function
+ * is found to already have the upvalue the walk stops, since its parents need no further checking.
+ */
+static void add_upvalue_in_levels_upto(struct parser_state *parser, AstNode *current_function,
+				       AstNode *var_function, LuaSymbol *symbol)
+{
+	// NOTE: var_function may be NULL in the case of _ENV
+	// This is okay as it means we go up the whole chain of parent functions in that case
+	assert(symbol->symbol_type == SYM_LOCAL || symbol->symbol_type == SYM_ENV);
+	assert((symbol->symbol_type == SYM_ENV && var_function == NULL) || var_function != NULL);
+	assert(current_function != var_function);
+	for (AstNode *fn = current_function; fn && fn != var_function; fn = fn->function_expr.parent_function) {
+		if (!add_upvalue_in_function(parser, fn, symbol))
+			break; // this function already has it so we are done
+	}
+}
+
+/**
+ * Adds an upvalue for _ENV.
+ */
+static void add_upvalue_for_ENV(struct parser_state *parser)
+{
+	bool is_local = false;
+	LuaSymbol *symbol = search_for_variable(parser, parser->container->_ENV, &is_local);
+	if (symbol == NULL) {
+		// No definition of _ENV is visible.
+		// Make a special SYM_ENV symbol that upvalues can refer to. The symbol
+		// itself is not put in any scope; only the upvalues created below point at it.
+		symbol = raviX_allocator_allocate(&parser->container->symbol_allocator, 0);
+		symbol->symbol_type = SYM_ENV;
+		symbol->variable.var_name = parser->container->_ENV;
+		symbol->variable.block = NULL;
+		set_type(&symbol->variable.value_type, RAVI_TTABLE); // _ENV is by default a table
+		// NULL as the stop function: the upvalue is added to every enclosing function
+		add_upvalue_in_levels_upto(parser, parser->current_function, NULL, symbol);
+		return;
+	}
+	if (!is_local && symbol->symbol_type == SYM_LOCAL) {
+		// _ENV is a local of an enclosing function, so an upvalue is needed.
+		// Lua requires that the upvalue be added to all functions in the tree
+		// up to the function where the local is defined.
+		add_upvalue_in_levels_upto(parser, parser->current_function, symbol->variable.block->function, symbol);
+		return;
+	}
+	if (symbol->symbol_type == SYM_UPVALUE && symbol->upvalue.target_function != parser->current_function) {
+		// An upvalue was found, but it is recorded for a different function.
+		// Make sure every level from the current function up to that one has the upvalue.
+		add_upvalue_in_levels_upto(parser, parser->current_function, symbol->upvalue.target_function,
+					   symbol->upvalue.target_variable);
+	}
+}
+
+/* Creates a symbol reference to the name; the returned symbol reference
+ * may be local, upvalue or global.
+ */
+static AstNode *new_symbol_reference(struct parser_state *parser, const StringObject *varname)
+{
+	bool is_local = false;
+	LuaSymbol *symbol = search_for_variable(parser, varname, &is_local); // Search in all scopes
+	if (symbol == NULL) {
+		// Nothing found: treat the name as a global
+		LuaSymbol *global = raviX_allocator_allocate(&parser->container->symbol_allocator, 0);
+		global->symbol_type = SYM_GLOBAL;
+		global->variable.var_name = varname;
+		global->variable.block = NULL;
+		set_type(&global->variable.value_type, RAVI_TANY); // Globals are always ANY type
+		// Globals are never added to a scope, so that they are
+		// looked up afresh every time
+		symbol = global;
+		// A global reference needs an upvalue for _ENV.
+		// At the parser level we do not try to model that the global reference will be
+		// resolved by _ENV[name] - we leave that to the code generator to decide.
+		// However adding an upvalue later is hard so we do it here.
+		add_upvalue_for_ENV(parser);
+		bool env_is_local; // required by search_for_variable, value not used
+		global->variable.env = search_for_variable(parser, parser->container->_ENV, &env_is_local);
+		assert(global->variable.env);
+	} else {
+		// TODO we had a bug here - see t013.lua
+		// Need more test cases for this
+		// we found a local or upvalue
+		bool need_upvalue = false;
+		AstNode *owner = NULL;
+		LuaSymbol *target = NULL;
+		if (!is_local && symbol->symbol_type == SYM_LOCAL) {
+			// The local lives in an enclosing function, so an upvalue is needed.
+			// Lua requires that the upvalue be added to all functions in the
+			// tree up to the function where the local is defined.
+			need_upvalue = true;
+			owner = symbol->variable.block->function;
+			target = symbol;
+		} else if (symbol->symbol_type == SYM_UPVALUE &&
+			   symbol->upvalue.target_function != parser->current_function) {
+			// An upvalue was found but it is recorded for a different function;
+			// every level up to that function must have the upvalue
+			need_upvalue = true;
+			owner = symbol->upvalue.target_function;
+			target = symbol->upvalue.target_variable;
+		}
+		if (need_upvalue) {
+			add_upvalue_in_levels_upto(parser, parser->current_function, owner, target);
+			// TODO Following search could be avoided if above returned the symbol
+			symbol = search_upvalue_in_function(parser->current_function, varname);
+		}
+	}
+	AstNode *symbol_expr = allocate_expr_ast_node(parser, EXPR_SYMBOL);
+	symbol_expr->symbol_expr.type = symbol->variable.value_type;
+	symbol_expr->symbol_expr.var = symbol;
+	return symbol_expr;
+}
+
+/*============================================================*/
+/* GRAMMAR RULES */
+/*============================================================*/
+
+/* Builds an EXPR_LITERAL node of string type holding 'ts' */
+static AstNode *new_string_literal(struct parser_state *parser, const StringObject *ts)
+{
+	AstNode *literal = allocate_expr_ast_node(parser, EXPR_LITERAL);
+	set_type(&literal->literal_expr.type, RAVI_TSTRING);
+	literal->literal_expr.u.ts = ts;
+	return literal;
+}
+
+/* Builds an EXPR_FIELD_SELECTOR node whose key is the string literal 'ts'; its type is ANY */
+static AstNode *new_field_selector(struct parser_state *parser, const StringObject *ts)
+{
+	AstNode *selector = allocate_expr_ast_node(parser, EXPR_FIELD_SELECTOR);
+	selector->index_expr.expr = new_string_literal(parser, ts);
+	set_type(&selector->index_expr.type, RAVI_TANY);
+	return selector;
+}
+
+/*
+ * Parse ['.' | ':'] NAME
+ */
+static AstNode *parse_field_selector(struct parser_state *parser)
+{
+	/* fieldsel -> ['.' | ':'] NAME */
+	raviX_next(parser->ls); /* skip the dot or colon */
+	return new_field_selector(parser, check_name_and_next(parser->ls));
+}
+
+/*
+ * Parse '[' expr ']'
+ */
+static AstNode *parse_yindex(struct parser_state *parser)
+{
+	LexerState *ls = parser->ls;
+	/* index -> '[' expr ']' */
+	raviX_next(ls); /* skip the '[' */
+	AstNode *key = parse_expression(parser);
+	checknext(ls, ']');
+
+	AstNode *yindex = allocate_expr_ast_node(parser, EXPR_Y_INDEX);
+	yindex->index_expr.expr = key;
+	set_type(&yindex->index_expr.type, RAVI_TANY);
+	return yindex;
+}
+
+/*
+** {======================================================================
+** Rules for Constructors
+** =======================================================================
+*/
+
+/* Builds an EXPR_TABLE_ELEMENT_ASSIGN node; 'key_expr' may be NULL (list style field).
+ * The node takes the type of 'value_expr'. */
+static AstNode *new_indexed_assign_expr(struct parser_state *parser, AstNode *key_expr,
+					AstNode *value_expr)
+{
+	AstNode *assign = allocate_expr_ast_node(parser, EXPR_TABLE_ELEMENT_ASSIGN);
+	assign->table_elem_assign_expr.key_expr = key_expr;
+	assign->table_elem_assign_expr.value_expr = value_expr;
+	/* type of indexed assignment is same as the value */
+	assign->table_elem_assign_expr.type = value_expr->common_expr.type;
+	return assign;
+}
+
+/* Parses a record style table field */
+static AstNode *parse_recfield(struct parser_state *parser)
+{
+	LexerState *ls = parser->ls;
+	/* recfield -> (NAME | '['exp1']') = exp1 */
+	AstNode *key = (ls->t.token == TOK_NAME) ? new_field_selector(parser, check_name_and_next(ls))
+						 : parse_yindex(parser); /* ls->t.token == '[' */
+	checknext(ls, '=');
+	AstNode *value = parse_expression(parser);
+	return new_indexed_assign_expr(parser, key, value);
+}
+
+/* Parses a list style table field: a value without a key */
+static AstNode *parse_listfield(struct parser_state *parser)
+{
+	/* listfield -> exp */
+	return new_indexed_assign_expr(parser, NULL, parse_expression(parser));
+}
+
+/* Parses one field of a table constructor */
+static AstNode *parse_field(struct parser_state *parser)
+{
+	LexerState *ls = parser->ls;
+	/* field -> listfield | recfield */
+	const int tok = ls->t.token;
+	if (tok == '[')
+		return parse_recfield(parser);
+	/* a NAME may start either kind of field; it is a recfield only when '=' follows */
+	if (tok == TOK_NAME && raviX_lookahead(ls) == '=')
+		return parse_recfield(parser);
+	return parse_listfield(parser);
+}
+
+/* Returns the function call node that 'expr' ends in, or NULL when it does not end in a call.
+ * For a suffixed expression the last suffix is examined, or the primary expression
+ * when there are no suffixes. */
+static AstNode *has_function_call(AstNode *expr)
+{
+	if (!expr)
+		return NULL;
+	if (expr->type == EXPR_FUNCTION_CALL)
+		return expr;
+	if (expr->type != EXPR_SUFFIXED)
+		return NULL;
+	AstNode *tail = expr->suffixed_expr.suffix_list
+			    ? (AstNode *)raviX_ptrlist_last((struct ptr_list *)expr->suffixed_expr.suffix_list)
+			    : expr->suffixed_expr.primary_expr;
+	return has_function_call(tail);
+}
+
+/* If a call expr appears as the last in the expression list then mark it as multi-return (-1)
+ * i.e. the caller wants all available returns.
+ */
+static void set_multireturn(struct parser_state *parser, AstNodeList *expr_list, bool in_table_constructor)
+{
+	AstNode *tail = (AstNode *)raviX_ptrlist_last((struct ptr_list *)expr_list);
+	if (!tail)
+		return;
+	if (in_table_constructor) {
+		/* in a constructor only a trailing list style field (no key) qualifies */
+		if (tail->type != EXPR_TABLE_ELEMENT_ASSIGN || tail->table_elem_assign_expr.key_expr != NULL)
+			return;
+		tail = tail->table_elem_assign_expr.value_expr;
+	}
+	AstNode *call = has_function_call(tail);
+	if (call)
+		call->function_call_expr.num_results = -1; // last expr so accept all available results
+}
+
+/* Parses a table constructor and returns an EXPR_TABLE_LITERAL node of table type */
+static AstNode *parse_table_constructor(struct parser_state *parser)
+{
+	LexerState *ls = parser->ls;
+	/* constructor -> '{' [ field { sep field } [sep] ] '}'
+	   sep -> ',' | ';' */
+	int line = ls->linenumber;
+	checknext(ls, '{');
+	AstNode *table = allocate_expr_ast_node(parser, EXPR_TABLE_LITERAL);
+	set_type(&table->table_expr.type, RAVI_TTABLE);
+	table->table_expr.expr_list = NULL;
+	for (;;) {
+		if (ls->t.token == '}')
+			break; /* empty constructor, or a separator directly before '}' */
+		AstNode *field = parse_field(parser);
+		add_ast_node(parser->container, &table->table_expr.expr_list, field);
+		if (!testnext(ls, ',') && !testnext(ls, ';'))
+			break; /* no separator, so no further field */
+	}
+	set_multireturn(parser, table->table_expr.expr_list, true);
+	check_match(ls, '}', '{', line);
+	return table;
+}
+
+/* }====================================================================== */
+
+/*
+ * We would like to allow user defined types to contain the sequence
+ * NAME [. NAME]+
+ * The initial NAME is supplied.
+ * Returns extended name.
+ * Note that the returned string will be anchored in the Lexer and must + * be anchored somewhere else by the time parsing finishes + */ +static const StringObject *parse_user_defined_type_name(LexerState *ls, + const StringObject *typename) +{ + size_t len = 0; + if (testnext(ls, '.')) { + char buffer[256] = {0}; + const char *str = typename->str; + len = strlen(str); + if (len >= sizeof buffer) { + raviX_syntaxerror(ls, "User defined type name is too long"); + return typename; + } + snprintf(buffer, sizeof buffer, "%s", str); + do { + typename = check_name_and_next(ls); + str = typename->str; + size_t newlen = len + strlen(str) + 1; + if (newlen >= sizeof buffer) { + raviX_syntaxerror(ls, "User defined type name is too long"); + return typename; + } + snprintf(buffer + len, sizeof buffer - len, ".%s", str); + len = newlen; + } while (testnext(ls, '.')); + typename = raviX_create_string(ls->container, buffer, (uint32_t)strlen(buffer)); + } + return typename; +} + +/* RAVI Parse + * name : type + * where type is 'integer', 'integer[]', + * 'number', 'number[]' + */ +static LuaSymbol *parse_local_variable_declaration(struct parser_state *parser) +{ + LexerState *ls = parser->ls; + /* assume a dynamic type */ + ravitype_t tt = RAVI_TANY; + const StringObject *name = check_name_and_next(ls); + const StringObject *pusertype = NULL; + if (testnext(ls, ':')) { + const StringObject *typename = check_name_and_next(ls); /* we expect a type name */ + const char *str = typename->str; + /* following is not very nice but easy as + * the lexer doesn't need to be changed + */ + if (strcmp(str, "integer") == 0) + tt = RAVI_TNUMINT; + else if (strcmp(str, "number") == 0) + tt = RAVI_TNUMFLT; + else if (strcmp(str, "closure") == 0) + tt = RAVI_TFUNCTION; + else if (strcmp(str, "table") == 0) + tt = RAVI_TTABLE; + else if (strcmp(str, "string") == 0) + tt = RAVI_TSTRING; + else if (strcmp(str, "boolean") == 0) + tt = RAVI_TBOOLEAN; + else if (strcmp(str, "any") == 0) + tt = 
RAVI_TANY; + else { + /* default is a userdata type */ + tt = RAVI_TUSERDATA; + typename = parse_user_defined_type_name(ls, typename); + // str = getstr(typename); + pusertype = typename; + } + if (tt == RAVI_TNUMFLT || tt == RAVI_TNUMINT) { + /* if we see [] then it is an array type */ + if (testnext(ls, '[')) { + checknext(ls, ']'); + tt = (tt == RAVI_TNUMFLT) ? RAVI_TARRAYFLT : RAVI_TARRAYINT; + } + } + } + return new_local_symbol(parser, name, tt, pusertype); +} + +static bool parse_parameter_list(struct parser_state *parser, LuaSymbolList **list) +{ + LexerState *ls = parser->ls; + /* parlist -> [ param { ',' param } ] */ + int nparams = 0; + bool is_vararg = false; + if (ls->t.token != ')') { /* is 'parlist' not empty? */ + do { + switch (ls->t.token) { + case TOK_NAME: { /* param -> NAME */ + /* RAVI change - add type */ + LuaSymbol *symbol = parse_local_variable_declaration(parser); + symbol->variable.function_parameter = 1; + add_symbol(parser->container, list, symbol); + add_local_symbol_to_current_scope(parser, symbol); + nparams++; + break; + } + case TOK_DOTS: { /* param -> '...' */ + raviX_next(ls); + is_vararg = true; /* declared vararg */ + break; + } + default: + raviX_syntaxerror(ls, " or '...' 
expected"); + } + } while (!is_vararg && testnext(ls, ',')); + } + return is_vararg; +} + +static void parse_function_body(struct parser_state *parser, AstNode *func_ast, int ismethod, int line) +{ + LexerState *ls = parser->ls; + /* body -> '(' parlist ')' block END */ + checknext(ls, '('); + if (ismethod) { + LuaSymbol *symbol = new_localvarliteral(parser, "self"); /* create 'self' parameter */ + add_symbol(parser->container, &func_ast->function_expr.args, symbol); + } + bool is_vararg = parse_parameter_list(parser, &func_ast->function_expr.args); + func_ast->function_expr.is_vararg = is_vararg; + func_ast->function_expr.is_method = ismethod; + checknext(ls, ')'); + parse_statement_list(parser, &func_ast->function_expr.function_statement_list); + check_match(ls, TOK_end, TOK_function, line); +} + +/* parse expression list */ +static int parse_expression_list(struct parser_state *parser, AstNodeList **list) +{ + LexerState *ls = parser->ls; + /* explist -> expr { ',' expr } */ + int n = 1; /* at least one expression */ + AstNode *expr = parse_expression(parser); + add_ast_node(parser->container, list, expr); + while (testnext(ls, ',')) { + expr = parse_expression(parser); + add_ast_node(parser->container, list, expr); + n++; + } + return n; +} + +/* parse function arguments */ +static AstNode *parse_function_call(struct parser_state *parser, const StringObject *methodname, + int line) +{ + LexerState *ls = parser->ls; + AstNode *call_expr = allocate_expr_ast_node(parser, EXPR_FUNCTION_CALL); + call_expr->function_call_expr.method_name = methodname; + call_expr->function_call_expr.arg_list = NULL; + call_expr->function_call_expr.num_results = 1; /* By default we expect one arg */ + set_type(&call_expr->function_call_expr.type, RAVI_TANY); + switch (ls->t.token) { + case '(': { /* funcargs -> '(' [ explist ] ')' */ + raviX_next(ls); + if (ls->t.token == ')') /* arg list is empty? 
*/ + ; + else { + parse_expression_list(parser, &call_expr->function_call_expr.arg_list); + set_multireturn(parser, call_expr->function_call_expr.arg_list, false); + } + check_match(ls, ')', '(', line); + break; + } + case '{': { /* funcargs -> constructor */ + AstNode *table_expr = parse_table_constructor(parser); + add_ast_node(parser->container, &call_expr->function_call_expr.arg_list, table_expr); + break; + } + case TOK_STRING: { /* funcargs -> STRING */ + AstNode *string_expr = new_literal_expression(parser, RAVI_TSTRING); + string_expr->literal_expr.u.ts = ls->t.seminfo.ts; + add_ast_node(parser->container, &call_expr->function_call_expr.arg_list, string_expr); + raviX_next(ls); + break; + } + default: { + raviX_syntaxerror(ls, "function arguments expected"); + } + } + return call_expr; +} + +/* +** {====================================================================== +** Expression parsing +** ======================================================================= +*/ + +/* primary expression - name or subexpression */ +static AstNode *parse_primary_expression(struct parser_state *parser) +{ + LexerState *ls = parser->ls; + AstNode *primary_expr = NULL; + /* primaryexp -> NAME | '(' expr ')' */ + switch (ls->t.token) { + case '(': { + int line = ls->linenumber; + raviX_next(ls); + primary_expr = parse_expression(parser); + primary_expr->common_expr.truncate_results = 1; /* Lua requires that we truncate results to 1 */ + check_match(ls, ')', '(', line); + break; + } + case TOK_NAME: { + primary_expr = new_symbol_reference(parser, check_name_and_next(parser->ls)); + break; + } + default: { + raviX_syntaxerror(ls, "unexpected symbol"); + } + } + assert(primary_expr); + return primary_expr; +} + +/* variable or field access or function call */ +static AstNode *parse_suffixed_expression(struct parser_state *parser) +{ + LexerState *ls = parser->ls; + /* suffixedexp -> + primaryexp { '.' 
NAME | '[' exp ']' | ':' NAME funcargs | funcargs } */ + int line = ls->linenumber; + AstNode *suffixed_expr = allocate_expr_ast_node(parser, EXPR_SUFFIXED); + suffixed_expr->suffixed_expr.primary_expr = parse_primary_expression(parser); + suffixed_expr->suffixed_expr.type = suffixed_expr->suffixed_expr.primary_expr->common_expr.type; + suffixed_expr->suffixed_expr.suffix_list = NULL; + for (;;) { + switch (ls->t.token) { + case '.': { /* fieldsel */ + AstNode *suffix = parse_field_selector(parser); + add_ast_node(parser->container, &suffixed_expr->suffixed_expr.suffix_list, suffix); + set_type(&suffixed_expr->suffixed_expr.type, RAVI_TANY); + break; + } + case '[': { /* '[' exp1 ']' */ + AstNode *suffix = parse_yindex(parser); + add_ast_node(parser->container, &suffixed_expr->suffixed_expr.suffix_list, suffix); + set_type(&suffixed_expr->suffixed_expr.type, RAVI_TANY); + break; + } + case ':': { /* ':' NAME funcargs */ + raviX_next(ls); + const StringObject *methodname = check_name_and_next(ls); + AstNode *suffix = parse_function_call(parser, methodname, line); + add_ast_node(parser->container, &suffixed_expr->suffixed_expr.suffix_list, suffix); + break; + } + case '(': + case TOK_STRING: + case '{': { /* funcargs */ + AstNode *suffix = parse_function_call(parser, NULL, line); + add_ast_node(parser->container, &suffixed_expr->suffixed_expr.suffix_list, suffix); + break; + } + default: + return suffixed_expr; + } + } +} + +static AstNode *new_literal_expression(struct parser_state *parser, ravitype_t type) +{ + AstNode *expr = allocate_expr_ast_node(parser, EXPR_LITERAL); + set_type(&expr->literal_expr.type, type); + expr->literal_expr.u.i = 0; /* initialize */ + return expr; +} + +static AstNode *parse_simple_expression(struct parser_state *parser) +{ + LexerState *ls = parser->ls; + /* simpleexp -> FLT | INT | STRING | NIL | TRUE | FALSE | ... 
| + constructor | FUNCTION body | suffixedexp */ + AstNode *expr = NULL; + switch (ls->t.token) { + case TOK_FLT: { + expr = new_literal_expression(parser, RAVI_TNUMFLT); + expr->literal_expr.u.r = ls->t.seminfo.r; + break; + } + case TOK_INT: { + expr = new_literal_expression(parser, RAVI_TNUMINT); + expr->literal_expr.u.i = ls->t.seminfo.i; + break; + } + case TOK_STRING: { + expr = new_literal_expression(parser, RAVI_TSTRING); + expr->literal_expr.u.ts = ls->t.seminfo.ts; + break; + } + case TOK_nil: { + expr = new_literal_expression(parser, RAVI_TNIL); + expr->literal_expr.u.i = -1; + break; + } + case TOK_true: { + expr = new_literal_expression(parser, RAVI_TBOOLEAN); + expr->literal_expr.u.i = 1; + break; + } + case TOK_false: { + expr = new_literal_expression(parser, RAVI_TBOOLEAN); + expr->literal_expr.u.i = 0; + break; + } + case TOK_DOTS: { /* vararg */ + expr = new_literal_expression(parser, RAVI_TVARARGS); + break; + } + case '{': { /* constructor */ + return parse_table_constructor(parser); + } + case TOK_function: { + raviX_next(ls); + AstNode *function_ast = new_function(parser); + parse_function_body(parser, function_ast, 0, ls->linenumber); + end_function(parser); + return function_ast; + } + default: { + return parse_suffixed_expression(parser); + } + } + raviX_next(ls); + return expr; +} + +static UnaryOperatorType get_unary_opr(int op) +{ + switch (op) { + case TOK_not: + return UNOPR_NOT; + case '-': + return UNOPR_MINUS; + case '~': + return UNOPR_BNOT; + case '#': + return UNOPR_LEN; + case TOK_TO_INTEGER: + return UNOPR_TO_INTEGER; + case TOK_TO_NUMBER: + return UNOPR_TO_NUMBER; + case TOK_TO_INTARRAY: + return UNOPR_TO_INTARRAY; + case TOK_TO_NUMARRAY: + return UNOPR_TO_NUMARRAY; + case TOK_TO_TABLE: + return UNOPR_TO_TABLE; + case TOK_TO_STRING: + return UNOPR_TO_STRING; + case TOK_TO_CLOSURE: + return UNOPR_TO_CLOSURE; + case '@': + return UNOPR_TO_TYPE; + default: + return UNOPR_NOUNOPR; + } +} + +static BinaryOperatorType 
get_binary_opr(int op) +{ + switch (op) { + case '+': + return BINOPR_ADD; + case '-': + return BINOPR_SUB; + case '*': + return BINOPR_MUL; + case '%': + return BINOPR_MOD; + case '^': + return BINOPR_POW; + case '/': + return BINOPR_DIV; + case TOK_IDIV: + return BINOPR_IDIV; + case '&': + return BINOPR_BAND; + case '|': + return BINOPR_BOR; + case '~': + return BINOPR_BXOR; + case TOK_SHL: + return BINOPR_SHL; + case TOK_SHR: + return BINOPR_SHR; + case TOK_CONCAT: + return BINOPR_CONCAT; + case TOK_NE: + return BINOPR_NE; + case TOK_EQ: + return BINOPR_EQ; + case '<': + return BINOPR_LT; + case TOK_LE: + return BINOPR_LE; + case '>': + return BINOPR_GT; + case TOK_GE: + return BINOPR_GE; + case TOK_and: + return BINOPR_AND; + case TOK_or: + return BINOPR_OR; + default: + return BINOPR_NOBINOPR; + } +} + +static const struct { + lu_byte left; /* left priority for each binary operator */ + lu_byte right; /* right priority */ +} priority[] = { + /* ORDER OPR */ + {10, 10}, {10, 10}, /* '+' '-' */ + {11, 11}, {11, 11}, /* '*' '%' */ + {14, 13}, /* '^' (right associative) */ + {11, 11}, {11, 11}, /* '/' '//' */ + {6, 6}, {4, 4}, {5, 5}, /* '&' '|' '~' */ + {7, 7}, {7, 7}, /* '<<' '>>' */ + {9, 8}, /* '..' 
(right associative) */ + {3, 3}, {3, 3}, {3, 3}, /* ==, <, <= */ + {3, 3}, {3, 3}, {3, 3}, /* ~=, >, >= */ + {2, 2}, {1, 1} /* and, or */ +}; + +#define UNARY_PRIORITY 12 /* priority for unary operators */ + +/* +** subexpr -> (simpleexp | unop subexpr) { binop subexpr } +** where 'binop' is any binary operator with a priority higher than 'limit' +*/ +static AstNode *parse_sub_expression(struct parser_state *parser, int limit, BinaryOperatorType *untreated_op) +{ + LexerState *ls = parser->ls; + BinaryOperatorType op; + UnaryOperatorType uop; + AstNode *expr = NULL; + uop = get_unary_opr(ls->t.token); + if (uop != UNOPR_NOUNOPR) { + // RAVI change - get usertype if @ + const StringObject *usertype = NULL; + if (uop == UNOPR_TO_TYPE) { + usertype = ls->t.seminfo.ts; + raviX_next(ls); + // Check and expand to extended name if necessary + usertype = parse_user_defined_type_name(ls, usertype); + } else { + raviX_next(ls); + } + BinaryOperatorType ignored; + AstNode *subexpr = parse_sub_expression(parser, UNARY_PRIORITY, &ignored); + expr = allocate_expr_ast_node(parser, EXPR_UNARY); + expr->unary_expr.expr = subexpr; + expr->unary_expr.unary_op = uop; + expr->unary_expr.type.type_name = usertype; + } else { + expr = parse_simple_expression(parser); + } + /* expand while operators have priorities higher than 'limit' */ + op = get_binary_opr(ls->t.token); + while (op != BINOPR_NOBINOPR && priority[op].left > limit) { + BinaryOperatorType nextop; + raviX_next(ls); + /* read sub-expression with higher priority */ + AstNode *exprright = parse_sub_expression(parser, priority[op].right, &nextop); + + AstNode *binexpr = allocate_expr_ast_node(parser, EXPR_BINARY); + binexpr->binary_expr.expr_left = expr; + binexpr->binary_expr.expr_right = exprright; + binexpr->binary_expr.binary_op = op; + expr = binexpr; // Becomes the left expr for next iteration + op = nextop; + } + *untreated_op = op; /* return first untreated operator */ + return expr; +} + +static AstNode 
*parse_expression(struct parser_state *parser) +{ + BinaryOperatorType ignored; + return parse_sub_expression(parser, 0, &ignored); +} + +/* }==================================================================== */ + +/* +** {====================================================================== +** Rules for Statements +** ======================================================================= +*/ + +static void add_local_symbol_to_current_scope(struct parser_state *parser, LuaSymbol *sym) +{ + // Note that Lua allows multiple local declarations of the same name + // so a new instance just gets added to the end + add_symbol(parser->container, &parser->current_scope->symbol_list, sym); + add_symbol(parser->container, &parser->current_scope->function->function_expr.locals, sym); +} + +static Scope *parse_block(struct parser_state *parser, AstNodeList **statement_list) +{ + /* block -> statlist */ + Scope *scope = new_scope(parser); + parse_statement_list(parser, statement_list); + end_scope(parser); + return scope; +} + +/* parse condition in a repeat statement or an if control structure + * called by repeatstat(), test_then_block() + */ +static AstNode *parse_condition(struct parser_state *parser) +{ + /* cond -> exp */ + return parse_expression(parser); /* read condition */ +} + +static AstNode *parse_goto_statment(struct parser_state *parser) +{ + LexerState *ls = parser->ls; + const StringObject *label; + int is_break = 0; + if (testnext(ls, TOK_goto)) + label = check_name_and_next(ls); + else { + raviX_next(ls); /* skip break */ + label = raviX_create_string(ls->container, "break", sizeof "break"); + is_break = 1; + } + // Resolve labels in the end? 
+	AstNode *goto_stmt = allocate_ast_node(parser, STMT_GOTO);
+	goto_stmt->goto_stmt.name = label;
+	goto_stmt->goto_stmt.is_break = is_break;
+	goto_stmt->goto_stmt.goto_scope = parser->current_scope;
+	return goto_stmt;
+}
+
+/*
+ * skip no-op statements
+ * Only ';' counts as a no-op here. parse_statement() consumes the ';' and
+ * yields no node for it, so its result is deliberately ignored.
+ */
+static void skip_noop_statements(struct parser_state *parser)
+{
+	LexerState *ls = parser->ls;
+	while (ls->t.token == ';') // || ls->t.token == TOK_DBCOLON)
+		parse_statement(parser);
+}
+
+/* Creates a label symbol via new_label() and wraps it in a STMT_LABEL node. */
+static AstNode *generate_label(struct parser_state *parser, const StringObject *label)
+{
+	LuaSymbol *symbol = new_label(parser, label);
+	AstNode *label_stmt = allocate_ast_node(parser, STMT_LABEL);
+	label_stmt->label_stmt.symbol = symbol;
+	return label_stmt;
+}
+
+/*
+ * Finishes a label statement. parse_statement() has already consumed the
+ * opening '::' and the NAME (passed in as 'label'); this function consumes the
+ * closing '::' and any ';' that follow. 'line' is unused.
+ */
+static AstNode *parse_label_statement(struct parser_state *parser, const StringObject *label, int line)
+{
+	(void)line;
+	LexerState *ls = parser->ls;
+	/* label -> '::' NAME '::' */
+	checknext(ls, TOK_DBCOLON); /* skip double colon */
+	/* create new entry for this label */
+	AstNode *label_stmt = generate_label(parser, label);
+	skip_noop_statements(parser); /* skip other no-op statements */
+	return label_stmt;
+}
+
+/*
+ * Parses a while loop into a STMT_WHILE node. The condition is parsed before
+ * the loop scope is opened by parse_block(). 'line' is the line on which the
+ * statement started; it is handed to check_match() for the closing END.
+ */
+static AstNode *parse_while_statement(struct parser_state *parser, int line)
+{
+	LexerState *ls = parser->ls;
+	/* whilestat -> WHILE cond DO block END */
+	raviX_next(ls); /* skip WHILE */
+	AstNode *stmt = allocate_ast_node(parser, STMT_WHILE);
+	stmt->while_or_repeat_stmt.loop_scope = NULL;
+	stmt->while_or_repeat_stmt.loop_statement_list = NULL;
+	stmt->while_or_repeat_stmt.condition = parse_condition(parser);
+	checknext(ls, TOK_do);
+	stmt->while_or_repeat_stmt.loop_scope = parse_block(parser, &stmt->while_or_repeat_stmt.loop_statement_list);
+	check_match(ls, TOK_end, TOK_while, line);
+	return stmt;
+}
+
+/*
+ * Parses a repeat loop into a STMT_REPEAT node. Unlike the while loop the
+ * scope is opened and closed here rather than through parse_block(), because
+ * the UNTIL condition is parsed while the loop scope is still open.
+ */
+static AstNode *parse_repeat_statement(struct parser_state *parser, int line)
+{
+	LexerState *ls = parser->ls;
+	/* repeatstat -> REPEAT block UNTIL cond */
+	raviX_next(ls); /* skip REPEAT */
+	AstNode *stmt = allocate_ast_node(parser, STMT_REPEAT);
+	stmt->while_or_repeat_stmt.condition = NULL;
+	stmt->while_or_repeat_stmt.loop_statement_list = NULL;
+	stmt->while_or_repeat_stmt.loop_scope = new_scope(parser); /* scope block */
+	parse_statement_list(parser, &stmt->while_or_repeat_stmt.loop_statement_list);
+	check_match(ls, TOK_until, TOK_repeat, line);
+	stmt->while_or_repeat_stmt.condition = parse_condition(parser); /* read condition (inside scope block) */
+	end_scope(parser);
+	return stmt;
+}
+
+/* parse a for loop body for both versions of the for loop
+ * 'line', 'nvars' and 'isnum' are accepted but not used.
+ */
+static void parse_forbody(struct parser_state *parser, AstNode *stmt, int line, int nvars, int isnum)
+{
+	(void)line;
+	(void)nvars;
+	(void)isnum;
+	LexerState *ls = parser->ls;
+	/* forbody -> DO block */
+	checknext(ls, TOK_do);
+	stmt->for_stmt.for_body = parse_block(parser, &stmt->for_stmt.for_statement_list);
+}
+
+/* parse a numerical for loop
+ * 'varname' is the loop variable, already consumed by parse_for_statement().
+ * expr_list receives the initial value, the limit and, if present, the step.
+ */
+static void parse_fornum_statement(struct parser_state *parser, AstNode *stmt,
+				   const StringObject *varname, int line)
+{
+	LexerState *ls = parser->ls;
+	/* fornum -> NAME = exp1,exp1[,exp1] forbody */
+	LuaSymbol *local = new_local_symbol(parser, varname, RAVI_TANY, NULL);
+	add_symbol(parser->container, &stmt->for_stmt.symbols, local);
+	add_local_symbol_to_current_scope(parser, local);
+	checknext(ls, '=');
+	/* get the type of each expression */
+	add_ast_node(parser->container, &stmt->for_stmt.expr_list, parse_expression(parser)); /* initial value */
+	checknext(ls, ',');
+	add_ast_node(parser->container, &stmt->for_stmt.expr_list, parse_expression(parser)); /* limit */
+	if (testnext(ls, ',')) {
+		add_ast_node(parser->container, &stmt->for_stmt.expr_list,
+			     parse_expression(parser)); /* optional step */
+	}
+	parse_forbody(parser, stmt, line, 1, 1);
+}
+
+/* parse a generic for loop
+ * 'indexname' is the first loop variable, already consumed by the caller;
+ * further names are read here. All of them are declared with type RAVI_TANY.
+ */
+static void parse_for_list(struct parser_state *parser, AstNode *stmt, const StringObject *indexname)
+{
+	LexerState *ls = parser->ls;
+	/* forlist -> NAME {,NAME} IN explist forbody */
+	int nvars = 4; /* gen, state, control, plus at least one declared var */
+	/* create declared variables */
+	LuaSymbol *local = new_local_symbol(parser, indexname, RAVI_TANY, NULL);
+	add_symbol(parser->container, &stmt->for_stmt.symbols, local);
+	add_local_symbol_to_current_scope(parser, local);
+	while (testnext(ls, ',')) {
+		local = new_local_symbol(parser, check_name_and_next(ls), RAVI_TANY, NULL);
+		add_symbol(parser->container, &stmt->for_stmt.symbols, local);
+		add_local_symbol_to_current_scope(parser, local);
+		nvars++;
+	}
+	checknext(ls, TOK_in);
+	parse_expression_list(parser, &stmt->for_stmt.expr_list);
+	int line = ls->linenumber;
+	parse_forbody(parser, stmt, line, nvars - 3, 0);
+}
+
+/* initial parsing of a for loop - calls parse_fornum_statement() or parse_for_list()
+ * The node is allocated as AST_NONE and becomes STMT_FOR_NUM or STMT_FOR_IN
+ * once the token after the first variable name is known.
+ */
+static AstNode *parse_for_statement(struct parser_state *parser, int line)
+{
+	LexerState *ls = parser->ls;
+	/* forstat -> FOR (fornum | forlist) END */
+	const StringObject *varname;
+	AstNode *stmt = allocate_ast_node(parser, AST_NONE);
+	stmt->for_stmt.symbols = NULL;
+	stmt->for_stmt.expr_list = NULL;
+	stmt->for_stmt.for_body = NULL;
+	stmt->for_stmt.for_statement_list = NULL;
+	stmt->for_stmt.for_scope = new_scope(parser); // For the loop variables
+	raviX_next(ls);                               /* skip 'for' */
+	varname = check_name_and_next(ls);            /* first variable name */
+	switch (ls->t.token) {
+	case '=':
+		stmt->type = STMT_FOR_NUM;
+		parse_fornum_statement(parser, stmt, varname, line);
+		break;
+	case ',':
+	case TOK_in:
+		stmt->type = STMT_FOR_IN;
+		parse_for_list(parser, stmt, varname);
+		break;
+	default:
+		/* NOTE(review): presumably does not return - confirm */
+		raviX_syntaxerror(ls, "'=' or 'in' expected");
+	}
+	check_match(ls, TOK_end, TOK_for, line);
+	end_scope(parser);
+	return stmt;
+}
+
+/* parse if cond then block - called from parse_if_statement()
+ * Returns a STMT_TEST_THEN node holding the condition, the scope of the
+ * 'then' part and its statements.
+ */
+static AstNode *parse_if_cond_then_block(struct parser_state *parser)
+{
+	LexerState *ls = parser->ls;
+	/* test_then_block -> [IF | ELSEIF] cond THEN block */
+	raviX_next(ls); /* skip IF or ELSEIF */
+	AstNode *test_then_block =
+	    allocate_ast_node(parser, STMT_TEST_THEN); // This is not an AST node on its own
+	test_then_block->test_then_block.condition = parse_expression(parser); /* read condition */
+	test_then_block->test_then_block.test_then_scope = NULL;
+	test_then_block->test_then_block.test_then_statement_list = NULL;
+	checknext(ls, TOK_then);
+	/* the 'then' part gets its own scope whether or not it starts with goto/break */
+	test_then_block->test_then_block.test_then_scope = new_scope(parser);
+	if (ls->t.token == TOK_goto || ls->t.token == TOK_break) {
+		AstNode *stmt = parse_goto_statment(parser); /* handle goto/break */
+		add_ast_node(parser->container, &test_then_block->test_then_block.test_then_statement_list, stmt);
+		skip_noop_statements(parser); /* skip other no-op statements */
+		if (block_follow(ls, 0)) {    /* 'goto' is the entire block? */
+			end_scope(parser);
+			return test_then_block; /* and that is it */
+		}
+	}
+	parse_statement_list(parser, &test_then_block->test_then_block.test_then_statement_list); /* 'then' part */
+	end_scope(parser);
+	return test_then_block;
+}
+
+/*
+ * Parses a complete if statement into a STMT_IF node. Every IF/ELSEIF part
+ * becomes one entry of if_condition_list; else_block stays NULL when there is
+ * no ELSE part.
+ */
+static AstNode *parse_if_statement(struct parser_state *parser, int line)
+{
+	LexerState *ls = parser->ls;
+	/* ifstat -> IF cond THEN block {ELSEIF cond THEN block} [ELSE block] END */
+	AstNode *stmt = allocate_ast_node(parser, STMT_IF);
+	stmt->if_stmt.if_condition_list = NULL;
+	stmt->if_stmt.else_block = NULL;
+	stmt->if_stmt.else_statement_list = NULL;
+	AstNode *test_then_block = parse_if_cond_then_block(parser); /* IF cond THEN block */
+	add_ast_node(parser->container, &stmt->if_stmt.if_condition_list, test_then_block);
+	while (ls->t.token == TOK_elseif) {
+		test_then_block = parse_if_cond_then_block(parser); /* ELSEIF cond THEN block */
+		add_ast_node(parser->container, &stmt->if_stmt.if_condition_list, test_then_block);
+	}
+	if (testnext(ls, TOK_else))
+		stmt->if_stmt.else_block = parse_block(parser, &stmt->if_stmt.else_statement_list); /* 'else' part */
+	check_match(ls, TOK_end, TOK_if, line);
+	return stmt;
+}
+
+/*
+ * Parses 'local function NAME body' (LOCAL and FUNCTION are already consumed
+ * by parse_statement()). The symbol is added to the scope before the body is
+ * parsed. The result is a STMT_LOCAL node with one variable and one function
+ * expression.
+ */
+static AstNode *parse_local_function_statement(struct parser_state *parser)
+{
+	LexerState *ls = parser->ls;
+	LuaSymbol *symbol =
+	    new_local_symbol(parser, check_name_and_next(ls), RAVI_TFUNCTION, NULL); /* new local variable */
+	/* local function f ... is parsed as local f; f = function ... */
+	add_local_symbol_to_current_scope(parser, symbol);
+	AstNode *function_ast = new_function(parser);
+	parse_function_body(parser, function_ast, 0, ls->linenumber); /* function created in next register */
+	end_function(parser);
+	AstNode *stmt = allocate_ast_node(parser, STMT_LOCAL);
+	stmt->local_stmt.var_list = NULL;
+	stmt->local_stmt.expr_list = NULL;
+	add_symbol(parser->container, &stmt->local_stmt.var_list, symbol);
+	add_ast_node(parser->container, &stmt->local_stmt.expr_list, function_ast);
+	return stmt;
+}
+
+/**
+ * If a call expression is at the end of a local or expression statement then
+ * we need to check the number of return values that is expected.
+ */
+/*
+ * num_lhs   - number of variables / assignment targets on the left
+ * expr_list - expressions on the right, may be NULL
+ * When has_function_call() reports a call for the last expression and there
+ * are fewer expressions than targets, that call is asked for enough results
+ * to fill the remaining targets. 'parser' is not used.
+ */
+static void limit_function_call_results(struct parser_state *parser, int num_lhs, AstNodeList *expr_list)
+{
+	// FIXME probably doesn't handle var arg case
+	if (expr_list == NULL)
+		return; /* e.g. 'local a, b' without '=': there is no call to adjust */
+	AstNode *last_expr = (AstNode *)raviX_ptrlist_last((struct ptr_list *)expr_list);
+	AstNode *call_expr = has_function_call(last_expr);
+	if (!call_expr)
+		return;
+	int num_expr = raviX_ptrlist_size((const struct ptr_list *)expr_list);
+	if (num_expr < num_lhs) {
+		call_expr->function_call_expr.num_results = (num_lhs - num_expr) + 1;
+	}
+}
+
+/*
+ * Parses a local declaration (LOCAL is already consumed) into a STMT_LOCAL
+ * node. expr_list stays NULL when there is no '='.
+ */
+static AstNode *parse_local_statement(struct parser_state *parser)
+{
+	LexerState *ls = parser->ls;
+	/* stat -> LOCAL NAME {',' NAME} ['=' explist] */
+	AstNode *node = allocate_ast_node(parser, STMT_LOCAL);
+	node->local_stmt.var_list = NULL;
+	node->local_stmt.expr_list = NULL;
+	int nvars = 0;
+	do {
+		/* local name : type = value */
+		LuaSymbol *symbol = parse_local_variable_declaration(parser);
+		add_symbol(parser->container, &node->local_stmt.var_list, symbol);
+		nvars++;
+		if (nvars >= MAXVARS)
+			raviX_syntaxerror(ls, "too many local variables");
+	} while (testnext(ls, ','));
+	if (testnext(ls, '=')) /* nexps = */
+		parse_expression_list(parser, &node->local_stmt.expr_list);
+	else {
+		/* nexps = 0; */
+		;
+	}
+	limit_function_call_results(parser, nvars, node->local_stmt.expr_list);
+	/* local symbols are only added to scope at the end of the local statement */
+	LuaSymbol *sym = NULL;
+	FOR_EACH_PTR(node->local_stmt.var_list, sym) { add_local_symbol_to_current_scope(parser, sym); }
+	END_FOR_EACH_PTR(sym);
+	return node;
+}
+
+/* parse a function name specification with base symbol, optional selectors and optional method name
+ * Returns a STMT_FUNCTION node whose function_expr is still NULL.
+ */
+static AstNode *parse_function_name(struct parser_state *parser)
+{
+	LexerState *ls = parser->ls;
+	/* funcname -> NAME {fieldsel} [':' NAME] */
+	AstNode *function_stmt = allocate_ast_node(parser, STMT_FUNCTION);
+	function_stmt->function_stmt.function_expr = NULL;
+	function_stmt->function_stmt.method_name = NULL;
+	function_stmt->function_stmt.selectors = NULL;
+	function_stmt->function_stmt.name = new_symbol_reference(parser, check_name_and_next(parser->ls));
+	while (ls->t.token == '.') {
+		add_ast_node(parser->container, &function_stmt->function_stmt.selectors, parse_field_selector(parser));
+	}
+	if (ls->t.token == ':') {
+		function_stmt->function_stmt.method_name = parse_field_selector(parser);
+	}
+	return function_stmt;
+}
+
+/*
+ * Parses 'function funcname body'. The function is treated as a method when
+ * the name ended in ':' NAME.
+ */
+static AstNode *parse_function_statement(struct parser_state *parser, int line)
+{
+	LexerState *ls = parser->ls;
+	/* funcstat -> FUNCTION funcname body */
+	raviX_next(ls); /* skip FUNCTION */
+	AstNode *function_stmt = parse_function_name(parser);
+	int ismethod = function_stmt->function_stmt.method_name != NULL;
+	AstNode *function_ast = new_function(parser);
+	parse_function_body(parser, function_ast, ismethod, line);
+	end_function(parser);
+	function_stmt->function_stmt.function_expr = function_ast;
+	return function_stmt;
+}
+
+/* parse function call with no returns or assignment statement
+ * For an assignment var_expr_list holds the targets and expr_list the values;
+ * otherwise var_expr_list is NULL and expr_list holds the parsed expressions.
+ */
+static AstNode *parse_expression_statement(struct parser_state *parser)
+{
+	AstNode *stmt = allocate_ast_node(parser, STMT_EXPR);
+	stmt->expression_stmt.var_expr_list = NULL;
+	stmt->expression_stmt.expr_list = NULL;
+	LexerState *ls = parser->ls;
+	/* stat -> func | assignment */
+	/* Until we see '=' we do not know if this is an assignment or expr list*/
+	AstNodeList *current_list = NULL;
+	add_ast_node(parser->container, &current_list, parse_suffixed_expression(parser));
+	while (testnext(ls, ',')) { /* assignment -> ',' suffixedexp assignment */
+		add_ast_node(parser->container, &current_list, parse_suffixed_expression(parser));
+	}
+	if (ls->t.token == '=') { /* stat -> assignment ? */
+		checknext(ls, '=');
+		stmt->expression_stmt.var_expr_list = current_list;
+		current_list = NULL;
+		parse_expression_list(parser, &current_list);
+		limit_function_call_results(
+		    parser, raviX_ptrlist_size((const struct ptr_list *)stmt->expression_stmt.var_expr_list), current_list);
+	}
+	stmt->expression_stmt.expr_list = current_list;
+	// TODO Check that if not assignment then it is a function call
+	return stmt;
+}
+
+/*
+ * Parses the rest of a return statement (RETURN is already consumed by
+ * parse_statement()). expr_list stays NULL when no values are returned.
+ */
+static AstNode *parse_return_statement(struct parser_state *parser)
+{
+	LexerState *ls = parser->ls;
+	/* stat -> RETURN [explist] [';'] */
+	AstNode *return_stmt = allocate_ast_node(parser, STMT_RETURN);
+	return_stmt->return_stmt.expr_list = NULL;
+	if (block_follow(ls, 1) || ls->t.token == ';')
+		/* nret = 0*/; /* return no values */
+	else {
+		/*nret = */
+		parse_expression_list(parser, &return_stmt->return_stmt.expr_list); /* optional return values */
+		set_multireturn(parser, return_stmt->return_stmt.expr_list, false);
+	}
+	testnext(ls, ';'); /* skip optional semicolon */
+	return return_stmt;
+}
+
+/* Parses 'do block end' into a STMT_DO node that owns the block's scope. */
+static AstNode *parse_do_statement(struct parser_state *parser, int line)
+{
+	raviX_next(parser->ls); /* skip DO */
+	AstNode *stmt = allocate_ast_node(parser, STMT_DO);
+	stmt->do_stmt.do_statement_list = NULL;
+	stmt->do_stmt.scope = parse_block(parser, &stmt->do_stmt.do_statement_list);
+	check_match(parser->ls, TOK_end, TOK_do, line);
+	return stmt;
+}
+
+/* parse a statement
+ * Dispatches on the current token. Returns the statement node, or NULL for an
+ * empty statement (';').
+ */
+static AstNode *parse_statement(struct parser_state *parser)
+{
+	LexerState *ls = parser->ls;
+	int line = ls->linenumber; /* may be needed for error messages */
+	AstNode *stmt = NULL;
+	switch (ls->t.token) {
+	case ';': {             /* stat -> ';' (empty statement) */
+		raviX_next(ls); /* skip ';' */
+		break;
+	}
+	case TOK_if: { /* stat -> ifstat */
+		stmt = parse_if_statement(parser, line);
+		break;
+	}
+	case TOK_while: { /* stat -> whilestat */
+		stmt = parse_while_statement(parser, line);
+		break;
+	}
+	case TOK_do: { /* stat -> DO block END */
+		stmt = parse_do_statement(parser, line);
+		break;
+	}
+	case TOK_for: { /* stat -> forstat */
+		stmt = parse_for_statement(parser, line);
+		break;
+	}
+	case TOK_repeat: { /* stat -> repeatstat */
+		stmt = parse_repeat_statement(parser, line);
+		break;
+	}
+	case TOK_function: { /* stat -> funcstat */
+		stmt = parse_function_statement(parser, line);
+		break;
+	}
+	case TOK_local: {                       /* stat -> localstat */
+		raviX_next(ls);                 /* skip LOCAL */
+		if (testnext(ls, TOK_function)) /* local function? */
+			stmt = parse_local_function_statement(parser);
+		else
+			stmt = parse_local_statement(parser);
+		break;
+	}
+	case TOK_DBCOLON: {     /* stat -> label */
+		raviX_next(ls); /* skip double colon */
+		stmt = parse_label_statement(parser, check_name_and_next(ls), line);
+		break;
+	}
+	case TOK_return: {      /* stat -> retstat */
+		raviX_next(ls); /* skip RETURN */
+		stmt = parse_return_statement(parser);
+		break;
+	}
+	case TOK_break: /* stat -> breakstat */
+	case TOK_goto: { /* stat -> 'goto' NAME */
+		stmt = parse_goto_statment(parser);
+		break;
+	}
+	default: { /* stat -> func | assignment */
+		stmt = parse_expression_statement(parser);
+		break;
+	}
+	}
+	return stmt;
+}
+
+/* Parses a sequence of statements */
+/* statlist -> { stat [';'] } */
+/* Non-NULL statements are appended to *list; parsing stops after a 'return'. */
+static void parse_statement_list(struct parser_state *parser, AstNodeList **list)
+{
+	LexerState *ls = parser->ls;
+	while (!block_follow(ls, 1)) {
+		/* remember the token now: parse_statement() moves past it */
+		bool was_return = ls->t.token == TOK_return;
+		AstNode *stmt = parse_statement(parser);
+		if (stmt)
+			add_ast_node(parser->container, list, stmt);
+		if (was_return)
+			break; /* 'return' must be last statement */
+	}
+}
+
+/* Starts a new scope. If the current function has no main block
+ * defined then the new scope becomes its main block. The new scope
+ * gets existing scope as its parent even if that belongs to parent
+ * function.
+ */
+static Scope *new_scope(struct parser_state *parser)
+{
+	CompilerState *container = parser->container;
+	Scope *scope = raviX_allocator_allocate(&container->block_scope_allocator, 0);
+	scope->symbol_list = NULL;
+	// scope->do_statement_list = NULL;
+	scope->function = parser->current_function;
+	scope->need_close = 0;
+	/* a scope can only be opened while a function is being parsed */
+	assert(scope->function && scope->function->type == EXPR_FUNCTION);
+	scope->parent = parser->current_scope;
+	parser->current_scope = scope;
+	if (!parser->current_function->function_expr.main_block)
+		parser->current_function->function_expr.main_block = scope;
+	return scope;
+}
+
+/*
+ * Closes the current scope: its parent becomes the current scope again.
+ * The current scope may only become NULL when the scope being closed is the
+ * main block of the current function.
+ */
+static void end_scope(struct parser_state *parser)
+{
+	assert(parser->current_scope);
+	Scope *scope = parser->current_scope;
+	parser->current_scope = scope->parent;
+	assert(parser->current_scope != NULL || scope == parser->current_function->function_expr.main_block);
+}
+
+/* Creates a new function AST node and starts the function scope.
+New function becomes child of current function if any, and scope is linked
+to previous scope which may be of parent function.
+*/
+static AstNode *new_function(struct parser_state *parser)
+{
+	AstNode *node = allocate_expr_ast_node(parser, EXPR_FUNCTION);
+	set_type(&node->function_expr.type, RAVI_TFUNCTION);
+	node->function_expr.is_method = false;
+	node->function_expr.is_vararg = false;
+	node->function_expr.need_close = false;
+	node->function_expr.proc_id = 0;
+	node->function_expr.args = NULL;
+	node->function_expr.child_functions = NULL;
+	node->function_expr.upvalues = NULL;
+	node->function_expr.locals = NULL;
+	node->function_expr.main_block = NULL;
+	node->function_expr.function_statement_list = NULL;
+	node->function_expr.parent_function = parser->current_function;
+	if (parser->current_function) {
+		// Make this function a child of current function
+		add_ast_node(parser->container, &parser->current_function->function_expr.child_functions, node);
+	}
+	parser->current_function = node;
+	/* main_block is still NULL here, so new_scope() records this scope as the main block */
+	new_scope(parser); /* Start function scope */
+	return node;
+}
+
+/* Ends the current function: closes its scope and makes its parent function
+ * (NULL for the outermost function) the current function again.
+ * Returns the function that was just closed.
+ */
+static AstNode *end_function(struct parser_state *parser)
+{
+	assert(parser->current_function);
+	end_scope(parser);
+	AstNode *function = parser->current_function;
+	parser->current_function = function->function_expr.parent_function;
+	return function;
+}
+
+/* mainfunc() equivalent - parses a Lua script, also known as chunk.
+The code is wrapped in a vararg function */
+static void parse_lua_chunk(struct parser_state *parser)
+{
+	raviX_next(parser->ls);					 /* read first token */
+	parser->container->main_function = new_function(parser); /* vararg function wrapper */
+	parser->container->main_function->function_expr.is_vararg = true;
+	add_upvalue_for_ENV(parser);
+	parse_statement_list(parser, &parser->container->main_function->function_expr.function_statement_list);
+	end_function(parser);
+	/* closing the wrapper must leave no function and no scope open */
+	assert(parser->current_function == NULL);
+	assert(parser->current_scope == NULL);
+	check(parser->ls, TOK_EOS);
+}
+
+/* Initialises a parser state: no current function and no current scope yet. */
+static void parser_state_init(struct parser_state *parser, LexerState *ls, CompilerState *container)
+{
+	parser->ls = ls;
+	parser->container = container;
+	parser->current_function = NULL;
+	parser->current_scope = NULL;
+}
+
+/*
+** Parse the given source 'chunk' and build an abstract
+** syntax tree; return 0 on success / non-zero return code on
+** failure
+**
+** The return value is whatever setjmp() on container->env yields: 0 on the
+** direct call, after which the chunk is parsed, and non-zero when control
+** comes back through container->env. The lexer is destroyed on both paths.
+** NOTE(review): ISO C only defines setjmp() in a few contexts (controlling
+** expression, comparison with a constant, expression statement); using it as
+** an initializer works on common compilers but is formally undefined.
+*/
+int raviX_parse(CompilerState *container, const char *buffer, size_t buflen, const char *name)
+{
+	LexerState *lexstate = raviX_init_lexer(container, buffer, buflen, name);
+	struct parser_state parser_state;
+	parser_state_init(&parser_state, lexstate, container);
+	int rc = setjmp(container->env);
+	if (rc == 0) {
+		parse_lua_chunk(&parser_state);
+	}
+	raviX_destroy_lexer(lexstate);
+	return rc;
+}
+
+/*
+Return true if two strings are equal, false otherwise.
+*/
+static int string_equal(const void *a, const void *b)
+{
+	const StringObject *c1 = (const StringObject *)a;
+	const StringObject *c2 = (const StringObject *)b;
+	/* cheap rejection first: different length or hash means different strings */
+	if (c1->len != c2->len || c1->hash != c2->hash)
+		return 0;
+	return memcmp(c1->str, c2->str, c1->len) == 0;
+}
+
+/* Hash callback for the string set: returns the hash stored in the StringObject. */
+static uint32_t string_hash(const void *c)
+{
+	const StringObject *c1 = (const StringObject *)c;
+	return c1->hash;
+}
+
+/*
+ * Allocates and initialises a CompilerState: the allocators, the two text
+ * buffers, the string set and the "_ENV" string.
+ * Returns NULL when the state itself cannot be allocated.
+ */
+CompilerState *raviX_init_compiler()
+{
+	CompilerState *container = (CompilerState *)calloc(1, sizeof(CompilerState));
+	if (container == NULL)
+		return NULL; /* out of memory; nothing has been set up yet */
+	raviX_allocator_init(&container->ast_node_allocator, "ast nodes", sizeof(AstNode), sizeof(double),
+			     sizeof(AstNode) * 32);
+	raviX_allocator_init(&container->ptrlist_allocator, "ptrlists", sizeof(struct ptr_list), sizeof(double),
+			     sizeof(struct ptr_list) * 32);
+	raviX_allocator_init(&container->block_scope_allocator, "block scopes", sizeof(Scope),
+			     sizeof(double), sizeof(Scope) * 32);
+	raviX_allocator_init(&container->symbol_allocator, "symbols", sizeof(LuaSymbol), sizeof(double),
+			     sizeof(LuaSymbol) * 64);
+	raviX_allocator_init(&container->string_allocator, "strings", 0, sizeof(double), 1024);
+	raviX_allocator_init(&container->string_object_allocator, "string_objects", sizeof(StringObject),
+			     sizeof(double), sizeof(StringObject) * 64);
+	raviX_buffer_init(&container->buff, 1024);
+	raviX_buffer_init(&container->error_message, 256);
+	container->strings = raviX_set_create(string_hash, string_equal);
+	container->main_function = NULL;
+	container->killed = false;
+	container->linearizer = NULL;
+	/* created last: raviX_create_string() is handed the container set up above */
+	container->_ENV = raviX_create_string(container, "_ENV", 4);
+	return container;
+}
+
+// static void show_allocations(CompilerState *compiler)
+//{
+//	raviX_allocator_show_allocations(&compiler->symbol_allocator);
+//	raviX_allocator_show_allocations(&compiler->block_scope_allocator);
+//	raviX_allocator_show_allocations(&compiler->ast_node_allocator);
+//	raviX_allocator_show_allocations(&compiler->ptrlist_allocator);
+//	raviX_allocator_show_allocations(&compiler->string_allocator);
+//	raviX_allocator_show_allocations(&compiler->string_object_allocator);
+//}
+
+/*
+ * Releases everything owned by the compiler state and then the state itself.
+ * A NULL pointer is ignored.
+ * NOTE(review): 'killed' is set just before the struct is freed, so it cannot
+ * protect against a second call on the same pointer.
+ */
+void raviX_destroy_compiler(CompilerState *container)
+{
+	if (container == NULL)
+		return;
+	if (!container->killed) {
+		// show_allocations(container);
+		if (container->linearizer) {
+			raviX_destroy_linearizer(container->linearizer);
+			free(container->linearizer);
+		}
+		raviX_set_destroy(container->strings, NULL);
+		raviX_buffer_free(&container->buff);
+		raviX_buffer_free(&container->error_message);
+		raviX_allocator_destroy(&container->symbol_allocator);
+		raviX_allocator_destroy(&container->block_scope_allocator);
+		raviX_allocator_destroy(&container->ast_node_allocator);
+		raviX_allocator_destroy(&container->ptrlist_allocator);
+		raviX_allocator_destroy(&container->string_allocator);
+		raviX_allocator_destroy(&container->string_object_allocator);
+		container->killed = true;
+	}
+	free(container);
+}
diff --git a/ravicomp/src/parser.h b/ravicomp/src/parser.h
new file mode 100644
index 0000000..e5e6b97
--- /dev/null
+++ b/ravicomp/src/parser.h
@@ -0,0 +1,376 @@
+#ifndef ravicomp_IMPLEMENTATION_H
+#define ravicomp_IMPLEMENTATION_H
+
+/*
+ * Internal header file for the implementation.
+ * The data structures defined here are private.
+ */
+
+#include "ravi_compiler.h"
+
+#include "allocate.h"
+#include "membuf.h"
+#include "ptrlist.h"
+#include "set.h"
+
+#include <assert.h> /* NOTE(review): these nine <...> names were lost in extraction; reconstructed from usage - confirm */
+#include <limits.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+enum { MAXVARS = 125 }; /* a local statement must declare fewer names than this */
+#define LUA_ENV "_ENV"
+#define LUA_MAXINTEGER LLONG_MAX
+#define LUA_MININTEGER LLONG_MIN
+
+typedef unsigned long long lua_Unsigned;
+typedef unsigned char lu_byte;
+
+//////////////////////////
+
+typedef struct AstNode AstNode;
+
+/*
+ * Encapsulate all the compiler state.
+ * All memory is held by this object or sub-objects. Memory is freed when
+ * the object is destroyed.
+ */
+struct CompilerState {
+	Allocator ast_node_allocator; /* AstNode objects */
+	Allocator ptrlist_allocator; /* struct ptr_list objects */
+	Allocator block_scope_allocator; /* Scope objects */
+	Allocator symbol_allocator; /* LuaSymbol objects */
+	Allocator string_allocator; /* presumably the raw string data - confirm */
+	Allocator string_object_allocator; /* StringObject objects */
+	Set *strings; /* set of StringObject, compared by length, hash and bytes */
+	AstNode *main_function; /* function wrapping the whole chunk, set by the parser */
+	LinearizerState *linearizer; /* NULL until created; released together with this object */
+	int (*error_handler)(const char *fmt, ...);
+	TextBuffer buff;	  /* temp storage for literals, used by the lexer and parser */
+	jmp_buf env;		  /* For error handling */
+	TextBuffer error_message; /* For error handling, error message is saved here */
+	bool killed;		  /* flag to check if this is already destroyed */
+	const StringObject *_ENV; /* name of the env variable */
+};
+
+/* number of reserved words */
+#define NUM_RESERVED ((int)(TOK_while - FIRST_RESERVED + 1))
+
+/* state of the lexer plus state of the parser when shared by all
+   functions */
+struct LexerState {
+	int current;	 /* current character (charint) */
+	int linenumber;	 /* input line counter */
+	int lastline;	 /* line of last token 'consumed' */
+	Token t;	 /* current token */
+	Token lookahead; /* look ahead token */
+	CompilerState *container;
+	const char *buf;
+	size_t bufsize;
+	size_t n;
+	const char *p;
+	TextBuffer *buff;   /* buffer for tokens, points to the buffer in compiler_state */
+	const char *source; /* current source name */
+	const char *envn;   /* environment variable name */
+};
+void raviX_syntaxerror(LexerState *ls, const char *msg);
+
+DECLARE_PTR_LIST(AstNodeList, AstNode);
+
+/* RAVI: Following are the types we will use
+** in parsing. The rationale for types is
+** performance - as of now these are the only types that
+** we care about from a performance point of view - if any
+** other types appear then they are all treated as ANY
+**/
+typedef enum {
+	RAVI_TANY = 0,	  /* Lua dynamic type */
+	RAVI_TNUMINT = 1, /* integer number */
+	RAVI_TNUMFLT,	  /* floating point number */
+	RAVI_TARRAYINT,	  /* array of ints */
+	RAVI_TARRAYFLT,	  /* array of doubles */
+	RAVI_TFUNCTION,	  /* Lua or C Function */
+	RAVI_TTABLE,	  /* Lua table */
+	RAVI_TSTRING,	  /* string */
+	RAVI_TNIL,	  /* NIL */
+	RAVI_TBOOLEAN,	  /* boolean */
+	RAVI_TUSERDATA,	  /* userdata or lightuserdata */
+	RAVI_TVARARGS	  /* Not a real type - represents ... */
+} ravitype_t;
+
+/* Lua type info. We need to support user defined types too which are known by name */
+struct VariableType {
+	ravitype_t type_code;
+	/* type name for user defined types; used to lookup metatable in registry, only set when type_code is
+	 * RAVI_TUSERDATA */
+	const StringObject *type_name;
+};
+
+typedef struct Pseudo Pseudo;
+DECLARE_PTR_LIST(LuaSymbolList, LuaSymbol);
+
+struct LuaVariableSymbol {
+	VariableType value_type;
+	const StringObject *var_name; /* name of the variable */
+	Scope *block;		      /* NULL if global symbol, as globals are never added to a scope */
+	LuaSymbol *env;		      /* Only applicable for global symbols - this should point to _ENV */
+	unsigned escaped: 1,	      /* Has one or more up-value references */
+	    function_parameter: 1;    /* Is a function parameter */
+	Pseudo *pseudo;		      /* backend data for the symbol */
+};
+struct LuaLabelSymbol {
+	const StringObject *label_name;
+	Scope *block;
+	Pseudo* pseudo;     /* backend data for the symbol */
+};
+struct LuaUpvalueSymbol {
+	VariableType value_type;
+	LuaSymbol *target_variable;	   /* variable reference */
+	AstNode *target_function;	   /* Where the upvalue lives */
+	unsigned upvalue_index : 16,	   /* index of the upvalue in the function where this upvalue occurs */
+	    is_in_parent_stack : 1,	   /* 1 if yes - populated by code generator only */
+	    parent_upvalue_index : 15;	   /* if !is_in_parent_stack then upvalue index in parent - populated by code generator only */
+	/*TODO add pseudo ?*/
+};
+/* A symbol is a name recognised in Ravi/Lua code*/
+struct LuaSymbol {
+	enum SymbolType symbol_type; /* selects which member of the union below is valid */
+	union {
+		LuaVariableSymbol variable;
+		LuaLabelSymbol label;
+		LuaUpvalueSymbol upvalue;
+	};
+};
+struct Scope {
+	AstNode *function;	    /* function owning this block - of type EXPR_FUNCTION */
+	Scope *parent;		    /* parent block, may belong to parent function */
+	LuaSymbolList *symbol_list; /* symbols defined in this block */
+	unsigned need_close: 1;	    /* When we exit scope of this block the upvalues need to be closed */
+};
+
+/*STMT_RETURN */
+struct ReturnStatement {
+	AstNodeList *expr_list; /* returned expressions; NULL when nothing is returned */
+};
+/* STMT_LABEL */
+struct LabelStatement {
+	LuaSymbol *symbol; /* the label's symbol */
+};
+/* STMT_GOTO */
+struct GotoStatement {
+	unsigned is_break : 1;	  /* is this a break statement */
+	const StringObject *name; /* target label, used to resolve the goto destination */
+	Scope* goto_scope;	  /* The scope of the goto statement */
+};
+/* STMT_LOCAL local variable declarations */
+struct LocalStatement {
+	LuaSymbolList *var_list; /* declared symbols */
+	AstNodeList *expr_list; /* initialisers; NULL when there is no '=' */
+};
+/* STMT_EXPR: Also covers assignments */
+struct ExpressionStatement {
+	AstNodeList *var_expr_list; /* Optional var expressions, comma separated */
+	AstNodeList *expr_list;	    /* Comma separated expressions */
+};
+struct FunctionStatement {
+	AstNode *name;		/* base symbol to be looked up - symbol_expression */
+	AstNodeList *selectors; /* Optional list of index_expression(s) */
+	AstNode *method_name;	/* Optional - index_expression */
+	AstNode *function_expr; /* Function's AST - function_expression */
+};
+struct DoStatement {
+	Scope *scope;			/* The do statement only creates a new scope */
+	AstNodeList *do_statement_list; /* statements in this block */
+};
+/* Used internally in if_stmt, not an independent AST node */
+struct TestThenStatement {
+	AstNode *condition;
+	Scope *test_then_scope; /* scope of the 'then' part */
+	AstNodeList *test_then_statement_list; /* statements in this block */
+};
+struct IfStatement {
+	AstNodeList *if_condition_list; /* Actually a list of test_then_blocks */
+	Scope *else_block; /* NULL when there is no else part */
+	AstNodeList *else_statement_list; /* statements in this block */
+};
+struct WhileOrRepeatStatement {
+	AstNode *condition; /* loop condition (WHILE cond / UNTIL cond) */
+	Scope *loop_scope; /* scope of the loop body */
+	AstNodeList *loop_statement_list; /* statements in this block */
+};
+/* Used for both generic and numeric for loops */
+struct ForStatement {
+	Scope* for_scope; /* encapsulates the entire for statement */
+	LuaSymbolList *symbols; /* loop variables */
+	AstNodeList *expr_list; /* numeric: initial value, limit, optional step; generic: the explist after IN */
+	Scope *for_body; /* scope of the DO block */
+	AstNodeList *for_statement_list; /* statements in this block */
+};
+/* To access the type field common to all expr objects */
+/* all expr types must be compatible with BaseExpression */
+
+#define BASE_EXPRESSION_FIELDS VariableType type; unsigned truncate_results: 1
+
+typedef struct BaseExpression {
+	BASE_EXPRESSION_FIELDS;
+} BaseExpression;
+
+struct LiteralExpression {
+	BASE_EXPRESSION_FIELDS;
+	SemInfo u;
+};
+/* primaryexp -> NAME | '(' expr ')', NAME is parsed as EXPR_SYMBOL */
+struct SymbolExpression {
+	BASE_EXPRESSION_FIELDS;
+	LuaSymbol *var;
+};
+/* EXPR_Y_INDEX or EXPR_FIELD_SELECTOR */
+struct IndexExpression {
+	BASE_EXPRESSION_FIELDS;
+	AstNode *expr; /* '[' expr ']' */
+};
+/* EXPR_UNARY */
+struct UnaryExpression {
+	BASE_EXPRESSION_FIELDS;
+	UnaryOperatorType unary_op;
+	AstNode *expr; /* the operand */
+};
+struct BinaryExpression {
+	BASE_EXPRESSION_FIELDS;
+	BinaryOperatorType binary_op;
+	AstNode *expr_left;
+	AstNode *expr_right;
+};
+struct FunctionExpression {
+	BASE_EXPRESSION_FIELDS;
+	unsigned is_vararg : 1;
+	unsigned is_method : 1;
+	unsigned need_close : 1;
+	uint32_t proc_id; /* Backend allocated id */
+	AstNode *parent_function; /* parent function or NULL if main chunk */
+	Scope *main_block; /* the function's main block */
+	AstNodeList *function_statement_list; /* statements in this block */
+	LuaSymbolList
+	    *args; /* arguments, also must be part of the function block's symbol list */
+	AstNodeList *child_functions; /* child functions declared in this function */
+	LuaSymbolList *upvalues; /* List of upvalues */
+	LuaSymbolList *locals; /* List of locals */
+};
+/* Assign values in table constructor */
+/* EXPR_TABLE_ELEMENT_ASSIGN - used in table constructor */
+struct TableElementAssignmentExpression {
+	BASE_EXPRESSION_FIELDS;
+	AstNode *key_expr; /* If NULL means this is a list field with next available index,
+			      else specifies index expression */
+	AstNode *value_expr;
+};
+/* constructor -> '{' [ field { sep field } [sep] ] '}' where sep -> ',' | ';' */
+/* table constructor expression EXPR_TABLE_LITERAL occurs in function call and simple expr */
+struct TableLiteralExpression {
+	BASE_EXPRESSION_FIELDS;
+	AstNodeList *expr_list;
+};
+/* suffixedexp -> primaryexp { '.' NAME | '[' exp ']' | ':' NAME funcargs | funcargs } */
+/* suffix_list may have EXPR_FIELD_SELECTOR, EXPR_Y_INDEX, EXPR_FUNCTION_CALL */
+struct SuffixedExpression {
+	BASE_EXPRESSION_FIELDS;
+	AstNode *primary_expr;
+	AstNodeList *suffix_list;
+};
+struct FunctionCallExpression {
+	/* Note that in Ravi the results from a function call must be type asserted during assignment to
+	 * variables. This is not explicit in the AST but is required to ensure that function return
+	 * values do not overwrite the type of the variables in an inconsistent way.
+	 */
+	BASE_EXPRESSION_FIELDS;
+	const StringObject *method_name; /* Optional method_name */
+	AstNodeList *arg_list; /* Call arguments */
+	int num_results; /* How many results do we expect, -1 means all available results */
+};
+#undef BASE_EXPRESSION_FIELDS
+
+/* ALL AST nodes start with following fields */
+#define BASE_AST_FIELDS enum AstNodeType type; int line_number
+/* Statement AST nodes have following common fields.
+ */ +struct Statement { + BASE_AST_FIELDS; +}; +/* Expression AST nodes have following common fields +*/ +struct Expression { + BASE_AST_FIELDS; + BaseExpression common_expr; +}; + +/* The parse tree is made up of ast_node objects. Some of the ast_nodes reference the appropriate block +scopes but not all scopes may be referenced. The tree captures Lua syntax tree - i.e. statements such as +while, repeat, and for are captured in the way user uses them and not the way Lua generates code. Potentially +we can have a transformation step to convert to a tree that is more like the code generation + +The ast_node must be aligned with Expression for expressions, and with Statement for statements. +*/ +struct AstNode { + BASE_AST_FIELDS; + union { + ReturnStatement return_stmt; /*STMT_RETURN */ + LabelStatement label_stmt; /* STMT_LABEL */ + GotoStatement goto_stmt; /* STMT_GOTO */ + LocalStatement local_stmt; /* STMT_LOCAL local variable declarations */ + ExpressionStatement expression_stmt; + FunctionStatement function_stmt; + DoStatement do_stmt; + TestThenStatement test_then_block; + IfStatement if_stmt; + WhileOrRepeatStatement while_or_repeat_stmt; + ForStatement for_stmt; + BaseExpression common_expr; + LiteralExpression literal_expr; + SymbolExpression symbol_expr; + IndexExpression index_expr; + UnaryExpression unary_expr; + BinaryExpression binary_expr; + FunctionExpression function_expr; /* a literal expression whose result is a value of type function */ + TableElementAssignmentExpression table_elem_assign_expr; + TableLiteralExpression table_expr; + SuffixedExpression suffixed_expr; + FunctionCallExpression function_call_expr; + }; +}; +#undef BASE_AST_FIELDS + +static inline void set_typecode(VariableType *vt, ravitype_t t) { vt->type_code = t; } +static inline void set_type(VariableType *vt, ravitype_t t) +{ + vt->type_code = t; + vt->type_name = NULL; +} +static inline void set_typename(VariableType *vt, ravitype_t t, const StringObject *name) +{ + 
vt->type_code = t; + vt->type_name = name; +} +static inline void copy_type(VariableType *a, const VariableType *b) +{ + a->type_code = b->type_code; + a->type_name = b->type_name; +} + +struct parser_state { + LexerState *ls; + CompilerState *container; + AstNode *current_function; + Scope *current_scope; +}; + +void raviX_print_ast_node(TextBuffer *buf, AstNode *node, int level); /* output the AST structure recursively */ +const char *raviX_get_type_name(ravitype_t tt); + +int raviX_ast_simplify(CompilerState* container); + +#endif diff --git a/ravicomp/src/ptrlist.c b/ravicomp/src/ptrlist.c new file mode 100644 index 0000000..32862ab --- /dev/null +++ b/ravicomp/src/ptrlist.c @@ -0,0 +1,987 @@ +/* + * ptrlist.c + * + * Pointer ptrlist_t manipulation + * + * (C) Copyright Linus Torvalds 2003-2005 + */ +/* + * This version is part of the dmr_c project. + * Copyright (C) 2017 Dibyendu Majumdar + */ + +#define PARANOIA 1 + +#include + +#include +#include + +/* The ptr list */ + +/* For testing we change this */ +static int N_ = LIST_NODE_NR; + +void raviX_ptrlist_split_node(struct ptr_list *head) +{ + int old = head->nr_, nr = old / 2; + Allocator *alloc = head->allocator_; + assert(alloc); + struct ptr_list *newlist = (struct ptr_list *)raviX_allocator_allocate(alloc, 0); + struct ptr_list *next = head->next_; + newlist->allocator_ = alloc; + + old -= nr; + head->nr_ = old; + newlist->next_ = next; + next->prev_ = newlist; + newlist->prev_ = head; + head->next_ = newlist; + newlist->nr_ = nr; + memcpy(newlist->list_, head->list_ + old, nr * sizeof(void *)); + memset(head->list_ + old, 0xf0, nr * sizeof(void *)); +} + +PtrListIterator raviX_ptrlist_forward_iterator(struct ptr_list *head) +{ + PtrListIterator iter; + iter.__head = iter.__list = head; + iter.__nr = -1; + return iter; +} + +// Reverse iterator has to start from previous node not previous entry +// in the given head +PtrListIterator raviX_ptrlist_reverse_iterator(struct ptr_list *head) +{ + 
PtrListIterator iter; + iter.__head = iter.__list = head ? head->prev_ : NULL; + iter.__nr = iter.__head ? iter.__head->nr_ : 0; + return iter; +} +/* Advances the iterator and returns the next entry, or NULL once the walk has come back round to the anchor node __head (or when the iterator was created from a NULL list). NULL slots are skipped only in nodes whose rm_ count is non-zero, i.e. slots cleared by raviX_ptrlist_iter_mark_deleted(); a NULL stored in any other node is returned as-is and cannot be told apart from end-of-list. */ +void *raviX_ptrlist_iter_next(PtrListIterator *self) +{ + if (self->__head == NULL) + return NULL; + self->__nr++; +Lretry: + if (self->__nr < self->__list->nr_) { + void *ptr = self->__list->list_[self->__nr]; + if (self->__list->rm_ && !ptr) { + self->__nr++; + goto Lretry; + } + return ptr; + } else if (self->__list->next_ != self->__head) { + self->__list = self->__list->next_; + self->__nr = 0; + goto Lretry; + } + return NULL; +} +/* Returns the entry at zero-based position idx, counting slots node by node in ring order starting at 'list', or NULL when list is NULL or idx is past the last slot. Only nr_ is consulted, so slots marked deleted are counted like any other slot. */ +void *raviX_ptrlist_nth_entry(struct ptr_list *list, unsigned int idx) +{ + struct ptr_list *head = list; + if (!head) + return NULL; + do { + unsigned int nr = list->nr_; + if (idx < nr) + return list->list_[idx]; + else + idx -= nr; + } while ((list = list->next_) != head); + return NULL; +} +/* Steps the iterator back and returns the previous entry, or NULL when no earlier entry is reachable. NULL slots in nodes with a non-zero rm_ count are skipped, as in raviX_ptrlist_iter_next(). The walk stops when the node before the current one is __head; that is the front of the list for iterators made by raviX_ptrlist_reverse_iterator(), whose __head is the last node. NOTE(review): for a copy of a forward iterator (what RECURSE_PTR_REVERSE makes) __head is the first node, so starting inside the first node the walk wraps round to the last node, and starting in a later node it stops before the first node's entries are returned - confirm whether that is intended. */ +void *raviX_ptrlist_iter_prev(PtrListIterator *self) +{ + if (self->__head == NULL) + return NULL; + self->__nr--; +Lretry: + if (self->__nr >= 0 && self->__nr < self->__list->nr_) { + void *ptr = self->__list->list_[self->__nr]; + if (self->__list->rm_ && !ptr) { + self->__nr--; + goto Lretry; + } + return ptr; + } else if (self->__list->prev_ != self->__head) { + self->__list = self->__list->prev_; + self->__nr = self->__list->nr_ - 1; + goto Lretry; + } + return NULL; +} +/* Makes room for an insertion at the iterator's position: when the current node already holds N_ entries it is split with raviX_ptrlist_split_node(), and if the current index then lies beyond the entries kept in the original node the iterator is re-pointed at the same entry in the newly linked node. */ +void raviX_ptrlist_iter_split_current(PtrListIterator *self) +{ + if (self->__list->nr_ == N_) { + /* full so split */ + raviX_ptrlist_split_node(self->__list); + if (self->__nr >= self->__list->nr_) { + self->__nr -= self->__list->nr_; + self->__list = self->__list->next_; + } + } +} +/* Inserts newitem at the iterator's current position: the current entry and every later entry of the node move up one slot, so afterwards the iterator refers to newitem. Asserts that the iterator is positioned on an entry (__nr >= 0). */ +void raviX_ptrlist_iter_insert(PtrListIterator *self, void *newitem) +{ + assert(self->__nr >= 0); + raviX_ptrlist_iter_split_current(self); + void **__this = self->__list->list_ + self->__nr; + void **__last = self->__list->list_ + self->__list->nr_ - 1; + while (__last >= __this) { + __last[1] = __last[0]; + __last--; + } + *__this = 
newitem; + self->__list->nr_++; +} +/* Removes the iterator's current entry: later entries of the node slide down one slot, the vacated last slot is overwritten with a poison value, and both the node's count and the iterator index are decremented so that the next raviX_ptrlist_iter_next() returns the entry that moved into this position. The node is not unlinked or freed here even when it becomes empty. */ +void raviX_ptrlist_iter_remove(PtrListIterator *self) +{ + assert(self->__nr >= 0); + void **__this = self->__list->list_ + self->__nr; + void **__last = self->__list->list_ + self->__list->nr_ - 1; + while (__this < __last) { + __this[0] = __this[1]; + __this++; + } + *__this = (void *)((uintptr_t)0xf0f0f0f0); + self->__list->nr_--; + self->__nr--; +} +/* Overwrites the iterator's current entry with ptr; asserts that the iterator is positioned on a valid slot. */ +void raviX_ptrlist_iter_set(PtrListIterator *self, void *ptr) +{ + assert(self->__list && self->__nr >= 0 && self->__nr < self->__list->nr_); + self->__list->list_[self->__nr] = ptr; +} +/* Marks the current entry deleted in place: the slot is set to NULL and the node's rm_ counter is incremented; no entries are moved. */ +void raviX_ptrlist_iter_mark_deleted(PtrListIterator *self) +{ + raviX_ptrlist_iter_set(self, NULL); + self->__list->rm_++; +} +/* Returns the number of live entries: the sum over all nodes of nr_ minus rm_. A NULL list has size 0. */ +int raviX_ptrlist_size(const struct ptr_list *self) +{ + int nr = 0; + if (self) { + const struct ptr_list *list = self; + do { + nr += list->nr_ - list->rm_; + } while ((list = list->next_) != self); + } + return nr; +} +/* Appends ptr at the tail of the list and returns the address of the slot that now holds it. A new node is allocated from ptr_list_allocator when the list is empty or its last node already holds N_ entries; *self is written only when the very first node is created. */ +void **raviX_ptrlist_add(struct ptr_list **self, void *ptr, Allocator *ptr_list_allocator) +{ + struct ptr_list *list = *self; + struct ptr_list *last = NULL; + void **ret; + int nr; + + if (!list || (nr = (last = list->prev_)->nr_) >= N_) { + struct ptr_list *newlist = (struct ptr_list *)raviX_allocator_allocate(ptr_list_allocator, 0); + newlist->allocator_ = ptr_list_allocator; + if (!list) { + newlist->next_ = newlist; + newlist->prev_ = newlist; + *self = newlist; + } else { + newlist->prev_ = last; + newlist->next_ = list; + list->prev_ = newlist; + last->next_ = newlist; + } + last = newlist; + nr = 0; + } + ret = last->list_ + nr; + *ret = ptr; + nr++; + last->nr_ = nr; + return ret; +} +/* Returns the first entry of the list, or NULL when the list is NULL or no node holds an entry. Nodes left empty by raviX_ptrlist_iter_remove() (nr_ == 0, not yet packed) are skipped so that a stale or poisoned list_[0] is never returned. */ +void *raviX_ptrlist_first(struct ptr_list *list) +{ + struct ptr_list *head = list; + if (!list) + return NULL; + while (list->nr_ == 0) { + list = list->next_; + if (list == head) + return NULL; + } + return list->list_[0]; +} +/* Returns the last entry of the list, or NULL when the list is NULL or no node holds an entry. The walk starts at the tail node and moves backwards past empty nodes, so list_[nr_ - 1] is never evaluated with nr_ == 0 (which would index slot -1). */ +void *raviX_ptrlist_last(struct ptr_list *list) +{ + struct ptr_list *head = list; + if (!list) + return NULL; + list = list->prev_; + while (list->nr_ == 0) { + if (list == head) + return NULL; + list = list->prev_; + } + return list->list_[list->nr_ - 1]; +} + +/* + * Linearize the entries of a list up to a total of 'max', + * and return the nr of entries 
linearized. + * + * The array to linearize into (second argument) should really + * be "void *x[]", but we want to let people fill in any kind + * of pointer array, so let's just call it "void **". + * + * Slots are copied node by node with memcpy(), so a slot that was + * marked deleted is copied as NULL rather than skipped. + */ +int raviX_ptrlist_linearize(struct ptr_list *head, void **arr, int max) +{ + int nr = 0; + if (head && max > 0) { + struct ptr_list *list = head; + + do { + int i = list->nr_; + if (i > max) + i = max; + memcpy(arr, list->list_, i * sizeof(void *)); + arr += i; + nr += i; + max -= i; + if (!max) + break; + } while ((list = list->next_) != head); + } + return nr; +} + +/* + * When we've walked the list and deleted entries, + * we may need to re-pack it so that we don't have + * any empty blocks left (empty blocks upset the + * walking code). + * + * Nodes whose nr_ is zero are unlinked and handed back to their + * allocator_. *self moves to the next node when the head itself is + * freed and becomes NULL when no node remains. Slots that were only + * marked deleted (rm_) are not compacted here. + */ +void raviX_ptrlist_pack(struct ptr_list **self) +{ + struct ptr_list *head = *self; + + if (head) { + struct ptr_list *entry = head; + do { + struct ptr_list *next; + restart: + next = entry->next_; + if (!entry->nr_) { + struct ptr_list *prev; + if (next == entry) { + raviX_allocator_free(entry->allocator_, entry); + *self = NULL; + return; + } + prev = entry->prev_; + prev->next_ = next; + next->prev_ = prev; + raviX_allocator_free(entry->allocator_, entry); + if (entry == head) { + *self = next; + head = next; + entry = next; + goto restart; + } + } + entry = next; + } while (entry != head); + } +} +/* Frees every node of the list through the node's own allocator_ and sets *self to NULL. The ring is first broken at the tail (tail->next_ = NULL) so that the forward walk terminates. Only the nodes are released; the pointers stored in them are not touched. */ +void raviX_ptrlist_remove_all(struct ptr_list **self) +{ + struct ptr_list *tmp, *list = *self; + if (!list) + return; + list->prev_->next_ = NULL; + while (list) { + tmp = list; + list = list->next_; + raviX_allocator_free(tmp->allocator_, tmp); + } + *self = NULL; +} +/* Removes occurrences of 'entry' found by a forward walk. 'count' is decremented for each removal and the walk stops as soon as it reaches zero; a count that starts at zero or below therefore removes every occurrence. If the walk ends without count reaching zero the assert requires count <= 0, i.e. a positive count must not exceed the number of occurrences. */ +int raviX_ptrlist_remove(struct ptr_list **self, void *entry, int count) +{ + PtrListIterator iter = raviX_ptrlist_forward_iterator(*self); + for (void *ptr = raviX_ptrlist_iter_next(&iter); ptr != NULL; ptr = raviX_ptrlist_iter_next(&iter)) { + if (ptr == entry) { + raviX_ptrlist_iter_remove(&iter); + if (!--count) + goto out; + } + } + assert(count <= 0); 
+out: + raviX_ptrlist_pack(self); + return count; +} +/* Replaces occurrences of old_ptr with new_ptr during a forward walk. 'count' is decremented for each replacement and the walk stops as soon as it reaches zero; a count that starts at zero or below replaces every occurrence. Returns the final value of count (0 when exactly the requested number were replaced). If the walk ends first, the assert requires count <= 0. */ +int raviX_ptrlist_replace(struct ptr_list **self, void *old_ptr, void *new_ptr, int count) +{ + PtrListIterator iter = raviX_ptrlist_forward_iterator(*self); + for (void *ptr = raviX_ptrlist_iter_next(&iter); ptr != NULL; ptr = raviX_ptrlist_iter_next(&iter)) { + if (ptr == old_ptr) { + raviX_ptrlist_iter_set(&iter, new_ptr); + if (!--count) + goto out; + } + } + assert(count <= 0); +out: + return count; +} + +/* This removes the last entry, but doesn't pack the ptr list: the walk goes backwards from the tail to the first node that still has entries, decrements that node's count, overwrites the vacated slot with a poison value and returns the removed pointer. Returns NULL when the list is NULL or every node is empty. Nodes emptied this way stay linked until raviX_ptrlist_pack() is called. */ +void *raviX_ptrlist_undo_last(struct ptr_list **self) +{ + struct ptr_list *last, *first = *self; + + if (!first) + return NULL; + last = first; + do { + last = last->prev_; + if (last->nr_) { + void *ptr; + int nr = --last->nr_; + ptr = last->list_[nr]; + last->list_[nr] = (void *)((intptr_t)0xf1f1f1f1); + return ptr; + } + } while (last != first); + return NULL; +} +/* Removes and returns the last slot of the tail node; if that leaves the tail node empty it is unlinked and freed, and *self is set to NULL when it was the only node. NOTE(review): only the tail node is examined, so when it is already empty (for example after raviX_ptrlist_undo_last()) the node is freed and NULL is returned even though earlier nodes may still hold entries - confirm callers never rely on that case. */ +void *raviX_ptrlist_delete_last(struct ptr_list **self) +{ + void *ptr = NULL; + struct ptr_list *last, *first = *self; + + if (!first) + return NULL; + last = first->prev_; + if (last->nr_) + ptr = last->list_[--last->nr_]; + if (last->nr_ <= 0) { + first->prev_ = last->prev_; + last->prev_->next_ = first; + if (last == first) + *self = NULL; + raviX_allocator_free(last->allocator_, last); + } + return ptr; +} +/* Appends every entry of list 'a', in order, to the list *self using raviX_ptrlist_add(). Nodes needed for the appended entries are allocated from a's allocator_; when 'a' is NULL there is nothing to iterate and *self is left unchanged. */ +void raviX_ptrlist_concat(struct ptr_list *a, struct ptr_list **self) +{ + Allocator *alloc = NULL; + PtrListIterator iter = raviX_ptrlist_forward_iterator(a); + if (a) + alloc = a->allocator_; + else if (*self) + alloc = (*self)->allocator_; + else + return; + for (void *ptr = raviX_ptrlist_iter_next(&iter); ptr != NULL; ptr = raviX_ptrlist_iter_next(&iter)) { + raviX_ptrlist_add(self, ptr, alloc); + } +} + +/* + * sort_list: a stable sort for lists. + * + * Time complexity: O(n*log n) + * [assuming limited zero-element fragments] + * + * Space complexity: O(1). + * + * Stable: yes. 
+ */ + +static void array_sort(void **ptr, int nr, void *userdata, int (*cmp)(void *, const void *, const void *)) +{ + int i; + for (i = 1; i < nr; i++) { + void *p = ptr[i]; + if (cmp(userdata, ptr[i - 1], p) > 0) { + int j = i; + do { + ptr[j] = ptr[j - 1]; + if (!--j) + break; + } while (cmp(userdata, ptr[j - 1], p) > 0); + ptr[j] = p; + } + } +} + +static void verify_sorted(struct ptr_list *l, int n, void *userdata, int (*cmp)(void *, const void *, const void *)) +{ + int i = 0; + const void *a; + struct ptr_list *head = l; + + while (l->nr_ == 0) { + l = l->next_; + if (--n == 0) + return; + assert(l != head); + } + + a = l->list_[0]; + while (n > 0) { + const void *b; + if (++i >= l->nr_) { + i = 0; + l = l->next_; + n--; + assert(l != head || n == 0); + continue; + } + b = l->list_[i]; + assert(cmp(userdata, a, b) <= 0); + a = b; + } +} + +static void flush_to(struct ptr_list *b, void **buffer, int *nbuf) +{ + int nr = b->nr_; + assert(*nbuf >= nr); + memcpy(b->list_, buffer, nr * sizeof(void *)); + *nbuf = *nbuf - nr; + memmove(buffer, buffer + nr, *nbuf * sizeof(void *)); +} + +static void dump_to(struct ptr_list *b, void **buffer, int nbuf) +{ + assert(nbuf <= b->nr_); + memcpy(b->list_, buffer, nbuf * sizeof(void *)); +} + +// Merge two already-sorted sequences of blocks: +// (b1_1, ..., b1_n) and (b2_1, ..., b2_m) +// Since we may be moving blocks around, we return the new head +// of the merged list. +static struct ptr_list *merge_block_seqs(struct ptr_list *b1, int n, struct ptr_list *b2, int m, void *userdata, + int (*cmp)(void *, const void *, const void *)) +{ + int i1 = 0, i2 = 0; + void *buffer[2 * LIST_NODE_NR]; + int nbuf = 0; + struct ptr_list *newhead = b1; + + // printf ("Merging %d blocks at %p with %d blocks at %p\n", n, b1, m, b2); + + // Skip empty blocks in b2. 
+ while (b2->nr_ == 0) { + // BEEN_THERE('F'); + b2 = b2->next_; + if (--m == 0) { + // BEEN_THERE('G'); + return newhead; + } + } + + // Do a quick skip in case entire blocks from b1 are + // already less than smallest element in b2. + while (b1->nr_ == 0 || cmp(userdata, PTR_ENTRY(b1, b1->nr_ - 1), PTR_ENTRY(b2, 0)) < 0) { + // printf ("Skipping whole block.\n"); + // BEEN_THERE('H'); + b1 = b1->next_; + if (--n == 0) { + // BEEN_THERE('I'); + return newhead; + } + } + + while (1) { + void *d1 = PTR_ENTRY(b1, i1); + void *d2 = PTR_ENTRY(b2, i2); + + assert(i1 >= 0 && i1 < b1->nr_); + assert(i2 >= 0 && i2 < b2->nr_); + assert(b1 != b2); + assert(n > 0); + assert(m > 0); + + if (cmp(userdata, d1, d2) <= 0) { + // BEEN_THERE('J'); + buffer[nbuf++] = d1; + // Element from b1 is smaller + if (++i1 >= b1->nr_) { + // BEEN_THERE('L'); + flush_to(b1, buffer, &nbuf); + do { + b1 = b1->next_; + if (--n == 0) { + // BEEN_THERE('O'); + while (b1 != b2) { + // BEEN_THERE('P'); + flush_to(b1, buffer, &nbuf); + b1 = b1->next_; + } + assert(nbuf == i2); + dump_to(b2, buffer, nbuf); + return newhead; + } + } while (b1->nr_ == 0); + i1 = 0; + } + } else { + // BEEN_THERE('K'); + // Element from b2 is smaller + buffer[nbuf++] = d2; + if (++i2 >= b2->nr_) { + struct ptr_list *l = b2; + // BEEN_THERE('M'); + // OK, we finished with b2. Pull it out + // and plug it in before b1. 
+ + b2 = b2->next_; + b2->prev_ = l->prev_; + b2->prev_->next_ = b2; + l->next_ = b1; + l->prev_ = b1->prev_; + l->next_->prev_ = l; + l->prev_->next_ = l; + + if (b1 == newhead) { + // BEEN_THERE('N'); + newhead = l; + } + + flush_to(l, buffer, &nbuf); + b2 = b2->prev_; + do { + b2 = b2->next_; + if (--m == 0) { + // BEEN_THERE('Q'); + assert(nbuf == i1); + dump_to(b1, buffer, nbuf); + return newhead; + } + } while (b2->nr_ == 0); + i2 = 0; + } + } + } +} + +void raviX_ptrlist_sort(struct ptr_list **plist, void *userdata, int (*cmp)(void *, const void *, const void *)) +{ + struct ptr_list *head = *plist, *list = head; + int blocks = 1; + + assert(N_ == LIST_NODE_NR); + if (!head) + return; + + // Sort all the sub-lists + do { + array_sort(list->list_, list->nr_, userdata, cmp); +#ifdef PARANOIA + verify_sorted(list, 1, userdata, cmp); +#endif + list = list->next_; + } while (list != head); + + // Merge the damn things together + while (1) { + struct ptr_list *block1 = head; + + do { + struct ptr_list *block2 = block1; + struct ptr_list *next, *newhead; + int i; + + for (i = 0; i < blocks; i++) { + block2 = block2->next_; + if (block2 == head) { + if (block1 == head) { + // BEEN_THERE('A'); + *plist = head; + return; + } + // BEEN_THERE('B'); + goto next_pass; + } + } + + next = block2; + for (i = 0; i < blocks;) { + next = next->next_; + i++; + if (next == head) { + // BEEN_THERE('C'); + break; + } + // BEEN_THERE('D'); + } + + newhead = merge_block_seqs(block1, blocks, block2, i, userdata, cmp); +#ifdef PARANOIA + verify_sorted(newhead, blocks + i, userdata, cmp); +#endif + if (block1 == head) { + // BEEN_THERE('E'); + head = newhead; + } + block1 = next; + } while (block1 != head); + next_pass: + blocks <<= 1; + } +} + +#if 0 + +static int int_cmp(void *ud, const void *_a, const void *_b) +{ + (void)ud; + const int *a = (const int *)_a; + const int *b = (const int *)_b; + return *a - *b; +} + +#define MIN(_x, _y) ((_x) < (_y) ? 
(_x) : (_y)) + +static int test_sort() +{ + int i, *e; + const int N = 10000; + + srand(N); + for (i = 0; i < 1000; i++) + (void)rand(); + + Allocator ptrlist_allocator; + raviX_allocator_init(&ptrlist_allocator, "ptrlist_nodes", sizeof(struct ptr_list), __alignof__(struct ptr_list), + CHUNK); + Allocator int_allocator; + raviX_allocator_init(&int_allocator, "ints", sizeof(int), __alignof__(int), CHUNK); + struct ptr_list *int_list = NULL; + + for (i = 0; i < N; i++) { + e = (int *)raviX_allocator_allocate(&int_allocator, 0); + *e = rand(); + raviX_ptrlist_add(&int_list, e, &ptrlist_allocator); + } + if (raviX_ptrlist_size(int_list) != N) + return 1; + raviX_ptrlist_sort(&int_list, NULL, int_cmp); + // Sort already sorted stuff. + raviX_ptrlist_sort(&int_list, NULL, int_cmp); + + int *p = NULL; + PtrListIterator iter = raviX_ptrlist_forward_iterator(int_list); + int count = 0; + for (int *k = (int *)raviX_ptrlist_iter_next(&iter); k != NULL; k = (int *)raviX_ptrlist_iter_next(&iter)) { + if (p != NULL) { + if (*k < *p) + return 1; + } + p = k; + count++; + } + if (count != N) + return 1; + + struct ptr_list *l = int_list, *l2; + l2 = l; + int expected_count = 0; + do { + l2->nr_ = MIN(l2->nr_, rand() % 3); + for (i = 0; i < l2->nr_; i++) { + *((int *)(l2->list_[i])) = rand(); + expected_count++; + } + l2 = l2->next_; + } while (l2 != l); + raviX_ptrlist_sort(&int_list, NULL, int_cmp); + + p = NULL; + iter = raviX_ptrlist_forward_iterator(int_list); + count = 0; + for (int *k = (int *)raviX_ptrlist_iter_next(&iter); k != NULL; k = (int *)raviX_ptrlist_iter_next(&iter)) { + if (p != NULL) { + if (*k < *p) + return 1; + } + p = k; + count++; + } + if (count != expected_count) + return 1; + raviX_ptrlist_remove_all(&int_list); + raviX_allocator_destroy(&int_allocator); + raviX_allocator_destroy(&ptrlist_allocator); + return 0; +} + +struct mystruct { + int i; +}; + +struct mytoken { + const char *a; +}; + +static int test_ptrlist_basics() +{ + Allocator 
ptrlist_allocator; + raviX_allocator_init(&ptrlist_allocator, "ptrlist_nodes", sizeof(struct ptr_list), __alignof__(struct ptr_list), + CHUNK); + Allocator token_allocator; + raviX_allocator_init(&token_allocator, "ptr_list_tokens", sizeof(struct mytoken), __alignof__(struct mytoken), + CHUNK); + struct ptr_list *token_list = NULL; + if (raviX_ptrlist_size(token_list) != 0) + return 1; + struct mytoken *tok1 = (struct mytoken *)raviX_allocator_allocate(&token_allocator, 0); + struct mytoken **tok1p = (struct mytoken **)raviX_ptrlist_add(&token_list, tok1, &ptrlist_allocator); + if (raviX_ptrlist_size(token_list) != 1) + return 1; + if (tok1 != *tok1p) + return 1; + if (raviX_ptrlist_first(token_list) != tok1) + return 1; + if (raviX_ptrlist_last(token_list) != tok1) + return 1; + struct mytoken *tok2 = (struct mytoken *)raviX_allocator_allocate(&token_allocator, 0); + struct mytoken **tok2p = (struct mytoken **)raviX_ptrlist_add(&token_list, tok2, &ptrlist_allocator); + if (raviX_ptrlist_size(token_list) != 2) + return 1; + struct mytoken *tok3 = (struct mytoken *)raviX_allocator_allocate(&token_allocator, 0); + raviX_ptrlist_add(&token_list, tok3, &ptrlist_allocator); + if (raviX_ptrlist_size(token_list) != 3) + return 1; + struct mytoken *tok4 = (struct mytoken *)raviX_allocator_allocate(&token_allocator, 0); + raviX_ptrlist_add(&token_list, tok4, &ptrlist_allocator); + if (raviX_ptrlist_size(token_list) != 4) + return 1; + struct mytoken *tok5 = (struct mytoken *)raviX_allocator_allocate(&token_allocator, 0); + struct mytoken **tok5p = (struct mytoken **)raviX_ptrlist_add(&token_list, tok5, &ptrlist_allocator); + if (raviX_ptrlist_size(token_list) != 5) + return 1; + + if (tok2 != *tok2p) + return 1; + if (tok5 != *tok5p) + return 1; + if (raviX_ptrlist_first(token_list) != tok1) + return 1; + if (raviX_ptrlist_last(token_list) != tok5) + return 1; + struct mytoken *toks[5]; + int lin1 = raviX_ptrlist_linearize(token_list, (void **)toks, 5); + if (lin1 != 5) + 
return 1; + if (toks[0] != tok1) + return 1; + if (toks[1] != tok2) + return 1; + if (toks[2] != tok3) + return 1; + if (toks[3] != tok4) + return 1; + if (toks[4] != tok5) + return 1; + if (raviX_ptrlist_size(token_list) != 5) + return 1; + raviX_ptrlist_pack(&token_list); + if (raviX_ptrlist_size(token_list) != 5) + return 1; + + if (raviX_ptrlist_first(token_list) != tok1) + return 1; + if (raviX_ptrlist_last(token_list) != tok5) + return 1; + + const int X = 5 + 1; + const int Y = X - 1; + const int Z = Y - 1; + PtrListIterator iter1 = raviX_ptrlist_forward_iterator(token_list); + for (int i = 0; i < X; i++) { + struct mytoken *tk = (struct mytoken *)raviX_ptrlist_iter_next(&iter1); + if (tk == NULL) { + if (i == Y) + break; + return 1; + } + if (tk != toks[i]) + return 1; + } + PtrListIterator iter2 = raviX_ptrlist_reverse_iterator(token_list); + for (int i = 0; i < X; i++) { + struct mytoken *tk = (struct mytoken *)raviX_ptrlist_iter_prev(&iter2); + if (tk == NULL) { + if (i == Y) + break; + return 1; + } + if (tk != toks[Z - i]) + return 1; + } + struct mytoken *tok0 = (struct mytoken *)raviX_allocator_allocate(&token_allocator, 0); + PtrListIterator iter3 = raviX_ptrlist_forward_iterator(token_list); + if (!raviX_ptrlist_iter_next(&iter3)) + return 1; + raviX_ptrlist_iter_insert(&iter3, tok0); + if (raviX_ptrlist_size(token_list) != 6) + return 1; + if (raviX_ptrlist_first(token_list) != tok0) + return 1; + if (raviX_ptrlist_last(token_list) != tok5) + return 1; + + Allocator mystruct_allocator; + raviX_allocator_init(&mystruct_allocator, "mystructs", sizeof(struct mystruct), __alignof__(struct mystruct), + CHUNK); + struct ptr_list *mystruct_list = NULL; + + struct mystruct *s1 = (struct mystruct *)raviX_allocator_allocate(&mystruct_allocator, 0); + s1->i = 1; + struct mystruct *s2 = (struct mystruct *)raviX_allocator_allocate(&mystruct_allocator, 0); + s2->i = 2; + struct mystruct *s3 = (struct mystruct *)raviX_allocator_allocate(&mystruct_allocator, 0); 
+ s3->i = 3; + struct mystruct *s4 = (struct mystruct *)raviX_allocator_allocate(&mystruct_allocator, 0); + s4->i = 4; + struct mystruct *s5 = (struct mystruct *)raviX_allocator_allocate(&mystruct_allocator, 0); + s5->i = 5; + struct mystruct *s6 = (struct mystruct *)raviX_allocator_allocate(&mystruct_allocator, 0); + s6->i = 6; + + raviX_ptrlist_add(&mystruct_list, s1, &ptrlist_allocator); + raviX_ptrlist_add(&mystruct_list, s2, &ptrlist_allocator); + raviX_ptrlist_add(&mystruct_list, s3, &ptrlist_allocator); + raviX_ptrlist_add(&mystruct_list, s4, &ptrlist_allocator); + raviX_ptrlist_add(&mystruct_list, s5, &ptrlist_allocator); + raviX_ptrlist_add(&mystruct_list, s6, &ptrlist_allocator); + + struct mystruct *serial1_expected[6] = {s1, s2, s3, s4, s5, s6}; + struct mystruct *serial1_got[6]; + raviX_ptrlist_linearize(mystruct_list, (void **)serial1_got, 6); + for (int i = 0; i < 6; i++) { + if (serial1_expected[i] != serial1_got[i]) + return 1; + } + + if (raviX_ptrlist_remove(&mystruct_list, s3, 1) != 0) + return 1; + PtrListIterator iter4 = raviX_ptrlist_forward_iterator(mystruct_list); + for (struct mystruct *p = (struct mystruct *)raviX_ptrlist_iter_next(&iter4); p != NULL; + p = (struct mystruct *)raviX_ptrlist_iter_next(&iter4)) { + if (p->i == 4) + raviX_ptrlist_iter_remove(&iter4); + } + if (raviX_ptrlist_size(mystruct_list) != 4) + return 1; + + struct mystruct *serial3_expected[4] = {s1, s2, s5, s6}; + struct mystruct *serial3_got[4]; + int reverse_expected[2] = {2, 1}; + + int i = 0; + struct mystruct *p; + FOR_EACH_PTR(mystruct_list, p) + { + if (i == 4) + return 1; + serial3_got[i++] = p; + if (i == 3) { + struct mystruct *p2; + int j = 0; + RECURSE_PTR_REVERSE(p, p2) + { + if (j >= 2 || reverse_expected[j] != p2->i) + return 1; + j++; + } + END_FOR_EACH_PTR_REVERSE(p2); + } + } + END_FOR_EACH_PTR(p); + if (i != 4) + return 1; + for (i = 0; i < 4; i++) { + if (serial3_expected[i] != serial3_got[i]) + return 1; + } + + i = 0; + 
PREPARE_PTR_LIST(mystruct_list, p); + while (p != NULL) { + if (i == 4) + return 1; + serial3_got[i++] = p; + NEXT_PTR_LIST(p); + } + FINISH_PTR_LIST(p); + if (i != 4) + return 1; + for (i = 0; i < 4; i++) { + if (serial3_expected[i] != serial3_got[i]) + return 1; + } + + i = 0; + FOR_EACH_PTR_REVERSE(mystruct_list, p) + { + if (i == 4) + return 1; + serial3_got[i++] = p; + if (i == 2) { + struct mystruct *p3; + int j = 0; + RECURSE_PTR_REVERSE(p, p3) + { + if (j >= 2 || reverse_expected[j] != p3->i) + return 1; + j++; + } + END_FOR_EACH_PTR_REVERSE(p3); + } + } + END_FOR_EACH_PTR_REVERSE(p); + if (i != 4) + return 1; + for (int i = 0; i < 4; i++) { + if (serial3_expected[3 - i] != serial3_got[i]) + return 1; + } + + raviX_ptrlist_remove_all(&token_list); + raviX_ptrlist_remove_all(&mystruct_list); + + raviX_allocator_destroy(&token_allocator); + raviX_allocator_destroy(&mystruct_allocator); + raviX_allocator_destroy(&ptrlist_allocator); + return 0; +} + +int test_ptrlist() +{ + if (test_sort() != 0) + return 1; + + /* For testing we set N_ temporarily */ + N_ = 2; + int failure_count = test_ptrlist_basics(); + N_ = LIST_NODE_NR; + + if (failure_count == 0) + printf("ptrlist test okay\n"); + return failure_count; +} + +#endif \ No newline at end of file diff --git a/ravicomp/src/ptrlist.h b/ravicomp/src/ptrlist.h new file mode 100644 index 0000000..242d7b4 --- /dev/null +++ b/ravicomp/src/ptrlist.h @@ -0,0 +1,149 @@ +#ifndef ravicomp_PTRLIST_H +#define ravicomp_PTRLIST_H + +/* +* Generic pointer list manipulation code. +* +* (C) Copyright Linus Torvalds 2003-2005 +*/ +/* +* This version is part of the dmr_c project. +* Copyright (C) 2017 Dibyendu Majumdar +*/ + +#include + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + + +/* +* The ptrlist data structure is copied from the Linux Sparse project. +* It is essentially a dynamic array of pointers but the array is split up +* into nodes that are linked together. 
Each node contains a small number of array entries. +* +* The ptr list data structure is like a train - with cars linked to each other. +* Just as in a train each car has many seats, so in ptr list each "node" has +* several entries. Unlike a train however, the ptr list is arranged as a ring, +* i.e. the front and back nodes are linked to each other. Hence there is no +* such thing as a 'head' of the list - i.e. any node can be the head! +* +* The disadvantage of the ptrlist structure compared to a dynamic array is +* that it consumes more memory to maintain the linked list data structure. +* +* The main advantage is that it is well suited to fixed sized memory +* allocators as there is no resizing of memory already allocated, which happens +* with dynamic arrays. The ptrlist is made up of fixed size nodes. +*/ + +/* number of array entries per node */ +#ifndef LIST_NODE_NR +#define LIST_NODE_NR (7) +#endif + +/* structure of a node: nr_ is the number of slots of list_ in use, rm_ counts the slots marked deleted (set to NULL) by raviX_ptrlist_iter_mark_deleted(), prev_ and next_ link the nodes into a ring, and allocator_ is the allocator the node was obtained from and is returned to */ +#define DECLARE_PTR_LIST(listname, type) \ + typedef struct listname { \ + int nr_ : 8; \ + int rm_ : 8; \ + struct listname *prev_; \ + struct listname *next_; \ + Allocator *allocator_; \ + type *list_[LIST_NODE_NR]; \ + } listname + +/* Each node in the list */ +DECLARE_PTR_LIST(ptr_list, void); + +/* The iterator structure is used for looping: __head is the node the walk is anchored at, __list is the node currently being visited and __nr is the index of the current entry within __list */ +typedef struct PtrListIterator { + struct ptr_list *__head; + struct ptr_list *__list; + int __nr; +} PtrListIterator; + +/* The ptr list */ +extern int raviX_ptrlist_size(const struct ptr_list *self); +extern void **raviX_ptrlist_add(struct ptr_list **self, void *ptr, Allocator *ptr_list_allocator); +extern void *raviX_ptrlist_nth_entry(struct ptr_list *list, unsigned int idx); +extern void *raviX_ptrlist_first(struct ptr_list *list); +extern void *raviX_ptrlist_last(struct ptr_list *list); +extern int raviX_ptrlist_linearize(struct ptr_list *head, void **arr, int max); +extern void raviX_ptrlist_split_node(struct ptr_list *head); +extern void raviX_ptrlist_pack(struct ptr_list 
**self); +extern void raviX_ptrlist_remove_all(struct ptr_list **self); +extern int raviX_ptrlist_remove(struct ptr_list **self, void *entry, int count); +extern int raviX_ptrlist_replace(struct ptr_list **self, void *old_ptr, void *new_ptr, + int count); +extern void *raviX_ptrlist_undo_last(struct ptr_list **self); +extern void *raviX_ptrlist_delete_last(struct ptr_list **self); +extern void raviX_ptrlist_concat(struct ptr_list *a, struct ptr_list **self); +extern void raviX_ptrlist_sort(struct ptr_list **self, void *, + int (*cmp)(void *, const void *, const void *)); + +/* iterator functions */ +extern PtrListIterator raviX_ptrlist_forward_iterator(struct ptr_list *self); +extern PtrListIterator raviX_ptrlist_reverse_iterator(struct ptr_list *self); +extern void *raviX_ptrlist_iter_next(PtrListIterator *self); +extern void *raviX_ptrlist_iter_prev(PtrListIterator *self); +extern void raviX_ptrlist_iter_split_current(PtrListIterator *self); +extern void raviX_ptrlist_iter_insert(PtrListIterator *self, void *newitem); +extern void raviX_ptrlist_iter_remove(PtrListIterator *self); +extern void raviX_ptrlist_iter_set(PtrListIterator *self, void *ptr); +extern void raviX_ptrlist_iter_mark_deleted(PtrListIterator *self); + +static inline void **raviX_ptrlist_iter_this_address(PtrListIterator *self) { + return &self->__list->list_[self->__nr]; +} +#define ptr_list_empty(x) ((x) == NULL) +#define PTR_ENTRY_NOTAG(h,i) ((h)->list_[i]) +#define PTR_ENTRY(h,i) (void *)(PTR_ENTRY_NOTAG(h,i)) + +#define FOR_EACH_PTR(list, var) \ + { PtrListIterator var##iter__ = raviX_ptrlist_forward_iterator((struct ptr_list *)list); \ + for (var = raviX_ptrlist_iter_next(&var##iter__); var != NULL; var = raviX_ptrlist_iter_next(&var##iter__)) +#define END_FOR_EACH_PTR(var) } + +#define FOR_EACH_PTR_REVERSE(list, var) \ + { PtrListIterator var##iter__ = raviX_ptrlist_reverse_iterator((struct ptr_list *)list); \ + for (var = raviX_ptrlist_iter_prev(&var##iter__); var != NULL; var = 
raviX_ptrlist_iter_prev(&var##iter__)) +#define END_FOR_EACH_PTR_REVERSE(var) } + +#define RECURSE_PTR_REVERSE(list, var) \ + { PtrListIterator var##iter__ = list##iter__; \ + for (var = raviX_ptrlist_iter_prev(&var##iter__); var != NULL; var = raviX_ptrlist_iter_prev(&var##iter__)) + +#define PREPARE_PTR_LIST(list, var) \ + PtrListIterator var##iter__ = raviX_ptrlist_forward_iterator((struct ptr_list *)list); \ + var = raviX_ptrlist_iter_next(&var##iter__) + +#define NEXT_PTR_LIST(var) \ + var = raviX_ptrlist_iter_next(&var##iter__) +#define FINISH_PTR_LIST(var) + +#define THIS_ADDRESS(type, var) \ + (type *)raviX_ptrlist_iter_this_address(&var##iter__) + +#define DELETE_CURRENT_PTR(var) \ + raviX_ptrlist_iter_remove(&var##iter__) + +#define REPLACE_CURRENT_PTR(type, var, replacement) \ + raviX_ptrlist_iter_set(&var##iter__, replacement) + +#define INSERT_CURRENT(newval, var) \ + raviX_ptrlist_iter_insert(&var##iter__, newval) + +#define MARK_CURRENT_DELETED(PTR_TYPE, var) \ + raviX_ptrlist_iter_mark_deleted(&var##iter__) + + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/ravicomp/src/ravi_binding.c b/ravicomp/src/ravi_binding.c new file mode 100644 index 0000000..4e3752d --- /dev/null +++ b/ravicomp/src/ravi_binding.c @@ -0,0 +1,66 @@ +/* This will contain Lua bindings */ + +#include "ravi_api.h" +#include "ravi_compiler.h" + +#include "cfg.h" +#include "codegen.h" +#include "optimizer.h" +#include "parser.h" + +int raviX_compile(struct Ravi_CompilerInterface *compiler_interface) +{ + int rc = 0; + int dump_ir = 0; + if (compiler_interface->compiler_options != NULL) { + dump_ir = strstr(compiler_interface->compiler_options, "--dump-ir") != NULL; + } + compiler_interface->generated_code = NULL; + CompilerState *container = raviX_init_compiler(); + rc = raviX_parse(container, compiler_interface->source, compiler_interface->source_len, + compiler_interface->source_name); + if (rc != 0) { + compiler_interface->error_message(compiler_interface->context, 
raviX_get_last_error(container)); + goto L_exit; + } + rc = raviX_ast_typecheck(container); + if (rc != 0) { + compiler_interface->error_message(compiler_interface->context, raviX_get_last_error(container)); + goto L_exit; + } + rc = raviX_ast_simplify(container); + if (rc != 0) { + compiler_interface->error_message(compiler_interface->context, raviX_get_last_error(container)); + goto L_exit; + } + LinearizerState *linearizer = raviX_init_linearizer(container); + rc = raviX_ast_linearize(linearizer); + if (rc != 0) { + compiler_interface->error_message(compiler_interface->context, raviX_get_last_error(container)); + goto L_linend; + } + raviX_construct_cfg(linearizer->main_proc); + raviX_remove_unreachable_blocks(linearizer); + + TextBuffer buf; + raviX_buffer_init(&buf, 4096); + if (dump_ir) { + raviX_buffer_add_string(&buf, "/* Following is an IR Dump from the compiler\n"); + raviX_show_linearizer(linearizer, &buf); + raviX_buffer_add_string(&buf, "\nEnd of IR dump*/\n"); + } + rc = raviX_generate_C(linearizer, &buf, compiler_interface); + if (rc != 0) { + raviX_buffer_free(&buf); + } else { + compiler_interface->generated_code = buf.buf; + } + +L_linend: + raviX_destroy_linearizer(linearizer); + +L_exit: + raviX_destroy_compiler(container); + + return rc; +} \ No newline at end of file diff --git a/ravicomp/src/set.c b/ravicomp/src/set.c new file mode 100644 index 0000000..6ba9856 --- /dev/null +++ b/ravicomp/src/set.c @@ -0,0 +1,409 @@ +/* + * Copyright © 2009 Intel Corporation + * Copyright © 1988-2004 Keith Packard and Bart Massey. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Except as contained in this notice, the names of the authors + * or their institutions shall not be used in advertising or + * otherwise to promote the sale, use or other dealings in this + * Software without prior written authorization from the + * authors. + * + * Authors: + * Eric Anholt + * Keith Packard + */ + +#include "set.h" + +#include +#include + + +#define ARRAY_SIZE(array) ((int)(sizeof(array) / sizeof(array[0]))) + +/* + * From Knuth -- a good choice for hash/rehash values is p, p-2 where + * p and p-2 are both prime. 
These tables are sized to have an extra 10% + * free to avoid exponential performance degradation as the hash table fills + */ + +static const uint32_t deleted_key_value; +static const void *deleted_key = &deleted_key_value; + +static const struct { + uint32_t max_entries, size, rehash; +} hash_sizes[] = { + { 2, 5, 3 }, + { 4, 7, 5 }, + { 8, 13, 11 }, + { 16, 19, 17 }, + { 32, 43, 41 }, + { 64, 73, 71 }, + { 128, 151, 149 }, + { 256, 283, 281 }, + { 512, 571, 569 }, + { 1024, 1153, 1151 }, + { 2048, 2269, 2267 }, + { 4096, 4519, 4517 }, + { 8192, 9013, 9011 }, + { 16384, 18043, 18041 }, + { 32768, 36109, 36107 }, + { 65536, 72091, 72089 }, + { 131072, 144409, 144407 }, + { 262144, 288361, 288359 }, + { 524288, 576883, 576881 }, + { 1048576, 1153459, 1153457 }, + { 2097152, 2307163, 2307161 }, + { 4194304, 4613893, 4613891 }, + { 8388608, 9227641, 9227639 }, + { 16777216, 18455029, 18455027 }, + { 33554432, 36911011, 36911009 }, + { 67108864, 73819861, 73819859 }, + { 134217728, 147639589, 147639587 }, + { 268435456, 295279081, 295279079 }, + { 536870912, 590559793, 590559791 }, + { 1073741824, 1181116273, 1181116271}, + { 2147483648ul, 2362232233ul, 2362232231ul} +}; + +static int +entry_is_free(const SetEntry *entry) +{ + return entry->key == NULL; +} + +static int +entry_is_deleted(const SetEntry *entry) +{ + return entry->key == deleted_key; +} + +static int +entry_is_present(const SetEntry *entry) +{ + return entry->key != NULL && entry->key != deleted_key; +} + +Set *raviX_set_create(uint32_t (*hash_function)(const void *key), + int (*key_equals_function)(const void *a, + const void *b)) +{ + Set *set; + + set = malloc(sizeof(*set)); + if (set == NULL) + return NULL; + + set->size_index = 0; + set->size = hash_sizes[set->size_index].size; + set->rehash = hash_sizes[set->size_index].rehash; + set->max_entries = hash_sizes[set->size_index].max_entries; + set->hash_function = hash_function; + set->key_equals_function = key_equals_function; + set->table = 
calloc(set->size, sizeof(*set->table)); + set->entries = 0; + set->deleted_entries = 0; + + if (set->table == NULL) { + free(set); + return NULL; + } + + return set; +} + +/** + * Frees the given set. + * + * If delete_function is passed, it gets called on each entry present before + * freeing. + */ +void raviX_set_destroy(Set *set, void (*delete_function)(SetEntry *entry)) +{ + if (!set) + return; + + if (delete_function) { + SetEntry *entry; + + set_foreach(set, entry) { + delete_function(entry); + } + } + free(set->table); + free(set); +} + +/* Does the set contain an entry with the given key. + */ +bool raviX_set_contains(Set *set, const void *key) +{ + SetEntry *entry; + + entry = raviX_set_search(set, key); + + return entry != NULL; +} + +/** + * Finds a set entry with the given key. + * + * Returns NULL if no entry is found. + */ +SetEntry *raviX_set_search(Set *set, const void *key) +{ + uint32_t hash = set->hash_function(key); + + return raviX_set_search_pre_hashed(set, hash, key); +} + +/** + * Finds a set entry with the given key and hash of that key. + * + * Returns NULL if no entry is found. 
+ */ +SetEntry *raviX_set_search_pre_hashed(Set *set, uint32_t hash, const void *key) +{ + uint32_t hash_address; + + hash_address = hash % set->size; + do { + uint32_t double_hash; + + SetEntry *entry = set->table + hash_address; + + if (entry_is_free(entry)) { + return NULL; + } else if (entry_is_present(entry) && entry->hash == hash) { + if (set->key_equals_function(key, entry->key)) { + return entry; + } + } + + double_hash = 1 + hash % set->rehash; + + hash_address = (hash_address + double_hash) % set->size; + } while (hash_address != hash % set->size); + + return NULL; +} + +static void +set_rehash(Set *set, int new_size_index) +{ + Set old_set; + SetEntry *table, *entry; + + if (new_size_index >= ARRAY_SIZE(hash_sizes)) + return; + + table = calloc(hash_sizes[new_size_index].size, sizeof(*set->table)); + if (table == NULL) + return; + + old_set = *set; + + set->table = table; + set->size_index = new_size_index; + set->size = hash_sizes[set->size_index].size; + set->rehash = hash_sizes[set->size_index].rehash; + set->max_entries = hash_sizes[set->size_index].max_entries; + set->entries = 0; + set->deleted_entries = 0; + + set_foreach(&old_set, entry) { raviX_set_add_pre_hashed(set, entry->hash, entry->key); + } + + free(old_set.table); +} + +/** + * Inserts the key into the set. + * + * Note that insertion may rearrange the set on a resize or rehash, so + * previously found set_entry pointers are no longer valid after this + * function. + */ +SetEntry *raviX_set_add(Set *set, const void *key) +{ + uint32_t hash = set->hash_function(key); + + /* Make sure nobody tries to add one of the magic values as a + * key. If you need to do so, either do so in a wrapper, or + * store keys with the magic values separately in the struct + * set. + */ + assert(key != NULL); + + return raviX_set_add_pre_hashed(set, hash, key); +} + +/** + * Inserts the key with the given hash into the set. 
+ *
+ * Note that insertion may rearrange the set on a resize or rehash, so
+ * previously found set_entry pointers are no longer valid after this
+ * function.
+ *
+ * If an equal key is already present its entry is reused (the stored key
+ * pointer is replaced) and returned. Returns NULL only when no slot is
+ * available, which can happen if a required resize failed.
+ */
+SetEntry *raviX_set_add_pre_hashed(Set *set, uint32_t hash, const void *key)
+{
+	uint32_t start_address;
+	uint32_t double_hash;
+	uint32_t hash_address;
+	SetEntry *available_entry = NULL;
+
+	/* Grow when full of live entries; rebuild at the same size when the
+	 * table is merely clogged with tombstones.
+	 */
+	if (set->entries >= set->max_entries) {
+		set_rehash(set, set->size_index + 1);
+	} else if (set->deleted_entries + set->entries >= set->max_entries) {
+		set_rehash(set, set->size_index);
+	}
+
+	/* size and rehash must be read after the (possible) rehash above */
+	start_address = hash % set->size;
+	double_hash = 1 + hash % set->rehash;
+	hash_address = start_address;
+	do {
+		SetEntry *entry = set->table + hash_address;
+
+		if (!entry_is_present(entry)) {
+			/* Stash the first available entry we find */
+			if (available_entry == NULL)
+				available_entry = entry;
+			if (entry_is_free(entry))
+				break;
+		}
+
+		/* Implement replacement when another insert happens
+		 * with a matching key. This is a relatively common
+		 * feature of hash tables, with the alternative
+		 * generally being "insert the new value as well, and
+		 * return it first when the key is searched for".
+		 *
+		 * Note that the set doesn't have a delete callback.
+		 * If freeing of old keys is required to avoid memory leaks,
+		 * perform a search before inserting.
+		 */
+		if (!entry_is_deleted(entry) &&
+		    entry->hash == hash &&
+		    set->key_equals_function(key, entry->key)) {
+			entry->key = key;
+			return entry;
+		}
+
+		/* Add in 64 bits: for the largest table size the sum of
+		 * hash_address and double_hash does not fit in uint32_t.
+		 */
+		hash_address = (uint32_t)(((uint64_t)hash_address + double_hash) % set->size);
+	} while (hash_address != start_address);
+
+	if (available_entry) {
+		/* reusing a tombstone removes it from the deleted count */
+		if (entry_is_deleted(available_entry))
+			set->deleted_entries--;
+		available_entry->hash = hash;
+		available_entry->key = key;
+		set->entries++;
+		return available_entry;
+	}
+
+	/* We could hit here if a required resize failed. An unchecked-malloc
+	 * application could ignore this result.
+	 */
+	return NULL;
+}
+
+/**
+ * This function searches for, and removes an entry from the set.
+ * + * If the caller has previously found a SetEntry pointer, + * (from calling raviX_set_search or remembering it from raviX_set_add), then + * raviX_set_remove_entry can be called instead to avoid an extra search. + */ +void raviX_set_remove(Set *set, const void *key) +{ + SetEntry *entry; + + entry = raviX_set_search(set, key); + + raviX_set_remove_entry(set, entry); +} + +/** + * This function deletes the set given set entry. + * + * Note that deletion doesn't otherwise modify the set, so an + * iteration over the set deleting entries is safe. + */ +void raviX_set_remove_entry(Set *set, SetEntry *entry) +{ + if (!entry) + return; + + entry->key = deleted_key; + set->entries--; + set->deleted_entries++; +} + +/** + * This function is an iterator over the set. + * + * Pass in NULL for the first entry, as in the start of a for loop. + * Note that an iteration over the set is O(table_size) not + * O(entries). + */ +SetEntry *raviX_set_next_entry(Set *set, SetEntry *entry) +{ + if (entry == NULL) + entry = set->table; + else + entry = entry + 1; + + for (; entry != set->table + set->size; entry++) { + if (entry_is_present(entry)) { + return entry; + } + } + + return NULL; +} + +#ifndef _WIN32 +SetEntry *raviX_set_random_entry(Set *set, + int (*predicate)(SetEntry *entry)) +{ + SetEntry *entry; + uint32_t i = random() % set->size; + + if (set->entries == 0) + return NULL; + + for (entry = set->table + i; entry != set->table + set->size; entry++) { + if (entry_is_present(entry) && + (!predicate || predicate(entry))) { + return entry; + } + } + + for (entry = set->table; entry != set->table + i; entry++) { + if (entry_is_present(entry) && + (!predicate || predicate(entry))) { + return entry; + } + } + + return NULL; +} +#endif diff --git a/ravicomp/src/set.h b/ravicomp/src/set.h new file mode 100644 index 0000000..806e814 --- /dev/null +++ b/ravicomp/src/set.h @@ -0,0 +1,87 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of 
charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Eric Anholt + * + */ + +#ifndef ravicomp_SET_H +#define ravicomp_SET_H + +#include +#include + +typedef struct SetEntry { + uint32_t hash; + const void *key; +} SetEntry; + +typedef struct Set { + SetEntry *table; + uint32_t (*hash_function)(const void *key); + int (*key_equals_function)(const void *a, const void *b); + uint32_t size; + uint32_t rehash; + uint32_t max_entries; + uint32_t size_index; + uint32_t entries; + uint32_t deleted_entries; +} Set; + +Set *raviX_set_create(uint32_t (*hash_function)(const void *key), + int (*key_equals_function)(const void *a, + const void *b)); +void raviX_set_destroy(Set *set, + void (*delete_function)(SetEntry *entry)); + +SetEntry *raviX_set_add(Set *set, const void *key); + +bool raviX_set_contains(Set *set, const void *key); + +void raviX_set_remove(Set *set, const void *key); + +SetEntry *raviX_set_search(Set *set, const void *key); + +void raviX_set_remove_entry(Set *set, SetEntry *entry); + +SetEntry *raviX_set_next_entry(Set *set, SetEntry *entry); + +SetEntry *raviX_set_random_entry(Set *set, + int (*predicate)(SetEntry *entry)); + +/** + * This foreach function is safe against deletion (which just replaces + * an entry's data with the deleted marker), but not against insertion + * (which may rehash the table, making entry a dangling pointer). + */ +#define set_foreach(ht, entry) \ + for (entry = raviX_set_next_entry(ht, NULL); \ + entry != NULL; \ + entry = raviX_set_next_entry(ht, entry)) + +/* Alternate interfaces to reduce repeated calls to hash function. 
*/ +SetEntry *raviX_set_search_pre_hashed(Set *set, uint32_t hash, const void *key); + +SetEntry *raviX_set_add_pre_hashed(Set *set, uint32_t hash, const void *key); + +#endif diff --git a/ravicomp/src/typechecker.c b/ravicomp/src/typechecker.c new file mode 100644 index 0000000..d926695 --- /dev/null +++ b/ravicomp/src/typechecker.c @@ -0,0 +1,522 @@ +/* +Copyright (C) 2018-2020 Dibyendu Majumdar +*/ +#include + +/* +The Type checker walks through the AST and annotates nodes with type information. +It also checks that the operations are valid. + +There are following assumptions made about the code generator backend. + +a) Function arguments that have type info must be asserted at runtime +b) Local variable assignments in 'local' or expression statements must be asserted by the backend +c) We allow assigning integer value to number and vice versa in the AST but the code generator must assert this is valid +d) Any unassigned local vars that have type info must be set to valid initial values. + +None of these operations are explicit in the AST. 
+*/
+
+
+/* Two types are the same when both the type code and the type name pointer match. */
+static bool is_type_same(const VariableType *a, const VariableType *b)
+{
+	// String comparison of type_name relies upon strings being interned
+	return a->type_code == b->type_code && a->type_name == b->type_name;
+}
+
+/* True for the two numeric type codes (integer and floating point). */
+static bool is_numeric_typecode(ravitype_t type_code)
+{
+	return type_code == RAVI_TNUMINT || type_code == RAVI_TNUMFLT;
+}
+
+/*
+ * Records msg in the compiler's error buffer and unwinds to the setjmp() in
+ * raviX_ast_typecheck() with value 1. Does not return.
+ */
+static void handle_error(CompilerState *container, const char *msg)
+{
+	// TODO source and line number
+	raviX_buffer_add_string(&container->error_message, msg);
+	longjmp(container->env, 1);
+}
+
+/* Type checker - WIP */
+static void typecheck_ast_node(CompilerState *container, AstNode *function, AstNode *node);
+
+/* Type checker - WIP: type checks every node of the list in order */
+static void typecheck_ast_list(CompilerState *container, AstNode *function, AstNodeList *list)
+{
+	AstNode *node;
+	FOR_EACH_PTR(list, node) { typecheck_ast_node(container, function, node); }
+	END_FOR_EACH_PTR(node);
+}
+
+/*
+ * Type checker - WIP
+ * Checks the operand of a unary expression, then derives the type of the
+ * expression from the operator (and, for '-' and '#', from the operand type).
+ * When no rule applies the expression type is left as it was.
+ */
+static void typecheck_unary_operator(CompilerState *container, AstNode *function, AstNode *node)
+{
+	UnaryOperatorType op = node->unary_expr.unary_op;
+	typecheck_ast_node(container, function, node->unary_expr.expr);
+	ravitype_t subexpr_type = node->unary_expr.expr->common_expr.type.type_code;
+	switch (op) {
+	case UNOPR_MINUS:
+		if (subexpr_type == RAVI_TNUMINT) {
+			set_type(&node->unary_expr.type, RAVI_TNUMINT);
+		} else if (subexpr_type == RAVI_TNUMFLT) {
+			set_type(&node->unary_expr.type, RAVI_TNUMFLT);
+		}
+		break;
+	case UNOPR_LEN:
+		if (subexpr_type == RAVI_TARRAYINT || subexpr_type == RAVI_TARRAYFLT) {
+			set_type(&node->unary_expr.type, RAVI_TNUMINT);
+		}
+		break;
+	case UNOPR_TO_INTEGER:
+		set_type(&node->unary_expr.type, RAVI_TNUMINT);
+		break;
+	case UNOPR_TO_NUMBER:
+		set_type(&node->unary_expr.type, RAVI_TNUMFLT);
+		break;
+	case UNOPR_TO_CLOSURE:
+		set_type(&node->unary_expr.type, RAVI_TFUNCTION);
+		break;
+	case UNOPR_TO_STRING:
+		set_type(&node->unary_expr.type, RAVI_TSTRING);
+		break;
+	case UNOPR_TO_INTARRAY:
+		set_type(&node->unary_expr.type, RAVI_TARRAYINT);
+		/* a table literal under the cast takes the array type itself */
+		if (node->unary_expr.expr->type == EXPR_TABLE_LITERAL) {
+			set_type(&node->unary_expr.expr->table_expr.type, RAVI_TARRAYINT);
+		}
+		break;
+	case UNOPR_TO_NUMARRAY:
+		set_type(&node->unary_expr.type, RAVI_TARRAYFLT);
+		if (node->unary_expr.expr->type == EXPR_TABLE_LITERAL) {
+			set_type(&node->unary_expr.expr->table_expr.type, RAVI_TARRAYFLT);
+		}
+		break;
+	case UNOPR_TO_TABLE:
+		set_type(&node->unary_expr.type, RAVI_TTABLE);
+		break;
+	case UNOPR_TO_TYPE:
+		assert(node->unary_expr.type.type_name != NULL); // Should already be set by the parser
+		set_typecode(&node->unary_expr.type, RAVI_TUSERDATA);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Type checker - WIP
+ * Checks both operands of a binary expression, then derives the result type
+ * from the operator and the operand type codes. For the operators listed
+ * explicitly the result type is only set when the operand types match one of
+ * the rules; any other operator yields RAVI_TANY.
+ */
+static void typecheck_binary_operator(CompilerState *container, AstNode *function,
+				      AstNode *node)
+{
+	BinaryOperatorType op = node->binary_expr.binary_op;
+	AstNode *e1 = node->binary_expr.expr_left;
+	AstNode *e2 = node->binary_expr.expr_right;
+	typecheck_ast_node(container, function, e1);
+	typecheck_ast_node(container, function, e2);
+	/* operand types are only final once both operands have been checked */
+	ravitype_t t1 = e1->common_expr.type.type_code;
+	ravitype_t t2 = e2->common_expr.type.type_code;
+	bool both_numeric = is_numeric_typecode(t1) && is_numeric_typecode(t2);
+	bool both_integer = t1 == RAVI_TNUMINT && t2 == RAVI_TNUMINT;
+	switch (op) {
+	case BINOPR_ADD:
+	case BINOPR_SUB:
+	case BINOPR_MUL:
+	case BINOPR_DIV:
+		/* integer op integer stays integer, except '/' which always yields a float */
+		if (both_numeric) {
+			if (both_integer && op != BINOPR_DIV)
+				set_typecode(&node->binary_expr.type, RAVI_TNUMINT);
+			else
+				set_typecode(&node->binary_expr.type, RAVI_TNUMFLT);
+		}
+		break;
+	case BINOPR_IDIV:
+		if (both_integer)
+			set_typecode(&node->binary_expr.type, RAVI_TNUMINT);
+		// FIXME missing cases
+		break;
+	case BINOPR_BAND:
+	case BINOPR_BOR:
+	case BINOPR_BXOR:
+	case BINOPR_SHL:
+	case BINOPR_SHR:
+		if (both_numeric)
+			set_typecode(&node->binary_expr.type, RAVI_TNUMINT);
+		break;
+	case BINOPR_EQ:
+	case BINOPR_NE:
+	case BINOPR_GE:
+	case BINOPR_GT:
+	case BINOPR_LE:
+	case BINOPR_LT:
+		/* This case is not handled in default parser - why? */
+		if ((is_numeric_typecode(t1) || t1 == RAVI_TBOOLEAN) &&
+		    (is_numeric_typecode(t2) || t2 == RAVI_TBOOLEAN))
+			set_typecode(&node->binary_expr.type, RAVI_TBOOLEAN);
+		break;
+	case BINOPR_POW:
+		if (both_numeric)
+			set_typecode(&node->binary_expr.type, RAVI_TNUMFLT);
+		break;
+	case BINOPR_MOD:
+		if (both_integer)
+			set_typecode(&node->binary_expr.type, RAVI_TNUMINT);
+		else if (both_numeric && t1 != t2) /* one integer and one float operand */
+			set_typecode(&node->binary_expr.type, RAVI_TNUMFLT);
+		/* NOTE(review): float % float sets no type here - confirm whether that is intended */
+		break;
+	default:
+		set_typecode(&node->binary_expr.type, RAVI_TANY);
+		break;
+	}
+}
+
+/* Values of these types can never be indexed */
+static bool is_unindexable_type(VariableType *type)
+{
+	switch (type->type_code) {
+	case RAVI_TNUMFLT:
+	case RAVI_TNUMINT:
+	case RAVI_TBOOLEAN:
+	case RAVI_TNIL:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Suffixed expression examples:
+ * f()[1]
+ * x[1][2]
+ * x.y[1]
+ *
+ * The primary expression and every suffix are checked in order. An index
+ * suffix applied to an integer/number array must itself be an integer and
+ * takes the array's element type; indexing a number, boolean or nil is an
+ * error. The whole expression takes the type of the last suffix (or of the
+ * primary expression when there is no suffix).
+ */
+static void typecheck_suffixedexpr(CompilerState *container, AstNode *function, AstNode *node)
+{
+	typecheck_ast_node(container, function, node->suffixed_expr.primary_expr);
+	AstNode *prev_node = node->suffixed_expr.primary_expr;
+	AstNode *this_node;
+	FOR_EACH_PTR(node->suffixed_expr.suffix_list, this_node)
+	{
+		typecheck_ast_node(container, function, this_node);
+		if (this_node->type == EXPR_Y_INDEX) {
+			if (prev_node->common_expr.type.type_code == RAVI_TARRAYFLT) {
+				if (this_node->index_expr.expr->common_expr.type.type_code == RAVI_TNUMINT) {
+					set_typecode(&this_node->index_expr.type, RAVI_TNUMFLT);
+				} else {
+					handle_error(container, "invalid type in index");
+				}
+			} else if (prev_node->common_expr.type.type_code == RAVI_TARRAYINT) {
+				if (this_node->index_expr.expr->common_expr.type.type_code == RAVI_TNUMINT) {
+					set_typecode(&this_node->index_expr.type, RAVI_TNUMINT);
+				} else {
+					handle_error(container, "invalid type in index");
+				}
+			} else if (is_unindexable_type(&prev_node->common_expr.type)) {
+				handle_error(container, "invalid type in index");
+			}
+		}
+		prev_node = this_node;
+	}
+	END_FOR_EACH_PTR(this_node);
+	copy_type(&node->suffixed_expr.type, &prev_node->common_expr.type);
+}
+
+/*
+ * Checks that the value of expr may be assigned to a variable of type var_type.
+ * integer <-> number assignments are accepted (the backend must convert), as
+ * is assigning the result of the '#' operator to an integer. Incompatible
+ * assignments are only reported on stderr; they do not abort the type check.
+ * var_name may be NULL, in which case a placeholder name is printed.
+ */
+static void typecheck_var_assignment(CompilerState *container, VariableType *var_type, AstNode *expr,
+				     const StringObject *var_name)
+{
+	if (var_type->type_code == RAVI_TANY)
+		// Any value can be assigned to type ANY
+		return;
+	const char *variable_name = var_name ? var_name->str : "unknown-TODO";
+	VariableType *expr_type = &expr->common_expr.type;
+
+	if (var_type->type_code == RAVI_TNUMINT) {
+		/* if the expr is of type number or # operator then insert @integer operator */
+		if (expr_type->type_code == RAVI_TNUMFLT ||
+		    (expr->type == EXPR_UNARY && expr->unary_expr.unary_op == UNOPR_LEN)) {
+			/* Okay, but backend must do appropriate conversion */
+			;
+		} else if (expr_type->type_code != RAVI_TNUMINT) {
+			fprintf(stderr, "Assignment to local symbol %s is not type compatible\n", variable_name);
+		}
+		return;
+	}
+	if (var_type->type_code == RAVI_TNUMFLT) {
+		if (expr_type->type_code == RAVI_TNUMINT) {
+			/* Okay, but backend must do appropriate conversion */
+			;
+		} else if (expr_type->type_code != RAVI_TNUMFLT) {
+			fprintf(stderr, "Assignment to local symbol %s is not type compatible\n", variable_name);
+		}
+		return;
+	}
+	// all other types must strictly match
+	if (!is_type_same(var_type, expr_type)) { // We should probably check type convert-ability here
+		fprintf(stderr, "Assignment to local symbol %s is not type compatible\n", variable_name);
+	}
+}
+
+/*
+ * Type checks a 'local' statement: first the expressions on the right of '=',
+ * then each (variable, expression) pair for assignment compatibility. Pairing
+ * stops as soon as either list runs out.
+ */
+static void typecheck_local_statement(CompilerState *container, AstNode *function,
+				      AstNode *node)
+{
+	// The local vars should already be annotated
+	// We need to typecheck the expressions to the right of =
+	// Then we need to ensure that the assignments are valid
+	// We can perhaps insert type assertions where we have a mismatch?
+
+	typecheck_ast_list(container, function, node->local_stmt.expr_list);
+
+	LuaSymbol *var;
+	AstNode *expr;
+	PREPARE_PTR_LIST(node->local_stmt.var_list, var);
+	PREPARE_PTR_LIST(node->local_stmt.expr_list, expr);
+
+	for (;;) {
+		if (!var || !expr)
+			break;
+
+		VariableType *var_type = &var->variable.value_type;
+		const StringObject *var_name = var->variable.var_name;
+
+		typecheck_var_assignment(container, var_type, expr, var_name);
+
+		NEXT_PTR_LIST(var);
+		NEXT_PTR_LIST(expr);
+	}
+}
+
+/*
+ * Type checks an expression statement. The optional assignment targets and
+ * the expressions are checked first; when targets exist, each
+ * (target, expression) pair is then checked for assignment compatibility.
+ */
+static void typecheck_expr_statement(CompilerState *container, AstNode *function, AstNode *node)
+{
+	if (node->expression_stmt.var_expr_list)
+		typecheck_ast_list(container, function, node->expression_stmt.var_expr_list);
+	typecheck_ast_list(container, function, node->expression_stmt.expr_list);
+
+	if (!node->expression_stmt.var_expr_list)
+		return;
+
+	AstNode *var;
+	AstNode *expr;
+	PREPARE_PTR_LIST(node->expression_stmt.var_expr_list, var);
+	/* this node is an expression statement, so read its own expr_list rather than local_stmt's */
+	PREPARE_PTR_LIST(node->expression_stmt.expr_list, expr);
+
+	for (;;) {
+		if (!var || !expr)
+			break;
+
+		VariableType *var_type = &var->common_expr.type;
+		const StringObject *var_name = NULL; // FIXME how do we get this?
+
+		typecheck_var_assignment(container, var_type, expr, var_name);
+
+		NEXT_PTR_LIST(var);
+		NEXT_PTR_LIST(expr);
+	}
+}
+
+/* Type checks the iterator expressions and the body of a generic 'for ... in' loop */
+static void typecheck_for_in_statment(CompilerState *container, AstNode *function,
+				      AstNode *node)
+{
+	typecheck_ast_list(container, function, node->for_stmt.expr_list);
+	typecheck_ast_list(container, function, node->for_stmt.for_statement_list);
+}
+
+/*
+ * Type checks a numeric 'for' loop. When every loop expression is an integer
+ * or a number, the loop symbols are given a numeric type: integer if all
+ * expressions are integers, number otherwise. If any expression has another
+ * type the symbols are left untouched. The loop body is checked afterwards so
+ * that it sees the inferred index type.
+ */
+static void typecheck_for_num_statment(CompilerState *container, AstNode *function,
+				       AstNode *node)
+{
+	typecheck_ast_list(container, function, node->for_stmt.expr_list);
+	AstNode *expr;
+	enum { I = 1, F = 2, A = 4 }; /* bits representing integer, number, any */
+	int index_type = 0;
+	FOR_EACH_PTR(node->for_stmt.expr_list, expr)
+	{
+		switch (expr->common_expr.type.type_code) {
+		case RAVI_TNUMFLT:
+			index_type |= F;
+			break;
+		case RAVI_TNUMINT:
+			index_type |= I;
+			break;
+		default:
+			index_type |= A;
+			break;
+		}
+		/* one non-numeric expression decides the outcome, no need to look further */
+		if ((index_type & A) != 0)
+			break;
+	}
+	END_FOR_EACH_PTR(expr);
+	if ((index_type & A) == 0) { /* not any */
+		/* for I+F we use F */
+		ravitype_t symbol_type = index_type == I ? RAVI_TNUMINT : RAVI_TNUMFLT;
+		LuaSymbolList *symbols = node->for_stmt.symbols;
+		LuaSymbol *sym;
+		/* actually there will be only index variable */
+		FOR_EACH_PTR(symbols, sym)
+		{
+			if (sym->symbol_type == SYM_LOCAL) {
+				set_typecode(&sym->variable.value_type, symbol_type);
+			} else {
+				assert(0); /* cannot happen */
+			}
+		}
+		END_FOR_EACH_PTR(sym);
+	}
+	typecheck_ast_list(container, function, node->for_stmt.for_statement_list);
+}
+
+/* Type checks the condition and body of every if/elseif branch, then the else body if present */
+static void typecheck_if_statement(CompilerState *container, AstNode *function, AstNode *node)
+{
+	AstNode *test_then_block;
+	FOR_EACH_PTR(node->if_stmt.if_condition_list, test_then_block)
+	{
+		typecheck_ast_node(container, function, test_then_block->test_then_block.condition);
+		typecheck_ast_list(container, function, test_then_block->test_then_block.test_then_statement_list);
+	}
+	END_FOR_EACH_PTR(test_then_block);
+	if (node->if_stmt.else_statement_list) {
+		typecheck_ast_list(container, function, node->if_stmt.else_statement_list);
+	}
+}
+
+/* Type checks the condition of a while/repeat loop and its body, if any */
+static void typecheck_while_or_repeat_statement(CompilerState *container, AstNode *function,
+						AstNode *node)
+{
+	typecheck_ast_node(container, function, node->while_or_repeat_stmt.condition);
+	if (node->while_or_repeat_stmt.loop_statement_list) {
+		typecheck_ast_list(container, function, node->while_or_repeat_stmt.loop_statement_list);
+	}
+}
+
+/*
+ * Type checker - WIP
+ * Dispatches on the node type. Asserts on a node type it does not know.
+ */
+static void typecheck_ast_node(CompilerState *container, AstNode *function, AstNode *node)
+{
+	switch (node->type) {
+	case EXPR_FUNCTION: {
+		/* args need type assertions but those have no ast - i.e. code gen should do it */
+		typecheck_ast_list(container, function, node->function_expr.function_statement_list);
+		break;
+	}
+	case AST_NONE: {
+		break;
+	}
+	case STMT_RETURN: {
+		typecheck_ast_list(container, function, node->return_stmt.expr_list);
+		break;
+	}
+	case STMT_LOCAL: {
+		typecheck_local_statement(container, function, node);
+		break;
+	}
+	case STMT_FUNCTION: {
+		typecheck_ast_node(container, function, node->function_stmt.function_expr);
+		break;
+	}
+	case STMT_LABEL: {
+		break;
+	}
+	case STMT_GOTO: {
+		break;
+	}
+	case STMT_DO: {
+		/* NOTE(review): the statements of a do-block are not type checked here - confirm whether that is intended */
+		break;
+	}
+	case STMT_EXPR: {
+		typecheck_expr_statement(container, function, node);
+		break;
+	}
+	case STMT_IF: {
+		typecheck_if_statement(container, function, node);
+		break;
+	}
+	case STMT_WHILE:
+	case STMT_REPEAT: {
+		typecheck_while_or_repeat_statement(container, function, node);
+		break;
+	}
+	case STMT_FOR_IN: {
+		typecheck_for_in_statment(container, function, node);
+		break;
+	}
+	case STMT_FOR_NUM: {
+		typecheck_for_num_statment(container, function, node);
+		break;
+	}
+	case EXPR_SUFFIXED: {
+		typecheck_suffixedexpr(container, function, node);
+		break;
+	}
+	case EXPR_FUNCTION_CALL: {
+		/* method calls and plain calls are treated alike: only the arguments are checked */
+		typecheck_ast_list(container, function, node->function_call_expr.arg_list);
+		break;
+	}
+	case EXPR_SYMBOL: {
+		/* symbol type should have been set when symbol was created */
+		if (node->symbol_expr.var->symbol_type != SYM_LABEL) {
+			copy_type(&node->symbol_expr.type, &node->symbol_expr.var->variable.value_type);
+		}
+		else {
+			// TODO can this happen?
+			node->symbol_expr.type.type_code = RAVI_TANY;
+		}
+		break;
+	}
+	case EXPR_BINARY: {
+		typecheck_binary_operator(container, function, node);
+		break;
+	}
+	case EXPR_UNARY: {
+		typecheck_unary_operator(container, function, node);
+		break;
+	}
+	case EXPR_LITERAL: {
+		/* type set during parsing */
+		break;
+	}
+	case EXPR_FIELD_SELECTOR: {
+		typecheck_ast_node(container, function, node->index_expr.expr);
+		break;
+	}
+	case EXPR_Y_INDEX: {
+		typecheck_ast_node(container, function, node->index_expr.expr);
+		break;
+	}
+	case EXPR_TABLE_ELEMENT_ASSIGN: {
+		if (node->table_elem_assign_expr.key_expr) {
+			typecheck_ast_node(container, function, node->table_elem_assign_expr.key_expr);
+		}
+		typecheck_ast_node(container, function, node->table_elem_assign_expr.value_expr);
+		/* the element takes the type of the value assigned to it */
+		copy_type(&node->table_elem_assign_expr.type, &node->table_elem_assign_expr.value_expr->common_expr.type);
+		break;
+	}
+	case EXPR_TABLE_LITERAL: {
+		typecheck_ast_list(container, function, node->table_expr.expr_list);
+		break;
+	}
+	default:
+		assert(0);
+	}
+}
+
+/* Type checker - WIP: type checks the statements of a function */
+static void typecheck_function(CompilerState *container, AstNode *func)
+{
+	typecheck_ast_list(container, func, func->function_expr.function_statement_list);
+}
+
+/*
+ * Type checker - WIP
+ * Entry point: type checks the main function of the compilation unit.
+ * Clears the error buffer first. Returns 0 on success and 1 when a type error
+ * was raised through handle_error(), in which case the message is in
+ * container->error_message.
+ */
+int raviX_ast_typecheck(CompilerState *container)
+{
+	AstNode *main_function = container->main_function;
+	raviX_buffer_reset(&container->error_message);
+	/* setjmp() is used as the whole controlling expression of a comparison,
+	 * one of the contexts ISO C permits; assigning its result is not.
+	 */
+	if (setjmp(container->env) != 0) {
+		/* handle_error() always longjmps with value 1 */
+		return 1;
+	}
+	typecheck_function(container, main_function);
+	return 0;
+}