reformat docs in sphinx format

pull/81/head
dibyendumajumdar 9 years ago
parent b791c616ee
commit 7583b66858

@ -0,0 +1,177 @@
# Makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
# SPHINXBUILD uses ?= so that a value exported in the environment is honoured,
# which is what the error message below tells the user to do.  A plain '='
# would silently override the environment (unless make is run with -e).
SPHINXBUILD ?= sphinx-build
PAPER =
BUILDDIR = _build

# User-friendly check for sphinx-build: stop at parse time with an
# explanatory message when `which` reports that the executable is missing.
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif

# Internal variables.
# PAPEROPT_$(PAPER) selects one of the two definitions below; it expands to
# nothing when PAPER is empty or names no defined paper size.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# None of the targets in this file names a file it creates, so all of them are
# declared phony.  Without this, a stray file or directory called e.g. 'info',
# 'texinfo' or 'xml' would make the matching target look up to date.
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp \
        epub latex latexpdf latexpdfja text man texinfo info gettext changes \
        xml pseudoxml linkcheck doctest
# help: print the list of documentation targets with a one-line description
# of each.  It is the first target in the file, so a bare `make` runs it.
help:
@echo "Please use \`make <target>' where <target> is one of"
@echo " html to make standalone HTML files"
@echo " dirhtml to make HTML files named index.html in directories"
@echo " singlehtml to make a single large HTML file"
@echo " pickle to make pickle files"
@echo " json to make JSON files"
@echo " htmlhelp to make HTML files and a HTML help project"
@echo " qthelp to make HTML files and a qthelp project"
@echo " devhelp to make HTML files and a Devhelp project"
@echo " epub to make an epub"
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
@echo " latexpdf to make LaTeX files and run them through pdflatex"
@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
@echo " text to make text files"
@echo " man to make manual pages"
@echo " texinfo to make Texinfo files"
@echo " info to make Texinfo files and run them through makeinfo"
@echo " gettext to make PO message catalogs"
@echo " changes to make an overview of all changed/added/deprecated items"
@echo " xml to make Docutils-native XML files"
@echo " pseudoxml to make pseudoxml-XML files for display purposes"
@echo " linkcheck to check all external links for integrity"
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
# clean: delete everything under $(BUILDDIR).
# The first recipe line expands to nothing when BUILDDIR is set; when it is
# empty (e.g. `make clean BUILDDIR=`) it aborts, because the rm below would
# otherwise expand to `rm -rf /*`.
clean:
	$(if $(strip $(BUILDDIR)),,$(error BUILDDIR is empty; refusing to run rm -rf on /*))
	rm -rf $(BUILDDIR)/*
# Basic builders.  In each rule the target name is also the Sphinx builder
# name and the output sub-directory, so the recipe refers to it through $@.

# html: standalone HTML pages.
html:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

# dirhtml: HTML pages named index.html inside per-document directories.
dirhtml:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

# singlehtml: one large HTML page.
singlehtml:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

# pickle: pickle files for further processing.
pickle:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished; now you can process the pickle files."

# json: JSON files for further processing.
json:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished; now you can process the JSON files."

# htmlhelp: HTML files plus an HTML Help project (.hhp).
htmlhelp:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."
# qthelp: HTML files plus a Qt help project, followed by instructions for
# turning the project into a help collection and viewing it.
# The inner double quotes around qcollectiongenerator are escaped; unescaped,
# they terminate and reopen the shell string, so the quotes are never printed.
qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run \"qcollectiongenerator\" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/RaviProgrammingLanguage.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/RaviProgrammingLanguage.qhc"
# devhelp: HTML files plus a Devhelp project.  The echoed commands are
# suggestions for the user; $$HOME is left for the shell to expand.
devhelp:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/RaviProgrammingLanguage"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/RaviProgrammingLanguage"
	@echo "# devhelp"

# epub: a single epub file.
epub:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

# latex: LaTeX sources only; PAPER=a4 or PAPER=letter selects the paper size.
latex:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."
# latexpdf: build the LaTeX sources, then run the generated Makefile in
# $(BUILDDIR)/latex to produce PDFs with pdflatex.
latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

# latexpdfja: same as latexpdf but through the all-pdf-ja target
# (platex + dvipdfmx).  The completion message names the tools that were
# actually run rather than pdflatex.
latexpdfja:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through platex and dvipdfmx..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
	@echo "platex and dvipdfmx finished; the PDF files are in $(BUILDDIR)/latex."
# text: plain-text output.
text:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

# man: manual pages.
man:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

# texinfo: Texinfo sources only; see the info target for the makeinfo step.
texinfo:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."
# info: build the Texinfo sources, then run the generated Makefile in
# $(BUILDDIR)/texinfo to produce Info files.
# The sub-make is invoked through $(MAKE), as in latexpdf, so that flags such
# as -n and -j and the jobserver are passed down to it.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
# gettext: PO message catalogs.  Uses I18NSPHINXOPTS (no shared doctrees) and
# writes to $(BUILDDIR)/locale rather than a directory named after the target.
gettext:
	$(SPHINXBUILD) -b $@ $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

# changes: overview of all changed/added/deprecated items.
changes:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

# linkcheck: check all external links for integrity.
linkcheck:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."

# doctest: run the doctests embedded in the documentation (if enabled).
doctest:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/doctest/output.txt."
# xml: Docutils-native XML files.
xml:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."

# pseudoxml: pseudo-XML files for display purposes.
pseudoxml:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."

@ -0,0 +1,258 @@
# -*- coding: utf-8 -*-
#
# Ravi Programming Language documentation build configuration file, created by
# sphinx-quickstart on Thu Feb 5 03:29:17 2015.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys
import os
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))
# -- General configuration ------------------------------------------------

# State the minimal Sphinx version here if the documentation needs one.
#needs_sphinx = '1.0'

# Names of Sphinx extension modules, as strings.  These can be extensions
# shipped with Sphinx (named 'sphinx.ext.*') or custom ones.
extensions = []

# Paths that contain templates, relative to this directory.
templates_path = ["_templates"]

# Suffix of source filenames.
source_suffix = ".rst"

# Encoding of source files.
#source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = "index"

# General information about the project.
project = u"Ravi Programming Language"
copyright = u"2015, Dibyendu Majumdar"

# Version info for the documented project; it replaces |version| and
# |release| and is also used in various other places throughout the built
# documents.
#
# Short X.Y version.
version = "0.1"
# Full version, including alpha/beta/rc tags.
release = "0.1"

# Language for content autogenerated by Sphinx.  See the Sphinx documentation
# for the list of supported languages.
#language = None

# Two ways to replace |today|: either set 'today' to some non-false value,
# which is then used as-is:
#today = ''
# or let 'today_fmt' be used as the format for a strftime call.
#today_fmt = '%B %d, %Y'

# Patterns, relative to the source directory, matching files and directories
# to ignore when looking for source files.
exclude_patterns = ["_build"]

# The reST default role (used for this markup: `text`) for all documents.
#default_role = None

# If true, '()' is appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# If true, the current module name is prepended to all description unit
# titles (such as .. function::).
#add_module_names = True

# If true, sectionauthor and moduleauthor directives are shown in the output.
# They are ignored by default.
#show_authors = False

# Name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"

# Prefixes ignored when sorting the module index.
#modindex_common_prefix = []

# If true, warnings are kept as "system message" paragraphs in the built
# documents.
#keep_warnings = False
# -- Options for HTML output ----------------------------------------------

# Theme for HTML and HTML Help pages.  The Sphinx documentation lists the
# builtin themes.
html_theme = "default"

# Theme-specific options that further customize a theme's look and feel.
# The available options for each theme are listed in the documentation.
#html_theme_options = {}

# Paths that contain custom themes, relative to this directory.
#html_theme_path = []

# Name for this set of Sphinx documents.  If None, it defaults to
# "<project> v<release> documentation".
#html_title = None

# Shorter title for the navigation bar.  Default is the same as html_title.
#html_short_title = None

# Image file (relative to this directory) to place at the top of the sidebar.
#html_logo = None

# Image file (within the static path) to use as favicon of the docs.  It
# should be a Windows icon file (.ico) being 16x16 or 32x32 pixels large.
#html_favicon = None

# Paths that contain custom static files (such as style sheets), relative to
# this directory.  They are copied after the builtin static files, so a file
# named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]

# Extra paths that contain custom files (such as robots.txt or .htaccess),
# relative to this directory.  These files are copied directly to the root of
# the documentation.
#html_extra_path = []

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants is used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True

# Custom sidebar templates; maps document names to template names.
#html_sidebars = {}

# Additional templates to render to pages; maps page names to template names.
#html_additional_pages = {}

# If false, no module index is generated.
#html_domain_indices = True

# If false, no index is generated.
#html_use_index = True

# If true, the index is split into individual pages for each letter.
#html_split_index = False

# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer.  Default is
# True.
#html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer.  Default is True.
#html_show_copyright = True

# If true, an OpenSearch description file is output and all pages contain a
# <link> tag referring to it.  The value of this option must be the base URL
# from which the finished HTML is served.
#html_use_opensearch = ''

# File name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None

# Output file base name for the HTML help builder.
htmlhelp_basename = "RaviProgrammingLanguagedoc"
# -- Options for LaTeX output ---------------------------------------------

# No entries are active; each commented key below is an available setting.
latex_elements = {
    # Paper size ('letterpaper' or 'a4paper').
    #'papersize': 'letterpaper',

    # Font size ('10pt', '11pt' or '12pt').
    #'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #'preamble': '',
}

# How the document tree is grouped into LaTeX files.  Each tuple is
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (
        "index",
        "RaviProgrammingLanguage.tex",
        u"Ravi Programming Language Documentation",
        u"Dibyendu Majumdar",
        "manual",
    ),
]

# Image file (relative to this directory) to place at the top of the title
# page.
#latex_logo = None

# For "manual" documents: if true, toplevel headings are parts, not chapters.
#latex_use_parts = False

# If true, page references are shown after internal links.
#latex_show_pagerefs = False

# If true, URL addresses are shown after external links.
#latex_show_urls = False

# Documents to append as an appendix to all manuals.
#latex_appendices = []

# If false, no module index is generated.
#latex_domain_indices = True
# -- Options for manual page output ---------------------------------------

# One tuple per manual page:
# (source start file, name, description, authors, manual section).
man_pages = [
    (
        "index",
        "raviprogramminglanguage",
        u"Ravi Programming Language Documentation",
        [u"Dibyendu Majumdar"],
        1,
    ),
]

# If true, URL addresses are shown after external links.
#man_show_urls = False
# -- Options for Texinfo output -------------------------------------------

# How the document tree is grouped into Texinfo files.  Each tuple is
# (source start file, target name, title, author,
#  dir menu entry, description, category).
texinfo_documents = [
    (
        "index",
        "RaviProgrammingLanguage",
        u"Ravi Programming Language Documentation",
        u"Dibyendu Majumdar",
        "RaviProgrammingLanguage",
        "One line description of project.",
        "Miscellaneous",
    ),
]

# Documents to append as an appendix to all manuals.
#texinfo_appendices = []

# If false, no module index is generated.
#texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'

# If true, no @detailmenu is generated in the "Top" node's menu.
#texinfo_no_detailmenu = False

@ -0,0 +1,25 @@
.. Ravi Programming Language documentation master file, created by
   sphinx-quickstart on Thu Feb 5 03:29:17 2015.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.
Welcome to Ravi Programming Language's documentation!
=====================================================
Contents:
.. toctree::
   :maxdepth: 2

   ravi-overview
   lua-parser
   ravi-internals
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

@ -0,0 +1,444 @@
=============
Lua Internals
=============
As I learn more about Lua internals I will make notes here.
Stack and Registers
===================
There are two stacks.
The ``Callinfo`` stack tracks activation frames.
There is the secondary stack ``L->stack`` that is an array of ``TValue`` objects. The ``Callinfo`` objects index into this array. Registers are basically slots in the ``L->stack`` array.
When a function is called - the stack is setup as follows::
stack
| function reference
| base-> parameter 1
| ...
| parameter n
| local 1
| ...
| local n
| top->
|
V
So top is just past the registers needed by the function.
The number of registers is determined based on locals and temporaries.
The base of the stack is set to just past the function reference - i.e. on the first parameter or register.
All register addressing is done as offset from base - so ``R(0)`` is at ``base+0`` on the stack.
See `LuaNua <http://homepages.dcc.ufmg.br/~anolan/research/luanua:start>`_.
A description of the stack and registers from Mike Pall on Lua mailing list is reproduced below.
Sliding Register Window - by Mike Pall
--------------------------------------
Note: this is a reformatted version of a post on Lua mailing list (see MP6 link below).
The Lua 5 VM employs a sliding register window on top of a stack. Frames
(named CallInfo aka 'ci' in the source) occupy different (overlapping)
ranges on the stack. Successive frames are positioned exactly over the
passed arguments (luaD_precall). The compiler ensures that there are no
live variables after the arguments for a call. Return values need to be
copied down (with truncate/extend) to the slot holding the function object
(luaD_poscall). This is because the compiler has no idea how many values
another function may return -- only how many need to be stored.
Example::
function f2(arg1, arg2, ..., argN)
local local1, local2, ...
...
return ret1, ret2, ..., retO
end
function f1(arg1, arg2, ..., argM)
local local1, local2, ...
...
local ret1, ret2, ..., retP = f2(arg1, arg2, ..., argN)
...
end
Simplified stack diagram::
stack
|
| time: >>>> call >>>>>>>>>>>>>>>>>> call ~~~~~~~~~~~~~~~~~~ return >>>>>
|
| ciX.func-> f1 f1 f1 f1
| ciX.base-> arg1 arg1 arg1 arg1
| arg2 arg2 arg2 arg2
| ... ... ... ...
| argM argM argM argM
| ciX.topC-> local1 local1 local11
| local2 local2 local2
| local3 local3 local3
| ... ... ...
| f2 ciY.func-> f2 f2 ret1
| arg1 ciY.base-> arg1 arg1 ret2
| arg2 arg2 arg2 ...
| ... ... ... retP
| argN argN argN
| ciX.topL-> ------ ------ ------ ciY.topC-> local1 local1
| local2 local2
| ... ...
| ret1
| ret2
| ...
| retO
| ciY.topL-> ------ ------
V
Note that there is only a single 'top' for each frame:
For Lua functions the top (tagged topL in the diagram) is set to the base
plus the maximum number of slots used. The compiler knows this and stores
it in the function prototype. The top pointer is used only temporarily
for handling variable length argument and return value lists.
For C functions the top (tagged topC in the diagram) is initially set to
the base plus the number of passed arguments. C functions can access their
part of the stack via Lua API calls which in turn change the stack top.
C functions return an integer that indicates the number of return values
relative to the stack top.
In reality things are a bit more complex due to overlapped locals, block
scopes, varargs, coroutines and a few other things. But this should get
you the basic idea.
> Do you deviate from the typical register based architecture in
> that case to save memory traffic?
I think the architecture is pretty unique as far as VMs go. Some CPUs
have sliding register windows, but this gets quite complicated since they
need to spill/fill registers to/from the stack. A VM can of course use an
unbounded (reallocated) stack on the heap.
> 2. As far as I learned you do instruction encoding close to hardware
> architectures. Therefore you always have to decode the opcode in contrast to
> the JVM where opcode and arguments are stored in several independent bytes.
> Is opcode decoding cheap (one might forgive my poor knowledge of C operator
> performance;-)?
All instructions are 32 bit. The current layout as of Lua 5.1work4 is::
BBBBBBBB BCCCCCCC CCAAAAAA AAOOOOOO ABC format
BBBBBBBB BBBBBBBB BBAAAAAA AAOOOOOO ABx format
sBBBBBBB BBBBBBBB BBAAAAAA AAOOOOOO AsBx format
Fetching a 32 bit value once from memory and then extracting the bits to
other registers is cheaper than doing single-byte fetches for variable
length operands. Byte alignment does not matter at all (word alignment does).
Memory bandwidth is usually not an issue for VM instructions since there
is so much else going on for each instruction. It's much more important
to keep the execution units busy by avoiding interlocks caused by memory
fetches. Tuning the code to make it easy for the compiler to generate
good code is another issue (the Lua authors have done quite a bit of
tuning in some important spots).
Parsing and Code Generation
===========================
* The parser is in `lparser.c <http://www.lua.org/source/5.3/lparser.c.html>`_.
* The code generator is split between the above file and `lcode.c <http://www.lua.org/source/5.3/lcode.c.html>`_.
The parser and code generator are arguably the most complex piece in the whole of Lua. The parser is one-pass - and generates code as it parses. That is, there is no AST build phase. This is primarily for efficiency it seems. The parser uses data structures on the stack - there are no heap allocated structures. Where needed the C stack itself is used to build structures - for example, as the assignment statement is parsed, there is recursion, and a stack based structure is built that links to structures in the call stack.
The main object used by the parser is the ``struct expdesc``::
typedef struct expdesc {
expkind k;
union {
struct { /* for indexed variables (VINDEXED) */
short idx; /* index (R/K) */
lu_byte t; /* table (register or upvalue) */
lu_byte vt; /* whether 't' is register (VLOCAL) or upvalue (VUPVAL) */
} ind;
int info; /* for generic use */
lua_Number nval; /* for VKFLT */
lua_Integer ival; /* for VKINT */
} u;
int t; /* patch list of 'exit when true' */
int f; /* patch list of 'exit when false' */
int ravi_type; /* RAVI change: type of the expression if known, else LUA_TNONE */
} expdesc;
The code is somewhat hard to follow as the ``expdesc`` objects go through various states and are also reused when needed.
As the parser generates code while parsing it needs to go back and patch the generated instructions when it has more information. For example when a function call is parsed the parser assumes that only 1 value is expected to be returned - but later this is patched when more information is available. The most common example is when the register where the value will be stored (operand A) is not known - in this case the parser later on updates this operand in the instruction. I believe jump statements have similar mechanics - however I have not yet gone through the details of these instructions.
Handling of Stack during parsing
--------------------------------
Functions have a register window on the stack.
The stack is represented in ``LexState->dyd.actvar`` (Dyndata)
structure (see llex.h). The register window of the function
starts from ``LexState->dyd.actvar.arr[firstlocal]``.
The 'active' local variables
of the function extend up to ``LexState->dyd.actvar.arr[nactvar-1]``. Note that
when parsing a ``local`` declaration statement the ``nactvar`` is adjusted at the end of
the statement so that during parsing of the statement the ``nactvar``
covers locals up to the start of the statement. This means that
local variables come into scope (become 'active') after the local statement ends.
However, if the local statement defines a function then the variable becomes 'active'
before the function body is parsed.
A tricky thing to note is that while ``nactvar`` is adjusted at the end of the
statement - the 'stack' as represented by ``LexState->dyd.actvar.arr`` is extended to the required
size as the local variables are created by ``new_localvar()``.
When a function is the topmost function being parsed, the
registers between ``LexState->dyd.actvar.arr[nactvar]`` and ``LexState->dyd.actvar.arr[freereg-1]``
are used by the parser for evaluating expressions - i.e. these are part of the
local registers available to the function.
Note that function parameters are handled as locals.
An example of what all this means. Let's say we are parsing the following chunk of code::
function testfunc()
-- at this stage 'nactvar' is 0 (no active variables)
-- 'firstlocal' is set to current top of the variables stack
-- LexState->dyd.actvar.n (i.e. excluding registers used for expression evaluation)
-- LexState->dyd.actvar.n = 0 at this stage
local function tryme()
-- Since we are inside the local statement and 'tryme' is a local variable,
-- the LexState->dyd.actvar.n goes to 1. As this is a function definition
-- the local variable declaration is deemed to end here, so 'nactvar' for testfunc()
-- gets set to 1 (making 'tryme' an active variable).
-- A new FuncState is created for 'tryme' function.
-- The new tryme() FuncState has 'firstlocal' set to value of LexState->dyd.actvar.n, i.e., 1
local i,j = 5,6
-- After 'i' is parsed, LexState->dyd.actvar.n = 2, but 'nactvar' = 0 for tryme()
-- After 'j' is parsed, LexState->dyd.actvar.n = 3, but 'nactvar' = 0 for tryme()
-- Only after the full statement above is parsed, 'nactvar' for tryme() is set to '2'
-- This is done by adjustlocalvars().
return i,j
end
-- Here two things happen
-- Firstly the FuncState for tryme() is popped so that
-- FuncState for testfunc() is now at top
-- As part of this popping, leaveblock() calls removevars()
-- to adjust the LexState->dyd.actvar.n down to 1 where it was
-- at before parsing the tryme() function body.
local i, j = tryme()
-- After 'i' is parsed, LexState->dyd.actvar.n = 2, but 'nactvar' = 1 still
-- After 'j' is parsed, LexState->dyd.actvar.n = 3, but 'nactvar' = 1 still
-- At the end of the statement 'nactvar' is set to 3.
return i+j
end
-- As before the leaveblock() calls removevars() which resets
-- LexState->dyd.actvar.n to 0 (the value before testfunc() was parsed)
A rough debug trace of the above gives::
function testfunc()
-- open_func -> fs->firstlocal set to 0 (ls->dyd->actvar.n), and fs->nactvar reset to 0
local function tryme()
-- new_localvar -> registering var tryme fs->f->locvars[0] at ls->dyd->actvar.arr[0]
-- new_localvar -> ls->dyd->actvar.n set to 1
-- adjustlocalvars -> set fs->nactvar to 1
-- open_func -> fs->firstlocal set to 1 (ls->dyd->actvar.n), and fs->nactvar reset to 0
-- adjustlocalvars -> set fs->nactvar to 0 (no parameters)
local i,j = 5,6
-- new_localvar -> registering var i fs->f->locvars[0] at ls->dyd->actvar.arr[1]
-- new_localvar -> ls->dyd->actvar.n set to 2
-- new_localvar -> registering var j fs->f->locvars[1] at ls->dyd->actvar.arr[2]
-- new_localvar -> ls->dyd->actvar.n set to 3
-- adjustlocalvars -> set fs->nactvar to 2
return i,j
-- removevars -> reset fs->nactvar to 0
end
local i, j = tryme()
-- new_localvar -> registering var i fs->f->locvars[1] at ls->dyd->actvar.arr[1]
-- new_localvar -> ls->dyd->actvar.n set to 2
-- new_localvar -> registering var j fs->f->locvars[2] at ls->dyd->actvar.arr[2]
-- new_localvar -> ls->dyd->actvar.n set to 3
-- adjustlocalvars -> set fs->nactvar to 3
return i+j
-- removevars -> reset fs->nactvar to 0
end
Notes on Parser by Sven Olsen
-----------------------------
"discharging" expressions
~~~~~~~~~~~~~~~~~~~~~~~~~
"discharging" takes an expression of arbitrary type, and
converts it to one having particular properties.
the lowest-level discharge function is ``discharge2vars ()``,
which converts an expression into one of the two "result"
types; either a ``VNONRELOC`` or a ``VRELOCABLE``.
if the variable in question is a ``VLOCAL``, ``discharge2vars``
will simply change the stored type to ``VNONRELOC``.
much of lcode.c assumes that it will be working with
discharged expressions. in particular, it assumes that if
it encounters a ``VNONRELOC`` expression, and ``e->info < nactvar``,
then the register referenced is a local, and therefore
shouldn't be implicitly freed after use.
local variables
~~~~~~~~~~~~~~~
however, the relationship between ``nactvar`` and locals is
actually somewhat more complex -- as each local variable
appearing in the code has a collection of data attached to
it, data that's being accumulated and changed as the lexer
moves through the source.
``fs->nlocvars`` stores the total number of named locals inside
the function -- recall that different local variables are
allowed to overlap the same register, depending on which
are in-scope at any particular time.
the list of locals that are active at any given time is
stored in ``ls->dyd`` -- a vector of stack references that grows
or shrinks as locals enter or leave scope.
managing the lifetime of local variables involves several
steps. first, new locals are declared using ``new_localvar``.
this sets their names and creates new references in ``dyd``.
soon thereafter, the parser is expected to call
``adjustlocalvar(ls,nvars)``, with ``nvars`` set to the number of
new locals. ``adjustlocalvar`` increments ``fs->nactvar`` by ``nvars``,
and marks the startpc's of all the locals.
note that neither ``new_localvar`` nor ``adjustlocalvar`` ensures
that anything is actually inside the registers being labeled
as locals. failing to initialize said registers is an easy
way to write memory access bugs (peter's original table
unpack patch includes one such).
after ``adjustlocalvar`` is called, ``luaK_exp2nextreg()`` will no
longer place new data inside the local's registers -- as
they're no longer part of the temporary register stack.
when the time comes to deactivate locals, that's done via
``removevars(tolevel)``. ``tolevel`` is assumed to contain ``nactvars``
as it existed prior to entering the previous block. thus,
the number of locals to remove should simply be
``fs->nactvar-tolevel``. ``removevars(tolevel)`` will decrement
``nactvars`` down to ``tolevel``. it also shrinks the ``dyd`` vector,
and marks the endpc's of all the removed locals.
except in between ``new_localvar`` and ``adjustlocalvar`` calls, i
believe that::
fs->ls->dyd->actvar.n - fs->firstlocal == fs->nactvar
temporary registers
~~~~~~~~~~~~~~~~~~~
``freereg`` is used to manage the temporary register stack --
registers between [``fs->nactvars,fs->freereg``) are assumed to
belong to expressions currently being stored by the parser.
``fs->freereg`` is incremented explicitly by calls to
``luaK_reserveregs``, or implicitly, inside ``luaK_exp2nextreg``.
it's decremented whenever a ``freereg(r)`` is called on a
register in the temporary stack (i.e., a register for which
``r >= fs->nactvar``).
the temporary register stack is cleared when ``leaveblock()`` is
called, by setting ``fs->freereg=fs->nactvar``. it's also
partially cleared in other places -- for example, inside
the evaluation of table constructors.
note that ``freereg`` just pops the top of the stack if r does
not appear to be a local -- thus it doesn't necessarily
free r. one of the important sanity checks that you'll get
by enabling ``lua_assert()`` checks that the register being
freed is also the top of the stack.
when writing parser patches, it's your job to ensure that
the registers that you've reserved are freed in an
appropriate order.
when a ``VINDEXED`` expression is discharged, ``freereg()`` will be
called on both the table and the index register. otherwise,
``freereg`` is only called from ``freeexp()`` -- which gets
triggered anytime an expression has been "used up";
typically, anytime it's been transformed into another
expression.
State Transitions
-----------------
The state transitions for ``expdesc`` structure are as follows:
expkind | Description | State Transitions
------------------ | ---------------------------------------| -----------------
``VVOID`` | This is used to indicate the lack of value - e.g. function call with no arguments, the rhs of local variable declaration, and empty table constructor | None
``VRELOCABLE`` | This is used to indicate that the result from expression needs to be set to a register. The operation that created the expression is referenced by the ``u.info`` parameter which contains an offset into the ``code`` of the function that is being compiled. So you can access this instruction by calling ``getcode(FuncState *, expdesc *)``. The operations that result in a ``VRELOCABLE`` object include ``OP_CLOSURE``, ``OP_NEWTABLE``, ``OP_GETUPVAL``, ``OP_GETTABUP``, ``OP_GETTABLE``, ``OP_NOT`` and code for binary and unary expressions that produce values (arithmetic operations, bitwise operations, concat, length). The associated code instruction has operand ``A`` unset (defaulted to 0) - thus the ``VRELOCABLE`` expression must be later transitioned to ``VNONRELOC`` state when the register is set. | In terms of transitions the following expression kinds convert to ``VRELOCABLE``: ``VVARARG``, ``VUPVAL`` (``OP_GETUPVAL``), ``VINDEXED`` (``OP_GETTABUP`` or ``OP_GETTABLE``). And the following expression states can result from a ``VRELOCABLE`` expression: ``VNONRELOC``, which means that the result register in the instruction operand A has been set.
``VNONRELOC`` | This state indicates that the output or result register has been set. The register is referenced in the ``u.info`` parameter. Once set the register cannot be changed for this expression - subsequent operations involving this expression can refer to the register to obtain the result value. | As for transitions, the ``VNONRELOC`` state results from ``VRELOCABLE`` after a register is assigned to the operation referenced by ``VRELOCABLE``. Also a ``VCALL`` expression transitions to a ``VNONRELOC`` expression - ``u.info`` is set to the operand ``A`` of the associated call instruction. ``VLOCAL``, ``VNIL``, ``VTRUE``, ``VFALSE``, ``VK``, ``VKINT``, ``VKFLT`` and ``VJMP`` expressions transition to ``VNONRELOC``.
``VLOCAL`` | This is used when referencing local variables. ``u.info`` is set to the local variable's register. | The ``VLOCAL`` expression may transition to ``VNONRELOC`` although this doesn't change the ``u.info`` parameter.
``VCALL`` | This results from a function call. The ``OP_CALL`` instruction is referenced by the ``u.info`` parameter and may be retrieved by calling ``getcode(FuncState *, expdesc *)``. The ``OP_CALL`` instruction gets changed to ``OP_TAILCALL`` if the function call expression is the value of a ``RETURN`` statement. The instruction's operand ``C`` gets updated when the number of expected results from the function call is known. | In terms of transitions, the ``VCALL`` expression transitions to ``VNONRELOC``. When this happens the result register in ``VNONRELOC`` (``u.info``) is set to the operand ``A`` in the ``OP_CALL`` instruction.
``VINDEXED`` | This expression represents a table access. The ``u.ind.t`` parameter is set to the register or upvalue that holds the table, the ``u.ind.idx`` is set to the register or constant that is the key, and ``u.ind.vt`` is either ``VLOCAL`` or ``VUPVAL``. | The ``VINDEXED`` expression transitions to ``VRELOCABLE``. When this happens the ``u.info`` is set to the offset of the code that contains the opcode ``OP_GETTABUP`` if ``u.ind.vt`` was ``VUPVAL`` or ``OP_GETTABLE`` if ``u.ind.vt`` was ``VLOCAL``.
Examples of Parsing
-------------------
example 1
~~~~~~~~~
We investigate the simple code chunk below::
local i,j; j = i*j+i
The compiler allocates following local registers, constants and upvalues::
constants (0) for 0000007428FED950:
locals (2) for 0000007428FED950:
0 i 2 5
1 j 2 5
upvalues (1) for 0000007428FED950:
0 _ENV 1 0
Some of the parse steps are highlighted below.
Reference to variable ``i`` which is located in register ``0``. The ``p`` here is the pointer address of ``expdesc`` object so you can see how the same object evolves::
{p=0000007428E1F170, k=VLOCAL, register=0}
Reference to variable ``j`` located in register ``1``::
{p=0000007428E1F078, k=VLOCAL, register=1}
Now the MUL operator is applied so we get following. Note that the previously ``VLOCAL`` expression for ``i`` is now ``VNONRELOC``::
{p=0000007428E1F170, k=VNONRELOC, register=0} MUL {p=0000007428E1F078, k=VLOCAL, register=1}
Next code gets generated for the ``MUL`` operator and we can see that first expression is replaced by a ``VRELOCABLE`` expression. Note also that the ``MUL`` operator is encoded in the ``VRELOCABLE`` expression as instruction ``1`` which is decoded below::
{p=0000007428E1F170, k=VRELOCABLE, pc=1, instruction=(MUL A=0 B=0 C=1)}
Now a reference to ``i`` is again required::
{p=0000007428E1F078, k=VLOCAL, register=0}
And the ``ADD`` operator must be applied to the result of the ``MUL`` operator and above. Notice that a temporary register ``2`` has been allocated to hold the result of the ``MUL`` operator, and also notice that as a result the ``VRELOCABLE`` has now changed to ``VNONRELOC``::
{p=0000007428E1F170, k=VNONRELOC, register=2} ADD {p=0000007428E1F078, k=VLOCAL, register=0}
Next the result of the ``ADD`` expression gets encoded similarly to ``MUL`` earlier. As this is a ``VRELOCABLE`` expression it will be later on assigned a result register::
{p=0000007428E1F170, k=VRELOCABLE, pc=2, instruction=(ADD A=0 B=2 C=0)}
Eventually above gets assigned a result register and becomes ``VNONRELOC`` (not shown here) - and so the final generated code looks like below::
main <(string):0,0> (4 instructions at 0000007428FED950)
0+ params, 3 slots, 1 upvalue, 2 locals, 0 constants, 0 functions
1 [1] LOADNIL 0 1
2 [1] MUL 2 0 1
3 [1] ADD 1 2 0
4 [1] RETURN 0 1
Links
=====
* `(MP1) Lua Code Reading Order <http://www.reddit.com/comments/63hth/ask_reddit_which_oss_codebases_out_there_are_so/c02pxbpC>`_
* `(RL1) Registers allocation and GC <http://lua-users.org/lists/lua-l/2013-02/msg00075.html>`_
* `(MP2) LuaJIT interpreter optimisations <http://www.reddit.com/r/programming/comments/badl2/luajit_2_beta_3_is_out_support_both_x32_x64/c0lrus0>`_
* `(MP3) Performance of Switch Based Dispatch <http://lua-users.org/lists/lua-l/2011-02/msg00742.html>`_
* `(MP4) Challenges for static compilation of dynamic languages <http://lua-users.org/lists/lua-l/2009-06/msg00071.html>`_
* `(MP5) VM Internals (bytecode format) <http://lua-users.org/lists/lua-l/2008-07/msg00651.html>`_
* `(RL2) Upvalues in closures <http://lua-users.org/lists/lua-l/2008-09/msg00076.html>`_
* `(LHF) Lua bytecode dump format <http://lua-users.org/lists/lua-l/2006-06/msg00205.html>`_
* `(MP6) Register VM and sliding stack window <http://lua-users.org/lists/lua-l/2005-01/msg00628.html>`_
* `(SO1) Sven Olsen's notes on registers <http://lua-users.org/files/wiki_insecure/power_patches/5.2/svenshacks-5.2.2.patch>`_ from `Sven Olsen's Lua Users Wiki page <http://lua-users.org/wiki/SvenOlsen>`_
* `(KHM) No Frills Introduction to Lua 5.1 VM Instructions <http://luaforge.net/docman/83/98/ANoFrillsIntroToLua51VMInstructions.pdf>`_

@ -0,0 +1,242 @@
@ECHO OFF
REM Command file for Sphinx documentation
REM Use the sphinx-build found on PATH unless the caller already set SPHINXBUILD.
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
REM Root directory for all builder output; the doctree cache lives under it.
set BUILDDIR=_build
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
REM The gettext builder gets its own option set without the shared doctree cache.
set I18NSPHINXOPTS=%SPHINXOPTS% .
REM When PAPER is set, prepend the LaTeX paper size override to both option sets.
if NOT "%PAPER%" == "" (
set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
)
REM Print the list of supported targets when called without an argument or
REM with "help". Every target handled further down should be listed here.
if "%1" == "" goto help
if "%1" == "help" (
:help
echo.Please use `make ^<target^>` where ^<target^> is one of
echo. html to make standalone HTML files
echo. dirhtml to make HTML files named index.html in directories
echo. singlehtml to make a single large HTML file
echo. pickle to make pickle files
echo. json to make JSON files
echo. htmlhelp to make HTML files and a HTML help project
echo. qthelp to make HTML files and a qthelp project
echo. devhelp to make HTML files and a Devhelp project
echo. epub to make an epub
echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
echo. latexpdf to make LaTeX files and run them through pdflatex
echo. latexpdfja to make LaTeX files and run them through platex/dvipdfmx
echo. text to make text files
echo. man to make manual pages
echo. texinfo to make Texinfo files
echo. gettext to make PO message catalogs
echo. changes to make an overview over all changed/added/deprecated items
echo. xml to make Docutils-native XML files
echo. pseudoxml to make pseudoxml-XML files for display purposes
echo. linkcheck to check all external links for integrity
echo. doctest to run all doctests embedded in the documentation if enabled
goto end
)
REM clean: remove every subdirectory of the build directory, then delete the
REM files that remain inside it. The build directory itself is kept.
if "%1" == "clean" (
for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
del /q /s %BUILDDIR%\*
goto end
)
REM Probe for the Sphinx executable before dispatching to a builder. The probe
REM runs it without arguments and discards stderr; an errorlevel of 9009 or
REM above is reported as "command not found" and aborts the script.
%SPHINXBUILD% 2> nul
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.http://sphinx-doc.org/
exit /b 1
)
REM html: run the html builder into the html subdirectory of the build
REM directory; stop with exit code 1 if sphinx-build reports an error.
if "%1" == "html" (
%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/html.
goto end
)
REM dirhtml: run the dirhtml builder into the dirhtml subdirectory of the
REM build directory; stop with exit code 1 on failure.
if "%1" == "dirhtml" (
%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
goto end
)
REM singlehtml: run the singlehtml builder into the singlehtml subdirectory
REM of the build directory; stop with exit code 1 on failure.
if "%1" == "singlehtml" (
%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
goto end
)
REM pickle: run the pickle builder into the pickle subdirectory of the build
REM directory; stop with exit code 1 on failure.
if "%1" == "pickle" (
%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the pickle files.
goto end
)
REM json: run the json builder into the json subdirectory of the build
REM directory; stop with exit code 1 on failure.
if "%1" == "json" (
%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the JSON files.
goto end
)
REM htmlhelp: run the htmlhelp builder into the htmlhelp subdirectory of the
REM build directory; stop with exit code 1 on failure. The trailing caret in
REM the message below continues the echo onto the next line.
if "%1" == "htmlhelp" (
%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can run HTML Help Workshop with the ^
.hhp project file in %BUILDDIR%/htmlhelp.
goto end
)
REM qthelp: run the qthelp builder into the qthelp subdirectory of the build
REM directory, then print the follow-up commands. qcollectiongenerator turns
REM the .qhcp project into a .qhc collection, which is the file that
REM assistant -collectionFile expects.
if "%1" == "qthelp" (
%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
echo.^> qcollectiongenerator %BUILDDIR%\qthelp\RaviProgrammingLanguage.qhcp
echo.To view the help file:
echo.^> assistant -collectionFile %BUILDDIR%\qthelp\RaviProgrammingLanguage.qhc
goto end
)
REM devhelp: run the devhelp builder into the devhelp subdirectory of the
REM build directory; stop with exit code 1 on failure.
if "%1" == "devhelp" (
%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished.
goto end
)
REM epub: run the epub builder into the epub subdirectory of the build
REM directory; stop with exit code 1 on failure.
if "%1" == "epub" (
%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The epub file is in %BUILDDIR%/epub.
goto end
)
REM latex: run the latex builder into the latex subdirectory of the build
REM directory; stop with exit code 1 on failure. This only generates the
REM LaTeX sources; it does not run LaTeX.
if "%1" == "latex" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
if errorlevel 1 exit /b 1
echo.
echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
goto end
)
REM latexpdf: generate the LaTeX sources with Sphinx, then run the makefile
REM that Sphinx writes into the latex output directory to produce the PDFs.
REM Either step failing stops the script with exit code 1.
if "%1" == "latexpdf" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
if errorlevel 1 exit /b 1
REM pushd and popd restore the caller's directory; a relative cd back to the
REM build directory would be resolved from inside the latex directory and fail.
pushd %BUILDDIR%\latex
make all-pdf
if errorlevel 1 (
popd
exit /b 1
)
popd
echo.
echo.Build finished; the PDF files are in %BUILDDIR%/latex.
goto end
)
REM latexpdfja: generate the LaTeX sources with Sphinx, then run the all-pdf-ja
REM target of the makefile that Sphinx writes into the latex output directory.
REM Either step failing stops the script with exit code 1.
if "%1" == "latexpdfja" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
if errorlevel 1 exit /b 1
REM pushd and popd restore the caller's directory; a relative cd back to the
REM build directory would be resolved from inside the latex directory and fail.
pushd %BUILDDIR%\latex
make all-pdf-ja
if errorlevel 1 (
popd
exit /b 1
)
popd
echo.
echo.Build finished; the PDF files are in %BUILDDIR%/latex.
goto end
)
REM text: run the text builder into the text subdirectory of the build
REM directory; stop with exit code 1 on failure.
if "%1" == "text" (
%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The text files are in %BUILDDIR%/text.
goto end
)
REM man: run the man builder into the man subdirectory of the build
REM directory; stop with exit code 1 on failure.
if "%1" == "man" (
%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The manual pages are in %BUILDDIR%/man.
goto end
)
REM texinfo: run the texinfo builder into the texinfo subdirectory of the
REM build directory; stop with exit code 1 on failure.
if "%1" == "texinfo" (
%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
goto end
)
REM gettext: run the gettext builder into the locale subdirectory of the
REM build directory. This is the only target that uses the i18n option set
REM instead of the common one; stop with exit code 1 on failure.
if "%1" == "gettext" (
%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
goto end
)
REM changes: run the changes builder into the changes subdirectory of the
REM build directory; stop with exit code 1 on failure.
if "%1" == "changes" (
%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
if errorlevel 1 exit /b 1
echo.
echo.The overview file is in %BUILDDIR%/changes.
goto end
)
REM linkcheck: run the linkcheck builder into the linkcheck subdirectory of
REM the build directory; stop with exit code 1 on failure. The trailing caret
REM in the message below continues the echo onto the next line.
if "%1" == "linkcheck" (
%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
if errorlevel 1 exit /b 1
echo.
echo.Link check complete; look for any errors in the above output ^
or in %BUILDDIR%/linkcheck/output.txt.
goto end
)
REM doctest: run the doctest builder into the doctest subdirectory of the
REM build directory; stop with exit code 1 on failure. The trailing caret in
REM the message below continues the echo onto the next line.
if "%1" == "doctest" (
%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
if errorlevel 1 exit /b 1
echo.
echo.Testing of doctests in the sources finished, look at the ^
results in %BUILDDIR%/doctest/output.txt.
goto end
)
REM xml: run the xml builder into the xml subdirectory of the build
REM directory; stop with exit code 1 on failure.
if "%1" == "xml" (
%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The XML files are in %BUILDDIR%/xml.
goto end
)
REM pseudoxml: run the pseudoxml builder into the pseudoxml subdirectory of
REM the build directory; stop with exit code 1 on failure.
if "%1" == "pseudoxml" (
%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
goto end
)
REM Common exit point: every handler above jumps here when it finishes.
:end

@ -0,0 +1,777 @@
===========================
Ravi Implementation Details
===========================
As I progress with Ravi I will document the design and implementation details here.
Type Information
================
The basic first step is to add type information to Lua.
As the parser progresses it creates a vector of ``LocVar`` for each function containing a list of local variables. I have enhanced ``LocVar`` structure in ``lobject.h`` to hold type information.
::
/* Following are the types we will
** use in parsing. The rationale for types is
** performance - as of now these are the only types that
** we care about from a performance point of view - if any
** other types appear then they are all treated as ANY
**/
typedef enum {
RAVI_TANY, /* Lua dynamic type */
RAVI_TNUMINT, /* integer number */
RAVI_TNUMFLT, /* floating point number */
RAVI_TARRAYINT, /* array of ints */
RAVI_TARRAYFLT, /* array of doubles */
RAVI_TFUNCTION,
RAVI_TTABLE,
RAVI_TSTRING,
RAVI_TNIL,
RAVI_TBOOLEAN
} ravitype_t;
/*
** Description of a local variable for function prototypes
** (used for debug information)
*/
typedef struct LocVar {
TString *varname;
int startpc; /* first point where variable is active */
int endpc; /* first point where variable is dead */
ravitype_t ravi_type; /* RAVI type of the variable - RAVI_TANY if unknown */
} LocVar;
The ``expdesc`` structure is used by the parser to hold nodes in the expression tree. I have enhanced the ``expdesc`` structure to hold the type of an expression.
::
typedef struct expdesc {
expkind k;
union {
struct { /* for indexed variables (VINDEXED) */
short idx; /* index (R/K) */
lu_byte t; /* table (register or upvalue) */
lu_byte vt; /* whether 't' is register (VLOCAL) or upvalue (VUPVAL) */
ravitype_t key_type; /* key type */
} ind;
int info; /* for generic use */
lua_Number nval; /* for VKFLT */
lua_Integer ival; /* for VKINT */
} u;
int t; /* patch list of 'exit when true' */
int f; /* patch list of 'exit when false' */
ravitype_t ravi_type; /* RAVI change: type of the expression if known, else RAVI_TANY */
} expdesc;
Note the addition of type information in two places. Firstly at the ``expdesc`` level which identifies the type of the ``expdesc``. Secondly in the ``ind`` structure - the ``key_type`` is used to track the type of the key that will be used to index into a table.
The table structure has been enhanced to hold additional information for array usage.
::
typedef struct Table {
CommonHeader;
lu_byte flags; /* 1<<p means tagmethod(p) is not present */
lu_byte lsizenode; /* log2 of size of 'node' array */
unsigned int sizearray; /* size of 'array' array */
TValue *array; /* array part */
Node *node;
Node *lastfree; /* any free position is before this position */
struct Table *metatable;
GCObject *gclist;
ravitype_t ravi_array_type; /* RAVI specialization */
unsigned int ravi_array_len; /* RAVI len specialization */
} Table;
Parser Enhancements
===================
The parser needs to be enhanced to generate type specific instructions at various points.
Local Variable Declarations
---------------------------
The first enhancement needed is when local variable declarations are parsed. We need to allow the type to be defined for each variable and ensure that any assignments are type-checked. This is a somewhat complex process, due to the fact that assignments can be expressions involving function calls. The last function call is treated as a variable assignment - i.e. all trailing variables are assumed to be assigned values from the function call - if not the variables are set to nil by default.
The entry point for parsing a local statement is ``localstat()`` in ``lparser.c``. This function has been enhanced to parse the type annotations supported by Ravi. The modified function is shown below.
::
static void localstat (LexState *ls) {
/* stat -> LOCAL NAME {',' NAME} ['=' explist] */
int nvars = 0;
int nexps;
expdesc e;
e.ravi_type = RAVI_TANY;
int vars[MAXVARS] = { 0 };
do {
/* RAVI changes start */
/* local name : type = value */
TString *name = str_checkname(ls);
ravitype_t tt = RAVI_TANY;
if (testnext(ls, ':')) {
TString *typename = str_checkname(ls); /* we expect a type name */
const char *str = getaddrstr(typename);
if (strcmp(str, "int") == 0)
tt = RAVI_TNUMINT;
else if (strcmp(str, "double") == 0)
tt = RAVI_TNUMFLT;
if (tt == RAVI_TNUMFLT || tt == RAVI_TNUMINT) {
if (testnext(ls, '[')) {
checknext(ls, ']');
tt = (tt == RAVI_TNUMFLT) ? RAVI_TARRAYFLT : RAVI_TARRAYINT;
}
}
}
new_localvar(ls, name, tt);
vars[nvars] = tt;
/* RAVI changes end */
nvars++;
} while (testnext(ls, ','));
if (testnext(ls, '='))
nexps = localvar_explist(ls, &e, vars, nvars);
else {
e.k = VVOID;
nexps = 0;
}
localvar_adjust_assign(ls, nvars, nexps, &e);
adjustlocalvars(ls, nvars);
}
The do-while loop is responsible for parsing the variable names and the type annotations. As each variable name is parsed we detect if there is a type annotation, and if present the type is recorded in the array ``vars``.
The type of the variable is also passed to ``new_localvar()`` which records this in the ``LocVar`` structure associated with the variable.
::
static int registerlocalvar (LexState *ls, TString *varname, int ravi_type) {
FuncState *fs = ls->fs;
Proto *f = fs->f;
int oldsize = f->sizelocvars;
luaM_growvector(ls->L, f->locvars, fs->nlocvars, f->sizelocvars,
LocVar, SHRT_MAX, "local variables");
while (oldsize < f->sizelocvars) {
/* RAVI change initialize */
f->locvars[oldsize].startpc = -1;
f->locvars[oldsize].endpc = -1;
f->locvars[oldsize].ravi_type = RAVI_TANY;
f->locvars[oldsize++].varname = NULL;
}
f->locvars[fs->nlocvars].varname = varname;
f->locvars[fs->nlocvars].ravi_type = ravi_type;
luaC_objbarrier(ls->L, f, varname);
return fs->nlocvars++;
}
/* create a new local variable in function scope, and set the
* variable type (RAVI - added type tt) */
static void new_localvar (LexState *ls, TString *name, ravitype_t tt) {
FuncState *fs = ls->fs;
Dyndata *dyd = ls->dyd;
/* register variable and get its index */
/* RAVI change - record type info for local variable */
int i = registerlocalvar(ls, name, tt);
checklimit(fs, dyd->actvar.n + 1 - fs->firstlocal,
MAXVARS, "local variables");
luaM_growvector(ls->L, dyd->actvar.arr, dyd->actvar.n + 1,
dyd->actvar.size, Vardesc, MAX_INT, "local variables");
/* variable will be placed at stack position dyd->actvar.n */
dyd->actvar.arr[dyd->actvar.n].idx = cast(short, i);
DEBUG_VARS(raviY_printf(fs, "new_localvar -> registering %v fs->f->locvars[%d] at ls->dyd->actvar.arr[%d]\n", &fs->f->locvars[i], i, dyd->actvar.n));
dyd->actvar.n++;
DEBUG_VARS(raviY_printf(fs, "new_localvar -> ls->dyd->actvar.n set to %d\n", dyd->actvar.n));
}
The next bit of change is how the expressions are handled following the ``=`` symbol. The previously built ``vars`` array is passed to a modified version of ``explist()`` called ``localvar_explist()``. This handles the parsing of expressions and then ensuring that each expression matches the type of the variable where known. The ``localvar_explist()`` function is shown next.
::
static int localvar_explist(LexState *ls, expdesc *v, int *vars, int nvars) {
/* explist -> expr { ',' expr } */
int n = 1; /* at least one expression */
expr(ls, v);
#if RAVI_ENABLED
ravi_typecheck(ls, v, vars, nvars, 0);
#endif
while (testnext(ls, ',')) {
luaK_exp2nextreg(ls->fs, v);
expr(ls, v);
#if RAVI_ENABLED
ravi_typecheck(ls, v, vars, nvars, n);
#endif
n++;
}
return n;
}
The main changes compared to ``explist()`` are the calls to ``ravi_typecheck()``. Note that the array ``vars`` is passed to the ``ravi_typecheck()`` function along with the current variable index in ``n``. The ``ravi_typecheck()`` function is reproduced below.
::
static void ravi_typecheck(LexState *ls, expdesc *v, int *vars, int nvars, int n)
{
if (n < nvars && vars[n] != RAVI_TANY && v->ravi_type != vars[n]) {
if (v->ravi_type != vars[n] &&
(vars[n] == RAVI_TARRAYFLT || vars[n] == RAVI_TARRAYINT) &&
v->k == VNONRELOC) {
/* as the bytecode for generating a table is already
* emitted by this stage we have to amend the generated byte code
* - not sure if there is a better approach.
* We look for the last bytecode that is OP_NEWTABLE
* and that has the same destination
* register as v->u.info which is our variable
* local a:int[] = { 1 }
* ^ We are just past this and
* about to assign to a
*/
int i = ls->fs->pc - 1;
for (; i >= 0; i--) {
Instruction *pc = &ls->fs->f->code[i];
OpCode op = GET_OPCODE(*pc);
int reg;
if (op != OP_NEWTABLE)
continue;
reg = GETARG_A(*pc);
if (reg != v->u.info)
continue;
op = (vars[n] == RAVI_TARRAYINT) ? OP_RAVI_NEWARRAYI : OP_RAVI_NEWARRAYF;
SET_OPCODE(*pc, op); /* modify opcode */
DEBUG_CODEGEN(raviY_printf(ls->fs, "[%d]* %o ; modify opcode\n", i, *pc));
break;
}
if (i < 0)
luaX_syntaxerror(ls, "expecting array initializer");
}
/* if we are calling a function then convert return types */
else if (v->ravi_type != vars[n] &&
(vars[n] == RAVI_TNUMFLT || vars[n] == RAVI_TNUMINT) &&
v->k == VCALL) {
/* For local variable declarations that call functions e.g.
* local i = func()
* Lua ensures that the function returns values
* to register assigned to variable i and above so that no
* separate OP_MOVE instruction is necessary. So that means that
* we need to coerce the return values in situ.
*/
/* Obtain the instruction for OP_CALL */
Instruction *pc = &getcode(ls->fs, v);
lua_assert(GET_OPCODE(*pc) == OP_CALL);
int a = GETARG_A(*pc); /* function return values
will be placed from register pointed
by A and upwards */
int nrets = GETARG_C(*pc) - 1; /* operand C contains
number of return values expected */
/* Note that at this stage nrets is always 1
* - as Lua patches in the this value for the last
* function call in a variable declaration statement
* in adjust_assign and localvar_adjust_assign */
/* all return values that are going to be assigned
to typed local vars must be converted to the correct type */
int i;
for (i = n; i < (n+nrets); i++)
/* do we need to convert ? */
if ((vars[i] == RAVI_TNUMFLT || vars[i] == RAVI_TNUMINT))
/* code an instruction to convert in place */
luaK_codeABC(ls->fs,
vars[i] == RAVI_TNUMFLT ?
OP_RAVI_TOFLT : OP_RAVI_TOINT,
a+(i-n), 0, 0);
else if ((vars[i] == RAVI_TARRAYFLT || vars[i] == RAVI_TARRAYINT))
/* code an instruction to convert in place */
luaK_codeABC(ls->fs,
vars[i] == RAVI_TARRAYFLT ?
OP_RAVI_TOARRAYF : OP_RAVI_TOARRAYI,
a + (i - n), 0, 0);
}
else if ((vars[n] == RAVI_TNUMFLT || vars[n] == RAVI_TNUMINT) &&
v->k == VINDEXED) {
if (vars[n] == RAVI_TNUMFLT && v->ravi_type != RAVI_TARRAYFLT ||
vars[n] == RAVI_TNUMINT && v->ravi_type != RAVI_TARRAYINT)
luaX_syntaxerror(ls, "Invalid local assignment");
}
else
luaX_syntaxerror(ls, "Invalid local assignment");
}
}
There are several parts to this function.
The simple case is when the type of the expression matches the variable.
Secondly if the expression is a table initializer then we need to generate specialized opcodes if the target variable is supposed to be ``int[]`` or ``double[]``. The specialized opcode sets up some information in the ``Table`` structure. The problem is that this requires us to modify ``OP_NEWTABLE`` instruction which has already been emitted. So we scan the generated instructions to find the last ``OP_NEWTABLE`` instruction that assigns to the register associated with the target variable.
Next bit of special handling is for function calls. If the assignment makes a function call then we perform type coercion on return values where these values are being assigned to variables with defined types. This means that if the target variable is ``int`` or ``double`` we issue opcodes ``TOINT`` and ``TOFLT`` respectively. If the target variable is ``int[]`` or ``double[]`` then we issue ``TOARRAYI`` and ``TOARRAYF`` respectively. These opcodes ensure that the values are of required type or can be cast to the required type.
Note that any left over variables that are not assigned values, are set to 0 if they are of int or double type, else they are set to nil as per Lua's default behavior. This is handled in ``localvar_adjust_assign()`` which is described later on.
Finally the last case is when the target variable is ``int`` or ``double`` and the expression is a table / array access. In this case we check that the table is of required type.
The ``localvar_adjust_assign()`` function referred to above is shown below.
::
/* Balance the variables of a local declaration against its initialiser
* expressions.
* nvars = number of variables declared
* nexps = number of expressions parsed
* e = the last expression parsed (VVOID when there was no initialiser)
* When the last expression can return multiple values it is asked to supply
* the missing values; otherwise the left over variables get default values.
*/
static void localvar_adjust_assign(LexState *ls, int nvars, int nexps, expdesc *e) {
FuncState *fs = ls->fs;
int extra = nvars - nexps;
if (hasmultret(e->k)) {
extra++; /* includes call itself */
if (extra < 0) extra = 0;
/* following adjusts the C operand in the OP_CALL instruction */
luaK_setreturns(fs, e, extra); /* last exp. provides the difference */
#if RAVI_ENABLED
/* Since we did not know how many return values to process in localvar_explist() we
* need to add instructions for type coercions at this stage for any remaining
* variables
*/
ravi_coercetype(ls, e, extra);
#endif
if (extra > 1) luaK_reserveregs(fs, extra - 1);
}
else {
if (e->k != VVOID) luaK_exp2nextreg(fs, e); /* close last expression */
if (extra > 0) {
int reg = fs->freereg;
luaK_reserveregs(fs, extra);
/* set all left over registers to nil first; when RAVI_ENABLED the
* int and double typed ones are re-initialised by ravi_setzero() below
*/
luaK_nil(fs, reg, extra);
#if RAVI_ENABLED
/* typed variables that are primitives cannot be set to nil so
* we need to emit instructions to initialise them to default values
*/
ravi_setzero(fs, reg, extra);
#endif
}
}
}
As mentioned before any variables left over in a local declaration that have not been assigned values must be set to default values appropriate for the type. In the case of trailing values returned by a function call we need to coerce the values to the required types. All this is done in the ``localvar_adjust_assign()`` function above.
Note that local declarations have a complication that until the declaration is complete the variable does not come in scope. So we have to be careful when we wish to map from a register to the local variable declaration as this mapping is only available after the variable is activated. Couple of helper routines are shown below.
::
/* translate from local register to local variable index
*/
static int register_to_locvar_index(FuncState *fs, int reg) {
int idx;
lua_assert(reg >= 0 && (fs->firstlocal + reg) < fs->ls->dyd->actvar.n);
/* Get the LocVar associated with the register */
idx = fs->ls->dyd->actvar.arr[fs->firstlocal + reg].idx;
lua_assert(idx < fs->nlocvars);
return idx;
}
/* get type of a register - if the register is not allocated
* to an active local variable, then return RAVI_TANY else
* return the type associated with the variable.
* This is a RAVI function
*/
ravitype_t raviY_get_register_typeinfo(FuncState *fs, int reg) {
int idx;
LocVar *v;
if (reg < 0 || reg >= fs->nactvar || (fs->firstlocal + reg) >= fs->ls->dyd->actvar.n)
return RAVI_TANY;
/* Get the LocVar associated with the register */
idx = fs->ls->dyd->actvar.arr[fs->firstlocal + reg].idx;
lua_assert(idx < fs->nlocvars);
v = &fs->f->locvars[idx];
/* Variable in scope so return the type if we know it */
return v->ravi_type;
}
Note the use of ``register_to_locvar_index()`` in functions below.
::
/* Generate instructions for converting types
* This is needed post a function call to handle
* variable number of return values
* n = number of return values to adjust, counted from the
* register that receives the first return value; that first
* register itself is skipped by the loop below
*/
static void ravi_coercetype(LexState *ls, expdesc *v, int n)
{
/* nothing to do unless the expression is a function call */
if (v->k != VCALL || n <= 0) return;
/* For local variable declarations that call functions e.g.
* local i = func()
* Lua ensures that the function returns values to register
* assigned to variable and above so that no separate
* OP_MOVE instruction is necessary. So that means that
* we need to coerce the return values in situ.
*/
/* Obtain the instruction for OP_CALL */
Instruction *pc = &getcode(ls->fs, v);
lua_assert(GET_OPCODE(*pc) == OP_CALL);
int a = GETARG_A(*pc); /* function return values will be placed
from register pointed by A and upwards */
/* all return values that are going to be assigned
to typed local vars must be converted to the correct type */
int i;
/* The loop covers registers a+1 .. a+n-1 only.
* NOTE(review): presumably register a was already coerced by
* ravi_typecheck() while the expression list was parsed - confirm.
*/
for (i = a + 1; i < a + n; i++) {
/* Since this is called when parsing local statements the
* variable may not yet have a register assigned to it
* so we can't use raviY_get_register_typeinfo()
* here. Instead we need to check the variable definition - so we
* first convert from local register to variable index.
*/
int idx = register_to_locvar_index(ls->fs, i);
/* get variable's type */
ravitype_t ravi_type = ls->fs->f->locvars[idx].ravi_type;
/* do we need to convert ? */
if (ravi_type == RAVI_TNUMFLT || ravi_type == RAVI_TNUMINT)
/* code an instruction to convert in place */
luaK_codeABC(ls->fs, ravi_type == RAVI_TNUMFLT ?
OP_RAVI_TOFLT : OP_RAVI_TOINT, i, 0, 0);
else if (ravi_type == RAVI_TARRAYINT || ravi_type == RAVI_TARRAYFLT)
/* same for typed arrays: convert in place */
luaK_codeABC(ls->fs, ravi_type == RAVI_TARRAYINT ?
OP_RAVI_TOARRAYI : OP_RAVI_TOARRAYF, i, 0, 0);
}
}
/* Generate instructions that give int and double typed local
* variables their default value.
* from = first register to initialise
* n = number of consecutive registers to look at
* Registers whose variable has any other type are left untouched.
*/
static void ravi_setzero(FuncState *fs, int from, int n) {
int last = from + n - 1; /* last register to initialise */
int i;
for (i = from; i <= last; i++) {
/* Since this is called when parsing local statements
* the variable may not yet have a register assigned to
* it so we can't use raviY_get_register_typeinfo()
* here. Instead we need to check the variable definition - so we
* first convert from local register to variable index.
*/
int idx = register_to_locvar_index(fs, i);
/* get variable's type */
ravitype_t ravi_type = fs->f->locvars[idx].ravi_type;
/* only int and double typed variables need a default value */
if (ravi_type == RAVI_TNUMFLT || ravi_type == RAVI_TNUMINT)
/* code the load instruction matching the variable's type */
luaK_codeABC(fs, ravi_type == RAVI_TNUMFLT ?
OP_RAVI_LOADFZ : OP_RAVI_LOADIZ, i, 0, 0);
}
}
Assignments
-----------
Assignment statements have to be enhanced to perform similar type checks as for local declarations. Fortunately the assignment goes through the function ``luaK_storevar()`` in ``lcode.c``. A modified version of this is shown below.
::
void luaK_storevar (FuncState *fs, expdesc *var, expdesc *ex) {
switch (var->k) {
case VLOCAL: {
check_valid_store(fs, var, ex);
freeexp(fs, ex);
exp2reg(fs, ex, var->u.info);
return;
}
case VUPVAL: {
int e = luaK_exp2anyreg(fs, ex);
luaK_codeABC(fs, OP_SETUPVAL, e, var->u.info, 0);
break;
}
case VINDEXED: {
OpCode op = (var->u.ind.vt == VLOCAL) ?
OP_SETTABLE : OP_SETTABUP;
if (op == OP_SETTABLE) {
/* table value set - if array access then use specialized versions */
if (var->ravi_type == RAVI_TARRAYFLT &&
var->u.ind.key_type == RAVI_TNUMINT)
op = OP_RAVI_SETTABLE_AF;
else if (var->ravi_type == RAVI_TARRAYINT &&
var->u.ind.key_type == RAVI_TNUMINT)
op = OP_RAVI_SETTABLE_AI;
}
int e = luaK_exp2RK(fs, ex);
luaK_codeABC(fs, op, var->u.ind.t, var->u.ind.idx, e);
break;
}
default: {
lua_assert(0); /* invalid var kind to store */
break;
}
}
freeexp(fs, ex);
}
Firstly note the call to ``check_valid_store()`` for a local variable assignment. The ``check_valid_store()`` function validates that the assignment is compatible.
Secondly if the assignment is to an indexed variable, i.e., table, then we need to generate special opcodes for arrays.
MOVE opcodes
------------
Any ``MOVE`` instructions must be modified so that if the target is a register that hosts a variable of known type then we need to generate special instructions that do a type conversion during the move. This is handled in the ``discharge2reg()`` function which is reproduced below.
::
/*
** Place the value described by 'e' into register 'reg'.
**
** Any pending variable access is discharged first. Constants are loaded
** into 'reg', a relocatable instruction has its A operand patched to 'reg',
** and a value that already lives in a different register is copied across.
** For that copy the type recorded for the target register selects a
** type-specific Ravi move opcode (MOVEI/MOVEF/MOVEAI/MOVEAF); any other
** type falls back to a plain OP_MOVE.
**
** On exit 'e' is VNONRELOC in 'reg', except for the remaining kinds
** (asserted to be VVOID or VJMP), where 'e' is left untouched.
*/
static void discharge2reg (FuncState *fs, expdesc *e, int reg) {
  luaK_dischargevars(fs, e);
  if (e->k == VNIL) {
    luaK_nil(fs, reg, 1);
  }
  else if (e->k == VFALSE || e->k == VTRUE) {
    luaK_codeABC(fs, OP_LOADBOOL, reg, e->k == VTRUE, 0);
  }
  else if (e->k == VK) {
    luaK_codek(fs, reg, e->u.info);
  }
  else if (e->k == VKFLT) {
    luaK_codek(fs, reg, luaK_numberK(fs, e->u.nval));
  }
  else if (e->k == VKINT) {
    luaK_codek(fs, reg, luaK_intK(fs, e->u.ival));
  }
  else if (e->k == VRELOCABLE) {
    /* the instruction already exists; only its destination needs fixing */
    Instruction *insn = &getcode(fs, e);
    SETARG_A(*insn, reg);
    DEBUG_EXPR(raviY_printf(fs, "discharge2reg (VRELOCABLE set arg A) %e\n", e));
    DEBUG_CODEGEN(raviY_printf(fs, "[%d]* %o ; set A to %d\n", e->u.info, *insn, reg));
  }
  else if (e->k == VNONRELOC) {
    if (reg != e->u.info) {
      /* pick a MOVEI, MOVEF, etc. if the target register is a typed local */
      OpCode move_op;
      switch (raviY_get_register_typeinfo(fs, reg)) {
        case RAVI_TNUMINT:   move_op = OP_RAVI_MOVEI;  break;
        case RAVI_TNUMFLT:   move_op = OP_RAVI_MOVEF;  break;
        case RAVI_TARRAYINT: move_op = OP_RAVI_MOVEAI; break;
        case RAVI_TARRAYFLT: move_op = OP_RAVI_MOVEAF; break;
        default:             move_op = OP_MOVE;        break;
      }
      luaK_codeABC(fs, move_op, reg, e->u.info, 0);
    }
  }
  else {
    lua_assert(e->k == VVOID || e->k == VJMP);
    return;  /* nothing to do... */
  }
  /* the value now lives in 'reg' */
  e->u.info = reg;
  e->k = VNONRELOC;
}
Note the handling of ``VNONRELOC`` case.
Expression Parsing
------------------
The expression evaluation process must be modified so that type information is retained and flows through as the parser evaluates the expression. This involves ensuring that the type information is passed through as the parser modifies, reuses, or creates new ``expdesc`` objects. Essentially this means keeping the ``ravi_type`` correct.
Additionally when arithmetic operations take place two things need to happen: a) specialized opcodes need to be emitted and b) the type of the resulting expression needs to be set.
::
/*
** Emit the instruction for operator 'op' applied to 'e1' (and to 'e2' for
** binary operators) and leave the result described by 'e1'.
**
** If constant folding succeeds nothing is emitted. Otherwise the operands
** are moved to registers/constants, and - when RAVI_ENABLED is set and the
** static operand types match one of the cases below - a type-specialised
** Ravi opcode is emitted instead of the generic 'op'. For binary operators
** 'e1->ravi_type' is then updated to the static type of the result; unary
** operators leave 'e1->ravi_type' untouched. 'line' is handed to
** luaK_fixline().
*/
static void codeexpval (FuncState *fs, OpCode op,
expdesc *e1, expdesc *e2, int line) {
lua_assert(op >= OP_ADD);
if (op <= OP_BNOT && constfolding(fs, getarithop(op), e1, e2))
return; /* result has been folded */
else {
int o1, o2;
int isbinary = 1;
/* move operands to registers (if needed) */
if (op == OP_UNM || op == OP_BNOT || op == OP_LEN) { /* unary op? */
o2 = 0; /* no second expression */
o1 = luaK_exp2anyreg(fs, e1); /* cannot operate on constants */
isbinary = 0;
}
else { /* regular case (binary operators) */
o2 = luaK_exp2RK(fs, e2); /* both operands are "RK" */
o1 = luaK_exp2RK(fs, e1);
}
if (o1 > o2) { /* free registers in proper order */
freeexp(fs, e1);
freeexp(fs, e2);
}
else {
freeexp(fs, e2);
freeexp(fs, e1);
}
/* NOTE: the '#if RAVI_ENABLED' sections below bracket an if/else chain whose
   final 'else {' is closed inside the second '#if'; with RAVI_ENABLED off only
   the generic luaK_codeABC() call remains. */
#if RAVI_ENABLED
/* ADD and MUL with int/float operands are delegated to generate_binarithop();
   its last argument looks like an opcode offset relative to the ADD family
   (0 for ADD, OP_RAVI_MULFF - OP_RAVI_ADDFF for MUL) - TODO confirm against
   generate_binarithop() */
if (op == OP_ADD &&
(e1->ravi_type == RAVI_TNUMFLT || e1->ravi_type == RAVI_TNUMINT) &&
(e2->ravi_type == RAVI_TNUMFLT || e2->ravi_type == RAVI_TNUMINT))
generate_binarithop(fs, e1, e2, o1, o2, 0);
else if (op == OP_MUL &&
(e1->ravi_type == RAVI_TNUMFLT || e1->ravi_type == RAVI_TNUMINT) &&
(e2->ravi_type == RAVI_TNUMFLT || e2->ravi_type == RAVI_TNUMINT))
generate_binarithop(fs, e1, e2, o1, o2, OP_RAVI_MULFF - OP_RAVI_ADDFF);
/* todo optimize the SUB opcodes when constant is small */
else if (op == OP_SUB &&
e1->ravi_type == RAVI_TNUMFLT &&
e2->ravi_type == RAVI_TNUMFLT) {
e1->u.info = luaK_codeABC(fs, OP_RAVI_SUBFF, 0, o1, o2);
}
else if (op == OP_SUB &&
e1->ravi_type == RAVI_TNUMFLT &&
e2->ravi_type == RAVI_TNUMINT) {
e1->u.info = luaK_codeABC(fs, OP_RAVI_SUBFI, 0, o1, o2);
}
/* code omitted here .... */
else {
#endif
/* generic path: no specialised opcode applies */
e1->u.info = luaK_codeABC(fs, op, 0, o1, o2); /* generate opcode */
#if RAVI_ENABLED
}
#endif
e1->k = VRELOCABLE; /* all those operations are relocable */
/* Result type for binary operators:
     float op float, float op int, int op float  -> float (ADD/SUB/MUL/DIV)
     int op int                                   -> int   (ADD/SUB/MUL)
     int / int                                    -> float (DIV)
     anything else                                -> RAVI_TANY */
if (isbinary) {
if ((op == OP_ADD || op == OP_SUB || op == OP_MUL || op == OP_DIV)
&& e1->ravi_type == RAVI_TNUMFLT && e2->ravi_type == RAVI_TNUMFLT)
e1->ravi_type = RAVI_TNUMFLT;
else if ((op == OP_ADD || op == OP_SUB || op == OP_MUL || op == OP_DIV)
&& e1->ravi_type == RAVI_TNUMFLT && e2->ravi_type == RAVI_TNUMINT)
e1->ravi_type = RAVI_TNUMFLT;
else if ((op == OP_ADD || op == OP_SUB || op == OP_MUL || op == OP_DIV)
&& e1->ravi_type == RAVI_TNUMINT && e2->ravi_type == RAVI_TNUMFLT)
e1->ravi_type = RAVI_TNUMFLT;
else if ((op == OP_ADD || op == OP_SUB || op == OP_MUL)
&& e1->ravi_type == RAVI_TNUMINT && e2->ravi_type == RAVI_TNUMINT)
e1->ravi_type = RAVI_TNUMINT;
else if ((op == OP_DIV)
&& e1->ravi_type == RAVI_TNUMINT && e2->ravi_type == RAVI_TNUMINT)
e1->ravi_type = RAVI_TNUMFLT;
else
e1->ravi_type = RAVI_TANY;
}
luaK_fixline(fs, line);
}
}
When expressions reference indexed variables, i.e., tables, we need to emit specialized opcodes if the table is an array. This is done in ``luaK_dischargevars()``.
::
/*
** Turn a variable reference held in 'e' into a value expression.
**
**   VLOCAL   -> relabelled VNONRELOC (no code emitted)
**   VUPVAL   -> OP_GETUPVAL emitted, result is VRELOCABLE
**   VINDEXED -> a table-get is emitted, result is VRELOCABLE; when the table
**               is in a register and is a typed array indexed by an integer
**               key, the array-specific get opcode is used, and the
**               expression type becomes the array's element type
**   VVARARG / VCALL -> adjusted to a single result via luaK_setoneret()
**
** Every other expression kind is left unchanged.
*/
void luaK_dischargevars (FuncState *fs, expdesc *e) {
  switch (e->k) {
    case VLOCAL: {
      e->k = VNONRELOC;
      DEBUG_EXPR(raviY_printf(fs, "luaK_dischargevars (VLOCAL->VNONRELOC) %e\n", e));
      break;
    }
    case VUPVAL: {
      e->u.info = luaK_codeABC(fs, OP_GETUPVAL, 0, e->u.info, 0);
      e->k = VRELOCABLE;
      DEBUG_EXPR(raviY_printf(fs, "luaK_dischargevars (VUPVAL->VRELOCABLE) %e\n", e));
      break;
    }
    case VINDEXED: {
      OpCode get_op;
      freereg(fs, e->u.ind.idx);
      if (e->u.ind.vt != VLOCAL) {
        get_op = OP_GETTABUP;  /* 't' is in an upvalue */
      }
      else {  /* 't' is in a register */
        int int_key = (e->u.ind.key_type == RAVI_TNUMINT);
        freereg(fs, e->u.ind.t);
        if (e->ravi_type == RAVI_TARRAYFLT) {
          /* specialised get only when the key is a known integer */
          get_op = int_key ? OP_RAVI_GETTABLE_AF : OP_GETTABLE;
          e->ravi_type = RAVI_TNUMFLT;  /* result is an element of the array */
        }
        else if (e->ravi_type == RAVI_TARRAYINT) {
          get_op = int_key ? OP_RAVI_GETTABLE_AI : OP_GETTABLE;
          e->ravi_type = RAVI_TNUMINT;  /* result is an element of the array */
        }
        else {
          get_op = OP_GETTABLE;
        }
      }
      e->u.info = luaK_codeABC(fs, get_op, 0, e->u.ind.t, e->u.ind.idx);
      e->k = VRELOCABLE;
      DEBUG_EXPR(raviY_printf(fs, "luaK_dischargevars (VINDEXED->VRELOCABLE) %e\n", e));
      break;
    }
    case VVARARG:
    case VCALL: {
      luaK_setoneret(fs, e);
      break;
    }
    default:
      break;  /* there is one value available (somewhere) */
  }
}
fornum statements
-----------------
The Lua fornum statements create special variables. In order to allow the loop variable to be used in expressions within the loop body we need to set the types of these variables. This is handled in ``fornum()`` as shown below.
::
/* Parse a numerical for loop; called from forstat(), calls forbody().
 *
 * Creates the three hidden control locals plus the user-visible loop
 * variable, parses the initial/limit/step expressions and, when all three
 * expressions have the same numeric type (integer or float), records that
 * type on all four locals so the loop variable is typed inside the body.
 *
 * ls      - lexer state
 * varname - name of the user-visible loop variable
 * line    - passed through to forbody()
 */
static void fornum (LexState *ls, TString *varname, int line) {
/* fornum -> NAME = exp1,exp1[,exp1] forbody */
FuncState *fs = ls->fs;
int base = fs->freereg;
LocVar *vidx, *vlimit, *vstep, *vvar;
new_localvarliteral(ls, "(for index)");
new_localvarliteral(ls, "(for limit)");
new_localvarliteral(ls, "(for step)");
new_localvar(ls, varname, RAVI_TANY);
/* The fornum sets up its own variables as above.
These are expected to hold numeric values - but from Ravi's
point of view we need to know if the variable is an integer or
double. So we need to check if this can be determined from the
fornum expressions. If we can then we will set the
fornum variables to the type we discover.
*/
/* The four locals created above are the last four entries of locvars,
   in creation order. */
/* index variable - not yet active so get it from locvars */
vidx = &fs->f->locvars[fs->nlocvars - 4];
/* limit variable - not yet active so get it from locvars */
vlimit = &fs->f->locvars[fs->nlocvars - 3];
/* step variable - not yet active so get it from locvars */
vstep = &fs->f->locvars[fs->nlocvars - 2];
/* user-visible loop variable - not yet active so get it from locvars */
vvar = &fs->f->locvars[fs->nlocvars - 1];
checknext(ls, '=');
/* get the type of each expression */
/* tstep starts as integer because an omitted step defaults to the integer
   constant 1 (see below); presumably exp1() reports the parsed expression's
   type through its second argument - confirm against exp1() */
ravitype_t tidx = RAVI_TANY,
tlimit = RAVI_TANY,
tstep = RAVI_TNUMINT;
exp1(ls, &tidx); /* initial value */
checknext(ls, ',');
exp1(ls, &tlimit); /* limit */
if (testnext(ls, ','))
exp1(ls, &tstep); /* optional step */
else { /* default step = 1 */
luaK_codek(fs, fs->freereg, luaK_intK(fs, 1));
luaK_reserveregs(fs, 1);
}
/* only specialise when all three expressions agree on one numeric type;
   otherwise the locals keep the types they were created with */
if (tidx == tlimit && tlimit == tstep
&& (tidx == RAVI_TNUMFLT || tidx == RAVI_TNUMINT)) {
/* Ok so we have an integer or double */
vidx->ravi_type = vlimit->ravi_type
= vstep->ravi_type
= vvar->ravi_type = tidx;
DEBUG_VARS(raviY_printf(fs, "fornum -> setting type for index %v\n", vidx));
DEBUG_VARS(raviY_printf(fs, "fornum -> setting type for limit %v\n", vlimit));
DEBUG_VARS(raviY_printf(fs, "fornum -> setting type for step %v\n", vstep));
DEBUG_VARS(raviY_printf(fs, "fornum -> setting type for variable %v\n", vvar));
}
forbody(ls, base, line, 1, 1);
}
VM Enhancements
===============
A number of new opcodes are introduced to allow type specific operations.
Currently there are specialized versions of ``ADD``, ``SUB``, ``MUL`` and ``DIV`` operations. This will be extended to cover additional operators such as ``IDIV``.
The ``ADD`` and ``MUL`` operations are implemented in a similar way. Both allow a second operand to be encoded directly in the ``C`` operand - when the value is a constant in the range [0,127].
One thing to note is that, apart from division, if an operation involves constants it is folded by Lua. Divisions are treated specially - an expression involving the ``0`` constant is not folded, even when the ``0`` is the numerator. Also worth noting is that the ``DIV`` operator results in a float even when two integers are divided; you have to use ``IDIV`` to get an integer result - this opcode is triggered in Lua 5.3 when the ``//`` operator is used.
A divide by zero when using integers causes a run time error, whereas for floating point operations the result follows IEEE 754: ``inf`` or ``-inf`` for a non-zero numerator, and NaN for ``0.0/0.0``.

@ -0,0 +1,216 @@
Ravi Programming Language
=========================
Experimental derivative/dialect of Lua. Ravi is a Sanskrit word that means the Sun.
Lua is perfect as a small embeddable dynamic language. So why a derivative? The reason is primarily to extend Lua with static typing for greater performance. However, at the same time maintain full compatibility with standard Lua.
There are other attempts to add static typing to Lua (e.g. `Typed Lua <https://github.com/andremm/typedlua>`_) but these efforts are mostly about adding static type checks in the language while leaving the VM unmodified. So the static typing is to aid programming in the large - the code is eventually translated to standard Lua and executed in the unmodified Lua VM.
My motivation is somewhat different - I want to enhance the VM to support more efficient operations when types are known.
Goals
-----
* Optional static typing for Lua
* No new types
* Type specific bytecodes to improve performance
* Full backward compatibility with Lua 5.3
* LLVM based JIT compiler
Status
------
The project was kicked off in January 2015. My intention is to start small and grow incrementally.
Right now (as of Feb 2015) I am working on the JIT implementation. Please see `JIT Compilation for Ravi <https://github.com/dibyendumajumdar/ravi/wiki/RaviJITCompilation>`_ for details of this effort.
As of end Jan 2015, the Ravi interpreter allows you to declare local variables as ``int`` or ``double``. This triggers the following behaviour:
* ``int`` and ``double`` variables are initialized to 0
* arithmetic operations trigger type specific bytecodes
* values assigned to these variables are checked - statically unless the values are results from a function call, in which case there is an attempt to convert values at runtime.
Also initial implementation of arrays is available. So you can declare arrays of integers or doubles.
* The type of an array of integers is denoted as ``int[]``.
* The type of an array of doubles is denoted as ``double[]``.
* Arrays are implemented using a mix of runtime and compile time checks.
* Operators to get/set from arrays are not yet implemented so we won't yet get the full benefit.
Obviously this is early days so please expect bugs.
Example of code that works - you can copy this to the command line input::
local function tryme(); local i,j = 5,6; return i,j; end; local i:int, j:int = tryme(); return i+j
Another::
local j:double; for i=1,1000000000 do; j = j+1; end; return j
An example with arrays::
local a : double[], j:double = {}; for i=1,10 do; a[i] = i; j = j + a[i]; end; return j
The build is CMake based. I am testing this using Visual Studio 2013 on Windows 8.1 64bit and gcc on Ubuntu 64-bit.
To build on Windows I use::
cd build
cmake -G "Visual Studio 12 Win64" ..
I then open the solution in VS2013 and do a build from there.
On Ubuntu I use::
cd build
cmake -G "Unix Makefiles" ..
make
The ``lua`` command recognizes the following environment variables.
* ``RAVI_DEBUG_EXPR`` - if set to a value this triggers debug output of expression parsing
* ``RAVI_DEBUG_CODEGEN`` - if set to a value this triggers a dump of the code being generated
* ``RAVI_DEBUG_VARS`` - if set this triggers a dump of local variables construction and destruction
Work Plan
---------
* Feb 2015 - implement type specialisation for arrays
* Mar 2015 - implement function parameter / return type specialisation
License
-------
Same as Lua.
Language Syntax
---------------
I hope to enhance the language to allow variables to be optionally decorated with types. As the reason for doing so is primarily performance, not all types benefit from this capability. In fact it is quite hard to extend this to generic recursive structures such as tables without incurring significant overhead. For instance - even to represent a recursive type in the parser will require dynamic memory allocation and add great overhead to the parser.
So as of now the only types that seem worth specializing are:
* int (64-bit)
* double
* array of ints
* array of doubles
Everything else will just be dynamic type as in Lua. However we can recognise following types to make the language more user friendly:
* string
* table
* function
* nil
* boolean
And we may end up allowing additionally following types depending on whether they help our goals:
* array of booleans
* array of strings
* array of functions
The syntax for introducing the type will probably be as below::
function foo(s: string) : string
return s
end
Local variables may be given types as shown below::
function foo() : string
local s: string = "hello world!"
return s
end
If no type is specified then the type will be dynamic - exactly what the Lua default is.
When a typed function is called the inputs and return value can be validated. Consider the function below::
local function foo(a, b: int, c: string)
return
end
When this function is called the compiler can validate that ``b`` is an int and ``c`` is a string. ``a`` on the other hand is dynamic so will behave as regular Lua value. The compiler can also ensure that the types of ``b`` and ``c`` are respected within the function.
Return statements in typed functions can also be validated.
Array Types
-----------
When it comes to complex types such as arrays, tables and functions, at this point in time, I think that Ravi only needs to support explicit specialization for arrays of integers and doubles::
function foo(p1: {}, p2: int[])
-- p1 is a table
-- p2 is an array of integers
local t1 = {} -- t1 is a table
local a1 : int[] = {} -- a1 is an array of integers, specialization of table
local d1 : double[] = {} -- d1 is an array of doubles, specialization of table
end
To support array types we need a mix of runtime and compile time type checking. The Lua table type will be enhanced to hold type information so that when an array type is created the type of the array will be recorded. This will allow the runtime to detect incorrect usage of array type and raise errors if necessary. However, on the other hand, it will be possible to pass the array type to an existing Lua function as a regular table - and as long as the Lua function does not attempt to subvert the array type it should work as normal.
The array types will have some special behaviour:
* indices must be >= 1
* array will grow automatically if user sets the element just past the array length
* it will be an error to attempt to set an element that is beyond len+1
* the current used length of the array will be recorded and returned by len operations
* the array will only permit the right type of value to be assigned (this will be checked at runtime to allow full compatibility with Lua)
* accessing out of bounds elements will cause an error, except for setting the len+1 element
* it will be possible to pass arrays to functions and return arrays from functions - the array types will be checked at runtime
* it should be possible to store an array type in a table - however any operations on array type can only be optimised to special bytecode if the array type is a local variable. Otherwise regular table access will be used subject to runtime checks.
* array types may not have meta methods - this will be enforced at runtime
* array elements will be set to 0 not nil as default value
All type checks are at runtime
------------------------------
To keep with Lua's dynamic nature I plan a mix of compile time checking and runtime type checks. However due to the dynamic nature of Lua, compilation happens at runtime anyway so effectively all checks are at runtime.
Implementation Strategy
-----------------------
I do not want to introduce any new types to the Lua system as the types I need already exist and I quite like the minimalist nature of Lua. However, to make the execution efficient I want to approach this by adding new type specific opcodes, and by enhancing the Lua parser/code generator to encode these opcodes only when types are known. The new opcodes will execute more efficiently as they will not need to perform type checks. In reality the performance gain may be offset by the increase in the instruction decoding / branching - so it remains to be seen whether this approach is beneficial. However, I am hoping that type specific instructions will lend themselves to more efficient JIT at a later stage.
My plan is to add new opcodes that cover arithmetic operations, array operations, variable assignments, etc.
I will probably need to augment some existing types such as functions and tables to add the type signature.
I intend to first add the opcodes to the VM before starting work on the parser and code generator.
Modifications to Lua Bytecode structure
---------------------------------------
An immediate issue is that the Lua bytecode structure has a 6-bit opcode which is insufficient to hold the various opcodes that I will need. Simply extending the size of this is problematic as then it reduces the space available to the operands A B and C. Furthermore the way Lua bytecodes work means that B and C operands must be 1-bit larger than A - as the extra bit is used to flag whether the operand refers to a constant or a register. (Thanks to Dirk Laurie for pointing this out).
If I change the sizes of the components it will make the new bytecode incompatible with Lua. Although this doesn't matter so much as long as source level compatibility is retained - I would like a solution that allows me to maintain full compatibility at bytecode level. An obvious solution is to allow extended 64-bit instructions - while retaining the existing 32-bit instructions.
For now however I am just amending the bit mapping in the 32-bit instruction to allow 9-bits for the byte-code, 7-bits for operand A, and 8-bits for operands B and C. This means that some of the Lua limits (maximum number of variables in a function, etc.) have to be revised to be lower than the default.
New OpCodes
-----------
The new instructions are specialised for types, and also for register versus constant operands. So for example ``OP_RAVI_ADDFI`` means add ``float`` and ``int``. And ``OP_RAVI_ADDFF`` means add ``float`` and ``float``. The existing Lua opcodes that these are based on define which operands are used.
Example
-------
::
local i=0; i=i+1
Above standard Lua code compiles to::
[0] LOADK A=0 Bx=-1
[1] ADD A=0 B=0 C=-2
[2] RETURN A=0 B=1
We add type info using Ravi extensions::
local i:int=0; i=i+1
Now the code compiles to::
[0] LOADK A=0 Bx=-1
[1] ADDII A=0 B=0 C=-2
[2] RETURN A=0 B=1
Above uses type specialised opcode ``OP_RAVI_ADDII``.
Documentation
-------------
As I progress I will add documentation in the Wiki.
* `Ravi Internals <https://github.com/dibyendumajumdar/ravi/wiki/RaviInternals>`_
* `JIT Compilation for Ravi <https://github.com/dibyendumajumdar/ravi/wiki/RaviJITCompilation>`_
* `Lua Internals <https://github.com/dibyendumajumdar/ravi/wiki/Lua-Internals>`_
* `Change Log <https://github.com/dibyendumajumdar/ravi/wiki/Changes>`_
Loading…
Cancel
Save