diff --git a/CMakeLists.txt b/CMakeLists.txt index f0ac63d..12de929 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -166,7 +166,7 @@ set(LUA_CORE_SRCS src/lapi.c src/lcode.c src/lctype.c src/ldebug.c src/ldo.c src src/lfunc.c src/lgc.c src/llex.c src/lmem.c src/lobject.c src/lopcodes.c src/lparser.c src/lstate.c src/lstring.c src/ltable.c src/ltm.c src/lundump.c src/lvm.c src/lzio.c src/ravijit.cpp src/ltests.c src/ravi_profile.c src/ravi_membuf.c - src/ravi_jitshared.c) + src/ravi_jitshared.c src/bit.c) # define the lua lib source files set(LUA_LIB_SRCS src/lauxlib.c src/lbaselib.c src/lbitlib.c src/lcorolib.c src/ldblib.c src/liolib.c src/lmathlib.c src/loslib.c src/ltablib.c src/lstrlib.c src/loadlib.c src/linit.c src/lutf8lib.c) diff --git a/include/lualib.h b/include/lualib.h index 15c26dd..ece08b2 100644 --- a/include/lualib.h +++ b/include/lualib.h @@ -38,6 +38,9 @@ LUAMOD_API int (luaopen_utf8) (lua_State *L); #define LUA_BITLIBNAME "bit32" LUAMOD_API int (luaopen_bit32) (lua_State *L); +#define LUAJIT_BITLIBNAME "bit" +LUAMOD_API int (luaopen_bit)(lua_State *L); + #define LUA_MATHLIBNAME "math" LUAMOD_API int (luaopen_math) (lua_State *L); diff --git a/ravi-tests/bitbench.lua b/ravi-tests/bitbench.lua new file mode 100644 index 0000000..247c362 --- /dev/null +++ b/ravi-tests/bitbench.lua @@ -0,0 +1,69 @@ +-- Microbenchmark for bit operations library. Public domain. + +local bit = require"bit" + +if not bit.rol then -- Replacement function if rotates are missing. + local bor, shl, shr = bit.bor, bit.lshift, bit.rshift + function bit.rol(a, b) return bor(shl(a, b), shr(a, 32-b)) end +end + +if not bit.bswap then -- Replacement function if bswap is missing. + local bor, band, shl, shr = bit.bor, bit.band, bit.lshift, bit.rshift + function bit.bswap(a) + return bor(shr(a, 24), band(shr(a, 8), 0xff00), + shl(band(a, 0xff00), 8), shl(a, 24)); + end +end + +local base = 0 + +local function bench(name, t) + local n = 2000000 + repeat + local tm = os.clock() + t(n) + tm = os.clock() - tm + if tm > 1 then + local ns = tm*1000/(n/1000000) + io.write(string.format("%-15s %6.1f ns\n", name, ns-base)) + return ns + end + n = n + n + until false +end + +-- The overhead for the base loop is subtracted from the other measurements. +base = bench("loop baseline", function(n) + local x = 0; for i=1,n do x = x + i end +end) + +bench("tobit", function(n) + local f = bit.tobit or bit.cast + local x = 0; for i=1,n do x = x + f(i) end +end) + +bench("bnot", function(n) + local f = bit.bnot + local x = 0; for i=1,n do x = x + f(i) end +end) + +bench("bor/band/bxor", function(n) + local f = bit.bor + local x = 0; for i=1,n do x = x + f(i, 1) end +end) + +bench("shifts", function(n) + local f = bit.lshift + local x = 0; for i=1,n do x = x + f(i, 1) end +end) + +bench("rotates", function(n) + local f = bit.rol + local x = 0; for i=1,n do x = x + f(i, 1) end +end) + +bench("bswap", function(n) + local f = bit.bswap + local x = 0; for i=1,n do x = x + f(i) end +end) + diff --git a/ravi-tests/bittest.lua b/ravi-tests/bittest.lua new file mode 100644 index 0000000..a642c2f --- /dev/null +++ b/ravi-tests/bittest.lua @@ -0,0 +1,85 @@ +-- Test cases for bit operations library. Public domain. + +local bit = require"bit" + +local vb = { + 0, 1, -1, 2, -2, 0x12345678, 0x87654321, + 0x33333333, 0x77777777, 0x55aa55aa, 0xaa55aa55, + 0x7fffffff, 0x80000000, 0xffffffff +} + +local function cksum(name, s, r) + local z = 0 + for i=1,#s do z = (z + string.byte(s, i)*i) % 2147483629 end + if z ~= r then + error("bit."..name.." test failed (got "..z..", expected "..r..")", 0) + end +end + +local function check_unop(name, r) + local f = bit[name] + local s = "" + if pcall(f) or pcall(f, "z") or pcall(f, true) then + error("bit."..name.." fails to detect argument errors", 0) + end + for _,x in ipairs(vb) do s = s..","..tostring(f(x)) end + cksum(name, s, r) +end + +local function check_binop(name, r) + local f = bit[name] + local s = "" + if pcall(f) or pcall(f, "z") or pcall(f, true) then + error("bit."..name.." fails to detect argument errors", 0) + end + for _,x in ipairs(vb) do + for _,y in ipairs(vb) do s = s..","..tostring(f(x, y)) end + end + cksum(name, s, r) +end + +local function check_binop_range(name, r, yb, ye) + local f = bit[name] + local s = "" + if pcall(f) or pcall(f, "z") or pcall(f, true) or pcall(f, 1, true) then + error("bit."..name.." fails to detect argument errors", 0) + end + for _,x in ipairs(vb) do + for y=yb,ye do s = s..","..tostring(f(x, y)) end + end + cksum(name, s, r) +end + +local function check_shift(name, r) + check_binop_range(name, r, 0, 31) +end + +-- Minimal sanity checks. +assert(0x7fffffff == 2147483647, "broken hex literals") +assert(0xffffffff == -1 or 0xffffffff == 2^32-1, "broken hex literals") +assert(tostring(-1) == "-1", "broken tostring()") +assert(tostring(0xffffffff) == "-1" or tostring(0xffffffff) == "4294967295", "broken tostring()") + +-- Basic argument processing. +assert(bit.tobit(1) == 1) +assert(bit.band(1) == 1) +assert(bit.bxor(1,2) == 3) +assert(bit.bor(1,2,4,8,16,32,64,128) == 255) + +-- Apply operations to test vectors and compare checksums. +check_unop("tobit", 277312) +check_unop("bnot", 287870) +check_unop("bswap", 307611) + +check_binop("band", 41206764) +check_binop("bor", 51253663) +check_binop("bxor", 79322427) + +check_shift("lshift", 325260344) +check_shift("rshift", 139061800) +check_shift("arshift", 111364720) +check_shift("rol", 302401155) +check_shift("ror", 302316761) + +check_binop_range("tohex", 47880306, -8, 8) + diff --git a/ravi-tests/md5test.lua b/ravi-tests/md5test.lua new file mode 100644 index 0000000..3884f40 --- /dev/null +++ b/ravi-tests/md5test.lua @@ -0,0 +1,196 @@ +-- MD5 test and benchmark. Public domain. + +local bit = require("bit") +local tobit, tohex, bnot = bit.tobit or bit.cast, bit.tohex, bit.bnot +local bor, band, bxor = bit.bor, bit.band, bit.bxor +local lshift, rshift, rol, bswap = bit.lshift, bit.rshift, bit.rol, bit.bswap +local byte, char, sub, rep = string.byte, string.char, string.sub, string.rep + +if not rol then -- Replacement function if rotates are missing. + local bor, shl, shr = bit.bor, bit.lshift, bit.rshift + function rol(a, b) return bor(shl(a, b), shr(a, 32-b)) end +end + +if not bswap then -- Replacement function if bswap is missing. + local bor, band, shl, shr = bit.bor, bit.band, bit.lshift, bit.rshift + function bswap(a) + return bor(shr(a, 24), band(shr(a, 8), 0xff00), + shl(band(a, 0xff00), 8), shl(a, 24)); + end +end + +if not tohex then -- (Unreliable) replacement function if tohex is missing. + function tohex(a) + return string.sub(string.format("%08x", a), -8) + end +end + +local function tr_f(a, b, c, d, x, s) + return rol(bxor(d, band(b, bxor(c, d))) + a + x, s) + b +end + +local function tr_g(a, b, c, d, x, s) + return rol(bxor(c, band(d, bxor(b, c))) + a + x, s) + b +end + +local function tr_h(a, b, c, d, x, s) + return rol(bxor(b, c, d) + a + x, s) + b +end + +local function tr_i(a, b, c, d, x, s) + return rol(bxor(c, bor(b, bnot(d))) + a + x, s) + b +end + +local function transform(x, a1, b1, c1, d1) + local a, b, c, d = a1, b1, c1, d1 + + a = tr_f(a, b, c, d, x[ 1] + 0xd76aa478, 7) + d = tr_f(d, a, b, c, x[ 2] + 0xe8c7b756, 12) + c = tr_f(c, d, a, b, x[ 3] + 0x242070db, 17) + b = tr_f(b, c, d, a, x[ 4] + 0xc1bdceee, 22) + a = tr_f(a, b, c, d, x[ 5] + 0xf57c0faf, 7) + d = tr_f(d, a, b, c, x[ 6] + 0x4787c62a, 12) + c = tr_f(c, d, a, b, x[ 7] + 0xa8304613, 17) + b = tr_f(b, c, d, a, x[ 8] + 0xfd469501, 22) + a = tr_f(a, b, c, d, x[ 9] + 0x698098d8, 7) + d = tr_f(d, a, b, c, x[10] + 0x8b44f7af, 12) + c = tr_f(c, d, a, b, x[11] + 0xffff5bb1, 17) + b = tr_f(b, c, d, a, x[12] + 0x895cd7be, 22) + a = tr_f(a, b, c, d, x[13] + 0x6b901122, 7) + d = tr_f(d, a, b, c, x[14] + 0xfd987193, 12) + c = tr_f(c, d, a, b, x[15] + 0xa679438e, 17) + b = tr_f(b, c, d, a, x[16] + 0x49b40821, 22) + + a = tr_g(a, b, c, d, x[ 2] + 0xf61e2562, 5) + d = tr_g(d, a, b, c, x[ 7] + 0xc040b340, 9) + c = tr_g(c, d, a, b, x[12] + 0x265e5a51, 14) + b = tr_g(b, c, d, a, x[ 1] + 0xe9b6c7aa, 20) + a = tr_g(a, b, c, d, x[ 6] + 0xd62f105d, 5) + d = tr_g(d, a, b, c, x[11] + 0x02441453, 9) + c = tr_g(c, d, a, b, x[16] + 0xd8a1e681, 14) + b = tr_g(b, c, d, a, x[ 5] + 0xe7d3fbc8, 20) + a = tr_g(a, b, c, d, x[10] + 0x21e1cde6, 5) + d = tr_g(d, a, b, c, x[15] + 0xc33707d6, 9) + c = tr_g(c, d, a, b, x[ 4] + 0xf4d50d87, 14) + b = tr_g(b, c, d, a, x[ 9] + 0x455a14ed, 20) + a = tr_g(a, b, c, d, x[14] + 0xa9e3e905, 5) + d = tr_g(d, a, b, c, x[ 3] + 0xfcefa3f8, 9) + c = tr_g(c, d, a, b, x[ 8] + 0x676f02d9, 14) + b = tr_g(b, c, d, a, x[13] + 0x8d2a4c8a, 20) + + a = tr_h(a, b, c, d, x[ 6] + 0xfffa3942, 4) + d = tr_h(d, a, b, c, x[ 9] + 0x8771f681, 11) + c = tr_h(c, d, a, b, x[12] + 0x6d9d6122, 16) + b = tr_h(b, c, d, a, x[15] + 0xfde5380c, 23) + a = tr_h(a, b, c, d, x[ 2] + 0xa4beea44, 4) + d = tr_h(d, a, b, c, x[ 5] + 0x4bdecfa9, 11) + c = tr_h(c, d, a, b, x[ 8] + 0xf6bb4b60, 16) + b = tr_h(b, c, d, a, x[11] + 0xbebfbc70, 23) + a = tr_h(a, b, c, d, x[14] + 0x289b7ec6, 4) + d = tr_h(d, a, b, c, x[ 1] + 0xeaa127fa, 11) + c = tr_h(c, d, a, b, x[ 4] + 0xd4ef3085, 16) + b = tr_h(b, c, d, a, x[ 7] + 0x04881d05, 23) + a = tr_h(a, b, c, d, x[10] + 0xd9d4d039, 4) + d = tr_h(d, a, b, c, x[13] + 0xe6db99e5, 11) + c = tr_h(c, d, a, b, x[16] + 0x1fa27cf8, 16) + b = tr_h(b, c, d, a, x[ 3] + 0xc4ac5665, 23) + + a = tr_i(a, b, c, d, x[ 1] + 0xf4292244, 6) + d = tr_i(d, a, b, c, x[ 8] + 0x432aff97, 10) + c = tr_i(c, d, a, b, x[15] + 0xab9423a7, 15) + b = tr_i(b, c, d, a, x[ 6] + 0xfc93a039, 21) + a = tr_i(a, b, c, d, x[13] + 0x655b59c3, 6) + d = tr_i(d, a, b, c, x[ 4] + 0x8f0ccc92, 10) + c = tr_i(c, d, a, b, x[11] + 0xffeff47d, 15) + b = tr_i(b, c, d, a, x[ 2] + 0x85845dd1, 21) + a = tr_i(a, b, c, d, x[ 9] + 0x6fa87e4f, 6) + d = tr_i(d, a, b, c, x[16] + 0xfe2ce6e0, 10) + c = tr_i(c, d, a, b, x[ 7] + 0xa3014314, 15) + b = tr_i(b, c, d, a, x[14] + 0x4e0811a1, 21) + a = tr_i(a, b, c, d, x[ 5] + 0xf7537e82, 6) + d = tr_i(d, a, b, c, x[12] + 0xbd3af235, 10) + c = tr_i(c, d, a, b, x[ 3] + 0x2ad7d2bb, 15) + b = tr_i(b, c, d, a, x[10] + 0xeb86d391, 21) + + return tobit(a+a1), tobit(b+b1), tobit(c+c1), tobit(d+d1) +end + +-- Note: this is copying the original string and NOT particularly fast. +-- A library for struct unpacking would make this task much easier. +local function md5(msg) + local len = #msg + msg = msg.."\128"..rep("\0", 63 - band(len + 8, 63)) + ..char(band(lshift(len, 3), 255), band(rshift(len, 5), 255), + band(rshift(len, 13), 255), band(rshift(len, 21), 255)) + .."\0\0\0\0" + local a, b, c, d = 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476 + local x, k = {}, 1 + for i=1,#msg,4 do + local m0, m1, m2, m3 = byte(msg, i, i+3) + x[k] = bor(m0, lshift(m1, 8), lshift(m2, 16), lshift(m3, 24)) + if k == 16 then + a, b, c, d = transform(x, a, b, c, d) + k = 1 + else + k = k + 1 + end + end + return tohex(bswap(a))..tohex(bswap(b))..tohex(bswap(c))..tohex(bswap(d)) +end + +assert(md5('') == 'd41d8cd98f00b204e9800998ecf8427e') +assert(md5('a') == '0cc175b9c0f1b6a831c399e269772661') +assert(md5('abc') == '900150983cd24fb0d6963f7d28e17f72') +assert(md5('message digest') == 'f96b697d7cb7938d525a2f31aaf161d0') +assert(md5('abcdefghijklmnopqrstuvwxyz') == 'c3fcd3d76192e4007dfb496cca67e13b') +assert(md5('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789') == + 'd174ab98d277d9f5a5611c2c9f419d9f') +assert(md5('12345678901234567890123456789012345678901234567890123456789012345678901234567890') == + '57edf4a22be3c955ac49da2e2107b67a') + +if arg and arg[1] == "bench" then + -- Credits: William Shakespeare, Romeo and Juliet + local txt = [[Rebellious subjects, enemies to peace, +Profaners of this neighbour-stained steel,-- +Will they not hear? What, ho! you men, you beasts, +That quench the fire of your pernicious rage +With purple fountains issuing from your veins, +On pain of torture, from those bloody hands +Throw your mistemper'd weapons to the ground, +And hear the sentence of your moved prince. +Three civil brawls, bred of an airy word, +By thee, old Capulet, and Montague, +Have thrice disturb'd the quiet of our streets, +And made Verona's ancient citizens +Cast by their grave beseeming ornaments, +To wield old partisans, in hands as old, +Canker'd with peace, to part your canker'd hate: +If ever you disturb our streets again, +Your lives shall pay the forfeit of the peace. +For this time, all the rest depart away: +You Capulet; shall go along with me: +And, Montague, come you this afternoon, +To know our further pleasure in this case, +To old Free-town, our common judgment-place. +Once more, on pain of death, all men depart.]] + txt = txt..txt..txt..txt + txt = txt..txt..txt..txt + + local function bench() + local n = 80 + repeat + local tm = os.clock() + local res + for i=1,n do + res = md5(txt) + end + assert(res == 'a831e91e0f70eddcb70dc61c6f82f6cd') + tm = os.clock() - tm + if tm > 1 then return tm*(1000000000/(n*#txt)) end + n = n + n + until false + end + + io.write(string.format("MD5 %7.1f ns/char\n", bench())) +end + diff --git a/ravi-tests/nsievebits.lua b/ravi-tests/nsievebits.lua new file mode 100644 index 0000000..b99d21e --- /dev/null +++ b/ravi-tests/nsievebits.lua @@ -0,0 +1,32 @@ +-- This is the (naive) Sieve of Eratosthenes. Public domain. + +local bit = require("bit") +local band, bxor, rshift, rol = bit.band, bit.bxor, bit.rshift, bit.rol + +local function nsieve(p, m) + local count = 0 + for i=0,rshift(m, 5) do p[i] = -1 end + for i=2,m do + if band(rshift(p[rshift(i, 5)], i), 1) ~= 0 then + count = count + 1 + for j=i+i,m,i do + local jx = rshift(j, 5) + p[jx] = band(p[jx], rol(-2, j)) + end + end + end + return count +end + +if arg and arg[1] then + local N = tonumber(arg[1]) or 1 + if N < 2 then N = 2 end + local primes = {} + + for i=0,2 do + local m = (2^(N-i))*10000 + io.write(string.format("Primes up to %8d %8d\n", m, nsieve(primes, m))) + end +else + assert(nsieve({}, 10000) == 1229) +end diff --git a/src/bit.c b/src/bit.c new file mode 100644 index 0000000..c185f53 --- /dev/null +++ b/src/bit.c @@ -0,0 +1,130 @@ +/* +** Lua BitOp -- a bit operations library for Lua 5.1/5.2. +** http://bitop.luajit.org/ +** +** Copyright (C) 2008-2012 Mike Pall. All rights reserved. +** +** Permission is hereby granted, free of charge, to any person obtaining +** a copy of this software and associated documentation files (the +** "Software"), to deal in the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be +** included in all copies or substantial portions of the Software. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +** +** [ MIT license: http://www.opensource.org/licenses/mit-license.php ] +*/ + +#define LUA_BITOP_VERSION "1.0.2" + +#define LUA_LIB +#include "lua.h" +#include "lauxlib.h" + +#include + +typedef int32_t SBits; +typedef uint32_t UBits; + +/* Convert argument to bit type. */ +static inline UBits barg(lua_State *L, int idx) +{ + return (UBits)luaL_checkinteger(L, idx); +} + +/* Return bit type. */ +#define BRET(b) lua_pushinteger(L, (lua_Integer)(SBits)(b)); return 1; + +static int bit_tobit(lua_State *L) { BRET(barg(L, 1)) } +static int bit_bnot(lua_State *L) { BRET(~barg(L, 1)) } + +#define BIT_OP(func, opr) \ + static int func(lua_State *L) { int i; UBits b = barg(L, 1); \ + for (i = lua_gettop(L); i > 1; i--) b opr barg(L, i); BRET(b) } +BIT_OP(bit_band, &=) +BIT_OP(bit_bor, |=) +BIT_OP(bit_bxor, ^=) + +#define bshl(b, n) (b << n) +#define bshr(b, n) (b >> n) +#define bsar(b, n) ((SBits)b >> n) +#define brol(b, n) ((b << n) | (b >> (32-n))) +#define bror(b, n) ((b << (32-n)) | (b >> n)) +#define BIT_SH(func, fn) \ + static int func(lua_State *L) { \ + UBits b = barg(L, 1); UBits n = barg(L, 2) & 31; BRET(fn(b, n)) } +BIT_SH(bit_lshift, bshl) +BIT_SH(bit_rshift, bshr) +BIT_SH(bit_arshift, bsar) +BIT_SH(bit_rol, brol) +BIT_SH(bit_ror, bror) + +static int bit_bswap(lua_State *L) +{ + UBits b = barg(L, 1); + b = (b >> 24) | ((b >> 8) & 0xff00) | ((b & 0xff00) << 8) | (b << 24); + BRET(b) +} + +static int bit_tohex(lua_State *L) +{ + UBits b = barg(L, 1); + SBits n = lua_isnone(L, 2) ? 8 : (SBits)barg(L, 2); + const char *hexdigits = "0123456789abcdef"; + char buf[8]; + int i; + if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; } + if (n > 8) n = 8; + for (i = (int)n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; } + lua_pushlstring(L, buf, (size_t)n); + return 1; +} + +static const struct luaL_Reg bit_funcs[] = { + { "tobit", bit_tobit }, + { "bnot", bit_bnot }, + { "band", bit_band }, + { "bor", bit_bor }, + { "bxor", bit_bxor }, + { "lshift", bit_lshift }, + { "rshift", bit_rshift }, + { "arshift", bit_arshift }, + { "rol", bit_rol }, + { "ror", bit_ror }, + { "bswap", bit_bswap }, + { "tohex", bit_tohex }, + { NULL, NULL } +}; + +/* Signed right-shifts are implementation-defined per C89/C99. +** But the de facto standard are arithmetic right-shifts on two's +** complement CPUs. This behaviour is required here, so test for it. +*/ +#define BAD_SAR (bsar(-8, 2) != (SBits)-2) + +LUALIB_API int luaopen_bit(lua_State *L) +{ + UBits b; + lua_pushnumber(L, (lua_Number)1437217655L); + b = barg(L, -1); + if (b != (UBits)1437217655L || BAD_SAR) { /* Perform a simple self-test. */ + const char *msg = "compiled with incompatible luaconf.h"; + if (BAD_SAR) + msg = "arithmetic right-shift broken"; + luaL_error(L, "bit library self-test failed (%s)", msg); + } + luaL_newlib(L, bit_funcs); + return 1; +} + diff --git a/src/linit.c b/src/linit.c index a2bb808..c2c37ab 100644 --- a/src/linit.c +++ b/src/linit.c @@ -63,6 +63,7 @@ static const luaL_Reg loadedlibs[] = { #if defined(LUA_COMPAT_BITLIB) {LUA_BITLIBNAME, luaopen_bit32}, #endif + {LUAJIT_BITLIBNAME, luaopen_bit }, {NULL, NULL} };