diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 0528289f3..60fff3b23 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -22,7 +22,7 @@ jobs:
fail-fast: false
matrix:
target: [linux, darwin, windows]
- architecture: [32, 64, arm64]
+ architecture: [64, arm64]
build_system: [make, cmake, cmake-mingw, cmake-clang-cl, vs2019, makegcc14]
include:
@@ -429,7 +429,7 @@ jobs:
fail-fast: false
matrix:
os: [darwin, linux, windows]
- architecture: [x86_32, x86_64, arm64]
+ architecture: [x86_64, arm64]
include:
- architecture: arm64
test-flags: --skip-hl-jit # not yet supported
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ddd2fd260..5cbb7277a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.13)
-set(HL_VERSION_MAJOR 1)
-set(HL_VERSION_MINOR 16)
+set(HL_VERSION_MAJOR 2)
+set(HL_VERSION_MINOR 0)
set(HL_VERSION_PATCH 0)
set(HL_VERSION ${HL_VERSION_MAJOR}.${HL_VERSION_MINOR}.${HL_VERSION_PATCH})
@@ -20,7 +20,8 @@ include(FindPkgConfig)
include(CTest)
set(WITH_VM_DEFAULT ON)
-if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64" AND (NOT CMAKE_OSX_ARCHITECTURES MATCHES "x86_64"))
+# 32-bit ARM has no JIT backend; aarch64/arm64 uses src/jit_aarch64.c.
+# Match the common 32-bit ARM processor names (arm, armv6l, armv7-a, armv7l, armv8l, armhf, armel)
+# but never arm64/aarch64, so the VM is only enabled by default where a backend exists.
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm$|^armv[4-7]|^armv8[lb]$|^armhf$|^armel$" AND (NOT CMAKE_OSX_ARCHITECTURES MATCHES "x86_64"))
set(WITH_VM_DEFAULT OFF)
endif()
@@ -225,9 +226,18 @@ else()
endif()
if (WITH_VM)
+    # Select the JIT backend from the *target* architecture. On macOS the target is
+    # CMAKE_OSX_ARCHITECTURES when it is set (e.g. an x86_64 build on an arm64 host),
+    # otherwise it is CMAKE_SYSTEM_PROCESSOR. A single backend is compiled, so a
+    # universal (multi-arch) value cannot be served correctly by this selection.
+    set(HL_JIT_TARGET_ARCH "${CMAKE_SYSTEM_PROCESSOR}")
+    if(CMAKE_OSX_ARCHITECTURES)
+        set(HL_JIT_TARGET_ARCH "${CMAKE_OSX_ARCHITECTURES}")
+    endif()
+    if(HL_JIT_TARGET_ARCH MATCHES "aarch64|arm64|ARM64")
+        set(HL_JIT_BACKEND src/jit_aarch64.c src/jit_aarch64_emit.c)
+    else()
+        set(HL_JIT_BACKEND src/jit_x86_64.c)
+    endif()
add_executable(hl
src/code.c
src/jit.c
+ src/jit_emit.c
+ src/jit_regs.c
+ ${HL_JIT_BACKEND}
+ src/jit_dump.c
src/main.c
src/module.c
src/debugger.c
diff --git a/Makefile b/Makefile
index aded6c272..e1ffa169b 100644
--- a/Makefile
+++ b/Makefile
@@ -41,7 +41,13 @@ STD = src/std/array.o src/std/buffer.o src/std/bytes.o src/std/cast.o src/std/da
src/std/socket.o src/std/string.o src/std/sys.o src/std/types.o src/std/ucs2.o src/std/thread.o src/std/process.o \
src/std/track.o
-HL_OBJ = src/code.o src/jit.o src/main.o src/module.o src/debugger.o src/profile.o
+# 64-bit ARM uses the AArch64 JIT backend. The architecture is spelled arm64 on
+# macOS and aarch64 on Linux, so accept both values of ARCH.
+# NOTE(review): assumes ARCH carries the raw machine name (e.g. from `uname -m`) - confirm.
+ifneq ($(filter arm64 aarch64,$(ARCH)),)
+HL_JIT_BACKEND_OBJ = src/jit_aarch64.o src/jit_aarch64_emit.o
+else
+HL_JIT_BACKEND_OBJ = src/jit_x86_64.o
+endif
+
+HL_OBJ = src/code.o src/jit.o src/jit_emit.o src/jit_regs.o $(HL_JIT_BACKEND_OBJ) src/jit_dump.o src/main.o src/module.o src/debugger.o src/profile.o
FMT_CPPFLAGS = -I include/mikktspace -I include/minimp3
@@ -240,19 +246,12 @@ LIBHL = libhl.$(LIBEXT)
HL = hl$(EXE_SUFFIX)
HLC = hlc$(EXE_SUFFIX)
-all: $(LIBHL) libs
-ifeq ($(ARCH),arm64)
- $(warning HashLink vm is not supported on arm64, skipping)
-else
-all: $(HL)
-endif
+all: $(LIBHL) libs $(HL)
install:
$(UNAME)==Darwin && ${MAKE} uninstall
-ifneq ($(ARCH),arm64)
mkdir -p $(INSTALL_BIN_DIR)
cp $(HL) $(INSTALL_BIN_DIR)
-endif
mkdir -p $(INSTALL_LIB_DIR)
cp *.hdll $(INSTALL_LIB_DIR)
cp $(LIBHL) $(INSTALL_LIB_DIR)
@@ -365,11 +364,7 @@ release_win:
rm -rf $(PACKAGE_NAME)
release_linux release_osx:
-ifeq ($(ARCH),arm64)
- cp $(LIBHL) *.hdll $(PACKAGE_NAME)
-else
cp $(HL) $(LIBHL) *.hdll $(PACKAGE_NAME)
-endif
tar -cvzf $(PACKAGE_NAME).tar.gz $(PACKAGE_NAME)
rm -rf $(PACKAGE_NAME)
diff --git a/hl.vcxproj b/hl.vcxproj
index 88e95b28b..fef4a909e 100644
--- a/hl.vcxproj
+++ b/hl.vcxproj
@@ -45,55 +45,55 @@
Application
true
Unicode
- v142
+ v143
Application
true
Unicode
- v142
+ v143
Application
false
true
Unicode
- v142
+ v143
Application
false
true
Unicode
- v142
+ v143
Application
false
true
Unicode
- v120
+ v143
Application
false
true
Unicode
- v142
+ v143
Application
false
true
Unicode
- v142
+ v143
Application
false
true
Unicode
- v120
+ v143
@@ -186,7 +186,7 @@
EnableAllWarnings
Disabled
WIN32;_DEBUG;_CONSOLE;HL_VTUNE;%(PreprocessorDefinitions)
- /wd4456 /wd4100 /wd4204 /wd4702 /wd4457 %(AdditionalOptions)
+ /wd4456 /wd4100 /wd4204 /wd4702 /wd4457 /we4013 %(AdditionalOptions)
true
stdc11
@@ -196,6 +196,7 @@
libhl.lib;user32.lib;include/vtune/jitprofiling.lib
false
false
+ 4194304
PerMonitorHighDPIAware
@@ -361,14 +362,20 @@
+
+
+
+
+
+
diff --git a/hl.vcxproj.filters b/hl.vcxproj.filters
index f86723996..8a8395f72 100644
--- a/hl.vcxproj.filters
+++ b/hl.vcxproj.filters
@@ -4,14 +4,20 @@
-
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/libhl.vcxproj b/libhl.vcxproj
index 40f1a2eff..1f86fe1a7 100644
--- a/libhl.vcxproj
+++ b/libhl.vcxproj
@@ -36,40 +36,40 @@
DynamicLibrary
true
- v142
+ v143
Unicode
DynamicLibrary
false
- v142
+ v143
true
Unicode
DynamicLibrary
false
- v120
+ v143
true
Unicode
DynamicLibrary
true
- v142
+ v143
Unicode
DynamicLibrary
false
- v142
+ v143
true
Unicode
DynamicLibrary
false
- v120
+ v143
true
Unicode
diff --git a/src/allocator.c b/src/allocator.c
index 47dfc8f41..f9bd63420 100644
--- a/src/allocator.c
+++ b/src/allocator.c
@@ -313,6 +313,8 @@ static void *gc_alloc_fixed( int part, int kind ) {
for(i=0;iblock_size;i++)
if( ptr[i] != 0xDD )
hl_fatal("assert");
+ else
+ ptr[i] = 0xCD;
}
# endif
gc_free_pages[pid] = ph;
@@ -367,6 +369,8 @@ static void *gc_alloc_var( int part, int size, int kind ) {
for(i=0;ibmp ) {
diff --git a/src/data_struct.c b/src/data_struct.c
new file mode 100644
index 000000000..ed417770e
--- /dev/null
+++ b/src/data_struct.c
@@ -0,0 +1,308 @@
+/*
+ * Copyright (C)2015-2026 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifdef S_TYPE
+
+// is included by data_struct.h
+
+#ifdef S_MAP
+# define S_ARGS S_KEY k, S_VALUE v
+#else
+# define S_ARGS S_VALUE k
+# define S_KEY S_VALUE
+# define keys values
+#endif
+
+#ifndef S_DEFVAL
+# define S_DEFVAL (S_VALUE)0
+#endif
+
+#ifndef S_CMP
+// Default ordering test used by the sorted containers: true when a sorts after b.
+// Fully parenthesized so the expansion is safe inside any larger expression.
+#	define S_CMP(a,b) ((a) > (b))
+#endif
+
+// Growable array container generated once per S_TYPE instantiation.
+// cur is the number of used slots, max the allocated capacity (grown by check_size).
+// For non-map instantiations `keys` is #defined to `values`, so the struct then
+// holds a single array of S_VALUE named `values`.
+typedef struct {
+ int cur;
+ int max;
+ S_KEY *keys;
+# ifdef S_MAP
+ S_VALUE *values;
+# endif
+} S_TYPE;
+
+typedef S_VALUE S_NAME(_value);
+#ifdef S_MAP
+typedef S_KEY S_NAME(_key);
+#endif
+
+// Makes room for one more element: when the array is full, allocates a new one
+// of twice the capacity (STRUCT_DEF_SIZE on first use) from `alloc` and copies
+// the existing entries over. The previous storage is not freed here.
+INLINE static void S_NAME(check_size)( hl_alloc *alloc, S_TYPE *st ) {
+	if( st->cur != st->max )
+		return;
+	int n = st->max ? (st->max << 1) : STRUCT_DEF_SIZE;
+	S_KEY *keys = (S_KEY*)hl_malloc(alloc,sizeof(S_KEY) * n);
+	// the source pointer may still be NULL before the first growth:
+	// memcpy must not receive a null pointer, even for a zero-byte copy
+	if( st->cur > 0 )
+		memcpy(keys,st->keys,sizeof(S_KEY) * st->cur);
+	st->keys = keys;
+#	ifdef S_MAP
+	S_VALUE *vals = (S_VALUE*)hl_malloc(alloc,sizeof(S_VALUE) * n);
+	if( st->cur > 0 )
+		memcpy(vals,st->values,sizeof(S_VALUE) * st->cur);
+	st->values = vals;
+#	endif
+	st->max = n;
+}
+
+#ifndef S_SORTED
+
+// Unsorted variant: appends k (and v for maps) after the last element,
+// growing the storage first when it is full.
+INLINE static void S_NAME(add_impl)( hl_alloc *alloc, S_TYPE *st, S_ARGS ) {
+	S_NAME(check_size)(alloc,st);
+	int slot = st->cur++;
+	st->keys[slot] = k;
+#	ifdef S_MAP
+	st->values[slot] = v;
+#	endif
+}
+
+INLINE static bool S_NAME(exists)( S_TYPE st, S_KEY k ) {
+ for(int i=0;icur;i++)
+ if( st->keys[i] == k ) {
+ int pos = i;
+ memmove(st->keys + pos, st->keys + pos + 1, (st->cur - pos - 1) * sizeof(S_KEY));
+# ifdef S_MAP
+ memmove(st->values + pos, st->values + pos + 1, (st->cur - pos - 1) * sizeof(S_VALUE));
+# endif
+ st->cur--;
+ return true;
+ }
+ return false;
+}
+
+// Drops `count` consecutive entries starting at index `pos` by sliding the
+// remaining tail down over them. pos and count are not bounds-checked.
+INLINE static void S_NAME(remove_range)( S_TYPE *st, int pos, int count ) {
+	int tail = st->cur - pos - count;
+	memmove(st->keys + pos, st->keys + pos + count, tail * sizeof(S_KEY));
+#	ifdef S_MAP
+	memmove(st->values + pos, st->values + pos + count, tail * sizeof(S_VALUE));
+#	endif
+	st->cur -= count;
+}
+
+#ifdef S_MAP
+static S_VALUE S_NAME(find)( S_TYPE st, S_KEY k ) {
+ for(int i=0;icur + count > st->max ) {
+ int n = st->max ? (st->max << 1) : STRUCT_DEF_SIZE;
+ while( n < st->cur + count ) n <<= 1;
+ S_KEY *keys = (S_KEY*)hl_malloc(alloc,sizeof(S_KEY) * n);
+ memcpy(keys,st->keys,sizeof(S_KEY) * st->cur);
+ st->keys = keys;
+ st->max = n;
+ }
+ S_VALUE *ptr = st->keys + st->cur;
+ st->cur += count;
+ return ptr;
+}
+#endif
+
+
+#else
+
+// Sorted variant: binary-searches for k using S_CMP and inserts it (with v for
+// maps) at its ordered position. Returns false, leaving the container
+// unchanged, when an equivalent key is already present; true after inserting.
+INLINE static bool S_NAME(add_impl)( hl_alloc *alloc, S_TYPE *st, S_ARGS ) {
+	int lo = 0;
+	int hi = st->cur;
+	while( lo < hi ) {
+		int mid = (lo + hi) >> 1;
+		S_KEY probe = st->keys[mid];
+		if( S_CMP(k,probe) )
+			lo = mid + 1;
+		else if( S_CMP(probe,k) )
+			hi = mid;
+		else
+			return false;
+	}
+	S_NAME(check_size)(alloc,st);
+	int pos = (lo + hi) >> 1; // lo == hi once the search has ended
+	int tail = st->cur - pos;
+	// open a one-slot gap at pos
+	memmove(st->keys + pos + 1, st->keys + pos, tail * sizeof(S_KEY));
+#	ifdef S_MAP
+	memmove(st->values + pos + 1, st->values + pos, tail * sizeof(S_VALUE));
+	st->values[pos] = v;
+#	endif
+	st->keys[pos] = k;
+	st->cur++;
+	return true;
+}
+
+#ifdef S_MAP
+// Sorted map only: stores v for key k. An existing entry has its value
+// overwritten in place; otherwise (k,v) is inserted at its ordered position.
+// Keys are compared with the plain < and > operators here.
+INLINE static void S_NAME(replace_impl)( hl_alloc *alloc, S_TYPE *st, S_ARGS ) {
+	int lo = 0;
+	int hi = st->cur;
+	while( lo < hi ) {
+		int mid = (lo + hi) >> 1;
+		S_KEY probe = st->keys[mid];
+		if( probe < k ) {
+			lo = mid + 1;
+		} else if( probe > k ) {
+			hi = mid;
+		} else {
+			st->values[mid] = v;
+			return;
+		}
+	}
+	S_NAME(check_size)(alloc,st);
+	int pos = (lo + hi) >> 1; // lo == hi once the search has ended
+	int tail = st->cur - pos;
+	memmove(st->keys + pos + 1, st->keys + pos, tail * sizeof(S_KEY));
+	memmove(st->values + pos + 1, st->values + pos, tail * sizeof(S_VALUE));
+	st->keys[pos] = k;
+	st->values[pos] = v;
+	st->cur++;
+}
+
+// Sorted map only: inserts the pair (k,v), allowing several values per key.
+// Entries are searched by key (plain < and >) and, for equal keys, by value
+// (S_CMP). Returns false when the exact pair already exists, true after inserting.
+INLINE static bool S_NAME(add_pair_impl)( hl_alloc *alloc, S_TYPE *st, S_ARGS ) {
+	int lo = 0;
+	int hi = st->cur;
+	while( lo < hi ) {
+		int mid = (lo + hi) >> 1;
+		S_KEY probe = st->keys[mid];
+		if( probe < k ) {
+			lo = mid + 1;
+		} else if( probe > k ) {
+			hi = mid;
+		} else {
+			// same key: order by value
+			S_VALUE pv = st->values[mid];
+			if( S_CMP(v,pv) )
+				lo = mid + 1;
+			else if( S_CMP(pv,v) )
+				hi = mid;
+			else
+				return false;
+		}
+	}
+	S_NAME(check_size)(alloc,st);
+	int pos = (lo + hi) >> 1; // lo == hi once the search has ended
+	int tail = st->cur - pos;
+	memmove(st->keys + pos + 1, st->keys + pos, tail * sizeof(S_KEY));
+	memmove(st->values + pos + 1, st->values + pos, tail * sizeof(S_VALUE));
+	st->keys[pos] = k;
+	st->values[pos] = v;
+	st->cur++;
+	return true;
+}
+#endif
+
+// Sorted variant: binary search (ordering given by S_CMP) telling whether k is stored.
+INLINE static bool S_NAME(exists)( S_TYPE st, S_KEY k ) {
+	int lo = 0;
+	int hi = st.cur;
+	while( lo < hi ) {
+		int mid = (lo + hi) >> 1;
+		S_KEY probe = st.keys[mid];
+		if( S_CMP(k,probe) )
+			lo = mid + 1;
+		else if( S_CMP(probe,k) )
+			hi = mid;
+		else
+			return true;
+	}
+	return false;
+}
+
+#ifdef S_MAP
+// Sorted map only: returns the value stored for key k, or S_DEFVAL when k is
+// absent. Keys are compared with the plain < and > operators.
+INLINE static S_VALUE S_NAME(find)( S_TYPE st, S_KEY k ) {
+	int lo = 0;
+	int hi = st.cur;
+	while( lo < hi ) {
+		int mid = (lo + hi) >> 1;
+		S_KEY probe = st.keys[mid];
+		if( probe < k )
+			lo = mid + 1;
+		else if( probe > k )
+			hi = mid;
+		else
+			return st.values[mid];
+	}
+	return S_DEFVAL;
+}
+#endif
+
+// Sorted variant: binary-searches for k (ordering given by S_CMP) and removes
+// the matching entry by sliding the following ones down by one slot.
+// Returns true when an entry was removed, false when k was not found.
+INLINE static bool S_NAME(remove)( S_TYPE *st, S_KEY k ) {
+	int lo = 0;
+	int hi = st->cur;
+	while( lo < hi ) {
+		int mid = (lo + hi) >> 1;
+		S_KEY probe = st->keys[mid];
+		if( S_CMP(k,probe) ) {
+			lo = mid + 1;
+			continue;
+		}
+		if( S_CMP(probe,k) ) {
+			hi = mid;
+			continue;
+		}
+		// found at mid
+		int tail = st->cur - mid - 1;
+		memmove(st->keys + mid, st->keys + mid + 1, tail * sizeof(S_KEY));
+#		ifdef S_MAP
+		memmove(st->values + mid, st->values + mid + 1, tail * sizeof(S_VALUE));
+#		endif
+		st->cur--;
+		return true;
+	}
+	return false;
+}
+
+#endif
+
+// Empties the container by resetting the element count only;
+// the capacity and the storage pointers are left as they are.
+INLINE static void S_NAME(reset)( S_TYPE *st ) {
+ st->cur = 0;
+}
+
+// Detaches the storage from the container: count, capacity and array pointers
+// are cleared and the former values array is returned to the caller.
+// Nothing is deallocated by this function.
+INLINE static S_VALUE *S_NAME(free)( S_TYPE *st ) {
+	S_VALUE *detached = st->values;
+	st->values = NULL;
+#	ifdef S_MAP
+	st->keys = NULL;
+#	endif
+	st->max = 0;
+	st->cur = 0;
+	return detached;
+}
+
+// Returns the number of elements currently stored.
+INLINE static int S_NAME(count)( S_TYPE st ) {
+ return st.cur;
+}
+
+// Returns the value stored at index idx; idx is not bounds-checked.
+INLINE static S_VALUE S_NAME(get)( S_TYPE st, int idx ) {
+ return st.values[idx];
+}
+
+// Returns a pointer to the value slot at index idx inside the container's
+// values array; idx is not bounds-checked.
+INLINE static S_VALUE *S_NAME(addr)( S_TYPE st, int idx ) {
+	S_VALUE *base = st.values;
+	return base + idx;
+}
+
+// Returns the value at index 0, or S_DEFVAL when the container is empty.
+INLINE static S_VALUE S_NAME(first)( S_TYPE st ) {
+	if( st.cur == 0 )
+		return S_DEFVAL;
+	return st.values[0];
+}
+
+// Forward iteration step used by for_iter: when idx is below the element count,
+// copies the value at idx into *val and returns true; otherwise returns false
+// and leaves *val untouched.
+INLINE static bool S_NAME(iter_next)( S_TYPE st, S_VALUE *val, int idx ) {
+	bool in_range = idx < st.cur;
+	if( in_range )
+		*val = st.values[idx];
+	return in_range;
+}
+
+#ifdef S_MAP
+// Map only, forward iteration over keys (used by for_iter_key): when idx is
+// below the element count, copies the key at idx into *key and returns true;
+// otherwise returns false and leaves *key untouched.
+INLINE static bool S_NAME(iter_next_key)( S_TYPE st, S_KEY *key, int idx ) {
+	bool in_range = idx < st.cur;
+	if( in_range )
+		*key = st.keys[idx];
+	return in_range;
+}
+#endif
+
+// Backward iteration step used by for_iter_back: while idx is non-negative,
+// copies the value at idx into *val and returns true; returns false once idx
+// has gone below zero. The upper bound of idx is not checked.
+INLINE static bool S_NAME(iter_prev)( S_TYPE st, S_VALUE *val, int idx ) {
+	bool in_range = idx >= 0;
+	if( in_range )
+		*val = st.values[idx];
+	return in_range;
+}
+
+#undef S_NAME
+#undef S_TYPE
+#undef S_VALUE
+#undef S_KEY
+#undef S_ARGS
+#undef STRUCT_NAME
+#undef S_CMP
+#undef S_DEFVAL
+#undef keys
+
+#endif
diff --git a/src/data_struct.h b/src/data_struct.h
new file mode 100644
index 000000000..5c5b9fe4e
--- /dev/null
+++ b/src/data_struct.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C)2015-2026 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef HL_DATA_STRUCT_H
+#define HL_DATA_STRUCT_H
+
+#include
+
+#if defined(__GNUC__) || defined(__clang__)
+#define INLINE inline __attribute__((always_inline))
+#elif defined(_MSC_VER)
+#define INLINE __forceinline
+#else
+#define INLINE inline
+#endif
+
+#define STRUCT_DEF_SIZE 2
+#define for_iter(name,var,set) name##__value var; for(int __idx=0;name##_iter_next(set,&var,__idx);__idx++)
+#define for_iter_key(name,var,set) name##__key var; for(int __idx=0;name##_iter_next_key(set,&var,__idx);__idx++)
+#define for_iter_back(name,var,set) name##__value var; for(int __idx=(set).cur-1;name##_iter_prev(set,&var,__idx);__idx--)
+
+#define S_TYPE ptr_set
+#define S_NAME(name) ptr_set_##name
+#define S_VALUE void*
+#include "data_struct.c"
+#define ptr_set_add(set,v) ptr_set_add_impl(DEF_ALLOC,&(set),v)
+
+#define S_TYPE int_arr
+#define S_NAME(name) int_arr_##name
+#define S_VALUE int
+#include "data_struct.c"
+#define int_arr_add(set,v) int_arr_add_impl(DEF_ALLOC,&(set),v)
+#define int_arr_reserve(set,v) int_arr_reserve_impl(DEF_ALLOC,&(set),v)
+
+#define S_SORTED
+
+#define S_TYPE int_set
+#define S_NAME(name) int_set_##name
+#define S_VALUE int
+#include "data_struct.c"
+#define int_set_add(set,v) int_set_add_impl(DEF_ALLOC,&(set),v)
+
+#define S_MAP
+
+#define S_TYPE int_map
+#define S_NAME(name) int_map_##name
+#define S_KEY int
+#define S_VALUE int
+#include "data_struct.c"
+#define int_map_add(map,k,v) int_map_add_impl(DEF_ALLOC,&(map),k,v)
+#define int_map_replace(map,k,v) int_map_replace_impl(DEF_ALLOC,&(map),k,v)
+
+#define S_TYPE ptr_map
+#define S_NAME(name) ptr_map_##name
+#define S_KEY int
+#define S_VALUE void*
+#include "data_struct.c"
+#define ptr_map_add(map,k,v) ptr_map_add_impl(DEF_ALLOC,&(map),k,v)
+#define ptr_map_replace(map,k,v) ptr_map_replace_impl(DEF_ALLOC,&(map),k,v)
+
+#undef S_MAP
+#undef S_SORTED
+
+#endif
diff --git a/src/hl.h b/src/hl.h
index 6220eb369..e21be7f92 100644
--- a/src/hl.h
+++ b/src/hl.h
@@ -27,7 +27,7 @@
https://github.com/HaxeFoundation/hashlink/wiki/
**/
-#define HL_VERSION 0x011000
+#define HL_VERSION 0x020000
#if defined(_WIN32)
# define HL_WIN
diff --git a/src/hlmodule.h b/src/hlmodule.h
index b2619f932..adf29f9bd 100644
--- a/src/hlmodule.h
+++ b/src/hlmodule.h
@@ -19,6 +19,9 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
+#ifndef HL_MODULE_H
+#define HL_MODULE_H
+
#include
#include
#include "opcodes.h"
@@ -104,9 +107,6 @@ typedef struct {
bool large;
} hl_debug_infos;
-typedef struct _jit_ctx jit_ctx;
-
-
typedef struct {
hl_code *code;
int *types_hashes;
@@ -124,6 +124,8 @@ typedef struct {
#endif
#endif
+typedef struct _jit_ctx jit_ctx;
+
typedef struct {
hl_code *code;
int codesize;
@@ -138,6 +140,7 @@ typedef struct {
jit_ctx *jit_ctx;
hl_module_context ctx;
#ifdef WIN64_UNWIND_TABLES
+ int unwind_table_size;
PRUNTIME_FUNCTION unwind_table;
#endif
} hl_module;
@@ -165,10 +168,4 @@ hl_type *hl_module_resolve_type( hl_module *m, hl_type *t, bool err );
void hl_profile_setup( int sample_count );
void hl_profile_end();
-jit_ctx *hl_jit_alloc();
-void hl_jit_free( jit_ctx *ctx, h_bool can_reset );
-void hl_jit_reset( jit_ctx *ctx, hl_module *m );
-void hl_jit_init( jit_ctx *ctx, hl_module *m );
-int hl_jit_function( jit_ctx *ctx, hl_module *m, hl_function *f );
-void *hl_jit_code( jit_ctx *ctx, hl_module *m, int *codesize, hl_debug_infos **debug, hl_module *previous );
-void hl_jit_patch_method( void *old_fun, void **new_fun_table );
+#endif
diff --git a/src/jit.c b/src/jit.c
index b1f82b0fa..ddf9a187d 100644
--- a/src/jit.c
+++ b/src/jit.c
@@ -19,4753 +19,330 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
-#ifdef _MSC_VER
-#pragma warning(disable:4820)
-#endif
-#include
-#include
-#include "hlsystem.h"
-
-#ifdef __arm__
-# error "JIT does not support ARM processors, only x86 and x86-64 are supported, please use HashLink/C native compilation instead"
-#endif
-
-#ifdef HL_DEBUG
-# define JIT_DEBUG
-#endif
-
-typedef enum {
- Eax = 0,
- Ecx = 1,
- Edx = 2,
- Ebx = 3,
- Esp = 4,
- Ebp = 5,
- Esi = 6,
- Edi = 7,
-#ifdef HL_64
- R8 = 8,
- R9 = 9,
- R10 = 10,
- R11 = 11,
- R12 = 12,
- R13 = 13,
- R14 = 14,
- R15 = 15,
-#endif
- _LAST = 0xFF
-} CpuReg;
-
-typedef enum {
- MOV,
- LEA,
- PUSH,
- ADD,
- SUB,
- IMUL, // only overflow flag changes compared to MUL
- DIV,
- IDIV,
- CDQ,
- CDQE,
- POP,
- RET,
- CALL,
- AND,
- OR,
- XOR,
- CMP,
- TEST,
- NOP,
- SHL,
- SHR,
- SAR,
- INC,
- DEC,
- JMP,
- // FPU
- FSTP,
- FSTP32,
- FLD,
- FLD32,
- FLDCW,
- // SSE
- MOVSD,
- MOVSS,
- COMISD,
- COMISS,
- ADDSD,
- SUBSD,
- MULSD,
- DIVSD,
- ADDSS,
- SUBSS,
- MULSS,
- DIVSS,
- XORPD,
- CVTSI2SD,
- CVTSI2SS,
- CVTSD2SI,
- CVTSD2SS,
- CVTSS2SD,
- CVTSS2SI,
- STMXCSR,
- LDMXCSR,
- // 8-16 bits
- MOV8,
- CMP8,
- TEST8,
- PUSH8,
- MOV16,
- CMP16,
- TEST16,
- // prefetchs
- PREFETCHT0,
- PREFETCHT1,
- PREFETCHT2,
- PREFETCHNTA,
- PREFETCHW,
- // --
- _CPU_LAST
-} CpuOp;
-
-#define JAlways 0
-#define JOverflow 0x80
-#define JULt 0x82
-#define JUGte 0x83
-#define JEq 0x84
-#define JNeq 0x85
-#define JULte 0x86
-#define JUGt 0x87
-#define JParity 0x8A
-#define JNParity 0x8B
-#define JSLt 0x8C
-#define JSGte 0x8D
-#define JSLte 0x8E
-#define JSGt 0x8F
-
-#define JCarry JLt
-#define JZero JEq
-#define JNotZero JNeq
-
-#define B(bv) *ctx->buf.b++ = (unsigned char)(bv)
-#define W(wv) *ctx->buf.w++ = wv
-
-#ifdef HL_64
-# define W64(wv) *ctx->buf.w64++ = wv
-#else
-# define W64(wv) W(wv)
-#endif
-
-static const int SIB_MULT[] = {-1, 0, 1, -1, 2, -1, -1, -1, 3};
-
-#define MOD_RM(mod,reg,rm) B(((mod) << 6) | (((reg)&7) << 3) | ((rm)&7))
-#define SIB(mult,rmult,rbase) B((SIB_MULT[mult]<<6) | (((rmult)&7)<<3) | ((rbase)&7))
-#define IS_SBYTE(c) ( (c) >= -128 && (c) < 128 )
-
-#define AddJump(how,local) { if( (how) == JAlways ) { B(0xE9); } else { B(0x0F); B(how); }; local = BUF_POS(); W(0); }
-#define AddJump_small(how,local) { if( (how) == JAlways ) { B(0xEB); } else B(how - 0x10); local = BUF_POS() | 0x40000000; B(0); }
-#define XJump(how,local) AddJump(how,local)
-#define XJump_small(how,local) AddJump_small(how,local)
+#include
-#define MAX_OP_SIZE 256
+static jit_ctx *current_ctx = NULL;
-#define BUF_POS() ((int)(ctx->buf.b - ctx->startBuf))
-#define RTYPE(r) r->t->kind
-
-#ifdef HL_64
-# define RESERVE_ADDRESS 0x8000000000000000
-#else
-# define RESERVE_ADDRESS 0x80000000
-#endif
-
-#if defined(HL_WIN_CALL) && defined(HL_64)
-# define IS_WINCALL64 1
-#else
-# define IS_WINCALL64 0
-#endif
-
-typedef struct jlist jlist;
-struct jlist {
- int pos;
- int target;
- jlist *next;
-};
-
-typedef struct vreg vreg;
-
-typedef enum {
- RCPU = 0,
- RFPU = 1,
- RSTACK = 2,
- RCONST = 3,
- RADDR = 4,
- RMEM = 5,
- RUNUSED = 6,
- RCPU_CALL = 1 | 8,
- RCPU_8BITS = 1 | 16
-} preg_kind;
-
-typedef struct {
- preg_kind kind;
- int id;
- int lock;
- vreg *holds;
-} preg;
-
-struct vreg {
- int stackPos;
- int size;
- hl_type *t;
- preg *current;
- preg stack;
-};
-
-#define REG_AT(i) (ctx->pregs + (i))
+// Reports a JIT error on stdout (function name, line and message) and, when a
+// context is currently registered in current_ctx, dumps it with hl_emit_dump.
+// current_ctx is cleared before the dump is started.
+void hl_jit_error( const char *msg, const char *func, int line ) {
+	jit_ctx *dump_ctx = current_ctx;
+	printf("*** JIT ERROR %s:%d (%s)****\n", func, line, msg);
+	if( dump_ctx != NULL ) {
+		current_ctx = NULL;
+		hl_emit_dump(dump_ctx);
+	}
+	fflush(stdout);
+}
-#ifdef HL_64
-# define RCPU_COUNT 16
-# define RFPU_COUNT 16
-# ifdef HL_WIN_CALL
-# define CALL_NREGS 4
-# define RCPU_SCRATCH_COUNT 7
-# define RFPU_SCRATCH_COUNT 6
-static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx, R8, R9, R10, R11 };
-static const CpuReg CALL_REGS[] = { Ecx, Edx, R8, R9 };
-# else
-# define CALL_NREGS 6 // TODO : XMM6+XMM7 are FPU reg parameters
-# define RCPU_SCRATCH_COUNT 9
-# define RFPU_SCRATCH_COUNT 16
-static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx, Esi, Edi, R8, R9, R10, R11 };
-static const CpuReg CALL_REGS[] = { Edi, Esi, Edx, Ecx, R8, R9 };
-# endif
-#else
-# define CALL_NREGS 0
-# define RCPU_COUNT 8
-# define RFPU_COUNT 8
-# define RCPU_SCRATCH_COUNT 3
-# define RFPU_SCRATCH_COUNT 8
-static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx };
-#endif
+// Builds the message "Null access ." followed by the name of the field whose
+// hash is fhash, wraps it in a dynamic of type hlt_bytes and passes it to hl_throw.
+void hl_jit_null_field_access( int fhash ) {
+	vbyte *fname = hl_field_name(fhash);
+	hl_buffer *msg = hl_alloc_buffer();
+	hl_buffer_str(msg, USTR("Null access ."));
+	hl_buffer_str(msg, (uchar*)fname);
+	vdynamic *exc = hl_alloc_dynamic(&hlt_bytes);
+	exc->v.ptr = hl_buffer_content(msg,NULL);
+	hl_throw(exc);
+}
-#define XMM(i) ((i) + RCPU_COUNT)
-#define PXMM(i) REG_AT(XMM(i))
-#define REG_IS_FPU(i) ((i) >= RCPU_COUNT)
+// Passes a dynamic of type hlt_bytes carrying the text "Assert" to hl_throw.
+void hl_jit_assert() {
+	vdynamic *exc = hl_alloc_dynamic(&hlt_bytes);
+	exc->v.ptr = USTR("Assert");
+	hl_throw(exc);
+}
-#define PEAX REG_AT(Eax)
-#define PESP REG_AT(Esp)
-#define PEBP REG_AT(Ebp)
+void hl_emit_alloc( jit_ctx *jit );
+void hl_emit_free( jit_ctx *jit );
+void hl_emit_function( jit_ctx *jit );
+void hl_emit_final( jit_ctx *jit );
-#define REG_COUNT (RCPU_COUNT + RFPU_COUNT)
+void hl_regs_alloc( jit_ctx *jit );
+void hl_regs_free( jit_ctx *jit );
+void hl_regs_function( jit_ctx *jit );
-#define ID2(a,b) ((a) | ((b)<<8))
-#define R(id) (ctx->vregs + (id))
-#define ASSERT(i) { printf("JIT ERROR %d (jit.c line %d)\n",i,(int)__LINE__); jit_exit(); }
-#define IS_FLOAT(r) ((r)->t->kind == HF64 || (r)->t->kind == HF32)
-#define RLOCK(r) if( (r)->lock < ctx->currentPos ) (r)->lock = ctx->currentPos
-#define RUNLOCK(r) if( (r)->lock == ctx->currentPos ) (r)->lock = 0
+void hl_codegen_alloc( jit_ctx *jit );
+void hl_codegen_init( jit_ctx *jit );
+void hl_codegen_free( jit_ctx *jit );
+void hl_codegen_flush_consts( jit_ctx *jit );
+void hl_codegen_function( jit_ctx *jit );
+void hl_codegen_final( jit_ctx *jit );
-#define BREAK() B(0xCC)
+void hl_jit_init_regs( regs_config *cfg );
-static preg _unused = { RUNUSED, 0, 0, NULL };
-static preg *UNUSED = &_unused;
+// Allocates a zero-filled JIT context and initializes, in order, its register
+// configuration, its `falloc` allocator, the emitter, the register allocator
+// and the code generator.
+// Returns the new context, or NULL when the context itself cannot be allocated.
+jit_ctx *hl_jit_alloc() {
+	jit_ctx *ctx = (jit_ctx*)malloc(sizeof(jit_ctx));
+	// out of memory: report it to the caller instead of writing through a null pointer
+	if( ctx == NULL )
+		return NULL;
+	memset(ctx,0,sizeof(jit_ctx));
+	hl_jit_init_regs(&ctx->cfg);
+	hl_alloc_init(&ctx->falloc);
+	hl_emit_alloc(ctx);
+	hl_regs_alloc(ctx);
+	hl_codegen_alloc(ctx);
+	return ctx;
+}
-struct _jit_ctx {
- union {
- unsigned char *b;
- unsigned int *w;
- unsigned long long *w64;
- int *i;
- double *d;
- } buf;
- vreg *vregs;
- preg pregs[REG_COUNT];
- vreg *savedRegs[REG_COUNT];
- int savedLocks[REG_COUNT];
- int *opsPos;
- int maxRegs;
- int maxOps;
- int bufSize;
- int totalRegsSize;
- int functionPos;
- int allocOffset;
- int currentPos;
- int nativeArgsCount;
- unsigned char *startBuf;
- hl_module *m;
- hl_function *f;
- jlist *jumps;
- jlist *calls;
- jlist *switchs;
- hl_alloc falloc; // cleared per-function
- hl_alloc galloc;
- vclosure *closure_list;
- hl_debug_infos *debug;
- int c2hl;
- int hl2c;
-#ifdef JIT_CUSTOM_LONGJUMP
- int longjump;
-#endif
- void *static_functions[8];
- bool static_function_offset;
+// Records the code range [start, start + size) of one function in the module's
+// unwind table. Only does something when WIN64_UNWIND_TABLES is defined: each
+// call takes the next slot (ctx->fdef_index), and jit_assert() is invoked when
+// that slot index is not below mod->unwind_table_size.
+void hl_jit_define_function( jit_ctx *ctx, int start, int size ) {
#ifdef WIN64_UNWIND_TABLES
- int unwind_offset;
- int nunwind;
- PRUNTIME_FUNCTION unwind_table;
+ int fid = ctx->fdef_index++;
+ if( fid >= ctx->mod->unwind_table_size ) jit_assert();
+ ctx->mod->unwind_table[fid].BeginAddress = start;
+ ctx->mod->unwind_table[fid].EndAddress = start + size;
#endif
-};
-
-#ifdef WIN64_UNWIND_TABLES
+}
-typedef enum _UNWIND_OP_CODES
-{
- UWOP_PUSH_NONVOL = 0, /* info == register number */
- UWOP_ALLOC_LARGE, /* no info, alloc size in next 2 slots */
- UWOP_ALLOC_SMALL, /* info == size of allocation / 8 - 1 */
- UWOP_SET_FPREG, /* no info, FP = RSP + UNWIND_INFO.FPRegOffset*16 */
- UWOP_SAVE_NONVOL, /* info == register number, offset in next slot */
- UWOP_SAVE_NONVOL_FAR, /* info == register number, offset in next 2 slots */
- UWOP_SAVE_XMM128 = 8, /* info == XMM reg number, offset in next slot */
- UWOP_SAVE_XMM128_FAR, /* info == XMM reg number, offset in next 2 slots */
- UWOP_PUSH_MACHFRAME /* info == 0: no error-code, 1: error-code */
-} UNWIND_CODE_OPS;
+// Guarantees that at least `size` more bytes can be written at ctx->out_pos in
+// ctx->output. When the buffer is too small a larger one is allocated (capacity
+// tripled, starting at 4096, until the request fits), the bytes written so far
+// are copied over and the old buffer is released.
+// Returns false when the allocation fails; the existing buffer is then left untouched.
+static bool jit_code_reserve( jit_ctx *ctx, int size ) {
+	int pos = ctx->out_pos;
+	if( pos + size > ctx->out_max ) {
+		int nsize = ctx->out_max ? ctx->out_max * 3 : 4096;
+		// grow until the requested size fits: the request is `size`,
+		// which is not necessarily ctx->code_size
+		while( pos + size > nsize ) nsize *= 3;
+		unsigned char *nout = malloc(nsize);
+		if( !nout ) return false;
+		// nothing to copy on the first allocation, when the old buffer may still be NULL
+		if( pos > 0 )
+			memcpy(nout,ctx->output,pos);
+		free(ctx->output);
+		ctx->output = nout;
+		ctx->out_max = nsize;
+	}
+	return true;
+}
-void write_uwcode(jit_ctx *ctx, unsigned char offset, UNWIND_CODE_OPS code, unsigned char info)
-{
- B(offset);
- B((code) | (info) << 4);
+// Copies the ctx->code_size bytes found at ctx->code_instrs to the end of the
+// output buffer and advances ctx->out_pos accordingly.
+// Returns false, without copying, when the output buffer cannot be grown.
+static bool jit_code_append( jit_ctx *ctx ) {
+	if( jit_code_reserve(ctx,ctx->code_size) ) {
+		memcpy(ctx->output + ctx->out_pos, ctx->code_instrs, ctx->code_size);
+		ctx->out_pos += ctx->code_size;
+		return true;
+	}
+	return false;
}
-void write_unwind_data(jit_ctx *ctx)
-{
- // All generated functions use a frame pointer, so the same unwind info can be used for all of them
+void hl_jit_init( jit_ctx *ctx, hl_module *m ) {
+ ctx->mod = m;
+#ifdef WIN64_UNWIND_TABLES
unsigned char version = 1;
unsigned char flags = 0;
unsigned char CountOfCodes = 2;
unsigned char SizeOfProlog = 4;
unsigned char FrameRegister = 5; // RBP
unsigned char FrameOffset = 0;
+ jit_code_reserve(ctx,64);
+# define B(v) ctx->output[ctx->out_pos++] = v
+# define UW(offs,code,inf) B(offs); B((code) | (inf) << 4)
B((version) | (flags) << 3);
B(SizeOfProlog);
B(CountOfCodes);
B((FrameRegister) | (FrameOffset) << 4);
- write_uwcode(ctx, 4, UWOP_SET_FPREG, 0);
- write_uwcode(ctx, 1, UWOP_PUSH_NONVOL, 5);
-}
-#endif
-
-#define jit_exit() { hl_debug_break(); exit(-1); }
-#define jit_error(msg) _jit_error(ctx,msg,__LINE__)
-
-#ifndef HL_64
-# ifdef HL_DEBUG
-# define error_i64() jit_error("i64-32")
-# else
-void error_i64() {
- printf("The module you are loading is using 64 bit ints that are not supported by the HL32.\nPlease run using HL64 or compile with -D hl-legacy32");
- jit_exit();
-}
-# endif
-#endif
-
-static void _jit_error( jit_ctx *ctx, const char *msg, int line );
-static void on_jit_error( const char *msg, int_val line );
-
-static preg *pmem( preg *r, CpuReg reg, int offset ) {
- r->kind = RMEM;
- r->id = 0 | (reg << 4) | (offset << 8);
- return r;
-}
-
-static preg *pmem2( preg *r, CpuReg reg, CpuReg reg2, int mult, int offset ) {
- r->kind = RMEM;
- r->id = mult | (reg << 4) | (reg2 << 8);
- r->holds = (void*)(int_val)offset;
- return r;
-}
-
-#ifdef HL_64
-static preg *pcodeaddr( preg *r, int offset ) {
- r->kind = RMEM;
- r->id = 15 | (offset << 4);
- return r;
-}
-#endif
-
-static preg *pconst( preg *r, int c ) {
- r->kind = RCONST;
- r->holds = NULL;
- r->id = c;
- return r;
-}
-
-static preg *pconst64( preg *r, int_val c ) {
-#ifdef HL_64
- if( ((int)c) == c )
- return pconst(r,(int)c);
- r->kind = RCONST;
- r->id = 0xC064C064;
- r->holds = (vreg*)c;
- return r;
-#else
- return pconst(r,(int)c);
-#endif
-}
-
-#ifndef HL_64
-// it is not possible to access direct 64 bit address in x86-64
-static preg *paddr( preg *r, void *p ) {
- r->kind = RADDR;
- r->holds = (vreg*)p;
- return r;
-}
-#endif
-
-static void save_regs( jit_ctx *ctx ) {
- int i;
- for(i=0;isavedRegs[i] = ctx->pregs[i].holds;
- ctx->savedLocks[i] = ctx->pregs[i].lock;
- }
-}
-
-static void restore_regs( jit_ctx *ctx ) {
- int i;
- for(i=0;imaxRegs;i++)
- ctx->vregs[i].current = NULL;
- for(i=0;isavedRegs[i];
- preg *p = ctx->pregs + i;
- p->holds = r;
- p->lock = ctx->savedLocks[i];
- if( r ) r->current = p;
- }
-}
-
-static void jit_buf( jit_ctx *ctx ) {
- if( BUF_POS() > ctx->bufSize - MAX_OP_SIZE ) {
- int nsize = ctx->bufSize * 4 / 3;
- unsigned char *nbuf;
- int curpos;
- if( nsize == 0 ) {
- int i;
- for(i=0;im->code->nfunctions;i++)
- nsize += ctx->m->code->functions[i].nops;
- nsize *= 4;
- }
- if( nsize < ctx->bufSize + MAX_OP_SIZE * 4 ) nsize = ctx->bufSize + MAX_OP_SIZE * 4;
- curpos = BUF_POS();
- nbuf = (unsigned char*)malloc(nsize);
- if( nbuf == NULL ) ASSERT(nsize);
- if( ctx->startBuf ) {
- memcpy(nbuf,ctx->startBuf,curpos);
- free(ctx->startBuf);
- }
- ctx->startBuf = nbuf;
- ctx->buf.b = nbuf + curpos;
- ctx->bufSize = nsize;
- }
-}
-
-static const char *KNAMES[] = { "cpu","fpu","stack","const","addr","mem","unused" };
-#define ERRIF(c) if( c ) { printf("%s(%s,%s)\n",f?f->name:"???",KNAMES[a->kind], KNAMES[b->kind]); ASSERT(0); }
-
-typedef struct {
- const char *name; // single operand
- int r_mem; // r32 / r/m32 r32
- int mem_r; // r/m32 / r32 r/m32
- int r_const; // r32 / imm32 imm32
- int r_i8; // r32 / imm8 imm8
- int mem_const; // r/m32 / imm32 N/A
-} opform;
-
-#define FLAG_LONGOP 0x80000000
-#define FLAG_16B 0x40000000
-#define FLAG_8B 0x20000000
-#define FLAG_DUAL 0x10000000
-
-#define RM(op,id) ((op) | (((id)+1)<<8))
-#define GET_RM(op) (((op) >> ((op) < 0 ? 24 : 8)) & 15)
-#define SBYTE(op) ((op) << 16)
-#define LONG_OP(op) ((op) | FLAG_LONGOP)
-#define OP16(op) LONG_OP((op) | FLAG_16B)
-#define LONG_RM(op,id) LONG_OP(op | (((id) + 1) << 24))
-
-static opform OP_FORMS[_CPU_LAST] = {
- { "MOV", 0x8B, 0x89, 0xB8, 0, RM(0xC7,0) },
- { "LEA", 0x8D },
- { "PUSH", 0x50, RM(0xFF,6), 0x68, 0x6A },
- { "ADD", 0x03, 0x01, RM(0x81,0), RM(0x83,0) },
- { "SUB", 0x2B, 0x29, RM(0x81,5), RM(0x83,5) },
- { "IMUL", LONG_OP(0x0FAF), 0, 0x69 | FLAG_DUAL, 0x6B | FLAG_DUAL },
- { "DIV", RM(0xF7,6), RM(0xF7,6) },
- { "IDIV", RM(0xF7,7), RM(0xF7,7) },
- { "CDQ", 0x99 },
- { "CDQE", 0x98 },
- { "POP", 0x58, RM(0x8F,0) },
- { "RET", 0xC3 },
- { "CALL", RM(0xFF,2), RM(0xFF,2), 0xE8 },
- { "AND", 0x23, 0x21, RM(0x81,4), RM(0x83,4) },
- { "OR", 0x0B, 0x09, RM(0x81,1), RM(0x83,1) },
- { "XOR", 0x33, 0x31, RM(0x81,6), RM(0x83,6) },
- { "CMP", 0x3B, 0x39, RM(0x81,7), RM(0x83,7) },
- { "TEST", 0x85, 0x85/*SWP?*/, RM(0xF7,0) },
- { "NOP", 0x90 },
- { "SHL", RM(0xD3,4), 0, 0, RM(0xC1,4) },
- { "SHR", RM(0xD3,5), 0, 0, RM(0xC1,5) },
- { "SAR", RM(0xD3,7), 0, 0, RM(0xC1,7) },
- { "INC", IS_64 ? RM(0xFF,0) : 0x40, RM(0xFF,0) },
- { "DEC", IS_64 ? RM(0xFF,1) : 0x48, RM(0xFF,1) },
- { "JMP", RM(0xFF,4) },
- // FPU
- { "FSTP", 0, RM(0xDD,3) },
- { "FSTP32", 0, RM(0xD9,3) },
- { "FLD", 0, RM(0xDD,0) },
- { "FLD32", 0, RM(0xD9,0) },
- { "FLDCW", 0, RM(0xD9, 5) },
- // SSE
- { "MOVSD", 0xF20F10, 0xF20F11 },
- { "MOVSS", 0xF30F10, 0xF30F11 },
- { "COMISD", 0x660F2F },
- { "COMISS", LONG_OP(0x0F2F) },
- { "ADDSD", 0xF20F58 },
- { "SUBSD", 0xF20F5C },
- { "MULSD", 0xF20F59 },
- { "DIVSD", 0xF20F5E },
- { "ADDSS", 0xF30F58 },
- { "SUBSS", 0xF30F5C },
- { "MULSS", 0xF30F59 },
- { "DIVSS", 0xF30F5E },
- { "XORPD", 0x660F57 },
- { "CVTSI2SD", 0xF20F2A },
- { "CVTSI2SS", 0xF30F2A },
- { "CVTSD2SI", 0xF20F2D },
- { "CVTSD2SS", 0xF20F5A },
- { "CVTSS2SD", 0xF30F5A },
- { "CVTSS2SI", 0xF30F2D },
- { "STMXCSR", 0, LONG_RM(0x0FAE,3) },
- { "LDMXCSR", 0, LONG_RM(0x0FAE,2) },
- // 8 bits,
- { "MOV8", 0x8A, 0x88, 0, 0xB0, RM(0xC6,0) },
- { "CMP8", 0x3A, 0x38, 0, RM(0x80,7) },
- { "TEST8", 0x84, 0x84, RM(0xF6,0) },
- { "PUSH8", 0, 0, 0x6A | FLAG_8B },
- { "MOV16", OP16(0x8B), OP16(0x89), OP16(0xB8) },
- { "CMP16", OP16(0x3B), OP16(0x39) },
- { "TEST16", OP16(0x85) },
- // prefetchs
- { "PREFETCHT0", 0, LONG_RM(0x0F18,1) },
- { "PREFETCHT1", 0, LONG_RM(0x0F18,2) },
- { "PREFETCHT2", 0, LONG_RM(0x0F18,3) },
- { "PREFETCHNTA", 0, LONG_RM(0x0F18,0) },
- { "PREFETCHW", 0, LONG_RM(0x0F0D,1) },
-};
-
-#ifdef HL_64
-# define REX() if( r64 ) B(r64 | 0x40)
-#else
-# define REX()
-#endif
-
-#define OP(b) \
- if( (b) & 0xFF0000 ) { \
- B((b)>>16); \
- if( r64 ) B(r64 | 0x40); /* also in 32 bits mode */ \
- B((b)>>8); \
- B(b); \
- } else { \
- if( (b) & FLAG_16B ) { \
- B(0x66); \
- REX(); \
- } else {\
- REX(); \
- if( (b) & FLAG_LONGOP ) B((b)>>8); \
- }\
- B(b); \
- }
-
-static bool is_reg8( preg *a ) {
- return a->kind == RSTACK || a->kind == RMEM || a->kind == RCONST || (a->kind == RCPU && a->id != Esi && a->id != Edi);
-}
-
-static void op( jit_ctx *ctx, CpuOp o, preg *a, preg *b, bool mode64 ) {
- opform *f = &OP_FORMS[o];
- int r64 = mode64 && (o != PUSH && o != POP && o != CALL && o != PUSH8 && o < PREFETCHT0) ? 8 : 0;
- switch( o ) {
- case CMP8:
- case TEST8:
- case MOV8:
- if( !is_reg8(a) || !is_reg8(b) )
- ASSERT(0);
- break;
- default:
- break;
- }
- switch( ID2(a->kind,b->kind) ) {
- case ID2(RUNUSED,RUNUSED):
- ERRIF(f->r_mem == 0);
- OP(f->r_mem);
- break;
- case ID2(RCPU,RCPU):
- case ID2(RFPU,RFPU):
- ERRIF( f->r_mem == 0 );
- if( a->id > 7 ) r64 |= 4;
- if( b->id > 7 ) r64 |= 1;
- OP(f->r_mem);
- MOD_RM(3,a->id,b->id);
- break;
- case ID2(RCPU,RFPU):
- case ID2(RFPU,RCPU):
- ERRIF( (f->r_mem>>16) == 0 );
- if( a->id > 7 ) r64 |= 4;
- if( b->id > 7 ) r64 |= 1;
- OP(f->r_mem);
- MOD_RM(3,a->id,b->id);
- break;
- case ID2(RCPU,RUNUSED):
- ERRIF( f->r_mem == 0 );
- if( a->id > 7 ) r64 |= 1;
- if( GET_RM(f->r_mem) > 0 ) {
- OP(f->r_mem);
- MOD_RM(3, GET_RM(f->r_mem)-1, a->id);
- } else
- OP(f->r_mem + (a->id&7));
- break;
- case ID2(RSTACK,RUNUSED):
- ERRIF( f->mem_r == 0 || GET_RM(f->mem_r) == 0 );
- {
- int stackPos = R(a->id)->stackPos;
- OP(f->mem_r);
- if( IS_SBYTE(stackPos) ) {
- MOD_RM(1,GET_RM(f->mem_r)-1,Ebp);
- B(stackPos);
- } else {
- MOD_RM(2,GET_RM(f->mem_r)-1,Ebp);
- W(stackPos);
- }
- }
- break;
- case ID2(RCPU,RCONST):
- ERRIF( f->r_const == 0 && f->r_i8 == 0 );
- if( a->id > 7 ) r64 |= 1;
- {
- int_val cval = b->holds ? (int_val)b->holds : b->id;
- // short byte form
- if( f->r_i8 && IS_SBYTE(cval) ) {
- if( (f->r_i8&FLAG_DUAL) && a->id > 7 ) r64 |= 4;
- OP(f->r_i8);
- if( (f->r_i8&FLAG_DUAL) ) MOD_RM(3,a->id,a->id); else MOD_RM(3,GET_RM(f->r_i8)-1,a->id);
- B((int)cval);
- } else if( GET_RM(f->r_const) > 0 || (f->r_const&FLAG_DUAL) ) {
- if( (f->r_i8&FLAG_DUAL) && a->id > 7 ) r64 |= 4;
- OP(f->r_const&0xFF);
- if( (f->r_i8&FLAG_DUAL) ) MOD_RM(3,a->id,a->id); else MOD_RM(3,GET_RM(f->r_const)-1,a->id);
- if( mode64 && IS_64 && o == MOV ) W64(cval); else W((int)cval);
- } else {
- ERRIF( f->r_const == 0);
- OP((f->r_const&0xFF) + (a->id&7));
- if( mode64 && IS_64 && o == MOV ) W64(cval); else W((int)cval);
- }
- }
- break;
- case ID2(RSTACK,RCPU):
- case ID2(RSTACK,RFPU):
- ERRIF( f->mem_r == 0 );
- if( b->id > 7 ) r64 |= 4;
- {
- int stackPos = R(a->id)->stackPos;
- OP(f->mem_r);
- if( IS_SBYTE(stackPos) ) {
- MOD_RM(1,b->id,Ebp);
- B(stackPos);
- } else {
- MOD_RM(2,b->id,Ebp);
- W(stackPos);
- }
- }
- break;
- case ID2(RCPU,RSTACK):
- case ID2(RFPU,RSTACK):
- ERRIF( f->r_mem == 0 );
- if( a->id > 7 ) r64 |= 4;
- {
- int stackPos = R(b->id)->stackPos;
- OP(f->r_mem);
- if( IS_SBYTE(stackPos) ) {
- MOD_RM(1,a->id,Ebp);
- B(stackPos);
- } else {
- MOD_RM(2,a->id,Ebp);
- W(stackPos);
- }
- }
- break;
- case ID2(RCONST,RUNUSED):
- ERRIF( f->r_const == 0 );
- {
- int_val cval = a->holds ? (int_val)a->holds : a->id;
- OP(f->r_const);
- if( f->r_const & FLAG_8B ) B((int)cval); else W((int)cval);
- }
- break;
- case ID2(RMEM,RUNUSED):
- ERRIF( f->mem_r == 0 );
- {
- int mult = a->id & 0xF;
- int regOrOffs = mult == 15 ? a->id >> 4 : a->id >> 8;
- CpuReg reg = (a->id >> 4) & 0xF;
- if( mult == 15 ) {
- ERRIF(1);
- } else if( mult == 0 ) {
- if( reg > 7 ) r64 |= 1;
- OP(f->mem_r);
- if( regOrOffs == 0 && (reg&7) != Ebp ) {
- MOD_RM(0,GET_RM(f->mem_r)-1,reg);
- if( (reg&7) == Esp ) B(0x24);
- } else if( IS_SBYTE(regOrOffs) ) {
- MOD_RM(1,GET_RM(f->mem_r)-1,reg);
- if( (reg&7) == Esp ) B(0x24);
- B(regOrOffs);
- } else {
- MOD_RM(2,GET_RM(f->mem_r)-1,reg);
- if( (reg&7) == Esp ) B(0x24);
- W(regOrOffs);
- }
- } else {
- // [eax + ebx * M]
- ERRIF(1);
- }
- }
- break;
- case ID2(RCPU, RMEM):
- case ID2(RFPU, RMEM):
- ERRIF( f->r_mem == 0 );
- {
- int mult = b->id & 0xF;
- int regOrOffs = mult == 15 ? b->id >> 4 : b->id >> 8;
- CpuReg reg = (b->id >> 4) & 0xF;
- if( mult == 15 ) {
- int pos;
- if( a->id > 7 ) r64 |= 4;
- OP(f->r_mem);
- MOD_RM(0,a->id,5);
- if( IS_64 ) {
- // offset wrt current code
- pos = BUF_POS() + 4;
- W(regOrOffs - pos);
- } else {
- ERRIF(1);
- }
- } else if( mult == 0 ) {
- if( a->id > 7 ) r64 |= 4;
- if( reg > 7 ) r64 |= 1;
- OP(f->r_mem);
- if( regOrOffs == 0 && (reg&7) != Ebp ) {
- MOD_RM(0,a->id,reg);
- if( (reg&7) == Esp ) B(0x24);
- } else if( IS_SBYTE(regOrOffs) ) {
- MOD_RM(1,a->id,reg);
- if( (reg&7) == Esp ) B(0x24);
- B(regOrOffs);
- } else {
- MOD_RM(2,a->id,reg);
- if( (reg&7) == Esp ) B(0x24);
- W(regOrOffs);
- }
- } else {
- int offset = (int)(int_val)b->holds;
- if( a->id > 7 ) r64 |= 4;
- if( reg > 7 ) r64 |= 1;
- if( regOrOffs > 7 ) r64 |= 2;
- OP(f->r_mem);
- MOD_RM(offset == 0 ? 0 : IS_SBYTE(offset) ? 1 : 2,a->id,4);
- SIB(mult,regOrOffs,reg);
- if( offset ) {
- if( IS_SBYTE(offset) ) B(offset); else W(offset);
- }
- }
- }
- break;
-# ifndef HL_64
- case ID2(RFPU,RADDR):
-# endif
- case ID2(RCPU,RADDR):
- ERRIF( f->r_mem == 0 );
- if( a->id > 7 ) r64 |= 4;
- OP(f->r_mem);
- MOD_RM(0,a->id,5);
- if( IS_64 )
- W64((int_val)b->holds);
- else
- W((int)(int_val)b->holds);
- break;
-# ifndef HL_64
- case ID2(RADDR,RFPU):
-# endif
- case ID2(RADDR,RCPU):
- ERRIF( f->mem_r == 0 );
- if( b->id > 7 ) r64 |= 4;
- OP(f->mem_r);
- MOD_RM(0,b->id,5);
- if( IS_64 )
- W64((int_val)a->holds);
- else
- W((int)(int_val)a->holds);
- break;
- case ID2(RMEM, RCPU):
- case ID2(RMEM, RFPU):
- ERRIF( f->mem_r == 0 );
- {
- int mult = a->id & 0xF;
- int regOrOffs = mult == 15 ? a->id >> 4 : a->id >> 8;
- CpuReg reg = (a->id >> 4) & 0xF;
- if( mult == 15 ) {
- int pos;
- if( b->id > 7 ) r64 |= 4;
- OP(f->mem_r);
- MOD_RM(0,b->id,5);
- if( IS_64 ) {
- // offset wrt current code
- pos = BUF_POS() + 4;
- W(regOrOffs - pos);
- } else {
- ERRIF(1);
- }
- } else if( mult == 0 ) {
- if( b->id > 7 ) r64 |= 4;
- if( reg > 7 ) r64 |= 1;
- OP(f->mem_r);
- if( regOrOffs == 0 && (reg&7) != Ebp ) {
- MOD_RM(0,b->id,reg);
- if( (reg&7) == Esp ) B(0x24);
- } else if( IS_SBYTE(regOrOffs) ) {
- MOD_RM(1,b->id,reg);
- if( (reg&7) == Esp ) B(0x24);
- B(regOrOffs);
- } else {
- MOD_RM(2,b->id,reg);
- if( (reg&7) == Esp ) B(0x24);
- W(regOrOffs);
- }
- } else {
- int offset = (int)(int_val)a->holds;
- if( b->id > 7 ) r64 |= 4;
- if( reg > 7 ) r64 |= 1;
- if( regOrOffs > 7 ) r64 |= 2;
- OP(f->mem_r);
- MOD_RM(offset == 0 ? 0 : IS_SBYTE(offset) ? 1 : 2,b->id,4);
- SIB(mult,regOrOffs,reg);
- if( offset ) {
- if( IS_SBYTE(offset) ) B(offset); else W(offset);
- }
- }
- }
- break;
- default:
- ERRIF(1);
- }
- if( ctx->debug && ctx->f && o == CALL ) {
- preg p;
- op(ctx,MOV,pmem(&p,Esp,-HL_WSIZE),PEBP,true); // erase EIP (clean stack report)
- }
-}
-
-static void op32( jit_ctx *ctx, CpuOp o, preg *a, preg *b ) {
- op(ctx,o,a,b,false);
-}
-
-static void op64( jit_ctx *ctx, CpuOp o, preg *a, preg *b ) {
-#ifndef HL_64
- op(ctx,o,a,b,false);
-#else
- op(ctx,o,a,b,true);
+ UW(4, 3 /*UWOP_SET_FPREG*/, 0);
+ UW(1, 0 /*UWOP_PUSH_NONVOL*/, 5);
+ while( ctx->out_pos & 15 ) B(0);
#endif
-}
-
-static void patch_jump( jit_ctx *ctx, int p ) {
- if( p == 0 ) return;
- if( p & 0x40000000 ) {
- int d;
- p &= 0x3FFFFFFF;
- d = BUF_POS() - (p + 1);
- if( d < -128 || d >= 128 ) ASSERT(d);
- *(char*)(ctx->startBuf + p) = (char)d;
- } else {
- *(int*)(ctx->startBuf + p) = BUF_POS() - (p + 4);
- }
-}
-
-static void patch_jump_to( jit_ctx *ctx, int p, int target ) {
- if( p == 0 ) return;
- if( p & 0x40000000 ) {
- int d;
- p &= 0x3FFFFFFF;
- d = target - (p + 1);
- if( d < -128 || d >= 128 ) ASSERT(d);
- *(char*)(ctx->startBuf + p) = (char)d;
- } else {
- *(int*)(ctx->startBuf + p) = target - (p + 4);
- }
-}
-
-static int stack_size( hl_type *t ) {
- switch( t->kind ) {
- case HUI8:
- case HUI16:
- case HBOOL:
-# ifdef HL_64
- case HI32:
- case HF32:
-# endif
- return sizeof(int_val);
- case HI64:
- default:
- return hl_type_size(t);
+ hl_codegen_init(ctx);
+ jit_code_append(ctx);
+ if( m->code->hasdebug ) {
+ m->jit_debug = (hl_debug_infos*)malloc(sizeof(hl_debug_infos) * m->code->nfunctions);
+ memset(m->jit_debug, -1, sizeof(hl_debug_infos) * m->code->nfunctions);
}
}
-static int call_reg_index( int reg ) {
-# ifdef HL_64
- int i;
- for(i=0;ifalloc);
+ free(ctx);
}
-static bool is_call_reg( preg *p ) {
-# ifdef HL_64
- int i;
- if( p->kind == RFPU )
- return p->id < CALL_NREGS;
- for(i=0;ikind == RCPU && p->id == CALL_REGS[i] )
- return true;
- return false;
-# else
- return false;
-# endif
+// Reset entry point of the JIT API. The body is empty: neither `ctx` nor `m` is read or modified.
+void hl_jit_reset( jit_ctx *ctx, hl_module *m ) {
}
-static preg *alloc_reg( jit_ctx *ctx, preg_kind k ) {
- int i;
- preg *p;
- switch( k ) {
- case RCPU:
- case RCPU_CALL:
- case RCPU_8BITS:
- {
- int off = ctx->allocOffset++;
- const int count = RCPU_SCRATCH_COUNT;
- for(i=0;ipregs + r;
- if( p->lock >= ctx->currentPos ) continue;
- if( k == RCPU_CALL && is_call_reg(p) ) continue;
- if( k == RCPU_8BITS && !is_reg8(p) ) continue;
- if( p->holds == NULL ) {
- RLOCK(p);
- return p;
- }
- }
- for(i=0;ipregs + RCPU_SCRATCH_REGS[(i + off)%count];
- if( p->lock >= ctx->currentPos ) continue;
- if( k == RCPU_CALL && is_call_reg(p) ) continue;
- if( k == RCPU_8BITS && !is_reg8(p) ) continue;
- if( p->holds ) {
- RLOCK(p);
- p->holds->current = NULL;
- p->holds = NULL;
- return p;
- }
- }
- }
- break;
- case RFPU:
- {
- int off = ctx->allocOffset++;
- const int count = RFPU_SCRATCH_COUNT;
- for(i=0;ilock >= ctx->currentPos ) continue;
- if( p->holds == NULL ) {
- RLOCK(p);
- return p;
- }
- }
- for(i=0;ilock >= ctx->currentPos ) continue;
- if( p->holds ) {
- RLOCK(p);
- p->holds->current = NULL;
- p->holds = NULL;
- return p;
- }
- }
+/*
+ Compile one function `f` of module `m`.
+ Releases ctx->falloc, resets the per-function state, then runs the three passes
+ hl_emit_function, hl_regs_function and hl_codegen_function. The function is
+ registered with hl_jit_define_function at the current output position and its
+ code is appended to the output buffer.
+ When m->jit_debug and ctx->code_pos_map are set, a table of f->nops + 1 code
+ offsets (one per opcode, 16-bit when code_size < 0xFFFF, 32-bit otherwise) is
+ stored in m->jit_debug for this function.
+ Returns the offset of the function in the output buffer, or -1 on allocation failure.
+ The global current_ctx is set for the duration of the call and always cleared on exit.
+*/
+int hl_jit_function( jit_ctx *ctx, hl_module *m, hl_function *f ) {
+ hl_free(&ctx->falloc);
+ ctx->mod = m;
+ ctx->fun = f;
+ ctx->reg_instr_count = 0;
+ ctx->code_size = 0;
+ current_ctx = ctx;
+ hl_emit_function(ctx);
+ hl_regs_function(ctx);
+ hl_codegen_function(ctx);
+ int pos = ctx->out_pos;
+ hl_jit_define_function(ctx, pos, ctx->code_size);
+ if( m->jit_debug && ctx->code_pos_map ) {
+ bool compact = ctx->code_size < 0xFFFF;
+ void *debug = malloc((compact ? sizeof(unsigned short) : sizeof(int)) * (f->nops + 1));
+ if( !debug ) {
+ // out of memory: report failure the same way as a failed append instead of writing through NULL
+ current_ctx = NULL;
+ return -1;
+ }
+ for(int i=0;i<=f->nops;i++) {
+ // opcode index -> emit position -> register-pass position -> machine code offset
+ int ipos = ctx->emit_pos_map[i];
+ int rpos = ctx->reg_pos_map[ipos];
+ int cpos = ctx->code_pos_map[rpos];
+ if( compact )
+ ((unsigned short*)debug)[i] = (unsigned short)cpos;
+ else
+ ((int*)debug)[i] = cpos;
+ }
- break;
- default:
- ASSERT(k);
- }
- ASSERT(0); // out of registers !
- return NULL;
-}
-
-static preg *fetch( vreg *r ) {
- if( r->current )
- return r->current;
- return &r->stack;
-}
-
-static void scratch( preg *r ) {
- if( r && r->holds ) {
- r->holds->current = NULL;
- r->holds = NULL;
- r->lock = 0;
+ int fid = (int)(f - m->code->functions);
+ m->jit_debug[fid].start = pos;
+ m->jit_debug[fid].offsets = debug;
+ m->jit_debug[fid].large = !compact;
}
+ // clear the global context on every exit path, including when the append fails
+ bool appended = jit_code_append(ctx);
+ current_ctx = NULL;
+ return appended ? pos : -1;
}
-static preg *copy( jit_ctx *ctx, preg *to, preg *from, int size );
-
-static void load( jit_ctx *ctx, preg *r, vreg *v ) {
- preg *from = fetch(v);
- if( from == r || v->size == 0 ) return;
- if( r->holds ) r->holds->current = NULL;
- if( v->current ) {
- v->current->holds = NULL;
- from = r;
- }
- r->holds = v;
- v->current = r;
- copy(ctx,r,from,v->size);
-}
+// Entry points of the C->HL and HL->C call stubs. Both point to hl_jit_assert
+// until hl_jit_code() replaces them with addresses inside the final code.
+static void *call_jit_c2hl = hl_jit_assert;
+static void *call_jit_hl2c = hl_jit_assert;
+// Number of integer and float argument registers; hl_jit_code() sets them
+// from ctx->cfg.regs.nargs and ctx->cfg.floats.nargs.
+static int arg_reg_count = 0;
+static int arg_fp_count = 0;
-static preg *alloc_fpu( jit_ctx *ctx, vreg *r, bool andLoad ) {
- preg *p = fetch(r);
- if( p->kind != RFPU ) {
- if( !IS_FLOAT(r) && (IS_64 || r->t->kind != HI64) ) ASSERT(r->t->kind);
- p = alloc_reg(ctx, RFPU);
- if( andLoad )
- load(ctx,p,r);
- else {
- if( r->current )
- r->current->holds = NULL;
- r->current = p;
- p->holds = r;
+// Pick the argument register for the next argument of type `t`.
+// `*rp` and `*fp` count the integer and float argument registers handed out so far.
+// HF32/HF64 arguments draw from the float pool (arg_fp_count registers), every other
+// kind from the integer pool (arg_reg_count registers). With IS_WINCALL64 both
+// counters advance together, so a slot used by one kind is consumed for the other.
+// Returns the index of the register inside its pool, or -1 when that pool is
+// exhausted (callers in this file then read/write the argument on the stack).
+static int get_next_reg( hl_type *t, int *rp, int *fp ) {
+ if( t->kind == HF32 || t->kind == HF64 ) {
+ if( *fp < arg_fp_count ) {
+ int r = (*fp)++;
+ if( IS_WINCALL64 ) (*rp)++;
+ return r;
}
- } else
- RLOCK(p);
- return p;
-}
-
-static void reg_bind( vreg *r, preg *p ) {
- if( r->current )
- r->current->holds = NULL;
- r->current = p;
- p->holds = r;
-}
-
-static preg *alloc_cpu( jit_ctx *ctx, vreg *r, bool andLoad ) {
- preg *p = fetch(r);
- if( p->kind != RCPU ) {
-# ifndef HL_64
- if( r->t->kind == HI64 ) return alloc_fpu(ctx,r,andLoad);
- if( r->size > 4 ) ASSERT(r->size);
-# endif
- p = alloc_reg(ctx, RCPU);
- if( andLoad )
- load(ctx,p,r);
- else
- reg_bind(r,p);
- } else
- RLOCK(p);
- return p;
-}
-
-// allocate a register that is not a call parameter
-static preg *alloc_cpu_call( jit_ctx *ctx, vreg *r ) {
- preg *p = fetch(r);
- if( p->kind != RCPU ) {
-# ifndef HL_64
- if( r->t->kind == HI64 ) return alloc_fpu(ctx,r,true);
- if( r->size > 4 ) ASSERT(r->size);
-# endif
- p = alloc_reg(ctx, RCPU_CALL);
- load(ctx,p,r);
- } else if( is_call_reg(p) ) {
- preg *p2 = alloc_reg(ctx, RCPU_CALL);
- op64(ctx,MOV,p2,p);
- scratch(p);
- reg_bind(r,p2);
- return p2;
- } else
- RLOCK(p);
- return p;
-}
-
-static preg *fetch32( jit_ctx *ctx, vreg *r ) {
- if( r->current )
- return r->current;
- // make sure that the register is correctly erased
- if( r->size < 4 ) {
- preg *p = alloc_cpu(ctx, r, true);
- RUNLOCK(p);
- return p;
+ return -1;
}
- return fetch(r);
-}
-
-// make sure higher bits are zeroes
-static preg *alloc_cpu64( jit_ctx *ctx, vreg *r, bool andLoad ) {
-# ifndef HL_64
- return alloc_cpu(ctx,r,andLoad);
-# else
- preg *p = fetch(r);
- if( !andLoad ) ASSERT(0);
- if( p->kind != RCPU ) {
- p = alloc_reg(ctx, RCPU);
- op64(ctx,XOR,p,p);
- load(ctx,p,r);
- } else {
- // remove higher bits
- preg tmp;
- op64(ctx,SHL,p,pconst(&tmp,32));
- op64(ctx,SHR,p,pconst(&tmp,32));
- RLOCK(p);
+ // integer/pointer arguments are bounded by the integer register count, not the float one
+ if( *rp < arg_reg_count ) {
+ int r = (*rp)++;
+ if( IS_WINCALL64 ) (*fp)++;
+ return r;
}
- return p;
-# endif
+ return -1;
}
-// make sure the register can be used with 8 bits access
-static preg *alloc_cpu8( jit_ctx *ctx, vreg *r, bool andLoad ) {
- preg *p = fetch(r);
- if( p->kind != RCPU ) {
- p = alloc_reg(ctx, RCPU_8BITS);
- load(ctx,p,r);
- } else if( !is_reg8(p) ) {
- preg *p2 = alloc_reg(ctx, RCPU_8BITS);
- op64(ctx,MOV,p2,p);
- scratch(p);
- reg_bind(r,p2);
- return p2;
- } else
- RLOCK(p);
- return p;
+// Wrapper lookup installed as hl_setup.get_wrapper by hl_jit_code().
+// The function type `ft` is not inspected: the current call_jit_hl2c pointer is returned for every type.
+static void *default_wrapper( hl_type *ft ) {
+ return call_jit_hl2c;
}
-static preg *copy( jit_ctx *ctx, preg *to, preg *from, int size ) {
- if( size == 0 || to == from ) return to;
- switch( ID2(to->kind,from->kind) ) {
- case ID2(RMEM,RCPU):
- case ID2(RSTACK,RCPU):
- case ID2(RCPU,RSTACK):
- case ID2(RCPU,RMEM):
- case ID2(RCPU,RCPU):
-# ifndef HL_64
- case ID2(RCPU,RADDR):
- case ID2(RADDR,RCPU):
-# endif
- switch( size ) {
- case 1:
- if( to->kind == RCPU ) {
- op64(ctx,XOR,to,to);
- if( !is_reg8(to) ) {
- preg p;
- op32(ctx,MOV16,to,from);
- op32(ctx,SHL,to,pconst(&p,24));
- op32(ctx,SHR,to,pconst(&p,24));
- break;
- }
- }
- if( !is_reg8(from) ) {
- preg *r = alloc_reg(ctx, RCPU_CALL);
- op32(ctx, MOV, r, from);
- RUNLOCK(r);
- op32(ctx,MOV8,to,r);
- return from;
- }
- op32(ctx,MOV8,to,from);
- break;
- case 2:
- if( to->kind == RCPU )
- op64(ctx,XOR,to,to);
- op32(ctx,MOV16,to,from);
+/*
+ Call the JIT-compiled function `f` of function type `t` from C.
+ `args[i]` points to the value of argument i for the scalar kinds listed in the
+ switch below, and is the value itself for every other kind. Each argument is
+ placed either in vargs.regs (integer registers first, float registers after
+ arg_reg_count entries) or, when get_next_reg returns -1, in vargs.stack, which
+ is filled from its end downwards.
+ The call goes through the call_jit_c2hl stub, which receives `f`, the vargs
+ block and the number of stack arguments.
+ Returns a pointer into `ret` for integer and float return kinds, and the raw
+ pointer returned by the stub for every other kind.
+ Raises an HL error when the function has more than MAX_ARGS arguments.
+*/
+static void *callback_c2hl( void *f, hl_type *t, void **args, vdynamic *ret ) {
+ int nargs = t->fun->nargs;
+ if( nargs > MAX_ARGS )
+ hl_error("Too many arguments for dynamic call");
+ struct {
+ void *regs[MAX_ARGS];
+ void *stack[MAX_ARGS];
+ } vargs;
+ int rp = 0, fp = 0, sp = MAX_ARGS;
+ for(int i=0;i<t->fun->nargs;i++) {
+ hl_type *at = t->fun->args[i];
+ void *v = args[i];
+ int r = get_next_reg(at,&rp,&fp);
+ int_val iv;
+ switch( at->kind ) {
+ case HBOOL:
+ case HUI8:
+ case HUI16:
+ case HI32:
+ case HF32:
+ // values of these kinds are read as one int through the pointer
+ iv = *(int*)v;
break;
- case 4:
- op32(ctx,MOV,to,from);
+ case HI64:
+ case HGUID:
+ case HF64:
+ // values of these kinds are read as one int_val through the pointer
+ iv = *(int_val*)v;
break;
- case 8:
- if( IS_64 ) {
- op64(ctx,MOV,to,from);
- break;
- }
default:
- ASSERT(size);
- }
- return to->kind == RCPU ? to : from;
- case ID2(RFPU,RFPU):
- case ID2(RMEM,RFPU):
- case ID2(RSTACK,RFPU):
- case ID2(RFPU,RMEM):
- case ID2(RFPU,RSTACK):
- switch( size ) {
- case 8:
- op64(ctx,MOVSD,to,from);
+ iv = (int_val)v;
break;
- case 4:
- op32(ctx,MOVSS,to,from);
- break;
- default:
- ASSERT(size);
}
- return to->kind == RFPU ? to : from;
- case ID2(RMEM,RSTACK):
- {
- vreg *rfrom = R(from->id);
- if( IS_FLOAT(rfrom) )
- return copy(ctx,to,alloc_fpu(ctx,rfrom,true),size);
- return copy(ctx,to,alloc_cpu(ctx,rfrom,true),size);
- }
- case ID2(RMEM,RMEM):
- case ID2(RSTACK,RMEM):
- case ID2(RSTACK,RSTACK):
-# ifndef HL_64
- case ID2(RMEM,RADDR):
- case ID2(RSTACK,RADDR):
- case ID2(RADDR,RSTACK):
-# endif
- {
- preg *tmp;
- if( (!IS_64 && size == 8) || (to->kind == RSTACK && IS_FLOAT(R(to->id))) || (from->kind == RSTACK && IS_FLOAT(R(from->id))) ) {
- tmp = alloc_reg(ctx, RFPU);
- op64(ctx,size == 8 ? MOVSD : MOVSS,tmp,from);
- } else {
- tmp = alloc_reg(ctx, RCPU);
- copy(ctx,tmp,from,size);
- }
- return copy(ctx,to,tmp,size);
- }
-# ifdef HL_64
- case ID2(RCPU,RADDR):
- case ID2(RMEM,RADDR):
- case ID2(RSTACK,RADDR):
- {
- preg p;
- preg *tmp = alloc_reg(ctx, RCPU);
- op64(ctx,MOV,tmp,pconst64(&p,(int_val)from->holds));
- return copy(ctx,to,pmem(&p,tmp->id,0),size);
- }
- case ID2(RADDR,RCPU):
- case ID2(RADDR,RMEM):
- case ID2(RADDR,RSTACK):
- {
- preg p;
- preg *tmp = alloc_reg(ctx, RCPU);
- op64(ctx,MOV,tmp,pconst64(&p,(int_val)to->holds));
- return copy(ctx,pmem(&p,tmp->id,0),from,size);
- }
-# endif
+ if( r >= 0 )
+ vargs.regs[r + (at->kind == HF32 || at->kind == HF64 ? arg_reg_count : 0)] = (void*)iv;
+ else
+ vargs.stack[--sp] = (void*)iv;
+ }
+ switch( t->fun->ret->kind ) {
+ case HUI8:
+ case HUI16:
+ case HI32:
+ case HBOOL:
+ ret->v.i = ((int (*)(void *, void *, int))call_jit_c2hl)(f, &vargs, MAX_ARGS - sp);
+ return &ret->v.i;
+ case HI64:
+ case HGUID:
+ ret->v.i64 = ((int64 (*)(void *, void *, int))call_jit_c2hl)(f, &vargs, MAX_ARGS - sp);
+ return &ret->v.i64;
+ case HF32:
+ ret->v.f = ((float (*)(void *, void *, int))call_jit_c2hl)(f, &vargs, MAX_ARGS - sp);
+ return &ret->v.f;
+ case HF64:
+ ret->v.d = ((double (*)(void *, void *, int))call_jit_c2hl)(f, &vargs, MAX_ARGS - sp);
+ return &ret->v.d;
default:
- break;
+ return ((void *(*)(void *, void *, int))call_jit_c2hl)(f, &vargs, MAX_ARGS - sp);
}
- printf("copy(%s,%s)\n",KNAMES[to->kind], KNAMES[from->kind]);
- ASSERT(0);
- return NULL;
}
-static void store( jit_ctx *ctx, vreg *r, preg *v, bool bind ) {
- if( r->current && r->current != v ) {
- r->current->holds = NULL;
- r->current = NULL;
- }
- v = copy(ctx,&r->stack,v,r->size);
- if( IS_FLOAT(r) != (v->kind == RFPU) )
- ASSERT(0);
- if( bind && r->current != v && (v->kind == RCPU || v->kind == RFPU) ) {
- scratch(v);
- r->current = v;
- v->holds = r;
+/*
+ Common part of the HL->C wrappers: rebuild the dynamic argument list of the
+ wrapped closure `c` and call c->wrappedFun through hl_dyn_call.
+ `regs` holds the saved argument registers (integer registers first, float
+ registers starting at index arg_reg_count) and `stack_args` points to the
+ arguments that were passed on the stack.
+ Dynamic arguments are taken as-is; other values are boxed with hl_make_dyn.
+ Raises an HL error when the closure has more than MAX_ARGS arguments.
+*/
+static vdynamic *callback_hl2c( vclosure_wrapper *c, char *stack_args, void **regs ) {
+ vdynamic *args[MAX_ARGS];
+ int nargs = c->cl.t->fun->nargs;
+ if( nargs > MAX_ARGS )
+ hl_error("Too many arguments for wrapped call");
+ int rp = 0, fp = 0;
+ rp++; // skip fptr in HL64 - was passed as arg0
+ if( IS_WINCALL64 ) fp++;
+ for(int i=0;i<nargs;i++) {
+ hl_type *t = c->cl.t->fun->args[i];
+ int creg = get_next_reg(t,&rp,&fp);
+ if( creg < 0 ) {
+ // no register left for this argument: read it from the stack area and step past it
+ args[i] = hl_is_dynamic(t) ? *(vdynamic**)stack_args : hl_make_dyn(stack_args,t);
+ stack_args += (t->kind == HF64 ? 8 : HL_WSIZE);
+ } else if( hl_is_dynamic(t) ) {
+ args[i] = *(vdynamic**)(regs + creg);
+ } else if( t->kind == HF32 || t->kind == HF64 ) {
+ // NOTE(review): HF32 register values are boxed as hlt_f64 too; presumably the stub saves float registers as doubles - confirm
+ args[i] = hl_make_dyn(regs + arg_reg_count + creg,&hlt_f64);
+ } else {
+ args[i] = hl_make_dyn(regs + creg,t);
+ }
}
+ return hl_dyn_call(c->wrappedFun,args,nargs);
}
-static void store_result( jit_ctx *ctx, vreg *r ) {
-# ifndef HL_64
- switch( r->t->kind ) {
- case HF64:
- scratch(r->current);
- op64(ctx,FSTP,&r->stack,UNUSED);
- break;
- case HF32:
- scratch(r->current);
- op64(ctx,FSTP32,&r->stack,UNUSED);
- break;
+// HL->C wrapper returning its result as a pointer-sized value.
+// Calls the wrapped closure through callback_hl2c, then converts the dynamic result
+// according to the closure's declared return type: HVOID gives NULL,
+// HUI8/HUI16/HI32/HBOOL go through hl_dyn_casti, HI64/HGUID through hl_dyn_casti64,
+// and every other kind through hl_dyn_castp.
+void *hl_jit_wrapper_ptr( vclosure_wrapper *c, char *stack_args, void **regs ) {
+ vdynamic *ret = callback_hl2c(c, stack_args, regs);
+ hl_type *tret = c->cl.t->fun->ret;
+ switch( tret->kind ) {
+ case HVOID:
+ return NULL;
+ case HUI8:
+ case HUI16:
+ case HI32:
+ case HBOOL:
+ return (void*)(int_val)hl_dyn_casti(&ret,&hlt_dyn,tret);
case HI64:
- scratch(r->current);
- error_i64();
- break;
+ case HGUID:
+ return (void*)(int_val)hl_dyn_casti64(&ret,&hlt_dyn);
default:
-# endif
- store(ctx,r,IS_FLOAT(r) ? REG_AT(XMM(0)) : PEAX,true);
-# ifndef HL_64
- break;
- }
-# endif
-}
-
-static void op_mov( jit_ctx *ctx, vreg *to, vreg *from ) {
- preg *r = fetch(from);
-# ifndef HL_64
- if( to->t->kind == HI64 ) {
- error_i64();
- return;
+ return hl_dyn_castp(&ret,&hlt_dyn,tret);
}
-# endif
- if( from->t->kind == HF32 && r->kind != RFPU )
- r = alloc_fpu(ctx,from,true);
- store(ctx, to, r, true);
}
-static void copy_to( jit_ctx *ctx, vreg *to, preg *from ) {
- store(ctx,to,from,true);
+// HL->C wrapper returning a double: calls the wrapped closure through
+// callback_hl2c and converts the dynamic result with hl_dyn_castd.
+double hl_jit_wrapper_d( vclosure_wrapper *c, char *stack_args, void **regs ) {
+	vdynamic *result = callback_hl2c(c, stack_args, regs);
+	double value = hl_dyn_castd(&result,&hlt_dyn);
+	return value;
}
-static void copy_from( jit_ctx *ctx, preg *to, vreg *from ) {
- copy(ctx,to,fetch(from),from->size);
+/*
+	Finalize the JIT output for module `m`.
+	Flushes the pending constants, appends them to the output buffer, then copies
+	the whole buffer into executable memory whose size is rounded up to a
+	multiple of 4096 bytes. After the copy, hl_emit_final and hl_codegen_final run
+	with ctx->final_code set, the argument register counts and the c2hl/hl2c stub
+	addresses are published, and the hl_setup callbacks are installed.
+	`*codesize` receives the rounded size and `*debug` receives m->jit_debug.
+	`previous` is not used here.
+	Returns the executable code, or NULL when an allocation fails.
+*/
+void *hl_jit_code( jit_ctx *ctx, hl_module *m, int *codesize, hl_debug_infos **debug, hl_module *previous ) {
+	hl_codegen_flush_consts(ctx);
+	// a failed append would silently drop the flushed bytes: report it like the allocation failure below
+	if( !jit_code_append(ctx) ) return NULL;
+	int used = ctx->out_pos;
+	int size = used;
+	if( size & 4095 ) size += 4096 - (size&4095);
+	unsigned char *code = (unsigned char*)hl_alloc_executable_memory(size);
+	if( code == NULL ) return NULL;
+	// copy only the bytes that were emitted (the rounded size may exceed what ctx->output holds), then zero the padding
+	if( used > 0 ) memcpy(code,ctx->output,used);
+	memset(code + used,0,size - used);
+	*codesize = size;
+	*debug = m->jit_debug;
+	ctx->final_code = code;
+	hl_emit_final(ctx);
+	hl_codegen_final(ctx);
+	arg_reg_count = ctx->cfg.regs.nargs;
+	arg_fp_count = ctx->cfg.floats.nargs;
+	call_jit_c2hl = ctx->final_code + ctx->code_funs.c2hl;
+	call_jit_hl2c = ctx->final_code + ctx->code_funs.hl2c;
+# ifdef WIN64_UNWIND_TABLES
+	ctx->mod->unwind_table_size = ctx->fdef_index;
+# endif
+	hl_setup.get_wrapper = default_wrapper;
+	hl_setup.static_call = callback_c2hl;
+	return code;
}
-static void store_const( jit_ctx *ctx, vreg *r, int c ) {
- preg p;
- if( c == 0 )
- op(ctx,XOR,alloc_cpu(ctx,r,false),alloc_cpu(ctx,r,false),r->size == 8);
- else if( r->size == 8 )
- op64(ctx,MOV,alloc_cpu(ctx,r,false),pconst64(&p,c));
- else
- op32(ctx,MOV,alloc_cpu(ctx,r,false),pconst(&p,c));
- store(ctx,r,r->current,false);
+// Method patching entry point. The body only calls jit_assert(); `fun` and `newt` are not used.
+// NOTE(review): presumably a placeholder until patching is implemented for this JIT - confirm.
+void hl_jit_patch_method( void*fun, void**newt ) {
+ jit_assert();
}
-
-static void discard_regs( jit_ctx *ctx, bool native_call ) {
- int i;
- for(i=0;ipregs + RCPU_SCRATCH_REGS[i];
- if( r->holds ) {
- r->holds->current = NULL;
- r->holds = NULL;
- }
- }
- for(i=0;ipregs + XMM(i);
- if( r->holds ) {
- r->holds->current = NULL;
- r->holds = NULL;
- }
- }
-}
-
-static int pad_before_call( jit_ctx *ctx, int size ) {
- int total = size + ctx->totalRegsSize + HL_WSIZE * 2; // EIP+EBP
- if( total & 15 ) {
- int pad = 16 - (total & 15);
- preg p;
- if( pad ) op64(ctx,SUB,PESP,pconst(&p,pad));
- size += pad;
- }
- return size;
-}
-
-static void push_reg( jit_ctx *ctx, vreg *r ) {
- preg p;
- switch( stack_size(r->t) ) {
- case 1:
- op64(ctx,SUB,PESP,pconst(&p,1));
- op32(ctx,MOV8,pmem(&p,Esp,0),alloc_cpu8(ctx,r,true));
- break;
- case 2:
- op64(ctx,SUB,PESP,pconst(&p,2));
- op32(ctx,MOV16,pmem(&p,Esp,0),alloc_cpu(ctx,r,true));
- break;
- case 4:
- if( r->size < 4 )
- alloc_cpu(ctx,r,true); // force fetch (higher bits set to 0)
- if( !IS_64 ) {
- if( r->current != NULL && r->current->kind == RFPU ) scratch(r->current);
- op32(ctx,PUSH,fetch(r),UNUSED);
- } else {
- // pseudo push32 (not available)
- op64(ctx,SUB,PESP,pconst(&p,4));
- op32(ctx,MOV,pmem(&p,Esp,0),alloc_cpu(ctx,r,true));
- }
- break;
- case 8:
- if( fetch(r)->kind == RFPU ) {
- op64(ctx,SUB,PESP,pconst(&p,8));
- op64(ctx,MOVSD,pmem(&p,Esp,0),fetch(r));
- } else if( IS_64 )
- op64(ctx,PUSH,fetch(r),UNUSED);
- else if( r->stack.kind == RSTACK ) {
- scratch(r->current);
- r->stackPos += 4;
- op32(ctx,PUSH,&r->stack,UNUSED);
- r->stackPos -= 4;
- op32(ctx,PUSH,&r->stack,UNUSED);
- } else
- ASSERT(0);
- break;
- default:
- ASSERT(r->size);
- }
-}
-
-static int begin_native_call( jit_ctx *ctx, int nargs ) {
- ctx->nativeArgsCount = nargs;
- return pad_before_call(ctx, nargs > CALL_NREGS ? (nargs - CALL_NREGS) * HL_WSIZE : 0);
-}
-
-static preg *alloc_native_arg( jit_ctx *ctx ) {
-# ifdef HL_64
- int rid = ctx->nativeArgsCount - 1;
- preg *r = rid < CALL_NREGS ? REG_AT(CALL_REGS[rid]) : alloc_reg(ctx,RCPU_CALL);
- scratch(r);
- return r;
-# else
- return alloc_reg(ctx, RCPU);
-# endif
-}
-
-static void set_native_arg( jit_ctx *ctx, preg *r ) {
- if( r->kind == RSTACK ) {
- vreg *v = ctx->vregs + r->id;
- if( v->size < 4 )
- r = fetch32(ctx, v);
- }
-# ifdef HL_64
- if( r->kind == RFPU ) ASSERT(0);
- int rid = --ctx->nativeArgsCount;
- preg *target;
- if( rid >= CALL_NREGS ) {
- op64(ctx,PUSH,r,UNUSED);
- return;
- }
- target = REG_AT(CALL_REGS[rid]);
- if( target != r ) {
- op64(ctx, MOV, target, r);
- scratch(target);
- }
-# else
- op32(ctx,PUSH,r,UNUSED);
-# endif
-}
-
-static void set_native_arg_fpu( jit_ctx *ctx, preg *r, bool isf32 ) {
-# ifdef HL_64
- if( r->kind == RCPU ) ASSERT(0);
- // can only be used if last argument !!
- ctx->nativeArgsCount--;
- preg *target = REG_AT(XMM(IS_WINCALL64 ? ctx->nativeArgsCount : 0));
- if( target != r ) {
- op64(ctx, isf32 ? MOVSS : MOVSD, target, r);
- scratch(target);
- }
-# else
- op32(ctx,PUSH,r,UNUSED);
-# endif
-}
-
-typedef struct {
- int nextCpu;
- int nextFpu;
- int mapped[REG_COUNT];
-} call_regs;
-
-static int select_call_reg( call_regs *regs, hl_type *t, int id ) {
-# ifndef HL_64
- return -1;
-#else
- bool isFloat = t->kind == HF32 || t->kind == HF64;
-# ifdef HL_WIN_CALL
- int index = regs->nextCpu++;
-# else
- int index = isFloat ? regs->nextFpu++ : regs->nextCpu++;
-# endif
- if( index >= CALL_NREGS )
- return -1;
- int reg = isFloat ? XMM(index) : CALL_REGS[index];
- regs->mapped[reg] = id + 1;
- return reg;
-#endif
-}
-
-static int mapped_reg( call_regs *regs, int id ) {
-# ifndef HL_64
- return -1;
-#else
- int i;
- for(i=0;imapped[r] == id + 1 ) return r;
- r = XMM(i);
- if( regs->mapped[r] == id + 1 ) return r;
- }
- return -1;
-#endif
-}
-
-static int prepare_call_args( jit_ctx *ctx, int count, int *args, vreg *vregs, int extraSize ) {
- int i;
- int size = extraSize, paddedSize;
- call_regs ctmp = {0};
- for(i=0;it, i);
- if( cr >= 0 ) {
- preg *c = REG_AT(cr);
- preg *cur = fetch(r);
- if( cur != c ) {
- copy(ctx,c,cur,r->size);
- scratch(c);
- }
- RLOCK(c);
- continue;
- }
- size += stack_size(r->t);
- }
- paddedSize = pad_before_call(ctx,size);
- for(i=0;i= 0 ) continue;
- push_reg(ctx,r);
- if( r->current ) RUNLOCK(r->current);
- }
- return paddedSize;
-}
-
-static void op_call( jit_ctx *ctx, preg *r, int size ) {
- preg p;
-# ifdef JIT_DEBUG
- if( IS_64 && size >= 0 ) {
- int jchk;
- op32(ctx,TEST,PESP,pconst(&p,15));
- XJump(JZero,jchk);
- BREAK(); // unaligned ESP
- patch_jump(ctx, jchk);
- }
-# endif
- if( IS_WINCALL64 ) {
- // MSVC requires 32bytes of free space here
- op64(ctx,SUB,PESP,pconst(&p,32));
- if( size >= 0 ) size += 32;
- }
- op32(ctx, CALL, r, UNUSED);
- if( size > 0 ) op64(ctx,ADD,PESP,pconst(&p,size));
-}
-
-static void call_native( jit_ctx *ctx, void *nativeFun, int size ) {
- bool isExc = nativeFun == hl_assert || nativeFun == hl_throw || nativeFun == on_jit_error;
- preg p;
- // native function, already resolved
- op64(ctx,MOV,PEAX,pconst64(&p,(int_val)nativeFun));
- op_call(ctx,PEAX, isExc ? -1 : size);
- if( isExc )
- return;
- discard_regs(ctx, true);
-}
-
-static void op_call_fun( jit_ctx *ctx, vreg *dst, int findex, int count, int *args ) {
- int fid = findex < 0 ? -1 : ctx->m->functions_indexes[findex];
- bool isNative = fid >= ctx->m->code->nfunctions;
- int size = prepare_call_args(ctx,count,args,ctx->vregs,0);
- preg p;
- if( fid < 0 ) {
- ASSERT(fid);
- } else if( isNative ) {
- call_native(ctx,ctx->m->functions_ptrs[findex],size);
- } else {
- int cpos = BUF_POS() + (IS_WINCALL64 ? 4 : 0);
-# ifdef JIT_DEBUG
- if( IS_64 ) cpos += 13; // ESP CHECK
-# endif
- if( ctx->m->functions_ptrs[findex] ) {
- // already compiled
- op_call(ctx,pconst(&p,(int)(int_val)ctx->m->functions_ptrs[findex] - (cpos + 5)), size);
- } else if( ctx->m->code->functions + fid == ctx->f ) {
- // our current function
- op_call(ctx,pconst(&p, ctx->functionPos - (cpos + 5)), size);
- } else {
- // stage for later
- jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist));
- j->pos = cpos;
- j->target = findex;
- j->next = ctx->calls;
- ctx->calls = j;
- op_call(ctx,pconst(&p,0), size);
- }
- discard_regs(ctx, false);
- }
- if( dst )
- store_result(ctx,dst);
-}
-
-static void op_enter( jit_ctx *ctx ) {
- preg p;
- op64(ctx, PUSH, PEBP, UNUSED);
- op64(ctx, MOV, PEBP, PESP);
- if( ctx->totalRegsSize ) op64(ctx, SUB, PESP, pconst(&p,ctx->totalRegsSize));
-}
-
-static void op_ret( jit_ctx *ctx, vreg *r ) {
- preg p;
- switch( r->t->kind ) {
- case HF32:
-# ifdef HL_64
- op64(ctx, MOVSS, PXMM(0), fetch(r));
-# else
- op64(ctx,FLD32,&r->stack,UNUSED);
-# endif
- break;
- case HF64:
-# ifdef HL_64
- op64(ctx, MOVSD, PXMM(0), fetch(r));
-# else
- op64(ctx,FLD,&r->stack,UNUSED);
-# endif
- break;
- default:
- if( r->size < 4 && !r->current )
- fetch32(ctx, r);
- if( r->current != PEAX )
- op64(ctx,MOV,PEAX,fetch(r));
- break;
- }
- if( ctx->totalRegsSize ) op64(ctx, ADD, PESP, pconst(&p, ctx->totalRegsSize));
-# ifdef JIT_DEBUG
- {
- int jeq;
- op64(ctx, CMP, PESP, PEBP);
- XJump_small(JEq,jeq);
- jit_error("invalid ESP");
- patch_jump(ctx,jeq);
- }
-# endif
- op64(ctx, POP, PEBP, UNUSED);
- op64(ctx, RET, UNUSED, UNUSED);
-}
-
-static void call_native_consts( jit_ctx *ctx, void *nativeFun, int_val *args, int nargs ) {
- int size = pad_before_call(ctx, IS_64 ? 0 : HL_WSIZE*nargs);
- preg p;
- int i;
-# ifdef HL_64
- for(i=0;i=0;i--)
- op32(ctx, PUSH, pconst64(&p, args[i]), UNUSED);
-# endif
- call_native(ctx, nativeFun, size);
-}
-
-static void on_jit_error( const char *msg, int_val line ) {
- char buf[256];
- int iline = (int)line;
- sprintf(buf,"%s (line %d)",msg,iline);
-#ifdef HL_WIN_DESKTOP
- MessageBoxA(NULL,buf,"JIT ERROR",MB_OK);
-#else
- printf("JIT ERROR : %s\n",buf);
-#endif
- hl_debug_break();
- hl_throw(NULL);
-}
-
-static void _jit_error( jit_ctx *ctx, const char *msg, int line ) {
- int_val args[2] = { (int_val)msg, (int_val)line };
- call_native_consts(ctx,on_jit_error,args,2);
-}
-
-
-static preg *op_binop( jit_ctx *ctx, vreg *dst, vreg *a, vreg *b, hl_op bop ) {
- preg *pa = fetch(a), *pb = fetch(b), *out = NULL;
- CpuOp o;
- if( IS_FLOAT(a) ) {
- bool isf32 = a->t->kind == HF32;
- switch( bop ) {
- case OAdd: o = isf32 ? ADDSS : ADDSD; break;
- case OSub: o = isf32 ? SUBSS : SUBSD; break;
- case OMul: o = isf32 ? MULSS : MULSD; break;
- case OSDiv: o = isf32 ? DIVSS : DIVSD; break;
- case OJSLt:
- case OJSGte:
- case OJSLte:
- case OJSGt:
- case OJEq:
- case OJNotEq:
- case OJNotLt:
- case OJNotGte:
- o = isf32 ? COMISS : COMISD;
- break;
- case OSMod:
- {
- int args[] = { a->stack.id, b->stack.id };
- int size = prepare_call_args(ctx,2,args,ctx->vregs,0);
- void *mod_fun;
- if( isf32 ) mod_fun = fmodf; else mod_fun = fmod;
- call_native(ctx,mod_fun,size);
- store_result(ctx,dst);
- return fetch(dst);
- }
- default:
- printf("%s\n", hl_op_name(bop));
- ASSERT(bop);
- }
- } else {
- bool is64 = a->t->kind == HI64;
-# ifndef HL_64
- if( is64 ) {
- error_i64();
- return fetch(a);
- }
-# endif
- switch( bop ) {
- case OAdd: o = ADD; break;
- case OSub: o = SUB; break;
- case OMul: o = IMUL; break;
- case OAnd: o = AND; break;
- case OOr: o = OR; break;
- case OXor: o = XOR; break;
- case OShl:
- case OUShr:
- case OSShr:
- if( !b->current || b->current->kind != RCPU || b->current->id != Ecx ) {
- scratch(REG_AT(Ecx));
- op(ctx,MOV,REG_AT(Ecx),pb,is64);
- RLOCK(REG_AT(Ecx));
- pa = fetch(a);
- } else
- RLOCK(b->current);
- if( pa->kind != RCPU ) {
- pa = alloc_reg(ctx, RCPU);
- op(ctx,MOV,pa,fetch(a), is64);
- }
- op(ctx,bop == OShl ? SHL : (bop == OUShr ? SHR : SAR), pa, UNUSED,is64);
- if( dst ) store(ctx, dst, pa, true);
- return pa;
- case OSDiv:
- case OUDiv:
- case OSMod:
- case OUMod:
- {
- preg *out = bop == OSMod || bop == OUMod ? REG_AT(Edx) : PEAX;
- preg *r = pb;
- preg p;
- int jz, jz1 = 0, jend;
- if( pa->kind == RCPU && pa->id == Eax ) RLOCK(pa);
- // ensure b in CPU reg and not in Eax/Edx (for UI8/UI16)
- if( pb->kind != RCPU || (pb->id == Eax || pb->id == Edx) ) {
- scratch(REG_AT(Ecx));
- scratch(pb);
- load(ctx,REG_AT(Ecx),b);
- r = REG_AT(Ecx);
- }
- // integer div 0 => 0
- op(ctx,TEST,r,r,is64);
- XJump_small(JZero, jz);
- // Prevent MIN/-1 overflow exception
- // OSMod: r = (b == 0 || b == -1) ? 0 : a % b
- // OSDiv: r = (b == 0 || b == -1) ? a * b : a / b
- if( bop == OSMod || bop == OSDiv ) {
- op(ctx, CMP, r, pconst(&p,-1), is64);
- XJump_small(JEq, jz1);
- }
- pa = fetch(a);
- if( pa->kind != RCPU || pa->id != Eax ) {
- scratch(PEAX);
- scratch(pa);
- load(ctx,PEAX,a);
- }
- scratch(REG_AT(Edx));
- scratch(REG_AT(Eax));
- if( bop == OUDiv || bop == OUMod )
- op(ctx, XOR, REG_AT(Edx), REG_AT(Edx), is64);
- else
- op(ctx, CDQ, UNUSED, UNUSED, is64); // sign-extend Eax into Eax:Edx
- op(ctx, bop == OUDiv || bop == OUMod ? DIV : IDIV, r, UNUSED, is64);
- XJump_small(JAlways, jend);
- patch_jump(ctx, jz);
- patch_jump(ctx, jz1);
- if( bop != OSDiv ) {
- op(ctx, XOR, out, out, is64);
- } else {
- load(ctx, out, a);
- op(ctx, IMUL, out, r, is64);
- }
- patch_jump(ctx, jend);
- if( dst ) store(ctx, dst, out, true);
- return out;
- }
- case OJSLt:
- case OJSGte:
- case OJSLte:
- case OJSGt:
- case OJULt:
- case OJUGte:
- case OJEq:
- case OJNotEq:
- switch( a->t->kind ) {
- case HUI8:
- case HBOOL:
- o = CMP8;
- break;
- case HUI16:
- o = CMP16;
- break;
- default:
- o = CMP;
- break;
- }
- break;
- default:
- printf("%s\n", hl_op_name(bop));
- ASSERT(bop);
- }
- }
- switch( RTYPE(a) ) {
- case HI32:
- case HUI8:
- case HUI16:
- case HBOOL:
-# ifndef HL_64
- case HDYNOBJ:
- case HVIRTUAL:
- case HOBJ:
- case HSTRUCT:
- case HFUN:
- case HMETHOD:
- case HBYTES:
- case HNULL:
- case HENUM:
- case HDYN:
- case HTYPE:
- case HABSTRACT:
- case HARRAY:
-# endif
- switch( ID2(pa->kind, pb->kind) ) {
- case ID2(RCPU,RCPU):
- case ID2(RCPU,RSTACK):
- op32(ctx, o, pa, pb);
- scratch(pa);
- out = pa;
- break;
- case ID2(RSTACK,RCPU):
- if( dst == a && o != IMUL ) {
- op32(ctx, o, pa, pb);
- dst = NULL;
- out = pa;
- } else {
- alloc_cpu(ctx,a, true);
- return op_binop(ctx,dst,a,b,bop);
- }
- break;
- case ID2(RSTACK,RSTACK):
- alloc_cpu(ctx, a, true);
- return op_binop(ctx, dst, a, b, bop);
- default:
- printf("%s(%d,%d)\n", hl_op_name(bop), pa->kind, pb->kind);
- ASSERT(ID2(pa->kind, pb->kind));
- }
- if( dst ) store(ctx, dst, out, true);
- return out;
-# ifdef HL_64
- case HOBJ:
- case HSTRUCT:
- case HDYNOBJ:
- case HVIRTUAL:
- case HFUN:
- case HMETHOD:
- case HBYTES:
- case HNULL:
- case HENUM:
- case HDYN:
- case HTYPE:
- case HABSTRACT:
- case HARRAY:
- case HI64:
- case HGUID:
- switch( ID2(pa->kind, pb->kind) ) {
- case ID2(RCPU,RCPU):
- case ID2(RCPU,RSTACK):
- op64(ctx, o, pa, pb);
- scratch(pa);
- out = pa;
- break;
- case ID2(RSTACK,RCPU):
- if( dst == a && OP_FORMS[o].mem_r ) {
- op64(ctx, o, pa, pb);
- dst = NULL;
- out = pa;
- } else {
- alloc_cpu(ctx,a, true);
- return op_binop(ctx,dst,a,b,bop);
- }
- break;
- case ID2(RSTACK,RSTACK):
- alloc_cpu(ctx, a, true);
- return op_binop(ctx, dst, a, b, bop);
- default:
- printf("%s(%d,%d)\n", hl_op_name(bop), pa->kind, pb->kind);
- ASSERT(ID2(pa->kind, pb->kind));
- }
- if( dst ) store(ctx, dst, out, true);
- return out;
-# endif
- case HF64:
- case HF32:
- pa = alloc_fpu(ctx, a, true);
- pb = alloc_fpu(ctx, b, true);
- switch( ID2(pa->kind, pb->kind) ) {
- case ID2(RFPU,RFPU):
- op64(ctx,o,pa,pb);
- if( (o == COMISD || o == COMISS) && bop != OJSGt ) {
- int jnotnan;
- XJump_small(JNParity,jnotnan);
- switch( bop ) {
- case OJSLt:
- case OJNotLt:
- {
- preg *r = alloc_reg(ctx,RCPU);
- // set CF=0, ZF=1
- op64(ctx,XOR,r,r);
- RUNLOCK(r);
- break;
- }
- case OJSGte:
- case OJNotGte:
- {
- preg *r = alloc_reg(ctx,RCPU);
- // set ZF=0, CF=1
- op64(ctx,XOR,r,r);
- op64(ctx,CMP,r,PESP);
- RUNLOCK(r);
- break;
- }
- break;
- case OJNotEq:
- case OJEq:
- // set ZF=0, CF=?
- case OJSLte:
- // set ZF=0, CF=0
- op64(ctx,TEST,PESP,PESP);
- break;
- default:
- ASSERT(bop);
- }
- patch_jump(ctx,jnotnan);
- }
- scratch(pa);
- out = pa;
- break;
- default:
- printf("%s(%d,%d)\n", hl_op_name(bop), pa->kind, pb->kind);
- ASSERT(ID2(pa->kind, pb->kind));
- }
- if( dst ) store(ctx, dst, out, true);
- return out;
- default:
- ASSERT(RTYPE(a));
- }
- return NULL;
-}
-
-static int do_jump( jit_ctx *ctx, hl_op op, bool isFloat ) {
- int j;
- switch( op ) {
- case OJAlways:
- XJump(JAlways,j);
- break;
- case OJSGte:
- XJump(isFloat ? JUGte : JSGte,j);
- break;
- case OJSGt:
- XJump(isFloat ? JUGt : JSGt,j);
- break;
- case OJUGte:
- XJump(JUGte,j);
- break;
- case OJSLt:
- XJump(isFloat ? JULt : JSLt,j);
- break;
- case OJSLte:
- XJump(isFloat ? JULte : JSLte,j);
- break;
- case OJULt:
- XJump(JULt,j);
- break;
- case OJEq:
- XJump(JEq,j);
- break;
- case OJNotEq:
- XJump(JNeq,j);
- break;
- case OJNotLt:
- XJump(JUGte,j);
- break;
- case OJNotGte:
- XJump(JULt,j);
- break;
- default:
- j = 0;
- printf("Unknown JUMP %d\n",op);
- break;
- }
- return j;
-}
-
-static void register_jump( jit_ctx *ctx, int pos, int target ) {
- jlist *j = (jlist*)hl_malloc(&ctx->falloc, sizeof(jlist));
- j->pos = pos;
- j->target = target;
- j->next = ctx->jumps;
- ctx->jumps = j;
- if( target != 0 && ctx->opsPos[target] == 0 )
- ctx->opsPos[target] = -1;
-}
-
-#define HDYN_VALUE 8
-
-static void dyn_value_compare( jit_ctx *ctx, preg *a, preg *b, hl_type *t ) {
- preg p;
- switch( t->kind ) {
- case HUI8:
- case HBOOL:
- op32(ctx,MOV8,a,pmem(&p,a->id,HDYN_VALUE));
- op32(ctx,MOV8,b,pmem(&p,b->id,HDYN_VALUE));
- op64(ctx,CMP8,a,b);
- break;
- case HUI16:
- op32(ctx,MOV16,a,pmem(&p,a->id,HDYN_VALUE));
- op32(ctx,MOV16,b,pmem(&p,b->id,HDYN_VALUE));
- op64(ctx,CMP16,a,b);
- break;
- case HI32:
- op32(ctx,MOV,a,pmem(&p,a->id,HDYN_VALUE));
- op32(ctx,MOV,b,pmem(&p,b->id,HDYN_VALUE));
- op64(ctx,CMP,a,b);
- break;
- case HF32:
- {
- preg *fa = alloc_reg(ctx, RFPU);
- preg *fb = alloc_reg(ctx, RFPU);
- op64(ctx,MOVSS,fa,pmem(&p,a->id,HDYN_VALUE));
- op64(ctx,MOVSS,fb,pmem(&p,b->id,HDYN_VALUE));
- op64(ctx,COMISD,fa,fb);
- }
- break;
- case HF64:
- {
- preg *fa = alloc_reg(ctx, RFPU);
- preg *fb = alloc_reg(ctx, RFPU);
- op64(ctx,MOVSD,fa,pmem(&p,a->id,HDYN_VALUE));
- op64(ctx,MOVSD,fb,pmem(&p,b->id,HDYN_VALUE));
- op64(ctx,COMISD,fa,fb);
- }
- break;
- case HI64:
- default:
- // ptr comparison
- op64(ctx,MOV,a,pmem(&p,a->id,HDYN_VALUE));
- op64(ctx,MOV,b,pmem(&p,b->id,HDYN_VALUE));
- op64(ctx,CMP,a,b);
- break;
- }
-}
-
-static void op_jump( jit_ctx *ctx, vreg *a, vreg *b, hl_opcode *op, int targetPos ) {
- if( a->t->kind == HDYN || b->t->kind == HDYN || a->t->kind == HFUN || b->t->kind == HFUN ) {
- int args[] = { a->stack.id, b->stack.id };
- int size = prepare_call_args(ctx,2,args,ctx->vregs,0);
- call_native(ctx,hl_dyn_compare,size);
- if( op->op == OJSGt || op->op == OJSGte ) {
- preg p;
- int jinvalid;
- op32(ctx,CMP,PEAX,pconst(&p,hl_invalid_comparison));
- XJump_small(JEq,jinvalid);
- op32(ctx,TEST,PEAX,PEAX);
- register_jump(ctx,do_jump(ctx,op->op, IS_FLOAT(a)),targetPos);
- patch_jump(ctx,jinvalid);
- return;
- }
- op32(ctx,TEST,PEAX,PEAX);
- } else switch( a->t->kind ) {
- case HTYPE:
- {
- int args[] = { a->stack.id, b->stack.id };
- int size = prepare_call_args(ctx,2,args,ctx->vregs,0);
- preg p;
- call_native(ctx,hl_same_type,size);
- op64(ctx,CMP8,PEAX,pconst(&p,1));
- }
- break;
- case HNULL:
- {
- preg *pa = hl_type_size(a->t->tparam) == 1 ? alloc_cpu8(ctx,a,true) : alloc_cpu(ctx,a,true);
- preg *pb = hl_type_size(b->t->tparam) == 1 ? alloc_cpu8(ctx,b,true) : alloc_cpu(ctx,b,true);
- if( op->op == OJEq ) {
- // if( a == b || (a && b && a->v == b->v) ) goto
- int ja, jb;
- // if( a != b && (!a || !b || a->v != b->v) ) goto
- op64(ctx,CMP,pa,pb);
- register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
- op64(ctx,TEST,pa,pa);
- XJump_small(JZero,ja);
- op64(ctx,TEST,pb,pb);
- XJump_small(JZero,jb);
- dyn_value_compare(ctx,pa,pb,a->t->tparam);
- register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
- scratch(pa);
- scratch(pb);
- patch_jump(ctx,ja);
- patch_jump(ctx,jb);
- } else if( op->op == OJNotEq ) {
- int jeq, jcmp;
- // if( a != b && (!a || !b || a->v != b->v) ) goto
- op64(ctx,CMP,pa,pb);
- XJump_small(JEq,jeq);
- op64(ctx,TEST,pa,pa);
- register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
- op64(ctx,TEST,pb,pb);
- register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
- dyn_value_compare(ctx,pa,pb,a->t->tparam);
- XJump_small(JZero,jcmp);
- scratch(pa);
- scratch(pb);
- register_jump(ctx,do_jump(ctx,OJNotEq,false),targetPos);
- patch_jump(ctx,jcmp);
- patch_jump(ctx,jeq);
- } else
- ASSERT(op->op);
- return;
- }
- case HVIRTUAL:
- {
- preg p;
- preg *pa = alloc_cpu(ctx,a,true);
- preg *pb = alloc_cpu(ctx,b,true);
- int ja,jb,jav,jbv,jvalue;
- if( b->t->kind == HOBJ ) {
- if( op->op == OJEq ) {
- // if( a ? (b && a->value == b) : (b == NULL) ) goto
- op64(ctx,TEST,pa,pa);
- XJump_small(JZero,ja);
- op64(ctx,TEST,pb,pb);
- XJump_small(JZero,jb);
- op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE));
- op64(ctx,CMP,pa,pb);
- XJump_small(JAlways,jvalue);
- patch_jump(ctx,ja);
- op64(ctx,TEST,pb,pb);
- patch_jump(ctx,jvalue);
- register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
- patch_jump(ctx,jb);
- } else if( op->op == OJNotEq ) {
- // if( a ? (b == NULL || a->value != b) : (b != NULL) ) goto
- op64(ctx,TEST,pa,pa);
- XJump_small(JZero,ja);
- op64(ctx,TEST,pb,pb);
- register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
- op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE));
- op64(ctx,CMP,pa,pb);
- XJump_small(JAlways,jvalue);
- patch_jump(ctx,ja);
- op64(ctx,TEST,pb,pb);
- patch_jump(ctx,jvalue);
- register_jump(ctx,do_jump(ctx,OJNotEq,false),targetPos);
- } else
- ASSERT(op->op);
- scratch(pa);
- return;
- }
- op64(ctx,CMP,pa,pb);
- if( op->op == OJEq ) {
- // if( a == b || (a && b && a->value && b->value && a->value == b->value) ) goto
- register_jump(ctx,do_jump(ctx,OJEq, false),targetPos);
- op64(ctx,TEST,pa,pa);
- XJump_small(JZero,ja);
- op64(ctx,TEST,pb,pb);
- XJump_small(JZero,jb);
- op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE));
- op64(ctx,TEST,pa,pa);
- XJump_small(JZero,jav);
- op64(ctx,MOV,pb,pmem(&p,pb->id,HL_WSIZE));
- op64(ctx,TEST,pb,pb);
- XJump_small(JZero,jbv);
- op64(ctx,CMP,pa,pb);
- XJump_small(JNeq,jvalue);
- register_jump(ctx,do_jump(ctx,OJEq, false),targetPos);
- patch_jump(ctx,ja);
- patch_jump(ctx,jb);
- patch_jump(ctx,jav);
- patch_jump(ctx,jbv);
- patch_jump(ctx,jvalue);
- } else if( op->op == OJNotEq ) {
- int jnext;
- // if( a != b && (!a || !b || !a->value || !b->value || a->value != b->value) ) goto
- XJump_small(JEq,jnext);
- op64(ctx,TEST,pa,pa);
- XJump_small(JZero,ja);
- op64(ctx,TEST,pb,pb);
- XJump_small(JZero,jb);
- op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE));
- op64(ctx,TEST,pa,pa);
- XJump_small(JZero,jav);
- op64(ctx,MOV,pb,pmem(&p,pb->id,HL_WSIZE));
- op64(ctx,TEST,pb,pb);
- XJump_small(JZero,jbv);
- op64(ctx,CMP,pa,pb);
- XJump_small(JEq,jvalue);
- patch_jump(ctx,ja);
- patch_jump(ctx,jb);
- patch_jump(ctx,jav);
- patch_jump(ctx,jbv);
- register_jump(ctx,do_jump(ctx,OJAlways, false),targetPos);
- patch_jump(ctx,jnext);
- patch_jump(ctx,jvalue);
- } else
- ASSERT(op->op);
- scratch(pa);
- scratch(pb);
- return;
- }
- break;
- case HOBJ:
- case HSTRUCT:
- if( b->t->kind == HVIRTUAL ) {
- op_jump(ctx,b,a,op,targetPos); // inverse
- return;
- }
- if( hl_get_obj_rt(a->t)->compareFun ) {
- preg *pa = alloc_cpu(ctx,a,true);
- preg *pb = alloc_cpu(ctx,b,true);
- preg p;
- int jeq, ja, jb, jcmp;
- int args[] = { a->stack.id, b->stack.id };
- switch( op->op ) {
- case OJEq:
- // if( a == b || (a && b && cmp(a,b) == 0) ) goto
- op64(ctx,CMP,pa,pb);
- XJump_small(JEq,jeq);
- op64(ctx,TEST,pa,pa);
- XJump_small(JZero,ja);
- op64(ctx,TEST,pb,pb);
- XJump_small(JZero,jb);
- op_call_fun(ctx,NULL,(int)(int_val)a->t->obj->rt->compareFun,2,args);
- op32(ctx,TEST,PEAX,PEAX);
- XJump_small(JNotZero,jcmp);
- patch_jump(ctx,jeq);
- register_jump(ctx,do_jump(ctx,OJAlways,false),targetPos);
- patch_jump(ctx,ja);
- patch_jump(ctx,jb);
- patch_jump(ctx,jcmp);
- break;
- case OJNotEq:
- // if( a != b && (!a || !b || cmp(a,b) != 0) ) goto
- op64(ctx,CMP,pa,pb);
- XJump_small(JEq,jeq);
- op64(ctx,TEST,pa,pa);
- register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
- op64(ctx,TEST,pb,pb);
- register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
-
- op_call_fun(ctx,NULL,(int)(int_val)a->t->obj->rt->compareFun,2,args);
- op32(ctx,TEST,PEAX,PEAX);
- XJump_small(JZero,jcmp);
-
- register_jump(ctx,do_jump(ctx,OJNotEq,false),targetPos);
- patch_jump(ctx,jcmp);
- patch_jump(ctx,jeq);
- break;
- default:
- // if( a && b && cmp(a,b) ?? 0 ) goto
- op64(ctx,TEST,pa,pa);
- XJump_small(JZero,ja);
- op64(ctx,TEST,pb,pb);
- XJump_small(JZero,jb);
- op_call_fun(ctx,NULL,(int)(int_val)a->t->obj->rt->compareFun,2,args);
- op32(ctx,CMP,PEAX,pconst(&p,0));
- register_jump(ctx,do_jump(ctx,op->op,false),targetPos);
- patch_jump(ctx,ja);
- patch_jump(ctx,jb);
- break;
- }
- return;
- }
- // fallthrough
- default:
- // make sure we have valid 8 bits registers
- if( a->size == 1 ) alloc_cpu8(ctx,a,true);
- if( b->size == 1 ) alloc_cpu8(ctx,b,true);
- op_binop(ctx,NULL,a,b,op->op);
- break;
- }
- register_jump(ctx,do_jump(ctx,op->op, IS_FLOAT(a)),targetPos);
-}
-
-jit_ctx *hl_jit_alloc() {
- int i;
- jit_ctx *ctx = (jit_ctx*)malloc(sizeof(jit_ctx));
- if( ctx == NULL ) return NULL;
- memset(ctx,0,sizeof(jit_ctx));
- hl_alloc_init(&ctx->falloc);
- hl_alloc_init(&ctx->galloc);
- for(i=0;iid = i;
- r->kind = RCPU;
- }
- for(i=0;iid = i;
- r->kind = RFPU;
- }
- return ctx;
-}
-
-void hl_jit_free( jit_ctx *ctx, h_bool can_reset ) {
- free(ctx->vregs);
- free(ctx->opsPos);
- free(ctx->startBuf);
- ctx->maxRegs = 0;
- ctx->vregs = NULL;
- ctx->maxOps = 0;
- ctx->opsPos = NULL;
- ctx->startBuf = NULL;
- ctx->bufSize = 0;
- ctx->buf.b = NULL;
- ctx->calls = NULL;
- ctx->switchs = NULL;
- ctx->closure_list = NULL;
- hl_free(&ctx->falloc);
- hl_free(&ctx->galloc);
- if( !can_reset ) free(ctx);
-}
-
-static void jit_nops( jit_ctx *ctx ) {
- while( BUF_POS() & 15 )
- op32(ctx, NOP, UNUSED, UNUSED);
-}
-
-#define MAX_ARGS 16
-
-static void *call_jit_c2hl = NULL;
-static void *call_jit_hl2c = NULL;
-
-static void *callback_c2hl( void *_f, hl_type *t, void **args, vdynamic *ret ) {
- /*
- prepare stack and regs according to prepare_call_args, but by reading runtime type information
- from the function type. The stack and regs will be setup by the trampoline function.
- */
- void **f = (void**)_f;
- unsigned char stack[MAX_ARGS * 8];
- call_regs cregs = {0};
- if( t->fun->nargs > MAX_ARGS )
- hl_error("Too many arguments for dynamic call");
- int i, size = 0, pad = 0, pos = 0;
- for(i=0;ifun->nargs;i++) {
- hl_type *at = t->fun->args[i];
- int creg = select_call_reg(&cregs,at,i);
- if( creg >= 0 )
- continue;
- size += stack_size(at);
- }
- pad = (-size) & 15;
- size += pad;
- pos = 0;
- for(i=0;ifun->nargs;i++) {
- // RTL
- hl_type *at = t->fun->args[i];
- void *v = args[i];
- int creg = mapped_reg(&cregs,i);
- void *store;
- if( creg >= 0 ) {
- if( REG_IS_FPU(creg) ) {
- store = stack + size + CALL_NREGS * HL_WSIZE + (creg - XMM(0)) * sizeof(double);
- } else {
- store = stack + size + call_reg_index(creg) * HL_WSIZE;
- }
- switch( at->kind ) {
- case HBOOL:
- case HUI8:
- *(int_val*)store = *(unsigned char*)v;
- break;
- case HUI16:
- *(int_val*)store = *(unsigned short*)v;
- break;
- case HI32:
- *(int_val*)store = *(int*)v;
- break;
- case HF32:
- *(void**)store = 0;
- *(float*)store = *(float*)v;
- break;
- case HF64:
- *(double*)store = *(double*)v;
- break;
- case HI64:
- case HGUID:
- *(int64*)store = *(int64*)v;
- break;
- default:
- *(void**)store = v;
- break;
- }
- } else {
- int tsize = stack_size(at);
- store = stack + pos;
- pos += tsize;
- switch( at->kind ) {
- case HBOOL:
- case HUI8:
- *(int*)store = *(unsigned char*)v;
- break;
- case HUI16:
- *(int*)store = *(unsigned short*)v;
- break;
- case HI32:
- case HF32:
- *(int*)store = *(int*)v;
- break;
- case HF64:
- *(double*)store = *(double*)v;
- break;
- case HI64:
- case HGUID:
- *(int64*)store = *(int64*)v;
- break;
- default:
- *(void**)store = v;
- break;
- }
- }
- }
- pos += pad;
- pos >>= IS_64 ? 3 : 2;
- switch( t->fun->ret->kind ) {
- case HUI8:
- case HUI16:
- case HI32:
- case HBOOL:
- ret->v.i = ((int (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
- return &ret->v.i;
- case HI64:
- case HGUID:
- ret->v.i64 = ((int64 (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
- return &ret->v.i64;
- case HF32:
- ret->v.f = ((float (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
- return &ret->v.f;
- case HF64:
- ret->v.d = ((double (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
- return &ret->v.d;
- default:
- return ((void *(*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
- }
-}
-
-static void jit_c2hl( jit_ctx *ctx ) {
- // create the function that will be called by callback_c2hl
- // it will make sure to prepare the stack/regs according to native calling conventions
- int jeq, jloop, jstart;
- preg *fptr, *stack, *stend;
- preg p;
-
- op64(ctx,PUSH,PEBP,UNUSED);
- op64(ctx,MOV,PEBP,PESP);
-
-# ifdef HL_64
-
- fptr = REG_AT(R10);
- stack = PEAX;
- stend = REG_AT(R11);
- op64(ctx, MOV, fptr, REG_AT(CALL_REGS[0]));
- op64(ctx, MOV, stack, REG_AT(CALL_REGS[1]));
- op64(ctx, MOV, stend, REG_AT(CALL_REGS[2]));
-
- // set native call regs
- int i;
- for(i=0;iid,i*HL_WSIZE));
- for(i=0;iid,(i+CALL_NREGS)*HL_WSIZE));
-
-# else
-
- // make sure the stack is aligned on 16 bytes
- // the amount of push we will do afterwards is guaranteed to be a multiple of 16bytes by hl_callback
-# ifdef HL_VCC
- // VCC does not guarantee us an aligned stack...
- op64(ctx,MOV,PEAX,PESP);
- op64(ctx,AND,PEAX,pconst(&p,15));
- op64(ctx,SUB,PESP,PEAX);
-# else
- op64(ctx,SUB,PESP,pconst(&p,8));
-# endif
-
- // mov arguments to regs
- fptr = REG_AT(Eax);
- stack = REG_AT(Edx);
- stend = REG_AT(Ecx);
- op64(ctx,MOV,fptr,pmem(&p,Ebp,HL_WSIZE*2));
- op64(ctx,MOV,stack,pmem(&p,Ebp,HL_WSIZE*3));
- op64(ctx,MOV,stend,pmem(&p,Ebp,HL_WSIZE*4));
-
-# endif
-
- // push stack args
- jstart = BUF_POS();
- op64(ctx,CMP,stack,stend);
- XJump(JEq,jeq);
- op64(ctx,SUB,stack,pconst(&p,HL_WSIZE));
- op64(ctx,PUSH,pmem(&p,stack->id,0),UNUSED);
- XJump(JAlways,jloop);
- patch_jump(ctx,jeq);
- patch_jump_to(ctx, jloop, jstart);
-
- op_call(ctx,fptr,0);
-
- // cleanup and ret
- op64(ctx,MOV,PESP,PEBP);
- op64(ctx,POP,PEBP, UNUSED);
- op64(ctx,RET,UNUSED,UNUSED);
-}
-
-static vdynamic *jit_wrapper_call( vclosure_wrapper *c, char *stack_args, void **regs ) {
- vdynamic *args[MAX_ARGS];
- int i;
- int nargs = c->cl.t->fun->nargs;
- call_regs cregs = {0};
- if( nargs > MAX_ARGS )
- hl_error("Too many arguments for wrapped call");
- cregs.nextCpu++; // skip fptr in HL64 - was passed as arg0
- for(i=0;icl.t->fun->args[i];
- int creg = select_call_reg(&cregs,t,i);
- if( creg < 0 ) {
- args[i] = hl_is_dynamic(t) ? *(vdynamic**)stack_args : hl_make_dyn(stack_args,t);
- stack_args += stack_size(t);
- } else if( hl_is_dynamic(t) ) {
- args[i] = *(vdynamic**)(regs + call_reg_index(creg));
- } else if( t->kind == HF32 || t->kind == HF64 ) {
- args[i] = hl_make_dyn(regs + CALL_NREGS + creg - XMM(0),&hlt_f64);
- } else {
- args[i] = hl_make_dyn(regs + call_reg_index(creg),t);
- }
- }
- return hl_dyn_call(c->wrappedFun,args,nargs);
-}
-
-static void *jit_wrapper_ptr( vclosure_wrapper *c, char *stack_args, void **regs ) {
- vdynamic *ret = jit_wrapper_call(c, stack_args, regs);
- hl_type *tret = c->cl.t->fun->ret;
- switch( tret->kind ) {
- case HVOID:
- return NULL;
- case HUI8:
- case HUI16:
- case HI32:
- case HBOOL:
- return (void*)(int_val)hl_dyn_casti(&ret,&hlt_dyn,tret);
- case HI64:
- case HGUID:
- return (void*)(int_val)hl_dyn_casti64(&ret,&hlt_dyn);
- default:
- return hl_dyn_castp(&ret,&hlt_dyn,tret);
- }
-}
-
-static double jit_wrapper_d( vclosure_wrapper *c, char *stack_args, void **regs ) {
- vdynamic *ret = jit_wrapper_call(c, stack_args, regs);
- return hl_dyn_castd(&ret,&hlt_dyn);
-}
-
-static void jit_hl2c( jit_ctx *ctx ) {
- // create a function that is called with a vclosure_wrapper* and native args
- // and pack and pass the args to callback_hl2c
- preg p;
- int jfloat1, jfloat2, jexit;
- hl_type_fun *ft = NULL;
- int size;
-# ifdef HL_64
- preg *cl = REG_AT(CALL_REGS[0]);
- preg *tmp = REG_AT(CALL_REGS[1]);
-# else
- preg *cl = REG_AT(Ecx);
- preg *tmp = REG_AT(Edx);
-# endif
-
- op64(ctx,PUSH,PEBP,UNUSED);
- op64(ctx,MOV,PEBP,PESP);
-
-# ifdef HL_64
- // push registers
- int i;
- op64(ctx,SUB,PESP,pconst(&p,CALL_NREGS*8));
- for(i=0;it->fun->ret->kind ) {
- // case HF32: case HF64: return jit_wrapper_d(arg0,&args);
- // default: return jit_wrapper_ptr(arg0,&args);
- // }
- if( !IS_64 )
- op64(ctx,MOV,cl,pmem(&p,Ebp,HL_WSIZE*2)); // load arg0
- op64(ctx,MOV,tmp,pmem(&p,cl->id,0)); // ->t
- op64(ctx,MOV,tmp,pmem(&p,tmp->id,HL_WSIZE)); // ->fun
- op64(ctx,MOV,tmp,pmem(&p,tmp->id,(int)(int_val)&ft->ret)); // ->ret
- op32(ctx,MOV,tmp,pmem(&p,tmp->id,0)); // -> kind
-
- op32(ctx,CMP,tmp,pconst(&p,HF64));
- XJump_small(JEq,jfloat1);
- op32(ctx,CMP,tmp,pconst(&p,HF32));
- XJump_small(JEq,jfloat2);
-
- // 64 bits : ESP + EIP (+WIN64PAD)
- // 32 bits : ESP + EIP + PARAM0
- int args_pos = IS_64 ? ((IS_WINCALL64 ? 32 : 0) + HL_WSIZE * 2) : (HL_WSIZE*3);
-
- size = begin_native_call(ctx,3);
- op64(ctx, LEA, tmp, pmem(&p,Ebp,-HL_WSIZE*CALL_NREGS*2));
- set_native_arg(ctx, tmp);
- op64(ctx, LEA, tmp, pmem(&p,Ebp,args_pos));
- set_native_arg(ctx, tmp);
- set_native_arg(ctx, cl);
- call_native(ctx, jit_wrapper_ptr, size);
- XJump_small(JAlways, jexit);
-
- patch_jump(ctx,jfloat1);
- patch_jump(ctx,jfloat2);
- size = begin_native_call(ctx,3);
- op64(ctx, LEA, tmp, pmem(&p,Ebp,-HL_WSIZE*CALL_NREGS*2));
- set_native_arg(ctx, tmp);
- op64(ctx, LEA, tmp, pmem(&p,Ebp,args_pos));
- set_native_arg(ctx, tmp);
- set_native_arg(ctx, cl);
- call_native(ctx, jit_wrapper_d, size);
-
- patch_jump(ctx,jexit);
- op64(ctx,MOV,PESP,PEBP);
- op64(ctx,POP,PEBP, UNUSED);
- op64(ctx,RET,UNUSED,UNUSED);
-}
-
-#ifdef JIT_CUSTOM_LONGJUMP
-// Win64 debug CRT performs a Rtl stack check in debug mode, preventing from
-// using longjump. This in an alternate implementation that follows the native
-// setjump storage.
-//
-// Another more reliable way of handling this would be to use RtlAddFunctionTable
-// but some platform does not have it.
-static void jit_longjump( jit_ctx *ctx ) {
- preg *buf = REG_AT(CALL_REGS[0]);
- preg *ret = REG_AT(CALL_REGS[1]);
- preg p;
- int i;
- op64(ctx,MOV,PEAX,ret); // return value
- op64(ctx,MOV,REG_AT(Edx),pmem(&p,buf->id,0x0));
- op64(ctx,MOV,REG_AT(Ebx),pmem(&p,buf->id,0x8));
- op64(ctx,MOV,REG_AT(Esp),pmem(&p,buf->id,0x10));
- op64(ctx,MOV,REG_AT(Ebp),pmem(&p,buf->id,0x18));
- op64(ctx,MOV,REG_AT(Esi),pmem(&p,buf->id,0x20));
- op64(ctx,MOV,REG_AT(Edi),pmem(&p,buf->id,0x28));
- op64(ctx,MOV,REG_AT(R12),pmem(&p,buf->id,0x30));
- op64(ctx,MOV,REG_AT(R13),pmem(&p,buf->id,0x38));
- op64(ctx,MOV,REG_AT(R14),pmem(&p,buf->id,0x40));
- op64(ctx,MOV,REG_AT(R15),pmem(&p,buf->id,0x48));
- op64(ctx,LDMXCSR,pmem(&p,buf->id,0x58), UNUSED);
- op64(ctx,FLDCW,pmem(&p,buf->id,0x5C), UNUSED);
- for(i=0;i<10;i++)
- op64(ctx,MOVSD,REG_AT(XMM(i+6)),pmem(&p,buf->id,0x60 + i * 16));
- op64(ctx,PUSH,pmem(&p,buf->id,0x50),UNUSED);
- op64(ctx,RET,UNUSED,UNUSED);
-}
-#endif
-
-static void jit_fail( uchar *msg ) {
- if( msg == NULL ) {
- hl_debug_break();
- msg = USTR("assert");
- }
- vdynamic *d = hl_alloc_dynamic(&hlt_bytes);
- d->v.ptr = msg;
- hl_throw(d);
-}
-
-static void jit_null_access( jit_ctx *ctx ) {
- op64(ctx,PUSH,PEBP,UNUSED);
- op64(ctx,MOV,PEBP,PESP);
- int_val arg = (int_val)USTR("Null access");
- call_native_consts(ctx, jit_fail, &arg, 1);
-}
-
-static void jit_null_fail( int fhash ) {
- vbyte *field = hl_field_name(fhash);
- hl_buffer *b = hl_alloc_buffer();
- hl_buffer_str(b, USTR("Null access ."));
- hl_buffer_str(b, (uchar*)field);
- vdynamic *d = hl_alloc_dynamic(&hlt_bytes);
- d->v.ptr = hl_buffer_content(b,NULL);
- hl_throw(d);
-}
-
-static void jit_null_field_access( jit_ctx *ctx ) {
- preg p;
- op64(ctx,PUSH,PEBP,UNUSED);
- op64(ctx,MOV,PEBP,PESP);
- int size = begin_native_call(ctx, 1);
- int args_pos = (IS_WINCALL64 ? 32 : 0) + HL_WSIZE*2;
- set_native_arg(ctx, pmem(&p,Ebp,args_pos));
- call_native(ctx,jit_null_fail,size);
-}
-
-static void jit_assert( jit_ctx *ctx ) {
- op64(ctx,PUSH,PEBP,UNUSED);
- op64(ctx,MOV,PEBP,PESP);
- int_val arg = 0;
- call_native_consts(ctx, jit_fail, &arg, 1);
-}
-
-static int jit_build( jit_ctx *ctx, void (*fbuild)( jit_ctx *) ) {
- int pos;
- jit_buf(ctx);
- jit_nops(ctx);
- pos = BUF_POS();
- fbuild(ctx);
- int endPos = BUF_POS();
- jit_nops(ctx);
-#ifdef WIN64_UNWIND_TABLES
- int fid = ctx->nunwind++;
- ctx->unwind_table[fid].BeginAddress = pos;
- ctx->unwind_table[fid].EndAddress = endPos;
- ctx->unwind_table[fid].UnwindData = ctx->unwind_offset;
-#endif
- return pos;
-}
-
-static void hl_jit_init_module( jit_ctx *ctx, hl_module *m ) {
- int i;
- ctx->m = m;
- if( m->code->hasdebug ) {
- ctx->debug = (hl_debug_infos*)malloc(sizeof(hl_debug_infos) * m->code->nfunctions);
- memset(ctx->debug, -1, sizeof(hl_debug_infos) * m->code->nfunctions);
- }
- for(i=0;i<m->code->nfloats;i++) {
- jit_buf(ctx);
- *ctx->buf.d++ = m->code->floats[i];
- }
-#ifdef WIN64_UNWIND_TABLES
- jit_buf(ctx);
- ctx->unwind_offset = BUF_POS();
- write_unwind_data(ctx);
-
- ctx->unwind_table = malloc(sizeof(RUNTIME_FUNCTION) * (m->code->nfunctions + 10));
- memset(ctx->unwind_table, 0, sizeof(RUNTIME_FUNCTION) * (m->code->nfunctions + 10));
-#endif
-}
-
-void hl_jit_init( jit_ctx *ctx, hl_module *m ) {
- hl_jit_init_module(ctx,m);
- ctx->c2hl = jit_build(ctx, jit_c2hl);
- ctx->hl2c = jit_build(ctx, jit_hl2c);
-# ifdef JIT_CUSTOM_LONGJUMP
- ctx->longjump = jit_build(ctx, jit_longjump);
-# endif
- ctx->static_functions[0] = (void*)(int_val)jit_build(ctx,jit_null_access);
- ctx->static_functions[1] = (void*)(int_val)jit_build(ctx,jit_assert);
- ctx->static_functions[2] = (void*)(int_val)jit_build(ctx,jit_null_field_access);
-}
-
-void hl_jit_reset( jit_ctx *ctx, hl_module *m ) {
- ctx->debug = NULL;
- hl_jit_init_module(ctx,m);
-}
-
-static void *get_dyncast( hl_type *t ) {
- switch( t->kind ) {
- case HF32:
- return hl_dyn_castf;
- case HF64:
- return hl_dyn_castd;
- case HI64:
- case HGUID:
- return hl_dyn_casti64;
- case HI32:
- case HUI16:
- case HUI8:
- case HBOOL:
- return hl_dyn_casti;
- default:
- return hl_dyn_castp;
- }
-}
-
-static void *get_dynset( hl_type *t ) {
- switch( t->kind ) {
- case HF32:
- return hl_dyn_setf;
- case HF64:
- return hl_dyn_setd;
- case HI64:
- case HGUID:
- return hl_dyn_seti64;
- case HI32:
- case HUI16:
- case HUI8:
- case HBOOL:
- return hl_dyn_seti;
- default:
- return hl_dyn_setp;
- }
-}
-
-static void *get_dynget( hl_type *t ) {
- switch( t->kind ) {
- case HF32:
- return hl_dyn_getf;
- case HF64:
- return hl_dyn_getd;
- case HI64:
- case HGUID:
- return hl_dyn_geti64;
- case HI32:
- case HUI16:
- case HUI8:
- case HBOOL:
- return hl_dyn_geti;
- default:
- return hl_dyn_getp;
- }
-}
-
-static double uint_to_double( unsigned int v ) {
- return v;
-}
-
-static vclosure *alloc_static_closure( jit_ctx *ctx, int fid ) {
- hl_module *m = ctx->m;
- vclosure *c = hl_malloc(&m->ctx.alloc,sizeof(vclosure));
- int fidx = m->functions_indexes[fid];
- c->hasValue = 0;
- if( fidx >= m->code->nfunctions ) {
- // native
- c->t = m->code->natives[fidx - m->code->nfunctions].t;
- c->fun = m->functions_ptrs[fid];
- c->value = NULL;
- } else {
- c->t = m->code->functions[fidx].type;
- c->fun = (void*)(int_val)fid;
- c->value = ctx->closure_list;
- ctx->closure_list = c;
- }
- return c;
-}
-
-static void make_dyn_cast( jit_ctx *ctx, vreg *dst, vreg *v ) {
- int size;
- preg p;
- preg *tmp;
- if( v->t->kind == HNULL && v->t->tparam->kind == dst->t->kind ) {
- int jnull, jend;
- preg *out;
- switch( dst->t->kind ) {
- case HUI8:
- case HUI16:
- case HI32:
- case HBOOL:
- case HI64:
- case HGUID:
- tmp = alloc_cpu(ctx, v, true);
- op64(ctx, TEST, tmp, tmp);
- XJump_small(JZero, jnull);
- op64(ctx, MOV, tmp, pmem(&p,tmp->id,8));
- XJump_small(JAlways, jend);
- patch_jump(ctx, jnull);
- op64(ctx, XOR, tmp, tmp);
- patch_jump(ctx, jend);
- store(ctx, dst, tmp, true);
- return;
- case HF32:
- case HF64:
- tmp = alloc_cpu(ctx, v, true);
- out = alloc_fpu(ctx, dst, false);
- op64(ctx, TEST, tmp, tmp);
- XJump_small(JZero, jnull);
- op64(ctx, dst->t->kind == HF32 ? MOVSS : MOVSD, out, pmem(&p,tmp->id,8));
- XJump_small(JAlways, jend);
- patch_jump(ctx, jnull);
- op64(ctx, XORPD, out, out);
- patch_jump(ctx, jend);
- store(ctx, dst, out, true);
- return;
- default:
- break;
- }
- }
- switch( dst->t->kind ) {
- case HF32:
- case HF64:
- case HI64:
- case HGUID:
- size = begin_native_call(ctx, 2);
- set_native_arg(ctx, pconst64(&p,(int_val)v->t));
- break;
- default:
- size = begin_native_call(ctx, 3);
- set_native_arg(ctx, pconst64(&p,(int_val)dst->t));
- set_native_arg(ctx, pconst64(&p,(int_val)v->t));
- break;
- }
- tmp = alloc_native_arg(ctx);
- op64(ctx,MOV,tmp,REG_AT(Ebp));
- if( v->stackPos >= 0 )
- op64(ctx,ADD,tmp,pconst(&p,v->stackPos));
- else
- op64(ctx,SUB,tmp,pconst(&p,-v->stackPos));
- set_native_arg(ctx,tmp);
- call_native(ctx,get_dyncast(dst->t),size);
- store_result(ctx, dst);
-}
-
-int hl_jit_function( jit_ctx *ctx, hl_module *m, hl_function *f ) {
- int i, size = 0, opCount;
- int codePos = BUF_POS();
- int nargs = f->type->fun->nargs;
- unsigned short *debug16 = NULL;
- int *debug32 = NULL;
- call_regs cregs = {0};
- hl_thread_info *tinf = NULL;
- preg p;
- ctx->f = f;
- ctx->allocOffset = 0;
- if( f->nregs > ctx->maxRegs ) {
- free(ctx->vregs);
- ctx->vregs = (vreg*)malloc(sizeof(vreg) * (f->nregs + 1));
- if( ctx->vregs == NULL ) {
- ctx->maxRegs = 0;
- return -1;
- }
- ctx->maxRegs = f->nregs;
- }
- if( f->nops > ctx->maxOps ) {
- free(ctx->opsPos);
- ctx->opsPos = (int*)malloc(sizeof(int) * (f->nops + 1));
- if( ctx->opsPos == NULL ) {
- ctx->maxOps = 0;
- return -1;
- }
- ctx->maxOps = f->nops;
- }
- memset(ctx->opsPos,0,(f->nops+1)*sizeof(int));
- for(i=0;i<f->nregs;i++) {
- vreg *r = R(i);
- r->t = f->regs[i];
- r->size = hl_type_size(r->t);
- r->current = NULL;
- r->stack.holds = NULL;
- r->stack.id = i;
- r->stack.kind = RSTACK;
- }
- size = 0;
- int argsSize = 0;
- for(i=0;i<nargs;i++) {
- vreg *r = R(i);
- int creg = select_call_reg(&cregs,r->t,i);
- if( creg < 0 || IS_WINCALL64 ) {
- // use existing stack storage
- r->stackPos = argsSize + HL_WSIZE * 2;
- argsSize += stack_size(r->t);
- } else {
- // make room in local vars
- size += r->size;
- size += hl_pad_size(size,r->t);
- r->stackPos = -size;
- }
- }
- for(i=nargs;i<f->nregs;i++) {
- vreg *r = R(i);
- size += r->size;
- size += hl_pad_size(size,r->t); // align local vars
- r->stackPos = -size;
- }
-# ifdef HL_64
- size += (-size) & 15; // align on 16 bytes
-# else
- size += hl_pad_size(size,&hlt_dyn); // align on word size
-# endif
- ctx->totalRegsSize = size;
- jit_buf(ctx);
- ctx->functionPos = BUF_POS();
- // make sure currentPos is > 0 before any reg allocations happen
- // otherwise `alloc_reg` thinks that all registers are locked
- ctx->currentPos = 1;
- op_enter(ctx);
-# ifdef HL_64
- {
- // store in local var
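- for(i=0;i<nargs;i++) {
- vreg *r = R(i);
- preg *p;
- int reg = mapped_reg(&cregs, i);
- if( reg < 0 ) continue;
- p = REG_AT(reg);
- copy(ctx,fetch(r),p,r->size);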
- p->holds = r;
- r->current = p;
- }
- }
-# endif
- if( ctx->m->code->hasdebug ) {
- debug16 = (unsigned short*)malloc(sizeof(unsigned short) * (f->nops + 1));
- debug16[0] = (unsigned short)(BUF_POS() - codePos);
- }
- ctx->opsPos[0] = BUF_POS();
-
- for(opCount=0;opCount<f->nops;opCount++) {
- int jump;
- hl_opcode *o = f->ops + opCount;
- vreg *dst = R(o->p1);
- vreg *ra = R(o->p2);
- vreg *rb = R(o->p3);
- ctx->currentPos = opCount + 1;
- jit_buf(ctx);
-# ifdef JIT_DEBUG
- if( opCount == 0 || f->ops[opCount-1].op != OAsm ) {
- int uid = opCount + (f->findex<<16);
- op32(ctx, PUSH, pconst(&p,uid), UNUSED);
- op64(ctx, ADD, PESP, pconst(&p,HL_WSIZE));
- }
-# endif
- // emit code
- switch( o->op ) {
- case OMov:
- case OUnsafeCast:
- op_mov(ctx, dst, ra);
- break;
- case OInt:
- store_const(ctx, dst, m->code->ints[o->p2]);
- break;
- case OBool:
- store_const(ctx, dst, o->p2);
- break;
- case OGetGlobal:
- {
- void *addr = m->globals_data + m->globals_indexes[o->p2];
-# ifdef HL_64
- preg *tmp = alloc_reg(ctx, RCPU);
- op64(ctx, MOV, tmp, pconst64(&p,(int_val)addr));
- copy_to(ctx, dst, pmem(&p,tmp->id,0));
-# else
- copy_to(ctx, dst, paddr(&p,addr));
-# endif
- }
- break;
- case OSetGlobal:
- {
- void *addr = m->globals_data + m->globals_indexes[o->p1];
-# ifdef HL_64
- preg *tmp = alloc_reg(ctx, RCPU);
- op64(ctx, MOV, tmp, pconst64(&p,(int_val)addr));
- copy_from(ctx, pmem(&p,tmp->id,0), ra);
-# else
- copy_from(ctx, paddr(&p,addr), ra);
-# endif
- }
- break;
- case OCall3:
- {
- int args[3] = { o->p3, o->extra[0], o->extra[1] };
- op_call_fun(ctx, dst, o->p2, 3, args);
- }
- break;
- case OCall4:
- {
- int args[4] = { o->p3, o->extra[0], o->extra[1], o->extra[2] };
- op_call_fun(ctx, dst, o->p2, 4, args);
- }
- break;
- case OCallN:
- op_call_fun(ctx, dst, o->p2, o->p3, o->extra);
- break;
- case OCall0:
- op_call_fun(ctx, dst, o->p2, 0, NULL);
- break;
- case OCall1:
- op_call_fun(ctx, dst, o->p2, 1, &o->p3);
- break;
- case OCall2:
- {
- int args[2] = { o->p3, (int)(int_val)o->extra };
- op_call_fun(ctx, dst, o->p2, 2, args);
- }
- break;
- case OSub:
- case OAdd:
- case OMul:
- case OSDiv:
- case OUDiv:
- case OShl:
- case OSShr:
- case OUShr:
- case OAnd:
- case OOr:
- case OXor:
- case OSMod:
- case OUMod:
- op_binop(ctx, dst, ra, rb, o->op);
- break;
- case ONeg:
- {
- if( IS_FLOAT(ra) ) {
- preg *pa = alloc_reg(ctx,RFPU);
- preg *pb = alloc_fpu(ctx,ra,true);
- op64(ctx,XORPD,pa,pa);
- op64(ctx,ra->t->kind == HF32 ? SUBSS : SUBSD,pa,pb);
- store(ctx,dst,pa,true);
- } else if( ra->t->kind == HI64 ) {
-# ifdef HL_64
- preg *pa = alloc_reg(ctx,RCPU);
- preg *pb = alloc_cpu(ctx,ra,true);
- op64(ctx,XOR,pa,pa);
- op64(ctx,SUB,pa,pb);
- store(ctx,dst,pa,true);
-# else
- error_i64();
-# endif
- } else {
- preg *pa = alloc_reg(ctx,RCPU);
- preg *pb = alloc_cpu(ctx,ra,true);
- op32(ctx,XOR,pa,pa);
- op32(ctx,SUB,pa,pb);
- store(ctx,dst,pa,true);
- }
- }
- break;
- case ONot:
- {
- preg *v = alloc_cpu(ctx,ra,true);
- op32(ctx,XOR,v,pconst(&p,1));
- store(ctx,dst,v,true);
- }
- break;
- case OJFalse:
- case OJTrue:
- case OJNotNull:
- case OJNull:
- {
- preg *r = dst->t->kind == HBOOL ? alloc_cpu8(ctx, dst, true) : alloc_cpu(ctx, dst, true);
- op64(ctx, dst->t->kind == HBOOL ? TEST8 : TEST, r, r);
- XJump( o->op == OJFalse || o->op == OJNull ? JZero : JNotZero,jump);
- register_jump(ctx,jump,(opCount + 1) + o->p2);
- }
- break;
- case OJEq:
- case OJNotEq:
- case OJSLt:
- case OJSGte:
- case OJSLte:
- case OJSGt:
- case OJULt:
- case OJUGte:
- case OJNotLt:
- case OJNotGte:
- op_jump(ctx,dst,ra,o,(opCount + 1) + o->p3);
- break;
- case OJAlways:
- jump = do_jump(ctx,o->op,false);
- register_jump(ctx,jump,(opCount + 1) + o->p1);
- break;
- case OToDyn:
- if( ra->t->kind == HBOOL ) {
- int size = begin_native_call(ctx, 1);
- set_native_arg(ctx, fetch(ra));
- call_native(ctx, hl_alloc_dynbool, size);
- store(ctx, dst, PEAX, true);
- } else {
- int_val rt = (int_val)ra->t;
- int jskip = 0;
- if( hl_is_ptr(ra->t) ) {
- int jnz;
- preg *a = alloc_cpu(ctx,ra,true);
- op64(ctx,TEST,a,a);
- XJump_small(JNotZero,jnz);
- op64(ctx,XOR,PEAX,PEAX); // will replace the result of alloc_dynamic at jump land
- XJump_small(JAlways,jskip);
- patch_jump(ctx,jnz);
- }
- call_native_consts(ctx, hl_alloc_dynamic, &rt, 1);
- // copy value to dynamic
- if( (IS_FLOAT(ra) || ra->size == 8) && !IS_64 ) {
- preg *tmp = REG_AT(RCPU_SCRATCH_REGS[1]);
- op64(ctx,MOV,tmp,&ra->stack);
- op32(ctx,MOV,pmem(&p,Eax,HDYN_VALUE),tmp);
- if( ra->t->kind == HF64 ) {
- ra->stackPos += 4;
- op64(ctx,MOV,tmp,&ra->stack);
- op32(ctx,MOV,pmem(&p,Eax,HDYN_VALUE+4),tmp);
- ra->stackPos -= 4;
- }
- } else {
- preg *tmp = REG_AT(RCPU_SCRATCH_REGS[1]);
- copy_from(ctx,tmp,ra);
- op64(ctx,MOV,pmem(&p,Eax,HDYN_VALUE),tmp);
- }
- if( hl_is_ptr(ra->t) ) patch_jump(ctx,jskip);
- store(ctx, dst, PEAX, true);
- }
- break;
- case OToSFloat:
- if( ra == dst ) break;
- if (ra->t->kind == HI32 || ra->t->kind == HUI16 || ra->t->kind == HUI8) {
- preg* r = alloc_cpu(ctx, ra, true);
- preg* w = alloc_fpu(ctx, dst, false);
- op32(ctx, dst->t->kind == HF64 ? CVTSI2SD : CVTSI2SS, w, r);
- store(ctx, dst, w, true);
- } else if (ra->t->kind == HI64 ) {
- preg* r = alloc_cpu(ctx, ra, true);
- preg* w = alloc_fpu(ctx, dst, false);
- op64(ctx, dst->t->kind == HF64 ? CVTSI2SD : CVTSI2SS, w, r);
- store(ctx, dst, w, true);
- } else if( ra->t->kind == HF64 && dst->t->kind == HF32 ) {
- preg *r = alloc_fpu(ctx,ra,true);
- preg *w = alloc_fpu(ctx,dst,false);
- op32(ctx,CVTSD2SS,w,r);
- store(ctx, dst, w, true);
- } else if( ra->t->kind == HF32 && dst->t->kind == HF64 ) {
- preg *r = alloc_fpu(ctx,ra,true);
- preg *w = alloc_fpu(ctx,dst,false);
- op32(ctx,CVTSS2SD,w,r);
- store(ctx, dst, w, true);
- } else
- ASSERT(0);
- break;
- case OToUFloat:
- {
- int size;
- size = prepare_call_args(ctx,1,&o->p2,ctx->vregs,0);
- call_native(ctx,uint_to_double,size);
- store_result(ctx,dst);
- }
- break;
- case OToInt:
- if( ra == dst ) break;
- if( ra->t->kind == HF64 ) {
- preg *r = alloc_fpu(ctx,ra,true);
- preg *w = alloc_cpu(ctx,dst,false);
- preg *tmp = alloc_reg(ctx,RCPU);
- op32(ctx,STMXCSR,pmem(&p,Esp,-4),UNUSED);
- op32(ctx,MOV,tmp,&p);
- op32(ctx,OR,tmp,pconst(&p,0x6000)); // set round towards 0
- op32(ctx,MOV,pmem(&p,Esp,-8),tmp);
- op32(ctx,LDMXCSR,&p,UNUSED);
- op32(ctx,CVTSD2SI,w,r);
- op32(ctx,LDMXCSR,pmem(&p,Esp,-4),UNUSED);
- store(ctx, dst, w, true);
- } else if (ra->t->kind == HF32) {
- preg *r = alloc_fpu(ctx, ra, true);
- preg *w = alloc_cpu(ctx, dst, false);
- preg *tmp = alloc_reg(ctx, RCPU);
- op32(ctx, STMXCSR, pmem(&p, Esp, -4), UNUSED);
- op32(ctx, MOV, tmp, &p);
- op32(ctx, OR, tmp, pconst(&p, 0x6000)); // set round towards 0
- op32(ctx, MOV, pmem(&p, Esp, -8), tmp);
- op32(ctx, LDMXCSR, &p, UNUSED);
- op32(ctx, CVTSS2SI, w, r);
- op32(ctx, LDMXCSR, pmem(&p, Esp, -4), UNUSED);
- store(ctx, dst, w, true);
- } else if( (dst->t->kind == HI64 || dst->t->kind == HGUID) && ra->t->kind == HI32 ) {
- if( ra->current != PEAX ) {
- op32(ctx, MOV, PEAX, fetch(ra));
- scratch(PEAX);
- }
-# ifdef HL_64
- op64(ctx, CDQE, UNUSED, UNUSED); // sign-extend Eax into Rax
- store(ctx, dst, PEAX, true);
-# else
- op32(ctx, CDQ, UNUSED, UNUSED); // sign-extend Eax into Eax:Edx
- scratch(REG_AT(Edx));
- op32(ctx, MOV, fetch(dst), PEAX);
- dst->stackPos += 4;
- op32(ctx, MOV, fetch(dst), REG_AT(Edx));
- dst->stackPos -= 4;
- } else if( dst->t->kind == HI32 && ra->t->kind == HI64 ) {
- error_i64();
-# endif
- } else {
- preg *r = alloc_cpu(ctx,dst,false);
- copy_from(ctx, r, ra);
- store(ctx, dst, r, true);
- }
- break;
- case ORet:
- op_ret(ctx, dst);
- break;
- case OIncr:
- {
- if( IS_FLOAT(dst) ) {
- ASSERT(0);
- } else {
- preg *v = fetch32(ctx,dst);
- op32(ctx,INC,v,UNUSED);
- if( v->kind != RSTACK ) store(ctx, dst, v, false);
- }
- }
- break;
- case ODecr:
- {
- if( IS_FLOAT(dst) ) {
- ASSERT(0);
- } else {
- preg *v = fetch32(ctx,dst);
- op32(ctx,DEC,v,UNUSED);
- if( v->kind != RSTACK ) store(ctx, dst, v, false);
- }
- }
- break;
- case OFloat:
- {
- if( m->code->floats[o->p2] == 0 ) {
- preg *f = alloc_fpu(ctx,dst,false);
- op64(ctx,XORPD,f,f);
- } else switch( dst->t->kind ) {
- case HF64:
- case HF32:
-# ifdef HL_64
- op64(ctx,dst->t->kind == HF32 ? CVTSD2SS : MOVSD,alloc_fpu(ctx,dst,false),pcodeaddr(&p,o->p2 * 8));
-# else
- op64(ctx,dst->t->kind == HF32 ? MOVSS : MOVSD,alloc_fpu(ctx,dst,false),paddr(&p,m->code->floats + o->p2));
-# endif
- break;
- default:
- ASSERT(dst->t->kind);
- }
- store(ctx,dst,dst->current,false);
- }
- break;
- case OString:
- op64(ctx,MOV,alloc_cpu(ctx, dst, false),pconst64(&p,(int_val)hl_get_ustring(m->code,o->p2)));
- store(ctx,dst,dst->current,false);
- break;
- case OBytes:
- {
- char *b = m->code->version >= 5 ? m->code->bytes + m->code->bytes_pos[o->p2] : m->code->strings[o->p2];
- op64(ctx,MOV,alloc_cpu(ctx,dst,false),pconst64(&p,(int_val)b));
- store(ctx,dst,dst->current,false);
- }
- break;
- case ONull:
- {
- op64(ctx,XOR,alloc_cpu(ctx, dst, false),alloc_cpu(ctx, dst, false));
- store(ctx,dst,dst->current,false);
- }
- break;
- case ONew:
- {
- int_val args[] = { (int_val)dst->t };
- void *allocFun;
- int nargs = 1;
- switch( dst->t->kind ) {
- case HOBJ:
- case HSTRUCT:
- allocFun = hl_alloc_obj;
- break;
- case HDYNOBJ:
- allocFun = hl_alloc_dynobj;
- nargs = 0;
- break;
- case HVIRTUAL:
- allocFun = hl_alloc_virtual;
- break;
- default:
- ASSERT(dst->t->kind);
- }
- call_native_consts(ctx, allocFun, args, nargs);
- store(ctx, dst, PEAX, true);
- }
- break;
- case OInstanceClosure:
- {
- preg *r = alloc_cpu(ctx, rb, true);
- jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist));
- int size = begin_native_call(ctx,3);
- set_native_arg(ctx,r);
-
- j->pos = BUF_POS();
- j->target = o->p2;
- j->next = ctx->calls;
- ctx->calls = j;
-
- set_native_arg(ctx,pconst64(&p,RESERVE_ADDRESS));
- set_native_arg(ctx,pconst64(&p,(int_val)m->code->functions[m->functions_indexes[o->p2]].type));
- call_native(ctx,hl_alloc_closure_ptr,size);
- store(ctx,dst,PEAX,true);
- }
- break;
- case OVirtualClosure:
- {
- int size, i;
- preg *r = alloc_cpu_call(ctx, ra);
- hl_type *t = NULL;
- hl_type *ot = ra->t;
- while( t == NULL ) {
- for(i=0;i<ot->obj->nproto;i++) {
- hl_obj_proto *pp = ot->obj->proto + i;
- if( pp->pindex == o->p3 ) {
- t = m->code->functions[m->functions_indexes[pp->findex]].type;
- break;
- }
- }
- ot = ot->obj->super;
- }
- size = begin_native_call(ctx,3);
- set_native_arg(ctx,r);
- // read r->type->vobj_proto[i] for function address
- op64(ctx,MOV,r,pmem(&p,r->id,0));
- op64(ctx,MOV,r,pmem(&p,r->id,HL_WSIZE*2));
- op64(ctx,MOV,r,pmem(&p,r->id,HL_WSIZE*o->p3));
- set_native_arg(ctx,r);
- op64(ctx,MOV,r,pconst64(&p,(int_val)t));
- set_native_arg(ctx,r);
- call_native(ctx,hl_alloc_closure_ptr,size);
- store(ctx,dst,PEAX,true);
- }
- break;
- case OCallClosure:
- if( ra->t->kind == HDYN ) {
- // ASM for {
- // vdynamic *args[] = {args};
- // vdynamic *ret = hl_dyn_call(closure,args,nargs);
- // dst = hl_dyncast(ret,t_dynamic,t_dst);
- // }
- int offset = o->p3 * HL_WSIZE;
- preg *r = alloc_reg(ctx, RCPU_CALL);
- if( offset & 15 ) offset += 16 - (offset & 15);
- op64(ctx,SUB,PESP,pconst(&p,offset));
- op64(ctx,MOV,r,PESP);
- for(i=0;i<o->p3;i++) {
- vreg *a = R(o->extra[i]);
- if( !hl_is_dynamic(a->t) ) ASSERT(0);
- preg *v = alloc_cpu(ctx,a,true);
- op64(ctx,MOV,pmem(&p,r->id,i * HL_WSIZE),v);
- RUNLOCK(v);
- }
-# ifdef HL_64
- int size = begin_native_call(ctx, 3) + offset;
- set_native_arg(ctx, pconst(&p,o->p3));
- set_native_arg(ctx, r);
- set_native_arg(ctx, fetch(ra));
-# else
- int size = pad_before_call(ctx,HL_WSIZE*2 + sizeof(int) + offset);
- op64(ctx,PUSH,pconst(&p,o->p3),UNUSED);
- op64(ctx,PUSH,r,UNUSED);
- op64(ctx,PUSH,alloc_cpu(ctx,ra,true),UNUSED);
-# endif
- call_native(ctx,hl_dyn_call,size);
- if( dst->t->kind != HVOID ) {
- store(ctx,dst,PEAX,true);
- make_dyn_cast(ctx,dst,dst);
- }
- } else {
- int jhasvalue, jend, size;
- // ASM for if( c->hasValue ) c->fun(value,args) else c->fun(args)
- preg *r = alloc_cpu(ctx,ra,true);
- preg *tmp = alloc_reg(ctx, RCPU);
- op32(ctx,MOV,tmp,pmem(&p,r->id,HL_WSIZE*2));
- op32(ctx,TEST,tmp,tmp);
- scratch(tmp);
- XJump_small(JNotZero,jhasvalue);
- save_regs(ctx);
- size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,0);
- preg *rr = r;
- if( rr->holds != ra ) rr = alloc_cpu(ctx, ra, true);
- op_call(ctx, pmem(&p,rr->id,HL_WSIZE), size);
- XJump_small(JAlways,jend);
- patch_jump(ctx,jhasvalue);
- restore_regs(ctx);
-# ifdef HL_64
- {
- int regids[64];
- preg *pc = REG_AT(CALL_REGS[0]);
- vreg *sc = R(f->nregs); // scratch register that we temporary rebind
- if( o->p3 >= 63 ) jit_error("assert");
- memcpy(regids + 1, o->extra, o->p3 * sizeof(int));
- regids[0] = f->nregs;
- sc->size = HL_WSIZE;
- sc->t = &hlt_dyn;
- op64(ctx, MOV, pc, pmem(&p,r->id,HL_WSIZE*3));
- scratch(pc);
- sc->current = pc;
- pc->holds = sc;
- size = prepare_call_args(ctx,o->p3 + 1,regids,ctx->vregs,0);
- if( r->holds != ra ) r = alloc_cpu(ctx, ra, true);
- }
-# else
- size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,HL_WSIZE);
- if( r->holds != ra ) r = alloc_cpu(ctx, ra, true);
- op64(ctx, PUSH,pmem(&p,r->id,HL_WSIZE*3),UNUSED); // push closure value
-# endif
- op_call(ctx, pmem(&p,r->id,HL_WSIZE), size);
- discard_regs(ctx,false);
- patch_jump(ctx,jend);
- store_result(ctx, dst);
- }
- break;
- case OStaticClosure:
- {
- vclosure *c = alloc_static_closure(ctx,o->p2);
- preg *r = alloc_reg(ctx, RCPU);
- op64(ctx, MOV, r, pconst64(&p,(int_val)c));
- store(ctx,dst,r,true);
- }
- break;
- case OField:
- {
-# ifndef HL_64
- if( dst->t->kind == HI64 ) {
- error_i64();
- break;
- }
-# endif
- switch( ra->t->kind ) {
- case HOBJ:
- case HSTRUCT:
- {
- hl_runtime_obj *rt = hl_get_obj_rt(ra->t);
- preg *rr = alloc_cpu(ctx,ra, true);
- if( dst->t->kind == HSTRUCT ) {
- hl_type *ft = hl_obj_field_fetch(ra->t,o->p3)->t;
- if( ft->kind == HPACKED ) {
- preg *r = alloc_reg(ctx,RCPU);
- op64(ctx,LEA,r,pmem(&p,(CpuReg)rr->id,rt->fields_indexes[o->p3]));
- store(ctx,dst,r,true);
- break;
- }
- }
- copy_to(ctx,dst,pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p3]));
- }
- break;
- case HVIRTUAL:
- // ASM for --> if( hl_vfields(o)[f] ) r = *hl_vfields(o)[f]; else r = hl_dyn_get(o,hash(field),vt)
- {
- int jhasfield, jend, size;
- bool need_type = !(IS_FLOAT(dst) || dst->t->kind == HI64);
- preg *v = alloc_cpu_call(ctx,ra);
- preg *r = alloc_reg(ctx,RCPU);
- op64(ctx,MOV,r,pmem(&p,v->id,sizeof(vvirtual)+HL_WSIZE*o->p3));
- op64(ctx,TEST,r,r);
- XJump_small(JNotZero,jhasfield);
- size = begin_native_call(ctx, need_type ? 3 : 2);
- if( need_type ) set_native_arg(ctx,pconst64(&p,(int_val)dst->t));
- set_native_arg(ctx,pconst64(&p,(int_val)ra->t->virt->fields[o->p3].hashed_name));
- set_native_arg(ctx,v);
- call_native(ctx,get_dynget(dst->t),size);
- store_result(ctx,dst);
- XJump_small(JAlways,jend);
- patch_jump(ctx,jhasfield);
- copy_to(ctx, dst, pmem(&p,(CpuReg)r->id,0));
- patch_jump(ctx,jend);
- scratch(dst->current);
- }
- break;
- default:
- ASSERT(ra->t->kind);
- break;
- }
- }
- break;
- case OSetField:
- {
- switch( dst->t->kind ) {
- case HOBJ:
- case HSTRUCT:
- {
- hl_runtime_obj *rt = hl_get_obj_rt(dst->t);
- preg *rr = alloc_cpu(ctx, dst, true);
- if( rb->t->kind == HSTRUCT ) {
- hl_type *ft = hl_obj_field_fetch(dst->t,o->p2)->t;
- if( ft->kind == HPACKED ) {
- hl_runtime_obj *frt = hl_get_obj_rt(ft->tparam);
- preg *prb = alloc_cpu(ctx, rb, true);
- preg *tmp = alloc_reg(ctx, RCPU_CALL);
- int offset = 0;
- while( offset < frt->size ) {
- int remain = frt->size - offset;
- int copy_size = remain >= HL_WSIZE ? HL_WSIZE : (remain >= 4 ? 4 : (remain >= 2 ? 2 : 1));
- copy(ctx, tmp, pmem(&p, (CpuReg)prb->id, offset), copy_size);
- copy(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p2]+offset), tmp, copy_size);
- offset += copy_size;
- }
- break;
- }
- }
- copy_from(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p2]), rb);
- }
- break;
- case HVIRTUAL:
- // ASM for --> if( hl_vfields(o)[f] ) *hl_vfields(o)[f] = v; else hl_dyn_set(o,hash(field),vt,v)
- {
- int jhasfield, jend;
- preg *obj = alloc_cpu_call(ctx,dst);
- preg *r = alloc_reg(ctx,RCPU);
- op64(ctx,MOV,r,pmem(&p,obj->id,sizeof(vvirtual)+HL_WSIZE*o->p2));
- op64(ctx,TEST,r,r);
- XJump_small(JNotZero,jhasfield);
-# ifdef HL_64
- switch( rb->t->kind ) {
- case HF64:
- case HF32:
- size = begin_native_call(ctx,3);
- set_native_arg_fpu(ctx, fetch(rb), rb->t->kind == HF32);
- break;
- case HI64:
- case HGUID:
- size = begin_native_call(ctx,3);
- set_native_arg(ctx, fetch(rb));
- break;
- default:
- size = begin_native_call(ctx, 4);
- set_native_arg(ctx, fetch(rb));
- set_native_arg(ctx, pconst64(&p,(int_val)rb->t));
- break;
- }
- set_native_arg(ctx,pconst(&p,dst->t->virt->fields[o->p2].hashed_name));
- set_native_arg(ctx,obj);
-# else
- switch( rb->t->kind ) {
- case HF64:
- case HI64:
- case HGUID:
- size = pad_before_call(ctx,HL_WSIZE*2 + sizeof(double));
- push_reg(ctx,rb);
- break;
- case HF32:
- size = pad_before_call(ctx,HL_WSIZE*2 + sizeof(float));
- push_reg(ctx,rb);
- break;
- default:
- size = pad_before_call(ctx,HL_WSIZE*4);
- op64(ctx,PUSH,fetch32(ctx,rb),UNUSED);
- op64(ctx,MOV,r,pconst64(&p,(int_val)rb->t));
- op64(ctx,PUSH,r,UNUSED);
- break;
- }
- op32(ctx,MOV,r,pconst(&p,dst->t->virt->fields[o->p2].hashed_name));
- op64(ctx,PUSH,r,UNUSED);
- op64(ctx,PUSH,obj,UNUSED);
-# endif
- call_native(ctx,get_dynset(rb->t),size);
- XJump_small(JAlways,jend);
- patch_jump(ctx,jhasfield);
- copy_from(ctx, pmem(&p,(CpuReg)r->id,0), rb);
- patch_jump(ctx,jend);
- scratch(rb->current);
- }
- break;
- default:
- ASSERT(dst->t->kind);
- break;
- }
- }
- break;
- case OGetThis:
- {
- vreg *r = R(0);
- hl_runtime_obj *rt = hl_get_obj_rt(r->t);
- preg *rr = alloc_cpu(ctx,r, true);
- if( dst->t->kind == HSTRUCT ) {
- hl_type *ft = hl_obj_field_fetch(r->t,o->p2)->t;
- if( ft->kind == HPACKED ) {
- preg *r = alloc_reg(ctx,RCPU);
- op64(ctx,LEA,r,pmem(&p,(CpuReg)rr->id,rt->fields_indexes[o->p2]));
- store(ctx,dst,r,true);
- break;
- }
- }
- copy_to(ctx,dst,pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p2]));
- }
- break;
- case OSetThis:
- {
- vreg *r = R(0);
- hl_runtime_obj *rt = hl_get_obj_rt(r->t);
- preg *rr = alloc_cpu(ctx, r, true);
- if( ra->t->kind == HSTRUCT ) {
- hl_type *ft = hl_obj_field_fetch(r->t,o->p1)->t;
- if( ft->kind == HPACKED ) {
- hl_runtime_obj *frt = hl_get_obj_rt(ft->tparam);
- preg *pra = alloc_cpu(ctx, ra, true);
- preg *tmp = alloc_reg(ctx, RCPU_CALL);
- int offset = 0;
- while( offset < frt->size ) {
- int remain = frt->size - offset;
- int copy_size = remain >= HL_WSIZE ? HL_WSIZE : (remain >= 4 ? 4 : (remain >= 2 ? 2 : 1));
- copy(ctx, tmp, pmem(&p, (CpuReg)pra->id, offset), copy_size);
- copy(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p1]+offset), tmp, copy_size);
- offset += copy_size;
- }
- break;
- }
- }
- copy_from(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p1]), ra);
- }
- break;
- case OCallThis:
- {
- int nargs = o->p3 + 1;
- int *args = (int*)hl_malloc(&ctx->falloc,sizeof(int) * nargs);
- int size;
- preg *r = alloc_cpu(ctx, R(0), true);
- preg *tmp;
- tmp = alloc_reg(ctx, RCPU_CALL);
- op64(ctx,MOV,tmp,pmem(&p,r->id,0)); // read type
- op64(ctx,MOV,tmp,pmem(&p,tmp->id,HL_WSIZE*2)); // read proto
- args[0] = 0;
- for(i=1;i<nargs;i++)
- args[i] = o->extra[i-1];
- size = prepare_call_args(ctx,nargs,args,ctx->vregs,0);
- op_call(ctx,pmem(&p,tmp->id,o->p2*HL_WSIZE),size);
- discard_regs(ctx, false);
- store_result(ctx, dst);
- }
- break;
- case OCallMethod:
- switch( R(o->extra[0])->t->kind ) {
- case HOBJ: {
- int size;
- preg *r = alloc_cpu(ctx, R(o->extra[0]), true);
- preg *tmp;
- tmp = alloc_reg(ctx, RCPU_CALL);
- op64(ctx,MOV,tmp,pmem(&p,r->id,0)); // read type
- op64(ctx,MOV,tmp,pmem(&p,tmp->id,HL_WSIZE*2)); // read proto
- size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,0);
- op_call(ctx,pmem(&p,tmp->id,o->p2*HL_WSIZE),size);
- discard_regs(ctx, false);
- store_result(ctx, dst);
- break;
- }
- case HVIRTUAL:
- // ASM for --> if( hl_vfields(o)[f] ) dst = *hl_vfields(o)[f](o->value,args...); else dst = hl_dyn_call_obj(o->value,field,args,&ret)
- {
- int size;
- int paramsSize;
- int jhasfield, jend;
- bool need_dyn;
- bool obj_in_args = false;
- vreg *obj = R(o->extra[0]);
- preg *v = alloc_cpu_call(ctx,obj);
- preg *r = alloc_reg(ctx,RCPU_CALL);
- op64(ctx,MOV,r,pmem(&p,v->id,sizeof(vvirtual)+HL_WSIZE*o->p2));
- op64(ctx,TEST,r,r);
- save_regs(ctx);
-
- if( o->p3 < 6 ) {
- XJump_small(JNotZero,jhasfield);
- } else {
- XJump(JNotZero,jhasfield);
- }
-
- need_dyn = !hl_is_ptr(dst->t) && dst->t->kind != HVOID;
- paramsSize = (o->p3 - 1) * HL_WSIZE;
- if( need_dyn ) paramsSize += sizeof(vdynamic);
- if( paramsSize & 15 ) paramsSize += 16 - (paramsSize&15);
- op64(ctx,SUB,PESP,pconst(&p,paramsSize));
- op64(ctx,MOV,r,PESP);
-
- for(i=0;i<o->p3-1;i++) {
- vreg *a = R(o->extra[i+1]);
- if( hl_is_ptr(a->t) ) {
- op64(ctx,MOV,pmem(&p,r->id,i*HL_WSIZE),alloc_cpu(ctx,a,true));
- if( a->current != v ) {
- RUNLOCK(a->current);
- } else
- obj_in_args = true;
- } else {
- preg *r2 = alloc_reg(ctx,RCPU);
- op64(ctx,LEA,r2,&a->stack);
- op64(ctx,MOV,pmem(&p,r->id,i*HL_WSIZE),r2);
- if( r2 != v ) RUNLOCK(r2);
- }
- }
-
- jit_buf(ctx);
-
- if( !need_dyn ) {
- size = begin_native_call(ctx, 5);
- set_native_arg(ctx, pconst(&p,0));
- } else {
- preg *rtmp = alloc_reg(ctx,RCPU);
- op64(ctx,LEA,rtmp,pmem(&p,Esp,paramsSize - sizeof(vdynamic)));
- size = begin_native_call(ctx, 5);
- set_native_arg(ctx,rtmp);
- if( !IS_64 ) RUNLOCK(rtmp);
- }
- set_native_arg(ctx,r);
- set_native_arg(ctx,pconst(&p,obj->t->virt->fields[o->p2].hashed_name)); // fid
- set_native_arg(ctx,pconst64(&p,(int_val)obj->t->virt->fields[o->p2].t)); // ftype
- set_native_arg(ctx,pmem(&p,v->id,HL_WSIZE)); // o->value
- call_native(ctx,hl_dyn_call_obj,size + paramsSize);
- if( need_dyn ) {
- preg *r = IS_FLOAT(dst) ? REG_AT(XMM(0)) : PEAX;
- copy(ctx,r,pmem(&p,Esp,HDYN_VALUE - (int)sizeof(vdynamic)),dst->size);
- store(ctx, dst, r, false);
- } else
- store(ctx, dst, PEAX, false);
-
- XJump_small(JAlways,jend);
- patch_jump(ctx,jhasfield);
- restore_regs(ctx);
-
- if( !obj_in_args ) {
- // o = o->value hack
- if( v->holds ) v->holds->current = NULL;
- obj->current = v;
- v->holds = obj;
- op64(ctx,MOV,v,pmem(&p,v->id,HL_WSIZE));
- size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,0);
- } else {
- // keep o->value in R(f->nregs)
- int regids[64];
- preg *pc = alloc_reg(ctx,RCPU_CALL);
- vreg *sc = R(f->nregs); // scratch register that we temporary rebind
- if( o->p3 >= 63 ) jit_error("assert");
- memcpy(regids, o->extra, o->p3 * sizeof(int));
- regids[0] = f->nregs;
- sc->size = HL_WSIZE;
- sc->t = &hlt_dyn;
- op64(ctx, MOV, pc, pmem(&p,v->id,HL_WSIZE));
- scratch(pc);
- sc->current = pc;
- pc->holds = sc;
- size = prepare_call_args(ctx,o->p3,regids,ctx->vregs,0);
- }
-
- op_call(ctx,r,size);
- discard_regs(ctx, false);
- store_result(ctx, dst);
- patch_jump(ctx,jend);
- }
- break;
- default:
- ASSERT(0);
- break;
- }
- break;
- case ORethrow:
- {
- int size = prepare_call_args(ctx,1,&o->p1,ctx->vregs,0);
- call_native(ctx,hl_rethrow,size);
- }
- break;
- case OThrow:
- {
- int size = prepare_call_args(ctx,1,&o->p1,ctx->vregs,0);
- call_native(ctx,hl_throw,size);
- }
- break;
- case OLabel:
- // NOP for now
- discard_regs(ctx,false);
- break;
- case OGetI8:
- case OGetI16:
- {
- preg *base = alloc_cpu(ctx, ra, true);
- preg *offset = alloc_cpu64(ctx, rb, true);
- preg *r = alloc_reg(ctx,o->op == OGetI8 ? RCPU_8BITS : RCPU);
- op64(ctx,XOR,r,r);
- op32(ctx, o->op == OGetI8 ? MOV8 : MOV16,r,pmem2(&p,base->id,offset->id,1,0));
- store(ctx, dst, r, true);
- }
- break;
- case OGetMem:
- {
- #ifndef HL_64
- if (dst->t->kind == HI64) {
- error_i64();
- }
- #endif
- preg *base = alloc_cpu(ctx, ra, true);
- preg *offset = alloc_cpu64(ctx, rb, true);
- store(ctx, dst, pmem2(&p,base->id,offset->id,1,0), false);
- }
- break;
- case OSetI8:
- {
- preg *base = alloc_cpu(ctx, dst, true);
- preg *offset = alloc_cpu64(ctx, ra, true);
- preg *value = alloc_cpu8(ctx, rb, true);
- op32(ctx,MOV8,pmem2(&p,base->id,offset->id,1,0),value);
- }
- break;
- case OSetI16:
- {
- preg *base = alloc_cpu(ctx, dst, true);
- preg *offset = alloc_cpu64(ctx, ra, true);
- preg *value = alloc_cpu(ctx, rb, true);
- op32(ctx,MOV16,pmem2(&p,base->id,offset->id,1,0),value);
- }
- break;
- case OSetMem:
- {
- preg *base = alloc_cpu(ctx, dst, true);
- preg *offset = alloc_cpu64(ctx, ra, true);
- preg *value;
- switch( rb->t->kind ) {
- case HI32:
- value = alloc_cpu(ctx, rb, true);
- op32(ctx,MOV,pmem2(&p,base->id,offset->id,1,0),value);
- break;
- case HF32:
- value = alloc_fpu(ctx, rb, true);
- op32(ctx,MOVSS,pmem2(&p,base->id,offset->id,1,0),value);
- break;
- case HF64:
- value = alloc_fpu(ctx, rb, true);
- op32(ctx,MOVSD,pmem2(&p,base->id,offset->id,1,0),value);
- break;
- case HI64:
- case HGUID:
- value = alloc_cpu(ctx, rb, true);
- op64(ctx,MOV,pmem2(&p,base->id,offset->id,1,0),value);
- break;
- default:
- ASSERT(rb->t->kind);
- break;
- }
- }
- break;
- case OType:
- {
- op64(ctx,MOV,alloc_cpu(ctx, dst, false),pconst64(&p,(int_val)(m->code->types + o->p2)));
- store(ctx,dst,dst->current,false);
- }
- break;
- case OGetType:
- {
- int jnext, jend;
- preg *r = alloc_cpu(ctx, ra, true);
- preg *tmp = alloc_reg(ctx, RCPU);
- op64(ctx,TEST,r,r);
- XJump_small(JNotZero,jnext);
- op64(ctx,MOV, tmp, pconst64(&p,(int_val)&hlt_void));
- XJump_small(JAlways,jend);
- patch_jump(ctx,jnext);
- op64(ctx, MOV, tmp, pmem(&p,r->id,0));
- patch_jump(ctx,jend);
- store(ctx,dst,tmp,true);
- }
- break;
- case OGetArray:
- {
- preg *rdst = IS_FLOAT(dst) ? alloc_fpu(ctx,dst,false) : alloc_cpu(ctx,dst,false);
- if( ra->t->kind == HABSTRACT ) {
- int osize;
- bool isRead = dst->t->kind != HOBJ && dst->t->kind != HSTRUCT;
- if( isRead )
- osize = sizeof(void*);
- else {
- hl_runtime_obj *rt = hl_get_obj_rt(dst->t);
- osize = rt->size;
- }
- preg *idx = alloc_cpu64(ctx, rb, true);
- op64(ctx, IMUL, idx, pconst(&p,osize));
- op64(ctx, isRead?MOV:LEA, rdst, pmem2(&p,alloc_cpu(ctx,ra, true)->id,idx->id,1,0));
- store(ctx,dst,dst->current,false);
- scratch(idx);
- } else {
- copy(ctx, rdst, pmem2(&p,alloc_cpu(ctx,ra,true)->id,alloc_cpu64(ctx,rb,true)->id,hl_type_size(dst->t),sizeof(varray)), dst->size);
- store(ctx,dst,dst->current,false);
- }
- }
- break;
- case OSetArray:
- {
- if( dst->t->kind == HABSTRACT ) {
- int osize;
- bool isWrite = rb->t->kind != HOBJ && rb->t->kind != HSTRUCT;
- if( isWrite ) {
- osize = sizeof(void*);
- } else {
- hl_runtime_obj *rt = hl_get_obj_rt(rb->t);
- osize = rt->size;
- }
- preg *pdst = alloc_cpu(ctx,dst,true);
- preg *pra = alloc_cpu64(ctx,ra,true);
- op64(ctx, IMUL, pra, pconst(&p,osize));
- op64(ctx, ADD, pdst, pra);
- scratch(pra);
- preg *prb = alloc_cpu(ctx,rb,true);
- preg *tmp = alloc_reg(ctx, RCPU_CALL);
- int offset = 0;
- while( offset < osize ) {
- int remain = osize - offset;
- int copy_size = remain >= HL_WSIZE ? HL_WSIZE : (remain >= 4 ? 4 : (remain >= 2 ? 2 : 1));
- copy(ctx, tmp, pmem(&p, prb->id, offset), copy_size);
- copy(ctx, pmem(&p, pdst->id, offset), tmp, copy_size);
- offset += copy_size;
- }
- scratch(pdst);
- } else {
- preg *rrb = IS_FLOAT(rb) ? alloc_fpu(ctx,rb,true) : alloc_cpu(ctx,rb,true);
- copy(ctx, pmem2(&p,alloc_cpu(ctx,dst,true)->id,alloc_cpu64(ctx,ra,true)->id,hl_type_size(rb->t),sizeof(varray)), rrb, rb->size);
- }
- }
- break;
- case OArraySize:
- {
- op32(ctx,MOV,alloc_cpu(ctx,dst,false),pmem(&p,alloc_cpu(ctx,ra,true)->id,ra->t->kind == HABSTRACT ? HL_WSIZE + 4 : HL_WSIZE*2));
- store(ctx,dst,dst->current,false);
- }
- break;
- case ORef:
- {
- scratch(ra->current);
- op64(ctx,MOV,alloc_cpu(ctx,dst,false),REG_AT(Ebp));
- if( ra->stackPos < 0 )
- op64(ctx,SUB,dst->current,pconst(&p,-ra->stackPos));
- else
- op64(ctx,ADD,dst->current,pconst(&p,ra->stackPos));
- store(ctx,dst,dst->current,false);
- }
- break;
- case OUnref:
- copy_to(ctx,dst,pmem(&p,alloc_cpu(ctx,ra,true)->id,0));
- break;
- case OSetref:
- copy_from(ctx,pmem(&p,alloc_cpu(ctx,dst,true)->id,0),ra);
- break;
- case ORefData:
- switch( ra->t->kind ) {
- case HARRAY:
- {
- preg *r = fetch(ra);
- preg *d = alloc_cpu(ctx,dst,false);
- op64(ctx,MOV,d,r);
- op64(ctx,ADD,d,pconst(&p,sizeof(varray)));
- store(ctx,dst,dst->current,false);
- }
- break;
- default:
- ASSERT(ra->t->kind);
- }
- break;
- case ORefOffset:
- {
- preg *d = alloc_cpu(ctx,rb,true);
- preg *r2 = alloc_cpu(ctx,dst,false);
- preg *r = fetch(ra);
- int size = hl_type_size(dst->t->tparam);
- op64(ctx,MOV,r2,r);
- switch( size ) {
- case 1:
- break;
- case 2:
- op64(ctx,SHL,d,pconst(&p,1));
- break;
- case 4:
- op64(ctx,SHL,d,pconst(&p,2));
- break;
- case 8:
- op64(ctx,SHL,d,pconst(&p,3));
- break;
- default:
- op64(ctx,IMUL,d,pconst(&p,size));
- break;
- }
- op64(ctx,ADD,r2,d);
- scratch(d);
- store(ctx,dst,dst->current,false);
- }
- break;
- case OToVirtual:
- {
-# ifdef HL_64
- int size = pad_before_call(ctx, 0);
- op64(ctx,MOV,REG_AT(CALL_REGS[1]),fetch(ra));
- op64(ctx,MOV,REG_AT(CALL_REGS[0]),pconst64(&p,(int_val)dst->t));
-# else
- int size = pad_before_call(ctx, HL_WSIZE*2);
- op32(ctx,PUSH,fetch(ra),UNUSED);
- op32(ctx,PUSH,pconst(&p,(int)(int_val)dst->t),UNUSED);
-# endif
- if( ra->t->kind == HOBJ ) hl_get_obj_rt(ra->t); // ensure it's initialized
- call_native(ctx,hl_to_virtual,size);
- store(ctx,dst,PEAX,true);
- }
- break;
- case OMakeEnum:
- {
- hl_enum_construct *c = &dst->t->tenum->constructs[o->p2];
- int_val args[] = { (int_val)dst->t, o->p2 };
- int i;
- call_native_consts(ctx, hl_alloc_enum, args, 2);
- RLOCK(PEAX);
- for(i=0;i<c->nparams;i++) {
- preg *r = fetch(R(o->extra[i]));
- copy(ctx, pmem(&p,Eax,c->offsets[i]),r, R(o->extra[i])->size);
- RUNLOCK(fetch(R(o->extra[i])));
- if ((i & 15) == 0) jit_buf(ctx);
- }
- store(ctx, dst, PEAX, true);
- }
- break;
- case OEnumAlloc:
- {
- int_val args[] = { (int_val)dst->t, o->p2 };
- call_native_consts(ctx, hl_alloc_enum, args, 2);
- store(ctx, dst, PEAX, true);
- }
- break;
- case OEnumField:
- {
- hl_enum_construct *c = &ra->t->tenum->constructs[o->p3];
- preg *r = alloc_cpu(ctx,ra,true);
- copy_to(ctx,dst,pmem(&p,r->id,c->offsets[(int)(int_val)o->extra]));
- }
- break;
- case OSetEnumField:
- {
- hl_enum_construct *c = &dst->t->tenum->constructs[0];
- preg *r = alloc_cpu(ctx,dst,true);
- switch( rb->t->kind ) {
- case HF64:
- {
- preg *d = alloc_fpu(ctx,rb,true);
- copy(ctx,pmem(&p,r->id,c->offsets[o->p2]),d,8);
- break;
- }
- default:
- copy(ctx,pmem(&p,r->id,c->offsets[o->p2]),alloc_cpu(ctx,rb,true),hl_type_size(c->params[o->p2]));
- break;
- }
- }
- break;
- case ONullCheck:
- {
- int jz;
- preg *r = alloc_cpu(ctx,dst,true);
- op64(ctx,TEST,r,r);
- XJump_small(JNotZero,jz);
-
- hl_opcode *next = f->ops + opCount + 1;
- bool null_field_access = false;
- int hashed_name = 0;
- // skip const and operation between nullcheck and access
- while( (next < f->ops + f->nops - 1) && (next->op >= OInt && next->op <= ODecr) ) {
- next++;
- }
- if( (next->op == OField && next->p2 == o->p1) || (next->op == OSetField && next->p1 == o->p1) ) {
- int fid = next->op == OField ? next->p3 : next->p2;
- hl_obj_field *f = NULL;
- if( dst->t->kind == HOBJ || dst->t->kind == HSTRUCT )
- f = hl_obj_field_fetch(dst->t, fid);
- else if( dst->t->kind == HVIRTUAL )
- f = dst->t->virt->fields + fid;
- if( f == NULL ) ASSERT(dst->t->kind);
- null_field_access = true;
- hashed_name = f->hashed_name;
- } else if( (next->op >= OCall1 && next->op <= OCallN) && next->p3 == o->p1 ) {
- int fid = next->p2 < 0 ? -1 : ctx->m->functions_indexes[next->p2];
- hl_function *cf = ctx->m->code->functions + fid;
- const uchar *name = fun_field_name(cf);
- null_field_access = true;
- hashed_name = hl_hash_gen(name, true);
- }
-
- if( null_field_access ) {
- pad_before_call(ctx, HL_WSIZE);
- if( hashed_name >= 0 && hashed_name < 256 )
- op64(ctx,PUSH8,pconst(&p,hashed_name),UNUSED);
- else
- op32(ctx,PUSH,pconst(&p,hashed_name),UNUSED);
- } else {
- pad_before_call(ctx, 0);
- }
-
- jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist));
- j->pos = BUF_POS();
- j->target = null_field_access ? -3 : -1;
- j->next = ctx->calls;
- ctx->calls = j;
-
- op64(ctx,MOV,PEAX,pconst64(&p,RESERVE_ADDRESS));
- op_call(ctx,PEAX,-1);
- patch_jump(ctx,jz);
- }
- break;
- case OSafeCast:
- make_dyn_cast(ctx, dst, ra);
- break;
- case ODynGet:
- {
- int size;
-# ifdef HL_64
- if( IS_FLOAT(dst) || dst->t->kind == HI64 ) {
- size = begin_native_call(ctx,2);
- } else {
- size = begin_native_call(ctx,3);
- set_native_arg(ctx,pconst64(&p,(int_val)dst->t));
- }
- set_native_arg(ctx,pconst64(&p,(int_val)hl_hash_utf8(m->code->strings[o->p3])));
- set_native_arg(ctx,fetch(ra));
-# else
- preg *r;
- r = alloc_reg(ctx,RCPU);
- if( IS_FLOAT(dst) || dst->t->kind == HI64 ) {
- size = pad_before_call(ctx,HL_WSIZE*2);
- } else {
- size = pad_before_call(ctx,HL_WSIZE*3);
- op64(ctx,MOV,r,pconst64(&p,(int_val)dst->t));
- op64(ctx,PUSH,r,UNUSED);
- }
- op64(ctx,MOV,r,pconst64(&p,(int_val)hl_hash_utf8(m->code->strings[o->p3])));
- op64(ctx,PUSH,r,UNUSED);
- op64(ctx,PUSH,fetch(ra),UNUSED);
-# endif
- call_native(ctx,get_dynget(dst->t),size);
- store_result(ctx,dst);
- }
- break;
- case ODynSet:
- {
- int size;
-# ifdef HL_64
- switch( rb->t->kind ) {
- case HF32:
- case HF64:
- size = begin_native_call(ctx, 3);
- set_native_arg_fpu(ctx,fetch(rb),rb->t->kind == HF32);
- set_native_arg(ctx,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)));
- set_native_arg(ctx,fetch(dst));
- call_native(ctx,get_dynset(rb->t),size);
- break;
- case HI64:
- case HGUID:
- size = begin_native_call(ctx, 3);
- set_native_arg(ctx,fetch(rb));
- set_native_arg(ctx,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)));
- set_native_arg(ctx,fetch(dst));
- call_native(ctx,get_dynset(rb->t),size);
- break;
- default:
- size = begin_native_call(ctx,4);
- set_native_arg(ctx,fetch(rb));
- set_native_arg(ctx,pconst64(&p,(int_val)rb->t));
- set_native_arg(ctx,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)));
- set_native_arg(ctx,fetch(dst));
- call_native(ctx,get_dynset(rb->t),size);
- break;
- }
-# else
- switch( rb->t->kind ) {
- case HF32:
- size = pad_before_call(ctx, HL_WSIZE*2 + sizeof(float));
- push_reg(ctx,rb);
- op32(ctx,PUSH,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)),UNUSED);
- op32(ctx,PUSH,fetch(dst),UNUSED);
- call_native(ctx,get_dynset(rb->t),size);
- break;
- case HF64:
- case HI64:
- case HGUID:
- size = pad_before_call(ctx, HL_WSIZE*2 + sizeof(double));
- push_reg(ctx,rb);
- op32(ctx,PUSH,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)),UNUSED);
- op32(ctx,PUSH,fetch(dst),UNUSED);
- call_native(ctx,get_dynset(rb->t),size);
- break;
- default:
- size = pad_before_call(ctx, HL_WSIZE*4);
- op32(ctx,PUSH,fetch32(ctx,rb),UNUSED);
- op32(ctx,PUSH,pconst64(&p,(int_val)rb->t),UNUSED);
- op32(ctx,PUSH,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)),UNUSED);
- op32(ctx,PUSH,fetch(dst),UNUSED);
- call_native(ctx,get_dynset(rb->t),size);
- break;
- }
-# endif
- }
- break;
- case OTrap:
- {
- int size, jenter, jtrap;
- int offset = 0;
- int trap_size = (sizeof(hl_trap_ctx) + 15) & 0xFFF0;
- hl_trap_ctx *t = NULL;
-# ifndef HL_THREADS
- if( tinf == NULL ) tinf = hl_get_thread(); // single thread
-# endif
-
-# ifdef HL_64
- preg *trap = REG_AT(CALL_REGS[0]);
-# else
- preg *trap = PEAX;
-# endif
- RLOCK(trap);
-
- preg *treg = alloc_reg(ctx, RCPU);
- if( !tinf ) {
- call_native(ctx, hl_get_thread, 0);
- op64(ctx,MOV,treg,PEAX);
- offset = (int)(int_val)&tinf->trap_current;
- } else {
- offset = 0;
- op64(ctx,MOV,treg,pconst64(&p,(int_val)&tinf->trap_current));
- }
- op64(ctx,MOV,trap,pmem(&p,treg->id,offset));
- op64(ctx,SUB,PESP,pconst(&p,trap_size));
- op64(ctx,MOV,pmem(&p,Esp,(int)(int_val)&t->prev),trap);
- op64(ctx,MOV,trap,PESP);
- op64(ctx,MOV,pmem(&p,treg->id,offset),trap);
-
- /*
- trap E,@catch
- catch g
- catch g2
- ...
- @:catch
-
- // Before haxe 5
- This is a bit hackshish : we want to detect the type of exception filtered by the catch so we check the following
- sequence of HL opcodes:
-
- trap E,@catch
- ...
- @catch:
- global R, _
- call _, ???(R,E)
-
- ??? is expected to be hl.BaseType.check
- */
- hl_opcode *cat = f->ops + opCount + 1;
- hl_opcode *next = f->ops + opCount + 1 + o->p2;
- hl_opcode *next2 = f->ops + opCount + 2 + o->p2;
- if( cat->op == OCatch || (next->op == OGetGlobal && next2->op == OCall2 && next2->p3 == next->p1 && dst->stack.id == (int)(int_val)next2->extra) ) {
- int gindex = cat->op == OCatch ? cat->p1 : next->p2;
- hl_type *gt = m->code->globals[gindex];
- while( gt->kind == HOBJ && gt->obj->super ) gt = gt->obj->super;
- if( gt->kind == HOBJ && gt->obj->nfields && gt->obj->fields[0].t->kind == HTYPE ) {
- void *addr = m->globals_data + m->globals_indexes[gindex];
-# ifdef HL_64
- op64(ctx,MOV,treg,pconst64(&p,(int_val)addr));
- op64(ctx,MOV,treg,pmem(&p,treg->id,0));
-# else
- op64(ctx,MOV,treg,paddr(&p,addr));
-# endif
- } else
- op64(ctx,MOV,treg,pconst(&p,0));
- } else {
- op64(ctx,MOV,treg,pconst(&p,0));
- }
- op64(ctx,MOV,pmem(&p,Esp,(int)(int_val)&t->tcheck),treg);
-
- // On Win64 setjmp actually takes two arguments
- // the jump buffer and the frame pointer (or the stack pointer if there is no FP)
-#if defined(HL_WIN) && defined(HL_64)
- size = begin_native_call(ctx, 2);
- set_native_arg(ctx, REG_AT(Ebp));
-#else
- size = begin_native_call(ctx, 1);
-#endif
- set_native_arg(ctx,trap);
-#ifdef HL_MINGW
- call_native(ctx,_setjmp,size);
-#else
- call_native(ctx,setjmp,size);
-#endif
- op64(ctx,TEST,PEAX,PEAX);
- XJump_small(JZero,jenter);
- op64(ctx,ADD,PESP,pconst(&p,trap_size));
- if( !tinf ) {
- call_native(ctx, hl_get_thread, 0);
- op64(ctx,MOV,PEAX,pmem(&p, Eax, (int)(int_val)&tinf->exc_value));
- } else {
- op64(ctx,MOV,PEAX,pconst64(&p,(int_val)&tinf->exc_value));
- op64(ctx,MOV,PEAX,pmem(&p, Eax, 0));
- }
- store(ctx,dst,PEAX,false);
-
- jtrap = do_jump(ctx,OJAlways,false);
- register_jump(ctx,jtrap,(opCount + 1) + o->p2);
- patch_jump(ctx,jenter);
- }
- break;
- case OEndTrap:
- {
- int trap_size = (sizeof(hl_trap_ctx) + 15) & 0xFFF0;
- hl_trap_ctx *tmp = NULL;
- preg *addr,*r;
- int offset;
- if (!tinf) {
- call_native(ctx, hl_get_thread, 0);
- addr = PEAX;
- RLOCK(addr);
- offset = (int)(int_val)&tinf->trap_current;
- } else {
- offset = 0;
- addr = alloc_reg(ctx, RCPU);
- op64(ctx, MOV, addr, pconst64(&p, (int_val)&tinf->trap_current));
- }
- r = alloc_reg(ctx, RCPU);
- op64(ctx, MOV, r, pmem(&p,addr->id,offset));
- op64(ctx, MOV, r, pmem(&p,r->id,(int)(int_val)&tmp->prev));
- op64(ctx, MOV, pmem(&p,addr->id, offset), r);
-# ifdef HL_WIN
- // erase eip (prevent false positive)
- {
- _JUMP_BUFFER *b = NULL;
-# ifdef HL_64
- op64(ctx,MOV,pmem(&p,Esp,(int)(int_val)&(b->Rip)),PEAX);
-# else
- op64(ctx,MOV,pmem(&p,Esp,(int)&(b->Eip)),PEAX);
-# endif
- }
-# endif
- op64(ctx,ADD,PESP,pconst(&p,trap_size));
- }
- break;
- case OEnumIndex:
- {
- preg *r = alloc_reg(ctx,RCPU);
- op64(ctx,MOV,r,pmem(&p,alloc_cpu(ctx,ra,true)->id,HL_WSIZE));
- store(ctx,dst,r,true);
- break;
- }
- break;
- case OSwitch:
- {
- int jdefault;
- int i;
- preg *r = alloc_cpu(ctx, dst, true);
- preg *r2 = alloc_reg(ctx, RCPU);
- op32(ctx, CMP, r, pconst(&p,o->p2));
- XJump(JUGte,jdefault);
- // r2 = r * 5 + eip
-# ifdef HL_64
- op64(ctx, XOR, r2, r2);
-# endif
- op32(ctx, MOV, r2, r);
- op32(ctx, SHL, r2, pconst(&p,2));
- op32(ctx, ADD, r2, r);
-# ifdef HL_64
- preg *tmp = alloc_reg(ctx, RCPU);
- op64(ctx, MOV, tmp, pconst64(&p,RESERVE_ADDRESS));
-# else
- op64(ctx, ADD, r2, pconst64(&p,RESERVE_ADDRESS));
-# endif
- {
- jlist *s = (jlist*)hl_malloc(&ctx->galloc, sizeof(jlist));
- s->pos = BUF_POS() - sizeof(void*);
- s->next = ctx->switchs;
- ctx->switchs = s;
- }
-# ifdef HL_64
- op64(ctx, ADD, r2, tmp);
-# endif
- op64(ctx, JMP, r2, UNUSED);
- for(i=0;i<o->p2;i++) {
- int j = do_jump(ctx,OJAlways,false);
- register_jump(ctx,j,(opCount + 1) + o->extra[i]);
- if( (i & 15) == 0 ) jit_buf(ctx);
- }
- patch_jump(ctx, jdefault);
- }
- break;
- case OGetTID:
- op32(ctx, MOV, alloc_cpu(ctx,dst,false), pmem(&p,alloc_cpu(ctx,ra,true)->id,0));
- store(ctx,dst,dst->current,false);
- break;
- case OAssert:
- {
- pad_before_call(ctx, 0);
- jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist));
- j->pos = BUF_POS();
- j->target = -2;
- j->next = ctx->calls;
- ctx->calls = j;
-
- op64(ctx,MOV,PEAX,pconst64(&p,RESERVE_ADDRESS));
- op_call(ctx,PEAX,-1);
- }
- break;
- case ONop:
- break;
- case OPrefetch:
- {
- preg *r = alloc_cpu(ctx, dst, true);
- if( o->p2 > 0 ) {
- switch( dst->t->kind ) {
- case HOBJ:
- case HSTRUCT:
- {
- hl_runtime_obj *rt = hl_get_obj_rt(dst->t);
- preg *r2 = alloc_reg(ctx, RCPU);
- op64(ctx, LEA, r2, pmem(&p, r->id, rt->fields_indexes[o->p2-1]));
- r = r2;
- }
- break;
- default:
- ASSERT(dst->t->kind);
- break;
- }
- }
- switch( o->p3 ) {
- case 0:
- op64(ctx, PREFETCHT0, pmem(&p,r->id,0), UNUSED);
- break;
- case 1:
- op64(ctx, PREFETCHT1, pmem(&p,r->id,0), UNUSED);
- break;
- case 2:
- op64(ctx, PREFETCHT2, pmem(&p,r->id,0), UNUSED);
- break;
- case 3:
- op64(ctx, PREFETCHNTA, pmem(&p,r->id,0), UNUSED);
- break;
- case 4:
- op64(ctx, PREFETCHW, pmem(&p,r->id,0), UNUSED);
- break;
- default:
- ASSERT(o->p3);
- break;
- }
- }
- break;
- case OAsm:
- {
- switch( o->p1 ) {
- case 0: // byte output
- B(o->p2);
- break;
- case 1: // scratch cpu reg
- scratch(REG_AT(o->p2));
- break;
- case 2: // read vm reg
- rb--;
- copy(ctx, REG_AT(o->p2), &rb->stack, rb->size);
- scratch(REG_AT(o->p2));
- break;
- case 3: // write vm reg
- rb--;
- copy(ctx, &rb->stack, REG_AT(o->p2), rb->size);
- scratch(rb->current);
- break;
- case 4:
- if( ctx->totalRegsSize != 0 )
- hl_fatal("Asm naked function should not have local variables");
- if( opCount != 0 )
- hl_fatal("Asm naked function should be on first opcode");
- ctx->buf.b -= BUF_POS() - ctx->functionPos; // reset to our function start
- break;
- default:
- ASSERT(o->p1);
- break;
- }
- }
- break;
- case OCatch:
- // Only used by OTrap typing
- break;
- default:
- jit_error(hl_op_name(o->op));
- break;
- }
- // we are landing at this position, assume we have lost our registers
- if( ctx->opsPos[opCount+1] == -1 )
- discard_regs(ctx,true);
- ctx->opsPos[opCount+1] = BUF_POS();
-
- // write debug infos
- size = BUF_POS() - codePos;
- if( debug16 && size > 0xFF00 ) {
- debug32 = malloc(sizeof(int) * (f->nops + 1));
- for(i=0;i<ctx->currentPos;i++)
- debug32[i] = debug16[i];
- free(debug16);
- debug16 = NULL;
- }
- if( debug16 ) debug16[ctx->currentPos] = (unsigned short)size; else if( debug32 ) debug32[ctx->currentPos] = size;
-
- }
- // patch jumps
- {
- jlist *j = ctx->jumps;
- while( j ) {
- *(int*)(ctx->startBuf + j->pos) = ctx->opsPos[j->target] - (j->pos + 4);
- j = j->next;
- }
- ctx->jumps = NULL;
- }
- int codeEndPos = BUF_POS();
- // add nops padding
- jit_nops(ctx);
- // clear regs
- for(i=0;i<REG_COUNT;i++) {
- preg *r = REG_AT(i);
- r->holds = NULL;
- r->lock = 0;
- }
- // save debug infos
- if( ctx->debug ) {
- int fid = (int)(f - m->code->functions);
- ctx->debug[fid].start = codePos;
- ctx->debug[fid].offsets = debug32 ? (void*)debug32 : (void*)debug16;
- ctx->debug[fid].large = debug32 != NULL;
- }
- // unwind info
-#ifdef WIN64_UNWIND_TABLES
- int uw_idx = ctx->nunwind++;
- ctx->unwind_table[uw_idx].BeginAddress = codePos;
- ctx->unwind_table[uw_idx].EndAddress = codeEndPos;
- ctx->unwind_table[uw_idx].UnwindData = ctx->unwind_offset;
-#endif
- // reset tmp allocator
- hl_free(&ctx->falloc);
- return codePos;
-}
-
-static void *get_wrapper( hl_type *t ) {
- return call_jit_hl2c;
-}
-
-void hl_jit_patch_method( void *old_fun, void **new_fun_table ) {
- // mov eax, addr
- // jmp [eax]
- unsigned char *b = (unsigned char*)old_fun;
- unsigned long long addr = (unsigned long long)(int_val)new_fun_table;
-# ifdef HL_64
- *b++ = 0x48;
- *b++ = 0xB8;
- *b++ = (unsigned char)addr;
- *b++ = (unsigned char)(addr>>8);
- *b++ = (unsigned char)(addr>>16);
- *b++ = (unsigned char)(addr>>24);
- *b++ = (unsigned char)(addr>>32);
- *b++ = (unsigned char)(addr>>40);
- *b++ = (unsigned char)(addr>>48);
- *b++ = (unsigned char)(addr>>56);
-# else
- *b++ = 0xB8;
- *b++ = (unsigned char)addr;
- *b++ = (unsigned char)(addr>>8);
- *b++ = (unsigned char)(addr>>16);
- *b++ = (unsigned char)(addr>>24);
-# endif
- *b++ = 0xFF;
- *b++ = 0x20;
-}
-
-static void missing_closure() {
- hl_error("Missing static closure");
-}
-
-void *hl_jit_code( jit_ctx *ctx, hl_module *m, int *codesize, hl_debug_infos **debug, hl_module *previous ) {
- jlist *c;
- int size = BUF_POS();
- unsigned char *code;
- if( size & 4095 ) size += 4096 - (size&4095);
- code = (unsigned char*)hl_alloc_executable_memory(size);
- if( code == NULL ) return NULL;
- memcpy(code,ctx->startBuf,BUF_POS());
- *codesize = size;
- *debug = ctx->debug;
- if( !call_jit_c2hl ) {
- call_jit_c2hl = code + ctx->c2hl;
- call_jit_hl2c = code + ctx->hl2c;
- hl_setup.get_wrapper = get_wrapper;
- hl_setup.static_call = callback_c2hl;
- hl_setup.static_call_ref = true;
-# ifdef JIT_CUSTOM_LONGJUMP
- hl_setup.throw_jump = (void(*)(jmp_buf, int))(code + ctx->longjump);
-# endif
- }
-#ifdef WIN64_UNWIND_TABLES
- m->unwind_table = ctx->unwind_table;
- RtlAddFunctionTable(m->unwind_table, ctx->nunwind, (DWORD64)code);
-#endif
- if( !ctx->static_function_offset ) {
- int i;
- ctx->static_function_offset = true;
- for(i=0;i<(int)(sizeof(ctx->static_functions)/sizeof(void*));i++)
- ctx->static_functions[i] = (void*)(code + (int)(int_val)ctx->static_functions[i]);
- }
- // patch calls
- c = ctx->calls;
- while( c ) {
- void *fabs;
- if( c->target < 0 )
- fabs = ctx->static_functions[-c->target-1];
- else {
- fabs = m->functions_ptrs[c->target];
- if( fabs == NULL ) {
- // read absolute address from previous module
- int old_idx = m->hash->functions_hashes[m->functions_indexes[c->target]];
- if( old_idx < 0 )
- return NULL;
- fabs = previous->functions_ptrs[(previous->code->functions + old_idx)->findex];
- } else {
- // relative
- fabs = (unsigned char*)code + (int)(int_val)fabs;
- }
- }
- if( (code[c->pos]&~3) == (IS_64?0x48:0xB8) || code[c->pos] == 0x68 ) // MOV : absolute | PUSH
- *(void**)(code + c->pos + (IS_64?2:1)) = fabs;
- else {
- int_val delta = (int_val)fabs - (int_val)code - (c->pos + 5);
- int rpos = (int)delta;
- if( (int_val)rpos != delta ) {
- printf("Target code too far too rebase\n");
- return NULL;
- }
- *(int*)(code + c->pos + 1) = rpos;
- }
- c = c->next;
- }
- // patch switchs
- c = ctx->switchs;
- while( c ) {
- *(void**)(code + c->pos) = code + c->pos + (IS_64 ? 14 : 6);
- c = c->next;
- }
- // patch closures
- {
- vclosure *c = ctx->closure_list;
- while( c ) {
- vclosure *next;
- int fidx = (int)(int_val)c->fun;
- void *fabs = m->functions_ptrs[fidx];
- if( fabs == NULL ) {
- // read absolute address from previous module
- int old_idx = m->hash->functions_hashes[m->functions_indexes[fidx]];
- if( old_idx < 0 )
- fabs = missing_closure;
- else
- fabs = previous->functions_ptrs[(previous->code->functions + old_idx)->findex];
- } else {
- // relative
- fabs = (unsigned char*)code + (int)(int_val)fabs;
- }
- c->fun = fabs;
- next = (vclosure*)c->value;
- c->value = NULL;
- c = next;
- }
- }
- return code;
-}
-
diff --git a/src/jit.h b/src/jit.h
new file mode 100644
index 000000000..69c609547
--- /dev/null
+++ b/src/jit.h
@@ -0,0 +1,302 @@
+/*
+ * Copyright (C)2005-2016 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef JIT_H
+#define JIT_H
+
+#include <hl.h>
+#include <hlmodule.h>
+
+typedef enum {
+ LOAD_ADDR,
+ LOAD_CONST,
+ LOAD_ARG,
+ LOAD_FUN,
+ STORE,
+ LEA,
+ TEST,
+ CMP,
+ JCOND,
+ JUMP,
+ JUMP_TABLE,
+ BINOP,
+ UNOP,
+ CONV,
+ CONV_UNSIGNED,
+ RET,
+ CALL_PTR,
+ CALL_REG,
+ CALL_FUN,
+ MOV,
+ CMOV,
+ XCHG,
+ CXCHG,
+ PUSH_CONST,
+ PUSH,
+ POP,
+ ALLOC_STACK,
+ PREFETCH,
+ DEBUG_BREAK,
+ BLOCK,
+ ENTER,
+ STACK_OFFS,
+ CATCH,
+ ADDRESS,
+ NOP,
+} emit_op;
+
+typedef enum {
+ M_NONE,
+ M_UI8,
+ M_UI16,
+ M_I32,
+ M_PTR,
+ M_F64,
+ M_F32,
+ M_VOID,
+ M_NORET,
+} emit_mode;
+
+typedef int ereg;
+
+typedef struct {
+ union {
+ struct {
+ unsigned char op;
+ unsigned char mode;
+ unsigned char nargs;
+ unsigned char _unused;
+ };
+ int header;
+ };
+ int size_offs;
+ union {
+ struct {
+ ereg a;
+ ereg b;
+ };
+ uint64 value;
+ };
+} einstr;
+
+typedef enum {
+ R_VALUE = 0,
+ R_REG = 0x40000000,
+ R_REG_PTR = 0x50000000,
+ R_CONST = 0x60000000,
+ R_PHI = 0x70000000,
+} rkind;
+
+// An ereg (int) encodes its kind in its highest four bits:
+// higher bits (X = bit 31, ignored by REG_KIND)
+// 0000 = positive value (for IR only VXXX)
+// X100 = native register: the lower 7 bits are the register, the next 21 bits (bit 7 up to bit 27, counting from 0) are a signed offset
+// X101 = same as above, but indirect address
+// X110 = small constant value stored in the offset bits
+// 1111 = negative value (for IR phi PXXX)
+// 10XX = unused
+
+#define STACK_REG 5
+
+#define UNUSED ((ereg)0)
+#define MK_REG(v,kind) (((v)&0x7F) | (kind))
+#define MK_REG_VAL(v,kind,val) (MK_REG(v,kind) | (((val) << 7)&0x8FFFFF80))
+
+#define REG_KIND(r) ((r)&0x70000000)
+#define REG_REG(r) ((r)&0x7F)
+#define REG_VALUE(r) (((int)(((r) & 0x8000000) ? ((r) | 0xF0000000) : ((r)&0x0FFFFFFF)))>>7)
+#define REG_PTR(r) _reg_chk(r,R_REG,(r)|R_REG_PTR)
+#define REG_ADD_OFFSET(r,offs) _reg_chk(r,R_REG_PTR,MK_REG_VAL(r,REG_KIND(r),REG_VALUE(r)+(offs)))
+#define REG_IS_VAL(r) (REG_KIND(r) == R_VALUE || REG_KIND(r) == R_PHI)
+
+#define IS_NULL(r) ((r) == 0)
+#define IS_REG(r) (REG_KIND(r) == R_REG)
+#define MK_STACK_REG(v) MK_REG_VAL(STACK_REG,R_REG_PTR,v)
+#define MK_STACK_OFFS(v) MK_REG_VAL(STACK_REG,R_REG,v)
+#define MK_CONST(v) MK_REG_VAL(0,R_CONST,v)
+#define MK_ADDR(reg,offs) MK_REG_VAL(reg,R_REG_PTR,offs)
+
+#define IS_CALL(op) ((op) == CALL_PTR || (op) == CALL_REG || (op) == CALL_FUN)
+#define IS_FLOAT(mode) ((mode) == M_F64 || (mode) == M_F32)
+
+#define MAX_ARGS 16
+
+#if defined(HL_WIN_CALL) && defined(HL_64)
+# define IS_WINCALL64 1
+#else
+# define IS_WINCALL64 0
+#endif
+
+typedef struct {
+ int *data;
+ int max;
+ int cur;
+} int_alloc;
+
+typedef struct _ephi ephi;
+
+struct _ephi {
+ ereg value;
+ int nvalues;
+ emit_mode mode;
+ ereg *values;
+ int *blocks;
+};
+
+typedef struct _eblock {
+ int start_pos;
+ int end_pos;
+ int next_count;
+ int pred_count;
+ int phi_count;
+ int *nexts;
+ int *preds;
+ ephi *phis;
+} eblock;
+
+typedef struct _emit_ctx emit_ctx;
+typedef struct _regs_ctx regs_ctx;
+typedef struct _code_ctx code_ctx;
+typedef struct _jit_ctx jit_ctx;
+
+typedef struct {
+ int nscratchs;
+ int npersists;
+ int nargs;
+ ereg ret;
+ ereg *scratch;
+ ereg *persist;
+ ereg *arg;
+} reg_config;
+
+typedef struct {
+ reg_config regs;
+ reg_config floats;
+ ereg stack_reg;
+ ereg stack_pos;
+ int stack_align;
+ // Minimum bytes consumed by each stack argument. Defaults to HL_WSIZE
+ // when 0. Backends like AArch64 set this to 16 because each PUSH must
+ // move SP by 16 bytes to keep SP 16-byte aligned (any [SP, ...] access
+ // with a misaligned SP traps under EL0).
+ int stack_arg_size;
+ int debug_prefix_size;
+ ereg req_bit_shifts;
+ ereg req_div_a;
+ ereg req_div_b;
+} regs_config;
+
+typedef struct {
+ int c2hl;
+ int hl2c;
+} jit_special_funs;
+
+struct _jit_ctx {
+ hl_module *mod;
+ hl_function *fun;
+ hl_alloc falloc;
+ hl_alloc galloc;
+ emit_ctx *emit;
+ regs_ctx *regs;
+ code_ctx *code;
+ regs_config cfg;
+ // emit output
+ int instr_count;
+ int block_count;
+ int value_count;
+ int phi_count;
+ einstr *instrs;
+ eblock *blocks;
+ int *values_writes;
+ int *emit_pos_map;
+ // regs output
+ int reg_instr_count;
+ einstr *reg_instrs;
+ ereg *reg_writes;
+ int *reg_pos_map;
+ // codegen output
+ int code_size;
+ unsigned char *code_instrs;
+ int *code_pos_map;
+ jit_special_funs code_funs;
+ // accum output
+ int fdef_index;
+ int out_pos;
+ int out_max;
+ unsigned char *output;
+ unsigned char *final_code;
+};
+
+jit_ctx *hl_jit_alloc();
+void hl_jit_free( jit_ctx *ctx, h_bool can_reset );
+void hl_jit_reset( jit_ctx *ctx, hl_module *m );
+void hl_jit_init( jit_ctx *ctx, hl_module *m );
+int hl_jit_function( jit_ctx *ctx, hl_module *m, hl_function *f );
+void hl_jit_define_function( jit_ctx *ctx, int start, int size );
+
+void hl_jit_null_field_access( int fhash );
+void hl_jit_assert();
+void *hl_jit_wrapper_ptr( vclosure_wrapper *c, char *stack_args, void **regs );
+double hl_jit_wrapper_d( vclosure_wrapper *c, char *stack_args, void **regs );
+
+// emit & dump
+void hl_emit_dump( jit_ctx *ctx );
+const char *hl_emit_regstr( ereg v, emit_mode m );
+void hl_emit_store_args( emit_ctx *ctx, einstr *e, ereg *args, int count );
+void hl_emit_remap_jumps( emit_ctx *ctx, void *jumps, einstr *instrs, int *pos_map );
+ereg *hl_emit_get_args( emit_ctx *ctx, einstr *e );
+ereg **hl_emit_get_regs( einstr *e, int *count );
+void hl_emit_reg_iter( jit_ctx *jit, einstr *e, void *ctx, void (*iter_reg)( void *, ereg * ) );
+extern int hl_emit_mode_sizes[];
+extern bool hl_jit_dump_bin;
+#define val_str(v,m) hl_emit_regstr(v,m)
+
+#ifdef HL_DEBUG
+# define JIT_DEBUG
+#endif
+
+#define jit_error(msg) { hl_jit_error(msg,__func__,__LINE__); hl_debug_break(); exit(-1); }
+#define jit_assert() jit_error("")
+
+#if defined(JIT_DEBUG)
+# define jit_debug(...) printf(__VA_ARGS__)
+#else
+# define jit_debug(...)
+#endif
+
+#define DEF_ALLOC &ctx->jit->falloc
+
+#define jit_pad_size(size,k) (((k) == 0) ? 0 : ((-(size)) & ((k) - 1))) // bytes to add to size to reach a multiple of k (assumes k is a power of two); 0 when k is 0
+
+static void __ignore( void *value ) {}
+
+void hl_jit_error( const char *msg, const char *func, int line );
+
+void *hl_jit_code( jit_ctx *ctx, hl_module *m, int *codesize, hl_debug_infos **debug, hl_module *previous );
+void hl_jit_patch_method( void *old_fun, void **new_fun_table );
+
+static ereg _reg_chk( ereg r, rkind k, ereg ret ) {
+ if( REG_KIND(r) != k ) jit_assert();
+ return ret;
+}
+
+
+#endif
diff --git a/src/jit_aarch64.c b/src/jit_aarch64.c
new file mode 100644
index 000000000..397f67104
--- /dev/null
+++ b/src/jit_aarch64.c
@@ -0,0 +1,1999 @@
+/*
+ * Copyright (C)2015-2026 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * AArch64 JIT backend for the HL2 IR JIT.
+ *
+ * Phase 2 + 3: function shell + simple ops + arithmetic + memory + conversions.
+ * Calls/trampolines and the constant pool are still phase 4.
+ */
+
+#if !defined(__aarch64__) && !defined(_M_ARM64)
+# error "This file is for AArch64 architecture only."
+#endif
+
+#include
+#include
+#include "jit_aarch64_emit.h"
+#include
+#include
+
+#ifdef HL_DEBUG
+# define GEN_DEBUG
+#endif
+
+// IR ereg encoding 5 is reserved (`STACK_REG` in jit.h) — the regs phase uses
+// it to label stack-bound vregs. ARM hardware register X5 happens to use the
+// same hardware encoding, which would create a fatal aliasing if we exposed
+// X5 through the regs configuration as encoding 5. Re-encode X5 as the
+// otherwise-unused IR slot 32; gpr_id maps it back to hardware X5 at emit
+// time. (FP regs encode in the 64..127 range and have no such conflict.)
+#define X5_LOGICAL 32
+
+#define R(id) MK_REG(id, R_REG)
+#define V(id) MK_REG((id) + 64, R_REG)
+
+// ============================================================================
+// Register class declaration (AAPCS64, Linux + Apple)
+// ============================================================================
+
+// Fills `cfg` with the AArch64 register classes and stack conventions the
+// register allocator works with: integer and float scratch / persistent /
+// argument register lists, the return registers, stack register ids, stack
+// alignment and the per-argument stack slot size.
+void hl_jit_init_regs( regs_config *cfg ) {
+	// Integer registers.
+	// X15/X16/X17 reserved as backend-private temporaries. X16/X17 are the
+	// linker IP0/IP1 (the Apple dynamic linker may clobber them at indirect
+	// branches). X15 (ARM_TMP3) is reserved as a third scratch for op
+	// handlers that need three independent temps at once — notably emit_store
+	// when both base and data are spilled (TMP1 holds base, TMP2 holds data,
+	// and emit_ld_st still needs a temp for the offset-register encoding).
+	// X18 reserved on Apple/Windows as platform register; conservatively skipped on Linux too.
+	// X29 = FP, X30 = LR, X31 = SP/XZR — special-purpose.
+	static int scratch_regs[] = {
+		R(X0), R(X1), R(X2), R(X3), R(X4), R(X5_LOGICAL), R(X6), R(X7),
+		R(X8), R(X9), R(X10), R(X11), R(X12), R(X13), R(X14)
+	};
+	static int persist_regs[] = {
+		R(X19), R(X20), R(X21), R(X22), R(X23),
+		R(X24), R(X25), R(X26), R(X27), R(X28)
+	};
+	static int arg_regs[] = {
+		R(X0), R(X1), R(X2), R(X3), R(X4), R(X5_LOGICAL), R(X6), R(X7)
+	};
+	cfg->regs.ret = scratch_regs[0];
+	cfg->regs.nscratchs = sizeof(scratch_regs) / sizeof(int);
+	cfg->regs.npersists = sizeof(persist_regs) / sizeof(int);
+	cfg->regs.nargs = sizeof(arg_regs) / sizeof(int);
+	cfg->regs.scratch = (ereg*)scratch_regs;
+	cfg->regs.persist = (ereg*)persist_regs;
+	cfg->regs.arg = (ereg*)arg_regs;
+
+	// Float registers (lower 64 bits of V8-V15 are callee-saved per AAPCS64).
+	// V29/V30/V31 are deliberately NOT handed to the allocator: the backend
+	// uses them as private FP temporaries (emit_cmp materializes operands into
+	// V29/V30; emit_mov's memory-to-memory path and emit_push use V31). If
+	// they were allocatable, a live float value could be silently clobbered.
+	static int float_scratch[] = {
+		V(0), V(1), V(2), V(3), V(4), V(5), V(6), V(7),
+		V(16), V(17), V(18), V(19), V(20), V(21), V(22), V(23),
+		V(24), V(25), V(26), V(27), V(28)
+	};
+	static int float_persist[] = {
+		V(8), V(9), V(10), V(11), V(12), V(13), V(14), V(15)
+	};
+	static int float_args[] = {
+		V(0), V(1), V(2), V(3), V(4), V(5), V(6), V(7)
+	};
+	cfg->floats.ret = float_scratch[0];
+	cfg->floats.nscratchs = sizeof(float_scratch) / sizeof(int);
+	cfg->floats.npersists = sizeof(float_persist) / sizeof(int);
+	cfg->floats.nargs = sizeof(float_args) / sizeof(int);
+	cfg->floats.scratch = (ereg*)float_scratch;
+	cfg->floats.persist = (ereg*)float_persist;
+	cfg->floats.arg = (ereg*)float_args;
+
+	// ARM has no register pinning constraints for shifts (LSLV/LSRV/ASRV accept
+	// any source) or division (SDIV/UDIV write any destination).
+	cfg->req_bit_shifts = 0;
+	cfg->req_div_a = 0;
+	cfg->req_div_b = 0;
+
+	cfg->stack_reg = R(SP_REG);   // X31 (SP)
+	cfg->stack_pos = R(FP);       // X29
+	cfg->stack_align = 16;        // AAPCS64 mandates
+	// Each stack-passed arg consumes 16 bytes to keep SP 16-byte aligned —
+	// any [SP, ...] memory access with misaligned SP traps under EL0
+	// alignment enforcement on Linux/macOS. emit_push correspondingly moves
+	// SP by 16 per arg, so the IR's call-arg accounting matches.
+	cfg->stack_arg_size = 16;
+
+#ifdef GEN_DEBUG
+	cfg->debug_prefix_size = 4; // ARM instructions are fixed 4 bytes
+#endif
+}
+
+// ============================================================================
+// Disassembly helper
+// ============================================================================
+
+// Formats the native register behind `reg` for debug output, choosing the
+// W/X/S/D spelling from `m`. The returned pointer refers to a static buffer
+// that is overwritten by the next call.
+const char *hl_natreg_str( int reg, emit_mode m ) {
+	static char out[16];
+	int r = REG_REG(reg);
+	int hw;
+
+	// Float views: the IR id is the hardware V-register index plus 64.
+	if( m == M_F32 || m == M_F64 ) {
+		hw = r - 64;
+		sprintf(out, "%c%d%s", (m == M_F32) ? 'S' : 'D', hw, (hw >= 0 && hw < 32) ? "" : "???");
+		return out;
+	}
+
+	// Undo the remappings applied by gpr_id so the printed name matches the
+	// hardware register that is actually emitted.
+	if( r == X5_LOGICAL )
+		hw = 5;
+	else if( r == STACK_REG )
+		hw = 29;
+	else
+		hw = r;
+
+	if( m == M_I32 || m == M_UI16 || m == M_UI8 ) {
+		// 32-bit view.
+		if( hw == 31 )
+			sprintf(out, "WZR");
+		else if( hw < 31 )
+			sprintf(out, "W%d", hw);
+		else
+			sprintf(out, "W%d???", hw);
+		return out;
+	}
+
+	// 64-bit view, with the special-purpose registers spelled by name.
+	if( hw == 31 )
+		sprintf(out, "SP");
+	else if( hw == 29 )
+		sprintf(out, "FP");
+	else if( hw == 30 )
+		sprintf(out, "LR");
+	else if( hw < 31 )
+		sprintf(out, "X%d", hw);
+	else
+		sprintf(out, "X%d???", hw);
+	return out;
+}
+
+// ============================================================================
+// Backend lifecycle
+// ============================================================================
+
+// Allocates a zero-initialised backend context, links it to `jit` in both
+// directions, and aborts via jit_error() if the allocation fails (the previous
+// code passed a possibly-NULL pointer straight to memset).
+void hl_codegen_alloc( jit_ctx *jit ) {
+	code_ctx *ctx = (code_ctx*)calloc(1, sizeof(code_ctx));
+	if( ctx == NULL ) jit_error("aarch64 hl_codegen_alloc: out of memory");
+	jit->code = ctx;
+	ctx->jit = jit;
+}
+
+// Releases the backend context attached to `jit`, if any. The owner's pointer
+// is cleared first so a repeated call (or any later access through
+// jit->code) cannot touch freed memory.
+void hl_codegen_free( jit_ctx *jit ) {
+	code_ctx *ctx = jit->code;
+	if( ctx == NULL ) return;
+	jit->code = NULL;
+	free(ctx);
+}
+
+// ============================================================================
+// Helpers
+// ============================================================================
+
+#define ARM_TMP1 X16 // backend-private scratch (IP0)
+#define ARM_TMP2 X17 // backend-private scratch (IP1)
+#define ARM_TMP3 X15 // backend-private scratch (excluded from regalloc)
+
+// Translates an IR ereg into the hardware GPR number used in encodings.
+// Two IR ids are remapped, everything else passes through unchanged:
+//   STACK_REG  -> FP (X29)
+//   X5_LOGICAL -> X5 (X5 is moved out of IR slot 5 so that it does not alias
+//                     the STACK_REG id)
+static Arm64Reg gpr_id( ereg r ) {
+	int id = REG_REG(r);
+	return (id == STACK_REG) ? FP
+	     : (id == X5_LOGICAL) ? X5
+	     : (Arm64Reg)id;
+}
+
+// Translates an IR float ereg into its hardware V-register index by removing
+// the +64 bias applied by the V() macro.
+static Arm64FpReg fpr_id( ereg r ) {
+	int index = REG_REG(r) - 64;
+	return (Arm64FpReg)index;
+}
+
+// Returns the LDR/STR `size` field for an access of mode `m`:
+// 0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit (also the fallback).
+static int ls_size_for( emit_mode m ) {
+	if( m == M_UI8 ) return 0;
+	if( m == M_UI16 ) return 1;
+	if( m == M_I32 || m == M_F32 ) return 2;
+	// M_PTR, M_F64 and anything else use the full 64-bit width.
+	return 3;
+}
+
+// Returns the `sf` encoding bit for mode `m`: 1 for M_PTR and M_F64,
+// 0 for every other mode.
+static int sf_for( emit_mode m ) {
+	switch( m ) {
+	case M_PTR:
+	case M_F64:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+// True when `m` is one of the two floating-point modes (M_F32 / M_F64).
+static bool is_fp_mode( emit_mode m ) {
+	switch( m ) {
+	case M_F32:
+	case M_F64:
+		return true;
+	default:
+		return false;
+	}
+}
+
+// ----------------------------------------------------------------------------
+// Adjusts SP by a signed byte count: positive `delta` adds, negative subtracts,
+// zero emits nothing. Encoding choice, by magnitude:
+//   - up to 0xFFF:       one ADD/SUB immediate
+//   - up to 0xFFFFFF:    ADD/SUB immediate with LSL #12, plus a second plain
+//                        immediate when the low 12 bits are non-zero
+//   - anything larger:   magnitude loaded into ARM_TMP1, then ADD/SUB
+//                        (extended register)
+// ----------------------------------------------------------------------------
+static void emit_sp_offs( code_ctx *ctx, int delta ) {
+	if( delta == 0 ) return;
+	int op = (delta < 0) ? 1 : 0;  // 0 = ADD, 1 = SUB
+	uint32_t amount = (uint32_t)(delta < 0 ? -delta : delta);
+	uint32_t low = amount & 0xFFF;
+	uint32_t high = amount >> 12;
+
+	if( high == 0 ) {
+		// Fits a single 12-bit immediate.
+		encode_add_sub_imm(ctx, 1, op, 0, 0, (int)low, SP_REG, SP_REG);
+		return;
+	}
+	if( high <= 0xFFF ) {
+		// Upper 12 bits through the LSL #12 form, remainder (if any) after.
+		encode_add_sub_imm(ctx, 1, op, 0, 1, (int)high, SP_REG, SP_REG);
+		if( low != 0 )
+			encode_add_sub_imm(ctx, 1, op, 0, 0, (int)low, SP_REG, SP_REG);
+		return;
+	}
+	// Register form. The extended-register variant is required here: in the
+	// shifted-register variant register 31 means XZR, not SP, so the
+	// adjustment would be lost. With option=UXTX(011), imm3=0, Rd/Rn=31 is SP.
+	load_immediate(ctx, (int64_t)amount, ARM_TMP1, true);
+	encode_add_sub_ext(ctx, 1, op, 0, ARM_TMP1, /*option=UXTX*/3, /*imm3=*/0, SP_REG, SP_REG);
+}
+
+// ----------------------------------------------------------------------------
+// Emits `Rd = Rn (+|-) mag` (op: 0 = ADD, 1 = SUB) using only immediate forms.
+// One instruction when `mag` fits 12 bits; otherwise an LSL #12 immediate for
+// the upper bits followed, if needed, by a plain immediate for the low 12 bits.
+// Returns false without emitting anything when `mag` exceeds 0xFFFFFF.
+// ----------------------------------------------------------------------------
+static bool emit_addsub_imm_2step( code_ctx *ctx, int op, Arm64Reg Rd, Arm64Reg Rn, uint32_t mag ) {
+	uint32_t upper = mag >> 12;
+	uint32_t lower = mag & 0xFFF;
+	if( upper > 0xFFF )
+		return false;
+	if( upper == 0 ) {
+		encode_add_sub_imm(ctx, 1, op, 0, 0, (int)mag, Rn, Rd);
+		return true;
+	}
+	encode_add_sub_imm(ctx, 1, op, 0, 1, (int)upper, Rn, Rd);
+	if( lower != 0 )
+		encode_add_sub_imm(ctx, 1, op, 0, 0, (int)lower, Rd, Rd);
+	return true;
+}
+
+// ----------------------------------------------------------------------------
+// Load/store of `reg_t` at [base + offs].
+//
+//   is_load  true = LDR, false = STR
+//   mode     selects access width and GPR vs FP register file
+//   reg_t    hardware register number of the data register (X or V)
+//   base     GPR holding the base address
+//   offs     signed byte offset
+//   avoid    a GPR the caller has parked a live value in and that must not
+//            be clobbered here; pass (Arm64Reg)-1 for "none"
+//
+// Addressing form, in order of preference:
+//   1. LDR/STR (unsigned scaled imm12) when offs is non-negative and aligned
+//   2. LDUR/STUR (signed unscaled imm9) when offs is in [-256, 255]
+//   3. register offset: offs is loaded into a backend temp chosen from
+//      ARM_TMP1, ARM_TMP2, ARM_TMP3 (first one that is free).
+//
+// A temp is free when it is not `base`, not `avoid`, and — for GPR stores
+// only — not the data register (STR Xt,[base,Xt] would store the offset).
+// The base/avoid checks apply to FP accesses as well: the data register is a
+// V-register there, but the address registers are still GPRs.
+// ----------------------------------------------------------------------------
+static void emit_ld_st_ex( code_ctx *ctx, bool is_load, emit_mode mode, int reg_t, Arm64Reg base, int offs, Arm64Reg avoid ) {
+	int size = ls_size_for(mode);
+	int V = is_fp_mode(mode) ? 1 : 0;
+	int opc = is_load ? 1 : 0;  // 0=STR, 1=LDR (same for V=0 GPR and V=1 FP)
+	int scale = 1 << size;
+	if( offs >= 0 && (offs & (scale - 1)) == 0 && (offs / scale) < 0x1000 ) {
+		encode_ldr_str_imm(ctx, size, V, opc, offs / scale, base, (Arm64Reg)reg_t);
+		return;
+	}
+	if( offs >= -256 && offs < 256 ) {
+		encode_ldur_stur(ctx, size, V, opc, offs, base, (Arm64Reg)reg_t);
+		return;
+	}
+	// Loads are immune to reg_t == off_tmp since LDR reads the offset register
+	// before writing reg_t; FP data registers live in a different file.
+	bool data_is_gpr_store = (V == 0) && !is_load;
+	Arm64Reg candidates[3] = { ARM_TMP1, ARM_TMP2, ARM_TMP3 };
+	Arm64Reg off_tmp = ARM_TMP1;
+	bool found = false;
+	for( int i = 0; i < 3 && !found; i++ ) {
+		Arm64Reg c = candidates[i];
+		if( c == base || c == avoid ) continue;
+		if( data_is_gpr_store && reg_t == (int)c ) continue;
+		off_tmp = c;
+		found = true;
+	}
+	if( !found ) jit_error("aarch64 emit_ld_st: no free offset temp");
+	load_immediate(ctx, offs, off_tmp, true);
+	encode_ldr_str_reg(ctx, size, V, opc, off_tmp, /*option=*/3 /*LSL*/, /*S=*/0, base, (Arm64Reg)reg_t);
+}
+
+// Convenience wrapper around emit_ld_st_ex for callers that have no register
+// to protect: forwards all arguments and passes (Arm64Reg)-1 as `avoid`.
+static void emit_ld_st( code_ctx *ctx, bool is_load, emit_mode mode, int reg_t, Arm64Reg base, int offs ) {
+ emit_ld_st_ex(ctx, is_load, mode, reg_t, base, offs, (Arm64Reg)-1 /*no avoid*/);
+}
+
+// Register-to-register move between GPRs; `sf` selects 32- vs 64-bit width.
+// Emits nothing when source and destination coincide. Moves that involve SP
+// use ADD-immediate with #0, because the ORR form cannot address SP.
+static void emit_mov_gpr( code_ctx *ctx, Arm64Reg dst, Arm64Reg src, int sf ) {
+	if( dst == src ) return;
+	bool touches_sp = (dst == SP_REG) || (src == SP_REG);
+	if( !touches_sp ) {
+		// ORR dst, XZR, src
+		encode_logical_reg(ctx, sf, 0x01, 0, 0, src, 0, XZR, dst);
+		return;
+	}
+	// ADD dst, src, #0 (only form that accepts SP).
+	encode_add_sub_imm(ctx, sf, 0, 0, 0, 0, src, dst);
+}
+
+// Register-to-register move between FP registers using scalar FMOV
+// (encode_fp_1src with opcode 0). `mode` picks the operand type: double for
+// M_F64, single otherwise. Emits nothing when dst and src are the same.
+static void emit_mov_fpr( code_ctx *ctx, Arm64FpReg dst, Arm64FpReg src, emit_mode mode ) {
+	if( dst != src ) {
+		int type = (mode == M_F64) ? 1 : 0;  // 1=double, 0=single
+		encode_fp_1src(ctx, /*M=*/0, /*S=*/0, type, /*opcode=*/0, src, dst);
+	}
+}
+
+// Generic MOV that mirrors x86's emit_mov: handles reg/reg, reg/mem, mem/reg.
+// imm-to-reg goes through emit_load_const.
+static void emit_load_const( code_ctx *ctx, ereg out, uint64_t value, emit_mode mode );
+
+// Phase 4 forward declarations (defined later in this file).
+static int reserve_const_segment( code_ctx *ctx, int size, int align );
+static int alloc_const( code_ctx *ctx, uint64_t value, int adrp_pos );
+static void emit_const_load( code_ctx *ctx, Arm64Reg dst, uint64_t value );
+static void emit_const_addr( code_ctx *ctx, Arm64Reg dst, uint64_t value );
+static void emit_pool_offset_addr( code_ctx *ctx, Arm64Reg dst, int const_offset );
+static Arm64FpReg materialize_fpr( code_ctx *ctx, ereg src, emit_mode mode, Arm64FpReg tmp );
+static Arm64Reg materialize_gpr( code_ctx *ctx, ereg src, emit_mode mode, Arm64Reg tmp );
+static Arm64Reg materialize_gpr_ex( code_ctx *ctx, ereg src, emit_mode mode, Arm64Reg tmp, Arm64Reg avoid );
+
+// LEA-like: out = base + offs. Used when an operand encodes an address as
+// (R_REG, value=offs) — e.g. MK_STACK_OFFS, or the LEA-rewritten ADDRESS op.
+//
+// Offsets with magnitude up to 0xFFFFFF are emitted with ADD/SUB-immediate
+// only (one or two instructions), which needs no scratch register and accepts
+// SP as base or destination.
+//
+// Larger offsets are loaded into a register first. `out` itself is used for
+// that when it is distinct from `base` (and is not SP), so that ARM_TMP1 — in
+// which a caller may be holding another operand — is left untouched;
+// ARM_TMP1 is only used for an in-place add. When SP is involved the
+// extended-register ADD is used, because the shifted-register form would read
+// register 31 as XZR.
+static void emit_lea_imm( code_ctx *ctx, Arm64Reg out, Arm64Reg base, int offs ) {
+	if( offs == 0 ) {
+		emit_mov_gpr(ctx, out, base, 1);
+		return;
+	}
+	int op = (offs < 0) ? 1 : 0;  // 0 = ADD, 1 = SUB
+	uint32_t mag = (offs < 0) ? ((uint32_t)0 - (uint32_t)offs) : (uint32_t)offs;
+	if( emit_addsub_imm_2step(ctx, op, out, base, mag) )
+		return;
+
+	// Register form: out = base + sign-extended offs.
+	Arm64Reg scratch = (out != base && out != SP_REG) ? out : ARM_TMP1;
+	if( scratch == base ) jit_error("aarch64 emit_lea_imm: no free temp for large offset");
+	load_immediate(ctx, offs, scratch, true);
+	if( base == SP_REG || out == SP_REG )
+		encode_add_sub_ext(ctx, 1, 0, 0, scratch, /*option=UXTX*/3, /*imm3=*/0, base, out);
+	else
+		encode_add_sub_reg(ctx, 1, 0, 0, 0, scratch, 0, base, out);
+}
+
+// Generic move dispatching on the operand kinds of `dst` and `src`:
+//   R_REG     <- R_REG      register move, or address computation when the
+//                           integer source carries a non-zero offset
+//   R_REG     <- R_REG_PTR  load from [base + offs]
+//   R_REG_PTR <- R_REG      store to [base + offs]
+//   R_REG     <- R_CONST    constant load via emit_load_const
+//   R_REG_PTR <- R_REG_PTR  load into a scratch register, then store
+// Any other combination aborts through jit_error.
+static void emit_mov( code_ctx *ctx, ereg dst, ereg src, emit_mode mode ) {
+ int dst_kind = REG_KIND(dst);
+ int src_kind = REG_KIND(src);
+
+ if( dst_kind == R_REG && src_kind == R_REG ) {
+ // MK_STACK_OFFS / LEA-rewritten ADDRESS: src encodes (reg, offs).
+ // Treat as an address computation: dst = src_reg + offs.
+ if( !is_fp_mode(mode) && REG_VALUE(src) != 0 ) {
+ emit_lea_imm(ctx, gpr_id(dst), gpr_id(src), REG_VALUE(src));
+ return;
+ }
+ if( is_fp_mode(mode) )
+ emit_mov_fpr(ctx, fpr_id(dst), fpr_id(src), mode);
+ else
+ emit_mov_gpr(ctx, gpr_id(dst), gpr_id(src), sf_for(mode));
+ return;
+ }
+ if( dst_kind == R_REG && src_kind == R_REG_PTR ) {
+ // LOAD: dst <- [base + offs]
+ Arm64Reg base = gpr_id(src);
+ int offs = REG_VALUE(src);
+ int reg_t = is_fp_mode(mode) ? fpr_id(dst) : gpr_id(dst);
+ emit_ld_st(ctx, /*is_load=*/true, mode, reg_t, base, offs);
+ return;
+ }
+ if( dst_kind == R_REG_PTR && src_kind == R_REG ) {
+ // STORE: [base + offs] <- src
+ Arm64Reg base = gpr_id(dst);
+ int offs = REG_VALUE(dst);
+ int reg_t = is_fp_mode(mode) ? fpr_id(src) : gpr_id(src);
+ emit_ld_st(ctx, /*is_load=*/false, mode, reg_t, base, offs);
+ return;
+ }
+ if( dst_kind == R_REG && src_kind == R_CONST ) {
+ emit_load_const(ctx, dst, (uint64_t)REG_VALUE(src), mode);
+ return;
+ }
+ if( dst_kind == R_REG_PTR && src_kind == R_REG_PTR ) {
+ // memory-to-memory: load through a scratch register, then store.
+ // Uses V31 for FP modes and ARM_TMP1 for integer/pointer modes.
+ // NOTE(review): this is only safe if V31 is never handed out by the
+ // register allocator — confirm against the float register lists in
+ // hl_jit_init_regs.
+ Arm64Reg sb = gpr_id(src);
+ int so = REG_VALUE(src);
+ Arm64Reg db = gpr_id(dst);
+ int doff = REG_VALUE(dst);
+ if( is_fp_mode(mode) ) {
+ emit_ld_st(ctx, /*is_load=*/true, mode, (Arm64FpReg)31, sb, so);
+ emit_ld_st(ctx, /*is_load=*/false, mode, (Arm64FpReg)31, db, doff);
+ } else {
+ emit_ld_st(ctx, /*is_load=*/true, mode, ARM_TMP1, sb, so);
+ emit_ld_st(ctx, /*is_load=*/false, mode, ARM_TMP1, db, doff);
+ }
+ return;
+ }
+ jit_error("aarch64 emit_mov: unhandled operand kinds");
+}
+
+// ----------------------------------------------------------------------------
+// LOAD_CONST: writes the constant `value` to `out` using width/type `mode`.
+//   - `out` is not a plain register: the bit pattern goes through ARM_TMP1 and
+//     is stored to [base + offs] with an integer store of matching width.
+//   - float mode, register destination: ADRP + LDR from a constant registered
+//     with alloc_const (both instructions are emitted with placeholder
+//     operands).
+//   - integer mode, register destination: load_immediate straight into `out`.
+// ----------------------------------------------------------------------------
+static void emit_load_const( code_ctx *ctx, ereg out, uint64_t value, emit_mode mode ) {
+ if( REG_KIND(out) != R_REG ) {
+ // emit-into-memory: load the bit pattern into ARM_TMP1 and store as the
+ // requested width. For floats we treat the FP constant's bit pattern as
+ // an integer — the resulting STR writes the same bytes a FP STR would.
+ emit_mode store_mode = is_fp_mode(mode) ? (mode == M_F32 ? M_I32 : M_PTR) : mode;
+ load_immediate(ctx, (int64_t)value, ARM_TMP1, sf_for(store_mode) == 1);
+ Arm64Reg base = gpr_id(out);
+ int offs = REG_VALUE(out);
+ emit_ld_st(ctx, /*is_load=*/false, store_mode, ARM_TMP1, base, offs);
+ return;
+ }
+ if( is_fp_mode(mode) ) {
+ // Float constants live in the literal pool: ADRP+LDR into the FP reg.
+ // jit_emit.c packs F32 constants into the low 32 bits of `value` with
+ // the upper 32 bits zeroed, so we must use the matching width-encoding
+ // (size=2 → LDR Sd, ...). Loading 8 bytes would pull the zero high
+ // half into D and yield a subnormal double when read as F64.
+ Arm64FpReg fp_dst = fpr_id(out);
+ // Position of the ADRP, recorded so alloc_const can associate the
+ // constant with this instruction pair.
+ int adrp_pos = byte_count(ctx->code);
+ int size = (mode == M_F32) ? 2 : 3;
+ encode_adrp(ctx, 0, 0, ARM_TMP1); // ADRP X16, page
+ // LDR Sd|Dd, [X16, #lo12] V=1, opc=01, imm12=placeholder
+ encode_ldr_str_imm(ctx, size, 1, 1, 0, ARM_TMP1, (Arm64Reg)fp_dst);
+ alloc_const(ctx, value, adrp_pos);
+ return;
+ }
+ load_immediate(ctx, (int64_t)value, gpr_id(out), sf_for(mode) == 1);
+}
+
+// ----------------------------------------------------------------------------
+// PUSH / POP. ARM has no explicit push/pop; we use STR/LDR with pre/post-index
+// on SP. To match the x86 stack-offset accounting (which assumes 16 bytes are
+// already consumed by RIP+RBP), PUSH X29 emits STP X29, X30, [SP, #-16]! so
+// LR is implicitly saved as part of FP-save. POP X29 mirrors with LDP.
+// All other PUSH/POPs use 16-byte SP movement (8 bytes wasted) to keep SP
+// 16-byte aligned per AAPCS64.
+// ----------------------------------------------------------------------------
+// emit_push: pushes operand `r` (any kind the materialize_* helpers accept).
+// Float modes always store the full 64-bit D register.
+static void emit_push( code_ctx *ctx, ereg r, emit_mode mode ) {
+ if( is_fp_mode(mode) ) {
+ // SUB SP, SP, #16; STR Dn, [SP]. Materialize through V31 if r is not a register.
+ // NOTE(review): relies on V31 not holding a live allocator-assigned
+ // value at this point — confirm against hl_jit_init_regs.
+ Arm64FpReg src = (REG_KIND(r) == R_REG) ? fpr_id(r) : materialize_fpr(ctx, r, mode, (Arm64FpReg)31);
+ emit_sp_offs(ctx, -16);
+ encode_ldr_str_imm(ctx, /*size=*/3, /*V=*/1, /*opc=*/0 /*STR*/, 0, SP_REG, (Arm64Reg)src);
+ return;
+ }
+ // materialize_gpr handles MK_STACK_OFFS by adding the offset; gpr_id alone
+ // would discard it (mapping STACK_REG->FP and ignoring REG_VALUE).
+ Arm64Reg src = materialize_gpr(ctx, r, mode, ARM_TMP1);
+ if( src == FP && REG_KIND(r) == R_REG && REG_VALUE(r) == 0 ) {
+ // True PUSH FP (prologue) — emit STP x29,x30,[sp,#-16]! to also save LR.
+ // imm7 is the 7-bit two's-complement encoding of -2 (scaled by 8 → -16 bytes).
+ encode_ldp_stp(ctx, /*opc=*/2, /*V=*/0, /*mode=*/0x03, /*imm7=*/-2 & 0x7F, LR, SP_REG, FP);
+ return;
+ }
+ // SUB SP, SP, #16; STR Xn, [SP]
+ emit_sp_offs(ctx, -16);
+ encode_ldr_str_imm(ctx, /*size=*/3, /*V=*/0, /*opc=*/0, 0, SP_REG, src);
+}
+
+// Pops the 16-byte slot at the top of the stack into register `r`.
+// Only plain register destinations are supported. Popping FP (integer mode)
+// emits a post-indexed LDP that reloads X29 and X30 together; every other
+// destination is a 64-bit LDR from [SP] followed by SP += 16.
+static void emit_pop( code_ctx *ctx, ereg r, emit_mode mode ) {
+	if( REG_KIND(r) != R_REG ) jit_error("aarch64 POP non-reg not implemented");
+	bool fp_mode = is_fp_mode(mode);
+	if( !fp_mode && gpr_id(r) == FP ) {
+		// LDP X29, X30, [SP], #16   opc=10, V=0, mode=01 (post-index load), imm7=2
+		encode_ldp_stp(ctx, /*opc=*/2, /*V=*/0, /*mode=*/0x01, /*imm7=*/2, LR, SP_REG, FP);
+		return;
+	}
+	Arm64Reg target = fp_mode ? (Arm64Reg)fpr_id(r) : gpr_id(r);
+	// LDR Xn|Dn, [SP]; ADD SP, SP, #16
+	encode_ldr_str_imm(ctx, /*size=*/3, /*V=*/fp_mode ? 1 : 0, /*opc=*/1 /*LDR*/, 0, SP_REG, target);
+	emit_sp_offs(ctx, 16);
+}
+
+// ----------------------------------------------------------------------------
+// CMP / TEST. e->mode tells us int width / float; e->size_offs holds the
+// upstream OJxxx opcode (consumed later by the JCOND/CMOV that follows).
+// ----------------------------------------------------------------------------
+// emit_cmp: sets the condition flags from comparing e->a with e->b.
+// Float modes emit FCMP; integer modes emit a flag-setting subtract (or add,
+// for small negative constants) whose result is discarded into XZR.
+static void emit_cmp( code_ctx *ctx, einstr *e ) {
+ if( is_fp_mode(e->mode) ) {
+ // FCMP. NaN handling deferred; bare FCMP is correct for ordered compares
+ // and gives QNaN-as-unordered which matches ARM defaults.
+ // Non-register operands are materialized into V29 / V30.
+ // NOTE(review): relies on V29/V30 not holding live allocator-assigned
+ // values — confirm against the float register lists in hl_jit_init_regs.
+ Arm64FpReg ra = materialize_fpr(ctx, e->a, e->mode, (Arm64FpReg)29);
+ Arm64FpReg rb = materialize_fpr(ctx, e->b, e->mode, (Arm64FpReg)30);
+ int type = (e->mode == M_F64) ? 1 : 0;
+ encode_fp_compare(ctx, /*M=*/0, /*S=*/0, type, rb, /*op=*/0, ra);
+ return;
+ }
+ // Integer compare: SUBS XZR, Xa, Xb (or imm form).
+ // materialize_gpr handles R_REG (incl. MK_STACK_OFFS via emit_lea_imm),
+ // R_CONST, and R_REG_PTR — picking gpr_id alone would drop the FP+N
+ // offset for stack-allocated addresses.
+ int sf = sf_for(e->mode);
+ Arm64Reg a = materialize_gpr(ctx, e->a, e->mode, ARM_TMP1);
+ if( REG_KIND(e->b) == R_CONST ) {
+ int64_t val = (int64_t)REG_VALUE(e->b);
+ if( val >= 0 && val <= 0xFFF ) {
+ // CMP Xa, #imm (SUBS XZR, Xa, #imm; sf, op=1, S=1)
+ encode_add_sub_imm(ctx, sf, 1, 1, 0, (int)val, a, XZR);
+ return;
+ }
+ if( val < 0 && -val <= 0xFFF ) {
+ // CMN Xa, #imm (ADDS XZR, Xa, #imm)
+ encode_add_sub_imm(ctx, sf, 0, 1, 0, (int)-val, a, XZR);
+ return;
+ }
+ // Constant does not fit an immediate form: go through ARM_TMP2
+ // (ARM_TMP1 may be holding operand a).
+ load_immediate(ctx, val, ARM_TMP2, sf == 1);
+ encode_add_sub_reg(ctx, sf, 1, 1, 0, ARM_TMP2, 0, a, XZR);
+ return;
+ }
+ // Pass `a` as the avoid register so loading b cannot clobber it.
+ Arm64Reg b = materialize_gpr_ex(ctx, e->b, e->mode, ARM_TMP2, a);
+ encode_add_sub_reg(ctx, sf, 1, 1, 0, b, 0, a, XZR);
+}
+
+// TEST: sets the flags from operand e->a ANDed with itself. Integer modes
+// only; a float mode aborts via jit_error.
+static void emit_test( code_ctx *ctx, einstr *e ) {
+	if( is_fp_mode(e->mode) ) jit_error("aarch64 TEST float not supported");
+	// Going through materialize_gpr means an R_REG operand that carries a
+	// non-zero offset is first turned into the actual address value.
+	Arm64Reg operand = materialize_gpr(ctx, e->a, e->mode, ARM_TMP1);
+	// TST Xa, Xa == ANDS XZR, Xa, Xa; opc=11 (ANDS), shift=0, N=0
+	encode_logical_reg(ctx, sf_for(e->mode), 0x03, 0, 0, operand, 0, operand, XZR);
+}
+
+// ----------------------------------------------------------------------------
+// JCOND / JUMP — branches are emitted with placeholder displacements and
+// recorded here for later patching.
+// ----------------------------------------------------------------------------
+// Appends one fixup record to ctx->branch_fixups as three consecutive ints:
+// code position of the branch, target IR op index, conditional flag.
+static void add_branch_fixup( code_ctx *ctx, int code_pos, int target_op, int is_cond ) {
+	int record[3] = { code_pos, target_op, is_cond };
+	for( int i = 0; i < 3; i++ )
+		int_arr_add_impl(&ctx->jit->galloc, &ctx->branch_fixups, record[i]);
+}
+
+// Emits an unconditional branch with a placeholder displacement and records a
+// fixup for it. `target_op_offset` is relative to the op following the
+// current one: target = cur_op + 1 + target_op_offset.
+static void emit_jump( code_ctx *ctx, int target_op_offset ) {
+	int here = byte_count(ctx->code);
+	int dest_op = ctx->cur_op + 1 + target_op_offset;
+	encode_branch_uncond(ctx, 0);  // displacement filled in later
+	add_branch_fixup(ctx, here, dest_op, 0);
+}
+
+// Conditional counterpart of emit_jump: emits B.<cond> with a placeholder
+// displacement and records a conditional fixup targeting
+// cur_op + 1 + target_op_offset.
+static void emit_jump_cond( code_ctx *ctx, ArmCondition cond, int target_op_offset ) {
+	int here = byte_count(ctx->code);
+	int dest_op = ctx->cur_op + 1 + target_op_offset;
+	encode_branch_cond(ctx, 0, cond);  // displacement filled in later
+	add_branch_fixup(ctx, here, dest_op, 1);
+}
+
+// Mirror x86 get_cond_jump: walk back through MOV/JCOND/CMOV/XCHG/CXCHG to find
+// the comparison whose flags this JCOND/CMOV consumes. Translate the upstream
+// OJxxx opcode (stored in that instruction's size_offs) into an ARM condition
+// code. Aborts via jit_error if the scan reaches the start of the instruction
+// array without finding such an instruction, or if the opcode is unknown.
+static ArmCondition get_cond_jump( code_ctx *ctx ) {
+	int idx = ctx->cur_op;
+	einstr *p;
+	do {
+		// Never step in front of reg_instrs[0].
+		if( --idx < 0 ) jit_error("aarch64 get_cond_jump: no flag-setting instruction found");
+		p = ctx->jit->reg_instrs + idx;
+	} while( p->op == MOV || p->op == JCOND || p->op == CMOV || p->op == XCHG || p->op == CXCHG );
+	switch( p->size_offs ) {
+	case OJFalse:
+	case OJNull:
+		return COND_EQ;
+	case OJTrue:
+	case OJNotNull:
+		return COND_NE;
+	// For ARM64 FCMP, NaN sets N=0, Z=0, C=1, V=1. IEEE 754 ordered
+	// predicates need to evaluate FALSE for NaN. HS (C==1) and HI (C==1
+	// && Z==0) both fire on NaN — wrong. GE (N==V) and GT (Z==0 && N==V)
+	// reject NaN since V differs from N. The x86 backend can use JUGte/JUGt
+	// for FP only because x86 UCOMISS sets CF=1 on NaN, making JAE/JA
+	// reject it; ARM's carry conventions are inverted from x86's.
+	// LO (C==0) and LS (C==0 || Z==1) already reject NaN on ARM (C=1).
+	case OJSGte:
+		return COND_GE;
+	case OJSGt:
+		return COND_GT;
+	case OJUGte:
+		return COND_HS;
+	case OJSLt:
+		return is_fp_mode(p->mode) ? COND_LO : COND_LT;
+	case OJSLte:
+		return is_fp_mode(p->mode) ? COND_LS : COND_LE;
+	case OJULt:
+		return COND_LO;
+	case OJEq:
+		return COND_EQ;
+	case OJNotEq:
+		return COND_NE;
+	case OJNotLt:
+		// HS (C==1) fires on NaN (C=1) and on ordered >= (C=1).
+		// GE (N==V) would reject NaN (V=1, N=0) — wrong, NaN means
+		// "not less than" should fire.
+		return COND_HS;
+	case OJNotGte:
+		// LT (N!=V) is signed less-than for INT, and for FP it fires on
+		// NaN (V=1) — the right semantics for "not >=".
+		// LO (C==0) would not fire on NaN (C=1) — wrong for FP.
+		return COND_LT;
+	case 0:
+		if( p->op == DEBUG_BREAK ) return COND_EQ;
+		// fallthrough
+	default:
+		jit_error("aarch64 get_cond_jump: unknown OJ opcode");
+		return COND_AL;
+	}
+}
+
+// Rewrites the displacement field of the branch instruction at byte offset
+// `pos` so that it targets `target_byte_pos`. `is_cond` selects the B.cond
+// layout (imm19 in bits [23:5]) versus the B layout (imm26 in bits [25:0]).
+// Aborts when the distance is not a multiple of 4 or does not fit the field.
+static void patch_branch( code_ctx *ctx, int pos, int target_byte_pos, int is_cond ) {
+	int delta = target_byte_pos - pos;
+	if( (delta & 3) != 0 ) jit_error("aarch64 branch target not 4-byte aligned");
+	int words = delta >> 2;
+	unsigned int *insn = (unsigned int*)&ctx->code.values[pos];
+	if( !is_cond ) {
+		// Unconditional B: 26-bit signed word offset, opcode bits kept.
+		if( words < -(1 << 25) || words >= (1 << 25) )
+			jit_error("aarch64 B out of range");
+		*insn = (*insn & ~0x03FFFFFF) | (words & 0x03FFFFFF);
+		return;
+	}
+	// B.cond: 19-bit signed word offset; condition and opcode bits kept.
+	if( words < -(1 << 18) || words >= (1 << 18) )
+		jit_error("aarch64 B.cond out of range (Phase 2 limit)");
+	*insn = (*insn & ~(0x7FFFF << 5)) | ((words & 0x7FFFF) << 5);
+}
+
+// ----------------------------------------------------------------------------
+// Operand materialization: returns a GPR that holds the value of `src`.
+//   R_REG      the register itself when it carries no offset; otherwise
+//              register + offset is computed into `tmp`
+//   R_CONST    the constant is loaded into `tmp`
+//   R_REG_PTR  the memory operand is loaded into `tmp`; `avoid` is forwarded
+//              to emit_ld_st_ex so its offset temp cannot clobber a register
+//              the caller still needs
+//   otherwise  delegated to emit_mov with `tmp` as destination
+// ----------------------------------------------------------------------------
+static Arm64Reg materialize_gpr_ex( code_ctx *ctx, ereg src, emit_mode mode, Arm64Reg tmp, Arm64Reg avoid ) {
+	switch( REG_KIND(src) ) {
+	case R_REG: {
+		Arm64Reg reg = gpr_id(src);
+		int disp = REG_VALUE(src);
+		if( disp != 0 ) {
+			emit_lea_imm(ctx, tmp, reg, disp);
+			return tmp;
+		}
+		return reg;
+	}
+	case R_CONST:
+		load_immediate(ctx, (int64_t)REG_VALUE(src), tmp, sf_for(mode) == 1);
+		return tmp;
+	case R_REG_PTR:
+		emit_ld_st_ex(ctx, true, mode, tmp, gpr_id(src), REG_VALUE(src), avoid);
+		return tmp;
+	default:
+		emit_mov(ctx, R(tmp), src, mode);
+		return tmp;
+	}
+}
+
+// Shorthand for materialize_gpr_ex with no register to protect: passes
+// (Arm64Reg)-1 as `avoid`.
+static Arm64Reg materialize_gpr( code_ctx *ctx, ereg src, emit_mode mode, Arm64Reg tmp ) {
+ return materialize_gpr_ex(ctx, src, mode, tmp, (Arm64Reg)-1);
+}
+
+// Float counterpart of materialize_gpr: returns an FP register holding `src`.
+// A register operand is returned as-is; a memory operand is loaded into `tmp`;
+// a constant is registered with alloc_const and fetched into `tmp` with an
+// ADRP + 64-bit LDR pair (clobbers ARM_TMP1). Other kinds abort.
+static Arm64FpReg materialize_fpr( code_ctx *ctx, ereg src, emit_mode mode, Arm64FpReg tmp ) {
+	int kind = REG_KIND(src);
+	if( kind == R_REG )
+		return fpr_id(src);
+	if( kind == R_CONST ) {
+		// FP constants always live in the literal pool.
+		int adrp_pos = byte_count(ctx->code);
+		encode_adrp(ctx, 0, 0, ARM_TMP1);
+		encode_ldr_str_imm(ctx, 3, 1, 1, 0, ARM_TMP1, (Arm64Reg)tmp);
+		alloc_const(ctx, (uint64_t)REG_VALUE(src), adrp_pos);
+		return tmp;
+	}
+	if( kind != R_REG_PTR ) {
+		jit_error("aarch64 materialize_fpr: unsupported operand kind");
+		return (Arm64FpReg)0;
+	}
+	emit_ld_st(ctx, /*is_load=*/true, mode, tmp, gpr_id(src), REG_VALUE(src));
+	return tmp;
+}
+
+// ----------------------------------------------------------------------------
+// Raw bitfield-move encoder (SBFM / BFM / UBFM), used by the sign-extension
+// helpers. Field layout:
+//   [31]=sf  [30:29]=opc (00=SBFM, 01=BFM, 10=UBFM)  [28:23]=100110
+//   [22]=N (same as sf)  [21:16]=immr  [15:10]=imms  [9:5]=Rn  [4:0]=Rd
+// ----------------------------------------------------------------------------
+static void emit_bitfield( code_ctx *ctx, int sf, int opc, int immr, int imms, Arm64Reg Rn, Arm64Reg Rd ) {
+	unsigned int insn = 0x26u << 23;
+	insn |= (unsigned)sf << 31;
+	insn |= (unsigned)opc << 29;
+	insn |= (unsigned)sf << 22;
+	insn |= (immr & 0x3F) << 16;
+	insn |= (imms & 0x3F) << 10;
+	insn |= (Rn & 0x1F) << 5;
+	insn |= (Rd & 0x1F);
+	EMIT32(ctx, insn);
+}
+
+// Sign-extends an 8- or 16-bit value in Rn to a 32-bit result in Rd
+// (SXTB Wd, Wn / SXTH Wd, Wn, encoded as 32-bit SBFM #0, #7|#15).
+// Any other `in_mode` aborts.
+static void emit_sxt_to_int( code_ctx *ctx, emit_mode in_mode, Arm64Reg Rn, Arm64Reg Rd ) {
+	int top_bit;
+	if( in_mode == M_UI8 ) {
+		top_bit = 7;
+	} else if( in_mode == M_UI16 ) {
+		top_bit = 15;
+	} else {
+		jit_error("emit_sxt_to_int unsupported in_mode");
+		return;
+	}
+	emit_bitfield(ctx, 0, 0x00, 0, top_bit, Rn, Rd);
+}
+
+// Sign-extends an 8-, 16- or 32-bit value in Rn to a 64-bit result in Rd
+// (SBFM Xd, Xn, #0, #N with N = 7, 15 or 31). Any other `in_mode` aborts.
+static void emit_sxt_to_ptr( code_ctx *ctx, emit_mode in_mode, Arm64Reg Rn, Arm64Reg Rd ) {
+	int top_bit;
+	if( in_mode == M_UI8 ) {
+		top_bit = 7;
+	} else if( in_mode == M_UI16 ) {
+		top_bit = 15;
+	} else if( in_mode == M_I32 ) {
+		top_bit = 31;
+	} else {
+		jit_error("emit_sxt_to_ptr unsupported in_mode");
+		return;
+	}
+	emit_bitfield(ctx, 1, 0x00, 0, top_bit, Rn, Rd);
+}
+
+// Zero-extends an 8- or 16-bit value in Rn to 32 bits in Rd
+// (UXTB / UXTH), implemented as AND Wd, Wn, #0xFF or #0xFFFF.
+// Any other `in_mode` aborts.
+static void emit_uxt_to_w( code_ctx *ctx, emit_mode in_mode, Arm64Reg Rn, Arm64Reg Rd ) {
+	int imms;  // logical-immediate imms: (number of low set bits) - 1
+	if( in_mode == M_UI8 ) {
+		imms = 7;   // mask 0xFF
+	} else if( in_mode == M_UI16 ) {
+		imms = 15;  // mask 0xFFFF
+	} else {
+		jit_error("emit_uxt_to_w unsupported in_mode");
+		return;
+	}
+	encode_logical_imm(ctx, 0, 0x00, 0, 0, imms, Rn, Rd);
+}
+
+// ----------------------------------------------------------------------------
+// BINOP / UNOP integer. e->size_offs encodes the upstream Haxe op (OAdd, ...).
+// ARM has 3-operand ALU so we can write directly to `out` from `a, b`.
+// ----------------------------------------------------------------------------
+static void emit_div_mod( code_ctx *ctx, hl_op op, Arm64Reg out, Arm64Reg a, Arm64Reg b, int sf );
+
+// Emit an integer binary operation `out = a <op> b` in the given mode.
+//   op     : upstream HL opcode (OAdd, OSub, OMul, OAnd, OOr, OXor, OShl,
+//            OUShr, OSShr, OSDiv, OUDiv, OSMod, OUMod)
+//   out_e  : destination operand; when it is not a plain register the result
+//            is computed in ARM_TMP1 and then moved out with emit_mov
+//   a_e/b_e: source operands, materialized into registers as needed
+//   mode   : operand width; M_UI8 / M_UI16 results are masked to 8 / 16 bits
+static void emit_binop_int( code_ctx *ctx, hl_op op, ereg out_e, ereg a_e, ereg b_e, emit_mode mode ) {
+    int sf = sf_for(mode);
+    bool out_is_reg = (REG_KIND(out_e) == R_REG);
+    Arm64Reg out = out_is_reg ? gpr_id(out_e) : ARM_TMP1;
+    Arm64Reg a = materialize_gpr(ctx, a_e, mode, ARM_TMP1);
+    bool emitted = false;
+
+    // Constant fast path: ADD/SUB with a 12-bit unsigned immediate. A negative
+    // constant flips the operation (a + (-k) == a - k) so it still fits imm12.
+    // The range test is written as `v >= -0xFFF` so that negating the most
+    // negative value is never attempted.
+    if( REG_KIND(b_e) == R_CONST && (op == OAdd || op == OSub) ) {
+        int64_t v = (int64_t)REG_VALUE(b_e);
+        if( v >= 0 && v <= 0xFFF ) {
+            encode_add_sub_imm(ctx, sf, op == OSub ? 1 : 0, 0, 0, (int)v, a, out);
+            emitted = true;
+        } else if( v < 0 && v >= -0xFFF ) {
+            encode_add_sub_imm(ctx, sf, op == OSub ? 0 : 1, 0, 0, (int)-v, a, out);
+            emitted = true;
+        }
+    }
+
+    if( !emitted ) {
+        // General path: both operands in registers. `b` goes to ARM_TMP2 when
+        // it needs a scratch register, and must not land on `a`.
+        Arm64Reg b = materialize_gpr_ex(ctx, b_e, mode, ARM_TMP2, a);
+
+        switch( op ) {
+        case OAdd:  encode_add_sub_reg(ctx, sf, 0, 0, 0, b, 0, a, out); break;
+        case OSub:  encode_add_sub_reg(ctx, sf, 1, 0, 0, b, 0, a, out); break;
+        case OMul:  encode_madd_msub(ctx, sf, 0, b, XZR, a, out); break;
+        case OAnd:  encode_logical_reg(ctx, sf, 0x00, 0, 0, b, 0, a, out); break;
+        case OOr:   encode_logical_reg(ctx, sf, 0x01, 0, 0, b, 0, a, out); break;
+        case OXor:  encode_logical_reg(ctx, sf, 0x02, 0, 0, b, 0, a, out); break;
+        case OShl:  encode_shift_reg(ctx, sf, 0x00, b, a, out); break; // LSLV
+        case OUShr: encode_shift_reg(ctx, sf, 0x01, b, a, out); break; // LSRV
+        case OSShr: encode_shift_reg(ctx, sf, 0x02, b, a, out); break; // ASRV
+        case OSDiv:
+        case OUDiv:
+        case OSMod:
+        case OUMod:
+            emit_div_mod(ctx, op, out, a, b, sf);
+            break;
+        default:
+            jit_error("aarch64 emit_binop_int: unsupported op");
+        }
+    }
+
+    // Sub-word result truncation. This must run for the immediate fast path
+    // as well: e.g. a UI8 `0xFF + 1` has to wrap to 0, not leave 0x100 in the
+    // register. 32/64-bit modes need no mask.
+    if( mode == M_UI8 ) {
+        encode_logical_imm(ctx, 0, 0x00, 0, 0, 7, out, out);  // AND Wd, Wd, #0xFF
+    } else if( mode == M_UI16 ) {
+        encode_logical_imm(ctx, 0, 0x00, 0, 0, 15, out, out); // AND Wd, Wd, #0xFFFF
+    }
+
+    // Non-register destination: the result was built in ARM_TMP1.
+    if( !out_is_reg ) {
+        emit_mov(ctx, out_e, R(ARM_TMP1), mode);
+    }
+}
+
+// ----------------------------------------------------------------------------
+// Integer divide / modulo with Haxe semantics:
+// OUDiv: b == 0 => 0
+// OUMod: b == 0 => 0
+// OSDiv: b == 0 || -1 => a*b (matches x86; avoids INT_MIN/-1 overflow trap)
+// OSMod: b == 0 || -1 => 0
+// ARM SDIV/UDIV give 0 for div/0, but mod via MSUB needs explicit guarding.
+// ----------------------------------------------------------------------------
+// Emits the guarded divide/modulo sequence described above.
+//   op  : one of OSDiv / OUDiv / OSMod / OUMod
+//   out : destination register (may alias `a` and/or `b`)
+//   a,b : dividend and divisor registers
+//   sf  : width flag forwarded to every encoder call
+// Layout of the emitted code: guard branches -> mainline -> B over the
+// special path -> special path. The three forward branches are emitted with
+// a zero displacement and patched at the end once all positions are known.
+static void emit_div_mod( code_ctx *ctx, hl_op op, Arm64Reg out, Arm64Reg a, Arm64Reg b, int sf ) {
+ bool unsign = (op == OUDiv || op == OUMod);
+ bool is_div = (op == OSDiv || op == OUDiv);
+
+ // Test b for 0; signed ops also test for -1.
+ encode_logical_reg(ctx, sf, 0x03, 0, 0, b, 0, b, XZR); // TST b, b
+ int jz_pos = byte_count(ctx->code);
+ encode_branch_cond(ctx, 0, COND_EQ); // patched later
+
+ // jneg_pos stays -1 for unsigned ops, which have no b == -1 guard to patch.
+ int jneg_pos = -1;
+ if( !unsign ) {
+ // CMN b, #1 (= b + 1; sets Z if b == -1)
+ encode_add_sub_imm(ctx, sf, 0, 1, 0, 1, b, XZR);
+ jneg_pos = byte_count(ctx->code);
+ encode_branch_cond(ctx, 0, COND_EQ);
+ }
+
+ // Mainline. encode_div's U bit is 0=UDIV, 1=SDIV (per the ARM ARM
+ // bit-10 encoding) — pass `unsign ? 0 : 1`, NOT the inverse.
+ if( is_div ) {
+ // SDIV/UDIV out, a, b
+ encode_div(ctx, sf, unsign ? 0 : 1, b, a, out);
+ } else {
+ // MSUB needs the ORIGINAL `a` and `b` after the divide; SDIV writes
+ // `out`, so any of {out==a, out==b} would clobber a source. Spill
+ // the aliased operand(s) to backend temps first. ARM_TMP3 is
+ // reserved precisely for cases like this where we need a third
+ // independent register.
+ Arm64Reg a_safe = a, b_safe = b;
+ if( out == a ) {
+ emit_mov_gpr(ctx, ARM_TMP3, a, sf);
+ a_safe = ARM_TMP3;
+ if( b == a ) b_safe = ARM_TMP3; // a==b too: same value in TMP3
+ }
+ // `b_safe == b` is false only when b was already redirected to TMP3 above.
+ if( out == b && b_safe == b ) {
+ // Need a different temp from a_safe (which may be ARM_TMP3 already).
+ Arm64Reg t = (a_safe == ARM_TMP1) ? ARM_TMP2 : ARM_TMP1;
+ emit_mov_gpr(ctx, t, b, sf);
+ b_safe = t;
+ }
+ encode_div(ctx, sf, unsign ? 0 : 1, b_safe, a_safe, out);
+ // MSUB out, out, b_safe, a_safe => out = a_safe - out * b_safe
+ encode_madd_msub(ctx, sf, 1, b_safe, a_safe, out, out);
+ }
+ // Unconditional branch that skips the special path; patched below.
+ int jdone_pos = byte_count(ctx->code);
+ encode_branch_uncond(ctx, 0);
+
+ // Special case path: result = 0 (mod or unsigned div) or a*b (signed div).
+ // The guards jump here before the mainline runs, so the original `a` and
+ // `b` registers are still intact at this point.
+ int special_pos = byte_count(ctx->code);
+ if( op == OSDiv ) {
+ // out = a * b
+ encode_madd_msub(ctx, sf, 0, b, XZR, a, out);
+ } else {
+ // out = 0
+ encode_logical_reg(ctx, sf, 0x01, 0, 0, XZR, 0, XZR, out); // ORR out, XZR, XZR
+ }
+
+ int after = byte_count(ctx->code);
+
+ // Patch branches. Displacements are byte distances >> 2 (instruction
+ // units). The conditional branches take a 19-bit field at bits [23:5];
+ // the unconditional branch takes a 26-bit field at bits [25:0].
+ int delta_jz = (special_pos - jz_pos) >> 2;
+ *(unsigned int*)&ctx->code.values[jz_pos] =
+ (*(unsigned int*)&ctx->code.values[jz_pos] & ~(0x7FFFF << 5)) | ((delta_jz & 0x7FFFF) << 5);
+ if( jneg_pos >= 0 ) {
+ int delta_jn = (special_pos - jneg_pos) >> 2;
+ *(unsigned int*)&ctx->code.values[jneg_pos] =
+ (*(unsigned int*)&ctx->code.values[jneg_pos] & ~(0x7FFFF << 5)) | ((delta_jn & 0x7FFFF) << 5);
+ }
+ int delta_done = (after - jdone_pos) >> 2;
+ *(unsigned int*)&ctx->code.values[jdone_pos] =
+ (*(unsigned int*)&ctx->code.values[jdone_pos] & ~0x03FFFFFF) | (delta_done & 0x03FFFFFF);
+}
+
+// ----------------------------------------------------------------------------
+// BINOP / UNOP float.
+// ----------------------------------------------------------------------------
+// Emit a floating-point binary operation `out = a <op> b`.
+//   op    : OAdd / OSub / OMul / OSDiv (mapped to FADD / FSUB / FMUL / FDIV)
+//   out_e : destination; when it is not a register the result is produced in
+//           V31 and stored to [gpr_id(out_e) + REG_VALUE(out_e)]
+//   mode  : M_F64 selects double precision, anything else single precision
+static void emit_binop_fp( code_ctx *ctx, hl_op op, ereg out_e, ereg a_e, ereg b_e, emit_mode mode ) {
+    // Resolve the opcode before emitting anything. On an unsupported op we
+    // report the error and bail out, so `opcode` is never read while
+    // indeterminate even if jit_error() returns to its caller.
+    int opcode;
+    switch( op ) {
+    case OAdd:  opcode = 0x02; break; // FADD
+    case OSub:  opcode = 0x03; break; // FSUB
+    case OMul:  opcode = 0x00; break; // FMUL
+    case OSDiv: opcode = 0x01; break; // FDIV
+    default:
+        jit_error("aarch64 emit_binop_fp: unsupported op");
+        return;
+    }
+
+    bool out_to_mem = (REG_KIND(out_e) != R_REG);
+    Arm64FpReg out = out_to_mem ? (Arm64FpReg)31 : fpr_id(out_e);
+    // Use V29/V30 as scratch FP regs (in our scratch list, won't collide with `out`=V31).
+    Arm64FpReg a = materialize_fpr(ctx, a_e, mode, (Arm64FpReg)29);
+    Arm64FpReg b = materialize_fpr(ctx, b_e, mode, (Arm64FpReg)30);
+    int type = (mode == M_F64) ? 1 : 0;
+
+    encode_fp_arith(ctx, /*M=*/0, /*S=*/0, type, b, opcode, a, out);
+
+    if( out_to_mem ) {
+        // Spill the V31 result to the destination slot.
+        Arm64Reg base = gpr_id(out_e);
+        int offs = REG_VALUE(out_e);
+        emit_ld_st(ctx, false, mode, out, base, offs);
+    }
+}
+
+// Emit a unary operation `out = <op> a`. Integer modes support ONeg, ONot,
+// OIncr and ODecr; floating-point modes support ONeg only.
+static void emit_unop( code_ctx *ctx, hl_op op, ereg out_e, ereg a_e, emit_mode mode ) {
+    bool dst_in_mem = (REG_KIND(out_e) != R_REG);
+
+    if( !is_fp_mode(mode) ) {
+        int sf = sf_for(mode);
+        Arm64Reg dst = dst_in_mem ? ARM_TMP1 : gpr_id(out_e);
+        Arm64Reg src = materialize_gpr(ctx, a_e, mode, ARM_TMP1);
+        switch( op ) {
+        case ONeg:
+            // NEG alias: SUB dst, XZR, src
+            encode_add_sub_reg(ctx, sf, 1, 0, 0, src, 0, XZR, dst);
+            break;
+        case ONot:
+            // Boolean toggle: EOR dst, src, #1. N is passed equal to sf.
+            encode_logical_imm(ctx, sf, 0x02, sf, 0, 0, src, dst);
+            break;
+        case OIncr:
+            encode_add_sub_imm(ctx, sf, 0, 0, 0, 1, src, dst);
+            break;
+        case ODecr:
+            encode_add_sub_imm(ctx, sf, 1, 0, 0, 1, src, dst);
+            break;
+        default:
+            jit_error("aarch64 emit_unop: unsupported op");
+        }
+        // Keep 8/16-bit results within their width.
+        if( mode == M_UI8 )
+            encode_logical_imm(ctx, 0, 0x00, 0, 0, 7, dst, dst);
+        else if( mode == M_UI16 )
+            encode_logical_imm(ctx, 0, 0x00, 0, 0, 15, dst, dst);
+        if( dst_in_mem )
+            emit_mov(ctx, out_e, R(ARM_TMP1), mode);
+        return;
+    }
+
+    // Floating-point: result goes to the destination FP register, or to V31
+    // followed by a store when the destination is a memory slot.
+    Arm64FpReg fdst = dst_in_mem ? (Arm64FpReg)31 : fpr_id(out_e);
+    Arm64FpReg fsrc = materialize_fpr(ctx, a_e, mode, (Arm64FpReg)29);
+    int type = (mode == M_F64) ? 1 : 0;
+    switch( op ) {
+    case ONeg:
+        encode_fp_1src(ctx, 0, 0, type, /*FNEG*/2, fsrc, fdst);
+        break;
+    default:
+        jit_error("aarch64 emit_unop float: unsupported op");
+    }
+    if( dst_in_mem ) {
+        Arm64Reg slot_base = gpr_id(out_e);
+        int slot_offs = REG_VALUE(out_e);
+        emit_ld_st(ctx, false, mode, fdst, slot_base, slot_offs);
+    }
+}
+
+// ----------------------------------------------------------------------------
+// CONV / CONV_UNSIGNED. e->mode = output mode, e->size_offs = input mode.
+// ----------------------------------------------------------------------------
+// Emits a value conversion from e->a (mode e->size_offs) into out_e (mode
+// e->mode). Four cases are handled: FP->FP, FP->int, int->FP and int->int.
+//   unsign : selects the unsigned variant (FCVTZU / UCVTF, and zero- rather
+//            than sign-extension for I32 -> PTR)
+// Register usage: the source is materialized via ARM_TMP1 (int) or V29 (FP);
+// a memory destination is staged in ARM_TMP2 (int) or V31 (FP) and then
+// stored.
+static void emit_conv( code_ctx *ctx, einstr *e, ereg out_e, bool unsign ) {
+ emit_mode out_mode = e->mode;
+ emit_mode in_mode = (emit_mode)e->size_offs;
+ bool out_fp = is_fp_mode(out_mode);
+ bool in_fp = is_fp_mode(in_mode);
+
+ // Materialize source.
+ Arm64Reg a_gpr = 0;
+ Arm64FpReg a_fpr = (Arm64FpReg)0;
+ if( in_fp ) {
+ a_fpr = materialize_fpr(ctx, e->a, in_mode, (Arm64FpReg)29);
+ } else {
+ a_gpr = materialize_gpr(ctx, e->a, in_mode, ARM_TMP1);
+ }
+
+ // Pick output register encoding. When the result lives in memory we route
+ // the value through a backend-private temporary in the appropriate class.
+ bool out_to_mem = REG_KIND(out_e) != R_REG;
+ Arm64Reg dst_gpr = (!out_fp && !out_to_mem) ? gpr_id(out_e)
+ : (!out_fp ? ARM_TMP2 : 0);
+ // V31 is in our scratch list and serves as an FP temp; we still need to
+ // emit a follow-up STR if the output is memory.
+ Arm64FpReg dst_fpr = (out_fp && !out_to_mem) ? fpr_id(out_e)
+ : (out_fp ? (Arm64FpReg)31 : (Arm64FpReg)0);
+
+ if( in_fp && out_fp ) {
+ // FCVT between F32/F64
+ // NOTE(review): the opcode is chosen from in_mode alone, so a same-width
+ // request (F64 -> F64) would be encoded as F64 -> F32 — presumably the
+ // emitter never produces one; confirm.
+ int type = (in_mode == M_F64) ? 1 : 0; // input type
+ int opcode = (in_mode == M_F32) ? 0x05 : 0x04; // F32->F64 = 0x05, F64->F32 = 0x04
+ encode_fp_1src(ctx, 0, 0, type, opcode, a_fpr, dst_fpr);
+ } else if( in_fp && !out_fp ) {
+ // FP -> int. FCVTZS / FCVTZU (round toward zero).
+ // NOTE(review): no 8/16-bit mask is applied here when out_mode is
+ // M_UI8 / M_UI16 — confirm whether such conversions can occur.
+ int sf = sf_for(out_mode);
+ int type = (in_mode == M_F64) ? 1 : 0;
+ int rmode = 3; // round toward zero
+ int opc = unsign ? 1 : 0; // 0=FCVTZS, 1=FCVTZU
+ encode_fcvt_int(ctx, sf, 0, type, rmode, opc, a_fpr, dst_gpr);
+ } else if( !in_fp && out_fp ) {
+ // int -> FP. SCVTF / UCVTF.
+ int sf = sf_for(in_mode);
+ int type = (out_mode == M_F64) ? 1 : 0;
+ int rmode = 0;
+ int opc = unsign ? 3 : 2; // 2=SCVTF, 3=UCVTF
+ // First, widen sub-word inputs to full width. UI8/UI16 are
+ // unsigned regardless of the `unsign` flag (which here selects
+ // SCVTF vs UCVTF), so always zero-extend the byte/half before
+ // the FP conversion.
+ Arm64Reg src = a_gpr;
+ if( in_mode == M_UI8 || in_mode == M_UI16 ) {
+ emit_uxt_to_w(ctx, in_mode, src, ARM_TMP1);
+ src = ARM_TMP1;
+ }
+ encode_int_fcvt(ctx, sf, 0, type, rmode, opc, src, dst_fpr);
+ } else {
+ // int -> int.
+ switch( in_mode ) {
+ case M_UI8:
+ case M_UI16:
+ // UI8/UI16 are inherently unsigned in HL — widening to a larger
+ // integer must always zero-extend, matching x86's MOVZX. The
+ // `unsign` flag is only meaningful for FP conversions.
+ if( out_mode == M_PTR || out_mode == M_I32 ) {
+ emit_uxt_to_w(ctx, in_mode, a_gpr, dst_gpr);
+ } else if( out_mode == M_UI16 || out_mode == M_UI8 ) {
+ // Sub-word to sub-word: mask to the OUTPUT width.
+ emit_uxt_to_w(ctx, out_mode, a_gpr, dst_gpr);
+ }
+ break;
+ case M_I32:
+ if( out_mode == M_PTR ) {
+ if( unsign ) emit_mov_gpr(ctx, dst_gpr, a_gpr, 0); // MOV Wd, Wn — zero-extends to X
+ else emit_sxt_to_ptr(ctx, M_I32, a_gpr, dst_gpr);
+ } else {
+ // Copy, then narrow in place when the target is 8/16 bits wide.
+ emit_mov_gpr(ctx, dst_gpr, a_gpr, sf_for(out_mode));
+ if( out_mode == M_UI8 || out_mode == M_UI16 )
+ emit_uxt_to_w(ctx, out_mode, dst_gpr, dst_gpr);
+ }
+ break;
+ case M_PTR:
+ if( out_mode == M_I32 ) {
+ emit_mov_gpr(ctx, dst_gpr, a_gpr, 0); // truncate
+ } else if( out_mode == M_UI8 || out_mode == M_UI16 ) {
+ emit_uxt_to_w(ctx, out_mode, a_gpr, dst_gpr);
+ } else {
+ emit_mov_gpr(ctx, dst_gpr, a_gpr, 1);
+ }
+ break;
+ default:
+ jit_error("aarch64 emit_conv: unsupported int conversion");
+ }
+ }
+
+ // Write the staged result back when the destination is a memory slot.
+ if( out_to_mem ) {
+ if( out_fp ) {
+ // STR D31/S31, [base+offs] — base might be inside a register operand
+ // of `out_e`; use emit_ld_st with the FP class.
+ Arm64Reg base = gpr_id(out_e);
+ int offs = REG_VALUE(out_e);
+ emit_ld_st(ctx, false, out_mode, dst_fpr, base, offs);
+ } else {
+ emit_mov(ctx, out_e, R(ARM_TMP2), out_mode);
+ }
+ }
+}
+
+// ----------------------------------------------------------------------------
+// STORE / LOAD_ADDR / LEA.
+// ----------------------------------------------------------------------------
+// Emits a store of e->b (in mode e->mode) to the address [e->a + e->size_offs].
+// The address operand is used directly when it is a register (its value field
+// is folded into the offset); otherwise it is first loaded into ARM_TMP1.
+// A value that is not already in a suitable register is staged in whichever
+// of ARM_TMP1 / ARM_TMP2 is not holding the base.
+static void emit_store( code_ctx *ctx, einstr *e ) {
+ int offs = e->size_offs;
+ Arm64Reg base;
+ if( REG_KIND(e->a) == R_REG ) {
+ base = gpr_id(e->a);
+ // MK_STACK_OFFS(v) and MK_ADDR-like values encode the offset in the
+ // register's value field; combine it with size_offs. For regular
+ // register operands REG_VALUE is 0, so this is a no-op.
+ offs += REG_VALUE(e->a);
+ } else {
+ emit_mov(ctx, R(ARM_TMP1), e->a, M_PTR);
+ base = ARM_TMP1;
+ }
+ // Floating-point store.
+ if( is_fp_mode(e->mode) ) {
+ if( REG_KIND(e->b) == R_REG ) {
+ emit_ld_st(ctx, false, e->mode, fpr_id(e->b), base, offs);
+ } else {
+ // Route the bit pattern through a GPR. STR writes the same bytes
+ // regardless of FP vs. INT class.
+ Arm64Reg tmp = (base == ARM_TMP1) ? ARM_TMP2 : ARM_TMP1;
+ emit_mode int_mode = (e->mode == M_F32) ? M_I32 : M_PTR;
+ if( REG_KIND(e->b) == R_CONST ) {
+ load_immediate(ctx, (int64_t)REG_VALUE(e->b), tmp, sf_for(int_mode) == 1);
+ } else if( REG_KIND(e->b) == R_REG_PTR ) {
+ // Spilled FP vreg: load via emit_ld_st_ex so the offset-temp picker
+ // can avoid clobbering `base` (parked in ARM_TMP1 when e->a was spilled).
+ emit_ld_st_ex(ctx, true, int_mode, tmp, gpr_id(e->b), REG_VALUE(e->b), base);
+ } else {
+ emit_mov(ctx, R(tmp), e->b, int_mode);
+ }
+ // NOTE(review): the final argument is the register to avoid; -1 is
+ // passed here, presumably meaning "none" — confirm in emit_ld_st_ex.
+ emit_ld_st_ex(ctx, false, int_mode, tmp, base, offs, (Arm64Reg)-1);
+ }
+ return;
+ }
+ // Integer / pointer store. reg_t ends up naming the register to store.
+ int reg_t;
+ if( REG_KIND(e->b) == R_REG && REG_VALUE(e->b) == 0 ) {
+ // Plain register source: store it directly.
+ reg_t = gpr_id(e->b);
+ } else {
+ Arm64Reg tmp = (base == ARM_TMP1) ? ARM_TMP2 : ARM_TMP1;
+ if( REG_KIND(e->b) == R_REG ) {
+ // MK_STACK_OFFS / LEA-rewritten ADDRESS: source encodes (reg, offs).
+ // Materialize the effective address into tmp.
+ emit_lea_imm(ctx, tmp, gpr_id(e->b), REG_VALUE(e->b));
+ } else if( REG_KIND(e->b) == R_CONST ) {
+ load_immediate(ctx, (int64_t)REG_VALUE(e->b), tmp, sf_for(e->mode) == 1);
+ } else if( REG_KIND(e->b) == R_REG_PTR ) {
+ // Load directly via emit_ld_st_ex so we can tell it to avoid
+ // clobbering `base` (which lives in ARM_TMP1 when e->a was spilled).
+ emit_ld_st_ex(ctx, true, e->mode, tmp, gpr_id(e->b), REG_VALUE(e->b), base);
+ } else {
+ emit_mov(ctx, R(tmp), e->b, e->mode);
+ }
+ reg_t = tmp;
+ }
+ emit_ld_st_ex(ctx, false, e->mode, reg_t, base, offs, (Arm64Reg)-1);
+}
+
+// Emits a load from [e->a + e->size_offs] into out_e. The load width/class
+// comes from e->nargs (reinterpreted as an emit_mode). A non-register address
+// operand is first loaded into ARM_TMP1; a non-register destination is staged
+// in ARM_TMP2 (integer) or V31 (floating point) and then stored.
+static void emit_load_addr( code_ctx *ctx, einstr *e, ereg out_e ) {
+ emit_mode lmode = (emit_mode)e->nargs;
+ Arm64Reg base;
+ int offs = e->size_offs;
+ if( REG_KIND(e->a) == R_REG ) {
+ base = gpr_id(e->a);
+ // Fold the operand's value field into the displacement.
+ offs += REG_VALUE(e->a);
+ } else {
+ emit_mov(ctx, R(ARM_TMP1), e->a, M_PTR);
+ base = ARM_TMP1;
+ }
+ if( is_fp_mode(lmode) ) {
+ if( REG_KIND(out_e) == R_REG ) {
+ emit_ld_st(ctx, true, lmode, fpr_id(out_e), base, offs);
+ } else {
+ // FP load into V31 then STR to memory dst.
+ emit_ld_st(ctx, true, lmode, (Arm64FpReg)31, base, offs);
+ Arm64Reg out_base = gpr_id(out_e);
+ int out_offs = REG_VALUE(out_e);
+ emit_ld_st(ctx, false, lmode, (Arm64FpReg)31, out_base, out_offs);
+ }
+ return;
+ }
+ Arm64Reg dst = (REG_KIND(out_e) == R_REG) ? gpr_id(out_e) : ARM_TMP2;
+ emit_ld_st(ctx, true, lmode, dst, base, offs);
+ if( REG_KIND(out_e) != R_REG ) {
+ // NOTE(review): the spill uses e->mode while the load above used lmode
+ // (e->nargs) — confirm the two always agree for LOAD_ADDR.
+ emit_mov(ctx, out_e, R(ARM_TMP2), e->mode);
+ }
+}
+
+// LEA: out = a + offs, or out = a + (UXTW(b) << shift) + offs when a scale
+// and an index are present. e->size_offs packs the scale in its low byte and
+// the displacement in the remaining upper bits.
+static void emit_lea( code_ctx *ctx, einstr *e, ereg out_e ) {
+    int scale = e->size_offs & 0xFF;
+    int disp = e->size_offs >> 8;
+    bool base_is_reg = (REG_KIND(e->a) == R_REG);
+    bool dst_is_reg = (REG_KIND(out_e) == R_REG);
+    if( base_is_reg ) disp += REG_VALUE(e->a);
+
+    Arm64Reg dst = dst_is_reg ? gpr_id(out_e) : ARM_TMP1;
+    Arm64Reg base_reg;
+    if( base_is_reg ) {
+        base_reg = gpr_id(e->a);
+    } else {
+        emit_mov(ctx, R(ARM_TMP1), e->a, M_PTR);
+        base_reg = ARM_TMP1;
+    }
+
+    bool indexed = !(scale == 0 || IS_NULL(e->b));
+    // Register the displacement gets added to: the base itself for the plain
+    // form, or the partial sum already sitting in `dst` for the indexed form.
+    Arm64Reg addend = base_reg;
+
+    if( indexed ) {
+        int shift;
+        switch( scale ) {
+        case 1: shift = 0; break;
+        case 2: shift = 1; break;
+        case 4: shift = 2; break;
+        case 8: shift = 3; break;
+        default:
+            jit_error("aarch64 LEA: unsupported scale");
+            shift = 3;
+            break;
+        }
+        // Index width matches HL semantics — array indexes are M_I32. Read the
+        // index from a 32-bit slot so no garbage from a neighbouring vreg leaks
+        // in, and use the extended-register ADD with UXTW so only the low
+        // 32 bits take part in the address computation.
+        Arm64Reg index = materialize_gpr_ex(ctx, e->b, M_I32, ARM_TMP2, base_reg);
+        // dst = base + (UXTW(index) << shift)
+        encode_add_sub_ext(ctx, /*sf=*/1, /*op=*/0, /*S=*/0, index, /*option=UXTW*/2, shift, base_reg, dst);
+        addend = dst;
+    }
+
+    if( disp == 0 ) {
+        // Plain form with no displacement is a register copy; the indexed
+        // form already holds the final value.
+        if( !indexed ) emit_mov_gpr(ctx, dst, base_reg, 1);
+    } else if( disp > 0 && disp <= 0xFFF ) {
+        encode_add_sub_imm(ctx, 1, 0, 0, 0, disp, addend, dst);
+    } else if( disp < 0 && -disp <= 0xFFF ) {
+        encode_add_sub_imm(ctx, 1, 1, 0, 0, -disp, addend, dst);
+    } else {
+        // Displacement does not fit imm12: go through ARM_TMP2.
+        load_immediate(ctx, disp, ARM_TMP2, true);
+        encode_add_sub_reg(ctx, 1, 0, 0, 0, ARM_TMP2, 0, addend, dst);
+    }
+
+    if( !dst_is_reg ) emit_mov(ctx, out_e, R(ARM_TMP1), M_PTR);
+}
+
+// ----------------------------------------------------------------------------
+// CMOV / XCHG / PUSH_CONST / PREFETCH.
+// ----------------------------------------------------------------------------
+// Conditional move: `out` takes the value of `a` when `cond` holds and keeps
+// its current value otherwise. Only register destinations are supported.
+static void emit_cmov_arm( code_ctx *ctx, ereg out_e, ereg a_e, ArmCondition cond ) {
+    if( REG_KIND(out_e) != R_REG )
+        jit_error("aarch64 CMOV non-reg out");
+    Arm64Reg dst = gpr_id(out_e);
+    Arm64Reg src = materialize_gpr(ctx, a_e, M_PTR, ARM_TMP1);
+    // CSEL dst, src, dst, cond
+    encode_cond_select(ctx, 1, 0, dst, cond, 0, src, dst);
+}
+
+// Swap two registers with three 64-bit moves, using ARM_TMP1 as the holding
+// register. Both operands must be registers.
+static void emit_xchg( code_ctx *ctx, einstr *e ) {
+    bool both_regs = (REG_KIND(e->a) == R_REG) && (REG_KIND(e->b) == R_REG);
+    if( !both_regs )
+        jit_error("aarch64 XCHG with non-reg operand");
+    Arm64Reg first = gpr_id(e->a);
+    Arm64Reg second = gpr_id(e->b);
+    emit_mov_gpr(ctx, ARM_TMP1, first, 1);   // tmp    = first
+    emit_mov_gpr(ctx, first, second, 1);     // first  = second
+    emit_mov_gpr(ctx, second, ARM_TMP1, 1);  // second = tmp
+}
+
+// Push a pointer-sized constant: load it into ARM_TMP1, move SP down by 16
+// bytes, then store the register at [SP].
+static void emit_push_const( code_ctx *ctx, einstr *e ) {
+    if( e->mode != M_PTR )
+        jit_error("aarch64 PUSH_CONST non-ptr mode");
+    int64_t imm = (int64_t)e->value;
+    load_immediate(ctx, imm, ARM_TMP1, true);
+    emit_sp_offs(ctx, -16);
+    // STR X16, [SP]
+    encode_ldr_str_imm(ctx, 3, 0, 0, 0, SP_REG, ARM_TMP1);
+}
+
+// ----------------------------------------------------------------------------
+// Phase 4: constant-pool helpers.
+// ----------------------------------------------------------------------------
+
+// Reserve `size` bytes at the end of the constant table, first padding the
+// table so the segment starts on an `align` boundary (align == 0 disables
+// padding). Returns the byte offset of the reserved segment.
+static int reserve_const_segment( code_ctx *ctx, int size, int align ) {
+    int start = byte_count(ctx->const_table);
+    int misalign = align ? (start & (align - 1)) : 0;
+    if( misalign ) {
+        byte_reserve_impl(&ctx->jit->galloc, &ctx->const_table, align - misalign);
+        start = byte_count(ctx->const_table);
+    }
+    byte_reserve_impl(&ctx->jit->galloc, &ctx->const_table, size);
+    return start;
+}
+
+// Insert (or find) a 64-bit value in the constant table; record the current
+// emission point as an ADRP+LDR (or ADRP+ADD) pair to be patched later.
+// Returns the byte offset of the value inside ctx->const_table.
+static int alloc_const( code_ctx *ctx, uint64_t value, int adrp_pos ) {
+    // Reuse an existing pool slot for this value when there is one.
+    int slot = value_map_find(ctx->const_table_lookup, value);
+    if( slot < 0 ) {
+        slot = reserve_const_segment(ctx, 8, 8);
+        uint64_t *cell = (uint64_t*)byte_addr(ctx->const_table, slot);
+        *cell = value;
+        value_map_add_impl(&ctx->jit->galloc, &ctx->const_table_lookup, value, slot);
+    }
+    // Record the reference as a (code position, pool offset) pair.
+    int ref_pos = ctx->jit->out_pos + adrp_pos;
+    int_arr_add_impl(&ctx->jit->galloc, &ctx->const_refs, ref_pos);
+    int_arr_add_impl(&ctx->jit->galloc, &ctx->const_refs, slot);
+    return slot;
+}
+
+// Emit ADRP dst, page ; LDR dst, [dst, #lo12] — load constant `value` from pool.
+static void emit_const_load( code_ctx *ctx, Arm64Reg dst, uint64_t value ) {
+    // Remember where the pair starts so the pool reference can be recorded.
+    int pair_start = byte_count(ctx->code);
+    // ADRP dst, <page> with a zero placeholder immediate
+    encode_adrp(ctx, 0, 0, dst);
+    // LDR Xd, [Xd, #0]
+    encode_ldr_str_imm(ctx, 3, 0, 1, 0, dst, dst);
+    alloc_const(ctx, value, pair_start);
+}
+
+// Emit ADRP dst, page ; ADD dst, dst, #lo12 — load address of pool entry `value`.
+static void emit_const_addr( code_ctx *ctx, Arm64Reg dst, uint64_t value ) {
+    // Remember where the pair starts so the pool reference can be recorded.
+    int pair_start = byte_count(ctx->code);
+    // ADRP dst, <page> with a zero placeholder immediate
+    encode_adrp(ctx, 0, 0, dst);
+    // ADD Xd, Xd, #0
+    encode_add_sub_imm(ctx, 1, 0, 0, 0, 0, dst, dst);
+    alloc_const(ctx, value, pair_start);
+}
+
+// Emit ADRP+ADD pair targeting an offset INSIDE the const table (used for
+// jump-table base addressing). The offset is recorded directly, not the value.
+static void emit_pool_offset_addr( code_ctx *ctx, Arm64Reg dst, int const_offset ) {
+    int pair_start = byte_count(ctx->code);
+    encode_adrp(ctx, 0, 0, dst);                      // ADRP placeholder
+    encode_add_sub_imm(ctx, 1, 0, 0, 0, 0, dst, dst); // ADD dst, dst, #0 placeholder
+    // Record (code position, pool offset) directly — no value lookup here.
+    int ref_pos = ctx->jit->out_pos + pair_start;
+    int_arr_add_impl(&ctx->jit->galloc, &ctx->const_refs, ref_pos);
+    int_arr_add_impl(&ctx->jit->galloc, &ctx->const_refs, const_offset);
+}
+
+// ----------------------------------------------------------------------------
+// Phase 4: call ops, LOAD_FUN, JUMP_TABLE.
+// ----------------------------------------------------------------------------
+
+// CALL_FUN: emit BL with a deferred imm26 patch (resolved in flush_consts once
+// jit->mod->functions_ptrs[fid] holds the in-output offset).
+static void emit_call_fun( code_ctx *ctx, einstr *e ) {
+    int bl_offs = byte_count(ctx->code);
+    // BL with a zero displacement; the real imm26 is filled in later.
+    encode_branch_link(ctx, 0);
+    // Fixup record: (position in output, function id from e->a, kind 0 = BL).
+    int fixup_pos = ctx->jit->out_pos + bl_offs;
+    int fun_id = (int)e->a;
+    int_arr_add_impl(&ctx->jit->galloc, &ctx->funs, fixup_pos);
+    int_arr_add_impl(&ctx->jit->galloc, &ctx->funs, fun_id);
+    int_arr_add_impl(&ctx->jit->galloc, &ctx->funs, /*kind=BL*/0);
+}
+
+// LOAD_FUN: emit ADRP + ADD with a deferred imm21+imm12 patch — produces the
+// absolute address of the JIT-compiled function in `out`.
+static void emit_load_fun( code_ctx *ctx, ereg out_e, int fid ) {
+    bool dst_is_reg = (REG_KIND(out_e) == R_REG);
+    Arm64Reg dst = dst_is_reg ? gpr_id(out_e) : ARM_TMP1;
+    int pair_start = byte_count(ctx->code);
+    encode_adrp(ctx, 0, 0, dst);                      // ADRP placeholder
+    encode_add_sub_imm(ctx, 1, 0, 0, 0, 0, dst, dst); // ADD dst, dst, #0 placeholder
+    // Fixup record: (position in output, function id, kind 1 = ADRP+ADD).
+    int fixup_pos = ctx->jit->out_pos + pair_start;
+    int_arr_add_impl(&ctx->jit->galloc, &ctx->funs, fixup_pos);
+    int_arr_add_impl(&ctx->jit->galloc, &ctx->funs, fid);
+    int_arr_add_impl(&ctx->jit->galloc, &ctx->funs, /*kind=ADRP+ADD*/1);
+    // Non-register destination: the address was built in ARM_TMP1.
+    if( !dst_is_reg )
+        emit_mov(ctx, out_e, R(ARM_TMP1), M_PTR);
+}
+
+// CALL_PTR: indirect call via constant pool, with shortcuts for the two known
+// near-call targets (hl_null_access, hl_jit_null_field_access).
+static void emit_call_ptr( code_ctx *ctx, einstr *e ) {
+    uint64_t callee = (uint64_t)e->value;
+
+    // Position of an in-buffer trampoline for this callee, or -1 if none.
+    int tramp_pos;
+    if( callee == (uint64_t)(uintptr_t)hl_null_access )
+        tramp_pos = ctx->null_access_pos;
+    else if( callee == (uint64_t)(uintptr_t)hl_jit_null_field_access )
+        tramp_pos = ctx->null_field_pos;
+    else
+        tramp_pos = -1;
+
+    if( tramp_pos < 0 ) {
+        // General case: fetch the target from the constant pool, then BLR.
+        emit_const_load(ctx, ARM_TMP1, callee);
+        encode_branch_reg(ctx, /*BLR*/1, ARM_TMP1);
+    } else {
+        // Near call: caller and trampoline share the output buffer, so the
+        // BL displacement can be computed right away.
+        int here = ctx->jit->out_pos + byte_count(ctx->code);
+        intptr_t dist = (intptr_t)tramp_pos - (intptr_t)here;
+        int imm26 = (int)(dist >> 2);
+        encode_branch_link(ctx, imm26);
+    }
+
+    // Mask sub-word return values in X0 (matches x86's MOVZX behavior).
+    switch( e->mode ) {
+    case M_UI8:
+        encode_logical_imm(ctx, 0, 0x00, 0, 0, 7, X0, X0);
+        break;
+    case M_UI16:
+        encode_logical_imm(ctx, 0, 0x00, 0, 0, 15, X0, X0);
+        break;
+    default:
+        break;
+    }
+}
+
+// CALL_REG: indirect call through a register (BLR Xn).
+static void emit_call_reg( code_ctx *ctx, einstr *e ) {
+    // Bring the callee address into a register (ARM_TMP1 if it needs one),
+    // then branch-with-link through it.
+    Arm64Reg callee = materialize_gpr(ctx, e->a, M_PTR, ARM_TMP1);
+    encode_branch_reg(ctx, /*BLR*/1, callee);
+}
+
+// JUMP_TABLE: dispatch through a const_table-resident jump table whose entries
+// are absolute target addresses (filled in hl_codegen_final). Index value lives
+// in e->a (32-bit int). Falls through after BR — caller assumes no return.
+static void emit_jump_table( code_ctx *ctx, einstr *e ) {
+    int count = e->nargs;
+    int table_offs = reserve_const_segment(ctx, 8 * count, 16);
+
+    // The index is an M_I32 in e->a. Get it into a register if it is not in
+    // one already; the 32-bit ORR below then copies it into ARM_TMP2, which
+    // also clears the upper 32 bits.
+    Arm64Reg idx_src;
+    if( REG_KIND(e->a) == R_REG ) {
+        idx_src = gpr_id(e->a);
+    } else {
+        emit_mov(ctx, R(ARM_TMP2), e->a, M_I32);
+        idx_src = ARM_TMP2;
+    }
+    // MOV W17, Wsrc (ORR W17, WZR, Wsrc)
+    encode_logical_reg(ctx, 0, 0x01, 0, 0, idx_src, 0, XZR, ARM_TMP2);
+
+    // X16 = table base ; X16 = [X16 + X17 << 3] ; BR X16
+    emit_pool_offset_addr(ctx, ARM_TMP1, table_offs);
+    // size=3, V=0, opc=1, option=3 (LSL/UXTX), S=1
+    encode_ldr_str_reg(ctx, 3, 0, 1, ARM_TMP2, /*option=*/3, /*S=*/1, ARM_TMP1, ARM_TMP1);
+    encode_branch_reg(ctx, /*BR*/0, ARM_TMP1);
+
+    // Queue one (table slot offset, target op index) pair per entry.
+    ereg *targets = hl_emit_get_args(ctx->jit->emit, e);
+    int slot = table_offs;
+    for( int i = 0; i < count; i++, slot += 8 ) {
+        int target_op = ctx->cur_op + (int)targets[i] + 1;
+        int_arr_add_impl(&ctx->jit->galloc, &ctx->const_addr, slot);
+        int_arr_add_impl(&ctx->jit->galloc, &ctx->const_addr, target_op);
+    }
+}
+
+// Emit a PRFM hint for the address in e->a. e->size_offs selects the hint:
+//   0 = PLDL1KEEP, 1 = PLDL2KEEP, 2 = PLDL3KEEP, 3 = PLDL1STRM, 4 = PSTL1KEEP.
+// Any other value is reported through jit_error and nothing is emitted.
+static void emit_prefetch( code_ctx *ctx, einstr *e ) {
+    int prfop;
+    switch( e->size_offs ) {
+    case 0: prfop = 0; break;  // PLDL1KEEP
+    case 1: prfop = 2; break;  // PLDL2KEEP
+    case 2: prfop = 4; break;  // PLDL3KEEP
+    case 3: prfop = 1; break;  // PLDL1STRM
+    case 4: prfop = 16; break; // PSTL1KEEP
+    default:
+        jit_error("aarch64 PREFETCH: bad size_offs");
+        // Bail out so `prfop` is never read while indeterminate, even if
+        // jit_error() returns to its caller.
+        return;
+    }
+    Arm64Reg base;
+    if( REG_KIND(e->a) == R_REG ) {
+        base = gpr_id(e->a);
+    } else {
+        // Address is not in a register: load it into ARM_TMP1 first.
+        emit_mov(ctx, R(ARM_TMP1), e->a, M_PTR);
+        base = ARM_TMP1;
+    }
+    // PRFM: size=11, V=0, opc=10, imm12=0, Rn=base, Rt=prfop
+    encode_ldr_str_imm(ctx, 3, 0, 2, 0, base, (Arm64Reg)prfop);
+}
+
+// ============================================================================
+// hl_codegen_flush
+// ============================================================================
+
+// Publish the current function's code buffer and position map on the jit
+// context. Runs at most once per function; later calls are no-ops until the
+// `flushed` flag is cleared again.
+void hl_codegen_flush( jit_ctx *jit ) {
+    code_ctx *cg = jit->code;
+    if( !cg->flushed ) {
+        cg->flushed = true;
+        jit->code_size = cg->code.cur;
+        jit->code_instrs = cg->code.values;
+        jit->code_pos_map = cg->pos_map;
+        // Close the map with the end-of-code position, one past the last op.
+        if( cg->pos_map )
+            cg->pos_map[cg->cur_op + 1] = cg->code.cur;
+    }
+}
+
+// ============================================================================
+// hl_codegen_function — the main per-IR-op switch
+// ============================================================================
+
+// Generate AArch64 code for the register-allocated IR held in `jit`
+// (jit->reg_instrs / jit->reg_writes, jit->reg_instr_count entries).
+// Steps:
+//   1. reset the per-function state (code buffer, branch fixups, pos_map);
+//   2. emit every IR op, recording its starting byte offset in pos_map;
+//   3. flush, which publishes the buffer and writes the end-of-code offset
+//      into pos_map[last_op + 1];
+//   4. patch in-function branches and convert the jump-table entries queued
+//      during this function from op indices to output-buffer offsets.
+void hl_codegen_function( jit_ctx *jit ) {
+    code_ctx *ctx = jit->code;
+    ctx->flushed = false;
+    byte_free(&ctx->code);
+    int_arr_free(&ctx->branch_fixups);
+    free(ctx->pos_map);
+    ctx->pos_map = (int*)malloc((jit->reg_instr_count + 1) * sizeof(int));
+    if( !ctx->pos_map ) {
+        // Allocation failure: report it rather than dereferencing NULL below.
+        jit_error("aarch64: out of memory allocating pos_map");
+        return;
+    }
+    ctx->pos_map[0] = 0;
+    // Reset the op cursor so that, for a function with no instructions, the
+    // flush below writes pos_map[0] instead of indexing with a stale cur_op
+    // left over from the previous function.
+    ctx->cur_op = -1;
+    // Make sure at least 64 bytes are available without consuming them.
+    byte_reserve(ctx->code, 64);
+    ctx->code.cur -= 64;
+
+    // Jump-table entries added from here on belong to this function.
+    int const_addr_prev = int_arr_count(ctx->const_addr);
+
+    for( int cur_pos = 0; cur_pos < jit->reg_instr_count; cur_pos++ ) {
+        einstr *e = jit->reg_instrs + cur_pos;
+        ereg out = jit->reg_writes[cur_pos];
+        byte_reserve(ctx->code, 64);
+        ctx->code.cur -= 64;
+        ctx->cur_op = cur_pos;
+        if( cur_pos > 0 ) ctx->pos_map[cur_pos] = ctx->code.cur;
+
+        switch( e->op ) {
+        case LOAD_ARG:
+            // nop — argument lives in its allocated register already
+            continue;
+        case NOP:
+            // HINT #0 (NOP)
+            EMIT32(ctx, 0xD503201F);
+            break;
+        case MOV:
+            emit_mov(ctx, out, e->a, e->mode);
+            break;
+        case LOAD_CONST:
+            emit_load_const(ctx, out, e->value, e->mode);
+            break;
+        case RET:
+            // Result placement was handled by upstream regs phase via a preceding
+            // regs_emit_mov(out, e->a). Here we just emit the actual return.
+            encode_branch_reg(ctx, /*opc=*/2 /*RET*/, LR);
+            break;
+        case PUSH:
+            emit_push(ctx, e->a, e->mode);
+            break;
+        case POP:
+            emit_pop(ctx, e->a, e->mode);
+            break;
+        case STACK_OFFS:
+            emit_sp_offs(ctx, e->size_offs);
+            break;
+        case CMP:
+            emit_cmp(ctx, e);
+            break;
+        case TEST:
+            emit_test(ctx, e);
+            break;
+        case JCOND:
+            emit_jump_cond(ctx, get_cond_jump(ctx), e->size_offs);
+            break;
+        case JUMP:
+            emit_jump(ctx, e->size_offs);
+            break;
+        case DEBUG_BREAK:
+            // BRK #0 — encoded as 0xD4200000
+            EMIT32(ctx, 0xD4200000);
+            break;
+        case BINOP:
+            if( is_fp_mode(e->mode) )
+                emit_binop_fp(ctx, (hl_op)e->size_offs, out, e->a, e->b, e->mode);
+            else
+                emit_binop_int(ctx, (hl_op)e->size_offs, out, e->a, e->b, e->mode);
+            break;
+        case UNOP:
+            // jit_emit.c lowers `not b` and similar boolean toggles as a UNOP
+            // with two operands (a, b=immediate, op=OXor). Dispatch the
+            // two-operand form through the regular binop handler so OXor/OAnd/OOr
+            // don't need a second copy of the encoding.
+            if( !IS_NULL(e->b) ) {
+                if( is_fp_mode(e->mode) )
+                    emit_binop_fp(ctx, (hl_op)e->size_offs, out, e->a, e->b, e->mode);
+                else
+                    emit_binop_int(ctx, (hl_op)e->size_offs, out, e->a, e->b, e->mode);
+            } else {
+                emit_unop(ctx, (hl_op)e->size_offs, out, e->a, e->mode);
+            }
+            break;
+        case CONV:
+            emit_conv(ctx, e, out, /*unsign=*/false);
+            break;
+        case CONV_UNSIGNED:
+            emit_conv(ctx, e, out, /*unsign=*/true);
+            break;
+        case STORE:
+            emit_store(ctx, e);
+            break;
+        case LOAD_ADDR:
+            emit_load_addr(ctx, e, out);
+            break;
+        case LEA:
+            emit_lea(ctx, e, out);
+            break;
+        case CMOV:
+            emit_cmov_arm(ctx, out, e->a, get_cond_jump(ctx));
+            break;
+        case XCHG:
+            emit_xchg(ctx, e);
+            break;
+        case CXCHG:
+            // x86 emits BREAK() here too — atomic compare-exchange unimplemented.
+            EMIT32(ctx, 0xD4200000);
+            break;
+        case PUSH_CONST:
+            emit_push_const(ctx, e);
+            break;
+        case PREFETCH:
+            emit_prefetch(ctx, e);
+            break;
+        case CALL_FUN:
+            emit_call_fun(ctx, e);
+            break;
+        case CALL_PTR:
+            emit_call_ptr(ctx, e);
+            break;
+        case CALL_REG:
+            emit_call_reg(ctx, e);
+            break;
+        case LOAD_FUN:
+            emit_load_fun(ctx, out, e->size_offs);
+            break;
+        case JUMP_TABLE:
+            emit_jump_table(ctx, e);
+            break;
+        case ADDRESS:
+            // Rewritten to LEA in the regs phase; should never reach here.
+            jit_error("aarch64: ADDRESS reached backend (regs phase should rewrite)");
+            break;
+        case CATCH:
+            // IR marker only (mirrors x86) — no code emitted.
+            break;
+        default:
+            {
+                // Name table used only to build the diagnostic below; ops
+                // beyond its end are printed as "?".
+                static const char *op_names[] = {
+                    "LOAD_ADDR", "LOAD_CONST", "LOAD_ARG", "LOAD_FUN", "STORE",
+                    "LEA", "TEST", "CMP", "JCOND", "JUMP", "JUMP_TABLE",
+                    "BINOP", "UNOP", "CONV", "CONV_UNSIGNED", "RET",
+                    "CALL_PTR", "CALL_REG", "CALL_FUN", "MOV", "CMOV",
+                    "XCHG", "CXCHG", "PUSH_CONST", "PUSH", "POP",
+                    "ALLOC_STACK", "PREFETCH", "DEBUG_BREAK", "BLOCK",
+                    "ENTER", "STACK_OFFS", "CATCH", "ADDRESS", "NOP"
+                };
+                static char errbuf[128];
+                const char *name = (e->op < (int)(sizeof(op_names)/sizeof(*op_names)))
+                    ? op_names[e->op] : "?";
+                snprintf(errbuf, sizeof(errbuf), "aarch64: unhandled IR op %s (%d) at cur_op=%d",
+                    name, e->op, cur_pos);
+                jit_error(errbuf);
+            }
+            break;
+        }
+
+        if( ctx->code.cur > ctx->code.max ) jit_error("aarch64 code buffer overrun");
+    }
+
+    // Functions are 4-byte aligned naturally on ARM; no padding needed for now.
+    hl_codegen_flush(jit);
+
+    // Patch all in-function branches. Fixups are stored as triples:
+    // (branch byte position, target op index, is-conditional flag).
+    for( int i = 0; i < int_arr_count(ctx->branch_fixups); i += 3 ) {
+        int pos = int_arr_get(ctx->branch_fixups, i);
+        int target_op = int_arr_get(ctx->branch_fixups, i + 1);
+        int is_cond = int_arr_get(ctx->branch_fixups, i + 2);
+        int target_byte_pos = ctx->pos_map[target_op];
+        patch_branch(ctx, pos, target_byte_pos, is_cond);
+    }
+
+    // Convert any jump-table target_op_index entries recorded by emit_jump_table
+    // into absolute byte offsets in the output buffer.
+    for( int i = const_addr_prev; i < int_arr_count(ctx->const_addr); i += 2 ) {
+        int target_op = int_arr_get(ctx->const_addr, i + 1);
+        int offs = jit->out_pos + ctx->pos_map[target_op];
+        ctx->const_addr.values[i + 1] = offs;
+    }
+}
+
+// ============================================================================
+// Phase 4: module-level emission.
+// ============================================================================
+
+// Finalize a just-emitted helper stub (null-access stubs, c2hl, hl2c): register
+// its start and size through hl_jit_define_function, then NOP-pad the code
+// buffer to the next 16-byte boundary and check the buffer was not overrun.
+static void flush_helper( code_ctx *ctx, int start ) {
+	int size = ctx->jit->out_pos + byte_count(ctx->code) - start; // measured before padding
+	hl_jit_define_function(ctx->jit, start, size);
+	while( byte_count(ctx->code) & 15 ) EMIT32(ctx, 0xD503201F); // NOP
+	if( byte_count(ctx->code) > ctx->code.max ) jit_error("aarch64 trampoline overrun");
+}
+
+// Resolve a placeholder branch at byte offset `pos` of the code buffer so that it
+// jumps to byte offset `target` of the same buffer. B/BL take the displacement in
+// imm26; every other opcode is treated as an imm19 branch (bits [23:5]).
+static void patch_helper_branch( code_ctx *ctx, int pos, int target ) {
+	unsigned int *slot = (unsigned int*)&ctx->code.values[pos];
+	unsigned int top6 = (*slot >> 26) & 0x3F;
+	int disp = (target - pos) >> 2; // displacement in 4-byte instructions
+	if( top6 != 0x05 && top6 != 0x25 ) {
+		// imm19 form: B.cond, and the CBZ emitted by the c2hl trampoline
+		*slot = (*slot & ~(0x7FFFFu << 5)) | ((unsigned)(disp & 0x7FFFF) << 5);
+		return;
+	}
+	// B (0x05) / BL (0x25): imm26 in bits [25:0]
+	*slot = (*slot & ~0x03FFFFFFu) | ((unsigned)disp & 0x03FFFFFF);
+}
+
+// Helper-frame entry sequence (frame-record push, Apple ARM64 + AAPCS64 compatible):
+//   STP X29, X30, [SP, #-16]! ; MOV X29, SP.
+static void emit_helper_prologue( code_ctx *ctx ) {
+	encode_ldp_stp(ctx, 2, 0, /*pre-indexed store*/0x03, /*imm7 = -2 (x8 bytes)*/0x7E, LR, SP_REG, FP);
+	emit_mov_gpr(ctx, FP, SP_REG, 1);
+}
+
+// Helper-frame exit sequence shared by every helper/trampoline:
+//   MOV SP, X29 ; LDP X29, X30, [SP], #16 ; RET.
+static void emit_helper_epilogue( code_ctx *ctx ) {
+	emit_mov_gpr(ctx, SP_REG, FP, 1); // SP = X29: drops anything allocated below the frame record
+	encode_ldp_stp(ctx, 2, 0, /*post-indexed load*/0x01, /*imm7 = +2 (x8 bytes)*/2, LR, SP_REG, FP);
+	encode_branch_reg(ctx, /*RET*/2, LR);
+}
+
+// hl_null_access stub: prologue, load `target` into ARM_TMP1, BLR to it, then BRK #0 (the call is not meant to return).
+static void emit_null_access_stub( code_ctx *ctx, void *target ) {
+	uint64_t addr = (uint64_t)(uintptr_t)target;
+	emit_helper_prologue(ctx);
+	emit_const_load(ctx, ARM_TMP1, addr);
+	encode_branch_reg(ctx, /*BLR*/1, ARM_TMP1);
+	EMIT32(ctx, 0xD4200000); // BRK #0
+}
+
+// hl_jit_null_field_access stub. Per the original author, the caller leaves the field hash
+// in W0 and the C function takes it as its single int argument, so nothing is marshalled here.
+static void emit_null_field_stub( code_ctx *ctx, void *target ) {
+	uint64_t addr = (uint64_t)(uintptr_t)target;
+	emit_helper_prologue(ctx);
+	emit_const_load(ctx, ARM_TMP1, addr);
+	encode_branch_reg(ctx, /*BLR*/1, ARM_TMP1);
+	EMIT32(ctx, 0xD4200000); // BRK #0
+}
+
+// Emit the c2hl trampoline.
+//
+// Called from C with: X0 = JIT-compiled fn ptr, X1 = &vargs (struct{regs[16];
+// stack[16]}), X2 = stack-arg count.
+//
+// The C side (jit.c:callback_c2hl) populates vargs.regs[0..7] with int reg
+// args, vargs.regs[8..15] with FP reg args, and vargs.stack[16-N..15] with the
+// N stack args (leftmost stack arg at vargs.stack[15]). We:
+// 1. Load X0..X7 from [vargs+0..56] and D0..D7 from [vargs+64..120].
+// 2. Reserve a 16-byte-aligned SP area and copy N stack slots into it, lowest address first.
+// 3. BLR fn ; restore frame ; RET.
+//
+// X16/X17 hold the fn pointer and vargs through the call (they survive any
+// data-load up to BLR; the dynamic linker only clobbers them at the BLR itself,
+// at which point we're done with them).
+static void emit_c2hl_trampoline( code_ctx *ctx ) {
+ emit_helper_prologue(ctx);
+ emit_mov_gpr(ctx, ARM_TMP1, X0, 1); // X16 = fn
+ emit_mov_gpr(ctx, ARM_TMP2, X1, 1); // X17 = vargs
+ emit_mov_gpr(ctx, X9, X2, 1); // X9 = stack count
+
+ // Load int arg regs from vargs.regs[0..7].
+ encode_ldp_stp(ctx, 0x02, 0, 0x02, 0, X1, ARM_TMP2, X0); // LDP X0,X1, [X17, #0]
+ encode_ldp_stp(ctx, 0x02, 0, 0x02, 2, X3, ARM_TMP2, X2); // LDP X2,X3, [X17, #16]
+ encode_ldp_stp(ctx, 0x02, 0, 0x02, 4, X5, ARM_TMP2, X4); // LDP X4,X5, [X17, #32]
+ encode_ldp_stp(ctx, 0x02, 0, 0x02, 6, X7, ARM_TMP2, X6); // LDP X6,X7, [X17, #48]
+ // Load FP arg regs from vargs.regs[8..15] (= byte offsets 64..120).
+ encode_ldp_stp(ctx, 0x01, 1, 0x02, 8, (Arm64Reg)1, ARM_TMP2, (Arm64Reg)0); // LDP D0,D1, [X17, #64]
+ encode_ldp_stp(ctx, 0x01, 1, 0x02, 10, (Arm64Reg)3, ARM_TMP2, (Arm64Reg)2); // LDP D2,D3, [X17, #80]
+ encode_ldp_stp(ctx, 0x01, 1, 0x02, 12, (Arm64Reg)5, ARM_TMP2, (Arm64Reg)4); // LDP D4,D5, [X17, #96]
+ encode_ldp_stp(ctx, 0x01, 1, 0x02, 14, (Arm64Reg)7, ARM_TMP2, (Arm64Reg)6); // LDP D6,D7, [X17, #112]
+
+ // Push stack args, padding SP to 16 bytes if N is odd.
+ // total bytes = N*8 + (N&1)*8 — always a multiple of 16.
+
+ // CBZ X9, no_stack — skip everything if no stack args.
+ int cbz_skip_pos = byte_count(ctx->code);
+ encode_cbz_cbnz(ctx, /*sf=*/1, /*op=*/0, 0, X9);
+
+ // X10 = X9 * 8 (size in bytes; LSL #3 via UBFM).
+ emit_bitfield(ctx, /*sf=*/1, /*opc=UBFM*/0x02, /*immr=*/(64 - 3) & 0x3F, /*imms=*/63 - 3, X9, X10);
+
+ // Pad: if X9 is odd, allocate +8. X10 += (X9 & 1) << 3
+ // AND X11, X9, #1 ; LSL X11, X11, #3 ; ADD X10, X10, X11.
+ encode_logical_imm(ctx, 1, 0x00, 1, 0, 0, X9, X11); // AND X11, X9, #1 (immr=0,imms=0,N=1 → 1)
+ emit_bitfield(ctx, 1, 0x02, (64 - 3) & 0x3F, 63 - 3, X11, X11);
+ encode_add_sub_reg(ctx, 1, 0, 0, 0, X11, 0, X10, X10);
+
+ // SUB SP, SP, X10 — must use ADD/SUB (extended register); the shifted-reg
+ // form treats register 31 as XZR, not SP, so this would silently NOP out.
+ encode_add_sub_ext(ctx, 1, 1, 0, X10, /*UXTX*/3, 0, SP_REG, SP_REG);
+
+ // Source pointer X12 = vargs + 256 - X10. X10 includes the odd-N pad slot, so this
+ // matches vargs + (32 - N) * 8 only for even N. NOTE(review): confirm the odd-N layout.
+ encode_add_sub_imm(ctx, 1, 0, 0, 0, 256, ARM_TMP2, X12); // ADD X12, X17, #256
+ encode_add_sub_reg(ctx, 1, 1, 0, 0, X10, 0, X12, X12); // SUB X12, X12, X10
+
+ // Destination pointer X13 = SP
+ emit_mov_gpr(ctx, X13, SP_REG, 1);
+
+ // Counter X14 = X9
+ emit_mov_gpr(ctx, X14, X9, 1);
+
+ // Copy loop (runs at least once; N == 0 was skipped by the CBZ): do { *X13++ = *X12++; } while( --X14 != 0 ).
+ int loop_top = byte_count(ctx->code);
+ encode_ldr_str_imm(ctx, 3, 0, 1, 0, X12, X15); // LDR X15, [X12, #0]
+ encode_add_sub_imm(ctx, 1, 0, 0, 0, 8, X12, X12); // ADD X12, X12, #8
+ encode_ldr_str_imm(ctx, 3, 0, 0, 0, X13, X15); // STR X15, [X13, #0]
+ encode_add_sub_imm(ctx, 1, 0, 0, 0, 8, X13, X13); // ADD X13, X13, #8
+ encode_add_sub_imm(ctx, 1, 1, 1, 0, 1, X14, X14); // SUBS X14, X14, #1
+ int loop_branch_pos = byte_count(ctx->code);
+ encode_branch_cond(ctx, 0, COND_NE); // B.NE loop_top
+ patch_helper_branch(ctx, loop_branch_pos, loop_top);
+
+ // Patch the CBZ skip target = end of stack-push block.
+ int after_stack = byte_count(ctx->code);
+ patch_helper_branch(ctx, cbz_skip_pos, after_stack);
+ // --- END STACK PUSH ---
+
+ // BLR fn (X16).
+ encode_branch_reg(ctx, /*BLR*/1, ARM_TMP1);
+
+ emit_helper_epilogue(ctx);
+}
+
+// Emit the hl2c trampoline. Called from JIT-compiled HL code; X0 holds the
+// closure (vclosure_wrapper*), X1..X7,V0..V7 hold call args. We:
+// 1. Spill X0..X7 and V0..V7 into a 128-byte buffer beneath the saved frame.
+// 2. Inspect cl->t->fun->ret->kind to decide between hl_jit_wrapper_ptr
+// (default) and hl_jit_wrapper_d (HF32/HF64 return).
+// 3. Call wrapper(closure, &caller_stack_args, &spilled_regs).
+static void emit_hl2c_trampoline( code_ctx *ctx ) {
+ hl_type_fun *ft = NULL; // never dereferenced: only used below to compute the offset of `ret`
+
+ emit_helper_prologue(ctx);
+ emit_sp_offs(ctx, -128); // SUB SP, SP, #128
+
+ // Spill X0..X7 → [SP+0..56]. mode 0x12 = signed-offset STORE.
+ encode_ldp_stp(ctx, 0x02, 0, 0x12, 0, X1, SP_REG, X0); // STP X0,X1, [SP, #0]
+ encode_ldp_stp(ctx, 0x02, 0, 0x12, 2, X3, SP_REG, X2); // STP X2,X3, [SP, #16]
+ encode_ldp_stp(ctx, 0x02, 0, 0x12, 4, X5, SP_REG, X4); // STP X4,X5, [SP, #32]
+ encode_ldp_stp(ctx, 0x02, 0, 0x12, 6, X7, SP_REG, X6); // STP X6,X7, [SP, #48]
+ // Spill V0..V7 → [SP+64..120] (V0 at lowest, matching wrapper expectations).
+ encode_ldp_stp(ctx, 0x01, 1, 0x12, 8, (Arm64Reg)1, SP_REG, (Arm64Reg)0); // STP D0,D1, [SP, #64]
+ encode_ldp_stp(ctx, 0x01, 1, 0x12, 10, (Arm64Reg)3, SP_REG, (Arm64Reg)2); // STP D2,D3, [SP, #80]
+ encode_ldp_stp(ctx, 0x01, 1, 0x12, 12, (Arm64Reg)5, SP_REG, (Arm64Reg)4); // STP D4,D5, [SP, #96]
+ encode_ldp_stp(ctx, 0x01, 1, 0x12, 14, (Arm64Reg)7, SP_REG, (Arm64Reg)6); // STP D6,D7, [SP, #112]
+
+ // X9 = closure (still in X0 — copy to keep X0 alive across loads).
+ emit_mov_gpr(ctx, X9, X0, 1);
+ // X9 = X9->t ; LDR X9, [X9, #0]
+ encode_ldr_str_imm(ctx, 3, 0, 1, 0, X9, X9);
+ // X9 = X9->fun ; LDR X9, [X9, #8]
+ encode_ldr_str_imm(ctx, 3, 0, 1, 1, X9, X9);
+ // X9 = X9->ret ; LDR X9, [X9, #offsetof(hl_type_fun, ret)]
+ int ret_offset = (int)(int_val)&ft->ret;
+ if( (ret_offset & 7) == 0 && (unsigned)ret_offset < 0x8000 )
+ encode_ldr_str_imm(ctx, 3, 0, 1, ret_offset / 8, X9, X9);
+ else {
+ load_immediate(ctx, ret_offset, X10, true);
+ encode_ldr_str_reg(ctx, 3, 0, 1, X10, /*option=*/3, /*S=*/0, X9, X9);
+ }
+ // W9 = X9->kind ; LDR W9, [X9, #0] (32-bit load)
+ encode_ldr_str_imm(ctx, 2, 0, 1, 0, X9, X9);
+
+ // Branch on return-type kind. HF64 / HF32 → wrapper_d; default → wrapper_ptr.
+ encode_add_sub_imm(ctx, 0, 1, 1, 0, HF64, X9, XZR); // CMP W9, #HF64
+ int jeq_f64 = byte_count(ctx->code);
+ encode_branch_cond(ctx, 0, COND_EQ);
+ encode_add_sub_imm(ctx, 0, 1, 1, 0, HF32, X9, XZR); // CMP W9, #HF32
+ int jeq_f32 = byte_count(ctx->code);
+ encode_branch_cond(ctx, 0, COND_EQ);
+
+ // Default path: load wrapper_ptr.
+ emit_const_load(ctx, ARM_TMP1, (uint64_t)(uintptr_t)hl_jit_wrapper_ptr);
+ int jdone_default = byte_count(ctx->code);
+ encode_branch_uncond(ctx, 0);
+
+ // Float path.
+ int float_path = byte_count(ctx->code);
+ patch_helper_branch(ctx, jeq_f64, float_path);
+ patch_helper_branch(ctx, jeq_f32, float_path);
+ emit_const_load(ctx, ARM_TMP1, (uint64_t)(uintptr_t)hl_jit_wrapper_d);
+
+ int after_select = byte_count(ctx->code);
+ patch_helper_branch(ctx, jdone_default, after_select);
+
+ // Set up wrapper args:
+ // X0 (closure) — already in X0 across the type-walk because the LDR chain
+ // above used X9 only. ✓
+ // X1 = caller stack args = X29 + 16 (skip saved fp+lr).
+ encode_add_sub_imm(ctx, 1, 0, 0, 0, 16, FP, X1);
+ // X2 = &spilled regs = SP.
+ emit_mov_gpr(ctx, X2, SP_REG, 1);
+
+ // Call wrapper.
+ encode_branch_reg(ctx, /*BLR*/1, ARM_TMP1);
+
+ emit_helper_epilogue(ctx);
+}
+
+void hl_codegen_init( jit_ctx *jit ) {
+ code_ctx *ctx = jit->code;
+ byte_reserve(ctx->code, 4096);
+ ctx->code.cur -= 4096; // NOTE(review): presumably byte_reserve advanced cur; rewind so the stubs fill the reserved space — confirm
+ // Each helper below records its start as out_pos + bytes already buffered, emits, then is finalized by flush_helper.
+ // hl_null_access stub.
+ ctx->null_access_pos = jit->out_pos + byte_count(ctx->code);
+ emit_null_access_stub(ctx, (void*)hl_null_access);
+ flush_helper(ctx, ctx->null_access_pos);
+
+ // hl_jit_null_field_access stub.
+ ctx->null_field_pos = jit->out_pos + byte_count(ctx->code);
+ emit_null_field_stub(ctx, (void*)hl_jit_null_field_access);
+ flush_helper(ctx, ctx->null_field_pos);
+
+ // c2hl + hl2c trampolines.
+ jit->code_funs.c2hl = jit->out_pos + byte_count(ctx->code);
+ emit_c2hl_trampoline(ctx);
+ flush_helper(ctx, jit->code_funs.c2hl);
+
+ jit->code_funs.hl2c = jit->out_pos + byte_count(ctx->code);
+ emit_hl2c_trampoline(ctx);
+ flush_helper(ctx, jit->code_funs.hl2c);
+
+ hl_codegen_flush(jit);
+}
+
+// ---------------------------------------------------------------------------
+// hl_codegen_flush_consts: patch BL/ADRP/LDR/ADD references against absolute
+// positions, then append the constant table to the output stream.
+// ---------------------------------------------------------------------------
+
+// Rewrite the imm21 page delta of the ADRP at byte offset `pc_abs` in `out` so it
+// addresses the 4KB page holding byte offset `target_abs`. imm21 is split across
+// the word: immlo in bits [30:29], immhi in bits [23:5]. Both offsets are relative
+// to the start of `out`; the page delta is the difference of the two (offset >> 12).
+static void patch_adrp_imm21( unsigned char *out, int pc_abs, int target_abs ) {
+	const unsigned int field_mask = (0x3u << 29) | (0x7FFFFu << 5);
+	unsigned int *slot = (unsigned int*)(out + pc_abs);
+	int pages = (target_abs >> 12) - (pc_abs >> 12);
+	unsigned int lo = (unsigned)(pages & 0x3) << 29;
+	unsigned int hi = (unsigned)((pages >> 2) & 0x7FFFF) << 5;
+	*slot = (*slot & ~field_mask) | lo | hi;
+}
+
+// Store `target_lo12 / scale`, masked to 12 bits, into the imm12 field (bits
+// [21:10]) of the instruction at `out + pos`. `scale` is the instruction's
+// immediate scale (1 for ADD, 8 for 64-bit LDR, ...); the caller keeps the target aligned to it.
+static void patch_imm12( unsigned char *out, int pos, int target_lo12, int scale ) {
+	const unsigned int field_mask = 0xFFFu << 10;
+	unsigned int *slot = (unsigned int*)(out + pos);
+	*slot = (*slot & ~field_mask) | ((unsigned)((target_lo12 / scale) & 0xFFF) << 10);
+}
+
+void hl_codegen_flush_consts( jit_ctx *jit ) {
+	code_ctx *ctx = jit->code;
+
+	// Step 1: resolve the (pos, fid, kind) call-site triples recorded in `funs`.
+	// kind 0 patches a BL imm26; any other kind patches an ADRP (pos) + ADD (pos + 4) pair.
+	for( int i = 0; i < int_arr_count(ctx->funs); i += 3 ) {
+		int pos = int_arr_get(ctx->funs, i);
+		int fid = int_arr_get(ctx->funs, i + 1);
+		int kind = int_arr_get(ctx->funs, i + 2);
+		intptr_t target_offs = (intptr_t)jit->mod->functions_ptrs[fid];
+		if( kind == 0 ) {
+			// BL: displacement counted in 4-byte instructions.
+			intptr_t delta = target_offs - (intptr_t)pos;
+			int imm26 = (int)(delta >> 2);
+			unsigned int *insn = (unsigned int*)(jit->output + pos);
+			*insn = (*insn & ~0x03FFFFFFu) | ((unsigned)imm26 & 0x03FFFFFF);
+		} else {
+			patch_adrp_imm21(jit->output, pos, (int)target_offs);
+			patch_imm12(jit->output, pos + 4, (int)target_offs & 0xFFF, /*scale=*/1);
+		}
+	}
+	int_arr_reset(&ctx->funs);
+
+	// Step 2: pad jit->out_pos to an 8-byte boundary so that constants at
+	// 8-byte-multiple offsets within the table are reachable through LDR's
+	// 8-byte-scaled imm12 field with no precision loss.
+	while( jit->out_pos & 7 ) {
+		if( jit->out_pos < jit->out_max ) jit->output[jit->out_pos] = 0;
+		jit->out_pos++;
+	}
+
+	// Step 3: expose the constant table on `jit` and record the output offset where it starts.
+	jit->code_size = byte_count(ctx->const_table);
+	jit->code_instrs = ctx->const_table.values;
+	ctx->const_table_pos = jit->out_pos;
+
+	// Step 4: patch the (adrp_pos, table offset) const-pool references.
+	for( int i = 0; i < int_arr_count(ctx->const_refs); i += 2 ) {
+		int adrp_pos = int_arr_get(ctx->const_refs, i);
+		int coffs = int_arr_get(ctx->const_refs, i + 1);
+		int target = ctx->const_table_pos + coffs;
+		patch_adrp_imm21(jit->output, adrp_pos, target);
+		// The instruction after the ADRP decides how imm12 (bits 21:10) is scaled.
+		// LDR (unsigned offset) is `size 111 V 01 opc imm12 Rn Rt` and scales imm12
+		// by the access size; ADD (imm) leaves it unscaled. Bits 31:22 recognized:
+		//   LDR Xt (size=11,V=0,opc=01): 0b1111100101 = 0x3E5 (scale=8)
+		//   LDR Dt (size=11,V=1,opc=01): 0b1111110101 = 0x3F5 (scale=8)
+		//   LDR Wt (size=10,V=0,opc=01): 0b1011100101 = 0x2E5 (scale=4)
+		//   LDR St (size=10,V=1,opc=01): 0b1011110101 = 0x2F5 (scale=4)
+		// Any other word is patched as ADD (imm).
+		unsigned int second = *(unsigned int*)(jit->output + adrp_pos + 4);
+		int lo12 = target & 0xFFF;
+		int scale;
+		switch( (second >> 22) & 0x3FF ) {
+		case 0x3E5: // LDR Xt
+		case 0x3F5: // LDR Dt
+			scale = 8;
+			break;
+		case 0x2E5: // LDR Wt — used to fall into the unscaled ADD path
+		case 0x2F5: // LDR St
+			scale = 4;
+			break;
+		default: // ADD (imm), unscaled
+			scale = 1;
+			break;
+		}
+		patch_imm12(jit->output, adrp_pos + 4, lo12, scale);
+	}
+	int_arr_reset(&ctx->const_refs);
+
+	byte_free(&ctx->const_table); // NOTE(review): jit->code_instrs still points at these bytes — confirm byte_free leaves them readable
+	value_map_free(&ctx->const_table_lookup);
+}
+
+// Resolve the jump-table slots recorded in `const_addr` against the final code address.
+void hl_codegen_final( jit_ctx *jit ) {
+	code_ctx *ctx = jit->code;
+	int n = int_arr_count(ctx->const_addr);
+	// Entries are (offset inside the constant table, target offset inside final_code) pairs.
+	for( int k = 0; k < n; k += 2 ) {
+		void **entry = (void**)(jit->final_code + ctx->const_table_pos + int_arr_get(ctx->const_addr, k));
+		*entry = jit->final_code + int_arr_get(ctx->const_addr, k + 1);
+	}
+	int_arr_free(&ctx->const_addr);
+}
diff --git a/src/jit_aarch64_emit.c b/src/jit_aarch64_emit.c
new file mode 100644
index 000000000..dbef3be37
--- /dev/null
+++ b/src/jit_aarch64_emit.c
@@ -0,0 +1,864 @@
+/*
+ * Copyright (C)2015-2016 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * AArch64 Instruction Encoding
+ *
+ * This file provides low-level instruction encoding functions for the AArch64
+ * architecture. All instructions are 32-bit fixed width.
+ *
+ * References:
+ * - ARM Architecture Reference Manual ARMv8 (ARM ARM)
+ * - AArch64 Instruction Set Architecture
+ */
+
+#if !defined(__aarch64__) && !defined(_M_ARM64)
+# error "This file is for AArch64 architecture only."
+#endif
+
+#include "jit_aarch64_emit.h"
+
+/*
+ * BITS: low `len` bits of val placed at bit `start` (len must be < 32); BIT: bit 0 of val placed at bit `pos`.
+ */
+#define BITS(val, start, len) (((unsigned int)(val) & ((1u << (len)) - 1)) << (start))
+#define BIT(val, pos) (((unsigned int)(val) & 1) << (pos))
+
+// EMIT32 is defined in jit_common.h
+
+// ============================================================================
+// ADD/SUB Instructions
+// ============================================================================
+
+/**
+ * ADD/SUB (immediate): Rd = Rn +/- imm12, the immediate optionally shifted left by 12.
+ * Assembly form: ADD/SUB Xd, Xn, #imm12 [, LSL #shift]
+ *
+ * @param sf     operand width: 1 = 64-bit, 0 = 32-bit
+ * @param op     0 = ADD, 1 = SUB
+ * @param S      1 = flag-setting variant (ADDS/SUBS), 0 = flags untouched
+ * @param shift  0 = LSL #0, 1 = LSL #12
+ * @param imm12  unsigned immediate; only the low 12 bits are encoded
+ * @param Rn     source register (0-31, 31=SP)
+ * @param Rd     destination register (0-31, 31=SP)
+ */
+void encode_add_sub_imm(code_ctx *ctx, int sf, int op, int S, int shift, int imm12, Arm64Reg Rn, Arm64Reg Rd) {
+	// Field layout of the 32-bit word:
+	//   [31] sf   [30] op   [29] S   [28:23] 100010   [22] sh
+	//   [21:10] imm12   [9:5] Rn   [4:0] Rd
+	unsigned int word = BITS(0x22, 23, 6);
+	word |= BIT(sf, 31);
+	word |= BIT(op, 30);
+	word |= BIT(S, 29);
+	word |= BIT(shift, 22);
+	word |= BITS(imm12, 10, 12);
+	word |= BITS(Rn, 5, 5);
+	word |= BITS(Rd, 0, 5);
+	EMIT32(ctx, word);
+}
+
+/**
+ * ADD/SUB (shifted register): Rd = Rn +/- (Rm shifted by imm6).
+ * Assembly form: ADD/SUB Xd, Xn, Xm [, shift #amount]
+ *
+ * @param sf     operand width: 1 = 64-bit, 0 = 32-bit
+ * @param op     0 = ADD, 1 = SUB
+ * @param S      1 = update the condition flags, 0 = leave them alone
+ * @param shift  shift applied to Rm: 00 = LSL, 01 = LSR, 10 = ASR
+ * @param Rm     second operand register
+ * @param imm6   shift amount (0-63)
+ * @param Rn     first operand register
+ * @param Rd     destination register
+ */
+void encode_add_sub_reg(code_ctx *ctx, int sf, int op, int S, int shift, Arm64Reg Rm,
+	int imm6, Arm64Reg Rn, Arm64Reg Rd) {
+	unsigned int word = BITS(0x0B, 24, 5) | BIT(sf, 31) | BIT(op, 30) | BIT(S, 29);
+	word |= BITS(shift, 22, 2) | BITS(Rm, 16, 5) | BITS(imm6, 10, 6);
+	word |= BITS(Rn, 5, 5) | BITS(Rd, 0, 5);
+	EMIT32(ctx, word);
+}
+
+/**
+ * ADD/SUB (extended register): Rd = Rn +/- (extended Rm, shifted left by imm3).
+ * Assembly form: ADD/SUB Xd, Xn, Wm, extend [#amount]
+ *
+ * @param sf      operand width: 1 = 64-bit, 0 = 32-bit
+ * @param op      0 = ADD, 1 = SUB
+ * @param S       1 = update the condition flags, 0 = leave them alone
+ * @param Rm      second operand register
+ * @param option  extend kind: UXTB=000, UXTH=001, UXTW=010, UXTX=011, SXTB=100, SXTH=101, SXTW=110, SXTX=111
+ * @param imm3    left shift applied after the extension (0-4)
+ * @param Rn      first operand register
+ * @param Rd      destination register
+ */
+void encode_add_sub_ext(code_ctx *ctx, int sf, int op, int S, Arm64Reg Rm,
+	int option, int imm3, Arm64Reg Rn, Arm64Reg Rd) {
+	unsigned int word = BITS(0x0B, 24, 5) | BIT(1, 21) | BIT(sf, 31) | BIT(op, 30) | BIT(S, 29);
+	word |= BITS(Rm, 16, 5) | BITS(option, 13, 3) | BITS(imm3, 10, 3);
+	word |= BITS(Rn, 5, 5) | BITS(Rd, 0, 5);
+	EMIT32(ctx, word);
+}
+
+// ============================================================================
+// Logical Instructions
+// ============================================================================
+
+/**
+ * Logical (immediate): Rd = Rn <op> bitmask, the bitmask being described by N:immr:imms.
+ * Assembly form: AND/ORR/EOR/ANDS Xd, Xn, #imm
+ *
+ * @param sf    operand width: 1 = 64-bit, 0 = 32-bit
+ * @param opc   operation: 00 = AND, 01 = ORR, 10 = EOR, 11 = ANDS
+ * @param N     N bit of the bitmask-immediate encoding
+ * @param immr  rotation field of the bitmask-immediate encoding
+ * @param imms  size field of the bitmask-immediate encoding
+ * @param Rn    source register
+ * @param Rd    destination register
+ */
+void encode_logical_imm(code_ctx *ctx, int sf, int opc, int N, int immr, int imms, Arm64Reg Rn, Arm64Reg Rd) {
+	unsigned int word = BITS(0x24, 23, 6) | BIT(sf, 31) | BITS(opc, 29, 2) | BIT(N, 22);
+	word |= BITS(immr, 16, 6) | BITS(imms, 10, 6) | BITS(Rn, 5, 5) | BITS(Rd, 0, 5);
+	EMIT32(ctx, word);
+}
+
+/**
+ * Logical (shifted register): Rd = Rn <op> (Rm shifted by imm6).
+ * Assembly form: AND/ORR/EOR/ANDS Xd, Xn, Xm [, shift #amount]
+ *
+ * @param sf     operand width: 1 = 64-bit, 0 = 32-bit
+ * @param opc    operation: 00 = AND, 01 = ORR, 10 = EOR, 11 = ANDS
+ * @param shift  shift applied to Rm: 00 = LSL, 01 = LSR, 10 = ASR, 11 = ROR
+ * @param N      bit 21 of the encoding; must be 0 for the regular logical ops
+ * @param Rm     second operand register
+ * @param imm6   shift amount
+ * @param Rn     first operand register
+ * @param Rd     destination register
+ */
+void encode_logical_reg(code_ctx *ctx, int sf, int opc, int shift, int N, Arm64Reg Rm,
+	int imm6, Arm64Reg Rn, Arm64Reg Rd) {
+	unsigned int word = BITS(0x0A, 24, 5) | BIT(sf, 31) | BITS(opc, 29, 2);
+	word |= BITS(shift, 22, 2) | BIT(N, 21) | BITS(Rm, 16, 5) | BITS(imm6, 10, 6);
+	word |= BITS(Rn, 5, 5) | BITS(Rd, 0, 5);
+	EMIT32(ctx, word);
+}
+
+// ============================================================================
+// Move Wide (immediate) Instructions
+// ============================================================================
+
+/**
+ * Move wide (immediate): MOVZ/MOVN/MOVK with a 16-bit immediate in a selectable 16-bit lane.
+ * Assembly form: MOVZ/MOVN/MOVK Xd, #imm16 [, LSL #shift]
+ *
+ * @param sf     operand width: 1 = 64-bit, 0 = 32-bit
+ * @param opc    variant: 10 = MOVZ, 00 = MOVN, 11 = MOVK
+ * @param hw     16-bit lane index (0-3 for 64-bit, 0-1 for 32-bit)
+ * @param imm16  immediate; only the low 16 bits are encoded
+ * @param Rd     destination register
+ */
+void encode_mov_wide_imm(code_ctx *ctx, int sf, int opc, int hw, int imm16, Arm64Reg Rd) {
+	unsigned int word = BITS(0x25, 23, 6) | BIT(sf, 31) | BITS(opc, 29, 2);
+	word |= BITS(hw, 21, 2) | BITS(imm16, 5, 16) | BITS(Rd, 0, 5);
+	EMIT32(ctx, word);
+}
+
+// ============================================================================
+// Multiply Instructions
+// ============================================================================
+
+/**
+ * MADD/MSUB: multiply two registers and add the product to / subtract it from a third.
+ *   MADD Xd, Xn, Xm, Xa  ->  Xd = Xa + Xn*Xm
+ *   MSUB Xd, Xn, Xm, Xa  ->  Xd = Xa - Xn*Xm
+ *
+ * @param sf  operand width: 1 = 64-bit, 0 = 32-bit
+ * @param op  0 = MADD, 1 = MSUB
+ * @param Rm  second multiplicand
+ * @param Ra  addend/minuend register (pass XZR for a plain MUL)
+ * @param Rn  first multiplicand
+ * @param Rd  destination register
+ */
+void encode_madd_msub(code_ctx *ctx, int sf, int op, Arm64Reg Rm, Arm64Reg Ra, Arm64Reg Rn, Arm64Reg Rd) {
+	// Layout: [31] sf  [30:29] 00  [28:24] 11011  [23:21] 000  [20:16] Rm
+	//         [15] op (0=MADD, 1=MSUB)  [14:10] Ra  [9:5] Rn  [4:0] Rd
+	unsigned int word = BITS(0xD8, 21, 8) | BIT(sf, 31) | BIT(op, 15);
+	word |= BITS(Rm, 16, 5) | BITS(Ra, 10, 5) | BITS(Rn, 5, 5) | BITS(Rd, 0, 5);
+	EMIT32(ctx, word);
+}
+
+/**
+ * SDIV/UDIV: integer division, Rd = Rn / Rm.
+ * Assembly form: SDIV/UDIV Xd, Xn, Xm
+ *
+ * @param sf  operand width: 1 = 64-bit, 0 = 32-bit
+ * @param U   value placed in bit 10 of the encoding:
+ *            0 = UDIV (unsigned), 1 = SDIV (signed).
+ *            Note that a set bit selects the signed divide.
+ * @param Rm  divisor
+ * @param Rn  dividend
+ * @param Rd  destination register (quotient)
+ */
+void encode_div(code_ctx *ctx, int sf, int U, Arm64Reg Rm, Arm64Reg Rn, Arm64Reg Rd) {
+	// Layout: [31] sf  [30:29] 00  [28:21] 11010110  [20:16] Rm
+	//         [15:11] 00001  [10] U (1=SDIV, 0=UDIV)  [9:5] Rn  [4:0] Rd
+	unsigned int word = BITS(0xD6, 21, 8) | BIT(sf, 31) | BITS(0x2 | U, 10, 6);
+	word |= BITS(Rm, 16, 5) | BITS(Rn, 5, 5) | BITS(Rd, 0, 5);
+	EMIT32(ctx, word);
+}
+
+// ============================================================================
+// Shift Instructions
+// ============================================================================
+
+/**
+ * Variable shift (LSLV/LSRV/ASRV/RORV): Rd = Rn shifted/rotated by the amount held in Rm.
+ * Assembly form: LSL/LSR/ASR/ROR Xd, Xn, Xm
+ *
+ * @param sf   operand width: 1 = 64-bit, 0 = 32-bit
+ * @param op2  shift kind: 00 = LSLV, 01 = LSRV, 10 = ASRV, 11 = RORV
+ * @param Rm   register holding the shift amount
+ * @param Rn   register holding the value to shift
+ * @param Rd   destination register
+ */
+void encode_shift_reg(code_ctx *ctx, int sf, int op2, Arm64Reg Rm, Arm64Reg Rn, Arm64Reg Rd) {
+	// Layout: [31] sf  [30:29] 00  [28:21] 11010110  [20:16] Rm
+	//         [15:12] 0010  [11:10] op2  [9:5] Rn  [4:0] Rd
+	unsigned int word = BITS(0xD6, 21, 8) | BIT(sf, 31) | BITS(0x08 | op2, 10, 6);
+	word |= BITS(Rm, 16, 5) | BITS(Rn, 5, 5) | BITS(Rd, 0, 5);
+	EMIT32(ctx, word);
+}
+
+// ============================================================================
+// Load/Store Instructions
+// ============================================================================
+
+/**
+ * Encode LDR/STR (unsigned immediate offset)
+ * Format: LDR/STR Xt, [Xn, #imm]
+ *
+ * @param size 00=8-bit, 01=16-bit, 10=32-bit, 11=64-bit
+ * @param V 0=GPR, 1=FP/SIMD
+ * @param opc For V=0: 00=STR, 01=LDR, 10=LDRSW (with size=10) or PRFM (with size=11)
+ * @param imm12 Unsigned 12-bit offset (scaled by size)
+ * @param Rn Base register
+ * @param Rt Source/destination register
+ */
+void encode_ldr_str_imm(code_ctx *ctx, int size, int V, int opc, int imm12, Arm64Reg Rn, Arm64Reg Rt) {
+ // LDR/STR (unsigned offset) encoding:
+ // [31:30] = size, [29:27] = 111, [26] = V, [25:24] = 01, [23:22] = opc
+ // [21:10] = imm12, [9:5] = Rn, [4:0] = Rt
+ unsigned int insn = BITS(size, 30, 2) | // [31:30] = size
+ BITS(7, 27, 3) | // [29:27] = 111
+ BIT(V, 26) | // [26] = V
+ BITS(1, 24, 2) | // [25:24] = 01 (unsigned offset)
+ BITS(opc, 22, 2) | // [23:22] = opc
+ BITS(imm12, 10, 12) | // [21:10] = imm12
+ BITS(Rn, 5, 5) | // [9:5] = Rn
+ BITS(Rt, 0, 5); // [4:0] = Rt
+ EMIT32(ctx, insn);
+}
+
+/**
+ * LDR/STR (register offset): access memory at Rn plus an optionally extended/scaled Rm.
+ * Assembly form: LDR/STR Xt, [Xn, Xm{, extend {#amount}}]
+ *
+ * @param size    access size: 00 = 8-bit, 01 = 16-bit, 10 = 32-bit, 11 = 64-bit
+ * @param V       0 = GPR, 1 = FP/SIMD
+ * @param opc     for V=0: 01 = LDR, 00 = STR
+ * @param Rm      offset register
+ * @param option  extend kind: 010 = UXTW, 011 = LSL, 110 = SXTW, 111 = SXTX
+ * @param S       1 = scale the offset by the access size, 0 = no scaling
+ * @param Rn      base register
+ * @param Rt      register being loaded or stored
+ */
+void encode_ldr_str_reg(code_ctx *ctx, int size, int V, int opc, Arm64Reg Rm,
+	int option, int S, Arm64Reg Rn, Arm64Reg Rt) {
+	// Field layout of the 32-bit word:
+	//   [31:30] size  [29:27] 111  [26] V  [25:24] 00  [23:22] opc
+	//   [21] 1  [20:16] Rm  [15:13] option  [12] S  [11:10] 10
+	//   [9:5] Rn  [4:0] Rt
+	// Bits [25:24] are 00 for the register-offset form, so no term is needed for them.
+	unsigned int word = BITS(7, 27, 3);
+	word |= BITS(size, 30, 2);
+	word |= BIT(V, 26);
+	word |= BITS(opc, 22, 2);
+	word |= BIT(1, 21);
+	word |= BITS(Rm, 16, 5);
+	word |= BITS(option, 13, 3);
+	word |= BIT(S, 12);
+	word |= BITS(2, 10, 2);
+	word |= BITS(Rn, 5, 5);
+	word |= BITS(Rt, 0, 5);
+	EMIT32(ctx, word);
+}
+
+/**
+ * LDUR/STUR: load/store with an unscaled signed 9-bit offset.
+ * Assembly form: LDUR/STUR Rt, [Xn, #simm9]
+ *
+ * The offset covers -256..+255 bytes and is NOT multiplied by the access
+ * size, which makes this form convenient for stack locals addressed at
+ * negative offsets from the frame pointer.
+ *
+ * @param size  access size: 00 = 8-bit, 01 = 16-bit, 10 = 32-bit, 11 = 64-bit
+ * @param V     0 = GPR, 1 = FP/SIMD
+ * @param opc   00 = STUR, 01 = LDUR
+ * @param imm9  signed byte offset (-256 to +255), unscaled
+ * @param Rn    base register
+ * @param Rt    register being loaded or stored
+ */
+void encode_ldur_stur(code_ctx *ctx, int size, int V, int opc, int imm9, Arm64Reg Rn, Arm64Reg Rt) {
+	// Field layout of the 32-bit word:
+	//   [31:30] size  [29:27] 111  [26] V  [25:24] 00  [23:22] opc  [21] 0
+	//   [20:12] imm9  [11:10] 00  [9:5] Rn  [4:0] Rt
+	// The constant-zero fields ([25:24], [21], [11:10]) need no explicit term.
+	unsigned int word = BITS(7, 27, 3);
+	word |= BITS(size, 30, 2);
+	word |= BIT(V, 26);
+	word |= BITS(opc, 22, 2);
+	// imm9 is reduced to its 9-bit two's-complement form before placement.
+	word |= BITS(imm9 & 0x1FF, 12, 9);
+	// Register fields.
+	word |= BITS(Rn, 5, 5);
+	word |= BITS(Rt, 0, 5);
+	EMIT32(ctx, word);
+}
+
+/**
+ * Encode LDP/STP (Load/Store Pair)
+ * Format: LDP/STP Xt1, Xt2, [Xn, #imm] (various addressing modes)
+ *
+ * @param opc Size: V=0: 00=32-bit, 10=64-bit; V=1: 00=32-bit (S), 01=64-bit (D), 10=128-bit (Q)
+ * @param V 0=GPR, 1=FP/SIMD registers
+ * @param mode Addressing mode + load/store:
+ * 0x01 = post-indexed load (LDP Xt1, Xt2, [Xn], #imm)
+ * 0x02 = signed-offset load (LDP Xt1, Xt2, [Xn, #imm])
+ * 0x03 = pre-indexed store (STP Xt1, Xt2, [Xn, #imm]!)
+ * 0x12 = signed-offset store (STP Xt1, Xt2, [Xn, #imm])
+ * 0x13 = pre-indexed store (STP Xt1, Xt2, [Xn, #imm]!) - bit 4 always forces a store; no mode yields a pre-indexed load
+ * 0x11 = post-indexed store (STP Xt1, Xt2, [Xn], #imm)
+ * @param imm7 Signed 7-bit offset (scaled by register size: *4 for 32-bit, *8 for 64-bit)
+ * @param Rt2 Second register
+ * @param Rn Base register
+ * @param Rt First register
+ *
+ * ARM64 encoding:
+ * [31:30] = opc (size)
+ * [29:27] = 101 (fixed)
+ * [26] = V
+ * [25] = 0 (fixed)
+ * [24:23] = addressing mode (01=post, 10=offset, 11=pre)
+ * [22] = L (0=store, 1=load)
+ * [21:15] = imm7
+ * [14:10] = Rt2
+ * [9:5] = Rn
+ * [4:0] = Rt
+ */
+void encode_ldp_stp(code_ctx *ctx, int opc, int V, int mode, int imm7,
+ Arm64Reg Rt2, Arm64Reg Rn, Arm64Reg Rt) {
+ int addr_mode, L;
+
+ // Decode mode parameter to get addressing mode and load/store bit.
+ // Bit 4 (0x10) of mode forces store; otherwise the legacy mappings apply.
+ if (mode & 0x10) {
+ addr_mode = mode & 3;
+ L = 0;
+ } else if (mode == 0x03) {
+ // Pre-indexed store: STP Xt1, Xt2, [Xn, #imm]!
+ addr_mode = 3;
+ L = 0;
+ } else if (mode == 0x01) {
+ // Post-indexed load: LDP Xt1, Xt2, [Xn], #imm
+ addr_mode = 1;
+ L = 1;
+ } else {
+ // Default: use mode as addressing mode, assume load
+ addr_mode = mode & 3;
+ L = 1;
+ }
+
+ unsigned int insn = BITS(opc, 30, 2) | // [31:30] = opc
+ BITS(5, 27, 3) | // [29:27] = 101
+ BIT(V, 26) | // [26] = V
+ BITS(addr_mode, 23, 2) | // [24:23] = addressing mode
+ BIT(L, 22) | // [22] = L
+ BITS(imm7, 15, 7) | // [21:15] = imm7
+ BITS(Rt2, 10, 5) | // [14:10] = Rt2
+ BITS(Rn, 5, 5) | // [9:5] = Rn
+ BITS(Rt, 0, 5); // [4:0] = Rt
+ EMIT32(ctx, insn);
+}
+
+// ============================================================================
+// PC-Relative Addressing
+// ============================================================================
+
+/**
+ * Emit ADRP Xd, label (PC-relative 4KB page address).
+ *
+ * The signed 21-bit page delta is supplied already split in two pieces:
+ *
+ * @param immlo Bits 1:0 of the page delta, placed in instruction bits [30:29]
+ * @param immhi Bits 20:2 of the page delta, placed in instruction bits [23:5]
+ * @param Rd    Destination register, instruction bits [4:0]
+ *
+ * Bit 31 is set (this is what separates ADRP from ADR) and bits [28:24] hold
+ * the fixed pattern 10000. The delta counts 4KB pages, so the byte offset
+ * applied is imm21 << 12.
+ */
+void encode_adrp(code_ctx *ctx, int immlo, int immhi, Arm64Reg Rd) {
+	unsigned int word = BITS(1, 31, 1) | BITS(0x10, 24, 5);
+	word |= BITS(immlo, 29, 2);
+	word |= BITS(immhi, 5, 19);
+	word |= BITS(Rd, 0, 5);
+	EMIT32(ctx, word);
+}
+
+/**
+ * Emit ADR Xd, label (PC-relative byte address).
+ *
+ * Same field layout as ADRP but with bit 31 clear, and the 21-bit signed
+ * offset is in bytes rather than pages.
+ *
+ * @param immlo Bits 1:0 of the offset, placed in instruction bits [30:29]
+ * @param immhi Bits 20:2 of the offset, placed in instruction bits [23:5]
+ * @param Rd    Destination register, instruction bits [4:0]
+ */
+void encode_adr(code_ctx *ctx, int immlo, int immhi, Arm64Reg Rd) {
+	unsigned int word = BITS(0, 31, 1) | BITS(0x10, 24, 5);
+	word |= BITS(immlo, 29, 2);
+	word |= BITS(immhi, 5, 19);
+	word |= BITS(Rd, 0, 5);
+	EMIT32(ctx, word);
+}
+
+// ============================================================================
+// Branch Instructions
+// ============================================================================
+
+/**
+ * Emit B.cond label.
+ *
+ * @param imm19 Signed branch displacement in instructions (byte offset / 4),
+ *              placed in bits [23:5]
+ * @param cond  Condition code (see ArmCondition), placed in bits [3:0]
+ *
+ * Bits [31:24] carry the fixed opcode 0x54; bit 4 stays 0.
+ */
+void encode_branch_cond(code_ctx *ctx, int imm19, ArmCondition cond) {
+	unsigned int word = BITS(0x54, 24, 8);
+	word |= BITS(imm19, 5, 19);
+	word |= BITS(cond, 0, 4);
+	EMIT32(ctx, word);
+}
+
+/**
+ * Emit B label (unconditional, no link).
+ *
+ * @param imm26 Signed branch displacement in instructions (byte offset / 4),
+ *              placed in bits [25:0]; bits [31:26] are the opcode 000101
+ */
+void encode_branch_uncond(code_ctx *ctx, int imm26) {
+	const unsigned int opcode = BITS(0x05, 26, 6);
+	const unsigned int target = BITS(imm26, 0, 26);
+	EMIT32(ctx, opcode | target);
+}
+
+/**
+ * Emit BL label (branch with link).
+ *
+ * @param imm26 Signed branch displacement in instructions (byte offset / 4),
+ *              placed in bits [25:0]; bits [31:26] are the opcode 100101
+ */
+void encode_branch_link(code_ctx *ctx, int imm26) {
+	const unsigned int opcode = BITS(0x25, 26, 6);
+	const unsigned int target = BITS(imm26, 0, 26);
+	EMIT32(ctx, opcode | target);
+}
+
+/**
+ * Emit an indirect branch: BR Xn, BLR Xn or RET Xn.
+ *
+ * @param opc 00=BR, 01=BLR, 10=RET; OR-ed into bits [22:21]
+ * @param Rn  Register holding the target address (X30/LR for a plain RET),
+ *            placed in bits [9:5]
+ *
+ * The fixed part is 1101011 0000 in bits [31:21] and 11111 in bits [20:16].
+ */
+void encode_branch_reg(code_ctx *ctx, int opc, Arm64Reg Rn) {
+	unsigned int word = BITS(0x6B0, 21, 11);
+	word |= BITS(opc, 21, 2);
+	word |= BITS(0x1F, 16, 5);
+	word |= BITS(Rn, 5, 5);
+	EMIT32(ctx, word);
+}
+
+/**
+ * Emit CBZ / CBNZ Rt, label (compare with zero and branch).
+ *
+ * @param sf    1 = test the 64-bit X register, 0 = test the 32-bit W register
+ * @param op    0 = CBZ (branch if zero), 1 = CBNZ (branch if non-zero)
+ * @param imm19 Signed displacement in instructions, placed in bits [23:5]
+ * @param Rt    Register to test, placed in bits [4:0]
+ *
+ * Bits [30:25] hold the fixed pattern 011010.
+ */
+void encode_cbz_cbnz(code_ctx *ctx, int sf, int op, int imm19, Arm64Reg Rt) {
+	unsigned int word = BITS(0x1A, 25, 6);
+	word |= BIT(sf, 31);
+	word |= BIT(op, 24);
+	word |= BITS(imm19, 5, 19);
+	word |= BITS(Rt, 0, 5);
+	EMIT32(ctx, word);
+}
+
+/**
+ * Emit TBZ / TBNZ Rt, #bit, label (test a single bit and branch).
+ *
+ * The bit number (0-63) is split across two fields:
+ *
+ * @param b5    Bit 5 of the bit number, placed in instruction bit 31
+ * @param op    0 = TBZ (branch if the bit is clear), 1 = TBNZ (branch if set)
+ * @param b40   Bits 4:0 of the bit number, placed in bits [23:19]
+ * @param imm14 Signed displacement in instructions, placed in bits [18:5]
+ * @param Rt    Register to test, placed in bits [4:0]
+ *
+ * Bits [30:25] hold the fixed pattern 011011.
+ */
+void encode_tbz_tbnz(code_ctx *ctx, int b5, int op, int b40, int imm14, Arm64Reg Rt) {
+	unsigned int word = BITS(0x1B, 25, 6);
+	word |= BIT(b5, 31);
+	word |= BIT(op, 24);
+	word |= BITS(b40, 19, 5);
+	word |= BITS(imm14, 5, 14);
+	word |= BITS(Rt, 0, 5);
+	EMIT32(ctx, word);
+}
+
+// ============================================================================
+// Floating-Point Instructions
+// ============================================================================
+
+/**
+ * Emit a scalar floating-point two-source operation:
+ * FMUL/FDIV/FADD/FSUB/FMAX/FMIN Vd, Vn, Vm.
+ *
+ * @param M      Instruction bit 31; 0 for the instructions listed here
+ * @param S      Instruction bit 29; 0 for the instructions listed here
+ * @param type   Precision in bits [23:22]: 00=single, 01=double
+ * @param Rm     Second source register, bits [20:16]
+ * @param opcode Bits [15:12]: 0000=FMUL, 0001=FDIV, 0010=FADD, 0011=FSUB,
+ *               0100=FMAX, 0101=FMIN
+ * @param Rn     First source register, bits [9:5]
+ * @param Rd     Destination register, bits [4:0]
+ *
+ * Fixed fields: 11110 in [28:24], 1 in [21], 10 in [11:10].
+ */
+void encode_fp_arith(code_ctx *ctx, int M, int S, int type, Arm64FpReg Rm,
+                     int opcode, Arm64FpReg Rn, Arm64FpReg Rd) {
+	unsigned int word = BITS(0x1E, 24, 5) | BITS(1, 21, 1) | BITS(2, 10, 2);
+	word |= BIT(M, 31);
+	word |= BIT(S, 29);
+	word |= BITS(type, 22, 2);
+	word |= BITS(Rm, 16, 5);
+	word |= BITS(opcode, 12, 4);
+	word |= BITS(Rn, 5, 5);
+	word |= BITS(Rd, 0, 5);
+	EMIT32(ctx, word);
+}
+
+/**
+ * Emit a scalar floating-point one-source operation:
+ * FMOV/FABS/FNEG/FSQRT Vd, Vn.
+ *
+ * @param M      Instruction bit 31; 0 for the instructions listed here
+ * @param S      Instruction bit 29; 0 for the instructions listed here
+ * @param type   Precision in bits [23:22]: 00=single, 01=double
+ * @param opcode Bits [20:15]: 000000=FMOV, 000001=FABS, 000010=FNEG,
+ *               000011=FSQRT
+ * @param Rn     Source register, bits [9:5]
+ * @param Rd     Destination register, bits [4:0]
+ *
+ * Fixed fields: 11110 in [28:24], 1 in [21], 10000 in [14:10].
+ */
+void encode_fp_1src(code_ctx *ctx, int M, int S, int type, int opcode, Arm64FpReg Rn, Arm64FpReg Rd) {
+	unsigned int word = BITS(0x1E, 24, 5) | BITS(1, 21, 1) | BITS(0x10, 10, 5);
+	word |= BIT(M, 31);
+	word |= BIT(S, 29);
+	word |= BITS(type, 22, 2);
+	word |= BITS(opcode, 15, 6);
+	word |= BITS(Rn, 5, 5);
+	word |= BITS(Rd, 0, 5);
+	EMIT32(ctx, word);
+}
+
+/**
+ * Emit a scalar floating-point compare (sets NZCV):
+ * FCMP/FCMPE Vn, Vm or FCMP/FCMPE Vn, #0.0.
+ *
+ * @param M    Instruction bit 31; 0 for these instructions
+ * @param S    Instruction bit 29; 0 for these instructions
+ * @param type Precision in bits [23:22]: 00=single, 01=double
+ * @param Rm   Second source register, bits [20:16]. Pass 0 for the #0.0
+ *             forms; note that Rm=0 with op=00/10 compares against register
+ *             0, not against the constant zero.
+ * @param op   Variant selector, encoded into opcode2 bits [4:3]:
+ *             00=FCMP Vn,Vm   01=FCMP Vn,#0.0
+ *             10=FCMPE Vn,Vm  11=FCMPE Vn,#0.0
+ *             (FCMPE additionally signals Invalid Operation on quiet NaNs)
+ * @param Rn   First source register, bits [9:5]
+ *
+ * Fixed fields: 11110 in [28:24], 1 in [21], 00 in [15:14], 1000 in [13:10].
+ *
+ * The architectural "op" field at [15:14] must be 00 for every compare; the
+ * FCMP/FCMPE and register/zero choice lives in opcode2. Placing `op` at
+ * [15:14], as an earlier revision did, yields an unallocated encoding for
+ * any value other than 0.
+ */
+void encode_fp_compare(code_ctx *ctx, int M, int S, int type, Arm64FpReg Rm, int op, Arm64FpReg Rn) {
+	// opcode2[4] = signalling (FCMPE), opcode2[3] = compare with #0.0.
+	unsigned int opcode2 = BITS(op & 3, 3, 2);
+	unsigned int insn = BIT(M, 31) | BIT(S, 29) | BITS(0x1E, 24, 5) |
+	                    BITS(type, 22, 2) | BITS(1, 21, 1) | BITS(Rm, 16, 5) |
+	                    BITS(8, 10, 4) | BITS(Rn, 5, 5) | opcode2;
+	EMIT32(ctx, insn);
+}
+
+/**
+ * Emit a conversion between a floating-point register and a general-purpose
+ * register with the FP register as source, e.g. FCVTZS/FCVTZU Xd, Vn.
+ *
+ * @param sf    Integer width: 1=64-bit, 0=32-bit (bit 31)
+ * @param S     Instruction bit 29; 0 for the instructions listed here
+ * @param type  FP precision in bits [23:22]: 00=single, 01=double, 11=half
+ * @param rmode Rounding mode in bits [20:19]: 00=nearest, 01=towards +inf,
+ *              10=towards -inf, 11=towards zero
+ * @param opc   Bits [18:16]: 000=signed convert (FCVTNS, or FCVTZS with
+ *              rmode=11), 001=unsigned convert (FCVTNU / FCVTZU),
+ *              010=SCVTF, 011=UCVTF, 110/111=FMOV
+ * @param Rn    Source FP register, bits [9:5]
+ * @param Rd    Destination integer register, bits [4:0]
+ *
+ * Fixed fields: 11110 in [28:24], 1 in [21], zeros in [15:10].
+ */
+void encode_fcvt_int(code_ctx *ctx, int sf, int S, int type, int rmode, int opc, Arm64FpReg Rn, Arm64Reg Rd) {
+	unsigned int word = BITS(0x1E, 24, 5) | BITS(1, 21, 1);
+	word |= BIT(sf, 31);
+	word |= BIT(S, 29);
+	word |= BITS(type, 22, 2);
+	word |= BITS(rmode, 19, 2);
+	word |= BITS(opc, 16, 3);
+	word |= BITS(Rn, 5, 5);
+	word |= BITS(Rd, 0, 5);
+	EMIT32(ctx, word);
+}
+
+/**
+ * Emit an integer-to-floating-point conversion: SCVTF/UCVTF Vd, Xn.
+ *
+ * The bit layout is the same as encode_fcvt_int; only the register classes
+ * of the operands are swapped (integer source, FP destination).
+ *
+ * @param sf    Integer width: 1=64-bit, 0=32-bit (bit 31)
+ * @param S     Instruction bit 29; 0 for the instructions listed here
+ * @param type  FP precision in bits [23:22]: 00=single, 01=double
+ * @param rmode Bits [20:19]; 00 for SCVTF/UCVTF
+ * @param opc   Bits [18:16]: 010=SCVTF, 011=UCVTF
+ * @param Rn    Source integer register, bits [9:5]
+ * @param Rd    Destination FP register, bits [4:0]
+ */
+void encode_int_fcvt(code_ctx *ctx, int sf, int S, int type, int rmode, int opc, Arm64Reg Rn, Arm64FpReg Rd) {
+	unsigned int word = BITS(0x1E, 24, 5) | BITS(1, 21, 1);
+	word |= BIT(sf, 31);
+	word |= BIT(S, 29);
+	word |= BITS(type, 22, 2);
+	word |= BITS(rmode, 19, 2);
+	word |= BITS(opc, 16, 3);
+	word |= BITS(Rn, 5, 5);
+	word |= BITS(Rd, 0, 5);
+	EMIT32(ctx, word);
+}
+
+// ============================================================================
+// Conditional Select
+// ============================================================================
+
+/**
+ * Emit a conditional select: CSEL/CSINC/CSINV/CSNEG Rd, Rn, Rm, cond.
+ *
+ * The instruction is chosen by the (op, op2) pair:
+ *   op=0 op2=00 -> CSEL     op=0 op2=01 -> CSINC
+ *   op=1 op2=00 -> CSINV    op=1 op2=01 -> CSNEG
+ *
+ * @param sf   1=64-bit, 0=32-bit (bit 31)
+ * @param op   Bit 30, see table above
+ * @param Rm   Second source register (used when cond is false), bits [20:16]
+ * @param cond Condition code, bits [15:12]
+ * @param op2  Bits [11:10], see table above
+ * @param Rn   First source register (used when cond is true), bits [9:5]
+ * @param Rd   Destination register, bits [4:0]
+ *
+ * Bit 29 (S) is left 0 and bits [28:21] hold the fixed pattern 11010100.
+ */
+void encode_cond_select(code_ctx *ctx, int sf, int op, Arm64Reg Rm, ArmCondition cond,
+                        int op2, Arm64Reg Rn, Arm64Reg Rd) {
+	unsigned int word = BITS(0xD4, 21, 8);
+	word |= BIT(sf, 31);
+	word |= BIT(op, 30);
+	word |= BITS(Rm, 16, 5);
+	word |= BITS(cond, 12, 4);
+	word |= BITS(op2, 10, 2);
+	word |= BITS(Rn, 5, 5);
+	word |= BITS(Rd, 0, 5);
+	EMIT32(ctx, word);
+}
+
+// ============================================================================
+// High-Level Helper Functions
+// ============================================================================
+
+// ----------------------------------------------------------------------------
+// Logical Immediate Encoding Helpers
+// ----------------------------------------------------------------------------
+
+/**
+ * Rotate `val` right by `rotation` bit positions.
+ * Only the low six bits of `rotation` matter, so 64 behaves like 0 and a
+ * negative count rotates left.
+ */
+static inline uint64_t rotate_right_64(uint64_t val, int rotation) {
+	const unsigned int right = (unsigned int)rotation & 63u;
+	const unsigned int left = (0u - (unsigned int)rotation) & 63u;
+	return (val >> right) | (val << left);
+}
+
+/**
+ * Decide whether a 64-bit constant is an AArch64 "logical immediate" and, if
+ * so, produce its N:immr:imms encoding.
+ *
+ * A logical immediate is a contiguous run of 1-bits inside an element of
+ * 2, 4, 8, 16, 32 or 64 bits, rotated by any amount and replicated to fill
+ * the register. The method follows dougallj's branch-light algorithm:
+ * https://dougallj.wordpress.com/2021/10/30/
+ *
+ * @param val  Candidate value
+ * @param N    Output: 1 when the element is 64 bits wide, else 0
+ * @param immr Output: 6-bit rotate field
+ * @param imms Output: 6-bit field combining element size and run length
+ * @return true when `val` is encodable; outputs are written only in that case
+ */
+static bool is_logical_immediate_64(uint64_t val, int *N, int *immr, int *imms) {
+	// Neither 0 nor ~0 has an encoding.
+	if (val == 0 || ~val == 0)
+		return false;
+
+	// Clearing the trailing run of ones exposes the lowest "0 then 1"
+	// boundary; its position is how far to rotate so the run of ones starts
+	// at bit 0. When nothing is left the value is already a low run of ones
+	// (and ctz of zero would be undefined), so no rotation is needed.
+	uint64_t without_low_run = val & (val + 1);
+	int rot = 0;
+	if (without_low_run != 0)
+		rot = __builtin_ctzll(without_low_run);
+	uint64_t canon = rotate_right_64(val, rot);
+
+	// In canonical form one element is [leading zeros][ones], so its width is
+	// the sum of the two counts.
+	int run_len = __builtin_ctzll(~canon);
+	int elem_size = __builtin_clzll(canon) + run_len;
+
+	// The value must be periodic with that width; this also rejects widths
+	// that are not a power of two.
+	if (rotate_right_64(val, elem_size) != val)
+		return false;
+
+	*immr = (-rot) & (elem_size - 1);
+	*imms = ((-(elem_size << 1)) | (run_len - 1)) & 0x3f;
+	*N = (elem_size >> 6);
+	return true;
+}
+
+/**
+ * 32-bit flavour of is_logical_immediate_64, for W-register operations.
+ *
+ * The value is duplicated into both halves of a 64-bit word and handed to
+ * the 64-bit routine; the result is only valid when the element size is at
+ * most 32 bits, i.e. when N comes back as 0.
+ *
+ * @param val  Candidate 32-bit value
+ * @param N    Output: N field (0 whenever true is returned)
+ * @param immr Output: rotate field
+ * @param imms Output: element size / run length field
+ * @return true when `val` is encodable for a 32-bit logical instruction
+ */
+static bool is_logical_immediate_32(uint32_t val, int *N, int *immr, int *imms) {
+	// 0 and 0xFFFFFFFF have no encoding.
+	if (val == 0 || val == 0xFFFFFFFF)
+		return false;
+
+	// Multiplying by 2^32 + 1 copies the word into the upper half.
+	const uint64_t doubled = (uint64_t)val * 0x0000000100000001ULL;
+
+	// N == 1 would mean a 64-bit element, which a W-form cannot express.
+	return is_logical_immediate_64(doubled, N, immr, imms) && *N == 0;
+}
+
+// ----------------------------------------------------------------------------
+
+/**
+ * Load an immediate value into a general-purpose register using the
+ * shortest sequence this routine knows how to build.
+ *
+ * Strategies, tried in order:
+ *   1. zero                 -> ORR dst, XZR, XZR
+ *   2. -65536..-1           -> single MOVN
+ *   3. 1..65535             -> single MOVZ
+ *   4. bitmask pattern      -> ORR dst, XZR, #logical_imm
+ *   5. otherwise            -> MOVZ + MOVK..., or MOVN + MOVK... when more
+ *                              halfwords are 0xFFFF than are 0x0000
+ *
+ * Between one and four instructions (two at most in 32-bit mode) are
+ * emitted; the caller must have reserved room in the code buffer.
+ *
+ * @param val      Immediate value. In 32-bit mode only the low 32 bits are
+ *                 significant; upper bits are ignored.
+ * @param dst      Destination register
+ * @param is_64bit true=64-bit (X) register, false=32-bit (W) register
+ */
+void load_immediate(code_ctx *ctx, int64_t val, Arm64Reg dst, bool is_64bit) {
+	int sf = is_64bit ? 1 : 0;
+
+	// A W-register write only defines the low 32 bits, so canonicalize to the
+	// sign-extended 32-bit value. This makes 0xFFFFFFFF and -1 take the same
+	// path and guarantees that a value whose low word is 0 or all-ones is
+	// caught by the single-instruction cases below instead of reaching the
+	// halfword loops with nothing to emit.
+	if (!is_64bit)
+		val = (int64_t)(int32_t)(uint32_t)val;
+
+	// Zero: MOV dst, XZR (ORR with the zero register).
+	if (val == 0) {
+		encode_logical_reg(ctx, sf, 0x01, 0, 0, XZR, 0, XZR, dst);
+		return;
+	}
+
+	// MOVN dst, #imm16 yields ~imm16, so it covers [-65536, -1] in one
+	// instruction (all-ones is simply MOVN dst, #0).
+	if (val < 0 && val >= -65536) {
+		encode_mov_wide_imm(ctx, sf, 0x00, 0, (int)(~val) & 0xFFFF, dst);
+		return;
+	}
+
+	// MOVZ dst, #imm16 covers [1, 65535].
+	if (val > 0 && val <= 65535) {
+		encode_mov_wide_imm(ctx, sf, 0x02, 0, (int)val, dst);
+		return;
+	}
+
+	// Bitmask patterns load with a single ORR dst, XZR, #imm.
+	{
+		int N, immr, imms;
+		bool can_encode = is_64bit
+			? is_logical_immediate_64((uint64_t)val, &N, &immr, &imms)
+			: is_logical_immediate_32((uint32_t)val, &N, &immr, &imms);
+
+		if (can_encode) {
+			encode_logical_imm(ctx, sf, 0x01, N, immr, imms, XZR, dst);
+			return;
+		}
+	}
+
+	// General case: build the value 16 bits at a time.
+	uint64_t uval = (uint64_t)val;
+	int total_hw = is_64bit ? 4 : 2;
+
+	// MOVZ zero-fills the other halfwords and MOVN one-fills them, so start
+	// with whichever leaves fewer halfwords for MOVK to patch.
+	int zero_hw = 0, ones_hw = 0;
+	for (int i = 0; i < total_hw; i++) {
+		int hw_val = (int)((uval >> (i * 16)) & 0xFFFF);
+		if (hw_val == 0)
+			zero_hw++;
+		else if (hw_val == 0xFFFF)
+			ones_hw++;
+	}
+	bool use_movn = ones_hw > zero_hw;
+	int background = use_movn ? 0xFFFF : 0;  // halfword value needing no instruction
+	int first_opc = use_movn ? 0x00 : 0x02;  // MOVN : MOVZ
+
+	// At least one halfword differs from the background here: all-zero and
+	// all-ones values returned above.
+	bool first = true;
+	for (int i = 0; i < total_hw; i++) {
+		int hw_val = (int)((uval >> (i * 16)) & 0xFFFF);
+		if (hw_val == background)
+			continue;
+		if (first) {
+			// MOVN takes the inverted halfword; MOVZ takes it as is.
+			int imm16 = use_movn ? ((~hw_val) & 0xFFFF) : hw_val;
+			encode_mov_wide_imm(ctx, sf, first_opc, i, imm16, dst);
+			first = false;
+		} else {
+			// MOVK dst, #hw_val, LSL #(i*16)
+			encode_mov_wide_imm(ctx, sf, 0x03, i, hw_val, dst);
+		}
+	}
+}
diff --git a/src/jit_aarch64_emit.h b/src/jit_aarch64_emit.h
new file mode 100644
index 000000000..0371af69c
--- /dev/null
+++ b/src/jit_aarch64_emit.h
@@ -0,0 +1,240 @@
+/*
+ * Copyright (C)2015-2026 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef JIT_AARCH64_EMIT_H
+#define JIT_AARCH64_EMIT_H
+
+#include
+#include
+#include
+#include
+#include "data_struct.h"
+
+// Per-TU instantiation of byte_arr (the code buffer type).
+// Helpers are static-inline so two TUs may include this header without ODR conflict.
+// data_struct.c is used as a preprocessor template: the S_* macros defined
+// before each #include select the generated type name, the name prefix of
+// its helper functions and the element type.
+// NOTE(review): S_TYPE/S_NAME/S_VALUE are redefined below without an #undef,
+// so data_struct.c presumably undefines them itself - confirm.
+#define S_TYPE byte_arr
+#define S_NAME(name) byte_##name
+#define S_VALUE unsigned char
+#include "data_struct.c"
+// Convenience wrapper: takes the array by name (its address is passed on)
+// and supplies DEF_ALLOC as the allocator argument.
+#define byte_reserve(set,count) byte_reserve_impl(DEF_ALLOC,&set,count)
+
+// value_map: dedup uint64 constants in the literal pool (Phase 4+).
+// Second instantiation of the same template, this time as a sorted map
+// (S_SORTED + S_MAP) from uint64 keys to int values, with -1 as S_DEFVAL
+// (presumably the value returned when a key is absent - confirm).
+#define S_SORTED
+#define S_MAP
+#define S_TYPE value_map
+#define S_NAME(name) value_map_##name
+#define S_KEY uint64
+#define S_VALUE int
+#define S_DEFVAL -1
+#include "data_struct.c"
+// The mode switches are cleared here so they do not affect a later
+// instantiation of the template.
+#undef S_MAP
+#undef S_SORTED
+
+// Backend codegen context (each backend defines its own _code_ctx layout).
+// Phase 2: function shell + branch fixups + per-IR-op pos_map.
+// Phase 4: constant pool, function-call relocations, jump-table absolutes.
+struct _code_ctx {
+ // Owning JIT context.
+ jit_ctx *jit;
+ // Machine-code output buffer; EMIT32 stores at code.values[code.cur] and
+ // advances code.cur by 4.
+ byte_arr code;
+ // Each pending branch is a triple (code_byte_pos, target_ir_op, is_cond)
+ // patched after the function's pos_map is finalized.
+ int_arr branch_fixups;
+ // Per-IR-op position map used to resolve branch_fixups
+ // (presumably IR op index -> byte position in `code` - confirm).
+ int *pos_map;
+ // NOTE(review): presumably the index of the IR op currently being
+ // lowered - confirm against the backend.
+ int cur_op;
+ // NOTE(review): presumably set once the pending output has been flushed -
+ // confirm against hl_codegen_flush.
+ bool flushed;
+ // Phase 4: cross-function call relocations (BL imm26 or ADRP+ADD).
+ // Triples (code_byte_pos, fid, kind) where kind=0:BL, kind=1:ADRP+ADD pair.
+ int_arr funs;
+ // Constant pool. Each constant ref is (adrp_pos, const_offset); patched in
+ // hl_codegen_flush_consts to ADRP imm21 + LDR/ADD imm12 split.
+ // const_table_lookup deduplicates uint64 constants (value -> int, -1 as
+ // the map's default value).
+ value_map const_table_lookup;
+ byte_arr const_table;
+ int_arr const_refs;
+ // Jump-table absolute fills: pairs (table_offs, target_byte_pos_in_output).
+ // In hl_codegen_final each entry becomes `final_code + target` written into
+ // `final_code + const_table_pos + table_offs`.
+ int_arr const_addr;
+ // Offset of the constant table relative to final_code (see above).
+ int const_table_pos;
+ // Direct-call shortcuts for null-access stubs (BL within ±128 MB).
+ int null_access_pos;
+ int null_field_pos;
+};
+
+// Append one 32-bit instruction word at the current write position of
+// ctx->code and move that position forward by four bytes.
+// No capacity check is done here: the caller must byte_reserve beforehand.
+#define EMIT32(ctx, val) do { \
+	unsigned int *emit32_slot_ = (unsigned int*)((ctx)->code.values + (ctx)->code.cur); \
+	*emit32_slot_ = (unsigned int)(val); \
+	(ctx)->code.cur += 4; \
+} while(0)
+
+/*
+ * AArch64 Register Definitions
+ */
+
+// General-purpose registers. Each enumerator equals the 5-bit value placed
+// in an instruction's register field; the same numbers name X0-X30 (64-bit)
+// and W0-W30 (32-bit views).
+typedef enum {
+	X0 = 0, X1, X2, X3, X4, X5, X6, X7,
+	X8, X9, X10, X11, X12, X13, X14, X15,
+	X16, X17, X18, X19, X20, X21, X22, X23,
+	X24, X25, X26, X27, X28, X29, X30,
+
+	// Architectural aliases
+	FP = X29,      // Frame pointer
+	LR = X30,      // Link register
+	// Encoding 31 means either SP or the zero register, depending on the
+	// instruction and operand position.
+	SP_REG = 31,   // Stack pointer
+	XZR = 31       // Zero register
+} Arm64Reg;
+
+// 32-bit views of the general-purpose registers (W0-W30). The numeric values
+// are identical to the matching Arm64Reg entries.
+typedef enum {
+	W0 = 0, W1, W2, W3, W4, W5, W6, W7,
+	W8, W9, W10, W11, W12, W13, W14, W15,
+	W16, W17, W18, W19, W20, W21, W22, W23,
+	W24, W25, W26, W27, W28, W29, W30,
+	WZR = 31   // 32-bit zero register
+} Arm64Reg32;
+
+// Floating-point / SIMD registers V0-V31, numbered by their 5-bit encoding.
+typedef enum {
+	V0 = 0, V1, V2, V3, V4, V5, V6, V7,
+	V8, V9, V10, V11, V12, V13, V14, V15,
+	V16, V17, V18, V19, V20, V21, V22, V23,
+	V24, V25, V26, V27, V28, V29, V30, V31
+} Arm64FpReg;
+
+// The narrower scalar views share the encoding of the V register with the
+// same number; the precision is selected by the instruction, not here:
+//   D0-D31 = 64-bit (double precision)
+//   S0-S31 = 32-bit (single precision)
+//   H0-H31 = 16-bit (half precision)
+
+/*
+ * Condition codes used by B.cond and the conditional-select family.
+ * The value is the 4-bit `cond` field; the flag test is shown on the right.
+ */
+typedef enum {
+	COND_EQ = 0x0,  // equal                        Z == 1
+	COND_NE,        // not equal                    Z == 0
+	COND_CS,        // carry set (alias HS)         C == 1
+	COND_CC,        // carry clear (alias LO)       C == 0
+	COND_MI,        // minus / negative             N == 1
+	COND_PL,        // plus / positive or zero      N == 0
+	COND_VS,        // overflow set                 V == 1
+	COND_VC,        // overflow clear               V == 0
+	COND_HI,        // unsigned higher              C == 1 && Z == 0
+	COND_LS,        // unsigned lower or same       C == 0 || Z == 1
+	COND_GE,        // signed greater or equal      N == V
+	COND_LT,        // signed less than             N != V
+	COND_GT,        // signed greater than          Z == 0 && N == V
+	COND_LE,        // signed less or equal         Z == 1 || N != V
+	COND_AL,        // always
+	COND_NV         // 0b1111: reserved spelling, do not use
+} ArmCondition;
+
+// Unsigned-comparison spellings of the carry conditions
+#define COND_HS COND_CS // Unsigned higher or same
+#define COND_LO COND_CC // Unsigned lower
+
+/*
+ * Extend / shift selectors
+ */
+// 3-bit `option` field of extended-register operands: bit 2 selects signed
+// extension, bits 1:0 select the source width.
+typedef enum {
+	EXTEND_UXTB = 0,  // zero-extend byte
+	EXTEND_UXTH,      // zero-extend halfword
+	EXTEND_UXTW,      // zero-extend word
+	EXTEND_UXTX,      // zero-extend doubleword (64-bit, same as LSL)
+	EXTEND_SXTB,      // sign-extend byte
+	EXTEND_SXTH,      // sign-extend halfword
+	EXTEND_SXTW,      // sign-extend word
+	EXTEND_SXTX       // sign-extend doubleword
+} ArmExtend;
+
+// 2-bit `shift` field of shifted-register operands.
+typedef enum {
+	SHIFT_LSL = 0,  // logical shift left
+	SHIFT_LSR,      // logical shift right
+	SHIFT_ASR,      // arithmetic shift right
+	SHIFT_ROR       // rotate right
+} ArmShift;
+
+/*
+ * Function Declarations
+ */
+
+// Encoder API. Parameters named after instruction fields (sf, opc, S, imm12,
+// Rn, Rd, ...) are the raw field values of the A64 encoding. The encoders and
+// load_immediate write through EMIT32 and do not grow the buffer, so the
+// caller must byte_reserve enough room first.
+
+// ADD/SUB instructions
+void encode_add_sub_imm(code_ctx *ctx, int sf, int op, int S, int shift, int imm12, Arm64Reg Rn, Arm64Reg Rd);
+void encode_add_sub_reg(code_ctx *ctx, int sf, int op, int S, int shift, Arm64Reg Rm, int imm6, Arm64Reg Rn, Arm64Reg Rd);
+void encode_add_sub_ext(code_ctx *ctx, int sf, int op, int S, Arm64Reg Rm, int option, int imm3, Arm64Reg Rn, Arm64Reg Rd);
+
+// Logical instructions
+void encode_logical_imm(code_ctx *ctx, int sf, int opc, int N, int immr, int imms, Arm64Reg Rn, Arm64Reg Rd);
+void encode_logical_reg(code_ctx *ctx, int sf, int opc, int shift, int N, Arm64Reg Rm, int imm6, Arm64Reg Rn, Arm64Reg Rd);
+
+// Move wide immediate
+// (load_immediate passes opc 0x00 for MOVN, 0x02 for MOVZ, 0x03 for MOVK;
+// hw is the halfword index, i.e. the shift is hw*16)
+void encode_mov_wide_imm(code_ctx *ctx, int sf, int opc, int hw, int imm16, Arm64Reg Rd);
+
+// Multiply/divide
+void encode_madd_msub(code_ctx *ctx, int sf, int op, Arm64Reg Rm, Arm64Reg Ra, Arm64Reg Rn, Arm64Reg Rd);
+void encode_div(code_ctx *ctx, int sf, int U, Arm64Reg Rm, Arm64Reg Rn, Arm64Reg Rd);
+
+// Shift instructions
+void encode_shift_reg(code_ctx *ctx, int sf, int op2, Arm64Reg Rm, Arm64Reg Rn, Arm64Reg Rd);
+
+// Load/store instructions
+void encode_ldr_str_imm(code_ctx *ctx, int size, int V, int opc, int imm12, Arm64Reg Rn, Arm64Reg Rt);
+void encode_ldr_str_reg(code_ctx *ctx, int size, int V, int opc, Arm64Reg Rm, int option, int S, Arm64Reg Rn, Arm64Reg Rt);
+void encode_ldur_stur(code_ctx *ctx, int size, int V, int opc, int imm9, Arm64Reg Rn, Arm64Reg Rt);
+// `mode` is not a raw field: its low two bits give the addressing mode and
+// bit 0x10 (or the legacy value 0x03) selects a store instead of a load.
+void encode_ldp_stp(code_ctx *ctx, int opc, int V, int mode, int imm7, Arm64Reg Rt2, Arm64Reg Rn, Arm64Reg Rt);
+
+// PC-relative addressing
+// (the 21-bit offset is passed pre-split: immlo = bits 1:0, immhi = bits 20:2)
+void encode_adrp(code_ctx *ctx, int immlo, int immhi, Arm64Reg Rd);
+void encode_adr(code_ctx *ctx, int immlo, int immhi, Arm64Reg Rd);
+
+// Branch instructions
+// (immediate displacements are in instructions, i.e. byte offset / 4)
+void encode_branch_cond(code_ctx *ctx, int imm19, ArmCondition cond);
+void encode_branch_uncond(code_ctx *ctx, int imm26);
+void encode_branch_link(code_ctx *ctx, int imm26);
+void encode_branch_reg(code_ctx *ctx, int opc, Arm64Reg Rn);
+void encode_cbz_cbnz(code_ctx *ctx, int sf, int op, int imm19, Arm64Reg Rt);
+void encode_tbz_tbnz(code_ctx *ctx, int b5, int op, int b40, int imm14, Arm64Reg Rt);
+
+// Floating-point instructions
+void encode_fp_arith(code_ctx *ctx, int M, int S, int type, Arm64FpReg Rm, int opcode, Arm64FpReg Rn, Arm64FpReg Rd);
+void encode_fp_1src(code_ctx *ctx, int M, int S, int type, int opcode, Arm64FpReg Rn, Arm64FpReg Rd);
+void encode_fp_compare(code_ctx *ctx, int M, int S, int type, Arm64FpReg Rm, int op, Arm64FpReg Rn);
+void encode_fcvt_int(code_ctx *ctx, int sf, int S, int type, int rmode, int opc, Arm64FpReg Rn, Arm64Reg Rd);
+void encode_int_fcvt(code_ctx *ctx, int sf, int S, int type, int rmode, int opc, Arm64Reg Rn, Arm64FpReg Rd);
+
+// Conditional select
+void encode_cond_select(code_ctx *ctx, int sf, int op, Arm64Reg Rm, ArmCondition cond, int op2, Arm64Reg Rn, Arm64Reg Rd);
+
+// High-level helpers
+// Emits a variable-length sequence (ORR, MOVN/MOVZ, optional MOVKs) that
+// leaves `val` in `dst`.
+void load_immediate(code_ctx *ctx, int64_t val, Arm64Reg dst, bool is_64bit);
+
+#endif // JIT_AARCH64_EMIT_H
diff --git a/src/jit_dump.c b/src/jit_dump.c
new file mode 100644
index 000000000..c1b16a073
--- /dev/null
+++ b/src/jit_dump.c
@@ -0,0 +1,584 @@
+/*
+ * Copyright (C)2015-2016 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include
+
+// Mnemonics printed by dump_instr, indexed by einstr->op. The order must
+// stay in sync with the emit opcode enum; the three consecutive "call"
+// entries are distinct opcodes that share a mnemonic.
+// NOTE(review): "cxhg" looks like a transposition of "cxchg"; kept as is
+// because it is runtime output.
+static const char *op_names[] = {
+	/*  0 */ "load-addr", "load-const", "load-arg", "load-fun",
+	/*  4 */ "store", "lea", "test", "cmp",
+	/*  8 */ "jcond", "jump", "jump-table",
+	/* 11 */ "binop", "unop", "conv", "conv-unsigned",
+	/* 15 */ "ret", "call", "call", "call",
+	/* 19 */ "mov", "cmov", "xchg", "cxhg",
+	/* 23 */ "push-const", "push", "pop", "alloc-stack",
+	/* 27 */ "prefetch", "debug-break", "block", "enter",
+	/* 31 */ "stack", "catch", "address", "nop"
+};
+
+// Global dump switch, off by default. Not read in the part of the file
+// visible here. NOTE(review): presumably enables dumping of the generated
+// machine code bytes - confirm where it is consumed.
+bool hl_jit_dump_bin = false;
+
+// Supplied elsewhere (not defined in the visible part of this file); returns
+// the printable name of a native register for the given mode. Used by
+// hl_emit_regstr below.
+const char *hl_natreg_str( int reg, emit_mode m );
+
+/**
+ * Format an emitter register/value reference for dump output.
+ *
+ * Rendering by REG_KIND(v):
+ *   R_VALUE   -> "V<n>"
+ *   R_PHI     -> "P<n>" (n is the negated raw value)
+ *   R_CONST   -> the constant in decimal
+ *   R_REG     -> native register name, optionally "+<hex>h" / "-<hex>h"
+ *   R_REG_PTR -> the same inside brackets, with "ST" for STACK_REG
+ * A null reference renders as "NULL???".
+ *
+ * The result lives in one of four rotating static buffers, so up to four
+ * results may be used in a single printf call. Not thread-safe.
+ *
+ * Each slot is 48 bytes and written with snprintf: the previous 10-byte
+ * slots could be overrun by strings such as "[x29+7FFFFFFFh]" or
+ * "V2147483647".
+ */
+const char *hl_emit_regstr( ereg v, emit_mode m ) {
+	static char fmts[4][48];
+	static unsigned int flip = 0;
+	// allow up to four concurrent val_str
+	char *fmt = fmts[flip++&3];
+	const int cap = (int)sizeof(fmts[0]);
+	if( IS_NULL(v) ) {
+		snprintf(fmt,cap,"NULL???");
+		return fmt;
+	}
+	int val = REG_VALUE(v);
+	switch( REG_KIND(v) ) {
+	case R_VALUE:
+		snprintf(fmt,cap,"V%d",v);
+		break;
+	case R_PHI:
+		snprintf(fmt,cap,"P%d",-v);
+		break;
+	case R_CONST:
+		snprintf(fmt,cap,"%d",val);
+		break;
+	case R_REG:
+		if( val == 0 )
+			snprintf(fmt,cap,"%s",hl_natreg_str(v,m));
+		else if( val > 0 )
+			snprintf(fmt,cap,"%s+%Xh",hl_natreg_str(v,m),val);
+		else
+			snprintf(fmt,cap,"%s-%Xh",hl_natreg_str(v,m),-val);
+		break;
+	case R_REG_PTR:
+		{
+			// Memory operands always use the pointer-sized register name;
+			// the stack base is shown symbolically.
+			const char *base = REG_REG(v) == STACK_REG ? "ST" : hl_natreg_str(v,M_PTR);
+			if( val == 0 )
+				snprintf(fmt,cap,"[%s]",base);
+			else if( val > 0 )
+				snprintf(fmt,cap,"[%s+%Xh]",base,val);
+			else
+				snprintf(fmt,cap,"[%s-%Xh]",base,-val);
+		}
+		break;
+	default:
+		jit_assert();
+		break;
+	}
+	return fmt;
+}
+
+/**
+ * Print one opcode argument, preceded by `sep`, according to its format code.
+ *
+ *   0    -> nothing is printed (not even the separator)
+ *   1, 2 -> register "R<val>", followed by "?" when val is outside
+ *           [0, fun->nregs)
+ *   3    -> plain integer
+ *   4    -> "[<val>]"
+ *   5, 6 -> jump target "@<hex>", resolved as val + pos + 1
+ *   else -> "?#<fmt>"
+ *
+ * @param pos Index of the opcode within the function
+ */
+static void hl_dump_arg( hl_function *fun, int fmt, int val, char sep, int pos ) {
+	if( fmt == 0 ) return;
+	printf("%c", sep);
+	if( fmt == 1 || fmt == 2 ) {
+		printf("R%d", val);
+		if( val < 0 || val >= fun->nregs ) printf("?");
+	} else if( fmt == 3 )
+		printf("%d", val);
+	else if( fmt == 4 )
+		printf("[%d]", val);
+	else if( fmt == 5 || fmt == 6 )
+		printf("@%X", val + pos + 1);
+	else
+		printf("?#%d", fmt);
+}
+
+// Build hl_op_fmt[], one packed int per opcode, by re-including opcodes.h
+// with OP/OP_BEGIN/OP_END redefined (presumably opcodes.h expands to
+// OP_BEGIN, one OP(...) per opcode, then OP_END - confirm).
+// Packing: first format code in bits 0-7 (not masked), second in bits 8-15,
+// third in bits 16-23. hl_dump_op unpacks them with the matching shifts.
+#define OP(_,_a,_b,_c) ((_a) | (((_b)&0xFF) << 8) | (((_c)&0xFF) << 16)),
+#define OP_BEGIN static int hl_op_fmt[] = {
+#define OP_END };
+// R is undefined before the include (presumably opcodes.h uses R as one of
+// its format tokens and an earlier header defines it as a macro - confirm).
+#undef R
+#include "opcodes.h"
+
+/**
+ * Print one bytecode instruction: its name followed by its arguments,
+ * decoded with the packed format from hl_op_fmt.
+ *
+ * When the second format code is 5 the opcode carries a variable-length
+ * list, printed between brackets; otherwise up to three plain arguments are
+ * printed through hl_dump_arg.
+ */
+static void hl_dump_op( hl_function *fun, hl_opcode *op ) {
+	// +1 skips the first character of the opcode name.
+	printf("%s", hl_op_name(op->op) + 1);
+	int fmt = hl_op_fmt[op->op];
+	int pos = (int)(op - fun->ops);
+	hl_dump_arg(fun, fmt & 0xFF, op->p1, ' ', pos);
+	if( ((fmt >> 8) & 0xFF) == 5 ) {
+		int count = (fmt >> 16) & 0xFF;
+		printf(" [");
+		if( count == 4 ) {
+			// Fixed four-argument form: the fourth value is stored in the
+			// `extra` pointer itself.
+			printf("%d", op->p2);
+			printf(",%d", op->p3);
+			printf(",%d", (int)(int_val)op->extra);
+		} else if( op->op == OSwitch ) {
+			// p2 relative case targets in extra[], default target in p3.
+			for(int i=0;i<op->p2;i++) {
+				if( i != 0 ) printf(",");
+				printf("@%X", (op->extra[i] + pos + 1));
+			}
+			printf(",def=@%X", op->p3 + pos + 1);
+		} else {
+			if( count == 0xFF )
+				count = op->p3; // list length is stored in p3
+			else {
+				printf("%d,%d,",op->p2,op->p3);
+				count -= 3;
+			}
+			// NOTE(review): this loop header and body were truncated in the
+			// reviewed text and are reconstructed; confirm the separator and
+			// element format against the upstream file.
+			for(int i=0;i<count;i++) {
+				if( i != 0 ) printf(",");
+				printf("%d", op->extra[i]);
+			}
+		}
+		printf("]");
+	} else {
+		hl_dump_arg(fun, (fmt >> 8) & 0xFF, op->p2,',', pos);
+		hl_dump_arg(fun, fmt >> 16, op->p3,',', pos);
+	}
+}
+
+/**
+ * Suffix appended to an instruction mnemonic for its operand mode.
+ * M_PTR maps to the empty string; unknown modes render as "?<number>" in a
+ * shared static buffer (not thread-safe, overwritten by the next such call).
+ */
+static const char *emit_mode_str( emit_mode mode ) {
+	// Declared at function scope: a declaration placed directly after a
+	// `default:` label is not valid C before C23.
+	static char buf[50];
+	switch( mode ) {
+	case M_UI8: return "-ui8";
+	case M_UI16: return "-ui16";
+	case M_I32: return "-i32";
+	case M_F32: return "-f32";
+	case M_F64: return "-f64";
+	case M_PTR: return "";
+	case M_VOID: return "-void";
+	case M_NORET: return "-noret";
+	default:
+		sprintf(buf,"?%d",mode);
+		return buf;
+	}
+}
+
+/**
+ * Print a constant according to the mode it is used in.
+ *
+ *   M_NONE              -> "?0x<hex>"
+ *   M_UI8/M_UI16/M_I32  -> decimal when within +/-0x10000, otherwise hex
+ *   M_F32 / M_F64       -> the bits reinterpreted as float / double
+ *   anything else       -> "NULL" for 0, a symbolic name for known
+ *                          addresses (a type in code->types, the module's
+ *                          globals area, hlt_void), otherwise raw hex
+ */
+static void dump_value( jit_ctx *ctx, uint64 value, emit_mode mode ) {
+	// Reinterprets the raw bits as a float or double without a cast.
+	union {
+		uint64 v;
+		double d;
+		float f;
+	} tmp;
+	hl_module *mod = ctx->mod;
+	hl_code *code = ctx->mod->code;
+	switch( mode ) {
+	case M_NONE:
+		printf("?0x%llX",value);
+		break;
+	case M_UI8:
+	case M_UI16:
+	case M_I32:
+		if( (int)value >= -0x10000 && (int)value <= 0x10000 )
+			printf("%d",(int)value);
+		else
+			printf("0x%X",(int)value);
+		break;
+	case M_F32:
+		tmp.v = value;
+		printf("%f",tmp.f);
+		break;
+	case M_F64:
+		tmp.v = value;
+		printf("%g",tmp.d);
+		break;
+	default:
+		if( value == 0 )
+			printf("NULL");
+		else if( mode == M_PTR && value >= (uint64)code->types && value < (uint64)(code->types + code->ntypes) )
+			uprintf(USTR("<%s>"),hl_type_str((hl_type*)value));
+		// NOTE(review): the next two literals were empty in the reviewed
+		// text (their "<...>" content was lost) and are reconstructed;
+		// confirm the exact spelling against the upstream file.
+		else if( mode == M_PTR && value == (uint64)mod->globals_data )
+			printf("<globals>");
+		else if( value == (uint64)&hlt_void )
+			printf("<void>");
+		else
+			printf("0x%llX",value);
+		break;
+	}
+}
+
+/**
+ * Print a readable name for a function, then its index as "[<hex>]".
+ *
+ * A method prints as "Obj.field". A function without an owning object but
+ * with a parent reference prints as "Obj.~field.<ref>", using the parent's
+ * object and field names. Anything else prints the index alone.
+ */
+static void hl_dump_fun_name( hl_function *f ) {
+	if( !f->obj ) {
+		if( f->field.ref ) {
+			uprintf(USTR("%s."),f->field.ref->obj->name);
+			uprintf(USTR("~%s"),f->field.ref->field.name);
+			printf(".%d",f->ref);
+		}
+	} else {
+		uprintf(USTR("%s."),f->obj->name);
+		uprintf(USTR("%s"),f->field.name);
+	}
+	printf("[%X]", f->findex);
+}
+
+/**
+ * Print the argument list of a call-like instruction as "(a,b,...)".
+ * Nothing is printed when nargs is 0xFF.
+ */
+static void hl_dump_args( jit_ctx *ctx, einstr *e ) {
+	if( e->nargs == 0xFF )
+		return;
+	ereg *v = hl_emit_get_args(ctx->emit, e);
+	printf("(");
+	// The loop bound was truncated in the reviewed text; restored as
+	// i < e->nargs.
+	for(int i=0;i<e->nargs;i++) {
+		if( i != 0 ) printf(",");
+		printf("%s", val_str(v[i],M_NONE));
+	}
+	printf(")");
+}
+
+// One entry of the runtime-helper lookup table: printable name + address.
+typedef struct { const char *name; void *ptr; } named_ptr;
+
+/**
+ * Print a symbolic name for a native code pointer.
+ *
+ * Lookup order:
+ *   1. a table of well-known runtime helpers, filled on first use
+ *   2. the module's native functions, printed as "<lib.name>" (a leading
+ *      '?' on the library name is dropped)
+ *   3. fallback: the raw address as "<0x...>"
+ *
+ * The table is a function-local static initialised without locking, so the
+ * first call is not thread-safe.
+ */
+static void hl_dump_ptr_name( jit_ctx *ctx, void *ptr ) {
+#	define N(v) ptr_names[i].name = #v; ptr_names[i].ptr = v; i++
+#	define N2(n,v) ptr_names[i].name = n; ptr_names[i].ptr = v; i++
+#	define DYN(p) N2("dyn_get" #p, hl_dyn_get##p); N2("dyn_set" #p, hl_dyn_set##p); N2("dyn_cast" #p, hl_dyn_cast##p)
+	static named_ptr ptr_names[256] = { NULL };
+	int i = 0;
+	// Entry 0 doubles as the "already initialised" marker.
+	if( !ptr_names[0].ptr ) {
+		N(hl_alloc_dynbool);
+		N(hl_alloc_dynamic);
+		N(hl_alloc_obj);
+		N(hl_alloc_dynobj);
+		N(hl_alloc_virtual);
+		N(hl_alloc_closure_ptr);
+		N(hl_dyn_call);
+		N(hl_dyn_call_obj);
+		N(hl_throw);
+		N(hl_rethrow);
+		N(hl_to_virtual);
+		N(hl_alloc_enum);
+		N(hl_dyn_compare);
+		N(hl_same_type);
+		DYN(f);
+		DYN(d);
+		DYN(i64);
+		DYN(i);
+		DYN(p);
+		N2("null_field",hl_jit_null_field_access);
+		N2("null_access",hl_null_access);
+		N(hl_get_thread);
+		N(setjmp);
+		N(_setjmp);
+		N2("assert",hl_jit_assert);
+		N(fmod);
+		N(fmodf);
+		i = 0;
+	}
+#	undef N
+#	undef N2
+	// The table is terminated by the first entry with a NULL pointer.
+	while( true ) {
+		named_ptr p = ptr_names[i++];
+		if( !p.ptr ) break;
+		if( p.ptr == ptr ) {
+			printf("<%s>",p.name);
+			return;
+		}
+	}
+	// The loop bound was truncated in the reviewed text; restored as
+	// i < ctx->mod->code->nnatives.
+	for(i=0;i<ctx->mod->code->nnatives;i++) {
+		hl_native *n = ctx->mod->code->natives + i;
+		if( ctx->mod->functions_ptrs[n->findex] == ptr ) {
+			printf("<%s.%s>",n->lib[0] == '?' ? n->lib + 1 : n->lib,n->name);
+			return;
+		}
+	}
+	// Bracketed like the named cases above (the opening '<' was missing).
+	printf("<0x%llX>",(uint64)ptr);
+}
+
+void hl_emit_flush( jit_ctx *ctx );
+void hl_regs_flush( jit_ctx *ctx );
+void hl_codegen_flush( jit_ctx *ctx );
+
+#define reg_str(r) val_str(r,e->mode)
+
+/*
+	Prints one instruction on the current line (no trailing newline).
+	 ctx     : jit context, used to resolve functions, blocks and argument lists
+	 e       : instruction to print
+	 cur_pos : index of `e` in its instruction array; jump targets are printed as
+	           absolute positions computed as cur_pos + 1 + relative offset
+	The output is the opcode name, an optional operation suffix, the mode suffix
+	and then opcode-specific operands.
+*/
+static void dump_instr( jit_ctx *ctx, einstr *e, int cur_pos ) {
+	printf("%s", op_names[e->op]);
+	bool show_size = true;
+	switch( e->op ) {
+	case TEST:
+	case CMP:
+		// size_offs carries the hl_op of the comparison; the first two characters of its name are skipped
+		printf("-%s", hl_op_name(e->size_offs)+2);
+		show_size = false;
+		break;
+	case BINOP:
+	case UNOP:
+		// same, but only the first character of the op name is skipped
+		printf("-%s", hl_op_name(e->size_offs)+1);
+		show_size = false;
+		break;
+	default:
+		break;
+	}
+	if( e->mode )
+		printf("%s", emit_mode_str(e->mode));
+	switch( e->op ) {
+	case CALL_FUN:
+		printf(" ");
+		{
+			// e->a is a function index that is translated through functions_indexes
+			int fid = ctx->mod->functions_indexes[e->a];
+			hl_code *code = ctx->mod->code;
+			if( fid < code->nfunctions ) {
+				hl_dump_fun_name(&code->functions[fid]);
+			} else {
+				printf("???");
+			}
+		}
+		hl_dump_args(ctx,e);
+		break;
+	case CALL_REG:
+		printf(" %s", val_str(e->a,M_PTR));
+		hl_dump_args(ctx,e);
+		break;
+	case CALL_PTR:
+		printf(" ");
+		hl_dump_ptr_name(ctx, (void*)e->value);
+		hl_dump_args(ctx,e);
+		break;
+	case JUMP:
+	case JCOND:
+		printf(" @%X", cur_pos + 1 + e->size_offs);
+		break;
+	case JUMP_TABLE:
+		{
+			// the argument list of a jump table holds relative offsets instead of values
+			int *offsets = hl_emit_get_args(ctx->emit, e);
+			printf(" %s (", reg_str(e->a));
+			// the loop bound was garbled in the source; one target per table entry
+			for(int k=0;k<e->nargs;k++) {
+				if( k > 0 ) printf(",");
+				printf("@%X", cur_pos + 1 + offsets[k]);
+			}
+			printf(")");
+		}
+		break;
+	case BLOCK:
+		printf(" #%d", e->size_offs);
+		// a block other than #0 without any predecessor is flagged
+		if( e->size_offs && ctx->blocks[e->size_offs].pred_count == 0 )
+			printf(" ???DEAD");
+		break;
+	case STACK_OFFS:
+		if( e->size_offs >= 0 )
+			printf(" +%Xh", e->size_offs);
+		else
+			printf(" -%Xh", -e->size_offs);
+		break;
+	case LOAD_CONST:
+	case PUSH_CONST:
+		printf(" ");
+		dump_value(ctx, e->value, e->mode);
+		break;
+	case LOAD_ADDR:
+		// nargs holds the mode of the memory access; it is only shown when it differs from the result mode
+		if( e->nargs != e->mode ) {
+			if( e->mode == M_PTR ) printf("-ptr");
+			printf("%s", e->nargs == M_PTR ? "-ptr" : emit_mode_str(e->nargs));
+		}
+		printf(" %s[%Xh]", val_str(e->a,M_PTR), e->size_offs);
+		break;
+	case STORE:
+		{
+			int offs = e->size_offs;
+			if( offs == 0 )
+				printf(" [%s]", val_str(e->a,M_PTR));
+			else
+				printf(" %s[%Xh]", val_str(e->a,M_PTR), offs);
+			printf(" = %s", reg_str(e->b));
+		}
+		break;
+	case CONV:
+	case CONV_UNSIGNED:
+		// size_offs holds the source mode of the conversion
+		if( e->mode == M_PTR ) printf("-i64");
+		printf("%s %s", e->size_offs == M_PTR ? "-i64" : emit_mode_str(e->size_offs), val_str(e->a,(emit_mode)e->size_offs));
+		break;
+	case LEA:
+		// size_offs packs the multiplier in its low byte and the constant offset in the upper bits
+		printf(" [%s", reg_str(e->a));
+		if( !IS_NULL(e->b) ) printf("+%s", reg_str(e->b));
+		if( (e->size_offs&0xFF) > 1 ) printf("*%d",e->size_offs&0xFF);
+		if( e->size_offs >> 8 ) printf("+%Xh", e->size_offs>>8);
+		printf("]");
+		break;
+	default:
+		if( !IS_NULL(e->a) ) {
+			printf(" %s", reg_str(e->a));
+			if( !IS_NULL(e->b) ) printf(", %s", reg_str(e->b));
+		}
+		if( show_size && e->size_offs != 0 )
+			printf(" %d", e->size_offs);
+		break;
+	}
+}
+
+/*
+	Dumps the whole compilation state of the current function to stdout:
+	 - the signature and the type of every bytecode register
+	 - for each bytecode op, the emitted instructions, below them the register-allocated
+	   instructions and, next to those, the machine code bytes in hex
+	 - a final hex listing of all the machine code when it has been fully generated
+	Inconsistencies found on the way (block ranges, jump targets, predecessors,
+	single-input phis, unwritten values) are reported inline with a "???" marker.
+	The three pipeline stages are flushed first so that their outputs are available.
+*/
+void hl_emit_dump( jit_ctx *ctx ) {
+	hl_function *f = ctx->fun;
+	int nargs = f->type->fun->nargs;
+	// flush every stage if that was not done yet (the dump can be requested in the middle of the process)
+	hl_emit_flush(ctx);
+	hl_regs_flush(ctx);
+	hl_codegen_flush(ctx);
+	printf("function ");
+	hl_dump_fun_name(f);
+	printf("(");
+	for(int i=0;i<nargs;i++) {
+		if( i > 0 ) printf(",");
+		printf("R%d", i);
+	}
+	printf(")\n");
+	for(int i=0;i<f->nregs;i++) {
+		printf("\tR%d : ",i);
+		uprintf(USTR("%s\n"), hl_type_str(f->regs[i]));
+	}
+	// check blocks intervals : they must be contiguous and cover all the instructions
+	int cur = 0;
+	for(int i=0;i<ctx->block_count;i++) {
+		eblock *b = ctx->blocks + i;
+		if( b->start_pos != cur ) printf("  ??? BLOCK %d START AT %X != %X\n", i, b->start_pos, cur);
+		if( b->end_pos < b->start_pos ) printf("  ??? BLOCK %d RANGE [%X,%X]\n", i, b->start_pos, b->end_pos);
+		cur = b->end_pos;
+	}
+	if( cur != ctx->instr_count )
+		printf("  ??? MISSING BLOCK FOR RANGE %X-%X\n", cur, ctx->instr_count);
+	// print instrs
+	int vpos = 1;	// next value expected to be written
+	int rpos = 0;	// next register-allocated instruction
+	int cpos = 0;	// next machine code byte
+	int cur_op = 0;	// next bytecode op
+	bool new_op = false;
+	// block #0 has no BLOCK instruction of its own : start inside it so that jumps
+	// located before the first BLOCK can be checked without dereferencing NULL
+	eblock *cur_block = ctx->block_count > 0 ? ctx->blocks : NULL;
+	for(int icount=0;icount<ctx->instr_count;icount++) {
+		while( ctx->emit_pos_map[cur_op] == icount ) {
+			printf("@%X ", cur_op);
+			hl_dump_op(ctx->fun, f->ops + cur_op);
+			printf("\n");
+			new_op = true;
+			cur_op++;
+		}
+		einstr *e = ctx->instrs + icount;
+		printf("\t\t@%X ", icount);
+		if( vpos < ctx->value_count && ctx->values_writes[vpos] == icount )
+			printf("V%d = ", vpos++);
+		dump_instr(ctx, e, icount);
+		if( e->op == JCOND || e->op == JUMP ) {
+			// a jump must land on a BLOCK that is a successor of the current block;
+			// a conditional jump must also be directly followed by a BLOCK
+			int target = icount + 1 + e->size_offs;
+			bool bad = false;
+			if( icount + 1 >= ctx->instr_count || target < 0 || target >= ctx->instr_count )
+				bad = true;
+			else if( ctx->instrs[target].op != BLOCK || (e->op == JCOND && ctx->instrs[icount+1].op != BLOCK) )
+				bad = true;
+			else {
+				bool found = false;
+				int next_count = cur_block ? cur_block->next_count : 0;
+				for(int k=0;k<next_count;k++) {
+					if( cur_block->nexts[k] == ctx->instrs[target].size_offs )
+						found = true;
+					// an unconditional jump must not list the block that follows it as successor
+					// (JUMP_TABLE cannot reach this point, the former test on it was dead)
+					if( e->op == JUMP && ctx->instrs[icount+1].op == BLOCK && ctx->instrs[icount+1].size_offs == cur_block->nexts[k] )
+						printf(" ???LEAK");
+				}
+				if( !found ) printf(" ???NEXT");
+			}
+			if( bad )
+				printf(" ???");
+		}
+		if( e->op == BLOCK ) {
+			eblock *b = &ctx->blocks[e->size_offs];
+			// every predecessor must either fall through or end with a jump to this block
+			for(int k=0;k<b->pred_count;k++) {
+				eblock *p = &ctx->blocks[b->preds[k]];
+				einstr *pe = &ctx->instrs[p->end_pos-1];
+				if( p->end_pos == icount )
+					continue;
+				bool bad = false;
+				if( (pe->op == JUMP || pe->op == JCOND) && pe->size_offs == icount - p->end_pos )
+					bad = false;
+				else if( pe->op != JUMP_TABLE )
+					bad = true;
+				if( bad )
+					printf(" ???PREV#%d",b->preds[k]);
+			}
+			for(int k=0;k<b->phi_count;k++) {
+				ephi *p = b->phis + k;
+				printf("\n\t\t@%X %s = phi%s(",icount,val_str(p->value,p->mode),emit_mode_str(p->mode));
+				for(int n=0;n<p->nvalues;n++) {
+					if( n > 0 ) printf(",");
+					printf("%s:%d",val_str(p->values[n],p->mode),p->blocks[n]);
+				}
+				if( p->nvalues == 0 )
+					printf("unwritten");
+				printf(")");
+				// a phi with a single input is flagged
+				if( p->nvalues == 1 )
+					printf(" ???");
+			}
+			cur_block = b;
+		}
+		while( rpos < ctx->reg_instr_count && rpos < ctx->reg_pos_map[icount+1] ) {
+			ereg out = ctx->reg_writes[rpos];
+			// from here `e` designates the register-allocated instruction (reg_str reads e->mode)
+			e = ctx->reg_instrs + rpos;
+			printf("\n\t\t\t\t@%X ",rpos);
+			if( !IS_NULL(out) ) printf("%s = ",reg_str(out));
+			dump_instr(ctx,e,rpos);
+			bool first = true;
+			while( cpos < ctx->code_size && cpos < ctx->code_pos_map[rpos+1] ) {
+				if( first ) {
+					if( hl_jit_dump_bin )
+						printf("\t\t\t");
+					else
+						printf("\033[80G");
+					first = false;
+					// the first machine instruction of an op starts with a debug prefix that is not displayed
+					if( new_op ) {
+						new_op = false;
+						cpos += ctx->cfg.debug_prefix_size;
+						if( cpos == ctx->code_pos_map[rpos+1] ) break;
+					}
+				}
+				printf("%.2X",ctx->code_instrs[cpos++]);
+			}
+			rpos++;
+		}
+		printf("\n");
+	}
+	// invalid ?
+	while( vpos < ctx->value_count ) {
+		// vpos is incremented in its own statement : reading it and modifying it
+		// inside the same argument list has no defined order
+		printf("  ??? UNWRITTEN VALUE V%d @%X\n", vpos, ctx->values_writes[vpos]);
+		vpos++;
+	}
+	// interrupted
+	if( cur_op < f->nops ) {
+		printf("@%X ", cur_op);
+		hl_dump_op(ctx->fun, f->ops + cur_op);
+		printf("\n\t\t...\n");
+	}
+	if( cpos == ctx->code_size && cpos > 0 ) {
+		// full machine code listing, with a separator at each instruction boundary
+		// and a newline every 16 instructions.
+		// NOTE(review): the loop header and the boundary test were garbled in the source
+		// and are reconstructed here; `while` is used so that instructions producing no
+		// bytes (which share an offset) do not desynchronise `n` - confirm against upstream.
+		int n = 1;
+		for(int i=0;i<cpos;i++) {
+			while( n <= ctx->reg_instr_count && ctx->code_pos_map[n] == i ) {
+				if( (n & 15) == 0 ) printf("\n"); else printf(" ");
+				n++;
+			}
+			printf("%.2X", ctx->code_instrs[i]);
+		}
+	}
+	printf("\n\n");
+	fflush(stdout);
+}
diff --git a/src/jit_emit.c b/src/jit_emit.c
new file mode 100644
index 000000000..7524c5483
--- /dev/null
+++ b/src/jit_emit.c
@@ -0,0 +1,2214 @@
+/*
+ * Copyright (C)2015-2016 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include
+#include
+#include
+#include "data_struct.h"
+
+//#define EMIT_DEBUG
+
+#ifdef EMIT_DEBUG
+# define emit_debug jit_debug
+#else
+# define emit_debug(...)
+#endif
+
+// Size in bytes associated with each emission mode.
+// NOTE(review): presumably indexed by emit_mode, whose declaration is not visible here - confirm the order.
+int hl_emit_mode_sizes[] = {0,1,2,4,HL_WSIZE,8,4,0,0};
+
+// Emitter-side state of one bytecode register.
+typedef struct {
+ hl_type *t; // type of the register, used to select its emission mode
+ int id; // key of the register in the written_vars map of each block
+ ereg stored; // value recorded by the last store made while no trap was active (see emit_store_reg)
+} vreg;
+
+// capacity of emit_ctx.tmp_args, enforced by get_tmp_args
+#define MAX_TMP_ARGS 32
+// capacity of emit_ctx.traps
+#define MAX_TRAPS 32
+
+typedef struct _linked_inf linked_inf;
+typedef struct _emit_block emit_block;
+typedef struct _tmp_phi tmp_phi;
+
+// Container types are generated by including data_struct.c with S_TYPE / S_NAME / S_VALUE
+// (plus S_KEY for keyed containers) defined beforehand.
+// NOTE(review): data_struct.c is not visible from here; presumably it #undef's its parameters
+// after each inclusion and S_SORTED / S_MAP select the container flavour - confirm.
+
+// blocks : collection of emit_block*
+#define S_TYPE blocks
+#define S_NAME(name) blocks_##name
+#define S_VALUE emit_block*
+#include "data_struct.c"
+#define blocks_add(set,v) blocks_add_impl(DEF_ALLOC,&(set),v)
+
+// phi_arr : collection of tmp_phi*
+#define S_TYPE phi_arr
+#define S_NAME(name) phi_##name
+#define S_VALUE tmp_phi*
+#include "data_struct.c"
+#define phi_add(set,v) phi_add_impl(DEF_ALLOC,&(set),v)
+
+#define S_SORTED
+
+// ereg_map : ereg -> emit_block* (used for the inputs of a phi and the block each one comes from)
+#define S_MAP
+#define S_TYPE ereg_map
+#define S_NAME(name) ereg_##name
+#define S_KEY ereg
+#define S_VALUE emit_block*
+#include "data_struct.c"
+#define ereg_add(set,k,v) ereg_add_pair_impl(DEF_ALLOC,&(set),k,v)
+
+#define S_MAP
+
+// vreg_map : register id -> ereg (current value of each register inside a block)
+#define S_TYPE vreg_map
+#define S_NAME(name) vreg_##name
+#define S_KEY int
+#define S_VALUE ereg
+#include "data_struct.c"
+#define vreg_replace(set,k,v) vreg_replace_impl(DEF_ALLOC,&(set),k,v)
+
+// Node of a singly linked list of (id, ptr) pairs.
+// The link_add_sort_* / link_sort_* helpers keep such lists ordered by ascending id.
+struct _linked_inf {
+ int id; // sort key
+ void *ptr; // payload attached to the key
+ linked_inf *next; // next node, or NULL at the end of the list
+};
+
+// Basic block as seen by the emitter while the function is being built.
+struct _emit_block {
+ int id; // index of the block in emit_ctx.blocks
+ int start_pos; // emit position at the time the block was allocated
+ int end_pos; // emit position where the block ends, set when the next block starts or at flush
+ int wait_nexts; // NOTE(review): not used in the visible code
+ bool sealed; // false while predecessors can still be added; register reads then create pending phis
+ blocks nexts; // successors
+ blocks preds; // predecessors
+ vreg_map written_vars; // current value of each register in this block, keyed by vreg id
+ phi_arr phis; // phis allocated for this block
+ emit_block *wait_seal_next; // NOTE(review): presumably links the blocks waiting to be sealed - confirm
+};
+
+// Phi under construction. Phis are identified by negative ereg values.
+struct _tmp_phi {
+ ereg value; // identifier of the phi : -(index+1) in emit_ctx.phis
+ vreg *r; // register merged by the phi (may be NULL)
+ ereg target; // what the phi resolves to once cleaned : itself when kept, another value otherwise
+ int final_id; // id after renumbering of the remaining phis, -1 when the phi has been eliminated
+ bool locked; // set while gather_phis collects the inputs
+ bool opt; // set once the phi has been replaced by its single input
+ emit_mode mode; // mode of the merged value
+ emit_block *b; // block owning the phi
+ ereg_map vals; // inputs : value -> block it comes from
+ phi_arr ref_phis; // phis that have this phi as input
+ linked_inf *ref_blocks; // blocks whose written_vars reference this phi, sorted by block id
+};
+
+// Information about one active trap.
+// NOTE(review): the code using these fields is outside of the visible range - meanings below are presumed.
+typedef struct {
+ ereg stack; // presumably the stack area reserved for the trap context
+ int target; // presumably the position of the trap handler
+} trap_inf;
+
+// State of the emitter (first stage of the JIT) for the function being compiled.
+struct _emit_ctx {
+ hl_module *mod;
+ hl_function *fun;
+ jit_ctx *jit;
+
+ einstr *instrs; // emitted instructions, grown by emit_instr
+ vreg *vregs; // per-register state, accessed through R(i)
+ tmp_phi **phis; // all allocated phis, accessed through GET_PHI
+ int max_instrs; // capacity of instrs
+ int max_regs;
+ int max_phis; // capacity of phis
+ int emit_pos; // number of instructions emitted so far
+ int op_pos; // index of the bytecode op being translated
+ int phi_count; // number of allocated phis
+ int phi_depth; // nesting level of phi optimization, only used to indent debug traces
+ bool flushed; // set once hl_emit_flush has published the results
+
+ ereg tmp_args[MAX_TMP_ARGS]; // scratch buffer returned by get_tmp_args
+ trap_inf traps[MAX_TRAPS];
+ int *pos_map; // bytecode op index -> emit position
+ int pos_map_size;
+ int trap_count; // number of traps currently active
+
+ int_arr args_data; // storage for argument lists that hold more than one entry
+ int_arr jump_regs; // (jump position, target op) pairs waiting for hl_emit_remap_jumps
+ int_arr values; // value index -> position of the instruction that produces it
+
+ blocks blocks; // all blocks, in allocation order
+ emit_block *current_block; // block receiving the emitted instructions
+ emit_block *wait_seal;
+ linked_inf *arrival_points; // pending jump targets sorted by op index, each with its source block
+ vclosure *closure_list; // static closures allocated for functions of this module, chained through `value`
+};
+
+// Shortcuts used by the opcode translation; all of them expect an `emit_ctx *ctx` in scope.
+
+// state of bytecode register number i
+#define R(i) (ctx->vregs + (i))
+
+// read / write the current value of a register
+#define LOAD(r) emit_load_reg(ctx, r)
+#define STORE(r, v) emit_store_reg(ctx, r, v)
+// constants, typed by an hl_type (LOAD_CONST_PTR uses hlt_bytes, i.e. pointer mode)
+#define LOAD_CONST(v, t) emit_load_const(ctx, (uint64)(v), t)
+#define LOAD_CONST_PTR(v) LOAD_CONST(v,&hlt_bytes)
+// memory accesses at address v + offs
+#define LOAD_MEM(v, offs, t) emit_load_mem(ctx, v, offs, t, t)
+#define LOAD_MEM_PTR(v, offs) LOAD_MEM(v, offs, &hlt_bytes)
+#define STORE_MEM(to, offs, v) emit_store_mem(ctx, to, offs, v)
+// three chained pointer loads : [obj], then word 2 of that, then word `id` of the result
+#define LOAD_OBJ_METHOD(obj,id) LOAD_MEM_PTR(LOAD_MEM_PTR(LOAD_MEM_PTR(obj,0),HL_WSIZE*2),HL_WSIZE*(id))
+// address computation : the multiplier goes in the low byte of size_offs, the offset in the upper bits
+#define OFFSET(base,index,mult,offset) emit_gen_ext(ctx, LEA, base, index, M_PTR, (mult) | ((offset) << 8))
+#define BREAK() emit_gen(ctx, DEBUG_BREAK, UNUSED, UNUSED, 0)
+#define GET_MODE(r) emit_get_mode(ctx,r)
+// phi designated by the negative value r
+#define GET_PHI(r) ctx->phis[-(r)-1]
+// offset at which emit_dyn_cast reads the payload of a boxed value
+#define HDYN_VALUE 8
+
+// types local to this file for 8 and 16 bits accesses
+static hl_type hlt_ui8 = { HUI8, 0 };
+static hl_type hlt_ui16 = { HUI16, 0 };
+
+/*
+	Allocates a node holding (id, ptr) in the function allocator and puts it
+	in front of `head`. Returns the new head of the list.
+*/
+static linked_inf *link_add( emit_ctx *ctx, int id, void *ptr, linked_inf *head ) {
+	linked_inf *node = hl_malloc(&ctx->jit->falloc,sizeof(linked_inf));
+	node->next = head;
+	node->ptr = ptr;
+	node->id = id;
+	return node;
+}
+
+/*
+	Inserts (id, ptr) in a list sorted by ascending id, unless the same pair is
+	already there. Several nodes can share an id as long as their ptr differ;
+	the new node goes in front of the existing nodes with the same id.
+	Returns the (possibly new) head of the list.
+*/
+static linked_inf *link_add_sort_unique( emit_ctx *ctx, int id, void *ptr, linked_inf *head ) {
+	// slot is the link to rewrite : either the local head or the `next` field of a node
+	linked_inf **slot = &head;
+	while( *slot && (*slot)->id < id )
+		slot = &(*slot)->next;
+	// the pair is already registered : nothing to do
+	for( linked_inf *same = *slot; same && same->id == id; same = same->next )
+		if( same->ptr == ptr )
+			return head;
+	linked_inf *node = hl_malloc(&ctx->jit->falloc,sizeof(linked_inf));
+	node->id = id;
+	node->ptr = ptr;
+	node->next = *slot;
+	*slot = node;
+	return head;
+}
+
+/*
+	Associates `ptr` to `id` in a list sorted by ascending id : the first node
+	that already has this id gets its ptr overwritten, otherwise a new node is
+	inserted at the sorted position. Returns the (possibly new) head of the list.
+*/
+static linked_inf *link_add_sort_replace( emit_ctx *ctx, int id, void *ptr, linked_inf *head ) {
+	// slot is the link to rewrite : either the local head or the `next` field of a node
+	linked_inf **slot = &head;
+	while( *slot && (*slot)->id < id )
+		slot = &(*slot)->next;
+	linked_inf *found = *slot;
+	if( found && found->id == id ) {
+		found->ptr = ptr;
+		return head;
+	}
+	linked_inf *node = hl_malloc(&ctx->jit->falloc,sizeof(linked_inf));
+	node->id = id;
+	node->ptr = ptr;
+	node->next = found;
+	*slot = node;
+	return head;
+}
+
+/*
+	Returns the ptr of the first node whose id equals `id` in a list sorted by
+	ascending id, or NULL when there is none.
+*/
+static void *link_sort_lookup( linked_inf *head, int id ) {
+	linked_inf *node;
+	// the list is sorted : skip the smaller ids, the match can only be the first node left
+	for( node = head; node && node->id < id; node = node->next )
+		;
+	return (node && node->id == id) ? node->ptr : NULL;
+}
+
+/*
+	Unlinks the first node whose id equals `id` from a list sorted by ascending
+	id. The node itself is not released. Returns the (possibly new) head.
+*/
+static linked_inf *link_sort_remove( linked_inf *head, int id ) {
+	// slot is the link to rewrite : either the local head or the `next` field of a node
+	linked_inf **slot = &head;
+	while( *slot && (*slot)->id < id )
+		slot = &(*slot)->next;
+	if( *slot && (*slot)->id == id )
+		*slot = (*slot)->next;
+	return head;
+}
+
+/*
+	Returns the emission mode used for values of type `t`.
+	Kinds up to HBOOL go through a lookup table, every other kind is a pointer.
+*/
+static emit_mode hl_type_mode( hl_type *t ) {
+	// one entry per kind, in kind order up to HBOOL
+	// NOTE(review): presumably HVOID, HUI8, HUI16, HI32, HI64, HF32, HF64, HBOOL - confirm against hl_type_kind
+	static emit_mode kind_modes[] = {
+		M_VOID,
+		M_UI8,
+		M_UI16,
+		M_I32,
+		M_PTR,
+		M_F32,
+		M_F64,
+		// the mode of the last entry follows the size of the C bool
+		sizeof(bool) == 1 ? M_UI8 : M_I32,
+	};
+	return t->kind <= HBOOL ? kind_modes[t->kind] : M_PTR;
+}
+
+/*
+	Registers a new value produced by the instruction that was just emitted
+	(the one at emit_pos-1) and returns its identifier, which is its index in
+	ctx->values.
+*/
+static ereg new_value( emit_ctx *ctx ) {
+	ereg id = int_arr_count(ctx->values);
+	int producer = ctx->emit_pos - 1;
+	int_arr_add(ctx->values, producer);
+	return id;
+}
+
+/*
+	Returns the scratch buffer of the context, to be filled with `count` call
+	arguments. Reports a jit error when `count` exceeds MAX_TMP_ARGS.
+	The same buffer is returned on every call.
+*/
+static ereg *get_tmp_args( emit_ctx *ctx, int count ) {
+	if( count <= MAX_TMP_ARGS )
+		return ctx->tmp_args;
+	jit_error("Too many arguments");
+	return ctx->tmp_args;
+}
+
+/*
+	Returns the mode of value `v` : negative values designate phis and use the
+	mode of the phi, the others use the mode of the instruction producing them.
+	A null value triggers a jit assert.
+*/
+static emit_mode emit_get_mode( emit_ctx *ctx, ereg v ) {
+	if( IS_NULL(v) ) jit_assert();
+	return v < 0
+		? GET_PHI(v)->mode
+		: ctx->instrs[int_arr_get(ctx->values,v)].mode;
+}
+
+/*
+	Returns a string of spaces used to indent the phi debug traces :
+	3 + 2 * phi_depth characters, capped to 19.
+	The result lives in a static buffer that the next call overwrites.
+*/
+static const char *phi_prefix( emit_ctx *ctx ) {
+	static char tmp[20];
+	int width = 3 + ctx->phi_depth * 2;
+	int len = width > 19 ? 19 : width;
+	memset(tmp,' ',len);
+	tmp[len] = 0;
+	return tmp;
+}
+
+/*
+	Appends an instruction with opcode `op` to the instruction buffer and returns
+	it. All its other fields are zero : the buffer is cleared when it grows and,
+	when it is reused, cleared again by chunks of 256 entries.
+	The buffer doubles its capacity when full (256 entries at first); a jit error
+	is reported if the allocation fails. Growing moves the buffer : pointers to
+	previously emitted instructions must not be kept across calls.
+*/
+static einstr *emit_instr( emit_ctx *ctx, emit_op op ) {
+	if( ctx->emit_pos == ctx->max_instrs ) {
+		int pos = ctx->emit_pos;
+		int next_size = ctx->max_instrs ? (ctx->max_instrs << 1) : 256;
+		einstr *instrs = (einstr*)malloc(sizeof(einstr) * next_size);
+		if( instrs == NULL ) jit_error("Out of memory");
+		// there is nothing to copy on the first allocation and the old buffer may then be NULL,
+		// which memcpy does not accept even for a size of 0
+		if( pos > 0 )
+			memcpy(instrs, ctx->instrs, pos * sizeof(einstr));
+		memset(instrs + pos, 0, (next_size - pos) * sizeof(einstr));
+		free(ctx->instrs);
+		ctx->instrs = instrs;
+		ctx->max_instrs = next_size;
+	} else if( (ctx->emit_pos & 0xFF) == 0 ) {
+		// entering a new chunk of an already allocated buffer : clear what a previous use left there.
+		// capacities are multiples of 256 so the chunk always fits
+		memset(ctx->instrs + ctx->emit_pos, 0, 256 * sizeof(einstr));
+	}
+	einstr *e = ctx->instrs + ctx->emit_pos++;
+	e->op = op;
+	return e;
+}
+
+/*
+	Emits a STORE of value `from` at address `to` + `offs`.
+	The instruction takes the mode of the stored value.
+*/
+static void emit_store_mem( emit_ctx *ctx, ereg to, int offs, ereg from ) {
+	einstr *st = emit_instr(ctx, STORE);
+	st->a = to;
+	st->b = from;
+	st->size_offs = offs;
+	st->mode = GET_MODE(from);
+}
+
+#define store_args hl_emit_store_args
+/*
+	Attaches `count` arguments to instruction `e`.
+	 - no argument  : only nargs is set
+	 - one argument : it is stored directly in e->size_offs
+	 - more         : they are copied into ctx->args_data and e->size_offs
+	                  receives their starting index there
+	Use hl_emit_get_args to read them back.
+	A negative count triggers a jit assert; a count that does not fit in the
+	8 bits of e->nargs is a jit error.
+*/
+void hl_emit_store_args( emit_ctx *ctx, einstr *e, ereg *args, int count ) {
+	if( count < 0 ) jit_assert();
+	// nargs is stored on 8 bits : a count of 256 used to pass the check and be truncated to 0.
+	// NOTE(review): hl_dump_args treats nargs == 0xFF as "no argument list"; if that value
+	// is reserved, 255 should be rejected as well - confirm.
+	if( count > 0xFF ) jit_error("Too many arguments");
+	e->nargs = (unsigned char)count;
+	if( count == 0 ) return;
+	if( count == 1 ) {
+		e->size_offs = args[0];
+		return;
+	}
+	int *args_data = int_arr_reserve(ctx->args_data, count);
+	e->size_offs = (int)(args_data - ctx->args_data.values);
+	memcpy(args_data, args, sizeof(int) * count);
+}
+
+/*
+	Returns the arguments attached to `e` by hl_emit_store_args, or NULL when
+	there are none. A single argument lives inside the instruction itself,
+	several arguments live in ctx->args_data : in both cases the pointer refers
+	to the actual storage, so writing through it modifies the instruction.
+*/
+ereg *hl_emit_get_args( emit_ctx *ctx, einstr *e ) {
+	switch( e->nargs ) {
+	case 0:
+		return NULL;
+	case 1:
+		return (ereg*)&e->size_offs;
+	default:
+		return (ereg*)(ctx->args_data.values + e->size_offs);
+	}
+}
+
+/*
+	Emits a generic instruction `op` with operands `a` and `b`, mode `mode` and
+	extra data `size_offs`. `mode` must fit in 8 bits (jit assert otherwise).
+	Returns the value produced by the instruction, or UNUSED when the mode is 0
+	or M_NORET since such an instruction has no result.
+*/
+static ereg emit_gen_ext( emit_ctx *ctx, emit_op op, ereg a, ereg b, int mode, int size_offs ) {
+	einstr *ins = emit_instr(ctx, op);
+	if( (unsigned char)mode != mode ) jit_assert();
+	ins->a = a;
+	ins->b = b;
+	ins->size_offs = size_offs;
+	ins->mode = (unsigned char)mode;
+	if( mode == 0 || mode == M_NORET )
+		return UNUSED;
+	return new_value(ctx);
+}
+
+/*
+	Same as emit_gen_ext with a size_offs of 0.
+*/
+static ereg emit_gen( emit_ctx *ctx, emit_op op, ereg a, ereg b, int mode ) {
+	const int no_extra = 0;
+	return emit_gen_ext(ctx,op,a,b,mode,no_extra);
+}
+
+/*
+	Emits an instruction without operands that only carries `size_offs`.
+	ALLOC_STACK is the only opcode that gets a result (in pointer mode);
+	UNUSED is returned for all the others.
+*/
+static ereg emit_gen_size( emit_ctx *ctx, emit_op op, int size_offs ) {
+	int mode = 0;
+	if( op == ALLOC_STACK )
+		mode = M_PTR;
+	return emit_gen_ext(ctx,op,UNUSED,UNUSED,mode,size_offs);
+}
+
+/*
+	Overwrites the mode of the instruction that was emitted last.
+*/
+static void patch_instr_mode( emit_ctx *ctx, int mode ) {
+	einstr *last = ctx->instrs + (ctx->emit_pos - 1);
+	last->mode = (unsigned char)mode;
+}
+
+/*
+	Allocates a phi for register `r` (may be NULL) in block `b`.
+	The phi gets the next negative identifier, is appended to the phis of the
+	block and registered in ctx->phis so that GET_PHI can find it.
+	The table of phis doubles its capacity when full (64 entries at first);
+	a jit error is reported if the allocation fails.
+*/
+static tmp_phi *alloc_phi( emit_ctx *ctx, emit_block *b, vreg *r ) {
+	if( ctx->phi_count == ctx->max_phis ) {
+		int new_size = ctx->max_phis ? ctx->max_phis << 1 : 64;
+		tmp_phi **phis = (tmp_phi**)malloc(sizeof(tmp_phi*) * new_size);
+		if( phis == NULL ) jit_error("Out of memory");
+		// there is nothing to copy on the first allocation and the old table may then be NULL,
+		// which memcpy does not accept even for a size of 0
+		if( ctx->phi_count > 0 )
+			memcpy(phis, ctx->phis, sizeof(tmp_phi*) * ctx->phi_count);
+		free(ctx->phis);
+		ctx->phis = phis;
+		ctx->max_phis = new_size;
+	}
+	tmp_phi *p = (tmp_phi*)hl_zalloc(&ctx->jit->falloc, sizeof(tmp_phi));
+	p->b = b;
+	p->r = r;
+	if( r ) p->mode = hl_type_mode(r->t);
+	// phis are numbered -1, -2, ... in allocation order
+	p->value = -(++ctx->phi_count);
+	phi_add(b->phis,p);
+	GET_PHI(p->value) = p;
+	return p;
+}
+
+/*
+	Creates a block starting at the current emit position and appends it to
+	ctx->blocks. Every block but the first one (id 0) is introduced by a BLOCK
+	instruction carrying its id.
+*/
+static emit_block *alloc_block( emit_ctx *ctx ) {
+	emit_block *blk = hl_zalloc(&ctx->jit->falloc, sizeof(emit_block));
+	blk->start_pos = ctx->emit_pos;
+	blk->id = blocks_count(ctx->blocks);
+	blocks_add(ctx->blocks, blk);
+	if( blk->id == 0 )
+		return blk;
+	emit_gen_size(ctx, BLOCK, blk->id);
+	return blk;
+}
+
+/*
+	Records a control flow edge from `p` to `b` : `p` is added to the
+	predecessors of `b` and `b` to the successors of `p`.
+	Nothing is done when `p` is already a predecessor of `b`.
+*/
+static void block_add_pred( emit_ctx *ctx, emit_block *b, emit_block *p ) {
+ // the edge is already known
+ for_iter(blocks,p2,b->preds)
+ if( p2 == p )
+ return;
+ blocks_add(b->preds,p);
+ blocks_add(p->nexts,b);
+ emit_debug(" PRED #%d\n",p->id);
+}
+
+/*
+	Makes `v` the current value of register `r` in block `b`.
+	When `v` is a phi, the block is also added to the blocks referencing that
+	phi, so that they can be updated if the phi gets replaced.
+	A null value triggers a jit assert.
+*/
+static void store_block_var( emit_ctx *ctx, emit_block *b, vreg *r, ereg v ) {
+	if( IS_NULL(v) ) jit_assert();
+	vreg_replace(b->written_vars,r->id,v);
+	if( v >= 0 )
+		return;
+	tmp_phi *phi = GET_PHI(v);
+	phi->ref_blocks = link_add_sort_unique(ctx,b->id,b,phi->ref_blocks);
+}
+
+/*
+	Ends the current block and starts a new one at the current emit position.
+	The new block is sealed immediately and receives as predecessors :
+	 - the source block of every arrival point registered for the current op
+	   (these arrival points are consumed)
+	 - the previous block, unless its last instruction never falls through
+	   (JUMP, JUMP_TABLE, RET or an instruction in M_NORET mode)
+	Returns false, without doing anything, when the current block has just been
+	opened (its BLOCK instruction is the last one emitted).
+*/
+static bool split_block( emit_ctx *ctx ) {
+ if( ctx->current_block->start_pos == ctx->emit_pos-1 )
+ return false;
+ emit_block *b = alloc_block(ctx);
+ b->sealed = true;
+ emit_debug("BLOCK #%d@%X[%X]\n",b->id,b->start_pos,ctx->op_pos);
+ while( ctx->arrival_points && ctx->arrival_points->id == ctx->op_pos ) {
+ block_add_pred(ctx, b, (emit_block*)ctx->arrival_points->ptr);
+ ctx->arrival_points = ctx->arrival_points->next;
+ }
+ // instruction emitted right before the BLOCK of the new block; it is looked up after
+ // alloc_block since emitting can move the instruction buffer
+ einstr *eprev = &ctx->instrs[b->start_pos-1];
+ if( eprev->op != JUMP && eprev->op != JUMP_TABLE && eprev->op != RET && eprev->mode != M_NORET )
+ block_add_pred(ctx, b, ctx->current_block);
+ ctx->current_block->end_pos = b->start_pos;
+ ctx->current_block = b;
+ return true;
+}
+
+/*
+	Registers the current block as a source of the op located `offs` ops after
+	the next one (target = op_pos + 1 + offs) in the sorted list of arrival
+	points. Nothing is registered for an offset of 0 when the current block has
+	just been opened.
+*/
+static void add_jump_target( emit_ctx *ctx, int offs ) {
+	bool block_just_opened = ctx->current_block->start_pos == ctx->emit_pos-1;
+	if( offs == 0 && block_just_opened )
+		return;
+	ctx->arrival_points = link_add_sort_unique(ctx, ctx->op_pos + 1 + offs, ctx->current_block, ctx->arrival_points);
+}
+
+/*
+	Emits a jump whose target is not known yet (JCOND when `cond` is set, JUMP
+	otherwise) and starts a new block after it.
+	Returns the position of the jump instruction, to be given to patch_jump once
+	the target has been reached.
+*/
+static int emit_jump( emit_ctx *ctx, bool cond ) {
+ int p = ctx->emit_pos;
+ emit_gen(ctx, cond ? JCOND : JUMP, UNUSED, UNUSED, 0);
+ // registers an arrival point on the next op for the block that ends with the unconditional jump
+ if( !cond ) add_jump_target(ctx, 0);
+ split_block(ctx);
+ return p;
+}
+
+/*
+	Makes the jump emitted at position `jpos` (see emit_jump) land at the
+	current emit position and records the matching control flow edge.
+	If the current block has just been opened, the jump targets its BLOCK
+	instruction and the jumping block becomes one of its predecessors;
+	otherwise a new block is started here.
+*/
+static void patch_jump( emit_ctx *ctx, int jpos ) {
+ emit_block *b = NULL;
+ // find the block that contains the initial jump
+ // NOTE(review): presumably for_iter_back walks the blocks from last to first - confirm
+ for_iter_back(blocks,b2,ctx->blocks) {
+ if( b2->start_pos <= jpos ) {
+ b = b2;
+ break;
+ }
+ }
+ if( !b || b == ctx->current_block ) jit_assert();
+ // patch opcode : the offset is relative to the instruction that follows the jump
+ bool after_block = ctx->current_block->start_pos == ctx->emit_pos-1;
+ ctx->instrs[jpos].size_offs = ctx->emit_pos - (after_block?1:0) - (jpos + 1);
+ if( after_block ) {
+ block_add_pred(ctx, ctx->current_block, b);
+ } else {
+ if( !split_block(ctx) ) jit_assert();
+ }
+}
+
+/*
+	Records that the jump instruction at `jpos` targets the op located at
+	op_pos + 1 + offs; the pair is resolved later by hl_emit_remap_jumps.
+	Forward jumps (offs > 0) also register an arrival point for their target.
+*/
+static void register_jump( emit_ctx *ctx, int jpos, int offs ) {
+	int target_op = ctx->op_pos + 1 + offs;
+	int_arr_add(ctx->jump_regs, jpos);
+	int_arr_add(ctx->jump_regs, target_op);
+	if( offs <= 0 )
+		return;
+	add_jump_target(ctx, offs);
+}
+
+/*
+	Emits a LOAD_CONST of `value`, in the mode associated with type `size_t`,
+	and returns the resulting value.
+*/
+static ereg emit_load_const( emit_ctx *ctx, uint64 value, hl_type *size_t ) {
+	einstr *ld = emit_instr(ctx, LOAD_CONST);
+	ld->value = value;
+	ld->mode = hl_type_mode(size_t);
+	return new_value(ctx);
+}
+
+/*
+	Emits a LOAD_ADDR reading memory at address `v` + `offset` and returns the
+	loaded value. `size_t` selects the mode of the memory access (kept in the
+	nargs field of the instruction) and `to_t` the mode of the result.
+*/
+static ereg emit_load_mem( emit_ctx *ctx, ereg v, int offset, hl_type *size_t, hl_type *to_t ) {
+	einstr *ld = emit_instr(ctx, LOAD_ADDR);
+	ld->a = v;
+	ld->size_offs = offset;
+	ld->mode = hl_type_mode(to_t);
+	ld->nargs = hl_type_mode(size_t);
+	return new_value(ctx);
+}
+
+/*
+	Makes `v` the current value of register `to` in the current block.
+	Registers of type void are ignored; a null value triggers a jit assert.
+	Outside of any trap, `v` is also remembered as the stored value of the
+	register. While a trap is active, the previously stored value (if any) is
+	updated in place by writing `v` at its address.
+*/
+static void emit_store_reg( emit_ctx *ctx, vreg *to, ereg v ) {
+	if( to->t->kind == HVOID ) return;
+	if( IS_NULL(v) ) jit_assert();
+	store_block_var(ctx,ctx->current_block,to,v);
+	if( ctx->trap_count <= 0 ) {
+		to->stored = v;
+		return;
+	}
+	// if the value was written before the trap, let's update it
+	if( IS_NULL(to->stored) )
+		return;
+	ereg addr = emit_gen(ctx,ADDRESS,to->stored,UNUSED,M_PTR);
+	STORE_MEM(addr, 0, v);
+}
+
+/*
+	Emits a CALL_PTR to the C function at `native_ptr` with `nargs` arguments.
+	`ret` is the return type; NULL marks a call that does not return (M_NORET).
+	Returns the result of the call, or UNUSED when there is none (NULL or void
+	return type).
+*/
+static ereg emit_native_call( emit_ctx *ctx, void *native_ptr, ereg args[], int nargs, hl_type *ret ) {
+	einstr *call = emit_instr(ctx, CALL_PTR);
+	if( ret )
+		call->mode = (unsigned char)hl_type_mode(ret);
+	else
+		call->mode = (unsigned char)M_NORET;
+	call->value = (int_val)native_ptr;
+	store_args(ctx, call, args, nargs);
+	if( ret == NULL || call->mode == M_VOID )
+		return UNUSED;
+	return new_value(ctx);
+}
+
+/*
+	Emits a CALL_REG : a call to the function whose address is held by value
+	`f`, with `nargs` arguments. Returns the result of the call, or UNUSED when
+	the return type `ret` is void.
+*/
+static ereg emit_dyn_call( emit_ctx *ctx, ereg f, ereg args[], int nargs, hl_type *ret ) {
+	einstr *call = emit_instr(ctx, CALL_REG);
+	call->a = f;
+	call->mode = hl_type_mode(ret);
+	store_args(ctx, call, args, nargs);
+	if( call->mode == M_VOID )
+		return UNUSED;
+	return new_value(ctx);
+}
+
+/*
+	Emits a TEST of value `v` for the condition of jump opcode `o`.
+	The instruction is generated with a mode of 0 so that it produces no value,
+	then tagged with the mode of `v`.
+*/
+static void emit_test( emit_ctx *ctx, ereg v, hl_op o ) {
+	emit_gen_ext(ctx, TEST, v, UNUSED, 0, o);
+	emit_mode operand_mode = GET_MODE(v);
+	ctx->instrs[ctx->emit_pos-1].mode = (unsigned char)operand_mode;
+}
+
+/*
+	Emits a CMP of `a` against `b` for the condition of jump opcode `o`.
+	The instruction is generated with a mode of 0 so that it produces no value,
+	then tagged with the mode of `a`.
+*/
+static void emit_cmp( emit_ctx *ctx, ereg a, ereg b, hl_op o ) {
+	emit_gen_ext(ctx, CMP, a, b, 0, o);
+	emit_mode operand_mode = GET_MODE(a);
+	ctx->instrs[ctx->emit_pos-1].mode = (unsigned char)operand_mode;
+}
+
+/*
+	Removes `v` from the inputs of phi `p`.
+*/
+static void phi_remove_val( emit_ctx *ctx, tmp_phi *p, ereg v ) {
+ ereg_remove(&p->vals,v);
+ emit_debug("%sPHI-REM-DEP %s = %s\n", phi_prefix(ctx), val_str(p->value,p->mode), val_str(v,p->mode));
+}
+
+/*
+	Adds `v`, coming from block `from`, to the inputs of phi `p`.
+	A phi is never recorded as its own input. When `v` is itself a phi, `p` is
+	registered among the phis using it so that it can be revisited if `v` gets
+	replaced. A phi without block or a null value triggers a jit assert.
+*/
+static void phi_add_val( emit_ctx *ctx, tmp_phi *p, ereg v, emit_block *from ) {
+ if( !p->b ) jit_assert();
+ if( IS_NULL(v) ) jit_assert();
+ if( p->value == v )
+ return;
+ // NOTE(review): presumably ereg_add returns false when v is already an input - confirm in data_struct.c
+ if( !ereg_add(p->vals,v,from) )
+ return;
+ emit_debug("%sPHI-DEP %s:#%d = %s\n", phi_prefix(ctx), val_str(p->value,p->mode), from->id, val_str(v,p->mode));
+ if( v < 0 ) {
+ tmp_phi *p2 = GET_PHI(v);
+ phi_add(p2->ref_phis,p);
+ }
+}
+
+/*
+	Tries to eliminate phi `p` when it is trivial, that is when all its inputs
+	other than itself are one single value.
+	Returns the value to use in place of the phi : the phi itself when it has to
+	be kept (several distinct inputs, or no input yet), its unique input
+	otherwise. In the latter case, every block and every phi referencing `p` is
+	first redirected to that input.
+*/
+static ereg optimize_phi_rec( emit_ctx *ctx, tmp_phi *p ) {
+
+ if( p->locked ) jit_assert();
+ // look for a unique input, ignoring the phi itself
+ ereg same = UNUSED;
+ for_iter_key(ereg,v,p->vals) {
+ if( v == same || v == p->value )
+ continue;
+ if( !IS_NULL(same) )
+ return p->value;
+ same = v;
+ }
+ if( IS_NULL(same) )
+ return p->value; // sealed (no dep yet)
+
+ // nothing references the phi : there is nothing to rewrite
+ if( !phi_count(p->ref_phis) && !p->ref_blocks )
+ return same;
+
+ if( p->locked || p->opt ) jit_assert();
+
+ emit_debug("%sPHI-OPT %s = %s\n", phi_prefix(ctx), val_str(p->value,p->mode), val_str(same,p->mode));
+ p->opt = true;
+ ctx->phi_depth++;
+ // blocks that still have the phi as value of the register now use the unique input
+ linked_inf *l = p->ref_blocks;
+ while( l ) {
+ emit_block *b = (emit_block*)l->ptr;
+ if( vreg_find(b->written_vars,p->r->id) == p->value )
+ store_block_var(ctx,b,p->r,same);
+ l = l->next;
+ }
+ // phis that had this phi as input now take the unique input, from the same block
+ for_iter(phi,p2,p->ref_phis) {
+ emit_block *bsame = ereg_find(p2->vals,p->value);
+ phi_remove_val(ctx,p2,p->value);
+ phi_add_val(ctx,p2,same,bsame);
+ }
+ p->ref_blocks = NULL;
+ int count = phi_count(p->ref_phis);
+ tmp_phi **phis = phi_free(&p->ref_phis);
+ // NOTE(review): the next line is garbled in this copy - the text between "i<" and "ctx->"
+ // is missing (loop bound, loop body and whatever precedes the decrement of phi_depth).
+ // Presumably the loop revisits each of the `count` phis detached above; restore it
+ // from the upstream file.
+ for(int i=0;iphi_depth--;
+ emit_debug("%sPHI-OPT-DONE %s = %s\n", phi_prefix(ctx), val_str(p->value,p->mode), val_str(same,p->mode));
+ return optimize_phi_rec(ctx,p);
+}
+
+static ereg emit_load_reg_block( emit_ctx *ctx, emit_block *b, vreg *r );
+
+/*
+	Collects the inputs of phi `p` : for each predecessor of its block, the value
+	of the register of the phi at the end of that predecessor (the phi itself
+	when it has no register). The phi is locked during the collection, then
+	simplified. Returns the value to use for the phi (see optimize_phi_rec).
+*/
+static ereg gather_phis( emit_ctx *ctx, tmp_phi *p ) {
+ p->locked = true;
+ for_iter(blocks,b,p->b->preds) {
+ ereg r = p->r ? emit_load_reg_block(ctx, b, p->r) : p->value;
+ phi_add_val(ctx, p, r, b);
+ }
+ p->locked = false;
+ return optimize_phi_rec(ctx, p);
+}
+
+/*
+	Returns the value of register `r` at the end of block `b`.
+	 - a value already known in the block is returned directly
+	 - unsealed block : a phi without inputs is created, they are collected when
+	   the block gets sealed
+	 - single predecessor : the value is the one of the predecessor
+	 - otherwise : a phi is created and its inputs gathered from the predecessors
+	The result is recorded in the block, so the lookup is done once per block.
+*/
+static ereg emit_load_reg_block( emit_ctx *ctx, emit_block *b, vreg *r ) {
+ ereg v = vreg_find(b->written_vars,r->id);
+ if( !IS_NULL(v) )
+ return v;
+ if( !b->sealed ) {
+ tmp_phi *p = alloc_phi(ctx,b,r);
+ emit_debug("%sPHI-SEALED %s = R%d\n",phi_prefix(ctx),val_str(p->value,p->mode),r->id);
+ v = p->value;
+ } else if( blocks_count(b->preds) == 1 )
+ v = emit_load_reg_block(ctx, blocks_get(b->preds,0), r);
+ else {
+ tmp_phi *p = alloc_phi(ctx,b,r);
+ // the phi is recorded before gathering so that a lookup coming back to this block
+ // (loop in the control flow) finds it instead of recursing forever
+ store_block_var(ctx,b,r,p->value);
+ v = gather_phis(ctx, p);
+ }
+ store_block_var(ctx,b,r,v);
+ return v;
+}
+
+/*
+	Returns the value of register `r` at the current point, that is in the
+	block being emitted.
+*/
+static ereg emit_load_reg( emit_ctx *ctx, vreg *r ) {
+	emit_block *here = ctx->current_block;
+	return emit_load_reg_block(ctx, here, r);
+}
+
+/*
+	Seals block `b` : the inputs of the phis created while the block was still
+	open are collected from its predecessors, then the block is marked as sealed.
+*/
+static void seal_block( emit_ctx *ctx, emit_block *b ) {
+ emit_debug(" SEAL #%d\n",b->id);
+ for_iter(phi,p,b->phis)
+ gather_phis(ctx, p);
+ b->sealed = true;
+}
+
+/*
+	Emits a CALL_FUN to the function of index `findex` with `nargs` arguments.
+	`mode` is the mode of the result. Returns the result of the call, or UNUSED
+	when the mode is M_VOID.
+*/
+static ereg emit_call_fid( emit_ctx *ctx, int findex, ereg *args, int nargs, emit_mode mode ) {
+	einstr *call = emit_instr(ctx, CALL_FUN);
+	call->a = findex;
+	call->mode = mode;
+	store_args(ctx, call, args, nargs);
+	if( mode == M_VOID )
+		return UNUSED;
+	return new_value(ctx);
+}
+
+/*
+	Emits the call of function `findex` with the `count` registers listed in
+	`args_regs` as arguments and stores the result into register `dst`.
+	Natives (index beyond the functions of the module) are called through their
+	address, the other functions through their index.
+	NOTE(review): the argument loop and the native branch were garbled in this
+	copy (text lost between "i<" and "m->"); they are reconstructed from what is
+	left of the statement and from the unused locals - confirm against upstream.
+*/
+static void emit_call_fun( emit_ctx *ctx, vreg *dst, int findex, int count, int *args_regs ) {
+	hl_module *m = ctx->mod;
+	int fid = m->functions_indexes[findex];
+	bool isNative = fid >= m->code->nfunctions;
+	ereg *args = get_tmp_args(ctx, count);
+	for(int i=0;i<count;i++)
+		args[i] = LOAD(R(args_regs[i]));
+	if( isNative )
+		STORE(dst, emit_native_call(ctx,m->functions_ptrs[findex], args, count, dst->t));
+	else {
+		ereg out = emit_call_fid(ctx,findex,args,count,hl_type_mode(dst->t));
+		// no value is produced for a void function
+		if( out ) STORE(dst, out);
+	}
+}
+
+/*
+	Allocates, in the module allocator, a closure without bound value for the
+	function of index `fid`.
+	For a native, the closure directly holds its type and address. For a
+	function of the module, `fun` temporarily holds the function index and the
+	closure is chained (through `value`) into ctx->closure_list.
+	NOTE(review): presumably the list is walked later to replace the indexes by
+	the final addresses - confirm.
+*/
+static vclosure *alloc_static_closure( emit_ctx *ctx, int fid ) {
+	hl_module *m = ctx->mod;
+	vclosure *cl = hl_malloc(&m->ctx.alloc,sizeof(vclosure));
+	int fidx = m->functions_indexes[fid];
+	bool is_native = fidx >= m->code->nfunctions;
+	cl->hasValue = 0;
+	if( !is_native ) {
+		cl->t = m->code->functions[fidx].type;
+		cl->fun = (void*)(int_val)fid;
+		cl->value = ctx->closure_list;
+		ctx->closure_list = cl;
+		return cl;
+	}
+	// native
+	cl->t = m->code->natives[fidx - m->code->nfunctions].t;
+	cl->fun = m->functions_ptrs[fid];
+	cl->value = NULL;
+	return cl;
+}
+
+/*
+	Returns the runtime function reading a dynamic field of type `t` :
+	one variant for each float size, one for 64 bits integers (and GUID),
+	one for the smaller integers and booleans, and the pointer variant for
+	everything else.
+*/
+static void *get_dynget( hl_type *t ) {
+	void *getter;
+	switch( t->kind ) {
+	case HUI8:
+	case HUI16:
+	case HI32:
+	case HBOOL:
+		getter = hl_dyn_geti;
+		break;
+	case HI64:
+	case HGUID:
+		getter = hl_dyn_geti64;
+		break;
+	case HF32:
+		getter = hl_dyn_getf;
+		break;
+	case HF64:
+		getter = hl_dyn_getd;
+		break;
+	default:
+		getter = hl_dyn_getp;
+		break;
+	}
+	return getter;
+}
+
+/*
+	Returns the runtime function writing a dynamic field of type `t` :
+	one variant for each float size, one for 64 bits integers (and GUID),
+	one for the smaller integers and booleans, and the pointer variant for
+	everything else.
+*/
+static void *get_dynset( hl_type *t ) {
+	void *setter;
+	switch( t->kind ) {
+	case HUI8:
+	case HUI16:
+	case HI32:
+	case HBOOL:
+		setter = hl_dyn_seti;
+		break;
+	case HI64:
+	case HGUID:
+		setter = hl_dyn_seti64;
+		break;
+	case HF32:
+		setter = hl_dyn_setf;
+		break;
+	case HF64:
+		setter = hl_dyn_setd;
+		break;
+	default:
+		setter = hl_dyn_setp;
+		break;
+	}
+	return setter;
+}
+
+/*
+	Returns the runtime function casting a dynamic value to type `t` :
+	one variant for each float size, one for 64 bits integers (and GUID),
+	one for the smaller integers and booleans, and the pointer variant for
+	everything else.
+*/
+static void *get_dyncast( hl_type *t ) {
+	void *caster;
+	switch( t->kind ) {
+	case HUI8:
+	case HUI16:
+	case HI32:
+	case HBOOL:
+		caster = hl_dyn_casti;
+		break;
+	case HI64:
+	case HGUID:
+		caster = hl_dyn_casti64;
+		break;
+	case HF32:
+		caster = hl_dyn_castf;
+		break;
+	case HF64:
+		caster = hl_dyn_castd;
+		break;
+	default:
+		caster = hl_dyn_castp;
+		break;
+	}
+	return caster;
+}
+
+/*
+	Emits the copy of `total_size` bytes from `src` + `src_offset` to
+	`dst` + `dst_offset` as a sequence of load/store pairs. Each step uses the
+	widest access that fits in what remains : a machine word, then 4 bytes,
+	then single bytes.
+*/
+static void emit_store_size( emit_ctx *ctx, ereg dst, int dst_offset, ereg src, int src_offset, int total_size ) {
+	for( int done = 0; done < total_size; ) {
+		int left = total_size - done;
+		hl_type *chunk;
+		if( left >= HL_WSIZE )
+			chunk = &hlt_bytes;
+		else if( left >= 4 )
+			chunk = &hlt_i32;
+		else
+			chunk = &hlt_ui8;
+		STORE_MEM(dst, dst_offset+done, LOAD_MEM(src,src_offset+done,chunk));
+		done += hl_type_size(chunk);
+	}
+}
+
+
+/*
+	Emits the conversion of `v` from mode `from` to mode `to` and returns the
+	converted value.
+	 - same mode and signed : a plain MOV
+	 - unsigned conversion between an integer and a float : CONV_UNSIGNED
+	 - everything else : CONV
+	The source mode is kept in the size_offs field of the instruction.
+*/
+static ereg emit_conv( emit_ctx *ctx, ereg v, emit_mode from, emit_mode to, bool _unsigned ) {
+	if( from == to && !_unsigned )
+		return emit_gen(ctx,MOV,v,UNUSED,to);
+	emit_op op = CONV;
+	if( IS_FLOAT(from) != IS_FLOAT(to) && _unsigned )
+		op = CONV_UNSIGNED;
+	return emit_gen_ext(ctx, op, v, UNUSED, to, from);
+}
+
+/*
+	Tells if the dynamic cast function for type `t` takes the target type as
+	extra argument : this is the case for all kinds but the floats, the 64 bits
+	integers and GUID.
+*/
+static bool dyn_need_type( hl_type *t ) {
+	switch( t->kind ) {
+	case HF32:
+	case HF64:
+	case HI64:
+	case HGUID:
+		return false;
+	default:
+		return true;
+	}
+}
+
+/*
+	Emits the cast of value `v`, of type `t`, to the type of register `dst` and
+	stores the result into `dst`.
+	A Null<T> converted to a register of the same kind as T is unwrapped
+	inline : 0 when the value is null, the payload read at HDYN_VALUE otherwise.
+	All the other cases call the runtime cast function selected by get_dyncast.
+*/
+static void emit_dyn_cast( emit_ctx *ctx, ereg v, hl_type *t, vreg *dst ) {
+ hl_type *dt = dst->t;
+ if( t->kind == HNULL && t->tparam->kind == dt->kind ) {
+ // if( v == null ) dst = 0 else dst = payload of v
+ emit_test(ctx, v, OJNotNull);
+ int jnot = emit_jump(ctx, true);
+ ereg v1 = LOAD_CONST(0,dt);
+ STORE(dst, v1);
+ int jend = emit_jump(ctx, false);
+ patch_jump(ctx, jnot);
+ ereg v2 = LOAD_MEM(v,HDYN_VALUE,dt);
+ STORE(dst, v2);
+ patch_jump(ctx, jend);
+ return;
+ }
+ bool need_dyn = dyn_need_type(dt);
+ // the value is written into a stack slot whose address is the first argument of the call
+ ereg st = emit_gen_size(ctx, ALLOC_STACK, HL_WSIZE);
+ STORE_MEM(st, 0, v);
+ ereg args[3];
+ args[0] = st;
+ args[1] = LOAD_CONST_PTR(t);
+ // the target type is only passed to the variants that need it
+ if( need_dyn ) args[2] = LOAD_CONST_PTR(dt);
+ ereg r = emit_native_call(ctx, get_dyncast(dt), args, need_dyn ? 3 : 2, dt);
+ STORE(dst, r);
+}
+
+static void emit_opcode( emit_ctx *ctx, hl_opcode *o );
+
+/*
+	Rewrites in place a reference to a phi once the phis have been cleaned
+	(see hl_emit_clean_phis). Regular values and null references are left as is.
+	An eliminated phi (final_id < 0) is followed through its target until a
+	regular value or a phi that was kept is reached; a phi that was kept is
+	replaced by its renumbered identifier.
+*/
+static void remap_phi_reg( emit_ctx *ctx, ereg *r ) {
+ if( *r >= 0 || IS_NULL(*r) )
+ return;
+ tmp_phi *p = GET_PHI(*r);
+ while( p->final_id < 0 ) {
+ if( p->target >= 0 ) {
+ *r = p->target;
+ return;
+ }
+ p = GET_PHI(p->target);
+ }
+ // final_id is 0 when the phis have not been renumbered : keep the reference
+ if( p->final_id == 0 )
+ return;
+ *r = -p->final_id; // new phis
+}
+
+/*
+	Converts block `b` to its final form in jit->blocks[b->id] : range of
+	instructions, ids of the predecessors and of the successors, and the phis
+	that were not eliminated, with their inputs and the block each one comes
+	from. Everything is allocated in the function allocator.
+	jit->phi_count is increased by the number of phis written.
+*/
+static void emit_write_block( emit_ctx *ctx, emit_block *b ) {
+	jit_ctx *jit = ctx->jit;
+	eblock *bl = jit->blocks + b->id;
+	// the first block always starts at the first instruction
+	bl->start_pos = b->id == 0 ? 0 : b->start_pos;
+	bl->end_pos = b->end_pos;
+	bl->pred_count = blocks_count(b->preds);
+	bl->next_count = blocks_count(b->nexts);
+	bl->preds = (int*)hl_malloc(&jit->falloc,sizeof(int)*bl->pred_count);
+	bl->nexts = (int*)hl_malloc(&jit->falloc,sizeof(int)*bl->next_count);
+	// the bounds of the two loops below were garbled in the source
+	for(int i=0;i<bl->pred_count;i++)
+		bl->preds[i] = blocks_get(b->preds,i)->id;
+	for(int i=0;i<bl->next_count;i++)
+		bl->nexts[i] = blocks_get(b->nexts,i)->id;
+	// write phis : first count the ones that are kept (final_id >= 0)
+	{
+		for_iter(phi,p,b->phis)
+			if( p->final_id >= 0 )
+				bl->phi_count++;
+	}
+	bl->phis = (ephi*)hl_zalloc(&jit->falloc,sizeof(ephi)*bl->phi_count);
+	jit->phi_count += bl->phi_count;
+	int i = 0;
+	for_iter(phi,p,b->phis) {
+		if( p->final_id < 0 )
+			continue;
+		ephi *p2 = bl->phis + i++;
+		// a final_id of 0 means the phis were not renumbered : keep the original identifier
+		if( p->final_id == 0 )
+			p2->value = p->value;
+		else
+			p2->value = -p->final_id;
+		p2->mode = p->mode;
+		p2->nvalues = ereg_count(p->vals);
+		p2->values = (ereg*)hl_malloc(&jit->falloc,sizeof(ereg)*p2->nvalues);
+		p2->blocks = (ereg*)hl_malloc(&jit->falloc,sizeof(int)*p2->nvalues);
+		// inputs, with the references to other phis remapped...
+		int k = 0;
+		for_iter_key(ereg,v,p->vals) {
+			remap_phi_reg(ctx, &v);
+			p2->values[k++] = v;
+		}
+		// ...and, in the same order, the block each input comes from
+		k = 0;
+		for_iter(ereg,bfrom,p->vals)
+			p2->blocks[k++] = bfrom->id;
+	}
+}
+
+/*
+	Resolves the jumps registered as (instruction position, target) pairs in
+	the int_arr pointed to by `_jumps`, then empties that array.
+	 instrs  : instructions containing the jumps
+	 pos_map : maps a target to its position in `instrs`
+	Each jump receives in size_offs the offset of its target relative to the
+	instruction that follows it. For a JUMP_TABLE, every entry of its argument
+	list initially holds an offset relative to the registered target and is
+	replaced by the final relative offset.
+*/
+void hl_emit_remap_jumps( emit_ctx *ctx, void *_jumps, einstr *instrs, int *pos_map ) {
+	int_arr jumps = *(int_arr*)_jumps;
+	int i = 0;
+	while( i < int_arr_count(jumps) ) {
+		int pos = int_arr_get(jumps,i++);
+		int target = int_arr_get(jumps,i++);
+		einstr *e = instrs + pos;
+		if( e->op == JUMP_TABLE ) {
+			int *args = (int*)hl_emit_get_args(ctx, e);
+			// the loop bound was garbled in the source; every entry of the table is remapped
+			for(int k=0;k<e->nargs;k++)
+				args[k] = pos_map[target + args[k]] - (pos + 1);
+		} else
+			e->size_offs = pos_map[target] - (pos + 1);
+	}
+	int_arr_reset((int_arr*)_jumps);
+}
+
+/*
+	Finalizes the emit stage and publishes its results in `jit` : instructions,
+	op position map, values and blocks. Pending jumps are resolved on the way.
+	Only the first call after a function has been emitted does something.
+*/
+void hl_emit_flush( jit_ctx *jit ) {
+ emit_ctx *ctx = jit->emit;
+ if( ctx->flushed ) return;
+ ctx->flushed = true;
+ // close the position map and the last block at the end of the instructions
+ ctx->pos_map[ctx->fun->nops] = ctx->emit_pos;
+ ctx->current_block->end_pos = ctx->emit_pos;
+ hl_emit_remap_jumps(ctx,&ctx->jump_regs, ctx->instrs, ctx->pos_map);
+ jit->instrs = ctx->instrs;
+ jit->instr_count = ctx->emit_pos;
+ jit->emit_pos_map = ctx->pos_map;
+ // incremented by emit_write_block for each phi that is kept
+ jit->phi_count = 0;
+ // the current block is the last one allocated
+ jit->block_count = ctx->current_block->id + 1;
+ jit->blocks = hl_zalloc(&jit->falloc,sizeof(eblock) * jit->block_count);
+ jit->value_count = int_arr_count(ctx->values);
+ jit->values_writes = ctx->values.values;
+ for_iter(blocks,b,ctx->blocks)
+ emit_write_block(ctx,b);
+}
+
+/*
+	Call `iter_reg(ctx, &reg)` for every register operand of the instruction `e`.
+
+	- CALL_REG    : visits `e->a`, then every call argument
+	- CALL_FUN/PTR: visits every call argument
+	- LOAD_CONST / PUSH_CONST : nothing is visited
+	- otherwise   : visits `e->a` if it is not null, and then `e->b` if it is not null
+	  (`e->b` is never visited when `e->a` is null)
+
+	The callback receives a pointer to the operand, so it may rewrite it in place.
+*/
+void hl_emit_reg_iter( jit_ctx *jit, einstr *e, void *ctx, void (*iter_reg)( void *, ereg * ) ) {
+	switch( e->op ) {
+	case CALL_REG:
+		iter_reg(ctx,&e->a);
+		// fallthrough : also visit the arguments
+	case CALL_FUN:
+	case CALL_PTR:
+		{
+			int i;
+			ereg *args = hl_emit_get_args(jit->emit, e);
+			for(i=0;i<e->nargs;i++)
+				iter_reg(ctx, args + i);
+		}
+		break;
+	case LOAD_CONST:
+	case PUSH_CONST:
+		// skip
+		break;
+	default:
+		if( !IS_NULL(e->a) ) {
+			iter_reg(ctx,&e->a);
+			if( !IS_NULL(e->b) )
+				iter_reg(ctx,&e->b);
+		}
+		break;
+	}
+}
+
+/*
+	Return pointers to the (at most two) register operands of `e` and store how many
+	were found in `*count`. Must not be used on call instructions (asserts).
+	The returned array is a static buffer : it is overwritten by the next call.
+*/
+ereg **hl_emit_get_regs( einstr *e, int *count ) {
+	static ereg *regs[2];
+	int n = 0;
+	if( e->op == CALL_REG || e->op == CALL_FUN || e->op == CALL_PTR ) {
+		// call arguments are not handled here
+		jit_assert();
+	} else if( e->op != LOAD_CONST && e->op != PUSH_CONST && !IS_NULL(e->a) ) {
+		regs[n++] = &e->a;
+		if( !IS_NULL(e->b) )
+			regs[n++] = &e->b;
+	}
+	*count = n;
+	return regs;
+}
+
+/*
+	Simplify the phis collected while emitting the function.
+
+	1. for each phi, repeatedly run optimize_phi_rec and follow the result while it
+	   designates another phi (negative value different from the current phi's own
+	   value) ; the last result is stored in `target`.
+	2. phis whose target is themselves are kept and numbered 1..n in `final_id`,
+	   all the others get `final_id = -1`.
+	3. every register operand of every emitted instruction goes through remap_phi_reg.
+*/
+static void hl_emit_clean_phis( emit_ctx *ctx ) {
+	for(int i=0;i<ctx->phi_count;i++) {
+		tmp_phi *p = ctx->phis[i];
+		tmp_phi *cur = p;
+		ereg r;
+		while( true ) {
+			cur->opt = false;
+			r = optimize_phi_rec(ctx,cur);
+			if( r >= 0 || r == cur->value ) break;
+			cur = GET_PHI(r);
+		}
+		p->target = r;
+	}
+	int new_phis = 0;
+	for(int i=0;i<ctx->phi_count;i++) {
+		tmp_phi *p = ctx->phis[i];
+		if( p->target == p->value )
+			p->final_id = ++new_phis;
+		else
+			p->final_id = -1;
+	}
+	for(int i=0;i<ctx->emit_pos;i++)
+		hl_emit_reg_iter(ctx->jit, ctx->instrs + i, ctx, (void*)remap_phi_reg);
+}
+
+/*
+	Emit the intermediate instructions for the function `jit->fun`.
+
+	Resets the per-function state of the emit context, grows the virtual register
+	table and the opcode position map if needed, emits the prologue (BLOCK, ENTER and
+	one LOAD_ARG per argument), then translates every opcode with emit_opcode.
+	Once done, phis are cleaned up and the result is flushed into `jit`.
+*/
+void hl_emit_function( jit_ctx *jit ) {
+	emit_ctx *ctx = jit->emit;
+	hl_function *f = jit->fun;
+	int i;
+	ctx->mod = jit->mod;
+	ctx->fun = f;
+	ctx->emit_pos = 0;
+	ctx->trap_count = 0;
+	ctx->phi_count = 0;
+	ctx->flushed = false;
+	int_arr_free(&ctx->args_data);
+	int_arr_free(&ctx->jump_regs);
+	int_arr_free(&ctx->values);
+	blocks_free(&ctx->blocks);
+	int_arr_add(ctx->values,-1);
+	ctx->current_block = alloc_block(ctx);
+	ctx->current_block->sealed = true;
+	ctx->arrival_points = NULL;
+	emit_debug("---- begin [%X] ----\n",f->findex);
+	if( f->nregs > ctx->max_regs ) {
+		// the register table is only reallocated when it needs to grow
+		free(ctx->vregs);
+		ctx->vregs = (vreg*)malloc(sizeof(vreg) * (f->nregs + 1));
+		if( ctx->vregs == NULL ) jit_assert();
+		for(i=0;i<f->nregs;i++)
+			R(i)->id = i;
+		ctx->max_regs = f->nregs;
+	}
+
+	if( f->nops >= ctx->pos_map_size ) {
+		// one extra slot : hl_emit_flush stores the final position at pos_map[nops]
+		free(ctx->pos_map);
+		ctx->pos_map = (int*)malloc(sizeof(int) * (f->nops+1));
+		if( ctx->pos_map == NULL ) jit_assert();
+		ctx->pos_map_size = f->nops;
+	}
+
+	for(i=0;i<f->nregs;i++) {
+		vreg *r = R(i);
+		r->t = f->regs[i];
+		r->stored = UNUSED;
+	}
+
+	emit_gen_size(ctx, BLOCK, 0);
+	emit_gen(ctx,ENTER,UNUSED,UNUSED,M_NONE);
+	for(i=0;i<f->type->fun->nargs;i++) {
+		hl_type *t = f->type->fun->args[i];
+		STORE(R(i), emit_gen(ctx, LOAD_ARG, UNUSED, UNUSED, hl_type_mode(t)));
+	}
+
+	for(int op_pos=0;op_pos<f->nops;op_pos++) {
+		ctx->op_pos = op_pos;
+		// when a BLOCK marker was just emitted, the opcode maps to the marker itself
+		if( ctx->emit_pos > 0 && ctx->instrs[ctx->emit_pos-1].op == BLOCK )
+			ctx->pos_map[op_pos] = ctx->emit_pos-1;
+		else
+			ctx->pos_map[op_pos] = ctx->emit_pos;
+		if( ctx->arrival_points ) {
+			// an arrival point located before the current opcode was missed
+			if( ctx->arrival_points->id < op_pos )
+				jit_assert();
+			while( ctx->arrival_points && ctx->arrival_points->id == op_pos && !split_block(ctx) ) {
+				emit_block *b = ctx->arrival_points->ptr;
+				// only register the predecessor if not already present
+				for_iter(blocks,bp,ctx->current_block->preds) {
+					if( b == bp ) { b = NULL; break; }
+				}
+				if( b ) block_add_pred(ctx, ctx->current_block, b);
+				ctx->arrival_points = ctx->arrival_points->next;
+			}
+			if( ctx->trap_count && ctx->traps[ctx->trap_count-1].target == ctx->op_pos )
+				ctx->trap_count--;
+		}
+		emit_opcode(ctx,f->ops + op_pos);
+	}
+	// emit a break if we're not supposed to reach here : will fix RtlUnwind on windows too.
+	if( f->nops == 0 || f->ops[f->nops-1].op != ORet )
+		BREAK();
+	if( ctx->arrival_points )
+		jit_assert();
+
+	hl_emit_clean_phis(ctx);
+	hl_emit_flush(ctx->jit);
+	if( ctx->wait_seal ) jit_assert();
+}
+
+/*
+	Create a zero-initialized emit context and link it with `jit` (both ways).
+	Also asserts that einstr is exactly 16 bytes.
+*/
+void hl_emit_alloc( jit_ctx *jit ) {
+	// calloc returns zero-filled storage, so every field starts cleared
+	emit_ctx *emit = (emit_ctx*)calloc(1, sizeof(emit_ctx));
+	if( emit == NULL ) jit_assert();
+	emit->jit = jit;
+	jit->emit = emit;
+	if( sizeof(einstr) != 16 ) jit_assert();
+}
+
+/*
+	Release the emit context attached to `jit` together with its register table,
+	instruction buffer and position map, then clear `jit->emit`.
+*/
+void hl_emit_free( jit_ctx *jit ) {
+	emit_ctx *emit = jit->emit;
+	// buffers owned by the context first, then the context itself
+	free(emit->vregs);
+	free(emit->instrs);
+	free(emit->pos_map);
+	free(emit);
+	jit->emit = NULL;
+}
+
+/*
+	Patch the closures registered in `closure_list`.
+	While pending, a closure stores the next list element in `value` and an index in
+	`fun` ; here `value` is cleared and `fun` becomes
+	`final_code + functions_ptrs[index]`. The list is emptied afterwards.
+*/
+void hl_emit_final( jit_ctx *jit ) {
+	emit_ctx *ctx = jit->emit;
+	vclosure *next;
+	for( vclosure *c = ctx->closure_list; c; c = next ) {
+		// grab the link and the index before overwriting the fields holding them
+		next = (vclosure*)c->value;
+		int_val findex = (int_val)c->fun;
+		c->value = NULL;
+		c->fun = jit->final_code + (int_val)jit->mod->functions_ptrs[findex];
+	}
+	ctx->closure_list = NULL;
+}
+
+/*
+	Look for the block starting at position `target`, beginning with `b` and walking
+	up through predecessors that start before the block being examined.
+	When found, the current block is added to its predecessors, its pending jump
+	count is decremented, and the waiting blocks at the head of `ctx->wait_seal` are
+	sealed for as long as they have no pending jump left.
+	Returns false if no such block was reached.
+*/
+static bool seal_block_rec( emit_ctx *ctx, emit_block *b, int target ) {
+	if( b->start_pos == target ) {
+		b->wait_nexts--;
+		block_add_pred(ctx, b, ctx->current_block);
+		for( emit_block *cur = b; cur && cur->wait_nexts == 0 && ctx->wait_seal == cur; cur = ctx->wait_seal ) {
+			seal_block(ctx,cur);
+			ctx->wait_seal = cur->wait_seal_next;
+		}
+		return true;
+	}
+	// already before the target : nothing to find on this path
+	if( b->start_pos < target )
+		return false;
+	for_iter(blocks,pred,b->preds) {
+		if( pred->start_pos < b->start_pos && seal_block_rec(ctx,pred,target) )
+			return true;
+	}
+	return false;
+}
+
+/*
+	Emit a jump (conditional when `cond` is set) to the opcode at relative offset
+	`offs` and record it for later resolution. A backward jump (negative offset)
+	must additionally reach an already emitted block through seal_block_rec.
+*/
+static void register_block_jump( emit_ctx *ctx, int offs, bool cond ) {
+	int jump_pos = ctx->emit_pos;
+	if( cond )
+		emit_gen(ctx, JCOND, UNUSED, UNUSED, 0);
+	else
+		emit_gen(ctx, JUMP, UNUSED, UNUSED, 0);
+	register_jump(ctx, jump_pos, offs);
+	if( offs >= 0 )
+		return;
+	// backward jump : the target opcode has already been mapped
+	int target = ctx->pos_map[ctx->op_pos + 1 + offs];
+	if( !seal_block_rec(ctx, ctx->current_block, target) ) jit_assert();
+}
+
+/*
+	Called at the start of a loop block : scan the opcodes that follow the current one
+	for backward jumps that land on the current opcode.
+	Each of them increments `wait_nexts` on the current block. The first time, the
+	block is also marked unsealed and pushed on the `ctx->wait_seal` list.
+*/
+static void prepare_loop_block( emit_ctx *ctx ) {
+	emit_block *b = ctx->current_block;
+	// gather all backward jumps to know when the block will be finished
+	for(int i=ctx->op_pos+1;i<ctx->fun->nops;i++) {
+		hl_opcode *op = &ctx->fun->ops[i];
+		int offs = 0;
+		// the jump offset is stored in a different parameter depending on the opcode
+		switch( op->op ) {
+		case OJFalse:
+		case OJTrue:
+		case OJNotNull:
+		case OJNull:
+			offs = op->p2;
+			break;
+		case OJAlways:
+			offs = op->p1;
+			break;
+		case OJEq:
+		case OJNotEq:
+		case OJSLt:
+		case OJSGte:
+		case OJSLte:
+		case OJSGt:
+		case OJULt:
+		case OJUGte:
+		case OJNotLt:
+		case OJNotGte:
+			offs = op->p3;
+			break;
+		default:
+			break;
+		}
+		// offsets are relative to the opcode following the jump
+		if( offs < 0 && i + 1 + offs == ctx->op_pos ) {
+			emit_debug("  WAIT @%X\n",i);
+			b->wait_nexts++;
+			if( b->sealed ) {
+				b->sealed = false;
+				b->wait_seal_next = ctx->wait_seal;
+				ctx->wait_seal = b;
+			}
+		}
+	}
+}
+
+/*
+ Emit the comparison `a op b` followed by a conditional jump to the opcode at relative `offset`.
+ `at` and `bt` are the static types of `a` and `b` ; the comparison strategy depends on them :
+ - HDYN / HFUN on either side : result of hl_dyn_compare
+ - HTYPE : hl_same_type
+ - HNULL, HVIRTUAL : inline sequences, only OJEq and OJNotEq are accepted (asserts otherwise)
+ - HOBJ / HSTRUCT : swapped call if `b` is virtual, the runtime compareFun if there is one,
+ else a plain compare
+ - anything else : plain compare
+ Cases ending with `return` have emitted all their jumps themselves ; the others reach the
+ final register_block_jump at the end of the function.
+*/
+static void emit_jump_dyn( emit_ctx *ctx, hl_op op, hl_type *at, ereg a, hl_type *bt, ereg b, int offset ) {
+ if( at->kind == HDYN || bt->kind == HDYN || at->kind == HFUN || bt->kind == HFUN ) {
+ ereg args[2] = { a, b };
+ ereg ret = emit_native_call(ctx,hl_dyn_compare,args,2,&hlt_i32);
+ if( op == OJSGt || op == OJSGte ) {
+ // skip the jump when the result equals hl_invalid_comparison
+ emit_cmp(ctx, ret, LOAD_CONST(hl_invalid_comparison,&hlt_i32), OJEq);
+ int jinvalid = emit_jump(ctx, true);
+ emit_test(ctx, ret, op);
+ register_block_jump(ctx, offset, true);
+ patch_jump(ctx, jinvalid);
+ return;
+ }
+ emit_test(ctx, ret, op);
+ // continue
+ } else switch( at->kind ) {
+ case HTYPE:
+ {
+ ereg args[2] = { a, b };
+ ereg ret = emit_native_call(ctx,hl_same_type,args,2,&hlt_bool);
+ // the negated result of hl_same_type is what gets tested
+ emit_test(ctx, emit_gen_ext(ctx,UNOP,ret,UNUSED,M_I32,ONot), op);
+ }
+ break;
+ case HNULL:
+ {
+ if( op == OJEq ) {
+ // if( a == b || (a && b && a->v == b->v) ) goto
+ emit_cmp(ctx,a,b,OJEq);
+ register_block_jump(ctx,offset,true);
+ emit_test(ctx,a,OJNull);
+ int ja = emit_jump(ctx,true);
+ emit_test(ctx,b,OJNull);
+ int jb = emit_jump(ctx,true);
+ hl_type *vt = at->tparam;
+ emit_cmp(ctx, LOAD_MEM(a,HDYN_VALUE,vt), LOAD_MEM(b,HDYN_VALUE,vt), OJEq);
+ register_block_jump(ctx,offset,true);
+ patch_jump(ctx,ja);
+ patch_jump(ctx,jb);
+ } else if( op == OJNotEq ) {
+ // if( a != b && (!a || !b || a->v != b->v) ) goto
+ emit_cmp(ctx,a,b,OJEq);
+ int jeq = emit_jump(ctx,true);
+ emit_test(ctx,a,OJEq);
+ register_block_jump(ctx,offset,true);
+ split_block(ctx);
+ emit_test(ctx,b,OJEq);
+ register_block_jump(ctx,offset,true);
+ split_block(ctx);
+ hl_type *vt = at->tparam;
+ // NOTE(review): the value comparison uses OJNull and is followed by two conditional jumps - confirm this is intended
+ emit_cmp(ctx, LOAD_MEM(a,HDYN_VALUE,vt), LOAD_MEM(b,HDYN_VALUE,vt), OJNull);
+ add_jump_target(ctx, 0);
+ int jcmp = emit_jump(ctx,true);
+ register_block_jump(ctx,offset,true);
+ patch_jump(ctx,jcmp);
+ patch_jump(ctx,jeq);
+ } else
+ jit_assert();
+ }
+ return;
+ case HVIRTUAL:
+ if( bt->kind == HOBJ ) {
+ if( op == OJEq ) {
+ // if( a == b || (a && a->value == b) ) goto
+ emit_cmp(ctx, a, b, OJEq);
+ register_block_jump(ctx,offset,true);
+ split_block(ctx);
+ emit_test(ctx, a, OJNull);
+ int jnot = emit_jump(ctx, true);
+ emit_cmp(ctx, LOAD_MEM_PTR(a,HL_WSIZE), b, OJEq);
+ register_block_jump(ctx,offset,true);
+ split_block(ctx);
+ patch_jump(ctx, jnot);
+ } else if( op == OJNotEq ) {
+ // if( a != b && (!a || a->value != b) ) goto
+ emit_cmp(ctx, a, b, OJEq);
+ int jsame = emit_jump(ctx, true);
+ emit_test(ctx, a, OJNull);
+ register_block_jump(ctx,offset,true);
+ split_block(ctx);
+ emit_cmp(ctx, LOAD_MEM_PTR(a,HL_WSIZE), b, OJNotEq);
+ register_block_jump(ctx,offset,true);
+ split_block(ctx);
+ patch_jump(ctx,jsame);
+ } else
+ jit_assert();
+ } else {
+ if( op == OJEq ) {
+ // if( a == b || (a && b && a->value && a->value == b->value) ) goto
+ emit_cmp(ctx, a, b, OJEq);
+ register_block_jump(ctx,offset,true);
+ split_block(ctx);
+ emit_test(ctx, a, OJNull);
+ int ja = emit_jump(ctx, true);
+ emit_test(ctx, b, OJNull);
+ int jb = emit_jump(ctx, true);
+ ereg va = LOAD_MEM_PTR(a,HL_WSIZE);
+ emit_test(ctx, va, OJNull);
+ int jva = emit_jump(ctx, true);
+ ereg vb = LOAD_MEM_PTR(b,HL_WSIZE);
+ emit_cmp(ctx, va, vb, OJEq);
+ register_block_jump(ctx,offset,true);
+ split_block(ctx);
+ patch_jump(ctx,ja);
+ patch_jump(ctx,jb);
+ patch_jump(ctx,jva);
+ } else if( op == OJNotEq ) {
+ // if( a != b && (!a || !b || !a->value || a->value != b->value) ) goto
+ emit_cmp(ctx, a, b, OJEq);
+ int jeq1 = emit_jump(ctx, true);
+ emit_test(ctx, a, OJNull);
+ int ja = emit_jump(ctx, true);
+ emit_test(ctx, b, OJNull);
+ int jb = emit_jump(ctx, true);
+ ereg va = LOAD_MEM_PTR(a,HL_WSIZE);
+ emit_test(ctx, va, OJNull);
+ int jva = emit_jump(ctx, true);
+ ereg vb = LOAD_MEM_PTR(b,HL_WSIZE);
+ emit_cmp(ctx, va, vb, OJEq);
+ int jeq2 = emit_jump(ctx, true);
+ split_block(ctx);
+ // ja, jb and jva land on the unconditional jump to the target
+ patch_jump(ctx,ja);
+ patch_jump(ctx,jb);
+ patch_jump(ctx,jva);
+ register_block_jump(ctx,offset,false);
+ split_block(ctx);
+ patch_jump(ctx,jeq1);
+ patch_jump(ctx,jeq2);
+ } else
+ jit_assert();
+ }
+ return;
+ case HOBJ:
+ case HSTRUCT:
+ if( bt->kind == HVIRTUAL ) {
+ emit_jump_dyn(ctx,op,bt,b,at,a,offset); // inverse
+ return;
+ }
+ if( hl_get_obj_rt(at)->compareFun ) {
+ ereg args[] = {a,b};
+ switch( op ) {
+ case OJEq:
+ {
+ // if( a == b || (a && b && cmp(a,b) == 0) ) goto
+ emit_cmp(ctx,a,b,OJEq);
+ int jeq = emit_jump(ctx, true);
+ emit_test(ctx,a,OJNull);
+ int ja = emit_jump(ctx, true);
+ emit_test(ctx,b,OJNull);
+ int jb = emit_jump(ctx, true);
+ emit_test(ctx, emit_call_fid(ctx,(int)(int_val)at->obj->rt->compareFun,args,2,M_I32),OJNotNull);
+ int jcmp = emit_jump(ctx, true);
+ patch_jump(ctx, jeq);
+ register_block_jump(ctx, offset, false);
+ split_block(ctx);
+ patch_jump(ctx, ja);
+ patch_jump(ctx, jb);
+ patch_jump(ctx, jcmp);
+ }
+ break;
+ case OJNotEq:
+ {
+ // if( a != b && (!a || !b || cmp(a,b) != 0) ) goto
+ emit_cmp(ctx,a,b,OJEq);
+ add_jump_target(ctx, 0);
+ int jeq = emit_jump(ctx, true);
+ emit_test(ctx,a,OJEq);
+ register_block_jump(ctx,offset,true);
+ split_block(ctx);
+ emit_test(ctx,b,OJEq);
+ register_block_jump(ctx,offset,true);
+ split_block(ctx);
+ emit_test(ctx, emit_call_fid(ctx,(int)(int_val)at->obj->rt->compareFun,args,2,M_I32),OJNotNull);
+ register_block_jump(ctx,offset,true);
+ patch_jump(ctx,jeq);
+ }
+ break;
+ default:
+ {
+ // if( a && b && cmp(a,b) ~op~ 0 ) goto
+ emit_test(ctx,a,OJNull);
+ int ja = emit_jump(ctx, true);
+ emit_test(ctx,b,OJNull);
+ int jb = emit_jump(ctx, true);
+ emit_cmp(ctx, emit_call_fid(ctx,(int)(int_val)at->obj->rt->compareFun,args,2,M_I32), LOAD_CONST(0,&hlt_i32),op);
+ register_block_jump(ctx,offset,true);
+ patch_jump(ctx,ja);
+ patch_jump(ctx,jb);
+ }
+ break;
+ }
+ return;
+ }
+ // fallthrough
+ default:
+ emit_cmp(ctx, a, b, op);
+ break;
+ }
+ // shared tail for the cases that only set up the comparison
+ register_block_jump(ctx, offset, true);
+}
+
+/*
+	Translate one HashLink opcode `o` of the current function into emit instructions.
+
+	`dst`, `ra` and `rb` are the virtual registers designated by p1, p2 and p3 ; they are
+	computed for every opcode but are only meaningful when the matching parameter is
+	a register index for that opcode.
+	Values are read with LOAD(reg) and written with STORE(reg,value). Unsupported
+	opcodes or operand types end up in jit_assert / jit_error.
+*/
+static void emit_opcode( emit_ctx *ctx, hl_opcode *o ) {
+	vreg *dst = R(o->p1);
+	vreg *ra = R(o->p2);
+	vreg *rb = R(o->p3);
+	hl_module *m = ctx->mod;
+#ifdef HL_DEBUG
+	int uid = (ctx->fun->findex << 16) | ctx->op_pos;
+	__ignore(&uid);
+#endif
+	switch( o->op ) {
+	case OMov:
+	case OUnsafeCast:
+		STORE(dst, emit_gen(ctx,MOV,LOAD(ra),UNUSED,hl_type_mode(ra->t)));
+		break;
+	case OInt:
+		STORE(dst, LOAD_CONST(m->code->ints[o->p2], dst->t));
+		break;
+	case OBool:
+		STORE(dst, LOAD_CONST(o->p2, &hlt_bool));
+		break;
+	case ONull:
+		STORE(dst, LOAD_CONST(0, dst->t));
+		break;
+	case OFloat:
+		{
+			// the constant is passed as its raw bit pattern
+			union {
+				float f;
+				double d;
+				uint64 i;
+			} v;
+			if( dst->t->kind == HF32 ) {
+				v.i = 0;
+				v.f = (float)m->code->floats[o->p2];
+			} else
+				v.d = m->code->floats[o->p2];
+			STORE(dst, LOAD_CONST(v.i, dst->t));
+		}
+		break;
+	case OString:
+		STORE(dst, LOAD_CONST_PTR(hl_get_ustring(m->code,o->p2)));
+		break;
+	case OBytes:
+		{
+			char *b = m->code->version >= 5 ? m->code->bytes + m->code->bytes_pos[o->p2] : m->code->strings[o->p2];
+			STORE(dst,LOAD_CONST_PTR(b));
+		}
+		break;
+	case OGetGlobal:
+		{
+			int offs = m->globals_indexes[o->p2];
+			STORE(dst, LOAD_MEM_PTR(LOAD_CONST_PTR(m->globals_data),offs));
+		}
+		break;
+	case OSetGlobal:
+		{
+			int offs = m->globals_indexes[o->p1];
+			STORE_MEM(LOAD_CONST_PTR(m->globals_data),offs,LOAD(ra));
+		}
+		break;
+	case OCall0:
+		emit_call_fun(ctx, dst, o->p2, 0, NULL);
+		break;
+	case OCall1:
+		emit_call_fun(ctx, dst, o->p2, 1, &o->p3);
+		break;
+	case OCall2:
+		{
+			// for OCall2 the second argument is stored in the `extra` pointer itself
+			int args[2] = { o->p3, (int)(int_val)o->extra };
+			emit_call_fun(ctx, dst, o->p2, 2, args);
+		}
+		break;
+	case OCall3:
+		{
+			int args[3] = { o->p3, o->extra[0], o->extra[1] };
+			emit_call_fun(ctx, dst, o->p2, 3, args);
+		}
+		break;
+	case OCall4:
+		{
+			int args[4] = { o->p3, o->extra[0], o->extra[1], o->extra[2] };
+			emit_call_fun(ctx, dst, o->p2, 4, args);
+		}
+		break;
+	case OCallN:
+		emit_call_fun(ctx, dst, o->p2, o->p3, o->extra);
+		break;
+	case OSub:
+	case OAdd:
+	case OMul:
+	case OSDiv:
+	case OUDiv:
+	case OShl:
+	case OSShr:
+	case OUShr:
+	case OAnd:
+	case OOr:
+	case OXor:
+	case OSMod:
+	case OUMod:
+		{
+			ereg va = LOAD(ra);
+			ereg vb = LOAD(rb);
+			ereg r;
+			if( (dst->t->kind == HF32 || dst->t->kind == HF64) && o->op == OSMod ) {
+				// float modulo goes through the C library
+				ereg args[] = {va,vb};
+				r = emit_native_call(ctx, dst->t->kind == HF32 ? (void*)fmodf : (void*)fmod, args, 2, dst->t);
+			} else {
+				r = emit_gen_ext(ctx, BINOP, va, vb, hl_type_mode(dst->t), o->op);
+			}
+			STORE(dst, r);
+		}
+		break;
+	case ONeg:
+		STORE(dst, emit_gen_ext(ctx, UNOP, LOAD(ra), UNUSED, hl_type_mode(dst->t), o->op));
+		break;
+	case ONot:
+		STORE(dst, emit_gen_ext(ctx, UNOP, LOAD(ra), LOAD_CONST(1,&hlt_i32), hl_type_mode(dst->t), OXor));
+		break;
+	case OJFalse:
+	case OJTrue:
+	case OJNotNull:
+	case OJNull:
+		{
+			emit_test(ctx, LOAD(dst), o->op);
+			register_block_jump(ctx, o->p2, true);
+			add_jump_target(ctx, 0);
+		}
+		break;
+	case OJEq:
+	case OJNotEq:
+	case OJSLt:
+	case OJSGte:
+	case OJSLte:
+	case OJSGt:
+	case OJULt:
+	case OJUGte:
+	case OJNotLt:
+	case OJNotGte:
+		emit_jump_dyn(ctx,o->op,dst->t,LOAD(dst),ra->t,LOAD(ra),o->p3);
+		add_jump_target(ctx, 0);
+		break;
+	case OJAlways:
+		register_block_jump(ctx, o->p1, false);
+		break;
+	case OToDyn:
+		if( ra->t->kind == HBOOL ) {
+			ereg arg = LOAD(ra);
+			STORE(dst, emit_native_call(ctx,hl_alloc_dynbool,&arg,1,&hlt_dyn));
+		} else {
+			ereg arg = LOAD_CONST_PTR(ra->t);
+			ereg ret = emit_native_call(ctx,hl_alloc_dynamic,&arg,1,&hlt_dyn);
+			STORE_MEM(ret,HDYN_VALUE,LOAD(ra));
+			STORE(dst, ret);
+		}
+		break;
+	case OToSFloat:
+	case OToInt:
+	case OToUFloat:
+		STORE(dst, emit_conv(ctx,LOAD(ra),hl_type_mode(ra->t),hl_type_mode(dst->t), o->op == OToUFloat));
+		break;
+	case ORet:
+		emit_gen(ctx, RET, dst->t->kind == HVOID ? UNUSED : LOAD(dst), 0, M_NORET);
+		patch_instr_mode(ctx, hl_type_mode(dst->t));
+		break;
+	case OIncr:
+	case ODecr:
+		STORE(dst, emit_gen_ext(ctx,UNOP,LOAD(dst),UNUSED,hl_type_mode(dst->t),o->op));
+		break;
+	case ONew:
+		{
+			ereg arg = UNUSED;
+			void *allocFun = NULL;
+			int nargs = 1;
+			switch( dst->t->kind ) {
+			case HOBJ:
+			case HSTRUCT:
+				allocFun = hl_alloc_obj;
+				break;
+			case HDYNOBJ:
+				allocFun = hl_alloc_dynobj;
+				nargs = 0;
+				break;
+			case HVIRTUAL:
+				allocFun = hl_alloc_virtual;
+				break;
+			default:
+				jit_assert();
+			}
+			if( nargs ) arg = LOAD_CONST_PTR(dst->t);
+			STORE(dst, emit_native_call(ctx,allocFun,&arg,nargs,dst->t));
+		}
+		break;
+	case OInstanceClosure:
+		{
+			ereg args[3];
+			args[0] = LOAD_CONST_PTR(m->code->functions[m->functions_indexes[o->p2]].type);
+			einstr *e = emit_instr(ctx, LOAD_FUN);
+			e->mode = M_PTR;
+			e->size_offs = o->p2;
+			args[1] = new_value(ctx);
+			args[2] = LOAD(rb);
+			STORE(dst, emit_native_call(ctx,hl_alloc_closure_ptr,args,3,dst->t));
+		}
+		break;
+	case OVirtualClosure:
+		{
+			// look for the prototype entry with index p3, walking up the class hierarchy
+			hl_type *t = NULL;
+			hl_type *ot = ra->t;
+			while( t == NULL ) {
+				int i;
+				for(i=0;i<ot->obj->nproto;i++) {
+					hl_obj_proto *pp = ot->obj->proto + i;
+					if( pp->pindex == o->p3 ) {
+						t = m->code->functions[m->functions_indexes[pp->findex]].type;
+						break;
+					}
+				}
+				ot = ot->obj->super;
+			}
+			ereg args[3];
+			ereg obj = LOAD(ra);
+			args[0] = LOAD_CONST_PTR(t);
+			args[1] = LOAD_OBJ_METHOD(obj,o->p3);
+			args[2] = obj;
+			STORE(dst, emit_native_call(ctx,hl_alloc_closure_ptr,args,3,dst->t));
+		}
+		break;
+	case OCallClosure:
+		if( ra->t->kind == HDYN ) {
+			// dynamic closure : arguments are passed as an array of dynamic values on the stack
+			int i;
+			ereg st = emit_gen_size(ctx, ALLOC_STACK, o->p3 * HL_WSIZE);
+			for(i=0;i<o->p3;i++) {
+				vreg *r = R(o->extra[i]);
+				if( !hl_is_dynamic(r->t) ) jit_assert();
+				STORE_MEM(st,i*HL_WSIZE,LOAD(r));
+			}
+			ereg args[3];
+			args[0] = LOAD(ra);
+			args[1] = st;
+			args[2] = LOAD_CONST(o->p3,&hlt_i32);
+			emit_dyn_cast(ctx,emit_native_call(ctx,hl_dyn_call,args,3,dst->t),ra->t,dst);
+		} else {
+			ereg r = LOAD(ra);
+			ereg *args = get_tmp_args(ctx,o->p3+1);
+			// Code for if( c->hasValue ) c->fun(c->value,args) else c->fun(args)
+			ereg has = LOAD_MEM(r,HL_WSIZE*2,&hlt_i32);
+			emit_test(ctx, has, OJNull);
+			int jidx = emit_jump(ctx, true);
+			int i;
+			args[0] = LOAD_MEM_PTR(r,HL_WSIZE * 3);
+			for(i=0;i<o->p3;i++)
+				args[i+1] = LOAD(R(o->extra[i]));
+			ereg v1 = emit_dyn_call(ctx,LOAD_MEM_PTR(r,HL_WSIZE),args,o->p3 + 1,dst->t);
+			STORE(dst, v1);
+			int jend = emit_jump(ctx, false);
+			patch_jump(ctx, jidx);
+			for(i=0;i<o->p3;i++)
+				args[i] = LOAD(R(o->extra[i]));
+			ereg v2 = emit_dyn_call(ctx,LOAD_MEM_PTR(r,HL_WSIZE),args,o->p3,dst->t);
+			STORE(dst, v2);
+			patch_jump(ctx, jend);
+		}
+		break;
+	case OStaticClosure:
+		{
+			vclosure *c = alloc_static_closure(ctx,o->p2);
+			STORE(dst, LOAD_CONST_PTR(c));
+		}
+		break;
+	case OField:
+		{
+			switch( ra->t->kind ) {
+			case HOBJ:
+			case HSTRUCT:
+				{
+					hl_runtime_obj *rt = hl_get_obj_rt(ra->t);
+					ereg r = LOAD(ra);
+					if( dst->t->kind == HSTRUCT ) {
+						hl_type *ft = hl_obj_field_fetch(ra->t,o->p3)->t;
+						if( ft->kind == HPACKED ) {
+							// packed struct : the result is the address of the field
+							STORE(dst,OFFSET(r, UNUSED, 0, rt->fields_indexes[o->p3]));
+							break;
+						}
+					}
+					STORE(dst, LOAD_MEM(r,rt->fields_indexes[o->p3],dst->t));
+				}
+				break;
+			case HVIRTUAL:
+				// code for : if( hl_vfields(o)[f] ) r = *hl_vfields(o)[f]; else r = hl_dyn_get(o,hash(field),vt)
+				{
+					ereg obj = LOAD(ra);
+					ereg field = LOAD_MEM_PTR(obj,sizeof(vvirtual)+HL_WSIZE*o->p3);
+					emit_test(ctx, field, OJNull);
+					int jidx = emit_jump(ctx, true);
+					ereg v1 = LOAD_MEM(field,0,dst->t);
+					STORE(dst, v1);
+					int jend = emit_jump(ctx, false);
+					patch_jump(ctx, jidx);
+					bool need_type = dyn_need_type(dst->t);
+					ereg args[3];
+					args[0] = obj;
+					args[1] = LOAD_CONST(ra->t->virt->fields[o->p3].hashed_name,&hlt_i32);
+					if( need_type ) args[2] = LOAD_CONST_PTR(dst->t);
+					ereg v2 = emit_native_call(ctx,get_dynget(dst->t),args,need_type?3:2,dst->t);
+					STORE(dst, v2);
+					patch_jump(ctx, jend);
+				}
+				break;
+			default:
+				jit_assert();
+				break;
+			}
+		}
+		break;
+	case OSetField:
+		{
+			switch( dst->t->kind ) {
+			case HOBJ:
+			case HSTRUCT:
+				{
+					ereg obj = LOAD(dst);
+					ereg val = LOAD(rb);
+					hl_runtime_obj *rt = hl_get_obj_rt(dst->t);
+					int field_pos = rt->fields_indexes[o->p2];
+					if( rb->t->kind == HSTRUCT ) {
+						hl_type *ft = hl_obj_field_fetch(dst->t,o->p2)->t;
+						if( ft->kind == HPACKED ) {
+							// packed struct : copy the whole struct content into the field
+							emit_store_size(ctx,obj,field_pos,val,0,hl_get_obj_rt(ft->tparam)->size);
+							break;
+						}
+					}
+					STORE_MEM(obj,field_pos, val);
+				}
+				break;
+			case HVIRTUAL:
+				// code for : if( hl_vfields(o)[f] ) *hl_vfields(o)[f] = v; else hl_dyn_set(o,hash(field),vt,v)
+				{
+					ereg obj = LOAD(dst);
+					ereg val = LOAD(rb);
+					ereg field = LOAD_MEM_PTR(obj,sizeof(vvirtual)+HL_WSIZE*o->p2);
+					emit_test(ctx, field, OJNull);
+					int jidx = emit_jump(ctx, true);
+					STORE_MEM(field, 0, val);
+					int jend = emit_jump(ctx, false);
+					patch_jump(ctx, jidx);
+					// NOTE(review): the setter and need_type are selected from dst->t (the virtual)
+					// while the type argument passed is rb->t - confirm which type is intended
+					bool need_type = dyn_need_type(dst->t);
+					ereg args[4];
+					args[0] = obj;
+					args[1] = LOAD_CONST(dst->t->virt->fields[o->p2].hashed_name,&hlt_i32);
+					if( need_type ) {
+						args[2] = LOAD_CONST_PTR(rb->t);
+						args[3] = val;
+					} else {
+						args[2] = val;
+					}
+					emit_native_call(ctx,get_dynset(dst->t),args,need_type?4:3,dst->t);
+					patch_jump(ctx, jend);
+				}
+				break;
+			default:
+				jit_assert();
+				break;
+			}
+		}
+		break;
+	case OGetThis:
+		{
+			vreg *r = R(0);
+			ereg obj = LOAD(r);
+			hl_runtime_obj *rt = hl_get_obj_rt(r->t);
+			int field_pos = rt->fields_indexes[o->p2];
+			if( dst->t->kind == HSTRUCT ) {
+				hl_type *ft = hl_obj_field_fetch(r->t,o->p2)->t;
+				if( ft->kind == HPACKED ) {
+					STORE(dst, OFFSET(obj, UNUSED, 0, field_pos));
+					break;
+				}
+			}
+			STORE(dst, LOAD_MEM(obj, field_pos, dst->t));
+		}
+		break;
+	case OSetThis:
+		{
+			vreg *r = R(0);
+			ereg obj = LOAD(r);
+			ereg val = LOAD(ra);
+			hl_runtime_obj *rt = hl_get_obj_rt(r->t);
+			int field_pos = rt->fields_indexes[o->p1];
+			if( ra->t->kind == HSTRUCT ) {
+				hl_type *ft = hl_obj_field_fetch(r->t,o->p1)->t;
+				if( ft->kind == HPACKED ) {
+					emit_store_size(ctx, obj, field_pos, val, 0, hl_get_obj_rt(ft->tparam)->size);
+					break;
+				}
+			}
+			STORE_MEM(obj,field_pos,val);
+		}
+		break;
+	case OCallThis:
+		{
+			int i;
+			int nargs = o->p3 + 1;
+			ereg obj = LOAD(R(0));
+			ereg *args = get_tmp_args(ctx, nargs);
+			// register 0 is passed as first argument, followed by the opcode arguments
+			args[0] = obj;
+			for(i=1;i<nargs;i++)
+				args[i] = LOAD(R(o->extra[i-1]));
+			ereg fun = LOAD_OBJ_METHOD(obj, o->p2);
+			STORE(dst, emit_dyn_call(ctx,fun,args,nargs,dst->t));
+		}
+		break;
+	case OCallMethod:
+		{
+			vreg *r = R(o->extra[0]);
+			ereg obj = LOAD(r);
+			switch( r->t->kind ) {
+			case HOBJ:
+				{
+					int i;
+					int nargs = o->p3;
+					ereg *args = get_tmp_args(ctx, nargs);
+					// extra[0] is the object itself
+					for(i=0;i<nargs;i++)
+						args[i] = LOAD(R(o->extra[i]));
+					ereg fun = LOAD_OBJ_METHOD(obj, o->p2);
+					STORE(dst, emit_dyn_call(ctx,fun,args,nargs,dst->t));
+				}
+				break;
+			case HVIRTUAL:
+				// code for : if( (fun=hl_vfields(o)[f]) ) dst = fun(o->value,args...); else dst = hl_dyn_call_obj(o->value,ft,field,args,&ret)
+				{
+					vreg *_o = R(o->extra[0]);
+					ereg obj = LOAD(_o);
+					ereg fun = LOAD_MEM_PTR(obj,sizeof(vvirtual)+HL_WSIZE*o->p2);
+					emit_test(ctx, fun, OJNull);
+					int jidx = emit_jump(ctx, true);
+
+					int nargs = o->p3;
+					ereg *args = get_tmp_args(ctx, nargs);
+					int i;
+					args[0] = LOAD_MEM_PTR(obj,HL_WSIZE);
+					for(i=1;i<nargs;i++)
+						args[i] = LOAD(R(o->extra[i]));
+					ereg v1 = emit_dyn_call(ctx,fun,args,nargs,dst->t);
+					STORE(dst, v1);
+
+					int jend = emit_jump(ctx, false);
+					patch_jump(ctx, jidx);
+
+					// slow path : arguments are passed by address in a stack allocated array
+					nargs = o->p3 - 1;
+					ereg eargs = nargs == 0 ? LOAD_CONST_PTR(NULL) : emit_gen_size(ctx, ALLOC_STACK, nargs * HL_WSIZE);
+					for(i=0;i<nargs;i++) {
+						vreg *r = R(o->extra[i+1]);
+						if( hl_is_ptr(r->t) )
+							STORE_MEM(eargs,i*HL_WSIZE,LOAD(r));
+						else
+							STORE_MEM(eargs,i*HL_WSIZE,emit_gen(ctx, ADDRESS, LOAD(r), UNUSED, M_PTR));
+					}
+					bool need_dyn = !hl_is_ptr(dst->t) && dst->t->kind != HVOID;
+					ereg edyn = need_dyn ? emit_gen_size(ctx, ALLOC_STACK, sizeof(vdynamic)) : LOAD_CONST_PTR(NULL);
+
+					args = get_tmp_args(ctx, 5);
+					args[0] = LOAD_MEM_PTR(obj,HL_WSIZE);
+					args[1] = LOAD_CONST_PTR(_o->t->virt->fields[o->p2].t);
+					args[2] = LOAD_CONST(_o->t->virt->fields[o->p2].hashed_name,&hlt_i32);
+					args[3] = eargs;
+					args[4] = edyn;
+
+					ereg v2 = emit_native_call(ctx, hl_dyn_call_obj, args, 5, &hlt_bytes);
+					if( need_dyn )
+						STORE(dst, LOAD_MEM(edyn,HDYN_VALUE,dst->t));
+					else
+						STORE(dst, v2);
+					patch_jump(ctx, jend);
+				}
+				break;
+			default:
+				jit_assert();
+				break;
+			}
+		}
+		break;
+	case OThrow:
+	case ORethrow:
+		{
+			ereg arg = LOAD(dst);
+			emit_native_call(ctx, o->op == OThrow ? hl_throw : hl_rethrow, &arg, 1, NULL);
+		}
+		break;
+	case OLabel:
+		split_block(ctx);
+		prepare_loop_block(ctx);
+		break;
+	case OGetI8:
+	case OGetI16:
+	case OGetMem:
+		{
+			hl_type *size_t = o->op == OGetI8 ? &hlt_ui8 : o->op == OGetI16 ? &hlt_ui16 : dst->t;
+			ereg offs = OFFSET(LOAD(ra),LOAD(rb),1,0);
+			ereg val = emit_load_mem(ctx, offs, 0, size_t, dst->t);
+			STORE(dst, val);
+		}
+		break;
+	case OSetI8:
+	case OSetI16:
+	case OSetMem:
+		{
+			ereg offs = OFFSET(LOAD(dst), LOAD(ra),1,0);
+			ereg val = LOAD(rb);
+			STORE_MEM(offs, 0, val);
+			if( o->op != OSetMem ) patch_instr_mode(ctx, o->op == OSetI8 ? M_UI8 : M_UI16);
+		}
+		break;
+	case OType:
+		STORE(dst, LOAD_CONST_PTR(m->code->types + o->p2));
+		break;
+	case OGetType:
+		{
+			// a null value gives hlt_void, otherwise the type is read at offset 0 of the value
+			ereg r = LOAD(ra);
+			emit_test(ctx, r, OJNotNull);
+			int jidx = emit_jump(ctx, true);
+			ereg v1 = LOAD_CONST_PTR(&hlt_void);
+			STORE(dst,v1);
+			int jend = emit_jump(ctx, false);
+			patch_jump(ctx, jidx);
+			ereg v2 = LOAD_MEM_PTR(r,0);
+			STORE(dst,v2);
+			patch_jump(ctx, jend);
+		}
+		break;
+	case OGetArray:
+		{
+			if( ra->t->kind == HABSTRACT ) {
+				int osize;
+				bool isPtr = dst->t->kind != HOBJ && dst->t->kind != HSTRUCT;
+				if( isPtr )
+					osize = HL_WSIZE; // a pointer into the carray
+				else {
+					hl_runtime_obj *rt = hl_get_obj_rt(dst->t);
+					osize = rt->size; // a mem offset into it
+				}
+				// power of two sizes up to 8 are used directly as scale, others need a multiply
+				ereg pos = (osize <= 8 && ((osize - 1) & osize) == 0) ? OFFSET(LOAD(ra), LOAD(rb), osize, 0) : OFFSET(LOAD(ra), emit_gen_ext(ctx,BINOP,LOAD(rb),MK_CONST(osize),M_I32,OMul),1,0);
+				ereg val = isPtr ? LOAD_MEM_PTR(pos,0) : pos;
+				STORE(dst, val);
+			} else {
+				ereg pos = OFFSET(LOAD(ra), LOAD(rb), hl_type_size(dst->t), sizeof(varray));
+				STORE(dst, LOAD_MEM(pos,0,dst->t));
+			}
+		}
+		break;
+	case OSetArray:
+		{
+			if( dst->t->kind == HABSTRACT ) {
+				int osize;
+				bool isPtr = rb->t->kind != HOBJ && rb->t->kind != HSTRUCT;
+				if( isPtr) {
+					osize = HL_WSIZE;
+				} else {
+					hl_runtime_obj *rt = hl_get_obj_rt(rb->t);
+					osize = rt->size;
+				}
+				ereg pos = (osize <= 8 && ((osize - 1) & osize) == 0) ? OFFSET(LOAD(dst), LOAD(ra), osize, 0) : OFFSET(LOAD(dst), emit_gen_ext(ctx,BINOP,LOAD(ra),MK_CONST(osize),M_I32,OMul),1,0);
+				emit_store_size(ctx, pos, 0, LOAD(rb), 0, osize);
+			} else {
+				// NOTE(review): the element scale is hl_type_size(dst->t), dst being the array register - confirm vs rb->t
+				ereg pos = OFFSET(LOAD(dst), LOAD(ra), hl_type_size(dst->t), sizeof(varray));
+				STORE_MEM(pos, 0, LOAD(rb));
+			}
+		}
+		break;
+	case OArraySize:
+		STORE(dst, LOAD_MEM(LOAD(ra),HL_WSIZE*2,&hlt_i32));
+		break;
+	case ORef:
+		STORE(dst, emit_gen(ctx, ADDRESS, LOAD(ra), UNUSED, M_PTR));
+		break;
+	case OUnref:
+		STORE(dst, LOAD_MEM(LOAD(ra),0,dst->t));
+		break;
+	case OSetref:
+		STORE_MEM(LOAD(dst),0,LOAD(ra));
+		break;
+	case ORefData:
+		switch( ra->t->kind ) {
+		case HARRAY:
+			STORE(dst, OFFSET(LOAD(ra),UNUSED,0,sizeof(varray)));
+			break;
+		default:
+			jit_assert();
+		}
+		break;
+	case ORefOffset:
+		STORE(dst, OFFSET(LOAD(ra),LOAD(rb), hl_type_size(dst->t->tparam),0));
+		break;
+	case OToVirtual:
+		{
+			ereg args[2];
+			args[0] = LOAD_CONST_PTR(dst->t);
+			args[1] = LOAD(ra);
+			STORE(dst, emit_native_call(ctx,hl_to_virtual,args,2, dst->t));
+		}
+		break;
+	case OMakeEnum:
+		{
+			ereg args[2];
+			args[0] = LOAD_CONST_PTR(dst->t);
+			args[1] = LOAD_CONST(o->p2,&hlt_i32);
+			ereg en = emit_native_call(ctx, hl_alloc_enum, args, 2, dst->t);
+			hl_enum_construct *c = &dst->t->tenum->constructs[o->p2];
+			// store each constructor parameter at its offset
+			for(int i=0;i<c->nparams;i++)
+				STORE_MEM(en, c->offsets[i], LOAD(R(o->extra[i])));
+			STORE(dst, en);
+		}
+		break;
+	case OEnumAlloc:
+		{
+			ereg args[2];
+			args[0] = LOAD_CONST_PTR(dst->t);
+			args[1] = LOAD_CONST(o->p2,&hlt_i32);
+			STORE(dst, emit_native_call(ctx, hl_alloc_enum, args, 2, dst->t));
+		}
+		break;
+	case OEnumField:
+		{
+			hl_enum_construct *c = &ra->t->tenum->constructs[o->p3];
+			int slot = (int)(int_val)o->extra;
+			STORE(dst, LOAD_MEM(LOAD(ra),c->offsets[slot], dst->t));
+		}
+		break;
+	case OEnumIndex:
+		STORE(dst, LOAD_MEM(LOAD(ra),HL_WSIZE,dst->t));
+		break;
+	case OSetEnumField:
+		{
+			hl_enum_construct *c = &dst->t->tenum->constructs[0];
+			STORE_MEM(LOAD(dst), c->offsets[o->p2], LOAD(rb));
+		}
+		break;
+	case ONullCheck:
+		{
+			emit_test(ctx, LOAD(dst), OJNotNull);
+			add_jump_target(ctx, 0);
+			int jok = emit_jump(ctx, true);
+
+			// ----- DETECT FIELD ACCESS ----------------
+			hl_function *f = ctx->fun;
+			hl_opcode *next = f->ops + ctx->op_pos + 1;
+			bool null_field_access = false;
+			int hashed_name = 0;
+			// skip const and operation between nullcheck and access
+			while( (next < f->ops + f->nops - 1) && (next->op >= OInt && next->op <= ODecr) ) {
+				next++;
+			}
+			if( (next->op == OField && next->p2 == o->p1) || (next->op == OSetField && next->p1 == o->p1) ) {
+				int fid = next->op == OField ? next->p3 : next->p2;
+				hl_obj_field *f = NULL;
+				if( dst->t->kind == HOBJ || dst->t->kind == HSTRUCT )
+					f = hl_obj_field_fetch(dst->t, fid);
+				else if( dst->t->kind == HVIRTUAL )
+					f = dst->t->virt->fields + fid;
+				if( f == NULL ) jit_assert();
+				null_field_access = true;
+				hashed_name = f->hashed_name;
+			} else if( (next->op >= OCall1 && next->op <= OCallN) && next->p3 == o->p1 ) {
+				// NOTE(review): fid is -1 when next->p2 is negative and is then used as an index - confirm this cannot happen
+				int fid = next->p2 < 0 ? -1 : m->functions_indexes[next->p2];
+				hl_function *cf = m->code->functions + fid;
+				const uchar *name = fun_field_name(cf);
+				null_field_access = true;
+				hashed_name = hl_hash_gen(name, true);
+			}
+			// -----------------------------------------
+			if( null_field_access ) {
+				einstr *e = emit_instr(ctx, PUSH_CONST);
+				e->mode = M_PTR;
+				e->value = hashed_name;
+			}
+			emit_native_call(ctx, null_field_access ? (void*)hl_jit_null_field_access : (void*)hl_null_access, NULL, 0, NULL);
+			patch_jump(ctx, jok);
+		}
+		break;
+	case OSafeCast:
+		emit_dyn_cast(ctx, LOAD(ra), ra->t, dst);
+		break;
+	case ODynGet:
+		{
+			bool need_type = dyn_need_type(dst->t);
+			ereg args[3];
+			args[0] = LOAD(ra);
+			args[1] = LOAD_CONST(hl_hash_utf8(m->code->strings[o->p3]),&hlt_i32);
+			if( need_type ) args[2] = LOAD_CONST_PTR(dst->t);
+			STORE(dst, emit_native_call(ctx, get_dynget(dst->t), args, need_type ? 3 : 2, dst->t));
+		}
+		break;
+	case ODynSet:
+		{
+			// NOTE(review): need_type is computed from dst->t (the object) while the setter
+			// is selected from rb->t (the value) - confirm both should not use rb->t
+			bool need_type = dyn_need_type(dst->t);
+			ereg args[4];
+			args[0] = LOAD(dst);
+			args[1] = LOAD_CONST(hl_hash_utf8(m->code->strings[o->p2]),&hlt_i32);
+			if( need_type ) {
+				args[2] = LOAD_CONST_PTR(rb->t);
+				args[3] = LOAD(rb);
+			} else
+				args[2] = LOAD(rb);
+			emit_native_call(ctx, get_dynset(rb->t), args, need_type ? 4 : 3, &hlt_void);
+		}
+		break;
+	case OTrap:
+		{
+			ereg st = emit_gen_size(ctx, ALLOC_STACK, sizeof(hl_trap_ctx));
+
+			ereg thread, current_addr;
+			// tinf/trap stay NULL unless set below : &ptr->field then gives the field offset
+			static hl_thread_info *tinf = NULL;
+			static hl_trap_ctx *trap = NULL;
+#			ifndef HL_THREADS
+			if( tinf == NULL ) tinf = hl_get_thread();
+			current_addr = LOAD_CONST_PTR(&tinf->trap_current);
+#			else
+			thread = emit_native_call(ctx, hl_get_thread, NULL, 0, &hlt_bytes);
+			current_addr = OFFSET(thread, UNUSED, 0, (int)(int_val)&tinf->trap_current);
+#			endif
+			STORE_MEM(st, (int)(int_val)&trap->prev, LOAD_MEM_PTR(current_addr,0));
+			STORE_MEM(current_addr, 0, st);
+
+
+			/*
+				trap E,@catch
+				catch g
+				catch g2
+				...
+				@:catch
+
+				// Before haxe 5
+				This is a bit hackish : we want to detect the type of exception filtered by the catch so we check the following
+				sequence of HL opcodes:
+
+				trap E,@catch
+				...
+				@catch:
+				global R, _
+				call _, ???(R,E)
+
+				??? is expected to be hl.BaseType.check
+			*/
+			hl_function *f = ctx->fun;
+			hl_opcode *cat = f->ops + ctx->op_pos + 1;
+			hl_opcode *next = f->ops + ctx->op_pos + 1 + o->p2;
+			hl_opcode *next2 = f->ops + ctx->op_pos + 2 + o->p2;
+			void *addr = NULL;
+			int offs = 0;
+			if( cat->op == OCatch || (next->op == OGetGlobal && next2->op == OCall2 && next2->p3 == next->p1 && dst->id == (int)(int_val)next2->extra) ) {
+				int gindex = cat->op == OCatch ? cat->p1 : next->p2;
+				hl_type *gt = m->code->globals[gindex];
+				while( gt->kind == HOBJ && gt->obj->super ) gt = gt->obj->super;
+				if( gt->kind == HOBJ && gt->obj->nfields && gt->obj->fields[0].t->kind == HTYPE ) {
+					addr = m->globals_data;
+					offs = m->globals_indexes[gindex];
+				}
+			}
+			STORE_MEM(st, (int)(int_val)&trap->tcheck, addr ? LOAD_MEM_PTR(LOAD_CONST_PTR(addr),offs) : LOAD_CONST_PTR(NULL));
+
+			void *fun = setjmp;
+			ereg args[2];
+			int nargs = 1;
+			args[0] = st;
+#if defined(HL_WIN) && defined(HL_64)
+			// On Win64 setjmp actually takes two arguments
+			// the jump buffer and the frame pointer (or the stack pointer if there is no FP)
+			nargs = 2;
+			args[1] = emit_gen(ctx,LEA,MK_STACK_REG(0),UNUSED,M_PTR);
+#endif
+#ifdef HL_MINGW
+			fun = _setjmp;
+#endif
+			ereg ret = emit_native_call(ctx, fun, args, nargs, &hlt_i32);
+			emit_test(ctx, ret, OJNull);
+			int jskip = emit_jump(ctx, true);
+			STORE(dst, tinf ? LOAD_CONST_PTR(&tinf->exc_value) : LOAD_MEM_PTR(thread,(int)(int_val)&tinf->exc_value));
+
+			int jtrap = ctx->emit_pos;
+			emit_gen(ctx, JUMP, UNUSED, UNUSED, 0);
+			register_jump(ctx, jtrap, o->p2);
+			split_block(ctx);
+			patch_jump(ctx, jskip);
+
+			if( ctx->trap_count == MAX_TRAPS ) jit_error("Too many try/catch depth");
+			trap_inf *inf = &ctx->traps[ctx->trap_count++];
+			inf->stack = st;
+			inf->target = o->p2 + 1 + ctx->op_pos;
+		}
+		break;
+	case OEndTrap:
+		{
+			if( ctx->trap_count == 0 ) jit_assert();
+			ereg st = ctx->traps[ctx->trap_count - 1].stack;
+
+			ereg thread, current_addr;
+			static hl_thread_info *tinf = NULL;
+			static hl_trap_ctx *trap = NULL;
+#			ifndef HL_THREADS
+			if( tinf == NULL ) tinf = hl_get_thread();
+			current_addr = LOAD_CONST_PTR(&tinf->trap_current);
+#			else
+			thread = emit_native_call(ctx, hl_get_thread, NULL, 0, &hlt_bytes);
+			current_addr = OFFSET(thread, UNUSED, 0, (int)(int_val)&tinf->trap_current);
+#			endif
+
+			// restore the previous trap as the current one
+			STORE_MEM(current_addr, 0, LOAD_MEM_PTR(st,(int)(int_val)&trap->prev));
+
+			emit_instr(ctx, CATCH);
+		}
+		break;
+	case OSwitch:
+		{
+			ereg v = LOAD(dst);
+			int count = o->p2;
+			// values outside of [0,count) skip the jump table
+			emit_cmp(ctx,v,LOAD_CONST(count,&hlt_i32),OJUGte);
+			add_jump_target(ctx, 0);
+			int jdefault = emit_jump(ctx, true);
+			int pos = ctx->emit_pos;
+			einstr *e = emit_instr(ctx, JUMP_TABLE);
+			e->a = v;
+			patch_instr_mode(ctx, M_NORET);
+			store_args(ctx,e,(ereg*)o->extra,count);
+			register_jump(ctx, pos, 0);
+			for(int k=0;k<count;k++) {
+				int offs = o->extra[k];
+				if( offs < 0 ) jit_assert();
+				if( offs == 0 ) continue;
+				add_jump_target(ctx, offs);
+			}
+			patch_jump(ctx, jdefault);
+		}
+		break;
+	case OGetTID:
+		STORE(dst, LOAD_MEM(LOAD(ra),0,&hlt_i32));
+		break;
+	case OAssert:
+		emit_native_call(ctx, hl_jit_assert, NULL, 0, NULL);
+		break;
+	case ONop:
+		break;
+	case OPrefetch:
+		{
+			ereg r = LOAD(dst);
+			// p2 > 0 designates field number p2-1 of the object
+			if( o->p2 > 0 ) {
+				switch( dst->t->kind ) {
+				case HOBJ:
+				case HSTRUCT:
+					{
+						hl_runtime_obj *rt = hl_get_obj_rt(dst->t);
+						r = OFFSET(r, UNUSED, 0, rt->fields_indexes[o->p2-1]);
+					}
+					break;
+				default:
+					jit_assert();
+					break;
+				}
+			}
+			emit_gen_ext(ctx, PREFETCH, r, UNUSED, M_NONE, o->p3);
+		}
+		break;
+	case OAsm:
+		jit_assert();
+		break;
+	case OCatch:
+		// Only used by OTrap typing
+		break;
+	default:
+		jit_error(hl_op_name(o->op));
+		break;
+	}
+}
diff --git a/src/jit_old.c b/src/jit_old.c
new file mode 100644
index 000000000..7e4e6e88b
--- /dev/null
+++ b/src/jit_old.c
@@ -0,0 +1,4730 @@
+/*
+ * Copyright (C)2015-2016 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifdef _MSC_VER
+#pragma warning(disable:4820)
+#endif
+#include
+#include
+#include "hlsystem.h"
+
+#ifdef __arm__
+# error "JIT does not support ARM processors, only x86 and x86-64 are supported, please use HashLink/C native compilation instead"
+#endif
+
+#ifdef HL_DEBUG
+# define JIT_DEBUG
+#endif
+
+typedef enum { // general-purpose register identifiers; also used as indexes into ctx->pregs (see REG_AT, PEAX, PESP, PEBP)
+ Eax = 0,
+ Ecx = 1,
+ Edx = 2,
+ Ebx = 3,
+ Esp = 4,
+ Ebp = 5,
+ Esi = 6,
+ Edi = 7,
+#ifdef HL_64
+ R8 = 8, // R8..R15 are only declared when HL_64 is defined
+ R9 = 9,
+ R10 = 10,
+ R11 = 11,
+ R12 = 12,
+ R13 = 13,
+ R14 = 14,
+ R15 = 15,
+#endif
+ _LAST = 0xFF // NOTE(review): looks like an end marker, not a real register - confirm usage
+} CpuReg;
+
+typedef enum { // instruction selectors; op() uses the value as an index into OP_FORMS, so both lists must stay in the same order
+ MOV,
+ LEA,
+ PUSH,
+ ADD,
+ SUB,
+ IMUL, // only overflow flag changes compared to MUL
+ DIV,
+ IDIV,
+ CDQ,
+ CDQE,
+ POP,
+ RET,
+ CALL,
+ AND,
+ OR,
+ XOR,
+ CMP,
+ TEST,
+ NOP,
+ SHL,
+ SHR,
+ SAR,
+ INC,
+ DEC,
+ JMP,
+ // FPU
+ FSTP,
+ FSTP32,
+ FLD,
+ FLD32,
+ FLDCW,
+ // SSE
+ MOVSD,
+ MOVSS,
+ COMISD,
+ COMISS,
+ ADDSD,
+ SUBSD,
+ MULSD,
+ DIVSD,
+ ADDSS,
+ SUBSS,
+ MULSS,
+ DIVSS,
+ XORPD,
+ CVTSI2SD,
+ CVTSI2SS,
+ CVTSD2SI,
+ CVTSD2SS,
+ CVTSS2SD,
+ CVTSS2SI,
+ STMXCSR,
+ LDMXCSR,
+ // 8-16 bits
+ MOV8,
+ CMP8,
+ TEST8,
+ PUSH8,
+ MOV16,
+ CMP16,
+ TEST16,
+ // prefetches (op() never sets the 64-bit flag for selectors >= PREFETCHT0)
+ PREFETCHT0,
+ PREFETCHT1,
+ PREFETCHT2,
+ PREFETCHNTA,
+ PREFETCHW,
+ // --
+ _CPU_LAST // number of selectors; used as the size of OP_FORMS
+} CpuOp;
+
+/*
+ * Jump condition codes. Apart from JAlways, each value is the second byte of
+ * the near-jump encoding (0x0F <code>); AddJump_small subtracts 0x10 from it
+ * to build the short-jump form.
+ */
+#define JAlways		0
+#define JOverflow	0x80
+#define JULt		0x82
+#define JUGte		0x83
+#define JEq		0x84
+#define JNeq		0x85
+#define JULte		0x86
+#define JUGt		0x87
+#define JParity		0x8A
+#define JNParity	0x8B
+#define JSLt		0x8C
+#define JSGte		0x8D
+#define JSLte		0x8E
+#define JSGt		0x8F
+
+/* Aliases. Jump-on-carry shares its encoding with unsigned "below" (0x82);
+ * the previous definition expanded to JLt, which is not defined anywhere. */
+#define JCarry		JULt
+#define JZero		JEq
+#define JNotZero	JNeq
+
+#define B(bv) *ctx->buf.b++ = (unsigned char)(bv)
+#define W(wv) *ctx->buf.w++ = wv
+
+#ifdef HL_64
+# define W64(wv) *ctx->buf.w64++ = wv
+#else
+# define W64(wv) W(wv)
+#endif
+
+static const int SIB_MULT[] = {-1, 0, 1, -1, 2, -1, -1, -1, 3};
+
+#define MOD_RM(mod,reg,rm) B(((mod) << 6) | (((reg)&7) << 3) | ((rm)&7))
+#define SIB(mult,rmult,rbase) B((SIB_MULT[mult]<<6) | (((rmult)&7)<<3) | ((rbase)&7))
+#define IS_SBYTE(c) ( (c) >= -128 && (c) < 128 )
+
+#define AddJump(how,local) { if( (how) == JAlways ) { B(0xE9); } else { B(0x0F); B(how); }; local = BUF_POS(); W(0); }
+#define AddJump_small(how,local) { if( (how) == JAlways ) { B(0xEB); } else B(how - 0x10); local = BUF_POS() | 0x40000000; B(0); }
+#define XJump(how,local) AddJump(how,local)
+#define XJump_small(how,local) AddJump_small(how,local)
+
+#define MAX_OP_SIZE 256
+
+#define BUF_POS() ((int)(ctx->buf.b - ctx->startBuf))
+#define RTYPE(r) r->t->kind
+
+#ifdef HL_64
+# define RESERVE_ADDRESS 0x8000000000000000
+#else
+# define RESERVE_ADDRESS 0x80000000
+#endif
+
+#if defined(HL_WIN_CALL) && defined(HL_64)
+# define IS_WINCALL64 1
+#else
+# define IS_WINCALL64 0
+#endif
+
+typedef struct jlist jlist; // singly linked list node; ctx->jumps, ctx->calls and ctx->switchs are heads of such lists
+struct jlist {
+ int pos; // NOTE(review): presumably the position in the output buffer that must be patched - confirm
+ int target; // NOTE(review): presumably what the patched location must refer to - confirm
+ jlist *next; // next pending entry
+};
+
+typedef struct vreg vreg;
+
+typedef enum { // kind of operand described by a preg; op() and copy() dispatch on pairs of these via ID2()
+ RCPU = 0,
+ RFPU = 1,
+ RSTACK = 2, // id is a vreg index; addressed relative to Ebp using that vreg's stackPos
+ RCONST = 3, // immediate, see pconst()/pconst64()
+ RADDR = 4, // absolute address stored in holds, see paddr()
+ RMEM = 5, // register-relative memory, see pmem()/pmem2()/pcodeaddr()
+ RUNUSED = 6, // placeholder for an absent operand
+ RCPU_CALL = 1 | 8, // alloc_reg() request only: a CPU register for which is_call_reg() is false
+ RCPU_8BITS = 1 | 16 // alloc_reg() request only: a CPU register for which is_reg8() is true
+} preg_kind;
+
+typedef struct { // one operand: either an entry of ctx->pregs or a temporary filled by pmem()/pconst()/...
+ preg_kind kind;
+ int id; // meaning depends on kind: register number, vreg index, immediate, or packed address fields
+ int lock; // RLOCK() raises it to ctx->currentPos; alloc_reg() skips entries with lock >= currentPos
+ vreg *holds; // vreg currently bound to this register; reused as a raw payload by pconst64(), pmem2() and paddr()
+} preg;
+
+struct vreg { // one virtual register of the function being compiled
+ int stackPos; // displacement relative to Ebp used by op() for RSTACK operands
+ int size; // size in bytes handed to copy(); 0 means there is nothing to move
+ hl_type *t;
+ preg *current; // register currently holding the value, or NULL (see fetch())
+ preg stack; // operand describing the stack slot; fetch() returns it when current is NULL
+};
+
+#define REG_AT(i) (ctx->pregs + (i))
+
+#ifdef HL_64
+# define RCPU_COUNT 16
+# define RFPU_COUNT 16
+# ifdef HL_WIN_CALL
+# define CALL_NREGS 4
+# define RCPU_SCRATCH_COUNT 7
+# define RFPU_SCRATCH_COUNT 6
+static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx, R8, R9, R10, R11 };
+static const CpuReg CALL_REGS[] = { Ecx, Edx, R8, R9 };
+# else
+# define CALL_NREGS 6 // TODO : XMM6+XMM7 are FPU reg parameters
+# define RCPU_SCRATCH_COUNT 9
+# define RFPU_SCRATCH_COUNT 16
+static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx, Esi, Edi, R8, R9, R10, R11 };
+static const CpuReg CALL_REGS[] = { Edi, Esi, Edx, Ecx, R8, R9 };
+# endif
+#else
+# define CALL_NREGS 0
+# define RCPU_COUNT 8
+# define RFPU_COUNT 8
+# define RCPU_SCRATCH_COUNT 3
+# define RFPU_SCRATCH_COUNT 8
+static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx };
+#endif
+
+#define XMM(i) ((i) + RCPU_COUNT)
+#define PXMM(i) REG_AT(XMM(i))
+#define REG_IS_FPU(i) ((i) >= RCPU_COUNT)
+
+#define PEAX REG_AT(Eax)
+#define PESP REG_AT(Esp)
+#define PEBP REG_AT(Ebp)
+
+#define REG_COUNT (RCPU_COUNT + RFPU_COUNT)
+
+#define ID2(a,b) ((a) | ((b)<<8))
+#define R(id) (ctx->vregs + (id))
+#define ASSERT(i) { printf("JIT ERROR %d (jit.c line %d)\n",i,(int)__LINE__); jit_exit(); }
+#define IS_FLOAT(r) ((r)->t->kind == HF64 || (r)->t->kind == HF32)
+#define RLOCK(r) if( (r)->lock < ctx->currentPos ) (r)->lock = ctx->currentPos
+#define RUNLOCK(r) if( (r)->lock == ctx->currentPos ) (r)->lock = 0
+
+#define BREAK() B(0xCC)
+
+static preg _unused = { RUNUSED, 0, 0, NULL };
+static preg *UNUSED = &_unused;
+
+struct _jit_ctx { // whole state of the code generator
+ union { // write cursor into the output buffer, viewed at the widths used by B()/W()/W64()
+ unsigned char *b;
+ unsigned int *w;
+ unsigned long long *w64;
+ int *i;
+ double *d;
+ } buf;
+ vreg *vregs; // virtual registers, indexed through R(id)
+ preg pregs[REG_COUNT]; // CPU registers first, then XMM registers (see XMM(i))
+ vreg *savedRegs[REG_COUNT]; // snapshot of pregs[i].holds taken by save_regs()
+ int savedLocks[REG_COUNT]; // snapshot of pregs[i].lock taken by save_regs()
+ int *opsPos;
+ int maxRegs; // number of vregs reset by restore_regs()
+ int maxOps;
+ int bufSize; // capacity of startBuf in bytes, grown by jit_buf()
+ int totalRegsSize; // added to the argument size by pad_before_call() when computing alignment
+ int functionPos;
+ int allocOffset; // incremented on each alloc_reg() to rotate the first candidate register
+ int currentPos; // compared against preg.lock by RLOCK()/RUNLOCK() and alloc_reg()
+ int nativeArgsCount; // set by begin_native_call(), decremented as arguments are assigned
+ unsigned char *startBuf; // start of the output buffer; BUF_POS() is buf.b - startBuf
+ hl_module *m;
+ hl_function *f;
+ jlist *jumps;
+ jlist *calls;
+ jlist *switchs;
+ hl_alloc falloc; // cleared per-function
+ hl_alloc galloc;
+ vclosure *closure_list;
+ hl_debug_infos *debug;
+ int c2hl;
+ int hl2c;
+ void *static_functions[8];
+ bool static_function_offset;
+#ifdef WIN64_UNWIND_TABLES
+ int unwind_offset;
+ int nunwind;
+ PRUNTIME_FUNCTION unwind_table;
+#endif
+};
+
+#ifdef WIN64_UNWIND_TABLES
+
+typedef enum _UNWIND_OP_CODES
+{
+ UWOP_PUSH_NONVOL = 0, /* info == register number */
+ UWOP_ALLOC_LARGE, /* no info, alloc size in next 2 slots */
+ UWOP_ALLOC_SMALL, /* info == size of allocation / 8 - 1 */
+ UWOP_SET_FPREG, /* no info, FP = RSP + UNWIND_INFO.FPRegOffset*16 */
+ UWOP_SAVE_NONVOL, /* info == register number, offset in next slot */
+ UWOP_SAVE_NONVOL_FAR, /* info == register number, offset in next 2 slots */
+ UWOP_SAVE_XMM128 = 8, /* info == XMM reg number, offset in next slot */
+ UWOP_SAVE_XMM128_FAR, /* info == XMM reg number, offset in next 2 slots */
+ UWOP_PUSH_MACHFRAME /* info == 0: no error-code, 1: error-code */
+} UNWIND_CODE_OPS;
+
+/*
+ * Append one two-byte unwind code to the output buffer: the prolog offset
+ * first, then the operation in the low nibble with `info` in the high nibble.
+ */
+void write_uwcode(jit_ctx *ctx, unsigned char offset, UNWIND_CODE_OPS code, unsigned char info)
+{
+	const int op_and_info = code | (info << 4);
+	B(offset);
+	B(op_and_info);
+}
+
+/*
+ * Append the unwind description shared by every generated function: a
+ * four-byte header followed by two unwind codes (set frame pointer, push RBP).
+ * One description is enough because all generated functions use a frame pointer.
+ */
+void write_unwind_data(jit_ctx *ctx)
+{
+	const unsigned char version = 1, flags = 0;
+	const unsigned char prolog_size = 4;
+	const unsigned char code_count = 2;
+	const unsigned char frame_reg = 5; // RBP
+	const unsigned char frame_offset = 0;
+
+	// header
+	B(version | (flags << 3));
+	B(prolog_size);
+	B(code_count);
+	B(frame_reg | (frame_offset << 4));
+	// codes
+	write_uwcode(ctx, 4, UWOP_SET_FPREG, 0);
+	write_uwcode(ctx, 1, UWOP_PUSH_NONVOL, 5);
+}
+#endif
+
+#define jit_exit() { hl_debug_break(); exit(-1); }
+#define jit_error(msg) _jit_error(ctx,msg,__LINE__)
+
+#ifndef HL_64
+# ifdef HL_DEBUG
+# define error_i64() jit_error("i64-32")
+# else
+void error_i64() { // non-HL_64, non-HL_DEBUG variant: report that the module needs 64 bit ints, then terminate
+ printf("The module you are loading is using 64 bit ints that are not supported by the HL32.\nPlease run using HL64 or compile with -D hl-legacy32");
+ jit_exit(); // expands to hl_debug_break(); exit(-1); - control does not come back to the caller
+}
+# endif
+#endif
+
+static void _jit_error( jit_ctx *ctx, const char *msg, int line );
+static void on_jit_error( const char *msg, int_val line );
+
+/*
+ * Turn `r` into a memory operand [reg + offset] and return it.
+ * The id packs: bits 0-3 = 0 (no index register), bits 4-7 = base register,
+ * bits 8 and above = displacement.
+ */
+static preg *pmem( preg *r, CpuReg reg, int offset ) {
+	r->id = (offset << 8) | (reg << 4) | 0;
+	r->kind = RMEM;
+	return r;
+}
+
+/*
+ * Turn `r` into an indexed memory operand [reg + reg2 * mult + offset] and
+ * return it. The id packs: bits 0-3 = mult, bits 4-7 = base register,
+ * bits 8 and above = index register. The displacement does not fit in the
+ * id, so it is carried in the `holds` field.
+ */
+static preg *pmem2( preg *r, CpuReg reg, CpuReg reg2, int mult, int offset ) {
+	r->holds = (void*)(int_val)offset;
+	r->id = (reg2 << 8) | (reg << 4) | mult;
+	r->kind = RMEM;
+	return r;
+}
+
+#ifdef HL_64
+/*
+ * Turn `r` into a code-relative memory operand and return it.
+ * The low nibble is set to 15, which op() treats as "offset relative to the
+ * current code position"; the offset itself occupies bits 4 and above.
+ */
+static preg *pcodeaddr( preg *r, int offset ) {
+	r->id = (offset << 4) | 15;
+	r->kind = RMEM;
+	return r;
+}
+#endif
+
+/*
+ * Turn `r` into a 32-bit immediate operand holding `c` and return it.
+ * `holds` is cleared because op() reads a non-NULL `holds` as a wide constant.
+ */
+static preg *pconst( preg *r, int c ) {
+	r->id = c;
+	r->holds = NULL;
+	r->kind = RCONST;
+	return r;
+}
+
+/*
+ * Turn `r` into an immediate operand holding the pointer-sized value `c`.
+ * Values that survive a round trip through `int` (always the case without
+ * HL_64) use the plain pconst() form. Wider values are stored in `holds`,
+ * with a marker value in `id`.
+ */
+static preg *pconst64( preg *r, int_val c ) {
+#ifdef HL_64
+	if( ((int)c) != c ) {
+		r->kind = RCONST;
+		r->id = 0xC064C064;
+		r->holds = (vreg*)c;
+		return r;
+	}
+#endif
+	return pconst(r,(int)c);
+}
+
+#ifndef HL_64
+// it is not possible to access direct 64 bit address in x86-64
+/* Turn `r` into an absolute-address operand for `p` (stored in `holds`) and return it. */
+static preg *paddr( preg *r, void *p ) {
+	r->holds = (vreg*)p;
+	r->kind = RADDR;
+	return r;
+}
+#endif
+
+/*
+ * Snapshot, for every physical register, which vreg it holds and its lock
+ * value, so that restore_regs() can reinstate that allocation later.
+ */
+static void save_regs( jit_ctx *ctx ) {
+	int i;
+	for(i=0;i<REG_COUNT;i++) {
+		ctx->savedRegs[i] = ctx->pregs[i].holds;
+		ctx->savedLocks[i] = ctx->pregs[i].lock;
+	}
+}
+
+/*
+ * Reinstate the register allocation recorded by save_regs().
+ * Every vreg is first detached from its register, then each physical register
+ * gets back its saved vreg and lock, and the vreg is pointed back at it.
+ */
+static void restore_regs( jit_ctx *ctx ) {
+	int i;
+	for(i=0;i<ctx->maxRegs;i++)
+		ctx->vregs[i].current = NULL;
+	for(i=0;i<REG_COUNT;i++) {
+		vreg *r = ctx->savedRegs[i];
+		preg *p = ctx->pregs + i;
+		p->holds = r;
+		p->lock = ctx->savedLocks[i];
+		if( r ) r->current = p;
+	}
+}
+
+/*
+ * Guarantee room for one more instruction (MAX_OP_SIZE bytes) at the write
+ * cursor, growing the output buffer when needed.
+ *
+ * Growth is by a factor of 4/3. For the very first allocation the size is
+ * estimated as four bytes per opcode over all functions of the module. In
+ * every case the new size is at least the old size plus 4 * MAX_OP_SIZE.
+ * Existing content is copied and the cursor is rebased onto the new buffer,
+ * so positions obtained from BUF_POS() stay valid but raw pointers do not.
+ */
+static void jit_buf( jit_ctx *ctx ) {
+	if( BUF_POS() > ctx->bufSize - MAX_OP_SIZE ) {
+		int nsize = ctx->bufSize * 4 / 3;
+		unsigned char *nbuf;
+		int curpos;
+		if( nsize == 0 ) {
+			int i;
+			for(i=0;i<ctx->m->code->nfunctions;i++)
+				nsize += ctx->m->code->functions[i].nops;
+			nsize *= 4;
+		}
+		if( nsize < ctx->bufSize + MAX_OP_SIZE * 4 ) nsize = ctx->bufSize + MAX_OP_SIZE * 4;
+		curpos = BUF_POS();
+		nbuf = (unsigned char*)malloc(nsize);
+		if( nbuf == NULL ) ASSERT(nsize);
+		if( ctx->startBuf ) {
+			memcpy(nbuf,ctx->startBuf,curpos);
+			free(ctx->startBuf);
+		}
+		ctx->startBuf = nbuf;
+		ctx->buf.b = nbuf + curpos;
+		ctx->bufSize = nsize;
+	}
+}
+
+static const char *KNAMES[] = { "cpu","fpu","stack","const","addr","mem","unused" };
+#define ERRIF(c) if( c ) { printf("%s(%s,%s)\n",f?f->name:"???",KNAMES[a->kind], KNAMES[b->kind]); ASSERT(0); }
+
+typedef struct { // encodings of one instruction, one field per operand shape; 0 means the shape is not available (op() rejects it through ERRIF)
+ const char *name; // single operand
+ int r_mem; // r32 / r/m32 r32
+ int mem_r; // r/m32 / r32 r/m32
+ int r_const; // r32 / imm32 imm32
+ int r_i8; // r32 / imm8 imm8
+ int mem_const; // r/m32 / imm32 N/A
+} opform;
+
+#define FLAG_LONGOP 0x80000000
+#define FLAG_16B 0x40000000
+#define FLAG_8B 0x20000000
+#define FLAG_DUAL 0x10000000
+
+#define RM(op,id) ((op) | (((id)+1)<<8))
+#define GET_RM(op) (((op) >> ((op) < 0 ? 24 : 8)) & 15)
+#define SBYTE(op) ((op) << 16)
+#define LONG_OP(op) ((op) | FLAG_LONGOP)
+#define OP16(op) LONG_OP((op) | FLAG_16B)
+#define LONG_RM(op,id) LONG_OP(op | (((id) + 1) << 24))
+
+static opform OP_FORMS[_CPU_LAST] = { // indexed by CpuOp in op(): entries must stay in the same order as the enum
+ { "MOV", 0x8B, 0x89, 0xB8, 0, RM(0xC7,0) },
+ { "LEA", 0x8D },
+ { "PUSH", 0x50, RM(0xFF,6), 0x68, 0x6A },
+ { "ADD", 0x03, 0x01, RM(0x81,0), RM(0x83,0) },
+ { "SUB", 0x2B, 0x29, RM(0x81,5), RM(0x83,5) },
+ { "IMUL", LONG_OP(0x0FAF), 0, 0x69 | FLAG_DUAL, 0x6B | FLAG_DUAL },
+ { "DIV", RM(0xF7,6), RM(0xF7,6) },
+ { "IDIV", RM(0xF7,7), RM(0xF7,7) },
+ { "CDQ", 0x99 },
+ { "CDQE", 0x98 },
+ { "POP", 0x58, RM(0x8F,0) },
+ { "RET", 0xC3 },
+ { "CALL", RM(0xFF,2), RM(0xFF,2), 0xE8 },
+ { "AND", 0x23, 0x21, RM(0x81,4), RM(0x83,4) },
+ { "OR", 0x0B, 0x09, RM(0x81,1), RM(0x83,1) },
+ { "XOR", 0x33, 0x31, RM(0x81,6), RM(0x83,6) },
+ { "CMP", 0x3B, 0x39, RM(0x81,7), RM(0x83,7) },
+ { "TEST", 0x85, 0x85/*SWP?*/, RM(0xF7,0) },
+ { "NOP", 0x90 },
+ { "SHL", RM(0xD3,4), 0, 0, RM(0xC1,4) },
+ { "SHR", RM(0xD3,5), 0, 0, RM(0xC1,5) },
+ { "SAR", RM(0xD3,7), 0, 0, RM(0xC1,7) },
+ { "INC", IS_64 ? RM(0xFF,0) : 0x40, RM(0xFF,0) },
+ { "DEC", IS_64 ? RM(0xFF,1) : 0x48, RM(0xFF,1) },
+ { "JMP", RM(0xFF,4) },
+ // FPU
+ { "FSTP", 0, RM(0xDD,3) },
+ { "FSTP32", 0, RM(0xD9,3) },
+ { "FLD", 0, RM(0xDD,0) },
+ { "FLD32", 0, RM(0xD9,0) },
+ { "FLDCW", 0, RM(0xD9, 5) },
+ // SSE (three-byte encodings: op()'s OP macro emits the REX byte after the first byte)
+ { "MOVSD", 0xF20F10, 0xF20F11 },
+ { "MOVSS", 0xF30F10, 0xF30F11 },
+ { "COMISD", 0x660F2F },
+ { "COMISS", LONG_OP(0x0F2F) },
+ { "ADDSD", 0xF20F58 },
+ { "SUBSD", 0xF20F5C },
+ { "MULSD", 0xF20F59 },
+ { "DIVSD", 0xF20F5E },
+ { "ADDSS", 0xF30F58 },
+ { "SUBSS", 0xF30F5C },
+ { "MULSS", 0xF30F59 },
+ { "DIVSS", 0xF30F5E },
+ { "XORPD", 0x660F57 },
+ { "CVTSI2SD", 0xF20F2A },
+ { "CVTSI2SS", 0xF30F2A },
+ { "CVTSD2SI", 0xF20F2D },
+ { "CVTSD2SS", 0xF20F5A },
+ { "CVTSS2SD", 0xF30F5A },
+ { "CVTSS2SI", 0xF30F2D },
+ { "STMXCSR", 0, LONG_RM(0x0FAE,3) },
+ { "LDMXCSR", 0, LONG_RM(0x0FAE,2) },
+ // 8 bits,
+ { "MOV8", 0x8A, 0x88, 0, 0xB0, RM(0xC6,0) },
+ { "CMP8", 0x3A, 0x38, 0, RM(0x80,7) },
+ { "TEST8", 0x84, 0x84, RM(0xF6,0) },
+ { "PUSH8", 0, 0, 0x6A | FLAG_8B },
+ { "MOV16", OP16(0x8B), OP16(0x89), OP16(0xB8) },
+ { "CMP16", OP16(0x3B), OP16(0x39) },
+ { "TEST16", OP16(0x85) },
+ // prefetches
+ { "PREFETCHT0", 0, LONG_RM(0x0F18,1) },
+ { "PREFETCHT1", 0, LONG_RM(0x0F18,2) },
+ { "PREFETCHT2", 0, LONG_RM(0x0F18,3) },
+ { "PREFETCHNTA", 0, LONG_RM(0x0F18,0) },
+ { "PREFETCHW", 0, LONG_RM(0x0F0D,1) },
+};
+
+#ifdef HL_64
+# define REX() if( r64 ) B(r64 | 0x40)
+#else
+# define REX()
+#endif
+
+#define OP(b) \
+ if( (b) & 0xFF0000 ) { \
+ B((b)>>16); \
+ if( r64 ) B(r64 | 0x40); /* also in 32 bits mode */ \
+ B((b)>>8); \
+ B(b); \
+ } else { \
+ if( (b) & FLAG_16B ) { \
+ B(0x66); \
+ REX(); \
+ } else {\
+ REX(); \
+ if( (b) & FLAG_LONGOP ) B((b)>>8); \
+ }\
+ B(b); \
+ }
+
+/*
+ * Tell whether operand `a` may take part in an 8-bit instruction.
+ * Stack, memory and constant operands always qualify; a CPU register
+ * qualifies unless it is Esi or Edi. Every other kind is rejected.
+ */
+static bool is_reg8( preg *a ) {
+	switch( a->kind ) {
+	case RSTACK:
+	case RMEM:
+	case RCONST:
+		return true;
+	case RCPU:
+		return a->id != Esi && a->id != Edi;
+	default:
+		return false;
+	}
+}
+
+static void op( jit_ctx *ctx, CpuOp o, preg *a, preg *b, bool mode64 ) { // encode instruction o with operands a (first) and b (second) at the write cursor; the encoding is picked from OP_FORMS[o] according to the pair of operand kinds
+ opform *f = &OP_FORMS[o];
+ int r64 = mode64 && (o != PUSH && o != POP && o != CALL && o != PUSH8 && o < PREFETCHT0) ? 8 : 0; // bits of the REX prefix (emitted as r64 | 0x40); 8 selects 64-bit operand size
+ switch( o ) {
+ case CMP8:
+ case TEST8:
+ case MOV8:
+ if( !is_reg8(a) || !is_reg8(b) ) // 8-bit forms refuse operands rejected by is_reg8()
+ ASSERT(0);
+ break;
+ default:
+ break;
+ }
+ switch( ID2(a->kind,b->kind) ) {
+ case ID2(RUNUSED,RUNUSED): // no operand: the opcode alone
+ ERRIF(f->r_mem == 0);
+ OP(f->r_mem);
+ break;
+ case ID2(RCPU,RCPU):
+ case ID2(RFPU,RFPU):
+ ERRIF( f->r_mem == 0 );
+ if( a->id > 7 ) r64 |= 4; // extension bit for the reg field of ModRM
+ if( b->id > 7 ) r64 |= 1; // extension bit for the rm field of ModRM
+ OP(f->r_mem);
+ MOD_RM(3,a->id,b->id);
+ break;
+ case ID2(RCPU,RFPU):
+ case ID2(RFPU,RCPU):
+ ERRIF( (f->r_mem>>16) == 0 ); // only three-byte encodings may mix CPU and FPU registers
+ if( a->id > 7 ) r64 |= 4;
+ if( b->id > 7 ) r64 |= 1;
+ OP(f->r_mem);
+ MOD_RM(3,a->id,b->id);
+ break;
+ case ID2(RCPU,RUNUSED): // single register operand
+ ERRIF( f->r_mem == 0 );
+ if( a->id > 7 ) r64 |= 1;
+ if( GET_RM(f->r_mem) > 0 ) {
+ OP(f->r_mem);
+ MOD_RM(3, GET_RM(f->r_mem)-1, a->id); // the reg field carries the opcode extension stored by RM()
+ } else
+ OP(f->r_mem + (a->id&7)); // register number folded into the opcode byte
+ break;
+ case ID2(RSTACK,RUNUSED): // single operand living in a vreg stack slot
+ ERRIF( f->mem_r == 0 || GET_RM(f->mem_r) == 0 );
+ {
+ int stackPos = R(a->id)->stackPos;
+ OP(f->mem_r);
+ if( IS_SBYTE(stackPos) ) { // 8-bit displacement when it fits, 32-bit otherwise
+ MOD_RM(1,GET_RM(f->mem_r)-1,Ebp);
+ B(stackPos);
+ } else {
+ MOD_RM(2,GET_RM(f->mem_r)-1,Ebp);
+ W(stackPos);
+ }
+ }
+ break;
+ case ID2(RCPU,RCONST):
+ ERRIF( f->r_const == 0 && f->r_i8 == 0 );
+ if( a->id > 7 ) r64 |= 1;
+ {
+ int_val cval = b->holds ? (int_val)b->holds : b->id; // wide constants travel in holds, see pconst64()
+ // short byte form
+ if( f->r_i8 && IS_SBYTE(cval) ) {
+ if( (f->r_i8&FLAG_DUAL) && a->id > 7 ) r64 |= 4;
+ OP(f->r_i8);
+ if( (f->r_i8&FLAG_DUAL) ) MOD_RM(3,a->id,a->id); else MOD_RM(3,GET_RM(f->r_i8)-1,a->id); // FLAG_DUAL: the register is both destination and source
+ B((int)cval);
+ } else if( GET_RM(f->r_const) > 0 || (f->r_const&FLAG_DUAL) ) {
+ if( (f->r_i8&FLAG_DUAL) && a->id > 7 ) r64 |= 4;
+ OP(f->r_const&0xFF);
+ if( (f->r_i8&FLAG_DUAL) ) MOD_RM(3,a->id,a->id); else MOD_RM(3,GET_RM(f->r_const)-1,a->id);
+ if( mode64 && IS_64 && o == MOV ) W64(cval); else W((int)cval); // only a 64-bit MOV writes a 64-bit immediate
+ } else {
+ ERRIF( f->r_const == 0);
+ OP((f->r_const&0xFF) + (a->id&7));
+ if( mode64 && IS_64 && o == MOV ) W64(cval); else W((int)cval);
+ }
+ }
+ break;
+ case ID2(RSTACK,RCPU):
+ case ID2(RSTACK,RFPU): // store register b into the stack slot of vreg a
+ ERRIF( f->mem_r == 0 );
+ if( b->id > 7 ) r64 |= 4;
+ {
+ int stackPos = R(a->id)->stackPos;
+ OP(f->mem_r);
+ if( IS_SBYTE(stackPos) ) {
+ MOD_RM(1,b->id,Ebp);
+ B(stackPos);
+ } else {
+ MOD_RM(2,b->id,Ebp);
+ W(stackPos);
+ }
+ }
+ break;
+ case ID2(RCPU,RSTACK):
+ case ID2(RFPU,RSTACK): // load register a from the stack slot of vreg b
+ ERRIF( f->r_mem == 0 );
+ if( a->id > 7 ) r64 |= 4;
+ {
+ int stackPos = R(b->id)->stackPos;
+ OP(f->r_mem);
+ if( IS_SBYTE(stackPos) ) {
+ MOD_RM(1,a->id,Ebp);
+ B(stackPos);
+ } else {
+ MOD_RM(2,a->id,Ebp);
+ W(stackPos);
+ }
+ }
+ break;
+ case ID2(RCONST,RUNUSED): // single immediate operand
+ ERRIF( f->r_const == 0 );
+ {
+ int_val cval = a->holds ? (int_val)a->holds : a->id;
+ OP(f->r_const);
+ if( f->r_const & FLAG_8B ) B((int)cval); else W((int)cval);
+ }
+ break;
+ case ID2(RMEM,RUNUSED): // single memory operand; only the plain [reg + offset] shape is supported
+ ERRIF( f->mem_r == 0 );
+ {
+ int mult = a->id & 0xF; // unpack the fields written by pmem()/pmem2()/pcodeaddr()
+ int regOrOffs = mult == 15 ? a->id >> 4 : a->id >> 8;
+ CpuReg reg = (a->id >> 4) & 0xF;
+ if( mult == 15 ) {
+ ERRIF(1);
+ } else if( mult == 0 ) {
+ if( reg > 7 ) r64 |= 1;
+ OP(f->mem_r);
+ if( regOrOffs == 0 && (reg&7) != Ebp ) { // the displacement-free form is not used when the base is Ebp
+ MOD_RM(0,GET_RM(f->mem_r)-1,reg);
+ if( (reg&7) == Esp ) B(0x24); // extra byte emitted whenever the base is Esp
+ } else if( IS_SBYTE(regOrOffs) ) {
+ MOD_RM(1,GET_RM(f->mem_r)-1,reg);
+ if( (reg&7) == Esp ) B(0x24);
+ B(regOrOffs);
+ } else {
+ MOD_RM(2,GET_RM(f->mem_r)-1,reg);
+ if( (reg&7) == Esp ) B(0x24);
+ W(regOrOffs);
+ }
+ } else {
+ // [eax + ebx * M]
+ ERRIF(1);
+ }
+ }
+ break;
+ case ID2(RCPU, RMEM):
+ case ID2(RFPU, RMEM): // load register a from memory operand b
+ ERRIF( f->r_mem == 0 );
+ {
+ int mult = b->id & 0xF;
+ int regOrOffs = mult == 15 ? b->id >> 4 : b->id >> 8;
+ CpuReg reg = (b->id >> 4) & 0xF;
+ if( mult == 15 ) { // code-relative operand built by pcodeaddr()
+ int pos;
+ if( a->id > 7 ) r64 |= 4;
+ OP(f->r_mem);
+ MOD_RM(0,a->id,5);
+ if( IS_64 ) {
+ // offset wrt current code
+ pos = BUF_POS() + 4;
+ W(regOrOffs - pos);
+ } else {
+ ERRIF(1);
+ }
+ } else if( mult == 0 ) { // [reg + offset]
+ if( a->id > 7 ) r64 |= 4;
+ if( reg > 7 ) r64 |= 1;
+ OP(f->r_mem);
+ if( regOrOffs == 0 && (reg&7) != Ebp ) {
+ MOD_RM(0,a->id,reg);
+ if( (reg&7) == Esp ) B(0x24);
+ } else if( IS_SBYTE(regOrOffs) ) {
+ MOD_RM(1,a->id,reg);
+ if( (reg&7) == Esp ) B(0x24);
+ B(regOrOffs);
+ } else {
+ MOD_RM(2,a->id,reg);
+ if( (reg&7) == Esp ) B(0x24);
+ W(regOrOffs);
+ }
+ } else { // [reg + index * mult + offset], built by pmem2()
+ int offset = (int)(int_val)b->holds;
+ if( a->id > 7 ) r64 |= 4;
+ if( reg > 7 ) r64 |= 1;
+ if( regOrOffs > 7 ) r64 |= 2; // extension bit for the index register
+ OP(f->r_mem);
+ MOD_RM(offset == 0 ? 0 : IS_SBYTE(offset) ? 1 : 2,a->id,4);
+ SIB(mult,regOrOffs,reg);
+ if( offset ) {
+ if( IS_SBYTE(offset) ) B(offset); else W(offset);
+ }
+ }
+ }
+ break;
+# ifndef HL_64
+ case ID2(RFPU,RADDR):
+# endif
+ case ID2(RCPU,RADDR): // load register a from the absolute address in b->holds
+ ERRIF( f->r_mem == 0 );
+ if( a->id > 7 ) r64 |= 4;
+ OP(f->r_mem);
+ MOD_RM(0,a->id,5);
+ if( IS_64 )
+ W64((int_val)b->holds);
+ else
+ W((int)(int_val)b->holds);
+ break;
+# ifndef HL_64
+ case ID2(RADDR,RFPU):
+# endif
+ case ID2(RADDR,RCPU): // store register b at the absolute address in a->holds
+ ERRIF( f->mem_r == 0 );
+ if( b->id > 7 ) r64 |= 4;
+ OP(f->mem_r);
+ MOD_RM(0,b->id,5);
+ if( IS_64 )
+ W64((int_val)a->holds);
+ else
+ W((int)(int_val)a->holds);
+ break;
+ case ID2(RMEM, RCPU):
+ case ID2(RMEM, RFPU): // store register b into memory operand a (mirror of the register/RMEM case)
+ ERRIF( f->mem_r == 0 );
+ {
+ int mult = a->id & 0xF;
+ int regOrOffs = mult == 15 ? a->id >> 4 : a->id >> 8;
+ CpuReg reg = (a->id >> 4) & 0xF;
+ if( mult == 15 ) {
+ int pos;
+ if( b->id > 7 ) r64 |= 4;
+ OP(f->mem_r);
+ MOD_RM(0,b->id,5);
+ if( IS_64 ) {
+ // offset wrt current code
+ pos = BUF_POS() + 4;
+ W(regOrOffs - pos);
+ } else {
+ ERRIF(1);
+ }
+ } else if( mult == 0 ) {
+ if( b->id > 7 ) r64 |= 4;
+ if( reg > 7 ) r64 |= 1;
+ OP(f->mem_r);
+ if( regOrOffs == 0 && (reg&7) != Ebp ) {
+ MOD_RM(0,b->id,reg);
+ if( (reg&7) == Esp ) B(0x24);
+ } else if( IS_SBYTE(regOrOffs) ) {
+ MOD_RM(1,b->id,reg);
+ if( (reg&7) == Esp ) B(0x24);
+ B(regOrOffs);
+ } else {
+ MOD_RM(2,b->id,reg);
+ if( (reg&7) == Esp ) B(0x24);
+ W(regOrOffs);
+ }
+ } else {
+ int offset = (int)(int_val)a->holds;
+ if( b->id > 7 ) r64 |= 4;
+ if( reg > 7 ) r64 |= 1;
+ if( regOrOffs > 7 ) r64 |= 2;
+ OP(f->mem_r);
+ MOD_RM(offset == 0 ? 0 : IS_SBYTE(offset) ? 1 : 2,b->id,4);
+ SIB(mult,regOrOffs,reg);
+ if( offset ) {
+ if( IS_SBYTE(offset) ) B(offset); else W(offset);
+ }
+ }
+ }
+ break;
+ default:
+ ERRIF(1); // operand combination without an encoding: print the pair of kinds and abort
+ }
+ if( ctx->debug && ctx->f && o == CALL ) { // recursion is bounded: the nested op() emits a MOV, not a CALL
+ preg p;
+ op(ctx,MOV,pmem(&p,Esp,-HL_WSIZE),PEBP,true); // erase EIP (clean stack report)
+ }
+}
+
+/* Encode instruction `o` on operands `a` and `b` without requesting 64-bit operand size. */
+static void op32( jit_ctx *ctx, CpuOp o, preg *a, preg *b ) {
+	const bool mode64 = false;
+	op(ctx,o,a,b,mode64);
+}
+
+/*
+ * Encode instruction `o` on operands `a` and `b` at pointer width:
+ * 64-bit operand size when HL_64 is defined, the plain form otherwise.
+ */
+static void op64( jit_ctx *ctx, CpuOp o, preg *a, preg *b ) {
+#ifdef HL_64
+	op(ctx,o,a,b,true);
+#else
+	op(ctx,o,a,b,false);
+#endif
+}
+
+/*
+ * Resolve a pending jump so that it lands on the current write position.
+ * `p` is a value produced by AddJump/AddJump_small: 0 means "nothing to
+ * patch"; bit 0x40000000 marks a short jump with a one-byte displacement,
+ * which must fit in [-128,127]. Otherwise the displacement is four bytes.
+ */
+static void patch_jump( jit_ctx *ctx, int p ) {
+	int here;
+	if( p == 0 )
+		return;
+	here = BUF_POS();
+	if( (p & 0x40000000) == 0 ) {
+		// near jump: displacement counted from the end of the 4-byte field
+		*(int*)(ctx->startBuf + p) = here - (p + 4);
+		return;
+	}
+	p &= 0x3FFFFFFF;
+	{
+		// short jump: displacement counted from the end of the 1-byte field
+		int d = here - (p + 1);
+		if( d < -128 || d >= 128 ) ASSERT(d);
+		*(char*)(ctx->startBuf + p) = (char)d;
+	}
+}
+
+/*
+ * Resolve a pending jump so that it lands on buffer position `target`.
+ * Same conventions as patch_jump(): `p` == 0 is ignored and bit 0x40000000
+ * selects the one-byte displacement form, which must fit in [-128,127].
+ */
+static void patch_jump_to( jit_ctx *ctx, int p, int target ) {
+	if( p == 0 )
+		return;
+	if( (p & 0x40000000) == 0 ) {
+		// near jump: displacement counted from the end of the 4-byte field
+		*(int*)(ctx->startBuf + p) = target - (p + 4);
+		return;
+	}
+	p &= 0x3FFFFFFF;
+	{
+		// short jump: displacement counted from the end of the 1-byte field
+		int d = target - (p + 1);
+		if( d < -128 || d >= 128 ) ASSERT(d);
+		*(char*)(ctx->startBuf + p) = (char)d;
+	}
+}
+
+/*
+ * Number of bytes a value of type `t` takes when pushed on the stack.
+ * 8-bit, 16-bit and boolean values (plus 32-bit ints and floats when HL_64
+ * is defined) take one int_val; every other type takes hl_type_size(t).
+ */
+static int stack_size( hl_type *t ) {
+	switch( t->kind ) {
+	case HUI8:
+	case HUI16:
+	case HBOOL:
+#ifdef HL_64
+	case HI32:
+	case HF32:
+#endif
+		return sizeof(int_val);
+	default:
+		break;
+	}
+	return hl_type_size(t);
+}
+
+/*
+ * Position of CPU register `reg` in the argument-register list CALL_REGS,
+ * or -1 when it is not an argument register (always -1 without HL_64).
+ */
+static int call_reg_index( int reg ) {
+#	ifdef HL_64
+	int i;
+	for(i=0;i<CALL_NREGS;i++)
+		if( CALL_REGS[i] == reg )
+			return i;
+#	endif
+	return -1;
+}
+
+/*
+ * Tell whether `p` is a register used to pass call arguments.
+ * With HL_64, an FPU register counts when its id is below CALL_NREGS and a
+ * CPU register counts when it is listed in CALL_REGS. Without HL_64 no
+ * register is used for arguments, so the answer is always false.
+ */
+static bool is_call_reg( preg *p ) {
+#	ifdef HL_64
+	int i;
+	if( p->kind == RFPU )
+		return p->id < CALL_NREGS;
+	for(i=0;i<CALL_NREGS;i++)
+		if( p->kind == RCPU && p->id == CALL_REGS[i] )
+			return true;
+	return false;
+#	else
+	return false;
+#	endif
+}
+
+/*
+ * Pick a scratch register of kind `k` and lock it for the current position.
+ *
+ * k is RCPU, RCPU_CALL (CPU register that is not an argument register),
+ * RCPU_8BITS (CPU register accepted by is_reg8) or RFPU.
+ *
+ * Candidates are scanned starting from a rotating offset (ctx->allocOffset).
+ * Registers locked at or after ctx->currentPos are skipped. A first pass looks
+ * for a register holding nothing. A second pass takes one that holds a vreg
+ * and unbinds it without emitting code (the caller relies on the value being
+ * available elsewhere). Aborts through ASSERT when no register qualifies or
+ * when `k` is not one of the kinds above.
+ */
+static preg *alloc_reg( jit_ctx *ctx, preg_kind k ) {
+	int i;
+	preg *p;
+	switch( k ) {
+	case RCPU:
+	case RCPU_CALL:
+	case RCPU_8BITS:
+		{
+			int off = ctx->allocOffset++;
+			const int count = RCPU_SCRATCH_COUNT;
+			// first pass: a free register
+			for(i=0;i<count;i++) {
+				int r = RCPU_SCRATCH_REGS[(i + off)%count];
+				p = ctx->pregs + r;
+				if( p->lock >= ctx->currentPos ) continue;
+				if( k == RCPU_CALL && is_call_reg(p) ) continue;
+				if( k == RCPU_8BITS && !is_reg8(p) ) continue;
+				if( p->holds == NULL ) {
+					RLOCK(p);
+					return p;
+				}
+			}
+			// second pass: evict the vreg bound to an unlocked register
+			for(i=0;i<count;i++) {
+				p = ctx->pregs + RCPU_SCRATCH_REGS[(i + off)%count];
+				if( p->lock >= ctx->currentPos ) continue;
+				if( k == RCPU_CALL && is_call_reg(p) ) continue;
+				if( k == RCPU_8BITS && !is_reg8(p) ) continue;
+				if( p->holds ) {
+					RLOCK(p);
+					p->holds->current = NULL;
+					p->holds = NULL;
+					return p;
+				}
+			}
+		}
+		break;
+	case RFPU:
+		{
+			int off = ctx->allocOffset++;
+			const int count = RFPU_SCRATCH_COUNT;
+			// first pass: a free register
+			for(i=0;i<count;i++) {
+				p = PXMM((i + off)%count);
+				if( p->lock >= ctx->currentPos ) continue;
+				if( p->holds == NULL ) {
+					RLOCK(p);
+					return p;
+				}
+			}
+			// second pass: evict the vreg bound to an unlocked register
+			for(i=0;i<count;i++) {
+				p = PXMM((i + off)%count);
+				if( p->lock >= ctx->currentPos ) continue;
+				if( p->holds ) {
+					RLOCK(p);
+					p->holds->current = NULL;
+					p->holds = NULL;
+					return p;
+				}
+			}
+		}
+		break;
+	default:
+		ASSERT(k);
+	}
+	ASSERT(0); // out of registers !
+	return NULL;
+}
+
+/* Where the value of `r` can be read right now: its bound register if any, else its stack slot. */
+static preg *fetch( vreg *r ) {
+	return r->current ? r->current : &r->stack;
+}
+
+/*
+ * Forget what register `r` holds: unbind the vreg on both sides and release
+ * the lock. Does nothing when `r` is NULL or holds no vreg. No code is emitted.
+ */
+static void scratch( preg *r ) {
+	if( r == NULL || r->holds == NULL )
+		return;
+	r->holds->current = NULL;
+	r->holds = NULL;
+	r->lock = 0;
+}
+
+static preg *copy( jit_ctx *ctx, preg *to, preg *from, int size );
+
+static void load( jit_ctx *ctx, preg *r, vreg *v ) { // bind vreg v to register r and emit the copy that brings its value there
+ preg *from = fetch(v);
+ if( from == r || v->size == 0 ) return; // already in r, or nothing to move
+ if( r->holds ) r->holds->current = NULL; // whatever r held before is no longer in a register
+ if( v->current ) {
+ v->current->holds = NULL;
+ from = r; // NOTE(review): copy() returns at once when to == from, so no move is emitted if v sat in another register - confirm this is intended
+ }
+ r->holds = v;
+ v->current = r;
+ copy(ctx,r,from,v->size);
+}
+
+/*
+ * Return an FPU register bound to vreg `r`.
+ * If `r` already sits in an FPU register, that register is locked and
+ * returned. Otherwise a new one is allocated; `andLoad` selects whether the
+ * value is loaded into it or the register is merely bound. Aborts when `r`
+ * is not a float (64-bit ints are tolerated on non-64-bit builds).
+ */
+static preg *alloc_fpu( jit_ctx *ctx, vreg *r, bool andLoad ) {
+	preg *p = fetch(r);
+	if( p->kind == RFPU ) {
+		RLOCK(p);
+		return p;
+	}
+	if( !IS_FLOAT(r) && (IS_64 || r->t->kind != HI64) ) ASSERT(r->t->kind);
+	p = alloc_reg(ctx, RFPU);
+	if( andLoad ) {
+		load(ctx,p,r);
+		return p;
+	}
+	// bind only
+	if( r->current )
+		r->current->holds = NULL;
+	r->current = p;
+	p->holds = r;
+	return p;
+}
+
+/* Bind vreg `r` to register `p`, detaching `r` from its previous register. No code is emitted. */
+static void reg_bind( vreg *r, preg *p ) {
+	preg *previous = r->current;
+	if( previous )
+		previous->holds = NULL;
+	r->current = p;
+	p->holds = r;
+}
+
+/*
+ * Return a CPU register bound to vreg `r`.
+ * If `r` already sits in a CPU register, that register is locked and
+ * returned. Otherwise a new one is allocated; `andLoad` selects whether the
+ * value is loaded into it or the register is merely bound. Without HL_64,
+ * 64-bit ints are delegated to alloc_fpu() and other values wider than
+ * 4 bytes abort.
+ */
+static preg *alloc_cpu( jit_ctx *ctx, vreg *r, bool andLoad ) {
+	preg *p = fetch(r);
+	if( p->kind == RCPU ) {
+		RLOCK(p);
+		return p;
+	}
+#ifndef HL_64
+	if( r->t->kind == HI64 ) return alloc_fpu(ctx,r,andLoad);
+	if( r->size > 4 ) ASSERT(r->size);
+#endif
+	p = alloc_reg(ctx, RCPU);
+	if( !andLoad ) {
+		reg_bind(r,p);
+		return p;
+	}
+	load(ctx,p,r);
+	return p;
+}
+
+/*
+ * Return a CPU register holding vreg `r` that is not one of the registers
+ * used to pass call arguments. A value currently held in an argument
+ * register is moved to a fresh register and rebound there.
+ */
+static preg *alloc_cpu_call( jit_ctx *ctx, vreg *r ) {
+	preg *p = fetch(r);
+	if( p->kind == RCPU ) {
+		preg *moved;
+		if( !is_call_reg(p) ) {
+			RLOCK(p);
+			return p;
+		}
+		moved = alloc_reg(ctx, RCPU_CALL);
+		op64(ctx,MOV,moved,p);
+		scratch(p);
+		reg_bind(r,moved);
+		return moved;
+	}
+#ifndef HL_64
+	if( r->t->kind == HI64 ) return alloc_fpu(ctx,r,true);
+	if( r->size > 4 ) ASSERT(r->size);
+#endif
+	p = alloc_reg(ctx, RCPU_CALL);
+	load(ctx,p,r);
+	return p;
+}
+
+/*
+ * Like fetch(), but safe to read as a 32-bit value.
+ * A vreg narrower than 4 bytes that is not in a register is loaded into a
+ * CPU register first, so that the register is correctly erased around the
+ * value; the register is unlocked again before it is returned.
+ */
+static preg *fetch32( jit_ctx *ctx, vreg *r ) {
+	preg *p;
+	if( r->current != NULL )
+		return r->current;
+	if( r->size >= 4 )
+		return fetch(r);
+	p = alloc_cpu(ctx, r, true);
+	RUNLOCK(p);
+	return p;
+}
+
+/*
+ * Return a CPU register holding vreg `r` with its upper 32 bits cleared.
+ * With HL_64 the value must be loaded (`andLoad` false aborts): a register
+ * already holding it is shifted left then right by 32, a new register is
+ * zeroed before the load. Without HL_64 this is plain alloc_cpu().
+ */
+static preg *alloc_cpu64( jit_ctx *ctx, vreg *r, bool andLoad ) {
+#ifdef HL_64
+	preg *p = fetch(r);
+	if( !andLoad ) ASSERT(0);
+	if( p->kind == RCPU ) {
+		// remove higher bits
+		preg imm;
+		op64(ctx,SHL,p,pconst(&imm,32));
+		op64(ctx,SHR,p,pconst(&imm,32));
+		RLOCK(p);
+		return p;
+	}
+	p = alloc_reg(ctx, RCPU);
+	op64(ctx,XOR,p,p);
+	load(ctx,p,r);
+	return p;
+#else
+	return alloc_cpu(ctx,r,andLoad);
+#endif
+}
+
+/*
+ * Return a CPU register holding vreg `r` that is usable for 8-bit access.
+ * A value currently held in a register rejected by is_reg8() is moved to a
+ * suitable one and rebound there. The value is always loaded; `andLoad` is
+ * not consulted.
+ */
+static preg *alloc_cpu8( jit_ctx *ctx, vreg *r, bool andLoad ) {
+	preg *p = fetch(r);
+	if( p->kind == RCPU ) {
+		preg *moved;
+		if( is_reg8(p) ) {
+			RLOCK(p);
+			return p;
+		}
+		moved = alloc_reg(ctx, RCPU_8BITS);
+		op64(ctx,MOV,moved,p);
+		scratch(p);
+		reg_bind(r,moved);
+		return moved;
+	}
+	p = alloc_reg(ctx, RCPU_8BITS);
+	load(ctx,p,r);
+	return p;
+}
+
+static preg *copy( jit_ctx *ctx, preg *to, preg *from, int size ) { // emit the move of size bytes from operand from to operand to; returns the operand that is a register after the move (to when it is one, otherwise from)
+ if( size == 0 || to == from ) return to; // nothing to emit
+ switch( ID2(to->kind,from->kind) ) {
+ case ID2(RMEM,RCPU):
+ case ID2(RSTACK,RCPU):
+ case ID2(RCPU,RSTACK):
+ case ID2(RCPU,RMEM):
+ case ID2(RCPU,RCPU):
+# ifndef HL_64
+ case ID2(RCPU,RADDR):
+ case ID2(RADDR,RCPU):
+# endif
+ switch( size ) { // integer moves: one side at least is a CPU register
+ case 1:
+ if( to->kind == RCPU ) {
+ op64(ctx,XOR,to,to); // clear the destination so the upper bits read as zero
+ if( !is_reg8(to) ) { // destination refused by is_reg8(): 16-bit move, then keep the low byte only
+ preg p;
+ op32(ctx,MOV16,to,from);
+ op32(ctx,SHL,to,pconst(&p,24));
+ op32(ctx,SHR,to,pconst(&p,24));
+ break;
+ }
+ }
+ if( !is_reg8(from) ) { // source refused by is_reg8(): go through a temporary register
+ preg *r = alloc_reg(ctx, RCPU_CALL);
+ op32(ctx, MOV, r, from);
+ RUNLOCK(r);
+ op32(ctx,MOV8,to,r);
+ return from;
+ }
+ op32(ctx,MOV8,to,from);
+ break;
+ case 2:
+ if( to->kind == RCPU )
+ op64(ctx,XOR,to,to);
+ op32(ctx,MOV16,to,from);
+ break;
+ case 4:
+ op32(ctx,MOV,to,from);
+ break;
+ case 8:
+ if( IS_64 ) {
+ op64(ctx,MOV,to,from);
+ break;
+ }
+ default: // also reached from case 8 when IS_64 is false
+ ASSERT(size);
+ }
+ return to->kind == RCPU ? to : from;
+ case ID2(RFPU,RFPU):
+ case ID2(RMEM,RFPU):
+ case ID2(RSTACK,RFPU):
+ case ID2(RFPU,RMEM):
+ case ID2(RFPU,RSTACK):
+ switch( size ) { // floating point moves
+ case 8:
+ op64(ctx,MOVSD,to,from);
+ break;
+ case 4:
+ op32(ctx,MOVSS,to,from);
+ break;
+ default:
+ ASSERT(size);
+ }
+ return to->kind == RFPU ? to : from;
+ case ID2(RMEM,RSTACK): // stack slot to memory: load the vreg into a register of the matching class first
+ {
+ vreg *rfrom = R(from->id);
+ if( IS_FLOAT(rfrom) )
+ return copy(ctx,to,alloc_fpu(ctx,rfrom,true),size);
+ return copy(ctx,to,alloc_cpu(ctx,rfrom,true),size);
+ }
+ case ID2(RMEM,RMEM):
+ case ID2(RSTACK,RMEM):
+ case ID2(RSTACK,RSTACK):
+# ifndef HL_64
+ case ID2(RMEM,RADDR):
+ case ID2(RSTACK,RADDR):
+ case ID2(RADDR,RSTACK):
+# endif
+ { // memory to memory: bounce through a temporary register
+ preg *tmp;
+ if( (!IS_64 && size == 8) || (to->kind == RSTACK && IS_FLOAT(R(to->id))) || (from->kind == RSTACK && IS_FLOAT(R(from->id))) ) {
+ tmp = alloc_reg(ctx, RFPU);
+ op64(ctx,size == 8 ? MOVSD : MOVSS,tmp,from);
+ } else {
+ tmp = alloc_reg(ctx, RCPU);
+ copy(ctx,tmp,from,size);
+ }
+ return copy(ctx,to,tmp,size);
+ }
+# ifdef HL_64
+ case ID2(RCPU,RADDR):
+ case ID2(RMEM,RADDR):
+ case ID2(RSTACK,RADDR):
+ { // absolute source address: put the address in a register, then read through it
+ preg p;
+ preg *tmp = alloc_reg(ctx, RCPU);
+ op64(ctx,MOV,tmp,pconst64(&p,(int_val)from->holds));
+ return copy(ctx,to,pmem(&p,tmp->id,0),size);
+ }
+ case ID2(RADDR,RCPU):
+ case ID2(RADDR,RMEM):
+ case ID2(RADDR,RSTACK):
+ { // absolute destination address: put the address in a register, then write through it
+ preg p;
+ preg *tmp = alloc_reg(ctx, RCPU);
+ op64(ctx,MOV,tmp,pconst64(&p,(int_val)to->holds));
+ return copy(ctx,pmem(&p,tmp->id,0),from,size);
+ }
+# endif
+ default:
+ break;
+ }
+ printf("copy(%s,%s)\n",KNAMES[to->kind], KNAMES[from->kind]); // unsupported pair of operand kinds
+ ASSERT(0);
+ return NULL;
+}
+
+/*
+ * Write operand `v` into the stack slot of vreg `r`.
+ * Any register previously bound to `r` (other than `v`) is unbound first.
+ * The register class that copy() ends up using must match the type of `r`
+ * (FPU for floats, anything else for the rest), otherwise this aborts.
+ * When `bind` is set and the value ended up in a CPU or FPU register, that
+ * register becomes the current location of `r`.
+ */
+static void store( jit_ctx *ctx, vreg *r, preg *v, bool bind ) {
+	preg *previous = r->current;
+	if( previous && previous != v ) {
+		previous->holds = NULL;
+		r->current = NULL;
+	}
+	v = copy(ctx,&r->stack,v,r->size);
+	if( IS_FLOAT(r) != (v->kind == RFPU) )
+		ASSERT(0);
+	if( !bind || r->current == v )
+		return;
+	if( v->kind != RCPU && v->kind != RFPU )
+		return;
+	scratch(v);
+	r->current = v;
+	v->holds = r;
+}
+
+static void store_result( jit_ctx *ctx, vreg *r ) { // save the result of the call just emitted into vreg r
+# ifndef HL_64
+ switch( r->t->kind ) { // without HL_64, floats are popped from the FPU stack straight into the stack slot
+ case HF64:
+ scratch(r->current);
+ op64(ctx,FSTP,&r->stack,UNUSED);
+ break;
+ case HF32:
+ scratch(r->current);
+ op64(ctx,FSTP32,&r->stack,UNUSED);
+ break;
+ case HI64:
+ scratch(r->current);
+ error_i64(); // 64 bit int results are rejected on this build
+ break;
+ default:
+# endif
+ store(ctx,r,IS_FLOAT(r) ? REG_AT(XMM(0)) : PEAX,true); // result is taken from XMM0 for floats, from Eax otherwise
+# ifndef HL_64
+ break;
+ }
+# endif
+}
+
+/*
+ * Copy vreg `from` into vreg `to` and bind `to` to the register used.
+ * A 32-bit float that is not already in an FPU register is loaded into one
+ * first. Without HL_64, a 64-bit int destination is reported through
+ * error_i64() and nothing is emitted.
+ */
+static void op_mov( jit_ctx *ctx, vreg *to, vreg *from ) {
+	preg *src;
+#ifndef HL_64
+	if( to->t->kind == HI64 ) {
+		error_i64();
+		return;
+	}
+#endif
+	src = fetch(from);
+	if( src->kind != RFPU && from->t->kind == HF32 )
+		src = alloc_fpu(ctx,from,true);
+	store(ctx, to, src, true);
+}
+
+/* Store operand `from` into vreg `to`, binding `to` to the register when there is one. */
+static void copy_to( jit_ctx *ctx, vreg *to, preg *from ) {
+	const bool bind = true;
+	store(ctx,to,from,bind);
+}
+
+/* Copy the current value of vreg `from` (register or stack slot) into operand `to`. */
+static void copy_from( jit_ctx *ctx, preg *to, vreg *from ) {
+	preg *src = fetch(from);
+	copy(ctx,to,src,from->size);
+}
+
+static void store_const( jit_ctx *ctx, vreg *r, int c ) { // set vreg r to the integer constant c, in a CPU register and in its stack slot
+ preg p;
+ if( c == 0 )
+ op(ctx,XOR,alloc_cpu(ctx,r,false),alloc_cpu(ctx,r,false),r->size == 8); // zero through reg XOR reg; both calls yield the register bound to r
+ else if( r->size == 8 )
+ op64(ctx,MOV,alloc_cpu(ctx,r,false),pconst64(&p,c));
+ else
+ op32(ctx,MOV,alloc_cpu(ctx,r,false),pconst(&p,c));
+ store(ctx,r,r->current,false); // write the register back to the stack slot; bind is false since r is already bound
+}
+
+/*
+ * Forget every vreg binding held in registers that a call may overwrite.
+ * All CPU scratch registers are unbound. For FPU registers, a native call
+ * unbinds only the scratch ones, any other call unbinds all of them.
+ * No code is emitted and locks are left untouched.
+ *
+ * NOTE(review): the two loop headers were unreadable in this copy of the
+ * file and have been reconstructed; the FPU loop bound in particular should
+ * be checked against the reference implementation.
+ */
+static void discard_regs( jit_ctx *ctx, bool native_call ) {
+	int i;
+	for(i=0;i<RCPU_SCRATCH_COUNT;i++) {
+		preg *r = ctx->pregs + RCPU_SCRATCH_REGS[i];
+		if( r->holds ) {
+			r->holds->current = NULL;
+			r->holds = NULL;
+		}
+	}
+	for(i=0;i<(native_call?RFPU_SCRATCH_COUNT:RFPU_COUNT);i++) {
+		preg *r = ctx->pregs + XMM(i);
+		if( r->holds ) {
+			r->holds->current = NULL;
+			r->holds = NULL;
+		}
+	}
+}
+
+/*
+ * Keep the stack 16-byte aligned across a call that pushes `size` bytes of
+ * arguments. The frame is taken to hold the arguments, the vreg area
+ * (ctx->totalRegsSize) and two machine words (EIP+EBP). When that total is
+ * not a multiple of 16, the missing bytes are subtracted from the stack
+ * pointer. Returns `size` plus the padding emitted.
+ */
+static int pad_before_call( jit_ctx *ctx, int size ) {
+	const int misalign = (size + ctx->totalRegsSize + HL_WSIZE * 2) & 15;
+	preg imm;
+	int pad;
+	if( misalign == 0 )
+		return size;
+	pad = 16 - misalign; // 1..15, never zero here
+	op64(ctx,SUB,PESP,pconst(&imm,pad));
+	return size + pad;
+}
+
+static void push_reg( jit_ctx *ctx, vreg *r ) { // push the value of vreg r on the stack, using stack_size(r->t) bytes
+ preg p;
+ switch( stack_size(r->t) ) {
+ case 1: // no 1-byte push: reserve the room, then store
+ op64(ctx,SUB,PESP,pconst(&p,1));
+ op32(ctx,MOV8,pmem(&p,Esp,0),alloc_cpu8(ctx,r,true));
+ break;
+ case 2:
+ op64(ctx,SUB,PESP,pconst(&p,2));
+ op32(ctx,MOV16,pmem(&p,Esp,0),alloc_cpu(ctx,r,true));
+ break;
+ case 4:
+ if( r->size < 4 )
+ alloc_cpu(ctx,r,true); // force fetch (higher bits set to 0)
+ if( !IS_64 ) {
+ if( r->current != NULL && r->current->kind == RFPU ) scratch(r->current); // unbind an FPU register so that fetch() yields the stack slot
+ op32(ctx,PUSH,fetch(r),UNUSED);
+ } else {
+ // pseudo push32 (not available)
+ op64(ctx,SUB,PESP,pconst(&p,4));
+ op32(ctx,MOV,pmem(&p,Esp,0),alloc_cpu(ctx,r,true));
+ }
+ break;
+ case 8:
+ if( fetch(r)->kind == RFPU ) {
+ op64(ctx,SUB,PESP,pconst(&p,8));
+ op64(ctx,MOVSD,pmem(&p,Esp,0),fetch(r));
+ } else if( IS_64 )
+ op64(ctx,PUSH,fetch(r),UNUSED);
+ else if( r->stack.kind == RSTACK ) { // 8 bytes without 64-bit registers: push the two halves, upper one first
+ scratch(r->current);
+ r->stackPos += 4; // temporarily point the slot at its upper half
+ op32(ctx,PUSH,&r->stack,UNUSED);
+ r->stackPos -= 4;
+ op32(ctx,PUSH,&r->stack,UNUSED);
+ } else
+ ASSERT(0);
+ break;
+ default:
+ ASSERT(r->size);
+ }
+}
+
+/*
+	Starts a native call taking `nargs` arguments: records the argument count in
+	ctx->nativeArgsCount and pads the stack for the arguments that do not fit in the
+	CALL_NREGS call registers. Returns the padded stack size given by pad_before_call().
+*/
+static int begin_native_call( jit_ctx *ctx, int nargs ) {
+	int stackBytes = 0;
+	ctx->nativeArgsCount = nargs;
+	if( nargs > CALL_NREGS )
+		stackBytes = (nargs - CALL_NREGS) * HL_WSIZE;
+	return pad_before_call(ctx, stackBytes);
+}
+
+/*
+	Returns a register in which the next native argument can be prepared.
+	With HL_64 this is the call register matching the pending argument index
+	(ctx->nativeArgsCount - 1) when there is one, otherwise a register obtained from
+	alloc_reg(RCPU_CALL); the register is scratched before being returned.
+	Without HL_64 any CPU register from alloc_reg() is returned.
+*/
+static preg *alloc_native_arg( jit_ctx *ctx ) {
+# ifdef HL_64
+	preg *r;
+	int rid = ctx->nativeArgsCount - 1;
+	if( rid < CALL_NREGS )
+		r = REG_AT(CALL_REGS[rid]);
+	else
+		r = alloc_reg(ctx,RCPU_CALL);
+	scratch(r);
+	return r;
+# else
+	return alloc_reg(ctx, RCPU);
+# endif
+}
+
+/*
+	Passes `r` as the next native call argument. Arguments are consumed from last to
+	first: each call decrements ctx->nativeArgsCount (HL_64) to get the argument index.
+	With HL_64, indexes >= CALL_NREGS are pushed on the stack and the others are moved
+	into their call register; FPU registers are not accepted here.
+	Without HL_64 the argument is always pushed.
+*/
+static void set_native_arg( jit_ctx *ctx, preg *r ) {
+	if( r->kind == RSTACK ) {
+		// stack-resident values smaller than 4 bytes are fetched through fetch32() before being passed
+		vreg *v = ctx->vregs + r->id;
+		if( v->size < 4 )
+			r = fetch32(ctx, v);
+	}
+# ifdef HL_64
+	if( r->kind == RFPU ) ASSERT(0);
+	int rid = --ctx->nativeArgsCount;
+	preg *target;
+	if( rid >= CALL_NREGS ) {
+		op64(ctx,PUSH,r,UNUSED);
+		return;
+	}
+	target = REG_AT(CALL_REGS[rid]);
+	if( target != r ) {
+		op64(ctx, MOV, target, r);
+		scratch(target);
+	}
+# else
+	op32(ctx,PUSH,r,UNUSED);
+# endif
+}
+
+/*
+	Passes the floating point value in `r` as a native call argument.
+	With HL_64 the target is XMM(argument index) under IS_WINCALL64 and XMM(0) otherwise,
+	and the value is moved with MOVSS (isf32) or MOVSD; CPU registers are not accepted.
+	Without HL_64 the value is pushed.
+*/
+static void set_native_arg_fpu( jit_ctx *ctx, preg *r, bool isf32 ) {
+# ifdef HL_64
+	if( r->kind == RCPU ) ASSERT(0);
+	// can only be used if last argument !!
+	ctx->nativeArgsCount--;
+	preg *target = REG_AT(XMM(IS_WINCALL64 ? ctx->nativeArgsCount : 0));
+	if( target != r ) {
+		op64(ctx, isf32 ? MOVSS : MOVSD, target, r);
+		scratch(target);
+	}
+# else
+	op32(ctx,PUSH,r,UNUSED);
+# endif
+}
+
+// Bookkeeping used while assigning call arguments to registers (see select_call_reg).
+typedef struct {
+	int nextCpu;			// next free index in the CPU call registers
+	int nextFpu;			// next free index in the FPU call registers
+	int mapped[REG_COUNT];	// per register: argument id + 1, or 0 when no argument is mapped to it
+} call_regs;
+
+/*
+	Picks the call register receiving argument number `id` of type `t`.
+	Returns the register id and records `id + 1` in regs->mapped[reg], or returns -1 when
+	the argument has to go on the stack (always the case without HL_64).
+	With HL_WIN_CALL a single counter is shared between integer and float arguments;
+	otherwise floats and integers are counted separately.
+*/
+static int select_call_reg( call_regs *regs, hl_type *t, int id ) {
+# ifndef HL_64
+	return -1;
+#else
+	int reg, index;
+	bool isFloat = t->kind == HF32 || t->kind == HF64;
+# ifdef HL_WIN_CALL
+	index = regs->nextCpu++;
+# else
+	if( isFloat )
+		index = regs->nextFpu++;
+	else
+		index = regs->nextCpu++;
+# endif
+	if( index >= CALL_NREGS )
+		return -1;
+	if( isFloat )
+		reg = XMM(index);
+	else
+		reg = CALL_REGS[index];
+	regs->mapped[reg] = id + 1;
+	return reg;
+#endif
+}
+
+static int mapped_reg( call_regs *regs, int id ) {
+# ifndef HL_64
+ return -1;
+#else
+ int i;
+ for(i=0;imapped[r] == id + 1 ) return r;
+ r = XMM(i);
+ if( regs->mapped[r] == id + 1 ) return r;
+ }
+ return -1;
+#endif
+}
+
+static int prepare_call_args( jit_ctx *ctx, int count, int *args, vreg *vregs, int extraSize ) {
+ int i;
+ int size = extraSize, paddedSize;
+ call_regs ctmp = {0};
+ for(i=0;it, i);
+ if( cr >= 0 ) {
+ preg *c = REG_AT(cr);
+ preg *cur = fetch(r);
+ if( cur != c ) {
+ copy(ctx,c,cur,r->size);
+ scratch(c);
+ }
+ RLOCK(c);
+ continue;
+ }
+ size += stack_size(r->t);
+ }
+ paddedSize = pad_before_call(ctx,size);
+ for(i=0;i= 0 ) continue;
+ push_reg(ctx,r);
+ if( r->current ) RUNLOCK(r->current);
+ }
+ return paddedSize;
+}
+
+/*
+	Emits a CALL to `r` and, when `size` > 0, releases `size` bytes of stack afterwards.
+	A negative `size` disables both the JIT_DEBUG alignment check and the stack cleanup
+	(call_native passes -1 for the exception-raising natives).
+*/
+static void op_call( jit_ctx *ctx, preg *r, int size ) {
+	preg p;
+# ifdef JIT_DEBUG
+	if( IS_64 && size >= 0 ) {
+		// break into the debugger when ESP is not 16-byte aligned at the call site
+		int jchk;
+		op32(ctx,TEST,PESP,pconst(&p,15));
+		XJump(JZero,jchk);
+		BREAK(); // unaligned ESP
+		patch_jump(ctx, jchk);
+	}
+# endif
+	if( IS_WINCALL64 ) {
+		// MSVC requires 32bytes of free space here
+		op64(ctx,SUB,PESP,pconst(&p,32));
+		// the reserved area is released together with the arguments after the call
+		if( size >= 0 ) size += 32;
+	}
+	op32(ctx, CALL, r, UNUSED);
+	if( size > 0 ) op64(ctx,ADD,PESP,pconst(&p,size));
+}
+
+/*
+	Emits a call to the already resolved native function `nativeFun`: its address is
+	loaded in EAX and called through op_call(). `size` is the amount of stack to release
+	after the call. For hl_assert, hl_throw and on_jit_error the call is emitted with
+	size -1 and the register state is left untouched; for every other function the
+	registers are discarded after the call.
+*/
+static void call_native( jit_ctx *ctx, void *nativeFun, int size ) {
+	preg p;
+	bool raises = nativeFun == hl_assert || nativeFun == hl_throw || nativeFun == on_jit_error;
+	op64(ctx,MOV,PEAX,pconst64(&p,(int_val)nativeFun));
+	if( raises ) {
+		op_call(ctx,PEAX,-1);
+		return;
+	}
+	op_call(ctx,PEAX,size);
+	discard_regs(ctx, true);
+}
+
+/*
+	Emits a call to the module function `findex` with `count` arguments taken from the
+	virtual registers listed in `args`, then stores the result in `dst` when it is not NULL.
+	Native functions go through call_native(); HL functions are called with a relative
+	CALL whose displacement is either known now (already compiled / current function) or
+	left as 0 and queued in ctx->calls to be resolved later.
+*/
+static void op_call_fun( jit_ctx *ctx, vreg *dst, int findex, int count, int *args ) {
+	int fid = findex < 0 ? -1 : ctx->m->functions_indexes[findex];
+	// function ids at or above nfunctions designate natives
+	bool isNative = fid >= ctx->m->code->nfunctions;
+	int size = prepare_call_args(ctx,count,args,ctx->vregs,0);
+	preg p;
+	if( fid < 0 ) {
+		ASSERT(fid);
+	} else if( isNative ) {
+		call_native(ctx,ctx->m->functions_ptrs[findex],size);
+	} else {
+		// position of the CALL instruction: op_call() emits a SUB on ESP first under IS_WINCALL64
+		// NOTE(review): the 4 presumably matches the encoded size of that SUB -- confirm
+		int cpos = BUF_POS() + (IS_WINCALL64 ? 4 : 0);
+# ifdef JIT_DEBUG
+		if( IS_64 ) cpos += 13; // ESP CHECK
+# endif
+		// the displacements below are relative to cpos + 5
+		// NOTE(review): 5 is presumably the size of the CALL rel32 instruction -- confirm
+		if( ctx->m->functions_ptrs[findex] ) {
+			// already compiled
+			op_call(ctx,pconst(&p,(int)(int_val)ctx->m->functions_ptrs[findex] - (cpos + 5)), size);
+		} else if( ctx->m->code->functions + fid == ctx->f ) {
+			// our current function
+			op_call(ctx,pconst(&p, ctx->functionPos - (cpos + 5)), size);
+		} else {
+			// stage for later
+			jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist));
+			j->pos = cpos;
+			j->target = findex;
+			j->next = ctx->calls;
+			ctx->calls = j;
+			op_call(ctx,pconst(&p,0), size);
+		}
+		discard_regs(ctx, false);
+	}
+	if( dst )
+		store_result(ctx,dst);
+}
+
+/*
+	Emits the function prologue: saves EBP, makes it the frame pointer and reserves
+	ctx->totalRegsSize bytes of stack when that size is not zero.
+*/
+static void op_enter( jit_ctx *ctx ) {
+	preg p;
+	op64(ctx, PUSH, PEBP, UNUSED);
+	op64(ctx, MOV, PEBP, PESP);
+	if( ctx->totalRegsSize == 0 )
+		return;
+	op64(ctx, SUB, PESP, pconst(&p,ctx->totalRegsSize));
+}
+
+/*
+	Emits the function epilogue returning the value of `r`.
+	Floats are returned in XMM(0) with HL_64 and loaded with FLD32/FLD from the stack slot
+	otherwise; every other type is returned in EAX. The frame reserved by op_enter() is
+	then released, EBP restored and RET emitted.
+*/
+static void op_ret( jit_ctx *ctx, vreg *r ) {
+	preg p;
+	switch( r->t->kind ) {
+	case HF32:
+# ifdef HL_64
+		op64(ctx, MOVSS, PXMM(0), fetch(r));
+# else
+		op64(ctx,FLD32,&r->stack,UNUSED);
+# endif
+		break;
+	case HF64:
+# ifdef HL_64
+		op64(ctx, MOVSD, PXMM(0), fetch(r));
+# else
+		op64(ctx,FLD,&r->stack,UNUSED);
+# endif
+		break;
+	default:
+		// values smaller than 4 bytes that are not in a register are loaded through fetch32() first
+		if( r->size < 4 && !r->current )
+			fetch32(ctx, r);
+		if( r->current != PEAX )
+			op64(ctx,MOV,PEAX,fetch(r));
+		break;
+	}
+	if( ctx->totalRegsSize ) op64(ctx, ADD, PESP, pconst(&p, ctx->totalRegsSize));
+# ifdef JIT_DEBUG
+	{
+		// after releasing the locals ESP must be equal to EBP, otherwise report a JIT error
+		int jeq;
+		op64(ctx, CMP, PESP, PEBP);
+		XJump_small(JEq,jeq);
+		jit_error("invalid ESP");
+		patch_jump(ctx,jeq);
+	}
+# endif
+	op64(ctx, POP, PEBP, UNUSED);
+	op64(ctx, RET, UNUSED, UNUSED);
+}
+
+static void call_native_consts( jit_ctx *ctx, void *nativeFun, int_val *args, int nargs ) {
+ int size = pad_before_call(ctx, IS_64 ? 0 : HL_WSIZE*nargs);
+ preg p;
+ int i;
+# ifdef HL_64
+ for(i=0;i=0;i--)
+ op32(ctx, PUSH, pconst64(&p, args[i]), UNUSED);
+# endif
+ call_native(ctx, nativeFun, size);
+}
+
+/*
+	Runtime handler called by generated code when a JIT consistency check fails
+	(see _jit_error). Reports "<msg> (line <line>)" through a message box on
+	HL_WIN_DESKTOP or on stdout otherwise, breaks into the debugger and throws.
+
+	msg  : description of the failure
+	line : line number associated with the failure, passed as a word-sized value
+*/
+static void on_jit_error( const char *msg, int_val line ) {
+	char buf[256];
+	int iline = (int)line;
+	// bounded formatting: a long message is truncated instead of overflowing buf
+	snprintf(buf,sizeof(buf),"%s (line %d)",msg,iline);
+#ifdef HL_WIN_DESKTOP
+	MessageBoxA(NULL,buf,"JIT ERROR",MB_OK);
+#else
+	printf("JIT ERROR : %s\n",buf);
+#endif
+	hl_debug_break();
+	hl_throw(NULL);
+}
+
+// Emits a call to on_jit_error(msg,line) with both arguments passed as constants.
+static void _jit_error( jit_ctx *ctx, const char *msg, int line ) {
+	int_val args[2];
+	args[0] = (int_val)msg;
+	args[1] = (int_val)line;
+	call_native_consts(ctx,on_jit_error,args,2);
+}
+
+
+/*
+	Emits the code for the binary operation `bop` applied to the virtual registers `a` and `b`.
+
+	dst : register receiving the result, or NULL when only the CPU flags are wanted
+	      (op_jump calls this with NULL for the OJ* comparison opcodes and emits the jump itself)
+	Returns the physical register holding the result; NULL is only returned after the final
+	ASSERT on an unsupported register type.
+
+	Overview:
+	- float operands : SSE arithmetic, COMISS/COMISD for comparisons, fmodf/fmod call for OSMod
+	- integer shifts : the shift count is placed in ECX
+	- integer div/mod: uses EAX/EDX, with explicit handling of a divisor equal to 0 or -1
+	- everything else: a two-operand instruction selected by the first switch and emitted by
+	  the second switch according to where the operands currently live
+*/
+static preg *op_binop( jit_ctx *ctx, vreg *dst, vreg *a, vreg *b, hl_op bop ) {
+	preg *pa = fetch(a), *pb = fetch(b), *out = NULL;
+	CpuOp o;
+	if( IS_FLOAT(a) ) {
+		bool isf32 = a->t->kind == HF32;
+		switch( bop ) {
+		case OAdd: o = isf32 ? ADDSS : ADDSD; break;
+		case OSub: o = isf32 ? SUBSS : SUBSD; break;
+		case OMul: o = isf32 ? MULSS : MULSD; break;
+		case OSDiv: o = isf32 ? DIVSS : DIVSD; break;
+		case OJSLt:
+		case OJSGte:
+		case OJSLte:
+		case OJSGt:
+		case OJEq:
+		case OJNotEq:
+		case OJNotLt:
+		case OJNotGte:
+			o = isf32 ? COMISS : COMISD;
+			break;
+		case OSMod:
+			{
+				// float modulo is delegated to the C library
+				int args[] = { a->stack.id, b->stack.id };
+				int size = prepare_call_args(ctx,2,args,ctx->vregs,0);
+				void *mod_fun;
+				if( isf32 ) mod_fun = fmodf; else mod_fun = fmod;
+				call_native(ctx,mod_fun,size);
+				store_result(ctx,dst);
+				return fetch(dst);
+			}
+		default:
+			printf("%s\n", hl_op_name(bop));
+			ASSERT(bop);
+		}
+	} else {
+		bool is64 = a->t->kind == HI64;
+# ifndef HL_64
+		if( is64 ) {
+			error_i64();
+			return fetch(a);
+		}
+# endif
+		switch( bop ) {
+		case OAdd: o = ADD; break;
+		case OSub: o = SUB; break;
+		case OMul: o = IMUL; break;
+		case OAnd: o = AND; break;
+		case OOr: o = OR; break;
+		case OXor: o = XOR; break;
+		case OShl:
+		case OUShr:
+		case OSShr:
+			// the shift count has to be in ECX: move it there unless b already lives in it
+			if( !b->current || b->current->kind != RCPU || b->current->id != Ecx ) {
+				scratch(REG_AT(Ecx));
+				op(ctx,MOV,REG_AT(Ecx),pb,is64);
+				RLOCK(REG_AT(Ecx));
+				// a may have been moved by the scratch above
+				pa = fetch(a);
+			} else
+				RLOCK(b->current);
+			if( pa->kind != RCPU ) {
+				pa = alloc_reg(ctx, RCPU);
+				op(ctx,MOV,pa,fetch(a), is64);
+			}
+			op(ctx,bop == OShl ? SHL : (bop == OUShr ? SHR : SAR), pa, UNUSED,is64);
+			if( dst ) store(ctx, dst, pa, true);
+			return pa;
+		case OSDiv:
+		case OUDiv:
+		case OSMod:
+		case OUMod:
+			{
+				// the remainder is read from EDX and the quotient from EAX
+				preg *out = bop == OSMod || bop == OUMod ? REG_AT(Edx) : PEAX;
+				preg *r = pb;
+				preg p;
+				int jz, jz1 = 0, jend;
+				if( pa->kind == RCPU && pa->id == Eax ) RLOCK(pa);
+				// ensure b in CPU reg and not in Eax/Edx (for UI8/UI16)
+				if( pb->kind != RCPU || (pb->id == Eax || pb->id == Edx) ) {
+					scratch(REG_AT(Ecx));
+					scratch(pb);
+					load(ctx,REG_AT(Ecx),b);
+					r = REG_AT(Ecx);
+				}
+				// integer div 0 => 0
+				op(ctx,TEST,r,r,is64);
+				XJump_small(JZero, jz);
+				// Prevent MIN/-1 overflow exception
+				// OSMod: r = (b == 0 || b == -1) ? 0 : a % b
+				// OSDiv: r = (b == 0 || b == -1) ? a * b : a / b
+				if( bop == OSMod || bop == OSDiv ) {
+					op(ctx, CMP, r, pconst(&p,-1), is64);
+					XJump_small(JEq, jz1);
+				}
+				pa = fetch(a);
+				if( pa->kind != RCPU || pa->id != Eax ) {
+					scratch(PEAX);
+					scratch(pa);
+					load(ctx,PEAX,a);
+				}
+				scratch(REG_AT(Edx));
+				scratch(REG_AT(Eax));
+				if( bop == OUDiv || bop == OUMod )
+					op(ctx, XOR, REG_AT(Edx), REG_AT(Edx), is64);
+				else
+					op(ctx, CDQ, UNUSED, UNUSED, is64); // sign-extend Eax into Eax:Edx
+				op(ctx, bop == OUDiv || bop == OUMod ? DIV : IDIV, r, UNUSED, is64);
+				XJump_small(JAlways, jend);
+				// special cases (divisor 0 or -1) land here
+				patch_jump(ctx, jz);
+				patch_jump(ctx, jz1);
+				if( bop != OSDiv ) {
+					op(ctx, XOR, out, out, is64);
+				} else {
+					load(ctx, out, a);
+					op(ctx, IMUL, out, r, is64);
+				}
+				patch_jump(ctx, jend);
+				if( dst ) store(ctx, dst, out, true);
+				return out;
+			}
+		case OJSLt:
+		case OJSGte:
+		case OJSLte:
+		case OJSGt:
+		case OJULt:
+		case OJUGte:
+		case OJEq:
+		case OJNotEq:
+			// comparison width follows the operand type
+			switch( a->t->kind ) {
+			case HUI8:
+			case HBOOL:
+				o = CMP8;
+				break;
+			case HUI16:
+				o = CMP16;
+				break;
+			default:
+				o = CMP;
+				break;
+			}
+			break;
+		default:
+			printf("%s\n", hl_op_name(bop));
+			ASSERT(bop);
+		}
+	}
+	// emit the selected instruction according to the register type and operand locations
+	switch( RTYPE(a) ) {
+	case HI32:
+	case HUI8:
+	case HUI16:
+	case HBOOL:
+# ifndef HL_64
+	case HDYNOBJ:
+	case HVIRTUAL:
+	case HOBJ:
+	case HSTRUCT:
+	case HFUN:
+	case HMETHOD:
+	case HBYTES:
+	case HNULL:
+	case HENUM:
+	case HDYN:
+	case HTYPE:
+	case HABSTRACT:
+	case HARRAY:
+# endif
+		switch( ID2(pa->kind, pb->kind) ) {
+		case ID2(RCPU,RCPU):
+		case ID2(RCPU,RSTACK):
+			op32(ctx, o, pa, pb);
+			scratch(pa);
+			out = pa;
+			break;
+		case ID2(RSTACK,RCPU):
+			// operate directly on the stack slot only when the destination is a itself
+			if( dst == a && o != IMUL ) {
+				op32(ctx, o, pa, pb);
+				dst = NULL;
+				out = pa;
+			} else {
+				// otherwise bring a into a register and retry
+				alloc_cpu(ctx,a, true);
+				return op_binop(ctx,dst,a,b,bop);
+			}
+			break;
+		case ID2(RSTACK,RSTACK):
+			alloc_cpu(ctx, a, true);
+			return op_binop(ctx, dst, a, b, bop);
+		default:
+			printf("%s(%d,%d)\n", hl_op_name(bop), pa->kind, pb->kind);
+			ASSERT(ID2(pa->kind, pb->kind));
+		}
+		if( dst ) store(ctx, dst, out, true);
+		return out;
+# ifdef HL_64
+	case HOBJ:
+	case HSTRUCT:
+	case HDYNOBJ:
+	case HVIRTUAL:
+	case HFUN:
+	case HMETHOD:
+	case HBYTES:
+	case HNULL:
+	case HENUM:
+	case HDYN:
+	case HTYPE:
+	case HABSTRACT:
+	case HARRAY:
+	case HI64:
+	case HGUID:
+		// same logic as above with 64-bit operations
+		switch( ID2(pa->kind, pb->kind) ) {
+		case ID2(RCPU,RCPU):
+		case ID2(RCPU,RSTACK):
+			op64(ctx, o, pa, pb);
+			scratch(pa);
+			out = pa;
+			break;
+		case ID2(RSTACK,RCPU):
+			if( dst == a && OP_FORMS[o].mem_r ) {
+				op64(ctx, o, pa, pb);
+				dst = NULL;
+				out = pa;
+			} else {
+				alloc_cpu(ctx,a, true);
+				return op_binop(ctx,dst,a,b,bop);
+			}
+			break;
+		case ID2(RSTACK,RSTACK):
+			alloc_cpu(ctx, a, true);
+			return op_binop(ctx, dst, a, b, bop);
+		default:
+			printf("%s(%d,%d)\n", hl_op_name(bop), pa->kind, pb->kind);
+			ASSERT(ID2(pa->kind, pb->kind));
+		}
+		if( dst ) store(ctx, dst, out, true);
+		return out;
+# endif
+	case HF64:
+	case HF32:
+		pa = alloc_fpu(ctx, a, true);
+		pb = alloc_fpu(ctx, b, true);
+		switch( ID2(pa->kind, pb->kind) ) {
+		case ID2(RFPU,RFPU):
+			op64(ctx,o,pa,pb);
+			// after a float comparison, rewrite the flags when the parity flag is set
+			// so that the conditional jump emitted by the caller behaves as intended
+			if( (o == COMISD || o == COMISS) && bop != OJSGt ) {
+				int jnotnan;
+				XJump_small(JNParity,jnotnan);
+				switch( bop ) {
+				case OJSLt:
+				case OJNotLt:
+					{
+						preg *r = alloc_reg(ctx,RCPU);
+						// set CF=0, ZF=1
+						op64(ctx,XOR,r,r);
+						RUNLOCK(r);
+						break;
+					}
+				case OJSGte:
+				case OJNotGte:
+					{
+						preg *r = alloc_reg(ctx,RCPU);
+						// set ZF=0, CF=1
+						op64(ctx,XOR,r,r);
+						op64(ctx,CMP,r,PESP);
+						RUNLOCK(r);
+						break;
+					}
+					break;
+				case OJNotEq:
+				case OJEq:
+					// set ZF=0, CF=?
+				case OJSLte:
+					// set ZF=0, CF=0
+					op64(ctx,TEST,PESP,PESP);
+					break;
+				default:
+					ASSERT(bop);
+				}
+				patch_jump(ctx,jnotnan);
+			}
+			scratch(pa);
+			out = pa;
+			break;
+		default:
+			printf("%s(%d,%d)\n", hl_op_name(bop), pa->kind, pb->kind);
+			ASSERT(ID2(pa->kind, pb->kind));
+		}
+		if( dst ) store(ctx, dst, out, true);
+		return out;
+	default:
+		ASSERT(RTYPE(a));
+	}
+	return NULL;
+}
+
+/*
+	Emits the conditional (or unconditional) jump matching the HL jump opcode `op`, based
+	on the CPU flags set by a previous comparison. When `isFloat` is set, the signed
+	opcodes use the unsigned condition codes (JUGte, JUGt, JULt, JULte).
+	Returns the value produced by the XJump macro, which callers hand to register_jump()
+	as the jump position; an unknown opcode prints a message and returns 0.
+*/
+static int do_jump( jit_ctx *ctx, hl_op op, bool isFloat ) {
+	int j;
+	switch( op ) {
+	case OJAlways:
+		XJump(JAlways,j);
+		break;
+	case OJSGte:
+		XJump(isFloat ? JUGte : JSGte,j);
+		break;
+	case OJSGt:
+		XJump(isFloat ? JUGt : JSGt,j);
+		break;
+	case OJUGte:
+		XJump(JUGte,j);
+		break;
+	case OJSLt:
+		XJump(isFloat ? JULt : JSLt,j);
+		break;
+	case OJSLte:
+		XJump(isFloat ? JULte : JSLte,j);
+		break;
+	case OJULt:
+		XJump(JULt,j);
+		break;
+	case OJEq:
+		XJump(JEq,j);
+		break;
+	case OJNotEq:
+		XJump(JNeq,j);
+		break;
+	case OJNotLt:
+		XJump(JUGte,j);
+		break;
+	case OJNotGte:
+		XJump(JULt,j);
+		break;
+	default:
+		j = 0;
+		printf("Unknown JUMP %d\n",op);
+		break;
+	}
+	return j;
+}
+
+/*
+	Records a jump emitted at `pos` whose destination is the opcode index `target`, by
+	prepending an entry to ctx->jumps (allocated from ctx->falloc). A non-zero target
+	whose ctx->opsPos entry is still 0 gets marked with -1.
+*/
+static void register_jump( jit_ctx *ctx, int pos, int target ) {
+	jlist *entry = (jlist*)hl_malloc(&ctx->falloc, sizeof(jlist));
+	entry->pos = pos;
+	entry->target = target;
+	entry->next = ctx->jumps;
+	ctx->jumps = entry;
+	if( target == 0 || ctx->opsPos[target] != 0 )
+		return;
+	ctx->opsPos[target] = -1;
+}
+
+#define HDYN_VALUE 8
+
+/*
+	Emits the comparison of the values boxed in two dynamics.
+
+	a, b : CPU registers holding the two pointers; the boxed value is read at offset
+	       HDYN_VALUE from each of them. For non-float types the loaded values overwrite
+	       a and b, float values are loaded in two freshly allocated FPU registers.
+	t    : type of the boxed value, selects the width of the loads and of the comparison
+
+	Only the CPU flags are produced, the caller emits the conditional jump.
+*/
+static void dyn_value_compare( jit_ctx *ctx, preg *a, preg *b, hl_type *t ) {
+	preg p;
+	switch( t->kind ) {
+	case HUI8:
+	case HBOOL:
+		op32(ctx,MOV8,a,pmem(&p,a->id,HDYN_VALUE));
+		op32(ctx,MOV8,b,pmem(&p,b->id,HDYN_VALUE));
+		op64(ctx,CMP8,a,b);
+		break;
+	case HUI16:
+		op32(ctx,MOV16,a,pmem(&p,a->id,HDYN_VALUE));
+		op32(ctx,MOV16,b,pmem(&p,b->id,HDYN_VALUE));
+		op64(ctx,CMP16,a,b);
+		break;
+	case HI32:
+		op32(ctx,MOV,a,pmem(&p,a->id,HDYN_VALUE));
+		op32(ctx,MOV,b,pmem(&p,b->id,HDYN_VALUE));
+		op64(ctx,CMP,a,b);
+		break;
+	case HF32:
+		{
+			preg *fa = alloc_reg(ctx, RFPU);
+			preg *fb = alloc_reg(ctx, RFPU);
+			op64(ctx,MOVSS,fa,pmem(&p,a->id,HDYN_VALUE));
+			op64(ctx,MOVSS,fb,pmem(&p,b->id,HDYN_VALUE));
+			// single precision compare: the values were loaded with MOVSS, comparing them
+			// with COMISD would interpret the 32-bit patterns as doubles
+			op64(ctx,COMISS,fa,fb);
+		}
+		break;
+	case HF64:
+		{
+			preg *fa = alloc_reg(ctx, RFPU);
+			preg *fb = alloc_reg(ctx, RFPU);
+			op64(ctx,MOVSD,fa,pmem(&p,a->id,HDYN_VALUE));
+			op64(ctx,MOVSD,fb,pmem(&p,b->id,HDYN_VALUE));
+			op64(ctx,COMISD,fa,fb);
+		}
+		break;
+	case HI64:
+	default:
+		// ptr comparison
+		op64(ctx,MOV,a,pmem(&p,a->id,HDYN_VALUE));
+		op64(ctx,MOV,b,pmem(&p,b->id,HDYN_VALUE));
+		op64(ctx,CMP,a,b);
+		break;
+	}
+}
+
+/*
+	Emits the comparison of `a` and `b` followed by the conditional jump described by `op`,
+	and registers that jump towards the opcode index `targetPos`.
+
+	The comparison strategy depends on the operand types:
+	- HDYN / HFUN operands : runtime call to hl_dyn_compare
+	- HTYPE                : runtime call to hl_same_type
+	- HNULL                : pointer comparison, then comparison of the boxed values
+	- HVIRTUAL             : pointer comparison, then comparison of the word stored at
+	                         offset HL_WSIZE of each virtual
+	- HOBJ / HSTRUCT       : custom compare function when the runtime type has one
+	- anything else        : plain comparison through op_binop()
+*/
+static void op_jump( jit_ctx *ctx, vreg *a, vreg *b, hl_opcode *op, int targetPos ) {
+	if( a->t->kind == HDYN || b->t->kind == HDYN || a->t->kind == HFUN || b->t->kind == HFUN ) {
+		int args[] = { a->stack.id, b->stack.id };
+		int size = prepare_call_args(ctx,2,args,ctx->vregs,0);
+		call_native(ctx,hl_dyn_compare,size);
+		if( op->op == OJSGt || op->op == OJSGte ) {
+			// for > and >= the jump is skipped when the result is hl_invalid_comparison
+			preg p;
+			int jinvalid;
+			op32(ctx,CMP,PEAX,pconst(&p,hl_invalid_comparison));
+			XJump_small(JEq,jinvalid);
+			op32(ctx,TEST,PEAX,PEAX);
+			register_jump(ctx,do_jump(ctx,op->op, IS_FLOAT(a)),targetPos);
+			patch_jump(ctx,jinvalid);
+			return;
+		}
+		op32(ctx,TEST,PEAX,PEAX);
+	} else switch( a->t->kind ) {
+	case HTYPE:
+		{
+			int args[] = { a->stack.id, b->stack.id };
+			int size = prepare_call_args(ctx,2,args,ctx->vregs,0);
+			preg p;
+			call_native(ctx,hl_same_type,size);
+			op64(ctx,CMP8,PEAX,pconst(&p,1));
+		}
+		break;
+	case HNULL:
+		{
+			preg *pa = hl_type_size(a->t->tparam) == 1 ? alloc_cpu8(ctx,a,true) : alloc_cpu(ctx,a,true);
+			preg *pb = hl_type_size(b->t->tparam) == 1 ? alloc_cpu8(ctx,b,true) : alloc_cpu(ctx,b,true);
+			if( op->op == OJEq ) {
+				// if( a == b || (a && b && a->v == b->v) ) goto
+				int ja, jb;
+				// identical pointers (including both null) are equal: jump right away
+				op64(ctx,CMP,pa,pb);
+				register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
+				op64(ctx,TEST,pa,pa);
+				XJump_small(JZero,ja);
+				op64(ctx,TEST,pb,pb);
+				XJump_small(JZero,jb);
+				dyn_value_compare(ctx,pa,pb,a->t->tparam);
+				register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
+				scratch(pa);
+				scratch(pb);
+				patch_jump(ctx,ja);
+				patch_jump(ctx,jb);
+			} else if( op->op == OJNotEq ) {
+				int jeq, jcmp;
+				// if( a != b && (!a || !b || a->v != b->v) ) goto
+				op64(ctx,CMP,pa,pb);
+				XJump_small(JEq,jeq);
+				op64(ctx,TEST,pa,pa);
+				register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
+				op64(ctx,TEST,pb,pb);
+				register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
+				dyn_value_compare(ctx,pa,pb,a->t->tparam);
+				XJump_small(JZero,jcmp);
+				scratch(pa);
+				scratch(pb);
+				register_jump(ctx,do_jump(ctx,OJNotEq,false),targetPos);
+				patch_jump(ctx,jcmp);
+				patch_jump(ctx,jeq);
+			} else
+				ASSERT(op->op);
+			return;
+		}
+	case HVIRTUAL:
+		{
+			preg p;
+			preg *pa = alloc_cpu(ctx,a,true);
+			preg *pb = alloc_cpu(ctx,b,true);
+			int ja,jb,jav,jbv,jvalue;
+			if( b->t->kind == HOBJ ) {
+				// virtual against object: compare the word at offset HL_WSIZE of the virtual with b
+				if( op->op == OJEq ) {
+					// if( a ? (b && a->value == b) : (b == NULL) ) goto
+					op64(ctx,TEST,pa,pa);
+					XJump_small(JZero,ja);
+					op64(ctx,TEST,pb,pb);
+					XJump_small(JZero,jb);
+					op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE));
+					op64(ctx,CMP,pa,pb);
+					XJump_small(JAlways,jvalue);
+					patch_jump(ctx,ja);
+					op64(ctx,TEST,pb,pb);
+					patch_jump(ctx,jvalue);
+					register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
+					patch_jump(ctx,jb);
+				} else if( op->op == OJNotEq ) {
+					// if( a ? (b == NULL || a->value != b) : (b != NULL) ) goto
+					op64(ctx,TEST,pa,pa);
+					XJump_small(JZero,ja);
+					op64(ctx,TEST,pb,pb);
+					register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
+					op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE));
+					op64(ctx,CMP,pa,pb);
+					XJump_small(JAlways,jvalue);
+					patch_jump(ctx,ja);
+					op64(ctx,TEST,pb,pb);
+					patch_jump(ctx,jvalue);
+					register_jump(ctx,do_jump(ctx,OJNotEq,false),targetPos);
+				} else
+					ASSERT(op->op);
+				scratch(pa);
+				return;
+			}
+			// virtual against virtual: the flags of this comparison are consumed by both branches below
+			op64(ctx,CMP,pa,pb);
+			if( op->op == OJEq ) {
+				// if( a == b || (a && b && a->value && b->value && a->value == b->value) ) goto
+				register_jump(ctx,do_jump(ctx,OJEq, false),targetPos);
+				op64(ctx,TEST,pa,pa);
+				XJump_small(JZero,ja);
+				op64(ctx,TEST,pb,pb);
+				XJump_small(JZero,jb);
+				op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE));
+				op64(ctx,TEST,pa,pa);
+				XJump_small(JZero,jav);
+				op64(ctx,MOV,pb,pmem(&p,pb->id,HL_WSIZE));
+				op64(ctx,TEST,pb,pb);
+				XJump_small(JZero,jbv);
+				op64(ctx,CMP,pa,pb);
+				XJump_small(JNeq,jvalue);
+				register_jump(ctx,do_jump(ctx,OJEq, false),targetPos);
+				patch_jump(ctx,ja);
+				patch_jump(ctx,jb);
+				patch_jump(ctx,jav);
+				patch_jump(ctx,jbv);
+				patch_jump(ctx,jvalue);
+			} else if( op->op == OJNotEq ) {
+				int jnext;
+				// if( a != b && (!a || !b || !a->value || !b->value || a->value != b->value) ) goto
+				XJump_small(JEq,jnext);
+				op64(ctx,TEST,pa,pa);
+				XJump_small(JZero,ja);
+				op64(ctx,TEST,pb,pb);
+				XJump_small(JZero,jb);
+				op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE));
+				op64(ctx,TEST,pa,pa);
+				XJump_small(JZero,jav);
+				op64(ctx,MOV,pb,pmem(&p,pb->id,HL_WSIZE));
+				op64(ctx,TEST,pb,pb);
+				XJump_small(JZero,jbv);
+				op64(ctx,CMP,pa,pb);
+				XJump_small(JEq,jvalue);
+				patch_jump(ctx,ja);
+				patch_jump(ctx,jb);
+				patch_jump(ctx,jav);
+				patch_jump(ctx,jbv);
+				register_jump(ctx,do_jump(ctx,OJAlways, false),targetPos);
+				patch_jump(ctx,jnext);
+				patch_jump(ctx,jvalue);
+			} else
+				ASSERT(op->op);
+			scratch(pa);
+			scratch(pb);
+			return;
+		}
+		break;
+	case HOBJ:
+	case HSTRUCT:
+		if( b->t->kind == HVIRTUAL ) {
+			op_jump(ctx,b,a,op,targetPos); // inverse
+			return;
+		}
+		if( hl_get_obj_rt(a->t)->compareFun ) {
+			// the runtime type provides a compare function: call it unless the pointers already decide
+			preg *pa = alloc_cpu(ctx,a,true);
+			preg *pb = alloc_cpu(ctx,b,true);
+			preg p;
+			int jeq, ja, jb, jcmp;
+			int args[] = { a->stack.id, b->stack.id };
+			switch( op->op ) {
+			case OJEq:
+				// if( a == b || (a && b && cmp(a,b) == 0) ) goto
+				op64(ctx,CMP,pa,pb);
+				XJump_small(JEq,jeq);
+				op64(ctx,TEST,pa,pa);
+				XJump_small(JZero,ja);
+				op64(ctx,TEST,pb,pb);
+				XJump_small(JZero,jb);
+				op_call_fun(ctx,NULL,(int)(int_val)a->t->obj->rt->compareFun,2,args);
+				op32(ctx,TEST,PEAX,PEAX);
+				XJump_small(JNotZero,jcmp);
+				patch_jump(ctx,jeq);
+				register_jump(ctx,do_jump(ctx,OJAlways,false),targetPos);
+				patch_jump(ctx,ja);
+				patch_jump(ctx,jb);
+				patch_jump(ctx,jcmp);
+				break;
+			case OJNotEq:
+				// if( a != b && (!a || !b || cmp(a,b) != 0) ) goto
+				op64(ctx,CMP,pa,pb);
+				XJump_small(JEq,jeq);
+				op64(ctx,TEST,pa,pa);
+				register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
+				op64(ctx,TEST,pb,pb);
+				register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
+
+				op_call_fun(ctx,NULL,(int)(int_val)a->t->obj->rt->compareFun,2,args);
+				op32(ctx,TEST,PEAX,PEAX);
+				XJump_small(JZero,jcmp);
+
+				register_jump(ctx,do_jump(ctx,OJNotEq,false),targetPos);
+				patch_jump(ctx,jcmp);
+				patch_jump(ctx,jeq);
+				break;
+			default:
+				// if( a && b && cmp(a,b) ?? 0 ) goto
+				op64(ctx,TEST,pa,pa);
+				XJump_small(JZero,ja);
+				op64(ctx,TEST,pb,pb);
+				XJump_small(JZero,jb);
+				op_call_fun(ctx,NULL,(int)(int_val)a->t->obj->rt->compareFun,2,args);
+				op32(ctx,CMP,PEAX,pconst(&p,0));
+				register_jump(ctx,do_jump(ctx,op->op,false),targetPos);
+				patch_jump(ctx,ja);
+				patch_jump(ctx,jb);
+				break;
+			}
+			return;
+		}
+		// fallthrough
+	default:
+		// make sure we have valid 8 bits registers
+		if( a->size == 1 ) alloc_cpu8(ctx,a,true);
+		if( b->size == 1 ) alloc_cpu8(ctx,b,true);
+		op_binop(ctx,NULL,a,b,op->op);
+		break;
+	}
+	// shared tail: jump on the flags produced by the comparison emitted above
+	register_jump(ctx,do_jump(ctx,op->op, IS_FLOAT(a)),targetPos);
+}
+
+jit_ctx *hl_jit_alloc() {
+ int i;
+ jit_ctx *ctx = (jit_ctx*)malloc(sizeof(jit_ctx));
+ if( ctx == NULL ) return NULL;
+ memset(ctx,0,sizeof(jit_ctx));
+ hl_alloc_init(&ctx->falloc);
+ hl_alloc_init(&ctx->galloc);
+ for(i=0;iid = i;
+ r->kind = RCPU;
+ }
+ for(i=0;iid = i;
+ r->kind = RFPU;
+ }
+ return ctx;
+}
+
+/*
+	Releases the buffers owned by the JIT context (virtual registers, opcode positions,
+	code buffer and both allocators) and clears the matching fields.
+	When `can_reset` is false the context itself is freed as well; otherwise it is kept
+	so that it can be used again.
+*/
+void hl_jit_free( jit_ctx *ctx, h_bool can_reset ) {
+	// virtual registers
+	free(ctx->vregs);
+	ctx->vregs = NULL;
+	ctx->maxRegs = 0;
+	// opcode positions
+	free(ctx->opsPos);
+	ctx->opsPos = NULL;
+	ctx->maxOps = 0;
+	// output buffer
+	free(ctx->startBuf);
+	ctx->startBuf = NULL;
+	ctx->bufSize = 0;
+	ctx->buf.b = NULL;
+	// pending lists
+	ctx->calls = NULL;
+	ctx->switchs = NULL;
+	ctx->closure_list = NULL;
+	hl_free(&ctx->falloc);
+	hl_free(&ctx->galloc);
+	if( can_reset )
+		return;
+	free(ctx);
+}
+
+// Emits NOP instructions until the current buffer position is a multiple of 16.
+static void jit_nops( jit_ctx *ctx ) {
+	for(;;) {
+		if( (BUF_POS() & 15) == 0 )
+			break;
+		op32(ctx, NOP, UNUSED, UNUSED);
+	}
+}
+
+#define MAX_ARGS 16
+
+static void *call_jit_c2hl = NULL;
+static void *call_jit_hl2c = NULL;
+
+/*
+	Calls a JIT compiled function from C.
+
+	_f   : pointer to the function pointer to call
+	t    : function type, used to read the argument and return types at runtime
+	args : one pointer per argument, pointing to the argument value
+	ret  : storage receiving the result for the primitive return types
+
+	The arguments are laid out in a local buffer: first the stack arguments (padded to a
+	multiple of 16 bytes), then the image of the call registers. The call_jit_c2hl
+	trampoline receives the function, the end of the stack arguments and the start of the
+	buffer. Returns a pointer to the result stored in `ret` for primitive return types, and
+	the value returned by the trampoline otherwise.
+*/
+static void *callback_c2hl( void *_f, hl_type *t, void **args, vdynamic *ret ) {
+	/*
+		prepare stack and regs according to prepare_call_args, but by reading runtime type information
+		from the function type. The stack and regs will be setup by the trampoline function.
+	*/
+	void **f = (void**)_f;
+	// Stack arguments take at most MAX_ARGS * 8 bytes. The register image is written right
+	// after them (at stack + size): CALL_NREGS words for the CPU registers followed by one
+	// double per FPU register, so room for it has to be part of the buffer as well.
+	unsigned char stack[MAX_ARGS * 8 + CALL_NREGS * (HL_WSIZE + sizeof(double))];
+	call_regs cregs = {0};
+	if( t->fun->nargs > MAX_ARGS )
+		hl_error("Too many arguments for dynamic call");
+	int i, size = 0, pad = 0, pos = 0;
+	// first pass: assign the call registers and measure the stack arguments
+	for(i=0;i<t->fun->nargs;i++) {
+		hl_type *at = t->fun->args[i];
+		int creg = select_call_reg(&cregs,at,i);
+		if( creg >= 0 )
+			continue;
+		size += stack_size(at);
+	}
+	pad = (-size) & 15;
+	size += pad;
+	pos = 0;
+	// second pass: write every argument either in the register image or in the stack area
+	for(i=0;i<t->fun->nargs;i++) {
+		// RTL
+		hl_type *at = t->fun->args[i];
+		void *v = args[i];
+		int creg = mapped_reg(&cregs,i);
+		void *store;
+		if( creg >= 0 ) {
+			if( REG_IS_FPU(creg) ) {
+				store = stack + size + CALL_NREGS * HL_WSIZE + (creg - XMM(0)) * sizeof(double);
+			} else {
+				store = stack + size + call_reg_index(creg) * HL_WSIZE;
+			}
+			switch( at->kind ) {
+			case HBOOL:
+			case HUI8:
+				*(int_val*)store = *(unsigned char*)v;
+				break;
+			case HUI16:
+				*(int_val*)store = *(unsigned short*)v;
+				break;
+			case HI32:
+				*(int_val*)store = *(int*)v;
+				break;
+			case HF32:
+				// clear the whole slot before writing the 32-bit value
+				*(void**)store = 0;
+				*(float*)store = *(float*)v;
+				break;
+			case HF64:
+				*(double*)store = *(double*)v;
+				break;
+			case HI64:
+			case HGUID:
+				*(int64*)store = *(int64*)v;
+				break;
+			default:
+				*(void**)store = v;
+				break;
+			}
+		} else {
+			int tsize = stack_size(at);
+			store = stack + pos;
+			pos += tsize;
+			switch( at->kind ) {
+			case HBOOL:
+			case HUI8:
+				*(int*)store = *(unsigned char*)v;
+				break;
+			case HUI16:
+				*(int*)store = *(unsigned short*)v;
+				break;
+			case HI32:
+			case HF32:
+				*(int*)store = *(int*)v;
+				break;
+			case HF64:
+				*(double*)store = *(double*)v;
+				break;
+			case HI64:
+			case HGUID:
+				*(int64*)store = *(int64*)v;
+				break;
+			default:
+				*(void**)store = v;
+				break;
+			}
+		}
+	}
+	// convert the end of the stack arguments from a byte offset to a word index
+	pos += pad;
+	pos >>= IS_64 ? 3 : 2;
+	switch( t->fun->ret->kind ) {
+	case HUI8:
+	case HUI16:
+	case HI32:
+	case HBOOL:
+		ret->v.i = ((int (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
+		return &ret->v.i;
+	case HI64:
+	case HGUID:
+		ret->v.i64 = ((int64 (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
+		return &ret->v.i64;
+	case HF32:
+		ret->v.f = ((float (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
+		return &ret->v.f;
+	case HF64:
+		ret->v.d = ((double (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
+		return &ret->v.d;
+	default:
+		return ((void *(*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
+	}
+}
+
+static void jit_c2hl( jit_ctx *ctx ) {
+ // create the function that will be called by callback_c2hl
+ // it will make sure to prepare the stack/regs according to native calling conventions
+ int jeq, jloop, jstart;
+ preg *fptr, *stack, *stend;
+ preg p;
+
+ op64(ctx,PUSH,PEBP,UNUSED);
+ op64(ctx,MOV,PEBP,PESP);
+
+# ifdef HL_64
+
+ fptr = REG_AT(R10);
+ stack = PEAX;
+ stend = REG_AT(R11);
+ op64(ctx, MOV, fptr, REG_AT(CALL_REGS[0]));
+ op64(ctx, MOV, stack, REG_AT(CALL_REGS[1]));
+ op64(ctx, MOV, stend, REG_AT(CALL_REGS[2]));
+
+ // set native call regs
+ int i;
+ for(i=0;iid,i*HL_WSIZE));
+ for(i=0;iid,(i+CALL_NREGS)*HL_WSIZE));
+
+# else
+
+ // make sure the stack is aligned on 16 bytes
+ // the amount of push we will do afterwards is guaranteed to be a multiple of 16bytes by hl_callback
+# ifdef HL_VCC
+ // VCC does not guarantee us an aligned stack...
+ op64(ctx,MOV,PEAX,PESP);
+ op64(ctx,AND,PEAX,pconst(&p,15));
+ op64(ctx,SUB,PESP,PEAX);
+# else
+ op64(ctx,SUB,PESP,pconst(&p,8));
+# endif
+
+ // mov arguments to regs
+ fptr = REG_AT(Eax);
+ stack = REG_AT(Edx);
+ stend = REG_AT(Ecx);
+ op64(ctx,MOV,fptr,pmem(&p,Ebp,HL_WSIZE*2));
+ op64(ctx,MOV,stack,pmem(&p,Ebp,HL_WSIZE*3));
+ op64(ctx,MOV,stend,pmem(&p,Ebp,HL_WSIZE*4));
+
+# endif
+
+ // push stack args
+ jstart = BUF_POS();
+ op64(ctx,CMP,stack,stend);
+ XJump(JEq,jeq);
+ op64(ctx,SUB,stack,pconst(&p,HL_WSIZE));
+ op64(ctx,PUSH,pmem(&p,stack->id,0),UNUSED);
+ XJump(JAlways,jloop);
+ patch_jump(ctx,jeq);
+ patch_jump_to(ctx, jloop, jstart);
+
+ op_call(ctx,fptr,0);
+
+ // cleanup and ret
+ op64(ctx,MOV,PESP,PEBP);
+ op64(ctx,POP,PEBP, UNUSED);
+ op64(ctx,RET,UNUSED,UNUSED);
+}
+
+static vdynamic *jit_wrapper_call( vclosure_wrapper *c, char *stack_args, void **regs ) {
+ vdynamic *args[MAX_ARGS];
+ int i;
+ int nargs = c->cl.t->fun->nargs;
+ call_regs cregs = {0};
+ if( nargs > MAX_ARGS )
+ hl_error("Too many arguments for wrapped call");
+ cregs.nextCpu++; // skip fptr in HL64 - was passed as arg0
+ for(i=0;icl.t->fun->args[i];
+ int creg = select_call_reg(&cregs,t,i);
+ if( creg < 0 ) {
+ args[i] = hl_is_dynamic(t) ? *(vdynamic**)stack_args : hl_make_dyn(stack_args,t);
+ stack_args += stack_size(t);
+ } else if( hl_is_dynamic(t) ) {
+ args[i] = *(vdynamic**)(regs + call_reg_index(creg));
+ } else if( t->kind == HF32 || t->kind == HF64 ) {
+ args[i] = hl_make_dyn(regs + CALL_NREGS + creg - XMM(0),&hlt_f64);
+ } else {
+ args[i] = hl_make_dyn(regs + call_reg_index(creg),t);
+ }
+ }
+ return hl_dyn_call(c->wrappedFun,args,nargs);
+}
+
+/*
+	Runs the wrapped closure through jit_wrapper_call() and converts the dynamic result
+	to the closure's declared return type, returned as a word-sized value: NULL for HVOID,
+	the integer cast for the integer kinds and the pointer cast for everything else.
+*/
+static void *jit_wrapper_ptr( vclosure_wrapper *c, char *stack_args, void **regs ) {
+	hl_type *tret;
+	hl_type_kind kind;
+	vdynamic *ret = jit_wrapper_call(c, stack_args, regs);
+	tret = c->cl.t->fun->ret;
+	kind = tret->kind;
+	if( kind == HVOID )
+		return NULL;
+	if( kind == HUI8 || kind == HUI16 || kind == HI32 || kind == HBOOL )
+		return (void*)(int_val)hl_dyn_casti(&ret,&hlt_dyn,tret);
+	if( kind == HI64 || kind == HGUID )
+		return (void*)(int_val)hl_dyn_casti64(&ret,&hlt_dyn);
+	return hl_dyn_castp(&ret,&hlt_dyn,tret);
+}
+
+// Runs the wrapped closure through jit_wrapper_call() and returns its result cast to a double.
+static double jit_wrapper_d( vclosure_wrapper *c, char *stack_args, void **regs ) {
+	vdynamic *result = jit_wrapper_call(c, stack_args, regs);
+	double value = hl_dyn_castd(&result,&hlt_dyn);
+	return value;
+}
+
+static void jit_hl2c( jit_ctx *ctx ) {
+ // create a function that is called with a vclosure_wrapper* and native args
+ // and pack and pass the args to callback_hl2c
+ preg p;
+ int jfloat1, jfloat2, jexit;
+ hl_type_fun *ft = NULL;
+ int size;
+# ifdef HL_64
+ preg *cl = REG_AT(CALL_REGS[0]);
+ preg *tmp = REG_AT(CALL_REGS[1]);
+# else
+ preg *cl = REG_AT(Ecx);
+ preg *tmp = REG_AT(Edx);
+# endif
+
+ op64(ctx,PUSH,PEBP,UNUSED);
+ op64(ctx,MOV,PEBP,PESP);
+
+# ifdef HL_64
+ // push registers
+ int i;
+ op64(ctx,SUB,PESP,pconst(&p,CALL_NREGS*8));
+ for(i=0;it->fun->ret->kind ) {
+ // case HF32: case HF64: return jit_wrapper_d(arg0,&args);
+ // default: return jit_wrapper_ptr(arg0,&args);
+ // }
+ if( !IS_64 )
+ op64(ctx,MOV,cl,pmem(&p,Ebp,HL_WSIZE*2)); // load arg0
+ op64(ctx,MOV,tmp,pmem(&p,cl->id,0)); // ->t
+ op64(ctx,MOV,tmp,pmem(&p,tmp->id,HL_WSIZE)); // ->fun
+ op64(ctx,MOV,tmp,pmem(&p,tmp->id,(int)(int_val)&ft->ret)); // ->ret
+ op32(ctx,MOV,tmp,pmem(&p,tmp->id,0)); // -> kind
+
+ op32(ctx,CMP,tmp,pconst(&p,HF64));
+ XJump_small(JEq,jfloat1);
+ op32(ctx,CMP,tmp,pconst(&p,HF32));
+ XJump_small(JEq,jfloat2);
+
+ // 64 bits : ESP + EIP (+WIN64PAD)
+ // 32 bits : ESP + EIP + PARAM0
+ int args_pos = IS_64 ? ((IS_WINCALL64 ? 32 : 0) + HL_WSIZE * 2) : (HL_WSIZE*3);
+
+ size = begin_native_call(ctx,3);
+ op64(ctx, LEA, tmp, pmem(&p,Ebp,-HL_WSIZE*CALL_NREGS*2));
+ set_native_arg(ctx, tmp);
+ op64(ctx, LEA, tmp, pmem(&p,Ebp,args_pos));
+ set_native_arg(ctx, tmp);
+ set_native_arg(ctx, cl);
+ call_native(ctx, jit_wrapper_ptr, size);
+ XJump_small(JAlways, jexit);
+
+ patch_jump(ctx,jfloat1);
+ patch_jump(ctx,jfloat2);
+ size = begin_native_call(ctx,3);
+ op64(ctx, LEA, tmp, pmem(&p,Ebp,-HL_WSIZE*CALL_NREGS*2));
+ set_native_arg(ctx, tmp);
+ op64(ctx, LEA, tmp, pmem(&p,Ebp,args_pos));
+ set_native_arg(ctx, tmp);
+ set_native_arg(ctx, cl);
+ call_native(ctx, jit_wrapper_d, size);
+
+ patch_jump(ctx,jexit);
+ op64(ctx,MOV,PESP,PEBP);
+ op64(ctx,POP,PEBP, UNUSED);
+ op64(ctx,RET,UNUSED,UNUSED);
+}
+
+/*
+	Throws `msg` wrapped in a dynamic of type bytes. A NULL message stands for an
+	assertion failure: the debugger break is triggered and "assert" is thrown instead.
+*/
+static void jit_fail( uchar *msg ) {
+	vdynamic *exc;
+	if( !msg ) {
+		hl_debug_break();
+		msg = USTR("assert");
+	}
+	exc = hl_alloc_dynamic(&hlt_bytes);
+	exc->v.ptr = msg;
+	hl_throw(exc);
+}
+
+// Builds the static stub that sets up a frame and calls jit_fail("Null access").
+static void jit_null_access( jit_ctx *ctx ) {
+	int_val msg = (int_val)USTR("Null access");
+	op64(ctx,PUSH,PEBP,UNUSED);
+	op64(ctx,MOV,PEBP,PESP);
+	call_native_consts(ctx, jit_fail, &msg, 1);
+}
+
+/*
+	Throws the "Null access .<field>" error, where <field> is the name registered for
+	the field hash `fhash`.
+*/
+static void jit_null_fail( int fhash ) {
+	vbyte *name = hl_field_name(fhash);
+	hl_buffer *msg = hl_alloc_buffer();
+	vdynamic *exc;
+	hl_buffer_str(msg, USTR("Null access ."));
+	hl_buffer_str(msg, (uchar*)name);
+	exc = hl_alloc_dynamic(&hlt_bytes);
+	exc->v.ptr = hl_buffer_content(msg,NULL);
+	hl_throw(exc);
+}
+
+/*
+	Builds the static stub that sets up a frame and calls jit_null_fail() with the value
+	found on the caller's stack at EBP + args_pos.
+	NOTE(review): that slot presumably holds the field hash pushed by the generated code
+	before jumping here -- confirm against the call site.
+*/
+static void jit_null_field_access( jit_ctx *ctx ) {
+	preg p;
+	op64(ctx,PUSH,PEBP,UNUSED);
+	op64(ctx,MOV,PEBP,PESP);
+	int size = begin_native_call(ctx, 1);
+	// skip two words above EBP, plus 32 more bytes under IS_WINCALL64
+	int args_pos = (IS_WINCALL64 ? 32 : 0) + HL_WSIZE*2;
+	set_native_arg(ctx, pmem(&p,Ebp,args_pos));
+	call_native(ctx,jit_null_fail,size);
+}
+
+// Builds the static stub that sets up a frame and calls jit_fail(NULL), i.e. the assert failure.
+static void jit_assert( jit_ctx *ctx ) {
+	int_val noMessage = 0;
+	op64(ctx,PUSH,PEBP,UNUSED);
+	op64(ctx,MOV,PEBP,PESP);
+	call_native_consts(ctx, jit_fail, &noMessage, 1);
+}
+
+ // Runs the code generator fbuild between two jit_nops() calls and returns
+ // the buffer position at which the generated code starts. When
+ // WIN64_UNWIND_TABLES is defined, the [start, finish) range is also
+ // recorded in the next free unwind table entry, pointing at the shared
+ // unwind data offset stored in ctx->unwind_offset.
+ static int jit_build( jit_ctx *ctx, void (*fbuild)( jit_ctx *) ) {
+     int start, finish;
+     jit_buf(ctx);
+     jit_nops(ctx);
+     start = BUF_POS();
+     fbuild(ctx);
+     finish = BUF_POS();
+     jit_nops(ctx);
+#ifdef WIN64_UNWIND_TABLES
+     {
+         int slot = ctx->nunwind++;
+         ctx->unwind_table[slot].BeginAddress = start;
+         ctx->unwind_table[slot].EndAddress = finish;
+         ctx->unwind_table[slot].UnwindData = ctx->unwind_offset;
+     }
+#endif
+     return start;
+}
+
+ // Binds ctx to module m and writes the per-module data that precedes the
+ // generated code:
+ //  - when the module carries debug info, allocates one hl_debug_infos entry
+ //    per function and fills the table with 0xFF bytes (memset with -1);
+ //  - appends every float constant of the module to the output buffer, in
+ //    index order (the OFloat opcode addresses them at index * 8);
+ //  - under WIN64_UNWIND_TABLES, records the position of the unwind data,
+ //    writes it, and allocates a zeroed RUNTIME_FUNCTION table with room for
+ //    nfunctions + 10 entries.
+ static void hl_jit_init_module( jit_ctx *ctx, hl_module *m ) {
+     int i;
+     ctx->m = m;
+     if( m->code->hasdebug ) {
+         ctx->debug = (hl_debug_infos*)malloc(sizeof(hl_debug_infos) * m->code->nfunctions);
+         memset(ctx->debug, -1, sizeof(hl_debug_infos) * m->code->nfunctions);
+     }
+     // the loop bound was garbled to "icode->nfloats"; iterate over all floats of the module
+     for(i=0;i<m->code->nfloats;i++) {
+         jit_buf(ctx); // called before each write, as everywhere else before emitting
+         *ctx->buf.d++ = m->code->floats[i];
+     }
+#ifdef WIN64_UNWIND_TABLES
+     jit_buf(ctx);
+     ctx->unwind_offset = BUF_POS();
+     write_unwind_data(ctx);
+
+     ctx->unwind_table = malloc(sizeof(RUNTIME_FUNCTION) * (m->code->nfunctions + 10));
+     memset(ctx->unwind_table, 0, sizeof(RUNTIME_FUNCTION) * (m->code->nfunctions + 10));
+#endif
+}
+
+ // Initializes ctx for module m, then builds the fixed helper routines in
+ // this order: the c2hl and hl2c trampolines, followed by the three static
+ // stubs (null access, assert, null field access) whose buffer positions are
+ // stored in ctx->static_functions[0..2].
+ void hl_jit_init( jit_ctx *ctx, hl_module *m ) {
+     void (*const stubs[3])( jit_ctx * ) = { jit_null_access, jit_assert, jit_null_field_access };
+     int k;
+     hl_jit_init_module(ctx,m);
+     ctx->c2hl = jit_build(ctx, jit_c2hl);
+     ctx->hl2c = jit_build(ctx, jit_hl2c);
+     for(k=0;k<3;k++)
+         ctx->static_functions[k] = (void*)(int_val)jit_build(ctx,stubs[k]);
+}
+
+ // Re-initializes ctx for module m without rebuilding the helper routines
+ // that hl_jit_init generates.
+ void hl_jit_reset( jit_ctx *ctx, hl_module *m ) {
+ // The previous debug table pointer is dropped, not freed.
+ // NOTE(review): presumably ownership was handed over elsewhere before this
+ // call - confirm, otherwise the old table leaks.
+ ctx->debug = NULL;
+ // Allocates a fresh debug table when m has debug info.
+ hl_jit_init_module(ctx,m);
+}
+
+ // Returns the hl_dyn_cast* runtime helper that matches the kind of t:
+ // f for HF32, d for HF64, i64 for HI64/HGUID, i for HI32/HUI16/HUI8/HBOOL,
+ // and p for every other kind.
+ static void *get_dyncast( hl_type *t ) {
+     if( t->kind == HF32 )
+         return hl_dyn_castf;
+     if( t->kind == HF64 )
+         return hl_dyn_castd;
+     if( t->kind == HI64 || t->kind == HGUID )
+         return hl_dyn_casti64;
+     if( t->kind == HI32 || t->kind == HUI16 || t->kind == HUI8 || t->kind == HBOOL )
+         return hl_dyn_casti;
+     return hl_dyn_castp;
+}
+
+ // Returns the hl_dyn_set* runtime helper that matches the kind of t:
+ // f for HF32, d for HF64, i64 for HI64/HGUID, i for HI32/HUI16/HUI8/HBOOL,
+ // and p for every other kind.
+ static void *get_dynset( hl_type *t ) {
+     if( t->kind == HF32 )
+         return hl_dyn_setf;
+     if( t->kind == HF64 )
+         return hl_dyn_setd;
+     if( t->kind == HI64 || t->kind == HGUID )
+         return hl_dyn_seti64;
+     if( t->kind == HI32 || t->kind == HUI16 || t->kind == HUI8 || t->kind == HBOOL )
+         return hl_dyn_seti;
+     return hl_dyn_setp;
+}
+
+ // Returns the hl_dyn_get* runtime helper that matches the kind of t:
+ // f for HF32, d for HF64, i64 for HI64/HGUID, i for HI32/HUI16/HUI8/HBOOL,
+ // and p for every other kind.
+ static void *get_dynget( hl_type *t ) {
+     if( t->kind == HF32 )
+         return hl_dyn_getf;
+     if( t->kind == HF64 )
+         return hl_dyn_getd;
+     if( t->kind == HI64 || t->kind == HGUID )
+         return hl_dyn_geti64;
+     if( t->kind == HI32 || t->kind == HUI16 || t->kind == HUI8 || t->kind == HBOOL )
+         return hl_dyn_geti;
+     return hl_dyn_getp;
+}
+
+ // Native helper called by the code emitted for the OToUFloat opcode:
+ // converts an unsigned 32-bit value to double.
+ static double uint_to_double( unsigned int v ) {
+     double widened = (double)v;
+     return widened;
+}
+
+ // Allocates, from the module allocator, a value-less closure for function
+ // fid. For a native (index at or past nfunctions) the closure gets the
+ // native's type and its pointer from functions_ptrs. For a bytecode
+ // function it gets the function's type, stores fid itself in `fun`, and is
+ // pushed onto ctx->closure_list through its `value` field (presumably so
+ // `fun` can be fixed up once code addresses are final - confirm).
+ static vclosure *alloc_static_closure( jit_ctx *ctx, int fid ) {
+     hl_module *mod = ctx->m;
+     vclosure *cl = hl_malloc(&mod->ctx.alloc,sizeof(vclosure));
+     int index = mod->functions_indexes[fid];
+     cl->hasValue = 0;
+     if( index < mod->code->nfunctions ) {
+         // bytecode function
+         cl->t = mod->code->functions[index].type;
+         cl->fun = (void*)(int_val)fid;
+         cl->value = ctx->closure_list;
+         ctx->closure_list = cl;
+         return cl;
+     }
+     // native
+     cl->t = mod->code->natives[index - mod->code->nfunctions].t;
+     cl->fun = mod->functions_ptrs[fid];
+     cl->value = NULL;
+     return cl;
+}
+
+ // Emits code that converts the value held in register v to the type of dst
+ // and stores the result in dst.
+ //  - Fast path: when v has kind HNULL and its type parameter has the same
+ //    kind as dst, the conversion is emitted inline as a null test plus a load.
+ //  - Otherwise a native call to the helper chosen by get_dyncast(dst->t) is
+ //    emitted.
+ static void make_dyn_cast( jit_ctx *ctx, vreg *dst, vreg *v ) {
+ int size;
+ preg p;
+ preg *tmp;
+ if( v->t->kind == HNULL && v->t->tparam->kind == dst->t->kind ) {
+ int jnull, jend;
+ preg *out;
+ switch( dst->t->kind ) {
+ case HUI8:
+ case HUI16:
+ case HI32:
+ case HBOOL:
+ case HI64:
+ case HGUID:
+ // integer kinds: result = (v == NULL) ? 0 : word at [v + 8]
+ // (offset 8 is presumably the boxed value field - confirm against HDYN_VALUE)
+ tmp = alloc_cpu(ctx, v, true);
+ op64(ctx, TEST, tmp, tmp);
+ XJump_small(JZero, jnull);
+ op64(ctx, MOV, tmp, pmem(&p,tmp->id,8));
+ XJump_small(JAlways, jend);
+ patch_jump(ctx, jnull);
+ op64(ctx, XOR, tmp, tmp);
+ patch_jump(ctx, jend);
+ store(ctx, dst, tmp, true);
+ return;
+ case HF32:
+ case HF64:
+ // float kinds: same test, but the value is loaded into an FPU register
+ // with MOVSS (HF32) or MOVSD (HF64); a null pointer yields a zeroed register
+ tmp = alloc_cpu(ctx, v, true);
+ out = alloc_fpu(ctx, dst, false);
+ op64(ctx, TEST, tmp, tmp);
+ XJump_small(JZero, jnull);
+ op64(ctx, dst->t->kind == HF32 ? MOVSS : MOVSD, out, pmem(&p,tmp->id,8));
+ XJump_small(JAlways, jend);
+ patch_jump(ctx, jnull);
+ op64(ctx, XORPD, out, out);
+ patch_jump(ctx, jend);
+ store(ctx, dst, out, true);
+ return;
+ default:
+ // other kinds fall through to the native call below
+ break;
+ }
+ }
+ // Generic path. For HF32/HF64/HI64/HGUID destinations the helper receives
+ // two arguments (v's type, then the address computed below); for all other
+ // kinds dst's type is set first, giving three arguments.
+ switch( dst->t->kind ) {
+ case HF32:
+ case HF64:
+ case HI64:
+ case HGUID:
+ size = begin_native_call(ctx, 2);
+ set_native_arg(ctx, pconst64(&p,(int_val)v->t));
+ break;
+ default:
+ size = begin_native_call(ctx, 3);
+ set_native_arg(ctx, pconst64(&p,(int_val)dst->t));
+ set_native_arg(ctx, pconst64(&p,(int_val)v->t));
+ break;
+ }
+ // last argument: address of v's stack slot, i.e. Ebp + v->stackPos
+ // (a negative stackPos is applied with SUB of its absolute value)
+ tmp = alloc_native_arg(ctx);
+ op64(ctx,MOV,tmp,REG_AT(Ebp));
+ if( v->stackPos >= 0 )
+ op64(ctx,ADD,tmp,pconst(&p,v->stackPos));
+ else
+ op64(ctx,SUB,tmp,pconst(&p,-v->stackPos));
+ set_native_arg(ctx,tmp);
+ call_native(ctx,get_dyncast(dst->t),size);
+ store_result(ctx, dst);
+}
+
+int hl_jit_function( jit_ctx *ctx, hl_module *m, hl_function *f ) {
+ int i, size = 0, opCount;
+ int codePos = BUF_POS();
+ int nargs = f->type->fun->nargs;
+ unsigned short *debug16 = NULL;
+ int *debug32 = NULL;
+ call_regs cregs = {0};
+ hl_thread_info *tinf = NULL;
+ preg p;
+ ctx->f = f;
+ ctx->allocOffset = 0;
+ if( f->nregs > ctx->maxRegs ) {
+ free(ctx->vregs);
+ ctx->vregs = (vreg*)malloc(sizeof(vreg) * (f->nregs + 1));
+ if( ctx->vregs == NULL ) {
+ ctx->maxRegs = 0;
+ return -1;
+ }
+ ctx->maxRegs = f->nregs;
+ }
+ if( f->nops > ctx->maxOps ) {
+ free(ctx->opsPos);
+ ctx->opsPos = (int*)malloc(sizeof(int) * (f->nops + 1));
+ if( ctx->opsPos == NULL ) {
+ ctx->maxOps = 0;
+ return -1;
+ }
+ ctx->maxOps = f->nops;
+ }
+ memset(ctx->opsPos,0,(f->nops+1)*sizeof(int));
+ for(i=0;inregs;i++) {
+ vreg *r = R(i);
+ r->t = f->regs[i];
+ r->size = hl_type_size(r->t);
+ r->current = NULL;
+ r->stack.holds = NULL;
+ r->stack.id = i;
+ r->stack.kind = RSTACK;
+ }
+ size = 0;
+ int argsSize = 0;
+ for(i=0;it,i);
+ if( creg < 0 || IS_WINCALL64 ) {
+ // use existing stack storage
+ r->stackPos = argsSize + HL_WSIZE * 2;
+ argsSize += stack_size(r->t);
+ } else {
+ // make room in local vars
+ size += r->size;
+ size += hl_pad_size(size,r->t);
+ r->stackPos = -size;
+ }
+ }
+ for(i=nargs;inregs;i++) {
+ vreg *r = R(i);
+ size += r->size;
+ size += hl_pad_size(size,r->t); // align local vars
+ r->stackPos = -size;
+ }
+# ifdef HL_64
+ size += (-size) & 15; // align on 16 bytes
+# else
+ size += hl_pad_size(size,&hlt_dyn); // align on word size
+# endif
+ ctx->totalRegsSize = size;
+ jit_buf(ctx);
+ ctx->functionPos = BUF_POS();
+ // make sure currentPos is > 0 before any reg allocations happen
+ // otherwise `alloc_reg` thinks that all registers are locked
+ ctx->currentPos = 1;
+ op_enter(ctx);
+# ifdef HL_64
+ {
+ // store in local var
+ for(i=0;isize);
+ p->holds = r;
+ r->current = p;
+ }
+ }
+# endif
+ if( ctx->m->code->hasdebug ) {
+ debug16 = (unsigned short*)malloc(sizeof(unsigned short) * (f->nops + 1));
+ debug16[0] = (unsigned short)(BUF_POS() - codePos);
+ }
+ ctx->opsPos[0] = BUF_POS();
+
+ for(opCount=0;opCountnops;opCount++) {
+ int jump;
+ hl_opcode *o = f->ops + opCount;
+ vreg *dst = R(o->p1);
+ vreg *ra = R(o->p2);
+ vreg *rb = R(o->p3);
+ ctx->currentPos = opCount + 1;
+ jit_buf(ctx);
+# ifdef JIT_DEBUG
+ if( opCount == 0 || f->ops[opCount-1].op != OAsm ) {
+ int uid = opCount + (f->findex<<16);
+ op32(ctx, PUSH, pconst(&p,uid), UNUSED);
+ op64(ctx, ADD, PESP, pconst(&p,HL_WSIZE));
+ }
+# endif
+ // emit code
+ switch( o->op ) {
+ case OMov:
+ case OUnsafeCast:
+ op_mov(ctx, dst, ra);
+ break;
+ case OInt:
+ store_const(ctx, dst, m->code->ints[o->p2]);
+ break;
+ case OBool:
+ store_const(ctx, dst, o->p2);
+ break;
+ case OGetGlobal:
+ {
+ void *addr = m->globals_data + m->globals_indexes[o->p2];
+# ifdef HL_64
+ preg *tmp = alloc_reg(ctx, RCPU);
+ op64(ctx, MOV, tmp, pconst64(&p,(int_val)addr));
+ copy_to(ctx, dst, pmem(&p,tmp->id,0));
+# else
+ copy_to(ctx, dst, paddr(&p,addr));
+# endif
+ }
+ break;
+ case OSetGlobal:
+ {
+ void *addr = m->globals_data + m->globals_indexes[o->p1];
+# ifdef HL_64
+ preg *tmp = alloc_reg(ctx, RCPU);
+ op64(ctx, MOV, tmp, pconst64(&p,(int_val)addr));
+ copy_from(ctx, pmem(&p,tmp->id,0), ra);
+# else
+ copy_from(ctx, paddr(&p,addr), ra);
+# endif
+ }
+ break;
+ case OCall3:
+ {
+ int args[3] = { o->p3, o->extra[0], o->extra[1] };
+ op_call_fun(ctx, dst, o->p2, 3, args);
+ }
+ break;
+ case OCall4:
+ {
+ int args[4] = { o->p3, o->extra[0], o->extra[1], o->extra[2] };
+ op_call_fun(ctx, dst, o->p2, 4, args);
+ }
+ break;
+ case OCallN:
+ op_call_fun(ctx, dst, o->p2, o->p3, o->extra);
+ break;
+ case OCall0:
+ op_call_fun(ctx, dst, o->p2, 0, NULL);
+ break;
+ case OCall1:
+ op_call_fun(ctx, dst, o->p2, 1, &o->p3);
+ break;
+ case OCall2:
+ {
+ int args[2] = { o->p3, (int)(int_val)o->extra };
+ op_call_fun(ctx, dst, o->p2, 2, args);
+ }
+ break;
+ case OSub:
+ case OAdd:
+ case OMul:
+ case OSDiv:
+ case OUDiv:
+ case OShl:
+ case OSShr:
+ case OUShr:
+ case OAnd:
+ case OOr:
+ case OXor:
+ case OSMod:
+ case OUMod:
+ op_binop(ctx, dst, ra, rb, o->op);
+ break;
+ case ONeg:
+ {
+ if( IS_FLOAT(ra) ) {
+ preg *pa = alloc_reg(ctx,RFPU);
+ preg *pb = alloc_fpu(ctx,ra,true);
+ op64(ctx,XORPD,pa,pa);
+ op64(ctx,ra->t->kind == HF32 ? SUBSS : SUBSD,pa,pb);
+ store(ctx,dst,pa,true);
+ } else if( ra->t->kind == HI64 ) {
+# ifdef HL_64
+ preg *pa = alloc_reg(ctx,RCPU);
+ preg *pb = alloc_cpu(ctx,ra,true);
+ op64(ctx,XOR,pa,pa);
+ op64(ctx,SUB,pa,pb);
+ store(ctx,dst,pa,true);
+# else
+ error_i64();
+# endif
+ } else {
+ preg *pa = alloc_reg(ctx,RCPU);
+ preg *pb = alloc_cpu(ctx,ra,true);
+ op32(ctx,XOR,pa,pa);
+ op32(ctx,SUB,pa,pb);
+ store(ctx,dst,pa,true);
+ }
+ }
+ break;
+ case ONot:
+ {
+ preg *v = alloc_cpu(ctx,ra,true);
+ op32(ctx,XOR,v,pconst(&p,1));
+ store(ctx,dst,v,true);
+ }
+ break;
+ case OJFalse:
+ case OJTrue:
+ case OJNotNull:
+ case OJNull:
+ {
+ preg *r = dst->t->kind == HBOOL ? alloc_cpu8(ctx, dst, true) : alloc_cpu(ctx, dst, true);
+ op64(ctx, dst->t->kind == HBOOL ? TEST8 : TEST, r, r);
+ XJump( o->op == OJFalse || o->op == OJNull ? JZero : JNotZero,jump);
+ register_jump(ctx,jump,(opCount + 1) + o->p2);
+ }
+ break;
+ case OJEq:
+ case OJNotEq:
+ case OJSLt:
+ case OJSGte:
+ case OJSLte:
+ case OJSGt:
+ case OJULt:
+ case OJUGte:
+ case OJNotLt:
+ case OJNotGte:
+ op_jump(ctx,dst,ra,o,(opCount + 1) + o->p3);
+ break;
+ case OJAlways:
+ jump = do_jump(ctx,o->op,false);
+ register_jump(ctx,jump,(opCount + 1) + o->p1);
+ break;
+ case OToDyn:
+ if( ra->t->kind == HBOOL ) {
+ int size = begin_native_call(ctx, 1);
+ set_native_arg(ctx, fetch(ra));
+ call_native(ctx, hl_alloc_dynbool, size);
+ store(ctx, dst, PEAX, true);
+ } else {
+ int_val rt = (int_val)ra->t;
+ int jskip = 0;
+ if( hl_is_ptr(ra->t) ) {
+ int jnz;
+ preg *a = alloc_cpu(ctx,ra,true);
+ op64(ctx,TEST,a,a);
+ XJump_small(JNotZero,jnz);
+ op64(ctx,XOR,PEAX,PEAX); // will replace the result of alloc_dynamic at jump land
+ XJump_small(JAlways,jskip);
+ patch_jump(ctx,jnz);
+ }
+ call_native_consts(ctx, hl_alloc_dynamic, &rt, 1);
+ // copy value to dynamic
+ if( (IS_FLOAT(ra) || ra->size == 8) && !IS_64 ) {
+ preg *tmp = REG_AT(RCPU_SCRATCH_REGS[1]);
+ op64(ctx,MOV,tmp,&ra->stack);
+ op32(ctx,MOV,pmem(&p,Eax,HDYN_VALUE),tmp);
+ if( ra->t->kind == HF64 ) {
+ ra->stackPos += 4;
+ op64(ctx,MOV,tmp,&ra->stack);
+ op32(ctx,MOV,pmem(&p,Eax,HDYN_VALUE+4),tmp);
+ ra->stackPos -= 4;
+ }
+ } else {
+ preg *tmp = REG_AT(RCPU_SCRATCH_REGS[1]);
+ copy_from(ctx,tmp,ra);
+ op64(ctx,MOV,pmem(&p,Eax,HDYN_VALUE),tmp);
+ }
+ if( hl_is_ptr(ra->t) ) patch_jump(ctx,jskip);
+ store(ctx, dst, PEAX, true);
+ }
+ break;
+ case OToSFloat:
+ if( ra == dst ) break;
+ if (ra->t->kind == HI32 || ra->t->kind == HUI16 || ra->t->kind == HUI8) {
+ preg* r = alloc_cpu(ctx, ra, true);
+ preg* w = alloc_fpu(ctx, dst, false);
+ op32(ctx, dst->t->kind == HF64 ? CVTSI2SD : CVTSI2SS, w, r);
+ store(ctx, dst, w, true);
+ } else if (ra->t->kind == HI64 ) {
+ preg* r = alloc_cpu(ctx, ra, true);
+ preg* w = alloc_fpu(ctx, dst, false);
+ op64(ctx, dst->t->kind == HF64 ? CVTSI2SD : CVTSI2SS, w, r);
+ store(ctx, dst, w, true);
+ } else if( ra->t->kind == HF64 && dst->t->kind == HF32 ) {
+ preg *r = alloc_fpu(ctx,ra,true);
+ preg *w = alloc_fpu(ctx,dst,false);
+ op32(ctx,CVTSD2SS,w,r);
+ store(ctx, dst, w, true);
+ } else if( ra->t->kind == HF32 && dst->t->kind == HF64 ) {
+ preg *r = alloc_fpu(ctx,ra,true);
+ preg *w = alloc_fpu(ctx,dst,false);
+ op32(ctx,CVTSS2SD,w,r);
+ store(ctx, dst, w, true);
+ } else
+ ASSERT(0);
+ break;
+ case OToUFloat:
+ {
+ int size;
+ size = prepare_call_args(ctx,1,&o->p2,ctx->vregs,0);
+ call_native(ctx,uint_to_double,size);
+ store_result(ctx,dst);
+ }
+ break;
+ case OToInt:
+ if( ra == dst ) break;
+ if( ra->t->kind == HF64 ) {
+ preg *r = alloc_fpu(ctx,ra,true);
+ preg *w = alloc_cpu(ctx,dst,false);
+ preg *tmp = alloc_reg(ctx,RCPU);
+ op32(ctx,STMXCSR,pmem(&p,Esp,-4),UNUSED);
+ op32(ctx,MOV,tmp,&p);
+ op32(ctx,OR,tmp,pconst(&p,0x6000)); // set round towards 0
+ op32(ctx,MOV,pmem(&p,Esp,-8),tmp);
+ op32(ctx,LDMXCSR,&p,UNUSED);
+ op32(ctx,CVTSD2SI,w,r);
+ op32(ctx,LDMXCSR,pmem(&p,Esp,-4),UNUSED);
+ store(ctx, dst, w, true);
+ } else if (ra->t->kind == HF32) {
+ preg *r = alloc_fpu(ctx, ra, true);
+ preg *w = alloc_cpu(ctx, dst, false);
+ preg *tmp = alloc_reg(ctx, RCPU);
+ op32(ctx, STMXCSR, pmem(&p, Esp, -4), UNUSED);
+ op32(ctx, MOV, tmp, &p);
+ op32(ctx, OR, tmp, pconst(&p, 0x6000)); // set round towards 0
+ op32(ctx, MOV, pmem(&p, Esp, -8), tmp);
+ op32(ctx, LDMXCSR, &p, UNUSED);
+ op32(ctx, CVTSS2SI, w, r);
+ op32(ctx, LDMXCSR, pmem(&p, Esp, -4), UNUSED);
+ store(ctx, dst, w, true);
+ } else if( (dst->t->kind == HI64 || dst->t->kind == HGUID) && ra->t->kind == HI32 ) {
+ if( ra->current != PEAX ) {
+ op32(ctx, MOV, PEAX, fetch(ra));
+ scratch(PEAX);
+ }
+# ifdef HL_64
+ op64(ctx, CDQE, UNUSED, UNUSED); // sign-extend Eax into Rax
+ store(ctx, dst, PEAX, true);
+# else
+ op32(ctx, CDQ, UNUSED, UNUSED); // sign-extend Eax into Eax:Edx
+ scratch(REG_AT(Edx));
+ op32(ctx, MOV, fetch(dst), PEAX);
+ dst->stackPos += 4;
+ op32(ctx, MOV, fetch(dst), REG_AT(Edx));
+ dst->stackPos -= 4;
+ } else if( dst->t->kind == HI32 && ra->t->kind == HI64 ) {
+ error_i64();
+# endif
+ } else {
+ preg *r = alloc_cpu(ctx,dst,false);
+ copy_from(ctx, r, ra);
+ store(ctx, dst, r, true);
+ }
+ break;
+ case ORet:
+ op_ret(ctx, dst);
+ break;
+ case OIncr:
+ {
+ if( IS_FLOAT(dst) ) {
+ ASSERT(0);
+ } else {
+ preg *v = fetch32(ctx,dst);
+ op32(ctx,INC,v,UNUSED);
+ if( v->kind != RSTACK ) store(ctx, dst, v, false);
+ }
+ }
+ break;
+ case ODecr:
+ {
+ if( IS_FLOAT(dst) ) {
+ ASSERT(0);
+ } else {
+ preg *v = fetch32(ctx,dst);
+ op32(ctx,DEC,v,UNUSED);
+ if( v->kind != RSTACK ) store(ctx, dst, v, false);
+ }
+ }
+ break;
+ case OFloat:
+ {
+ if( m->code->floats[o->p2] == 0 ) {
+ preg *f = alloc_fpu(ctx,dst,false);
+ op64(ctx,XORPD,f,f);
+ } else switch( dst->t->kind ) {
+ case HF64:
+ case HF32:
+# ifdef HL_64
+ op64(ctx,dst->t->kind == HF32 ? CVTSD2SS : MOVSD,alloc_fpu(ctx,dst,false),pcodeaddr(&p,o->p2 * 8));
+# else
+ op64(ctx,dst->t->kind == HF32 ? MOVSS : MOVSD,alloc_fpu(ctx,dst,false),paddr(&p,m->code->floats + o->p2));
+# endif
+ break;
+ default:
+ ASSERT(dst->t->kind);
+ }
+ store(ctx,dst,dst->current,false);
+ }
+ break;
+ case OString:
+ op64(ctx,MOV,alloc_cpu(ctx, dst, false),pconst64(&p,(int_val)hl_get_ustring(m->code,o->p2)));
+ store(ctx,dst,dst->current,false);
+ break;
+ case OBytes:
+ {
+ char *b = m->code->version >= 5 ? m->code->bytes + m->code->bytes_pos[o->p2] : m->code->strings[o->p2];
+ op64(ctx,MOV,alloc_cpu(ctx,dst,false),pconst64(&p,(int_val)b));
+ store(ctx,dst,dst->current,false);
+ }
+ break;
+ case ONull:
+ {
+ op64(ctx,XOR,alloc_cpu(ctx, dst, false),alloc_cpu(ctx, dst, false));
+ store(ctx,dst,dst->current,false);
+ }
+ break;
+ case ONew:
+ {
+ int_val args[] = { (int_val)dst->t };
+ void *allocFun;
+ int nargs = 1;
+ switch( dst->t->kind ) {
+ case HOBJ:
+ case HSTRUCT:
+ allocFun = hl_alloc_obj;
+ break;
+ case HDYNOBJ:
+ allocFun = hl_alloc_dynobj;
+ nargs = 0;
+ break;
+ case HVIRTUAL:
+ allocFun = hl_alloc_virtual;
+ break;
+ default:
+ ASSERT(dst->t->kind);
+ }
+ call_native_consts(ctx, allocFun, args, nargs);
+ store(ctx, dst, PEAX, true);
+ }
+ break;
+ case OInstanceClosure:
+ {
+ preg *r = alloc_cpu(ctx, rb, true);
+ jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist));
+ int size = begin_native_call(ctx,3);
+ set_native_arg(ctx,r);
+
+ j->pos = BUF_POS();
+ j->target = o->p2;
+ j->next = ctx->calls;
+ ctx->calls = j;
+
+ set_native_arg(ctx,pconst64(&p,RESERVE_ADDRESS));
+ set_native_arg(ctx,pconst64(&p,(int_val)m->code->functions[m->functions_indexes[o->p2]].type));
+ call_native(ctx,hl_alloc_closure_ptr,size);
+ store(ctx,dst,PEAX,true);
+ }
+ break;
+ case OVirtualClosure:
+ {
+ int size, i;
+ preg *r = alloc_cpu_call(ctx, ra);
+ hl_type *t = NULL;
+ hl_type *ot = ra->t;
+ while( t == NULL ) {
+ for(i=0;iobj->nproto;i++) {
+ hl_obj_proto *pp = ot->obj->proto + i;
+ if( pp->pindex == o->p3 ) {
+ t = m->code->functions[m->functions_indexes[pp->findex]].type;
+ break;
+ }
+ }
+ ot = ot->obj->super;
+ }
+ size = begin_native_call(ctx,3);
+ set_native_arg(ctx,r);
+ // read r->type->vobj_proto[i] for function address
+ op64(ctx,MOV,r,pmem(&p,r->id,0));
+ op64(ctx,MOV,r,pmem(&p,r->id,HL_WSIZE*2));
+ op64(ctx,MOV,r,pmem(&p,r->id,HL_WSIZE*o->p3));
+ set_native_arg(ctx,r);
+ op64(ctx,MOV,r,pconst64(&p,(int_val)t));
+ set_native_arg(ctx,r);
+ call_native(ctx,hl_alloc_closure_ptr,size);
+ store(ctx,dst,PEAX,true);
+ }
+ break;
+ case OCallClosure:
+ if( ra->t->kind == HDYN ) {
+ // ASM for {
+ // vdynamic *args[] = {args};
+ // vdynamic *ret = hl_dyn_call(closure,args,nargs);
+ // dst = hl_dyncast(ret,t_dynamic,t_dst);
+ // }
+ int offset = o->p3 * HL_WSIZE;
+ preg *r = alloc_reg(ctx, RCPU_CALL);
+ if( offset & 15 ) offset += 16 - (offset & 15);
+ op64(ctx,SUB,PESP,pconst(&p,offset));
+ op64(ctx,MOV,r,PESP);
+ for(i=0;ip3;i++) {
+ vreg *a = R(o->extra[i]);
+ if( !hl_is_dynamic(a->t) ) ASSERT(0);
+ preg *v = alloc_cpu(ctx,a,true);
+ op64(ctx,MOV,pmem(&p,r->id,i * HL_WSIZE),v);
+ RUNLOCK(v);
+ }
+# ifdef HL_64
+ int size = begin_native_call(ctx, 3) + offset;
+ set_native_arg(ctx, pconst(&p,o->p3));
+ set_native_arg(ctx, r);
+ set_native_arg(ctx, fetch(ra));
+# else
+ int size = pad_before_call(ctx,HL_WSIZE*2 + sizeof(int) + offset);
+ op64(ctx,PUSH,pconst(&p,o->p3),UNUSED);
+ op64(ctx,PUSH,r,UNUSED);
+ op64(ctx,PUSH,alloc_cpu(ctx,ra,true),UNUSED);
+# endif
+ call_native(ctx,hl_dyn_call,size);
+ if( dst->t->kind != HVOID ) {
+ store(ctx,dst,PEAX,true);
+ make_dyn_cast(ctx,dst,dst);
+ }
+ } else {
+ int jhasvalue, jend, size;
+ // ASM for if( c->hasValue ) c->fun(value,args) else c->fun(args)
+ preg *r = alloc_cpu(ctx,ra,true);
+ preg *tmp = alloc_reg(ctx, RCPU);
+ op32(ctx,MOV,tmp,pmem(&p,r->id,HL_WSIZE*2));
+ op32(ctx,TEST,tmp,tmp);
+ scratch(tmp);
+ XJump_small(JNotZero,jhasvalue);
+ save_regs(ctx);
+ size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,0);
+ preg *rr = r;
+ if( rr->holds != ra ) rr = alloc_cpu(ctx, ra, true);
+ op_call(ctx, pmem(&p,rr->id,HL_WSIZE), size);
+ XJump_small(JAlways,jend);
+ patch_jump(ctx,jhasvalue);
+ restore_regs(ctx);
+# ifdef HL_64
+ {
+ int regids[64];
+ preg *pc = REG_AT(CALL_REGS[0]);
+ vreg *sc = R(f->nregs); // scratch register that we temporary rebind
+ if( o->p3 >= 63 ) jit_error("assert");
+ memcpy(regids + 1, o->extra, o->p3 * sizeof(int));
+ regids[0] = f->nregs;
+ sc->size = HL_WSIZE;
+ sc->t = &hlt_dyn;
+ op64(ctx, MOV, pc, pmem(&p,r->id,HL_WSIZE*3));
+ scratch(pc);
+ sc->current = pc;
+ pc->holds = sc;
+ size = prepare_call_args(ctx,o->p3 + 1,regids,ctx->vregs,0);
+ if( r->holds != ra ) r = alloc_cpu(ctx, ra, true);
+ }
+# else
+ size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,HL_WSIZE);
+ if( r->holds != ra ) r = alloc_cpu(ctx, ra, true);
+ op64(ctx, PUSH,pmem(&p,r->id,HL_WSIZE*3),UNUSED); // push closure value
+# endif
+ op_call(ctx, pmem(&p,r->id,HL_WSIZE), size);
+ discard_regs(ctx,false);
+ patch_jump(ctx,jend);
+ store_result(ctx, dst);
+ }
+ break;
+ case OStaticClosure:
+ {
+ vclosure *c = alloc_static_closure(ctx,o->p2);
+ preg *r = alloc_reg(ctx, RCPU);
+ op64(ctx, MOV, r, pconst64(&p,(int_val)c));
+ store(ctx,dst,r,true);
+ }
+ break;
+ case OField:
+ {
+# ifndef HL_64
+ if( dst->t->kind == HI64 ) {
+ error_i64();
+ break;
+ }
+# endif
+ switch( ra->t->kind ) {
+ case HOBJ:
+ case HSTRUCT:
+ {
+ hl_runtime_obj *rt = hl_get_obj_rt(ra->t);
+ preg *rr = alloc_cpu(ctx,ra, true);
+ if( dst->t->kind == HSTRUCT ) {
+ hl_type *ft = hl_obj_field_fetch(ra->t,o->p3)->t;
+ if( ft->kind == HPACKED ) {
+ preg *r = alloc_reg(ctx,RCPU);
+ op64(ctx,LEA,r,pmem(&p,(CpuReg)rr->id,rt->fields_indexes[o->p3]));
+ store(ctx,dst,r,true);
+ break;
+ }
+ }
+ copy_to(ctx,dst,pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p3]));
+ }
+ break;
+ case HVIRTUAL:
+ // ASM for --> if( hl_vfields(o)[f] ) r = *hl_vfields(o)[f]; else r = hl_dyn_get(o,hash(field),vt)
+ {
+ int jhasfield, jend, size;
+ bool need_type = !(IS_FLOAT(dst) || dst->t->kind == HI64);
+ preg *v = alloc_cpu_call(ctx,ra);
+ preg *r = alloc_reg(ctx,RCPU);
+ op64(ctx,MOV,r,pmem(&p,v->id,sizeof(vvirtual)+HL_WSIZE*o->p3));
+ op64(ctx,TEST,r,r);
+ XJump_small(JNotZero,jhasfield);
+ size = begin_native_call(ctx, need_type ? 3 : 2);
+ if( need_type ) set_native_arg(ctx,pconst64(&p,(int_val)dst->t));
+ set_native_arg(ctx,pconst64(&p,(int_val)ra->t->virt->fields[o->p3].hashed_name));
+ set_native_arg(ctx,v);
+ call_native(ctx,get_dynget(dst->t),size);
+ store_result(ctx,dst);
+ XJump_small(JAlways,jend);
+ patch_jump(ctx,jhasfield);
+ copy_to(ctx, dst, pmem(&p,(CpuReg)r->id,0));
+ patch_jump(ctx,jend);
+ scratch(dst->current);
+ }
+ break;
+ default:
+ ASSERT(ra->t->kind);
+ break;
+ }
+ }
+ break;
+ case OSetField:
+ {
+ switch( dst->t->kind ) {
+ case HOBJ:
+ case HSTRUCT:
+ {
+ hl_runtime_obj *rt = hl_get_obj_rt(dst->t);
+ preg *rr = alloc_cpu(ctx, dst, true);
+ if( rb->t->kind == HSTRUCT ) {
+ hl_type *ft = hl_obj_field_fetch(dst->t,o->p2)->t;
+ if( ft->kind == HPACKED ) {
+ hl_runtime_obj *frt = hl_get_obj_rt(ft->tparam);
+ preg *prb = alloc_cpu(ctx, rb, true);
+ preg *tmp = alloc_reg(ctx, RCPU_CALL);
+ int offset = 0;
+ while( offset < frt->size ) {
+ int remain = frt->size - offset;
+ int copy_size = remain >= HL_WSIZE ? HL_WSIZE : (remain >= 4 ? 4 : (remain >= 2 ? 2 : 1));
+ copy(ctx, tmp, pmem(&p, (CpuReg)prb->id, offset), copy_size);
+ copy(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p2]+offset), tmp, copy_size);
+ offset += copy_size;
+ }
+ break;
+ }
+ }
+ copy_from(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p2]), rb);
+ }
+ break;
+ case HVIRTUAL:
+ // ASM for --> if( hl_vfields(o)[f] ) *hl_vfields(o)[f] = v; else hl_dyn_set(o,hash(field),vt,v)
+ {
+ int jhasfield, jend;
+ preg *obj = alloc_cpu_call(ctx,dst);
+ preg *r = alloc_reg(ctx,RCPU);
+ op64(ctx,MOV,r,pmem(&p,obj->id,sizeof(vvirtual)+HL_WSIZE*o->p2));
+ op64(ctx,TEST,r,r);
+ XJump_small(JNotZero,jhasfield);
+# ifdef HL_64
+ switch( rb->t->kind ) {
+ case HF64:
+ case HF32:
+ size = begin_native_call(ctx,3);
+ set_native_arg_fpu(ctx, fetch(rb), rb->t->kind == HF32);
+ break;
+ case HI64:
+ case HGUID:
+ size = begin_native_call(ctx,3);
+ set_native_arg(ctx, fetch(rb));
+ break;
+ default:
+ size = begin_native_call(ctx, 4);
+ set_native_arg(ctx, fetch(rb));
+ set_native_arg(ctx, pconst64(&p,(int_val)rb->t));
+ break;
+ }
+ set_native_arg(ctx,pconst(&p,dst->t->virt->fields[o->p2].hashed_name));
+ set_native_arg(ctx,obj);
+# else
+ switch( rb->t->kind ) {
+ case HF64:
+ case HI64:
+ case HGUID:
+ size = pad_before_call(ctx,HL_WSIZE*2 + sizeof(double));
+ push_reg(ctx,rb);
+ break;
+ case HF32:
+ size = pad_before_call(ctx,HL_WSIZE*2 + sizeof(float));
+ push_reg(ctx,rb);
+ break;
+ default:
+ size = pad_before_call(ctx,HL_WSIZE*4);
+ op64(ctx,PUSH,fetch32(ctx,rb),UNUSED);
+ op64(ctx,MOV,r,pconst64(&p,(int_val)rb->t));
+ op64(ctx,PUSH,r,UNUSED);
+ break;
+ }
+ op32(ctx,MOV,r,pconst(&p,dst->t->virt->fields[o->p2].hashed_name));
+ op64(ctx,PUSH,r,UNUSED);
+ op64(ctx,PUSH,obj,UNUSED);
+# endif
+ call_native(ctx,get_dynset(rb->t),size);
+ XJump_small(JAlways,jend);
+ patch_jump(ctx,jhasfield);
+ copy_from(ctx, pmem(&p,(CpuReg)r->id,0), rb);
+ patch_jump(ctx,jend);
+ scratch(rb->current);
+ }
+ break;
+ default:
+ ASSERT(dst->t->kind);
+ break;
+ }
+ }
+ break;
+ case OGetThis:
+ {
+ vreg *r = R(0);
+ hl_runtime_obj *rt = hl_get_obj_rt(r->t);
+ preg *rr = alloc_cpu(ctx,r, true);
+ if( dst->t->kind == HSTRUCT ) {
+ hl_type *ft = hl_obj_field_fetch(r->t,o->p2)->t;
+ if( ft->kind == HPACKED ) {
+ preg *r = alloc_reg(ctx,RCPU);
+ op64(ctx,LEA,r,pmem(&p,(CpuReg)rr->id,rt->fields_indexes[o->p2]));
+ store(ctx,dst,r,true);
+ break;
+ }
+ }
+ copy_to(ctx,dst,pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p2]));
+ }
+ break;
+ case OSetThis:
+ {
+ vreg *r = R(0);
+ hl_runtime_obj *rt = hl_get_obj_rt(r->t);
+ preg *rr = alloc_cpu(ctx, r, true);
+ if( ra->t->kind == HSTRUCT ) {
+ hl_type *ft = hl_obj_field_fetch(r->t,o->p1)->t;
+ if( ft->kind == HPACKED ) {
+ hl_runtime_obj *frt = hl_get_obj_rt(ft->tparam);
+ preg *pra = alloc_cpu(ctx, ra, true);
+ preg *tmp = alloc_reg(ctx, RCPU_CALL);
+ int offset = 0;
+ while( offset < frt->size ) {
+ int remain = frt->size - offset;
+ int copy_size = remain >= HL_WSIZE ? HL_WSIZE : (remain >= 4 ? 4 : (remain >= 2 ? 2 : 1));
+ copy(ctx, tmp, pmem(&p, (CpuReg)pra->id, offset), copy_size);
+ copy(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p1]+offset), tmp, copy_size);
+ offset += copy_size;
+ }
+ break;
+ }
+ }
+ copy_from(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p1]), ra);
+ }
+ break;
+ case OCallThis:
+ {
+ int nargs = o->p3 + 1;
+ int *args = (int*)hl_malloc(&ctx->falloc,sizeof(int) * nargs);
+ int size;
+ preg *r = alloc_cpu(ctx, R(0), true);
+ preg *tmp;
+ tmp = alloc_reg(ctx, RCPU_CALL);
+ op64(ctx,MOV,tmp,pmem(&p,r->id,0)); // read type
+ op64(ctx,MOV,tmp,pmem(&p,tmp->id,HL_WSIZE*2)); // read proto
+ args[0] = 0;
+ for(i=1;iextra[i-1];
+ size = prepare_call_args(ctx,nargs,args,ctx->vregs,0);
+ op_call(ctx,pmem(&p,tmp->id,o->p2*HL_WSIZE),size);
+ discard_regs(ctx, false);
+ store_result(ctx, dst);
+ }
+ break;
+ case OCallMethod:
+ switch( R(o->extra[0])->t->kind ) {
+ case HOBJ: {
+ int size;
+ preg *r = alloc_cpu(ctx, R(o->extra[0]), true);
+ preg *tmp;
+ tmp = alloc_reg(ctx, RCPU_CALL);
+ op64(ctx,MOV,tmp,pmem(&p,r->id,0)); // read type
+ op64(ctx,MOV,tmp,pmem(&p,tmp->id,HL_WSIZE*2)); // read proto
+ size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,0);
+ op_call(ctx,pmem(&p,tmp->id,o->p2*HL_WSIZE),size);
+ discard_regs(ctx, false);
+ store_result(ctx, dst);
+ break;
+ }
+ case HVIRTUAL:
+ // ASM for --> if( hl_vfields(o)[f] ) dst = *hl_vfields(o)[f](o->value,args...); else dst = hl_dyn_call_obj(o->value,field,args,&ret)
+ {
+ int size;
+ int paramsSize;
+ int jhasfield, jend;
+ bool need_dyn;
+ bool obj_in_args = false;
+ vreg *obj = R(o->extra[0]);
+ preg *v = alloc_cpu_call(ctx,obj);
+ preg *r = alloc_reg(ctx,RCPU_CALL);
+ op64(ctx,MOV,r,pmem(&p,v->id,sizeof(vvirtual)+HL_WSIZE*o->p2));
+ op64(ctx,TEST,r,r);
+ save_regs(ctx);
+
+ if( o->p3 < 6 ) {
+ XJump_small(JNotZero,jhasfield);
+ } else {
+ XJump(JNotZero,jhasfield);
+ }
+
+ need_dyn = !hl_is_ptr(dst->t) && dst->t->kind != HVOID;
+ paramsSize = (o->p3 - 1) * HL_WSIZE;
+ if( need_dyn ) paramsSize += sizeof(vdynamic);
+ if( paramsSize & 15 ) paramsSize += 16 - (paramsSize&15);
+ op64(ctx,SUB,PESP,pconst(&p,paramsSize));
+ op64(ctx,MOV,r,PESP);
+
+ for(i=0;ip3-1;i++) {
+ vreg *a = R(o->extra[i+1]);
+ if( hl_is_ptr(a->t) ) {
+ op64(ctx,MOV,pmem(&p,r->id,i*HL_WSIZE),alloc_cpu(ctx,a,true));
+ if( a->current != v ) {
+ RUNLOCK(a->current);
+ } else
+ obj_in_args = true;
+ } else {
+ preg *r2 = alloc_reg(ctx,RCPU);
+ op64(ctx,LEA,r2,&a->stack);
+ op64(ctx,MOV,pmem(&p,r->id,i*HL_WSIZE),r2);
+ if( r2 != v ) RUNLOCK(r2);
+ }
+ }
+
+ jit_buf(ctx);
+
+ if( !need_dyn ) {
+ size = begin_native_call(ctx, 5);
+ set_native_arg(ctx, pconst(&p,0));
+ } else {
+ preg *rtmp = alloc_reg(ctx,RCPU);
+ op64(ctx,LEA,rtmp,pmem(&p,Esp,paramsSize - sizeof(vdynamic)));
+ size = begin_native_call(ctx, 5);
+ set_native_arg(ctx,rtmp);
+ if( !IS_64 ) RUNLOCK(rtmp);
+ }
+ set_native_arg(ctx,r);
+ set_native_arg(ctx,pconst(&p,obj->t->virt->fields[o->p2].hashed_name)); // fid
+ set_native_arg(ctx,pconst64(&p,(int_val)obj->t->virt->fields[o->p2].t)); // ftype
+ set_native_arg(ctx,pmem(&p,v->id,HL_WSIZE)); // o->value
+ call_native(ctx,hl_dyn_call_obj,size + paramsSize);
+ if( need_dyn ) {
+ preg *r = IS_FLOAT(dst) ? REG_AT(XMM(0)) : PEAX;
+ copy(ctx,r,pmem(&p,Esp,HDYN_VALUE - (int)sizeof(vdynamic)),dst->size);
+ store(ctx, dst, r, false);
+ } else
+ store(ctx, dst, PEAX, false);
+
+ XJump_small(JAlways,jend);
+ patch_jump(ctx,jhasfield);
+ restore_regs(ctx);
+
+ if( !obj_in_args ) {
+ // o = o->value hack
+ if( v->holds ) v->holds->current = NULL;
+ obj->current = v;
+ v->holds = obj;
+ op64(ctx,MOV,v,pmem(&p,v->id,HL_WSIZE));
+ size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,0);
+ } else {
+ // keep o->value in R(f->nregs)
+ int regids[64];
+ preg *pc = alloc_reg(ctx,RCPU_CALL);
+ vreg *sc = R(f->nregs); // scratch register that we temporary rebind
+ if( o->p3 >= 63 ) jit_error("assert");
+ memcpy(regids, o->extra, o->p3 * sizeof(int));
+ regids[0] = f->nregs;
+ sc->size = HL_WSIZE;
+ sc->t = &hlt_dyn;
+ op64(ctx, MOV, pc, pmem(&p,v->id,HL_WSIZE));
+ scratch(pc);
+ sc->current = pc;
+ pc->holds = sc;
+ size = prepare_call_args(ctx,o->p3,regids,ctx->vregs,0);
+ }
+
+ op_call(ctx,r,size);
+ discard_regs(ctx, false);
+ store_result(ctx, dst);
+ patch_jump(ctx,jend);
+ }
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+ break;
+ case ORethrow:
+ {
+ int size = prepare_call_args(ctx,1,&o->p1,ctx->vregs,0);
+ call_native(ctx,hl_rethrow,size);
+ }
+ break;
+ case OThrow:
+ {
+ int size = prepare_call_args(ctx,1,&o->p1,ctx->vregs,0);
+ call_native(ctx,hl_throw,size);
+ }
+ break;
+ case OLabel:
+ // NOP for now
+ discard_regs(ctx,false);
+ break;
+ case OGetI8:
+ case OGetI16:
+ {
+ preg *base = alloc_cpu(ctx, ra, true);
+ preg *offset = alloc_cpu64(ctx, rb, true);
+ preg *r = alloc_reg(ctx,o->op == OGetI8 ? RCPU_8BITS : RCPU);
+ op64(ctx,XOR,r,r);
+ op32(ctx, o->op == OGetI8 ? MOV8 : MOV16,r,pmem2(&p,base->id,offset->id,1,0));
+ store(ctx, dst, r, true);
+ }
+ break;
+ case OGetMem:
+ {
+ #ifndef HL_64
+ if (dst->t->kind == HI64) {
+ error_i64();
+ }
+ #endif
+ preg *base = alloc_cpu(ctx, ra, true);
+ preg *offset = alloc_cpu64(ctx, rb, true);
+ store(ctx, dst, pmem2(&p,base->id,offset->id,1,0), false);
+ }
+ break;
+ case OSetI8:
+ {
+ preg *base = alloc_cpu(ctx, dst, true);
+ preg *offset = alloc_cpu64(ctx, ra, true);
+ preg *value = alloc_cpu8(ctx, rb, true);
+ op32(ctx,MOV8,pmem2(&p,base->id,offset->id,1,0),value);
+ }
+ break;
+ case OSetI16:
+ {
+ preg *base = alloc_cpu(ctx, dst, true);
+ preg *offset = alloc_cpu64(ctx, ra, true);
+ preg *value = alloc_cpu(ctx, rb, true);
+ op32(ctx,MOV16,pmem2(&p,base->id,offset->id,1,0),value);
+ }
+ break;
+ case OSetMem:
+ {
+ preg *base = alloc_cpu(ctx, dst, true);
+ preg *offset = alloc_cpu64(ctx, ra, true);
+ preg *value;
+ switch( rb->t->kind ) {
+ case HI32:
+ value = alloc_cpu(ctx, rb, true);
+ op32(ctx,MOV,pmem2(&p,base->id,offset->id,1,0),value);
+ break;
+ case HF32:
+ value = alloc_fpu(ctx, rb, true);
+ op32(ctx,MOVSS,pmem2(&p,base->id,offset->id,1,0),value);
+ break;
+ case HF64:
+ value = alloc_fpu(ctx, rb, true);
+ op32(ctx,MOVSD,pmem2(&p,base->id,offset->id,1,0),value);
+ break;
+ case HI64:
+ case HGUID:
+ value = alloc_cpu(ctx, rb, true);
+ op64(ctx,MOV,pmem2(&p,base->id,offset->id,1,0),value);
+ break;
+ default:
+ ASSERT(rb->t->kind);
+ break;
+ }
+ }
+ break;
+ case OType:
+ {
+ op64(ctx,MOV,alloc_cpu(ctx, dst, false),pconst64(&p,(int_val)(m->code->types + o->p2)));
+ store(ctx,dst,dst->current,false);
+ }
+ break;
+ case OGetType:
+ {
+ int jnext, jend;
+ preg *r = alloc_cpu(ctx, ra, true);
+ preg *tmp = alloc_reg(ctx, RCPU);
+ op64(ctx,TEST,r,r);
+ XJump_small(JNotZero,jnext);
+ op64(ctx,MOV, tmp, pconst64(&p,(int_val)&hlt_void));
+ XJump_small(JAlways,jend);
+ patch_jump(ctx,jnext);
+ op64(ctx, MOV, tmp, pmem(&p,r->id,0));
+ patch_jump(ctx,jend);
+ store(ctx,dst,tmp,true);
+ }
+ break;
+ case OGetArray:
+ {
+ preg *rdst = IS_FLOAT(dst) ? alloc_fpu(ctx,dst,false) : alloc_cpu(ctx,dst,false);
+ if( ra->t->kind == HABSTRACT ) {
+ int osize;
+ bool isRead = dst->t->kind != HOBJ && dst->t->kind != HSTRUCT;
+ if( isRead )
+ osize = sizeof(void*);
+ else {
+ hl_runtime_obj *rt = hl_get_obj_rt(dst->t);
+ osize = rt->size;
+ }
+ preg *idx = alloc_cpu64(ctx, rb, true);
+ op64(ctx, IMUL, idx, pconst(&p,osize));
+ op64(ctx, isRead?MOV:LEA, rdst, pmem2(&p,alloc_cpu(ctx,ra, true)->id,idx->id,1,0));
+ store(ctx,dst,dst->current,false);
+ scratch(idx);
+ } else {
+ copy(ctx, rdst, pmem2(&p,alloc_cpu(ctx,ra,true)->id,alloc_cpu64(ctx,rb,true)->id,hl_type_size(dst->t),sizeof(varray)), dst->size);
+ store(ctx,dst,dst->current,false);
+ }
+ }
+ break;
+ case OSetArray:
+ {
+ if( dst->t->kind == HABSTRACT ) {
+ int osize;
+ bool isWrite = rb->t->kind != HOBJ && rb->t->kind != HSTRUCT;
+ if( isWrite ) {
+ osize = sizeof(void*);
+ } else {
+ hl_runtime_obj *rt = hl_get_obj_rt(rb->t);
+ osize = rt->size;
+ }
+ preg *pdst = alloc_cpu(ctx,dst,true);
+ preg *pra = alloc_cpu64(ctx,ra,true);
+ op64(ctx, IMUL, pra, pconst(&p,osize));
+ op64(ctx, ADD, pdst, pra);
+ scratch(pra);
+ preg *prb = alloc_cpu(ctx,rb,true);
+ preg *tmp = alloc_reg(ctx, RCPU_CALL);
+ int offset = 0;
+ while( offset < osize ) {
+ int remain = osize - offset;
+ int copy_size = remain >= HL_WSIZE ? HL_WSIZE : (remain >= 4 ? 4 : (remain >= 2 ? 2 : 1));
+ copy(ctx, tmp, pmem(&p, prb->id, offset), copy_size);
+ copy(ctx, pmem(&p, pdst->id, offset), tmp, copy_size);
+ offset += copy_size;
+ }
+ scratch(pdst);
+ } else {
+ preg *rrb = IS_FLOAT(rb) ? alloc_fpu(ctx,rb,true) : alloc_cpu(ctx,rb,true);
+ copy(ctx, pmem2(&p,alloc_cpu(ctx,dst,true)->id,alloc_cpu64(ctx,ra,true)->id,hl_type_size(rb->t),sizeof(varray)), rrb, rb->size);
+ }
+ }
+ break;
+ case OArraySize:
+ {
+ op32(ctx,MOV,alloc_cpu(ctx,dst,false),pmem(&p,alloc_cpu(ctx,ra,true)->id,ra->t->kind == HABSTRACT ? HL_WSIZE + 4 : HL_WSIZE*2));
+ store(ctx,dst,dst->current,false);
+ }
+ break;
+ case ORef:
+ {
+ scratch(ra->current);
+ op64(ctx,MOV,alloc_cpu(ctx,dst,false),REG_AT(Ebp));
+ if( ra->stackPos < 0 )
+ op64(ctx,SUB,dst->current,pconst(&p,-ra->stackPos));
+ else
+ op64(ctx,ADD,dst->current,pconst(&p,ra->stackPos));
+ store(ctx,dst,dst->current,false);
+ }
+ break;
+ case OUnref:
+ copy_to(ctx,dst,pmem(&p,alloc_cpu(ctx,ra,true)->id,0));
+ break;
+ case OSetref:
+ copy_from(ctx,pmem(&p,alloc_cpu(ctx,dst,true)->id,0),ra);
+ break;
+ case ORefData:
+ switch( ra->t->kind ) {
+ case HARRAY:
+ {
+ preg *r = fetch(ra);
+ preg *d = alloc_cpu(ctx,dst,false);
+ op64(ctx,MOV,d,r);
+ op64(ctx,ADD,d,pconst(&p,sizeof(varray)));
+ store(ctx,dst,dst->current,false);
+ }
+ break;
+ default:
+ ASSERT(ra->t->kind);
+ }
+ break;
+ case ORefOffset:
+ {
+ preg *d = alloc_cpu(ctx,rb,true);
+ preg *r2 = alloc_cpu(ctx,dst,false);
+ preg *r = fetch(ra);
+ int size = hl_type_size(dst->t->tparam);
+ op64(ctx,MOV,r2,r);
+ switch( size ) {
+ case 1:
+ break;
+ case 2:
+ op64(ctx,SHL,d,pconst(&p,1));
+ break;
+ case 4:
+ op64(ctx,SHL,d,pconst(&p,2));
+ break;
+ case 8:
+ op64(ctx,SHL,d,pconst(&p,3));
+ break;
+ default:
+ op64(ctx,IMUL,d,pconst(&p,size));
+ break;
+ }
+ op64(ctx,ADD,r2,d);
+ scratch(d);
+ store(ctx,dst,dst->current,false);
+ }
+ break;
+ case OToVirtual:
+ {
+# ifdef HL_64
+ int size = pad_before_call(ctx, 0);
+ op64(ctx,MOV,REG_AT(CALL_REGS[1]),fetch(ra));
+ op64(ctx,MOV,REG_AT(CALL_REGS[0]),pconst64(&p,(int_val)dst->t));
+# else
+ int size = pad_before_call(ctx, HL_WSIZE*2);
+ op32(ctx,PUSH,fetch(ra),UNUSED);
+ op32(ctx,PUSH,pconst(&p,(int)(int_val)dst->t),UNUSED);
+# endif
+ if( ra->t->kind == HOBJ ) hl_get_obj_rt(ra->t); // ensure it's initialized
+ call_native(ctx,hl_to_virtual,size);
+ store(ctx,dst,PEAX,true);
+ }
+ break;
+ case OMakeEnum:
+ {
+ hl_enum_construct *c = &dst->t->tenum->constructs[o->p2];
+ int_val args[] = { (int_val)dst->t, o->p2 };
+ int i;
+ call_native_consts(ctx, hl_alloc_enum, args, 2);
+ RLOCK(PEAX);
+ for(i=0;inparams;i++) {
+ preg *r = fetch(R(o->extra[i]));
+ copy(ctx, pmem(&p,Eax,c->offsets[i]),r, R(o->extra[i])->size);
+ RUNLOCK(fetch(R(o->extra[i])));
+ if ((i & 15) == 0) jit_buf(ctx);
+ }
+ store(ctx, dst, PEAX, true);
+ }
+ break;
+ case OEnumAlloc:
+ {
+ int_val args[] = { (int_val)dst->t, o->p2 };
+ call_native_consts(ctx, hl_alloc_enum, args, 2);
+ store(ctx, dst, PEAX, true);
+ }
+ break;
+ case OEnumField:
+ {
+ hl_enum_construct *c = &ra->t->tenum->constructs[o->p3];
+ preg *r = alloc_cpu(ctx,ra,true);
+ copy_to(ctx,dst,pmem(&p,r->id,c->offsets[(int)(int_val)o->extra]));
+ }
+ break;
+ case OSetEnumField:
+ {
+ hl_enum_construct *c = &dst->t->tenum->constructs[0];
+ preg *r = alloc_cpu(ctx,dst,true);
+ switch( rb->t->kind ) {
+ case HF64:
+ {
+ preg *d = alloc_fpu(ctx,rb,true);
+ copy(ctx,pmem(&p,r->id,c->offsets[o->p2]),d,8);
+ break;
+ }
+ default:
+ copy(ctx,pmem(&p,r->id,c->offsets[o->p2]),alloc_cpu(ctx,rb,true),hl_type_size(c->params[o->p2]));
+ break;
+ }
+ }
+ break;
+ case ONullCheck:
+ {
+ int jz;
+ preg *r = alloc_cpu(ctx,dst,true);
+ op64(ctx,TEST,r,r);
+ XJump_small(JNotZero,jz);
+
+ hl_opcode *next = f->ops + opCount + 1;
+ bool null_field_access = false;
+ int hashed_name = 0;
+ // skip const and operation between nullcheck and access
+ while( (next < f->ops + f->nops - 1) && (next->op >= OInt && next->op <= ODecr) ) {
+ next++;
+ }
+ if( (next->op == OField && next->p2 == o->p1) || (next->op == OSetField && next->p1 == o->p1) ) {
+ int fid = next->op == OField ? next->p3 : next->p2;
+ hl_obj_field *f = NULL;
+ if( dst->t->kind == HOBJ || dst->t->kind == HSTRUCT )
+ f = hl_obj_field_fetch(dst->t, fid);
+ else if( dst->t->kind == HVIRTUAL )
+ f = dst->t->virt->fields + fid;
+ if( f == NULL ) ASSERT(dst->t->kind);
+ null_field_access = true;
+ hashed_name = f->hashed_name;
+ } else if( (next->op >= OCall1 && next->op <= OCallN) && next->p3 == o->p1 ) {
+ int fid = next->p2 < 0 ? -1 : ctx->m->functions_indexes[next->p2];
+ hl_function *cf = ctx->m->code->functions + fid;
+ const uchar *name = fun_field_name(cf);
+ null_field_access = true;
+ hashed_name = hl_hash_gen(name, true);
+ }
+
+ if( null_field_access ) {
+ pad_before_call(ctx, HL_WSIZE);
+ if( hashed_name >= 0 && hashed_name < 256 )
+ op64(ctx,PUSH8,pconst(&p,hashed_name),UNUSED);
+ else
+ op32(ctx,PUSH,pconst(&p,hashed_name),UNUSED);
+ } else {
+ pad_before_call(ctx, 0);
+ }
+
+ jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist));
+ j->pos = BUF_POS();
+ j->target = null_field_access ? -3 : -1;
+ j->next = ctx->calls;
+ ctx->calls = j;
+
+ op64(ctx,MOV,PEAX,pconst64(&p,RESERVE_ADDRESS));
+ op_call(ctx,PEAX,-1);
+ patch_jump(ctx,jz);
+ }
+ break;
+ case OSafeCast:
+ make_dyn_cast(ctx, dst, ra);
+ break;
+ case ODynGet:
+ {
+ int size;
+# ifdef HL_64
+ if( IS_FLOAT(dst) || dst->t->kind == HI64 ) {
+ size = begin_native_call(ctx,2);
+ } else {
+ size = begin_native_call(ctx,3);
+ set_native_arg(ctx,pconst64(&p,(int_val)dst->t));
+ }
+ set_native_arg(ctx,pconst64(&p,(int_val)hl_hash_utf8(m->code->strings[o->p3])));
+ set_native_arg(ctx,fetch(ra));
+# else
+ preg *r;
+ r = alloc_reg(ctx,RCPU);
+ if( IS_FLOAT(dst) || dst->t->kind == HI64 ) {
+ size = pad_before_call(ctx,HL_WSIZE*2);
+ } else {
+ size = pad_before_call(ctx,HL_WSIZE*3);
+ op64(ctx,MOV,r,pconst64(&p,(int_val)dst->t));
+ op64(ctx,PUSH,r,UNUSED);
+ }
+ op64(ctx,MOV,r,pconst64(&p,(int_val)hl_hash_utf8(m->code->strings[o->p3])));
+ op64(ctx,PUSH,r,UNUSED);
+ op64(ctx,PUSH,fetch(ra),UNUSED);
+# endif
+ call_native(ctx,get_dynget(dst->t),size);
+ store_result(ctx,dst);
+ }
+ break;
+ case ODynSet:
+ {
+ int size;
+# ifdef HL_64
+ switch( rb->t->kind ) {
+ case HF32:
+ case HF64:
+ size = begin_native_call(ctx, 3);
+ set_native_arg_fpu(ctx,fetch(rb),rb->t->kind == HF32);
+ set_native_arg(ctx,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)));
+ set_native_arg(ctx,fetch(dst));
+ call_native(ctx,get_dynset(rb->t),size);
+ break;
+ case HI64:
+ case HGUID:
+ size = begin_native_call(ctx, 3);
+ set_native_arg(ctx,fetch(rb));
+ set_native_arg(ctx,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)));
+ set_native_arg(ctx,fetch(dst));
+ call_native(ctx,get_dynset(rb->t),size);
+ break;
+ default:
+ size = begin_native_call(ctx,4);
+ set_native_arg(ctx,fetch(rb));
+ set_native_arg(ctx,pconst64(&p,(int_val)rb->t));
+ set_native_arg(ctx,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)));
+ set_native_arg(ctx,fetch(dst));
+ call_native(ctx,get_dynset(rb->t),size);
+ break;
+ }
+# else
+ switch( rb->t->kind ) {
+ case HF32:
+ size = pad_before_call(ctx, HL_WSIZE*2 + sizeof(float));
+ push_reg(ctx,rb);
+ op32(ctx,PUSH,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)),UNUSED);
+ op32(ctx,PUSH,fetch(dst),UNUSED);
+ call_native(ctx,get_dynset(rb->t),size);
+ break;
+ case HF64:
+ case HI64:
+ case HGUID:
+ size = pad_before_call(ctx, HL_WSIZE*2 + sizeof(double));
+ push_reg(ctx,rb);
+ op32(ctx,PUSH,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)),UNUSED);
+ op32(ctx,PUSH,fetch(dst),UNUSED);
+ call_native(ctx,get_dynset(rb->t),size);
+ break;
+ default:
+ size = pad_before_call(ctx, HL_WSIZE*4);
+ op32(ctx,PUSH,fetch32(ctx,rb),UNUSED);
+ op32(ctx,PUSH,pconst64(&p,(int_val)rb->t),UNUSED);
+ op32(ctx,PUSH,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)),UNUSED);
+ op32(ctx,PUSH,fetch(dst),UNUSED);
+ call_native(ctx,get_dynset(rb->t),size);
+ break;
+ }
+# endif
+ }
+ break;
+ case OTrap:
+ {
+ int size, jenter, jtrap;
+ int offset = 0;
+ int trap_size = (sizeof(hl_trap_ctx) + 15) & 0xFFF0;
+ hl_trap_ctx *t = NULL;
+# ifndef HL_THREADS
+ if( tinf == NULL ) tinf = hl_get_thread(); // single thread
+# endif
+
+# ifdef HL_64
+ preg *trap = REG_AT(CALL_REGS[0]);
+# else
+ preg *trap = PEAX;
+# endif
+ RLOCK(trap);
+
+ preg *treg = alloc_reg(ctx, RCPU);
+ if( !tinf ) {
+ call_native(ctx, hl_get_thread, 0);
+ op64(ctx,MOV,treg,PEAX);
+ offset = (int)(int_val)&tinf->trap_current;
+ } else {
+ offset = 0;
+ op64(ctx,MOV,treg,pconst64(&p,(int_val)&tinf->trap_current));
+ }
+ op64(ctx,MOV,trap,pmem(&p,treg->id,offset));
+ op64(ctx,SUB,PESP,pconst(&p,trap_size));
+ op64(ctx,MOV,pmem(&p,Esp,(int)(int_val)&t->prev),trap);
+ op64(ctx,MOV,trap,PESP);
+ op64(ctx,MOV,pmem(&p,treg->id,offset),trap);
+
+ /*
+ trap E,@catch
+ catch g
+ catch g2
+ ...
+ @:catch
+
+ // Before haxe 5
+ This is a bit hackshish : we want to detect the type of exception filtered by the catch so we check the following
+ sequence of HL opcodes:
+
+ trap E,@catch
+ ...
+ @catch:
+ global R, _
+ call _, ???(R,E)
+
+ ??? is expected to be hl.BaseType.check
+ */
+ hl_opcode *cat = f->ops + opCount + 1;
+ hl_opcode *next = f->ops + opCount + 1 + o->p2;
+ hl_opcode *next2 = f->ops + opCount + 2 + o->p2;
+ if( cat->op == OCatch || (next->op == OGetGlobal && next2->op == OCall2 && next2->p3 == next->p1 && dst->stack.id == (int)(int_val)next2->extra) ) {
+ int gindex = cat->op == OCatch ? cat->p1 : next->p2;
+ hl_type *gt = m->code->globals[gindex];
+ while( gt->kind == HOBJ && gt->obj->super ) gt = gt->obj->super;
+ if( gt->kind == HOBJ && gt->obj->nfields && gt->obj->fields[0].t->kind == HTYPE ) {
+ void *addr = m->globals_data + m->globals_indexes[gindex];
+# ifdef HL_64
+ op64(ctx,MOV,treg,pconst64(&p,(int_val)addr));
+ op64(ctx,MOV,treg,pmem(&p,treg->id,0));
+# else
+ op64(ctx,MOV,treg,paddr(&p,addr));
+# endif
+ } else
+ op64(ctx,MOV,treg,pconst(&p,0));
+ } else {
+ op64(ctx,MOV,treg,pconst(&p,0));
+ }
+ op64(ctx,MOV,pmem(&p,Esp,(int)(int_val)&t->tcheck),treg);
+
+ // On Win64 setjmp actually takes two arguments
+ // the jump buffer and the frame pointer (or the stack pointer if there is no FP)
+#if defined(HL_WIN) && defined(HL_64)
+ size = begin_native_call(ctx, 2);
+ set_native_arg(ctx, REG_AT(Ebp));
+#else
+ size = begin_native_call(ctx, 1);
+#endif
+ set_native_arg(ctx,trap);
+#ifdef HL_MINGW
+ call_native(ctx,_setjmp,size);
+#else
+ call_native(ctx,setjmp,size);
+#endif
+ op64(ctx,TEST,PEAX,PEAX);
+ XJump_small(JZero,jenter);
+ op64(ctx,ADD,PESP,pconst(&p,trap_size));
+ if( !tinf ) {
+ call_native(ctx, hl_get_thread, 0);
+ op64(ctx,MOV,PEAX,pmem(&p, Eax, (int)(int_val)&tinf->exc_value));
+ } else {
+ op64(ctx,MOV,PEAX,pconst64(&p,(int_val)&tinf->exc_value));
+ op64(ctx,MOV,PEAX,pmem(&p, Eax, 0));
+ }
+ store(ctx,dst,PEAX,false);
+
+ jtrap = do_jump(ctx,OJAlways,false);
+ register_jump(ctx,jtrap,(opCount + 1) + o->p2);
+ patch_jump(ctx,jenter);
+ }
+ break;
+ case OEndTrap:
+ {
+ int trap_size = (sizeof(hl_trap_ctx) + 15) & 0xFFF0;
+ hl_trap_ctx *tmp = NULL;
+ preg *addr,*r;
+ int offset;
+ if (!tinf) {
+ call_native(ctx, hl_get_thread, 0);
+ addr = PEAX;
+ RLOCK(addr);
+ offset = (int)(int_val)&tinf->trap_current;
+ } else {
+ offset = 0;
+ addr = alloc_reg(ctx, RCPU);
+ op64(ctx, MOV, addr, pconst64(&p, (int_val)&tinf->trap_current));
+ }
+ r = alloc_reg(ctx, RCPU);
+ op64(ctx, MOV, r, pmem(&p,addr->id,offset));
+ op64(ctx, MOV, r, pmem(&p,r->id,(int)(int_val)&tmp->prev));
+ op64(ctx, MOV, pmem(&p,addr->id, offset), r);
+# ifdef HL_WIN
+ // erase eip (prevent false positive)
+ {
+ _JUMP_BUFFER *b = NULL;
+# ifdef HL_64
+ op64(ctx,MOV,pmem(&p,Esp,(int)(int_val)&(b->Rip)),PEAX);
+# else
+ op64(ctx,MOV,pmem(&p,Esp,(int)&(b->Eip)),PEAX);
+# endif
+ }
+# endif
+ op64(ctx,ADD,PESP,pconst(&p,trap_size));
+ }
+ break;
+ case OEnumIndex:
+ {
+ preg *r = alloc_reg(ctx,RCPU);
+ op64(ctx,MOV,r,pmem(&p,alloc_cpu(ctx,ra,true)->id,HL_WSIZE));
+ store(ctx,dst,r,true);
+ break;
+ }
+ break;
+ case OSwitch:
+ {
+ int jdefault;
+ int i;
+ preg *r = alloc_cpu(ctx, dst, true);
+ preg *r2 = alloc_reg(ctx, RCPU);
+ op32(ctx, CMP, r, pconst(&p,o->p2));
+ XJump(JUGte,jdefault);
+ // r2 = r * 5 + eip
+# ifdef HL_64
+ op64(ctx, XOR, r2, r2);
+# endif
+ op32(ctx, MOV, r2, r);
+ op32(ctx, SHL, r2, pconst(&p,2));
+ op32(ctx, ADD, r2, r);
+# ifdef HL_64
+ preg *tmp = alloc_reg(ctx, RCPU);
+ op64(ctx, MOV, tmp, pconst64(&p,RESERVE_ADDRESS));
+# else
+ op64(ctx, ADD, r2, pconst64(&p,RESERVE_ADDRESS));
+# endif
+ {
+ jlist *s = (jlist*)hl_malloc(&ctx->galloc, sizeof(jlist));
+ s->pos = BUF_POS() - sizeof(void*);
+ s->next = ctx->switchs;
+ ctx->switchs = s;
+ }
+# ifdef HL_64
+ op64(ctx, ADD, r2, tmp);
+# endif
+ op64(ctx, JMP, r2, UNUSED);
+ for(i=0;ip2;i++) {
+ int j = do_jump(ctx,OJAlways,false);
+ register_jump(ctx,j,(opCount + 1) + o->extra[i]);
+ if( (i & 15) == 0 ) jit_buf(ctx);
+ }
+ patch_jump(ctx, jdefault);
+ }
+ break;
+ case OGetTID:
+ op32(ctx, MOV, alloc_cpu(ctx,dst,false), pmem(&p,alloc_cpu(ctx,ra,true)->id,0));
+ store(ctx,dst,dst->current,false);
+ break;
+ case OAssert:
+ {
+ pad_before_call(ctx, 0);
+ jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist));
+ j->pos = BUF_POS();
+ j->target = -2;
+ j->next = ctx->calls;
+ ctx->calls = j;
+
+ op64(ctx,MOV,PEAX,pconst64(&p,RESERVE_ADDRESS));
+ op_call(ctx,PEAX,-1);
+ }
+ break;
+ case ONop:
+ break;
+ case OPrefetch:
+ {
+ preg *r = alloc_cpu(ctx, dst, true);
+ if( o->p2 > 0 ) {
+ switch( dst->t->kind ) {
+ case HOBJ:
+ case HSTRUCT:
+ {
+ hl_runtime_obj *rt = hl_get_obj_rt(dst->t);
+ preg *r2 = alloc_reg(ctx, RCPU);
+ op64(ctx, LEA, r2, pmem(&p, r->id, rt->fields_indexes[o->p2-1]));
+ r = r2;
+ }
+ break;
+ default:
+ ASSERT(dst->t->kind);
+ break;
+ }
+ }
+ switch( o->p3 ) {
+ case 0:
+ op64(ctx, PREFETCHT0, pmem(&p,r->id,0), UNUSED);
+ break;
+ case 1:
+ op64(ctx, PREFETCHT1, pmem(&p,r->id,0), UNUSED);
+ break;
+ case 2:
+ op64(ctx, PREFETCHT2, pmem(&p,r->id,0), UNUSED);
+ break;
+ case 3:
+ op64(ctx, PREFETCHNTA, pmem(&p,r->id,0), UNUSED);
+ break;
+ case 4:
+ op64(ctx, PREFETCHW, pmem(&p,r->id,0), UNUSED);
+ break;
+ default:
+ ASSERT(o->p3);
+ break;
+ }
+ }
+ break;
+ case OAsm:
+ {
+ switch( o->p1 ) {
+ case 0: // byte output
+ B(o->p2);
+ break;
+ case 1: // scratch cpu reg
+ scratch(REG_AT(o->p2));
+ break;
+ case 2: // read vm reg
+ rb--;
+ copy(ctx, REG_AT(o->p2), &rb->stack, rb->size);
+ scratch(REG_AT(o->p2));
+ break;
+ case 3: // write vm reg
+ rb--;
+ copy(ctx, &rb->stack, REG_AT(o->p2), rb->size);
+ scratch(rb->current);
+ break;
+ case 4:
+ if( ctx->totalRegsSize != 0 )
+ hl_fatal("Asm naked function should not have local variables");
+ if( opCount != 0 )
+ hl_fatal("Asm naked function should be on first opcode");
+ ctx->buf.b -= BUF_POS() - ctx->functionPos; // reset to our function start
+ break;
+ default:
+ ASSERT(o->p1);
+ break;
+ }
+ }
+ break;
+ case OCatch:
+ // Only used by OTrap typing
+ break;
+ default:
+ jit_error(hl_op_name(o->op));
+ break;
+ }
+ // we are landing at this position, assume we have lost our registers
+ if( ctx->opsPos[opCount+1] == -1 )
+ discard_regs(ctx,true);
+ ctx->opsPos[opCount+1] = BUF_POS();
+
+ // write debug infos
+ size = BUF_POS() - codePos;
+ if( debug16 && size > 0xFF00 ) {
+ debug32 = malloc(sizeof(int) * (f->nops + 1));
+ for(i=0;icurrentPos;i++)
+ debug32[i] = debug16[i];
+ free(debug16);
+ debug16 = NULL;
+ }
+ if( debug16 ) debug16[ctx->currentPos] = (unsigned short)size; else if( debug32 ) debug32[ctx->currentPos] = size;
+
+ }
+ // patch jumps
+ {
+ jlist *j = ctx->jumps;
+ while( j ) {
+ *(int*)(ctx->startBuf + j->pos) = ctx->opsPos[j->target] - (j->pos + 4);
+ j = j->next;
+ }
+ ctx->jumps = NULL;
+ }
+ int codeEndPos = BUF_POS();
+ // add nops padding
+ jit_nops(ctx);
+ // clear regs
+ for(i=0;iholds = NULL;
+ r->lock = 0;
+ }
+ // save debug infos
+ if( ctx->debug ) {
+ int fid = (int)(f - m->code->functions);
+ ctx->debug[fid].start = codePos;
+ ctx->debug[fid].offsets = debug32 ? (void*)debug32 : (void*)debug16;
+ ctx->debug[fid].large = debug32 != NULL;
+ }
+ // unwind info
+#ifdef WIN64_UNWIND_TABLES
+ int uw_idx = ctx->nunwind++;
+ ctx->unwind_table[uw_idx].BeginAddress = codePos;
+ ctx->unwind_table[uw_idx].EndAddress = codeEndPos;
+ ctx->unwind_table[uw_idx].UnwindData = ctx->unwind_offset;
+#endif
+ // reset tmp allocator
+ hl_free(&ctx->falloc);
+ return codePos;
+}
+
+/*
+	Wrapper lookup installed in hl_setup.get_wrapper: the requested type is
+	ignored and the shared hl2c trampoline is always returned.
+*/
+static void *get_wrapper( hl_type *t ) {
+	(void)t; // same trampoline whatever the type
+	return call_jit_hl2c;
+}
+
+/*
+	Overwrites the first bytes of `old_fun` with a small stub:
+		mov (e/r)ax, new_fun_table
+		jmp [(e/r)ax]
+	The stub is 12 bytes long when HL_64 is defined and 7 bytes otherwise.
+*/
+void hl_jit_patch_method( void *old_fun, void **new_fun_table ) {
+	unsigned char *out = (unsigned char*)old_fun;
+	unsigned long long addr = (unsigned long long)(int_val)new_fun_table;
+	int k;
+#	ifdef HL_64
+	const int addr_bytes = 8;
+	*out++ = 0x48; // 64-bit operand size prefix
+#	else
+	const int addr_bytes = 4;
+#	endif
+	*out++ = 0xB8; // mov into the accumulator, immediate follows
+	// immediate operand, least significant byte first
+	for( k = 0; k < addr_bytes; k++ )
+		*out++ = (unsigned char)(addr >> (k * 8));
+	// indirect jump through the accumulator
+	*out++ = 0xFF;
+	*out++ = 0x20;
+}
+
+static void missing_closure() { // used by hl_jit_code as the target of closures whose function has no match in the previous module
+ hl_error("Missing static closure");
+}
+
+void *hl_jit_code( jit_ctx *ctx, hl_module *m, int *codesize, hl_debug_infos **debug, hl_module *previous ) { // copies the generated buffer into executable memory, then resolves pending call, switch and closure patches; returns the code base or NULL on failure
+ jlist *c;
+ int size = BUF_POS();
+ unsigned char *code;
+ if( size & 4095 ) size += 4096 - (size&4095); // round the allocation up to a multiple of 4096 bytes
+ code = (unsigned char*)hl_alloc_executable_memory(size);
+ if( code == NULL ) return NULL;
+ memcpy(code,ctx->startBuf,BUF_POS());
+ *codesize = size; // reports the rounded size, not the number of bytes copied
+ *debug = ctx->debug;
+ if( !call_jit_c2hl ) { // first call only: publish the c2hl/hl2c entry points and install the hl_setup hooks
+ call_jit_c2hl = code + ctx->c2hl;
+ call_jit_hl2c = code + ctx->hl2c;
+ hl_setup.get_wrapper = get_wrapper;
+ hl_setup.static_call = callback_c2hl;
+ hl_setup.static_call_ref = true;
+ }
+#ifdef WIN64_UNWIND_TABLES
+ m->unwind_table = ctx->unwind_table;
+ RtlAddFunctionTable(m->unwind_table, ctx->nunwind, (DWORD64)code);
+#endif
+ if( !ctx->static_function_offset ) { // one-time: static_functions entries go from buffer offsets to absolute addresses
+ int i;
+ ctx->static_function_offset = true;
+ for(i=0;i<(int)(sizeof(ctx->static_functions)/sizeof(void*));i++)
+ ctx->static_functions[i] = (void*)(code + (int)(int_val)ctx->static_functions[i]);
+ }
+ // patch calls: negative targets index static_functions (-1 is entry 0), others index m->functions_ptrs
+ c = ctx->calls;
+ while( c ) {
+ void *fabs;
+ if( c->target < 0 )
+ fabs = ctx->static_functions[-c->target-1];
+ else {
+ fabs = m->functions_ptrs[c->target];
+ if( fabs == NULL ) {
+ // read absolute address from previous module -- NOTE(review): assumes `previous` and m->hash are non-NULL on this path, confirm against callers
+ int old_idx = m->hash->functions_hashes[m->functions_indexes[c->target]];
+ if( old_idx < 0 )
+ return NULL; // NOTE(review): `code` is not released here, confirm the caller does not expect it to be freed
+ fabs = previous->functions_ptrs[(previous->code->functions + old_idx)->findex];
+ } else {
+ // relative: the stored pointer is an offset from the start of this code buffer
+ fabs = (unsigned char*)code + (int)(int_val)fabs;
+ }
+ }
+ if( (code[c->pos]&~3) == (IS_64?0x48:0xB8) || code[c->pos] == 0x68 ) // MOV : absolute | PUSH
+ *(void**)(code + c->pos + (IS_64?2:1)) = fabs;
+ else {
+ int_val delta = (int_val)fabs - (int_val)code - (c->pos + 5); // displacement relative to the byte at c->pos + 5
+ int rpos = (int)delta;
+ if( (int_val)rpos != delta ) { // the displacement must survive truncation to int
+ printf("Target code too far too rebase\n");
+ return NULL; // NOTE(review): `code` is not released here either
+ }
+ *(int*)(code + c->pos + 1) = rpos;
+ }
+ c = c->next;
+ }
+ // patch switches: store the address lying 14 (IS_64) or 6 bytes past each recorded position
+ c = ctx->switchs;
+ while( c ) {
+ *(void**)(code + c->pos) = code + c->pos + (IS_64 ? 14 : 6);
+ c = c->next;
+ }
+ // patch closures: c->fun holds a function index until replaced below, c->value links to the next pending closure
+ {
+ vclosure *c = ctx->closure_list;
+ while( c ) {
+ vclosure *next;
+ int fidx = (int)(int_val)c->fun;
+ void *fabs = m->functions_ptrs[fidx];
+ if( fabs == NULL ) {
+ // read absolute address from previous module
+ int old_idx = m->hash->functions_hashes[m->functions_indexes[fidx]];
+ if( old_idx < 0 )
+ fabs = missing_closure; // unlike calls, an unresolved closure does not abort: it raises an error only when invoked
+ else
+ fabs = previous->functions_ptrs[(previous->code->functions + old_idx)->findex];
+ } else {
+ // relative: the stored pointer is an offset from the start of this code buffer
+ fabs = (unsigned char*)code + (int)(int_val)fabs;
+ }
+ c->fun = fabs;
+ next = (vclosure*)c->value;
+ c->value = NULL; // the link is consumed, clear it
+ c = next;
+ }
+ }
+ return code;
+}
+
diff --git a/src/jit_regs.c b/src/jit_regs.c
new file mode 100644
index 000000000..50f151f06
--- /dev/null
+++ b/src/jit_regs.c
@@ -0,0 +1,813 @@
+/*
+ * Copyright (C)2015-2016 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include
+#include
+#include "data_struct.h"
+
+#define VAL(k) (ctx->values + (k)) // pointer to entry k of ctx->values; requires a `ctx` in scope
+
+//#define REGS_DEBUG
+
+#ifdef REGS_DEBUG // when defined, regs_debug forwards to jit_debug; otherwise it expands to nothing
+# define regs_debug jit_debug
+#else
+# define regs_debug(...)
+#endif
+
+#define INVALID 0x80000000 // sentinel compared against value_info.stack_pos to detect a value with no stack slot yet
+
+#define VIDX(e) (((e) < 0) ? ctx->jit->value_count + (-(e)-1) : (e)) // non-negative ids map to themselves, negative ones to slots starting at value_count
+#define VAL_REG(e) VAL(VIDX(e)) // value_info for the value designated by e
+#define REG_MODE(m) (IS_FLOAT(m) ? 1 :0) // register class index: 1 for float modes, 0 otherwise
+#define REG_CFG(m) (m ? &ctx->jit->cfg.floats : &ctx->jit->cfg.regs) // register configuration for a class index produced by REG_MODE
+
+#define EMIT(r,a,b,m) regs_emit(ctx,UNUSED,r,a,b,m,0) // emit operation r with no output register and a zero size_offs
+#define BREAK() EMIT(DEBUG_BREAK,UNUSED,UNUSED,0) // emit a DEBUG_BREAK instruction
+
+typedef struct { // per-value bookkeeping used by the register allocator
+ int id; // when >= 0, used as an index into jit->values_writes
+ int stack_pos; // stack slot offset; INVALID until spill() or argument setup assigns one
+ int last_read; // instruction index of the last read seen (pushed to the loop end for values written before the loop)
+ int tot_reads; // number of reads counted during the liveness pass
+ emit_mode mode; // emit mode; selects the slot size and the register class
+ ereg pref_reg; // register tried first by regs_alloc_reg when set
+ ereg reg; // current location: a register, or a stack location built with MK_STACK_REG
+} value_info;
+
+#define S_TYPE values // NOTE(review): S_TYPE/S_NAME/S_VALUE look like parameters consumed by data_struct.c to generate a `values` container of value_info* — confirm against data_struct.c
+#define S_NAME(name) values_##name
+#define S_VALUE value_info*
+#include "data_struct.c"
+#define values_add(set,v) values_add_impl(DEF_ALLOC,&(set),v) // convenience form: passes DEF_ALLOC and takes the set by name rather than by address
+
+struct _regs_ctx { // state of one register-allocation run
+ jit_ctx *jit; // owning JIT context (instructions, blocks, register configuration)
+ value_info *values; // per-value table addressed through VAL()/VAL_REG()
+ values scratch; // values tracked as holding a register; these are the candidates for spilling
+ int_arr jump_regs;
+ int_arr pack_movs;
+ int_arr *blocks_phis; // per block: flat triples (source value, phi value, flag set when the phi's block immediately follows)
+ int max_instrs; // capacity of instrs/out_write
+ int cur_op; // index of the input instruction being processed
+ int emit_pos; // number of instructions written to instrs so far
+ int stack_size; // bytes of stack handed out by regs_alloc_stack
+ int stack_offset;
+ int loop_start; // first instruction of the innermost loop being scanned
+ int loop_end; // last instruction of that loop; zero is treated as "not in a loop"
+ einstr *instrs; // output instruction buffer, grown by regs_write_instr
+ ereg *out_write; // output register recorded for each entry of instrs
+ int *pos_map;
+ bool flushed;
+ bool has_direct_call; // set when a returning call has every argument assigned to a register
+ int persists_uses[2]; // number of persistent registers handed out, per register class
+};
+
+typedef int call_regs[2];
+
+/*
+	Returns the next argument register for a value of mode `m` and advances the
+	matching counter in `regs`, or UNUSED once the registers of that class are
+	exhausted. Under IS_WINCALL64 both classes share counter 0.
+*/
+static ereg get_call_reg( regs_ctx *ctx, call_regs regs, emit_mode m ) {
+	int kind = REG_MODE(m);
+	reg_config *rc = REG_CFG(kind);
+	int slot = IS_WINCALL64 ? 0 : kind;
+	if( regs[slot] >= rc->nargs )
+		return UNUSED;
+	return rc->arg[regs[slot]++];
+}
+
+/*
+	Stack bytes used by an argument of mode `m`: its mode size, raised to at
+	least HL_WSIZE and, when the configuration sets one, to cfg.stack_arg_size.
+*/
+static int get_stack_size( regs_ctx *ctx, emit_mode m ) {
+	int bytes = hl_emit_mode_sizes[m];
+	int min_slot = ctx->jit->cfg.stack_arg_size;
+	if( bytes < HL_WSIZE )
+		bytes = HL_WSIZE;
+	if( min_slot != 0 && bytes < min_slot )
+		bytes = min_slot;
+	return bytes;
+}
+
+/*
+	Appends instruction `e` and its output register `out` to the output buffers.
+	When the buffers are full they are reallocated at twice the capacity (256
+	entries the first time) and the unused instruction slots are zeroed.
+	Otherwise, each time emit_pos reaches a multiple of 256, the next 256
+	instruction slots are cleared before being written.
+*/
+static void regs_write_instr( regs_ctx *ctx, einstr *e, ereg out ) {
+	if( ctx->emit_pos == ctx->max_instrs ) {
+		int pos = ctx->emit_pos;
+		int next_size = ctx->max_instrs ? (ctx->max_instrs << 1) : 256;
+		// distinct names: the original local `out` shadowed the `out` parameter
+		einstr *new_instrs = (einstr*)malloc(sizeof(einstr) * next_size);
+		ereg *new_out = (ereg*)malloc(sizeof(ereg) * next_size);
+		if( new_instrs == NULL || new_out == NULL ) jit_error("Out of memory");
+		// nothing to copy on the first growth; memcpy must not be handed a NULL source even for 0 bytes
+		if( pos > 0 ) {
+			memcpy(new_instrs, ctx->instrs, pos * sizeof(einstr));
+			memcpy(new_out, ctx->out_write, pos * sizeof(ereg));
+		}
+		memset(new_instrs + pos, 0, (next_size - pos) * sizeof(einstr));
+		free(ctx->instrs);
+		free(ctx->out_write);
+		ctx->instrs = new_instrs;
+		ctx->out_write = new_out;
+		ctx->max_instrs = next_size;
+	} else if( (ctx->emit_pos & 0xFF) == 0 )
+		memset(ctx->instrs + ctx->emit_pos, 0, 256 * sizeof(einstr));
+	ctx->out_write[ctx->emit_pos] = out;
+	ctx->instrs[ctx->emit_pos++] = *e;
+}
+
+/*
+	Builds one instruction from its parts and appends it to the output, with
+	`out` recorded as the register it writes.
+*/
+static void regs_emit( regs_ctx *ctx, ereg out, emit_op op, ereg a, ereg b, emit_mode m, int size_offs ) {
+	einstr ins;
+	// header is assigned before mode, as in the original sequence
+	ins.header = op;
+	ins.mode = m;
+	ins.a = a;
+	ins.b = b;
+	ins.size_offs = size_offs;
+	regs_write_instr(ctx, &ins, out);
+}
+
+/* Emits a MOV from `from` into `to`; nothing is emitted when both are the same. */
+static void regs_emit_mov( regs_ctx *ctx, ereg to, ereg from, emit_mode m ) {
+	if( to != from )
+		regs_emit(ctx,to,MOV,from,UNUSED,m,0);
+}
+
+/*
+	Reserves `size` bytes of stack, then adds the padding reported by
+	jit_pad_size for the new total. Returns the slot position as the negated
+	total stack size.
+*/
+static int regs_alloc_stack( regs_ctx *ctx, int size ) {
+	ctx->stack_size = ctx->stack_size + size;
+	ctx->stack_size = ctx->stack_size + jit_pad_size(ctx->stack_size,size);
+	return -ctx->stack_size;
+}
+
+#define value_str(v) value_to_str(ctx,v)
+
+/*
+	Formats `v` as "<id>:<reg>" for debug traces.
+	The result lives in a static buffer that is overwritten by the next call.
+	Output longer than the buffer is truncated instead of overflowing it.
+*/
+static const char *value_to_str( regs_ctx *ctx, value_info *v ) {
+	static char out[20];
+	snprintf(out,sizeof(out),"%s:%s", val_str(v->id,v->mode), val_str(v->reg,v->mode));
+	return out;
+}
+
+/*
+	Moves `v` to the stack: a slot sized for its mode is allocated the first
+	time, its location becomes that slot, and it leaves the tracked register set.
+*/
+static void spill( regs_ctx *ctx, value_info *v ) {
+	if( v->stack_pos == INVALID ) {
+		int bytes = hl_emit_mode_sizes[v->mode];
+		v->stack_pos = regs_alloc_stack(ctx, bytes);
+	}
+	v->reg = MK_STACK_REG(v->stack_pos);
+	values_remove(&ctx->scratch,v);
+	regs_debug("REG SPILL %s @%X\n",value_str(v),ctx->cur_op);
+}
+
+/*
+	Picks a register for `v`, trying in order:
+	  1. its preferred register, if no tracked value holds it and it is not a
+	     persistent register already handed out;
+	  2. the first scratch register not held by a tracked value;
+	  3. the next unused persistent register;
+	  4. the scratch register of the first conflicting tracked value, which is spilled.
+	Returns false only in case 3; the caller uses the result to decide whether
+	`v` joins ctx->scratch.
+*/
+static bool regs_alloc_reg( regs_ctx *ctx, value_info *v ) {
+	// lookup available reg
+	int mode = REG_MODE(v->mode);
+	reg_config *cfg = REG_CFG(mode);
+	if( !IS_NULL(v->pref_reg) ) {
+		bool avail = true; // renamed from `free`, which shadowed the libc function
+		for_iter(values,v2,ctx->scratch) {
+			if( v2->reg == v->pref_reg ) {
+				avail = false;
+				break;
+			}
+		}
+		if( avail ) {
+			// only the persistent registers already handed out are considered taken
+			for(int i=0;i<ctx->persists_uses[mode];i++)
+				if( cfg->persist[i] == v->pref_reg ) {
+					avail = false;
+					break;
+				}
+		}
+		if( avail ) {
+			v->reg = v->pref_reg;
+			return true;
+		}
+	}
+	value_info *first = NULL; // holder of the first occupied scratch register, in configuration order
+	for(int i=0;i<cfg->nscratchs;i++) {
+		ereg r = cfg->scratch[i];
+		for_iter(values,v2,ctx->scratch) {
+			if( v2->reg == r ) {
+				if( first == NULL ) first = v2;
+				r = UNUSED;
+				break;
+			}
+		}
+		if( !IS_NULL(r) ) {
+			v->reg = r;
+			return true;
+		}
+	}
+	if( ctx->persists_uses[mode] < cfg->npersists ) {
+		v->reg = cfg->persist[ctx->persists_uses[mode]++];
+		return false;
+	}
+	// every scratch register is taken: evict the holder recorded above
+	if( !first ) jit_assert();
+	v->reg = first->reg;
+	spill(ctx, first);
+	return true;
+}
+
+/*
+	Gives a register to `v`, which must not have a location yet, and starts
+	tracking it in ctx->scratch unless regs_alloc_reg reports otherwise.
+*/
+static void regs_assign( regs_ctx *ctx, value_info *v ) {
+	bool track;
+	if( v->reg != UNUSED ) jit_assert();
+	track = regs_alloc_reg(ctx, v);
+	if( track )
+		values_add(ctx->scratch, v);
+	regs_debug("REG ASSIGN %s @%X-@%X\n",value_str(v),ctx->cur_op,v->last_read);
+}
+
+/*
+	Callback passed to hl_emit_reg_iter by regs_compute_liveness: records a read
+	of the value designated by *r at the current instruction.
+	Inside a loop, a value written before the loop start is kept alive until the loop end.
+*/
+static void regs_write_live( regs_ctx *ctx, ereg *r ) {
+	value_info *info;
+	int written_at = -1;
+	if( IS_NULL(*r) ) jit_assert();
+	if( !REG_IS_VAL(*r) ) return; // some are injections of native regs at emit
+	info = VAL_REG(*r);
+	if( info->id >= 0 )
+		written_at = ctx->jit->values_writes[info->id];
+	if( ctx->loop_end && written_at < ctx->loop_start )
+		info->last_read = ctx->loop_end;
+	else
+		info->last_read = ctx->cur_op;
+	info->tot_reads++;
+}
+
+/* Returns the tracked value currently located in register `r`, or NULL if there is none. */
+static value_info *regs_current( regs_ctx *ctx, ereg r ) {
+	for_iter(values,held,ctx->scratch) {
+		if( held->reg != r )
+			continue;
+		return held;
+	}
+	return NULL;
+}
+
+/*
+	First pass of the allocator. Walks every instruction once to:
+	  - record the last read and the read count of each value (regs_write_live);
+	  - track the enclosing loop, so values written before a loop stay alive to its end;
+	  - set register preferences: call arguments, call results, the returned
+	    value, and operands of shifts / integer divisions when the target
+	    configuration requires specific registers;
+	  - set ctx->has_direct_call when a returning call has every argument in a register.
+	It then records, per predecessor block, the phi moves to perform at its end.
+*/
+static void regs_compute_liveness( regs_ctx *ctx ) {
+#	define MAX_LOOP_DEPTH 256
+	int loop_saves[MAX_LOOP_DEPTH]; // stack of (loop_start, loop_end) pairs of the enclosing loops
+	int loop_count = 0;
+	int write_index = 1;
+	jit_ctx *jit = ctx->jit;
+	hl_type *tret = ctx->jit->fun->type->fun->ret;
+	emit_mode mret = tret->kind == HF32 || tret->kind == HF64 ? M_F64 : M_PTR;
+	ereg ret = REG_CFG(REG_MODE(mret))->ret;
+	for(int cur_op=0;cur_op<jit->instr_count;cur_op++) {
+		einstr *e = jit->instrs + cur_op;
+		value_info *write = NULL;
+
+		// leaving one or more loops that end here: restore the enclosing loop bounds
+		while( ctx->loop_end == cur_op && cur_op ) {
+			ctx->loop_end = loop_saves[--loop_count];
+			ctx->loop_start = loop_saves[--loop_count];
+		}
+
+		if( write_index < jit->value_count && jit->values_writes[write_index] == cur_op )
+			write = VAL(write_index++);
+
+		ctx->cur_op = cur_op;
+		hl_emit_reg_iter(jit,e,ctx,(void*)regs_write_live);
+		if( IS_CALL(e->op) ) {
+			// anticipate register usage in call so we can privilege this assign
+			ereg *args = hl_emit_get_args(jit->emit, e);
+			call_regs regs = {0};
+			bool needs_push = false;
+			for(int k=0;k<e->nargs;k++) {
+				ereg arg = args[k];
+				value_info *v = REG_IS_VAL(arg) ? VAL_REG(arg) : NULL;
+				ereg creg = get_call_reg(ctx, regs, v ? v->mode : M_I32);
+				if( IS_NULL(creg) ) {
+					needs_push = true;
+					continue;
+				}
+				if( v && IS_NULL(v->pref_reg) )
+					v->pref_reg = creg;
+			}
+			if( !needs_push && e->mode != M_NORET ) ctx->has_direct_call = true;
+			if( write && IS_NULL(write->pref_reg) )
+				write->pref_reg = REG_CFG(REG_MODE(e->mode))->ret;
+		} else switch( e->op ) {
+		case RET:
+			if( e->a ) {
+				value_info *v = VAL_REG(e->a);
+				if( v->pref_reg == UNUSED ) v->pref_reg = ret;
+			}
+			break;
+		case BINOP:
+			switch( e->size_offs ) {
+			case OSShr:
+			case OUShr:
+			case OShl:
+				if( jit->cfg.req_bit_shifts ) VAL_REG(e->b)->pref_reg = jit->cfg.req_bit_shifts;
+				break;
+			case OSDiv:
+			case OUDiv:
+			case OSMod:
+			case OUMod:
+				if( !IS_FLOAT(e->mode) ) {
+					if( jit->cfg.req_div_a ) VAL_REG(e->a)->pref_reg = jit->cfg.req_div_a;
+					if( jit->cfg.req_div_b ) VAL_REG(e->b)->pref_reg = jit->cfg.req_div_b;
+				}
+				break;
+			}
+			break;
+		case BLOCK:
+			{
+				// are we in loop ? a predecessor that starts after this block is a back edge
+				eblock *bl = jit->blocks + e->size_offs;
+				int loop_end = -1;
+				for(int k=0;k<bl->pred_count;k++) {
+					eblock *b2 = jit->blocks + bl->preds[k];
+					if( b2->start_pos > bl->start_pos && b2->end_pos >= loop_end )
+						loop_end = b2->end_pos - 1;
+				}
+				if( loop_end > 0 ) {
+					// each loop takes two slots; refuse to write past the save stack
+					if( loop_count + 2 > MAX_LOOP_DEPTH ) jit_error("Too many nested loops");
+					loop_saves[loop_count++] = ctx->loop_start;
+					loop_saves[loop_count++] = ctx->loop_end;
+					ctx->loop_start = cur_op;
+					ctx->loop_end = loop_end;
+				}
+			}
+			break;
+		default:
+			break;
+		}
+	}
+	if( loop_count != 0 ) jit_assert();
+	// compute reverse phis: for each phi input, queue (input, phi value, is-next-block) on the source block
+	for(int b=0;b<jit->block_count;b++) {
+		eblock *bl = jit->blocks + b;
+		for(int p=0;p<bl->phi_count;p++) {
+			ephi *ph = bl->phis + p;
+			VAL_REG(ph->value)->mode = ph->mode;
+			for(int k=0;k<ph->nvalues;k++) {
+				ereg v = ph->values[k];
+				eblock *b2 = jit->blocks + ph->blocks[k];
+				value_info *val = VAL_REG(v);
+				int_arr *arr = &ctx->blocks_phis[b2 - jit->blocks];
+				regs_debug("ADD PHI %s:=%s to #%d@%X\n",val_str(ph->value,ph->mode),val_str(v,ph->mode),(int)(b2 - jit->blocks),b2->end_pos-1);
+				int_arr_add(*arr,v);
+				int_arr_add(*arr,ph->value);
+				int_arr_add(*arr,(bl - b2) == 1);
+				// the input is read at the end of its source block
+				val->tot_reads++;
+				if( val->last_read < b2->end_pos )
+					val->last_read = b2->end_pos;
+			}
+		}
+	}
+}
+
+// Allocation pass: walks the function's instructions in order and chooses a
+// location (register or stack slot) for every value.
+//  1. incoming arguments get their calling-convention register when
+//     get_call_reg() provides one, otherwise their existing stack slot;
+//  2. each instruction that writes a value gets a location through regs_assign(),
+//     after values live across a call / CATCH / ADDRESS have been spilled;
+//  3. every value still without a register is given its stack slot.
+// NOTE(review): several loop headers in this listing lost the text between a
+// '<' and the following '>' (extraction damage) - restore them from upstream
+// before compiling.
+static void regs_assign_regs( regs_ctx *ctx ) {
+ jit_ctx *jit = ctx->jit;
+ // assign args
+ call_regs regs = {0};
+ int args_count = 0;
+ for(int i=1;i<=ctx->jit->fun->type->fun->nargs;i++) {
+ value_info *v = VAL(i);
+ einstr *e = ctx->jit->instrs + ctx->jit->values_writes[i];
+ int size = hl_emit_mode_sizes[e->mode];
+ if( size <= 0 && e->mode != M_VOID ) jit_assert();
+ ereg r = get_call_reg(ctx,regs,e->mode);
+ if( !IS_NULL(r) ) {
+ v->reg = r;
+ values_add(ctx->scratch,v);
+ }
+ if( IS_NULL(r) || IS_WINCALL64 ) {
+ // use existing stack storage
+ // (+2 skips two words; presumably the return address and saved frame pointer - confirm)
+ v->stack_pos = (args_count++ + 2) * HL_WSIZE;
+ if( IS_NULL(r) ) v->reg = MK_STACK_REG(v->stack_pos);
+ }
+ }
+ // assign registers
+ int write_index = 1;
+ for(int cur_op=0;cur_opinstr_count;cur_op++) {
+ einstr e = jit->instrs[cur_op];
+ value_info *write = NULL;
+# ifdef HL_DEBUG
+ int eid = (jit->fun->findex << 16) | cur_op;
+ __ignore(&eid);
+# endif
+ ctx->cur_op = cur_op;
+
+
+ // values_writes[] is consumed in increasing order: does this instruction define the next value ?
+ if( write_index < jit->value_count && jit->values_writes[write_index] == cur_op ) {
+ write = VAL(write_index++);
+ // try to preserve ops in the from A = A op B
+ if( (e.op == UNOP || e.op == BINOP) && write->pref_reg == UNUSED ) {
+ value_info *v = VAL_REG(e.a);
+ if( IS_REG(v->reg) ) write->pref_reg = v->reg;
+ }
+ }
+
+ // drop scratch entries whose last read is not after this instruction
+ for_iter_back(values,v,ctx->scratch) {
+ if( v->last_read <= cur_op )
+ values_remove(&ctx->scratch,v);
+ }
+
+ if( IS_CALL(e.op) ) {
+ ereg *args = hl_emit_get_args(ctx->jit->emit,&e);
+ call_regs regs = {0};
+ bool will_scratch = e.mode != M_NORET;
+ value_info *vcall = e.op == CALL_REG ? VAL_REG(e.a) : NULL;
+ // unless the call never returns, every scratch value read after it must be spilled
+ if( will_scratch ) {
+ for_iter_back(values,v2,ctx->scratch) {
+ if( v2->last_read > cur_op )
+ spill(ctx,v2);
+ }
+ }
+ for(int k=0;kmode);
+ if( !IS_NULL(r) ) {
+ // free the argument register: spill its current holder, and the call target if it sits there
+ value_info *cur = regs_current(ctx,r);
+ if( cur && cur != v )
+ spill(ctx,cur);
+ if( vcall && vcall->reg == r )
+ spill(ctx,vcall);
+ }
+ }
+ if( will_scratch ) values_reset(&ctx->scratch);
+ }
+ switch( e.op ) {
+ case BLOCK:
+ for_iter_back(values,v,ctx->scratch) {
+ if( v->last_read == cur_op )
+ values_remove(&ctx->scratch,v);
+ }
+ eblock *bl = jit->blocks + e.size_offs;
+ for(int k=0;kphi_count;k++) {
+ ephi *p = bl->phis + k;
+ value_info *v = VAL_REG(p->value);
+ for(int n=0;nnvalues;n++) {
+ value_info *vn = VAL_REG(p->values[n]);
+ // ignore previously set pref_reg (minimize moves)
+ if( IS_REG(vn->reg) && !regs_current(ctx,vn->reg) ) {
+ v->pref_reg = vn->reg;
+ break;
+ }
+ }
+ regs_assign(ctx, v);
+ }
+ break;
+ case CATCH:
+ {
+ // spill every scratch value at a CATCH
+ for_iter_back(values,v2,ctx->scratch)
+ spill(ctx,v2);
+ }
+ break;
+ case ALLOC_STACK:
+ // the value is the stack area itself: no register is assigned
+ write->reg = MK_STACK_OFFS(regs_alloc_stack(ctx, e.size_offs));
+ continue;
+ case LOAD_ARG:
+ if( write->reg == UNUSED )
+ regs_assign(ctx, write); // assign for stack reg
+ continue;
+ case ADDRESS:
+ {
+ // the operand whose address is taken is spilled
+ if( REG_KIND(e.a) == R_CONST ) jit_assert();
+ value_info *v = VAL_REG(e.a);
+ spill(ctx, v);
+ break;
+ }
+ default:
+ break;
+ }
+ if( write ) regs_assign(ctx, write);
+ }
+ // assign stack regs
+ int nvalues = jit->value_count + jit->phi_count;
+ // 8 bytes per used persistent register (integer + float counts)
+ ctx->stack_offset = (ctx->persists_uses[0] + ctx->persists_uses[1]) * 8;
+ for(int i=0;ivalues + i;
+ if( v->reg == UNUSED ) v->reg = MK_STACK_REG(v->stack_pos);
+ }
+}
+
+// Processes the moves queued in ctx->pack_movs, then resets the queue.
+// flush_phis() and the call path of regs_emit_instrs() queue entries as
+// (destination, source, mode) triplets.
+// NOTE(review): the body of the inner loop is missing from this listing (text
+// between '<' and '>' was lost in extraction); the `cycle` flag suggests that
+// circular move chains are detected - confirm against upstream.
+static void flush_movs( regs_ctx *ctx, bool cond ) {
+ int_arr movs = ctx->pack_movs;
+ while( true ) {
+ int size = int_arr_count(movs);
+ if( !size ) break;
+ bool cycle = true;
+ for(int k=0;kpack_movs = movs;
+ int_arr_reset(&ctx->pack_movs);
+}
+
+// Queues the register moves required by the phi nodes fed from block `b`
+// and flushes them with flush_movs().
+//   b     : predecessor block whose recorded phi copies are processed (NULL = no-op)
+//   cond  : forwarded to flush_movs(); when false the block's phi list is freed
+//           after being read
+//   after : selects which entries are handled - only those whose third field
+//           equals 1 when true, only the others when false
+// ctx->blocks_phis[bid] stores triplets (source value, phi value, flag) as
+// recorded by the "compute reverse phis" step.
+// NOTE(review): the local `ereg b` declared in the loop shadows the `b`
+// parameter; harmless here since the parameter is not used afterwards, but
+// worth renaming.
+// NOTE(review): the duplicate-check loop header lost text in extraction.
+static void flush_phis( regs_ctx *ctx, eblock *b, bool cond, bool after ) {
+ if( !b ) return;
+ jit_ctx *jit = ctx->jit;
+ int bid = (int)(b - jit->blocks);
+ int_arr arr = ctx->blocks_phis[bid];
+ int idx = 0;
+ int_arr movs = ctx->pack_movs;
+
+ while( idx < int_arr_count(arr) ) {
+ ereg a = int_arr_get(arr,idx++);
+ ereg b = int_arr_get(arr,idx++);
+ int bcount = int_arr_get(arr,idx++);
+ if( after != (bcount == 1) )
+ continue;
+ value_info *from = VAL_REG(a);
+ value_info *to = VAL_REG(b);
+ // nothing to move when both already share a location
+ if( from->reg == to->reg ) continue;
+ int size = int_arr_count(movs);
+ bool dup = false;
+ for(int k=0;kreg && int_arr_get(movs,k+1) == from->reg ) {
+ dup = true;
+ break;
+ }
+ }
+ if( !dup ) {
+ // queue (destination, source, mode)
+ int_arr_add(movs, to->reg);
+ int_arr_add(movs, from->reg);
+ int_arr_add(movs, from->mode);
+ }
+ }
+ // write the local copy back before flushing
+ ctx->pack_movs = movs;
+ if( !cond )
+ int_arr_free(&ctx->blocks_phis[bid]);
+ flush_movs(ctx, cond);
+}
+
+// Emission pass: rewrites every instruction with the locations chosen by
+// regs_assign_regs() and inserts the extra instructions this requires:
+//  - argument moves / pushes and stack adjustment around calls,
+//  - prologue (ENTER) and epilogue (RET) saving and restoring the frame
+//    register and the used persistent registers,
+//  - phi moves before jumps and at the end of blocks.
+// ctx->pos_map[i] receives the emitted position of source instruction i and
+// ctx->jump_regs the (emitted position, target source index) pairs of jumps.
+// NOTE(review): several lines in this listing lost the text between a '<' and
+// the following '>' and "&REG_CFG" was turned into a (R) sign (extraction
+// damage) - restore them from upstream before compiling.
+static void regs_emit_instrs( regs_ctx *ctx ) {
+ jit_ctx *jit = ctx->jit;
+ eblock *cur_block = NULL;
+ call_regs regs = {0};
+ int write_index = 1;
+ ctx->pos_map[0] = 0;
+
+ // pad the locals area so that locals + saved words stay a multiple of cfg.stack_align
+ int stack_offset = ctx->stack_size;
+ int push_size = HL_WSIZE * 2 + ctx->stack_offset; // RIP + RBP save
+ if( jit->cfg.stack_align ) {
+ int align = (stack_offset + push_size) % jit->cfg.stack_align;
+ if( align ) stack_offset += jit->cfg.stack_align - align;
+ }
+
+ for(int cur_op=0;cur_opinstr_count;cur_op++) {
+ einstr e = jit->instrs[cur_op];
+ ereg *ret_val = NULL;
+ int nread;
+ int instr_stack_offset = 0;
+ ctx->cur_op = cur_op;
+
+ // location of the value defined by this instruction, if any
+ value_info *vout = NULL;
+ ereg out = UNUSED;
+ if( write_index < jit->value_count && jit->values_writes[write_index] == cur_op ) {
+ vout = VAL(write_index++);
+ out = vout->reg;
+ }
+
+ if( IS_CALL(e.op) ) {
+ ereg *args = hl_emit_get_args(ctx->jit->emit,&e);
+ call_regs regs = {0};
+ int stack_args = 0;
+ int stack_bits = 0;
+ for(int k=0;kmode : M_I32;
+ ereg r = get_call_reg(ctx,regs,mode);
+ if( IS_NULL(r) ) {
+ // no register left: remember that argument k goes on the stack
+ stack_args += get_stack_size(ctx, mode);
+ stack_bits |= 1 << k;
+ } else if( !v || r != v->reg ) {
+ // queue (destination, source, mode) for flush_movs
+ int_arr_add(ctx->pack_movs,r);
+ int_arr_add(ctx->pack_movs,v ? v->reg : args[k]);
+ int_arr_add(ctx->pack_movs,mode);
+ }
+ }
+ if( stack_args > 0 ) {
+ int offset = 0;
+ if( jit->cfg.stack_align ) {
+ int align = stack_args % jit->cfg.stack_align;
+ if( align ) offset = jit->cfg.stack_align - align;
+ }
+ if( offset )
+ regs_emit(ctx,UNUSED,STACK_OFFS,UNUSED,UNUSED,0,-offset);
+ // push stack arguments from last to first
+ for(int k=e.nargs-1;k>=0;k--) {
+ if( stack_bits & (1 << k) ) {
+ value_info *v = REG_IS_VAL(args[k]) ? VAL_REG(args[k]) : NULL;
+ EMIT(PUSH,VAL_REG(args[k])->reg,UNUSED,v && IS_FLOAT(v->mode) ? v->mode : M_PTR);
+ }
+ }
+ if( IS_WINCALL64 ) {
+ // extra 0x20 bytes reserved below the pushed arguments
+ regs_emit(ctx,UNUSED,STACK_OFFS,UNUSED,UNUSED,0,-0x20);
+ offset += 0x20;
+ }
+ // total amount to release once the call has been emitted
+ instr_stack_offset = stack_args+offset;
+ }
+ flush_movs(ctx,0);
+ e.nargs = 0xFF;
+ if( vout && vout->last_read > cur_op )
+ ret_val = ®_CFG(REG_MODE(e.mode))->ret;
+ else if( e.mode != M_NORET ) {
+ e.mode = M_VOID; // ignore output
+ out = UNUSED;
+ }
+ if( e.op == CALL_REG )
+ e.a = VAL_REG(e.a)->reg;
+ } else {
+ ereg **regs = hl_emit_get_regs(&e,&nread);
+ for(int k=0;kreg;
+ }
+ }
+ switch( e.op ) {
+ case ALLOC_STACK:
+ case CATCH:
+ break;
+ case BLOCK:
+ cur_block = jit->blocks + e.size_offs;
+ break;
+ case LOAD_ARG:
+ {
+ // `regs` is the function-level call_regs, so arguments are walked in declaration order
+ ereg def = get_call_reg(ctx,regs,e.mode);
+ if( def && out != def )
+ regs_emit_mov(ctx,out,def,e.mode);
+ else
+ regs_write_instr(ctx, &e, out);
+ }
+ break;
+ case ENTER:
+ {
+ // prologue: save the frame register, set up the frame, reserve locals, save persistent registers
+ EMIT(PUSH,jit->cfg.stack_pos,UNUSED,M_PTR);
+ regs_emit_mov(ctx,jit->cfg.stack_pos,jit->cfg.stack_reg,M_PTR);
+ if( stack_offset )
+ regs_emit(ctx,UNUSED,STACK_OFFS,UNUSED,UNUSED,M_PTR,-stack_offset);
+ for(int i=0;ipersists_uses[0];i++)
+ EMIT(PUSH,ctx->jit->cfg.regs.persist[i],UNUSED,M_PTR);
+ for(int i=0;ipersists_uses[1];i++)
+ EMIT(PUSH,ctx->jit->cfg.floats.persist[i],UNUSED,M_F64);
+ if( IS_WINCALL64 && ctx->has_direct_call )
+ regs_emit(ctx,UNUSED,STACK_OFFS,UNUSED,UNUSED,M_PTR,-0x20);
+ }
+ break;
+ case JCOND:
+ case JUMP:
+ case JUMP_TABLE:
+ // phi moves for the jump path are emitted before the jump itself
+ flush_phis(ctx,cur_block, e.op == JCOND, false);
+ if( e.op == JUMP_TABLE ) {
+ // copy args (remap later)
+ hl_emit_store_args(jit->emit,&e,hl_emit_get_args(jit->emit,&e),e.nargs);
+ }
+ regs_write_instr(ctx, &e, out);
+ // record (emitted jump position, target source index)
+ int_arr_add(ctx->jump_regs, ctx->emit_pos - 1);
+ int_arr_add(ctx->jump_regs, cur_op + 1 + (e.op == JUMP_TABLE ? 0 : e.size_offs));
+ if( e.op == JCOND ) flush_phis(ctx,cur_block, false, true);
+ break;
+ case RET:
+ // move the result into the return register, then undo the prologue in reverse order
+ if( e.a ) {
+ ereg ret = REG_CFG(REG_MODE(e.mode))->ret;
+ if( e.a != ret )
+ regs_emit_mov(ctx, ret, e.a, e.mode);
+ }
+# ifdef WIN64_UNWIND_TABLES
+ // if we have our stack offset just after a call, the unwind algorithm
+ // will subtract and create invalid stack frame. this is because we do
+ // not register the stack offset in our unwind table so all functions
+ // can share the same definition
+ if( cur_op && IS_CALL(jit->instrs[cur_op-1].op) )
+ EMIT(NOP,UNUSED,UNUSED,M_NONE);
+# endif
+ if( IS_WINCALL64 && ctx->has_direct_call )
+ regs_emit(ctx,UNUSED,STACK_OFFS,UNUSED,UNUSED,M_PTR,0x20);
+ for(int i=ctx->persists_uses[1]-1;i>=0;i--)
+ EMIT(POP,ctx->jit->cfg.floats.persist[i],UNUSED,M_F64);
+ for(int i=ctx->persists_uses[0]-1;i>=0;i--)
+ EMIT(POP,ctx->jit->cfg.regs.persist[i],UNUSED,M_PTR);
+ if( stack_offset ) {
+ regs_emit(ctx,UNUSED,STACK_OFFS,UNUSED,UNUSED,M_PTR,stack_offset);
+ }
+ EMIT(POP,jit->cfg.stack_pos,UNUSED,M_PTR);
+ EMIT(RET,UNUSED,UNUSED,M_NONE);
+ break;
+ case MOV:
+ // a move onto itself emits nothing
+ if( out == e.a ) break;
+ // fallthrough
+ default:
+ if( e.op == ADDRESS ) {
+ // ADDRESS becomes a LEA whose operand is re-tagged from R_REG_PTR to R_REG
+ e.op = LEA;
+ if( REG_KIND(e.a) != R_REG_PTR ) jit_assert();
+ e.a = (e.a & ~R_REG_PTR) | R_REG;
+ }
+ if( ret_val && out ) {
+ // call result: produced in the return register, then copied to its assigned location
+ regs_write_instr(ctx, &e, *ret_val);
+ regs_emit_mov(ctx, out, *ret_val, e.mode);
+ } else
+ regs_write_instr(ctx, &e, out);
+ break;
+ }
+ // release the stack space used for call arguments
+ if( instr_stack_offset )
+ regs_emit(ctx,UNUSED,STACK_OFFS,UNUSED,UNUSED,M_PTR,instr_stack_offset);
+ if( cur_block && cur_block->end_pos == cur_op+1 )
+ flush_phis(ctx,cur_block,false,true);
+ ctx->pos_map[cur_op+1] = ctx->emit_pos;
+ }
+}
+
+// Publishes the allocator output on the jit context (instruction buffer,
+// write table, position map) and remaps the recorded jumps.
+// Only the first call after hl_regs_function() has any effect.
+void hl_regs_flush( jit_ctx *jit ) {
+	regs_ctx *rc = jit->regs;
+	if( !rc->flushed ) {
+		rc->flushed = true;
+		jit->reg_instr_count = rc->emit_pos;
+		jit->reg_instrs = rc->instrs;
+		jit->reg_writes = rc->out_write;
+		jit->reg_pos_map = rc->pos_map;
+		// close the map: the entry after the current op is the end of the emitted stream
+		if( rc->pos_map != NULL )
+			rc->pos_map[rc->cur_op + 1] = rc->emit_pos;
+		hl_emit_remap_jumps(jit->emit, &rc->jump_regs, rc->instrs, rc->pos_map);
+	}
+}
+
+// Runs register allocation for the function currently held by `jit`:
+// resets the per-function state of the allocator context, allocates the
+// per-value and per-block tables from jit->falloc, then runs the three passes
+// (liveness, assignment, emission) and flushes the result to `jit`.
+// NOTE(review): the result of malloc() for pos_map is not checked.
+// NOTE(review): the header of the value-initialisation loop lost text in
+// extraction - restore it from upstream before compiling.
+void hl_regs_function( jit_ctx *jit ) {
+ regs_ctx *ctx = jit->regs;
+ int nvalues = jit->value_count + jit->phi_count;
+ memset(ctx->persists_uses,0,sizeof(ctx->persists_uses));
+ // the position map is sized per function, so the previous one is dropped
+ free(ctx->pos_map);
+ ctx->flushed = false;
+ ctx->has_direct_call = false;
+ ctx->pos_map = (int*)malloc((jit->instr_count + 1) * sizeof(int));
+ ctx->emit_pos = 0;
+ ctx->cur_op = 0;
+ ctx->stack_size = 0;
+ jit->reg_instrs = NULL;
+ values_free(&ctx->scratch);
+ int_arr_free(&ctx->jump_regs);
+ int_arr_free(&ctx->pack_movs);
+ ctx->blocks_phis = (int_arr*)hl_zalloc(&jit->falloc,sizeof(int_arr) * jit->block_count);
+ ctx->values = (value_info*)hl_zalloc(&jit->falloc,sizeof(value_info) * nvalues);
+ for(int i=1;ireg = UNUSED;
+ v->pref_reg = UNUSED;
+ v->stack_pos = INVALID;
+ v->last_read = -1;
+ if( i < jit->value_count ) {
+ // regular value: positive id, mode of the instruction that writes it
+ v->id = i;
+ v->mode = jit->instrs[jit->values_writes[i]].mode;
+ } else {
+ // phi value: negative id; the mode is filled in later from the phi itself
+ v->id = -(i-jit->value_count) - 1;
+ v->mode = M_NONE;
+ }
+ }
+ regs_compute_liveness(ctx);
+ regs_assign_regs(ctx);
+ regs_emit_instrs(ctx);
+ hl_regs_flush(ctx->jit);
+}
+
+
+// Creates a zero-initialised allocator context and links it with `jit`
+// in both directions.
+void hl_regs_alloc( jit_ctx *jit ) {
+	regs_ctx *rc = (regs_ctx*)calloc(1, sizeof(regs_ctx));
+	rc->jit = jit;
+	jit->regs = rc;
+}
+
+// Releases the allocator context of `jit` together with the buffers it owns
+// directly (position map, emitted instructions, write table).
+void hl_regs_free( jit_ctx *jit ) {
+	regs_ctx *rc = jit->regs;
+	free(rc->out_write);
+	free(rc->instrs);
+	free(rc->pos_map);
+	free(rc);
+}
+
diff --git a/src/jit_x86_64.c b/src/jit_x86_64.c
new file mode 100644
index 000000000..a2b6185c3
--- /dev/null
+++ b/src/jit_x86_64.c
@@ -0,0 +1,1722 @@
+/*
+ * Copyright (C)2015-2016 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include
+#include
+#include "data_struct.h"
+
+// Debug builds also enable GEN_DEBUG, which makes the generator account for
+// extra per-instruction debug bytes (see emit_jump / hl_jit_init_regs).
+#ifdef HL_DEBUG
+# define GEN_DEBUG
+#endif
+
+// Each group of S_* defines followed by an include of data_struct.c
+// parameterises that file; presumably it expands to a container type named
+// S_TYPE with functions prefixed by S_NAME - confirm in data_struct.c.
+// byte_arr : growable array of unsigned char (the machine code buffer)
+#define S_TYPE byte_arr
+#define S_NAME(name) byte_##name
+#define S_VALUE unsigned char
+#include "data_struct.c"
+// reserve `count` bytes in `set` using the default allocator
+#define byte_reserve(set,count) byte_reserve_impl(DEF_ALLOC,&set,count)
+// marker passed as an ereg to emit_ext: the operand is the constant given in the value argument
+#define VAL_CONST 0x80000000
+#define VAL_MEM(reg) (FL_MEMPTR | (reg))
+
+// value_arr : array of uint64
+#define S_TYPE value_arr
+#define S_NAME(name) value_arr_##name
+#define S_VALUE uint64
+#include "data_struct.c"
+
+// value_map : sorted map uint64 -> int, -1 being the default value
+#define S_SORTED
+#define S_MAP
+#define S_TYPE value_map
+#define S_NAME(name) value_map_##name
+#define S_KEY uint64
+#define S_VALUE int
+#define S_DEFVAL -1
+#include "data_struct.c"
+
+// General purpose registers. The numeric value is the hardware register
+// number: emit_ext() puts the low 3 bits in the ModRM byte and uses bit 3
+// to set the matching REX bit.
+typedef enum {
+ RAX = 0,
+ RCX = 1,
+ RDX = 2,
+ RBX = 3,
+ RSP = 4,
+ RBP = 5,
+ RSI = 6,
+ RDI = 7,
+#ifdef HL_64
+ R8 = 8,
+ R9 = 9,
+ R10 = 10,
+ R11 = 11,
+ R12 = 12,
+ R13 = 13,
+ R14 = 14,
+ R15 = 15,
+#endif
+ _UNUSED = 0xFF
+} CpuReg;
+
+// R(id)   : ereg for general purpose register `id`
+// MMX(id) : ereg for float register `id`; float registers are numbered from
+//           64 so that make_reg() can tell them apart from CPU registers
+#define R(id) MK_REG(id,R_REG)
+#define MMX(id) MK_REG((id)+64,R_REG)
+
+// Machine operations known to the encoder. Each value is an index into
+// OP_FORMS[] (see emit_ext), so both lists must be kept in the same order.
+// emit_mov() also relies on MOVZX8 / MOVZX16 directly following MOV8 / MOV16.
+typedef enum {
+ _MOV,
+ _LEA,
+ _PUSH,
+ ADD,
+ SUB,
+ IMUL, // only overflow flag changes compared to MUL
+ DIV,
+ IDIV,
+ NEG,
+ CDQ,
+ CDQE,
+ _POP,
+ _RET,
+ _CALL,
+ AND,
+ OR,
+ XOR,
+ _CMP,
+ _TEST,
+ SHL,
+ SHR,
+ SAR,
+ INC,
+ DEC,
+ JMP,
+ MOVSXD,
+ // FPU
+ FSTP,
+ FSTP32,
+ FLD,
+ FLD32,
+ FLDCW,
+ // SSE
+ MOVSD,
+ MOVSS,
+ COMISD,
+ COMISS,
+ ADDSD,
+ SUBSD,
+ MULSD,
+ DIVSD,
+ ADDSS,
+ SUBSS,
+ MULSS,
+ DIVSS,
+ XORPS,
+ XORPD,
+ CVTSI2SD,
+ CVTSI2SS,
+ CVTTSD2SI,
+ CVTSD2SS,
+ CVTSS2SD,
+ CVTTSS2SI,
+ STMXCSR,
+ LDMXCSR,
+ STC,
+ CLC,
+ // 8-16 bits
+ ADD8,
+ SUB8,
+ MOV8,
+ MOVZX8,
+ MOVSX8,
+ CMP8,
+ TEST8,
+ PUSH8,
+ ADD16,
+ SUB16,
+ IMUL16,
+ MOV16,
+ MOVZX16,
+ MOVSX16,
+ CMP16,
+ TEST16,
+ // prefetchs
+ PREFETCHT0,
+ PREFETCHT1,
+ PREFETCHT2,
+ PREFETCHNTA,
+ PREFETCHW,
+ // --
+ _CPU_LAST
+} CpuOp;
+
+// Jump opcodes.
+// JAlways / JAlways_short are the rel32 and rel8 unconditional jumps.
+// The conditional values are the second byte of the near form (0x0F 0x8x
+// rel32); emit_jump() and jump_near() subtract 0x10 to get the short form.
+#define JAlways 0xE9
+#define JAlways_short 0xEB
+#define JOverflow 0x80
+#define JULt 0x82
+#define JUGte 0x83
+#define JEq 0x84
+#define JNeq 0x85
+#define JULte 0x86
+#define JUGt 0x87
+#define JParity 0x8A
+#define JNParity 0x8B
+#define JSLt 0x8C
+#define JSGte 0x8D
+#define JSLte 0x8E
+#define JSGt 0x8F
+
+// Aliases named after the flag that is tested.
+// "jump if carry" is the same opcode as unsigned lower-than (0x82).
+#define JCarry JULt
+#define JZero JEq
+#define JNotZero JNeq
+
+// Flags stored in the upper bits of an opform entry.
+#define FLAG_LONGOP 0x80000000 // two-byte opcode: bits 8-15 hold the first byte
+#define FLAG_16B 0x40000000 // emit the 0x66 operand-size prefix
+#define FLAG_8B 0x20000000 // immediate is a single byte
+#define FLAG_DUAL 0x10000000 // the register is encoded in both ModRM fields (IMUL r,r,imm)
+#define FLAG_DEF64 0x08000000 // M_PTR operands do not request the 64-bit REX bit
+
+// RM(op,id)      : opcode whose ModRM reg field is the fixed extension `id`
+//                  (stored as id+1 so that 0 means "no extension")
+// GET_RM(op)     : extension+1 of an entry; LONG_OP entries are negative once
+//                  stored in the int fields of opform and keep it at bit 24
+// LONG_RM(op,id) : RM() for two-byte opcodes
+#define RM(op,id) ((op) | (((id)+1)<<8))
+#define GET_RM(op) (((op) >> ((op) < 0 ? 24 : 8)) & 15)
+#define SBYTE(op) ((op) << 16)
+#define LONG_OP(op) ((op) | FLAG_LONGOP)
+#define OP16(op) ((op) | FLAG_16B)
+#define LONG_RM(op,id) LONG_OP((op) | (((id) + 1) << 24))
+
+// Encodings of one CpuOp, one field per operand shape. A field left at 0
+// means that shape is not available for the operation (emit_ext asserts).
+// The trailing columns list the operand pairs each field encodes.
+typedef struct {
+ const char *name; // single operand
+ int r_mem; // r32 / r/m32 r32
+ int mem_r; // r/m32 / r32 r/m32
+ int r_const; // r32 / imm32 imm32
+ int r_i8; // r32 / imm8 imm8
+} opform;
+
+// Encoding table indexed by CpuOp: entries must stay in the same order as the
+// enum. Columns are { name, r_mem, mem_r, r_const, r_i8 }; omitted trailing
+// columns are zero, i.e. that operand shape is not supported.
+// Values above 0xFFFF without FLAG_LONGOP are three-byte sequences
+// (prefix, 0x0F, opcode) written as-is by the OP() macro.
+static opform OP_FORMS[] = {
+ { "MOV", 0x8B, 0x89, 0xB8, 0 },
+ { "LEA", 0x8D },
+ { "PUSH", 0x50 | FLAG_DEF64, RM(0xFF,6), 0x68, 0x6A },
+ { "ADD", 0x03, 0x01, RM(0x81,0), RM(0x83,0) },
+ { "SUB", 0x2B, 0x29, RM(0x81,5), RM(0x83,5) },
+ { "IMUL", LONG_OP(0x0FAF), 0, 0x69 | FLAG_DUAL, 0x6B | FLAG_DUAL },
+ { "DIV", RM(0xF7,6), RM(0xF7,6) },
+ { "IDIV", RM(0xF7,7), RM(0xF7,7) },
+ { "NEG", RM(0xF7,3) },
+ { "CDQ", 0x99 },
+ { "CDQE", 0x98 },
+ { "POP", 0x58 | FLAG_DEF64, RM(0x8F,0) },
+ { "RET", 0xC3 },
+ { "CALL", RM(0xFF,2) | FLAG_DEF64, RM(0xFF,2), 0xE8 },
+ { "AND", 0x23, 0x21, RM(0x81,4), RM(0x83,4) },
+ { "OR", 0x0B, 0x09, RM(0x81,1), RM(0x83,1) },
+ { "XOR", 0x33, 0x31, RM(0x81,6), RM(0x83,6) },
+ { "CMP", 0x3B, 0x39, RM(0x81,7), RM(0x83,7) },
+ { "TEST", 0x85, 0x85/*SWP?*/, RM(0xF7,0) },
+ { "SHL", RM(0xD3,4), 0, 0, RM(0xC1,4) },
+ { "SHR", RM(0xD3,5), 0, 0, RM(0xC1,5) },
+ { "SAR", RM(0xD3,7), 0, 0, RM(0xC1,7) },
+ { "INC", IS_64 ? RM(0xFF,0) : 0x40, RM(0xFF,0) },
+ { "DEC", IS_64 ? RM(0xFF,1) : 0x48, RM(0xFF,1) },
+ { "JMP", RM(0xFF,4) },
+ { "MOVSXD", 0x63 },
+ // FPU
+ { "FSTP", 0, RM(0xDD,3) },
+ { "FSTP32", 0, RM(0xD9,3) },
+ { "FLD", 0, RM(0xDD,0) },
+ { "FLD32", 0, RM(0xD9,0) },
+ { "FLDCW", 0, RM(0xD9, 5) },
+ // SSE
+ { "MOVSD", 0xF20F10, 0xF20F11 },
+ { "MOVSS", 0xF30F10, 0xF30F11 },
+ { "COMISD", LONG_RM(0x660F2F,1) },
+ { "COMISS", LONG_RM(0x0F2F,1) },
+ { "ADDSD", 0xF20F58 },
+ { "SUBSD", 0xF20F5C },
+ { "MULSD", 0xF20F59 },
+ { "DIVSD", 0xF20F5E },
+ { "ADDSS", 0xF30F58 },
+ { "SUBSS", 0xF30F5C },
+ { "MULSS", 0xF30F59 },
+ { "DIVSS", 0xF30F5E },
+ { "XORPS", LONG_OP(0x0F57) },
+ { "XORPD", 0x660F57 },
+ { "CVTSI2SD", 0xF20F2A },
+ { "CVTSI2SS", 0xF30F2A },
+ { "CVTTSD2SI", 0xF20F2C },
+ { "CVTSD2SS", 0xF20F5A },
+ { "CVTSS2SD", 0xF30F5A },
+ { "CVTTSS2SI", 0xF30F2C },
+ { "STMXCSR", 0, LONG_RM(0x0FAE,3) },
+ { "LDMXCSR", 0, LONG_RM(0x0FAE,2) },
+ { "STC", 0xF9 },
+ { "CLC", 0xF8 },
+ // 8 bits,
+ { "ADD8", 0, RM(0x00,3) },
+ { "SUB8", 0, 0x28 },
+ { "MOV8", 0x8A, 0x88, 0, RM(0xC6,0) },
+ { "MOVZX8", LONG_OP(0x0FB6) },
+ { "MOVSX8", LONG_OP(0x0FBE) },
+ { "CMP8", 0x3A, 0x38, 0, RM(0x80,7) },
+ { "TEST8", 0x84, 0x84, RM(0xF6,0) },
+ { "PUSH8", FLAG_DEF64, 0, 0x6A | FLAG_8B },
+ { "ADD16", 0, OP16(0x01) },
+ { "SUB16", 0, OP16(0x29) },
+ { "IMUL16", OP16(LONG_OP(0x0FAF)) },
+ { "MOV16", OP16(0x8B), OP16(0x89), OP16(0xB8) },
+ { "MOVZX16", LONG_OP(0x0FB7) },
+ { "MOVSX16", LONG_OP(0x0FBF) },
+ { "CMP16", OP16(0x3B), OP16(0x39) },
+ { "TEST16", OP16(0x85) },
+ // prefetchs
+ { "PREFETCHT0", FLAG_DEF64, LONG_RM(0x0F18,1) },
+ { "PREFETCHT1", FLAG_DEF64, LONG_RM(0x0F18,2) },
+ { "PREFETCHT2", FLAG_DEF64, LONG_RM(0x0F18,3) },
+ { "PREFETCHNTA", FLAG_DEF64, LONG_RM(0x0F18,0) },
+ { "PREFETCHW", FLAG_DEF64, LONG_RM(0x0F0D,1) },
+};
+
+// Byte-level emission helpers. All of them expect a `code_ctx *ctx` in scope;
+// REX() and OP() also expect an int `r64` holding the pending REX bits
+// (8 = W, 4 = R, 1 = B as set by emit_ext).
+// REX() : emit the REX prefix when any bit is pending (64-bit builds only)
+#ifdef HL_64
+# define REX() if( r64 ) B(r64 | 0x40)
+#else
+# define REX()
+#endif
+
+// index multiplier (1,2,4,8) -> SIB scale field; -1 marks unsupported multipliers
+static const int SIB_MULT[] = {-1, 0, 1, -1, 2, -1, -1, -1, 3};
+
+// B / W / W64 append 1, 4 or 8 bytes at the write cursor. No capacity check is
+// done here - presumably the caller reserves space beforehand (confirm).
+#define B(v) ctx->code.values[ctx->code.cur++] = (unsigned char)(v)
+#define W(wv) *(int*)&ctx->code.values[_incr(&ctx->code.cur,4)] = wv
+#define W64(v64) *(int_val*)&ctx->code.values[_incr(&ctx->code.cur,8)] = v64
+
+// ModRM and SIB byte builders; only the low 3 bits of register numbers are kept
+#define MOD_RM(mod,reg,rm) B(((mod) << 6) | (((reg)&7) << 3) | ((rm)&7))
+#define SIB(mult,rmult,rbase) B((SIB_MULT[mult]<<6) | (((rmult)&7)<<3) | ((rbase)&7))
+// true when c fits in a signed 8-bit displacement / immediate
+#define IS_SBYTE(c) ( (c) >= -128 && (c) < 128 )
+
+// int3 breakpoint
+#define BREAK() B(0xCC)
+
+// OP(b) : emit the opcode bytes of an opform entry.
+//  - entries with bits 16-23 set are 3-byte sequences: the first byte is
+//    written before the REX prefix, then the two remaining bytes
+//  - otherwise: optional 0x66 prefix (FLAG_16B), REX, optional first opcode
+//    byte (FLAG_LONGOP), then the low byte
+#define OP(b) \
+ if( (b) & 0xFF0000 ) { \
+ B((b)>>16); \
+ if( r64 ) B(r64 | 0x40); /* also in 32 bits mode */ \
+ B((b)>>8); \
+ B(b); \
+ } else { \
+ if( (b) & FLAG_16B ) { \
+ B(0x66); \
+ REX(); \
+ } else {\
+ REX(); \
+ }\
+ if( (b) & FLAG_LONGOP ) B((b)>>8); \
+ B(b); \
+ }
+
+// State of the x86-64 code generator.
+struct _code_ctx {
+ jit_ctx *jit;
+ byte_arr code; // machine code buffer written by B / W / W64
+ int_arr funs;
+ int_arr short_jumps; // pairs (rel8 position, target op index) recorded by emit_jump
+ int_arr near_jumps; // pairs (rel32 position, target op index) recorded by emit_jump
+ value_map const_table_lookup;
+ byte_arr const_table;
+ int_arr const_refs;
+ int_arr const_addr;
+ int *pos_map; // op index -> code offset; closed by hl_codegen_flush
+ int cur_op; // index of the op being generated
+ bool flushed; // set once hl_codegen_flush has published the result
+ int const_table_pos;
+ int null_access_pos;
+ int null_field_pos;
+};
+
+// Post-increment helper: advances *v by n and returns the value it had before.
+// Used by W / W64 to obtain the write offset while moving the cursor.
+static int _incr( int *v, int n ) {
+	const int before = *v;
+	*v = before + n;
+	return before;
+}
+
+// Returns a printable name for native register `reg` as seen in mode `m`
+// (e.g. EAX, AX, AL, RAX, R10D, XMM3). Register numbers outside the expected
+// range get a "???" suffix.
+// The result lives in a static buffer: it is overwritten by the next call.
+const char *hl_natreg_str( int reg, emit_mode m ) {
+	static char out[16];
+	static const char *regs_str[] = { "AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI" };
+	static const char *regs_str8[] = { "AL", "CL", "DL", "BL", "SPL", "BPL", "SIL", "DIL" };
+	CpuReg r = REG_REG(reg);
+	if( m == M_F32 || m == M_F64 ) {
+		// float registers are numbered from 64
+		r -= 64;
+		const char *bad = (r >= 0 && r < 16) ? "" : "???";
+		if( m == M_F32 )
+			sprintf(out,"XMM%df%s",r,bad);
+		else
+			sprintf(out,"XMM%d%s",r,bad);
+		return out;
+	}
+	// the first eight registers have names, the others are printed by number
+	const bool named = r < 8;
+	const char *bad = r<16?"":"???";
+	switch( m ) {
+	case M_I32:
+		if( named ) sprintf(out,"E%s",regs_str[r]); else sprintf(out,"R%dD%s",r,bad);
+		break;
+	case M_UI16:
+		if( named ) sprintf(out,"%s",regs_str[r]); else sprintf(out,"R%dW%s",r,bad);
+		break;
+	case M_UI8:
+		if( named ) sprintf(out,"%s",regs_str8[r]); else sprintf(out,"R%dB%s",r,bad);
+		break;
+	default:
+		if( named ) sprintf(out,"R%s",regs_str[r]); else sprintf(out,"R%d%s",r,bad);
+		break;
+	}
+	return out;
+}
+
+// index of the float register kept aside as temporary (see get_tmp);
+// set by hl_jit_init_regs, -1 until then
+static int scratch_float_reg = -1;
+
+static ereg scratch_not_param[] = { R(RAX), R(R10), R(R11) };
+
+// Fills `cfg` with the x86-64 register description used by the register
+// allocator: scratch, persistent and argument registers for integers and
+// floats, registers imposed by some instructions, and the stack layout.
+// The integer lists differ between the HL_WIN_CALL build and the default one.
+void hl_jit_init_regs( regs_config *cfg ) {
+ // R11 is excluded because it is used as a temporary by various ops (see RTMP)
+# ifdef HL_WIN_CALL
+ static int scratch_regs[] = { R(RAX), R(RCX), R(RDX), R(R8), R(R9), R(R10), /*R(R11)*/ };
+ static int free_regs[] = { R(RSI), R(RDI), R(RBX), R(R12), R(R13), R(R14), R(R15) };
+ static int call_regs[] = { R(RCX), R(RDX), R(R8), R(R9) };
+# else
+ static int scratch_regs[] = { R(RAX), R(RCX), R(RDX), R(RSI), R(RDI), R(R8), R(R9), R(R10), /*R(R11)*/ };
+ static int free_regs[] = { R(RBX), R(R12), R(R13), R(R14), R(R15) };
+ static int call_regs[] = { R(RDI), R(RSI), R(RDX), R(RCX), R(R8), R(R9) };
+# endif
+ // the return register is the first scratch register (RAX)
+ cfg->regs.ret = scratch_regs[0];
+ cfg->regs.nscratchs = sizeof(scratch_regs) / sizeof(int);
+ cfg->regs.npersists = sizeof(free_regs) / sizeof(int);
+ cfg->regs.nargs = sizeof(call_regs) / sizeof(int);
+ cfg->regs.scratch = (ereg*)scratch_regs;
+ cfg->regs.persist = (ereg*)free_regs;
+ cfg->regs.arg = (ereg*)call_regs;
+ // floats
+ static int floats[] = {
+ MMX(0), MMX(1), MMX(2), MMX(3),
+ MMX(4), MMX(5), MMX(6), MMX(7),
+ MMX(8), MMX(9), MMX(10), MMX(11),
+ MMX(12), MMX(13), MMX(14), MMX(15)
+ };
+# ifdef HL_WIN_CALL
+ cfg->floats.nargs = 4;
+ cfg->floats.nscratchs = 6;
+# else
+ cfg->floats.nargs = 8;
+ cfg->floats.nscratchs = 16;
+# endif
+ // the last scratch float register is kept aside as temporary (get_tmp)
+ // and removed from the allocatable scratch set
+ scratch_float_reg = cfg->floats.nscratchs - 1;
+ cfg->floats.nscratchs--;
+ cfg->floats.ret = floats[0];
+ cfg->floats.scratch = (ereg*)floats;
+ cfg->floats.arg = (ereg*)floats;
+ // persistent floats are the registers after the reserved temporary
+ // (none when all 16 registers are scratch)
+ cfg->floats.persist = (ereg*)floats + cfg->floats.nscratchs + 1;
+ cfg->floats.npersists = 15 - cfg->floats.nscratchs;
+ // extra
+ cfg->req_bit_shifts = R(RCX);
+ cfg->req_div_a = R(RAX);
+ cfg->req_div_b = R(RCX);
+ cfg->stack_reg = R(RSP);
+ cfg->stack_pos = R(RBP);
+ cfg->stack_align = 16;
+# ifdef GEN_DEBUG
+ cfg->debug_prefix_size = 6;
+# endif
+}
+
+// EMIT  : emit_ext() without constant value; expects `ctx` in scope
+// ID2   : packs two operand kinds into one int so that a pair can be used as a switch label
+#define EMIT(op,a,b,mode) emit_ext(ctx,op,a,b,mode,0)
+#define ID2(a,b) ((a) | ((b)<<8))
+
+// Operand kinds as seen by the encoder (result of make_reg).
+typedef enum {
+ RCPU = 0, // general purpose register
+ RFPU = 1, // float register (ereg number >= 64)
+ RSTACK = 2, // memory relative to RBP
+ RCONST = 3, // immediate value
+ RMEM = 4, // memory relative to any other register
+ RUNUSED = 5, // no operand
+} preg_kind;
+
+// Decoded operand: kind, hardware register number and associated value
+// (displacement for memory operands, immediate for constants).
+typedef struct {
+ preg_kind kind;
+ CpuReg reg;
+ int64 value;
+} preg;
+
+// assert that an unsupported encoding is not requested
+#define ERRIF(v) if( v ) jit_assert()
+
+// Decodes an ereg into the operand description used by emit_ext().
+//   r     : operand to decode; a null ereg gives RUNUSED, VAL_CONST gives an
+//           immediate taken from `value`
+//   value : constant used only when r == VAL_CONST
+// For other operands the kind comes from REG_KIND(r): registers numbered 64
+// and above are float registers (renumbered from 0), RBP-relative memory is
+// RSTACK, other memory is RMEM. Asserts when the register number is out of
+// the 0..15 range.
+static preg make_reg( ereg r, uint64 value ) {
+	// start from a fully defined operand so that the early returns below
+	// never hand back indeterminate fields
+	preg p = { RUNUSED, _UNUSED, 0 };
+	if( IS_NULL(r) )
+		return p;
+	if( r == VAL_CONST ) {
+		p.kind = RCONST;
+		p.value = value;
+		return p;
+	}
+	p.reg = REG_REG(r);
+	p.value = REG_VALUE(r);
+	switch( REG_KIND(r) ) {
+	case R_REG:
+		if( p.reg >= 64 ) {
+			p.kind = RFPU;
+			p.reg -= 64;
+		} else
+			p.kind = RCPU;
+		break;
+	case R_REG_PTR:
+		if( p.reg == RBP )
+			p.kind = RSTACK;
+		else
+			p.kind = RMEM;
+		break;
+	case R_CONST:
+		p.kind = RCONST;
+		break;
+	default:
+		jit_assert();
+		break;
+	}
+	if( p.reg < 0 || p.reg > 15 ) jit_assert();
+	return p;
+}
+
+// Encodes one machine instruction.
+//   op     : operation, index into OP_FORMS
+//   _a, _b : operands (decoded with make_reg); UNUSED when absent
+//   mode   : operand mode; M_PTR requests a 64-bit operation unless the form
+//            is flagged FLAG_DEF64
+//   _value : constant used when an operand is VAL_CONST
+// `r64` accumulates the REX bits written by OP(): 8 = 64-bit operand,
+// 4 = extended register in the ModRM reg field, 1 = extended register in the
+// ModRM rm field. The switch selects the encoding from the pair of operand
+// kinds and asserts (ERRIF) when OP_FORMS has no form for that pair.
+static void emit_ext( code_ctx *ctx, CpuOp op, ereg _a, ereg _b, emit_mode mode, int_val _value ) {
+ opform *f = &OP_FORMS[op];
+ int mode64 = mode == M_PTR && (f->r_mem&FLAG_DEF64) == 0 ? 8 : 0;
+ int r64 = mode64;
+ preg a = make_reg(_a,_value), b = make_reg(_b,_value);
+ switch( ID2(a.kind,b.kind) ) {
+ // no operand
+ case ID2(RUNUSED,RUNUSED):
+ ERRIF(f->r_mem == 0);
+ OP(f->r_mem);
+ break;
+ // register, register
+ case ID2(RCPU,RCPU):
+ case ID2(RFPU,RFPU):
+ if( f->mem_r ) {
+ // canonical form
+ if( a.reg & 8 ) r64 |= 1;
+ if( b.reg & 8 ) r64 |= 4;
+ OP(f->mem_r);
+ MOD_RM(3,b.reg,a.reg);
+ } else {
+ ERRIF( f->r_mem == 0 );
+ if( a.reg & 8 ) r64 |= 4;
+ if( b.reg & 8 ) r64 |= 1;
+ OP(f->r_mem);
+ MOD_RM(3,a.reg,b.reg);
+ }
+ break;
+ // mixed CPU / float registers: only accepted for forms with bits above 15 set
+ case ID2(RCPU,RFPU):
+ case ID2(RFPU,RCPU):
+ ERRIF( (f->r_mem>>16) == 0 );
+ if( a.reg & 8 ) r64 |= 4;
+ if( b.reg & 8 ) r64 |= 1;
+ OP(f->r_mem);
+ MOD_RM(3,a.reg,b.reg);
+ break;
+ // single register: ModRM with opcode extension, or register added to the opcode byte
+ case ID2(RCPU,RUNUSED):
+ ERRIF( f->r_mem == 0 );
+ if( a.reg & 8 ) r64 |= 1;
+ if( GET_RM(f->r_mem) > 0 ) {
+ OP(f->r_mem);
+ MOD_RM(3, GET_RM(f->r_mem)-1, a.reg);
+ } else
+ OP(f->r_mem + (a.reg&7));
+ break;
+ // single stack operand [RBP+disp], 8 or 32 bits displacement
+ case ID2(RSTACK,RUNUSED):
+ ERRIF( f->mem_r == 0 || GET_RM(f->mem_r) == 0 );
+ OP(f->mem_r);
+ if( IS_SBYTE(a.value) ) {
+ MOD_RM(1,GET_RM(f->mem_r)-1,RBP);
+ B(a.value);
+ } else {
+ MOD_RM(2,GET_RM(f->mem_r)-1,RBP);
+ W((int)a.value);
+ }
+ break;
+ // register, immediate: imm8 form when available and the value fits,
+ // else imm32 through ModRM, else imm32 with the register in the opcode byte
+ case ID2(RCPU,RCONST):
+ ERRIF( f->r_const == 0 && f->r_i8 == 0 );
+ if( a.reg & 8 ) r64 |= 1;
+ if( f->r_i8 && IS_SBYTE(b.value) ) {
+ if( (f->r_i8&FLAG_DUAL) && (a.reg & 8) ) r64 |= 4;
+ OP(f->r_i8);
+ if( (f->r_i8&FLAG_DUAL) ) MOD_RM(3,a.reg,a.reg); else MOD_RM(3,GET_RM(f->r_i8)-1,a.reg);
+ B(b.value);
+ } else if( GET_RM(f->r_const) > 0 || (f->r_const&FLAG_DUAL) ) {
+ // NOTE(review): FLAG_DUAL is tested on r_i8 below although this branch
+ // encodes r_const; equivalent for IMUL, the only FLAG_DUAL entry in OP_FORMS
+ if( (f->r_i8&FLAG_DUAL) && (a.reg & 8) ) r64 |= 4;
+ OP(f->r_const&0xFF);
+ if( (f->r_i8&FLAG_DUAL) ) MOD_RM(3,a.reg,a.reg); else MOD_RM(3,GET_RM(f->r_const)-1,a.reg);
+ if( mode64 && IS_64 && op == _MOV ) W64(b.value); else W((int)b.value);
+ } else {
+ ERRIF( f->r_const == 0);
+ OP((f->r_const&0xFF) + (a.reg&7));
+ // only MOV takes a full 64-bit immediate
+ if( mode64 && IS_64 && op == _MOV ) W64(b.value); else W((int)b.value);
+ }
+ break;
+ // store register to [RBP+disp]
+ case ID2(RSTACK,RCPU):
+ case ID2(RSTACK,RFPU):
+ ERRIF( f->mem_r == 0 );
+ if( b.reg & 8 ) r64 |= 4;
+ OP(f->mem_r);
+ if( IS_SBYTE(a.value) ) {
+ MOD_RM(1,b.reg,RBP);
+ B(a.value);
+ } else {
+ MOD_RM(2,b.reg,RBP);
+ W((int)a.value);
+ }
+ break;
+ // load register from [RBP+disp]
+ case ID2(RCPU,RSTACK):
+ case ID2(RFPU,RSTACK):
+ ERRIF( f->r_mem == 0 );
+ if( a.reg & 8 ) r64 |= 4;
+ OP(f->r_mem);
+ if( IS_SBYTE(b.value) ) {
+ MOD_RM(1,a.reg,RBP);
+ B(b.value);
+ } else {
+ MOD_RM(2,a.reg,RBP);
+ W((int)b.value);
+ }
+ break;
+ // single immediate
+ case ID2(RCONST,RUNUSED):
+ ERRIF( f->r_const == 0 );
+ OP(f->r_const);
+ if( f->r_const & FLAG_8B ) B(a.value); else W((int)a.value);
+ break;
+ // single memory operand [reg+disp]. In the three memory cases below:
+ // a base whose low bits equal RSP needs a SIB byte (0x24), and a base
+ // whose low bits equal RBP cannot use the displacement-less form
+ case ID2(RMEM,RUNUSED):
+ ERRIF( f->mem_r == 0 );
+ if( a.reg & 8 ) r64 |= 1;
+ OP(f->mem_r);
+ if( a.value == 0 && (a.reg&7) != RBP ) {
+ MOD_RM(0,GET_RM(f->mem_r)-1,a.reg);
+ if( (a.reg&7) == RSP ) B(0x24);
+ } else if( IS_SBYTE(a.value) ) {
+ MOD_RM(1,GET_RM(f->mem_r)-1,a.reg);
+ if( (a.reg&7) == RSP ) B(0x24);
+ B(a.value);
+ } else {
+ MOD_RM(2,GET_RM(f->mem_r)-1,a.reg);
+ if( (a.reg&7) == RSP ) B(0x24);
+ W((int)a.value);
+ }
+ break;
+ // load register from [reg+disp]
+ case ID2(RCPU, RMEM):
+ case ID2(RFPU, RMEM):
+ ERRIF( f->r_mem == 0 );
+ if( a.reg & 8 ) r64 |= 4;
+ if( b.reg & 8 ) r64 |= 1;
+ OP(f->r_mem);
+ if( b.value == 0 && (b.reg&7) != RBP ) {
+ MOD_RM(0,a.reg,b.reg);
+ if( (b.reg&7) == RSP ) B(0x24);
+ } else if( IS_SBYTE(b.value) ) {
+ MOD_RM(1,a.reg,b.reg);
+ if( (b.reg&7) == RSP ) B(0x24);
+ B(b.value);
+ } else {
+ MOD_RM(2,a.reg,b.reg);
+ if( (b.reg&7) == RSP ) B(0x24);
+ W((int)b.value);
+ }
+ break;
+ // store register to [reg+disp]
+ case ID2(RMEM, RCPU):
+ case ID2(RMEM, RFPU):
+ ERRIF( f->mem_r == 0 );
+ if( a.reg & 8 ) r64 |= 1;
+ if( b.reg & 8 ) r64 |= 4;
+ OP(f->mem_r);
+ if( a.value == 0 && (a.reg&7) != RBP ) {
+ MOD_RM(0,b.reg,a.reg);
+ if( (a.reg&7) == RSP ) B(0x24);
+ } else if( IS_SBYTE(a.value) ) {
+ MOD_RM(1,b.reg,a.reg);
+ if( (a.reg&7) == RSP ) B(0x24);
+ B(a.value);
+ } else {
+ MOD_RM(2,b.reg,a.reg);
+ if( (a.reg&7) == RSP ) B(0x24);
+ W((int)a.value);
+ }
+ break;
+ // any other operand pair is not encodable
+ default:
+ ERRIF(1);
+ }
+}
+
+// Emits a jump whose displacement is patched later.
+//   mode   : JAlways or one of the conditional jump opcodes
+//   offset : distance to the target, in ops, relative to the next op
+// The short (rel8) form is chosen when offset * op_mult fits in a signed
+// byte, op_mult being an assumed upper bound of the bytes emitted per op
+// (16, plus 6 with GEN_DEBUG). The displacement position and the target op
+// index are recorded in short_jumps / near_jumps.
+static void emit_jump( code_ctx *ctx, int mode, int offset ) {
+ int op_mult = 16;
+# ifdef GEN_DEBUG
+ op_mult += 6; // additional debug info per op
+# endif
+ if( IS_SBYTE(offset*op_mult) ) {
+ // assume it's ok to use short jump
+ // short conditional opcodes are the near ones minus 0x10
+ B(mode == JAlways ? JAlways_short : mode - 0x10);
+ int_arr_add(ctx->short_jumps, byte_count(ctx->code));
+ int_arr_add(ctx->short_jumps, ctx->cur_op + offset + 1);
+ // placeholder displacement
+ B(-2);
+ } else {
+ // near conditional jumps take the 0x0F prefix
+ if( mode != JAlways ) B(0x0F);
+ B(mode);
+ int_arr_add(ctx->near_jumps, byte_count(ctx->code));
+ int_arr_add(ctx->near_jumps, ctx->cur_op + offset + 1);
+ // placeholder displacement
+ W(-5);
+ }
+}
+
+// R11 is never handed out by the allocator: it is the integer temporary
+#define RTMP R(R11)
+// Returns the temporary register matching `mode`: the reserved float
+// register for float modes, RTMP otherwise.
+static ereg get_tmp( emit_mode mode ) {
+	return IS_FLOAT(mode) ? MMX(scratch_float_reg) : RTMP;
+}
+
+// Moves `val` into `out` for the given mode, choosing the instruction:
+//  - nothing when both are the same operand
+//  - through the temporary register when `out` is not a register and `val`
+//    is either not a register or a register with an offset
+//  - LEA when `val` is a register with a non-zero offset (register + offset)
+//  - otherwise the MOV variant selected by `mode`
+static void emit_mov( code_ctx *ctx, ereg out, ereg val, emit_mode mode ) {
+ if( out == val )
+ return;
+ if( !IS_REG(out) && (!IS_REG(val) || REG_VALUE(val) != 0) ) {
+ ereg tmp = get_tmp(mode);
+ emit_mov(ctx, tmp, val, mode);
+ emit_mov(ctx, out, tmp, mode);
+ } else if( IS_REG(val) && REG_VALUE(val) != 0 ) {
+ emit_ext(ctx,_LEA,out,REG_PTR(val),M_PTR,0);
+ } else {
+ // indexed by emit_mode
+ static CpuOp MOV_OP[] = {_MOV,MOV8,MOV16,_MOV,_MOV,MOVSD,MOVSS,_MOV,_MOV};
+ CpuOp op = MOV_OP[mode];
+ if( (mode == M_UI8 || mode == M_UI16) && IS_REG(out) ) {
+ // loading a small integer into a register zero-extends it:
+ // MOVZX8 / MOVZX16 directly follow MOV8 / MOV16 in CpuOp
+ op++; // MOVZX
+ mode = M_PTR;
+ }
+ emit_ext(ctx,op,out,val,mode,0);
+ }
+}
+
+// Emits a 2-byte short jump and returns its position in the code buffer.
+//   mode >= 0 : forward jump with condition `mode` (JAlways or a conditional
+//               opcode); the displacement is left at 0 and must be set with
+//               patch_jump_near()
+//   mode < 0  : unconditional backward jump to code position -mode
+// NOTE(review): the displacement is a single byte and is not range-checked.
+static int jump_near( code_ctx *ctx, int mode ) {
+ int pos = byte_count(ctx->code);
+ if( mode < 0 ) {
+ // backwards
+ int target = -mode;
+ B(JAlways_short);
+ // relative to the end of this 2-byte instruction
+ B(target - (pos + 2));
+ } else {
+ B(mode == JAlways ? JAlways_short : mode - 0x10);
+ B(0);
+ }
+ return pos;
+}
+
+// Makes the short jump emitted at `jpos` by jump_near() land on the current
+// end of the code buffer. A position of 0 means "no jump" and is ignored.
+static void patch_jump_near( code_ctx *ctx, int jpos ) {
+	if( jpos ) {
+		// displacement is relative to the end of the 2-byte jump
+		int disp = byte_count(ctx->code) - (jpos + 2);
+		ctx->code.values[jpos + 1] = (unsigned char)disp;
+	}
+}
+
+// Emits an integer division or modulo (OSDiv, OUDiv, OSMod, OUMod) that never
+// raises a hardware exception:
+//   divisor == 0          : the result is 0
+//   divisor == -1, signed : OSMod gives 0, OSDiv gives a * b
+// RAX and RDX are required by DIV / IDIV: they are saved and restored around
+// the sequence unless one of them is the output. Float modes are not handled
+// here and emit a breakpoint.
+static void emit_div_mod( code_ctx *ctx, hl_op op, ereg out, ereg a, ereg b, emit_mode mode ) {
+ if( IS_FLOAT(mode) ) {
+ BREAK();
+ return;
+ }
+ ereg bas = R(RAX), div = R(RDX);
+ if( out != bas ) EMIT(_PUSH,bas,UNUSED,M_PTR);
+ if( out != div ) EMIT(_PUSH,div,UNUSED,M_PTR);
+ // the divisor must be a register other than RAX / RDX, which get overwritten
+ if( b == bas || b == div || !IS_REG(b) ) {
+ EMIT(_MOV,RTMP,b,mode);
+ b = RTMP;
+ }
+ if( a != bas ) EMIT(_MOV,bas,a,mode);
+
+ // check for div = 0
+ EMIT(_TEST,b,b,mode);
+ int jz = jump_near(ctx,JZero);
+ int jz1 = 0;
+ // Prevent MIN/-1 overflow exception
+ // OSMod: r = (b == 0 || b == -1) ? 0 : a % b
+ // OSDiv: r = (b == 0 || b == -1) ? a * b : a / b
+ if( op == OSMod || op == OSDiv ) {
+ EMIT(_CMP,b,MK_CONST(-1),mode);
+ jz1 = jump_near(ctx,JZero);
+ }
+ // high part of the dividend: zero for unsigned, sign extension for signed
+ bool unsign = op == OUDiv || op == OUMod;
+ if( unsign )
+ EMIT(XOR,div,div,mode);
+ else
+ EMIT(CDQ, UNUSED, UNUSED, mode);
+ EMIT(unsign ? DIV : IDIV, b, UNUSED, mode);
+ // quotient is in RAX, remainder in RDX
+ ereg res = (op == OUDiv || op == OSDiv) ? bas : div;
+ int jn = jump_near(ctx,JAlways);
+ // special cases land here
+ patch_jump_near(ctx,jz);
+ patch_jump_near(ctx,jz1);
+ if( op != OSDiv ) {
+ EMIT(XOR, res, res, mode);
+ } else {
+ if( res != bas ) EMIT(_MOV,res,bas,mode);
+ EMIT(IMUL,res,b,mode);
+ }
+ patch_jump_near(ctx,jn);
+ if( out != res ) EMIT(_MOV,out,res,mode);
+ // restore in reverse push order
+ if( out != div ) EMIT(_POP,div,UNUSED,M_PTR);
+ if( out != bas ) EMIT(_POP,bas,UNUSED,M_PTR);
+}
+
+// Emits `out = a <op> b` (or `out = <op> a` for unary operations).
+// The switch selects the machine operation `cop` for the requested mode, or
+// handles the operation entirely (shifts with a count outside RCX, integer
+// division, ONot, float ONeg) and returns. The common tail then emits the
+// two-operand form, going through a temporary when `out` is not a register
+// or aliases `b`.
+// `mask`, when set, is ANDed into the result (8-bit multiply is computed
+// with the 16-bit instruction).
+static void emit_anyop( code_ctx *ctx, hl_op op, ereg out, ereg a, ereg b, emit_mode mode ) {
+ CpuOp cop;
+ int mask = 0;
+ // F_OP / DECL_OP pick the operation for the current mode; DECL_OP goes
+ // through a table indexed by emit_mode
+# define F_OP(iop,f32,f64) cop = mode == M_F32 ? f32 : (mode == M_F64 ? f64 : iop);
+# define DECL_OP(i8,i16,iop,f32,f64) static CpuOp ops_##iop[] = {-1,i8,i16,iop,iop,f64,f32,-1,-1}; cop = ops_##iop[mode]
+ switch( op ) {
+ case OAdd:
+ DECL_OP(ADD8,ADD16,ADD,ADDSS,ADDSD);
+ break;
+ case OSub:
+ DECL_OP(SUB8,SUB16,SUB,SUBSS,SUBSD);
+ break;
+ case OMul:
+ DECL_OP(IMUL16/*NO IMUL8*/,IMUL16,IMUL,MULSS,MULSD);
+ if( mode == M_UI8 ) mask = 0xFF;
+ break;
+ case OIncr:
+ cop = INC;
+ break;
+ case ODecr:
+ cop = DEC;
+ break;
+ case OAnd:
+ cop = AND;
+ break;
+ case OOr:
+ cop = OR;
+ break;
+ case OXor:
+ cop = XOR;
+ break;
+ case OShl:
+ case OSShr:
+ case OUShr:
+ {
+ // the shift count must be in RCX: when it is not, move it there
+ // (preserving RCX unless it is the output) and emit again
+ ereg f = R(RCX);
+ if( b != f ) {
+ if( a == f || out == f ) {
+ EMIT(_MOV,RTMP,a,mode);
+ a = RTMP;
+ }
+ if( out == f ) {
+ EMIT(_MOV,f,b,mode);
+ emit_anyop(ctx, op, RTMP, RTMP, f, mode);
+ EMIT(_MOV,f,RTMP,mode);
+ } else {
+ EMIT(_PUSH,f,UNUSED,M_PTR);
+ EMIT(_MOV,f,b,mode);
+ emit_anyop(ctx, op, out, a, f, mode);
+ EMIT(_POP,f,UNUSED,M_PTR);
+ }
+ return;
+ }
+ }
+ // count is in RCX; the output cannot be the count register itself
+ if( out == b ) {
+ ereg r = get_tmp(mode);
+ emit_anyop(ctx,op,r,a,b,mode);
+ emit_mov(ctx,out,r,mode);
+ return;
+ }
+ // the count register is implicit in the encoding
+ b = UNUSED;
+ cop = (op == OShl ? SHL : (op == OSShr ? SAR : SHR));
+ break;
+ case OSDiv:
+ F_OP(0,DIVSS,DIVSD);
+ if( IS_FLOAT(mode) ) break;
+ // integer division falls through to emit_div_mod
+ case OSMod:
+ case OUMod:
+ case OUDiv:
+ emit_div_mod(ctx,op,out,a,b,mode);
+ return;
+ case ONot:
+ // NOTE(review): the result is written into `a`, not `out` - presumably
+ // callers guarantee out == a here; confirm
+ if( IS_REG(a) ) {
+ EMIT(XOR,a,MK_CONST(1),M_I32);
+ } else {
+ BREAK();
+ }
+ return;
+ case ONeg:
+ if( IS_FLOAT(mode) ) {
+ // float negation is computed as 0 - a
+ if( out != a && IS_REG(out) ) {
+ EMIT(mode == M_F32 ? XORPS : XORPD, out, out, mode);
+ EMIT(mode == M_F32 ? SUBSS : SUBSD, out, a, mode);
+ } else {
+ ereg tmp = get_tmp(mode);
+ EMIT(mode == M_F32 ? XORPS : XORPD, tmp, tmp, mode);
+ EMIT(mode == M_F32 ? SUBSS : SUBSD, tmp, a, mode);
+ EMIT(mode == M_F32 ? MOVSS : MOVSD, out, tmp, mode);
+ }
+ return;
+ }
+ cop = NEG;
+ break;
+ default:
+ jit_assert();
+ break;
+ }
+
+ if( out == a && IS_REG(a) ) {
+ // in-place form
+ EMIT(cop,out,b,mode);
+ } else if( !IS_REG(out) || out == b ) {
+ // compute in the temporary so that `b` is not overwritten before being read
+ ereg tmp = get_tmp(mode);
+ emit_mov(ctx, tmp, a, mode);
+ EMIT(cop,tmp,b,mode);
+ if( mask ) {
+ EMIT(AND,tmp,MK_CONST(mask),M_I32);
+ mask = 0;
+ }
+ emit_mov(ctx, out, tmp, mode);
+ } else {
+ emit_mov(ctx, out, a, mode);
+ EMIT(cop,out,b,mode);
+ }
+ if( mask ) EMIT(AND,out,MK_CONST(mask),M_I32);
+}
+
+// Publishes the code generated so far to the jit context (size, bytes and
+// position map). Only the first call after the flushed flag was cleared does
+// anything; it also records the end offset in the slot following the current op.
+void hl_codegen_flush( jit_ctx *jit ) {
+	code_ctx *code = jit->code;
+	if( !code->flushed ) {
+		code->flushed = true;
+		if( code->pos_map != NULL )
+			code->pos_map[code->cur_op + 1] = code->code.cur;
+		jit->code_pos_map = code->pos_map;
+		jit->code_instrs = code->code.values;
+		jit->code_size = code->code.cur;
+	}
+}
+
+// Emits ONE no-op instruction, using the longest available encoding
+// (8, 5, 4, 3, 2 or 1 bytes) that does not exceed `size`.
+// Callers that need exactly `size` bytes of padding have to loop.
+static void emit_nop( code_ctx *ctx, int size ) {
+	// make sure the buffer can hold `size` more bytes without moving the cursor
+	byte_reserve(ctx->code,size);
+	ctx->code.cur -= size;
+	if( size >= 8 ) {
+		W(0x841F0F);
+		W(0);
+	} else if( size >= 5 ) {
+		W(0x441F0F);
+		B(0);
+	} else if( size >= 4 ) {
+		W(0x401F0F);
+	} else if( size >= 3 ) {
+		B(0x0F);
+		B(0x1F);
+		B(0x00);
+	} else if( size >= 2 ) {
+		B(0x66);
+		B(0x90);
+	} else {
+		B(0x90);
+	}
+}
+
+// Low bits of a REX prefix : bit 2 is taken from `w`, bit 1 from `b` and bit 0
+// from `a` (each set when the register number is 8 or above).
+// The whole expansion is parenthesized so that it can be combined with any operator.
+#define CALC_REX(w,a,b) ((((w)&8) ? 4 : 0) | (((b)&8) ? 2 : 0) | (((a)&8) ? 1 : 0))
+
+// REX64 always emits the prefix (0x48 base), REX32 only when at least one bit is set
+#define REX64(out,a,b) B(0x48 | CALC_REX(out,a,b))
+#define REX32(out,a,b) { int v = CALC_REX(out,a,b); if( v ) B(v|0x40); }
+
+// Emits a LEA into `out` for the address described by `_e` :
+// base `a`, optional index `b`, multiplier in the low 8 bits of size_offs and
+// byte offset in the remaining upper bits. Works on a copy of the instruction.
+static void emit_lea( code_ctx *ctx, ereg out, einstr *_e ) {
+	einstr ins = *_e;
+	int scale = ins.size_offs & 0xFF;
+	int disp = ins.size_offs >> 8;
+
+	// a non-zero multiplier must be 1, 2, 4 or 8
+	if( scale != 0 && (scale < 0 || scale > 8 || (scale & (scale - 1)) != 0) ) jit_assert();
+
+	if( IS_REG(ins.a) ) {
+		disp += REG_VALUE(ins.a);
+		if( ins.b && !IS_REG(ins.b) ) {
+			// b is always an int index !
+			emit_mov(ctx, RTMP, ins.b, M_I32);
+			ins.b = RTMP;
+		}
+	} else {
+		// a is always a mem address !
+		emit_mov(ctx, RTMP, ins.a, M_PTR);
+		ins.a = RTMP;
+		if( ins.b && !IS_REG(ins.b) ) {
+			// RTMP is taken by the base : the index goes through the output register
+			if( !IS_REG(out) ) jit_assert();
+			emit_mov(ctx, out, ins.b, M_I32);
+			ins.b = out;
+		}
+	}
+
+	if( scale == 0 ) {
+		// no index
+		if( REG_KIND(ins.a) != R_REG ) jit_assert();
+		emit_ext(ctx,_LEA,out,MK_ADDR(ins.a,disp),M_PTR,0);
+		return;
+	}
+
+	// indexed form, encoded by hand : a one byte displacement is emitted when the
+	// offset is not zero or when the base register low bits match RBP
+	bool with_disp = disp != 0 || (ins.a&7) == RBP;
+	REX64(out,ins.a,ins.b);
+	B(0x8D);
+	MOD_RM(with_disp ? 1 : 0,out,4);
+	// NOTE(review): the raw multiplier (1/2/4/8) is given to SIB here while the
+	// JUMP_TABLE code passes 3 for a *8 index - confirm which value SIB expects
+	SIB(scale,ins.b,ins.a);
+	if( with_disp ) {
+		if( !IS_SBYTE(disp) ) jit_assert();
+		B(disp);
+	}
+}
+
+// Pads the code buffer with no-ops until its size is a multiple of 16 bytes
+static void align_function( code_ctx *ctx ) {
+	for(;;) {
+		int misalign = byte_count(ctx->code) & 15;
+		if( misalign == 0 )
+			break;
+		// emit_nop may emit less than requested, hence the loop
+		emit_nop(ctx,16 - misalign);
+	}
+}
+
+// Reserves `size` bytes in the constant table and returns their offset.
+// When `align` is not zero the table is first padded so that the offset is a
+// multiple of it (the mask computation assumes a power of two).
+static int reserve_const_segment( code_ctx *ctx, int size, int align ) {
+	int start = byte_count(ctx->const_table);
+	int rem = align ? (start & (align-1)) : 0;
+	if( rem ) {
+		byte_reserve_impl(&ctx->jit->galloc,&ctx->const_table,align - rem);
+		start = byte_count(ctx->const_table);
+	}
+	byte_reserve_impl(&ctx->jit->galloc,&ctx->const_table,size);
+	return start;
+}
+
+// Makes sure `value` has an 8 byte slot in the constant table (slots are shared
+// between equal values) and records that the 4 bytes just emitted in the code
+// buffer have to be patched to reference that slot.
+static void alloc_const( code_ctx *ctx, uint64 value ) {
+	jit_ctx *jit = ctx->jit;
+	int slot = value_map_find(ctx->const_table_lookup, value);
+	if( slot < 0 ) {
+		slot = reserve_const_segment(ctx,8,8);
+		*(uint64*)byte_addr(ctx->const_table,slot) = value;
+		value_map_add_impl(&jit->galloc,&ctx->const_table_lookup,value,slot);
+	}
+	// const_refs stores (position of the 32 bit field, table offset) pairs
+	int_arr_add_impl(&jit->galloc,&ctx->const_refs,jit->out_pos + byte_count(ctx->code) - 4);
+	int_arr_add_impl(&jit->galloc,&ctx->const_refs,slot);
+}
+
+// Emits a LEA `out`, [rel32] with a zero displacement and returns the output
+// position of that 32 bit displacement so that it can be patched later.
+static int emit_lea_rel( code_ctx *ctx, ereg out ) {
+	int rex = 0x48;
+	if( out & 8 )
+		rex += 4;
+	B(rex);
+	B(0x8D);
+	MOD_RM(0,out&7,5);
+	int patch_pos = ctx->jit->out_pos + byte_count(ctx->code);
+	W(0);
+	return patch_pos;
+}
+
+// Returns the jump condition matching the comparison that precedes the current
+// instruction : walks backwards over MOV / JCOND / CMOV / XCHG / CXCHG and maps
+// the HL jump opcode stored in size_offs of the instruction found.
+// Float comparisons use the unsigned conditions.
+static int get_cond_jump( code_ctx *ctx ) {
+	einstr *cmp = ctx->jit->reg_instrs + ctx->cur_op;
+	do {
+		cmp--;
+	} while( cmp->op == MOV || cmp->op == JCOND || cmp->op == CMOV || cmp->op == XCHG || cmp->op == CXCHG );
+	bool is_float = IS_FLOAT(cmp->mode);
+	int op;
+	switch( cmp->size_offs ) {
+	case OJFalse:
+	case OJNull:
+		op = JZero;
+		break;
+	case OJTrue:
+	case OJNotNull:
+		op = JNotZero;
+		break;
+	case OJEq:
+		op = JEq;
+		break;
+	case OJNotEq:
+		op = JNeq;
+		break;
+	case OJSGte:
+		op = is_float ? JUGte : JSGte;
+		break;
+	case OJSGt:
+		op = is_float ? JUGt : JSGt;
+		break;
+	case OJSLt:
+		op = is_float ? JULt : JSLt;
+		break;
+	case OJSLte:
+		op = is_float ? JULte : JSLte;
+		break;
+	case OJUGte:
+	case OJNotLt:
+		op = JUGte;
+		break;
+	case OJULt:
+	case OJNotGte:
+		op = JULt;
+		break;
+	case 0:
+		if( cmp->op == DEBUG_BREAK ) {
+			// found a debug break !
+			BREAK();
+			op = JZero;
+			break;
+		}
+		// fallback
+	default:
+		jit_assert();
+		break;
+	}
+	return op;
+}
+
+// Emits a conditional move of `r` into `out` for the jump condition `cond`.
+// Float modes are rejected; 8 byte modes get a 64 bit REX prefix.
+static void emit_cmov( code_ctx *ctx, ereg out, ereg r, int cond, emit_mode m ) {
+	if( IS_FLOAT(m) ) jit_assert();
+	bool wide = hl_emit_mode_sizes[m] == 8;
+	if( wide ) {
+		REX64(out,r,UNUSED);
+	} else {
+		REX32(out,r,UNUSED);
+	}
+	B(0x0F);
+	// the opcode byte is derived from the jump condition value
+	B(cond - 0x40);
+	MOD_RM(3,out,r);
+}
+
+// Generates the machine code of the function currently held by `jit` :
+// every register-allocated instruction (jit->reg_instrs, with its destination
+// in jit->reg_writes) is emitted in order and its code offset is recorded in
+// ctx->pos_map. The result is then aligned and flushed, and the short jumps,
+// near jumps and jump table entries recorded during emission are resolved
+// through pos_map.
+void hl_codegen_function( jit_ctx *jit ) {
+	code_ctx *ctx = jit->code;
+	ctx->flushed = false;
+	// drop the buffers of the previous function
+	byte_free(&ctx->code);
+	int_arr_free(&ctx->near_jumps);
+	int_arr_free(&ctx->short_jumps);
+	free(ctx->pos_map);
+	// one extra slot : hl_codegen_flush stores the end offset after the last op
+	ctx->pos_map = (int*)malloc((jit->reg_instr_count + 1) * sizeof(int));
+	ctx->pos_map[0] = 0;
+	int const_addr_prev = int_arr_count(ctx->const_addr);
+	byte_reserve(ctx->code,64);
+	ctx->code.cur -= 64;
+# ifdef GEN_DEBUG
+	int reg_index = 0;
+	int emit_index = 0;
+# endif
+	for(int cur_pos=0;cur_pos<jit->reg_instr_count;cur_pos++) {
+		einstr *e = jit->reg_instrs + cur_pos;
+		ereg out = jit->reg_writes[cur_pos];
+		// keep 64 bytes available for the instruction (checked at the end of the loop)
+		byte_reserve(ctx->code,64);
+		ctx->code.cur -= 64;
+		ctx->cur_op = cur_pos;
+		if( cur_pos > 0 ) ctx->pos_map[cur_pos] = ctx->code.cur;
+# ifdef GEN_DEBUG
+		int rid = cur_pos | (jit->fun->findex << 16);
+		while( reg_index < jit->instr_count && jit->reg_pos_map[reg_index] <= cur_pos ) reg_index++;
+		int uid;
+		while( emit_index < jit->fun->nops && jit->emit_pos_map[emit_index] < reg_index ) {
+			uid = emit_index | (jit->fun->findex << 16);
+			__ignore(&uid);
+			__ignore(&rid);
+			emit_index++;
+			if( emit_index >= jit->fun->nops || jit->emit_pos_map[emit_index] >= reg_index )
+				emit_ext(ctx,_MOV,RTMP,VAL_CONST,M_I32,uid);
+		}
+# endif
+		switch( e->op ) {
+		case LOAD_ARG:
+			continue; // nop
+		case MOV:
+			emit_mov(ctx, out, e->a, e->mode);
+			break;
+		case XCHG:
+			{
+				ereg tmp = get_tmp(e->mode);
+				if( !IS_REG(e->a) && !IS_REG(e->b) )
+					jit_assert();
+				emit_mov(ctx, tmp, e->a, M_PTR);
+				emit_mov(ctx, e->a, e->b, M_PTR);
+				emit_mov(ctx, e->b, tmp, M_PTR);
+			}
+			break;
+		case STORE:
+			if( !IS_REG(e->a) && !IS_REG(e->b) ) {
+				if( e->mode != M_PTR ) {
+					// no push/pop 32 bit
+					ereg tmp2 = R(RAX);
+					emit_mode mode = e->mode == M_F64 ? M_PTR : e->mode == M_F32 ? M_I32 : e->mode;
+					EMIT(_PUSH,tmp2,UNUSED,M_PTR);
+					emit_mov(ctx, RTMP, e->a, M_PTR);
+					emit_mov(ctx, tmp2, e->b, mode);
+					emit_mov(ctx, MK_ADDR(RTMP,e->size_offs), tmp2, mode);
+					EMIT(_POP,tmp2,UNUSED,M_PTR);
+				} else {
+					if( IS_FLOAT(e->mode) ) BREAK();
+					EMIT(_PUSH,e->b,UNUSED,e->mode);
+					emit_mov(ctx, RTMP, e->a, M_PTR);
+					emit_ext(ctx, _POP,REG_ADD_OFFSET(REG_PTR(RTMP),e->size_offs), UNUSED, e->mode, 0);
+				}
+			} else if( !IS_REG(e->a) ) {
+				emit_mov(ctx, RTMP, e->a, M_PTR);
+				emit_mov(ctx, MK_ADDR(RTMP,e->size_offs), e->b, e->mode);
+			} else
+				emit_mov(ctx, REG_ADD_OFFSET(REG_PTR(e->a),e->size_offs), e->b, e->mode);
+			break;
+		case PUSH:
+			if( IS_FLOAT(e->mode) ) {
+				if( !IS_REG(e->a) )
+					EMIT(_PUSH,e->a,UNUSED,M_PTR);
+				else {
+					EMIT(SUB,R(RSP),MK_CONST(8),M_PTR);
+					EMIT(e->mode == M_F32 ? MOVSS : MOVSD,REG_PTR(R(RSP)),e->a,e->mode);
+				}
+			} else if( IS_REG(e->a) && REG_VALUE(e->a) != 0 ) {
+				emit_mov(ctx, RTMP, e->a, e->mode);
+				EMIT(_PUSH, RTMP, UNUSED, M_PTR);
+			} else
+				EMIT(_PUSH, e->a, UNUSED, M_PTR);
+			break;
+		case POP:
+			if( IS_FLOAT(e->mode) ) {
+				// load the value back from the top of the stack (destination operand
+				// first, mirror of the store done by PUSH) before releasing the slot
+				EMIT(e->mode == M_F32 ? MOVSS : MOVSD,e->a,REG_PTR(R(RSP)),e->mode);
+				EMIT(ADD,R(RSP),MK_CONST(8),M_PTR);
+			} else {
+				EMIT(_POP, e->a, UNUSED, M_PTR);
+			}
+			break;
+		case PUSH_CONST:
+			if( e->mode != M_PTR ) jit_assert();
+			// the 8 and 32 bit immediate forms of PUSH sign-extend their operand to
+			// 64 bits : only use them when the top bit of the immediate is clear
+			if( (e->value&0x7F) == e->value )
+				emit_ext(ctx,PUSH8, VAL_CONST, UNUSED, M_PTR, e->value);
+			else if( (e->value&0x7FFFFFFF) == e->value )
+				emit_ext(ctx,_PUSH, VAL_CONST, UNUSED, M_I32, e->value); // will push 64bits
+			else
+				emit_ext(ctx,_PUSH, VAL_CONST, UNUSED, M_PTR, e->value);
+			break;
+		case DEBUG_BREAK:
+			BREAK();
+			break;
+		case RET:
+			if( !IS_NULL(e->a) ) {
+				ereg ret = IS_FLOAT(e->mode) ? MMX(0) : R(RAX);
+				if( e->a != ret ) emit_mov(ctx, ret, e->a, e->mode);
+			}
+			EMIT(_RET, UNUSED, UNUSED, M_NONE);
+			break;
+		case LOAD_CONST:
+			{
+				emit_mode mode = e->mode;
+				if( !IS_REG(out) )
+					mode = (mode == M_F32 ? M_I32 : mode == M_F64 ? M_PTR : mode); // don't use FP for stack ops
+				ereg w = IS_REG(out) ? out : get_tmp(mode);
+				if( e->value == 0 )
+					EMIT(mode == M_F32 ? XORPS : mode == M_F64 ? XORPD : XOR, w, w, mode);
+				else if( IS_FLOAT(mode) ) {
+					// MOVSS / MOVSD with data relative
+					B(e->mode == M_F32 ? 0xF3 : 0xF2);
+					if( out&8 ) B(0x44);
+					B(0x0F);
+					B(0x10);
+					MOD_RM(0,out&7,5);
+					W(0);
+					alloc_const(ctx, e->value);
+				} else if( mode == M_PTR && (e->value&0xFFFFFFFF) == e->value )
+					emit_ext(ctx, _MOV, w, VAL_CONST, M_I32, e->value);
+				else
+					emit_ext(ctx, _MOV, w, VAL_CONST, mode, e->value);
+				if( w != out )
+					emit_mov(ctx, out, w, mode);
+			}
+			break;
+		case LOAD_ADDR:
+			if( IS_REG(e->a) && e->nargs == e->mode ) {
+				emit_mov(ctx, out, REG_ADD_OFFSET(REG_PTR(e->a),e->size_offs), e->nargs);
+			} else {
+				ereg tmp = IS_REG(out) || (e->nargs == e->mode) ? out : RTMP;
+				emit_mov(ctx, RTMP, e->a, M_PTR);
+				emit_mov(ctx, tmp, MK_ADDR(RTMP,e->size_offs), e->nargs);
+				if( out != tmp )
+					emit_mov(ctx, out, tmp, e->mode);
+			}
+			break;
+		case LOAD_FUN:
+			{
+				ereg w = IS_REG(out) ? out : RTMP;
+				int pos = emit_lea_rel(ctx,w);
+				int fid = e->size_offs;
+				// (position, function index) pair, patched by hl_codegen_flush_consts
+				int_arr_add_impl(&ctx->jit->galloc,&ctx->funs,pos);
+				int_arr_add_impl(&ctx->jit->galloc,&ctx->funs,fid);
+				if( w != out )
+					emit_mov(ctx, out, w, M_PTR);
+			}
+			break;
+		case CALL_FUN:
+			B(0xE8);
+			{
+				int pos = jit->out_pos + byte_count(ctx->code);
+				int fid = e->a;
+				int_arr_add_impl(&ctx->jit->galloc,&ctx->funs,pos);
+				int_arr_add_impl(&ctx->jit->galloc,&ctx->funs,fid);
+				W(0);
+			}
+			break;
+		case CALL_PTR:
+			if( e->value == (uint64)hl_null_access || e->value == (uint64)hl_jit_null_field_access ) {
+				// call near
+				int target = e->value == (uint64)hl_null_access ? ctx->null_access_pos : ctx->null_field_pos;
+				B(0xE8);
+				W(target - (jit->out_pos + byte_count(ctx->code) + 4));
+			} else {
+				// call near indirect
+				B(0xFF);
+				B(0x15);
+				W(0);
+				alloc_const(ctx, (uint64)e->value);
+				if( e->mode == M_UI8 || e->mode == M_UI16 ) {
+					// clear value upper bits
+					EMIT(e->mode == M_UI8 ? MOVZX8 : MOVZX16,R(RAX),R(RAX),M_PTR);
+				}
+			}
+			break;
+		case CALL_REG:
+			EMIT(_CALL, e->a, UNUSED, M_NONE);
+			break;
+		case TEST:
+			if( IS_FLOAT(e->mode) )
+				jit_assert();
+			if( !IS_REG(e->a) ) {
+				ereg tmp = get_tmp(e->mode);
+				emit_mov(ctx, tmp, e->a, e->mode);
+				EMIT(_TEST,tmp,tmp,e->mode);
+			} else
+				EMIT(_TEST,e->a,e->a,e->mode);
+			break;
+		case CMP:
+			{
+				CpuOp op;
+				switch( e->mode ) {
+				case M_UI8: op = CMP8; break;
+				case M_UI16: op = CMP16; break;
+				case M_F32: op = COMISS; break;
+				case M_F64: op = COMISD; break;
+				default: op = _CMP; break;
+				}
+				ereg a = e->a;
+				if( !IS_REG(e->a) && (IS_FLOAT(e->mode) || !IS_REG(e->b)) ) {
+					ereg tmp = get_tmp(e->mode);
+					emit_mov(ctx, tmp, e->a, e->mode);
+					a = tmp;
+				}
+				EMIT(op,a,e->b,e->mode);
+				if( IS_FLOAT(e->mode) && e->size_offs != OJSGt && e->size_offs != OJNull && e->size_offs != OJNotNull ) {
+					// handle NaNs : when the parity flag is set, overwrite the flags
+					// so that the following conditional jump gives the expected result
+					int jnotnan = jump_near(ctx,JNParity);
+					switch( e->size_offs ) {
+					case OJSLt:
+					case OJNotLt:
+						// set CF=0, ZF=1
+						EMIT(XOR,RTMP,RTMP,M_I32);
+						break;
+					case OJSGte:
+					case OJNotGte:
+						// set CF=1 (ZF is left set by the XOR)
+						EMIT(XOR,RTMP,RTMP,M_I32);
+						EMIT(STC,UNUSED,UNUSED,0);
+						break;
+					case OJNotEq:
+					case OJEq:
+						// set ZF=0, CF=?
+					case OJSLte:
+						// set ZF=0, CF=0
+						// (_TEST is the cpu opcode, TEST is the instruction kind handled above)
+						EMIT(_TEST,R(RSP),R(RSP),M_PTR);
+						break;
+					default:
+						jit_assert();
+					}
+					patch_jump_near(ctx,jnotnan);
+				}
+			}
+			break;
+		case JCOND:
+			{
+				int jump = get_cond_jump(ctx);
+				emit_jump(ctx, jump, e->size_offs);
+			}
+			break;
+		case JUMP:
+			emit_jump(ctx, JAlways, e->size_offs);
+			break;
+		case JUMP_TABLE:
+			{
+				int start = reserve_const_segment(ctx,HL_WSIZE * e->nargs,16);
+				int pos = emit_lea_rel(ctx, RTMP);
+				int_arr_add_impl(&ctx->jit->galloc,&ctx->const_refs,pos);
+				int_arr_add_impl(&ctx->jit->galloc,&ctx->const_refs,start);
+				ereg a = RTMP;
+				ereg b = e->a;
+				if( IS_REG(b) ) {
+					// jump [a+b*8]
+					B(0x40 | ((a&8)?1:0) | ((b&8)?2:0));
+					B(0xFF);
+					B(0x24);
+					SIB(3,(b&7),(a&7));
+				} else {
+					ereg save = R(RAX);
+					EMIT(_PUSH,save,UNUSED,M_PTR);
+					EMIT(_MOV,save,b,M_I32);
+					// lea tmp, [tmp+save*8]
+					einstr etmp;
+					etmp.a = a;
+					etmp.b = save;
+					etmp.size_offs = 8;
+					emit_lea(ctx, RTMP, &etmp);
+					EMIT(_POP,save,UNUSED,M_PTR);
+					// jump [tmp]
+					B(0x40 | ((RTMP&8)?1:0));
+					B(0xFF);
+					MOD_RM(0,4,RTMP&7);
+				}
+				// one table entry per target : (table offset, target op) pairs, the
+				// target op is turned into a code position once the function is done
+				ereg *args = hl_emit_get_args(jit->emit,e);
+				for(int k=0;k<e->nargs;k++) {
+					int_arr_add_impl(&jit->galloc,&ctx->const_addr,start + k * HL_WSIZE);
+					int_arr_add_impl(&jit->galloc,&ctx->const_addr,ctx->cur_op + (int)args[k] + 1);
+				}
+			}
+			break;
+		case CONV_UNSIGNED:
+		case CONV:
+			{
+				emit_mode in_mode = e->size_offs;
+				ereg r = IS_REG(e->a) ? e->a : get_tmp(in_mode);
+				if( r != e->a ) emit_mov(ctx, r, e->a, in_mode);
+				CpuOp op = -1;
+				switch( ID2(e->mode,in_mode) ) {
+				case ID2(M_F32,M_UI8):
+				case ID2(M_F32,M_UI16):
+				case ID2(M_F32,M_I32):
+				case ID2(M_F32,M_PTR):
+					op = CVTSI2SS;
+					break;
+				case ID2(M_F64,M_UI8):
+				case ID2(M_F64,M_UI16):
+				case ID2(M_F64,M_I32):
+				case ID2(M_F64,M_PTR):
+					op = CVTSI2SD;
+					break;
+				case ID2(M_UI8,M_F32):
+				case ID2(M_UI16,M_F32):
+				case ID2(M_I32,M_F32):
+				case ID2(M_PTR,M_F32):
+					op = CVTTSS2SI;
+					break;
+				case ID2(M_UI8,M_F64):
+				case ID2(M_UI16,M_F64):
+				case ID2(M_I32,M_F64):
+				case ID2(M_PTR,M_F64):
+					op = CVTTSD2SI;
+					break;
+				case ID2(M_F32,M_F64):
+					op = CVTSD2SS;
+					break;
+				case ID2(M_F64,M_F32):
+					op = CVTSS2SD;
+					break;
+				case ID2(M_PTR,M_I32):
+					// sign extend 32-64 bit conv
+					op = MOVSXD;
+					break;
+				case ID2(M_UI16,M_UI8):
+				case ID2(M_I32,M_UI8):
+				case ID2(M_PTR,M_UI8):
+				case ID2(M_UI8, M_UI16):
+				case ID2(M_UI8, M_I32):
+				case ID2(M_UI8, M_PTR):
+					op = MOVZX8;
+					break;
+				case ID2(M_I32,M_UI16):
+				case ID2(M_PTR,M_UI16):
+				case ID2(M_UI16, M_I32):
+				case ID2(M_UI16, M_PTR):
+					op = MOVZX16;
+					break;
+				case ID2(M_I32,M_PTR):
+					op = _MOV;
+					break;
+				default:
+					jit_assert();
+					break;
+				}
+				if( IS_REG(out) || op == _MOV )
+					EMIT(op,out,r,e->op == CONV_UNSIGNED ? M_PTR : e->mode);
+				else {
+					ereg r2 = get_tmp(e->mode);
+					EMIT(op,r2,r,e->op == CONV_UNSIGNED ? M_PTR : e->mode);
+					emit_mov(ctx,out,r2,e->mode);
+				}
+			}
+			break;
+		case BINOP:
+		case UNOP:
+			emit_anyop(ctx, e->size_offs, out, e->a, e->b, e->mode);
+			break;
+		case LEA:
+			if( !IS_REG(out) ) {
+				ereg tmp = get_tmp(e->mode);
+				emit_lea(ctx,tmp,e);
+				emit_mov(ctx,out,tmp,e->mode);
+			} else
+				emit_lea(ctx,out,e);
+			break;
+		case STACK_OFFS:
+			if( e->size_offs >= 0 )
+				EMIT(ADD,R(RSP),MK_CONST(e->size_offs),M_PTR);
+			else
+				EMIT(SUB,R(RSP),MK_CONST(-e->size_offs),M_PTR);
+			break;
+		case PREFETCH:
+			{
+				CpuOp op;
+				switch( e->size_offs ) {
+				case 0: op = PREFETCHT0; break;
+				case 1: op = PREFETCHT1; break;
+				case 2: op = PREFETCHT2; break;
+				case 3: op = PREFETCHNTA; break;
+				case 4: op = PREFETCHW; break;
+				default: jit_assert();
+				}
+				ereg a = e->a;
+				if( !IS_REG(e->a) ) {
+					emit_mov(ctx,RTMP,e->a,M_PTR);
+					a = RTMP;
+				}
+				EMIT(op,REG_PTR(a),UNUSED,M_PTR);
+			}
+			break;
+		case CMOV:
+			{
+				int cond = get_cond_jump(ctx);
+				if( !IS_REG(out) ) jit_assert();
+				if( IS_REG(e->a) ) {
+					emit_cmov(ctx,out,e->a,cond,M_PTR);
+				} else {
+					emit_mov(ctx,RTMP,e->a,e->mode);
+					emit_cmov(ctx,out,RTMP,cond,M_PTR);
+				}
+			}
+			break;
+		case CXCHG:
+			BREAK();
+			break;
+		case NOP:
+			emit_nop(ctx,1);
+			break;
+		default:
+			jit_assert();
+			break;
+		}
+		if( ctx->code.cur > ctx->code.max ) jit_assert();
+	}
+	align_function(ctx);
+	hl_codegen_flush(jit);
+	// short jumps : (position of the 8 bit offset, target op) pairs
+	for(int i=0;i<int_arr_count(ctx->short_jumps);i+=2) {
+		int pos = int_arr_get(ctx->short_jumps,i);
+		int target = int_arr_get(ctx->short_jumps,i+1);
+		int offset = ctx->pos_map[target] - (pos + 1);
+		if( !IS_SBYTE(offset) ) jit_assert();
+		*(char*)&ctx->code.values[pos] = (char)offset;
+	}
+	// near jumps : (position of the 32 bit offset, target op) pairs
+	for(int i=0;i<int_arr_count(ctx->near_jumps);i+=2) {
+		int pos = int_arr_get(ctx->near_jumps,i);
+		int target = int_arr_get(ctx->near_jumps,i+1);
+		int offset = ctx->pos_map[target] - (pos + 4);
+		*(int*)&ctx->code.values[pos] = offset;
+	}
+	// jump table entries added by this function : replace the target op by its
+	// position in the output
+	for(int i=const_addr_prev;i<int_arr_count(ctx->const_addr);i+=2) {
+		int target = int_arr_get(ctx->const_addr,i+1);
+		int offs = jit->out_pos + ctx->pos_map[target];
+		ctx->const_addr.values[i+1] = offs;
+	}
+}
+
+// Allocates a zero-initialized code generation context and links it with `jit`
+void hl_codegen_alloc( jit_ctx *jit ) {
+	code_ctx *code = (code_ctx*)malloc(sizeof(*code));
+	memset(code,0,sizeof(*code));
+	code->jit = jit;
+	jit->code = code;
+}
+
+// Declares the code emitted since output position `start` as a function,
+// then pads the buffer up to the next 16 byte boundary.
+static void flush_function( code_ctx *ctx, int start ) {
+	jit_ctx *jit = ctx->jit;
+	// the size is taken before the padding is added
+	hl_jit_define_function(jit, start, jit->out_pos + byte_count(ctx->code) - start);
+	align_function(ctx);
+	if( ctx->code.max < byte_count(ctx->code) ) jit_assert();
+}
+
+// Emits the helper stubs shared by all the functions and flushes them :
+// - the hl_null_access and hl_jit_null_field_access call stubs
+//   (positions kept in ctx->null_access_pos / ctx->null_field_pos)
+// - c2hl : calls a function pointer with arguments read from an array
+// - hl2c : saves the argument registers and calls hl_jit_wrapper_ptr or
+//   hl_jit_wrapper_d depending on the return type of the closure
+void hl_codegen_init( jit_ctx *jit ) {
+	code_ctx *ctx = jit->code;
+	byte_reserve(ctx->code,1024);
+	ctx->code.cur -= 1024;
+
+	// generate hl_null_access stub
+	ctx->null_access_pos = jit->out_pos + byte_count(ctx->code);
+	EMIT(_PUSH,R(RBP),UNUSED,M_PTR);
+	EMIT(_MOV,R(RBP),R(RSP),M_PTR);
+	EMIT(SUB,R(RSP),MK_CONST(0x20),M_PTR);
+	emit_ext(ctx,_MOV,R(RAX),VAL_CONST,M_PTR,(int_val)hl_null_access);
+	EMIT(_CALL,R(RAX),UNUSED,M_PTR);
+	BREAK();
+	flush_function(ctx, ctx->null_access_pos);
+
+	// generate hl_null_field access stub
+	ctx->null_field_pos = jit->out_pos + byte_count(ctx->code);
+	EMIT(_PUSH,R(RBP),UNUSED,M_PTR);
+	EMIT(_MOV,R(RBP),R(RSP),M_PTR);
+	EMIT(SUB,R(RSP),MK_CONST(0x28),M_PTR);
+	// the first argument is the 32 bit value found just above the return address
+	EMIT(_MOV,jit->cfg.regs.arg[0],MK_ADDR(RBP,HL_WSIZE*2),M_I32);
+	emit_ext(ctx,_MOV,R(RAX),VAL_CONST,M_PTR,(int_val)hl_jit_null_field_access);
+	EMIT(_CALL,R(RAX),UNUSED,M_PTR);
+	BREAK();
+	flush_function(ctx, ctx->null_field_pos);
+
+	// generate c2hl stub
+	jit->code_funs.c2hl = jit->out_pos + byte_count(ctx->code);
+	regs_config *cfg = &jit->cfg;
+	EMIT(_PUSH,R(RBP),UNUSED,M_PTR);
+	EMIT(_MOV,R(RBP),R(RSP),M_PTR);
+
+	// move the three stub parameters out of the argument registers, which are
+	// about to be overwritten
+	ereg fptr = scratch_not_param[0];
+	ereg vargs = scratch_not_param[1];
+	ereg nargs = scratch_not_param[2];
+	EMIT(_MOV,fptr,cfg->regs.arg[0],M_PTR);
+	EMIT(_MOV,vargs,cfg->regs.arg[1],M_PTR);
+	EMIT(_MOV,nargs,cfg->regs.arg[2],M_I32);
+
+	// load every argument register from the array : integer registers first,
+	// float registers right after them
+	for(int i=0;i<cfg->regs.nargs;i++)
+		EMIT(_MOV, cfg->regs.arg[i], MK_ADDR(vargs,i*8), M_PTR);
+	// NOTE(review): the float register is adjusted by -64 here but not in the
+	// hl2c stub below - confirm which form the emitter expects
+	for(int i=0;i<cfg->floats.nargs;i++)
+		EMIT(MOVSD, cfg->floats.arg[i]-64, MK_ADDR(vargs,(i + cfg->regs.nargs) * 8), M_PTR);
+
+	// push `nargs` stack arguments, walking the array backwards from its last slot
+	EMIT(ADD,vargs,MK_CONST((MAX_ARGS - 1) * HL_WSIZE),M_PTR);
+	int begin = byte_count(ctx->code);
+	EMIT(_TEST,nargs,nargs,M_I32);
+	int pos = jump_near(ctx,JZero);
+	EMIT(_PUSH,MK_ADDR(vargs,0),UNUSED,M_PTR);
+	EMIT(SUB,vargs,MK_CONST(HL_WSIZE),M_PTR);
+	EMIT(DEC,nargs,UNUSED,M_I32);
+	// loops back to `begin`
+	jump_near(ctx,-begin);
+	patch_jump_near(ctx,pos);
+
+	if( IS_WINCALL64 ) EMIT(SUB,R(RSP),MK_CONST(0x20),M_PTR);
+	EMIT(_CALL, fptr, UNUSED, M_NONE);
+
+	EMIT(_MOV,R(RSP),R(RBP),M_PTR);
+	EMIT(_POP,R(RBP),UNUSED,M_PTR);
+	EMIT(_RET,UNUSED,UNUSED,M_NONE);
+
+	flush_function(ctx, jit->code_funs.c2hl);
+
+	// generate hl2c stub
+	jit->code_funs.hl2c = jit->out_pos + byte_count(ctx->code);
+	ereg cl = cfg->regs.arg[0];
+	ereg tmp = cfg->regs.arg[1];
+	EMIT(_PUSH,R(RBP),UNUSED,M_PTR);
+	EMIT(_MOV,R(RBP),R(RSP),M_PTR);
+	EMIT(SUB,R(RSP),MK_CONST(cfg->floats.nargs*8),M_PTR);
+
+	// push all possible call registers
+	for(int i=0;i<cfg->floats.nargs;i++)
+		EMIT(MOVSD,MK_ADDR(RSP,i*8),cfg->floats.arg[cfg->floats.nargs - 1 - i],M_F64);
+	for(int i=0;i<cfg->regs.nargs;i++)
+		EMIT(_PUSH,cfg->regs.arg[cfg->regs.nargs - 1 - i],UNUSED,M_PTR);
+
+	// opcodes for:
+	//		switch( arg0->t->fun->ret->kind ) {
+	//		case HF32: case HF64: return jit_wrapper_d(arg0,&args);
+	//		default: return jit_wrapper_ptr(arg0,&args);
+	//		}
+	hl_type_fun *ft = NULL; // only used to compute the offset of the `ret` field
+	ereg fun_ptr = scratch_not_param[0];
+
+	EMIT(_MOV,tmp,MK_ADDR(cl,0),M_PTR); // ->t
+	EMIT(_MOV,tmp,MK_ADDR(tmp,HL_WSIZE),M_PTR); // ->fun
+	EMIT(_MOV,tmp,MK_ADDR(tmp,(int)(int_val)&ft->ret),M_PTR); // ->ret
+	EMIT(_MOV,tmp,MK_ADDR(tmp,0),M_I32); // ->kind
+
+	EMIT(_CMP,tmp,MK_CONST(HF64),M_I32);
+	int float1 = jump_near(ctx,JEq);
+	EMIT(_CMP,tmp,MK_CONST(HF32),M_I32);
+	int float2 = jump_near(ctx,JEq);
+	emit_ext(ctx,_MOV,fun_ptr,VAL_CONST,M_PTR,(int_val)hl_jit_wrapper_ptr);
+
+	int jexit = jump_near(ctx, JAlways);
+	patch_jump_near(ctx, float1);
+	patch_jump_near(ctx, float2);
+	emit_ext(ctx,_MOV,fun_ptr,VAL_CONST,M_PTR,(int_val)hl_jit_wrapper_d);
+	patch_jump_near(ctx, jexit);
+
+	int stack_args_pos = HL_WSIZE * (IS_64?2:3);
+	if( IS_WINCALL64 ) {
+		stack_args_pos += 0x20;
+		EMIT(SUB,R(RSP),MK_CONST(0x20),M_PTR);
+	}
+	// second argument : address of the caller stack arguments,
+	// third argument : address of the registers saved above
+	EMIT(_LEA,cfg->regs.arg[1],MK_ADDR(R(RBP),stack_args_pos),M_PTR);
+	EMIT(_LEA,cfg->regs.arg[2],MK_ADDR(R(RBP),-(cfg->floats.nargs * 8 + cfg->regs.nargs * HL_WSIZE)),M_PTR);
+	EMIT(_CALL,fun_ptr,UNUSED,M_PTR);
+
+	if( IS_WINCALL64 )
+		EMIT(ADD,R(RSP),MK_CONST(0x20),M_PTR);
+
+	EMIT(_MOV,R(RSP),R(RBP),M_PTR);
+	EMIT(_POP,R(RBP),UNUSED,M_PTR);
+	EMIT(_RET,UNUSED,UNUSED,M_NONE);
+
+	flush_function(ctx, jit->code_funs.hl2c);
+
+
+	hl_codegen_flush(jit);
+}
+
+// Releases the position map and the code generation context itself.
+// NOTE(review): the code buffer and the jump arrays, which hl_codegen_function
+// frees at the start of every function, are not released here - confirm that
+// they are owned or freed elsewhere.
+void hl_codegen_free( jit_ctx *jit ) {
+	code_ctx *code = jit->code;
+	int *map = code->pos_map;
+	free(map);
+	free(code);
+}
+
+// Resolves the references recorded while generating the functions and hands
+// the constant table over to the jit as the next block to output :
+// - function references get the relative offset to the function position
+// - constant references get the relative offset to their slot in the table,
+//   which is placed at the current output position
+void hl_codegen_flush_consts( jit_ctx *jit ) {
+	code_ctx *ctx = jit->code;
+	// patch function offsets
+	for(int i=0;i<int_arr_count(ctx->funs);i+=2) {
+		int pos = int_arr_get(ctx->funs,i);
+		int fid = int_arr_get(ctx->funs,i+1);
+		// relative to the end of the 32 bit field
+		int offset = (int)(int_val)jit->mod->functions_ptrs[fid] - (pos + 4);
+		*(int*)(jit->output + pos) = offset;
+	}
+	int_arr_reset(&ctx->funs);
+	// emit constant table
+	jit->code_size = byte_count(ctx->const_table);
+	jit->code_instrs = ctx->const_table.values;
+	ctx->const_table_pos = jit->out_pos;
+	// patch constant offsets
+	for(int i=0;i<int_arr_count(ctx->const_refs);i+=2) {
+		int pos = int_arr_get(ctx->const_refs,i);
+		int coffs = int_arr_get(ctx->const_refs,i+1);
+		int offset = (ctx->const_table_pos + coffs) - (pos + 4);
+		*(int*)(jit->output + pos) = offset;
+	}
+	int_arr_reset(&ctx->const_refs);
+	// cleanup
+	byte_free(&ctx->const_table);
+	value_map_free(&ctx->const_table_lookup);
+}
+
+// Writes the absolute addresses recorded in const_addr into the final code :
+// each (table offset, code offset) pair stores the address of final_code + code
+// offset into the constant table slot, then the list is released.
+void hl_codegen_final( jit_ctx *jit ) {
+	code_ctx *ctx = jit->code;
+	// patch absolute addresses
+	for(int i=0;i<int_arr_count(ctx->const_addr);i+=2) {
+		int pos = int_arr_get(ctx->const_addr,i);
+		int offs = int_arr_get(ctx->const_addr,i+1);
+		*(void**)(jit->final_code + ctx->const_table_pos + pos) = jit->final_code + offs;
+	}
+	int_arr_free(&ctx->const_addr);
+}
diff --git a/src/main.c b/src/main.c
index 6054060d0..5ad605e36 100644
--- a/src/main.c
+++ b/src/main.c
@@ -20,7 +20,7 @@
* DEALINGS IN THE SOFTWARE.
*/
#include
-#include
+#include
#include "hlsystem.h"
#ifdef HL_WIN
@@ -259,7 +259,7 @@ int main(int argc, pchar *argv[]) {
file = PSTR("hlboot.dat");
fchk = pfopen(file,"rb");
if( fchk == NULL ) {
- printf("HL/JIT %d.%d.%d (c)2015-2025 Haxe Foundation\n Usage : hl [--debug ] [--debug-wait] \n",HL_VERSION>>16,(HL_VERSION>>8)&0xFF,HL_VERSION&0xFF);
+ printf("HL/JIT %d.%d.%d (c)2015-2026 Haxe Foundation\n Usage : hl [--debug ] [--debug-wait] \n",HL_VERSION>>16,(HL_VERSION>>8)&0xFF,HL_VERSION&0xFF);
return 1;
}
fclose(fchk);
diff --git a/src/module.c b/src/module.c
index e668b1064..b6d7a4a97 100644
--- a/src/module.c
+++ b/src/module.c
@@ -21,6 +21,7 @@
*/
#include
#include
+#include
#ifdef HL_WIN
# undef _GUID
@@ -34,6 +35,10 @@ EXTERN_C IMAGE_DOS_HEADER __ImageBase;
#define HOT_RELOAD_EXTRA_GLOBALS 4096
+#ifdef HL_DEBUG
+# define ALLOW_DUMP
+#endif
+
HL_API void hl_prim_not_loaded( const uchar *err );
static hl_module **cur_modules = NULL;
@@ -72,7 +77,7 @@ static bool module_resolve_pos( hl_module *m, void *addr, int *fidx, int *fpos )
while( min < max ) {
int mid = (min + max) >> 1;
int offset = dbg->large ? ((int*)dbg->offsets)[mid] : ((unsigned short*)dbg->offsets)[mid];
- if( offset <= code_pos )
+ if( offset < code_pos )
min = mid + 1;
else
max = mid;
@@ -224,10 +229,8 @@ static int module_capture_stack( void **stack, int size ) {
unsigned char *code = m->jit_code;
int code_size = m->codesize;
if( module_addr >= (void*)code && module_addr < (void*)(code + code_size) ) {
- if( stack && count == size ) {
+ if( stack && count == size )
break;
- }
-
if( stack )
stack[count++] = module_addr;
else
@@ -248,6 +251,41 @@ static int module_capture_stack( void **stack, int size ) {
}
}
return count;
+#elif defined(__aarch64__) || defined(_M_ARM64)
+ // On AArch64, walk the frame pointer (X29) chain instead of scanning the stack.
+ // The heuristic scanner produces false positives from callee-saved register spills
+ // (STP X19,X20 etc.) that look like (stack_addr, code_addr) pairs.
+ void *stack_top = hl_get_thread()->stack_top;
+ void **fp = (void **)__builtin_frame_address(0);
+ int count = 0;
+ while( fp && (void *)fp < stack_top ) {
+ void *lr = fp[1];
+ void *next_fp = fp[0];
+ int i;
+ for(i=0;ijit_code;
+ int code_size = m->codesize;
+ if( lr >= (void*)code && lr < (void*)(code + code_size) ) {
+ if( m->jit_debug ) {
+ int s = m->jit_debug[0].start;
+ code += s;
+ code_size -= s;
+ if( lr < (void*)code || lr >= (void*)(code + code_size) ) continue;
+ }
+ if( stack ) {
+ if( count == size ) return count;
+ stack[count] = lr;
+ }
+ count++;
+ break;
+ }
+ }
+ if( next_fp == NULL || next_fp <= (void *)fp || next_fp >= stack_top )
+ break;
+ fp = (void **)next_fp;
+ }
+ return count;
#else
return hl_module_capture_stack_range(hl_get_thread()->stack_top, (void**)&stack, stack, size);
#endif
@@ -705,21 +743,57 @@ int hl_module_init( hl_module *m, h_bool hot_reload ) {
if( hot_reload ) m->hash = hl_code_hash_alloc(m->code);
hl_module_init_natives(m);
hl_module_init_indexes(m);
+# ifdef WIN64_UNWIND_TABLES
+ m->unwind_table_size = m->code->nfunctions + 10; // extra space for jit internals
+ m->unwind_table = malloc(sizeof(RUNTIME_FUNCTION) * m->unwind_table_size);
+ memset(m->unwind_table, 0, sizeof(RUNTIME_FUNCTION) * m->unwind_table_size);
+# endif
// JIT
ctx = hl_jit_alloc();
if( ctx == NULL )
return 0;
hl_jit_init(ctx, m);
+# ifdef ALLOW_DUMP
+ bool dump = false;
+ int filter = -1;
+ for(i=0;i= '0' && arg[pos] <= '9' )
+ filter |= arg[pos] - '0';
+ else
+ filter |= arg[pos] - 'A' + 10;
+ pos++;
+ }
+ }
+ }
+# endif
for(i=0;icode->nfunctions;i++) {
hl_function *f = m->code->functions + i;
+# ifdef ALLOW_DUMP
+ if( filter >= 0 && filter != f->findex ) continue;
+# endif
int fpos = hl_jit_function(ctx, m, f);
if( fpos < 0 ) {
hl_jit_free(ctx, false);
return 0;
}
m->functions_ptrs[f->findex] = (void*)(int_val)fpos;
+# ifdef ALLOW_DUMP
+ if( dump ) hl_emit_dump(ctx);
+# endif
}
m->jit_code = hl_jit_code(ctx, m, &m->codesize, &m->jit_debug, NULL);
+# ifdef ALLOW_DUMP
+ if( filter >= 0 ) exit(0);
+# endif
for(i=0;icode->nfunctions;i++) {
hl_function *f = m->code->functions + i;
m->functions_ptrs[f->findex] = ((unsigned char*)m->jit_code) + ((int_val)m->functions_ptrs[f->findex]);
@@ -735,6 +809,9 @@ int hl_module_init( hl_module *m, h_bool hot_reload ) {
hl_gc_set_dump_types(hl_module_types_dump);
# ifdef HL_VTUNE
hl_setup.vtune_init = modules_init_vtune;
+# endif
+# ifdef WIN64_UNWIND_TABLES
+ RtlAddFunctionTable(m->unwind_table, m->unwind_table_size, (DWORD64)m->jit_code);
# endif
hl_jit_free(ctx, hot_reload);
if( hot_reload ) {
diff --git a/src/opcodes.h b/src/opcodes.h
index ab9b1fa51..9e4df7f60 100644
--- a/src/opcodes.h
+++ b/src/opcodes.h
@@ -67,8 +67,8 @@ OP_BEGIN
OP(OIncr,R,X,X)
OP(ODecr,R,X,X)
- OP(OCall0,R,R,X)
- OP(OCall1,R,R,R)
+ OP(OCall0,R,C,X)
+ OP(OCall1,R,C,R)
OP(OCall2,R,AR,4)
OP(OCall3,R,AR,5)
OP(OCall4,R,AR,6)
@@ -78,17 +78,17 @@ OP_BEGIN
OP(OCallClosure,R,AR,VAR_ARGS)
OP(OStaticClosure,R,G,X)
- OP(OInstanceClosure,R,R,G)
+ OP(OInstanceClosure,R,C,R)
OP(OVirtualClosure,R,R,G)
OP(OGetGlobal,R,G,X)
- OP(OSetGlobal,R_NW,G,X)
- OP(OField,R,R,C)
- OP(OSetField,R_NW,R,C)
- OP(OGetThis,R,C,X)
- OP(OSetThis,R_NW,R,X)
+ OP(OSetGlobal,G,R,X)
+ OP(OField,R,R,G)
+ OP(OSetField,R_NW,G,R)
+ OP(OGetThis,R,G,X)
+ OP(OSetThis,G,R,X)
OP(ODynGet,R,R,C)
- OP(ODynSet,R_NW,R,C)
+ OP(ODynSet,R_NW,C,R)
OP(OJTrue,R_NW,J,X)
OP(OJFalse,R_NW,J,X)
@@ -134,7 +134,7 @@ OP_BEGIN
OP(ONew,R,X,X)
OP(OArraySize,R,R,X)
- OP(OType,R,R,X)
+ OP(OType,R,G,X)
OP(OGetType,R,R,X)
OP(OGetTID,R,R,X)
diff --git a/src/profile.c b/src/profile.c
index e0df0efc3..09ba265ed 100644
--- a/src/profile.c
+++ b/src/profile.c
@@ -146,13 +146,23 @@ static void *get_thread_stackptr( thread_handle *t, void **eip ) {
return (void*)c.Esp;
# endif
#elif defined(HL_LINUX)
-# ifdef HL_64
+# if defined(__aarch64__) || defined(_M_ARM64)
+ *eip = (void*)shared_context.context.uc_mcontext.pc;
+ return (void*)shared_context.context.uc_mcontext.sp;
+# elif defined(HL_64)
*eip = (void*)shared_context.context.uc_mcontext.gregs[REG_RIP];
return (void*)shared_context.context.uc_mcontext.gregs[REG_RSP];
# else
*eip = (void*)shared_context.context.uc_mcontext.gregs[REG_EIP];
return (void*)shared_context.context.uc_mcontext.gregs[REG_ESP];
# endif
+#elif defined(HL_MAC) && defined(__aarch64__)
+ struct __darwin_mcontext64 *mcontext = shared_context.context.uc_mcontext;
+ if (mcontext != NULL) {
+ *eip = (void*)mcontext->__ss.__pc;
+ return (void*)mcontext->__ss.__sp;
+ }
+ return NULL;
#elif defined(HL_MAC) && defined(__x86_64__)
struct __darwin_mcontext64 *mcontext = shared_context.context.uc_mcontext;
if (mcontext != NULL) {
diff --git a/src/std/types.c b/src/std/types.c
index eaf228db6..8db708185 100644
--- a/src/std/types.c
+++ b/src/std/types.c
@@ -35,7 +35,7 @@ HL_PRIM hl_type hlt_bool = { HBOOL };
HL_PRIM hl_type hlt_abstract = { HABSTRACT, {USTR("")} };
static const uchar *TSTR[] = {
- USTR("void"), USTR("i8"), USTR("i16"), USTR("i32"), USTR("i64"), USTR("f32"), USTR("f64"),
+ USTR("void"), USTR("ui8"), USTR("ui16"), USTR("i32"), USTR("i64"), USTR("f32"), USTR("f64"),
USTR("bool"), USTR("bytes"), USTR("dynamic"), NULL, NULL,
USTR("array"), USTR("type"), NULL, NULL, USTR("dynobj"),
NULL, NULL, NULL, NULL, NULL, NULL, USTR("guid")
@@ -43,8 +43,8 @@ static const uchar *TSTR[] = {
static int T_SIZES[] = {
0, // VOID
- 1, // I8
- 2, // I16
+ 1, // UI8
+ 2, // UI16
4, // I32
8, // I64
4, // F32
@@ -160,8 +160,8 @@ HL_PRIM bool hl_same_type( hl_type *a, hl_type *b ) {
HL_PRIM bool hl_is_dynamic( hl_type *t ) {
static bool T_IS_DYNAMIC[] = {
false, // HVOID,
- false, // HI8
- false, // HI16
+ false, // HUI8
+ false, // HUI16
false, // HI32
false, // HI64
false, // HF32
@@ -190,8 +190,8 @@ HL_PRIM bool hl_is_dynamic( hl_type *t ) {
HL_PRIM bool hl_is_ptr( hl_type *t ) {
static bool T_IS_PTR[] = {
false, // HVOID,
- false, // HI8
- false, // HI16
+ false, // HUI8
+ false, // HUI16
false, // HI32
false, // HI64
false, // HF32