| 1 | /* |
| 2 | * Stack-less Just-In-Time compiler |
| 3 | * |
| 4 | * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. |
| 5 | * |
| 6 | * Redistribution and use in source and binary forms, with or without modification, are |
| 7 | * permitted provided that the following conditions are met: |
| 8 | * |
| 9 | * 1. Redistributions of source code must retain the above copyright notice, this list of |
| 10 | * conditions and the following disclaimer. |
| 11 | * |
| 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, this list |
| 13 | * of conditions and the following disclaimer in the documentation and/or other materials |
| 14 | * provided with the distribution. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY |
| 17 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| 18 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT |
| 19 | * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
| 20 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED |
| 21 | * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
| 22 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 23 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
| 24 | * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 25 | */ |
| 26 | |
| 27 | #ifndef SLJIT_LIR_H_ |
| 28 | #define SLJIT_LIR_H_ |
| 29 | |
| 30 | /* |
| 31 | ------------------------------------------------------------------------ |
| 32 | Stack-Less JIT compiler for multiple architectures (x86, ARM, PowerPC) |
| 33 | ------------------------------------------------------------------------ |
| 34 | |
| 35 | Short description |
| 36 | Advantages: |
| 37 | - The execution can be continued from any LIR instruction. In other |
| 38 | words, it is possible to jump to any label from anywhere, even from |
| 39 | a code fragment, which is compiled later, as long as the compiling |
| 40 | context is the same. See sljit_emit_enter for more details. |
| 41 | - Supports self modifying code: target of any jump and call |
| 42 | instructions and some constant values can be dynamically modified |
| 43 | during runtime. See SLJIT_REWRITABLE_JUMP. |
| 44 | - although it is not suggested to do it frequently |
| 45 | - can be used for inline caching: save an important value once |
| 46 | in the instruction stream |
| 47 | - A fixed stack space can be allocated for local variables |
| 48 | - The compiler is thread-safe |
| 49 | - The compiler is highly configurable through preprocessor macros. |
| 50 | You can disable unneeded features (multithreading in single |
| 51 | threaded applications), and you can use your own system functions |
| 52 | (including memory allocators). See sljitConfig.h. |
| 53 | Disadvantages: |
| 54 | - The compiler is more like a platform independent assembler, so |
| 55 | there is no built-in variable management. Registers and stack must |
| 56 | be managed manually (the name of the compiler refers to this). |
| 57 | In practice: |
| 58 | - This approach is very effective for interpreters |
| 59 | - One of the saved registers typically points to a stack interface |
| 60 | - It can jump to any exception handler anytime (even if it belongs |
| 61 | to another function) |
| 62 | - Hot paths can be modified during runtime reflecting the changes |
| 63 | of the fastest execution path of the dynamic language |
| 64 | - SLJIT supports complex memory addressing modes |
| 65 | - mainly position and context independent code (except some cases) |
| 66 | |
| 67 | For valgrind users: |
| 68 | - pass --smc-check=all argument to valgrind, since JIT is a "self-modifying code" |
| 69 | */ |
| 70 | |
| 71 | #if (defined SLJIT_HAVE_CONFIG_PRE && SLJIT_HAVE_CONFIG_PRE) |
| 72 | #include "sljitConfigPre.h" |
| 73 | #endif /* SLJIT_HAVE_CONFIG_PRE */ |
| 74 | |
| 75 | #include "sljitConfig.h" |
| 76 | |
| 77 | /* The following header file defines useful macros for fine tuning |
| 78 | SLJIT based code generators. They are listed in the beginning |
| 79 | of sljitConfigInternal.h */ |
| 80 | |
| 81 | #include "sljitConfigInternal.h" |
| 82 | |
| 83 | #if (defined SLJIT_HAVE_CONFIG_POST && SLJIT_HAVE_CONFIG_POST) |
| 84 | #include "sljitConfigPost.h" |
| 85 | #endif /* SLJIT_HAVE_CONFIG_POST */ |
| 86 | |
| 87 | #ifdef __cplusplus |
| 88 | extern "C" { |
| 89 | #endif |
| 90 | |
| 91 | /* Version numbers. */ |
| 92 | #define SLJIT_MAJOR_VERSION 0 |
| 93 | #define SLJIT_MINOR_VERSION 95 |
| 94 | |
| 95 | /* --------------------------------------------------------------------- */ |
| 96 | /* Error codes */ |
| 97 | /* --------------------------------------------------------------------- */ |
| 98 | |
| 99 | /* Indicates no error. */ |
| 100 | #define SLJIT_SUCCESS 0 |
| 101 | /* After the call of sljit_generate_code(), the error code of the compiler |
| 102 | is set to this value to avoid further code generation. |
| 103 | The compiler should be freed after sljit_generate_code(). */ |
| 104 | #define SLJIT_ERR_COMPILED 1 |
| 105 | /* Cannot allocate non-executable memory. */ |
| 106 | #define SLJIT_ERR_ALLOC_FAILED 2 |
| 107 | /* Cannot allocate executable memory. |
| 108 | Only sljit_generate_code() returns with this error code. */ |
| 109 | #define SLJIT_ERR_EX_ALLOC_FAILED 3 |
| 110 | /* Return value for SLJIT_CONFIG_UNSUPPORTED placeholder architecture. */ |
| 111 | #define SLJIT_ERR_UNSUPPORTED 4 |
| 112 | /* An invalid argument is passed to any SLJIT function. */ |
| 113 | #define SLJIT_ERR_BAD_ARGUMENT 5 |
| 114 | |
| 115 | /* --------------------------------------------------------------------- */ |
| 116 | /* Registers */ |
| 117 | /* --------------------------------------------------------------------- */ |
| 118 | |
| 119 | /* |
| 120 | Scratch (R) registers: registers which may not preserve their values |
| 121 | across function calls. |
| 122 | |
| 123 | Saved (S) registers: registers which preserve their values across |
| 124 | function calls. |
| 125 | |
| 126 | The scratch and saved register sets overlap. The last scratch register |
| 127 | is the first saved register, the one before the last is the second saved |
| 128 | register, and so on. |
| 129 | |
| 130 | If an architecture provides two scratch and three saved registers, |
| 131 | its scratch and saved register sets are the following: |
| 132 | |
| 133 | R0 | | R0 is always a scratch register |
| 134 | R1 | | R1 is always a scratch register |
| 135 | [R2] | S2 | R2 and S2 represent the same physical register |
| 136 | [R3] | S1 | R3 and S1 represent the same physical register |
| 137 | [R4] | S0 | R4 and S0 represent the same physical register |
| 138 | |
| 139 | Note: SLJIT_NUMBER_OF_SCRATCH_REGISTERS would be 2 and |
| 140 | SLJIT_NUMBER_OF_SAVED_REGISTERS would be 3 for this architecture. |
| 141 | |
| 142 | Note: On all supported architectures SLJIT_NUMBER_OF_REGISTERS >= 12 |
| 143 | and SLJIT_NUMBER_OF_SAVED_REGISTERS >= 6. However, 6 registers |
| 144 | are virtual on x86-32. See below. |
| 145 | |
| 146 | The purpose of this definition is convenience: saved registers can |
| 147 | be used as extra scratch registers. For example four registers can |
| 148 | be specified as scratch registers and the fifth one as saved register |
| 149 | on the CPU above and any user code which requires four scratch |
| 150 | registers can run unmodified. The SLJIT compiler automatically saves |
| 151 | the content of the two extra scratch registers on the stack. Scratch |
| 152 | registers can also be preserved by saving their value on the stack |
| 153 | but this needs to be done manually. |
| 154 | |
| 155 | Note: To emphasize that registers assigned to R2-R4 are saved |
| 156 | registers, they are enclosed by square brackets. |
| 157 | |
| 158 | Note: sljit_emit_enter and sljit_set_context define whether a register |
| 159 | is S or R register. E.g: when 3 scratches and 1 saved is mapped |
| 160 | by sljit_emit_enter, the allowed register set will be: R0-R2 and |
| 161 | S0. Although S2 is mapped to the same position as R2, it is not |
| 162 | available in the current configuration. Furthermore the S1 register |
| 163 | is not available at all. |
| 164 | */ |
| 165 | |
| 166 | /* Scratch registers. */ |
| 167 | #define SLJIT_R0 1 |
| 168 | #define SLJIT_R1 2 |
| 169 | #define SLJIT_R2 3 |
| 170 | /* Note: on x86-32, R3 - R6 (same as S3 - S6) are emulated (they |
| 171 | are allocated on the stack). These registers are called virtual |
| 172 | and cannot be used for memory addressing (cannot be part of |
| 173 | any SLJIT_MEM1, SLJIT_MEM2 construct). There is no such |
| 174 | limitation on other CPUs. See sljit_get_register_index(). */ |
| 175 | #define SLJIT_R3 4 |
| 176 | #define SLJIT_R4 5 |
| 177 | #define SLJIT_R5 6 |
| 178 | #define SLJIT_R6 7 |
| 179 | #define SLJIT_R7 8 |
| 180 | #define SLJIT_R8 9 |
| 181 | #define SLJIT_R9 10 |
| 182 | /* All R registers provided by the architecture can be accessed by SLJIT_R(i) |
| 183 | The i parameter must be >= 0 and < SLJIT_NUMBER_OF_REGISTERS. */ |
| 184 | #define SLJIT_R(i) (1 + (i)) |
| 185 | |
| 186 | /* Saved registers. */ |
| 187 | #define SLJIT_S0 (SLJIT_NUMBER_OF_REGISTERS) |
| 188 | #define SLJIT_S1 (SLJIT_NUMBER_OF_REGISTERS - 1) |
| 189 | #define SLJIT_S2 (SLJIT_NUMBER_OF_REGISTERS - 2) |
| 190 | /* Note: on x86-32, S3 - S6 (same as R3 - R6) are emulated (they |
| 191 | are allocated on the stack). These registers are called virtual |
| 192 | and cannot be used for memory addressing (cannot be part of |
| 193 | any SLJIT_MEM1, SLJIT_MEM2 construct). There is no such |
| 194 | limitation on other CPUs. See sljit_get_register_index(). */ |
| 195 | #define SLJIT_S3 (SLJIT_NUMBER_OF_REGISTERS - 3) |
| 196 | #define SLJIT_S4 (SLJIT_NUMBER_OF_REGISTERS - 4) |
| 197 | #define SLJIT_S5 (SLJIT_NUMBER_OF_REGISTERS - 5) |
| 198 | #define SLJIT_S6 (SLJIT_NUMBER_OF_REGISTERS - 6) |
| 199 | #define SLJIT_S7 (SLJIT_NUMBER_OF_REGISTERS - 7) |
| 200 | #define SLJIT_S8 (SLJIT_NUMBER_OF_REGISTERS - 8) |
| 201 | #define SLJIT_S9 (SLJIT_NUMBER_OF_REGISTERS - 9) |
| 202 | /* All S registers provided by the architecture can be accessed by SLJIT_S(i) |
| 203 | The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_REGISTERS. */ |
| 204 | #define SLJIT_S(i) (SLJIT_NUMBER_OF_REGISTERS - (i)) |
| 205 | |
| 206 | /* Registers >= SLJIT_FIRST_SAVED_REG are saved registers. */ |
| 207 | #define SLJIT_FIRST_SAVED_REG (SLJIT_S0 - SLJIT_NUMBER_OF_SAVED_REGISTERS + 1) |
| 208 | |
| 209 | /* The SLJIT_SP provides direct access to the linear stack space allocated by |
| 210 | sljit_emit_enter. It can only be used in the following form: SLJIT_MEM1(SLJIT_SP). |
| 211 | The immediate offset is extended by the relative stack offset automatically. |
| 212 | The sljit_get_local_base can be used to obtain the real address of a value. */ |
| 213 | #define SLJIT_SP (SLJIT_NUMBER_OF_REGISTERS + 1) |
| 214 | |
| 215 | /* Return with machine word. */ |
| 216 | |
| 217 | #define SLJIT_RETURN_REG SLJIT_R0 |
| 218 | |
| 219 | /* --------------------------------------------------------------------- */ |
| 220 | /* Floating point registers */ |
| 221 | /* --------------------------------------------------------------------- */ |
| 222 | |
| 223 | /* Each floating point register can store a 32 or a 64 bit precision |
| 224 | value. The FR and FS register sets overlap in the same way as the R |
| 225 | and S register sets. See above. */ |
| 226 | |
| 227 | /* Floating point scratch registers. */ |
| 228 | #define SLJIT_FR0 1 |
| 229 | #define SLJIT_FR1 2 |
| 230 | #define SLJIT_FR2 3 |
| 231 | #define SLJIT_FR3 4 |
| 232 | #define SLJIT_FR4 5 |
| 233 | #define SLJIT_FR5 6 |
| 234 | /* All FR registers provided by the architecture can be accessed by SLJIT_FR(i) |
| 235 | The i parameter must be >= 0 and < SLJIT_NUMBER_OF_FLOAT_REGISTERS. */ |
| 236 | #define SLJIT_FR(i) (1 + (i)) |
| 237 | |
| 238 | /* Floating point saved registers. */ |
| 239 | #define SLJIT_FS0 (SLJIT_NUMBER_OF_FLOAT_REGISTERS) |
| 240 | #define SLJIT_FS1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 1) |
| 241 | #define SLJIT_FS2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 2) |
| 242 | #define SLJIT_FS3 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 3) |
| 243 | #define SLJIT_FS4 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 4) |
| 244 | #define SLJIT_FS5 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 5) |
| 245 | /* All FS registers provided by the architecture can be accessed by SLJIT_FS(i) |
| 246 | The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS. */ |
| 247 | #define SLJIT_FS(i) (SLJIT_NUMBER_OF_FLOAT_REGISTERS - (i)) |
| 248 | |
| 249 | /* Float registers >= SLJIT_FIRST_SAVED_FLOAT_REG are saved registers. */ |
| 250 | #define SLJIT_FIRST_SAVED_FLOAT_REG (SLJIT_FS0 - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS + 1) |
| 251 | |
| 252 | /* Return with floating point arg. */ |
| 253 | |
| 254 | #define SLJIT_RETURN_FREG SLJIT_FR0 |
| 255 | |
| 256 | /* --------------------------------------------------------------------- */ |
| 257 | /* Argument type definitions */ |
| 258 | /* --------------------------------------------------------------------- */ |
| 259 | |
| 260 | /* The following argument type definitions are used by sljit_emit_enter, |
| 261 | sljit_set_context, sljit_emit_call, and sljit_emit_icall functions. |
| 262 | |
| 263 | As for sljit_emit_call and sljit_emit_icall, the first integer argument |
| 264 | must be placed into SLJIT_R0, the second one into SLJIT_R1, and so on. |
| 265 | Similarly the first floating point argument must be placed into SLJIT_FR0, |
| 266 | the second one into SLJIT_FR1, and so on. |
| 267 | |
| 268 | As for sljit_emit_enter, the integer arguments can be stored in scratch |
| 269 | or saved registers. The first integer argument without _R postfix is |
| 270 | stored in SLJIT_S0, the next one in SLJIT_S1, and so on. The integer |
| 271 | arguments with _R postfix are placed into scratch registers. The index |
| 272 | of the scratch register is the count of the previous integer arguments |
| 273 | starting from SLJIT_R0. The floating point arguments are always placed |
| 274 | into SLJIT_FR0, SLJIT_FR1, and so on. |
| 275 | |
| 276 | Note: if a function is called by sljit_emit_call/sljit_emit_icall and |
| 277 | an argument is stored in a scratch register by sljit_emit_enter, |
| 278 | that argument uses the same scratch register index for both |
| 279 | integer and floating point arguments. |
| 280 | |
| 281 | Example function definition: |
| 282 | sljit_f32 SLJIT_FUNC example_c_callback(void *arg_a, |
| 283 | sljit_f64 arg_b, sljit_u32 arg_c, sljit_f32 arg_d); |
| 284 | |
| 285 | Argument type definition: |
| 286 | SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_F32) |
| 287 | | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_P, 1) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F64, 2) |
| 288 | | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_32, 3) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 4) |
| 289 | |
| 290 | Short form of argument type definition: |
| 291 | SLJIT_ARGS4(F32, P, F64, 32, F32) |
| 292 | |
| 293 | Argument passing: |
| 294 | arg_a must be placed in SLJIT_R0 |
| 295 | arg_c must be placed in SLJIT_R1 |
| 296 | arg_b must be placed in SLJIT_FR0 |
| 297 | arg_d must be placed in SLJIT_FR1 |
| 298 | |
| 299 | Examples for argument processing by sljit_emit_enter: |
| 300 | SLJIT_ARGS4(VOID, P, 32_R, F32, W) |
| 301 | Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_FR0, SLJIT_S1 |
| 302 | |
| 303 | SLJIT_ARGS4(VOID, W, W_R, W, W_R) |
| 304 | Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_S1, SLJIT_R3 |
| 305 | |
| 306 | SLJIT_ARGS4(VOID, F64, W, F32, W_R) |
| 307 | Arguments are placed into: SLJIT_FR0, SLJIT_S0, SLJIT_FR1, SLJIT_R1 |
| 308 | |
| 309 | Note: it is recommended to pass the scratch arguments first |
| 310 | followed by the saved arguments: |
| 311 | |
| 312 | SLJIT_ARGS4(VOID, W_R, W_R, W, W) |
| 313 | Arguments are placed into: SLJIT_R0, SLJIT_R1, SLJIT_S0, SLJIT_S1 |
| 314 | */ |
| 315 | |
| 316 | /* The following flag is only allowed for the integer arguments of |
| 317 | sljit_emit_enter. When the flag is set, the integer argument is |
| 318 | stored in a scratch register instead of a saved register. */ |
| 319 | #define SLJIT_ARG_TYPE_SCRATCH_REG 0x8 |
| 320 | |
| 321 | /* Void result, can only be used by SLJIT_ARG_RETURN. */ |
| 322 | #define SLJIT_ARG_TYPE_VOID 0 |
| 323 | /* Machine word sized integer argument or result. */ |
| 324 | #define SLJIT_ARG_TYPE_W 1 |
| 325 | #define SLJIT_ARG_TYPE_W_R (SLJIT_ARG_TYPE_W | SLJIT_ARG_TYPE_SCRATCH_REG) |
| 326 | /* 32 bit integer argument or result. */ |
| 327 | #define SLJIT_ARG_TYPE_32 2 |
| 328 | #define SLJIT_ARG_TYPE_32_R (SLJIT_ARG_TYPE_32 | SLJIT_ARG_TYPE_SCRATCH_REG) |
| 329 | /* Pointer sized integer argument or result. */ |
| 330 | #define SLJIT_ARG_TYPE_P 3 |
| 331 | #define SLJIT_ARG_TYPE_P_R (SLJIT_ARG_TYPE_P | SLJIT_ARG_TYPE_SCRATCH_REG) |
| 332 | /* 64 bit floating point argument or result. */ |
| 333 | #define SLJIT_ARG_TYPE_F64 4 |
| 334 | /* 32 bit floating point argument or result. */ |
| 335 | #define SLJIT_ARG_TYPE_F32 5 |
| 336 | |
| 337 | #define SLJIT_ARG_SHIFT 4 |
| 338 | #define SLJIT_ARG_RETURN(type) (type) |
| 339 | #define SLJIT_ARG_VALUE(type, idx) ((type) << ((idx) * SLJIT_ARG_SHIFT)) |
| 340 | |
| 341 | /* Simplified argument list definitions: SLJIT_ARGSn(ret, arg1, ..., argn). |
| 342 | The first parameter names the return type, the others name arguments 1..n. |
| 343 | The following definition: |
| 344 | SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_W) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 1) |
| 345 | |
| 346 | can be shortened to: |
| 347 | SLJIT_ARGS1(W, F32) |
| 348 | Each name is the suffix of an SLJIT_ARG_TYPE_ constant (VOID, W, W_R, 32, 32_R, P, P_R, F64, F32). */ |
| 349 | /* Note: SLJIT_ARGS0..4 forward their arguments to SLJIT_ARG_TO_TYPE, so an argument which is itself a defined macro (e.g. VOID in some platform headers) is expanded before it is pasted. */ |
| 350 | #define SLJIT_ARG_TO_TYPE(type) SLJIT_ARG_TYPE_ ## type |
| 351 | |
| 352 | #define SLJIT_ARGS0(ret) \ |
| 353 | SLJIT_ARG_RETURN(SLJIT_ARG_TO_TYPE(ret)) |
| 354 | |
| 355 | #define SLJIT_ARGS1(ret, arg1) \ |
| 356 | (SLJIT_ARGS0(ret) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg1), 1)) |
| 357 | |
| 358 | #define SLJIT_ARGS2(ret, arg1, arg2) \ |
| 359 | (SLJIT_ARGS1(ret, arg1) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg2), 2)) |
| 360 | |
| 361 | #define SLJIT_ARGS3(ret, arg1, arg2, arg3) \ |
| 362 | (SLJIT_ARGS2(ret, arg1, arg2) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg3), 3)) |
| 363 | |
| 364 | #define SLJIT_ARGS4(ret, arg1, arg2, arg3, arg4) \ |
| 365 | (SLJIT_ARGS3(ret, arg1, arg2, arg3) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg4), 4)) |
| 366 | |
| 367 | /* --------------------------------------------------------------------- */ |
| 368 | /* Main structures and functions */ |
| 369 | /* --------------------------------------------------------------------- */ |
| 370 | |
| 371 | /* |
| 372 | The following structures are private, and can be changed in the |
| 373 | future. Keeping them here allows code inlining. |
| 374 | */ |
| 375 | |
| 376 | struct sljit_memory_fragment { /* Private structure: one fragment of memory referenced by the compiler (see buf / abuf in sljit_compiler). */ |
| 377 | struct sljit_memory_fragment *next; /* Pointer to another fragment. NOTE(review): presumably the next one in a chain - confirm. */ |
| 378 | sljit_uw used_size; /* NOTE(review): presumably the number of bytes of memory[] in use - confirm. */ |
| 379 | /* Must be aligned to sljit_sw. NOTE(review): declared with one element; looks like a trailing array whose real storage extends past the struct - confirm against the allocator. */ |
| 380 | sljit_u8 memory[1]; |
| 381 | }; |
| 382 | |
| 383 | struct sljit_label { /* Private structure: record of one label; referenced by sljit_jump and sljit_put_label. */ |
| 384 | struct sljit_label *next; /* Pointer to another label record. NOTE(review): presumably the next label created - confirm. */ |
| 385 | sljit_uw addr; /* Address value of the label; how and when it is filled in is not visible in this header. */ |
| 386 | /* The maximum size difference. */ |
| 387 | sljit_uw size; |
| 388 | }; |
| 389 | |
| 390 | struct sljit_jump { /* Private structure: record of one jump. */ |
| 391 | struct sljit_jump *next; /* Pointer to another jump record. NOTE(review): presumably the next jump created - confirm. */ |
| 392 | sljit_uw addr; /* Address value of the jump; how and when it is filled in is not visible in this header. */ |
| 393 | /* Architecture dependent flags. */ |
| 394 | sljit_uw flags; |
| 395 | union { /* Destination of the jump: only one of the two members is meaningful at a time. */ |
| 396 | sljit_uw target; /* Destination given as a plain machine word. */ |
| 397 | struct sljit_label *label; /* Destination given as a label record. */ |
| 398 | } u; /* NOTE(review): presumably flags tells which member is active - confirm. */ |
| 399 | }; |
| 400 | |
| 401 | struct sljit_put_label { /* Private structure: record which refers to a label. */ |
| 402 | struct sljit_put_label *next; /* Pointer to another put_label record. NOTE(review): presumably the next one created - confirm. */ |
| 403 | struct sljit_label *label; /* The label this record refers to. */ |
| 404 | sljit_uw addr; /* Address value of the record; how and when it is filled in is not visible in this header. */ |
| 405 | sljit_uw flags; /* NOTE(review): meaning of the bits is not visible in this header - see the architecture specific sources. */ |
| 406 | }; |
| 407 | |
| 408 | struct sljit_const { /* Private structure: record of one constant. */ |
| 409 | struct sljit_const *next; /* Pointer to another const record. NOTE(review): presumably the next one created - confirm. */ |
| 410 | sljit_uw addr; /* Address value of the constant; how and when it is filled in is not visible in this header. */ |
| 411 | }; |
| 412 | |
| 413 | struct sljit_compiler { /* Private structure: the whole state of one compiler instance. Use the API functions instead of accessing the members directly. */ |
| 414 | sljit_s32 error; /* Current error code (SLJIT_SUCCESS or an SLJIT_ERR_ value); returned by sljit_get_compiler_error(). */ |
| 415 | sljit_s32 options; /* NOTE(review): presumably the option bits passed to the entry functions - confirm. */ |
| 416 | /* Label, jump, put_label and const records with a last_ pointer for each kind. NOTE(review): these look like the heads and tails of the next-linked lists - confirm. */ |
| 417 | struct sljit_label *labels; |
| 418 | struct sljit_jump *jumps; |
| 419 | struct sljit_put_label *put_labels; |
| 420 | struct sljit_const *consts; |
| 421 | struct sljit_label *last_label; |
| 422 | struct sljit_jump *last_jump; |
| 423 | struct sljit_const *last_const; |
| 424 | struct sljit_put_label *last_put_label; |
| 425 | /* NOTE(review): allocator_data and exec_allocator_data are presumably the values passed to sljit_create_compiler() - confirm. */ |
| 426 | void *allocator_data; |
| 427 | void *exec_allocator_data; |
| 428 | struct sljit_memory_fragment *buf; /* NOTE(review): buf and abuf are two separate fragment pointers; their distinct roles are not visible in this header. */ |
| 429 | struct sljit_memory_fragment *abuf; |
| 430 | |
| 431 | /* Available scratch registers. */ |
| 432 | sljit_s32 scratches; |
| 433 | /* Available saved registers. */ |
| 434 | sljit_s32 saveds; |
| 435 | /* Available float scratch registers. */ |
| 436 | sljit_s32 fscratches; |
| 437 | /* Available float saved registers. */ |
| 438 | sljit_s32 fsaveds; |
| 439 | /* Local stack size. */ |
| 440 | sljit_s32 local_size; |
| 441 | /* Maximum code size. */ |
| 442 | sljit_uw size; |
| 443 | /* Relative offset of the executable mapping from the writable mapping. Returned by sljit_get_executable_offset(). */ |
| 444 | sljit_sw executable_offset; |
| 445 | /* Executable size for statistical purposes. Returned by sljit_get_generated_code_size(). */ |
| 446 | sljit_uw executable_size; |
| 447 | /* The members below are present only when the corresponding configuration macro is defined and non-zero. */ |
| 448 | #if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) |
| 449 | sljit_s32 status_flags_state; |
| 450 | #endif |
| 451 | /* NOTE(review): args_size and cache_arg / cache_argw are declared under more than one guard below, which assumes that these guards are never active together - confirm. */ |
| 452 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
| 453 | sljit_s32 args_size; |
| 454 | #endif |
| 455 | |
| 456 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
| 457 | sljit_s32 mode32; |
| 458 | #endif |
| 459 | |
| 460 | #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) |
| 461 | /* Constant pool handling. */ |
| 462 | sljit_uw *cpool; |
| 463 | sljit_u8 *cpool_unique; |
| 464 | sljit_uw cpool_diff; |
| 465 | sljit_uw cpool_fill; |
| 466 | /* Other members. */ |
| 467 | /* Contains pointer, "ldr pc, [...]" pairs. */ |
| 468 | sljit_uw patches; |
| 469 | #endif |
| 470 | |
| 471 | #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) |
| 472 | /* Temporary fields. */ |
| 473 | sljit_uw shift_imm; |
| 474 | #endif /* SLJIT_CONFIG_ARM_V5 || SLJIT_CONFIG_ARM_V7 */ |
| 475 | |
| 476 | #if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__) |
| 477 | sljit_uw args_size; |
| 478 | #endif |
| 479 | |
| 480 | #if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) |
| 481 | sljit_u32 imm; |
| 482 | #endif |
| 483 | |
| 484 | #if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) |
| 485 | sljit_s32 delay_slot; |
| 486 | sljit_s32 cache_arg; |
| 487 | sljit_sw cache_argw; |
| 488 | #endif |
| 489 | |
| 490 | #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) |
| 491 | sljit_uw args_size; |
| 492 | #endif |
| 493 | |
| 494 | #if (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) |
| 495 | sljit_s32 cache_arg; |
| 496 | sljit_sw cache_argw; |
| 497 | #endif |
| 498 | |
| 499 | #if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) |
| 500 | /* Need to allocate register save area to make calls. */ |
| 501 | sljit_s32 mode; |
| 502 | #endif |
| 503 | |
| 504 | #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) |
| 505 | FILE* verbose; /* Verbose output stream. NOTE(review): presumably set by sljit_compiler_verbose(), where NULL disables verbose - confirm. */ |
| 506 | #endif |
| 507 | |
| 508 | #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ |
| 509 | || (defined SLJIT_DEBUG && SLJIT_DEBUG) |
| 510 | /* Flags specified by the last arithmetic instruction. |
| 511 | It contains the type of the variable flag. */ |
| 512 | sljit_s32 last_flags; |
| 513 | /* Return value type set by entry functions. */ |
| 514 | sljit_s32 last_return; |
| 515 | /* Local size passed to entry functions. */ |
| 516 | sljit_s32 logical_local_size; |
| 517 | #endif |
| 518 | |
| 519 | #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ |
| 520 | || (defined SLJIT_DEBUG && SLJIT_DEBUG) \ |
| 521 | || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) |
| 522 | /* Trust arguments when an API function is called. |
| 523 | Used internally for calling API functions. */ |
| 524 | sljit_s32 skip_checks; |
| 525 | #endif |
| 526 | }; |
| 527 | |
| 528 | /* --------------------------------------------------------------------- */ |
| 529 | /* Main functions */ |
| 530 | /* --------------------------------------------------------------------- */ |
| 531 | |
| 532 | /* Creates an SLJIT compiler. The allocator_data is required by some |
| 533 | custom memory managers. This pointer is passed to SLJIT_MALLOC |
| 534 | and SLJIT_FREE macros. Most allocators (including the default |
| 535 | one) ignore this value, and it is recommended to pass NULL |
| 536 | as a dummy value for allocator_data. The exec_allocator_data |
| 537 | has the same purpose but this one is passed to SLJIT_MALLOC_EXEC / |
| 538 | SLJIT_FREE_EXEC functions. |
| 539 | |
| 540 | Returns NULL if failed. */ |
| 541 | SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data, void *exec_allocator_data); |
| 542 | |
| 543 | /* Frees everything except the compiled machine code. */ |
| 544 | SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler); |
| 545 | |
| 546 | /* Returns the current error code (the error member of the compiler). If an |
| 547 | error occurs, future calls which use the same compiler argument return |
| 548 | early with the same error code. Thus there is no need for checking the error |
| 549 | after every call, it is enough to do it after the code is compiled. Removing |
| 550 | these checks increases the performance of the compiling process. */ |
| 551 | static SLJIT_INLINE sljit_s32 sljit_get_compiler_error(struct sljit_compiler *compiler) { return compiler->error; } |
| 552 | |
| 553 | /* Sets the compiler error code to SLJIT_ERR_ALLOC_FAILED except |
| 554 | if an error was detected before. After the error code is set |
| 555 | the compiler behaves as if the allocation failure happened |
| 556 | during an SLJIT function call. This can greatly simplify error |
| 557 | checking, since it is enough to check the compiler status |
| 558 | after the code is compiled. */ |
| 559 | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler); |
| 560 | |
| 561 | /* |
| 562 | Allocate a small amount of memory. The size must be <= 64 bytes on 32 bit, |
| 563 | and <= 128 bytes on 64 bit architectures. The memory area is owned by the |
| 564 | compiler, and freed by sljit_free_compiler. The returned pointer is |
| 565 | sizeof(sljit_sw) aligned. Excellent for allocating small blocks during |
| 566 | compiling, and no need to worry about freeing them. The size is enough |
| 567 | to contain at most 16 pointers. If the size is outside of the range, |
| 568 | the function will return with NULL. However, this return value does not |
| 569 | indicate that there is no more memory (does not set the current error code |
| 570 | of the compiler to out-of-memory status). |
| 571 | */ |
| 572 | SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_s32 size); |
| 573 | |
| 574 | #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) |
| 575 | /* Passing NULL disables verbose. */ |
| 576 | SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose); |
| 577 | #endif |
| 578 | |
| 579 | /* |
| 580 | Create executable code from the instruction stream. This is the final step |
| 581 | of the code generation so no more instructions can be emitted after this call. |
| 582 | */ |
| 583 | |
| 584 | SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler); |
| 585 | |
| 586 | /* Free executable code. */ |
| 587 | |
| 588 | SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data); |
| 589 | |
| 590 | /* |
| 591 | When the protected executable allocator is used the JIT code is mapped |
| 592 | twice. The first mapping has read/write and the second mapping has read/exec |
| 593 | permissions. This function returns with the relative offset of the executable |
| 594 | mapping using the writable mapping as the base after the machine code is |
| 595 | successfully generated. The returned value is always 0 for the normal executable |
| 596 | allocator, since it uses only one mapping with read/write/exec permissions. |
| 597 | Dynamic code modifications require this value. |
| 598 | |
| 599 | Before a successful code generation, this function returns with 0. |
| 600 | The value is read from the executable_offset member of the compiler. */ |
| 601 | static SLJIT_INLINE sljit_sw sljit_get_executable_offset(struct sljit_compiler *compiler) { return compiler->executable_offset; } |
| 602 | |
| 603 | /* |
| 604 | The executable memory consumption of the generated code can be retrieved by |
| 605 | this function. The returned value can be used for statistical purposes. |
| 606 | It is read from the executable_size member of the compiler. |
| 607 | Before a successful code generation, this function returns with 0. |
| 608 | */ |
| 609 | static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler *compiler) { return compiler->executable_size; } |
| 610 | |
| 611 | /* Returns with non-zero if the feature or limitation type passed as its |
| 612 | argument is present on the current CPU. The return value is one, if a |
| 613 | feature is fully supported, and it is two, if partially supported. |
| 614 | |
| 615 | Some features (e.g. floating point operations) require hardware (CPU) |
| 616 | support while others (e.g. move with update) are emulated if not available. |
| 617 | However, even when a feature is emulated, specialized code paths may be |
| 618 | faster than the emulation. Some limitations are emulated as well so their |
| 619 | general case is supported but it has extra performance costs. */ |
| 620 | |
| 621 | /* [Not emulated] Floating-point support is available. */ |
| 622 | #define SLJIT_HAS_FPU 0 |
| 623 | /* [Limitation] Some registers are virtual registers. */ |
| 624 | #define SLJIT_HAS_VIRTUAL_REGISTERS 1 |
| 625 | /* [Emulated] Has zero register (setting a memory location to zero is efficient). */ |
| 626 | #define SLJIT_HAS_ZERO_REGISTER 2 |
| 627 | /* [Emulated] Count leading zero is supported. */ |
| 628 | #define SLJIT_HAS_CLZ 3 |
| 629 | /* [Emulated] Count trailing zero is supported. */ |
| 630 | #define SLJIT_HAS_CTZ 4 |
| 631 | /* [Emulated] Rotate left/right is supported. */ |
| 632 | #define SLJIT_HAS_ROT 5 |
| 633 | /* [Emulated] Conditional move is supported. */ |
| 634 | #define SLJIT_HAS_CMOV 6 |
| 635 | /* [Emulated] Prefetch instruction is available (emulated as a nop). */ |
| 636 | #define SLJIT_HAS_PREFETCH 7 |
| 637 | |
| 638 | #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) |
| 639 | /* [Not emulated] SSE2 support is available on x86. */ |
| 640 | #define SLJIT_HAS_SSE2 100 |
| 641 | #endif |
| 642 | |
| 643 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type); |
| 644 | |
| 645 | /* If type is between SLJIT_ORDERED_EQUAL and SLJIT_ORDERED_LESS_EQUAL, |
| 646 | sljit_cmp_info returns one, if the cpu supports the passed floating |
| 647 | point comparison type. |
| 648 | |
| 649 | If type is SLJIT_UNORDERED or SLJIT_ORDERED, sljit_cmp_info returns |
| 650 | one, if the cpu supports checking the unordered comparison result |
| 651 | regardless of the comparison type passed to the comparison instruction. |
| 652 | The returned value is always one, if there is at least one type between |
| 653 | SLJIT_ORDERED_EQUAL and SLJIT_ORDERED_LESS_EQUAL where sljit_cmp_info |
| 654 | returns with a zero value. |
| 655 | |
| 656 | Otherwise it returns zero. */ |
| 657 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type); |
| 658 | |
| 659 | /* The following functions generate machine code. If there is no |
| 660 | error, they return with SLJIT_SUCCESS, otherwise they return |
| 661 | with an error code. */ |
| 662 | |
| 663 | /* |
| 664 | The executable code is a function from the viewpoint of the C |
| 665 | language. The function calls must obey the ABI (Application |
| 666 | Binary Interface) of the platform, which specifies the purpose of |
| 667 | machine registers and stack handling among other things. The |
| 668 | sljit_emit_enter function emits the necessary instructions for |
| 669 | setting up a new context for the executable code. This is often |
| 670 | called the function prologue. Furthermore the options argument |
| 671 | can be used to pass configuration options to the compiler. The |
| 672 | available options are listed before sljit_emit_enter. |
| 673 | |
| 674 | The function argument list is specified by the SLJIT_ARGSx |
| 675 | (SLJIT_ARGS0 .. SLJIT_ARGS4) macros. Currently maximum four |
| 676 | arguments are supported. See the description of SLJIT_ARGSx |
| 677 | macros about argument passing. Furthermore the register set |
| 678 | used by the function must be declared as well. The number of |
| 679 | scratch and saved registers available to the function must |
| 680 | be passed to sljit_emit_enter. Only R registers between R0 |
| 681 | and "scratches" argument can be used later. E.g. if "scratches" |
| 682 | is set to two, the scratch register set will be limited to |
| 683 | SLJIT_R0 and SLJIT_R1. The S registers and the floating point |
| 684 | registers ("fscratches" and "fsaveds") are specified in a |
| 685 | similar manner. The sljit_emit_enter is also capable of |
| 686 | allocating a stack space for local data. The "local_size" |
| 687 | argument contains the size in bytes of this local area, and |
| 688 | it can be accessed using SLJIT_MEM1(SLJIT_SP). The memory |
| 689 | area between SLJIT_SP (inclusive) and SLJIT_SP + local_size |
| 690 | (exclusive) can be modified freely until the function returns. |
| 691 | The stack space is not initialized to zero. |
| 692 | |
| 693 | Note: the following conditions must be met: |
| 694 | 0 <= scratches <= SLJIT_NUMBER_OF_REGISTERS |
| 695 | 0 <= saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS |
| 696 | scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS |
| 697 | 0 <= fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS |
| 698 | 0 <= fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS |
| 699 | fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS |
| 700 | |
| 701 | Note: the compiler can use saved registers as scratch registers, |
| 702 | but the opposite is not supported |
| 703 | |
| 704 | Note: every call of sljit_emit_enter and sljit_set_context |
| 705 | overwrites the previous context. |
| 706 | */ |
| 707 | |
| 708 | /* Saved registers between SLJIT_S0 and SLJIT_S(n - 1) (inclusive) |
| 709 | are not saved / restored on function enter / return. Instead, |
| 710 | these registers can be used to pass / return data (such as |
| 711 | global / local context pointers) across function calls. The |
| 712 | value of n must be between 1 and 3. This option is only |
| 713 | supported by SLJIT_ENTER_REG_ARG calling convention. */ |
| 714 | #define SLJIT_ENTER_KEEP(n) (n) |
| 715 | |
| 716 | /* The compiled function uses an SLJIT specific register argument |
| 717 | calling convention. This is a lightweight function call type where |
| 718 | both the caller and the called functions must be compiled by |
| 719 | SLJIT. The type argument of the call must be SLJIT_CALL_REG_ARG |
| 720 | and all arguments must be stored in scratch registers. */ |
| 721 | #define SLJIT_ENTER_REG_ARG 0x00000004 |
| 722 | |
| 723 | /* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */ |
| 724 | #define SLJIT_MAX_LOCAL_SIZE 65536 |
| 725 | |
| 726 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, |
| 727 | sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, |
| 728 | sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size); |
| 729 | |
| 730 | /* The SLJIT compiler has a current context (which contains the local |
| 731 | stack space size, number of used registers, etc.) which is initialized |
| 732 | by sljit_emit_enter. Several functions (such as sljit_emit_return) |
| 733 | require this context to be able to generate the appropriate code. |
| 734 | However, some code fragments (compiled separately) may have no |
| 735 | normal entry point so their context is unknown to the compiler. |
| 736 | |
| 737 | The sljit_set_context and sljit_emit_enter have the same arguments, |
| 738 | but sljit_set_context does not generate any machine code. |
| 739 | |
| 740 | Note: every call of sljit_emit_enter and sljit_set_context overwrites |
| 741 | the previous context. */ |
| 742 | |
| 743 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, |
| 744 | sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, |
| 745 | sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size); |
| 746 | |
| 747 | /* Return to the caller function. The sljit_emit_return_void function |
| 748 | does not return with any value. The sljit_emit_return function returns |
| 749 | with a single value loaded from its source operand. The load operation |
| 750 | can be between SLJIT_MOV and SLJIT_MOV_P (see sljit_emit_op1) and |
| 751 | SLJIT_MOV_F32/SLJIT_MOV_F64 (see sljit_emit_fop1) depending on the |
| 752 | return value specified by sljit_emit_enter/sljit_set_context. */ |
| 753 | |
| 754 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler); |
| 755 | |
| 756 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, |
| 757 | sljit_s32 src, sljit_sw srcw); |
| 758 | |
| 759 | /* Restores the saved registers and frees the stack area, then the execution |
| 760 | continues from the address specified by the source operand. This |
| 761 | operation is similar to sljit_emit_return, but it ignores the return |
| 762 | address. The code where the execution continues should use the same context |
| 763 | as the caller function (see sljit_set_context). A word (pointer) value |
| 764 | can be passed in the SLJIT_RETURN_REG register. This function can be used |
| 765 | to jump to exception handlers. */ |
| 766 | |
| 767 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, |
| 768 | sljit_s32 src, sljit_sw srcw); |
| 769 | |
| 770 | /* Generating entry and exit points for fast call functions (see SLJIT_FAST_CALL). |
| 771 | Both sljit_emit_fast_enter and SLJIT_FAST_RETURN operations preserve the |
| 772 | values of all registers and stack frame. The return address is stored in the |
| 773 | dst argument of sljit_emit_fast_enter, and this return address can be passed |
| 774 | to SLJIT_FAST_RETURN to continue the execution after the fast call. |
| 775 | |
| 776 | Fast calls are cheap operations (usually only a single call instruction is |
| 777 | emitted) but they do not preserve any registers. However the callee function |
| 778 | can freely use / update any registers and the local area which can be |
| 779 | efficiently exploited by various optimizations. Registers can be saved |
| 780 | and restored manually if needed. |
| 781 | |
| 782 | Although returning to a different address by SLJIT_FAST_RETURN is possible, |
| 783 | this address usually cannot be predicted by the return address predictor of |
| 784 | modern CPUs which may reduce performance. Furthermore certain security |
| 785 | enhancement technologies such as Intel Control-flow Enforcement Technology |
| 786 | (CET) may disallow returning to a different address. |
| 787 | |
| 788 | Flags: - (does not modify flags). */ |
| 789 | |
| 790 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw); |
| 791 | |
| 792 | /* |
| 793 | Source and destination operands for arithmetical instructions |
| 794 | imm - a simple immediate value (cannot be used as a destination) |
| 795 | reg - any of the available registers (immediate argument must be 0) |
| 796 | [imm] - absolute memory address |
| 797 | [reg+imm] - indirect memory address |
| 798 | [reg+(reg<<imm)] - indirect indexed memory address (shift must be between 0 and 3) |
| 799 | useful for accessing arrays (fully supported by both x86 and |
| 800 | ARM architectures, and cheap operation on others) |
| 801 | */ |
| 802 | |
| 803 | /* |
| 804 | IMPORTANT NOTE: memory accesses MUST be naturally aligned unless |
| 805 | SLJIT_UNALIGNED macro is defined and its value is 1. |
| 806 | |
| 807 | length | alignment |
| 808 | ---------+----------- |
| 809 | byte | 1 byte (any physical_address is accepted) |
| 810 | half | 2 byte (physical_address & 0x1 == 0) |
| 811 | int | 4 byte (physical_address & 0x3 == 0) |
| 812 | word | 4 byte if SLJIT_32BIT_ARCHITECTURE is defined and its value is 1 |
| 813 | | 8 byte if SLJIT_64BIT_ARCHITECTURE is defined and its value is 1 |
| 814 | pointer | size of sljit_p type (4 byte on 32 bit machines, 4 or 8 byte |
| 815 | | on 64 bit machines) |
| 816 | |
| 817 | Note: Different architectures have different addressing limitations. |
| 818 | A single instruction is enough for the following addressing |
| 819 | modes. Other addressing modes are emulated by instruction |
| 820 | sequences. This information could help to improve those code |
| 821 | generators which focus on only a few architectures. |
| 822 | |
| 823 | x86: [reg+imm], -2^32+1 <= imm <= 2^32-1 (full address space on x86-32) |
| 824 | [reg+(reg<<imm)] is supported |
| 825 | [imm], -2^32+1 <= imm <= 2^32-1 is supported |
| 826 | Write-back is not supported |
| 827 | arm: [reg+imm], -4095 <= imm <= 4095 (or -255 <= imm <= 255 for signed |
| 828 | bytes, any halves or floating point values) |
| 829 | [reg+(reg<<imm)] is supported |
| 830 | Write-back is supported |
| 831 | arm-t2: [reg+imm], -255 <= imm <= 4095 |
| 832 | [reg+(reg<<imm)] is supported |
| 833 | Write back is supported only for [reg+imm], where -255 <= imm <= 255 |
| 834 | arm64: [reg+imm], -256 <= imm <= 255, 0 <= aligned imm <= 4095 * alignment |
| 835 | [reg+(reg<<imm)] is supported |
| 836 | Write back is supported only for [reg+imm], where -256 <= imm <= 255 |
| 837 | ppc: [reg+imm], -65536 <= imm <= 65535. 64 bit loads/stores and 32 bit |
| 838 | signed load on 64 bit requires immediates divisible by 4. |
| 839 | [reg+imm] is not supported for signed 8 bit values. |
| 840 | [reg+reg] is supported |
| 841 | Write-back is supported except for one instruction: 32 bit signed |
| 842 | load with [reg+imm] addressing mode on 64 bit. |
| 843 | mips: [reg+imm], -65536 <= imm <= 65535 |
| 844 | Write-back is not supported |
| 845 | riscv: [reg+imm], -2048 <= imm <= 2047 |
| 846 | Write-back is not supported |
| 847 | s390x: [reg+imm], -2^19 <= imm < 2^19 |
| 848 | [reg+reg] is supported |
| 849 | Write-back is not supported |
| 850 | */ |
| 851 | |
| 852 | /* Macros for specifying operand types. */ |
| 853 | #define SLJIT_MEM 0x80 |
| 854 | #define SLJIT_MEM0() (SLJIT_MEM) |
| 855 | #define SLJIT_MEM1(r1) (SLJIT_MEM | (r1)) |
| 856 | #define SLJIT_MEM2(r1, r2) (SLJIT_MEM | (r1) | ((r2) << 8)) |
| 857 | #define SLJIT_IMM 0x40 |
| 858 | #define SLJIT_REG_PAIR(r1, r2) ((r1) | ((r2) << 8)) |
| 859 | |
| 860 | /* Sets 32 bit operation mode on 64 bit CPUs. This option is ignored on |
| 861 | 32 bit CPUs. When this option is set for an arithmetic operation, only |
| 862 | the lower 32 bits of the input registers are used, and the CPU status |
| 863 | flags are set according to the 32 bit result. Although the higher 32 bit |
| 864 | of the input and the result registers are not defined by SLJIT, it might |
| 865 | be defined by the CPU architecture (e.g. MIPS). To satisfy these CPU |
| 866 | requirements all source registers must be the result of those operations |
| 867 | where this option was also set. Memory loads read 32 bit values rather |
| 868 | than 64 bit ones. In other words 32 bit and 64 bit operations cannot be |
| 869 | mixed. The only exception is SLJIT_MOV32 whose source register can hold |
| 870 | any 32 or 64 bit value, and it is converted to a 32 bit compatible format |
| 871 | first. When the source and destination registers are the same, this |
| 872 | conversion is free (no instructions are emitted) on most CPUs. A 32 bit |
| 873 | value can also be converted to a 64 bit value by SLJIT_MOV_S32 |
| 874 | (sign extension) or SLJIT_MOV_U32 (zero extension). |
| 875 | |
| 876 | As for floating-point operations, this option sets 32 bit single |
| 877 | precision mode. Similar to the integer operations, all register arguments |
| 878 | must be the result of those operations where this option was also set. |
| 879 | |
| 880 | Note: memory addressing always uses 64 bit values on 64 bit systems so |
| 881 | the result of a 32 bit operation must not be used with SLJIT_MEMx |
| 882 | macros. |
| 883 | |
| 884 | This option is part of the instruction name, so there is no need to |
| 885 | manually set it. E.g: |
| 886 | |
| 887 | SLJIT_ADD32 == (SLJIT_ADD | SLJIT_32) */ |
| 888 | #define SLJIT_32 0x100 |
| 889 | |
| 890 | /* Many CPUs (x86, ARM, PPC) have status flag bits which can be set according |
| 891 | to the result of an operation. Other CPUs (MIPS) do not have status |
| 892 | flag bits, and results must be stored in registers. To cover both |
| 893 | architecture types efficiently only two flags are defined by SLJIT: |
| 894 | |
| 895 | * Zero (equal) flag: it is set if the result is zero |
| 896 | * Variable flag: its value is defined by the arithmetic operation |
| 897 | |
| 898 | SLJIT instructions can set any or both of these flags. The value of |
| 899 | these flags is undefined if the instruction does not specify their |
| 900 | value. The description of each instruction contains the list of |
| 901 | allowed flag types. |
| 902 | |
| 903 | Note: the logical or operation can be used to set flags. |
| 904 | |
| 905 | Example: SLJIT_ADD can set the Z, OVERFLOW, CARRY flags hence |
| 906 | |
| 907 | sljit_op2(..., SLJIT_ADD, ...) |
| 908 | Both the zero and variable flags are undefined so they can |
| 909 | have any value after the operation is completed. |
| 910 | |
| 911 | sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z, ...) |
| 912 | Sets the zero flag if the result is zero, clears it otherwise. |
| 913 | The variable flag is undefined. |
| 914 | |
| 915 | sljit_op2(..., SLJIT_ADD | SLJIT_SET_OVERFLOW, ...) |
| 916 | Sets the variable flag if an integer overflow occurs, clears |
| 917 | it otherwise. The zero flag is undefined. |
| 918 | |
| 919 | sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z | SLJIT_SET_CARRY, ...) |
| 920 | Sets the zero flag if the result is zero, clears it otherwise. |
| 921 | Sets the variable flag if unsigned overflow (carry) occurs, |
| 922 | clears it otherwise. |
| 923 | |
| 924 | Certain instructions (e.g. SLJIT_MOV) do not modify flags, so |
| 925 | status flags are unchanged. |
| 926 | |
| 927 | Example: |
| 928 | |
| 929 | sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z, ...) |
| 930 | sljit_op1(..., SLJIT_MOV, ...) |
| 931 | Zero flag is set according to the result of SLJIT_ADD. |
| 932 | |
| 933 | sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z, ...) |
| 934 | sljit_op2(..., SLJIT_ADD, ...) |
| 935 | Zero flag has unknown value. |
| 936 | |
| 937 | These flags can be used for code optimization. E.g. a fast loop can be |
| 938 | implemented by decreasing a counter register and setting the zero flag |
| 939 | using a single instruction. The zero flag can be used by a |
| 940 | conditional jump to restart the loop. A single comparison can set the |
| 941 | zero and less flags to check if a value is less, equal, or greater |
| 942 | than another value. |
| 943 | |
| 944 | Motivation: although some CPUs can set a large number of flag bits, |
| 945 | usually their values are ignored or only a few of them are used. Emulating |
| 946 | a large number of flags on systems without a flag register is complicated |
| 947 | so SLJIT instructions must specify the flag they want to use and only |
| 948 | that flag is computed. The last arithmetic instruction can be repeated if |
| 949 | multiple flags need to be checked. |
| 950 | */ |
| 951 | |
| 952 | /* Set Zero status flag. */ |
| 953 | #define SLJIT_SET_Z 0x0200 |
| 954 | /* Set the variable status flag if condition is true. |
| 955 | See comparison types (e.g. SLJIT_SET_LESS, SLJIT_SET_F_EQUAL). */ |
| 956 | #define SLJIT_SET(condition) ((condition) << 10) |
| 957 | |
| 958 | /* Starting index of opcodes for sljit_emit_op0. */ |
| 959 | #define SLJIT_OP0_BASE 0 |
| 960 | |
| 961 | /* Flags: - (does not modify flags) |
| 962 | Note: breakpoint instruction is not supported by all architectures (e.g. ppc). |
| 963 | It falls back to SLJIT_NOP in those cases. */ |
| 964 | #define SLJIT_BREAKPOINT (SLJIT_OP0_BASE + 0) |
| 965 | /* Flags: - (does not modify flags) |
| 966 | Note: may or may not cause an extra cycle wait |
| 967 | it can even decrease the runtime in a few cases. */ |
| 968 | #define SLJIT_NOP (SLJIT_OP0_BASE + 1) |
| 969 | /* Flags: - (may destroy flags) |
| 970 | Unsigned multiplication of SLJIT_R0 and SLJIT_R1. |
| 971 | Result is placed into SLJIT_R1:SLJIT_R0 (high:low) word */ |
| 972 | #define SLJIT_LMUL_UW (SLJIT_OP0_BASE + 2) |
| 973 | /* Flags: - (may destroy flags) |
| 974 | Signed multiplication of SLJIT_R0 and SLJIT_R1. |
| 975 | Result is placed into SLJIT_R1:SLJIT_R0 (high:low) word */ |
| 976 | #define SLJIT_LMUL_SW (SLJIT_OP0_BASE + 3) |
| 977 | /* Flags: - (may destroy flags) |
| 978 | Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1. |
| 979 | The result is placed into SLJIT_R0 and the remainder into SLJIT_R1. |
| 980 | Note: if SLJIT_R1 is 0, the behaviour is undefined. */ |
| 981 | #define SLJIT_DIVMOD_UW (SLJIT_OP0_BASE + 4) |
| 982 | #define SLJIT_DIVMOD_U32 (SLJIT_DIVMOD_UW | SLJIT_32) |
| 983 | /* Flags: - (may destroy flags) |
| 984 | Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1. |
| 985 | The result is placed into SLJIT_R0 and the remainder into SLJIT_R1. |
| 986 | Note: if SLJIT_R1 is 0, the behaviour is undefined. |
| 987 | Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00), |
| 988 | the behaviour is undefined. */ |
| 989 | #define SLJIT_DIVMOD_SW (SLJIT_OP0_BASE + 5) |
| 990 | #define SLJIT_DIVMOD_S32 (SLJIT_DIVMOD_SW | SLJIT_32) |
| 991 | /* Flags: - (may destroy flags) |
| 992 | Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1. |
| 993 | The result is placed into SLJIT_R0. SLJIT_R1 preserves its value. |
| 994 | Note: if SLJIT_R1 is 0, the behaviour is undefined. */ |
| 995 | #define SLJIT_DIV_UW (SLJIT_OP0_BASE + 6) |
| 996 | #define SLJIT_DIV_U32 (SLJIT_DIV_UW | SLJIT_32) |
| 997 | /* Flags: - (may destroy flags) |
| 998 | Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1. |
| 999 | The result is placed into SLJIT_R0. SLJIT_R1 preserves its value. |
| 1000 | Note: if SLJIT_R1 is 0, the behaviour is undefined. |
| 1001 | Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00), |
| 1002 | the behaviour is undefined. */ |
| 1003 | #define SLJIT_DIV_SW (SLJIT_OP0_BASE + 7) |
| 1004 | #define SLJIT_DIV_S32 (SLJIT_DIV_SW | SLJIT_32) |
| 1005 | /* Flags: - (does not modify flags) |
| 1006 | ENDBR32 instruction for x86-32 and ENDBR64 instruction for x86-64 |
| 1007 | when Intel Control-flow Enforcement Technology (CET) is enabled. |
| 1008 | No instructions are emitted for other architectures. */ |
| 1009 | #define SLJIT_ENDBR (SLJIT_OP0_BASE + 8) |
| 1010 | /* Flags: - (may destroy flags) |
| 1011 | Skip stack frames before return when Intel Control-flow |
| 1012 | Enforcement Technology (CET) is enabled. No instructions |
| 1013 | are emitted for other architectures. */ |
| 1014 | #define SLJIT_SKIP_FRAMES_BEFORE_RETURN (SLJIT_OP0_BASE + 9) |
| 1015 | |
| 1016 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op); |
| 1017 | |
| 1018 | /* Starting index of opcodes for sljit_emit_op1. */ |
| 1019 | #define SLJIT_OP1_BASE 32 |
| 1020 | |
| 1021 | /* The MOV instruction transfers data from source to destination. |
| 1022 | |
| 1023 | MOV instruction suffixes: |
| 1024 | |
| 1025 | U8 - unsigned 8 bit data transfer |
| 1026 | S8 - signed 8 bit data transfer |
| 1027 | U16 - unsigned 16 bit data transfer |
| 1028 | S16 - signed 16 bit data transfer |
| 1029 | U32 - unsigned int (32 bit) data transfer |
| 1030 | S32 - signed int (32 bit) data transfer |
| 1031 | P - pointer (sljit_p) data transfer |
| 1032 | */ |
| 1033 | |
| 1034 | /* Flags: - (does not modify flags) */ |
| 1035 | #define SLJIT_MOV (SLJIT_OP1_BASE + 0) |
| 1036 | /* Flags: - (does not modify flags) */ |
| 1037 | #define SLJIT_MOV_U8 (SLJIT_OP1_BASE + 1) |
| 1038 | #define SLJIT_MOV32_U8 (SLJIT_MOV_U8 | SLJIT_32) |
| 1039 | /* Flags: - (does not modify flags) */ |
| 1040 | #define SLJIT_MOV_S8 (SLJIT_OP1_BASE + 2) |
| 1041 | #define SLJIT_MOV32_S8 (SLJIT_MOV_S8 | SLJIT_32) |
| 1042 | /* Flags: - (does not modify flags) */ |
| 1043 | #define SLJIT_MOV_U16 (SLJIT_OP1_BASE + 3) |
| 1044 | #define SLJIT_MOV32_U16 (SLJIT_MOV_U16 | SLJIT_32) |
| 1045 | /* Flags: - (does not modify flags) */ |
| 1046 | #define SLJIT_MOV_S16 (SLJIT_OP1_BASE + 4) |
| 1047 | #define SLJIT_MOV32_S16 (SLJIT_MOV_S16 | SLJIT_32) |
| 1048 | /* Flags: - (does not modify flags) |
| 1049 | Note: no SLJIT_MOV32_U32 form, since it is the same as SLJIT_MOV32 */ |
| 1050 | #define SLJIT_MOV_U32 (SLJIT_OP1_BASE + 5) |
| 1051 | /* Flags: - (does not modify flags) |
| 1052 | Note: no SLJIT_MOV32_S32 form, since it is the same as SLJIT_MOV32 */ |
| 1053 | #define SLJIT_MOV_S32 (SLJIT_OP1_BASE + 6) |
| 1054 | /* Flags: - (does not modify flags) */ |
| 1055 | #define SLJIT_MOV32 (SLJIT_OP1_BASE + 7) |
| 1056 | /* Flags: - (does not modify flags) |
| 1057 | Note: loads a pointer sized data, useful on x32 mode (a 64 bit mode |
| 1058 | on x86-64 which uses 32 bit pointers) or similar compiling modes */ |
| 1059 | #define SLJIT_MOV_P (SLJIT_OP1_BASE + 8) |
| 1060 | /* Flags: Z |
| 1061 | Note: immediate source argument is not supported */ |
| 1062 | #define SLJIT_NOT (SLJIT_OP1_BASE + 9) |
| 1063 | #define SLJIT_NOT32 (SLJIT_NOT | SLJIT_32) |
| 1064 | /* Count leading zeroes |
| 1065 | Flags: - (may destroy flags) |
| 1066 | Note: immediate source argument is not supported */ |
| 1067 | #define SLJIT_CLZ (SLJIT_OP1_BASE + 10) |
| 1068 | #define SLJIT_CLZ32 (SLJIT_CLZ | SLJIT_32) |
| 1069 | /* Count trailing zeroes |
| 1070 | Flags: - (may destroy flags) |
| 1071 | Note: immediate source argument is not supported */ |
| 1072 | #define SLJIT_CTZ (SLJIT_OP1_BASE + 11) |
| 1073 | #define SLJIT_CTZ32 (SLJIT_CTZ | SLJIT_32) |
| 1074 | |
| 1075 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, |
| 1076 | sljit_s32 dst, sljit_sw dstw, |
| 1077 | sljit_s32 src, sljit_sw srcw); |
| 1078 | |
| 1079 | /* Starting index of opcodes for sljit_emit_op2. */ |
| 1080 | #define SLJIT_OP2_BASE 96 |
| 1081 | |
| 1082 | /* Flags: Z | OVERFLOW | CARRY */ |
| 1083 | #define SLJIT_ADD (SLJIT_OP2_BASE + 0) |
| 1084 | #define SLJIT_ADD32 (SLJIT_ADD | SLJIT_32) |
| 1085 | /* Flags: CARRY */ |
| 1086 | #define SLJIT_ADDC (SLJIT_OP2_BASE + 1) |
| 1087 | #define SLJIT_ADDC32 (SLJIT_ADDC | SLJIT_32) |
| 1088 | /* Flags: Z | LESS | GREATER_EQUAL | GREATER | LESS_EQUAL |
| 1089 | SIG_LESS | SIG_GREATER_EQUAL | SIG_GREATER |
| 1090 | SIG_LESS_EQUAL | OVERFLOW | CARRY */ |
| 1091 | #define SLJIT_SUB (SLJIT_OP2_BASE + 2) |
| 1092 | #define SLJIT_SUB32 (SLJIT_SUB | SLJIT_32) |
| 1093 | /* Flags: CARRY */ |
| 1094 | #define SLJIT_SUBC (SLJIT_OP2_BASE + 3) |
| 1095 | #define SLJIT_SUBC32 (SLJIT_SUBC | SLJIT_32) |
| 1096 | /* Note: integer mul |
| 1097 | Flags: OVERFLOW */ |
| 1098 | #define SLJIT_MUL (SLJIT_OP2_BASE + 4) |
| 1099 | #define SLJIT_MUL32 (SLJIT_MUL | SLJIT_32) |
| 1100 | /* Flags: Z */ |
| 1101 | #define SLJIT_AND (SLJIT_OP2_BASE + 5) |
| 1102 | #define SLJIT_AND32 (SLJIT_AND | SLJIT_32) |
| 1103 | /* Flags: Z */ |
| 1104 | #define SLJIT_OR (SLJIT_OP2_BASE + 6) |
| 1105 | #define SLJIT_OR32 (SLJIT_OR | SLJIT_32) |
| 1106 | /* Flags: Z */ |
| 1107 | #define SLJIT_XOR (SLJIT_OP2_BASE + 7) |
| 1108 | #define SLJIT_XOR32 (SLJIT_XOR | SLJIT_32) |
| 1109 | /* Flags: Z |
| 1110 | Let bit_length be the length of the shift operation: 32 or 64. |
| 1111 | If src2 is immediate, src2w is masked by (bit_length - 1). |
| 1112 | Otherwise, if the content of src2 is outside the range from 0 |
| 1113 | to bit_length - 1, the result is undefined. */ |
| 1114 | #define SLJIT_SHL (SLJIT_OP2_BASE + 8) |
| 1115 | #define SLJIT_SHL32 (SLJIT_SHL | SLJIT_32) |
| 1116 | /* Flags: Z |
| 1117 | Same as SLJIT_SHL, except the second operand is always masked |
| 1118 | by (bit_length - 1), where bit_length is the shift length: 32 or 64. */ |
| 1119 | #define SLJIT_MSHL (SLJIT_OP2_BASE + 9) |
| 1120 | #define SLJIT_MSHL32 (SLJIT_MSHL | SLJIT_32) |
| 1121 | /* Flags: Z |
| 1122 | Let bit_length be the length of the shift operation: 32 or 64. |
| 1123 | If src2 is immediate, src2w is masked by (bit_length - 1). |
| 1124 | Otherwise, if the content of src2 is outside the range from 0 |
| 1125 | to bit_length - 1, the result is undefined. */ |
| 1126 | #define SLJIT_LSHR (SLJIT_OP2_BASE + 10) |
| 1127 | #define SLJIT_LSHR32 (SLJIT_LSHR | SLJIT_32) |
| 1128 | /* Flags: Z |
| 1129 | Same as SLJIT_LSHR, except the second operand is always masked |
| 1130 | by (bit_length - 1), where bit_length is the shift length: 32 or 64. */ |
| 1131 | #define SLJIT_MLSHR (SLJIT_OP2_BASE + 11) |
| 1132 | #define SLJIT_MLSHR32 (SLJIT_MLSHR | SLJIT_32) |
| 1133 | /* Flags: Z |
| 1134 | Let bit_length be the length of the shift operation: 32 or 64. |
| 1135 | If src2 is immediate, src2w is masked by (bit_length - 1). |
| 1136 | Otherwise, if the content of src2 is outside the range from 0 |
| 1137 | to bit_length - 1, the result is undefined. */ |
| 1138 | #define SLJIT_ASHR (SLJIT_OP2_BASE + 12) |
| 1139 | #define SLJIT_ASHR32 (SLJIT_ASHR | SLJIT_32) |
| 1140 | /* Flags: Z |
| 1141 | Same as SLJIT_ASHR, except the second operand is always masked |
| 1142 | by (bit_length - 1), where bit_length is the shift length: 32 or 64. */ |
| 1143 | #define SLJIT_MASHR (SLJIT_OP2_BASE + 13) |
| 1144 | #define SLJIT_MASHR32 (SLJIT_MASHR | SLJIT_32) |
| 1145 | /* Flags: - (may destroy flags) |
| 1146 | Let bit_length be the length of the rotate operation: 32 or 64. |
| 1147 | The second operand is always masked by (bit_length - 1). */ |
| 1148 | #define SLJIT_ROTL (SLJIT_OP2_BASE + 14) |
| 1149 | #define SLJIT_ROTL32 (SLJIT_ROTL | SLJIT_32) |
| 1150 | /* Flags: - (may destroy flags) |
| 1151 | Let bit_length be the length of the rotate operation: 32 or 64. |
| 1152 | The second operand is always masked by (bit_length - 1). */ |
| 1153 | #define SLJIT_ROTR (SLJIT_OP2_BASE + 15) |
| 1154 | #define SLJIT_ROTR32 (SLJIT_ROTR | SLJIT_32) |
| 1155 | |
| 1156 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, |
| 1157 | sljit_s32 dst, sljit_sw dstw, |
| 1158 | sljit_s32 src1, sljit_sw src1w, |
| 1159 | sljit_s32 src2, sljit_sw src2w); |
| 1160 | |
| 1161 | /* The sljit_emit_op2u function is the same as sljit_emit_op2 |
| 1162 | except the result is discarded. */ |
| 1163 | |
| 1164 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, |
| 1165 | sljit_s32 src1, sljit_sw src1w, |
| 1166 | sljit_s32 src2, sljit_sw src2w); |
| 1167 | |
| 1168 | /* Emit a left or right shift operation, where the bits shifted |
| 1169 | in come from a separate source operand. All operands are |
| 1170 | interpreted as unsigned integers. |
| 1171 | |
| 1172 | In the following, the value_mask variable is 31 for 32 bit |
| 1173 | operations and word_size - 1 otherwise. |
| 1174 | |
| 1175 | op must be one of the following operations: |
| 1176 | SLJIT_SHL or SLJIT_SHL32: |
| 1177 | src_dst <<= src2 |
| 1178 | src_dst |= ((src1 >> 1) >> (src2 ^ value_mask)) |
| 1179 | SLJIT_MSHL or SLJIT_MSHL32: |
| 1180 | src2 &= value_mask |
| 1181 | perform the SLJIT_SHL or SLJIT_SHL32 operation |
| 1182 | SLJIT_LSHR or SLJIT_LSHR32: |
| 1183 | src_dst >>= src2 |
| 1184 | src_dst |= ((src1 << 1) << (src2 ^ value_mask)) |
| 1185 | SLJIT_MLSHR or SLJIT_MLSHR32: |
| 1186 | src2 &= value_mask |
| 1187 | perform the SLJIT_LSHR or SLJIT_LSHR32 operation |
| 1188 | |
| 1189 | op can be combined (or'ed) with SLJIT_SHIFT_INTO_NON_ZERO |
| 1190 | |
| 1191 | src_dst must be a register whose content is updated after |
| 1192 | the operation is completed |
| 1193 | src1 / src1w contains the bits which are shifted into src_dst |
| 1194 | src2 / src2w contains the shift amount |
| 1195 | |
| 1196 | Note: a rotate operation can be performed if src_dst and |
| 1197 | src1 are set to the same register |
| 1198 | |
| 1199 | Flags: - (may destroy flags) */ |
| 1200 | |
| 1201 | /* The src2 contains a non-zero value. Improves the generated |
| 1202 | code on certain architectures, which provides a small |
| 1203 | performance improvement. */ |
| 1204 | #define SLJIT_SHIFT_INTO_NON_ZERO 0x200 |
| 1205 | |
| 1206 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, |
| 1207 | sljit_s32 src_dst, |
| 1208 | sljit_s32 src1, sljit_sw src1w, |
| 1209 | sljit_s32 src2, sljit_sw src2w); |
| 1210 | |
| 1211 | /* Starting index of opcodes for sljit_emit_op_src. */ |
| 1212 | #define SLJIT_OP_SRC_BASE 128 |
| 1213 | |
| 1214 | /* Note: src cannot be an immediate value |
| 1215 | Flags: - (does not modify flags) */ |
| 1216 | #define SLJIT_FAST_RETURN (SLJIT_OP_SRC_BASE + 0) |
| 1217 | /* Skip stack frames before fast return. |
| 1218 | Note: src cannot be an immediate value |
| 1219 | Flags: - (may destroy flags) */ |
| 1220 | #define SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN (SLJIT_OP_SRC_BASE + 1) |
| 1221 | /* Prefetch value into the level 1 data cache |
| 1222 | Note: if the target CPU does not support data prefetch, |
| 1223 | no instructions are emitted. |
| 1224 | Note: this instruction never fails, even if the memory address is invalid. |
| 1225 | Flags: - (does not modify flags) */ |
| 1226 | #define SLJIT_PREFETCH_L1 (SLJIT_OP_SRC_BASE + 2) |
| 1227 | /* Prefetch value into the level 2 data cache |
| 1228 | Note: same as SLJIT_PREFETCH_L1 if the target CPU |
| 1229 | does not support this instruction form. |
| 1230 | Note: this instruction never fails, even if the memory address is invalid. |
| 1231 | Flags: - (does not modify flags) */ |
| 1232 | #define SLJIT_PREFETCH_L2 (SLJIT_OP_SRC_BASE + 3) |
| 1233 | /* Prefetch value into the level 3 data cache |
| 1234 | Note: same as SLJIT_PREFETCH_L2 if the target CPU |
| 1235 | does not support this instruction form. |
| 1236 | Note: this instruction never fails, even if the memory address is invalid. |
| 1237 | Flags: - (does not modify flags) */ |
| 1238 | #define SLJIT_PREFETCH_L3 (SLJIT_OP_SRC_BASE + 4) |
| 1239 | /* Prefetch a value which is only used once (and can be discarded afterwards) |
| 1240 | Note: same as SLJIT_PREFETCH_L1 if the target CPU |
| 1241 | does not support this instruction form. |
| 1242 | Note: this instruction never fails, even if the memory address is invalid. |
| 1243 | Flags: - (does not modify flags) */ |
| 1244 | #define SLJIT_PREFETCH_ONCE (SLJIT_OP_SRC_BASE + 5) |
| 1245 | |
| 1246 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, |
| 1247 | sljit_s32 src, sljit_sw srcw); |
| 1248 | |
| 1249 | /* Starting index of opcodes for sljit_emit_fop1. */ |
| 1250 | #define SLJIT_FOP1_BASE 160 |
| 1251 | |
| 1252 | /* Flags: - (does not modify flags) */ |
| 1253 | #define SLJIT_MOV_F64 (SLJIT_FOP1_BASE + 0) |
| 1254 | #define SLJIT_MOV_F32 (SLJIT_MOV_F64 | SLJIT_32) |
| 1255 | /* Convert opcodes: CONV[DST_TYPE].FROM[SRC_TYPE] |
| 1256 | SRC/DST TYPE can be: F64, F32, S32, SW |
| 1257 | Rounding mode when the destination is SW or S32: round towards zero. */ |
| 1258 | /* Flags: - (may destroy flags) */ |
| 1259 | #define SLJIT_CONV_F64_FROM_F32 (SLJIT_FOP1_BASE + 1) |
| 1260 | #define SLJIT_CONV_F32_FROM_F64 (SLJIT_CONV_F64_FROM_F32 | SLJIT_32) |
| 1261 | /* Flags: - (may destroy flags) */ |
| 1262 | #define SLJIT_CONV_SW_FROM_F64 (SLJIT_FOP1_BASE + 2) |
| 1263 | #define SLJIT_CONV_SW_FROM_F32 (SLJIT_CONV_SW_FROM_F64 | SLJIT_32) |
| 1264 | /* Flags: - (may destroy flags) */ |
| 1265 | #define SLJIT_CONV_S32_FROM_F64 (SLJIT_FOP1_BASE + 3) |
| 1266 | #define SLJIT_CONV_S32_FROM_F32 (SLJIT_CONV_S32_FROM_F64 | SLJIT_32) |
| 1267 | /* Flags: - (may destroy flags) */ |
| 1268 | #define SLJIT_CONV_F64_FROM_SW (SLJIT_FOP1_BASE + 4) |
| 1269 | #define SLJIT_CONV_F32_FROM_SW (SLJIT_CONV_F64_FROM_SW | SLJIT_32) |
| 1270 | /* Flags: - (may destroy flags) */ |
| 1271 | #define SLJIT_CONV_F64_FROM_S32 (SLJIT_FOP1_BASE + 5) |
| 1272 | #define SLJIT_CONV_F32_FROM_S32 (SLJIT_CONV_F64_FROM_S32 | SLJIT_32) |
| 1273 | /* Note: dst is the left and src is the right operand for SLJIT_CMP_F32/64. |
| 1274 | Flags: F_EQUAL | F_LESS | F_GREATER_EQUAL | F_GREATER | F_LESS_EQUAL */ |
| 1275 | #define SLJIT_CMP_F64 (SLJIT_FOP1_BASE + 6) |
| 1276 | #define SLJIT_CMP_F32 (SLJIT_CMP_F64 | SLJIT_32) |
| 1277 | /* Flags: - (may destroy flags) */ |
| 1278 | #define SLJIT_NEG_F64 (SLJIT_FOP1_BASE + 7) |
| 1279 | #define SLJIT_NEG_F32 (SLJIT_NEG_F64 | SLJIT_32) |
| 1280 | /* Flags: - (may destroy flags) */ |
| 1281 | #define SLJIT_ABS_F64 (SLJIT_FOP1_BASE + 8) |
| 1282 | #define SLJIT_ABS_F32 (SLJIT_ABS_F64 | SLJIT_32) |
| 1283 | |
| 1284 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, |
| 1285 | sljit_s32 dst, sljit_sw dstw, |
| 1286 | sljit_s32 src, sljit_sw srcw); |
| 1287 | |
| 1288 | /* Starting index of opcodes for sljit_emit_fop2. */ |
| 1289 | #define SLJIT_FOP2_BASE 192 |
| 1290 | |
| 1291 | /* Flags: - (may destroy flags) */ |
| 1292 | #define SLJIT_ADD_F64 (SLJIT_FOP2_BASE + 0) |
| 1293 | #define SLJIT_ADD_F32 (SLJIT_ADD_F64 | SLJIT_32) |
| 1294 | /* Flags: - (may destroy flags) */ |
| 1295 | #define SLJIT_SUB_F64 (SLJIT_FOP2_BASE + 1) |
| 1296 | #define SLJIT_SUB_F32 (SLJIT_SUB_F64 | SLJIT_32) |
| 1297 | /* Flags: - (may destroy flags) */ |
| 1298 | #define SLJIT_MUL_F64 (SLJIT_FOP2_BASE + 2) |
| 1299 | #define SLJIT_MUL_F32 (SLJIT_MUL_F64 | SLJIT_32) |
| 1300 | /* Flags: - (may destroy flags) */ |
| 1301 | #define SLJIT_DIV_F64 (SLJIT_FOP2_BASE + 3) |
| 1302 | #define SLJIT_DIV_F32 (SLJIT_DIV_F64 | SLJIT_32) |
| 1303 | |
| 1304 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, |
| 1305 | sljit_s32 dst, sljit_sw dstw, |
| 1306 | sljit_s32 src1, sljit_sw src1w, |
| 1307 | sljit_s32 src2, sljit_sw src2w); |
| 1308 | |
| 1309 | /* Label and jump instructions. */ |
| 1310 | |
| 1311 | SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler); |
| 1312 | |
| 1313 | /* Invert (negate) conditional type: xor (^) with 0x1 */ |
| 1314 | |
| 1315 | /* Integer comparison types. */ |
| 1316 | #define SLJIT_EQUAL 0 |
| 1317 | #define SLJIT_ZERO SLJIT_EQUAL |
| 1318 | #define SLJIT_NOT_EQUAL 1 |
| 1319 | #define SLJIT_NOT_ZERO SLJIT_NOT_EQUAL |
| 1320 | |
| 1321 | #define SLJIT_LESS 2 |
| 1322 | #define SLJIT_SET_LESS SLJIT_SET(SLJIT_LESS) |
| 1323 | #define SLJIT_GREATER_EQUAL 3 |
| 1324 | #define SLJIT_SET_GREATER_EQUAL SLJIT_SET(SLJIT_GREATER_EQUAL) |
| 1325 | #define SLJIT_GREATER 4 |
| 1326 | #define SLJIT_SET_GREATER SLJIT_SET(SLJIT_GREATER) |
| 1327 | #define SLJIT_LESS_EQUAL 5 |
| 1328 | #define SLJIT_SET_LESS_EQUAL SLJIT_SET(SLJIT_LESS_EQUAL) |
| 1329 | #define SLJIT_SIG_LESS 6 |
| 1330 | #define SLJIT_SET_SIG_LESS SLJIT_SET(SLJIT_SIG_LESS) |
| 1331 | #define SLJIT_SIG_GREATER_EQUAL 7 |
| 1332 | #define SLJIT_SET_SIG_GREATER_EQUAL SLJIT_SET(SLJIT_SIG_GREATER_EQUAL) |
| 1333 | #define SLJIT_SIG_GREATER 8 |
| 1334 | #define SLJIT_SET_SIG_GREATER SLJIT_SET(SLJIT_SIG_GREATER) |
| 1335 | #define SLJIT_SIG_LESS_EQUAL 9 |
| 1336 | #define SLJIT_SET_SIG_LESS_EQUAL SLJIT_SET(SLJIT_SIG_LESS_EQUAL) |
| 1337 | |
| 1338 | #define SLJIT_OVERFLOW 10 |
| 1339 | #define SLJIT_SET_OVERFLOW SLJIT_SET(SLJIT_OVERFLOW) |
| 1340 | #define SLJIT_NOT_OVERFLOW 11 |
| 1341 | |
| 1342 | /* Unlike other flags, sljit_emit_jump may destroy the carry flag. */ |
| 1343 | #define SLJIT_CARRY 12 |
| 1344 | #define SLJIT_SET_CARRY SLJIT_SET(SLJIT_CARRY) |
| 1345 | #define SLJIT_NOT_CARRY 13 |
| 1346 | |
| 1347 | /* Basic floating point comparison types. |
| 1348 | |
| 1349 | Note: when the comparison result is unordered, their behaviour is unspecified. */ |
| 1350 | |
| 1351 | #define SLJIT_F_EQUAL 14 |
| 1352 | #define SLJIT_SET_F_EQUAL SLJIT_SET(SLJIT_F_EQUAL) |
| 1353 | #define SLJIT_F_NOT_EQUAL 15 |
| 1354 | #define SLJIT_SET_F_NOT_EQUAL SLJIT_SET(SLJIT_F_NOT_EQUAL) |
| 1355 | #define SLJIT_F_LESS 16 |
| 1356 | #define SLJIT_SET_F_LESS SLJIT_SET(SLJIT_F_LESS) |
| 1357 | #define SLJIT_F_GREATER_EQUAL 17 |
| 1358 | #define SLJIT_SET_F_GREATER_EQUAL SLJIT_SET(SLJIT_F_GREATER_EQUAL) |
| 1359 | #define SLJIT_F_GREATER 18 |
| 1360 | #define SLJIT_SET_F_GREATER SLJIT_SET(SLJIT_F_GREATER) |
| 1361 | #define SLJIT_F_LESS_EQUAL 19 |
| 1362 | #define SLJIT_SET_F_LESS_EQUAL SLJIT_SET(SLJIT_F_LESS_EQUAL) |
| 1363 | |
| 1364 | /* Jumps when either argument contains a NaN value. */ |
| 1365 | #define SLJIT_UNORDERED 20 |
| 1366 | #define SLJIT_SET_UNORDERED SLJIT_SET(SLJIT_UNORDERED) |
| 1367 | /* Jumps when neither argument contains a NaN value. */ |
| 1368 | #define SLJIT_ORDERED 21 |
| 1369 | #define SLJIT_SET_ORDERED SLJIT_SET(SLJIT_ORDERED) |
| 1370 | |
| 1371 | /* Ordered / unordered floating point comparison types. |
| 1372 | |
| 1373 | Note: each comparison type has an ordered and unordered form. Some |
| 1374 | architectures support only one of them (see: sljit_cmp_info). |
| 1375 | |
| 1376 | #define SLJIT_ORDERED_EQUAL 22 |
| 1377 | #define SLJIT_SET_ORDERED_EQUAL SLJIT_SET(SLJIT_ORDERED_EQUAL) |
| 1378 | #define SLJIT_UNORDERED_OR_NOT_EQUAL 23 |
| 1379 | #define SLJIT_SET_UNORDERED_OR_NOT_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_NOT_EQUAL) |
| 1380 | #define SLJIT_ORDERED_LESS 24 |
| 1381 | #define SLJIT_SET_ORDERED_LESS SLJIT_SET(SLJIT_ORDERED_LESS) |
| 1382 | #define SLJIT_UNORDERED_OR_GREATER_EQUAL 25 |
| 1383 | #define SLJIT_SET_UNORDERED_OR_GREATER_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_GREATER_EQUAL) |
| 1384 | #define SLJIT_ORDERED_GREATER 26 |
| 1385 | #define SLJIT_SET_ORDERED_GREATER SLJIT_SET(SLJIT_ORDERED_GREATER) |
| 1386 | #define SLJIT_UNORDERED_OR_LESS_EQUAL 27 |
| 1387 | #define SLJIT_SET_UNORDERED_OR_LESS_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_LESS_EQUAL) |
| 1388 | |
| 1389 | #define SLJIT_UNORDERED_OR_EQUAL 28 |
| 1390 | #define SLJIT_SET_UNORDERED_OR_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_EQUAL) |
| 1391 | #define SLJIT_ORDERED_NOT_EQUAL 29 |
| 1392 | #define SLJIT_SET_ORDERED_NOT_EQUAL SLJIT_SET(SLJIT_ORDERED_NOT_EQUAL) |
| 1393 | #define SLJIT_UNORDERED_OR_LESS 30 |
| 1394 | #define SLJIT_SET_UNORDERED_OR_LESS SLJIT_SET(SLJIT_UNORDERED_OR_LESS) |
| 1395 | #define SLJIT_ORDERED_GREATER_EQUAL 31 |
| 1396 | #define SLJIT_SET_ORDERED_GREATER_EQUAL SLJIT_SET(SLJIT_ORDERED_GREATER_EQUAL) |
| 1397 | #define SLJIT_UNORDERED_OR_GREATER 32 |
| 1398 | #define SLJIT_SET_UNORDERED_OR_GREATER SLJIT_SET(SLJIT_UNORDERED_OR_GREATER) |
| 1399 | #define SLJIT_ORDERED_LESS_EQUAL 33 |
| 1400 | #define SLJIT_SET_ORDERED_LESS_EQUAL SLJIT_SET(SLJIT_ORDERED_LESS_EQUAL) |
| 1401 | |
| 1402 | /* Unconditional jump types. */ |
| 1403 | #define SLJIT_JUMP 34 |
| 1404 | /* Fast calling method. See sljit_emit_fast_enter / SLJIT_FAST_RETURN. */ |
| 1405 | #define SLJIT_FAST_CALL 35 |
| 1406 | /* Default C calling convention. */ |
| 1407 | #define SLJIT_CALL 36 |
| 1408 | /* Called function must be compiled by SLJIT. |
| 1409 | See SLJIT_ENTER_REG_ARG option. */ |
| 1410 | #define SLJIT_CALL_REG_ARG 37 |
| 1411 | |
| 1412 | /* The target can be changed during runtime (see: sljit_set_jump_addr). */ |
| 1413 | #define SLJIT_REWRITABLE_JUMP 0x1000 |
| 1414 | /* When this flag is passed, the execution of the current function ends and |
| 1415 | the called function returns to the caller of the current function. The |
| 1416 | stack usage is reduced before the call, but it is not necessarily reduced |
| 1417 | to zero. In the latter case the compiler needs to allocate space for some |
| 1418 | arguments and the return address must be stored on the stack as well. */ |
| 1419 | #define SLJIT_CALL_RETURN 0x2000 |
| 1420 | |
| 1421 | /* Emit a jump instruction. The destination is not set, only the type of the jump. |
| 1422 | type must be between SLJIT_EQUAL and SLJIT_FAST_CALL |
| 1423 | type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP |
| 1424 | |
| 1425 | Flags: does not modify flags. */ |
| 1426 | SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type); |
| 1427 | |
| 1428 | /* Emit a C compiler (ABI) compatible function call. |
| 1429 | type must be SLJIT_CALL or SLJIT_CALL_REG_ARG |
| 1430 | type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP and/or SLJIT_CALL_RETURN |
| 1431 | arg_types can be specified by SLJIT_ARGSx (SLJIT_ARG_RETURN / SLJIT_ARG_VALUE) macros |
| 1432 | |
| 1433 | Flags: destroy all flags. */ |
| 1434 | SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types); |
| 1435 | |
| 1436 | /* Basic arithmetic comparison. In most architectures it is implemented as |
| 1437 | a compare operation followed by a sljit_emit_jump. However some |
| 1438 | architectures (i.e: ARM64 or MIPS) may employ special optimizations |
| 1439 | here. It is suggested to use this comparison form when appropriate. |
| 1440 | type must be between SLJIT_EQUAL and SLJIT_SIG_LESS_EQUAL |
| 1441 | type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP |
| 1442 | |
| 1443 | Flags: may destroy flags. */ |
| 1444 | SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, |
| 1445 | sljit_s32 src1, sljit_sw src1w, |
| 1446 | sljit_s32 src2, sljit_sw src2w); |
| 1447 | |
| 1448 | /* Basic floating point comparison. In most architectures it is implemented as |
| 1449 | a SLJIT_CMP_F32/64 operation (setting appropriate flags) followed by a |
| 1450 | sljit_emit_jump. However some architectures (i.e: MIPS) may employ |
| 1451 | special optimizations here. It is suggested to use this comparison form |
| 1452 | when appropriate. |
| 1453 | type must be between SLJIT_F_EQUAL and SLJIT_ORDERED_LESS_EQUAL |
| 1454 | type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP |
| 1455 | Flags: destroy flags. |
| 1456 | Note: when an operand is NaN the behaviour depends on the comparison type. */ |
| 1457 | SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type, |
| 1458 | sljit_s32 src1, sljit_sw src1w, |
| 1459 | sljit_s32 src2, sljit_sw src2w); |
| 1460 | |
| 1461 | /* Set the destination of the jump to this label. */ |
| 1462 | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label); |
| 1463 | /* Set the destination of the jump to this target address. */ |
| 1464 | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target); |
| 1465 | |
| 1466 | /* Emit an indirect jump or fast call. |
| 1467 | Direct form: set src to SLJIT_IMM() and srcw to the address |
| 1468 | Indirect form: any other valid addressing mode |
| 1469 | type must be between SLJIT_JUMP and SLJIT_FAST_CALL |
| 1470 | |
| 1471 | Flags: does not modify flags. */ |
| 1472 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw); |
| 1473 | |
| 1474 | /* Emit a C compiler (ABI) compatible function call. |
| 1475 | Direct form: set src to SLJIT_IMM() and srcw to the address |
| 1476 | Indirect form: any other valid addressing mode |
| 1477 | type must be SLJIT_CALL or SLJIT_CALL_REG_ARG |
| 1478 | type can be combined (or'ed) with SLJIT_CALL_RETURN |
| 1479 | arg_types can be specified by SLJIT_ARGSx (SLJIT_ARG_RETURN / SLJIT_ARG_VALUE) macros |
| 1480 | |
| 1481 | Flags: destroy all flags. */ |
| 1482 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw); |
| 1483 | |
| 1484 | /* Perform an operation using the conditional flags as the second argument. |
| 1485 | Type must always be between SLJIT_EQUAL and SLJIT_ORDERED_LESS_EQUAL. |
| 1486 | The value represented by the type is 1, if the condition represented |
| 1487 | by the type is fulfilled, and 0 otherwise. |
| 1488 | |
| 1489 | When op is SLJIT_MOV or SLJIT_MOV32: |
| 1490 | Set dst to the value represented by the type (0 or 1). |
| 1491 | Flags: - (does not modify flags) |
| 1492 | When op is SLJIT_AND, SLJIT_AND32, SLJIT_OR, SLJIT_OR32, SLJIT_XOR, or SLJIT_XOR32 |
| 1493 | Performs the binary operation using dst as the first, and the value |
| 1494 | represented by type as the second argument. Result is written into dst. |
| 1495 | Flags: Z (may destroy flags) */ |
| 1496 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, |
| 1497 | sljit_s32 dst, sljit_sw dstw, |
| 1498 | sljit_s32 type); |
| 1499 | |
| 1500 | /* Emit a conditional mov instruction which moves source to destination, |
| 1501 | if the condition is satisfied. Unlike other arithmetic operations this |
| 1502 | instruction does not support memory access. |
| 1503 | |
| 1504 | type must be between SLJIT_EQUAL and SLJIT_ORDERED_LESS_EQUAL |
| 1505 | type can be combined (or'ed) with SLJIT_32 |
| 1506 | dst_reg must be a valid register |
| 1507 | src must be a valid register or immediate (SLJIT_IMM) |
| 1508 | |
| 1509 | Flags: - (does not modify flags) */ |
| 1510 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, |
| 1511 | sljit_s32 dst_reg, |
| 1512 | sljit_s32 src, sljit_sw srcw); |
| 1513 | |
| 1514 | /* The following flags are used by sljit_emit_mem(), sljit_emit_mem_update(), |
| 1515 | sljit_emit_fmem(), and sljit_emit_fmem_update(). */ |
| 1516 | |
| 1517 | /* Memory load operation. This is the default. */ |
| 1518 | #define SLJIT_MEM_LOAD 0x000000 |
| 1519 | /* Memory store operation. */ |
| 1520 | #define SLJIT_MEM_STORE 0x000200 |
| 1521 | |
| 1522 | /* The following flags are used by sljit_emit_mem() and sljit_emit_fmem(). */ |
| 1523 | |
| 1524 | /* Load or store data from an unaligned (byte aligned) address. */ |
| 1525 | #define SLJIT_MEM_UNALIGNED 0x000400 |
| 1526 | /* Load or store data from a 16 bit aligned address. */ |
| 1527 | #define SLJIT_MEM_UNALIGNED_16 0x000800 |
| 1528 | /* Load or store data from a 32 bit aligned address. */ |
| 1529 | #define SLJIT_MEM_UNALIGNED_32 0x001000 |
| 1530 | |
| 1531 | /* The following flags are used by sljit_emit_mem_update(), |
| 1532 | and sljit_emit_fmem_update(). */ |
| 1533 | |
| 1534 | /* Base register is updated before the memory access (default). */ |
| 1535 | #define SLJIT_MEM_PRE 0x000000 |
| 1536 | /* Base register is updated after the memory access. */ |
| 1537 | #define SLJIT_MEM_POST 0x000400 |
| 1538 | |
| 1539 | /* When SLJIT_MEM_SUPP is passed, no instructions are emitted. |
| 1540 | Instead the function returns with SLJIT_SUCCESS if the instruction |
| 1541 | form is supported and SLJIT_ERR_UNSUPPORTED otherwise. This flag |
| 1542 | allows runtime checking of available instruction forms. */ |
| 1543 | #define SLJIT_MEM_SUPP 0x000800 |
| 1544 | |
| 1545 | /* The sljit_emit_mem emits instructions for various memory operations: |
| 1546 | |
| 1547 | When SLJIT_MEM_UNALIGNED / SLJIT_MEM_UNALIGNED_16 / |
| 1548 | SLJIT_MEM_UNALIGNED_32 is set in type argument: |
| 1549 | Emit instructions for unaligned memory loads or stores. When |
| 1550 | SLJIT_UNALIGNED is not defined, the only way to access unaligned |
| 1551 | memory data is using sljit_emit_mem. Otherwise all operations (e.g. |
| 1552 | sljit_emit_op1/2, or sljit_emit_fop1/2) support unaligned access. |
| 1553 | In general, unaligned memory accesses are often slower than |
| 1554 | aligned ones and should be avoided. |
| 1555 | |
| 1556 | When a pair of registers is passed in reg argument: |
| 1557 | Emit instructions for moving data between a register pair and |
| 1558 | memory. The register pair can be specified by the SLJIT_REG_PAIR |
| 1559 | macro. The first register is loaded from or stored into the |
| 1560 | location specified by the mem/memw arguments, and the end address |
| 1561 | of this operation is the starting address of the data transfer |
| 1562 | between the second register and memory. The type argument must |
| 1563 | be SLJIT_MOV. The SLJIT_MEM_UNALIGNED* options are allowed for |
| 1564 | this operation. |
| 1565 | |
| 1566 | type must be between SLJIT_MOV and SLJIT_MOV_P and can be |
| 1567 | combined (or'ed) with SLJIT_MEM_* flags |
| 1568 | reg is a register or register pair, which is the source or |
| 1569 | destination of the operation |
| 1570 | mem must be a memory operand |
| 1571 | |
| 1572 | Flags: - (does not modify flags) */ |
| 1573 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, |
| 1574 | sljit_s32 reg, |
| 1575 | sljit_s32 mem, sljit_sw memw); |
| 1576 | |
| 1577 | /* Emit a single memory load or store with update instruction. |
| 1578 | When the requested instruction form is not supported by the CPU, |
| 1579 | it returns with SLJIT_ERR_UNSUPPORTED instead of emulating the |
| 1580 | instruction. This allows specializing tight loops based on |
| 1581 | the supported instruction forms (see SLJIT_MEM_SUPP flag). |
| 1582 | Absolute address (SLJIT_MEM0) forms are never supported |
| 1583 | and the base (first) register specified by the mem argument |
| 1584 | must not be SLJIT_SP and must also be different from the |
| 1585 | register specified by the reg argument. |
| 1586 | |
| 1587 | type must be between SLJIT_MOV and SLJIT_MOV_P and can be |
| 1588 | combined (or'ed) with SLJIT_MEM_* flags |
| 1589 | reg is the source or destination register of the operation |
| 1590 | mem must be a memory operand |
| 1591 | |
| 1592 | Flags: - (does not modify flags) */ |
| 1593 | |
| 1594 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type, |
| 1595 | sljit_s32 reg, |
| 1596 | sljit_s32 mem, sljit_sw memw); |
| 1597 | |
| 1598 | /* Same as sljit_emit_mem except for the following: |
| 1599 | |
| 1600 | Loading or storing a pair of registers is not supported. |
| 1601 | |
| 1602 | type must be SLJIT_MOV_F64 or SLJIT_MOV_F32 and can be |
| 1603 | combined (or'ed) with SLJIT_MEM_* flags. |
| 1604 | freg is the source or destination floating point register |
| 1605 | of the operation |
| 1606 | mem must be a memory operand |
| 1607 | |
| 1608 | Flags: - (does not modify flags) */ |
| 1609 | |
| 1610 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, |
| 1611 | sljit_s32 freg, |
| 1612 | sljit_s32 mem, sljit_sw memw); |
| 1613 | |
| 1614 | /* Same as sljit_emit_mem_update except for the following: |
| 1615 | |
| 1616 | type must be SLJIT_MOV_F64 or SLJIT_MOV_F32 and can be |
| 1617 | combined (or'ed) with SLJIT_MEM_* flags |
| 1618 | freg is the source or destination floating point register |
| 1619 | of the operation |
| 1620 | mem must be a memory operand |
| 1621 | |
| 1622 | Flags: - (does not modify flags) */ |
| 1623 | |
| 1624 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type, |
| 1625 | sljit_s32 freg, |
| 1626 | sljit_s32 mem, sljit_sw memw); |
| 1627 | |
| 1628 | /* Copies the base address of SLJIT_SP + offset to dst. The offset can |
| 1629 | represent the starting address of a value in the local data (stack). |
| 1630 | The offset is not limited by the local data limits, it can be any value. |
| 1631 | For example if an array of bytes are stored on the stack from |
| 1632 | offset 0x40, and R0 contains the offset of an array item plus 0x120, |
| 1633 | this item can be changed by two SLJIT instructions: |
| 1634 | |
| 1635 | sljit_get_local_base(compiler, SLJIT_R1, 0, 0x40 - 0x120); |
| 1636 | sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0, SLJIT_IMM, 0x5); |
| 1637 | |
| 1638 | Flags: - (may destroy flags) */ |
| 1639 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset); |
| 1640 | |
| 1641 | /* Store a value that can be changed at runtime (see: sljit_get_const_addr / sljit_set_const) |
| 1642 | Flags: - (does not modify flags) */ |
| 1643 | SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value); |
| 1644 | |
| 1645 | /* Store the value of a label (see: sljit_set_put_label) |
| 1646 | Flags: - (does not modify flags) */ |
| 1647 | SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw); |
| 1648 | |
| 1649 | /* Set the value stored by put_label to this label. */ |
| 1650 | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_label, struct sljit_label *label); |
| 1651 | |
| 1652 | /* After code generation, the addresses of label, jump and const instructions are |
| 1653 | computed; each getter below returns the addr field of its argument. Since these structures |
| 1654 | are freed by sljit_free_compiler, the addresses must be saved elsewhere by the user program. */ |
| 1655 | static SLJIT_INLINE sljit_uw sljit_get_label_addr(struct sljit_label *label) { return label->addr; } |
| 1656 | static SLJIT_INLINE sljit_uw sljit_get_jump_addr(struct sljit_jump *jump) { return jump->addr; } |
| 1657 | static SLJIT_INLINE sljit_uw sljit_get_const_addr(struct sljit_const *const_) { return const_->addr; } |
| 1658 | |
| 1659 | /* Only the address and executable offset are required to perform dynamic |
| 1660 | code modifications. See sljit_get_executable_offset function. */ |
| 1661 | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset); |
| 1662 | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset); |
| 1663 | |
| 1664 | /* --------------------------------------------------------------------- */ |
| 1665 | /* CPU specific functions */ |
| 1666 | /* --------------------------------------------------------------------- */ |
| 1667 | |
| 1668 | /* The following function is a helper function for sljit_emit_op_custom. |
| 1669 | It returns with the real machine register index ( >=0 ) of any SLJIT_R, |
| 1670 | SLJIT_S and SLJIT_SP registers. |
| 1671 | |
| 1672 | Note: it returns with -1 for virtual registers (only on x86-32). */ |
| 1673 | |
| 1674 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg); |
| 1675 | |
| 1676 | /* The following function is a helper function for sljit_emit_op_custom. |
| 1677 | It returns with the real machine register ( >= 0 ) index of any SLJIT_FR, |
| 1678 | and SLJIT_FS register. |
| 1679 | |
| 1680 | Note: the index is always an even number on ARM-32, MIPS. */ |
| 1681 | |
| 1682 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg); |
| 1683 | |
| 1684 | /* Any instruction can be inserted into the instruction stream by |
| 1685 | sljit_emit_op_custom. It has a similar purpose as inline assembly. |
| 1686 | The size parameter must match the instruction size of the target |
| 1687 | architecture: |
| 1688 | |
| 1689 | x86: 0 < size <= 15. The instruction argument can be byte aligned. |
| 1690 | Thumb2: if size == 2, the instruction argument must be 2 byte aligned. |
| 1691 | if size == 4, the instruction argument must be 4 byte aligned. |
| 1692 | Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */ |
| 1693 | |
| 1694 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, |
| 1695 | void *instruction, sljit_u32 size); |
| 1696 | |
| 1697 | /* Flags were set by a 32 bit operation. */ |
| 1698 | #define SLJIT_CURRENT_FLAGS_32 SLJIT_32 |
| 1699 | |
| 1700 | /* Flags were set by an ADD or ADDC operation. */ |
| 1701 | #define SLJIT_CURRENT_FLAGS_ADD 0x01 |
| 1702 | /* Flags were set by a SUB, SUBC, or NEG operation. */ |
| 1703 | #define SLJIT_CURRENT_FLAGS_SUB 0x02 |
| 1704 | |
| 1705 | /* Flags were set by sljit_emit_op2u with SLJIT_SUB opcode. |
| 1706 | Must be combined with SLJIT_CURRENT_FLAGS_SUB. */ |
| 1707 | #define SLJIT_CURRENT_FLAGS_COMPARE 0x04 |
| 1708 | |
| 1709 | /* Define the currently available CPU status flags. It is usually used after |
| 1710 | an sljit_emit_label or sljit_emit_op_custom operation to define which CPU |
| 1711 | status flags are available. |
| 1712 | |
| 1713 | The current_flags must be a valid combination of SLJIT_SET_* and |
| 1714 | SLJIT_CURRENT_FLAGS_* constants. */ |
| 1715 | |
| 1716 | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, |
| 1717 | sljit_s32 current_flags); |
| 1718 | |
| 1719 | /* --------------------------------------------------------------------- */ |
| 1720 | /* Miscellaneous utility functions */ |
| 1721 | /* --------------------------------------------------------------------- */ |
| 1722 | |
| 1723 | /* Get the human readable name of the platform. Can be useful on platforms |
| 1724 | like ARM, where ARM and Thumb2 functions can be mixed, and it is useful |
| 1725 | to know the type of the code generator. */ |
| 1726 | SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void); |
| 1727 | |
| 1728 | /* Portable helper macro to get the byte offset of a member within the base structure type. */ |
| 1729 | #define SLJIT_OFFSETOF(base, member) ((sljit_sw)(&((base*)0x10)->member) - 0x10) |
| 1730 | |
| 1731 | #if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) |
| 1732 | |
| 1733 | /* The sljit_stack structure and its manipulation functions provide |
| 1734 | an implementation for a top-down stack. The stack top is stored |
| 1735 | in the end field of the sljit_stack structure and the stack goes |
| 1736 | down to the min_start field, so the memory region reserved for |
| 1737 | this stack is between min_start (inclusive) and end (exclusive) |
| 1738 | fields. However the application can only use the region between |
| 1739 | start (inclusive) and end (exclusive) fields. The sljit_stack_resize |
| 1740 | function can be used to extend this region up to min_start. |
| 1741 | |
| 1742 | This feature uses the "address space reserve" feature of modern |
| 1743 | operating systems. Instead of allocating a large memory block |
| 1744 | applications can allocate a small memory region and extend it |
| 1745 | later without moving the content of the memory area. Therefore |
| 1746 | after a successful resize by sljit_stack_resize all pointers into |
| 1747 | this region are still valid. |
| 1748 | |
| 1749 | Note: |
| 1750 | this structure may not be supported by all operating systems. |
| 1751 | end and min_start fields are aligned to PAGE_SIZE bytes (usually |
| 1752 | 4 Kbyte or more). |
| 1753 | stack should grow in larger steps, e.g. 4Kbyte, 16Kbyte or more. */ |
| 1754 | |
| 1755 | struct sljit_stack { |
| 1756 | /* User data, anything can be stored here. |
| 1757 | Initialized to the same value as the end field. */ |
| 1758 | sljit_u8 *top; |
| 1759 | /* The members below are read only. */ |
| 1760 | /* End address of the stack (exclusive). */ |
| 1761 | sljit_u8 *end; |
| 1762 | /* Current start address of the stack (inclusive). */ |
| 1763 | sljit_u8 *start; |
| 1764 | /* Lowest start address of the stack; the stack cannot be extended below it. */ |
| 1765 | sljit_u8 *min_start; |
| 1766 | }; |
| 1767 | |
| 1768 | /* Allocates a new stack. Returns NULL if unsuccessful. |
| 1769 | Note: see sljit_create_compiler for the explanation of allocator_data. */ |
| 1770 | SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(sljit_uw start_size, sljit_uw max_size, void *allocator_data); |
| 1771 | SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data); |
| 1772 | |
| 1773 | /* Can be used to increase (extend) or decrease (shrink) the stack |
| 1774 | memory area. Returns with new_start if successful and NULL otherwise. |
| 1775 | It always fails if new_start is less than the min_start field or greater |
| 1776 | than or equal to the end field. The fields of the stack are not changed if the returned |
| 1777 | value is NULL (the current memory content is never lost). */ |
| 1778 | SLJIT_API_FUNC_ATTRIBUTE sljit_u8 *SLJIT_FUNC sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_start); |
| 1779 | |
| 1780 | #endif /* (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) */ |
| 1781 | |
| 1782 | #if !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) |
| 1783 | |
| 1784 | /* Get the entry address of a given function (signed, unsigned result). The argument is parenthesized so the cast applies to the whole expression passed in. */ |
| 1785 | #define SLJIT_FUNC_ADDR(func_name) ((sljit_sw)(func_name)) |
| 1786 | #define SLJIT_FUNC_UADDR(func_name) ((sljit_uw)(func_name)) |
| 1787 | |
| 1788 | #else /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */ |
| 1789 | |
| 1790 | /* All JIT related code should be placed in the same context (library, binary, etc.). */ |
| 1791 | |
| 1792 | /* Get the entry address of a given function (signed, unsigned result). The address is read through the function pointer; the argument is parenthesized so the casts apply to the whole expression passed in. */ |
| 1793 | #define SLJIT_FUNC_ADDR(func_name) (*(sljit_sw*)(void*)(func_name)) |
| 1794 | #define SLJIT_FUNC_UADDR(func_name) (*(sljit_uw*)(void*)(func_name)) |
| 1795 | |
| 1796 | /* For powerpc64, the function pointers point to a context descriptor. */ |
| 1797 | struct sljit_function_context { |
| 1798 | sljit_uw addr; /* Function address (see the addr / func arguments of sljit_set_function_context). */ |
| 1799 | sljit_uw r2; /* NOTE(review): presumably the value for register r2 (TOC pointer in the PPC64 ELF ABI) - confirm. */ |
| 1800 | sljit_uw r11; /* NOTE(review): presumably the value for register r11 (environment pointer) - confirm. */ |
| 1801 | }; |
| 1802 | |
| 1803 | /* Fill the context arguments using the addr and the function. |
| 1804 | If func_ptr is NULL, it will not be set to the address of context |
| 1805 | If addr is NULL, the function address also comes from the func pointer. */ |
| 1806 | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_uw addr, void* func); |
| 1807 | |
| 1808 | #endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */ |
| 1809 | |
| 1810 | #if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) |
| 1811 | /* Free unused executable memory. The allocator keeps some free memory |
| 1812 | around to reduce the number of OS executable memory allocations. |
| 1813 | This improves performance since these calls are costly. However |
| 1814 | it is sometimes desired to free all unused memory regions, e.g. |
| 1815 | before the application terminates. */ |
| 1816 | SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void); |
| 1817 | #endif |
| 1818 | |
| 1819 | #ifdef __cplusplus |
| 1820 | } /* extern "C" */ |
| 1821 | #endif |
| 1822 | |
| 1823 | #endif /* SLJIT_LIR_H_ */ |
| 1824 | |