1 | /* |
2 | ** Definitions for target CPU. |
3 | ** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h |
4 | */ |
5 | |
6 | #ifndef _LJ_TARGET_H |
7 | #define _LJ_TARGET_H |
8 | |
9 | #include "lj_def.h" |
10 | #include "lj_arch.h" |
11 | |
12 | /* -- Registers and spill slots ------------------------------------------- */ |
13 | |
/* Register type (uint8_t in ir->r). */
typedef uint32_t Reg;

/* The hi-bit is NOT set for an allocated register. This means the value
** can be directly used without masking. The hi-bit is set for a register
** allocation hint or for RID_INIT, RID_SINK or RID_SUNK.
*/
#define RID_NONE	0x80	/* Hi-bit: no register allocated. */
#define RID_MASK	0x7f	/* Low 7 bits: the register number itself. */
#define RID_INIT	(RID_NONE|RID_MASK)	/* 0xff: initial state. */
#define RID_SINK	(RID_INIT-1)		/* 0xfe. */
#define RID_SUNK	(RID_INIT-2)		/* 0xfd. */

#define ra_noreg(r)	((r) & RID_NONE)	/* Nonzero if NO register allocated. */
#define ra_hasreg(r)	(!((r) & RID_NONE))	/* Nonzero if a register IS allocated. */

/* The ra_hashint() macro assumes a previous test for ra_noreg(). */
/* Hints are 0x80..0xfc, i.e. everything below RID_SUNK (0xfd). */
#define ra_hashint(r)		((r) < RID_SUNK)
#define ra_gethint(r)		((Reg)((r) & RID_MASK))	/* Strip hi-bit, get hinted reg. */
#define ra_sethint(rr, r)	rr = (uint8_t)((r)|RID_NONE)	/* Store reg as hint (set hi-bit). */
#define ra_samehint(r1, r2)	(ra_gethint((r1)^(r2)) == 0)	/* Same low 7 bits? */
35 | |
/* Spill slot 0 means no spill slot has been allocated. */
#define SPS_NONE	0

#define ra_hasspill(s)	((s) != SPS_NONE)

/* Combined register and spill slot (uint16_t in ir->prev). */
typedef uint32_t RegSP;

/* Layout: register (or hint) in the low 8 bits, spill slot in the upper bits. */
#define REGSP(r, s)		((r) + ((s) << 8))
#define REGSP_HINT(r)		((r)|RID_NONE)	/* RegSP holding only a hint, no spill slot. */
#define REGSP_INIT		REGSP(RID_INIT, 0)

#define regsp_reg(rs)		((rs) & 255)	/* Extract register byte. */
#define regsp_spill(rs)		((rs) >> 8)	/* Extract spill slot. */
/* In use iff a register is allocated (hi-bit clear) or a spill slot exists.
** Masking out RID_MASK ignores any hint bits before comparing with the
** pristine no-reg/no-spill state REGSP(RID_NONE, 0).
*/
#define regsp_used(rs) \
  (((rs) & ~REGSP(RID_MASK, 0)) != REGSP(RID_NONE, 0))
52 | |
53 | /* -- Register sets ------------------------------------------------------- */ |
54 | |
/* Bitset for registers. 32 registers suffice for most architectures.
** Note that one set holds bits for both GPRs and FPRs.
*/
#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
typedef uint64_t RegSet;	/* These targets need more than 32 bits. */
#else
typedef uint32_t RegSet;
#endif

#define RID2RSET(r)		(((RegSet)1) << (r))	/* Singleton set {r}. */
#define RSET_EMPTY		((RegSet)0)
/* All registers in the half-open range [lo, hi). */
#define RSET_RANGE(lo, hi)	((RID2RSET((hi)-(lo))-1) << (lo))

#define rset_test(rs, r)	((int)((rs) >> (r)) & 1)	/* Is r in rs? */
#define rset_set(rs, r)		(rs |= RID2RSET(r))	/* Add r (mutates rs). */
#define rset_clear(rs, r)	(rs &= ~RID2RSET(r))	/* Remove r (mutates rs). */
#define rset_exclude(rs, r)	(rs & ~RID2RSET(r))	/* Copy of rs without r (no mutation). */
#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
/* 64 bit sets: clzll(rs)^63 is the index of the highest set bit. */
#define rset_picktop(rs)	((Reg)(__builtin_clzll(rs)^63))
#define rset_pickbot(rs)	((Reg)__builtin_ctzll(rs))
#else
#define rset_picktop(rs)	((Reg)lj_fls(rs))	/* Highest set bit. */
#define rset_pickbot(rs)	((Reg)lj_ffs(rs))	/* Lowest set bit. */
#endif
79 | |
80 | /* -- Register allocation cost -------------------------------------------- */ |
81 | |
82 | /* The register allocation heuristic keeps track of the cost for allocating |
83 | ** a specific register: |
84 | ** |
85 | ** A free register (obviously) has a cost of 0 and a 1-bit in the free mask. |
86 | ** |
87 | ** An already allocated register has the (non-zero) IR reference in the lowest |
88 | ** bits and the result of a blended cost-model in the higher bits. |
89 | ** |
90 | ** The allocator first checks the free mask for a hit. Otherwise an (unrolled) |
91 | ** linear search for the minimum cost is used. The search doesn't need to |
92 | ** keep track of the position of the minimum, which makes it very fast. |
93 | ** The lowest bits of the minimum cost show the desired IR reference whose |
94 | ** register is the one to evict. |
95 | ** |
96 | ** Without the cost-model this degenerates to the standard heuristics for |
97 | ** (reverse) linear-scan register allocation. Since code generation is done |
98 | ** in reverse, a live interval extends from the last use to the first def. |
99 | ** For an SSA IR the IR reference is the first (and only) def and thus |
100 | ** trivially marks the end of the interval. The LSRA heuristics says to pick |
101 | ** the register whose live interval has the furthest extent, i.e. the lowest |
102 | ** IR reference in our case. |
103 | ** |
104 | ** A cost-model should take into account other factors, like spill-cost and |
105 | ** restore- or rematerialization-cost, which depend on the kind of instruction. |
106 | ** E.g. constants have zero spill costs, variant instructions have higher |
107 | ** costs than invariants and PHIs should preferably never be spilled. |
108 | ** |
** Here's a first cut at a simple, but effective blended cost-model for R-LSRA:
110 | ** - Due to careful design of the IR, constants already have lower IR |
111 | ** references than invariants and invariants have lower IR references |
112 | ** than variants. |
113 | ** - The cost in the upper 16 bits is the sum of the IR reference and a |
114 | ** weighted score. The score currently only takes into account whether |
115 | ** the IRT_ISPHI bit is set in the instruction type. |
116 | ** - The PHI weight is the minimum distance (in IR instructions) a PHI |
117 | ** reference has to be further apart from a non-PHI reference to be spilled. |
118 | ** - It should be a power of two (for speed) and must be between 2 and 32768. |
119 | ** Good values for the PHI weight seem to be between 40 and 150. |
120 | ** - Further study is required. |
121 | */ |
#define REGCOST_PHI_WEIGHT	64	/* Must be a power of two in [2, 32768] (see above). */

/* Cost for allocating a specific register. */
typedef uint32_t RegCost;

/* Note: assumes 16 bit IRRef1. */
/* Pack: IR reference in the low 16 bits, blended cost in the upper 16 bits. */
#define REGCOST(cost, ref)	((RegCost)(ref) + ((RegCost)(cost) << 16))
#define regcost_ref(rc)		((IRRef1)(rc))	/* Extract the IR reference (low 16 bits). */

/* Yields REGCOST_PHI_WEIGHT<<16 iff IRT_ISPHI is set in t, else 0.
** The division by IRT_ISPHI is a compile-time constant, so this is a
** branch-free way to scale the isolated flag bit to the weight.
*/
#define REGCOST_T(t) \
  ((RegCost)((t)&IRT_ISPHI) * (((RegCost)(REGCOST_PHI_WEIGHT)<<16)/IRT_ISPHI))
/* Blended cost for reference ref of type t: (ref + PHI score) << 16 | ref. */
#define REGCOST_REF_T(ref, t)	(REGCOST((ref), (ref)) + REGCOST_T((t)))
134 | |
135 | /* -- Target-specific definitions ----------------------------------------- */ |
136 | |
137 | #if LJ_TARGET_X86ORX64 |
138 | #include "lj_target_x86.h" |
139 | #elif LJ_TARGET_ARM |
140 | #include "lj_target_arm.h" |
141 | #elif LJ_TARGET_ARM64 |
142 | #include "lj_target_arm64.h" |
143 | #elif LJ_TARGET_PPC |
144 | #include "lj_target_ppc.h" |
145 | #elif LJ_TARGET_MIPS |
146 | #include "lj_target_mips.h" |
147 | #else |
148 | #error "Missing include for target CPU" |
149 | #endif |
150 | |
#ifdef EXITSTUBS_PER_GROUP
/* Return the address of an exit stub.
** Exit stubs are laid out in groups of EXITSTUBS_PER_GROUP stubs, each
** EXITSTUB_SPACING bytes apart; group points to the base of each group.
*/
static LJ_AINLINE char *exitstub_addr_(char **group, uint32_t exitno)
{
  uint32_t gidx = exitno / EXITSTUBS_PER_GROUP;  /* Which group. */
  uint32_t sub = exitno % EXITSTUBS_PER_GROUP;   /* Stub index within group. */
  lj_assertX(group[gidx] != NULL,
	     "exit stub group for exit %d uninitialized", exitno);
  return (char *)group[gidx] + EXITSTUB_SPACING*sub;
}
/* Avoid dependence on lj_jit.h if only including lj_target.h. */
#define exitstub_addr(J, exitno) \
  ((MCode *)exitstub_addr_((char **)((J)->exitstubgroup), (exitno)))
#endif
164 | |
165 | #endif |
166 | |