#include "ggml.h"
#include "ggml-cpu.h"

#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cassert>
#include <vector>

#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif

#if defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wdouble-promotion"
#endif

#define MAX_NARGS 3

#undef MIN
#undef MAX
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

#define GGML_SILU_FP16

//
// logging
//

#if (GGML_DEBUG >= 1)
#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG(...)
#endif

#if (GGML_DEBUG >= 5)
#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_5(...)
#endif

#if (GGML_DEBUG >= 10)
#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_10(...)
#endif

#define GGML_PRINT(...) printf(__VA_ARGS__)

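// rand()-based helpers used to fill the test tensors with uniform noise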
static float frand(void) {
    return (float)rand()/(float)RAND_MAX;
}

static int irand(int n) {
    if (n == 0) return 0;
    return rand()%n;
}

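// fill the first ndims entries of dims with random sizes in [1, 4]; the remaining entries stay 1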
static void get_random_dims(int64_t * dims, int ndims) {
    dims[0] = dims[1] = dims[2] = dims[3] = 1;

    for (int i = 0; i < ndims; i++) {
        dims[i] = 1 + irand(4);
    }
}

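// allocate an ndims-dimensional F32 tensor and fill it with uniform random values in [fmin, fmax]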
static struct ggml_tensor * get_random_tensor_f32(
        struct ggml_context * ctx0,
        int ndims,
        const int64_t ne[],
        float fmin,
        float fmax) {
    struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F32, ndims, ne);

    switch (ndims) {
        case 1:
            for (int i0 = 0; i0 < ne[0]; i0++) {
                ((float *)result->data)[i0] = frand()*(fmax - fmin) + fmin;
            }
            break;
        case 2:
            for (int i1 = 0; i1 < ne[1]; i1++) {
                for (int i0 = 0; i0 < ne[0]; i0++) {
                    ((float *)result->data)[i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                }
            }
            break;
        case 3:
            for (int i2 = 0; i2 < ne[2]; i2++) {
                for (int i1 = 0; i1 < ne[1]; i1++) {
                    for (int i0 = 0; i0 < ne[0]; i0++) {
                        ((float *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                    }
                }
            }
            break;
        case 4:
            for (int i3 = 0; i3 < ne[3]; i3++) {
                for (int i2 = 0; i2 < ne[2]; i2++) {
                    for (int i1 = 0; i1 < ne[1]; i1++) {
                        for (int i0 = 0; i0 < ne[0]; i0++) {
                            ((float *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                        }
                    }
                }
            }
            break;
        default:
            assert(false);
    }

    return result;
}

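// plan the graph, grow the shared work buffer if needed, and run the computation on the CPU backend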
static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph * graph, int n_threads) {
    struct ggml_cplan plan = ggml_graph_plan(graph, n_threads, nullptr);

    if (plan.work_size > 0) {
        buf.resize(plan.work_size);
        plan.work_data = buf.data();
    }

    ggml_graph_compute(graph, &plan);
}

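// Consistency test for ggml's RoPE operators: applying RoPE twice (first with positions p0, then with
// the position delta p2 - p0) should give the same result as applying it once with positions p2.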
int main(int /*argc*/, const char ** /*argv*/) {
    struct ggml_init_params params = {
        /* .mem_size   = */ 128*1024*1024,
        /* .mem_buffer = */ NULL,
        /* .no_alloc   = */ false,
    };

    std::vector<uint8_t> work_buffer;

    struct ggml_context * ctx0 = ggml_init(params);

    struct ggml_tensor * x;

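    // iterations 0-2 exercise ggml_rope (standard, GPT-NeoX, GLM modes); iterations 3-5 exercise
    // ggml_rope_multi (mrope, vision, imrope) with per-section multi-dimensional positions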
    // rope f32
    for (int m = 0; m < 6; ++m) {
        const int ndims = 4;

        const int64_t n_rot = 128;
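        // layout assumed by this test: ne = { per-head features, heads, positions, batch };
        // the position tensors below carry one entry per ne[2] slot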
        const int64_t ne[4] = { 2*n_rot, 32, 73, 1 };

        const int n_past_0 = 100;
        const int n_past_2 = 33;

        struct ggml_tensor * r0;
        struct ggml_tensor * r1;
        struct ggml_tensor * r2;
        x = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
        int mode = -1;

        if (m < 3) {
            struct ggml_tensor * p0 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);
            struct ggml_tensor * p1 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);
            struct ggml_tensor * p2 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);

            for (int i = 0; i < ne[2]; ++i) {
                ((int32_t *) p0->data)[i] = n_past_0 + i;
                ((int32_t *) p1->data)[i] = n_past_2 - n_past_0;
                ((int32_t *) p2->data)[i] = n_past_2 + i;
            }
            // test mode 0, 2, 4 (standard, GPT-NeoX, GLM)
            mode = m == 0 ? 0 : m == 1 ? 2 : 4;

            // 100, 101, 102, ..., 172
            r0 = ggml_rope(ctx0, x, p0, n_rot, mode);
            // -67, -67, -67, ..., -67
            r1 = ggml_rope(ctx0, r0, p1, n_rot, mode); // "context swap", i.e. forget n_past_0 - n_past_2 tokens

            // 33, 34, 35, ..., 105
            r2 = ggml_rope(ctx0, x, p2, n_rot, mode);
        } else {
            // testing multi-dimensional rope position embedding modes
            struct ggml_tensor * p0 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2] * 4);
            struct ggml_tensor * p1 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2] * 4);
            struct ggml_tensor * p2 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2] * 4);

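            // sections partition the rotary dims across the multi-dimensional position components
            // (e.g. temporal/height/width/extra) used by the mrope-style modes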
            int sections[4] = {16, 24, 24, 0};
            mode = (m == 3) ? GGML_ROPE_TYPE_MROPE : (m == 4) ? GGML_ROPE_TYPE_VISION : GGML_ROPE_TYPE_IMROPE;

            for (int i = 0; i < ne[2]; ++i) {
                for (int j = 0; j < 4; ++j) {
                    ((int32_t *) p0->data)[i + ne[2] * j] = n_past_0 + i + j;
                    ((int32_t *) p1->data)[i + ne[2] * j] = n_past_2 - n_past_0;
                    ((int32_t *) p2->data)[i + ne[2] * j] = n_past_2 + i + j;
                }
            }

            // [[100, 101, 102, ..., 172],
            //  [101, 102, 103, ..., 173],
            //  [102, 103, 104, ..., 174]]
            r0 = ggml_rope_multi(
                ctx0, x, p0, nullptr,
                n_rot, sections, mode, 32768, 1000000, 1, 0, 1, 32, 1);
            // [[-67, -67, -67, ..., -67]
            //  [-67, -67, -67, ..., -67]
            //  [-67, -67, -67, ..., -67]]
            r1 = ggml_rope_multi(
                ctx0, r0, p1, nullptr,
                n_rot, sections, mode, 32768, 1000000, 1, 0, 1, 32, 1);

            // [[33, 34, 35, ..., 105]
            //  [34, 35, 36, ..., 106]
            //  [35, 36, 37, ..., 107]]
            r2 = ggml_rope_multi(
                ctx0, x, p2, nullptr,
                n_rot, sections, mode, 32768, 1000000, 1, 0, 1, 32, 1);
        }

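        // build a single graph that evaluates all three rotations, then run it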
        ggml_cgraph * gf = ggml_new_graph(ctx0);

        ggml_build_forward_expand(gf, r0);
        ggml_build_forward_expand(gf, r1);
        ggml_build_forward_expand(gf, r2);

        ggml_graph_compute_helper(work_buffer, gf, 4);

        // check that r1 and r2 are the same
        {
            double sum0 = 0.0;
            double sum1 = 0.0;
            double diff = 0.0;

            const float * r1_data = (float *) r1->data;
            const float * r2_data = (float *) r2->data;

            const int n_elements = ggml_nelements(r1);

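            // accumulate the L1 norms of r1 and r2 and their L1 difference; the relative error must stay below 1e-4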
            for (int i = 0; i < n_elements; ++i) {
                sum0 += fabs(r1_data[i]);
                sum1 += fabs(r2_data[i]);
                diff += fabs(r1_data[i] - r2_data[i]);
                //if (fabs(r1_data[i] - r2_data[i]) > 0.0001f) {
                //    printf("%d: %f %f\n", i, r1_data[i], r2_data[i]);
                //    printf("diff: %f\n", fabs(r1_data[i] - r2_data[i]));
                //}
            }

            //for (int i = 4096; i < 4096 + 128; ++i) {
            //    printf("%f %f\n", r1_data[i], r2_data[i]);
            //}

            printf("mode: %d\n", mode);
            printf("sum0: %f\n", sum0);
            printf("sum1: %f\n", sum1);
            printf("diff: %f\n", diff);
            printf("rel err: %f\n", diff / sum0);
            printf("rel err: %f\n", diff / sum1);

            GGML_ASSERT(diff / sum0 < 0.0001f);
            GGML_ASSERT(diff / sum1 < 0.0001f);
        }
    }

    ggml_free(ctx0);

    return 0;
}
| 263 | |