| 1 | #pragma once |
|---|---|
| 2 | |
| 3 | #include "ggml.h" |
| 4 | #include "traits.h" |
| 5 | #include "ggml-cpu-impl.h" |
| 6 | #include "ggml-impl.h" |
| 7 | #include "simd-mappings.h" |
| 8 | |
| 9 | #ifdef __cplusplus |
| 10 | |
| 11 | #include <utility> |
| 12 | |
| 13 | // convenience functions/macros for use in template calls |
| 14 | // note: these won't be required after the 'traits' lookup table is used. |
| 15 | static inline ggml_fp16_t f32_to_f16(float x) { |
| 16 | return GGML_CPU_FP32_TO_FP16(x); |
| 17 | } |
| 18 | |
| 19 | static inline float f16_to_f32(ggml_fp16_t x) { |
| 20 | return GGML_CPU_FP16_TO_FP32(x); |
| 21 | } |
| 22 | |
| 23 | static inline ggml_bf16_t f32_to_bf16(float x) { |
| 24 | return GGML_FP32_TO_BF16(x); |
| 25 | } |
| 26 | |
| 27 | static inline float bf16_to_f32(ggml_bf16_t x) { |
| 28 | return GGML_BF16_TO_FP32(x); |
| 29 | } |
| 30 | |
// Converts a 32-bit signed integer to float using the built-in integral to
// floating-point conversion. Values that float cannot represent exactly are
// rounded by that conversion.
static inline float i32_to_f32(int32_t x) {
    return static_cast<float>(x);
}
| 34 | |
// Converts a float to a 32-bit signed integer using the built-in floating-point
// to integral conversion, which discards the fractional part (truncation
// toward zero).
// NOTE: per the C++ standard the result is undefined when the truncated value
// does not fit in int32_t (including NaN/inf); no range check is done here.
static inline int32_t f32_to_i32(float x) {
    return static_cast<int32_t>(x);
}
| 38 | |
// Identity "conversion" for float: returns its argument unchanged. Gives float
// the same to/from-f32 function shape as the other element types.
static inline float f32_to_f32(float x) {
    const float same = x;
    return same;
}
| 42 | |
// Per-element-type conversion table. The primary template is only declared;
// each supported element type provides a specialization exposing `to_f32` and
// `from_f32` function pointers that convert a single value to/from float.
// TODO - merge this into the traits table, after using row-based conversions
template <typename T>
struct type_conversion_table;
| 46 | |
| 47 | template <> |
| 48 | struct type_conversion_table<ggml_fp16_t> { |
| 49 | static constexpr float (*to_f32)(ggml_fp16_t) = f16_to_f32; |
| 50 | static constexpr ggml_fp16_t (*from_f32)(float) = f32_to_f16; |
| 51 | }; |
| 52 | |
| 53 | template <> |
| 54 | struct type_conversion_table<float> { |
| 55 | static constexpr float (*to_f32)(float) = f32_to_f32; |
| 56 | static constexpr float (*from_f32)(float) = f32_to_f32; |
| 57 | }; |
| 58 | |
| 59 | template <> |
| 60 | struct type_conversion_table<ggml_bf16_t> { |
| 61 | static constexpr float (*to_f32)(ggml_bf16_t) = bf16_to_f32; |
| 62 | static constexpr ggml_bf16_t (*from_f32)(float) = f32_to_bf16; |
| 63 | }; |
| 64 | |
| 65 | template <> |
| 66 | struct type_conversion_table<int32_t> { |
| 67 | static constexpr float (*to_f32)(int32_t) = i32_to_f32; |
| 68 | static constexpr int32_t (*from_f32)(float) = f32_to_i32; |
| 69 | }; |
| 70 | |
| 71 | static std::pair<int64_t, int64_t> get_thread_range(const struct ggml_compute_params * params, const struct ggml_tensor * src0) { |
| 72 | const int64_t ith = params->ith; |
| 73 | const int64_t nth = params->nth; |
| 74 | |
| 75 | const int64_t nr = ggml_nrows(tensor: src0); |
| 76 | |
| 77 | // rows per thread |
| 78 | const int64_t dr = (nr + nth - 1)/nth; |
| 79 | |
| 80 | // row range for this thread |
| 81 | const int64_t ir0 = dr*ith; |
| 82 | const int64_t ir1 = MIN(ir0 + dr, nr); |
| 83 | |
| 84 | return {ir0, ir1}; |
| 85 | } |
| 86 | |
| 87 | #endif |
| 88 |