1#pragma once
2
3#include "ggml.h"
4#include "traits.h"
5#include "ggml-cpu-impl.h"
6#include "ggml-impl.h"
7#include "simd-mappings.h"
8
9#ifdef __cplusplus
10
11#include <utility>
12
13// convenience functions/macros for use in template calls
14// note: these won't be required after the 'traits' lookup table is used.
15static inline ggml_fp16_t f32_to_f16(float x) {
16 return GGML_CPU_FP32_TO_FP16(x);
17}
18
19static inline float f16_to_f32(ggml_fp16_t x) {
20 return GGML_CPU_FP16_TO_FP32(x);
21}
22
23static inline ggml_bf16_t f32_to_bf16(float x) {
24 return GGML_FP32_TO_BF16(x);
25}
26
27static inline float bf16_to_f32(ggml_bf16_t x) {
28 return GGML_BF16_TO_FP32(x);
29}
30
// i32 -> f32: plain integer-to-float value conversion.
// Large magnitudes may not be exactly representable in a float and are rounded.
static inline float i32_to_f32(int32_t x) {
    return static_cast<float>(x);
}
34
// f32 -> i32: plain float-to-integer value conversion.
// The fractional part is discarded (truncation toward zero); no range check is performed,
// so the caller must pass a value whose truncated form fits in int32_t.
static inline int32_t f32_to_i32(float x) {
    return static_cast<int32_t>(x);
}
38
// f32 -> f32: identity; returns the argument unchanged.
// Used by type_conversion_table<float> for both of its conversion directions.
static inline float f32_to_f32(float x) {
    const float unchanged = x;
    return unchanged;
}
42
// TODO - merge this into the traits table, after using row-based conversions
// Primary template: declared but never defined. Each supported element type
// supplies an explicit specialization exposing `to_f32` and `from_f32`.
template <typename T> struct type_conversion_table;
46
47template <>
48struct type_conversion_table<ggml_fp16_t> {
49 static constexpr float (*to_f32)(ggml_fp16_t) = f16_to_f32;
50 static constexpr ggml_fp16_t (*from_f32)(float) = f32_to_f16;
51};
52
53template <>
54struct type_conversion_table<float> {
55 static constexpr float (*to_f32)(float) = f32_to_f32;
56 static constexpr float (*from_f32)(float) = f32_to_f32;
57};
58
59template <>
60struct type_conversion_table<ggml_bf16_t> {
61 static constexpr float (*to_f32)(ggml_bf16_t) = bf16_to_f32;
62 static constexpr ggml_bf16_t (*from_f32)(float) = f32_to_bf16;
63};
64
65template <>
66struct type_conversion_table<int32_t> {
67 static constexpr float (*to_f32)(int32_t) = i32_to_f32;
68 static constexpr int32_t (*from_f32)(float) = f32_to_i32;
69};
70
71static std::pair<int64_t, int64_t> get_thread_range(const struct ggml_compute_params * params, const struct ggml_tensor * src0) {
72 const int64_t ith = params->ith;
73 const int64_t nth = params->nth;
74
75 const int64_t nr = ggml_nrows(tensor: src0);
76
77 // rows per thread
78 const int64_t dr = (nr + nth - 1)/nth;
79
80 // row range for this thread
81 const int64_t ir0 = dr*ith;
82 const int64_t ir1 = MIN(ir0 + dr, nr);
83
84 return {ir0, ir1};
85}
86
87#endif
88