common.h source code [llama.cpp/ggml/src/ggml-cpu/common.h]

1	#pragma once
2
3	#include "ggml.h"
4	#include "traits.h"
5	#include "ggml-cpu-impl.h"
6	#include "ggml-impl.h"
7	#include "simd-mappings.h"
8
9	#ifdef __cplusplus
10
11	#include <utility>
12
13	// convenience functions/macros for use in template calls
14	// note: these won't be required after the 'traits' lookup table is used.
15	static inline ggml_fp16_t f32_to_f16(float x) {
16	return GGML_CPU_FP32_TO_FP16(x);
17	}
18
19	static inline float f16_to_f32(ggml_fp16_t x) {
20	return GGML_CPU_FP16_TO_FP32(x);
21	}
22
23	static inline ggml_bf16_t f32_to_bf16(float x) {
24	return GGML_FP32_TO_BF16(x);
25	}
26
27	static inline float bf16_to_f32(ggml_bf16_t x) {
28	return GGML_BF16_TO_FP32(x);
29	}
30
// Scalar int32_t -> float conversion.
// Uses the language's built-in integer-to-float conversion; integers that
// are not exactly representable as a float are rounded by that conversion.
static inline float i32_to_f32(int32_t x) {
    const float as_float = (float) x;
    return as_float;
}
34
// Scalar float -> int32_t conversion.
// Uses the language's built-in float-to-integer conversion, which discards
// the fractional part (truncation toward zero). No range check is done here:
// values that do not fit in int32_t (and NaN) are the caller's responsibility.
static inline int32_t f32_to_i32(float x) {
    const int32_t as_int = (int32_t) x;
    return as_int;
}
38
// Identity conversion for float.
// Exists so that float can be handled through the same to/from-f32 function
// interface as the other element types.
static inline float f32_to_f32(float x) {
    const float unchanged = x;
    return unchanged;
}
42
// TODO - merge this into the traits table, after using row-based conversions
//
// Per-element-type table of scalar conversions to and from float.
// Only declared here; each supported element type provides an explicit
// specialization exposing `to_f32` and `from_f32`. Using an unsupported
// type is a compile-time error because the primary template is incomplete.
template <typename T>
struct type_conversion_table;
46
47	template <>
48	struct type_conversion_table<ggml_fp16_t> {
49	static constexpr float (*to_f32)(ggml_fp16_t) = f16_to_f32;
50	static constexpr ggml_fp16_t (from_f32)(float*) = f32_to_f16;
51	};
52
53	template <>
54	struct type_conversion_table<float> {
55	static constexpr float (to_f32)(float*) = f32_to_f32;
56	static constexpr float (from_f32)(float*) = f32_to_f32;
57	};
58
59	template <>
60	struct type_conversion_table<ggml_bf16_t> {
61	static constexpr float (*to_f32)(ggml_bf16_t) = bf16_to_f32;
62	static constexpr ggml_bf16_t (from_f32)(float*) = f32_to_bf16;
63	};
64
65	template <>
66	struct type_conversion_table<int32_t> {
67	static constexpr float (*to_f32)(int32_t) = i32_to_f32;
68	static constexpr int32_t (from_f32)(float*) = f32_to_i32;
69	};
70
71	static std::pair<int64_t, int64_t> get_thread_range(const struct ggml_compute_params * params, const struct ggml_tensor * src0) {
72	const int64_t ith = params->ith;
73	const int64_t nth = params->nth;
74
75	const int64_t nr = ggml_nrows(tensor: src0);
76
77	// rows per thread
78	const int64_t dr = (nr + nth - `1`)/nth;
79
80	// row range for this thread
81	const int64_t ir0 = dr*ith;
82	const int64_t ir1 = MIN(ir0 + dr, nr);
83
84	return {ir0, ir1};
85	}
86
87	#endif
88

Browse the source code of llama.cpp/ggml/src/ggml-cpu/common.h