#include "unary-ops.h"

static inline float op_abs(float x) {
    return fabsf(x);
}

static inline float op_sgn(float x) {
    return (x > 0.f) ? 1.f : ((x < 0.f) ? -1.f : 0.f);
}

static inline float op_neg(float x) {
    return -x;
}

static inline float op_step(float x) {
    return (x > 0.f) ? 1.f : 0.f;
}

static inline float op_tanh(float x) {
    return tanhf(x);
}

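// ELU with alpha = 1: x for x > 0, expm1(x) = exp(x) - 1 otherwise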
static inline float op_elu(float x) {
    return (x > 0.f) ? x : expm1f(x);
}

static inline float op_relu(float x) {
    return (x > 0.f) ? x : 0.f;
}

static inline float op_sigmoid(float x) {
    return 1.f / (1.f + expf(-x));
}

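// hard-sigmoid: clamp((x + 3) / 6, 0, 1)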
static inline float op_hardsigmoid(float x) {
    return fminf(1.0f, fmaxf(0.0f, (x + 3.0f) / 6.0f));
}

static inline float op_exp(float x) {
    return expf(x);
}

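// hard-swish: x * hard_sigmoid(x) = x * clamp((x + 3) / 6, 0, 1)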
static inline float op_hardswish(float x) {
    return x * fminf(1.0f, fmaxf(0.0f, (x + 3.0f) / 6.0f));
}

static inline float op_sqr(float x) {
    return x * x;
}

static inline float op_sqrt(float x) {
    return sqrtf(x);
}

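// xIELU:
//   x >  0: alpha_p * x^2 + beta * x
//   x <= 0: alpha_n * (expm1(min(x, eps)) - x) + beta * x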
static inline float op_xielu(float x, float alpha_n, float alpha_p, float beta, float eps) {
    if (x > 0.0f) {
        return alpha_p * x * x + beta * x;
    } else {
        const float min_x_eps = fminf(x, eps);
        return (expm1f(min_x_eps) - x) * alpha_n + beta * x;
    }
}

static inline float op_sin(float x) {
    return sinf(x);
}

static inline float op_cos(float x) {
    return cosf(x);
}

static inline float op_log(float x) {
    return logf(x);
}

static inline float op_floor(float x) {
    return floorf(x);
}

static inline float op_ceil(float x) {
    return ceilf(x);
}

static inline float op_round(float x) {
    return roundf(x);
}

static inline float op_trunc(float x) {
    return truncf(x);
}

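// Apply the scalar op element-wise over a contiguous run of n values,
// converting src0_t -> f32 before the op and f32 -> dst_t after it.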
template <float (*op)(float), typename src0_t, typename dst_t>
static inline void vec_unary_op(int64_t n, dst_t * y, const src0_t * x) {
    constexpr auto src0_to_f32 = type_conversion_table<src0_t>::to_f32;
    constexpr auto f32_to_dst  = type_conversion_table<dst_t >::from_f32;

    for (int i = 0; i < n; i++) {
        y[i] = f32_to_dst(op(src0_to_f32(x[i])));
    }
}

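// Apply the scalar op to every row of src0 assigned to the current thread;
// each row is contiguous (asserted below), so it can be handled by vec_unary_op.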
template <float (*op)(float), typename src0_t, typename dst_t>
static void apply_unary_op(const ggml_compute_params * params, ggml_tensor * dst) {
    const ggml_tensor * src0 = dst->src[0];

    GGML_ASSERT(ggml_is_contiguous_1(src0) && ggml_is_contiguous_1(dst) && ggml_are_same_shape(src0, dst));

    GGML_TENSOR_UNARY_OP_LOCALS

    GGML_ASSERT( nb0 == sizeof(dst_t));
    GGML_ASSERT(nb00 == sizeof(src0_t));

    const auto [ir0, ir1] = get_thread_range(params, src0);

    for (int64_t ir = ir0; ir < ir1; ++ir) {
        const int64_t i03 = ir/(ne02*ne01);
        const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
        const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);

        dst_t * dst_ptr = (dst_t *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 );
        const src0_t * src0_ptr = (const src0_t *) ((const char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);

        vec_unary_op<op>(ne0, dst_ptr, src0_ptr);
    }
}

// TODO: Use the 'traits' lookup table (for type conversion fns), instead of a mass of 'if' conditions with long templates
template <float (*op)(float)>
static void unary_op(const ggml_compute_params * params, ggml_tensor * dst) {
    const ggml_tensor * src0 = dst->src[0];

    /* */ if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { // all f32
        apply_unary_op<op, float, float>(params, dst);
    } else if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) { // all f16
        apply_unary_op<op, ggml_fp16_t, ggml_fp16_t>(params, dst);
    } else if (src0->type == GGML_TYPE_BF16 && dst->type == GGML_TYPE_BF16) { // all bf16
        apply_unary_op<op, ggml_bf16_t, ggml_bf16_t>(params, dst);
    } else if (src0->type == GGML_TYPE_BF16 && dst->type == GGML_TYPE_F32) {
        apply_unary_op<op, ggml_bf16_t, float>(params, dst);
    } else if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F32) {
        apply_unary_op<op, ggml_fp16_t, float>(params, dst);
    } else {
        fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s\n", __func__,
            ggml_type_name(dst->type), ggml_type_name(src0->type));
        GGML_ABORT("fatal error");
    }
}

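// Dispatcher for ops that take the destination tensor as an extra argument
// (e.g. to read additional op parameters).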
// forward declaration of the functor dispatcher defined below
template <typename Op>
static void unary_op_functor(const ggml_compute_params * params, ggml_tensor * dst, Op op);

template <float (*op)(float, ggml_tensor *)>
static void unary_op_params(const ggml_compute_params * params, ggml_tensor * dst) {
    unary_op_functor(params, dst, [dst](float x) { return op(x, dst); });
}

// Extend vec_unary_op to support functors
template <typename Op, typename src0_t, typename dst_t>
static inline void vec_unary_op_functor(int64_t n, dst_t * y, const src0_t * x, Op op) {
    constexpr auto src0_to_f32 = type_conversion_table<src0_t>::to_f32;
    constexpr auto f32_to_dst  = type_conversion_table<dst_t >::from_f32;

    for (int i = 0; i < n; i++) {
        y[i] = f32_to_dst(op(src0_to_f32(x[i])));
    }
}

// Extend apply_unary_op to support functors
template <typename Op, typename src0_t, typename dst_t>
static void apply_unary_op_functor(const ggml_compute_params * params, ggml_tensor * dst, Op op) {
    const ggml_tensor * src0 = dst->src[0];

    GGML_ASSERT(ggml_is_contiguous_1(src0) && ggml_is_contiguous_1(dst) && ggml_are_same_shape(src0, dst));

    GGML_TENSOR_UNARY_OP_LOCALS

    GGML_ASSERT( nb0 == sizeof(dst_t));
    GGML_ASSERT(nb00 == sizeof(src0_t));

    const auto [ir0, ir1] = get_thread_range(params, src0);

    for (int64_t ir = ir0; ir < ir1; ++ir) {
        const int64_t i03 = ir/(ne02*ne01);
        const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
        const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);

        dst_t * dst_ptr = (dst_t *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 );
        const src0_t * src0_ptr = (const src0_t *) ((const char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);

        vec_unary_op_functor(ne0, dst_ptr, src0_ptr, op);
    }
}

// Generic dispatcher for functors
template <typename Op>
static void unary_op_functor(const ggml_compute_params * params, ggml_tensor * dst, Op op) {
    const ggml_tensor * src0 = dst->src[0];

    /* */ if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { // all f32
        apply_unary_op_functor<Op, float, float>(params, dst, op);
    } else if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) { // all f16
        apply_unary_op_functor<Op, ggml_fp16_t, ggml_fp16_t>(params, dst, op);
    } else if (src0->type == GGML_TYPE_BF16 && dst->type == GGML_TYPE_BF16) { // all bf16
        apply_unary_op_functor<Op, ggml_bf16_t, ggml_bf16_t>(params, dst, op);
    } else if (src0->type == GGML_TYPE_BF16 && dst->type == GGML_TYPE_F32) {
        apply_unary_op_functor<Op, ggml_bf16_t, float>(params, dst, op);
    } else if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F32) {
        apply_unary_op_functor<Op, ggml_fp16_t, float>(params, dst, op);
    } else {
        fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s\n", __func__,
            ggml_type_name(dst->type), ggml_type_name(src0->type));
        GGML_ABORT("fatal error");
    }
}

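// Per-operator entry points: each instantiates the generic dispatcher with the matching scalar op.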
void ggml_compute_forward_abs(const ggml_compute_params * params, ggml_tensor * dst) {
    unary_op<op_abs>(params, dst);
}

void ggml_compute_forward_sgn(const ggml_compute_params * params, ggml_tensor * dst) {
    unary_op<op_sgn>(params, dst);
}

void ggml_compute_forward_neg(const ggml_compute_params * params, ggml_tensor * dst) {
    unary_op<op_neg>(params, dst);
}

void ggml_compute_forward_step(const ggml_compute_params * params, ggml_tensor * dst) {
    unary_op<op_step>(params, dst);
}

void ggml_compute_forward_tanh(const ggml_compute_params * params, ggml_tensor * dst) {
    unary_op<op_tanh>(params, dst);
}

void ggml_compute_forward_elu(const ggml_compute_params * params, ggml_tensor * dst) {
    unary_op<op_elu>(params, dst);
}

void ggml_compute_forward_relu(const ggml_compute_params * params, ggml_tensor * dst) {
    unary_op<op_relu>(params, dst);
}

void ggml_compute_forward_sigmoid(const ggml_compute_params * params, ggml_tensor * dst) {
    unary_op<op_sigmoid>(params, dst);
}

void ggml_compute_forward_hardsigmoid(const ggml_compute_params * params, ggml_tensor * dst) {
    unary_op<op_hardsigmoid>(params, dst);
}

void ggml_compute_forward_exp(const ggml_compute_params * params, ggml_tensor * dst) {
    unary_op<op_exp>(params, dst);
}

void ggml_compute_forward_hardswish(const ggml_compute_params * params, ggml_tensor * dst) {
    unary_op<op_hardswish>(params, dst);
}

void ggml_compute_forward_sqr(const ggml_compute_params * params, ggml_tensor * dst) {
    unary_op<op_sqr>(params, dst);
}

void ggml_compute_forward_sqrt(const ggml_compute_params * params, ggml_tensor * dst) {
    unary_op<op_sqrt>(params, dst);
}

void ggml_compute_forward_sin(const ggml_compute_params * params, ggml_tensor * dst) {
    unary_op<op_sin>(params, dst);
}

void ggml_compute_forward_cos(const ggml_compute_params * params, ggml_tensor * dst) {
    unary_op<op_cos>(params, dst);
}

void ggml_compute_forward_log(const ggml_compute_params * params, ggml_tensor * dst) {
    unary_op<op_log>(params, dst);
}

void ggml_compute_forward_floor(const ggml_compute_params * params, ggml_tensor * dst) {
    unary_op<op_floor>(params, dst);
}

void ggml_compute_forward_ceil(const ggml_compute_params * params, ggml_tensor * dst) {
    unary_op<op_ceil>(params, dst);
}

void ggml_compute_forward_round(const ggml_compute_params * params, ggml_tensor * dst) {
    unary_op<op_round>(params, dst);
}

void ggml_compute_forward_trunc(const ggml_compute_params * params, ggml_tensor * dst) {
    unary_op<op_trunc>(params, dst);
}

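// xielu takes runtime parameters, so it goes through the functor dispatcher:
// the four floats are read from the tensor's op_params (starting at index 1)
// and captured in a lambda around op_xielu.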
void ggml_compute_forward_xielu(const ggml_compute_params * params, ggml_tensor * dst) {
    const float alpha_n = ggml_get_op_params_f32(dst, 1);
    const float alpha_p = ggml_get_op_params_f32(dst, 2);
    const float beta    = ggml_get_op_params_f32(dst, 3);
    const float eps     = ggml_get_op_params_f32(dst, 4);

    const auto xielu_op_params = [alpha_n, alpha_p, beta, eps](float f) {
        return op_xielu(f, alpha_n, alpha_p, beta, eps);
    };

    unary_op_functor(params, dst, xielu_op_params);
}