unary-ops.cpp source code [llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp]

1	#include "unary-ops.h"
2
// Returns the absolute value of x.
static inline float op_abs(float x) {
    return fabsf(x);
}
6
// Sign of x: 1 when x > 0, -1 when x < 0, and 0 when neither comparison holds.
static inline float op_sgn(float x) {
    return (x > 0.f) ? 1.f : ((x < 0.f) ? -1.f : 0.f);
}
10
// Returns x with its sign flipped.
static inline float op_neg(float x) {
    return -x;
}
14
// Step function: 1 when x is strictly positive, otherwise 0.
static inline float op_step(float x) {
    return (x > 0.f) ? 1.f : 0.f;
}
18
// Hyperbolic tangent of x.
static inline float op_tanh(float x) {
    return tanhf(x);
}
22
// ELU: x for strictly positive x, otherwise expm1f(x), i.e. exp(x) - 1.
static inline float op_elu(float x) {
    return (x > 0.f) ? x : expm1f(x);
}
26
// ReLU: x for strictly positive x, otherwise 0.
static inline float op_relu(float x) {
    return (x > 0.f) ? x : 0.f;
}
30
// Logistic sigmoid: 1 / (1 + exp(-x)).
static inline float op_sigmoid(float x) {
    return 1.f / (1.f + expf(-x));
}
34
// Hard sigmoid: (x + 3) / 6 clamped to the range [0, 1].
static inline float op_hardsigmoid(float x) {
    return fminf(1.0f, fmaxf(0.0f, (x + 3.0f) / 6.0f));
}
38
// Natural exponential of x.
static inline float op_exp(float x) {
    return expf(x);
}
42
// Hard swish: x scaled by (x + 3) / 6 clamped to the range [0, 1].
static inline float op_hardswish(float x) {
    return x * fminf(1.0f, fmaxf(0.0f, (x + 3.0f) / 6.0f));
}
46
// Square of x.
static inline float op_sqr(float x) {
    return x * x;
}
50
// Square root of x.
static inline float op_sqrt(float x) {
    return sqrtf(x);
}
54
// xIELU activation.
//   x >  0: alpha_p * x^2 + beta * x
//   x <= 0: (expm1(min(x, eps)) - x) * alpha_n + beta * x
// The argument of expm1f is capped at eps on the non-positive branch.
static inline float op_xielu(float x, float alpha_n, float alpha_p, float beta, float eps) {
    if (x > 0.0f) {
        return alpha_p * x * x + beta * x;
    } else {
        const float min_x_eps = fminf(x, eps);
        return (expm1f(min_x_eps) - x) * alpha_n + beta * x;
    }
}
63
// Sine of x.
static inline float op_sin(float x) {
    return sinf(x);
}
67
// Cosine of x.
static inline float op_cos(float x) {
    return cosf(x);
}
71
// Natural logarithm of x.
static inline float op_log(float x) {
    return logf(x);
}
75
// Largest integral value not greater than x.
static inline float op_floor(float x) {
    return floorf(x);
}
79
// Smallest integral value not less than x.
static inline float op_ceil(float x) {
    return ceilf(x);
}
83
// Rounds x to the nearest integral value using roundf (halfway cases go away from zero).
static inline float op_round(float x) {
    return roundf(x);
}
87
// Discards the fractional part of x (rounds toward zero).
static inline float op_trunc(float x) {
    return truncf(x);
}
91
92	template <float (op)(float), typename* src0_t, typename dst_t>
93	static inline void vec_unary_op(int64_t n, dst_t * y, const src0_t * x) {
94	constexpr auto src0_to_f32 = type_conversion_table<src0_t>::to_f32;
95	constexpr auto f32_to_dst = type_conversion_table<dst_t >::from_f32;
96
97	for (int i = `0`; i < n; i++) {
98	y[i] = f32_to_dst(op(src0_to_f32(x[i])));
99	}
100	}
101
102	template <float (op)(float), typename* src0_t, typename dst_t>
103	static void apply_unary_op(const ggml_compute_params * params, ggml_tensor * dst) {
104	const ggml_tensor * src0 = dst->src[`0`];
105
106	GGML_ASSERT(ggml_is_contiguous_1(src0) && ggml_is_contiguous_1(dst) && ggml_are_same_shape(src0, dst));
107
108	GGML_TENSOR_UNARY_OP_LOCALS
109
110	GGML_ASSERT( nb0 == sizeof(dst_t));
111	GGML_ASSERT(nb00 == sizeof(src0_t));
112
113	const auto [ir0, ir1] = get_thread_range(params, src0);
114
115	for (int64_t ir = ir0; ir < ir1; ++ir) {
116	const int64_t i03 = ir/(ne02*ne01);
117	const int64_t i02 = (ir - i03ne02ne01)/ne01;
118	const int64_t i01 = (ir - i03ne02ne01 - i02*ne01);
119
120	dst_t * dst_ptr = (dst_t ) ((char* ) dst->data + i03nb3 + i02nb2 + i01nb1 );
121	const src0_t * src0_ptr = (const src0_t ) ((const* char ) src0->data + i03nb03 + i02nb02 + i01nb01);
122
123	vec_unary_op<op>(ne0, dst_ptr, src0_ptr);
124	}
125	}
126
127	// TODO: Use the 'traits' lookup table (for type conversion fns), instead of a mass of 'if' conditions with long templates
128	template <float (op)(float*)>
129	static void unary_op(const ggml_compute_params * params, ggml_tensor * dst) {
130	const ggml_tensor * src0 = dst->src[`0`];
131
132	/ / if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { // all f32
133	apply_unary_op<op, float, float>(params, dst);
134	} else if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) { // all f16
135	apply_unary_op<op, ggml_fp16_t, ggml_fp16_t>(params, dst);
136	} else if (src0->type == GGML_TYPE_BF16 && dst->type == GGML_TYPE_BF16) { // all bf16
137	apply_unary_op<op, ggml_bf16_t, ggml_bf16_t>(params, dst);
138	} else if (src0->type == GGML_TYPE_BF16 && dst->type == GGML_TYPE_F32) {
139	apply_unary_op<op, ggml_bf16_t, float>(params, dst);
140	} else if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F32) {
141	apply_unary_op<op, ggml_fp16_t, float>(params, dst);
142	} else {
143	fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s\n", __func__,
144	ggml_type_name(type: dst->type), ggml_type_name(type: src0->type));
145	GGML_ABORT("fatal error");
146	}
147	}
148
149	template <float (op)(float, ggml_tensor )>
150	static void unary_op_params(const ggml_compute_params * params, ggml_tensor * dst) {
151	const ggml_tensor * src0 = dst->src[`0`];
152
153	/ / if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { // all f32
154	apply_unary_op<op, float, float>(params, dst);
155	} else if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) { // all f16
156	apply_unary_op<op, ggml_fp16_t, ggml_fp16_t>(params, dst);
157	} else if (src0->type == GGML_TYPE_BF16 && dst->type == GGML_TYPE_BF16) { // all bf16
158	apply_unary_op<op, ggml_bf16_t, ggml_bf16_t>(params, dst);
159	} else if (src0->type == GGML_TYPE_BF16 && dst->type == GGML_TYPE_F32) {
160	apply_unary_op<op, ggml_bf16_t, float>(params, dst);
161	} else if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F32) {
162	apply_unary_op<op, ggml_fp16_t, float>(params, dst);
163	} else {
164	fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s\n", __func__,
165	ggml_type_name(type: dst->type), ggml_type_name(type: src0->type));
166	GGML_ABORT("fatal error");
167	}
168	}
169
170	// Extend vec_unary_op to support functors
171	template <typename Op, typename src0_t, typename dst_t>
172	static inline void vec_unary_op_functor(int64_t n, dst_t * y, const src0_t * x, Op op) {
173	constexpr auto src0_to_f32 = type_conversion_table<src0_t>::to_f32;
174	constexpr auto f32_to_dst = type_conversion_table<dst_t >::from_f32;
175
176	for (int i = `0`; i < n; i++) {
177	y[i] = f32_to_dst(op(src0_to_f32(x[i])));
178	}
179	}
180
181	// Extend apply_unary_op to support functors
182	template <typename Op, typename src0_t, typename dst_t>
183	static void apply_unary_op_functor(const ggml_compute_params * params, ggml_tensor * dst, Op op) {
184	const ggml_tensor * src0 = dst->src[`0`];
185
186	GGML_ASSERT(ggml_is_contiguous_1(src0) && ggml_is_contiguous_1(dst) && ggml_are_same_shape(src0, dst));
187
188	GGML_TENSOR_UNARY_OP_LOCALS
189
190	GGML_ASSERT( nb0 == sizeof(dst_t));
191	GGML_ASSERT(nb00 == sizeof(src0_t));
192
193	const auto [ir0, ir1] = get_thread_range(params, src0);
194
195	for (int64_t ir = ir0; ir < ir1; ++ir) {
196	const int64_t i03 = ir/(ne02*ne01);
197	const int64_t i02 = (ir - i03ne02ne01)/ne01;
198	const int64_t i01 = (ir - i03ne02ne01 - i02*ne01);
199
200	dst_t * dst_ptr = (dst_t ) ((char* ) dst->data + i03nb3 + i02nb2 + i01nb1 );
201	const src0_t * src0_ptr = (const src0_t ) ((const* char ) src0->data + i03nb03 + i02nb02 + i01nb01);
202
203	vec_unary_op_functor(ne0, dst_ptr, src0_ptr, op);
204	}
205	}
206
207	// Generic dispatcher for functors
208	template <typename Op>
209	static void unary_op_functor(const ggml_compute_params * params, ggml_tensor * dst, Op op) {
210	const ggml_tensor * src0 = dst->src[`0`];
211
212	/ / if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { // all f32
213	apply_unary_op_functor<Op, float, float>(params, dst, op);
214	} else if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) { // all f16
215	apply_unary_op_functor<Op, ggml_fp16_t, ggml_fp16_t>(params, dst, op);
216	} else if (src0->type == GGML_TYPE_BF16 && dst->type == GGML_TYPE_BF16) { // all bf16
217	apply_unary_op_functor<Op, ggml_bf16_t, ggml_bf16_t>(params, dst, op);
218	} else if (src0->type == GGML_TYPE_BF16 && dst->type == GGML_TYPE_F32) {
219	apply_unary_op_functor<Op, ggml_bf16_t, float>(params, dst, op);
220	} else if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F32) {
221	apply_unary_op_functor<Op, ggml_fp16_t, float>(params, dst, op);
222	} else {
223	fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s\n", __func__,
224	ggml_type_name(type: dst->type), ggml_type_name(type: src0->type));
225	GGML_ABORT("fatal error");
226	}
227	}
228
229	void ggml_compute_forward_abs(const ggml_compute_params * params, ggml_tensor * dst) {
230	unary_op<op_abs>(params, dst);
231	}
232
233	void ggml_compute_forward_sgn(const ggml_compute_params * params, ggml_tensor * dst) {
234	unary_op<op_sgn>(params, dst);
235	}
236
237	void ggml_compute_forward_neg(const ggml_compute_params * params, ggml_tensor * dst) {
238	unary_op<op_neg>(params, dst);
239	}
240
241	void ggml_compute_forward_step(const ggml_compute_params * params, ggml_tensor * dst) {
242	unary_op<op_step>(params, dst);
243	}
244
245	void ggml_compute_forward_tanh(const ggml_compute_params * params, ggml_tensor * dst) {
246	unary_op<op_tanh>(params, dst);
247	}
248
249	void ggml_compute_forward_elu(const ggml_compute_params * params, ggml_tensor * dst) {
250	unary_op<op_elu>(params, dst);
251	}
252
253	void ggml_compute_forward_relu(const ggml_compute_params * params, ggml_tensor * dst) {
254	unary_op<op_relu>(params, dst);
255	}
256
257	void ggml_compute_forward_sigmoid(const ggml_compute_params * params, ggml_tensor * dst) {
258	unary_op<op_sigmoid>(params, dst);
259	}
260
261	void ggml_compute_forward_hardsigmoid(const ggml_compute_params * params, ggml_tensor * dst) {
262	unary_op<op_hardsigmoid>(params, dst);
263	}
264
265	void ggml_compute_forward_exp(const ggml_compute_params * params, ggml_tensor * dst) {
266	unary_op<op_exp>(params, dst);
267	}
268
269	void ggml_compute_forward_hardswish(const ggml_compute_params * params, ggml_tensor * dst) {
270	unary_op<op_hardswish>(params, dst);
271	}
272
273	void ggml_compute_forward_sqr(const ggml_compute_params * params, ggml_tensor * dst) {
274	unary_op<op_sqr>(params, dst);
275	}
276
277	void ggml_compute_forward_sqrt(const ggml_compute_params * params, ggml_tensor * dst) {
278	unary_op<op_sqrt>(params, dst);
279	}
280
281	void ggml_compute_forward_sin(const ggml_compute_params * params, ggml_tensor * dst) {
282	unary_op<op_sin>(params, dst);
283	}
284
285	void ggml_compute_forward_cos(const ggml_compute_params * params, ggml_tensor * dst) {
286	unary_op<op_cos>(params, dst);
287	}
288
289	void ggml_compute_forward_log(const ggml_compute_params * params, ggml_tensor * dst) {
290	unary_op<op_log>(params, dst);
291	}
292
293	void ggml_compute_forward_floor(const ggml_compute_params * params, ggml_tensor * dst) {
294	unary_op<op_floor>(params, dst);
295	}
296
297	void ggml_compute_forward_ceil(const ggml_compute_params * params, ggml_tensor * dst) {
298	unary_op<op_ceil>(params, dst);
299	}
300
301	void ggml_compute_forward_round(const ggml_compute_params * params, ggml_tensor * dst) {
302	unary_op<op_round>(params, dst);
303	}
304
305	void ggml_compute_forward_trunc(const ggml_compute_params * params, ggml_tensor * dst) {
306	unary_op<op_trunc>(params, dst);
307	}
308
309	void ggml_compute_forward_xielu(const ggml_compute_params * params, ggml_tensor * dst) {
310	const float alpha_n = ggml_get_op_params_f32(tensor: dst, i: `1`);
311	const float alpha_p = ggml_get_op_params_f32(tensor: dst, i: `2`);
312	const float beta = ggml_get_op_params_f32(tensor: dst, i: `3`);
313	const float eps = ggml_get_op_params_f32(tensor: dst, i: `4`);
314
315	const auto xielu_op_params = [alpha_n, alpha_p, beta, eps](float f) {
316	return op_xielu(x: f, alpha_n, alpha_p, beta, eps);
317	};
318
319	unary_op_functor(params, dst, op: xielu_op_params);
320	}
321
322

Browse the source code of llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp