unary.cuh source code [llama.cpp/ggml/src/ggml-cuda/unary.cuh]

1	#pragma once
2	#include "common.cuh"
3
// Per-operation block-size constants; every macro in this group expands to 256.
// NOTE(review): the names suggest these are the CUDA thread-block sizes used when
// launching the corresponding unary/GLU kernels; the launch sites are not visible
// in this header -- confirm against the implementation file before changing a value.
4	#define CUDA_NEG_BLOCK_SIZE 256
5	#define CUDA_STEP_BLOCK_SIZE 256
6	#define CUDA_GELU_BLOCK_SIZE 256
7	#define CUDA_SILU_BLOCK_SIZE 256
8	#define CUDA_SILU_BACK_BLOCK_SIZE 256
9	#define CUDA_TANH_BLOCK_SIZE 256
10	#define CUDA_RELU_BLOCK_SIZE 256
11	#define CUDA_SIGMOID_BLOCK_SIZE 256
12	#define CUDA_HARDSIGMOID_BLOCK_SIZE 256
13	#define CUDA_EXP_BLOCK_SIZE 256
14	#define CUDA_HARDSWISH_BLOCK_SIZE 256
15	#define CUDA_SQR_BLOCK_SIZE 256
16	#define CUDA_SQRT_BLOCK_SIZE 256
17	#define CUDA_SIN_BLOCK_SIZE 256
18	#define CUDA_COS_BLOCK_SIZE 256
19	#define CUDA_GLU_BLOCK_SIZE 256
20	#define CUDA_XIELU_BLOCK_SIZE 256
21
// Forward declarations of the unary / gated-unit operation entry points.
// Every function has the same shape: it takes the CUDA backend context by
// reference and a pointer to the destination tensor, and returns nothing.
// None carries a __device__/__global__ qualifier, so these are host functions.
// NOTE(review): presumably each one reads its input tensor(s) and parameters
// from `dst` and launches the matching kernel on the context's stream -- the
// definitions are not visible in this header; confirm in the implementation file.
22	void ggml_cuda_op_abs(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
23
24	void ggml_cuda_op_sgn(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
25
26	void ggml_cuda_op_neg(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
27
28	void ggml_cuda_op_step(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
29
30	void ggml_cuda_op_gelu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
31
32	void ggml_cuda_op_silu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
33
34	void ggml_cuda_op_silu_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
35
36	void ggml_cuda_op_gelu_erf(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
37
38	void ggml_cuda_op_gelu_quick(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
39
40	void ggml_cuda_op_tanh(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
41
42	void ggml_cuda_op_relu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
43
44	void ggml_cuda_op_sigmoid(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
45
46	void ggml_cuda_op_hardsigmoid(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
47
48	void ggml_cuda_op_exp(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
49
50	void ggml_cuda_op_hardswish(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
51
52	void ggml_cuda_op_leaky_relu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
53
54	void ggml_cuda_op_sqr(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
55
56	void ggml_cuda_op_sqrt(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
57
58	void ggml_cuda_op_sin(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
59
60	void ggml_cuda_op_cos(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
61
62	void ggml_cuda_op_log(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
63
64	void ggml_cuda_op_elu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
65
66	void ggml_cuda_op_floor(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
67
68	void ggml_cuda_op_ceil(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
69
70	void ggml_cuda_op_round(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
71
72	void ggml_cuda_op_trunc(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
73
74	void ggml_cuda_op_reglu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
75
76	void ggml_cuda_op_geglu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
77
78	void ggml_cuda_op_swiglu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
79
80	void ggml_cuda_op_swiglu_oai(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
81
82	void ggml_cuda_op_geglu_erf(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
83
84	void ggml_cuda_op_geglu_quick(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
85
86	void ggml_cuda_op_xielu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
87
// SiLU activation for a single value.
//
// Returns x / (1 + e^(-x)), which is algebraically x * sigmoid(x).
// Device-only helper, evaluated entirely in single precision.
__device__ __forceinline__ float ggml_cuda_op_silu_single(float x) {
    return x / (1.0f + expf(-x));
}
91
// GELU activation for a single value, using the tanh-based formula.
//
// Returns 0.5 * x * (1 + tanh(sqrt(2/pi) * x * (1 + 0.044715 * x^2))).
// Device-only helper, evaluated entirely in single precision.
__device__ __forceinline__ float ggml_cuda_op_gelu_single(float x) {
    const float GELU_COEF_A    = 0.044715f;                             // cubic-term coefficient
    const float SQRT_2_OVER_PI = 0.79788456080286535587989211986876f;  // sqrt(2 / pi)

    return 0.5f * x * (1.0f + tanhf(SQRT_2_OVER_PI * x * (1.0f + GELU_COEF_A * x * x)));
}
98
// Clamped, gated SiLU-style unit for a single (x, g) pair.
//
//   x     : activation input; clamped from above only, to at most `limit`.
//   g     : gate input; clamped on both sides to [-limit, limit].
//   alpha : scale applied to x inside the exponential (default 1.702).
//   limit : clamp bound applied to x and g (default 7.0).
//
// Returns (x / (1 + e^(-alpha * x))) * (1 + g), computed on the clamped values.
// Device-only helper, evaluated entirely in single precision.
__device__ __forceinline__ float ggml_cuda_op_swiglu_oai_single(float x, float g, float alpha = 1.702f, float limit = 7.0f) {
    x = fminf(x, limit);                 // upper clamp only; large negative x passes through
    g = fmaxf(fminf(g, limit), -limit);  // symmetric clamp

    float out_glu = x / (1.0f + expf(-x * alpha));
    out_glu = out_glu * (1.0f + g);
    return out_glu;
}
107

Browse the source code of llama.cpp/ggml/src/ggml-cuda/unary.cuh