topk-moe.cuh source code [llama.cpp/ggml/src/ggml-cuda/topk-moe.cuh]

1	#include "common.cuh"
2	#include "ggml.h"
3
4	#include <initializer_list>
5
// Declares the host-side entry point for the CUDA top-k MoE operation.
// Only the prototype is visible in this header; the definition lives elsewhere.
//
// Parameters (types are taken from the declaration; the described roles are
// inferred from the names only -- TODO confirm against the definition):
//   ctx             CUDA backend context, passed by non-const reference.
//   logits          read-only tensor (const pointer); presumably the router logits.
//   weights         mutable tensor; presumably receives the selected expert weights.
//   ids             mutable tensor; presumably receives the selected expert indices.
//   with_norm       flag; presumably enables normalization of the selected weights.
//   delayed_softmax flag, defaults to false; presumably applies softmax after the
//                   top-k selection instead of before it.
//   weight_clamp    optional tensor, defaults to nullptr; presumably describes a
//                   clamp applied to the weights.
6	void ggml_cuda_op_topk_moe(ggml_backend_cuda_context & ctx,
7	const ggml_tensor * logits,
8	ggml_tensor * weights,
9	ggml_tensor * ids,
10	const bool with_norm,
11	const bool delayed_softmax = false,
12	ggml_tensor * weight_clamp = nullptr);
13
// Declares a predicate over graph tensors; returns bool.
// NOTE(review): presumably reports whether the given softmax/weights nodes
// (and the optional clamp node) are eligible for the top-k MoE CUDA path --
// confirm against the definition, which is not visible in this header.
//   softmax  read-only tensor (const pointer).
//   weights  read-only tensor (const pointer).
//   clamp    optional read-only tensor; defaults to nullptr.
14	bool ggml_cuda_should_use_topk_moe(const ggml_tensor * softmax, const ggml_tensor * weights, const ggml_tensor * clamp = nullptr);
15
// Declares a function returning a std::initializer_list of ggml_op values,
// selected by the two flags (delayed_softmax defaults to false).
// NOTE(review): presumably this is the op sequence that the top-k MoE path
// matches for the given variant -- confirm against the definition.
// NOTE(review): std::initializer_list does not own its elements, so the
// definition should return a list backed by storage that outlives the call
// (e.g. static storage) -- confirm.
16	std::initializer_list<enum ggml_op> ggml_cuda_topk_moe_ops(bool with_norm, bool delayed_softmax = false);
17

Browse the source code of llama.cpp/ggml/src/ggml-cuda/topk-moe.cuh