#include "ggml.h"
#include "ggml-cpu.h"

#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <thread>
#include <vector>
11
12#define MAX_NARGS 2
13
14int main(int argc, char *argv[]) {
15
16 int n_threads = std::max(a: 1, b: std::min(a: 4, b: (int) std::thread::hardware_concurrency()));
17 int n_rounds = 100;
18
19 if (argc > 1) {
20 n_threads = std::atoi(nptr: argv[1]);
21 }
22
23 if (argc > 2) {
24 n_rounds = std::atoi(nptr: argv[2]);
25 }
26
27 struct ggml_init_params params = {
28 /* .mem_size = */ 1024*1024*1024,
29 /* .mem_buffer = */ NULL,
30 /* .no_alloc = */ false,
31 };
32
33 struct ggml_context * ctx = ggml_init(params);
34
35 // Create graph
36 struct ggml_cgraph * gf = ggml_new_graph(ctx);
37
38 // Lots of small, parallel ops where barriers in between will dominate
39 struct ggml_tensor * out = ggml_new_tensor_1d(ctx, type: GGML_TYPE_F32, ne0: 64);
40 for (int i = 0; i < 1000; i++) {
41 struct ggml_tensor * a = ggml_new_tensor_2d(ctx, type: GGML_TYPE_Q4_0, ne0: 64, ne1: 128);
42 out = ggml_mul_mat(ctx, a, b: out);
43
44 struct ggml_tensor * d = ggml_new_tensor_2d(ctx, type: GGML_TYPE_Q4_0, ne0: 128, ne1: 64);
45 out = ggml_mul_mat(ctx, a: d, b: out);
46 }
47
48 ggml_build_forward_expand(cgraph: gf, tensor: out);
49 int n_nodes = ggml_graph_n_nodes(cgraph: gf);
50
51 // Create threadpool
52 struct ggml_threadpool_params tpp = ggml_threadpool_params_default(n_threads);
53 struct ggml_threadpool* threadpool = ggml_threadpool_new(params: &tpp);
54 if (!threadpool) {
55 fprintf(stderr, format: "threadpool create failed : n_threads %d\n", n_threads);
56 exit(status: 1);
57 }
58
59 // Create compute plan
60 struct ggml_cplan cplan = ggml_graph_plan(cgraph: gf, n_threads, threadpool);
61
62 std::vector<uint8_t> work_data(cplan.work_size);
63 cplan.work_data = work_data.data();
64
65 std::cerr << "graph-compute with"
66 << "\n n_threads: " << n_threads
67 << "\n n_nodes: " << n_nodes
68 << "\n n_rounds: " << n_rounds
69 << "\n";
70 // ggml_graph_print(gf);
71
72 // Warmup
73 ggml_graph_compute(cgraph: gf, cplan: &cplan);
74
75 auto t0 = std::chrono::high_resolution_clock::now();
76
77 for (int i=0; i < n_rounds; i++) {
78 ggml_graph_compute(cgraph: gf, cplan: &cplan);
79 }
80
81 auto t1 = std::chrono::high_resolution_clock::now();
82
83 auto usec = std::chrono::duration_cast<std::chrono::microseconds>(d: t1-t0).count();
84 auto nsec = std::chrono::duration_cast<std::chrono::nanoseconds>(d: t1-t0).count();
85 std::cerr << "graph-compute took " << usec << " usec "
86 << "\n " << (float) usec / n_rounds << " usec per-iter"
87 << "\n " << (float) nsec / (n_rounds * n_nodes) << " nsec per-node"
88 << "\n";
89
90 ggml_threadpool_free(threadpool);
91 ggml_free(ctx);
92
93 return 0;
94}
95