1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
6#include "default.h"
7
8namespace embree
9{
10 /*! helper structure for the implementation of the profile functions below */
11 struct ProfileTimer
12 {
13 static const size_t N = 20;
14
15 ProfileTimer () {}
16
17 ProfileTimer (const size_t numSkip) : i(0), j(0), maxJ(0), numSkip(numSkip), t0(0)
18 {
19 for (size_t i=0; i<N; i++) names[i] = nullptr;
20 for (size_t i=0; i<N; i++) dt_fst[i] = 0.0;
21 for (size_t i=0; i<N; i++) dt_min[i] = pos_inf;
22 for (size_t i=0; i<N; i++) dt_avg[i] = 0.0;
23 for (size_t i=0; i<N; i++) dt_max[i] = neg_inf;
24 }
25
26 __forceinline void begin()
27 {
28 j=0;
29 t0 = tj = getSeconds();
30 }
31
32 __forceinline void end() {
33 absolute("total");
34 i++;
35 }
36
37 __forceinline void operator() (const char* name) {
38 relative(name);
39 }
40
41 __forceinline void absolute (const char* name)
42 {
43 const double t1 = getSeconds();
44 const double dt = t1-t0;
45 assert(names[j] == nullptr || names[j] == name);
46 names[j] = name;
47 if (i == 0) dt_fst[j] = dt;
48 if (i>=numSkip) {
49 dt_min[j] = min(dt_min[j],dt);
50 dt_avg[j] = dt_avg[j] + dt;
51 dt_max[j] = max(dt_max[j],dt);
52 }
53 j++;
54 maxJ = max(maxJ,j);
55 }
56
57 __forceinline void relative (const char* name)
58 {
59 const double t1 = getSeconds();
60 const double dt = t1-tj;
61 tj = t1;
62 assert(names[j] == nullptr || names[j] == name);
63 names[j] = name;
64 if (i == 0) dt_fst[j] = dt;
65 if (i>=numSkip) {
66 dt_min[j] = min(dt_min[j],dt);
67 dt_avg[j] = dt_avg[j] + dt;
68 dt_max[j] = max(dt_max[j],dt);
69 }
70 j++;
71 maxJ = max(maxJ,j);
72 }
73
74 void print(size_t numElements)
75 {
76 for (size_t k=0; k<N; k++)
77 dt_avg[k] /= double(i-numSkip);
78
79 printf(" profile [M/s]:\n");
80 for (size_t j=0; j<maxJ; j++)
81 printf("%20s: fst = %7.2f M/s, min = %7.2f M/s, avg = %7.2f M/s, max = %7.2f M/s\n",
82 names[j],numElements/dt_fst[j]*1E-6,numElements/dt_max[j]*1E-6,numElements/dt_avg[j]*1E-6,numElements/dt_min[j]*1E-6);
83
84 printf(" profile [ms]:\n");
85 for (size_t j=0; j<maxJ; j++)
86 printf("%20s: fst = %7.2f ms, min = %7.2f ms, avg = %7.2f ms, max = %7.2fms\n",
87 names[j],1000.0*dt_fst[j],1000.0*dt_min[j],1000.0*dt_avg[j],1000.0*dt_max[j]);
88 }
89
90 void print()
91 {
92 printf(" profile:\n");
93
94 for (size_t k=0; k<N; k++)
95 dt_avg[k] /= double(i-numSkip);
96
97 for (size_t j=0; j<maxJ; j++) {
98 printf("%20s: fst = %7.2f ms, min = %7.2f ms, avg = %7.2f ms, max = %7.2fms\n",
99 names[j],1000.0*dt_fst[j],1000.0*dt_min[j],1000.0*dt_avg[j],1000.0*dt_max[j]);
100 }
101 }
102
103 double avg() {
104 return dt_avg[maxJ-1]/double(i-numSkip);
105 }
106
107 private:
108 size_t i;
109 size_t j;
110 size_t maxJ;
111 size_t numSkip;
112 double t0;
113 double tj;
114 const char* names[N];
115 double dt_fst[N];
116 double dt_min[N];
117 double dt_avg[N];
118 double dt_max[N];
119 };
120
121 /*! This function executes some code block multiple times and measured sections of it.
122 Use the following way:
123
124 profile(1,10,1000,[&](ProfileTimer& timer) {
125 // code
126 timer("A");
127 // code
128 timer("B");
129 });
130 */
131 template<typename Closure>
132 void profile(const size_t numSkip, const size_t numIter, const size_t numElements, const Closure& closure)
133 {
134 ProfileTimer timer(numSkip);
135
136 for (size_t i=0; i<numSkip+numIter; i++)
137 {
138 timer.begin();
139 closure(timer);
140 timer.end();
141 }
142 timer.print(numElements);
143 }
144
145 /*! similar as the function above, but the timer object comes externally */
146 template<typename Closure>
147 void profile(ProfileTimer& timer, const size_t numSkip, const size_t numIter, const size_t numElements, const Closure& closure)
148 {
149 timer = ProfileTimer(numSkip);
150
151 for (size_t i=0; i<numSkip+numIter; i++)
152 {
153 timer.begin();
154 closure(timer);
155 timer.end();
156 }
157 timer.print(numElements);
158 }
159}
160