| 1 | /**************************************************************************** |
| 2 | ** |
| 3 | ** Copyright (C) 2016 Intel Corporation. |
| 4 | ** Contact: https://www.qt.io/licensing/ |
| 5 | ** |
| 6 | ** This file is part of the QtCore module of the Qt Toolkit. |
| 7 | ** |
| 8 | ** $QT_BEGIN_LICENSE:LGPL$ |
| 9 | ** Commercial License Usage |
| 10 | ** Licensees holding valid commercial Qt licenses may use this file in |
| 11 | ** accordance with the commercial license agreement provided with the |
| 12 | ** Software or, alternatively, in accordance with the terms contained in |
| 13 | ** a written agreement between you and The Qt Company. For licensing terms |
| 14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
| 15 | ** information use the contact form at https://www.qt.io/contact-us. |
| 16 | ** |
| 17 | ** GNU Lesser General Public License Usage |
| 18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
| 19 | ** General Public License version 3 as published by the Free Software |
| 20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
| 21 | ** packaging of this file. Please review the following information to |
| 22 | ** ensure the GNU Lesser General Public License version 3 requirements |
| 23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
| 24 | ** |
| 25 | ** GNU General Public License Usage |
| 26 | ** Alternatively, this file may be used under the terms of the GNU |
| 27 | ** General Public License version 2.0 or (at your option) the GNU General |
| 28 | ** Public license version 3 or any later version approved by the KDE Free |
| 29 | ** Qt Foundation. The licenses are as published by the Free Software |
| 30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
| 31 | ** included in the packaging of this file. Please review the following |
| 32 | ** information to ensure the GNU General Public License requirements will |
| 33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
| 34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
| 35 | ** |
| 36 | ** $QT_END_LICENSE$ |
| 37 | ** |
| 38 | ****************************************************************************/ |
| 39 | |
| 40 | #include "qbenchmarkperfevents_p.h" |
| 41 | #include "qbenchmarkmetric.h" |
| 42 | #include "qbenchmark_p.h" |
| 43 | |
| 44 | #ifdef QTESTLIB_USE_PERF_EVENTS |
| 45 | |
| 46 | // include the qcore_unix_p.h without core-private |
| 47 | // we only use inline functions anyway |
| 48 | #include "../corelib/kernel/qcore_unix_p.h" |
| 49 | |
| 50 | #include <sys/types.h> |
| 51 | #include <errno.h> |
| 52 | #include <fcntl.h> |
| 53 | #include <string.h> |
| 54 | #include <stdio.h> |
| 55 | |
| 56 | #include <sys/syscall.h> |
| 57 | #include <sys/ioctl.h> |
| 58 | |
| 59 | #include "3rdparty/linux_perf_event_p.h" |
| 60 | |
| 61 | // for PERF_TYPE_HW_CACHE, the config is a bitmask |
| 62 | // lowest 8 bits: cache type |
| 63 | // bits 8 to 15: cache operation |
| 64 | // bits 16 to 23: cache result |
// Every value below is a PERF_TYPE_HW_CACHE config word laid out as
//     cache id | (operation << 8) | (result << 16)
// The shifts are parenthesised explicitly; the values are the same as the
// ones produced by plain operator precedence.

// -- counters of cache accesses --
#define CACHE_L1D_READ              (PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_READ << 8)     | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))
#define CACHE_L1D_WRITE             (PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_WRITE << 8)    | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))
#define CACHE_L1D_PREFETCH          (PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))
#define CACHE_L1I_READ              (PERF_COUNT_HW_CACHE_L1I | (PERF_COUNT_HW_CACHE_OP_READ << 8)     | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))
#define CACHE_L1I_PREFETCH          (PERF_COUNT_HW_CACHE_L1I | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))
#define CACHE_LLC_READ              (PERF_COUNT_HW_CACHE_LL  | (PERF_COUNT_HW_CACHE_OP_READ << 8)     | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))
#define CACHE_LLC_WRITE             (PERF_COUNT_HW_CACHE_LL  | (PERF_COUNT_HW_CACHE_OP_WRITE << 8)    | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))
#define CACHE_LLC_PREFETCH          (PERF_COUNT_HW_CACHE_LL  | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))
#define CACHE_BRANCH_READ           (PERF_COUNT_HW_CACHE_BPU | (PERF_COUNT_HW_CACHE_OP_READ << 8)     | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))

// -- counters of cache misses --
#define CACHE_L1D_READ_MISS         (PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_READ << 8)     | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))
#define CACHE_L1D_WRITE_MISS        (PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_WRITE << 8)    | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))
#define CACHE_L1D_PREFETCH_MISS     (PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))
#define CACHE_L1I_READ_MISS         (PERF_COUNT_HW_CACHE_L1I | (PERF_COUNT_HW_CACHE_OP_READ << 8)     | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))
#define CACHE_L1I_PREFETCH_MISS     (PERF_COUNT_HW_CACHE_L1I | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))
#define CACHE_LLC_READ_MISS         (PERF_COUNT_HW_CACHE_LL  | (PERF_COUNT_HW_CACHE_OP_READ << 8)     | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))
#define CACHE_LLC_WRITE_MISS        (PERF_COUNT_HW_CACHE_LL  | (PERF_COUNT_HW_CACHE_OP_WRITE << 8)    | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))
#define CACHE_LLC_PREFETCH_MISS     (PERF_COUNT_HW_CACHE_LL  | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))
#define CACHE_BRANCH_READ_MISS      (PERF_COUNT_HW_CACHE_BPU | (PERF_COUNT_HW_CACHE_OP_READ << 8)     | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))
| 83 | |
| 84 | QT_BEGIN_NAMESPACE |
| 85 | |
| 86 | static perf_event_attr attr; |
| 87 | |
| 88 | static void initPerf() |
| 89 | { |
| 90 | static bool done; |
| 91 | if (!done) { |
| 92 | memset(&attr, 0, sizeof attr); |
| 93 | attr.size = sizeof attr; |
| 94 | attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; |
| 95 | attr.disabled = true; // we'll enable later |
| 96 | attr.inherit = true; // let children processes inherit the monitoring |
| 97 | attr.pinned = true; // keep it running in the hardware |
| 98 | attr.inherit_stat = true; // aggregate all the info from child processes |
| 99 | attr.task = true; // trace fork/exits |
| 100 | |
| 101 | // set a default performance counter: CPU cycles |
| 102 | attr.type = PERF_TYPE_HARDWARE; |
| 103 | attr.config = PERF_COUNT_HW_CPU_CYCLES; // default |
| 104 | |
| 105 | done = true; |
| 106 | } |
| 107 | } |
| 108 | |
| 109 | // This class does not exist in the API so its qdoc comment marker was removed. |
| 110 | |
| 111 | /* |
| 112 | \class QBenchmarkPerfEvents |
| 113 | \brief The Linux perf events benchmark backend |
| 114 | |
| 115 | This benchmark backend uses the Linux Performance Counters interface, |
| 116 | introduced with the Linux kernel v2.6.31. The interface is done by one |
| 117 | system call (perf_event_open) which takes an attribute structure and |
| 118 | returns a file descriptor. |
| 119 | |
| 120 | More information: |
| 121 | \li design docs: tools/perf/design.txt <http://lxr.linux.no/linux/tools/perf/design.txt> |
| 122 | \li sample tool: tools/perf/builtin-stat.c <http://lxr.linux.no/linux/tools/perf/builtin-stat.c> |
| 123 | (note: as of v3.3.1, the documentation is out-of-date with the kernel |
| 124 | interface, so reading the source code of existing tools is necessary) |
| 125 | |
| 126 | This benchlib backend monitors the current process as well as any child |
| 127 | processes it launches. We do not try to benchmark in kernel or hypervisor |
| 128 | mode, as that usually requires elevated privileges. |
| 129 | */ |
| 130 | |
| 131 | static int perf_event_open(perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags) |
| 132 | { |
| 133 | #ifdef SYS_perf_event_open |
| 134 | return syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags); |
| 135 | #else |
| 136 | Q_UNUSED(attr); |
| 137 | Q_UNUSED(pid); |
| 138 | Q_UNUSED(cpu); |
| 139 | Q_UNUSED(group_fd); |
| 140 | Q_UNUSED(flags); |
| 141 | errno = ENOSYS; |
| 142 | return -1; |
| 143 | #endif |
| 144 | } |
| 145 | |
| 146 | bool QBenchmarkPerfEventsMeasurer::isAvailable() |
| 147 | { |
| 148 | // this generates an EFAULT because attr == NULL if perf_event_open is available |
| 149 | // if the kernel is too old, it generates ENOSYS |
| 150 | return perf_event_open(nullptr, 0, 0, 0, 0) == -1 && errno != ENOSYS; |
| 151 | } |
| 152 | |
| 153 | /* Event list structure |
| 154 | The following table provides the list of supported events |
| 155 | |
| 156 | Event type Event counter Unit Name and aliases |
| 157 | HARDWARE CPU_CYCLES CPUCycles cycles cpu-cycles |
| 158 | HARDWARE REF_CPU_CYCLES RefCPUCycles ref-cycles |
| 159 | HARDWARE INSTRUCTIONS Instructions instructions |
| 160 | HARDWARE CACHE_REFERENCES CacheReferences cache-references |
| 161 | HARDWARE CACHE_MISSES CacheMisses cache-misses |
| 162 | HARDWARE BRANCH_INSTRUCTIONS BranchInstructions branch-instructions branches |
| 163 | HARDWARE BRANCH_MISSES BranchMisses branch-misses |
| 164 | HARDWARE BUS_CYCLES BusCycles bus-cycles |
| 165 | HARDWARE STALLED_CYCLES_FRONTEND StalledCycles stalled-cycles-frontend idle-cycles-frontend |
| 166 | HARDWARE STALLED_CYCLES_BACKEND StalledCycles stalled-cycles-backend idle-cycles-backend |
| 167 | SOFTWARE CPU_CLOCK WalltimeMilliseconds cpu-clock |
| 168 | SOFTWARE TASK_CLOCK WalltimeMilliseconds task-clock |
| 169 | SOFTWARE PAGE_FAULTS PageFaults page-faults faults |
| 170 | SOFTWARE PAGE_FAULTS_MAJ MajorPageFaults major-faults |
| 171 | SOFTWARE PAGE_FAULTS_MIN MinorPageFaults minor-faults |
| 172 | SOFTWARE CONTEXT_SWITCHES ContextSwitches context-switches cs |
| 173 | SOFTWARE CPU_MIGRATIONS CPUMigrations cpu-migrations migrations |
| 174 | SOFTWARE ALIGNMENT_FAULTS AlignmentFaults alignment-faults |
| 175 | SOFTWARE EMULATION_FAULTS EmulationFaults emulation-faults |
| 176 | HW_CACHE L1D_READ CacheReads l1d-cache-reads l1d-cache-loads l1d-reads l1d-loads |
| 177 | HW_CACHE L1D_WRITE CacheWrites l1d-cache-writes l1d-cache-stores l1d-writes l1d-stores |
| 178 | HW_CACHE L1D_PREFETCH CachePrefetches l1d-cache-prefetches l1d-prefetches |
| 179 | HW_CACHE L1I_READ CacheReads l1i-cache-reads l1i-cache-loads l1i-reads l1i-loads |
| 180 | HW_CACHE L1I_PREFETCH CachePrefetches l1i-cache-prefetches l1i-prefetches |
| 181 | HW_CACHE LLC_READ CacheReads llc-cache-reads llc-cache-loads llc-loads llc-reads |
| 182 | HW_CACHE LLC_WRITE CacheWrites llc-cache-writes llc-cache-stores llc-writes llc-stores |
| 183 | HW_CACHE LLC_PREFETCH CachePrefetches llc-cache-prefetches llc-prefetches |
| 184 | HW_CACHE L1D_READ_MISS CacheReads l1d-cache-read-misses l1d-cache-load-misses l1d-read-misses l1d-load-misses |
| 185 | HW_CACHE L1D_WRITE_MISS CacheWrites l1d-cache-write-misses l1d-cache-store-misses l1d-write-misses l1d-store-misses |
| 186 | HW_CACHE L1D_PREFETCH_MISS CachePrefetches l1d-cache-prefetch-misses l1d-prefetch-misses |
| 187 | HW_CACHE L1I_READ_MISS CacheReads l1i-cache-read-misses l1i-cache-load-misses l1i-read-misses l1i-load-misses |
| 188 | HW_CACHE L1I_PREFETCH_MISS CachePrefetches l1i-cache-prefetch-misses l1i-prefetch-misses |
| 189 | HW_CACHE LLC_READ_MISS CacheReads llc-cache-read-misses llc-cache-load-misses llc-read-misses llc-load-misses |
| 190 | HW_CACHE LLC_WRITE_MISS CacheWrites llc-cache-write-misses llc-cache-store-misses llc-write-misses llc-store-misses |
| 191 | HW_CACHE LLC_PREFETCH_MISS CachePrefetches llc-cache-prefetch-misses llc-prefetch-misses |
| 192 | HW_CACHE BRANCH_READ BranchInstructions branch-reads branch-loads branch-predicts |
| 193 | HW_CACHE BRANCH_READ_MISS BranchMisses branch-mispredicts branch-read-misses branch-load-misses |
| 194 | |
| 195 | Use the following Perl script to re-generate the list |
| 196 | === cut perl === |
| 197 | #!/usr/bin/env perl |
| 198 | # Load all entries into %map |
| 199 | while (<STDIN>) { |
| 200 | m/^\s*(.*)\s*$/; |
| 201 | @_ = split /\s+/, $1; |
| 202 | $type = shift @_; |
| 203 | $id = ($type eq "HARDWARE" ? "PERF_COUNT_HW_" : |
| 204 | $type eq "SOFTWARE" ? "PERF_COUNT_SW_" : |
| 205 | $type eq "HW_CACHE" ? "CACHE_" : "") . shift @_; |
| 206 | $unit = shift @_; |
| 207 | |
| 208 | for $string (@_) { |
| 209 | die "$string was already seen!" if defined($map{$string}); |
| 210 | $map{$string} = [-1, $type, $id, $unit]; |
| 211 | push @strings, $string; |
| 212 | } |
| 213 | } |
| 214 | |
| 215 | # sort the map and print the string list |
| 216 | @strings = sort @strings; |
| 217 | print "static const char eventlist_strings[] = \n"; |
| 218 | $counter = 0; |
| 219 | for $entry (@strings) { |
| 220 | print " \"$entry\\0\"\n"; |
| 221 | $map{$entry}[0] = $counter; |
| 222 | $counter += 1 + length $entry; |
| 223 | } |
| 224 | |
| 225 | # print the table |
| 226 | print " \"\\0\";\n\nstatic const Events eventlist[] = {\n"; |
| 227 | for $entry (sort @strings) { |
| 228 | printf " { %3d, PERF_TYPE_%s, %s, QTest::%s },\n", |
| 229 | $map{$entry}[0], |
| 230 | $map{$entry}[1], |
| 231 | $map{$entry}[2], |
| 232 | $map{$entry}[3]; |
| 233 | } |
| 234 | print " { 0, PERF_TYPE_MAX, 0, QTest::Events }\n};\n"; |
| 235 | === cut perl === |
| 236 | */ |
| 237 | |
// One row of the counter lookup table (see eventlist below). The member order
// must stay as it is: the table rows are brace-initialised positionally.
struct Events {
    unsigned offset;                    // index of the counter's name inside eventlist_strings
    quint32 type;                       // copied to perf_event_attr::type by setCounter(); PERF_TYPE_MAX marks the end-of-table row
    quint64 event_id;                   // copied to perf_event_attr::config by setCounter()
    QTest::QBenchmarkMetric metric;     // metric reported by metricForEvent() for this counter
};
| 244 | |
/*
    Lookup tables used by setCounter(), listCounters() and metricForEvent().

    eventlist_strings stores every counter name and alias back to back, each
    one terminated by a NUL character. eventlist has one row per name; its
    offset member is the index of that name inside eventlist_strings. Rows are
    in the same (sorted) order as the names, which setCounter() relies on to
    stop searching early. The final row, with type PERF_TYPE_MAX, terminates
    the table.

    Do not edit by hand: regenerate both tables with the Perl script in the
    comment above so that names and offsets stay in sync.
*/
/* -- BEGIN GENERATED CODE -- */
static const char eventlist_strings[] =
    "alignment-faults\0"
    "branch-instructions\0"
    "branch-load-misses\0"
    "branch-loads\0"
    "branch-mispredicts\0"
    "branch-misses\0"
    "branch-predicts\0"
    "branch-read-misses\0"
    "branch-reads\0"
    "branches\0"
    "bus-cycles\0"
    "cache-misses\0"
    "cache-references\0"
    "context-switches\0"
    "cpu-clock\0"
    "cpu-cycles\0"
    "cpu-migrations\0"
    "cs\0"
    "cycles\0"
    "emulation-faults\0"
    "faults\0"
    "idle-cycles-backend\0"
    "idle-cycles-frontend\0"
    "instructions\0"
    "l1d-cache-load-misses\0"
    "l1d-cache-loads\0"
    "l1d-cache-prefetch-misses\0"
    "l1d-cache-prefetches\0"
    "l1d-cache-read-misses\0"
    "l1d-cache-reads\0"
    "l1d-cache-store-misses\0"
    "l1d-cache-stores\0"
    "l1d-cache-write-misses\0"
    "l1d-cache-writes\0"
    "l1d-load-misses\0"
    "l1d-loads\0"
    "l1d-prefetch-misses\0"
    "l1d-prefetches\0"
    "l1d-read-misses\0"
    "l1d-reads\0"
    "l1d-store-misses\0"
    "l1d-stores\0"
    "l1d-write-misses\0"
    "l1d-writes\0"
    "l1i-cache-load-misses\0"
    "l1i-cache-loads\0"
    "l1i-cache-prefetch-misses\0"
    "l1i-cache-prefetches\0"
    "l1i-cache-read-misses\0"
    "l1i-cache-reads\0"
    "l1i-load-misses\0"
    "l1i-loads\0"
    "l1i-prefetch-misses\0"
    "l1i-prefetches\0"
    "l1i-read-misses\0"
    "l1i-reads\0"
    "llc-cache-load-misses\0"
    "llc-cache-loads\0"
    "llc-cache-prefetch-misses\0"
    "llc-cache-prefetches\0"
    "llc-cache-read-misses\0"
    "llc-cache-reads\0"
    "llc-cache-store-misses\0"
    "llc-cache-stores\0"
    "llc-cache-write-misses\0"
    "llc-cache-writes\0"
    "llc-load-misses\0"
    "llc-loads\0"
    "llc-prefetch-misses\0"
    "llc-prefetches\0"
    "llc-read-misses\0"
    "llc-reads\0"
    "llc-store-misses\0"
    "llc-stores\0"
    "llc-write-misses\0"
    "llc-writes\0"
    "major-faults\0"
    "migrations\0"
    "minor-faults\0"
    "page-faults\0"
    "ref-cycles\0"
    "stalled-cycles-backend\0"
    "stalled-cycles-frontend\0"
    "task-clock\0"
    "\0" ;

static const Events eventlist[] = {
    { 0, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS, QTest::AlignmentFaults },
    { 17, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, QTest::BranchInstructions },
    { 37, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, QTest::BranchMisses },
    { 56, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, QTest::BranchInstructions },
    { 69, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, QTest::BranchMisses },
    { 88, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES, QTest::BranchMisses },
    { 102, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, QTest::BranchInstructions },
    { 118, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, QTest::BranchMisses },
    { 137, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, QTest::BranchInstructions },
    { 150, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, QTest::BranchInstructions },
    { 159, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES, QTest::BusCycles },
    { 170, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, QTest::CacheMisses },
    { 183, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, QTest::CacheReferences },
    { 200, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, QTest::ContextSwitches },
    { 217, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, QTest::WalltimeMilliseconds },
    { 227, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, QTest::CPUCycles },
    { 238, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, QTest::CPUMigrations },
    { 253, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, QTest::ContextSwitches },
    { 256, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, QTest::CPUCycles },
    { 263, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS, QTest::EmulationFaults },
    { 280, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, QTest::PageFaults },
    { 287, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, QTest::StalledCycles },
    { 307, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, QTest::StalledCycles },
    { 328, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, QTest::Instructions },
    { 341, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads },
    { 363, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads },
    { 379, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, QTest::CachePrefetches },
    { 405, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, QTest::CachePrefetches },
    { 426, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads },
    { 448, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads },
    { 464, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites },
    { 487, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites },
    { 504, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites },
    { 527, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites },
    { 544, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads },
    { 560, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads },
    { 570, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, QTest::CachePrefetches },
    { 590, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, QTest::CachePrefetches },
    { 605, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads },
    { 621, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads },
    { 631, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites },
    { 648, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites },
    { 659, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites },
    { 676, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites },
    { 687, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads },
    { 709, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads },
    { 725, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, QTest::CachePrefetches },
    { 751, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, QTest::CachePrefetches },
    { 772, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads },
    { 794, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads },
    { 810, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads },
    { 826, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads },
    { 836, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, QTest::CachePrefetches },
    { 856, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, QTest::CachePrefetches },
    { 871, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads },
    { 887, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads },
    { 897, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads },
    { 919, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads },
    { 935, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, QTest::CachePrefetches },
    { 961, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, QTest::CachePrefetches },
    { 982, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads },
    { 1004, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads },
    { 1020, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites },
    { 1043, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites },
    { 1060, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites },
    { 1083, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites },
    { 1100, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads },
    { 1116, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads },
    { 1126, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, QTest::CachePrefetches },
    { 1146, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, QTest::CachePrefetches },
    { 1161, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads },
    { 1177, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads },
    { 1187, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites },
    { 1204, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites },
    { 1215, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites },
    { 1232, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites },
    { 1243, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ, QTest::MajorPageFaults },
    { 1256, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, QTest::CPUMigrations },
    { 1267, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN, QTest::MinorPageFaults },
    { 1280, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, QTest::PageFaults },
    { 1292, PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, QTest::RefCPUCycles },
    { 1303, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, QTest::StalledCycles },
    { 1326, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, QTest::StalledCycles },
    { 1350, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK, QTest::WalltimeMilliseconds },
    { 0, PERF_TYPE_MAX, 0, QTest::Events }
};
/* -- END GENERATED CODE -- */
| 421 | |
| 422 | QTest::QBenchmarkMetric QBenchmarkPerfEventsMeasurer::metricForEvent(quint32 type, quint64 event_id) |
| 423 | { |
| 424 | const Events *ptr = eventlist; |
| 425 | for ( ; ptr->type != PERF_TYPE_MAX; ++ptr) { |
| 426 | if (ptr->type == type && ptr->event_id == event_id) |
| 427 | return ptr->metric; |
| 428 | } |
| 429 | return QTest::Events; |
| 430 | } |
| 431 | |
| 432 | void QBenchmarkPerfEventsMeasurer::setCounter(const char *name) |
| 433 | { |
| 434 | initPerf(); |
| 435 | const char *colon = strchr(name, ':'); |
| 436 | int n = colon ? colon - name : strlen(name); |
| 437 | const Events *ptr = eventlist; |
| 438 | for ( ; ptr->type != PERF_TYPE_MAX; ++ptr) { |
| 439 | int c = strncmp(name, eventlist_strings + ptr->offset, n); |
| 440 | if (c == 0) |
| 441 | break; |
| 442 | if (c < 0) { |
| 443 | fprintf(stderr, "ERROR: Performance counter type '%s' is unknown\n" , name); |
| 444 | exit(1); |
| 445 | } |
| 446 | } |
| 447 | |
| 448 | attr.type = ptr->type; |
| 449 | attr.config = ptr->event_id; |
| 450 | |
| 451 | // now parse the attributes |
| 452 | if (!colon) |
| 453 | return; |
| 454 | while (*++colon) { |
| 455 | switch (*colon) { |
| 456 | case 'u': |
| 457 | attr.exclude_user = true; |
| 458 | break; |
| 459 | case 'k': |
| 460 | attr.exclude_kernel = true; |
| 461 | break; |
| 462 | case 'h': |
| 463 | attr.exclude_hv = true; |
| 464 | break; |
| 465 | case 'G': |
| 466 | attr.exclude_guest = true; |
| 467 | break; |
| 468 | case 'H': |
| 469 | attr.exclude_host = true; |
| 470 | break; |
| 471 | default: |
| 472 | fprintf(stderr, "ERROR: Unknown attribute '%c'\n" , *colon); |
| 473 | exit(1); |
| 474 | } |
| 475 | } |
| 476 | } |
| 477 | |
| 478 | void QBenchmarkPerfEventsMeasurer::listCounters() |
| 479 | { |
| 480 | if (!isAvailable()) { |
| 481 | printf("Performance counters are not available on this system\n" ); |
| 482 | return; |
| 483 | } |
| 484 | |
| 485 | printf("The following performance counters are available:\n" ); |
| 486 | const Events *ptr = eventlist; |
| 487 | for ( ; ptr->type != PERF_TYPE_MAX; ++ptr) { |
| 488 | printf(" %-30s [%s]\n" , eventlist_strings + ptr->offset, |
| 489 | ptr->type == PERF_TYPE_HARDWARE ? "hardware" : |
| 490 | ptr->type == PERF_TYPE_SOFTWARE ? "software" : |
| 491 | ptr->type == PERF_TYPE_HW_CACHE ? "cache" : "other" ); |
| 492 | } |
| 493 | |
| 494 | printf("\nAttributes can be specified by adding a colon and the following:\n" |
| 495 | " u - exclude measuring in the userspace\n" |
| 496 | " k - exclude measuring in kernel mode\n" |
| 497 | " h - exclude measuring in the hypervisor\n" |
| 498 | " G - exclude measuring when running virtualized (guest VM)\n" |
| 499 | " H - exclude measuring when running non-virtualized (host system)\n" |
| 500 | "Attributes can be combined, for example: -perfcounter branch-mispredicts:kh\n" ); |
| 501 | } |
| 502 | |
| 503 | QBenchmarkPerfEventsMeasurer::QBenchmarkPerfEventsMeasurer() = default; |
| 504 | |
| 505 | QBenchmarkPerfEventsMeasurer::~QBenchmarkPerfEventsMeasurer() |
| 506 | { |
| 507 | qt_safe_close(fd); |
| 508 | } |
| 509 | |
// No-op for this backend: the perf attribute block is prepared by initPerf(),
// which setCounter() and start() call themselves.
void QBenchmarkPerfEventsMeasurer::init()
{
}
| 513 | |
| 514 | void QBenchmarkPerfEventsMeasurer::start() |
| 515 | { |
| 516 | |
| 517 | initPerf(); |
| 518 | if (fd == -1) { |
| 519 | // pid == 0 -> attach to the current process |
| 520 | // cpu == -1 -> monitor on all CPUs |
| 521 | // group_fd == -1 -> this is the group leader |
| 522 | // flags == 0 -> reserved, must be zero |
| 523 | fd = perf_event_open(&attr, 0, -1, -1, 0); |
| 524 | if (fd == -1) { |
| 525 | perror("QBenchmarkPerfEventsMeasurer::start: perf_event_open" ); |
| 526 | exit(1); |
| 527 | } else { |
| 528 | ::fcntl(fd, F_SETFD, FD_CLOEXEC); |
| 529 | } |
| 530 | } |
| 531 | |
| 532 | // enable the counter |
| 533 | ::ioctl(fd, PERF_EVENT_IOC_RESET); |
| 534 | ::ioctl(fd, PERF_EVENT_IOC_ENABLE); |
| 535 | } |
| 536 | |
| 537 | qint64 QBenchmarkPerfEventsMeasurer::checkpoint() |
| 538 | { |
| 539 | ::ioctl(fd, PERF_EVENT_IOC_DISABLE); |
| 540 | qint64 value = readValue(); |
| 541 | ::ioctl(fd, PERF_EVENT_IOC_ENABLE); |
| 542 | return value; |
| 543 | } |
| 544 | |
| 545 | qint64 QBenchmarkPerfEventsMeasurer::stop() |
| 546 | { |
| 547 | // disable the counter |
| 548 | ::ioctl(fd, PERF_EVENT_IOC_DISABLE); |
| 549 | return readValue(); |
| 550 | } |
| 551 | |
| 552 | bool QBenchmarkPerfEventsMeasurer::isMeasurementAccepted(qint64) |
| 553 | { |
| 554 | return true; |
| 555 | } |
| 556 | |
| 557 | int QBenchmarkPerfEventsMeasurer::adjustIterationCount(int) |
| 558 | { |
| 559 | return 1; |
| 560 | } |
| 561 | |
| 562 | int QBenchmarkPerfEventsMeasurer::adjustMedianCount(int) |
| 563 | { |
| 564 | return 1; |
| 565 | } |
| 566 | |
| 567 | QTest::QBenchmarkMetric QBenchmarkPerfEventsMeasurer::metricType() |
| 568 | { |
| 569 | return metricForEvent(attr.type, attr.config); |
| 570 | } |
| 571 | |
| 572 | static quint64 rawReadValue(int fd) |
| 573 | { |
| 574 | /* from the kernel docs: |
| 575 | * struct read_format { |
| 576 | * { u64 value; |
| 577 | * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED |
| 578 | * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING |
| 579 | * { u64 id; } && PERF_FORMAT_ID |
| 580 | * } && !PERF_FORMAT_GROUP |
| 581 | */ |
| 582 | |
| 583 | struct read_format { |
| 584 | quint64 value; |
| 585 | quint64 time_enabled; |
| 586 | quint64 time_running; |
| 587 | } results; |
| 588 | |
| 589 | size_t nread = 0; |
| 590 | while (nread < sizeof results) { |
| 591 | char *ptr = reinterpret_cast<char *>(&results); |
| 592 | qint64 r = qt_safe_read(fd, ptr + nread, sizeof results - nread); |
| 593 | if (r == -1) { |
| 594 | perror("QBenchmarkPerfEventsMeasurer::readValue: reading the results" ); |
| 595 | exit(1); |
| 596 | } |
| 597 | nread += quint64(r); |
| 598 | } |
| 599 | |
| 600 | if (results.time_running == results.time_enabled) |
| 601 | return results.value; |
| 602 | |
| 603 | // scale the results, though this shouldn't happen! |
| 604 | return results.value * (double(results.time_running) / double(results.time_enabled)); |
| 605 | } |
| 606 | |
| 607 | qint64 QBenchmarkPerfEventsMeasurer::readValue() |
| 608 | { |
| 609 | quint64 raw = rawReadValue(fd); |
| 610 | if (metricType() == QTest::WalltimeMilliseconds) { |
| 611 | // perf returns nanoseconds |
| 612 | return raw / 1000000; |
| 613 | } |
| 614 | return raw; |
| 615 | } |
| 616 | |
| 617 | QT_END_NAMESPACE |
| 618 | |
| 619 | #endif |
| 620 | |