//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: optimization.h
// -----------------------------------------------------------------------------
//
// This header file defines portable macros for performance optimization.

#ifndef ABSL_BASE_OPTIMIZATION_H_
#define ABSL_BASE_OPTIMIZATION_H_

#include "absl/base/config.h"

// ABSL_BLOCK_TAIL_CALL_OPTIMIZATION
//
// Instructs the compiler to avoid optimizing tail-call recursion. This macro
// is useful when you wish to preserve the existing function order within a
// stack trace for logging, debugging, or profiling purposes.
//
// Example:
//
//   int f() {
//     int result = g();
//     ABSL_BLOCK_TAIL_CALL_OPTIMIZATION();
//     return result;
//   }
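//
// A further sketch (the recursive `CountDown` function below is hypothetical,
// not part of Abseil): placing the macro between a recursive call and its
// return keeps every frame of the recursion visible to a debugger or
// profiler, rather than letting the compiler collapse the frames through
// tail-call optimization.
//
//   int CountDown(int n) {
//     if (n == 0) return 0;
//     int result = CountDown(n - 1);
//     ABSL_BLOCK_TAIL_CALL_OPTIMIZATION();
//     return result;
//   }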
#if defined(__pnacl__)
#define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() if (volatile int x = 0) { (void)x; }
#elif defined(__clang__)
// Clang will not tail call given inline volatile assembly.
#define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() __asm__ __volatile__("")
#elif defined(__GNUC__)
// GCC will not tail call given inline volatile assembly.
#define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() __asm__ __volatile__("")
#elif defined(_MSC_VER)
#include <intrin.h>
// The __nop() intrinsic blocks the optimization.
#define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() __nop()
#else
#define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() if (volatile int x = 0) { (void)x; }
#endif

// ABSL_CACHELINE_SIZE
//
// Explicitly defines the assumed size of a cache line for purposes of
// alignment. Setting the cacheline size allows you to specify that certain
// objects be aligned on a cacheline boundary with `ABSL_CACHELINE_ALIGNED`
// declarations. (See below.)
//
// NOTE: this macro should be replaced with the following C++17 features, when
// those are generally available:
//
//   * `std::hardware_constructive_interference_size`
//   * `std::hardware_destructive_interference_size`
//
// See http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0154r1.html
// for more information.
#if defined(__GNUC__)
// Cache line alignment
#if defined(__i386__) || defined(__x86_64__)
#define ABSL_CACHELINE_SIZE 64
#elif defined(__powerpc64__)
#define ABSL_CACHELINE_SIZE 128
#elif defined(__aarch64__)
// We would need to read special register ctr_el0 to find out L1 dcache size.
// This value is a good estimate based on a real aarch64 machine.
#define ABSL_CACHELINE_SIZE 64
#elif defined(__arm__)
// Cache line sizes for ARM: These values are not strictly correct since
// cache line sizes depend on implementations, not architectures. There
// are even implementations with cache line sizes configurable at boot
// time.
#if defined(__ARM_ARCH_5T__)
#define ABSL_CACHELINE_SIZE 32
#elif defined(__ARM_ARCH_7A__)
#define ABSL_CACHELINE_SIZE 64
#endif
#endif

#ifndef ABSL_CACHELINE_SIZE
// A reasonable default guess. Note that overestimates tend to waste more
// space, while underestimates tend to waste more time.
#define ABSL_CACHELINE_SIZE 64
#endif

// ABSL_CACHELINE_ALIGNED
//
// Indicates that the declared object should be aligned to a cacheline
// boundary as defined by `ABSL_CACHELINE_SIZE` (see above). Cacheline
// aligning objects allows you to load a set of related objects in the L1
// cache for performance improvements. Cacheline aligning objects properly
// allows constructive memory sharing and prevents destructive (or "false")
// memory sharing.
//
// NOTE: this macro should be replaced with usage of `alignas()` using
// `std::hardware_constructive_interference_size` and/or
// `std::hardware_destructive_interference_size` when available within C++17.
//
// See http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0154r1.html
// for more information.
//
// On some compilers, `ABSL_CACHELINE_ALIGNED` expands to an `__attribute__`
// or `__declspec` attribute. For compilers where this is not known to work,
// the macro expands to nothing.
//
// No further guarantees are made here. The result of applying the macro
// to variables and types is always implementation-defined.
//
// WARNING: It is easy to use this attribute incorrectly, even to the point
// of causing hard-to-diagnose bugs or crashes. It does not by itself
// guarantee that objects are aligned to a cache line.
//
// NOTE: Some compilers are picky about the locations of annotations such as
// this attribute, so prefer to put it at the beginning of your declaration.
// For example,
//
//   ABSL_CACHELINE_ALIGNED static Foo* foo = ...
//
//   class ABSL_CACHELINE_ALIGNED Bar { ...
//
// Recommendations:
//
// 1) Consult compiler documentation; this comment is not kept in sync as
//    toolchains evolve.
// 2) Verify your use has the intended effect. This often requires inspecting
//    the generated machine code.
// 3) Prefer applying this attribute to individual variables. Avoid
//    applying it to types. This tends to localize the effect.
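//
// For illustration, a minimal sketch (the `HitCounters` struct and its
// members are hypothetical, and `<atomic>`/`<cstdint>` are assumed to be
// included): aligning each heavily-written member to its own cacheline keeps
// writers on different threads from falsely sharing a line.
//
//   struct HitCounters {
//     ABSL_CACHELINE_ALIGNED std::atomic<int64_t> hits;
//     ABSL_CACHELINE_ALIGNED std::atomic<int64_t> misses;
//   };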
#define ABSL_CACHELINE_ALIGNED __attribute__((aligned(ABSL_CACHELINE_SIZE)))
#elif defined(_MSC_VER)
#define ABSL_CACHELINE_SIZE 64
#define ABSL_CACHELINE_ALIGNED __declspec(align(ABSL_CACHELINE_SIZE))
#else
#define ABSL_CACHELINE_SIZE 64
#define ABSL_CACHELINE_ALIGNED
#endif

// ABSL_PREDICT_TRUE, ABSL_PREDICT_FALSE
//
// Gives the compiler a static hint about which path of a boolean branch is
// likely to be taken, so that it can optimize code generation for that path.
//
// Example:
//
//   if (ABSL_PREDICT_TRUE(expression)) {
//     return result;  // Faster if more likely
//   } else {
//     return 0;
//   }
//
// Compilers can use the information that a certain branch is not likely to be
// taken (for instance, a CHECK failure) to optimize for the common case in
// the absence of better information (e.g., profile feedback from compiling
// with gcc's `-fprofile-arcs`).
//
// Recommendation: Modern CPUs dynamically predict branch execution paths,
// typically with accuracy greater than 97%. As a result, annotating every
// branch in a codebase is likely counterproductive; however, annotating
// specific branches that are both hot and consistently mispredicted is likely
// to yield performance improvements.
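//
// A complementary sketch for the unlikely case (`ProcessPacket`,
// `HandleCorruption`, `HandlePayload`, and the `Packet` type are
// hypothetical, not part of Abseil): marking a rare error path as unlikely
// lets the compiler keep the hot path contiguous.
//
//   int ProcessPacket(const Packet& p) {
//     if (ABSL_PREDICT_FALSE(p.corrupted())) {
//       return HandleCorruption(p);  // Rarely taken.
//     }
//     return HandlePayload(p);  // Common, hot path.
//   }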
#if ABSL_HAVE_BUILTIN(__builtin_expect) || \
    (defined(__GNUC__) && !defined(__clang__))
#define ABSL_PREDICT_FALSE(x) (__builtin_expect(x, 0))
#define ABSL_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#else
#define ABSL_PREDICT_FALSE(x) (x)
#define ABSL_PREDICT_TRUE(x) (x)
#endif

#endif  // ABSL_BASE_OPTIMIZATION_H_