| 1 | /* Profiling of shared libraries. | 
|---|
| 2 | Copyright (C) 1997-2020 Free Software Foundation, Inc. | 
|---|
| 3 | This file is part of the GNU C Library. | 
|---|
| 4 | Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. | 
|---|
| 5 | Based on the BSD mcount implementation. | 
|---|
| 6 |  | 
|---|
| 7 | The GNU C Library is free software; you can redistribute it and/or | 
|---|
| 8 | modify it under the terms of the GNU Lesser General Public | 
|---|
| 9 | License as published by the Free Software Foundation; either | 
|---|
| 10 | version 2.1 of the License, or (at your option) any later version. | 
|---|
| 11 |  | 
|---|
| 12 | The GNU C Library is distributed in the hope that it will be useful, | 
|---|
| 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|---|
| 15 | Lesser General Public License for more details. | 
|---|
| 16 |  | 
|---|
| 17 | You should have received a copy of the GNU Lesser General Public | 
|---|
| 18 | License along with the GNU C Library; if not, see | 
|---|
| 19 | <https://www.gnu.org/licenses/>.  */ | 
|---|
| 20 |  | 
|---|
| 21 | #include <assert.h> | 
|---|
| 22 | #include <errno.h> | 
|---|
| 23 | #include <fcntl.h> | 
|---|
| 24 | #include <inttypes.h> | 
|---|
| 25 | #include <limits.h> | 
|---|
| 26 | #include <stdio.h> | 
|---|
| 27 | #include <stdlib.h> | 
|---|
| 28 | #include <string.h> | 
|---|
| 29 | #include <unistd.h> | 
|---|
| 30 | #include <stdint.h> | 
|---|
| 31 | #include <ldsodefs.h> | 
|---|
| 32 | #include <sys/gmon.h> | 
|---|
| 33 | #include <sys/gmon_out.h> | 
|---|
| 34 | #include <sys/mman.h> | 
|---|
| 35 | #include <sys/param.h> | 
|---|
| 36 | #include <sys/stat.h> | 
|---|
| 37 | #include <atomic.h> | 
|---|
| 38 | #include <not-cancel.h> | 
|---|
| 39 |  | 
|---|
| 40 | /* The LD_PROFILE feature has to be implemented different to the | 
|---|
| 41 | normal profiling using the gmon/ functions.  The problem is that an | 
|---|
| 42 | arbitrary number of processes simultaneously can be run using | 
|---|
| 43 | profiling and all write the results in the same file.  To provide | 
|---|
| 44 | this mechanism one could implement a complicated mechanism to merge | 
|---|
| 45 | the content of two profiling runs or one could extend the file | 
|---|
| 46 | format to allow more than one data set.  For the second solution we | 
|---|
| 47 | would have the problem that the file can grow in size beyond any | 
|---|
| 48 | limit and both solutions have the problem that the concurrency of | 
|---|
| 49 | writing the results is a big problem. | 
|---|
| 50 |  | 
|---|
| 51 | Another much simpler method is to use mmap to map the same file in | 
|---|
| 52 | all using programs and modify the data in the mmap'ed area and so | 
|---|
| 53 | also automatically on the disk.  Using the MAP_SHARED option of | 
|---|
| 54 | mmap(2) this can be done without big problems in more than one | 
|---|
| 55 | file. | 
|---|
| 56 |  | 
|---|
| 57 | This approach is very different from the normal profiling.  We have | 
|---|
| 58 | to use the profiling data in exactly the way they are expected to | 
|---|
| 59 | be written to disk.  But the normal format used by gprof is not usable | 
|---|
| 60 | to do this.  It is optimized for size.  It writes the tags as single | 
|---|
| 61 | bytes but this means that the following 32/64 bit values are | 
|---|
| 62 | unaligned. | 
|---|
| 63 |  | 
|---|
| 64 | Therefore we use a new format.  This will look like this | 
|---|
| 65 |  | 
|---|
| 66 | 0  1  2  3	<- byte is 32 bit word | 
|---|
| 67 | 0000				g  m  o  n | 
|---|
| 68 | 0004				*version*	<- GMON_SHOBJ_VERSION | 
|---|
| 69 | 0008				00 00 00 00 | 
|---|
| 70 | 000c				00 00 00 00 | 
|---|
| 71 | 0010				00 00 00 00 | 
|---|
| 72 |  | 
|---|
| 73 | 0014				*tag*		<- GMON_TAG_TIME_HIST | 
|---|
| 74 | 0018				?? ?? ?? ?? | 
|---|
| 75 | ?? ?? ?? ??	<- 32/64 bit LowPC | 
|---|
| 76 | 0018+A				?? ?? ?? ?? | 
|---|
| 77 | ?? ?? ?? ??	<- 32/64 bit HighPC | 
|---|
| 78 | 0018+2*A			*histsize* | 
|---|
| 79 | 001c+2*A			*profrate* | 
|---|
| 80 | 0020+2*A			s  e  c  o | 
|---|
| 81 | 0024+2*A			n  d  s  \0 | 
|---|
| 82 | 0028+2*A			\0 \0 \0 \0 | 
|---|
| 83 | 002c+2*A			\0 \0 \0 | 
|---|
| 84 | 002f+2*A			s | 
|---|
| 85 |  | 
|---|
| 86 | 0030+2*A			?? ?? ?? ??	<- Count data | 
|---|
| 87 | ...				... | 
|---|
| 88 | 0030+2*A+K			?? ?? ?? ?? | 
|---|
| 89 |  | 
|---|
| 90 | 0030+2*A+K			*tag*		<- GMON_TAG_CG_ARC | 
|---|
| 91 | 0034+2*A+K			*lastused* | 
|---|
| 92 | 0038+2*A+K			?? ?? ?? ?? | 
|---|
| 93 | ?? ?? ?? ??	<- FromPC#1 | 
|---|
| 94 | 0038+3*A+K			?? ?? ?? ?? | 
|---|
| 95 | ?? ?? ?? ??	<- ToPC#1 | 
|---|
| 96 | 0038+4*A+K			?? ?? ?? ??	<- Count#1 | 
|---|
| 97 | ...				...		   ... | 
|---|
| 98 | 0038+(2*(CN-1)+2)*A+(CN-1)*4+K	?? ?? ?? ?? | 
|---|
| 99 | ?? ?? ?? ??	<- FromPC#CGN | 
|---|
| 100 | 0038+(2*(CN-1)+3)*A+(CN-1)*4+K	?? ?? ?? ?? | 
|---|
| 101 | ?? ?? ?? ??	<- ToPC#CGN | 
|---|
| 102 | 0038+(2*CN+2)*A+(CN-1)*4+K	?? ?? ?? ??	<- Count#CGN | 
|---|
| 103 |  | 
|---|
| 104 | We put (for now?) no basic block information in the file since this would | 
|---|
| 105 | introduce race conditions among all the processes which want to write them. | 
|---|
| 106 |  | 
|---|
| 107 | `K' is the number of count entries which is computed as | 
|---|
| 108 |  | 
|---|
| 109 | textsize / HISTFRACTION | 
|---|
| 110 |  | 
|---|
| 111 | `CG' in the above table is the number of call graph arcs.  Normally, | 
|---|
| 112 | the table is sparse and the profiling code writes out only those | 
|---|
| 113 | entries which are really used in the program run.  But since we must | 
|---|
| 114 | not extend this table (the profiling file) we'll keep them all here. | 
|---|
| 115 | So CN can be computed in advance as | 
|---|
| 116 |  | 
|---|
| 117 | MINARCS <= textsize*(ARCDENSITY/100) <= MAXARCS | 
|---|
| 118 |  | 
|---|
| 119 | Now the remaining question is: how to build the data structures we can | 
|---|
| 120 | work with from this data.  We need the from set and must associate the | 
|---|
| 121 | froms with all the associated tos.  We will do this by constructing this | 
|---|
| 122 | data structures at the program start.  To do this we'll simply visit all | 
|---|
| 123 | entries in the call graph table and add it to the appropriate list.  */ | 
|---|
| 124 |  | 
|---|
| 125 | extern int __profile_frequency (void); | 
|---|
| 126 | libc_hidden_proto (__profile_frequency) | 
|---|
| 127 |  | 
|---|
/* We define a special type to address the elements of the arc table.
   This is basically the `gmon_cg_arc_record' format but it includes
   the room for the tag and it uses real types.  This layout is part
   of the on-disk profiling file format shared between processes, so
   it must not be changed.  */
struct here_cg_arc_record
  {
    uintptr_t from_pc;		/* Address of the call site (caller).  */
    uintptr_t self_pc;		/* Address of the called function (callee).  */
    /* The count field is atomically incremented in _dl_mcount, which
       requires it to be properly aligned for its type, and for this
       alignment to be visible to the compiler.  The amount of data
       before an array of this structure is calculated as
       expected_size in _dl_start_profile.  Everything in that
       calculation is a multiple of 4 bytes (in the case of
       kcountsize, because it is derived from a subtraction of
       page-aligned values, and the corresponding calculation in
       __monstartup also ensures it is at least a multiple of the size
       of u_long), so all copies of this field do in fact have the
       appropriate alignment.  */
    uint32_t count __attribute__ ((aligned (__alignof__ (uint32_t))));
  } __attribute__ ((packed));
|---|
| 148 |  | 
|---|
/* Pointer to the first arc record inside the mmap'ed profiling file;
   used as an array indexed by arc number.  */
static struct here_cg_arc_record *data;

/* Nonzero if profiling is under way.  */
static int running;

/* This is the number of entries which have been incorporated in the TOS
   table so far.  */
static uint32_t narcs;
/* This is a pointer to the object representing the number of entries
   currently in the mmaped file.  At no point of time this has to be the
   same as NARCS.  If it is equal all entries from the file are in our
   lists.  */
static volatile uint32_t *narcsp;


/* Element of the in-memory chain which links, per hash slot, all arcs
   whose callee address hashes to that slot.  */
struct here_fromstruct
  {
    struct here_cg_arc_record volatile *here;	/* Arc record in the file.  */
    uint16_t link;	/* Index of the next chain element; 0 terminates.  */
  };

/* Hash table indexed by (relative callee PC / (HASHFRACTION
   * sizeof (*tos))); each slot holds the FROMS index of the first chain
   element for that slot, or 0 if the slot is empty.  */
static volatile uint16_t *tos;

/* Array of chain elements; allocated together with TOS in
   _dl_start_profile.  */
static struct here_fromstruct *froms;
/* Capacity of FROMS (maximum number of arcs we track).  */
static uint32_t fromlimit;
/* Number of FROMS entries handed out so far; advanced atomically in
   _dl_mcount.  */
static volatile uint32_t fromidx;

/* Lowest profiled text address and size of the profiled text range,
   both computed in _dl_start_profile.  */
static uintptr_t lowpc;
static size_t textsize;
/* Shift amount replacing the division by HASHFRACTION * sizeof (*froms)
   in _dl_mcount when that value is a power of two; -1 otherwise.  */
static unsigned int log_hashfraction;
|---|
| 178 |  | 
|---|
| 179 |  | 
|---|
| 180 |  | 
|---|
| 181 | /* Set up profiling data to profile object described by MAP.  The output | 
|---|
| 182 | file is found (or created) in OUTPUT_DIR.  */ | 
|---|
void
_dl_start_profile (void)
{
  char *filename;
  int fd;
  struct stat64 st;
  const ElfW(Phdr) *ph;
  ElfW(Addr) mapstart = ~((ElfW(Addr)) 0);
  ElfW(Addr) mapend = 0;
  char *hist, *cp;
  size_t idx;
  size_t tossize;
  size_t fromssize;
  uintptr_t highpc;
  uint16_t *kcount;
  size_t kcountsize;
  struct gmon_hdr *addr = NULL;
  off_t expected_size;
  /* See profil(2) where this is described.  */
  int s_scale;
#define SCALE_1_TO_1	0x10000L
  const char *errstr = NULL;

  /* Compute the size of the sections which contain program code.
     Only executable PT_LOAD segments contribute; their boundaries are
     rounded out to page granularity.  */
  for (ph = GL(dl_profile_map)->l_phdr;
       ph < &GL(dl_profile_map)->l_phdr[GL(dl_profile_map)->l_phnum]; ++ph)
    if (ph->p_type == PT_LOAD && (ph->p_flags & PF_X))
      {
	ElfW(Addr) start = (ph->p_vaddr & ~(GLRO(dl_pagesize) - 1));
	ElfW(Addr) end = ((ph->p_vaddr + ph->p_memsz + GLRO(dl_pagesize) - 1)
			  & ~(GLRO(dl_pagesize) - 1));

	if (start < mapstart)
	  mapstart = start;
	if (end > mapend)
	  mapend = end;
      }

  /* Now we can compute the size of the profiling data.  This is done
     with the same formulas as in `monstartup' (see gmon.c).  */
  running = 0;
  lowpc = ROUNDDOWN (mapstart + GL(dl_profile_map)->l_addr,
		     HISTFRACTION * sizeof (HISTCOUNTER));
  highpc = ROUNDUP (mapend + GL(dl_profile_map)->l_addr,
		    HISTFRACTION * sizeof (HISTCOUNTER));
  textsize = highpc - lowpc;
  kcountsize = textsize / HISTFRACTION;
  if ((HASHFRACTION & (HASHFRACTION - 1)) == 0)
    {
      /* If HASHFRACTION is a power of two, mcount can use shifting
	 instead of integer division.  Precompute shift amount.

	 This is a constant but the compiler cannot compile the
	 expression away since the __ffs implementation is not known
	 to the compiler.  Help the compiler by precomputing the
	 usual cases.  */
      assert (HASHFRACTION == 2);

      if (sizeof (*froms) == 8)
	log_hashfraction = 4;
      else if (sizeof (*froms) == 16)
	log_hashfraction = 5;
      else
	log_hashfraction = __ffs (HASHFRACTION * sizeof (*froms)) - 1;
    }
  else
    log_hashfraction = -1;
  tossize = textsize / HASHFRACTION;
  /* The arc capacity is clamped to [MINARCS, MAXARCS] (see the file
     header comment).  */
  fromlimit = textsize * ARCDENSITY / 100;
  if (fromlimit < MINARCS)
    fromlimit = MINARCS;
  if (fromlimit > MAXARCS)
    fromlimit = MAXARCS;
  fromssize = fromlimit * sizeof (struct here_fromstruct);

  /* NOTE(review): FROMSSIZE is already a byte count, so multiplying it
     again by sizeof (struct here_cg_arc_record) makes the arc area far
     larger than FROMLIMIT records require.  Since every reader and
     writer of the shared file uses this same formula the format is
     self-consistent, so it must not be "fixed" without changing the
     file-format version — TODO confirm against upstream history.  */
  expected_size = (sizeof (struct gmon_hdr)
		   + 4 + sizeof (struct gmon_hist_hdr) + kcountsize
		   + 4 + 4 + fromssize * sizeof (struct here_cg_arc_record));

  /* Create the gmon_hdr we expect or write.  */
  struct real_gmon_hdr
  {
    char cookie[4];
    int32_t version;
    char spare[3 * 4];
  } gmon_hdr;
  /* Verify at runtime that our aligned-types mirror matches the
     packed on-disk layout of struct gmon_hdr.  */
  if (sizeof (gmon_hdr) != sizeof (struct gmon_hdr)
      || (offsetof (struct real_gmon_hdr, cookie)
	  != offsetof (struct gmon_hdr, cookie))
      || (offsetof (struct real_gmon_hdr, version)
	  != offsetof (struct gmon_hdr, version)))
    abort ();

  memcpy (&gmon_hdr.cookie[0], GMON_MAGIC, sizeof (gmon_hdr.cookie));
  gmon_hdr.version = GMON_SHOBJ_VERSION;
  memset (gmon_hdr.spare, '\0', sizeof (gmon_hdr.spare));

  /* Create the hist_hdr we expect or write.  */
  struct real_gmon_hist_hdr
  {
    char *low_pc;
    char *high_pc;
    int32_t hist_size;
    int32_t prof_rate;
    char dimen[15];
    char dimen_abbrev;
  } hist_hdr;
  /* Same layout cross-check as for the file header above.  */
  if (sizeof (hist_hdr) != sizeof (struct gmon_hist_hdr)
      || (offsetof (struct real_gmon_hist_hdr, low_pc)
	  != offsetof (struct gmon_hist_hdr, low_pc))
      || (offsetof (struct real_gmon_hist_hdr, high_pc)
	  != offsetof (struct gmon_hist_hdr, high_pc))
      || (offsetof (struct real_gmon_hist_hdr, hist_size)
	  != offsetof (struct gmon_hist_hdr, hist_size))
      || (offsetof (struct real_gmon_hist_hdr, prof_rate)
	  != offsetof (struct gmon_hist_hdr, prof_rate))
      || (offsetof (struct real_gmon_hist_hdr, dimen)
	  != offsetof (struct gmon_hist_hdr, dimen))
      || (offsetof (struct real_gmon_hist_hdr, dimen_abbrev)
	  != offsetof (struct gmon_hist_hdr, dimen_abbrev)))
    abort ();

  hist_hdr.low_pc = (char *) mapstart;
  hist_hdr.high_pc = (char *) mapend;
  hist_hdr.hist_size = kcountsize / sizeof (HISTCOUNTER);
  hist_hdr.prof_rate = __profile_frequency ();
  if (sizeof (hist_hdr.dimen) >= sizeof ("seconds"))
    {
      memcpy (hist_hdr.dimen, "seconds", sizeof ("seconds"));
      memset (hist_hdr.dimen + sizeof ("seconds"), '\0',
	      sizeof (hist_hdr.dimen) - sizeof ("seconds"));
    }
  else
    strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen));
  hist_hdr.dimen_abbrev = 's';

  /* First determine the output name.  We write in the directory
     OUTPUT_DIR and the name is composed from the shared objects
     soname (or the file name) and the ending ".profile".  */
  filename = (char *) alloca (strlen (GLRO(dl_profile_output)) + 1
			      + strlen (GLRO(dl_profile)) + sizeof ".profile");
  cp = __stpcpy (filename, GLRO(dl_profile_output));
  *cp++ = '/';
  __stpcpy (__stpcpy (cp, GLRO(dl_profile)), ".profile");

  fd = __open64_nocancel (filename, O_RDWR|O_CREAT|O_NOFOLLOW, DEFFILEMODE);
  if (fd == -1)
    {
      char buf[400];
      int errnum;

      /* We cannot write the profiling data so don't do anything.  */
      errstr = "%s: cannot open file: %s\n";
    print_error:
      /* Common error exit: save errno before further calls can
	 clobber it, release the descriptor, report, and give up.  */
      errnum = errno;
      if (fd != -1)
	__close_nocancel (fd);
      _dl_error_printf (errstr, filename,
			__strerror_r (errnum, buf, sizeof buf));
      return;
    }

  if (__fxstat64 (_STAT_VER, fd, &st) < 0 || !S_ISREG (st.st_mode))
    {
      /* Not stat'able or not a regular file => don't use it.  */
      errstr = "%s: cannot stat file: %s\n";
      goto print_error;
    }

  /* Test the size.  If it does not match what we expect from the size
     values in the map MAP we don't use it and warn the user.  */
  if (st.st_size == 0)
    {
      /* We have to create the file.  Extend it to EXPECTED_SIZE by
	 seeking to the last full page and writing the remainder.  */
      char buf[GLRO(dl_pagesize)];

      memset (buf, '\0', GLRO(dl_pagesize));

      if (__lseek (fd, expected_size & ~(GLRO(dl_pagesize) - 1), SEEK_SET) == -1)
	{
	cannot_create:
	  errstr = "%s: cannot create file: %s\n";
	  goto print_error;
	}

      if (TEMP_FAILURE_RETRY
	  (__write_nocancel (fd, buf, (expected_size & (GLRO(dl_pagesize) - 1))))
	  < 0)
	goto cannot_create;
    }
  else if (st.st_size != expected_size)
    {
      /* An existing file of the wrong size cannot be a profile for
	 this object; refuse to touch it.  */
      __close_nocancel (fd);
    wrong_format:

      if (addr != NULL)
	__munmap ((void *) addr, expected_size);

      _dl_error_printf ("%s: file is no correct profile data file for `%s'\n",
			filename, GLRO(dl_profile));
      return;
    }

  /* Map the whole file shared so every profiled process updates the
     same counters and the data lands on disk automatically.  */
  addr = (struct gmon_hdr *) __mmap (NULL, expected_size, PROT_READ|PROT_WRITE,
				     MAP_SHARED|MAP_FILE, fd, 0);
  if (addr == (struct gmon_hdr *) MAP_FAILED)
    {
      errstr = "%s: cannot map file: %s\n";
      goto print_error;
    }

  /* We don't need the file descriptor anymore.  */
  __close_nocancel (fd);

  /* Pointer to data after the header.  */
  hist = (char *) (addr + 1);
  kcount = (uint16_t *) ((char *) hist + sizeof (uint32_t)
			 + sizeof (struct gmon_hist_hdr));

  /* Compute pointer to array of the arc information.  */
  narcsp = (uint32_t *) ((char *) kcount + kcountsize + sizeof (uint32_t));
  data = (struct here_cg_arc_record *) ((char *) narcsp + sizeof (uint32_t));

  if (st.st_size == 0)
    {
      /* Create the signature.  */
      memcpy (addr, &gmon_hdr, sizeof (struct gmon_hdr));

      *(uint32_t *) hist = GMON_TAG_TIME_HIST;
      memcpy (hist + sizeof (uint32_t), &hist_hdr,
	      sizeof (struct gmon_hist_hdr));

      narcsp[-1] = GMON_TAG_CG_ARC;
    }
  else
    {
      /* Test the signature in the file.  */
      if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0
	  || *(uint32_t *) hist != GMON_TAG_TIME_HIST
	  || memcmp (hist + sizeof (uint32_t), &hist_hdr,
		     sizeof (struct gmon_hist_hdr)) != 0
	  || narcsp[-1] != GMON_TAG_CG_ARC)
	goto wrong_format;
    }

  /* Allocate memory for the froms data and the pointer to the tos records.  */
  tos = (uint16_t *) calloc (tossize + fromssize, 1);
  if (tos == NULL)
    {
      __munmap ((void *) addr, expected_size);
      _dl_fatal_printf ("Out of memory while initializing profiler\n");
      /* NOTREACHED */
    }

  froms = (struct here_fromstruct *) ((char *) tos + tossize);
  fromidx = 0;

  /* Now we have to process all the arc count entries.  BTW: it is
     not critical whether the *NARCSP value changes meanwhile.  Before
     we enter a new entry in to toset we will check that everything is
     available in TOS.  This happens in _dl_mcount.

     Loading the entries in reverse order should help to get the most
     frequently used entries at the front of the list.  */
  for (idx = narcs = MIN (*narcsp, fromlimit); idx > 0; )
    {
      size_t to_index;
      size_t newfromidx;
      --idx;
      to_index = (data[idx].self_pc / (HASHFRACTION * sizeof (*tos)));
      newfromidx = fromidx++;
      froms[newfromidx].here = &data[idx];
      froms[newfromidx].link = tos[to_index];
      tos[to_index] = newfromidx;
    }

  /* Setup counting data.  Compute the profil(2) scale factor mapping
     the text range onto the histogram buffer.  */
  if (kcountsize < highpc - lowpc)
    {
#if 0
      s_scale = ((double) kcountsize / (highpc - lowpc)) * SCALE_1_TO_1;
#else
      /* Integer-only equivalent of the floating-point expression
	 above, with branches to avoid overflow.  */
      size_t range = highpc - lowpc;
      size_t quot = range / kcountsize;

      if (quot >= SCALE_1_TO_1)
	s_scale = 1;
      else if (quot >= SCALE_1_TO_1 / 256)
	s_scale = SCALE_1_TO_1 / quot;
      else if (range > ULONG_MAX / 256)
	s_scale = (SCALE_1_TO_1 * 256) / (range / (kcountsize / 256));
      else
	s_scale = (SCALE_1_TO_1 * 256) / ((range * 256) / kcountsize);
#endif
    }
  else
    s_scale = SCALE_1_TO_1;

  /* Start the profiler.  */
  __profil ((void *) kcount, kcountsize, lowpc, s_scale);

  /* Turn on profiling.  */
  running = 1;
}
|---|
| 487 |  | 
|---|
| 488 |  | 
|---|
/* Record one call-graph arc FROMPC -> SELFPC.  Called from the
   profiling trampolines on every intercepted PLT call.  Runs
   concurrently in many processes sharing the same mmap'ed file, so
   all updates of shared counters use the catomic_* operations; the
   statement order below is part of the (lockless, best-effort)
   protocol and must not be rearranged.  */
void
_dl_mcount (ElfW(Addr) frompc, ElfW(Addr) selfpc)
{
  volatile uint16_t *topcindex;
  size_t i, fromindex;
  struct here_fromstruct *fromp;

  if (! running)
    return;

  /* Compute relative addresses.  The shared object can be loaded at
     any address.  The value of frompc could be anything.  We cannot
     restrict it in any way, just set to a fixed value (0) in case it
     is outside the allowed range.  These calls show up as calls from
     <external> in the gprof output.  */
  frompc -= lowpc;
  if (frompc >= textsize)
    frompc = 0;
  selfpc -= lowpc;
  if (selfpc >= textsize)
    goto done;

  /* Getting here we now have to find out whether the location was
     already used.  If yes we are lucky and only have to increment a
     counter (this also has to be atomic).  If the entry is new things
     are getting complicated...  */

  /* Avoid integer divide if possible.  */
  if ((HASHFRACTION & (HASHFRACTION - 1)) == 0)
    i = selfpc >> log_hashfraction;
  else
    i = selfpc / (HASHFRACTION * sizeof (*tos));

  topcindex = &tos[i];
  fromindex = *topcindex;

  if (fromindex == 0)
    goto check_new_or_add;

  fromp = &froms[fromindex];

  /* We have to look through the chain of arcs whether there is already
     an entry for our arc.  */
  while (fromp->here->from_pc != frompc)
    {
      /* Walk to the end of the chain or to a matching entry.  */
      if (fromp->link != 0)
	do
	  fromp = &froms[fromp->link];
	while (fromp->link != 0 && fromp->here->from_pc != frompc);

      if (fromp->here->from_pc != frompc)
	{
	  /* Not found; a new entry will be linked in at the end of
	     this chain.  */
	  topcindex = &fromp->link;

	check_new_or_add:
	  /* Our entry is not among the entries we read so far from the
	     data file.  Now see whether we have to update the list.
	     Another process may have appended arcs to the shared file
	     (*NARCSP) that we have not yet linked into TOS/FROMS.  */
	  while (narcs != *narcsp && narcs < fromlimit)
	    {
	      size_t to_index;
	      size_t newfromidx;
	      to_index = (data[narcs].self_pc
			  / (HASHFRACTION * sizeof (*tos)));
	      newfromidx = catomic_exchange_and_add (&fromidx, 1) + 1;
	      froms[newfromidx].here = &data[narcs];
	      froms[newfromidx].link = tos[to_index];
	      tos[to_index] = newfromidx;
	      catomic_increment (&narcs);
	    }

	  /* If we still have no entry stop searching and insert.  */
	  if (*topcindex == 0)
	    {
	      /* Atomically claim the next arc slot in the file.  */
	      uint_fast32_t newarc = catomic_exchange_and_add (narcsp, 1);

	      /* In rare cases it could happen that all entries in FROMS are
		 occupied.  So we cannot count this anymore.  */
	      if (newarc >= fromlimit)
		goto done;

	      *topcindex = catomic_exchange_and_add (&fromidx, 1) + 1;
	      fromp = &froms[*topcindex];

	      fromp->here = &data[newarc];
	      data[newarc].from_pc = frompc;
	      data[newarc].self_pc = selfpc;
	      data[newarc].count = 0;
	      fromp->link = 0;
	      catomic_increment (&narcs);

	      break;
	    }

	  /* Another thread/process filled the slot meanwhile; re-read
	     it and continue searching from there.  */
	  fromp = &froms[*topcindex];
	}
      else
	/* Found it.  */
	break;
    }

  /* Increment the counter.  */
  catomic_increment (&fromp->here->count);

 done:
  ;
}
rtld_hidden_def (_dl_mcount)
|---|
| 596 |  | 
|---|