1/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3#ident "$Id$"
4/*======
5This file is part of PerconaFT.
6
7
8Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9
10 PerconaFT is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License, version 2,
12 as published by the Free Software Foundation.
13
14 PerconaFT is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
21
22----------------------------------------
23
24 PerconaFT is free software: you can redistribute it and/or modify
25 it under the terms of the GNU Affero General Public License, version 3,
26 as published by the Free Software Foundation.
27
28 PerconaFT is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 GNU Affero General Public License for more details.
32
33 You should have received a copy of the GNU Affero General Public License
34 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
35======= */
36
37#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38
39#include <portability/toku_config.h>
40
41#include <stdio.h>
42#include <stdlib.h>
43#include <stdint.h>
44#include <unistd.h>
45#include <string.h>
46#include <fcntl.h>
47#include <errno.h>
48#include <toku_assert.h>
49#if defined(HAVE_MALLOC_H)
50# include <malloc.h>
51#elif defined(HAVE_SYS_MALLOC_H)
52# include <sys/malloc.h>
53#endif
54#include <sys/types.h>
55#include <sys/stat.h>
56#include <sys/file.h>
57#if defined(HAVE_SYSCALL_H)
58# include <syscall.h>
59#endif
60#if defined(HAVE_SYS_SYSCALL_H)
61# include <sys/syscall.h>
62#endif
63#if defined(HAVE_SYS_SYSCTL_H)
64# include <sys/sysctl.h>
65#endif
66#if defined(HAVE_PTHREAD_H)
67# include <pthread.h>
68#endif
69#if defined(HAVE_PTHREAD_NP_H)
70# include <pthread_np.h>
71#endif
72#include <inttypes.h>
73#include <sys/time.h>
74#if defined(HAVE_SYS_RESOURCE_H)
75# include <sys/resource.h>
76#endif
77#include <sys/statvfs.h>
78#include "toku_portability.h"
79#include "toku_os.h"
80#include "toku_time.h"
81#include "memory.h"
82
83#include "toku_instrumentation.h"
84
85#include <portability/toku_atomic.h>
86#include <util/partitioned_counter.h>
87
88int
89toku_portability_init(void) {
90 int r = toku_memory_startup();
91 if (r == 0) {
92 uint64_t hz;
93 r = toku_os_get_processor_frequency(&hz); // get and cache freq
94 }
95 (void) toku_os_get_pagesize(); // get and cache pagesize
96 return r;
97}
98
99void
100toku_portability_destroy(void) {
101 toku_memory_shutdown();
102}
103
// Return the id of the calling process, as reported by getpid(), as an int.
int
toku_os_getpid(void) {
    const pid_t self = getpid();
    return (int) self;
}
108
// Return an identifier for the calling thread, as an int.
// The first mechanism available at compile time is used, in this order:
// pthread_threadid_np(), the gettid system call (__NR_gettid, then
// SYS_gettid), pthread_getthreadid_np(). Compilation fails if none exists.
int
toku_os_gettid(void) {
    int tid;
#if defined(HAVE_PTHREAD_THREADID_NP)
    uint64_t wide_tid;
    pthread_threadid_np(NULL, &wide_tid);
    tid = (int) wide_tid; // Used for instrumentation so overflow is ok here.
#elif defined(__NR_gettid)
    tid = (int) syscall(__NR_gettid);
#elif defined(SYS_gettid)
    tid = (int) syscall(SYS_gettid);
#elif defined(HAVE_PTHREAD_GETTHREADID_NP)
    tid = (int) pthread_getthreadid_np();
#else
# error "no implementation of gettid available"
#endif
    return tid;
}
125
// Return the value of sysconf(_SC_NPROCESSORS_CONF), converted to int.
int
toku_os_get_number_processors(void) {
    const long configured = sysconf(_SC_NPROCESSORS_CONF);
    return (int) configured;
}
130
// Return the number of processors currently online, as reported by
// sysconf(_SC_NPROCESSORS_ONLN).
// If the environment variable TOKU_NCPUS holds a positive integer smaller
// than that count, the environment value is returned instead. The variable
// can only lower the result: values that are zero, negative, non-numeric or
// too large are ignored, so the override cannot produce a processor count
// that is zero or negative.
int
toku_os_get_number_active_processors(void) {
    int n = sysconf(_SC_NPROCESSORS_ONLN);
#define DO_TOKU_NCPUS 1
#if DO_TOKU_NCPUS
    {
        const char *toku_ncpus = getenv("TOKU_NCPUS");
        if (toku_ncpus != NULL) {
            char *end = NULL;
            // strtol (unlike atoi) lets us tell "no digits" apart from 0 and
            // saturates on overflow; a saturated value fails the "< n" test.
            const long ncpus = strtol(toku_ncpus, &end, 10);
            const int has_digits = (end != toku_ncpus);
            if (has_digits && ncpus > 0 && ncpus < n) {
                n = (int) ncpus;
            }
        }
    }
#endif
    return n;
}
147
// Page size remembered by toku_os_get_pagesize(); 0 means "not looked up yet".
int toku_cached_pagesize = 0;

// Return the page size. The first call asks sysconf(_SC_PAGESIZE) and stores
// any non-zero answer in toku_cached_pagesize; later calls return the stored
// value without calling sysconf again.
int
toku_os_get_pagesize(void) {
    const int cached = toku_cached_pagesize;
    if (cached != 0) {
        return cached;
    }
    const int fresh = sysconf(_SC_PAGESIZE);
    if (fresh != 0) {
        toku_cached_pagesize = fresh;
    }
    return fresh;
}
161
// Return the amount of physical memory in bytes.
// Where _SC_PHYS_PAGES is available, the result is the number of physical
// pages multiplied by the page size, both obtained from sysconf().
// Otherwise the "hw.memsize" sysctl is queried; if that query fails, 0 is
// returned rather than an indeterminate value.
uint64_t
toku_os_get_phys_memory_size(void) {
#if defined(_SC_PHYS_PAGES)
    uint64_t npages = sysconf(_SC_PHYS_PAGES);
    uint64_t pagesize = sysconf(_SC_PAGESIZE);
    return npages*pagesize;
#elif defined(HAVE_SYS_SYSCTL_H)
    uint64_t memsize = 0;
    size_t len = sizeof memsize;
    if (sysctlbyname("hw.memsize", &memsize, &len, NULL, 0) != 0) {
        // The query failed, so memsize holds nothing meaningful.
        return 0;
    }
    return memsize;
#else
# error "cannot find _SC_PHYS_PAGES or sysctlbyname()"
#endif
}
177
178int toku_os_get_file_size_with_source_location(int fildes,
179 int64_t *fsize,
180 const char *src_file,
181 uint src_line) {
182 toku_struct_stat sbuf;
183
184 toku_io_instrumentation io_annotation;
185 toku_instr_file_io_begin(io_annotation,
186 toku_instr_file_op::file_stat,
187 fildes,
188 0,
189 src_file,
190 src_line);
191
192 int r = fstat(fildes, &sbuf);
193 if (r == 0) {
194 *fsize = sbuf.st_size;
195 }
196 toku_instr_file_io_end(io_annotation, 0);
197
198 return r;
199}
200
201int
202toku_os_get_unique_file_id(int fildes, struct fileid *id) {
203 toku_struct_stat statbuf;
204 memset(id, 0, sizeof(*id));
205 int r=fstat(fildes, &statbuf);
206 if (r==0) {
207 id->st_dev = statbuf.st_dev;
208 id->st_ino = statbuf.st_ino;
209 }
210 return r;
211}
212
// Open (creating if necessary, owner read/write) the file called name and
// take an exclusive, non-blocking flock() on it.
// Returns the open descriptor on success. If open() fails its negative
// result is returned; if flock() fails the descriptor is closed, errno is
// restored to the value set by flock(), and -1 is returned.
int
toku_os_lock_file(const char *name) {
    const int fd = open(name, O_RDWR|O_CREAT, S_IRUSR | S_IWUSR);
    if (fd < 0) {
        return fd;
    }
    if (flock(fd, LOCK_EX | LOCK_NB) == 0) {
        return fd;
    }
    // close() may overwrite errno, so keep the flock() error around it.
    const int flock_errno = errno;
    close(fd);
    errno = flock_errno;
    return -1;
}
228
// Release the flock() held through fildes and, if that succeeds, close the
// descriptor. Returns the flock() result when it is non-zero (the descriptor
// is then left open), otherwise the close() result.
int
toku_os_unlock_file(int fildes) {
    const int unlock_rc = flock(fildes, LOCK_UN);
    return (unlock_rc != 0) ? unlock_rc : close(fildes);
}
235
// Thin wrapper: create directory pathname with the given mode and return
// whatever mkdir() returns.
int
toku_os_mkdir(const char *pathname, mode_t mode) {
    return mkdir(pathname, mode);
}
241
242int
243toku_os_get_process_times(struct timeval *usertime, struct timeval *kerneltime) {
244 int r;
245 struct rusage rusage;
246 r = getrusage(RUSAGE_SELF, &rusage);
247 if (r == -1)
248 return get_error_errno();
249 if (usertime)
250 *usertime = rusage.ru_utime;
251 if (kerneltime)
252 *kerneltime = rusage.ru_stime;
253 return 0;
254}
255
256int
257toku_os_initialize_settings(int UU(verbosity)) {
258 int r = 0;
259 static int initialized = 0;
260 assert(initialized==0);
261 initialized=1;
262 return r;
263}
264
// True when path begins with '/'. path must not be NULL.
bool toku_os_is_absolute_name(const char* path) {
    const char first = *path;
    return first == '/';
}
268
269int
270toku_os_get_max_process_data_size(uint64_t *maxdata) {
271 int r;
272 struct rlimit rlimit;
273
274 r = getrlimit(RLIMIT_DATA, &rlimit);
275 if (r == 0) {
276 uint64_t d;
277 d = rlimit.rlim_max;
278 // with the "right" macros defined, the rlimit is a 64 bit number on a
279 // 32 bit system. getrlimit returns 2**64-1 which is clearly wrong.
280
281 // for 32 bit processes, we assume that 1/2 of the address space is
282 // used for mapping the kernel. this may be pessimistic.
283 if (sizeof (void *) == 4 && d > (1ULL << 31))
284 d = 1ULL << 31;
285 *maxdata = d;
286 } else
287 r = get_error_errno();
288 return r;
289}
290
291int toku_stat_with_source_location(const char *name,
292 toku_struct_stat *buf,
293 const toku_instr_key &instr_key,
294 const char *src_file,
295 uint src_line) {
296 toku_io_instrumentation io_annotation;
297 toku_instr_file_name_io_begin(io_annotation,
298 instr_key,
299 toku_instr_file_op::file_stat,
300 name,
301 0,
302 src_file,
303 src_line);
304 int r = stat(name, buf);
305
306 toku_instr_file_io_end(io_annotation, 0);
307 return r;
308}
309
310int toku_os_fstat_with_source_location(int fd,
311 toku_struct_stat *buf,
312 const char *src_file,
313 uint src_line) {
314 toku_io_instrumentation io_annotation;
315 toku_instr_file_io_begin(io_annotation,
316 toku_instr_file_op::file_stat,
317 fd,
318 0,
319 src_file,
320 src_line);
321
322 int r = fstat(fd, buf);
323 toku_instr_file_io_end(io_annotation, 0);
324 return r;
325}
326
327static int
328toku_get_processor_frequency_sys(uint64_t *hzret) {
329 int r;
330 FILE *fp = fopen("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", "r");
331 if (!fp)
332 r = get_error_errno();
333 else {
334 unsigned int khz = 0;
335 if (fscanf(fp, "%u", &khz) == 1) {
336 *hzret = khz * 1000ULL;
337 r = 0;
338 } else
339 r = ENOENT;
340 fclose(fp);
341 }
342 return r;
343}
344
345static int
346toku_get_processor_frequency_cpuinfo(uint64_t *hzret) {
347 int r;
348 FILE *fp = fopen("/proc/cpuinfo", "r");
349 if (!fp) {
350 r = get_error_errno();
351 } else {
352 uint64_t maxhz = 0;
353 /*
354 Some lines in the "/proc/cpuinfo" output can be long, e.g.:
355 "flags : fpu vme de pse tsc ms .... smep erms"
356 In case a line does not fit into "buf", it will be read
357 in parts by multiple "fgets" calls. This is ok, as
358 it is very unlikely that a non-leading substring of a line
359 will match again the pattern "processor: %u".
360 */
361 char buf[512];
362 while (fgets(buf, (int) sizeof(buf), fp) != NULL) {
363 unsigned int cpu;
364 sscanf(buf, "processor : %u", &cpu);
365 unsigned int ma, mb;
366 if (sscanf(buf, "cpu MHz : %u.%u", &ma, &mb) == 2) {
367 uint64_t hz = ma * 1000000ULL + mb * 1000ULL;
368 if (hz > maxhz)
369 maxhz = hz;
370 }
371 }
372 fclose(fp);
373 *hzret = maxhz;
374 r = maxhz == 0 ? ENOENT : 0;;
375 }
376 return r;
377}
378
379static int
380toku_get_processor_frequency_sysctl(const char * const cmd, uint64_t *hzret) {
381 int r = 0;
382 FILE *fp = popen(cmd, "r");
383 if (!fp) {
384 r = EINVAL; // popen doesn't return anything useful in errno,
385 // gotta pick something
386 } else {
387 r = fscanf(fp, "%" SCNu64, hzret);
388 if (r != 1) {
389 r = get_maybe_error_errno();
390 } else {
391 r = 0;
392 }
393 pclose(fp);
394 }
395 return r;
396}
397
398static uint64_t toku_cached_hz; // cache the value of hz so that we avoid opening files to compute it later
399
400int
401toku_os_get_processor_frequency(uint64_t *hzret) {
402 int r;
403 if (toku_cached_hz) {
404 *hzret = toku_cached_hz;
405 r = 0;
406 } else {
407 r = toku_get_processor_frequency_sys(hzret);
408 if (r != 0)
409 r = toku_get_processor_frequency_cpuinfo(hzret);
410 if (r != 0)
411 r = toku_get_processor_frequency_sysctl("sysctl -n hw.cpufrequency", hzret);
412 if (r != 0)
413 r = toku_get_processor_frequency_sysctl("sysctl -n machdep.tsc_freq", hzret);
414 if (r == 0)
415 toku_cached_hz = *hzret;
416 }
417 return r;
418}
419
420int
421toku_get_filesystem_sizes(const char *path, uint64_t *avail_size, uint64_t *free_size, uint64_t *total_size) {
422 struct statvfs s;
423 int r = statvfs(path, &s);
424 if (r == -1) {
425 r = get_error_errno();
426 } else {
427 // get the block size in bytes
428 uint64_t bsize = s.f_frsize ? s.f_frsize : s.f_bsize;
429 // convert blocks to bytes
430 if (avail_size)
431 *avail_size = (uint64_t) s.f_bavail * bsize;
432 if (free_size)
433 *free_size = (uint64_t) s.f_bfree * bsize;
434 if (total_size)
435 *total_size = (uint64_t) s.f_blocks * bsize;
436 }
437 return r;
438}
439
440
// Thin wrapper: duplicate fd onto fd2 and return whatever dup2() returns.
int
toku_dup2(int fd, int fd2) {
    return dup2(fd, fd2);
}
447
448
449// Time
450static double seconds_per_clock = -1;
451
452double tokutime_to_seconds(tokutime_t t) {
453 // Convert tokutime to seconds.
454 if (seconds_per_clock<0) {
455 uint64_t hz;
456 int r = toku_os_get_processor_frequency(&hz);
457 assert(r==0);
458 // There's a race condition here, but it doesn't really matter. If two threads call tokutime_to_seconds
459 // for the first time at the same time, then both will fetch the value and set the same value.
460 seconds_per_clock = 1.0/hz;
461 }
462 return t*seconds_per_clock;
463}
464
465#include <toku_race_tools.h>
466void __attribute__((constructor)) toku_portability_helgrind_ignore(void);
467void
468toku_portability_helgrind_ignore(void) {
469 TOKU_VALGRIND_HG_DISABLE_CHECKING(&toku_cached_hz, sizeof toku_cached_hz);
470 TOKU_VALGRIND_HG_DISABLE_CHECKING(&toku_cached_pagesize,
471 sizeof toku_cached_pagesize);
472}
473
474static const pfs_key_t pfs_not_instrumented = 0xFFFFFFFF;
475toku_instr_key toku_uninstrumented(pfs_not_instrumented);
476