| 1 | /* |
| 2 | * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. |
| 8 | * |
| 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 12 | * version 2 for more details (a copy is included in the LICENSE file that |
| 13 | * accompanied this code). |
| 14 | * |
| 15 | * You should have received a copy of the GNU General Public License version |
| 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 18 | * |
| 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| 20 | * or visit www.oracle.com if you need additional information or have any |
| 21 | * questions. |
| 22 | * |
| 23 | */ |
| 24 | |
| 25 | #include "precompiled.hpp" |
| 26 | #include "jvm.h" |
| 27 | #include "asm/macroAssembler.hpp" |
| 28 | #include "asm/macroAssembler.inline.hpp" |
| 29 | #include "logging/log.hpp" |
| 30 | #include "logging/logStream.hpp" |
| 31 | #include "memory/resourceArea.hpp" |
| 32 | #include "runtime/java.hpp" |
| 33 | #include "runtime/os.hpp" |
| 34 | #include "runtime/stubCodeGenerator.hpp" |
| 35 | #include "utilities/virtualizationSupport.hpp" |
| 36 | #include "vm_version_x86.hpp" |
| 37 | |
| 38 | |
| 39 | int VM_Version::_cpu; |
| 40 | int VM_Version::_model; |
| 41 | int VM_Version::_stepping; |
| 42 | VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; |
| 43 | |
| 44 | // Address of instruction which causes SEGV |
| 45 | address VM_Version::_cpuinfo_segv_addr = 0; |
| 46 | // Address of instruction after the one which causes SEGV |
| 47 | address VM_Version::_cpuinfo_cont_addr = 0; |
| 48 | |
| 49 | static BufferBlob* stub_blob; |
| 50 | static const int stub_size = 1100; |
| 51 | |
| 52 | extern "C" { |
| 53 | typedef void (*get_cpu_info_stub_t)(void*); |
| 54 | } |
| 55 | static get_cpu_info_stub_t get_cpu_info_stub = NULL; |
| 56 | |
| 57 | |
| 58 | class VM_Version_StubGenerator: public StubCodeGenerator { |
| 59 | public: |
| 60 | |
| 61 | VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} |
| 62 | |
| 63 | address generate_get_cpu_info() { |
| 64 | // Flags to test CPU type. |
| 65 | const uint32_t HS_EFL_AC = 0x40000; |
| 66 | const uint32_t HS_EFL_ID = 0x200000; |
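    // EFLAGS.AC (bit 18, 0x40000) became writable with the 486, and
    // EFLAGS.ID (bit 21, 0x200000) is writable only on CPUs that implement
    // the CPUID instruction; toggling these bits distinguishes
    // 386/486/CPUID-capable processors below.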
| 67 | // Values for when we don't have a CPUID instruction. |
| 68 | const int CPU_FAMILY_SHIFT = 8; |
| 69 | const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); |
| 70 | const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); |
| 71 | bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2); |
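    // Generate the EVEX probe paths below only when UseAVX is at its default
    // or the user explicitly asked for AVX-512 (UseAVX > 2).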
| 72 | |
| 73 | Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4; |
| 74 | Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup; |
| 75 | Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check; |
| 76 | |
    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
| 78 | # define __ _masm-> |
| 79 | |
| 80 | address start = __ pc(); |
| 81 | |
| 82 | // |
| 83 | // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info); |
| 84 | // |
    // LP64: rcx and rdx are the first and second argument registers on Windows
| 86 | |
| 87 | __ push(rbp); |
| 88 | #ifdef _LP64 |
| 89 | __ mov(rbp, c_rarg0); // cpuid_info address |
| 90 | #else |
| 91 | __ movptr(rbp, Address(rsp, 8)); // cpuid_info address |
| 92 | #endif |
| 93 | __ push(rbx); |
| 94 | __ push(rsi); |
    __ pushf();   // preserve rbx, rsi and flags
| 96 | __ pop(rax); |
| 97 | __ push(rax); |
| 98 | __ mov(rcx, rax); |
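    // rax now holds the original EFLAGS; one copy stays on the stack for the
    // final popf, and rcx keeps the original value for the comparisons below.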
| 99 | // |
| 100 | // if we are unable to change the AC flag, we have a 386 |
| 101 | // |
| 102 | __ xorl(rax, HS_EFL_AC); |
| 103 | __ push(rax); |
| 104 | __ popf(); |
| 105 | __ pushf(); |
| 106 | __ pop(rax); |
| 107 | __ cmpptr(rax, rcx); |
| 108 | __ jccb(Assembler::notEqual, detect_486); |
| 109 | |
| 110 | __ movl(rax, CPU_FAMILY_386); |
| 111 | __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); |
| 112 | __ jmp(done); |
| 113 | |
| 114 | // |
| 115 | // If we are unable to change the ID flag, we have a 486 which does |
| 116 | // not support the "cpuid" instruction. |
| 117 | // |
| 118 | __ bind(detect_486); |
| 119 | __ mov(rax, rcx); |
| 120 | __ xorl(rax, HS_EFL_ID); |
| 121 | __ push(rax); |
| 122 | __ popf(); |
| 123 | __ pushf(); |
| 124 | __ pop(rax); |
| 125 | __ cmpptr(rcx, rax); |
| 126 | __ jccb(Assembler::notEqual, detect_586); |
| 127 | |
| 128 | __ bind(cpu486); |
| 129 | __ movl(rax, CPU_FAMILY_486); |
| 130 | __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); |
| 131 | __ jmp(done); |
| 132 | |
| 133 | // |
| 134 | // At this point, we have a chip which supports the "cpuid" instruction |
| 135 | // |
| 136 | __ bind(detect_586); |
| 137 | __ xorl(rax, rax); |
| 138 | __ cpuid(); |
| 139 | __ orl(rax, rax); |
| 140 | __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input |
| 141 | // value of at least 1, we give up and |
| 142 | // assume a 486 |
| 143 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); |
| 144 | __ movl(Address(rsi, 0), rax); |
| 145 | __ movl(Address(rsi, 4), rbx); |
| 146 | __ movl(Address(rsi, 8), rcx); |
| 147 | __ movl(Address(rsi,12), rdx); |
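    // Leaf 0 returns the highest supported standard leaf in eax and the
    // 12-byte vendor string in ebx:edx:ecx (e.g. "GenuineIntel").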
| 148 | |
| 149 | __ cmpl(rax, 0xa); // Is cpuid(0xB) supported? |
| 150 | __ jccb(Assembler::belowEqual, std_cpuid4); |
| 151 | |
| 152 | // |
| 153 | // cpuid(0xB) Processor Topology |
| 154 | // |
| 155 | __ movl(rax, 0xb); |
| 156 | __ xorl(rcx, rcx); // Threads level |
| 157 | __ cpuid(); |
| 158 | |
| 159 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset()))); |
| 160 | __ movl(Address(rsi, 0), rax); |
| 161 | __ movl(Address(rsi, 4), rbx); |
| 162 | __ movl(Address(rsi, 8), rcx); |
| 163 | __ movl(Address(rsi,12), rdx); |
| 164 | |
| 165 | __ movl(rax, 0xb); |
| 166 | __ movl(rcx, 1); // Cores level |
| 167 | __ cpuid(); |
| 168 | __ push(rax); |
| 169 | __ andl(rax, 0x1f); // Determine if valid topology level |
    __ orl(rax, rbx);       // eax[4:0] | ebx[15:0] == 0 indicates an invalid level
| 171 | __ andl(rax, 0xffff); |
| 172 | __ pop(rax); |
| 173 | __ jccb(Assembler::equal, std_cpuid4); |
| 174 | |
| 175 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset()))); |
| 176 | __ movl(Address(rsi, 0), rax); |
| 177 | __ movl(Address(rsi, 4), rbx); |
| 178 | __ movl(Address(rsi, 8), rcx); |
| 179 | __ movl(Address(rsi,12), rdx); |
| 180 | |
| 181 | __ movl(rax, 0xb); |
| 182 | __ movl(rcx, 2); // Packages level |
| 183 | __ cpuid(); |
| 184 | __ push(rax); |
| 185 | __ andl(rax, 0x1f); // Determine if valid topology level |
    __ orl(rax, rbx);       // eax[4:0] | ebx[15:0] == 0 indicates an invalid level
| 187 | __ andl(rax, 0xffff); |
| 188 | __ pop(rax); |
| 189 | __ jccb(Assembler::equal, std_cpuid4); |
| 190 | |
| 191 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset()))); |
| 192 | __ movl(Address(rsi, 0), rax); |
| 193 | __ movl(Address(rsi, 4), rbx); |
| 194 | __ movl(Address(rsi, 8), rcx); |
| 195 | __ movl(Address(rsi,12), rdx); |
| 196 | |
| 197 | // |
| 198 | // cpuid(0x4) Deterministic cache params |
| 199 | // |
| 200 | __ bind(std_cpuid4); |
| 201 | __ movl(rax, 4); |
| 202 | __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported? |
| 203 | __ jccb(Assembler::greater, std_cpuid1); |
| 204 | |
| 205 | __ xorl(rcx, rcx); // L1 cache |
| 206 | __ cpuid(); |
| 207 | __ push(rax); |
| 208 | __ andl(rax, 0x1f); // Determine if valid cache parameters used |
| 209 | __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache |
| 210 | __ pop(rax); |
| 211 | __ jccb(Assembler::equal, std_cpuid1); |
| 212 | |
| 213 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset()))); |
| 214 | __ movl(Address(rsi, 0), rax); |
| 215 | __ movl(Address(rsi, 4), rbx); |
| 216 | __ movl(Address(rsi, 8), rcx); |
| 217 | __ movl(Address(rsi,12), rdx); |
| 218 | |
| 219 | // |
| 220 | // Standard cpuid(0x1) |
| 221 | // |
| 222 | __ bind(std_cpuid1); |
| 223 | __ movl(rax, 1); |
| 224 | __ cpuid(); |
| 225 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); |
| 226 | __ movl(Address(rsi, 0), rax); |
| 227 | __ movl(Address(rsi, 4), rbx); |
| 228 | __ movl(Address(rsi, 8), rcx); |
| 229 | __ movl(Address(rsi,12), rdx); |
| 230 | |
| 231 | // |
| 232 | // Check if OS has enabled XGETBV instruction to access XCR0 |
| 233 | // (OSXSAVE feature flag) and CPU supports AVX |
| 234 | // |
| 235 | __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx |
| 236 | __ cmpl(rcx, 0x18000000); |
| 237 | __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported |
| 238 | |
| 239 | // |
| 240 | // XCR0, XFEATURE_ENABLED_MASK register |
| 241 | // |
| 242 | __ xorl(rcx, rcx); // zero for XCR0 register |
| 243 | __ xgetbv(); |
| 244 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); |
| 245 | __ movl(Address(rsi, 0), rax); |
| 246 | __ movl(Address(rsi, 4), rdx); |
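    // xgetbv with ecx = 0 returns XCR0 in edx:eax; bit 1 (SSE/XMM state) and
    // bit 2 (AVX/YMM state) must both be set by the OS before AVX can be used.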
| 247 | |
| 248 | // |
| 249 | // cpuid(0x7) Structured Extended Features |
| 250 | // |
| 251 | __ bind(sef_cpuid); |
| 252 | __ movl(rax, 7); |
| 253 | __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported? |
| 254 | __ jccb(Assembler::greater, ext_cpuid); |
| 255 | |
| 256 | __ xorl(rcx, rcx); |
| 257 | __ cpuid(); |
| 258 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); |
| 259 | __ movl(Address(rsi, 0), rax); |
| 260 | __ movl(Address(rsi, 4), rbx); |
| 261 | __ movl(Address(rsi, 8), rcx); |
| 262 | __ movl(Address(rsi, 12), rdx); |
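    // Leaf 7 sub-leaf 0 reports structured extended features; ebx bit 16
    // (AVX512F) is checked later to select the EVEX save/restore paths.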
| 263 | |
| 264 | // |
| 265 | // Extended cpuid(0x80000000) |
| 266 | // |
| 267 | __ bind(ext_cpuid); |
| 268 | __ movl(rax, 0x80000000); |
| 269 | __ cpuid(); |
| 270 | __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? |
| 271 | __ jcc(Assembler::belowEqual, done); |
| 272 | __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? |
| 273 | __ jcc(Assembler::belowEqual, ext_cpuid1); |
| 274 | __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported? |
| 275 | __ jccb(Assembler::belowEqual, ext_cpuid5); |
| 276 | __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? |
| 277 | __ jccb(Assembler::belowEqual, ext_cpuid7); |
| 278 | __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported? |
| 279 | __ jccb(Assembler::belowEqual, ext_cpuid8); |
| 280 | __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported? |
| 281 | __ jccb(Assembler::below, ext_cpuid8); |
| 282 | // |
| 283 | // Extended cpuid(0x8000001E) |
| 284 | // |
| 285 | __ movl(rax, 0x8000001E); |
| 286 | __ cpuid(); |
| 287 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset()))); |
| 288 | __ movl(Address(rsi, 0), rax); |
| 289 | __ movl(Address(rsi, 4), rbx); |
| 290 | __ movl(Address(rsi, 8), rcx); |
| 291 | __ movl(Address(rsi,12), rdx); |
| 292 | |
| 293 | // |
| 294 | // Extended cpuid(0x80000008) |
| 295 | // |
| 296 | __ bind(ext_cpuid8); |
| 297 | __ movl(rax, 0x80000008); |
| 298 | __ cpuid(); |
| 299 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset()))); |
| 300 | __ movl(Address(rsi, 0), rax); |
| 301 | __ movl(Address(rsi, 4), rbx); |
| 302 | __ movl(Address(rsi, 8), rcx); |
| 303 | __ movl(Address(rsi,12), rdx); |
| 304 | |
| 305 | // |
| 306 | // Extended cpuid(0x80000007) |
| 307 | // |
| 308 | __ bind(ext_cpuid7); |
| 309 | __ movl(rax, 0x80000007); |
| 310 | __ cpuid(); |
| 311 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset()))); |
| 312 | __ movl(Address(rsi, 0), rax); |
| 313 | __ movl(Address(rsi, 4), rbx); |
| 314 | __ movl(Address(rsi, 8), rcx); |
| 315 | __ movl(Address(rsi,12), rdx); |
| 316 | |
| 317 | // |
| 318 | // Extended cpuid(0x80000005) |
| 319 | // |
| 320 | __ bind(ext_cpuid5); |
| 321 | __ movl(rax, 0x80000005); |
| 322 | __ cpuid(); |
| 323 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset()))); |
| 324 | __ movl(Address(rsi, 0), rax); |
| 325 | __ movl(Address(rsi, 4), rbx); |
| 326 | __ movl(Address(rsi, 8), rcx); |
| 327 | __ movl(Address(rsi,12), rdx); |
| 328 | |
| 329 | // |
| 330 | // Extended cpuid(0x80000001) |
| 331 | // |
| 332 | __ bind(ext_cpuid1); |
| 333 | __ movl(rax, 0x80000001); |
| 334 | __ cpuid(); |
| 335 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset()))); |
| 336 | __ movl(Address(rsi, 0), rax); |
| 337 | __ movl(Address(rsi, 4), rbx); |
| 338 | __ movl(Address(rsi, 8), rcx); |
| 339 | __ movl(Address(rsi,12), rdx); |
| 340 | |
| 341 | // |
| 342 | // Check if OS has enabled XGETBV instruction to access XCR0 |
| 343 | // (OSXSAVE feature flag) and CPU supports AVX |
| 344 | // |
| 345 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); |
| 346 | __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx |
| 347 | __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx |
| 348 | __ cmpl(rcx, 0x18000000); |
| 349 | __ jccb(Assembler::notEqual, done); // jump if AVX is not supported |
| 350 | |
| 351 | __ movl(rax, 0x6); |
| 352 | __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm |
| 353 | __ cmpl(rax, 0x6); |
    __ jccb(Assembler::equal, start_simd_check); // jump if OS has enabled SSE and YMM state (AVX is usable)
| 355 | |
    // The short jccb branches above cannot reach wrapup (imm8 displacement), so use this island as a thunk
| 357 | __ bind(done); |
| 358 | __ jmp(wrapup); |
| 359 | |
| 360 | __ bind(start_simd_check); |
| 361 | // |
    // Some OSes have a bug where the upper 128/256 bits of the YMM/ZMM
    // registers are not restored after signal handling.
    // Generate a SEGV here (by reading through NULL)
    // and check the upper YMM/ZMM bits after it.
| 366 | // |
| 367 | intx saved_useavx = UseAVX; |
| 368 | intx saved_usesse = UseSSE; |
| 369 | // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f |
| 370 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); |
| 371 | __ movl(rax, 0x10000); |
    __ andl(rax, Address(rsi, 4)); // sef_cpuid7 ebx bit 16: avx512f
| 373 | __ cmpl(rax, 0x10000); |
| 374 | __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported |
| 375 | // check _cpuid_info.xem_xcr0_eax.bits.opmask |
| 376 | // check _cpuid_info.xem_xcr0_eax.bits.zmm512 |
| 377 | // check _cpuid_info.xem_xcr0_eax.bits.zmm32 |
| 378 | __ movl(rax, 0xE0); |
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
| 380 | __ cmpl(rax, 0xE0); |
| 381 | __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported |
| 382 | |
    // If UseAVX is uninitialized or was set by the user to include EVEX
| 384 | if (use_evex) { |
| 385 | // EVEX setup: run in lowest evex mode |
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
| 387 | UseAVX = 3; |
| 388 | UseSSE = 2; |
| 389 | #ifdef _WINDOWS |
      // xmm6-xmm15 are callee-saved on Windows x64, so preserve the ones we clobber
| 391 | // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx |
| 392 | __ subptr(rsp, 64); |
| 393 | __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit); |
| 394 | #ifdef _LP64 |
| 395 | __ subptr(rsp, 64); |
| 396 | __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit); |
| 397 | __ subptr(rsp, 64); |
| 398 | __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit); |
| 399 | #endif // _LP64 |
| 400 | #endif // _WINDOWS |
| 401 | |
| 402 | // load value into all 64 bytes of zmm7 register |
| 403 | __ movl(rcx, VM_Version::ymm_test_value()); |
| 404 | __ movdl(xmm0, rcx); |
| 405 | __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit); |
| 406 | __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit); |
| 407 | #ifdef _LP64 |
| 408 | __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit); |
| 409 | __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit); |
| 410 | #endif |
| 411 | VM_Version::clean_cpuFeatures(); |
| 412 | __ jmp(save_restore_except); |
| 413 | } |
| 414 | |
| 415 | __ bind(legacy_setup); |
| 416 | // AVX setup |
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
| 418 | UseAVX = 1; |
| 419 | UseSSE = 2; |
| 420 | #ifdef _WINDOWS |
| 421 | __ subptr(rsp, 32); |
| 422 | __ vmovdqu(Address(rsp, 0), xmm7); |
| 423 | #ifdef _LP64 |
| 424 | __ subptr(rsp, 32); |
| 425 | __ vmovdqu(Address(rsp, 0), xmm8); |
| 426 | __ subptr(rsp, 32); |
| 427 | __ vmovdqu(Address(rsp, 0), xmm15); |
| 428 | #endif // _LP64 |
| 429 | #endif // _WINDOWS |
| 430 | |
| 431 | // load value into all 32 bytes of ymm7 register |
| 432 | __ movl(rcx, VM_Version::ymm_test_value()); |
| 433 | |
| 434 | __ movdl(xmm0, rcx); |
| 435 | __ pshufd(xmm0, xmm0, 0x00); |
| 436 | __ vinsertf128_high(xmm0, xmm0); |
| 437 | __ vmovdqu(xmm7, xmm0); |
| 438 | #ifdef _LP64 |
| 439 | __ vmovdqu(xmm8, xmm0); |
| 440 | __ vmovdqu(xmm15, xmm0); |
| 441 | #endif |
| 442 | VM_Version::clean_cpuFeatures(); |
| 443 | |
| 444 | __ bind(save_restore_except); |
| 445 | __ xorl(rsi, rsi); |
| 446 | VM_Version::set_cpuinfo_segv_addr(__ pc()); |
| 447 | // Generate SEGV |
| 448 | __ movl(rax, Address(rsi, 0)); |
| 449 | |
| 450 | VM_Version::set_cpuinfo_cont_addr(__ pc()); |
| 451 | // Returns here after signal. Save xmm0 to check it later. |
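    // The platform-specific signal handlers recognize a fault at
    // _cpuinfo_segv_addr and resume execution here at _cpuinfo_cont_addr
    // (see the os_*_x86 signal handling code).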
| 452 | |
| 453 | // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f |
| 454 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); |
| 455 | __ movl(rax, 0x10000); |
| 456 | __ andl(rax, Address(rsi, 4)); |
| 457 | __ cmpl(rax, 0x10000); |
| 458 | __ jcc(Assembler::notEqual, legacy_save_restore); |
| 459 | // check _cpuid_info.xem_xcr0_eax.bits.opmask |
| 460 | // check _cpuid_info.xem_xcr0_eax.bits.zmm512 |
| 461 | // check _cpuid_info.xem_xcr0_eax.bits.zmm32 |
| 462 | __ movl(rax, 0xE0); |
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
| 464 | __ cmpl(rax, 0xE0); |
| 465 | __ jcc(Assembler::notEqual, legacy_save_restore); |
| 466 | |
    // If UseAVX is uninitialized or was set by the user to include EVEX
| 468 | if (use_evex) { |
| 469 | // EVEX check: run in lowest evex mode |
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
| 471 | UseAVX = 3; |
| 472 | UseSSE = 2; |
| 473 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset()))); |
| 474 | __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit); |
| 475 | __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit); |
| 476 | #ifdef _LP64 |
| 477 | __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit); |
| 478 | __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit); |
| 479 | #endif |
| 480 | |
| 481 | #ifdef _WINDOWS |
| 482 | #ifdef _LP64 |
| 483 | __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit); |
| 484 | __ addptr(rsp, 64); |
| 485 | __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit); |
| 486 | __ addptr(rsp, 64); |
| 487 | #endif // _LP64 |
| 488 | __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit); |
| 489 | __ addptr(rsp, 64); |
| 490 | #endif // _WINDOWS |
| 491 | generate_vzeroupper(wrapup); |
| 492 | VM_Version::clean_cpuFeatures(); |
| 493 | UseAVX = saved_useavx; |
| 494 | UseSSE = saved_usesse; |
| 495 | __ jmp(wrapup); |
| 496 | } |
| 497 | |
| 498 | __ bind(legacy_save_restore); |
| 499 | // AVX check |
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
| 501 | UseAVX = 1; |
| 502 | UseSSE = 2; |
| 503 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset()))); |
| 504 | __ vmovdqu(Address(rsi, 0), xmm0); |
| 505 | __ vmovdqu(Address(rsi, 32), xmm7); |
| 506 | #ifdef _LP64 |
| 507 | __ vmovdqu(Address(rsi, 64), xmm8); |
| 508 | __ vmovdqu(Address(rsi, 96), xmm15); |
| 509 | #endif |
| 510 | |
| 511 | #ifdef _WINDOWS |
| 512 | #ifdef _LP64 |
| 513 | __ vmovdqu(xmm15, Address(rsp, 0)); |
| 514 | __ addptr(rsp, 32); |
| 515 | __ vmovdqu(xmm8, Address(rsp, 0)); |
| 516 | __ addptr(rsp, 32); |
| 517 | #endif // _LP64 |
| 518 | __ vmovdqu(xmm7, Address(rsp, 0)); |
| 519 | __ addptr(rsp, 32); |
| 520 | #endif // _WINDOWS |
| 521 | generate_vzeroupper(wrapup); |
| 522 | VM_Version::clean_cpuFeatures(); |
| 523 | UseAVX = saved_useavx; |
| 524 | UseSSE = saved_usesse; |
| 525 | |
| 526 | __ bind(wrapup); |
| 527 | __ popf(); |
| 528 | __ pop(rsi); |
| 529 | __ pop(rbx); |
| 530 | __ pop(rbp); |
| 531 | __ ret(0); |
| 532 | |
| 533 | # undef __ |
| 534 | |
| 535 | return start; |
| 536 | }; |
| 537 | void generate_vzeroupper(Label& L_wrapup) { |
| 538 | # define __ _masm-> |
| 539 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); |
| 540 | __ cmpl(Address(rsi, 4), 0x756e6547); // 'uneG' |
| 541 | __ jcc(Assembler::notEqual, L_wrapup); |
| 542 | __ movl(rcx, 0x0FFF0FF0); |
| 543 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); |
| 544 | __ andl(rcx, Address(rsi, 0)); |
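    // rcx now holds leaf 1 eax masked to its extended-family/extended-model
    // and family/model fields (0x0FFF0FF0). 0x00050670 is family 6,
    // model 0x57 (Knights Landing); 0x00080650 is family 6, model 0x85
    // (Knights Mill).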
| 545 | __ cmpl(rcx, 0x00050670); // If it is Xeon Phi 3200/5200/7200 |
| 546 | __ jcc(Assembler::equal, L_wrapup); |
| 547 | __ cmpl(rcx, 0x00080650); // If it is Future Xeon Phi |
| 548 | __ jcc(Assembler::equal, L_wrapup); |
| 549 | __ vzeroupper(); |
| 550 | # undef __ |
| 551 | } |
| 552 | }; |
| 553 | |
| 554 | void VM_Version::get_processor_features() { |
| 555 | |
| 556 | _cpu = 4; // 486 by default |
| 557 | _model = 0; |
| 558 | _stepping = 0; |
| 559 | _features = 0; |
| 560 | _logical_processors_per_package = 1; |
| 561 | // i486 internal cache is both I&D and has a 16-byte line size |
| 562 | _L1_data_cache_line_size = 16; |
| 563 | |
| 564 | // Get raw processor info |
| 565 | |
| 566 | get_cpu_info_stub(&_cpuid_info); |
| 567 | |
| 568 | assert_is_initialized(); |
| 569 | _cpu = extended_cpu_family(); |
| 570 | _model = extended_cpu_model(); |
| 571 | _stepping = cpu_stepping(); |
| 572 | |
| 573 | if (cpu_family() > 4) { // it supports CPUID |
| 574 | _features = feature_flags(); |
| 575 | // Logical processors are only available on P4s and above, |
| 576 | // and only if hyperthreading is available. |
| 577 | _logical_processors_per_package = logical_processor_count(); |
| 578 | _L1_data_cache_line_size = L1_line_size(); |
| 579 | } |
| 580 | |
| 581 | _supports_cx8 = supports_cmpxchg8(); |
| 582 | // xchg and xadd instructions |
| 583 | _supports_atomic_getset4 = true; |
| 584 | _supports_atomic_getadd4 = true; |
| 585 | LP64_ONLY(_supports_atomic_getset8 = true); |
| 586 | LP64_ONLY(_supports_atomic_getadd8 = true); |
| 587 | |
| 588 | #ifdef _LP64 |
| 589 | // OS should support SSE for x64 and hardware should support at least SSE2. |
| 590 | if (!VM_Version::supports_sse2()) { |
| 591 | vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported" ); |
| 592 | } |
  // In 64-bit mode, SSE2 is the minimum.
| 594 | if (UseSSE < 2) UseSSE = 2; |
| 595 | #endif |
| 596 | |
| 597 | #ifdef AMD64 |
  // The flush_icache_stub has to be generated first.
  // That is why the Icache line size is hard coded in the ICache class,
  // see icache_x86.hpp. It is also the reason why we can't use the
  // clflush instruction in the 32-bit VM, since it could be running
  // on a CPU which does not support it.
  //
  // The only thing we can do is verify that the hard-coded
  // ICache::line_size has the correct value.
| 606 | guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported" ); |
  // CPUID leaf 1 ebx[15:8] reports the CLFLUSH line size in quadwords (8 bytes); 8 means the expected 64-byte cache line.
| 608 | guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported" ); |
| 609 | #endif |
| 610 | |
| 611 | // If the OS doesn't support SSE, we can't use this feature even if the HW does |
| 612 | if (!os::supports_sse()) |
| 613 | _features &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2); |
| 614 | |
| 615 | if (UseSSE < 4) { |
| 616 | _features &= ~CPU_SSE4_1; |
| 617 | _features &= ~CPU_SSE4_2; |
| 618 | } |
| 619 | |
| 620 | if (UseSSE < 3) { |
| 621 | _features &= ~CPU_SSE3; |
| 622 | _features &= ~CPU_SSSE3; |
| 623 | _features &= ~CPU_SSE4A; |
| 624 | } |
| 625 | |
| 626 | if (UseSSE < 2) |
| 627 | _features &= ~CPU_SSE2; |
| 628 | |
| 629 | if (UseSSE < 1) |
| 630 | _features &= ~CPU_SSE; |
| 631 | |
  // Since AVX instructions are slower than SSE on some ZX cpus, force UseAVX=0.
| 633 | if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) { |
| 634 | UseAVX = 0; |
| 635 | } |
| 636 | |
| 637 | // first try initial setting and detect what we can support |
| 638 | int use_avx_limit = 0; |
| 639 | if (UseAVX > 0) { |
| 640 | if (UseAVX > 2 && supports_evex()) { |
| 641 | use_avx_limit = 3; |
| 642 | } else if (UseAVX > 1 && supports_avx2()) { |
| 643 | use_avx_limit = 2; |
| 644 | } else if (UseAVX > 0 && supports_avx()) { |
| 645 | use_avx_limit = 1; |
| 646 | } else { |
| 647 | use_avx_limit = 0; |
| 648 | } |
| 649 | } |
| 650 | if (FLAG_IS_DEFAULT(UseAVX)) { |
| 651 | FLAG_SET_DEFAULT(UseAVX, use_avx_limit); |
| 652 | } else if (UseAVX > use_avx_limit) { |
| 653 | warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d" , (int) UseAVX, use_avx_limit); |
| 654 | FLAG_SET_DEFAULT(UseAVX, use_avx_limit); |
| 655 | } else if (UseAVX < 0) { |
| 656 | warning("UseAVX=%d is not valid, setting it to UseAVX=0" , (int) UseAVX); |
| 657 | FLAG_SET_DEFAULT(UseAVX, 0); |
| 658 | } |
| 659 | |
| 660 | if (UseAVX < 3) { |
| 661 | _features &= ~CPU_AVX512F; |
| 662 | _features &= ~CPU_AVX512DQ; |
| 663 | _features &= ~CPU_AVX512CD; |
| 664 | _features &= ~CPU_AVX512BW; |
| 665 | _features &= ~CPU_AVX512VL; |
| 666 | _features &= ~CPU_AVX512_VPOPCNTDQ; |
| 667 | _features &= ~CPU_VPCLMULQDQ; |
| 668 | _features &= ~CPU_VAES; |
| 669 | } |
| 670 | |
| 671 | if (UseAVX < 2) |
| 672 | _features &= ~CPU_AVX2; |
| 673 | |
| 674 | if (UseAVX < 1) { |
| 675 | _features &= ~CPU_AVX; |
| 676 | _features &= ~CPU_VZEROUPPER; |
| 677 | } |
| 678 | |
| 679 | if (logical_processors_per_package() == 1) { |
    // An HT-capable processor could be installed on a system which doesn't support HT.
| 681 | _features &= ~CPU_HT; |
| 682 | } |
| 683 | |
| 684 | if (is_intel()) { // Intel cpus specific settings |
| 685 | if (is_knights_family()) { |
| 686 | _features &= ~CPU_VZEROUPPER; |
| 687 | } |
| 688 | } |
| 689 | |
| 690 | char buf[256]; |
| 691 | jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s" , |
| 692 | cores_per_cpu(), threads_per_core(), |
| 693 | cpu_family(), _model, _stepping, |
| 694 | (supports_cmov() ? ", cmov" : "" ), |
| 695 | (supports_cmpxchg8() ? ", cx8" : "" ), |
| 696 | (supports_fxsr() ? ", fxsr" : "" ), |
| 697 | (supports_mmx() ? ", mmx" : "" ), |
| 698 | (supports_sse() ? ", sse" : "" ), |
| 699 | (supports_sse2() ? ", sse2" : "" ), |
| 700 | (supports_sse3() ? ", sse3" : "" ), |
| 701 | (supports_ssse3()? ", ssse3" : "" ), |
| 702 | (supports_sse4_1() ? ", sse4.1" : "" ), |
| 703 | (supports_sse4_2() ? ", sse4.2" : "" ), |
| 704 | (supports_popcnt() ? ", popcnt" : "" ), |
| 705 | (supports_avx() ? ", avx" : "" ), |
| 706 | (supports_avx2() ? ", avx2" : "" ), |
| 707 | (supports_aes() ? ", aes" : "" ), |
| 708 | (supports_clmul() ? ", clmul" : "" ), |
| 709 | (supports_erms() ? ", erms" : "" ), |
| 710 | (supports_rtm() ? ", rtm" : "" ), |
| 711 | (supports_mmx_ext() ? ", mmxext" : "" ), |
| 712 | (supports_3dnow_prefetch() ? ", 3dnowpref" : "" ), |
| 713 | (supports_lzcnt() ? ", lzcnt" : "" ), |
| 714 | (supports_sse4a() ? ", sse4a" : "" ), |
| 715 | (supports_ht() ? ", ht" : "" ), |
| 716 | (supports_tsc() ? ", tsc" : "" ), |
| 717 | (supports_tscinv_bit() ? ", tscinvbit" : "" ), |
| 718 | (supports_tscinv() ? ", tscinv" : "" ), |
| 719 | (supports_bmi1() ? ", bmi1" : "" ), |
| 720 | (supports_bmi2() ? ", bmi2" : "" ), |
| 721 | (supports_adx() ? ", adx" : "" ), |
| 722 | (supports_evex() ? ", evex" : "" ), |
| 723 | (supports_sha() ? ", sha" : "" ), |
| 724 | (supports_fma() ? ", fma" : "" )); |
| 725 | _features_string = os::strdup(buf); |
| 726 | |
| 727 | // UseSSE is set to the smaller of what hardware supports and what |
| 728 | // the command line requires. I.e., you cannot set UseSSE to 2 on |
| 729 | // older Pentiums which do not support it. |
| 730 | int use_sse_limit = 0; |
| 731 | if (UseSSE > 0) { |
| 732 | if (UseSSE > 3 && supports_sse4_1()) { |
| 733 | use_sse_limit = 4; |
| 734 | } else if (UseSSE > 2 && supports_sse3()) { |
| 735 | use_sse_limit = 3; |
| 736 | } else if (UseSSE > 1 && supports_sse2()) { |
| 737 | use_sse_limit = 2; |
| 738 | } else if (UseSSE > 0 && supports_sse()) { |
| 739 | use_sse_limit = 1; |
| 740 | } else { |
| 741 | use_sse_limit = 0; |
| 742 | } |
| 743 | } |
| 744 | if (FLAG_IS_DEFAULT(UseSSE)) { |
| 745 | FLAG_SET_DEFAULT(UseSSE, use_sse_limit); |
| 746 | } else if (UseSSE > use_sse_limit) { |
| 747 | warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d" , (int) UseSSE, use_sse_limit); |
| 748 | FLAG_SET_DEFAULT(UseSSE, use_sse_limit); |
| 749 | } else if (UseSSE < 0) { |
| 750 | warning("UseSSE=%d is not valid, setting it to UseSSE=0" , (int) UseSSE); |
| 751 | FLAG_SET_DEFAULT(UseSSE, 0); |
| 752 | } |
| 753 | |
| 754 | // Use AES instructions if available. |
| 755 | if (supports_aes()) { |
| 756 | if (FLAG_IS_DEFAULT(UseAES)) { |
| 757 | FLAG_SET_DEFAULT(UseAES, true); |
| 758 | } |
| 759 | if (!UseAES) { |
| 760 | if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { |
| 761 | warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled." ); |
| 762 | } |
| 763 | FLAG_SET_DEFAULT(UseAESIntrinsics, false); |
| 764 | } else { |
| 765 | if (UseSSE > 2) { |
| 766 | if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { |
| 767 | FLAG_SET_DEFAULT(UseAESIntrinsics, true); |
| 768 | } |
| 769 | } else { |
| 770 | // The AES intrinsic stubs require AES instruction support (of course) |
        // but also require SSE3 or higher for the instructions they use.
| 772 | if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { |
| 773 | warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled." ); |
| 774 | } |
| 775 | FLAG_SET_DEFAULT(UseAESIntrinsics, false); |
| 776 | } |
| 777 | |
| 778 | // --AES-CTR begins-- |
| 779 | if (!UseAESIntrinsics) { |
| 780 | if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { |
| 781 | warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled." ); |
| 782 | FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); |
| 783 | } |
| 784 | } else { |
| 785 | if (supports_sse4_1()) { |
| 786 | if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { |
| 787 | FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true); |
| 788 | } |
| 789 | } else { |
| 790 | // The AES-CTR intrinsic stubs require AES instruction support (of course) |
          // but also require SSE4.1 or higher for the instructions they use.
| 792 | if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { |
| 793 | warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled." ); |
| 794 | } |
| 795 | FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); |
| 796 | } |
| 797 | } |
| 798 | // --AES-CTR ends-- |
| 799 | } |
| 800 | } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) { |
| 801 | if (UseAES && !FLAG_IS_DEFAULT(UseAES)) { |
| 802 | warning("AES instructions are not available on this CPU" ); |
| 803 | FLAG_SET_DEFAULT(UseAES, false); |
| 804 | } |
| 805 | if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { |
| 806 | warning("AES intrinsics are not available on this CPU" ); |
| 807 | FLAG_SET_DEFAULT(UseAESIntrinsics, false); |
| 808 | } |
| 809 | if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { |
| 810 | warning("AES-CTR intrinsics are not available on this CPU" ); |
| 811 | FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); |
| 812 | } |
| 813 | } |
| 814 | |
| 815 | // Use CLMUL instructions if available. |
| 816 | if (supports_clmul()) { |
| 817 | if (FLAG_IS_DEFAULT(UseCLMUL)) { |
| 818 | UseCLMUL = true; |
| 819 | } |
| 820 | } else if (UseCLMUL) { |
| 821 | if (!FLAG_IS_DEFAULT(UseCLMUL)) |
| 822 | warning("CLMUL instructions not available on this CPU (AVX may also be required)" ); |
| 823 | FLAG_SET_DEFAULT(UseCLMUL, false); |
| 824 | } |
| 825 | |
| 826 | if (UseCLMUL && (UseSSE > 2)) { |
| 827 | if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { |
| 828 | UseCRC32Intrinsics = true; |
| 829 | } |
| 830 | } else if (UseCRC32Intrinsics) { |
| 831 | if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) |
| 832 | warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)" ); |
| 833 | FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); |
| 834 | } |
| 835 | |
| 836 | if (supports_sse4_2() && supports_clmul()) { |
| 837 | if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { |
| 838 | UseCRC32CIntrinsics = true; |
| 839 | } |
| 840 | } else if (UseCRC32CIntrinsics) { |
| 841 | if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { |
| 842 | warning("CRC32C intrinsics are not available on this CPU" ); |
| 843 | } |
| 844 | FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); |
| 845 | } |
| 846 | |
| 847 | // GHASH/GCM intrinsics |
| 848 | if (UseCLMUL && (UseSSE > 2)) { |
| 849 | if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) { |
| 850 | UseGHASHIntrinsics = true; |
| 851 | } |
| 852 | } else if (UseGHASHIntrinsics) { |
| 853 | if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)) |
| 854 | warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU" ); |
| 855 | FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); |
| 856 | } |
| 857 | |
  // Base64 intrinsics: check the conditions under which the intrinsics will be active.
| 859 | if ((UseAVX > 2) && supports_avx512vl() && supports_avx512bw()) { |
| 860 | if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) { |
| 861 | UseBASE64Intrinsics = true; |
| 862 | } |
| 863 | } else if (UseBASE64Intrinsics) { |
| 864 | if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics)) |
| 865 | warning("Base64 intrinsic requires EVEX instructions on this CPU" ); |
| 866 | FLAG_SET_DEFAULT(UseBASE64Intrinsics, false); |
| 867 | } |
| 868 | |
| 869 | if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions |
| 870 | if (FLAG_IS_DEFAULT(UseFMA)) { |
| 871 | UseFMA = true; |
| 872 | } |
| 873 | } else if (UseFMA) { |
| 874 | warning("FMA instructions are not available on this CPU" ); |
| 875 | FLAG_SET_DEFAULT(UseFMA, false); |
| 876 | } |
| 877 | |
  if (supports_sha() LP64_ONLY(|| (supports_avx2() && supports_bmi2()))) {
| 879 | if (FLAG_IS_DEFAULT(UseSHA)) { |
| 880 | UseSHA = true; |
| 881 | } |
| 882 | } else if (UseSHA) { |
| 883 | warning("SHA instructions are not available on this CPU" ); |
| 884 | FLAG_SET_DEFAULT(UseSHA, false); |
| 885 | } |
| 886 | |
| 887 | if (supports_sha() && supports_sse4_1() && UseSHA) { |
| 888 | if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { |
| 889 | FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); |
| 890 | } |
| 891 | } else if (UseSHA1Intrinsics) { |
| 892 | warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU." ); |
| 893 | FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); |
| 894 | } |
| 895 | |
| 896 | if (supports_sse4_1() && UseSHA) { |
| 897 | if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { |
| 898 | FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); |
| 899 | } |
| 900 | } else if (UseSHA256Intrinsics) { |
| 901 | warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU." ); |
| 902 | FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); |
| 903 | } |
| 904 | |
| 905 | #ifdef _LP64 |
| 906 | // These are only supported on 64-bit |
| 907 | if (UseSHA && supports_avx2() && supports_bmi2()) { |
| 908 | if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) { |
| 909 | FLAG_SET_DEFAULT(UseSHA512Intrinsics, true); |
| 910 | } |
| 911 | } else |
| 912 | #endif |
| 913 | if (UseSHA512Intrinsics) { |
| 914 | warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU." ); |
| 915 | FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); |
| 916 | } |
| 917 | |
| 918 | if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { |
| 919 | FLAG_SET_DEFAULT(UseSHA, false); |
| 920 | } |
| 921 | |
| 922 | if (UseAdler32Intrinsics) { |
| 923 | warning("Adler32Intrinsics not available on this CPU." ); |
| 924 | FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); |
| 925 | } |
| 926 | |
| 927 | if (!supports_rtm() && UseRTMLocking) { |
| 928 | // Can't continue because UseRTMLocking affects UseBiasedLocking flag |
| 929 | // setting during arguments processing. See use_biased_locking(). |
| 930 | // VM_Version_init() is executed after UseBiasedLocking is used |
| 931 | // in Thread::allocate(). |
| 932 | vm_exit_during_initialization("RTM instructions are not available on this CPU" ); |
| 933 | } |
| 934 | |
| 935 | #if INCLUDE_RTM_OPT |
| 936 | if (UseRTMLocking) { |
| 937 | if (is_client_compilation_mode_vm()) { |
| 938 | // Only C2 does RTM locking optimization. |
| 939 | // Can't continue because UseRTMLocking affects UseBiasedLocking flag |
| 940 | // setting during arguments processing. See use_biased_locking(). |
| 941 | vm_exit_during_initialization("RTM locking optimization is not supported in this VM" ); |
| 942 | } |
| 943 | if (is_intel_family_core()) { |
| 944 | if ((_model == CPU_MODEL_HASWELL_E3) || |
| 945 | (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) || |
| 946 | (_model == CPU_MODEL_BROADWELL && _stepping < 4)) { |
| 947 | // currently a collision between SKL and HSW_E3 |
| 948 | if (!UnlockExperimentalVMOptions && UseAVX < 3) { |
| 949 | vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this " |
| 950 | "platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag." ); |
| 951 | } else { |
| 952 | warning("UseRTMLocking is only available as experimental option on this platform." ); |
| 953 | } |
| 954 | } |
| 955 | } |
| 956 | if (!FLAG_IS_CMDLINE(UseRTMLocking)) { |
| 957 | // RTM locking should be used only for applications with |
| 958 | // high lock contention. For now we do not use it by default. |
| 959 | vm_exit_during_initialization("UseRTMLocking flag should be only set on command line" ); |
| 960 | } |
| 961 | } else { // !UseRTMLocking |
| 962 | if (UseRTMForStackLocks) { |
| 963 | if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) { |
| 964 | warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off" ); |
| 965 | } |
| 966 | FLAG_SET_DEFAULT(UseRTMForStackLocks, false); |
| 967 | } |
| 968 | if (UseRTMDeopt) { |
| 969 | FLAG_SET_DEFAULT(UseRTMDeopt, false); |
| 970 | } |
| 971 | if (PrintPreciseRTMLockingStatistics) { |
| 972 | FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false); |
| 973 | } |
| 974 | } |
| 975 | #else |
| 976 | if (UseRTMLocking) { |
| 977 | // Only C2 does RTM locking optimization. |
| 978 | // Can't continue because UseRTMLocking affects UseBiasedLocking flag |
| 979 | // setting during arguments processing. See use_biased_locking(). |
| 980 | vm_exit_during_initialization("RTM locking optimization is not supported in this VM" ); |
| 981 | } |
| 982 | #endif |
| 983 | |
| 984 | #ifdef COMPILER2 |
| 985 | if (UseFPUForSpilling) { |
| 986 | if (UseSSE < 2) { |
| 987 | // Only supported with SSE2+ |
| 988 | FLAG_SET_DEFAULT(UseFPUForSpilling, false); |
| 989 | } |
| 990 | } |
| 991 | #endif |
| 992 | |
| 993 | #if COMPILER2_OR_JVMCI |
| 994 | int max_vector_size = 0; |
| 995 | if (UseSSE < 2) { |
| 996 | // Vectors (in XMM) are only supported with SSE2+ |
    // UseSSE is always at least 2 on x64.
| 998 | max_vector_size = 0; |
| 999 | } else if (UseAVX == 0 || !os_supports_avx_vectors()) { |
    // 16-byte vectors (in XMM) are supported with SSE2+
| 1001 | max_vector_size = 16; |
| 1002 | } else if (UseAVX == 1 || UseAVX == 2) { |
    // 32-byte vectors (in YMM) are only supported with AVX+
| 1004 | max_vector_size = 32; |
| 1005 | } else if (UseAVX > 2) { |
    // 64-byte vectors (in ZMM) are only supported with AVX3 (AVX-512)
| 1007 | max_vector_size = 64; |
| 1008 | } |
| 1009 | |
| 1010 | #ifdef _LP64 |
| 1011 | int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit |
| 1012 | #else |
| 1013 | int min_vector_size = 0; |
| 1014 | #endif |
| 1015 | |
| 1016 | if (!FLAG_IS_DEFAULT(MaxVectorSize)) { |
| 1017 | if (MaxVectorSize < min_vector_size) { |
| 1018 | warning("MaxVectorSize must be at least %i on this platform" , min_vector_size); |
| 1019 | FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size); |
| 1020 | } |
| 1021 | if (MaxVectorSize > max_vector_size) { |
| 1022 | warning("MaxVectorSize must be at most %i on this platform" , max_vector_size); |
| 1023 | FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); |
| 1024 | } |
| 1025 | if (!is_power_of_2(MaxVectorSize)) { |
| 1026 | warning("MaxVectorSize must be a power of 2, setting to default: %i" , max_vector_size); |
| 1027 | FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); |
| 1028 | } |
| 1029 | } else { |
| 1030 | // If default, use highest supported configuration |
| 1031 | FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); |
| 1032 | } |
| 1033 | |
| 1034 | #if defined(COMPILER2) && defined(ASSERT) |
| 1035 | if (MaxVectorSize > 0) { |
| 1036 | if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) { |
| 1037 | tty->print_cr("State of YMM registers after signal handle:" ); |
| 1038 | int nreg = 2 LP64_ONLY(+2); |
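      // ymm_save holds xmm0 and xmm7 on all platforms, plus xmm8 and xmm15
      // on 64-bit; see the save/restore code in the stub above.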
      const char* ymm_name[4] = {"0", "7", "8", "15"};
      for (int i = 0; i < nreg; i++) {
        tty->print("YMM%s:", ymm_name[i]);
        for (int j = 7; j >= 0; j--) {
          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
| 1044 | } |
| 1045 | tty->cr(); |
| 1046 | } |
| 1047 | } |
| 1048 | } |
| 1049 | #endif // COMPILER2 && ASSERT |
| 1050 | |
| 1051 | #ifdef _LP64 |
| 1052 | if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { |
| 1053 | UseMultiplyToLenIntrinsic = true; |
| 1054 | } |
| 1055 | if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { |
| 1056 | UseSquareToLenIntrinsic = true; |
| 1057 | } |
| 1058 | if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { |
| 1059 | UseMulAddIntrinsic = true; |
| 1060 | } |
| 1061 | if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { |
| 1062 | UseMontgomeryMultiplyIntrinsic = true; |
| 1063 | } |
| 1064 | if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { |
| 1065 | UseMontgomerySquareIntrinsic = true; |
| 1066 | } |
| 1067 | #else |
| 1068 | if (UseMultiplyToLenIntrinsic) { |
| 1069 | if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { |
| 1070 | warning("multiplyToLen intrinsic is not available in 32-bit VM" ); |
| 1071 | } |
| 1072 | FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false); |
| 1073 | } |
| 1074 | if (UseMontgomeryMultiplyIntrinsic) { |
| 1075 | if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { |
| 1076 | warning("montgomeryMultiply intrinsic is not available in 32-bit VM" ); |
| 1077 | } |
| 1078 | FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false); |
| 1079 | } |
| 1080 | if (UseMontgomerySquareIntrinsic) { |
| 1081 | if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { |
| 1082 | warning("montgomerySquare intrinsic is not available in 32-bit VM" ); |
| 1083 | } |
| 1084 | FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false); |
| 1085 | } |
| 1086 | if (UseSquareToLenIntrinsic) { |
| 1087 | if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { |
| 1088 | warning("squareToLen intrinsic is not available in 32-bit VM" ); |
| 1089 | } |
| 1090 | FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false); |
| 1091 | } |
| 1092 | if (UseMulAddIntrinsic) { |
| 1093 | if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { |
| 1094 | warning("mulAdd intrinsic is not available in 32-bit VM" ); |
| 1095 | } |
| 1096 | FLAG_SET_DEFAULT(UseMulAddIntrinsic, false); |
| 1097 | } |
| 1098 | #endif // _LP64 |
| 1099 | #endif // COMPILER2_OR_JVMCI |
| 1100 | |
| 1101 | // On new cpus instructions which update whole XMM register should be used |
| 1102 | // to prevent partial register stall due to dependencies on high half. |
| 1103 | // |
| 1104 | // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) |
| 1105 | // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) |
| 1106 | // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). |
| 1107 | // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). |
| 1108 | |
| 1109 | |
| 1110 | if (is_zx()) { // ZX cpus specific settings |
| 1111 | if (FLAG_IS_DEFAULT(UseStoreImmI16)) { |
| 1112 | UseStoreImmI16 = false; // don't use it on ZX cpus |
| 1113 | } |
| 1114 | if ((cpu_family() == 6) || (cpu_family() == 7)) { |
| 1115 | if (FLAG_IS_DEFAULT(UseAddressNop)) { |
| 1116 | // Use it on all ZX cpus |
| 1117 | UseAddressNop = true; |
| 1118 | } |
| 1119 | } |
| 1120 | if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) { |
| 1121 | UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus |
| 1122 | } |
| 1123 | if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) { |
| 1124 | if (supports_sse3()) { |
| 1125 | UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus |
| 1126 | } else { |
| 1127 | UseXmmRegToRegMoveAll = false; |
| 1128 | } |
| 1129 | } |
| 1130 | if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus |
| 1131 | #ifdef COMPILER2 |
| 1132 | if (FLAG_IS_DEFAULT(MaxLoopPad)) { |
        // For new ZX cpus apply the following optimization:
| 1134 | // don't align the beginning of a loop if there are enough instructions |
| 1135 | // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) |
| 1136 | // in current fetch line (OptoLoopAlignment) or the padding |
| 1137 | // is big (> MaxLoopPad). |
| 1138 | // Set MaxLoopPad to 11 for new ZX cpus to reduce number of |
| 1139 | // generated NOP instructions. 11 is the largest size of one |
| 1140 | // address NOP instruction '0F 1F' (see Assembler::nop(i)). |
| 1141 | MaxLoopPad = 11; |
| 1142 | } |
| 1143 | #endif // COMPILER2 |
| 1144 | if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { |
| 1145 | UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus |
| 1146 | } |
| 1147 | if (supports_sse4_2()) { // new ZX cpus |
| 1148 | if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { |
| 1149 | UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus |
| 1150 | } |
| 1151 | } |
| 1152 | if (supports_sse4_2()) { |
| 1153 | if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { |
| 1154 | FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); |
| 1155 | } |
| 1156 | } else { |
| 1157 | if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { |
| 1158 | warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled." ); |
| 1159 | } |
| 1160 | FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); |
| 1161 | } |
| 1162 | } |
| 1163 | |
| 1164 | if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) { |
| 1165 | FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); |
| 1166 | } |
| 1167 | } |
| 1168 | |
| 1169 | if (is_amd_family()) { // AMD cpus specific settings |
| 1170 | if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) { |
| 1171 | // Use it on new AMD cpus starting from Opteron. |
| 1172 | UseAddressNop = true; |
| 1173 | } |
| 1174 | if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) { |
| 1175 | // Use it on new AMD cpus starting from Opteron. |
| 1176 | UseNewLongLShift = true; |
| 1177 | } |
| 1178 | if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) { |
| 1179 | if (supports_sse4a()) { |
| 1180 | UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron |
| 1181 | } else { |
| 1182 | UseXmmLoadAndClearUpper = false; |
| 1183 | } |
| 1184 | } |
| 1185 | if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) { |
| 1186 | if (supports_sse4a()) { |
| 1187 | UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' |
| 1188 | } else { |
| 1189 | UseXmmRegToRegMoveAll = false; |
| 1190 | } |
| 1191 | } |
| 1192 | if (FLAG_IS_DEFAULT(UseXmmI2F)) { |
| 1193 | if (supports_sse4a()) { |
| 1194 | UseXmmI2F = true; |
| 1195 | } else { |
| 1196 | UseXmmI2F = false; |
| 1197 | } |
| 1198 | } |
| 1199 | if (FLAG_IS_DEFAULT(UseXmmI2D)) { |
| 1200 | if (supports_sse4a()) { |
| 1201 | UseXmmI2D = true; |
| 1202 | } else { |
| 1203 | UseXmmI2D = false; |
| 1204 | } |
| 1205 | } |
| 1206 | if (supports_sse4_2()) { |
| 1207 | if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { |
| 1208 | FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); |
| 1209 | } |
| 1210 | } else { |
| 1211 | if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { |
| 1212 | warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled." ); |
| 1213 | } |
| 1214 | FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); |
| 1215 | } |
| 1216 | |
| 1217 | // some defaults for AMD family 15h |
| 1218 | if (cpu_family() == 0x15) { |
| 1219 | // On family 15h processors default is no sw prefetch |
| 1220 | if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { |
| 1221 | FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); |
| 1222 | } |
| 1223 | // Also, if some other prefetch style is specified, default instruction type is PREFETCHW |
| 1224 | if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { |
| 1225 | FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); |
| 1226 | } |
| 1227 | // On family 15h processors use XMM and UnalignedLoadStores for Array Copy |
| 1228 | if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { |
| 1229 | FLAG_SET_DEFAULT(UseXMMForArrayCopy, true); |
| 1230 | } |
| 1231 | if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { |
| 1232 | FLAG_SET_DEFAULT(UseUnalignedLoadStores, true); |
| 1233 | } |
| 1234 | } |
| 1235 | |
| 1236 | #ifdef COMPILER2 |
| 1237 | if (cpu_family() < 0x17 && MaxVectorSize > 16) { |
| 1238 | // Limit vectors size to 16 bytes on AMD cpus < 17h. |
| 1239 | FLAG_SET_DEFAULT(MaxVectorSize, 16); |
| 1240 | } |
| 1241 | #endif // COMPILER2 |
| 1242 | |
| 1243 | // Some defaults for AMD family 17h || Hygon family 18h |
| 1244 | if (cpu_family() == 0x17 || cpu_family() == 0x18) { |
| 1245 | // On family 17h processors use XMM and UnalignedLoadStores for Array Copy |
| 1246 | if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { |
| 1247 | FLAG_SET_DEFAULT(UseXMMForArrayCopy, true); |
| 1248 | } |
| 1249 | if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { |
| 1250 | FLAG_SET_DEFAULT(UseUnalignedLoadStores, true); |
| 1251 | } |
| 1252 | #ifdef COMPILER2 |
| 1253 | if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) { |
| 1254 | FLAG_SET_DEFAULT(UseFPUForSpilling, true); |
| 1255 | } |
| 1256 | #endif |
| 1257 | } |
| 1258 | } |
| 1259 | |
| 1260 | if (is_intel()) { // Intel cpus specific settings |
| 1261 | if (FLAG_IS_DEFAULT(UseStoreImmI16)) { |
| 1262 | UseStoreImmI16 = false; // don't use it on Intel cpus |
| 1263 | } |
| 1264 | if (cpu_family() == 6 || cpu_family() == 15) { |
| 1265 | if (FLAG_IS_DEFAULT(UseAddressNop)) { |
| 1266 | // Use it on all Intel cpus starting from PentiumPro |
| 1267 | UseAddressNop = true; |
| 1268 | } |
| 1269 | } |
| 1270 | if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) { |
| 1271 | UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus |
| 1272 | } |
| 1273 | if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) { |
| 1274 | if (supports_sse3()) { |
| 1275 | UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus |
| 1276 | } else { |
| 1277 | UseXmmRegToRegMoveAll = false; |
| 1278 | } |
| 1279 | } |
| 1280 | if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus |
| 1281 | #ifdef COMPILER2 |
| 1282 | if (FLAG_IS_DEFAULT(MaxLoopPad)) { |
        // For new Intel cpus apply the following optimization:
| 1284 | // don't align the beginning of a loop if there are enough instructions |
| 1285 | // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) |
| 1286 | // in current fetch line (OptoLoopAlignment) or the padding |
| 1287 | // is big (> MaxLoopPad). |
| 1288 | // Set MaxLoopPad to 11 for new Intel cpus to reduce number of |
| 1289 | // generated NOP instructions. 11 is the largest size of one |
| 1290 | // address NOP instruction '0F 1F' (see Assembler::nop(i)). |
| 1291 | MaxLoopPad = 11; |
| 1292 | } |
| 1293 | #endif // COMPILER2 |
| 1294 | if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { |
| 1295 | UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus |
| 1296 | } |
| 1297 | if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus |
| 1298 | if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { |
| 1299 | UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus |
| 1300 | } |
| 1301 | } |
| 1302 | if (supports_sse4_2()) { |
| 1303 | if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { |
| 1304 | FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); |
| 1305 | } |
| 1306 | } else { |
| 1307 | if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { |
| 1308 | warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled." ); |
| 1309 | } |
| 1310 | FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); |
| 1311 | } |
| 1312 | } |
| 1313 | if (is_atom_family() || is_knights_family()) { |
| 1314 | #ifdef COMPILER2 |
| 1315 | if (FLAG_IS_DEFAULT(OptoScheduling)) { |
| 1316 | OptoScheduling = true; |
| 1317 | } |
| 1318 | #endif |
| 1319 | if (supports_sse4_2()) { // Silvermont |
| 1320 | if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { |
| 1321 | UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus |
| 1322 | } |
| 1323 | } |
| 1324 | if (FLAG_IS_DEFAULT(UseIncDec)) { |
| 1325 | FLAG_SET_DEFAULT(UseIncDec, false); |
| 1326 | } |
| 1327 | } |
| 1328 | if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) { |
| 1329 | FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); |
| 1330 | } |
| 1331 | } |
| 1332 | |
| 1333 | #ifdef _LP64 |
| 1334 | if (UseSSE42Intrinsics) { |
| 1335 | if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) { |
| 1336 | UseVectorizedMismatchIntrinsic = true; |
| 1337 | } |
| 1338 | } else if (UseVectorizedMismatchIntrinsic) { |
| 1339 | if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) |
| 1340 | warning("vectorizedMismatch intrinsics are not available on this CPU" ); |
| 1341 | FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); |
| 1342 | } |
| 1343 | #else |
| 1344 | if (UseVectorizedMismatchIntrinsic) { |
| 1345 | if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) { |
| 1346 | warning("vectorizedMismatch intrinsic is not available in 32-bit VM" ); |
| 1347 | } |
| 1348 | FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); |
| 1349 | } |
| 1350 | #endif // _LP64 |
| 1351 | |
| 1352 | // Use count leading zeros count instruction if available. |
| 1353 | if (supports_lzcnt()) { |
| 1354 | if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { |
| 1355 | UseCountLeadingZerosInstruction = true; |
| 1356 | } |
| 1357 | } else if (UseCountLeadingZerosInstruction) { |
| 1358 | warning("lzcnt instruction is not available on this CPU" ); |
| 1359 | FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false); |
| 1360 | } |
| 1361 | |
| 1362 | // Use count trailing zeros instruction if available |
| 1363 | if (supports_bmi1()) { |
| 1364 | // tzcnt does not require VEX prefix |
| 1365 | if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { |
| 1366 | if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) { |
| 1367 | // Don't use tzcnt if BMI1 is switched off on command line. |
| 1368 | UseCountTrailingZerosInstruction = false; |
| 1369 | } else { |
| 1370 | UseCountTrailingZerosInstruction = true; |
| 1371 | } |
| 1372 | } |
| 1373 | } else if (UseCountTrailingZerosInstruction) { |
| 1374 | warning("tzcnt instruction is not available on this CPU" ); |
| 1375 | FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false); |
| 1376 | } |
| 1377 | |
  // BMI instructions (except tzcnt) use an encoding with VEX prefix.
  // VEX prefix is generated only when AVX > 0.
  if (supports_bmi1() && supports_avx()) {
    if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
      UseBMI1Instructions = true;
    }
  } else if (UseBMI1Instructions) {
    warning("BMI1 instructions are not available on this CPU (AVX is also required)");
    FLAG_SET_DEFAULT(UseBMI1Instructions, false);
  }

  if (supports_bmi2() && supports_avx()) {
    if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
      UseBMI2Instructions = true;
    }
  } else if (UseBMI2Instructions) {
    warning("BMI2 instructions are not available on this CPU (AVX is also required)");
    FLAG_SET_DEFAULT(UseBMI2Instructions, false);
  }

  // Use population count instruction if available.
  if (supports_popcnt()) {
    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
      UsePopCountInstruction = true;
    }
  } else if (UsePopCountInstruction) {
    warning("POPCNT instruction is not available on this CPU");
    FLAG_SET_DEFAULT(UsePopCountInstruction, false);
  }

  // Use fast-string operations if available.
  if (supports_erms()) {
    if (FLAG_IS_DEFAULT(UseFastStosb)) {
      UseFastStosb = true;
    }
  } else if (UseFastStosb) {
    warning("fast-string operations are not available on this CPU");
    FLAG_SET_DEFAULT(UseFastStosb, false);
  }

  // Use XMM/YMM MOVDQU instruction for Object Initialization
  if (!UseFastStosb && UseSSE >= 2 && UseUnalignedLoadStores) {
    if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
      UseXMMForObjInit = true;
    }
  } else if (UseXMMForObjInit) {
    warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
    FLAG_SET_DEFAULT(UseXMMForObjInit, false);
  }

#ifdef COMPILER2
  if (FLAG_IS_DEFAULT(AlignVector)) {
    // Modern processors allow misaligned memory operations for vectors.
    AlignVector = !UseUnalignedLoadStores;
  }
#endif // COMPILER2

  // Sanitize AllocatePrefetchInstr even when it was set on the command
  // line: PREFETCHW (3) is only usable with 3DNow!-style prefetch support,
  // and without SSE it is the only prefetch instruction available.
  if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
    FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
  } else if (!supports_sse() && supports_3dnow_prefetch()) {
    FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
  }
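  // AllocatePrefetchInstr values (also reflected in the logging further
  // below): 0 = PREFETCHNTA, 1 = PREFETCHT0, 2 = PREFETCHT2, 3 = PREFETCHW.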

  // Allocation prefetch settings
  intx cache_line_size = prefetch_data_size();
  if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
      (cache_line_size > AllocatePrefetchStepSize)) {
    FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
  }
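  // The step size above is raised to a full cache line so that consecutive
  // prefetches touch distinct lines rather than the same line twice.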

  if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
    assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
    if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
      warning("AllocatePrefetchDistance is set to 0 which disables prefetching. Ignoring AllocatePrefetchStyle flag.");
    }
    FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
  }

  if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
    bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
    FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
  }
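  // AllocatePrefetchStyle == 2 prefetches past a TLAB watermark rather than
  // past each individual allocation, which is why it warrants a different
  // default distance (the flag passed to allocate_prefetch_distance() above).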

  if (is_intel() && cpu_family() == 6 && supports_sse3()) {
    if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
        supports_sse4_2() && supports_ht()) { // Nehalem-based CPUs
      FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
    }
#ifdef COMPILER2
    if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
      FLAG_SET_DEFAULT(UseFPUForSpilling, true);
    }
#endif
  }

  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
#ifdef COMPILER2
    if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
      FLAG_SET_DEFAULT(UseFPUForSpilling, true);
    }
#endif
  }

#ifdef _LP64
  // Prefetch settings

  // Prefetch interval for GC copy/scan == 9 dcache lines. Derived from
  // 50-warehouse SPECjbb runs on a 2-way 1.8 GHz Opteron using a 4 GB heap.
  // Tested intervals from 128 to 2048 in increments of 64 (== one cache line).
  // 256 bytes (4 dcache lines) was the nearest runner-up to 576.

  // GC copy/scan is disabled if prefetchw isn't supported, because
  // Prefetch::write emits an inlined prefetchw on Linux.
  // Do not use the 3dnow prefetchw instruction: it isn't supported on EM64T.
  // The prefetcht0 instruction used instead works for both amd64 and EM64T.

  if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
    FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
  }
  if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
    FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
  }
  if (FLAG_IS_DEFAULT(PrefetchFieldsAhead)) {
    FLAG_SET_DEFAULT(PrefetchFieldsAhead, 1);
  }
#endif

  if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
      (cache_line_size > ContendedPaddingWidth)) {
    ContendedPaddingWidth = cache_line_size;
  }

  // This machine allows unaligned memory accesses
  if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
    FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
  }

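  // Summarize the resulting ergonomics under -Xlog:os+cpu=info
  // (non-product builds only).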
#ifndef PRODUCT
  if (log_is_enabled(Info, os, cpu)) {
    LogStream ls(Log(os, cpu)::info());
    outputStream* log = &ls;
    log->print_cr("Logical CPUs per core: %u",
                  logical_processors_per_package());
    log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
    log->print("UseSSE=%d", (int) UseSSE);
    if (UseAVX > 0) {
      log->print(" UseAVX=%d", (int) UseAVX);
    }
    if (UseAES) {
      log->print(" UseAES=1");
    }
#ifdef COMPILER2
    if (MaxVectorSize > 0) {
      log->print(" MaxVectorSize=%d", (int) MaxVectorSize);
    }
#endif
    log->cr();
    log->print("Allocation");
    if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
      log->print_cr(": no prefetching");
    } else {
      log->print(" prefetching: ");
      if (UseSSE == 0 && supports_3dnow_prefetch()) {
        log->print("PREFETCHW");
      } else if (UseSSE >= 1) {
        if (AllocatePrefetchInstr == 0) {
          log->print("PREFETCHNTA");
        } else if (AllocatePrefetchInstr == 1) {
          log->print("PREFETCHT0");
        } else if (AllocatePrefetchInstr == 2) {
          log->print("PREFETCHT2");
        } else if (AllocatePrefetchInstr == 3) {
          log->print("PREFETCHW");
        }
      }
      if (AllocatePrefetchLines > 1) {
        log->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize);
      } else {
        log->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize);
      }
    }

    if (PrefetchCopyIntervalInBytes > 0) {
      log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
    }
    if (PrefetchScanIntervalInBytes > 0) {
      log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
    }
    if (PrefetchFieldsAhead > 0) {
      log->print_cr("PrefetchFieldsAhead %d", (int) PrefetchFieldsAhead);
    }
    if (ContendedPaddingWidth > 0) {
      log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
    }
  }
#endif // !PRODUCT
}

void VM_Version::print_platform_virtualization_info(outputStream* st) {
  VirtualizationType vrt = VM_Version::get_detected_virtualization();
  if (vrt == XenHVM) {
    st->print_cr("Xen hardware-assisted virtualization detected");
  } else if (vrt == KVM) {
    st->print_cr("KVM virtualization detected");
  } else if (vrt == VMWare) {
    st->print_cr("VMware virtualization detected");
    VirtualizationSupport::print_virtualization_info(st);
  } else if (vrt == HyperV) {
    st->print_cr("Hyper-V virtualization detected");
  }
}

void VM_Version::check_virt_cpuid(uint32_t idx, uint32_t *regs) {
  // TODO support 32 bit
#if defined(_LP64)
#if defined(_MSC_VER)
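  // 64-bit MSVC provides no inline assembler, so assemble a tiny CPUID
  // stub into a resource-allocated CodeBuffer and call it directly.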
  // Allocate space for the code
  const int code_size = 100;
  ResourceMark rm;
  CodeBuffer cb("detect_virt", code_size, 0);
  MacroAssembler* a = new MacroAssembler(&cb);
  address code = a->pc();
  void (*test)(uint32_t idx, uint32_t *regs) = (void(*)(uint32_t idx, uint32_t *regs))code;

  a->movq(r9, rbx); // save nonvolatile register

  // next line would not work on 32-bit
  a->movq(rax, c_rarg0 /* rcx */);
  a->movq(r8, c_rarg1 /* rdx */);
  a->cpuid();
  a->movl(Address(r8,  0), rax);
  a->movl(Address(r8,  4), rbx);
  a->movl(Address(r8,  8), rcx);
  a->movl(Address(r8, 12), rdx);

  a->movq(rbx, r9); // restore nonvolatile register
  a->ret(0);

  uint32_t *code_end = (uint32_t *)a->pc();
  a->flush();

  // execute code
  (*test)(idx, regs);
#elif defined(__GNUC__)
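  // Constraint notes: "+a" passes the leaf index in and out of EAX, "S"
  // pins the output pointer in (R)SI, and CPUID's remaining outputs
  // EBX/ECX/EDX are declared as clobbers along with memory.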
  __asm__ volatile (
    " cpuid;"
    " mov %%eax,(%1);"
    " mov %%ebx,4(%1);"
    " mov %%ecx,8(%1);"
    " mov %%edx,12(%1);"
    : "+a" (idx)
    : "S" (regs)
    : "ebx", "ecx", "edx", "memory");
#endif
#endif
}


bool VM_Version::use_biased_locking() {
#if INCLUDE_RTM_OPT
  // RTM locking is most useful when there is high lock contention and
  // low data contention. With high lock contention the lock is usually
  // inflated and biased locking is not suitable for that case.
  // RTM locking code requires that biased locking is off.
  // Note: we can't switch off UseBiasedLocking in get_processor_features()
  // because it is used by Thread::allocate() which is called before
  // VM_Version::initialize().
  if (UseRTMLocking && UseBiasedLocking) {
    if (FLAG_IS_DEFAULT(UseBiasedLocking)) {
      FLAG_SET_DEFAULT(UseBiasedLocking, false);
    } else {
      warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag.");
      UseBiasedLocking = false;
    }
  }
#endif
  return UseBiasedLocking;
}

// On Xen, the cpuid instruction returns
//  eax / registers[0]: Version of Xen
//  ebx / registers[1]: chars 'XenV'
//  ecx / registers[2]: chars 'MMXe'
//  edx / registers[3]: chars 'nVMM'
//
// On KVM / VMware / MS Hyper-V, the cpuid instruction returns
//  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
//  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
//  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
//
// More information:
//  https://kb.vmware.com/s/article/1009458
//
void VM_Version::check_virtualizations() {
#if defined(_LP64)
  uint32_t registers[4];
  char signature[13];
  uint32_t base;
  signature[12] = '\0';
  memset((void*)registers, 0, 4*sizeof(uint32_t));

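  // Hypervisors report their signature in CPUID leaves starting at
  // 0x40000000. Scan the 0x100-aligned leaves in this range because a
  // hypervisor can publish its signature at an offset leaf (e.g. when
  // another hypervisor interface occupies the base leaf).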
  for (base = 0x40000000; base < 0x40010000; base += 0x100) {
    check_virt_cpuid(base, registers);

    *(uint32_t *)(signature + 0) = registers[1];
    *(uint32_t *)(signature + 4) = registers[2];
    *(uint32_t *)(signature + 8) = registers[3];

    if (strncmp("VMwareVMware", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = VMWare;
      // check for extended metrics from guestlib
      VirtualizationSupport::initialize();
    }

    if (strncmp("Microsoft Hv", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = HyperV;
    }

    if (strncmp("KVMKVMKVM", signature, 9) == 0) {
      Abstract_VM_Version::_detected_virtualization = KVM;
    }

    if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = XenHVM;
    }
  }
#endif
}

void VM_Version::initialize() {
  ResourceMark rm;
  // Creating this stub must be the FIRST use of the assembler.

  stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size);
  if (stub_blob == NULL) {
    vm_exit_during_initialization("Unable to allocate get_cpu_info_stub");
  }
  CodeBuffer c(stub_blob);
  VM_Version_StubGenerator g(&c);
  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
                                     g.generate_get_cpu_info());

  get_processor_features();
  if (cpu_family() > 4) { // CPU families above 486 support CPUID
    check_virtualizations();
  }
}
