1/********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
7 * *
8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
9 * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10 * *
11 ********************************************************************
12
13 CPU capability detection for x86 processors.
14 Originally written by Rudolf Marek.
15
16 function:
17 last mod: $Id$
18
19 ********************************************************************/
20
21#include "x86cpu.h"
22
23#if !defined(OC_X86_ASM)
24ogg_uint32_t oc_cpu_flags_get(void){
25 return 0;
26}
27#else
28# if defined(__amd64__)||defined(__x86_64__)
/*Executes the cpuid instruction with _op loaded into eax, and stores the
   resulting eax, ebx, ecx, and edx register values in the lvalues _eax, _ebx,
   _ecx, and _edx, respectively.
  On x86-64, gcc seems to be able to figure out how to save %rbx for us when
   compiling with -fPIC, so ebx is simply declared as an output operand.*/
# define cpuid(_op,_eax,_ebx,_ecx,_edx) \
 __asm__ __volatile__( \
 "cpuid\n\t" \
 :[eax]"=a"(_eax),[ebx]"=b"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \
 :"a"(_op) \
 :"cc" \
 )
38# else
/*Executes the cpuid instruction with _op loaded into eax, and stores the
   resulting eax, ebx, ecx, and edx register values in the lvalues _eax, _ebx,
   _ecx, and _edx, respectively.
  On x86-32, gcc is not so helpful about saving %ebx, so it is not declared as
   an operand at all.
  Instead, %ebx is exchanged with the general-purpose register chosen for the
   _ebx output immediately before and after cpuid: the first exchange stashes
   the current %ebx, and the second one puts it back while moving the value
   cpuid produced into the output register.*/
# define cpuid(_op,_eax,_ebx,_ecx,_edx) \
 __asm__ __volatile__( \
 "xchgl %%ebx,%[ebx]\n\t" \
 "cpuid\n\t" \
 "xchgl %%ebx,%[ebx]\n\t" \
 :[eax]"=a"(_eax),[ebx]"=r"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \
 :"a"(_op) \
 :"cc" \
 )
49# endif
50
51static ogg_uint32_t oc_parse_intel_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){
52 ogg_uint32_t flags;
53 /*If there isn't even MMX, give up.*/
54 if(!(_edx&0x00800000))return 0;
55 flags=OC_CPU_X86_MMX;
56 if(_edx&0x02000000)flags|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE;
57 if(_edx&0x04000000)flags|=OC_CPU_X86_SSE2;
58 if(_ecx&0x00000001)flags|=OC_CPU_X86_PNI;
59 if(_ecx&0x00000100)flags|=OC_CPU_X86_SSSE3;
60 if(_ecx&0x00080000)flags|=OC_CPU_X86_SSE4_1;
61 if(_ecx&0x00100000)flags|=OC_CPU_X86_SSE4_2;
62 return flags;
63}
64
65static ogg_uint32_t oc_parse_amd_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){
66 ogg_uint32_t flags;
67 /*If there isn't even MMX, give up.*/
68 if(!(_edx&0x00800000))return 0;
69 flags=OC_CPU_X86_MMX;
70 if(_edx&0x00400000)flags|=OC_CPU_X86_MMXEXT;
71 if(_edx&0x80000000)flags|=OC_CPU_X86_3DNOW;
72 if(_edx&0x40000000)flags|=OC_CPU_X86_3DNOWEXT;
73 if(_ecx&0x00000040)flags|=OC_CPU_X86_SSE4A;
74 if(_ecx&0x00000800)flags|=OC_CPU_X86_SSE5;
75 return flags;
76}
77
78ogg_uint32_t oc_cpu_flags_get(void){
79 ogg_uint32_t flags;
80 ogg_uint32_t eax;
81 ogg_uint32_t ebx;
82 ogg_uint32_t ecx;
83 ogg_uint32_t edx;
84# if !defined(__amd64__)&&!defined(__x86_64__)
85 /*Not all x86-32 chips support cpuid, so we have to check.*/
86 __asm__ __volatile__(
87 "pushfl\n\t"
88 "pushfl\n\t"
89 "popl %[a]\n\t"
90 "movl %[a],%[b]\n\t"
91 "xorl $0x200000,%[a]\n\t"
92 "pushl %[a]\n\t"
93 "popfl\n\t"
94 "pushfl\n\t"
95 "popl %[a]\n\t"
96 "popfl\n\t"
97 :[a]"=r"(eax),[b]"=r"(ebx)
98 :
99 :"cc"
100 );
101 /*No cpuid.*/
102 if(eax==ebx)return 0;
103# endif
104 cpuid(0,eax,ebx,ecx,edx);
105 /* l e t n I e n i u n e G*/
106 if(ecx==0x6C65746E&&edx==0x49656E69&&ebx==0x756E6547||
107 /* 6 8 x M T e n i u n e G*/
108 ecx==0x3638784D&&edx==0x54656E69&&ebx==0x756E6547){
109 int family;
110 int model;
111 /*Intel, Transmeta (tested with Crusoe TM5800):*/
112 cpuid(1,eax,ebx,ecx,edx);
113 flags=oc_parse_intel_flags(edx,ecx);
114 family=(eax>>8)&0xF;
115 model=(eax>>4)&0xF;
116 /*The SSE unit on the Pentium M and Core Duo is much slower than the MMX
117 unit, so don't use it.*/
118 if(family==6&&(model==9||model==13||model==14)){
119 flags&=~(OC_CPU_X86_SSE2|OC_CPU_X86_PNI);
120 }
121 }
122 /* D M A c i t n e h t u A*/
123 else if(ecx==0x444D4163&&edx==0x69746E65&&ebx==0x68747541||
124 /* C S N y b e d o e G*/
125 ecx==0x43534e20&&edx==0x79622065&&ebx==0x646f6547){
126 /*AMD, Geode:*/
127 cpuid(0x80000000,eax,ebx,ecx,edx);
128 if(eax<0x80000001)flags=0;
129 else{
130 cpuid(0x80000001,eax,ebx,ecx,edx);
131 flags=oc_parse_amd_flags(edx,ecx);
132 }
133 /*Also check for SSE.*/
134 cpuid(1,eax,ebx,ecx,edx);
135 flags|=oc_parse_intel_flags(edx,ecx);
136 }
137 /*Technically some VIA chips can be configured in the BIOS to return any
138 string here the user wants.
139 There is a special detection method that can be used to identify such
140 processors, but in my opinion, if the user really wants to change it, they
141 deserve what they get.*/
142 /* s l u a H r u a t n e C*/
143 else if(ecx==0x736C7561&&edx==0x48727561&&ebx==0x746E6543){
144 /*VIA:*/
145 /*I only have documentation for the C7 (Esther) and Isaiah (forthcoming)
146 chips (thanks to the engineers from Centaur Technology who provided it).
147 These chips support Intel-like cpuid info.
148 The C3-2 (Nehemiah) cores appear to, as well.*/
149 cpuid(1,eax,ebx,ecx,edx);
150 flags=oc_parse_intel_flags(edx,ecx);
151 if(eax>=0x80000001){
152 /*The (non-Nehemiah) C3 processors support AMD-like cpuid info.
153 We need to check this even if the Intel test succeeds to pick up 3DNow!
154 support on these processors.
155 Unlike actual AMD processors, we cannot _rely_ on this info, since
156 some cores (e.g., the 693 stepping of the Nehemiah) claim to support
157 this function, yet return edx=0, despite the Intel test indicating
158 MMX support.
159 Therefore the features detected here are strictly added to those
160 detected by the Intel test.*/
161 /*TODO: How about earlier chips?*/
162 cpuid(0x80000001,eax,ebx,ecx,edx);
163 /*Note: As of the C7, this function returns Intel-style extended feature
164 flags, not AMD-style.
165 Currently, this only defines bits 11, 20, and 29 (0x20100800), which
166 do not conflict with any of the AMD flags we inspect.
167 For the remaining bits, Intel tells us, "Do not count on their value",
168 but VIA assures us that they will all be zero (at least on the C7 and
169 Isaiah chips).
170 In the (unlikely) event a future processor uses bits 18, 19, 30, or 31
171 (0xC0C00000) for something else, we will have to add code to detect
172 the model to decide when it is appropriate to inspect them.*/
173 flags|=oc_parse_amd_flags(edx,ecx);
174 }
175 }
176 else{
177 /*Implement me.*/
178 flags=0;
179 }
180 return flags;
181}
182#endif
183