1 | /******************************************************************** |
2 | * * |
3 | * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * |
4 | * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * |
5 | * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * |
6 | * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * |
7 | * * |
8 | * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * |
9 | * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * |
10 | * * |
11 | ******************************************************************** |
12 | |
13 | CPU capability detection for x86 processors. |
14 | Originally written by Rudolf Marek. |
15 | |
16 | function: |
17 | last mod: $Id$ |
18 | |
19 | ********************************************************************/ |
20 | |
21 | #include "x86cpu.h" |
22 | |
23 | #if !defined(OC_X86_ASM) |
24 | ogg_uint32_t oc_cpu_flags_get(void){ |
25 | return 0; |
26 | } |
27 | #else |
# if defined(__amd64__)||defined(__x86_64__)
/*On x86-64, gcc seems to be able to figure out how to save %rbx for us when
   compiling with -fPIC.*/
/*cpuid(_op,_eax,_ebx,_ecx,_edx): Executes the cpuid instruction with _op
   loaded into eax, and stores the resulting eax, ebx, ecx, and edx register
   values in the lvalues _eax, _ebx, _ecx, and _edx, respectively.
  Only eax is supplied as an input; ecx is not set before the instruction
   runs.*/
# define cpuid(_op,_eax,_ebx,_ecx,_edx) \
  __asm__ __volatile__( \
   "cpuid\n\t" \
   :[eax]"=a"(_eax),[ebx]"=b"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \
   :"a"(_op) \
   :"cc" \
  )
# else
/*On x86-32, not so much.*/
/*cpuid(_op,_eax,_ebx,_ecx,_edx): Same interface as the x86-64 version, but
   ebx is never named as an output register.
  Instead, ebx is exchanged with a compiler-chosen register before cpuid and
   exchanged back afterwards, so ebx holds its original value again when the
   asm statement ends and the value cpuid produced ends up in _ebx.
  Only eax is supplied as an input; ecx is not set before the instruction
   runs.*/
# define cpuid(_op,_eax,_ebx,_ecx,_edx) \
  __asm__ __volatile__( \
   "xchgl %%ebx,%[ebx]\n\t" \
   "cpuid\n\t" \
   "xchgl %%ebx,%[ebx]\n\t" \
   :[eax]"=a"(_eax),[ebx]"=r"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \
   :"a"(_op) \
   :"cc" \
  )
# endif
50 | |
51 | static ogg_uint32_t oc_parse_intel_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){ |
52 | ogg_uint32_t flags; |
53 | /*If there isn't even MMX, give up.*/ |
54 | if(!(_edx&0x00800000))return 0; |
55 | flags=OC_CPU_X86_MMX; |
56 | if(_edx&0x02000000)flags|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE; |
57 | if(_edx&0x04000000)flags|=OC_CPU_X86_SSE2; |
58 | if(_ecx&0x00000001)flags|=OC_CPU_X86_PNI; |
59 | if(_ecx&0x00000100)flags|=OC_CPU_X86_SSSE3; |
60 | if(_ecx&0x00080000)flags|=OC_CPU_X86_SSE4_1; |
61 | if(_ecx&0x00100000)flags|=OC_CPU_X86_SSE4_2; |
62 | return flags; |
63 | } |
64 | |
65 | static ogg_uint32_t oc_parse_amd_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){ |
66 | ogg_uint32_t flags; |
67 | /*If there isn't even MMX, give up.*/ |
68 | if(!(_edx&0x00800000))return 0; |
69 | flags=OC_CPU_X86_MMX; |
70 | if(_edx&0x00400000)flags|=OC_CPU_X86_MMXEXT; |
71 | if(_edx&0x80000000)flags|=OC_CPU_X86_3DNOW; |
72 | if(_edx&0x40000000)flags|=OC_CPU_X86_3DNOWEXT; |
73 | if(_ecx&0x00000040)flags|=OC_CPU_X86_SSE4A; |
74 | if(_ecx&0x00000800)flags|=OC_CPU_X86_SSE5; |
75 | return flags; |
76 | } |
77 | |
78 | ogg_uint32_t oc_cpu_flags_get(void){ |
79 | ogg_uint32_t flags; |
80 | ogg_uint32_t eax; |
81 | ogg_uint32_t ebx; |
82 | ogg_uint32_t ecx; |
83 | ogg_uint32_t edx; |
84 | # if !defined(__amd64__)&&!defined(__x86_64__) |
85 | /*Not all x86-32 chips support cpuid, so we have to check.*/ |
86 | __asm__ __volatile__( |
87 | "pushfl\n\t" |
88 | "pushfl\n\t" |
89 | "popl %[a]\n\t" |
90 | "movl %[a],%[b]\n\t" |
91 | "xorl $0x200000,%[a]\n\t" |
92 | "pushl %[a]\n\t" |
93 | "popfl\n\t" |
94 | "pushfl\n\t" |
95 | "popl %[a]\n\t" |
96 | "popfl\n\t" |
97 | :[a]"=r" (eax),[b]"=r" (ebx) |
98 | : |
99 | :"cc" |
100 | ); |
101 | /*No cpuid.*/ |
102 | if(eax==ebx)return 0; |
103 | # endif |
104 | cpuid(0,eax,ebx,ecx,edx); |
105 | /* l e t n I e n i u n e G*/ |
106 | if(ecx==0x6C65746E&&edx==0x49656E69&&ebx==0x756E6547|| |
107 | /* 6 8 x M T e n i u n e G*/ |
108 | ecx==0x3638784D&&edx==0x54656E69&&ebx==0x756E6547){ |
109 | int family; |
110 | int model; |
111 | /*Intel, Transmeta (tested with Crusoe TM5800):*/ |
112 | cpuid(1,eax,ebx,ecx,edx); |
113 | flags=oc_parse_intel_flags(edx,ecx); |
114 | family=(eax>>8)&0xF; |
115 | model=(eax>>4)&0xF; |
116 | /*The SSE unit on the Pentium M and Core Duo is much slower than the MMX |
117 | unit, so don't use it.*/ |
118 | if(family==6&&(model==9||model==13||model==14)){ |
119 | flags&=~(OC_CPU_X86_SSE2|OC_CPU_X86_PNI); |
120 | } |
121 | } |
122 | /* D M A c i t n e h t u A*/ |
123 | else if(ecx==0x444D4163&&edx==0x69746E65&&ebx==0x68747541|| |
124 | /* C S N y b e d o e G*/ |
125 | ecx==0x43534e20&&edx==0x79622065&&ebx==0x646f6547){ |
126 | /*AMD, Geode:*/ |
127 | cpuid(0x80000000,eax,ebx,ecx,edx); |
128 | if(eax<0x80000001)flags=0; |
129 | else{ |
130 | cpuid(0x80000001,eax,ebx,ecx,edx); |
131 | flags=oc_parse_amd_flags(edx,ecx); |
132 | } |
133 | /*Also check for SSE.*/ |
134 | cpuid(1,eax,ebx,ecx,edx); |
135 | flags|=oc_parse_intel_flags(edx,ecx); |
136 | } |
137 | /*Technically some VIA chips can be configured in the BIOS to return any |
138 | string here the user wants. |
139 | There is a special detection method that can be used to identify such |
140 | processors, but in my opinion, if the user really wants to change it, they |
141 | deserve what they get.*/ |
142 | /* s l u a H r u a t n e C*/ |
143 | else if(ecx==0x736C7561&&edx==0x48727561&&ebx==0x746E6543){ |
144 | /*VIA:*/ |
145 | /*I only have documentation for the C7 (Esther) and Isaiah (forthcoming) |
146 | chips (thanks to the engineers from Centaur Technology who provided it). |
147 | These chips support Intel-like cpuid info. |
148 | The C3-2 (Nehemiah) cores appear to, as well.*/ |
149 | cpuid(1,eax,ebx,ecx,edx); |
150 | flags=oc_parse_intel_flags(edx,ecx); |
151 | if(eax>=0x80000001){ |
152 | /*The (non-Nehemiah) C3 processors support AMD-like cpuid info. |
153 | We need to check this even if the Intel test succeeds to pick up 3DNow! |
154 | support on these processors. |
155 | Unlike actual AMD processors, we cannot _rely_ on this info, since |
156 | some cores (e.g., the 693 stepping of the Nehemiah) claim to support |
157 | this function, yet return edx=0, despite the Intel test indicating |
158 | MMX support. |
159 | Therefore the features detected here are strictly added to those |
160 | detected by the Intel test.*/ |
161 | /*TODO: How about earlier chips?*/ |
162 | cpuid(0x80000001,eax,ebx,ecx,edx); |
163 | /*Note: As of the C7, this function returns Intel-style extended feature |
164 | flags, not AMD-style. |
165 | Currently, this only defines bits 11, 20, and 29 (0x20100800), which |
166 | do not conflict with any of the AMD flags we inspect. |
167 | For the remaining bits, Intel tells us, "Do not count on their value", |
168 | but VIA assures us that they will all be zero (at least on the C7 and |
169 | Isaiah chips). |
170 | In the (unlikely) event a future processor uses bits 18, 19, 30, or 31 |
171 | (0xC0C00000) for something else, we will have to add code to detect |
172 | the model to decide when it is appropriate to inspect them.*/ |
173 | flags|=oc_parse_amd_flags(edx,ecx); |
174 | } |
175 | } |
176 | else{ |
177 | /*Implement me.*/ |
178 | flags=0; |
179 | } |
180 | return flags; |
181 | } |
182 | #endif |
183 | |