1// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "SamplerCore.hpp"
16
17#include "Constants.hpp"
18#include "Common/Debug.hpp"
19
20namespace
21{
22 void applySwizzle(sw::SwizzleType swizzle, sw::Short4& s, const sw::Vector4s& c)
23 {
24 switch(swizzle)
25 {
26 case sw::SWIZZLE_RED: s = c.x; break;
27 case sw::SWIZZLE_GREEN: s = c.y; break;
28 case sw::SWIZZLE_BLUE: s = c.z; break;
29 case sw::SWIZZLE_ALPHA: s = c.w; break;
30 case sw::SWIZZLE_ZERO: s = sw::Short4(0x0000); break;
31 case sw::SWIZZLE_ONE: s = sw::Short4(0x1000); break;
32 default: ASSERT(false);
33 }
34 }
35
36 void applySwizzle(sw::SwizzleType swizzle, sw::Float4& f, const sw::Vector4f& c)
37 {
38 switch(swizzle)
39 {
40 case sw::SWIZZLE_RED: f = c.x; break;
41 case sw::SWIZZLE_GREEN: f = c.y; break;
42 case sw::SWIZZLE_BLUE: f = c.z; break;
43 case sw::SWIZZLE_ALPHA: f = c.w; break;
44 case sw::SWIZZLE_ZERO: f = sw::Float4(0.0f, 0.0f, 0.0f, 0.0f); break;
45 case sw::SWIZZLE_ONE: f = sw::Float4(1.0f, 1.0f, 1.0f, 1.0f); break;
46 default: ASSERT(false);
47 }
48 }
49}
50
51namespace sw
52{
// External flag read by the sampleTexture() overloads in this file: when true,
// green/blue channels that a texture format does not provide are filled with
// zero instead of one.
extern bool colorsDefaultToZero;
54
// Initializes the 'constants' and 'state' members from the arguments; no other work is done.
SamplerCore::SamplerCore(Pointer<Byte> &constants, const Sampler::State &state) : constants(constants), state(state)
{
}
58
59 Vector4s SamplerCore::sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float4 &bias, Vector4f &dsx, Vector4f &dsy)
60 {
61 return sampleTexture(texture, u, v, w, q, q, dsx, dsy, (dsx), Implicit, true);
62 }
63
64 Vector4s SamplerCore::sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function, bool fixed12)
65 {
66 Vector4s c;
67
68 #if PERF_PROFILE
69 AddAtomic(Pointer<Long>(&profiler.texOperations), 4);
70
71 if(state.compressedFormat)
72 {
73 AddAtomic(Pointer<Long>(&profiler.compressedTex), 4);
74 }
75 #endif
76
77 if(state.textureType == TEXTURE_NULL)
78 {
79 c.x = Short4(0x0000);
80 c.y = Short4(0x0000);
81 c.z = Short4(0x0000);
82
83 if(fixed12) // FIXME: Convert to fixed12 at higher level, when required
84 {
85 c.w = Short4(0x1000);
86 }
87 else
88 {
89 c.w = Short4(0xFFFFu); // FIXME
90 }
91 }
92 else
93 {
94 Float4 uuuu = u;
95 Float4 vvvv = v;
96 Float4 wwww = w;
97 Float4 qqqq = q;
98
99 Int face[4];
100 Float lod;
101 Float anisotropy;
102 Float4 uDelta;
103 Float4 vDelta;
104
105 if(state.textureType != TEXTURE_3D)
106 {
107 if(state.textureType != TEXTURE_CUBE)
108 {
109 computeLod(texture, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, bias.x, dsx, dsy, function);
110 }
111 else
112 {
113 Float4 M;
114 cubeFace(face, uuuu, vvvv, u, v, w, M);
115 computeLodCube(texture, lod, u, v, w, bias.x, dsx, dsy, M, function);
116 }
117 }
118 else
119 {
120 computeLod3D(texture, lod, uuuu, vvvv, wwww, bias.x, dsx, dsy, function);
121 }
122
123 if(!hasFloatTexture())
124 {
125 c = sampleFilter(texture, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function);
126 }
127 else
128 {
129 Vector4f cf = sampleFloatFilter(texture, uuuu, vvvv, wwww, qqqq, offset, lod, anisotropy, uDelta, vDelta, face, function);
130
131 convertFixed12(c, cf);
132 }
133
134 if(fixed12)
135 {
136 if(!hasFloatTexture())
137 {
138 if(state.textureFormat == FORMAT_R5G6B5)
139 {
140 c.x = MulHigh(As<UShort4>(c.x), UShort4(0x10000000 / 0xF800));
141 c.y = MulHigh(As<UShort4>(c.y), UShort4(0x10000000 / 0xFC00));
142 c.z = MulHigh(As<UShort4>(c.z), UShort4(0x10000000 / 0xF800));
143 }
144 else
145 {
146 for(int component = 0; component < textureComponentCount(); component++)
147 {
148 if(hasUnsignedTextureComponent(component))
149 {
150 c[component] = As<UShort4>(c[component]) >> 4;
151 }
152 else
153 {
154 c[component] = c[component] >> 3;
155 }
156 }
157 }
158 }
159
160 if(state.textureFilter != FILTER_GATHER)
161 {
162 int componentCount = textureComponentCount();
163 short defaultColorValue = colorsDefaultToZero ? 0x0000 : 0x1000;
164
165 switch(state.textureFormat)
166 {
167 case FORMAT_R8_SNORM:
168 case FORMAT_G8R8_SNORM:
169 case FORMAT_X8B8G8R8_SNORM:
170 case FORMAT_A8B8G8R8_SNORM:
171 case FORMAT_R8:
172 case FORMAT_R5G6B5:
173 case FORMAT_G8R8:
174 case FORMAT_R8I:
175 case FORMAT_R8UI:
176 case FORMAT_G8R8I:
177 case FORMAT_G8R8UI:
178 case FORMAT_X8B8G8R8I:
179 case FORMAT_X8B8G8R8UI:
180 case FORMAT_A8B8G8R8I:
181 case FORMAT_A8B8G8R8UI:
182 case FORMAT_R16I:
183 case FORMAT_R16UI:
184 case FORMAT_G16R16:
185 case FORMAT_G16R16I:
186 case FORMAT_G16R16UI:
187 case FORMAT_X16B16G16R16I:
188 case FORMAT_X16B16G16R16UI:
189 case FORMAT_A16B16G16R16:
190 case FORMAT_A16B16G16R16I:
191 case FORMAT_A16B16G16R16UI:
192 case FORMAT_R32I:
193 case FORMAT_R32UI:
194 case FORMAT_G32R32I:
195 case FORMAT_G32R32UI:
196 case FORMAT_X32B32G32R32I:
197 case FORMAT_X32B32G32R32UI:
198 case FORMAT_A32B32G32R32I:
199 case FORMAT_A32B32G32R32UI:
200 case FORMAT_X8R8G8B8:
201 case FORMAT_X8B8G8R8:
202 case FORMAT_A8R8G8B8:
203 case FORMAT_A8B8G8R8:
204 case FORMAT_SRGB8_X8:
205 case FORMAT_SRGB8_A8:
206 case FORMAT_V8U8:
207 case FORMAT_Q8W8V8U8:
208 case FORMAT_X8L8V8U8:
209 case FORMAT_V16U16:
210 case FORMAT_A16W16V16U16:
211 case FORMAT_Q16W16V16U16:
212 case FORMAT_YV12_BT601:
213 case FORMAT_YV12_BT709:
214 case FORMAT_YV12_JFIF:
215 if(componentCount < 2) c.y = Short4(defaultColorValue);
216 if(componentCount < 3) c.z = Short4(defaultColorValue);
217 if(componentCount < 4) c.w = Short4(0x1000);
218 break;
219 case FORMAT_A8:
220 c.w = c.x;
221 c.x = Short4(0x0000);
222 c.y = Short4(0x0000);
223 c.z = Short4(0x0000);
224 break;
225 case FORMAT_L8:
226 case FORMAT_L16:
227 c.y = c.x;
228 c.z = c.x;
229 c.w = Short4(0x1000);
230 break;
231 case FORMAT_A8L8:
232 c.w = c.y;
233 c.y = c.x;
234 c.z = c.x;
235 break;
236 case FORMAT_R32F:
237 c.y = Short4(defaultColorValue);
238 case FORMAT_G32R32F:
239 c.z = Short4(defaultColorValue);
240 case FORMAT_X32B32G32R32F:
241 case FORMAT_X32B32G32R32F_UNSIGNED:
242 c.w = Short4(0x1000);
243 case FORMAT_A32B32G32R32F:
244 break;
245 case FORMAT_D32F_LOCKABLE:
246 case FORMAT_D32FS8_TEXTURE:
247 case FORMAT_D32F_SHADOW:
248 case FORMAT_D32FS8_SHADOW:
249 c.y = c.x;
250 c.z = c.x;
251 c.w = c.x;
252 break;
253 default:
254 ASSERT(false);
255 }
256 }
257
258 if((state.swizzleR != SWIZZLE_RED) ||
259 (state.swizzleG != SWIZZLE_GREEN) ||
260 (state.swizzleB != SWIZZLE_BLUE) ||
261 (state.swizzleA != SWIZZLE_ALPHA))
262 {
263 const Vector4s col(c);
264 applySwizzle(state.swizzleR, c.x, col);
265 applySwizzle(state.swizzleG, c.y, col);
266 applySwizzle(state.swizzleB, c.z, col);
267 applySwizzle(state.swizzleA, c.w, col);
268 }
269 }
270 }
271
272 return c;
273 }
274
275 Vector4f SamplerCore::sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
276 {
277 Vector4f c;
278
279 #if PERF_PROFILE
280 AddAtomic(Pointer<Long>(&profiler.texOperations), 4);
281
282 if(state.compressedFormat)
283 {
284 AddAtomic(Pointer<Long>(&profiler.compressedTex), 4);
285 }
286 #endif
287
288 if(state.textureType == TEXTURE_NULL)
289 {
290 c.x = Float4(0.0f);
291 c.y = Float4(0.0f);
292 c.z = Float4(0.0f);
293 c.w = Float4(1.0f);
294 }
295 else
296 {
297 // FIXME: YUV is not supported by the floating point path
298 bool forceFloatFiltering = state.highPrecisionFiltering && !hasYuvFormat() && (state.textureFilter != FILTER_POINT);
299 bool seamlessCube = (state.addressingModeU == ADDRESSING_SEAMLESS);
300 bool rectangleTexture = (state.textureType == TEXTURE_RECTANGLE);
301 if(hasFloatTexture() || hasUnnormalizedIntegerTexture() || forceFloatFiltering || seamlessCube || rectangleTexture) // FIXME: Mostly identical to integer sampling
302 {
303 Float4 uuuu = u;
304 Float4 vvvv = v;
305 Float4 wwww = w;
306 Float4 qqqq = q;
307
308 Int face[4];
309 Float lod;
310 Float anisotropy;
311 Float4 uDelta;
312 Float4 vDelta;
313
314 if(state.textureType != TEXTURE_3D)
315 {
316 if(state.textureType != TEXTURE_CUBE)
317 {
318 computeLod(texture, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, bias.x, dsx, dsy, function);
319 }
320 else
321 {
322 Float4 M;
323 cubeFace(face, uuuu, vvvv, u, v, w, M);
324 computeLodCube(texture, lod, u, v, w, bias.x, dsx, dsy, M, function);
325 }
326 }
327 else
328 {
329 computeLod3D(texture, lod, uuuu, vvvv, wwww, bias.x, dsx, dsy, function);
330 }
331
332 c = sampleFloatFilter(texture, uuuu, vvvv, wwww, qqqq, offset, lod, anisotropy, uDelta, vDelta, face, function);
333
334 if(!hasFloatTexture() && !hasUnnormalizedIntegerTexture())
335 {
336 if(has16bitTextureFormat())
337 {
338 switch(state.textureFormat)
339 {
340 case FORMAT_R5G6B5:
341 c.x *= Float4(1.0f / 0xF800);
342 c.y *= Float4(1.0f / 0xFC00);
343 c.z *= Float4(1.0f / 0xF800);
344 break;
345 default:
346 ASSERT(false);
347 }
348 }
349 else
350 {
351 for(int component = 0; component < textureComponentCount(); component++)
352 {
353 c[component] *= Float4(hasUnsignedTextureComponent(component) ? 1.0f / 0xFFFF : 1.0f / 0x7FFF);
354 }
355 }
356 }
357 }
358 else
359 {
360 Vector4s cs = sampleTexture(texture, u, v, w, q, bias, dsx, dsy, offset, function, false);
361
362 if(state.textureFormat == FORMAT_R5G6B5)
363 {
364 c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
365 c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFC00);
366 c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800);
367 }
368 else
369 {
370 for(int component = 0; component < textureComponentCount(); component++)
371 {
372 if(hasUnsignedTextureComponent(component))
373 {
374 convertUnsigned16(c[component], cs[component]);
375 }
376 else
377 {
378 convertSigned15(c[component], cs[component]);
379 }
380 }
381 }
382 }
383
384 int componentCount = textureComponentCount();
385 float defaultColorValue = colorsDefaultToZero ? 0.0f : 1.0f;
386
387 if(state.textureFilter != FILTER_GATHER)
388 {
389 switch(state.textureFormat)
390 {
391 case FORMAT_R8I:
392 case FORMAT_R8UI:
393 case FORMAT_R16I:
394 case FORMAT_R16UI:
395 case FORMAT_R32I:
396 case FORMAT_R32UI:
397 c.y = As<Float4>(UInt4(0));
398 case FORMAT_G8R8I:
399 case FORMAT_G8R8UI:
400 case FORMAT_G16R16I:
401 case FORMAT_G16R16UI:
402 case FORMAT_G32R32I:
403 case FORMAT_G32R32UI:
404 c.z = As<Float4>(UInt4(0));
405 case FORMAT_X8B8G8R8I:
406 case FORMAT_X8B8G8R8UI:
407 case FORMAT_X16B16G16R16I:
408 case FORMAT_X16B16G16R16UI:
409 case FORMAT_X32B32G32R32I:
410 case FORMAT_X32B32G32R32UI:
411 c.w = As<Float4>(UInt4(1));
412 case FORMAT_A8B8G8R8I:
413 case FORMAT_A8B8G8R8UI:
414 case FORMAT_A16B16G16R16I:
415 case FORMAT_A16B16G16R16UI:
416 case FORMAT_A32B32G32R32I:
417 case FORMAT_A32B32G32R32UI:
418 break;
419 case FORMAT_R8_SNORM:
420 case FORMAT_G8R8_SNORM:
421 case FORMAT_X8B8G8R8_SNORM:
422 case FORMAT_A8B8G8R8_SNORM:
423 case FORMAT_R8:
424 case FORMAT_R5G6B5:
425 case FORMAT_G8R8:
426 case FORMAT_G16R16:
427 case FORMAT_A16B16G16R16:
428 case FORMAT_X8R8G8B8:
429 case FORMAT_X8B8G8R8:
430 case FORMAT_A8R8G8B8:
431 case FORMAT_A8B8G8R8:
432 case FORMAT_SRGB8_X8:
433 case FORMAT_SRGB8_A8:
434 case FORMAT_V8U8:
435 case FORMAT_Q8W8V8U8:
436 case FORMAT_X8L8V8U8:
437 case FORMAT_V16U16:
438 case FORMAT_A16W16V16U16:
439 case FORMAT_Q16W16V16U16:
440 case FORMAT_YV12_BT601:
441 case FORMAT_YV12_BT709:
442 case FORMAT_YV12_JFIF:
443 if(componentCount < 2) c.y = Float4(defaultColorValue);
444 if(componentCount < 3) c.z = Float4(defaultColorValue);
445 if(componentCount < 4) c.w = Float4(1.0f);
446 break;
447 case FORMAT_A8:
448 c.w = c.x;
449 c.x = Float4(0.0f);
450 c.y = Float4(0.0f);
451 c.z = Float4(0.0f);
452 break;
453 case FORMAT_L8:
454 case FORMAT_L16:
455 c.y = c.x;
456 c.z = c.x;
457 c.w = Float4(1.0f);
458 break;
459 case FORMAT_A8L8:
460 c.w = c.y;
461 c.y = c.x;
462 c.z = c.x;
463 break;
464 case FORMAT_R32F:
465 c.y = Float4(defaultColorValue);
466 case FORMAT_G32R32F:
467 c.z = Float4(defaultColorValue);
468 case FORMAT_X32B32G32R32F:
469 case FORMAT_X32B32G32R32F_UNSIGNED:
470 c.w = Float4(1.0f);
471 case FORMAT_A32B32G32R32F:
472 break;
473 case FORMAT_D32F_LOCKABLE:
474 case FORMAT_D32FS8_TEXTURE:
475 case FORMAT_D32F_SHADOW:
476 case FORMAT_D32FS8_SHADOW:
477 c.y = Float4(0.0f);
478 c.z = Float4(0.0f);
479 c.w = Float4(1.0f);
480 break;
481 default:
482 ASSERT(false);
483 }
484 }
485
486 if((state.swizzleR != SWIZZLE_RED) ||
487 (state.swizzleG != SWIZZLE_GREEN) ||
488 (state.swizzleB != SWIZZLE_BLUE) ||
489 (state.swizzleA != SWIZZLE_ALPHA))
490 {
491 const Vector4f col(c);
492 applySwizzle(state.swizzleR, c.x, col);
493 applySwizzle(state.swizzleG, c.y, col);
494 applySwizzle(state.swizzleB, c.z, col);
495 applySwizzle(state.swizzleA, c.w, col);
496 }
497 }
498
499 return c;
500 }
501
502 Vector4f SamplerCore::textureSize(Pointer<Byte> &texture, Float4 &lod)
503 {
504 Vector4f size;
505
506 for(int i = 0; i < 4; ++i)
507 {
508 Int baseLevel = *Pointer<Int>(texture + OFFSET(Texture, baseLevel));
509 Int index = Min(As<UInt>(As<Int>(Extract(lod, i)) + baseLevel), MIPMAP_LEVELS - 1);
510 Pointer<Byte> mipmap = texture + OFFSET(Texture, mipmap) + index * sizeof(Mipmap);
511 size.x = Insert(size.x, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, width)))), i);
512 size.y = Insert(size.y, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, height)))), i);
513 size.z = Insert(size.z, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth)))), i);
514 }
515
516 return size;
517 }
518
519 void SamplerCore::border(Short4 &mask, Float4 &coordinates)
520 {
521 Int4 border = As<Int4>(CmpLT(Abs(coordinates - Float4(0.5f)), Float4(0.5f)));
522 mask = As<Short4>(Int2(As<Int4>(PackSigned(border, border))));
523 }
524
525 void SamplerCore::border(Int4 &mask, Float4 &coordinates)
526 {
527 mask = As<Int4>(CmpLT(Abs(coordinates - Float4(0.5f)), Float4(0.5f)));
528 }
529
530 Short4 SamplerCore::offsetSample(Short4 &uvw, Pointer<Byte> &mipmap, int halfOffset, bool wrap, int count, Float &lod)
531 {
532 Short4 offset = *Pointer<Short4>(mipmap + halfOffset);
533
534 if(state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT)
535 {
536 offset &= Short4(CmpNLE(Float4(lod), Float4(0.0f)));
537 }
538 else if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR)
539 {
540 offset &= Short4(CmpLE(Float4(lod), Float4(0.0f)));
541 }
542
543 if(wrap)
544 {
545 switch(count)
546 {
547 case -1: return uvw - offset;
548 case 0: return uvw;
549 case +1: return uvw + offset;
550 case 2: return uvw + offset + offset;
551 }
552 }
553 else // Clamp or mirror
554 {
555 switch(count)
556 {
557 case -1: return SubSat(As<UShort4>(uvw), As<UShort4>(offset));
558 case 0: return uvw;
559 case +1: return AddSat(As<UShort4>(uvw), As<UShort4>(offset));
560 case 2: return AddSat(AddSat(As<UShort4>(uvw), As<UShort4>(offset)), As<UShort4>(offset));
561 }
562 }
563
564 return uvw;
565 }
566
567 Vector4s SamplerCore::sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function)
568 {
569 Vector4s c = sampleAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, false, function);
570
571 if(function == Fetch)
572 {
573 return c;
574 }
575
576 if(state.mipmapFilter == MIPMAP_LINEAR)
577 {
578 Vector4s cc = sampleAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, true, function);
579
580 lod *= Float(1 << 16);
581
582 UShort4 utri = UShort4(Float4(lod)); // FIXME: Optimize
583 Short4 stri = utri >> 1; // FIXME: Optimize
584
585 if(hasUnsignedTextureComponent(0)) cc.x = MulHigh(As<UShort4>(cc.x), utri); else cc.x = MulHigh(cc.x, stri);
586 if(hasUnsignedTextureComponent(1)) cc.y = MulHigh(As<UShort4>(cc.y), utri); else cc.y = MulHigh(cc.y, stri);
587 if(hasUnsignedTextureComponent(2)) cc.z = MulHigh(As<UShort4>(cc.z), utri); else cc.z = MulHigh(cc.z, stri);
588 if(hasUnsignedTextureComponent(3)) cc.w = MulHigh(As<UShort4>(cc.w), utri); else cc.w = MulHigh(cc.w, stri);
589
590 utri = ~utri;
591 stri = Short4(0x7FFF) - stri;
592
593 if(hasUnsignedTextureComponent(0)) c.x = MulHigh(As<UShort4>(c.x), utri); else c.x = MulHigh(c.x, stri);
594 if(hasUnsignedTextureComponent(1)) c.y = MulHigh(As<UShort4>(c.y), utri); else c.y = MulHigh(c.y, stri);
595 if(hasUnsignedTextureComponent(2)) c.z = MulHigh(As<UShort4>(c.z), utri); else c.z = MulHigh(c.z, stri);
596 if(hasUnsignedTextureComponent(3)) c.w = MulHigh(As<UShort4>(c.w), utri); else c.w = MulHigh(c.w, stri);
597
598 c.x += cc.x;
599 c.y += cc.y;
600 c.z += cc.z;
601 c.w += cc.w;
602
603 if(!hasUnsignedTextureComponent(0)) c.x += c.x;
604 if(!hasUnsignedTextureComponent(1)) c.y += c.y;
605 if(!hasUnsignedTextureComponent(2)) c.z += c.z;
606 if(!hasUnsignedTextureComponent(3)) c.w += c.w;
607 }
608
609 Short4 borderMask;
610
611 if(state.addressingModeU == ADDRESSING_BORDER)
612 {
613 Short4 u0;
614
615 border(u0, u);
616
617 borderMask = u0;
618 }
619
620 if(state.addressingModeV == ADDRESSING_BORDER)
621 {
622 Short4 v0;
623
624 border(v0, v);
625
626 if(state.addressingModeU == ADDRESSING_BORDER)
627 {
628 borderMask &= v0;
629 }
630 else
631 {
632 borderMask = v0;
633 }
634 }
635
636 if(state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D)
637 {
638 Short4 s0;
639
640 border(s0, w);
641
642 if(state.addressingModeU == ADDRESSING_BORDER ||
643 state.addressingModeV == ADDRESSING_BORDER)
644 {
645 borderMask &= s0;
646 }
647 else
648 {
649 borderMask = s0;
650 }
651 }
652
653 if(state.addressingModeU == ADDRESSING_BORDER ||
654 state.addressingModeV == ADDRESSING_BORDER ||
655 (state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D))
656 {
657 Short4 b;
658
659 c.x = (borderMask & c.x) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[0])) >> (hasUnsignedTextureComponent(0) ? 0 : 1)));
660 c.y = (borderMask & c.y) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[1])) >> (hasUnsignedTextureComponent(1) ? 0 : 1)));
661 c.z = (borderMask & c.z) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[2])) >> (hasUnsignedTextureComponent(2) ? 0 : 1)));
662 c.w = (borderMask & c.w) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[3])) >> (hasUnsignedTextureComponent(3) ? 0 : 1)));
663 }
664
665 return c;
666 }
667
668 Vector4s SamplerCore::sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function)
669 {
670 Vector4s c;
671
672 if(state.textureFilter != FILTER_ANISOTROPIC || function == Lod || function == Fetch)
673 {
674 c = sampleQuad(texture, u, v, w, offset, lod, face, secondLOD, function);
675 }
676 else
677 {
678 Int a = RoundInt(anisotropy);
679
680 Vector4s cSum;
681
682 cSum.x = Short4(0);
683 cSum.y = Short4(0);
684 cSum.z = Short4(0);
685 cSum.w = Short4(0);
686
687 Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a);
688 Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a);
689 UShort4 cw = *Pointer<UShort4>(constants + OFFSET(Constants,cWeight) + 8 * a);
690 Short4 sw = Short4(cw >> 1);
691
692 Float4 du = uDelta;
693 Float4 dv = vDelta;
694
695 Float4 u0 = u + B * du;
696 Float4 v0 = v + B * dv;
697
698 du *= A;
699 dv *= A;
700
701 Int i = 0;
702
703 Do
704 {
705 c = sampleQuad(texture, u0, v0, w, offset, lod, face, secondLOD, function);
706
707 u0 += du;
708 v0 += dv;
709
710 if(hasUnsignedTextureComponent(0)) cSum.x += As<Short4>(MulHigh(As<UShort4>(c.x), cw)); else cSum.x += MulHigh(c.x, sw);
711 if(hasUnsignedTextureComponent(1)) cSum.y += As<Short4>(MulHigh(As<UShort4>(c.y), cw)); else cSum.y += MulHigh(c.y, sw);
712 if(hasUnsignedTextureComponent(2)) cSum.z += As<Short4>(MulHigh(As<UShort4>(c.z), cw)); else cSum.z += MulHigh(c.z, sw);
713 if(hasUnsignedTextureComponent(3)) cSum.w += As<Short4>(MulHigh(As<UShort4>(c.w), cw)); else cSum.w += MulHigh(c.w, sw);
714
715 i++;
716 }
717 Until(i >= a)
718
719 if(hasUnsignedTextureComponent(0)) c.x = cSum.x; else c.x = AddSat(cSum.x, cSum.x);
720 if(hasUnsignedTextureComponent(1)) c.y = cSum.y; else c.y = AddSat(cSum.y, cSum.y);
721 if(hasUnsignedTextureComponent(2)) c.z = cSum.z; else c.z = AddSat(cSum.z, cSum.z);
722 if(hasUnsignedTextureComponent(3)) c.w = cSum.w; else c.w = AddSat(cSum.w, cSum.w);
723 }
724
725 return c;
726 }
727
728 Vector4s SamplerCore::sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
729 {
730 if(state.textureType != TEXTURE_3D)
731 {
732 return sampleQuad2D(texture, u, v, w, offset, lod, face, secondLOD, function);
733 }
734 else
735 {
736 return sample3D(texture, u, v, w, offset, lod, secondLOD, function);
737 }
738 }
739
740 Vector4s SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
741 {
742 Vector4s c;
743
744 int componentCount = textureComponentCount();
745 bool gather = state.textureFilter == FILTER_GATHER;
746
747 Pointer<Byte> mipmap;
748 Pointer<Byte> buffer[4];
749
750 selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
751
752 bool texelFetch = (function == Fetch);
753
754 Short4 uuuu = texelFetch ? Short4(As<Int4>(u)) : address(u, state.addressingModeU, mipmap);
755 Short4 vvvv = texelFetch ? Short4(As<Int4>(v)) : address(v, state.addressingModeV, mipmap);
756 Short4 wwww = texelFetch ? Short4(As<Int4>(w)) : address(w, state.addressingModeW, mipmap);
757
758 if(state.textureFilter == FILTER_POINT || texelFetch)
759 {
760 c = sampleTexel(uuuu, vvvv, wwww, offset, mipmap, buffer, function);
761 }
762 else
763 {
764 Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 0 : -1, lod);
765 Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 0 : -1, lod);
766 Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 2 : +1, lod);
767 Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 2 : +1, lod);
768
769 Vector4s c0 = sampleTexel(uuuu0, vvvv0, wwww, offset, mipmap, buffer, function);
770 Vector4s c1 = sampleTexel(uuuu1, vvvv0, wwww, offset, mipmap, buffer, function);
771 Vector4s c2 = sampleTexel(uuuu0, vvvv1, wwww, offset, mipmap, buffer, function);
772 Vector4s c3 = sampleTexel(uuuu1, vvvv1, wwww, offset, mipmap, buffer, function);
773
774 if(!gather) // Blend
775 {
776 // Fractions
777 UShort4 f0u = As<UShort4>(uuuu0) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,width));
778 UShort4 f0v = As<UShort4>(vvvv0) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,height));
779
780 UShort4 f1u = ~f0u;
781 UShort4 f1v = ~f0v;
782
783 UShort4 f0u0v = MulHigh(f0u, f0v);
784 UShort4 f1u0v = MulHigh(f1u, f0v);
785 UShort4 f0u1v = MulHigh(f0u, f1v);
786 UShort4 f1u1v = MulHigh(f1u, f1v);
787
788 // Signed fractions
789 Short4 f1u1vs;
790 Short4 f0u1vs;
791 Short4 f1u0vs;
792 Short4 f0u0vs;
793
794 if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3))
795 {
796 f1u1vs = f1u1v >> 1;
797 f0u1vs = f0u1v >> 1;
798 f1u0vs = f1u0v >> 1;
799 f0u0vs = f0u0v >> 1;
800 }
801
802 // Bilinear interpolation
803 if(componentCount >= 1)
804 {
805 if(has16bitTextureComponents() && hasUnsignedTextureComponent(0))
806 {
807 c0.x = As<UShort4>(c0.x) - MulHigh(As<UShort4>(c0.x), f0u) + MulHigh(As<UShort4>(c1.x), f0u);
808 c2.x = As<UShort4>(c2.x) - MulHigh(As<UShort4>(c2.x), f0u) + MulHigh(As<UShort4>(c3.x), f0u);
809 c.x = As<UShort4>(c0.x) - MulHigh(As<UShort4>(c0.x), f0v) + MulHigh(As<UShort4>(c2.x), f0v);
810 }
811 else
812 {
813 if(hasUnsignedTextureComponent(0))
814 {
815 c0.x = MulHigh(As<UShort4>(c0.x), f1u1v);
816 c1.x = MulHigh(As<UShort4>(c1.x), f0u1v);
817 c2.x = MulHigh(As<UShort4>(c2.x), f1u0v);
818 c3.x = MulHigh(As<UShort4>(c3.x), f0u0v);
819 }
820 else
821 {
822 c0.x = MulHigh(c0.x, f1u1vs);
823 c1.x = MulHigh(c1.x, f0u1vs);
824 c2.x = MulHigh(c2.x, f1u0vs);
825 c3.x = MulHigh(c3.x, f0u0vs);
826 }
827
828 c.x = (c0.x + c1.x) + (c2.x + c3.x);
829 if(!hasUnsignedTextureComponent(0)) c.x = AddSat(c.x, c.x); // Correct for signed fractions
830 }
831 }
832
833 if(componentCount >= 2)
834 {
835 if(has16bitTextureComponents() && hasUnsignedTextureComponent(1))
836 {
837 c0.y = As<UShort4>(c0.y) - MulHigh(As<UShort4>(c0.y), f0u) + MulHigh(As<UShort4>(c1.y), f0u);
838 c2.y = As<UShort4>(c2.y) - MulHigh(As<UShort4>(c2.y), f0u) + MulHigh(As<UShort4>(c3.y), f0u);
839 c.y = As<UShort4>(c0.y) - MulHigh(As<UShort4>(c0.y), f0v) + MulHigh(As<UShort4>(c2.y), f0v);
840 }
841 else
842 {
843 if(hasUnsignedTextureComponent(1))
844 {
845 c0.y = MulHigh(As<UShort4>(c0.y), f1u1v);
846 c1.y = MulHigh(As<UShort4>(c1.y), f0u1v);
847 c2.y = MulHigh(As<UShort4>(c2.y), f1u0v);
848 c3.y = MulHigh(As<UShort4>(c3.y), f0u0v);
849 }
850 else
851 {
852 c0.y = MulHigh(c0.y, f1u1vs);
853 c1.y = MulHigh(c1.y, f0u1vs);
854 c2.y = MulHigh(c2.y, f1u0vs);
855 c3.y = MulHigh(c3.y, f0u0vs);
856 }
857
858 c.y = (c0.y + c1.y) + (c2.y + c3.y);
859 if(!hasUnsignedTextureComponent(1)) c.y = AddSat(c.y, c.y); // Correct for signed fractions
860 }
861 }
862
863 if(componentCount >= 3)
864 {
865 if(has16bitTextureComponents() && hasUnsignedTextureComponent(2))
866 {
867 c0.z = As<UShort4>(c0.z) - MulHigh(As<UShort4>(c0.z), f0u) + MulHigh(As<UShort4>(c1.z), f0u);
868 c2.z = As<UShort4>(c2.z) - MulHigh(As<UShort4>(c2.z), f0u) + MulHigh(As<UShort4>(c3.z), f0u);
869 c.z = As<UShort4>(c0.z) - MulHigh(As<UShort4>(c0.z), f0v) + MulHigh(As<UShort4>(c2.z), f0v);
870 }
871 else
872 {
873 if(hasUnsignedTextureComponent(2))
874 {
875 c0.z = MulHigh(As<UShort4>(c0.z), f1u1v);
876 c1.z = MulHigh(As<UShort4>(c1.z), f0u1v);
877 c2.z = MulHigh(As<UShort4>(c2.z), f1u0v);
878 c3.z = MulHigh(As<UShort4>(c3.z), f0u0v);
879 }
880 else
881 {
882 c0.z = MulHigh(c0.z, f1u1vs);
883 c1.z = MulHigh(c1.z, f0u1vs);
884 c2.z = MulHigh(c2.z, f1u0vs);
885 c3.z = MulHigh(c3.z, f0u0vs);
886 }
887
888 c.z = (c0.z + c1.z) + (c2.z + c3.z);
889 if(!hasUnsignedTextureComponent(2)) c.z = AddSat(c.z, c.z); // Correct for signed fractions
890 }
891 }
892
893 if(componentCount >= 4)
894 {
895 if(has16bitTextureComponents() && hasUnsignedTextureComponent(3))
896 {
897 c0.w = As<UShort4>(c0.w) - MulHigh(As<UShort4>(c0.w), f0u) + MulHigh(As<UShort4>(c1.w), f0u);
898 c2.w = As<UShort4>(c2.w) - MulHigh(As<UShort4>(c2.w), f0u) + MulHigh(As<UShort4>(c3.w), f0u);
899 c.w = As<UShort4>(c0.w) - MulHigh(As<UShort4>(c0.w), f0v) + MulHigh(As<UShort4>(c2.w), f0v);
900 }
901 else
902 {
903 if(hasUnsignedTextureComponent(3))
904 {
905 c0.w = MulHigh(As<UShort4>(c0.w), f1u1v);
906 c1.w = MulHigh(As<UShort4>(c1.w), f0u1v);
907 c2.w = MulHigh(As<UShort4>(c2.w), f1u0v);
908 c3.w = MulHigh(As<UShort4>(c3.w), f0u0v);
909 }
910 else
911 {
912 c0.w = MulHigh(c0.w, f1u1vs);
913 c1.w = MulHigh(c1.w, f0u1vs);
914 c2.w = MulHigh(c2.w, f1u0vs);
915 c3.w = MulHigh(c3.w, f0u0vs);
916 }
917
918 c.w = (c0.w + c1.w) + (c2.w + c3.w);
919 if(!hasUnsignedTextureComponent(3)) c.w = AddSat(c.w, c.w); // Correct for signed fractions
920 }
921 }
922 }
923 else
924 {
925 c.x = c1.x;
926 c.y = c2.x;
927 c.z = c3.x;
928 c.w = c0.x;
929 }
930 }
931
932 return c;
933 }
934
935 Vector4s SamplerCore::sample3D(Pointer<Byte> &texture, Float4 &u_, Float4 &v_, Float4 &w_, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function)
936 {
937 Vector4s c_;
938
939 int componentCount = textureComponentCount();
940
941 Pointer<Byte> mipmap;
942 Pointer<Byte> buffer[4];
943 Int face[4];
944
945 selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
946
947 bool texelFetch = (function == Fetch);
948
949 Short4 uuuu = texelFetch ? Short4(As<Int4>(u_)) : address(u_, state.addressingModeU, mipmap);
950 Short4 vvvv = texelFetch ? Short4(As<Int4>(v_)) : address(v_, state.addressingModeV, mipmap);
951 Short4 wwww = texelFetch ? Short4(As<Int4>(w_)) : address(w_, state.addressingModeW, mipmap);
952
953 if(state.textureFilter == FILTER_POINT || texelFetch)
954 {
955 c_ = sampleTexel(uuuu, vvvv, wwww, offset, mipmap, buffer, function);
956 }
957 else
958 {
959 Vector4s c[2][2][2];
960
961 Short4 u[2][2][2];
962 Short4 v[2][2][2];
963 Short4 s[2][2][2];
964
965 for(int i = 0; i < 2; i++)
966 {
967 for(int j = 0; j < 2; j++)
968 {
969 for(int k = 0; k < 2; k++)
970 {
971 u[i][j][k] = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, i * 2 - 1, lod);
972 v[i][j][k] = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, j * 2 - 1, lod);
973 s[i][j][k] = offsetSample(wwww, mipmap, OFFSET(Mipmap,wHalf), state.addressingModeW == ADDRESSING_WRAP, k * 2 - 1, lod);
974 }
975 }
976 }
977
978 // Fractions
979 UShort4 f0u = As<UShort4>(u[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,width));
980 UShort4 f0v = As<UShort4>(v[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,height));
981 UShort4 f0s = As<UShort4>(s[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,depth));
982
983 UShort4 f1u = ~f0u;
984 UShort4 f1v = ~f0v;
985 UShort4 f1s = ~f0s;
986
987 UShort4 f[2][2][2];
988 Short4 fs[2][2][2];
989
990 f[1][1][1] = MulHigh(f1u, f1v);
991 f[0][1][1] = MulHigh(f0u, f1v);
992 f[1][0][1] = MulHigh(f1u, f0v);
993 f[0][0][1] = MulHigh(f0u, f0v);
994 f[1][1][0] = MulHigh(f1u, f1v);
995 f[0][1][0] = MulHigh(f0u, f1v);
996 f[1][0][0] = MulHigh(f1u, f0v);
997 f[0][0][0] = MulHigh(f0u, f0v);
998
999 f[1][1][1] = MulHigh(f[1][1][1], f1s);
1000 f[0][1][1] = MulHigh(f[0][1][1], f1s);
1001 f[1][0][1] = MulHigh(f[1][0][1], f1s);
1002 f[0][0][1] = MulHigh(f[0][0][1], f1s);
1003 f[1][1][0] = MulHigh(f[1][1][0], f0s);
1004 f[0][1][0] = MulHigh(f[0][1][0], f0s);
1005 f[1][0][0] = MulHigh(f[1][0][0], f0s);
1006 f[0][0][0] = MulHigh(f[0][0][0], f0s);
1007
1008 // Signed fractions
1009 if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3))
1010 {
1011 fs[0][0][0] = f[0][0][0] >> 1;
1012 fs[0][0][1] = f[0][0][1] >> 1;
1013 fs[0][1][0] = f[0][1][0] >> 1;
1014 fs[0][1][1] = f[0][1][1] >> 1;
1015 fs[1][0][0] = f[1][0][0] >> 1;
1016 fs[1][0][1] = f[1][0][1] >> 1;
1017 fs[1][1][0] = f[1][1][0] >> 1;
1018 fs[1][1][1] = f[1][1][1] >> 1;
1019 }
1020
1021 for(int i = 0; i < 2; i++)
1022 {
1023 for(int j = 0; j < 2; j++)
1024 {
1025 for(int k = 0; k < 2; k++)
1026 {
1027 c[i][j][k] = sampleTexel(u[i][j][k], v[i][j][k], s[i][j][k], offset, mipmap, buffer, function);
1028
1029 if(componentCount >= 1) { if(hasUnsignedTextureComponent(0)) c[i][j][k].x = MulHigh(As<UShort4>(c[i][j][k].x), f[1 - i][1 - j][1 - k]); else c[i][j][k].x = MulHigh(c[i][j][k].x, fs[1 - i][1 - j][1 - k]); }
1030 if(componentCount >= 2) { if(hasUnsignedTextureComponent(1)) c[i][j][k].y = MulHigh(As<UShort4>(c[i][j][k].y), f[1 - i][1 - j][1 - k]); else c[i][j][k].y = MulHigh(c[i][j][k].y, fs[1 - i][1 - j][1 - k]); }
1031 if(componentCount >= 3) { if(hasUnsignedTextureComponent(2)) c[i][j][k].z = MulHigh(As<UShort4>(c[i][j][k].z), f[1 - i][1 - j][1 - k]); else c[i][j][k].z = MulHigh(c[i][j][k].z, fs[1 - i][1 - j][1 - k]); }
1032 if(componentCount >= 4) { if(hasUnsignedTextureComponent(3)) c[i][j][k].w = MulHigh(As<UShort4>(c[i][j][k].w), f[1 - i][1 - j][1 - k]); else c[i][j][k].w = MulHigh(c[i][j][k].w, fs[1 - i][1 - j][1 - k]); }
1033
1034 if(i != 0 || j != 0 || k != 0)
1035 {
1036 if(componentCount >= 1) c[0][0][0].x += c[i][j][k].x;
1037 if(componentCount >= 2) c[0][0][0].y += c[i][j][k].y;
1038 if(componentCount >= 3) c[0][0][0].z += c[i][j][k].z;
1039 if(componentCount >= 4) c[0][0][0].w += c[i][j][k].w;
1040 }
1041 }
1042 }
1043 }
1044
1045 if(componentCount >= 1) c_.x = c[0][0][0].x;
1046 if(componentCount >= 2) c_.y = c[0][0][0].y;
1047 if(componentCount >= 3) c_.z = c[0][0][0].z;
1048 if(componentCount >= 4) c_.w = c[0][0][0].w;
1049
1050 // Correct for signed fractions
1051 if(componentCount >= 1) if(!hasUnsignedTextureComponent(0)) c_.x = AddSat(c_.x, c_.x);
1052 if(componentCount >= 2) if(!hasUnsignedTextureComponent(1)) c_.y = AddSat(c_.y, c_.y);
1053 if(componentCount >= 3) if(!hasUnsignedTextureComponent(2)) c_.z = AddSat(c_.z, c_.z);
1054 if(componentCount >= 4) if(!hasUnsignedTextureComponent(3)) c_.w = AddSat(c_.w, c_.w);
1055 }
1056
1057 return c_;
1058 }
1059
1060 Vector4f SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function)
1061 {
1062 Vector4f c = sampleFloatAniso(texture, u, v, w, q, offset, lod, anisotropy, uDelta, vDelta, face, false, function);
1063
1064 if(function == Fetch)
1065 {
1066 return c;
1067 }
1068
1069 if(state.mipmapFilter == MIPMAP_LINEAR)
1070 {
1071 Vector4f cc = sampleFloatAniso(texture, u, v, w, q, offset, lod, anisotropy, uDelta, vDelta, face, true, function);
1072
1073 Float4 lod4 = Float4(Frac(lod));
1074
1075 c.x = (cc.x - c.x) * lod4 + c.x;
1076 c.y = (cc.y - c.y) * lod4 + c.y;
1077 c.z = (cc.z - c.z) * lod4 + c.z;
1078 c.w = (cc.w - c.w) * lod4 + c.w;
1079 }
1080
1081 Int4 borderMask;
1082
1083 if(state.addressingModeU == ADDRESSING_BORDER)
1084 {
1085 Int4 u0;
1086
1087 border(u0, u);
1088
1089 borderMask = u0;
1090 }
1091
1092 if(state.addressingModeV == ADDRESSING_BORDER)
1093 {
1094 Int4 v0;
1095
1096 border(v0, v);
1097
1098 if(state.addressingModeU == ADDRESSING_BORDER)
1099 {
1100 borderMask &= v0;
1101 }
1102 else
1103 {
1104 borderMask = v0;
1105 }
1106 }
1107
1108 if(state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D)
1109 {
1110 Int4 s0;
1111
1112 border(s0, w);
1113
1114 if(state.addressingModeU == ADDRESSING_BORDER ||
1115 state.addressingModeV == ADDRESSING_BORDER)
1116 {
1117 borderMask &= s0;
1118 }
1119 else
1120 {
1121 borderMask = s0;
1122 }
1123 }
1124
1125 if(state.addressingModeU == ADDRESSING_BORDER ||
1126 state.addressingModeV == ADDRESSING_BORDER ||
1127 (state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D))
1128 {
1129 Int4 b;
1130
1131 c.x = As<Float4>((borderMask & As<Int4>(c.x)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[0]))));
1132 c.y = As<Float4>((borderMask & As<Int4>(c.y)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[1]))));
1133 c.z = As<Float4>((borderMask & As<Int4>(c.z)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[2]))));
1134 c.w = As<Float4>((borderMask & As<Int4>(c.w)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[3]))));
1135 }
1136
1137 return c;
1138 }
1139
1140 Vector4f SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function)
1141 {
1142 Vector4f c;
1143
1144 if(state.textureFilter != FILTER_ANISOTROPIC || function == Lod || function == Fetch)
1145 {
1146 c = sampleFloat(texture, u, v, w, q, offset, lod, face, secondLOD, function);
1147 }
1148 else
1149 {
1150 Int a = RoundInt(anisotropy);
1151
1152 Vector4f cSum;
1153
1154 cSum.x = Float4(0.0f);
1155 cSum.y = Float4(0.0f);
1156 cSum.z = Float4(0.0f);
1157 cSum.w = Float4(0.0f);
1158
1159 Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a);
1160 Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a);
1161
1162 Float4 du = uDelta;
1163 Float4 dv = vDelta;
1164
1165 Float4 u0 = u + B * du;
1166 Float4 v0 = v + B * dv;
1167
1168 du *= A;
1169 dv *= A;
1170
1171 Int i = 0;
1172
1173 Do
1174 {
1175 c = sampleFloat(texture, u0, v0, w, q, offset, lod, face, secondLOD, function);
1176
1177 u0 += du;
1178 v0 += dv;
1179
1180 cSum.x += c.x * A;
1181 cSum.y += c.y * A;
1182 cSum.z += c.z * A;
1183 cSum.w += c.w * A;
1184
1185 i++;
1186 }
1187 Until(i >= a)
1188
1189 c.x = cSum.x;
1190 c.y = cSum.y;
1191 c.z = cSum.z;
1192 c.w = cSum.w;
1193 }
1194
1195 return c;
1196 }
1197
1198 Vector4f SamplerCore::sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
1199 {
1200 if(state.textureType != TEXTURE_3D)
1201 {
1202 return sampleFloat2D(texture, u, v, w, q, offset, lod, face, secondLOD, function);
1203 }
1204 else
1205 {
1206 return sampleFloat3D(texture, u, v, w, offset, lod, secondLOD, function);
1207 }
1208 }
1209
1210 Vector4f SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
1211 {
1212 Vector4f c;
1213
1214 int componentCount = textureComponentCount();
1215 bool gather = state.textureFilter == FILTER_GATHER;
1216
1217 Pointer<Byte> mipmap;
1218 Pointer<Byte> buffer[4];
1219
1220 selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
1221
1222 Int4 x0, x1, y0, y1, z0;
1223 Float4 fu, fv;
1224 Int4 filter = computeFilterOffset(lod);
1225 address(u, x0, x1, fu, mipmap, offset.x, filter, OFFSET(Mipmap, width), state.addressingModeU, function);
1226 address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function);
1227 address(w, z0, z0, fv, mipmap, offset.z, filter, OFFSET(Mipmap, depth), state.addressingModeW, function);
1228
1229 Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16);
1230 y0 *= pitchP;
1231 if(hasThirdCoordinate())
1232 {
1233 Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16);
1234 z0 *= sliceP;
1235 }
1236
1237 if(state.textureFilter == FILTER_POINT || (function == Fetch))
1238 {
1239 c = sampleTexel(x0, y0, z0, q, mipmap, buffer, function);
1240 }
1241 else
1242 {
1243 y1 *= pitchP;
1244
1245 Vector4f c0 = sampleTexel(x0, y0, z0, q, mipmap, buffer, function);
1246 Vector4f c1 = sampleTexel(x1, y0, z0, q, mipmap, buffer, function);
1247 Vector4f c2 = sampleTexel(x0, y1, z0, q, mipmap, buffer, function);
1248 Vector4f c3 = sampleTexel(x1, y1, z0, q, mipmap, buffer, function);
1249
1250 if(!gather) // Blend
1251 {
1252 if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x);
1253 if(componentCount >= 2) c0.y = c0.y + fu * (c1.y - c0.y);
1254 if(componentCount >= 3) c0.z = c0.z + fu * (c1.z - c0.z);
1255 if(componentCount >= 4) c0.w = c0.w + fu * (c1.w - c0.w);
1256
1257 if(componentCount >= 1) c2.x = c2.x + fu * (c3.x - c2.x);
1258 if(componentCount >= 2) c2.y = c2.y + fu * (c3.y - c2.y);
1259 if(componentCount >= 3) c2.z = c2.z + fu * (c3.z - c2.z);
1260 if(componentCount >= 4) c2.w = c2.w + fu * (c3.w - c2.w);
1261
1262 if(componentCount >= 1) c.x = c0.x + fv * (c2.x - c0.x);
1263 if(componentCount >= 2) c.y = c0.y + fv * (c2.y - c0.y);
1264 if(componentCount >= 3) c.z = c0.z + fv * (c2.z - c0.z);
1265 if(componentCount >= 4) c.w = c0.w + fv * (c2.w - c0.w);
1266 }
1267 else
1268 {
1269 c.x = c1.x;
1270 c.y = c2.x;
1271 c.z = c3.x;
1272 c.w = c0.x;
1273 }
1274 }
1275
1276 return c;
1277 }
1278
1279 Vector4f SamplerCore::sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function)
1280 {
1281 Vector4f c;
1282
1283 int componentCount = textureComponentCount();
1284
1285 Pointer<Byte> mipmap;
1286 Pointer<Byte> buffer[4];
1287 Int face[4];
1288
1289 selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
1290
1291 Int4 x0, x1, y0, y1, z0, z1;
1292 Float4 fu, fv, fw;
1293 Int4 filter = computeFilterOffset(lod);
1294 address(u, x0, x1, fu, mipmap, offset.x, filter, OFFSET(Mipmap, width), state.addressingModeU, function);
1295 address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function);
1296 address(w, z0, z1, fw, mipmap, offset.z, filter, OFFSET(Mipmap, depth), state.addressingModeW, function);
1297
1298 Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16);
1299 Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16);
1300 y0 *= pitchP;
1301 z0 *= sliceP;
1302
1303 if(state.textureFilter == FILTER_POINT || (function == Fetch))
1304 {
1305 c = sampleTexel(x0, y0, z0, w, mipmap, buffer, function);
1306 }
1307 else
1308 {
1309 y1 *= pitchP;
1310 z1 *= sliceP;
1311
1312 Vector4f c0 = sampleTexel(x0, y0, z0, w, mipmap, buffer, function);
1313 Vector4f c1 = sampleTexel(x1, y0, z0, w, mipmap, buffer, function);
1314 Vector4f c2 = sampleTexel(x0, y1, z0, w, mipmap, buffer, function);
1315 Vector4f c3 = sampleTexel(x1, y1, z0, w, mipmap, buffer, function);
1316 Vector4f c4 = sampleTexel(x0, y0, z1, w, mipmap, buffer, function);
1317 Vector4f c5 = sampleTexel(x1, y0, z1, w, mipmap, buffer, function);
1318 Vector4f c6 = sampleTexel(x0, y1, z1, w, mipmap, buffer, function);
1319 Vector4f c7 = sampleTexel(x1, y1, z1, w, mipmap, buffer, function);
1320
1321 // Blend first slice
1322 if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x);
1323 if(componentCount >= 2) c0.y = c0.y + fu * (c1.y - c0.y);
1324 if(componentCount >= 3) c0.z = c0.z + fu * (c1.z - c0.z);
1325 if(componentCount >= 4) c0.w = c0.w + fu * (c1.w - c0.w);
1326
1327 if(componentCount >= 1) c2.x = c2.x + fu * (c3.x - c2.x);
1328 if(componentCount >= 2) c2.y = c2.y + fu * (c3.y - c2.y);
1329 if(componentCount >= 3) c2.z = c2.z + fu * (c3.z - c2.z);
1330 if(componentCount >= 4) c2.w = c2.w + fu * (c3.w - c2.w);
1331
1332 if(componentCount >= 1) c0.x = c0.x + fv * (c2.x - c0.x);
1333 if(componentCount >= 2) c0.y = c0.y + fv * (c2.y - c0.y);
1334 if(componentCount >= 3) c0.z = c0.z + fv * (c2.z - c0.z);
1335 if(componentCount >= 4) c0.w = c0.w + fv * (c2.w - c0.w);
1336
1337 // Blend second slice
1338 if(componentCount >= 1) c4.x = c4.x + fu * (c5.x - c4.x);
1339 if(componentCount >= 2) c4.y = c4.y + fu * (c5.y - c4.y);
1340 if(componentCount >= 3) c4.z = c4.z + fu * (c5.z - c4.z);
1341 if(componentCount >= 4) c4.w = c4.w + fu * (c5.w - c4.w);
1342
1343 if(componentCount >= 1) c6.x = c6.x + fu * (c7.x - c6.x);
1344 if(componentCount >= 2) c6.y = c6.y + fu * (c7.y - c6.y);
1345 if(componentCount >= 3) c6.z = c6.z + fu * (c7.z - c6.z);
1346 if(componentCount >= 4) c6.w = c6.w + fu * (c7.w - c6.w);
1347
1348 if(componentCount >= 1) c4.x = c4.x + fv * (c6.x - c4.x);
1349 if(componentCount >= 2) c4.y = c4.y + fv * (c6.y - c4.y);
1350 if(componentCount >= 3) c4.z = c4.z + fv * (c6.z - c4.z);
1351 if(componentCount >= 4) c4.w = c4.w + fv * (c6.w - c4.w);
1352
1353 // Blend slices
1354 if(componentCount >= 1) c.x = c0.x + fw * (c4.x - c0.x);
1355 if(componentCount >= 2) c.y = c0.y + fw * (c4.y - c0.y);
1356 if(componentCount >= 3) c.z = c0.z + fw * (c4.z - c0.z);
1357 if(componentCount >= 4) c.w = c0.w + fw * (c4.w - c0.w);
1358 }
1359
1360 return c;
1361 }
1362
1363 Float SamplerCore::log2sqrt(Float lod)
1364 {
1365 // log2(sqrt(lod)) // Equals 0.25 * log2(lod^2).
1366 lod *= lod; // Squaring doubles the exponent and produces an extra bit of precision.
1367 lod = Float(As<Int>(lod)) - Float(0x3F800000); // Interpret as integer and subtract the exponent bias.
1368 lod *= As<Float>(Int(0x33000000)); // Scale by 0.25 * 2^-23 (mantissa length).
1369
1370 return lod;
1371 }
1372
1373 Float SamplerCore::log2(Float lod)
1374 {
1375 lod *= lod; // Squaring doubles the exponent and produces an extra bit of precision.
1376 lod = Float(As<Int>(lod)) - Float(0x3F800000); // Interpret as integer and subtract the exponent bias.
1377 lod *= As<Float>(Int(0x33800000)); // Scale by 0.5 * 2^-23 (mantissa length).
1378
1379 return lod;
1380 }
1381
1382 void SamplerCore::computeLod(Pointer<Byte> &texture, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &uuuu, Float4 &vvvv, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
1383 {
1384 if(function != Lod && function != Fetch)
1385 {
1386 Float4 duvdxy;
1387
1388 if(function != Grad) // Implicit
1389 {
1390 duvdxy = Float4(uuuu.yz, vvvv.yz) - Float4(uuuu.xx, vvvv.xx);
1391 }
1392 else
1393 {
1394 Float4 dudxy = Float4(dsx.x.xx, dsy.x.xx);
1395 Float4 dvdxy = Float4(dsx.y.xx, dsy.y.xx);
1396
1397 duvdxy = Float4(dudxy.xz, dvdxy.xz);
1398 }
1399
1400 // Scale by texture dimensions and global LOD.
1401 Float4 dUVdxy = duvdxy * *Pointer<Float4>(texture + OFFSET(Texture,widthHeightLOD));
1402
1403 Float4 dUV2dxy = dUVdxy * dUVdxy;
1404 Float4 dUV2 = dUV2dxy.xy + dUV2dxy.zw;
1405
1406 lod = Max(Float(dUV2.x), Float(dUV2.y)); // Square length of major axis
1407
1408 if(state.textureFilter == FILTER_ANISOTROPIC)
1409 {
1410 Float det = Abs(Float(dUVdxy.x) * Float(dUVdxy.w) - Float(dUVdxy.y) * Float(dUVdxy.z));
1411
1412 Float4 dudx = duvdxy.xxxx;
1413 Float4 dudy = duvdxy.yyyy;
1414 Float4 dvdx = duvdxy.zzzz;
1415 Float4 dvdy = duvdxy.wwww;
1416
1417 Int4 mask = As<Int4>(CmpNLT(dUV2.x, dUV2.y));
1418 uDelta = As<Float4>((As<Int4>(dudx) & mask) | ((As<Int4>(dudy) & ~mask)));
1419 vDelta = As<Float4>((As<Int4>(dvdx) & mask) | ((As<Int4>(dvdy) & ~mask)));
1420
1421 anisotropy = lod * Rcp_pp(det);
1422 anisotropy = Min(anisotropy, *Pointer<Float>(texture + OFFSET(Texture,maxAnisotropy)));
1423
1424 lod *= Rcp_pp(anisotropy * anisotropy);
1425 }
1426
1427 lod = log2sqrt(lod); // log2(sqrt(lod))
1428
1429 if(function == Bias)
1430 {
1431 lod += lodBias;
1432 }
1433 }
1434 else if(function == Lod)
1435 {
1436 lod = lodBias;
1437 }
1438 else if(function == Fetch)
1439 {
1440 // TODO: Eliminate int-float-int conversion.
1441 lod = Float(As<Int>(lodBias));
1442 }
1443 else if(function == Base)
1444 {
1445 lod = Float(0);
1446 }
1447 else assert(false);
1448
1449 lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod)));
1450 lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod)));
1451 }
1452
1453 void SamplerCore::computeLodCube(Pointer<Byte> &texture, Float &lod, Float4 &u, Float4 &v, Float4 &w, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, Float4 &M, SamplerFunction function)
1454 {
1455 if(function != Lod && function != Fetch)
1456 {
1457 Float4 dudxy, dvdxy, dsdxy;
1458
1459 if(function != Grad) // Implicit
1460 {
1461 Float4 U = u * M;
1462 Float4 V = v * M;
1463 Float4 W = w * M;
1464
1465 dudxy = Abs(U - U.xxxx);
1466 dvdxy = Abs(V - V.xxxx);
1467 dsdxy = Abs(W - W.xxxx);
1468 }
1469 else
1470 {
1471 dudxy = Float4(dsx.x.xx, dsy.x.xx);
1472 dvdxy = Float4(dsx.y.xx, dsy.y.xx);
1473 dsdxy = Float4(dsx.z.xx, dsy.z.xx);
1474
1475 dudxy = Abs(dudxy * Float4(M.x));
1476 dvdxy = Abs(dvdxy * Float4(M.x));
1477 dsdxy = Abs(dsdxy * Float4(M.x));
1478 }
1479
1480 // Compute the largest Manhattan distance in two dimensions.
1481 // This takes the footprint across adjacent faces into account.
1482 Float4 duvdxy = dudxy + dvdxy;
1483 Float4 dusdxy = dudxy + dsdxy;
1484 Float4 dvsdxy = dvdxy + dsdxy;
1485
1486 dudxy = Max(Max(duvdxy, dusdxy), dvsdxy);
1487
1488 lod = Max(Float(dudxy.y), Float(dudxy.z)); // FIXME: Max(dudxy.y, dudxy.z);
1489
1490 // Scale by texture dimension and global LOD.
1491 lod *= *Pointer<Float>(texture + OFFSET(Texture,widthLOD));
1492
1493 lod = log2(lod);
1494
1495 if(function == Bias)
1496 {
1497 lod += lodBias;
1498 }
1499 }
1500 else if(function == Lod)
1501 {
1502 lod = lodBias;
1503 }
1504 else if(function == Fetch)
1505 {
1506 // TODO: Eliminate int-float-int conversion.
1507 lod = Float(As<Int>(lodBias));
1508 }
1509 else if(function == Base)
1510 {
1511 lod = Float(0);
1512 }
1513 else assert(false);
1514
1515 lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod)));
1516 lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod)));
1517 }
1518
1519 void SamplerCore::computeLod3D(Pointer<Byte> &texture, Float &lod, Float4 &uuuu, Float4 &vvvv, Float4 &wwww, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
1520 {
1521 if(function != Lod && function != Fetch)
1522 {
1523 Float4 dudxy, dvdxy, dsdxy;
1524
1525 if(function != Grad) // Implicit
1526 {
1527 dudxy = uuuu - uuuu.xxxx;
1528 dvdxy = vvvv - vvvv.xxxx;
1529 dsdxy = wwww - wwww.xxxx;
1530 }
1531 else
1532 {
1533 dudxy = Float4(dsx.x.xx, dsy.x.xx);
1534 dvdxy = Float4(dsx.y.xx, dsy.y.xx);
1535 dsdxy = Float4(dsx.z.xx, dsy.z.xx);
1536 }
1537
1538 // Scale by texture dimensions and global LOD.
1539 dudxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD));
1540 dvdxy *= *Pointer<Float4>(texture + OFFSET(Texture,heightLOD));
1541 dsdxy *= *Pointer<Float4>(texture + OFFSET(Texture,depthLOD));
1542
1543 dudxy *= dudxy;
1544 dvdxy *= dvdxy;
1545 dsdxy *= dsdxy;
1546
1547 dudxy += dvdxy;
1548 dudxy += dsdxy;
1549
1550 lod = Max(Float(dudxy.y), Float(dudxy.z)); // FIXME: Max(dudxy.y, dudxy.z);
1551
1552 lod = log2sqrt(lod); // log2(sqrt(lod))
1553
1554 if(function == Bias)
1555 {
1556 lod += lodBias;
1557 }
1558 }
1559 else if(function == Lod)
1560 {
1561 lod = lodBias;
1562 }
1563 else if(function == Fetch)
1564 {
1565 // TODO: Eliminate int-float-int conversion.
1566 lod = Float(As<Int>(lodBias));
1567 }
1568 else if(function == Base)
1569 {
1570 lod = Float(0);
1571 }
1572 else assert(false);
1573
1574 lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod)));
1575 lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod)));
1576 }
1577
1578 void SamplerCore::cubeFace(Int face[4], Float4 &U, Float4 &V, Float4 &x, Float4 &y, Float4 &z, Float4 &M)
1579 {
1580 Int4 xn = CmpLT(x, Float4(0.0f)); // x < 0
1581 Int4 yn = CmpLT(y, Float4(0.0f)); // y < 0
1582 Int4 zn = CmpLT(z, Float4(0.0f)); // z < 0
1583
1584 Float4 absX = Abs(x);
1585 Float4 absY = Abs(y);
1586 Float4 absZ = Abs(z);
1587
1588 Int4 xy = CmpNLE(absX, absY); // abs(x) > abs(y)
1589 Int4 yz = CmpNLE(absY, absZ); // abs(y) > abs(z)
1590 Int4 zx = CmpNLE(absZ, absX); // abs(z) > abs(x)
1591 Int4 xMajor = xy & ~zx; // abs(x) > abs(y) && abs(x) > abs(z)
1592 Int4 yMajor = yz & ~xy; // abs(y) > abs(z) && abs(y) > abs(x)
1593 Int4 zMajor = zx & ~yz; // abs(z) > abs(x) && abs(z) > abs(y)
1594
1595 // FACE_POSITIVE_X = 000b
1596 // FACE_NEGATIVE_X = 001b
1597 // FACE_POSITIVE_Y = 010b
1598 // FACE_NEGATIVE_Y = 011b
1599 // FACE_POSITIVE_Z = 100b
1600 // FACE_NEGATIVE_Z = 101b
1601
1602 Int yAxis = SignMask(yMajor);
1603 Int zAxis = SignMask(zMajor);
1604
1605 Int4 n = ((xn & xMajor) | (yn & yMajor) | (zn & zMajor)) & Int4(0x80000000);
1606 Int negative = SignMask(n);
1607
1608 face[0] = *Pointer<Int>(constants + OFFSET(Constants,transposeBit0) + negative * 4);
1609 face[0] |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit1) + yAxis * 4);
1610 face[0] |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit2) + zAxis * 4);
1611 face[1] = (face[0] >> 4) & 0x7;
1612 face[2] = (face[0] >> 8) & 0x7;
1613 face[3] = (face[0] >> 12) & 0x7;
1614 face[0] &= 0x7;
1615
1616 M = Max(Max(absX, absY), absZ);
1617
1618 // U = xMajor ? (neg ^ -z) : ((zMajor & neg) ^ x)
1619 U = As<Float4>((xMajor & (n ^ As<Int4>(-z))) | (~xMajor & ((zMajor & n) ^ As<Int4>(x))));
1620
1621 // V = !yMajor ? -y : (n ^ z)
1622 V = As<Float4>((~yMajor & As<Int4>(-y)) | (yMajor & (n ^ As<Int4>(z))));
1623
1624 M = reciprocal(M) * Float4(0.5f);
1625 U = U * M + Float4(0.5f);
1626 V = V * M + Float4(0.5f);
1627 }
1628
1629 Short4 SamplerCore::applyOffset(Short4 &uvw, Float4 &offset, const Int4 &whd, AddressingMode mode)
1630 {
1631 Int4 tmp = Int4(As<UShort4>(uvw));
1632 tmp = tmp + As<Int4>(offset);
1633
1634 switch(mode)
1635 {
1636 case AddressingMode::ADDRESSING_WRAP:
1637 tmp = (tmp + whd * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % whd;
1638 break;
1639 case AddressingMode::ADDRESSING_CLAMP:
1640 case AddressingMode::ADDRESSING_MIRROR:
1641 case AddressingMode::ADDRESSING_MIRRORONCE:
1642 case AddressingMode::ADDRESSING_BORDER: // FIXME: Implement and test ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE, ADDRESSING_BORDER
1643 tmp = Min(Max(tmp, Int4(0)), whd - Int4(1));
1644 break;
1645 case ADDRESSING_TEXELFETCH:
1646 break;
1647 case AddressingMode::ADDRESSING_SEAMLESS:
1648 ASSERT(false); // Cube sampling doesn't support offset.
1649 default:
1650 ASSERT(false);
1651 }
1652
1653 return As<Short4>(UShort4(tmp));
1654 }
1655
1656 void SamplerCore::computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, SamplerFunction function)
1657 {
1658 bool texelFetch = (function == Fetch);
1659 bool hasOffset = (function.option == Offset);
1660
1661 if(!texelFetch)
1662 {
1663 uuuu = MulHigh(As<UShort4>(uuuu), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, width)));
1664 vvvv = MulHigh(As<UShort4>(vvvv), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, height)));
1665 }
1666
1667 if(hasOffset)
1668 {
1669 UShort4 w = *Pointer<UShort4>(mipmap + OFFSET(Mipmap, width));
1670 uuuu = applyOffset(uuuu, offset.x, Int4(w), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeU);
1671 UShort4 h = *Pointer<UShort4>(mipmap + OFFSET(Mipmap, height));
1672 vvvv = applyOffset(vvvv, offset.y, Int4(h), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeV);
1673 }
1674
1675 Short4 uuu2 = uuuu;
1676 uuuu = As<Short4>(UnpackLow(uuuu, vvvv));
1677 uuu2 = As<Short4>(UnpackHigh(uuu2, vvvv));
1678 uuuu = As<Short4>(MulAdd(uuuu, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP))));
1679 uuu2 = As<Short4>(MulAdd(uuu2, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP))));
1680
1681 if(hasThirdCoordinate())
1682 {
1683 if(state.textureType != TEXTURE_2D_ARRAY)
1684 {
1685 if(!texelFetch)
1686 {
1687 wwww = MulHigh(As<UShort4>(wwww), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, depth)));
1688 }
1689
1690 if(hasOffset)
1691 {
1692 UShort4 d = *Pointer<UShort4>(mipmap + OFFSET(Mipmap, depth));
1693 wwww = applyOffset(wwww, offset.z, Int4(d), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeW);
1694 }
1695 }
1696
1697 UInt4 uv(As<UInt2>(uuuu), As<UInt2>(uuu2));
1698 uv += As<UInt4>(Int4(As<UShort4>(wwww))) * *Pointer<UInt4>(mipmap + OFFSET(Mipmap, sliceP));
1699
1700 index[0] = Extract(As<Int4>(uv), 0);
1701 index[1] = Extract(As<Int4>(uv), 1);
1702 index[2] = Extract(As<Int4>(uv), 2);
1703 index[3] = Extract(As<Int4>(uv), 3);
1704 }
1705 else
1706 {
1707 index[0] = Extract(As<Int2>(uuuu), 0);
1708 index[1] = Extract(As<Int2>(uuuu), 1);
1709 index[2] = Extract(As<Int2>(uuu2), 0);
1710 index[3] = Extract(As<Int2>(uuu2), 1);
1711 }
1712
1713 if(texelFetch)
1714 {
1715 Int size = Int(*Pointer<Int>(mipmap + OFFSET(Mipmap, sliceP)));
1716 if(hasThirdCoordinate())
1717 {
1718 size *= Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth)));
1719 }
1720 UInt min = 0;
1721 UInt max = size - 1;
1722
1723 for(int i = 0; i < 4; i++)
1724 {
1725 index[i] = Min(Max(index[i], min), max);
1726 }
1727 }
1728 }
1729
1730 void SamplerCore::computeIndices(UInt index[4], Int4& uuuu, Int4& vvvv, Int4& wwww, const Pointer<Byte> &mipmap, SamplerFunction function)
1731 {
1732 UInt4 indices = uuuu + vvvv;
1733
1734 if(hasThirdCoordinate())
1735 {
1736 indices += As<UInt4>(wwww);
1737 }
1738
1739 for(int i = 0; i < 4; i++)
1740 {
1741 index[i] = Extract(As<Int4>(indices), i);
1742 }
1743 }
1744
1745 Vector4s SamplerCore::sampleTexel(UInt index[4], Pointer<Byte> buffer[4])
1746 {
1747 Vector4s c;
1748
1749 int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
1750 int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
1751 int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
1752 int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
1753
1754 if(has16bitTextureFormat())
1755 {
1756 c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
1757 c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
1758 c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
1759 c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
1760
1761 switch(state.textureFormat)
1762 {
1763 case FORMAT_R5G6B5:
1764 c.z = (c.x & Short4(0x001Fu)) << 11;
1765 c.y = (c.x & Short4(0x07E0u)) << 5;
1766 c.x = (c.x & Short4(0xF800u));
1767 break;
1768 default:
1769 ASSERT(false);
1770 }
1771 }
1772 else if(has8bitTextureComponents())
1773 {
1774 switch(textureComponentCount())
1775 {
1776 case 4:
1777 {
1778 Byte4 c0 = Pointer<Byte4>(buffer[f0])[index[0]];
1779 Byte4 c1 = Pointer<Byte4>(buffer[f1])[index[1]];
1780 Byte4 c2 = Pointer<Byte4>(buffer[f2])[index[2]];
1781 Byte4 c3 = Pointer<Byte4>(buffer[f3])[index[3]];
1782 c.x = Unpack(c0, c1);
1783 c.y = Unpack(c2, c3);
1784
1785 switch(state.textureFormat)
1786 {
1787 case FORMAT_A8R8G8B8:
1788 c.z = As<Short4>(UnpackLow(c.x, c.y));
1789 c.x = As<Short4>(UnpackHigh(c.x, c.y));
1790 c.y = c.z;
1791 c.w = c.x;
1792 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
1793 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
1794 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
1795 c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(c.w));
1796 break;
1797 case FORMAT_A8B8G8R8:
1798 case FORMAT_A8B8G8R8I:
1799 case FORMAT_A8B8G8R8_SNORM:
1800 case FORMAT_Q8W8V8U8:
1801 case FORMAT_SRGB8_A8:
1802 c.z = As<Short4>(UnpackHigh(c.x, c.y));
1803 c.x = As<Short4>(UnpackLow(c.x, c.y));
1804 c.y = c.x;
1805 c.w = c.z;
1806 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
1807 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
1808 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
1809 c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(c.w));
1810 // Propagate sign bit
1811 if(state.textureFormat == FORMAT_A8B8G8R8I)
1812 {
1813 c.x >>= 8;
1814 c.y >>= 8;
1815 c.z >>= 8;
1816 c.w >>= 8;
1817 }
1818 break;
1819 case FORMAT_A8B8G8R8UI:
1820 c.z = As<Short4>(UnpackHigh(c.x, c.y));
1821 c.x = As<Short4>(UnpackLow(c.x, c.y));
1822 c.y = c.x;
1823 c.w = c.z;
1824 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0)));
1825 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0)));
1826 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0)));
1827 c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(Short4(0)));
1828 break;
1829 default:
1830 ASSERT(false);
1831 }
1832 }
1833 break;
1834 case 3:
1835 {
1836 Byte4 c0 = Pointer<Byte4>(buffer[f0])[index[0]];
1837 Byte4 c1 = Pointer<Byte4>(buffer[f1])[index[1]];
1838 Byte4 c2 = Pointer<Byte4>(buffer[f2])[index[2]];
1839 Byte4 c3 = Pointer<Byte4>(buffer[f3])[index[3]];
1840 c.x = Unpack(c0, c1);
1841 c.y = Unpack(c2, c3);
1842
1843 switch(state.textureFormat)
1844 {
1845 case FORMAT_X8R8G8B8:
1846 c.z = As<Short4>(UnpackLow(c.x, c.y));
1847 c.x = As<Short4>(UnpackHigh(c.x, c.y));
1848 c.y = c.z;
1849 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
1850 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
1851 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
1852 break;
1853 case FORMAT_X8B8G8R8_SNORM:
1854 case FORMAT_X8B8G8R8I:
1855 case FORMAT_X8B8G8R8:
1856 case FORMAT_X8L8V8U8:
1857 case FORMAT_SRGB8_X8:
1858 c.z = As<Short4>(UnpackHigh(c.x, c.y));
1859 c.x = As<Short4>(UnpackLow(c.x, c.y));
1860 c.y = c.x;
1861 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
1862 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
1863 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
1864 // Propagate sign bit
1865 if(state.textureFormat == FORMAT_X8B8G8R8I)
1866 {
1867 c.x >>= 8;
1868 c.y >>= 8;
1869 c.z >>= 8;
1870 }
1871 break;
1872 case FORMAT_X8B8G8R8UI:
1873 c.z = As<Short4>(UnpackHigh(c.x, c.y));
1874 c.x = As<Short4>(UnpackLow(c.x, c.y));
1875 c.y = c.x;
1876 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0)));
1877 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0)));
1878 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0)));
1879 break;
1880 default:
1881 ASSERT(false);
1882 }
1883 }
1884 break;
1885 case 2:
1886 c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
1887 c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
1888 c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
1889 c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
1890
1891 switch(state.textureFormat)
1892 {
1893 case FORMAT_G8R8:
1894 case FORMAT_G8R8_SNORM:
1895 case FORMAT_V8U8:
1896 case FORMAT_A8L8:
1897 c.y = (c.x & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c.x) >> 8);
1898 c.x = (c.x & Short4(0x00FFu)) | (c.x << 8);
1899 break;
1900 case FORMAT_G8R8I:
1901 c.y = c.x >> 8;
1902 c.x = (c.x << 8) >> 8; // Propagate sign bit
1903 break;
1904 case FORMAT_G8R8UI:
1905 c.y = As<Short4>(As<UShort4>(c.x) >> 8);
1906 c.x &= Short4(0x00FFu);
1907 break;
1908 default:
1909 ASSERT(false);
1910 }
1911 break;
1912 case 1:
1913 {
1914 Int c0 = Int(*Pointer<Byte>(buffer[f0] + index[0]));
1915 Int c1 = Int(*Pointer<Byte>(buffer[f1] + index[1]));
1916 Int c2 = Int(*Pointer<Byte>(buffer[f2] + index[2]));
1917 Int c3 = Int(*Pointer<Byte>(buffer[f3] + index[3]));
1918 c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
1919
1920 switch(state.textureFormat)
1921 {
1922 case FORMAT_R8I:
1923 case FORMAT_R8UI:
1924 {
1925 Int zero(0);
1926 c.x = Unpack(As<Byte4>(c0), As<Byte4>(zero));
1927 // Propagate sign bit
1928 if(state.textureFormat == FORMAT_R8I)
1929 {
1930 c.x = (c.x << 8) >> 8;
1931 }
1932 }
1933 break;
1934 default:
1935 c.x = Unpack(As<Byte4>(c0));
1936 break;
1937 }
1938 }
1939 break;
1940 default:
1941 ASSERT(false);
1942 }
1943 }
1944 else if(has16bitTextureComponents())
1945 {
1946 switch(textureComponentCount())
1947 {
1948 case 4:
1949 c.x = Pointer<Short4>(buffer[f0])[index[0]];
1950 c.y = Pointer<Short4>(buffer[f1])[index[1]];
1951 c.z = Pointer<Short4>(buffer[f2])[index[2]];
1952 c.w = Pointer<Short4>(buffer[f3])[index[3]];
1953 transpose4x4(c.x, c.y, c.z, c.w);
1954 break;
1955 case 3:
1956 c.x = Pointer<Short4>(buffer[f0])[index[0]];
1957 c.y = Pointer<Short4>(buffer[f1])[index[1]];
1958 c.z = Pointer<Short4>(buffer[f2])[index[2]];
1959 c.w = Pointer<Short4>(buffer[f3])[index[3]];
1960 transpose4x3(c.x, c.y, c.z, c.w);
1961 break;
1962 case 2:
1963 c.x = *Pointer<Short4>(buffer[f0] + 4 * index[0]);
1964 c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer[f1] + 4 * index[1])));
1965 c.z = *Pointer<Short4>(buffer[f2] + 4 * index[2]);
1966 c.z = As<Short4>(UnpackLow(c.z, *Pointer<Short4>(buffer[f3] + 4 * index[3])));
1967 c.y = c.x;
1968 c.x = UnpackLow(As<Int2>(c.x), As<Int2>(c.z));
1969 c.y = UnpackHigh(As<Int2>(c.y), As<Int2>(c.z));
1970 break;
1971 case 1:
1972 c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
1973 c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
1974 c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
1975 c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
1976 break;
1977 default:
1978 ASSERT(false);
1979 }
1980 }
1981 else ASSERT(false);
1982
1983 if(state.sRGB)
1984 {
1985 if(state.textureFormat == FORMAT_R5G6B5)
1986 {
1987 sRGBtoLinear16_5_16(c.x);
1988 sRGBtoLinear16_6_16(c.y);
1989 sRGBtoLinear16_5_16(c.z);
1990 }
1991 else
1992 {
1993 for(int i = 0; i < textureComponentCount(); i++)
1994 {
1995 if(isRGBComponent(i))
1996 {
1997 sRGBtoLinear16_8_16(c[i]);
1998 }
1999 }
2000 }
2001 }
2002
2003 return c;
2004 }
2005
2006 Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
2007 {
2008 Vector4s c;
2009
2010 UInt index[4];
2011 computeIndices(index, uuuu, vvvv, wwww, offset, mipmap, function);
2012
2013 if(hasYuvFormat())
2014 {
2015 // Generic YPbPr to RGB transformation
2016 // R = Y + 2 * (1 - Kr) * Pr
2017 // G = Y - 2 * Kb * (1 - Kb) / Kg * Pb - 2 * Kr * (1 - Kr) / Kg * Pr
2018 // B = Y + 2 * (1 - Kb) * Pb
2019
2020 float Kb = 0.114f;
2021 float Kr = 0.299f;
2022 int studioSwing = 1;
2023
2024 switch(state.textureFormat)
2025 {
2026 case FORMAT_YV12_BT601:
2027 Kb = 0.114f;
2028 Kr = 0.299f;
2029 studioSwing = 1;
2030 break;
2031 case FORMAT_YV12_BT709:
2032 Kb = 0.0722f;
2033 Kr = 0.2126f;
2034 studioSwing = 1;
2035 break;
2036 case FORMAT_YV12_JFIF:
2037 Kb = 0.114f;
2038 Kr = 0.299f;
2039 studioSwing = 0;
2040 break;
2041 default:
2042 ASSERT(false);
2043 }
2044
2045 const float Kg = 1.0f - Kr - Kb;
2046
2047 const float Rr = 2 * (1 - Kr);
2048 const float Gb = -2 * Kb * (1 - Kb) / Kg;
2049 const float Gr = -2 * Kr * (1 - Kr) / Kg;
2050 const float Bb = 2 * (1 - Kb);
2051
2052 // Scaling and bias for studio-swing range: Y = [16 .. 235], U/V = [16 .. 240]
2053 const float Yy = studioSwing ? 255.0f / (235 - 16) : 1.0f;
2054 const float Uu = studioSwing ? 255.0f / (240 - 16) : 1.0f;
2055 const float Vv = studioSwing ? 255.0f / (240 - 16) : 1.0f;
2056
2057 const float Rv = Vv * Rr;
2058 const float Gu = Uu * Gb;
2059 const float Gv = Vv * Gr;
2060 const float Bu = Uu * Bb;
2061
2062 const float R0 = (studioSwing * -16 * Yy - 128 * Rv) / 255;
2063 const float G0 = (studioSwing * -16 * Yy - 128 * Gu - 128 * Gv) / 255;
2064 const float B0 = (studioSwing * -16 * Yy - 128 * Bu) / 255;
2065
2066 Int c0 = Int(buffer[0][index[0]]);
2067 Int c1 = Int(buffer[0][index[1]]);
2068 Int c2 = Int(buffer[0][index[2]]);
2069 Int c3 = Int(buffer[0][index[3]]);
2070 c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
2071 UShort4 Y = As<UShort4>(Unpack(As<Byte4>(c0)));
2072
2073 computeIndices(index, uuuu, vvvv, wwww, offset, mipmap + sizeof(Mipmap), function);
2074 c0 = Int(buffer[1][index[0]]);
2075 c1 = Int(buffer[1][index[1]]);
2076 c2 = Int(buffer[1][index[2]]);
2077 c3 = Int(buffer[1][index[3]]);
2078 c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
2079 UShort4 V = As<UShort4>(Unpack(As<Byte4>(c0)));
2080
2081 c0 = Int(buffer[2][index[0]]);
2082 c1 = Int(buffer[2][index[1]]);
2083 c2 = Int(buffer[2][index[2]]);
2084 c3 = Int(buffer[2][index[3]]);
2085 c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
2086 UShort4 U = As<UShort4>(Unpack(As<Byte4>(c0)));
2087
2088 const UShort4 yY = UShort4(iround(Yy * 0x4000));
2089 const UShort4 rV = UShort4(iround(Rv * 0x4000));
2090 const UShort4 gU = UShort4(iround(-Gu * 0x4000));
2091 const UShort4 gV = UShort4(iround(-Gv * 0x4000));
2092 const UShort4 bU = UShort4(iround(Bu * 0x4000));
2093
2094 const UShort4 r0 = UShort4(iround(-R0 * 0x4000));
2095 const UShort4 g0 = UShort4(iround(G0 * 0x4000));
2096 const UShort4 b0 = UShort4(iround(-B0 * 0x4000));
2097
2098 UShort4 y = MulHigh(Y, yY);
2099 UShort4 r = SubSat(y + MulHigh(V, rV), r0);
2100 UShort4 g = SubSat(y + g0, MulHigh(U, gU) + MulHigh(V, gV));
2101 UShort4 b = SubSat(y + MulHigh(U, bU), b0);
2102
2103 c.x = Min(r, UShort4(0x3FFF)) << 2;
2104 c.y = Min(g, UShort4(0x3FFF)) << 2;
2105 c.z = Min(b, UShort4(0x3FFF)) << 2;
2106 }
2107 else
2108 {
2109 return sampleTexel(index, buffer);
2110 }
2111
2112 return c;
2113 }
2114
2115 Vector4f SamplerCore::sampleTexel(Int4 &uuuu, Int4 &vvvv, Int4 &wwww, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
2116 {
2117 Vector4f c;
2118
2119 UInt index[4];
2120 computeIndices(index, uuuu, vvvv, wwww, mipmap, function);
2121
2122 if(hasFloatTexture() || has32bitIntegerTextureComponents())
2123 {
2124 int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
2125 int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
2126 int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
2127 int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
2128
2129 // Read texels
2130 switch(textureComponentCount())
2131 {
2132 case 4:
2133 c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
2134 c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
2135 c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
2136 c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
2137 transpose4x4(c.x, c.y, c.z, c.w);
2138 break;
2139 case 3:
2140 c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
2141 c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
2142 c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
2143 c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
2144 transpose4x3(c.x, c.y, c.z, c.w);
2145 break;
2146 case 2:
2147 // FIXME: Optimal shuffling?
2148 c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8);
2149 c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8);
2150 c.z.xy = *Pointer<Float4>(buffer[f2] + index[2] * 8);
2151 c.z.zw = *Pointer<Float4>(buffer[f3] + index[3] * 8 - 8);
2152 c.y = c.x;
2153 c.x = Float4(c.x.xz, c.z.xz);
2154 c.y = Float4(c.y.yw, c.z.yw);
2155 break;
2156 case 1:
2157 // FIXME: Optimal shuffling?
2158 c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
2159 c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
2160 c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
2161 c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
2162 break;
2163 default:
2164 ASSERT(false);
2165 }
2166
2167 if(state.compare != COMPARE_BYPASS)
2168 {
2169 Float4 ref = z;
2170
2171 if(!hasFloatTexture())
2172 {
2173 ref = Min(Max(ref, Float4(0.0f)), Float4(1.0f));
2174 }
2175
2176 Int4 boolean;
2177
2178 switch(state.compare)
2179 {
2180 case COMPARE_LESSEQUAL: boolean = CmpLE(ref, c.x); break;
2181 case COMPARE_GREATEREQUAL: boolean = CmpNLT(ref, c.x); break;
2182 case COMPARE_LESS: boolean = CmpLT(ref, c.x); break;
2183 case COMPARE_GREATER: boolean = CmpNLE(ref, c.x); break;
2184 case COMPARE_EQUAL: boolean = CmpEQ(ref, c.x); break;
2185 case COMPARE_NOTEQUAL: boolean = CmpNEQ(ref, c.x); break;
2186 case COMPARE_ALWAYS: boolean = Int4(-1); break;
2187 case COMPARE_NEVER: boolean = Int4(0); break;
2188 default: ASSERT(false);
2189 }
2190
2191 c.x = As<Float4>(boolean & As<Int4>(Float4(1.0f)));
2192 c.y = Float4(0.0f);
2193 c.z = Float4(0.0f);
2194 c.w = Float4(1.0f);
2195 }
2196 }
2197 else
2198 {
2199 ASSERT(!hasYuvFormat());
2200
2201 Vector4s cs = sampleTexel(index, buffer);
2202
2203 bool isInteger = Surface::isNonNormalizedInteger(state.textureFormat);
2204 int componentCount = textureComponentCount();
2205 for(int n = 0; n < componentCount; n++)
2206 {
2207 if(hasUnsignedTextureComponent(n))
2208 {
2209 if(isInteger)
2210 {
2211 c[n] = As<Float4>(Int4(As<UShort4>(cs[n])));
2212 }
2213 else
2214 {
2215 c[n] = Float4(As<UShort4>(cs[n]));
2216 }
2217 }
2218 else
2219 {
2220 if(isInteger)
2221 {
2222 c[n] = As<Float4>(Int4(cs[n]));
2223 }
2224 else
2225 {
2226 c[n] = Float4(cs[n]);
2227 }
2228 }
2229 }
2230 }
2231
2232 return c;
2233 }
2234
2235 void SamplerCore::selectMipmap(Pointer<Byte> &texture, Pointer<Byte> buffer[4], Pointer<Byte> &mipmap, Float &lod, Int face[4], bool secondLOD)
2236 {
2237 if(state.mipmapFilter == MIPMAP_NONE)
2238 {
2239 mipmap = texture + OFFSET(Texture,mipmap[0]);
2240 }
2241 else
2242 {
2243 Int ilod;
2244
2245 if(state.mipmapFilter == MIPMAP_POINT)
2246 {
2247 ilod = RoundInt(lod);
2248 }
2249 else // MIPMAP_LINEAR
2250 {
2251 ilod = Int(lod);
2252 }
2253
2254 mipmap = texture + OFFSET(Texture,mipmap) + ilod * sizeof(Mipmap) + secondLOD * sizeof(Mipmap);
2255 }
2256
2257 if(state.textureType != TEXTURE_CUBE)
2258 {
2259 buffer[0] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[0]));
2260
2261 if(hasYuvFormat())
2262 {
2263 buffer[1] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[1]));
2264 buffer[2] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[2]));
2265 }
2266 }
2267 else
2268 {
2269 for(int i = 0; i < 4; i++)
2270 {
2271 buffer[i] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer) + face[i] * sizeof(void*));
2272 }
2273 }
2274 }
2275
2276 Int4 SamplerCore::computeFilterOffset(Float &lod)
2277 {
2278 Int4 filter = -1;
2279
2280 if(state.textureFilter == FILTER_POINT)
2281 {
2282 filter = 0;
2283 }
2284 else if(state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT)
2285 {
2286 filter = CmpNLE(Float4(lod), Float4(0.0f));
2287 }
2288 else if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR)
2289 {
2290 filter = CmpLE(Float4(lod), Float4(0.0f));
2291 }
2292
2293 return filter;
2294 }
2295
2296 Short4 SamplerCore::address(Float4 &uw, AddressingMode addressingMode, Pointer<Byte> &mipmap)
2297 {
2298 if(addressingMode == ADDRESSING_LAYER && state.textureType != TEXTURE_2D_ARRAY)
2299 {
2300 return Short4(); // Unused
2301 }
2302 else if(addressingMode == ADDRESSING_LAYER && state.textureType == TEXTURE_2D_ARRAY)
2303 {
2304 return Min(Max(Short4(RoundInt(uw)), Short4(0)), *Pointer<Short4>(mipmap + OFFSET(Mipmap, depth)) - Short4(1));
2305 }
2306 else if(addressingMode == ADDRESSING_CLAMP || addressingMode == ADDRESSING_BORDER)
2307 {
2308 Float4 clamp = Min(Max(uw, Float4(0.0f)), Float4(65535.0f / 65536.0f));
2309
2310 return Short4(Int4(clamp * Float4(1 << 16)));
2311 }
2312 else if(addressingMode == ADDRESSING_MIRROR)
2313 {
2314 Int4 convert = Int4(uw * Float4(1 << 16));
2315 Int4 mirror = (convert << 15) >> 31;
2316
2317 convert ^= mirror;
2318
2319 return Short4(convert);
2320 }
2321 else if(addressingMode == ADDRESSING_MIRRORONCE)
2322 {
2323 // Absolute value
2324 Int4 convert = Int4(Abs(uw * Float4(1 << 16)));
2325
2326 // Clamp
2327 convert -= Int4(0x00008000, 0x00008000, 0x00008000, 0x00008000);
2328 convert = As<Int4>(PackSigned(convert, convert));
2329
2330 return As<Short4>(Int2(convert)) + Short4(0x8000u);
2331 }
2332 else // Wrap
2333 {
2334 return Short4(Int4(uw * Float4(1 << 16)));
2335 }
2336 }
2337
2338 void SamplerCore::address(Float4 &uvw, Int4 &xyz0, Int4 &xyz1, Float4 &f, Pointer<Byte> &mipmap, Float4 &texOffset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function)
2339 {
2340 if(addressingMode == ADDRESSING_LAYER && state.textureType != TEXTURE_2D_ARRAY)
2341 {
2342 return; // Unused
2343 }
2344
2345 Int4 dim = Int4(*Pointer<Short4>(mipmap + whd, 16));
2346 Int4 maxXYZ = dim - Int4(1);
2347
2348 if(function == Fetch)
2349 {
2350 xyz0 = Min(Max(((function.option == Offset) && (addressingMode != ADDRESSING_LAYER)) ? As<Int4>(uvw) + As<Int4>(texOffset) : As<Int4>(uvw), Int4(0)), maxXYZ);
2351 }
2352 else if(addressingMode == ADDRESSING_LAYER && state.textureType == TEXTURE_2D_ARRAY) // Note: Offset does not apply to array layers
2353 {
2354 xyz0 = Min(Max(RoundInt(uvw), Int4(0)), maxXYZ);
2355 }
2356 else
2357 {
2358 const int halfBits = 0x3EFFFFFF; // Value just under 0.5f
2359 const int oneBits = 0x3F7FFFFF; // Value just under 1.0f
2360 const int twoBits = 0x3FFFFFFF; // Value just under 2.0f
2361
2362 Float4 coord = uvw;
2363
2364 if(state.textureType == TEXTURE_RECTANGLE)
2365 {
2366 // According to https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_rectangle.txt
2367 // "CLAMP_TO_EDGE causes the s coordinate to be clamped to the range[0.5, wt - 0.5].
2368 // CLAMP_TO_EDGE causes the t coordinate to be clamped to the range[0.5, ht - 0.5]."
2369 // Unless SwiftShader implements support for ADDRESSING_BORDER, other modes should be equivalent
2370 // to CLAMP_TO_EDGE. Rectangle textures have no support for any MIRROR or REPEAT modes.
2371 coord = Min(Max(coord, Float4(0.5f)), Float4(dim) - Float4(0.5f));
2372 }
2373 else
2374 {
2375 switch(addressingMode)
2376 {
2377 case ADDRESSING_CLAMP:
2378 case ADDRESSING_BORDER:
2379 case ADDRESSING_SEAMLESS:
2380 {
2381 // While cube face coordinates are nominally already in the
2382 // [0, 1] range due to the projection, and numerical
2383 // imprecision is tolerated due to the border of pixels for
2384 // seamless filtering, this isn't true for inf and NaN
2385 // values. So we always clamp.
2386 Float4 one = As<Float4>(Int4(oneBits));
2387 coord = Min(Max(coord, Float4(0.0f)), one);
2388 }
2389 break;
2390 case ADDRESSING_MIRROR:
2391 {
2392 Float4 half = As<Float4>(Int4(halfBits));
2393 Float4 one = As<Float4>(Int4(oneBits));
2394 Float4 two = As<Float4>(Int4(twoBits));
2395 coord = one - Abs(two * Frac(coord * half) - one);
2396 }
2397 break;
2398 case ADDRESSING_MIRRORONCE:
2399 {
2400 Float4 half = As<Float4>(Int4(halfBits));
2401 Float4 one = As<Float4>(Int4(oneBits));
2402 Float4 two = As<Float4>(Int4(twoBits));
2403 coord = one - Abs(two * Frac(Min(Max(coord, -one), two) * half) - one);
2404 }
2405 break;
2406 default: // Wrap
2407 coord = Frac(coord);
2408 break;
2409 }
2410
2411 coord = coord * Float4(dim);
2412 }
2413
2414 if(state.textureFilter == FILTER_POINT ||
2415 state.textureFilter == FILTER_GATHER)
2416 {
2417 xyz0 = Int4(coord);
2418 }
2419 else
2420 {
2421 if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR ||
2422 state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT)
2423 {
2424 coord -= As<Float4>(As<Int4>(Float4(0.5f)) & filter);
2425 }
2426 else
2427 {
2428 coord -= Float4(0.5f);
2429 }
2430
2431 Float4 floor = Floor(coord);
2432 xyz0 = Int4(floor);
2433 f = coord - floor;
2434 }
2435
2436 if(function.option == Offset)
2437 {
2438 xyz0 += As<Int4>(texOffset);
2439 }
2440
2441 if(addressingMode == ADDRESSING_SEAMLESS)
2442 {
2443 xyz0 += Int4(1);
2444 }
2445
2446 xyz1 = xyz0 - filter; // Increment
2447
2448 if(function.option == Offset)
2449 {
2450 switch(addressingMode)
2451 {
2452 case ADDRESSING_SEAMLESS:
2453 ASSERT(false); // Cube sampling doesn't support offset.
2454 case ADDRESSING_MIRROR:
2455 case ADDRESSING_MIRRORONCE:
2456 case ADDRESSING_BORDER:
2457 // FIXME: Implement ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE, and ADDRESSING_BORDER.
2458 // Fall through to Clamp.
2459 case ADDRESSING_CLAMP:
2460 xyz0 = Min(Max(xyz0, Int4(0)), maxXYZ);
2461 xyz1 = Min(Max(xyz1, Int4(0)), maxXYZ);
2462 break;
2463 default: // Wrap
2464 xyz0 = (xyz0 + dim * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % dim;
2465 xyz1 = (xyz1 + dim * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % dim;
2466 break;
2467 }
2468 }
2469 else if(state.textureFilter != FILTER_POINT)
2470 {
2471 switch(addressingMode)
2472 {
2473 case ADDRESSING_SEAMLESS:
2474 break;
2475 case ADDRESSING_MIRROR:
2476 case ADDRESSING_MIRRORONCE:
2477 case ADDRESSING_BORDER:
2478 case ADDRESSING_CLAMP:
2479 xyz0 = Max(xyz0, Int4(0));
2480 xyz1 = Min(xyz1, maxXYZ);
2481 break;
2482 default: // Wrap
2483 {
2484 Int4 under = CmpLT(xyz0, Int4(0));
2485 xyz0 = (under & maxXYZ) | (~under & xyz0); // xyz < 0 ? dim - 1 : xyz // TODO: IfThenElse()
2486
2487 Int4 nover = CmpLT(xyz1, dim);
2488 xyz1 = nover & xyz1; // xyz >= dim ? 0 : xyz
2489 }
2490 break;
2491 }
2492 }
2493 }
2494 }
2495
2496 void SamplerCore::convertFixed12(Short4 &cs, Float4 &cf)
2497 {
2498 cs = RoundShort4(cf * Float4(0x1000));
2499 }
2500
2501 void SamplerCore::convertFixed12(Vector4s &cs, Vector4f &cf)
2502 {
2503 convertFixed12(cs.x, cf.x);
2504 convertFixed12(cs.y, cf.y);
2505 convertFixed12(cs.z, cf.z);
2506 convertFixed12(cs.w, cf.w);
2507 }
2508
2509 void SamplerCore::convertSigned12(Float4 &cf, Short4 &cs)
2510 {
2511 cf = Float4(cs) * Float4(1.0f / 0x0FFE);
2512 }
2513
2514// void SamplerCore::convertSigned12(Vector4f &cf, Vector4s &cs)
2515// {
2516// convertSigned12(cf.x, cs.x);
2517// convertSigned12(cf.y, cs.y);
2518// convertSigned12(cf.z, cs.z);
2519// convertSigned12(cf.w, cs.w);
2520// }
2521
2522 void SamplerCore::convertSigned15(Float4 &cf, Short4 &cs)
2523 {
2524 cf = Float4(cs) * Float4(1.0f / 0x7FFF);
2525 }
2526
2527 void SamplerCore::convertUnsigned16(Float4 &cf, Short4 &cs)
2528 {
2529 cf = Float4(As<UShort4>(cs)) * Float4(1.0f / 0xFFFF);
2530 }
2531
2532 void SamplerCore::sRGBtoLinear16_8_16(Short4 &c)
2533 {
2534 c = As<UShort4>(c) >> 8;
2535
2536 Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear8_16));
2537
2538 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0);
2539 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1);
2540 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2);
2541 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3);
2542 }
2543
2544 void SamplerCore::sRGBtoLinear16_6_16(Short4 &c)
2545 {
2546 c = As<UShort4>(c) >> 10;
2547
2548 Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear6_16));
2549
2550 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0);
2551 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1);
2552 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2);
2553 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3);
2554 }
2555
2556 void SamplerCore::sRGBtoLinear16_5_16(Short4 &c)
2557 {
2558 c = As<UShort4>(c) >> 11;
2559
2560 Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear5_16));
2561
2562 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0);
2563 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1);
2564 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2);
2565 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3);
2566 }
2567
2568 bool SamplerCore::hasFloatTexture() const
2569 {
2570 return Surface::isFloatFormat(state.textureFormat);
2571 }
2572
2573 bool SamplerCore::hasUnnormalizedIntegerTexture() const
2574 {
2575 return Surface::isNonNormalizedInteger(state.textureFormat);
2576 }
2577
2578 bool SamplerCore::hasUnsignedTextureComponent(int component) const
2579 {
2580 return Surface::isUnsignedComponent(state.textureFormat, component);
2581 }
2582
2583 int SamplerCore::textureComponentCount() const
2584 {
2585 return Surface::componentCount(state.textureFormat);
2586 }
2587
2588 bool SamplerCore::hasThirdCoordinate() const
2589 {
2590 return (state.textureType == TEXTURE_3D) || (state.textureType == TEXTURE_2D_ARRAY);
2591 }
2592
2593 bool SamplerCore::has16bitTextureFormat() const
2594 {
2595 switch(state.textureFormat)
2596 {
2597 case FORMAT_R5G6B5:
2598 return true;
2599 case FORMAT_R8_SNORM:
2600 case FORMAT_G8R8_SNORM:
2601 case FORMAT_X8B8G8R8_SNORM:
2602 case FORMAT_A8B8G8R8_SNORM:
2603 case FORMAT_R8I:
2604 case FORMAT_R8UI:
2605 case FORMAT_G8R8I:
2606 case FORMAT_G8R8UI:
2607 case FORMAT_X8B8G8R8I:
2608 case FORMAT_X8B8G8R8UI:
2609 case FORMAT_A8B8G8R8I:
2610 case FORMAT_A8B8G8R8UI:
2611 case FORMAT_R32I:
2612 case FORMAT_R32UI:
2613 case FORMAT_G32R32I:
2614 case FORMAT_G32R32UI:
2615 case FORMAT_X32B32G32R32I:
2616 case FORMAT_X32B32G32R32UI:
2617 case FORMAT_A32B32G32R32I:
2618 case FORMAT_A32B32G32R32UI:
2619 case FORMAT_G8R8:
2620 case FORMAT_X8R8G8B8:
2621 case FORMAT_X8B8G8R8:
2622 case FORMAT_A8R8G8B8:
2623 case FORMAT_A8B8G8R8:
2624 case FORMAT_SRGB8_X8:
2625 case FORMAT_SRGB8_A8:
2626 case FORMAT_V8U8:
2627 case FORMAT_Q8W8V8U8:
2628 case FORMAT_X8L8V8U8:
2629 case FORMAT_R32F:
2630 case FORMAT_G32R32F:
2631 case FORMAT_X32B32G32R32F:
2632 case FORMAT_A32B32G32R32F:
2633 case FORMAT_X32B32G32R32F_UNSIGNED:
2634 case FORMAT_A8:
2635 case FORMAT_R8:
2636 case FORMAT_L8:
2637 case FORMAT_A8L8:
2638 case FORMAT_D32F_LOCKABLE:
2639 case FORMAT_D32FS8_TEXTURE:
2640 case FORMAT_D32F_SHADOW:
2641 case FORMAT_D32FS8_SHADOW:
2642 case FORMAT_L16:
2643 case FORMAT_G16R16:
2644 case FORMAT_A16B16G16R16:
2645 case FORMAT_V16U16:
2646 case FORMAT_A16W16V16U16:
2647 case FORMAT_Q16W16V16U16:
2648 case FORMAT_R16I:
2649 case FORMAT_R16UI:
2650 case FORMAT_G16R16I:
2651 case FORMAT_G16R16UI:
2652 case FORMAT_X16B16G16R16I:
2653 case FORMAT_X16B16G16R16UI:
2654 case FORMAT_A16B16G16R16I:
2655 case FORMAT_A16B16G16R16UI:
2656 case FORMAT_YV12_BT601:
2657 case FORMAT_YV12_BT709:
2658 case FORMAT_YV12_JFIF:
2659 return false;
2660 default:
2661 ASSERT(false);
2662 }
2663
2664 return false;
2665 }
2666
2667 bool SamplerCore::has8bitTextureComponents() const
2668 {
2669 switch(state.textureFormat)
2670 {
2671 case FORMAT_G8R8:
2672 case FORMAT_X8R8G8B8:
2673 case FORMAT_X8B8G8R8:
2674 case FORMAT_A8R8G8B8:
2675 case FORMAT_A8B8G8R8:
2676 case FORMAT_SRGB8_X8:
2677 case FORMAT_SRGB8_A8:
2678 case FORMAT_V8U8:
2679 case FORMAT_Q8W8V8U8:
2680 case FORMAT_X8L8V8U8:
2681 case FORMAT_A8:
2682 case FORMAT_R8:
2683 case FORMAT_L8:
2684 case FORMAT_A8L8:
2685 case FORMAT_R8_SNORM:
2686 case FORMAT_G8R8_SNORM:
2687 case FORMAT_X8B8G8R8_SNORM:
2688 case FORMAT_A8B8G8R8_SNORM:
2689 case FORMAT_R8I:
2690 case FORMAT_R8UI:
2691 case FORMAT_G8R8I:
2692 case FORMAT_G8R8UI:
2693 case FORMAT_X8B8G8R8I:
2694 case FORMAT_X8B8G8R8UI:
2695 case FORMAT_A8B8G8R8I:
2696 case FORMAT_A8B8G8R8UI:
2697 return true;
2698 case FORMAT_R5G6B5:
2699 case FORMAT_R32F:
2700 case FORMAT_G32R32F:
2701 case FORMAT_X32B32G32R32F:
2702 case FORMAT_A32B32G32R32F:
2703 case FORMAT_X32B32G32R32F_UNSIGNED:
2704 case FORMAT_D32F_LOCKABLE:
2705 case FORMAT_D32FS8_TEXTURE:
2706 case FORMAT_D32F_SHADOW:
2707 case FORMAT_D32FS8_SHADOW:
2708 case FORMAT_L16:
2709 case FORMAT_G16R16:
2710 case FORMAT_A16B16G16R16:
2711 case FORMAT_V16U16:
2712 case FORMAT_A16W16V16U16:
2713 case FORMAT_Q16W16V16U16:
2714 case FORMAT_R32I:
2715 case FORMAT_R32UI:
2716 case FORMAT_G32R32I:
2717 case FORMAT_G32R32UI:
2718 case FORMAT_X32B32G32R32I:
2719 case FORMAT_X32B32G32R32UI:
2720 case FORMAT_A32B32G32R32I:
2721 case FORMAT_A32B32G32R32UI:
2722 case FORMAT_R16I:
2723 case FORMAT_R16UI:
2724 case FORMAT_G16R16I:
2725 case FORMAT_G16R16UI:
2726 case FORMAT_X16B16G16R16I:
2727 case FORMAT_X16B16G16R16UI:
2728 case FORMAT_A16B16G16R16I:
2729 case FORMAT_A16B16G16R16UI:
2730 case FORMAT_YV12_BT601:
2731 case FORMAT_YV12_BT709:
2732 case FORMAT_YV12_JFIF:
2733 return false;
2734 default:
2735 ASSERT(false);
2736 }
2737
2738 return false;
2739 }
2740
2741 bool SamplerCore::has16bitTextureComponents() const
2742 {
2743 switch(state.textureFormat)
2744 {
2745 case FORMAT_R5G6B5:
2746 case FORMAT_R8_SNORM:
2747 case FORMAT_G8R8_SNORM:
2748 case FORMAT_X8B8G8R8_SNORM:
2749 case FORMAT_A8B8G8R8_SNORM:
2750 case FORMAT_R8I:
2751 case FORMAT_R8UI:
2752 case FORMAT_G8R8I:
2753 case FORMAT_G8R8UI:
2754 case FORMAT_X8B8G8R8I:
2755 case FORMAT_X8B8G8R8UI:
2756 case FORMAT_A8B8G8R8I:
2757 case FORMAT_A8B8G8R8UI:
2758 case FORMAT_R32I:
2759 case FORMAT_R32UI:
2760 case FORMAT_G32R32I:
2761 case FORMAT_G32R32UI:
2762 case FORMAT_X32B32G32R32I:
2763 case FORMAT_X32B32G32R32UI:
2764 case FORMAT_A32B32G32R32I:
2765 case FORMAT_A32B32G32R32UI:
2766 case FORMAT_G8R8:
2767 case FORMAT_X8R8G8B8:
2768 case FORMAT_X8B8G8R8:
2769 case FORMAT_A8R8G8B8:
2770 case FORMAT_A8B8G8R8:
2771 case FORMAT_SRGB8_X8:
2772 case FORMAT_SRGB8_A8:
2773 case FORMAT_V8U8:
2774 case FORMAT_Q8W8V8U8:
2775 case FORMAT_X8L8V8U8:
2776 case FORMAT_R32F:
2777 case FORMAT_G32R32F:
2778 case FORMAT_X32B32G32R32F:
2779 case FORMAT_A32B32G32R32F:
2780 case FORMAT_X32B32G32R32F_UNSIGNED:
2781 case FORMAT_A8:
2782 case FORMAT_R8:
2783 case FORMAT_L8:
2784 case FORMAT_A8L8:
2785 case FORMAT_D32F_LOCKABLE:
2786 case FORMAT_D32FS8_TEXTURE:
2787 case FORMAT_D32F_SHADOW:
2788 case FORMAT_D32FS8_SHADOW:
2789 case FORMAT_YV12_BT601:
2790 case FORMAT_YV12_BT709:
2791 case FORMAT_YV12_JFIF:
2792 return false;
2793 case FORMAT_L16:
2794 case FORMAT_G16R16:
2795 case FORMAT_A16B16G16R16:
2796 case FORMAT_R16I:
2797 case FORMAT_R16UI:
2798 case FORMAT_G16R16I:
2799 case FORMAT_G16R16UI:
2800 case FORMAT_X16B16G16R16I:
2801 case FORMAT_X16B16G16R16UI:
2802 case FORMAT_A16B16G16R16I:
2803 case FORMAT_A16B16G16R16UI:
2804 case FORMAT_V16U16:
2805 case FORMAT_A16W16V16U16:
2806 case FORMAT_Q16W16V16U16:
2807 return true;
2808 default:
2809 ASSERT(false);
2810 }
2811
2812 return false;
2813 }
2814
2815 bool SamplerCore::has32bitIntegerTextureComponents() const
2816 {
2817 switch(state.textureFormat)
2818 {
2819 case FORMAT_R5G6B5:
2820 case FORMAT_R8_SNORM:
2821 case FORMAT_G8R8_SNORM:
2822 case FORMAT_X8B8G8R8_SNORM:
2823 case FORMAT_A8B8G8R8_SNORM:
2824 case FORMAT_R8I:
2825 case FORMAT_R8UI:
2826 case FORMAT_G8R8I:
2827 case FORMAT_G8R8UI:
2828 case FORMAT_X8B8G8R8I:
2829 case FORMAT_X8B8G8R8UI:
2830 case FORMAT_A8B8G8R8I:
2831 case FORMAT_A8B8G8R8UI:
2832 case FORMAT_G8R8:
2833 case FORMAT_X8R8G8B8:
2834 case FORMAT_X8B8G8R8:
2835 case FORMAT_A8R8G8B8:
2836 case FORMAT_A8B8G8R8:
2837 case FORMAT_SRGB8_X8:
2838 case FORMAT_SRGB8_A8:
2839 case FORMAT_V8U8:
2840 case FORMAT_Q8W8V8U8:
2841 case FORMAT_X8L8V8U8:
2842 case FORMAT_L16:
2843 case FORMAT_G16R16:
2844 case FORMAT_A16B16G16R16:
2845 case FORMAT_R16I:
2846 case FORMAT_R16UI:
2847 case FORMAT_G16R16I:
2848 case FORMAT_G16R16UI:
2849 case FORMAT_X16B16G16R16I:
2850 case FORMAT_X16B16G16R16UI:
2851 case FORMAT_A16B16G16R16I:
2852 case FORMAT_A16B16G16R16UI:
2853 case FORMAT_V16U16:
2854 case FORMAT_A16W16V16U16:
2855 case FORMAT_Q16W16V16U16:
2856 case FORMAT_R32F:
2857 case FORMAT_G32R32F:
2858 case FORMAT_X32B32G32R32F:
2859 case FORMAT_A32B32G32R32F:
2860 case FORMAT_X32B32G32R32F_UNSIGNED:
2861 case FORMAT_A8:
2862 case FORMAT_R8:
2863 case FORMAT_L8:
2864 case FORMAT_A8L8:
2865 case FORMAT_D32F_LOCKABLE:
2866 case FORMAT_D32FS8_TEXTURE:
2867 case FORMAT_D32F_SHADOW:
2868 case FORMAT_D32FS8_SHADOW:
2869 case FORMAT_YV12_BT601:
2870 case FORMAT_YV12_BT709:
2871 case FORMAT_YV12_JFIF:
2872 return false;
2873 case FORMAT_R32I:
2874 case FORMAT_R32UI:
2875 case FORMAT_G32R32I:
2876 case FORMAT_G32R32UI:
2877 case FORMAT_X32B32G32R32I:
2878 case FORMAT_X32B32G32R32UI:
2879 case FORMAT_A32B32G32R32I:
2880 case FORMAT_A32B32G32R32UI:
2881 return true;
2882 default:
2883 ASSERT(false);
2884 }
2885
2886 return false;
2887 }
2888
2889 bool SamplerCore::hasYuvFormat() const
2890 {
2891 switch(state.textureFormat)
2892 {
2893 case FORMAT_YV12_BT601:
2894 case FORMAT_YV12_BT709:
2895 case FORMAT_YV12_JFIF:
2896 return true;
2897 case FORMAT_R5G6B5:
2898 case FORMAT_R8_SNORM:
2899 case FORMAT_G8R8_SNORM:
2900 case FORMAT_X8B8G8R8_SNORM:
2901 case FORMAT_A8B8G8R8_SNORM:
2902 case FORMAT_R8I:
2903 case FORMAT_R8UI:
2904 case FORMAT_G8R8I:
2905 case FORMAT_G8R8UI:
2906 case FORMAT_X8B8G8R8I:
2907 case FORMAT_X8B8G8R8UI:
2908 case FORMAT_A8B8G8R8I:
2909 case FORMAT_A8B8G8R8UI:
2910 case FORMAT_R32I:
2911 case FORMAT_R32UI:
2912 case FORMAT_G32R32I:
2913 case FORMAT_G32R32UI:
2914 case FORMAT_X32B32G32R32I:
2915 case FORMAT_X32B32G32R32UI:
2916 case FORMAT_A32B32G32R32I:
2917 case FORMAT_A32B32G32R32UI:
2918 case FORMAT_G8R8:
2919 case FORMAT_X8R8G8B8:
2920 case FORMAT_X8B8G8R8:
2921 case FORMAT_A8R8G8B8:
2922 case FORMAT_A8B8G8R8:
2923 case FORMAT_SRGB8_X8:
2924 case FORMAT_SRGB8_A8:
2925 case FORMAT_V8U8:
2926 case FORMAT_Q8W8V8U8:
2927 case FORMAT_X8L8V8U8:
2928 case FORMAT_R32F:
2929 case FORMAT_G32R32F:
2930 case FORMAT_X32B32G32R32F:
2931 case FORMAT_A32B32G32R32F:
2932 case FORMAT_X32B32G32R32F_UNSIGNED:
2933 case FORMAT_A8:
2934 case FORMAT_R8:
2935 case FORMAT_L8:
2936 case FORMAT_A8L8:
2937 case FORMAT_D32F_LOCKABLE:
2938 case FORMAT_D32FS8_TEXTURE:
2939 case FORMAT_D32F_SHADOW:
2940 case FORMAT_D32FS8_SHADOW:
2941 case FORMAT_L16:
2942 case FORMAT_G16R16:
2943 case FORMAT_A16B16G16R16:
2944 case FORMAT_R16I:
2945 case FORMAT_R16UI:
2946 case FORMAT_G16R16I:
2947 case FORMAT_G16R16UI:
2948 case FORMAT_X16B16G16R16I:
2949 case FORMAT_X16B16G16R16UI:
2950 case FORMAT_A16B16G16R16I:
2951 case FORMAT_A16B16G16R16UI:
2952 case FORMAT_V16U16:
2953 case FORMAT_A16W16V16U16:
2954 case FORMAT_Q16W16V16U16:
2955 return false;
2956 default:
2957 ASSERT(false);
2958 }
2959
2960 return false;
2961 }
2962
2963 bool SamplerCore::isRGBComponent(int component) const
2964 {
2965 switch(state.textureFormat)
2966 {
2967 case FORMAT_R5G6B5: return component < 3;
2968 case FORMAT_R8_SNORM: return component < 1;
2969 case FORMAT_G8R8_SNORM: return component < 2;
2970 case FORMAT_X8B8G8R8_SNORM: return component < 3;
2971 case FORMAT_A8B8G8R8_SNORM: return component < 3;
2972 case FORMAT_R8I: return component < 1;
2973 case FORMAT_R8UI: return component < 1;
2974 case FORMAT_G8R8I: return component < 2;
2975 case FORMAT_G8R8UI: return component < 2;
2976 case FORMAT_X8B8G8R8I: return component < 3;
2977 case FORMAT_X8B8G8R8UI: return component < 3;
2978 case FORMAT_A8B8G8R8I: return component < 3;
2979 case FORMAT_A8B8G8R8UI: return component < 3;
2980 case FORMAT_R32I: return component < 1;
2981 case FORMAT_R32UI: return component < 1;
2982 case FORMAT_G32R32I: return component < 2;
2983 case FORMAT_G32R32UI: return component < 2;
2984 case FORMAT_X32B32G32R32I: return component < 3;
2985 case FORMAT_X32B32G32R32UI: return component < 3;
2986 case FORMAT_A32B32G32R32I: return component < 3;
2987 case FORMAT_A32B32G32R32UI: return component < 3;
2988 case FORMAT_G8R8: return component < 2;
2989 case FORMAT_X8R8G8B8: return component < 3;
2990 case FORMAT_X8B8G8R8: return component < 3;
2991 case FORMAT_A8R8G8B8: return component < 3;
2992 case FORMAT_A8B8G8R8: return component < 3;
2993 case FORMAT_SRGB8_X8: return component < 3;
2994 case FORMAT_SRGB8_A8: return component < 3;
2995 case FORMAT_V8U8: return false;
2996 case FORMAT_Q8W8V8U8: return false;
2997 case FORMAT_X8L8V8U8: return false;
2998 case FORMAT_R32F: return component < 1;
2999 case FORMAT_G32R32F: return component < 2;
3000 case FORMAT_X32B32G32R32F: return component < 3;
3001 case FORMAT_A32B32G32R32F: return component < 3;
3002 case FORMAT_X32B32G32R32F_UNSIGNED: return component < 3;
3003 case FORMAT_A8: return false;
3004 case FORMAT_R8: return component < 1;
3005 case FORMAT_L8: return component < 1;
3006 case FORMAT_A8L8: return component < 1;
3007 case FORMAT_D32F_LOCKABLE: return false;
3008 case FORMAT_D32FS8_TEXTURE: return false;
3009 case FORMAT_D32F_SHADOW: return false;
3010 case FORMAT_D32FS8_SHADOW: return false;
3011 case FORMAT_L16: return component < 1;
3012 case FORMAT_G16R16: return component < 2;
3013 case FORMAT_A16B16G16R16: return component < 3;
3014 case FORMAT_R16I: return component < 1;
3015 case FORMAT_R16UI: return component < 1;
3016 case FORMAT_G16R16I: return component < 2;
3017 case FORMAT_G16R16UI: return component < 2;
3018 case FORMAT_X16B16G16R16I: return component < 3;
3019 case FORMAT_X16B16G16R16UI: return component < 3;
3020 case FORMAT_A16B16G16R16I: return component < 3;
3021 case FORMAT_A16B16G16R16UI: return component < 3;
3022 case FORMAT_V16U16: return false;
3023 case FORMAT_A16W16V16U16: return false;
3024 case FORMAT_Q16W16V16U16: return false;
3025 case FORMAT_YV12_BT601: return component < 3;
3026 case FORMAT_YV12_BT709: return component < 3;
3027 case FORMAT_YV12_JFIF: return component < 3;
3028 default:
3029 ASSERT(false);
3030 }
3031
3032 return false;
3033 }
3034}
3035