| 1 | // Copyright 2016 The SwiftShader Authors. All Rights Reserved. | 
| 2 | // | 
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); | 
| 4 | // you may not use this file except in compliance with the License. | 
| 5 | // You may obtain a copy of the License at | 
| 6 | // | 
| 7 | //    http://www.apache.org/licenses/LICENSE-2.0 | 
| 8 | // | 
| 9 | // Unless required by applicable law or agreed to in writing, software | 
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, | 
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
| 12 | // See the License for the specific language governing permissions and | 
| 13 | // limitations under the License. | 
| 14 |  | 
| 15 | #include "VertexPipeline.hpp" | 
| 16 |  | 
| 17 | #include "Renderer/Vertex.hpp" | 
| 18 | #include "Renderer/Renderer.hpp" | 
| 19 | #include "Common/Debug.hpp" | 
| 20 |  | 
| 21 | #include <string.h> | 
| 22 | #include <stdlib.h> | 
| 23 | #include <stdio.h> | 
| 24 |  | 
| 25 | #undef max | 
| 26 | #undef min | 
| 27 |  | 
| 28 | namespace sw | 
| 29 | { | 
| 30 | 	extern bool secondaryColor; | 
| 31 |  | 
| 32 | 	VertexPipeline::VertexPipeline(const VertexProcessor::State &state) : VertexRoutine(state, 0) | 
| 33 | 	{ | 
| 34 | 	} | 
| 35 |  | 
| 36 | 	VertexPipeline::~VertexPipeline() | 
| 37 | 	{ | 
| 38 | 	} | 
| 39 |  | 
| 40 | 	Vector4f VertexPipeline::transformBlend(const Register &src, const Pointer<Byte> &matrix, bool homogeneous) | 
| 41 | 	{ | 
| 42 | 		Vector4f dst; | 
| 43 |  | 
| 44 | 		if(state.vertexBlendMatrixCount == 0) | 
| 45 | 		{ | 
| 46 | 			dst = transform(src, matrix, homogeneous); | 
| 47 | 		} | 
| 48 | 		else | 
| 49 | 		{ | 
| 50 | 			UInt index0[4]; | 
| 51 | 			UInt index1[4]; | 
| 52 | 			UInt index2[4]; | 
| 53 | 			UInt index3[4]; | 
| 54 |  | 
| 55 | 			if(state.indexedVertexBlendEnable) | 
| 56 | 			{ | 
| 57 | 				for(int i = 0; i < 4; i++) | 
| 58 | 				{ | 
| 59 | 					Float4 B = v[BlendIndices].x; | 
| 60 | 					UInt indices; | 
| 61 |  | 
| 62 | 					switch(i) | 
| 63 | 					{ | 
| 64 | 					case 0: indices = As<UInt>(Float(B.x)); break; | 
| 65 | 					case 1: indices = As<UInt>(Float(B.y)); break; | 
| 66 | 					case 2: indices = As<UInt>(Float(B.z)); break; | 
| 67 | 					case 3: indices = As<UInt>(Float(B.w)); break; | 
| 68 | 					} | 
| 69 |  | 
| 70 | 					index0[i] = (indices & 0x000000FF) << 6; | 
| 71 | 					index1[i] = (indices & 0x0000FF00) >> 2; | 
| 72 | 					index2[i] = (indices & 0x00FF0000) >> 10; | 
| 73 | 					index3[i] = (indices & 0xFF000000) >> 18; | 
| 74 | 				} | 
| 75 | 			} | 
| 76 | 			else | 
| 77 | 			{ | 
| 78 | 				for(int i = 0; i < 4; i++) | 
| 79 | 				{ | 
| 80 | 					index0[i] = 0 * 64; | 
| 81 | 					index1[i] = 1 * 64; | 
| 82 | 					index2[i] = 2 * 64; | 
| 83 | 					index3[i] = 3 * 64; | 
| 84 | 				} | 
| 85 | 			} | 
| 86 |  | 
| 87 | 			Float4 weight0; | 
| 88 | 			Float4 weight1; | 
| 89 | 			Float4 weight2; | 
| 90 | 			Float4 weight3; | 
| 91 |  | 
| 92 | 			switch(state.vertexBlendMatrixCount) | 
| 93 | 			{ | 
| 94 | 			case 4: weight2 = v[BlendWeight].z; | 
| 95 | 			case 3: weight1 = v[BlendWeight].y; | 
| 96 | 			case 2: weight0 = v[BlendWeight].x; | 
| 97 | 			case 1: | 
| 98 | 				break; | 
| 99 | 			} | 
| 100 |  | 
| 101 | 			if(state.vertexBlendMatrixCount == 1) | 
| 102 | 			{ | 
| 103 | 				dst = transform(src, matrix, index0, homogeneous); | 
| 104 | 			} | 
| 105 | 			else if(state.vertexBlendMatrixCount == 2) | 
| 106 | 			{ | 
| 107 | 				weight1 = Float4(1.0f) - weight0; | 
| 108 |  | 
| 109 | 				Vector4f pos0; | 
| 110 | 				Vector4f pos1; | 
| 111 |  | 
| 112 | 				pos0 = transform(src, matrix, index0, homogeneous); | 
| 113 | 				pos1 = transform(src, matrix, index1, homogeneous); | 
| 114 |  | 
| 115 | 				dst.x = pos0.x * weight0 + pos1.x * weight1;   // FIXME: Vector4f operators | 
| 116 | 				dst.y = pos0.y * weight0 + pos1.y * weight1; | 
| 117 | 				dst.z = pos0.z * weight0 + pos1.z * weight1; | 
| 118 | 				dst.w = pos0.w * weight0 + pos1.w * weight1; | 
| 119 | 			} | 
| 120 | 			else if(state.vertexBlendMatrixCount == 3) | 
| 121 | 			{ | 
| 122 | 				weight2 = Float4(1.0f) - (weight0 + weight1); | 
| 123 |  | 
| 124 | 				Vector4f pos0; | 
| 125 | 				Vector4f pos1; | 
| 126 | 				Vector4f pos2; | 
| 127 |  | 
| 128 | 				pos0 = transform(src, matrix, index0, homogeneous); | 
| 129 | 				pos1 = transform(src, matrix, index1, homogeneous); | 
| 130 | 				pos2 = transform(src, matrix, index2, homogeneous); | 
| 131 |  | 
| 132 | 				dst.x = pos0.x * weight0 + pos1.x * weight1 + pos2.x * weight2; | 
| 133 | 				dst.y = pos0.y * weight0 + pos1.y * weight1 + pos2.y * weight2; | 
| 134 | 				dst.z = pos0.z * weight0 + pos1.z * weight1 + pos2.z * weight2; | 
| 135 | 				dst.w = pos0.w * weight0 + pos1.w * weight1 + pos2.w * weight2; | 
| 136 | 			} | 
| 137 | 			else if(state.vertexBlendMatrixCount == 4) | 
| 138 | 			{ | 
| 139 | 				weight3 = Float4(1.0f) - (weight0 + weight1 + weight2); | 
| 140 |  | 
| 141 | 				Vector4f pos0; | 
| 142 | 				Vector4f pos1; | 
| 143 | 				Vector4f pos2; | 
| 144 | 				Vector4f pos3; | 
| 145 |  | 
| 146 | 				pos0 = transform(src, matrix, index0, homogeneous); | 
| 147 | 				pos1 = transform(src, matrix, index1, homogeneous); | 
| 148 | 				pos2 = transform(src, matrix, index2, homogeneous); | 
| 149 | 				pos3 = transform(src, matrix, index3, homogeneous); | 
| 150 |  | 
| 151 | 				dst.x = pos0.x * weight0 + pos1.x * weight1 + pos2.x * weight2 + pos3.x * weight3; | 
| 152 | 				dst.y = pos0.y * weight0 + pos1.y * weight1 + pos2.y * weight2 + pos3.y * weight3; | 
| 153 | 				dst.z = pos0.z * weight0 + pos1.z * weight1 + pos2.z * weight2 + pos3.z * weight3; | 
| 154 | 				dst.w = pos0.w * weight0 + pos1.w * weight1 + pos2.w * weight2 + pos3.w * weight3; | 
| 155 | 			} | 
| 156 | 		} | 
| 157 |  | 
| 158 | 		return dst; | 
| 159 | 	} | 
| 160 |  | 
| 161 | 	void VertexPipeline::pipeline(UInt &index) | 
| 162 | 	{ | 
| 163 | 		Vector4f position; | 
| 164 | 		Vector4f normal; | 
| 165 |  | 
| 166 | 		if(!state.preTransformed) | 
| 167 | 		{ | 
| 168 | 			position = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.transformT)), true); | 
| 169 | 		} | 
| 170 | 		else | 
| 171 | 		{ | 
| 172 | 			position = v[PositionT]; | 
| 173 | 		} | 
| 174 |  | 
| 175 | 		o[Pos].x = position.x; | 
| 176 | 		o[Pos].y = position.y; | 
| 177 | 		o[Pos].z = position.z; | 
| 178 | 		o[Pos].w = position.w; | 
| 179 |  | 
| 180 | 		Vector4f vertexPosition = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true); | 
| 181 |  | 
| 182 | 		if(state.vertexNormalActive) | 
| 183 | 		{ | 
| 184 | 			normal = transformBlend(v[Normal], Pointer<Byte>(data + OFFSET(DrawData,ff.normalTransformT)), false); | 
| 185 |  | 
| 186 | 			if(state.normalizeNormals) | 
| 187 | 			{ | 
| 188 | 				normal = normalize(normal); | 
| 189 | 			} | 
| 190 | 		} | 
| 191 |  | 
| 192 | 		if(!state.vertexLightingActive) | 
| 193 | 		{ | 
| 194 | 			// FIXME: Don't process if not used at all | 
| 195 | 			if(state.diffuseActive && state.input[Color0]) | 
| 196 | 			{ | 
| 197 | 				Vector4f diffuse = v[Color0]; | 
| 198 |  | 
| 199 | 				o[C0].x = diffuse.x; | 
| 200 | 				o[C0].y = diffuse.y; | 
| 201 | 				o[C0].z = diffuse.z; | 
| 202 | 				o[C0].w = diffuse.w; | 
| 203 | 			} | 
| 204 | 			else | 
| 205 | 			{ | 
| 206 | 				o[C0].x = Float4(1.0f); | 
| 207 | 				o[C0].y = Float4(1.0f); | 
| 208 | 				o[C0].z = Float4(1.0f); | 
| 209 | 				o[C0].w = Float4(1.0f); | 
| 210 | 			} | 
| 211 |  | 
| 212 | 			// FIXME: Don't process if not used at all | 
| 213 | 			if(state.specularActive && state.input[Color1]) | 
| 214 | 			{ | 
| 215 | 				Vector4f specular = v[Color1]; | 
| 216 |  | 
| 217 | 				o[C1].x = specular.x; | 
| 218 | 				o[C1].y = specular.y; | 
| 219 | 				o[C1].z = specular.z; | 
| 220 | 				o[C1].w = specular.w; | 
| 221 | 			} | 
| 222 | 			else | 
| 223 | 			{ | 
| 224 | 				o[C1].x = Float4(0.0f); | 
| 225 | 				o[C1].y = Float4(0.0f); | 
| 226 | 				o[C1].z = Float4(0.0f); | 
| 227 | 				o[C1].w = Float4(1.0f); | 
| 228 | 			} | 
| 229 | 		} | 
| 230 | 		else | 
| 231 | 		{ | 
| 232 | 			o[C0].x = Float4(0.0f); | 
| 233 | 			o[C0].y = Float4(0.0f); | 
| 234 | 			o[C0].z = Float4(0.0f); | 
| 235 | 			o[C0].w = Float4(0.0f); | 
| 236 |  | 
| 237 | 			o[C1].x = Float4(0.0f); | 
| 238 | 			o[C1].y = Float4(0.0f); | 
| 239 | 			o[C1].z = Float4(0.0f); | 
| 240 | 			o[C1].w = Float4(0.0f); | 
| 241 |  | 
| 242 | 			Vector4f ambient; | 
| 243 | 			Float4 globalAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.globalAmbient));   // FIXME: Unpack | 
| 244 |  | 
| 245 | 			ambient.x = globalAmbient.x; | 
| 246 | 			ambient.y = globalAmbient.y; | 
| 247 | 			ambient.z = globalAmbient.z; | 
| 248 |  | 
| 249 | 			for(int i = 0; i < 8; i++) | 
| 250 | 			{ | 
| 251 | 				if(!(state.vertexLightActive & (1 << i))) | 
| 252 | 				{ | 
| 253 | 					continue; | 
| 254 | 				} | 
| 255 |  | 
| 256 | 				Vector4f L;    // Light vector | 
| 257 | 				Float4 att;   // Attenuation | 
| 258 |  | 
| 259 | 				// Attenuation | 
| 260 | 				{ | 
| 261 | 					Float4 d;   // Distance | 
| 262 |  | 
| 263 | 					L.x = L.y = L.z = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightPosition[i]));   // FIXME: Unpack | 
| 264 | 					L.x = L.x.xxxx; | 
| 265 | 					L.y = L.y.yyyy; | 
| 266 | 					L.z = L.z.zzzz; | 
| 267 |  | 
| 268 | 					L.x -= vertexPosition.x; | 
| 269 | 					L.y -= vertexPosition.y; | 
| 270 | 					L.z -= vertexPosition.z; | 
| 271 | 					d = dot3(L, L); | 
| 272 | 					d = RcpSqrt_pp(d);     // FIXME: Sufficient precision? | 
| 273 | 					L.x *= d; | 
| 274 | 					L.y *= d; | 
| 275 | 					L.z *= d; | 
| 276 | 					d = Rcp_pp(d);       // FIXME: Sufficient precision? | 
| 277 |  | 
| 278 | 					Float4 q = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationQuadratic[i])); | 
| 279 | 					Float4 l = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationLinear[i])); | 
| 280 | 					Float4 c = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationConstant[i])); | 
| 281 |  | 
| 282 | 					att = Rcp_pp((q * d + l) * d + c); | 
| 283 | 				} | 
| 284 |  | 
| 285 | 				// Ambient per light | 
| 286 | 				{ | 
| 287 | 					Float4 lightAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightAmbient[i]));   // FIXME: Unpack | 
| 288 |  | 
| 289 | 					ambient.x = ambient.x + lightAmbient.x * att; | 
| 290 | 					ambient.y = ambient.y + lightAmbient.y * att; | 
| 291 | 					ambient.z = ambient.z + lightAmbient.z * att; | 
| 292 | 				} | 
| 293 |  | 
| 294 | 				// Diffuse | 
| 295 | 				if(state.vertexNormalActive) | 
| 296 | 				{ | 
| 297 | 					Float4 dot; | 
| 298 |  | 
| 299 | 					dot = dot3(L, normal); | 
| 300 | 					dot = Max(dot, Float4(0.0f)); | 
| 301 | 					dot *= att; | 
| 302 |  | 
| 303 | 					Vector4f diff; | 
| 304 |  | 
| 305 | 					if(state.vertexDiffuseMaterialSourceActive == MATERIAL_MATERIAL) | 
| 306 | 					{ | 
| 307 | 						diff.x = diff.y = diff.z = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialDiffuse));   // FIXME: Unpack | 
| 308 | 						diff.x = diff.x.xxxx; | 
| 309 | 						diff.y = diff.y.yyyy; | 
| 310 | 						diff.z = diff.z.zzzz; | 
| 311 | 					} | 
| 312 | 					else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR1) | 
| 313 | 					{ | 
| 314 | 						diff = v[Color0]; | 
| 315 | 					} | 
| 316 | 					else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR2) | 
| 317 | 					{ | 
| 318 | 						diff = v[Color1]; | 
| 319 | 					} | 
| 320 | 					else ASSERT(false); | 
| 321 |  | 
| 322 | 					Float4 lightDiffuse = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightDiffuse[i])); | 
| 323 |  | 
| 324 | 					o[C0].x = o[C0].x + diff.x * dot * lightDiffuse.x;   // FIXME: Clamp first? | 
| 325 | 					o[C0].y = o[C0].y + diff.y * dot * lightDiffuse.y;   // FIXME: Clamp first? | 
| 326 | 					o[C0].z = o[C0].z + diff.z * dot * lightDiffuse.z;   // FIXME: Clamp first? | 
| 327 | 				} | 
| 328 |  | 
| 329 | 				// Specular | 
| 330 | 				if(state.vertexSpecularActive) | 
| 331 | 				{ | 
| 332 | 					Vector4f S; | 
| 333 | 					Vector4f C;   // Camera vector | 
| 334 | 					Float4 pow; | 
| 335 |  | 
| 336 | 					pow = *Pointer<Float>(data + OFFSET(DrawData,ff.materialShininess)); | 
| 337 |  | 
| 338 | 					S.x = Float4(0.0f) - vertexPosition.x; | 
| 339 | 					S.y = Float4(0.0f) - vertexPosition.y; | 
| 340 | 					S.z = Float4(0.0f) - vertexPosition.z; | 
| 341 | 					C = normalize(S); | 
| 342 |  | 
| 343 | 					S.x = L.x + C.x; | 
| 344 | 					S.y = L.y + C.y; | 
| 345 | 					S.z = L.z + C.z; | 
| 346 | 					C = normalize(S); | 
| 347 |  | 
| 348 | 					Float4 dot = Max(dot3(C, normal), Float4(0.0f));   // FIXME: max(dot3(C, normal), 0) | 
| 349 |  | 
| 350 | 					Float4 P = power(dot, pow); | 
| 351 | 					P *= att; | 
| 352 |  | 
| 353 | 					Vector4f spec; | 
| 354 |  | 
| 355 | 					if(state.vertexSpecularMaterialSourceActive == MATERIAL_MATERIAL) | 
| 356 | 					{ | 
| 357 | 						Float4 materialSpecular = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialSpecular));   // FIXME: Unpack | 
| 358 |  | 
| 359 | 						spec.x = materialSpecular.x; | 
| 360 | 						spec.y = materialSpecular.y; | 
| 361 | 						spec.z = materialSpecular.z; | 
| 362 | 					} | 
| 363 | 					else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR1) | 
| 364 | 					{ | 
| 365 | 						spec = v[Color0]; | 
| 366 | 					} | 
| 367 | 					else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR2) | 
| 368 | 					{ | 
| 369 | 						spec = v[Color1]; | 
| 370 | 					} | 
| 371 | 					else ASSERT(false); | 
| 372 |  | 
| 373 | 					Float4 lightSpecular = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightSpecular[i])); | 
| 374 |  | 
| 375 | 					spec.x *= lightSpecular.x; | 
| 376 | 					spec.y *= lightSpecular.y; | 
| 377 | 					spec.z *= lightSpecular.z; | 
| 378 |  | 
| 379 | 					spec.x *= P; | 
| 380 | 					spec.y *= P; | 
| 381 | 					spec.z *= P; | 
| 382 |  | 
| 383 | 					spec.x = Max(spec.x, Float4(0.0f)); | 
| 384 | 					spec.y = Max(spec.y, Float4(0.0f)); | 
| 385 | 					spec.z = Max(spec.z, Float4(0.0f)); | 
| 386 |  | 
| 387 | 					if(secondaryColor) | 
| 388 | 					{ | 
| 389 | 						o[C1].x = o[C1].x + spec.x; | 
| 390 | 						o[C1].y = o[C1].y + spec.y; | 
| 391 | 						o[C1].z = o[C1].z + spec.z; | 
| 392 | 					} | 
| 393 | 					else | 
| 394 | 					{ | 
| 395 | 						o[C0].x = o[C0].x + spec.x; | 
| 396 | 						o[C0].y = o[C0].y + spec.y; | 
| 397 | 						o[C0].z = o[C0].z + spec.z; | 
| 398 | 					} | 
| 399 | 				} | 
| 400 | 			} | 
| 401 |  | 
| 402 | 			if(state.vertexAmbientMaterialSourceActive == MATERIAL_MATERIAL) | 
| 403 | 			{ | 
| 404 | 				Float4 materialAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialAmbient));   // FIXME: Unpack | 
| 405 |  | 
| 406 | 				ambient.x = ambient.x * materialAmbient.x; | 
| 407 | 				ambient.y = ambient.y * materialAmbient.y; | 
| 408 | 				ambient.z = ambient.z * materialAmbient.z; | 
| 409 | 			} | 
| 410 | 			else if(state.vertexAmbientMaterialSourceActive == MATERIAL_COLOR1) | 
| 411 | 			{ | 
| 412 | 				Vector4f materialDiffuse = v[Color0]; | 
| 413 |  | 
| 414 | 				ambient.x = ambient.x * materialDiffuse.x; | 
| 415 | 				ambient.y = ambient.y * materialDiffuse.y; | 
| 416 | 				ambient.z = ambient.z * materialDiffuse.z; | 
| 417 | 			} | 
| 418 | 			else if(state.vertexAmbientMaterialSourceActive == MATERIAL_COLOR2) | 
| 419 | 			{ | 
| 420 | 				Vector4f materialSpecular = v[Color1]; | 
| 421 |  | 
| 422 | 				ambient.x = ambient.x * materialSpecular.x; | 
| 423 | 				ambient.y = ambient.y * materialSpecular.y; | 
| 424 | 				ambient.z = ambient.z * materialSpecular.z; | 
| 425 | 			} | 
| 426 | 			else ASSERT(false); | 
| 427 |  | 
| 428 | 			o[C0].x = o[C0].x + ambient.x; | 
| 429 | 			o[C0].y = o[C0].y + ambient.y; | 
| 430 | 			o[C0].z = o[C0].z + ambient.z; | 
| 431 |  | 
| 432 | 			// Emissive | 
| 433 | 			if(state.vertexEmissiveMaterialSourceActive == MATERIAL_MATERIAL) | 
| 434 | 			{ | 
| 435 | 				Float4 materialEmission = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialEmission));   // FIXME: Unpack | 
| 436 |  | 
| 437 | 				o[C0].x = o[C0].x + materialEmission.x; | 
| 438 | 				o[C0].y = o[C0].y + materialEmission.y; | 
| 439 | 				o[C0].z = o[C0].z + materialEmission.z; | 
| 440 | 			} | 
| 441 | 			else if(state.vertexEmissiveMaterialSourceActive == MATERIAL_COLOR1) | 
| 442 | 			{ | 
| 443 | 				Vector4f materialSpecular = v[Color0]; | 
| 444 |  | 
| 445 | 				o[C0].x = o[C0].x + materialSpecular.x; | 
| 446 | 				o[C0].y = o[C0].y + materialSpecular.y; | 
| 447 | 				o[C0].z = o[C0].z + materialSpecular.z; | 
| 448 | 			} | 
| 449 | 			else if(state.vertexEmissiveMaterialSourceActive == MATERIAL_COLOR2) | 
| 450 | 			{ | 
| 451 | 				Vector4f materialSpecular = v[Color1]; | 
| 452 |  | 
| 453 | 				o[C0].x = o[C0].x + materialSpecular.x; | 
| 454 | 				o[C0].y = o[C0].y + materialSpecular.y; | 
| 455 | 				o[C0].z = o[C0].z + materialSpecular.z; | 
| 456 | 			} | 
| 457 | 			else ASSERT(false); | 
| 458 |  | 
| 459 | 			// Diffuse alpha component | 
| 460 | 			if(state.vertexDiffuseMaterialSourceActive == MATERIAL_MATERIAL) | 
| 461 | 			{ | 
| 462 | 				o[C0].w = Float4(*Pointer<Float4>(data + OFFSET(DrawData,ff.materialDiffuse[0]))).wwww;   // FIXME: Unpack | 
| 463 | 			} | 
| 464 | 			else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR1) | 
| 465 | 			{ | 
| 466 | 				Vector4f alpha = v[Color0]; | 
| 467 | 				o[C0].w = alpha.w; | 
| 468 | 			} | 
| 469 | 			else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR2) | 
| 470 | 			{ | 
| 471 | 				Vector4f alpha = v[Color1]; | 
| 472 | 				o[C0].w = alpha.w; | 
| 473 | 			} | 
| 474 | 			else ASSERT(false); | 
| 475 |  | 
| 476 | 			if(state.vertexSpecularActive) | 
| 477 | 			{ | 
| 478 | 				// Specular alpha component | 
| 479 | 				if(state.vertexSpecularMaterialSourceActive == MATERIAL_MATERIAL) | 
| 480 | 				{ | 
| 481 | 					o[C1].w = Float4(*Pointer<Float4>(data + OFFSET(DrawData,ff.materialSpecular[3]))).wwww;   // FIXME: Unpack | 
| 482 | 				} | 
| 483 | 				else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR1) | 
| 484 | 				{ | 
| 485 | 					Vector4f alpha = v[Color0]; | 
| 486 | 					o[C1].w = alpha.w; | 
| 487 | 				} | 
| 488 | 				else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR2) | 
| 489 | 				{ | 
| 490 | 					Vector4f alpha = v[Color1]; | 
| 491 | 					o[C1].w = alpha.w; | 
| 492 | 				} | 
| 493 | 				else ASSERT(false); | 
| 494 | 			} | 
| 495 | 		} | 
| 496 |  | 
| 497 | 		if(state.fogActive) | 
| 498 | 		{ | 
| 499 | 			Float4 f; | 
| 500 |  | 
| 501 | 			if(!state.rangeFogActive) | 
| 502 | 			{ | 
| 503 | 				f = Abs(vertexPosition.z); | 
| 504 | 			} | 
| 505 | 			else | 
| 506 | 			{ | 
| 507 | 				f = Sqrt(dot3(vertexPosition, vertexPosition));   // FIXME: f = length(vertexPosition); | 
| 508 | 			} | 
| 509 |  | 
| 510 | 			switch(state.vertexFogMode) | 
| 511 | 			{ | 
| 512 | 			case FOG_NONE: | 
| 513 | 				if(state.specularActive) | 
| 514 | 				{ | 
| 515 | 					o[Fog].x = o[C1].w; | 
| 516 | 				} | 
| 517 | 				else | 
| 518 | 				{ | 
| 519 | 					o[Fog].x = Float4(0.0f); | 
| 520 | 				} | 
| 521 | 				break; | 
| 522 | 			case FOG_LINEAR: | 
| 523 | 				o[Fog].x = f * *Pointer<Float4>(data + OFFSET(DrawData,fog.scale)) + *Pointer<Float4>(data + OFFSET(DrawData,fog.offset)); | 
| 524 | 				break; | 
| 525 | 			case FOG_EXP: | 
| 526 | 				o[Fog].x = exponential2(f * *Pointer<Float4>(data + OFFSET(DrawData,fog.densityE)), true); | 
| 527 | 				break; | 
| 528 | 			case FOG_EXP2: | 
| 529 | 				o[Fog].x = exponential2((f * f) * *Pointer<Float4>(data + OFFSET(DrawData,fog.density2E)), true); | 
| 530 | 				break; | 
| 531 | 			default: | 
| 532 | 				ASSERT(false); | 
| 533 | 			} | 
| 534 | 		} | 
| 535 |  | 
| 536 | 		for(int stage = 0; stage < 8; stage++) | 
| 537 | 		{ | 
| 538 | 			processTextureCoordinate(stage, normal, position); | 
| 539 | 		} | 
| 540 |  | 
| 541 | 		processPointSize(); | 
| 542 | 	} | 
| 543 |  | 
| 544 | 	void VertexPipeline::processTextureCoordinate(int stage, Vector4f &normal, Vector4f &position) | 
| 545 | 	{ | 
| 546 | 		if(state.output[T0 + stage].write) | 
| 547 | 		{ | 
| 548 | 			int i = state.textureState[stage].texCoordIndexActive; | 
| 549 |  | 
| 550 | 			switch(state.textureState[stage].texGenActive) | 
| 551 | 			{ | 
| 552 | 			case TEXGEN_NONE: | 
| 553 | 				{ | 
| 554 | 					Vector4f &&varying = v[TexCoord0 + i]; | 
| 555 |  | 
| 556 | 					o[T0 + stage].x = varying.x; | 
| 557 | 					o[T0 + stage].y = varying.y; | 
| 558 | 					o[T0 + stage].z = varying.z; | 
| 559 | 					o[T0 + stage].w = varying.w; | 
| 560 | 				} | 
| 561 | 				break; | 
| 562 | 			case TEXGEN_PASSTHRU: | 
| 563 | 				{ | 
| 564 | 					Vector4f &&varying = v[TexCoord0 + i]; | 
| 565 |  | 
| 566 | 					o[T0 + stage].x = varying.x; | 
| 567 | 					o[T0 + stage].y = varying.y; | 
| 568 | 					o[T0 + stage].z = varying.z; | 
| 569 | 					o[T0 + stage].w = varying.w; | 
| 570 |  | 
| 571 | 					if(state.input[TexCoord0 + i]) | 
| 572 | 					{ | 
| 573 | 						switch(state.input[TexCoord0 + i].count) | 
| 574 | 						{ | 
| 575 | 						case 1: | 
| 576 | 							o[T0 + stage].y = Float4(1.0f); | 
| 577 | 							o[T0 + stage].z = Float4(0.0f); | 
| 578 | 							o[T0 + stage].w = Float4(0.0f); | 
| 579 | 							break; | 
| 580 | 						case 2: | 
| 581 | 							o[T0 + stage].z = Float4(1.0f); | 
| 582 | 							o[T0 + stage].w = Float4(0.0f); | 
| 583 | 							break; | 
| 584 | 						case 3: | 
| 585 | 							o[T0 + stage].w = Float4(1.0f); | 
| 586 | 							break; | 
| 587 | 						case 4: | 
| 588 | 							break; | 
| 589 | 						default: | 
| 590 | 							ASSERT(false); | 
| 591 | 						} | 
| 592 | 					} | 
| 593 | 				} | 
| 594 | 				break; | 
| 595 | 			case TEXGEN_NORMAL: | 
| 596 | 				{ | 
| 597 | 					Vector4f Nc;   // Normal vector in camera space | 
| 598 |  | 
| 599 | 					if(state.vertexNormalActive) | 
| 600 | 					{ | 
| 601 | 						Nc = normal; | 
| 602 | 					} | 
| 603 | 					else | 
| 604 | 					{ | 
| 605 | 						Nc.x = Float4(0.0f); | 
| 606 | 						Nc.y = Float4(0.0f); | 
| 607 | 						Nc.z = Float4(0.0f); | 
| 608 | 					} | 
| 609 |  | 
| 610 | 					Nc.w = Float4(1.0f); | 
| 611 |  | 
| 612 | 					o[T0 + stage].x = Nc.x; | 
| 613 | 					o[T0 + stage].y = Nc.y; | 
| 614 | 					o[T0 + stage].z = Nc.z; | 
| 615 | 					o[T0 + stage].w = Nc.w; | 
| 616 | 				} | 
| 617 | 				break; | 
| 618 | 			case TEXGEN_POSITION: | 
| 619 | 				{ | 
| 620 | 					Vector4f Pn = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true);   // Position in camera space | 
| 621 |  | 
| 622 | 					Pn.w = Float4(1.0f); | 
| 623 |  | 
| 624 | 					o[T0 + stage].x = Pn.x; | 
| 625 | 					o[T0 + stage].y = Pn.y; | 
| 626 | 					o[T0 + stage].z = Pn.z; | 
| 627 | 					o[T0 + stage].w = Pn.w; | 
| 628 | 				} | 
| 629 | 				break; | 
| 630 | 			case TEXGEN_REFLECTION: | 
| 631 | 				{ | 
| 632 | 					Vector4f R;   // Reflection vector | 
| 633 |  | 
| 634 | 					if(state.vertexNormalActive) | 
| 635 | 					{ | 
| 636 | 						Vector4f Nc;   // Normal vector in camera space | 
| 637 |  | 
| 638 | 						Nc = normal; | 
| 639 |  | 
| 640 | 						if(state.localViewerActive) | 
| 641 | 						{ | 
| 642 | 							Vector4f Ec;   // Eye vector in camera space | 
| 643 | 							Vector4f N2; | 
| 644 |  | 
| 645 | 							Ec = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true); | 
| 646 | 							Ec = normalize(Ec); | 
| 647 |  | 
| 648 | 							// R = E - 2 * N * (E . N) | 
| 649 | 							Float4 dot = Float4(2.0f) * dot3(Ec, Nc); | 
| 650 |  | 
| 651 | 							R.x = Ec.x - Nc.x * dot; | 
| 652 | 							R.y = Ec.y - Nc.y * dot; | 
| 653 | 							R.z = Ec.z - Nc.z * dot; | 
| 654 | 						} | 
| 655 | 						else | 
| 656 | 						{ | 
| 657 | 							// u = -2 * Nz * Nx | 
| 658 | 							// v = -2 * Nz * Ny | 
| 659 | 							// w = 1 - 2 * Nz * Nz | 
| 660 |  | 
| 661 | 							R.x = -Float4(2.0f) * Nc.z * Nc.x; | 
| 662 | 							R.y = -Float4(2.0f) * Nc.z * Nc.y; | 
| 663 | 							R.z = Float4(1.0f) - Float4(2.0f) * Nc.z * Nc.z; | 
| 664 | 						} | 
| 665 | 					} | 
| 666 | 					else | 
| 667 | 					{ | 
| 668 | 						R.x = Float4(0.0f); | 
| 669 | 						R.y = Float4(0.0f); | 
| 670 | 						R.z = Float4(0.0f); | 
| 671 | 					} | 
| 672 |  | 
| 673 | 					R.w = Float4(1.0f); | 
| 674 |  | 
| 675 | 					o[T0 + stage].x = R.x; | 
| 676 | 					o[T0 + stage].y = R.y; | 
| 677 | 					o[T0 + stage].z = R.z; | 
| 678 | 					o[T0 + stage].w = R.w; | 
| 679 | 				} | 
| 680 | 				break; | 
| 681 | 			case TEXGEN_SPHEREMAP: | 
| 682 | 				{ | 
| 683 | 					Vector4f R;   // Reflection vector | 
| 684 |  | 
| 685 | 					if(state.vertexNormalActive) | 
| 686 | 					{ | 
| 687 | 						Vector4f Nc;   // Normal vector in camera space | 
| 688 |  | 
| 689 | 						Nc = normal; | 
| 690 |  | 
| 691 | 						if(state.localViewerActive) | 
| 692 | 						{ | 
| 693 | 							Vector4f Ec;   // Eye vector in camera space | 
| 694 | 							Vector4f N2; | 
| 695 |  | 
| 696 | 							Ec = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true); | 
| 697 | 							Ec = normalize(Ec); | 
| 698 |  | 
| 699 | 							// R = E - 2 * N * (E . N) | 
| 700 | 							Float4 dot = Float4(2.0f) * dot3(Ec, Nc); | 
| 701 |  | 
| 702 | 							R.x = Ec.x - Nc.x * dot; | 
| 703 | 							R.y = Ec.y - Nc.y * dot; | 
| 704 | 							R.z = Ec.z - Nc.z * dot; | 
| 705 | 						} | 
| 706 | 						else | 
| 707 | 						{ | 
| 708 | 							// u = -2 * Nz * Nx | 
| 709 | 							// v = -2 * Nz * Ny | 
| 710 | 							// w = 1 - 2 * Nz * Nz | 
| 711 |  | 
| 712 | 							R.x = -Float4(2.0f) * Nc.z * Nc.x; | 
| 713 | 							R.y = -Float4(2.0f) * Nc.z * Nc.y; | 
| 714 | 							R.z = Float4(1.0f) - Float4(2.0f) * Nc.z * Nc.z; | 
| 715 | 						} | 
| 716 | 					} | 
| 717 | 					else | 
| 718 | 					{ | 
| 719 | 						R.x = Float4(0.0f); | 
| 720 | 						R.y = Float4(0.0f); | 
| 721 | 						R.z = Float4(0.0f); | 
| 722 | 					} | 
| 723 |  | 
| 724 | 					R.z -= Float4(1.0f); | 
| 725 | 					R = normalize(R); | 
| 726 | 					R.x = Float4(0.5f) * R.x + Float4(0.5f); | 
| 727 | 					R.y = Float4(0.5f) * R.y + Float4(0.5f); | 
| 728 |  | 
| 729 | 					R.z = Float4(1.0f); | 
| 730 | 					R.w = Float4(0.0f); | 
| 731 |  | 
| 732 | 					o[T0 + stage].x = R.x; | 
| 733 | 					o[T0 + stage].y = R.y; | 
| 734 | 					o[T0 + stage].z = R.z; | 
| 735 | 					o[T0 + stage].w = R.w; | 
| 736 | 				} | 
| 737 | 				break; | 
| 738 | 			default: | 
| 739 | 				ASSERT(false); | 
| 740 | 			} | 
| 741 |  | 
| 742 | 			Vector4f ; | 
| 743 | 			Vector4f ; | 
| 744 | 			Vector4f ; | 
| 745 | 			Vector4f ; | 
| 746 |  | 
| 747 | 			Vector4f T; | 
| 748 | 			Vector4f t; | 
| 749 |  | 
| 750 | 			T.x = o[T0 + stage].x; | 
| 751 | 			T.y = o[T0 + stage].y; | 
| 752 | 			T.z = o[T0 + stage].z; | 
| 753 | 			T.w = o[T0 + stage].w; | 
| 754 |  | 
| 755 | 			switch(state.textureState[stage].textureTransformCountActive) | 
| 756 | 			{ | 
| 757 | 			case 4: | 
| 758 | 				texTrans3.x = texTrans3.y = texTrans3.z = texTrans3.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][3]));   // FIXME: Unpack | 
| 759 | 				texTrans3.x = texTrans3.x.xxxx; | 
| 760 | 				texTrans3.y = texTrans3.y.yyyy; | 
| 761 | 				texTrans3.z = texTrans3.z.zzzz; | 
| 762 | 				texTrans3.w = texTrans3.w.wwww; | 
| 763 | 				t.w = dot4(T, texTrans3); | 
| 764 | 			case 3: | 
| 765 | 				texTrans2.x = texTrans2.y = texTrans2.z = texTrans2.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][2]));   // FIXME: Unpack | 
| 766 | 				texTrans2.x = texTrans2.x.xxxx; | 
| 767 | 				texTrans2.y = texTrans2.y.yyyy; | 
| 768 | 				texTrans2.z = texTrans2.z.zzzz; | 
| 769 | 				texTrans2.w = texTrans2.w.wwww; | 
| 770 | 				t.z = dot4(T, texTrans2); | 
| 771 | 			case 2: | 
| 772 | 				texTrans1.x = texTrans1.y = texTrans1.z = texTrans1.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][1]));   // FIXME: Unpack | 
| 773 | 				texTrans1.x = texTrans1.x.xxxx; | 
| 774 | 				texTrans1.y = texTrans1.y.yyyy; | 
| 775 | 				texTrans1.z = texTrans1.z.zzzz; | 
| 776 | 				texTrans1.w = texTrans1.w.wwww; | 
| 777 | 				t.y = dot4(T, texTrans1); | 
| 778 | 			case 1: | 
| 779 | 				texTrans0.x = texTrans0.y = texTrans0.z = texTrans0.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][0]));   // FIXME: Unpack | 
| 780 | 				texTrans0.x = texTrans0.x.xxxx; | 
| 781 | 				texTrans0.y = texTrans0.y.yyyy; | 
| 782 | 				texTrans0.z = texTrans0.z.zzzz; | 
| 783 | 				texTrans0.w = texTrans0.w.wwww; | 
| 784 | 				t.x = dot4(T, texTrans0); | 
| 785 |  | 
| 786 | 				o[T0 + stage].x = t.x; | 
| 787 | 				o[T0 + stage].y = t.y; | 
| 788 | 				o[T0 + stage].z = t.z; | 
| 789 | 				o[T0 + stage].w = t.w; | 
| 790 | 			case 0: | 
| 791 | 				break; | 
| 792 | 			default: | 
| 793 | 				ASSERT(false); | 
| 794 | 			} | 
| 795 | 		} | 
| 796 | 	} | 
| 797 |  | 
| 798 | 	void VertexPipeline::processPointSize() | 
| 799 | 	{ | 
| 800 | 		if(!state.pointSizeActive) | 
| 801 | 		{ | 
| 802 | 			return;   // Use global pointsize | 
| 803 | 		} | 
| 804 |  | 
| 805 | 		if(state.input[PointSize]) | 
| 806 | 		{ | 
| 807 | 			o[Pts].y = v[PointSize].x; | 
| 808 | 		} | 
| 809 | 		else | 
| 810 | 		{ | 
| 811 | 			o[Pts].y = *Pointer<Float4>(data + OFFSET(DrawData,point.pointSize)); | 
| 812 | 		} | 
| 813 |  | 
| 814 | 		if(state.pointScaleActive && !state.preTransformed) | 
| 815 | 		{ | 
| 816 | 			Vector4f p = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true); | 
| 817 |  | 
| 818 | 			Float4 d = Sqrt(dot3(p, p));   // FIXME: length(p); | 
| 819 |  | 
| 820 | 			Float4 A = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleA));   // FIXME: Unpack | 
| 821 | 			Float4 B = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleB));   // FIXME: Unpack | 
| 822 | 			Float4 C = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleC));   // FIXME: Unpack | 
| 823 |  | 
| 824 | 			A = RcpSqrt_pp(A + d * (B + d * C)); | 
| 825 |  | 
| 826 | 			o[Pts].y = o[Pts].y * Float4(*Pointer<Float>(data + OFFSET(DrawData,viewportHeight))) * A;   // FIXME: Unpack | 
| 827 | 		} | 
| 828 | 	} | 
| 829 |  | 
| 830 | 	Vector4f VertexPipeline::transform(const Register &src, const Pointer<Byte> &matrix, bool homogeneous) | 
| 831 | 	{ | 
| 832 | 		Vector4f dst; | 
| 833 |  | 
| 834 | 		if(homogeneous) | 
| 835 | 		{ | 
| 836 | 			Float4 m[4][4]; | 
| 837 |  | 
| 838 | 			for(int j = 0; j < 4; j++) | 
| 839 | 			{ | 
| 840 | 				for(int i = 0; i < 4; i++) | 
| 841 | 				{ | 
| 842 | 					m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j); | 
| 843 | 					m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j); | 
| 844 | 					m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j); | 
| 845 | 					m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j); | 
| 846 | 				} | 
| 847 | 			} | 
| 848 |  | 
| 849 | 			dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2] + src.w * m[0][3]; | 
| 850 | 			dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2] + src.w * m[1][3]; | 
| 851 | 			dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2] + src.w * m[2][3]; | 
| 852 | 			dst.w = src.x * m[3][0] + src.y * m[3][1] + src.z * m[3][2] + src.w * m[3][3]; | 
| 853 | 		} | 
| 854 | 		else | 
| 855 | 		{ | 
| 856 | 			Float4 m[3][3]; | 
| 857 |  | 
| 858 | 			for(int j = 0; j < 3; j++) | 
| 859 | 			{ | 
| 860 | 				for(int i = 0; i < 3; i++) | 
| 861 | 				{ | 
| 862 | 					m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j); | 
| 863 | 					m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j); | 
| 864 | 					m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j); | 
| 865 | 					m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j); | 
| 866 | 				} | 
| 867 | 			} | 
| 868 |  | 
| 869 | 			dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2]; | 
| 870 | 			dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2]; | 
| 871 | 			dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2]; | 
| 872 | 		} | 
| 873 |  | 
| 874 | 		return dst; | 
| 875 | 	} | 
| 876 |  | 
| 877 | 	Vector4f VertexPipeline::transform(const Register &src, const Pointer<Byte> &matrix, UInt index[4], bool homogeneous) | 
| 878 | 	{ | 
| 879 | 		Vector4f dst; | 
| 880 |  | 
| 881 | 		if(homogeneous) | 
| 882 | 		{ | 
| 883 | 			Float4 m[4][4]; | 
| 884 |  | 
| 885 | 			for(int j = 0; j < 4; j++) | 
| 886 | 			{ | 
| 887 | 				for(int i = 0; i < 4; i++) | 
| 888 | 				{ | 
| 889 | 					m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j + index[0]); | 
| 890 | 					m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j + index[1]); | 
| 891 | 					m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j + index[2]); | 
| 892 | 					m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j + index[3]); | 
| 893 | 				} | 
| 894 | 			} | 
| 895 |  | 
| 896 | 			dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2] + m[0][3]; | 
| 897 | 			dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2] + m[1][3]; | 
| 898 | 			dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2] + m[2][3]; | 
| 899 | 			dst.w = src.x * m[3][0] + src.y * m[3][1] + src.z * m[3][2] + m[3][3]; | 
| 900 | 		} | 
| 901 | 		else | 
| 902 | 		{ | 
| 903 | 			Float4 m[3][3]; | 
| 904 |  | 
| 905 | 			for(int j = 0; j < 3; j++) | 
| 906 | 			{ | 
| 907 | 				for(int i = 0; i < 3; i++) | 
| 908 | 				{ | 
| 909 | 					m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j + index[0]); | 
| 910 | 					m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j + index[1]); | 
| 911 | 					m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j + index[2]); | 
| 912 | 					m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j + index[3]); | 
| 913 | 				} | 
| 914 | 			} | 
| 915 |  | 
| 916 | 			dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2]; | 
| 917 | 			dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2]; | 
| 918 | 			dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2]; | 
| 919 | 		} | 
| 920 |  | 
| 921 | 		return dst; | 
| 922 | 	} | 
| 923 |  | 
| 924 | 	Vector4f VertexPipeline::normalize(Vector4f &src) | 
| 925 | 	{ | 
| 926 | 		Vector4f dst; | 
| 927 |  | 
| 928 | 		Float4 rcpLength = RcpSqrt_pp(dot3(src, src)); | 
| 929 |  | 
| 930 | 		dst.x = src.x * rcpLength; | 
| 931 | 		dst.y = src.y * rcpLength; | 
| 932 | 		dst.z = src.z * rcpLength; | 
| 933 |  | 
| 934 | 		return dst; | 
| 935 | 	} | 
| 936 |  | 
| 937 | 	Float4 VertexPipeline::power(Float4 &src0, Float4 &src1) | 
| 938 | 	{ | 
| 939 | 		Float4 dst = src0; | 
| 940 |  | 
| 941 | 		dst = dst * dst; | 
| 942 | 		dst = dst * dst; | 
| 943 | 		dst = Float4(As<Int4>(dst) - As<Int4>(Float4(1.0f))); | 
| 944 |  | 
| 945 | 		dst *= src1; | 
| 946 |  | 
| 947 | 		dst = As<Float4>(Int4(dst) + As<Int4>(Float4(1.0f))); | 
| 948 | 		dst = RcpSqrt_pp(dst); | 
| 949 | 		dst = RcpSqrt_pp(dst); | 
| 950 |  | 
| 951 | 		return dst; | 
| 952 | 	} | 
| 953 | } | 
| 954 |  |