| 1 | /* Optimized wcscmp for x86-64 with SSE2. | 
|---|
| 2 | Copyright (C) 2011-2020 Free Software Foundation, Inc. | 
|---|
| 3 | Contributed by Intel Corporation. | 
|---|
| 4 | This file is part of the GNU C Library. | 
|---|
| 5 |  | 
|---|
| 6 | The GNU C Library is free software; you can redistribute it and/or | 
|---|
| 7 | modify it under the terms of the GNU Lesser General Public | 
|---|
| 8 | License as published by the Free Software Foundation; either | 
|---|
| 9 | version 2.1 of the License, or (at your option) any later version. | 
|---|
| 10 |  | 
|---|
| 11 | The GNU C Library is distributed in the hope that it will be useful, | 
|---|
| 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|---|
| 14 | Lesser General Public License for more details. | 
|---|
| 15 |  | 
|---|
| 16 | You should have received a copy of the GNU Lesser General Public | 
|---|
| 17 | License along with the GNU C Library; if not, see | 
|---|
| 18 | <https://www.gnu.org/licenses/>.  */ | 
|---|
| 19 |  | 
|---|
| 20 | #include <sysdep.h> | 
|---|
| 21 |  | 
|---|
| 22 | /* Note: wcscmp uses signed comparison, not unsigned as in the strcmp function. */ | 
|---|
| 23 |  | 
|---|
| 24 | .text | 
|---|
| 25 | ENTRY (__wcscmp) | 
|---|
| 26 | /* | 
|---|
| 27 | * This implementation uses SSE2 to compare up to 16 bytes at a time. | 
|---|
| 28 | */ | 
|---|
| 29 | mov	%esi, %eax | 
|---|
| 30 | mov	%edi, %edx | 
|---|
| 31 | pxor	%xmm0, %xmm0		/* clear %xmm0 for null char checks */ | 
|---|
| 32 | mov	%al, %ch | 
|---|
| 33 | mov	%dl, %cl | 
|---|
| 34 | and	$63, %eax		/* rsi alignment in cache line */ | 
|---|
| 35 | and	$63, %edx		/* rdi alignment in cache line */ | 
|---|
| 36 | and	$15, %cl | 
|---|
| 37 | jz	L(continue_00) | 
|---|
| 38 | cmp	$16, %edx | 
|---|
| 39 | jb	L(continue_0) | 
|---|
| 40 | cmp	$32, %edx | 
|---|
| 41 | jb	L(continue_16) | 
|---|
| 42 | cmp	$48, %edx | 
|---|
| 43 | jb	L(continue_32) | 
|---|
| 44 |  | 
|---|
| 45 | L(continue_48): | 
|---|
| 46 | and	$15, %ch | 
|---|
| 47 | jz	L(continue_48_00) | 
|---|
| 48 | cmp	$16, %eax | 
|---|
| 49 | jb	L(continue_0_48) | 
|---|
| 50 | cmp	$32, %eax | 
|---|
| 51 | jb	L(continue_16_48) | 
|---|
| 52 | cmp	$48, %eax | 
|---|
| 53 | jb	L(continue_32_48) | 
|---|
| 54 |  | 
|---|
| 55 | .p2align 4 | 
|---|
| 56 | L(continue_48_48): | 
|---|
| 57 | mov	(%rsi), %ecx | 
|---|
| 58 | cmp	%ecx, (%rdi) | 
|---|
| 59 | jne	L(nequal) | 
|---|
| 60 | test	%ecx, %ecx | 
|---|
| 61 | jz	L(equal) | 
|---|
| 62 |  | 
|---|
| 63 | mov	4(%rsi), %ecx | 
|---|
| 64 | cmp	%ecx, 4(%rdi) | 
|---|
| 65 | jne	L(nequal) | 
|---|
| 66 | test	%ecx, %ecx | 
|---|
| 67 | jz	L(equal) | 
|---|
| 68 |  | 
|---|
| 69 | mov	8(%rsi), %ecx | 
|---|
| 70 | cmp	%ecx, 8(%rdi) | 
|---|
| 71 | jne	L(nequal) | 
|---|
| 72 | test	%ecx, %ecx | 
|---|
| 73 | jz	L(equal) | 
|---|
| 74 |  | 
|---|
| 75 | mov	12(%rsi), %ecx | 
|---|
| 76 | cmp	%ecx, 12(%rdi) | 
|---|
| 77 | jne	L(nequal) | 
|---|
| 78 | test	%ecx, %ecx | 
|---|
| 79 | jz	L(equal) | 
|---|
| 80 |  | 
|---|
| 81 | movdqu	16(%rdi), %xmm1 | 
|---|
| 82 | movdqu	16(%rsi), %xmm2 | 
|---|
| 83 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 84 | pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 85 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 86 | pmovmskb %xmm1, %edx | 
|---|
| 87 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 88 | jnz	L(less4_double_words_16) | 
|---|
| 89 |  | 
|---|
| 90 | movdqu	32(%rdi), %xmm1 | 
|---|
| 91 | movdqu	32(%rsi), %xmm2 | 
|---|
| 92 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 93 | pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 94 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 95 | pmovmskb %xmm1, %edx | 
|---|
| 96 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 97 | jnz	L(less4_double_words_32) | 
|---|
| 98 |  | 
|---|
| 99 | movdqu	48(%rdi), %xmm1 | 
|---|
| 100 | movdqu	48(%rsi), %xmm2 | 
|---|
| 101 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 102 | pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 103 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 104 | pmovmskb %xmm1, %edx | 
|---|
| 105 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 106 | jnz	L(less4_double_words_48) | 
|---|
| 107 |  | 
|---|
| 108 | add	$64, %rsi | 
|---|
| 109 | add	$64, %rdi | 
|---|
| 110 | jmp	L(continue_48_48) | 
|---|
| 111 |  | 
|---|
| 112 | L(continue_0): | 
|---|
| 113 | and	$15, %ch | 
|---|
| 114 | jz	L(continue_0_00) | 
|---|
| 115 | cmp	$16, %eax | 
|---|
| 116 | jb	L(continue_0_0) | 
|---|
| 117 | cmp	$32, %eax | 
|---|
| 118 | jb	L(continue_0_16) | 
|---|
| 119 | cmp	$48, %eax | 
|---|
| 120 | jb	L(continue_0_32) | 
|---|
| 121 |  | 
|---|
| 122 | .p2align 4 | 
|---|
| 123 | L(continue_0_48): | 
|---|
| 124 | mov	(%rsi), %ecx | 
|---|
| 125 | cmp	%ecx, (%rdi) | 
|---|
| 126 | jne	L(nequal) | 
|---|
| 127 | test	%ecx, %ecx | 
|---|
| 128 | jz	L(equal) | 
|---|
| 129 |  | 
|---|
| 130 | mov	4(%rsi), %ecx | 
|---|
| 131 | cmp	%ecx, 4(%rdi) | 
|---|
| 132 | jne	L(nequal) | 
|---|
| 133 | test	%ecx, %ecx | 
|---|
| 134 | jz	L(equal) | 
|---|
| 135 |  | 
|---|
| 136 | mov	8(%rsi), %ecx | 
|---|
| 137 | cmp	%ecx, 8(%rdi) | 
|---|
| 138 | jne	L(nequal) | 
|---|
| 139 | test	%ecx, %ecx | 
|---|
| 140 | jz	L(equal) | 
|---|
| 141 |  | 
|---|
| 142 | mov	12(%rsi), %ecx | 
|---|
| 143 | cmp	%ecx, 12(%rdi) | 
|---|
| 144 | jne	L(nequal) | 
|---|
| 145 | test	%ecx, %ecx | 
|---|
| 146 | jz	L(equal) | 
|---|
| 147 |  | 
|---|
| 148 | movdqu	16(%rdi), %xmm1 | 
|---|
| 149 | movdqu	16(%rsi), %xmm2 | 
|---|
| 150 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 151 | pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 152 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 153 | pmovmskb %xmm1, %edx | 
|---|
| 154 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 155 | jnz	L(less4_double_words_16) | 
|---|
| 156 |  | 
|---|
| 157 | movdqu	32(%rdi), %xmm1 | 
|---|
| 158 | movdqu	32(%rsi), %xmm2 | 
|---|
| 159 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 160 | pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 161 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 162 | pmovmskb %xmm1, %edx | 
|---|
| 163 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 164 | jnz	L(less4_double_words_32) | 
|---|
| 165 |  | 
|---|
| 166 | mov	48(%rsi), %ecx | 
|---|
| 167 | cmp	%ecx, 48(%rdi) | 
|---|
| 168 | jne	L(nequal) | 
|---|
| 169 | test	%ecx, %ecx | 
|---|
| 170 | jz	L(equal) | 
|---|
| 171 |  | 
|---|
| 172 | mov	52(%rsi), %ecx | 
|---|
| 173 | cmp	%ecx, 52(%rdi) | 
|---|
| 174 | jne	L(nequal) | 
|---|
| 175 | test	%ecx, %ecx | 
|---|
| 176 | jz	L(equal) | 
|---|
| 177 |  | 
|---|
| 178 | mov	56(%rsi), %ecx | 
|---|
| 179 | cmp	%ecx, 56(%rdi) | 
|---|
| 180 | jne	L(nequal) | 
|---|
| 181 | test	%ecx, %ecx | 
|---|
| 182 | jz	L(equal) | 
|---|
| 183 |  | 
|---|
| 184 | mov	60(%rsi), %ecx | 
|---|
| 185 | cmp	%ecx, 60(%rdi) | 
|---|
| 186 | jne	L(nequal) | 
|---|
| 187 | test	%ecx, %ecx | 
|---|
| 188 | jz	L(equal) | 
|---|
| 189 |  | 
|---|
| 190 | add	$64, %rsi | 
|---|
| 191 | add	$64, %rdi | 
|---|
| 192 | jmp	L(continue_0_48) | 
|---|
| 193 |  | 
|---|
| 194 | .p2align 4 | 
|---|
| 195 | L(continue_00): | 
|---|
| 196 | and	$15, %ch | 
|---|
| 197 | jz	L(continue_00_00) | 
|---|
| 198 | cmp	$16, %eax | 
|---|
| 199 | jb	L(continue_00_0) | 
|---|
| 200 | cmp	$32, %eax | 
|---|
| 201 | jb	L(continue_00_16) | 
|---|
| 202 | cmp	$48, %eax | 
|---|
| 203 | jb	L(continue_00_32) | 
|---|
| 204 |  | 
|---|
| 205 | .p2align 4 | 
|---|
| 206 | L(continue_00_48): | 
|---|
| 207 | pcmpeqd	(%rdi), %xmm0 | 
|---|
| 208 | mov	(%rdi), %eax | 
|---|
| 209 | pmovmskb %xmm0, %ecx | 
|---|
| 210 | test	%ecx, %ecx | 
|---|
| 211 | jnz	L(less4_double_words1) | 
|---|
| 212 |  | 
|---|
| 213 | cmp	(%rsi), %eax | 
|---|
| 214 | jne	L(nequal) | 
|---|
| 215 |  | 
|---|
| 216 | mov	4(%rdi), %eax | 
|---|
| 217 | cmp	4(%rsi), %eax | 
|---|
| 218 | jne	L(nequal) | 
|---|
| 219 |  | 
|---|
| 220 | mov	8(%rdi), %eax | 
|---|
| 221 | cmp	8(%rsi), %eax | 
|---|
| 222 | jne	L(nequal) | 
|---|
| 223 |  | 
|---|
| 224 | mov	12(%rdi), %eax | 
|---|
| 225 | cmp	12(%rsi), %eax | 
|---|
| 226 | jne	L(nequal) | 
|---|
| 227 |  | 
|---|
| 228 | movdqu	16(%rsi), %xmm2 | 
|---|
| 229 | pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */ | 
|---|
| 230 | pcmpeqd	16(%rdi), %xmm2		/* compare first 4 double_words for equality */ | 
|---|
| 231 | psubb	%xmm0, %xmm2		/* packed sub of comparison results*/ | 
|---|
| 232 | pmovmskb %xmm2, %edx | 
|---|
| 233 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 234 | jnz	L(less4_double_words_16) | 
|---|
| 235 |  | 
|---|
| 236 | movdqu	32(%rsi), %xmm2 | 
|---|
| 237 | pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */ | 
|---|
| 238 | pcmpeqd	32(%rdi), %xmm2		/* compare first 4 double_words for equality */ | 
|---|
| 239 | psubb	%xmm0, %xmm2		/* packed sub of comparison results*/ | 
|---|
| 240 | pmovmskb %xmm2, %edx | 
|---|
| 241 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 242 | jnz	L(less4_double_words_32) | 
|---|
| 243 |  | 
|---|
| 244 | movdqu	48(%rsi), %xmm2 | 
|---|
| 245 | pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */ | 
|---|
| 246 | pcmpeqd	48(%rdi), %xmm2		/* compare first 4 double_words for equality */ | 
|---|
| 247 | psubb	%xmm0, %xmm2		/* packed sub of comparison results*/ | 
|---|
| 248 | pmovmskb %xmm2, %edx | 
|---|
| 249 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 250 | jnz	L(less4_double_words_48) | 
|---|
| 251 |  | 
|---|
| 252 | add	$64, %rsi | 
|---|
| 253 | add	$64, %rdi | 
|---|
| 254 | jmp	L(continue_00_48) | 
|---|
| 255 |  | 
|---|
| 256 | .p2align 4 | 
|---|
| 257 | L(continue_32): | 
|---|
| 258 | and	$15, %ch | 
|---|
| 259 | jz	L(continue_32_00) | 
|---|
| 260 | cmp	$16, %eax | 
|---|
| 261 | jb	L(continue_0_32) | 
|---|
| 262 | cmp	$32, %eax | 
|---|
| 263 | jb	L(continue_16_32) | 
|---|
| 264 | cmp	$48, %eax | 
|---|
| 265 | jb	L(continue_32_32) | 
|---|
| 266 |  | 
|---|
| 267 | .p2align 4 | 
|---|
| 268 | L(continue_32_48): | 
|---|
| 269 | mov	(%rsi), %ecx | 
|---|
| 270 | cmp	%ecx, (%rdi) | 
|---|
| 271 | jne	L(nequal) | 
|---|
| 272 | test	%ecx, %ecx | 
|---|
| 273 | jz	L(equal) | 
|---|
| 274 |  | 
|---|
| 275 | mov	4(%rsi), %ecx | 
|---|
| 276 | cmp	%ecx, 4(%rdi) | 
|---|
| 277 | jne	L(nequal) | 
|---|
| 278 | test	%ecx, %ecx | 
|---|
| 279 | jz	L(equal) | 
|---|
| 280 |  | 
|---|
| 281 | mov	8(%rsi), %ecx | 
|---|
| 282 | cmp	%ecx, 8(%rdi) | 
|---|
| 283 | jne	L(nequal) | 
|---|
| 284 | test	%ecx, %ecx | 
|---|
| 285 | jz	L(equal) | 
|---|
| 286 |  | 
|---|
| 287 | mov	12(%rsi), %ecx | 
|---|
| 288 | cmp	%ecx, 12(%rdi) | 
|---|
| 289 | jne	L(nequal) | 
|---|
| 290 | test	%ecx, %ecx | 
|---|
| 291 | jz	L(equal) | 
|---|
| 292 |  | 
|---|
| 293 | mov	16(%rsi), %ecx | 
|---|
| 294 | cmp	%ecx, 16(%rdi) | 
|---|
| 295 | jne	L(nequal) | 
|---|
| 296 | test	%ecx, %ecx | 
|---|
| 297 | jz	L(equal) | 
|---|
| 298 |  | 
|---|
| 299 | mov	20(%rsi), %ecx | 
|---|
| 300 | cmp	%ecx, 20(%rdi) | 
|---|
| 301 | jne	L(nequal) | 
|---|
| 302 | test	%ecx, %ecx | 
|---|
| 303 | jz	L(equal) | 
|---|
| 304 |  | 
|---|
| 305 | mov	24(%rsi), %ecx | 
|---|
| 306 | cmp	%ecx, 24(%rdi) | 
|---|
| 307 | jne	L(nequal) | 
|---|
| 308 | test	%ecx, %ecx | 
|---|
| 309 | jz	L(equal) | 
|---|
| 310 |  | 
|---|
| 311 | mov	28(%rsi), %ecx | 
|---|
| 312 | cmp	%ecx, 28(%rdi) | 
|---|
| 313 | jne	L(nequal) | 
|---|
| 314 | test	%ecx, %ecx | 
|---|
| 315 | jz	L(equal) | 
|---|
| 316 |  | 
|---|
| 317 | movdqu	32(%rdi), %xmm1 | 
|---|
| 318 | movdqu	32(%rsi), %xmm2 | 
|---|
| 319 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 320 | pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 321 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 322 | pmovmskb %xmm1, %edx | 
|---|
| 323 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 324 | jnz	L(less4_double_words_32) | 
|---|
| 325 |  | 
|---|
| 326 | movdqu	48(%rdi), %xmm1 | 
|---|
| 327 | movdqu	48(%rsi), %xmm2 | 
|---|
| 328 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 329 | pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 330 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 331 | pmovmskb %xmm1, %edx | 
|---|
| 332 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 333 | jnz	L(less4_double_words_48) | 
|---|
| 334 |  | 
|---|
| 335 | add	$64, %rsi | 
|---|
| 336 | add	$64, %rdi | 
|---|
| 337 | jmp	L(continue_32_48) | 
|---|
| 338 |  | 
|---|
| 339 | .p2align 4 | 
|---|
| 340 | L(continue_16): | 
|---|
| 341 | and	$15, %ch | 
|---|
| 342 | jz	L(continue_16_00) | 
|---|
| 343 | cmp	$16, %eax | 
|---|
| 344 | jb	L(continue_0_16) | 
|---|
| 345 | cmp	$32, %eax | 
|---|
| 346 | jb	L(continue_16_16) | 
|---|
| 347 | cmp	$48, %eax | 
|---|
| 348 | jb	L(continue_16_32) | 
|---|
| 349 |  | 
|---|
| 350 | .p2align 4 | 
|---|
| 351 | L(continue_16_48): | 
|---|
| 352 | mov	(%rsi), %ecx | 
|---|
| 353 | cmp	%ecx, (%rdi) | 
|---|
| 354 | jne	L(nequal) | 
|---|
| 355 | test	%ecx, %ecx | 
|---|
| 356 | jz	L(equal) | 
|---|
| 357 |  | 
|---|
| 358 | mov	4(%rsi), %ecx | 
|---|
| 359 | cmp	%ecx, 4(%rdi) | 
|---|
| 360 | jne	L(nequal) | 
|---|
| 361 | test	%ecx, %ecx | 
|---|
| 362 | jz	L(equal) | 
|---|
| 363 |  | 
|---|
| 364 | mov	8(%rsi), %ecx | 
|---|
| 365 | cmp	%ecx, 8(%rdi) | 
|---|
| 366 | jne	L(nequal) | 
|---|
| 367 | test	%ecx, %ecx | 
|---|
| 368 | jz	L(equal) | 
|---|
| 369 |  | 
|---|
| 370 | mov	12(%rsi), %ecx | 
|---|
| 371 | cmp	%ecx, 12(%rdi) | 
|---|
| 372 | jne	L(nequal) | 
|---|
| 373 | test	%ecx, %ecx | 
|---|
| 374 | jz	L(equal) | 
|---|
| 375 |  | 
|---|
| 376 | movdqu	16(%rdi), %xmm1 | 
|---|
| 377 | movdqu	16(%rsi), %xmm2 | 
|---|
| 378 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 379 | pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 380 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 381 | pmovmskb %xmm1, %edx | 
|---|
| 382 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 383 | jnz	L(less4_double_words_16) | 
|---|
| 384 |  | 
|---|
| 385 | mov	32(%rsi), %ecx | 
|---|
| 386 | cmp	%ecx, 32(%rdi) | 
|---|
| 387 | jne	L(nequal) | 
|---|
| 388 | test	%ecx, %ecx | 
|---|
| 389 | jz	L(equal) | 
|---|
| 390 |  | 
|---|
| 391 | mov	36(%rsi), %ecx | 
|---|
| 392 | cmp	%ecx, 36(%rdi) | 
|---|
| 393 | jne	L(nequal) | 
|---|
| 394 | test	%ecx, %ecx | 
|---|
| 395 | jz	L(equal) | 
|---|
| 396 |  | 
|---|
| 397 | mov	40(%rsi), %ecx | 
|---|
| 398 | cmp	%ecx, 40(%rdi) | 
|---|
| 399 | jne	L(nequal) | 
|---|
| 400 | test	%ecx, %ecx | 
|---|
| 401 | jz	L(equal) | 
|---|
| 402 |  | 
|---|
| 403 | mov	44(%rsi), %ecx | 
|---|
| 404 | cmp	%ecx, 44(%rdi) | 
|---|
| 405 | jne	L(nequal) | 
|---|
| 406 | test	%ecx, %ecx | 
|---|
| 407 | jz	L(equal) | 
|---|
| 408 |  | 
|---|
| 409 | movdqu	48(%rdi), %xmm1 | 
|---|
| 410 | movdqu	48(%rsi), %xmm2 | 
|---|
| 411 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 412 | pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 413 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 414 | pmovmskb %xmm1, %edx | 
|---|
| 415 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 416 | jnz	L(less4_double_words_48) | 
|---|
| 417 |  | 
|---|
| 418 | add	$64, %rsi | 
|---|
| 419 | add	$64, %rdi | 
|---|
| 420 | jmp	L(continue_16_48) | 
|---|
| 421 |  | 
|---|
| 422 | .p2align 4 | 
|---|
| 423 | L(continue_00_00): | 
|---|
| 424 | movdqa	(%rdi), %xmm1 | 
|---|
| 425 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 426 | pcmpeqd	(%rsi), %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 427 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 428 | pmovmskb %xmm1, %edx | 
|---|
| 429 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 430 | jnz	L(less4_double_words) | 
|---|
| 431 |  | 
|---|
| 432 | movdqa	16(%rdi), %xmm3 | 
|---|
| 433 | pcmpeqd	%xmm3, %xmm0		/* Any null double_word? */ | 
|---|
| 434 | pcmpeqd	16(%rsi), %xmm3		/* compare first 4 double_words for equality */ | 
|---|
| 435 | psubb	%xmm0, %xmm3		/* packed sub of comparison results*/ | 
|---|
| 436 | pmovmskb %xmm3, %edx | 
|---|
| 437 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 438 | jnz	L(less4_double_words_16) | 
|---|
| 439 |  | 
|---|
| 440 | movdqa	32(%rdi), %xmm5 | 
|---|
| 441 | pcmpeqd	%xmm5, %xmm0		/* Any null double_word? */ | 
|---|
| 442 | pcmpeqd	32(%rsi), %xmm5		/* compare first 4 double_words for equality */ | 
|---|
| 443 | psubb	%xmm0, %xmm5		/* packed sub of comparison results*/ | 
|---|
| 444 | pmovmskb %xmm5, %edx | 
|---|
| 445 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 446 | jnz	L(less4_double_words_32) | 
|---|
| 447 |  | 
|---|
| 448 | movdqa	48(%rdi), %xmm1 | 
|---|
| 449 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 450 | pcmpeqd	48(%rsi), %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 451 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 452 | pmovmskb %xmm1, %edx | 
|---|
| 453 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 454 | jnz	L(less4_double_words_48) | 
|---|
| 455 |  | 
|---|
| 456 | add	$64, %rsi | 
|---|
| 457 | add	$64, %rdi | 
|---|
| 458 | jmp	L(continue_00_00) | 
|---|
| 459 |  | 
|---|
| 460 | .p2align 4 | 
|---|
| 461 | L(continue_00_32): | 
|---|
| 462 | movdqu	(%rsi), %xmm2 | 
|---|
| 463 | pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */ | 
|---|
| 464 | pcmpeqd	(%rdi), %xmm2		/* compare first 4 double_words for equality */ | 
|---|
| 465 | psubb	%xmm0, %xmm2		/* packed sub of comparison results*/ | 
|---|
| 466 | pmovmskb %xmm2, %edx | 
|---|
| 467 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 468 | jnz	L(less4_double_words) | 
|---|
| 469 |  | 
|---|
| 470 | add	$16, %rsi | 
|---|
| 471 | add	$16, %rdi | 
|---|
| 472 | jmp	L(continue_00_48) | 
|---|
| 473 |  | 
|---|
| 474 | .p2align 4 | 
|---|
| 475 | L(continue_00_16): | 
|---|
| 476 | movdqu	(%rsi), %xmm2 | 
|---|
| 477 | pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */ | 
|---|
| 478 | pcmpeqd	(%rdi), %xmm2		/* compare first 4 double_words for equality */ | 
|---|
| 479 | psubb	%xmm0, %xmm2		/* packed sub of comparison results*/ | 
|---|
| 480 | pmovmskb %xmm2, %edx | 
|---|
| 481 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 482 | jnz	L(less4_double_words) | 
|---|
| 483 |  | 
|---|
| 484 | movdqu	16(%rsi), %xmm2 | 
|---|
| 485 | pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */ | 
|---|
| 486 | pcmpeqd	16(%rdi), %xmm2		/* compare first 4 double_words for equality */ | 
|---|
| 487 | psubb	%xmm0, %xmm2		/* packed sub of comparison results*/ | 
|---|
| 488 | pmovmskb %xmm2, %edx | 
|---|
| 489 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 490 | jnz	L(less4_double_words_16) | 
|---|
| 491 |  | 
|---|
| 492 | add	$32, %rsi | 
|---|
| 493 | add	$32, %rdi | 
|---|
| 494 | jmp	L(continue_00_48) | 
|---|
| 495 |  | 
|---|
| 496 | .p2align 4 | 
|---|
| 497 | L(continue_00_0): | 
|---|
| 498 | movdqu	(%rsi), %xmm2 | 
|---|
| 499 | pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */ | 
|---|
| 500 | pcmpeqd	(%rdi), %xmm2		/* compare first 4 double_words for equality */ | 
|---|
| 501 | psubb	%xmm0, %xmm2		/* packed sub of comparison results*/ | 
|---|
| 502 | pmovmskb %xmm2, %edx | 
|---|
| 503 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 504 | jnz	L(less4_double_words) | 
|---|
| 505 |  | 
|---|
| 506 | movdqu	16(%rsi), %xmm2 | 
|---|
| 507 | pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */ | 
|---|
| 508 | pcmpeqd	16(%rdi), %xmm2		/* compare first 4 double_words for equality */ | 
|---|
| 509 | psubb	%xmm0, %xmm2		/* packed sub of comparison results*/ | 
|---|
| 510 | pmovmskb %xmm2, %edx | 
|---|
| 511 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 512 | jnz	L(less4_double_words_16) | 
|---|
| 513 |  | 
|---|
| 514 | movdqu	32(%rsi), %xmm2 | 
|---|
| 515 | pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */ | 
|---|
| 516 | pcmpeqd	32(%rdi), %xmm2		/* compare first 4 double_words for equality */ | 
|---|
| 517 | psubb	%xmm0, %xmm2		/* packed sub of comparison results*/ | 
|---|
| 518 | pmovmskb %xmm2, %edx | 
|---|
| 519 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 520 | jnz	L(less4_double_words_32) | 
|---|
| 521 |  | 
|---|
| 522 | add	$48, %rsi | 
|---|
| 523 | add	$48, %rdi | 
|---|
| 524 | jmp	L(continue_00_48) | 
|---|
| 525 |  | 
|---|
| 526 | .p2align 4 | 
|---|
| 527 | L(continue_48_00): | 
|---|
| 528 | pcmpeqd	(%rsi), %xmm0 | 
|---|
| 529 | mov	(%rdi), %eax | 
|---|
| 530 | pmovmskb %xmm0, %ecx | 
|---|
| 531 | test	%ecx, %ecx | 
|---|
| 532 | jnz	L(less4_double_words1) | 
|---|
| 533 |  | 
|---|
| 534 | cmp	(%rsi), %eax | 
|---|
| 535 | jne	L(nequal) | 
|---|
| 536 |  | 
|---|
| 537 | mov	4(%rdi), %eax | 
|---|
| 538 | cmp	4(%rsi), %eax | 
|---|
| 539 | jne	L(nequal) | 
|---|
| 540 |  | 
|---|
| 541 | mov	8(%rdi), %eax | 
|---|
| 542 | cmp	8(%rsi), %eax | 
|---|
| 543 | jne	L(nequal) | 
|---|
| 544 |  | 
|---|
| 545 | mov	12(%rdi), %eax | 
|---|
| 546 | cmp	12(%rsi), %eax | 
|---|
| 547 | jne	L(nequal) | 
|---|
| 548 |  | 
|---|
| 549 | movdqu	16(%rdi), %xmm1 | 
|---|
| 550 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 551 | pcmpeqd	16(%rsi), %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 552 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 553 | pmovmskb %xmm1, %edx | 
|---|
| 554 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 555 | jnz	L(less4_double_words_16) | 
|---|
| 556 |  | 
|---|
| 557 | movdqu	32(%rdi), %xmm1 | 
|---|
| 558 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 559 | pcmpeqd	32(%rsi), %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 560 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 561 | pmovmskb %xmm1, %edx | 
|---|
| 562 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 563 | jnz	L(less4_double_words_32) | 
|---|
| 564 |  | 
|---|
| 565 | movdqu	48(%rdi), %xmm1 | 
|---|
| 566 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 567 | pcmpeqd	48(%rsi), %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 568 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 569 | pmovmskb %xmm1, %edx | 
|---|
| 570 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 571 | jnz	L(less4_double_words_48) | 
|---|
| 572 |  | 
|---|
| 573 | add	$64, %rsi | 
|---|
| 574 | add	$64, %rdi | 
|---|
| 575 | jmp	L(continue_48_00) | 
|---|
| 576 |  | 
|---|
| 577 | .p2align 4 | 
|---|
| 578 | L(continue_32_00): | 
|---|
| 579 | movdqu	(%rdi), %xmm1 | 
|---|
| 580 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 581 | pcmpeqd	(%rsi), %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 582 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 583 | pmovmskb %xmm1, %edx | 
|---|
| 584 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 585 | jnz	L(less4_double_words) | 
|---|
| 586 |  | 
|---|
| 587 | add	$16, %rsi | 
|---|
| 588 | add	$16, %rdi | 
|---|
| 589 | jmp	L(continue_48_00) | 
|---|
| 590 |  | 
|---|
| 591 | .p2align 4 | 
|---|
| 592 | L(continue_16_00): | 
|---|
| 593 | movdqu	(%rdi), %xmm1 | 
|---|
| 594 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 595 | pcmpeqd	(%rsi), %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 596 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 597 | pmovmskb %xmm1, %edx | 
|---|
| 598 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 599 | jnz	L(less4_double_words) | 
|---|
| 600 |  | 
|---|
| 601 | movdqu	16(%rdi), %xmm1 | 
|---|
| 602 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 603 | pcmpeqd	16(%rsi), %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 604 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 605 | pmovmskb %xmm1, %edx | 
|---|
| 606 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 607 | jnz	L(less4_double_words_16) | 
|---|
| 608 |  | 
|---|
| 609 | add	$32, %rsi | 
|---|
| 610 | add	$32, %rdi | 
|---|
| 611 | jmp	L(continue_48_00) | 
|---|
| 612 |  | 
|---|
| 613 | .p2align 4 | 
|---|
| 614 | L(continue_0_00): | 
|---|
| 615 | movdqu	(%rdi), %xmm1 | 
|---|
| 616 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 617 | pcmpeqd	(%rsi), %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 618 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 619 | pmovmskb %xmm1, %edx | 
|---|
| 620 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 621 | jnz	L(less4_double_words) | 
|---|
| 622 |  | 
|---|
| 623 | movdqu	16(%rdi), %xmm1 | 
|---|
| 624 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 625 | pcmpeqd	16(%rsi), %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 626 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 627 | pmovmskb %xmm1, %edx | 
|---|
| 628 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 629 | jnz	L(less4_double_words_16) | 
|---|
| 630 |  | 
|---|
| 631 | movdqu	32(%rdi), %xmm1 | 
|---|
| 632 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 633 | pcmpeqd	32(%rsi), %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 634 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 635 | pmovmskb %xmm1, %edx | 
|---|
| 636 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 637 | jnz	L(less4_double_words_32) | 
|---|
| 638 |  | 
|---|
| 639 | add	$48, %rsi | 
|---|
| 640 | add	$48, %rdi | 
|---|
| 641 | jmp	L(continue_48_00) | 
|---|
| 642 |  | 
|---|
| 643 | .p2align 4 | 
|---|
| 644 | L(continue_32_32): | 
|---|
| 645 | movdqu	(%rdi), %xmm1 | 
|---|
| 646 | movdqu	(%rsi), %xmm2 | 
|---|
| 647 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 648 | pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 649 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 650 | pmovmskb %xmm1, %edx | 
|---|
| 651 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 652 | jnz	L(less4_double_words) | 
|---|
| 653 |  | 
|---|
| 654 | add	$16, %rsi | 
|---|
| 655 | add	$16, %rdi | 
|---|
| 656 | jmp	L(continue_48_48) | 
|---|
| 657 |  | 
|---|
| 658 | .p2align 4 | 
|---|
| 659 | L(continue_16_16): | 
|---|
| 660 | movdqu	(%rdi), %xmm1 | 
|---|
| 661 | movdqu	(%rsi), %xmm2 | 
|---|
| 662 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 663 | pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 664 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 665 | pmovmskb %xmm1, %edx | 
|---|
| 666 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 667 | jnz	L(less4_double_words) | 
|---|
| 668 |  | 
|---|
| 669 | movdqu	16(%rdi), %xmm3 | 
|---|
| 670 | movdqu	16(%rsi), %xmm4 | 
|---|
| 671 | pcmpeqd	%xmm3, %xmm0		/* Any null double_word? */ | 
|---|
| 672 | pcmpeqd	%xmm4, %xmm3		/* compare first 4 double_words for equality */ | 
|---|
| 673 | psubb	%xmm0, %xmm3		/* packed sub of comparison results*/ | 
|---|
| 674 | pmovmskb %xmm3, %edx | 
|---|
| 675 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 676 | jnz	L(less4_double_words_16) | 
|---|
| 677 |  | 
|---|
| 678 | add	$32, %rsi | 
|---|
| 679 | add	$32, %rdi | 
|---|
| 680 | jmp	L(continue_48_48) | 
|---|
| 681 |  | 
|---|
| 682 | .p2align 4 | 
|---|
| 683 | L(continue_0_0): | 
|---|
| 684 | movdqu	(%rdi), %xmm1 | 
|---|
| 685 | movdqu	(%rsi), %xmm2 | 
|---|
| 686 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 687 | pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 688 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 689 | pmovmskb %xmm1, %edx | 
|---|
| 690 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 691 | jnz	L(less4_double_words) | 
|---|
| 692 |  | 
|---|
| 693 | movdqu	16(%rdi), %xmm3 | 
|---|
| 694 | movdqu	16(%rsi), %xmm4 | 
|---|
| 695 | pcmpeqd	%xmm3, %xmm0		/* Any null double_word? */ | 
|---|
| 696 | pcmpeqd	%xmm4, %xmm3		/* compare first 4 double_words for equality */ | 
|---|
| 697 | psubb	%xmm0, %xmm3		/* packed sub of comparison results*/ | 
|---|
| 698 | pmovmskb %xmm3, %edx | 
|---|
| 699 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 700 | jnz	L(less4_double_words_16) | 
|---|
| 701 |  | 
|---|
| 702 | movdqu	32(%rdi), %xmm1 | 
|---|
| 703 | movdqu	32(%rsi), %xmm2 | 
|---|
| 704 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 705 | pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 706 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 707 | pmovmskb %xmm1, %edx | 
|---|
| 708 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 709 | jnz	L(less4_double_words_32) | 
|---|
| 710 |  | 
|---|
| 711 | add	$48, %rsi | 
|---|
| 712 | add	$48, %rdi | 
|---|
| 713 | jmp	L(continue_48_48) | 
|---|
| 714 |  | 
|---|
| 715 | .p2align 4 | 
|---|
| 716 | L(continue_0_16): | 
|---|
| 717 | movdqu	(%rdi), %xmm1 | 
|---|
| 718 | movdqu	(%rsi), %xmm2 | 
|---|
| 719 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 720 | pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 721 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 722 | pmovmskb %xmm1, %edx | 
|---|
| 723 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 724 | jnz	L(less4_double_words) | 
|---|
| 725 |  | 
|---|
| 726 | movdqu	16(%rdi), %xmm1 | 
|---|
| 727 | movdqu	16(%rsi), %xmm2 | 
|---|
| 728 | pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */ | 
|---|
| 729 | pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */ | 
|---|
| 730 | psubb	%xmm0, %xmm1		/* packed sub of comparison results*/ | 
|---|
| 731 | pmovmskb %xmm1, %edx | 
|---|
| 732 | sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */ | 
|---|
| 733 | jnz	L(less4_double_words_16) | 
|---|
| 734 |  | 
|---|
| 735 | add	$32, %rsi | 
|---|
| 736 | add	$32, %rdi | 
|---|
| 737 | jmp	L(continue_32_48) | 
|---|
| 738 | /* L(continue_0_32): compare one unaligned 16-byte chunk (4 wide characters) of both strings.  %xmm0 is expected to be all-zero on entry.  A mismatch or null exits to L(less4_double_words); otherwise both pointers advance by 16 and control goes to L(continue_16_48). */ | 
|---|
| 739 | .p2align 4 | 
|---|
| 740 | L(continue_0_32): | 
|---|
| 741 | movdqu	(%rdi), %xmm1 | 
|---|
| 742 | movdqu	(%rsi), %xmm2 | 
|---|
| 743 | pcmpeqd	%xmm1, %xmm0		/* %xmm0 = all-ones in every lane where the (%rdi) chunk has a null double_word */ | 
|---|
| 744 | pcmpeqd	%xmm2, %xmm1		/* %xmm1 = all-ones in every lane where the two chunks are equal */ | 
|---|
| 745 | psubb	%xmm0, %xmm1		/* byte sign bit stays set only in lanes that are equal and not null */ | 
|---|
| 746 | pmovmskb %xmm1, %edx		/* collect the 16 byte sign bits into %edx */ | 
|---|
| 747 | sub	$0xffff, %edx		/* edx == 0 iff all 4 double_words are equal and non-null (mask was 0xffff) */ | 
|---|
| 748 | jnz	L(less4_double_words) | 
|---|
| 749 | /* Chunk equal and null-free: step past the 16 bytes just compared. */ | 
|---|
| 750 | add	$16, %rsi | 
|---|
| 751 | add	$16, %rdi | 
|---|
| 752 | jmp	L(continue_16_48) | 
|---|
| 753 | /* L(continue_16_32): compare one unaligned 16-byte chunk (4 wide characters) of both strings.  %xmm0 is expected to be all-zero on entry.  A mismatch or null exits to L(less4_double_words); otherwise both pointers advance by 16 and control goes to L(continue_32_48). */ | 
|---|
| 754 | .p2align 4 | 
|---|
| 755 | L(continue_16_32): | 
|---|
| 756 | movdqu	(%rdi), %xmm1 | 
|---|
| 757 | movdqu	(%rsi), %xmm2 | 
|---|
| 758 | pcmpeqd	%xmm1, %xmm0		/* %xmm0 = all-ones in every lane where the (%rdi) chunk has a null double_word */ | 
|---|
| 759 | pcmpeqd	%xmm2, %xmm1		/* %xmm1 = all-ones in every lane where the two chunks are equal */ | 
|---|
| 760 | psubb	%xmm0, %xmm1		/* byte sign bit stays set only in lanes that are equal and not null */ | 
|---|
| 761 | pmovmskb %xmm1, %edx		/* collect the 16 byte sign bits into %edx */ | 
|---|
| 762 | sub	$0xffff, %edx		/* edx == 0 iff all 4 double_words are equal and non-null (mask was 0xffff) */ | 
|---|
| 763 | jnz	L(less4_double_words) | 
|---|
| 764 | /* Chunk equal and null-free: step past the 16 bytes just compared. */ | 
|---|
| 765 | add	$16, %rsi | 
|---|
| 766 | add	$16, %rdi | 
|---|
| 767 | jmp	L(continue_32_48) | 
|---|
| 768 | /* L(less4_double_words1): scalar compare of four consecutive wide characters at (%rdi) and (%rsi), returning through L(nequal), L(equal) or the final ret.  NOTE(review): %eax is presumably the character at (%rdi), loaded by the code that jumps here - confirm at the jump sites. */ | 
|---|
| 769 | .p2align 4 | 
|---|
| 770 | L(less4_double_words1): | 
|---|
| 771 | cmp	(%rsi), %eax		/* flags from %eax - (%rsi) */ | 
|---|
| 772 | jne	L(nequal)		/* L(nequal) converts these flags into 1 or -1 */ | 
|---|
| 773 | test	%eax, %eax | 
|---|
| 774 | jz	L(equal)		/* equal and null: both strings end here */ | 
|---|
| 775 | /* Characters 1..3: load from (%rsi), compare against (%rdi) so the flags reflect rdi-side minus rsi-side, stop on mismatch or null. */ | 
|---|
| 776 | mov	4(%rsi), %ecx | 
|---|
| 777 | cmp	%ecx, 4(%rdi)		/* flags from 4(%rdi) - %ecx */ | 
|---|
| 778 | jne	L(nequal) | 
|---|
| 779 | test	%ecx, %ecx | 
|---|
| 780 | jz	L(equal) | 
|---|
| 781 |  | 
|---|
| 782 | mov	8(%rsi), %ecx | 
|---|
| 783 | cmp	%ecx, 8(%rdi)		/* flags from 8(%rdi) - %ecx */ | 
|---|
| 784 | jne	L(nequal) | 
|---|
| 785 | test	%ecx, %ecx | 
|---|
| 786 | jz	L(equal) | 
|---|
| 787 | /* Last character: a mismatch goes to L(nequal); equality returns 0 without a null test.  NOTE(review): presumably the jump sites guarantee a mismatch or a null within these four characters - confirm. */ | 
|---|
| 788 | mov	12(%rsi), %ecx | 
|---|
| 789 | cmp	%ecx, 12(%rdi)		/* flags from 12(%rdi) - %ecx */ | 
|---|
| 790 | jne	L(nequal) | 
|---|
| 791 | xor	%eax, %eax		/* return 0 */ | 
|---|
| 792 | ret | 
|---|
| 793 | /* L(less4_double_words): tail handler for the 16-byte chunk at offset 0.  The jump sites in this file arrive with %edx = (pmovmskb mask - 0xffff), nonzero.  Locate the first double_word that differs or is null, compare it as a signed value and return: L(nequal) for a mismatch, 0 when it is the shared null. */ | 
|---|
| 794 | .p2align 4 | 
|---|
| 795 | L(less4_double_words): | 
|---|
| 796 | xor	%eax, %eax		/* every path below reloads %eax before returning */ | 
|---|
| 797 | test	%dl, %dl | 
|---|
| 798 | jz	L(next_two_double_words)		/* %dl == 0: double_words 0 and 1 are equal and non-null */ | 
|---|
| 799 | and	$15, %dl | 
|---|
| 800 | jz	L(second_double_word)		/* low nibble clear: double_word 0 is equal and non-null */ | 
|---|
| 801 | mov	(%rdi), %eax | 
|---|
| 802 | cmp	(%rsi), %eax		/* flags from (%rdi) - (%rsi) */ | 
|---|
| 803 | jne	L(nequal) | 
|---|
| 804 | ret		/* equal, so this double_word was flagged for being null: %eax == 0 */ | 
|---|
| 805 |  | 
|---|
| 806 | .p2align 4 | 
|---|
| 807 | L(second_double_word): | 
|---|
| 808 | mov	4(%rdi), %eax | 
|---|
| 809 | cmp	4(%rsi), %eax		/* flags from 4(%rdi) - 4(%rsi) */ | 
|---|
| 810 | jne	L(nequal) | 
|---|
| 811 | ret		/* equal implies null: %eax == 0 */ | 
|---|
| 812 |  | 
|---|
| 813 | .p2align 4 | 
|---|
| 814 | L(next_two_double_words): | 
|---|
| 815 | and	$15, %dh | 
|---|
| 816 | jz	L(fourth_double_word)		/* low nibble of %dh clear: double_word 2 is equal and non-null */ | 
|---|
| 817 | mov	8(%rdi), %eax | 
|---|
| 818 | cmp	8(%rsi), %eax		/* flags from 8(%rdi) - 8(%rsi) */ | 
|---|
| 819 | jne	L(nequal) | 
|---|
| 820 | ret		/* equal implies null: %eax == 0 */ | 
|---|
| 821 |  | 
|---|
| 822 | .p2align 4 | 
|---|
| 823 | L(fourth_double_word): | 
|---|
| 824 | mov	12(%rdi), %eax | 
|---|
| 825 | cmp	12(%rsi), %eax		/* flags from 12(%rdi) - 12(%rsi) */ | 
|---|
| 826 | jne	L(nequal) | 
|---|
| 827 | ret		/* equal implies null: %eax == 0 */ | 
|---|
| 828 | /* L(less4_double_words_16): tail handler for the 16-byte chunk at offset 16.  The jump site in this file arrives with %edx = (pmovmskb mask - 0xffff), nonzero.  Locate the first double_word in 16(%rdi)..31(%rdi) that differs or is null and return: L(nequal) for a mismatch, 0 when it is the shared null. */ | 
|---|
| 829 | .p2align 4 | 
|---|
| 830 | L(less4_double_words_16): | 
|---|
| 831 | xor	%eax, %eax		/* every path below reloads %eax before returning */ | 
|---|
| 832 | test	%dl, %dl | 
|---|
| 833 | jz	L(next_two_double_words_16)		/* %dl == 0: the first two double_words of the chunk are equal and non-null */ | 
|---|
| 834 | and	$15, %dl | 
|---|
| 835 | jz	L(second_double_word_16)		/* low nibble clear: the first double_word of the chunk is equal and non-null */ | 
|---|
| 836 | mov	16(%rdi), %eax | 
|---|
| 837 | cmp	16(%rsi), %eax		/* flags from 16(%rdi) - 16(%rsi) */ | 
|---|
| 838 | jne	L(nequal) | 
|---|
| 839 | ret		/* equal, so this double_word was flagged for being null: %eax == 0 */ | 
|---|
| 840 |  | 
|---|
| 841 | .p2align 4 | 
|---|
| 842 | L(second_double_word_16): | 
|---|
| 843 | mov	20(%rdi), %eax | 
|---|
| 844 | cmp	20(%rsi), %eax		/* flags from 20(%rdi) - 20(%rsi) */ | 
|---|
| 845 | jne	L(nequal) | 
|---|
| 846 | ret		/* equal implies null: %eax == 0 */ | 
|---|
| 847 |  | 
|---|
| 848 | .p2align 4 | 
|---|
| 849 | L(next_two_double_words_16): | 
|---|
| 850 | and	$15, %dh | 
|---|
| 851 | jz	L(fourth_double_word_16)		/* low nibble of %dh clear: the third double_word of the chunk is equal and non-null */ | 
|---|
| 852 | mov	24(%rdi), %eax | 
|---|
| 853 | cmp	24(%rsi), %eax		/* flags from 24(%rdi) - 24(%rsi) */ | 
|---|
| 854 | jne	L(nequal) | 
|---|
| 855 | ret		/* equal implies null: %eax == 0 */ | 
|---|
| 856 |  | 
|---|
| 857 | .p2align 4 | 
|---|
| 858 | L(fourth_double_word_16): | 
|---|
| 859 | mov	28(%rdi), %eax | 
|---|
| 860 | cmp	28(%rsi), %eax		/* flags from 28(%rdi) - 28(%rsi) */ | 
|---|
| 861 | jne	L(nequal) | 
|---|
| 862 | ret		/* equal implies null: %eax == 0 */ | 
|---|
| 863 | /* L(less4_double_words_32): tail handler for the 16-byte chunk at offset 32.  The jump site in this file arrives with %edx = (pmovmskb mask - 0xffff), nonzero.  Locate the first double_word in 32(%rdi)..47(%rdi) that differs or is null and return: L(nequal) for a mismatch, 0 when it is the shared null. */ | 
|---|
| 864 | .p2align 4 | 
|---|
| 865 | L(less4_double_words_32): | 
|---|
| 866 | xor	%eax, %eax		/* every path below reloads %eax before returning */ | 
|---|
| 867 | test	%dl, %dl | 
|---|
| 868 | jz	L(next_two_double_words_32)		/* %dl == 0: the first two double_words of the chunk are equal and non-null */ | 
|---|
| 869 | and	$15, %dl | 
|---|
| 870 | jz	L(second_double_word_32)		/* low nibble clear: the first double_word of the chunk is equal and non-null */ | 
|---|
| 871 | mov	32(%rdi), %eax | 
|---|
| 872 | cmp	32(%rsi), %eax		/* flags from 32(%rdi) - 32(%rsi) */ | 
|---|
| 873 | jne	L(nequal) | 
|---|
| 874 | ret		/* equal, so this double_word was flagged for being null: %eax == 0 */ | 
|---|
| 875 |  | 
|---|
| 876 | .p2align 4 | 
|---|
| 877 | L(second_double_word_32): | 
|---|
| 878 | mov	36(%rdi), %eax | 
|---|
| 879 | cmp	36(%rsi), %eax		/* flags from 36(%rdi) - 36(%rsi) */ | 
|---|
| 880 | jne	L(nequal) | 
|---|
| 881 | ret		/* equal implies null: %eax == 0 */ | 
|---|
| 882 |  | 
|---|
| 883 | .p2align 4 | 
|---|
| 884 | L(next_two_double_words_32): | 
|---|
| 885 | and	$15, %dh | 
|---|
| 886 | jz	L(fourth_double_word_32)		/* low nibble of %dh clear: the third double_word of the chunk is equal and non-null */ | 
|---|
| 887 | mov	40(%rdi), %eax | 
|---|
| 888 | cmp	40(%rsi), %eax		/* flags from 40(%rdi) - 40(%rsi) */ | 
|---|
| 889 | jne	L(nequal) | 
|---|
| 890 | ret		/* equal implies null: %eax == 0 */ | 
|---|
| 891 |  | 
|---|
| 892 | .p2align 4 | 
|---|
| 893 | L(fourth_double_word_32): | 
|---|
| 894 | mov	44(%rdi), %eax | 
|---|
| 895 | cmp	44(%rsi), %eax		/* flags from 44(%rdi) - 44(%rsi) */ | 
|---|
| 896 | jne	L(nequal) | 
|---|
| 897 | ret		/* equal implies null: %eax == 0 */ | 
|---|
| 898 | /* L(less4_double_words_48): tail handler for the 16-byte chunk at offset 48.  Locate the first double_word in 48(%rdi)..63(%rdi) that differs or is null and return: L(nequal) for a mismatch, 0 when it is the shared null.  NOTE(review): the jump sites are outside this view; %edx is assumed to hold (pmovmskb mask - 0xffff), nonzero, as for the other less4_double_words variants - confirm. */ | 
|---|
| 899 | .p2align 4 | 
|---|
| 900 | L(less4_double_words_48): | 
|---|
| 901 | xor	%eax, %eax		/* every path below reloads %eax before returning */ | 
|---|
| 902 | test	%dl, %dl | 
|---|
| 903 | jz	L(next_two_double_words_48)		/* %dl == 0: continue with the upper half of the mask in %dh */ | 
|---|
| 904 | and	$15, %dl | 
|---|
| 905 | jz	L(second_double_word_48)		/* low nibble clear: skip the first double_word of the chunk */ | 
|---|
| 906 | mov	48(%rdi), %eax | 
|---|
| 907 | cmp	48(%rsi), %eax		/* flags from 48(%rdi) - 48(%rsi) */ | 
|---|
| 908 | jne	L(nequal) | 
|---|
| 909 | ret		/* equal: return the loaded character, presumably the null (0) - confirm */ | 
|---|
| 910 |  | 
|---|
| 911 | .p2align 4 | 
|---|
| 912 | L(second_double_word_48): | 
|---|
| 913 | mov	52(%rdi), %eax | 
|---|
| 914 | cmp	52(%rsi), %eax		/* flags from 52(%rdi) - 52(%rsi) */ | 
|---|
| 915 | jne	L(nequal) | 
|---|
| 916 | ret | 
|---|
| 917 |  | 
|---|
| 918 | .p2align 4 | 
|---|
| 919 | L(next_two_double_words_48): | 
|---|
| 920 | and	$15, %dh | 
|---|
| 921 | jz	L(fourth_double_word_48)		/* low nibble of %dh clear: skip the third double_word of the chunk */ | 
|---|
| 922 | mov	56(%rdi), %eax | 
|---|
| 923 | cmp	56(%rsi), %eax		/* flags from 56(%rdi) - 56(%rsi) */ | 
|---|
| 924 | jne	L(nequal) | 
|---|
| 925 | ret | 
|---|
| 926 |  | 
|---|
| 927 | .p2align 4 | 
|---|
| 928 | L(fourth_double_word_48): | 
|---|
| 929 | mov	60(%rdi), %eax | 
|---|
| 930 | cmp	60(%rsi), %eax		/* flags from 60(%rdi) - 60(%rsi) */ | 
|---|
| 931 | jne	L(nequal) | 
|---|
| 932 | ret | 
|---|
| 933 | /* L(nequal): common exit for a mismatch.  Entered with the flags of a cmp that computed (rdi-side character) - (rsi-side character); returns 1 in %eax when the rdi-side character is greater as a signed value, otherwise -1. */ | 
|---|
| 934 | .p2align 4 | 
|---|
| 935 | L(nequal): | 
|---|
| 936 | mov	$1, %eax		/* mov does not modify flags, so the jump site's cmp result is still live */ | 
|---|
| 937 | jg	L(nequal_bigger)		/* signed greater: keep 1 */ | 
|---|
| 938 | neg	%eax		/* signed less: 1 becomes -1 */ | 
|---|
| 939 |  | 
|---|
| 940 | L(nequal_bigger): | 
|---|
| 941 | ret | 
|---|
| 942 | /* L(equal): common exit taken when the compared characters are equal and null, i.e. both strings ended together; returns 0. */ | 
|---|
| 943 | .p2align 4 | 
|---|
| 944 | L(equal): | 
|---|
| 945 | xor	%eax, %eax		/* a 32-bit write zero-extends into %rax: same result as the 64-bit xor without the REX.W prefix, and the same form the other return-0 paths use */ | 
|---|
| 946 | ret | 
|---|
| 947 | /* End of __wcscmp.  The hidden definition and the public weak alias wcscmp are emitted only when __wcscmp is not defined as a preprocessor macro.  NOTE(review): presumably another file defines __wcscmp to a different symbol name before including this one - confirm against the includers. */ | 
|---|
| 948 | END (__wcscmp) | 
|---|
| 949 | #ifndef __wcscmp | 
|---|
| 950 | libc_hidden_def (__wcscmp) | 
|---|
| 951 | weak_alias (__wcscmp, wcscmp) | 
|---|
| 952 | #endif /* !__wcscmp */ | 
|---|
| 953 |  | 
|---|