/* strcat(dest, src) -- Append SRC on the end of DEST.
   Optimized for x86-64.
   Copyright (C) 2002-2020 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Andreas Jaeger <aj@suse.de>, 2002.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "asm-syntax.h"

/* Will be removed when new strcpy implementation gets merged.  */

/* char *strcat (char *dest, const char *src)

   Syntax:  GNU as, AT&T operand order, run through the C preprocessor.
   In:      %rdi = dest, %rsi = src.
   Out:     %rax = dest (the value %rdi had on entry).
   Scratch: %rax, %rcx, %rdx, %rsi, %r8, %r9 and the flags.  %rdi is
	    never written and the stack is not used.

   Phase 1 walks DEST up to its terminating NUL.  Phase 2 copies SRC,
   terminating NUL included, to that position.  Each phase first goes
   byte by byte until the pointer it reads through is a multiple of 8
   and then switches to whole quadwords, using the carry test below
   to notice a NUL inside a quadword.

   NUL test.  STRCAT_NUL_MAGIC has a zero bit at positions 8, 16, ...,
   56 (bit 0 of every byte except the lowest) and one bits elsewhere.
   Adding it to a word makes each byte carry into its upper neighbour
   for as long as that byte and all bytes below it are non-zero; the
   first NUL byte always suppresses the carry out of its own position.
   The carry out of the top byte ends up in CF.  The other seven
   carries arrive in the zero bits listed above, so (sum ^ word) has
   exactly those carries in these positions.  OR-ing the magic sets
   every other bit, and adding 1 then wraps to zero only if all seven
   carries were present.  Hence: CF clear, or a non-zero result after
   the increment, means the word contains a NUL.  */

#define STRCAT_NUL_MAGIC	0xfefefefefefefeff

/* One step of the DEST scan.
   Loads the quadword at (%rax) into %rcx, moves %rax past it and runs
   the NUL test with %rdx as scratch.  Leaves to .Ldst_nul_in_word if
   the top carry is missing.  Otherwise falls out with ZF set when the
   word is NUL-free and ZF clear when it is not; the code following
   each expansion branches on that.  %rcx keeps the loaded word.  */
	.macro	STRCAT_PROBE_DST
	movq	(%rax), %rcx
	addq	$8, %rax
	movq	%r8, %rdx
	addq	%rcx, %rdx		/* CF = carry out of the top byte.  */
	jnc	.Ldst_nul_in_word
	xorq	%rcx, %rdx		/* Isolate the per-byte carries.  */
	orq	%r8, %rdx		/* Force all remaining bits to 1.  */
	incq	%rdx			/* Zero iff no carry was missing.  */
	.endm

/* One step of the SRC copy.
   Loads the quadword at (%rsi) into %rax, moves %rsi past it and runs
   the NUL test with %r9 as scratch.  If the word contains a NUL it
   leaves to .Lcopy_tail with nothing stored and %rax still holding
   the word.  Otherwise the word is stored at (%rdx) and %rdx moves
   past it.  */
	.macro	STRCAT_COPY_WORD
	movq	(%rsi), %rax
	addq	$8, %rsi
	movq	%rax, %r9
	addq	%r8, %r9		/* CF = carry out of the top byte.  */
	jnc	.Lcopy_tail
	xorq	%rax, %r9		/* Isolate the per-byte carries.  */
	orq	%r8, %r9		/* Force all remaining bits to 1.  */
	incq	%r9			/* Zero iff no carry was missing.  */
	jnz	.Lcopy_tail
	movq	%rax, (%rdx)
	addq	$8, %rdx
	.endm

	.text
ENTRY (strcat)
	movq	%rdi, %rcx
	andl	$7, %ecx		/* %ecx = dest & 7; sets ZF.  */
	movq	%rdi, %rax		/* %rax = scan cursor into DEST.  */
	movq	$STRCAT_NUL_MAGIC, %r8	/* Stays in %r8 until return.  */

	/* Phase 1: find the end of DEST.  The two movq above leave the
	   flags alone, so this still tests the result of the andl.  */
	jz	.Ldst_word_loop		/* DEST already 8-byte aligned.  */

	negl	%ecx
	addl	$8, %ecx		/* %ecx = bytes up to the next
					   8-byte boundary (1..7).  */
.Ldst_align_loop:
	cmpb	$0, (%rax)
	je	.Lcopy			/* NUL found: %rax is the end.  */
	incq	%rax
	decl	%ecx
	jnz	.Ldst_align_loop

	/* %rax is 8-byte aligned from here on; scan DEST a quadword at
	   a time, four quadwords per iteration.  */
	.p2align 4
.Ldst_word_loop:
	STRCAT_PROBE_DST
	jnz	.Ldst_nul_in_word

	STRCAT_PROBE_DST
	jnz	.Ldst_nul_in_word

	STRCAT_PROBE_DST
	jnz	.Ldst_nul_in_word

	STRCAT_PROBE_DST
	jz	.Ldst_word_loop		/* All four words NUL-free.  */

	/* The quadword in %rcx contains a NUL.  %rax was already moved
	   past that word, so step back and then forward one byte at a
	   time to the first NUL.  */
	.p2align 4
.Ldst_nul_in_word:
	subq	$8, %rax

	testb	%cl, %cl		/* Byte 0.  */
	jz	.Lcopy
	incq	%rax

	testb	%ch, %ch		/* Byte 1.  */
	jz	.Lcopy
	incq	%rax

	testl	$0x00ff0000, %ecx	/* Byte 2.  */
	jz	.Lcopy
	incq	%rax

	testl	$0xff000000, %ecx	/* Byte 3.  */
	jz	.Lcopy
	incq	%rax

	shrq	$32, %rcx		/* Bring bytes 4..7 into %ecx.  */

	testb	%cl, %cl		/* Byte 4.  */
	jz	.Lcopy
	incq	%rax

	testb	%ch, %ch		/* Byte 5.  */
	jz	.Lcopy
	incq	%rax

	testl	$0xff0000, %ecx		/* Byte 6.  */
	jz	.Lcopy
	incq	%rax			/* None of bytes 0..6, so the NUL
					   is byte 7; fall through.  */

	/* Phase 2: copy SRC to the end of DEST.  %rax points at the
	   terminating NUL of DEST.  */
.Lcopy:
	movq	%rsi, %rcx
	andl	$7, %ecx		/* %ecx = src & 7; sets ZF.  */
	movq	%rax, %rdx		/* %rdx = write cursor; %rax is
					   reused for data below.  movq
					   leaves the flags alone.  */
	jz	.Lsrc_word_loop		/* SRC already 8-byte aligned.  */

	negl	%ecx
	addl	$8, %ecx		/* %ecx = bytes up to the next
					   8-byte boundary (1..7).  */
.Lsrc_align_loop:
	movb	(%rsi), %al
	testb	%al, %al		/* ZF survives the store below.  */
	movb	%al, (%rdx)		/* The NUL is stored as well.  */
	jz	.Ldone
	incq	%rsi
	incq	%rdx
	decl	%ecx
	jnz	.Lsrc_align_loop

	/* Now the source is aligned.  Unfortunately we cannot force
	   both source and destination to be aligned, so the alignment
	   of the destination is ignored.  Four quadwords are copied
	   per iteration.  */
	.p2align 4
.Lsrc_word_loop:
	STRCAT_COPY_WORD
	STRCAT_COPY_WORD
	STRCAT_COPY_WORD
	STRCAT_COPY_WORD
	jmp	.Lsrc_word_loop

	/* The quadword in %rax contains the terminating NUL and has not
	   been stored yet.  Write it out a byte at a time, two bytes
	   per pass, stopping after the NUL has been written.  */
	.p2align 4
.Lcopy_tail:
	movb	%al, (%rdx)
	testb	%al, %al
	jz	.Ldone
	incq	%rdx
	movb	%ah, (%rdx)
	testb	%ah, %ah
	jz	.Ldone
	incq	%rdx
	shrq	$16, %rax		/* Expose the next two bytes.  */
	jmp	.Lcopy_tail

.Ldone:
	movq	%rdi, %rax		/* Return the original DEST.  */
	ret
END (strcat)
libc_hidden_builtin_def (strcat)