strchr.S source code [Glibc/sysdeps/x86_64/strchr.S]

/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR.
   For AMD x86-64.
   Copyright (C) 2009-2020 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

	.text

/* strchr (str, ch) -- find the first byte equal to CH in the
   NUL-terminated string STR, using 16-byte SSE2 compares.

   Syntax: GNU as, AT&T operand order (source, destination).
   ABI:    System V AMD64.
   In:     %rdi = STR
	   %esi = CH; only its low byte takes part in the comparison.
   Out:    %rax = address of the first byte of STR that equals CH.
	   If the terminating NUL is reached first:
	     - without AS_STRCHRNUL, %rax = 0, unless CH itself is 0, in
	       which case the address of the terminator is returned;
	     - with AS_STRCHRNUL, %rax = address of the terminator.
	   (AS_STRCHRNUL is presumably defined by a wrapper that builds
	   this file under another name -- not visible here.)
   Clobbers: %rax, %rcx, %rdx, %rdi, %r8, %r9, %xmm0-%xmm6, flags.
   Stack:  not used.

   Bytes past the terminator may be read.  Every load is either part
   of the 64 bytes starting at STR, which the entry check shows do not
   cross a 4096-byte boundary, or part of a 64-byte-aligned block.

   Register roles for the whole function:
     %xmm1 = CH replicated into all 16 byte lanes.
     %rax at L(return) = bit mask in which bit I is set when the byte at
	     (%rdi + I) is CH or NUL; at least one bit is set.  */
ENTRY (strchr)
	/* Replicate CH across %xmm1 (byte -> word -> dword -> all four
	   dwords).  Interleaved with that, compute the offset of STR
	   inside its 4096-byte page.  4032 = 4096 - 64, so an offset
	   above it means that 64 bytes read from STR would run into the
	   next page.  The SSE instructions between cmpl and jg leave the
	   flags alone.  The offset is never negative, so the signed jg
	   behaves like an unsigned test here.  */
	movd	%esi, %xmm1
	movl	%edi, %eax
	andl	$4095, %eax
	punpcklbw %xmm1, %xmm1
	cmpl	$4032, %eax
	punpcklwd %xmm1, %xmm1
	pshufd	$0, %xmm1, %xmm1
	jg	L(cross_page)

	/* Bytes 0..15 of STR (unaligned load).  %xmm0 collects lanes
	   equal to CH, %xmm4 lanes equal to NUL (%xmm3 = 0); their OR is
	   turned into a 16-bit mask in %eax.  */
	movdqu	(%rdi), %xmm0
	pxor	%xmm3, %xmm3
	movdqa	%xmm0, %xmm4
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm3, %xmm4
	por	%xmm4, %xmm0
	pmovmskb %xmm0, %eax
	test	%eax, %eax
	je	L(next_48_bytes)

	/* Hit within the first 16 bytes: %eax = index of the first one.  */
	bsf	%eax, %eax
#ifdef AS_STRCHRNUL
	leaq	(%rdi,%rax), %rax
#else
	/* Return the address only if the byte found is CH; if it is the
	   terminator instead, return 0.  */
	movl	$0, %edx
	leaq	(%rdi,%rax), %rax
	cmpb	%sil, (%rax)
	cmovne	%rdx, %rax
#endif
	ret

	.p2align 3
L(next_48_bytes):
	/* No hit in bytes 0..15.  Test bytes 16..63 the same way and
	   assemble the three 16-bit masks at bit positions 16, 32 and 48
	   of %rax, so that a bit index is again an offset from %rdi.
	   pmovmskb clears the upper bits of its destination, so the
	   64-bit shifts only move the 16 mask bits.  */
	movdqu	16(%rdi), %xmm0
	movdqa	%xmm0, %xmm4
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm3, %xmm4
	por	%xmm4, %xmm0
	pmovmskb %xmm0, %ecx		/* bytes 16..31 */
	movdqu	32(%rdi), %xmm0
	movdqa	%xmm0, %xmm4
	pcmpeqb	%xmm1, %xmm0
	salq	$16, %rcx
	pcmpeqb	%xmm3, %xmm4
	por	%xmm4, %xmm0
	pmovmskb %xmm0, %eax		/* bytes 32..47 */
	movdqu	48(%rdi), %xmm0
	/* Last vector: the zero in %xmm3 is not needed afterwards, so the
	   NUL compare is done in place and saves a register copy.  */
	pcmpeqb	%xmm0, %xmm3
	salq	$32, %rax
	pcmpeqb	%xmm1, %xmm0
	orq	%rcx, %rax
	por	%xmm3, %xmm0
	pmovmskb %xmm0, %ecx		/* bytes 48..63 */
	salq	$48, %rcx
	orq	%rcx, %rax
	testq	%rax, %rax
	jne	L(return)
L(loop_start):
	/* We use this alignment to force loop be aligned to 8 but not
	   16 bytes.  This gives better scheduling on AMD processors.  */
	.p2align 4
	/* %xmm6 = 0 for the loop.  Round %rdi down to a 64-byte boundary;
	   the loop adds 64 before its first load, so scanning resumes at
	   the first aligned block after the bytes already examined.  */
	pxor	%xmm6, %xmm6
	andq	$-64, %rdi
	.p2align 3
L(loop64):
	/* Examine one aligned 64-byte block per iteration.  For each
	   16-byte vector X, (X ^ CH) is zero in the lanes equal to CH,
	   so min (X ^ CH, X) is zero exactly in the lanes holding CH or
	   NUL.  The four minima are folded into %xmm5 and compared
	   against zero once.  */
	addq	$64, %rdi
	movdqa	(%rdi), %xmm5
	movdqa	16(%rdi), %xmm2
	movdqa	32(%rdi), %xmm3
	pxor	%xmm1, %xmm5
	movdqa	48(%rdi), %xmm4
	pxor	%xmm1, %xmm2
	pxor	%xmm1, %xmm3
	pminub	(%rdi), %xmm5
	pxor	%xmm1, %xmm4
	pminub	16(%rdi), %xmm2
	pminub	32(%rdi), %xmm3
	pminub	%xmm2, %xmm5
	pminub	48(%rdi), %xmm4
	pminub	%xmm3, %xmm5
	pminub	%xmm4, %xmm5
	pcmpeqb %xmm6, %xmm5
	pmovmskb %xmm5, %eax

	testl	%eax, %eax
	je	L(loop64)

	/* This block contains a hit.  %xmm5 was overwritten by the fold,
	   so the first vector is reloaded and tested against CH and NUL
	   directly.  %xmm2-%xmm4 still hold their own minima, where a
	   zero lane marks a hit.  */
	movdqa	(%rdi), %xmm5
	movdqa	%xmm5, %xmm0
	pcmpeqb	%xmm1, %xmm5
	pcmpeqb	%xmm6, %xmm0
	por	%xmm0, %xmm5
	pcmpeqb %xmm6, %xmm2
	pcmpeqb %xmm6, %xmm3
	pcmpeqb %xmm6, %xmm4

	/* Assemble the 64-bit mask: %ecx = bytes 0..15, %rax = 16..31,
	   %r8 = 32..47, %rdx = 48..63.  */
	pmovmskb %xmm5, %ecx
	pmovmskb %xmm2, %eax
	salq	$16, %rax
	pmovmskb %xmm3, %r8d
	pmovmskb %xmm4, %edx
	salq	$32, %r8
	orq	%r8, %rax
	orq	%rcx, %rax
	salq	$48, %rdx
	orq	%rdx, %rax
	.p2align 3
L(return):
	/* Shared exit.  %rax is a non-zero mask relative to %rdi; the
	   lowest set bit is the first CH or NUL.  */
	bsfq	%rax, %rax
#ifdef AS_STRCHRNUL
	leaq	(%rdi,%rax), %rax
#else
	/* Return 0 instead of the address when the byte found is the
	   terminator and not CH.  */
	movl	$0, %edx
	leaq	(%rdi,%rax), %rax
	cmpb	%sil, (%rax)
	cmovne	%rdx, %rax
#endif
	ret
	.p2align 4

L(cross_page):
	/* STR lies within the last 63 bytes of its 4096-byte page, so the
	   64-byte-aligned block containing STR is read instead
	   (%rdx = %rdi rounded down to 64).  Being aligned, that block
	   cannot cross into the next page.  %xmm2 = 0; %xmm0 gets a copy
	   of the CH pattern so the last vector can be compared in
	   place.  */
	movq	%rdi, %rdx
	pxor	%xmm2, %xmm2
	andq	$-64, %rdx
	movdqa	%xmm1, %xmm0
	movdqa	(%rdx), %xmm3
	movdqa	%xmm3, %xmm4
	pcmpeqb	%xmm1, %xmm3
	pcmpeqb	%xmm2, %xmm4
	por	%xmm4, %xmm3
	pmovmskb %xmm3, %r8d		/* block bytes 0..15 */
	movdqa	16(%rdx), %xmm3
	movdqa	%xmm3, %xmm4
	pcmpeqb	%xmm1, %xmm3
	pcmpeqb	%xmm2, %xmm4
	por	%xmm4, %xmm3
	pmovmskb %xmm3, %eax		/* block bytes 16..31 */
	movdqa	32(%rdx), %xmm3
	movdqa	%xmm3, %xmm4
	pcmpeqb	%xmm1, %xmm3
	salq	$16, %rax
	pcmpeqb	%xmm2, %xmm4
	por	%xmm4, %xmm3
	pmovmskb %xmm3, %r9d		/* block bytes 32..47 */
	movdqa	48(%rdx), %xmm3
	pcmpeqb	%xmm3, %xmm2		/* NUL lanes; consumes the zero.  */
	salq	$32, %r9
	pcmpeqb	%xmm3, %xmm0		/* CH lanes, in the copy of %xmm1.  */
	orq	%r9, %rax
	orq	%r8, %rax
	por	%xmm2, %xmm0
	pmovmskb %xmm0, %ecx		/* block bytes 48..63 */
	salq	$48, %rcx
	orq	%rcx, %rax
	/* %cl = low byte of (STR - block start), i.e. STR's offset inside
	   the block (0..63).  Shifting the mask right by it drops hits
	   located before STR and makes bit 0 correspond to (%rdi).  */
	movl	%edi, %ecx
	subb	%dl, %cl
	shrq	%cl, %rax
	testq	%rax, %rax
	jne	L(return)
	/* Nothing from STR to the end of this block: continue with the
	   aligned loop, which starts at the following block.  */
	jmp	L(loop_start)

END (strchr)

/* Only when this file is built as plain strchr (AS_STRCHRNUL not
   defined): register `index' as an additional name for strchr and emit
   the library-internal definition.  weak_alias and
   libc_hidden_builtin_def are macros defined elsewhere in the project;
   presumably they create a weak symbol alias and a hidden internal
   alias respectively -- confirm against their definitions.  */
#ifndef AS_STRCHRNUL
weak_alias (strchr, index)
libc_hidden_builtin_def (strchr)
#endif


Browse the source code of Glibc/sysdeps/x86_64/strchr.S