blob: f28e1b878e88f8ce19df3fbfe80d3b2169ecdd2b [file] [log] [blame]
/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
30
Elliott Hughes81d6a182016-03-03 16:10:33 -080031#include <private/bionic_asm.h>
Varvara Rainchik5a922842014-04-24 15:41:20 +040032
Elliott Hughes81d6a182016-03-03 16:10:33 -080033#include "cache.h"
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040034
/* Fallback definitions for helper macros; each is only defined when the
   including environment has not already provided it.  */

/* L(name) spells an assembler-local label (".L" prefix).  */
#if !defined(L)
# define L(name)	.L##name
#endif

/* ALIGN(pow2) aligns the location counter to 2^pow2 bytes.  */
#if !defined(ALIGN)
# define ALIGN(pow2)	.p2align pow2
#endif

/* Thin wrappers around the assembler's call-frame-information directives.  */
#if !defined(cfi_rel_offset)
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#if !defined(cfi_restore)
# define cfi_restore(reg)	.cfi_restore reg
#endif

#if !defined(cfi_adjust_cfa_offset)
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

/* Unwind bookkeeping for a 4-byte push of `reg`: the CFA moves 4 bytes
   further from ESP and `reg` is recorded as saved at the top of the stack.  */
#define CFI_PUSH(reg)				\
  cfi_adjust_cfa_offset (4);			\
  cfi_rel_offset (reg, 0)

/* Unwind bookkeeping for the matching pop: the CFA moves back by 4 bytes and
   `reg` is marked as holding its original value again.  */
#define CFI_POP(reg)				\
  cfi_adjust_cfa_offset (-4);			\
  cfi_restore (reg)

/* Push/pop a 32-bit register together with its unwind annotation.  */
#define PUSH(reg)	pushl reg; CFI_PUSH (reg)
#define POP(reg)	popl reg; CFI_POP (reg)

/* ESP-relative offsets of the stack arguments.  PARMS is the offset of the
   first argument and is defined further down, depending on whether EBX is
   pushed on entry.  The four slots are consecutive 4-byte words.  */
#define DST		PARMS
#define CHR		(PARMS + 4)
#define LEN		(PARMS + 8)
#define CHK_DST_LEN	(PARMS + 12)

/* Load the return value (the DST argument) into EAX.  */
#define SETRTNVAL	movl DST(%esp), %eax

#if defined(SHARED) || defined(__PIC__)
/* Position-independent build: EBX is needed as a scratch/PC register, so it
   is pushed on entry and popped before every return.  */
# define ENTRANCE	PUSH (%ebx);
# define RETURN_END	POP (%ebx); ret
/* RETURN is used in the middle of the function: after the `ret` the unwind
   state is put back to "EBX pushed" for the code that follows.  */
# define RETURN		RETURN_END; CFI_PUSH (%ebx)
# define PARMS		8		/* Preserve EBX.  */
/* Jump-table entries are stored relative to the table base B.  */
# define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.  Steps:
     1. the thunk call leaves the address of the following `add` in EBX;
     2. adding (TABLE - .) turns that into the address of the table;
     3. adding entry number ECX converts the relative offset stored there
	into an absolute address;
     4. EDX is advanced by ECX, so EDX points past the end of the area
	that is left to fill when the target is reached.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    call	__x86.get_pc_thunk.bx;				\
    add		$(TABLE - .), %ebx;				\
    add		(%ebx,%ecx,4), %ebx;				\
    add		%ecx, %edx;					\
    jmp		*%ebx

/* PC thunk: returns with EBX holding the caller's return address.  */
	.section	.gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
	.globl	__x86.get_pc_thunk.bx
	.hidden	__x86.get_pc_thunk.bx
	ALIGN (4)
	.type	__x86.get_pc_thunk.bx,@function
__x86.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret
#else
/* Non-PIC build: EBX is not pushed on entry, so the first argument sits
   directly above the return address.  */
# define ENTRANCE
# define RETURN_END	ret
# define RETURN		RETURN_END
# define PARMS		4
/* Jump-table entries are absolute addresses.  */
# define JMPTBL(I, B)	I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  EDX is advanced by ECX before the jump.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    add		%ecx, %edx;					\
    jmp		*TABLE(,%ecx,4)
#endif

/* __memset_chk: checked entry point in front of memset.
   Stack arguments (see DST/CHR/LEN/CHK_DST_LEN): destination, fill value,
   length, and a fourth word that the length is compared against.
   If LEN <= CHK_DST_LEN (unsigned) control continues inside memset at
   L(memset_length_loaded) with the length already in ECX; otherwise it
   tail-jumps to __memset_chk_fail.  */
ENTRY(__memset_chk)
	ENTRANCE

	movl	LEN(%esp), %ecx
	cmpl	CHK_DST_LEN(%esp), %ecx
	jna	L(memset_length_loaded)

	/* Undo ENTRANCE without returning.  ENTRANCE only pushes EBX in
	   PIC/SHARED builds; popping unconditionally would remove the return
	   address from the stack in a non-PIC build.  */
#if (defined SHARED || defined __PIC__)
	POP (%ebx)
#endif
	jmp	__memset_chk_fail
END(__memset_chk)

/* memset: fill LEN bytes at DST with the low byte of CHR; EAX is loaded with
   DST before every return.  Register roles in this part of the function:
     ECX  number of bytes left to fill
     EAX  fill byte replicated into all four bytes
     EDX  destination pointer
     XMM0 fill byte replicated into all sixteen bytes (sizes >= 16)
   __memset_chk joins at L(memset_length_loaded) with ECX already loaded.  */
	.section .text.sse2,"ax",@progbits
	ALIGN (4)
ENTRY(memset)
	ENTRANCE

	movl	LEN(%esp), %ecx
L(memset_length_loaded):
	/* Zero length: nothing to store.  */
	testl	%ecx, %ecx
	jnz	L(1byteormore)
	SETRTNVAL
	RETURN

L(1byteormore):
	movzbl	CHR(%esp), %eax
	movb	%al, %ah
	/* Fill the whole EAX with pattern.  */
	movl	%eax, %edx
	shll	$16, %eax
	orl	%edx, %eax
	movl	DST(%esp), %edx
	cmpl	$1, %ecx
	je	L(1byte)
	cmpl	$16, %ecx
	jae	L(16bytesormore)

	/* 2..15 bytes: stores anchored at the start and at the end of the
	   area; they may overlap in the middle.  */
	cmpl	$4, %ecx
	jb	L(4bytesless)
	movl	%eax, (%edx)
	movl	%eax, -4(%edx, %ecx)
	cmpl	$8, %ecx
	jb	L(8bytesless)
	movl	%eax, 4(%edx)
	movl	%eax, -8(%edx, %ecx)
L(8bytesless):
	SETRTNVAL
	RETURN

L(4bytesless):
	/* 2 or 3 bytes: one word at each end.  */
	movw	%ax, (%edx)
	movw	%ax, -2(%edx, %ecx)
	SETRTNVAL
	RETURN

L(1byte):
	movb	%al, (%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(16bytesormore):
	/* Broadcast the 32-bit pattern to every lane of XMM0.  */
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0

	cmpl	$64, %ecx
	ja	L(64bytesmore)
	/* 16..64 bytes: unaligned 16-byte stores from both ends.  */
	movdqu	%xmm0, (%edx)
	movdqu	%xmm0, -16(%edx, %ecx)
	cmpl	$32, %ecx
	jbe	L(32bytesless)
	movdqu	%xmm0, 16(%edx)
	movdqu	%xmm0, -32(%edx, %ecx)
L(32bytesless):
	SETRTNVAL
	RETURN

L(64bytesmore):
	testl	$0xf, %edx
	jz	L(aligned_16)
L(not_aligned_16):
	/* Cover the unaligned head with one unaligned store, round EDX up to
	   the next 16-byte boundary and shrink ECX by the bytes skipped.
	   EAX is borrowed for the arithmetic and reloaded from XMM0.  */
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	andl	$-16, %edx
	addl	$16, %edx
	subl	%edx, %eax
	addl	%eax, %ecx
	movd	%xmm0, %eax

	ALIGN (4)
L(aligned_16):
	/* EDX is 16-byte aligned from here on.  */
	cmpl	$128, %ecx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

/* Bulk path for 128 bytes or more; EDX is 16-byte aligned, ECX is the number
   of bytes left, XMM0 holds the pattern.  The length is compared against two
   thresholds held in EBX:
     >= shared-cache size : L(128bytesormore_nt_start)
     >= data-cache size   : L(128bytes_L2_normal)
     otherwise            : L(128bytesormore_normal)
   Every path ends by dispatching the final 0..127 bytes through
   L(table_16_128bytes).

   EBX handling: when a threshold is a compile-time constant, or in a non-PIC
   build, EBX is pushed before it is loaded and popped again before the table
   dispatch.  In a PIC build without the constant, EBX is clobbered without a
   push because ENTRANCE has already saved it.  Every POP below is therefore
   guarded by the same condition as its PUSH.  */
	ALIGN (4)
L(128bytesormore):
#ifdef SHARED_CACHE_SIZE
	PUSH (%ebx)
	mov	$SHARED_CACHE_SIZE, %ebx
#else
# if (defined SHARED || defined __PIC__)
	call	__x86.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	/* Load the variable's value (memory operand, not an immediate).  */
	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
# else
	PUSH (%ebx)
	/* Load the variable's value (memory operand, not its address).  */
	mov	__x86_shared_cache_size, %ebx
# endif
#endif
	cmp	%ebx, %ecx
	jae	L(128bytesormore_nt_start)

	/* Drop the shared-cache threshold; only pop if it was pushed above.  */
#if defined SHARED_CACHE_SIZE || !(defined SHARED || defined __PIC__)
	POP (%ebx)
#endif

#ifdef DATA_CACHE_SIZE
	PUSH (%ebx)
	mov	$DATA_CACHE_SIZE, %ebx
#else
# if (defined SHARED || defined __PIC__)
	call	__x86.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	mov	__x86_data_cache_size@GOTOFF(%ebx), %ebx
# else
	PUSH (%ebx)
	mov	__x86_data_cache_size, %ebx
# endif
#endif

	cmp	%ebx, %ecx
	jae	L(128bytes_L2_normal)
	/* Bias ECX by -128 so that the borrow from the `sub` at the top of
	   each half of the unrolled loop tells whether another full 128-byte
	   chunk follows the one being stored.  */
	subl	$128, %ecx
L(128bytesormore_normal):
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	/* lea (not add) so the flags from the `sub` survive to the branch.  */
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	/* Undo the last subtraction: ECX = bytes left (0..127).  */
	lea	128(%ecx), %ecx
#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)
	POP (%ebx)
#endif
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)
	/* Unwind info only: the label below is reached from the `jae` above
	   while the data-cache threshold push is still on the stack.  */
	CFI_PUSH (%ebx)
#endif

	ALIGN (4)
L(128bytes_L2_normal):
	prefetchnta	0x380(%edx)
	prefetchnta	0x3c0(%edx)
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)
	POP (%ebx)
#endif
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
#if defined SHARED_CACHE_SIZE || !(defined SHARED || defined __PIC__)
	/* Unwind info only: the label below is reached from the first `jae`
	   while the shared-cache threshold push is still on the stack.  */
	CFI_PUSH (%ebx)
#endif

L(128bytesormore_nt_start):
	/* EBX = shared-cache threshold.  That many bytes are written with
	   ordinary stores; ECX keeps what remains after them.  */
	sub	%ebx, %ecx
	ALIGN (4)
L(128bytesormore_shared_cache_loop):
	prefetchnta	0x3c0(%edx)
	prefetchnta	0x380(%edx)
	sub	$0x80, %ebx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN (4)
L(128bytesormore_nt):
	/* Remaining full 128-byte chunks use non-temporal stores.  */
	sub	$0x80, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm0, 0x10(%edx)
	movntdq	%xmm0, 0x20(%edx)
	movntdq	%xmm0, 0x30(%edx)
	movntdq	%xmm0, 0x40(%edx)
	movntdq	%xmm0, 0x50(%edx)
	movntdq	%xmm0, 0x60(%edx)
	movntdq	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	sfence
L(shared_cache_loop_end):
#if defined SHARED_CACHE_SIZE || !(defined SHARED || defined __PIC__)
	POP (%ebx)
#endif
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))


/* Dispatch table used by BRANCH_TO_JMPTBL_ENTRY: entry N (0..127) refers to
   L(aligned_16_<N>bytes), the handler that fills the N bytes ending at EDX.
   JMPTBL decides whether an entry is stored relative to the table base or as
   an absolute address.  */
	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_16_128bytes):
	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
	.popsection

/* Tail handlers, remainders 0..3 modulo 16.  L(aligned_16_<N>bytes) is
   entered through the jump table with EDX pointing just past the end of the
   area and fills the N bytes before it: a fall-through chain of 16-byte
   stores, then the last (N mod 16) bytes from EAX.  */

	/* N = 0 (mod 16).  */
	ALIGN (4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	/* N = 1 (mod 16): final byte.  */
	ALIGN (4)
L(aligned_16_113bytes):
	movdqa	%xmm0, -113(%edx)
L(aligned_16_97bytes):
	movdqa	%xmm0, -97(%edx)
L(aligned_16_81bytes):
	movdqa	%xmm0, -81(%edx)
L(aligned_16_65bytes):
	movdqa	%xmm0, -65(%edx)
L(aligned_16_49bytes):
	movdqa	%xmm0, -49(%edx)
L(aligned_16_33bytes):
	movdqa	%xmm0, -33(%edx)
L(aligned_16_17bytes):
	movdqa	%xmm0, -17(%edx)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N = 2 (mod 16): final word.  */
	ALIGN (4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N = 3 (mod 16): word + byte.  */
	ALIGN (4)
L(aligned_16_115bytes):
	movdqa	%xmm0, -115(%edx)
L(aligned_16_99bytes):
	movdqa	%xmm0, -99(%edx)
L(aligned_16_83bytes):
	movdqa	%xmm0, -83(%edx)
L(aligned_16_67bytes):
	movdqa	%xmm0, -67(%edx)
L(aligned_16_51bytes):
	movdqa	%xmm0, -51(%edx)
L(aligned_16_35bytes):
	movdqa	%xmm0, -35(%edx)
L(aligned_16_19bytes):
	movdqa	%xmm0, -19(%edx)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

/* Tail handlers, remainders 4..7 modulo 16.  Each chain fills the N bytes
   that end at EDX: 16-byte stores falling through from the largest size,
   then the last (N mod 16) bytes from EAX.  */

	/* N = 4 (mod 16): final dword.  */
	ALIGN (4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	/* N = 5 (mod 16): dword + byte.  */
	ALIGN (4)
L(aligned_16_117bytes):
	movdqa	%xmm0, -117(%edx)
L(aligned_16_101bytes):
	movdqa	%xmm0, -101(%edx)
L(aligned_16_85bytes):
	movdqa	%xmm0, -85(%edx)
L(aligned_16_69bytes):
	movdqa	%xmm0, -69(%edx)
L(aligned_16_53bytes):
	movdqa	%xmm0, -53(%edx)
L(aligned_16_37bytes):
	movdqa	%xmm0, -37(%edx)
L(aligned_16_21bytes):
	movdqa	%xmm0, -21(%edx)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N = 6 (mod 16): dword + word.  */
	ALIGN (4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N = 7 (mod 16): dword + word + byte.  */
	ALIGN (4)
L(aligned_16_119bytes):
	movdqa	%xmm0, -119(%edx)
L(aligned_16_103bytes):
	movdqa	%xmm0, -103(%edx)
L(aligned_16_87bytes):
	movdqa	%xmm0, -87(%edx)
L(aligned_16_71bytes):
	movdqa	%xmm0, -71(%edx)
L(aligned_16_55bytes):
	movdqa	%xmm0, -55(%edx)
L(aligned_16_39bytes):
	movdqa	%xmm0, -39(%edx)
L(aligned_16_23bytes):
	movdqa	%xmm0, -23(%edx)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

/* Tail handlers, remainders 8..11 modulo 16.  Each chain fills the N bytes
   that end at EDX: 16-byte stores falling through from the largest size,
   then an 8-byte store from XMM0 plus the last (N mod 8) bytes from EAX.  */

	/* N = 8 (mod 16): final qword.  */
	ALIGN (4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	/* N = 9 (mod 16): qword + byte.  */
	ALIGN (4)
L(aligned_16_121bytes):
	movdqa	%xmm0, -121(%edx)
L(aligned_16_105bytes):
	movdqa	%xmm0, -105(%edx)
L(aligned_16_89bytes):
	movdqa	%xmm0, -89(%edx)
L(aligned_16_73bytes):
	movdqa	%xmm0, -73(%edx)
L(aligned_16_57bytes):
	movdqa	%xmm0, -57(%edx)
L(aligned_16_41bytes):
	movdqa	%xmm0, -41(%edx)
L(aligned_16_25bytes):
	movdqa	%xmm0, -25(%edx)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N = 10 (mod 16): qword + word.  */
	ALIGN (4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N = 11 (mod 16): qword + word + byte.  */
	ALIGN (4)
L(aligned_16_123bytes):
	movdqa	%xmm0, -123(%edx)
L(aligned_16_107bytes):
	movdqa	%xmm0, -107(%edx)
L(aligned_16_91bytes):
	movdqa	%xmm0, -91(%edx)
L(aligned_16_75bytes):
	movdqa	%xmm0, -75(%edx)
L(aligned_16_59bytes):
	movdqa	%xmm0, -59(%edx)
L(aligned_16_43bytes):
	movdqa	%xmm0, -43(%edx)
L(aligned_16_27bytes):
	movdqa	%xmm0, -27(%edx)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

/* Tail handlers, remainders 12..15 modulo 16, followed by the end of memset.
   Each chain fills the N bytes that end at EDX: 16-byte stores falling
   through from the largest size, then an 8-byte store from XMM0 plus the
   last (N mod 8) bytes from EAX.  */

	/* N = 12 (mod 16): qword + dword.  */
	ALIGN (4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	/* N = 13 (mod 16): qword + dword + byte.  */
	ALIGN (4)
L(aligned_16_125bytes):
	movdqa	%xmm0, -125(%edx)
L(aligned_16_109bytes):
	movdqa	%xmm0, -109(%edx)
L(aligned_16_93bytes):
	movdqa	%xmm0, -93(%edx)
L(aligned_16_77bytes):
	movdqa	%xmm0, -77(%edx)
L(aligned_16_61bytes):
	movdqa	%xmm0, -61(%edx)
L(aligned_16_45bytes):
	movdqa	%xmm0, -45(%edx)
L(aligned_16_29bytes):
	movdqa	%xmm0, -29(%edx)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N = 14 (mod 16): qword + dword + word.  */
	ALIGN (4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N = 15 (mod 16): qword + dword + word + byte.  */
	ALIGN (4)
L(aligned_16_127bytes):
	movdqa	%xmm0, -127(%edx)
L(aligned_16_111bytes):
	movdqa	%xmm0, -111(%edx)
L(aligned_16_95bytes):
	movdqa	%xmm0, -95(%edx)
L(aligned_16_79bytes):
	movdqa	%xmm0, -79(%edx)
L(aligned_16_63bytes):
	movdqa	%xmm0, -63(%edx)
L(aligned_16_47bytes):
	movdqa	%xmm0, -47(%edx)
L(aligned_16_31bytes):
	movdqa	%xmm0, -31(%edx)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	/* Last return of the function: RETURN_END does not re-establish the
	   "EBX pushed" unwind state, since no code follows.  */
	RETURN_END

END(memset)