| Jesper Nilsson | 9fe3fd0 | 2008-03-04 14:28:52 -0800 | [diff] [blame] | 1 | /* A memcpy for CRIS. | 
 | 2 |    Copyright (C) 1994-2005 Axis Communications. | 
 | 3 |    All rights reserved. | 
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 4 |  | 
| Jesper Nilsson | 9fe3fd0 | 2008-03-04 14:28:52 -0800 | [diff] [blame] | 5 |    Redistribution and use in source and binary forms, with or without | 
 | 6 |    modification, are permitted provided that the following conditions | 
 | 7 |    are met: | 
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 8 |  | 
| Jesper Nilsson | 9fe3fd0 | 2008-03-04 14:28:52 -0800 | [diff] [blame] | 9 |    1. Redistributions of source code must retain the above copyright | 
 | 10 |       notice, this list of conditions and the following disclaimer. | 
 | 11 |  | 
 | 12 |    2. Neither the name of Axis Communications nor the names of its | 
 | 13 |       contributors may be used to endorse or promote products derived | 
 | 14 |       from this software without specific prior written permission. | 
 | 15 |  | 
 | 16 |    THIS SOFTWARE IS PROVIDED BY AXIS COMMUNICATIONS AND ITS CONTRIBUTORS | 
 | 17 |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
 | 18 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 
 | 19 |    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL AXIS | 
 | 20 |    COMMUNICATIONS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, | 
 | 21 |    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | 
 | 22 |    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | 
 | 23 |    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 
 | 24 |    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | 
 | 25 |    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING | 
 | 26 |    IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 
 | 27 |    POSSIBILITY OF SUCH DAMAGE.  */ | 
 | 28 |  | 
 | 29 | /* FIXME: This file should really only be used for reference, as the | 
 | 30 |    result depends somewhat on gcc generating what we expect rather | 
 | 31 |    than what we describe.  An assembly file should be used instead.  */ | 
 | 32 |  | 
 | 33 | #include <stddef.h> | 
 | 34 |  | 
 | 35 | /* Break even between movem and move16 is really at 38.7 * 2, but | 
 | 36 |    modulo 44, so up to the next multiple of 44, we use ordinary code.  */ | 
 /* memcpy takes its movem block-copy path only when at least this many
    bytes remain after the destination has been aligned; that path moves
    data in units of 44 bytes.  */
 | 37 | #define MEMCPY_BY_BLOCK_THRESHOLD (44 * 2) | 
 | 38 |  | 
 | 39 | /* No name ambiguities in this file, so select the assembler syntax named
    no_register_prefix for the whole file.  The inline assembly in memcpy
    spells its registers as plain r10, r11, r12, r13 and sp.  */ | 
 | 40 | __asm__ (".syntax no_register_prefix"); | 
 | 41 |  | 
 /* Copy PN bytes from PSRC to PDST and return PDST.

    The copy runs in up to four stages:
      1. If the destination is not 4-byte aligned and at least 3 bytes
         remain, copy one byte and/or one short so that it becomes
         4-byte aligned.
      2. If at least MEMCPY_BY_BLOCK_THRESHOLD bytes remain, copy blocks
         of 44 bytes with movem in inline assembly.
      3. Copy 16 bytes per iteration as four long accesses.
      4. Copy the final 0..15 bytes through a switch on the count.

    Only the destination is aligned; the short and long loads through the
    source pointer use whatever alignment it happens to have.  This
    presumably relies on the target tolerating unaligned accesses --
    TODO confirm for the CPU variants in use.  The long accesses also
    assume a long is 4 bytes, since each one advances the pointers by 4.  */
 | 42 | void * | 
 | 43 | memcpy(void *pdst, const void *psrc, size_t pn) | 
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 44 | { | 
| Jesper Nilsson | 9fe3fd0 | 2008-03-04 14:28:52 -0800 | [diff] [blame] | 45 |   /* Now we want the parameters put in special registers. | 
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 46 |      Make sure the compiler is able to make something useful of this. | 
| Jesper Nilsson | 9fe3fd0 | 2008-03-04 14:28:52 -0800 | [diff] [blame] | 47 |      As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop). | 
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 48 |  | 
| Jesper Nilsson | 9fe3fd0 | 2008-03-04 14:28:52 -0800 | [diff] [blame] | 49 |      If gcc was all right, it really would need no temporaries, and no | 
 | 50 |      stack space to save stuff on.  */ | 
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 51 |  | 
   /* return_dst keeps the original destination for the return value; dst,
      src and n are the working copies that the code below advances.  */
 | 52 |   register void *return_dst __asm__ ("r10") = pdst; | 
| Jesper Nilsson | 9fe3fd0 | 2008-03-04 14:28:52 -0800 | [diff] [blame] | 53 |   register unsigned char *dst __asm__ ("r13") = pdst; | 
 | 54 |   register unsigned const char *src __asm__ ("r11") = psrc; | 
   /* NOTE(review): n is a signed int while pn is a size_t.  A pn that does
      not fit in an int would presumably turn negative here, in which case
      every size test below fails and nothing is copied -- confirm that
      such sizes cannot occur on this target.  */
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 55 |   register int n __asm__ ("r12") = pn; | 
 | 56 |  | 
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 57 |   /* When src is aligned but not dst, this makes a few extra needless | 
 | 58 |      cycles.  I believe it would take as many to check that the | 
 | 59 |      re-alignment was unnecessary.  */ | 
 | 60 |   if (((unsigned long) dst & 3) != 0 | 
 | 61 |       /* Don't align if we wouldn't copy more than a few bytes; so we | 
 | 62 | 	 don't have to check further for overflows.  */ | 
 | 63 |       && n >= 3) | 
 | 64 |   { | 
     /* Odd destination address: copy one byte to make it even.  */
 | 65 |     if ((unsigned long) dst & 1) | 
| Jesper Nilsson | 9fe3fd0 | 2008-03-04 14:28:52 -0800 | [diff] [blame] | 66 |       { | 
 | 67 | 	n--; | 
 | 68 | 	*dst = *src; | 
 | 69 | 	src++; | 
 | 70 | 	dst++; | 
 | 71 |       } | 
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 72 |  | 
     /* Even but not a multiple of 4: copy one short to finish aligning.  */
 | 73 |     if ((unsigned long) dst & 2) | 
| Jesper Nilsson | 9fe3fd0 | 2008-03-04 14:28:52 -0800 | [diff] [blame] | 74 |       { | 
 | 75 | 	n -= 2; | 
 | 76 | 	*(short *) dst = *(short *) src; | 
 | 77 | 	src += 2; | 
 | 78 | 	dst += 2; | 
 | 79 |       } | 
 | 80 |   } | 
 | 81 |  | 
 | 82 |   /* Decide which copying method to use.  */ | 
 | 83 |   if (n >= MEMCPY_BY_BLOCK_THRESHOLD) | 
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 84 |     { | 
       /* Block copy.  The assembly below reserves 11*4 bytes of stack to
          save the registers used by movem, then repeatedly loads 44 bytes
          from [r11+] and stores them to [r13+], subtracting 44 from r12 on
          each round.  r12 is decremented by 44 once before the loop and
          has 44 added back afterwards, so on exit n holds the number of
          bytes still to be copied.  The store is written after the bge;
          presumably it sits in the branch delay slot and therefore runs on
          every iteration, including the last -- confirm against the CRIS
          instruction set documentation.

          NOTE(review): the .ifnc test compares %0-%1-%2 with
          $r13-$r11-$r12, but the .error text prints %0-%1-%4 and names
          $r13-$r12-$r11.  Only the wording of the diagnostic is affected.

          NOTE(review): the asm statement has no clobber list at all, so
          no memory clobber is declared even though it stores through
          dst -- confirm this is intentional.  */
| Jesper Nilsson | 9fe3fd0 | 2008-03-04 14:28:52 -0800 | [diff] [blame] | 85 |       /* It is not optimal to tell the compiler about clobbering any | 
 | 86 | 	 registers; that will move the saving/restoring of those registers | 
 | 87 | 	 to the function prologue/epilogue, and make non-movem sizes | 
 | 88 | 	 suboptimal.  */ | 
 | 89 |       __asm__ volatile | 
 | 90 | 	("\ | 
 | 91 | 	 ;; GCC does promise correct register allocations, but let's	\n\ | 
 | 92 | 	 ;; make sure it keeps its promises.				\n\ | 
 | 93 | 	 .ifnc %0-%1-%2,$r13-$r11-$r12					\n\ | 
 | 94 | 	 .error \"GCC reg alloc bug: %0-%1-%4 != $r13-$r12-$r11\"	\n\ | 
 | 95 | 	 .endif								\n\ | 
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 96 | 									\n\ | 
| Jesper Nilsson | 9fe3fd0 | 2008-03-04 14:28:52 -0800 | [diff] [blame] | 97 | 	 ;; Save the registers we'll use in the movem process		\n\ | 
 | 98 | 	 ;; on the stack.						\n\ | 
 | 99 | 	 subq	11*4,sp							\n\ | 
 | 100 | 	 movem	r10,[sp]						\n\ | 
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 101 | 									\n\ | 
| Jesper Nilsson | 9fe3fd0 | 2008-03-04 14:28:52 -0800 | [diff] [blame] | 102 | 	 ;; Now we've got this:						\n\ | 
 | 103 | 	 ;; r11 - src							\n\ | 
 | 104 | 	 ;; r13 - dst							\n\ | 
 | 105 | 	 ;; r12 - n							\n\ | 
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 106 | 									\n\ | 
| Jesper Nilsson | 9fe3fd0 | 2008-03-04 14:28:52 -0800 | [diff] [blame] | 107 | 	 ;; Update n for the first loop.				\n\ | 
 | 108 | 	 subq	 44,r12							\n\ | 
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 109 | 0:									\n\ | 
| Jesper Nilsson | 9fe3fd0 | 2008-03-04 14:28:52 -0800 | [diff] [blame] | 110 | " | 
 | 111 | #ifdef __arch_common_v10_v32 | 
 | 112 | 	 /* Cater to branch offset difference between v32 and v10.  We | 
 | 113 | 	    assume the branch below has an 8-bit offset.  */ | 
 | 114 | "	 setf\n" | 
 | 115 | #endif | 
 | 116 | "	 movem	[r11+],r10						\n\ | 
 | 117 | 	 subq	44,r12							\n\ | 
 | 118 | 	 bge	 0b							\n\ | 
 | 119 | 	 movem	r10,[r13+]						\n\ | 
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 120 | 									\n\ | 
| Jesper Nilsson | 9fe3fd0 | 2008-03-04 14:28:52 -0800 | [diff] [blame] | 121 | 	 ;; Compensate for last loop underflowing n.			\n\ | 
 | 122 | 	 addq	44,r12							\n\ | 
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 123 | 									\n\ | 
| Jesper Nilsson | 9fe3fd0 | 2008-03-04 14:28:52 -0800 | [diff] [blame] | 124 | 	 ;; Restore registers from stack.				\n\ | 
 | 125 | 	 movem [sp+],r10" | 
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 126 |  | 
| Jesper Nilsson | 9fe3fd0 | 2008-03-04 14:28:52 -0800 | [diff] [blame] | 127 | 	 /* Outputs.  */ | 
 | 128 | 	 : "=r" (dst), "=r" (src), "=r" (n) | 
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 129 |  | 
| Jesper Nilsson | 9fe3fd0 | 2008-03-04 14:28:52 -0800 | [diff] [blame] | 130 | 	 /* Inputs.  Each one is tied to the output operand with the same
	    number, so dst, src and n are both read and updated.  */ | 
 | 131 | 	 : "0" (dst), "1" (src), "2" (n)); | 
 | 132 |     } | 
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 133 |  | 
   /* Copy 16 bytes per iteration as four long accesses; each access
      advances both pointers by 4.  */
| Jesper Nilsson | 9fe3fd0 | 2008-03-04 14:28:52 -0800 | [diff] [blame] | 134 |   while (n >= 16) | 
 | 135 |     { | 
 | 136 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 137 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 138 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 139 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 140 |  | 
| Jesper Nilsson | 9fe3fd0 | 2008-03-04 14:28:52 -0800 | [diff] [blame] | 141 |       n -= 16; | 
 | 142 |     } | 
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 143 |  | 
   /* Finish the tail.  Each case copies exactly n bytes using the largest
      long, short and byte accesses that add up to n.  There is no default
      case, so a value of n outside 0..15 copies nothing here.  */
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 144 |   switch (n) | 
| Jesper Nilsson | 9fe3fd0 | 2008-03-04 14:28:52 -0800 | [diff] [blame] | 145 |     { | 
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 146 |     case 0: | 
 | 147 |       break; | 
| Mikael Starvik | 51533b6 | 2005-07-27 11:44:44 -0700 | [diff] [blame] | 148 |  | 
| Jesper Nilsson | 9fe3fd0 | 2008-03-04 14:28:52 -0800 | [diff] [blame] | 149 |     case 1: | 
 | 150 |       *dst = *src; | 
 | 151 |       break; | 
 | 152 |  | 
 | 153 |     case 2: | 
 | 154 |       *(short *) dst = *(short *) src; | 
 | 155 |       break; | 
 | 156 |  | 
 | 157 |     case 3: | 
 | 158 |       *(short *) dst = *(short *) src; dst += 2; src += 2; | 
 | 159 |       *dst = *src; | 
 | 160 |       break; | 
 | 161 |  | 
 | 162 |     case 4: | 
 | 163 |       *(long *) dst = *(long *) src; | 
 | 164 |       break; | 
 | 165 |  | 
 | 166 |     case 5: | 
 | 167 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 168 |       *dst = *src; | 
 | 169 |       break; | 
 | 170 |  | 
 | 171 |     case 6: | 
 | 172 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 173 |       *(short *) dst = *(short *) src; | 
 | 174 |       break; | 
 | 175 |  | 
 | 176 |     case 7: | 
 | 177 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 178 |       *(short *) dst = *(short *) src; dst += 2; src += 2; | 
 | 179 |       *dst = *src; | 
 | 180 |       break; | 
 | 181 |  | 
 | 182 |     case 8: | 
 | 183 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 184 |       *(long *) dst = *(long *) src; | 
 | 185 |       break; | 
 | 186 |  | 
 | 187 |     case 9: | 
 | 188 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 189 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 190 |       *dst = *src; | 
 | 191 |       break; | 
 | 192 |  | 
 | 193 |     case 10: | 
 | 194 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 195 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 196 |       *(short *) dst = *(short *) src; | 
 | 197 |       break; | 
 | 198 |  | 
 | 199 |     case 11: | 
 | 200 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 201 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 202 |       *(short *) dst = *(short *) src; dst += 2; src += 2; | 
 | 203 |       *dst = *src; | 
 | 204 |       break; | 
 | 205 |  | 
 | 206 |     case 12: | 
 | 207 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 208 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 209 |       *(long *) dst = *(long *) src; | 
 | 210 |       break; | 
 | 211 |  | 
 | 212 |     case 13: | 
 | 213 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 214 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 215 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 216 |       *dst = *src; | 
 | 217 |       break; | 
 | 218 |  | 
 | 219 |     case 14: | 
 | 220 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 221 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 222 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 223 |       *(short *) dst = *(short *) src; | 
 | 224 |       break; | 
 | 225 |  | 
 | 226 |     case 15: | 
 | 227 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 228 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 229 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 | 230 |       *(short *) dst = *(short *) src; dst += 2; src += 2; | 
 | 231 |       *dst = *src; | 
 | 232 |       break; | 
 | 233 |     } | 
 | 234 |  | 
   /* Hand back the destination pointer the caller passed in.  */
 | 235 |   return return_dst; | 
 | 236 | } | 