| /* A memcpy for CRIS. | 
 |    Copyright (C) 1994-2005 Axis Communications. | 
 |    All rights reserved. | 
 |  | 
 |    Redistribution and use in source and binary forms, with or without | 
 |    modification, are permitted provided that the following conditions | 
 |    are met: | 
 |  | 
 |    1. Redistributions of source code must retain the above copyright | 
 |       notice, this list of conditions and the following disclaimer. | 
 |  | 
 |    2. Neither the name of Axis Communications nor the names of its | 
 |       contributors may be used to endorse or promote products derived | 
 |       from this software without specific prior written permission. | 
 |  | 
 |    THIS SOFTWARE IS PROVIDED BY AXIS COMMUNICATIONS AND ITS CONTRIBUTORS | 
 |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 
 |    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL AXIS | 
 |    COMMUNICATIONS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, | 
 |    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | 
 |    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | 
 |    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 
 |    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | 
 |    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING | 
 |    IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 
 |    POSSIBILITY OF SUCH DAMAGE.  */ | 
 |  | 
/* FIXME: This file should really only be used for reference, as the
   result depends somewhat on GCC generating the code we expect rather
   than on what we describe.  An assembly file should be used instead.  */
 |  | 
 | #include <stddef.h> | 
 |  | 
 | /* Break even between movem and move16 is really at 38.7 * 2, but | 
 |    modulo 44, so up to the next multiple of 44, we use ordinary code.  */ | 
 | #define MEMCPY_BY_BLOCK_THRESHOLD (44 * 2) | 
 |  | 
 | /* No name ambiguities in this file.  */ | 
 | __asm__ (".syntax no_register_prefix"); | 
 |  | 
 | void * | 
 | memcpy(void *pdst, const void *psrc, size_t pn) | 
 | { | 
 |   /* Now we want the parameters put in special registers. | 
 |      Make sure the compiler is able to make something useful of this. | 
 |      As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop). | 
 |  | 
 |      If gcc was allright, it really would need no temporaries, and no | 
 |      stack space to save stuff on.  */ | 
 |  | 
 |   register void *return_dst __asm__ ("r10") = pdst; | 
 |   register unsigned char *dst __asm__ ("r13") = pdst; | 
 |   register unsigned const char *src __asm__ ("r11") = psrc; | 
 |   register int n __asm__ ("r12") = pn; | 
 |  | 
 |   /* When src is aligned but not dst, this makes a few extra needless | 
 |      cycles.  I believe it would take as many to check that the | 
 |      re-alignment was unnecessary.  */ | 
 |   if (((unsigned long) dst & 3) != 0 | 
 |       /* Don't align if we wouldn't copy more than a few bytes; so we | 
 | 	 don't have to check further for overflows.  */ | 
 |       && n >= 3) | 
 |   { | 
 |     if ((unsigned long) dst & 1) | 
 |       { | 
 | 	n--; | 
 | 	*dst = *src; | 
 | 	src++; | 
 | 	dst++; | 
 |       } | 
 |  | 
 |     if ((unsigned long) dst & 2) | 
 |       { | 
 | 	n -= 2; | 
 | 	*(short *) dst = *(short *) src; | 
 | 	src += 2; | 
 | 	dst += 2; | 
 |       } | 
 |   } | 
 |  | 
 |   /* Decide which copying method to use.  */ | 
 |   if (n >= MEMCPY_BY_BLOCK_THRESHOLD) | 
 |     { | 
 |       /* It is not optimal to tell the compiler about clobbering any | 
 | 	 registers; that will move the saving/restoring of those registers | 
 | 	 to the function prologue/epilogue, and make non-movem sizes | 
 | 	 suboptimal.  */ | 
 |       __asm__ volatile | 
 | 	("\ | 
 | 	 ;; GCC does promise correct register allocations, but let's	\n\ | 
 | 	 ;; make sure it keeps its promises.				\n\ | 
 | 	 .ifnc %0-%1-%2,$r13-$r11-$r12					\n\ | 
 | 	 .error \"GCC reg alloc bug: %0-%1-%4 != $r13-$r12-$r11\"	\n\ | 
 | 	 .endif								\n\ | 
 | 									\n\ | 
 | 	 ;; Save the registers we'll use in the movem process		\n\ | 
 | 	 ;; on the stack.						\n\ | 
 | 	 subq	11*4,sp							\n\ | 
 | 	 movem	r10,[sp]						\n\ | 
 | 									\n\ | 
 | 	 ;; Now we've got this:						\n\ | 
 | 	 ;; r11 - src							\n\ | 
 | 	 ;; r13 - dst							\n\ | 
 | 	 ;; r12 - n							\n\ | 
 | 									\n\ | 
 | 	 ;; Update n for the first loop.				\n\ | 
 | 	 subq	 44,r12							\n\ | 
 | 0:									\n\ | 
 | " | 
 | #ifdef __arch_common_v10_v32 | 
 | 	 /* Cater to branch offset difference between v32 and v10.  We | 
 | 	    assume the branch below has an 8-bit offset.  */ | 
 | "	 setf\n" | 
 | #endif | 
 | "	 movem	[r11+],r10						\n\ | 
 | 	 subq	44,r12							\n\ | 
 | 	 bge	 0b							\n\ | 
 | 	 movem	r10,[r13+]						\n\ | 
 | 									\n\ | 
 | 	 ;; Compensate for last loop underflowing n.			\n\ | 
 | 	 addq	44,r12							\n\ | 
 | 									\n\ | 
 | 	 ;; Restore registers from stack.				\n\ | 
 | 	 movem [sp+],r10" | 
 |  | 
 | 	 /* Outputs.  */ | 
 | 	 : "=r" (dst), "=r" (src), "=r" (n) | 
 |  | 
 | 	 /* Inputs.  */ | 
 | 	 : "0" (dst), "1" (src), "2" (n)); | 
 |     } | 
 |  | 
 |   while (n >= 16) | 
 |     { | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |  | 
 |       n -= 16; | 
 |     } | 
 |  | 
 |   switch (n) | 
 |     { | 
 |     case 0: | 
 |       break; | 
 |  | 
 |     case 1: | 
 |       *dst = *src; | 
 |       break; | 
 |  | 
 |     case 2: | 
 |       *(short *) dst = *(short *) src; | 
 |       break; | 
 |  | 
 |     case 3: | 
 |       *(short *) dst = *(short *) src; dst += 2; src += 2; | 
 |       *dst = *src; | 
 |       break; | 
 |  | 
 |     case 4: | 
 |       *(long *) dst = *(long *) src; | 
 |       break; | 
 |  | 
 |     case 5: | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *dst = *src; | 
 |       break; | 
 |  | 
 |     case 6: | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(short *) dst = *(short *) src; | 
 |       break; | 
 |  | 
 |     case 7: | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(short *) dst = *(short *) src; dst += 2; src += 2; | 
 |       *dst = *src; | 
 |       break; | 
 |  | 
 |     case 8: | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; | 
 |       break; | 
 |  | 
 |     case 9: | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *dst = *src; | 
 |       break; | 
 |  | 
 |     case 10: | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(short *) dst = *(short *) src; | 
 |       break; | 
 |  | 
 |     case 11: | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(short *) dst = *(short *) src; dst += 2; src += 2; | 
 |       *dst = *src; | 
 |       break; | 
 |  | 
 |     case 12: | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; | 
 |       break; | 
 |  | 
 |     case 13: | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *dst = *src; | 
 |       break; | 
 |  | 
 |     case 14: | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(short *) dst = *(short *) src; | 
 |       break; | 
 |  | 
 |     case 15: | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(short *) dst = *(short *) src; dst += 2; src += 2; | 
 |       *dst = *src; | 
 |       break; | 
 |     } | 
 |  | 
 |   return return_dst; | 
 | } |