| Michal Simek | 322ae8e | 2009-03-27 14:25:21 +0100 | [diff] [blame] | 1 | /* | 
|  | 2 | * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu> | 
|  | 3 | * Copyright (C) 2008-2009 PetaLogix | 
|  | 4 | * Copyright (C) 2007 John Williams | 
|  | 5 | * | 
|  | 6 | * Reasonably optimised generic C-code for memcpy on Microblaze | 
|  | 7 | * This is generic C code to do efficient, alignment-aware memcpy. | 
|  | 8 | * | 
|  | 9 | * It is based on demo code originally Copyright 2001 by Intel Corp, taken from | 
|  | 10 | * http://www.embedded.com/showArticle.jhtml?articleID=19205567 | 
|  | 11 | * | 
|  | 12 | * Attempts were made, unsuccessfully, to contact the original | 
|  | 13 | * author of this code (Michael Morrow, Intel).  Below is the original | 
|  | 14 | * copyright notice. | 
|  | 15 | * | 
|  | 16 | * This software has been developed by Intel Corporation. | 
|  | 17 | * Intel specifically disclaims all warranties, express or | 
|  | 18 | * implied, and all liability, including consequential and | 
|  | 19 | * other indirect damages, for the use of this program, including | 
|  | 20 | * liability for infringement of any proprietary rights, | 
|  | 21 | * and including the warranties of merchantability and fitness | 
|  | 22 | * for a particular purpose. Intel does not assume any | 
|  | 23 | * responsibility for and errors which may appear in this program | 
|  | 24 | * not any responsibility to update it. | 
|  | 25 | */ | 
|  | 26 |  | 
|  | 27 | #include <linux/types.h> | 
|  | 28 | #include <linux/stddef.h> | 
|  | 29 | #include <linux/compiler.h> | 
|  | 30 | #include <linux/module.h> | 
|  | 31 |  | 
|  | 32 | #include <linux/string.h> | 
|  | 33 | #include <asm/system.h> | 
|  | 34 |  | 
|  | 35 | #ifdef __HAVE_ARCH_MEMCPY | 
#ifdef CONFIG_OPT_LIB_FUNCTION
/*
 * Word-merging helpers for the unaligned-source paths of memcpy().
 *
 * `skip` is the number of bits (8, 16 or 24) occupied by the bytes of an
 * aligned source word that sit at the lowest addresses:
 *
 * MEMCPY_WORD_TAIL(w, skip) discards those lowest-addressed bytes of w and
 *	moves the remaining bytes to the lowest-addressed end of the result.
 * MEMCPY_WORD_HEAD(w, skip) keeps only those lowest-addressed bytes of w and
 *	moves them to the highest-addressed end of the result.
 *
 * OR-ing the tail of one source word with the head of the next one yields a
 * complete destination word. Which shift direction corresponds to "lower
 * address" depends on the byte order, hence the two variants. The shift
 * counts are always compile-time constants at the call sites.
 */
#if defined(__MICROBLAZEEL__) || \
	(defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
#define MEMCPY_WORD_TAIL(w, skip)	((w) >> (skip))
#define MEMCPY_WORD_HEAD(w, skip)	((w) << (32 - (skip)))
#else
#define MEMCPY_WORD_TAIL(w, skip)	((w) << (skip))
#define MEMCPY_WORD_HEAD(w, skip)	((w) >> (32 - (skip)))
#endif
#endif /* CONFIG_OPT_LIB_FUNCTION */

/*
 * memcpy - copy c bytes from v_src to v_dst
 * @v_dst: destination buffer
 * @v_src: source buffer
 * @c: number of bytes to copy
 *
 * Bytes are copied front to back and nothing is done to cope with
 * overlapping buffers.
 *
 * Without CONFIG_OPT_LIB_FUNCTION this is a plain byte loop. With it, the
 * destination is first brought to a 32-bit boundary, the bulk is moved one
 * word at a time, and the last 0-3 bytes are copied individually. When the
 * source is not word aligned relative to the destination, whole aligned
 * source words are loaded and stitched together with shifts; every word
 * loaded that way overlaps the source range, but its unused bytes may lie
 * just outside [v_src, v_src + c).
 *
 * Returns v_dst.
 */
void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *src = v_src;
	char *dst = v_dst;
#ifndef CONFIG_OPT_LIB_FUNCTION
	/* Simple, byte oriented memcpy. */
	while (c--)
		*dst++ = *src++;

	return v_dst;
#else
	/*
	 * Word-at-a-time copy. This works best if source and destination
	 * share the same alignment. If they do not, shifts are necessary,
	 * which might result in bad performance on MicroBlaze systems
	 * without a barrel shifter.
	 */
	const uint32_t *i_src;
	uint32_t *i_dst;

	if (c >= 4) {
		uint32_t value, buf_hold;

		/*
		 * Align the destination to a word boundary with byte copies.
		 * At most 3 bytes are consumed, so c stays >= 1.
		 */
		switch ((unsigned long)dst & 3) {
		case 1:
			*dst++ = *src++;
			--c;
			/* fallthrough */
		case 2:
			*dst++ = *src++;
			--c;
			/* fallthrough */
		case 3:
			*dst++ = *src++;
			--c;
		}

		i_dst = (void *)dst;

		/*
		 * Choose a copy scheme based on the source alignment
		 * relative to the (now word aligned) destination.
		 */
		switch ((unsigned long)src & 3) {
		case 0x0:	/* Source is word aligned as well */
			i_src = (const void *)src;

			for (; c >= 4; c -= 4)
				*i_dst++ = *i_src++;

			src = (const void *)i_src;
			break;
		case 0x1:	/* Unaligned - Off by 1 */
			/* Word align the source */
			i_src = (const void *)((unsigned long)src & ~3UL);

			/* Load the holding buffer with the 3 wanted bytes */
			value = *i_src++;
			buf_hold = MEMCPY_WORD_TAIL(value, 8);

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | MEMCPY_WORD_HEAD(value, 8);
				buf_hold = MEMCPY_WORD_TAIL(value, 8);
			}

			/* 3 bytes of the last loaded word are still pending */
			src = (const void *)i_src;
			src -= 3;
			break;
		case 0x2:	/* Unaligned - Off by 2 */
			/* Word align the source */
			i_src = (const void *)((unsigned long)src & ~3UL);

			/* Load the holding buffer with the 2 wanted bytes */
			value = *i_src++;
			buf_hold = MEMCPY_WORD_TAIL(value, 16);

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | MEMCPY_WORD_HEAD(value, 16);
				buf_hold = MEMCPY_WORD_TAIL(value, 16);
			}

			/* 2 bytes of the last loaded word are still pending */
			src = (const void *)i_src;
			src -= 2;
			break;
		case 0x3:	/* Unaligned - Off by 3 */
			/* Word align the source */
			i_src = (const void *)((unsigned long)src & ~3UL);

			/* Load the holding buffer with the 1 wanted byte */
			value = *i_src++;
			buf_hold = MEMCPY_WORD_TAIL(value, 24);

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | MEMCPY_WORD_HEAD(value, 24);
				buf_hold = MEMCPY_WORD_TAIL(value, 24);
			}

			/* 1 byte of the last loaded word is still pending */
			src = (const void *)i_src;
			src -= 1;
			break;
		}
		dst = (void *)i_dst;
	}

	/* Finish off the remaining 0-3 bytes one at a time. */
	switch (c) {
	case 3:
		*dst++ = *src++;
		/* fallthrough */
	case 2:
		*dst++ = *src++;
		/* fallthrough */
	case 1:
		*dst++ = *src++;
	}

	return v_dst;
#endif
}
|  | 155 | EXPORT_SYMBOL(memcpy); | 
|  | 156 | #endif /* __HAVE_ARCH_MEMCPY */ |