/* memset.S: optimised assembly memset
 *
 * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */


 | 13 |         .text | 
 | 14 |         .p2align	4 | 
 | 15 |  | 
 | 16 | ############################################################################### | 
 | 17 | # | 
 | 18 | # void *memset(void *p, char ch, size_t count) | 
 | 19 | # | 
 | 20 | # - NOTE: must not use any stack. exception detection performs function return | 
 | 21 | #         to caller's fixup routine, aborting the remainder of the set | 
 | 22 | #         GR4, GR7, GR8, and GR11 must be managed | 
 | 23 | # | 
 | 24 | ############################################################################### | 
 | 25 |         .globl		memset,__memset_end | 
 | 26 |         .type		memset,@function | 
 | 27 | memset: | 
 | 28 | 	orcc.p		gr10,gr0,gr5,icc3		; GR5 = count | 
 | 29 | 	andi		gr9,#0xff,gr9 | 
 | 30 | 	or.p		gr8,gr0,gr4			; GR4 = address | 
 | 31 | 	beqlr		icc3,#0 | 
 | 32 |  | 
 | 33 | 	# conditionally write a byte to 2b-align the address | 
 | 34 | 	setlos.p	#1,gr6 | 
 | 35 | 	andicc		gr4,#1,gr0,icc0 | 
 | 36 | 	ckne		icc0,cc7 | 
 | 37 | 	cstb.p		gr9,@(gr4,gr0)		,cc7,#1 | 
 | 38 | 	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3 | 
 | 39 | 	cadd.p		gr4,gr6,gr4		,cc7,#1 | 
 | 40 | 	beqlr		icc3,#0 | 
 | 41 |  | 
 | 42 | 	# conditionally write a word to 4b-align the address | 
 | 43 | 	andicc.p	gr4,#2,gr0,icc0 | 
 | 44 | 	subicc		gr5,#2,gr0,icc1 | 
 | 45 | 	setlos.p	#2,gr6 | 
 | 46 | 	ckne		icc0,cc7 | 
 | 47 | 	slli.p		gr9,#8,gr12			; need to double up the pattern | 
 | 48 | 	cknc		icc1,cc5 | 
 | 49 | 	or.p		gr9,gr12,gr12 | 
 | 50 | 	andcr		cc7,cc5,cc7 | 
 | 51 |  | 
 | 52 | 	csth.p		gr12,@(gr4,gr0)		,cc7,#1 | 
 | 53 | 	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3 | 
 | 54 | 	cadd.p		gr4,gr6,gr4		,cc7,#1 | 
 | 55 | 	beqlr		icc3,#0 | 
 | 56 |  | 
 | 57 | 	# conditionally write a dword to 8b-align the address | 
 | 58 | 	andicc.p	gr4,#4,gr0,icc0 | 
 | 59 | 	subicc		gr5,#4,gr0,icc1 | 
 | 60 | 	setlos.p	#4,gr6 | 
 | 61 | 	ckne		icc0,cc7 | 
 | 62 | 	slli.p		gr12,#16,gr13			; need to quadruple-up the pattern | 
 | 63 | 	cknc		icc1,cc5 | 
 | 64 | 	or.p		gr13,gr12,gr12 | 
 | 65 | 	andcr		cc7,cc5,cc7 | 
 | 66 |  | 
 | 67 | 	cst.p		gr12,@(gr4,gr0)		,cc7,#1 | 
 | 68 | 	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3 | 
 | 69 | 	cadd.p		gr4,gr6,gr4		,cc7,#1 | 
 | 70 | 	beqlr		icc3,#0 | 
 | 71 |  | 
 | 72 | 	or.p		gr12,gr12,gr13			; need to octuple-up the pattern | 
 | 73 |  | 
 | 74 | 	# the address is now 8b-aligned - loop around writing 64b chunks | 
 | 75 | 	setlos		#8,gr7 | 
 | 76 | 	subi.p		gr4,#8,gr4			; store with update index does weird stuff | 
 | 77 | 	setlos		#64,gr6 | 
 | 78 |  | 
 | 79 | 	subicc		gr5,#64,gr0,icc0 | 
 | 80 | 0:	cknc		icc0,cc7 | 
 | 81 | 	cstdu		gr12,@(gr4,gr7)		,cc7,#1 | 
 | 82 | 	cstdu		gr12,@(gr4,gr7)		,cc7,#1 | 
 | 83 | 	cstdu		gr12,@(gr4,gr7)		,cc7,#1 | 
 | 84 | 	cstdu		gr12,@(gr4,gr7)		,cc7,#1 | 
 | 85 | 	cstdu		gr12,@(gr4,gr7)		,cc7,#1 | 
 | 86 | 	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
 | 87 | 	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3 | 
 | 88 | 	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
 | 89 | 	subicc		gr5,#64,gr0,icc0 | 
 | 90 | 	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
 | 91 | 	beqlr		icc3,#0 | 
 | 92 | 	bnc		icc0,#2,0b | 
 | 93 |  | 
 | 94 | 	# now do 32-byte remnant | 
 | 95 | 	subicc.p	gr5,#32,gr0,icc0 | 
 | 96 | 	setlos		#32,gr6 | 
 | 97 | 	cknc		icc0,cc7 | 
 | 98 | 	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
 | 99 | 	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3 | 
 | 100 | 	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
 | 101 | 	setlos		#16,gr6 | 
 | 102 | 	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
 | 103 | 	subicc		gr5,#16,gr0,icc0 | 
 | 104 | 	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
 | 105 | 	beqlr		icc3,#0 | 
 | 106 |  | 
 | 107 | 	# now do 16-byte remnant | 
 | 108 | 	cknc		icc0,cc7 | 
 | 109 | 	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
 | 110 | 	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3 | 
 | 111 | 	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
 | 112 | 	beqlr		icc3,#0 | 
 | 113 |  | 
 | 114 | 	# now do 8-byte remnant | 
 | 115 | 	subicc		gr5,#8,gr0,icc1 | 
 | 116 | 	cknc		icc1,cc7 | 
 | 117 | 	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
 | 118 | 	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3 | 
 | 119 | 	setlos.p	#4,gr7 | 
 | 120 | 	beqlr		icc3,#0 | 
 | 121 |  | 
 | 122 | 	# now do 4-byte remnant | 
 | 123 | 	subicc		gr5,#4,gr0,icc0 | 
 | 124 | 	addi.p		gr4,#4,gr4 | 
 | 125 | 	cknc		icc0,cc7 | 
 | 126 | 	cstu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
 | 127 | 	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3 | 
 | 128 | 	subicc.p	gr5,#2,gr0,icc1 | 
 | 129 | 	beqlr		icc3,#0 | 
 | 130 |  | 
 | 131 | 	# now do 2-byte remnant | 
 | 132 | 	setlos		#2,gr7 | 
 | 133 | 	addi.p		gr4,#2,gr4 | 
 | 134 | 	cknc		icc1,cc7 | 
 | 135 | 	csthu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
 | 136 | 	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3 | 
 | 137 | 	subicc.p	gr5,#1,gr0,icc0 | 
 | 138 | 	beqlr		icc3,#0 | 
 | 139 |  | 
 | 140 | 	# now do 1-byte remnant | 
 | 141 | 	setlos		#0,gr7 | 
 | 142 | 	addi.p		gr4,#2,gr4 | 
 | 143 | 	cknc		icc0,cc7 | 
 | 144 | 	cstb.p		gr12,@(gr4,gr0)		,cc7,#1 | 
 | 145 | 	bralr | 
 | 146 | __memset_end: | 
 | 147 |  | 
 | 148 | 	.size		memset, __memset_end-memset | 
 | 149 |  | 
 | 150 | ############################################################################### | 
 | 151 | # | 
 | 152 | # clear memory in userspace | 
 | 153 | # - return the number of bytes that could not be cleared (0 on complete success) | 
 | 154 | # | 
 | 155 | # long __memset_user(void *p, size_t count) | 
 | 156 | # | 
 | 157 | ############################################################################### | 
 | 158 |         .globl		__memset_user, __memset_user_error_lr, __memset_user_error_handler | 
 | 159 |         .type		__memset_user,@function | 
 | 160 | __memset_user: | 
 | 161 | 	movsg		lr,gr11 | 
 | 162 |  | 
 | 163 | 	# abuse memset to do the dirty work | 
 | 164 | 	or.p		gr9,gr9,gr10 | 
 | 165 | 	setlos		#0,gr9 | 
 | 166 | 	call		memset | 
 | 167 | __memset_user_error_lr: | 
 | 168 | 	jmpl.p		@(gr11,gr0) | 
 | 169 | 	setlos		#0,gr8 | 
 | 170 |  | 
 | 171 | 	# deal any exception generated by memset | 
 | 172 | 	# GR4  - memset's address tracking pointer | 
 | 173 | 	# GR7  - memset's step value (index register for store insns) | 
 | 174 | 	# GR8  - memset's original start address | 
 | 175 | 	# GR10 - memset's original count | 
 | 176 | __memset_user_error_handler: | 
 | 177 | 	add.p		gr4,gr7,gr4 | 
 | 178 | 	add		gr8,gr10,gr8 | 
 | 179 | 	jmpl.p		@(gr11,gr0) | 
 | 180 | 	sub		gr8,gr4,gr8		; we return the amount left uncleared | 
 | 181 |  | 
 | 182 | 	.size		__memset_user, .-__memset_user |