/*
 * arch/alpha/lib/strrchr.S
 * Contributed by Richard Henderson (rth@tamu.edu)
 *
 * Return the address of the last occurrence of a given character
 * within a null-terminated string, or null if it is not found.
 */

 | 9 | #include <asm/regdef.h> | 
 | 10 |  | 
 | 11 | 	.set noreorder | 
 | 12 | 	.set noat | 
 | 13 |  | 
 | 14 | 	.align 3 | 
 | 15 | 	.ent strrchr | 
 | 16 | 	.globl strrchr | 
 | 17 | strrchr: | 
 | 18 | 	.frame sp, 0, ra | 
 | 19 | 	.prologue 0 | 
 | 20 |  | 
 | 21 | 	zapnot	a1, 1, a1	# e0    : zero extend our test character | 
 | 22 | 	mov	zero, t6	# .. e1 : t6 is last match aligned addr | 
 | 23 | 	sll	a1, 8, t5	# e0    : replicate our test character | 
 | 24 | 	mov	zero, t8	# .. e1 : t8 is last match byte compare mask | 
 | 25 | 	or	t5, a1, a1	# e0    : | 
 | 26 | 	ldq_u   t0, 0(a0)	# .. e1 : load first quadword | 
 | 27 | 	sll	a1, 16, t5	# e0    : | 
 | 28 | 	andnot  a0, 7, v0	# .. e1 : align source addr | 
 | 29 | 	or	t5, a1, a1	# e0    : | 
 | 30 | 	lda	t4, -1		# .. e1 : build garbage mask | 
 | 31 | 	sll	a1, 32, t5	# e0    : | 
 | 32 | 	cmpbge  zero, t0, t1	# .. e1 : bits set iff byte == zero | 
 | 33 | 	mskqh	t4, a0, t4	# e0    : | 
 | 34 | 	or	t5, a1, a1	# .. e1 : character replication complete | 
 | 35 | 	xor	t0, a1, t2	# e0    : make bytes == c zero | 
 | 36 | 	cmpbge	zero, t4, t4	# .. e1 : bits set iff byte is garbage | 
 | 37 | 	cmpbge  zero, t2, t3	# e0    : bits set iff byte == c | 
 | 38 | 	andnot	t1, t4, t1	# .. e1 : clear garbage from null test | 
 | 39 | 	andnot	t3, t4, t3	# e0    : clear garbage from char test | 
 | 40 | 	bne	t1, $eos	# .. e1 : did we already hit the terminator? | 
 | 41 |  | 
 | 42 | 	/* Character search main loop */ | 
 | 43 | $loop: | 
 | 44 | 	ldq	t0, 8(v0)	# e0    : load next quadword | 
 | 45 | 	cmovne	t3, v0, t6	# .. e1 : save previous comparisons match | 
 | 46 | 	cmovne	t3, t3, t8	# e0    : | 
 | 47 | 	addq	v0, 8, v0	# .. e1 : | 
 | 48 | 	xor	t0, a1, t2	# e0    : | 
 | 49 | 	cmpbge	zero, t0, t1	# .. e1 : bits set iff byte == zero | 
 | 50 | 	cmpbge	zero, t2, t3	# e0    : bits set iff byte == c | 
 | 51 | 	beq	t1, $loop	# .. e1 : if we havnt seen a null, loop | 
 | 52 |  | 
 | 53 | 	/* Mask out character matches after terminator */ | 
 | 54 | $eos: | 
 | 55 | 	negq	t1, t4		# e0    : isolate first null byte match | 
 | 56 | 	and	t1, t4, t4	# e1    : | 
| Lucas De Marchi | 25985ed | 2011-03-30 22:57:33 -0300 | [diff] [blame] | 57 | 	subq	t4, 1, t5	# e0    : build a mask of the bytes up to... | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 58 | 	or	t4, t5, t4	# e1    : ... and including the null | 
 | 59 |  | 
 | 60 | 	and	t3, t4, t3	# e0    : mask out char matches after null | 
 | 61 | 	cmovne	t3, t3, t8	# .. e1 : save it, if match found | 
 | 62 | 	cmovne	t3, v0, t6	# e0    : | 
 | 63 |  | 
 | 64 | 	/* Locate the address of the last matched character */ | 
 | 65 |  | 
 | 66 | 	/* Retain the early exit for the ev4 -- the ev5 mispredict penalty | 
 | 67 | 	   is 5 cycles -- the same as just falling through.  */ | 
 | 68 | 	beq	t8, $retnull	# .. e1 : | 
 | 69 |  | 
 | 70 | 	and	t8, 0xf0, t2	# e0    : binary search for the high bit set | 
 | 71 | 	cmovne	t2, t2, t8	# .. e1 (zdb) | 
 | 72 | 	cmovne	t2, 4, t2	# e0    : | 
 | 73 | 	and	t8, 0xcc, t1	# .. e1 : | 
 | 74 | 	cmovne	t1, t1, t8	# e0    : | 
 | 75 | 	cmovne	t1, 2, t1	# .. e1 : | 
 | 76 | 	and	t8, 0xaa, t0	# e0    : | 
 | 77 | 	cmovne	t0, 1, t0	# .. e1 (zdb) | 
 | 78 | 	addq	t2, t1, t1	# e0    : | 
 | 79 | 	addq	t6, t0, v0	# .. e1 : add our aligned base ptr to the mix | 
 | 80 | 	addq	v0, t1, v0	# e0    : | 
 | 81 | 	ret			# .. e1 : | 
 | 82 |  | 
 | 83 | $retnull: | 
 | 84 | 	mov	zero, v0	# e0    : | 
 | 85 | 	ret			# .. e1 : | 
 | 86 |  | 
 | 87 | 	.end strrchr |