/*
 * A fast checksum+copy routine using movem
 * Copyright (c) 1998-2007 Axis Communications AB
 *
 * Authors:	Bjorn Wesen
 *
 * csum_partial_copy_nocheck(const char *src, char *dst,
 *			     int len, unsigned int sum)
 */

.globl	csum_partial_copy_nocheck
.type	csum_partial_copy_nocheck,@function
csum_partial_copy_nocheck:

	;; Copy `length` bytes from src to dst and add the copied data to
	;; the incoming checksum.  Data is summed as 32-bit words in the
	;; 40-byte block loop, then as 16-bit words, then a final odd byte.
	;;
	;; In:
	;;   r10 - src
	;;   r11 - dst
	;;   r12 - length
	;;   r13 - checksum
	;; Out:
	;;   r10 - updated checksum (the sum accumulated in r13; it is
	;;         only partially folded and is not complemented here)
	;;
	;; r9 and acr are used as scratch.  r0-r8 are used by the block
	;; loop, which saves them on the stack first and restores them
	;; afterwards.  r10-r13 are modified.
	;;
	;; Note: the instruction following each branch and each ret is
	;; placed in the delay slot and executes whether or not the
	;; branch is taken.

	;; Optimized for large packets
	subq	10*4, $r12		; length -= 40; negative => no full block
	blt	_word_loop
	move.d	$r12, $acr		; (delay slot) acr = loop counter

	subq	9*4,$sp			; room for r0-r8
	clearf	c			; start the addc chain with carry clear
	movem	$r8,[$sp]		; save r0-r8

	;; do a movem copy and checksum
1:	;; A failing userspace access (the read) will have this as PC.
_mloop:	movem	[$r10+],$r9		; read 10 longwords
	;; The carry from the previous iteration's last addc is still live
	;; here, so the counter update below must leave it alone.
	addoq	-10*4, $acr, $acr	; loop counter in latency cycle
	movem	$r9,[$r11+]		; write 10 longwords

	;; perform dword checksumming on the 10 longwords
	addc	$r0,$r13
	addc	$r1,$r13
	addc	$r2,$r13
	addc	$r3,$r13
	addc	$r4,$r13
	addc	$r5,$r13
	addc	$r6,$r13
	addc	$r7,$r13
	addc	$r8,$r13
	addc	$r9,$r13

	;; test $acr, without trashing carry.
	move.d	$acr, $acr
	bpl	_mloop			; another full 40-byte block remains
	;; r12 <= acr  is needed after mloop and in the exception handlers.
	move.d	$acr, $r12		; (delay slot)

	;; fold the last carry into r13
	addc	0, $r13
	movem	[$sp+],$r8		; restore regs

_word_loop:
	addq	10*4,$r12		; compensate for last loop underflowing length

	;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below
	;; r9 can be used as temporary.
	move.d	$r13,$r9
	lsrq	16,$r9			; r9 = checksum >> 16
	and.d	0xffff,$r13		; checksum = checksum & 0xffff

	subq	2, $r12			; length -= 2; negative => no whole word left
	blt	_no_words
	add.d	$r9,$r13		; (delay slot) checksum += r9

	;; copy and checksum the rest of the words
2:	;; A failing userspace access for the read below will have this as PC.
_wloop:	move.w	[$r10+],$r9
	addu.w	$r9,$r13		; checksum += 16-bit word
	subq	2,$r12
	bge	_wloop
	move.w	$r9,[$r11+]		; (delay slot) store the word just read

_no_words:
	addq	2,$r12			; undo the last subq: r12 = 0 or 1 byte left
	bne	_do_byte
	nop
	ret
	move.d	$r13,$r10		; (delay slot) return the checksum

_do_byte:
	;; copy and checksum the last byte
3:	;; A failing userspace access for the read below will have this as PC.
	move.b	[$r10],$r9
	addu.b	$r9,$r13		; checksum += last byte
	move.b	$r9,[$r11]
	ret
	move.d	$r13,$r10		; (delay slot) return the checksum

.size	csum_partial_copy_nocheck, . - csum_partial_copy_nocheck