| Michal Simek | 4e07dba | 2010-08-12 14:28:53 +0200 | [diff] [blame] | 1 | #include <linux/linkage.h> |
| 2 | |
| 3 | /* |
| 4 | * Multiply operation for 64 bit integers, for devices with hard multiply |
| 5 | * Input : Operand1[H] in Reg r5 |
| 6 | * Operand1[L] in Reg r6 |
| 7 | * Operand2[H] in Reg r7 |
| 8 | * Operand2[L] in Reg r8 |
| 9 | * Output: Result[H] in Reg r3 |
| 10 | * Result[L] in Reg r4 |
| 11 | * |
| 12 | * Explanation: |
| 13 | * |
| 14 | * Each 64 bit input is split into four 16 bit digits, most significant first: |
| 15 | * op1 = A B C D |
| 16 | * op2 = E F G H |
| 17 | * result = D * H |
| 18 | * + ((C * H + D * G) << 16) |
| 19 | * + ((B * H + C * G + D * F) << 32) |
| 20 | * + ((A * H + B * G + C * F + D * E) << 48) |
| 21 | * |
| 22 | * Only the low 64 bits of the product are kept |
| 23 | */ |
| 24 | |
#ifdef __MICROBLAZEEL__
/*
 * The halfword loads and stores below pick 16 bit digits out of words
 * kept on the stack. The offsets used assume that the upper half of a
 * word sits at the lower address, which only holds on a big-endian CPU.
 */
#error __muldi3 assembly version supports big-endian MicroBlaze only
#endif

/*
 * long long __muldi3(long long op1, long long op2)
 *
 * In:       r5:r6 = op1 (high:low), r7:r8 = op2 (high:low)
 * Out:      r3:r4 = low 64 bits of op1 * op2 (high:low)
 * Scratch:  r7 - r12 are overwritten
 * Saved:    r20 - r27 are stored on entry and reloaded before return
 *
 * Frame layout (r1 is the stack pointer after the 40 byte adjustment):
 *   r1 +  0 .. 28   saved r20 - r27
 *   r1 + 32         result, high word (Pos0 at +32, Pos1 at +34)
 *   r1 + 36         result, low word  (Pos2 at +36, Pos3 at +38)
 *   r1 + 44 .. 56   copies of r5 - r8, stored above this frame
 *
 * Pos0 is the most significant 16 bit digit of the result, Pos3 the
 * least significant one. 16 bit shifts are done by storing a word and
 * loading one of its halfwords back, so no multi-bit shift instruction
 * is used.
 */
	.text
	.globl	__muldi3
	.type	__muldi3, @function
	.ent	__muldi3

__muldi3:
	addi	r1, r1, -40

	/* Spill the operands so their 16 bit digits can be read back */
	swi	r5, r1, 44		/* Operand1[H] */
	swi	r6, r1, 48		/* Operand1[L] */
	swi	r7, r1, 52		/* Operand2[H] */
	swi	r8, r1, 56		/* Operand2[L] */

	/* Save the registers that will hold the eight digits */
	swi	r20, r1, 0
	swi	r21, r1, 4
	swi	r22, r1, 8
	swi	r23, r1, 12
	swi	r24, r1, 16
	swi	r25, r1, 20
	swi	r26, r1, 24
	swi	r27, r1, 28

	/* Load the zero extended 16 bit digits A thru H */
	lhui	r20, r1, 44		/* A */
	lhui	r21, r1, 46		/* B */
	lhui	r22, r1, 48		/* C */
	lhui	r23, r1, 50		/* D */
	lhui	r24, r1, 52		/* E */
	lhui	r25, r1, 54		/* F */
	lhui	r26, r1, 56		/* G */
	lhui	r27, r1, 58		/* H */

	/* D * H ==> low word of the result; its Pos2 half is redone below */
	mul	r9, r23, r27
	swi	r9, r1, 36		/* Pos2 and Pos3 */

	/*
	 * Hi16(D * H) + C * H + D * G
	 * The low 16 bits of this sum are Pos2 and the upper 16 bits are
	 * added into the next column. The sum can exceed 32 bits; the
	 * carry out of each add is collected in r12 and has the weight of
	 * the lowest bit of Pos0.
	 */
	lhui	r11, r1, 36		/* Hi16(D * H) */
	mul	r9, r22, r27		/* C * H */
	mul	r10, r23, r26		/* D * G */
	add	r9, r9, r10
	addc	r12, r0, r0		/* r12 = carry of the add above */
	add	r9, r9, r11
	addc	r12, r12, r0		/* r12 += carry of the add above */
	shi	r9, r1, 36		/* Pos2 = low 16 bits of the sum */
	swi	r9, r1, 32		/* scratch use of the high word slot */
	lhui	r11, r1, 32		/* r11 = upper 16 bits of the sum */

	/*
	 * Hi16(previous sum) + B * H + C * G + D * F
	 * Carries out of bit 31 here are beyond the 64 bit result and are
	 * dropped. The low 16 bits are Pos1; the upper 16 bits sit in the
	 * Pos0 slot until they are read back for the last column.
	 */
	mul	r9, r21, r27		/* B * H */
	mul	r10, r22, r26		/* C * G */
	mul	r7, r23, r25		/* D * F */
	add	r9, r9, r11
	add	r9, r9, r10
	add	r9, r9, r7
	swi	r9, r1, 32		/* Pos0 (provisional) and Pos1 */

	/*
	 * Hi16(previous sum) + A * H + B * G + C * F + D * E + r12
	 * Only the low 16 bits are kept, as Pos0.
	 */
	lhui	r11, r1, 32		/* upper 16 bits of the previous sum */
	mul	r9, r20, r27		/* A * H */
	mul	r10, r21, r26		/* B * G */
	mul	r7, r22, r25		/* C * F */
	mul	r8, r23, r24		/* D * E */
	add	r9, r9, r11
	add	r9, r9, r10
	add	r9, r9, r7
	add	r9, r9, r8
	add	r9, r9, r12		/* carry deferred from the Pos2 column */
	shi	r9, r1, 32		/* Pos0 */

	/* Move results to r3 and r4 */
	lwi	r3, r1, 32		/* Hi Part */
	lwi	r4, r1, 36		/* Lo Part */

	/* Restore the saved registers */
	lwi	r20, r1, 0
	lwi	r21, r1, 4
	lwi	r22, r1, 8
	lwi	r23, r1, 12
	lwi	r24, r1, 16
	lwi	r25, r1, 20
	lwi	r26, r1, 24
	lwi	r27, r1, 28

	/* Return; the frame is released in the delay slot of rtsd */
	rtsd	r15, 8
	addi	r1, r1, 40

	.size	__muldi3, . - __muldi3
	.end	__muldi3