139 * !__GNUC__ ifdefs. Also removed ENCRYPTION, DECRYPTION,
140 * AES_128, AES_192, AES_256, AES_VAR ifdefs.
141 *
142 * 2. Translate yasm/nasm %define and .macro definitions to cpp(1) #define
143 *
144 * 3. Translate yasm/nasm %ifdef/%ifndef to cpp(1) #ifdef
145 *
146 * 4. Translate Intel/yasm/nasm syntax to ATT/OpenSolaris as(1) syntax
147 * (operands reversed, literals prefixed with "$", registers prefixed with "%",
148 * "[register+offset]" addressing changed to "offset(register)",
149 * parentheses in constant expressions "()" changed to square brackets "[]",
150 * "." removed from local (numeric) labels, and other changes).
151 * Examples:
152 * Intel/yasm/nasm Syntax ATT/OpenSolaris Syntax
153 * mov rax,(4*20h) mov $[4*0x20],%rax
154 * mov rax,[ebx+20h] mov 0x20(%ebx),%rax
155 * lea rax,[ebx+ecx] lea (%ebx,%ecx),%rax
156 * sub rax,[ebx+ecx*4-20h] sub -0x20(%ebx,%ecx,4),%rax
157 *
158 * 5. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
159 * /usr/include/sys/asm_linkage.h, lint(1B) guards, EXPORT DELETE START
160 * and EXPORT DELETE END markers, and dummy C function definitions for lint.
161 *
162 * 6. Renamed functions and reordered parameters to match OpenSolaris:
163 * Original Gladman interface:
164 * int aes_encrypt(const unsigned char *in,
165 * unsigned char *out, const aes_encrypt_ctx cx[1]);
166 * int aes_decrypt(const unsigned char *in,
167 * unsigned char *out, const aes_encrypt_ctx cx[1]);
168 * Note: aes_encrypt_ctx contains ks, a 60-element array of uint32_t,
169 * and a union type, inf, containing inf.l, a uint32_t and
170 * inf.b, a 4-element array of uint32_t. Only b[0] in the array (aka "l") is
171 * used and contains the key schedule length * 16 where key schedule length is
172 * 10, 12, or 14 rounds.
173 *
174 * OpenSolaris OS interface:
175 * void aes_encrypt_amd64(const aes_ks_t *ks, int Nr,
176 * const uint32_t pt[4], uint32_t ct[4]);
177 * void aes_decrypt_amd64(const aes_ks_t *ks, int Nr,
178 * const uint32_t pt[4], uint32_t ct[4]);
179 * typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
180 * uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
/*
 * Forward key schedule addressing.
 * kptr names the register (%r8) that holds the key schedule pointer.
 * fk_ref(x, y) expands to a memory operand relative to kptr whose
 * displacement is -16*x + fofs + 4*y: 16 bytes per step of x, 4 bytes per
 * step of y, moving toward lower addresses as x grows.  ff_rnd passes its
 * "round" argument as x and 0..3 as y.
 * The arguments are pasted into the expression without grouping, so pass
 * plain constants only.
 */
310 #define kptr %r8 /* key schedule pointer */
311 #define fofs 128 /* adjust offset in key schedule to keep |disp| < 128 */
312 #define fk_ref(x, y) -16*x+fofs+4*y(kptr)
313
/*
 * ik_ref(x, y): memory operand relative to kptr, in the same form as fk_ref.
 * The build-time switch AES_REV_DKS selects the direction:
 *   defined:     displacement -16*x + 128 + 4*y (same stride sign as fk_ref)
 *   not defined: displacement  16*x - 128 + 4*y (addresses rise with x)
 * NOTE(review): presumably the decryption counterpart of fk_ref, used by the
 * ii_rnd/il_rnd macros; their key-load lines are not visible here - confirm.
 */
314 #ifdef AES_REV_DKS
315 #define rofs 128
316 #define ik_ref(x, y) -16*x+rofs+4*y(kptr)
317
318 #else
319 #define rofs -128
320 #define ik_ref(x, y) 16*x+rofs+4*y(kptr)
321 #endif /* AES_REV_DKS */
322
/*
 * Lookup-table operands.  Each macro expands to a memory operand with base
 * tptr, index register x scaled by 8, and a fixed byte displacement:
 *   tab_0: 0   tab_1: 3   tab_2: 2   tab_3: 1   tab_f: 1   tab_i: 7
 * tab_f and tab_3 expand to the same operand.
 * The round macros pass %rsi or %rdi as x after loading them with movzx.
 * NOTE(review): tptr is defined outside this excerpt; the displacements
 * presumably select differently rotated views of each 8-byte table entry -
 * confirm against the table generator macros.
 */
323 #define tab_0(x) (tptr,x,8)
324 #define tab_1(x) 3(tptr,x,8)
325 #define tab_2(x) 2(tptr,x,8)
326 #define tab_3(x) 1(tptr,x,8)
327 #define tab_f(x) 1(tptr,x,8)
328 #define tab_i(x) 7(tptr,x,8)
329
330 /* EXPORT DELETE START */
331 #define ff_rnd(p1, p2, p3, p4, round) /* normal forward round */ \
332 mov fk_ref(round,0), p1; \
333 mov fk_ref(round,1), p2; \
334 mov fk_ref(round,2), p3; \
335 mov fk_ref(round,3), p4; \
336 \
337 movzx %al, %esi; \
338 movzx %ah, %edi; \
339 shr $16, %eax; \
340 xor tab_0(%rsi), p1; \
341 xor tab_1(%rdi), p4; \
342 movzx %al, %esi; \
343 movzx %ah, %edi; \
344 xor tab_2(%rsi), p3; \
345 xor tab_3(%rdi), p2; \
346 \
347 movzx %bl, %esi; \
348 movzx %bh, %edi; \
349 shr $16, %ebx; \
350 xor tab_0(%rsi), p2; \
666 xor %edi, p2; \
667 \
668 movzx %dl, %esi; \
669 movzx %dh, %edi; \
670 movzx tab_i(%rsi), %esi; \
671 movzx tab_i(%rdi), %edi; \
672 shr $16, %edx; \
673 xor %esi, p4; \
674 rol $8, %edi; \
675 xor %edi, p1; \
676 movzx %dl, %esi; \
677 movzx %dh, %edi; \
678 movzx tab_i(%rsi), %esi; \
679 movzx tab_i(%rdi), %edi; \
680 rol $16, %esi; \
681 rol $24, %edi; \
682 xor %esi, p2; \
683 xor %edi, p3
684
685 #endif /* LAST_ROUND_TABLES */
686 /* EXPORT DELETE END */
687
688 /*
689 * OpenSolaris OS:
690 * void aes_encrypt_amd64(const aes_ks_t *ks, int Nr,
691 * const uint32_t pt[4], uint32_t ct[4]);
692 *
693 * Original interface:
694 * int aes_encrypt(const unsigned char *in,
695 * unsigned char *out, const aes_encrypt_ctx cx[1]);
696 */
/*
 * enc_tab: table emitted by enc_vals(u8), placed on a 64-byte boundary.
 * When LAST_ROUND_TABLES is defined, the last-round table emitted by
 * enc_vals(w8) follows it directly.
 * NOTE(review): enc_vals, u8 and w8 are defined outside this excerpt.
 */
697 .align 64
698 enc_tab:
699 enc_vals(u8)
700 #ifdef LAST_ROUND_TABLES
701 / Last Round Tables:
702 enc_vals(w8)
703 #endif
704
705
706 ENTRY_NP(aes_encrypt_amd64)
707 /* EXPORT DELETE START */
708 #ifdef GLADMAN_INTERFACE
709 / Original interface
710 sub $[4*8], %rsp / gnu/linux/opensolaris binary interface
711 mov %rsi, (%rsp) / output pointer (P2)
712 mov %rdx, %r8 / context (P3)
713
714 mov %rbx, 1*8(%rsp) / P1: input pointer in rdi
715 mov %rbp, 2*8(%rsp) / P2: output pointer in (rsp)
716 mov %r12, 3*8(%rsp) / P3: context in r8
717 movzx 4*KS_LENGTH(kptr), %esi / Get byte key length * 16
718
719 #else
720 / OpenSolaris OS interface
721 sub $[4*8], %rsp / Make room on stack to save registers
722 mov %rcx, (%rsp) / Save output pointer (P4) on stack
723 mov %rdi, %r8 / context (P1)
724 mov %rdx, %rdi / P3: save input pointer
725 shl $4, %esi / P2: esi byte key length * 16
726
727 mov %rbx, 1*8(%rsp) / Save registers
769 ff_rnd(%r9d, %r10d, %r11d, %r12d, 6)
770 ff_rnd(%r9d, %r10d, %r11d, %r12d, 5)
771 ff_rnd(%r9d, %r10d, %r11d, %r12d, 4)
772 ff_rnd(%r9d, %r10d, %r11d, %r12d, 3)
773 ff_rnd(%r9d, %r10d, %r11d, %r12d, 2)
774 ff_rnd(%r9d, %r10d, %r11d, %r12d, 1)
775 fl_rnd(%r9d, %r10d, %r11d, %r12d, 0)
776
777 / Copy results
778 mov (%rsp), %rbx
779 mov %r9d, (%rbx)
780 mov %r10d, 4(%rbx)
781 mov %r11d, 8(%rbx)
782 mov %r12d, 12(%rbx)
783 xor %rax, %rax
784 4: / Restore registers
785 mov 1*8(%rsp), %rbx
786 mov 2*8(%rsp), %rbp
787 mov 3*8(%rsp), %r12
788 add $[4*8], %rsp
789 /* EXPORT DELETE END */
790 ret
791
792 SET_SIZE(aes_encrypt_amd64)
793
794 /*
795 * OpenSolaris OS:
796 * void aes_decrypt_amd64(const aes_ks_t *ks, int Nr,
797 * const uint32_t pt[4], uint32_t ct[4]);
798 *
799 * Original interface:
800 * int aes_decrypt(const unsigned char *in,
801 * unsigned char *out, const aes_encrypt_ctx cx[1]);
802 */
/*
 * dec_tab: table emitted by dec_vals(v8), placed on a 64-byte boundary.
 * When LAST_ROUND_TABLES is defined, the last-round table emitted by
 * dec_vals(w8) follows it directly.
 * NOTE(review): dec_vals, v8 and w8 are defined outside this excerpt.
 */
803 .align 64
804 dec_tab:
805 dec_vals(v8)
806 #ifdef LAST_ROUND_TABLES
807 / Last Round Tables:
808 dec_vals(w8)
809 #endif
810
811
812 ENTRY_NP(aes_decrypt_amd64)
813 /* EXPORT DELETE START */
814 #ifdef GLADMAN_INTERFACE
815 / Original interface
816 sub $[4*8], %rsp / gnu/linux/opensolaris binary interface
817 mov %rsi, (%rsp) / output pointer (P2)
818 mov %rdx, %r8 / context (P3)
819
820 mov %rbx, 1*8(%rsp) / P1: input pointer in rdi
821 mov %rbp, 2*8(%rsp) / P2: output pointer in (rsp)
822 mov %r12, 3*8(%rsp) / P3: context in r8
823 movzx 4*KS_LENGTH(kptr), %esi / Get byte key length * 16
824
825 #else
826 / OpenSolaris OS interface
827 sub $[4*8], %rsp / Make room on stack to save registers
828 mov %rcx, (%rsp) / Save output pointer (P4) on stack
829 mov %rdi, %r8 / context (P1)
830 mov %rdx, %rdi / P3: save input pointer
831 shl $4, %esi / P2: esi byte key length * 16
832
833 mov %rbx, 1*8(%rsp) / Save registers
881 ii_rnd(%r9d, %r10d, %r11d, %r12d, 6)
882 ii_rnd(%r9d, %r10d, %r11d, %r12d, 5)
883 ii_rnd(%r9d, %r10d, %r11d, %r12d, 4)
884 ii_rnd(%r9d, %r10d, %r11d, %r12d, 3)
885 ii_rnd(%r9d, %r10d, %r11d, %r12d, 2)
886 ii_rnd(%r9d, %r10d, %r11d, %r12d, 1)
887 il_rnd(%r9d, %r10d, %r11d, %r12d, 0)
888
889 / Copy results
890 mov (%rsp), %rbx
891 mov %r9d, (%rbx)
892 mov %r10d, 4(%rbx)
893 mov %r11d, 8(%rbx)
894 mov %r12d, 12(%rbx)
895 xor %rax, %rax
896 4: / Restore registers
897 mov 1*8(%rsp), %rbx
898 mov 2*8(%rsp), %rbp
899 mov 3*8(%rsp), %r12
900 add $[4*8], %rsp
901 /* EXPORT DELETE END */
902 ret
903
904 SET_SIZE(aes_decrypt_amd64)
905 #endif /* lint || __lint */
|
139 * !__GNUC__ ifdefs. Also removed ENCRYPTION, DECRYPTION,
140 * AES_128, AES_192, AES_256, AES_VAR ifdefs.
141 *
142 * 2. Translate yasm/nasm %define and .macro definitions to cpp(1) #define
143 *
144 * 3. Translate yasm/nasm %ifdef/%ifndef to cpp(1) #ifdef
145 *
146 * 4. Translate Intel/yasm/nasm syntax to ATT/OpenSolaris as(1) syntax
147 * (operands reversed, literals prefixed with "$", registers prefixed with "%",
148 * "[register+offset]" addressing changed to "offset(register)",
149 * parentheses in constant expressions "()" changed to square brackets "[]",
150 * "." removed from local (numeric) labels, and other changes).
151 * Examples:
152 * Intel/yasm/nasm Syntax ATT/OpenSolaris Syntax
153 * mov rax,(4*20h) mov $[4*0x20],%rax
154 * mov rax,[ebx+20h] mov 0x20(%ebx),%rax
155 * lea rax,[ebx+ecx] lea (%ebx,%ecx),%rax
156 * sub rax,[ebx+ecx*4-20h] sub -0x20(%ebx,%ecx,4),%rax
157 *
158 * 5. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
159 * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
160 * definitions for lint.
161 *
162 * 6. Renamed functions and reordered parameters to match OpenSolaris:
163 * Original Gladman interface:
164 * int aes_encrypt(const unsigned char *in,
165 * unsigned char *out, const aes_encrypt_ctx cx[1]);
166 * int aes_decrypt(const unsigned char *in,
167 * unsigned char *out, const aes_encrypt_ctx cx[1]);
168 * Note: aes_encrypt_ctx contains ks, a 60-element array of uint32_t,
169 * and a union type, inf, containing inf.l, a uint32_t and
170 * inf.b, a 4-element array of uint32_t. Only b[0] in the array (aka "l") is
171 * used and contains the key schedule length * 16 where key schedule length is
172 * 10, 12, or 14 rounds.
173 *
174 * OpenSolaris OS interface:
175 * void aes_encrypt_amd64(const aes_ks_t *ks, int Nr,
176 * const uint32_t pt[4], uint32_t ct[4]);
177 * void aes_decrypt_amd64(const aes_ks_t *ks, int Nr,
178 * const uint32_t pt[4], uint32_t ct[4]);
179 * typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
180 * uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
/*
 * Forward key schedule addressing.
 * kptr names the register (%r8) that holds the key schedule pointer.
 * fk_ref(x, y) expands to a memory operand relative to kptr whose
 * displacement is -16*x + fofs + 4*y: 16 bytes per step of x, 4 bytes per
 * step of y, moving toward lower addresses as x grows.  ff_rnd passes its
 * "round" argument as x and 0..3 as y.
 * The arguments are pasted into the expression without grouping, so pass
 * plain constants only.
 */
310 #define kptr %r8 /* key schedule pointer */
311 #define fofs 128 /* adjust offset in key schedule to keep |disp| < 128 */
312 #define fk_ref(x, y) -16*x+fofs+4*y(kptr)
313
/*
 * ik_ref(x, y): memory operand relative to kptr, in the same form as fk_ref.
 * The build-time switch AES_REV_DKS selects the direction:
 *   defined:     displacement -16*x + 128 + 4*y (same stride sign as fk_ref)
 *   not defined: displacement  16*x - 128 + 4*y (addresses rise with x)
 * NOTE(review): presumably the decryption counterpart of fk_ref, used by the
 * ii_rnd/il_rnd macros; their key-load lines are not visible here - confirm.
 */
314 #ifdef AES_REV_DKS
315 #define rofs 128
316 #define ik_ref(x, y) -16*x+rofs+4*y(kptr)
317
318 #else
319 #define rofs -128
320 #define ik_ref(x, y) 16*x+rofs+4*y(kptr)
321 #endif /* AES_REV_DKS */
322
/*
 * Lookup-table operands.  Each macro expands to a memory operand with base
 * tptr, index register x scaled by 8, and a fixed byte displacement:
 *   tab_0: 0   tab_1: 3   tab_2: 2   tab_3: 1   tab_f: 1   tab_i: 7
 * tab_f and tab_3 expand to the same operand.
 * The round macros pass %rsi or %rdi as x after loading them with movzx.
 * NOTE(review): tptr is defined outside this excerpt; the displacements
 * presumably select differently rotated views of each 8-byte table entry -
 * confirm against the table generator macros.
 */
323 #define tab_0(x) (tptr,x,8)
324 #define tab_1(x) 3(tptr,x,8)
325 #define tab_2(x) 2(tptr,x,8)
326 #define tab_3(x) 1(tptr,x,8)
327 #define tab_f(x) 1(tptr,x,8)
328 #define tab_i(x) 7(tptr,x,8)
329
330 #define ff_rnd(p1, p2, p3, p4, round) /* normal forward round */ \
331 mov fk_ref(round,0), p1; \
332 mov fk_ref(round,1), p2; \
333 mov fk_ref(round,2), p3; \
334 mov fk_ref(round,3), p4; \
335 \
336 movzx %al, %esi; \
337 movzx %ah, %edi; \
338 shr $16, %eax; \
339 xor tab_0(%rsi), p1; \
340 xor tab_1(%rdi), p4; \
341 movzx %al, %esi; \
342 movzx %ah, %edi; \
343 xor tab_2(%rsi), p3; \
344 xor tab_3(%rdi), p2; \
345 \
346 movzx %bl, %esi; \
347 movzx %bh, %edi; \
348 shr $16, %ebx; \
349 xor tab_0(%rsi), p2; \
665 xor %edi, p2; \
666 \
667 movzx %dl, %esi; \
668 movzx %dh, %edi; \
669 movzx tab_i(%rsi), %esi; \
670 movzx tab_i(%rdi), %edi; \
671 shr $16, %edx; \
672 xor %esi, p4; \
673 rol $8, %edi; \
674 xor %edi, p1; \
675 movzx %dl, %esi; \
676 movzx %dh, %edi; \
677 movzx tab_i(%rsi), %esi; \
678 movzx tab_i(%rdi), %edi; \
679 rol $16, %esi; \
680 rol $24, %edi; \
681 xor %esi, p2; \
682 xor %edi, p3
683
684 #endif /* LAST_ROUND_TABLES */
685
686 /*
687 * OpenSolaris OS:
688 * void aes_encrypt_amd64(const aes_ks_t *ks, int Nr,
689 * const uint32_t pt[4], uint32_t ct[4]);
690 *
691 * Original interface:
692 * int aes_encrypt(const unsigned char *in,
693 * unsigned char *out, const aes_encrypt_ctx cx[1]);
694 */
/*
 * enc_tab: table emitted by enc_vals(u8), placed on a 64-byte boundary.
 * When LAST_ROUND_TABLES is defined, the last-round table emitted by
 * enc_vals(w8) follows it directly.
 * NOTE(review): enc_vals, u8 and w8 are defined outside this excerpt.
 */
695 .align 64
696 enc_tab:
697 enc_vals(u8)
698 #ifdef LAST_ROUND_TABLES
699 / Last Round Tables:
700 enc_vals(w8)
701 #endif
702
703
704 ENTRY_NP(aes_encrypt_amd64)
705 #ifdef GLADMAN_INTERFACE
706 / Original interface
707 sub $[4*8], %rsp / gnu/linux/opensolaris binary interface
708 mov %rsi, (%rsp) / output pointer (P2)
709 mov %rdx, %r8 / context (P3)
710
711 mov %rbx, 1*8(%rsp) / P1: input pointer in rdi
712 mov %rbp, 2*8(%rsp) / P2: output pointer in (rsp)
713 mov %r12, 3*8(%rsp) / P3: context in r8
714 movzx 4*KS_LENGTH(kptr), %esi / Get byte key length * 16
715
716 #else
717 / OpenSolaris OS interface
718 sub $[4*8], %rsp / Make room on stack to save registers
719 mov %rcx, (%rsp) / Save output pointer (P4) on stack
720 mov %rdi, %r8 / context (P1)
721 mov %rdx, %rdi / P3: save input pointer
722 shl $4, %esi / P2: esi byte key length * 16
723
724 mov %rbx, 1*8(%rsp) / Save registers
766 ff_rnd(%r9d, %r10d, %r11d, %r12d, 6)
767 ff_rnd(%r9d, %r10d, %r11d, %r12d, 5)
768 ff_rnd(%r9d, %r10d, %r11d, %r12d, 4)
769 ff_rnd(%r9d, %r10d, %r11d, %r12d, 3)
770 ff_rnd(%r9d, %r10d, %r11d, %r12d, 2)
771 ff_rnd(%r9d, %r10d, %r11d, %r12d, 1)
772 fl_rnd(%r9d, %r10d, %r11d, %r12d, 0)
773
774 / Copy results
775 mov (%rsp), %rbx
776 mov %r9d, (%rbx)
777 mov %r10d, 4(%rbx)
778 mov %r11d, 8(%rbx)
779 mov %r12d, 12(%rbx)
780 xor %rax, %rax
781 4: / Restore registers
782 mov 1*8(%rsp), %rbx
783 mov 2*8(%rsp), %rbp
784 mov 3*8(%rsp), %r12
785 add $[4*8], %rsp
786 ret
787
788 SET_SIZE(aes_encrypt_amd64)
789
790 /*
791 * OpenSolaris OS:
792 * void aes_decrypt_amd64(const aes_ks_t *ks, int Nr,
793 * const uint32_t pt[4], uint32_t ct[4]);
794 *
795 * Original interface:
796 * int aes_decrypt(const unsigned char *in,
797 * unsigned char *out, const aes_encrypt_ctx cx[1]);
798 */
/*
 * dec_tab: table emitted by dec_vals(v8), placed on a 64-byte boundary.
 * When LAST_ROUND_TABLES is defined, the last-round table emitted by
 * dec_vals(w8) follows it directly.
 * NOTE(review): dec_vals, v8 and w8 are defined outside this excerpt.
 */
799 .align 64
800 dec_tab:
801 dec_vals(v8)
802 #ifdef LAST_ROUND_TABLES
803 / Last Round Tables:
804 dec_vals(w8)
805 #endif
806
807
808 ENTRY_NP(aes_decrypt_amd64)
809 #ifdef GLADMAN_INTERFACE
810 / Original interface
811 sub $[4*8], %rsp / gnu/linux/opensolaris binary interface
812 mov %rsi, (%rsp) / output pointer (P2)
813 mov %rdx, %r8 / context (P3)
814
815 mov %rbx, 1*8(%rsp) / P1: input pointer in rdi
816 mov %rbp, 2*8(%rsp) / P2: output pointer in (rsp)
817 mov %r12, 3*8(%rsp) / P3: context in r8
818 movzx 4*KS_LENGTH(kptr), %esi / Get byte key length * 16
819
820 #else
821 / OpenSolaris OS interface
822 sub $[4*8], %rsp / Make room on stack to save registers
823 mov %rcx, (%rsp) / Save output pointer (P4) on stack
824 mov %rdi, %r8 / context (P1)
825 mov %rdx, %rdi / P3: save input pointer
826 shl $4, %esi / P2: esi byte key length * 16
827
828 mov %rbx, 1*8(%rsp) / Save registers
876 ii_rnd(%r9d, %r10d, %r11d, %r12d, 6)
877 ii_rnd(%r9d, %r10d, %r11d, %r12d, 5)
878 ii_rnd(%r9d, %r10d, %r11d, %r12d, 4)
879 ii_rnd(%r9d, %r10d, %r11d, %r12d, 3)
880 ii_rnd(%r9d, %r10d, %r11d, %r12d, 2)
881 ii_rnd(%r9d, %r10d, %r11d, %r12d, 1)
882 il_rnd(%r9d, %r10d, %r11d, %r12d, 0)
883
884 / Copy results
885 mov (%rsp), %rbx
886 mov %r9d, (%rbx)
887 mov %r10d, 4(%rbx)
888 mov %r11d, 8(%rbx)
889 mov %r12d, 12(%rbx)
890 xor %rax, %rax
891 4: / Restore registers
892 mov 1*8(%rsp), %rbx
893 mov 2*8(%rsp), %rbp
894 mov 3*8(%rsp), %r12
895 add $[4*8], %rsp
896 ret
897
898 SET_SIZE(aes_decrypt_amd64)
899 #endif /* lint || __lint */
|