1 /*
   2  * ====================================================================
   3  * Written by Intel Corporation for the OpenSSL project to add support
   4  * for Intel AES-NI instructions. Rights for redistribution and usage
   5  * in source and binary forms are granted according to the OpenSSL
   6  * license.
   7  *
   8  *   Author: Huang Ying <ying.huang at intel dot com>
   9  *           Vinodh Gopal <vinodh.gopal at intel dot com>
  10  *           Kahraman Akdemir
  11  *
  12  * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD)
  13  * instructions that are going to be introduced in the next generation
  14  * of Intel processor, as of 2009. These instructions enable fast and
  15  * secure data encryption and decryption, using the Advanced Encryption
  16  * Standard (AES), defined by FIPS Publication number 197. The
  17  * architecture introduces six instructions that offer full hardware
  18  * support for AES. Four of them support high performance data
  19  * encryption and decryption, and the other two instructions support
  20  * the AES key expansion procedure.
  21  * ====================================================================
  22  */
  23 
  24 /*
  25  * ====================================================================
  26  * Copyright (c) 1998-2008 The OpenSSL Project.  All rights reserved.
  27  *
  28  * Redistribution and use in source and binary forms, with or without
  29  * modification, are permitted provided that the following conditions
  30  * are met:
  31  *
  32  * 1. Redistributions of source code must retain the above copyright
  33  *    notice, this list of conditions and the following disclaimer.
  34  *
  35  * 2. Redistributions in binary form must reproduce the above copyright
  36  *    notice, this list of conditions and the following disclaimer in
  37  *    the documentation and/or other materials provided with the
  38  *    distribution.
  39  *
  40  * 3. All advertising materials mentioning features or use of this
  41  *    software must display the following acknowledgment:
  42  *    "This product includes software developed by the OpenSSL Project
  43  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
  44  *
  45  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
  46  *    endorse or promote products derived from this software without
  47  *    prior written permission. For written permission, please contact
  48  *    openssl-core@openssl.org.
  49  *
  50  * 5. Products derived from this software may not be called "OpenSSL"
  51  *    nor may "OpenSSL" appear in their names without prior written
  52  *    permission of the OpenSSL Project.
  53  *
  54  * 6. Redistributions of any form whatsoever must retain the following
  55  *    acknowledgment:
  56  *    "This product includes software developed by the OpenSSL Project
  57  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
  58  *
  59  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
  60  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  61  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  62  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
  63  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  64  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  65  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  66  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  67  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  68  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  69  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  70  * OF THE POSSIBILITY OF SUCH DAMAGE.
  71  * ====================================================================
  72  */
  73 
  74 /*
  75  * ====================================================================
  76  * OpenSolaris OS modifications
  77  *
  78  * This source originates as files aes-intel.S and eng_aesni_asm.pl, in
  79  * patches sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
  80  * Huang Ying of Intel to the openssl-dev mailing list under the subject
  81  * of "Add support to Intel AES-NI instruction set for x86_64 platform".
  82  *
  83  * This OpenSolaris version has these major changes from the original source:
  84  *
  85  * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
  86  * /usr/include/sys/asm_linkage.h, lint(1B) guards, EXPORT DELETE START
  87  * and EXPORT DELETE END markers, and dummy C function definitions for lint.
  88  *
  89  * 2. Formatted code, added comments, and added #includes and #defines.
  90  *
  91  * 3. If bit CR0.TS is set, clear and set the TS bit, after and before
  92  * calling kpreempt_disable() and kpreempt_enable().
  93  * If the TS bit is not set, save and restore %xmm registers at the beginning
  94  * and end of function calls (%xmm* registers are not saved and restored
  95  * during kernel thread preemption).
  96  *
  97  * 4. Renamed functions, reordered parameters, and changed return value
  98  * to match OpenSolaris:
  99  *
 100  * OpenSSL interface:
 101  *      int intel_AES_set_encrypt_key(const unsigned char *userKey,
 102  *              const int bits, AES_KEY *key);
 103  *      int intel_AES_set_decrypt_key(const unsigned char *userKey,
 104  *              const int bits, AES_KEY *key);
 105  *      Return values for above are non-zero on error, 0 on success.
 106  *
 107  *      void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
 108  *              const AES_KEY *key);
 109  *      void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
 110  *              const AES_KEY *key);
 111  *      typedef struct aes_key_st {
 112  *              unsigned int    rd_key[4 *(AES_MAXNR + 1)];
 113  *              int             rounds;
 114  *              unsigned int    pad[3];
 115  *      } AES_KEY;
 116  * Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
 117  * (ks32) instead of 64-bit (ks64)).
 118  * Number of rounds (aka round count) is at offset 240 of AES_KEY.
 119  *
 120  * OpenSolaris OS interface (#ifdefs removed for readability):
 121  *      int rijndael_key_setup_dec_intel(uint32_t rk[],
 122  *              const uint32_t cipherKey[], uint64_t keyBits);
 123  *      int rijndael_key_setup_enc_intel(uint32_t rk[],
 124  *              const uint32_t cipherKey[], uint64_t keyBits);
 125  *      Return values for above are 0 on error, number of rounds on success.
 126  *
 127  *      void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
 128  *              const uint32_t pt[4], uint32_t ct[4]);
 129  *      void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
 130  *              const uint32_t ct[4], uint32_t pt[4]);
 131  *      typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
 132  *               uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
 133  *
 134  *      typedef union {
 135  *              uint32_t        ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
 136  *      } aes_ks_t;
 137  *      typedef struct aes_key {
 138  *              aes_ks_t        encr_ks, decr_ks;
 139  *              long double     align128;
 140  *              int             flags, nr, type;
 141  *      } aes_key_t;
 142  *
 143  * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
 144  * ct is crypto text, and MAX_AES_NR is 14.
 145  * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
 146  *
 147  * Note2: aes_ks_t must be aligned on a 0 mod 128-bit (16-byte) boundary.
 148  *
 149  * ====================================================================
 150  */
 151 
 152 #if defined(lint) || defined(__lint)
 153 
 154 #include <sys/types.h>
 155 
 156 /* ARGSUSED */
 157 void aes_encrypt_intel(const uint32_t rk[], int Nr,
 158     const uint32_t pt[4], uint32_t ct[4])
 159 {       /* lint-only stub: the real routine is the assembly below */
 160 }
 161 /* ARGSUSED */
 162 void aes_decrypt_intel(const uint32_t rk[], int Nr,
 163     const uint32_t ct[4], uint32_t pt[4])
 164 {       /* lint-only stub: the real routine is the assembly below */
 165 }
 166 /* ARGSUSED */
 167 int rijndael_key_setup_enc_intel(uint32_t rk[],
 168     const uint32_t cipherKey[], uint64_t keyBits)
 169 {
 170         return (0);     /* lint-only stub: no key schedule is produced */
 171 }
 172 /* ARGSUSED */
 173 int rijndael_key_setup_dec_intel(uint32_t rk[],
 174     const uint32_t cipherKey[], uint64_t keyBits)
 175 {
 176         return (0);     /* lint-only stub: no key schedule is produced */
 177 }
 178 
 179 
 180 #else   /* lint */
 181 
 182 #include <sys/asm_linkage.h>
 183 #include <sys/controlregs.h>
 184 #ifdef _KERNEL
 185 #include <sys/machprivregs.h>
 186 #endif
 187 
 188 #ifdef _KERNEL
 189         /*
 190          * Note: the CLTS macro clobbers P2 (%rsi) under i86xpv.  That is,
 191          * it calls HYPERVISOR_fpu_taskswitch() which modifies %rsi when it
 192          * uses it to pass P2 to syscall.
 193          * This also occurs with the STTS macro, but we don't care if
 194          * P2 (%rsi) is modified just before function exit.
 195          * The CLTS and STTS macros push and pop P1 (%rdi) already.
              *
              * PROTECTED_CLTS therefore wraps CLTS in a push/pop of %rsi when
              * built with __xpv defined, and is plain CLTS otherwise.
              * CLTS itself is not defined in this file.
 196          */
 197 #ifdef __xpv
 198 #define PROTECTED_CLTS \
 199         push    %rsi; \
 200         CLTS; \
 201         pop     %rsi
 202 #else   /* !__xpv */
 203 #define PROTECTED_CLTS \
 204         CLTS
 205 #endif  /* __xpv */
 206 
             /*
              * Set up a frame (push %rbp; %rbp = %rsp) and read %cr0 into tmpreg.
              * If CR0_TS is not set in tmpreg, align %rsp down to XMM_ALIGN,
              * reserve two XMM_SIZE slots and save %xmm0 at 16(%rsp) and %xmm1
              * at (%rsp); otherwise run PROTECTED_CLTS instead.
              *
              * tmpreg is tested again by SET_TS_OR_POP_XMM0_XMM1(), so it must
              * keep its value until that macro is used.  The fixed 16(%rsp)
              * offset assumes XMM_SIZE is 16.  CR0_TS, XMM_ALIGN and XMM_SIZE
              * are not defined in this file (presumably they come from the
              * included headers -- confirm).
              */
 207 #define CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg) \
 208         push    %rbp; \
 209         mov     %rsp, %rbp; \
 210         movq    %cr0, tmpreg; \
 211         testq   $CR0_TS, tmpreg; \
 212         jnz     1f; \
 213         and     $-XMM_ALIGN, %rsp; \
 214         sub     $[XMM_SIZE * 2], %rsp; \
 215         movaps  %xmm0, 16(%rsp); \
 216         movaps  %xmm1, (%rsp); \
 217         jmp     2f; \
 218 1: \
 219         PROTECTED_CLTS; \
 220 2:
 221 
 222         /*
 223          * If CR0_TS was not set above, pop %xmm0 and %xmm1 off stack,
 224          * otherwise set CR0_TS.
              *
              * "Above" refers to CLEAR_TS_OR_PUSH_XMM0_XMM1(): tmpreg must still
              * hold the %cr0 value that macro read, and %rsp/%rbp must be as
              * that macro left them.  The registers are reloaded from the same
              * slots they were saved to (%xmm1 at (%rsp), %xmm0 at 16(%rsp)).
              * On both paths the frame is then torn down (%rsp = %rbp; pop %rbp).
              * STTS is not defined in this file; tmpreg is handed to it and
              * should be treated as clobbered afterwards -- confirm.
 225          */
 226 #define SET_TS_OR_POP_XMM0_XMM1(tmpreg) \
 227         testq   $CR0_TS, tmpreg; \
 228         jnz     1f; \
 229         movaps  (%rsp), %xmm1; \
 230         movaps  16(%rsp), %xmm0; \
 231         jmp     2f; \
 232 1: \
 233         STTS(tmpreg); \
 234 2: \
 235         mov     %rbp, %rsp; \
 236         pop     %rbp
 237 
 238         /*
 239          * If CR0_TS is not set, align stack (with push %rbp) and push
 240          * %xmm0 - %xmm6 on stack, otherwise clear CR0_TS
              *
              * tmpreg receives the value read from %cr0 and is tested again by
              * SET_TS_OR_POP_XMM0_TO_XMM6(), so it must keep its value until
              * that macro is used.  Seven XMM_SIZE slots are reserved below the
              * aligned %rsp; %xmm0 is saved in the highest slot (96(%rsp)) and
              * %xmm6 in the lowest ((%rsp)).  The fixed offsets assume
              * XMM_SIZE is 16.
 241          */
 242 #define CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg) \
 243         push    %rbp; \
 244         mov     %rsp, %rbp; \
 245         movq    %cr0, tmpreg; \
 246         testq   $CR0_TS, tmpreg; \
 247         jnz     1f; \
 248         and     $-XMM_ALIGN, %rsp; \
 249         sub     $[XMM_SIZE * 7], %rsp; \
 250         movaps  %xmm0, 96(%rsp); \
 251         movaps  %xmm1, 80(%rsp); \
 252         movaps  %xmm2, 64(%rsp); \
 253         movaps  %xmm3, 48(%rsp); \
 254         movaps  %xmm4, 32(%rsp); \
 255         movaps  %xmm5, 16(%rsp); \
 256         movaps  %xmm6, (%rsp); \
 257         jmp     2f; \
 258 1: \
 259         PROTECTED_CLTS; \
 260 2:
 261 
 262 
 263         /*
 264          * If CR0_TS was not set above, pop %xmm0 - %xmm6 off stack,
 265          * otherwise set CR0_TS.
              *
              * "Above" refers to CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(): tmpreg must
              * still hold the %cr0 value that macro read.  Each register is
              * reloaded from the slot it was saved to ((%rsp) for %xmm6 up to
              * 96(%rsp) for %xmm0).  On both paths the frame is then torn down
              * (%rsp = %rbp; pop %rbp).
 266          */
 267 #define SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg) \
 268         testq   $CR0_TS, tmpreg; \
 269         jnz     1f; \
 270         movaps  (%rsp), %xmm6; \
 271         movaps  16(%rsp), %xmm5; \
 272         movaps  32(%rsp), %xmm4; \
 273         movaps  48(%rsp), %xmm3; \
 274         movaps  64(%rsp), %xmm2; \
 275         movaps  80(%rsp), %xmm1; \
 276         movaps  96(%rsp), %xmm0; \
 277         jmp     2f; \
 278 1: \
 279         STTS(tmpreg); \
 280 2: \
 281         mov     %rbp, %rsp; \
 282         pop     %rbp
 283 
 284 
 285 #else
 286 #define PROTECTED_CLTS  /* !_KERNEL: every macro here expands to nothing */
 287 #define CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg)      /* no frame, no %xmm save */
 288 #define SET_TS_OR_POP_XMM0_XMM1(tmpreg)         /* no %xmm restore */
 289 #define CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg)   /* no frame, no %xmm save */
 290 #define SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg)      /* no %xmm restore */
 291 #endif  /* _KERNEL */
 292 
 293 
 294 /*
 295  * _key_expansion_128(), * _key_expansion_192a(), _key_expansion_192b(),
 296  * _key_expansion_256a(), _key_expansion_256b()
 297  *
 298  * Helper functions called by rijndael_key_setup_enc_intel().
 299  * Also used indirectly by rijndael_key_setup_dec_intel().
 300  *
 301  * Input:
 302  * %xmm0        User-provided cipher key
 303  * %xmm1        Round constant
 304  * Output:
 305  * (%rcx)       AES key
 306  */
 307 
 308         /* EXPORT DELETE START */
 309 .align  16
 310 _key_expansion_128:
 311 _key_expansion_256a:
             /*
              * Fold %xmm0 into the running XOR of its own four dwords (needs the
              * low dword of %xmm4 to be zero), mix in dword 3 of %xmm1, then
              * store the result at (%rcx) and step %rcx to the next slot.
              */
 312         shufps  $0x10, %xmm0, %xmm4
 313         pxor    %xmm4, %xmm0
 314         shufps  $0x8c, %xmm0, %xmm4
 315         pxor    %xmm4, %xmm0
 316         pshufd  $0xff, %xmm1, %xmm1     / broadcast dword 3 of %xmm1
 317         pxor    %xmm1, %xmm0
 318         movaps  %xmm0, (%rcx)           / emit one 16-byte round key
 319         add     $0x10, %rcx
 320         ret
 321         SET_SIZE(_key_expansion_256a)
 322         SET_SIZE(_key_expansion_128)
 323 
 324 .align 16
 325 _key_expansion_192a:
             /*
              * In:   %xmm0, %xmm2 (low 64 bits) = current key material,
              *       %xmm1 = aeskeygenassist result, low dword of %xmm4 = 0,
              *       %rcx = address to store to.
              * Out:  %xmm0 and %xmm2 updated; 32 bytes stored at (%rcx);
              *       %rcx advanced by 0x20.
              * Also overwrites %xmm1, %xmm3, %xmm4 (low dword stays 0),
              * %xmm5 and %xmm6.
              */
 326         pshufd  $0b01010101, %xmm1, %xmm1
             /* %xmm0 = running XOR of its four dwords, then ^= broadcast %xmm1 */
 327         shufps  $0b00010000, %xmm0, %xmm4
 328         pxor    %xmm4, %xmm0
 329         shufps  $0b10001100, %xmm0, %xmm4
 330         pxor    %xmm4, %xmm0
 331         pxor    %xmm1, %xmm0
 332 
             /*
              * %xmm6 keeps the incoming %xmm2 for the store below.
              * %xmm2 ^= (dword 3 of new %xmm0, broadcast) ^ (old %xmm2 << 4 bytes)
              */
 333         movaps  %xmm2, %xmm5
 334         movaps  %xmm2, %xmm6
 335         pslldq  $4, %xmm5
 336         pshufd  $0b11111111, %xmm0, %xmm3
 337         pxor    %xmm3, %xmm2
 338         pxor    %xmm5, %xmm2
 339 
             /*
              * (%rcx)     = old %xmm2 dwords 0-1, new %xmm0 dwords 0-1
              * 0x10(%rcx) = new %xmm0 dwords 2-3, new %xmm2 dwords 0-1
              */
 340         movaps  %xmm0, %xmm1
 341         shufps  $0b01000100, %xmm0, %xmm6
 342         movaps  %xmm6, (%rcx)
 343         shufps  $0b01001110, %xmm2, %xmm1
 344         movaps  %xmm1, 0x10(%rcx)
 345         add     $0x20, %rcx
 346         ret
 347         SET_SIZE(_key_expansion_192a)
 348 
 349 .align 16
 350 _key_expansion_192b:
             /*
              * Same update of %xmm0 and %xmm2 as _key_expansion_192a, but only
              * the new %xmm0 (16 bytes) is stored at (%rcx) and %rcx advances
              * by 0x10.  The updated %xmm2 is left in the register only.
              * Requires the low dword of %xmm4 to be 0; also overwrites %xmm1,
              * %xmm3, %xmm4 (low dword stays 0) and %xmm5.
              */
 351         pshufd  $0b01010101, %xmm1, %xmm1
             /* %xmm0 = running XOR of its four dwords, then ^= broadcast %xmm1 */
 352         shufps  $0b00010000, %xmm0, %xmm4
 353         pxor    %xmm4, %xmm0
 354         shufps  $0b10001100, %xmm0, %xmm4
 355         pxor    %xmm4, %xmm0
 356         pxor    %xmm1, %xmm0
 357 
             /* %xmm2 ^= (dword 3 of new %xmm0, broadcast) ^ (old %xmm2 << 4 bytes) */
 358         movaps  %xmm2, %xmm5
 359         pslldq  $4, %xmm5
 360         pshufd  $0b11111111, %xmm0, %xmm3
 361         pxor    %xmm3, %xmm2
 362         pxor    %xmm5, %xmm2
 363 
 364         movaps  %xmm0, (%rcx)
 365         add     $0x10, %rcx
 366         ret
 367         SET_SIZE(_key_expansion_192b)
 368 
 369 .align 16
 370 _key_expansion_256b:
             /*
              * Counterpart of _key_expansion_256a that works on %xmm2: fold
              * %xmm2 into the running XOR of its own four dwords (needs the low
              * dword of %xmm4 to be zero), mix in dword 2 of %xmm1, store the
              * result at (%rcx) and step %rcx to the next slot.
              */
 371         shufps  $0x10, %xmm2, %xmm4
 372         pxor    %xmm4, %xmm2
 373         shufps  $0x8c, %xmm2, %xmm4
 374         pxor    %xmm4, %xmm2
 375         pshufd  $0xaa, %xmm1, %xmm1     / broadcast dword 2 of %xmm1
 376         pxor    %xmm1, %xmm2
 377         movaps  %xmm2, (%rcx)           / emit one 16-byte round key
 378         add     $0x10, %rcx
 379         ret
 380         SET_SIZE(_key_expansion_256b)
 381         /* EXPORT DELETE END */
 382 
 383 
 384 /*
 385  * rijndael_key_setup_enc_intel()
 386  * Expand the cipher key into the encryption key schedule.
 387  *
 388  * For kernel code, caller is responsible for ensuring kpreempt_disable()
 389  * has been called.  This is because %xmm registers are not saved/restored.
 390  * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 391  * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 392  * on the stack.
 393  *
 394  * OpenSolaris interface:
 395  * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
 396  *      uint64_t keyBits);
 397  * Return value is 0 on error, number of rounds on success.
 398  *
 399  * Original Intel OpenSSL interface:
 400  * int intel_AES_set_encrypt_key(const unsigned char *userKey,
 401  *      const int bits, AES_KEY *key);
 402  * Return value is non-zero on error, 0 on success.
 403  */
 404 
 405 #ifdef  OPENSSL_INTERFACE
     /* Build the key-setup entry points under their OpenSSL names. */
 406 #define rijndael_key_setup_enc_intel    intel_AES_set_encrypt_key
 407 #define rijndael_key_setup_dec_intel    intel_AES_set_decrypt_key
 408 
     /* Argument registers for (userKey, bits, key); used below as %NAME. */
 409 #define USERCIPHERKEY           rdi     /* P1, 64 bits */
 410 #define KEYSIZE32               esi     /* P2, 32 bits */
 411 #define KEYSIZE64               rsi     /* P2, 64 bits */
 412 #define AESKEY                  rdx     /* P3, 64 bits */
 413 
 414 #else   /* OpenSolaris Interface */
     /* Argument registers for (rk, cipherKey, keyBits); used below as %NAME. */
 415 #define AESKEY                  rdi     /* P1, 64 bits */
 416 #define USERCIPHERKEY           rsi     /* P2, 64 bits */
 417 #define KEYSIZE32               edx     /* P3, 32 bits */
 418 #define KEYSIZE64               rdx     /* P3, 64 bits */
 419 #endif  /* OPENSSL_INTERFACE */
 420 
     /*
      * Scratch aliases: the round count reuses the key-size register and the
      * end-of-schedule pointer reuses the user-key register, so each alias
      * destroys the argument it shares a register with.
      */
 421 #define ROUNDS32                KEYSIZE32       /* temp */
 422 #define ROUNDS64                KEYSIZE64       /* temp */
 423 #define ENDAESKEY               USERCIPHERKEY   /* temp */
 424 
 425 
 426 ENTRY_NP(rijndael_key_setup_enc_intel)
             /*
              * Register usage:
              *   %rcx          address where the next round key is stored; the
              *                 _key_expansion_* helpers advance it
              *   %xmm0, %xmm2  key material carried between helper calls
              *   %xmm1         output of aeskeygenassist, consumed by the helper
              *   %xmm4         zeroed once here; helpers need its low dword 0
              *   %r10          tmpreg shared by the CLEAR_TS/SET_TS macro pair
              *   %rax          return value
              * Each key size has its own exit sequence; every exit runs
              * SET_TS_OR_POP_XMM0_TO_XMM6(%r10) before returning.
              */
 427         /* EXPORT DELETE START */
 428         CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(%r10)
 429 
 430         / NULL pointer sanity check
 431         test    %USERCIPHERKEY, %USERCIPHERKEY
 432         jz      .Lenc_key_invalid_param
 433         test    %AESKEY, %AESKEY
 434         jz      .Lenc_key_invalid_param
 435 
             /* Round key 0 is the first 16 bytes of the user key, unchanged. */
 436         movups  (%USERCIPHERKEY), %xmm0 / user key (first 16 bytes)
 437         movaps  %xmm0, (%AESKEY)
 438         lea     0x10(%AESKEY), %rcx     / key addr
 439         pxor    %xmm4, %xmm4            / xmm4 is assumed 0 in _key_expansion_x
 440 
             /*
              * NOTE(review): only the 32-bit view of the key size is compared
              * here and below; for the OpenSolaris interface keyBits is
              * declared uint64_t, so its upper 32 bits are not examined.
              */
 441         cmp     $256, %KEYSIZE32
 442         jnz     .Lenc_key192
 443 
 444         / AES 256: 14 rounds in encryption key schedule
 445 #ifdef OPENSSL_INTERFACE
 446         mov     $14, %ROUNDS32
 447         movl    %ROUNDS32, 240(%AESKEY)         / key.rounds = 14
 448 #endif  /* OPENSSL_INTERFACE */
 449 
             /* Round key 1 is the second 16 bytes of the user key, unchanged. */
 450         movups  0x10(%USERCIPHERKEY), %xmm2     / other user key (2nd 16 bytes)
 451         movaps  %xmm2, (%rcx)
 452         add     $0x10, %rcx
 453 
             /* 13 helper calls, each storing one further 16-byte round key. */
 454         aeskeygenassist $0x1, %xmm2, %xmm1      / expand the key
 455         call    _key_expansion_256a
 456         aeskeygenassist $0x1, %xmm0, %xmm1
 457         call    _key_expansion_256b
 458         aeskeygenassist $0x2, %xmm2, %xmm1      / expand the key
 459         call    _key_expansion_256a
 460         aeskeygenassist $0x2, %xmm0, %xmm1
 461         call    _key_expansion_256b
 462         aeskeygenassist $0x4, %xmm2, %xmm1      / expand the key
 463         call    _key_expansion_256a
 464         aeskeygenassist $0x4, %xmm0, %xmm1
 465         call    _key_expansion_256b
 466         aeskeygenassist $0x8, %xmm2, %xmm1      / expand the key
 467         call    _key_expansion_256a
 468         aeskeygenassist $0x8, %xmm0, %xmm1
 469         call    _key_expansion_256b
 470         aeskeygenassist $0x10, %xmm2, %xmm1     / expand the key
 471         call    _key_expansion_256a
 472         aeskeygenassist $0x10, %xmm0, %xmm1
 473         call    _key_expansion_256b
 474         aeskeygenassist $0x20, %xmm2, %xmm1     / expand the key
 475         call    _key_expansion_256a
 476         aeskeygenassist $0x20, %xmm0, %xmm1
 477         call    _key_expansion_256b
 478         aeskeygenassist $0x40, %xmm2, %xmm1     / expand the key
 479         call    _key_expansion_256a
 480 
 481         SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
 482 #ifdef  OPENSSL_INTERFACE
 483         xor     %rax, %rax                      / return 0 (OK)
 484 #else   /* Open Solaris Interface */
 485         mov     $14, %rax                       / return # rounds = 14
 486 #endif
 487         ret
 488 
 489 .align 4
 490 .Lenc_key192:
 491         cmp     $192, %KEYSIZE32
 492         jnz     .Lenc_key128
 493 
 494         / AES 192: 12 rounds in encryption key schedule
 495 #ifdef OPENSSL_INTERFACE
 496         mov     $12, %ROUNDS32
 497         movl    %ROUNDS32, 240(%AESKEY) / key.rounds = 12
 498 #endif  /* OPENSSL_INTERFACE */
 499 
             /*
              * Load the remaining 8 key bytes into the low half of %xmm2.
              * The 192a helper stores 32 bytes per call, the 192b helper 16.
              */
 500         movq    0x10(%USERCIPHERKEY), %xmm2     / other user key
 501         aeskeygenassist $0x1, %xmm2, %xmm1      / expand the key
 502         call    _key_expansion_192a
 503         aeskeygenassist $0x2, %xmm2, %xmm1      / expand the key
 504         call    _key_expansion_192b
 505         aeskeygenassist $0x4, %xmm2, %xmm1      / expand the key
 506         call    _key_expansion_192a
 507         aeskeygenassist $0x8, %xmm2, %xmm1      / expand the key
 508         call    _key_expansion_192b
 509         aeskeygenassist $0x10, %xmm2, %xmm1     / expand the key
 510         call    _key_expansion_192a
 511         aeskeygenassist $0x20, %xmm2, %xmm1     / expand the key
 512         call    _key_expansion_192b
 513         aeskeygenassist $0x40, %xmm2, %xmm1     / expand the key
 514         call    _key_expansion_192a
 515         aeskeygenassist $0x80, %xmm2, %xmm1     / expand the key
 516         call    _key_expansion_192b
 517 
 518         SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
 519 #ifdef  OPENSSL_INTERFACE
 520         xor     %rax, %rax                      / return 0 (OK)
 521 #else   /* OpenSolaris Interface */
 522         mov     $12, %rax                       / return # rounds = 12
 523 #endif
 524         ret
 525 
 526 .align 4
 527 .Lenc_key128:
 528         cmp $128, %KEYSIZE32
 529         jnz .Lenc_key_invalid_key_bits
 530 
 531         / AES 128: 10 rounds in encryption key schedule
 532 #ifdef OPENSSL_INTERFACE
 533         mov     $10, %ROUNDS32
 534         movl    %ROUNDS32, 240(%AESKEY)         / key.rounds = 10
 535 #endif  /* OPENSSL_INTERFACE */
 536 
             /* 10 helper calls, each storing one further 16-byte round key. */
 537         aeskeygenassist $0x1, %xmm0, %xmm1      / expand the key
 538         call    _key_expansion_128
 539         aeskeygenassist $0x2, %xmm0, %xmm1      / expand the key
 540         call    _key_expansion_128
 541         aeskeygenassist $0x4, %xmm0, %xmm1      / expand the key
 542         call    _key_expansion_128
 543         aeskeygenassist $0x8, %xmm0, %xmm1      / expand the key
 544         call    _key_expansion_128
 545         aeskeygenassist $0x10, %xmm0, %xmm1     / expand the key
 546         call    _key_expansion_128
 547         aeskeygenassist $0x20, %xmm0, %xmm1     / expand the key
 548         call    _key_expansion_128
 549         aeskeygenassist $0x40, %xmm0, %xmm1     / expand the key
 550         call    _key_expansion_128
 551         aeskeygenassist $0x80, %xmm0, %xmm1     / expand the key
 552         call    _key_expansion_128
 553         aeskeygenassist $0x1b, %xmm0, %xmm1     / expand the key
 554         call    _key_expansion_128
 555         aeskeygenassist $0x36, %xmm0, %xmm1     / expand the key
 556         call    _key_expansion_128
 557 
 558         SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
 559 #ifdef  OPENSSL_INTERFACE
 560         xor     %rax, %rax                      / return 0 (OK)
 561 #else   /* OpenSolaris Interface */
 562         mov     $10, %rax                       / return # rounds = 10
 563 #endif
 564         ret
 565 
             /*
              * Error exits.  The OpenSSL build returns -1 for a NULL pointer and
              * -2 for a bad key size; the OpenSolaris build returns 0 for both,
              * so its NULL-pointer label simply falls into the key-size exit.
              */
 566 .Lenc_key_invalid_param:
 567 #ifdef  OPENSSL_INTERFACE
 568         SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
 569         mov     $-1, %rax       / user key or AES key pointer is NULL
 570         ret
 571 #else
 572         /* FALLTHROUGH */
 573 #endif  /* OPENSSL_INTERFACE */
 574 
 575 .Lenc_key_invalid_key_bits:
 576         SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
 577 #ifdef  OPENSSL_INTERFACE
 578         mov     $-2, %rax       / keysize is invalid
 579 #else   /* Open Solaris Interface */
 580         xor     %rax, %rax      / a key pointer is NULL or invalid keysize
 581 #endif  /* OPENSSL_INTERFACE */
 582 
 583         /* EXPORT DELETE END */
 584         ret
 585         SET_SIZE(rijndael_key_setup_enc_intel)
 586 
 587 
 588 /*
 589  * rijndael_key_setup_dec_intel()
 590  * Expand the cipher key into the decryption key schedule.
 591  *
 592  * For kernel code, caller is responsible for ensuring kpreempt_disable()
 593  * has been called.  This is because %xmm registers are not saved/restored.
 594  * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 595  * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 596  * on the stack.
 597  *
 598  * OpenSolaris interface:
 599  * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
 600  *      uint64_t keyBits);
 601  * Return value is 0 on error, number of rounds on success.
 602  * Parameter mapping, OpenSSL -> OpenSolaris: P1->P2, P2->P3, P3->P1
 603  *
 604  * Original Intel OpenSSL interface:
 605  * int intel_AES_set_decrypt_key(const unsigned char *userKey,
 606  *      const int bits, AES_KEY *key);
 607  * Return value is non-zero on error, 0 on success.
 608  */
 609 ENTRY_NP(rijndael_key_setup_dec_intel)
             /*
              * Builds the encryption schedule by calling
              * rijndael_key_setup_enc_intel with the incoming arguments, then
              * converts it in place: the round keys are put in reverse order
              * and aesimc is applied to every key except the first and last.
              * %rax from the encryption setup is returned unchanged.
              *
              * The code after the call uses %AESKEY without reloading it, and
              * in the OpenSSL build also uses %ROUNDS32 as left by the callee.
              */
 610         /* EXPORT DELETE START */
 611         / Generate round keys used for encryption
 612         call    rijndael_key_setup_enc_intel
 613         test    %rax, %rax
 614 #ifdef  OPENSSL_INTERFACE
 615         jnz     .Ldec_key_exit  / Failed if returned non-0
 616 #else   /* OpenSolaris Interface */
 617         jz      .Ldec_key_exit  / Failed if returned 0
 618 #endif  /* OPENSSL_INTERFACE */
 619 
 620         CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)
 621 
 622         /*
 623          * Convert round keys used for encryption
 624          * to a form usable for decryption
 625          */
 626 #ifndef OPENSSL_INTERFACE               /* OpenSolaris Interface */
 627         mov     %rax, %ROUNDS64         / set # rounds (10, 12, or 14)
 628                                         / (already set for OpenSSL)
 629 #endif
 630 
             /*
              * %rcx        = address of round key 1 (start of the aesimc pass)
              * %ROUNDS64   = address of the last round key (rounds * 16 + base)
              * %ENDAESKEY  = copy of that address (end of the aesimc pass)
              */
 631         lea     0x10(%AESKEY), %rcx     / key addr
 632         shl     $4, %ROUNDS32
 633         add     %AESKEY, %ROUNDS64
 634         mov     %ROUNDS64, %ENDAESKEY
 635 
             /*
              * Swap round keys from both ends, moving %AESKEY up and %ROUNDS64
              * down by 16 bytes per pass, until the two pointers meet.
              */
 636 .align 4
 637 .Ldec_key_reorder_loop:
 638         movaps  (%AESKEY), %xmm0
 639         movaps  (%ROUNDS64), %xmm1
 640         movaps  %xmm0, (%ROUNDS64)
 641         movaps  %xmm1, (%AESKEY)
 642         lea     0x10(%AESKEY), %AESKEY
 643         lea     -0x10(%ROUNDS64), %ROUNDS64
 644         cmp     %AESKEY, %ROUNDS64
 645         ja      .Ldec_key_reorder_loop
 646 
             /* Apply aesimc to the keys from %rcx up to, not including, %ENDAESKEY. */
 647 .align 4
 648 .Ldec_key_inv_loop:
 649         movaps  (%rcx), %xmm0
 650         / Convert an encryption round key to a form usable for decryption
 651         / with the "AES Inverse Mix Columns" instruction
 652         aesimc  %xmm0, %xmm1
 653         movaps  %xmm1, (%rcx)
 654         lea     0x10(%rcx), %rcx
 655         cmp     %ENDAESKEY, %rcx
 656         jnz     .Ldec_key_inv_loop
 657 
 658         SET_TS_OR_POP_XMM0_XMM1(%r10)
 659 
 660 .Ldec_key_exit:
 661         / OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
 662         / OpenSSL: rax = 0 for OK, or non-zero for error
 663         /* EXPORT DELETE END */
 664         ret
 665         SET_SIZE(rijndael_key_setup_dec_intel)
 666 
 667 
 668 /*
 669  * aes_encrypt_intel()
 670  * Encrypt a single block (in and out can overlap).
 671  *
 672  * For kernel code, caller is responsible for ensuring kpreempt_disable()
 673  * has been called.  This is because %xmm registers are not saved/restored.
 674  * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 675  * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 676  * on the stack.
 677  *
 678  * Temporary register usage:
 679  * %xmm0        State
 680  * %xmm1        Key
 681  *
 682  * Original OpenSolaris Interface:
 683  * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
 684  *      const uint32_t pt[4], uint32_t ct[4])
 685  *
 686  * Original Intel OpenSSL Interface:
 687  * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
 688  *      const AES_KEY *key)
 689  */
 690 
 691 #ifdef  OPENSSL_INTERFACE
     /* Build the single-block entry points under their OpenSSL names. */
 692 #define aes_encrypt_intel       intel_AES_encrypt
 693 #define aes_decrypt_intel       intel_AES_decrypt
 694 
     /* Argument registers for (in, out, key); used below as %NAME. */
 695 #define INP             rdi     /* P1, 64 bits */
 696 #define OUTP            rsi     /* P2, 64 bits */
 697 #define KEYP            rdx     /* P3, 64 bits */
 698 
 699 /* No NROUNDS parameter--offset 240 from KEYP saved in %ecx:  */
 700 #define NROUNDS32       ecx     /* temporary, 32 bits */
 701 #define NROUNDS         cl      /* temporary,  8 bits */
 702 
 703 #else   /* OpenSolaris Interface */
     /* Argument registers for (ks, Nr, in, out); used below as %NAME. */
 704 #define KEYP            rdi     /* P1, 64 bits */
 705 #define NROUNDS         esi     /* P2, 32 bits */
 706 #define INP             rdx     /* P3, 64 bits */
 707 #define OUTP            rcx     /* P4, 64 bits */
 708 #endif  /* OPENSSL_INTERFACE */
 709 
     /* Working registers shared by aes_encrypt_intel and aes_decrypt_intel. */
 710 #define STATE           xmm0    /* temporary, 128 bits */
 711 #define KEY             xmm1    /* temporary, 128 bits */
 712 
 713 ENTRY_NP(aes_encrypt_intel)
             /*
              * Loads 16 bytes from (%INP), XORs in round key 0, runs the
              * remaining rounds with aesenc/aesenclast and stores 16 bytes to
              * (%OUTP).  The input and output use unaligned moves; the key
              * schedule is read with movaps and so must be 16-byte aligned.
              *
              * %KEYP is advanced so that the shared tail at .Lenc128 always
              * addresses the final ten round keys at offsets -0x20 .. 0x70:
              *   round count below 12:  %KEYP = ks + 0x30
              *   round count equal 12:  %KEYP = ks + 0x50
              *   round count above 12:  %KEYP = ks + 0x70
              * The longer schedules run their extra rounds first and then fall
              * through into the shorter paths.
              */
 714         /* EXPORT DELETE START */
 715         CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)
 716 
 717         movups  (%INP), %STATE                  / input
 718         movaps  (%KEYP), %KEY                   / key
 719 #ifdef  OPENSSL_INTERFACE
 720         mov     240(%KEYP), %NROUNDS32          / round count
 721 #else   /* OpenSolaris Interface */
 722         /* Round count is already present as P2 in %rsi/%esi */
 723 #endif  /* OPENSSL_INTERFACE */
 724 
 725         pxor    %KEY, %STATE                    / round 0
 726         lea     0x30(%KEYP), %KEYP
 727         cmp     $12, %NROUNDS
 728         jb      .Lenc128
             /* lea leaves the flags alone, so je still tests the cmp above */
 729         lea     0x20(%KEYP), %KEYP
 730         je      .Lenc192
 731 
 732         / AES 256
 733         lea     0x20(%KEYP), %KEYP
 734         movaps  -0x60(%KEYP), %KEY
 735         aesenc  %KEY, %STATE
 736         movaps  -0x50(%KEYP), %KEY
 737         aesenc  %KEY, %STATE
 738 
 739 .align 4
 740 .Lenc192:
 741         / AES 192 and 256
 742         movaps  -0x40(%KEYP), %KEY
 743         aesenc  %KEY, %STATE
 744         movaps  -0x30(%KEYP), %KEY
 745         aesenc  %KEY, %STATE
 746 
 747 .align 4
 748 .Lenc128:
 749         / AES 128, 192, and 256
 750         movaps  -0x20(%KEYP), %KEY
 751         aesenc  %KEY, %STATE
 752         movaps  -0x10(%KEYP), %KEY
 753         aesenc  %KEY, %STATE
 754         movaps  (%KEYP), %KEY
 755         aesenc  %KEY, %STATE
 756         movaps  0x10(%KEYP), %KEY
 757         aesenc  %KEY, %STATE
 758         movaps  0x20(%KEYP), %KEY
 759         aesenc  %KEY, %STATE
 760         movaps  0x30(%KEYP), %KEY
 761         aesenc  %KEY, %STATE
 762         movaps  0x40(%KEYP), %KEY
 763         aesenc  %KEY, %STATE
 764         movaps  0x50(%KEYP), %KEY
 765         aesenc  %KEY, %STATE
 766         movaps  0x60(%KEYP), %KEY
 767         aesenc  %KEY, %STATE
 768         movaps  0x70(%KEYP), %KEY
 769         aesenclast       %KEY, %STATE           / last round
 770         movups  %STATE, (%OUTP)                 / output
 771 
 772         SET_TS_OR_POP_XMM0_XMM1(%r10)
 773         /* EXPORT DELETE END */
 774         ret
 775         SET_SIZE(aes_encrypt_intel)
 776 
 777 
 778 /*
 779  * aes_decrypt_intel()
 780  * Decrypt a single block (in and out can overlap).
 781  *
 782  * For kernel code, caller is responsible for ensuring kpreempt_disable()
 783  * has been called.  This is because %xmm registers are not saved/restored.
 784  * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 785  * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 786  * on the stack.
 787  *
 788  * Temporary register usage:
 789  * %xmm0        State
 790  * %xmm1        Key
 791  *
 792  * Original OpenSolaris Interface:
 793  * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
 794  *      const uint32_t ct[4], uint32_t pt[4])
 795  *
 796  * Original Intel OpenSSL Interface:
 797  * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
 798  *      const AES_KEY *key);
 799  */
 800 ENTRY_NP(aes_decrypt_intel)
             /*
              * Loads 16 bytes from (%INP), XORs in round key 0, runs the
              * remaining rounds with aesdec/aesdeclast and stores 16 bytes to
              * (%OUTP).  The input and output use unaligned moves; the key
              * schedule is read with movaps and so must be 16-byte aligned.
              * The schedule is consumed in ascending address order, so it must
              * be the one prepared by rijndael_key_setup_dec_intel.
              *
              * %KEYP is advanced so that the shared tail at .Ldec128 always
              * addresses the final ten round keys at offsets -0x20 .. 0x70:
              *   round count below 12:  %KEYP = ks + 0x30
              *   round count equal 12:  %KEYP = ks + 0x50
              *   round count above 12:  %KEYP = ks + 0x70
              * The longer schedules run their extra rounds first and then fall
              * through into the shorter paths.
              */
 801         /* EXPORT DELETE START */
 802         CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)
 803 
 804         movups  (%INP), %STATE                  / input
 805         movaps  (%KEYP), %KEY                   / key
 806 #ifdef  OPENSSL_INTERFACE
 807         mov     240(%KEYP), %NROUNDS32          / round count
 808 #else   /* OpenSolaris Interface */
 809         /* Round count is already present as P2 in %rsi/%esi */
 810 #endif  /* OPENSSL_INTERFACE */
 811 
 812         pxor    %KEY, %STATE                    / round 0
 813         lea     0x30(%KEYP), %KEYP
 814         cmp     $12, %NROUNDS
 815         jb      .Ldec128
             /* lea leaves the flags alone, so je still tests the cmp above */
 816         lea     0x20(%KEYP), %KEYP
 817         je      .Ldec192
 818 
 819         / AES 256
 820         lea     0x20(%KEYP), %KEYP
 821         movaps  -0x60(%KEYP), %KEY
 822         aesdec  %KEY, %STATE
 823         movaps  -0x50(%KEYP), %KEY
 824         aesdec  %KEY, %STATE
 825 
 826 .align 4
 827 .Ldec192:
 828         / AES 192 and 256
 829         movaps  -0x40(%KEYP), %KEY
 830         aesdec  %KEY, %STATE
 831         movaps  -0x30(%KEYP), %KEY
 832         aesdec  %KEY, %STATE
 833 
 834 .align 4
 835 .Ldec128:
 836         / AES 128, 192, and 256
 837         movaps  -0x20(%KEYP), %KEY
 838         aesdec  %KEY, %STATE
 839         movaps  -0x10(%KEYP), %KEY
 840         aesdec  %KEY, %STATE
 841         movaps  (%KEYP), %KEY
 842         aesdec  %KEY, %STATE
 843         movaps  0x10(%KEYP), %KEY
 844         aesdec  %KEY, %STATE
 845         movaps  0x20(%KEYP), %KEY
 846         aesdec  %KEY, %STATE
 847         movaps  0x30(%KEYP), %KEY
 848         aesdec  %KEY, %STATE
 849         movaps  0x40(%KEYP), %KEY
 850         aesdec  %KEY, %STATE
 851         movaps  0x50(%KEYP), %KEY
 852         aesdec  %KEY, %STATE
 853         movaps  0x60(%KEYP), %KEY
 854         aesdec  %KEY, %STATE
 855         movaps  0x70(%KEYP), %KEY
 856         aesdeclast      %KEY, %STATE            / last round
 857         movups  %STATE, (%OUTP)                 / output
 858 
 859         SET_TS_OR_POP_XMM0_XMM1(%r10)
             /*
              * Keep the ret outside the EXPORT DELETE region, as the other
              * three entry points in this file do, so the function still
              * returns if the marked region is removed.
              */
 860         /* EXPORT DELETE END */
 861         ret
 862         SET_SIZE(aes_decrypt_intel)
 863 
 864 #endif  /* lint || __lint */