
; Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.
; All rights reserved.
;
; This software has been derived at the University of Michigan from
; software originally created by Dr. Brian Gladman as described below.
;
; TERMS
;
;  Redistribution and use in source and binary forms, with or without 
;  modification, are permitted subject to the following conditions:
;
;  1. Redistributions of source code must retain the above copyright 
;     notice, this list of conditions and the following disclaimer.
;
;  2. Redistributions in binary form must reproduce the above copyright
;     notice, this list of conditions and the following disclaimer in the 
;     documentation and/or other materials provided with the distribution.
;
;  3. The copyright holder's name and/or the University of Michigan
;     must not be used to endorse or promote 
;     any products derived from this software without their specific prior
;     written permission. 
;
;  This software is provided 'as is' with no express or implied warranties 
;  of correctness or fitness for purpose.

; An AES (Rijndael) implementation for the Pentium MMX family using the NASM
; assembler <http://www.web-sites.co.uk/nasm/>. This version only implements
; the standard AES block length (128 bits, 16 bytes) with the same interface
; as that used in my C/C++ implementation.   This code does not preserve the
; eax, ecx or edx registers or the arithmetic status flags. However, the ebx, 
; esi, edi, and ebp registers are preserved across calls.    If this code is 
; used with compiled code the compiler's register saving and use conventions 
; will need to be checked (it is consistent with Microsoft VC++).

; NOTE: This code uses a dual key schedule (set DUAL_KEY_SCHEDULE in aes.h).

    section .text use32         ; code section, assembled as 32-bit code

; C prototypes of the three exported routines:
;
; short _aes_set_key(const byte key[], const word key_len, const enum mode f, aes_ctx *cx)
; short _aes_encrypt(const byte in_blk[], byte out_blk[], const aes_ctx *cx)
; short _aes_decrypt(const byte in_blk[], byte out_blk[], const aes_ctx *cx)

; The three define lines below use C preprocessor syntax, not NASM syntax,
; so this file has to be run through a C preprocessor before NASM sees it.
; The SYMBOL macro pastes the user label prefix in front of a name; the two
; level CONCAT1 and CONCAT2 indirection lets the prefix macro expand before
; the paste operator is applied.
; NOTE(review): the prefix macro is not defined in this file; it is
; presumably the one predefined by GCC that holds the prefix the C compiler
; adds to symbol names - confirm for the toolchain in use.

#define CONCAT1(a,b) CONCAT2(a,b)
#define CONCAT2(a,b) a ## b
#define SYMBOL(NAME) CONCAT1(__USER_LABEL_PREFIX__, NAME)

; exported entry points (defined further down in this file)

    global  SYMBOL(aes_set_key)
    global  SYMBOL(aes_encrypt)
    global  SYMBOL(aes_decrypt)
    
; Build-time switches.
;
; USE_MMX selects the movd based save/restore macros (and the emms that is
; executed before aes_encrypt and aes_decrypt return) instead of the stack
; based ones.
; GLOBAL_TABLES makes the lookup tables external symbols; when it is not
; defined the tables are generated in the .data section of this file.

%define USE_MMX         ; use MMX registers for temporary storage
%define GLOBAL_TABLES   ; NOTE: If global tables are used then FOUR_TABLES
                        ; must be defined for generating them for use here
%ifdef GLOBAL_TABLES

; NOTE(review): these names carry a hard-coded leading underscore while the
; exported entry points go through the SYMBOL macro; confirm that the object
; providing the tables uses matching names on every target.

    extern  _rcon_tab       ; round constants, used by the ksc4/ksc6/ksc8 macros
    extern  _ft_tab         ; default table of fwd_rnd
    extern  _fl_tab         ; table of the last forward round and of the key schedule
    extern  _it_tab         ; default table of inv_rnd
    extern  _il_tab         ; table of the last inverse round
    extern  _im_tab         ; table used to build the decryption key schedule

%endif

tlen    equ 1024            ; size in bytes of one lookup table (256 entries * 4)

; Stack offsets of the encrypt/decrypt arguments.  They are valid while
; exactly one register has been pushed after the return address; code
; that has pushed more adds 4 per extra push.

in_blk  equ 8               ; address of the input block
out_blk equ in_blk+4        ; address of the output block
ctx     equ out_blk+4       ; address of the AES context structure

; Field offsets inside the AES context structure.

ekey    equ 0               ; encryption key schedule (256 bytes)
dkey    equ ekey+256        ; decryption key schedule (256 bytes)
nkey    equ dkey+256        ; key length, stored by aes_set_key in 32-bit words
nrnd    equ nkey+4          ; number of rounds
sflg    equ nrnd+4          ; flags: bit 0 tested by aes_encrypt, bit 1 by aes_decrypt

; register mapping for encrypt and decrypt subroutines
;
; r0, r1, r4 and r5 carry the four 32-bit state columns (they are loaded
; from the input block at offsets 0, 4, 8 and 12 and stored to the output
; block at the same offsets), r2 is the input register handed to the
; column macros, r3 is their scratch register and r6 is the round key
; pointer.

%define r0  eax
%define r1  ebx
%define r2  ecx
%define r3  edx
%define r4  esi
%define r5  edi
%define r6  ebp

; Byte register aliases.  The column macros append l or h to a register
; parameter (ecx followed by l gives ecxl, defined here as cl) to reach
; bits 0-7 and bits 8-15 of that register.  Only eax, ebx, ecx and edx
; have aliases, so only those four can be used where a macro needs byte
; access.

%define eaxl  al
%define eaxh  ah
%define ebxl  bl
%define ebxh  bh
%define ecxl  cl
%define ecxh  ch
%define edxl  dl
%define edxh  dh

; This macro takes a 32-bit word representing a column and uses
; each of its four bytes to index into four tables of 256 32-bit
; words to obtain values that are then xored into the appropriate
; output registers r0, r1, r4 or r5.  The four tables are addressed
; tlen bytes apart, starting at the table base address.
;
; Byte 0 (bits 0-7) of the input selects from the first table and is
; xored into %1, byte 1 uses the second table and %2, byte 2 the third
; table and %3, and byte 3 the fourth table and %4.

; Parameters:
;   %1  out_state[0]
;   %2  out_state[1]
;   %3  out_state[2]
;   %4  out_state[3]
;   %5  table base address
;   %6  input register for the round (destroyed)
;   %7  scratch register for the round
;
; %6 must be a register that has l and h byte aliases (eax, ebx, ecx or
; edx).  On exit %6 holds byte 3 and %7 holds byte 2 of the original
; input, both zero extended.

%macro do_col 7

    movzx   %7,%6l
    xor     %1,[4*%7+%5]
    movzx   %7,%6h
    shr     %6,16
    xor     %2,[4*%7+%5+tlen]
    movzx   %7,%6l
    movzx   %6,%6h
    xor     %3,[4*%7+%5+2*tlen] 
    xor     %4,[4*%6+%5+3*tlen]

%endmacro

; initialise output registers from the key schedule
;
; First column step of a forward round.  It does the same four table
; lookups as do_col, but instead of xoring into existing values it first
; loads each output register with a round key word:
;
;   %1 = [%8]     xor  first table entry for byte 0 of the input
;   %2 = [%8+12]  xor  second table entry for byte 1
;   %3 = [%8+8]   xor  third table entry for byte 2
;   %4 = [%8+4]   xor  fourth table entry for byte 3
;
; Parameters %1 to %7 are as for do_col; %8 is the address expression of
; the round key.  The previous contents of %3 are copied into %6 just
; before %3 is overwritten, so on exit %6 holds the old value of %3
; (fwd_rnd uses it as the input of the following do_col).  %7 is left
; holding byte 2 of the input.

%macro do_fcol 8

    mov     %1,[%8]
    movzx   %7,%6l
    mov     %2,[%8+12]
    xor     %1,[4*%7+%5]
    mov     %4,[%8+ 4]
    movzx   %7,%6h
    shr     %6,16
    xor     %2,[4*%7+%5+tlen]
    movzx   %7,%6l
    movzx   %6,%6h
    xor     %4,[4*%6+%5+3*tlen]
    mov     %6,%3
    mov     %3,[%8+ 8]
    xor     %3,[4*%7+%5+2*tlen] 

%endmacro

; initialise output registers from the key schedule
;
; First column step of an inverse round.  It does the same four table
; lookups as do_col, but instead of xoring into existing values it first
; loads each output register with a round key word:
;
;   %1 = [%8]     xor  first table entry for byte 0 of the input
;   %2 = [%8+4]   xor  second table entry for byte 1
;   %3 = [%8+8]   xor  third table entry for byte 2
;   %4 = [%8+12]  xor  fourth table entry for byte 3
;
; Parameters %1 to %7 are as for do_col; %8 is the address expression of
; the round key.  The previous contents of %3 are copied into %6 just
; before %3 is overwritten, so on exit %6 holds the old value of %3
; (inv_rnd uses it as the input of the following do_col).  %7 is left
; holding byte 2 of the input.

%macro do_icol 8

    mov     %1,[%8]
    movzx   %7,%6l
    mov     %2,[%8+ 4]
    xor     %1,[4*%7+%5]
    mov     %4,[%8+12]
    movzx   %7,%6h
    shr     %6,16
    xor     %2,[4*%7+%5+tlen]
    movzx   %7,%6l
    movzx   %6,%6h
    xor     %4,[4*%6+%5+3*tlen]
    mov     %6,%3
    mov     %3,[%8+ 8]
    xor     %3,[4*%7+%5+2*tlen] 

%endmacro

; Local variable slots for the round macros.  With USE_MMX defined a slot
; is an MMX register (slot n is mm<n>); otherwise it is the dword at
; [esp+4*n], which the caller must have reserved on the stack.

; save <slot>,<reg32> : copy a 32-bit register into a slot

%macro  save 2
%ifdef  USE_MMX
    movd    mm%1,%2
%else
    mov     [esp+4*%1],%2
%endif
%endmacro

; restore <reg32>,<slot> : copy a slot back into a 32-bit register

%macro  restore 2
%ifdef  USE_MMX
    movd    %1,mm%2
%else
    mov     %1,[esp+4*%2]
%endif
%endmacro

; This macro performs a forward encryption cycle. It is entered with
; the previous round column values in r0, r1, r4 and r5 and exits with
; the new values in the same registers.  Two save slots (0 and 1) are
; used for temporary storage: the MMX registers mm0-mm1 when USE_MMX is
; defined, otherwise two dwords at [esp] and [esp+4].
;
; Parameters:
;   %1  address expression of the round key for this round
;   %2  table base address (optional, defaults to _ft_tab)
;
; r2 and r3 are used as input and scratch registers and are destroyed.

%macro fwd_rnd 1-2 _ft_tab

; copy column 0 into the input register and park columns 1 and 3 in the
; save slots; column 2 stays in r4 until do_fcol moves it to the input
; register

    mov     r2,r0
    save    0,r1
    save    1,r5

; compute new column values: do_fcol consumes column 0 and loads the round
; key, then the three do_col steps consume columns 2, 1 and 3 in that
; order, each with the output registers permuted to match its position

    do_fcol r0,r5,r4,r1, %2, r2,r3, %1
    do_col  r4,r1,r0,r5, %2, r2,r3
    restore r2,0
    do_col  r1,r0,r5,r4, %2, r2,r3
    restore r2,1
    do_col  r5,r4,r1,r0, %2, r2,r3

%endmacro

; This macro performs an inverse encryption cycle. It is entered with
; the previous round column values in r0, r1, r4 and r5 and exits with
; the new values in the same registers.  Two save slots (0 and 1) are
; used for temporary storage: the MMX registers mm0-mm1 when USE_MMX is
; defined, otherwise two dwords at [esp] and [esp+4].
;
; Parameters:
;   %1  address expression of the round key for this round
;   %2  table base address (optional, defaults to _it_tab)
;
; r2 and r3 are used as input and scratch registers and are destroyed.

%macro inv_rnd 1-2 _it_tab

; copy column 0 into the input register and park columns 1 and 3 in the
; save slots; column 2 stays in r4 until do_icol moves it to the input
; register

    mov     r2,r0
    save    0,r1
    save    1,r5

; compute new column values: do_icol consumes column 0 and loads the round
; key, then the three do_col steps consume columns 2, 1 and 3 in that
; order, each with the output registers permuted to match its position

    do_icol r0,r1,r4,r5, %2, r2,r3, %1
    do_col  r4,r5,r0,r1, %2, r2,r3
    restore r2,0
    do_col  r1,r4,r5,r0, %2, r2,r3
    restore r2,1
    do_col  r5,r0,r1,r4, %2, r2,r3

%endmacro

; AES (Rijndael) Encryption Subroutine
;
;   short aes_encrypt(const byte in_blk[], byte out_blk[], const aes_ctx *cx)
;
; All three arguments are read from the stack.  One 16-byte block is read
; from in_blk, encrypted with the encryption key schedule held in the
; context and written to out_blk.
;
; Returns eax = 1 on success.  Returns eax = 0, without writing out_blk,
; when bit 0 of the context flags is clear (no encryption key set).
; ebx, esi, edi and ebp are saved and restored; eax, ecx, edx and the
; arithmetic flags are not preserved.  With USE_MMX defined mm0 and mm1
; are overwritten and emms is executed before returning.

SYMBOL(aes_encrypt):
    push    ebp
    mov     ebp,[esp+ctx]           ; ebp -> AES context
    test    byte [ebp+sflg],1       ; has an encryption key been set?
    jnz     short .have_key
    xor     eax,eax                 ; no: return 0
    pop     ebp
    ret

; CAUTION: everything below relies on the register mappings (r1, r4, r5
; and r6 are the callee saved registers pushed here and at entry)

.have_key:
    push    ebx
    push    esi
    push    edi
    mov     r2,[esp+in_blk+12]      ; input block (three more pushes since entry)
    mov     r3,[ebp+nrnd]           ; number of rounds
    lea     r6,[ebp+ekey]           ; r6 -> first round key (replaces the context pointer)

; load the four input columns, xoring the first round key into each

    mov     r0,[r2]
    xor     r0,[r6]
    mov     r1,[r2+4]
    xor     r1,[r6+4]
    mov     r4,[r2+8]
    xor     r4,[r6+8]
    mov     r5,[r2+12]
    xor     r5,[r6+12]

%ifndef USE_MMX
    sub     esp,8                   ; two stack slots for save/restore
%endif

; Position r6 so that the last ten rounds always use r6 ... r6+144; the
; longer schedules run two or four extra rounds first, at negative offsets.

    add     r6,16                   ; step past the first round key
    cmp     r3,10
    je      .rounds10               ; 10 rounds (128-bit key)
    add     r6,32
    cmp     r3,12
    je      .rounds12               ; 12 rounds (192-bit key)
    add     r6,32                   ; otherwise 14 rounds (256-bit key)

.rounds14:
    fwd_rnd r6-64
    fwd_rnd r6-48
.rounds12:
    fwd_rnd r6-32
    fwd_rnd r6-16
.rounds10:
    fwd_rnd r6
    fwd_rnd r6+16
    fwd_rnd r6+32
    fwd_rnd r6+48
    fwd_rnd r6+64
    fwd_rnd r6+80
    fwd_rnd r6+96
    fwd_rnd r6+112
    fwd_rnd r6+128
    fwd_rnd r6+144,_fl_tab          ; last round uses a different table

; write the four columns to the output block, then restore the callee
; saved registers (r6 is free to hold the output pointer until ebp is
; popped)

%ifndef USE_MMX
    add     esp,8
%endif
    mov     r6,[esp+out_blk+12]
    mov     [r6],r0
    mov     [r6+4],r1
    mov     [r6+8],r4
    mov     [r6+12],r5
    pop     edi
    pop     esi
    pop     ebx
    pop     ebp
    mov     eax,1                   ; success
%ifdef  USE_MMX
    emms
%endif
    ret

; AES (Rijndael) Decryption Subroutine
;
;   short aes_decrypt(const byte in_blk[], byte out_blk[], const aes_ctx *cx)
;
; All three arguments are read from the stack.  One 16-byte block is read
; from in_blk, decrypted with the decryption key schedule held in the
; context and written to out_blk.
;
; Returns eax = 1 on success.  Returns eax = 0, without writing out_blk,
; when bit 1 of the context flags is clear (no decryption key set).
; ebx, esi, edi and ebp are saved and restored; eax, ecx, edx and the
; arithmetic flags are not preserved.  With USE_MMX defined mm0 and mm1
; are overwritten and emms is executed before returning.

SYMBOL(aes_decrypt):
    push    ebp
    mov     ebp,[esp+ctx]           ; ebp -> AES context
    test    byte [ebp+sflg],2       ; has a decryption key been set?
    jnz     short .have_key
    xor     eax,eax                 ; no: return 0
    pop     ebp
    ret

; CAUTION: everything below relies on the register mappings (r1, r4, r5
; and r6 are the callee saved registers pushed here and at entry)

.have_key:
    push    ebx
    push    esi
    push    edi
    mov     r2,[esp+in_blk+12]      ; input block (three more pushes since entry)
    mov     r3,[ebp+nrnd]           ; number of rounds
    lea     r6,[ebp+dkey]           ; r6 -> first round key (replaces the context pointer)

; load the four input columns, xoring the first round key into each

    mov     r0,[r2]
    xor     r0,[r6]
    mov     r1,[r2+4]
    xor     r1,[r6+4]
    mov     r4,[r2+8]
    xor     r4,[r6+8]
    mov     r5,[r2+12]
    xor     r5,[r6+12]

%ifndef USE_MMX
    sub     esp,8                   ; two stack slots for save/restore
%endif

; Position r6 so that the last ten rounds always use r6 ... r6+144; the
; longer schedules run two or four extra rounds first, at negative offsets.

    add     r6,16                   ; step past the first round key
    cmp     r3,10
    je      .rounds10               ; 10 rounds (128-bit key)
    add     r6,32
    cmp     r3,12
    je      .rounds12               ; 12 rounds (192-bit key)
    add     r6,32                   ; otherwise 14 rounds (256-bit key)

.rounds14:
    inv_rnd r6-64
    inv_rnd r6-48
.rounds12:
    inv_rnd r6-32
    inv_rnd r6-16
.rounds10:
    inv_rnd r6
    inv_rnd r6+16
    inv_rnd r6+32
    inv_rnd r6+48
    inv_rnd r6+64
    inv_rnd r6+80
    inv_rnd r6+96
    inv_rnd r6+112
    inv_rnd r6+128
    inv_rnd r6+144,_il_tab          ; last round uses a different table

; write the four columns to the output block, then restore the callee
; saved registers (r6 is free to hold the output pointer until ebp is
; popped)

%ifndef USE_MMX
    add     esp,8
%endif
    mov     r6,[esp+out_blk+12]
    mov     [r6],r0
    mov     [r6+4],r1
    mov     [r6+8],r4
    mov     [r6+12],r5
    pop     edi
    pop     esi
    pop     ebx
    pop     ebp
    mov     eax,1                   ; success
%ifdef  USE_MMX
    emms
%endif
    ret

; AES (Rijndael) Key Schedule Subroutine

; Argument offsets from ebp inside aes_set_key.  That routine pushes the
; flags and ebp before copying esp to ebp, so the first argument sits
; 12 bytes above ebp.

in_key  equ 12              ; key input array address
key_ln  equ in_key+4        ; key length
ed_flg  equ key_ln+4        ; encryption/decryption flag
aes_cx  equ ed_flg+4        ; AES context

; Local variables, addressed below ebp.

cnt     equ -4              ; round counter for the decryption schedule loop
kpf     equ -8              ; not referenced by any code in this file
slen    equ 8               ; bytes reserved on the stack for the locals

; This macro performs a column mixing operation on an input 32-bit
; word to give a 32-bit result. It uses each of the 4 bytes in the
; the input column to index 4 different tables of 256 32-bit words 
; that are xored together to form the output value.
;
; Parameters:
;   %1  input register; must have l and h byte aliases (eax, ebx, ecx or
;       edx) and in practice must not be eax or ecx, which the macro
;       overwrites
;   %2  table base address; the four tables are addressed tlen bytes apart
;
; Result in eax.  ecx is destroyed.  The input register is left rotated
; by 16 bits (its two halves swapped), so callers that need the original
; value have to rotate it back or keep a copy.

%macro mix_col 2
    movzx   ecx,%1l
    mov     eax,[ecx*4+%2]
    movzx   ecx,%1h
    ror     %1,16
    xor     eax,[ecx*4+%2+tlen]
    movzx   ecx,%1l
    xor     eax,[ecx*4+%2+2*tlen]
    movzx   ecx,%1h
    xor     eax,[ecx*4+%2+3*tlen]
%endmacro

; Key Schedule Macros

; ksc4 produces the next four key schedule words for a four word key.
;
; On entry (as set up by aes_set_key):
;   esi, ebp, edx, ebx  the four most recent schedule words, oldest first
;   edi                 address just past the key words copied into the
;                       schedule
; Parameters:
;   %1  group number: selects the _rcon_tab entry and the output offset
;       (16 bytes per group, counted from edi)
;   %2  not referenced by the macro body
;
; On exit the four registers hold the four new words, which have also
; been stored.  eax and ecx are destroyed.

%macro ksc4 2
; rotate the newest word right by one byte, look its bytes up in the
; _fl_tab tables (with the table layout generated in this file that is
; an S-box substitution of each byte), then undo the rotation: 8 here,
; 16 inside mix_col and 8 below add up to a full turn
    rol     ebx,24
    mix_col ebx,_fl_tab
    ror     ebx,8
    xor     eax,[4*%1+_rcon_tab]
; chain the result through the four words and store them
    xor     esi,eax
    xor     ebp,esi
    mov     [edi+16*%1],esi
    mov     [edi+16*%1+4],ebp
    xor     edx,ebp
    xor     ebx,edx
    mov     [edi+16*%1+8],edx
    mov     [edi+16*%1+12],ebx
%endmacro

; ksc6 produces the next six key schedule words for a six word key.
;
; On entry (as set up by aes_set_key):
;   esi, ebp, edx, ebx  the four most recent schedule words, oldest first
;   edi                 address just past the key words copied into the
;                       schedule
; Parameters:
;   %1  group number: selects the _rcon_tab entry and the output offset
;       (24 bytes per group, counted from edi)
;   %2  not referenced by the macro body
;
; The two oldest words of the previous group are not kept in registers;
; they are read back from the schedule at 24 and 20 bytes below the
; output position.  On exit the four registers hold the last four of the
; six new words.  eax and ecx are destroyed.

%macro ksc6 2
; transform the newest word: rotate right one byte, table lookup through
; _fl_tab, xor the round constant; the rotations cancel out over the
; three rotate steps (8, 16 inside mix_col, 8)
    rol     ebx,24
    mix_col ebx,_fl_tab
    ror     ebx,8
    xor     eax,[4*%1+_rcon_tab]
; first two new words, chained through the words stored one group back
    xor     eax,[edi+24*%1-24]
    mov     [edi+24*%1],eax
    xor     eax,[edi+24*%1-20]
    mov     [edi+24*%1+4],eax
; remaining four new words, chained through the register copies
    xor     esi,eax
    xor     ebp,esi
    mov     [edi+24*%1+8],esi
    mov     [edi+24*%1+12],ebp
    xor     edx,ebp
    xor     ebx,edx
    mov     [edi+24*%1+16],edx
    mov     [edi+24*%1+20],ebx
%endmacro

; ksc8 produces the next eight key schedule words for an eight word key.
;
; On entry (as set up by aes_set_key):
;   esi, ebp, edx, ebx  the four most recent schedule words, oldest first
;   edi                 address just past the key words copied into the
;                       schedule
; Parameters:
;   %1  group number: selects the _rcon_tab entry and the output offset
;       (32 bytes per group, counted from edi)
;   %2  not referenced by the macro body
;
; The four oldest words of the previous group are read back from the
; schedule at 32 to 20 bytes below the output position.  On exit the four
; registers hold the last four of the eight new words.  eax and ecx are
; destroyed; one dword of stack is used temporarily.

%macro ksc8 2
; transform the newest word: rotate right one byte, table lookup through
; _fl_tab, xor the round constant; the rotations cancel out over the
; three rotate steps (8, 16 inside mix_col, 8)
    rol     ebx,24
    mix_col ebx,_fl_tab
    ror     ebx,8
    xor     eax,[4*%1+_rcon_tab]
; first four new words, chained through the words stored one group back
    xor     eax,[edi+32*%1-32]
    mov     [edi+32*%1],eax
    xor     eax,[edi+32*%1-28]
    mov     [edi+32*%1+4],eax
    xor     eax,[edi+32*%1-24]
    mov     [edi+32*%1+8],eax
    xor     eax,[edi+32*%1-20]
    mov     [edi+32*%1+12],eax
; second table lookup, this time on the fourth new word without rotation
; or round constant; ebx is preserved on the stack because mix_col
; rotates its input register
    push    ebx
    mov     ebx,eax
    mix_col ebx,_fl_tab
    pop     ebx
; last four new words, chained through the register copies
    xor     esi,eax
    xor     ebp,esi
    mov     [edi+32*%1+16],esi
    mov     [edi+32*%1+20],ebp
    xor     edx,ebp
    xor     ebx,edx
    mov     [edi+32*%1+24],edx
    mov     [edi+32*%1+28],ebx
%endmacro

;   short aes_set_key(const byte key[], const word key_len, const enum mode f, aes_ctx *cx)
;
; Builds the key schedule(s) in the AES context from a 16, 24 or 32 byte
; key.  All arguments are read from the stack.
;
; Returns in eax:
;   1            the schedule was built; the low two bits of f are stored
;                in the context flags (bit 0 encryption, bit 1 decryption)
;                and the decryption schedule is built only if bit 1 is set
;   4 * nkey     when key_len is 0: the key length in bytes that is
;                currently stored in the context; nothing is modified
;   0            for any other invalid key_len, or when the low two bits
;                of f are both clear; the two flag bits in the context
;                are cleared
;
; ebx, esi, edi, ebp and the flags register are preserved (the direction
; flag is cleared internally and restored by popfd); eax, ecx and edx are
; not preserved.

SYMBOL(aes_set_key):
    pushfd
    push    ebp
    mov     ebp,esp
    sub     esp,slen                ; reserve the local variable area
    mov     edx,[ebp+aes_cx]        ; edx -> AES context
    mov     eax,[ebp+key_ln]        ; key length must be a multiple of 8 bytes
    and     eax,7                   ; (and sets ZF itself, no extra test needed)
    jne     .reject
    cmp     [ebp+key_ln],dword 16   ; and at least 16 bytes but
    jb      short .reject
    cmp     [ebp+key_ln],dword 32   ; no more than 32 bytes
    ja      .reject
    mov     ecx,[ebp+ed_flg]        ; encryption/decryption flag
    and     ecx,3
    jne     .build                  ; if one or both enc and dec set
.reject:
    cmp     [ebp+key_ln],dword 0    ; a key length of 0 is a length query
    je      .query
    and     [edx+sflg],byte 0xfc    ; bad arguments, reset enc/dec
    xor     eax,eax                 ; flags and return zero
    jmp     .exit
.query:
    mov     eax,[edx+nkey]          ; return key length in bytes
    shl     eax,2                   ; (it is stored in 32-bit words)
.exit:
    mov     esp,ebp
    pop     ebp
    popfd
    ret
.build:
    push    ebx                     ; edx -> AES context
    push    esi
    push    edi
    and     [edx+sflg],byte 0xfc    ; set up flag in AES context to
    or      [edx+sflg],cl           ; match the input enc/dec flag
    mov     ecx,[ebp+key_ln]        ; store key length in 32-bit words
    shr     ecx,2                   ; in AES context
    mov     [edx+nkey],ecx

    mov     eax,4                   ; calculate number of rounds
    cmp     [edx+nkey],eax          ; block length <= 4 32-bit words
    jbe     .rounds
    mov     eax,[edx+nkey]
.rounds:
    add     eax,6                   ; 10/12/14 for 4/6/8 32-bit key length
    mov     [edx+nrnd],eax

    mov     esi,[ebp+in_key]        ; key input array
    lea     edi,[edx+ekey]          ; key position in AES context
    cld
    push    ebp                     ; ebp is used as a data register below
    mov     eax,ecx                 ; save key length in eax
    rep     movsd                   ; words in the key schedule
    mov     ebx,[esi-4]             ; keep the last four key words in
    mov     edx,[esi-8]             ; registers (newest in ebx) for the
    mov     ebp,[esi-12]            ; ksc macros; edi now points just
    mov     esi,[esi-16]            ; past the copied key

    cmp     eax,dword 4             ; jump on key size
    je      near .key128
    cmp     eax,dword 6
    je      near .key192

    ksc8    0,8     ; 32 byte key
    ksc8    1,8
    ksc8    2,8
    ksc8    3,8
    ksc8    4,8
    ksc8    5,8
    ksc8    6,8
    jmp     near .expanded
.key192:
    ksc6    0,6     ; 24 byte key
    ksc6    1,6
    ksc6    2,6
    ksc6    3,6
    ksc6    4,6
    ksc6    5,6
    ksc6    6,6
    ksc6    7,6
    jmp     near .expanded
.key128:
    ksc4    0,4     ; 16 byte key
    ksc4    1,4
    ksc4    2,4
    ksc4    3,4
    ksc4    4,4
    ksc4    5,4
    ksc4    6,4
    ksc4    7,4
    ksc4    8,4
    ksc4    9,4
.expanded:
    pop     ebp                     ; frame pointer is valid again
    mov     edx,[ebp+aes_cx]        ; edx -> AES context
    test    [edx+sflg],byte 2       ; exit if decryption key is not needed
    je      .done

; compile decryption key schedule from encryption schedule - reverse
; order and do mix_column operation on round keys except first and last

    mov     eax,[edx+nrnd]          ; kt = cx->d_key + nc * cx->Nrnd
    shl     eax,2
    lea     edi,[eax*4+edx+dkey]
    lea     esi,[edx+ekey]          ; kf = cx->e_key

    movsd                           ; copy first round key (unmodified)
    movsd
    movsd
    movsd
    sub     edi,32                  ; step the destination back one round key
    mov     [ebp+cnt],dword 1
.mix_round:                         ; do mix column on each column of
    lodsd                           ; each round key
    mov     ebx,eax
    mix_col ebx,_im_tab
    stosd
    lodsd
    mov     ebx,eax
    mix_col ebx,_im_tab
    stosd
    lodsd
    mov     ebx,eax
    mix_col ebx,_im_tab
    stosd
    lodsd
    mov     ebx,eax
    mix_col ebx,_im_tab
    stosd
    sub     edi,32                  ; destination moves down while source moves up

    inc     dword[ebp+cnt]
    mov     eax,[ebp+cnt]
    cmp     eax,[edx+nrnd]
    jb      .mix_round

    movsd                           ; copy last round key (unmodified)
    movsd
    movsd
    movsd
.done:
    pop     edi
    pop     esi
    pop     ebx
    mov     esp,ebp
    pop     ebp
    mov     eax,1                   ; success
    popfd
    ret

    section .data               ; lookup tables generated by this file

; Everything from the next line down to the matching conditional end is
; assembled only when GLOBAL_TABLES is not defined; otherwise the tables
; are the external symbols declared near the top of the file.

%ifndef GLOBAL_TABLES

; finite field multiplies by {02}, {04} and {08}
;
; The argument is expected to be a byte value (0 to 255).  The shifted
; value is reduced by xoring in 0x11b once for every bit that was shifted
; out past bit 7.  Every use of the parameter is parenthesised so that an
; expression argument is substituted as a unit.

%define f2(x)   (((x)<<1)^((((x)>>7)&1)*0x11b))
%define f4(x)   (((x)<<2)^((((x)>>6)&1)*0x11b)^((((x)>>6)&2)*0x11b))
%define f8(x)   (((x)<<3)^((((x)>>5)&1)*0x11b)^((((x)>>5)&2)*0x11b)^((((x)>>5)&4)*0x11b))

; finite field multiplies required in table generation
; (by {03}, {09}, {0b}, {0d} and {0e}, built from the ones above)

%define f3(x)   (f2(x) ^ (x))
%define f9(x)   (f8(x) ^ (x))
%define fb(x)   (f8(x) ^ f2(x) ^ (x))
%define fd(x)   (f8(x) ^ f4(x) ^ (x))
%define fe(x)   (f8(x) ^ f4(x) ^ f2(x))

; These defines generate the forward table entries
; (u1, u2 and u3 are u0 rotated left by 8, 16 and 24 bits)

%define u0(x)   ((f3(x) << 24) | ((x) << 16) | ((x) << 8) | f2(x))
%define u1(x)   (((x) << 24) | ((x) << 16) | (f2(x) << 8) | f3(x))
%define u2(x)   (((x) << 24) | (f2(x) << 16) | (f3(x) << 8) | (x))
%define u3(x)   ((f2(x) << 24) | (f3(x) << 16) | ((x) << 8) | (x))

; These defines generate the inverse table entries
; (v1, v2 and v3 are v0 rotated left by 8, 16 and 24 bits)

%define v0(x)   ((fb(x) << 24) | (fd(x) << 16) | (f9(x) << 8) | fe(x))
%define v1(x)   ((fd(x) << 24) | (f9(x) << 16) | (fe(x) << 8) | fb(x))
%define v2(x)   ((f9(x) << 24) | (fe(x) << 16) | (fb(x) << 8) | fd(x))
%define v3(x)   ((fe(x) << 24) | (fb(x) << 16) | (fd(x) << 8) | f9(x))

; These defines generate entries for the last round tables
; (the byte itself, placed in byte 0, 1, 2 or 3 of the word)

%define w0(x)   (x)
%define w1(x)   ((x) <<  8)
%define w2(x)   ((x) << 16)
%define w3(x)   ((x) << 24)

; macro to generate inverse mix column tables (needed for the key schedule)
;
; Parameters:
;   %1  data directive used for each entry (dd in every use in this file)
;   %2  single-line macro applied to each index
;
; Emits 256 entries, one per line, for the index values 0 to 255.  The
; index is the assembly-time variable x, which is left at 256 afterwards.

%macro im_data 2
%assign x   0
%rep 256
    %1  %2(x)
%assign x x+1
%endrep
%endmacro

; The rcon_table (needed for the key schedule)
;
; 29 dword entries: the first is 1 and each following entry is the
; previous one multiplied by {02} in the finite field (shift left, then
; xor with 0x11b when bit 8 becomes set).

_rcon_tab:
    dd  0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80
    dd  0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f
    dd  0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4
    dd  0xb3,0x7d,0xfa,0xef,0xc5

; S-box data - 256 entries
;
; Parameters:
;   %1  data directive used for each line (dd in every use in this file)
;   %2  single-line macro applied to each S-box byte
;
; Emits 32 lines of 8 entries, in S-box index order 0x00 to 0xff.  The
; entry for index 0xc9 is written out as the literal 0xdd; it must not be
; spelled with the first macro parameter, which only produced the right
; value while that parameter happened to be the text dd.

%macro sb_data 2
    %1    %2(0x63),%2(0x7c),%2(0x77),%2(0x7b),%2(0xf2),%2(0x6b),%2(0x6f),%2(0xc5)
    %1    %2(0x30),%2(0x01),%2(0x67),%2(0x2b),%2(0xfe),%2(0xd7),%2(0xab),%2(0x76)
    %1    %2(0xca),%2(0x82),%2(0xc9),%2(0x7d),%2(0xfa),%2(0x59),%2(0x47),%2(0xf0)
    %1    %2(0xad),%2(0xd4),%2(0xa2),%2(0xaf),%2(0x9c),%2(0xa4),%2(0x72),%2(0xc0)
    %1    %2(0xb7),%2(0xfd),%2(0x93),%2(0x26),%2(0x36),%2(0x3f),%2(0xf7),%2(0xcc)
    %1    %2(0x34),%2(0xa5),%2(0xe5),%2(0xf1),%2(0x71),%2(0xd8),%2(0x31),%2(0x15)
    %1    %2(0x04),%2(0xc7),%2(0x23),%2(0xc3),%2(0x18),%2(0x96),%2(0x05),%2(0x9a)
    %1    %2(0x07),%2(0x12),%2(0x80),%2(0xe2),%2(0xeb),%2(0x27),%2(0xb2),%2(0x75)
    %1    %2(0x09),%2(0x83),%2(0x2c),%2(0x1a),%2(0x1b),%2(0x6e),%2(0x5a),%2(0xa0)
    %1    %2(0x52),%2(0x3b),%2(0xd6),%2(0xb3),%2(0x29),%2(0xe3),%2(0x2f),%2(0x84)
    %1    %2(0x53),%2(0xd1),%2(0x00),%2(0xed),%2(0x20),%2(0xfc),%2(0xb1),%2(0x5b)
    %1    %2(0x6a),%2(0xcb),%2(0xbe),%2(0x39),%2(0x4a),%2(0x4c),%2(0x58),%2(0xcf)
    %1    %2(0xd0),%2(0xef),%2(0xaa),%2(0xfb),%2(0x43),%2(0x4d),%2(0x33),%2(0x85)
    %1    %2(0x45),%2(0xf9),%2(0x02),%2(0x7f),%2(0x50),%2(0x3c),%2(0x9f),%2(0xa8)
    %1    %2(0x51),%2(0xa3),%2(0x40),%2(0x8f),%2(0x92),%2(0x9d),%2(0x38),%2(0xf5)
    %1    %2(0xbc),%2(0xb6),%2(0xda),%2(0x21),%2(0x10),%2(0xff),%2(0xf3),%2(0xd2)
    %1    %2(0xcd),%2(0x0c),%2(0x13),%2(0xec),%2(0x5f),%2(0x97),%2(0x44),%2(0x17)
    %1    %2(0xc4),%2(0xa7),%2(0x7e),%2(0x3d),%2(0x64),%2(0x5d),%2(0x19),%2(0x73)
    %1    %2(0x60),%2(0x81),%2(0x4f),%2(0xdc),%2(0x22),%2(0x2a),%2(0x90),%2(0x88)
    %1    %2(0x46),%2(0xee),%2(0xb8),%2(0x14),%2(0xde),%2(0x5e),%2(0x0b),%2(0xdb)
    %1    %2(0xe0),%2(0x32),%2(0x3a),%2(0x0a),%2(0x49),%2(0x06),%2(0x24),%2(0x5c)
    %1    %2(0xc2),%2(0xd3),%2(0xac),%2(0x62),%2(0x91),%2(0x95),%2(0xe4),%2(0x79)
    %1    %2(0xe7),%2(0xc8),%2(0x37),%2(0x6d),%2(0x8d),%2(0xd5),%2(0x4e),%2(0xa9)
    %1    %2(0x6c),%2(0x56),%2(0xf4),%2(0xea),%2(0x65),%2(0x7a),%2(0xae),%2(0x08)
    %1    %2(0xba),%2(0x78),%2(0x25),%2(0x2e),%2(0x1c),%2(0xa6),%2(0xb4),%2(0xc6)
    %1    %2(0xe8),%2(0xdd),%2(0x74),%2(0x1f),%2(0x4b),%2(0xbd),%2(0x8b),%2(0x8a)
    %1    %2(0x70),%2(0x3e),%2(0xb5),%2(0x66),%2(0x48),%2(0x03),%2(0xf6),%2(0x0e)
    %1    %2(0x61),%2(0x35),%2(0x57),%2(0xb9),%2(0x86),%2(0xc1),%2(0x1d),%2(0x9e)
    %1    %2(0xe1),%2(0xf8),%2(0x98),%2(0x11),%2(0x69),%2(0xd9),%2(0x8e),%2(0x94)
    %1    %2(0x9b),%2(0x1e),%2(0x87),%2(0xe9),%2(0xce),%2(0x55),%2(0x28),%2(0xdf)
    %1    %2(0x8c),%2(0xa1),%2(0x89),%2(0x0d),%2(0xbf),%2(0xe6),%2(0x42),%2(0x68)
    %1    %2(0x41),%2(0x99),%2(0x2d),%2(0x0f),%2(0xb0),%2(0x54),%2(0xbb),%2(0x16)
%endmacro

; Inverse S-box data - 256 entries
;
; Parameters:
;   %1  data directive used for each line (dd in every use in this file)
;   %2  single-line macro applied to each inverse S-box byte
;
; Emits 32 lines of 8 entries, in inverse S-box index order 0x00 to 0xff.

%macro ib_data 2
    %1    %2(0x52),%2(0x09),%2(0x6a),%2(0xd5),%2(0x30),%2(0x36),%2(0xa5),%2(0x38)
    %1    %2(0xbf),%2(0x40),%2(0xa3),%2(0x9e),%2(0x81),%2(0xf3),%2(0xd7),%2(0xfb)
    %1    %2(0x7c),%2(0xe3),%2(0x39),%2(0x82),%2(0x9b),%2(0x2f),%2(0xff),%2(0x87)
    %1    %2(0x34),%2(0x8e),%2(0x43),%2(0x44),%2(0xc4),%2(0xde),%2(0xe9),%2(0xcb)
    %1    %2(0x54),%2(0x7b),%2(0x94),%2(0x32),%2(0xa6),%2(0xc2),%2(0x23),%2(0x3d)
    %1    %2(0xee),%2(0x4c),%2(0x95),%2(0x0b),%2(0x42),%2(0xfa),%2(0xc3),%2(0x4e)
    %1    %2(0x08),%2(0x2e),%2(0xa1),%2(0x66),%2(0x28),%2(0xd9),%2(0x24),%2(0xb2)
    %1    %2(0x76),%2(0x5b),%2(0xa2),%2(0x49),%2(0x6d),%2(0x8b),%2(0xd1),%2(0x25)
    %1    %2(0x72),%2(0xf8),%2(0xf6),%2(0x64),%2(0x86),%2(0x68),%2(0x98),%2(0x16)
    %1    %2(0xd4),%2(0xa4),%2(0x5c),%2(0xcc),%2(0x5d),%2(0x65),%2(0xb6),%2(0x92)
    %1    %2(0x6c),%2(0x70),%2(0x48),%2(0x50),%2(0xfd),%2(0xed),%2(0xb9),%2(0xda)
    %1    %2(0x5e),%2(0x15),%2(0x46),%2(0x57),%2(0xa7),%2(0x8d),%2(0x9d),%2(0x84)
    %1    %2(0x90),%2(0xd8),%2(0xab),%2(0x00),%2(0x8c),%2(0xbc),%2(0xd3),%2(0x0a)
    %1    %2(0xf7),%2(0xe4),%2(0x58),%2(0x05),%2(0xb8),%2(0xb3),%2(0x45),%2(0x06)
    %1    %2(0xd0),%2(0x2c),%2(0x1e),%2(0x8f),%2(0xca),%2(0x3f),%2(0x0f),%2(0x02)
    %1    %2(0xc1),%2(0xaf),%2(0xbd),%2(0x03),%2(0x01),%2(0x13),%2(0x8a),%2(0x6b)
    %1    %2(0x3a),%2(0x91),%2(0x11),%2(0x41),%2(0x4f),%2(0x67),%2(0xdc),%2(0xea)
    %1    %2(0x97),%2(0xf2),%2(0xcf),%2(0xce),%2(0xf0),%2(0xb4),%2(0xe6),%2(0x73)
    %1    %2(0x96),%2(0xac),%2(0x74),%2(0x22),%2(0xe7),%2(0xad),%2(0x35),%2(0x85)
    %1    %2(0xe2),%2(0xf9),%2(0x37),%2(0xe8),%2(0x1c),%2(0x75),%2(0xdf),%2(0x6e)
    %1    %2(0x47),%2(0xf1),%2(0x1a),%2(0x71),%2(0x1d),%2(0x29),%2(0xc5),%2(0x89)
    %1    %2(0x6f),%2(0xb7),%2(0x62),%2(0x0e),%2(0xaa),%2(0x18),%2(0xbe),%2(0x1b)
    %1    %2(0xfc),%2(0x56),%2(0x3e),%2(0x4b),%2(0xc6),%2(0xd2),%2(0x79),%2(0x20)
    %1    %2(0x9a),%2(0xdb),%2(0xc0),%2(0xfe),%2(0x78),%2(0xcd),%2(0x5a),%2(0xf4)
    %1    %2(0x1f),%2(0xdd),%2(0xa8),%2(0x33),%2(0x88),%2(0x07),%2(0xc7),%2(0x31)
    %1    %2(0xb1),%2(0x12),%2(0x10),%2(0x59),%2(0x27),%2(0x80),%2(0xec),%2(0x5f)
    %1    %2(0x60),%2(0x51),%2(0x7f),%2(0xa9),%2(0x19),%2(0xb5),%2(0x4a),%2(0x0d)
    %1    %2(0x2d),%2(0xe5),%2(0x7a),%2(0x9f),%2(0x93),%2(0xc9),%2(0x9c),%2(0xef)
    %1    %2(0xa0),%2(0xe0),%2(0x3b),%2(0x4d),%2(0xae),%2(0x2a),%2(0xf5),%2(0xb0)
    %1    %2(0xc8),%2(0xeb),%2(0xbb),%2(0x3c),%2(0x83),%2(0x53),%2(0x99),%2(0x61)
    %1    %2(0x17),%2(0x2b),%2(0x04),%2(0x7e),%2(0xba),%2(0x77),%2(0xd6),%2(0x26)
    %1    %2(0xe1),%2(0x69),%2(0x14),%2(0x63),%2(0x55),%2(0x21),%2(0x0c),%2(0x7d)
%endmacro

; The forward xor tables
;
; Each label below is followed by four consecutive tables of 256 dword
; entries, so the second, third and fourth table start tlen, 2*tlen and
; 3*tlen bytes after the label, which is how do_col and mix_col address
; them.

    align   32

; forward round tables: S-box bytes expanded by u0 to u3

_ft_tab:
    sb_data dd,u0
    sb_data dd,u1
    sb_data dd,u2
    sb_data dd,u3

; forward last round tables: the S-box byte placed in byte 0, 1, 2 or 3

_fl_tab:
    sb_data dd,w0
    sb_data dd,w1
    sb_data dd,w2
    sb_data dd,w3

; The inverse xor tables
; (inverse S-box bytes expanded by v0 to v3)

_it_tab:
    ib_data dd,v0
    ib_data dd,v1
    ib_data dd,v2
    ib_data dd,v3

; inverse last round tables: the inverse S-box byte placed in byte 0, 1, 2 or 3

_il_tab:
    ib_data dd,w0
    ib_data dd,w1
    ib_data dd,w2
    ib_data dd,w3

; The inverse mix column tables
; (v0 to v3 applied directly to the index values 0 to 255, no S-box)

_im_tab:
    im_data dd,v0
    im_data dd,v1
    im_data dd,v2
    im_data dd,v3

%endif

; NOTE(review): NASM has no end directive, so the line below is presumably
; assembled as a label named end (a habit carried over from other
; assemblers) - confirm nothing depends on it before removing it.

    end
