
; Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.
; All rights reserved.
;
; This software has been derived at the University of Michigan from
; software originally created by Dr. Brian Gladman as described below.
;   
; TERMS
;
;  Redistribution and use in source and binary forms, with or without 
;  modification, are permitted subject to the following conditions:
;
;  1. Redistributions of source code must retain the above copyright 
;     notice, this list of conditions and the following disclaimer.
;
;  2. Redistributions in binary form must reproduce the above copyright
;     notice, this list of conditions and the following disclaimer in the 
;     documentation and/or other materials provided with the distribution.
;
;  3. The copyright holder's name and/or the University of Michigan
;     must not be used to endorse or promote 
;     any products derived from this software without their specific prior
;     written permission. 
;
;  This software is provided 'as is' with no express or implied warranties 
;  of correctness or fitness for purpose.

; An AES (Rijndael) implementation for the Pentium MMX family using the NASM
; assembler <http://www.web-sites.co.uk/nasm/>.  This version implements the
; standard AES block length (16 bytes, 128 bits) with the same interface
; as that used in my C/C++ implementation.   This code does not preserve the
; eax, ecx or edx registers or the arithmetic status flags. However, the ebx,
; esi, edi, and ebp registers are preserved across calls.    If this code is
; used with compiled code the compiler's register saving and use conventions
; will need to be checked (it is consistent with Microsoft VC++).

    section .text use32         ; 32-bit code section is declared; no instructions follow in the visible source

; C prototypes of the AES routines that go with these tables.
; NOTE(review): none of these routines is defined in the visible source -
; presumably they live in a companion file; confirm before relying on them.
;
; short _aes_set_key(const byte key[], const word key_len, const enum mode f, aes_ctx *cx)
; short _aes_encrypt(const byte in_blk[], byte out_blk[], const aes_ctx *cx)
; short _aes_decrypt(const byte in_blk[], byte out_blk[], const aes_ctx *cx)

; Export the table labels defined in the data section so that other object
; files can link against them.

    global  _rcon_tab
    global  _ft_tab
    global  _fl_tab
    global  _it_tab
    global  _il_tab
    global  _im_tab

; When FOUR_TABLES is defined, each lookup table is emitted as four
; consecutive 256-entry variants (one per byte rotation) instead of one.

%define FOUR_TABLES

    section .data               ; everything from here on is table data

; Finite field GF(2^8) multiplies by {02}, {04} and {08}, reduced with the
; AES polynomial 0x11b.  Each bit shifted out past bit 7 selects one
; (suitably scaled) copy of 0x11b to xor back in, so for a byte argument the
; result is again a byte.
;
; Every use of the parameter is parenthesised so that an argument which is
; itself an expression (e.g. f2(a ^ b)) is evaluated as a unit rather than
; being split up by operator precedence after textual substitution.

%define f2(x)   (((x)<<1)^((((x)>>7)&1)*0x11b))
%define f4(x)   (((x)<<2)^((((x)>>6)&1)*0x11b)^((((x)>>6)&2)*0x11b))
%define f8(x)   (((x)<<3)^((((x)>>5)&1)*0x11b)^((((x)>>5)&2)*0x11b)^((((x)>>5)&4)*0x11b))

; Finite field multiplies required in table generation, built by xor-ing
; the power-of-two multiples above: {03}, {09}, {0b}, {0d} and {0e}.

%define f3(x)   (f2(x) ^ (x))
%define f9(x)   (f8(x) ^ (x))
%define fb(x)   (f8(x) ^ f2(x) ^ (x))
%define fd(x)   (f8(x) ^ f4(x) ^ (x))
%define fe(x)   (f8(x) ^ f4(x) ^ f2(x))

; These defines generate the forward table entries: one 32-bit word packing
; the {02}, {01}, {01} and {03} multiples of x; u1..u3 hold the same four
; bytes as u0 in successively rotated byte positions.

%define u0(x)   ((f3(x) << 24) | ((x) << 16) | ((x) << 8) | f2(x))
%define u1(x)   (((x) << 24) | ((x) << 16) | (f2(x) << 8) | f3(x))
%define u2(x)   (((x) << 24) | (f2(x) << 16) | (f3(x) << 8) | (x))
%define u3(x)   ((f2(x) << 24) | (f3(x) << 16) | ((x) << 8) | (x))

; These defines generate the inverse table entries: one 32-bit word packing
; the {0e}, {09}, {0d} and {0b} multiples of x; v1..v3 hold the same four
; bytes as v0 in successively rotated byte positions.

%define v0(x)   ((fb(x) << 24) | (fd(x) << 16) | (f9(x) << 8) | fe(x))
%define v1(x)   ((fd(x) << 24) | (f9(x) << 16) | (fe(x) << 8) | fb(x))
%define v2(x)   ((f9(x) << 24) | (fe(x) << 16) | (fb(x) << 8) | fd(x))
%define v3(x)   ((fe(x) << 24) | (fb(x) << 16) | (fd(x) << 8) | f9(x))

; These defines generate entries for the last round tables: the byte x on
; its own, placed in byte position 0, 1, 2 or 3 of the 32-bit word.

%define w0(x)   (x)
%define w1(x)   ((x) <<  8)
%define w2(x)   ((x) << 16)
%define w3(x)   ((x) << 24)

; macro to generate inverse mix column tables (needed for the key schedule)
;
;   %1  data directive used to emit each entry (invoked with dd in this file)
;   %2  name of a one-argument single-line macro applied to each index
;
; Emits 256 entries, %2(0) .. %2(255), one per line.  Unlike the S-box
; macros the argument is the plain index, not a substituted byte.  The
; counter is the file-wide preprocessor variable x, which is left at 256
; when the macro finishes.

%macro im_data 2
%assign x   0
%rep 256
    %1  %2(x)
%assign x x+1
%endrep
%endmacro

; The rcon_table (needed for the key schedule)
;
; 29 dwords: starting from 1, each entry is the previous one multiplied by
; {02} in the finite field (f2), i.e. 0x01, 0x02, 0x04, ... 0x80, 0x1b, ...
; Uses the file-wide preprocessor variable x as the running value.

_rcon_tab:
%assign x   1
%rep 29
    dd  x
%assign x f2(x)
%endrep

; S-box data - 256 entries
;
;   %1  data directive used to emit each row (invoked with dd in this file)
;   %2  name of a one-argument single-line macro applied to each S-box byte
;
; Emits 32 rows of 8 entries, in S-box index order 0x00 .. 0xff, each entry
; being %2(S-box byte).  Every byte is written as a complete hex literal so
; the table contents do not depend on which directive is passed as %1.

%macro sb_data 2
    %1    %2(0x63),%2(0x7c),%2(0x77),%2(0x7b),%2(0xf2),%2(0x6b),%2(0x6f),%2(0xc5)
    %1    %2(0x30),%2(0x01),%2(0x67),%2(0x2b),%2(0xfe),%2(0xd7),%2(0xab),%2(0x76)
    %1    %2(0xca),%2(0x82),%2(0xc9),%2(0x7d),%2(0xfa),%2(0x59),%2(0x47),%2(0xf0)
    %1    %2(0xad),%2(0xd4),%2(0xa2),%2(0xaf),%2(0x9c),%2(0xa4),%2(0x72),%2(0xc0)
    %1    %2(0xb7),%2(0xfd),%2(0x93),%2(0x26),%2(0x36),%2(0x3f),%2(0xf7),%2(0xcc)
    %1    %2(0x34),%2(0xa5),%2(0xe5),%2(0xf1),%2(0x71),%2(0xd8),%2(0x31),%2(0x15)
    %1    %2(0x04),%2(0xc7),%2(0x23),%2(0xc3),%2(0x18),%2(0x96),%2(0x05),%2(0x9a)
    %1    %2(0x07),%2(0x12),%2(0x80),%2(0xe2),%2(0xeb),%2(0x27),%2(0xb2),%2(0x75)
    %1    %2(0x09),%2(0x83),%2(0x2c),%2(0x1a),%2(0x1b),%2(0x6e),%2(0x5a),%2(0xa0)
    %1    %2(0x52),%2(0x3b),%2(0xd6),%2(0xb3),%2(0x29),%2(0xe3),%2(0x2f),%2(0x84)
    %1    %2(0x53),%2(0xd1),%2(0x00),%2(0xed),%2(0x20),%2(0xfc),%2(0xb1),%2(0x5b)
    %1    %2(0x6a),%2(0xcb),%2(0xbe),%2(0x39),%2(0x4a),%2(0x4c),%2(0x58),%2(0xcf)
    %1    %2(0xd0),%2(0xef),%2(0xaa),%2(0xfb),%2(0x43),%2(0x4d),%2(0x33),%2(0x85)
    %1    %2(0x45),%2(0xf9),%2(0x02),%2(0x7f),%2(0x50),%2(0x3c),%2(0x9f),%2(0xa8)
    %1    %2(0x51),%2(0xa3),%2(0x40),%2(0x8f),%2(0x92),%2(0x9d),%2(0x38),%2(0xf5)
    %1    %2(0xbc),%2(0xb6),%2(0xda),%2(0x21),%2(0x10),%2(0xff),%2(0xf3),%2(0xd2)
    %1    %2(0xcd),%2(0x0c),%2(0x13),%2(0xec),%2(0x5f),%2(0x97),%2(0x44),%2(0x17)
    %1    %2(0xc4),%2(0xa7),%2(0x7e),%2(0x3d),%2(0x64),%2(0x5d),%2(0x19),%2(0x73)
    %1    %2(0x60),%2(0x81),%2(0x4f),%2(0xdc),%2(0x22),%2(0x2a),%2(0x90),%2(0x88)
    %1    %2(0x46),%2(0xee),%2(0xb8),%2(0x14),%2(0xde),%2(0x5e),%2(0x0b),%2(0xdb)
    %1    %2(0xe0),%2(0x32),%2(0x3a),%2(0x0a),%2(0x49),%2(0x06),%2(0x24),%2(0x5c)
    %1    %2(0xc2),%2(0xd3),%2(0xac),%2(0x62),%2(0x91),%2(0x95),%2(0xe4),%2(0x79)
    %1    %2(0xe7),%2(0xc8),%2(0x37),%2(0x6d),%2(0x8d),%2(0xd5),%2(0x4e),%2(0xa9)
    %1    %2(0x6c),%2(0x56),%2(0xf4),%2(0xea),%2(0x65),%2(0x7a),%2(0xae),%2(0x08)
    %1    %2(0xba),%2(0x78),%2(0x25),%2(0x2e),%2(0x1c),%2(0xa6),%2(0xb4),%2(0xc6)
    %1    %2(0xe8),%2(0xdd),%2(0x74),%2(0x1f),%2(0x4b),%2(0xbd),%2(0x8b),%2(0x8a)
    %1    %2(0x70),%2(0x3e),%2(0xb5),%2(0x66),%2(0x48),%2(0x03),%2(0xf6),%2(0x0e)
    %1    %2(0x61),%2(0x35),%2(0x57),%2(0xb9),%2(0x86),%2(0xc1),%2(0x1d),%2(0x9e)
    %1    %2(0xe1),%2(0xf8),%2(0x98),%2(0x11),%2(0x69),%2(0xd9),%2(0x8e),%2(0x94)
    %1    %2(0x9b),%2(0x1e),%2(0x87),%2(0xe9),%2(0xce),%2(0x55),%2(0x28),%2(0xdf)
    %1    %2(0x8c),%2(0xa1),%2(0x89),%2(0x0d),%2(0xbf),%2(0xe6),%2(0x42),%2(0x68)
    %1    %2(0x41),%2(0x99),%2(0x2d),%2(0x0f),%2(0xb0),%2(0x54),%2(0xbb),%2(0x16)
%endmacro

; Inverse S-box data - 256 entries
;
;   %1  data directive used to emit each row (invoked with dd in this file)
;   %2  name of a one-argument single-line macro applied to each byte
;
; Emits 32 rows of 8 entries, in index order 0x00 .. 0xff, each entry being
; %2(inverse S-box byte).

%macro ib_data 2
    %1    %2(0x52),%2(0x09),%2(0x6a),%2(0xd5),%2(0x30),%2(0x36),%2(0xa5),%2(0x38)
    %1    %2(0xbf),%2(0x40),%2(0xa3),%2(0x9e),%2(0x81),%2(0xf3),%2(0xd7),%2(0xfb)
    %1    %2(0x7c),%2(0xe3),%2(0x39),%2(0x82),%2(0x9b),%2(0x2f),%2(0xff),%2(0x87)
    %1    %2(0x34),%2(0x8e),%2(0x43),%2(0x44),%2(0xc4),%2(0xde),%2(0xe9),%2(0xcb)
    %1    %2(0x54),%2(0x7b),%2(0x94),%2(0x32),%2(0xa6),%2(0xc2),%2(0x23),%2(0x3d)
    %1    %2(0xee),%2(0x4c),%2(0x95),%2(0x0b),%2(0x42),%2(0xfa),%2(0xc3),%2(0x4e)
    %1    %2(0x08),%2(0x2e),%2(0xa1),%2(0x66),%2(0x28),%2(0xd9),%2(0x24),%2(0xb2)
    %1    %2(0x76),%2(0x5b),%2(0xa2),%2(0x49),%2(0x6d),%2(0x8b),%2(0xd1),%2(0x25)
    %1    %2(0x72),%2(0xf8),%2(0xf6),%2(0x64),%2(0x86),%2(0x68),%2(0x98),%2(0x16)
    %1    %2(0xd4),%2(0xa4),%2(0x5c),%2(0xcc),%2(0x5d),%2(0x65),%2(0xb6),%2(0x92)
    %1    %2(0x6c),%2(0x70),%2(0x48),%2(0x50),%2(0xfd),%2(0xed),%2(0xb9),%2(0xda)
    %1    %2(0x5e),%2(0x15),%2(0x46),%2(0x57),%2(0xa7),%2(0x8d),%2(0x9d),%2(0x84)
    %1    %2(0x90),%2(0xd8),%2(0xab),%2(0x00),%2(0x8c),%2(0xbc),%2(0xd3),%2(0x0a)
    %1    %2(0xf7),%2(0xe4),%2(0x58),%2(0x05),%2(0xb8),%2(0xb3),%2(0x45),%2(0x06)
    %1    %2(0xd0),%2(0x2c),%2(0x1e),%2(0x8f),%2(0xca),%2(0x3f),%2(0x0f),%2(0x02)
    %1    %2(0xc1),%2(0xaf),%2(0xbd),%2(0x03),%2(0x01),%2(0x13),%2(0x8a),%2(0x6b)
    %1    %2(0x3a),%2(0x91),%2(0x11),%2(0x41),%2(0x4f),%2(0x67),%2(0xdc),%2(0xea)
    %1    %2(0x97),%2(0xf2),%2(0xcf),%2(0xce),%2(0xf0),%2(0xb4),%2(0xe6),%2(0x73)
    %1    %2(0x96),%2(0xac),%2(0x74),%2(0x22),%2(0xe7),%2(0xad),%2(0x35),%2(0x85)
    %1    %2(0xe2),%2(0xf9),%2(0x37),%2(0xe8),%2(0x1c),%2(0x75),%2(0xdf),%2(0x6e)
    %1    %2(0x47),%2(0xf1),%2(0x1a),%2(0x71),%2(0x1d),%2(0x29),%2(0xc5),%2(0x89)
    %1    %2(0x6f),%2(0xb7),%2(0x62),%2(0x0e),%2(0xaa),%2(0x18),%2(0xbe),%2(0x1b)
    %1    %2(0xfc),%2(0x56),%2(0x3e),%2(0x4b),%2(0xc6),%2(0xd2),%2(0x79),%2(0x20)
    %1    %2(0x9a),%2(0xdb),%2(0xc0),%2(0xfe),%2(0x78),%2(0xcd),%2(0x5a),%2(0xf4)
    %1    %2(0x1f),%2(0xdd),%2(0xa8),%2(0x33),%2(0x88),%2(0x07),%2(0xc7),%2(0x31)
    %1    %2(0xb1),%2(0x12),%2(0x10),%2(0x59),%2(0x27),%2(0x80),%2(0xec),%2(0x5f)
    %1    %2(0x60),%2(0x51),%2(0x7f),%2(0xa9),%2(0x19),%2(0xb5),%2(0x4a),%2(0x0d)
    %1    %2(0x2d),%2(0xe5),%2(0x7a),%2(0x9f),%2(0x93),%2(0xc9),%2(0x9c),%2(0xef)
    %1    %2(0xa0),%2(0xe0),%2(0x3b),%2(0x4d),%2(0xae),%2(0x2a),%2(0xf5),%2(0xb0)
    %1    %2(0xc8),%2(0xeb),%2(0xbb),%2(0x3c),%2(0x83),%2(0x53),%2(0x99),%2(0x61)
    %1    %2(0x17),%2(0x2b),%2(0x04),%2(0x7e),%2(0xba),%2(0x77),%2(0xd6),%2(0x26)
    %1    %2(0xe1),%2(0x69),%2(0x14),%2(0x63),%2(0x55),%2(0x21),%2(0x0c),%2(0x7d)
%endmacro

; The forward xor tables
;
; Each table below is 256 dwords per variant.  With FOUR_TABLES defined the
; four variants follow one another directly, so variant k starts 256*4*k
; bytes after the table label.

    align   32                  ; start the first table on a 32-byte boundary

; Forward round table: S-box byte expanded through u0 (and u1..u3).

_ft_tab:
    sb_data dd,u0
%ifdef FOUR_TABLES
    sb_data dd,u1
    sb_data dd,u2
    sb_data dd,u3
%endif

; Forward last-round table: S-box byte placed in byte 0 (and bytes 1..3)
; of the dword, via w0..w3.

_fl_tab:
    sb_data dd,w0
%ifdef FOUR_TABLES
    sb_data dd,w1
    sb_data dd,w2
    sb_data dd,w3
%endif

; The inverse xor tables
;
; Inverse round table: inverse S-box byte expanded through v0 (and v1..v3).

_it_tab:
    ib_data dd,v0
%ifdef FOUR_TABLES
    ib_data dd,v1
    ib_data dd,v2
    ib_data dd,v3
%endif

; Inverse last-round table: inverse S-box byte placed in byte 0 (and bytes
; 1..3) of the dword, via w0..w3.

_il_tab:
    ib_data dd,w0
%ifdef FOUR_TABLES
    ib_data dd,w1
    ib_data dd,w2
    ib_data dd,w3
%endif

; The inverse mix column tables
;
; v0 (and v1..v3) applied directly to every byte value 0..255, with no
; S-box substitution.

_im_tab:
    im_data dd,v0
%ifdef FOUR_TABLES
    im_data dd,v1
    im_data dd,v2
    im_data dd,v3
%endif

; NOTE(review): NASM has no `end` directive, so the line below is most likely
; assembled as a stray label named `end` (with an orphan-label warning)
; rather than terminating the file - confirm, and consider removing it.

    end
