Chaskey-LTS Block Cipher (original) (raw)
Introduction
Chaskey is a 128-bit block cipher with support for 128-bit keys. It was designed by Nicky Mouha, Bart Mennink, Anthony Van Herrewege, Dai Watanabe, Bart Preneel and Ingrid Verbauwhede. The main permutation is derived from SipHash, a fast short-input pseudorandom function (PRF) designed and published in 2012 by Daniel Bernstein and Jean-Philippe Aumasson. It uses an Even-Mansour construction: in 1997, Shimon Even and Yishay Mansour published a paper titled "A Construction of a Cipher from a Single Pseudorandom Permutation" that suggested an incredibly simple but provably secure design for a cryptographic algorithm. Because only add-rotate-xor (ARX) instructions are used, it's very suitable for many architectures.

The key is XORed with the plaintext before the permutation function F is applied (pre-whitening), and XORed with the result again afterwards (post-whitening).
F function
The permutation uses 16 rounds of ADD/ROL/XOR (ARX) instructions for encryption. Decryption of ciphertext is simply reversing the process with SUB/ROR/XOR.

Full function
This will perform encryption or decryption, depending on the enc parameter.
/* Rotate helpers and the mode flag used below. They are guarded so that
 * definitions supplied by an existing project header take precedence. */
#ifndef ROTL32
#define ROTL32(v, n) (((v) << (n)) | ((v) >> (32 - (n))))
#endif
#ifndef ROTR32
#define ROTR32(v, n) (((v) >> (n)) | ((v) << (32 - (n))))
#endif
#ifndef CHASKEY_ENCRYPT
/* NOTE(review): the x86 version treats enc == 0 as "decrypt" and any
 * non-zero value as "encrypt"; 1 is assumed here - confirm against the
 * project header. */
#define CHASKEY_ENCRYPT 1
#endif

/*
 * Chaskey-LTS block cipher (Even-Mansour construction, 16 rounds).
 *
 * enc : CHASKEY_ENCRYPT to encrypt; any other value decrypts.
 * key : pointer to the 128-bit key, read as four 32-bit words.
 * buf : pointer to the 128-bit block, read as four 32-bit words and
 *       overwritten in place with the result.
 */
void chaskey(int enc, void *key, void *buf) {
    int i;
    uint32_t *v = (uint32_t *)buf;
    uint32_t *k = (uint32_t *)key;

    // pre-whiten
    for (i = 0; i < 4; i++) {
        v[i] ^= k[i];
    }

    // apply permutation function
    for (i = 0; i < 16; i++) {
        if (enc == CHASKEY_ENCRYPT) {
            // forward round: ADD / ROL / XOR
            v[0] += v[1];
            v[1] = ROTL32(v[1], 5);
            v[1] ^= v[0];
            v[0] = ROTL32(v[0], 16);
            v[2] += v[3];
            v[3] = ROTL32(v[3], 8);
            v[3] ^= v[2];
            v[0] += v[3];
            v[3] = ROTL32(v[3], 13);
            v[3] ^= v[0];
            v[2] += v[1];
            v[1] = ROTL32(v[1], 7);
            v[1] ^= v[2];
            v[2] = ROTL32(v[2], 16);
        } else {
            // inverse round: the forward steps undone in reverse order
            // with XOR / ROR / SUB
            v[2] = ROTR32(v[2], 16);
            v[1] ^= v[2];
            v[1] = ROTR32(v[1], 7);
            v[2] -= v[1];
            v[3] ^= v[0];
            v[3] = ROTR32(v[3], 13);
            v[0] -= v[3];
            v[3] ^= v[2];
            v[3] = ROTR32(v[3], 8);
            v[2] -= v[3];
            v[0] = ROTR32(v[0], 16);
            v[1] ^= v[0];
            v[1] = ROTR32(v[1], 5);
            v[0] -= v[1];
        }
    }

    // post-whiten
    for (i = 0; i < 4; i++) {
        v[i] ^= k[i];
    }
}
x86 assembly
The assembly is straightforward. We load the buffer pointer into ESI, the key pointer into EDI and enc into ECX. Four 32-bit registers are loaded with the 128-bit data block, and pre-whitening is applied with the 128-bit key. We then test ECX for zero and save the flag status with PUSHFD. This frees ECX for use as a loop counter, which is set to 16 (for LTS). After each round of the permutation, the flag status is restored with POPFD, and we keep looping until ECX is zero. Finally, post-whitening is applied with the 128-bit key, and the result is saved before returning.
; -----------------------------------------------
; Chaskey-LTS block cipher in x86 assembly (encryption and decryption)
;
; void chaskey(int enc, void *key, void *buf);
;
; Syntax: NASM, 32-bit
; ABI:    cdecl; all three arguments are read from the stack
; In:     enc = 0 selects decryption, any non-zero value selects encryption
;         key = pointer to the 128-bit key (4 dwords)
;         buf = pointer to the 128-bit block (4 dwords), processed in place
; Out:    result written back to buf
; Clobb:  EFLAGS; general registers are saved/restored by PUSHAD/POPAD
; -----------------------------------------------
    bits 32

%ifndef BIN
    global chaskey
    global _chaskey
%endif

%define v0 eax
%define v1 ebx
%define v2 edx
%define v3 ebp

%define CHASKEY_ROUNDS 16

chaskey:
_chaskey:
    pushad
    lea    esi, [esp+32+4]       ; esi -> first argument (past PUSHAD frame + return address)
    lodsd
    xchg   ecx, eax              ; ecx = enc
    lodsd
    xchg   edi, eax              ; edi = key
    lodsd
    xchg   eax, esi              ; esi = buf
    push   esi                   ; keep buf for the store at the end
    ; load buf
    lodsd
    xchg   eax, v3               ; park buf[0] in v3; eax is still needed by LODSD
    lodsd
    xchg   eax, v1               ; v1 = buf[1]
    lodsd
    xchg   eax, v2               ; v2 = buf[2]
    lodsd
    xchg   eax, v3               ; v3 = buf[3], v0 = buf[0]
    ; pre-whiten
    xor    v0, [edi   ]
    xor    v1, [edi+ 4]
    xor    v2, [edi+ 8]
    xor    v3, [edi+12]
    test   ecx, ecx              ; ZF = 1 -> decrypt, ZF = 0 -> encrypt
    ; Load the full 32-bit counter. PUSH/POP leave ZF untouched, and unlike
    ; "mov cl, 16" no stale upper bits of enc can leak into the round count.
    push   CHASKEY_ROUNDS
    pop    ecx
.round:
    pushfd                       ; the round body clobbers flags; keep the mode in ZF
    jz     .decrypt
    ; encrypt
    add    v0, v1
    rol    v1, 5
    xor    v1, v0
    rol    v0, 16
    add    v2, v3
    rol    v3, 8
    xor    v3, v2
    add    v0, v3
    rol    v3, 13
    xor    v3, v0
    add    v2, v1
    rol    v1, 7
    xor    v1, v2
    rol    v2, 16
    jmp    .next
.decrypt:
    ; decrypt: the encrypt steps undone in reverse order
    ror    v2, 16
    xor    v1, v2
    ror    v1, 7
    sub    v2, v1
    xor    v3, v0
    ror    v3, 13
    sub    v0, v3
    xor    v3, v2
    ror    v3, 8
    sub    v2, v3
    ror    v0, 16
    xor    v1, v0
    ror    v1, 5
    sub    v0, v1
.next:
    popfd                        ; restore ZF for the next round's mode test
    loop   .round                ; LOOP decrements ecx without modifying flags
    ; post-whiten
    xor    v0, [edi   ]
    xor    v1, [edi+ 4]
    xor    v2, [edi+ 8]
    xor    v3, [edi+12]
    pop    edi                   ; edi = buf
    ; save buf
    stosd                        ; buf[0] = v0
    xchg   eax, v1
    stosd                        ; buf[1] = v1
    xchg   eax, v2
    stosd                        ; buf[2] = v2
    xchg   eax, v3
    stosd                        ; buf[3] = v3
    popad
    ret
Compact code
/* R: rotate a 32-bit value right by n bits (a left rotate by m is R(v, 32-m)).
 * F: repeat the following statement n times, using the local counter i. */
#define R(v,n) (((v)>>(n))|((v)<<(32-(n))))
#define F(n) for(i=0;i<n;i++)

/*
 * Chaskey-LTS encryption only, compact form.
 *
 * mk : pointer to the 128-bit key, read as four 32-bit words.
 * p  : pointer to the 128-bit block, encrypted in place.
 *
 * The rotate macro assumes unsigned int is 32 bits wide.
 */
void chaskey(void *mk, void *p) {
    unsigned int i, *x = p, *k = mk;

    /* pre-whiten */
    F(4) x[i] ^= k[i];

    /* 16 rounds; each round is one comma-expression statement */
    F(16)
        *x  += x[1],
        x[1] = R(x[1],27) ^ *x,      /* ROTL 5  */
        x[2] += x[3],
        x[3] = R(x[3],24) ^ x[2],    /* ROTL 8  */
        x[2] += x[1],
        *x   = R(*x,16) + x[3],
        x[3] = R(x[3],19) ^ *x,      /* ROTL 13 */
        x[1] = R(x[1],25) ^ x[2],    /* ROTL 7  */
        x[2] = R(x[2],16);

    /* post-whiten */
    F(4) x[i] ^= k[i];
}
x86 assembly
; -----------------------------------------------
; Chaskey-LTS block cipher in x86 assembly (encryption only)
;
; size: 89 bytes
;
; global calls use cdecl convention
;
; Syntax: NASM, 32-bit
; In:     [esp+8]  = pointer to the 128-bit key (4 dwords)
;         [esp+12] = pointer to the 128-bit block, encrypted in place
;         (offsets as seen by the caller, i.e. before PUSHAD)
; Clobb:  EFLAGS; general registers are saved/restored by PUSHAD/POPAD
;
; NOTE(review): these offsets correspond to the 2nd and 3rd cdecl
; arguments, i.e. a chaskey(enc, key, buf) style call with the first
; argument ignored. If this routine is meant to be called through a
; two-argument prototype chaskey(key, buf), the offsets would need to be
; +4 and +8 instead - confirm against the caller.
; -----------------------------------------------
    bits 32

%ifndef BIN
    global chaskey
    global _chaskey
%endif

%define v0 eax
%define v1 ebx
%define v2 edx
%define v3 ebp

chaskey:
_chaskey:
    pushad
    mov    edi, [esp+32+ 8]      ; edi = key (32 = size of the PUSHAD frame)
    mov    esi, [esp+32+12]      ; esi = buf
    push   esi                   ; keep buf for the store at the end
    ; load buf
    lodsd
    xchg   eax, v3               ; park buf[0] in v3; eax is still needed by LODSD
    lodsd
    xchg   eax, v1               ; v1 = buf[1]
    lodsd
    xchg   eax, v2               ; v2 = buf[2]
    lodsd
    xchg   eax, v3               ; v3 = buf[3], v0 = buf[0]
    ; pre-whiten
    xor    v0, [edi   ]
    xor    v1, [edi+ 4]
    xor    v2, [edi+ 8]
    xor    v3, [edi+12]
    ; 16 rounds
    push   16
    pop    ecx                   ; ecx = round counter
.round:
    ; apply permutation
    add    v0, v1
    rol    v1, 5
    xor    v1, v0
    rol    v0, 16
    add    v2, v3
    rol    v3, 8
    xor    v3, v2
    add    v0, v3
    rol    v3, 13
    xor    v3, v0
    add    v2, v1
    rol    v1, 7
    xor    v1, v2
    rol    v2, 16
    loop   .round                ; ecx--, repeat while ecx != 0
    ; post-whiten
    xor    v0, [edi   ]
    xor    v1, [edi+ 4]
    xor    v2, [edi+ 8]
    xor    v3, [edi+12]
    pop    edi                   ; edi = buf
    ; save buf
    stosd                        ; buf[0] = v0
    xchg   eax, v1
    stosd                        ; buf[1] = v1
    xchg   eax, v2
    stosd                        ; buf[2] = v2
    xchg   eax, v3
    stosd                        ; buf[3] = v3
    popad
    ret
ARM32 / AArch32 assembly
// Chaskey-LTS block cipher in ARM32 assembly (encryption only)
//
// void chaskey(void *key, void *data);
//
// In:   r0 = pointer to the 128-bit key (4 words)
//       r1 = pointer to the 128-bit block, encrypted in place
// All of r0-r12 are saved on entry and restored on exit.

    .text
    .global chaskey

    // register aliases
    k   .req r0
    x   .req r1

    k0  .req r2
    k1  .req r3
    k2  .req r4
    k3  .req r5

    x0  .req r6
    x1  .req r7
    x2  .req r8
    x3  .req r9

    i   .req r10

chaskey:
    // save registers
    push   {r0-r12,lr}

    // load 128-bit key
    ldm    k, {k0, k1, k2, k3}

    // load 128-bit plaintext
    ldm    x, {x0, x1, x2, x3}

    // xor plaintext with key
    eor    x0, x0, k0              // x[0] ^= k[0];
    eor    x1, x1, k1              // x[1] ^= k[1];
    eor    x2, x2, k2              // x[2] ^= k[2];
    eor    x3, x3, k3              // x[3] ^= k[3];
    mov    i, #16                  // i = 16
chaskey_loop:
    // left rotates are written as right rotates by (32 - n)
    add    x0, x0, x1              // x[0] += x[1];
    eor    x1, x0, x1, ror #27     // x[1]=ROTL32(x[1], 5) ^ x[0];
    add    x2, x2, x3              // x[2] += x[3];
    eor    x3, x2, x3, ror #24     // x[3]=ROTL32(x[3], 8) ^ x[2];
    add    x2, x2, x1              // x[2] += x[1];
    add    x0, x3, x0, ror #16     // x[0]=ROTL32(x[0], 16) + x[3];
    eor    x3, x0, x3, ror #19     // x[3]=ROTL32(x[3], 13) ^ x[0];
    eor    x1, x2, x1, ror #25     // x[1]=ROTL32(x[1], 7) ^ x[2];
    mov    x2, x2, ror #16         // x[2]=ROTL32(x[2], 16);
    subs   i, i, #1                // i--
    bne    chaskey_loop            // i>0

    // xor ciphertext with key
    eor    x0, x0, k0              // x[0] ^= k[0];
    eor    x1, x1, k1              // x[1] ^= k[1];
    eor    x2, x2, k2              // x[2] ^= k[2];
    eor    x3, x3, k3              // x[3] ^= k[3];

    // save ciphertext
    stm    x, {x0, x1, x2, x3}

    // restore registers and return (lr is popped into pc)
    pop    {r0-r12,pc}
ARM64 / AArch64 assembly
// CHASKEY in ARM64 assembly
// 112 bytes
//
// void chaskey(void *mk, void *data);
//
// In:   x0 = pointer to the 128-bit key (4 words)
//       x1 = pointer to the 128-bit block, encrypted in place
// Uses w2-w5 for the key, w6-w9 for the state and w10 as round counter;
// no stack is used.

    .arch armv8-a
    .text
    .global chaskey

chaskey:
    // load 128-bit key
    ldp    w2, w3, [x0]
    ldp    w4, w5, [x0, 8]

    // load 128-bit plain text
    ldp    w6, w7, [x1]
    ldp    w8, w9, [x1, 8]

    // xor plaintext with key
    eor    w6, w6, w2              // x[0] ^= k[0];
    eor    w7, w7, w3              // x[1] ^= k[1];
    eor    w8, w8, w4              // x[2] ^= k[2];
    eor    w9, w9, w5              // x[3] ^= k[3];

    mov    w10, 16                 // i = 16
L0:
    add    w6, w6, w7              // x[0] += x[1];
    eor    w7, w6, w7, ror 27      // x[1]=R(x[1],27) ^ x[0];
    add    w8, w8, w9              // x[2] += x[3];
    eor    w9, w8, w9, ror 24      // x[3]=R(x[3],24) ^ x[2];
    add    w8, w8, w7              // x[2] += x[1];
    ror    w6, w6, 16              // rotate separately, then add
    add    w6, w9, w6              // x[0]=R(x[0],16) + x[3];
    eor    w9, w6, w9, ror 19      // x[3]=R(x[3],19) ^ x[0];
    eor    w7, w8, w7, ror 25      // x[1]=R(x[1],25) ^ x[2];
    ror    w8, w8, 16              // x[2]=R(x[2],16);
    subs   w10, w10, 1             // i--
    bne    L0                      // i > 0

    // xor cipher text with key
    eor    w6, w6, w2              // x[0] ^= k[0];
    eor    w7, w7, w3              // x[1] ^= k[1];
    eor    w8, w8, w4              // x[2] ^= k[2];
    eor    w9, w9, w5              // x[3] ^= k[3];

    // save 128-bit cipher text
    stp    w6, w7, [x1]
    stp    w8, w9, [x1, 8]
    ret