Writeup by kemyo for AES Distrace

crypto reverse side channel attacks

April 29, 2025

This challenge asks you to find the plaintext of a ciphertext obtained through AES-128-ECB, starting from a trace generated by providing a relevant address/register pair to QEMU with libexeclog (provided).

For complete beginners:

  1. You must generate assembly code from the provided binary aes-distrace with
objdump -d -M intel aes-distrace > output.asm
  1. Don’t forget: you must redo this command if you modify the source code and rebuild the binary (for example, to practice on a fixed key).

  2. The output obtained by running the .py script gives you the content of the register AFTER executing the corresponding instruction. Also, the 15 possible registers you can provide have ‘sub-registers’, for example, if an instruction mentions dl and ecx, you should provide rdx or rcx to the script.

In the second loop of the SubBytes_ShiftRows function of the aes-distrace.c source code, there is this particularly interesting line:

s[i][0] = S[ t[i][(i + 0) % 4] ];

which corresponds in the objdump (of the original binary!) to

401b1d: 88 02 mov BYTE PTR [rdx], al

al corresponds to 1 byte, more precisely the least significant byte of rax. So input = (401b1d, rax).

There are 10 calls to this function, thus we recover 40 bytes corresponding to the first column output of SubBytes_ShiftRows:

0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x0000000000000092
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa4, rax -> 0x00000000000000f7
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa8, rax -> 0x0000000000000045
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aac, rax -> 0x000000000000008d
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x00000000000000c7
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa4, rax -> 0x00000000000000a8
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa8, rax -> 0x000000000000000a
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aac, rax -> 0x00000000000000bb
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x00000000000000e9
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa4, rax -> 0x00000000000000b9
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa8, rax -> 0x00000000000000c2
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aac, rax -> 0x00000000000000f6
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x00000000000000e0
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa4, rax -> 0x000000000000002d
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa8, rax -> 0x0000000000000027
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aac, rax -> 0x0000000000000089
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x000000000000000a
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa4, rax -> 0x0000000000000034
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa8, rax -> 0x00000000000000d3
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aac, rax -> 0x00000000000000a3
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x00000000000000c4
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa4, rax -> 0x0000000000000068
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa8, rax -> 0x00000000000000de
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aac, rax -> 0x000000000000004c
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x00000000000000d0
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa4, rax -> 0x00000000000000d1
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa8, rax -> 0x00000000000000c4
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aac, rax -> 0x00000000000000c8
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x00000000000000a4
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa4, rax -> 0x00000000000000dd
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa8, rax -> 0x00000000000000b9
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aac, rax -> 0x0000000000000018
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x00000000000000ae
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa4, rax -> 0x00000000000000e3
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa8, rax -> 0x0000000000000069
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aac, rax -> 0x000000000000005d
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x0000000000000034
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa4, rax -> 0x0000000000000089
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa8, rax -> 0x00000000000000f8
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aac, rax -> 0x0000000000000065

The order of operations, retrievable in the AES-128 function in the C code, allows us to deduce:

Example with the first 4+1=5 lines of QEMU output:

0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x0000000000000092
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa4, rax -> 0x00000000000000f7
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa8, rax -> 0x0000000000000045
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aac, rax -> 0x000000000000008d
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x00000000000000c7

Let a=0x92, b=0xf7, c=0x45, d=0x8d, e=0xc7 and x the first byte of roundkey 1. We have the relation:

x + MC.line(0) * (a,b,c,d) = Sbox_inv(e)

thus:

x = Sbox_inv(e) + MC.line(0) * (a,b,c,d) = Sbox_inv(e) + 2*a + 3*b + c + d = Sbox_inv(e) + Mul2(a) + Mul2(b) + b + c + d

In $GF(8)$, $3z = 2z ^ z$, and mul2 is provided, same for Sbox, which gives Sbox_inv below

Sbox_inv = [0]*256
for i, v in enumerate(S):
    Sbox_inv[v] = i

Same logic to get the first byte of rk2 from lines 5-9 etc.

Thus, we get the first byte of rk 1-10 and the first column of rk10. Then we work backwards using the key_expansion loop to recover the missing bytes.

Here is the key_expansion loop for reference and an example function:

for (int i = 1; i < 11; ++i) {
    /* First column */
    rk[i][0][0] = rk[i - 1][0][0] ^ S[rk[i - 1][1][3]] ^ RCON[i];
    rk[i][1][0] = rk[i - 1][1][0] ^ S[rk[i - 1][2][3]];
    rk[i][2][0] = rk[i - 1][2][0] ^ S[rk[i - 1][3][3]];
    rk[i][3][0] = rk[i - 1][3][0] ^ S[rk[i - 1][0][3]];

    /* Second column */
    rk[i][0][1] = rk[i - 1][0][1] ^ rk[i][0][0];
    rk[i][1][1] = rk[i - 1][1][1] ^ rk[i][1][0];
    rk[i][2][1] = rk[i - 1][2][1] ^ rk[i][2][0];
    rk[i][3][1] = rk[i - 1][3][1] ^ rk[i][3][0];

    /* Third column */
    rk[i][0][2] = rk[i - 1][0][2] ^ rk[i][0][1];
    rk[i][1][2] = rk[i - 1][1][2] ^ rk[i][1][1];
    rk[i][2][2] = rk[i - 1][2][2] ^ rk[i][2][1];
    rk[i][3][2] = rk[i - 1][3][2] ^ rk[i][3][1];

    /* Fourth column */
    rk[i][0][3] = rk[i - 1][0][3] ^ rk[i][0][2];
    rk[i][1][3] = rk[i - 1][1][3] ^ rk[i][1][2];
    rk[i][2][3] = rk[i - 1][2][3] ^ rk[i][2][2];
    rk[i][3][3] = rk[i - 1][3][3] ^ rk[i][3][2];
}
def guess_byte_index(rk, i, x, y):
    if y == 0:
        if x == 0:
            rk[i][1][3] = S_inv[rk[i+1][0][0] ^ rk[i][0][0] ^ RCON[i+1]]
        else:
            rk[i][(x+1)%4][3] = S_inv[rk[i+1][x][0] ^ rk[i][x][0]]
    else:
        rk[i][x][y-1] = rk[i][x][y]^rk[i-1][x][y]

Start of the byte_recovery algorithm:

for i in range(1,10):
    guess_byte_index(rk,i,0,0)
for i in range(2,10):
    guess_byte_index(rk,i,1,3)
for i in range(3,10):
    guess_byte_index(rk,i,1,2)
for i in range(4,10):
    guess_byte_index(rk,i,1,1)
# etc...

Since rk9 has the most discovered bytes, we operate on it:

We don’t know [0][1], [0][2], [0][3], and [3][0].

Fixing [3][0] lets you solve [0][3] by xor-ing rk9[3][0] and rk10[0][3].

Thus only 3 bytes to discover → 2^24 possibilities (about 5 minutes worst-case on my machine, without CUDA).

How to know if the guessed key is correct?

In the .py script:

def randomString(length = 16):
    return ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(length))
rnd = randomString(length = 16).encode()

Thus distinguish the correct plaintext:

if all((0x30 <= b <= 0x39) or (0x41 <= b <= 0x5A) or (0x61 <= b <= 0x7A) for b in plaintext):
    print(f"Plaintext:{plaintext.decode('ascii')}")

Statistically, among the 16 million possibilities, only 1 will only contain a-z, A-Z, 0-9.

If not, repeat.

Thanks to Cryptanalyse for this challenge and the FCSC team for this year’s edition!