This challenge asks you to find the plaintext of a ciphertext obtained through AES-128-ECB, starting from a trace generated by providing a relevant address/register pair to QEMU with libexeclog (provided).
For complete beginners:
- You must generate assembly code from the provided binary aes-distrace with
objdump -d -M intel aes-distrace > output.asm
Don’t forget: you must redo this command if you modify the source code and rebuild the binary (for example, to practice on a fixed key).
The output obtained by running the
.py
script gives you the content of the register AFTER executing the corresponding instruction. Also, the 15 possible registers you can provide have ‘sub-registers’, for example, if an instruction mentionsdl
andecx
, you should providerdx
orrcx
to the script.
In the second loop of the SubBytes_ShiftRows function of the aes-distrace.c source code, there is this particularly interesting line:
s[i][0] = S[ t[i][(i + 0) % 4] ];
which corresponds in the objdump (of the original binary!) to
401b1d: 88 02 mov BYTE PTR [rdx], al
al
corresponds to 1 byte, more precisely the least significant byte of rax
. So input = (401b1d, rax).
There are 10 calls to this function, thus we recover 40 bytes corresponding to the first column output of SubBytes_ShiftRows:
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x0000000000000092
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa4, rax -> 0x00000000000000f7
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa8, rax -> 0x0000000000000045
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aac, rax -> 0x000000000000008d
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x00000000000000c7
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa4, rax -> 0x00000000000000a8
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa8, rax -> 0x000000000000000a
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aac, rax -> 0x00000000000000bb
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x00000000000000e9
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa4, rax -> 0x00000000000000b9
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa8, rax -> 0x00000000000000c2
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aac, rax -> 0x00000000000000f6
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x00000000000000e0
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa4, rax -> 0x000000000000002d
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa8, rax -> 0x0000000000000027
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aac, rax -> 0x0000000000000089
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x000000000000000a
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa4, rax -> 0x0000000000000034
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa8, rax -> 0x00000000000000d3
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aac, rax -> 0x00000000000000a3
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x00000000000000c4
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa4, rax -> 0x0000000000000068
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa8, rax -> 0x00000000000000de
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aac, rax -> 0x000000000000004c
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x00000000000000d0
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa4, rax -> 0x00000000000000d1
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa8, rax -> 0x00000000000000c4
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aac, rax -> 0x00000000000000c8
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x00000000000000a4
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa4, rax -> 0x00000000000000dd
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa8, rax -> 0x00000000000000b9
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aac, rax -> 0x0000000000000018
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x00000000000000ae
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa4, rax -> 0x00000000000000e3
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa8, rax -> 0x0000000000000069
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aac, rax -> 0x000000000000005d
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x0000000000000034
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa4, rax -> 0x0000000000000089
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa8, rax -> 0x00000000000000f8
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aac, rax -> 0x0000000000000065
The order of operations, retrievable in the AES-128 function in the C code, allows us to deduce:
- The column 0 of roundkey 10, since the last round has no MixColumns (SubBytes_ShiftRows -> AddRoundKey10 -> Ciphertext).
- The first byte (row 0, column 0) of roundkeys 1 to 9. Because for a given round k:
- The output of SubBytes_ShiftRows-k is the input to MixColumns.
- The input to SubBytes_ShiftRows-(k+1) is the roundkey-k XORed with the output of MixColumns. Since ShiftRows doesn’t move row 0, SubBytes_ShiftRows just applies the Sbox to row 0.
Example with the first 4+1=5 lines of QEMU output:
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x0000000000000092
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa4, rax -> 0x00000000000000f7
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa8, rax -> 0x0000000000000045
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aac, rax -> 0x000000000000008d
0, 0x401b1d, 0x288, "mov byte ptr [rdx], al", store, 0x69516c485aa0, rax -> 0x00000000000000c7
Let a=0x92
, b=0xf7
, c=0x45
, d=0x8d
, e=0xc7
and x
the first byte of roundkey 1.
We have the relation:
x + MC.line(0) * (a,b,c,d) = Sbox_inv(e)
thus:
x = Sbox_inv(e) + MC.line(0) * (a,b,c,d) = Sbox_inv(e) + 2*a + 3*b + c + d = Sbox_inv(e) + Mul2(a) + Mul2(b) + b + c + d
In $GF(8)$, $3z = 2z ^ z$, and
mul2
is provided, same forSbox
, which givesSbox_inv
below
Sbox_inv = [0]*256
for i, v in enumerate(S):
Sbox_inv[v] = i
Same logic to get the first byte of rk2 from lines 5-9 etc.
Thus, we get the first byte of rk 1-10 and the first column of rk10. Then we work backwards using the key_expansion loop to recover the missing bytes.
Here is the key_expansion loop for reference and an example function:
for (int i = 1; i < 11; ++i) {
/* First column */
rk[i][0][0] = rk[i - 1][0][0] ^ S[rk[i - 1][1][3]] ^ RCON[i];
rk[i][1][0] = rk[i - 1][1][0] ^ S[rk[i - 1][2][3]];
rk[i][2][0] = rk[i - 1][2][0] ^ S[rk[i - 1][3][3]];
rk[i][3][0] = rk[i - 1][3][0] ^ S[rk[i - 1][0][3]];
/* Second column */
rk[i][0][1] = rk[i - 1][0][1] ^ rk[i][0][0];
rk[i][1][1] = rk[i - 1][1][1] ^ rk[i][1][0];
rk[i][2][1] = rk[i - 1][2][1] ^ rk[i][2][0];
rk[i][3][1] = rk[i - 1][3][1] ^ rk[i][3][0];
/* Third column */
rk[i][0][2] = rk[i - 1][0][2] ^ rk[i][0][1];
rk[i][1][2] = rk[i - 1][1][2] ^ rk[i][1][1];
rk[i][2][2] = rk[i - 1][2][2] ^ rk[i][2][1];
rk[i][3][2] = rk[i - 1][3][2] ^ rk[i][3][1];
/* Fourth column */
rk[i][0][3] = rk[i - 1][0][3] ^ rk[i][0][2];
rk[i][1][3] = rk[i - 1][1][3] ^ rk[i][1][2];
rk[i][2][3] = rk[i - 1][2][3] ^ rk[i][2][2];
rk[i][3][3] = rk[i - 1][3][3] ^ rk[i][3][2];
}
def guess_byte_index(rk, i, x, y):
if y == 0:
if x == 0:
rk[i][1][3] = S_inv[rk[i+1][0][0] ^ rk[i][0][0] ^ RCON[i+1]]
else:
rk[i][(x+1)%4][3] = S_inv[rk[i+1][x][0] ^ rk[i][x][0]]
else:
rk[i][x][y-1] = rk[i][x][y]^rk[i-1][x][y]
Start of the byte_recovery algorithm:
for i in range(1,10):
guess_byte_index(rk,i,0,0)
for i in range(2,10):
guess_byte_index(rk,i,1,3)
for i in range(3,10):
guess_byte_index(rk,i,1,2)
for i in range(4,10):
guess_byte_index(rk,i,1,1)
# etc...
Since rk9 has the most discovered bytes, we operate on it:
We don’t know [0][1], [0][2], [0][3], and [3][0].
Fixing [3][0] lets you solve [0][3] by xor-ing rk9[3][0] and rk10[0][3].
Thus only 3 bytes to discover → 2^24 possibilities (about 5 minutes worst-case on my machine, without CUDA).
How to know if the guessed key is correct?
In the .py
script:
def randomString(length = 16):
return ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(length))
rnd = randomString(length = 16).encode()
Thus distinguish the correct plaintext:
if all((0x30 <= b <= 0x39) or (0x41 <= b <= 0x5A) or (0x61 <= b <= 0x7A) for b in plaintext):
print(f"Plaintext:{plaintext.decode('ascii')}")
Statistically, among the 16 million possibilities, only 1 will only contain a-z, A-Z, 0-9.
If not, repeat.
Thanks to Cryptanalyse for this challenge and the FCSC team for this year’s edition!