Writeup by groumage for Le hash qui rit

crypto symmetric

May 3, 2025

We need to find $m_i$ such that $\texttt{SHA-256} (m_1) \oplus \dots \oplus \texttt{SHA-256} (m_n) = target$ where $target$ is a random 256-bit string. At most 256 $m_i$ can be used.

From the paper in reference [1], given a $z \in$ { $1, 0$ } $^k$ for some fix length $k$ such as $k = 128$, it is possible to find an $x$ such that $\text{XHASH}^h (x) = z$ where $\text{XHASH}^h (x_1 \dots x_n) = h(\langle 1 \rangle . x_1) \oplus \dots h(\langle n \rangle . x_n)$ and $h$ is a hash function. The papers also shows that $n = k + 1$ suffices. In the context of this challenge: $h = \texttt{SHA-256}$, $k = 256$, $m_i = \langle i \rangle . x_i$ and $z$ is the $target$.

The idea is to generate a vector basis which generates the set of all $target$ values. This vector basis is put into a $256 \times 256$ matrix where each row is the 256 bits of a vector belonging to the basis. The matrix is then transposed and a column vector representing the $target$ is appended to the right, yielding a $256 \times 257$ matrix. A Gaussian elimination over $GF(2)$ on this matrix is executed and the augmented column indicates which vector from the basis should be included, i.e., which $\texttt{SHA-256} (m_i = \langle i \rangle . x_i)$ xored together to yield the $target$.

Additional information can be found at this github repo, as well as a Perl implementation of the attack.

import hashlib
import numpy as np
from pwn import *
 
def split_bits(data_bytes: bytes) -> np.ndarray:
	"""
	Convert a byte array to a bit array (numpy array of bits). Each byte is converted to 8 bits.
	"""
	return np.unpackbits(np.frombuffer(data_bytes, dtype=np.uint8))

def solve_gf2(A: np.ndarray, b: np.ndarray) -> np.ndarray:
	"""
	Solve the system of linear equations Ax = b over GF(2).
	"""
	A = A.copy()
	b = b.copy()
	n = len(b)
	for i in range(n):
		if A[i, i] == 0:
			for j in range(i + 1, n):
				if A[j, i] == 1:
					A[[i, j]] = A[[j, i]]
					b[i], b[j] = b[j], b[i]
					break
				else:
					continue
		for j in range(i + 1, n):
			if A[j, i] == 1:
				A[j] ^= A[i]
				b[j] ^= b[i]
	x = np.zeros_like(b)
	for i in range(n - 1, -1, -1):
		x[i] = b[i] ^ np.dot(A[i, i + 1:], x[i + 1:]) % 2
	return x
  
def find_basis(num_bits: int) -> tuple:
	"""
	Find a basis for the vector space of bit vectors of length num_bits.
	"""
	vecs = [split_bits(hashlib.sha256(f"{i}".encode()).digest())[:num_bits] for i in range(2 * num_bits)]
	mat = np.array(vecs, dtype=np.uint8)
	basis = []
	for row in mat:
		if not any(np.array_equal(row, b) for b in basis):
			tmp = basis + [row]
			tmp_mat = np.vstack(tmp)
			try:
				np.linalg.matrix_rank(tmp_mat)
				basis.append(row)
			except:
				continue
		if len(basis) == num_bits:
			break
	return np.array(basis, dtype=np.uint8), list(range(len(basis)))
  
def build_collision(target_bits):
	"""
	Build a collision for the target bits using the basis found.
	"""
	num_bits = len(target_bits)
	basis, indices = find_basis(num_bits)
	A = basis.T
	b = target_bits
	x = solve_gf2(A, b)
	return [indices[i] for i, bit in enumerate(x) if bit]
  
conn = remote('localhost', 4000)
conn.recvuntil(b"Input at most 256 messages M_i in hex format (empty line to exit) such that \\bigoplus_i SHA256(M_i) = 0x")
challenge = conn.recvuntil(b"\n").strip()
target_bytes = bytes.fromhex(challenge.decode())
target_bits = split_bits(target_bytes)
indices = build_collision(target_bits)
for i in indices:
value = str(i).encode()
conn.sendline(f"{value.hex()}".encode())
conn.recvuntil(b">>>")
conn.sendline(b"")
rsp = conn.recvall()
print(rsp.decode())
conn.close()

[1] A New Paradigm for Collision-free Hashing: Incrementality at Reduced Cost. Mihir Bellare and Daniele Micciancio, November 1996, https://cseweb.ucsd.edu/%7Emihir/papers/inchash.pdf.