Seregon/zftpd

Zero-copy FTP/HTTP Daemon compatible with all POSIX systems

C/11.0 KB/No license
src/ftp_crypto.c
zftpd / src / ftp_crypto.c
1/*
2MIT License
3 
4Copyright (c) 2026 Seregon
5 
6Permission is hereby granted, free of charge, to any person obtaining a copy
7of this software and associated documentation files (the "Software"), to deal
8in the Software without restriction, including without limitation the rights
9to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10copies of the Software, and to permit persons to whom the Software is
11furnished to do so, subject to the following conditions:
12 
13The above copyright notice and this permission notice shall be included in all
14copies or substantial portions of the Software.
15 
16THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22SOFTWARE.
23*/
24 
25/**
26 * @file ftp_crypto.c
27 * @brief ChaCha20 stream cipher — pure C, no external dependencies
28 *
29 * @author SeregonWar
30 * @version 1.0.0
31 * @date 2026-02-19
32 *
33 * REFERENCE: RFC 7539 — ChaCha20 and Poly1305 for IETF Protocols
34 *
35 * PERFORMANCE NOTES:
36 * The inner loop is 20 rounds of ARX (Add-Rotate-XOR) on 32-bit words.
37 * Modern compilers vectorize this well. Typical throughput:
38 * x86-64 (Zen2/Intel): ~3 GB/s
39 * ARM (Cortex-A76): ~1 GB/s
40 * This far exceeds gigabit Ethernet (~125 MB/s), so encryption
41 * adds effectively zero overhead to FTP transfers.
42 */
43 
44#include "ftp_crypto.h"
45 
46#if FTP_ENABLE_CRYPTO
47 
48#include <string.h>
49 
50/*===========================================================================*
51 * ChaCha20 CORE
52 *
53 * The ChaCha20 state is a 4x4 matrix of 32-bit words:
54 *
55 * ┌──────────┬──────────┬──────────┬──────────┐
56 * │ "expa" │ "nd 3" │ "2-by" │ "te k" │ Constants
57 * ├──────────┼──────────┼──────────┼──────────┤
58 * │ key[0] │ key[1] │ key[2] │ key[3] │ Key (256-bit)
59 * ├──────────┼──────────┼──────────┼──────────┤
60 * │ key[4] │ key[5] │ key[6] │ key[7] │
61 * ├──────────┼──────────┼──────────┼──────────┤
62 * │ counter │ nonce[0] │ nonce[1] │ nonce[2] │ Counter + Nonce
63 * └──────────┴──────────┴──────────┴──────────┘
64 *
65 *===========================================================================*/
66 
/**
 * Rotate a 32-bit word left by @p n bit positions.
 *
 * The rotation count is reduced modulo 32, so every value of @p n is well
 * defined. In particular n == 0 (and any multiple of 32) returns @p v
 * unchanged instead of evaluating a shift by the full word width, which is
 * undefined behaviour in C.
 *
 * @param v Word to rotate.
 * @param n Rotation distance in bits; only the low 5 bits are used.
 * @return  @p v rotated left by (n mod 32) bits.
 */
static inline uint32_t rotl32(uint32_t v, unsigned int n) {
  n &= 31U;
  /* Masking the right-shift count keeps it in 0..31 even when n == 0. */
  return (v << n) | (v >> ((32U - n) & 31U));
}
73 
/**
 * ChaCha20 quarter round: four add / xor / rotate steps applied in place to
 * four state words.
 *
 *   a += b;  d = (d ^ a) <<< 16
 *   c += d;  b = (b ^ c) <<< 12
 *   a += b;  d = (d ^ a) <<<  8
 *   c += d;  b = (b ^ c) <<<  7
 *
 * @param a,b,c,d Pointers to the four words to mix; each is read and
 *                overwritten.
 */
static inline void quarter_round(uint32_t *a, uint32_t *b, uint32_t *c,
                                 uint32_t *d) {
  *a += *b;
  *d = rotl32(*d ^ *a, 16U);

  *c += *d;
  *b = rotl32(*b ^ *c, 12U);

  *a += *b;
  *d = rotl32(*d ^ *a, 8U);

  *c += *d;
  *b = rotl32(*b ^ *c, 7U);
}
97 
/**
 * Assemble a 32-bit word from four bytes stored in little-endian order.
 *
 * @param p Pointer to at least four readable bytes; p[0] is the least
 *          significant byte of the result.
 * @return  The decoded 32-bit value.
 */
static inline uint32_t load32_le(const uint8_t *p) {
  uint32_t word = 0U;

  /* Walk from the most significant byte down, shifting earlier bytes up. */
  for (unsigned int idx = 4U; idx > 0U; idx--) {
    word = (word << 8U) | (uint32_t)p[idx - 1U];
  }
  return word;
}
105 
/**
 * Write a 32-bit word to memory as four bytes in little-endian order.
 *
 * @param p Destination; must have room for four bytes. p[0] receives the
 *          least significant byte of @p v.
 * @param v Value to serialize.
 */
static inline void store32_le(uint8_t *p, uint32_t v) {
  for (unsigned int idx = 0U; idx < 4U; idx++) {
    p[idx] = (uint8_t)(v >> (8U * idx));
  }
}
115 
/**
 * Produce one 64-byte ChaCha20 keystream block from a 16-word input state.
 *
 * A working copy of the state goes through 10 double rounds (each is four
 * column quarter rounds followed by four diagonal quarter rounds, 20 rounds
 * in total). The input state is then added word-wise to the result, and the
 * 16 words are written to @p out in little-endian byte order.
 *
 * @param state Input state (16 words); not modified.
 * @param out   Receives the 64 keystream bytes.
 */
static void chacha20_block(const uint32_t state[16], uint8_t out[64]) {
  uint32_t work[16];
  unsigned int w;

  /* Work on a private copy so the caller's state stays intact. */
  for (w = 0U; w < 16U; w++) {
    work[w] = state[w];
  }

  for (unsigned int rounds_left = 10U; rounds_left > 0U; rounds_left--) {
    /* Columns of the 4x4 matrix */
    quarter_round(&work[0], &work[4], &work[8], &work[12]);
    quarter_round(&work[1], &work[5], &work[9], &work[13]);
    quarter_round(&work[2], &work[6], &work[10], &work[14]);
    quarter_round(&work[3], &work[7], &work[11], &work[15]);

    /* Diagonals of the 4x4 matrix */
    quarter_round(&work[0], &work[5], &work[10], &work[15]);
    quarter_round(&work[1], &work[6], &work[11], &work[12]);
    quarter_round(&work[2], &work[7], &work[8], &work[13]);
    quarter_round(&work[3], &work[4], &work[9], &work[14]);
  }

  /* Feed-forward: adding the input makes the permutation non-invertible. */
  for (w = 0U; w < 16U; w++) {
    work[w] += state[w];
  }

  /* Emit each word as four little-endian bytes. */
  for (w = 0U; w < 16U; w++) {
    store32_le(out + (4U * w), work[w]);
  }
}
150 
151/*===========================================================================*
152 * PUBLIC API
153 *===========================================================================*/
154 
/*
 * ChaCha20 constant row: the ASCII string "expand 32-byte k" split into four
 * 4-byte groups, each group read as a little-endian 32-bit word.
 */
static const uint32_t SIGMA[4] = {
    [0] = 0x61707865U, /* "expa" */
    [1] = 0x3320646EU, /* "nd 3" */
    [2] = 0x79622D32U, /* "2-by" */
    [3] = 0x6B206574U, /* "te k" */
};
162 
163void ftp_crypto_init(ftp_crypto_ctx_t *ctx, const uint8_t key[32],
164 const uint8_t nonce[12]) {
165 if ((ctx == NULL) || (key == NULL) || (nonce == NULL)) {
166 return;
167 }
168 
169 memset(ctx, 0, sizeof(*ctx));
170 
171 /* Row 0: constants */
172 ctx->state[0] = SIGMA[0];
173 ctx->state[1] = SIGMA[1];
174 ctx->state[2] = SIGMA[2];
175 ctx->state[3] = SIGMA[3];
176 
177 /* Row 1-2: 256-bit key (8 x 32-bit words) */
178 for (unsigned int i = 0U; i < 8U; i++) {
179 ctx->state[4U + i] = load32_le(&key[i * 4U]);
180 }
181 
182 /* Row 3: counter(0) + 96-bit nonce */
183 ctx->state[12] = 0U; /* block counter starts at 0 */
184 ctx->state[13] = load32_le(&nonce[0]);
185 ctx->state[14] = load32_le(&nonce[4]);
186 ctx->state[15] = load32_le(&nonce[8]);
187 
188 ctx->counter = 0U;
189 ctx->ks_offset = 64U; /* Force first block generation on next xor */
190 ctx->active = 1U;
191}
192 
193void ftp_crypto_xor(ftp_crypto_ctx_t *ctx, void *data, size_t len) {
194 if ((ctx == NULL) || (data == NULL) || (len == 0U)) {
195 return;
196 }
197 
198 uint8_t *p = (uint8_t *)data;
199 size_t remaining = len;
200 
201 while (remaining > 0U) {
202 /* Generate new keystream block if current one is exhausted */
203 if (ctx->ks_offset >= 64U) {
204 ctx->state[12] = ctx->counter;
205 chacha20_block(ctx->state, ctx->keystream);
206 ctx->counter++;
207 ctx->ks_offset = 0U;
208 }
209 
210 /* XOR available keystream bytes with data */
211 size_t avail = 64U - (size_t)ctx->ks_offset;
212 size_t chunk = (remaining < avail) ? remaining : avail;
213 
214 for (size_t i = 0U; i < chunk; i++) {
215 p[i] ^= ctx->keystream[ctx->ks_offset + (uint8_t)i];
216 }
217 
218 p += chunk;
219 remaining -= chunk;
220 ctx->ks_offset += (uint8_t)chunk;
221 }
222}
223 
224void ftp_crypto_reset(ftp_crypto_ctx_t *ctx) {
225 if (ctx == NULL) {
226 return;
227 }
228 
229 /* Secure erase: volatile prevents compiler from optimizing away */
230 volatile uint8_t *p = (volatile uint8_t *)ctx;
231 for (size_t i = 0U; i < sizeof(*ctx); i++) {
232 p[i] = 0U;
233 }
234}
235 
236void ftp_crypto_derive_key(const uint8_t psk[32], const uint8_t nonce[12],
237 uint8_t out_key[32]) {
238 if ((psk == NULL) || (nonce == NULL) || (out_key == NULL)) {
239 return;
240 }
241 
242 /*
243 * Key derivation: ChaCha20-based KDF
244 *
245 * Use the PSK as a ChaCha20 key with nonce to generate
246 * 64 bytes of keystream, then take the first 32 bytes
247 * as the derived session key.
248 *
249 * This ensures each session gets a unique key even with
250 * the same PSK, because the nonce is random per session.
251 *
252 * PSK ──┐
253 * ├──► ChaCha20(counter=0) ──► 64B keystream
254 * nonce ─┘ │
255 * first 32B = session key
256 */
257 uint32_t kdf_state[16];
258 
259 kdf_state[0] = SIGMA[0];
260 kdf_state[1] = SIGMA[1];
261 kdf_state[2] = SIGMA[2];
262 kdf_state[3] = SIGMA[3];
263 
264 for (unsigned int i = 0U; i < 8U; i++) {
265 kdf_state[4U + i] = load32_le(&psk[i * 4U]);
266 }
267 
268 kdf_state[12] = 0U;
269 kdf_state[13] = load32_le(&nonce[0]);
270 kdf_state[14] = load32_le(&nonce[4]);
271 kdf_state[15] = load32_le(&nonce[8]);
272 
273 uint8_t block[64];
274 chacha20_block(kdf_state, block);
275 
276 memcpy(out_key, block, 32U);
277 
278 /* Scrub temporary key material from stack */
279 volatile uint8_t *vb = (volatile uint8_t *)block;
280 for (size_t i = 0U; i < sizeof(block); i++) {
281 vb[i] = 0U;
282 }
283 volatile uint32_t *vs = (volatile uint32_t *)kdf_state;
284 for (size_t i = 0U; i < 16U; i++) {
285 vs[i] = 0U;
286 }
287}
288 
289#endif /* FTP_ENABLE_CRYPTO */
290