Seregon/zftpd

Zero-copy FTP/HTTP Daemon compatible with all POSIX systems

C/11.0 KB/No license
src/pal_alloc.c
zftpd / src / pal_alloc.c
1/*
2MIT License
3 
4Copyright (c) 2026 Seregon
5 
6Permission is hereby granted, free of charge, to any person obtaining a copy
7of this software and associated documentation files (the "Software"), to deal
8in the Software without restriction, including without limitation the rights
9to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10copies of the Software, and to permit persons to whom the Software is
11furnished to do so, subject to the following conditions:
12 
13The above copyright notice and this permission notice shall be included in all
14copies or substantial portions of the Software.
15 
16THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22SOFTWARE.
23*/
24/**
25 * @file pal_alloc.h
26 * @brief Unified memory allocation abstraction (malloc/free)
27 *
28 * @author Seregon
29 * @version 1.0.0
30 *
31 * PLATFORMS: FreeBSD (PS4/PS5 kqueue), Linux (epoll)
32 * DESIGN: Single-threaded, non-blocking I/O
33 *
34 */
35#include "pal_alloc.h"
36 
37#include <stdatomic.h>
38#include <stdalign.h>
39#include <string.h>
40#include <stdlib.h>
41 
42#ifndef PAL_ALLOC_DEFAULT_SIZE
43#define PAL_ALLOC_DEFAULT_SIZE (8U * 1024U * 1024U)
44#endif
45 
46#ifndef PAL_ALLOC_MIN_ORDER
47#define PAL_ALLOC_MIN_ORDER 5U
48#endif
49 
50#ifndef PAL_ALLOC_MAX_ORDER
51#define PAL_ALLOC_MAX_ORDER 26U
52#endif
53 
54#define PAL_ALLOC_MAGIC 0xA11C0A7U
55 
56#ifndef PAL_ALLOC_HARD_FAIL
57#if defined(FTP_DEBUG) && (FTP_DEBUG != 0)
58#define PAL_ALLOC_HARD_FAIL 1
59#else
60#define PAL_ALLOC_HARD_FAIL 0
61#endif
62#endif
63 
64typedef struct {
65 uint32_t magic;
66 uint16_t order;
67 uint16_t used;
68 uint64_t pad;
69} pal_alloc_hdr_t;
70 
71typedef struct pal_alloc_free_node {
72 pal_alloc_hdr_t hdr;
73 struct pal_alloc_free_node *next;
74} pal_alloc_free_node_t;
75 
76static pal_allocator_t g_alloc = {
77 .base = NULL,
78 .size = 0U,
79 .max_order = 0U,
80 .lock = ATOMIC_FLAG_INIT,
81 .initialized = ATOMIC_VAR_INIT(0),
82 .alloc_calls = ATOMIC_VAR_INIT(0),
83 .free_calls = ATOMIC_VAR_INIT(0),
84 .calloc_calls = ATOMIC_VAR_INIT(0),
85 .realloc_calls = ATOMIC_VAR_INIT(0),
86 .aligned_calls = ATOMIC_VAR_INIT(0),
87 .failures = ATOMIC_VAR_INIT(0),
88 .bytes_in_use = ATOMIC_VAR_INIT(0),
89 .bytes_peak = ATOMIC_VAR_INIT(0),
90 .free_lists = {0},
91};
92 
93static _Alignas(4096) uint8_t g_alloc_default_buf[PAL_ALLOC_DEFAULT_SIZE];
94 
95#if defined(__clang__) || defined(__GNUC__)
96#define PAL_ASSUME_ALIGNED(p, a) __builtin_assume_aligned((p), (a))
97#else
98#define PAL_ASSUME_ALIGNED(p, a) (p)
99#endif
100 
101static void pal_alloc_lock(pal_allocator_t *a)
102{
103 while (atomic_flag_test_and_set_explicit(&a->lock, memory_order_acquire)) {
104 }
105}
106 
107static void pal_alloc_unlock(pal_allocator_t *a)
108{
109 atomic_flag_clear_explicit(&a->lock, memory_order_release);
110}
111 
112#if PAL_ALLOC_HARD_FAIL
113static void pal_alloc_fail_fast(void)
114{
115#if defined(__clang__) || defined(__GNUC__)
116 __builtin_trap();
117#else
118 abort();
119#endif
120}
121#endif
122 
123static uint32_t floor_log2_u64(uint64_t v)
124{
125 uint32_t r = 0U;
126 while (v >>= 1U) {
127 r++;
128 }
129 return r;
130}
131 
132static uint32_t ceil_log2_u64(uint64_t v)
133{
134 if (v <= 1U) {
135 return 0U;
136 }
137 uint32_t f = floor_log2_u64(v - 1U);
138 return f + 1U;
139}
140 
141static pal_alloc_free_node_t *list_pop(pal_allocator_t *a, uint32_t order)
142{
143 uint32_t idx = order - PAL_ALLOC_MIN_ORDER;
144 pal_alloc_free_node_t *n = (pal_alloc_free_node_t *)a->free_lists[idx];
145 if (n != NULL) {
146 a->free_lists[idx] = (void *)n->next;
147 n->next = NULL;
148 }
149 return n;
150}
151 
152static void list_push(pal_allocator_t *a, uint32_t order, pal_alloc_free_node_t *n)
153{
154 uint32_t idx = order - PAL_ALLOC_MIN_ORDER;
155 n->next = (pal_alloc_free_node_t *)a->free_lists[idx];
156 a->free_lists[idx] = (void *)n;
157}
158 
159static int list_remove(pal_allocator_t *a, uint32_t order, pal_alloc_free_node_t *target)
160{
161 uint32_t idx = order - PAL_ALLOC_MIN_ORDER;
162 pal_alloc_free_node_t *prev = NULL;
163 pal_alloc_free_node_t *cur = (pal_alloc_free_node_t *)a->free_lists[idx];
164 while (cur != NULL) {
165 if (cur == target) {
166 if (prev != NULL) {
167 prev->next = cur->next;
168 } else {
169 a->free_lists[idx] = (void *)cur->next;
170 }
171 cur->next = NULL;
172 return 1;
173 }
174 prev = cur;
175 cur = cur->next;
176 }
177 return 0;
178}
179 
180static void prefault_pages(uint8_t *base, size_t size)
181{
182 if (base == NULL || size == 0U) {
183 return;
184 }
185 for (size_t i = 0U; i < size; i += 4096U) {
186 base[i] = 0U;
187 }
188 base[size - 1U] = 0U;
189}
190 
191static int ptr_in_arena(pal_allocator_t *a, const void *p)
192{
193 if (p == NULL || a == NULL || a->base == NULL || a->size == 0U) {
194 return 0;
195 }
196 const uint8_t *bp = (const uint8_t *)p;
197 return (bp >= a->base) && (bp < (a->base + a->size));
198}
199 
200void pal_allocator_get_stats(pal_allocator_t *a, pal_alloc_stats_t *out)
201{
202 if (out == NULL) {
203 return;
204 }
205 memset(out, 0, sizeof(*out));
206 if (a == NULL || atomic_load(&a->initialized) == 0) {
207 return;
208 }
209 out->alloc_calls = atomic_load(&a->alloc_calls);
210 out->free_calls = atomic_load(&a->free_calls);
211 out->calloc_calls = atomic_load(&a->calloc_calls);
212 out->realloc_calls = atomic_load(&a->realloc_calls);
213 out->aligned_calls = atomic_load(&a->aligned_calls);
214 out->failures = atomic_load(&a->failures);
215 out->bytes_in_use = atomic_load(&a->bytes_in_use);
216 out->bytes_peak = atomic_load(&a->bytes_peak);
217}
218 
219void pal_allocator_reset_stats(pal_allocator_t *a)
220{
221 if (a == NULL) {
222 return;
223 }
224 atomic_store(&a->alloc_calls, 0U);
225 atomic_store(&a->free_calls, 0U);
226 atomic_store(&a->calloc_calls, 0U);
227 atomic_store(&a->realloc_calls, 0U);
228 atomic_store(&a->aligned_calls, 0U);
229 atomic_store(&a->failures, 0U);
230 atomic_store(&a->bytes_in_use, 0U);
231 atomic_store(&a->bytes_peak, 0U);
232}
233 
234int pal_allocator_init(pal_allocator_t *a, void *buffer, size_t size)
235{
236 if (a == NULL || buffer == NULL) {
237 return -1;
238 }
239 if (size < (size_t)(1U << PAL_ALLOC_MIN_ORDER)) {
240 return -1;
241 }
242 
243 uintptr_t p = (uintptr_t)buffer;
244 uintptr_t aligned = (p + 15U) & ~(uintptr_t)15U;
245 size_t adj = (size_t)(aligned - p);
246 if (adj >= size) {
247 return -1;
248 }
249 uint8_t *base = (uint8_t *)aligned;
250 size_t usable = size - adj;
251 
252 uint32_t max_order = floor_log2_u64((uint64_t)usable);
253 if (max_order > PAL_ALLOC_MAX_ORDER) {
254 max_order = PAL_ALLOC_MAX_ORDER;
255 }
256 if (max_order < PAL_ALLOC_MIN_ORDER) {
257 return -1;
258 }
259 
260 size_t arena_size = (size_t)1U << max_order;
261 
262 pal_alloc_lock(a);
263 a->base = base;
264 a->size = arena_size;
265 a->max_order = max_order;
266 for (size_t i = 0U; i < (sizeof(a->free_lists) / sizeof(a->free_lists[0])); i++) {
267 a->free_lists[i] = NULL;
268 }
269 pal_allocator_reset_stats(a);
270 
271 prefault_pages(base, arena_size);
272 
273 pal_alloc_free_node_t *root =
274 (pal_alloc_free_node_t *)PAL_ASSUME_ALIGNED(base, alignof(pal_alloc_free_node_t));
275 root->hdr.magic = PAL_ALLOC_MAGIC;
276 root->hdr.order = (uint16_t)max_order;
277 root->hdr.used = 0U;
278 root->hdr.pad = 0U;
279 root->next = NULL;
280 list_push(a, max_order, root);
281 
282 atomic_store(&a->initialized, 1);
283 pal_alloc_unlock(a);
284 
285 return 0;
286}
287 
288int pal_alloc_init(void *buffer, size_t size)
289{
290 return pal_allocator_init(&g_alloc, buffer, size);
291}
292 
293int pal_alloc_init_default(void)
294{
295 if (atomic_load(&g_alloc.initialized) != 0) {
296 return 0;
297 }
298 return pal_alloc_init(g_alloc_default_buf, sizeof(g_alloc_default_buf));
299}
300 
301size_t pal_alloc_arena_size(void)
302{
303 if (atomic_load(&g_alloc.initialized) == 0) {
304 return 0U;
305 }
306 return g_alloc.size;
307}
308 
309size_t pal_alloc_arena_free_approx(void)
310{
311 if (atomic_load(&g_alloc.initialized) == 0) {
312 return 0U;
313 }
314 
315 pal_alloc_lock(&g_alloc);
316 size_t free_total = 0U;
317 for (uint32_t order = PAL_ALLOC_MIN_ORDER; order <= g_alloc.max_order; order++) {
318 uint32_t idx = order - PAL_ALLOC_MIN_ORDER;
319 pal_alloc_free_node_t *n = (pal_alloc_free_node_t *)g_alloc.free_lists[idx];
320 while (n != NULL) {
321 free_total += (size_t)1U << order;
322 n = n->next;
323 }
324 }
325 pal_alloc_unlock(&g_alloc);
326 return free_total;
327}
328 
329void pal_alloc_get_stats(pal_alloc_stats_t *out)
330{
331 pal_allocator_get_stats(&g_alloc, out);
332}
333 
334void pal_alloc_reset_stats(void)
335{
336 pal_allocator_reset_stats(&g_alloc);
337}
338 
339static void bump_peak(pal_allocator_t *a, uint64_t in_use)
340{
341 uint64_t peak = atomic_load(&a->bytes_peak);
342 while (in_use > peak) {
343 if (atomic_compare_exchange_weak(&a->bytes_peak, &peak, in_use)) {
344 break;
345 }
346 }
347}
348 
349static void *alloc_locked(pal_allocator_t *a, size_t size)
350{
351 if (size == 0U) {
352 size = 1U;
353 }
354 
355 size_t need = size + sizeof(pal_alloc_hdr_t);
356 if (need > a->size) {
357 return NULL;
358 }
359 
360 uint32_t order = ceil_log2_u64((uint64_t)need);
361 if (order < PAL_ALLOC_MIN_ORDER) {
362 order = PAL_ALLOC_MIN_ORDER;
363 }
364 if (order > a->max_order) {
365 return NULL;
366 }
367 
368 uint32_t cur = order;
369 pal_alloc_free_node_t *node = NULL;
370 
371 while (cur <= a->max_order) {
372 node = list_pop(a, cur);
373 if (node != NULL) {
374 break;
375 }
376 cur++;
377 }
378 
379 if (node == NULL) {
380 return NULL;
381 }
382 
383 while (cur > order) {
384 cur--;
385 size_t half = (size_t)1U << cur;
386 uint8_t *b = (uint8_t *)node;
387 pal_alloc_free_node_t *buddy =
388 (pal_alloc_free_node_t *)PAL_ASSUME_ALIGNED((b + half), alignof(pal_alloc_free_node_t));
389 
390 buddy->hdr.magic = PAL_ALLOC_MAGIC;
391 buddy->hdr.order = (uint16_t)cur;
392 buddy->hdr.used = 0U;
393 buddy->hdr.pad = 0U;
394 buddy->next = NULL;
395 list_push(a, cur, buddy);
396 
397 node->hdr.order = (uint16_t)cur;
398 }
399 
400 node->hdr.magic = PAL_ALLOC_MAGIC;
401 node->hdr.order = (uint16_t)order;
402 node->hdr.used = 1U;
403 node->hdr.pad = 0U;
404 
405 return (void *)((uint8_t *)node + sizeof(pal_alloc_hdr_t));
406}
407 
408void *pal_allocator_malloc(pal_allocator_t *a, size_t size)
409{
410 if (a == NULL) {
411 return NULL;
412 }
413 if (atomic_load(&a->initialized) == 0) {
414 return NULL;
415 }
416 
417 pal_alloc_lock(a);
418 void *p = alloc_locked(a, size);
419 if (p != NULL) {
420 atomic_fetch_add(&a->alloc_calls, 1U);
421 pal_alloc_hdr_t *hdr =
422 (pal_alloc_hdr_t *)PAL_ASSUME_ALIGNED(((uint8_t *)p - sizeof(pal_alloc_hdr_t)),
423 alignof(pal_alloc_hdr_t));
424 uint64_t bytes = (uint64_t)((size_t)1U << (uint32_t)hdr->order);
425 uint64_t in_use = atomic_fetch_add(&a->bytes_in_use, bytes) + bytes;
426 bump_peak(a, in_use);
427 } else {
428 atomic_fetch_add(&a->failures, 1U);
429 }
430 pal_alloc_unlock(a);
431 return p;
432}
433 
434static void free_locked(pal_allocator_t *a, void *ptr)
435{
436 if (ptr == NULL) {
437 return;
438 }
439 
440 uint8_t *p = (uint8_t *)ptr;
441 pal_alloc_hdr_t *hdr =
442 (pal_alloc_hdr_t *)PAL_ASSUME_ALIGNED((p - sizeof(pal_alloc_hdr_t)), alignof(pal_alloc_hdr_t));
443 if (hdr->magic != PAL_ALLOC_MAGIC || hdr->used == 0U) {
444 if (((uintptr_t)ptr % alignof(void *)) == 0U) {
445 void *raw = *((void **)ptr - 1);
446 if (ptr_in_arena(a, raw)) {
447 uint8_t *rp = (uint8_t *)raw;
448 pal_alloc_hdr_t *rh =
449 (pal_alloc_hdr_t *)PAL_ASSUME_ALIGNED((rp - sizeof(pal_alloc_hdr_t)),
450 alignof(pal_alloc_hdr_t));
451 if ((rh->magic == PAL_ALLOC_MAGIC) && (rh->used != 0U)) {
452 hdr = rh;
453 } else {
454#if PAL_ALLOC_HARD_FAIL
455 pal_alloc_fail_fast();
456#endif
457 return;
458 }
459 } else {
460#if PAL_ALLOC_HARD_FAIL
461 pal_alloc_fail_fast();
462#endif
463 return;
464 }
465 } else {
466#if PAL_ALLOC_HARD_FAIL
467 pal_alloc_fail_fast();
468#endif
469 return;
470 }
471 }
472 
473 uint32_t order = (uint32_t)hdr->order;
474 if (order < PAL_ALLOC_MIN_ORDER || order > a->max_order) {
475#if PAL_ALLOC_HARD_FAIL
476 pal_alloc_fail_fast();
477#endif
478 return;
479 }
480 
481 uint64_t bytes = (uint64_t)((size_t)1U << order);
482 atomic_fetch_add(&a->free_calls, 1U);
483 atomic_fetch_sub(&a->bytes_in_use, bytes);
484 
485 hdr->used = 0U;
486 pal_alloc_free_node_t *node = (pal_alloc_free_node_t *)hdr;
487 node->next = NULL;
488 
489 size_t block_size = (size_t)1U << order;
490 size_t offset = (size_t)((uint8_t *)node - a->base);
491 
492 while (order < a->max_order) {
493 size_t buddy_offset = offset ^ block_size;
494 if (buddy_offset >= a->size) {
495 break;
496 }
497 
498 pal_alloc_free_node_t *buddy =
499 (pal_alloc_free_node_t *)PAL_ASSUME_ALIGNED((a->base + buddy_offset),
500 alignof(pal_alloc_free_node_t));
501 if (buddy->hdr.magic != PAL_ALLOC_MAGIC || buddy->hdr.used != 0U) {
502 break;
503 }
504 if ((uint32_t)buddy->hdr.order != order) {
505 break;
506 }
507 
508 if (list_remove(a, order, buddy) == 0) {
509 break;
510 }
511 
512 if (buddy_offset < offset) {
513 node = buddy;
514 offset = buddy_offset;
515 }
516 
517 order++;
518 block_size <<= 1U;
519 node->hdr.order = (uint16_t)order;
520 node->hdr.used = 0U;
521 node->hdr.pad = 0U;
522 node->next = NULL;
523 }
524 
525 list_push(a, order, node);
526}
527 
528void pal_allocator_free(pal_allocator_t *a, void *ptr)
529{
530 if (ptr == NULL) {
531 return;
532 }
533 if (a == NULL || atomic_load(&a->initialized) == 0) {
534 return;
535 }
536 pal_alloc_lock(a);
537 free_locked(a, ptr);
538 pal_alloc_unlock(a);
539}
540 
541void *pal_malloc(size_t size)
542{
543 if (atomic_load(&g_alloc.initialized) == 0) {
544 if (pal_alloc_init_default() != 0) {
545 return NULL;
546 }
547 }
548 return pal_allocator_malloc(&g_alloc, size);
549}
550 
551void pal_free(void *ptr)
552{
553 if (atomic_load(&g_alloc.initialized) == 0) {
554 return;
555 }
556 pal_allocator_free(&g_alloc, ptr);
557}
558 
559void *pal_allocator_calloc(pal_allocator_t *a, size_t nmemb, size_t size)
560{
561 if (a == NULL) {
562 return NULL;
563 }
564 atomic_fetch_add(&a->calloc_calls, 1U);
565 
566 if (nmemb == 0U || size == 0U) {
567 return pal_allocator_malloc(a, 0U);
568 }
569 if (nmemb > (SIZE_MAX / size)) {
570 return NULL;
571 }
572 size_t total = nmemb * size;
573 void *p = pal_allocator_malloc(a, total);
574 if (p != NULL) {
575 memset(p, 0, total);
576 }
577 return p;
578}
579 
580void *pal_calloc(size_t nmemb, size_t size)
581{
582 if (atomic_load(&g_alloc.initialized) == 0) {
583 if (pal_alloc_init_default() != 0) {
584 return NULL;
585 }
586 }
587 return pal_allocator_calloc(&g_alloc, nmemb, size);
588}
589 
590void *pal_allocator_realloc(pal_allocator_t *a, void *ptr, size_t size)
591{
592 if (a == NULL) {
593 return NULL;
594 }
595 atomic_fetch_add(&a->realloc_calls, 1U);
596 
597 if (ptr == NULL) {
598 return pal_allocator_malloc(a, size);
599 }
600 if (size == 0U) {
601 pal_allocator_free(a, ptr);
602 return NULL;
603 }
604 if (atomic_load(&a->initialized) == 0) {
605 return NULL;
606 }
607 
608 uint8_t *p = (uint8_t *)ptr;
609 pal_alloc_hdr_t *hdr =
610 (pal_alloc_hdr_t *)PAL_ASSUME_ALIGNED((p - sizeof(pal_alloc_hdr_t)), alignof(pal_alloc_hdr_t));
611 if (hdr->magic != PAL_ALLOC_MAGIC || hdr->used == 0U) {
612#if PAL_ALLOC_HARD_FAIL
613 pal_alloc_fail_fast();
614#endif
615 return NULL;
616 }
617 
618 uint32_t order = (uint32_t)hdr->order;
619 size_t cap = ((size_t)1U << order) - sizeof(pal_alloc_hdr_t);
620 if (size <= cap) {
621 return ptr;
622 }
623 
624 void *n = pal_allocator_malloc(a, size);
625 if (n == NULL) {
626 return NULL;
627 }
628 memcpy(n, ptr, cap);
629 pal_allocator_free(a, ptr);
630 return n;
631}
632 
633void *pal_realloc(void *ptr, size_t size)
634{
635 if (atomic_load(&g_alloc.initialized) == 0) {
636 if (pal_alloc_init_default() != 0) {
637 return NULL;
638 }
639 }
640 return pal_allocator_realloc(&g_alloc, ptr, size);
641}
642 
643void *pal_allocator_aligned_alloc(pal_allocator_t *a, size_t alignment, size_t size)
644{
645 if (a == NULL) {
646 return NULL;
647 }
648 atomic_fetch_add(&a->aligned_calls, 1U);
649 
650 if (alignment < sizeof(void *)) {
651 alignment = sizeof(void *);
652 }
653 if ((alignment & (alignment - 1U)) != 0U) {
654 return NULL;
655 }
656 
657 size_t need = size + alignment + sizeof(void *);
658 void *raw = pal_allocator_malloc(a, need);
659 if (raw == NULL) {
660 return NULL;
661 }
662 
663 uintptr_t start = (uintptr_t)raw + sizeof(void *);
664 uintptr_t aligned = (start + (alignment - 1U)) & ~(uintptr_t)(alignment - 1U);
665 void **slot = (void **)(aligned - sizeof(void *));
666 *slot = raw;
667 return (void *)aligned;
668}
669 
670void *pal_aligned_alloc(size_t alignment, size_t size)
671{
672 if (atomic_load(&g_alloc.initialized) == 0) {
673 if (pal_alloc_init_default() != 0) {
674 return NULL;
675 }
676 }
677 return pal_allocator_aligned_alloc(&g_alloc, alignment, size);
678}
679 
680int pal_allocator_posix_memalign(pal_allocator_t *a, void **memptr, size_t alignment, size_t size)
681{
682 if (memptr == NULL) {
683 return 22;
684 }
685 void *p = pal_allocator_aligned_alloc(a, alignment, size);
686 if (p == NULL) {
687 *memptr = NULL;
688 return 12;
689 }
690 *memptr = p;
691 return 0;
692}
693 
694int pal_posix_memalign(void **memptr, size_t alignment, size_t size)
695{
696 if (atomic_load(&g_alloc.initialized) == 0) {
697 if (pal_alloc_init_default() != 0) {
698 return 12;
699 }
700 }
701 return pal_allocator_posix_memalign(&g_alloc, memptr, alignment, size);
702}
703