Zero-copy FTP/HTTP Daemon compatible with all POSIX systems
| 1 | /* |
| 2 | MIT License |
| 3 | |
| 4 | Copyright (c) 2026 Seregon |
| 5 | |
| 6 | Permission is hereby granted, free of charge, to any person obtaining a copy |
| 7 | of this software and associated documentation files (the "Software"), to deal |
| 8 | in the Software without restriction, including without limitation the rights |
| 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 10 | copies of the Software, and to permit persons to whom the Software is |
| 11 | furnished to do so, subject to the following conditions: |
| 12 | |
| 13 | The above copyright notice and this permission notice shall be included in all |
| 14 | copies or substantial portions of the Software. |
| 15 | |
| 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 22 | SOFTWARE. |
| 23 | */ |
| 24 | |
| 25 | /** |
| 26 | * @file pal_fileio.c |
| 27 | * @brief Platform Abstraction Layer - File I/O Implementation |
| 28 | * |
| 29 | * @author SeregonWar |
| 30 | * @version 1.0.0 |
| 31 | * @date 2026-02-13 |
| 32 | * |
| 33 | */ |
| 34 | |
| 35 | #include "pal_fileio.h" |
| 36 | #include "ftp_log.h" |
| 37 | #include "pal_alloc.h" |
| 38 | #include "pal_network.h" |
| 39 | #include <dirent.h> |
| 40 | #include <errno.h> |
| 41 | #include <fcntl.h> |
| 42 | #include <stdatomic.h> |
| 43 | #include <stdio.h> |
| 44 | #include <string.h> |
| 45 | #include <sys/socket.h> |
| 46 | #include <sys/statvfs.h> |
| 47 | #include <unistd.h> |
| 48 | |
/*
 * Size of the userspace bounce buffer used by pal_sendfile() whenever data
 * has to be moved with pread() + pal_send_all() instead of sendfile():
 *   - on platforms that have no sendfile() branch at all, and
 *   - on the FreeBSD/PS4/PS5 branch when sendfile() fails with an error that
 *     is eligible for the transparent fallback.
 * The buffer is declared static _Thread_local, so one buffer of this size
 * exists per thread that reaches either fallback.
 */
#define FALLBACK_BUFFER_SIZE FTP_BUFFER_SIZE
| 51 | |
| 52 | /* |
| 53 | * PAL_FILE_WRITE_CHUNK_MAX is defined in pal_fileio.h. |
| 54 | * This fallback guard protects against edge cases where a transitive include |
| 55 | * of pal_fileio.h fires before our own #include above (e.g. via ftp_types.h), |
| 56 | * causing the header guard to suppress the macro on the second pass. |
| 57 | */ |
#if !defined(PAL_FILE_WRITE_CHUNK_MAX)
#  if defined(PLATFORM_PS5) || defined(PS5)
/*
 * PS5: 1 MB per write() — the same size as FTP_BUFFER_SIZE and ftpsrv's
 * IO_COPY_BUFSIZE.
 *
 * History: the cap used to be 128 KB so that every PFS write in the STOR
 * double-buffer path stayed below ~5 ms and the recv thread never stalled
 * the TCP window. On PS5 cmd_STOR forces buf1 = NULL, which disables the
 * double-buffer path, so the small cap protected nothing and merely split
 * each 1 MB recv into eight 128 KB writes:
 *     8 × 5 ms = 40 ms/MB → 25 MB/s    (observed bottleneck)
 *
 * With one write per 1 MB recv buffer:
 *     1 × ~9 ms = ~115 MB/s            (matches ftpsrv on /data)
 *
 * Safety: FTP_TCP_RCVBUF = 4 MB absorbs all in-flight network data during
 * the single write() call, so no TCP zero-window stall occurs.
 */
#    define PAL_FILE_WRITE_CHUNK_MAX (1024U * 1024U)
#  elif defined(PLATFORM_PS4) || defined(PS4)
/* PS4: 64 KB — original value kept because HDD write latency is higher. */
#    define PAL_FILE_WRITE_CHUNK_MAX (64U * 1024U)
#  else
/* Every other platform: 256 KB. */
#    define PAL_FILE_WRITE_CHUNK_MAX (256U * 1024U)
#  endif
#endif
| 83 | |
/*
 * Max recursion depth for cross-device directory move.
 * NOTE(review): presumably the directory-move code refuses to descend past
 * this many nested levels — confirm against the move implementation.
 */
#define PAL_MOVE_MAX_DEPTH 64U
| 86 | |
/*
 * PAL_FILE_COPY_BUFFER_SIZE — per-buffer size for the copy pipeline.
 *
 * PS5 USB→NVMe throughput analysis (exFAT → PFS, 12 GB file):
 *
 *   Observed serial throughput:   135 MB/s
 *   Target pipelined throughput:  215 MB/s
 *
 *   USB exFAT sequential read:   ~363 MB/s
 *   NVMe PFS sequential write:   ~215 MB/s
 *
 *   Serial model: 1/(1/363 + 1/215) = 135 MB/s  ✓ matches observation
 *   Pipelined:    min(363, 215)     = 215 MB/s  ✓ matches target
 *
 * PS4 USB→HDD/SSD throughput analysis:
 *
 *   USB exFAT sequential read:   ~320 MB/s
 *   HDD 5400 RPM PFS write:      ~85 MB/s  → serial ~67 MB/s,
 *                                            pipelined 85 MB/s (+27%)
 *   SSD aftermarket PFS write:   ~175 MB/s → serial ~113 MB/s,
 *                                            pipelined 175 MB/s (+55%)
 *
 * 1 MB buffers are sufficient on PS4: T_read(1MB) = 3.3 ms is well under
 * T_write_HDD(1MB) = 12.3 ms, so the reader always finishes before the
 * writer and the pipeline never stalls. 4 MB buffers add memory pressure
 * (PS4 daemon budget ~78 MB) with no throughput gain.
 *
 * The serial read+write loop leaves one device idle during every cycle.
 * A double-buffer pipeline overlaps USB reads with NVMe/HDD writes fully,
 * recovering the throughput gap on both platforms.
 *
 * IMPORTANT: the copy path writes the FULL buffer in a single write()
 * call, bypassing pal_file_write_all()'s PAL_FILE_WRITE_CHUNK_MAX
 * subdivision. That limit (in this file's fallback definition: 1 MB on
 * PS5, 64 KB on PS4, 256 KB elsewhere) exists to prevent TCP stalls in
 * cmd_STOR; it has no relevance for a file-to-file copy.
 *
 * Platform detection accepts both the PLATFORM_PSx and the bare PSx
 * spellings, matching the PAL_FILE_WRITE_CHUNK_MAX selection in this file,
 * so a build that defines only PS4 still gets the 1 MB buffers intended
 * for that memory budget.
 */
#ifndef PAL_FILE_COPY_BUFFER_SIZE
#  if defined(PLATFORM_PS5) || defined(PS5)
/* 4 MB — NVMe ~215 MB/s, T_write=19ms covers T_read=12ms */
#    define PAL_FILE_COPY_BUFFER_SIZE (4U * 1024U * 1024U)
#  elif defined(PLATFORM_PS4) || defined(PS4)
/* 1 MB — HDD ~85 MB/s, T_write=12ms covers T_read=3ms */
#    define PAL_FILE_COPY_BUFFER_SIZE (1024U * 1024U)
#  else
/* 4 MB */
#    define PAL_FILE_COPY_BUFFER_SIZE (4U * 1024U * 1024U)
#  endif
#endif
| 134 | |
| 135 | /*---------------------------------------------------------------------------* |
| 136 | * DOUBLE-BUFFER COPY PIPELINE (PS4 and PS5) |
| 137 | * |
| 138 | * Two buffers alternate between a reader thread (USB exFAT) and the |
| 139 | * calling thread (NVMe/HDD PFS writer), fully overlapping I/O: |
| 140 | * |
| 141 | * Cycle N: [read buf A from USB] [write buf B to storage] |
| 142 | * Cycle N+1: [read buf B from USB] [write buf A to storage] |
| 143 | * |
| 144 | * PS5: writer (NVMe, ~215 MB/s) is the bottleneck; reader (USB, ~363 MB/s) |
| 145 | * always finishes first. Net throughput: 215 MB/s. |
| 146 | * PS4: writer (HDD, ~85 MB/s) is the bottleneck; reader (USB, ~320 MB/s) |
| 147 | * always finishes first. Net throughput: ~85 MB/s (vs ~67 MB/s serial). |
| 148 | * |
| 149 | * Thread roles: |
| 150 | * Main thread — writer: drains the filled buffer to dst_fd, calls |
| 151 | * the progress callback, signals cv_free. |
| 152 | * Reader thread — reads from src_fd into the free buffer, signals |
| 153 | * cv_ready when a full chunk is available. |
| 154 | * |
| 155 | * State machine (protected by the pipeline's mtx mutex): |
| 156 | * fill_idx — buffer currently being read into (0 or 1) |
| 157 | * drain_idx — buffer currently being written out (1-fill_idx) |
| 158 | * len[i] — bytes available in buffer i (0 = free, >0 = ready) |
| 159 | * done — reader has hit EOF or error |
| 160 | * reader_err — non-zero errno set by reader on error |
| 161 | *---------------------------------------------------------------------------*/ |
| 162 | #include <pthread.h> |
| 163 | |
/**
 * Shared state of the double-buffer copy pipeline.
 *
 * One instance is shared between the reader thread (copy_reader_thread) and
 * the writer that drains the buffers. Every field except buf[], src_fd and
 * buf_sz is mutated while the pipeline runs and must only be accessed with
 * mtx held.
 */
typedef struct {
  uint8_t *buf[2];          /* the two data buffers, each buf_sz bytes */
  size_t len[2];            /* bytes filled in each buffer (0 = free) */
  int fill_idx;             /* index the reader will fill next (0 or 1) */
  int src_fd;               /* source file descriptor read by the reader */
  size_t buf_sz;            /* capacity of each buffer in bytes */
  int done;                 /* 1 once the reader hit EOF/error, or a stop
                             * was requested by setting it externally */
  int reader_err;           /* errno of the failed read() (0 = ok) */
  pthread_mutex_t mtx;      /* guards len[], fill_idx, done, reader_err */
  pthread_cond_t cv_ready;  /* signalled by reader: buffer filled or done */
  pthread_cond_t cv_free;   /* waited on by reader: a buffer was drained */
} copy_pipe_t;

/**
 * @brief Reader side of the copy pipeline (pthread entry point).
 *
 * Repeatedly waits until buf[fill_idx] is free, fills it with one read()
 * from src_fd (performed WITHOUT holding mtx), publishes the byte count in
 * len[], flips fill_idx to the other buffer and signals cv_ready.
 *
 * Termination:
 *  - read() returns 0 (EOF): done = 1, cv_ready signalled.
 *  - read() fails (other than EINTR, which is retried): reader_err = errno
 *    of that read(), done = 1, cv_ready signalled.
 *  - done was already set by another thread while waiting: exits without
 *    touching any buffer.
 *
 * @param arg  Pointer to the shared copy_pipe_t
 * @return Always NULL; results are reported through the copy_pipe_t.
 */
static void *copy_reader_thread(void *arg) {
  copy_pipe_t *p = (copy_pipe_t *)arg;

  pthread_mutex_lock(&p->mtx);
  for (;;) {
    const int fi = p->fill_idx;

    /* Block until our target buffer has been drained, or a stop is
     * requested (done set by the other side). */
    while ((p->len[fi] != 0U) && (p->done == 0)) {
      pthread_cond_wait(&p->cv_free, &p->mtx);
    }
    if (p->done != 0) {
      break;
    }

    /* Drop the lock for the (slow) read so the writer can keep draining
     * the other buffer concurrently. */
    pthread_mutex_unlock(&p->mtx);

    ssize_t n;
    do {
      n = read(p->src_fd, p->buf[fi], p->buf_sz);
    } while ((n < 0) && (errno == EINTR));

    /*
     * Capture errno BEFORE re-taking the mutex: a successful
     * pthread_mutex_lock() is allowed to modify errno, which would make
     * reader_err report the wrong failure cause (or 0).
     */
    const int read_errno = (n < 0) ? errno : 0;

    pthread_mutex_lock(&p->mtx);

    if (n <= 0) {
      /* n < 0: read error; n == 0: EOF. Either way the reader is finished. */
      if (n < 0) {
        p->reader_err = read_errno;
      }
      p->done = 1;
      pthread_cond_signal(&p->cv_ready);
      break;
    }

    /* Publish the filled buffer and move on to the other one. */
    p->len[fi] = (size_t)n;
    p->fill_idx = 1 - fi;
    pthread_cond_signal(&p->cv_ready);
  }
  pthread_mutex_unlock(&p->mtx);
  return NULL;
}
| 222 | |
| 223 | /*===========================================================================* |
| 224 | * ZERO-COPY FILE TRANSFER |
| 225 | *===========================================================================*/ |
| 226 | |
/**
 * @brief Send file data via socket (zero-copy where the platform allows it)
 *
 * DESIGN RATIONALE:
 * - sendfile() eliminates userspace copy (kernel-direct transfer)
 * - Fallback to buffered I/O maintains portability
 * - Performance difference: 2-3x throughput improvement
 *
 * @param sock_fd  Destination socket descriptor; must be >= 0
 * @param file_fd  Source file descriptor; must be >= 0
 * @param offset   In/out file position. Must be non-NULL with *offset >= 0.
 *                 It is advanced by the bytes this call actually sent.
 * @param count    Maximum number of bytes to send in this call
 *
 * @return Number of bytes sent by this call (may be less than count), or
 *         -1 with errno set on failure.
 *         - Invalid descriptor / NULL or negative offset: -1, errno=EINVAL.
 *         - count == 0: returns 0 without touching the descriptors.
 *         - FreeBSD/PS4/PS5 branch: EAGAIN and EINTR from sendfile() are
 *           reported as the partial byte count, which can be 0 when nothing
 *           was sent before the interruption. EIO/ESTALE/EBADF/EFAULT always
 *           yield -1. Any other sendfile() error switches to a buffered
 *           pread() + pal_send_all() loop; if that loop hits EOF or an error
 *           before sending anything the result is -1.
 *         - Generic branch (no sendfile): sends at most FALLBACK_BUFFER_SIZE
 *           bytes per call; EOF yields 0.
 */
ssize_t pal_sendfile(int sock_fd, int file_fd, off_t *offset, size_t count) {
  /* Validate parameters */
  if ((sock_fd < 0) || (file_fd < 0)) {
    errno = EINVAL;
    return -1;
  }

  if ((offset == NULL) || (*offset < 0)) {
    errno = EINVAL;
    return -1;
  }

  if (count == 0U) {
    return 0;
  }

  /*
   * NOTE: PAL_FILE_WRITE_CHUNK_MAX is intentionally NOT applied here.
   *
   * That cap (in this file's fallback definition: 1 MB on PS5, 64 KB on
   * PS4, 256 KB elsewhere; historically 128 KB on PS5) exists to bound
   * per-chunk write latency so the FTP double-buffer producer never starves
   * the kernel TCP recv-buffer long enough to trigger a client inactivity
   * timeout. It is an FTP protocol concern, not a platform abstraction
   * concern.
   *
   * Enforcing it here silently degraded HTTP /api/download throughput from
   * ~300 MB/s to ~240 MB/s: the HTTP server passes 1 MB chunks but each call
   * was internally cut to 128 KB, generating 8x more syscalls per MB.
   *
   * The FTP caller (ftp_commands.c cmd_RETR) now passes pre-capped chunks
   * using PAL_FILE_WRITE_CHUNK_MAX directly, so the limit is preserved where
   * it matters without penalising other callers.
   */

#if defined(__linux__)
  /*
   * Linux sendfile(2)
   * Signature: sendfile(out_fd, in_fd, offset, count)
   *
   * The result is passed through unchanged.
   * NOTE(review): <sys/sendfile.h> is not among this file's direct
   * includes — presumably provided through a project header; confirm.
   */
  ssize_t result = sendfile(sock_fd, file_fd, offset, count);
  return result;

#elif defined(__FreeBSD__) || defined(PLATFORM_PS4) || defined(PLATFORM_PS5)
  /*
   * FreeBSD sendfile(2)
   * Signature: sendfile(in_fd, out_fd, offset, count, hdtr, sbytes, flags)
   *
   * IMPORTANT: Different parameter order than Linux!
   */
  off_t sbytes = 0;
  off_t start_offset = *offset;

  int ret = sendfile(file_fd, sock_fd, start_offset, count, NULL, &sbytes, 0);

  /*
   * Update offset by bytes actually sent.
   * FreeBSD updates sbytes even on EAGAIN (partial send).
   */
  if (sbytes > 0) {
    *offset += sbytes;
  }

  if (ret == 0) {
    /* Success: all bytes sent */
    return sbytes;
  } else if ((ret == -1) && (errno == EAGAIN)) {
    /* Non-blocking socket: partial send — caller retries */
    return sbytes;
  } else if ((ret == -1) && (errno == EINTR)) {
    /*
     * Interrupted by signal mid-transfer.
     *
     * IMPORTANT: *offset has already been advanced by sbytes above.
     * Returning -1 here would cause the caller to retry from the
     * old offset, re-sending bytes already transmitted — file
     * corruption guaranteed.
     *
     * Return sbytes (>= 0) so the caller advances its own position
     * and retries from the correct point.
     */
    return sbytes; /* may be 0 if interrupted before any byte sent */
  } else if ((ret == -1) && ((errno == EIO) || (errno == ESTALE) ||
                             (errno == EBADF) || (errno == EFAULT))) {
    /*
     * Fatal storage-level error during transfer.
     *
     * EIO    : underlying device I/O error (USB read failure, NVMe fault)
     * ESTALE : stale vnode — filesystem unmounted while transfer was in flight
     * EBADF  : fd invalidated — should never reach here, but guard anyway
     * EFAULT : kernel memory fault — extremely rare; never retry
     *
     * CRITICAL: ALWAYS return -1 here, regardless of sbytes.
     *
     * Previous behaviour returned sbytes when sbytes > 0, signalling
     * a "partial success" to the caller. This caused the caller's loop
     * to decrement its remaining-byte counter and call pal_sendfile()
     * again on the same (now-corrupted/unmounted) vnode. On PS5/PS4
     * that second call can trigger an unrecoverable kernel panic.
     *
     * Safety: *offset has already been advanced by sbytes above, so no
     * data is double-sent. The caller sees -1, breaks out of its loop,
     * and closes the connection cleanly — which is the correct behaviour.
     *
     * FTP callers (ftp_commands.c): they detect sent < 0, check errno,
     * and skip the EAGAIN retry loop (see the EIO guard there), falling
     * through to the read()-based cooldown path without any sendfile retry.
     */
    return -1;

  } else {
    /*
     * EINVAL / ENOSYS / ENXIO / other unexpected sendfile() error.
     *
     * On PS5 and PS4, sendfile(2) is not supported for every file+socket
     * combination. Known cases:
     *   - exFAT / FAT32 USB drives: sendfile can return EINVAL because
     *     the kernel vnode driver for exFAT does not implement the
     *     sendfile vnode operation.
     *   - Special pseudo-files (pipes, device nodes, etc.)
     *
     * If sbytes == 0 (sendfile failed before sending a single byte) we
     * transparently fall back to pread(2) + pal_send_all(). The caller
     * never notices the switch — it just receives the expected data.
     *
     * If sbytes > 0 (sendfile sent some data then failed with EINVAL,
     * which is unusual but possible) we continue the fallback from the
     * already-advanced *offset so no bytes are skipped or duplicated.
     *
     * IMPORTANT: this fallback MUST NOT be used for EIO/ESTALE — those
     * errors indicate the underlying storage is gone and pread() will
     * either also fail or return stale data from an inconsistent vnode.
     * They are handled in the branch above.
     */
    static _Thread_local char fb_buf[FALLBACK_BUFFER_SIZE];

    size_t fb_remaining = count - (size_t)sbytes;

    while (fb_remaining > 0U) {
      size_t chunk = (fb_remaining < (size_t)FALLBACK_BUFFER_SIZE)
                         ? fb_remaining
                         : (size_t)FALLBACK_BUFFER_SIZE;

      /*
       * Retry pread() on EINTR: a signal must not surface as a transfer
       * failure here, consistent with the sendfile() EINTR handling above.
       * No data has been consumed by an interrupted pread(), so retrying
       * at the same *offset is safe.
       */
      ssize_t nread;
      do {
        nread = pread(file_fd, fb_buf, chunk, *offset);
      } while ((nread < 0) && (errno == EINTR));

      if (nread <= 0) {
        /* EOF or read error — return what we managed to send */
        return (sbytes > 0) ? sbytes : -1;
      }

      ssize_t nsent = pal_send_all(sock_fd, fb_buf, (size_t)nread, 0);
      if (nsent < 0) {
        return (sbytes > 0) ? sbytes : -1;
      }

      *offset += (off_t)nsent;
      sbytes += (off_t)nsent;
      fb_remaining -= (size_t)nsent;
    }

    return sbytes;
  }

#else
  /*
   * Fallback: Buffered read/write
   * Used on platforms without sendfile() support.
   * Sends at most one FALLBACK_BUFFER_SIZE chunk per call.
   */
  static _Thread_local char buffer[FALLBACK_BUFFER_SIZE];

  size_t chunk = (count < (size_t)FALLBACK_BUFFER_SIZE)
                     ? count
                     : (size_t)FALLBACK_BUFFER_SIZE;

  /* Read from file at specified offset; an interrupted pread() is retried
   * because it has not consumed any data. */
  ssize_t nread;
  do {
    nread = pread(file_fd, buffer, chunk, *offset);
  } while ((nread < 0) && (errno == EINTR));
  if (nread <= 0) {
    return nread; /* EOF or error */
  }

  /* Send via socket */
  ssize_t nsent = pal_send_all(sock_fd, buffer, (size_t)nread, 0);
  if (nsent < 0) {
    return -1;
  }
  *offset += nsent;
  return nsent;
#endif
}
| 419 | |
| 420 | /*===========================================================================* |
| 421 | * FILE OPERATIONS |
| 422 | *===========================================================================*/ |
| 423 | |
| 424 | static ftp_error_t |
| 425 | pal_file_copy_atomic_ex(const char *src_path, const char *dst_path, |
| 426 | pal_copy_progress_cb_t cb, void *user_data, |
| 427 | uint64_t *cumulative, int *out_errno) { |
| 428 | if ((src_path == NULL) || (dst_path == NULL)) { |
| 429 | return FTP_ERR_INVALID_PARAM; |
| 430 | } |
| 431 | |
| 432 | struct stat st; |
| 433 | if (stat(src_path, &st) < 0) { |
| 434 | switch (errno) { |
| 435 | case ENOENT: |
| 436 | return FTP_ERR_NOT_FOUND; |
| 437 | case EACCES: |
| 438 | case EPERM: |
| 439 | return FTP_ERR_PERMISSION; |
| 440 | default: |
| 441 | return FTP_ERR_FILE_STAT; |
| 442 | } |
| 443 | } |
| 444 | |
| 445 | if ((st.st_mode & S_IFMT) != S_IFREG) { |
| 446 | return FTP_ERR_INVALID_PARAM; |
| 447 | } |
| 448 | |
| 449 | int src_fd = -1; |
| 450 | int dst_fd = -1; |
| 451 | uint8_t *copy_buf = NULL; /* heap-allocated; freed in cleanup */ |
| 452 | ftp_error_t out_err = FTP_ERR_FILE_WRITE; |
| 453 | |
| 454 | /* |
| 455 | * TEMP FILENAME STRATEGY — safe for exFAT 255-char name limit |
| 456 | * |
| 457 | * Old: "<dst_path>.zftpd-tmp-<pid>-<counter>" (appends ~25 chars) |
| 458 | * New: "<dst_dir>/.zftpd.<pid>.<counter>.tmp" (fixed short name) |
| 459 | * |
| 460 | * The old scheme overflows when the destination filename already |
| 461 | * approaches 255 characters (common with PS5 game directories). |
| 462 | * The new scheme puts a short, fixed-length temp file in the same |
| 463 | * directory so the final same-FS rename() is always valid. |
| 464 | */ |
| 465 | static atomic_uint_fast32_t g_tmp_counter = ATOMIC_VAR_INIT(0U); |
| 466 | uint_fast32_t counter = atomic_fetch_add(&g_tmp_counter, 1U); |
| 467 | |
| 468 | /* Find parent directory of dst_path */ |
| 469 | char tmp_path[FTP_PATH_MAX]; |
| 470 | const char *last_slash = strrchr(dst_path, '/'); |
| 471 | if (last_slash != NULL) { |
| 472 | size_t dir_len = (size_t)(last_slash - dst_path); |
| 473 | int n = snprintf(tmp_path, sizeof(tmp_path), "%.*s/.zftpd.%lu.%lu.tmp", |
| 474 | (int)dir_len, dst_path, (unsigned long)getpid(), |
| 475 | (unsigned long)counter); |
| 476 | if ((n < 0) || ((size_t)n >= sizeof(tmp_path))) { |
| 477 | return FTP_ERR_PATH_TOO_LONG; |
| 478 | } |
| 479 | } else { |
| 480 | int n = snprintf(tmp_path, sizeof(tmp_path), ".zftpd.%lu.%lu.tmp", |
| 481 | (unsigned long)getpid(), (unsigned long)counter); |
| 482 | if ((n < 0) || ((size_t)n >= sizeof(tmp_path))) { |
| 483 | return FTP_ERR_PATH_TOO_LONG; |
| 484 | } |
| 485 | } |
| 486 | |
| 487 | src_fd = open(src_path, O_RDONLY); |
| 488 | |
| 489 | /* |
| 490 | * SOURCE FD CACHE POLICY — F_NOCACHE + POSIX_FADV_SEQUENTIAL |
| 491 | * |
| 492 | * ┌────────────────────────────────────────────────────────────────────┐ |
| 493 | * │ WHY F_NOCACHE ON THE SOURCE fd? │ |
| 494 | * │ │ |
| 495 | * │ For a sequential file-to-file copy the source pages are read │ |
| 496 | * │ exactly once and never needed again. Without F_NOCACHE, every │ |
| 497 | * │ read() places the just-read pages in the kernel page cache. For │ |
| 498 | * │ a 12 GB game file this fills all available daemon RSS (~3-4 GB │ |
| 499 | * │ on PS5) within the first few seconds of the copy. Once RAM is │ |
| 500 | * │ full the kernel must evict older cache pages on every new read, │ |
| 501 | * │ adding eviction overhead that degrades throughput from 400 MB/s │ |
| 502 | * │ (clean cache) to ~250 MB/s (cache thrashing). │ |
| 503 | * │ │ |
| 504 | * │ F_NOCACHE (FreeBSD equivalent of O_DIRECT) bypasses the page │ |
| 505 | * │ cache for this fd: source blocks are read directly into the │ |
| 506 | * │ pipeline buffer without caching, keeping RAM pressure flat for │ |
| 507 | * │ the entire duration of the copy regardless of file size. │ |
| 508 | * │ │ |
| 509 | * │ SAFETY: │ |
| 510 | * │ • This fd is NEVER passed to sendfile() — pal_file_copy_atomic_ex │ |
| 511 | * │ uses read()+write() loops only. The F_NOCACHE + sendfile() │ |
| 512 | * │ KP concern described in pal_file_open() does NOT apply here. │ |
| 513 | * │ • F_NOCACHE on the SOURCE (read) fd is safe on PS5 PFS even for │ |
| 514 | * │ encrypted /data files: decryption happens inside the kernel │ |
| 515 | * │ at the block layer before the data reaches userspace. │ |
| 516 | * │ • F_NOCACHE is deliberately NOT set on the DESTINATION (write) fd │ |
| 517 | * │ (dst_fd, opened below). The write-side has PFS alignment │ |
| 518 | * │ constraints: F_NOCACHE forces 512-byte-aligned write() calls │ |
| 519 | * │ and OrbisOS PFS returns EINVAL for unaligned writes, breaking │ |
| 520 | * │ the copy. Write caching is beneficial anyway (write-back │ |
| 521 | * │ coalescing helps the NVMe controller build large sequential │ |
| 522 | * │ extents rather than many small random writes). │ |
| 523 | * │ │ |
| 524 | * │ RESULT: sustained 400 MB/s for /data → /mnt/ext1 (USB-C M.2) │ |
| 525 | * │ instead of 400 MB/s for small files / 250 MB/s for 12+ GB files. │ |
| 526 | * └────────────────────────────────────────────────────────────────────┘ |
| 527 | * |
| 528 | * POSIX_FADV_SEQUENTIAL is also set: hints the kernel read-ahead engine |
| 529 | * to prefetch large contiguous extents. On PS5 this doubles read-ahead |
| 530 | * window from the default (128 KB) to 2× the file system block size, |
| 531 | * keeping the pipeline reader thread from stalling on rotational-latency- |
| 532 | * equivalent NVMe seek delays between extents. |
| 533 | */ |
| 534 | if (src_fd >= 0) { |
| 535 | #ifdef F_NOCACHE |
| 536 | /* |
| 537 | * Bypass page cache for source reads. |
| 538 | * Failure is non-fatal: fall through to cached reads (slower but correct). |
| 539 | */ |
| 540 | (void)fcntl(src_fd, F_NOCACHE, 1); |
| 541 | #endif |
| 542 | #ifdef F_RDAHEAD |
| 543 | (void)fcntl(src_fd, F_RDAHEAD, 1); |
| 544 | #endif |
| 545 | #if defined(POSIX_FADV_SEQUENTIAL) && !defined(PLATFORM_PS4) && !defined(PS4) |
| 546 | (void)posix_fadvise(src_fd, 0, 0, POSIX_FADV_SEQUENTIAL); |
| 547 | #endif |
| 548 | } |
| 549 | if (src_fd < 0) { |
| 550 | int e = errno; |
| 551 | { |
| 552 | char msg[256]; |
| 553 | snprintf(msg, sizeof(msg), "[XDEV] open(src) failed: errno=%d path=%s", e, |
| 554 | src_path); |
| 555 | ftp_log_line(FTP_LOG_WARN, msg); |
| 556 | } |
| 557 | if (out_errno != NULL) { |
| 558 | *out_errno = e; |
| 559 | } |
| 560 | switch (e) { |
| 561 | case ENOENT: |
| 562 | out_err = FTP_ERR_NOT_FOUND; |
| 563 | break; |
| 564 | case EACCES: |
| 565 | case EPERM: |
| 566 | out_err = FTP_ERR_PERMISSION; |
| 567 | break; |
| 568 | default: |
| 569 | out_err = FTP_ERR_FILE_OPEN; |
| 570 | break; |
| 571 | } |
| 572 | goto cleanup; |
| 573 | } |
| 574 | |
| 575 | mode_t mode = (mode_t)(st.st_mode & 0777); |
| 576 | |
| 577 | /* |
| 578 | * PRE-FLIGHT SPACE CHECK (PS4/PS5 PFS silent-full workaround) |
| 579 | * |
| 580 | * PFS on /data does not always propagate ENOSPC through write() — it |
| 581 | * can silently return 0, leaving errno==0 and making the failure |
| 582 | * completely opaque. Check available space up front so we can abort |
| 583 | * immediately with a clear diagnostic rather than failing mid-copy |
| 584 | * with errno=0 deep inside the write pipeline. |
| 585 | * |
| 586 | * statvfs() on the DESTINATION directory (not src) gives us the free |
| 587 | * blocks on the target filesystem. The value can race with other |
| 588 | * writers (~1 %), but when the source size exceeds it the copy is |
| 589 | * rejected here with ENOSPC. If statvfs() itself fails the check is |
| 590 | * skipped and we rely on the write loop to surface ENOSPC instead. |
| 591 | */ |
| 592 | { |
| 593 | /* Extract parent directory of dst_path for statvfs */ |
| 594 | char dst_dir[FTP_PATH_MAX]; |
| 595 | const char *dst_slash = strrchr(dst_path, '/'); |
| 596 | if (dst_slash != NULL && dst_slash != dst_path) { |
| 597 | size_t dlen = (size_t)(dst_slash - dst_path); |
| 598 | if (dlen < sizeof(dst_dir)) { |
| 599 | memcpy(dst_dir, dst_path, dlen); |
| 600 | dst_dir[dlen] = '\0'; |
| 601 | } else { |
| 602 | dst_dir[0] = '\0'; /* fallback: skip check */ |
| 603 | } |
| 604 | } else { |
| 605 | dst_dir[0] = '/'; |
| 606 | dst_dir[1] = '\0'; |
| 607 | } |
| 608 | |
| 609 | if (dst_dir[0] != '\0') { |
| 610 | struct statvfs vfs; |
| 611 | if (statvfs(dst_dir, &vfs) == 0) { |
| 612 | uint64_t free_bytes = |
| 613 | (uint64_t)vfs.f_bavail * (uint64_t)vfs.f_frsize; |
| 614 | uint64_t need_bytes = (uint64_t)st.st_size; |
| 615 | if (need_bytes > free_bytes) { |
| 616 | char msg[256]; |
| 617 | snprintf(msg, sizeof(msg), |
| 618 | "[XDEV] pre-flight ENOSPC: need=%llu free=%llu dst=%s", |
| 619 | (unsigned long long)need_bytes, |
| 620 | (unsigned long long)free_bytes, dst_path); |
| 621 | ftp_log_line(FTP_LOG_WARN, msg); |
| 622 | if (out_errno != NULL) { |
| 623 | *out_errno = ENOSPC; |
| 624 | } |
| 625 | return FTP_ERR_FILE_WRITE; |
| 626 | } |
| 627 | } |
| 628 | } |
| 629 | } |
| 630 | |
| 631 | #if defined(PLATFORM_PS4) || defined(PLATFORM_PS5) |
| 632 | dst_fd = open(tmp_path, O_WRONLY | O_CREAT | O_TRUNC, mode); |
| 633 | #else |
| 634 | dst_fd = open(tmp_path, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, mode); |
| 635 | #endif |
| 636 | if (dst_fd < 0) { |
| 637 | int e = errno; |
| 638 | { |
| 639 | char msg[256]; |
| 640 | snprintf(msg, sizeof(msg), "[XDEV] open(tmp) failed: errno=%d path=%s", e, |
| 641 | tmp_path); |
| 642 | ftp_log_line(FTP_LOG_WARN, msg); |
| 643 | } |
| 644 | if (out_errno != NULL) { |
| 645 | *out_errno = e; |
| 646 | } |
| 647 | switch (e) { |
| 648 | case EACCES: |
| 649 | case EPERM: |
| 650 | out_err = FTP_ERR_PERMISSION; |
| 651 | break; |
| 652 | default: |
| 653 | out_err = FTP_ERR_FILE_OPEN; |
| 654 | break; |
| 655 | } |
| 656 | goto cleanup; |
| 657 | } |
| 658 | |
| 659 | /* |
| 660 | * DESIGN RATIONALE — heap vs. static _Thread_local: |
| 661 | * |
| 662 | * static _Thread_local would allocate PAL_FILE_COPY_BUFFER_SIZE (4 MB on |
| 663 | * PS5, 1 MB on PS4) permanently for every thread that ever calls this function, |
| 664 | * for the thread's entire lifetime — even when idle between transfers. |
| 665 | * With N concurrent FTP sessions that means N MB of non-reclaimable RSS. |
| 666 | * |
| 667 | * A single malloc/free per copy call returns the memory immediately after |
| 668 | * the operation, keeping the daemon's footprint minimal when idle. |
| 669 | */ |
| 670 | /*=========================================================================* |
| 671 | * COPY LOOP |
| 672 | * |
| 673 | * PS5: double-buffer pipeline — reader thread (USB) runs in parallel |
| 674 | * with the writer (NVMe), achieving min(USB_bw, NVMe_bw) = 215 MB/s. |
| 675 | * |
| 676 | * Other: simple serial read→write loop. |
| 677 | *=========================================================================*/ |
| 678 | |
| 679 | /*-----------------------------------------------------------------------* |
| 680 | * Double-buffer copy pipeline |
| 681 | * |
| 682 | * Allocate both buffers up-front. If either malloc fails, fall through |
| 683 | * to the serial path (pal_malloc returns NULL gracefully). |
| 684 | *-----------------------------------------------------------------------*/ |
| 685 | { |
| 686 | uint8_t *dbuf0 = (uint8_t *)pal_malloc(PAL_FILE_COPY_BUFFER_SIZE); |
| 687 | uint8_t *dbuf1 = (uint8_t *)pal_malloc(PAL_FILE_COPY_BUFFER_SIZE); |
| 688 | |
| 689 | /* Log arena state immediately after the two allocs so we can correlate |
| 690 | * with SceShellCore heap pressure messages in the system log. */ |
| 691 | { |
| 692 | pal_alloc_stats_t ast; |
| 693 | pal_alloc_get_stats(&ast); |
| 694 | char msg[256]; |
| 695 | snprintf(msg, sizeof(msg), |
| 696 | "[XDEV] pipeline alloc: buf0=%s buf1=%s " |
| 697 | "arena_inuse=%llu peak=%llu failures=%llu file=%s", |
| 698 | (dbuf0 != NULL) ? "ok" : "NULL", |
| 699 | (dbuf1 != NULL) ? "ok" : "NULL", |
| 700 | (unsigned long long)ast.bytes_in_use, |
| 701 | (unsigned long long)ast.bytes_peak, |
| 702 | (unsigned long long)ast.failures, |
| 703 | src_path); |
| 704 | ftp_log_line((dbuf0 && dbuf1) ? FTP_LOG_INFO : FTP_LOG_WARN, msg); |
| 705 | } |
| 706 | |
| 707 | if ((dbuf0 != NULL) && (dbuf1 != NULL)) { |
| 708 | /* Initialise pipeline state */ |
| 709 | copy_pipe_t pipe; |
| 710 | pipe.buf[0] = dbuf0; |
| 711 | pipe.buf[1] = dbuf1; |
| 712 | pipe.len[0] = 0U; |
| 713 | pipe.len[1] = 0U; |
| 714 | pipe.fill_idx = 0; |
| 715 | pipe.src_fd = src_fd; |
| 716 | pipe.buf_sz = (size_t)PAL_FILE_COPY_BUFFER_SIZE; |
| 717 | pipe.done = 0; |
| 718 | pipe.reader_err = 0; |
| 719 | pthread_mutex_init(&pipe.mtx, NULL); |
| 720 | pthread_cond_init(&pipe.cv_ready, NULL); |
| 721 | pthread_cond_init(&pipe.cv_free, NULL); |
| 722 | |
| 723 | pthread_t reader_tid; |
| 724 | int pt_ret = pthread_create(&reader_tid, NULL, copy_reader_thread, &pipe); |
| 725 | int thread_ok = (pt_ret == 0) ? 1 : 0; |
| 726 | |
| 727 | /* Log pthread_create result — on PS4 this can fail with EAGAIN (thread |
| 728 | * limit) or ENOMEM (stack allocation failed under memory pressure). */ |
| 729 | if (thread_ok == 0) { |
| 730 | char msg[256]; |
| 731 | snprintf(msg, sizeof(msg), |
| 732 | "[XDEV] pthread_create failed: errno=%d — " |
| 733 | "falling back to serial copy for %s", |
| 734 | pt_ret, src_path); |
| 735 | ftp_log_line(FTP_LOG_WARN, msg); |
| 736 | } |
| 737 | |
| 738 | if (thread_ok != 0) { |
| 739 | /* Writer loop: drain the buffer that the reader just filled */ |
| 740 | ssize_t written = 0; /* last write result — checked after join */ |
| 741 | int write_errno = 0; /* saved errno from the last failed write(); |
| 742 | * hoisted outside the for loop so it remains |
| 743 | * accessible after break for the post-join log */ |
| 744 | pthread_mutex_lock(&pipe.mtx); |
| 745 | for (;;) { |
| 746 | /* Wait for a filled buffer or EOF/error */ |
| 747 | while ((pipe.len[1 - pipe.fill_idx] == 0U) && (pipe.done == 0)) { |
| 748 | pthread_cond_wait(&pipe.cv_ready, &pipe.mtx); |
| 749 | } |
| 750 | |
| 751 | int drain_idx = 1 - pipe.fill_idx; |
| 752 | size_t nbytes = pipe.len[drain_idx]; |
| 753 | |
| 754 | if ((nbytes == 0U) && (pipe.done != 0)) { |
| 755 | break; /* EOF — pipeline drained */ |
| 756 | } |
| 757 | |
| 758 | pthread_mutex_unlock(&pipe.mtx); |
| 759 | |
| 760 | /* |
| 761 | * Write the full buffer in a single write() call. |
| 762 | * |
| 763 | * We intentionally bypass pal_file_write_all()'s |
| 764 | * PAL_WRITE_CHUNK_MAX=128 KB subdivision here. That limit |
| 765 | * exists to prevent TCP recv-buffer stalls in cmd_STOR; it |
| 766 | * has no relevance for a file-to-file copy. A single 4 MB |
| 767 | * write() is processed by the PFS driver as a sequential |
| 768 | * extent, avoiding per-chunk AES-XTS context setup overhead. |
| 769 | */ |
| 770 | written = 0; |
| 771 | write_errno = 0; /* reset each iteration; saved before mutex lock */ |
| 772 | { |
| 773 | const uint8_t *p_out = pipe.buf[drain_idx]; |
| 774 | size_t remaining = nbytes; |
| 775 | while (remaining > 0U) { |
| 776 | ssize_t w = write(dst_fd, p_out, remaining); |
| 777 | if (w > 0) { |
| 778 | p_out += (size_t)w; |
| 779 | remaining -= (size_t)w; |
| 780 | written += w; |
| 781 | continue; |
| 782 | } |
| 783 | if ((w < 0) && (errno == EINTR)) { |
| 784 | continue; |
| 785 | } |
| 786 | /* |
| 787 | * Save errno NOW — pthread_mutex_lock() below can |
| 788 | * overwrite it on success (POSIX does not guarantee |
| 789 | * errno is untouched on a successful call). |
| 790 | * |
| 791 | * IMPORTANT — w == 0 case (PS4/PS5 PFS quirk): |
| 792 | * POSIX does not define write() returning 0 for a |
| 793 | * positive count on a regular file. On Orbis/Prospero |
| 794 | * PFS, a full filesystem silently returns 0 instead of |
| 795 | * -1 + ENOSPC. Detect this and synthesise ENOSPC so |
| 796 | * the log always shows a meaningful errno value. |
| 797 | */ |
| 798 | write_errno = (w == 0) ? ENOSPC : errno; |
| 799 | written = -1; |
| 800 | break; |
| 801 | } |
| 802 | } |
| 803 | |
| 804 | pthread_mutex_lock(&pipe.mtx); |
| 805 | |
| 806 | if (written < 0) { |
| 807 | /* |
| 808 | * Write error — signal reader to stop, then break. |
| 809 | * We log below after joining the reader thread. |
| 810 | */ |
| 811 | if (out_errno != NULL) { |
| 812 | *out_errno = write_errno; |
| 813 | } |
| 814 | pipe.done = 1; |
| 815 | pthread_cond_signal(&pipe.cv_free); |
| 816 | out_err = FTP_ERR_FILE_WRITE; |
| 817 | break; |
| 818 | } |
| 819 | |
| 820 | /* Report progress */ |
| 821 | if ((cb != NULL) && (cumulative != NULL)) { |
| 822 | *cumulative += (uint64_t)written; |
| 823 | if (cb(*cumulative, user_data) < 0) { |
| 824 | pipe.done = 1; |
| 825 | pthread_cond_signal(&pipe.cv_free); |
| 826 | out_err = FTP_ERR_UNKNOWN; /* cancelled */ |
| 827 | break; |
| 828 | } |
| 829 | } |
| 830 | |
| 831 | /* Mark buffer as free and wake reader */ |
| 832 | pipe.len[drain_idx] = 0U; |
| 833 | pthread_cond_signal(&pipe.cv_free); |
| 834 | } |
| 835 | pthread_mutex_unlock(&pipe.mtx); |
| 836 | |
| 837 | /* Join reader; collect any read-side error */ |
| 838 | (void)pthread_join(reader_tid, NULL); |
| 839 | |
| 840 | /* |
| 841 | * Post-join result resolution. |
| 842 | * |
| 843 | * Priority: write error > read error > cancellation > success. |
| 844 | * |
| 845 | * IMPORTANT: do NOT use `out_err` to detect write failures here. |
| 846 | * `out_err` is initialised to FTP_ERR_FILE_WRITE (line ~376) and |
| 847 | * is only changed during the loop for cancellation (FTP_ERR_UNKNOWN) |
| 848 | * or left at its initial value on both success AND write failure. |
| 849 | * Using it as a success/failure discriminator therefore confuses the |
| 850 | * two cases and causes a successful pipeline copy to return |
| 851 | * FTP_ERR_FILE_WRITE. Use `written < 0` instead — it is set to -1 |
| 852 | * exclusively by the write-error break path. |
| 853 | */ |
| 854 | if (written < 0) { |
| 855 | /* write() failed — write_errno was captured before mutex lock */ |
| 856 | char msg[256]; |
| 857 | snprintf(msg, sizeof(msg), "[COPY] write failed: errno=%d dst=%s", |
| 858 | write_errno, dst_path); |
| 859 | ftp_log_line(FTP_LOG_WARN, msg); |
| 860 | if (out_errno != NULL) { |
| 861 | *out_errno = write_errno; |
| 862 | } |
| 863 | out_err = FTP_ERR_FILE_WRITE; |
| 864 | } else if (pipe.reader_err != 0) { |
| 865 | char msg[256]; |
| 866 | snprintf(msg, sizeof(msg), "[COPY] read failed: errno=%d src=%s", |
| 867 | pipe.reader_err, src_path); |
| 868 | ftp_log_line(FTP_LOG_WARN, msg); |
| 869 | if (out_errno != NULL) { |
| 870 | *out_errno = pipe.reader_err; |
| 871 | } |
| 872 | out_err = FTP_ERR_FILE_READ; |
| 873 | } else if (out_err == FTP_ERR_UNKNOWN) { |
| 874 | /* cancelled by progress callback — out_err already set */ |
| 875 | } else { |
| 876 | /* Pipeline completed successfully */ |
| 877 | out_err = FTP_OK; |
| 878 | } |
| 879 | } else { |
| 880 | /* pthread_create failed — fall through to serial path below */ |
| 881 | out_err = FTP_ERR_FILE_WRITE; /* will be overwritten by serial path */ |
| 882 | } |
| 883 | |
| 884 | pthread_mutex_destroy(&pipe.mtx); |
| 885 | pthread_cond_destroy(&pipe.cv_ready); |
| 886 | pthread_cond_destroy(&pipe.cv_free); |
| 887 | |
| 888 | pal_free(dbuf0); |
| 889 | pal_free(dbuf1); |
| 890 | |
| 891 | if (thread_ok != 0) { |
| 892 | /* Pipeline ran (success or failure) — skip serial fallback */ |
| 893 | if (out_err != FTP_OK) { |
| 894 | goto cleanup; |
| 895 | } |
| 896 | goto copy_done; |
| 897 | } |
| 898 | /* thread_ok == 0: fall through to serial path (already logged above) */ |
| 899 | } else { |
| 900 | /* One or both malloc failed — free whichever succeeded and fall through */ |
| 901 | { |
| 902 | char msg[256]; |
| 903 | snprintf(msg, sizeof(msg), |
| 904 | "[XDEV] pipeline malloc failed (buf0=%s buf1=%s) — " |
| 905 | "falling back to serial copy for %s", |
| 906 | (dbuf0 != NULL) ? "ok" : "NULL", |
| 907 | (dbuf1 != NULL) ? "ok" : "NULL", |
| 908 | src_path); |
| 909 | ftp_log_line(FTP_LOG_WARN, msg); |
| 910 | } |
| 911 | pal_free(dbuf0); |
| 912 | pal_free(dbuf1); |
| 913 | } |
| 914 | } |
| 915 | /* --- Serial fallback (malloc failure or pthread_create failure) --- */ |
| 916 | |
| 917 | { |
| 918 | char msg[256]; |
| 919 | snprintf(msg, sizeof(msg), |
| 920 | "[XDEV] serial copy starting: file_size=%llu buf=%u src=%s", |
| 921 | (unsigned long long)st.st_size, |
| 922 | (unsigned)PAL_FILE_COPY_BUFFER_SIZE, |
| 923 | src_path); |
| 924 | ftp_log_line(FTP_LOG_INFO, msg); |
| 925 | } |
| 926 | |
| 927 | copy_buf = (uint8_t *)pal_malloc(PAL_FILE_COPY_BUFFER_SIZE); |
| 928 | if (copy_buf == NULL) { |
| 929 | { |
| 930 | pal_alloc_stats_t ast; |
| 931 | pal_alloc_get_stats(&ast); |
| 932 | char msg[256]; |
| 933 | snprintf(msg, sizeof(msg), |
| 934 | "[XDEV] serial malloc failed: arena_inuse=%llu peak=%llu " |
| 935 | "failures=%llu buf_needed=%u src=%s", |
| 936 | (unsigned long long)ast.bytes_in_use, |
| 937 | (unsigned long long)ast.bytes_peak, |
| 938 | (unsigned long long)ast.failures, |
| 939 | (unsigned)PAL_FILE_COPY_BUFFER_SIZE, |
| 940 | src_path); |
| 941 | ftp_log_line(FTP_LOG_WARN, msg); |
| 942 | } |
| 943 | out_err = FTP_ERR_OUT_OF_MEMORY; |
| 944 | goto cleanup; |
| 945 | } |
| 946 | |
| 947 | { |
| 948 | uint64_t serial_written = 0U; |
| 949 | uint64_t serial_last_log = 0U; |
| 950 | /* Log every 64 MB so we can see how far the copy got before failing */ |
| 951 | const uint64_t LOG_INTERVAL = 64U * 1024U * 1024U; |
| 952 | |
| 953 | for (;;) { |
| 954 | ssize_t r = read(src_fd, copy_buf, (size_t)PAL_FILE_COPY_BUFFER_SIZE); |
| 955 | if (r > 0) { |
| 956 | /* |
| 957 | * Write the full buffer in a single write() loop — do NOT call |
| 958 | * pal_file_write_all(), which subdivides writes into |
| 959 | * PAL_FILE_WRITE_CHUNK_MAX (128 KB on PS5, 64 KB on PS4). |
| 960 | * |
| 961 | * That limit exists exclusively to prevent TCP recv-buffer stalls |
| 962 | * in cmd_STOR. It has no relevance for a file-to-file copy: large |
| 963 | * single write() calls let the USB exFAT driver allocate contiguous |
| 964 | * extents and avoid per-chunk FAT chain updates, which is the same |
| 965 | * rationale used by the double-buffer pipeline above. |
| 966 | * |
| 967 | * On PS5 (NVMe PFS, ~215 MB/s write bandwidth), using 128 KB chunks |
| 968 | * instead of 4 MB chunks causes ~32x more write() syscalls per buffer, |
| 969 | * each with its own AES-XTS context setup in the kernel VFS layer. |
| 970 | * This is the primary cause of the observed 130 MB/s cap vs the |
| 971 | * 230 MB/s target. |
| 972 | * |
| 973 | * PS4/PS5 PFS quirk: a full filesystem silently returns write() == 0 |
| 974 | * instead of -1 + ENOSPC. Detect and synthesise ENOSPC so the log |
| 975 | * always shows a meaningful errno value. |
| 976 | */ |
| 977 | { |
| 978 | const uint8_t *p_wr = copy_buf; |
| 979 | size_t rem_wr = (size_t)r; |
| 980 | int write_ok = 1; |
| 981 | int write_errno = 0; |
| 982 | |
| 983 | while (rem_wr > 0U) { |
| 984 | ssize_t w = write(dst_fd, p_wr, rem_wr); |
| 985 | if (w > 0) { |
| 986 | p_wr += (size_t)w; |
| 987 | rem_wr -= (size_t)w; |
| 988 | continue; |
| 989 | } |
| 990 | if ((w < 0) && (errno == EINTR)) { |
| 991 | continue; |
| 992 | } |
| 993 | /* |
| 994 | * w == 0: PS4/PS5 PFS silent ENOSPC — synthesise the real errno |
| 995 | * so the caller and log see a meaningful error code. |
| 996 | */ |
| 997 | write_errno = (w == 0) ? ENOSPC : errno; |
| 998 | write_ok = 0; |
| 999 | break; |
| 1000 | } |
| 1001 | |
| 1002 | if (write_ok == 0) { |
| 1003 | { |
| 1004 | char msg[256]; |
| 1005 | snprintf(msg, sizeof(msg), |
| 1006 | "[XDEV] write failed: errno=%d written_so_far=%llu " |
| 1007 | "file_size=%llu dst=%s", |
| 1008 | write_errno, |
| 1009 | (unsigned long long)serial_written, |
| 1010 | (unsigned long long)st.st_size, |
| 1011 | dst_path); |
| 1012 | ftp_log_line(FTP_LOG_WARN, msg); |
| 1013 | if (out_errno != NULL) { |
| 1014 | *out_errno = write_errno; |
| 1015 | } |
| 1016 | } |
| 1017 | out_err = FTP_ERR_FILE_WRITE; |
| 1018 | goto cleanup; |
| 1019 | } |
| 1020 | } |
| 1021 | serial_written += (uint64_t)r; |
| 1022 | |
| 1023 | /* Periodic progress log */ |
| 1024 | if (serial_written - serial_last_log >= LOG_INTERVAL) { |
| 1025 | serial_last_log = serial_written; |
| 1026 | char msg[256]; |
| 1027 | snprintf(msg, sizeof(msg), |
| 1028 | "[XDEV] serial progress: %llu / %llu bytes (%.1f%%) dst=%s", |
| 1029 | (unsigned long long)serial_written, |
| 1030 | (unsigned long long)st.st_size, |
| 1031 | (st.st_size > 0) |
| 1032 | ? (100.0 * (double)serial_written / (double)st.st_size) |
| 1033 | : 0.0, |
| 1034 | dst_path); |
| 1035 | ftp_log_line(FTP_LOG_INFO, msg); |
| 1036 | } |
| 1037 | |
| 1038 | /* Report progress to caller; check for cancellation */ |
| 1039 | if ((cb != NULL) && (cumulative != NULL)) { |
| 1040 | *cumulative += (uint64_t)r; |
| 1041 | if (cb(*cumulative, user_data) < 0) { |
| 1042 | out_err = FTP_ERR_UNKNOWN; /* cancelled */ |
| 1043 | goto cleanup; |
| 1044 | } |
| 1045 | } |
| 1046 | continue; |
| 1047 | } |
| 1048 | if (r == 0) { |
| 1049 | break; |
| 1050 | } |
| 1051 | if (errno == EINTR) { |
| 1052 | continue; |
| 1053 | } |
| 1054 | { |
| 1055 | int e = errno; |
| 1056 | char msg[256]; |
| 1057 | snprintf(msg, sizeof(msg), |
| 1058 | "[XDEV] read failed: errno=%d written_so_far=%llu src=%s", |
| 1059 | e, (unsigned long long)serial_written, src_path); |
| 1060 | ftp_log_line(FTP_LOG_WARN, msg); |
| 1061 | if (out_errno != NULL) { |
| 1062 | *out_errno = e; |
| 1063 | } |
| 1064 | } |
| 1065 | out_err = FTP_ERR_FILE_READ; |
| 1066 | goto cleanup; |
| 1067 | } /* end serial for(;;) loop */ |
| 1068 | } /* end serial_written scope */ |
| 1069 | |
| 1070 | out_err = FTP_OK; |
| 1071 | |
| 1072 | copy_done:; |
| 1073 | |
| 1074 | /* |
| 1075 | * POST-COPY CACHE EVICTION — release source file pages from page cache. |
| 1076 | * |
| 1077 | * After the copy pipeline drains the source file, all its pages are still |
| 1078 | * pinned in the page cache (unless F_NOCACHE was effective above). |
| 1079 | * POSIX_FADV_DONTNEED marks them as immediately reclaimable without |
| 1080 | * waiting for memory pressure. |
| 1081 | * |
| 1082 | * This matters most when: |
| 1083 | * (a) F_NOCACHE is unavailable (non-PS4/PS5 platforms) |
| 1084 | * (b) F_NOCACHE failed silently (fcntl returned -1) |
| 1085 | * (c) The src filesystem does not honour F_NOCACHE (some exFAT builds) |
| 1086 | * |
| 1087 | * On PS5 this keeps the daemon RSS low between consecutive copy operations, |
| 1088 | * so that the page cache arena is fresh for the NEXT file's read pipeline |
| 1089 | * rather than full of the previous file's stale blocks. |
| 1090 | * |
| 1091 | * Safe on both Linux and FreeBSD/PS5: fadvise DONTNEED on a read-only fd |
| 1092 | * simply marks pages as low-priority; it never writes or invalidates data. |
| 1093 | */ |
| 1094 | #if defined(POSIX_FADV_DONTNEED) && !defined(PLATFORM_PS4) && !defined(PS4) |
| 1095 | if (src_fd >= 0) { |
| 1096 | (void)posix_fadvise(src_fd, 0, 0, POSIX_FADV_DONTNEED); |
| 1097 | } |
| 1098 | if (dst_fd >= 0) { |
| 1099 | (void)posix_fadvise(dst_fd, 0, 0, POSIX_FADV_DONTNEED); |
| 1100 | } |
| 1101 | #endif |
| 1102 | |
| 1103 | /* Log completion before the atomic rename so a crash here is diagnosable */ |
| 1104 | { |
| 1105 | char msg[256]; |
| 1106 | snprintf(msg, sizeof(msg), |
| 1107 | "[XDEV] copy complete, renaming tmp -> dst: %s", dst_path); |
| 1108 | ftp_log_line(FTP_LOG_INFO, msg); |
| 1109 | } |
| 1110 | |
| 1111 | if (rename(tmp_path, dst_path) < 0) { |
| 1112 | { |
| 1113 | int e = errno; |
| 1114 | char msg[256]; |
| 1115 | snprintf(msg, sizeof(msg), |
| 1116 | "[XDEV] rename(tmp->dst) failed: errno=%d tmp=%s dst=%s", e, |
| 1117 | tmp_path, dst_path); |
| 1118 | ftp_log_line(FTP_LOG_WARN, msg); |
| 1119 | if (out_errno != NULL) { |
| 1120 | *out_errno = e; |
| 1121 | } |
| 1122 | } |
| 1123 | out_err = FTP_ERR_FILE_WRITE; |
| 1124 | goto cleanup; |
| 1125 | } |
| 1126 | |
| 1127 | out_err = FTP_OK; |
| 1128 | |
| 1129 | cleanup: |
| 1130 | pal_free(copy_buf); /* safe: pal_free(NULL) is a no-op */ |
| 1131 | if (dst_fd >= 0) { |
| 1132 | (void)close(dst_fd); |
| 1133 | } |
| 1134 | if (src_fd >= 0) { |
| 1135 | (void)close(src_fd); |
| 1136 | } |
| 1137 | if (out_err != FTP_OK) { |
| 1138 | (void)unlink(tmp_path); |
| 1139 | } |
| 1140 | return out_err; |
| 1141 | } |
| 1142 | |
| 1143 | /** |
| 1144 | * @brief Safe file open |
| 1145 | */ |
| 1146 | int pal_file_open(const char *path, int flags, mode_t mode) { |
| 1147 | if (path == NULL) { |
| 1148 | errno = EINVAL; |
| 1149 | return FTP_ERR_INVALID_PARAM; |
| 1150 | } |
| 1151 | |
| 1152 | /* Check path length */ |
| 1153 | size_t len = strlen(path); |
| 1154 | if (len >= FTP_PATH_MAX) { |
| 1155 | errno = ENAMETOOLONG; |
| 1156 | return FTP_ERR_PATH_TOO_LONG; |
| 1157 | } |
| 1158 | |
| 1159 | /* Open file */ |
| 1160 | int fd = open(path, flags, mode); |
| 1161 | if (fd < 0) { |
| 1162 | /* Map errno to FTP error code */ |
| 1163 | switch (errno) { |
| 1164 | case ENOENT: |
| 1165 | return FTP_ERR_NOT_FOUND; |
| 1166 | case EACCES: |
| 1167 | case EPERM: |
| 1168 | return FTP_ERR_PERMISSION; |
| 1169 | case EMFILE: |
| 1170 | case ENFILE: |
| 1171 | return FTP_ERR_OUT_OF_MEMORY; |
| 1172 | default: |
| 1173 | return FTP_ERR_FILE_OPEN; |
| 1174 | } |
| 1175 | } |
| 1176 | |
| 1177 | #if defined(PLATFORM_PS4) || defined(PLATFORM_PS5) |
| 1178 | /* |
| 1179 | * F_NOCACHE / POSIX_FADV_SEQUENTIAL — PLATFORM NOTES |
| 1180 | * |
| 1181 | * F_NOCACHE (FreeBSD O_DIRECT equivalent): |
| 1182 | * Bypasses the kernel page cache for this fd. This is intentionally |
| 1183 | * NOT set on PS5 because sendfile(2) relies on the page cache to pin |
| 1184 | * source pages before DMA-ing them to the socket buffer. Setting |
| 1185 | * F_NOCACHE on an fd that is later passed to sendfile() produces |
| 1186 | * undefined behavior on FreeBSD; on PS5's modified kernel this |
| 1187 | * manifests as a kernel panic when the file resides on exFAT/USB. |
| 1188 | * |
| 1189 | * PS4: F_NOCACHE is retained — PS4 does not use sendfile() for the |
| 1190 | * data transfer path (VFS_CAP_SENDFILE is not set by vfs_open on PS4 |
| 1191 | * for USB-backed files via the fstatfs check added above). |
| 1192 | * |
| 1193 | * POSIX_FADV_SEQUENTIAL: |
| 1194 | * Safe on PS5 — does not affect sendfile() compatibility. |
| 1195 | * Retained for read-ahead hinting on sequential transfers. |
| 1196 | */ |
| 1197 | #if defined(PLATFORM_PS4) |
| 1198 | #ifdef F_NOCACHE |
| 1199 | /* F_NOCACHE is only safe for read-only fds on PFS-encrypted partitions. |
| 1200 | * For write fds, OrbisOS PFS enforces alignment constraints that cause |
| 1201 | * write() to fail with EINVAL after ~512 KB on /data/pkg/ and similar |
| 1202 | * encrypted mounts. GoldHEN/ftpsrv never set F_NOCACHE; skip it. */ |
| 1203 | if ((flags & O_WRONLY) == 0 && (flags & O_RDWR) == 0) { |
| 1204 | (void)fcntl(fd, F_NOCACHE, 1); |
| 1205 | } |
| 1206 | #endif |
| 1207 | #endif /* PLATFORM_PS4 only — explicitly excluded from PS5 */ |
| 1208 | |
| 1209 | #if defined(PLATFORM_PS5) && defined(POSIX_FADV_SEQUENTIAL) |
| 1210 | (void)posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL); |
| 1211 | #endif |
| 1212 | #endif |
| 1213 | |
| 1214 | return fd; |
| 1215 | } |
| 1216 | |
| 1217 | /** |
| 1218 | * @brief Safe file close |
| 1219 | * |
| 1220 | * PLATFORM NOTE — EINTR semantics differ between Linux and FreeBSD: |
| 1221 | * |
| 1222 | * Linux: close() interrupted by a signal still closes the fd. |
| 1223 | * Retrying would attempt to close an fd already freed and |
| 1224 | * potentially reused by another thread — silent data corruption. |
| 1225 | * |
| 1226 | * FreeBSD: close() interrupted by a signal does NOT close the fd. |
| 1227 | * The fd remains open and MUST be retried, or it leaks. |
| 1228 | * This is the behaviour on PS4 and PS5. |
| 1229 | * |
| 1230 | * Failure to handle this distinction on PS5 causes one leaked fd per |
| 1231 | * interrupted close(), accumulating into BUDGET_FD_FILE exhaustion |
| 1232 | * (visible in klog as "called fdescfree(), but remain BUDGET_FD_FILE"). |
| 1233 | * |
| 1234 | * @pre fd >= 0 |
| 1235 | * @note Thread-safety: safe (fd is caller-owned) |
| 1236 | */ |
| 1237 | ftp_error_t pal_file_close(int fd) { |
| 1238 | if (fd < 0) { |
| 1239 | return FTP_ERR_INVALID_PARAM; |
| 1240 | } |
| 1241 | |
| 1242 | #if defined(__FreeBSD__) || defined(PLATFORM_PS4) || defined(PLATFORM_PS5) |
| 1243 | /* |
| 1244 | * FreeBSD/PS4/PS5: EINTR means fd is STILL OPEN — must retry. |
| 1245 | * Cap iterations to avoid looping forever on a persistent signal storm. |
| 1246 | */ |
| 1247 | { |
| 1248 | unsigned retries = 0U; |
| 1249 | const unsigned MAX_CLOSE_RETRIES = 8U; |
| 1250 | while (close(fd) < 0) { |
| 1251 | if (errno != EINTR) { |
| 1252 | return FTP_ERR_FILE_WRITE; |
| 1253 | } |
| 1254 | if (++retries >= MAX_CLOSE_RETRIES) { |
| 1255 | return FTP_ERR_FILE_WRITE; /* give up; fd leaks, not much we can do */ |
| 1256 | } |
| 1257 | } |
| 1258 | } |
| 1259 | #else |
| 1260 | /* |
| 1261 | * Linux / generic POSIX: close() with EINTR has already closed the fd. |
| 1262 | * Do NOT retry — the fd number may be reused by this point. |
| 1263 | */ |
| 1264 | if (close(fd) < 0) { |
| 1265 | if (errno != EINTR) { |
| 1266 | return FTP_ERR_FILE_WRITE; |
| 1267 | } |
| 1268 | /* EINTR on Linux: fd is gone, treat as success */ |
| 1269 | } |
| 1270 | #endif |
| 1271 | |
| 1272 | return FTP_OK; |
| 1273 | } |
| 1274 | |
| 1275 | /** |
| 1276 | * @brief Get file status |
| 1277 | */ |
| 1278 | ftp_error_t pal_file_stat(const char *path, struct stat *st) { |
| 1279 | if ((path == NULL) || (st == NULL)) { |
| 1280 | return FTP_ERR_INVALID_PARAM; |
| 1281 | } |
| 1282 | |
| 1283 | if (stat(path, st) < 0) { |
| 1284 | switch (errno) { |
| 1285 | case ENOENT: |
| 1286 | return FTP_ERR_NOT_FOUND; |
| 1287 | case EACCES: |
| 1288 | return FTP_ERR_PERMISSION; |
| 1289 | default: |
| 1290 | return FTP_ERR_FILE_STAT; |
| 1291 | } |
| 1292 | } |
| 1293 | |
| 1294 | return FTP_OK; |
| 1295 | } |
| 1296 | |
| 1297 | /** |
| 1298 | * @brief Get file status from descriptor |
| 1299 | */ |
| 1300 | ftp_error_t pal_file_fstat(int fd, struct stat *st) { |
| 1301 | if ((fd < 0) || (st == NULL)) { |
| 1302 | return FTP_ERR_INVALID_PARAM; |
| 1303 | } |
| 1304 | |
| 1305 | if (fstat(fd, st) < 0) { |
| 1306 | return FTP_ERR_FILE_STAT; |
| 1307 | } |
| 1308 | |
| 1309 | return FTP_OK; |
| 1310 | } |
| 1311 | |
/**
 * @brief Read from a descriptor with argument validation.
 *
 * A single read(2) is issued; short reads are returned to the caller as-is.
 *
 * @param fd      Source descriptor; must be non-negative.
 * @param buffer  Destination storage; must not be NULL.
 * @param count   Maximum number of bytes to read; must be non-zero.
 *
 * @return Whatever read(2) returns, or -1 with errno = EINVAL when any
 *         argument is rejected.
 */
ssize_t pal_file_read(int fd, void *buffer, size_t count) {
  const int args_ok = (fd >= 0) && (buffer != NULL) && (count != 0U);

  if (!args_ok) {
    errno = EINVAL;
    return -1;
  }

  return read(fd, buffer, count);
}
| 1323 | |
/**
 * @brief Write to a descriptor with argument validation.
 *
 * A single write(2) is issued; short writes are returned to the caller
 * as-is.
 *
 * @param fd      Destination descriptor; must be non-negative.
 * @param buffer  Data to write; must not be NULL.
 * @param count   Number of bytes to write; must be non-zero.
 *
 * @return Whatever write(2) returns, or -1 with errno = EINVAL when any
 *         argument is rejected.
 */
ssize_t pal_file_write(int fd, const void *buffer, size_t count) {
  const int args_ok = (fd >= 0) && (buffer != NULL) && (count != 0U);

  if (!args_ok) {
    errno = EINVAL;
    return -1;
  }

  return write(fd, buffer, count);
}
| 1335 | |
| 1336 | ssize_t pal_file_write_all(int fd, const void *buffer, size_t count) { |
| 1337 | if ((fd < 0) || (buffer == NULL) || (count == 0U)) { |
| 1338 | errno = EINVAL; |
| 1339 | return -1; |
| 1340 | } |
| 1341 | |
| 1342 | const uint8_t *p = (const uint8_t *)buffer; |
| 1343 | size_t total = 0U; |
| 1344 | |
| 1345 | while (total < count) { |
| 1346 | size_t remaining = count - total; |
| 1347 | size_t chunk = remaining; |
| 1348 | if (chunk > (size_t)PAL_FILE_WRITE_CHUNK_MAX) { |
| 1349 | chunk = (size_t)PAL_FILE_WRITE_CHUNK_MAX; |
| 1350 | } |
| 1351 | ssize_t n = write(fd, p + total, chunk); |
| 1352 | if (n > 0) { |
| 1353 | total += (size_t)n; |
| 1354 | continue; |
| 1355 | } |
| 1356 | if (n == 0) { |
| 1357 | /* POSIX does not define write() returning 0 for count > 0. |
| 1358 | * On PS4/PS5 PFS a full filesystem silently returns 0 instead |
| 1359 | * of -1 + ENOSPC. Map this to ENOSPC so callers see the real |
| 1360 | * cause rather than the misleading EIO. */ |
| 1361 | errno = ENOSPC; |
| 1362 | return -1; |
| 1363 | } |
| 1364 | if (errno == EINTR) { |
| 1365 | continue; |
| 1366 | } |
| 1367 | return -1; |
| 1368 | } |
| 1369 | |
| 1370 | return (ssize_t)total; |
| 1371 | } |
| 1372 | |
/**
 * @brief Reposition a descriptor's file offset.
 *
 * @param fd      Descriptor to reposition; must be non-negative.
 * @param offset  Offset forwarded to lseek(2).
 * @param whence  Origin selector forwarded to lseek(2).
 *
 * @return Whatever lseek(2) returns, or -1 with errno = EINVAL when fd is
 *         negative.
 */
off_t pal_file_seek(int fd, off_t offset, int whence) {
  if (fd >= 0) {
    return lseek(fd, offset, whence);
  }

  errno = EINVAL;
  return -1;
}
| 1384 | |
| 1385 | /** |
| 1386 | * @brief Truncate file |
| 1387 | */ |
| 1388 | ftp_error_t pal_file_truncate(int fd, off_t len) { |
| 1389 | if ((fd < 0) || (len < 0)) { |
| 1390 | return FTP_ERR_INVALID_PARAM; |
| 1391 | } |
| 1392 | |
| 1393 | #ifdef PLATFORM_PS4 |
| 1394 | /* PS4: Use syscall directly */ |
| 1395 | if (syscall(480, fd, len) < 0) { |
| 1396 | return FTP_ERR_FILE_WRITE; |
| 1397 | } |
| 1398 | #else |
| 1399 | /* POSIX: Standard ftruncate */ |
| 1400 | if (ftruncate(fd, len) < 0) { |
| 1401 | return FTP_ERR_FILE_WRITE; |
| 1402 | } |
| 1403 | #endif |
| 1404 | |
| 1405 | return FTP_OK; |
| 1406 | } |
| 1407 | |
| 1408 | /** |
| 1409 | * @brief Delete file |
| 1410 | */ |
| 1411 | ftp_error_t pal_file_delete(const char *path) { |
| 1412 | if (path == NULL) { |
| 1413 | return FTP_ERR_INVALID_PARAM; |
| 1414 | } |
| 1415 | |
| 1416 | if (unlink(path) < 0) { |
| 1417 | switch (errno) { |
| 1418 | case ENOENT: |
| 1419 | return FTP_ERR_NOT_FOUND; |
| 1420 | case EACCES: |
| 1421 | case EPERM: |
| 1422 | return FTP_ERR_PERMISSION; |
| 1423 | case EISDIR: |
| 1424 | return FTP_ERR_INVALID_PARAM; /* Use rmdir for directories */ |
| 1425 | default: |
| 1426 | return FTP_ERR_FILE_WRITE; |
| 1427 | } |
| 1428 | } |
| 1429 | |
| 1430 | return FTP_OK; |
| 1431 | } |
| 1432 | |
| 1433 | /*===========================================================================* |
| 1434 | * CROSS-DEVICE MOVE (EXDEV fallback) |
| 1435 | * |
| 1436 | * rename() fails with EXDEV when src and dst live on different |
| 1437 | * filesystems (e.g. /data/homebrew → /mnt/ext1). For regular |
| 1438 | * files we already had pal_file_copy_atomic(). The functions |
| 1439 | * below extend the fallback to entire directory trees. |
| 1440 | * |
| 1441 | * pal_file_rename |
| 1442 | * │ |
| 1443 | * ├── rename() ─── OK? done |
| 1444 | * │ |
| 1445 | * └── EXDEV? |
| 1446 | * │ |
| 1447 | * ├── regular file → pal_file_copy_atomic + unlink |
| 1448 | * │ |
| 1449 | * └── directory → pal_move_cross_device_r |
| 1450 | * ├── mkdir dst |
| 1451 | * ├── for each entry: |
| 1452 | * │ ├── file → copy_atomic + unlink |
| 1453 | * │ └── dir → recurse |
| 1454 | * └── rmdir src |
| 1455 | *===========================================================================*/ |
| 1456 | |
| 1457 | /** |
| 1458 | * @brief Remove a directory tree recursively (depth-first). |
| 1459 | * |
| 1460 | * Used to clean up the source tree after a successful cross-device |
| 1461 | * copy, or to roll back a partial destination on failure. |
| 1462 | */ |
| 1463 | static ftp_error_t pal_dir_remove_recursive(const char *path, unsigned depth) { |
| 1464 | if (path == NULL) { |
| 1465 | return FTP_ERR_INVALID_PARAM; |
| 1466 | } |
| 1467 | if (depth > PAL_MOVE_MAX_DEPTH) { |
| 1468 | return FTP_ERR_PATH_TOO_LONG; |
| 1469 | } |
| 1470 | |
| 1471 | DIR *dir = opendir(path); |
| 1472 | if (dir == NULL) { |
| 1473 | if (errno == ENOENT) { |
| 1474 | return FTP_OK; |
| 1475 | } |
| 1476 | { |
| 1477 | char msg[256]; |
| 1478 | snprintf(msg, sizeof(msg), |
| 1479 | "[XDEV] opendir(cleanup) failed: errno=%d path=%s", errno, path); |
| 1480 | ftp_log_line(FTP_LOG_WARN, msg); |
| 1481 | } |
| 1482 | return FTP_ERR_DIR_OPEN; |
| 1483 | } |
| 1484 | |
| 1485 | struct dirent *ent; |
| 1486 | ftp_error_t err = FTP_OK; |
| 1487 | |
| 1488 | while ((ent = readdir(dir)) != NULL) { |
| 1489 | /* Skip "." and ".." */ |
| 1490 | if ((ent->d_name[0] == '.') && |
| 1491 | ((ent->d_name[1] == '\0') || |
| 1492 | ((ent->d_name[1] == '.') && (ent->d_name[2] == '\0')))) { |
| 1493 | continue; |
| 1494 | } |
| 1495 | |
| 1496 | char child[FTP_PATH_MAX]; |
| 1497 | int n = snprintf(child, sizeof(child), "%s/%s", path, ent->d_name); |
| 1498 | if ((n < 0) || ((size_t)n >= sizeof(child))) { |
| 1499 | err = FTP_ERR_PATH_TOO_LONG; |
| 1500 | break; |
| 1501 | } |
| 1502 | |
| 1503 | struct stat st; |
| 1504 | if (stat(child, &st) < 0) { |
| 1505 | err = FTP_ERR_FILE_STAT; |
| 1506 | break; |
| 1507 | } |
| 1508 | |
| 1509 | if (S_ISDIR(st.st_mode)) { |
| 1510 | err = pal_dir_remove_recursive(child, depth + 1U); |
| 1511 | } else { |
| 1512 | if (unlink(child) != 0) { |
| 1513 | { |
| 1514 | char msg[256]; |
| 1515 | snprintf(msg, sizeof(msg), |
| 1516 | "[XDEV] unlink(cleanup) failed: errno=%d path=%s", errno, |
| 1517 | child); |
| 1518 | ftp_log_line(FTP_LOG_WARN, msg); |
| 1519 | } |
| 1520 | err = FTP_ERR_FILE_WRITE; |
| 1521 | } |
| 1522 | } |
| 1523 | |
| 1524 | if (err != FTP_OK) { |
| 1525 | break; |
| 1526 | } |
| 1527 | } |
| 1528 | |
| 1529 | (void)closedir(dir); |
| 1530 | |
| 1531 | if (err == FTP_OK) { |
| 1532 | if (rmdir(path) < 0) { |
| 1533 | { |
| 1534 | char msg[256]; |
| 1535 | snprintf(msg, sizeof(msg), |
| 1536 | "[XDEV] rmdir(cleanup) failed: errno=%d path=%s", errno, path); |
| 1537 | ftp_log_line(FTP_LOG_WARN, msg); |
| 1538 | } |
| 1539 | err = FTP_ERR_FILE_WRITE; |
| 1540 | } |
| 1541 | } |
| 1542 | |
| 1543 | return err; |
| 1544 | } |
| 1545 | |
| 1546 | /** |
| 1547 | * @brief Recursively move a directory tree across filesystems. |
| 1548 | * |
| 1549 | * Creates the destination directory, copies every file with |
| 1550 | * pal_file_copy_atomic(), recurses into subdirectories, then |
| 1551 | * removes each source entry after its copy succeeds. |
| 1552 | * |
| 1553 | * On failure the partial destination is cleaned up and the |
| 1554 | * source is left intact so the user can retry. |
| 1555 | */ |
| 1556 | static ftp_error_t pal_copy_cross_device_r_ex(const char *src, const char *dst, |
| 1557 | unsigned depth, int keep_src, |
| 1558 | pal_copy_progress_cb_t cb, |
| 1559 | void *user_data, |
| 1560 | uint64_t *cumulative, |
| 1561 | int *out_errno) { |
| 1562 | if ((src == NULL) || (dst == NULL)) { |
| 1563 | return FTP_ERR_INVALID_PARAM; |
| 1564 | } |
| 1565 | if (depth > PAL_MOVE_MAX_DEPTH) { |
| 1566 | { |
| 1567 | char msg[256]; |
| 1568 | snprintf(msg, sizeof(msg), "[XDEV] max depth %u exceeded: %s", |
| 1569 | (unsigned)PAL_MOVE_MAX_DEPTH, src); |
| 1570 | ftp_log_line(FTP_LOG_WARN, msg); |
| 1571 | } |
| 1572 | return FTP_ERR_PATH_TOO_LONG; |
| 1573 | } |
| 1574 | |
| 1575 | if (depth == 0U) { |
| 1576 | char msg[256]; |
| 1577 | snprintf(msg, sizeof(msg), "[XDEV] cross-device move: %s -> %s", src, dst); |
| 1578 | ftp_log_line(FTP_LOG_INFO, msg); |
| 1579 | } |
| 1580 | |
| 1581 | /* Stat source to get permissions */ |
| 1582 | struct stat src_st; |
| 1583 | if (stat(src, &src_st) < 0) { |
| 1584 | int e = errno; |
| 1585 | { |
| 1586 | char msg[256]; |
| 1587 | snprintf(msg, sizeof(msg), "[XDEV] stat(src) failed: errno=%d path=%s", e, |
| 1588 | src); |
| 1589 | ftp_log_line(FTP_LOG_WARN, msg); |
| 1590 | } |
| 1591 | return (e == ENOENT) ? FTP_ERR_NOT_FOUND : FTP_ERR_FILE_STAT; |
| 1592 | } |
| 1593 | |
| 1594 | /*-------------------------------------------------------* |
| 1595 | * Leaf: regular file -> atomic copy + delete source * |
| 1596 | *-------------------------------------------------------*/ |
| 1597 | if (S_ISREG(src_st.st_mode)) { |
| 1598 | /* Use a local errno so the error log below always shows the real OS code |
| 1599 | * even when the caller passes NULL for out_errno (e.g. the async copy |
| 1600 | * thread). If the caller DID provide out_errno, we forward into it. */ |
| 1601 | int local_errno = 0; |
| 1602 | int *errno_ptr = (out_errno != NULL) ? out_errno : &local_errno; |
| 1603 | ftp_error_t err = |
| 1604 | pal_file_copy_atomic_ex(src, dst, cb, user_data, cumulative, errno_ptr); |
| 1605 | if (err != FTP_OK) { |
| 1606 | { |
| 1607 | int os_err = *errno_ptr; /* always valid: either *out_errno or local_errno */ |
| 1608 | char msg[256]; |
| 1609 | snprintf(msg, sizeof(msg), |
| 1610 | "[XDEV] file copy failed (err=%d, errno=%d): %s -> %s", |
| 1611 | (int)err, os_err, src, dst); |
| 1612 | ftp_log_line(FTP_LOG_WARN, msg); |
| 1613 | } |
| 1614 | return err; |
| 1615 | } |
| 1616 | if (keep_src == 0) { |
| 1617 | if (unlink(src) < 0) { |
| 1618 | { |
| 1619 | char msg[256]; |
| 1620 | snprintf(msg, sizeof(msg), |
| 1621 | "[XDEV] unlink(src) failed: errno=%d path=%s", errno, src); |
| 1622 | ftp_log_line(FTP_LOG_WARN, msg); |
| 1623 | } |
| 1624 | /* Copy succeeded but source delete failed — not fatal, |
| 1625 | caller gets an error but data is safe at dst. */ |
| 1626 | return FTP_ERR_FILE_WRITE; |
| 1627 | } |
| 1628 | } |
| 1629 | return FTP_OK; |
| 1630 | } |
| 1631 | |
| 1632 | /*-------------------------------------------------------* |
| 1633 | * Branch: directory → mkdir dst, recurse, rmdir src * |
| 1634 | *-------------------------------------------------------*/ |
| 1635 | if (!S_ISDIR(src_st.st_mode)) { |
| 1636 | /* Symlinks, devices, etc. — skip silently */ |
| 1637 | return FTP_OK; |
| 1638 | } |
| 1639 | |
| 1640 | mode_t mode = (mode_t)(src_st.st_mode & 0777); |
| 1641 | |
| 1642 | /* |
| 1643 | * DATA LOSS BUG — fixed here. |
| 1644 | * |
| 1645 | * Original code: mkdir() silently accepts EEXIST, then on failure |
| 1646 | * calls pal_dir_remove_recursive(dst) unconditionally — deleting a |
| 1647 | * directory that pre-existed and was not created by this move. |
| 1648 | * |
| 1649 | * Fix: track whether WE created dst. Roll back only our own work; |
| 1650 | * never touch a directory that existed before this call. |
| 1651 | */ |
| 1652 | int dst_created_by_us = 0; |
| 1653 | if (mkdir(dst, mode) < 0) { |
| 1654 | if (errno != EEXIST) { |
| 1655 | int e = errno; |
| 1656 | { |
| 1657 | char msg[256]; |
| 1658 | snprintf(msg, sizeof(msg), "[XDEV] mkdir(dst) failed: errno=%d path=%s", |
| 1659 | e, dst); |
| 1660 | ftp_log_line(FTP_LOG_WARN, msg); |
| 1661 | } |
| 1662 | return FTP_ERR_FILE_WRITE; |
| 1663 | } |
| 1664 | /* dst already existed — do not delete it on rollback */ |
| 1665 | } else { |
| 1666 | dst_created_by_us = 1; |
| 1667 | } |
| 1668 | |
| 1669 | DIR *dir = opendir(src); |
| 1670 | if (dir == NULL) { |
| 1671 | { |
| 1672 | char msg[256]; |
| 1673 | snprintf(msg, sizeof(msg), "[XDEV] opendir(src) failed: errno=%d path=%s", |
| 1674 | errno, src); |
| 1675 | ftp_log_line(FTP_LOG_WARN, msg); |
| 1676 | } |
| 1677 | if (dst_created_by_us != 0) { |
| 1678 | (void)rmdir(dst); /* undo our mkdir before returning */ |
| 1679 | } |
| 1680 | return FTP_ERR_DIR_OPEN; |
| 1681 | } |
| 1682 | |
| 1683 | struct dirent *ent; |
| 1684 | ftp_error_t err = FTP_OK; |
| 1685 | |
| 1686 | while ((ent = readdir(dir)) != NULL) { |
| 1687 | /* Skip "." and ".." */ |
| 1688 | if ((ent->d_name[0] == '.') && |
| 1689 | ((ent->d_name[1] == '\0') || |
| 1690 | ((ent->d_name[1] == '.') && (ent->d_name[2] == '\0')))) { |
| 1691 | continue; |
| 1692 | } |
| 1693 | |
| 1694 | char src_child[FTP_PATH_MAX]; |
| 1695 | char dst_child[FTP_PATH_MAX]; |
| 1696 | |
| 1697 | int ns = snprintf(src_child, sizeof(src_child), "%s/%s", src, ent->d_name); |
| 1698 | int nd = snprintf(dst_child, sizeof(dst_child), "%s/%s", dst, ent->d_name); |
| 1699 | |
| 1700 | if ((ns < 0) || ((size_t)ns >= sizeof(src_child)) || (nd < 0) || |
| 1701 | ((size_t)nd >= sizeof(dst_child))) { |
| 1702 | err = FTP_ERR_PATH_TOO_LONG; |
| 1703 | break; |
| 1704 | } |
| 1705 | |
| 1706 | err = pal_copy_cross_device_r_ex(src_child, dst_child, depth + 1U, keep_src, |
| 1707 | cb, user_data, cumulative, out_errno); |
| 1708 | if (err != FTP_OK) { |
| 1709 | break; |
| 1710 | } |
| 1711 | } |
| 1712 | |
| 1713 | (void)closedir(dir); |
| 1714 | |
| 1715 | if (err != FTP_OK) { |
| 1716 | /* |
| 1717 | * Roll back only if we created dst. If it pre-existed, leave it |
| 1718 | * alone — its original contents are not our responsibility. |
| 1719 | * Source is untouched so the user can retry. |
| 1720 | */ |
| 1721 | if (dst_created_by_us != 0) { |
| 1722 | (void)pal_dir_remove_recursive(dst, 0U); |
| 1723 | } |
| 1724 | return err; |
| 1725 | } |
| 1726 | |
| 1727 | if (keep_src == 0) { |
| 1728 | /* Source directory should now be empty — remove it */ |
| 1729 | if (rmdir(src) < 0) { |
| 1730 | { |
| 1731 | char msg[256]; |
| 1732 | snprintf(msg, sizeof(msg), "[XDEV] rmdir(src) failed: errno=%d path=%s", |
| 1733 | errno, src); |
| 1734 | ftp_log_line(FTP_LOG_WARN, msg); |
| 1735 | } |
| 1736 | return FTP_ERR_FILE_WRITE; |
| 1737 | } |
| 1738 | } |
| 1739 | |
| 1740 | if (depth == 0U) { |
| 1741 | ftp_log_line(FTP_LOG_INFO, "[XDEV] cross-device operation completed OK"); |
| 1742 | } |
| 1743 | |
| 1744 | return FTP_OK; |
| 1745 | } |
| 1746 | |
| 1747 | /** |
| 1748 | * @brief Rename/move file or directory |
| 1749 | * |
| 1750 | * Falls back to recursive copy + delete when rename() returns |
| 1751 | * EXDEV (source and destination on different filesystems). |
| 1752 | */ |
| 1753 | ftp_error_t pal_file_rename(const char *old_path, const char *new_path) { |
| 1754 | if ((old_path == NULL) || (new_path == NULL)) { |
| 1755 | return FTP_ERR_INVALID_PARAM; |
| 1756 | } |
| 1757 | |
| 1758 | if (rename(old_path, new_path) < 0) { |
| 1759 | switch (errno) { |
| 1760 | case ENOENT: |
| 1761 | return FTP_ERR_NOT_FOUND; |
| 1762 | case EACCES: |
| 1763 | case EPERM: |
| 1764 | return FTP_ERR_PERMISSION; |
| 1765 | case EXDEV: |
| 1766 | return FTP_ERR_CROSS_DEVICE; |
| 1767 | default: |
| 1768 | return FTP_ERR_FILE_WRITE; |
| 1769 | } |
| 1770 | } |
| 1771 | |
| 1772 | return FTP_OK; |
| 1773 | } |
| 1774 | |
| 1775 | /** |
| 1776 | * @brief Recursively copy file or directory |
| 1777 | */ |
| 1778 | ftp_error_t pal_file_copy_recursive(const char *src, const char *dst, |
| 1779 | int keep_src) { |
| 1780 | uint64_t cum = 0U; |
| 1781 | return pal_copy_cross_device_r_ex(src, dst, 0U, keep_src, NULL, NULL, &cum, |
| 1782 | NULL); |
| 1783 | } |
| 1784 | |
| 1785 | ftp_error_t pal_file_copy_recursive_ex(const char *src, const char *dst, |
| 1786 | int keep_src, pal_copy_progress_cb_t cb, |
| 1787 | void *user_data, int *out_errno) { |
| 1788 | uint64_t cum = 0U; |
| 1789 | return pal_copy_cross_device_r_ex(src, dst, 0U, keep_src, cb, user_data, &cum, |
| 1790 | out_errno); |
| 1791 | } |
| 1792 | |
| 1793 | /*===========================================================================* |
| 1794 | * DIRECTORY OPERATIONS |
| 1795 | *===========================================================================*/ |
| 1796 | |
| 1797 | /** |
| 1798 | * @brief Create directory |
| 1799 | */ |
| 1800 | ftp_error_t pal_dir_create(const char *path, mode_t mode) { |
| 1801 | if (path == NULL) { |
| 1802 | return FTP_ERR_INVALID_PARAM; |
| 1803 | } |
| 1804 | |
| 1805 | if (mkdir(path, mode) < 0) { |
| 1806 | switch (errno) { |
| 1807 | case EEXIST: |
| 1808 | return FTP_ERR_DIR_EXISTS; /* Already exists */ |
| 1809 | case EACCES: |
| 1810 | case EPERM: |
| 1811 | return FTP_ERR_PERMISSION; |
| 1812 | case ENOENT: |
| 1813 | return FTP_ERR_NOT_FOUND; /* Parent doesn't exist */ |
| 1814 | default: |
| 1815 | return FTP_ERR_FILE_WRITE; |
| 1816 | } |
| 1817 | } |
| 1818 | |
| 1819 | return FTP_OK; |
| 1820 | } |
| 1821 | |
| 1822 | /** |
| 1823 | * @brief Remove directory |
| 1824 | */ |
| 1825 | ftp_error_t pal_dir_remove(const char *path) { |
| 1826 | if (path == NULL) { |
| 1827 | return FTP_ERR_INVALID_PARAM; |
| 1828 | } |
| 1829 | |
| 1830 | if (rmdir(path) < 0) { |
| 1831 | switch (errno) { |
| 1832 | case ENOENT: |
| 1833 | return FTP_ERR_NOT_FOUND; |
| 1834 | case EACCES: |
| 1835 | case EPERM: |
| 1836 | return FTP_ERR_PERMISSION; |
| 1837 | case ENOTEMPTY: |
| 1838 | return FTP_ERR_INVALID_PARAM; /* Directory not empty */ |
| 1839 | case ENOTDIR: |
| 1840 | return FTP_ERR_INVALID_PARAM; /* Not a directory */ |
| 1841 | default: |
| 1842 | return FTP_ERR_FILE_WRITE; |
| 1843 | } |
| 1844 | } |
| 1845 | |
| 1846 | return FTP_OK; |
| 1847 | } |
| 1848 | |
| 1849 | /** |
| 1850 | * @brief Check if path exists |
| 1851 | */ |
| 1852 | int pal_path_exists(const char *path) { |
| 1853 | if (path == NULL) { |
| 1854 | return FTP_ERR_INVALID_PARAM; |
| 1855 | } |
| 1856 | |
| 1857 | struct stat st; |
| 1858 | if (stat(path, &st) == 0) { |
| 1859 | return 1; /* Exists */ |
| 1860 | } |
| 1861 | |
| 1862 | if (errno == ENOENT) { |
| 1863 | return 0; /* Does not exist */ |
| 1864 | } |
| 1865 | |
| 1866 | return FTP_ERR_FILE_STAT; /* Other error */ |
| 1867 | } |
| 1868 | |
| 1869 | /** |
| 1870 | * @brief Check if path is a directory |
| 1871 | */ |
| 1872 | int pal_path_is_directory(const char *path) { |
| 1873 | if (path == NULL) { |
| 1874 | return FTP_ERR_INVALID_PARAM; |
| 1875 | } |
| 1876 | |
| 1877 | struct stat st; |
| 1878 | if (stat(path, &st) < 0) { |
| 1879 | return FTP_ERR_FILE_STAT; |
| 1880 | } |
| 1881 | |
| 1882 | return S_ISDIR(st.st_mode) ? 1 : 0; |
| 1883 | } |
| 1884 | |
| 1885 | /** |
| 1886 | * @brief Check if path is a regular file |
| 1887 | */ |
| 1888 | int pal_path_is_file(const char *path) { |
| 1889 | if (path == NULL) { |
| 1890 | return FTP_ERR_INVALID_PARAM; |
| 1891 | } |
| 1892 | |
| 1893 | struct stat st; |
| 1894 | if (stat(path, &st) < 0) { |
| 1895 | return FTP_ERR_FILE_STAT; |
| 1896 | } |
| 1897 | |
| 1898 | return S_ISREG(st.st_mode) ? 1 : 0; |
| 1899 | } |
| 1900 | |
| 1901 | /** |
| 1902 | * @brief Remove a directory tree recursively (public wrapper) |
| 1903 | * |
| 1904 | * Wraps the static pal_dir_remove_recursive() for use by callers outside |
| 1905 | * this translation unit (e.g. http_api.c api_delete with recursive=1). |
| 1906 | * |
| 1907 | * @param[in] path Root of the tree to remove (must not be NULL or "/") |
| 1908 | * @return FTP_OK on success, FTP_ERR_* on failure |
| 1909 | * |
| 1910 | * @note Thread-safety: NOT thread-safe |
| 1911 | * @note All files and subdirectories are removed depth-first. |
| 1912 | * This is irreversible — call only after user confirmation. |
| 1913 | * @warning Do NOT call on paths that are still in use by other processes. |
| 1914 | */ |
| 1915 | ftp_error_t pal_dir_remove_recursive_pub(const char *path) { |
| 1916 | if (path == NULL) { |
| 1917 | return FTP_ERR_INVALID_PARAM; |
| 1918 | } |
| 1919 | return pal_dir_remove_recursive(path, 0U); |
| 1920 | } |
| 1921 |