Seregon/zftpd

Zero-copy FTP/HTTP Daemon compatible with all POSIX systems

C/11.0 KB/No license
src/pkg_unpacker.c
zftpd / src / pkg_unpacker.c
1/*
2 * GNU GPLv3 License — Copyright (c) 2026 SeregonWar
3 * See LICENSE for full text.
4 */
5/*
6 * pkg_unpacker.c — PS4 PKG archive metadata parser
7 *
8 * ── DESIGN RATIONALE ────────────────────────────────────────────────────────
9 *
10 * Big-endian parsing via read_be32():
 *   The original code used expressions of the form `(hdr_buf[N] << 24)`.
 *   A uint8_t operand is always promoted to (signed) `int` before the shift,
 *   independent of the signedness of `char`. When the byte value is >= 0x80,
 *   shifting it left by 24 produces a value that is not representable in a
 *   32-bit `int`, which is undefined behaviour (C11 §6.5.7 ¶4).
 *   read_be32() casts every byte to uint32_t before shifting, eliminating
 *   the UB entirely.
16 *
17 * All offsets widened to uint64_t before arithmetic:
18 * PKG offsets and sizes are stored as uint32_t (big-endian) on disk.
19 * Before any addition or comparison against the 64-bit file size, they are
20 * widened with an explicit cast to prevent silent wrap-around.
21 *
22 * No dynamic allocation after pkg_init():
23 * pkg_extract_to_buffer() and pkg_extract_file_fd() use only the
24 * caller-supplied buffer or a stack buffer (COPY_BUF_SIZE bytes).
25 *
26 * No fprintf / perror in library code:
27 * Side-effects on stderr are inappropriate for a library. All diagnostics
28 * are communicated exclusively through return values (pkg_error_t).
29 *
30 * EINTR handling in write(2) loop:
 *   A signal delivered before write(2) has transferred any data makes it
 *   return -1 with errno == EINTR; if some data was already transferred it
 *   returns the short byte count instead. The inner write loop retries on
 *   EINTR and resumes after short writes rather than propagating a spurious
 *   error.
34 *
35 * Partial-read detection in pkg_extract_file_fd():
36 * The original code checked only `got == 0` as an fread error. A short
37 * read (0 < got < to_read) silently corrupted the extracted data. We now
38 * require `got == to_read` and return PKG_ERR_IO otherwise.
39 *
40 * Encryption guard on all extraction paths:
41 * flags1 bit 31 (PKG_ENTRY_FLAG_ENCRYPTED) was parsed in the original
42 * struct but never populated. We now read flags1/flags2 from each entry
43 * record and refuse extraction of encrypted entries with PKG_ERR_ENCRYPTED.
44 *
45 * ── MISRA C:2012 deviations ─────────────────────────────────────────────────
46 * Rule 15.5 — Single return point: intentionally violated in validation
47 * sequences to avoid deeply nested if-else ladders that reduce
48 * readability without improving safety.
49 * Dir 4.9 — Function-like macros: not used; all helpers are functions.
50 */
51 
52#ifndef _FILE_OFFSET_BITS
53# define _FILE_OFFSET_BITS 64
54#endif
55#ifndef _POSIX_C_SOURCE
56# define _POSIX_C_SOURCE 200809L
57#endif
58 
59#include "pkg_unpacker.h"
60#include <stdlib.h>
61#include <string.h>
62#include <errno.h>
63#include <limits.h> /* SSIZE_MAX */
64 
65#if defined(_MSC_VER)
66# include <io.h>
67# define fseeko _fseeki64
68# define ftello _ftelli64
69# define write _write
70# define SSIZE_MAX _I64_MAX
71#else
72# include <unistd.h>
73#endif
74 
75/* ── Internal constants ─────────────────────────────────────────────────── */
76 
/**
 * Size, in bytes, of the stack buffer used by the streaming copy in
 * pkg_extract_file_fd() (64 KiB).
 *
 * DESIGN RATIONALE — why 64 KiB:
 *   Intended to be large enough to amortise the per-fread()/write() call
 *   overhead while still being allocated as an automatic (stack) array.
 *
 * NOTE(review): the buffer is declared as a local array, so every call to
 * pkg_extract_file_fd() needs at least this much free stack. Confirm that
 * this is acceptable on targets with small thread stacks.
 */
#define COPY_BUF_SIZE 65536U
86 
/*
 * Byte offsets of the fields we decode within the on-disk header buffer.
 * Cross-referenced against psdevwiki.com/ps4/PKG_files and shadPS4 pkg.h.
 *
 * Only the fields marked with an arrow are read by pkg_init(); the remaining
 * rows are listed for orientation only.
 *
 *   0x00  magic            (u32_be)    ← HDR_OFF_MAGIC
 *   0x04  type             (u32_be)
 *   0x08  unk_0x08         (u32_be)
 *   0x0C  file_count       (u32_be)
 *   0x10  entry_count      (u32_be)    ← HDR_OFF_ENTRY_COUNT
 *   0x14  sc_entry_count   (u16_be)
 *   0x16  entry_count_2    (u16_be)
 *   0x18  table_offset     (u32_be)    ← HDR_OFF_TABLE_OFFSET
 *   0x1C  entry_data_size  (u32_be)
 *   0x20  body_offset      (u64_be)
 *   0x28  body_size        (u64_be)
 *   0x30  content_offset   (u64_be)
 *   0x38  content_size     (u64_be)
 *   0x40  content_id       (char[36])  ← HDR_OFF_CONTENT_ID
 */
#define HDR_OFF_MAGIC        0x000U  /* file magic, compared against PKG_MAGIC_* */
#define HDR_OFF_ENTRY_COUNT  0x010U  /* number of records in the entry table     */
#define HDR_OFF_TABLE_OFFSET 0x018U  /* absolute file offset of the entry table  */
#define HDR_OFF_CONTENT_ID   0x040U  /* content id text, not NUL-terminated      */
110 
/*
 * Byte offsets within one 32-byte on-disk entry record.
 * (pkg_init() reads each record into a PKG_ENTRY_RECORD_SIZE-byte buffer;
 * that constant is presumably 32 — confirm against pkg_unpacker.h.)
 *
 *   +00  id               (u32_be)
 *   +04  filename_offset  (u32_be)
 *   +08  flags1           (u32_be)  bit 31 = PKG_ENTRY_FLAG_ENCRYPTED
 *   +12  flags2           (u32_be)
 *   +16  offset           (u32_be)
 *   +20  size             (u32_be)
 *   +24  padding          (u64_be)  unused
 */
#define EOFF_ID              0U   /* entry identifier                          */
#define EOFF_FILENAME_OFFSET 4U   /* filename offset (decoded, not resolved here) */
#define EOFF_FLAGS1          8U   /* flags word 1                              */
#define EOFF_FLAGS2          12U  /* flags word 2                              */
#define EOFF_OFFSET          16U  /* absolute file offset of the entry payload */
#define EOFF_SIZE            20U  /* payload size in bytes                     */
128 
129/* ── Endianness helper ───────────────────────────────────────────────────── */
130 
/**
 * @brief Decode a big-endian (most significant byte first) 32-bit unsigned
 *        integer from four consecutive bytes.
 *
 * The value is built by shift-and-accumulate on a uint32_t, so every shift
 * operates on an unsigned 32-bit operand. This avoids the integer promotion
 * of uint8_t to signed `int` and the resulting undefined behaviour when a
 * byte >= 0x80 would be shifted into the sign bit (C11 §6.5.7).
 *
 * No alignment requirement is placed on @p p: bytes are read one at a time.
 *
 * @param[in] p  Pointer to at least 4 readable bytes; must not be NULL
 *               (not checked here).
 * @return The decoded value.
 *
 * @note Pure function: reads only p[0..3], touches no shared state.
 */
static uint32_t read_be32(const uint8_t *p)
{
    uint32_t value = 0U;

    for (unsigned int idx = 0U; idx < 4U; idx++) {
        /* Make room for the next byte, then append it as the new low byte. */
        value = (value << 8U) | (uint32_t)p[idx];
    }

    return value;
}
152 
153/* ── File-size helper ────────────────────────────────────────────────────── */
154 
155/**
156 * @brief Determine total byte size of an open file without disturbing the
157 * current file position.
158 *
159 * Saves the current position with ftello(), seeks to SEEK_END, queries the
160 * position, then restores the saved position.
161 *
162 * @param[in] fp Open file handle; must not be NULL.
163 * @param[out] out Receives the file size on PKG_OK; must not be NULL.
164 *
165 * @return PKG_OK on success, PKG_ERR_IO if any stdio call fails.
166 *
167 * @note Thread-safety: NOT thread-safe (modifies fp's position transiently).
168 * @note WCET: O(1) on most OS/FS implementations (no I/O read required).
169 */
170static int get_file_size(FILE *fp, uint64_t *out)
171{
172 off_t saved = ftello(fp);
173 if (saved == (off_t)-1) {
174 return PKG_ERR_IO;
175 }
176 
177 if (fseeko(fp, (off_t)0, SEEK_END) != 0) {
178 return PKG_ERR_IO;
179 }
180 
181 off_t end = ftello(fp);
182 
183 /* Restore position before any return, even on error. */
184 if (fseeko(fp, saved, SEEK_SET) != 0) {
185 return PKG_ERR_IO;
186 }
187 
188 if (end < (off_t)0) {
189 return PKG_ERR_IO;
190 }
191 
192 *out = (uint64_t)end;
193 return PKG_OK;
194}
195 
196/* ═══════════════════════════════════════════════════════════════════════════
197 * Context lifecycle
198 * ═════════════════════════════════════════════════════════════════════════*/
199 
200int pkg_init(pkg_context_t *ctx, const char *pkg_path)
201{
202 if ((ctx == NULL) || (pkg_path == NULL) || (pkg_path[0] == '\0')) {
203 return PKG_ERR_PARAM;
204 }
205 
206 /*
207 * Zero-initialise first. This ensures pkg_cleanup() is safe to call
208 * on ctx at any point below, even before all fields are populated.
209 */
210 (void)memset(ctx, 0, sizeof(*ctx));
211 
212 /* ── Open file ──────────────────────────────────────────────────────── */
213 ctx->file = fopen(pkg_path, "rb");
214 if (ctx->file == NULL) {
215 /* errno is set by fopen; caller may inspect it. */
216 return PKG_ERR_IO;
217 }
218 
219 /* ── Determine file size ────────────────────────────────────────────── */
220 {
221 int rc = get_file_size(ctx->file, &ctx->file_size);
222 if (rc != PKG_OK) {
223 pkg_cleanup(ctx);
224 return rc;
225 }
226 }
227 
228 if (ctx->file_size < (uint64_t)PKG_HEADER_SIZE) {
229 pkg_cleanup(ctx);
230 return PKG_ERR_FORMAT;
231 }
232 
233 /* ── Read the header block ──────────────────────────────────────────── */
234 uint8_t hdr_buf[PKG_HEADER_SIZE];
235 if (fread(hdr_buf, 1U, sizeof(hdr_buf), ctx->file) != sizeof(hdr_buf)) {
236 pkg_cleanup(ctx);
237 return PKG_ERR_IO;
238 }
239 
240 /* ── Validate magic ─────────────────────────────────────────────────── */
241 uint32_t magic = read_be32(hdr_buf + HDR_OFF_MAGIC);
242 if ((magic != PKG_MAGIC_CNT) && (magic != PKG_MAGIC_PKG)) {
243 fprintf(stderr, "[PKG] invalid magic: 0x%08X (expected 0x%08X or 0x%08X)\n",
244 magic, PKG_MAGIC_CNT, PKG_MAGIC_PKG);
245 fprintf(stderr, "[PKG] Dump: %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X\n",
246 hdr_buf[0], hdr_buf[1], hdr_buf[2], hdr_buf[3],
247 hdr_buf[4], hdr_buf[5], hdr_buf[6], hdr_buf[7],
248 hdr_buf[8], hdr_buf[9], hdr_buf[10], hdr_buf[11],
249 hdr_buf[12], hdr_buf[13], hdr_buf[14], hdr_buf[15]);
250 pkg_cleanup(ctx);
251 return PKG_ERR_FORMAT;
252 }
253 ctx->header.magic = magic;
254 
255 /* ── Decode and validate entry_count ────────────────────────────────── */
256 uint32_t entry_count = read_be32(hdr_buf + HDR_OFF_ENTRY_COUNT);
257 if ((entry_count == 0U) || (entry_count > PKG_MAX_ENTRY_COUNT)) {
258 fprintf(stderr, "[PKG] invalid entry count: %u\n", entry_count);
259 pkg_cleanup(ctx);
260 return PKG_ERR_FORMAT;
261 }
262 ctx->header.entry_count = entry_count;
263 
264 /* ── Decode and validate table_offset ──────────────────────────────── */
265 uint32_t table_offset = read_be32(hdr_buf + HDR_OFF_TABLE_OFFSET);
266 
267 if (table_offset < PKG_MIN_TABLE_OFFSET) {
268 /*
269 * The entry table overlaps the header — either the file is corrupt
270 * or this is not a real PKG.
271 */
272 pkg_cleanup(ctx);
273 return PKG_ERR_FORMAT;
274 }
275 
276 /*
277 * Verify the complete table fits inside the file.
278 *
279 * All values widened to uint64_t before multiplication and addition to
280 * prevent uint32_t wrap-around (e.g., table_offset=0xFFFFFF00 +
281 * entry_count*32 could overflow a u32 silently).
282 */
283 uint64_t table_bytes = (uint64_t)entry_count *
284 (uint64_t)PKG_ENTRY_RECORD_SIZE;
285 uint64_t table_end = (uint64_t)table_offset + table_bytes;
286 
287 if ((table_end < (uint64_t)table_offset) || /* addition overflowed u64 */
288 (table_end > ctx->file_size)) {
289 pkg_cleanup(ctx);
290 return PKG_ERR_FORMAT;
291 }
292 ctx->header.table_offset = table_offset;
293 
294 /* ── Copy content_id; NUL-terminate explicitly ──────────────────────── */
295 /*
296 * The on-disk field is exactly PKG_CONTENT_ID_LEN bytes with no NUL.
297 * The struct provides PKG_CONTENT_ID_LEN+1 bytes; zero-init (above)
298 * already placed a '\0' at [PKG_CONTENT_ID_LEN], but we write it
299 * explicitly for clarity and MISRA Rule 9.1 compliance.
300 */
301 (void)memcpy(ctx->header.content_id,
302 hdr_buf + HDR_OFF_CONTENT_ID,
303 PKG_CONTENT_ID_LEN);
304 ctx->header.content_id[PKG_CONTENT_ID_LEN] = '\0';
305 
306 /* ── Allocate entry array ───────────────────────────────────────────── */
307 /*
308 * Overflow analysis:
309 * entry_count <= PKG_MAX_ENTRY_COUNT == 10 000
310 * sizeof(pkg_entry_t) == 24 bytes
311 * max allocation: 240 000 bytes — well within any sane size_t.
312 * The division check below is a defensive invariant assertion.
313 */
314 size_t alloc_bytes = (size_t)entry_count * sizeof(pkg_entry_t);
315 if ((alloc_bytes / sizeof(pkg_entry_t)) != (size_t)entry_count) {
316 /* Should never trigger given the guard above. */
317 pkg_cleanup(ctx);
318 return PKG_ERR_RANGE;
319 }
320 
321 ctx->entries = (pkg_entry_t *)malloc(alloc_bytes);
322 if (ctx->entries == NULL) {
323 pkg_cleanup(ctx);
324 return PKG_ERR_NOMEM;
325 }
326 
327 /* ── Parse each entry record ────────────────────────────────────────── */
328 if (fseeko(ctx->file, (off_t)table_offset, SEEK_SET) != 0) {
329 pkg_cleanup(ctx);
330 return PKG_ERR_IO;
331 }
332 
333 for (uint32_t i = 0U; i < entry_count; i++) {
334 uint8_t e_buf[PKG_ENTRY_RECORD_SIZE];
335 
336 if (fread(e_buf, 1U, sizeof(e_buf), ctx->file) != sizeof(e_buf)) {
337 pkg_cleanup(ctx);
338 return PKG_ERR_IO;
339 }
340 
341 pkg_entry_t *e = &ctx->entries[i];
342 
343 e->id = read_be32(e_buf + EOFF_ID);
344 e->filename_offset = read_be32(e_buf + EOFF_FILENAME_OFFSET);
345 e->flags1 = read_be32(e_buf + EOFF_FLAGS1);
346 e->flags2 = read_be32(e_buf + EOFF_FLAGS2);
347 e->offset = read_be32(e_buf + EOFF_OFFSET);
348 e->size = read_be32(e_buf + EOFF_SIZE);
349 
350 /*
351 * Validate that this entry's data is fully contained in the file.
352 * Widen to uint64_t before addition to prevent uint32_t wrap.
353 *
354 * NOTE: We validate all entries on init rather than at extraction
355 * time to fail fast on corrupt files and to avoid redundant checks
356 * on every call to pkg_extract_*.
357 */
358 uint64_t entry_end = (uint64_t)e->offset + (uint64_t)e->size;
359 if ((entry_end < (uint64_t)e->offset) || /* addition overflow guard */
360 (entry_end > ctx->file_size)) {
361 pkg_cleanup(ctx);
362 return PKG_ERR_FORMAT;
363 }
364 }
365 
366 ctx->num_entries = (size_t)entry_count;
367 return PKG_OK;
368}
369 
370void pkg_cleanup(pkg_context_t *ctx)
371{
372 if (ctx == NULL) {
373 return;
374 }
375 
376 if (ctx->file != NULL) {
377 (void)fclose(ctx->file);
378 ctx->file = NULL;
379 }
380 
381 if (ctx->entries != NULL) {
382 free(ctx->entries);
383 ctx->entries = NULL;
384 }
385 
386 ctx->num_entries = 0U;
387 ctx->file_size = 0U;
388}
389 
390/* ═══════════════════════════════════════════════════════════════════════════
391 * Entry retrieval
392 * ═════════════════════════════════════════════════════════════════════════*/
393 
394const pkg_entry_t *pkg_find_entry_by_id(const pkg_context_t *ctx, uint32_t id)
395{
396 if ((ctx == NULL) || (ctx->entries == NULL)) {
397 return NULL;
398 }
399 
400 for (size_t i = 0U; i < ctx->num_entries; i++) {
401 if (ctx->entries[i].id == id) {
402 return &ctx->entries[i];
403 }
404 }
405 return NULL;
406}
407 
408/* ═══════════════════════════════════════════════════════════════════════════
409 * Extraction
410 * ═════════════════════════════════════════════════════════════════════════*/
411 
412ssize_t pkg_extract_to_buffer(pkg_context_t *ctx, const pkg_entry_t *entry,
413 uint8_t *buf, size_t buf_size)
414{
415 if ((ctx == NULL) || (ctx->file == NULL) ||
416 (entry == NULL) || (buf == NULL) || (buf_size == 0U)) {
417 return (ssize_t)PKG_ERR_PARAM;
418 }
419 
420 if (pkg_entry_is_encrypted(entry)) {
421 return (ssize_t)PKG_ERR_ENCRYPTED;
422 }
423 
424 /*
425 * Bound the in-memory extraction size. This is a defence-in-depth check;
426 * pkg_init() already validated that entry->offset + entry->size <=
427 * file_size, so a corrupt entry->size cannot point past the file.
428 * However, without this guard a 4 GiB size field would cause a
429 * 4 GiB stack or heap demand on the caller's side.
430 */
431 if (entry->size > PKG_MAX_ENTRY_SIZE) {
432 return (ssize_t)PKG_ERR_RANGE;
433 }
434 
435 if ((size_t)entry->size > buf_size) {
436 return (ssize_t)PKG_ERR_BUFFER_SMALL;
437 }
438 
439 /*
440 * ssize_t range note:
441 * entry->size <= PKG_MAX_ENTRY_SIZE (64 MiB) is guaranteed by the
442 * check above. SSIZE_MAX is at minimum INT32_MAX (~2 GiB) on all
443 * supported platforms, so the cast on the return statement below is
444 * always safe. No additional guard is required.
445 */
446 
447 if (entry->size == 0U) {
448 return (ssize_t)0;
449 }
450 
451 if (fseeko(ctx->file, (off_t)entry->offset, SEEK_SET) != 0) {
452 return (ssize_t)PKG_ERR_IO;
453 }
454 
455 size_t got = fread(buf, 1U, (size_t)entry->size, ctx->file);
456 if (got != (size_t)entry->size) {
457 /*
458 * A short read indicates either an I/O error or that the file was
459 * truncated between pkg_init() and this call. Either way, the
460 * buffer content is incomplete and must not be used.
461 */
462 return (ssize_t)PKG_ERR_IO;
463 }
464 
465 return (ssize_t)got;
466}
467 
468int pkg_extract_file_fd(pkg_context_t *ctx, const pkg_entry_t *entry,
469 int output_fd)
470{
471 if ((ctx == NULL) || (ctx->file == NULL) ||
472 (entry == NULL) || (output_fd < 0)) {
473 return PKG_ERR_PARAM;
474 }
475 
476 if (pkg_entry_is_encrypted(entry)) {
477 return PKG_ERR_ENCRYPTED;
478 }
479 
480 if (entry->size > PKG_MAX_ENTRY_SIZE) {
481 return PKG_ERR_RANGE;
482 }
483 
484 if (fseeko(ctx->file, (off_t)entry->offset, SEEK_SET) != 0) {
485 return PKG_ERR_IO;
486 }
487 
488 /* Stack-allocated copy buffer — no heap allocation in this function. */
489 uint8_t copy_buf[COPY_BUF_SIZE];
490 uint32_t remaining = entry->size;
491 
492 while (remaining > 0U) {
493 size_t to_read = (remaining < (uint32_t)COPY_BUF_SIZE)
494 ? (size_t)remaining
495 : (size_t)COPY_BUF_SIZE;
496 
497 size_t got = fread(copy_buf, 1U, to_read, ctx->file);
498 
499 if (got == 0U) {
500 /* EOF or hard read error before we consumed entry->size bytes. */
501 return PKG_ERR_IO;
502 }
503 
504 if (got != to_read) {
505 /*
506 * Partial read: the file is shorter than entry->size claimed.
507 * The original code silently continued with a smaller chunk,
508 * producing a corrupt output file. We fail fast instead.
509 */
510 return PKG_ERR_IO;
511 }
512 
513 /*
514 * Write the full buffer to output_fd.
515 *
516 * DESIGN RATIONALE — inner retry loop for EINTR:
517 * A signal delivered while write(2) is in progress causes it to
518 * return -1 with errno == EINTR. This is not a true error; we
519 * simply retry the write from where we left off. Without this,
520 * callers running under a debugger or with timers would see
521 * spurious PKG_ERR_IO failures.
522 */
523 const uint8_t *p = copy_buf;
524 size_t len = got;
525 
526 while (len > 0U) {
527 ssize_t n = write(output_fd, p, len);
528 
529 if (n < 0) {
530 if (errno == EINTR) {
531 continue; /* Signal interrupted; retry this write. */
532 }
533 return PKG_ERR_IO;
534 }
535 
536 if (n == 0) {
537 /*
538 * write(2) returning 0 without error is not specified by
539 * POSIX for regular files; treat it as a non-recoverable
540 * error to avoid an infinite spin.
541 */
542 return PKG_ERR_IO;
543 }
544 
545 /* Safe casts: 0 < n <= (ssize_t)len, len is a size_t. */
546 p += (size_t)n;
547 len -= (size_t)n;
548 }
549 
550 /*
551 * Safe subtraction: got == to_read <= remaining (ensured above),
552 * so remaining - got >= 0 and no uint32_t underflow is possible.
553 */
554 remaining -= (uint32_t)got;
555 }
556 
557 return PKG_OK;
558}
559