Seregon/zftpd

Zero-copy FTP/HTTP Daemon compatible with all POSIX systems

C/11.0 KB/No license
src/http_parser.c
zftpd / src / http_parser.c
1/*
2MIT License
3 
4Copyright (c) 2026 Seregon
5 
6Permission is hereby granted, free of charge, to any person obtaining a copy
7of this software and associated documentation files (the "Software"), to deal
8in the Software without restriction, including without limitation the rights
9to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10copies of the Software, and to permit persons to whom the Software is
11furnished to do so, subject to the following conditions:
12 
13The above copyright notice and this permission notice shall be included in all
14copies or substantial portions of the Software.
15 
16THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22SOFTWARE.
23*/
24 
25/**
26 * @file http_parser.c
27 * @brief Minimal HTTP/1.1 request parser (no strtok, no strncpy)
28 *
29 * Rewrites the original parser to avoid:
30 * - strtok() : hidden global state, not reentrant
31 * - strncpy(): no guaranteed NUL termination
32 *
33 * Uses bounded pointer arithmetic with explicit length checks.
34 * The input buffer is mutated in-place (NUL inserted at delimiters)
35 * so header name/value pointers remain valid for the buffer's lifetime.
36 */
37 
#include "http_parser.h"

#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
42 
43/*===========================================================================*
44 * HELPERS
45 *
46 * find_char() — bounded strchr (stays within [p, end))
47 * find_crlf() — bounded strstr for "\r\n"
48 *===========================================================================*/
49 
/**
 * @brief Bounded character search over the half-open range [p, end)
 *
 * @param[in] p   First byte to examine
 * @param[in] end One past the last byte to examine
 * @param[in] c   Byte value to look for
 *
 * @return Pointer to the first byte equal to c, or NULL when the range is
 *         empty or holds no such byte
 */
static char *find_char(char *p, const char *end, char c) {
  /* An empty (or inverted) range can never contain a match. */
  if (p >= end) {
    return NULL;
  }

  return memchr(p, c, (size_t)(end - p));
}
63 
/**
 * @brief Locate the first "\r\n" pair lying entirely inside [p, end)
 *
 * Searches for '\r' with memchr() and then inspects the byte that follows.
 * The '\r' search window stops one byte short of end, so the byte after a
 * match is always inside the range, and no pointer beyond end is ever
 * formed, even when the range is empty.
 *
 * @param[in] p   First byte to examine
 * @param[in] end One past the last byte to examine
 *
 * @return Pointer to the '\r' of the first CRLF, or NULL if the range holds
 *         fewer than two bytes or contains no complete CRLF
 */
static char *find_crlf(char *p, const char *end) {
  while ((end - p) >= 2) {
    /* Only consider bytes that still have a successor inside the range. */
    char *cr = memchr(p, '\r', (size_t)(end - p) - 1U);
    if (cr == NULL) {
      return NULL;
    }
    if (cr[1] == '\n') {
      return cr;
    }
    p = cr + 1; /* lone '\r': resume right after it */
  }
  return NULL;
}
77 
78/*===========================================================================*
79 * PARSE REQUEST LINE
80 *
81 * "GET /index.html HTTP/1.1\r\n"
82 * ^method ^uri ^version
83 *
84 * Splits on spaces with bounded search, no strtok.
85 *===========================================================================*/
86 
87/**
88 * @brief Parse the request line: METHOD SP URI SP HTTP/x.y
89 *
90 * @param[in,out] line Start of request line (NUL-terminated by caller)
91 * @param[in] line_len Length of the line (excluding NUL)
92 * @param[out] request Populated with method, uri, version
93 *
94 * @return 0 on success, -1 on malformed request
95 */
96static int parse_request_line(char *line, size_t line_len,
97 http_request_t *request) {
98 const char *end = line + line_len;
99 
100 /*-- METHOD --*/
101 char *sp1 = find_char(line, end, ' ');
102 if (sp1 == NULL) {
103 return -1;
104 }
105 *sp1 = '\0';
106 
107 if (strcmp(line, "GET") == 0) {
108 request->method = HTTP_METHOD_GET;
109 } else if (strcmp(line, "POST") == 0) {
110 request->method = HTTP_METHOD_POST;
111 } else if (strcmp(line, "HEAD") == 0) {
112 request->method = HTTP_METHOD_HEAD;
113 } else {
114 request->method = HTTP_METHOD_UNKNOWN;
115 }
116 
117 /*-- URI --*/
118 char *uri_start = sp1 + 1;
119 if (uri_start >= end) {
120 return -1;
121 }
122 char *sp2 = find_char(uri_start, end, ' ');
123 if (sp2 == NULL) {
124 return -1;
125 }
126 *sp2 = '\0';
127 
128 /* Bounded copy into fixed-size uri field */
129 size_t uri_len = (size_t)(sp2 - uri_start);
130 if (uri_len >= HTTP_URI_MAX_LENGTH) {
131 uri_len = HTTP_URI_MAX_LENGTH - 1U;
132 }
133 memcpy(request->uri, uri_start, uri_len);
134 request->uri[uri_len] = '\0';
135 
136 /*-- VERSION "HTTP/x.y" --*/
137 char *ver_start = sp2 + 1;
138 if (ver_start >= end) {
139 return -1;
140 }
141 if (sscanf(ver_start, "HTTP/%d.%d", &request->version_major,
142 &request->version_minor) != 2) {
143 return -1;
144 }
145 
146 return 0;
147}
148 
149/*===========================================================================*
150 * PARSE HEADERS
151 *
152 * "Content-Type: application/json\r\n"
153 * ^name ^colon ^value
154 *
155 * Each header line is NUL-terminated at the \r\n boundary.
156 * Colon is replaced with NUL to split name/value in-place.
157 * Leading whitespace on value is skipped.
158 *===========================================================================*/
159 
160/**
161 * @brief Parse a single header line into request->headers[]
162 *
163 * @param[in,out] line Header line, already NUL-terminated
164 * @param[out] request Target request struct
165 */
166static void parse_header_line(char *line, http_request_t *request) {
167 if (request->num_headers >= HTTP_HEADER_MAX_COUNT) {
168 return; /* silently drop excess headers */
169 }
170 
171 char *colon = strchr(line, ':');
172 if (colon == NULL) {
173 return; /* malformed header, skip */
174 }
175 
176 *colon = '\0';
177 char *value = colon + 1;
178 
179 /* Skip leading whitespace (OWS per RFC 7230 §3.2.6) */
180 while (*value == ' ' || *value == '\t') {
181 value++;
182 }
183 
184 request->headers[request->num_headers].name = line;
185 request->headers[request->num_headers].value = value;
186 request->num_headers++;
187}
188 
189/*===========================================================================*
190 * PUBLIC API
191 *===========================================================================*/
192 
193/**
194 * @brief Parse a complete HTTP/1.1 request from a mutable buffer
195 *
196 * The buffer is mutated in-place: NUL bytes are inserted at
197 * line boundaries and colon separators so that request->uri,
198 * header name/value, and body pointers reference the buffer
199 * directly with zero-copy semantics.
200 *
201 * @param[in,out] buffer Raw HTTP request data (mutable)
202 * @param[in] length Number of valid bytes in buffer
203 * @param[out] request Parsed result
204 *
205 * @return 0 on success, negative on parse error
206 * -1: NULL input or malformed request line
207 * -2: missing CRLF (incomplete request)
208 */
209int http_parse_request(char *buffer, size_t length, http_request_t *request) {
210 if ((buffer == NULL) || (request == NULL) || (length == 0U)) {
211 return -1;
212 }
213 
214 memset(request, 0, sizeof(*request));
215 
216 const char *buf_end = buffer + length;
217 
218 /*-- Request line --*/
219 char *crlf = find_crlf(buffer, buf_end);
220 if (crlf == NULL) {
221 return -2;
222 }
223 *crlf = '\0'; /* NUL-terminate request line */
224 
225 size_t line_len = (size_t)(crlf - buffer);
226 if (parse_request_line(buffer, line_len, request) != 0) {
227 return -1;
228 }
229 
230 /*-- Headers --*/
231 char *line = crlf + 2; /* skip past \r\n */
232 while (line < buf_end) {
233 crlf = find_crlf(line, buf_end);
234 if (crlf == NULL) {
235 break; /* truncated headers, stop */
236 }
237 *crlf = '\0';
238 
239 /* Empty line = end of headers, body follows */
240 if (line[0] == '\0') {
241 char *body_start = crlf + 2;
242 if (body_start < buf_end) {
243 request->body = body_start;
244 request->body_length = (size_t)(buf_end - body_start);
245 }
246 break;
247 }
248 
249 parse_header_line(line, request);
250 line = crlf + 2;
251 }
252 
253 return 0;
254}
255 
256/**
257 * @brief Look up a header value by name (case-insensitive)
258 *
259 * @param request Parsed request
260 * @param name Header name to search for
261 *
262 * @return Header value string, or NULL if not found
263 */
264const char *http_get_header(const http_request_t *request, const char *name) {
265 if ((request == NULL) || (name == NULL)) {
266 return NULL;
267 }
268 
269 for (size_t i = 0; i < request->num_headers; i++) {
270 if (strcasecmp(request->headers[i].name, name) == 0) {
271 return request->headers[i].value;
272 }
273 }
274 
275 return NULL;
276}
277