From 4ecdd3c18ccc5d3bcaa82ed720bf28443aa0ca9d Mon Sep 17 00:00:00 2001 From: ame Date: Fri, 12 Jun 2026 00:56:21 -0500 Subject: http body parsing rewrite --- src/net/common.h | 9 ++-- src/net/lua.c | 98 ++++++++++++++++----------------- src/net/lua.h | 2 +- src/net/luai.c | 152 +++++++++++++++++++++++++--------------------------- src/net/util.c | 52 +++++++++++------- src/net/util.h | 5 +- src/net/websocket.c | 2 +- 7 files changed, 163 insertions(+), 157 deletions(-) (limited to 'src/net') diff --git a/src/net/common.h b/src/net/common.h index 6d2f625..ee8f77a 100644 --- a/src/net/common.h +++ b/src/net/common.h @@ -22,18 +22,19 @@ #define max_con 200 //2^42 #define MAX_HEADER_SIZE (1<<20) -#define BUFFER_SIZE 20000 +#define BUFFER_SIZE 210000 +//00 #define HTTP_BUFFER_SIZE 4098 #define max_content_length 200000 enum file_status { - _ignore, BARRIER_READ, FILE_HEADER, FILE_BODY, NORMAL + //_ignore, BARRIER_READ, FILE_HEADER, FILE_BODY, NORMAL + _ignore, BARRIER_START, BARRIER_END, NORMAL }; struct file_parse { enum file_status status; - str *current, *old, *boundary, *boundary_id; - int dash_count, table_idx; + str *current, *old, *boundary, *line; }; struct net_data { diff --git a/src/net/lua.c b/src/net/lua.c index 025828b..5d178a5 100644 --- a/src/net/lua.c +++ b/src/net/lua.c @@ -137,81 +137,75 @@ int l_stop(lua_State* L){ return 0; } -int l_roll(lua_State* L){ - int64_t alen; - if(lua_gettop(L) > 2) { +int l_load(lua_State* L){ + ssize_t alen; + if(lua_gettop(L) >= 2) { alen = luaL_checkinteger(L, 2); } else { alen = -1; } - lua_pushvalue(L, 1); - lua_pushstring(L, "_bytes"); - lua_gettable(L, 1); + lua_getfield(L, 1, "_bytes"); int64_t bytes = luaL_checkinteger(L, -1); - lua_pushstring(L, "content-length"); - lua_gettable(L, 1); - if(lua_type(L, -1) == LUA_TNIL) { - lua_pushinteger(L, -1); + lua_getfield(L, 1, "content-length"); + luaI_assert(L, lua_type(L, -1) != LUA_TNIL); + uint64_t content_length = strtol(luaL_checkstring(L, -1), NULL, 10); + + if(bytes >= content_length){ + lua_pushinteger(L, 0); return 1; } - uint64_t content_length = strtol(luaL_checkstring(L, -1), NULL, 10); - lua_pushstring(L, "_data"); - lua_gettable(L, 1); - struct file_parse* data = (void*)lua_topointer(L, -1); lua_getfield(L, 1, "res"); - lua_pushstring(L, "_"); - lua_gettable(L, -2); + lua_getfield(L, -1, "_"); struct net_data* ctx = lua_touserdata(L, -1); - client_fd_errors(ctx->sock); + lua_getfield(L, 1, "_data"); + struct file_parse* data = lua_touserdata(L, -1); - //fd_set rfd; - //FD_ZERO(&rfd); - //FD_SET(client_fd, &rfd); - //printf("* %li / %li\n", bytes, content_length); - if(bytes >= content_length){ - lua_pushinteger(L, -1); - return 1; - } + client_fd_errors(ctx->sock); - /*if(select(client_fd+1, &rfd, NULL, NULL, &((struct timeval){.tv_sec = 0, .tv_usec = 0})) == 0){ - lua_pushinteger(L, 0); - return 1; - }*/ + lua_getfield(L, 1, "body"); + int body_idx = lua_gettop(L); + lua_getfield(L, 1, "files"); + int files_idx = lua_gettop(L); + if(alen == -1) str_loadatleast(data->current, content_length); + char* buffer = malloc(sizeof * buffer * BUFFER_SIZE); + ssize_t total = bytes; + int over = 1; - //time_start(recv) if(alen == -1) alen = content_length - bytes; - char* buffer = malloc(alen * sizeof * buffer); - int r = net_ctx_read(ctx, buffer, alen); - if(r <= 0){ - lua_pushinteger(L, r - 1); - return 1; - } - //time_end("recv", recv) + else alen = bytes + alen; - lua_pushstring(L, "_bytes"); - lua_pushinteger(L, bytes + r); - lua_settable(L, 1); + for(;total < alen;){ + ssize_t read = BUFFER_SIZE; + if(total + BUFFER_SIZE > alen) read = alen - total; - lua_pushstring(L, "body"); - lua_gettable(L, 1); - int body_idx = lua_gettop(L); + memset(buffer, 0, BUFFER_SIZE); - lua_pushstring(L, "files"); - lua_gettable(L, 1); - int files_idx = lua_gettop(L); - //time_start(parse) - http_body_parse(L, &files_idx, &body_idx, buffer, NULL, r, data); - //time_end("parse", parse) - luaI_tsetv(L, 1, "body", body_idx); - luaI_tsetv(L, 1, "files", files_idx); + ssize_t out = net_ctx_read(ctx, buffer, read); + + if(out == 0) { + over = 0; + break; + } + if(out == -1) { + luaI_error(L, -2, "net read error"); + } + + total += out; + + http_body_parse(L, &files_idx, &body_idx, buffer, NULL, out, data); + } + + lua_pushinteger(L, total); + lua_setfield(L, 1, "_bytes"); free(buffer); - lua_pushinteger(L, r); + + lua_pushinteger(L, over); return 1; } diff --git a/src/net/lua.h b/src/net/lua.h index ef4c0e4..8aaa4a6 100644 --- a/src/net/lua.h +++ b/src/net/lua.h @@ -5,6 +5,6 @@ int l_send(lua_State* L); int l_neterror(lua_State* L); int l_close(lua_State* L); int l_stop(lua_State* L); -int l_roll(lua_State* L); +int l_load(lua_State* L); int l_sendfile(lua_State* L); int l_connection_upgrade(lua_State* L); diff --git a/src/net/luai.c b/src/net/luai.c index 9c320a0..5e1d004 100644 --- a/src/net/luai.c +++ b/src/net/luai.c @@ -48,14 +48,13 @@ int http_body_parse(lua_State* L, int* files_idx, int* body_idx, char* buffer, s parray_t* par = parray_init(); gen_parse(content_type->c, content_type->len, &par); content.boundary = parray_pop(par, "boundary"); - content.status = content.boundary == NULL?BARRIER_READ:NORMAL; - content.dash_count = 0; + content.status = content.boundary != NULL?BARRIER_START:NORMAL; content.current = str_init(""); - content.table_idx = lua_gettop(L); - content.boundary_id = str_init(""); + content.line = str_init(""); parray_clear(par, STR); } + if(content.status == NORMAL){ lua_pushvalue(L, *body_idx); lua_pushlstring(L, buffer, blen); @@ -63,82 +62,75 @@ int http_body_parse(lua_State* L, int* files_idx, int* body_idx, char* buffer, s *body_idx = lua_gettop(L); } else { file_start:; - if(content.status == BARRIER_READ){ - for(int i = 0; i != blen; i++){ - if(*buffer == '\r'){ - content.status = FILE_HEADER; - buffer += 2; - blen -= i + 2; - - content.table_idx = lua_rawlen(L, *files_idx) + 1; - lua_pushinteger(L, content.table_idx); - lua_newtable(L); - lua_settable(L, *files_idx); - break; - } - str_pushl(content.boundary_id, buffer, 1); - buffer++; - } - } - lua_pushvalue(L, *files_idx); - lua_pushinteger(L, content.table_idx); - lua_gettable(L, -2); - int rfiles_idx = lua_gettop(L); - if(content.status == FILE_HEADER){ - for(int i = 0; i < blen; i++){ - - if(buffer[i] == ':'){ - content.old = content.current; - content.current = str_init(""); - } else if(buffer[i] == '\n'){ - if(content.current->len == 0){ - content.status = FILE_BODY; - - buffer += i + 1; - blen -= i + 1; - - content.old = NULL; - str_free(content.current); - content.current = str_init(""); - break; - } -#warning "error here" - luaI_tsets(L, rfiles_idx, content.old->c, content.current->c); - - str_free(content.old); - content.old = NULL; - str_clear(content.current); - } else if(buffer[i] != '\r' && !(buffer[i] == ' ' && content.current->len == 0)) str_pushl(content.current, buffer + i, 1); - } - } - - if(content.status == FILE_BODY){ - char* barrier_end = memmem(buffer, blen, content.boundary->c, content.boundary->len); - if(barrier_end == NULL){ - str* temp = str_initl(content.current->c, content.current->len); - str_pushl(temp, buffer, blen); - barrier_end = memmem(temp->c, temp->len, content.boundary->c, content.boundary->len); - if(barrier_end != NULL) abort(); // todo - - str* temp2 = content.current; - content.current = temp; - str_free(temp2); - - } else { - char* start = barrier_end, *end = barrier_end; - for(; *start != '\n'; start--); - for(; *end != '\n'; end++); - int clen = start - buffer; - str_pushl(content.current, buffer, clen); - luaI_tsetsl(L, rfiles_idx, "content", content.current->c, content.current->len - 1); - str_clear(content.current); - blen-= end - buffer; - buffer = end; - content.status = BARRIER_READ; - goto file_start; - } - - } + if(content.status == BARRIER_START){ + for(; blen > 0; buffer++, blen--){ + if(*buffer == '\n'){ + content.status = BARRIER_END; + blen--; + buffer++; + break; + } + } + } + + if(content.status == BARRIER_END){ + int check = 80; + if(content.current->len < check) check = content.current->len; + ssize_t backtrack = content.current->len - check; + + str_pushl(content.current, buffer, blen); + char* end = memmem(content.current->c + backtrack, content.current->len - backtrack, content.boundary->c, content.boundary->len); + + if(end != NULL){ + ssize_t size = content.current->len - (end - content.current->c + 4); + str_popb(content.current, content.current->len - (end - content.current->c) + 4); + size_t header_len = (char*)memmem(content.current->c, content.current->len, "\r\n\r\n", 4) - content.current->c; + + parray_t* header = parray_init(); + parse_header_kv(content.current->c, header_len, header); + + str* cd = (str*)parray_get(header, "content-disposition"); + if(cd != NULL){ + lua_newtable(L); + int newfile_idx = lua_gettop(L); + + parray_t* cd_parse = parray_init(); + gen_parse(cd->c, cd->len, &cd_parse); + lua_newtable(L); + int cdidx = lua_gettop(L); + luaI_fromparray(L, cdidx, cd_parse, 1); + + luaI_fromparray(L, newfile_idx, header, 1); + luaI_tsetv(L, newfile_idx, "content-disposition", cdidx); + + str* name = (str*)parray_get(cd_parse, "name"); + if(name == NULL) name = (str*)parray_get(cd_parse, "filename"); + + if(name == NULL){ + int ind = lua_objlen(L, *files_idx) + 1; + lua_pushinteger(L, ind); + lua_pushvalue(L, newfile_idx); + lua_settable(L, *files_idx); + } + else { + luaI_tsetv(L, *files_idx, name->c, newfile_idx); + } + parray_clear(cd_parse, STR); + + lua_pushlstring(L, content.current->c + header_len + 4, content.current->len - header_len - 4); + lua_setfield(L, newfile_idx, "body"); + } + + str_clear(content.current); + parray_clear(header, STR); + content.status = BARRIER_START; + + ssize_t oblen = blen; + blen = oblen - (oblen - size); + buffer += oblen - blen; + goto file_start; + } + } } *_content = content; diff --git a/src/net/util.c b/src/net/util.c index ce4fc26..08a9a55 100644 --- a/src/net/util.c +++ b/src/net/util.c @@ -46,41 +46,43 @@ void lowercase(char* c, uint64_t len){ * @brief converts the request buffer into a parray_t * */ -int parse_header(char* buffer, int header_eof, parray_t** _table){ - if(header_eof == -1) return -1; - - parray_t* table = parray_init(); +ssize_t parse_header_head(char* buffer, size_t header_len, parray_t* table){ str* current = str_init(""); - int oi = 0; + int i = 0; int item = 0; - for(; oi != header_eof; oi++){ - if(buffer[oi] == ' ' || buffer[oi] == '\n'){ - if(buffer[oi] == '\n') current->c[current->len - 1] = 0; + for(; i != header_len; i++){ + if(buffer[i] == ' ' || buffer[i] == '\n'){ + if(buffer[i] == '\n') current->c[current->len - 1] = 0; if(item < 3) parray_set(table, item == 0 ? "request" : item == 1 ? "path" : "version", (void*)str_init(current->c)); str_clear(current); item++; - if(buffer[oi] == '\n') break; + if(buffer[i] == '\n') break; } else { - if(oi >= max_uri_len){ - *_table = table; + if(i >= max_uri_len){ str_free(current); return -2; } - str_pushl(current, buffer + oi, 1); + str_pushl(current, buffer + i, 1); } } + str_free(current); + if(item < 3){ - str_free(current); - *_table = table; return -1; } - int key = 1; + return i; +} + +int parse_header_kv(char* buffer, size_t header_len, parray_t* table){ + str* current = str_init(""); str* sw = NULL; - for(int i = oi + 1; i != header_eof; i++){ + int key = 1; + + for(int i = 0; i != header_len; i++){ if(buffer[i] == ' ' && strcmp(current->c, "") == 0) continue; if((key && buffer[i] == ':') || (!key && buffer[i] == '\n')){ if(key){ @@ -88,7 +90,7 @@ int parse_header(char* buffer, int header_eof, parray_t** _table){ current = str_init(""); key = 0; } else { - if(buffer[oi] == '\n') current->c[current->len - 1] = 0; + current->c[current->len - 1] = 0; //duplicate keys would cause memory leaks, ignore them for now //todo: figure out system to handle this str* id = (str*)parray_get(table, sw->c); @@ -103,16 +105,30 @@ int parse_header(char* buffer, int header_eof, parray_t** _table){ continue; } else str_pushc(current, buffer[i]); } + if(sw != NULL){ + lowercase(sw->c, sw->len); parray_set(table, sw->c, (void*)str_init(current->c)); str_free(sw); } - str_free(current); + + return 0; +} + +int parse_header(char* buffer, size_t header_len, parray_t** _table){ + if(header_len == -1) return -1; + parray_t* table = parray_init(); + + ssize_t used = parse_header_head(buffer, header_len, table); + if(used == -1) return -1; + + parse_header_kv(buffer + used + 1, header_len - used, table); *_table = table; return 0; } + /** * @brief contructs an http request * diff --git a/src/net/util.h b/src/net/util.h index 602ee53..34bf317 100644 --- a/src/net/util.h +++ b/src/net/util.h @@ -22,7 +22,10 @@ int64_t recv_header(struct net_data* ctx, char** _buffer, char** header_eof); * @param {parray_t**} pointer to a unallocated parray_t * @return {int} returns 0 or -1 on failure */ -int parse_header(char* buffer, int header_eof, parray_t** _table); +int parse_header(char* buffer, size_t header_len, parray_t** _table); +ssize_t parse_header_head(char* buffer, size_t header_len, parray_t* table); +int parse_header_kv(char* buffer, size_t header_len, parray_t* table); + /** * @brief contructs an http request diff --git a/src/net/websocket.c b/src/net/websocket.c index 06283f6..0f6081f 100644 --- a/src/net/websocket.c +++ b/src/net/websocket.c @@ -212,7 +212,7 @@ int l_websocket_upgrade(lua_State* L){ data->ctx = ctx->ctx; data->buffer = str_init(""); - luaI_tsetnil(L, req_idx, "roll"); + luaI_tsetnil(L, req_idx, "load"); luaI_tsetlud(L, res_idx, "_ws", data); #warning "missing ws commands" luaI_tsetcf(L, res_idx, "send", l_ws_write); -- cgit v1.2.3