From 655b57d340d78e2a4adf5340e474bfa71979aa0e Mon Sep 17 00:00:00 2001 From: Tomasz Andrzejak Date: Tue, 21 Jan 2025 15:19:01 +0100 Subject: [PATCH 01/13] Add FormData streaming encoder --- builtins/web/fetch/request-response.cpp | 27 +- builtins/web/file.cpp | 5 + builtins/web/file.h | 2 + builtins/web/form-data/form-data-encoder.cpp | 496 ++++++++++++++++++ builtins/web/form-data/form-data-encoder.h | 46 ++ builtins/web/{ => form-data}/form-data.cpp | 9 +- builtins/web/{ => form-data}/form-data.h | 1 + cmake/builtins.cmake | 10 +- tests/integration/fetch/fetch.js | 77 ++- .../request/request-consume-empty.any.js.json | 2 +- .../response-consume-empty.any.js.json | 2 +- 11 files changed, 669 insertions(+), 8 deletions(-) create mode 100644 builtins/web/form-data/form-data-encoder.cpp create mode 100644 builtins/web/form-data/form-data-encoder.h rename builtins/web/{ => form-data}/form-data.cpp (98%) rename builtins/web/{ => form-data}/form-data.h (98%) diff --git a/builtins/web/fetch/request-response.cpp b/builtins/web/fetch/request-response.cpp index dbf2ece0..8c6bf651 100644 --- a/builtins/web/fetch/request-response.cpp +++ b/builtins/web/fetch/request-response.cpp @@ -1,6 +1,8 @@ #include "request-response.h" #include "../blob.h" +#include "../form-data/form-data.h" +#include "../form-data/form-data-encoder.h" #include "../streams/native-stream-source.h" #include "../streams/transform-stream.h" #include "../url.h" @@ -27,8 +29,6 @@ #include "js/experimental/TypedData.h" #pragma clang diagnostic pop -using builtins::web::blob::Blob; - namespace builtins::web::streams { bool NativeStreamSource::stream_is_body(JSContext *cx, JS::HandleObject stream) { @@ -41,6 +41,10 @@ bool NativeStreamSource::stream_is_body(JSContext *cx, JS::HandleObject stream) namespace builtins::web::fetch { +using blob::Blob; +using form_data::FormData; +using form_data::MultipartFormData; + static api::Engine *ENGINE; bool error_stream_controller_with_pending_exception(JSContext *cx, 
HandleObject stream) { @@ -293,6 +297,7 @@ bool RequestOrResponse::extract_body(JSContext *cx, JS::HandleObject self, // - byte sequence // - buffer source // - Blob + // - FormData // - USV strings // - URLSearchParams // - ReadableStream @@ -320,6 +325,24 @@ bool RequestOrResponse::extract_body(JSContext *cx, JS::HandleObject self, MOZ_ASSERT(host_type_str); content_type = host_type_str.ptr.get(); } + } else if (FormData::is_instance(body_obj)) { + RootedObject encoder(cx, MultipartFormData::create(cx, body_obj)); + if (!encoder) { + return false; + } + + RootedObject stream(cx, MultipartFormData::encode_stream(cx, encoder)); + if (!stream) { + return false; + } + + auto boundary = MultipartFormData::boundary(encoder); + auto type = "multipart/form-data; boundary=" + boundary; + host_type_str = std::string_view(type); + content_type = host_type_str.ptr.get(); + + RootedValue stream_val(cx, JS::ObjectValue(*stream)); + JS_SetReservedSlot(self, static_cast(RequestOrResponse::Slots::BodyStream), stream_val); } else if (body_obj && JS::IsReadableStream(body_obj)) { if (RequestOrResponse::body_unusable(cx, body_obj)) { return api::throw_error(cx, FetchErrors::BodyStreamUnusable); diff --git a/builtins/web/file.cpp b/builtins/web/file.cpp index e07cbcf0..eecfd087 100644 --- a/builtins/web/file.cpp +++ b/builtins/web/file.cpp @@ -75,6 +75,11 @@ bool File::lastModified_get(JSContext *cx, unsigned argc, JS::Value *vp) { return true; } +JSString *File::name(JSObject *self) { + MOZ_ASSERT(is_instance(self)); + return JS::GetReservedSlot(self, static_cast(Slots::Name)).toString(); +} + // https://w3c.github.io/FileAPI/#file-constructor bool File::init(JSContext *cx, HandleObject self, HandleValue fileBits, HandleValue fileName, HandleValue opts) { diff --git a/builtins/web/file.h b/builtins/web/file.h index 040625b2..310c76c2 100644 --- a/builtins/web/file.h +++ b/builtins/web/file.h @@ -23,6 +23,8 @@ class File : public BuiltinImpl { static const JSFunctionSpec methods[]; 
static const JSPropertySpec properties[]; + static JSString *name(JSObject *self); + static JSObject *create(JSContext *cx, HandleValue fileBits, HandleValue fileName, HandleValue opts); static bool init(JSContext *cx, HandleObject self, HandleValue fileBits, HandleValue fileName, HandleValue opts); static bool init_class(JSContext *cx, HandleObject global); diff --git a/builtins/web/form-data/form-data-encoder.cpp b/builtins/web/form-data/form-data-encoder.cpp new file mode 100644 index 00000000..31782dbe --- /dev/null +++ b/builtins/web/form-data/form-data-encoder.cpp @@ -0,0 +1,496 @@ +#include "form-data-encoder.h" +#include "extension-api.h" +#include "form-data.h" + +#include "../blob.h" +#include "../file.h" +#include "../streams/buf-reader.h" +#include "../streams/native-stream-source.h" + +#include "encode.h" +#include "mozilla/Assertions.h" + +#include +#include + +namespace { + +const char LF = '\n'; +const char CR = '\r'; +const char *CRLF = "\r\n"; + +size_t compute_normalized_len(std::string_view src, const char *newline) { + size_t len = 0; + size_t newline_len = strlen(newline); + + for (size_t i = 0; i < src.size(); i++) { + if (src[i] == CR) { + if (i + 1 < src.size() && src[i + 1] == LF) { + len += newline_len; + i++; + } else { + len += newline_len; + } + } else if (src[i] == LF) { + len += newline_len; + } else { + len += 1; + } + } + + return len; +} + +// Replace every occurrence of U+000D (CR) not followed by U+000A (LF), +// and every occurrence of U+000A (LF) not preceded by U+000D (CR), +// in entry's name, by a string consisting of a U+000D (CR) and U+000A (LF). 
+// +// https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#multipart-form-data +std::optional normalize_newlines(std::string_view src) { + std::string output; + + output.reserve(compute_normalized_len(src, CRLF)); + + for (size_t i = 0; i < src.size(); i++) { + if (src[i] == CR) { + if (i + 1 < src.size() && src[i + 1] == LF) { + output += CRLF; + i++; + } else { + output += CRLF; + } + } else if (src[i] == LF) { + output += CRLF; + } else { + output.push_back(src[i]); + } + } + + return output; +} + +std::optional normalize_newlines(JSContext *cx, HandleValue src) { + auto chars = core::encode(cx, src); + if (!chars) { + return std::nullopt; + } + + return normalize_newlines(chars); +} + +// For field names and filenames for file fields, the result of the encoding must +// be escaped by replacing any 0x0A (LF) bytes with the byte sequence +// `%0A`, 0x0D (CR) with `%0D` and 0x22 (") with `%22`. +// +// https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#multipart-form-data +std::optional escape_newlines(std::string_view str) { + int32_t offset = 0; + std::string output(str); + + while ((offset = output.find_first_of("\n\r\"", offset)) != std::string::npos) { + if (output[offset] == '\n') { + output.replace(offset, 1, "%0A"); + } else if (output[offset] == '\r') { + output.replace(offset, 1, "%0D"); + } else if (output[offset] == '"') { + output.replace(offset, 1, "%22"); + } else { + offset++; + continue; + } + } + + return output; +} + +std::optional escape_newlines(JSContext *cx, HandleValue src) { + auto chars = core::encode(cx, src); + if (!chars) { + return std::nullopt; + } + + return escape_newlines(chars); +} + +} // namespace + +namespace builtins { +namespace web { +namespace form_data { + +using blob::Blob; +using file::File; +using streams::BufReader; +using streams::NativeStreamSource; + +using EntryList = JS::GCVector; + +struct StreamContext { + StreamContext(const EntryList *entries, std::span outbuf) + : 
entries(entries), outbuf(outbuf), read(0), done(false) {} + const EntryList *entries; + + std::span outbuf; + size_t read; + bool done; + + size_t remaining() { + MOZ_ASSERT(outbuf.size() >= read); + return outbuf.size() - read; + } + + template size_t write(I first, I last) { + auto datasz = static_cast(std::distance(first, last)); + if (datasz == 0) { + return 0; + } + + size_t bufsz = remaining(); + if (bufsz == 0) { + return 0; + } + + size_t to_write = std::min(datasz, bufsz); + auto dest = outbuf.begin() + read; + + std::copy_n(first, to_write, dest); + read += to_write; + return to_write; + } +}; + +class MultipartFormDataImpl { + enum class State : int { Start, EntryHeader, EntryBody, EntryFooter, Close, Done }; + + State state_; + std::string boundary_; + std::string remainder_; + std::string_view remainder_view_; + + size_t chunk_idx_; + size_t file_leftovers_; + + bool is_draining() { return (file_leftovers_ || remainder_.size()); }; + + template size_t write_and_cache_remainder(StreamContext &stream, I first, I last); + + State next_state(StreamContext &stream); + void maybe_drain_leftovers(JSContext *cx, StreamContext &stream); + bool handle_entry_header(JSContext *cx, StreamContext &stream); + bool handle_entry_body(JSContext *cx, StreamContext &stream); + bool handle_entry_footer(JSContext *cx, StreamContext &stream); + bool handle_close(JSContext *cx, StreamContext &stream); + +public: + MultipartFormDataImpl(std::string boundary) + : state_(State::Start), boundary_(std::move(boundary)), chunk_idx_(0), file_leftovers_(0) { + remainder_.reserve(128); + } + + std::string boundary() { return boundary_; }; + bool read_next(JSContext *cx, StreamContext &stream); +}; + +MultipartFormDataImpl::State MultipartFormDataImpl::next_state(StreamContext &stream) { + auto finished = (chunk_idx_ >= stream.entries->length()); + auto empty = stream.entries->empty(); + + switch (state_) { + case State::Start: + return empty ? 
State::Done : State::EntryHeader; + case State::EntryHeader: + return State::EntryBody; + case State::EntryBody: + return State::EntryFooter; + case State::EntryFooter: + return finished ? State::Close : State::EntryHeader; + case State::Close: + return State::Done; + case State::Done: + return State::Done; + default: + MOZ_ASSERT_UNREACHABLE("Invalid state"); + } +} + +void MultipartFormDataImpl::maybe_drain_leftovers(JSContext *cx, StreamContext &stream) { + if (!remainder_view_.empty()) { + auto written = stream.write(remainder_view_.begin(), remainder_view_.end()); + remainder_view_.remove_prefix(written); + + if (remainder_view_.empty()) { + remainder_.clear(); + remainder_view_ = remainder_; + } + } + + if (file_leftovers_ != 0) { + auto entry = stream.entries->begin()[chunk_idx_]; + MOZ_ASSERT(state_ == State::EntryBody); + MOZ_ASSERT(File::is_instance(entry.value)); + + RootedObject obj(cx, &entry.value.toObject()); + auto blob = Blob::blob(obj); + auto blobsz = blob->length(); + auto offset = blobsz - file_leftovers_; + file_leftovers_ -= stream.write(blob->begin() + offset, blob->end()); + } +} + +template +size_t MultipartFormDataImpl::write_and_cache_remainder(StreamContext &stream, I first, I last) { + auto datasz = static_cast(std::distance(first, last)); + auto written = stream.write(first, last); + + MOZ_ASSERT(written <= datasz); + + auto leftover = datasz - written; + if (leftover > 0) { + MOZ_ASSERT(remainder_.empty()); + remainder_.assign(first + written, last); + remainder_view_ = remainder_; + } + + return written; +} + +bool MultipartFormDataImpl::handle_entry_header(JSContext *cx, StreamContext &stream) { + auto entry = stream.entries->begin()[chunk_idx_]; + auto header = fmt::memory_buffer(); + auto name = escape_newlines(entry.name).value(); + + fmt::format_to(std::back_inserter(header), "--{}\r\n", boundary_); + fmt::format_to(std::back_inserter(header), "Content-Disposition: form-data; name=\"{}\"", name); + + if (entry.value.isString()) 
{ + fmt::format_to(std::back_inserter(header), "\r\n\r\n"); + } else { + MOZ_ASSERT(File::is_instance(entry.value)); + RootedObject obj(cx, &entry.value.toObject()); + + RootedValue filename_val(cx, JS::StringValue(File::name(obj))); + auto filename = escape_newlines(cx, filename_val); + + RootedString type_str(cx, Blob::type(obj)); + auto type = core::encode(cx, type_str); + + if (!filename || !type) { + return false; + } + + auto tmp = type.size() ? std::string_view(type) : "application/octet-stream"; + fmt::format_to(std::back_inserter(header), "; filename=\"{}\"\r\n", filename.value()); + fmt::format_to(std::back_inserter(header), "Content-Type: {}\r\n\r\n", tmp); + } + + // If there are leftovers that didn't fit in outbuf, put it into remainder_ + // and it will be drained the next run. + write_and_cache_remainder(stream, header.begin(), header.end()); + return true; +} + +bool MultipartFormDataImpl::handle_entry_body(JSContext *cx, StreamContext &stream) { + auto entry = stream.entries->begin()[chunk_idx_]; + + if (entry.value.isString()) { + RootedValue value_val(cx, entry.value); + auto maybe_normalized = normalize_newlines(cx, value_val); + if (!maybe_normalized) { + return false; + } + + auto normalized = maybe_normalized.value(); + write_and_cache_remainder(stream, normalized.begin(), normalized.end()); + } else { + MOZ_ASSERT(File::is_instance(entry.value)); + RootedObject obj(cx, &entry.value.toObject()); + + auto blob = Blob::blob(obj); + auto blobsz = blob->length(); + auto written = stream.write(blob->begin(), blob->end()); + MOZ_ASSERT(written <= blobsz); + file_leftovers_ = blobsz - written; + } + + return true; +} + +bool MultipartFormDataImpl::handle_entry_footer(JSContext *cx, StreamContext &stream) { + auto footer = fmt::memory_buffer(); + fmt::format_to(std::back_inserter(footer), "\r\n"); + + write_and_cache_remainder(stream, footer.begin(), footer.end()); + chunk_idx_ += 1; + + MOZ_ASSERT(chunk_idx_ <= stream.entries->length()); + return 
true; +} + +bool MultipartFormDataImpl::handle_close(JSContext *cx, StreamContext &stream) { + auto footer = fmt::memory_buffer(); + fmt::format_to(std::back_inserter(footer), "--{}--", boundary_); + + write_and_cache_remainder(stream, footer.begin(), footer.end()); + return true; +} + +bool MultipartFormDataImpl::read_next(JSContext *cx, StreamContext &stream) { + maybe_drain_leftovers(cx, stream); + if (is_draining()) { + return true; + } + + state_ = next_state(stream); + + switch (state_) { + case State::EntryHeader: { + return handle_entry_header(cx, stream); + } + case State::EntryBody: { + return handle_entry_body(cx, stream); + } + case State::EntryFooter: { + return handle_entry_footer(cx, stream); + } + case State::Close: { + return handle_close(cx, stream); + } + case State::Done: { + stream.done = true; + return true; + } + default: + MOZ_ASSERT_UNREACHABLE("Invalid state"); + return false; + } +} + +const JSFunctionSpec MultipartFormData::static_methods[] = {JS_FS_END}; +const JSPropertySpec MultipartFormData::static_properties[] = {JS_PS_END}; +const JSFunctionSpec MultipartFormData::methods[] = {JS_FS_END}; +const JSPropertySpec MultipartFormData::properties[] = {JS_PS_END}; + +bool MultipartFormData::read(JSContext *cx, HandleObject self, std::span buf, size_t start, + size_t *read, bool *done) { + MOZ_ASSERT(is_instance(self)); + + if (buf.empty()) { + *read = 0; + return true; + } + + size_t bufsz = buf.size(); + size_t total = 0; + bool finished = false; + RootedObject obj(cx, form_data(self)); + + auto entries = FormData::entry_list(obj); + auto impl = as_impl(self); + + // Try to fill the buffer + while (total < bufsz && !finished) { + auto subspan = buf.subspan(total); + auto stream = StreamContext(entries, subspan); + + if (!impl->read_next(cx, stream)) { + return false; + } + + total += stream.read; + finished = stream.done; + } + + // Delay reporting done to produce a separate empty chunk. 
+ *done = finished && total == 0; + *read = total; + return true; +} + +std::string MultipartFormData::boundary(JSObject *self) { + MOZ_ASSERT(is_instance(self)); + auto impl = as_impl(self); + MOZ_ASSERT(impl); + + return impl->boundary(); +} + +MultipartFormDataImpl *MultipartFormData::as_impl(JSObject *self) { + MOZ_ASSERT(is_instance(self)); + return reinterpret_cast( + JS::GetReservedSlot(self, Slots::Inner).toPrivate()); +} + +JSObject *MultipartFormData::form_data(JSObject *self) { + MOZ_ASSERT(is_instance(self)); + return &JS::GetReservedSlot(self, Slots::Form).toObject(); +} + +JSObject *MultipartFormData::encode_stream(JSContext *cx, HandleObject self) { + RootedObject reader(cx, BufReader::create(cx, self, read)); + if (!reader) { + return nullptr; + } + + RootedObject native_stream(cx, BufReader::stream(reader)); + RootedObject default_stream(cx, NativeStreamSource::stream(native_stream)); + + return default_stream; +} + +JSObject *MultipartFormData::create(JSContext *cx, HandleObject form_data) { + JS::RootedObject self(cx, JS_NewObjectWithGivenProto(cx, &class_, proto_obj)); + if (!self) { + return nullptr; + } + + if (!FormData::is_instance(form_data)) { + return nullptr; + } + + auto res = host_api::Random::get_bytes(16); + if (auto *err = res.to_err()) { + return nullptr; + } + + // Hex encode bytes to string + auto bytes = std::move(res.unwrap()); + + std::string hex_str; + hex_str.reserve(bytes.size() * 2); + + for (auto b : bytes) { + fmt::format_to(std::back_inserter(hex_str), "{:02x}", b); + } + + auto boundary = fmt::format("--Boundary{}", hex_str); + auto impl = new (std::nothrow) MultipartFormDataImpl(boundary); + if (!impl) { + return nullptr; + } + + JS::SetReservedSlot(self, Slots::Form, JS::ObjectValue(*form_data)); + JS::SetReservedSlot(self, Slots::Inner, JS::PrivateValue(reinterpret_cast(impl))); + + return self; +} + +bool MultipartFormData::init_class(JSContext *cx, JS::HandleObject global) { + return init_class_impl(cx, global); 
+} + +bool MultipartFormData::constructor(JSContext *cx, unsigned argc, JS::Value *vp) { + MOZ_ASSERT_UNREACHABLE("No MultipartFormData Ctor builtin"); + return api::throw_error(cx, api::Errors::NoCtorBuiltin, class_name); +} + +void MultipartFormData::finalize(JS::GCContext *gcx, JSObject *self) { + MOZ_ASSERT(is_instance(self)); + auto impl = as_impl(self); + if (impl) { + delete impl; + } +} + +} // namespace form_data +} // namespace web +} // namespace builtins diff --git a/builtins/web/form-data/form-data-encoder.h b/builtins/web/form-data/form-data-encoder.h new file mode 100644 index 00000000..e2fccecf --- /dev/null +++ b/builtins/web/form-data/form-data-encoder.h @@ -0,0 +1,46 @@ +#ifndef BUILTINS_WEB_FORM_DATA_ENCODER_ +#define BUILTINS_WEB_FORM_DATA_ENCODER_ + +#include "builtin.h" +#include "js/TypeDecls.h" + +namespace builtins { +namespace web { +namespace form_data { + +class MultipartFormDataImpl; + +class MultipartFormData : public FinalizableBuiltinImpl { + static MultipartFormDataImpl *as_impl(JSObject *self); + + static bool read(JSContext *cx, HandleObject self, std::span buf, + size_t start, size_t *read, bool *done); + +public: + static constexpr const char *class_name = "MultipartFormData"; + static constexpr unsigned ctor_length = 0; + + static const JSFunctionSpec static_methods[]; + static const JSPropertySpec static_properties[]; + static const JSFunctionSpec methods[]; + static const JSPropertySpec properties[]; + + enum Slots { Form, Inner, Count }; + + static JSObject *form_data(JSObject *self); + static std::string boundary(JSObject *self); + + static size_t query_length(JSContext *cx, HandleObject self); + static JSObject *encode_stream(JSContext *cx, HandleObject self); + static JSObject *create(JSContext *cx, HandleObject form_data); + + static bool init_class(JSContext *cx, HandleObject global); + static bool constructor(JSContext *cx, unsigned argc, Value *vp); + static void finalize(JS::GCContext *gcx, JSObject *self); +}; + +} 
// namespace form_data_encoder +} // namespace web +} // namespace builtins + +#endif // BUILTINS_WEB_FORM_DATA_ENCODER_ diff --git a/builtins/web/form-data.cpp b/builtins/web/form-data/form-data.cpp similarity index 98% rename from builtins/web/form-data.cpp rename to builtins/web/form-data/form-data.cpp index e6082b0d..def91eef 100644 --- a/builtins/web/form-data.cpp +++ b/builtins/web/form-data/form-data.cpp @@ -1,8 +1,10 @@ #include "form-data.h" -#include "blob.h" +#include "form-data-encoder.h" + +#include "../blob.h" +#include "../file.h" #include "decode.h" #include "encode.h" -#include "file.h" #include "host_api.h" #include "js/TypeDecls.h" @@ -461,6 +463,9 @@ bool install(api::Engine *engine) { if (!FormDataIterator::init_class(engine->cx(), engine->global())) { return false; } + if (!MultipartFormData::init_class(engine->cx(), engine->global())) { + return false; + } return true; } diff --git a/builtins/web/form-data.h b/builtins/web/form-data/form-data.h similarity index 98% rename from builtins/web/form-data.h rename to builtins/web/form-data/form-data.h index 3eef3323..7d2f4358 100644 --- a/builtins/web/form-data.h +++ b/builtins/web/form-data/form-data.h @@ -55,6 +55,7 @@ class FormData : public TraceableBuiltinImpl { static EntryList *entry_list(JSObject *self); friend class FormDataIterator; + friend class MultipartFormData; public: static constexpr const char *class_name = "FormData"; diff --git a/cmake/builtins.cmake b/cmake/builtins.cmake index 4ca403bf..86eb92da 100644 --- a/cmake/builtins.cmake +++ b/cmake/builtins.cmake @@ -19,7 +19,15 @@ add_builtin(builtins/web/structured-clone.cpp) add_builtin(builtins/web/base64.cpp) add_builtin(builtins/web/blob.cpp) add_builtin(builtins/web/file.cpp) -add_builtin(builtins/web/form-data.cpp) + +add_builtin( + builtins::web::form_data + SRC + builtins/web/form-data/form-data.cpp + builtins/web/form-data/form-data-encoder.cpp + DEPENDENCIES + fmt) + add_builtin( builtins::web::dom_exception SRC diff --git 
a/tests/integration/fetch/fetch.js b/tests/integration/fetch/fetch.js index 7a6fad8c..d6dd70bf 100644 --- a/tests/integration/fetch/fetch.js +++ b/tests/integration/fetch/fetch.js @@ -1,5 +1,5 @@ import { serveTest } from '../test-server.js'; -import { strictEqual, deepStrictEqual, throws } from '../../assert.js'; +import { assert, strictEqual, deepStrictEqual, throws } from '../../assert.js'; export const handler = serveTest(async (t) => { await t.test('headers-non-ascii-latin1-field-value', async () => { @@ -146,4 +146,79 @@ export const handler = serveTest(async (t) => { URL.revokeObjectURL(fileUrl); }); + + await t.test('form-data-encode', async () => { + async function readStream(stream) { + const reader = stream.getReader(); + const chunks = []; + let totalLen = 0; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + chunks.push(value); + totalLen += value.length; + } + const joined = new Uint8Array(totalLen); + let offset = 0; + for (const chunk of chunks) { + joined.set(chunk, offset); + offset += chunk.length; + } + return joined.buffer; + } + + const form = new FormData(); + form.append('field1', 'value1'); + form.append('field2', 'value2'); + + const file = new File(['Hello World!'], 'dummy.txt', { type: 'foo' }); + form.append('file1', file); + + const req = new Request('https://example.com', { method: 'POST', body: form }); + + const contentType = req.headers.get('Content-Type') || ''; + assert( + contentType.startsWith('multipart/form-data; boundary='), + `Content-Type should be multipart/form-data; got: ${contentType}` + ); + + const boundary = contentType.split('boundary=')[1]; + assert(boundary, 'Boundary must be present in the Content-Type'); + + const arrayBuf = await readStream(req.body); + const bodyStr = new TextDecoder().decode(arrayBuf); + const lines = bodyStr.split('\r\n'); + + const expectedLines = [ + `--${boundary}`, + 'Content-Disposition: form-data; name="field1"', + '', + 'value1', + `--${boundary}`, 
+ 'Content-Disposition: form-data; name="field2"', + '', + 'value2', + `--${boundary}`, + 'Content-Disposition: form-data; name="file1"; filename="dummy.txt"', + 'Content-Type: foo', + '', + 'Hello World!', + `--${boundary}--`, + ]; + + strictEqual( + lines.length, + expectedLines.length, + `Expected ${expectedLines.length} lines, got ${lines.length}` + ); + + for (let i = 0; i < expectedLines.length; i++) { + strictEqual( + lines[i], + expectedLines[i], + `Mismatch at line ${i}. Actual: '${lines[i]}' Expected: '${expectedLines[i]}'` + ); + } + }); }); diff --git a/tests/wpt-harness/expectations/fetch/api/request/request-consume-empty.any.js.json b/tests/wpt-harness/expectations/fetch/api/request/request-consume-empty.any.js.json index 0539b451..025d9ba6 100644 --- a/tests/wpt-harness/expectations/fetch/api/request/request-consume-empty.any.js.json +++ b/tests/wpt-harness/expectations/fetch/api/request/request-consume-empty.any.js.json @@ -36,7 +36,7 @@ "status": "PASS" }, "Consume empty FormData request body as text": { - "status": "FAIL" + "status": "PASS" }, "Consume empty ArrayBuffer request body as text": { "status": "PASS" diff --git a/tests/wpt-harness/expectations/fetch/api/response/response-consume-empty.any.js.json b/tests/wpt-harness/expectations/fetch/api/response/response-consume-empty.any.js.json index 095fa14b..7b003872 100644 --- a/tests/wpt-harness/expectations/fetch/api/response/response-consume-empty.any.js.json +++ b/tests/wpt-harness/expectations/fetch/api/response/response-consume-empty.any.js.json @@ -36,7 +36,7 @@ "status": "PASS" }, "Consume empty FormData response body as text": { - "status": "FAIL" + "status": "PASS" }, "Consume empty ArrayBuffer response body as text": { "status": "PASS" From 493a46c03b74f59160d53197b0e9f009afd42b99 Mon Sep 17 00:00:00 2001 From: Tomasz Andrzejak Date: Thu, 23 Jan 2025 10:46:19 +0100 Subject: [PATCH 02/13] Fix setting content type and use base64 for boundary encoding --- 
builtins/web/fetch/request-response.cpp | 2 +- builtins/web/form-data/form-data-encoder.cpp | 14 +++++--------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/builtins/web/fetch/request-response.cpp b/builtins/web/fetch/request-response.cpp index 8c6bf651..e7838ba0 100644 --- a/builtins/web/fetch/request-response.cpp +++ b/builtins/web/fetch/request-response.cpp @@ -338,7 +338,7 @@ bool RequestOrResponse::extract_body(JSContext *cx, JS::HandleObject self, auto boundary = MultipartFormData::boundary(encoder); auto type = "multipart/form-data; boundary=" + boundary; - host_type_str = std::string_view(type); + host_type_str = type.c_str(); content_type = host_type_str.ptr.get(); RootedValue stream_val(cx, JS::ObjectValue(*stream)); diff --git a/builtins/web/form-data/form-data-encoder.cpp b/builtins/web/form-data/form-data-encoder.cpp index 31782dbe..1305423b 100644 --- a/builtins/web/form-data/form-data-encoder.cpp +++ b/builtins/web/form-data/form-data-encoder.cpp @@ -2,6 +2,7 @@ #include "extension-api.h" #include "form-data.h" +#include "../base64.h" #include "../blob.h" #include "../file.h" #include "../streams/buf-reader.h" @@ -447,22 +448,17 @@ JSObject *MultipartFormData::create(JSContext *cx, HandleObject form_data) { return nullptr; } - auto res = host_api::Random::get_bytes(16); + auto res = host_api::Random::get_bytes(12); if (auto *err = res.to_err()) { return nullptr; } // Hex encode bytes to string auto bytes = std::move(res.unwrap()); + auto bytes_str = std::string_view((char *)(bytes.ptr.get()), bytes.size()); + auto base64_str = base64::forgivingBase64Encode(bytes_str, base64::base64EncodeTable); - std::string hex_str; - hex_str.reserve(bytes.size() * 2); - - for (auto b : bytes) { - fmt::format_to(std::back_inserter(hex_str), "{:02x}", b); - } - - auto boundary = fmt::format("--Boundary{}", hex_str); + auto boundary = fmt::format("--Boundary{}", base64_str); auto impl = new (std::nothrow) MultipartFormDataImpl(boundary); if (!impl) { 
return nullptr; From 0243ac265a208548478bfe01a9399c4ba076d2b8 Mon Sep 17 00:00:00 2001 From: Tomasz Andrzejak Date: Thu, 23 Jan 2025 11:03:30 +0100 Subject: [PATCH 03/13] Update wpt expectations --- .../expectations/fetch/api/request/request-init-002.any.js.json | 2 +- .../fetch/api/request/request-init-contenttype.any.js.json | 2 +- .../fetch/api/response/response-consume-stream.any.js.json | 2 +- .../fetch/api/response/response-init-002.any.js.json | 2 +- .../fetch/api/response/response-init-contenttype.any.js.json | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/wpt-harness/expectations/fetch/api/request/request-init-002.any.js.json b/tests/wpt-harness/expectations/fetch/api/request/request-init-002.any.js.json index c1f37244..174bb56d 100644 --- a/tests/wpt-harness/expectations/fetch/api/request/request-init-002.any.js.json +++ b/tests/wpt-harness/expectations/fetch/api/request/request-init-002.any.js.json @@ -12,7 +12,7 @@ "status": "PASS" }, "Initialize Request's body with \"[object Object]\", multipart/form-data": { - "status": "FAIL" + "status": "PASS" }, "Initialize Request's body with \"This is a USVString\", text/plain;charset=UTF-8": { "status": "PASS" diff --git a/tests/wpt-harness/expectations/fetch/api/request/request-init-contenttype.any.js.json b/tests/wpt-harness/expectations/fetch/api/request/request-init-contenttype.any.js.json index 180d3949..d476e184 100644 --- a/tests/wpt-harness/expectations/fetch/api/request/request-init-contenttype.any.js.json +++ b/tests/wpt-harness/expectations/fetch/api/request/request-init-contenttype.any.js.json @@ -15,7 +15,7 @@ "status": "PASS" }, "Default Content-Type for Request with FormData body": { - "status": "FAIL" + "status": "PASS" }, "Default Content-Type for Request with URLSearchParams body": { "status": "PASS" diff --git a/tests/wpt-harness/expectations/fetch/api/response/response-consume-stream.any.js.json 
b/tests/wpt-harness/expectations/fetch/api/response/response-consume-stream.any.js.json index 0af94bba..9b86d205 100644 --- a/tests/wpt-harness/expectations/fetch/api/response/response-consume-stream.any.js.json +++ b/tests/wpt-harness/expectations/fetch/api/response/response-consume-stream.any.js.json @@ -18,7 +18,7 @@ "status": "PASS" }, "Read form data response's body as readableStream with mode=undefined": { - "status": "FAIL" + "status": "PASS" }, "Read blob response's body as readableStream with mode=byob": { "status": "FAIL" diff --git a/tests/wpt-harness/expectations/fetch/api/response/response-init-002.any.js.json b/tests/wpt-harness/expectations/fetch/api/response/response-init-002.any.js.json index f44af7e2..83efcb08 100644 --- a/tests/wpt-harness/expectations/fetch/api/response/response-init-002.any.js.json +++ b/tests/wpt-harness/expectations/fetch/api/response/response-init-002.any.js.json @@ -6,7 +6,7 @@ "status": "PASS" }, "Initialize Response's body with multipart/form-data": { - "status": "FAIL" + "status": "PASS" }, "Initialize Response's body with application/x-www-form-urlencoded;charset=UTF-8": { "status": "PASS" diff --git a/tests/wpt-harness/expectations/fetch/api/response/response-init-contenttype.any.js.json b/tests/wpt-harness/expectations/fetch/api/response/response-init-contenttype.any.js.json index c5436211..d04488c6 100644 --- a/tests/wpt-harness/expectations/fetch/api/response/response-init-contenttype.any.js.json +++ b/tests/wpt-harness/expectations/fetch/api/response/response-init-contenttype.any.js.json @@ -15,7 +15,7 @@ "status": "PASS" }, "Default Content-Type for Response with FormData body": { - "status": "FAIL" + "status": "PASS" }, "Default Content-Type for Response with URLSearchParams body": { "status": "PASS" From 58e0404bc48259f7ab1c8e9946fad842a9ed30ed Mon Sep 17 00:00:00 2001 From: Tomasz Andrzejak Date: Thu, 23 Jan 2025 11:15:09 +0100 Subject: [PATCH 04/13] Remove unused header --- 
builtins/web/form-data/form-data-encoder.h | 1 - 1 file changed, 1 deletion(-) diff --git a/builtins/web/form-data/form-data-encoder.h b/builtins/web/form-data/form-data-encoder.h index e2fccecf..54735b1a 100644 --- a/builtins/web/form-data/form-data-encoder.h +++ b/builtins/web/form-data/form-data-encoder.h @@ -2,7 +2,6 @@ #define BUILTINS_WEB_FORM_DATA_ENCODER_ #include "builtin.h" -#include "js/TypeDecls.h" namespace builtins { namespace web { From ace478da72d704c86628b52641c5b00bbb4a1156 Mon Sep 17 00:00:00 2001 From: Tomasz Andrzejak Date: Mon, 27 Jan 2025 10:14:24 +0100 Subject: [PATCH 05/13] Address code review feedback --- builtins/web/form-data/form-data-encoder.cpp | 41 +++++++++++--------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/builtins/web/form-data/form-data-encoder.cpp b/builtins/web/form-data/form-data-encoder.cpp index 1305423b..2a9983dd 100644 --- a/builtins/web/form-data/form-data-encoder.cpp +++ b/builtins/web/form-data/form-data-encoder.cpp @@ -20,18 +20,16 @@ const char LF = '\n'; const char CR = '\r'; const char *CRLF = "\r\n"; -size_t compute_normalized_len(std::string_view src, const char *newline) { +size_t compute_normalized_len(std::string_view src) { size_t len = 0; - size_t newline_len = strlen(newline); + const size_t newline_len = strlen(CRLF); for (size_t i = 0; i < src.size(); i++) { if (src[i] == CR) { if (i + 1 < src.size() && src[i + 1] == LF) { - len += newline_len; i++; - } else { - len += newline_len; } + len += newline_len; } else if (src[i] == LF) { len += newline_len; } else { @@ -50,7 +48,7 @@ size_t compute_normalized_len(std::string_view src, const char *newline) { std::optional normalize_newlines(std::string_view src) { std::string output; - output.reserve(compute_normalized_len(src, CRLF)); + output.reserve(compute_normalized_len(src)); for (size_t i = 0; i < src.size(); i++) { if (src[i] == CR) { @@ -84,7 +82,7 @@ std::optional normalize_newlines(JSContext *cx, HandleValue src) { // `%0A`, 0x0D 
(CR) with `%0D` and 0x22 (") with `%22`. // // https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#multipart-form-data -std::optional escape_newlines(std::string_view str) { +std::optional escape_name(std::string_view str) { int32_t offset = 0; std::string output(str); @@ -104,13 +102,13 @@ std::optional escape_newlines(std::string_view str) { return output; } -std::optional escape_newlines(JSContext *cx, HandleValue src) { +std::optional escape_name(JSContext *cx, HandleValue src) { auto chars = core::encode(cx, src); if (!chars) { return std::nullopt; } - return escape_newlines(chars); + return escape_name(chars); } } // namespace @@ -140,6 +138,10 @@ struct StreamContext { return outbuf.size() - read; } + // Writes as many elements from the range [first, last) into the underlying buffer as possible. + // + // This function is deliberately infallible as it simply writes up to the available buffer size + // and returns how many elements were successfully written. template size_t write(I first, I last) { auto datasz = static_cast(std::distance(first, last)); if (datasz == 0) { @@ -173,7 +175,7 @@ class MultipartFormDataImpl { bool is_draining() { return (file_leftovers_ || remainder_.size()); }; - template size_t write_and_cache_remainder(StreamContext &stream, I first, I last); + template void write_and_cache_remainder(StreamContext &stream, I first, I last); State next_state(StreamContext &stream); void maybe_drain_leftovers(JSContext *cx, StreamContext &stream); @@ -239,7 +241,7 @@ void MultipartFormDataImpl::maybe_drain_leftovers(JSContext *cx, StreamContext & } template -size_t MultipartFormDataImpl::write_and_cache_remainder(StreamContext &stream, I first, I last) { +void MultipartFormDataImpl::write_and_cache_remainder(StreamContext &stream, I first, I last) { auto datasz = static_cast(std::distance(first, last)); auto written = stream.write(first, last); @@ -251,17 +253,18 @@ size_t 
MultipartFormDataImpl::write_and_cache_remainder(StreamContext &stream, I remainder_.assign(first + written, last); remainder_view_ = remainder_; } - - return written; } bool MultipartFormDataImpl::handle_entry_header(JSContext *cx, StreamContext &stream) { auto entry = stream.entries->begin()[chunk_idx_]; auto header = fmt::memory_buffer(); - auto name = escape_newlines(entry.name).value(); + auto name = escape_name(entry.name); + + // Safety: The overloaded `escape_name` that takes a string argument is infallible. + MOZ_ASSERT(name.has_value()); fmt::format_to(std::back_inserter(header), "--{}\r\n", boundary_); - fmt::format_to(std::back_inserter(header), "Content-Disposition: form-data; name=\"{}\"", name); + fmt::format_to(std::back_inserter(header), "Content-Disposition: form-data; name=\"{}\"", name.value()); if (entry.value.isString()) { fmt::format_to(std::back_inserter(header), "\r\n\r\n"); @@ -270,12 +273,14 @@ bool MultipartFormDataImpl::handle_entry_header(JSContext *cx, StreamContext &st RootedObject obj(cx, &entry.value.toObject()); RootedValue filename_val(cx, JS::StringValue(File::name(obj))); - auto filename = escape_newlines(cx, filename_val); + auto filename = escape_name(cx, filename_val); + if (!filename) { + return false; + } RootedString type_str(cx, Blob::type(obj)); auto type = core::encode(cx, type_str); - - if (!filename || !type) { + if (!type) { return false; } From b739d56aaf65a381e5817b2918ce0ba30cd0fade Mon Sep 17 00:00:00 2001 From: Tomasz Andrzejak Date: Thu, 6 Feb 2025 14:05:38 +0100 Subject: [PATCH 06/13] Add integration tests --- builtins/web/form-data/form-data-encoder.cpp | 19 +- tests/integration/formdata/formdata.js | 253 +++++++++++++++++++ tests/integration/handlers.js | 1 + tests/tests.cmake | 1 + 4 files changed, 268 insertions(+), 6 deletions(-) create mode 100644 tests/integration/formdata/formdata.js diff --git a/builtins/web/form-data/form-data-encoder.cpp b/builtins/web/form-data/form-data-encoder.cpp index 
2a9983dd..51a5bca9 100644 --- a/builtins/web/form-data/form-data-encoder.cpp +++ b/builtins/web/form-data/form-data-encoder.cpp @@ -111,7 +111,17 @@ std::optional escape_name(JSContext *cx, HandleValue src) { return escape_name(chars); } -} // namespace +std::string normalize_and_escape(std::string_view src) { + auto normalized = normalize_newlines(src); + MOZ_ASSERT(normalized.has_value()); + + auto escaped = escape_name(normalized.value()); + MOZ_ASSERT(escaped); + + return escaped.value(); +} + +}// namespace namespace builtins { namespace web { @@ -258,13 +268,10 @@ void MultipartFormDataImpl::write_and_cache_remainder(StreamContext &stream, I f bool MultipartFormDataImpl::handle_entry_header(JSContext *cx, StreamContext &stream) { auto entry = stream.entries->begin()[chunk_idx_]; auto header = fmt::memory_buffer(); - auto name = escape_name(entry.name); - - // Safety: The overloaded `escape_name` that takes a string argument is infallible. - MOZ_ASSERT(name.has_value()); + auto name = normalize_and_escape(entry.name); fmt::format_to(std::back_inserter(header), "--{}\r\n", boundary_); - fmt::format_to(std::back_inserter(header), "Content-Disposition: form-data; name=\"{}\"", name.value()); + fmt::format_to(std::back_inserter(header), "Content-Disposition: form-data; name=\"{}\"", name); if (entry.value.isString()) { fmt::format_to(std::back_inserter(header), "\r\n\r\n"); diff --git a/tests/integration/formdata/formdata.js b/tests/integration/formdata/formdata.js new file mode 100644 index 00000000..bd70b6d4 --- /dev/null +++ b/tests/integration/formdata/formdata.js @@ -0,0 +1,253 @@ +import { serveTest } from "../test-server.js"; +import { assert, strictEqual, deepStrictEqual } from "../../assert.js"; + +// Adopted from wpt tests + +const kTestChars = "ABC~‾¥≈¤・・•∙·☼★星🌟星★☼·∙•・・¤≈¥‾~XYZ"; + +async function formDataPostFileUploadTest(fileBaseName) { + const formData = new FormData(); + let file = new Blob([kTestChars], { type: "text/plain" }); + try { + // Switch 
to File in browsers that allow this + file = new File([file], fileBaseName, { type: file.type }); + } catch (ignoredException) { + } + + formData.append("filename", fileBaseName); + formData.append(fileBaseName, "filename"); + formData.append("file", file, fileBaseName); + + const req = new Request('about:blank', { + method: 'POST', + body: formData, + }); + + const formDataText = await req.text(); + const formDataLines = formDataText.split("\r\n"); + if (formDataLines.length && !formDataLines[formDataLines.length - 1]) { + --formDataLines.length; + } + + assert( + formDataLines.length > 2, + `${fileBaseName}: multipart form data must have at least 3 lines: ${ + JSON.stringify(formDataText) + }`, + ); + + const boundary = formDataLines[0]; + assert( + formDataLines[formDataLines.length - 1] === boundary + "--", + `${fileBaseName}: multipart form data must end with ${boundary}--: ${ + JSON.stringify(formDataText) + }`, + ); + + const asValue = fileBaseName.replace(/\r\n?|\n/g, "\r\n"); + const asName = asValue.replace(/[\r\n"]/g, encodeURIComponent); + const asFilename = fileBaseName.replace(/[\r\n"]/g, encodeURIComponent); + const expectedText = [ + boundary, + 'Content-Disposition: form-data; name="filename"', + "", + asValue, + boundary, + `Content-Disposition: form-data; name="${asName}"`, + "", + "filename", + boundary, + `Content-Disposition: form-data; name="file"; ` + + `filename="${asFilename}"`, + "Content-Type: text/plain", + "", + kTestChars, + boundary + "--", + ].join("\r\n"); + + strictEqual( + formDataText, expectedText, + `Unexpected multipart-shaped form data received:\n${formDataText}\nExpected:\n${expectedText}`, + ); +} + +export const handler = serveTest(async (t) => { + await t.test("ASCII", async () => + formDataPostFileUploadTest("file-for-upload-in-form.txt") + ); + + await t.test("x-user-defined", async () => + formDataPostFileUploadTest("file-for-upload-in-form-\uF7F0\uF793\uF783\uF7A0.txt") + ); + + await t.test("windows-1252", async () 
=> + formDataPostFileUploadTest("file-for-upload-in-form-☺😂.txt") + ); + + await t.test("JIS X 0201 and JIS X 0208", async () => + formDataPostFileUploadTest("file-for-upload-in-form-???.txt") + ); + + await t.test("Unicode-1", async () => + formDataPostFileUploadTest("file-for-upload-in-form-???.txt") + ); + + await t.test("Unicode-2", async () => + formDataPostFileUploadTest(`file-for-upload-in-form-${kTestChars}.txt`) + ); + + await t.test("ASCII-with-NUL", async () => + formDataPostFileUploadTest("file-for-upload-in-form-NUL-[\0].txt") + ); + + await t.test("ASCII-with-BS", async () => + formDataPostFileUploadTest("file-for-upload-in-form-BS-[\b].txt") + ); + + await t.test("ASCII-with-VT", async () => + formDataPostFileUploadTest("file-for-upload-in-form-VT-[\v].txt") + ); + + await t.test("ASCII-with-LF", async () => + formDataPostFileUploadTest("file-for-upload-in-form-LF-[\n].txt") + ); + + await t.test("ASCII-with-LFCR", async () => + formDataPostFileUploadTest("file-for-upload-in-form-LF-CR-[\n\r].txt") + ); + + await t.test("ASCII-with-CR", async () => + formDataPostFileUploadTest("file-for-upload-in-form-CR-[\r].txt") + ); + + await t.test("ASCII-with-CRLF", async () => + formDataPostFileUploadTest("file-for-upload-in-form-CR-LF-[\r\n].txt") + ); + + await t.test("ASCII-with-HT", async () => + formDataPostFileUploadTest("file-for-upload-in-form-HT-[\t].txt") + ); + + await t.test("ASCII-with-FF", async () => + formDataPostFileUploadTest("file-for-upload-in-form-FF-[\f].txt") + ); + + await t.test("ASCII-with-DEL", async () => + formDataPostFileUploadTest("file-for-upload-in-form-DEL-[\x7F].txt") + ); + + await t.test("ASCII-with-ESC", async () => + formDataPostFileUploadTest("file-for-upload-in-form-ESC-[\x1B].txt") + ); + + await t.test("ASCII-with-SPACE", async () => + formDataPostFileUploadTest("file-for-upload-in-form-SPACE-[ ].txt") + ); + await t.test("ASCII-with-QUOTATION-MARK", async () => + 
formDataPostFileUploadTest("file-for-upload-in-form-QUOTATION-MARK-[\x22].txt") + ); + + await t.test("ASCII-with-double-quoted", async () => + formDataPostFileUploadTest("file-for-upload-in-form-double-quoted.txt") + ); + + await t.test("ASCII-with-REVERSE-SOLIDUS", async () => + formDataPostFileUploadTest("file-for-upload-in-form-REVERSE-SOLIDUS-[\\].txt") + ); + + await t.test("ASCII-with-EXCLAMATION-MARK", async () => + formDataPostFileUploadTest("file-for-upload-in-form-EXCLAMATION-MARK-[!].txt") + ); + + await t.test("ASCII-with-DOLLAR-SIGN", async () => + formDataPostFileUploadTest("file-for-upload-in-form-DOLLAR-SIGN-[$].txt") + ); + + await t.test("ASCII-with-PERCENT-SIGN", async () => + formDataPostFileUploadTest("file-for-upload-in-form-PERCENT-SIGN-[%].txt") + ); + + await t.test("ASCII-with-AMPERSAND", async () => + formDataPostFileUploadTest("file-for-upload-in-form-AMPERSAND-[&].txt") + ); + + await t.test("ASCII-with-APOSTROPHE", async () => + formDataPostFileUploadTest("file-for-upload-in-form-APOSTROPHE-['].txt") + ); + + await t.test("ASCII-with-LEFT-PARENTHESIS", async () => + formDataPostFileUploadTest("file-for-upload-in-form-LEFT-PARENTHESIS-[(].txt") + ); + + await t.test("ASCII-with-RIGHT-PARENTHESIS", async () => + formDataPostFileUploadTest("file-for-upload-in-form-RIGHT-PARENTHESIS-[)].txt") + ); + + await t.test("ASCII-with-ASTERISK", async () => + formDataPostFileUploadTest("file-for-upload-in-form-ASTERISK-[*].txt") + ); + + await t.test("ASCII-with-PLUS-SIGN", async () => + formDataPostFileUploadTest("file-for-upload-in-form-PLUS-SIGN-[+].txt") + ); + + await t.test("ASCII-with-COMMA", async () => + formDataPostFileUploadTest("file-for-upload-in-form-COMMA-[,].txt") + ); + + await t.test("ASCII-with-FULL-STOP", async () => + formDataPostFileUploadTest("file-for-upload-in-form-FULL-STOP-[.].txt") + ); + + await t.test("ASCII-with-SOLIDUS", async () => + formDataPostFileUploadTest("file-for-upload-in-form-SOLIDUS-[/].txt") + ); + + 
await t.test("ASCII-with-COLON", async () => + formDataPostFileUploadTest("file-for-upload-in-form-COLON-[:].txt") + ); + + await t.test("ASCII-with-SEMICOLON", async () => + formDataPostFileUploadTest("file-for-upload-in-form-SEMICOLON-[;].txt") + ); + + await t.test("ASCII-with-EQUALS-SIGN", async () => + formDataPostFileUploadTest("file-for-upload-in-form-EQUALS-SIGN-[=].txt") + ); + + await t.test("ASCII-with-QUESTION-MARK", async () => + formDataPostFileUploadTest("file-for-upload-in-form-QUESTION-MARK-[?].txt") + ); + + await t.test("ASCII-with-CIRCUMFLEX-ACCENT", async () => + formDataPostFileUploadTest("file-for-upload-in-form-CIRCUMFLEX-ACCENT-[^].txt") + ); + + await t.test("ASCII-with-LEFT-SQUARE-BRACKET", async () => + formDataPostFileUploadTest("file-for-upload-in-form-LEFT-SQUARE-BRACKET-[[].txt") + ); + + await t.test("ASCII-with-RIGHT-SQUARE-BRACKET", async () => + formDataPostFileUploadTest("file-for-upload-in-form-RIGHT-SQUARE-BRACKET-[]].txt") + ); + + await t.test("ASCII-with-LEFT-CURLY-BRACKET", async () => + formDataPostFileUploadTest("file-for-upload-in-form-LEFT-CURLY-BRACKET-[{].txt") + ); + + await t.test("ASCII-with-VERTICAL-LINE", async () => + formDataPostFileUploadTest("file-for-upload-in-form-VERTICAL-LINE-[|].txt") + ); + + await t.test("ASCII-with-RIGHT-CURLY-BRACKET", async () => + formDataPostFileUploadTest("file-for-upload-in-form-RIGHT-CURLY-BRACKET-[}].txt") + ); + + await t.test("ASCII-with-TILDE", async () => + formDataPostFileUploadTest("file-for-upload-in-form-TILDE-[~].txt") + ); + + await t.test("ASCII-with-single-quoted", async () => + formDataPostFileUploadTest("file-for-upload-in-form-single-quoted.txt") + ); +}); diff --git a/tests/integration/handlers.js b/tests/integration/handlers.js index c1ee5cbf..0e892f60 100644 --- a/tests/integration/handlers.js +++ b/tests/integration/handlers.js @@ -4,3 +4,4 @@ export { handler as performance } from './performance/performance.js'; export { handler as crypto } from 
'./crypto/crypto.js'; export { handler as timers } from './timers/timers.js'; export { handler as fetch } from './fetch/fetch.js'; +export { handler as formdata } from './formdata/formdata.js'; diff --git a/tests/tests.cmake b/tests/tests.cmake index 06b35a20..75200b5f 100644 --- a/tests/tests.cmake +++ b/tests/tests.cmake @@ -46,5 +46,6 @@ test_integration(blob) test_integration(btoa) test_integration(crypto) test_integration(fetch) +test_integration(formdata) test_integration(performance) test_integration(timers) From ed556fc1f588b1a2573cb5c2dac256c199202977 Mon Sep 17 00:00:00 2001 From: Tomasz Andrzejak Date: Thu, 6 Feb 2025 14:27:25 +0100 Subject: [PATCH 07/13] Move form-data-encode test from fetch to form-data --- tests/integration/fetch/fetch.js | 75 -------------------------- tests/integration/formdata/formdata.js | 75 ++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 75 deletions(-) diff --git a/tests/integration/fetch/fetch.js b/tests/integration/fetch/fetch.js index d6dd70bf..a74db090 100644 --- a/tests/integration/fetch/fetch.js +++ b/tests/integration/fetch/fetch.js @@ -146,79 +146,4 @@ export const handler = serveTest(async (t) => { URL.revokeObjectURL(fileUrl); }); - - await t.test('form-data-encode', async () => { - async function readStream(stream) { - const reader = stream.getReader(); - const chunks = []; - let totalLen = 0; - - while (true) { - const { done, value } = await reader.read(); - if (done) break; - chunks.push(value); - totalLen += value.length; - } - const joined = new Uint8Array(totalLen); - let offset = 0; - for (const chunk of chunks) { - joined.set(chunk, offset); - offset += chunk.length; - } - return joined.buffer; - } - - const form = new FormData(); - form.append('field1', 'value1'); - form.append('field2', 'value2'); - - const file = new File(['Hello World!'], 'dummy.txt', { type: 'foo' }); - form.append('file1', file); - - const req = new Request('https://example.com', { method: 'POST', body: form }); - - const 
contentType = req.headers.get('Content-Type') || ''; - assert( - contentType.startsWith('multipart/form-data; boundary='), - `Content-Type should be multipart/form-data; got: ${contentType}` - ); - - const boundary = contentType.split('boundary=')[1]; - assert(boundary, 'Boundary must be present in the Content-Type'); - - const arrayBuf = await readStream(req.body); - const bodyStr = new TextDecoder().decode(arrayBuf); - const lines = bodyStr.split('\r\n'); - - const expectedLines = [ - `--${boundary}`, - 'Content-Disposition: form-data; name="field1"', - '', - 'value1', - `--${boundary}`, - 'Content-Disposition: form-data; name="field2"', - '', - 'value2', - `--${boundary}`, - 'Content-Disposition: form-data; name="file1"; filename="dummy.txt"', - 'Content-Type: foo', - '', - 'Hello World!', - `--${boundary}--`, - ]; - - strictEqual( - lines.length, - expectedLines.length, - `Expected ${expectedLines.length} lines, got ${lines.length}` - ); - - for (let i = 0; i < expectedLines.length; i++) { - strictEqual( - lines[i], - expectedLines[i], - `Mismatch at line ${i}. 
Actual: '${lines[i]}' Expected: '${expectedLines[i]}'` - ); - } - }); }); diff --git a/tests/integration/formdata/formdata.js b/tests/integration/formdata/formdata.js index bd70b6d4..44bc177f 100644 --- a/tests/integration/formdata/formdata.js +++ b/tests/integration/formdata/formdata.js @@ -250,4 +250,79 @@ export const handler = serveTest(async (t) => { await t.test("ASCII-with-single-quoted", async () => formDataPostFileUploadTest("file-for-upload-in-form-single-quoted.txt") ); + + await t.test('form-data-encode', async () => { + async function readStream(stream) { + const reader = stream.getReader(); + const chunks = []; + let totalLen = 0; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + chunks.push(value); + totalLen += value.length; + } + const joined = new Uint8Array(totalLen); + let offset = 0; + for (const chunk of chunks) { + joined.set(chunk, offset); + offset += chunk.length; + } + return joined.buffer; + } + + const form = new FormData(); + form.append('field1', 'value1'); + form.append('field2', 'value2'); + + const file = new File(['Hello World!'], 'dummy.txt', { type: 'foo' }); + form.append('file1', file); + + const req = new Request('https://example.com', { method: 'POST', body: form }); + + const contentType = req.headers.get('Content-Type') || ''; + assert( + contentType.startsWith('multipart/form-data; boundary='), + `Content-Type should be multipart/form-data; got: ${contentType}` + ); + + const boundary = contentType.split('boundary=')[1]; + assert(boundary, 'Boundary must be present in the Content-Type'); + + const arrayBuf = await readStream(req.body); + const bodyStr = new TextDecoder().decode(arrayBuf); + const lines = bodyStr.split('\r\n'); + + const expectedLines = [ + `--${boundary}`, + 'Content-Disposition: form-data; name="field1"', + '', + 'value1', + `--${boundary}`, + 'Content-Disposition: form-data; name="field2"', + '', + 'value2', + `--${boundary}`, + 'Content-Disposition: form-data; 
name="file1"; filename="dummy.txt"', + 'Content-Type: foo', + '', + 'Hello World!', + `--${boundary}--`, + ]; + + strictEqual( + lines.length, + expectedLines.length, + `Expected ${expectedLines.length} lines, got ${lines.length}` + ); + + for (let i = 0; i < expectedLines.length; i++) { + strictEqual( + lines[i], + expectedLines[i], + `Mismatch at line ${i}. Actual: '${lines[i]}' Expected: '${expectedLines[i]}'` + ); + } + }); }); From f6fd729a3ca24b680f39ce4bf6469d3fffbd0c7a Mon Sep 17 00:00:00 2001 From: Tomasz Andrzejak Date: Mon, 10 Feb 2025 19:32:08 +0100 Subject: [PATCH 08/13] Implement query_length function --- builtins/web/fetch/request-response.cpp | 7 + builtins/web/form-data/form-data-encoder.cpp | 152 ++++++++++- builtins/web/form-data/form-data-encoder.h | 2 +- tests/integration/formdata/formdata.js | 251 +----------------- .../send-file-formdata-controls.any.js.json | 38 +++ ...send-file-formdata-punctuation.any.js.json | 83 ++++++ .../file/send-file-formdata-utf-8.any.js.json | 20 ++ .../file/send-file-formdata.any.js.json | 5 + tests/wpt-harness/tests.json | 4 + 9 files changed, 298 insertions(+), 264 deletions(-) create mode 100644 tests/wpt-harness/expectations/FileAPI/file/send-file-formdata-controls.any.js.json create mode 100644 tests/wpt-harness/expectations/FileAPI/file/send-file-formdata-punctuation.any.js.json create mode 100644 tests/wpt-harness/expectations/FileAPI/file/send-file-formdata-utf-8.any.js.json create mode 100644 tests/wpt-harness/expectations/FileAPI/file/send-file-formdata.any.js.json diff --git a/builtins/web/fetch/request-response.cpp b/builtins/web/fetch/request-response.cpp index e7838ba0..d031cc9d 100644 --- a/builtins/web/fetch/request-response.cpp +++ b/builtins/web/fetch/request-response.cpp @@ -339,6 +339,13 @@ bool RequestOrResponse::extract_body(JSContext *cx, JS::HandleObject self, auto boundary = MultipartFormData::boundary(encoder); auto type = "multipart/form-data; boundary=" + boundary; host_type_str = 
type.c_str(); + + auto length = MultipartFormData::query_length(cx, encoder); + if (!length) { + return false; + } + + content_length = mozilla::Some(length.value()); content_type = host_type_str.ptr.get(); RootedValue stream_val(cx, JS::ObjectValue(*stream)); diff --git a/builtins/web/form-data/form-data-encoder.cpp b/builtins/web/form-data/form-data-encoder.cpp index 51a5bca9..ca2cace4 100644 --- a/builtins/web/form-data/form-data-encoder.cpp +++ b/builtins/web/form-data/form-data-encoder.cpp @@ -77,25 +77,37 @@ std::optional normalize_newlines(JSContext *cx, HandleValue src) { return normalize_newlines(chars); } +size_t compute_escaped_len(std::string_view src) { + size_t len = 0; + for (char ch : src) { + if (ch == '\n' || ch == '\r' || ch == '"') { + len += 3; + } else { + ++len; + } + } + return len; +} + // For field names and filenames for file fields, the result of the encoding must // be escaped by replacing any 0x0A (LF) bytes with the byte sequence // `%0A`, 0x0D (CR) with `%0D` and 0x22 (") with `%22`. 
// // https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#multipart-form-data -std::optional escape_name(std::string_view str) { - int32_t offset = 0; - std::string output(str); - - while ((offset = output.find_first_of("\n\r\"", offset)) != std::string::npos) { - if (output[offset] == '\n') { - output.replace(offset, 1, "%0A"); - } else if (output[offset] == '\r') { - output.replace(offset, 1, "%0D"); - } else if (output[offset] == '"') { - output.replace(offset, 1, "%22"); +std::optional escape_name(std::string_view src) { + size_t escaped_len = compute_escaped_len(src); + std::string output; + output.reserve(escaped_len); + + for (char ch : src) { + if (ch == '\n') { + output.append("%0A"); + } else if (ch == '\r') { + output.append("%0D"); + } else if (ch == '"') { + output.append("%22"); } else { - offset++; - continue; + output.push_back(ch); } } @@ -111,6 +123,30 @@ std::optional escape_name(JSContext *cx, HandleValue src) { return escape_name(chars); } +size_t compute_normalized_and_escaped_len(std::string_view src) { + size_t len = 0; + + for (size_t i = 0; i < src.size(); ++i) { + char ch = src[i]; + if (ch == '\r') { + len += 3; // CR -> "%0D" + len += 3; // LF -> "%0A" + if ((i + 1) < src.size() && src[i + 1] == '\n') { + ++i; + } + } else if (ch == '\n') { + len += 3; // CR -> "%0D" + len += 3; // LF -> "%0A" + } else if (ch == '"') { + len += 3; // -> "%22" + } else { + len += 1; + } + } + + return len; +} + std::string normalize_and_escape(std::string_view src) { auto normalized = normalize_newlines(src); MOZ_ASSERT(normalized.has_value()); @@ -200,6 +236,7 @@ class MultipartFormDataImpl { remainder_.reserve(128); } + std::optional query_length(JSContext* cx, const EntryList *entries); std::string boundary() { return boundary_; }; bool read_next(JSContext *cx, StreamContext &stream); }; @@ -378,6 +415,86 @@ bool MultipartFormDataImpl::read_next(JSContext *cx, StreamContext &stream) { } } +// Computes the total size (in bytes) of 
the encoded multipart/form-data stream. +// +// Returns std::nullopt if any string conversion fails. +std::optional MultipartFormDataImpl::query_length(JSContext* cx, const EntryList *entries) { + size_t total = 0; + + constexpr const char* content_disp_lit = "Content-Disposition: form-data; name=\"\""; + constexpr const char* content_type_lit = "Content-Type: "; + constexpr const char* filename_lit = "; filename=\"\""; + constexpr const char* default_mime_lit = "application/octet-stream"; + + const size_t content_disp_len = strlen(content_disp_lit); + const size_t content_type_len = strlen(content_type_lit); + const size_t filename_len = strlen(filename_lit); + const size_t default_mime_len = strlen(default_mime_lit); + const size_t crlf_len = strlen(CRLF); + + // For every entry in the FormData + for (const auto& entry : *entries) { + // Add: "--" + boundary + CRLF + total += 2 + boundary_.size() + crlf_len; + + // Add: "Content-Disposition: form-data; name=\"\"" + total += content_disp_len; + total += compute_normalized_and_escaped_len(entry.name); + + if (entry.value.isString()) { + // Terminate the header + total += 2 * crlf_len; + + RootedValue value_str(cx, entry.value); + auto value = core::encode(cx, value_str); + if (!value) { + return std::nullopt; + } + + total += compute_normalized_len(value); + } else { + MOZ_ASSERT(File::is_instance(entry.value)); + RootedObject obj(cx, &entry.value.toObject()); + RootedString filename_str(cx, File::name(obj)); + auto filename = core::encode(cx, filename_str); + if (!filename) { + return std::nullopt; + } + + // Literal: ; filename="" + total += filename_len; + total += compute_escaped_len(filename); + total += crlf_len; + + // Literal: "Content-Type: " + total += content_type_len; + + // The type string (defaulting to "application/octet-stream" if empty) + RootedString type_str(cx, Blob::type(obj)); + auto type = core::encode(cx, type_str); + if (!type) { + return std::nullopt; + } + + total += type.size() > 0 ? 
type.size() : default_mime_len; + + // Terminate the header + total += 2 * crlf_len; + + // Add payload + total += Blob::blob_size(obj); + } + + // Each entry is terminated with a CRLF. + total += crlf_len; + } + + // This is written as: "--" + boundary + "--" + total += 2 + boundary_.size() + 2; + + return total; +} + const JSFunctionSpec MultipartFormData::static_methods[] = {JS_FS_END}; const JSPropertySpec MultipartFormData::static_properties[] = {JS_PS_END}; const JSFunctionSpec MultipartFormData::methods[] = {JS_FS_END}; @@ -438,6 +555,15 @@ JSObject *MultipartFormData::form_data(JSObject *self) { return &JS::GetReservedSlot(self, Slots::Form).toObject(); } +std::optional MultipartFormData::query_length(JSContext *cx, HandleObject self) { + RootedObject obj(cx, form_data(self)); + + auto entries = FormData::entry_list(obj); + auto impl = as_impl(self); + + return impl->query_length(cx, entries); +} + JSObject *MultipartFormData::encode_stream(JSContext *cx, HandleObject self) { RootedObject reader(cx, BufReader::create(cx, self, read)); if (!reader) { diff --git a/builtins/web/form-data/form-data-encoder.h b/builtins/web/form-data/form-data-encoder.h index 54735b1a..07df3198 100644 --- a/builtins/web/form-data/form-data-encoder.h +++ b/builtins/web/form-data/form-data-encoder.h @@ -29,7 +29,7 @@ class MultipartFormData : public FinalizableBuiltinImpl { static JSObject *form_data(JSObject *self); static std::string boundary(JSObject *self); - static size_t query_length(JSContext *cx, HandleObject self); + static std::optional query_length(JSContext *cx, HandleObject self); static JSObject *encode_stream(JSContext *cx, HandleObject self); static JSObject *create(JSContext *cx, HandleObject form_data); diff --git a/tests/integration/formdata/formdata.js b/tests/integration/formdata/formdata.js index 44bc177f..ea892907 100644 --- a/tests/integration/formdata/formdata.js +++ b/tests/integration/formdata/formdata.js @@ -1,256 +1,7 @@ import { serveTest } from 
"../test-server.js"; -import { assert, strictEqual, deepStrictEqual } from "../../assert.js"; - -// Adopted from wpt tests - -const kTestChars = "ABC~‾¥≈¤・・•∙·☼★星🌟星★☼·∙•・・¤≈¥‾~XYZ"; - -async function formDataPostFileUploadTest(fileBaseName) { - const formData = new FormData(); - let file = new Blob([kTestChars], { type: "text/plain" }); - try { - // Switch to File in browsers that allow this - file = new File([file], fileBaseName, { type: file.type }); - } catch (ignoredException) { - } - - formData.append("filename", fileBaseName); - formData.append(fileBaseName, "filename"); - formData.append("file", file, fileBaseName); - - const req = new Request('about:blank', { - method: 'POST', - body: formData, - }); - - const formDataText = await req.text(); - const formDataLines = formDataText.split("\r\n"); - if (formDataLines.length && !formDataLines[formDataLines.length - 1]) { - --formDataLines.length; - } - - assert( - formDataLines.length > 2, - `${fileBaseName}: multipart form data must have at least 3 lines: ${ - JSON.stringify(formDataText) - }`, - ); - - const boundary = formDataLines[0]; - assert( - formDataLines[formDataLines.length - 1] === boundary + "--", - `${fileBaseName}: multipart form data must end with ${boundary}--: ${ - JSON.stringify(formDataText) - }`, - ); - - const asValue = fileBaseName.replace(/\r\n?|\n/g, "\r\n"); - const asName = asValue.replace(/[\r\n"]/g, encodeURIComponent); - const asFilename = fileBaseName.replace(/[\r\n"]/g, encodeURIComponent); - const expectedText = [ - boundary, - 'Content-Disposition: form-data; name="filename"', - "", - asValue, - boundary, - `Content-Disposition: form-data; name="${asName}"`, - "", - "filename", - boundary, - `Content-Disposition: form-data; name="file"; ` + - `filename="${asFilename}"`, - "Content-Type: text/plain", - "", - kTestChars, - boundary + "--", - ].join("\r\n"); - - strictEqual( - formDataText, expectedText, - `Unexpected multipart-shaped form data 
received:\n${formDataText}\nExpected:\n${expectedText}`, - ); -} +import { assert, strictEqual } from "../../assert.js"; export const handler = serveTest(async (t) => { - await t.test("ASCII", async () => - formDataPostFileUploadTest("file-for-upload-in-form.txt") - ); - - await t.test("x-user-defined", async () => - formDataPostFileUploadTest("file-for-upload-in-form-\uF7F0\uF793\uF783\uF7A0.txt") - ); - - await t.test("windows-1252", async () => - formDataPostFileUploadTest("file-for-upload-in-form-☺😂.txt") - ); - - await t.test("JIS X 0201 and JIS X 0208", async () => - formDataPostFileUploadTest("file-for-upload-in-form-???.txt") - ); - - await t.test("Unicode-1", async () => - formDataPostFileUploadTest("file-for-upload-in-form-???.txt") - ); - - await t.test("Unicode-2", async () => - formDataPostFileUploadTest(`file-for-upload-in-form-${kTestChars}.txt`) - ); - - await t.test("ASCII-with-NUL", async () => - formDataPostFileUploadTest("file-for-upload-in-form-NUL-[\0].txt") - ); - - await t.test("ASCII-with-BS", async () => - formDataPostFileUploadTest("file-for-upload-in-form-BS-[\b].txt") - ); - - await t.test("ASCII-with-VT", async () => - formDataPostFileUploadTest("file-for-upload-in-form-VT-[\v].txt") - ); - - await t.test("ASCII-with-LF", async () => - formDataPostFileUploadTest("file-for-upload-in-form-LF-[\n].txt") - ); - - await t.test("ASCII-with-LFCR", async () => - formDataPostFileUploadTest("file-for-upload-in-form-LF-CR-[\n\r].txt") - ); - - await t.test("ASCII-with-CR", async () => - formDataPostFileUploadTest("file-for-upload-in-form-CR-[\r].txt") - ); - - await t.test("ASCII-with-CRLF", async () => - formDataPostFileUploadTest("file-for-upload-in-form-CR-LF-[\r\n].txt") - ); - - await t.test("ASCII-with-HT", async () => - formDataPostFileUploadTest("file-for-upload-in-form-HT-[\t].txt") - ); - - await t.test("ASCII-with-FF", async () => - formDataPostFileUploadTest("file-for-upload-in-form-FF-[\f].txt") - ); - - await 
t.test("ASCII-with-DEL", async () => - formDataPostFileUploadTest("file-for-upload-in-form-DEL-[\x7F].txt") - ); - - await t.test("ASCII-with-ESC", async () => - formDataPostFileUploadTest("file-for-upload-in-form-ESC-[\x1B].txt") - ); - - await t.test("ASCII-with-SPACE", async () => - formDataPostFileUploadTest("file-for-upload-in-form-SPACE-[ ].txt") - ); - await t.test("ASCII-with-QUOTATION-MARK", async () => - formDataPostFileUploadTest("file-for-upload-in-form-QUOTATION-MARK-[\x22].txt") - ); - - await t.test("ASCII-with-double-quoted", async () => - formDataPostFileUploadTest("file-for-upload-in-form-double-quoted.txt") - ); - - await t.test("ASCII-with-REVERSE-SOLIDUS", async () => - formDataPostFileUploadTest("file-for-upload-in-form-REVERSE-SOLIDUS-[\\].txt") - ); - - await t.test("ASCII-with-EXCLAMATION-MARK", async () => - formDataPostFileUploadTest("file-for-upload-in-form-EXCLAMATION-MARK-[!].txt") - ); - - await t.test("ASCII-with-DOLLAR-SIGN", async () => - formDataPostFileUploadTest("file-for-upload-in-form-DOLLAR-SIGN-[$].txt") - ); - - await t.test("ASCII-with-PERCENT-SIGN", async () => - formDataPostFileUploadTest("file-for-upload-in-form-PERCENT-SIGN-[%].txt") - ); - - await t.test("ASCII-with-AMPERSAND", async () => - formDataPostFileUploadTest("file-for-upload-in-form-AMPERSAND-[&].txt") - ); - - await t.test("ASCII-with-APOSTROPHE", async () => - formDataPostFileUploadTest("file-for-upload-in-form-APOSTROPHE-['].txt") - ); - - await t.test("ASCII-with-LEFT-PARENTHESIS", async () => - formDataPostFileUploadTest("file-for-upload-in-form-LEFT-PARENTHESIS-[(].txt") - ); - - await t.test("ASCII-with-RIGHT-PARENTHESIS", async () => - formDataPostFileUploadTest("file-for-upload-in-form-RIGHT-PARENTHESIS-[)].txt") - ); - - await t.test("ASCII-with-ASTERISK", async () => - formDataPostFileUploadTest("file-for-upload-in-form-ASTERISK-[*].txt") - ); - - await t.test("ASCII-with-PLUS-SIGN", async () => - 
formDataPostFileUploadTest("file-for-upload-in-form-PLUS-SIGN-[+].txt") - ); - - await t.test("ASCII-with-COMMA", async () => - formDataPostFileUploadTest("file-for-upload-in-form-COMMA-[,].txt") - ); - - await t.test("ASCII-with-FULL-STOP", async () => - formDataPostFileUploadTest("file-for-upload-in-form-FULL-STOP-[.].txt") - ); - - await t.test("ASCII-with-SOLIDUS", async () => - formDataPostFileUploadTest("file-for-upload-in-form-SOLIDUS-[/].txt") - ); - - await t.test("ASCII-with-COLON", async () => - formDataPostFileUploadTest("file-for-upload-in-form-COLON-[:].txt") - ); - - await t.test("ASCII-with-SEMICOLON", async () => - formDataPostFileUploadTest("file-for-upload-in-form-SEMICOLON-[;].txt") - ); - - await t.test("ASCII-with-EQUALS-SIGN", async () => - formDataPostFileUploadTest("file-for-upload-in-form-EQUALS-SIGN-[=].txt") - ); - - await t.test("ASCII-with-QUESTION-MARK", async () => - formDataPostFileUploadTest("file-for-upload-in-form-QUESTION-MARK-[?].txt") - ); - - await t.test("ASCII-with-CIRCUMFLEX-ACCENT", async () => - formDataPostFileUploadTest("file-for-upload-in-form-CIRCUMFLEX-ACCENT-[^].txt") - ); - - await t.test("ASCII-with-LEFT-SQUARE-BRACKET", async () => - formDataPostFileUploadTest("file-for-upload-in-form-LEFT-SQUARE-BRACKET-[[].txt") - ); - - await t.test("ASCII-with-RIGHT-SQUARE-BRACKET", async () => - formDataPostFileUploadTest("file-for-upload-in-form-RIGHT-SQUARE-BRACKET-[]].txt") - ); - - await t.test("ASCII-with-LEFT-CURLY-BRACKET", async () => - formDataPostFileUploadTest("file-for-upload-in-form-LEFT-CURLY-BRACKET-[{].txt") - ); - - await t.test("ASCII-with-VERTICAL-LINE", async () => - formDataPostFileUploadTest("file-for-upload-in-form-VERTICAL-LINE-[|].txt") - ); - - await t.test("ASCII-with-RIGHT-CURLY-BRACKET", async () => - formDataPostFileUploadTest("file-for-upload-in-form-RIGHT-CURLY-BRACKET-[}].txt") - ); - - await t.test("ASCII-with-TILDE", async () => - 
formDataPostFileUploadTest("file-for-upload-in-form-TILDE-[~].txt") - ); - - await t.test("ASCII-with-single-quoted", async () => - formDataPostFileUploadTest("file-for-upload-in-form-single-quoted.txt") - ); - await t.test('form-data-encode', async () => { async function readStream(stream) { const reader = stream.getReader(); diff --git a/tests/wpt-harness/expectations/FileAPI/file/send-file-formdata-controls.any.js.json b/tests/wpt-harness/expectations/FileAPI/file/send-file-formdata-controls.any.js.json new file mode 100644 index 00000000..a401a3bd --- /dev/null +++ b/tests/wpt-harness/expectations/FileAPI/file/send-file-formdata-controls.any.js.json @@ -0,0 +1,38 @@ +{ + "Upload file-for-upload-in-form-NUL-[\u0000].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-BS-[\b].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-VT-[\u000b].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-LF-[\n].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-LF-CR-[\n\r].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-CR-[\r].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-CR-LF-[\r\n].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-HT-[\t].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-FF-[\f].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-DEL-[].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-ESC-[\u001b].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-SPACE-[ ].txt (ASCII) in fetch with FormData": { + "status": "PASS" + } +} \ No newline at end of file 
diff --git a/tests/wpt-harness/expectations/FileAPI/file/send-file-formdata-punctuation.any.js.json b/tests/wpt-harness/expectations/FileAPI/file/send-file-formdata-punctuation.any.js.json new file mode 100644 index 00000000..6e71c62a --- /dev/null +++ b/tests/wpt-harness/expectations/FileAPI/file/send-file-formdata-punctuation.any.js.json @@ -0,0 +1,83 @@ +{ + "Upload file-for-upload-in-form-QUOTATION-MARK-[\"].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload \"file-for-upload-in-form-double-quoted.txt\" (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-REVERSE-SOLIDUS-[\\].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-EXCLAMATION-MARK-[!].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-DOLLAR-SIGN-[$].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-PERCENT-SIGN-[%].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-AMPERSAND-[&].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-APOSTROPHE-['].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-LEFT-PARENTHESIS-[(].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-RIGHT-PARENTHESIS-[)].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-ASTERISK-[*].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-PLUS-SIGN-[+].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-COMMA-[,].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-FULL-STOP-[.].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload 
file-for-upload-in-form-SOLIDUS-[/].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-COLON-[:].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-SEMICOLON-[;].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-EQUALS-SIGN-[=].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-QUESTION-MARK-[?].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-CIRCUMFLEX-ACCENT-[^].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-LEFT-SQUARE-BRACKET-[[].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-RIGHT-SQUARE-BRACKET-[]].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-LEFT-CURLY-BRACKET-[{].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-VERTICAL-LINE-[|].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-RIGHT-CURLY-BRACKET-[}].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-TILDE-[~].txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload 'file-for-upload-in-form-single-quoted.txt' (ASCII) in fetch with FormData": { + "status": "PASS" + } +} \ No newline at end of file diff --git a/tests/wpt-harness/expectations/FileAPI/file/send-file-formdata-utf-8.any.js.json b/tests/wpt-harness/expectations/FileAPI/file/send-file-formdata-utf-8.any.js.json new file mode 100644 index 00000000..b3fd805f --- /dev/null +++ b/tests/wpt-harness/expectations/FileAPI/file/send-file-formdata-utf-8.any.js.json @@ -0,0 +1,20 @@ +{ + "Upload file-for-upload-in-form.txt (ASCII) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-.txt 
(x-user-defined) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-☺😂.txt (windows-1252) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-★星★.txt (JIS X 0201 and JIS X 0208) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-☺😂.txt (Unicode) in fetch with FormData": { + "status": "PASS" + }, + "Upload file-for-upload-in-form-ABC~‾¥≈¤・・•∙·☼★星🌟星★☼·∙•・・¤≈¥‾~XYZ.txt (Unicode) in fetch with FormData": { + "status": "PASS" + } +} \ No newline at end of file diff --git a/tests/wpt-harness/expectations/FileAPI/file/send-file-formdata.any.js.json b/tests/wpt-harness/expectations/FileAPI/file/send-file-formdata.any.js.json new file mode 100644 index 00000000..b0a3fa70 --- /dev/null +++ b/tests/wpt-harness/expectations/FileAPI/file/send-file-formdata.any.js.json @@ -0,0 +1,5 @@ +{ + "Upload file-for-upload-in-form.txt (ASCII) in fetch with FormData": { + "status": "PASS" + } +} diff --git a/tests/wpt-harness/tests.json b/tests/wpt-harness/tests.json index 322e0d3c..bdd76a08 100644 --- a/tests/wpt-harness/tests.json +++ b/tests/wpt-harness/tests.json @@ -173,6 +173,10 @@ "FileAPI/blob/Blob-stream.any.js", "FileAPI/blob/Blob-text.any.js", "FileAPI/file/File-constructor.any.js", + "FileAPI/file/send-file-formdata.any.js", + "FileAPI/file/send-file-formdata-controls.any.js", + "FileAPI/file/send-file-formdata-punctuation.any.js", + "FileAPI/file/send-file-formdata-utf-8.any.js", "hr-time/basic.any.js", "hr-time/idlharness.any.js", "hr-time/monotonic-clock.any.js", From 5208350e6dfa45abe8c739bbd2433aa8f5a0966a Mon Sep 17 00:00:00 2001 From: Tomasz Andrzejak Date: Wed, 12 Feb 2025 13:57:08 +0100 Subject: [PATCH 09/13] Unify normalize to use append instead of += operator --- builtins/web/form-data/form-data-encoder.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/builtins/web/form-data/form-data-encoder.cpp 
b/builtins/web/form-data/form-data-encoder.cpp index ca2cace4..78e2f43c 100644 --- a/builtins/web/form-data/form-data-encoder.cpp +++ b/builtins/web/form-data/form-data-encoder.cpp @@ -53,13 +53,11 @@ std::optional normalize_newlines(std::string_view src) { for (size_t i = 0; i < src.size(); i++) { if (src[i] == CR) { if (i + 1 < src.size() && src[i + 1] == LF) { - output += CRLF; i++; - } else { - output += CRLF; } + output.append(CRLF); } else if (src[i] == LF) { - output += CRLF; + output.append(CRLF); } else { output.push_back(src[i]); } @@ -95,9 +93,8 @@ size_t compute_escaped_len(std::string_view src) { // // https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#multipart-form-data std::optional escape_name(std::string_view src) { - size_t escaped_len = compute_escaped_len(src); std::string output; - output.reserve(escaped_len); + output.reserve(compute_escaped_len(src)); for (char ch : src) { if (ch == '\n') { From 8cf2e34e1be1e5c022579a3cdadd7e6752db15fb Mon Sep 17 00:00:00 2001 From: Tomasz Andrzejak Date: Thu, 13 Feb 2025 11:32:54 +0100 Subject: [PATCH 10/13] Add form-data-encoder docs --- builtins/web/form-data/form-data-encoder.cpp | 111 +++++++++++++++++-- 1 file changed, 99 insertions(+), 12 deletions(-) diff --git a/builtins/web/form-data/form-data-encoder.cpp b/builtins/web/form-data/form-data-encoder.cpp index 78e2f43c..289f6c25 100644 --- a/builtins/web/form-data/form-data-encoder.cpp +++ b/builtins/web/form-data/form-data-encoder.cpp @@ -20,6 +20,8 @@ const char LF = '\n'; const char CR = '\r'; const char *CRLF = "\r\n"; +// Computes the length of a string after normalizing its newlines. +// Converts CR, LF, and CRLF into a CRLF sequence. 
size_t compute_normalized_len(std::string_view src) { size_t len = 0; const size_t newline_len = strlen(CRLF); @@ -40,11 +42,9 @@ size_t compute_normalized_len(std::string_view src) { return len; } -// Replace every occurrence of U+000D (CR) not followed by U+000A (LF), -// and every occurrence of U+000A (LF) not preceded by U+000D (CR), -// in entry's name, by a string consisting of a U+000D (CR) and U+000A (LF). -// -// https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#multipart-form-data +// Normalizes newlines in a string by replacing: +// - CR not followed by LF -> CRLF +// - LF not preceded by CR -> CRLF std::optional normalize_newlines(std::string_view src) { std::string output; @@ -75,6 +75,10 @@ std::optional normalize_newlines(JSContext *cx, HandleValue src) { return normalize_newlines(chars); } +// Computes the length of a string after percent-encoding the following characters: +// - LF (0x0A) -> "%0A" +// - CR (0x0D) -> "%0D" +// - Double quote (0x22) -> "%22" size_t compute_escaped_len(std::string_view src) { size_t len = 0; for (char ch : src) { @@ -87,11 +91,11 @@ size_t compute_escaped_len(std::string_view src) { return len; } -// For field names and filenames for file fields, the result of the encoding must -// be escaped by replacing any 0x0A (LF) bytes with the byte sequence -// `%0A`, 0x0D (CR) with `%0D` and 0x22 (") with `%22`.
-// -// https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#multipart-form-data +// Percent-encodes the following characters in a string for safe use in multipart/form-data +// field names and filenames: +// - LF (0x0A) -> "%0A" +// - CR (0x0D) -> "%0D" +// - Double quote (0x22) -> "%22" std::optional escape_name(std::string_view src) { std::string output; output.reserve(compute_escaped_len(src)); @@ -120,6 +124,7 @@ std::optional escape_name(JSContext *cx, HandleValue src) { return escape_name(chars); } +// Computes the length of a string after both normalizing newlines and escaping characters. size_t compute_normalized_and_escaped_len(std::string_view src) { size_t len = 0; @@ -144,6 +149,7 @@ size_t compute_normalized_and_escaped_len(std::string_view src) { return len; } +// Folds normalizing newlines and escaping characters in the given string into a single function. std::string normalize_and_escape(std::string_view src) { auto normalized = normalize_newlines(src); MOZ_ASSERT(normalized.has_value()); @@ -205,6 +211,23 @@ struct StreamContext { } }; +// `MultipartFormDataImpl` encodes `FormData` into a multipart/form-data body, +// following the specification in https://datatracker.ietf.org/doc/html/rfc7578. +// +// Each entry is serialized in three atomic operations: writing the header, body, and footer. +// These parts are written into a fixed-size buffer, so the implementation must handle cases +// where not all data can be written at once. Any unwritten data is cached as a "leftover" +// and will be written in the next iteration before transitioning to the next state. This +// introduces an implicit state where the encoder drains leftover data from the previous +// operation before proceeding. +// +// The algorithm is implemented as a state machine with the following states: +// - Start: Initialization of the process. +// - EntryHeader: Write the boundary and header information for the current entry.
+// - EntryBody: Write the actual content (payload) of the entry. +// - EntryFooter: Write the trailing CRLF for the entry. +// - Close: Write the closing boundary indicating the end of the multipart data. +// - Done: Processing is complete. class MultipartFormDataImpl { enum class State : int { Start, EntryHeader, EntryBody, EntryFooter, Close, Done }; @@ -260,6 +283,16 @@ MultipartFormDataImpl::State MultipartFormDataImpl::next_state(StreamContext &st } } +// Drains any previously cached leftover data or remaining file data by writing +// it to the stream. +// +// The draining function handles two types of leftover data: +// - Metadata leftovers: This includes generated data for each entry, such as the boundary +// delimiter, content-disposition header, etc. These are cached in `remainder_`, while +// `remainder_view_` tracks how much remains to be written. +// - Entry value leftovers: Tracked by `file_leftovers_`, this represents the number of +// bytes from a blob that still need to be written to the output buffer to complete +// the entry's value. void MultipartFormDataImpl::maybe_drain_leftovers(JSContext *cx, StreamContext &stream) { if (!remainder_view_.empty()) { auto written = stream.write(remainder_view_.begin(), remainder_view_.end()); @@ -284,6 +317,9 @@ void MultipartFormDataImpl::maybe_drain_leftovers(JSContext *cx, StreamContext & } } +// Writes data from the range [first, last) to the stream. If the stream cannot +// accept all the data, the unwritten part is stored in the remainder_ buffer +// for later draining. 
template void MultipartFormDataImpl::write_and_cache_remainder(StreamContext &stream, I first, I last) { auto datasz = static_cast(std::distance(first, last)); @@ -299,6 +335,34 @@ void MultipartFormDataImpl::write_and_cache_remainder(StreamContext &stream, I f } } +// https://datatracker.ietf.org/doc/html/rfc7578: +// - A multipart/form-data body contains a series of parts separated by a boundary +// - The parts are delimited with a boundary delimiter, constructed using CRLF, "--", +// and the value of the "boundary" parameter. +// See https://datatracker.ietf.org/doc/html/rfc7578#section-4.1 +// - Each part MUST contain a Content-Disposition header field where the disposition type is "form-data". +// The Content-Disposition header field MUST also contain an additional parameter of "name"; the value of +// the "name" parameter is the original field name from the form. +// See https://datatracker.ietf.org/doc/html/rfc7578#section-4.2 +// - For form data that represents the content of a file, a name for the file SHOULD be supplied as well, +// by using a "filename" parameter of the Content-Disposition header field. +// See https://datatracker.ietf.org/doc/html/rfc7578#section-4.2 +// - Each part MAY have an (optional) "Content-Type" header field, which defaults to "text/plain". If the +// contents of a file are to be sent, the file data SHOULD be labeled with an appropriate media type, if +// known, or "application/octet-stream". +// +// Additionally from the https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#multipart%2Fform-data-encoding-algorithm +// - The parts of the generated multipart/form-data resource that correspond to non-file fields +// must not have a `Content-Type` header specified.
+// - Replace every occurrence of U+000D (CR) not followed by U+000A (LF), and every occurrence +// of U+000A (LF) not preceded by U+000D (CR), in entry's name, by a string consisting of a +// U+000D (CR) and U+000A (LF) +// - For field names and filenames for file fields, the result of the encoding in the previous +// bullet point must be escaped by replacing any 0x0A (LF) bytes with the byte sequence `%0A`, +// 0x0D (CR) with `%0D` and 0x22 (") with `%22`. +// +// The two bullets above for "name" are folded into `normalize_and_escape`. The filename on the other +// hand is escaped using `escape_name`. bool MultipartFormDataImpl::handle_entry_header(JSContext *cx, StreamContext &stream) { auto entry = stream.entries->begin()[chunk_idx_]; auto header = fmt::memory_buffer(); @@ -336,6 +400,10 @@ bool MultipartFormDataImpl::handle_entry_header(JSContext *cx, StreamContext &st return true; } +// https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#multipart%2Fform-data-encoding-algorithm +// - If entry's value is not a File object, then replace every occurrence of U+000D (CR) not followed by U+000A (LF), +// and every occurrence of U+000A (LF) not preceded by U+000D (CR), in entry's value, by a string consisting of a +// U+000D (CR) and U+000A (LF) - this is folded into `normalize_newlines`. 
bool MultipartFormDataImpl::handle_entry_body(JSContext *cx, StreamContext &stream) { auto entry = stream.entries->begin()[chunk_idx_]; @@ -362,6 +430,7 @@ bool MultipartFormDataImpl::handle_entry_body(JSContext *cx, StreamContext &stre return true; } +// https://datatracker.ietf.org/doc/html/rfc2046#section-5.1.1 - writes `crlf` bool MultipartFormDataImpl::handle_entry_footer(JSContext *cx, StreamContext &stream) { auto footer = fmt::memory_buffer(); fmt::format_to(std::back_inserter(footer), "\r\n"); @@ -373,6 +442,12 @@ bool MultipartFormDataImpl::handle_entry_footer(JSContext *cx, StreamContext &st return true; } +// https://datatracker.ietf.org/doc/html/rfc2046#section-5.1.1 +// +// The boundary delimiter line following the last body part is a distinguished delimiter that +// indicates that no further body parts will follow. Such a delimiter line is identical to +// the previous delimiter lines, with the addition of two more hyphens after the boundary +// parameter value. bool MultipartFormDataImpl::handle_close(JSContext *cx, StreamContext &stream) { auto footer = fmt::memory_buffer(); fmt::format_to(std::back_inserter(footer), "--{}--", boundary_); @@ -414,7 +489,9 @@ bool MultipartFormDataImpl::read_next(JSContext *cx, StreamContext &stream) { // Computes the total size (in bytes) of the encoded multipart/form-data stream. // -// Returns std::nullopt if any string conversion fails. +// Returns `std::nullopt` if any string conversion fails. This function simulates +// the multipart/form-data encoding process without actually writing to a buffer. +// Instead, it accumulates the total size of each encoding step. 
std::optional MultipartFormDataImpl::query_length(JSContext* cx, const EntryList *entries) { size_t total = 0; @@ -588,7 +665,17 @@ JSObject *MultipartFormData::create(JSContext *cx, HandleObject form_data) { return nullptr; } - // Hex encode bytes to string + // The requirements for boundary are (https://datatracker.ietf.org/doc/html/rfc2046#section-5.1.1): + // Boundary delimiters must not appear within the encapsulated material, and must be no longer than + // 70 characters, not counting the two leading hyphens and consist of bcharsnospace characters, + // where EBNF for bcharsnospace is as follows: + // + // bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" / "_" / "," / "-" / "." / "/" / ":" / "=" / "?" + // + // e.g.: + // This implementation: --BoundaryjXo5N4HEAXWcKrw7 + // WebKit: ----WebKitFormBoundaryhpShnP1JqrBTVTnC + // Gecko: ----geckoformboundary8c79e61efa53dc5d441481912ad86113 auto bytes = std::move(res.unwrap()); auto bytes_str = std::string_view((char *)(bytes.ptr.get()), bytes.size()); auto base64_str = base64::forgivingBase64Encode(bytes_str, base64::base64EncodeTable); From fa1f898c027525d0887cbbbd5dc8a56574f51d03 Mon Sep 17 00:00:00 2001 From: Tomasz Andrzejak Date: Fri, 14 Feb 2025 12:38:44 +0100 Subject: [PATCH 11/13] Address code review feedback --- builtins/web/fetch/request-response.cpp | 8 +- builtins/web/form-data/form-data-encoder.cpp | 61 +++++++-------- builtins/web/form-data/form-data-encoder.h | 3 +- tests/integration/fetch/fetch.js | 2 +- tests/integration/formdata/formdata.js | 79 -------------------- tests/integration/handlers.js | 1 - tests/tests.cmake | 1 - 7 files changed, 36 insertions(+), 119 deletions(-) delete mode 100644 tests/integration/formdata/formdata.js diff --git a/builtins/web/fetch/request-response.cpp b/builtins/web/fetch/request-response.cpp index 7d6b48b0..34e3d454 100644 --- a/builtins/web/fetch/request-response.cpp +++ b/builtins/web/fetch/request-response.cpp @@ -19,6 +19,7 @@ #include 
"js/Conversions.h" #include "js/JSON.h" #include "js/Stream.h" +#include "mozilla/ResultVariant.h" #include #include #include @@ -293,7 +294,7 @@ bool RequestOrResponse::extract_body(JSContext *cx, JS::HandleObject self, const char *content_type = nullptr; mozilla::Maybe content_length; - // We currently support five types of body inputs: + // We support all types of body inputs required by the spec: // - byte sequence // - buffer source // - Blob @@ -303,7 +304,6 @@ bool RequestOrResponse::extract_body(JSContext *cx, JS::HandleObject self, // - ReadableStream // After the other other options are checked explicitly, all other inputs are // encoded to a UTF8 string to be treated as a USV string. - // TODO: Support the other possible inputs to Body. JS::RootedObject body_obj(cx, body_val.isObject() ? &body_val.toObject() : nullptr); host_api::HostString host_type_str; @@ -341,11 +341,11 @@ bool RequestOrResponse::extract_body(JSContext *cx, JS::HandleObject self, host_type_str = type.c_str(); auto length = MultipartFormData::query_length(cx, encoder); - if (!length) { + if (length.isErr()) { return false; } - content_length = mozilla::Some(length.value()); + content_length = mozilla::Some(length.unwrap()); content_type = host_type_str.ptr.get(); RootedValue stream_val(cx, JS::ObjectValue(*stream)); diff --git a/builtins/web/form-data/form-data-encoder.cpp b/builtins/web/form-data/form-data-encoder.cpp index 289f6c25..e1cab5f9 100644 --- a/builtins/web/form-data/form-data-encoder.cpp +++ b/builtins/web/form-data/form-data-encoder.cpp @@ -10,6 +10,7 @@ #include "encode.h" #include "mozilla/Assertions.h" +#include "mozilla/ResultVariant.h" #include #include @@ -24,16 +25,15 @@ const char *CRLF = "\r\n"; // Converts CR, LF, and CRLF into a CRLF sequence. 
size_t compute_normalized_len(std::string_view src) { size_t len = 0; - const size_t newline_len = strlen(CRLF); for (size_t i = 0; i < src.size(); i++) { if (src[i] == CR) { if (i + 1 < src.size() && src[i + 1] == LF) { i++; } - len += newline_len; + len += 2; // CRLF } else if (src[i] == LF) { - len += newline_len; + len += 2; // CRLF } else { len += 1; } @@ -131,11 +131,11 @@ size_t compute_normalized_and_escaped_len(std::string_view src) { for (size_t i = 0; i < src.size(); ++i) { char ch = src[i]; if (ch == '\r') { - len += 3; // CR -> "%0D" - len += 3; // LF -> "%0A" if ((i + 1) < src.size() && src[i + 1] == '\n') { ++i; } + len += 3; // CR -> "%0D" + len += 3; // LF -> "%0A" } else if (ch == '\n') { len += 3; // CR -> "%0D" len += 3; // LF -> "%0A" @@ -216,7 +216,7 @@ struct StreamContext { // // Each entry is serialized in three atomic operations: writing the header, body, and footer. // These parts are written into a fixed-size buffer, so the implementation must handle cases -// where not all data can be written at once. Any unwritten data is cached as a "leftover" +// where not all data can be written at once. Any unwritten data is stored as a "leftover" // and will be written in the next iteration before transitioning to the next state. This // introduces an implicit state where the encoder drains leftover data from the previous // operation before proceeding. 
@@ -241,7 +241,7 @@ class MultipartFormDataImpl { bool is_draining() { return (file_leftovers_ || remainder_.size()); }; - template void write_and_cache_remainder(StreamContext &stream, I first, I last); + template void write_and_store_remainder(StreamContext &stream, I first, I last); State next_state(StreamContext &stream); void maybe_drain_leftovers(JSContext *cx, StreamContext &stream); @@ -252,11 +252,9 @@ class MultipartFormDataImpl { public: MultipartFormDataImpl(std::string boundary) - : state_(State::Start), boundary_(std::move(boundary)), chunk_idx_(0), file_leftovers_(0) { - remainder_.reserve(128); - } + : state_(State::Start), boundary_(std::move(boundary)), chunk_idx_(0), file_leftovers_(0) {} - std::optional query_length(JSContext* cx, const EntryList *entries); + mozilla::Result query_length(JSContext* cx, const EntryList *entries); std::string boundary() { return boundary_; }; bool read_next(JSContext *cx, StreamContext &stream); }; @@ -311,8 +309,7 @@ void MultipartFormDataImpl::maybe_drain_leftovers(JSContext *cx, StreamContext & RootedObject obj(cx, &entry.value.toObject()); auto blob = Blob::blob(obj); - auto blobsz = blob->length(); - auto offset = blobsz - file_leftovers_; + auto offset = blob->length() - file_leftovers_; file_leftovers_ -= stream.write(blob->begin() + offset, blob->end()); } } @@ -321,13 +318,13 @@ void MultipartFormDataImpl::maybe_drain_leftovers(JSContext *cx, StreamContext & // accept all the data, the unwritten part is stored in the remainder_ buffer // for later draining. 
template -void MultipartFormDataImpl::write_and_cache_remainder(StreamContext &stream, I first, I last) { - auto datasz = static_cast(std::distance(first, last)); +void MultipartFormDataImpl::write_and_store_remainder(StreamContext &stream, I first, I last) { + auto to_write = static_cast(std::distance(first, last)); auto written = stream.write(first, last); - MOZ_ASSERT(written <= datasz); + MOZ_ASSERT(written <= to_write); - auto leftover = datasz - written; + auto leftover = to_write - written; if (leftover > 0) { MOZ_ASSERT(remainder_.empty()); remainder_.assign(first + written, last); @@ -396,7 +393,7 @@ bool MultipartFormDataImpl::handle_entry_header(JSContext *cx, StreamContext &st // If there are leftovers that didn't fit in outbuf, put it into remainder_ // and it will be drained the next run. - write_and_cache_remainder(stream, header.begin(), header.end()); + write_and_store_remainder(stream, header.begin(), header.end()); return true; } @@ -415,16 +412,16 @@ bool MultipartFormDataImpl::handle_entry_body(JSContext *cx, StreamContext &stre } auto normalized = maybe_normalized.value(); - write_and_cache_remainder(stream, normalized.begin(), normalized.end()); + write_and_store_remainder(stream, normalized.begin(), normalized.end()); } else { MOZ_ASSERT(File::is_instance(entry.value)); RootedObject obj(cx, &entry.value.toObject()); auto blob = Blob::blob(obj); - auto blobsz = blob->length(); + auto to_write = blob->length(); auto written = stream.write(blob->begin(), blob->end()); - MOZ_ASSERT(written <= blobsz); - file_leftovers_ = blobsz - written; + MOZ_ASSERT(written <= to_write); + file_leftovers_ = to_write - written; } return true; @@ -435,7 +432,7 @@ bool MultipartFormDataImpl::handle_entry_footer(JSContext *cx, StreamContext &st auto footer = fmt::memory_buffer(); fmt::format_to(std::back_inserter(footer), "\r\n"); - write_and_cache_remainder(stream, footer.begin(), footer.end()); + write_and_store_remainder(stream, footer.begin(), footer.end()); 
chunk_idx_ += 1; MOZ_ASSERT(chunk_idx_ <= stream.entries->length()); @@ -452,7 +449,7 @@ bool MultipartFormDataImpl::handle_close(JSContext *cx, StreamContext &stream) { auto footer = fmt::memory_buffer(); fmt::format_to(std::back_inserter(footer), "--{}--", boundary_); - write_and_cache_remainder(stream, footer.begin(), footer.end()); + write_and_store_remainder(stream, footer.begin(), footer.end()); return true; } @@ -492,7 +489,7 @@ bool MultipartFormDataImpl::read_next(JSContext *cx, StreamContext &stream) { // Returns `std::nullopt` if any string conversion fails. This function simulates // the multipart/form-data encoding process without actually writing to a buffer. // Instead, it accumulates the total size of each encoding step. -std::optional MultipartFormDataImpl::query_length(JSContext* cx, const EntryList *entries) { +mozilla::Result MultipartFormDataImpl::query_length(JSContext* cx, const EntryList *entries) { size_t total = 0; constexpr const char* content_disp_lit = "Content-Disposition: form-data; name=\"\""; @@ -522,7 +519,7 @@ std::optional MultipartFormDataImpl::query_length(JSContext* cx, const E RootedValue value_str(cx, entry.value); auto value = core::encode(cx, value_str); if (!value) { - return std::nullopt; + return mozilla::Result(OutOfMemory {}); } total += compute_normalized_len(value); @@ -532,7 +529,7 @@ std::optional MultipartFormDataImpl::query_length(JSContext* cx, const E RootedString filename_str(cx, File::name(obj)); auto filename = core::encode(cx, filename_str); if (!filename) { - return std::nullopt; + return mozilla::Result(OutOfMemory {}); } // Literal: ; filename="" @@ -547,7 +544,7 @@ std::optional MultipartFormDataImpl::query_length(JSContext* cx, const E RootedString type_str(cx, Blob::type(obj)); auto type = core::encode(cx, type_str); if (!type) { - return std::nullopt; + return mozilla::Result(OutOfMemory {}); } total += type.size() > 0 ? 
type.size() : default_mime_len; @@ -583,7 +580,7 @@ bool MultipartFormData::read(JSContext *cx, HandleObject self, std::span MultipartFormData::query_length(JSContext *cx, HandleObject self) { +mozilla::Result MultipartFormData::query_length(JSContext *cx, HandleObject self) { RootedObject obj(cx, form_data(self)); auto entries = FormData::entry_list(obj); @@ -680,7 +677,7 @@ JSObject *MultipartFormData::create(JSContext *cx, HandleObject form_data) { auto bytes_str = std::string_view((char *)(bytes.ptr.get()), bytes.size()); auto base64_str = base64::forgivingBase64Encode(bytes_str, base64::base64EncodeTable); - auto boundary = fmt::format("--Boundary{}", base64_str); + auto boundary = fmt::format("--StarlingMonkeyFormBoundary{}", base64_str); auto impl = new (std::nothrow) MultipartFormDataImpl(boundary); if (!impl) { return nullptr; diff --git a/builtins/web/form-data/form-data-encoder.h b/builtins/web/form-data/form-data-encoder.h index 07df3198..2363a811 100644 --- a/builtins/web/form-data/form-data-encoder.h +++ b/builtins/web/form-data/form-data-encoder.h @@ -7,6 +7,7 @@ namespace builtins { namespace web { namespace form_data { +class OutOfMemory {}; class MultipartFormDataImpl; class MultipartFormData : public FinalizableBuiltinImpl { @@ -29,7 +30,7 @@ class MultipartFormData : public FinalizableBuiltinImpl { static JSObject *form_data(JSObject *self); static std::string boundary(JSObject *self); - static std::optional query_length(JSContext *cx, HandleObject self); + static mozilla::Result query_length(JSContext *cx, HandleObject self); static JSObject *encode_stream(JSContext *cx, HandleObject self); static JSObject *create(JSContext *cx, HandleObject form_data); diff --git a/tests/integration/fetch/fetch.js b/tests/integration/fetch/fetch.js index a74db090..7a6fad8c 100644 --- a/tests/integration/fetch/fetch.js +++ b/tests/integration/fetch/fetch.js @@ -1,5 +1,5 @@ import { serveTest } from '../test-server.js'; -import { assert, strictEqual, 
deepStrictEqual, throws } from '../../assert.js'; +import { strictEqual, deepStrictEqual, throws } from '../../assert.js'; export const handler = serveTest(async (t) => { await t.test('headers-non-ascii-latin1-field-value', async () => { diff --git a/tests/integration/formdata/formdata.js b/tests/integration/formdata/formdata.js deleted file mode 100644 index ea892907..00000000 --- a/tests/integration/formdata/formdata.js +++ /dev/null @@ -1,79 +0,0 @@ -import { serveTest } from "../test-server.js"; -import { assert, strictEqual } from "../../assert.js"; - -export const handler = serveTest(async (t) => { - await t.test('form-data-encode', async () => { - async function readStream(stream) { - const reader = stream.getReader(); - const chunks = []; - let totalLen = 0; - - while (true) { - const { done, value } = await reader.read(); - if (done) break; - chunks.push(value); - totalLen += value.length; - } - const joined = new Uint8Array(totalLen); - let offset = 0; - for (const chunk of chunks) { - joined.set(chunk, offset); - offset += chunk.length; - } - return joined.buffer; - } - - const form = new FormData(); - form.append('field1', 'value1'); - form.append('field2', 'value2'); - - const file = new File(['Hello World!'], 'dummy.txt', { type: 'foo' }); - form.append('file1', file); - - const req = new Request('https://example.com', { method: 'POST', body: form }); - - const contentType = req.headers.get('Content-Type') || ''; - assert( - contentType.startsWith('multipart/form-data; boundary='), - `Content-Type should be multipart/form-data; got: ${contentType}` - ); - - const boundary = contentType.split('boundary=')[1]; - assert(boundary, 'Boundary must be present in the Content-Type'); - - const arrayBuf = await readStream(req.body); - const bodyStr = new TextDecoder().decode(arrayBuf); - const lines = bodyStr.split('\r\n'); - - const expectedLines = [ - `--${boundary}`, - 'Content-Disposition: form-data; name="field1"', - '', - 'value1', - `--${boundary}`, - 
'Content-Disposition: form-data; name="field2"', - '', - 'value2', - `--${boundary}`, - 'Content-Disposition: form-data; name="file1"; filename="dummy.txt"', - 'Content-Type: foo', - '', - 'Hello World!', - `--${boundary}--`, - ]; - - strictEqual( - lines.length, - expectedLines.length, - `Expected ${expectedLines.length} lines, got ${lines.length}` - ); - - for (let i = 0; i < expectedLines.length; i++) { - strictEqual( - lines[i], - expectedLines[i], - `Mismatch at line ${i}. Actual: '${lines[i]}' Expected: '${expectedLines[i]}'` - ); - } - }); -}); diff --git a/tests/integration/handlers.js b/tests/integration/handlers.js index 0e892f60..c1ee5cbf 100644 --- a/tests/integration/handlers.js +++ b/tests/integration/handlers.js @@ -4,4 +4,3 @@ export { handler as performance } from './performance/performance.js'; export { handler as crypto } from './crypto/crypto.js'; export { handler as timers } from './timers/timers.js'; export { handler as fetch } from './fetch/fetch.js'; -export { handler as formdata } from './formdata/formdata.js'; diff --git a/tests/tests.cmake b/tests/tests.cmake index 75200b5f..06b35a20 100644 --- a/tests/tests.cmake +++ b/tests/tests.cmake @@ -46,6 +46,5 @@ test_integration(blob) test_integration(btoa) test_integration(crypto) test_integration(fetch) -test_integration(formdata) test_integration(performance) test_integration(timers) From bba1830fb336526e807225e0f3d1a14ea560a450 Mon Sep 17 00:00:00 2001 From: Tomasz Andrzejak Date: Fri, 14 Feb 2025 13:32:53 +0100 Subject: [PATCH 12/13] Fix assigning to content_type to const char pointer --- builtins/web/fetch/request-response.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/builtins/web/fetch/request-response.cpp b/builtins/web/fetch/request-response.cpp index 34e3d454..f5631565 100644 --- a/builtins/web/fetch/request-response.cpp +++ b/builtins/web/fetch/request-response.cpp @@ -46,6 +46,8 @@ using blob::Blob; using form_data::FormData; using 
form_data::MultipartFormData; +using namespace std::literals; + static api::Engine *ENGINE; bool error_stream_controller_with_pending_exception(JSContext *cx, HandleObject stream) { @@ -291,7 +293,7 @@ bool RequestOrResponse::extract_body(JSContext *cx, JS::HandleObject self, MOZ_ASSERT(!has_body(self)); MOZ_ASSERT(!body_val.isNullOrUndefined()); - const char *content_type = nullptr; + string_view content_type; mozilla::Maybe content_length; // We support all types of body inputs required by the spec: @@ -323,7 +325,7 @@ bool RequestOrResponse::extract_body(JSContext *cx, JS::HandleObject self, if (JS::GetStringLength(type_str) > 0) { host_type_str = core::encode(cx, type_str); MOZ_ASSERT(host_type_str); - content_type = host_type_str.ptr.get(); + content_type = host_type_str; } } else if (FormData::is_instance(body_obj)) { RootedObject encoder(cx, MultipartFormData::create(cx, body_obj)); @@ -338,7 +340,7 @@ bool RequestOrResponse::extract_body(JSContext *cx, JS::HandleObject self, auto boundary = MultipartFormData::boundary(encoder); auto type = "multipart/form-data; boundary=" + boundary; - host_type_str = type.c_str(); + host_type_str = string_view(type); auto length = MultipartFormData::query_length(cx, encoder); if (length.isErr()) { @@ -346,7 +348,7 @@ bool RequestOrResponse::extract_body(JSContext *cx, JS::HandleObject self, } content_length = mozilla::Some(length.unwrap()); - content_type = host_type_str.ptr.get(); + content_type = host_type_str; RootedValue stream_val(cx, JS::ObjectValue(*stream)); JS_SetReservedSlot(self, static_cast(RequestOrResponse::Slots::BodyStream), stream_val); @@ -394,7 +396,7 @@ bool RequestOrResponse::extract_body(JSContext *cx, JS::HandleObject self, auto slice = url::URLSearchParams::serialize(cx, body_obj); buf = (char *)slice.data; length = slice.len; - content_type = "application/x-www-form-urlencoded;charset=UTF-8"; + content_type = "application/x-www-form-urlencoded;charset=UTF-8"sv; } else { auto text = core::encode(cx, 
body_val); if (!text.ptr) { @@ -402,7 +404,7 @@ bool RequestOrResponse::extract_body(JSContext *cx, JS::HandleObject self, } buf = text.ptr.release(); length = text.len; - content_type = "text/plain;charset=UTF-8"; + content_type = "text/plain;charset=UTF-8"sv; } if (!buffer) { @@ -443,7 +445,7 @@ bool RequestOrResponse::extract_body(JSContext *cx, JS::HandleObject self, content_length.emplace(length); } - if (content_type || content_length.isSome()) { + if (!content_type.empty() || content_length.isSome()) { JS::RootedObject headers(cx, RequestOrResponse::headers(cx, self)); if (!headers) { return false; @@ -457,7 +459,7 @@ bool RequestOrResponse::extract_body(JSContext *cx, JS::HandleObject self, } // Step 36.3 of Request constructor / 8.4 of Response constructor. - if (content_type && + if (!content_type.empty() && !Headers::set_valid_if_undefined(cx, headers, "Content-Type", content_type)) { return false; } From 9362df3aff0097dcff6657b1cce8d9b8c04837ab Mon Sep 17 00:00:00 2001 From: Tomasz Andrzejak Date: Fri, 14 Feb 2025 17:42:49 +0100 Subject: [PATCH 13/13] Remove unreachable constructor assert --- builtins/web/form-data/form-data-encoder.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/builtins/web/form-data/form-data-encoder.cpp b/builtins/web/form-data/form-data-encoder.cpp index e1cab5f9..59c36736 100644 --- a/builtins/web/form-data/form-data-encoder.cpp +++ b/builtins/web/form-data/form-data-encoder.cpp @@ -694,7 +694,6 @@ bool MultipartFormData::init_class(JSContext *cx, JS::HandleObject global) { } bool MultipartFormData::constructor(JSContext *cx, unsigned argc, JS::Value *vp) { - MOZ_ASSERT_UNREACHABLE("No MultipartFormData Ctor builtin"); return api::throw_error(cx, api::Errors::NoCtorBuiltin, class_name); }