LibWeb: Support unbuffered fetch requests

Supporting unbuffered fetches is actually part of the fetch spec in its
HTTP-network-fetch algorithm. We had previously implemented this method
in a very ad-hoc manner as a simple wrapper around ResourceLoader. This
is still the case, but we now implement a good amount of these steps
according to spec, using ResourceLoader's unbuffered API. The response
data is forwarded through to the fetch response using streams.

This will eventually let us remove the use of ResourceLoader's buffered
API, as all responses should just be streamed this way. The streams spec
then supplies ways to wait for completion, thus allowing fully buffered
responses. However, we have more work to do to make the other parts of
our fetch implementation (namely, Body::fully_read) use streams before
we can do this.
This commit is contained in:
Timothy Flynn 2024-05-26 08:03:29 -04:00 committed by Andreas Kling
parent 1e97ae66e5
commit 6056428cb5
8 changed files with 266 additions and 8 deletions

View file

@ -3,6 +3,7 @@ source_set("Fetching") {
deps = [ "//Userland/Libraries/LibWeb:all_generated" ]
sources = [
"Checks.cpp",
"FetchedDataReceiver.cpp",
"Fetching.cpp",
"PendingResponse.cpp",
"RefCountedFlag.cpp",

View file

@ -204,6 +204,7 @@ set(SOURCES
Fetch/BodyInit.cpp
Fetch/Enums.cpp
Fetch/Fetching/Checks.cpp
Fetch/Fetching/FetchedDataReceiver.cpp
Fetch/Fetching/Fetching.cpp
Fetch/Fetching/PendingResponse.cpp
Fetch/Fetching/RefCountedFlag.cpp

View file

@ -0,0 +1,90 @@
/*
* Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibJS/Heap/HeapFunction.h>
#include <LibWeb/Bindings/ExceptionOrUtils.h>
#include <LibWeb/Bindings/HostDefined.h>
#include <LibWeb/Fetch/Fetching/FetchedDataReceiver.h>
#include <LibWeb/Fetch/Infrastructure/FetchParams.h>
#include <LibWeb/Fetch/Infrastructure/Task.h>
#include <LibWeb/HTML/Scripting/ExceptionReporter.h>
#include <LibWeb/HTML/Scripting/TemporaryExecutionContext.h>
#include <LibWeb/Streams/AbstractOperations.h>
#include <LibWeb/WebIDL/Promise.h>
namespace Web::Fetch::Fetching {
JS_DEFINE_ALLOCATOR(FetchedDataReceiver);
FetchedDataReceiver::FetchedDataReceiver(JS::NonnullGCPtr<Infrastructure::FetchParams const> fetch_params, JS::NonnullGCPtr<Streams::ReadableStream> stream)
: m_fetch_params(fetch_params)
, m_stream(stream)
{
}
FetchedDataReceiver::~FetchedDataReceiver() = default;
void FetchedDataReceiver::visit_edges(Visitor& visitor)
{
Base::visit_edges(visitor);
visitor.visit(m_fetch_params);
visitor.visit(m_stream);
visitor.visit(m_pending_promise);
}
void FetchedDataReceiver::set_pending_promise(JS::NonnullGCPtr<WebIDL::Promise> promise)
{
auto had_pending_promise = m_pending_promise != nullptr;
m_pending_promise = promise;
if (!had_pending_promise && !m_buffer.is_empty()) {
on_data_received(m_buffer);
m_buffer.clear();
}
}
// This implements the parallel steps of the pullAlgorithm in HTTP-network-fetch.
// https://fetch.spec.whatwg.org/#ref-for-in-parallel④
void FetchedDataReceiver::on_data_received(ReadonlyBytes bytes)
{
// FIXME: 1. If the size of buffer is smaller than a lower limit chosen by the user agent and the ongoing fetch
// is suspended, resume the fetch.
// FIXME: 2. Wait until buffer is not empty.
// If the remote end sends data immediately after we receive headers, we will often get that data here before the
// stream tasks have all been queued internally. Just hold onto that data.
if (!m_pending_promise) {
m_buffer.append(bytes);
return;
}
// 3. Queue a fetch task to run the following steps, with fetchParamss task destination.
Infrastructure::queue_fetch_task(
m_fetch_params->controller(),
m_fetch_params->task_destination().get<JS::NonnullGCPtr<JS::Object>>(),
JS::create_heap_function(heap(), [this, bytes = MUST(ByteBuffer::copy(bytes))]() mutable {
HTML::TemporaryExecutionContext execution_context { Bindings::host_defined_environment_settings_object(m_stream->realm()), HTML::TemporaryExecutionContext::CallbacksEnabled::Yes };
// 1. Pull from bytes buffer into stream.
if (auto result = Streams::readable_stream_pull_from_bytes(m_stream, move(bytes)); result.is_error()) {
auto throw_completion = Bindings::dom_exception_to_throw_completion(m_stream->vm(), result.release_error());
dbgln("FetchedDataReceiver: Stream error pulling bytes");
HTML::report_exception(throw_completion, m_stream->realm());
return;
}
// 2. If stream is errored, then terminate fetchParamss controller.
if (m_stream->is_errored())
m_fetch_params->controller()->terminate();
// 3. Resolve promise with undefined.
WebIDL::resolve_promise(m_stream->realm(), *m_pending_promise, JS::js_undefined());
}));
}
}

View file

@ -0,0 +1,37 @@
/*
* Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/ByteBuffer.h>
#include <LibJS/Heap/Cell.h>
#include <LibJS/Heap/CellAllocator.h>
#include <LibWeb/Forward.h>
namespace Web::Fetch::Fetching {
class FetchedDataReceiver final : public JS::Cell {
JS_CELL(FetchedDataReceiver, JS::Cell);
JS_DECLARE_ALLOCATOR(FetchedDataReceiver);
public:
virtual ~FetchedDataReceiver() override;
void set_pending_promise(JS::NonnullGCPtr<WebIDL::Promise>);
void on_data_received(ReadonlyBytes);
private:
FetchedDataReceiver(JS::NonnullGCPtr<Infrastructure::FetchParams const>, JS::NonnullGCPtr<Streams::ReadableStream>);
virtual void visit_edges(Visitor& visitor) override;
JS::NonnullGCPtr<Infrastructure::FetchParams const> m_fetch_params;
JS::NonnullGCPtr<Streams::ReadableStream> m_stream;
JS::GCPtr<WebIDL::Promise> m_pending_promise;
ByteBuffer m_buffer;
};
}

View file

@ -17,6 +17,7 @@
#include <LibWeb/DOMURL/DOMURL.h>
#include <LibWeb/Fetch/BodyInit.h>
#include <LibWeb/Fetch/Fetching/Checks.h>
#include <LibWeb/Fetch/Fetching/FetchedDataReceiver.h>
#include <LibWeb/Fetch/Fetching/Fetching.h>
#include <LibWeb/Fetch/Fetching/PendingResponse.h>
#include <LibWeb/Fetch/Fetching/RefCountedFlag.h>
@ -1962,8 +1963,10 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<PendingResponse>> nonstandard_resource_load
load_request.set_url(request->current_url());
load_request.set_page(page);
load_request.set_method(ByteString::copy(request->method()));
for (auto const& header : *request->header_list())
load_request.set_header(ByteString::copy(header.name), ByteString::copy(header.value));
if (auto const* body = request->body().get_pointer<JS::NonnullGCPtr<Infrastructure::Body>>()) {
TRY((*body)->source().visit(
[&](ByteBuffer const& byte_buffer) -> WebIDL::ExceptionOr<void> {
@ -1981,13 +1984,121 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<PendingResponse>> nonstandard_resource_load
auto pending_response = PendingResponse::create(vm, request);
dbgln_if(WEB_FETCH_DEBUG, "Fetch: Invoking ResourceLoader");
if constexpr (WEB_FETCH_DEBUG)
if constexpr (WEB_FETCH_DEBUG) {
dbgln("Fetch: Invoking ResourceLoader");
log_load_request(load_request);
}
ResourceLoader::the().load(
load_request,
[&realm, &vm, request, pending_response](auto data, auto& response_headers, auto status_code) {
// FIXME: This check should be removed and all HTTP requests should go through the `ResourceLoader::load_unbuffered`
// path. The buffer option should then be supplied to the steps below that allow us to buffer data up to a
// user-agent-defined limit (or not). However, we will need to fully use stream operations throughout the
// fetch process to enable this (e.g. Body::fully_read must use streams for this to work).
if (request->buffer_policy() == Infrastructure::Request::BufferPolicy::DoNotBufferResponse) {
HTML::TemporaryExecutionContext execution_context { Bindings::host_defined_environment_settings_object(realm), HTML::TemporaryExecutionContext::CallbacksEnabled::Yes };
// 12. Let stream be a new ReadableStream.
auto stream = realm.heap().allocate<Streams::ReadableStream>(realm, realm);
auto fetched_data_receiver = realm.heap().allocate<FetchedDataReceiver>(realm, fetch_params, stream);
// 10. Let pullAlgorithm be the followings steps:
auto pull_algorithm = JS::create_heap_function(realm.heap(), [&realm, fetched_data_receiver]() {
// 1. Let promise be a new promise.
auto promise = WebIDL::create_promise(realm);
// 2. Run the following steps in parallel:
// NOTE: This is handled by FetchedDataReceiver.
fetched_data_receiver->set_pending_promise(promise);
// 3. Return promise.
return promise;
});
// 11. Let cancelAlgorithm be an algorithm that aborts fetchParamss controller with reason, given reason.
auto cancel_algorithm = JS::create_heap_function(realm.heap(), [&realm, &fetch_params](JS::Value reason) {
fetch_params.controller()->abort(realm, reason);
return WebIDL::create_resolved_promise(realm, JS::js_undefined());
});
// 13. Set up stream with byte reading support with pullAlgorithm set to pullAlgorithm, cancelAlgorithm set to cancelAlgorithm.
Streams::set_up_readable_stream_controller_with_byte_reading_support(stream, pull_algorithm, cancel_algorithm);
auto on_headers_received = [&vm, request, pending_response, stream](auto const& response_headers, Optional<u32> status_code) {
if (pending_response->is_resolved()) {
// RequestServer will send us the response headers twice, the second time being for HTTP trailers. This
// fetch algorithm is not interested in trailers, so just drop them here.
return;
}
auto response = Infrastructure::Response::create(vm);
response->set_status(status_code.value_or(200));
// FIXME: Set response status message
if constexpr (WEB_FETCH_DEBUG) {
dbgln("Fetch: ResourceLoader load for '{}' {}: (status {})",
request->url(),
Infrastructure::is_ok_status(response->status()) ? "complete"sv : "failed"sv,
response->status());
log_response(status_code, response_headers, ReadonlyBytes {});
}
for (auto const& [name, value] : response_headers) {
auto header = Infrastructure::Header::from_string_pair(name, value);
response->header_list()->append(move(header));
}
// 14. Set responses body to a new body whose stream is stream.
response->set_body(Infrastructure::Body::create(vm, stream));
// 17. Return response.
// NOTE: Typically responses bodys stream is still being enqueued to after returning.
pending_response->resolve(response);
};
// 16. Run these steps in parallel:
// FIXME: 1. Run these steps, but abort when fetchParams is canceled:
auto on_data_received = [fetched_data_receiver](auto bytes) {
// 1. If one or more bytes have been transmitted from responses message body, then:
if (!bytes.is_empty()) {
// 1. Let bytes be the transmitted bytes.
// FIXME: 2. Let codings be the result of extracting header list values given `Content-Encoding` and responses header list.
// FIXME: 3. Increase responses body infos encoded size by bytess length.
// FIXME: 4. Set bytes to the result of handling content codings given codings and bytes.
// FIXME: 5. Increase responses body infos decoded size by bytess length.
// FIXME: 6. If bytes is failure, then terminate fetchParamss controller.
// 7. Append bytes to buffer.
fetched_data_receiver->on_data_received(bytes);
// FIXME: 8. If the size of buffer is larger than an upper limit chosen by the user agent, ask the user agent
// to suspend the ongoing fetch.
}
};
auto on_complete = [&vm, &realm, pending_response, stream](auto success, auto error_message) {
HTML::TemporaryExecutionContext execution_context { Bindings::host_defined_environment_settings_object(realm), HTML::TemporaryExecutionContext::CallbacksEnabled::Yes };
// 16.1.1.2. Otherwise, if the bytes transmission for responses message body is done normally and stream is readable,
// then close stream, and abort these in-parallel steps.
if (success) {
if (stream->is_readable())
stream->close();
}
// 16.1.2.2. Otherwise, if stream is readable, error stream with a TypeError.
else {
auto error = MUST(String::formatted("Load failed: {}", error_message));
if (stream->is_readable())
stream->error(JS::TypeError::create(realm, error));
if (!pending_response->is_resolved())
pending_response->resolve(Infrastructure::Response::network_error(vm, error));
}
};
ResourceLoader::the().load_unbuffered(load_request, move(on_headers_received), move(on_data_received), move(on_complete));
} else {
auto on_load_success = [&realm, &vm, request, pending_response](auto data, auto& response_headers, auto status_code) {
dbgln_if(WEB_FETCH_DEBUG, "Fetch: ResourceLoader load for '{}' complete", request->url());
if constexpr (WEB_FETCH_DEBUG)
log_response(status_code, response_headers, data);
@ -2001,8 +2112,9 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<PendingResponse>> nonstandard_resource_load
}
// FIXME: Set response status message
pending_response->resolve(response);
},
[&realm, &vm, request, pending_response](auto& error, auto status_code, auto data, auto& response_headers) {
};
auto on_load_error = [&realm, &vm, request, pending_response](auto& error, auto status_code, auto data, auto& response_headers) {
dbgln_if(WEB_FETCH_DEBUG, "Fetch: ResourceLoader load for '{}' failed: {} (status {})", request->url(), error, status_code.value_or(0));
if constexpr (WEB_FETCH_DEBUG)
log_response(status_code, response_headers, data);
@ -2022,7 +2134,10 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<PendingResponse>> nonstandard_resource_load
// FIXME: Set response status message
}
pending_response->resolve(response);
});
};
ResourceLoader::the().load(load_request, move(on_load_success), move(on_load_error));
}
return pending_response;
}

View file

@ -30,6 +30,7 @@ public:
void when_loaded(Callback);
void resolve(JS::NonnullGCPtr<Infrastructure::Response>);
bool is_resolved() const { return m_response != nullptr; }
private:
PendingResponse(JS::NonnullGCPtr<Infrastructure::Request>, JS::GCPtr<Infrastructure::Response> = {});

View file

@ -250,6 +250,7 @@ JS::NonnullGCPtr<Request> Request::clone(JS::Realm& realm) const
new_request->set_prevent_no_cache_cache_control_header_modification(m_prevent_no_cache_cache_control_header_modification);
new_request->set_done(m_done);
new_request->set_timing_allow_failed(m_timing_allow_failed);
new_request->set_buffer_policy(m_buffer_policy);
// 2. If requests body is non-null, set newRequests body to the result of cloning requests body.
if (auto const* body = m_body.get_pointer<JS::NonnullGCPtr<Body>>())

View file

@ -159,6 +159,13 @@ public:
Auto
};
// AD-HOC: Some web features need to receive data as it arrives, rather than when the response is fully complete
// or when enough data has been buffered. Use this buffer policy to inform fetch of that requirement.
enum class BufferPolicy {
BufferResponse,
DoNotBufferResponse,
};
// Members are implementation-defined
struct InternalPriority { };
@ -325,6 +332,9 @@ public:
m_pending_responses.remove_first_matching([&](auto gc_ptr) { return gc_ptr == pending_response; });
}
[[nodiscard]] BufferPolicy buffer_policy() const { return m_buffer_policy; }
void set_buffer_policy(BufferPolicy buffer_policy) { m_buffer_policy = buffer_policy; }
private:
explicit Request(JS::NonnullGCPtr<HeaderList>);
@ -515,6 +525,8 @@ private:
// Non-standard
Vector<JS::NonnullGCPtr<Fetching::PendingResponse>> m_pending_responses;
BufferPolicy m_buffer_policy { BufferPolicy::BufferResponse };
};
StringView request_destination_to_string(Request::Destination);