Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*.svg binary
/test/common/tls/test_data/aes_128_key binary
/test/common/tls/test_data/ticket_key_* binary
/test/common/http/sse/sse_parser_corpus/* binary
/test/**/*_corpus/* linguist-generated=true
package.lock binary
yarn.lock binary
19 changes: 19 additions & 0 deletions source/common/http/sse/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
load(
"//bazel:envoy_build_system.bzl",
"envoy_cc_library",
"envoy_package",
)

licenses(["notice"]) # Apache 2

envoy_package()

# Server-Sent Events (SSE) parser library: a single source/header pair that
# depends only on Abseil's strings and optional targets.
envoy_cc_library(
name = "sse_parser_lib",
srcs = ["sse_parser.cc"],
hdrs = ["sse_parser.h"],
deps = [
"@com_google_absl//absl/strings",
"@com_google_absl//absl/types:optional",
],
)
135 changes: 135 additions & 0 deletions source/common/http/sse/sse_parser.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
#include "source/common/http/sse/sse_parser.h"

#include <cstdint>

#include "absl/strings/string_view.h"

namespace Envoy {
namespace Http {
namespace Sse {

SseParser::ParsedEvent SseParser::parseEvent(absl::string_view event) {
  // Walks the event one line at a time and collects every `data` field value.
  // All other fields (and comment lines) are skipped.
  //
  // TODO(optimization): Consider merging findEventEnd and parseEvent into a single-pass
  // algorithm to avoid traversing the buffer twice.
  ParsedEvent result;

  for (absl::string_view rest = event; !rest.empty();) {
    // The event is already complete, so an unterminated final line is still a full line.
    const auto bounds = findLineEnd(rest, /*end_stream=*/true);
    const auto field = parseFieldLine(rest.substr(0, bounds.first));
    rest = rest.substr(bounds.second);

    if (field.first != "data") {
      continue;
    }

    if (result.data.has_value()) {
      // Per SSE spec, successive data values are separated by a single newline.
      result.data->append("\n");
    } else {
      // First data field: create the string and reserve once. The joined data can never be
      // longer than the raw event text, so this avoids reallocation while appending.
      result.data.emplace();
      result.data->reserve(event.size());
    }
    result.data->append(field.second.data(), field.second.size());
  }

  return result;
}

std::tuple<size_t, size_t, size_t> SseParser::findEventEnd(absl::string_view buffer,
                                                           bool end_stream) {
  // Scans `buffer` line by line until a blank line (the event terminator) is found.
  // Returns {event_start, event_end, next_event_start}, or {npos, npos, npos} when the
  // buffer does not yet hold a complete event.
  constexpr size_t kNotFound = absl::string_view::npos;
  constexpr absl::string_view kUtf8Bom("\xEF\xBB\xBF", 3);

  // Per SSE spec: Strip UTF-8 BOM (0xEF 0xBB 0xBF) if present at stream start.
  // NOTE(review): this function is stateless, so a BOM is skipped whenever `buffer` begins
  // with these bytes, not only on the first call for a stream -- confirm this is acceptable.
  const bool has_bom = buffer.substr(0, kUtf8Bom.size()) == kUtf8Bom;
  const size_t content_begin = has_bom ? kUtf8Bom.size() : 0;

  // `offset` is the start of the line currently being examined.
  size_t offset = content_begin;
  while (offset < buffer.size()) {
    const auto [line_len, advance] = findLineEnd(buffer.substr(offset), end_stream);

    if (line_len == kNotFound) {
      // The current line is not complete yet; wait for more data.
      return {kNotFound, kNotFound, kNotFound};
    }

    if (line_len == 0) {
      // A blank line terminates the event. The content stops where the blank line begins and
      // the next event starts right after its terminator.
      return {content_begin, offset, offset + advance};
    }

    offset += advance;
  }

  // Per SSE spec: Once the end of the file is reached, any pending data must be discarded.
  // (i.e., incomplete events without a closing blank line are dropped)
  return {kNotFound, kNotFound, kNotFound};
}

std::pair<absl::string_view, absl::string_view> SseParser::parseFieldLine(absl::string_view line) {
  // Splits one line into {field_name, field_value}.
  // Empty lines carry no field; per SSE spec, lines starting with ':' are comments and are
  // ignored. Both cases yield an empty name and an empty value.
  if (line.empty() || line.front() == ':') {
    return {"", ""};
  }

  const size_t separator = line.find(':');
  if (separator == absl::string_view::npos) {
    // No colon: the whole line is the field name and the value is empty.
    return {line, ""};
  }

  absl::string_view value = line.substr(separator + 1);
  // Per SSE spec, a single leading space after the colon is not part of the value.
  if (!value.empty() && value.front() == ' ') {
    value.remove_prefix(1);
  }

  return {line.substr(0, separator), value};
}

std::pair<size_t, size_t> SseParser::findLineEnd(absl::string_view str, bool end_stream) {
  // Returns {line_end, next_line_start} for the first line in `str`, or {npos, npos} when the
  // line cannot be delimited yet and more data may still arrive.
  constexpr size_t kNotFound = absl::string_view::npos;
  const std::pair<size_t, size_t> incomplete{kNotFound, kNotFound};

  const size_t idx = str.find_first_of("\r\n");

  if (idx == kNotFound) {
    // No terminator at all: only a complete line if the stream has ended.
    if (!end_stream) {
      return incomplete;
    }
    return {str.size(), str.size()};
  }

  if (str[idx] == '\r') {
    const bool is_last_char = (idx + 1 == str.size());
    if (is_last_char) {
      // Split CRLF edge case: a trailing '\r' may be the first half of a CRLF whose '\n' has
      // not arrived yet, so only accept it as a terminator once the stream has ended.
      if (!end_stream) {
        return incomplete;
      }
      return {idx, idx + 1};
    }
    if (str[idx + 1] == '\n') {
      // CRLF: skip both characters.
      return {idx, idx + 2};
    }
  }

  // Lone LF, or a CR followed by something other than LF: a one-character terminator.
  return {idx, idx + 1};
}

} // namespace Sse
} // namespace Http
} // namespace Envoy
103 changes: 103 additions & 0 deletions source/common/http/sse/sse_parser.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#pragma once

#include <string>
#include <tuple>
#include <utility>

#include "absl/strings/string_view.h"
#include "absl/types/optional.h"

namespace Envoy {
namespace Http {
namespace Sse {

/**
* Parser for Server-Sent Events (SSE) format.
* Implements the SSE specification: https://html.spec.whatwg.org/multipage/server-sent-events.html
*
* The class has no data members and exposes only static functions; the caller owns the buffer
* and drives the parse loop.
*
* This parser handles:
* - Multiple line ending formats (CR, LF, CRLF)
* - Comment lines (lines starting with ':')
* - Multiple data fields (concatenated with newlines)
* - Partial events split across chunks
* - End-of-stream handling
*
* Example usage:
* std::string buffer_;
* absl::string_view buffer_view(buffer_);
* while (!buffer_view.empty()) {
* auto [event_start, event_end, next_start] = SseParser::findEventEnd(buffer_view, end_stream);
* if (event_start == absl::string_view::npos) break;
*
* auto event_str = buffer_view.substr(event_start, event_end - event_start);
* auto event = SseParser::parseEvent(event_str);
* if (event.data.has_value()) {
* // Process event.data.value()
* }
* buffer_view = buffer_view.substr(next_start);
* }
* // Drop everything that was consumed; keep the unparsed tail for the next chunk.
* buffer_.erase(0, buffer_.size() - buffer_view.size());
*/
class SseParser {
public:
/**
* Represents a parsed SSE event.
* Currently only supports the 'data' field. Future versions may add 'id', 'event', and 'retry'.
*/
struct ParsedEvent {
// The concatenated data field values. Per SSE spec, multiple data fields are joined with
// newlines. absl::nullopt if no data fields are present; an empty string if a data field
// exists but its value is empty.
absl::optional<std::string> data;
};

/**
* Parses an SSE event and extracts fields.
* Currently extracts only the 'data' field; every other field and all comment lines are
* ignored. Per SSE spec, multiple data fields are joined with newlines.
*
* @param event the content of one complete SSE event, i.e. the [event_start, event_end) range
* reported by findEventEnd (without the terminating blank line).
* @return parsed event with available fields populated.
*/
static ParsedEvent parseEvent(absl::string_view event);

/**
* Finds the end of the next SSE event in the buffer.
* An event ends with a blank line (two consecutive line breaks).
* A UTF-8 BOM (0xEF 0xBB 0xBF) at the very beginning of `buffer` is skipped. The check is made
* on every call, because the parser keeps no state about the position within the stream.
*
* @param buffer the buffer to search for an event.
* @param end_stream whether this is the end of the stream (affects partial line handling).
* @return a tuple of {event_start, event_end, next_event_start} positions.
* event_start: where the event content begins (after BOM if present)
* event_end: where the event content ends, i.e. the offset of the terminating blank line
* (the line break of the last field line is part of the content)
* next_event_start: where to continue parsing for the next event
* Returns {npos, npos, npos} if no complete event is found. This includes the case where
* end_stream is true and the remaining data has no terminating blank line: such pending data
* is discarded.
*/
static std::tuple<size_t, size_t, size_t> findEventEnd(absl::string_view buffer, bool end_stream);

private:
/**
* Parses an SSE field line into {field_name, field_value}.
* Handles comments (lines starting with ':') and strips a single leading space from the value.
* A line without a colon is returned as {line, ""}.
*
* @param line a single line from an SSE event.
* @return a pair of {field_name, field_value}. Returns {"", ""} for empty lines or comments.
*/
static std::pair<absl::string_view, absl::string_view> parseFieldLine(absl::string_view line);

/**
* Finds the end of the current line, handling CR, LF, and CRLF line endings.
* Per SSE spec, all three line ending formats are supported.
*
* @param str the string to search for a line ending.
* @param end_stream whether this is the end of the stream (affects partial line handling).
* When false, a line with no terminator, or one whose only terminator is a '\r' at the very
* end of `str` (possibly the first half of a split CRLF), is reported as incomplete. When
* true, the unterminated remainder is treated as a full line and a trailing '\r' as a
* terminator.
* @return a pair of {line_end, next_line_start} positions.
* Returns {npos, npos} if no complete line is found.
*/
static std::pair<size_t, size_t> findLineEnd(absl::string_view str, bool end_stream);
};

} // namespace Sse
} // namespace Http
} // namespace Envoy
27 changes: 27 additions & 0 deletions test/common/http/sse/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
load(
"//bazel:envoy_build_system.bzl",
"envoy_cc_fuzz_test",
"envoy_cc_test",
"envoy_package",
)

licenses(["notice"]) # Apache 2

envoy_package()

# Unit tests for the SSE parser library.
envoy_cc_test(
name = "sse_parser_test",
srcs = ["sse_parser_test.cc"],
deps = [
"//source/common/http/sse:sse_parser_lib",
],
)

# Fuzz target for the SSE parser library, seeded from the sse_parser_corpus directory.
envoy_cc_fuzz_test(
name = "sse_parser_fuzz_test",
srcs = ["sse_parser_fuzz_test.cc"],
corpus = "sse_parser_corpus",
deps = [
"//source/common/http/sse:sse_parser_lib",
],
)
5 changes: 5 additions & 0 deletions test/common/http/sse/sse_parser_corpus/all_fields

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions test/common/http/sse/sse_parser_corpus/crlf_event

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions test/common/http/sse/sse_parser_corpus/empty_event

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions test/common/http/sse/sse_parser_corpus/incomplete_event

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions test/common/http/sse/sse_parser_corpus/json_event

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions test/common/http/sse/sse_parser_corpus/large_event

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions test/common/http/sse/sse_parser_corpus/mixed_endings

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions test/common/http/sse/sse_parser_corpus/multiple_data

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions test/common/http/sse/sse_parser_corpus/simple_event

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file not shown.
5 changes: 5 additions & 0 deletions test/common/http/sse/sse_parser_corpus/with_comments

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading