-
Notifications
You must be signed in to change notification settings - Fork 5.2k
http: add sse parser utility in common #43081
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
5b354cb
390b357
8aa70cc
b94ffbe
499a5a3
4bb01fa
9f07b53
ca19e1f
e5c5fb0
db534d0
f03f4ff
5c5700d
596c98b
8cc6ad9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| load( | ||
| "//bazel:envoy_build_system.bzl", | ||
| "envoy_cc_library", | ||
| "envoy_package", | ||
| ) | ||
|
|
||
| licenses(["notice"]) # Apache 2 | ||
|
|
||
| envoy_package() | ||
|
|
||
| # Server-Sent Events (SSE) parser library built from sse_parser.cc / sse_parser.h. | ||
| # Its only declared dependencies are the Abseil strings and optional targets. | ||
| envoy_cc_library( | ||
| name = "sse_parser_lib", | ||
| srcs = ["sse_parser.cc"], | ||
| hdrs = ["sse_parser.h"], | ||
| deps = [ | ||
| "@com_google_absl//absl/strings", | ||
| "@com_google_absl//absl/types:optional", | ||
| ], | ||
| ) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,135 @@ | ||
| #include "source/common/http/sse/sse_parser.h" | ||
|
|
||
| #include <cstdint> | ||
|
|
||
| #include "absl/strings/string_view.h" | ||
|
|
||
| namespace Envoy { | ||
| namespace Http { | ||
| namespace Sse { | ||
|
|
||
| SseParser::ParsedEvent SseParser::parseEvent(absl::string_view event) { | ||
|   // TODO(optimization): Consider merging findEventEnd and parseEvent into a single-pass | ||
|   // algorithm to avoid traversing the buffer twice. | ||
|   ParsedEvent result; | ||
|
|
||
|   // Walk the event one line at a time. end_stream is passed as true so that a final line | ||
|   // without a terminator is still returned as a complete line. | ||
|   for (absl::string_view rest = event; !rest.empty();) { | ||
|     const auto [line_length, next_offset] = findLineEnd(rest, true); | ||
|     const auto [name, value] = parseFieldLine(rest.substr(0, line_length)); | ||
|     rest = rest.substr(next_offset); | ||
|
|
||
|     // Only the 'data' field is extracted; comments, blank lines and other fields are skipped. | ||
|     if (name != "data") { | ||
|       continue; | ||
|     } | ||
|
|
||
|     if (result.data.has_value()) { | ||
|       // Per SSE spec, multiple data fields are concatenated with newlines. | ||
|       result.data->push_back('\n'); | ||
|     } else { | ||
|       // First data line: reserve once up front. The joined data cannot be longer than the | ||
|       // input event string, so no further allocation is needed while appending. | ||
|       result.data = std::string(); | ||
|       result.data->reserve(event.size()); | ||
|     } | ||
|     result.data->append(value.data(), value.size()); | ||
|   } | ||
|
|
||
|   return result; | ||
| } | ||
|
|
||
| std::tuple<size_t, size_t, size_t> SseParser::findEventEnd(absl::string_view buffer, | ||
|                                                            bool end_stream) { | ||
|   constexpr size_t npos = absl::string_view::npos; | ||
|
|
||
|   // Offset into buffer of the line currently being examined. | ||
|   size_t offset = 0; | ||
|
|
||
|   // Per SSE spec: Strip UTF-8 BOM (0xEF 0xBB 0xBF) if present at stream start. | ||
|   // The check is applied to the start of whatever buffer is passed in. | ||
|   const bool has_bom = buffer.size() >= 3 && static_cast<uint8_t>(buffer[0]) == 0xEF && | ||
|                        static_cast<uint8_t>(buffer[1]) == 0xBB && | ||
|                        static_cast<uint8_t>(buffer[2]) == 0xBF; | ||
|   if (has_bom) { | ||
|     offset = 3; | ||
|   } | ||
|   // Event content starts after the BOM (or at 0 when there is none). | ||
|   const size_t event_start = offset; | ||
|
|
||
|   while (offset < buffer.size()) { | ||
|     const auto [line_end, next_line] = findLineEnd(buffer.substr(offset), end_stream); | ||
|
|
||
|     // The current line is not complete yet; the caller has to supply more data. | ||
|     if (line_end == npos) { | ||
|       return {npos, npos, npos}; | ||
|     } | ||
|
|
||
|     // A zero-length line is the blank line that terminates the event. The event content ends | ||
|     // where this line starts, and parsing resumes right after its line ending. | ||
|     if (line_end == 0) { | ||
|       return {event_start, offset, offset + next_line}; | ||
|     } | ||
|
|
||
|     offset += next_line; | ||
|   } | ||
|
|
||
|   // Per SSE spec: Once the end of the file is reached, any pending data must be discarded. | ||
|   // (i.e., incomplete events without a closing blank line are dropped) | ||
|   return {npos, npos, npos}; | ||
| } | ||
|
|
||
| std::pair<absl::string_view, absl::string_view> SseParser::parseFieldLine(absl::string_view line) { | ||
|   // An empty line carries no field. | ||
|   if (line.empty()) { | ||
|     return {"", ""}; | ||
|   } | ||
|
|
||
|   const size_t colon = line.find(':'); | ||
|
|
||
|   // Per SSE spec, lines starting with ':' are comments and should be ignored. | ||
|   if (colon == 0) { | ||
|     return {"", ""}; | ||
|   } | ||
|
|
||
|   // No colon at all: the whole line is the field name and the value is empty. | ||
|   if (colon == absl::string_view::npos) { | ||
|     return {line, ""}; | ||
|   } | ||
|
|
||
|   // Per SSE spec, remove leading space from value if present (a single space only). | ||
|   absl::string_view value = line.substr(colon + 1); | ||
|   if (!value.empty() && value.front() == ' ') { | ||
|     value.remove_prefix(1); | ||
|   } | ||
|
|
||
|   return {line.substr(0, colon), value}; | ||
| } | ||
|
|
||
| std::pair<size_t, size_t> SseParser::findLineEnd(absl::string_view str, bool end_stream) { | ||
|   constexpr size_t npos = absl::string_view::npos; | ||
|   const size_t terminator = str.find_first_of("\r\n"); | ||
|
|
||
|   // No CR or LF anywhere: only a complete line if the stream has ended. | ||
|   if (terminator == npos) { | ||
PeterL328 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|     if (!end_stream) { | ||
|       return {npos, npos}; | ||
|     } | ||
|     return {str.size(), str.size()}; | ||
|   } | ||
|
|
||
|   const bool is_cr = str[terminator] == '\r'; | ||
|   const bool is_last_char = terminator + 1 == str.size(); | ||
|
|
||
|   // Split CRLF edge case: a '\r' at the very end may be the first half of a CRLF whose '\n' | ||
|   // has not arrived yet, so wait for more data unless the stream has ended. | ||
|   if (is_cr && is_last_char) { | ||
|     if (!end_stream) { | ||
|       return {npos, npos}; | ||
|     } | ||
|     return {terminator, terminator + 1}; | ||
|   } | ||
|
|
||
|   // LF and lone CR are one character wide, CRLF is two (per SSE spec all three end a line). | ||
|   const bool is_crlf = is_cr && str[terminator + 1] == '\n'; | ||
|   return {terminator, terminator + (is_crlf ? 2 : 1)}; | ||
| } | ||
|
|
||
| } // namespace Sse | ||
| } // namespace Http | ||
| } // namespace Envoy | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,103 @@ | ||
| #pragma once | ||
|
|
||
| #include <string> | ||
| #include <tuple> | ||
| #include <utility> | ||
|
|
||
| #include "absl/strings/string_view.h" | ||
| #include "absl/types/optional.h" | ||
|
|
||
| namespace Envoy { | ||
| namespace Http { | ||
| namespace Sse { | ||
|
|
||
| /** | ||
| * Parser for Server-Sent Events (SSE) format. | ||
| * Implements the SSE specification: https://html.spec.whatwg.org/multipage/server-sent-events.html | ||
| * | ||
| * All methods are static and operate on absl::string_view inputs; the class keeps no state | ||
| * between calls, so the caller buffers incoming bytes and carries unconsumed bytes over to the | ||
| * next chunk (see the example below). | ||
| * | ||
| * This parser handles: | ||
| * - Multiple line ending formats (CR, LF, CRLF) | ||
| * - Comment lines (lines starting with ':') | ||
| * - Multiple data fields (concatenated with newlines) | ||
| * - Partial events split across chunks | ||
| * - End-of-stream handling | ||
| * | ||
| * Example usage: | ||
| * std::string buffer_; | ||
| * absl::string_view buffer_view(buffer_); | ||
| * while (!buffer_view.empty()) { | ||
| * // All three positions are offsets into buffer_view. | ||
| * auto [event_start, event_end, next_start] = findEventEnd(buffer_view, end_stream); | ||
| * if (event_start == absl::string_view::npos) break; | ||
| * | ||
| * auto event_str = buffer_view.substr(event_start, event_end - event_start); | ||
| * auto event = parseEvent(event_str); | ||
| * if (event.data.has_value()) { | ||
| * // Process event.data.value() | ||
| * } | ||
| * buffer_view = buffer_view.substr(next_start); | ||
| * } | ||
| * // Drop the consumed prefix; whatever is left is an incomplete event. | ||
| * buffer_.erase(0, buffer_.size() - buffer_view.size()); | ||
| */ | ||
| class SseParser { | ||
PeterL328 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| public: | ||
| /** | ||
| * Represents a parsed SSE event. | ||
| * Currently only supports the 'data' field. Future versions may add 'id', 'event', and 'retry'. | ||
| */ | ||
| struct ParsedEvent { | ||
| // The concatenated data field values. Per SSE spec, multiple data fields are joined with | ||
| // newlines. absl::nullopt if no data fields present, empty string if data field exists but | ||
| // empty. | ||
| absl::optional<std::string> data; | ||
| }; | ||
|
|
||
| /** | ||
| * Parses an SSE event and extracts fields. | ||
| * Currently extracts only the 'data' field; comment lines, empty lines and lines with any other | ||
| * field name are skipped. Per SSE spec, multiple data fields are joined with newlines. | ||
| * | ||
| * @param event the text of a single SSE event, e.g. the [event_start, event_end) range of the | ||
| * buffer as reported by findEventEnd(). Lines may end with CR, LF or CRLF; a final line | ||
| * without a line ending is still parsed. | ||
| * @return parsed event with available fields populated. | ||
| */ | ||
| static ParsedEvent parseEvent(absl::string_view event); | ||
|
|
||
| /** | ||
| * Finds the end of the next SSE event in the buffer. | ||
| * An event ends with a blank line (two consecutive line breaks). | ||
| * A UTF-8 BOM (0xEF 0xBB 0xBF) at the very start of the buffer is skipped. The check is made on | ||
| * every call, since this function has no knowledge of where the stream started. | ||
| * | ||
| * @param buffer the buffer to search for an event. | ||
| * @param end_stream whether this is the end of the stream (affects partial line handling). | ||
| * @return a tuple of {event_start, event_end, next_event_start} positions, all of them offsets | ||
| * into buffer. | ||
| * event_start: where the event content begins (after BOM if present) | ||
| * event_end: where the event content ends (excluding trailing blank line) | ||
| * next_event_start: where to continue parsing for the next event | ||
| * Returns {npos, npos, npos} if no complete event is found. This includes the case where | ||
| * end_stream is true but the remaining data is not closed by a blank line: per SSE spec | ||
| * such pending data is discarded. | ||
| */ | ||
| static std::tuple<size_t, size_t, size_t> findEventEnd(absl::string_view buffer, bool end_stream); | ||
|
|
||
| private: | ||
| /** | ||
| * Parses an SSE field line into {field_name, field_value}. | ||
| * Handles comments (lines starting with ':') and strips a single leading space from the value. | ||
| * A line without any ':' is returned as {line, ""}. | ||
| * | ||
| * @param line a single line from an SSE event, without its line ending. | ||
| * @return a pair of {field_name, field_value}. Returns {"", ""} for empty lines or comments. | ||
| */ | ||
| static std::pair<absl::string_view, absl::string_view> parseFieldLine(absl::string_view line); | ||
|
|
||
| /** | ||
| * Finds the end of the current line, handling CR, LF, and CRLF line endings. | ||
| * Per SSE spec, all three line ending formats are supported. | ||
| * | ||
| * @param str the string to search for a line ending. | ||
| * @param end_stream whether this is the end of the stream (affects partial line handling). | ||
| * @return a pair of {line_end, next_line_start} positions. | ||
| * Returns {npos, npos} if no complete line is found. With end_stream false this includes a | ||
| * CR that is the last character of str, because it may be the first half of a CRLF split | ||
| * across chunks. With end_stream true and no line ending present, the whole of str is | ||
| * treated as the line and {str.size(), str.size()} is returned. | ||
| */ | ||
| static std::pair<size_t, size_t> findLineEnd(absl::string_view str, bool end_stream); | ||
| }; | ||
|
|
||
| } // namespace Sse | ||
| } // namespace Http | ||
| } // namespace Envoy | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
| load( | ||
| "//bazel:envoy_build_system.bzl", | ||
| "envoy_cc_fuzz_test", | ||
| "envoy_cc_test", | ||
| "envoy_package", | ||
| ) | ||
|
|
||
| licenses(["notice"]) # Apache 2 | ||
|
|
||
| envoy_package() | ||
|
|
||
| # Test target for the SSE parser; sources live in sse_parser_test.cc and link sse_parser_lib. | ||
| envoy_cc_test( | ||
| name = "sse_parser_test", | ||
| srcs = ["sse_parser_test.cc"], | ||
| deps = [ | ||
| "//source/common/http/sse:sse_parser_lib", | ||
| ], | ||
| ) | ||
|
|
||
| # Fuzz test target for the SSE parser; uses the "sse_parser_corpus" corpus and links | ||
| # sse_parser_lib. | ||
| envoy_cc_fuzz_test( | ||
| name = "sse_parser_fuzz_test", | ||
| srcs = ["sse_parser_fuzz_test.cc"], | ||
| corpus = "sse_parser_corpus", | ||
| deps = [ | ||
| "//source/common/http/sse:sse_parser_lib", | ||
| ],  | ||
| ) |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Uh oh!
There was an error while loading. Please reload this page.