@@ -3,7 +3,7 @@ use nom::branch::alt;
33// Using STREAMING parsers - they return Incomplete when they need more data
44// This enables TRUE bounded-memory streaming
55use nom:: bytes:: streaming:: {
6- tag, tag_no_case, take, take_until, take_while, take_while1, take_while_m_n,
6+ tag, tag_no_case, take, take_till , take_until, take_while, take_while1, take_while_m_n,
77} ;
88use nom:: character:: streaming:: { alpha1, multispace0, multispace1} ;
99use nom:: combinator:: { map, map_res, not, opt, peek, recognize} ;
@@ -245,7 +245,7 @@ fn esi_assign<'a>(
245245 alt ( ( esi_assign_short, |i| esi_assign_long ( original, i) ) ) ( input)
246246}
247247
248- fn parse_assign_attributes_short ( attrs : Vec < ( String , String ) > ) -> Vec < Element > {
248+ fn assign_attributes_short ( attrs : Vec < ( String , String ) > ) -> Vec < Element > {
249249 let mut name = String :: new ( ) ;
250250 let mut value_str = String :: new ( ) ;
251251 for ( key, val) in attrs {
@@ -269,7 +269,7 @@ fn parse_assign_attributes_short(attrs: Vec<(String, String)>) -> Vec<Element> {
269269 vec ! [ Element :: Esi ( Tag :: Assign { name, value } ) ]
270270}
271271
272- fn parse_assign_long ( attrs : Vec < ( String , String ) > , content : Vec < Element > ) -> Vec < Element > {
272+ fn assign_long ( attrs : Vec < ( String , String ) > , content : Vec < Element > ) -> Vec < Element > {
273273 let mut name = String :: new ( ) ;
274274 for ( key, val) in attrs {
275275 if key == "name" {
@@ -316,7 +316,7 @@ fn esi_assign_short(input: &[u8]) -> IResult<&[u8], Vec<Element>, Error<&[u8]>>
316316 attributes,
317317 preceded ( multispace0, self_closing) ,
318318 ) ,
319- parse_assign_attributes_short ,
319+ assign_attributes_short ,
320320 ) ( input)
321321}
322322
@@ -334,7 +334,7 @@ fn esi_assign_long<'a>(
334334 |i| parse_interpolated ( original, i) ,
335335 tag ( b"</esi:assign>" ) ,
336336 ) ) ,
337- |( attrs, content, _) | parse_assign_long ( attrs, content) ,
337+ |( attrs, content, _) | assign_long ( attrs, content) ,
338338 ) ( input)
339339}
340340
@@ -448,9 +448,7 @@ fn esi_when<'a>(
448448 ) ( input)
449449}
450450
451- // Removed - use parse_complete() directly for delimited content
452-
453- // Zero-copy version used by both esi_tag and esi_tag_old (via parse_interpolated)
451+ /// Zero-copy parser for <esi:choose>...</esi:choose>
454452fn esi_choose < ' a > (
455453 original : & Bytes ,
456454 input : & ' a [ u8 ] ,
@@ -731,6 +729,11 @@ fn single_quote(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> {
731729 tag ( b"\' " ) ( input)
732730}
733731
/// Returns `true` when `b` is the ASCII `>` byte that terminates a tag.
#[inline]
fn is_closing_bracket(b: u8) -> bool {
    matches!(b, b'>')
}
736+
734737#[ inline]
735738fn is_double_quote ( b : u8 ) -> bool {
736739 b == b'\"'
@@ -807,10 +810,14 @@ fn tag_handler<'a>(
807810 _ if name. eq_ignore_ascii_case ( b"script" ) => html_script_tag ( original, start) ,
808811
809812 // Regular HTML tag - continue parsing from where we left off
810- // (we've already consumed `<tagname`, just need to find `>`)
811813 _ => {
812- let ( input, _) = take_until ( b">" . as_ref ( ) ) ( input) ?;
814+ // we've already consumed `<tagname`, let's find `>`
815+ // Consume everything up to '>'
816+ let ( input, _) = take_till ( is_closing_bracket) ( input) ?;
817+ // Consume the '>' itself
813818 let ( input, _) = closing_bracket ( input) ?;
819+
820+ // Calculate the full tag from start (includes `<tagname...>`)
814821 let full_tag = & start[ ..start. len ( ) - input. len ( ) ] ;
815822
816823 Ok ( (
@@ -845,43 +852,35 @@ fn script_content(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> {
845852}
846853
847854/// script tag parser - input starts at <script
855+ /// Treats all script tags (inline and external) as HTML elements
848856fn html_script_tag < ' a > (
849857 original : & Bytes ,
850858 input : & ' a [ u8 ] ,
851859) -> IResult < & ' a [ u8 ] , Vec < Element > , Error < & ' a [ u8 ] > > {
852860 let start = input;
853- let ( input, _) = tag_no_case ( b"<script" ) ( input) ?;
854- let ( input, attrs) = attributes ( input) ?;
855- let ( input, _) = closing_bracket ( input) ?;
856- let opening = & start[ ..start. len ( ) - input. len ( ) ] ;
857-
858- let has_src = attrs. iter ( ) . any ( |( k, _) | k == "src" ) ;
859-
860- if has_src {
861- // External script - return just the opening tag as HTML
862- return Ok ( (
863- input,
864- vec ! [ Element :: Html ( slice_as_bytes( original, opening) ) ] ,
865- ) ) ;
866- }
867861
868- // Inline script - find closing </script> tag (case insensitive)
869- let ( input, content) = script_content ( input) ?;
862+ // Parse opening tag
863+ let ( input, _) = recognize ( delimited (
864+ tag_no_case ( b"<script" ) ,
865+ take_till ( is_closing_bracket) ,
866+ closing_bracket,
867+ ) ) ( input) ?;
870868
871- // Parse closing tag
872- let closing_start = input;
873- let ( input, _) = tag_no_case ( b"</script" ) ( input) ?;
874- let ( input, _) = multispace0 ( input) ?;
875- let ( input, _) = closing_bracket ( input) ?;
876- let closing = & closing_start[ ..closing_start. len ( ) - input. len ( ) ] ;
869+ // Parse content (if any) and closing tag (if any)
870+ let ( input, _) = opt ( tuple ( (
871+ script_content,
872+ recognize ( delimited (
873+ tag_no_case ( b"</script" ) ,
874+ multispace0,
875+ closing_bracket,
876+ ) ) ,
877+ ) ) ) ( input) ?;
877878
879+ // Return entire script tag as single HTML element
880+ let full_script = & start[ ..start. len ( ) - input. len ( ) ] ;
878881 Ok ( (
879882 input,
880- vec ! [
881- Element :: Html ( slice_as_bytes( original, opening) ) ,
882- Element :: Text ( slice_as_bytes( original, content) ) ,
883- Element :: Html ( slice_as_bytes( original, closing) ) ,
884- ] ,
883+ vec ! [ Element :: Html ( slice_as_bytes( original, full_script) ) ] ,
885884 ) )
886885}
887886
@@ -930,18 +929,18 @@ fn is_lower_alphanumeric_or_underscore(c: u8) -> bool {
930929 c. is_ascii_lowercase ( ) || c. is_ascii_digit ( ) || c == b'_'
931930}
932931
933- fn fn_name ( input : & [ u8 ] ) -> IResult < & [ u8 ] , String , Error < & [ u8 ] > > {
932+ fn esi_fn_name ( input : & [ u8 ] ) -> IResult < & [ u8 ] , String , Error < & [ u8 ] > > {
934933 map (
935934 preceded ( tag ( b"$" ) , take_while1 ( is_lower_alphanumeric_or_underscore) ) ,
936935 bytes_to_string,
937936 ) ( input)
938937}
939938
940- fn var_name ( input : & [ u8 ] ) -> IResult < & [ u8 ] , Expr , Error < & [ u8 ] > > {
939+ fn esi_var_name ( input : & [ u8 ] ) -> IResult < & [ u8 ] , Expr , Error < & [ u8 ] > > {
941940 map (
942941 tuple ( (
943942 take_while1 ( is_alphanumeric_or_underscore) ,
944- opt ( delimited ( tag ( b"{" ) , var_key_expr , tag ( b"}" ) ) ) ,
943+ opt ( delimited ( tag ( b"{" ) , esi_var_key_expr , tag ( b"}" ) ) ) ,
945944 opt ( preceded ( tag ( b"|" ) , fn_nested_argument) ) ,
946945 ) ) ,
947946 |( name, key, default) : ( & [ u8 ] , _ , _ ) | {
@@ -964,7 +963,11 @@ fn not_dollar_or_curlies(input: &[u8]) -> IResult<&[u8], String, Error<&[u8]>> {
964963// TODO: handle escaping
965964fn single_quoted_string ( input : & [ u8 ] ) -> IResult < & [ u8 ] , String , Error < & [ u8 ] > > {
966965 map (
967- delimited ( tag ( b"'" ) , take_while ( |c| c != b'\'' ) , tag ( b"'" ) ) ,
966+ delimited (
967+ single_quote,
968+ take_while ( |c| !is_single_quote ( c) ) ,
969+ single_quote,
970+ ) ,
968971 bytes_to_string,
969972 ) ( input)
970973}
@@ -996,11 +999,11 @@ fn var_key(input: &[u8]) -> IResult<&[u8], String, Error<&[u8]>> {
996999 ) ) ( input)
9971000}
9981001
999- // Parse subscript key - can be a string or a nested variable expression
1000- fn var_key_expr ( input : & [ u8 ] ) -> IResult < & [ u8 ] , Expr , Error < & [ u8 ] > > {
1002+ /// Parse subscript key - can be a string or a nested variable expression
1003+ fn esi_var_key_expr ( input : & [ u8 ] ) -> IResult < & [ u8 ] , Expr , Error < & [ u8 ] > > {
10011004 alt ( (
10021005 // Try to parse as a variable first (e.g., $(keyVar))
1003- variable ,
1006+ esi_variable ,
10041007 // Otherwise parse as a string
10051008 map ( var_key, |s : String | Expr :: String ( Some ( s) ) ) ,
10061009 ) ) ( input)
@@ -1020,7 +1023,7 @@ fn fn_argument(input: &[u8]) -> IResult<&[u8], Vec<Expr>, Error<&[u8]>> {
10201023}
10211024
10221025fn fn_nested_argument ( input : & [ u8 ] ) -> IResult < & [ u8 ] , Expr , Error < & [ u8 ] > > {
1023- alt ( ( call , variable , string, integer, bareword) ) ( input)
1026+ alt ( ( esi_function , esi_variable , string, integer, bareword) ) ( input)
10241027}
10251028
10261029fn integer ( input : & [ u8 ] ) -> IResult < & [ u8 ] , Expr , Error < & [ u8 ] > > {
@@ -1040,9 +1043,9 @@ fn bareword(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> {
10401043 ) ( input)
10411044}
10421045
1043- fn call ( input : & [ u8 ] ) -> IResult < & [ u8 ] , Expr , Error < & [ u8 ] > > {
1046+ fn esi_function ( input : & [ u8 ] ) -> IResult < & [ u8 ] , Expr , Error < & [ u8 ] > > {
10441047 let ( input, parsed) = tuple ( (
1045- fn_name ,
1048+ esi_fn_name ,
10461049 delimited (
10471050 terminated ( tag ( b"(" ) , multispace0) ,
10481051 fn_argument,
@@ -1055,8 +1058,8 @@ fn call(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> {
10551058 Ok ( ( input, Expr :: Call ( name, args) ) )
10561059}
10571060
1058- fn variable ( input : & [ u8 ] ) -> IResult < & [ u8 ] , Expr , Error < & [ u8 ] > > {
1059- delimited ( tag ( b"$(" ) , var_name , tag ( b")" ) ) ( input)
1061+ fn esi_variable ( input : & [ u8 ] ) -> IResult < & [ u8 ] , Expr , Error < & [ u8 ] > > {
1062+ delimited ( tag ( b"$(" ) , esi_var_name , tag ( b")" ) ) ( input)
10601063}
10611064
10621065fn operator ( input : & [ u8 ] ) -> IResult < & [ u8 ] , Operator , Error < & [ u8 ] > > {
@@ -1076,7 +1079,9 @@ fn operator(input: &[u8]) -> IResult<&[u8], Operator, Error<&[u8]>> {
10761079}
10771080
10781081fn interpolated_expression ( input : & [ u8 ] ) -> IResult < & [ u8 ] , Vec < Element > , Error < & [ u8 ] > > {
1079- map ( alt ( ( call, variable) ) , |expr| vec ! [ Element :: Expr ( expr) ] ) ( input)
1082+ map ( alt ( ( esi_function, esi_variable) ) , |expr| {
1083+ vec ! [ Element :: Expr ( expr) ]
1084+ } ) ( input)
10801085}
10811086
10821087fn primary_expr ( input : & [ u8 ] ) -> IResult < & [ u8 ] , Expr , Error < & [ u8 ] > > {
@@ -1093,8 +1098,8 @@ fn primary_expr(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> {
10931098 tag ( b")" ) ,
10941099 ) ,
10951100 // Parse basic expressions
1096- call ,
1097- variable ,
1101+ esi_function ,
1102+ esi_variable ,
10981103 integer,
10991104 string,
11001105 ) ) ( input)
@@ -1164,7 +1169,7 @@ mod tests {
11641169 }
11651170
11661171 #[ test]
1167- fn test_new_parse ( ) {
1172+ fn test_parse ( ) {
11681173 let input = br#"
11691174<a>foo</a>
11701175<bar />
@@ -1221,35 +1226,33 @@ exception!
12211226 }
12221227 }
#[test]
fn test_parse_script() {
    // Mixed-case tag names; the body contains a bare `<` that must not end the script.
    const SCRIPT: &[u8] = b"<sCripT> less < more </scRIpt>";

    let bytes = Bytes::from_static(SCRIPT);
    let (rest, x) = html_script_tag(&bytes, SCRIPT).unwrap();

    assert_eq!(rest.len(), 0);
    // The whole script element comes back as one HTML element.
    assert_eq!(x, [Element::Html(Bytes::from_static(SCRIPT))]);
}
#[test]
fn test_parse_script_with_src() {
    // External script: an opening tag carrying `src`, immediately followed by
    // its closing tag. The literal must not carry stray whitespace: a byte
    // left after `</sCripT>` would make the `rest.len() == 0` check fail.
    let input = b"<sCripT src=\"whatever\"></sCripT>";
    let bytes = Bytes::from_static(input);
    let (rest, x) = html_script_tag(&bytes, input).unwrap();
    assert_eq!(rest.len(), 0);
    // Opening and closing tags are returned together as one HTML element.
    assert_eq!(
        x,
        [Element::Html(Bytes::from_static(
            b"<sCripT src=\"whatever\"></sCripT>"
        ))]
    );
}
12511254 #[ test]
1252- fn test_new_parse_esi_vars_short ( ) {
1255+ fn test_parse_esi_vars_short ( ) {
12531256 let input = br#"<esi:vars name="$(hello)"/>"# ;
12541257 let bytes = Bytes :: from_static ( input) ;
12551258 let ( rest, x) = esi_vars ( & bytes, input) . unwrap ( ) ;
@@ -1264,7 +1267,7 @@ exception!
12641267 ) ;
12651268 }
12661269 #[ test]
1267- fn test_new_parse_esi_vars_long ( ) {
1270+ fn test_parse_esi_vars_long ( ) {
12681271 // Nested <esi:vars> tags are not supported to prevent infinite recursion
12691272 // The inner <esi:vars> tags should be treated as plain text/HTML
12701273 let input = br#"<esi:vars>hello<br></esi:vars>"# ;
@@ -1297,7 +1300,7 @@ exception!
12971300 ) ;
12981301 }
12991302 #[ test]
1300- fn test_new_parse_complex_expr ( ) {
1303+ fn test_parse_complex_expr ( ) {
13011304 let input = br#"<esi:vars name="$call('hello') matches $(var{'key'})"/>"# ;
13021305 let bytes = Bytes :: from_static ( input) ;
13031306 let ( rest, x) = parse_complete ( & bytes) . unwrap ( ) ;
@@ -1396,15 +1399,15 @@ exception!
13961399 }
13971400
#[test]
fn test_parse_plain_text() {
    // Input with no markup at all should come back as one text element.
    let input = b"hello\n there";
    let bytes = Bytes::from_static(input);

    let (rest, x) = parse_complete(&bytes).unwrap();

    assert_eq!(rest.len(), 0);
    let expected = [Element::Text(Bytes::from_static(b"hello\n there"))];
    assert_eq!(x, expected);
}
14061409 #[ test]
1407- fn test_new_parse_interpolated ( ) {
1410+ fn test_parse_interpolated ( ) {
14081411 let input = b"hello $(foo)<esi:vars>goodbye $(foo)</esi:vars>" ;
14091412 let bytes = Bytes :: from_static ( input) ;
14101413 let ( rest, x) = parse_complete ( & bytes) . unwrap ( ) ;
@@ -1419,7 +1422,7 @@ exception!
14191422 ) ;
14201423 }
14211424 #[ test]
1422- fn test_new_parse_examples ( ) {
1425+ fn test_parse_examples ( ) {
14231426 let input = include_bytes ! ( "../../examples/esi_vars_example/src/index.html" ) ;
14241427 let bytes = Bytes :: from_static ( input) ;
14251428 let ( rest, _) = parse_complete ( & bytes) . unwrap ( ) ;
0 commit comments