Skip to content

Commit 4305dd4

Browse files
andygroveclaude
andauthored
refactor: use to_ident() instead of clone().into_ident() for borrowed Words (#2177)
Co-authored-by: Claude Opus 4.5 <[email protected]>
1 parent 614ea06 commit 4305dd4

File tree

2 files changed

+92
-15
lines changed

2 files changed

+92
-15
lines changed

sqlparser_bench/benches/sqlparser_bench.rs

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717

1818
use criterion::{criterion_group, criterion_main, Criterion};
1919
use sqlparser::dialect::GenericDialect;
20+
use sqlparser::keywords::Keyword;
2021
use sqlparser::parser::Parser;
22+
use sqlparser::tokenizer::{Span, Word};
2123

2224
fn basic_queries(c: &mut Criterion) {
2325
let mut group = c.benchmark_group("sqlparser-rs parsing benchmark");
@@ -82,5 +84,73 @@ fn basic_queries(c: &mut Criterion) {
8284
});
8385
}
8486

85-
criterion_group!(benches, basic_queries);
87+
/// Benchmark comparing `to_ident(&self)` vs `clone().into_ident(self)`.
88+
///
89+
/// Both approaches have equivalent performance since the String clone dominates.
90+
/// `to_ident()` is preferred for clearer code (one method call vs two).
91+
fn word_to_ident(c: &mut Criterion) {
92+
let mut group = c.benchmark_group("word_to_ident");
93+
94+
// Create Word instances with varying identifier lengths
95+
let words: Vec<Word> = (0..100)
96+
.map(|i| Word {
97+
value: format!("identifier_name_with_number_{i}"),
98+
quote_style: None,
99+
keyword: Keyword::NoKeyword,
100+
})
101+
.collect();
102+
let span = Span::empty();
103+
104+
// clone().into_ident(): clones entire Word struct, then moves the String value
105+
group.bench_function("clone_into_ident_100x", |b| {
106+
b.iter(|| {
107+
for w in &words {
108+
std::hint::black_box(w.clone().into_ident(span));
109+
}
110+
});
111+
});
112+
113+
// to_ident(): clones only the String value directly into the Ident
114+
group.bench_function("to_ident_100x", |b| {
115+
b.iter(|| {
116+
for w in &words {
117+
std::hint::black_box(w.to_ident(span));
118+
}
119+
});
120+
});
121+
122+
group.finish();
123+
}
124+
125+
/// Benchmark parsing queries with many identifiers to show real-world impact
126+
fn parse_many_identifiers(c: &mut Criterion) {
127+
let mut group = c.benchmark_group("parse_identifiers");
128+
let dialect = GenericDialect {};
129+
130+
// Query with many column references (identifiers)
131+
let many_columns = (0..100)
132+
.map(|n| format!("column_{n}"))
133+
.collect::<Vec<_>>()
134+
.join(", ");
135+
let query = format!("SELECT {many_columns} FROM my_table");
136+
137+
group.bench_function("select_100_columns", |b| {
138+
b.iter(|| Parser::parse_sql(&dialect, std::hint::black_box(&query)));
139+
});
140+
141+
// Query with many table.column references
142+
let qualified_columns = (0..100)
143+
.map(|n| format!("t{}.column_{n}", n % 5))
144+
.collect::<Vec<_>>()
145+
.join(", ");
146+
let query_qualified = format!("SELECT {qualified_columns} FROM t0, t1, t2, t3, t4");
147+
148+
group.bench_function("select_100_qualified_columns", |b| {
149+
b.iter(|| Parser::parse_sql(&dialect, std::hint::black_box(&query_qualified)));
150+
});
151+
152+
group.finish();
153+
}
154+
155+
criterion_group!(benches, basic_queries, word_to_ident, parse_many_identifiers);
86156
criterion_main!(benches);

src/parser/mod.rs

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1460,7 +1460,7 @@ impl<'a> Parser<'a> {
14601460
if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
14611461
{
14621462
Ok(Some(Expr::Function(Function {
1463-
name: ObjectName::from(vec![w.clone().into_ident(w_span)]),
1463+
name: ObjectName::from(vec![w.to_ident(w_span)]),
14641464
uses_odbc_syntax: false,
14651465
parameters: FunctionArguments::None,
14661466
args: FunctionArguments::None,
@@ -1475,7 +1475,7 @@ impl<'a> Parser<'a> {
14751475
| Keyword::CURRENT_DATE
14761476
| Keyword::LOCALTIME
14771477
| Keyword::LOCALTIMESTAMP => {
1478-
Ok(Some(self.parse_time_functions(ObjectName::from(vec![w.clone().into_ident(w_span)]))?))
1478+
Ok(Some(self.parse_time_functions(ObjectName::from(vec![w.to_ident(w_span)]))?))
14791479
}
14801480
Keyword::CASE => Ok(Some(self.parse_case_expr()?)),
14811481
Keyword::CONVERT => Ok(Some(self.parse_convert_expr(false)?)),
@@ -1500,7 +1500,7 @@ impl<'a> Parser<'a> {
15001500
Keyword::CEIL => Ok(Some(self.parse_ceil_floor_expr(true)?)),
15011501
Keyword::FLOOR => Ok(Some(self.parse_ceil_floor_expr(false)?)),
15021502
Keyword::POSITION if self.peek_token_ref().token == Token::LParen => {
1503-
Ok(Some(self.parse_position_expr(w.clone().into_ident(w_span))?))
1503+
Ok(Some(self.parse_position_expr(w.to_ident(w_span))?))
15041504
}
15051505
Keyword::SUBSTR | Keyword::SUBSTRING => {
15061506
self.prev_token();
@@ -1522,7 +1522,7 @@ impl<'a> Parser<'a> {
15221522
let query = self.parse_query()?;
15231523
self.expect_token(&Token::RParen)?;
15241524
Ok(Some(Expr::Function(Function {
1525-
name: ObjectName::from(vec![w.clone().into_ident(w_span)]),
1525+
name: ObjectName::from(vec![w.to_ident(w_span)]),
15261526
uses_odbc_syntax: false,
15271527
parameters: FunctionArguments::None,
15281528
args: FunctionArguments::Subquery(query),
@@ -1572,7 +1572,7 @@ impl<'a> Parser<'a> {
15721572
) -> Result<Expr, ParserError> {
15731573
match self.peek_token().token {
15741574
Token::LParen if !self.peek_outer_join_operator() => {
1575-
let id_parts = vec![w.clone().into_ident(w_span)];
1575+
let id_parts = vec![w.to_ident(w_span)];
15761576
self.parse_function(ObjectName::from(id_parts))
15771577
}
15781578
// string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html
@@ -1582,7 +1582,7 @@ impl<'a> Parser<'a> {
15821582
if w.value.starts_with('_') =>
15831583
{
15841584
Ok(Expr::Prefixed {
1585-
prefix: w.clone().into_ident(w_span),
1585+
prefix: w.to_ident(w_span),
15861586
value: self.parse_introduced_string_expr()?.into(),
15871587
})
15881588
}
@@ -1593,19 +1593,19 @@ impl<'a> Parser<'a> {
15931593
if w.value.starts_with('_') =>
15941594
{
15951595
Ok(Expr::Prefixed {
1596-
prefix: w.clone().into_ident(w_span),
1596+
prefix: w.to_ident(w_span),
15971597
value: self.parse_introduced_string_expr()?.into(),
15981598
})
15991599
}
16001600
Token::Arrow if self.dialect.supports_lambda_functions() => {
16011601
self.expect_token(&Token::Arrow)?;
16021602
Ok(Expr::Lambda(LambdaFunction {
1603-
params: OneOrManyWithParens::One(w.clone().into_ident(w_span)),
1603+
params: OneOrManyWithParens::One(w.to_ident(w_span)),
16041604
body: Box::new(self.parse_expr()?),
16051605
syntax: LambdaSyntax::Arrow,
16061606
}))
16071607
}
1608-
_ => Ok(Expr::Identifier(w.clone().into_ident(w_span))),
1608+
_ => Ok(Expr::Identifier(w.to_ident(w_span))),
16091609
}
16101610
}
16111611

@@ -12401,9 +12401,10 @@ impl<'a> Parser<'a> {
1240112401
pub fn parse_identifiers(&mut self) -> Result<Vec<Ident>, ParserError> {
1240212402
let mut idents = vec![];
1240312403
loop {
12404-
match &self.peek_token_ref().token {
12404+
let token = self.peek_token_ref();
12405+
match &token.token {
1240512406
Token::Word(w) => {
12406-
idents.push(w.clone().into_ident(self.peek_token_ref().span));
12407+
idents.push(w.to_ident(token.span));
1240712408
}
1240812409
Token::EOF | Token::Eq | Token::SemiColon => break,
1240912410
_ => {}
@@ -19203,8 +19204,11 @@ fn maybe_prefixed_expr(expr: Expr, prefix: Option<Ident>) -> Expr {
1920319204
}
1920419205

1920519206
impl Word {
19206-
#[deprecated(since = "0.54.0", note = "please use `into_ident` instead")]
19207-
/// Convert this word into an [`Ident`] identifier
19207+
/// Convert a reference to this word into an [`Ident`] by cloning the value.
19208+
///
19209+
/// Use this method when you need to keep the original `Word` around.
19210+
/// If you can consume the `Word`, prefer [`into_ident`](Self::into_ident) instead
19211+
/// to avoid cloning.
1920819212
pub fn to_ident(&self, span: Span) -> Ident {
1920919213
Ident {
1921019214
value: self.value.clone(),
@@ -19213,7 +19217,10 @@ impl Word {
1921319217
}
1921419218
}
1921519219

19216-
/// Convert this word into an [`Ident`] identifier
19220+
/// Convert this word into an [`Ident`] identifier, consuming the `Word`.
19221+
///
19222+
/// This avoids cloning the string value. If you need to keep the original
19223+
/// `Word`, use [`to_ident`](Self::to_ident) instead.
1921719224
pub fn into_ident(self, span: Span) -> Ident {
1921819225
Ident {
1921919226
value: self.value,

0 commit comments

Comments
 (0)