From 20876ace7e3bcbe361de4170f470d02b34acbd0c Mon Sep 17 00:00:00 2001 From: Joao Henrique Machado Silva Date: Thu, 11 Jun 2026 16:28:38 +0200 Subject: [PATCH] fix(sql): validate table qualifiers in FTS function column args (SQLR-15) fts_match() / bm25_score() extract their column argument syntactically (they need the column name to find the index, not its value), so the qualifier never passed through RowScope::lookup and SQLR-14's check. A bogus qualifier was silently dropped: fts_match(bogus.body, 'q') ran as fts_match(body, 'q'). Add RowScope::scope_name() (Some(name) for single-table scope, None for joined / group-row scopes) and run check_single_scope_qualifier in resolve_fts_args when the column arg is a CompoundIdentifier. Error wording matches SQLR-14. 3+-part identifiers now error like the main evaluator instead of being silently truncated. Audited the other syntactic extraction sites: JSON and vec_distance helpers evaluate args through eval_expr_scope (already covered); pager catalog re-parses are trusted engine-written SQL. Co-Authored-By: Claude Fable 5 --- docs/fts.md | 4 +- docs/sql-engine.md | 2 +- docs/supported-sql.md | 2 +- src/sql/executor.rs | 47 ++++++++++++++-- src/sql/mod.rs | 121 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 169 insertions(+), 7 deletions(-) diff --git a/docs/fts.md b/docs/fts.md index d2edd61..252f2b5 100644 --- a/docs/fts.md +++ b/docs/fts.md @@ -105,6 +105,8 @@ The function requires a TEXT column with an FTS index attached. Without one, it fts_match(body, ...): no FTS index on column 'body' (run CREATE INDEX ON USING fts(body) first) ``` +The column argument may be qualified (`fts_match(docs.body, 'q')` or the FROM alias). The qualifier is validated the same way as any column reference (SQLR-15): it must name the table in scope, else the query errors with `unknown table qualifier`. + This contrasts with SQLite's `MATCH` operator, which is parser-level; SQLRite uses a function-call shape because `sqlparser`'s SQLite dialect doesn't expose a `BinaryOperator::Match` variant we can dispatch on ([Q1](phase-8-plan.md#q1-match-operator-syntax)). ### `bm25_score(col, 'q')` @@ -119,7 +121,7 @@ Notes: - **`DESC`** is the conventional direction. `ASC` works (returns least-relevant first) but disables the optimizer probe — see [Optimizer hook](#optimizer-hook). - **Same query string** must appear in `WHERE fts_match(...)` and `ORDER BY bm25_score(...)`. The optimizer recognizes the joint pattern; mismatched strings fall through to a slow per-row evaluation. -- **Same caveats as `fts_match`:** requires an FTS index on the column; errors clearly otherwise. +- **Same caveats as `fts_match`:** requires an FTS index on the column; errors clearly otherwise. Qualified column args (`bm25_score(docs.body, …)`) are validated against the table in scope (SQLR-15) — note the optimizer probe only matches a *bare* column identifier, so a qualified arg always takes the per-row evaluation path. The math is the standard BM25+ variant (`k1 = 1.5`, `b = 0.75`, fixed at SQLite FTS5 defaults). Full formula in [`src/sql/fts/bm25.rs`](../src/sql/fts/bm25.rs). diff --git a/docs/sql-engine.md b/docs/sql-engine.md index 0471052..3cbcdd0 100644 --- a/docs/sql-engine.md +++ b/docs/sql-engine.md @@ -82,7 +82,7 @@ match query { - `Expr::Nested(inner)` → recurse - `Expr::Identifier(ident)` → look up `ident.value` on the table at the given rowid -- `Expr::CompoundIdentifier(parts)` → same, after validating the qualifier against the one table in scope — `scope_name` is the FROM alias when declared, else the table name, and anything else errors with `unknown table qualifier` (SQLR-14, matching the joined scope's behavior) +- `Expr::CompoundIdentifier(parts)` → same, after validating the qualifier against the one table in scope — `scope_name` is the FROM alias when declared, else the table name, and anything else errors with `unknown table qualifier` (SQLR-14, matching the joined scope's behavior). Functions that extract a column *name* syntactically instead of resolving a value — `fts_match` / `bm25_score`'s first argument — run the same check via `RowScope::scope_name()` (SQLR-15) - `Expr::Value(v)` → convert a sqlparser literal to a runtime `Value` - `Expr::UnaryOp { op, expr }` → recurse on inner, apply `+` / `-` / `NOT` - `Expr::BinaryOp { left, op, right }` → recurse on both sides, apply the operator diff --git a/docs/supported-sql.md b/docs/supported-sql.md index 1849fbf..0df377d 100644 --- a/docs/supported-sql.md +++ b/docs/supported-sql.md @@ -202,7 +202,7 @@ COUNT([DISTINCT] ) -- counts non-NULL values, option ### What works - **Projection**: `*` (all columns in declaration order), a bare column list, or an explicit list mixing bare columns and aggregate calls. Each item can carry an optional `AS alias` (the alias becomes the output column header and is recognized by `ORDER BY`). -- **Qualified column references** (SQLR-14): `t.col` works in the projection, `WHERE`, `GROUP BY`, `ORDER BY`, and aggregate arguments, but the qualifier must name the table in scope — the `FROM` alias when one is declared, else the table name (ASCII case-insensitive). Anything else errors with `unknown table qualifier`, matching the joined-scope behavior and SQLite. Declaring an alias removes the table name from scope (`SELECT t.id FROM t AS a` errors), and the same validation applies to `UPDATE` / `DELETE` (including alias forms `UPDATE t AS a …` / `DELETE FROM t AS a …`). +- **Qualified column references** (SQLR-14): `t.col` works in the projection, `WHERE`, `GROUP BY`, `ORDER BY`, and aggregate arguments, but the qualifier must name the table in scope — the `FROM` alias when one is declared, else the table name (ASCII case-insensitive). Anything else errors with `unknown table qualifier`, matching the joined-scope behavior and SQLite. Declaring an alias removes the table name from scope (`SELECT t.id FROM t AS a` errors), and the same validation applies to `UPDATE` / `DELETE` (including alias forms `UPDATE t AS a …` / `DELETE FROM t AS a …`). The column argument of `fts_match` / `bm25_score` gets the same check (SQLR-15) — `fts_match(bogus.body, 'q')` errors instead of silently dropping the qualifier. - **`WHERE`**: any [expression](#expressions). Evaluated per row; NULL-as-false in WHERE context (three-valued logic collapsed to two-valued for filtering). Includes **`IS NULL`** / **`IS NOT NULL`** for explicit null tests, **`LIKE` / `NOT LIKE` / `ILIKE`** for pattern matching, and **`IN (list) / NOT IN (list)`** for set-membership against literal lists. - **`DISTINCT`**: `SELECT DISTINCT` deduplicates result rows after projection (and after aggregation, when both apply). `NULL` values compare equal to other `NULL`s for dedupe, matching SQL's DISTINCT semantic. - **`GROUP BY`**: one or more column names, optionally qualified (`GROUP BY customers.name`) — the qualifier disambiguates same-named columns across joined tables (SQLR-6). Every non-aggregate item in the projection must appear in the `GROUP BY` list (rejected with a clear message — by the parser for single-table queries, by the executor for joined ones, where resolving qualifiers needs the schemas). `GROUP BY ` without any aggregate behaves like an implicit `DISTINCT `. diff --git a/src/sql/executor.rs b/src/sql/executor.rs index 8b9c8ab..3636cfd 100644 --- a/src/sql/executor.rs +++ b/src/sql/executor.rs @@ -63,6 +63,14 @@ pub(crate) trait RowScope { /// single-table path; calling them from a joined query produces /// a `NotImplemented` error rather than wrong results. fn single_table_view(&self) -> Option<(&Table, i64)>; + + /// The user-visible name a `t.col` qualifier must match in this + /// scope (FROM alias if declared, else table name), or `None` when + /// no single name applies (joined / group-row scopes). SQLR-15 — + /// lets helpers that extract a column *name* syntactically (rather + /// than resolving a value through `lookup`) run the same SQLR-14 + /// qualifier check as plain column references. + fn scope_name(&self) -> Option<&str>; } /// The default scope for non-join queries: one table, one rowid. @@ -107,6 +115,10 @@ impl RowScope for SingleTableScope<'_> { fn single_table_view(&self) -> Option<(&Table, i64)> { Some((self.table, self.rowid)) } + + fn scope_name(&self) -> Option<&str> { + Some(self.scope_name) + } } /// One table participating in a joined query, plus the user-visible @@ -184,6 +196,12 @@ impl RowScope for JoinedScope<'_> { fn single_table_view(&self) -> Option<(&Table, i64)> { None } + + fn scope_name(&self) -> Option<&str> { + // Qualified references in joined scope resolve per-table in + // `lookup`; there is no single name to check against. + None + } } /// SQLR-14 — reject a `q.col` qualifier that doesn't name the single @@ -2839,10 +2857,25 @@ fn resolve_fts_args<'t>( }; let col_name = match col_expr { Expr::Identifier(ident) => ident.value.clone(), - Expr::CompoundIdentifier(parts) => parts - .last() - .map(|p| p.value.clone()) - .ok_or_else(|| SQLRiteError::Internal("empty compound identifier".to_string()))?, + // SQLR-15 — a `t.col` argument extracts the column *name* + // without going through `RowScope::lookup`, so it must run the + // SQLR-14 qualifier check itself: `fts_match(bogus.body, …)` + // errors instead of silently dropping the `bogus.`. + Expr::CompoundIdentifier(parts) => match parts.as_slice() { + [only] => only.value.clone(), + [q, c] => { + if let Some(scope_name) = scope.scope_name() { + check_single_scope_qualifier(Some(&q.value), scope_name, &c.value)?; + } + c.value.clone() + } + _ => { + return Err(SQLRiteError::NotImplemented(format!( + "compound identifier with {} parts is not supported", + parts.len() + ))); + } + }, other => { return Err(SQLRiteError::General(format!( "{fn_name}() argument 0 must be a column reference, got {other:?}" @@ -3649,6 +3682,12 @@ impl RowScope for GroupRowScope<'_> { fn single_table_view(&self) -> Option<(&Table, i64)> { None } + + fn scope_name(&self) -> Option<&str> { + // Output rows carry no table qualifier — `lookup` ignores + // qualifiers entirely, so there is nothing to validate. + None + } } /// Rewrite a HAVING expression for group-row evaluation: every diff --git a/src/sql/mod.rs b/src/sql/mod.rs index 806663c..29b4250 100644 --- a/src/sql/mod.rs +++ b/src/sql/mod.rs @@ -2314,6 +2314,127 @@ mod tests { assert!(resp.contains("3 rows returned"), "got: {resp}"); } + // ----------------------------------------------------------------- + // SQLR-15 — table qualifiers in FTS function column args are + // validated like plain column references (SQLR-14). Before the + // fix, `fts_match(bogus.body, …)` silently dropped the `bogus.`. + // ----------------------------------------------------------------- + + /// Assert `sql` fails with the SQLR-14/15 unknown-qualifier message. + fn assert_fts_unknown_qualifier(db: &mut Database, sql: &str, qualifier: &str) { + let err = process_command(sql, db) + .expect_err("a bogus qualifier in an FTS function arg must error, not be ignored"); + let msg = format!("{err}"); + assert!( + msg.contains(&format!("unknown table qualifier '{qualifier}'")), + "expected unknown-qualifier error for `{sql}`, got: {msg}" + ); + } + + #[test] + fn fts_match_with_matching_table_qualifier_works() { + let mut db = seed_fts_table(); + process_command("CREATE INDEX ix_body ON docs USING fts (body);", &mut db).unwrap(); + let resp = process_command( + "SELECT id FROM docs WHERE fts_match(docs.body, 'rust');", + &mut db, + ) + .unwrap(); + assert!(resp.contains("3 rows returned"), "got: {resp}"); + // Case-insensitive match, same as plain column references. + let resp = process_command( + "SELECT id FROM docs WHERE fts_match(DOCS.body, 'rust');", + &mut db, + ) + .unwrap(); + assert!(resp.contains("3 rows returned"), "got: {resp}"); + } + + #[test] + fn fts_match_with_matching_alias_qualifier_works() { + let mut db = seed_fts_table(); + process_command("CREATE INDEX ix_body ON docs USING fts (body);", &mut db).unwrap(); + let resp = process_command( + "SELECT id FROM docs AS d WHERE fts_match(d.body, 'rust');", + &mut db, + ) + .unwrap(); + assert!(resp.contains("3 rows returned"), "got: {resp}"); + } + + #[test] + fn fts_match_alias_shadows_table_name_as_qualifier() { + // Once `FROM docs AS d` declares an alias, the alias is the + // only valid qualifier — `docs.body` errors (SQLite semantics, + // mirrors the SQLR-14 plain-column tests). + let mut db = seed_fts_table(); + process_command("CREATE INDEX ix_body ON docs USING fts (body);", &mut db).unwrap(); + assert_fts_unknown_qualifier( + &mut db, + "SELECT id FROM docs AS d WHERE fts_match(docs.body, 'rust');", + "docs", + ); + } + + #[test] + fn fts_match_unknown_qualifier_in_where_errors() { + let mut db = seed_fts_table(); + process_command("CREATE INDEX ix_body ON docs USING fts (body);", &mut db).unwrap(); + assert_fts_unknown_qualifier( + &mut db, + "SELECT id FROM docs WHERE fts_match(bogus.body, 'rust');", + "bogus", + ); + } + + #[test] + fn bm25_score_unknown_qualifier_in_order_by_errors() { + // ORDER BY position exercises the scalar-eval path: the FTS + // probe only matches bare identifiers, so the qualified arg + // falls through to resolve_fts_args. + let mut db = seed_fts_table(); + process_command("CREATE INDEX ix_body ON docs USING fts (body);", &mut db).unwrap(); + assert_fts_unknown_qualifier( + &mut db, + "SELECT id FROM docs ORDER BY bm25_score(bogus.body, 'rust') DESC LIMIT 1;", + "bogus", + ); + } + + #[test] + fn bm25_score_unknown_qualifier_in_where_errors() { + // (Projection position can't reach the FTS helper — scalar + // functions are rejected in the projection list outright, so + // WHERE-comparison is the other scalar-eval position.) + let mut db = seed_fts_table(); + process_command("CREATE INDEX ix_body ON docs USING fts (body);", &mut db).unwrap(); + assert_fts_unknown_qualifier( + &mut db, + "SELECT id FROM docs WHERE bm25_score(bogus.body, 'rust') > 0.0;", + "bogus", + ); + } + + #[test] + fn bm25_score_with_matching_qualifier_still_ranks_correctly() { + // Qualified arg bypasses the FTS probe (bare-identifier match + // only) — the scalar path must produce the same top result. + let mut db = seed_fts_table(); + process_command("CREATE INDEX ix_body ON docs USING fts (body);", &mut db).unwrap(); + let out = process_command_with_render( + "SELECT id FROM docs WHERE fts_match(docs.body, 'rust') \ + ORDER BY bm25_score(docs.body, 'rust') DESC LIMIT 1;", + &mut db, + ) + .unwrap(); + assert!(out.status.contains("1 row returned"), "got: {}", out.status); + let rendered = out.rendered.expect("SELECT should produce rendered output"); + assert!( + rendered.contains(" 5 "), + "expected id=5 to be top-ranked; rendered:\n{rendered}" + ); + } + // ----------------------------------------------------------------- // Phase 7b — vector distance functions through process_command // -----------------------------------------------------------------