-
Notifications
You must be signed in to change notification settings - Fork 190
refactor: use BackendPid instead of BackendKeyData where secret is not required #1025
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,221 @@ | ||
| use std::time::Duration; | ||
|
|
||
| use bytes::{BufMut, BytesMut}; | ||
| use rust::setup::{admin_tokio, connection_sqlx_direct}; | ||
| use sqlx::PgPool; | ||
| use tokio::{io::AsyncWriteExt, net::TcpStream, task::JoinHandle, time::timeout}; | ||
| use tokio_postgres::{CancelToken, Error as PgError, NoTls, SimpleQueryMessage}; | ||
|
|
||
| /// Returns whether `pid` has an active `pg_sleep` query visible in `pg_stat_activity`. | ||
| /// Uses a direct PostgreSQL connection so the result bypasses pgdog completely. | ||
| async fn is_sleeping(direct: &PgPool, pid: i32) -> bool { | ||
| let count: i64 = sqlx::query_scalar( | ||
| "SELECT COUNT(*) \ | ||
| FROM pg_stat_activity \ | ||
| WHERE pid = $1 \ | ||
| AND state = 'active' \ | ||
| AND query LIKE '%pg_sleep%'", | ||
| ) | ||
| .bind(pid) | ||
| .fetch_one(direct) | ||
| .await | ||
| .unwrap(); | ||
| count == 1 | ||
| } | ||
|
|
||
| /// Connect to pgdog, pin to a specific PG backend via BEGIN, capture the backend pid | ||
| /// via `pg_backend_pid()`, and launch `SELECT pg_sleep(60)` in a background task. | ||
| /// | ||
| /// `application_name` is embedded in the connection string so the caller can identify | ||
| /// this connection in `SHOW CLIENTS` if needed. | ||
| /// | ||
| /// Returns `(backend_pid, cancel_token, query_handle)`. The caller owns `cancel_token` | ||
| /// and `query_handle`; both must be driven to completion to keep the test clean. | ||
| async fn start_sleeping_connection( | ||
| application_name: &str, | ||
| ) -> ( | ||
| i32, | ||
| CancelToken, | ||
| JoinHandle<Result<Vec<SimpleQueryMessage>, PgError>>, | ||
| ) { | ||
| let (client, connection) = tokio_postgres::connect( | ||
| &format!( | ||
| "host=127.0.0.1 user=pgdog dbname=pgdog password=pgdog port=6432 application_name={application_name}" | ||
| ), | ||
| NoTls, | ||
| ) | ||
| .await | ||
| .unwrap(); | ||
|
|
||
| tokio::spawn(async move { | ||
| if let Err(e) = connection.await { | ||
| eprintln!("pgdog connection error: {}", e); | ||
| } | ||
| }); | ||
|
|
||
| let cancel_token = client.cancel_token(); | ||
|
|
||
| // BEGIN pins the client to one backend for the duration of the transaction. | ||
| // Without this, transaction-mode pooling may assign a different backend to | ||
| // pg_sleep than the one whose pid we captured. | ||
| client.simple_query("BEGIN").await.unwrap(); | ||
|
|
||
| let row = client | ||
| .query_one("SELECT pg_backend_pid()", &[]) | ||
| .await | ||
| .unwrap(); | ||
| let backend_pid: i32 = row.get(0); | ||
|
|
||
| let handle = tokio::spawn(async move { client.simple_query("SELECT pg_sleep(60)").await }); | ||
|
|
||
| (backend_pid, cancel_token, handle) | ||
| } | ||
|
|
||
| /// Assert that a query handle returned by `start_sleeping_connection` was cancelled: | ||
| /// it must resolve to SQLSTATE 57014 (canceling statement due to user request). | ||
| async fn assert_cancelled( | ||
| handle: JoinHandle<Result<Vec<SimpleQueryMessage>, PgError>>, | ||
| label: &str, | ||
| ) { | ||
| let result = timeout(Duration::from_secs(5), handle) | ||
| .await | ||
| .expect(&format!( | ||
| "{label}: cancelled query did not unblock within 5 seconds" | ||
| )) | ||
| .expect(&format!("{label}: task panicked")); | ||
|
|
||
| let err = result.expect_err(&format!( | ||
| "{label}: query should have been cancelled, but it succeeded" | ||
| )); | ||
| let db_err = err.as_db_error().expect(&format!( | ||
| "{label}: expected a PostgreSQL error, not a network error" | ||
| )); | ||
|
|
||
| assert_eq!( | ||
| db_err.code().code(), | ||
| "57014", | ||
| "{label}: expected SQLSTATE 57014, got {}", | ||
| db_err.code().code() | ||
| ); | ||
| } | ||
|
|
||
| /// Verify that cancellation is precise: two independent connections both run a long | ||
| /// query and each cancel request stops exactly one of them. | ||
| /// | ||
| /// Steps: | ||
| /// 1. Two clients connect through pgdog; each starts `SELECT pg_sleep(60)`. | ||
| /// 2. Both queries are confirmed active on specific PG backends via `pg_stat_activity`. | ||
| /// 3. Cancel connection 1 → only backend 1 stops; backend 2 remains active. | ||
| /// 4. Cancel connection 2 → backend 2 stops. | ||
| #[tokio::test] | ||
| async fn test_cancel_query() { | ||
| let direct = connection_sqlx_direct().await; | ||
|
|
||
| let (pid1, token1, handle1) = start_sleeping_connection("cancel_test").await; | ||
| let (pid2, token2, handle2) = start_sleeping_connection("cancel_test").await; | ||
|
|
||
| // Give both queries time to reach their respective backends. | ||
| tokio::time::sleep(Duration::from_millis(300)).await; | ||
|
|
||
| assert!( | ||
| is_sleeping(&direct, pid1).await, | ||
| "connection 1 (backend {pid1}) should be active before any cancel" | ||
| ); | ||
| assert!( | ||
| is_sleeping(&direct, pid2).await, | ||
| "connection 2 (backend {pid2}) should be active before any cancel" | ||
| ); | ||
|
|
||
| // ── Cancel connection 1 ──────────────────────────────────────────────── | ||
| token1.cancel_query(NoTls).await.unwrap(); | ||
|
|
||
| // Wait for the client to receive the cancellation error. | ||
| // By the time the handle resolves, the backend has already stopped. | ||
| assert_cancelled(handle1, "connection 1").await; | ||
|
|
||
| // Connection 1's backend is gone; connection 2 must still be running. | ||
| tokio::time::sleep(Duration::from_millis(100)).await; | ||
| assert!( | ||
| !is_sleeping(&direct, pid1).await, | ||
| "backend {pid1} should be idle after cancelling connection 1" | ||
| ); | ||
| assert!( | ||
| is_sleeping(&direct, pid2).await, | ||
| "backend {pid2} should still be active after cancelling connection 1 only" | ||
| ); | ||
|
|
||
| // ── Cancel connection 2 ──────────────────────────────────────────────── | ||
| token2.cancel_query(NoTls).await.unwrap(); | ||
|
|
||
| assert_cancelled(handle2, "connection 2").await; | ||
|
|
||
| tokio::time::sleep(Duration::from_millis(100)).await; | ||
| assert!( | ||
| !is_sleeping(&direct, pid2).await, | ||
| "backend {pid2} should be idle after cancelling connection 2" | ||
| ); | ||
| } | ||
|
|
||
| /// Verify that a cancel request carrying a wrong pid and secret is silently rejected: | ||
| /// the running query is unaffected and the client does not receive a cancellation error. | ||
| /// | ||
| /// pgdog's `verify_cancel` gate must reject the request before it reaches the pool, | ||
| /// so the backend continues executing as if nothing happened. | ||
| #[tokio::test] | ||
| async fn test_cancel_query_wrong_secret() { | ||
| let direct = connection_sqlx_direct().await; | ||
| let app_name = "cancel_test_wrong_secret"; | ||
| let (backend_pid, real_cancel_token, query_handle) = start_sleeping_connection(app_name).await; | ||
|
|
||
| // Give the query time to reach the backend. | ||
| tokio::time::sleep(Duration::from_millis(300)).await; | ||
|
|
||
| assert!( | ||
| is_sleeping(&direct, backend_pid).await, | ||
| "query should be running before wrong-secret cancel" | ||
| ); | ||
|
|
||
| // Look up the pgdog client pid from the admin interface. | ||
| // SHOW CLIENTS exposes the pid (the 'id' column) that pgdog assigned during login — | ||
| // the same value that was sent in the K message and that verify_cancel checks against. | ||
| let admin = admin_tokio().await; | ||
| let messages = admin.simple_query("SHOW CLIENTS").await.unwrap(); | ||
| let pgdog_pid: i32 = messages | ||
| .iter() | ||
| .filter_map(|m| match m { | ||
| SimpleQueryMessage::Row(row) => Some(row), | ||
| _ => None, | ||
| }) | ||
| .find(|row| row.get("application_name") == Some(app_name)) | ||
| .expect("connection should appear in SHOW CLIENTS") | ||
| .get("id") | ||
| .expect("id column should be present") | ||
| .parse() | ||
| .expect("id should be a valid i32"); | ||
|
|
||
| // Send a CancelRequest with the real pgdog client pid but a wrong secret. | ||
| // pgdog will find the client in comms by pid, then reject it because | ||
| // the secret doesn't match — verify_cancel returns false. | ||
| let mut raw = TcpStream::connect("127.0.0.1:6432").await.unwrap(); | ||
| let mut buf = BytesMut::new(); | ||
| buf.put_i32(16); // total message length (including the length field) | ||
| buf.put_i32(80877102); // CancelRequest magic code | ||
| buf.put_i32(pgdog_pid); // correct pid | ||
| buf.put_i32(0); // wrong secret | ||
| raw.write_all(&buf).await.unwrap(); | ||
| // pgdog closes the connection silently after processing; no response is sent. | ||
| drop(raw); | ||
|
|
||
| // Give pgdog enough time to receive and process the bogus cancel. | ||
| tokio::time::sleep(Duration::from_millis(300)).await; | ||
|
|
||
| // The query must still be running — the secret mismatch was caught by verify_cancel. | ||
| assert!( | ||
| is_sleeping(&direct, backend_pid).await, | ||
| "query should still be running after wrong-secret cancel — verify_cancel must have rejected it" | ||
| ); | ||
|
|
||
| // Clean up: cancel for real. | ||
| real_cancel_token.cancel_query(NoTls).await.unwrap(); | ||
| assert_cancelled(query_handle, "wrong-secret test cleanup").await; | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we still want to apply the `backend` provenance so `Debug` works correctly. There are ambiguous message codes like `D`, which could be either a frontend or a backend message (Describe or DataRow).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yup, that's why I've added a new `Source::Internal` variant to explicitly handle the case where we generate the message to the client ourselves but don't have an actual backend that resolved it; `Internal` is the default now.