-
Notifications
You must be signed in to change notification settings - Fork 48
auctioneer: reconnect on EOF instead of silently leaving the subscribe stream dead #518
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+237
−18
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,205 @@ | ||
| package auctioneer | ||
|
|
||
| import ( | ||
| "errors" | ||
| "io" | ||
| "testing" | ||
| "time" | ||
|
|
||
| "github.com/lightninglabs/pool/auctioneerrpc" | ||
| "google.golang.org/grpc" | ||
| "google.golang.org/grpc/codes" | ||
| "google.golang.org/grpc/status" | ||
| ) | ||
|
|
||
| // TestJitterBackoffBounds samples the jitter helper for a typical configured | ||
| // backoff and asserts results fall in the expected [backoff, backoff + | ||
| // backoff/4] range and aren't pinned to a single value. | ||
| func TestJitterBackoffBounds(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| const ( | ||
| base = 5 * time.Second | ||
| samples = 200 | ||
| ) | ||
| seen := make(map[time.Duration]struct{}, samples) | ||
| for i := 0; i < samples; i++ { | ||
| got := jitterBackoff(base) | ||
| if got < base || got > base+base/4 { | ||
| t.Fatalf("jitterBackoff(%v) = %v, out of [%v, %v]", | ||
| base, got, base, base+base/4) | ||
| } | ||
| seen[got] = struct{}{} | ||
| } | ||
|
|
||
| // In 200 samples over a 1.25s window of nanosecond resolution we | ||
| // expect many distinct values. If we get only a handful, jitter is | ||
| // broken. | ||
| if len(seen) < 10 { | ||
| t.Fatalf("expected diverse jitter samples, "+ | ||
| "only got %d unique values", len(seen)) | ||
| } | ||
| } | ||
|
|
||
| // fakeServerStream is a minimal implementation of | ||
| // ChannelAuctioneer_SubscribeBatchAuctionClient that returns predetermined | ||
| // results from Recv. It is only sufficient for driving the client's read loop. | ||
| type fakeServerStream struct { | ||
| grpc.ClientStream | ||
|
|
||
| recv chan recvResult | ||
| } | ||
|
|
||
| type recvResult struct { | ||
| msg *auctioneerrpc.ServerAuctionMessage | ||
| err error | ||
| } | ||
|
|
||
| func (s *fakeServerStream) Send(*auctioneerrpc.ClientAuctionMessage) error { | ||
| return nil | ||
| } | ||
|
|
||
| func (s *fakeServerStream) Recv() (*auctioneerrpc.ServerAuctionMessage, error) { | ||
| r := <-s.recv | ||
| return r.msg, r.err | ||
| } | ||
|
|
||
| // newTestClient returns a Client wired up just enough to drive | ||
| // readIncomingStream against a fake server stream. | ||
| func newTestClient(stream auctioneerrpc.ChannelAuctioneer_SubscribeBatchAuctionClient, | ||
| ) (*Client, chan error) { | ||
|
|
||
| mainErrChan := make(chan error, 1) | ||
| c := &Client{ | ||
| serverStream: stream, | ||
| FromServerChan: make(chan *auctioneerrpc.ServerAuctionMessage), | ||
| StreamErrChan: mainErrChan, | ||
| errChanSwitch: NewErrChanSwitch(mainErrChan), | ||
| quit: make(chan struct{}), | ||
| subscribedAccts: make(map[[33]byte]*acctSubscription), | ||
| } | ||
| c.errChanSwitch.Start() | ||
| return c, mainErrChan | ||
| } | ||
|
|
||
| // runReadLoop runs readIncomingStream in a goroutine and returns a channel | ||
| // that closes when the loop exits. | ||
| func runReadLoop(c *Client) <-chan struct{} { | ||
| done := make(chan struct{}) | ||
| go func() { | ||
| c.readIncomingStream() | ||
| close(done) | ||
| }() | ||
| return done | ||
| } | ||
|
|
||
| // TestReadIncomingStreamEOFTriggersReconnect ensures that an io.EOF received | ||
| // on the server stream is surfaced as ErrServerErrored on the error channel, | ||
| // which is the signal the rpcserver consumer uses to trigger reconnect logic. | ||
| // | ||
| // This is a regression test: EOF was previously reported as a separate | ||
| // "ErrServerShutdown" sentinel that the consumer silently ignored under the | ||
| // (incorrect) assumption that the client had already scheduled its own | ||
| // reconnect. The result was a permanently dead subscription stream after any | ||
| // clean close (proxy/LB timeout, planned server shutdown, etc.), with the | ||
| // trader being filtered as offline until the process restarted. | ||
| func TestReadIncomingStreamEOFTriggersReconnect(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| stream := &fakeServerStream{recv: make(chan recvResult, 1)} | ||
| c, mainErrChan := newTestClient(stream) | ||
| defer c.errChanSwitch.Stop() | ||
| defer close(c.quit) | ||
|
|
||
| // Tell the fake stream to return io.EOF, simulating the server (or an | ||
| // intermediate proxy) cleanly closing its side of the bidi stream. | ||
| stream.recv <- recvResult{err: io.EOF} | ||
|
|
||
| done := runReadLoop(c) | ||
|
|
||
| select { | ||
| case err := <-mainErrChan: | ||
| if !errors.Is(err, ErrServerErrored) { | ||
| t.Fatalf("expected ErrServerErrored on EOF, got: %v", | ||
| err) | ||
| } | ||
| case <-time.After(defaultTimeout): | ||
| t.Fatal("timed out waiting for error after EOF") | ||
| } | ||
|
|
||
| select { | ||
| case <-done: | ||
| case <-time.After(defaultTimeout): | ||
| t.Fatal("readIncomingStream did not return after EOF") | ||
| } | ||
| } | ||
|
|
||
| // TestReadIncomingStreamTransportErrorTriggersReconnect ensures non-EOF | ||
| // transport errors continue to be surfaced as ErrServerErrored. This is the | ||
| // pre-existing behaviour we want to preserve after unifying it with the EOF | ||
| // path. | ||
| func TestReadIncomingStreamTransportErrorTriggersReconnect(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| stream := &fakeServerStream{recv: make(chan recvResult, 1)} | ||
| c, mainErrChan := newTestClient(stream) | ||
| defer c.errChanSwitch.Stop() | ||
| defer close(c.quit) | ||
|
|
||
| // A "transport is closing" style error, which is what gRPC surfaces | ||
| // when the underlying TCP connection breaks abruptly. | ||
| stream.recv <- recvResult{ | ||
| err: status.Error(codes.Unavailable, "transport is closing"), | ||
| } | ||
|
|
||
| done := runReadLoop(c) | ||
|
|
||
| select { | ||
| case err := <-mainErrChan: | ||
| if !errors.Is(err, ErrServerErrored) { | ||
| t.Fatalf("expected ErrServerErrored on transport "+ | ||
| "error, got: %v", err) | ||
| } | ||
| case <-time.After(defaultTimeout): | ||
| t.Fatal("timed out waiting for error after transport failure") | ||
| } | ||
|
|
||
| select { | ||
| case <-done: | ||
| case <-time.After(defaultTimeout): | ||
| t.Fatal("readIncomingStream did not return after transport " + | ||
| "failure") | ||
| } | ||
| } | ||
|
|
||
| // TestReadIncomingStreamContextCanceledDoesNotReconnect ensures that a | ||
| // codes.Canceled error (which happens when *we* cancel the stream context | ||
| // during shutdown or a planned reconnect) does NOT surface an error to the | ||
| // consumer, so we don't accidentally schedule a second reconnect. | ||
| func TestReadIncomingStreamContextCanceledDoesNotReconnect(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| stream := &fakeServerStream{recv: make(chan recvResult, 1)} | ||
| c, mainErrChan := newTestClient(stream) | ||
| defer c.errChanSwitch.Stop() | ||
| defer close(c.quit) | ||
|
|
||
| stream.recv <- recvResult{ | ||
| err: status.Error(codes.Canceled, "context canceled"), | ||
| } | ||
|
|
||
| done := runReadLoop(c) | ||
|
|
||
| select { | ||
| case <-done: | ||
| case <-time.After(defaultTimeout): | ||
| t.Fatal("readIncomingStream did not return after cancel") | ||
| } | ||
|
|
||
| select { | ||
| case err := <-mainErrChan: | ||
| t.Fatalf("unexpected error surfaced on cancel: %v", err) | ||
| case <-time.After(defaultTimeout): | ||
| // Expected: no error surfaced. | ||
| } | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.