Skip to content

Commit e077644

Browse files
committed
Send SERVFAIL response on DNS query failure to prevent client timeouts
When DnsQueryRaw fails (synchronously or asynchronously), the Windows DNS resolver currently sends no response back to the Linux DNS client. This forces the client to wait for its full retransmit timeout (typically 5-10 seconds per query) before moving on, causing significant DNS delays.

This change sends a minimal 12-byte DNS SERVFAIL response (RFC 1035, RCODE=2) back to the Linux DNS client whenever:
- DnsQueryRaw returns a synchronous failure (not DNS_REQUEST_PENDING)
- The async completion callback receives null results
- The async completion returns a non-null result with no raw response

The SERVFAIL response uses the original request's transaction ID so the client can match it to the pending query and immediately retry or fail.

This also fixes a memory leak on the Linux side: when no response was sent, UDP request tracking entries in DnsServer::m_udpRequests were never cleaned up. The SERVFAIL response now triggers the normal response handling path, which erases the tracking entry.

Refs: #4285 #5256 #4737
1 parent 107104e commit e077644

2 files changed

Lines changed: 45 additions & 1 deletion

File tree

src/windows/common/DnsResolver.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,12 @@ try
223223
auto [it, _] = m_dnsRequests.emplace(requestId, std::move(context));
224224
const auto localContext = it->second.get();
225225

226+
// Store the DNS transaction ID (first 2 bytes of the request) for constructing SERVFAIL responses
227+
if (dnsBuffer.size() >= 2)
228+
{
229+
memcpy(&localContext->m_dnsTransactionId, dnsBuffer.data(), sizeof(localContext->m_dnsTransactionId));
230+
}
231+
226232
auto removeContextOnError = wil::scope_exit([&] { WI_VERIFY(m_dnsRequests.erase(requestId) == 1); });
227233

228234
// Fill DNS request structure
@@ -264,6 +270,10 @@ try
264270
TraceLoggingValue(requestId, "requestId"),
265271
TraceLoggingValue(result, "result"),
266272
TraceLoggingValue("DnsQueryRaw", "executionStep"));
273+
274+
// Send SERVFAIL back to Linux so the DNS client gets an immediate error
275+
// instead of waiting for a timeout.
276+
SendServfailResponse(localContext->m_dnsTransactionId, localContext->m_dnsClientIdentifier);
267277
return;
268278
}
269279

@@ -337,6 +347,12 @@ try
337347
m_dnsChannel.SendDnsMessage(gsl::make_span(dnsResponse), dnsClientIdentifier);
338348
});
339349
}
350+
else if (!m_stopped)
351+
{
352+
// The Windows DNS API failed to resolve the request. Send a SERVFAIL response to the Linux DNS client
353+
// so it gets an immediate error instead of waiting for a timeout (which can take 5-10 seconds).
354+
SendServfailResponse(queryContext->m_dnsTransactionId, queryContext->m_dnsClientIdentifier);
355+
}
340356

341357
// Stop tracking this DNS request and delete the request context
342358
WI_VERIFY(m_dnsRequests.erase(queryContext->m_id) == 1);
@@ -349,6 +365,26 @@ try
349365
}
350366
CATCH_LOG()
351367

368+
void DnsResolver::SendServfailResponse(uint16_t transactionId, const LX_GNS_DNS_CLIENT_IDENTIFIER& dnsClientIdentifier)
369+
{
370+
// Build a minimal 12-byte DNS SERVFAIL response per RFC 1035 section 4.1.1.
371+
// This allows the Linux DNS client to immediately learn the query failed,
372+
// rather than waiting for a retransmit timeout (typically 5-10 seconds).
373+
std::vector<gsl::byte> servfail(12, gsl::byte{0});
374+
memcpy(servfail.data(), &transactionId, sizeof(transactionId)); // Transaction ID (network byte order, copied as-is)
375+
servfail[2] = gsl::byte{0x80}; // QR=1 (response), OPCODE=0 (standard query)
376+
servfail[3] = gsl::byte{0x02}; // RA=0, Z=0, RCODE=2 (Server Failure)
377+
378+
WSL_LOG(
379+
"DnsResolver::SendServfailResponse",
380+
TraceLoggingValue(dnsClientIdentifier.Protocol == IPPROTO_UDP ? "UDP" : "TCP", "Protocol"),
381+
TraceLoggingValue(dnsClientIdentifier.DnsClientId, "DNS client id"));
382+
383+
m_dnsResponseQueue.submit([this, servfail = std::move(servfail), dnsClientIdentifier]() mutable {
384+
m_dnsChannel.SendDnsMessage(gsl::make_span(servfail), dnsClientIdentifier);
385+
});
386+
}
387+
352388
void DnsResolver::ResolveExternalInterfaceConstraintIndex() noexcept
353389
try
354390
{

src/windows/common/DnsResolver.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ class DnsResolver
4343
// Unique query id.
4444
uint32_t m_id{};
4545

46+
// Transaction ID from the original DNS request header (first 2 bytes, network byte order).
47+
// Used to construct SERVFAIL responses when the Windows DNS API fails.
48+
uint16_t m_dnsTransactionId{};
49+
4650
// Callback to the parent object to notify about the DNS query completion.
4751
std::function<void(DnsQueryContext*, DNS_QUERY_RAW_RESULT*)> m_handleQueryCompletion;
4852

@@ -78,6 +82,10 @@ class DnsResolver
7882
// queryResults - structure containing result of the DNS request.
7983
void HandleDnsQueryCompletion(_Inout_ DnsQueryContext* dnsQueryContext, _Inout_opt_ DNS_QUERY_RAW_RESULT* queryResults) noexcept;
8084

85+
// Build and send a minimal DNS SERVFAIL response (RFC 1035, RCODE=2) back to the Linux DNS client.
86+
// This is used when the Windows DNS API fails, to prevent the Linux client from waiting until timeout.
87+
void SendServfailResponse(uint16_t transactionId, const LX_GNS_DNS_CLIENT_IDENTIFIER& dnsClientIdentifier);
88+
8189
void ResolveExternalInterfaceConstraintIndex() noexcept;
8290

8391
// Callback that will be invoked by the DNS API whenever a request finishes. The callback is invoked on success, error or when request is cancelled.
@@ -105,7 +113,7 @@ class DnsResolver
105113
_Guarded_by_(m_dnsLock) uint32_t m_currentRequestId = 0;
106114

107115
// Mapping request id to the request context structure.
108-
_Guarded_by_(m_dnsLock) std::unordered_map<uint32_t, std::unique_ptr<DnsQueryContext>> m_dnsRequests {};
116+
_Guarded_by_(m_dnsLock) std::unordered_map<uint32_t, std::unique_ptr<DnsQueryContext>> m_dnsRequests{};
109117

110118
// Event that is set when all tracked DNS requests have completed.
111119
wil::unique_event m_allRequestsFinished{wil::EventOptions::ManualReset};

0 commit comments

Comments
 (0)