Skip to content

Commit 5633a18

Browse files
Shockppitrou
andauthored
GH-49433: [C++] Buffer ARROW_LOG output to prevent thread interleaving (#49663)
### Rationale for this change When using the C++ `ARROW_LOG` macros from multiple threads, messages often get mingled together in `stderr` because the `operator<<` writes directly to the global stream piece-by-piece, which is not thread-safe. ### What changes are included in this PR? This PR introduces an internal `std::ostringstream` buffer to the fallback `CerrLog` class. Instead of writing to `std::cerr` immediately on every `<<` operation, the messages are accumulated locally within the `CerrLog` instance. The completed string is then flushed atomically to `std::cerr` upon the object's destruction. ### Are these changes tested? Yes. I built the C++ project locally and ran the test suite (`ctest`) to ensure no existing logging behavior or IPC functionality was broken. ### Are there any user-facing changes? No API changes. Users will simply notice that multi-threaded `ARROW_LOG` console output is now cleanly separated per line instead of interleaved. Closes #49433 * GitHub Issue: #49433 Lead-authored-by: shockp <masque2204@gmail.com> Co-authored-by: Antoine Pitrou <antoine@python.org> Signed-off-by: Antoine Pitrou <antoine@python.org>
1 parent 210a856 commit 5633a18

2 files changed

Lines changed: 38 additions & 14 deletions

File tree

cpp/src/arrow/util/logging.cc

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
#endif
2525
#include <cstdlib>
2626
#include <iostream>
27+
#include <mutex>
28+
#include <sstream>
2729

2830
#ifdef ARROW_USE_GLOG
2931

@@ -67,7 +69,9 @@ class CerrLog {
6769

6870
virtual ~CerrLog() {
6971
if (has_logged_) {
70-
std::cerr << std::endl;
72+
static std::mutex cerr_mutex;
73+
std::lock_guard<std::mutex> lock(cerr_mutex);
74+
std::cerr << std::move(buffer_).str() << std::endl;
7175
}
7276
if (severity_ == ArrowLogLevel::ARROW_FATAL) {
7377
PrintBackTrace();
@@ -77,21 +81,22 @@ class CerrLog {
7781

7882
std::ostream& Stream() {
7983
has_logged_ = true;
80-
return std::cerr;
84+
return buffer_;
8185
}
8286

8387
template <class T>
8488
CerrLog& operator<<(const T& t) {
8589
if (severity_ != ArrowLogLevel::ARROW_DEBUG) {
8690
has_logged_ = true;
87-
std::cerr << t;
91+
buffer_ << t;
8892
}
8993
return *this;
9094
}
9195

9296
protected:
9397
const ArrowLogLevel severity_;
9498
bool has_logged_;
99+
std::ostringstream buffer_;
95100

96101
void PrintBackTrace() {
97102
#ifdef ARROW_WITH_BACKTRACE

cpp/src/arrow/util/logging_test.cc

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
#include <chrono>
1919
#include <cstdint>
2020
#include <iostream>
21+
#include <thread>
22+
#include <vector>
2123

2224
#include <gtest/gtest.h>
2325

@@ -40,17 +42,11 @@ int64_t current_time_ms() {
4042
// This file just print some information using the logging macro.
4143

4244
void PrintLog() {
43-
ARROW_LOG(DEBUG) << "This is the"
44-
<< " DEBUG"
45-
<< " message";
46-
ARROW_LOG(INFO) << "This is the"
47-
<< " INFO message";
48-
ARROW_LOG(WARNING) << "This is the"
49-
<< " WARNING message";
50-
ARROW_LOG(ERROR) << "This is the"
51-
<< " ERROR message";
52-
ARROW_CHECK(true) << "This is a ARROW_CHECK"
53-
<< " message but it won't show up";
45+
ARROW_LOG(DEBUG) << "This is the" << " DEBUG" << " message";
46+
ARROW_LOG(INFO) << "This is the" << " INFO message";
47+
ARROW_LOG(WARNING) << "This is the" << " WARNING message";
48+
ARROW_LOG(ERROR) << "This is the" << " ERROR message";
49+
ARROW_CHECK(true) << "This is a ARROW_CHECK" << " message but it won't show up";
5450
// The following 2 lines should not run since it will cause program failure.
5551
// ARROW_LOG(FATAL) << "This is the FATAL message";
5652
// ARROW_CHECK(false) << "This is a ARROW_CHECK message but it won't show up";
@@ -95,6 +91,29 @@ TEST(ArrowCheck, PayloadEvaluatedOnFailure) {
9591
ASSERT_TRUE(tracer.was_printed);
9692
}
9793

94+
TEST(ArrowLog, MultiThreadedLogging) {
95+
// This is mostly a visual test that logging from multiple threads produces
96+
// a clean output without interleaving messages (GH-49433).
97+
constexpr int kNumThreads = 10;
98+
constexpr int kNumMessges = 10;
99+
std::vector<std::thread> threads;
100+
101+
threads.reserve(kNumThreads);
102+
103+
for (int i = 0; i < kNumThreads; ++i) {
104+
threads.emplace_back([i]() {
105+
for (int j = 0; j < kNumMessges; ++j) {
106+
ARROW_LOG(INFO) << "Thread " << i << " message " << j
107+
<< " - testing thread safety.";
108+
}
109+
});
110+
}
111+
112+
for (auto& thread : threads) {
113+
thread.join();
114+
}
115+
}
116+
98117
} // namespace util
99118

100119
TEST(DcheckMacros, DoNotEvaluateReleaseMode) {

0 commit comments

Comments
 (0)