Skip to content

Commit 0a51dcd

Browse files
authored
Merge pull request #1264 from trcrsired/next
crt uses full name ::fast_io::details::crt_iobuf for crt_iobuf
2 parents be22dc8 + e2f7f47 commit 0a51dcd

File tree

3 files changed

+220
-20
lines changed

benchmark/0011.containers/deque/0001.push_back/fast_io_reverse.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,5 +23,5 @@ int main()
2323
sum += e;
2424
}
2525
}
26-
::fast_io::io::perrln("sum=",sum);
26+
::fast_io::io::perrln("sum=", sum);
2727
}

include/fast_io_dsal/impl/deque.h

Lines changed: 206 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,23 @@ inline constexpr void deque_grow_to_new_blocks_count_impl(dequecontroltype &cont
436436
::std::size_t const old_back_block_ptr_pos{static_cast<::std::size_t>(controller.back_block.controller_ptr - old_start_ptr)};
437437

438438
using block_typed_allocator = ::fast_io::typed_generic_allocator_adapter<allocator, typename dequecontroltype::controlreplacetype>;
439-
auto [new_start_ptr, new_blocks_count] = block_typed_allocator::allocate_at_least(new_blocks_count_least + 1zu);
439+
440+
#if (defined(__GNUC__) || defined(__clang__))
441+
::std::size_t new_blocks_count_least_p1;
442+
if (__builtin_add_overflow(new_blocks_count_least, 1zu, __builtin_addressof(new_blocks_count_least_p1))) [[unlikely]]
443+
{
444+
::fast_io::fast_terminate();
445+
}
446+
#else
447+
constexpr ::std::size_t mx{::std::numeric_limits<::std::size_t>::max()};
448+
::std::size_t new_blocks_count_least_p1{new_blocks_count_least};
449+
if (mx == new_blocks_count_least)
450+
{
451+
::fast_io::fast_terminate();
452+
}
453+
++new_blocks_count_least_p1;
454+
#endif
455+
auto [new_start_ptr, new_blocks_count] = block_typed_allocator::allocate_at_least(new_blocks_count_least_p1);
440456

441457
auto const old_reserved_blocks_count{
442458
static_cast<::std::size_t>(old_after_reserved_ptr - old_start_reserved_ptr)};
@@ -567,12 +583,21 @@ template <typename allocator, typename dequecontroltype>
567583
inline constexpr void deque_allocate_on_empty_common_with_n_impl(dequecontroltype &controller, ::std::size_t align, ::std::size_t bytes,
568584
::std::size_t initial_allocated_block_counts) noexcept
569585
{
586+
#if (defined(__GNUC__) || defined(__clang__))
587+
::std::size_t initial_allocated_block_counts_with_sentinel;
588+
if (__builtin_add_overflow(initial_allocated_block_counts, 1u,
589+
__builtin_addressof(initial_allocated_block_counts_with_sentinel)))
590+
{
591+
::fast_io::fast_terminate();
592+
}
593+
#else
570594
constexpr ::std::size_t maxval{::std::numeric_limits<::std::size_t>::max()};
571595
if (initial_allocated_block_counts == maxval) [[unlikely]]
572596
{
573597
::fast_io::fast_terminate();
574598
}
575599
::std::size_t initial_allocated_block_counts_with_sentinel{initial_allocated_block_counts + 1u};
600+
#endif
576601
using block_typed_allocator = ::fast_io::typed_generic_allocator_adapter<allocator, typename dequecontroltype::controlreplacetype>;
577602
auto [allocated_blocks_ptr, allocated_blocks_count] = block_typed_allocator::allocate_at_least(initial_allocated_block_counts_with_sentinel);
578603
// we need a null terminator as sentinel like c style string does
@@ -1205,27 +1230,202 @@ deque_erase_common_trivial_impl(::fast_io::containers::details::deque_controller
12051230
return first;
12061231
}
12071232

1208-
#if 0
1233+
#if 1
1234+
12091235
template <typename allocator, typename dequecontroltype>
1210-
inline constexpr void deque_reserve_back_spaces_impl(dequecontroltype &controller, ::std::size_t n, ::std::size_t align, ::std::size_t blockbytes) noexcept
1236+
inline constexpr void deque_rebalance_or_grow_insertation_impl(dequecontroltype &controller, ::std::size_t extrablocks) noexcept
12111237
{
1212-
::std::size_t const nb{n/blockbytes};
1238+
// ignore overchecked first
1239+
auto const used_blocks_count{
1240+
static_cast<::std::size_t>(controller.back_block.controller_ptr - controller.front_block.controller_ptr) + 1zu};
1241+
auto const total_slots_count{
1242+
static_cast<::std::size_t>(controller.controller_block.controller_after_ptr - controller.controller_block.controller_start_ptr)};
1243+
auto const half_slots_count{static_cast<::std::size_t>(total_slots_count >> 1u)};
1244+
#if defined(__GNUC__) || defined(__clang__)
1245+
::std::size_t new_used_blocks_count;
1246+
if (__builtin_add_overflow(used_blocks_count, extrablocks, __builtin_addressof(new_used_blocks_count))) [[unlikely]]
1247+
{
1248+
::fast_io::fast_terminate();
1249+
}
1250+
#else
1251+
constexpr ::std::size_t mx{::std::numeric_limits<::std::size_t>::max()};
1252+
::std::size_t const mx_sub_extrablocks{mx - extrablocks};
1253+
if (mx_sub_extrablocks < used_blocks_count)
1254+
{
1255+
::fast_io::fast_terminate();
1256+
}
1257+
1258+
auto const new_used_blocks_count{used_blocks_count + extrablocks};
1259+
#endif
1260+
1261+
if (half_slots_count < new_used_blocks_count) // grow blocks
1262+
{
1263+
#if defined(__GNUC__) || defined(__clang__)
1264+
::std::size_t doubleslotsextra;
1265+
if (__builtin_add_overflow(total_slots_count, extrablocks, __builtin_addressof(doubleslotsextra)))
1266+
{
1267+
::fast_io::fast_terminate();
1268+
}
1269+
if (__builtin_add_overflow(doubleslotsextra, doubleslotsextra, __builtin_addressof(doubleslotsextra)))
1270+
{
1271+
::fast_io::fast_terminate();
1272+
}
1273+
#else
1274+
::std::size_t mx_total_slots{mx - extrablocks};
1275+
if (mx_total_slots < total_slots_count)
1276+
{
1277+
::fast_io::fast_terminate();
1278+
}
1279+
::std::size_t doubleslotsextra{extrablocks + total_slots_count};
1280+
constexpr ::std::size_t mxdv2m1{(mx >> 1u)};
1281+
if (mxdv2m1 < doubleslotsextra)
1282+
{
1283+
::fast_io::fast_terminate();
1284+
}
1285+
doubleslotsextra <<= 1u;
1286+
#endif
12131287

1288+
::fast_io::containers::details::deque_grow_to_new_blocks_count_impl<allocator>(controller, doubleslotsextra);
1289+
}
1290+
else
1291+
{
1292+
#if 0
1293+
::fast_io::iomnp::debug_println(::std::source_location::current());
1294+
#endif
1295+
// balance blocks
1296+
auto start_reserved_ptr{controller.controller_block.controller_start_reserved_ptr};
1297+
auto after_reserved_ptr{controller.controller_block.controller_after_reserved_ptr};
1298+
auto const reserved_blocks_count{
1299+
static_cast<::std::size_t>(after_reserved_ptr - start_reserved_ptr)};
1300+
auto const half_reserved_blocks_count{
1301+
static_cast<::std::size_t>(reserved_blocks_count >> 1u)};
1302+
auto reserved_pivot{start_reserved_ptr + half_reserved_blocks_count};
1303+
auto const half_used_blocks_count{
1304+
static_cast<::std::size_t>(new_used_blocks_count >> 1u)};
1305+
auto used_blocks_pivot{controller.front_block.controller_ptr + half_used_blocks_count};
1306+
if (used_blocks_pivot != reserved_pivot)
1307+
{
1308+
::std::ptrdiff_t diff{reserved_pivot - used_blocks_pivot};
1309+
#if 0
1310+
::fast_io::iomnp::debug_println(::std::source_location::current(),
1311+
"\tdiff=",diff);
1312+
#endif
1313+
auto rotate_pivot{diff < 0 ? start_reserved_ptr : after_reserved_ptr};
1314+
rotate_pivot -= diff;
1315+
::std::rotate(start_reserved_ptr, rotate_pivot, after_reserved_ptr);
1316+
controller.front_block.controller_ptr += diff;
1317+
controller.back_block.controller_ptr += diff;
1318+
}
1319+
1320+
auto slots_pivot{controller.controller_block.controller_start_ptr + half_slots_count};
1321+
if (slots_pivot != reserved_pivot)
1322+
{
1323+
#if 0
1324+
::fast_io::iomnp::debug_println(::std::source_location::current());
1325+
#endif
1326+
::std::ptrdiff_t diff{slots_pivot - reserved_pivot};
1327+
::fast_io::freestanding::overlapped_copy(start_reserved_ptr,
1328+
after_reserved_ptr, start_reserved_ptr + diff);
1329+
controller.front_block.controller_ptr += diff;
1330+
controller.back_block.controller_ptr += diff;
1331+
controller.controller_block.controller_start_reserved_ptr += diff;
1332+
*(controller.controller_block.controller_after_reserved_ptr += diff) = nullptr;
1333+
}
1334+
}
1335+
}
1336+
1337+
template <typename allocator, typename dequecontroltype>
1338+
inline constexpr void deque_reserve_back_blocks_impl(dequecontroltype &controller, ::std::size_t nb, ::std::size_t align, ::std::size_t blockbytes) noexcept
1339+
{
12141340
if (controller.controller_block.controller_start_ptr == nullptr)
12151341
{
12161342
::fast_io::containers::details::deque_allocate_on_empty_common_with_n_impl<allocator>(
12171343
controller, align, blockbytes, nb);
12181344
return;
12191345
}
1220-
1346+
1347+
using replacetype = typename dequecontroltype::replacetype;
1348+
using begin_ptrtype = replacetype *;
1349+
1350+
std::size_t diff_to_after_ptr =
1351+
static_cast<std::size_t>(
1352+
controller.controller_block.controller_after_reserved_ptr -
1353+
controller.back_block.controller_ptr);
1354+
if (diff_to_after_ptr <= nb)
1355+
{
1356+
std::size_t distance_back_to_reserve{
1357+
static_cast<std::size_t>(controller.controller_block.controller_after_reserved_ptr -
1358+
controller.back_block.controller_ptr)};
1359+
if (distance_back_to_reserve < nb)
1360+
{
1361+
::fast_io::containers::details::deque_rebalance_or_grow_insertation_impl<allocator>(controller, nb);
1362+
}
1363+
std::size_t diff_to_after_ptr2 =
1364+
static_cast<std::size_t>(
1365+
controller.controller_block.controller_after_reserved_ptr -
1366+
controller.back_block.controller_ptr);
1367+
if (diff_to_after_ptr2 <= nb)
1368+
{
1369+
::std::size_t front_reserved_blocks{
1370+
static_cast<::std::size_t>(controller.front_block.controller_ptr - controller.controller_block.controller_start_reserved_ptr)};
1371+
1372+
::std::size_t front_borrowed_blocks_count{front_reserved_blocks};
1373+
::std::size_t to_allocate_blocks{nb};
1374+
if (nb < front_reserved_blocks)
1375+
{
1376+
front_borrowed_blocks_count = nb;
1377+
to_allocate_blocks = 0u;
1378+
}
1379+
else
1380+
{
1381+
to_allocate_blocks -= front_borrowed_blocks_count;
1382+
}
1383+
1384+
auto controller_start_reserved_ptr{
1385+
controller.controller_block.controller_start_reserved_ptr};
1386+
1387+
auto pos{
1388+
controller.controller_block.controller_after_reserved_ptr};
1389+
pos = ::fast_io::freestanding::non_overlapped_copy_n(controller_start_reserved_ptr,
1390+
front_borrowed_blocks_count,
1391+
pos);
1392+
controller.controller_block.controller_start_reserved_ptr =
1393+
controller_start_reserved_ptr + front_borrowed_blocks_count;
1394+
1395+
for (auto e{pos + to_allocate_blocks}; pos != e; ++pos)
1396+
{
1397+
::std::construct_at(pos, static_cast<begin_ptrtype>(allocator::allocate_aligned(align, blockbytes)));
1398+
}
1399+
*pos = nullptr;
1400+
controller.controller_block.controller_after_reserved_ptr = pos;
1401+
}
1402+
}
1403+
1404+
if (controller.back_block.controller_ptr == controller.front_block.controller_ptr && controller.front_block.curr_ptr == controller.front_end_ptr)
1405+
{
1406+
auto front_block_controller_ptr{controller.front_block.controller_ptr + 1};
1407+
controller.front_block.controller_ptr = front_block_controller_ptr;
1408+
auto front_begin_ptr = static_cast<begin_ptrtype>(*front_block_controller_ptr);
1409+
controller.front_block.curr_ptr = controller.front_block.begin_ptr = front_begin_ptr;
1410+
controller.front_end_ptr = front_begin_ptr + blockbytes;
1411+
}
1412+
1413+
controller.back_block.controller_ptr += nb;
1414+
auto begin_ptr =
1415+
static_cast<begin_ptrtype>(*controller.back_block.controller_ptr);
1416+
1417+
controller.back_block.begin_ptr = begin_ptr;
1418+
controller.back_block.curr_ptr = begin_ptr;
1419+
controller.back_end_ptr = begin_ptr + blockbytes;
12211420
}
12221421

1422+
#if 0
12231423
template <typename allocator, ::std::size_t align, ::std::size_t sz, ::std::size_t block_size, typename dequecontroltype>
12241424
inline constexpr void deque_reserve_back_spaces(dequecontroltype &controller, ::std::size_t n)
12251425
{
1226-
12271426
}
12281427
#endif
1428+
#endif
12291429

12301430
} // namespace details
12311431

include/fast_io_legacy_impl/c/wincrt.h

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ CRT heap debugging does not exist on mingw-w64
120120

121121
inline void wincrt_fp_allocate_buffer_impl(FILE *__restrict fpp) noexcept
122122
{
123-
crt_iobuf *fp{reinterpret_cast<crt_iobuf *>(fpp)};
123+
::fast_io::details::crt_iobuf *fp{reinterpret_cast<::fast_io::details::crt_iobuf *>(fpp)};
124124
if (fp->_bufsiz < 4)
125125
{
126126
fp->_bufsiz = wincrt_internal_buffer_size;
@@ -145,7 +145,7 @@ inline void wincrt_fp_write_cold_malloc_case_impl(FILE *__restrict fpp, char con
145145
return;
146146
}
147147

148-
crt_iobuf *fp{reinterpret_cast<crt_iobuf *>(fpp)};
148+
::fast_io::details::crt_iobuf *fp{reinterpret_cast<::fast_io::details::crt_iobuf *>(fpp)};
149149

150150
::std::size_t allocated_buffer_size{wincrt_internal_buffer_size};
151151

@@ -173,7 +173,7 @@ inline void wincrt_fp_write_cold_malloc_case_impl(FILE *__restrict fpp, char con
173173
inline void wincrt_fp_write_cold_normal_case_impl(FILE *__restrict fpp, char const *__restrict first,
174174
::std::size_t diff)
175175
{
176-
crt_iobuf *fp{reinterpret_cast<crt_iobuf *>(fpp)};
176+
::fast_io::details::crt_iobuf *fp{reinterpret_cast<::fast_io::details::crt_iobuf *>(fpp)};
177177
fp->_flag |= crt_dirty_value;
178178

179179
if (::std::size_t const remain{static_cast<::std::size_t>(static_cast<::std::uint_least32_t>(fp->_cnt))}; diff < remain)
@@ -210,7 +210,7 @@ inline void wincrt_fp_write_cold_normal_case_impl(FILE *__restrict fpp, char con
210210
inline void wincrt_fp_write_cold_impl(FILE *__restrict fp, char const *first, char const *last)
211211
{
212212
::std::size_t diff{static_cast<::std::size_t>(last - first)};
213-
crt_iobuf *fpp{reinterpret_cast<crt_iobuf *>(fp)};
213+
::fast_io::details::crt_iobuf *fpp{reinterpret_cast<::fast_io::details::crt_iobuf *>(fp)};
214214
if (fpp->_base == nullptr)
215215
{
216216
if (auto const fd{fpp->_file}; fd == ::fast_io::posix_stderr_number)
@@ -237,7 +237,7 @@ template <::std::integral char_type>
237237
#endif
238238
inline void wincrt_fp_overflow_impl(FILE *__restrict fpp, char_type ch)
239239
{
240-
crt_iobuf *fp{reinterpret_cast<crt_iobuf *>(fpp)};
240+
::fast_io::details::crt_iobuf *fp{reinterpret_cast<::fast_io::details::crt_iobuf *>(fpp)};
241241
if (fp->_base == nullptr)
242242
{
243243
wincrt_fp_allocate_buffer_impl(fpp);
@@ -259,7 +259,7 @@ inline void wincrt_fp_overflow_impl(FILE *__restrict fpp, char_type ch)
259259
#endif
260260
inline void wincrt_fp_flush_stdout_impl()
261261
{
262-
crt_iobuf *fp{reinterpret_cast<crt_iobuf *>(::fast_io::win32::wincrt_acrt_iob_func(1))};
262+
::fast_io::details::crt_iobuf *fp{reinterpret_cast<::fast_io::details::crt_iobuf *>(::fast_io::win32::wincrt_acrt_iob_func(1))};
263263
if (fp->_ptr == fp->_base) [[unlikely]]
264264
{
265265
return;
@@ -278,7 +278,7 @@ inline char *wincrt_fp_read_cold_impl(FILE *__restrict fpp, char *first, ::std::
278278
{
279279
wincrt_fp_flush_stdout_impl();
280280
}
281-
crt_iobuf *fp{reinterpret_cast<crt_iobuf *>(fpp)};
281+
::fast_io::details::crt_iobuf *fp{reinterpret_cast<::fast_io::details::crt_iobuf *>(fpp)};
282282
::std::size_t cnt{static_cast<::std::size_t>(static_cast<::std::uint_least32_t>(fp->_cnt))};
283283
non_overlapped_copy_n(fp->_ptr, cnt, first);
284284
first += cnt;
@@ -332,7 +332,7 @@ inline bool wincrt_fp_underflow_impl(FILE *__restrict fpp)
332332
{
333333
wincrt_fp_flush_stdout_impl();
334334
}
335-
crt_iobuf *fp{reinterpret_cast<crt_iobuf *>(fpp)};
335+
::fast_io::details::crt_iobuf *fp{reinterpret_cast<::fast_io::details::crt_iobuf *>(fpp)};
336336
if (fp->_base == nullptr)
337337
{
338338
wincrt_fp_allocate_buffer_impl(fpp);
@@ -362,7 +362,7 @@ template <typename T, ::std::size_t num>
362362
inline T *wincrt_get_buffer_ptr_impl(FILE *__restrict fpp) noexcept
363363
{
364364
static_assert(num < 4);
365-
crt_iobuf *fp{reinterpret_cast<crt_iobuf *>(fpp)};
365+
::fast_io::details::crt_iobuf *fp{reinterpret_cast<::fast_io::details::crt_iobuf *>(fpp)};
366366
if constexpr (num == 0)
367367
{
368368
return reinterpret_cast<T *>(fp->_base);
@@ -383,7 +383,7 @@ inline void wincrt_set_buffer_curr_ptr_impl(FILE *__restrict fpp,
383383
#endif
384384
void *ptr) noexcept
385385
{
386-
crt_iobuf *fp{reinterpret_cast<crt_iobuf *>(fpp)};
386+
::fast_io::details::crt_iobuf *fp{reinterpret_cast<::fast_io::details::crt_iobuf *>(fpp)};
387387
fp->_cnt -= static_cast<::std::int_least32_t>(
388388
static_cast<::std::uint_least32_t>(static_cast<::std::size_t>(reinterpret_cast<char *>(ptr) - fp->_ptr)));
389389
fp->_ptr = reinterpret_cast<char *>(ptr);
@@ -395,12 +395,12 @@ WINE has not correctly implemented this yet. I am submitting patches.
395395
inline void ucrt_lock_file(FILE *__restrict fp) noexcept
396396
{
397397
char *fp2{reinterpret_cast<char *>(fp)};
398-
::fast_io::win32::EnterCriticalSection(fp2 + sizeof(crt_iobuf));
398+
::fast_io::win32::EnterCriticalSection(fp2 + sizeof(::fast_io::details::crt_iobuf));
399399
}
400400
inline void ucrt_unlock_file(FILE *__restrict fp) noexcept
401401
{
402402
char *fp2{reinterpret_cast<char *>(fp)};
403-
::fast_io::win32::LeaveCriticalSection(fp2 + sizeof(crt_iobuf));
403+
::fast_io::win32::LeaveCriticalSection(fp2 + sizeof(::fast_io::details::crt_iobuf));
404404
}
405405
#endif
406406
} // namespace details
@@ -487,7 +487,7 @@ template <::std::integral char_type>
487487
inline ::std::byte *read_some_bytes_underflow_define(::fast_io::basic_c_io_observer_unlocked<char_type> ciob,
488488
::std::byte *first, ::std::byte *last)
489489
{
490-
return reinterpret_cast<::std::byte *>(::fast_io::details::wincrt_fp_read_cold_impl(ciob.fp,
490+
return reinterpret_cast<::std::byte *>(::fast_io::details::wincrt_fp_read_cold_impl(ciob.fp,
491491
reinterpret_cast<char *>(first),
492492
reinterpret_cast<char *>(last)));
493493
}

0 commit comments

Comments (0)