diff --git a/src/dune_digest/digest.ml b/src/dune_digest/digest.ml
index 8ddc319aca4..2d99c3e6ccb 100644
--- a/src/dune_digest/digest.ml
+++ b/src/dune_digest/digest.ml
@@ -79,9 +79,6 @@ let open_for_digest file =
   |> Fd.unsafe_of_unix_file_descr
 ;;
 
-(* CR-someday rgrinberg: maybe this should exist in blake3_mini? *)
-let zero = lazy (Hasher.with_singleton (fun _f -> ()))
-
 let digest_and_close_fd fd =
   let start = Counter.Timer.start () in
   let res =
@@ -104,36 +101,19 @@ let file file =
   digest_and_close_fd fd
 ;;
 
-(* Throttle concurrent [file_async] calls so an unbounded [parallel_map] over
-   many targets (e.g. the relocatable compiler) does not exhaust the process
-   fd limit. 100 sits comfortably above the background thread pool's worker
-   count (so digesting never starves it) while staying well under the typical
-   1024 fd soft limit; raising that limit is hazardous because some code
-   still falls back to [select()], which has a hard FD_SETSIZE (1024) cap. *)
-let digest_throttle = lazy (Fiber.Throttle.create 100)
-let async_digest_minimum = 1_000
-
-let file_async file =
-  Fiber.Throttle.run (Lazy.force digest_throttle) ~f:(fun () ->
-    let open Fiber.O in
-    let* () = Fiber.return () in
-    let fd = open_for_digest file in
-    Counter.incr Metrics.Digest.File.count;
-    let size =
-      match Unix.fstat (Fd.unsafe_to_unix_file_descr fd) with
-      | exception exn ->
-        Fd.close fd;
-        raise exn
-      | stat -> stat.st_size
-    in
-    Counter.add Metrics.Digest.File.bytes size;
-    if size = 0
-    then
-      let+ () = Fiber.return @@ Fd.close fd in
-      Lazy.force zero
-    else if size < async_digest_minimum
-    then Fiber.return (digest_and_close_fd fd)
-    else Dune_scheduler.Scheduler.async_exn (fun () -> digest_and_close_fd fd))
+let file_async =
+  let digest_throttle = lazy (Fiber.Throttle.create 32) in
+  fun file ->
+    Fiber.Throttle.run (Lazy.force digest_throttle) ~f:(fun () ->
+      let open Fiber.O in
+      let start = Counter.Timer.start () in
+      let+ digest, size =
+        Dune_scheduler.Scheduler.async_exn (fun () -> Blake3_mini.file_with_size file)
+      in
+      Counter.incr Metrics.Digest.File.count;
+      Counter.add Metrics.Digest.File.bytes size;
+      Counter.Timer.stop Metrics.Digest.File.time start;
+      digest)
 ;;
 
 let equal = Blake3_mini.Digest.equal
diff --git a/src/ocaml-blake3-mini/blake3_mini.ml b/src/ocaml-blake3-mini/blake3_mini.ml
index c5a1d339d9e..ef79f4c1d35 100644
--- a/src/ocaml-blake3-mini/blake3_mini.ml
+++ b/src/ocaml-blake3-mini/blake3_mini.ml
@@ -49,3 +49,25 @@ external feed_bigstring_release_lock
   = "blake3_mini_feed_bigstring_unlock"
 
 external fd : Unix.file_descr -> string = "blake3_mini_fd"
+external file_with_size_unix : string -> Digest.t * int = "blake3_mini_file_with_size"
+
+let file_with_size_ocaml file =
+  let digest_fd = fd in
+  let fd = Unix.openfile file [ Unix.O_RDONLY; Unix.O_SHARE_DELETE; Unix.O_CLOEXEC ] 0 in
+  match
+    let size = (Unix.fstat fd).st_size in
+    let digest = digest_fd fd in
+    digest, size
+  with
+  | exception exn ->
+    let bt = Printexc.get_raw_backtrace () in
+    (match Unix.close fd with
+     | () -> ()
+     | exception _ -> ());
+    Printexc.raise_with_backtrace exn bt
+  | res ->
+    Unix.close fd;
+    res
+;;
+
+let file_with_size = if Sys.win32 then file_with_size_ocaml else file_with_size_unix
diff --git a/src/ocaml-blake3-mini/blake3_mini.mli b/src/ocaml-blake3-mini/blake3_mini.mli
index 00751b67fcd..f50526fd403 100644
--- a/src/ocaml-blake3-mini/blake3_mini.mli
+++ b/src/ocaml-blake3-mini/blake3_mini.mli
@@ -25,3 +25,4 @@ val feed_bigstring_release_lock
 val digest : t -> Digest.t
 
 val fd : Unix.file_descr -> Digest.t
+val file_with_size : string -> Digest.t * int
diff --git a/src/ocaml-blake3-mini/blake3_stubs.c b/src/ocaml-blake3-mini/blake3_stubs.c
index b34d62b70f4..6763d6182bc 100644
--- a/src/ocaml-blake3-mini/blake3_stubs.c
+++ b/src/ocaml-blake3-mini/blake3_stubs.c
@@ -1,4 +1,12 @@
 #include <string.h>
+#ifndef _WIN32
+#ifndef O_CLOEXEC
+#define O_CLOEXEC 0
+#endif
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#endif
 
 #include <blake3.h>
 #include <errno.h>
@@ -6,6 +14,7 @@
 #include <caml/alloc.h>
 #include <caml/fail.h>
 #include <caml/memory.h>
+#include <caml/osdeps.h>
 #include <caml/threads.h>
 #include <caml/unixsupport.h>
 
@@ -55,6 +64,84 @@ CAMLprim value blake3_mini_fd(value v_fd) {
   CAMLreturn(v_ret);
 }
 
+CAMLprim value blake3_mini_file_with_size(value v_path) {
+  CAMLparam1(v_path);
+#ifdef _WIN32
+  caml_failwith("blake3_mini_file_with_size is not implemented on Windows");
+#else
+  CAMLlocal3(v_digest, v_size, v_result);
+  caml_unix_check_path(v_path, "open");
+  char_os *path = caml_stat_strdup_to_os(String_val(v_path));
+  blake3_hasher hasher;
+  blake3_hasher_init(&hasher);
+  int err = 0;
+  const char *err_op = NULL;
+  int fd = -1;
+  intnat size = 0;
+
+  caml_release_runtime_system();
+
+  while (1) {
+    fd = open(path, O_RDONLY | O_CLOEXEC);
+    if (fd != -1 || errno != EINTR)
+      break;
+  }
+  if (fd == -1) {
+    err = errno;
+    err_op = "open";
+    goto done;
+  }
+
+  struct stat st;
+  while (fstat(fd, &st) == -1) {
+    if (errno == EINTR)
+      continue;
+    err = errno;
+    err_op = "fstat";
+    goto close;
+  }
+  size = st.st_size;
+
+  char buffer[UNIX_BUFFER_SIZE];
+  ssize_t bytes_read;
+  while (1) {
+    bytes_read = read(fd, buffer, sizeof(buffer));
+    if (bytes_read == 0) {
+      break;
+    } else if (bytes_read < 0) {
+      if (errno == EINTR)
+        continue;
+      err = errno;
+      err_op = "read";
+      break;
+    } else {
+      blake3_hasher_update(&hasher, buffer, bytes_read);
+    }
+  }
+
+close:
+  if (close(fd) == -1 && err == 0) {
+    err = errno;
+    err_op = "close";
+  }
+
+done:
+  caml_acquire_runtime_system();
+  caml_stat_free(path);
+  if (err != 0) {
+    errno = err;
+    uerror(err_op, v_path);
+  }
+
+  v_digest = alloc_hash(&hasher, 16);
+  v_size = Val_long(size);
+  v_result = caml_alloc_tuple(2);
+  Store_field(v_result, 0, v_digest);
+  Store_field(v_result, 1, v_size);
+  CAMLreturn(v_result);
+#endif
+}
+
 static void blake3_mini_finalize(value v_t) {
   blake3_hasher *hasher = Blake3_val(v_t);
   caml_stat_free(hasher);
diff --git a/test/blackbox-tests/test-cases/dune-cache/repro-check.t b/test/blackbox-tests/test-cases/dune-cache/repro-check.t
index 456a22c6ea9..69a1544ec7e 100644
--- a/test/blackbox-tests/test-cases/dune-cache/repro-check.t
+++ b/test/blackbox-tests/test-cases/dune-cache/repro-check.t
@@ -9,6 +9,16 @@ Test reproducibility check
   $ dune_build () {
   >   dune build -j1 "$@"
   > }
+  $ dune_build_sorted_actions () {
+  >   dune_build "$@" > dune-build-output 2>&1 || {
+  >     status=$?
+  >     cat dune-build-output
+  >     return $status
+  >   }
+  >   sed -n '/^Warning:/,/^action at dune:6/p' dune-build-output
+  >   sed -n '/^build /p' dune-build-output | sort
+  >   sed '/^Warning:/,/^action at dune:6/d; /^build /d' dune-build-output
+  > }
 
   $ cat > dune-project <<EOF
   > (lang dune 3.0)
@@ -31,9 +41,9 @@ Both rules read [dep] but only the reproducible rule declares it as a dependency
 
 Build both, which will store the results to the cache
 
-  $ dune_build --config-file config reproducible non-reproducible
-  build reproducible
+  $ dune_build_sorted_actions --config-file config reproducible non-reproducible
   build non-reproducible
+  build reproducible
 
 Update the content and rebuild; only the reproducible rule will rerun
 
@@ -71,13 +81,13 @@ Set 'cache-check-probability' to 1.0, which should trigger the check
   > (cache-check-probability 1.0)
   > EOF
   $ rm -rf _build
-  $ dune_build --config-file config reproducible non-reproducible
+  $ dune_build_sorted_actions --config-file config reproducible non-reproducible
   Warning: cache store error [3bb99da19b1ae86663e3b09c33f59ff6]:
   ((in_cache ((non-reproducible 7378fb2d7d80dc4468d6558d864f0897)))
   (computed ((non-reproducible 074ebdc1c3853f27c68566d8d183032c)))) after
   executing action at dune:6
-  build reproducible
   build non-reproducible
+  build reproducible
 
 Check that the reported digests make sense
 
@@ -127,25 +137,25 @@ Test that the environment variable and the command line flag work too
 
   $ DUNE_CACHE_CHECK_PROBABILITY=0.0 dune_build --cache=enabled reproducible non-reproducible
   $ rm -rf _build
-  $ DUNE_CACHE_CHECK_PROBABILITY=1.0 dune_build --cache=enabled reproducible non-reproducible
+  $ DUNE_CACHE_CHECK_PROBABILITY=1.0 dune_build_sorted_actions --cache=enabled reproducible non-reproducible
   Warning: cache store error [3bb99da19b1ae86663e3b09c33f59ff6]:
   ((in_cache ((non-reproducible 7378fb2d7d80dc4468d6558d864f0897)))
   (computed ((non-reproducible 074ebdc1c3853f27c68566d8d183032c)))) after
   executing action at dune:6
-  build reproducible
   build non-reproducible
+  build reproducible
 
   $ rm -rf _build
   $ DUNE_CACHE_CHECK_PROBABILITY=1.0 dune_build --cache-check-probability=0.0 --cache=enabled reproducible non-reproducible
 
   $ rm -rf _build
-  $ dune_build --cache=enabled --cache-check-probability=1.0 reproducible non-reproducible
+  $ dune_build_sorted_actions --cache=enabled --cache-check-probability=1.0 reproducible non-reproducible
   Warning: cache store error [3bb99da19b1ae86663e3b09c33f59ff6]:
   ((in_cache ((non-reproducible 7378fb2d7d80dc4468d6558d864f0897)))
   (computed ((non-reproducible 074ebdc1c3853f27c68566d8d183032c)))) after
   executing action at dune:6
-  build reproducible
   build non-reproducible
+  build reproducible
 
   $ dune_build --cache=enabled --cache-check-probability=8 reproducible non-reproducible
   Error: The reproducibility check probability must be in the range [0, 1].
diff --git a/test/expect-tests/blake3/blake3_mini_tests.ml b/test/expect-tests/blake3/blake3_mini_tests.ml
index fadf5c05194..2571f9f8929 100644
--- a/test/expect-tests/blake3/blake3_mini_tests.ml
+++ b/test/expect-tests/blake3/blake3_mini_tests.ml
@@ -15,6 +15,12 @@ let%expect_test "128 bits fd" =
   [%expect {| 7a7d692dfca02a756fea9a8a77903807 |}]
 ;;
 
+let%expect_test "file with size" =
+  let hash, size = Blake3_mini.file_with_size "somefile" in
+  printf "%s %d\n" (Blake3_mini.Digest.to_hex hash) size;
+  [%expect {| 7a7d692dfca02a756fea9a8a77903807 11 |}]
+;;
+
 let read_file name =
   let chan = open_in name in
   let size = in_channel_length chan in