Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 13 additions & 33 deletions src/dune_digest/digest.ml
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,6 @@ let open_for_digest file =
|> Fd.unsafe_of_unix_file_descr
;;

(* CR-someday rgrinberg: maybe this should exist in blake3_mini? *)
let zero = lazy (Hasher.with_singleton (fun _f -> ()))

let digest_and_close_fd fd =
let start = Counter.Timer.start () in
let res =
Expand All @@ -104,36 +101,19 @@ let file file =
digest_and_close_fd fd
;;

(* Throttle concurrent [file_async] calls so an unbounded [parallel_map] over
many targets (e.g. the relocatable compiler) does not exhaust the process
fd limit. 100 sits comfortably above the background thread pool's worker
count (so digesting never starves it) while staying well under the typical
1024 fd soft limit; raising that limit is hazardous because some code
still falls back to [select()], which has a hard FD_SETSIZE (1024) cap. *)
let digest_throttle = lazy (Fiber.Throttle.create 100)
let async_digest_minimum = 1_000

let file_async file =
Fiber.Throttle.run (Lazy.force digest_throttle) ~f:(fun () ->
let open Fiber.O in
let* () = Fiber.return () in
let fd = open_for_digest file in
Counter.incr Metrics.Digest.File.count;
let size =
match Unix.fstat (Fd.unsafe_to_unix_file_descr fd) with
| exception exn ->
Fd.close fd;
raise exn
| stat -> stat.st_size
in
Counter.add Metrics.Digest.File.bytes size;
if size = 0
then
let+ () = Fiber.return @@ Fd.close fd in
Lazy.force zero
else if size < async_digest_minimum
then Fiber.return (digest_and_close_fd fd)
else Dune_scheduler.Scheduler.async_exn (fun () -> digest_and_close_fd fd))
let file_async =
let digest_throttle = lazy (Fiber.Throttle.create 32) in
fun file ->
Fiber.Throttle.run (Lazy.force digest_throttle) ~f:(fun () ->
let open Fiber.O in
let start = Counter.Timer.start () in
let+ digest, size =
Dune_scheduler.Scheduler.async_exn (fun () -> Blake3_mini.file_with_size file)
in
Counter.incr Metrics.Digest.File.count;
Counter.add Metrics.Digest.File.bytes size;
Counter.Timer.stop Metrics.Digest.File.time start;
digest)
;;

let equal = Blake3_mini.Digest.equal
Expand Down
22 changes: 22 additions & 0 deletions src/ocaml-blake3-mini/blake3_mini.ml
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,25 @@ external feed_bigstring_release_lock
= "blake3_mini_feed_bigstring_unlock"

external fd : Unix.file_descr -> string = "blake3_mini_fd"
external file_with_size_unix : string -> Digest.t * int = "blake3_mini_file_with_size"

let file_with_size_ocaml file =
let digest_fd = fd in
let fd = Unix.openfile file [ Unix.O_RDONLY; Unix.O_SHARE_DELETE; Unix.O_CLOEXEC ] 0 in
match
let size = (Unix.fstat fd).st_size in
let digest = digest_fd fd in
digest, size
with
| exception exn ->
let bt = Printexc.get_raw_backtrace () in
(match Unix.close fd with
| () -> ()
| exception _ -> ());
Printexc.raise_with_backtrace exn bt
| res ->
Unix.close fd;
res
;;

let file_with_size = if Sys.win32 then file_with_size_ocaml else file_with_size_unix
1 change: 1 addition & 0 deletions src/ocaml-blake3-mini/blake3_mini.mli
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ val feed_bigstring_release_lock

val digest : t -> Digest.t
val fd : Unix.file_descr -> Digest.t
val file_with_size : string -> Digest.t * int
87 changes: 87 additions & 0 deletions src/ocaml-blake3-mini/blake3_stubs.c
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
#include <errno.h>
#ifndef _WIN32
#ifndef O_CLOEXEC
#define O_CLOEXEC 0
#endif
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#endif

#include <caml/alloc.h>
#include <caml/bigarray.h>
#include <caml/custom.h>
#include <caml/fail.h>
#include <caml/memory.h>
#include <caml/mlvalues.h>
#include <caml/osdeps.h>
#include <caml/threads.h>
#include <caml/unixsupport.h>

Expand Down Expand Up @@ -55,6 +64,84 @@ CAMLprim value blake3_mini_fd(value v_fd) {
CAMLreturn(v_ret);
}

CAMLprim value blake3_mini_file_with_size(value v_path) {
CAMLparam1(v_path);
#ifdef _WIN32
caml_failwith("blake3_mini_file_with_size is not implemented on Windows");
#else
CAMLlocal3(v_digest, v_size, v_result);
caml_unix_check_path(v_path, "open");
char_os *path = caml_stat_strdup_to_os(String_val(v_path));
blake3_hasher hasher;
blake3_hasher_init(&hasher);
int err = 0;
const char *err_op = NULL;
int fd = -1;
intnat size = 0;

caml_release_runtime_system();

while (1) {
fd = open(path, O_RDONLY | O_CLOEXEC);
if (fd != -1 || errno != EINTR)
break;
}
if (fd == -1) {
err = errno;
err_op = "open";
goto done;
}

struct stat st;
while (fstat(fd, &st) == -1) {
if (errno == EINTR)
continue;
err = errno;
err_op = "fstat";
goto close;
}
size = st.st_size;

char buffer[UNIX_BUFFER_SIZE];
ssize_t bytes_read;
while (1) {
bytes_read = read(fd, buffer, sizeof(buffer));
if (bytes_read == 0) {
break;
} else if (bytes_read < 0) {
if (errno == EINTR)
continue;
err = errno;
err_op = "read";
break;
} else {
blake3_hasher_update(&hasher, buffer, bytes_read);
}
}

close:
if (close(fd) == -1 && err == 0) {
err = errno;
err_op = "close";
}

done:
caml_acquire_runtime_system();
caml_stat_free(path);
if (err != 0) {
errno = err;
uerror(err_op, v_path);
}

v_digest = alloc_hash(&hasher, 16);
v_size = Val_long(size);
v_result = caml_alloc_tuple(2);
Store_field(v_result, 0, v_digest);
Store_field(v_result, 1, v_size);
CAMLreturn(v_result);
#endif
}

static void blake3_mini_finalize(value v_t) {
blake3_hasher *hasher = Blake3_val(v_t);
caml_stat_free(hasher);
Expand Down
26 changes: 18 additions & 8 deletions test/blackbox-tests/test-cases/dune-cache/repro-check.t
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,16 @@ Test reproducibility check
$ dune_build () {
> dune build -j1 "$@"
> }
$ dune_build_sorted_actions () {
> dune_build "$@" > dune-build-output 2>&1 || {
> status=$?
> cat dune-build-output
> return $status
> }
> sed -n '/^Warning:/,/^action at dune:6/p' dune-build-output
> sed -n '/^build /p' dune-build-output | sort
> sed '/^Warning:/,/^action at dune:6/d; /^build /d' dune-build-output
> }

$ cat > dune-project <<EOF
> (lang dune 3.0)
Expand All @@ -31,9 +41,9 @@ Both rules read [dep] but only the reproducible rule declares it as a dependency

Build both, which will store the results to the cache

$ dune_build --config-file config reproducible non-reproducible
build reproducible
$ dune_build_sorted_actions --config-file config reproducible non-reproducible
build non-reproducible
build reproducible

Update the content and rebuild; only the reproducible rule will rerun

Expand Down Expand Up @@ -71,13 +81,13 @@ Set 'cache-check-probability' to 1.0, which should trigger the check
> (cache-check-probability 1.0)
> EOF
$ rm -rf _build
$ dune_build --config-file config reproducible non-reproducible
$ dune_build_sorted_actions --config-file config reproducible non-reproducible
Warning: cache store error [3bb99da19b1ae86663e3b09c33f59ff6]: ((in_cache
((non-reproducible 7378fb2d7d80dc4468d6558d864f0897))) (computed
((non-reproducible 074ebdc1c3853f27c68566d8d183032c)))) after executing
action at dune:6
build reproducible
build non-reproducible
build reproducible

Check that the reported digests make sense

Expand Down Expand Up @@ -127,25 +137,25 @@ Test that the environment variable and the command line flag work too
$ DUNE_CACHE_CHECK_PROBABILITY=0.0 dune_build --cache=enabled reproducible non-reproducible

$ rm -rf _build
$ DUNE_CACHE_CHECK_PROBABILITY=1.0 dune_build --cache=enabled reproducible non-reproducible
$ DUNE_CACHE_CHECK_PROBABILITY=1.0 dune_build_sorted_actions --cache=enabled reproducible non-reproducible
Warning: cache store error [3bb99da19b1ae86663e3b09c33f59ff6]: ((in_cache
((non-reproducible 7378fb2d7d80dc4468d6558d864f0897))) (computed
((non-reproducible 074ebdc1c3853f27c68566d8d183032c)))) after executing
action at dune:6
build reproducible
build non-reproducible
build reproducible

$ rm -rf _build
$ DUNE_CACHE_CHECK_PROBABILITY=1.0 dune_build --cache-check-probability=0.0 --cache=enabled reproducible non-reproducible

$ rm -rf _build
$ dune_build --cache=enabled --cache-check-probability=1.0 reproducible non-reproducible
$ dune_build_sorted_actions --cache=enabled --cache-check-probability=1.0 reproducible non-reproducible
Warning: cache store error [3bb99da19b1ae86663e3b09c33f59ff6]: ((in_cache
((non-reproducible 7378fb2d7d80dc4468d6558d864f0897))) (computed
((non-reproducible 074ebdc1c3853f27c68566d8d183032c)))) after executing
action at dune:6
build reproducible
build non-reproducible
build reproducible

$ dune_build --cache=enabled --cache-check-probability=8 reproducible non-reproducible
Error: The reproducibility check probability must be in the range [0, 1].
Expand Down
6 changes: 6 additions & 0 deletions test/expect-tests/blake3/blake3_mini_tests.ml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ let%expect_test "128 bits fd" =
[%expect {| 7a7d692dfca02a756fea9a8a77903807 |}]
;;

let%expect_test "file with size" =
let hash, size = Blake3_mini.file_with_size "somefile" in
printf "%s %d\n" (Blake3_mini.Digest.to_hex hash) size;
[%expect {| 7a7d692dfca02a756fea9a8a77903807 11 |}]
;;

let read_file name =
let chan = open_in name in
let size = in_channel_length chan in
Expand Down
Loading