diff --git a/python/dolma/core/ft_dataset.py b/python/dolma/core/ft_dataset.py index 40a63d7e..8ecbdf49 100644 --- a/python/dolma/core/ft_dataset.py +++ b/python/dolma/core/ft_dataset.py @@ -100,7 +100,7 @@ def process_file(config: Config, q: "Queue[str]", flag: Event, label: str, fn): def write_results(config: Config, q: "Queue[str]", flag: Event): written = 0 - with smart_open.open(config.out_path, "wb") as o: + with smart_open.open(config.out_path, "w") as o: while True: msg = q.get() @@ -108,7 +108,7 @@ def write_results(config: Config, q: "Queue[str]", flag: Event): break if not flag.is_set(): - o.write(q.get()) + o.write(msg) o.write("\n") written += 1