diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py
index 7e46b9bc3b24..0cf52a69d9b5 100644
--- a/ibis/backends/duckdb/__init__.py
+++ b/ibis/backends/duckdb/__init__.py
@@ -841,6 +841,59 @@ def read_parquet(
         )
         return self.table(table_name)
 
+    @util.experimental
+    def read_vortex(
+        self,
+        path: str | Path,
+        /,
+        *,
+        table_name: str | None = None,
+        **kwargs: Any,
+    ) -> ir.Table:
+        """Register a vortex file as a table in the current database.
+
+        ::: {.callout-note}
+        ## This feature requires duckdb>=1.4.2
+        :::
+
+        Parameters
+        ----------
+        path
+            The data source(s). May be a path to a file or a glob pattern
+            for vortex files.
+        table_name
+            An optional name to use for the created table. This defaults to
+            a sequentially generated name.
+        **kwargs
+            Additional keyword arguments passed to DuckDB's `read_vortex`
+            function. See https://duckdb.org/docs/stable/core_extensions/vortex
+            for more information.
+
+        Returns
+        -------
+        ir.Table
+            The just-registered table.
+        """
+        path = util.normalize_filename(path)
+
+        if not table_name:
+            table_name = util.gen_name("read_vortex")
+
+        extensions = ["vortex"]
+        if path.startswith(("http://", "https://", "s3://")):
+            extensions.append("httpfs")
+
+        self._load_extensions(extensions)
+
+        options = [
+            sg.to_identifier(key).eq(sge.convert(val)) for key, val in kwargs.items()
+        ]
+        self._create_temp_view(
+            table_name,
+            sg.select(STAR).from_(self.compiler.f.read_vortex(path, *options)),
+        )
+        return self.table(table_name)
+
     def read_delta(
         self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any
     ) -> ir.Table:
@@ -1539,6 +1592,47 @@ def to_parquet(
         with self._safe_raw_sql(copy_cmd):
             pass
 
+    @util.experimental
+    def to_vortex(
+        self,
+        expr: ir.Table,
+        /,
+        path: str | Path,
+        *,
+        params: Mapping[ir.Scalar, Any] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        """Write the results of executing the given expression to a vortex file.
+
+        ::: {.callout-note}
+        ## This feature requires duckdb>=1.4.2
+        :::
+
+        This method is eager and will execute the associated expression
+        immediately.
+
+        Parameters
+        ----------
+        expr
+            The ibis expression to execute and persist to a vortex file.
+        path
+            The output location. A string or Path to the vortex file to write.
+        params
+            Mapping of scalar parameter expressions to value.
+        **kwargs
+            DuckDB Vortex writer arguments. See https://duckdb.org/docs/stable/core_extensions/vortex
+            for more information.
+
+        """
+
+        self._run_pre_execute_hooks(expr)
+        self._load_extensions(["vortex"])
+        query = self.compile(expr, params=params)
+        args = ["FORMAT vortex", *(f"{k.upper()} {v!r}" for k, v in kwargs.items())]
+        copy_cmd = f"COPY ({query}) TO {str(path)!r} ({', '.join(args)})"
+        with self._safe_raw_sql(copy_cmd):
+            pass
+
     @util.experimental
     def to_csv(
         self,
diff --git a/ibis/backends/duckdb/tests/test_io.py b/ibis/backends/duckdb/tests/test_io.py
index 85a0decc3105..c2a0080b8854 100644
--- a/ibis/backends/duckdb/tests/test_io.py
+++ b/ibis/backends/duckdb/tests/test_io.py
@@ -417,6 +417,45 @@ def test_memtable_null_column_parquet_dtype_roundtrip(con, tmp_path):
     assert before.a.type() == after.a.type()
 
 
+@pytest.mark.xfail(
+    LINUX and SANDBOXED,
+    reason="nix on linux cannot download duckdb extensions or data due to sandboxing",
+)
+def test_read_vortex(con, data_dir, tmp_path):
+    # convert the contents of a parquet file to vortex, then read it back
+    t = con.read_parquet(data_dir / "parquet" / "functional_alltypes.parquet")
+    path = tmp_path / "functional_alltypes.vortex"
+    con.to_vortex(t, path)
+    vt = con.read_vortex(path)
+    assert vt.count().execute() == t.count().execute()
+
+
+@pytest.mark.xfail(
+    LINUX and SANDBOXED,
+    reason="nix on linux cannot download duckdb extensions or data due to sandboxing",
+)
+def test_roundtrip_vortex(con, tmp_path):
+    original = ibis.memtable({"x": [1, 2, 3], "y": ["a", "b", "c"]})
+    path = tmp_path / "test.vortex"
+    con.to_vortex(original, path)
+    result = con.read_vortex(path)
+    assert result.count().execute() == 3
+    assert result.columns == ("x", "y")
+
+
+@pytest.mark.xfail(
+    LINUX and SANDBOXED,
+    reason="nix on linux cannot download duckdb extensions or data due to sandboxing",
+)
+def test_roundtrip_vortex_with_table_name(con, tmp_path):
+    original = ibis.memtable({"a": [10, 20, 30]})
+    path = tmp_path / "named.vortex"
+    con.to_vortex(original, path)
+    result = con.read_vortex(path, table_name="my_vortex_table")
+    assert result.count().execute() == 3
+    assert "my_vortex_table" in con.list_tables()
+
+
 def test_read_json_no_auto_detection(con, tmp_path):
     ndjson_data = """
     {"year": 2007}