Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 94 additions & 0 deletions ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,59 @@ def read_parquet(
)
return self.table(table_name)

@util.experimental
def read_vortex(
    self,
    path: str | Path,
    /,
    *,
    table_name: str | None = None,
    **kwargs: Any,
) -> ir.Table:
    """Expose a vortex file as a table in the current database.

    ::: {.callout-note}
    ## This feature requires duckdb>=1.4.2
    :::

    Parameters
    ----------
    path
        Location of the data: a single vortex file or a glob pattern
        matching several vortex files.
    table_name
        Name to give the created table. When omitted (or empty), a
        sequentially generated name is used instead.
    **kwargs
        Extra keyword arguments forwarded as named arguments to DuckDB's
        `read_vortex` function. See
        https://duckdb.org/docs/stable/core_extensions/vortex for more
        information.

    Returns
    -------
    ir.Table
        The just-registered table.
    """
    path = util.normalize_filename(path)
    table_name = table_name or util.gen_name("read_vortex")

    # Remote locations additionally need the httpfs extension.
    is_remote = path.startswith(("http://", "https://", "s3://"))
    self._load_extensions(["vortex", "httpfs"] if is_remote else ["vortex"])

    # Each keyword argument becomes a `name = value` argument of the call.
    named_args = []
    for name, value in kwargs.items():
        named_args.append(sg.to_identifier(name).eq(sge.convert(value)))

    scan = self.compiler.f.read_vortex(path, *named_args)
    self._create_temp_view(table_name, sg.select(STAR).from_(scan))
    return self.table(table_name)

def read_delta(
self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any
) -> ir.Table:
Expand Down Expand Up @@ -1539,6 +1592,47 @@ def to_parquet(
with self._safe_raw_sql(copy_cmd):
pass

@util.experimental
def to_vortex(
    self,
    expr: ir.Table,
    /,
    path: str | Path,
    *,
    params: Mapping[ir.Scalar, Any] | None = None,
    **kwargs: Any,
) -> None:
    """Write the results of executing the given expression to a vortex file.

    ::: {.callout-note}
    ## This feature requires duckdb>=1.4.2
    :::

    This method is eager and will execute the associated expression
    immediately.

    Parameters
    ----------
    expr
        The ibis expression to execute and persist to a vortex file.
    path
        The destination. A string or Path to the vortex file to write.
    params
        Mapping of scalar parameter expressions to value.
    **kwargs
        DuckDB Vortex writer arguments. Each one is rendered as an
        upper-cased `NAME value` option of the `COPY` statement; string
        values are emitted as SQL string literals. See
        https://duckdb.org/docs/stable/core_extensions/vortex for more
        information.
    """

    def sql_literal(value: Any) -> str:
        # Python's repr() is not SQL quoting: a string containing a single
        # quote is rendered in double quotes (an identifier in DuckDB) and
        # backslashes are doubled. Quote strings the SQL way instead, by
        # doubling embedded single quotes. Non-string values keep their
        # repr() rendering, so existing calls produce the same SQL.
        if isinstance(value, str):
            return "'" + value.replace("'", "''") + "'"
        return repr(value)

    self._run_pre_execute_hooks(expr)
    self._load_extensions(["vortex"])
    query = self.compile(expr, params=params)
    args = [
        "FORMAT vortex",
        *(f"{key.upper()} {sql_literal(value)}" for key, value in kwargs.items()),
    ]
    copy_cmd = f"COPY ({query}) TO {sql_literal(str(path))} ({', '.join(args)})"
    # The statement is run for its side effect only; there is no result to
    # consume inside the context manager.
    with self._safe_raw_sql(copy_cmd):
        pass

@util.experimental
def to_csv(
self,
Expand Down
39 changes: 39 additions & 0 deletions ibis/backends/duckdb/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,45 @@ def test_memtable_null_column_parquet_dtype_roundtrip(con, tmp_path):
assert before.a.type() == after.a.type()


@pytest.mark.xfail(
    LINUX and SANDBOXED,
    reason="nix on linux cannot download duckdb extensions or data due to sandboxing",
)
def test_read_vortex(con, data_dir, tmp_path):
    """Row count survives a parquet -> vortex -> table round trip."""
    parquet_file = data_dir / "parquet" / "functional_alltypes.parquet"
    vortex_file = tmp_path / "functional_alltypes.vortex"

    # Write the parquet contents out as vortex, then load that file back.
    source = con.read_parquet(parquet_file)
    con.to_vortex(source, vortex_file)
    loaded = con.read_vortex(vortex_file)

    actual_rows = loaded.count().execute()
    expected_rows = source.count().execute()
    assert actual_rows == expected_rows


@pytest.mark.xfail(
    LINUX and SANDBOXED,
    reason="nix on linux cannot download duckdb extensions or data due to sandboxing",
)
def test_roundtrip_vortex(con, tmp_path):
    """A memtable written with `to_vortex` reads back with the same shape."""
    vortex_file = tmp_path / "test.vortex"
    source = ibis.memtable({"x": [1, 2, 3], "y": ["a", "b", "c"]})

    con.to_vortex(source, vortex_file)
    loaded = con.read_vortex(vortex_file)

    n_rows = loaded.count().execute()
    assert n_rows == 3
    assert loaded.columns == ("x", "y")


@pytest.mark.xfail(
    LINUX and SANDBOXED,
    reason="nix on linux cannot download duckdb extensions or data due to sandboxing",
)
def test_roundtrip_vortex_with_table_name(con, tmp_path):
    """`read_vortex` registers the table under an explicitly given name."""
    vortex_file = tmp_path / "named.vortex"
    source = ibis.memtable({"a": [10, 20, 30]})

    con.to_vortex(source, vortex_file)
    loaded = con.read_vortex(vortex_file, table_name="my_vortex_table")

    n_rows = loaded.count().execute()
    assert n_rows == 3

    registered = con.list_tables()
    assert "my_vortex_table" in registered


def test_read_json_no_auto_detection(con, tmp_path):
ndjson_data = """
{"year": 2007}
Expand Down