diff --git a/ibis/backends/duckdb/tests/test_client.py b/ibis/backends/duckdb/tests/test_client.py index f4778f2b21f1..84cf5eac2957 100644 --- a/ibis/backends/duckdb/tests/test_client.py +++ b/ibis/backends/duckdb/tests/test_client.py @@ -4,7 +4,7 @@ import random import subprocess import sys -from datetime import datetime +from datetime import datetime, timezone import duckdb import numpy as np @@ -14,6 +14,7 @@ from pytest import param import ibis +from ibis import _ import ibis.common.exceptions as com import ibis.expr.datatypes as dt from ibis.conftest import LINUX, SANDBOXED, not_windows @@ -201,6 +202,59 @@ def test_to_other_sql(con, snapshot): snapshot.assert_match(sql, "out.sql") +def test_timezone_cast_extracts_and_time(): + con = ibis.duckdb.connect() + t = ibis.memtable( + {"x": [datetime(2023, 1, 2, 0, 0, tzinfo=timezone.utc)]}, + schema=ibis.schema({"x": "timestamp('UTC')"}), + ) + expr = t.select( + ams_hour=t.x.cast("timestamp('Europe/Amsterdam')").hour(), + utc_hour=t.x.cast("timestamp('UTC')").hour(), + ams_time=t.x.cast("timestamp('Europe/Amsterdam')").time(), + utc_time=t.x.cast("timestamp('UTC')").time(), + ) + + result = con.execute(expr) + + assert result.ams_hour.iat[0] == 1 + assert result.utc_hour.iat[0] == 0 + assert str(result.ams_time.iat[0]) == "01:00:00" + assert str(result.utc_time.iat[0]) == "00:00:00" + + +def test_timezone_cast_epoch_seconds_uses_timezone_instant(): + con = ibis.duckdb.connect() + t = ibis.memtable({"a": [1]}) + expr = t.select(var=ibis.literal("2023-01-02")).mutate( + es_ams=ibis.timestamp(_.var, timezone="Europe/Amsterdam").epoch_seconds(), + es_utc=ibis.timestamp(_.var, timezone="UTC").epoch_seconds(), + es_ams2=_.var.cast("timestamp('Europe/Amsterdam')").epoch_seconds(), + es_utc2=_.var.cast("timestamp('UTC')").epoch_seconds(), + ) + + result = con.execute(expr).iloc[0] + + assert result.es_ams == 1672614000 + assert result.es_utc == 1672617600 + assert result.es_ams2 == 1672614000 + assert result.es_utc2 == 1672617600 + + +def test_to_trino_sql_timezone_cast_uses_timezone_functions(): + t = ibis.memtable({"x": ["2023-01-02"]}) + expr = t.select( + casted=t.x.cast("timestamp('Europe/Paris')"), + hour=t.x.cast("timestamp('Europe/Paris')").hour(), + time=t.x.cast("timestamp('Europe/Paris')").time(), + ) + + sql = ibis.to_sql(expr, dialect="trino") + + assert "AT_TIMEZONE(" in sql + assert "WITH_TIMEZONE(" in sql + + def test_insert_preserves_column_case(con): name1 = ibis.util.guid() name2 = ibis.util.guid() diff --git a/ibis/backends/sql/compilers/duckdb.py b/ibis/backends/sql/compilers/duckdb.py index 82cb686fdac2..f4ea8952ac15 100644 --- a/ibis/backends/sql/compilers/duckdb.py +++ b/ibis/backends/sql/compilers/duckdb.py @@ -392,12 +392,41 @@ def visit_CountDistinctStar(self, op, *, where, arg): ) return self.agg.count(sge.Distinct(expressions=[row]), where=where) + def _localize_timestamp_for_extract(self, op, *, arg): + if op.arg.dtype.is_timestamp() and (timezone := op.arg.dtype.timezone) is not None: + return self.f.timezone(timezone, arg) + return arg + + def visit_Time(self, op, *, arg): + arg = self._localize_timestamp_for_extract(op, arg=arg) + return super().visit_Time(op, arg=arg) + + def visit_ExtractEpochSeconds(self, op, *, arg): + if op.arg.dtype.is_timestamp() and op.arg.dtype.timezone is not None: + return self.f.epoch(arg) + return super().visit_ExtractEpochSeconds(op, arg=arg) + + def visit_ExtractHour(self, op, *, arg): + return self.f.extract("hour", self._localize_timestamp_for_extract(op, arg=arg)) + + def visit_ExtractMinute(self, op, *, arg): + return self.f.extract( + "minute", self._localize_timestamp_for_extract(op, arg=arg) + ) + + def visit_ExtractSecond(self, op, *, arg): + return self.f.extract( + "second", self._localize_timestamp_for_extract(op, arg=arg) + ) + def visit_ExtractMillisecond(self, op, *, arg): + arg = self._localize_timestamp_for_extract(op, arg=arg) return self.f.mod(self.f.extract("ms", arg), 1_000) # DuckDB extracts subminute microseconds and milliseconds # so we have to finesse it a little bit def visit_ExtractMicrosecond(self, op, *, arg): + arg = self._localize_timestamp_for_extract(op, arg=arg) return self.f.mod(self.f.extract("us", arg), 1_000_000) def visit_TimestampFromUNIX(self, op, *, arg, unit): @@ -428,6 +457,15 @@ def visit_Cast(self, op, *, arg, to): return func(sg.cast(arg, to=self.type_mapper.from_ibis(dt.int32))) elif to.is_timestamp() and dtype.is_numeric(): return self.f.to_timestamp(arg) + elif to.is_timestamp() and to.timezone is not None and ( + dtype.is_string() or dtype.is_date() + ): + # DuckDB TIMESTAMPTZ casts from strings/dates do not retain the target + # timezone intent by default, so parse as naive timestamp and then + # localize into the requested timezone. + return self.f.timezone( + to.timezone, self.cast(arg, dt.Timestamp(scale=to.scale)) + ) elif to.is_geospatial(): if dtype.is_binary(): return self.f.st_geomfromwkb(arg) diff --git a/ibis/backends/sql/compilers/trino.py b/ibis/backends/sql/compilers/trino.py index 24c7a0dece70..4ec06362f4ce 100644 --- a/ibis/backends/sql/compilers/trino.py +++ b/ibis/backends/sql/compilers/trino.py @@ -585,9 +585,21 @@ def visit_Cast(self, op, *, arg, to): if from_.is_integer(): return self.f.from_unixtime(arg, tz) else: - return self.f.from_unixtime_nanos( + out = self.f.from_unixtime_nanos( self.cast(arg, dt.Decimal(38, 9)) * 1_000_000_000 ) + return self.f.at_timezone(out, tz) + + if to.is_timestamp() and (timezone := to.timezone) is not None: + if from_.is_string() or from_.is_date(): + arg = self.cast(arg, dt.Timestamp(scale=to.scale)) + from_ = dt.Timestamp(scale=to.scale) + + if from_.is_timestamp(): + if from_.timezone is None: + arg = self.f.with_timezone(arg, "UTC") + return self.f.at_timezone(arg, timezone) + return super().visit_Cast(op, arg=arg, to=to) def visit_CountDistinctStar(self, op, *, arg, where):