From 40edc5adbd2c086305e59f34420ed69795072ca1 Mon Sep 17 00:00:00 2001 From: ddl-rliu Date: Thu, 5 Mar 2026 14:37:04 -0800 Subject: [PATCH 1/5] Add FileDownloadConfig annotation for FlyteFile inputs Port new BlobType fields file_extension and enable_legacy_filename to flytekit. FlyteFile inputs can be annotated with the FileDownloadConfig annotation to configure the file extension to use during the copilot download phase. e.g. ```python def t1(file: Annotated[FlyteFile, FileDownloadConfig(file_extension="csv")]): ... # copilot downloads the file to e.g. /inputs/file.csv versus... def t1(file: FlyteFile["csv"]): ... # copilot downloads the file to e.g. /inputs/file ``` Signed-off-by: ddl-rliu --- flytekit/core/type_engine.py | 42 +++++++++++++++++++ flytekit/models/core/types.py | 40 ++++++++++++++++-- flytekit/types/file/file.py | 35 ++++++++++++++-- pyproject.toml | 2 +- tests/flytekit/unit/core/test_flyte_file.py | 14 ++++++- tests/flytekit/unit/models/core/test_types.py | 22 ++++++++++ 6 files changed, 146 insertions(+), 9 deletions(-) diff --git a/flytekit/core/type_engine.py b/flytekit/core/type_engine.py index 9993c98479..d2f9b0a94a 100644 --- a/flytekit/core/type_engine.py +++ b/flytekit/core/type_engine.py @@ -105,6 +105,48 @@ def get_batch_size(t: Type) -> Optional[int]: return None +class FileDownloadConfig: + """ + This is used to annotate a FlyteFile when we want to download the file with a specific extension. For example, + + ```python + # ContainerTask + def t1(file: Annotated[FlyteFile, FileDownloadConfig(file_extension="csv")]): + ... # copilot downloads the file to e.g. /inputs/file.csv + + versus... + + def t1(file: FlyteFile["csv"]): + ... # copilot downloads the file to e.g. /inputs/file + ``` + + file_extension: (Default is "") The file extension (e.g. "csv", "parquet") to use during copilot download. + enable_legacy_filename: (Default is False) When true and file_extension is non-empty, the copilot download phase + writes the blob to both the full path (with extension) and the old path (without extension), preserving backward compatibility for + workflows with tasks that may read from both. + """ + + def __init__(self, file_extension: str = "", enable_legacy_filename: bool = False): + self._file_extension = file_extension + self._enable_legacy_filename = enable_legacy_filename + + @property + def file_extension(self) -> str: + return self._file_extension + + @property + def enable_legacy_filename(self) -> bool: + return self._enable_legacy_filename + + +def get_file_download_config(t: Type) -> Optional[FileDownloadConfig]: + if is_annotated(t): + for arg in get_args(t): + if isinstance(arg, FileDownloadConfig): + return arg + return None + + def modify_literal_uris(lit: Literal): """ Modifies the literal object recursively to replace the URIs with the native paths in case they are of diff --git a/flytekit/models/core/types.py b/flytekit/models/core/types.py index 4508961bbc..e01068f95e 100644 --- a/flytekit/models/core/types.py +++ b/flytekit/models/core/types.py @@ -38,13 +38,19 @@ class BlobDimensionality(object): SINGLE = _types_pb2.BlobType.SINGLE MULTIPART = _types_pb2.BlobType.MULTIPART - def __init__(self, format, dimensionality): + def __init__(self, format, dimensionality, file_extension="", enable_legacy_filename=False): """ :param Text format: A string describing the format of the underlying blob data. :param int dimensionality: An integer from BlobType.BlobDimensionality enum + :param Text file_extension: The file extension (e.g. "csv", "parquet") to use + during copilot download, e.g. "csv", "parquet". Empty by default. + :param bool enable_legacy_filename: When True and file_extension is set, the copilot + download phase writes the blob to both the extended path and the base path. """ self._format = format self._dimensionality = dimensionality + self._file_extension = file_extension + self._enable_legacy_filename = enable_legacy_filename @property def format(self): @@ -62,11 +68,34 @@ def dimensionality(self): """ return self._dimensionality + @property + def file_extension(self): + """ + The file extension (e.g. "csv", "parquet") to use during copilot download. + Default is "", which means no extension is appended. + :rtype: Text + """ + return self._file_extension + + @property + def enable_legacy_filename(self): + """ + When True and file_extension is set, the copilot download writes the blob to + both the full path (with extension) and the old path (without extension). + :rtype: bool + """ + return self._enable_legacy_filename + def to_flyte_idl(self): """ :rtype: flyteidl.core.types_pb2.BlobType """ - return _types_pb2.BlobType(format=self.format, dimensionality=self.dimensionality) + return _types_pb2.BlobType( + format=self.format, + dimensionality=self.dimensionality, + file_extension=self._file_extension, + enable_legacy_filename=self._enable_legacy_filename, + ) @classmethod def from_flyte_idl(cls, proto): @@ -74,4 +103,9 @@ def from_flyte_idl(cls, proto): :param flyteidl.core.types_pb2.BlobType proto: :rtype: BlobType """ - return cls(format=proto.format, dimensionality=proto.dimensionality) + return cls( + format=proto.format, + dimensionality=proto.dimensionality, + file_extension=proto.file_extension, + enable_legacy_filename=proto.enable_legacy_filename, + ) diff --git a/flytekit/types/file/file.py b/flytekit/types/file/file.py index 47915add8e..bdf772b9fd 100644 --- a/flytekit/types/file/file.py +++ b/flytekit/types/file/file.py @@ -24,6 +24,7 @@ AsyncTypeTransformer, TypeEngine, TypeTransformerFailedError, + get_file_download_config, get_underlying_type, ) from flytekit.exceptions.user import FlyteAssertion @@ -477,8 +478,26 @@ def get_format(t: typing.Union[typing.Type[FlyteFile], os.PathLike]) -> str: return "" return cast(FlyteFile, t).extension() - def _blob_type(self, format: str) -> BlobType: - return BlobType(format=format, dimensionality=BlobType.BlobDimensionality.SINGLE) + @staticmethod + def get_file_extension(t: typing.Union[typing.Type[FlyteFile], os.PathLike]) -> str: + if t is os.PathLike: + return "" + file_download_config = get_file_download_config(t) + if file_download_config is None: + return "" + return file_download_config.file_extension or "" + + @staticmethod + def get_enable_legacy_filename(t: typing.Union[typing.Type[FlyteFile], os.PathLike]) -> str: + if t is os.PathLike: + return False + file_download_config = get_file_download_config(t) + if file_download_config is None: + return False + return file_download_config.enable_legacy_filename or False + + def _blob_type(self, format: str, file_extension: str = "", enable_legacy_filename: bool = False) -> BlobType: + return BlobType(format=format, dimensionality=BlobType.BlobDimensionality.SINGLE, file_extension=file_extension, enable_legacy_filename=enable_legacy_filename) def assert_type( self, t: typing.Union[typing.Type[FlyteFile], os.PathLike], v: typing.Union[FlyteFile, os.PathLike, str] @@ -491,7 +510,11 @@ def assert_type( ) def get_literal_type(self, t: typing.Union[typing.Type[FlyteFile], os.PathLike]) -> LiteralType: - return LiteralType(blob=self._blob_type(format=FlyteFilePathTransformer.get_format(t))) + return LiteralType(blob=self._blob_type( + format=FlyteFilePathTransformer.get_format(t), + file_extension=FlyteFilePathTransformer.get_file_extension(t), + enable_legacy_filename=FlyteFilePathTransformer.get_enable_legacy_filename(t), + )) def get_mime_type_from_extension(self, extension: str) -> typing.Union[str, typing.Sequence[str]]: extension_to_mime_type = { @@ -565,7 +588,11 @@ async def async_to_literal( raise ValueError(f"Incorrect type {python_type}, must be either a FlyteFile or os.PathLike") # information used by all cases - meta = BlobMetadata(type=self._blob_type(format=FlyteFilePathTransformer.get_format(python_type))) + meta = BlobMetadata(type=self._blob_type( + format=FlyteFilePathTransformer.get_format(python_type), + file_extension=FlyteFilePathTransformer.get_file_extension(python_type), + enable_legacy_filename=FlyteFilePathTransformer.get_enable_legacy_filename(python_type), + )) if isinstance(python_val, FlyteFile): # Cast the source path to str type to avoid error raised when the source path is used as the blob uri, diff --git a/pyproject.toml b/pyproject.toml index 82b8c6c054..7b02665795 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ "diskcache>=5.2.1", "docker>=4.0.0", "docstring-parser>=0.9.0", - "flyteidl>=1.16.1,<2.0.0a0", + "flyteidl @ git+https://github.com/dominodatalab/flyteidl.git@af517f6", "fsspec>=2023.3.0", # Bug in 2025.5.0, 2025.5.0post1 https://github.com/fsspec/gcsfs/issues/687 # Bug in 2024.2.0 https://github.com/fsspec/gcsfs/pull/643 diff --git a/tests/flytekit/unit/core/test_flyte_file.py b/tests/flytekit/unit/core/test_flyte_file.py index fb0903c567..9681a990fb 100644 --- a/tests/flytekit/unit/core/test_flyte_file.py +++ b/tests/flytekit/unit/core/test_flyte_file.py @@ -17,7 +17,7 @@ from flytekit.core.hash import HashMethod from flytekit.core.launch_plan import LaunchPlan from flytekit.core.task import task -from flytekit.core.type_engine import TypeEngine +from flytekit.core.type_engine import FileDownloadConfig, TypeEngine from flytekit.core.workflow import workflow from flytekit.models.core.types import BlobType from flytekit.models.literals import LiteralMap, Blob, BlobMetadata @@ -764,6 +764,18 @@ def test_headers(): assert len(FlyteFilePathTransformer.get_additional_headers(".gz")) == 1 +def test_transform_flytefile_with_file_download_config(): + csv_file_no_config = FlyteFile["csv"] + lt = FlyteFilePathTransformer().get_literal_type(csv_file_no_config) + assert lt.blob.file_extension == "" + assert lt.blob.enable_legacy_filename == False + + legacy_file = Annotated[FlyteFile["csv"], FileDownloadConfig(file_extension="csv", enable_legacy_filename=True)] + lt = FlyteFilePathTransformer().get_literal_type(legacy_file) + assert lt.blob.file_extension == "csv" + assert lt.blob.enable_legacy_filename == True + + def test_new_remote_file(): nf = FlyteFile.new_remote_file(name="foo.txt") assert isinstance(nf, FlyteFile) diff --git a/tests/flytekit/unit/models/core/test_types.py b/tests/flytekit/unit/models/core/test_types.py index 21d6cea396..bf4124eb67 100644 --- a/tests/flytekit/unit/models/core/test_types.py +++ b/tests/flytekit/unit/models/core/test_types.py @@ -15,11 +15,33 @@ def test_blob_type(): ) assert o.format == "csv" assert o.dimensionality == _types.BlobType.BlobDimensionality.SINGLE + assert o.file_extension == "" + assert o.enable_legacy_filename == False o2 = _types.BlobType.from_flyte_idl(o.to_flyte_idl()) assert o == o2 assert o2.format == "csv" assert o2.dimensionality == _types.BlobType.BlobDimensionality.SINGLE + assert o2.file_extension == "" + assert o2.enable_legacy_filename == False + + o = _types.BlobType( + format="csv", + dimensionality=_types.BlobType.BlobDimensionality.SINGLE, + file_extension="csv", + enable_legacy_filename=True, + ) + assert o.format == "csv" + assert o.dimensionality == _types.BlobType.BlobDimensionality.SINGLE + assert o.file_extension == "csv" + assert o.enable_legacy_filename == True + + o2 = _types.BlobType.from_flyte_idl(o.to_flyte_idl()) + assert o == o2 + assert o2.format == "csv" + assert o2.dimensionality == _types.BlobType.BlobDimensionality.SINGLE + assert o2.file_extension == "csv" + assert o2.enable_legacy_filename == True def test_enum_type(): From 6fa120794bc4bd3b5e9a53689ec332e11ccc2cde Mon Sep 17 00:00:00 2001 From: ddl-rliu Date: Wed, 25 Mar 2026 14:46:19 -0700 Subject: [PATCH 2/5] Add regex match for file_extension Signed-off-by: ddl-rliu --- flytekit/core/type_engine.py | 6 ++++++ tests/flytekit/unit/core/test_flyte_file.py | 16 ++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/flytekit/core/type_engine.py b/flytekit/core/type_engine.py index d2f9b0a94a..8e22b53f90 100644 --- a/flytekit/core/type_engine.py +++ b/flytekit/core/type_engine.py @@ -10,6 +10,7 @@ import json import mimetypes import os +import re import sys import textwrap import threading @@ -129,6 +130,11 @@ def t1(file: FlyteFile["csv"]): def __init__(self, file_extension: str = "", enable_legacy_filename: bool = False): self._file_extension = file_extension self._enable_legacy_filename = enable_legacy_filename + + if self._file_extension is not "": + pattern = r"^[a-zA-Z0-9]+(\.[a-zA-Z0-9]+)*$" + if not re.match(pattern, self._file_extension): + raise ValueError(f"Invalid file extension: {self._file_extension}") @property def file_extension(self) -> str: diff --git a/tests/flytekit/unit/core/test_flyte_file.py b/tests/flytekit/unit/core/test_flyte_file.py index 9681a990fb..91158fb229 100644 --- a/tests/flytekit/unit/core/test_flyte_file.py +++ b/tests/flytekit/unit/core/test_flyte_file.py @@ -776,6 +776,22 @@ def test_transform_flytefile_with_file_download_config(): assert lt.blob.enable_legacy_filename == True +def test_file_download_config_valid_compound_extension(): + config = FileDownloadConfig(file_extension="tar.gz") + assert config.file_extension == "tar.gz" + + +@pytest.mark.parametrize("bad_ext", [ + ".csv", + "my file", + "../../escape", + "csv!", +]) +def test_file_download_config_rejects_invalid_extensions(bad_ext): + with pytest.raises(ValueError, match="Invalid file extension"): + FileDownloadConfig(file_extension=bad_ext) + + def test_new_remote_file(): nf = FlyteFile.new_remote_file(name="foo.txt") assert isinstance(nf, FlyteFile) From 04a502cc1f8cea86f03bb6634bef3cb182656de8 Mon Sep 17 00:00:00 2001 From: ddl-rliu Date: Thu, 9 Apr 2026 15:08:17 -0700 Subject: [PATCH 3/5] Remove enable_legacy_filename Signed-off-by: ddl-rliu --- flytekit/core/type_engine.py | 39 +++++++------------ flytekit/models/core/types.py | 16 +------- flytekit/types/file/file.py | 23 +++-------- pyproject.toml | 2 +- tests/flytekit/unit/core/test_flyte_file.py | 24 ++++++------ tests/flytekit/unit/models/core/test_types.py | 5 --- 6 files changed, 34 insertions(+), 75 deletions(-) diff --git a/flytekit/core/type_engine.py b/flytekit/core/type_engine.py index 8e22b53f90..f692b2ced1 100644 --- a/flytekit/core/type_engine.py +++ b/flytekit/core/type_engine.py @@ -106,13 +106,13 @@ def get_batch_size(t: Type) -> Optional[int]: return None -class FileDownloadConfig: +class FileExtension: """ This is used to annotate a FlyteFile when we want to download the file with a specific extension. For example, ```python # ContainerTask - def t1(file: Annotated[FlyteFile, FileDownloadConfig(file_extension="csv")]): + def t1(file: Annotated[FlyteFile, FileExtension("csv")]): ... # copilot downloads the file to e.g. /inputs/file.csv versus... @@ -121,35 +121,26 @@ def t1(file: FlyteFile["csv"]): ... # copilot downloads the file to e.g. /inputs/file ``` - file_extension: (Default is "") The file extension (e.g. "csv", "parquet") to use during copilot download. - enable_legacy_filename: (Default is False) When true and file_extension is non-empty, the copilot download phase - writes the blob to both the full path (with extension) and the old path (without extension), preserving backward compatibility for - workflows with tasks that may read from both. + val: (Default is "") The file extension (e.g. "csv", "parquet") to use during copilot download. """ - def __init__(self, file_extension: str = "", enable_legacy_filename: bool = False): - self._file_extension = file_extension - self._enable_legacy_filename = enable_legacy_filename + def __init__(self, val: str = ""): + self._val = val + + pattern = r"^[a-zA-Z0-9]+(\.[a-zA-Z0-9]+)*$" + if not re.match(pattern, self._val): + raise ValueError(f"Invalid file extension: {self._val}") - if self._file_extension is not "": - pattern = r"^[a-zA-Z0-9]+(\.[a-zA-Z0-9]+)*$" - if not re.match(pattern, self._file_extension): - raise ValueError(f"Invalid file extension: {self._file_extension}") - @property - def file_extension(self) -> str: - return self._file_extension + def val(self) -> str: + return self._val - @property - def enable_legacy_filename(self) -> bool: - return self._enable_legacy_filename - -def get_file_download_config(t: Type) -> Optional[FileDownloadConfig]: +def get_file_extension(t: Type) -> Optional[str]: if is_annotated(t): - for arg in get_args(t): - if isinstance(arg, FileDownloadConfig): - return arg + for annotation in get_args(t)[1:]: + if isinstance(annotation, FileExtension): + return annotation.val return None diff --git a/flytekit/models/core/types.py b/flytekit/models/core/types.py index e01068f95e..b9619ddf79 100644 --- a/flytekit/models/core/types.py +++ b/flytekit/models/core/types.py @@ -38,19 +38,16 @@ class BlobDimensionality(object): SINGLE = _types_pb2.BlobType.SINGLE MULTIPART = _types_pb2.BlobType.MULTIPART - def __init__(self, format, dimensionality, file_extension="", enable_legacy_filename=False): + def __init__(self, format, dimensionality, file_extension=""): """ :param Text format: A string describing the format of the underlying blob data. :param int dimensionality: An integer from BlobType.BlobDimensionality enum :param Text file_extension: The file extension (e.g. "csv", "parquet") to use during copilot download, e.g. "csv", "parquet". Empty by default. - :param bool enable_legacy_filename: When True and file_extension is set, the copilot - download phase writes the blob to both the extended path and the base path. """ self._format = format self._dimensionality = dimensionality self._file_extension = file_extension - self._enable_legacy_filename = enable_legacy_filename @property def format(self): @@ -77,15 +74,6 @@ def file_extension(self): """ return self._file_extension - @property - def enable_legacy_filename(self): - """ - When True and file_extension is set, the copilot download writes the blob to - both the full path (with extension) and the old path (without extension). - :rtype: bool - """ - return self._enable_legacy_filename - def to_flyte_idl(self): """ :rtype: flyteidl.core.types_pb2.BlobType @@ -94,7 +82,6 @@ def to_flyte_idl(self): format=self.format, dimensionality=self.dimensionality, file_extension=self._file_extension, - enable_legacy_filename=self._enable_legacy_filename, ) @classmethod @@ -107,5 +94,4 @@ def from_flyte_idl(cls, proto): format=proto.format, dimensionality=proto.dimensionality, file_extension=proto.file_extension, - enable_legacy_filename=proto.enable_legacy_filename, ) diff --git a/flytekit/types/file/file.py b/flytekit/types/file/file.py index bdf772b9fd..cc115b8209 100644 --- a/flytekit/types/file/file.py +++ b/flytekit/types/file/file.py @@ -24,7 +24,7 @@ AsyncTypeTransformer, TypeEngine, TypeTransformerFailedError, - get_file_download_config, + get_file_extension, get_underlying_type, ) from flytekit.exceptions.user import FlyteAssertion @@ -482,22 +482,13 @@ def get_format(t: typing.Union[typing.Type[FlyteFile], os.PathLike]) -> str: def get_file_extension(t: typing.Union[typing.Type[FlyteFile], os.PathLike]) -> str: if t is os.PathLike: return "" - file_download_config = get_file_download_config(t) - if file_download_config is None: + file_extension = get_file_extension(t) + if file_extension is None: return "" - return file_download_config.file_extension or "" + return file_extension - @staticmethod - def get_enable_legacy_filename(t: typing.Union[typing.Type[FlyteFile], os.PathLike]) -> str: - if t is os.PathLike: - return False - file_download_config = get_file_download_config(t) - if file_download_config is None: - return False - return file_download_config.enable_legacy_filename or False - - def _blob_type(self, format: str, file_extension: str = "", enable_legacy_filename: bool = False) -> BlobType: - return BlobType(format=format, dimensionality=BlobType.BlobDimensionality.SINGLE, file_extension=file_extension, enable_legacy_filename=enable_legacy_filename) + def _blob_type(self, format: str, file_extension: str = "") -> BlobType: + return BlobType(format=format, dimensionality=BlobType.BlobDimensionality.SINGLE, file_extension=file_extension) def assert_type( self, t: typing.Union[typing.Type[FlyteFile], os.PathLike], v: typing.Union[FlyteFile, os.PathLike, str] @@ -513,7 +504,6 @@ def get_literal_type(self, t: typing.Union[typing.Type[FlyteFile], os.PathLike]) return LiteralType(blob=self._blob_type( format=FlyteFilePathTransformer.get_format(t), file_extension=FlyteFilePathTransformer.get_file_extension(t), - enable_legacy_filename=FlyteFilePathTransformer.get_enable_legacy_filename(t), )) def get_mime_type_from_extension(self, extension: str) -> typing.Union[str, typing.Sequence[str]]: @@ -591,7 +581,6 @@ async def async_to_literal( meta = BlobMetadata(type=self._blob_type( format=FlyteFilePathTransformer.get_format(python_type), file_extension=FlyteFilePathTransformer.get_file_extension(python_type), - enable_legacy_filename=FlyteFilePathTransformer.get_enable_legacy_filename(python_type), )) if isinstance(python_val, FlyteFile): diff --git a/pyproject.toml b/pyproject.toml index 7b02665795..172270a9ac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ "diskcache>=5.2.1", "docker>=4.0.0", "docstring-parser>=0.9.0", - "flyteidl @ git+https://github.com/dominodatalab/flyteidl.git@af517f6", + "flyteidl @ git+https://github.com/ddl-rliu/flyte.git@1ba7c1545198a2820348323e64c23a41a19e7a7d#subdirectory=flyteidl", "fsspec>=2023.3.0", # Bug in 2025.5.0, 2025.5.0post1 https://github.com/fsspec/gcsfs/issues/687 # Bug in 2024.2.0 https://github.com/fsspec/gcsfs/pull/643 diff --git a/tests/flytekit/unit/core/test_flyte_file.py b/tests/flytekit/unit/core/test_flyte_file.py index 91158fb229..29473645ca 100644 --- a/tests/flytekit/unit/core/test_flyte_file.py +++ b/tests/flytekit/unit/core/test_flyte_file.py @@ -17,7 +17,7 @@ from flytekit.core.hash import HashMethod from flytekit.core.launch_plan import LaunchPlan from flytekit.core.task import task -from flytekit.core.type_engine import FileDownloadConfig, TypeEngine +from flytekit.core.type_engine import FileExtension, TypeEngine from flytekit.core.workflow import workflow from flytekit.models.core.types import BlobType from flytekit.models.literals import LiteralMap, Blob, BlobMetadata @@ -764,21 +764,19 @@ def test_headers(): assert len(FlyteFilePathTransformer.get_additional_headers(".gz")) == 1 -def test_transform_flytefile_with_file_download_config(): - csv_file_no_config = FlyteFile["csv"] - lt = FlyteFilePathTransformer().get_literal_type(csv_file_no_config) +def test_transform_flytefile_with_file_extension(): + csv_file_no_file_extension = FlyteFile["csv"] + lt = FlyteFilePathTransformer().get_literal_type(csv_file_no_file_extension) assert lt.blob.file_extension == "" - assert lt.blob.enable_legacy_filename == False - legacy_file = Annotated[FlyteFile["csv"], FileDownloadConfig(file_extension="csv", enable_legacy_filename=True)] - lt = FlyteFilePathTransformer().get_literal_type(legacy_file) + csv_file_with_file_extension = Annotated[FlyteFile["csv"], FileExtension("csv")] + lt = FlyteFilePathTransformer().get_literal_type(csv_file_with_file_extension) assert lt.blob.file_extension == "csv" - assert lt.blob.enable_legacy_filename == True -def test_file_download_config_valid_compound_extension(): - config = FileDownloadConfig(file_extension="tar.gz") - assert config.file_extension == "tar.gz" +def test_file_extension_valid_compound_extension(): + extension = FileExtension("tar.gz") + assert extension.val == "tar.gz" @pytest.mark.parametrize("bad_ext", [ @@ -787,9 +785,9 @@ def test_file_download_config_valid_compound_extension(): "../../escape", "csv!", ]) -def test_file_download_config_rejects_invalid_extensions(bad_ext): +def test_file_extension_rejects_invalid_extensions(bad_ext): with pytest.raises(ValueError, match="Invalid file extension"): - FileDownloadConfig(file_extension=bad_ext) + FileExtension(bad_ext) def test_new_remote_file(): diff --git a/tests/flytekit/unit/models/core/test_types.py b/tests/flytekit/unit/models/core/test_types.py index bf4124eb67..c1b96f595e 100644 --- a/tests/flytekit/unit/models/core/test_types.py +++ b/tests/flytekit/unit/models/core/test_types.py @@ -16,32 +16,27 @@ def test_blob_type(): assert o.format == "csv" assert o.dimensionality == _types.BlobType.BlobDimensionality.SINGLE assert o.file_extension == "" - assert o.enable_legacy_filename == False o2 = _types.BlobType.from_flyte_idl(o.to_flyte_idl()) assert o == o2 assert o2.format == "csv" assert o2.dimensionality == _types.BlobType.BlobDimensionality.SINGLE assert o2.file_extension == "" - assert o2.enable_legacy_filename == False o = _types.BlobType( format="csv", dimensionality=_types.BlobType.BlobDimensionality.SINGLE, file_extension="csv", - enable_legacy_filename=True, ) assert o.format == "csv" assert o.dimensionality == _types.BlobType.BlobDimensionality.SINGLE assert o.file_extension == "csv" - assert o.enable_legacy_filename == True o2 = _types.BlobType.from_flyte_idl(o.to_flyte_idl()) assert o == o2 assert o2.format == "csv" assert o2.dimensionality == _types.BlobType.BlobDimensionality.SINGLE assert o2.file_extension == "csv" - assert o2.enable_legacy_filename == True def test_enum_type(): From d9b5c7bf60aadd34cbf6e4381f489f01563413dd Mon Sep 17 00:00:00 2001 From: ddl-rliu Date: Thu, 9 Apr 2026 16:46:04 -0700 Subject: [PATCH 4/5] Fix lint Signed-off-by: ddl-rliu --- flytekit/core/type_engine.py | 6 +++--- flytekit/types/file/file.py | 20 ++++++++++++-------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/flytekit/core/type_engine.py b/flytekit/core/type_engine.py index f692b2ced1..94a60e8820 100644 --- a/flytekit/core/type_engine.py +++ b/flytekit/core/type_engine.py @@ -114,9 +114,9 @@ class FileExtension: # ContainerTask def t1(file: Annotated[FlyteFile, FileExtension("csv")]): ... # copilot downloads the file to e.g. /inputs/file.csv - + versus... - + def t1(file: FlyteFile["csv"]): ... # copilot downloads the file to e.g. /inputs/file ``` @@ -134,7 +134,7 @@ def __init__(self, val: str = ""): @property def val(self) -> str: return self._val - + def get_file_extension(t: Type) -> Optional[str]: if is_annotated(t): diff --git a/flytekit/types/file/file.py b/flytekit/types/file/file.py index cc115b8209..e07a36df5a 100644 --- a/flytekit/types/file/file.py +++ b/flytekit/types/file/file.py @@ -501,10 +501,12 @@ def assert_type( ) def get_literal_type(self, t: typing.Union[typing.Type[FlyteFile], os.PathLike]) -> LiteralType: - return LiteralType(blob=self._blob_type( - format=FlyteFilePathTransformer.get_format(t), - file_extension=FlyteFilePathTransformer.get_file_extension(t), - )) + return LiteralType( + blob=self._blob_type( + format=FlyteFilePathTransformer.get_format(t), + file_extension=FlyteFilePathTransformer.get_file_extension(t), + ) + ) def get_mime_type_from_extension(self, extension: str) -> typing.Union[str, typing.Sequence[str]]: extension_to_mime_type = { @@ -578,10 +580,12 @@ async def async_to_literal( raise ValueError(f"Incorrect type {python_type}, must be either a FlyteFile or os.PathLike") # information used by all cases - meta = BlobMetadata(type=self._blob_type( - format=FlyteFilePathTransformer.get_format(python_type), - file_extension=FlyteFilePathTransformer.get_file_extension(python_type), - )) + meta = BlobMetadata( + type=self._blob_type( + format=FlyteFilePathTransformer.get_format(python_type), + file_extension=FlyteFilePathTransformer.get_file_extension(python_type), + ) + ) if isinstance(python_val, FlyteFile): # Cast the source path to str type to avoid error raised when the source path is used as the blob uri, From 3edf6e42c93ef15cbe5444fd452b86f560827450 Mon Sep 17 00:00:00 2001 From: ddl-rliu Date: Thu, 9 Apr 2026 16:50:47 -0700 Subject: [PATCH 5/5] Bump commit Signed-off-by: ddl-rliu --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 172270a9ac..0c356c7638 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ "diskcache>=5.2.1", "docker>=4.0.0", "docstring-parser>=0.9.0", - "flyteidl @ git+https://github.com/ddl-rliu/flyte.git@1ba7c1545198a2820348323e64c23a41a19e7a7d#subdirectory=flyteidl", + "flyteidl @ git+https://github.com/ddl-rliu/flyte.git@93ff903e63de6384d41db4c9da8df155612d16db#subdirectory=flyteidl", "fsspec>=2023.3.0", # Bug in 2025.5.0, 2025.5.0post1 https://github.com/fsspec/gcsfs/issues/687 # Bug in 2024.2.0 https://github.com/fsspec/gcsfs/pull/643