Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions src/supervision/dataset/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,7 @@ def as_pascal_voc(
min_image_area_percentage: float = 0.0,
max_image_area_percentage: float = 1.0,
approximation_percentage: float = 0.0,
show_progress: bool = False,
) -> None:
"""
Exports the dataset to PASCAL VOC format. This method saves the images
Expand All @@ -357,11 +358,13 @@ def as_pascal_voc(
approximation_percentage: The percentage of
polygon points to be removed from the input polygon,
in the range [0, 1). Argument is used only for segmentation datasets.
show_progress: If `True`, display a progress bar while saving images.
"""
if images_directory_path:
save_dataset_images(
dataset=self,
images_directory_path=images_directory_path,
show_progress=show_progress,
)
if annotations_directory_path:
Path(annotations_directory_path).mkdir(parents=True, exist_ok=True)
Expand Down Expand Up @@ -390,6 +393,7 @@ def from_pascal_voc(
images_directory_path: str,
annotations_directory_path: str,
force_masks: bool = False,
show_progress: bool = False,
) -> DetectionDataset:
"""
Creates a Dataset instance from PASCAL VOC formatted data.
Expand All @@ -400,6 +404,7 @@ def from_pascal_voc(
containing the PASCAL VOC XML annotations.
force_masks: If True, forces masks to
be loaded for all annotations, regardless of whether they are present.
show_progress: If `True`, display a progress bar while loading images.

Returns:
A DetectionDataset instance containing
Expand Down Expand Up @@ -432,6 +437,7 @@ def from_pascal_voc(
images_directory_path=images_directory_path,
annotations_directory_path=annotations_directory_path,
force_masks=force_masks,
show_progress=show_progress,
)

return DetectionDataset(
Expand All @@ -446,6 +452,7 @@ def from_yolo(
data_yaml_path: str,
force_masks: bool = False,
is_obb: bool = False,
show_progress: bool = False,
) -> DetectionDataset:
"""
Creates a Dataset instance from YOLO formatted data.
Expand All @@ -463,6 +470,7 @@ def from_yolo(
is_obb: If True, loads the annotations in OBB format.
OBB annotations are defined as `[class_id, x, y, x, y, x, y, x, y]`,
where pairs of [x, y] are box corners.
show_progress: If `True`, display a progress bar while loading images.

Returns:
A DetectionDataset instance
Expand Down Expand Up @@ -496,6 +504,7 @@ def from_yolo(
data_yaml_path=data_yaml_path,
force_masks=force_masks,
is_obb=is_obb,
show_progress=show_progress,
)
return DetectionDataset(
classes=classes, images=image_paths, annotations=annotations
Expand All @@ -509,6 +518,7 @@ def as_yolo(
min_image_area_percentage: float = 0.0,
max_image_area_percentage: float = 1.0,
approximation_percentage: float = 0.0,
show_progress: bool = False,
) -> None:
"""
Exports the dataset to YOLO format. This method saves the
Expand Down Expand Up @@ -537,10 +547,13 @@ def as_yolo(
be removed from the input polygon, in the range [0, 1).
This is useful for simplifying the annotations.
Argument is used only for segmentation datasets.
show_progress: If `True`, display a progress bar while saving images.
"""
if images_directory_path is not None:
save_dataset_images(
dataset=self, images_directory_path=images_directory_path
dataset=self,
images_directory_path=images_directory_path,
show_progress=show_progress,
)
if annotations_directory_path is not None:
save_yolo_annotations(
Expand All @@ -559,6 +572,7 @@ def from_coco(
images_directory_path: str,
annotations_path: str,
force_masks: bool = False,
show_progress: bool = False,
) -> DetectionDataset:
Comment on lines 577 to 584
Copy link

Copilot AI Mar 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New show_progress code paths (both loaders and exporters) aren’t covered by tests. Please add unit/integration tests that call from_coco/from_yolo/from_pascal_voc and as_coco/as_yolo/as_pascal_voc with show_progress=True (ideally by monkeypatching tqdm.auto.tqdm to a no-op) to ensure the option doesn’t change outputs or raise exceptions.

Copilot generated this review using guidance from repository custom instructions.
"""
Creates a Dataset instance from COCO formatted data.
Expand All @@ -570,6 +584,7 @@ def from_coco(
force_masks: If True,
forces masks to be loaded for all annotations,
regardless of whether they are present.
show_progress: If `True`, display a progress bar while loading annotations.

Returns:
A DetectionDataset instance containing
the loaded images and annotations.
Expand Down Expand Up @@ -599,6 +614,7 @@ def from_coco(
images_directory_path=images_directory_path,
annotations_path=annotations_path,
force_masks=force_masks,
show_progress=show_progress,
)
return DetectionDataset(classes=classes, images=images, annotations=annotations)

Expand All @@ -609,6 +625,7 @@ def as_coco(
min_image_area_percentage: float = 0.0,
max_image_area_percentage: float = 1.0,
approximation_percentage: float = 0.0,
show_progress: bool = False,
) -> None:
"""
Exports the dataset to COCO format. This method saves the
Expand Down Expand Up @@ -645,10 +662,13 @@ def as_coco(
to be removed from the input polygon,
in the range [0, 1). This is useful for simplifying the annotations.
Argument is used only for segmentation datasets.
show_progress: If `True`, display a progress bar while saving images.
"""
if images_directory_path is not None:
save_dataset_images(
dataset=self, images_directory_path=images_directory_path
dataset=self,
images_directory_path=images_directory_path,
show_progress=show_progress,
)
if annotations_path is not None:
save_coco_annotations(
Expand Down
66 changes: 43 additions & 23 deletions src/supervision/dataset/formats/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import numpy as np
import numpy.typing as npt
from tqdm.auto import tqdm

from supervision.dataset.utils import (
approximate_mask_with_polygons,
Expand Down Expand Up @@ -254,6 +255,7 @@ def load_coco_annotations(
annotations_path: str,
force_masks: bool = False,
use_iscrowd: bool = True,
show_progress: bool = False,
) -> tuple[list[str], list[str], dict[str, Detections]]:
"""
Load COCO annotations and convert them to `Detections`.
Expand All @@ -267,9 +269,21 @@ def load_coco_annotations(
annotations_path: Path to COCO JSON annotations.
force_masks: If `True`, always attempt to load masks.
use_iscrowd: If `True`, include `iscrowd` and `area` in detection data.
show_progress: If `True`, display a progress bar while loading annotations.

Returns:
A tuple of `(classes, image_paths, annotations)`.

Examples:
```python
import supervision as sv

ds = sv.DetectionDataset.from_coco(
images_directory_path="images/train",
annotations_path="images/train/_annotations.coco.json",
show_progress=True,
)
```
"""
coco_data = read_json_file(file_path=annotations_path)
classes = coco_categories_to_classes(coco_categories=coco_data["categories"])
Expand All @@ -286,32 +300,38 @@ def load_coco_annotations(
images = []
annotations = {}

for coco_image in coco_images:
image_name, image_width, image_height = (
coco_image["file_name"],
coco_image["width"],
coco_image["height"],
)
image_annotations = coco_annotations_groups.get(coco_image["id"], [])
image_path = os.path.join(images_directory_path, image_name)
with tqdm(
total=len(coco_images),
desc="Loading COCO annotations",
disable=not show_progress,
) as progress_bar:
for coco_image in coco_images:
image_name, image_width, image_height = (
coco_image["file_name"],
coco_image["width"],
coco_image["height"],
)
image_annotations = coco_annotations_groups.get(coco_image["id"], [])
image_path = os.path.join(images_directory_path, image_name)

with_masks = force_masks or any(
_with_seg_mask(annotation) for annotation in image_annotations
)
annotation = coco_annotations_to_detections(
image_annotations=image_annotations,
resolution_wh=(image_width, image_height),
with_masks=with_masks,
use_iscrowd=use_iscrowd,
)
with_masks = force_masks or any(
_with_seg_mask(annotation) for annotation in image_annotations
)
annotation = coco_annotations_to_detections(
image_annotations=image_annotations,
resolution_wh=(image_width, image_height),
with_masks=with_masks,
use_iscrowd=use_iscrowd,
)

annotation = map_detections_class_id(
source_to_target_mapping=class_index_mapping,
detections=annotation,
)
annotation = map_detections_class_id(
source_to_target_mapping=class_index_mapping,
detections=annotation,
)

images.append(image_path)
annotations[image_path] = annotation
images.append(image_path)
annotations[image_path] = annotation
progress_bar.update(1)

return classes, images, annotations

Expand Down
59 changes: 41 additions & 18 deletions src/supervision/dataset/formats/pascal_voc.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import numpy.typing as npt
from defusedxml.ElementTree import parse, tostring
from defusedxml.minidom import parseString
from tqdm.auto import tqdm

from supervision.dataset.utils import approximate_mask_with_polygons
from supervision.detection.core import Detections
Expand Down Expand Up @@ -149,6 +150,7 @@ def load_pascal_voc_annotations(
images_directory_path: str,
annotations_directory_path: str,
force_masks: bool = False,
show_progress: bool = False,
) -> tuple[list[str], list[str], dict[str, Detections]]:
"""
Loads PASCAL VOC XML annotations and returns the image name,
Expand All @@ -160,11 +162,23 @@ def load_pascal_voc_annotations(
PASCAL VOC annotation files.
force_masks: If True, forces masks to be loaded for all
annotations, regardless of whether they are present.
show_progress: If `True`, display a progress bar while loading images.

Returns:
A tuple with a list
of class names, a list of paths to images, and a dictionary with image
paths as keys and corresponding Detections instances as values.

Examples:
```python
import supervision as sv

ds = sv.DetectionDataset.from_pascal_voc(
images_directory_path="images/train",
annotations_directory_path="images/train/labels",
show_progress=True,
)
```
"""

image_paths = [
Expand All @@ -177,24 +191,33 @@ def load_pascal_voc_annotations(
classes: list[str] = []
annotations = {}

for image_path in image_paths:
image_stem = Path(image_path).stem
annotation_path = os.path.join(annotations_directory_path, f"{image_stem}.xml")
if not os.path.exists(annotation_path):
annotations[image_path] = Detections.empty()
continue

tree = parse(annotation_path)
root = tree.getroot()

image = cv2.imread(image_path)
if image is None:
raise ValueError(f"Could not read image from path: {image_path}")
resolution_wh = (image.shape[1], image.shape[0])
annotation, classes = detections_from_xml_obj(
root, classes, resolution_wh, force_masks
)
annotations[image_path] = annotation
with tqdm(
total=len(image_paths),
desc="Loading Pascal VOC annotations",
disable=not show_progress,
) as progress_bar:
for image_path in image_paths:
image_stem = Path(image_path).stem
annotation_path = os.path.join(
annotations_directory_path, f"{image_stem}.xml"
)
if not os.path.exists(annotation_path):
annotations[image_path] = Detections.empty()
progress_bar.update(1)
continue

tree = parse(annotation_path)
root = tree.getroot()

image = cv2.imread(image_path)
if image is None:
raise ValueError(f"Could not read image from path: {image_path}")
resolution_wh = (image.shape[1], image.shape[0])
annotation, classes = detections_from_xml_obj(
root, classes, resolution_wh, force_masks
)
annotations[image_path] = annotation
progress_bar.update(1)

return classes, image_paths, annotations

Expand Down
Loading
Loading