Skip to content
Closed
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ __pycache__/

# C extensions
*.so
!lib/*.so
!lib/*.so.*

# Distribution / packaging
.Python
Expand All @@ -15,6 +17,7 @@ downloads/
eggs/
.eggs/
lib/
!lib/
lib64/
parts/
sdist/
Expand Down
Empty file.
43 changes: 43 additions & 0 deletions submissions/optimized/compress.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/usr/bin/env bash
# Best tested: 50% lanczos, CRF 34, GOP 240, QM + fg22 + Laplacian sharpening
# Preset 4: 2.18 | Preset 0: ~2.03
set -euo pipefail

HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PD="$(cd "${HERE}/../.." && pwd)"

IN_DIR="${PD}/videos"
VIDEO_NAMES_FILE="${PD}/public_test_video_names.txt"
ARCHIVE_DIR="${HERE}/archive"

# Optional overrides: --in-dir <dir> --video-names-file <file>
while [[ $# -gt 0 ]]; do
  case "$1" in
    --in-dir|--in_dir) IN_DIR="${2%/}"; shift 2 ;;
    --video-names-file|--video_names_file) VIDEO_NAMES_FILE="$2"; shift 2 ;;
    *) echo "Unknown arg: $1" >&2; exit 2 ;;
  esac
done

rm -rf "$ARCHIVE_DIR"
mkdir -p "$ARCHIVE_DIR"

while IFS= read -r rel; do
  [[ -z "$rel" ]] && continue
  IN="${IN_DIR}/${rel}"
  BASE="${rel%.*}"
  OUT="${ARCHIVE_DIR}/${BASE}.mkv"
  # Listed names may contain subdirectories; ensure the target dir exists.
  mkdir -p "$(dirname "$OUT")"

  echo "→ ${IN} → ${OUT}"

  # CFR 20 fps in/out, half-resolution lanczos downscale (even dimensions),
  # SVT-AV1 with synthetic film grain + quantization matrices enabled.
  ffmpeg -nostdin -y -hide_banner -loglevel warning \
    -r 20 -fflags +genpts -i "$IN" \
    -vf "scale=trunc(iw*0.50/2)*2:trunc(ih*0.50/2)*2:flags=lanczos" \
    -c:v libsvtav1 -preset 0 -crf 34 -g 240 \
    -svtav1-params "film-grain=22:film-grain-denoise=1:enable-qm=1:qm-min=0" \
    -pix_fmt yuv420p \
    -r 20 "$OUT"
done < "$VIDEO_NAMES_FILE"

# Bundle every encoded file (preserving subdirectories) into the archive.
cd "$ARCHIVE_DIR"
zip -r "${HERE}/archive.zip" .
echo "Compressed to ${HERE}/archive.zip"
44 changes: 44 additions & 0 deletions submissions/optimized/inflate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env python
"""
Inflate with Laplacian sharpening: restores edge detail lost in compression.
Tested: -0.01 score improvement (2.05 → 2.04 with preset 0).
"""
import av, torch, sys
import torch.nn.functional as F
from frame_utils import camera_size, yuv420_to_rgb


def sharpen(x, strength=0.20):
    """Apply 3x3 Laplacian sharpening to a batched image tensor.

    Args:
        x: float tensor of shape (N, C, H, W).
        strength: weight of the high-frequency detail added back.

    Returns:
        Tensor of the same shape with edges enhanced.
    """
    channels = x.shape[1]
    lap = torch.tensor(
        [[0.0, -1.0, 0.0],
         [-1.0, 4.0, -1.0],
         [0.0, -1.0, 0.0]],
        dtype=torch.float32,
        device=x.device,
    ).reshape(1, 1, 3, 3).expand(channels, 1, 3, 3)
    # Reflect-pad so the convolution keeps spatial size and avoids dark borders.
    padded = F.pad(x, [1, 1, 1, 1], mode='reflect')
    detail = F.conv2d(padded, lap, groups=channels)
    return x + strength * detail


def decode_and_resize_to_file(video_path: str, dst: str):
    """Decode a video, upscale each frame to the camera resolution with
    bicubic interpolation + Laplacian sharpening, and dump raw RGB24 to `dst`.

    Args:
        video_path: source video; raw `.hevc` elementary streams get an
            explicit demuxer hint, everything else is auto-detected.
        dst: output path for the headerless (N, H, W, 3) uint8 dump.

    Returns:
        Number of frames written.
    """
    target_w, target_h = camera_size
    fmt = 'hevc' if video_path.endswith('.hevc') else None
    container = av.open(video_path, format=fmt)
    try:
        stream = container.streams.video[0]
        n = 0
        with open(dst, 'wb') as f:
            for frame in container.decode(stream):
                t = yuv420_to_rgb(frame)  # (H, W, 3) uint8 — assumed; TODO confirm against frame_utils
                H, W, _ = t.shape
                if H != target_h or W != target_w:
                    x = t.permute(2, 0, 1).unsqueeze(0).float()
                    x = F.interpolate(x, size=(target_h, target_w), mode='bicubic', align_corners=False)
                    x = sharpen(x)  # restore edge detail lost to compression
                    t = x.clamp(0, 255).squeeze(0).permute(1, 2, 0).round().to(torch.uint8)
                f.write(t.contiguous().numpy().tobytes())
                n += 1
    finally:
        # Always release the demuxer, even if decoding fails mid-stream
        # (the original leaked the container on exceptions).
        container.close()
    return n


if __name__ == "__main__":
    # CLI: inflate.py <src_video> <dst_raw>
    src, dst = sys.argv[1], sys.argv[2]
    n = decode_and_resize_to_file(src, dst)
    print(f"saved {n} frames")
28 changes: 28 additions & 0 deletions submissions/optimized/inflate.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env bash
# Must produce a raw video file at `<output_dir>/<base_name>.raw`.
# A `.raw` file is a flat binary dump of uint8 RGB frames with shape `(N, H, W, 3)`
# where N is the number of frames, H and W match the original video dimensions, no header.
set -euo pipefail

HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT="$(cd "$HERE/../.." && pwd)"
SUB_NAME="$(basename "$HERE")"

DATA_DIR="$1"
OUTPUT_DIR="$2"
FILE_LIST="$3"

mkdir -p "$OUTPUT_DIR"

# Resolve arguments to absolute paths up front. The loop runs from $ROOT so
# the `submissions.<name>.inflate` module resolves; in the original, the
# in-loop `cd` silently broke relative arguments after the first iteration.
DATA_DIR="$(cd "$DATA_DIR" && pwd)"
OUTPUT_DIR="$(cd "$OUTPUT_DIR" && pwd)"
FILE_LIST="$(cd "$(dirname "$FILE_LIST")" && pwd)/$(basename "$FILE_LIST")"

cd "$ROOT"

while IFS= read -r line; do
  [ -z "$line" ] && continue
  BASE="${line%.*}"
  SRC="${DATA_DIR}/${BASE}.mkv"
  DST="${OUTPUT_DIR}/${BASE}.raw"

  # Fail fast if an expected encoded file is missing from the archive.
  [ ! -f "$SRC" ] && echo "ERROR: ${SRC} not found" >&2 && exit 1

  printf "Decoding + resizing %s ... " "$line"
  python -m "submissions.${SUB_NAME}.inflate" "$SRC" "$DST"
  echo "done"  # terminate the progress line started by printf
done < "$FILE_LIST"
32 changes: 32 additions & 0 deletions submissions/roi_v2/compress.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env bash
set -euo pipefail
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PD="$(cd "${HERE}/../.." && pwd)"
TMP_DIR="${PD}/tmp/roi_v2"
IN_DIR="${PD}/videos"
VIDEO_NAMES_FILE="${PD}/public_test_video_names.txt"
ARCHIVE_DIR="${HERE}/archive"
rm -rf "$ARCHIVE_DIR"; mkdir -p "$ARCHIVE_DIR" "$TMP_DIR"
# Export everything the inner subshell needs so the command string below can
# stay one plain single-quoted literal. The previous quote-splicing
# ('"${TMP_DIR}"' etc.) broke on paths containing single quotes.
export IN_DIR ARCHIVE_DIR PD TMP_DIR HERE
head -n "$(wc -l < "$VIDEO_NAMES_FILE")" "$VIDEO_NAMES_FILE" | xargs -P1 -I{} bash -lc '
rel="$1"; [[ -z "$rel" ]] && exit 0
IN="${IN_DIR}/${rel}"; BASE="${rel%.*}"
OUT="${ARCHIVE_DIR}/${BASE}.mkv"; PRE_IN="${TMP_DIR}/${BASE}.pre.mkv"
rm -f "$PRE_IN"
cd "$PD"
# Stage 1: ROI-aware denoise of the region outside the driving corridor.
.venv/bin/python -m submissions.roi_v2.preprocess \
  --input "$IN" --output "$PRE_IN" \
  --outside-luma-denoise 2.5 --outside-chroma-mode medium \
  --feather-radius 24 --outside-blend 0.50
# Prefer the bundled ffmpeg build (with its bundled SVT-AV1); fall back to PATH.
FFMPEG="${HERE}/ffmpeg-new"
[ ! -x "$FFMPEG" ] && FFMPEG="ffmpeg"
export LD_LIBRARY_PATH="${HERE}/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
Comment on lines +10 to +23
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The inner bash -lc command is building PRE_IN with embedded single-quote characters (PRE_IN="'"${TMP_DIR}"'/${BASE}.pre.mkv"). That makes the resulting path contain literal quotes, so rm -f, the Python preprocess step, and ffmpeg -i will likely operate on a different/nonexistent filename. Prefer exporting TMP_DIR (and other needed vars) for the subshell and set PRE_IN="${TMP_DIR}/${BASE}.pre.mkv" inside the subshell, or avoid nested quoting by using a plain while read loop.

Suggested change
export IN_DIR ARCHIVE_DIR PD
head -n "$(wc -l < "$VIDEO_NAMES_FILE")" "$VIDEO_NAMES_FILE" | xargs -P1 -I{} bash -lc '
rel="$1"; [[ -z "$rel" ]] && exit 0
IN="${IN_DIR}/${rel}"; BASE="${rel%.*}"
OUT="${ARCHIVE_DIR}/${BASE}.mkv"; PRE_IN="'"${TMP_DIR}"'/${BASE}.pre.mkv"
rm -f "$PRE_IN"
cd "'"${PD}"'"
.venv/bin/python -m submissions.roi_v2.preprocess \
--input "$IN" --output "$PRE_IN" \
--outside-luma-denoise 2.5 --outside-chroma-mode medium \
--feather-radius 24 --outside-blend 0.50
FFMPEG="'"${HERE}"'/ffmpeg-new"
[ ! -x "$FFMPEG" ] && FFMPEG="ffmpeg"
export LD_LIBRARY_PATH="'"${HERE}"'/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export IN_DIR ARCHIVE_DIR PD TMP_DIR HERE
head -n "$(wc -l < "$VIDEO_NAMES_FILE")" "$VIDEO_NAMES_FILE" | xargs -P1 -I{} bash -lc '
rel="$1"; [[ -z "$rel" ]] && exit 0
IN="${IN_DIR}/${rel}"; BASE="${rel%.*}"
OUT="${ARCHIVE_DIR}/${BASE}.mkv"; PRE_IN="${TMP_DIR}/${BASE}.pre.mkv"
rm -f "$PRE_IN"
cd "${PD}"
.venv/bin/python -m submissions.roi_v2.preprocess \
--input "$IN" --output "$PRE_IN" \
--outside-luma-denoise 2.5 --outside-chroma-mode medium \
--feather-radius 24 --outside-blend 0.50
FFMPEG="${HERE}/ffmpeg-new"
[ ! -x "$FFMPEG" ] && FFMPEG="ffmpeg"
export LD_LIBRARY_PATH="${HERE}/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

Copilot uses AI. Check for mistakes.
Comment on lines +10 to +23
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar quoting issue for FFMPEG and LD_LIBRARY_PATH (values include literal single quotes). That can make the -x check and the final ffmpeg invocation look for a path like '/.../ffmpeg-new' instead of the real file, and can also corrupt the library search path. Use normal double-quoting without injecting literal quotes, or export HERE for the subshell and reference it directly.

Suggested change
export IN_DIR ARCHIVE_DIR PD
head -n "$(wc -l < "$VIDEO_NAMES_FILE")" "$VIDEO_NAMES_FILE" | xargs -P1 -I{} bash -lc '
rel="$1"; [[ -z "$rel" ]] && exit 0
IN="${IN_DIR}/${rel}"; BASE="${rel%.*}"
OUT="${ARCHIVE_DIR}/${BASE}.mkv"; PRE_IN="'"${TMP_DIR}"'/${BASE}.pre.mkv"
rm -f "$PRE_IN"
cd "'"${PD}"'"
.venv/bin/python -m submissions.roi_v2.preprocess \
--input "$IN" --output "$PRE_IN" \
--outside-luma-denoise 2.5 --outside-chroma-mode medium \
--feather-radius 24 --outside-blend 0.50
FFMPEG="'"${HERE}"'/ffmpeg-new"
[ ! -x "$FFMPEG" ] && FFMPEG="ffmpeg"
export LD_LIBRARY_PATH="'"${HERE}"'/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export IN_DIR ARCHIVE_DIR PD HERE
head -n "$(wc -l < "$VIDEO_NAMES_FILE")" "$VIDEO_NAMES_FILE" | xargs -P1 -I{} bash -lc '
rel="$1"; [[ -z "$rel" ]] && exit 0
IN="${IN_DIR}/${rel}"; BASE="${rel%.*}"
OUT="${ARCHIVE_DIR}/${BASE}.mkv"; PRE_IN="'"${TMP_DIR}"'/${BASE}.pre.mkv"
rm -f "$PRE_IN"
cd "$PD"
.venv/bin/python -m submissions.roi_v2.preprocess \
--input "$IN" --output "$PRE_IN" \
--outside-luma-denoise 2.5 --outside-chroma-mode medium \
--feather-radius 24 --outside-blend 0.50
FFMPEG="${HERE}/ffmpeg-new"
[ ! -x "$FFMPEG" ] && FFMPEG="ffmpeg"
export LD_LIBRARY_PATH="${HERE}/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

Copilot uses AI. Check for mistakes.
# Stage 2: encode the preprocessed intermediate — 45% lanczos downscale,
# SVT-AV1 preset 0 / CRF 33, synthetic film grain, fixed 180-frame keyint,
# scene-cut detection off; forced CFR 20 fps on both input and output.
"$FFMPEG" -nostdin -y -hide_banner -loglevel warning \
-r 20 -fflags +genpts -i "$PRE_IN" \
-vf "scale=trunc(iw*0.45/2)*2:trunc(ih*0.45/2)*2:flags=lanczos" \
-pix_fmt yuv420p -c:v libsvtav1 -preset 0 -crf 33 \
-svtav1-params "film-grain=22:keyint=180:scd=0" \
-r 20 "$OUT"
rm -f "$PRE_IN"  # drop the lossless intermediate as soon as it is encoded
' _ {}
# Package every encoded .mkv into the submission archive.
cd "$ARCHIVE_DIR"; zip -r "${HERE}/archive.zip" .
Binary file added submissions/roi_v2/ffmpeg-new
Binary file not shown.
41 changes: 41 additions & 0 deletions submissions/roi_v2/inflate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env python
import av, torch, numpy as np
import torch.nn.functional as F
from PIL import Image
from frame_utils import camera_size, yuv420_to_rgb

# Prefer GPU for the per-frame unsharp convolution when available.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 9-tap binomial unsharp kernel (Pascal row 8 / 65536)
# Separable Gaussian approximation: outer product of binomial row 8,
# normalized to sum to 1, replicated per RGB channel for a grouped conv.
_r = torch.tensor([1., 8., 28., 56., 70., 56., 28., 8., 1.])
KERNEL = (torch.outer(_r, _r) / (_r.sum()**2)).to(DEVICE).expand(3, 1, 9, 9)
# Unsharp-mask amount: output = x + STRENGTH * (x - blurred(x)).
STRENGTH = 0.40


def decode_and_resize_to_file(video_path: str, dst: str):
    """Decode a video, upscale each frame to the camera resolution with PIL
    Lanczos resampling + binomial unsharp masking, and dump raw RGB24 to `dst`.

    Args:
        video_path: source video readable by PyAV.
        dst: output path for the headerless (N, H, W, 3) uint8 dump.

    Returns:
        Number of frames written.
    """
    target_w, target_h = camera_size
    container = av.open(video_path)
    try:
        stream = container.streams.video[0]
        n = 0
        with open(dst, 'wb') as f:
            for frame in container.decode(stream):
                t = yuv420_to_rgb(frame)  # (H, W, 3) uint8 — assumed; TODO confirm against frame_utils
                H, W, _ = t.shape
                if H != target_h or W != target_w:
                    # High-quality spatial upscale on CPU via PIL Lanczos.
                    pil = Image.fromarray(t.numpy())
                    pil = pil.resize((target_w, target_h), Image.LANCZOS)
                    x = torch.from_numpy(np.array(pil)).permute(2, 0, 1).unsqueeze(0).float().to(DEVICE)
                    # Unsharp mask: x + STRENGTH * (x - gaussian_blur(x)).
                    blur = F.conv2d(F.pad(x, (4, 4, 4, 4), mode='reflect'), KERNEL, padding=0, groups=3)
                    x = x + STRENGTH * (x - blur)
                    t = x.clamp(0, 255).squeeze(0).permute(1, 2, 0).round().cpu().to(torch.uint8)
                f.write(t.contiguous().numpy().tobytes())
                n += 1
    finally:
        # Always release the demuxer, even if decoding fails mid-stream
        # (the original leaked the container on exceptions).
        container.close()
    return n


if __name__ == "__main__":
    import sys
    # CLI: inflate.py <src_video> <dst_raw>
    src, dst = sys.argv[1], sys.argv[2]
    n = decode_and_resize_to_file(src, dst)
    print(f"saved {n} frames")
26 changes: 26 additions & 0 deletions submissions/roi_v2/inflate.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env bash
set -euo pipefail

HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT="$(cd "$HERE/../.." && pwd)"
SUB_NAME="$(basename "$HERE")"

DATA_DIR="$1"
OUTPUT_DIR="$2"
FILE_LIST="$3"

mkdir -p "$OUTPUT_DIR"

# Resolve arguments to absolute paths up front. The loop runs from $ROOT so
# `.venv/bin/python` and the `submissions.<name>.inflate` module resolve; in
# the original, the in-loop `cd` silently broke relative arguments after the
# first iteration.
DATA_DIR="$(cd "$DATA_DIR" && pwd)"
OUTPUT_DIR="$(cd "$OUTPUT_DIR" && pwd)"
FILE_LIST="$(cd "$(dirname "$FILE_LIST")" && pwd)/$(basename "$FILE_LIST")"

cd "$ROOT"

while IFS= read -r line; do
  [ -z "$line" ] && continue
  BASE="${line%.*}"
  SRC="${DATA_DIR}/${BASE}.mkv"
  DST="${OUTPUT_DIR}/${BASE}.raw"

  # Fail fast if an expected encoded file is missing from the archive.
  [ ! -f "$SRC" ] && echo "ERROR: ${SRC} not found" >&2 && exit 1

  printf "Decoding + resizing %s ... " "$line"
  .venv/bin/python -m "submissions.${SUB_NAME}.inflate" "$SRC" "$DST"
  echo "done"
done < "$FILE_LIST"
1 change: 1 addition & 0 deletions submissions/roi_v2/lib/libSvtAv1Enc.so.2
Binary file added submissions/roi_v2/lib/libSvtAv1Enc.so.2.3.0
Binary file not shown.
146 changes: 146 additions & 0 deletions submissions/roi_v2/preprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#!/usr/bin/env python3
"""ROI-aware preprocessing: denoise outside driving corridor to save bits for encoder."""
import argparse
import sys
from pathlib import Path

import av
import torch
import torch.nn.functional as F
from PIL import Image, ImageDraw, ImageFilter

ROOT = Path(__file__).resolve().parents[2]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))

from frame_utils import yuv420_to_rgb


def collapse_chroma(x: torch.Tensor, mode: str) -> torch.Tensor:
    """Soften the chroma (U/V) planes of an (N, 3, H, W) YUV tensor.

    Args:
        x: YUV tensor; channels 1-2 are modified IN PLACE for non-"normal" modes.
        mode: box-blur intensity — "normal" (no-op), "soft", "medium", "strong".

    Returns:
        The (possibly mutated) input tensor.

    Raises:
        ValueError: if `mode` is not one of the supported names (the original
            raised a bare KeyError here).
    """
    if mode == "normal":
        return x
    radii = {"soft": 1, "medium": 2, "strong": 4}
    if mode not in radii:
        raise ValueError(
            f"unknown chroma mode {mode!r}; expected 'normal', 'soft', 'medium' or 'strong'")
    k = radii[mode]
    uv = x[:, 1:3]
    # Odd-sized box blur with 'same' padding keeps the spatial size unchanged.
    uv = F.avg_pool2d(uv, kernel_size=k * 2 + 1, stride=1, padding=k)
    x[:, 1:3] = uv
    return x


def apply_luma_denoise(x: torch.Tensor, strength: float) -> torch.Tensor:
    """Gaussian-blur the luma (Y) plane of an (N, 3, H, W) YUV tensor in place,
    blending the blur in proportionally to `strength` (<= 0 is a no-op)."""
    if strength <= 0:
        return x
    # Kernel size and sigma both scale with the requested strength.
    kernel_size = 3 if strength <= 2.0 else 5
    sigma = max(0.1, strength * 0.35)
    coords = torch.arange(kernel_size, device=x.device) - kernel_size // 2
    g = torch.exp(-(coords ** 2) / (2 * sigma * sigma))
    kernel_1d = (g / g.sum()).float()
    # Separable Gaussian built as a dense 2D kernel for a single conv pass.
    kernel_2d = torch.outer(kernel_1d, kernel_1d).view(1, 1, kernel_size, kernel_size)
    y = x[:, 0:1]
    # NOTE(review): zero padding here slightly darkens frame borders; reflect
    # or replicate padding before the conv would avoid edge artifacts — confirm
    # before changing, as it alters encoder input.
    y_blur = F.conv2d(y, kernel_2d, padding=kernel_size // 2)
Comment on lines +36 to +39
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

apply_luma_denoise uses F.conv2d(..., padding=kernel_size//2) which applies zero-padding at the borders. That will darken/brighten edges and create visible border artifacts (and can harm downstream metrics). Consider using F.pad(..., mode='reflect'|'replicate') before the convolution and set padding=0 for the conv.

Suggested change
kernel_1d = (g / g.sum()).float()
kernel_2d = torch.outer(kernel_1d, kernel_1d).view(1, 1, kernel_size, kernel_size)
y = x[:, 0:1]
y_blur = F.conv2d(y, kernel_2d, padding=kernel_size // 2)
kernel_1d = (g / g.sum()).to(dtype=x.dtype)
kernel_2d = torch.outer(kernel_1d, kernel_1d).view(1, 1, kernel_size, kernel_size)
y = x[:, 0:1]
pad = kernel_size // 2
y_padded = F.pad(y, (pad, pad, pad, pad), mode="replicate")
y_blur = F.conv2d(y_padded, kernel_2d, padding=0)

Copilot uses AI. Check for mistakes.
blend = min(0.9, strength / 3.0)
x[:, 0:1] = (1 - blend) * y + blend * y_blur
return x


def rgb_to_yuv(rgb: torch.Tensor) -> torch.Tensor:
    """Convert an (N, 3, H, W) RGB tensor (0-255 range) to full-range YUV."""
    red = rgb[:, 0:1]
    green = rgb[:, 1:2]
    blue = rgb[:, 2:3]
    luma = 0.299 * red + 0.587 * green + 0.114 * blue
    # Chroma planes are centered on 128 so they round-trip through uint8.
    chroma_u = (blue - luma) / 1.772 + 128.0
    chroma_v = (red - luma) / 1.402 + 128.0
    return torch.cat([luma, chroma_u, chroma_v], dim=1)


def yuv_to_rgb(yuv: torch.Tensor) -> torch.Tensor:
    """Invert rgb_to_yuv: map a full-range (N, 3, H, W) YUV tensor back to RGB."""
    luma = yuv[:, 0:1]
    # Undo the 128 chroma offset applied by rgb_to_yuv.
    chroma_u = yuv[:, 1:2] - 128.0
    chroma_v = yuv[:, 2:3] - 128.0
    red = luma + 1.402 * chroma_v
    green = luma - 0.344136 * chroma_u - 0.714136 * chroma_v
    blue = luma + 1.772 * chroma_u
    return torch.cat([red, green, blue], dim=1)


def segment_polygon(frame_idx: int, width: int, height: int) -> list[tuple[float, float]]:
    """Return the driving-corridor ROI polygon for a frame, in pixel coords.

    Vertices are hand-tuned per 300-frame segment; frames beyond the tuned
    ranges fall back to a generic corridor shape.
    """
    tuned = {
        (0, 299): [(0.14, 0.52), (0.82, 0.48), (0.98, 1.00), (0.05, 1.00)],
        (300, 599): [(0.10, 0.50), (0.76, 0.47), (0.92, 1.00), (0.00, 1.00)],
        (600, 899): [(0.18, 0.50), (0.84, 0.47), (0.98, 1.00), (0.06, 1.00)],
        (900, 1199): [(0.22, 0.52), (0.90, 0.49), (1.00, 1.00), (0.10, 1.00)],
    }
    for (lo, hi), poly in tuned.items():
        if lo <= frame_idx <= hi:
            return [(px * width, py * height) for px, py in poly]
    return [(0.15 * width, 0.52 * height), (0.85 * width, 0.48 * height), (width, height), (0, height)]


# The ROI polygon only changes every ~300 frames, so cache the rendered
# (and expensively Gaussian-blurred) masks instead of rebuilding per frame.
# Callers treat the returned mask as read-only, so sharing is safe.
_MASK_CACHE: dict = {}


def build_mask(frame_idx: int, width: int, height: int, feather_radius: int) -> torch.Tensor:
    """Rasterize the ROI polygon for `frame_idx` into a feathered float mask.

    Returns:
        (1, 1, height, width) float tensor in [0, 1]; 1 inside the corridor,
        falling off to 0 outside over `feather_radius` pixels.
    """
    poly = tuple(segment_polygon(frame_idx, width, height))
    key = (poly, width, height, feather_radius)
    cached = _MASK_CACHE.get(key)
    if cached is not None:
        return cached
    img = Image.new("L", (width, height), 0)
    draw = ImageDraw.Draw(img)
    draw.polygon(list(poly), fill=255)
    if feather_radius > 0:
        img = img.filter(ImageFilter.GaussianBlur(radius=feather_radius))
    mask = torch.frombuffer(memoryview(img.tobytes()), dtype=torch.uint8).clone().view(height, width).float() / 255.0
    mask = mask.unsqueeze(0).unsqueeze(0)
    _MASK_CACHE[key] = mask
    return mask
Comment on lines +75 to +82
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

build_mask constructs and Gaussian-blurs a full-resolution PIL image for every frame. For 1200-frame videos this is a significant CPU cost during compression. Since the mask only changes across a few frame ranges, consider caching the blurred mask per segment (or precomputing per frame_idx) and reusing it instead of rebuilding it every frame.

Copilot uses AI. Check for mistakes.


def process_frame(
    frame_rgb: torch.Tensor,
    frame_idx: int,
    outside_luma_denoise: float,
    outside_chroma_mode: str,
    feather_radius: int,
    outside_blend: float,
) -> torch.Tensor:
    """Denoise/soften the region outside the driving-corridor ROI of one frame.

    Args:
        frame_rgb: (H, W, 3) frame (uint8-range values).
        frame_idx: index used to pick the per-segment ROI polygon.
        outside_luma_denoise: Gaussian-blur strength applied to luma.
        outside_chroma_mode: chroma box-blur mode ("normal"/"soft"/"medium"/"strong").
        feather_radius: Gaussian feather (pixels) on the ROI mask edge.
        outside_blend: how strongly the processed version replaces the original
            outside the ROI (0 = untouched, 1 = fully replaced).

    Returns:
        (H, W, 3) uint8 frame: original inside the ROI, blended toward the
        denoised version outside.
    """
    # (H, W, 3) -> (1, 3, H, W) float for the conv-based filters below.
    chw = frame_rgb.permute(2, 0, 1).float().unsqueeze(0)
    # Mask is ~1 inside the corridor, feathered toward 0 outside.
    mask = build_mask(frame_idx, chw.shape[-1], chw.shape[-2], feather_radius).to(chw.device)
    yuv = rgb_to_yuv(chw)
    processed = yuv.clone()  # the denoise helpers mutate their input in place
    processed = apply_luma_denoise(processed, outside_luma_denoise)
    processed = collapse_chroma(processed, outside_chroma_mode)
    processed_rgb = yuv_to_rgb(processed)
    # Blend weight peaks far outside the mask and is zero inside the corridor.
    outside_alpha = (1.0 - mask) * outside_blend
    mixed = chw * (1.0 - outside_alpha) + processed_rgb * outside_alpha
    return mixed.clamp(0, 255).round().to(torch.uint8).squeeze(0).permute(1, 2, 0)


def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("--input", type=Path, required=True)
parser.add_argument("--output", type=Path, required=True)
parser.add_argument("--outside-luma-denoise", type=float, default=2.5)
parser.add_argument("--outside-chroma-mode", type=str, default="medium")
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

collapse_chroma assumes mode is one of {soft, medium, strong, normal} and will raise a KeyError for any other value, but the argparse option doesn't restrict choices. Add choices=["normal","soft","medium","strong"] (and/or validate with a clear error) so bad CLI input fails with a helpful message.

Suggested change
parser.add_argument("--outside-chroma-mode", type=str, default="medium")
parser.add_argument(
"--outside-chroma-mode",
type=str,
default="medium",
choices=["normal", "soft", "medium", "strong"],
)

Copilot uses AI. Check for mistakes.
parser.add_argument("--feather-radius", type=int, default=24)
parser.add_argument("--outside-blend", type=float, default=0.60)
args = parser.parse_args()

in_container = av.open(str(args.input))
in_stream = in_container.streams.video[0]
width, height = in_stream.width, in_stream.height

out_container = av.open(str(args.output), mode="w")
out_stream = out_container.add_stream("ffv1", rate=20)
Comment on lines +118 to +120
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The output stream rate is hard-coded to 20 fps (add_stream(..., rate=20)). If the input file has a different FPS/time base, this will rewrite timing and can change frame pacing. Prefer deriving the rate from the input stream (e.g., in_stream.average_rate) or explicitly copying timing metadata when writing the intermediate file.

Suggested change
out_container = av.open(str(args.output), mode="w")
out_stream = out_container.add_stream("ffv1", rate=20)
output_rate = in_stream.average_rate or in_stream.base_rate or 20
out_container = av.open(str(args.output), mode="w")
out_stream = out_container.add_stream("ffv1", rate=output_rate)

Copilot uses AI. Check for mistakes.
out_stream.width = width
out_stream.height = height
out_stream.pix_fmt = "yuv420p"

for frame_idx, frame in enumerate(in_container.decode(in_stream)):
rgb = yuv420_to_rgb(frame)
out_rgb = process_frame(
rgb, frame_idx,
outside_luma_denoise=args.outside_luma_denoise,
outside_chroma_mode=args.outside_chroma_mode,
feather_radius=args.feather_radius,
outside_blend=args.outside_blend,
)
video_frame = av.VideoFrame.from_ndarray(out_rgb.cpu().numpy(), format="rgb24")
for packet in out_stream.encode(video_frame):
out_container.mux(packet)

for packet in out_stream.encode():
out_container.mux(packet)

out_container.close()
in_container.close()


if __name__ == "__main__":
main()
Loading