diff --git a/.gitignore b/.gitignore
index 8eb1ae4..8936beb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,8 @@ __pycache__/
# C extensions
*.so
+!lib/*.so
+!lib/*.so.*
# Distribution / packaging
.Python
@@ -15,6 +17,7 @@ downloads/
eggs/
.eggs/
lib/
+!lib/
lib64/
parts/
sdist/
diff --git a/ffmpeg-new b/ffmpeg-new
new file mode 100755
index 0000000..3dc330e
Binary files /dev/null and b/ffmpeg-new differ
diff --git a/pyproject.toml b/pyproject.toml
index a92afe2..8595876 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,6 +11,9 @@ dependencies = [
"tqdm",
"pillow",
"av",
+ "charset-normalizer",
+ "requests",
+ "urllib3",
]
[dependency-groups]
diff --git a/submissions/av1_roi_lanczos_unsharp/compress.sh b/submissions/av1_roi_lanczos_unsharp/compress.sh
new file mode 100644
index 0000000..bf246df
--- /dev/null
+++ b/submissions/av1_roi_lanczos_unsharp/compress.sh
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PD="$(cd "${HERE}/../.." && pwd)"
+TMP_DIR="${PD}/tmp/av1_roi_lanczos_unsharp"
+
+IN_DIR="${PD}/videos"
+VIDEO_NAMES_FILE="${PD}/public_test_video_names.txt"
+ARCHIVE_DIR="${HERE}/archive"
+JOBS="1"
+
+while [[ $# -gt 0 ]]; do
+ case "$1" in
+ --in-dir|--in_dir)
+ IN_DIR="${2%/}"; shift 2 ;;
+ --jobs)
+ JOBS="$2"; shift 2 ;;
+ --video-names-file|--video_names_file)
+ VIDEO_NAMES_FILE="$2"; shift 2 ;;
+ *)
+ echo "Unknown arg: $1" >&2
+      echo "Usage: $0 [--in-dir <dir>] [--jobs <n>]" >&2
+      echo "          [--video-names-file <file>]" >&2
+ exit 2 ;;
+ esac
+done
+
+rm -rf "$ARCHIVE_DIR"
+mkdir -p "$ARCHIVE_DIR"
+mkdir -p "$TMP_DIR"
+
+export IN_DIR ARCHIVE_DIR PD
+
+head -n "$(wc -l < "$VIDEO_NAMES_FILE")" "$VIDEO_NAMES_FILE" | xargs -P"$JOBS" -I{} bash -lc '
+ rel="$1"
+ [[ -z "$rel" ]] && exit 0
+
+ IN="${IN_DIR}/${rel}"
+ BASE="${rel%.*}"
+ OUT="${ARCHIVE_DIR}/${BASE}.mkv"
+ PRE_IN="'"${TMP_DIR}"'/${BASE}.pre.mkv"
+
+ echo "→ ${IN} → ${OUT}"
+
+ # Step 1: ROI preprocess — denoise outside driving corridor
+ rm -f "$PRE_IN"
+ python "'"${HERE}"'/preprocess.py" \
+ --input "$IN" \
+ --output "$PRE_IN" \
+ --outside-luma-denoise 2.5 \
+ --outside-chroma-mode medium \
+ --feather-radius 24 \
+ --outside-blend 0.50
+
+ # Step 2: Downscale + AV1 encode
+ FFMPEG="'"${HERE}"'/ffmpeg-new"
+ [ ! -x "$FFMPEG" ] && FFMPEG="ffmpeg"
+ export LD_LIBRARY_PATH="'"${HERE}"'/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+ "$FFMPEG" -nostdin -y -hide_banner -loglevel warning \
+ -r 20 -fflags +genpts -i "$PRE_IN" \
+ -vf "scale=trunc(iw*0.45/2)*2:trunc(ih*0.45/2)*2:flags=lanczos" \
+ -pix_fmt yuv420p -c:v libsvtav1 -preset 0 -crf 33 \
+ -svtav1-params "film-grain=22:keyint=180:scd=0" \
+ -r 20 "$OUT"
+
+ rm -f "$PRE_IN"
+' _ {}
+
+# zip archive
+cd "$ARCHIVE_DIR"
+if command -v zip &>/dev/null; then
+ zip -r "${HERE}/archive.zip" .
+else
+ python3 -c "
+import zipfile, os
+with zipfile.ZipFile('${HERE}/archive.zip', 'w', zipfile.ZIP_STORED) as zf:
+ for f in os.listdir('.'):
+ zf.write(f)
+"
+fi
+echo "Compressed to ${HERE}/archive.zip"
diff --git a/submissions/av1_roi_lanczos_unsharp/ffmpeg-new b/submissions/av1_roi_lanczos_unsharp/ffmpeg-new
new file mode 100644
index 0000000..3dc330e
Binary files /dev/null and b/submissions/av1_roi_lanczos_unsharp/ffmpeg-new differ
diff --git a/submissions/av1_roi_lanczos_unsharp/inflate.py b/submissions/av1_roi_lanczos_unsharp/inflate.py
new file mode 100644
index 0000000..813eac4
--- /dev/null
+++ b/submissions/av1_roi_lanczos_unsharp/inflate.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+import av, torch, numpy as np
+import torch.nn.functional as F
+from PIL import Image
+from frame_utils import camera_size, yuv420_to_rgb
+
+DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+_r = torch.tensor([1., 8., 28., 56., 70., 56., 28., 8., 1.])
+KERNEL = (torch.outer(_r, _r) / (_r.sum()**2)).to(DEVICE).expand(3, 1, 9, 9)
+STRENGTH = 0.40
+
+
+def decode_and_resize_to_file(video_path: str, dst: str):
+ target_w, target_h = camera_size
+ fmt = 'hevc' if video_path.endswith('.hevc') else None
+ container = av.open(video_path, format=fmt)
+ stream = container.streams.video[0]
+ n = 0
+ with open(dst, 'wb') as f:
+ for frame in container.decode(stream):
+ t = yuv420_to_rgb(frame) # (H, W, 3)
+ H, W, _ = t.shape
+ if H != target_h or W != target_w:
+ pil = Image.fromarray(t.numpy())
+ pil = pil.resize((target_w, target_h), Image.LANCZOS)
+ x = torch.from_numpy(np.array(pil)).permute(2, 0, 1).unsqueeze(0).float().to(DEVICE)
+ blur = F.conv2d(F.pad(x, (4, 4, 4, 4), mode='reflect'), KERNEL, padding=0, groups=3)
+ x = x + STRENGTH * (x - blur)
+ t = x.clamp(0, 255).squeeze(0).permute(1, 2, 0).round().cpu().to(torch.uint8)
+ f.write(t.contiguous().numpy().tobytes())
+ n += 1
+ container.close()
+ return n
+
+
+if __name__ == "__main__":
+ import sys
+ src, dst = sys.argv[1], sys.argv[2]
+ n = decode_and_resize_to_file(src, dst)
+ print(f"saved {n} frames")
diff --git a/submissions/av1_roi_lanczos_unsharp/inflate.sh b/submissions/av1_roi_lanczos_unsharp/inflate.sh
new file mode 100644
index 0000000..a54b563
--- /dev/null
+++ b/submissions/av1_roi_lanczos_unsharp/inflate.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+# Must produce a raw video file at `<OUTPUT_DIR>/<name>.raw`.
+# A `.raw` file is a flat binary dump of uint8 RGB frames with shape `(N, H, W, 3)`
+# where N is the number of frames, H and W match the original video dimensions, no header.
+set -euo pipefail
+
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ROOT="$(cd "$HERE/../.." && pwd)"
+SUB_NAME="$(basename "$HERE")"
+
+DATA_DIR="$1"
+OUTPUT_DIR="$2"
+FILE_LIST="$3"
+
+mkdir -p "$OUTPUT_DIR"
+
+while IFS= read -r line; do
+ [ -z "$line" ] && continue
+ BASE="${line%.*}"
+ SRC="${DATA_DIR}/${BASE}.mkv"
+ DST="${OUTPUT_DIR}/${BASE}.raw"
+
+ [ ! -f "$SRC" ] && echo "ERROR: ${SRC} not found" >&2 && exit 1
+
+ printf "Decoding + resizing %s ... " "$line"
+ cd "$ROOT"
+ python -m "submissions.${SUB_NAME}.inflate" "$SRC" "$DST"
+done < "$FILE_LIST"
diff --git a/submissions/av1_roi_lanczos_unsharp/lib/libSvtAv1Enc.so.2 b/submissions/av1_roi_lanczos_unsharp/lib/libSvtAv1Enc.so.2
new file mode 120000
index 0000000..6b76c45
--- /dev/null
+++ b/submissions/av1_roi_lanczos_unsharp/lib/libSvtAv1Enc.so.2
@@ -0,0 +1 @@
+libSvtAv1Enc.so.2.3.0
\ No newline at end of file
diff --git a/submissions/av1_roi_lanczos_unsharp/lib/libSvtAv1Enc.so.2.3.0 b/submissions/av1_roi_lanczos_unsharp/lib/libSvtAv1Enc.so.2.3.0
new file mode 100644
index 0000000..263b8df
Binary files /dev/null and b/submissions/av1_roi_lanczos_unsharp/lib/libSvtAv1Enc.so.2.3.0 differ
diff --git a/submissions/av1_roi_lanczos_unsharp/preprocess.py b/submissions/av1_roi_lanczos_unsharp/preprocess.py
new file mode 100644
index 0000000..b4aa751
--- /dev/null
+++ b/submissions/av1_roi_lanczos_unsharp/preprocess.py
@@ -0,0 +1,157 @@
+#!/usr/bin/env python3
+import argparse
+import sys
+from pathlib import Path
+
+import av
+import torch
+import torch.nn.functional as F
+from PIL import Image, ImageDraw, ImageFilter
+
+ROOT = Path(__file__).resolve().parents[2]
+if str(ROOT) not in sys.path:
+ sys.path.insert(0, str(ROOT))
+
+from frame_utils import yuv420_to_rgb
+
+
+def collapse_chroma(x: torch.Tensor, mode: str) -> torch.Tensor:
+ if mode == "normal":
+ return x
+ if mode == "soft":
+ k = 1
+ elif mode == "medium":
+ k = 2
+ elif mode == "strong":
+ k = 4
+ else:
+ raise ValueError(f"unknown chroma mode: {mode}")
+ uv = x[:, 1:3]
+ uv = F.avg_pool2d(uv, kernel_size=k * 2 + 1, stride=1, padding=k)
+ x[:, 1:3] = uv
+ return x
+
+
+def apply_luma_denoise(x: torch.Tensor, strength: float) -> torch.Tensor:
+ if strength <= 0:
+ return x
+ kernel_size = 3 if strength <= 2.0 else 5
+ sigma = max(0.1, strength * 0.35)
+ coords = torch.arange(kernel_size, device=x.device) - kernel_size // 2
+ g = torch.exp(-(coords ** 2) / (2 * sigma * sigma))
+ kernel_1d = (g / g.sum()).float()
+ kernel_2d = torch.outer(kernel_1d, kernel_1d).view(1, 1, kernel_size, kernel_size)
+ y = x[:, 0:1]
+ y_blur = F.conv2d(y, kernel_2d, padding=kernel_size // 2)
+ blend = min(0.9, strength / 3.0)
+ x[:, 0:1] = (1 - blend) * y + blend * y_blur
+ return x
+
+
+def rgb_to_yuv(rgb: torch.Tensor) -> torch.Tensor:
+ r = rgb[:, 0:1]
+ g = rgb[:, 1:2]
+ b = rgb[:, 2:3]
+ y = 0.299 * r + 0.587 * g + 0.114 * b
+ u = (b - y) / 1.772 + 128.0
+ v = (r - y) / 1.402 + 128.0
+ return torch.cat([y, u, v], dim=1)
+
+
+def yuv_to_rgb(yuv: torch.Tensor) -> torch.Tensor:
+ y = yuv[:, 0:1]
+ u = yuv[:, 1:2] - 128.0
+ v = yuv[:, 2:3] - 128.0
+ r = y + 1.402 * v
+ g = y - 0.344136 * u - 0.714136 * v
+ b = y + 1.772 * u
+ return torch.cat([r, g, b], dim=1)
+
+
+def segment_polygon(frame_idx: int, width: int, height: int) -> list[tuple[float, float]]:
+ segments = [
+ (0, 299, [(0.14, 0.52), (0.82, 0.48), (0.98, 1.00), (0.05, 1.00)]),
+ (300, 599, [(0.10, 0.50), (0.76, 0.47), (0.92, 1.00), (0.00, 1.00)]),
+ (600, 899, [(0.18, 0.50), (0.84, 0.47), (0.98, 1.00), (0.06, 1.00)]),
+ (900, 1199, [(0.22, 0.52), (0.90, 0.49), (1.00, 1.00), (0.10, 1.00)]),
+ ]
+ for start, end, poly in segments:
+ if start <= frame_idx <= end:
+ return [(x * width, y * height) for x, y in poly]
+ return [(0.15 * width, 0.52 * height), (0.85 * width, 0.48 * height), (width, height), (0, height)]
+
+
+def build_mask(frame_idx: int, width: int, height: int, feather_radius: int) -> torch.Tensor:
+ img = Image.new("L", (width, height), 0)
+ draw = ImageDraw.Draw(img)
+ draw.polygon(segment_polygon(frame_idx, width, height), fill=255)
+ if feather_radius > 0:
+ img = img.filter(ImageFilter.GaussianBlur(radius=feather_radius))
+ mask = torch.frombuffer(memoryview(img.tobytes()), dtype=torch.uint8).clone().view(height, width).float() / 255.0
+ return mask.unsqueeze(0).unsqueeze(0)
+
+
+def process_frame(
+ frame_rgb: torch.Tensor,
+ frame_idx: int,
+ outside_luma_denoise: float,
+ outside_chroma_mode: str,
+ feather_radius: int,
+ outside_blend: float,
+) -> torch.Tensor:
+ chw = frame_rgb.permute(2, 0, 1).float().unsqueeze(0)
+ mask = build_mask(frame_idx, chw.shape[-1], chw.shape[-2], feather_radius).to(chw.device)
+ yuv = rgb_to_yuv(chw)
+ processed = yuv.clone()
+ processed = apply_luma_denoise(processed, outside_luma_denoise)
+ processed = collapse_chroma(processed, outside_chroma_mode)
+ processed_rgb = yuv_to_rgb(processed)
+ outside_alpha = (1.0 - mask) * outside_blend
+ mixed = chw * (1.0 - outside_alpha) + processed_rgb * outside_alpha
+ return mixed.clamp(0, 255).round().to(torch.uint8).squeeze(0).permute(1, 2, 0)
+
+
+def main() -> None:
+ parser = argparse.ArgumentParser(description="Hand-authored ROI preprocessor for AV1 encode.")
+ parser.add_argument("--input", type=Path, required=True)
+ parser.add_argument("--output", type=Path, required=True)
+ parser.add_argument("--outside-luma-denoise", type=float, default=0.0)
+ parser.add_argument("--outside-chroma-mode", type=str, default="normal")
+ parser.add_argument("--feather-radius", type=int, default=32)
+ parser.add_argument("--outside-blend", type=float, default=1.0)
+ args = parser.parse_args()
+
+ in_container = av.open(str(args.input))
+ in_stream = in_container.streams.video[0]
+ width = in_stream.width
+ height = in_stream.height
+
+ out_container = av.open(str(args.output), mode="w")
+ out_stream = out_container.add_stream("ffv1", rate=20)
+ out_stream.width = width
+ out_stream.height = height
+ out_stream.pix_fmt = "yuv420p"
+
+ for frame_idx, frame in enumerate(in_container.decode(in_stream)):
+ rgb = yuv420_to_rgb(frame)
+ out_rgb = process_frame(
+ rgb,
+ frame_idx=frame_idx,
+ outside_luma_denoise=args.outside_luma_denoise,
+ outside_chroma_mode=args.outside_chroma_mode,
+ feather_radius=args.feather_radius,
+ outside_blend=args.outside_blend,
+ )
+ video_frame = av.VideoFrame.from_ndarray(out_rgb.cpu().numpy(), format="rgb24")
+ for packet in out_stream.encode(video_frame):
+ out_container.mux(packet)
+
+ for packet in out_stream.encode():
+ out_container.mux(packet)
+
+ out_container.close()
+ in_container.close()
+
+
+if __name__ == "__main__":
+ main()