diff --git a/src/torchcodec/_core/Metadata.h b/src/torchcodec/_core/Metadata.h index dcbf7f89..f9ca85e6 100644 --- a/src/torchcodec/_core/Metadata.h +++ b/src/torchcodec/_core/Metadata.h @@ -13,6 +13,7 @@ extern "C" { #include #include +#include } namespace facebook::torchcodec { @@ -45,6 +46,7 @@ struct StreamMetadata { // Video-only fields derived from the AVCodecContext. std::optional width; std::optional height; + std::optional sampleAspectRatio; // Audio-only fields std::optional sampleRate; diff --git a/src/torchcodec/_core/SingleStreamDecoder.cpp b/src/torchcodec/_core/SingleStreamDecoder.cpp index a66281cd..1966e6f6 100644 --- a/src/torchcodec/_core/SingleStreamDecoder.cpp +++ b/src/torchcodec/_core/SingleStreamDecoder.cpp @@ -459,6 +459,8 @@ void SingleStreamDecoder::addVideoStream( streamMetadata.width = streamInfo.codecContext->width; streamMetadata.height = streamInfo.codecContext->height; + streamMetadata.sampleAspectRatio = + streamInfo.codecContext->sample_aspect_ratio; } void SingleStreamDecoder::addAudioStream( diff --git a/src/torchcodec/_core/_metadata.py b/src/torchcodec/_core/_metadata.py index c15e86e7..9be7db98 100644 --- a/src/torchcodec/_core/_metadata.py +++ b/src/torchcodec/_core/_metadata.py @@ -8,6 +8,7 @@ import json import pathlib from dataclasses import dataclass +from fractions import Fraction from typing import List, Optional, Union import torch @@ -80,6 +81,11 @@ class VideoStreamMetadata(StreamMetadata): average_fps_from_header: Optional[float] """Averate fps of the stream, obtained from the header (float or None). 
We recommend using the ``average_fps`` attribute instead."""
+    pixel_aspect_ratio: Optional[Fraction]
+    """Pixel Aspect Ratio (PAR), also known as Sample Aspect Ratio
+    (SAR --- not to be confused with Storage Aspect Ratio, also SAR),
+    is the ratio between the width and height of each pixel
+    (``fractions.Fraction`` or None)."""
 
     @property
     def duration_seconds(self) -> Optional[float]:
@@ -211,6 +217,29 @@ def best_audio_stream(self) -> AudioStreamMetadata:
         return metadata
 
 
+def _get_optional_par_fraction(stream_dict):
+    """Return the pixel aspect ratio stored in ``stream_dict``, if any.
+
+    Args:
+        stream_dict: Per-stream metadata mapping. The ratio is read from its
+            ``sampleAspectRatioNum`` and ``sampleAspectRatioDen`` entries.
+
+    Returns:
+        A ``fractions.Fraction``, or None when either entry is missing or
+        the ratio is unknown (zero numerator or zero denominator).
+    """
+    try:
+        num = stream_dict["sampleAspectRatioNum"]
+        den = stream_dict["sampleAspectRatioDen"]
+    except KeyError:
+        return None
+    # FFmpeg uses a zero sample aspect ratio to mean "unknown", and Fraction
+    # raises ZeroDivisionError on a zero denominator: both mean "no PAR".
+    if num == 0 or den == 0:
+        return None
+    return Fraction(num, den)
+
+
 # TODO-AUDIO: This is user-facing. Should this just be `get_metadata`, without
 # the "container" name in it? Same below.
 def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata:
@@ -247,6 +276,7 @@ def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata:
                     num_frames_from_header=stream_dict.get("numFramesFromHeader"),
                     num_frames_from_content=stream_dict.get("numFramesFromContent"),
                     average_fps_from_header=stream_dict.get("averageFpsFromHeader"),
+                    pixel_aspect_ratio=_get_optional_par_fraction(stream_dict),
                     **common_meta,
                 )
             )
diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp
index 03718698..d368ddf3 100644
--- a/src/torchcodec/_core/custom_ops.cpp
+++ b/src/torchcodec/_core/custom_ops.cpp
@@ -601,6 +601,12 @@ std::string get_stream_json_metadata(
   if (streamMetadata.height.has_value()) {
     map["height"] = std::to_string(*streamMetadata.height);
   }
+  if (streamMetadata.sampleAspectRatio.has_value()) {
+    map["sampleAspectRatioNum"] =
+        std::to_string((*streamMetadata.sampleAspectRatio).num);
+    map["sampleAspectRatioDen"] =
+        std::to_string((*streamMetadata.sampleAspectRatio).den);
+  }
   if (streamMetadata.averageFpsFromHeader.has_value()) {
     map["averageFpsFromHeader"] =
         std::to_string(*streamMetadata.averageFpsFromHeader);
diff --git 
a/test/test_metadata.py b/test/test_metadata.py index 7ed9508f..cee7cf02 100644 --- a/test/test_metadata.py +++ b/test/test_metadata.py @@ -5,6 +5,7 @@ # LICENSE file in the root directory of this source tree. import functools +from fractions import Fraction import pytest @@ -81,6 +82,7 @@ def test_get_metadata(metadata_getter): assert best_video_stream_metadata.begin_stream_seconds_from_header == 0 assert best_video_stream_metadata.bit_rate == 128783 assert best_video_stream_metadata.average_fps == pytest.approx(29.97, abs=0.001) + assert best_video_stream_metadata.pixel_aspect_ratio is None assert best_video_stream_metadata.codec == "h264" assert best_video_stream_metadata.num_frames_from_content == ( 390 if with_scan else None @@ -137,6 +139,7 @@ def test_num_frames_fallback( width=123, height=321, average_fps_from_header=30, + pixel_aspect_ratio=Fraction(1, 1), stream_index=0, ) @@ -161,6 +164,7 @@ def test_repr(): num_frames_from_header: 390 num_frames_from_content: 390 average_fps_from_header: 29.97003 + pixel_aspect_ratio: 1 duration_seconds: 13.013 begin_stream_seconds: 0.0 end_stream_seconds: 13.013