Skip to content

VideoStreamMetadata.sample_aspect_ratio: new metadata field (#733) #737

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/torchcodec/_core/Metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/avutil.h>
#include <libavutil/rational.h>
}

namespace facebook::torchcodec {
Expand Down Expand Up @@ -45,6 +46,7 @@ struct StreamMetadata {
// Video-only fields derived from the AVCodecContext.
std::optional<int64_t> width;
std::optional<int64_t> height;
std::optional<AVRational> sampleAspectRatio;

// Audio-only fields
std::optional<int64_t> sampleRate;
Expand Down
2 changes: 2 additions & 0 deletions src/torchcodec/_core/SingleStreamDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,8 @@ void SingleStreamDecoder::addVideoStream(

streamMetadata.width = streamInfo.codecContext->width;
streamMetadata.height = streamInfo.codecContext->height;
streamMetadata.sampleAspectRatio =
streamInfo.codecContext->sample_aspect_ratio;
}

void SingleStreamDecoder::addAudioStream(
Expand Down
17 changes: 17 additions & 0 deletions src/torchcodec/_core/_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import json
import pathlib
from dataclasses import dataclass
from fractions import Fraction
from typing import List, Optional, Union

import torch
Expand Down Expand Up @@ -80,6 +81,11 @@ class VideoStreamMetadata(StreamMetadata):
average_fps_from_header: Optional[float]
"""Averate fps of the stream, obtained from the header (float or None).
We recommend using the ``average_fps`` attribute instead."""
pixel_aspect_ratio: Optional[Fraction]
"""Pixel Aspect Ratio (PAR), also known as Sample Aspect Ratio
(SAR --- not to be confused with Storage Aspect Ratio, also SAR),
is the ratio between the width and height of each pixel
(``fractions.Fraction`` or None)."""

@property
def duration_seconds(self) -> Optional[float]:
Expand Down Expand Up @@ -211,6 +217,16 @@ def best_audio_stream(self) -> AudioStreamMetadata:
return metadata


def _get_optional_par_fraction(stream_dict):
try:
return Fraction(
stream_dict["sampleAspectRatioNum"],
stream_dict["sampleAspectRatioDen"],
)
except KeyError:
return None


# TODO-AUDIO: This is user-facing. Should this just be `get_metadata`, without
# the "container" name in it? Same below.
def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata:
Expand Down Expand Up @@ -247,6 +263,7 @@ def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata:
num_frames_from_header=stream_dict.get("numFramesFromHeader"),
num_frames_from_content=stream_dict.get("numFramesFromContent"),
average_fps_from_header=stream_dict.get("averageFpsFromHeader"),
pixel_aspect_ratio=_get_optional_par_fraction(stream_dict),
**common_meta,
)
)
Expand Down
6 changes: 6 additions & 0 deletions src/torchcodec/_core/custom_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,12 @@ std::string get_stream_json_metadata(
if (streamMetadata.height.has_value()) {
map["height"] = std::to_string(*streamMetadata.height);
}
if (streamMetadata.sampleAspectRatio.has_value()) {
map["sampleAspectRatioNum"] =
std::to_string((*streamMetadata.sampleAspectRatio).num);
map["sampleAspectRatioDen"] =
std::to_string((*streamMetadata.sampleAspectRatio).den);
}
Copy link
Member

@NicolasHug NicolasHug Jun 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think what you're doing here is OK. I don't see a much cleaner way around the json intermediate layer. @scotts any concern?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed. We're turning what is fundamentally two values into one object. This is a reasonable way to do that.

if (streamMetadata.averageFpsFromHeader.has_value()) {
map["averageFpsFromHeader"] =
std::to_string(*streamMetadata.averageFpsFromHeader);
Expand Down
4 changes: 4 additions & 0 deletions test/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# LICENSE file in the root directory of this source tree.

import functools
from fractions import Fraction

import pytest

Expand Down Expand Up @@ -81,6 +82,7 @@ def test_get_metadata(metadata_getter):
assert best_video_stream_metadata.begin_stream_seconds_from_header == 0
assert best_video_stream_metadata.bit_rate == 128783
assert best_video_stream_metadata.average_fps == pytest.approx(29.97, abs=0.001)
assert best_video_stream_metadata.pixel_aspect_ratio is None
assert best_video_stream_metadata.codec == "h264"
assert best_video_stream_metadata.num_frames_from_content == (
390 if with_scan else None
Expand Down Expand Up @@ -137,6 +139,7 @@ def test_num_frames_fallback(
width=123,
height=321,
average_fps_from_header=30,
pixel_aspect_ratio=Fraction(1, 1),
stream_index=0,
)

Expand All @@ -161,6 +164,7 @@ def test_repr():
num_frames_from_header: 390
num_frames_from_content: 390
average_fps_from_header: 29.97003
pixel_aspect_ratio: 1
duration_seconds: 13.013
begin_stream_seconds: 0.0
end_stream_seconds: 13.013
Expand Down
Loading