# Source code for sign_language_translator.models.video_embedding.video_embedding_model

"""This module provides an abstract base class for video embedding models that
transform a sequence of video frames into an embedding tensor.

Classes:
    - VideoEmbeddingModel: An abstract base class for video embedding models.
"""

from abc import ABC, abstractmethod
from typing import Iterable, Union

from numpy import uint8
from numpy.typing import NDArray
from torch import Tensor


class VideoEmbeddingModel(ABC):
    """Abstract base class for video embedding models.

    This class defines the interface for video embedding models, which
    transform a sequence of video frames into an embedding tensor.

    Attributes:
        None

    Methods:
        embed(frame_sequence, **kwargs): Abstract method to embed a sequence
            of video frames.
    """

    @abstractmethod
    def embed(
        self, frame_sequence: Iterable[Union[Tensor, NDArray[uint8]]], **kwargs
    ) -> Tensor:
        """Embed a sequence of video frames into an embedding tensor.

        Concrete subclasses must override this method; the base class
        cannot be instantiated while it remains abstract.

        Args:
            frame_sequence (Iterable[Union[Tensor, NDArray[uint8]]]): A
                sequence of video frames, where each frame can be either a
                Tensor or a numpy array of uint8 values of shape (W, H, C).
            **kwargs: Additional keyword arguments specific to the embedding
                model.

        Returns:
            Tensor: An embedding tensor representing the sequence of video
                frames.
        """