salimshakeel committed
Commit d2542a3 · 1 Parent(s): 30508a4

upload files

.gitattributes CHANGED
@@ -1,35 +1,173 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[codz]
+ *$py.class
+ # C extensions
+ *.so
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py.cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+ # Translations
+ *.mo
+ *.pot
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+ # Flask stuff:
+ instance/
+ .webassets-cache
+ # Scrapy stuff:
+ .scrapy
+ # Sphinx documentation
+ docs/_build/
+ # PyBuilder
+ .pybuilder/
+ target/
+ # Jupyter Notebook
+ .ipynb_checkpoints
+ # IPython
+ profile_default/
+ ipython_config.py
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+ # UV
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ #uv.lock
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+ #poetry.toml
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+ #pdm.lock
+ #pdm.toml
+ .pdm-python
+ .pdm-build/
+ # pixi
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+ #pixi.lock
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+ # in the .venv directory. It is recommended not to include this directory in version control.
+ .pixi
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+ # SageMath parsed files
+ *.sage.py
+ # Environments
+ .env
+ .envrc
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+ # Rope project settings
+ .ropeproject
+ # mkdocs documentation
+ /site
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+ # Pyre type checker
+ .pyre/
+ # pytype static type analyzer
+ .pytype/
+ # Cython debug symbols
+ cython_debug/
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+ # Abstra
+ # Abstra is an AI-powered process automation framework.
+ # Ignore directories containing user credentials, local state, and settings.
+ # Learn more at https://abstra.io/docs
+ .abstra/
+ # Visual Studio Code
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
+ # you could uncomment the following to ignore the entire vscode folder
+ # .vscode/
+ # Ruff stuff:
+ .ruff_cache/
+ # PyPI configuration file
+ .pypirc
+ # Cursor
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+ # refer to https://docs.cursor.com/context/ignore-files
+ .cursorignore
+ .cursorindexingignore
+ # Marimo
+ marimo/_static/
+ marimo/_lsp/
+ __marimo__/
+ # Streamlit
+ .streamlit/secrets.toml
+ static/uploads/* filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,15 @@
+ # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+ # You will also find guides on how best to write your Dockerfile
+
+ FROM python:3.12-slim
+
+ WORKDIR /code
+
+ COPY ./requirements.txt /code/requirements.txt
+
+ RUN pip install --no-cache-dir --upgrade pip \
+     && pip install --no-cache-dir -r /code/requirements.txt
+
+ COPY . .
+
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
Procfile ADDED
@@ -0,0 +1 @@
+ web: gunicorn -w 3 -k uvicorn.workers.UvicornWorker main:app
__init__.py ADDED
File without changes
config.py ADDED
@@ -0,0 +1,7 @@
+ # config.py
+ import torch
+ UPLOAD_DIR = "backend/static/uploads"
+ OUTPUT_DIR = "backend/static/outputs"
+ FRAME_RATE = 15
+ SCORE_THRESHOLD = 0.4
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
layers/attention.py ADDED
@@ -0,0 +1,132 @@
+ # -*- coding: utf-8 -*-
+ import torch
+ import torch.nn as nn
+ import numpy as np
+
+
+ class SelfAttention(nn.Module):
+     def __init__(self, input_size=1024, output_size=1024, freq=10000, heads=1, pos_enc=None):
+         """ The basic (multi-head) Attention 'cell' containing the learnable parameters of Q, K and V
+
+         :param int input_size: Feature input size of Q, K, V.
+         :param int output_size: Feature -hidden- size of Q, K, V.
+         :param int freq: The frequency of the sinusoidal positional encoding.
+         :param int heads: Number of heads for the attention module.
+         :param str | None pos_enc: The type of the positional encoding [supported: Absolute, Relative].
+         """
+         super(SelfAttention, self).__init__()
+
+         self.permitted_encodings = ["absolute", "relative"]
+         if pos_enc is not None:
+             pos_enc = pos_enc.lower()
+             assert pos_enc in self.permitted_encodings, f"Supported encodings: {*self.permitted_encodings,}"
+
+         self.input_size = input_size
+         self.output_size = output_size
+         self.heads = heads
+         self.pos_enc = pos_enc
+         self.freq = freq
+         self.Wk, self.Wq, self.Wv = nn.ModuleList(), nn.ModuleList(), nn.ModuleList()
+         for _ in range(self.heads):
+             self.Wk.append(nn.Linear(in_features=input_size, out_features=output_size//heads, bias=False))
+             self.Wq.append(nn.Linear(in_features=input_size, out_features=output_size//heads, bias=False))
+             self.Wv.append(nn.Linear(in_features=input_size, out_features=output_size//heads, bias=False))
+         self.out = nn.Linear(in_features=output_size, out_features=input_size, bias=False)
+
+         self.softmax = nn.Softmax(dim=-1)
+         self.drop = nn.Dropout(p=0.5)
+
+     def getAbsolutePosition(self, T):
+         """Calculate the sinusoidal positional encoding based on the absolute position of each considered frame.
+         Based on 'Attention is all you need' paper (https://arxiv.org/abs/1706.03762)
+
+         :param int T: Number of frames contained in Q, K and V
+         :return: Tensor with shape [T, T]
+         """
+         freq = self.freq
+         d = self.input_size
+
+         pos = torch.tensor([k for k in range(T)], device=self.out.weight.device)
+         i = torch.tensor([k for k in range(T//2)], device=self.out.weight.device)
+
+         # Reshape tensors each pos_k for each i indices
+         pos = pos.reshape(pos.shape[0], 1)
+         pos = pos.repeat_interleave(i.shape[0], dim=1)
+         i = i.repeat(pos.shape[0], 1)
+
+         AP = torch.zeros(T, T, device=self.out.weight.device)
+         AP[pos, 2*i] = torch.sin(pos / freq ** ((2 * i) / d))
+         AP[pos, 2*i+1] = torch.cos(pos / freq ** ((2 * i) / d))
+         return AP
+
+     def getRelativePosition(self, T):
+         """Calculate the sinusoidal positional encoding based on the relative position of each considered frame.
+         r_pos calculations as here: https://theaisummer.com/positional-embeddings/
+
+         :param int T: Number of frames contained in Q, K and V
+         :return: Tensor with shape [T, T]
+         """
+         freq = self.freq
+         d = 2 * T
+         min_rpos = -(T - 1)
+
+         i = torch.tensor([k for k in range(T)], device=self.out.weight.device)
+         j = torch.tensor([k for k in range(T)], device=self.out.weight.device)
+
+         # Reshape tensors each i for each j indices
+         i = i.reshape(i.shape[0], 1)
+         i = i.repeat_interleave(i.shape[0], dim=1)
+         j = j.repeat(i.shape[0], 1)
+
+         # Calculate the relative positions
+         r_pos = j - i - min_rpos
+
+         RP = torch.zeros(T, T, device=self.out.weight.device)
+         idx = torch.tensor([k for k in range(T//2)], device=self.out.weight.device)
+         RP[:, 2*idx] = torch.sin(r_pos[:, 2*idx] / freq ** ((i[:, 2*idx] + j[:, 2*idx]) / d))
+         RP[:, 2*idx+1] = torch.cos(r_pos[:, 2*idx+1] / freq ** ((i[:, 2*idx+1] + j[:, 2*idx+1]) / d))
+         return RP
+
+     def forward(self, x):
+         """ Compute the weighted frame features, based on either the global or local (multi-head) attention mechanism.
+
+         :param torch.tensor x: Frame features with shape [T, input_size]
+         :return: A tuple of:
+             y: Weighted features based on the attention weights, with shape [T, input_size]
+             att_weights : The attention weights (before dropout), with shape [T, T]
+         """
+         outputs = []
+         for head in range(self.heads):
+             K = self.Wk[head](x)
+             Q = self.Wq[head](x)
+             V = self.Wv[head](x)
+
+             # Q *= 0.06 # scale factor VASNet
+             # Q /= np.sqrt(self.output_size) # scale factor (i.e 1 / sqrt(d_k) )
+             energies = torch.matmul(Q, K.transpose(1, 0))
+             if self.pos_enc is not None:
+                 if self.pos_enc == "absolute":
+                     AP = self.getAbsolutePosition(T=energies.shape[0])
+                     energies = energies + AP
+                 elif self.pos_enc == "relative":
+                     RP = self.getRelativePosition(T=energies.shape[0])
+                     energies = energies + RP
+
+             att_weights = self.softmax(energies)
+             _att_weights = self.drop(att_weights)
+             y = torch.matmul(_att_weights, V)
+
+             # Save the current head output
+             outputs.append(y)
+         y = self.out(torch.cat(outputs, dim=1))
+         return y, att_weights.clone()  # for now we don't deal with the weights (probably max or avg pooling)
+
+
+ if __name__ == '__main__':
+     pass
+     """Uncomment for a quick proof of concept
+     model = SelfAttention(input_size=256, output_size=256, pos_enc="absolute").cuda()
+     _input = torch.randn(500, 256).cuda()  # [seq_len, hidden_size]
+     output, weights = model(_input)
+     print(f"Output shape: {output.shape}\tattention shape: {weights.shape}")
+     """
layers/summarizer.py ADDED
@@ -0,0 +1,139 @@
+ # -*- coding: utf-8 -*-
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import math
+ from .attention import SelfAttention
+
+
+
+ class MultiAttention(nn.Module):
+     def __init__(self, input_size=1024, output_size=1024, freq=10000, pos_enc=None,
+                  num_segments=None, heads=1, fusion=None):
+         """ Class wrapping the MultiAttention part of PGL-SUM; its key modules and parameters.
+
+         :param int input_size: The expected input feature size.
+         :param int output_size: The hidden feature size of the attention mechanisms.
+         :param int freq: The frequency of the sinusoidal positional encoding.
+         :param None | str pos_enc: The selected positional encoding [absolute, relative].
+         :param None | int num_segments: The selected number of segments to split the videos.
+         :param int heads: The selected number of global heads.
+         :param None | str fusion: The selected type of feature fusion.
+         """
+         super(MultiAttention, self).__init__()
+
+         # Global Attention, considering differences among all frames
+         self.attention = SelfAttention(input_size=input_size, output_size=output_size,
+                                        freq=freq, pos_enc=pos_enc, heads=heads)
+
+         self.num_segments = num_segments
+         if self.num_segments is not None:
+             assert self.num_segments >= 2, "num_segments must be None or 2+"
+             self.local_attention = nn.ModuleList()
+             for _ in range(self.num_segments):
+                 # Local Attention, considering differences among the same segment with reduce hidden size
+                 self.local_attention.append(SelfAttention(input_size=input_size, output_size=output_size//num_segments,
+                                                           freq=freq, pos_enc=pos_enc, heads=4))
+         self.permitted_fusions = ["add", "mult", "avg", "max"]
+         self.fusion = fusion
+         if self.fusion is not None:
+             self.fusion = self.fusion.lower()
+             assert self.fusion in self.permitted_fusions, f"Fusion method must be: {*self.permitted_fusions,}"
+
+     def forward(self, x):
+         """ Compute the weighted frame features, based on the global and locals (multi-head) attention mechanisms.
+
+         :param torch.Tensor x: Tensor with shape [T, input_size] containing the frame features.
+         :return: A tuple of:
+             weighted_value: Tensor with shape [T, input_size] containing the weighted frame features.
+             attn_weights: Tensor with shape [T, T] containing the attention weights.
+         """
+         weighted_value, attn_weights = self.attention(x)  # global attention
+
+         if self.num_segments is not None and self.fusion is not None:
+             segment_size = math.ceil(x.shape[0] / self.num_segments)
+             for segment in range(self.num_segments):
+                 left_pos = segment * segment_size
+                 right_pos = (segment + 1) * segment_size
+                 local_x = x[left_pos:right_pos]
+                 weighted_local_value, attn_local_weights = self.local_attention[segment](local_x)  # local attentions
+
+                 # Normalize the features vectors
+                 weighted_value[left_pos:right_pos] = F.normalize(weighted_value[left_pos:right_pos].clone(), p=2, dim=1)
+                 weighted_local_value = F.normalize(weighted_local_value, p=2, dim=1)
+                 if self.fusion == "add":
+                     weighted_value[left_pos:right_pos] += weighted_local_value
+                 elif self.fusion == "mult":
+                     weighted_value[left_pos:right_pos] *= weighted_local_value
+                 elif self.fusion == "avg":
+                     weighted_value[left_pos:right_pos] += weighted_local_value
+                     weighted_value[left_pos:right_pos] /= 2
+                 elif self.fusion == "max":
+                     weighted_value[left_pos:right_pos] = torch.max(weighted_value[left_pos:right_pos].clone(),
+                                                                    weighted_local_value)
+
+         return weighted_value, attn_weights
+
+
+ class PGL_SUM(nn.Module):
+     def __init__(self, input_size=1024, output_size=1024, freq=10000, pos_enc=None,
+                  num_segments=None, heads=1, fusion=None):
+         """ Class wrapping the PGL-SUM model; its key modules and parameters.
+
+         :param int input_size: The expected input feature size.
+         :param int output_size: The hidden feature size of the attention mechanisms.
+         :param int freq: The frequency of the sinusoidal positional encoding.
+         :param None | str pos_enc: The selected positional encoding [absolute, relative].
+         :param None | int num_segments: The selected number of segments to split the videos.
+         :param int heads: The selected number of global heads.
+         :param None | str fusion: The selected type of feature fusion.
+         """
+         super(PGL_SUM, self).__init__()
+
+         self.attention = MultiAttention(input_size=input_size, output_size=output_size, freq=freq,
+                                         pos_enc=pos_enc, num_segments=num_segments, heads=heads, fusion=fusion)
+         self.linear_1 = nn.Linear(in_features=input_size, out_features=input_size)
+         self.linear_2 = nn.Linear(in_features=self.linear_1.out_features, out_features=1)
+
+         self.drop = nn.Dropout(p=0.5)
+         self.norm_y = nn.LayerNorm(normalized_shape=input_size, eps=1e-6)
+         self.norm_linear = nn.LayerNorm(normalized_shape=self.linear_1.out_features, eps=1e-6)
+         self.relu = nn.ReLU()
+         self.sigmoid = nn.Sigmoid()
+
+     def forward(self, frame_features):
+         """ Produce frames importance scores from the frame features, using the PGL-SUM model.
+
+         :param torch.Tensor frame_features: Tensor of shape [T, input_size] containing the frame features produced by
+             using the pool5 layer of GoogleNet.
+         :return: A tuple of:
+             y: Tensor with shape [1, T] containing the frames importance scores in [0, 1].
+             attn_weights: Tensor with shape [T, T] containing the attention weights.
+         """
+         residual = frame_features
+         weighted_value, attn_weights = self.attention(frame_features)
+         y = weighted_value + residual
+         y = self.drop(y)
+         y = self.norm_y(y)
+
+         # 2-layer NN (Regressor Network)
+         y = self.linear_1(y)
+         y = self.relu(y)
+         y = self.drop(y)
+         y = self.norm_linear(y)
+
+         y = self.linear_2(y)
+         y = self.sigmoid(y)
+         y = y.view(1, -1)
+
+         return y, attn_weights
+
+
+ if __name__ == '__main__':
+     pass
+     """Uncomment for a quick proof of concept
+     model = PGL_SUM(input_size=256, output_size=256, num_segments=3, fusion="Add").cuda()
+     _input = torch.randn(500, 256).cuda()  # [seq_len, hidden_size]
+     output, weights = model(_input)
+     print(f"Output shape: {output.shape}\tattention shape: {weights.shape}")
+     """
main.py ADDED
@@ -0,0 +1,26 @@
+ from fastapi import FastAPI
+ from fastapi.middleware.cors import CORSMiddleware
+ from routes import summarize
+ from fastapi.staticfiles import StaticFiles
+ from fastapi.responses import JSONResponse
+ import os
+
+ app = FastAPI()
+ app.include_router(summarize.router)
+
+ # ✅ Root route to avoid 404 on /
+ @app.get("/")
+ def read_root():
+     return JSONResponse(content={"message": "Video summarization API is running"})
+
+ # CORS
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # Mount static folder
+ static_dir = os.path.join("backend", "static")
+ app.mount("/static", StaticFiles(directory=static_dir), name="static")
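For a quick local check of the app wired up above, a minimal sketch using FastAPI's TestClient; it is not part of this commit and assumes the backend/static directory exists so the static mount can initialize.

# Hypothetical smoke test for main.py (assumes backend/static exists locally).
from fastapi.testclient import TestClient
from main import app

client = TestClient(app)
resp = client.get("/")  # exercises the root route defined above
print(resp.status_code, resp.json())  # 200 {"message": "Video summarization API is running"}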
routes/__init__.py ADDED
File without changes
routes/summarize.py ADDED
@@ -0,0 +1,28 @@
+ from fastapi import APIRouter, UploadFile, File
+ from fastapi.responses import JSONResponse
+ from utils.file_utils import save_uploaded_file
+ from services.extractor import extract_features
+ from services.model_loader import load_model
+ from services.summarizer import get_scores, get_selected_indices, save_summary_video
+ from config import UPLOAD_DIR, OUTPUT_DIR
+
+ router = APIRouter()
+
+ @router.post("/summarize")
+ def summarize_video(video: UploadFile = File(...)):
+     if not video.filename.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
+         return JSONResponse(content={"error": "Unsupported file format"}, status_code=400)
+
+     video_path = save_uploaded_file(video, UPLOAD_DIR)
+     features, picks = extract_features(video_path)
+     model = load_model("backend/Model/epoch-199.pkl")
+     scores = get_scores(model, features)
+     selected = get_selected_indices(scores, picks)
+     output_path = f"{OUTPUT_DIR}/summary_{video.filename}"
+     save_summary_video(video_path, selected, output_path)
+     summary_url = f"/static/outputs/summary_{video.filename}"
+
+     return JSONResponse(content={
+         "message": "Summarization complete",
+         "summary_video_url": summary_url
+     })
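A hedged client-side sketch of how the /summarize endpoint above could be called; the host, port (taken from the Dockerfile CMD) and file name are assumptions, not part of this commit.

# Hypothetical client call; URL and file name are placeholders.
import requests

with open("match_highlights.mp4", "rb") as f:
    resp = requests.post(
        "http://localhost:7860/summarize",
        files={"video": ("match_highlights.mp4", f, "video/mp4")},
    )
print(resp.json())  # e.g. {"message": "Summarization complete", "summary_video_url": "/static/outputs/summary_..."}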
services/__init__.py ADDED
File without changes
services/extractor.py ADDED
@@ -0,0 +1,62 @@
+ import cv2
+ import torch
+ import numpy as np
+ from PIL import Image
+ from torchvision import models, transforms
+ from config import DEVICE, FRAME_RATE
+
+ # Load GoogLeNet once
+ from torchvision.models import GoogLeNet_Weights
+ weights = GoogLeNet_Weights.DEFAULT
+ googlenet = models.googlenet(weights=weights).to(DEVICE).eval()
+
+ feature_extractor = torch.nn.Sequential(
+     googlenet.conv1,
+     googlenet.maxpool1,
+     googlenet.conv2,
+     googlenet.conv3,
+     googlenet.maxpool2,
+     googlenet.inception3a,
+     googlenet.inception3b,
+     googlenet.maxpool3,
+     googlenet.inception4a,
+     googlenet.inception4b,
+     googlenet.inception4c,
+     googlenet.inception4d,
+     googlenet.inception4e,
+     googlenet.maxpool4,
+     googlenet.inception5a,
+     googlenet.inception5b,
+     googlenet.avgpool,
+     torch.nn.Flatten()
+ )
+
+ transform = transforms.Compose([
+     transforms.Resize((224, 224)),
+     transforms.ToTensor(),
+     transforms.Normalize(
+         mean=[0.485, 0.456, 0.406],
+         std=[0.229, 0.224, 0.225]
+     )
+ ])
+
+ def extract_features(video_path):
+     cap = cv2.VideoCapture(video_path)
+     fps = cap.get(cv2.CAP_PROP_FPS)
+     picks, frames = [], []
+     count = 0
+
+     while cap.isOpened():
+         ret, frame = cap.read()
+         if not ret:
+             break
+         if int(count % round(fps // FRAME_RATE)) == 0:
+             image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+             input_tensor = transform(image).unsqueeze(0).to(DEVICE)
+             with torch.no_grad():
+                 feature = feature_extractor(input_tensor).squeeze(0).cpu().numpy()
+             frames.append(feature)
+             picks.append(count)
+         count += 1
+     cap.release()
+     return np.stack(frames), picks
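For orientation, a small sketch of what extract_features returns; the clip path is a placeholder, and the 1024-dimensional size follows from GoogLeNet's pooled (pool5) features.

# Hypothetical usage of the extractor (clip path is a placeholder).
from services.extractor import extract_features

features, picks = extract_features("static/uploads/clip.mp4")
print(features.shape)  # (N, 1024): one pooled GoogLeNet feature per sampled frame
print(picks[:5])       # original frame indices of the sampled frames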
services/model_loader.py ADDED
@@ -0,0 +1,19 @@
+ import torch
+ import sys
+ import os
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
+ from layers.summarizer import PGL_SUM
+ from config import DEVICE
+
+ def load_model(weights_path):
+     model = PGL_SUM(
+         input_size=1024,
+         output_size=1024,
+         num_segments=4,
+         heads=8,
+         fusion="add",
+         pos_enc="absolute"
+     ).to(DEVICE)
+     model.load_state_dict(torch.load(weights_path, map_location=DEVICE))
+     model.eval()  # must be called; a bare `model.eval` is a no-op attribute access
+     return model
services/summarizer.py ADDED
@@ -0,0 +1,65 @@
+ import cv2
+ import torch
+ from config import SCORE_THRESHOLD
+
+ def get_scores(model, features):
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     model = model.to(device)
+     with torch.no_grad():
+         features_tensor = torch.tensor(features, dtype=torch.float32).to(device)
+         scores, _ = model(features_tensor)
+     return scores.squeeze().cpu().numpy()
+
+
+ def get_selected_indices(scores, picks, threshold=SCORE_THRESHOLD):
+     return [picks[i] for i, score in enumerate(scores) if score >= threshold]
+
+ import subprocess
+ import os
+
+ def save_summary_video(video_path, selected_indices, output_path, fps=15):
+     import cv2
+
+     cap = cv2.VideoCapture(video_path)
+     selected = set(selected_indices)
+     frame_id = 0
+     frames = {}
+
+     while cap.isOpened():
+         ret, frame = cap.read()
+         if not ret:
+             break
+         if frame_id in selected:
+             frames[frame_id] = frame
+         frame_id += 1
+     cap.release()
+
+     if not frames:
+         print("No frames selected.")
+         return
+
+     h, w, _ = list(frames.values())[0].shape
+
+     # 1️⃣ Save raw video first
+     raw_output_path = output_path.replace(".mp4", "_raw.mp4")
+     writer = cv2.VideoWriter(raw_output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
+     for fid in sorted(frames.keys()):
+         writer.write(frames[fid])
+     writer.release()
+
+     # 2️⃣ Use FFmpeg to fix video (browser-compatible)
+     try:
+         subprocess.run([
+             "ffmpeg",
+             "-y",  # overwrite if file exists
+             "-i", raw_output_path,
+             "-vcodec", "libx264",
+             "-acodec", "aac",
+             output_path
+         ], check=True)
+         os.remove(raw_output_path)  # optional: remove raw file
+         print(f"✅ FFmpeg re-encoded video saved to: {output_path}")
+     except subprocess.CalledProcessError as e:
+         print("❌ FFmpeg failed:", e)
+         print("⚠️ Using raw video instead.")
+         os.rename(raw_output_path, output_path)
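Putting the services together, a hedged end-to-end sketch that mirrors the /summarize route above; all paths are assumptions, and ffmpeg must be available on PATH for the re-encode step.

# Hypothetical end-to-end run outside FastAPI; paths are placeholders.
from services.extractor import extract_features
from services.model_loader import load_model
from services.summarizer import get_scores, get_selected_indices, save_summary_video

features, picks = extract_features("static/uploads/clip.mp4")
model = load_model("backend/Model/epoch-199.pkl")
scores = get_scores(model, features)
selected = get_selected_indices(scores, picks)
save_summary_video("static/uploads/clip.mp4", selected, "static/outputs/summary_clip.mp4")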
static/uploads/77ea55af6d744160a5c7e8440b294bb6_Paris Saint-Germain vs Atlético de Madrid Highlights | FIFA Club World Cup 2025.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:99183f4ca670013008f6a45943bf878532a1db1ad0753f671d289f55f45dac93
+ size 22890415
static/uploads/84daab3df51f418ebff312b2ed129bc1_Paris Saint-Germain vs Atlético de Madrid Highlights | FIFA Club World Cup 2025.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:99183f4ca670013008f6a45943bf878532a1db1ad0753f671d289f55f45dac93
+ size 22890415
static/uploads/8ba4aec007f5404db2e9ac9570e59ca6_Paris Saint-Germain vs Atlético de Madrid Highlights | FIFA Club World Cup 2025.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:99183f4ca670013008f6a45943bf878532a1db1ad0753f671d289f55f45dac93
+ size 22890415
static/uploads/b0b93f4bcdcb4662865bb4dc26c1b243_Paris Saint-Germain vs Atlético de Madrid Highlights | FIFA Club World Cup 2025.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:99183f4ca670013008f6a45943bf878532a1db1ad0753f671d289f55f45dac93
+ size 22890415
static/uploads/e051610a8a634fd9a9de3c016d38ce73_Paris Saint-Germain vs Atlético de Madrid Highlights | FIFA Club World Cup 2025.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:99183f4ca670013008f6a45943bf878532a1db1ad0753f671d289f55f45dac93
+ size 22890415
utils/__init__.py ADDED
File without changes
utils/file_utils.py ADDED
@@ -0,0 +1,10 @@
+ import os
+ from uuid import uuid4
+
+ def save_uploaded_file(uploaded_file, upload_dir):
+     os.makedirs(upload_dir, exist_ok=True)
+     filename = f"{uuid4().hex}_{uploaded_file.filename}"
+     filepath = os.path.join(upload_dir, filename)
+     with open(filepath, "wb") as f:
+         f.write(uploaded_file.file.read())
+     return filepath