Update app.py

app.py CHANGED
@@ -1,9 +1,6 @@
 import os
-import nvidia.cublas.lib
-import nvidia.cudnn.lib
 import tempfile
 from pathlib import Path
-import subprocess
 
 import gradio as gr
 import spaces
@@ -17,7 +14,6 @@ from src.models.llama_inference import inference
 from src.test.vidchapters import get_chapters
 from tools.download.models import download_base_model, download_model
 
-
 # Set up proxies
 # from urllib.request import getproxies
 # proxies = getproxies()
@@ -34,29 +30,6 @@ inference_model = None
 LLAMA_CKPT_PATH = "meta-llama/Meta-Llama-3.1-8B-Instruct"
 
 
-cublas_path = os.path.dirname(nvidia.cublas.lib.__file__)
-cudnn_path = os.path.dirname(nvidia.cudnn.lib.__file__)
-
-ld_library_path = f"{cublas_path}:{cudnn_path}"
-os.environ["LD_LIBRARY_PATH"] = ld_library_path
-
-def install_cudnn():
-    """Installs specific versions of libcudnn and configures torch for TF32."""
-
-    try:
-        subprocess.run(["apt-get", "update"], check=True)
-        subprocess.run(["apt-get", "install", "-y", "libcudnn8=8.9.2.26-1+cuda12.1"], check=True)
-        subprocess.run(["apt-get", "install", "-y", "libcudnn8-dev=8.9.2.26-1+cuda12.1"], check=True)
-        subprocess.run(["python", "-c", "import torch; torch.backends.cuda.matmul.allow_tf32 = True; torch.backends.cudnn.allow_tf32 = True"], check=True)
-        subprocess.run(["ln", "-s", "/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so", "/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8"], check=True)
-        print("cuDNN installation and configuration successful.")
-    except subprocess.CalledProcessError as e:
-        print(f"Error during cuDNN installation: {e}")
-    except FileNotFoundError:
-        print("apt-get or python not found. Ensure they are in your PATH.")
-
-install_cudnn()
-
 @spaces.GPU
 def load_base_model():
     """Load the base Llama model and tokenizer once at startup."""
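The block removed above tried to apt-install a pinned cuDNN at import time, which typically fails on Spaces because the app process has no root access; setting `os.environ["LD_LIBRARY_PATH"]` from inside a running process also has no effect, since the dynamic loader reads that variable only at process startup. If wheel-shipped CUDA libraries are ever needed again, a root-free sketch is below. It assumes the nvidia-cudnn-cu12 wheel is listed in requirements.txt; the `preload_cudnn` helper is hypothetical and not part of this commit.

```python
# Hypothetical, root-free alternative to the removed apt-get install:
# preload the cuDNN shared objects shipped inside the pip wheel so that
# libraries loaded later resolve them without editing LD_LIBRARY_PATH.
import ctypes
from pathlib import Path

import nvidia.cudnn  # provided by the nvidia-cudnn-cu12 wheel (assumed installed)


def preload_cudnn() -> None:
    lib_dir = Path(nvidia.cudnn.__path__[0]) / "lib"
    for so_file in sorted(lib_dir.glob("libcudnn*.so*")):
        # RTLD_GLOBAL makes the symbols visible to subsequently loaded libraries.
        ctypes.CDLL(str(so_file), mode=ctypes.RTLD_GLOBAL)


preload_cudnn()
```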
@@ -87,6 +60,7 @@ def load_base_model():
     tokenizer.pad_token = tokenizer.eos_token
     print("Base model loaded successfully")
 
+
 @spaces.GPU
 class FastLlamaInference:
     def __init__(
@@ -145,6 +119,7 @@ class FastLlamaInference:
 
         return inference(**params)
 
+
 @spaces.GPU
 def load_peft(model_name: str = "asr-10k"):
     """Load or switch PEFT model while reusing the base model."""
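The `@spaces.GPU` decorator kept on these definitions is the ZeroGPU mechanism: the Space holds no GPU at rest, and a device is attached only for the duration of each decorated call. A minimal sketch of the pattern (the `generate` function and the `duration` value are illustrative, not from this app):

```python
import spaces
import torch


@spaces.GPU(duration=120)  # request a GPU for up to ~120 s per call (illustrative value)
def generate(prompt: str) -> str:
    # CUDA is only guaranteed to be available inside the decorated call.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return f"[{device}] {prompt}"
```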
@@ -283,21 +258,57 @@ def process_video(
     return output
 
 
+# CSS for the submit button color
+head = """
+<head>
+  <title>Chapter-Llama - VidChapters</title>
+  <link rel="icon" type="image/x-icon" href="./favicon.ico">
+</head>
+"""
+
+title_markdown = """
+<div style="display: flex; justify-content: space-between; align-items: center; background: linear-gradient(90deg, rgba(72,219,251,0.1), rgba(29,209,161,0.1)); border-radius: 20px; box-shadow: 0 4px 6px rgba(0,0,0,0.1); padding: 20px; margin-bottom: 20px;">
+  <div style="display: flex; align-items: center;">
+    <a href="https://github.com/lucas-ventura/chapter-llama" style="margin-right: 20px; text-decoration: none; display: flex; align-items: center;">
+      <img src="https://imagine.enpc.fr/~lucas.ventura/chapter-llama/images/chapter-llama.png" alt="Chapter-Llama" style="max-width: 100px; height: auto; border-radius: 15px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
+    </a>
+    <div>
+      <h1 style="margin: 0; background: linear-gradient(90deg, #8F68C3, #477EF4); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-size: 2.5em; font-weight: 700;">Chapter-Llama</h1>
+      <h2 style="margin: 10px 0; background: linear-gradient(90deg, #8F68C3, #477EF4); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-size: 1.8em; font-weight: 600;">Efficient Chaptering in Hour-Long Videos with LLMs</h2>
+      <div style="display: flex; gap: 15px; margin-top: 10px;">
+        <a href="https://github.com/lucas-ventura/chapter-llama" style="text-decoration: none; color: #8F68C3; font-weight: 500; transition: color 0.3s;">GitHub</a> |
+        <a href="https://imagine.enpc.fr/~lucas.ventura/chapter-llama/" style="text-decoration: none; color: #8F68C3; font-weight: 500; transition: color 0.3s;">Project Page</a> |
+        <a href="https://arxiv.org/abs/2504.00072" style="text-decoration: none; color: #8F68C3; font-weight: 500; transition: color 0.3s;">Paper</a>
+      </div>
+    </div>
+  </div>
+  <div style="text-align: right; margin-left: 20px;">
+    <h2 style="margin: 10px 0; color: #24467C; font-weight: 700; font-size: 2.5em;">CVPR 2025</h2>
+  </div>
+</div>
+"""
+
+# Citation from demo_sample.py
+bibtext = """
+### Citation
+```
+@article{ventura25chapter,
+    title = {{Chapter-Llama}: Efficient Chaptering in Hour-Long Videos with {LLM}s},
+    author = {Lucas Ventura and Antoine Yang and Cordelia Schmid and G{\"u}l Varol},
+    journal = {CVPR},
+    year = {2025}
+}
+```
+"""
+
 # Create the Gradio interface
-with gr.Blocks(title="Chapter-Llama") as demo:
-    gr.
-    gr.Markdown("## Chaptering in Hour-Long Videos with LLMs")
-    gr.Markdown(
-        "Upload a video file or provide a URL to generate chapters automatically."
-    )
+with gr.Blocks(title="Chapter-Llama", head=head) as demo:
+    gr.HTML(title_markdown)
     gr.Markdown(
         """
         This demo is currently using only the audio data (ASR), without frame information.
         We will add audio+captions functionality in the near future, which will improve
         chapter generation by incorporating visual content.
-
-        - GitHub: [https://github.com/lucas-ventura/chapter-llama](https://github.com/lucas-ventura/chapter-llama)
-        - Website: [https://imagine.enpc.fr/~lucas.ventura/chapter-llama/](https://imagine.enpc.fr/~lucas.ventura/chapter-llama/)
         """
     )
 
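The new banner relies on two Gradio features: `gr.Blocks(head=...)` injects raw HTML into the page `<head>` (used here for the tab title and favicon; the wrapping `<head>` tags in the committed string are tolerated but not required, and the `# CSS for the submit button color` comment appears to be a leftover), while `gr.HTML` renders the styled header `div`. A stripped-down sketch of the same pattern, with placeholder content, assuming a Gradio version that supports the `head` parameter (4.x):

```python
import gradio as gr

# Injected into the page <head>: custom tab title plus favicon (placeholder values).
head = """
<title>My Demo - Example</title>
<link rel="icon" type="image/x-icon" href="./favicon.ico">
"""

with gr.Blocks(title="My Demo", head=head) as demo:
    gr.HTML('<h1 style="color: #477EF4;">My Demo</h1>')  # arbitrary banner markup

if __name__ == "__main__":
    demo.launch()
```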
@@ -329,7 +340,7 @@ with gr.Blocks(title="Chapter-Llama") as demo:
         with gr.Column():
             status_area = gr.Markdown("**Status:** Ready to process video")
             output_text = gr.Textbox(
-                label="Generated Chapters", lines=
+                label="Generated Chapters", lines=12, interactive=False
             )
 
     def update_status_and_process(video_file, video_url, model_name, do_sample):
@@ -352,7 +363,9 @@ with gr.Blocks(title="Chapter-Llama") as demo:
         outputs=[status_area, output_text],
     )
 
+    gr.Markdown(bibtext)
+
 
 if __name__ == "__main__":
     # Launch the Gradio app
-    demo.launch()
+    demo.launch()