lucas-ventura committed on
Commit 4947e5d · verified · 1 Parent(s): 2f278a4

Update app.py

Files changed (1):
  app.py  +51 -38
app.py CHANGED

@@ -1,9 +1,6 @@
  import os
- import nvidia.cublas.lib
- import nvidia.cudnn.lib
  import tempfile
  from pathlib import Path
- import subprocess

  import gradio as gr
  import spaces
@@ -17,7 +14,6 @@ from src.models.llama_inference import inference
  from src.test.vidchapters import get_chapters
  from tools.download.models import download_base_model, download_model

-
  # Set up proxies
  # from urllib.request import getproxies
  # proxies = getproxies()
@@ -34,29 +30,6 @@ inference_model = None
  LLAMA_CKPT_PATH = "meta-llama/Meta-Llama-3.1-8B-Instruct"


- cublas_path = os.path.dirname(nvidia.cublas.lib.__file__)
- cudnn_path = os.path.dirname(nvidia.cudnn.lib.__file__)
-
- ld_library_path = f"{cublas_path}:{cudnn_path}"
- os.environ["LD_LIBRARY_PATH"] = ld_library_path
-
- def install_cudnn():
-     """Installs specific versions of libcudnn and configures torch for TF32."""
-
-     try:
-         subprocess.run(["apt-get", "update"], check=True)
-         subprocess.run(["apt-get", "install", "-y", "libcudnn8=8.9.2.26-1+cuda12.1"], check=True)
-         subprocess.run(["apt-get", "install", "-y", "libcudnn8-dev=8.9.2.26-1+cuda12.1"], check=True)
-         subprocess.run(["python", "-c", "import torch; torch.backends.cuda.matmul.allow_tf32 = True; torch.backends.cudnn.allow_tf32 = True"], check=True)
-         subprocess.run(["ln", "-s", "/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so", "/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8"], check=True)
-         print("cuDNN installation and configuration successful.")
-     except subprocess.CalledProcessError as e:
-         print(f"Error during cuDNN installation: {e}")
-     except FileNotFoundError:
-         print("apt-get or python not found. Ensure they are in your PATH.")
-
- install_cudnn()
-
  @spaces.GPU
  def load_base_model():
      """Load the base Llama model and tokenizer once at startup."""
@@ -87,6 +60,7 @@ def load_base_model():
      tokenizer.pad_token = tokenizer.eos_token
      print("Base model loaded successfully")

+
  @spaces.GPU
  class FastLlamaInference:
      def __init__(
@@ -145,6 +119,7 @@ class FastLlamaInference:

      return inference(**params)

+
  @spaces.GPU
  def load_peft(model_name: str = "asr-10k"):
      """Load or switch PEFT model while reusing the base model."""
@@ -283,21 +258,57 @@ def process_video(
      return output


+ # CSS for the submit button color
+ head = """
+ <head>
+ <title>Chapter-Llama - VidChapters</title>
+ <link rel="icon" type="image/x-icon" href="./favicon.ico">
+ </head>
+ """
+
+ title_markdown = """
+ <div style="display: flex; justify-content: space-between; align-items: center; background: linear-gradient(90deg, rgba(72,219,251,0.1), rgba(29,209,161,0.1)); border-radius: 20px; box-shadow: 0 4px 6px rgba(0,0,0,0.1); padding: 20px; margin-bottom: 20px;">
+     <div style="display: flex; align-items: center;">
+         <a href="https://github.com/lucas-ventura/chapter-llama" style="margin-right: 20px; text-decoration: none; display: flex; align-items: center;">
+             <img src="https://imagine.enpc.fr/~lucas.ventura/chapter-llama/images/chapter-llama.png" alt="Chapter-Llama" style="max-width: 100px; height: auto; border-radius: 15px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
+         </a>
+         <div>
+             <h1 style="margin: 0; background: linear-gradient(90deg, #8F68C3, #477EF4); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-size: 2.5em; font-weight: 700;">Chapter-Llama</h1>
+             <h2 style="margin: 10px 0; background: linear-gradient(90deg, #8F68C3, #477EF4); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-size: 1.8em; font-weight: 600;">Efficient Chaptering in Hour-Long Videos with LLMs</h2>
+             <div style="display: flex; gap: 15px; margin-top: 10px;">
+                 <a href="https://github.com/lucas-ventura/chapter-llama" style="text-decoration: none; color: #8F68C3; font-weight: 500; transition: color 0.3s;">GitHub</a> |
+                 <a href="https://imagine.enpc.fr/~lucas.ventura/chapter-llama/" style="text-decoration: none; color: #8F68C3; font-weight: 500; transition: color 0.3s;">Project Page</a> |
+                 <a href="https://arxiv.org/abs/2504.00072" style="text-decoration: none; color: #8F68C3; font-weight: 500; transition: color 0.3s;">Paper</a>
+             </div>
+         </div>
+     </div>
+     <div style="text-align: right; margin-left: 20px;">
+         <h2 style="margin: 10px 0; color: #24467C; font-weight: 700; font-size: 2.5em;">CVPR 2025</h2>
+     </div>
+ </div>
+ """
+
+ # Citation from demo_sample.py
+ bibtext = """
+ ### Citation
+ ```
+ @article{ventura25chapter,
+     title = {{Chapter-Llama}: Efficient Chaptering in Hour-Long Videos with {LLM}s},
+     author = {Lucas Ventura and Antoine Yang and Cordelia Schmid and G{\"u}l Varol},
+     journal = {CVPR},
+     year = {2025}
+ }
+ ```
+ """
+
  # Create the Gradio interface
- with gr.Blocks(title="Chapter-Llama") as demo:
-     gr.Markdown("# Chapter-Llama")
-     gr.Markdown("## Chaptering in Hour-Long Videos with LLMs")
-     gr.Markdown(
-         "Upload a video file or provide a URL to generate chapters automatically."
-     )
+ with gr.Blocks(title="Chapter-Llama", head=head) as demo:
+     gr.HTML(title_markdown)
      gr.Markdown(
          """
          This demo is currently using only the audio data (ASR), without frame information.
          We will add audio+captions functionality in the near future, which will improve
          chapter generation by incorporating visual content.
-
-         - GitHub: [https://github.com/lucas-ventura/chapter-llama](https://github.com/lucas-ventura/chapter-llama)
-         - Website: [https://imagine.enpc.fr/~lucas.ventura/chapter-llama/](https://imagine.enpc.fr/~lucas.ventura/chapter-llama/)
          """
      )

@@ -329,7 +340,7 @@ with gr.Blocks(title="Chapter-Llama") as demo:
          with gr.Column():
              status_area = gr.Markdown("**Status:** Ready to process video")
              output_text = gr.Textbox(
-                 label="Generated Chapters", lines=10, interactive=False
+                 label="Generated Chapters", lines=12, interactive=False
              )

      def update_status_and_process(video_file, video_url, model_name, do_sample):
@@ -352,7 +363,9 @@ with gr.Blocks(title="Chapter-Llama") as demo:
          outputs=[status_area, output_text],
      )

+     gr.Markdown(bibtext)
+

  if __name__ == "__main__":
      # Launch the Gradio app
      demo.launch()
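For reference, a minimal, self-contained sketch of the `head`/`gr.HTML` banner pattern this commit adopts. It assumes Gradio ≥ 4.0, where `gr.Blocks` accepts a `head` argument; the `process` handler and `title_html` banner below are hypothetical stand-ins for the real chaptering pipeline and banner HTML in `app.py`:

```python
import gradio as gr

# Extra HTML injected into the page's <head>; the `head` parameter of
# gr.Blocks is available from Gradio 4.0 onward.
head = """
<title>Chapter-Llama - VidChapters</title>
<link rel="icon" type="image/x-icon" href="./favicon.ico">
"""

# Static banner rendered at the top of the app, replacing the stacked
# gr.Markdown headers the commit removes.
title_html = "<h1>Chapter-Llama</h1>"


def process(video_url: str) -> str:
    # Hypothetical stand-in for the real chaptering pipeline.
    return f"Chapters for {video_url}"


with gr.Blocks(title="Chapter-Llama", head=head) as demo:
    gr.HTML(title_html)
    url = gr.Textbox(label="Video URL")
    out = gr.Textbox(label="Generated Chapters", lines=12, interactive=False)
    gr.Button("Process").click(process, inputs=url, outputs=out)

if __name__ == "__main__":
    demo.launch()
```

A single `gr.HTML` block gives full control over the banner's markup and inline styles, which plain `gr.Markdown` headings do not.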