Update app.py

app.py CHANGED
@@ -1,9 +1,6 @@
 import os
-import nvidia.cublas.lib
-import nvidia.cudnn.lib
 import tempfile
 from pathlib import Path
-import subprocess
 
 import gradio as gr
 import spaces
@@ -17,7 +14,6 @@ from src.models.llama_inference import inference
 from src.test.vidchapters import get_chapters
 from tools.download.models import download_base_model, download_model
 
-
 # Set up proxies
 # from urllib.request import getproxies
 # proxies = getproxies()
@@ -34,29 +30,6 @@ inference_model = None
 LLAMA_CKPT_PATH = "meta-llama/Meta-Llama-3.1-8B-Instruct"
 
 
-cublas_path = os.path.dirname(nvidia.cublas.lib.__file__)
-cudnn_path = os.path.dirname(nvidia.cudnn.lib.__file__)
-
-ld_library_path = f"{cublas_path}:{cudnn_path}"
-os.environ["LD_LIBRARY_PATH"] = ld_library_path
-
-def install_cudnn():
-    """Installs specific versions of libcudnn and configures torch for TF32."""
-
-    try:
-        subprocess.run(["apt-get", "update"], check=True)
-        subprocess.run(["apt-get", "install", "-y", "libcudnn8=8.9.2.26-1+cuda12.1"], check=True)
-        subprocess.run(["apt-get", "install", "-y", "libcudnn8-dev=8.9.2.26-1+cuda12.1"], check=True)
-        subprocess.run(["python", "-c", "import torch; torch.backends.cuda.matmul.allow_tf32 = True; torch.backends.cudnn.allow_tf32 = True"], check=True)
-        subprocess.run(["ln", "-s", "/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so", "/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8"], check=True)
-        print("cuDNN installation and configuration successful.")
-    except subprocess.CalledProcessError as e:
-        print(f"Error during cuDNN installation: {e}")
-    except FileNotFoundError:
-        print("apt-get or python not found. Ensure they are in your PATH.")
-
-install_cudnn()
-
 @spaces.GPU
 def load_base_model():
     """Load the base Llama model and tokenizer once at startup."""
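The block removed above tried to apt-install a pinned cuDNN at import time, which typically fails on Spaces because the app process has no root access; setting `os.environ["LD_LIBRARY_PATH"]` from inside a running process also has no effect, since the dynamic loader reads that variable only at process startup. If wheel-shipped CUDA libraries are ever needed again, a root-free sketch is below. It assumes the nvidia-cudnn-cu12 wheel is listed in requirements.txt; the `preload_cudnn` helper is hypothetical and not part of this commit.

```python
# Hypothetical, root-free alternative to the removed apt-get install:
# preload the cuDNN shared objects shipped inside the pip wheel so that
# libraries loaded later resolve them without editing LD_LIBRARY_PATH.
import ctypes
from pathlib import Path

import nvidia.cudnn  # provided by the nvidia-cudnn-cu12 wheel (assumed installed)


def preload_cudnn() -> None:
    lib_dir = Path(nvidia.cudnn.__path__[0]) / "lib"
    for so_file in sorted(lib_dir.glob("libcudnn*.so*")):
        # RTLD_GLOBAL makes the symbols visible to subsequently loaded libraries.
        ctypes.CDLL(str(so_file), mode=ctypes.RTLD_GLOBAL)


preload_cudnn()
```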
@@ -87,6 +60,7 @@ def load_base_model():
     tokenizer.pad_token = tokenizer.eos_token
     print("Base model loaded successfully")
 
+
 @spaces.GPU
 class FastLlamaInference:
     def __init__(
@@ -145,6 +119,7 @@ class FastLlamaInference:
 
         return inference(**params)
 
+
 @spaces.GPU
 def load_peft(model_name: str = "asr-10k"):
     """Load or switch PEFT model while reusing the base model."""
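The `@spaces.GPU` decorator kept on these definitions is the ZeroGPU mechanism: the Space holds no GPU at rest, and a device is attached only for the duration of each decorated call. A minimal sketch of the pattern (the `generate` function and the `duration` value are illustrative, not from this app):

```python
import spaces
import torch


@spaces.GPU(duration=120)  # request a GPU for up to ~120 s per call (illustrative value)
def generate(prompt: str) -> str:
    # CUDA is only guaranteed to be available inside the decorated call.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return f"[{device}] {prompt}"
```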
@@ -283,21 +258,57 @@ def process_video(
     return output
 
 
+# CSS for the submit button color
+head = """
+<head>
+  <title>Chapter-Llama - VidChapters</title>
+  <link rel="icon" type="image/x-icon" href="./favicon.ico">
+</head>
+"""
+
+title_markdown = """
+<div style="display: flex; justify-content: space-between; align-items: center; background: linear-gradient(90deg, rgba(72,219,251,0.1), rgba(29,209,161,0.1)); border-radius: 20px; box-shadow: 0 4px 6px rgba(0,0,0,0.1); padding: 20px; margin-bottom: 20px;">
+  <div style="display: flex; align-items: center;">
+    <a href="https://github.com/lucas-ventura/chapter-llama" style="margin-right: 20px; text-decoration: none; display: flex; align-items: center;">
+      <img src="https://imagine.enpc.fr/~lucas.ventura/chapter-llama/images/chapter-llama.png" alt="Chapter-Llama" style="max-width: 100px; height: auto; border-radius: 15px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
+    </a>
+    <div>
+      <h1 style="margin: 0; background: linear-gradient(90deg, #8F68C3, #477EF4); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-size: 2.5em; font-weight: 700;">Chapter-Llama</h1>
+      <h2 style="margin: 10px 0; background: linear-gradient(90deg, #8F68C3, #477EF4); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-size: 1.8em; font-weight: 600;">Efficient Chaptering in Hour-Long Videos with LLMs</h2>
+      <div style="display: flex; gap: 15px; margin-top: 10px;">
+        <a href="https://github.com/lucas-ventura/chapter-llama" style="text-decoration: none; color: #8F68C3; font-weight: 500; transition: color 0.3s;">GitHub</a> |
+        <a href="https://imagine.enpc.fr/~lucas.ventura/chapter-llama/" style="text-decoration: none; color: #8F68C3; font-weight: 500; transition: color 0.3s;">Project Page</a> |
+        <a href="https://arxiv.org/abs/2504.00072" style="text-decoration: none; color: #8F68C3; font-weight: 500; transition: color 0.3s;">Paper</a>
+      </div>
+    </div>
+  </div>
+  <div style="text-align: right; margin-left: 20px;">
+    <h2 style="margin: 10px 0; color: #24467C; font-weight: 700; font-size: 2.5em;">CVPR 2025</h2>
+  </div>
+</div>
+"""
+
+# Citation from demo_sample.py
+bibtext = """
+### Citation
+```
+@article{ventura25chapter,
+    title = {{Chapter-Llama}: Efficient Chaptering in Hour-Long Videos with {LLM}s},
+    author = {Lucas Ventura and Antoine Yang and Cordelia Schmid and G{\"u}l Varol},
+    journal = {CVPR},
+    year = {2025}
+}
+```
+"""
+
 # Create the Gradio interface
-with gr.Blocks(title="Chapter-Llama") as demo:
-    gr.
-    gr.Markdown("## Chaptering in Hour-Long Videos with LLMs")
-    gr.Markdown(
-        "Upload a video file or provide a URL to generate chapters automatically."
-    )
+with gr.Blocks(title="Chapter-Llama", head=head) as demo:
+    gr.HTML(title_markdown)
     gr.Markdown(
         """
         This demo is currently using only the audio data (ASR), without frame information.
         We will add audio+captions functionality in the near future, which will improve
         chapter generation by incorporating visual content.
-
-        - GitHub: [https://github.com/lucas-ventura/chapter-llama](https://github.com/lucas-ventura/chapter-llama)
-        - Website: [https://imagine.enpc.fr/~lucas.ventura/chapter-llama/](https://imagine.enpc.fr/~lucas.ventura/chapter-llama/)
         """
     )
 
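The new banner relies on two Gradio features: `gr.Blocks(head=...)` injects raw HTML into the page `<head>` (used here for the tab title and favicon; the wrapping `<head>` tags in the committed string are tolerated but not required, and the `# CSS for the submit button color` comment appears to be a leftover), while `gr.HTML` renders the styled header `div`. A stripped-down sketch of the same pattern, with placeholder content, assuming a Gradio version that supports the `head` parameter (4.x):

```python
import gradio as gr

# Injected into the page <head>: custom tab title plus favicon (placeholder values).
head = """
<title>My Demo - Example</title>
<link rel="icon" type="image/x-icon" href="./favicon.ico">
"""

with gr.Blocks(title="My Demo", head=head) as demo:
    gr.HTML('<h1 style="color: #477EF4;">My Demo</h1>')  # arbitrary banner markup

if __name__ == "__main__":
    demo.launch()
```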
@@ -329,7 +340,7 @@ with gr.Blocks(title="Chapter-Llama") as demo:
         with gr.Column():
             status_area = gr.Markdown("**Status:** Ready to process video")
             output_text = gr.Textbox(
-                label="Generated Chapters", lines=
+                label="Generated Chapters", lines=12, interactive=False
             )
 
     def update_status_and_process(video_file, video_url, model_name, do_sample):
@@ -352,7 +363,9 @@ with gr.Blocks(title="Chapter-Llama") as demo:
         outputs=[status_area, output_text],
     )
 
+    gr.Markdown(bibtext)
+
 
 if __name__ == "__main__":
     # Launch the Gradio app
-    demo.launch()
+    demo.launch()