Spaces:

yunusajib
/

transport_query_assistant

Sleeping

App Files Files Community

yunusajib committed 27 days ago

Commit

932d067

verified ·

1 Parent(s): 971be40

update app and model

Browse files

Files changed (2) hide show

app.py +23 -4
llava_inference.py +86 -60

app.py CHANGED Viewed

@@ -1,13 +1,31 @@
 import gradio as gr
 from PIL import Image
 from llava_inference import LLaVAHelper
-model = LLaVAHelper()
 def answer_question(image, question):
     if image is None or question.strip() == "":
         return "Please upload an image and enter a question."
-    return model.generate_answer(image, question)
 demo = gr.Interface(
     fn=answer_question,
@@ -19,9 +37,10 @@ demo = gr.Interface(
     title="UK Public Transport Assistant",
     description="Upload an image of UK public transport signage (like train timetables or metro maps), and ask a question related to it. Powered by LLaVA-1.5.",
     examples=[
-        ["assets/example.jpg", "Where is platform 3?"],
     ]
 )
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 from PIL import Image
+import os
+import sys
 from llava_inference import LLaVAHelper
+# Initialize the model defensively: a load failure must not prevent the UI from starting
+try:
+    model = LLaVAHelper()
+except Exception as e:
+    print(f"Failed to initialize LLaVA model: {e}")
+    # Continue execution to show error in the UI
+    model = None
 def answer_question(image, question):
+    """Answer a question about an image using the loaded LLaVA model.
+
+    Args:
+        image: Image passed in by the Gradio interface, or None when
+            nothing was uploaded.
+        question: Question text passed in by the Gradio interface; may be
+            None or blank.
+
+    Returns:
+        The model's answer, or a human-readable message when the model is
+        unavailable, the input is incomplete, or inference fails.
+    """
+    if model is None:
+        return "Model initialization failed. Please check server logs."
+    # `not question` also covers None, which would otherwise crash on .strip().
+    if image is None or not question or not question.strip():
         return "Please upload an image and enter a question."
+    try:
+        return model.generate_answer(image, question)
+    except Exception as e:
+        # UI boundary: report the failure as text instead of a stack trace.
+        return f"Error processing request: {e}"
+# Create examples directory if it doesn't exist
+os.makedirs("assets", exist_ok=True)
 demo = gr.Interface(
     fn=answer_question,
     title="UK Public Transport Assistant",
     description="Upload an image of UK public transport signage (like train timetables or metro maps), and ask a question related to it. Powered by LLaVA-1.5.",
+    # Offer the example only when the sample image is present. The value must
+    # be a nested list (one inner list per example) or None; a list that
+    # contains None is rejected by gr.Interface at startup.
+    examples=(
+        [["assets/example.jpg", "Where is platform 3?"]]
+        if os.path.exists("assets/example.jpg")
+        else None
+    ),
 )
 if __name__ == "__main__":
+    demo.launch(share=True)  # Added share=True to make it accessible on a public URL

llava_inference.py CHANGED Viewed

@@ -1,35 +1,58 @@
 from llava.model.builder import load_pretrained_model
 from llava.mm_utils import process_images, tokenizer_image_token
-from transformers import AutoTokenizer
 import torch
 import requests
 from PIL import Image
 from io import BytesIO
 class LLaVAHelper:
     def __init__(self, model_name="llava-hf/llava-1.5-7b-hf"):
-        # Use cache_dir to avoid issues with the default cache location
-        # and disable force_download to use cached versions when available
-        self.tokenizer = AutoTokenizer.from_pretrained(
-            model_name,
-            cache_dir="./model_cache",
-            force_download=False,
-            trust_remote_code=True
-        )
-        # Load model with same cache directory
-        self.model, self.image_processor, _ = load_pretrained_model(
-            model_name,
-            None,
-            cache_dir="./model_cache"
-        )
-        self.model.eval()
-        # Move model to appropriate device
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.model.to(self.device)
-        print(f"Model loaded on {self.device}")
     def generate_answer(self, image, question):
         """
         Generate a response to a question about an image
@@ -41,47 +64,50 @@ class LLaVAHelper:
         Returns:
             String response from the model
         """
-        # Handle image input (either PIL Image or path/URL)
-        if isinstance(image, str):
-            if image.startswith(('http://', 'https://')):
-                response = requests.get(image)
-                image = Image.open(BytesIO(response.content))
-            else:
-                image = Image.open(image)
-        # Preprocess image
-        image_tensor = process_images(
-            [image],
-            self.image_processor,
-            self.model.config
-        )[0].unsqueeze(0).to(self.device)
-        # Format prompt with question
-        prompt = f"###Human: <image>\n{question}\n###Assistant:"
-        # Tokenize prompt
-        input_ids = tokenizer_image_token(
-            prompt,
-            self.tokenizer,
-            return_tensors="pt"
-        ).to(self.device)
-        # Generate response
-        with torch.no_grad():
-            output_ids = self.model.generate(
-                input_ids=input_ids.input_ids,
-                images=image_tensor,
-                max_new_tokens=512,
-                do_sample=True,
-                temperature=0.7,
-                top_p=0.9,
-            )
-        # Decode and extract response
-        output = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
-        return output.split("###Assistant:")[-1].strip()
-# Example usage
 if __name__ == "__main__":
     try:
         # Initialize model

 from llava.model.builder import load_pretrained_model
 from llava.mm_utils import process_images, tokenizer_image_token
+from transformers import AutoTokenizer, AutoConfig
 import torch
 import requests
 from PIL import Image
 from io import BytesIO
+import os
 class LLaVAHelper:
     def __init__(self, model_name="llava-hf/llava-1.5-7b-hf"):
+        """Load the tokenizer, model and image processor for *model_name*.
+
+        Args:
+            model_name: Hugging Face model id or local path of the LLaVA
+                checkpoint. If its config cannot be loaded,
+                "llava-hf/llava-1.5-13b-hf" is tried instead.
+
+        Raises:
+            Exception: Any error raised while loading the tokenizer or the
+                model is printed and re-raised unchanged.
+        """
+        # Function-scope import keeps this method self-contained.
+        from llava.mm_utils import get_model_name_from_path
+
+        # Create cache directory if it doesn't exist
+        os.makedirs("./model_cache", exist_ok=True)
+        # First, try loading just the config to ensure the model is valid
+        try:
+            AutoConfig.from_pretrained(model_name)
+        except Exception as e:
+            print(f"Error loading model config: {e}")
+            # Try a different model version as fallback
+            model_name = "llava-hf/llava-1.5-13b-hf"
+            print(f"Trying alternative model: {model_name}")
+        try:
+            # Use specific tokenizer class to avoid issues
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                model_name,
+                cache_dir="./model_cache",
+                use_fast=False,  # Use the Python implementation instead of the Rust one
+                legacy=True
+            )
+            # load_pretrained_model takes the model name as its third
+            # positional argument and returns a 4-tuple
+            # (tokenizer, model, image_processor, context_len). The tokenizer
+            # loaded above is kept, so the first and last items are discarded.
+            _, self.model, self.image_processor, _ = load_pretrained_model(
+                model_name,
+                None,
+                get_model_name_from_path(model_name),
+                cache_dir="./model_cache",
+                load_8bit=False,
+                load_4bit=False,
+                device_map="auto"
+            )
+            self.model.eval()
+            # Record the device that inputs must be moved to at inference time
+            self.device = "cuda" if torch.cuda.is_available() else "cpu"
+            if self.device == "cpu":
+                # If using CPU, make sure model is in the right place
+                self.model = self.model.to(self.device)
+            print(f"Model loaded on {self.device}")
+        except Exception as e:
+            print(f"Detailed initialization error: {e}")
+            raise
     def generate_answer(self, image, question):
         """
         Generate a response to a question about an image
+
+        Args:
+            image: PIL image, local file path, or http(s) URL of the image
+            question: Question text to ask about the image
+
         Returns:
+            String response from the model, or an "Error generating answer"
+            message when any step fails
         """
+        try:
+            # Handle image input (either PIL Image or path/URL)
+            if isinstance(image, str):
+                if image.startswith(('http://', 'https://')):
+                    # Bound the wait and fail on HTTP errors rather than
+                    # handing an error page to the image decoder.
+                    response = requests.get(image, timeout=30)
+                    response.raise_for_status()
+                    image = Image.open(BytesIO(response.content))
+                else:
+                    image = Image.open(image)
+            # Normalize to 3-channel RGB before preprocessing (uploads may
+            # be greyscale, palette-based, or carry an alpha channel).
+            if isinstance(image, Image.Image):
+                image = image.convert("RGB")
+            # Preprocess image
+            image_tensor = process_images(
+                [image],
+                self.image_processor,
+                self.model.config
+            )[0].unsqueeze(0).to(self.device)
+            # Format prompt with question
+            prompt = f"###Human: <image>\n{question}\n###Assistant:"
+            # tokenizer_image_token returns a 1-D tensor of token ids (not a
+            # tokenizer output object), so add the batch dimension here.
+            input_ids = tokenizer_image_token(
+                prompt,
+                self.tokenizer,
+                return_tensors="pt"
+            ).unsqueeze(0).to(self.device)
+            # Generate response
+            with torch.no_grad():
+                output_ids = self.model.generate(
+                    input_ids,
+                    images=image_tensor,
+                    max_new_tokens=512,
+                    do_sample=True,
+                    temperature=0.7,
+                    top_p=0.9,
+                )
+            # Decode and extract response
+            output = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
+            return output.split("###Assistant:")[-1].strip()
+        except Exception as e:
+            # Callers expect a string in every case, so report the failure as text.
+            return f"Error generating answer: {e}"
+# Example usage
 if __name__ == "__main__":
     try:
         # Initialize model