Update app.py
app.py CHANGED
@@ -9,6 +9,14 @@ import os
 import hashlib
 import base64
 from huggingface_hub import login
+import traceback
+import sys
+
+# Print Python and library versions for debugging
+print(f"Python version: {sys.version}")
+print(f"PyTorch version: {torch.__version__}")
+import transformers
+print(f"Transformers version: {transformers.__version__}")

 # Print token information (first few characters only for security)
 token = os.environ.get("HUGGINGFACE_TOKEN", "")

@@ -24,84 +32,103 @@ try:
 except Exception as e:
     print(f"Error logging in: {e}")

-#
-try:
-    from huggingface_hub import whoami
-    user_info = whoami()
-    print(f"Authenticated as: {user_info}")
-except Exception as e:
-    print(f"Error checking authentication: {e}")
-# Global variables for pipelines
-text_pipeline = None
-image_text_pipeline = None
+# Global variables
+llama_pipeline = None

-# Initialize
-def
-    global
+# Initialize Llama 4 Scout pipeline
+def load_llama_pipeline():
+    global llama_pipeline

-    if
+    if llama_pipeline is None:
         try:
-            print("Loading Llama 4 Scout
+            print("Loading Llama 4 Scout pipeline...")
+
+            # Use 4-bit quantization to reduce memory usage
+            from transformers import BitsAndBytesConfig

-                device_map="auto",
-                torch_dtype=torch.bfloat16,
-                token=token
+            quantization_config = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_compute_dtype=torch.float16,
+                bnb_4bit_quant_type="nf4"
             )
-            print("Text pipeline loaded successfully!")

-            #
+            # Try different pipeline types for Llama 4 Scout
+            pipeline_types = [
+                "image-to-text",
                 "image-text-to-text",
-                torch_dtype=torch.bfloat16,
-                token=token
-            )
-            print("Image-text pipeline loaded successfully!")
+                "visual-question-answering"
+            ]

-            text_pipeline = pipeline(
-                "text-generation",
+            for pipeline_type in pipeline_types:
+                try:
+                    print(f"Trying pipeline type: {pipeline_type}")
+                    llama_pipeline = pipeline(
+                        pipeline_type,
                         model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
                         device_map="auto",
+                        model_kwargs={"quantization_config": quantization_config},
                         token=token
                     )
-            print("
+                    print(f"Successfully loaded Llama 4 Scout with pipeline type: {pipeline_type}")
+                    break
+                except Exception as pipeline_error:
+                    print(f"Failed to load with pipeline type {pipeline_type}: {pipeline_error}")
+
+            if llama_pipeline is None:
+                # If all pipeline types fail, try loading with AutoModel classes
+                print("Trying to load with AutoModel classes...")
+                from transformers import AutoProcessor, AutoModelForVision2Seq
+
+                processor = AutoProcessor.from_pretrained(
+                    "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+                    token=token
+                )
+
+                model = AutoModelForVision2Seq.from_pretrained(
+                    "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+                    token=token,
+                    quantization_config=quantization_config,
+                    device_map="auto"
+                )
+
+                # Create a custom pipeline function
+                def custom_pipeline(image, prompt, max_new_tokens=300):
+                    inputs = processor(text=prompt, images=image, return_tensors="pt").to(model.device)
+                    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
+                    return processor.decode(outputs[0], skip_special_tokens=True)

+                llama_pipeline = custom_pipeline
+                print("Successfully created custom Llama 4 Scout pipeline")
+
+            # If still None, fall back to LLaVA
+            if llama_pipeline is None:
+                print("All Llama 4 Scout loading attempts failed, falling back to LLaVA...")
+                llama_pipeline = pipeline(
+                    "image-to-text",
+                    model="llava-hf/llava-1.5-7b-hf",
+                    device_map="auto",
+                    model_kwargs={"quantization_config": quantization_config}
+                )
+                print("LLaVA pipeline loaded as fallback")
+
+        except Exception as e:
+            print(f"Error loading pipeline: {e}")
+            print(traceback.format_exc())
+
+            # Final fallback to LLaVA if everything else fails
             try:
-                print(f"Error loading fallback image pipeline: {image_error}")
+                print("Falling back to LLaVA after error...")
+                llama_pipeline = pipeline(
+                    "image-to-text",
+                    model="llava-hf/llava-1.5-7b-hf",
+                    device_map="auto"
+                )
+                print("LLaVA pipeline loaded as fallback after error")
+            except Exception as fallback_error:
+                print(f"Even fallback failed: {fallback_error}")
                 raise

-    return
-# Function to convert PIL Image to base64
-def image_to_base64(img):
-    buffered = io.BytesIO()
-    img.save(buffered, format="PNG")
-    img_str = base64.b64encode(buffered.getvalue()).decode()
-    return img_str
+    return llama_pipeline

 # Simple caching mechanism
 cache = {}
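Both of the functions changed below read and write cache[cache_key], but the key computation itself sits in unchanged parts of app.py and is not visible in this diff. Purely as an illustration of how such a key could be built from the hashlib import kept at the top of the file, here is a hypothetical helper; the name make_cache_key and the hashing scheme are assumptions, not the app's actual code:

import hashlib
import io

def make_cache_key(img, *text_parts):
    # Hypothetical: hash the PNG bytes of the PIL image together with the
    # text arguments so identical requests hit the cache.
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    digest = hashlib.sha256(buf.getvalue())
    for part in text_parts:
        digest.update(str(part).encode("utf-8"))
    return digest.hexdigest()

# e.g. cache_key = make_cache_key(img, doc_type, verification_info)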
@@ -133,30 +160,43 @@ def verify_document(img, doc_type, verification_info):
         return f"[CACHED] {cache[cache_key]}"

     try:
-        # Load
+        # Load pipeline
+        pipeline = load_llama_pipeline()

         # Create prompt
         prompt = f"""This is a {doc_type} document.
 Verify if it's authentic and extract the following information: {verification_info}
 Provide your analysis in a structured format."""

-        # Process with pipeline
+        # Process with pipeline (with timeout)
+        start_time = time.time()
+        print(f"Starting document verification at {start_time}")
+
+        # Handle different pipeline types
+        if callable(pipeline) and not hasattr(pipeline, 'task'):  # Custom pipeline
+            result_text = pipeline(image=img, prompt=prompt, max_new_tokens=300)
+        elif hasattr(pipeline, 'task') and pipeline.task == "visual-question-answering":
+            result = pipeline(image=img, question=prompt, max_new_tokens=300)
+            result_text = result[0]["answer"] if isinstance(result, list) else result["answer"]
+        else:  # Standard pipeline
+            result = pipeline(image=img, prompt=prompt, max_new_tokens=300)
+            if isinstance(result, list):
+                result_text = result[0].get('generated_text', str(result))
+            else:
+                result_text = str(result)

+        end_time = time.time()
+        print(f"Completed document verification in {end_time - start_time:.2f} seconds")

         # Save to cache
-        cache[cache_key] =
+        cache[cache_key] = result_text

-        return
+        return result_text
     except Exception as e:
+        error_details = traceback.format_exc()
+        print(f"Error in verify_document: {e}")
+        print(error_details)
+        return f"Error processing document: {str(e)}\n\nPlease try again with a different image or try later."

 def check_workplace(img, industry):
     """Check workplace compliance using Llama 4 Scout"""
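The branching above exists because the possible pipelines return differently shaped results. As a rough guide (typical shapes, not guaranteed for every model): transformers "image-to-text" and "image-text-to-text" pipelines usually return a list of dicts with a "generated_text" key, "visual-question-answering" returns a list of dicts with an "answer" key, and the custom AutoModel wrapper above returns a plain string. The same normalization could be written as one small helper; this is a sketch, not code from the commit:

def normalize_result(result):
    # Collapse the result shapes listed above into a single string.
    if isinstance(result, str):
        return result
    if isinstance(result, list) and result:
        first = result[0]
        if isinstance(first, dict):
            return first.get("generated_text") or first.get("answer") or str(first)
    return str(result)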
@@ -172,8 +212,8 @@ def check_workplace(img, industry):
         return f"[CACHED] {cache[cache_key]}"

     try:
-        # Load
+        # Load pipeline
+        pipeline = load_llama_pipeline()

         # Create prompt
         prompt = f"""This is a workplace in the {industry} industry.

@@ -190,22 +230,35 @@ Format your response as a detailed assessment with:
 - Severity level for each issue
 - Recommendations for correction"""

-        # Process with pipeline
+        # Process with pipeline (with timeout)
+        start_time = time.time()
+        print(f"Starting workplace compliance check at {start_time}")
+
+        # Handle different pipeline types
+        if callable(pipeline) and not hasattr(pipeline, 'task'):  # Custom pipeline
+            result_text = pipeline(image=img, prompt=prompt, max_new_tokens=300)
+        elif hasattr(pipeline, 'task') and pipeline.task == "visual-question-answering":
+            result = pipeline(image=img, question=prompt, max_new_tokens=300)
+            result_text = result[0]["answer"] if isinstance(result, list) else result["answer"]
+        else:  # Standard pipeline
+            result = pipeline(image=img, prompt=prompt, max_new_tokens=300)
+            if isinstance(result, list):
+                result_text = result[0].get('generated_text', str(result))
+            else:
+                result_text = str(result)

+        end_time = time.time()
+        print(f"Completed workplace compliance check in {end_time - start_time:.2f} seconds")

         # Save to cache
-        cache[cache_key] =
+        cache[cache_key] = result_text

-        return
+        return result_text
     except Exception as e:
+        error_details = traceback.format_exc()
+        print(f"Error in check_workplace: {e}")
+        print(error_details)
+        return f"Error processing workplace image: {str(e)}\n\nPlease try again with a different image or try later."

 # Create Gradio interface
 with gr.Blocks(title="StaffManager AI Assistant") as demo:
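For completeness, a minimal way to exercise the two changed functions outside the Gradio UI. This is a sketch only: it assumes importing app does not auto-launch the interface, and the image paths and example arguments are placeholders, not values from the repository.

from PIL import Image

import app  # runs the login attempt and version prints at import time

doc_img = Image.open("sample_passport.png")    # placeholder test image
site_img = Image.open("sample_workshop.jpg")   # placeholder test image

print(app.verify_document(doc_img, "passport", "full name, date of birth, expiry date"))
print(app.check_workplace(site_img, "manufacturing"))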