cpg716 committed
Commit b604130 · verified · 1 Parent(s): e85540a

Create test_llama4.py

Files changed (1): test_llama4.py +158 -0
test_llama4.py ADDED
@@ -0,0 +1,158 @@
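+"""Quick smoke tests for Llama 4 through Hugging Face transformers.
+
+Reads a Hugging Face access token from the HUGGINGFACE_TOKEN environment
+variable, runs a text-only generation test, then an image+text test with
+Llama 4 Scout. Example invocation (the token value is a placeholder):
+
+    HUGGINGFACE_TOKEN=hf_xxx python test_llama4.py
+"""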
+import torch
+from transformers import AutoProcessor, Llama4ForConditionalGeneration
+import time
+import os
+from huggingface_hub import login
+import requests
+from PIL import Image
+from io import BytesIO
+
+# Print versions for debugging
+import sys
+print(f"Python version: {sys.version}")
+print(f"PyTorch version: {torch.__version__}")
+import transformers
+print(f"Transformers version: {transformers.__version__}")
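+# Note: the Llama 4 model classes need a recent transformers release (4.51.0 or newer),
+# so check the version printed above if the imports fail.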
+
+# Get token from environment
+token = os.environ.get("HUGGINGFACE_TOKEN", "")
+if token:
+    print(f"Token found: {token[:5]}...")
+else:
+    print("No token found in environment variables!")
+
+# Login to Hugging Face
+try:
+    login(token=token)
+    print("Successfully logged in to Hugging Face Hub")
+except Exception as e:
+    print(f"Error logging in: {e}")
+
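+# Note: device_map="auto" shards the weights across available GPUs (spilling to
+# CPU RAM if needed), and bfloat16 roughly halves memory use versus float32.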
+# Test 1: Simple text generation with Llama 4
+def test_text_generation():
+    print("\n=== Testing Text Generation ===")
+    try:
+        from transformers import AutoModelForCausalLM, AutoTokenizer
+
+        model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"  # Scout is the smallest Llama 4 checkpoint
+
+        print(f"Loading tokenizer from {model_id}...")
+        tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
+
+        print(f"Loading model from {model_id}...")
+        model = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            token=token,
+            torch_dtype=torch.bfloat16,
+            device_map="auto"
+        )
+
+        print("Model and tokenizer loaded successfully!")
+
+        # Simple prompt
+        prompt = "Write a short poem about artificial intelligence."
+
+        print(f"Generating text for prompt: '{prompt}'")
+        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+        start_time = time.time()
+        outputs = model.generate(**inputs, max_new_tokens=100)
+        end_time = time.time()
+
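+        # skip_special_tokens drops BOS/EOS markers from the decoded text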
+        result = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+        print(f"Generation completed in {end_time - start_time:.2f} seconds")
+        print(f"Result: {result}")
+        return True
+    except Exception as e:
+        print(f"Error in text generation test: {e}")
+        import traceback
+        print(traceback.format_exc())
+        return False
+
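+# Note: Llama 4 is natively multimodal; Llama4ForConditionalGeneration takes
+# interleaved image and text inputs prepared by AutoProcessor.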
+# Test 2: Image-text generation with Llama 4 Scout
+def test_image_text_generation():
+    print("\n=== Testing Image-Text Generation ===")
+    try:
+        model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"  # Scout is the smallest Llama 4 checkpoint
+
+        print(f"Loading processor from {model_id}...")
+        processor = AutoProcessor.from_pretrained(model_id, token=token)
+
+        print(f"Loading model from {model_id}...")
+        model = Llama4ForConditionalGeneration.from_pretrained(
+            model_id,
+            token=token,
+            torch_dtype=torch.bfloat16,
+            device_map="auto"
+        )
+
+        print("Model and processor loaded successfully!")
+
+        # Load a test image
+        print("Loading test image...")
+        img_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg"
+        response = requests.get(img_url)
+        img = Image.open(BytesIO(response.content))
+        print(f"Image loaded: {img.size}")
+
+        # Simple prompt
+        prompt = "Describe this image in two sentences."
+
+        print(f"Creating messages with prompt: '{prompt}'")
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    # Pass the image URL directly; the processor's chat template
+                    # fetches and preprocesses the image itself
+                    {"type": "image", "url": img_url},
+                    {"type": "text", "text": prompt},
+                ]
+            },
+        ]
+
+        print("Applying chat template...")
+        inputs = processor.apply_chat_template(
+            messages,
+            add_generation_prompt=True,
+            tokenize=True,
+            return_dict=True,
+            return_tensors="pt",
+        ).to(model.device)
+
+        print("Generating response...")
+        start_time = time.time()
+        outputs = model.generate(**inputs, max_new_tokens=100)
+        end_time = time.time()
+
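+        # Slice off the prompt tokens so only the newly generated text is decoded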
+        result = processor.batch_decode(outputs[:, inputs["input_ids"].shape[-1]:])[0]
+
+        print(f"Generation completed in {end_time - start_time:.2f} seconds")
+        print(f"Result: {result}")
+        return True
+    except Exception as e:
+        print(f"Error in image-text generation test: {e}")
+        import traceback
+        print(traceback.format_exc())
+        return False
+
+if __name__ == "__main__":
+    print("Starting Llama 4 tests...")
+
+    # Run text generation test
+    text_success = test_text_generation()
+
+    # Run image-text generation test if text test succeeds
+    if text_success:
+        image_text_success = test_image_text_generation()
+    else:
+        print("Skipping image-text test due to text test failure")
+        image_text_success = False
+
+    # Summary
+    print("\n=== Test Summary ===")
+    print(f"Text Generation Test: {'SUCCESS' if text_success else 'FAILED'}")
+    print(f"Image-Text Generation Test: {'SUCCESS' if image_text_success else 'FAILED'}")
+
+    if text_success and image_text_success:
+        print("\nAll tests passed! Your Llama 4 Scout setup is working correctly.")
+    else:
+        print("\nSome tests failed. Please check the error messages above.")