import torch
from transformers import AutoProcessor, Llama4ForConditionalGeneration
import time
import os
from huggingface_hub import login
import requests
from PIL import Image
from io import BytesIO

# Print versions for debugging
import sys
print(f"Python version: {sys.version}")
print(f"PyTorch version: {torch.__version__}")
import transformers
print(f"Transformers version: {transformers.__version__}")

# Get token from environment
token = os.environ.get("HUGGINGFACE_TOKEN", "")
if token:
    print(f"Token found: {token[:5]}...")
else:
    print("No token found in environment variables!")

# Login to Hugging Face
try:
    login(token=token)
    print("Successfully logged in to Hugging Face Hub")
except Exception as e:
    print(f"Error logging in: {e}")


# Test 1: Simple text generation with Llama 4
def test_text_generation():
    print("\n=== Testing Text Generation ===")
    try:
        from transformers import AutoModelForCausalLM, AutoTokenizer

        # Scout is the smaller of the two released Llama 4 checkpoints
        model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"

        print(f"Loading tokenizer from {model_id}...")
        tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)

        print(f"Loading model from {model_id}...")
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            token=token,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )
        print("Model and tokenizer loaded successfully!")

        # Simple prompt
        prompt = "Write a short poem about artificial intelligence."
        print(f"Generating text for prompt: '{prompt}'")

        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        start_time = time.time()
        outputs = model.generate(**inputs, max_new_tokens=100)
        end_time = time.time()

        result = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"Generation completed in {end_time - start_time:.2f} seconds")
        print(f"Result: {result}")
        return True
    except Exception as e:
        print(f"Error in text generation test: {e}")
        import traceback
        print(traceback.format_exc())
        return False


# Test 2: Image-text generation with Llama 4 Scout
def test_image_text_generation():
    print("\n=== Testing Image-Text Generation ===")
    try:
        model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"

        print(f"Loading processor from {model_id}...")
        processor = AutoProcessor.from_pretrained(model_id, token=token)

        print(f"Loading model from {model_id}...")
        model = Llama4ForConditionalGeneration.from_pretrained(
            model_id,
            token=token,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )
        print("Model and processor loaded successfully!")

        # Load a test image (kept as a URL so the processor can fetch it too)
        print("Loading test image...")
        image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg"
        response = requests.get(image_url)
        img = Image.open(BytesIO(response.content))
        print(f"Image loaded: {img.size}")

        # Simple prompt
        prompt = "Describe this image in two sentences."
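        # If the image must be passed inline instead of by URL, a proper
        # base64 data URI should also work (assumption: a recent transformers
        # release whose image loader accepts "data:image/..." strings):
        #   import base64
        #   data_uri = "data:image/jpeg;base64," + base64.b64encode(response.content).decode("ascii")
        #   {"type": "image", "url": data_uri}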
print(f"Creating messages with prompt: '{prompt}'") messages = [ { "role": "user", "content": [ {"type": "image", "url": "data:image/jpeg;base64," + BytesIO(response.content).getvalue().hex()}, {"type": "text", "text": prompt}, ] }, ] print("Applying chat template...") inputs = processor.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) print("Generating response...") start_time = time.time() outputs = model.generate(**inputs, max_new_tokens=100) end_time = time.time() result = processor.batch_decode(outputs[:, inputs["input_ids"].shape[-1]:])[0] print(f"Generation completed in {end_time - start_time:.2f} seconds") print(f"Result: {result}") return True except Exception as e: print(f"Error in image-text generation test: {e}") import traceback print(traceback.format_exc()) return False if __name__ == "__main__": print("Starting Llama 4 tests...") # Run text generation test text_success = test_text_generation() # Run image-text generation test if text test succeeds if text_success: image_text_success = test_image_text_generation() else: print("Skipping image-text test due to text test failure") image_text_success = False # Summary print("\n=== Test Summary ===") print(f"Text Generation Test: {'SUCCESS' if text_success else 'FAILED'}") print(f"Image-Text Generation Test: {'SUCCESS' if image_text_success else 'FAILED'}") if text_success and image_text_success: print("\nAll tests passed! Your Llama 4 Scout setup is working correctly.") else: print("\nSome tests failed. Please check the error messages above.")