""" Test script for TMDB data loading and embedding generation Run this to validate your setup before building the full index """ import os import sys import json from settings import get_settings from build_index import TMDBClient, create_composite_text, get_embeddings_batch from openai import OpenAI def test_tmdb_connection(): """Test TMDB API connection""" print("šŸ” Testing TMDB API connection...") try: settings = get_settings() tmdb_client = TMDBClient(settings.tmdb_api_key) # Test getting popular movies (just first page) movie_ids = tmdb_client.get_popular_movies(max_pages=1) if movie_ids: print(f"āœ… Successfully fetched {len(movie_ids)} movie IDs from TMDB") # Test getting details for first movie movie_data = tmdb_client.get_movie_details(movie_ids[0]) if movie_data: print(f"āœ… Successfully fetched details for movie: {movie_data.get('title', 'Unknown')}") # Test getting credits credits = tmdb_client.get_movie_credits(movie_ids[0]) if credits: print(f"āœ… Successfully fetched credits (cast: {len(credits.get('cast', []))}, crew: {len(credits.get('crew', []))})") else: print("āš ļø Could not fetch credits") return movie_data, credits else: print("āŒ Could not fetch movie details") else: print("āŒ Could not fetch movie IDs") except Exception as e: print(f"āŒ TMDB API error: {e}") return None, None def test_composite_text(movie_data, credits): """Test composite text creation""" print("\nšŸ“ Testing composite text creation...") if movie_data: # Add credits to movie data if credits: movie_data['credits'] = credits composite_text = create_composite_text(movie_data) print(f"āœ… Generated composite text ({len(composite_text)} chars)") print(f"Preview: {composite_text[:200]}...") return composite_text else: print("āŒ No movie data to test") return None def test_embeddings(composite_text): """Test embedding generation""" print("\nšŸ¤– Testing embedding generation...") if composite_text: try: settings = get_settings() openai_client = OpenAI(api_key=settings.openai_api_key) embeddings = get_embeddings_batch([composite_text], openai_client) if embeddings: embedding = embeddings[0] print(f"āœ… Generated embedding (dimension: {len(embedding)})") print(f"Sample values: {embedding[:5]}...") return embedding else: print("āŒ No embeddings generated") except Exception as e: print(f"āŒ Embedding error: {e}") else: print("āŒ No composite text to test") return None def main(): """Run all tests""" print("šŸŽ¬ Karl Movie Vector Backend - Test Suite") print("=" * 50) # Test environment variables print("šŸ”§ Checking environment variables...") try: settings = get_settings() print(f"āœ… OpenAI API key: {'sk-...' + settings.openai_api_key[-10:] if settings.openai_api_key else 'Not set'}") print(f"āœ… TMDB API key: {'...' + settings.tmdb_api_key[-10:] if settings.tmdb_api_key else 'Not set'}") except Exception as e: print(f"āŒ Settings error: {e}") print("Make sure you have a .env file with OPENAI_API_KEY and TMDB_API_KEY") return # Run tests movie_data, credits = test_tmdb_connection() composite_text = test_composite_text(movie_data, credits) embedding = test_embeddings(composite_text) print("\n" + "=" * 50) if movie_data and composite_text and embedding: print("šŸŽ‰ All tests passed! You can now run the full build:") print(" python app/build_index.py --max-pages 3") else: print("āŒ Some tests failed. Check your API keys and internet connection.") if __name__ == "__main__": main()