#!/usr/bin/env python3 """ Startup script that builds the index if data files don't exist, then starts the FastAPI application. """ import os import subprocess import sys import logging # Configure logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) def check_data_files(): """Check if all required data files exist""" required_files = [ "app/data/faiss.index", "app/data/movies.npy", "app/data/id_map.json", "app/data/movie_metadata.json" ] missing_files = [] for file_path in required_files: if not os.path.exists(file_path): missing_files.append(file_path) return missing_files def build_index(): """Run the build_index script""" logger.info("🔧 Building movie index and data files...") try: # Run build_index with reduced dataset for faster startup on HF result = subprocess.run([ sys.executable, "-m", "app.build_index", "--max-pages", "5" # Reduced for faster startup ], check=True, capture_output=True, text=True) logger.info("✅ Index built successfully!") logger.info(result.stdout) except subprocess.CalledProcessError as e: logger.error("❌ Failed to build index:") logger.error(e.stderr) raise def start_api(): """Start the FastAPI application""" logger.info("🚀 Starting FastAPI application...") os.execv(sys.executable, [ sys.executable, "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860" ]) if __name__ == "__main__": logger.info("🎬 Karl Movie Vector Backend - Starting up...") # Check if data files exist missing_files = check_data_files() if missing_files: logger.info(f"📁 Missing data files: {missing_files}") logger.info("🔄 This is the first startup - building index...") # Build the index build_index() # Verify files were created missing_after_build = check_data_files() if missing_after_build: logger.error(f"❌ Still missing files after build: {missing_after_build}") sys.exit(1) else: logger.info("✅ All data files present, skipping index build") # Start the API start_api()