#!/usr/bin/env python3
"""
Simple test script to verify vectorization functionality.
Run this to check if sentence-transformers is working correctly.
"""

import os
import sys


def test_vectorization() -> bool:
    """Run the vectorization smoke checks in order and report each result.

    The checks are executed sequentially and the function stops at the
    first one that fails:

    1. ``numpy`` and ``sentence_transformers`` can be imported.
    2. The ``all-MiniLM-L6-v2`` SentenceTransformer model can be loaded.
    3. Three sample texts can be encoded into embeddings.
    4. Cosine similarity between a query embedding and each sample
       embedding can be computed.
    5. ``repo_explorer`` can vectorize a small sample repository string
       and its ``vector_store`` answers ``get_stats()`` and ``search()``.

    Progress and failure reasons are printed to stdout.

    Returns:
        True when every check passed, False as soon as one check fails.
        An empty search result in step 5 only prints a warning and does
        not count as a failure.
    """
    print("🧪 Testing vectorization functionality...")

    # Test 1: Import dependencies
    print("\n1. Testing imports...")
    try:
        import numpy as np
        print("✅ numpy imported successfully")
    except ImportError as e:
        print(f"❌ numpy import failed: {e}")
        return False

    try:
        from sentence_transformers import SentenceTransformer
        print("✅ sentence-transformers imported successfully")
    except ImportError as e:
        print(f"❌ sentence-transformers import failed: {e}")
        print("Install with: pip install sentence-transformers")
        return False

    # Test 2: Load model
    print("\n2. Testing model loading...")
    try:
        model = SentenceTransformer('all-MiniLM-L6-v2')
        print("✅ SentenceTransformer model loaded successfully")
    except Exception as e:
        # Broad catch is deliberate: this is a diagnostic script and any
        # failure here should be reported, not raised.
        print(f"❌ Model loading failed: {e}")
        return False

    # Test 3: Create embeddings
    print("\n3. Testing embedding creation...")
    try:
        test_texts = [
            "This is a Python function for machine learning",
            "Here's a repository configuration file",
            "Installation instructions for the project",
        ]
        embeddings = model.encode(test_texts)
        print(f"✅ Created embeddings with shape: {embeddings.shape}")
    except Exception as e:
        print(f"❌ Embedding creation failed: {e}")
        return False

    # Test 4: Test similarity calculation
    print("\n4. Testing similarity calculation...")
    try:
        query_vector = model.encode(["Python code example"])[0]
        # The query norm does not depend on the sample, so compute it once.
        query_norm = np.linalg.norm(query_vector)
        similarities = [
            np.dot(query_vector, embedding)
            / (query_norm * np.linalg.norm(embedding))
            for embedding in embeddings
        ]
        print(f"✅ Similarity scores: {[f'{s:.3f}' for s in similarities]}")
    except Exception as e:
        print(f"❌ Similarity calculation failed: {e}")
        return False

    # Test 5: Test repo_explorer integration
    print("\n5. Testing repo_explorer integration...")
    try:
        # SimpleVectorStore is imported only to confirm that it is exposed
        # by repo_explorer; it is not used directly below.
        from repo_explorer import SimpleVectorStore, vectorize_repository_content

        # Create test repository content
        test_repo_content = """# Test Repository

import numpy as np
import pandas as pd

def main():
    print("Hello, world!")

class DataProcessor:
    def __init__(self):
        self.data = []

    def process(self, data):
        return data.upper()

if __name__ == "__main__":
    main()
"""

        # Test vectorization
        success = vectorize_repository_content(test_repo_content, "test/repo")
        if not success:
            print("❌ Repository vectorization failed")
            return False

        print("✅ Repository vectorization successful")

        # Test vector store
        from repo_explorer import vector_store
        stats = vector_store.get_stats()
        print(f"✅ Vector store stats: {stats}")

        # Test search
        results = vector_store.search("Python function", top_k=2)
        if results:
            print(f"✅ Vector search returned {len(results)} results")
            for rank, (_chunk, similarity, _metadata) in enumerate(results, start=1):
                print(f" Result {rank}: similarity={similarity:.3f}")
        else:
            # No hits is reported but is not treated as a failure.
            print("⚠️ Vector search returned no results")

    except Exception as e:
        print(f"❌ repo_explorer integration test failed: {e}")
        return False

    print("\n🎉 All tests passed! Vectorization is working correctly.")
    return True


def main() -> int:
    """Run the checks from the command line and return the exit status.

    Returns:
        0 when ``test_vectorization()`` succeeded, 1 otherwise.
    """
    print("Repository Explorer Vectorization Test")
    print("=" * 45)

    success = test_vectorization()

    if success:
        print("\n✅ Ready to use vectorization in repo explorer!")
        print(" The sentence-transformers model will be downloaded on first use.")
    else:
        print("\n❌ Vectorization setup incomplete.")
        print(" Make sure to install: pip install sentence-transformers numpy")

    return 0 if success else 1


if __name__ == "__main__":
    sys.exit(main())