HF_RepoSense / test_vectorization.py
naman1102's picture
vectorization
feb8f14
raw
history blame
4.53 kB
#!/usr/bin/env python3
"""
Simple test script to verify vectorization functionality.
Run this to check if sentence-transformers is working correctly.
"""
import os
import sys
# Small, self-contained sample "repository" text fed to repo_explorer in step 5.
_SAMPLE_REPO_CONTENT = """# Test Repository
import numpy as np
import pandas as pd
def main():
    print("Hello, world!")
class DataProcessor:
    def __init__(self):
        self.data = []
    def process(self, data):
        return data.upper()
if __name__ == "__main__":
    main()
"""


def _import_dependencies():
    """Step 1: import numpy and sentence-transformers, reporting each result.

    Returns:
        A ``(numpy_module, SentenceTransformer)`` tuple on success, or ``None``
        if either import raised ``ImportError`` (the failure is printed first).
    """
    print("\n1. Testing imports...")
    try:
        import numpy as np
    except ImportError as e:
        print(f"❌ numpy import failed: {e}")
        return None
    print("✅ numpy imported successfully")

    try:
        from sentence_transformers import SentenceTransformer
    except ImportError as e:
        print(f"❌ sentence-transformers import failed: {e}")
        print("Install with: pip install sentence-transformers")
        return None
    print("✅ sentence-transformers imported successfully")
    return np, SentenceTransformer


def _load_model(sentence_transformer_cls):
    """Step 2: instantiate the 'all-MiniLM-L6-v2' model.

    Returns:
        The model instance, or ``None`` if construction raised (the error is
        printed first).
    """
    print("\n2. Testing model loading...")
    try:
        model = sentence_transformer_cls('all-MiniLM-L6-v2')
    except Exception as e:
        print(f"❌ Model loading failed: {e}")
        return None
    print("✅ SentenceTransformer model loaded successfully")
    return model


def _create_embeddings(model):
    """Step 3: encode three sample sentences and print the result's shape.

    Returns:
        Whatever ``model.encode`` returned, or ``None`` if encoding (or reading
        ``.shape``) raised.
    """
    print("\n3. Testing embedding creation...")
    test_texts = [
        "This is a Python function for machine learning",
        "Here's a repository configuration file",
        "Installation instructions for the project",
    ]
    try:
        embeddings = model.encode(test_texts)
        print(f"✅ Created embeddings with shape: {embeddings.shape}")
    except Exception as e:
        print(f"❌ Embedding creation failed: {e}")
        return None
    return embeddings


def _check_similarity(np, model, embeddings):
    """Step 4: print the cosine similarity of a query against each embedding.

    Args:
        np: The imported numpy module (imported lazily in step 1).
        model: Object exposing ``encode``.
        embeddings: Iterable of vectors produced in step 3.

    Returns:
        ``True`` if every score was computed and printed, ``False`` otherwise.
    """
    print("\n4. Testing similarity calculation...")
    try:
        query = model.encode(["Python code example"])[0]
        # The query norm does not depend on the candidate; compute it once.
        query_norm = np.linalg.norm(query)
        similarities = [
            np.dot(query, embedding) / (query_norm * np.linalg.norm(embedding))
            for embedding in embeddings
        ]
        print(f"✅ Similarity scores: {[f'{s:.3f}' for s in similarities]}")
    except Exception as e:
        print(f"❌ Similarity calculation failed: {e}")
        return False
    return True


def _check_repo_explorer_integration():
    """Step 5: vectorize the sample repository via repo_explorer and search it.

    An empty search result only produces a warning; it is not a failure.

    Returns:
        ``True`` if vectorization reported success and the stats/search calls
        did not raise, ``False`` otherwise (including when ``repo_explorer``
        cannot be imported).
    """
    print("\n5. Testing repo_explorer integration...")
    try:
        # SimpleVectorStore is unused below; importing it still verifies that
        # the module exports it.
        from repo_explorer import SimpleVectorStore, vectorize_repository_content

        if not vectorize_repository_content(_SAMPLE_REPO_CONTENT, "test/repo"):
            print("❌ Repository vectorization failed")
            return False
        print("✅ Repository vectorization successful")

        # Imported only after vectorization, preserving the original order.
        # NOTE(review): presumably vectorize_repository_content populates or
        # rebinds this module-level store -- confirm against repo_explorer.
        from repo_explorer import vector_store

        stats = vector_store.get_stats()
        print(f"✅ Vector store stats: {stats}")

        results = vector_store.search("Python function", top_k=2)
        if results:
            print(f"✅ Vector search returned {len(results)} results")
            for rank, (_chunk, similarity, _metadata) in enumerate(results, start=1):
                print(f" Result {rank}: similarity={similarity:.3f}")
        else:
            print("⚠️ Vector search returned no results")
    except Exception as e:
        print(f"❌ repo_explorer integration test failed: {e}")
        return False
    return True


def test_vectorization():
    """Run the vectorization smoke checks in order, stopping at the first failure.

    The checks are: (1) import numpy and sentence-transformers, (2) load the
    'all-MiniLM-L6-v2' model, (3) embed sample sentences, (4) compute cosine
    similarities, (5) exercise repo_explorer's vectorize/search functions.
    Progress and errors are printed to stdout.

    Returns:
        ``True`` if every check passed, ``False`` as soon as one fails.
    """
    print("🧪 Testing vectorization functionality...")

    dependencies = _import_dependencies()
    if dependencies is None:
        return False
    np, sentence_transformer_cls = dependencies

    model = _load_model(sentence_transformer_cls)
    if model is None:
        return False

    embeddings = _create_embeddings(model)
    if embeddings is None:
        return False

    if not _check_similarity(np, model, embeddings):
        return False

    if not _check_repo_explorer_integration():
        return False

    print("\n🎉 All tests passed! Vectorization is working correctly.")
    return True


# This is a status-returning diagnostic consumed by the __main__ block, not a
# pytest test: pytest expects test functions to return None, so a False result
# would either be ignored or (in newer pytest) any return value would fail.
# Opt out of collection explicitly.
test_vectorization.__test__ = False
if __name__ == "__main__":
    # Script entry point: run the checks, print a short follow-up hint, and
    # report the outcome through the exit status (0 = all passed, 1 = failure).
    print("Repository Explorer Vectorization Test")
    print("=" * 45)
    success = test_vectorization()
    if success:
        print("\n✅ Ready to use vectorization in repo explorer!")
        print(" The sentence-transformers model will be downloaded on first use.")
    else:
        print("\n❌ Vectorization setup incomplete.")
        print(" Make sure to install: pip install sentence-transformers numpy")
    sys.exit(0 if success else 1)