File size: 4,532 Bytes
feb8f14 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
#!/usr/bin/env python3
"""
Simple test script to verify vectorization functionality.
Run this to check if sentence-transformers is working correctly.
"""
import os
import sys
# Stand-in repository content handed to the vectorizer in step 5.
_SAMPLE_REPO_CONTENT = """# Test Repository

import numpy as np
import pandas as pd

def main():
    print("Hello, world!")

class DataProcessor:
    def __init__(self):
        self.data = []

    def process(self, data):
        return data.upper()

if __name__ == "__main__":
    main()
"""


def test_vectorization() -> bool:
    """Run the vectorization smoke checks in order, stopping at the first failure.

    The checks are:

    1. ``numpy`` and ``sentence_transformers`` can be imported.
    2. The ``all-MiniLM-L6-v2`` SentenceTransformer model can be loaded.
    3. A handful of sample sentences can be encoded.
    4. Cosine similarity between a query and those embeddings can be computed.
    5. ``repo_explorer`` can vectorize sample content and answer a search.

    Progress and failure reasons are printed to stdout; errors are reported
    there rather than raised.

    Returns:
        True if every check succeeded, False as soon as one of them fails.
    """
    print("🧪 Testing vectorization functionality...")

    # Test 1: Import dependencies
    print("\n1. Testing imports...")
    try:
        import numpy as np
        print("✅ numpy imported successfully")
    except ImportError as exc:
        print(f"❌ numpy import failed: {exc}")
        return False

    try:
        from sentence_transformers import SentenceTransformer
        print("✅ sentence-transformers imported successfully")
    except ImportError as exc:
        print(f"❌ sentence-transformers import failed: {exc}")
        print("Install with: pip install sentence-transformers")
        return False

    # Test 2: Load model
    print("\n2. Testing model loading...")
    try:
        model = SentenceTransformer('all-MiniLM-L6-v2')
        print("✅ SentenceTransformer model loaded successfully")
    except Exception as exc:
        print(f"❌ Model loading failed: {exc}")
        return False

    # Test 3: Create embeddings
    print("\n3. Testing embedding creation...")
    try:
        test_texts = [
            "This is a Python function for machine learning",
            "Here's a repository configuration file",
            "Installation instructions for the project",
        ]
        embeddings = model.encode(test_texts)
        print(f"✅ Created embeddings with shape: {embeddings.shape}")
    except Exception as exc:
        print(f"❌ Embedding creation failed: {exc}")
        return False

    # Test 4: Test similarity calculation
    print("\n4. Testing similarity calculation...")
    try:
        query_vector = model.encode(["Python code example"])[0]
        # The query norm does not change per document, so compute it once.
        query_norm = np.linalg.norm(query_vector)
        # Cosine similarity: dot product divided by the product of the norms.
        similarities = [
            np.dot(query_vector, embedding)
            / (query_norm * np.linalg.norm(embedding))
            for embedding in embeddings
        ]
        print(f"✅ Similarity scores: {[f'{s:.3f}' for s in similarities]}")
    except Exception as exc:
        print(f"❌ Similarity calculation failed: {exc}")
        return False

    # Test 5: Test repo_explorer integration
    print("\n5. Testing repo_explorer integration...")
    try:
        # SimpleVectorStore is not used below; importing it only verifies
        # that repo_explorer exposes it.
        from repo_explorer import SimpleVectorStore, vectorize_repository_content  # noqa: F401

        # Test vectorization
        success = vectorize_repository_content(_SAMPLE_REPO_CONTENT, "test/repo")
        if not success:
            print("❌ Repository vectorization failed")
            return False
        print("✅ Repository vectorization successful")

        # Test vector store
        # NOTE(review): imported only after vectorization, as in the original;
        # presumably so a store rebound by repo_explorer is picked up - confirm.
        from repo_explorer import vector_store
        stats = vector_store.get_stats()
        print(f"✅ Vector store stats: {stats}")

        # Test search
        results = vector_store.search("Python function", top_k=2)
        if results:
            print(f"✅ Vector search returned {len(results)} results")
            for rank, (_chunk, similarity, _metadata) in enumerate(results, start=1):
                print(f" Result {rank}: similarity={similarity:.3f}")
        else:
            # An empty result set is reported but does not fail the run.
            print("⚠️ Vector search returned no results")
    except Exception as exc:
        print(f"❌ repo_explorer integration test failed: {exc}")
        return False

    print("\n🎉 All tests passed! Vectorization is working correctly.")
    return True
if __name__ == "__main__":
    # Script entry point: print a banner, run the checks, summarize the result.
    print("Repository Explorer Vectorization Test")
    print("=" * 45)

    success = test_vectorization()

    if success:
        print("\n✅ Ready to use vectorization in repo explorer!")
        print(" The sentence-transformers model will be downloaded on first use.")
    else:
        print("\n❌ Vectorization setup incomplete.")
        print(" Make sure to install: pip install sentence-transformers numpy")

    # Exit status mirrors the outcome: 0 when all checks passed, 1 otherwise.
    sys.exit(0 if success else 1)