File size: 4,532 Bytes
feb8f14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/env python3
"""
Simple test script to verify vectorization functionality.
Run this to check if sentence-transformers is working correctly.
"""

import os
import sys

def test_vectorization():
    """Run an end-to-end smoke test of the vectorization stack.

    Five stages run in order and the function stops at the first failure:

    1. import ``numpy`` and ``sentence_transformers``;
    2. load the ``all-MiniLM-L6-v2`` model;
    3. encode three sample sentences;
    4. compute the cosine similarity between a query and those sentences;
    5. vectorize a small sample repository through ``repo_explorer`` and
       query its module-level ``vector_store``.

    Progress is reported on stdout. An empty search result in stage 5 is
    only reported as a warning and does not fail the run.

    Returns:
        bool: ``True`` if every stage succeeded, ``False`` otherwise.
    """
    print("🧪 Testing vectorization functionality...")

    # Test 1: Import dependencies
    print("\n1. Testing imports...")
    try:
        import numpy as np
        print("✅ numpy imported successfully")
    except ImportError as e:
        print(f"❌ numpy import failed: {e}")
        return False

    try:
        from sentence_transformers import SentenceTransformer
        print("✅ sentence-transformers imported successfully")
    except ImportError as e:
        print(f"❌ sentence-transformers import failed: {e}")
        print("Install with: pip install sentence-transformers")
        return False

    # Test 2: Load model
    print("\n2. Testing model loading...")
    try:
        model = SentenceTransformer('all-MiniLM-L6-v2')
        print("✅ SentenceTransformer model loaded successfully")
    except Exception as e:
        # Broad on purpose: this is a diagnostic script and any loading
        # problem should be reported to the user rather than crash.
        print(f"❌ Model loading failed: {e}")
        return False

    # Test 3: Create embeddings
    print("\n3. Testing embedding creation...")
    try:
        test_texts = [
            "This is a Python function for machine learning",
            "Here's a repository configuration file",
            "Installation instructions for the project",
        ]
        embeddings = model.encode(test_texts)
        print(f"✅ Created embeddings with shape: {embeddings.shape}")
    except Exception as e:
        print(f"❌ Embedding creation failed: {e}")
        return False

    # Test 4: Test similarity calculation
    print("\n4. Testing similarity calculation...")
    try:
        # The query vector and its norm do not change between iterations,
        # so compute them once instead of once per embedding.
        query_vector = model.encode(["Python code example"])[0]
        query_norm = np.linalg.norm(query_vector)
        similarities = [
            np.dot(query_vector, embedding)
            / (query_norm * np.linalg.norm(embedding))
            for embedding in embeddings
        ]
        print(f"✅ Similarity scores: {[f'{s:.3f}' for s in similarities]}")
    except Exception as e:
        print(f"❌ Similarity calculation failed: {e}")
        return False

    # Test 5: Test repo_explorer integration
    print("\n5. Testing repo_explorer integration...")
    try:
        # NOTE(review): SimpleVectorStore is not used below; presumably it is
        # imported only to confirm that repo_explorer exposes it.
        from repo_explorer import SimpleVectorStore, vectorize_repository_content

        # Create test repository content
        test_repo_content = """# Test Repository
import numpy as np
import pandas as pd

def main():
    print("Hello, world!")
    
class DataProcessor:
    def __init__(self):
        self.data = []
    
    def process(self, data):
        return data.upper()

if __name__ == "__main__":
    main()
"""

        # Test vectorization
        if not vectorize_repository_content(test_repo_content, "test/repo"):
            print("❌ Repository vectorization failed")
            return False
        print("✅ Repository vectorization successful")

        # Test vector store
        from repo_explorer import vector_store
        stats = vector_store.get_stats()
        print(f"✅ Vector store stats: {stats}")

        # Test search
        results = vector_store.search("Python function", top_k=2)
        if results:
            print(f"✅ Vector search returned {len(results)} results")
            # Each result is unpacked as (chunk, similarity, metadata); only
            # the similarity score is reported.
            for rank, (_, similarity, _) in enumerate(results, start=1):
                print(f"   Result {rank}: similarity={similarity:.3f}")
        else:
            # An empty result set is reported but does not fail the run.
            print("⚠️ Vector search returned no results")

    except Exception as e:
        print(f"❌ repo_explorer integration test failed: {e}")
        return False

    print("\n🎉 All tests passed! Vectorization is working correctly.")
    return True

if __name__ == "__main__":
    # Script entry point: run the smoke test, print a summary, and report the
    # outcome to the shell through the exit status (0 = passed, 1 = failed).
    print("Repository Explorer Vectorization Test")
    print("=" * 45)

    success = test_vectorization()

    if success:
        print("\n✅ Ready to use vectorization in repo explorer!")
        print("   The sentence-transformers model will be downloaded on first use.")
    else:
        print("\n❌ Vectorization setup incomplete.")
        print("   Make sure to install: pip install sentence-transformers numpy")

    sys.exit(0 if success else 1)