Spaces:
Sleeping
Sleeping
File size: 3,080 Bytes
890d952 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
#!/usr/bin/env python3
"""
Analyze where UNUSED interfaces are actually located in the database.
"""
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
def analyze_unused_locations():
    """Find where UNUSED interfaces are actually stored in the FAISS index.

    Loads the locally persisted FAISS vector store, runs a similarity
    search for the token 'UNUSED', and prints only the chunks whose text
    actually contains it (device, header path, section, and the matching
    lines). Then benchmarks several alternative query phrasings to see
    which retrieves UNUSED-interface chunks most reliably.

    Side effects: reads the ``faiss_index`` directory from the current
    working directory and writes a report to stdout. Returns None.
    """
    print("Analyzing where UNUSED interfaces are located...")
    print("=" * 80)

    # Load the persisted FAISS database with the same embedding model it
    # was presumably built with (all-MiniLM-L6-v2) — the index and query
    # embeddings must come from one model for scores to be meaningful.
    FAISS_INDEX_PATH = "faiss_index"
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    # NOTE: allow_dangerous_deserialization unpickles the index; only safe
    # because this index is produced locally by our own pipeline.
    db = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)

    # Search for chunks that actually contain UNUSED.
    query = "UNUSED"
    results_with_scores = db.similarity_search_with_score(query, k=15)
    print(f"Query: '{query}'")
    print(f"Found {len(results_with_scores)} results")
    print("=" * 80)

    for i, (doc, score) in enumerate(results_with_scores):
        device_name = doc.metadata.get('device_name', 'Unknown')
        header_path = doc.metadata.get('header_path', 'No header path')
        section_title = doc.metadata.get('section_title', 'No section')
        unused_count = doc.page_content.count('UNUSED')

        # Similarity search can return near-miss chunks; only report ones
        # that literally contain the token.
        if unused_count > 0:
            print(f"\nResult {i+1} (Score: {score:.4f}) - {unused_count} UNUSED")
            print(f"  Device: {device_name}")
            print(f"  Header Path: {header_path}")
            print(f"  Section: {section_title}")

            # Show where UNUSED appears in the chunk content (first 3 lines).
            lines = doc.page_content.split('\n')
            unused_lines = [line for line in lines if 'UNUSED' in line]
            print(f"  UNUSED interfaces found:")
            for line in unused_lines[:3]:
                print(f"    {line.strip()}")

            # Broader context so the chunk can be located in the source docs.
            print(f"  Content preview: {doc.page_content[:200]}...")
            print("-" * 60)

    print("\n" + "=" * 80)
    print("Testing better queries for finding UNUSED interfaces...")

    # Compare alternative query phrasings against the same index.
    test_queries = [
        "UNUSED interface Ethernet",
        "Ethernet Interfaces Device Configuration UNUSED",
        "interface description UNUSED",
        "switchport access vlan 50 UNUSED",
    ]

    for query in test_queries:
        print(f"\nTesting query: '{query}'")
        results = db.similarity_search_with_score(query, k=3)
        for i, (doc, score) in enumerate(results):
            unused_count = doc.page_content.count('UNUSED')
            if unused_count > 0:
                print(f"  ✅ Result {i+1}: {unused_count} UNUSED (score: {score:.4f})")
                print(f"     Device: {doc.metadata.get('device_name', 'Unknown')}")
                print(f"     Section: {doc.metadata.get('section_title', 'Unknown')}")
            else:
                print(f"  ❌ Result {i+1}: No UNUSED (score: {score:.4f})")
# Script entry point: run the analysis only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    analyze_unused_locations()
|