#!/usr/bin/env python3
"""
Analyze where UNUSED interfaces are actually located in the database.
"""

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS


def analyze_unused_locations():
    """Find where UNUSED interfaces are actually stored."""
    print("Analyzing where UNUSED interfaces are located...")
    print("=" * 80)

    # Load the FAISS database
    FAISS_INDEX_PATH = "faiss_index"
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    db = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)

    # Search for chunks that actually contain UNUSED
    query = "UNUSED"
    results_with_scores = db.similarity_search_with_score(query, k=15)

    print(f"Query: '{query}'")
    print(f"Found {len(results_with_scores)} results")
    print("=" * 80)

    for i, (doc, score) in enumerate(results_with_scores):
        device_name = doc.metadata.get('device_name', 'Unknown')
        header_path = doc.metadata.get('header_path', 'No header path')
        section_title = doc.metadata.get('section_title', 'No section')

        unused_count = doc.page_content.count('UNUSED')

        if unused_count > 0:  # Only show chunks with UNUSED
            print(f"\nResult {i+1} (Score: {score:.4f}) - {unused_count} UNUSED")
            print(f"  Device: {device_name}")
            print(f"  Header Path: {header_path}")
            print(f"  Section: {section_title}")

            # Show where UNUSED appears in content
            lines = doc.page_content.split('\n')
            unused_lines = [line for line in lines if 'UNUSED' in line]
            print(f"  UNUSED interfaces found:")
            for line in unused_lines[:3]:  # Show first 3
                print(f"    {line.strip()}")

            # Show broader context
            print(f"  Content preview: {doc.page_content[:200]}...")
            print("-" * 60)

    print("\n" + "=" * 80)
    print("Testing better queries for finding UNUSED interfaces...")

    # Test different queries
    test_queries = [
        "UNUSED interface Ethernet",
        "Ethernet Interfaces Device Configuration UNUSED",
        "interface description UNUSED",
        "switchport access vlan 50 UNUSED"
    ]

    for query in test_queries:
        print(f"\nTesting query: '{query}'")
        results = db.similarity_search_with_score(query, k=3)

        for i, (doc, score) in enumerate(results):
            unused_count = doc.page_content.count('UNUSED')
            if unused_count > 0:
                print(f"  ✅ Result {i+1}: {unused_count} UNUSED (score: {score:.4f})")
                print(f"     Device: {doc.metadata.get('device_name', 'Unknown')}")
                print(f"     Section: {doc.metadata.get('section_title', 'Unknown')}")
            else:
                print(f"  ❌ Result {i+1}: No UNUSED (score: {score:.4f})")


if __name__ == "__main__":
    analyze_unused_locations()
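

# Optional: an exhaustive keyword scan as a cross-check on the semantic search
# above, since embedding similarity for a short token like "UNUSED" can miss
# chunks that literally contain it. This is a minimal sketch, assuming the
# index was built with langchain's default InMemoryDocstore, whose private
# `_dict` attribute maps doc IDs to Documents; that is an implementation
# detail and may change between langchain versions. The helper name
# `scan_docstore_for_unused` is hypothetical and is not wired into
# analyze_unused_locations(); call it manually if needed.
def scan_docstore_for_unused(db):
    """Count 'UNUSED' occurrences across every stored chunk (sketch)."""
    hits = []
    # Assumption: db.docstore is an InMemoryDocstore exposing `_dict`
    for doc_id, doc in db.docstore._dict.items():
        count = doc.page_content.count('UNUSED')
        if count > 0:
            hits.append((doc_id, count, doc.metadata.get('device_name', 'Unknown')))

    print(f"Docstore scan: {len(hits)} chunks contain UNUSED")
    for doc_id, count, device in hits:
        print(f"  {doc_id}: {count} UNUSED (device: {device})")
    return hits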