chat-with-avd-doc / analyze_unused.py
rogerscuall's picture
Upload folder using huggingface_hub
890d952 verified
#!/usr/bin/env python3
"""
Analyze where UNUSED interfaces are actually located in the database.
"""
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
def analyze_unused_locations(
    db=None,
    index_path="faiss_index",
    model_name="sentence-transformers/all-MiniLM-L6-v2",
):
    """Print which retrieved chunks actually contain the text ``UNUSED``.

    Runs a similarity search for the bare query ``"UNUSED"`` (k=15) and, for
    every returned chunk whose content contains ``UNUSED``, prints its score,
    the ``device_name`` / ``header_path`` / ``section_title`` metadata, up to
    three matching lines and a 200-character preview. It then runs a few more
    specific queries (k=3) and reports, per result, whether the chunk contains
    ``UNUSED``.

    Args:
        db: An already-loaded vector store exposing
            ``similarity_search_with_score(query, k=...)`` that returns
            ``(document, score)`` pairs, where each document has
            ``page_content`` and ``metadata``. When ``None`` (the default),
            the FAISS index at ``index_path`` is loaded.
        index_path: Directory of the saved FAISS index; only used when
            ``db`` is ``None``.
        model_name: Embedding model used to load the index; only used when
            ``db`` is ``None``.

    Returns:
        None. All findings are written to standard output.
    """
    print("Analyzing where UNUSED interfaces are located...")
    print("=" * 80)

    if db is None:
        embeddings = HuggingFaceEmbeddings(model_name=model_name)
        # SECURITY: FAISS.load_local unpickles the stored docstore, which can
        # execute arbitrary code. Only point this at an index you built
        # yourself or otherwise trust.
        db = FAISS.load_local(
            index_path, embeddings, allow_dangerous_deserialization=True
        )

    # Search for chunks that actually contain UNUSED.
    query = "UNUSED"
    results_with_scores = db.similarity_search_with_score(query, k=15)
    print(f"Query: '{query}'")
    print(f"Found {len(results_with_scores)} results")
    print("=" * 80)

    # rank keeps the position in the full result list, so numbering has gaps
    # where a retrieved chunk did not contain UNUSED.
    for rank, (doc, score) in enumerate(results_with_scores, start=1):
        unused_count = doc.page_content.count("UNUSED")
        if not unused_count:
            continue  # Only show chunks with UNUSED.

        device_name = doc.metadata.get("device_name", "Unknown")
        header_path = doc.metadata.get("header_path", "No header path")
        section_title = doc.metadata.get("section_title", "No section")

        print(f"\nResult {rank} (Score: {score:.4f}) - {unused_count} UNUSED")
        print(f" Device: {device_name}")
        print(f" Header Path: {header_path}")
        print(f" Section: {section_title}")

        # Split on real line breaks so each matching line is listed on its
        # own instead of dumping the whole chunk as a single "line".
        unused_lines = [
            line for line in doc.page_content.splitlines() if "UNUSED" in line
        ]
        print(" UNUSED interfaces found:")
        for line in unused_lines[:3]:  # Show first 3
            print(f" {line.strip()}")

        # Show broader context.
        print(f" Content preview: {doc.page_content[:200]}...")
        print("-" * 60)

    print("\n" + "=" * 80)
    print("Testing better queries for finding UNUSED interfaces...")

    # Candidate phrasings to compare against the bare "UNUSED" query.
    test_queries = [
        "UNUSED interface Ethernet",
        "Ethernet Interfaces Device Configuration UNUSED",
        "interface description UNUSED",
        "switchport access vlan 50 UNUSED",
    ]
    for test_query in test_queries:
        print(f"\nTesting query: '{test_query}'")
        results = db.similarity_search_with_score(test_query, k=3)
        for rank, (doc, score) in enumerate(results, start=1):
            unused_count = doc.page_content.count("UNUSED")
            if unused_count > 0:
                print(f" ✅ Result {rank}: {unused_count} UNUSED (score: {score:.4f})")
                print(f" Device: {doc.metadata.get('device_name', 'Unknown')}")
                print(f" Section: {doc.metadata.get('section_title', 'Unknown')}")
            else:
                print(f" ❌ Result {rank}: No UNUSED (score: {score:.4f})")
# Run the analysis only when this file is executed as a script, so that
# importing the module does not trigger the index load and searches.
if __name__ == "__main__":
    analyze_unused_locations()