#!/usr/bin/env python3
"""
Analyze where UNUSED interfaces are actually located in the database.
"""

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS


def analyze_unused_locations():
    """Find where UNUSED interfaces are actually stored."""
    print("Analyzing where UNUSED interfaces are located...")
    print("=" * 80)

    # Load the FAISS database
    FAISS_INDEX_PATH = "faiss_index"
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    db = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)

    # Search for chunks that actually contain UNUSED
    query = "UNUSED"
    results_with_scores = db.similarity_search_with_score(query, k=15)

    print(f"Query: '{query}'")
    print(f"Found {len(results_with_scores)} results")
    print("=" * 80)

    for i, (doc, score) in enumerate(results_with_scores):
        device_name = doc.metadata.get('device_name', 'Unknown')
        header_path = doc.metadata.get('header_path', 'No header path')
        section_title = doc.metadata.get('section_title', 'No section')

        unused_count = doc.page_content.count('UNUSED')

        if unused_count > 0:  # Only show chunks with UNUSED
            print(f"\nResult {i+1} (Score: {score:.4f}) - {unused_count} UNUSED")
            print(f"  Device: {device_name}")
            print(f"  Header Path: {header_path}")
            print(f"  Section: {section_title}")

            # Show where UNUSED appears in content
            lines = doc.page_content.split('\n')
            unused_lines = [line for line in lines if 'UNUSED' in line]
            print(f"  UNUSED interfaces found:")
            for line in unused_lines[:3]:  # Show first 3
                print(f"    {line.strip()}")

            # Show broader context
            print(f"  Content preview: {doc.page_content[:200]}...")
            print("-" * 60)

    print("\n" + "=" * 80)
    print("Testing better queries for finding UNUSED interfaces...")

    # Test different queries
    test_queries = [
        "UNUSED interface Ethernet",
        "Ethernet Interfaces Device Configuration UNUSED",
        "interface description UNUSED",
        "switchport access vlan 50 UNUSED"
    ]

    for query in test_queries:
        print(f"\nTesting query: '{query}'")
        results = db.similarity_search_with_score(query, k=3)

        for i, (doc, score) in enumerate(results):
            unused_count = doc.page_content.count('UNUSED')
            if unused_count > 0:
                print(f"  ✅ Result {i+1}: {unused_count} UNUSED (score: {score:.4f})")
                print(f"     Device: {doc.metadata.get('device_name', 'Unknown')}")
                print(f"     Section: {doc.metadata.get('section_title', 'Unknown')}")
            else:
                print(f"  ❌ Result {i+1}: No UNUSED (score: {score:.4f})")


if __name__ == "__main__":
    analyze_unused_locations()
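

# Optional: an exhaustive keyword scan as a cross-check on the semantic search
# above, since embedding similarity for a short token like "UNUSED" can miss
# chunks that literally contain it. This is a minimal sketch, assuming the
# index was built with langchain's default InMemoryDocstore, whose private
# `_dict` attribute maps doc IDs to Documents; that is an implementation
# detail and may change between langchain versions. The helper name
# `scan_docstore_for_unused` is hypothetical and is not wired into
# analyze_unused_locations(); call it manually if needed.
def scan_docstore_for_unused(db):
    """Count 'UNUSED' occurrences across every stored chunk (sketch)."""
    hits = []
    # Assumption: db.docstore is an InMemoryDocstore exposing `_dict`
    for doc_id, doc in db.docstore._dict.items():
        count = doc.page_content.count('UNUSED')
        if count > 0:
            hits.append((doc_id, count, doc.metadata.get('device_name', 'Unknown')))

    print(f"Docstore scan: {len(hits)} chunks contain UNUSED")
    for doc_id, count, device in hits:
        print(f"  {doc_id}: {count} UNUSED (device: {device})")
    return hits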