File size: 1,677 Bytes
6129cb8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import streamlit as st
from search_utils import SemanticSearch

@st.cache_resource
def init_search_system():
    """Create and initialise the shared SemanticSearch instance.

    The function is wrapped in ``st.cache_resource``, so the object built
    here is kept by Streamlit and handed back on later calls instead of
    running ``initialize_system()`` again.

    Returns:
        The ``SemanticSearch`` object constructed with
        ``shard_dir="compressed_shards"`` after ``initialize_system()``
        has been called on it.
    """
    engine = SemanticSearch(shard_dir="compressed_shards")
    engine.initialize_system()
    return engine

# Page-wide settings; kept as the first Streamlit call in the script.
st.set_page_config(page_title="Research Paper Semantic Search with FAISS", layout="wide")
search_system = init_search_system()

# "\U0001F50D" is the magnifying-glass emoji. It is written as an escape so
# the title cannot be corrupted by saving or serving this file with the
# wrong text encoding.
st.title("\U0001F50D Research Paper Semantic Search")

# Wide column for the query box, narrow column for the tuning sliders.
col1, col2 = st.columns([3, 1])

with col1:
    query = st.text_input("Search query:", placeholder="Enter your search...")

with col2:
    # Number of results to request: 1-20, default 5.
    top_k = st.slider("Results count:", 1, 20, 5)
    # Similarity cut-off for the filtered table: 0.0-1.0, default 0.6.
    threshold = st.slider("Similarity threshold:", 0.0, 1.0, 0.6)

# Treat a query made only of whitespace like an empty one: there is nothing
# meaningful to search for, so skip both searches and render no results.
cleaned_query = query.strip()

if cleaned_query:
    with st.spinner("Searching through documents..."):
        base_results = search_system.search(cleaned_query, top_k)
        threshold_results = search_system.search_with_threshold(
            cleaned_query, top_k, threshold
        )

    # Both tables show the 'similarity' column as a percentage with two
    # decimals. The results are used as pandas-style DataFrames here
    # (they expose ``.style`` and ``.empty``).
    similarity_format = {'similarity': "{:.2%}"}

    st.subheader("Top Matches")
    st.dataframe(
        base_results.style.format(similarity_format),
        use_container_width=True,
        hide_index=True
    )

    st.subheader(f"Filtered (>{threshold:.0%} similarity)")
    if not threshold_results.empty:
        st.dataframe(
            threshold_results.style.format(similarity_format),
            use_container_width=True,
            hide_index=True
        )
    else:
        # Nothing passed the cut-off chosen with the threshold slider.
        st.warning("No results meet the similarity threshold")

# Sidebar: a short status summary plus a manual cache reset.
with st.sidebar:
    st.header("System Info")

    # The shard count is read from the live search object; the model name
    # below is fixed display text, not a value read from search_system.
    shard_count = len(search_system.index_shards)
    st.markdown(f"""
    - **Loaded Shards:** {shard_count}
    - **Embedding Model:** `all-MiniLM-L6-v2`
    """)

    clear_requested = st.button("Clear Cache")
    if clear_requested:
        # Drop everything held by st.cache_resource, then restart the
        # script so init_search_system() runs again.
        st.cache_resource.clear()
        st.rerun()