# Spaces: Build error
from ast import literal_eval

import numpy as np
import pandas as pd
import streamlit as st
from sentence_transformers import SentenceTransformer, util

# Location of the SentenceTransformer checkpoint used to embed domains.
model_name = "./Embedder-Typosquat"


@st.cache_resource
def _load_model(path):
    """Load the SentenceTransformer stored at *path*.

    Cached because Streamlit re-executes the script on every widget
    interaction; without the cache the model would be reloaded each time.
    """
    return SentenceTransformer(path)


@st.cache_resource
def _load_corpus(csv_path):
    """Read the domain corpus and its precomputed embeddings.

    Args:
        csv_path: CSV file with a ``domain`` column and an ``embedding``
            column holding each vector as a Python-literal string.

    Returns:
        A ``(frame, matrix)`` tuple: the DataFrame with ``embedding`` parsed
        by ``literal_eval``, and a float32 array stacking one embedding per
        row, in the same row order as the DataFrame.
    """
    frame = pd.read_csv(csv_path)
    # The CSV stores each embedding as text; turn it back into a sequence.
    frame["embedding"] = frame["embedding"].apply(literal_eval)
    # float32 so the corpus dtype matches the query embedding built later.
    matrix = np.stack(frame["embedding"].values).astype(np.float32)
    return frame, matrix


# Load the model
model = _load_model(model_name)

# Load the domains and embeddings
# NOTE: both objects come from a shared cache; treat them as read-only.
domains_df, corpus_embeddings = _load_corpus('domains_embs.csv')
corpus_domains = domains_df.domain.to_list()
# Streamlit App: given a suspected typosquatted domain, list the corpus
# domains whose embeddings are most similar to it.
st.title("Mining Potential Legitimate Domains from a Typosquatted Domain")
st.write("Enter a potential typosquatted domain and select the number of top results to retrieve.")

# User Inputs
domain = st.text_input("Potential Typosquatted Domain")
top_k = st.number_input("Top K Results", min_value=1, max_value=50, value=5, step=1)

# Button to trigger search
if st.button("Search for Legitimate Domains"):
    # Whitespace-only input is truthy but carries no domain; treat it as empty.
    query = domain.strip()
    if query:
        # Perform Semantic Search
        # Cast to float32 so the query dtype matches the corpus embeddings.
        query_emb = model.encode(query).astype(np.float32)
        # semantic_search returns one hit list per query; there is one query.
        semantic_res = util.semantic_search(query_emb, corpus_embeddings, top_k=top_k)[0]
        ids = [hit['corpus_id'] for hit in semantic_res]
        scores = [hit['score'] for hit in semantic_res]

        # Create a DataFrame for the results.
        # corpus_id is a row position in corpus_embeddings, hence iloc.
        # Selecting [['domain']] (a list) keeps a DataFrame; selecting a
        # single label would yield a Series, and assigning 'score' to that
        # would store the whole list as one element instead of a column.
        res_df = domains_df[['domain']].iloc[ids].copy()
        res_df['score'] = scores

        # Display the result DataFrame
        st.write("Mined Domains:")
        st.dataframe(res_df)
    else:
        st.warning("Please enter a domain to perform the search.")