Update app.py
Browse files
app.py
CHANGED
@@ -12,7 +12,7 @@ model = SentenceTransformer(model_name)
|
|
12 |
domains_df = pd.read_csv('domains_embs.csv')
|
13 |
domains_df.embedding = domains_df.embedding.apply(literal_eval)
|
14 |
corpus_domains = domains_df.domain.to_list()
|
15 |
-
corpus_embeddings = np.stack(domains_df.embedding.values)
|
16 |
|
17 |
# Streamlit App
|
18 |
st.title("Mining Potential Legitimate Domains from a Typosquatted Domain")
|
@@ -26,7 +26,7 @@ top_k = st.number_input("Top K Results", min_value=1, max_value=len(corpus_domai
|
|
26 |
if st.button("Search for Legitimate Domains"):
|
27 |
if domain:
|
28 |
# Perform Semantic Search
|
29 |
-
query_emb = model.encode(domain)
|
30 |
semantic_res = util.semantic_search(query_emb, corpus_embeddings, top_k=top_k)[0]
|
31 |
ids = [r['corpus_id'] for r in semantic_res]
|
32 |
scores = [r['score'] for r in semantic_res]
|
|
|
12 |
domains_df = pd.read_csv('domains_embs.csv')
|
13 |
domains_df.embedding = domains_df.embedding.apply(literal_eval)
|
14 |
corpus_domains = domains_df.domain.to_list()
|
15 |
+
corpus_embeddings = np.stack(domains_df.embedding.values).astype(np.float32) # Ensure embeddings are float32
|
16 |
|
17 |
# Streamlit App
|
18 |
st.title("Mining Potential Legitimate Domains from a Typosquatted Domain")
|
|
|
26 |
if st.button("Search for Legitimate Domains"):
|
27 |
if domain:
|
28 |
# Perform Semantic Search
|
29 |
+
query_emb = model.encode(domain).astype(np.float32) # Ensure query embedding is also float32
|
30 |
semantic_res = util.semantic_search(query_emb, corpus_embeddings, top_k=top_k)[0]
|
31 |
ids = [r['corpus_id'] for r in semantic_res]
|
32 |
scores = [r['score'] for r in semantic_res]
|