rahideer committed on
Commit
73dcc97
·
verified ·
1 Parent(s): bda4668

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -0
app.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import zipfile
4
+ import os
5
+ from sentence_transformers import SentenceTransformer, util
6
+ from transformers import pipeline
7
+
8
# Constants
ZIP_FILE = "xnli-multilingual-nli-dataset.zip"  # dataset archive (relative path)
CSV_FILE = "en_test.csv"  # CSV read from the extraction folder
EXTRACT_FOLDER = "extracted_data"  # directory the archive is unpacked into


# Load and extract ZIP
@st.cache_data
def extract_and_load(sample_size: int = 500) -> pd.DataFrame:
    """Unpack the dataset archive if needed and return a random sample of rows.

    The result is memoized by ``st.cache_data``, so the same sample is
    reused across Streamlit reruns for a given ``sample_size``.

    Args:
        sample_size: Maximum number of rows to sample after rows containing
            missing values are dropped. Defaults to 500.

    Returns:
        A DataFrame with the ``premise``, ``hypothesis`` and ``label``
        columns, holding at most ``sample_size`` rows.

    Raises:
        FileNotFoundError: If ``ZIP_FILE`` is missing when extraction is
            required, or the CSV is not present after extraction.
        KeyError: If the CSV lacks one of the three expected columns.
    """
    csv_path = os.path.join(EXTRACT_FOLDER, CSV_FILE)

    # Test for the CSV itself rather than the folder: a folder left behind by
    # an interrupted extraction would otherwise skip unpacking forever and
    # make read_csv fail on every run.
    if not os.path.exists(csv_path):
        with zipfile.ZipFile(ZIP_FILE, "r") as zip_ref:
            zip_ref.extractall(EXTRACT_FOLDER)

    df = pd.read_csv(csv_path).dropna()

    # DataFrame.sample raises ValueError when n exceeds the number of rows
    # (without replacement), so clamp to what is actually available.
    df = df.sample(min(sample_size, len(df)))
    return df[['premise', 'hypothesis', 'label']]
22
+
23
df = extract_and_load()


# Load models
# Streamlit re-executes this script from top to bottom on every widget
# interaction. Wrapping the constructors in st.cache_resource keeps one shared
# instance of each model instead of rebuilding them on every rerun.
@st.cache_resource
def _load_nli_model():
    """Build the XNLI text-classification pipeline (cached across reruns)."""
    return pipeline("text-classification", model="joeddav/xlm-roberta-large-xnli")


@st.cache_resource
def _load_embedder():
    """Build the multilingual sentence embedder (cached across reruns)."""
    return SentenceTransformer("sentence-transformers/distiluse-base-multilingual-cased-v2")


nli_model = _load_nli_model()
embedder = _load_embedder()
28
+
29
# UI
st.title("🌐 Multilingual RAG-style NLI Explorer")
st.markdown("Enter a sentence in **any language**, and the app will find a related statement from the dataset and infer their relationship.")

user_input = st.text_input("Enter your **hypothesis** (your own sentence):")


@st.cache_resource
def _encode_premises(premises):
    """Embed the premise corpus once and reuse the tensor for later queries.

    Args:
        premises: List of premise strings to embed with the global
            ``embedder``.

    Returns:
        Whatever ``embedder.encode(..., convert_to_tensor=True)`` returns for
        the list; the result is cached per distinct ``premises`` value.
    """
    return embedder.encode(premises, convert_to_tensor=True)


# Ignore input that is empty or only whitespace.
hypothesis = user_input.strip() if user_input else ""

if hypothesis:
    with st.spinner("Finding most relevant premise..."):
        # The corpus does not change between queries, so only the user's
        # sentence needs to be encoded on each rerun.
        premise_embeddings = _encode_premises(df['premise'].tolist())
        user_embedding = embedder.encode(hypothesis, convert_to_tensor=True)

        # semantic_search returns one hit list per query; take the best hit
        # of the single query.
        top_hit = util.semantic_search(user_embedding, premise_embeddings, top_k=1)[0][0]
        match_idx = top_hit['corpus_id']
        # corpus_id is a position in the encoded list, hence iloc.
        selected_premise = df.iloc[match_idx]['premise']

    st.subheader("🔍 Most Relevant Premise:")
    st.write(selected_premise)

    # Run NLI classification
    # Pass premise and hypothesis as a real sentence pair so the tokenizer
    # inserts the model's own separator tokens, instead of splicing a literal
    # "</s>" into a single string.
    prediction = nli_model([{"text": selected_premise, "text_pair": hypothesis}])
    # A list input yields a list of results; tolerate a bare dict as well.
    result = prediction[0] if isinstance(prediction, list) else prediction

    st.subheader("🧠 Predicted Relationship:")
    st.write(f"**{result['label']}** (confidence: {result['score']:.2f})")