rahideer committed on
Commit
e90cc8b
·
verified ·
1 Parent(s): 06818f8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -45
app.py CHANGED
@@ -1,51 +1,53 @@
1
  import streamlit as st
2
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
- import torch
4
  import pandas as pd
 
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  @st.cache_resource
7
  def load_model():
8
- model_name = "MoritzLaurer/mDeBERTa-v3-base-mnli-xnli"
9
- tokenizer = AutoTokenizer.from_pretrained(model_name)
10
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
11
- return tokenizer, model
12
 
13
@st.cache_data
def load_sample_data():
    """Return the premise/hypothesis/label columns of ``xnli_sample.csv``.

    When the CSV file is not present, an empty frame with the same three
    column names is returned instead, so callers can test ``.empty``.
    """
    wanted_columns = ["premise", "hypothesis", "label"]
    try:
        frame = pd.read_csv("xnli_sample.csv")
    except FileNotFoundError:
        # The sample file is optional; fall back to an empty table.
        return pd.DataFrame(columns=wanted_columns)
    return frame[wanted_columns]
21
-
22
def predict_label(premise, hypothesis, tokenizer, model):
    """Classify one premise/hypothesis pair.

    The pair is tokenized together, run through ``model`` without gradient
    tracking, and the logits are turned into probabilities with a softmax.

    Returns:
        A ``(label, probability)`` tuple: the ``model.config.id2label`` entry
        for the highest-scoring class and that class's probability as a float.
    """
    encoded = tokenizer(
        premise,
        hypothesis,
        return_tensors="pt",
        truncation=True,
        padding=True,
    )
    with torch.no_grad():
        logits = model(**encoded).logits
    distribution = torch.nn.functional.softmax(logits, dim=1)
    best_class = torch.argmax(distribution).item()
    id_to_label = model.config.id2label
    return id_to_label[best_class], distribution[0][best_class].item()
30
-
31
st.set_page_config(page_title="Multilingual RAG NLI App", layout="wide")
st.title("Multilingual NLI with mDeBERTa-v3")

# Tokenizer/model pair consumed by predict_label when the button is pressed.
tokenizer, model = load_model()

# --- Interactive prediction on a user-supplied pair ---
st.subheader("Check Natural Language Inference")
premise = st.text_input("Premise", "A man inspects the uniform of a figure in some East Asian country.")
hypothesis = st.text_input("Hypothesis", "The man is sleeping.")

if st.button("Predict"):
    label, confidence = predict_label(premise, hypothesis, tokenizer, model)
    st.success(f"Prediction: {label} ({confidence:.2f} confidence)")

st.markdown("---")

# --- Optional preview of the bundled sample CSV ---
st.subheader("Explore Sample Data")

df = load_sample_data()
if not df.empty:
    # DataFrame.sample raises ValueError when asked for more rows than the
    # frame holds, so cap the preview at the number of available rows.
    st.dataframe(df.sample(min(10, len(df))))
else:
    st.warning("Upload a file named `xnli_sample.csv` for sample data display.")
 
1
  import streamlit as st
2
+ import zipfile
3
+ import os
4
  import pandas as pd
5
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
6
 
7
# Paths
ZIP_FILE = "xnli-multilingual-nli-dataset.zip"
EXTRACT_DIR = "extracted_data"


@st.cache_data
def extract_and_load():
    """Unpack the dataset archive if needed and list the CSV files it holds.

    The archive ``ZIP_FILE`` is extracted into ``EXTRACT_DIR`` only when that
    directory does not exist yet. The extraction directory is then searched
    recursively, so CSV files stored inside a folder within the archive are
    found as well.

    Returns:
        A sorted list of CSV paths relative to ``EXTRACT_DIR``; each entry can
        be opened with ``os.path.join(EXTRACT_DIR, entry)``.

    Raises:
        FileNotFoundError: if extraction is needed and ``ZIP_FILE`` is missing.
    """
    if not os.path.exists(EXTRACT_DIR):
        with zipfile.ZipFile(ZIP_FILE, "r") as zip_ref:
            zip_ref.extractall(EXTRACT_DIR)

    csv_files = [
        os.path.relpath(os.path.join(folder, filename), EXTRACT_DIR)
        for folder, _subdirs, filenames in os.walk(EXTRACT_DIR)
        for filename in filenames
        if filename.endswith('.csv')
    ]
    # Directory listing order is arbitrary; sort so the UI order is stable.
    return sorted(csv_files)
18
+
19
# Load model and tokenizer
@st.cache_resource
def load_model():
    """Build and return a ``text-classification`` pipeline for the NLI model.

    The tokenizer and the sequence-classification model are loaded from the
    same checkpoint name and handed to ``pipeline``. The function is wrapped
    in ``st.cache_resource``.
    """
    checkpoint = "MoritzLaurer/mDeBERTa-v3-base-mnli-xnli"
    nli_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    nli_model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    return pipeline("text-classification", model=nli_model, tokenizer=nli_tokenizer)
26
 
27
st.set_page_config(page_title="Multilingual NLI App", layout="centered")

st.title("🌍 Multilingual NLI (Natural Language Inference) Explorer")
st.markdown("Upload premise & hypothesis pairs or use the dataset to explore entailment, contradiction, or neutrality.")

nli_pipeline = load_model()

csv_files = extract_and_load()
if not csv_files:
    # With no options the selectbox yields None and os.path.join would raise.
    st.error(f"No CSV files were found in `{EXTRACT_DIR}`.")
    st.stop()
selected_csv = st.selectbox("Choose a language CSV file:", csv_files)

df = pd.read_csv(os.path.join(EXTRACT_DIR, selected_csv)).dropna()
if df.empty:
    st.warning(f"`{selected_csv}` has no complete rows to sample from.")
    st.stop()

# Streamlit re-executes this script on every widget interaction. Drawing a new
# random sample each time would swap the rows out from under the user as soon
# as they click the button, so the sample is kept per file in session state.
sample_key = f"nli_sample::{selected_csv}"
if sample_key not in st.session_state:
    # Cap the sample size: DataFrame.sample raises if n exceeds the row count.
    st.session_state[sample_key] = df.sample(min(5, len(df))).reset_index(drop=True)
sample_df = st.session_state[sample_key]

st.subheader("Sample from Dataset")
st.dataframe(sample_df[['premise', 'hypothesis', 'label']])

st.subheader("🔍 Run Inference")
index = st.number_input("Select Sample Index", min_value=0, max_value=len(sample_df)-1, value=0, step=1)
premise = sample_df.loc[index, 'premise']
hypothesis = sample_df.loc[index, 'hypothesis']

st.markdown(f"**Premise:** {premise}")
st.markdown(f"**Hypothesis:** {hypothesis}")

if st.button("Run NLI Prediction"):
    # Pass the pair as text/text_pair so the tokenizer inserts its own
    # separator tokens; a hand-written "</s>" is not this model's separator.
    result = nli_pipeline({"text": str(premise), "text_pair": str(hypothesis)})
    # Depending on the input form the pipeline returns a dict or a list of
    # dicts; normalise to the single prediction dict.
    prediction = result[0] if isinstance(result, list) else result
    st.success(f"**Prediction:** {prediction['label']} (Score: {prediction['score']:.2f})")