Spaces:

oceddyyy
/

QandA_Generator

Sleeping

oceddyyy committed on May 7

Commit

a6db1b9

verified ·

1 Parent(s): 29f1b81

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,7 +3,12 @@ from transformers import pipeline
 import gradio as gr
 # Load question-generation and question-answering pipelines
-qg_pipeline = pipeline("e2e-qg", model="valhalla/t5-small-qa-qg-hl")
 qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
 # Simple chunking: split on paragraphs (for demo)
@@ -28,18 +33,20 @@ def convert_text(raw_text):
     for chunk in chunks:
         # Generate raw Q&A pairs
         try:
-            candidates = qg_pipeline(chunk)
         except Exception:
             continue
-        for cand in candidates:
-            question = cand.get("question") or cand.get("Q")
-            if not question:
-                continue
             # Refine answer using QA pipeline
             ans = qa_pipeline({"question": question, "context": chunk})
             answer = ans.get("answer", "").strip()
             # Append result
-            qna_list.append({"question": question.strip(), "answer": answer})
     # Deduplicate
     unique = []
     seen = set()

 import gradio as gr
 # Load question-generation and question-answering pipelines
+# Use 'text2text-generation' for QG since 'e2e-qg' is not a recognized task
+qg_pipeline = pipeline(
+    "text2text-generation",
+    model="valhalla/t5-small-qa-qg-hl",
+    tokenizer="valhalla/t5-small-qa-qg-hl"
+)
 qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
 # Simple chunking: split on paragraphs (for demo)
     for chunk in chunks:
         # Generate raw Q&A pairs
         try:
+            # The model expects a prompt prefix for QG
+            prompt = f"generate question: {chunk}"
+            outputs = qg_pipeline(prompt, max_length=64, clean_up_tokenization_spaces=True)
         except Exception:
             continue
+        for out in outputs:
+            question = out["generated_text"].strip()
+            if not question.endswith("?"):
+                question += "?"
             # Refine answer using QA pipeline
             ans = qa_pipeline({"question": question, "context": chunk})
             answer = ans.get("answer", "").strip()
             # Append result
+            qna_list.append({"question": question, "answer": answer})
     # Deduplicate
     unique = []
     seen = set()