oceddyyy committed on
Commit
a6db1b9
·
verified ·
1 Parent(s): 29f1b81

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -7
app.py CHANGED
@@ -3,7 +3,12 @@ from transformers import pipeline
3
  import gradio as gr
4
 
5
  # Load question-generation and question-answering pipelines
6
- qg_pipeline = pipeline("e2e-qg", model="valhalla/t5-small-qa-qg-hl")
 
 
 
 
 
7
  qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
8
 
9
  # Simple chunking: split on paragraphs (for demo)
@@ -28,18 +33,20 @@ def convert_text(raw_text):
28
  for chunk in chunks:
29
  # Generate raw Q&A pairs
30
  try:
31
- candidates = qg_pipeline(chunk)
 
 
32
  except Exception:
33
  continue
34
- for cand in candidates:
35
- question = cand.get("question") or cand.get("Q")
36
- if not question:
37
- continue
38
  # Refine answer using QA pipeline
39
  ans = qa_pipeline({"question": question, "context": chunk})
40
  answer = ans.get("answer", "").strip()
41
  # Append result
42
- qna_list.append({"question": question.strip(), "answer": answer})
43
  # Deduplicate
44
  unique = []
45
  seen = set()
 
3
  import gradio as gr
4
 
5
  # Load question-generation and question-answering pipelines
6
+ # Use 'text2text-generation' for QG since 'e2e-qg' is not a recognized task
7
+ qg_pipeline = pipeline(
8
+ "text2text-generation",
9
+ model="valhalla/t5-small-qa-qg-hl",
10
+ tokenizer="valhalla/t5-small-qa-qg-hl"
11
+ )
12
  qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
13
 
14
  # Simple chunking: split on paragraphs (for demo)
 
33
  for chunk in chunks:
34
  # Generate raw Q&A pairs
35
  try:
36
+ # The model expects a prompt prefix for QG
37
+ prompt = f"generate question: {chunk}"
38
+ outputs = qg_pipeline(prompt, max_length=64, clean_up_tokenization_spaces=True)
39
  except Exception:
40
  continue
41
+ for out in outputs:
42
+ question = out["generated_text"].strip()
43
+ if not question.endswith("?"):
44
+ question += "?"
45
  # Refine answer using QA pipeline
46
  ans = qa_pipeline({"question": question, "context": chunk})
47
  answer = ans.get("answer", "").strip()
48
  # Append result
49
+ qna_list.append({"question": question, "answer": answer})
50
  # Deduplicate
51
  unique = []
52
  seen = set()