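# QandA_Generator / app.py
# Copied from the Hugging Face file viewer. Page metadata kept for reference:
#   uploader avatar: oceddyyy; commit message: "Update app.py";
#   commit a6db1b9 (verified); file size 2.52 kB.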
import json
import logging

import gradio as gr
from transformers import pipeline
# Model setup: both pipelines are built once, when this module is imported.
#
# Question generation goes through the generic "text2text-generation" task
# because "e2e-qg" is not a task name that pipeline() recognizes.
_QG_CHECKPOINT = "valhalla/t5-small-qa-qg-hl"
qg_pipeline = pipeline(
    task="text2text-generation",
    model=_QG_CHECKPOINT,
    tokenizer=_QG_CHECKPOINT,
)

# Question answering: called later with a question and the chunk it came from.
qa_pipeline = pipeline(
    task="question-answering",
    model="distilbert-base-cased-distilled-squad",
)
# Simple chunking: split on paragraphs (for demo)
def split_chunks(text, max_len=200):
    """Split raw text into chunks of at most ``max_len`` words.

    Every non-blank line of ``text`` is treated as one paragraph. A paragraph
    with ``max_len`` words or fewer is kept verbatim (only its surrounding
    whitespace is stripped). A longer paragraph is cut into consecutive
    groups of ``max_len`` words, each joined with single spaces.

    Args:
        text: The raw input text. ``None`` or an empty string yields no
            chunks.
        max_len: Maximum number of whitespace-separated words per chunk.

    Returns:
        A list of chunk strings in their original order.

    Raises:
        ValueError: If ``max_len`` is smaller than 1. A negative value would
            otherwise silently discard every long paragraph.
    """
    if max_len < 1:
        raise ValueError(f"max_len must be a positive integer, got {max_len!r}")
    if not text:
        return []

    chunks = []
    for line in text.split("\n"):
        paragraph = line.strip()
        if not paragraph:
            continue
        words = paragraph.split()
        if len(words) <= max_len:
            # Short paragraph: keep it as written, inner spacing included.
            chunks.append(paragraph)
        else:
            # Long paragraph: emit fixed-size word windows.
            chunks.extend(
                " ".join(words[start:start + max_len])
                for start in range(0, len(words), max_len)
            )
    return chunks
# Conversion function
def convert_text(raw_text):
    """Turn raw handbook text into a JSON string of question/answer pairs.

    The text is split into chunks with ``split_chunks``. For each chunk the
    question-generation pipeline proposes questions, and the
    question-answering pipeline then extracts an answer for each question
    using that same chunk as context. Duplicate (question, answer) pairs are
    dropped, keeping the first occurrence.

    The conversion is best-effort: a chunk or question whose model call fails
    is logged and skipped rather than aborting the whole run.

    Args:
        raw_text: The text to convert. ``None``, empty, or whitespace-only
            input produces an empty JSON list.

    Returns:
        A JSON array (pretty-printed with a 2-space indent, non-ASCII
        characters preserved) of objects with ``"question"`` and ``"answer"``
        keys.
    """
    if not raw_text or not raw_text.strip():
        # Nothing to convert; avoid touching the models at all.
        return json.dumps([], indent=2, ensure_ascii=False)

    log = logging.getLogger(__name__)
    qna_list = []
    seen = set()  # (question, answer) pairs already emitted

    for chunk in split_chunks(raw_text):
        # NOTE(review): the "-hl" checkpoint's model card describes inputs
        # with the answer span wrapped in <hl> tokens; confirm that this plain
        # prefix yields usable questions.
        prompt = f"generate question: {chunk}"
        try:
            outputs = qg_pipeline(
                prompt,
                max_length=64,
                clean_up_tokenization_spaces=True,
            )
        except Exception:
            log.warning(
                "Question generation failed for a chunk; skipping it.",
                exc_info=True,
            )
            continue

        for out in outputs:
            question = out["generated_text"].strip()
            if not question:
                # An empty generation would otherwise become the question "?".
                continue
            if not question.endswith("?"):
                question += "?"

            # Refine the answer using the QA pipeline, with the same
            # best-effort policy as question generation above.
            try:
                ans = qa_pipeline(question=question, context=chunk)
            except Exception:
                log.warning(
                    "Question answering failed for %r; skipping it.",
                    question,
                    exc_info=True,
                )
                continue
            answer = ans.get("answer", "").strip()

            # Deduplicate while preserving first-seen order.
            key = (question, answer)
            if key in seen:
                continue
            seen.add(key)
            qna_list.append({"question": question, "answer": answer})

    return json.dumps(qna_list, indent=2, ensure_ascii=False)
# Gradio interface
def main():
    """Build the Gradio Blocks UI and launch it.

    The page has a heading, a "Raw Text" input box, a "Generated Q&A JSON"
    output box, and a "Convert" button that runs ``convert_text`` on the
    input and writes the result to the output box.
    """
    with gr.Blocks() as app:
        gr.Markdown("# Handbook Text to Q&A Converter")
        source_box = gr.Textbox(
            lines=10,
            placeholder="Paste handbook text here...",
            label="Raw Text",
        )
        result_box = gr.Textbox(lines=10, label="Generated Q&A JSON")
        run_button = gr.Button("Convert")
        # Wire the button: input box -> convert_text -> output box.
        run_button.click(fn=convert_text, inputs=source_box, outputs=result_box)
    app.launch()


if __name__ == "__main__":
    main()