# QandA_Generator / app.py
# Page header captured together with the file (not part of the program):
# uploader avatar: oceddyyy; last commit "Update app.py" (9762129, verified); 2.22 kB.
import json
import logging

import gradio as gr
from transformers import pipeline
# Load the question-generation and question-answering pipelines once, at import
# time; convert_text reads both as module-level globals on every call.
# NOTE(review): "e2e-qg" is not among the task names listed in the transformers
# `pipeline()` documentation -- confirm this task resolves in the deployed
# environment (it may depend on a custom pipeline registration not visible here).
qg_pipeline = pipeline("e2e-qg", model="valhalla/t5-small-qa-qg-hl")
# Used by convert_text to answer each generated question against its source chunk.
qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
# Simple chunking: split on paragraphs (for demo)
def split_chunks(text: str, max_len: int = 200) -> list:
    """Split *text* into chunks of at most *max_len* whitespace-separated words.

    The text is split on newlines; blank lines are dropped and every remaining
    paragraph is stripped. A paragraph that fits within *max_len* words is kept
    verbatim (inner whitespace untouched); a longer one is cut into consecutive
    windows of *max_len* words, each re-joined with single spaces.

    Args:
        text: Raw input text.
        max_len: Maximum number of words per chunk; must be at least 1.

    Returns:
        The chunks as a list of strings, in document order.

    Raises:
        ValueError: If *max_len* is less than 1.
    """
    if max_len < 1:
        # A zero step makes range() fail with an opaque message, and a negative
        # step yields no windows at all, silently discarding the whole input.
        raise ValueError(f"max_len must be a positive integer, got {max_len!r}")

    chunks = []
    for line in text.split("\n"):
        paragraph = line.strip()
        if not paragraph:
            continue
        words = paragraph.split()
        if len(words) <= max_len:
            chunks.append(paragraph)
            continue
        # Paragraph is too long: emit fixed-size word windows.
        chunks.extend(
            " ".join(words[start : start + max_len])
            for start in range(0, len(words), max_len)
        )
    return chunks
# Conversion function
def convert_text(raw_text):
    """Convert raw text into a JSON string of unique question/answer pairs.

    The text is chunked with ``split_chunks``; each chunk is passed to
    ``qg_pipeline`` to obtain question candidates, and every question is then
    answered against its own chunk with ``qa_pipeline``. Pairs are kept in
    first-seen order and exact (question, answer) duplicates are dropped.

    Processing is best effort: a chunk whose question generation fails, or a
    question whose answering fails, is logged and skipped instead of aborting
    the whole conversion.

    Args:
        raw_text: The text to convert. ``None`` is treated as empty text.

    Returns:
        A JSON array (pretty-printed, non-ASCII preserved) of objects with the
        keys ``"question"`` and ``"answer"``.
    """
    logger = logging.getLogger(__name__)
    unique = []
    seen = set()

    for chunk in split_chunks(raw_text or ""):
        try:
            candidates = qg_pipeline(chunk)
        except Exception:
            # Keep going with the remaining chunks, but leave a trace.
            logger.exception("Question generation failed; skipping chunk")
            continue

        for cand in candidates:
            # NOTE(review): the output schema of qg_pipeline is not visible
            # here. Dict candidates keyed by "question"/"Q" are handled as
            # before; bare-string candidates are accepted too -- confirm.
            if isinstance(cand, str):
                question = cand
            else:
                question = cand.get("question") or cand.get("Q")
            question = (question or "").strip()
            if not question:
                continue

            # Refine the answer by running the QA model on the source chunk.
            try:
                ans = qa_pipeline({"question": question, "context": chunk})
            except Exception:
                # Same best-effort policy as question generation above.
                logger.exception("Question answering failed; skipping question")
                continue
            answer = (ans.get("answer") or "").strip()

            # De-duplicate on the exact (question, answer) pair as we go.
            key = (question, answer)
            if key in seen:
                continue
            seen.add(key)
            unique.append({"question": question, "answer": answer})

    return json.dumps(unique, indent=2, ensure_ascii=False)
# Gradio interface
def main():
    """Build the Gradio Blocks UI for the converter and launch it."""
    with gr.Blocks() as app:
        gr.Markdown("# Handbook Text to Q&A Converter")
        raw_box = gr.Textbox(
            lines=10,
            placeholder="Paste handbook text here...",
            label="Raw Text",
        )
        json_box = gr.Textbox(lines=10, label="Generated Q&A JSON")
        trigger = gr.Button("Convert")
        # Clicking the button runs convert_text on the raw text box and writes
        # the returned JSON string into the output box.
        trigger.click(fn=convert_text, inputs=raw_box, outputs=json_box)
    app.launch()


if __name__ == "__main__":
    main()