# Page header captured along with the source ("Spaces: Sleeping"); not part of the program.
import json
import logging

import gradio as gr
from transformers import pipeline
# Load the question-generation and question-answering pipelines once at import
# time so every request reuses the same model instances.
# NOTE(review): "e2e-qg" does not appear among the task identifiers documented
# for transformers.pipeline(); it looks like the custom task name from the
# patil-suraj/question_generation project. If this raises an "Unknown task"
# error at start-up, a "text2text-generation" pipeline with an e2e-qg
# checkpoint is the likely replacement -- confirm model and prompt format first.
qg_pipeline = pipeline("e2e-qg", model="valhalla/t5-small-qa-qg-hl")
qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
# Simple chunking: one chunk per non-empty line, long lines split by word count.
def split_chunks(text, max_len=200):
    """Split *text* into chunks of at most *max_len* whitespace-separated words.

    The text is split on newlines; blank lines are dropped and each remaining
    line is stripped. A line with no more than *max_len* words is kept
    verbatim (inner spacing preserved). A longer line is cut into consecutive
    groups of *max_len* words, each re-joined with single spaces.

    Args:
        text: Raw input text. ``None`` or an empty string yields ``[]``.
        max_len: Maximum number of words per chunk; must be at least 1.

    Returns:
        A list of chunk strings in document order.

    Raises:
        ValueError: If *max_len* is smaller than 1.
    """
    # A zero step would make range() raise an opaque error and a negative one
    # would silently drop long paragraphs, so reject both up front.
    if max_len < 1:
        raise ValueError("max_len must be a positive integer")
    if not text:
        return []

    chunks = []
    for line in text.split("\n"):
        paragraph = line.strip()
        if not paragraph:
            continue
        words = paragraph.split()
        if len(words) <= max_len:
            chunks.append(paragraph)
            continue
        # Further split long paragraphs into fixed-size word windows.
        chunks.extend(
            " ".join(words[start:start + max_len])
            for start in range(0, len(words), max_len)
        )
    return chunks
# Conversion function
logger = logging.getLogger(__name__)


def _extract_questions(candidates):
    """Flatten question-generation output into a list of non-empty question strings."""
    if isinstance(candidates, (str, dict)):
        candidates = [candidates]

    questions = []
    for cand in candidates or []:
        if isinstance(cand, str):
            texts = [cand]
        elif isinstance(cand, dict):
            direct = cand.get("question") or cand.get("Q")
            if direct:
                texts = [direct]
            else:
                # Text2text-style output: a single generated string.
                # Presumably several questions joined by "<sep>", as the
                # e2e-qg checkpoints emit -- verify against the model in use.
                texts = str(cand.get("generated_text") or "").split("<sep>")
        else:
            continue
        for text in texts:
            if isinstance(text, str) and text.strip():
                questions.append(text.strip())
    return questions


def convert_text(raw_text):
    """Turn raw handbook text into a JSON string of question/answer pairs.

    The text is chunked with ``split_chunks``; ``qg_pipeline`` proposes
    questions for each chunk and ``qa_pipeline`` extracts an answer to each
    question from that same chunk. Conversion is best-effort: a chunk or
    question whose model call fails is logged and skipped rather than
    aborting the whole run.

    Args:
        raw_text: Text pasted by the user. ``None``, empty or whitespace-only
            input produces an empty JSON list.

    Returns:
        A JSON array (``indent=2``, non-ASCII preserved) of
        ``{"question": ..., "answer": ...}`` objects, de-duplicated on the
        (question, answer) pair in first-seen order.
    """
    if not raw_text or not raw_text.strip():
        return json.dumps([], indent=2, ensure_ascii=False)

    qna_list = []
    seen = set()
    for chunk in split_chunks(raw_text):
        # Broad catch on purpose: any model/tokenizer failure should only
        # cost us this chunk, not the whole conversion.
        try:
            candidates = qg_pipeline(chunk)
        except Exception:
            logger.exception("Question generation failed; skipping chunk")
            continue

        for question in _extract_questions(candidates):
            # Refine the answer using the QA pipeline.
            try:
                result = qa_pipeline(question=question, context=chunk)
            except Exception:
                logger.exception("Question answering failed for %r; skipping", question)
                continue

            raw_answer = result.get("answer") if isinstance(result, dict) else None
            answer = (raw_answer or "").strip()

            # Deduplicate while preserving first-seen order.
            key = (question, answer)
            if key in seen:
                continue
            seen.add(key)
            qna_list.append({"question": question, "answer": answer})

    return json.dumps(qna_list, indent=2, ensure_ascii=False)
# Gradio interface
def main():
    """Build the Gradio Blocks UI and launch it.

    The page has a title, a multi-line input box for the raw text, a
    multi-line output box for the generated JSON, and a "Convert" button
    that feeds the input box through ``convert_text`` into the output box.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Handbook Text to Q&A Converter")
        input_text = gr.Textbox(
            lines=10,
            placeholder="Paste handbook text here...",
            label="Raw Text",
        )
        output_json = gr.Textbox(lines=10, label="Generated Q&A JSON")
        convert_btn = gr.Button("Convert")
        convert_btn.click(fn=convert_text, inputs=input_text, outputs=output_json)
    # Launch once the layout is fully declared.
    demo.launch()
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()