Spaces:

oceddyyy
/

QandA_Generator

Sleeping

App Files Files Community

QandA_Generator / app.py

oceddyyy

Update app.py

9762129 verified 3 months ago

raw

history blame

2.22 kB

	import json
	import logging

	import gradio as gr
	from transformers import pipeline

	# Build the question-generation and question-answering pipelines once at
	# import time; convert_text reuses these module-level objects on every call.
	# NOTE(review): "e2e-qg" is not among the task names documented for
	# transformers.pipeline — confirm this loads in the deployed environment.
	qg_pipeline = pipeline(
	    task="e2e-qg",
	    model="valhalla/t5-small-qa-qg-hl",
	)
	qa_pipeline = pipeline(
	    task="question-answering",
	    model="distilbert-base-cased-distilled-squad",
	)

	# Simple chunking: split on paragraphs (for demo)
	def split_chunks(text, max_len=200):
	    """Split raw text into chunks of at most ``max_len`` words.

	    The text is broken on newlines; blank lines are discarded and each
	    remaining line is treated as one paragraph. A paragraph that fits within
	    ``max_len`` whitespace-separated words is kept as-is (only stripped),
	    while a longer one is cut into consecutive windows of ``max_len`` words
	    re-joined with single spaces.

	    Args:
	        text: Raw input text. ``None`` or an empty string yields no chunks.
	        max_len: Maximum number of words per chunk; must be at least 1.

	    Returns:
	        A list of non-empty chunk strings in document order.

	    Raises:
	        ValueError: If ``max_len`` is smaller than 1.
	    """
	    if max_len < 1:
	        # A zero step makes range() raise and a negative one would silently
	        # drop every long paragraph, so reject both up front.
	        raise ValueError(f"max_len must be a positive integer, got {max_len!r}")
	    if not text:
	        return []

	    chunks = []
	    for line in text.split("\n"):
	        paragraph = line.strip()
	        if not paragraph:
	            continue
	        words = paragraph.split()
	        if len(words) <= max_len:
	            # Short paragraphs keep their original internal spacing.
	            chunks.append(paragraph)
	            continue
	        chunks.extend(
	            " ".join(words[start:start + max_len])
	            for start in range(0, len(words), max_len)
	        )
	    return chunks

	# Conversion function
	def convert_text(raw_text):
	    """Turn raw text into a JSON string of de-duplicated question/answer pairs.

	    The text is chunked with ``split_chunks``; ``qg_pipeline`` proposes
	    questions for each chunk and ``qa_pipeline`` extracts an answer for every
	    question from that same chunk. A chunk or question whose model call
	    fails is logged and skipped so that one bad input does not discard the
	    rest of the results.

	    Args:
	        raw_text: Text to convert. ``None`` or blank text yields ``"[]"``.

	    Returns:
	        A JSON array (2-space indent, non-ASCII characters kept verbatim) of
	        ``{"question": ..., "answer": ...}`` objects in first-seen order.
	    """
	    logger = logging.getLogger(__name__)
	    unique = []
	    seen = set()

	    has_content = bool(raw_text) and bool(raw_text.strip())
	    chunks = split_chunks(raw_text) if has_content else []

	    for chunk in chunks:
	        try:
	            candidates = qg_pipeline(chunk)
	        except Exception:
	            # Best effort: a chunk the generator cannot handle is skipped, but
	            # the failure is recorded instead of being silently swallowed.
	            logger.exception("Question generation failed; skipping chunk")
	            continue

	        for cand in candidates or ():
	            # NOTE(review): the candidate shape depends on the QG pipeline;
	            # plain question strings are accepted alongside dict-like items —
	            # confirm against the pipeline actually deployed.
	            if isinstance(cand, str):
	                question = cand
	            else:
	                question = cand.get("question") or cand.get("Q")
	            question = question.strip() if isinstance(question, str) else ""
	            if not question:
	                continue

	            # Refine the answer by running extractive QA over the same chunk.
	            try:
	                ans = qa_pipeline({"question": question, "context": chunk})
	            except Exception:
	                logger.exception("Answer extraction failed; skipping question")
	                continue
	            answer = (ans.get("answer") or "").strip()

	            # De-duplicate on the (question, answer) pair, keeping the first.
	            key = (question, answer)
	            if key in seen:
	                continue
	            seen.add(key)
	            unique.append({"question": question, "answer": answer})

	    return json.dumps(unique, indent=2, ensure_ascii=False)

	# Gradio interface
	def main():
	    """Assemble the Gradio Blocks UI and launch it."""
	    with gr.Blocks() as demo:
	        gr.Markdown("# Handbook Text to Q&A Converter")
	        raw_box = gr.Textbox(
	            lines=10,
	            placeholder="Paste handbook text here...",
	            label="Raw Text",
	        )
	        json_box = gr.Textbox(lines=10, label="Generated Q&A JSON")
	        # Clicking the button passes the raw text through convert_text and
	        # writes the returned JSON string into the output box.
	        trigger = gr.Button("Convert")
	        trigger.click(fn=convert_text, inputs=raw_box, outputs=json_box)
	    demo.launch()

	# Start the app only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()