"""Gradio demo for extractive question answering with a fine-tuned QA model.

Loads a Hugging Face question-answering checkpoint once at import time and
exposes a small web UI in which the user supplies a context passage and a
question; the predicted answer span is extracted from the context.
"""

import gradio as gr
import torch
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

# Load model and tokenizer once; they are reused for every request.
MODEL_NAME = "your-hf-username/raft-qa"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForQuestionAnswering.from_pretrained(MODEL_NAME)

# Message shown whenever no usable answer span can be produced.
NO_ANSWER_MESSAGE = "No answer found."


def answer_question(context: str, question: str) -> str:
    """Return the answer span the model predicts for *question* in *context*.

    Args:
        context: Background text the answer should be extracted from.
        question: The question to answer.

    Returns:
        The decoded answer text, or ``"No answer found."`` when either input
        is blank, the predicted span is empty/inverted, or the span decodes
        to nothing but special tokens and whitespace.
    """
    # Skip the forward pass entirely when there is nothing to work with.
    if not (context and context.strip()) or not (question and question.strip()):
        return NO_ANSWER_MESSAGE

    inputs = tokenizer(
        question,
        context,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )
    with torch.no_grad():
        outputs = model(**inputs)

    # Single-example batch: take the most likely start and end positions.
    start_idx = int(outputs.start_logits[0].argmax())
    end_idx = int(outputs.end_logits[0].argmax()) + 1  # exclusive slice bound

    # Start and end are predicted independently, so the end may precede the
    # start; such a span carries no answer.
    if end_idx <= start_idx:
        return NO_ANSWER_MESSAGE

    answer_ids = inputs["input_ids"][0][start_idx:end_idx]
    # Drop special tokens so a span touching e.g. the leading classifier or
    # separator token is not rendered as literal marker text.
    answer = tokenizer.decode(answer_ids, skip_special_tokens=True).strip()
    return answer or NO_ANSWER_MESSAGE


# Define UI
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("# 🤖 RAFT: Retrieval-Augmented Fine-Tuning for QA")
    gr.Markdown(
        "Ask a question based on the provided context and see how RAFT improves response accuracy!"
    )

    with gr.Row():
        context_input = gr.Textbox(
            lines=5,
            label="Context",
            placeholder="Enter background text here...",
        )
        question_input = gr.Textbox(
            lines=2,
            label="Question",
            placeholder="What is the main idea?",
        )

    answer_output = gr.Textbox(label="Answer", interactive=False)
    submit_btn = gr.Button("Generate Answer")

    submit_btn.click(
        answer_question,
        inputs=[context_input, question_input],
        outputs=answer_output,
    )


if __name__ == "__main__":
    # Only start the web server when executed as a script, so importing this
    # module (e.g. for reuse of `answer_question` or `demo`) has no side effect
    # beyond loading the model.
    demo.launch()