# app.py
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr

# Initialize globals so the model is loaded lazily on first request
model = None
tokenizer = None
device = None


# Define function to load model
def load_model():
    global model, tokenizer, device

    # Use GPU if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Load the Phi-2 model
    model_id = "microsoft/phi-2"
    print("Loading Phi-2 model and tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto"  # Better device management for Spaces
    )
    print("Model loaded successfully!")


# Define inference function (a generator, so Gradio can stream progress updates)
def generate_text(prompt, task_type, max_length=300):
    global model, tokenizer

    # If the model hasn't been loaded yet, load it
    if model is None:
        load_model()

    # Set temperature based on task type
    temperature_map = {
        "Math Problem": 0.2,
        "Science Theory": 0.4,
        "Coding Question": 0.3,
        "Reasoning": 0.5,
        "Creative Writing": 0.8
    }
    temperature = temperature_map.get(task_type, 0.5)

    # Enhance the prompt to request step-by-step solutions
    enhanced_prompt = f"{prompt}\n\nPlease provide a detailed step-by-step solution with clear reasoning."

    # Progress update for the UI
    yield "Generating solution..."

    # Tokenize input and move it to the device the model was dispatched to
    inputs = tokenizer(enhanced_prompt, return_tensors="pt").to(model.device)

    # Generate output
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_length,
            temperature=temperature,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id  # Phi-2 has no pad token by default
        )

    # Decode the response and strip the echoed prompt so only the solution remains
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    if response.startswith(enhanced_prompt):
        response = response[len(enhanced_prompt):].strip()

    # If the response doesn't seem to include steps, add formatting for clarity
    if "step" not in response.lower() and len(response) > 100:
        # Split into paragraphs and format them as numbered steps
        paragraphs = [p for p in response.split('\n') if p.strip()]
        formatted_response = ""
        for i, para in enumerate(paragraphs):
            formatted_response += f"Step {i+1}: {para}\n\n"
        yield formatted_response
    else:
        yield response


# Create Gradio interface
with gr.Blocks(title="Phi-2 Step-by-Step Solution Generator", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧠 Phi-2 Step-by-Step Solution Generator")
    gr.Markdown("""
    Enter a prompt below and get detailed step-by-step solutions using Microsoft's Phi-2 model.
    Select the appropriate task type to optimize the model's response.
""") with gr.Row(): with gr.Column(scale=2): prompt_input = gr.Textbox( label="Prompt", placeholder="Enter your question or problem here...", lines=5 ) with gr.Row(): task_type = gr.Radio( ["Math Problem", "Science Theory", "Coding Question", "Reasoning", "Creative Writing"], label="Task Type (sets optimal temperature)", value="Reasoning" ) max_length_slider = gr.Slider( minimum=100, maximum=1000, value=300, step=50, label="Maximum Output Length" ) with gr.Row(): generate_button = gr.Button( "✨ Generate Step-by-Step Solution", variant="primary", size="lg" ) clear_button = gr.Button("Clear", variant="secondary") with gr.Column(scale=3): output_text = gr.Textbox( label="Step-by-Step Solution", lines=15, show_copy_button=True ) # Examples with different task types with gr.Accordion("Example Prompts", open=False): gr.Examples( examples=[ ["Solve the quadratic equation: 2x² + 5x - 3 = 0", "Math Problem"], ["Explain how photosynthesis works in plants", "Science Theory"], ["Write a function in Python to find the Fibonacci sequence up to n terms", "Coding Question"], ["Why might increasing minimum wage have both positive and negative economic impacts?", "Reasoning"], ["Write a short story about a robot discovering emotions", "Creative Writing"] ], inputs=[prompt_input, task_type] ) # Add functionality to buttons generate_button.click( fn=generate_text, inputs=[prompt_input, task_type, max_length_slider], outputs=output_text ) # Clear functionality clear_button.click( fn=lambda: ("", "Reasoning"), inputs=[], outputs=[prompt_input, task_type] ) # Adding a note about load times gr.Markdown(""" > **Note**: The model loads when you submit your first prompt, which may take 1-2 minutes. > Subsequent generations will be much faster. """) # Launch the app if __name__ == "__main__": demo.queue().launch()