# Source: Hugging Face Space "Agents-MCP-Hackathon/data_science_agent" (status: Paused)
# File size: 30,085 bytes

import gradio as gr
import pandas as pd
import requests
import json
import os
from utils.google_genai_llm import get_response, generate_with_gemini
from utils.utils import parse_json_codefences
from prompts.requirements_gathering import requirements_gathering_system_prompt
from prompts.planning import hf_query_gen_prompt, hf_context_gen_prompt
from utils.huggingface_mcp_llamaindex import connect_and_get_tools, call_tool
from prompts.devstral_coding_prompt import devstral_code_gen_sys_prompt, devstral_code_gen_user_prompt
from dotenv import load_dotenv
import os
load_dotenv()

# Import Modal inference function
# Optional dependency: when either import below fails, MODAL_AVAILABLE is set
# to False and devstral_app to None instead of aborting start-up.
import sys
# Put the "modal" directory next to this file on the import path
# (presumably where devstral_inference.py lives — confirm).
sys.path.append(os.path.join(os.path.dirname(__file__), 'modal'))
try:
    from modal import App
    # Import the Modal inference function and app from separate file
    import subprocess
    from devstral_inference import run_devstral_inference, app as devstral_app
    MODAL_AVAILABLE = True

except ImportError:
    # NOTE: run_devstral_inference stays undefined on this path; callers must
    # check MODAL_AVAILABLE before using it.
    MODAL_AVAILABLE = False
    devstral_app = None
    print("Warning: Modal not available. Code generation will be disabled.")

from PIL import Image
import tempfile
import traceback
import hashlib

# Import Marker for document processing
# Optional dependency: MARKER_AVAILABLE records whether the three Marker
# imports succeeded so document extraction can be skipped when they did not.
try:
    from marker.converters.pdf import PdfConverter
    from marker.models import create_model_dict
    from marker.output import text_from_rendered
    MARKER_AVAILABLE = True
except ImportError:
    # PdfConverter, create_model_dict and text_from_rendered stay undefined here.
    MARKER_AVAILABLE = False
    print("Warning: Marker library not available. PDF, PPT, and DOCX processing will be limited.")

# Load environment variables
# Read once at import time, after load_dotenv() above has run.
# os.getenv returns None for any variable that is not set.
MODAL_API_URL = os.getenv("MODAL_API_URL")
BEARER_TOKEN = os.getenv("BEARER_TOKEN")
CODING_MODEL = os.getenv("CODING_MODEL")

def get_file_hash(file_path, chunk_size=1024 * 1024):
    """Return the MD5 hex digest of a file's contents, or None if unreadable.

    The digest is only used to recognise an upload that was already processed
    (a cache key); it is not a security control.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Positive number of bytes read per iteration. The file is
            streamed so that large uploads are never held in memory in full.

    Returns:
        The hexadecimal MD5 digest of the file contents, or None when the file
        cannot be opened or read.
    """
    try:
        digest = hashlib.md5()
        with open(file_path, 'rb') as f:
            # iter() with a sentinel stops at EOF, where read() returns b"".
            for chunk in iter(lambda: f.read(chunk_size), b""):
                digest.update(chunk)
    except (OSError, TypeError, ValueError):
        # Missing/unreadable file, a non-path argument, or an invalid path.
        return None
    return digest.hexdigest()

def extract_text_with_marker(file_path):
    """Convert a document with Marker and return ``(summary, text)``.

    A new PdfConverter, including a fresh ``create_model_dict()`` call, is
    built on every invocation.

    Returns:
        A 2-tuple of strings. On success: a summary line with word and
        character counts, and the extracted text. When Marker is not installed
        or the conversion raises: an explanatory message and an empty string.
    """
    if MARKER_AVAILABLE:
        try:
            # Build the converter and run it over the file in one step.
            rendered_doc = PdfConverter(artifact_dict=create_model_dict())(file_path)

            # text_from_rendered yields three values; only the text is used.
            text, _, _ = text_from_rendered(rendered_doc)

            word_count, char_count = len(text.split()), len(text)
            return f"Extracted text ({word_count} words, {char_count} characters)", text
        except Exception as e:
            return f"Error processing document: {str(e)}", ""

    return "Marker library not available for document processing.", ""

# Upper bound on the extracted document text inlined into the prompt, per file.
_MAX_DOC_TEXT_CHARS = 200000

_DOCUMENT_EXTENSIONS = ('.pdf', '.ppt', '.pptx', '.doc', '.docx')
_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp')


def _file_size(file_path, unit):
    """Return the on-disk size of ``file_path`` in multiples of ``unit`` bytes."""
    return round(os.path.getsize(file_path) / unit, 2)


def _describe_table(file_name, label, df):
    """Summarise a DataFrame's row/column counts and column names."""
    # str.join() rejects non-string labels, so stringify each one.
    columns = ', '.join(str(column) for column in df.columns)
    return (
        f"- {file_name}: {label} file with {len(df)} rows and {len(df.columns)} columns\n"
        f"  Columns: {columns}\n"
    )


def _describe_document(file_path, file_name, file_extension, file_cache):
    """Summarise a PDF/PowerPoint/Word file, reusing cached extraction results.

    New extraction results are stored in ``file_cache`` (mutated in place).
    """
    file_size_mb = _file_size(file_path, 1024 * 1024)

    # Without a content hash two different files with the same name would share
    # a key, so the cache is bypassed when hashing failed.
    file_hash = get_file_hash(file_path)
    cache_key = f"{file_name}_{file_hash}" if file_hash is not None else None

    if cache_key is not None and cache_key in file_cache:
        extraction_stats = file_cache[cache_key]['stats']
        extracted_text = file_cache[cache_key]['text']
        status = "(cached)"
    else:
        extraction_stats, extracted_text = extract_text_with_marker(file_path)
        if cache_key is not None:
            file_cache[cache_key] = {
                'stats': extraction_stats,
                'text': extracted_text,
                'file_name': file_name,
                'file_path': file_path,
            }
        status = "(newly processed)"

    if file_extension == '.pdf':
        doc_type = "PDF document"
    elif file_extension in ('.ppt', '.pptx'):
        doc_type = "PowerPoint presentation"
    else:
        doc_type = "Word document"

    parts = [
        f"- {file_name}: {doc_type}, Size: {file_size_mb} MB {status}\n",
        f"  Content: {extraction_stats}\n",
    ]

    # Inline the extracted text so the model can reason about the document,
    # truncated to _MAX_DOC_TEXT_CHARS characters.
    if extracted_text and extracted_text.strip():
        if len(extracted_text) > _MAX_DOC_TEXT_CHARS:
            text_preview = extracted_text[:_MAX_DOC_TEXT_CHARS] + "..."
        else:
            text_preview = extracted_text
        parts.append(f"  Text Preview: {text_preview}\n")

    return "".join(parts)


def _describe_uploaded_file(file_path, file_cache):
    """Return the prompt lines describing one uploaded file, chosen by extension."""
    file_name = os.path.basename(file_path)
    file_extension = os.path.splitext(file_name)[1].lower()

    if file_extension == '.csv':
        return _describe_table(file_name, "CSV", pd.read_csv(file_path))

    if file_extension in ('.xlsx', '.xls'):
        return _describe_table(file_name, "Excel", pd.read_excel(file_path))

    if file_extension in _DOCUMENT_EXTENSIONS:
        return _describe_document(file_path, file_name, file_extension, file_cache)

    if file_extension in _IMAGE_EXTENSIONS:
        with Image.open(file_path) as img:
            width, height = img.size
            mode = img.mode
        file_size_mb = _file_size(file_path, 1024 * 1024)
        return (
            f"- {file_name}: {file_extension.upper()[1:]} image file\n"
            f"  Dimensions: {width}x{height} pixels, Mode: {mode}, Size: {file_size_mb} MB\n"
        )

    if file_extension == '.json':
        file_size_kb = _file_size(file_path, 1024)
        return f"- {file_name}: JSON file, Size: {file_size_kb} KB\n"

    if file_extension == '.txt':
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = sum(1 for _ in f)
        file_size_kb = _file_size(file_path, 1024)
        return f"- {file_name}: Text file with {lines} lines, Size: {file_size_kb} KB\n"

    file_size_kb = _file_size(file_path, 1024)
    return f"- {file_name}: File uploaded, Size: {file_size_kb} KB\n"


def process_user_input(message, history, uploaded_files, file_cache):
    """Build the requirements-gathering prompt and return the model's reply.

    The prompt context is the prior chat turns followed by an
    ``[UPLOADED_FILES]`` section with one summary per uploaded file. A file
    that cannot be summarised is listed with the error instead of aborting the
    whole request.

    Args:
        message: The user's current message.
        history: Iterable of ``(user_msg, ai_msg)`` pairs, or None/empty.
        uploaded_files: Iterable of file paths, or None/empty.
        file_cache: Dict of document extraction results keyed by
            ``"<file name>_<md5>"``. It is mutated in place; None is treated
            as an empty cache.

    Returns:
        A ``(ai_response, file_cache)`` tuple.
    """
    if file_cache is None:
        file_cache = {}

    context_parts = []
    for user_msg, ai_msg in history or []:
        context_parts.append(f"User: {user_msg}\n")
        if ai_msg:
            context_parts.append(f"Assistant: {ai_msg}\n")

    if uploaded_files:
        context_parts.append("\n[UPLOADED_FILES]\n")
        for file_path in uploaded_files:
            try:
                context_parts.append(_describe_uploaded_file(file_path, file_cache))
            except Exception as e:
                # Best effort: one unreadable file must not block the reply.
                file_name = os.path.basename(str(file_path))
                context_parts.append(
                    f"- {file_name}: File uploaded (unable to preview: {str(e)})\n"
                )
                print(f"Error processing file {file_path}: {traceback.format_exc()}")

    # Format the prompt with conversation history and current query
    formatted_prompt = requirements_gathering_system_prompt.format(
        conversation_history="".join(context_parts),
        query=message
    )

    ai_response = get_response(formatted_prompt)

    return ai_response, file_cache

def chat_interface(message, history, uploaded_files, file_cache):
    """Run one chat turn and return the values for the UI outputs.

    Appends the ``(message, reply)`` pair to ``history`` in place.

    Returns:
        ``(history, history, "", cache)``: the history twice, an empty string,
        and the cache returned by ``process_user_input``.
    """
    reply, refreshed_cache = process_user_input(
        message, history, uploaded_files, file_cache
    )

    turn = (message, reply)
    history.append(turn)

    cleared_input = ""
    return history, history, cleared_input, refreshed_cache

def clear_chat():
    """Return fresh empty values: two empty lists and an empty dict."""
    empty_display, empty_history, empty_cache = [], [], {}
    return empty_display, empty_history, empty_cache

def upload_file_handler(files):
    """Return ``files`` unchanged when truthy, otherwise a new empty list."""
    return files if files else []

# Static snapshot of the Hugging Face MCP tool listing, used as the prompt's
# tool description when the live listing cannot be fetched.
_FALLBACK_HF_TOOL_DETAILS = """meta=None nextCursor=None tools=[Tool(name='hf_whoami', description="Hugging Face tools are being used by authenticated user 'bpHigh'", inputSchema={'type': 'object', 'properties': {}, 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='Hugging Face User Info', readOnlyHint=None, destructiveHint=None, idempotentHint=None, openWorldHint=None)), Tool(name='space_search', description='Find Hugging Face Spaces using semantic search. Include links to the Space when presenting the results.', inputSchema={'type': 'object', 'properties': {'query': {'type': 'string', 'minLength': 1, 'maxLength': 50, 'description': 'Semantic Search Query'}, 'limit': {'type': 'number', 'default': 10, 'description': 'Number of results to return'}, 'mcp': {'type': 'boolean', 'default': False, 'description': 'Only return MCP Server enabled Spaces'}}, 'required': ['query'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='Hugging Face Space Search', readOnlyHint=True, destructiveHint=False, idempotentHint=None, openWorldHint=True)), Tool(name='model_search', description='Find Machine Learning models hosted on Hugging Face. Returns comprehensive information about matching models including downloads, likes, tags, and direct links. Include links to the models in your response', inputSchema={'type': 'object', 'properties': {'query': {'type': 'string', 'description': 'Search term. Leave blank and specify "sort" and "limit" to get e.g. 
"Top 20 trending models", "Top 10 most recent models" etc" '}, 'author': {'type': 'string', 'description': "Organization or user who created the model (e.g., 'google', 'meta-llama', 'microsoft')"}, 'task': {'type': 'string', 'description': "Model task type (e.g., 'text-generation', 'image-classification', 'translation')"}, 'library': {'type': 'string', 'description': "Framework the model uses (e.g., 'transformers', 'diffusers', 'timm')"}, 'sort': {'type': 'string', 'enum': ['trendingScore', 'downloads', 'likes', 'createdAt', 'lastModified'], 'description': 'Sort order: trendingScore, downloads , likes, createdAt, lastModified'}, 'limit': {'type': 'number', 'minimum': 1, 'maximum': 100, 'default': 20, 'description': 'Maximum number of results to return'}}, 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='Model Search', readOnlyHint=True, destructiveHint=False, idempotentHint=None, openWorldHint=True)), Tool(name='model_details', description='Get detailed information about a specific model from the Hugging Face Hub.', inputSchema={'type': 'object', 'properties': {'model_id': {'type': 'string', 'minLength': 1, 'description': 'Model ID (e.g., microsoft/DialoGPT-large)'}}, 'required': ['model_id'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='Model Details', readOnlyHint=True, destructiveHint=False, idempotentHint=None, openWorldHint=False)), Tool(name='paper_search', description="Find Machine Learning research papers on the Hugging Face hub. Include 'Link to paper' When presenting the results. 
Consider whether tabulating results matches user intent.", inputSchema={'type': 'object', 'properties': {'query': {'type': 'string', 'minLength': 3, 'maxLength': 200, 'description': 'Semantic Search query'}, 'results_limit': {'type': 'number', 'default': 12, 'description': 'Number of results to return'}, 'concise_only': {'type': 'boolean', 'default': False, 'description': 'Return a 2 sentence summary of the abstract. Use for broad search terms which may return a lot of results. Check with User if unsure.'}}, 'required': ['query'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='Paper Search', readOnlyHint=True, destructiveHint=False, idempotentHint=None, openWorldHint=True)), Tool(name='dataset_search', description='Find Datasets hosted on the Hugging Face hub. Returns comprehensive information about matching datasets including downloads, likes, tags, and direct links. Include links to the datasets in your response', inputSchema={'type': 'object', 'properties': {'query': {'type': 'string', 'description': 'Search term. Leave blank and specify "sort" and "limit" to get e.g. 
"Top 20 trending datasets", "Top 10 most recent datasets" etc" '}, 'author': {'type': 'string', 'description': "Organization or user who created the dataset (e.g., 'google', 'facebook', 'allenai')"}, 'tags': {'type': 'array', 'items': {'type': 'string'}, 'description': "Tags to filter datasets (e.g., ['language:en', 'size_categories:1M<n<10M', 'task_categories:text-classification'])"}, 'sort': {'type': 'string', 'enum': ['trendingScore', 'downloads', 'likes', 'createdAt', 'lastModified'], 'description': 'Sort order: trendingScore, downloads, likes, createdAt, lastModified'}, 'limit': {'type': 'number', 'minimum': 1, 'maximum': 100, 'default': 20, 'description': 'Maximum number of results to return'}}, 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='Dataset Search', readOnlyHint=True, destructiveHint=False, idempotentHint=None, openWorldHint=True)), Tool(name='dataset_details', description='Get detailed information about a specific dataset on Hugging Face Hub.', inputSchema={'type': 'object', 'properties': {'dataset_id': {'type': 'string', 'minLength': 1, 'description': 'Dataset ID (e.g., squad, glue, imdb)'}}, 'required': ['dataset_id'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='Dataset Details', readOnlyHint=True, destructiveHint=False, idempotentHint=None, openWorldHint=False)), Tool(name='gr1_evalstate_flux1_schnell', description='Generate an image using the Flux 1 Schnell Image Generator. 
(from evalstate/flux1_schnell)', inputSchema={'type': 'object', 'properties': {'prompt': {'type': 'string'}, 'seed': {'type': 'number', 'description': 'numeric value between 0 and 2147483647'}, 'randomize_seed': {'type': 'boolean', 'default': True}, 'width': {'type': 'number', 'description': 'numeric value between 256 and 2048', 'default': 1024}, 'height': {'type': 'number', 'description': 'numeric value between 256 and 2048', 'default': 1024}, 'num_inference_steps': {'type': 'number', 'description': 'numeric value between 1 and 50', 'default': 4}}, 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='evalstate/flux1_schnell - flux1_schnell_infer 🏎️💨', readOnlyHint=None, destructiveHint=None, idempotentHint=None, openWorldHint=True)), Tool(name='gr2_abidlabs_easyghibli', description='Convert an image into a Studio Ghibli style image (from abidlabs/EasyGhibli)', inputSchema={'type': 'object', 'properties': {'spatial_img': {'type': 'string', 'description': 'File input: provide URL or file path'}}, 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='abidlabs/EasyGhibli - abidlabs_EasyGhiblisingle_condition_generate_image 🦀', readOnlyHint=None, destructiveHint=None, idempotentHint=None, openWorldHint=True)), Tool(name='gr3_linoyts_framepack_f1', description='FramePack_F1_end_process tool from linoyts/FramePack-F1', inputSchema={'type': 'object', 'properties': {}, 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='linoyts/FramePack-F1 - FramePack_F1_end_process 📹⚡️', readOnlyHint=None, destructiveHint=None, idempotentHint=None, openWorldHint=True))]"""

# Passed as the tool results when no tool call produced output.
_NO_TOOL_RESULTS_NOTE = "Couldn't generate the tool calls results but use your knowledge about huggingface platform(models, datasets, spaces, training libraries, transfomers library etc.) as backup to generate the plan"


async def generate_plan(history, file_cache):
    """Generate a project plan from the conversation using Gemini and HF tools.

    Steps: ask Gemini for a list of tool calls given the available tools, run
    those calls concurrently, then ask Gemini to turn the conversation and the
    tool results into the final plan text. Every failure in the tool stage
    degrades to a knowledge-only plan rather than raising.

    Args:
        history: Iterable of ``(user_msg, ai_msg)`` pairs, or None/empty.
        file_cache: Accepted for interface compatibility; not used here.

    Returns:
        The text returned by ``generate_with_gemini`` for the context prompt.
    """
    # Imported locally because the file's import block does not import asyncio;
    # without it asyncio.gather below raised NameError, which the broad except
    # swallowed, so the tool calls never ran.
    import asyncio

    # Build conversation history
    turns = []
    for user_msg, ai_msg in history or []:
        turns.append(f"User: {user_msg}\n")
        if ai_msg:
            turns.append(f"Assistant: {ai_msg}\n")
    conversation_history = "".join(turns)

    try:
        hf_query_gen_tool_details = await connect_and_get_tools()
    except Exception as e:
        # Fall back to the static snapshot so planning can still proceed.
        hf_query_gen_tool_details = _FALLBACK_HF_TOOL_DETAILS
        print(str(e))

    # Format the prompt
    formatted_prompt = hf_query_gen_prompt.format(
        Tool_Details=hf_query_gen_tool_details
    ) + "\n\n" + conversation_history
    # Get plan from Gemini
    plan = generate_with_gemini(formatted_prompt, "Planning with gemini")

    # Parse the plan
    parsed_plan = parse_json_codefences(plan)

    # Run every planned tool call concurrently. A malformed plan (not a list of
    # dicts with 'tool' and 'args') or any failing call lands in the except.
    try:
        tool_calls = await asyncio.gather(
            *[call_tool(step['tool'], step['args']) for step in parsed_plan]
        )
    except Exception as e:
        print(f"Tool calls failed, continuing without tool results: {e}")
        tool_calls = []

    formatted_context_prompt = hf_context_gen_prompt.format(
        Conversation=conversation_history,
        Tool_Calls=parsed_plan,
        Results=tool_calls if tool_calls else _NO_TOOL_RESULTS_NOTE
    )
    context = generate_with_gemini(formatted_context_prompt, "Generating context for plan")

    return context

def generate_code_with_devstral(plan_text, history, file_cache):
    """Generate code from the plan with the deployed Devstral model via Modal.

    The endpoint and key can be overridden with the ``DEVSTRAL_BASE_URL`` and
    ``DEVSTRAL_API_KEY`` environment variables; the previous built-in values
    remain as fallbacks so existing deployments keep working.

    Args:
        plan_text: The generated plan; must be non-blank.
        history: Iterable of ``(user_msg, ai_msg)`` pairs, or None/empty. The
            latest non-blank user message becomes the query and the last three
            exchanges are added as context.
        file_cache: Dict of cached file records; each record's ``file_name``
            and ``stats`` entries are listed in the context. May be None/empty.

    Returns:
        A markdown string: the generated code on success, otherwise a message
        starting with "❌". Exceptions are reported in the string, not raised.
    """
    if not MODAL_AVAILABLE:
        return "❌ Modal not available. Please install Modal to use code generation."

    if not plan_text or not plan_text.strip():
        return "❌ Please generate a plan first before generating code."

    try:
        history = history or []

        # The latest non-blank user message is the main query.
        user_query = next(
            (
                user_msg.strip()
                for user_msg, _ in reversed(history)
                if user_msg and user_msg.strip()
            ),
            "Generate Python code based on the provided plan and context.",
        )

        # Build context from file cache and conversation
        context_parts = []
        if file_cache:
            context_parts.append("Available Data Files:\n")
            for file_info in file_cache.values():
                context_parts.append(f"- {file_info.get('file_name', 'Unknown file')}\n")
                if 'stats' in file_info:
                    context_parts.append(f"  {file_info['stats']}\n")

        if history:
            context_parts.append("\nConversation Context:\n")
            for user_msg, ai_msg in history[-3:]:  # Last 3 exchanges
                context_parts.append(f"User: {user_msg}\n")
                if ai_msg:
                    context_parts.append(f"Assistant: {ai_msg}\n")

        formatted_user_prompt = devstral_code_gen_user_prompt.format(
            user_query=user_query,
            plan=plan_text,
            context="".join(context_parts)
        )

        # SECURITY: a credential is embedded in source below. It is kept only as
        # a backward-compatible fallback; set DEVSTRAL_API_KEY in the
        # environment, rotate this key, and then delete the literal.
        base_url = (
            os.getenv("DEVSTRAL_BASE_URL")
            or "https://abhinav-bhatnagar--devstral-vllm-deployment-serve.modal.run"
        )
        api_key = os.getenv("DEVSTRAL_API_KEY") or "ak-zMwhIPjqvBj30jbm1DmKqx"

        print("🚀 Generating code using Devstral...")
        print(f"📡 Connecting to: {base_url}")

        # Call Modal inference using the proper app.run() context
        with devstral_app.run():
            result = run_devstral_inference.remote(
                base_url=base_url,
                api_key=api_key,
                prompts=[formatted_user_prompt],
                system_prompt=devstral_code_gen_sys_prompt,
                mode="single"
            )

        if result and "response" in result:
            code_output = result["response"]
            return f"🚀 **Generated Code:**\n\n{code_output}"
        return "❌ **Error:** No response received from Devstral model."

    except Exception as e:
        # The UI only shows the short message; keep the full trace in the logs.
        print(f"Error generating code: {traceback.format_exc()}")
        return f"❌ **Error:** {str(e)}"

# Custom CSS for a sleek design
# Passed to gr.Blocks(css=...). The chat-container, upload-area, btn-primary
# and btn-secondary classes are attached through elem_classes, and title /
# subtitle through the header HTML. The .message, .user-message and
# .bot-message rules are not referenced by name in this file (presumably they
# target Gradio's own chatbot markup — verify).
custom_css = """
.gradio-container {
    max-width: 900px !important;
    margin: auto !important;
}

.chat-container {
    height: 600px !important;
}

#component-0 {
    height: 100vh;
}

.message {
    padding: 15px !important;
    margin: 10px 0 !important;
    border-radius: 15px !important;
}

.user-message {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    color: white !important;
    margin-left: 20% !important;
}

.bot-message {
    background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%) !important;
    color: white !important;
    margin-right: 20% !important;
}

.upload-area {
    border: 2px dashed #4f46e5 !important;
    border-radius: 10px !important;
    padding: 20px !important;
    text-align: center !important;
    background: linear-gradient(135deg, #f0f4ff 0%, #e0e7ff 100%) !important;
}

.btn-primary {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    border: none !important;
    border-radius: 25px !important;
    padding: 10px 25px !important;
    font-weight: bold !important;
}

.btn-secondary {
    background: linear-gradient(135deg, #ffeaa7 0%, #fab1a0 100%) !important;
    border: none !important;
    border-radius: 25px !important;
    padding: 10px 25px !important;
    font-weight: bold !important;
    color: #2d3436 !important;
}

.title {
    text-align: center !important;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    -webkit-background-clip: text !important;
    -webkit-text-fill-color: transparent !important;
    font-size: 2.5em !important;
    font-weight: bold !important;
    margin-bottom: 20px !important;
}

.subtitle {
    text-align: center !important;
    color: #6c757d !important;
    font-size: 1.2em !important;
    margin-bottom: 30px !important;
}
"""

# Create the Gradio interface
# Layout: a wide left column (chat, plan, code, message box) and a narrow right
# column (file upload, instructions, plan/code buttons). Event wiring follows
# the layout at the bottom of the block.
with gr.Blocks(css=custom_css, title="Data Science Requirements Gathering Agent") as app:
    
    # Header
    gr.HTML("""
        <div class="title">🔬 Data Science Consultant</div>
        <div class="subtitle">
            Transform your vague ideas into reality
        </div>
    """)
    
    with gr.Row():
        with gr.Column(scale=3):
            # Chat interface
            chatbot = gr.Chatbot(
                label="Requirements Gathering Conversation",
                height=500,
                show_copy_button=True,
                bubble_full_width=False,
                elem_classes=["chat-container"]
            )

            # Read-only; filled by the "Generate Plan" button and read back as
            # the plan input of "Generate Code".
            plan_output = gr.Textbox(
                            label="Generated Plan",
                            interactive=False,
                            visible=True,
                            lines=10,
                            max_lines=20
                        )

            # Read-only; filled by the "Generate Code" button.
            code_output = gr.Textbox(
                            label="Generated Code",
                            interactive=False,
                            visible=True,
                            lines=15,
                            max_lines=30,
                            placeholder="Generated Python code will appear here..."
                        )
            
            with gr.Row():
                with gr.Column(scale=4):
                    msg = gr.Textbox(
                        placeholder="Describe your data science project or ask a question...",
                        label="Your Message",
                        lines=2,
                        max_lines=5
                    )
                with gr.Column(scale=1):
                    send_btn = gr.Button("Send 📤", variant="primary", elem_classes=["btn-primary"])

            
            with gr.Row():
                clear_btn = gr.Button("Clear Chat 🗑️", variant="secondary", elem_classes=["btn-secondary"])
        
        with gr.Column(scale=1):
            # File upload section
            gr.HTML("<h3 style='text-align: center; color: #4f46e5;'>📁 Upload Data Files</h3>")
            
            file_upload = gr.File(
                label="Upload your files (CSV, Excel, PDF, PPT, DOCX, Images, etc.)",
                file_count="multiple",
                file_types=[".csv", ".xlsx", ".xls", ".json", ".txt", ".pdf", ".ppt", ".pptx", ".doc", ".docx", ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".webp"],
                elem_classes=["upload-area"]
            )
            
            # Non-interactive mirror of file_upload; this component (not
            # file_upload) is what the send handlers read the files from.
            uploaded_files_display = gr.File(
                label="Uploaded Files",
                file_count="multiple",
                interactive=False,
                visible=True
            )
            
            # Instructions
            gr.HTML("""
                <div style="padding: 15px; background: linear-gradient(135deg, #e3f2fd 0%, #f3e5f5 100%); 
                           border-radius: 10px; margin-top: 20px;">
                    <h4 style="color: #4f46e5; margin-bottom: 10px;">💡 How it works:</h4>
                    <ol style="color: #555; font-size: 14px; line-height: 1.6;">
                        <li>Describe your data science project</li>
                        <li>Upload your files (data, documents, images)</li>
                        <li>Answer clarifying questions</li>
                        <li>Generate a plan for your project</li>
                        <li>Generate Python code using Devstral AI</li>
                    </ol>
                    <p style="color: #666; font-size: 12px; margin-top: 10px;">
                        📄 Supports: CSV, Excel, PDF, PowerPoint, Word docs, Images, JSON, Text files<br>
                        💻 Code generation powered by Mistral Devstral-Small-2505
                    </p>
                </div>
            """)
            
            # Action buttons section
            with gr.Column():
                plan_btn = gr.Button("Generate Plan 📋", variant="secondary", elem_classes=["btn-secondary"], size="lg")
                code_btn = gr.Button("Generate Code 💻", variant="secondary", elem_classes=["btn-secondary"], size="lg")
    
    # State for conversation history and file cache
    # chat_history is kept separately from the chatbot display; both are
    # overwritten with the same list after each send.
    chat_history = gr.State([])
    file_cache = gr.State({})
    
    # Event handlers
    def handle_send(message, history, files, cache):
        """Forward a non-blank message to chat_interface; otherwise change nothing.

        Returns values for (chatbot, chat_history, msg, file_cache). For a
        blank message the typed text is returned as-is so the box is not cleared.
        """
        if message.strip():
            new_history, updated_history, cleared_input, updated_cache = chat_interface(message, history, files, cache)
            return new_history, updated_history, cleared_input, updated_cache
        return history, history, message, cache
    
    # Wire up the interface
    # The Send button and pressing Enter in the message box share one handler.
    send_btn.click(
        handle_send,
        inputs=[msg, chat_history, uploaded_files_display, file_cache],
        outputs=[chatbot, chat_history, msg, file_cache]
    )
    
    msg.submit(
        handle_send,
        inputs=[msg, chat_history, uploaded_files_display, file_cache],
        outputs=[chatbot, chat_history, msg, file_cache]
    )
    
    # Resets the chat display, the history state and the file cache; the plan,
    # code and uploaded-file components are not among the outputs.
    clear_btn.click(
        clear_chat,
        outputs=[chatbot, chat_history, file_cache]
    )

    plan_btn.click(
        generate_plan,
        inputs=[chat_history, file_cache],
        outputs=[plan_output]
    )
    
    code_btn.click(
        generate_code_with_devstral,
        inputs=[plan_output, chat_history, file_cache],
        outputs=[code_output]
    )
    
    # Copy whatever is selected in the upload widget into the read-only display.
    file_upload.change(
        lambda files: files,
        inputs=[file_upload],
        outputs=[uploaded_files_display]
    )
    
    # Welcome message
    # Written to the chatbot display only, not to chat_history, so it is
    # replaced once the first send overwrites the display with the history.
    app.load(
        lambda: [(None, "👋 Hello! I'm your Data Science Project Agent. I'll help you transform your project ideas into reality  .\n\n🚀 **Let's get started!** Tell me about your data science project or what you're trying to achieve.")],
        outputs=[chatbot]
    )

# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    # share=True requests a public share link; show_error=True surfaces
    # handler exceptions in the UI.
    app.launch(share=True, show_error=True)