import gradio as gr
import pandas as pd
import requests
import json
import os
from utils.google_genai_llm import get_response, generate_with_gemini
from utils.utils import parse_json_codefences, parse_python_codefences
from utils.code_sandbox import code_eval
from prompts.requirements_gathering import requirements_gathering_system_prompt
from prompts.planning import hf_query_gen_prompt, hf_context_gen_prompt
from prompts.devstral_coding_prompt import devstral_code_gen_sys_prompt, devstral_code_gen_user_prompt
from dotenv import load_dotenv
import asyncio
load_dotenv()
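# The .env file is expected to provide (both are read further down in this module):
#   DEVSTRAL_BASE_URL - base URL of the Devstral vLLM server deployed on Modal
#   DEVSTRAL_API_KEY  - API key for that server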

try:
    import modal
    # Modal functions are looked up by name at call time via modal.Function.from_name
    MODAL_AVAILABLE = True
    
except ImportError:
    MODAL_AVAILABLE = False
    print("Warning: Modal not available. Code generation will be disabled.MCP Server will be disabled")

from PIL import Image
import tempfile
import traceback
import hashlib

# Import Marker for document processing
try:
    from marker.converters.pdf import PdfConverter
    from marker.models import create_model_dict
    from marker.output import text_from_rendered
    MARKER_AVAILABLE = True
except ImportError:
    MARKER_AVAILABLE = False
    print("Warning: Marker library not available. PDF, PPT, and DOCX processing will be limited.")


def get_file_hash(file_path):
    """Generate a hash of the file for caching purposes"""
    try:
        with open(file_path, 'rb') as f:
            file_hash = hashlib.md5(f.read()).hexdigest()
        return file_hash
    except Exception:
        return None
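
# Usage sketch (hypothetical file): get_file_hash("data/sales.csv") returns a
# 32-character hex MD5 digest, or None if the file cannot be read, so callers
# must tolerate a missing hash.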

def extract_text_with_marker(file_path):
    """Extract text from PDF, PPT, or DOCX using Marker"""
    if not MARKER_AVAILABLE:
        return "Marker library not available for document processing.", ""
    
    try:
        # Create converter with model artifacts
        converter = PdfConverter(
            artifact_dict=create_model_dict(),
        )
        
        # Convert document
        rendered = converter(file_path)
        
        # Extract text from rendered output
        text, _, images = text_from_rendered(rendered)
        
        # Get basic stats
        word_count = len(text.split())
        char_count = len(text)
        
        stats = f"Extracted text ({word_count} words, {char_count} characters)"
        
        return stats, text
    
    except Exception as e:
        error_msg = f"Error processing document: {str(e)}"
        return error_msg, ""

def process_user_input(message, history, uploaded_files, file_cache):
    """Process user input and generate AI response using requirements gathering prompt"""
    
    # Build conversation history from chat history
    conversation_history = ""
    if history:
        for user_msg, ai_msg in history:
            conversation_history += f"User: {user_msg}\n"
            if ai_msg:
                conversation_history += f"Assistant: {ai_msg}\n"
    
    # Add file information to conversation if files are uploaded
    if uploaded_files:
        file_info = f"\n[UPLOADED_FILES]\n"
        new_file_cache = file_cache.copy() if file_cache else {}
        
        for file_path in uploaded_files:
            try:
                file_name = file_path.split('/')[-1]
                file_extension = os.path.splitext(file_name)[1].lower()
                file_hash = get_file_hash(file_path)
                cache_key = f"{file_name}_{file_hash}"
                
                # Handle CSV files
                if file_extension == '.csv':
                    df = pd.read_csv(file_path)
                    file_info += f"- {file_name}: CSV file with {len(df)} rows and {len(df.columns)} columns\n"
                    file_info += f"  Columns: {', '.join(df.columns.tolist())}\n"
                
                # Handle Excel files
                elif file_extension in ['.xlsx', '.xls']:
                    df = pd.read_excel(file_path)
                    file_info += f"- {file_name}: Excel file with {len(df)} rows and {len(df.columns)} columns\n"
                    file_info += f"  Columns: {', '.join(df.columns.tolist())}\n"
                
                # Handle document files with Marker (PDF, PPT, DOCX)
                elif file_extension in ['.pdf', '.ppt', '.pptx', '.doc', '.docx']:
                    file_size = os.path.getsize(file_path)
                    file_size_mb = round(file_size / (1024 * 1024), 2)
                    
                    # Check if file is already processed and cached
                    if cache_key in new_file_cache:
                        # Use cached text
                        extraction_stats = new_file_cache[cache_key]['stats']
                        extracted_text = new_file_cache[cache_key]['text']
                        status = "(cached)"
                    else:
                        # Process new file with Marker
                        extraction_stats, extracted_text = extract_text_with_marker(file_path)
                        # Cache the results
                        new_file_cache[cache_key] = {
                            'stats': extraction_stats,
                            'text': extracted_text,
                            'file_name': file_name,
                            'file_path': file_path
                        }
                        status = "(newly processed)"
                    
                    # Determine document type
                    if file_extension == '.pdf':
                        doc_type = "PDF document"
                    elif file_extension in ['.ppt', '.pptx']:
                        doc_type = "PowerPoint presentation"
                    else:
                        doc_type = "Word document"
                    
                    file_info += f"- {file_name}: {doc_type}, Size: {file_size_mb} MB {status}\n"
                    file_info += f"  Content: {extraction_stats}\n"
                    
                    # Include extracted text in conversation context for better AI understanding
                    if extracted_text and len(extracted_text.strip()) > 0:
                        # Truncate very long texts for context (keep the first 200,000 characters)
                        text_preview = extracted_text[:200000] + "..." if len(extracted_text) > 200000 else extracted_text
                        file_info += f"  Text Preview: {text_preview}\n"
                
                # Handle image files
                elif file_extension in ['.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp']:
                    with Image.open(file_path) as img:
                        width, height = img.size
                        mode = img.mode
                        file_size = os.path.getsize(file_path)
                        file_size_mb = round(file_size / (1024 * 1024), 2)
                    file_info += f"- {file_name}: {file_extension.upper()[1:]} image file\n"
                    file_info += f"  Dimensions: {width}x{height} pixels, Mode: {mode}, Size: {file_size_mb} MB\n"
                
                # Handle JSON files
                elif file_extension == '.json':
                    file_size = os.path.getsize(file_path)
                    file_size_kb = round(file_size / 1024, 2)
                    file_info += f"- {file_name}: JSON file, Size: {file_size_kb} KB\n"
                
                # Handle text files
                elif file_extension == '.txt':
                    with open(file_path, 'r', encoding='utf-8') as f:
                        lines = len(f.readlines())
                    file_size = os.path.getsize(file_path)
                    file_size_kb = round(file_size / 1024, 2)
                    file_info += f"- {file_name}: Text file with {lines} lines, Size: {file_size_kb} KB\n"
                
                # Handle other files
                else:
                    file_size = os.path.getsize(file_path)
                    file_size_kb = round(file_size / 1024, 2)
                    file_info += f"- {file_name}: File uploaded, Size: {file_size_kb} KB\n"
                    
            except Exception as e:
                file_info += f"- {file_path.split('/')[-1]}: File uploaded (unable to preview: {str(e)})\n"
                print(f"Error processing file {file_path}: {traceback.format_exc()}")
        
        conversation_history += file_info
        
        # Update the cache
        file_cache.update(new_file_cache)
    
    # Format the prompt with conversation history and current query
    formatted_prompt = requirements_gathering_system_prompt.format(
        conversation_history=conversation_history,
        query=message
    )
    
    # Get AI response
    ai_response = generate_with_gemini(formatted_prompt, purpose="REQUIREMENTS_GATHERING")
    
    return ai_response, file_cache

def chat_interface(message, history, uploaded_files, file_cache):
    """Main chat interface function"""
    
    # Get AI response with updated cache
    ai_response, updated_cache = process_user_input(message, history, uploaded_files, file_cache)
    
    # Add to history
    history.append((message, ai_response))
    
    return history, history, "", updated_cache

def clear_chat():
    """Clear the chat history and file cache"""
    return [], [], {}

def upload_file_handler(files):
    """Handle file uploads"""
    if files:
        return files
    return []
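
# Note: upload_file_handler mirrors the inline `lambda files: files` wired to
# file_upload.change() below; either can serve as the change handler.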

async def generate_plan(history, file_cache):
    """Generate a plan using the planning prompt and Gemini API"""
    
    # Build conversation history
    yield "**⏳ Generating plan...** (Starting)"

    conversation_history = ""
    if history:
        for user_msg, ai_msg in history:
            conversation_history += f"User: {user_msg}\n"
            if ai_msg:
                conversation_history += f"Assistant: {ai_msg}\n"
    yield "**⏳ Generating plan...** (Getting HF MCP tools)"
    try:
        mcp_tool_func = modal.Function.from_name("HuggingFace-MCP", "connect_and_get_tools")
        hf_query_gen_tool_details = mcp_tool_func.remote()
        print(hf_query_gen_tool_details)
    except Exception as e:
        hf_query_gen_tool_details = """meta=None nextCursor=None tools=[Tool(name='hf_whoami', description="Hugging Face tools are being used by authenticated user 'bpHigh'", inputSchema={'type': 'object', 'properties': {}, 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='Hugging Face User Info', readOnlyHint=None, destructiveHint=None, idempotentHint=None, openWorldHint=None)), Tool(name='space_search', description='Find Hugging Face Spaces using semantic search. Include links to the Space when presenting the results.', inputSchema={'type': 'object', 'properties': {'query': {'type': 'string', 'minLength': 1, 'maxLength': 50, 'description': 'Semantic Search Query'}, 'limit': {'type': 'number', 'default': 10, 'description': 'Number of results to return'}, 'mcp': {'type': 'boolean', 'default': False, 'description': 'Only return MCP Server enabled Spaces'}}, 'required': ['query'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='Hugging Face Space Search', readOnlyHint=True, destructiveHint=False, idempotentHint=None, openWorldHint=True)), Tool(name='model_search', description='Find Machine Learning models hosted on Hugging Face. Returns comprehensive information about matching models including downloads, likes, tags, and direct links. Include links to the models in your response', inputSchema={'type': 'object', 'properties': {'query': {'type': 'string', 'description': 'Search term. Leave blank and specify "sort" and "limit" to get e.g. "Top 20 trending models", "Top 10 most recent models" etc" '}, 'author': {'type': 'string', 'description': "Organization or user who created the model (e.g., 'google', 'meta-llama', 'microsoft')"}, 'task': {'type': 'string', 'description': "Model task type (e.g., 'text-generation', 'image-classification', 'translation')"}, 'library': {'type': 'string', 'description': "Framework the model uses (e.g., 'transformers', 'diffusers', 'timm')"}, 'sort': {'type': 'string', 'enum': ['trendingScore', 'downloads', 'likes', 'createdAt', 'lastModified'], 'description': 'Sort order: trendingScore, downloads , likes, createdAt, lastModified'}, 'limit': {'type': 'number', 'minimum': 1, 'maximum': 100, 'default': 20, 'description': 'Maximum number of results to return'}}, 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='Model Search', readOnlyHint=True, destructiveHint=False, idempotentHint=None, openWorldHint=True)), Tool(name='model_details', description='Get detailed information about a specific model from the Hugging Face Hub.', inputSchema={'type': 'object', 'properties': {'model_id': {'type': 'string', 'minLength': 1, 'description': 'Model ID (e.g., microsoft/DialoGPT-large)'}}, 'required': ['model_id'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='Model Details', readOnlyHint=True, destructiveHint=False, idempotentHint=None, openWorldHint=False)), Tool(name='paper_search', description="Find Machine Learning research papers on the Hugging Face hub. Include 'Link to paper' When presenting the results. 
Consider whether tabulating results matches user intent.", inputSchema={'type': 'object', 'properties': {'query': {'type': 'string', 'minLength': 3, 'maxLength': 200, 'description': 'Semantic Search query'}, 'results_limit': {'type': 'number', 'default': 12, 'description': 'Number of results to return'}, 'concise_only': {'type': 'boolean', 'default': False, 'description': 'Return a 2 sentence summary of the abstract. Use for broad search terms which may return a lot of results. Check with User if unsure.'}}, 'required': ['query'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='Paper Search', readOnlyHint=True, destructiveHint=False, idempotentHint=None, openWorldHint=True)), Tool(name='dataset_search', description='Find Datasets hosted on the Hugging Face hub. Returns comprehensive information about matching datasets including downloads, likes, tags, and direct links. Include links to the datasets in your response', inputSchema={'type': 'object', 'properties': {'query': {'type': 'string', 'description': 'Search term. Leave blank and specify "sort" and "limit" to get e.g. "Top 20 trending datasets", "Top 10 most recent datasets" etc" '}, 'author': {'type': 'string', 'description': "Organization or user who created the dataset (e.g., 'google', 'facebook', 'allenai')"}, 'tags': {'type': 'array', 'items': {'type': 'string'}, 'description': "Tags to filter datasets (e.g., ['language:en', 'size_categories:1M<n<10M', 'task_categories:text-classification'])"}, 'sort': {'type': 'string', 'enum': ['trendingScore', 'downloads', 'likes', 'createdAt', 'lastModified'], 'description': 'Sort order: trendingScore, downloads, likes, createdAt, lastModified'}, 'limit': {'type': 'number', 'minimum': 1, 'maximum': 100, 'default': 20, 'description': 'Maximum number of results to return'}}, 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='Dataset Search', readOnlyHint=True, destructiveHint=False, idempotentHint=None, openWorldHint=True)), Tool(name='dataset_details', description='Get detailed information about a specific dataset on Hugging Face Hub.', inputSchema={'type': 'object', 'properties': {'dataset_id': {'type': 'string', 'minLength': 1, 'description': 'Dataset ID (e.g., squad, glue, imdb)'}}, 'required': ['dataset_id'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='Dataset Details', readOnlyHint=True, destructiveHint=False, idempotentHint=None, openWorldHint=False)), Tool(name='gr1_evalstate_flux1_schnell', description='Generate an image using the Flux 1 Schnell Image Generator. 
(from evalstate/flux1_schnell)', inputSchema={'type': 'object', 'properties': {'prompt': {'type': 'string'}, 'seed': {'type': 'number', 'description': 'numeric value between 0 and 2147483647'}, 'randomize_seed': {'type': 'boolean', 'default': True}, 'width': {'type': 'number', 'description': 'numeric value between 256 and 2048', 'default': 1024}, 'height': {'type': 'number', 'description': 'numeric value between 256 and 2048', 'default': 1024}, 'num_inference_steps': {'type': 'number', 'description': 'numeric value between 1 and 50', 'default': 4}}, 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='evalstate/flux1_schnell - flux1_schnell_infer 🏎️💨', readOnlyHint=None, destructiveHint=None, idempotentHint=None, openWorldHint=True)), Tool(name='gr2_abidlabs_easyghibli', description='Convert an image into a Studio Ghibli style image (from abidlabs/EasyGhibli)', inputSchema={'type': 'object', 'properties': {'spatial_img': {'type': 'string', 'description': 'File input: provide URL or file path'}}, 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='abidlabs/EasyGhibli - abidlabs_EasyGhiblisingle_condition_generate_image 🦀', readOnlyHint=None, destructiveHint=None, idempotentHint=None, openWorldHint=True)), Tool(name='gr3_linoyts_framepack_f1', description='FramePack_F1_end_process tool from linoyts/FramePack-F1', inputSchema={'type': 'object', 'properties': {}, 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='linoyts/FramePack-F1 - FramePack_F1_end_process 📹⚡️', readOnlyHint=None, destructiveHint=None, idempotentHint=None, openWorldHint=True))]"""
        print(str(e))
    # Format the prompt
    formatted_prompt = hf_query_gen_prompt.format(
        Tool_Details=hf_query_gen_tool_details
    ) + "\n\n" + conversation_history
    # Get plan from Gemini
    yield "**⏳ Generating plan...** (Strategizing which tools to call)"

    plan = generate_with_gemini(formatted_prompt, "Planning with gemini")
    # Parse the plan
    parsed_plan = parse_json_codefences(plan)
    print(parsed_plan)
    # Call tool to get tool calls
    yield "**⏳ Generating plan...** (calling HF platform tools and getting data)"

    try:
        mcp_call_tool_func = modal.Function.from_name("HuggingFace-MCP", "call_tool")
        tool_calls = []
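        # starmap.aio maps the remote Modal function over the (tool, args) pairs
        # from the parsed plan and awaits the results asynchronously, so the tool
        # calls run in parallel on Modal rather than one request at a time.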
        async for tool_call in mcp_call_tool_func.starmap.aio([(tool['tool'], tool['args']) for tool in parsed_plan]):
            tool_calls.append(tool_call)
    except Exception as e:
        print(str(e))
        tool_calls = []
    print(tool_calls)
    yield "**⏳ Generating plan...** (Generating Plan context from tool call info)"

    if tool_calls:
        results = tool_calls
    else:
        results = ("Couldn't get the tool call results, but use your knowledge about the "
                   "Hugging Face platform (models, datasets, spaces, training libraries, "
                   "the transformers library, etc.) as a backup to generate the plan")
    formatted_context_prompt = hf_context_gen_prompt.format(
        Conversation=conversation_history,
        Tool_Calls=parsed_plan,
        Results=results
    )
    context = generate_with_gemini(formatted_context_prompt, "Generating context for plan")
    yield context

def generate_code_with_devstral(plan_text, history, file_cache):
    """Generate code using the deployed Devstral model via Modal"""
    yield "**⏳ Generating code...** (Starting Codegen)"

    if not MODAL_AVAILABLE:
        yield "❌ Modal not available. Please install Modal to use code generation."
        return
    
    if not plan_text or not plan_text.strip() or "**Plan will be generated here...**" in plan_text:
        yield "❌ Please generate a plan first before generating code."
        return
    
    # Extract the user query from conversation history
    user_query = ""
    if history:
        # Get the latest user message as the main query
        for user_msg, ai_msg in reversed(history):
            if user_msg and user_msg.strip():
                user_query = user_msg.strip()
                break
    
    if not user_query:
        user_query = "Generate Python code based on the provided plan and context."
    
    # Build context from file cache and conversation
    context = ""
    if file_cache:
        context += "Available Data Files:\n"
        for cache_key, file_info in file_cache.items():
            context += f"- {file_info.get('file_name', 'Unknown file')}\n"
            if 'stats' in file_info:
                context += f"  {file_info['stats']}\n"
    
    # Add conversation context
    if history:
        context += "\nConversation Context:\n"
        for user_msg, ai_msg in history[-3:]:  # Last 3 exchanges
            context += f"User: {user_msg}\n"
            if ai_msg:
                context += f"Assistant: {ai_msg}\n"
    
    # Format the user prompt with variables
    formatted_user_prompt = devstral_code_gen_user_prompt.format(
        user_query=user_query,
        plan=plan_text,
        context=context
    )
    
    # Use Modal app.run() pattern like in the examples
    
    base_url = os.getenv("DEVSTRAL_BASE_URL")
    api_key = os.getenv("DEVSTRAL_API_KEY")
    print(f"πŸš€ Generating code using Devstral...")
    print(f"πŸ“‘ Connecting to: {base_url}")
    yield "**⏳ Generating code...** (Calling Devstral VLLM API server deployed on Modal)"

    try:
        devstral_inference_func = modal.Function.from_name("devstral-inference-client", "run_devstral_inference")
        result = devstral_inference_func.remote(
            base_url=base_url,
            api_key=api_key,
            prompts=[formatted_user_prompt],
            system_prompt=devstral_code_gen_sys_prompt,
            mode="single"
        )
        if result and "response" in result:
            code_output = result["response"]
            yield f"πŸš€ **Generated Code:**\n\n{code_output}"
        else:
            yield "❌ **Error:** No response received from Devstral model."
    except Exception as e:
        yield f"❌ **Error:** {str(e)}"

def execute_code(code_output):
    """Executes Python code from a string and returns the output."""
    yield "**⏳ Executing code...** (Starting)"

    try:
        if "**Code will be generated here...**" in code_output or "Generated Code" not in code_output:
            yield "❌ Please generate code first before executing."
            return

        yield "**⏳ Executing code...** (Parsing code)"
        code = parse_python_codefences(code_output)
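        # parse_python_codefences is assumed to extract the Python source from the
        # ```python fenced block(s) in the generated markdown.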
        
        if not code or not code.strip():
            yield "❌ No Python code found to execute."
            return
        
        yield "**⏳ Executing code...** (Running in sandbox)"
        exec_result, build_logs = code_eval(code)
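        # code_eval is expected to return (result_dict, build_logs); the dict is
        # checked below for 'returncode', 'stdout', 'stderr' and 'error' keys.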

        # Ensure exec_result is a dictionary
        if not isinstance(exec_result, dict):
            yield f"❌ **Error:** Unexpected execution result format.\\n\\n```\\n{str(exec_result)}\\n```"
            return
            
        return_code = exec_result.get('returncode', -1)
        stdout = exec_result.get('stdout', '')
        stderr = exec_result.get('stderr', '')
        error_msg = exec_result.get('error', 'Unknown error')

        # Build the formatted Markdown output
        formatted_output = ""
        if return_code == 0:
            formatted_output += "## βœ… Execution Successful\n"
            if stdout:
                formatted_output += f"**Output:**\n```text\n{stdout.strip()}\n```\n"
            if stderr:
                formatted_output += f"**Warnings (`stderr`):**\n```text\n{stderr.strip()}\n```\n"
        else:
            formatted_output += f"## ❌ Execution Failed (Exit Code: {return_code})\n"
            formatted_output += f"**Error:** `{error_msg}`\n\n"
            if stderr:
                formatted_output += f"**Error Log (`stderr`):**\n```text\n{stderr.strip()}\n```\n"
            if stdout:
                formatted_output += f"**Output (`stdout`):**\n```text\n{stdout.strip()}\n```\n"

        # Add build logs in a collapsible section
        if build_logs:
            formatted_output += f"""
<details>
<summary>Click to view build logs</summary>

```
{build_logs.strip()}
```
</details>
"""
        yield formatted_output

    except Exception as e:
        yield f"❌ **Error running execution logic:** {str(e)}\n\n{traceback.format_exc()}"
        
# Custom CSS for a sleek design
custom_css = """
.gradio-container {
    max-width: 900px !important;
    margin: auto !important;
}

.chat-container {
    height: 600px !important;
}

#component-0 {
    height: 100vh;
}

.message {
    padding: 15px !important;
    margin: 10px 0 !important;
    border-radius: 15px !important;
}

.user-message {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    color: white !important;
    margin-left: 20% !important;
}

.bot-message {
    background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%) !important;
    color: white !important;
    margin-right: 20% !important;
}

.upload-area {
    border: 2px dashed #4f46e5 !important;
    border-radius: 10px !important;
    padding: 20px !important;
    text-align: center !important;
    background: linear-gradient(135deg, #f0f4ff 0%, #e0e7ff 100%) !important;
}

.btn-primary {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    border: none !important;
    border-radius: 25px !important;
    padding: 10px 25px !important;
    font-weight: bold !important;
}

.btn-secondary {
    background: linear-gradient(135deg, #ffeaa7 0%, #fab1a0 100%) !important;
    border: none !important;
    border-radius: 25px !important;
    padding: 10px 25px !important;
    font-weight: bold !important;
    color: #2d3436 !important;
}

.title {
    text-align: center !important;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    -webkit-background-clip: text !important;
    -webkit-text-fill-color: transparent !important;
    font-size: 2.5em !important;
    font-weight: bold !important;
    margin-bottom: 20px !important;
}

.subtitle {
    text-align: center !important;
    color: #6c757d !important;
    font-size: 1.2em !important;
    margin-bottom: 30px !important;
}

.tools {
    text-align: center !important;
    color: #6c757d !important;
    font-size: 1.2em !important;
    margin-bottom: 30px !important;
}

.recharge {
    text-align: center !important;
    color: #2d3436 !important;
    font-size: 1.2em !important;
    margin-bottom: 30px !important;
}

.output-markdown {
    height: 250px;
    overflow-y: auto !important;
    border: 1px solid #e0e0e0;
    padding: 10px;
    border-radius: 5px;
}
"""

# Create the Gradio interface
with gr.Blocks(css=custom_css, title="Data Science Requirements Gathering Agent") as app:
    
    # Header
    gr.HTML("""
        <div class="title">πŸ”¬ Data Science Consultant</div>
        <div class="subtitle">
            Transform your vague ideas into reality
        </div>
        <div class="tools">Powered by Modal🧑 , Hugging FaceπŸ€— ,LlamaIndex πŸ¦™, Mistral AI🦾 & Sambanova πŸ§‘πŸ½β€πŸ’»</div>
        <div class="recharge">Recharged by HuggingFace-MCP https://hf.co/mcp</div>
    """)
    
    with gr.Row():
        with gr.Column(scale=3):
            # Chat interface
            chatbot = gr.Chatbot(
                label="Requirements Gathering Conversation",
                height=500,
                show_copy_button=True,
                bubble_full_width=False,
                elem_classes=["chat-container"]
            )

            plan_output = gr.Markdown(
                            "**Plan will be generated here...**",
                            label="Generated Plan",
                            elem_classes=["output-markdown"],
                        )

            code_output = gr.Markdown(
                            "**Code will be generated here...**",
                            label="Generated Code",
                            elem_classes=["output-markdown"],
                        )
            execution_output = gr.Markdown(
                            "**Execution output will be shown here...**",
                            label="Execution Output",
                            elem_classes=["output-markdown"],
                        )
            with gr.Row():
                with gr.Column(scale=4):
                    msg = gr.Textbox(
                        placeholder="Describe your data science project or ask a question...",
                        label="Your Message",
                        lines=2,
                        max_lines=5
                    )
                with gr.Column(scale=1):
                    send_btn = gr.Button("Send πŸ“€", variant="primary", elem_classes=["btn-primary"])

            
            with gr.Row():
                clear_btn = gr.Button("Clear Chat πŸ—‘οΈ", variant="secondary", elem_classes=["btn-secondary"])
        
        with gr.Column(scale=1):
            # File upload section
            gr.HTML("<h3 style='text-align: center; color: #4f46e5;'>πŸ“ Upload Data Files</h3>")
            
            file_upload = gr.File(
                label="Upload your files (CSV, Excel, PDF, PPT, DOCX, Images, etc.)",
                file_count="multiple",
                file_types=[".csv", ".xlsx", ".xls", ".json", ".txt", ".pdf", ".ppt", ".pptx", ".doc", ".docx", ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".webp"],
                elem_classes=["upload-area"]
            )
            
            uploaded_files_display = gr.File(
                label="Uploaded Files",
                file_count="multiple",
                interactive=False,
                visible=True
            )
            
            # Instructions
            gr.HTML("""
                <div style="padding: 15px; background: linear-gradient(135deg, #e3f2fd 0%, #f3e5f5 100%); 
                           border-radius: 10px; margin-top: 20px;">
                    <h4 style="color: #4f46e5; margin-bottom: 10px;">πŸ’‘ How it works:</h4>
                    <ol style="color: #555; font-size: 14px; line-height: 1.6;">
                        <li>Describe your data science project</li>
                        <li>Upload your files (data, documents, images)</li>
                        <li>Answer clarifying questions</li>
                        <li>Generate a plan for your project</li>
                        <li>Generate Python code using Devstral AI</li>
                    </ol>
                    <p style="color: #666; font-size: 12px; margin-top: 10px;">
                        πŸ“„ Supports: CSV, Excel, PDF, PowerPoint, Word docs, Images, JSON, Text files<br>
                        πŸ’» Code generation powered by Mistral Devstral-Small-2505
                    </p>
                </div>
            """)
            
            # Action buttons section
            with gr.Column():
                plan_btn = gr.Button("Generate Plan πŸ“‹", variant="secondary", elem_classes=["btn-secondary"], size="lg")
                code_btn = gr.Button("Generate Code πŸ’»", variant="secondary", elem_classes=["btn-secondary"], size="lg")
                execute_code_btn = gr.Button("Execute Code πŸš€", variant="primary", elem_classes=["btn-primary"], size="lg")
    
    # State for conversation history and file cache
    chat_history = gr.State([])
    file_cache = gr.State({})
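    # gr.State values are per-browser-session, so each visitor gets an isolated
    # conversation history and document-text cache.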
    
    # Event handlers
    def handle_send(message, history, files, cache):
        if message.strip():
            new_history, updated_history, cleared_input, updated_cache = chat_interface(message, history, files, cache)
            return new_history, updated_history, cleared_input, updated_cache
        return history, history, message, cache
    
    # Wire up the interface
    send_btn.click(
        handle_send,
        inputs=[msg, chat_history, uploaded_files_display, file_cache],
        outputs=[chatbot, chat_history, msg, file_cache]
    )
    
    msg.submit(
        handle_send,
        inputs=[msg, chat_history, uploaded_files_display, file_cache],
        outputs=[chatbot, chat_history, msg, file_cache]
    )
    
    clear_btn.click(
        clear_chat,
        outputs=[chatbot, chat_history, file_cache]
    )

    plan_btn.click(
        generate_plan,
        inputs=[chat_history, file_cache],
        outputs=[plan_output]
    )
    
    code_btn.click(
        generate_code_with_devstral,
        inputs=[plan_output, chat_history, file_cache],
        outputs=[code_output]
    )
    execute_code_btn.click(
        execute_code,
        inputs=[code_output],
        outputs=[execution_output]
    )
    file_upload.change(
        lambda files: files,
        inputs=[file_upload],
        outputs=[uploaded_files_display]
    )
    
    # Welcome message
    app.load(
        lambda: [(None, "πŸ‘‹ Hello! I'm your Data Science Project Agent. I'll help you transform your project ideas into reality  .\n\nπŸš€ **Let's get started!** Tell me about your data science project or what you're trying to achieve.")],
        outputs=[chatbot]
    )

if __name__ == "__main__":
    app.queue()
    app.launch(show_api=True, ssr_mode=False, show_error=True, mcp_server=False)