import gradio as gr
import re
import json
import requests
import os
import tempfile
from bs4 import BeautifulSoup # For web scraping
from newspaper import Article # For smarter article extraction
from tavily import TavilyClient # For web search
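# Assumed third-party dependencies (the usual PyPI names for the imports above;
# verify against your environment):
#   pip install gradio requests beautifulsoup4 newspaper3k tavily-python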

# --- build_logic.py is NO LONGER a hard requirement for the research agent core ---
# We might repurpose some utility functions or remove its direct use if focusing purely on research.
# For this transformation, we'll comment out most build_logic specific interactions
# but keep parsing functions if they are general enough.

# from build_logic import (
#     create_space as build_logic_create_space,
#     _get_api_token as build_logic_get_api_token,
#     whoami as build_logic_whoami,
#     list_space_files_for_browsing,
#     get_space_repository_info,
#     get_space_file_content,
#     update_space_file,
#     parse_markdown as build_logic_parse_markdown, # May still be useful for report generation
#     delete_space_file as build_logic_delete_space_file,
#     get_space_runtime_status
# )
# print("build_logic.py related functions commented out for Research Agent mode.")
# --- End build_logic import ---


bbb = chr(96) * 3  # Triple backtick; used to embed fenced code blocks inside f-strings below.
parsed_research_outputs_cache = [] # Renamed from parsed_code_blocks_state_cache
BOT_ROLE_NAME = "assistant" # LLM's role
TOOL_ROLE_NAME = "tool" # Role for tool execution results
GROQ_API_ENDPOINT = "https://api.groq.com/openai/v1/chat/completions"
MAX_WEBPAGE_CONTENT_LENGTH = 6000 # Max characters to extract from a webpage
MAX_SEARCH_RESULTS_TO_PROCESS = 3 # Max search results to browse by default

# --- New System Prompt for Research Agent ---
DEFAULT_SYSTEM_PROMPT = f"""You are an expert AI Research Assistant. Your goal is to answer user questions and perform research tasks by intelligently using the tools available to you.

Available Tools:
1.  **`search_web`**: Use this tool to search the internet for information.
    -   Input: A JSON object with a "query" key (e.g., `{{"query": "latest advancements in AI"}}`)
2.  **`browse_web_page`**: Use this tool to get the content of a specific URL.
    -   Input: A JSON object with a "url" key (e.g., `{{"url": "https://example.com/article"}}`)

Tool Usage Instructions:
- When you need to use a tool, respond ONLY with a JSON object describing the tool call.
  Example for search:
  `{{"tool_calls": [{{"id": "call_abc123", "type": "function", "function": {{"name": "search_web", "arguments": "{{\\"query\\": \\"your search query\\"}}"}}}}]}}`
  Example for browsing a URL:
  `{{"tool_calls": [{{"id": "call_xyz789", "type": "function", "function": {{"name": "browse_web_page", "arguments": "{{\\"url\\": \\"https://www.example.com/page\\"}}"}}}}]}}`
- The `id` for the tool call should be unique for each call, e.g., "call_randomstring123".
- After you make a tool call, the system will execute it and provide you with the results. You should then use these results to formulate your answer or decide on the next step.
- If you have enough information from the conversation history or the previous tool responses to answer the user's query, provide a comprehensive answer directly.
- When providing an answer, cite your sources (URLs) if you used information from specific web pages.
- If a web search returns multiple promising links, you might need to use `browse_web_page` on a few of them to gather more detailed information. Prioritize relevant and reputable sources.
- If a webpage is too long or you cannot access it, note that in your reasoning.
- If the user's request is ambiguous, ask clarifying questions.
- The role name for your responses in the chat history must be '{BOT_ROLE_NAME}'.

Output Format for Final Answers (not tool calls):
- Provide clear, concise, and well-structured answers.
- If you are summarizing information from web pages, mention the source URLs.
- Example:
  "Based on my research:
  - Finding 1 (Source: [url1])
  - Finding 2 (Source: [url2])
  For more details, you can visit the source pages."

File/Report Generation (Optional - if you generate a structured report):
If you generate a structured text report, use this format:
### Report: research_summary.md
{bbb}markdown
# Research Topic: [User's Query]

## Key Findings:
- Point 1
- Point 2

## Detailed Information:
### [Source Title 1 (URL)]
- Summary of content from this source...

### [Source Title 2 (URL)]
- Summary of content from this source...
{bbb}
"""

# --- Core Utility, Parsing, API Call functions (some adapted) ---
def escape_html_for_markdown(text):
    if not isinstance(text, str): return ""
    return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

# _infer_lang_from_filename might be less used, but kept for potential report formatting
def _infer_lang_from_filename(filename):
    # (implementation carried over unchanged)
    if not filename: return "plaintext"
    if '.' in filename:
        ext = filename.split('.')[-1].lower()
        mapping = {
            'py': 'python', 'js': 'javascript', 'ts': 'typescript', 'jsx': 'javascript', 'tsx': 'typescript',
            'html': 'html', 'htm': 'html', 'css': 'css', 'scss': 'scss', 'sass': 'sass', 'less': 'less',
            'json': 'json', 'xml': 'xml', 'yaml': 'yaml', 'yml': 'yaml', 'toml': 'toml',
            'md': 'markdown', 'rst': 'rst',
            'sh': 'bash', 'bash': 'bash', 'zsh': 'bash', 'bat': 'batch', 'cmd': 'batch', 'ps1': 'powershell',
            'c': 'c', 'h': 'c', 'cpp': 'cpp', 'hpp': 'cpp', 'cs': 'csharp', 'java': 'java',
            'rb': 'ruby', 'php': 'php', 'go': 'go', 'rs': 'rust', 'swift': 'swift', 'kt': 'kotlin', 'kts': 'kotlin',
            'sql': 'sql', 'dockerfile': 'docker', 'tf': 'terraform', 'hcl': 'terraform',
            'txt': 'plaintext', 'log': 'plaintext', 'ini': 'ini', 'conf': 'plaintext', 'cfg': 'plaintext',
            'csv': 'plaintext', 'tsv': 'plaintext', 'err': 'plaintext',
            '.env': 'plaintext', '.gitignore': 'plaintext', '.npmrc': 'plaintext', '.gitattributes': 'plaintext',
            'makefile': 'makefile',
        }
        return mapping.get(ext, "plaintext")
    base_filename = os.path.basename(filename)
    if base_filename == 'Dockerfile': return 'docker'
    if base_filename == 'Makefile': return 'makefile'
    if base_filename.startswith('.'): return 'plaintext'
    return "plaintext"


# _clean_filename might be less used if not parsing filenames from LLM for code
def _clean_filename(filename_line_content):
    # (implementation carried over unchanged)
    text = filename_line_content.strip()
    text = re.sub(r'[`\*_]+', '', text) # Remove markdown emphasis characters
    path_match = re.match(r'^([\w\-\.\s\/\\]+)', text) 
    if path_match:
        parts = re.split(r'\s*\(', path_match.group(1).strip(), 1)
        return parts[0].strip() if parts else ""
    backtick_match = re.search(r'`([^`]+)`', text)
    if backtick_match:
        potential_fn = backtick_match.group(1).strip()
        parts = re.split(r'\s*\(|\s{2,}', potential_fn, 1)
        cleaned_fn = parts[0].strip() if parts else ""
        cleaned_fn = cleaned_fn.strip('`\'":;,') 
        if cleaned_fn: return cleaned_fn
    parts = re.split(r'\s*\(|\s{2,}', text, 1)
    filename_candidate = parts[0].strip() if parts else text.strip()
    filename_candidate = filename_candidate.strip('`\'":;,') 
    return filename_candidate if filename_candidate else text.strip()


# _parse_chat_stream_logic: Adapting for potential structured report output from LLM
def _parse_chat_stream_logic(chat_json_string, existing_outputs_state=None):
    global parsed_research_outputs_cache
    latest_outputs_dict = {}
    if existing_outputs_state:
        for item in existing_outputs_state: latest_outputs_dict[item["filename"]] = item.copy()

    results = {"parsed_outputs": [], "preview_md": "", "error_message": None}
    try:
        ai_chat_history = json.loads(chat_json_string)
        if not isinstance(ai_chat_history, list): raise ValueError("JSON input must be a list of chat messages.")
    except json.JSONDecodeError as e: results["error_message"] = f"JSON Parsing Error: {e}."; return results
    except ValueError as e: results["error_message"] = str(e); return results

    message_obj = None
    if ai_chat_history and isinstance(ai_chat_history[-1], dict) and ai_chat_history[-1].get("role", "").lower() == BOT_ROLE_NAME:
         message_obj = ai_chat_history[-1]

    if not message_obj:
         results["parsed_outputs"] = list(latest_outputs_dict.values())
         return results

    role, content = message_obj.get("role", "").lower(), message_obj.get("content", "")

    # Check for report format
    report_pattern = re.compile(r"### Report:\s*(?P<filename_line>[^\n]+)\n```(?P<lang>[\w\.\-\+]*)\n(?P<code>[\s\S]*?)\n```")

    if role == BOT_ROLE_NAME:
        for match in report_pattern.finditer(content):
            filename = _clean_filename(match.group("filename_line"))
            if not filename: continue
            lang, code_block = match.group("lang"), match.group("code")
            item_data = {
                "filename": filename,
                "code": code_block.strip(),
                "language": (lang.strip().lower() if lang else _infer_lang_from_filename(filename)),
                "is_report": True
            }
            latest_outputs_dict[filename] = item_data # Overwrite if exists

    current_parsed_outputs = list(latest_outputs_dict.values())
    parsed_research_outputs_cache = current_parsed_outputs # Update global cache
    results["parsed_outputs"] = current_parsed_outputs
    return results

# _generate_ui_outputs_from_cache: Adapting for research reports
def _generate_ui_outputs_from_cache():
    global parsed_research_outputs_cache
    preview_md_val = "*No structured reports generated by AI yet.*"
    formatted_md_val = "# Research Agent Output\n\n*No structured reports generated yet.*"
    download_file = None

    if parsed_research_outputs_cache:
        preview_md_lines = ["## Generated Reports/Structured Outputs:"]
        main_report_content = ""
        for item in parsed_research_outputs_cache:
            if item.get("is_report"):
                preview_md_lines.append(f"\n----\n**Report:** `{escape_html_for_markdown(item['filename'])}` (Language: `{item['language']}`)\n")
                preview_md_lines.append(f"\n{bbb}{item.get('language', 'plaintext') or 'plaintext'}\n{item.get('code','')}\n{bbb}\n")
                if not main_report_content: # Take the first report as the main one for formatted output
                    main_report_content = f"# Report: {item['filename']}\n\n{bbb}{item.get('language', 'plaintext') or 'plaintext'}\n{item.get('code','')}\n{bbb}"

        preview_md_val = "\n".join(preview_md_lines)
        if main_report_content:
            formatted_md_val = main_report_content
            try:
                # Use the report filename for download if available, else generic
                report_filename_for_download = "research_report.md"
                if parsed_research_outputs_cache and parsed_research_outputs_cache[0].get("filename"):
                    report_filename_for_download = parsed_research_outputs_cache[0]["filename"]

                with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".md", prefix=report_filename_for_download.split('.')[0] + "_", encoding='utf-8') as tmpfile:
                    tmpfile.write(main_report_content); download_file = tmpfile.name
            except Exception as e: print(f"Error creating temp file for report: {e}")
        else: # if no structured report, but there's other content in cache (future use)
            formatted_md_val = "# Research Agent Output\n\n*No specific report found, showing raw cache if any.*"
            # Potentially list other non-report items here if the cache structure evolves

    return formatted_md_val, preview_md_val, gr.update(value=download_file, interactive=download_file is not None)


def _convert_gr_history_to_api_messages(system_prompt, gr_history, current_user_message=None):
    messages = [{"role": "system", "content": system_prompt}] if system_prompt else []
    for user_msg, bot_msg_or_tool_resp in gr_history:
        if user_msg: messages.append({"role": "user", "content": user_msg})
        if bot_msg_or_tool_resp:
            # Check if it's a tool call from the assistant or a tool response
            try:
                # Attempt to parse as JSON, if it's a tool_calls object from assistant
                # or a tool response object we constructed.
                potential_json = json.loads(bot_msg_or_tool_resp)
                if isinstance(potential_json, dict) and "tool_calls" in potential_json and isinstance(potential_json["tool_calls"], list):
                    # This is an assistant's message with tool calls
                    messages.append({
                        "role": BOT_ROLE_NAME,
                        "content": None, # OpenAI expects content to be null for tool_calls only message
                        "tool_calls": potential_json["tool_calls"]
                    })
                elif isinstance(potential_json, dict) and "tool_call_id" in potential_json and "role" in potential_json and potential_json["role"] == TOOL_ROLE_NAME:
                    # This is a tool response message we constructed
                    messages.append(potential_json) # Already in correct format
                else: # Not a special JSON, treat as regular bot message
                    messages.append({"role": BOT_ROLE_NAME, "content": str(bot_msg_or_tool_resp)})
            except json.JSONDecodeError: # Not JSON, treat as regular bot message
                messages.append({"role": BOT_ROLE_NAME, "content": str(bot_msg_or_tool_resp)})

    if current_user_message: messages.append({"role": "user", "content": current_user_message})
    return messages

# --- New Tool Functions ---
def search_web(query: str, tavily_api_key: str):
    """Performs a web search using Tavily API."""
    if not tavily_api_key:
        return json.dumps({"error": "Tavily API key not provided."})
    try:
        client = TavilyClient(api_key=tavily_api_key)
        response = client.search(query=query, search_depth="basic", max_results=5) # basic is often enough
        # `response` includes 'results' which is a list of dicts: {'title': ..., 'url': ..., 'content': ...}
        # We'll return the stringified JSON of results for the LLM.
        return json.dumps(response.get("results", []))
    except Exception as e:
        return json.dumps({"error": f"Tavily search failed: {str(e)}"})

def browse_web_page(url: str):
    """Fetches and extracts text content from a web page."""
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        # Try Newspaper3k first for cleaner article text
        try:
            article = Article(url)
            article.download(input_html=response.content) # Pass downloaded HTML
            article.parse()
            content = article.text
            if content and len(content.strip()) > 100: # If newspaper got good content
                 return json.dumps({"url": url, "content": content[:MAX_WEBPAGE_CONTENT_LENGTH]})
        except Exception as e:
            print(f"Newspaper3k failed for {url}: {e}. Falling back to BeautifulSoup.")

        # Fallback to BeautifulSoup if Newspaper3k fails or gets minimal content
        soup = BeautifulSoup(response.content, 'html.parser')
        
        # Remove script and style elements
        for script_or_style in soup(["script", "style"]):
            script_or_style.decompose()
        
        text = soup.get_text(separator='\n', strip=True)
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        text = '\n'.join(chunk for chunk in chunks if chunk)
        
        if not text:
            return json.dumps({"url": url, "content": "[No text content found or page is primarily non-textual]"})
            
        return json.dumps({"url": url, "content": text[:MAX_WEBPAGE_CONTENT_LENGTH]})
    except requests.exceptions.RequestException as e:
        return json.dumps({"url": url, "error": f"Failed to fetch URL: {str(e)}"})
    except Exception as e:
        return json.dumps({"url": url, "error": f"Error processing page: {str(e)}"})

available_tools = {
    "search_web": search_web,
    "browse_web_page": browse_web_page,
}
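
# Quick local smoke test for the tools (illustrative sketch only; assumes
# network access and, for search_web, a TAVILY_API_KEY environment variable):
#   print(browse_web_page("https://example.com"))
#   print(search_web("gradio tool calling", tavily_api_key=os.environ.get("TAVILY_API_KEY", "")))
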
# --- Main Chat Handler ---
def handle_research_chat_submit(user_message, chat_history, groq_api_key, tavily_api_key, model_select, system_prompt):
    global parsed_research_outputs_cache
    _chat_msg_in, _chat_hist, _status = "", list(chat_history), "Initializing..."
    _detected_outputs_update, _formatted_output_update, _download_btn_update = gr.update(), gr.update(), gr.update(interactive=False, value=None)

    if not user_message.strip():
        _status = "Cannot send an empty message."
        yield (user_message, _chat_hist, _status, _detected_outputs_update, _formatted_output_update, _download_btn_update); return

    _chat_hist.append((user_message, None))
    yield (_chat_msg_in, _chat_hist, _status, _detected_outputs_update, _formatted_output_update, _download_btn_update)

    effective_groq_api_key = groq_api_key or os.environ.get("GROQ_API_KEY")
    effective_tavily_api_key = tavily_api_key or os.environ.get("TAVILY_API_KEY")

    if not effective_groq_api_key:
        _chat_hist[-1] = (user_message, "Error: Groq API Key not set."); _status = "Groq API Key missing."
        yield (_chat_msg_in, _chat_hist, _status, _detected_outputs_update, _formatted_output_update, _download_btn_update); return

    current_sys_prompt = system_prompt.strip() or DEFAULT_SYSTEM_PROMPT
    
    # Tool definitions for the API
    tools_for_api = [
        {
            "type": "function",
            "function": {
                "name": "search_web",
                "description": "Searches the web for a given query using Tavily.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {"type": "string", "description": "The search query."},
                    },
                    "required": ["query"],
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "browse_web_page",
                "description": "Fetches and extracts text content from a given URL.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "url": {"type": "string", "description": "The URL of the web page to browse."},
                    },
                    "required": ["url"],
                },
            },
        },
    ]
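    # Note: the schemas above use the OpenAI-compatible function-calling format
    # accepted by Groq's chat completions endpoint; each entry is assumed to
    # mirror one callable registered in `available_tools`.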

    # Convert current chat history for API
    # For the first message from user, history is _chat_hist[:-1] and current_user_message is user_message
    api_msgs = _convert_gr_history_to_api_messages(current_sys_prompt, _chat_hist[:-1], user_message)
    
    max_tool_iterations = 5 # Prevent infinite loops
    current_iteration = 0

    while current_iteration < max_tool_iterations:
        current_iteration += 1
        headers = {"Authorization": f"Bearer {effective_groq_api_key}", "Content-Type": "application/json"}
        payload = {"model": model_select, "messages": api_msgs, "tools": tools_for_api, "tool_choice": "auto"}

        try:
            _status = f"Waiting for {model_select} (Iteration {current_iteration})...";
            # Update chat history for streaming intermediate status to user
            if _chat_hist[-1] and _chat_hist[-1][1] is None : # If last bot message is empty (first iteration of this turn)
                 _chat_hist[-1] = (_chat_hist[-1][0], f"<i>{_status}</i>")
            else: # If there's already a bot message (e.g. tool response was added)
                 _chat_hist.append((None, f"<i>{_status}</i>"))

            yield (_chat_msg_in, _chat_hist, _status, _detected_outputs_update, _formatted_output_update, _download_btn_update)

            response = requests.post(GROQ_API_ENDPOINT, headers=headers, json=payload, timeout=180)
            response.raise_for_status()
            api_resp_json = response.json()

            # Clean up "Waiting..." message from history if a real response is coming
            if _chat_hist and _chat_hist[-1][1] and _chat_hist[-1][1].startswith("<i>Waiting for"):
                if _chat_hist[-1][0] is None: # It was a status-only message
                    _chat_hist.pop()
                else: # It was part of a user-bot turn
                    _chat_hist[-1] = (_chat_hist[-1][0], None) # Clear the status for now


            if not api_resp_json.get("choices") or not api_resp_json["choices"][0]:
                raise ValueError("API response missing choices.")

            message = api_resp_json["choices"][0].get("message")
            finish_reason = api_resp_json["choices"][0].get("finish_reason")

            if not message:
                raise ValueError("API response missing message object in choice.")

            # Add assistant's response (or tool call) to API message list for next potential iteration
            api_msgs.append(message)

            if message.get("tool_calls"):
                _status = "AI requested to use tools. Executing..."
                # Store the tool call request itself in chat history for visibility
                # The actual tool response will be added later.
                tool_call_request_str = json.dumps({"tool_calls": message["tool_calls"]})
                if _chat_hist[-1] and _chat_hist[-1][1] is None:
                    _chat_hist[-1] = (_chat_hist[-1][0], f"🤖 Requesting tools:\n```json\n{tool_call_request_str}\n```")
                else:
                    _chat_hist.append((None, f"🤖 Requesting tools:\n```json\n{tool_call_request_str}\n```"))
                yield (_chat_msg_in, _chat_hist, _status, _detected_outputs_update, _formatted_output_update, _download_btn_update)

                for tool_call in message["tool_calls"]:
                    function_name = tool_call["function"]["name"]
                    function_args = json.loads(tool_call["function"]["arguments"])
                    tool_call_id = tool_call["id"]

                    if function_name not in available_tools:
                        tool_response_content = json.dumps({"error": f"Tool '{function_name}' not found."})
                        _status = f"Error: Tool '{function_name}' not found."
                    else:
                        _status = f"Executing tool: {function_name} with args: {function_args}"
                        # Update chat history with tool execution status
                        _chat_hist.append((None, f"πŸ› οΈ Executing: {function_name}({json.dumps(function_args)})"))
                        yield (_chat_msg_in, _chat_hist, _status, _detected_outputs_update, _formatted_output_update, _download_btn_update)

                        tool_function = available_tools[function_name]
                        if function_name == "search_web":
                            if not effective_tavily_api_key:
                                tool_response_content = json.dumps({"error": "Tavily API key not configured by user."})
                                _status = "Error: Tavily API Key not set by user."
                            else:
                                tool_response_content = tool_function(query=function_args["query"], tavily_api_key=effective_tavily_api_key)
                        elif function_name == "browse_web_page":
                            tool_response_content = tool_function(url=function_args["url"])
                        else: # Should not happen if function_name in available_tools
                            tool_response_content = json.dumps({"error": "Unknown tool execution path."})
                    
                    # Add tool response to API message list for LLM
                    tool_response_message = {
                        "tool_call_id": tool_call_id,
                        "role": TOOL_ROLE_NAME,
                        "name": function_name,
                        "content": tool_response_content, # This is the JSON string result from the tool
                    }
                    api_msgs.append(tool_response_message)
                    
                    # Add tool response to Gradio chat history for visibility
                    # Truncate long content for display
                    display_content = tool_response_content
                    if len(display_content) > 500:
                        display_content = display_content[:500] + "... (truncated for display)"
                    _chat_hist.append((None, f"βš™οΈ Tool Result ({function_name}):\n```json\n{display_content}\n```" ))
                    yield (_chat_msg_in, _chat_hist, _status, _detected_outputs_update, _formatted_output_update, _download_btn_update)
                
                # If finish_reason is 'tool_calls', continue loop to let LLM process tool results
                if finish_reason == "tool_calls":
                    continue 
                else: # LLM decided to call tool AND respond in same turn (unusual for OpenAI spec but handle)
                    if message.get("content"):
                        bot_response_actual = message.get("content", "")
                        _chat_hist.append((None, bot_response_actual)) # Add the text response as well
                        break # Exit loop as LLM also provided content
                    else: # Only tool calls, continue loop
                        continue


            elif message.get("content"): # Standard text response from LLM
                bot_response_actual = message.get("content", "")
                if _chat_hist[-1] and _chat_hist[-1][1] is None:
                    _chat_hist[-1] = (_chat_hist[-1][0], bot_response_actual)
                else:
                    _chat_hist.append((None, bot_response_actual))
                _status = "AI response received."
                
                # Try to parse for structured reports
                latest_bot_message_json = json.dumps([{"role": BOT_ROLE_NAME, "content": bot_response_actual}], indent=2)
                parsing_res = _parse_chat_stream_logic(latest_bot_message_json, existing_outputs_state=parsed_research_outputs_cache)
                if parsing_res["error_message"]:
                    _status = f"Parsing Error: {parsing_res['error_message']}"
                    _detected_outputs_update = gr.Markdown(f"## Parsing Error\n`{escape_html_for_markdown(parsing_res['error_message'])}`")
                else:
                    _formatted_output_update, _detected_outputs_update, _download_btn_update = _generate_ui_outputs_from_cache()
                    _status = "Processing complete. Previews updated."

                yield (_chat_msg_in, _chat_hist, _status, _detected_outputs_update, _formatted_output_update, _download_btn_update)
                return # End of processing for this user message

            else: # No tool_calls and no content, unusual
                _status = "AI response was empty or malformed."
                _chat_hist.append((None, f"<i>{_status}</i>"))
                yield (_chat_msg_in, _chat_hist, _status, _detected_outputs_update, _formatted_output_update, _download_btn_update)
                return


        except requests.exceptions.HTTPError as e: error_msg = f"API HTTP Error: {e} - {e.response.text if e.response else 'No details'}"
        except requests.exceptions.RequestException as e: error_msg = f"API Request Error: {e}"
        except Exception as e: error_msg = f"Unexpected error in chat submit: {str(e)}"

        # Error handling for the loop
        _chat_hist.append((None, error_msg))
        _status = error_msg
        yield (_chat_msg_in, _chat_hist, _status, _detected_outputs_update, _formatted_output_update, _download_btn_update)
        return # Exit on error

    if current_iteration >= max_tool_iterations:
        _status = "Max tool iterations reached. AI may be in a loop."
        _chat_hist.append((None, f"<i>{_status}</i>"))
        yield (_chat_msg_in, _chat_hist, _status, _detected_outputs_update, _formatted_output_update, _download_btn_update)


# --- UI Definition ---
custom_theme = gr.themes.Base(primary_hue="teal", secondary_hue="purple", neutral_hue="zinc", text_size="sm", spacing_size="md", radius_size="sm", font=["System UI", "sans-serif"])
custom_css = """ /* ... (Your existing CSS, it's good) ... */ """ # Keep your CSS

with gr.Blocks(theme=custom_theme, css=custom_css) as demo:
    gr.Markdown("# 🌐 Internet Research Mega Agent")
    gr.Markdown("Ask questions or research topics. The AI will use web search and browsing tools to find answers.")
    with gr.Row():
        with gr.Sidebar():
            gr.Markdown("## βš™οΈ Configuration")
            with gr.Group():
                gr.Markdown("### API Keys")
                groq_api_key_input = gr.Textbox(label="Groq API Key", type="password", placeholder="gsk_...", info="Needed for LLM.")
                tavily_api_key_input = gr.Textbox(label="Tavily API Key", type="password", placeholder="tvly-...", info="Needed for web search tool.")
            with gr.Group():
                gr.Markdown("### AI Model Settings")
                groq_model_select = gr.Dropdown(label="Groq Model", choices=["mixtral-8x7b-32768", "llama3-8b-8192", "llama3-70b-8192", "gemma-7b-it"], value="llama3-70b-8192", info="Llama3-70b is recommended for better reasoning with tools.")
                groq_system_prompt_input = gr.Textbox(label="System Prompt", lines=10, value=DEFAULT_SYSTEM_PROMPT, interactive=True)

        with gr.Column(scale=3):
            gr.Markdown("## πŸ’¬ AI Research Assistant Chat")
            research_chatbot_display = gr.Chatbot(label="AI Research Chat", height=500, bubble_full_width=False, avatar_images=(None, "https://raw.githubusercontent.com/groq/groq-api-cookbook/main/groq.png"))
            with gr.Row():
                research_chat_message_input = gr.Textbox(show_label=False, placeholder="Ask your research question...", scale=7)
                research_send_chat_button = gr.Button("Send", variant="primary", scale=1, size="lg")
            research_status_output = gr.Textbox(label="Agent Status", interactive=False, lines=1, value="Ready.")
            
            gr.Markdown("---")
            with gr.Tabs():
                with gr.TabItem("📝 Generated Report/Output"):
                    gr.Markdown("If the AI generates a structured report, it will appear here.")
                    formatted_research_output_display = gr.Textbox(label="Current Research Report", lines=15, interactive=True, show_copy_button=True, value="*Research reports will appear here...*")
                    download_report_button = gr.DownloadButton(label="Download Report", interactive=False, size="sm")
                with gr.TabItem("🔍 Intermediate Outputs Preview"):
                    detected_outputs_preview = gr.Markdown(value="*Intermediate outputs or tool call details might show here...*")

    # --- Event Handlers ---
    chat_outputs = [research_chat_message_input, research_chatbot_display, research_status_output, detected_outputs_preview, formatted_research_output_display, download_report_button]
    chat_inputs = [research_chat_message_input, research_chatbot_display, groq_api_key_input, tavily_api_key_input, groq_model_select, groq_system_prompt_input]
    
    research_send_chat_button.click(fn=handle_research_chat_submit, inputs=chat_inputs, outputs=chat_outputs)
    research_chat_message_input.submit(fn=handle_research_chat_submit, inputs=chat_inputs, outputs=chat_outputs)

    # Removed Hugging Face specific buttons and their handlers:
    # - load_space_button, build_space_button, refresh_status_button
    # - file_browser_dropdown, file_content_editor, commit_message_input, update_file_button, delete_file_button
    # And their corresponding output components if they are not repurposed.

if __name__ == "__main__":
    # For local testing, you might set API keys as environment variables or directly in the script for convenience (not recommended for sharing)
    # os.environ["GROQ_API_KEY"] = "your_groq_key"
    # os.environ["TAVILY_API_KEY"] = "your_tavily_key"
    demo.launch(debug=True, share=False)