|
|
|
import os |
|
import gradio as gr |
|
from google import genai |
|
|
|
from google.generative_ai import types |
|
from google.generative_ai.types import HarmCategory, HarmBlockThreshold |
|
import requests |
|
import markdownify |
|
from urllib.robotparser import RobotFileParser |
|
from urllib.parse import urlparse |
|
import traceback |
|
import json |
|
|
|
|
|
|
|
def can_crawl_url(url: str, user_agent: str = "PythonGoogleGenAIAgent/1.0", timeout: float = 10.0) -> bool:
    """Check robots.txt permissions for a URL.

    Args:
        url: Absolute URL whose crawl permission is being checked.
        user_agent: Agent string matched against the robots.txt rules; it is
            also sent as the User-Agent header when fetching robots.txt.
        timeout: Seconds to wait for the robots.txt download before giving up.

    Returns:
        True only when robots.txt was evaluated and allows ``user_agent`` to
        fetch ``url``. Any failure (empty/malformed URL, network error,
        timeout, undecodable file) yields False, i.e. the check fails closed.
    """
    # Function-scope imports: the module header only imports urllib.parse and
    # urllib.robotparser.
    import urllib.error
    import urllib.request

    if not url:
        print("No URL provided to can_crawl_url")
        return False

    try:
        parsed_url = urlparse(url)
        if not parsed_url.scheme or not parsed_url.netloc:
            print(f"Invalid URL format for robots.txt check: {url}")
            return False

        robots_url = f"{parsed_url.scheme}://{parsed_url.netloc}/robots.txt"
        print(f"Checking robots.txt at: {robots_url} for URL: {url}")

        rp = RobotFileParser()
        rp.set_url(robots_url)

        # RobotFileParser.read() opens the URL without a timeout, so a stalled
        # server could block forever. Fetch the file ourselves with a bounded
        # timeout and apply the same status-code policy read() uses.
        request = urllib.request.Request(robots_url, headers={"User-Agent": user_agent})
        try:
            with urllib.request.urlopen(request, timeout=timeout) as robots_response:
                raw_rules = robots_response.read()
        except urllib.error.HTTPError as http_err:
            if http_err.code in (401, 403):
                # Access to robots.txt itself is restricted: treat everything as disallowed.
                rp.disallow_all = True
            elif 400 <= http_err.code < 500:
                # No usable robots.txt (e.g. 404): everything is allowed.
                rp.allow_all = True
            # Other statuses (5xx) leave the parser unloaded, and can_fetch()
            # then answers False.
            http_err.close()
        else:
            rp.parse(raw_rules.decode("utf-8").splitlines())

        can_fetch = rp.can_fetch(user_agent, url)
        print(f"Can fetch {url} with agent '{user_agent}': {can_fetch}")
        return can_fetch
    except Exception as e:
        # Fail closed: if the rules cannot be determined, do not crawl.
        print(f"Error checking robots.txt for {url}: {e}")
        return False
|
|
|
def load_page(url: str) -> str:
    """
    Load webpage content as markdown. Designed to be used as a Gemini Function.

    The response is streamed and reading stops once MAX_CONTENT_SIZE bytes
    have been exceeded, so an oversized page is never held in memory in full.
    Non-HTML responses are not downloaded at all; only their declared
    Content-Length (when the server sends one) is reported.

    Args:
        url: The URL of the webpage to load.

    Returns:
        Markdown content of the page or an error message.
    """
    print(f"Attempting to load page: {url}")
    if not url:
        return "Error: No URL provided."
    if not url.startswith(('http://', 'https://')):
        return f"Error: Invalid URL scheme. Please provide http or https URL. Got: {url}"

    USER_AGENT = "PythonGoogleGenAIAgent/1.0 (Function Calling)"
    MAX_CONTENT_SIZE = 1_000_000

    if not can_crawl_url(url, user_agent=USER_AGENT):
        print(f"URL {url} failed robots.txt check for agent {USER_AGENT}")
        return f"Error: Access denied by robots.txt for URL {url}"

    try:
        headers = {'User-Agent': USER_AGENT}
        # stream=True defers the body download so the size cap can be enforced
        # while reading; the context manager releases the connection even when
        # the body is only partially consumed.
        with requests.get(url, timeout=15, headers=headers, allow_redirects=True, stream=True) as response:
            response.raise_for_status()

            content_type = response.headers.get('content-type', '').lower()
            if 'html' not in content_type:
                print(f"Non-HTML content type '{content_type}' at {url}. Returning summary.")
                declared_length = response.headers.get('content-length')
                size_note = f"{declared_length} bytes" if declared_length else "unknown"
                return f"Content at {url} is of type '{content_type}'. Size: {size_note}. Cannot convert to Markdown."

            chunks = []
            received = 0
            for chunk in response.iter_content(chunk_size=65536):
                chunks.append(chunk)
                received += len(chunk)
                if received > MAX_CONTENT_SIZE:
                    # Enough data to know the page is over the limit; stop downloading.
                    break
            declared_encoding = response.encoding

        raw_html = b"".join(chunks)
        was_truncated = len(raw_html) > MAX_CONTENT_SIZE
        if was_truncated:
            print(f"Content size exceeds limit {MAX_CONTENT_SIZE}. Truncating.")
            raw_html = raw_html[:MAX_CONTENT_SIZE]

        try:
            html_content = raw_html.decode(declared_encoding or 'utf-8', errors='replace')
        except LookupError as decode_err:
            # The server declared a charset Python does not know.
            print(f"Decoding error: {decode_err}. Falling back to utf-8.")
            html_content = raw_html.decode('utf-8', errors='replace')
        truncated_msg = "\n\n[Content truncated due to size limit]" if was_truncated else ""

        markdown_content = markdownify.markdownify(html_content, heading_style="ATX", strip=['script', 'style'], escape_underscores=False)

        # Drop blank lines and surrounding whitespace to keep the tool response compact.
        markdown_content = '\n'.join(
            stripped for stripped in (line.strip() for line in markdown_content.splitlines()) if stripped
        )

        print(f"Successfully loaded and converted {url} to markdown.")
        return f"Content from {url}:\n\n" + markdown_content + truncated_msg

    except requests.exceptions.Timeout:
        print(f"Timeout error loading page: {url}")
        return f"Error: Timeout while trying to load {url}"
    except requests.exceptions.RequestException as e:
        print(f"Request error loading page {url}: {str(e)}")
        return f"Error loading page {url}: {str(e)}"
    except Exception as e:
        print(f"General error loading page {url}: {str(e)}")
        traceback.print_exc()
        return f"Error loading page {url}: An unexpected error occurred ({type(e).__name__})."
|
|
|
|
|
|
|
# Module-level setup of the Gemini client, the tool declarations and the model.
# MODEL_NAME is assigned before the try block because the UI title formats it
# unconditionally; when it lived inside the try, a missing API key left the
# name undefined and the UI construction failed with NameError.
MODEL_NAME = "gemini-2.5-pro-exp-03-25"

try:
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        raise ValueError("GEMINI_API_KEY environment variable not set.")

    # NOTE(review): genai.configure / genai.GenerativeModel look like the
    # legacy `google.generativeai` API, while this file imports
    # `from google import genai` and `google.generative_ai.types` -- confirm
    # which SDK is intended and that these names exist in it.
    genai.configure(api_key=api_key)

    print(f"Attempting to use EXPERIMENTAL model: {MODEL_NAME}")

    # Function-calling declaration for load_page; the model may only supply `url`.
    browse_tool = types.Tool(
        function_declarations=[
            types.FunctionDeclaration(
                name='load_page',
                description='Fetches the content of a specific web page URL as Markdown text. Use this when the user asks for information from a specific URL they provide, or when you need to look up live information mentioned alongside a specific source URL.',
                parameters=types.Schema(
                    type=types.Type.OBJECT,
                    properties={
                        'url': types.Schema(type=types.Type.STRING, description="The *full* URL of the webpage to load (must start with http:// or https://).")
                    },
                    required=['url']
                )
            )
        ]
    )

    # Code-execution tool, offered to the model alongside load_page.
    code_execution_tool = types.Tool(code_execution=types.ToolCodeExecution())

    tools = [browse_tool, code_execution_tool]

    model = genai.GenerativeModel(
        model_name=MODEL_NAME,
        tools=tools,
        safety_settings={
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        },
        system_instruction="You are a helpful AI assistant called Gemini-Toolkit. You can browse specific web pages provided by the user via the 'load_page' tool. You can also execute Python code using the 'code_execution' tool to perform calculations, analyze data, or demonstrate programming concepts. Explain your reasoning and the steps you take. If asked to browse, confirm the URL you are accessing. If providing code, explain what it does.",
    )
    print(f"Gemini client initialized with model: {MODEL_NAME} and tools.")

except Exception as e:
    # Keep the module importable so the UI can still start; the chat handler
    # checks `model` and reports the failure to the user.
    print(f"CRITICAL ERROR: Error initializing Gemini client: {e}")
    traceback.print_exc()
    model = None
    tools = []
|
|
|
|
|
|
|
|
|
|
|
def handle_function_call(function_call):
    """Executes the function call requested by the model.

    Args:
        function_call: Object exposing ``name`` and ``args`` for the requested
            tool call. ``args`` may be empty or None.

    Returns:
        A ``types.Part`` wrapping a ``types.FunctionResponse`` named after the
        requested function. Its payload is ``{'content': ...}`` on the normal
        path (including "unknown function" and "missing url" messages) and
        ``{'error': ...}`` when executing the call raised.
    """
    function_name = function_call.name

    def _as_response_part(payload):
        # Single place where the FunctionResponse part is assembled.
        return types.Part(
            function_response=types.FunctionResponse(
                name=function_name,
                response=payload
            )
        )

    try:
        # Normalise inside the try: a call without arguments (args is None)
        # must produce an error response for the model, not an exception that
        # aborts the whole generation loop.
        args = dict(function_call.args or {})
        print(f"Executing Function Call: {function_name} with args: {args}")

        if function_name == 'load_page':
            url = args.get('url')
            if url:
                function_response_content = load_page(url=url)

                # Cap what is sent back to the model.
                MAX_RESPONSE_LEN = 50000
                if len(function_response_content) > MAX_RESPONSE_LEN:
                    print(f"Tool Response truncated from {len(function_response_content)} to {MAX_RESPONSE_LEN} chars.")
                    function_response_content = function_response_content[:MAX_RESPONSE_LEN] + "\n\n[... Tool Response Truncated Due to Size Limit ...]"
            else:
                function_response_content = "Error: URL parameter was missing in the function call. Please ensure the 'url' argument is provided."
        else:
            print(f"Error: Received call for unknown function '{function_name}'")
            function_response_content = f"Error: Unknown function '{function_name}' called by the model."

        function_response_part = _as_response_part({'content': function_response_content})
        print(f"Function Response generated for {function_name}")
        return function_response_part

    except Exception as e:
        print(f"Error during execution of function '{function_name}': {e}")
        traceback.print_exc()
        return _as_response_part({'error': f"Failed to execute function {function_name}: {str(e)}"})
|
|
|
def _render_parts_as_text(parts):
    """Flatten content parts (text, suggested code, execution results) into display text."""
    rendered = []
    for part in parts or []:
        if part.text:
            rendered.append(part.text + "\n")
        elif part.executable_code:
            lang = part.executable_code.language.name.lower() if part.executable_code.language else "python"
            fence = 'python' if lang == 'unknown_language' else lang
            rendered.append(f"\nSuggested Code ({lang}):\n```{fence}\n{part.executable_code.code}\n```\n")
        elif part.code_execution_result:
            result = part.code_execution_result
            outcome_str = "Success" if result.outcome == result.Outcome.OK else "Failure"
            rendered.append(f"\nCode Execution Result ({outcome_str}):\n```\n{result.output}\n```\n")
    return "".join(rendered).strip()


def _history_to_chat_pairs(history):
    """Convert API history into the [user, bot] pair list shown by the Chatbot."""
    pairs = []
    for content in history:
        if content.role == "system":
            continue
        msg_text = _render_parts_as_text(content.parts)
        if not msg_text:
            # Tool responses and pure function-call turns have nothing to display.
            continue
        if content.role == "user":
            pairs.append([msg_text, None])
        elif content.role == "model":
            if pairs and pairs[-1][1] is None:
                pairs[-1][1] = msg_text
            else:
                pairs.append([None, msg_text])
    return pairs


def _attach_notice(pairs, notice):
    """Show `notice` as the bot reply of the pending row, or as a new bot-only row."""
    if pairs and pairs[-1][1] is None:
        pairs[-1][1] = notice
    else:
        pairs.append([None, notice])
    return pairs


def _model_text_content(text):
    """Build a model-role Content holding a single text part."""
    return types.Content(role="model", parts=[types.Part.from_text(text)])


def generate_response_with_tools(user_input, history_state):
    """Handles user input, interacts with Gemini (incl. tools), and manages history.

    Args:
        user_input: Text of the user's new message.
        history_state: List of Content objects from earlier turns; anything
            that is not a list is treated as an empty history. The list is
            copied, not modified in place.

    Returns:
        A tuple ``(chat_pairs, history)``: ``chat_pairs`` is the full list of
        ``[user_text, bot_text]`` rows for the Chatbot, and ``history`` is the
        Content list to store for the next turn. On an exception the rows end
        with an error notice and the history returned is the one sent to the
        model (new user message included, no model reply).
    """
    prior_history = history_state if isinstance(history_state, list) else []

    if not model:
        # The Chatbot expects a list of pairs, not a bare string.
        unavailable_msg = "Error: The AI model (Gemini) could not be initialized. Please check the logs or API key configuration."
        return _attach_notice(_history_to_chat_pairs(prior_history), unavailable_msg), history_state or []

    if not user_input.strip():
        return _attach_notice(_history_to_chat_pairs(prior_history), "Please enter a valid query."), history_state or []

    # Work on a copy so the caller's state object is never mutated.
    conversation_history = list(prior_history)
    conversation_history.append(types.Content(role="user", parts=[types.Part.from_text(user_input)]))
    print(f"\n--- Sending to Gemini (History length: {len(conversation_history)}) ---")

    # Bound the context: keep the last MAX_HISTORY_TURNS * 2 items, plus a
    # leading system item when there is one.
    MAX_HISTORY_TURNS = 10
    has_system_head = conversation_history[0].role == "system"
    max_history_items = MAX_HISTORY_TURNS * 2 + (1 if has_system_head else 0)

    if len(conversation_history) > max_history_items:
        print(f"Trimming conversation history from {len(conversation_history)} items to ~{max_history_items}")
        if has_system_head:
            conversation_history = [conversation_history[0]] + conversation_history[-(max_history_items - 1):]
        else:
            conversation_history = conversation_history[-max_history_items:]

    MAX_TOOL_LOOPS = 5
    current_history_for_api = list(conversation_history)
    # Message shown to the user but deliberately not stored in the API history.
    display_notice = None

    try:
        # NOTE(review): confirm the SDK in use defines Candidate.FinishReason.TOOL_CALL.
        finish_reasons = types.Candidate.FinishReason

        for loop_count in range(1, MAX_TOOL_LOOPS + 1):
            print(f"Generation loop {loop_count}/{MAX_TOOL_LOOPS}...")

            response = model.generate_content(
                current_history_for_api,
                request_options={"timeout": 120},
            )

            if not response.candidates:
                print("Warning: No candidates received from Gemini.")
                current_history_for_api.append(_model_text_content("[No response generated by the model.]"))
                break

            candidate = response.candidates[0]
            finish_reason = candidate.finish_reason

            if finish_reason not in (finish_reasons.STOP, finish_reasons.TOOL_CALL):
                print(f"Warning: Generation stopped unexpectedly. Reason: {finish_reason.name}")
                stop_reason_msg = f"[Model stopped generating. Reason: {finish_reason.name}]"
                if candidate.content and candidate.content.parts and any(p.text for p in candidate.content.parts):
                    # Keep the partial answer in history and surface the reason separately.
                    current_history_for_api.append(candidate.content)
                    display_notice = stop_reason_msg
                else:
                    current_history_for_api.append(_model_text_content(stop_reason_msg))
                break

            current_history_for_api.append(candidate.content)

            if finish_reason != finish_reasons.TOOL_CALL:
                print("No tool call requested. Final response received.")
                if not _render_parts_as_text(candidate.content.parts):
                    display_notice = "[Assistant completed its turn without generating text output.]"
                break

            print("Tool call requested by model.")
            tool_calls_to_process = [part.function_call for part in candidate.content.parts if part.function_call]

            if not tool_calls_to_process:
                print("Warning: Model indicated TOOL_CALL finish reason but no function_call part found.")
                if not any(p.text for p in candidate.content.parts):
                    display_notice = "[Model indicated tool use but provided no details or text.]"
                break

            tool_responses = [handle_function_call(function_call) for function_call in tool_calls_to_process]
            current_history_for_api.append(types.Content(role="tool", parts=tool_responses))
            print("Added tool response(s) to history. Continuing loop...")
        else:
            # Runs only when no round ended with `break`, i.e. every round was
            # a tool call. A final answer on the last allowed round no longer
            # triggers the warning, and no unassigned message variable is read.
            print(f"Warning: Reached maximum tool execution loops ({MAX_TOOL_LOOPS}).")
            if current_history_for_api[-1].role != "model":
                current_history_for_api.append(
                    _model_text_content(f"[Warning: Reached maximum tool execution loops ({MAX_TOOL_LOOPS}). The final response might be incomplete.]")
                )

        print("--- Response Generation Complete ---")

        chatbot_display_list = _history_to_chat_pairs(current_history_for_api)
        if display_notice:
            _attach_notice(chatbot_display_list, display_notice)

        return chatbot_display_list, current_history_for_api

    except Exception as e:
        print(f"ERROR during Gemini generation or tool processing: {str(e)}")
        traceback.print_exc()
        error_message = f"An error occurred while processing your request: {str(e)}"

        try:
            chatbot_error_display = _history_to_chat_pairs(current_history_for_api)
        except Exception:
            # Best effort only: if the history cannot be rendered, still show the error.
            chatbot_error_display = []
        _attach_notice(chatbot_error_display, error_message)

        return chatbot_error_display, conversation_history
|
|
|
|
|
|
|
|
|
# Gradio UI: a chat window, a query box, Send/Clear buttons, and a State
# holding the API-side conversation history between turns.
# NOTE(review): the leading characters of the title and of both button labels
# look like mis-decoded emoji; they are left untouched here -- confirm against
# the file's original encoding.
with gr.Blocks(title="Gemini AI Assistant w/ Tools", theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"# π Gemini AI Assistant ({MODEL_NAME})")
    gr.Markdown("Ask questions, request info from specific URLs, or ask for code/calculations. Uses function calling and code execution.")

    chatbot_display = gr.Chatbot(
        label="Conversation",
        bubble_full_width=False,
        height=600,
        show_copy_button=True,
        render_markdown=True
    )

    msg_input = gr.Textbox(
        label="Your Query",
        placeholder="Ask anything... (e.g., 'Summarize example.com', 'Calculate 2^64', 'Write python code to list files')",
        lines=3,
        scale=4
    )

    clear_btn = gr.ClearButton(value="ποΈ Clear Chat")

    send_btn = gr.Button("β‘οΈ Send", variant="primary", scale=1)

    # Content objects exchanged with the model; separate from the displayed pairs.
    chat_history_state = gr.State([])

    def user_message_update(user_message, history_display_list):
        """Appends the user's message to the display list and clears the input."""
        if not user_message.strip():
            return gr.update(value=""), history_display_list
        return gr.update(value=""), history_display_list + [[user_message, None]]

    def bot_response_update(history_display_list, history_state):
        """Calls the backend Gemini function and updates display/state.

        Only proceeds when the last displayed row is a user message that is
        still waiting for a reply. After an empty submit nothing is appended,
        so the last row is an already-answered turn; without the reply check
        that earlier message would be sent to the model a second time.
        """
        awaiting_reply = (
            bool(history_display_list)
            and history_display_list[-1][0] is not None
            and history_display_list[-1][1] is None
        )
        if not awaiting_reply:
            print("Warning: bot_response_update called without preceding user message in display.")
            return history_display_list, history_state

        user_message = history_display_list[-1][0]
        print(f"User message being sent to backend: {user_message}")

        updated_display_list, updated_history_state = generate_response_with_tools(user_message, history_state)
        return updated_display_list, updated_history_state

    # Enter in the textbox and the Send button run the same two-step chain:
    # show the user's message immediately, then fetch the model's reply.
    for register_trigger in (msg_input.submit, send_btn.click):
        register_trigger(
            user_message_update,
            [msg_input, chatbot_display],
            [msg_input, chatbot_display],
            queue=False,
        ).then(
            bot_response_update,
            [chatbot_display, chat_history_state],
            [chatbot_display, chat_history_state]
        )

    clear_btn.add(components=[msg_input, chatbot_display, chat_history_state])
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: enable request queuing, then serve the UI on every
    # network interface (0.0.0.0) at port 7860.
    print("Starting Gradio App...")
    listen_host = "0.0.0.0"
    listen_port = 7860
    queued_app = demo.queue()
    queued_app.launch(server_name=listen_host, server_port=listen_port)
    print("Gradio App Stopped.")