# Agentic Storycrafter — a multi-agent story + illustration pipeline
# built on smolagents, with tools served over MCP from Hugging Face Spaces.
import ast
import base64
import io
import os

from dotenv import load_dotenv
from mcp import StdioServerParameters
from PIL import Image
from smolagents import CodeAgent, ToolCallingAgent, LiteLLMModel, MCPClient
# --- 1. Environment and Model Setup ---
# Pull API keys from a local .env file into the process environment.
load_dotenv()

# Shared LLM backend for every agent in this script.
# Requires GEMINI_API_KEY to be present in the environment (e.g. via .env).
model = LiteLLMModel(
    api_key=os.environ.get("GEMINI_API_KEY"),
    model_id="gemini/gemini-2.0-flash-exp",
)
# --- 2. MCP Server Configuration ---
# Each deployed Gradio MCP endpoint is reached through `npx mcp-remote`,
# pinned to SSE transport. Both servers share the same bridge shape, so a
# small local helper builds the parameter objects.
def _sse_bridge(url):
    """Return StdioServerParameters that proxy *url* via mcp-remote (SSE only)."""
    return StdioServerParameters(
        command="npx",
        args=["mcp-remote", url, "--transport", "sse-only"],
    )


kgb_server_parameters = _sse_bridge(
    "https://agents-mcp-hackathon-kgb-mcp.hf.space/gradio_api/mcp/sse"
)
t2i_server_parameters = _sse_bridge(
    "https://agents-mcp-hackathon-t2i.hf.space/gradio_api/mcp/sse"
)
server_parameters = [kgb_server_parameters, t2i_server_parameters]
# --- 3. Main Execution Block ---
# MCPClient as a context manager spawns (and later tears down) the
# background mcp-remote bridge processes for both servers.
with MCPClient(server_parameters) as mcp:
    print("Connecting to MCP servers and fetching tools...")
    all_tools = mcp.get_tools()
    print(f"Found {len(all_tools)} tools.")

    # --- 4. Tool Integration ---
    # Locate the two tools by case-insensitive name substring; either may be
    # absent if a server failed to start, so fall back to None + a warning.
    knowledge_tool = next((tool for tool in all_tools if "knowledge_graph" in tool.name.lower()), None)
    image_tool = next((tool for tool in all_tools if "text_to_image" in tool.name.lower()), None)

    if not knowledge_tool:
        print("Warning: Knowledge graph tool not found.")
    if not image_tool:
        print("Warning: Text-to-image tool not found.")

    writer_tools = [knowledge_tool] if knowledge_tool else []
    illustrator_tools = [image_tool] if image_tool else []

    # --- 5. Agent Definitions ---
    # The agent team, equipped with whichever MCP tools were discovered.

    # The Writer Agent
    writer_agent = ToolCallingAgent(
        tools=writer_tools,
        model=model,
        name="writer",
        description="A creative agent that writes short stories. It can use a knowledge graph tool to research topics for inspiration."
    )

    # The Illustrator Agent
    illustrator_agent = ToolCallingAgent(
        tools=illustrator_tools,
        model=model,
        name="illustrator",
        description="An artist agent that creates illustrations based on a descriptive prompt using a text-to-image tool."
    )

    # The Director Agent orchestrates the two managed agents.
    # NOTE(review): `system_prompt` as a CodeAgent kwarg is version-dependent
    # in smolagents — confirm against the installed version.
    director_agent = CodeAgent(
        tools=[],
        model=model,
        managed_agents=[writer_agent, illustrator_agent],
        system_prompt="""
        You are the Director of Agentic Storycrafter, a creative team. Your job is to manage the writer and illustrator agents to create a story with an illustration.
        Here is your workflow:
        1. Receive a user's prompt for a story.
        2. Call the `writer` agent to write a story based on the user's prompt.
        3. After the story is written, create a short, descriptive prompt for an illustration that captures the essence of the story.
        4. Call the `illustrator` agent with this new prompt to generate an image. The result will be a dictionary containing image data.
        5. Return a dictionary containing both the final 'story' and the 'image_data' from the illustrator.
        """
    )

    # --- 6. The Creative Workflow ---
    if __name__ == "__main__":
        user_prompt = "a story about a wise old owl living in a library of forgotten books"
        print("\n--- Director's Task ---")
        print(f"Prompt: {user_prompt}\n")

        # The director runs the full workflow and should produce a dict with
        # 'story' and 'image_data' keys (possibly serialized as a string).
        final_output = director_agent.run(f"Create a story and illustration for the following prompt: {user_prompt}")

        print("\n--- Agentic Storycrafter Result ---")
        # SECURITY FIX: never eval() LLM output — it can execute arbitrary
        # code. ast.literal_eval only parses Python literals. Also handle the
        # case where the agent already returned a dict object.
        if isinstance(final_output, dict):
            result_dict = final_output
        else:
            try:
                result_dict = ast.literal_eval(str(final_output))
            except (ValueError, SyntaxError):
                # Not a dict literal — treat the whole output as the story.
                result_dict = {"story": str(final_output), "image_data": None}

        story = result_dict.get("story")
        image_data = result_dict.get("image_data")

        print("\n--- STORY ---")
        print(story)

        # Guard with isinstance: `'b64_json' in image_data` on a plain string
        # would be a substring test, not a key lookup.
        if isinstance(image_data, dict) and 'b64_json' in image_data:
            print("\n--- ILLUSTRATION ---")
            print("Illustration created. Saving to 'story_illustration.png'")
            # Decode the base64 payload and persist it as a PNG.
            try:
                img_bytes = base64.b64decode(image_data['b64_json'])
                img = Image.open(io.BytesIO(img_bytes))
                img.save("story_illustration.png")
                print("Image saved successfully.")
            except Exception as e:
                # Best-effort save: report the failure, keep the story output.
                print(f"Error saving image: {e}")
        else:
            print("\n--- ILLUSTRATION ---")
            print("No illustration was generated.")