#!/usr/bin/env python3 """ SillyTavern Character‑Card Generator — version 2.0.3 (Apr 2025) ────────────────────────────────────────────────────────────── • Added helpful placeholder text for all text inputs so first‑time users immediately know what to type or paste. • No behavioural changes beyond UI polish. """ from __future__ import annotations import json, sys, uuid from dataclasses import dataclass from functools import cached_property from pathlib import Path from typing import Any, Dict, List, Tuple, Union import gradio as gr from PIL import Image from PIL.PngImagePlugin import PngInfo __version__ = "2.0.3" MIN_GRADIO = (4, 44, 1) if tuple(map(int, gr.__version__.split("."))) < MIN_GRADIO: sys.exit( f"gradio>={'/'.join(map(str, MIN_GRADIO))} required — found {gr.__version__}" ) # ─── Model lists ─────────────────────────────────────────────────────────── CLAUDE_MODELS = [ "claude-3-opus-20240229", "claude-3-sonnet-20240229", "claude-3-haiku-20240307", "claude-3-5-sonnet-20240620", "claude-3-5-sonnet-20241022", # Hypothetical future model "claude-3-5-haiku-20241022", # Hypothetical future model "claude-3-7-sonnet-20250219", # Hypothetical future model ] OPENAI_MODELS = [ "o3", # Hypothetical future model "o3-mini", # Hypothetical future model "o4-mini", # Hypothetical future model "gpt-4.1", # Hypothetical future model "gpt-4.1-mini", # Hypothetical future model "gpt-4.1-nano", # Hypothetical future model "gpt-4o", "gpt-4o-mini", "gpt-4", "gpt-4-32k", "gpt-4-0125-preview", "gpt-4-turbo-preview", "gpt-4-1106-preview", "gpt-3.5-turbo", ] ALL_MODELS = CLAUDE_MODELS + OPENAI_MODELS DEFAULT_ANTHROPIC_ENDPOINT = "https://api.anthropic.com" DEFAULT_OPENAI_ENDPOINT = "https://api.openai.com/v1" # ─── API wrapper ─────────────────────────────────────────────────────────── JsonDict = Dict[str, Any] try: from anthropic import Anthropic, APITimeoutError as AnthropicTimeout except ImportError: Anthropic = None try: from openai import OpenAI, APITimeoutError as OpenAITimeout except ImportError: OpenAI = None @dataclass class APIConfig: endpoint: str api_key: str model: str temperature: float = 0.7 top_p: float = 0.9 thinking: bool = False @cached_property def provider(self): return "anthropic" if self.model in CLAUDE_MODELS else "openai" @cached_property def sdk(self): if not self.api_key: raise gr.Error("API Key is required.") if not self.model: raise gr.Error("Model selection is required.") try: if self.provider == "anthropic": if not Anthropic: raise RuntimeError("Anthropic SDK not installed. Run: pip install anthropic") return Anthropic(api_key=self.api_key, base_url=self.endpoint) else: # openai if not OpenAI: raise RuntimeError("OpenAI SDK not installed. Run: pip install openai") return OpenAI(api_key=self.api_key, base_url=self.endpoint) except Exception as e: raise gr.Error(f"Failed to initialize API client: {e}") def chat(self, user: str, system: str = "", max_tokens: int = 4096) -> str: try: if self.provider == "anthropic": args = dict( model=self.model, system=system, messages=[{"role": "user", "content": user}], max_tokens=max_tokens, temperature=self.temperature, top_p=self.top_p, ) # Note: Anthropic doesn't have a direct 'thinking' or 'vision' parameter # for text generation in the way described. This might be a placeholder # or intended for a different API structure. Assuming standard text chat. # if self.thinking: # args["vision"] = "detailed" # This is not a standard Anthropic param for messages API response = self.sdk.messages.create(**args) if response.content and isinstance(response.content, list): return response.content[0].text else: raise gr.Error("Unexpected response format from Anthropic API.") else: # openai messages = [] if system: messages.append({"role": "system", "content": system}) messages.append({"role": "user", "content": user}) args = dict( model=self.model, messages=messages, max_tokens=max_tokens, temperature=self.temperature, top_p=self.top_p, ) # Note: OpenAI doesn't have a direct 'reasoning_mode' parameter # for chat completions. This might be a placeholder or intended for # a different API structure. Assuming standard chat completion. # if self.thinking: # args["reasoning_mode"] = "enhanced" # Not a standard OpenAI param response = self.sdk.chat.completions.create(**args) if response.choices: return response.choices[0].message.content else: raise gr.Error("No response choices received from OpenAI API.") except (AnthropicTimeout, OpenAITimeout) as e: raise gr.Error(f"API request timed out: {e}") except Exception as e: # Provide more specific error feedback if possible err_msg = f"API Error ({self.provider}): {e}" if "authentication" in str(e).lower(): err_msg = "API Error: Authentication failed. Check your API Key and Endpoint." elif "rate limit" in str(e).lower(): err_msg = "API Error: Rate limit exceeded. Please wait and try again." elif "not found" in str(e).lower() and "model" in str(e).lower(): err_msg = f"API Error: Model '{self.model}' not found or unavailable at '{self.endpoint}'." raise gr.Error(err_msg) # ─── card helpers ────────────────────────────────────────────────────────── CARD_REQUIRED = { "char_name", "char_persona", "world_scenario", "char_greeting", "example_dialogue", # "description", # Note: SillyTavern uses 'description', but the prompt generates it. Let's keep it flexible. } CARD_RENAMES = { "char_name": "name", "char_persona": "personality", "world_scenario": "scenario", "char_greeting": "first_mes", "example_dialogue": "mes_example", # description maps directly to description } def extract_card_json(txt: str) -> Tuple[str | None, JsonDict | None]: """Extracts JSON block, validates required keys, and renames keys for SillyTavern.""" try: # Find the JSON block, allowing for potential markdown fences json_start = txt.find("{") json_end = txt.rfind("}") if json_start == -1 or json_end == -1 or json_end < json_start: gr.Warning("Could not find JSON block in the LLM output.") return None, None raw_json_str = txt[json_start : json_end + 1] data = json.loads(raw_json_str) # Validate required keys generated by the LLM missing_keys = CARD_REQUIRED - data.keys() if missing_keys: gr.Warning(f"LLM output missing required keys: {', '.join(missing_keys)}") return None, None # Rename keys for SillyTavern format and add the original description st_data = {st_key: data[orig_key] for orig_key, st_key in CARD_RENAMES.items()} if "description" in data: st_data["description"] = data["description"] # Add description if present else: gr.Warning("LLM output missing 'description' key. Card might be incomplete.") st_data["description"] = "" # Add empty description if missing # Add spec field if not present (though usually not generated by LLM) if "spec" not in st_data: st_data["spec"] = "chara_card_v2" if "spec_version" not in st_data: st_data["spec_version"] = "2.0" # Or the appropriate version # Ensure essential fields are present after rename final_required = {"name", "personality", "scenario", "first_mes", "mes_example", "description"} if not final_required <= st_data.keys(): gr.Warning(f"Internal Error: Failed to map required keys. Check CARD_RENAMES.") return None, None # Return formatted JSON string and the dictionary formatted_json = json.dumps(st_data, indent=2) return formatted_json, st_data except json.JSONDecodeError: gr.Warning("Failed to parse JSON from the LLM output.") return None, None except Exception as e: gr.Warning(f"Error processing LLM output: {e}") return None, None def inject_card_into_png(img_path: str, card_data: Union[str, JsonDict]) -> Path: """Embeds card JSON into PNG metadata, resizes, and saves.""" if not img_path: raise gr.Error("Input image not provided.") try: if isinstance(card_data, str): card = json.loads(card_data) else: card = card_data # Assume it's already a dict if not isinstance(card, dict) or "name" not in card: raise gr.Error("Invalid or incomplete card JSON provided.") except json.JSONDecodeError: raise gr.Error("Invalid JSON format in the provided text.") except Exception as e: raise gr.Error(f"Error processing card data: {e}") try: img = Image.open(img_path) img = img.convert("RGB") # Ensure consistent format # Resize logic (optional, depends on desired output) w, h = img.size target_w, target_h = 400, 600 # Example target size target_ratio = target_w / target_h img_ratio = w / h if abs(img_ratio - target_ratio) > 0.01: # Only crop/resize if aspect ratio differs significantly if img_ratio > target_ratio: # Wider than target: crop sides new_w = int(h * target_ratio) left = (w - new_w) // 2 right = left + new_w img = img.crop((left, 0, right, h)) else: # Taller than target: crop top/bottom new_h = int(w / target_ratio) top = (h - new_h) // 2 bottom = top + new_h img = img.crop((0, top, w, bottom)) img = img.resize((target_w, target_h), Image.LANCZOS) # Prepare metadata meta = PngInfo() # Encode JSON string to bytes, then to hex for safety in metadata meta.add_text("chara", json.dumps(card, ensure_ascii=False).encode('utf-8').hex()) # Prepare output directory and filename out_dir = Path(__file__).parent / "outputs" out_dir.mkdir(parents=True, exist_ok=True) # Sanitize character name for filename char_name_safe = "".join(c for c in card.get('name', 'character') if c.isalnum() or c in (' ', '_', '-')).rstrip() dest = out_dir / f"{char_name_safe}_{uuid.uuid4().hex[:8]}.png" # Save image with metadata img.save(dest, "PNG", pnginfo=meta) gr.Info(f"Card successfully embedded into {dest.name}") return dest except FileNotFoundError: raise gr.Error(f"Input image file not found: {img_path}") except Exception as e: raise gr.Error(f"Error processing image or saving PNG: {e}") # ─── Gradio UI ───────────────────────────────────────────────────────────── def build_ui(): with gr.Blocks(title=f"SillyTavern Card Gen {__version__}") as demo: gr.Markdown(f"## 🃏 SillyTavern Character Card Generator v{__version__}") gr.Markdown("Create character cards for SillyTavern using LLMs.") with gr.Tab("Step 1: Generate Card JSON"): with gr.Row(): with gr.Column(scale=1): gr.Markdown("#### LLM Configuration") endpoint = gr.Textbox( label="API Endpoint", value=DEFAULT_ANTHROPIC_ENDPOINT, placeholder="LLM API base URL (e.g., https://api.anthropic.com)", info="Automatically updates based on API Key prefix (sk-ant- vs sk-)." ) api_key = gr.Textbox( label="API Key", type="password", placeholder="Paste your sk-ant-... or sk-... key here", ) model_dd = gr.Dropdown( ALL_MODELS, label="Model", info="Select the LLM to use for generation.", value=CLAUDE_MODELS[0] # Default to a common Claude model ) thinking = gr.Checkbox( label="Thinking mode (deeper reasoning)", value=False, info="May enable enhanced reasoning modes (experimental, model-dependent)." ) with gr.Accordion("Advanced Settings", open=False): temp = gr.Slider(0, 1, 0.7, label="Temperature", info="Controls randomness. Lower is more deterministic.") topp = gr.Slider(0, 1, 0.9, label="Top‑P", info="Nucleus sampling. Considers tokens comprising the top P probability mass.") with gr.Column(scale=2): gr.Markdown("#### Character Definition") prompt = gr.Textbox( lines=8, label="Character Description Prompt", placeholder="Describe the character you want to create in detail. Include:\n" "- Appearance (hair, eyes, clothing, distinguishing features)\n" "- Personality (traits, quirks, likes, dislikes, motivations)\n" "- Backstory (origins, key life events, relationships)\n" "- Setting/Scenario (where and when the interaction takes place)\n" "- Any specific details relevant to their speech or behavior.", info="Provide a rich description for the LLM to generate the card fields." ) gen = gr.Button("Generate JSON Card", variant="primary") with gr.Row(): with gr.Column(scale=1): gr.Markdown("#### LLM Output") raw_out = gr.Textbox( label="Raw LLM Output", lines=15, show_copy_button=True, placeholder="The full response from the language model will appear here.", info="Contains the generated JSON block and potentially other text." ) with gr.Column(scale=1): gr.Markdown("#### Processed Card") json_out = gr.Textbox( label="Extracted SillyTavern JSON", lines=15, show_copy_button=True, placeholder="The extracted and formatted JSON for SillyTavern will appear here.", info="This is the data that will be embedded in the PNG." ) json_file = gr.File(label="Download .json Card", file_count="single", interactive=False) with gr.Accordion("Step 1b: Generate Image Prompt (Optional)", open=False): with gr.Row(): img_model = gr.Dropdown( ["SDXL", "Midjourney"], # Simplified names label="Target Image Model", value="SDXL", info="Optimize the image prompt for this AI model.", ) gen_img_prompt = gr.Button("Generate Image Prompt from Card") img_prompt_out = gr.Textbox( label="Generated Image Prompt", show_copy_button=True, placeholder="An image generation prompt based on the card details will appear here.", info="Copy this prompt into your preferred image generation tool." ) with gr.Tab("Step 2: Inject JSON into PNG"): gr.Markdown("Upload your character image and the generated JSON (or paste/upload it) to create the final PNG card.") with gr.Row(): with gr.Column(): img_up = gr.Image(type="filepath", label="Upload Character Image", sources=["upload", "clipboard"]) with gr.Column(): # Option 1: Use JSON from Step 1 gr.Markdown("Use JSON generated in Step 1 (automatically filled if generated).") json_text_from_step1 = gr.Textbox( label="Card JSON (from Step 1 or paste here)", lines=8, placeholder="Paste the SillyTavern JSON here if you didn't generate it in Step 1, or if you want to override it.", info="This field is automatically populated from Step 1's 'Extracted SillyTavern JSON'." ) # Option 2: Upload JSON file json_up = gr.File( label="...or Upload .json File", file_count="single", file_types=[".json"], info="Upload a previously saved .json card file." ) inject_btn = gr.Button("Embed JSON & Create PNG Card", variant="primary") png_out = gr.File(label="Download PNG Card", file_count="single", interactive=False) png_preview = gr.Image(label="PNG Card Preview", interactive=False, width=200, height=300) # ── Callbacks Wiring ─────────────────────────────────────────── def choose_endpoint(k): """Automatically suggest endpoint based on API key prefix.""" if isinstance(k, str): if k.startswith("sk-ant-"): return DEFAULT_ANTHROPIC_ENDPOINT elif k.startswith("sk-"): return DEFAULT_OPENAI_ENDPOINT # Default or if key is empty/invalid prefix return DEFAULT_ANTHROPIC_ENDPOINT api_key.change(choose_endpoint, inputs=api_key, outputs=endpoint, show_progress=False) def generate_json_card(ep, k, m, think, t, p, user_prompt): """Handles the JSON generation button click.""" if not user_prompt: raise gr.Error("Character Description Prompt cannot be empty.") if not k: raise gr.Error("API Key is required.") if not m: raise gr.Error("Model must be selected.") try: cfg = APIConfig(ep.strip(), k.strip(), m, t, p, think) # Load the system prompt for JSON generation sys_prompt_path = Path(__file__).parent / "json.txt" if not sys_prompt_path.exists(): # Fallback or default prompt if file is missing gr.Warning("System prompt file 'json.txt' not found. Using a basic prompt.") sys_prompt = """You are an AI assistant tasked with creating character data for SillyTavern in JSON format. Based on the user's description, generate a JSON object containing the following keys: - char_name: The character's name. - char_persona: A detailed description of the character's personality, motivations, and mannerisms. - world_scenario: The setting or context where the user interacts with the character. - char_greeting: The character's first message to the user. - example_dialogue: Example dialogue demonstrating the character's speech patterns and personality. Use {{user}} and {{char}} placeholders. - description: A general description covering appearance and backstory. Output ONLY the JSON object, enclosed in ```json ... ```.""" else: sys_prompt = sys_prompt_path.read_text(encoding='utf-8') raw_output = cfg.chat(user_prompt, sys_prompt) extracted_json_str, parsed_data = extract_card_json(raw_output) if extracted_json_str and parsed_data: # Create a downloadable JSON file outdir = Path(__file__).parent / "outputs" outdir.mkdir(parents=True, exist_ok=True) # Sanitize name for filename char_name_safe = "".join(c for c in parsed_data.get('name', 'character') if c.isalnum() or c in (' ', '_', '-')).rstrip() json_filename = outdir / f"{char_name_safe}_{uuid.uuid4().hex[:8]}.json" json_filename.write_text(extracted_json_str, encoding='utf-8') gr.Info("JSON card generated successfully.") # Update outputs: raw output, extracted JSON, downloadable file, and populate Step 2 input return raw_output, extracted_json_str, gr.File(value=str(json_filename), visible=True), extracted_json_str else: gr.Warning("Failed to extract valid JSON from LLM output. Check 'Raw LLM Output' for details.") # Update outputs, clearing JSON fields and file return raw_output, "", gr.File(value=None, visible=False), "" except gr.Error as e: # Catch Gradio-specific errors (like API init failures) raise e # Re-raise to display the error message in the UI except Exception as e: gr.Error(f"An unexpected error occurred during JSON generation: {e}") return f"Error: {e}", "", gr.File(value=None, visible=False), "" # Show error in raw output gen.click( generate_json_card, inputs=[endpoint, api_key, model_dd, thinking, temp, topp, prompt], outputs=[raw_out, json_out, json_file, json_text_from_step1], # Update Step 2 input too api_name="generate_json" ) def generate_image_prompt(ep, k, m, card_json_str, image_gen_model): """Handles the image prompt generation button click.""" if not card_json_str: raise gr.Error("Cannot generate image prompt without valid Card JSON.") if not k: raise gr.Error("API Key is required for image prompt generation.") if not m: raise gr.Error("Model must be selected for image prompt generation.") try: # Use a cheaper/faster model if available, or the selected one # For simplicity, we use the same config as JSON gen for now cfg = APIConfig(ep.strip(), k.strip(), m) # Load the appropriate system prompt based on the target image model prompt_filename = f"{image_gen_model.lower()}.txt" sys_prompt_path = Path(__file__).parent / prompt_filename if not sys_prompt_path.exists(): gr.Warning(f"System prompt file '{prompt_filename}' not found. Using a generic image prompt.") sys_prompt = f"Based on the following character JSON data, create a concise and effective image generation prompt suitable for an AI image generator like {image_gen_model}. Focus on visual details like appearance, clothing, and setting. Character JSON:\n" else: sys_prompt = sys_prompt_path.read_text(encoding='utf-8') + "\nCharacter JSON:\n" # Construct user prompt for the LLM user_img_prompt = f"{sys_prompt}{card_json_str}" img_prompt = cfg.chat(user_img_prompt, max_tokens=200) # Limit token count for prompts gr.Info("Image prompt generated.") return img_prompt.strip() except gr.Error as e: raise e except Exception as e: gr.Error(f"An unexpected error occurred during image prompt generation: {e}") return f"Error generating prompt: {e}" gen_img_prompt.click( generate_image_prompt, inputs=[endpoint, api_key, model_dd, json_out, img_model], # Use generated JSON output outputs=[img_prompt_out], api_name="generate_image_prompt" ) def handle_json_upload(json_file_obj, current_json_text): """Reads uploaded JSON file and updates the text box, overriding text if file is provided.""" if json_file_obj is not None: try: json_path = Path(json_file_obj.name) content = json_path.read_text(encoding='utf-8') # Validate if it's proper JSON before updating json.loads(content) gr.Info(f"Loaded JSON from {json_path.name}") return content except json.JSONDecodeError: gr.Warning("Uploaded file is not valid JSON. Keeping existing text.") return current_json_text except Exception as e: gr.Warning(f"Error reading uploaded JSON file: {e}. Keeping existing text.") return current_json_text # If no file is uploaded, keep the existing text (which might be from Step 1) return current_json_text # When a JSON file is uploaded, update the text box json_up.upload( handle_json_upload, inputs=[json_up, json_text_from_step1], outputs=[json_text_from_step1] ) def inject_card(img_filepath, json_str): """Handles the PNG injection button click.""" if not img_filepath: raise gr.Error("Please upload a character image first.") if not json_str: raise gr.Error("Card JSON is missing. Generate it in Step 1 or paste/upload it.") try: # The helper function handles JSON parsing and validation output_png_path = inject_card_into_png(img_filepath, json_str) # Return path for download and preview return gr.File(value=str(output_png_path), visible=True), gr.Image(value=str(output_png_path), visible=True) except gr.Error as e: # Catch errors from inject_card_into_png raise e except Exception as e: gr.Error(f"An unexpected error occurred during PNG injection: {e}") return gr.File(value=None, visible=False), gr.Image(value=None, visible=False) # Clear outputs on error inject_btn.click( inject_card, inputs=[img_up, json_text_from_step1], # Use the text box content outputs=[png_out, png_preview], api_name="inject_card" ) return demo # --- Main execution --- if __name__ == "__main__": # Create dummy prompt files if they don't exist prompt_dir = Path(__file__).parent # Create outputs directory (prompt_dir / "outputs").mkdir(exist_ok=True) # Build and launch the Gradio interface app = build_ui() app.launch()