diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000000000000000000000000000000000000..39870fc213b0e1f094534255b385e0a2e069d7c0
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,94 @@
+# Git and version control
+.git
+.gitignore
+*.md
+!README.md
+
+# Python cache and virtual environments
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# Virtual environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+tea_env/
+
+# IDE and editor files
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# OS generated files
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+
+# Output directories (will be created in container)
+output/
+*.mp4
+*.srt
+*.wav
+
+# Image files (except those needed for the app)
+thumbnails/
+*.png
+*.jpg
+*.jpeg
+
+# Log files
+*.log
+gradio_app.log
+
+# Cache directories
+.cache/
+.pytest_cache/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# Temporary files
+tmp/
+temp/
+*.tmp
+Miniconda3-*.sh
+
+# Documentation that's not needed in container
+docs/
+
+# Test files
+test_*.py
+
+# Models will be downloaded in container, so exclude local ones
+# But keep the directory structure
+models/*.onnx
+models/*.bin
\ No newline at end of file
diff --git a/.env b/.env
new file mode 100644
index 0000000000000000000000000000000000000000..c5248bd10368a413f83738e3d565f0eee347e73d
--- /dev/null
+++ b/.env
@@ -0,0 +1,26 @@
+# OpenAI
+OPENAI_API_KEY=""
+
+# Azure OpenAI
+AZURE_API_KEY=""
+AZURE_API_BASE=""
+AZURE_API_VERSION=""
+OPENROUTER_API_KEY = ""
+# Google Vertex AI
+VERTEXAI_PROJECT=""
+VERTEXAI_LOCATION=""
+GOOGLE_APPLICATION_CREDENTIALS=""
+GITHUB_API_KEY = ""
+GITHUB_TOKEN = ""
+OPENAI_API_KEY = ""
+# Google Gemini
+GEMINI_API_KEY=""
+
+...
+
+# Kokoro TTS Settings
+KOKORO_MODEL_PATH="models/kokoro-v0_19.onnx"
+KOKORO_VOICES_PATH="models/voices.bin"
+KOKORO_DEFAULT_VOICE="af"
+KOKORO_DEFAULT_SPEED="1.0"
+KOKORO_DEFAULT_LANG="en-us"
\ No newline at end of file
diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000000000000000000000000000000000000..2c2c6c13c8cc57cb289f4b0df59787208672be32
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,26 @@
+# OpenAI
+OPENAI_API_KEY=""
+
+# Azure OpenAI
+AZURE_API_KEY=""
+AZURE_API_BASE=""
+AZURE_API_VERSION=""
+OPENROUTER_API_KEY = ""
+# Google Vertex AI
+VERTEXAI_PROJECT=""
+VERTEXAI_LOCATION=""
+GOOGLE_APPLICATION_CREDENTIALS=""
+GITHUB_API_KEY = ""
+GITHUB_TOKEN = ""
+OPENAI_API_KEY = ""
+# Google Gemini
+GEMINI_API_KEY=""
+
+...
+ +# Kokoro TTS Settings +KOKORO_MODEL_PATH="models/kokoro-v0_19.onnx" +KOKORO_VOICES_PATH="models/voices.bin" +KOKORO_DEFAULT_VOICE="af" +KOKORO_DEFAULT_SPEED="1.0" +KOKORO_DEFAULT_LANG="en-us" \ No newline at end of file diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..7661356418d375f249463c6b873864490d9de981 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +task_generator/prompts_raw/__pycache__/__init__.cpython-312.pyc filter=lfs diff=lfs merge=lfs -text diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..af7d3b0ba7e6b6301cc415ed2c278f87ba37fd7c --- /dev/null +++ b/src/__init__.py @@ -0,0 +1 @@ +# This is essential for the release to work \ No newline at end of file diff --git a/src/__pycache__/__init__.cpython-312.pyc b/src/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b9c831e768ad60ccd4f406b3e18cbeb235a1245e Binary files /dev/null and b/src/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/config/__init__.py b/src/config/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/config/__pycache__/__init__.cpython-312.pyc b/src/config/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2eaab151b884a40f3095600f1be6cba5dbbd9cd8 Binary files /dev/null and b/src/config/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/config/__pycache__/config.cpython-312.pyc b/src/config/__pycache__/config.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9c08b1b0f9a5295d5a53e1f4c349ea3eae1cdf87 Binary files /dev/null and b/src/config/__pycache__/config.cpython-312.pyc differ diff --git a/src/config/config.py b/src/config/config.py new file mode 100644 index 0000000000000000000000000000000000000000..7efe1ffb729cc22c00deceeaaf47d95adf444470 --- /dev/null +++ b/src/config/config.py @@ -0,0 +1,20 @@ +import os +from dotenv import load_dotenv + +# Load environment variables from .env file +load_dotenv() + +class Config: + OUTPUT_DIR = "output" + THEOREMS_PATH = os.path.join("data", "easy_20.json") + CONTEXT_LEARNING_PATH = "data/context_learning" + CHROMA_DB_PATH = "data/rag/chroma_db" + MANIM_DOCS_PATH = "data/rag/manim_docs" + EMBEDDING_MODEL = "hf:ibm-granite/granite-embedding-30m-english" + + # Kokoro TTS configurations + KOKORO_MODEL_PATH = os.getenv('KOKORO_MODEL_PATH') + KOKORO_VOICES_PATH = os.getenv('KOKORO_VOICES_PATH') + KOKORO_DEFAULT_VOICE = os.getenv('KOKORO_DEFAULT_VOICE') + KOKORO_DEFAULT_SPEED = float(os.getenv('KOKORO_DEFAULT_SPEED', '1.0')) + KOKORO_DEFAULT_LANG = os.getenv('KOKORO_DEFAULT_LANG') \ No newline at end of file diff --git a/src/core/__init__.py b/src/core/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/core/__pycache__/__init__.cpython-312.pyc b/src/core/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3bbb2748ee9727efbb2b59de76f9992dee73e787 Binary files /dev/null and b/src/core/__pycache__/__init__.cpython-312.pyc differ diff --git 
a/src/core/__pycache__/code_generator.cpython-312.pyc b/src/core/__pycache__/code_generator.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f36ef1ee5061a7aae996d57ccafd16fe8839514e Binary files /dev/null and b/src/core/__pycache__/code_generator.cpython-312.pyc differ diff --git a/src/core/__pycache__/parse_video.cpython-312.pyc b/src/core/__pycache__/parse_video.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..011f5ba0ed6d48deffa12bbf0166411cb335b92e Binary files /dev/null and b/src/core/__pycache__/parse_video.cpython-312.pyc differ diff --git a/src/core/__pycache__/video_planner.cpython-312.pyc b/src/core/__pycache__/video_planner.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aab6c615582ab5062561b20d094a888b25c21504 Binary files /dev/null and b/src/core/__pycache__/video_planner.cpython-312.pyc differ diff --git a/src/core/__pycache__/video_renderer.cpython-312.pyc b/src/core/__pycache__/video_renderer.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..75c78bf66d874de9a4f57f39c719d126c14cbcac Binary files /dev/null and b/src/core/__pycache__/video_renderer.cpython-312.pyc differ diff --git a/src/core/code_generator.py b/src/core/code_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..d7a4912a23aa8e21787f4ef0af7cdc74870f88ec --- /dev/null +++ b/src/core/code_generator.py @@ -0,0 +1,1045 @@ +import os +import re +import json +import logging +import glob +from pathlib import Path +from typing import Union, List, Dict, Optional, Tuple, Any +from PIL import Image + +from src.utils.utils import extract_json +from mllm_tools.utils import _prepare_text_inputs, _extract_code, _prepare_text_image_inputs +from mllm_tools.gemini import GeminiWrapper +from mllm_tools.vertex_ai import VertexAIWrapper +from task_generator import ( + get_prompt_code_generation, + get_prompt_fix_error, + get_prompt_visual_fix_error, + get_banned_reasonings, + get_prompt_rag_query_generation_fix_error, + get_prompt_context_learning_code, + get_prompt_rag_query_generation_code +) +from task_generator.prompts_raw import ( + _code_font_size, + _code_disable, + _code_limit, + _prompt_manim_cheatsheet +) +from src.rag.vector_store import RAGVectorStore + +# Configuration constants +DEFAULT_MAX_RETRIES = 10 +DEFAULT_RAG_K_VALUE = 2 +CACHE_FILE_ENCODING = 'utf-8' +CODE_PATTERN = r"```python(.*)```" +JSON_PATTERN = r'```json(.*)```' + +# Set up logging +logger = logging.getLogger(__name__) + +class CodeGenerator: + """A class for generating and managing Manim code with improved error handling and maintainability.""" + + def __init__( + self, + scene_model: Any, + helper_model: Any, + output_dir: str = "output", + print_response: bool = False, + use_rag: bool = False, + use_context_learning: bool = False, + context_learning_path: str = "data/context_learning", + chroma_db_path: str = "rag/chroma_db", + manim_docs_path: str = "rag/manim_docs", + embedding_model: str = "azure/text-embedding-3-large", + use_visual_fix_code: bool = False, + use_langfuse: bool = True, + session_id: Optional[str] = None + ) -> None: + """Initialize the CodeGenerator. + + Args: + scene_model: The model used for scene generation + helper_model: The model used for helper tasks + output_dir (str, optional): Directory for output files. Defaults to "output". + print_response (bool, optional): Whether to print model responses. Defaults to False. 
+ use_rag (bool, optional): Whether to use RAG. Defaults to False. + use_context_learning (bool, optional): Whether to use context learning. Defaults to False. + context_learning_path (str, optional): Path to context learning examples. Defaults to "data/context_learning". + chroma_db_path (str, optional): Path to ChromaDB. Defaults to "rag/chroma_db". + manim_docs_path (str, optional): Path to Manim docs. Defaults to "rag/manim_docs". + embedding_model (str, optional): Name of embedding model. Defaults to "azure/text-embedding-3-large". + use_visual_fix_code (bool, optional): Whether to use visual code fixing. Defaults to False. + use_langfuse (bool, optional): Whether to use Langfuse logging. Defaults to True. + session_id (str, optional): Session identifier. Defaults to None. + """ + self.scene_model = scene_model + self.helper_model = helper_model + self.output_dir = Path(output_dir) + self.print_response = print_response + self.use_rag = use_rag + self.use_context_learning = use_context_learning + self.context_learning_path = Path(context_learning_path) + self.manim_docs_path = Path(manim_docs_path) + self.use_visual_fix_code = use_visual_fix_code + self.session_id = session_id + + # Ensure output directory exists + self.output_dir.mkdir(parents=True, exist_ok=True) + + # Load context examples and banned reasonings + self.context_examples = self._load_context_examples() if use_context_learning else None + self.banned_reasonings = self._load_banned_reasonings() + + # Initialize RAG vector store if enabled + self.vector_store = self._initialize_vector_store( + chroma_db_path, embedding_model, use_langfuse + ) if use_rag else None + + logger.info(f"CodeGenerator initialized with RAG: {use_rag}, Context Learning: {use_context_learning}") + + def _load_banned_reasonings(self) -> List[str]: + """Load banned reasonings with error handling.""" + try: + return get_banned_reasonings() + except Exception as e: + logger.warning(f"Failed to load banned reasonings: {e}") + return [] + + def _initialize_vector_store(self, chroma_db_path: str, embedding_model: str, use_langfuse: bool) -> Optional[RAGVectorStore]: + """Initialize RAG vector store with error handling.""" + try: + return RAGVectorStore( + chroma_db_path=chroma_db_path, + manim_docs_path=str(self.manim_docs_path), + embedding_model=embedding_model, + session_id=self.session_id, + use_langfuse=use_langfuse + ) + except Exception as e: + logger.error(f"Failed to initialize RAG vector store: {e}") + return None + + def _load_context_examples(self) -> Optional[str]: + """Load all context learning examples from the specified directory. + + Returns: + Optional[str]: Formatted context learning examples, or None if no examples found. 
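+
+        Note:
+            Examples are discovered recursively as ``*.py`` files under
+            ``context_learning_path``, each prefixed with a header comment naming
+            its source file, then formatted via ``get_prompt_context_learning_code``.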
+ """ + if not self.context_learning_path.exists(): + logger.warning(f"Context learning path does not exist: {self.context_learning_path}") + return None + + examples = [] + pattern = str(self.context_learning_path / "**" / "*.py") + + try: + for example_file in glob.glob(pattern, recursive=True): + example_path = Path(example_file) + try: + with example_path.open('r', encoding=CACHE_FILE_ENCODING) as f: + content = f.read() + examples.append(f"# Example from {example_path.name}\n{content}\n") + except (IOError, UnicodeDecodeError) as e: + logger.warning(f"Failed to read example file {example_file}: {e}") + continue + + if examples: + formatted_examples = get_prompt_context_learning_code( + examples="\n".join(examples) + ) + logger.info(f"Loaded {len(examples)} context learning examples") + return formatted_examples + + except Exception as e: + logger.error(f"Error loading context examples: {e}") + + return None + + def _create_cache_directory(self, topic: str, scene_number: int, cache_type: str = "rag_cache") -> Path: + """Create and return cache directory path.""" + sanitized_topic = re.sub(r'[^a-z0-9_]+', '_', topic.lower()) + cache_dir = self.output_dir / sanitized_topic / f"scene{scene_number}" / cache_type + cache_dir.mkdir(parents=True, exist_ok=True) + return cache_dir + + def _load_cached_queries(self, cache_file: Path) -> Optional[List[str]]: + """Load cached queries from file with error handling.""" + if not cache_file.exists(): + return None + + try: + with cache_file.open('r', encoding=CACHE_FILE_ENCODING) as f: + cached_queries = json.load(f) + logger.debug(f"Loaded cached queries from {cache_file}") + return cached_queries + except (json.JSONDecodeError, IOError) as e: + logger.warning(f"Failed to load cached queries from {cache_file}: {e}") + return None + + def _save_queries_to_cache(self, queries: List[str], cache_file: Path) -> None: + """Save queries to cache file with error handling.""" + try: + with cache_file.open('w', encoding=CACHE_FILE_ENCODING) as f: + json.dump(queries, f, indent=2) + logger.debug(f"Saved queries to cache: {cache_file}") + except (IOError, TypeError) as e: + logger.error(f"Failed to save queries to cache {cache_file}: {e}") + + def _extract_json_from_response(self, response: str, error_context: str = "") -> List[str]: + """Extract and parse JSON from model response with improved error handling.""" + # Try to extract JSON from code blocks first + json_match = re.search(JSON_PATTERN, response, re.DOTALL) + if json_match: + json_text = json_match.group(1).strip() + else: + # Fallback: clean the response and try direct parsing + json_text = response.replace("```json", "").replace("```", "").strip() + + try: + return json.loads(json_text) + except json.JSONDecodeError as e: + logger.error(f"JSONDecodeError when parsing {error_context}: {e}") + logger.error(f"Response text was: {response[:500]}...") + return [] + + def _generate_rag_queries_code( + self, + implementation: str, + scene_trace_id: Optional[str] = None, + topic: Optional[str] = None, + scene_number: Optional[int] = None, + session_id: Optional[str] = None, + relevant_plugins: List[str] = None + ) -> List[str]: + """Generate RAG queries from the implementation plan. 
+ + Args: + implementation: The implementation plan text + scene_trace_id: Trace ID for the scene + topic: Topic of the scene + scene_number: Scene number + session_id: Session identifier + relevant_plugins: List of relevant plugins + + Returns: + List of generated RAG queries + """ + if relevant_plugins is None: + relevant_plugins = [] + + if not topic or scene_number is None: + logger.warning("Missing topic or scene_number for RAG query generation") + return [] + + # Setup cache + cache_dir = self._create_cache_directory(topic, scene_number) + cache_file = cache_dir / "rag_queries_code.json" + + # Try to load from cache + cached_queries = self._load_cached_queries(cache_file) + if cached_queries is not None: + logger.info(f"Using cached RAG queries for {topic}_scene{scene_number}") + return cached_queries + + # Generate new queries + try: + plugins_text = ", ".join(relevant_plugins) if relevant_plugins else "No plugins are relevant." + prompt = get_prompt_rag_query_generation_code(implementation, plugins_text) + + response = self.helper_model( + _prepare_text_inputs(prompt), + metadata={ + "generation_name": "rag_query_generation", + "trace_id": scene_trace_id, + "tags": [topic, f"scene{scene_number}"], + "session_id": session_id + } + ) + + logger.debug(f"RAG queries response: {response[:200]}...") + queries = self._extract_json_from_response(response, "RAG queries for code generation") + + # Cache the queries + if queries: + self._save_queries_to_cache(queries, cache_file) + + return queries + + except Exception as e: + logger.error(f"Error generating RAG queries for code: {e}") + return [] + + def _generate_rag_queries_error_fix( + self, + error: str, + code: str, + scene_trace_id: Optional[str] = None, + topic: Optional[str] = None, + scene_number: Optional[int] = None, + session_id: Optional[str] = None, + relevant_plugins: List[str] = None + ) -> List[str]: + """Generate RAG queries for fixing code errors. + + Args: + error: The error message to fix + code: The code containing the error + scene_trace_id: Trace ID for the scene + topic: Topic of the scene + scene_number: Scene number + session_id: Session identifier + relevant_plugins: List of relevant plugins + + Returns: + List of generated RAG queries for error fixing + """ + if relevant_plugins is None: + relevant_plugins = [] + + if not topic or scene_number is None: + logger.warning("Missing topic or scene_number for RAG error fix query generation") + return [] + + # Setup cache + cache_dir = self._create_cache_directory(topic, scene_number) + cache_file = cache_dir / "rag_queries_error_fix.json" + + # Try to load from cache + cached_queries = self._load_cached_queries(cache_file) + if cached_queries is not None: + logger.info(f"Using cached RAG error fix queries for {topic}_scene{scene_number}") + return cached_queries + + # Generate new queries for error fix + try: + plugins_text = ", ".join(relevant_plugins) if relevant_plugins else "No plugins are relevant." 
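+            # The helper model is expected to reply with a JSON list of plain-text
+            # search queries, e.g. ["manim Circle set_color", "manim VGroup arrange"]
+            # (illustrative values only); _extract_json_from_response parses it below.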
+ prompt = get_prompt_rag_query_generation_fix_error( + error=error, + code=code, + relevant_plugins=plugins_text + ) + + response = self.helper_model( + _prepare_text_inputs(prompt), + metadata={ + "generation_name": "rag-query-generation-fix-error", + "trace_id": scene_trace_id, + "tags": [topic, f"scene{scene_number}"], + "session_id": session_id + } + ) + + queries = self._extract_json_from_response(response, "RAG queries for error fix") + + # Cache the queries + if queries: + self._save_queries_to_cache(queries, cache_file) + + return queries + + except Exception as e: + logger.error(f"Error generating RAG queries for error fix: {e}") + return [] + + def _extract_code_with_retries( + self, + response_text: str, + pattern: str = CODE_PATTERN, + generation_name: Optional[str] = None, + trace_id: Optional[str] = None, + session_id: Optional[str] = None, + max_retries: int = DEFAULT_MAX_RETRIES + ) -> str: + """Extract code from response text with retry logic. + + Args: + response_text: The text containing code to extract + pattern: Regex pattern for extracting code + generation_name: Name of generation step + trace_id: Trace identifier + session_id: Session identifier + max_retries: Maximum number of retries + + Returns: + The extracted code + + Raises: + ValueError: If code extraction fails after max retries + """ + retry_prompt_template = """ + Please extract the Python code in the correct format using the pattern: {pattern}. + You MUST NOT include any other text or comments. + You MUST return the exact same code as in the previous response, NO CONTENT EDITING is allowed. + Previous response: + {response_text} + """ + + for attempt in range(max_retries): + try: + code_match = re.search(pattern, response_text, re.DOTALL) + if code_match: + extracted_code = code_match.group(1).strip() + logger.debug(f"Successfully extracted code on attempt {attempt + 1}") + return extracted_code + + if attempt < max_retries - 1: + logger.warning(f"Attempt {attempt + 1}: Failed to extract code pattern. Retrying...") + + # Regenerate response with a more explicit prompt + retry_prompt = retry_prompt_template.format( + pattern=pattern, + response_text=response_text[:1000] # Limit response length + ) + + response_text = self.scene_model( + _prepare_text_inputs(retry_prompt), + metadata={ + "generation_name": f"{generation_name}_format_retry_{attempt + 1}", + "trace_id": trace_id, + "session_id": session_id + } + ) + + except Exception as e: + logger.error(f"Error during code extraction attempt {attempt + 1}: {e}") + if attempt == max_retries - 1: + break + + raise ValueError(f"Failed to extract code pattern after {max_retries} attempts. 
Pattern: {pattern}") + + def _prepare_additional_context(self, additional_context: Union[str, List[str], None]) -> List[str]: + """Prepare additional context for code generation.""" + if additional_context is None: + return [] + elif isinstance(additional_context, str): + return [additional_context] + return additional_context.copy() + + def _retrieve_rag_context( + self, + rag_queries: List[str], + scene_trace_id: Optional[str], + topic: str, + scene_number: int + ) -> Optional[str]: + """Retrieve context from RAG vector store.""" + if not self.vector_store or not rag_queries: + return None + + try: + return self.vector_store.find_relevant_docs( + queries=rag_queries, + k=DEFAULT_RAG_K_VALUE, + trace_id=scene_trace_id, + topic=topic, + scene_number=scene_number + ) + except Exception as e: + logger.error(f"Error retrieving RAG context: {e}") + return None + + def generate_manim_code( + self, + topic: str, + description: str, + scene_outline: str, + scene_implementation: str, + scene_number: int, + additional_context: Union[str, List[str], None] = None, + scene_trace_id: Optional[str] = None, + session_id: Optional[str] = None, + rag_queries_cache: Optional[Dict] = None + ) -> Tuple[str, str]: + """Generate Manim code from video plan. + + Args: + topic: Topic of the scene + description: Description of the scene + scene_outline: Outline of the scene + scene_implementation: Implementation details + scene_number: Scene number + additional_context: Additional context + scene_trace_id: Trace identifier + session_id: Session identifier + rag_queries_cache: Cache for RAG queries (deprecated, use file cache) + + Returns: + Tuple of generated code and response text + + Raises: + ValueError: If code generation fails + """ + try: + # Prepare additional context + context_list = self._prepare_additional_context(additional_context) + + # Add context learning examples if enabled + if self.use_context_learning and self.context_examples: + context_list.append(self.context_examples) + + # Add RAG context if enabled + if self.use_rag: + rag_queries = self._generate_rag_queries_code( + implementation=scene_implementation, + scene_trace_id=scene_trace_id, + topic=topic, + scene_number=scene_number, + session_id=session_id or self.session_id + ) + + rag_context = self._retrieve_rag_context( + rag_queries, scene_trace_id, topic, scene_number + ) + + if rag_context: + context_list.append(rag_context) + + # Generate prompt + prompt = get_prompt_code_generation( + scene_outline=scene_outline, + scene_implementation=scene_implementation, + topic=topic, + description=description, + scene_number=scene_number, + additional_context=context_list if context_list else None + ) + + # Generate code using model + response_text = self.scene_model( + _prepare_text_inputs(prompt), + metadata={ + "generation_name": "code_generation", + "trace_id": scene_trace_id, + "tags": [topic, f"scene{scene_number}"], + "session_id": session_id or self.session_id + } + ) + + # Extract code with retries + code = self._extract_code_with_retries( + response_text, + CODE_PATTERN, + generation_name="code_generation", + trace_id=scene_trace_id, + session_id=session_id or self.session_id + ) + + logger.info(f"Successfully generated code for {topic} scene {scene_number}") + return code, response_text + + except Exception as e: + logger.error(f"Error generating Manim code for {topic} scene {scene_number}: {e}") + raise ValueError(f"Code generation failed: {e}") from e + + def fix_code_errors( + self, + implementation_plan: str, + code: str, + error: 
str, + scene_trace_id: str, + topic: str, + scene_number: int, + session_id: str, + rag_queries_cache: Optional[Dict] = None + ) -> Tuple[str, str]: + """Fix errors in generated Manim code. + + Args: + implementation_plan: Original implementation plan + code: Code containing errors + error: Error message to fix + scene_trace_id: Trace identifier + topic: Topic of the scene + scene_number: Scene number + session_id: Session identifier + rag_queries_cache: Cache for RAG queries (deprecated, use file cache) + + Returns: + Tuple of fixed code and response text + + Raises: + ValueError: If code fixing fails + """ + try: + # Start with base error fix prompt + additional_context = None + + # Add RAG context if enabled + if self.use_rag: + rag_queries = self._generate_rag_queries_error_fix( + error=error, + code=code, + scene_trace_id=scene_trace_id, + topic=topic, + scene_number=scene_number, + session_id=session_id + ) + + rag_context = self._retrieve_rag_context( + rag_queries, scene_trace_id, topic, scene_number + ) + + if rag_context: + additional_context = rag_context + + # Generate prompt (with or without RAG context) + if additional_context: + prompt = get_prompt_fix_error( + implementation_plan=implementation_plan, + manim_code=code, + error=error, + additional_context=additional_context + ) + else: + prompt = get_prompt_fix_error( + implementation_plan=implementation_plan, + manim_code=code, + error=error + ) + + # Get fixed code from model + response_text = self.scene_model( + _prepare_text_inputs(prompt), + metadata={ + "generation_name": "code_fix_error", + "trace_id": scene_trace_id, + "tags": [topic, f"scene{scene_number}"], + "session_id": session_id + } + ) + + # Extract fixed code with retries + fixed_code = self._extract_code_with_retries( + response_text, + CODE_PATTERN, + generation_name="code_fix_error", + trace_id=scene_trace_id, + session_id=session_id + ) + + logger.info(f"Successfully fixed code errors for {topic} scene {scene_number}") + return fixed_code, response_text + + except Exception as e: + logger.error(f"Error fixing code for {topic} scene {scene_number}: {e}") + raise ValueError(f"Code error fixing failed: {e}") from e + + def visual_self_reflection( + self, + code: str, + media_path: Union[str, Image.Image], + scene_trace_id: str, + topic: str, + scene_number: int, + session_id: str + ) -> Tuple[str, str]: + """Use snapshot image or mp4 video to fix code. + + Args: + code: Code to fix + media_path: Path to media file or PIL Image + scene_trace_id: Trace identifier + topic: Topic of the scene + scene_number: Scene number + session_id: Session identifier + + Returns: + Tuple of fixed code and response text + + Raises: + ValueError: If visual self-reflection fails + FileNotFoundError: If media file doesn't exist + """ + try: + # Validate media input + if isinstance(media_path, str): + media_file = Path(media_path) + if not media_file.exists(): + raise FileNotFoundError(f"Media file not found: {media_path}") + + # Determine if we're dealing with video or image + is_video = isinstance(media_path, str) and media_path.lower().endswith('.mp4') + + # Load prompt template + prompt_file = Path('task_generator/prompts_raw/prompt_visual_self_reflection.txt') + if not prompt_file.exists(): + logger.warning(f"Visual self-reflection prompt file not found: {prompt_file}") + # Fallback prompt + prompt_template = """ + Analyze the visual output and the provided code. Fix any issues you notice in the code. 
+ + Code: + {code} + """ + else: + with prompt_file.open('r', encoding=CACHE_FILE_ENCODING) as f: + prompt_template = f.read() + + # Format prompt + prompt = prompt_template.format(code=code) + + # Prepare input based on media type and model capabilities + if is_video and isinstance(self.scene_model, (GeminiWrapper, VertexAIWrapper)): + # For video with Gemini models + messages = [ + {"type": "text", "content": prompt}, + {"type": "video", "content": str(media_path)} + ] + else: + # For images or non-Gemini models + if isinstance(media_path, str): + media = Image.open(media_path) + else: + media = media_path + messages = [ + {"type": "text", "content": prompt}, + {"type": "image", "content": media} + ] + + # Get model response + response_text = self.scene_model( + messages, + metadata={ + "generation_name": "visual_self_reflection", + "trace_id": scene_trace_id, + "tags": [topic, f"scene{scene_number}"], + "session_id": session_id + } + ) + + # Extract code with retries + fixed_code = self._extract_code_with_retries( + response_text, + CODE_PATTERN, + generation_name="visual_self_reflection", + trace_id=scene_trace_id, + session_id=session_id + ) + + logger.info(f"Successfully completed visual self-reflection for {topic} scene {scene_number}") + return fixed_code, response_text + + except Exception as e: + logger.error(f"Error in visual self-reflection for {topic} scene {scene_number}: {e}") + raise ValueError(f"Visual self-reflection failed: {e}") from e + + def enhanced_visual_self_reflection( + self, + code: str, + media_path: Union[str, Image.Image], + scene_trace_id: str, + topic: str, + scene_number: int, + session_id: str, + implementation_plan: Optional[str] = None + ) -> Tuple[str, str]: + """Enhanced visual self-reflection using VLM for detailed error detection. 
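+
+        The underlying prompt is expected to answer with a bare ``<LGTM>`` marker
+        when the rendered output passes inspection, or with an ``<improved_code>``
+        block containing corrected code otherwise (tag names as used by the
+        fallback prompt below).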
+
+        This method specifically focuses on detecting and fixing:
+        - Element overlap and collision
+        - Out-of-bounds positioning
+        - Spatial boundary violations
+        - Poor visual arrangement
+        - Educational effectiveness issues
+
+        Args:
+            code: Code to analyze and fix
+            media_path: Path to media file or PIL Image
+            scene_trace_id: Trace identifier
+            topic: Topic of the scene
+            scene_number: Scene number
+            session_id: Session identifier
+            implementation_plan: Optional implementation plan for context
+
+        Returns:
+            Tuple of fixed code and response text
+
+        Raises:
+            ValueError: If enhanced visual analysis fails
+            FileNotFoundError: If media file doesn't exist
+        """
+        try:
+            # Validate media input
+            if isinstance(media_path, str):
+                media_file = Path(media_path)
+                if not media_file.exists():
+                    raise FileNotFoundError(f"Media file not found: {media_path}")
+
+            # Determine if we're dealing with video or image
+            is_video = isinstance(media_path, str) and media_path.lower().endswith('.mp4')
+
+            # Load enhanced visual analysis prompt
+            enhanced_prompt_file = Path('task_generator/prompts_raw/prompt_enhanced_visual_self_reflection.txt')
+            if enhanced_prompt_file.exists():
+                with enhanced_prompt_file.open('r', encoding=CACHE_FILE_ENCODING) as f:
+                    prompt_template = f.read()
+            else:
+                # Fallback to original prompt if enhanced version not found
+                logger.warning("Enhanced visual self-reflection prompt not found, using fallback")
+                prompt_template = self._get_fallback_visual_prompt()
+
+            # Format prompt with implementation plan and code
+            prompt = prompt_template.format(
+                implementation=implementation_plan or "No implementation plan provided",
+                code=code
+            )
+
+            # Prepare input based on media type and model capabilities
+            if is_video and isinstance(self.scene_model, (GeminiWrapper, VertexAIWrapper)):
+                # For video with Gemini/Vertex AI models
+                messages = [
+                    {"type": "text", "content": prompt},
+                    {"type": "video", "content": str(media_path)}
+                ]
+            else:
+                # For images or non-Gemini models
+                if isinstance(media_path, str):
+                    media = Image.open(media_path)
+                else:
+                    media = media_path
+                messages = [
+                    {"type": "text", "content": prompt},
+                    {"type": "image", "content": media}
+                ]
+
+            # Get enhanced VLM analysis response
+            response_text = self.scene_model(
+                messages,
+                metadata={
+                    "generation_name": "enhanced_visual_self_reflection",
+                    "trace_id": scene_trace_id,
+                    "tags": [topic, f"scene{scene_number}", "visual_error_detection"],
+                    "session_id": session_id
+                }
+            )
+
+            # Parse response for visual analysis results
+            if "<LGTM>" in response_text or response_text.strip() == "<LGTM>":
+                logger.info(f"Enhanced visual analysis passed for {topic} scene {scene_number}")
+                return code, response_text
+
+            # Extract improved code if visual issues were found
+            fixed_code = self._extract_visual_fix_code(response_text, scene_trace_id, session_id)
+
+            logger.info(f"Enhanced visual self-reflection completed with fixes for {topic} scene {scene_number}")
+            return fixed_code, response_text
+
+        except Exception as e:
+            logger.error(f"Error in enhanced visual self-reflection for {topic} scene {scene_number}: {e}")
+            # Fallback to original visual_self_reflection if enhanced version fails
+            logger.info("Falling back to original visual_self_reflection method")
+            return self.visual_self_reflection(
+                code, media_path, scene_trace_id, topic, scene_number, session_id
+            )
+
+    def _extract_visual_fix_code(
+        self,
+        response_text: str,
+        scene_trace_id: Optional[str] = None,
+        session_id: Optional[str] = None
+    ) -> str:
+        """Extract code from enhanced visual analysis response.
+
+        Args:
+            response_text: The VLM response containing visual analysis
+            scene_trace_id: Trace identifier
+            session_id: Session identifier
+
+        Returns:
+            The extracted and fixed code
+
+        Raises:
+            ValueError: If code extraction fails
+        """
+        # Try to extract code from <improved_code> tags first
+        improved_code_pattern = r'<improved_code>\s*```python\s*(.*?)\s*```\s*</improved_code>'
+        code_match = re.search(improved_code_pattern, response_text, re.DOTALL)
+
+        if code_match:
+            extracted_code = code_match.group(1).strip()
+            logger.debug("Successfully extracted code from <improved_code> tags")
+            return extracted_code
+
+        # Fallback to standard code extraction
+        return self._extract_code_with_retries(
+            response_text,
+            CODE_PATTERN,
+            generation_name="enhanced_visual_fix",
+            trace_id=scene_trace_id,
+            session_id=session_id
+        )
+
+    def _get_fallback_visual_prompt(self) -> str:
+        """Get fallback visual analysis prompt if enhanced version is not available."""
+        return """
+        Analyze the visual output and the provided code for the following issues:
+
+        1. **Element Overlap:** Check for overlapping text, shapes, or mathematical expressions
+        2. **Out-of-Bounds Objects:** Identify elements outside the visible frame
+        3. **Spacing Issues:** Verify minimum 0.3 unit spacing between elements
+        4. **Safe Area Compliance:** Ensure 0.5 unit margins from frame edges
+        5. **Educational Clarity:** Assess if arrangement supports learning objectives
+
+        Implementation Plan: {implementation}
+
+        Code to analyze:
+        {code}
+
+        If issues are found, provide fixed code. If no issues, return "<LGTM>".
+
+        <improved_code>
+        ```python
+        [Fixed code here]
+        ```
+        </improved_code>
+        """
+
+    def detect_visual_errors(
+        self,
+        media_path: Union[str, Image.Image],
+        scene_trace_id: Optional[str] = None,
+        topic: Optional[str] = None,
+        scene_number: Optional[int] = None,
+        session_id: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """Detect visual errors using VLM without code modification.
+
+        This method provides detailed visual error analysis without attempting to fix code.
+        Useful for validation and quality assessment.
+
+        Args:
+            media_path: Path to media file or PIL Image
+            scene_trace_id: Trace identifier
+            topic: Topic of the scene
+            scene_number: Scene number
+            session_id: Session identifier
+
+        Returns:
+            Dictionary containing visual error analysis results
+
+        Raises:
+            ValueError: If visual error detection fails
+            FileNotFoundError: If media file doesn't exist
+        """
+        try:
+            # Validate media input
+            if isinstance(media_path, str):
+                media_file = Path(media_path)
+                if not media_file.exists():
+                    raise FileNotFoundError(f"Media file not found: {media_path}")
+
+            # Create analysis prompt
+            analysis_prompt = """
+            You are an expert visual quality analyst. Analyze this Manim-generated frame/video for:
+
+            1. **Element Overlap Detection:**
+               - Text overlapping with shapes or other text
+               - Mathematical expressions colliding
+               - Unintentional object occlusion
+
+            2. **Spatial Boundary Issues:**
+               - Objects extending beyond frame boundaries
+               - Violations of safe area margins (0.5 units from edges)
+               - Insufficient spacing between elements (minimum 0.3 units)
+
+            3.
**Visual Quality Assessment:** + - Overall composition balance + - Readability of text elements + - Educational effectiveness of arrangement + + Provide your analysis in the following format: + + **VISUAL ERROR ANALYSIS:** + - Overlap Issues: [List any overlapping elements] + - Boundary Violations: [List out-of-bounds elements] + - Spacing Problems: [List spacing violations] + - Quality Issues: [List other visual problems] + + **SEVERITY ASSESSMENT:** + - Critical Errors: [Issues that severely impact readability] + - Major Errors: [Issues that noticeably reduce quality] + - Minor Errors: [Issues that slightly affect visual appeal] + + **OVERALL RATING:** [Excellent/Good/Fair/Poor] + """ + + # Determine media type and prepare input + is_video = isinstance(media_path, str) and media_path.lower().endswith('.mp4') + + if is_video and isinstance(self.scene_model, (GeminiWrapper, VertexAIWrapper)): + messages = [ + {"type": "text", "content": analysis_prompt}, + {"type": "video", "content": str(media_path)} + ] + else: + if isinstance(media_path, str): + media = Image.open(media_path) + else: + media = media_path + messages = [ + {"type": "text", "content": analysis_prompt}, + {"type": "image", "content": media} + ] + + # Get analysis response + response_text = self.scene_model( + messages, + metadata={ + "generation_name": "visual_error_detection", + "trace_id": scene_trace_id, + "tags": [topic or "unknown", f"scene{scene_number or 0}", "quality_analysis"], + "session_id": session_id or self.session_id + } + ) + + # Parse response into structured results + analysis_results = self._parse_visual_analysis(response_text) + + logger.info(f"Visual error detection completed for scene {scene_number or 'unknown'}") + return analysis_results + + except Exception as e: + logger.error(f"Error in visual error detection: {e}") + raise ValueError(f"Visual error detection failed: {e}") from e + + def _parse_visual_analysis(self, response_text: str) -> Dict[str, Any]: + """Parse visual analysis response into structured data. 
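+
+        Sections are pulled out with regexes keyed on the analysis headings
+        (e.g. ``Overlap Issues:``, ``OVERALL RATING``); anything that fails to
+        parse keeps its default value, and the full response is preserved under
+        ``raw_analysis``.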
+ + Args: + response_text: Raw response from VLM + + Returns: + Structured analysis results + """ + results = { + "overlap_issues": [], + "boundary_violations": [], + "spacing_problems": [], + "quality_issues": [], + "critical_errors": [], + "major_errors": [], + "minor_errors": [], + "overall_rating": "Unknown", + "raw_analysis": response_text + } + + try: + # Extract different sections using regex patterns + overlap_match = re.search(r'Overlap Issues:\s*(.*?)(?=\n-|\n\*\*|$)', response_text, re.DOTALL) + if overlap_match: + results["overlap_issues"] = [item.strip() for item in overlap_match.group(1).split('\n') if item.strip()] + + boundary_match = re.search(r'Boundary Violations:\s*(.*?)(?=\n-|\n\*\*|$)', response_text, re.DOTALL) + if boundary_match: + results["boundary_violations"] = [item.strip() for item in boundary_match.group(1).split('\n') if item.strip()] + + rating_match = re.search(r'OVERALL RATING.*?:\s*([A-Za-z]+)', response_text) + if rating_match: + results["overall_rating"] = rating_match.group(1) + + except Exception as e: + logger.warning(f"Error parsing visual analysis: {e}") + + return results \ No newline at end of file diff --git a/src/core/parse_video.py b/src/core/parse_video.py new file mode 100644 index 0000000000000000000000000000000000000000..45bea246b2aba3340b8b461c7bf58d0bdbc4a473 --- /dev/null +++ b/src/core/parse_video.py @@ -0,0 +1,227 @@ +import os +import pysrt +from moviepy import VideoFileClip +import shutil +from PIL import Image, ImageOps +import numpy as np +import speech_recognition as sr + +def get_images_from_video(video_path, fps=0.2): + """Extract frames from a video file at specified FPS. + + Args: + video_path (str): Path to the video file. + fps (float, optional): Frames per second to extract. Defaults to 0.2. + + Returns: + list: List of frames as numpy arrays. + """ + clip = VideoFileClip(video_path) + images = clip.iter_frames(fps=fps) + return images + +def image_with_most_non_black_space(images, output_path, return_type="path"): + """Find and save the image with the most non-black space from a list of images. + + Args: + images (list): List of image file paths, PIL Image objects, or numpy arrays. + output_path (str): Path where the output image should be saved. + return_type (str, optional): Type of return value - "path" or "image". Defaults to "path". + + Returns: + Union[str, PIL.Image, None]: Path to saved image, PIL Image object, or None if no valid image found. + """ + max_non_black_area = 0 + image_with_max_non_black_space = None + + for img in images: + try: + # If img is a path, open the image + if isinstance(img, str): + image = Image.open(img) + elif isinstance(img, Image.Image): + image = img + elif isinstance(img, np.ndarray): + image = Image.fromarray(img) + else: + print(f"Unsupported type: {type(img)}. 
Skipping.") + continue + + # Convert to grayscale + gray = ImageOps.grayscale(image) + + # Convert to numpy array + gray_array = np.array(gray) + + # Count non-black pixels (threshold to consider near-black as black) + non_black_pixels = np.sum(gray_array > 10) # Threshold 10 to account for slight variations in black + + if non_black_pixels > max_non_black_area: + max_non_black_area = non_black_pixels + image_with_max_non_black_space = image + + except Exception as e: + print(f"Warning: Unable to process image {img}: {e}") + + if image_with_max_non_black_space is not None: + image_with_max_non_black_space.save(output_path) + print(f"Saved image with most non-black space to {output_path}") + + if return_type == "path": + return output_path + else: + return image_with_max_non_black_space + return image_with_max_non_black_space + +def parse_srt_to_text(output_dir, topic_name): + """Convert SRT subtitle file to plain text. + + Args: + output_dir (str): Directory containing the topic folders. + topic_name (str): Name of the topic/video. + """ + topic_name = topic_name.replace(" ", "_").lower() + srt_path = os.path.join(output_dir, topic_name, f"{topic_name}_combined.srt") + txt_path = os.path.join(output_dir, topic_name, f"{topic_name}_combined.txt") + subs = pysrt.open(srt_path) + + with open(txt_path, 'w') as f: + full_text = "" + for sub in subs: + sub.text = sub.text.replace("...", ".") + full_text += sub.text + " " + f.write(full_text.strip()) + +def parse_srt_and_extract_frames(output_dir, topic_name): + """Extract frames from video at subtitle timestamps and save with corresponding text. + + Args: + output_dir (str): Directory containing the topic folders. + topic_name (str): Name of the topic/video. + """ + topic_name = topic_name.replace(" ", "_").lower() + video_path = os.path.join(output_dir, topic_name, f"{topic_name}_combined.mp4") + srt_path = os.path.join(output_dir, topic_name, f"{topic_name}_combined.srt") + subs = pysrt.open(srt_path) + + # Create extract_images folder if it doesn't exist + images_dir = os.path.join(output_dir, topic_name, "extract_images") + if os.path.exists(images_dir): + shutil.rmtree(images_dir) + os.makedirs(images_dir, exist_ok=True) + + # Load the video file + video = VideoFileClip(video_path) + + # Dictionary to store image-text pairs + pairs = {} + + i = 0 + while i < len(subs): + sub = subs[i] + text = sub.text + sub_indexes = [sub.index] + + # Check if we need to concatenate with next subtitle + while i < len(subs) - 1 and not text.strip().endswith('.'): + i += 1 + next_sub = subs[i] + text += " " + next_sub.text + sub_indexes.append(next_sub.index) + + # Get the end time of the last concatenated subtitle + end_time = sub.end.to_time() + # Convert end time to seconds + end_time_seconds = end_time.hour * 3600 + end_time.minute * 60 + end_time.second + end_time.microsecond / 1e6 + + # Save the frame as an image in extract_images folder + frame_path = os.path.join(images_dir, f"{sub.index}.jpg") + video.save_frame(frame_path, t=end_time_seconds) + + # Save the subtitle text to a txt file + text_path = os.path.join(images_dir, f"{sub.index}.txt") + with open(text_path, 'w') as f: + f.write(text) + + # Add pair to dictionary + pairs[str(sub.index)] = { + "image_path": f"{sub.index}.jpg", + "text": text, + "text_path": f"{sub.index}.txt", + "srt_index": sub_indexes, + } + + i += 1 + + # Save pairs to json file + import json + json_path = os.path.join(images_dir, "pairs.json") + with open(json_path, 'w') as f: + json.dump(pairs, f, indent=4) + + # Close 
the video file
+    video.close()
+
+def extract_transcript(video_path):
+    """Extract transcript from video audio using Google Speech Recognition.
+
+    Args:
+        video_path (str): Path to the video file.
+
+    Returns:
+        str: Transcribed text from the video audio.
+
+    Raises:
+        FileNotFoundError: If video file does not exist.
+    """
+    if not os.path.exists(video_path):
+        raise FileNotFoundError(f"Video file not found: {video_path}")
+
+    clip = VideoFileClip(video_path)
+
+    # write the audio track to a temporary wav file
+    audio_path = os.path.join(os.path.dirname(video_path), "audio.wav")
+    clip.audio.write_audiofile(audio_path)
+
+    try:
+        # transcribe the audio file
+        recognizer = sr.Recognizer()
+        with sr.AudioFile(audio_path) as source:
+            audio = recognizer.record(source)
+        return recognizer.recognize_google(audio)
+    finally:
+        # clean up the temporary audio file
+        if os.path.exists(audio_path):
+            os.remove(audio_path)
+
+if __name__ == "__main__":
+    import argparse
+
+    def process_all_topics(output_folder):
+        """Process all topic folders in the output directory.
+
+        Args:
+            output_folder (str): Directory containing the topic folders.
+        """
+        # Only get immediate subdirectories
+        topics = [d for d in os.listdir(output_folder)
+                 if os.path.isdir(os.path.join(output_folder, d))]
+
+        for topic in topics:
+            print(f"\nProcessing topic: {topic}")
+            try:
+                parse_srt_to_text(output_folder, topic)
+                parse_srt_and_extract_frames(output_folder, topic)
+            except Exception as e:
+                print(f"Error processing {topic}: {str(e)}")
+                continue
+
+    # Set up argument parser
+    parser = argparse.ArgumentParser(description='Process video files and extract frames with subtitles')
+    parser.add_argument('--output_dir', type=str, default="output",
+                       help='Directory containing the topic folders')
+
+    args = parser.parse_args()
+
+    # Process topics using provided output directory
+    process_all_topics(args.output_dir)
\ No newline at end of file
diff --git a/src/core/video_planner.py b/src/core/video_planner.py
new file mode 100644
index 0000000000000000000000000000000000000000..244d942984cf74cc25da661b423a626dd6d2e18e
--- /dev/null
+++ b/src/core/video_planner.py
@@ -0,0 +1,670 @@
+import os
+import re
+import json
+import glob
+from typing import List, Optional, Dict, Tuple
+import uuid
+import asyncio
+import time
+from concurrent.futures import ThreadPoolExecutor
+from functools import lru_cache
+import aiofiles
+
+from mllm_tools.utils import _prepare_text_inputs
+from src.utils.utils import extract_xml
+from task_generator import (
+    get_prompt_scene_plan,
+    get_prompt_scene_vision_storyboard,
+    get_prompt_scene_technical_implementation,
+    get_prompt_scene_animation_narration,
+    get_prompt_context_learning_scene_plan,
+    get_prompt_context_learning_vision_storyboard,
+    get_prompt_context_learning_technical_implementation,
+    get_prompt_context_learning_animation_narration,
+    get_prompt_context_learning_code
+)
+from src.rag.rag_integration import RAGIntegration
+
+class EnhancedVideoPlanner:
+    """Enhanced video planner with improved parallelization and performance."""
+
+    def __init__(self, planner_model, helper_model=None, output_dir="output",
+                 print_response=False, use_context_learning=False,
+                 context_learning_path="data/context_learning", use_rag=False,
+                 session_id=None, chroma_db_path="data/rag/chroma_db",
+                 manim_docs_path="data/rag/manim_docs",
+                 embedding_model="text-embedding-ada-002", use_langfuse=True,
+                 max_scene_concurrency=5, max_step_concurrency=3, enable_caching=True):
+
+        self.planner_model =
planner_model + self.helper_model = helper_model if helper_model is not None else planner_model + self.output_dir = output_dir + self.print_response = print_response + self.use_context_learning = use_context_learning + self.context_learning_path = context_learning_path + self.use_rag = use_rag + self.session_id = session_id + self.enable_caching = enable_caching + + # Enhanced concurrency control + self.max_scene_concurrency = max_scene_concurrency + self.max_step_concurrency = max_step_concurrency + self.scene_semaphore = asyncio.Semaphore(max_scene_concurrency) + self.step_semaphore = asyncio.Semaphore(max_step_concurrency) + + # Thread pool for I/O operations + self.thread_pool = ThreadPoolExecutor(max_workers=4) + + # Cache for prompts and examples + self._context_cache = {} + self._prompt_cache = {} + + # Initialize context examples with caching + self._initialize_context_examples() + + # Initialize RAG with enhanced settings + self.rag_integration = None + self.relevant_plugins = [] + if use_rag: + self.rag_integration = RAGIntegration( + helper_model=helper_model, + output_dir=output_dir, + chroma_db_path=chroma_db_path, + manim_docs_path=manim_docs_path, + embedding_model=embedding_model, + use_langfuse=use_langfuse, + session_id=session_id + ) + + def _initialize_context_examples(self): + """Initialize and cache context examples for faster access.""" + example_types = [ + 'scene_plan', 'scene_vision_storyboard', 'technical_implementation', + 'scene_animation_narration', 'code' + ] + + if self.use_context_learning: + for example_type in example_types: + self._context_cache[example_type] = self._load_context_examples(example_type) + else: + for example_type in example_types: + self._context_cache[example_type] = None + + @lru_cache(maxsize=128) + def _get_cached_prompt(self, prompt_type: str, *args) -> str: + """Get cached prompt to avoid regeneration.""" + prompt_generators = { + 'scene_plan': get_prompt_scene_plan, + 'scene_vision_storyboard': get_prompt_scene_vision_storyboard, + 'scene_technical_implementation': get_prompt_scene_technical_implementation, + 'scene_animation_narration': get_prompt_scene_animation_narration + } + + generator = prompt_generators.get(prompt_type) + if generator: + return generator(*args) + return "" + + async def _async_file_write(self, file_path: str, content: str): + """Asynchronous file writing for better performance.""" + async with aiofiles.open(file_path, 'w', encoding='utf-8') as f: + await f.write(content) + + async def _async_file_read(self, file_path: str) -> str: + """Asynchronous file reading.""" + try: + async with aiofiles.open(file_path, 'r', encoding='utf-8') as f: + return await f.read() + except FileNotFoundError: + return None + + async def _ensure_directories(self, *paths): + """Asynchronously ensure directories exist.""" + loop = asyncio.get_event_loop() + for path in paths: + await loop.run_in_executor(self.thread_pool, lambda p: os.makedirs(p, exist_ok=True), path) + + def _load_context_examples(self, example_type: str) -> str: + """Load context learning examples with improved performance.""" + if example_type in self._context_cache: + return self._context_cache[example_type] + + examples = [] + file_patterns = { + 'scene_plan': '*_scene_plan.txt', + 'scene_vision_storyboard': '*_scene_vision_storyboard.txt', + 'technical_implementation': '*_technical_implementation.txt', + 'scene_animation_narration': '*_scene_animation_narration.txt', + 'code': '*.py' + } + + pattern = file_patterns.get(example_type) + if not pattern: + 
return None + + # Use glob for faster file discovery + search_pattern = os.path.join(self.context_learning_path, "**", pattern) + for example_file in glob.glob(search_pattern, recursive=True): + try: + with open(example_file, 'r', encoding='utf-8') as f: + content = f.read() + examples.append(f"# Example from {os.path.basename(example_file)}\n{content}\n") + except Exception as e: + print(f"Warning: Could not load example {example_file}: {e}") + + if examples: + formatted_examples = self._format_examples(example_type, examples) + self._context_cache[example_type] = formatted_examples + return formatted_examples + return None + + def _format_examples(self, example_type: str, examples: List[str]) -> str: + """Format examples using the appropriate template.""" + templates = { + 'scene_plan': get_prompt_context_learning_scene_plan, + 'scene_vision_storyboard': get_prompt_context_learning_vision_storyboard, + 'technical_implementation': get_prompt_context_learning_technical_implementation, + 'scene_animation_narration': get_prompt_context_learning_animation_narration, + 'code': get_prompt_context_learning_code + } + + template = templates.get(example_type) + if template: + return template(examples="\n".join(examples)) + return None + + async def generate_scene_outline(self, topic: str, description: str, session_id: str) -> str: + """Enhanced scene outline generation with async I/O.""" + start_time = time.time() + + # Detect relevant plugins upfront if RAG is enabled + if self.use_rag and self.rag_integration: + plugin_detection_task = asyncio.create_task( + self._detect_plugins_async(topic, description) + ) + + # Prepare prompt with cached examples + prompt = self._get_cached_prompt('scene_plan', topic, description) + + if self.use_context_learning and self._context_cache.get('scene_plan'): + prompt += f"\n\nHere are some example scene plans for reference:\n{self._context_cache['scene_plan']}" + + # Wait for plugin detection if enabled + if self.use_rag and self.rag_integration: + self.relevant_plugins = await plugin_detection_task + print(f"✅ Detected relevant plugins: {self.relevant_plugins}") + + # Generate plan using planner model + response_text = self.planner_model( + _prepare_text_inputs(prompt), + metadata={ + "generation_name": "scene_outline", + "tags": [topic, "scene-outline"], + "session_id": session_id + } + ) + + # Extract scene outline with improved error handling + scene_outline = self._extract_scene_outline_robust(response_text) + + # Async file operations + file_prefix = re.sub(r'[^a-z0-9_]+', '_', topic.lower()) + output_dir = os.path.join(self.output_dir, file_prefix) + await self._ensure_directories(output_dir) + + file_path = os.path.join(output_dir, f"{file_prefix}_scene_outline.txt") + await self._async_file_write(file_path, scene_outline) + + elapsed_time = time.time() - start_time + print(f"Scene outline generated in {elapsed_time:.2f}s - saved to {file_prefix}_scene_outline.txt") + + return scene_outline + + async def _detect_plugins_async(self, topic: str, description: str) -> List[str]: + """Asynchronously detect relevant plugins.""" + loop = asyncio.get_event_loop() + return await loop.run_in_executor( + self.thread_pool, + lambda: self.rag_integration.detect_relevant_plugins(topic, description) or [] + ) + + async def _generate_scene_step_parallel(self, step_name: str, prompt_func, + scene_trace_id: str, topic: str, + scene_number: int, session_id: str, + output_path: str, *args) -> Tuple[str, str]: + """Generate a single scene step with async operations.""" + 
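+        # Two throttles apply to each step: the caller already holds
+        # scene_semaphore (at most max_scene_concurrency scenes in flight), and
+        # step_semaphore below additionally caps concurrently running steps at
+        # max_step_concurrency across all scenes.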
+        async with self.step_semaphore:  # Control step-level concurrency
+
+            # Check cache first if enabled
+            if self.enable_caching:
+                cached_content = await self._async_file_read(output_path)
+                if cached_content:
+                    print(f"Using cached {step_name} for scene {scene_number}")
+                    return cached_content, output_path
+
+            print(f"🚀 Generating {step_name} for scene {scene_number}")
+            start_time = time.time()
+
+            # Generate prompt
+            prompt = prompt_func(*args)
+
+            # Add context examples if available
+            example_type = step_name.replace('_plan', '').replace('scene_', '')
+            if self._context_cache.get(example_type):
+                prompt += f"\n\nHere are some example {step_name}s:\n{self._context_cache[example_type]}"
+
+            # Add RAG context if enabled
+            if self.use_rag and self.rag_integration:
+                rag_queries = await self._generate_rag_queries_async(
+                    step_name, args, scene_trace_id, topic, scene_number, session_id
+                )
+
+                if rag_queries:
+                    retrieved_docs = self.rag_integration.get_relevant_docs(
+                        rag_queries=rag_queries,
+                        scene_trace_id=scene_trace_id,
+                        topic=topic,
+                        scene_number=scene_number
+                    )
+                    prompt += f"\n\n{retrieved_docs}"
+
+            # Generate content
+            response = self.planner_model(
+                _prepare_text_inputs(prompt),
+                metadata={
+                    "generation_name": step_name,
+                    "trace_id": scene_trace_id,
+                    "tags": [topic, f"scene{scene_number}"],
+                    "session_id": session_id
+                }
+            )
+
+            # Extract content using step-specific patterns
+            extraction_patterns = {
+                'scene_vision_storyboard': r'<SCENE_VISION_STORYBOARD_PLAN>(.*?)</SCENE_VISION_STORYBOARD_PLAN>',
+                'scene_technical_implementation': r'<SCENE_TECHNICAL_IMPLEMENTATION_PLAN>(.*?)</SCENE_TECHNICAL_IMPLEMENTATION_PLAN>',
+                'scene_animation_narration': r'<SCENE_ANIMATION_NARRATION_PLAN>(.*?)</SCENE_ANIMATION_NARRATION_PLAN>'
+            }
+
+            pattern = extraction_patterns.get(step_name)
+            if pattern:
+                match = re.search(pattern, response, re.DOTALL)
+                content = match.group(1) if match else response
+            else:
+                content = response
+
+            # Async file save
+            await self._async_file_write(output_path, content)
+
+            elapsed_time = time.time() - start_time
+            print(f"{step_name} for scene {scene_number} completed in {elapsed_time:.2f}s")
+
+            return content, output_path
+
+    async def _generate_rag_queries_async(self, step_name: str, args: tuple,
+                                          scene_trace_id: str, topic: str,
+                                          scene_number: int, session_id: str) -> List[Dict]:
+        """Generate RAG queries asynchronously based on step type."""
+        query_generators = {
+            'scene_vision_storyboard': self.rag_integration._generate_rag_queries_storyboard,
+            'scene_technical_implementation': self.rag_integration._generate_rag_queries_technical,
+            'scene_animation_narration': self.rag_integration._generate_rag_queries_narration
+        }
+
+        generator = query_generators.get(step_name)
+        if not generator:
+            return []
+
+        # Map args to appropriate parameters based on step
+        if step_name == 'scene_vision_storyboard':
+            scene_plan = args[3] if len(args) > 3 else ""
+            return generator(
+                scene_plan=scene_plan,
+                scene_trace_id=scene_trace_id,
+                topic=topic,
+                scene_number=scene_number,
+                session_id=session_id,
+                relevant_plugins=self.relevant_plugins
+            )
+        elif step_name == 'scene_technical_implementation':
+            storyboard = args[4] if len(args) > 4 else ""
+            return generator(
+                storyboard=storyboard,
+                scene_trace_id=scene_trace_id,
+                topic=topic,
+                scene_number=scene_number,
+                session_id=session_id,
+                relevant_plugins=self.relevant_plugins
+            )
+        elif step_name == 'scene_animation_narration':
+            storyboard = args[4] if len(args) > 4 else ""
+            return generator(
+                storyboard=storyboard,
+                scene_trace_id=scene_trace_id,
+                topic=topic,
+                scene_number=scene_number,
+                session_id=session_id,
+                relevant_plugins=self.relevant_plugins
+            )
+
+        return []
+
+    async def
_generate_scene_implementation_single_enhanced(self, topic: str, description: str, + scene_outline_i: str, scene_number: int, + file_prefix: str, session_id: str, + scene_trace_id: str) -> str: + """Enhanced single scene implementation with parallel steps.""" + start_time = time.time() + print(f"Starting scene {scene_number} implementation (parallel processing)") + + # Setup directories + scene_dir = os.path.join(self.output_dir, file_prefix, f"scene{scene_number}") + subplan_dir = os.path.join(scene_dir, "subplans") + await self._ensure_directories(scene_dir, subplan_dir) + + # Save scene trace ID + trace_id_file = os.path.join(subplan_dir, "scene_trace_id.txt") + await self._async_file_write(trace_id_file, scene_trace_id) + + # Define all steps with their configurations + steps_config = [ + { + 'name': 'scene_vision_storyboard', + 'prompt_func': get_prompt_scene_vision_storyboard, + 'args': (scene_number, topic, description, scene_outline_i, self.relevant_plugins), + 'output_path': os.path.join(subplan_dir, f"{file_prefix}_scene{scene_number}_vision_storyboard_plan.txt") + } + ] + + # Execute Step 1: Vision Storyboard (sequential dependency) + vision_storyboard_content, _ = await self._generate_scene_step_parallel( + steps_config[0]['name'], + steps_config[0]['prompt_func'], + scene_trace_id, + topic, + scene_number, + session_id, + steps_config[0]['output_path'], + *steps_config[0]['args'] + ) + + # Prepare Step 2 and 3 for parallel execution (both depend on Step 1) + remaining_steps = [ + { + 'name': 'scene_technical_implementation', + 'prompt_func': get_prompt_scene_technical_implementation, + 'args': (scene_number, topic, description, scene_outline_i, vision_storyboard_content, self.relevant_plugins), + 'output_path': os.path.join(subplan_dir, f"{file_prefix}_scene{scene_number}_technical_implementation_plan.txt") + }, + { + 'name': 'scene_animation_narration', + 'prompt_func': get_prompt_scene_animation_narration, + 'args': (scene_number, topic, description, scene_outline_i, vision_storyboard_content, None, self.relevant_plugins), + 'output_path': os.path.join(subplan_dir, f"{file_prefix}_scene{scene_number}_animation_narration_plan.txt") + } + ] + + # Execute Steps 2 and 3 in parallel + parallel_tasks = [] + for step_config in remaining_steps: + task = asyncio.create_task( + self._generate_scene_step_parallel( + step_config['name'], + step_config['prompt_func'], + scene_trace_id, + topic, + scene_number, + session_id, + step_config['output_path'], + *step_config['args'] + ) + ) + parallel_tasks.append(task) + + # Wait for parallel tasks to complete + parallel_results = await asyncio.gather(*parallel_tasks) + technical_implementation_content = parallel_results[0][0] + animation_narration_content = parallel_results[1][0] + + # Update animation narration args with technical implementation and regenerate if needed + if technical_implementation_content: + updated_animation_args = ( + scene_number, topic, description, scene_outline_i, + vision_storyboard_content, technical_implementation_content, self.relevant_plugins + ) + + animation_narration_content, _ = await self._generate_scene_step_parallel( + 'scene_animation_narration', + get_prompt_scene_animation_narration, + scene_trace_id, + topic, + scene_number, + session_id, + remaining_steps[1]['output_path'], + *updated_animation_args + ) + + # Combine all implementation plans + implementation_plan = ( + f"{vision_storyboard_content}\n\n" + f"{technical_implementation_content}\n\n" + f"{animation_narration_content}\n\n" + ) + + # 
Ensure scene directory exists (just to be extra safe) + scene_dir = os.path.join(self.output_dir, file_prefix, f"scene{scene_number}") + await self._ensure_directories(scene_dir) + + # Save combined implementation plan + combined_plan_path = os.path.join(scene_dir, f"{file_prefix}_scene{scene_number}_implementation_plan.txt") + combined_content = f"# Scene {scene_number} Implementation Plan\n\n{implementation_plan}" + + try: + await self._async_file_write(combined_plan_path, combined_content) + print(f"✅ Saved implementation plan for scene {scene_number} to: {combined_plan_path}") + except Exception as e: + print(f"❌ Error saving implementation plan for scene {scene_number}: {e}") + raise + + elapsed_time = time.time() - start_time + print(f"Scene {scene_number} implementation completed in {elapsed_time:.2f}s") + + return implementation_plan + + async def generate_scene_implementation_concurrently_enhanced(self, topic: str, description: str, + plan: str, session_id: str) -> List[str]: + """Enhanced concurrent scene implementation with better performance.""" + start_time = time.time() + + # Extract scene information + scene_outline = extract_xml(plan) + scene_number = len(re.findall(r'[^<]', scene_outline)) + file_prefix = re.sub(r'[^a-z0-9_]+', '_', topic.lower()) + + print(f"Starting implementation generation for {scene_number} scenes with max concurrency: {self.max_scene_concurrency}") + + async def generate_single_scene_implementation(i): + async with self.scene_semaphore: # Control scene-level concurrency + scene_regex = r'(.*?)'.format(i) + scene_match = re.search( + scene_regex, + scene_outline, + re.DOTALL + ) + if not scene_match: + print(f"❌ Error: Could not find scene {i} in scene outline. Regex pattern: {scene_regex}") + raise ValueError(f"Scene {i} not found in scene outline") + scene_outline_i = scene_match.group(1) + scene_trace_id = str(uuid.uuid4()) + + return await self._generate_scene_implementation_single_enhanced( + topic, description, scene_outline_i, i, file_prefix, session_id, scene_trace_id + ) + + # Create tasks for all scenes + tasks = [generate_single_scene_implementation(i + 1) for i in range(scene_number)] + + # Execute with progress tracking + print(f"Executing {len(tasks)} scene implementation tasks...") + try: + all_scene_implementation_plans = await asyncio.gather(*tasks, return_exceptions=True) + + # Handle any exceptions + successful_plans = [] + error_count = 0 + for i, result in enumerate(all_scene_implementation_plans): + if isinstance(result, Exception): + print(f"❌ Error in scene {i+1}: {result}") + error_message = f"# Scene {i+1} - Error: {result}" + successful_plans.append(error_message) + + # Write error to file to maintain file structure even on failure + scene_dir = os.path.join(self.output_dir, file_prefix, f"scene{i+1}") + os.makedirs(scene_dir, exist_ok=True) + error_file_path = os.path.join(scene_dir, f"{file_prefix}_scene{i+1}_implementation_plan.txt") + try: + with open(error_file_path, 'w') as f: + f.write(error_message) + except Exception as e: + print(f"❌ Failed to write error file for scene {i+1}: {e}") + + error_count += 1 + else: + successful_plans.append(result) + print(f"✅ Successfully generated implementation plan for scene {i+1}") + + total_time = time.time() - start_time + print(f"All scene implementations completed in {total_time:.2f}s") + print(f" Average time per scene: {total_time/len(tasks):.2f}s") + print(f" Success rate: {len(tasks) - error_count}/{len(tasks)} scenes ({(len(tasks) - error_count) / len(tasks) * 
100:.1f}%)") + + if error_count > 0: + print(f"⚠️ Warning: {error_count} scenes had errors during implementation plan generation") + + except Exception as e: + print(f"❌ Fatal error during scene implementation tasks: {e}") + raise + + return successful_plans + + async def __aenter__(self): + """Async context manager entry.""" + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + """Async context manager exit - cleanup resources.""" + self.thread_pool.shutdown(wait=True) + + # Legacy method compatibility + async def generate_scene_implementation_concurrently(self, topic: str, description: str, + plan: str, session_id: str, + scene_semaphore=None) -> List[str]: + """Legacy compatibility method - redirects to enhanced version.""" + if scene_semaphore: + self.scene_semaphore = scene_semaphore + return await self.generate_scene_implementation_concurrently_enhanced( + topic, description, plan, session_id + ) + + def _extract_scene_outline_robust(self, response_text: str) -> str: + """ + Robust extraction of scene outline that handles various XML format issues. + + This method addresses common problems: + 1. XML wrapped in markdown code blocks + 2. Missing closing tags + 3. Malformed XML structure + 4. Extra text before/after XML + """ + import re + + # First try: Look for XML wrapped in markdown code blocks + markdown_xml_pattern = r'```xml\s*\n(.*?)\s*\n```' + markdown_match = re.search(markdown_xml_pattern, response_text, re.DOTALL) + if markdown_match: + xml_content = markdown_match.group(1) + return self._validate_and_fix_xml(xml_content) + + # Second try: Look for direct XML tags + direct_xml_pattern = r'(.*?)' + direct_match = re.search(direct_xml_pattern, response_text, re.DOTALL) + if direct_match: + xml_content = direct_match.group(1) + return self._validate_and_fix_xml(xml_content) + + # Third try: Look for incomplete XML and attempt to fix + incomplete_pattern = r'(.*?)(?:|$)' + incomplete_match = re.search(incomplete_pattern, response_text, re.DOTALL) + if incomplete_match: + xml_content = incomplete_match.group(1) + # Add missing closing tag if needed + full_xml = f"{xml_content}" + return self._validate_and_fix_xml(full_xml) + + # If no XML structure found, return the entire response but warn + print("⚠️ Warning: No valid XML structure found in LLM response. Using full response.") + print("Response preview:", response_text[:200] + "..." if len(response_text) > 200 else response_text) + return response_text + + def _validate_and_fix_xml(self, xml_content: str) -> str: + """ + Validate and fix common XML issues in scene outlines. 
+ """ + import re + + # Check for unclosed scene tags + scene_pattern = r'' + scene_matches = re.findall(scene_pattern, xml_content) + + fixed_content = xml_content + + for scene_num in scene_matches: + # Check if this scene has a proper closing tag + open_tag = f"" + close_tag = f"" + + # Find the position of this scene's opening tag + open_pos = fixed_content.find(open_tag) + if open_pos == -1: + continue + + # Find the next scene's opening tag (if any) + next_scene_pattern = f"" + next_scene_pos = fixed_content.find(next_scene_pattern, open_pos) + + # Check if there's a closing tag before the next scene + close_pos = fixed_content.find(close_tag, open_pos) + + if close_pos == -1 or (next_scene_pos != -1 and close_pos > next_scene_pos): + # Missing or misplaced closing tag + if next_scene_pos != -1: + # Insert closing tag before next scene + insert_pos = next_scene_pos + while insert_pos > 0 and fixed_content[insert_pos - 1] in ' \n\t': + insert_pos -= 1 + fixed_content = (fixed_content[:insert_pos] + + f"\n {close_tag}\n\n " + + fixed_content[insert_pos:]) + else: + # Insert closing tag at the end + end_outline_pos = fixed_content.find("") + if end_outline_pos != -1: + fixed_content = (fixed_content[:end_outline_pos] + + f"\n {close_tag}\n" + + fixed_content[end_outline_pos:]) + else: + fixed_content += f"\n {close_tag}" + + print(f"🔧 Fixed missing closing tag for SCENE_{scene_num}") + + # Ensure proper SCENE_OUTLINE structure + if not fixed_content.strip().startswith(""): + fixed_content = f"\n{fixed_content}" + + if not fixed_content.strip().endswith(""): + fixed_content = f"{fixed_content}\n" + + return fixed_content + +# Update class alias for backward compatibility +VideoPlanner = EnhancedVideoPlanner \ No newline at end of file diff --git a/src/core/video_renderer.py b/src/core/video_renderer.py new file mode 100644 index 0000000000000000000000000000000000000000..214fb8ddc4c47c60443c165e6cbba8037cd6af10 --- /dev/null +++ b/src/core/video_renderer.py @@ -0,0 +1,1048 @@ +import os +import re +import subprocess +import asyncio +import concurrent.futures +from PIL import Image +from typing import Optional, List, Union, Dict +import traceback +import sys +import time +import json +import hashlib +from pathlib import Path +import shutil +import tempfile + +try: + import ffmpeg +except ImportError: + print("Warning: ffmpeg-python not installed. Video combination features will be limited.") + ffmpeg = None + +from src.core.parse_video import ( + get_images_from_video, + image_with_most_non_black_space +) + + +class OptimizedVideoRenderer: + """Enhanced video renderer with significant performance optimizations.""" + + def __init__(self, output_dir="output", print_response=False, use_visual_fix_code=False, + max_concurrent_renders=4, enable_caching=True, default_quality="medium", + use_gpu_acceleration=False, preview_mode=False): + """Initialize the enhanced VideoRenderer. 
+ + Args: + output_dir (str): Directory for output files + print_response (bool): Whether to print responses + use_visual_fix_code (bool): Whether to use visual fix code + max_concurrent_renders (int): Maximum concurrent render processes + enable_caching (bool): Enable intelligent caching system + default_quality (str): Default render quality (low/medium/high/preview) + use_gpu_acceleration (bool): Use GPU acceleration if available + preview_mode (bool): Enable preview mode for faster development + """ + self.output_dir = output_dir + self.print_response = print_response + self.use_visual_fix_code = use_visual_fix_code + self.max_concurrent_renders = max_concurrent_renders + self.enable_caching = enable_caching + self.default_quality = default_quality + self.use_gpu_acceleration = use_gpu_acceleration + self.preview_mode = preview_mode + + # Performance monitoring + self.render_stats = { + 'total_renders': 0, + 'cache_hits': 0, + 'total_time': 0, + 'average_time': 0 + } + + # Quality presets for faster rendering + self.quality_presets = { + 'preview': {'flag': '-ql', 'fps': 15, 'resolution': '480p'}, + 'low': {'flag': '-ql', 'fps': 15, 'resolution': '480p'}, + 'medium': {'flag': '-qm', 'fps': 30, 'resolution': '720p'}, + 'high': {'flag': '-qh', 'fps': 60, 'resolution': '1080p'}, + 'production': {'flag': '-qp', 'fps': 60, 'resolution': '1440p'} + } + + # Cache directory for rendered scenes + self.cache_dir = os.path.join(output_dir, '.render_cache') + if enable_caching: + os.makedirs(self.cache_dir, exist_ok=True) + + # Thread pool for concurrent operations + self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=max_concurrent_renders) + + def _get_code_hash(self, code: str) -> str: + """Generate hash for code to enable caching.""" + return hashlib.md5(code.encode()).hexdigest() + + def _get_cache_path(self, code_hash: str, quality: str) -> str: + """Get cache file path for given code hash and quality.""" + return os.path.join(self.cache_dir, f"{code_hash}_{quality}.mp4") + + def _is_cached(self, code: str, quality: str) -> Optional[str]: + """Check if rendered video exists in cache.""" + if not self.enable_caching: + return None + + code_hash = self._get_code_hash(code) + cache_path = self._get_cache_path(code_hash, quality) + + if os.path.exists(cache_path): + print(f"Cache hit for code hash {code_hash[:8]}...") + self.render_stats['cache_hits'] += 1 + return cache_path + return None + + def _save_to_cache(self, code: str, quality: str, video_path: str): + """Save rendered video to cache.""" + if not self.enable_caching or not os.path.exists(video_path): + return + + code_hash = self._get_code_hash(code) + cache_path = self._get_cache_path(code_hash, quality) + + try: + shutil.copy2(video_path, cache_path) + print(f"Cached render for hash {code_hash[:8]}...") + except Exception as e: + print(f"Warning: Could not cache render: {e}") + + async def render_scene_optimized(self, code: str, file_prefix: str, curr_scene: int, + curr_version: int, code_dir: str, media_dir: str, + quality: str = None, max_retries: int = 3, + use_visual_fix_code=False, visual_self_reflection_func=None, + banned_reasonings=None, scene_trace_id=None, topic=None, + session_id=None, code_generator=None, + scene_implementation=None, description=None, + scene_outline=None) -> tuple: + """Optimized scene rendering with intelligent error handling and code generation fixes.""" + + start_time = time.time() + quality = quality or self.default_quality + current_code = code + + # Check cache first + cached_video = 
self._is_cached(current_code, quality) + if cached_video: + # Copy cached video to expected location + expected_path = self._get_expected_video_path(file_prefix, curr_scene, curr_version, media_dir) + os.makedirs(os.path.dirname(expected_path), exist_ok=True) + shutil.copy2(cached_video, expected_path) + + elapsed = time.time() - start_time + print(f"Scene {curr_scene} rendered from cache in {elapsed:.2f}s") + return current_code, None + + # Optimize manim command for speed + file_path = os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}.py") + + # Write optimized code file + await self._write_code_file_async(file_path, current_code) + + # Build optimized manim command + manim_cmd = self._build_optimized_command(file_path, media_dir, quality) + + retries = 0 + while retries < max_retries: + try: + print(f"🎬 Rendering scene {curr_scene} (quality: {quality}, attempt: {retries + 1})") + + # Execute manim with optimizations + result = await asyncio.to_thread( + self._run_manim_optimized, + manim_cmd, + file_path + ) + + if result.returncode != 0: + raise Exception(result.stderr) + + # Find the rendered video + video_path = self._find_rendered_video(file_prefix, curr_scene, curr_version, media_dir) + + # Save to cache + self._save_to_cache(current_code, quality, video_path) + + # Visual fix code processing + if use_visual_fix_code and visual_self_reflection_func and banned_reasonings: + current_code = await self._process_visual_fix( + current_code, video_path, file_prefix, curr_scene, curr_version, + code_dir, visual_self_reflection_func, banned_reasonings, + scene_trace_id, topic, session_id + ) + + elapsed = time.time() - start_time + self.render_stats['total_renders'] += 1 + self.render_stats['total_time'] += elapsed + self.render_stats['average_time'] = self.render_stats['total_time'] / self.render_stats['total_renders'] + + print(f"Scene {curr_scene} rendered successfully in {elapsed:.2f}s") + print(f"Average render time: {self.render_stats['average_time']:.2f}s") + + return current_code, None + + except Exception as e: + print(f"Render attempt {retries + 1} failed: {e}") + + # Save error log + error_log_path = os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}_error_{retries}.log") + await self._write_error_log_async(error_log_path, str(e), retries) + + # Instead of blind retry, try to fix the code if we have a code generator + if code_generator and scene_implementation and retries < max_retries - 1: + print(f"🔧 Attempting to fix code using CodeGenerator (attempt {retries + 1})") + try: + fixed_code, fix_log = code_generator.fix_code_errors( + implementation_plan=scene_implementation, + code=current_code, + error=str(e), + scene_trace_id=scene_trace_id, + topic=topic, + scene_number=curr_scene, + session_id=session_id + ) + + if fixed_code and fixed_code != current_code: + print(f"✨ Code fix generated, updating for next attempt") + current_code = fixed_code + curr_version += 1 + + # Update file path and write fixed code + file_path = os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}.py") + await self._write_code_file_async(file_path, current_code) + + # Update manim command for new file + manim_cmd = self._build_optimized_command(file_path, media_dir, quality) + + # Log the fix + fix_log_path = os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}_fix_log.txt") + await self._write_error_log_async(fix_log_path, fix_log or "Code fix applied", 0) + else: + print(f"⚠️ Code generator returned same or empty 
code, doing standard retry") + except Exception as fix_error: + print(f"❌ Code fix attempt failed: {fix_error}") + # Fall back to standard retry behavior + + retries += 1 + if retries < max_retries: + await asyncio.sleep(1) # Brief delay before retry + else: + return current_code, str(e) + + return current_code, f"Failed after {max_retries} attempts" + + def _build_optimized_command(self, file_path: str, media_dir: str, quality: str) -> List[str]: + """Build optimized manim command with performance flags.""" + quality_preset = self.quality_presets.get(quality, self.quality_presets['medium']) + + cmd = [ + "manim", + "render", + quality_preset['flag'], # Quality setting + file_path, + "--media_dir", media_dir, + "--fps", str(quality_preset['fps']) + ] + + # Add caching option (only disable if needed) + if not self.enable_caching: + cmd.append("--disable_caching") + + # Add GPU acceleration if available and enabled + if self.use_gpu_acceleration: + cmd.extend(["--renderer", "opengl"]) + + # Preview mode optimizations + if self.preview_mode or quality == 'preview': + cmd.extend([ + "--save_last_frame", # Only render final frame for quick preview + "--write_to_movie" # Skip unnecessary file operations + ]) + + return cmd + + def _run_manim_optimized(self, cmd: List[str], file_path: str) -> subprocess.CompletedProcess: + """Run manim command with optimizations.""" + env = os.environ.copy() + + # Optimize environment for performance + env.update({ + 'MANIM_DISABLE_CACHING': 'false' if self.enable_caching else 'true', + 'MANIM_VERBOSITY': 'WARNING', # Reduce log verbosity + 'OMP_NUM_THREADS': str(os.cpu_count()), # Use all CPU cores + 'MANIM_RENDERER_TIMEOUT': '300' # 5 minute timeout + }) + + return subprocess.run( + cmd, + capture_output=True, + text=True, + env=env, + timeout=300 # 5 minute timeout + ) + + async def _write_code_file_async(self, file_path: str, code: str): + """Asynchronously write code file.""" + os.makedirs(os.path.dirname(file_path), exist_ok=True) + + # Add optimization hints to the code + optimized_code = self._optimize_code_for_rendering(code) + + with open(file_path, 'w', encoding='utf-8') as f: + f.write(optimized_code) + + def _optimize_code_for_rendering(self, code: str) -> str: + """Add optimization hints to Manim code.""" + optimizations = [ + "", + "# Manim rendering optimizations", + "from manim import config", + "config.frame_rate = 30 # Balanced frame rate", + "config.pixel_height = 720 # Optimized resolution", + "config.pixel_width = 1280", + "" + ] + + # Find the end of manim imports specifically + lines = code.split('\n') + manim_import_end = 0 + + for i, line in enumerate(lines): + # Look for manim-related imports + if (line.strip().startswith('from manim') or + line.strip().startswith('import manim') or + line.strip().startswith('from manim_')): + manim_import_end = i + 1 + + # If no manim imports found, look for the end of all imports + if manim_import_end == 0: + for i, line in enumerate(lines): + if (line.strip().startswith(('from ', 'import ')) and + not line.strip().startswith('#')): + manim_import_end = i + 1 + + # Insert optimization code after manim imports + lines[manim_import_end:manim_import_end] = optimizations + + return '\n'.join(lines) + + async def _write_error_log_async(self, file_path: str, error: str, attempt: int): + """Asynchronously write error log.""" + timestamp = time.strftime('%Y-%m-%d %H:%M:%S') + log_content = f"[{timestamp}] Attempt {attempt + 1}: {error}\n" + + with open(file_path, 'a', encoding='utf-8') as f: + 
f.write(log_content)
+
+    def _get_expected_video_path(self, file_prefix: str, scene: int, version: int, media_dir: str) -> str:
+        """Get expected path for rendered video."""
+        return os.path.join(
+            media_dir, "videos", f"{file_prefix}_scene{scene}_v{version}",
+            "1080p60", f"{file_prefix}_scene{scene}_v{version}.mp4"
+        )
+
+    def _find_rendered_video(self, file_prefix: str, scene: int, version: int, media_dir: str) -> str:
+        """Find the rendered video file."""
+        video_dir = os.path.join(media_dir, "videos", f"{file_prefix}_scene{scene}_v{version}")
+
+        # Look in quality-specific subdirectories
+        for quality_dir in ["1080p60", "720p30", "480p15"]:
+            search_dir = os.path.join(video_dir, quality_dir)
+            if os.path.exists(search_dir):
+                for file in os.listdir(search_dir):
+                    if file.endswith('.mp4'):
+                        return os.path.join(search_dir, file)
+
+        raise FileNotFoundError(f"No rendered video found for scene {scene} version {version}")
+
+    async def _process_visual_fix(self, code: str, video_path: str, file_prefix: str,
+                                  scene: int, version: int, code_dir: str,
+                                  visual_self_reflection_func, banned_reasonings: List[str],
+                                  scene_trace_id: str, topic: str, session_id: str) -> str:
+        """Process visual fix code with optimization."""
+
+        # For Gemini/Vertex AI models, pass the video directly
+        if hasattr(self, 'scene_model') and self.scene_model.model_name.startswith(('gemini/', 'vertex_ai/')):
+            media_input = video_path
+        else:
+            # For other models, create optimized snapshot
+            media_input = await self._create_optimized_snapshot(topic, scene, version)
+
+        new_code, log = visual_self_reflection_func(
+            code, media_input, scene_trace_id=scene_trace_id,
+            topic=topic, scene_number=scene, session_id=session_id
+        )
+
+        # Save visual fix log
+        log_path = os.path.join(code_dir, f"{file_prefix}_scene{scene}_v{version}_vfix_log.txt")
+        await self._write_error_log_async(log_path, log, 0)
+
+        # Check for termination markers ("<LGTM>" is assumed to be the approval
+        # marker emitted by the visual self-reflection prompt)
+        if "<LGTM>" in new_code or any(word in new_code for word in banned_reasonings):
+            return code
+
+        # Save updated code
+        new_version = version + 1
+        new_code_path = os.path.join(code_dir, f"{file_prefix}_scene{scene}_v{new_version}.py")
+        await self._write_code_file_async(new_code_path, new_code)
+        print(f"Visual fix code saved to scene{scene}/code/{file_prefix}_scene{scene}_v{new_version}.py")
+
+        return new_code
+
+    async def render_multiple_scenes_parallel(self, scene_configs: List[Dict],
+                                              max_concurrent: int = None) -> List[tuple]:
+        """Render multiple scenes in parallel with optimized resource management."""
+
+        max_concurrent = max_concurrent or self.max_concurrent_renders
+        print(f"Starting parallel rendering of {len(scene_configs)} scenes (max concurrent: {max_concurrent})")
+
+        semaphore = asyncio.Semaphore(max_concurrent)
+
+        async def render_single_scene(config):
+            async with semaphore:
+                return await self.render_scene_optimized(**config)
+
+        start_time = time.time()
+
+        # Execute all renders concurrently
+        tasks = [render_single_scene(config) for config in scene_configs]
+        results = await asyncio.gather(*tasks, return_exceptions=True)
+
+        elapsed = time.time() - start_time
+        successful = sum(1 for r in results if not isinstance(r, Exception) and r[1] is None)
+
+        print(f"Parallel rendering completed in {elapsed:.2f}s")
+        print(f"Success rate: {successful}/{len(scene_configs)} scenes")
+        print(f"Cache hit rate: {self.render_stats['cache_hits']}/{self.render_stats['total_renders']} ({self.render_stats['cache_hits']/max(1,self.render_stats['total_renders'])*100:.1f}%)")
+
+        return results
+
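+    # Illustrative usage sketch (assumptions: `scene_codes` is a list of Manim
+    # scripts and the directory names are hypothetical). Each config dict maps
+    # onto render_scene_optimized's keyword arguments:
+    #
+    #     renderer = OptimizedVideoRenderer(output_dir="output", max_concurrent_renders=4)
+    #     scene_configs = [
+    #         {'code': code, 'file_prefix': 'my_topic', 'curr_scene': i,
+    #          'curr_version': 0, 'code_dir': 'output/my_topic/code',
+    #          'media_dir': 'output/my_topic/media'}
+    #         for i, code in enumerate(scene_codes, start=1)
+    #     ]
+    #     results = asyncio.run(renderer.render_multiple_scenes_parallel(scene_configs))
+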
+ async def _create_optimized_snapshot(self, topic: str, scene_number: int, + version_number: int) -> Image.Image: + """Create optimized snapshot with async processing.""" + file_prefix = re.sub(r'[^a-z0-9_]+', '_', topic.lower()) + video_folder_path = os.path.join( + self.output_dir, file_prefix, "media", "videos", + f"{file_prefix}_scene{scene_number}_v{version_number}", "1080p60" + ) + + # Find video file + video_files = [f for f in os.listdir(video_folder_path) if f.endswith('.mp4')] + if not video_files: + raise FileNotFoundError(f"No mp4 files found in {video_folder_path}") + + video_path = os.path.join(video_folder_path, video_files[0]) + + # Create snapshot asynchronously + return await asyncio.to_thread( + lambda: image_with_most_non_black_space( + get_images_from_video(video_path), + return_type="image" + ) + ) + + async def combine_videos_optimized(self, topic: str, use_hardware_acceleration: bool = False) -> str: + """Optimized video combination with hardware acceleration and parallel processing.""" + + start_time = time.time() + file_prefix = re.sub(r'[^a-z0-9_]+', '_', topic.lower()) + + print(f"🎬 Starting optimized video combination for topic: {topic}") + print(f"🖥️ GPU Acceleration: {'Enabled' if use_hardware_acceleration else 'Disabled (CPU only)'}") + + # Prepare paths + video_output_dir = os.path.join(self.output_dir, file_prefix) + output_video_path = os.path.join(video_output_dir, f"{file_prefix}_combined.mp4") + output_srt_path = os.path.join(video_output_dir, f"{file_prefix}_combined.srt") + + # Check if already exists + if os.path.exists(output_video_path): + print(f"Combined video already exists at {output_video_path}") + return output_video_path + + # Get scene information + scene_videos, scene_subtitles = await self._gather_scene_files_async(file_prefix) + + if not scene_videos: + raise ValueError("No scene videos found to combine") + + print(f"📹 Found {len(scene_videos)} scene videos to combine") + + try: + if ffmpeg is None: + print("⚠️ ffmpeg-python not available, using direct FFmpeg fallback...") + fallback_output = await self._fallback_video_combination(scene_videos, output_video_path) + print(f"✅ Direct FFmpeg combination successful: {fallback_output}") + return fallback_output + + # Analyze videos in parallel + print("🔍 Analyzing video properties...") + analysis_tasks = [ + asyncio.to_thread(self._analyze_video, video) + for video in scene_videos + ] + video_info = await asyncio.gather(*analysis_tasks) + + has_audio = [info['has_audio'] for info in video_info] + print(f"🎵 Audio tracks found: {sum(has_audio)}/{len(scene_videos)} videos") + + # Build optimized ffmpeg command + if any(has_audio): + print("🎵 Combining videos with audio tracks...") + await self._combine_with_audio_optimized( + scene_videos, video_info, output_video_path, use_hardware_acceleration + ) + else: + print("🔇 Combining videos without audio...") + await self._combine_without_audio_optimized( + scene_videos, output_video_path, use_hardware_acceleration + ) + + # Verify the output file was created and is valid + if not os.path.exists(output_video_path): + raise FileNotFoundError(f"Output video was not created: {output_video_path}") + + # Check if the video file is valid + file_size = os.path.getsize(output_video_path) + if file_size < 1024: # Less than 1KB is probably invalid + raise ValueError(f"Output video file seems invalid (size: {file_size} bytes)") + + print(f"✅ Video file created successfully (size: {file_size / (1024*1024):.2f} MB)") + + # Combine subtitles if available + if 
scene_subtitles:
+                print("📝 Combining subtitles...")
+                await self._combine_subtitles_async(scene_subtitles, scene_videos, output_srt_path)
+
+            elapsed = time.time() - start_time
+            print(f"🎉 Video combination completed in {elapsed:.2f}s")
+            print(f"📁 Output: {output_video_path}")
+
+            return output_video_path
+
+        except Exception as e:
+            print(f"❌ Error in optimized video combination: {e}")
+            print("🔧 Attempting fallback video combination...")
+
+            # Fallback to simple concatenation
+            try:
+                fallback_output = await self._fallback_video_combination(scene_videos, output_video_path)
+                print(f"✅ Fallback combination successful: {fallback_output}")
+                return fallback_output
+            except Exception as fallback_error:
+                print(f"❌ Fallback combination also failed: {fallback_error}")
+                traceback.print_exc()
+                raise
+
+    async def _gather_scene_files_async(self, file_prefix: str) -> tuple:
+        """Asynchronously gather scene video and subtitle files."""
+        search_path = os.path.join(self.output_dir, file_prefix, "media", "videos")
+
+        # Get scene count
+        scene_outline_path = os.path.join(self.output_dir, file_prefix, f"{file_prefix}_scene_outline.txt")
+        with open(scene_outline_path) as f:
+            plan = f.read()
+
+        scene_outline_match = re.search(r'<SCENE_OUTLINE>(.*?)</SCENE_OUTLINE>', plan, re.DOTALL)
+        if not scene_outline_match:
+            print(f"No scene outline found in plan: {plan[:200]}...")
+            return [], []  # Return an empty pair so callers can safely unpack
+        scene_outline = scene_outline_match.group(1)
+        scene_count = len(re.findall(r'<SCENE_(\d+)>[^<]', scene_outline))
+
+        # Find scene files in parallel
+        tasks = [
+            asyncio.to_thread(self._find_scene_files, search_path, file_prefix, scene_num)
+            for scene_num in range(1, scene_count + 1)
+        ]
+
+        results = await asyncio.gather(*tasks)
+
+        scene_videos = []
+        scene_subtitles = []
+
+        for video, subtitle in results:
+            if video:
+                scene_videos.append(video)
+                scene_subtitles.append(subtitle)
+
+        return scene_videos, scene_subtitles
+
+    def _find_scene_files(self, search_path: str, file_prefix: str, scene_num: int) -> tuple:
+        """Find video and subtitle files for a specific scene."""
+        scene_folders = []
+        for root, dirs, files in os.walk(search_path):
+            for dir in dirs:
+                if dir.startswith(f"{file_prefix}_scene{scene_num}"):
+                    scene_folders.append(os.path.join(root, dir))
+
+        if not scene_folders:
+            return None, None
+
+        # Get latest version
+        scene_folders.sort(key=lambda f: int(f.split("_v")[-1]) if "_v" in f else 0)
+        folder = scene_folders[-1]
+
+        video_file = None
+        subtitle_file = None
+
+        quality_dirs = ["1080p60", "720p30", "480p15"]
+        for quality_dir in quality_dirs:
+            quality_path = os.path.join(folder, quality_dir)
+            if os.path.exists(quality_path):
+                for filename in os.listdir(quality_path):
+                    if filename.endswith('.mp4') and not video_file:
+                        video_file = os.path.join(quality_path, filename)
+                    elif filename.endswith('.srt') and not subtitle_file:
+                        subtitle_file = os.path.join(quality_path, filename)
+                break
+
+        return video_file, subtitle_file
+
+    def _analyze_video(self, video_path: str) -> Dict:
+        """Analyze video properties for optimization."""
+        if ffmpeg is None:
+            # Fallback analysis using direct FFmpeg probe
+            import subprocess
+            import json
+
+            try:
+                cmd = [
+                    'ffprobe',
+                    '-v', 'quiet',
+                    '-print_format', 'json',
+                    '-show_streams',
+                    video_path
+                ]
+
+                result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+                probe_data = json.loads(result.stdout)
+
+                video_stream = next(stream for stream in probe_data['streams'] if stream['codec_type'] == 'video')
+                audio_streams = [stream for stream in probe_data['streams'] if
stream['codec_type'] == 'audio']
+
+                # Parse the 'num/den' frame rate safely instead of using eval();
+                # ffprobe can report '0/0', which eval() would crash on
+                rate = video_stream.get('avg_frame_rate', '30/1')
+                try:
+                    num, den = rate.split('/')
+                    fps = float(num) / float(den) if float(den) else 30.0
+                except (ValueError, ZeroDivisionError):
+                    fps = 30.0
+
+                return {
+                    'path': video_path,
+                    'duration': float(video_stream.get('duration', 0)),
+                    'has_audio': len(audio_streams) > 0,
+                    'width': int(video_stream.get('width', 1920)),
+                    'height': int(video_stream.get('height', 1080)),
+                    'fps': fps
+                }
+            except Exception as e:
+                print(f"Warning: Could not analyze video {video_path}: {e}")
+                # Return default values
+                return {
+                    'path': video_path,
+                    'duration': 10.0,  # Default duration
+                    'has_audio': False,
+                    'width': 1920,
+                    'height': 1080,
+                    'fps': 30
+                }
+
+        probe = ffmpeg.probe(video_path)
+        video_stream = next(stream for stream in probe['streams'] if stream['codec_type'] == 'video')
+        audio_streams = [stream for stream in probe['streams'] if stream['codec_type'] == 'audio']
+
+        # Same safe frame-rate parsing as the fallback path above
+        rate = video_stream.get('avg_frame_rate', '30/1')
+        try:
+            num, den = rate.split('/')
+            fps = float(num) / float(den) if float(den) else 30.0
+        except (ValueError, ZeroDivisionError):
+            fps = 30.0
+
+        return {
+            'path': video_path,
+            'duration': float(video_stream['duration']),
+            'has_audio': len(audio_streams) > 0,
+            'width': int(video_stream['width']),
+            'height': int(video_stream['height']),
+            'fps': fps
+        }
+
+    async def _combine_with_audio_optimized(self, scene_videos: List[str], video_info: List[Dict],
+                                            output_path: str, use_hardware_acceleration: bool):
+        """Combine videos with audio using hardware acceleration."""
+        import ffmpeg
+
+        streams = []
+        for video_path, info in zip(scene_videos, video_info):
+            input_vid = ffmpeg.input(video_path)
+
+            if info['has_audio']:
+                streams.extend([input_vid['v'], input_vid['a']])
+            else:
+                # Add silent audio
+                silent_audio = ffmpeg.input(
+                    'anullsrc=channel_layout=stereo:sample_rate=44100',
+                    f='lavfi', t=info['duration']
+                )['a']
+                streams.extend([input_vid['v'], silent_audio])
+
+        # Build optimized encoding options for maximum compatibility
+        encode_options = {
+            'c:v': 'libx264',          # Use libx264 for maximum compatibility
+            'c:a': 'aac',              # AAC audio codec
+            'preset': 'medium',        # Balanced preset for good quality/speed
+            'crf': '23',               # Good quality/speed balance
+            'pix_fmt': 'yuv420p',      # Pixel format for maximum compatibility
+            'movflags': '+faststart',  # Enable fast start for web playback
+            'r': '30',                 # Set frame rate to 30fps
+            'threads': '0',            # Use all available threads
+            'profile:v': 'high',       # H.264 profile for better compatibility
+            'level': '4.0'             # H.264 level for broad device support
+        }
+
+        # Only use hardware acceleration if explicitly requested and working
+        if use_hardware_acceleration:
+            try:
+                # Test if NVENC is available by encoding a short synthetic clip
+                test_cmd = ['ffmpeg', '-f', 'lavfi', '-i', 'testsrc=duration=1:size=320x240:rate=1',
+                            '-c:v', 'h264_nvenc', '-f', 'null', '-']
+                test_result = subprocess.run(test_cmd, capture_output=True, text=True, timeout=10)
+
+                if test_result.returncode == 0:
+                    encode_options.update({
+                        'c:v': 'h264_nvenc',
+                        'preset': 'fast',   # NVENC preset
+                        'profile:v': 'high',
+                        'level': '4.0',
+                        'rc': 'constqp',    # Constant quality mode
+                        'qp': '23'          # Quality parameter
+                    })
+                    print("✅ Using NVIDIA hardware acceleration")
+                else:
+                    print("⚠️ NVIDIA hardware acceleration not available, using CPU encoding")
+            except Exception as e:
+                print(f"⚠️ Hardware acceleration test failed: {e}, using CPU encoding")
+
+        concat = ffmpeg.concat(*streams, v=1, a=1, unsafe=True)
+
+        # Run with progress monitoring
+        process = (
+            concat
+            .output(output_path, **encode_options)
+            .overwrite_output()
+            .run_async(pipe_stdout=True, pipe_stderr=True)
+        )
+
+        await self._monitor_ffmpeg_progress(process, "audio combination")
+
+    async def _combine_without_audio_optimized(self, scene_videos: List[str],
+
output_path: str, use_hardware_acceleration: bool):
+        """Combine videos without audio using hardware acceleration."""
+        import ffmpeg
+
+        streams = [ffmpeg.input(video)['v'] for video in scene_videos]
+
+        # Build encoding options for maximum compatibility
+        encode_options = {
+            'c:v': 'libx264',          # Use libx264 for maximum compatibility
+            'preset': 'medium',        # Balanced preset
+            'crf': '20',               # Good quality
+            'pix_fmt': 'yuv420p',      # Pixel format for maximum compatibility
+            'movflags': '+faststart',  # Enable fast start
+            'r': '30',                 # Set frame rate to 30fps
+            'threads': '0',            # Use all available threads
+            'profile:v': 'high',       # H.264 profile
+            'level': '4.0'             # H.264 level
+        }
+
+        # Test hardware acceleration availability
+        if use_hardware_acceleration:
+            try:
+                # Test if NVENC is available
+                test_cmd = ['ffmpeg', '-f', 'lavfi', '-i', 'testsrc=duration=1:size=320x240:rate=1',
+                            '-c:v', 'h264_nvenc', '-f', 'null', '-']
+                test_result = subprocess.run(test_cmd, capture_output=True, text=True, timeout=10)
+
+                if test_result.returncode == 0:
+                    encode_options.update({
+                        'c:v': 'h264_nvenc',
+                        'preset': 'fast',
+                        'profile:v': 'high',
+                        'level': '4.0',
+                        'rc': 'constqp',
+                        'qp': '20'
+                    })
+                    print("✅ Using NVIDIA hardware acceleration for video-only combination")
+                else:
+                    print("⚠️ NVIDIA hardware acceleration not available, using CPU encoding")
+            except Exception as e:
+                print(f"⚠️ Hardware acceleration test failed: {e}, using CPU encoding")
+
+        concat = ffmpeg.concat(*streams, v=1, unsafe=True)
+
+        process = (
+            concat
+            .output(output_path, **encode_options)
+            .overwrite_output()
+            .run_async(pipe_stdout=True, pipe_stderr=True)
+        )
+
+        await self._monitor_ffmpeg_progress(process, "video combination")
+
+    async def _monitor_ffmpeg_progress(self, process, operation_name: str):
+        """Monitor FFmpeg progress asynchronously."""
+        print(f"Starting {operation_name}...")
+
+        stderr_tail = []
+        while True:
+            # FFmpeg writes its progress lines to stderr, not stdout; reading
+            # stderr here also keeps the pipe drained so the encoder cannot
+            # block on a full buffer.
+            line = await asyncio.to_thread(process.stderr.readline)
+            if not line:
+                break
+
+            line = line.decode('utf-8')
+            stderr_tail.append(line)
+            if 'frame=' in line:
+                # Extract progress information
+                frame_match = re.search(r'frame=\s*(\d+)', line)
+                time_match = re.search(r'time=(\d+:\d+:\d+\.\d+)', line)
+
+                if frame_match and time_match:
+                    frame = frame_match.group(1)
+                    time_str = time_match.group(1)
+                    print(f"\r⚡ Processing: frame={frame}, time={time_str}", end='', flush=True)
+
+        await asyncio.to_thread(process.communicate)
+        print(f"\n{operation_name} completed!")
+
+        if process.returncode != 0:
+            # stderr was already consumed above, so report its tail
+            raise Exception(f"FFmpeg error: {''.join(stderr_tail[-30:])}")
+
+    async def _combine_subtitles_async(self, scene_subtitles: List[str],
+                                       scene_videos: List[str], output_path: str):
+        """Combine subtitles asynchronously."""
+
+        def combine_subtitles():
+            with open(output_path, 'w', encoding='utf-8') as outfile:
+                current_time_offset = 0
+                subtitle_index = 1
+
+                for srt_file, video_file in zip(scene_subtitles, scene_videos):
+                    if srt_file is None:
+                        continue
+
+                    with open(srt_file, 'r', encoding='utf-8') as infile:
+                        lines = infile.readlines()
+                        i = 0
+                        while i < len(lines):
+                            line = lines[i].strip()
+                            if line.isdigit():
+                                outfile.write(f"{subtitle_index}\n")
+                                subtitle_index += 1
+                                i += 1
+
+                                time_line = lines[i].strip()
+                                start_time, end_time = time_line.split(' --> ')
+
+                                def adjust_time(time_str, offset):
+                                    h, m, s = time_str.replace(',', '.').split(':')
+                                    total_seconds = float(h) * 3600 + float(m) * 60 + float(s) + offset
+                                    h = int(total_seconds // 3600)
+                                    m = int((total_seconds % 3600) // 60)
+                                    s = total_seconds % 60
+                                    return
f"{h:02d}:{m:02d}:{s:06.3f}".replace('.', ',') + + new_start = adjust_time(start_time, current_time_offset) + new_end = adjust_time(end_time, current_time_offset) + outfile.write(f"{new_start} --> {new_end}\n") + i += 1 + + while i < len(lines) and lines[i].strip(): + outfile.write(lines[i]) + i += 1 + outfile.write('\n') + else: + i += 1 + + # Update time offset + import ffmpeg + probe = ffmpeg.probe(video_file) + duration = float(probe['streams'][0]['duration']) + current_time_offset += duration + + await asyncio.to_thread(combine_subtitles) + print(f"Subtitles combined to {output_path}") + + def get_performance_stats(self) -> Dict: + """Get current performance statistics.""" + return { + **self.render_stats, + 'cache_hit_rate': self.render_stats['cache_hits'] / max(1, self.render_stats['total_renders']), + 'cache_enabled': self.enable_caching, + 'concurrent_renders': self.max_concurrent_renders + } + + def cleanup_cache(self, max_age_days: int = 7): + """Clean up old cache files.""" + if not self.enable_caching: + return + + import time + current_time = time.time() + max_age_seconds = max_age_days * 24 * 60 * 60 + + for file in os.listdir(self.cache_dir): + file_path = os.path.join(self.cache_dir, file) + if os.path.getmtime(file_path) < current_time - max_age_seconds: + os.remove(file_path) + print(f"Removed old cache file: {file}") + + async def __aenter__(self): + """Async context manager entry.""" + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + """Async context manager exit.""" + self.executor.shutdown(wait=True) + + def render_scene(self, code: str, file_prefix: str, curr_scene: int, + curr_version: int, code_dir: str, media_dir: str, + use_visual_fix_code=False, visual_self_reflection_func=None, + banned_reasonings=None, scene_trace_id=None, topic=None, + session_id=None, code_generator=None, scene_implementation=None, + description=None, scene_outline=None) -> tuple: + """Legacy render_scene method for backward compatibility.""" + # Run the async method synchronously + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + result = loop.run_until_complete( + self.render_scene_optimized( + code=code, + file_prefix=file_prefix, + curr_scene=curr_scene, + curr_version=curr_version, + code_dir=code_dir, + media_dir=media_dir, + use_visual_fix_code=use_visual_fix_code, + visual_self_reflection_func=visual_self_reflection_func, + banned_reasonings=banned_reasonings, + scene_trace_id=scene_trace_id, + topic=topic, + session_id=session_id, + code_generator=code_generator, + scene_implementation=scene_implementation, + description=description, + scene_outline=scene_outline + ) + ) + return result + finally: + loop.close() + + def combine_videos(self, topic: str) -> str: + """Legacy combine_videos method for backward compatibility.""" + # Run the async method synchronously + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + result = loop.run_until_complete( + self.combine_videos_optimized(topic=topic) + ) + return result + finally: + loop.close() + + async def _fallback_video_combination(self, scene_videos: List[str], output_path: str) -> str: + """Simple fallback video combination using direct FFmpeg commands.""" + + print("🔧 Using fallback video combination method...") + + # Create a temporary file list for concat demuxer + temp_dir = tempfile.mkdtemp() + file_list_path = os.path.join(temp_dir, "file_list.txt") + + try: + # Write file list for concat demuxer + with open(file_list_path, 'w') as f: + for video in 
scene_videos: + # Ensure proper path format for concat demuxer + video_path = os.path.abspath(video).replace('\\', '/') + f.write(f"file '{video_path}'\n") + + print(f"📝 Created file list: {file_list_path}") + print(f"🎬 Combining {len(scene_videos)} videos using direct FFmpeg...") + + # Use direct FFmpeg command for maximum compatibility + cmd = [ + 'ffmpeg', + '-f', 'concat', + '-safe', '0', + '-i', file_list_path, + '-c:v', 'libx264', + '-c:a', 'aac', + '-preset', 'fast', + '-crf', '25', + '-pix_fmt', 'yuv420p', + '-movflags', '+faststart', + '-avoid_negative_ts', 'make_zero', + '-y', # Overwrite output file + output_path + ] + + print(f"🔧 Running command: {' '.join(cmd)}") + + # Run the command + process = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE + ) + + # Monitor progress + async def read_stderr(): + stderr_output = [] + while True: + line = await process.stderr.readline() + if not line: + break + + line_str = line.decode('utf-8').strip() + stderr_output.append(line_str) + + if 'frame=' in line_str: + frame_match = re.search(r'frame=\s*(\d+)', line_str) + time_match = re.search(r'time=(\d+:\d+:\d+\.\d+)', line_str) + + if frame_match and time_match: + frame = frame_match.group(1) + time_str = time_match.group(1) + print(f"\r🔧 Fallback processing: frame={frame}, time={time_str}", end='', flush=True) + + return stderr_output + + # Wait for completion + stderr_task = asyncio.create_task(read_stderr()) + await process.wait() + stderr_output = await stderr_task + + print(f"\n🔧 Fallback combination completed!") + + if process.returncode != 0: + error_msg = '\n'.join(stderr_output) + print(f"❌ FFmpeg error output:\n{error_msg}") + raise Exception(f"Direct FFmpeg command failed with return code {process.returncode}") + + # Verify output + if not os.path.exists(output_path): + raise FileNotFoundError(f"Fallback output video was not created: {output_path}") + + file_size = os.path.getsize(output_path) + if file_size < 1024: + raise ValueError(f"Fallback output video file seems invalid (size: {file_size} bytes)") + + print(f"✅ Fallback video created successfully (size: {file_size / (1024*1024):.2f} MB)") + return output_path + + finally: + # Clean up temporary files + try: + if os.path.exists(file_list_path): + os.remove(file_list_path) + os.rmdir(temp_dir) + except Exception as e: + print(f"⚠️ Could not clean up temp files: {e}") + +# Backward compatibility alias +VideoRenderer = OptimizedVideoRenderer \ No newline at end of file diff --git a/src/rag/__init__.py b/src/rag/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/rag/__pycache__/__init__.cpython-312.pyc b/src/rag/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8dd4ae2ad59de7df8ad8fe041457872144a56498 Binary files /dev/null and b/src/rag/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/rag/__pycache__/rag_integration.cpython-312.pyc b/src/rag/__pycache__/rag_integration.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..24afdd2eeb15b7d9a2527d8d9923ba5d8f13a8f9 Binary files /dev/null and b/src/rag/__pycache__/rag_integration.cpython-312.pyc differ diff --git a/src/rag/__pycache__/vector_store.cpython-312.pyc b/src/rag/__pycache__/vector_store.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..626bfe56581cf3e277d08be5d79624be64b77b97 Binary files 
/dev/null and b/src/rag/__pycache__/vector_store.cpython-312.pyc differ diff --git a/src/rag/rag_integration.py b/src/rag/rag_integration.py new file mode 100644 index 0000000000000000000000000000000000000000..184015a1596825f61932f6629a716bbb77e0f2e7 --- /dev/null +++ b/src/rag/rag_integration.py @@ -0,0 +1,410 @@ +import os +import re +import json +from typing import List, Dict + +from mllm_tools.utils import _prepare_text_inputs +from task_generator import ( + get_prompt_rag_query_generation_fix_error, + get_prompt_detect_plugins, + get_prompt_rag_query_generation_technical, + get_prompt_rag_query_generation_vision_storyboard, + get_prompt_rag_query_generation_narration, + get_prompt_rag_query_generation_code +) +from src.rag.vector_store import EnhancedRAGVectorStore as RAGVectorStore + +class RAGIntegration: + """Class for integrating RAG (Retrieval Augmented Generation) functionality. + + This class handles RAG integration including plugin detection, query generation, + and document retrieval. + + Args: + helper_model: Model used for generating queries and processing text + output_dir (str): Directory for output files + chroma_db_path (str): Path to ChromaDB + manim_docs_path (str): Path to Manim documentation + embedding_model (str): Name of embedding model to use + use_langfuse (bool, optional): Whether to use Langfuse logging. Defaults to True + session_id (str, optional): Session identifier. Defaults to None + """ + + def __init__(self, helper_model, output_dir, chroma_db_path, manim_docs_path, embedding_model, use_langfuse=True, session_id=None): + self.helper_model = helper_model + self.output_dir = output_dir + self.manim_docs_path = manim_docs_path + self.session_id = session_id + self.relevant_plugins = None + + self.vector_store = RAGVectorStore( + chroma_db_path=chroma_db_path, + manim_docs_path=manim_docs_path, + embedding_model=embedding_model, + session_id=self.session_id, + use_langfuse=use_langfuse, + helper_model=helper_model + ) + + def set_relevant_plugins(self, plugins: List[str]) -> None: + """Set the relevant plugins for the current video. + + Args: + plugins (List[str]): List of plugin names to set as relevant + """ + self.relevant_plugins = plugins + + def detect_relevant_plugins(self, topic: str, description: str) -> List[str]: + """Detect which plugins might be relevant based on topic and description. 
+ + Args: + topic (str): Topic of the video + description (str): Description of the video content + + Returns: + List[str]: List of detected relevant plugin names + """ + # Load plugin descriptions + plugins = self._load_plugin_descriptions() + if not plugins: + return [] + + # Get formatted prompt using the task_generator function + prompt = get_prompt_detect_plugins( + topic=topic, + description=description, + plugin_descriptions=json.dumps([{'name': p['name'], 'description': p['description']} for p in plugins], indent=2) + ) + + try: + response = self.helper_model( + _prepare_text_inputs(prompt), + metadata={"generation_name": "detect-relevant-plugins", "tags": [topic, "plugin-detection"], "session_id": self.session_id} + ) # Clean the response to ensure it only contains the JSON array + json_match = re.search(r'```json(.*)```', response, re.DOTALL) + if not json_match: + print(f"No JSON block found in plugin detection response: {response[:200]}...") + return [] + response = json_match.group(1) + try: + relevant_plugins = json.loads(response) + except json.JSONDecodeError as e: + print(f"JSONDecodeError when parsing relevant plugins: {e}") + print(f"Response text was: {response}") + return [] + + print(f"LLM detected relevant plugins: {relevant_plugins}") + return relevant_plugins + except Exception as e: + print(f"Error detecting plugins with LLM: {e}") + return [] + + def _load_plugin_descriptions(self) -> list: + """Load plugin descriptions from JSON file. + + Returns: + list: List of plugin descriptions, empty list if loading fails + """ + try: + plugin_config_path = os.path.join( + self.manim_docs_path, + "plugin_docs", + "plugins.json" + ) + if os.path.exists(plugin_config_path): + with open(plugin_config_path, "r") as f: + return json.load(f) + else: + print(f"Plugin descriptions file not found at {plugin_config_path}") + return [] + except Exception as e: + print(f"Error loading plugin descriptions: {e}") + return [] + + def _generate_rag_queries_storyboard(self, scene_plan: str, scene_trace_id: str = None, topic: str = None, scene_number: int = None, session_id: str = None, relevant_plugins: List[str] = []) -> List[str]: + """Generate RAG queries from the scene plan to help create storyboard. + + Args: + scene_plan (str): Scene plan text to generate queries from + scene_trace_id (str, optional): Trace identifier for the scene. Defaults to None + topic (str, optional): Topic name. Defaults to None + scene_number (int, optional): Scene number. Defaults to None + session_id (str, optional): Session identifier. Defaults to None + relevant_plugins (List[str], optional): List of relevant plugins. Defaults to empty list + + Returns: + List[str]: List of generated RAG queries + """ + cache_key = f"{topic}_scene{scene_number}_storyboard_rag" + cache_dir = os.path.join(self.output_dir, re.sub(r'[^a-z0-9_]+', '_', topic.lower()), f"scene{scene_number}", "rag_cache") + os.makedirs(cache_dir, exist_ok=True) + cache_file = os.path.join(cache_dir, "rag_queries_storyboard.json") + + if os.path.exists(cache_file): + with open(cache_file, 'r') as f: + return json.load(f) + + # Format relevant plugins as a string + plugins_str = ", ".join(relevant_plugins) if relevant_plugins else "No plugins are relevant." 
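+        # Queries are cached per topic/scene on disk, e.g. (illustrative path)
+        # output/<topic_slug>/scene<N>/rag_cache/rag_queries_storyboard.json,
+        # so reruns of the same scene skip the query-generation LLM call.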
+
+        # Generate the prompt with only the required arguments
+        prompt = get_prompt_rag_query_generation_vision_storyboard(
+            scene_plan=scene_plan,
+            relevant_plugins=plugins_str
+        )
+        queries = self.helper_model(
+            _prepare_text_inputs(prompt),
+            metadata={"generation_name": "rag_query_generation_storyboard", "trace_id": scene_trace_id, "tags": [topic, f"scene{scene_number}"], "session_id": session_id}
+        )
+
+        # Retrieve the JSON block fenced in triple backticks
+        try:  # handle potential JSON decode errors
+            json_match = re.search(r'```json(.*)```', queries, re.DOTALL)
+            if not json_match:
+                print(f"No JSON block found in storyboard RAG queries response: {queries[:200]}...")
+                return []
+            queries = json_match.group(1)
+            queries = json.loads(queries)
+        except json.JSONDecodeError as e:
+            print(f"JSONDecodeError when parsing RAG queries for storyboard: {e}")
+            print(f"Response text was: {queries}")
+            return []  # Return empty list in case of parsing error
+
+        # Cache the queries
+        with open(cache_file, 'w') as f:
+            json.dump(queries, f)
+
+        return queries
+
+    def _generate_rag_queries_technical(self, storyboard: str, scene_trace_id: str = None, topic: str = None, scene_number: int = None, session_id: str = None, relevant_plugins: List[str] = []) -> List[str]:
+        """Generate RAG queries from the storyboard to help create technical implementation.
+
+        Args:
+            storyboard (str): Storyboard text to generate queries from
+            scene_trace_id (str, optional): Trace identifier for the scene. Defaults to None
+            topic (str, optional): Topic name. Defaults to None
+            scene_number (int, optional): Scene number. Defaults to None
+            session_id (str, optional): Session identifier. Defaults to None
+            relevant_plugins (List[str], optional): List of relevant plugins. Defaults to empty list
+
+        Returns:
+            List[str]: List of generated RAG queries
+        """
+        cache_key = f"{topic}_scene{scene_number}_technical_rag"
+        cache_dir = os.path.join(self.output_dir, re.sub(r'[^a-z0-9_]+', '_', topic.lower()), f"scene{scene_number}", "rag_cache")
+        os.makedirs(cache_dir, exist_ok=True)
+        cache_file = os.path.join(cache_dir, "rag_queries_technical.json")
+
+        if os.path.exists(cache_file):
+            with open(cache_file, 'r') as f:
+                return json.load(f)
+        prompt = get_prompt_rag_query_generation_technical(
+            storyboard=storyboard,
+            relevant_plugins=", ".join(relevant_plugins) if relevant_plugins else "No plugins are relevant."
+ ) + + queries = self.helper_model( + _prepare_text_inputs(prompt), + metadata={"generation_name": "rag_query_generation_technical", "trace_id": scene_trace_id, "tags": [topic, f"scene{scene_number}"], "session_id": session_id} + ) + + try: # add try-except block to handle potential json decode errors + json_match = re.search(r'```json(.*)```', queries, re.DOTALL) + if not json_match: + print(f"No JSON block found in technical RAG queries response: {queries[:200]}...") + return [] + queries = json_match.group(1) + queries = json.loads(queries) + except json.JSONDecodeError as e: + print(f"JSONDecodeError when parsing RAG queries for technical implementation: {e}") + print(f"Response text was: {queries}") + return [] # Return empty list in case of parsing error + + # Cache the queries + with open(cache_file, 'w') as f: + json.dump(queries, f) + + return queries + + def _generate_rag_queries_narration(self, storyboard: str, scene_trace_id: str = None, topic: str = None, scene_number: int = None, session_id: str = None, relevant_plugins: List[str] = []) -> List[str]: + """Generate RAG queries from the storyboard to help create narration plan. + + Args: + storyboard (str): Storyboard text to generate queries from + scene_trace_id (str, optional): Trace identifier for the scene. Defaults to None + topic (str, optional): Topic name. Defaults to None + scene_number (int, optional): Scene number. Defaults to None + session_id (str, optional): Session identifier. Defaults to None + relevant_plugins (List[str], optional): List of relevant plugins. Defaults to empty list + + Returns: + List[str]: List of generated RAG queries + """ + cache_key = f"{topic}_scene{scene_number}_narration_rag" + cache_dir = os.path.join(self.output_dir, re.sub(r'[^a-z0-9_]+', '_', topic.lower()), f"scene{scene_number}", "rag_cache") + os.makedirs(cache_dir, exist_ok=True) + cache_file = os.path.join(cache_dir, "rag_queries_narration.json") + + if os.path.exists(cache_file): + with open(cache_file, 'r') as f: + return json.load(f) + + prompt = get_prompt_rag_query_generation_narration( + storyboard=storyboard, + relevant_plugins=", ".join(relevant_plugins) if relevant_plugins else "No plugins are relevant." + ) + + queries = self.helper_model( + _prepare_text_inputs(prompt), + metadata={"generation_name": "rag_query_generation_narration", "trace_id": scene_trace_id, "tags": [topic, f"scene{scene_number}"], "session_id": session_id} + ) + + try: # add try-except block to handle potential json decode errors + json_match = re.search(r'```json(.*)```', queries, re.DOTALL) + if not json_match: + print(f"No JSON block found in narration RAG queries response: {queries[:200]}...") + return [] + queries = json_match.group(1) + queries = json.loads(queries) + except json.JSONDecodeError as e: + print(f"JSONDecodeError when parsing narration RAG queries: {e}") + print(f"Response text was: {queries}") + return [] # Return empty list in case of parsing error + + # Cache the queries + with open(cache_file, 'w') as f: + json.dump(queries, f) + + return queries + + def get_relevant_docs(self, rag_queries: List[Dict], scene_trace_id: str, topic: str, scene_number: int) -> List[str]: + """Get relevant documentation using the vector store. 
+ + Args: + rag_queries (List[Dict]): List of RAG queries to search for + scene_trace_id (str): Trace identifier for the scene + topic (str): Topic name + scene_number (int): Scene number + + Returns: + List[str]: List of relevant documentation snippets + """ + return self.vector_store.find_relevant_docs( + queries=rag_queries, + k=2, + trace_id=scene_trace_id, + topic=topic, + scene_number=scene_number + ) + + def _generate_rag_queries_code(self, implementation_plan: str, scene_trace_id: str = None, topic: str = None, scene_number: int = None, relevant_plugins: List[str] = None) -> List[str]: + """Generate RAG queries from implementation plan. + + Args: + implementation_plan (str): Implementation plan text to generate queries from + scene_trace_id (str, optional): Trace identifier for the scene. Defaults to None + topic (str, optional): Topic name. Defaults to None + scene_number (int, optional): Scene number. Defaults to None + relevant_plugins (List[str], optional): List of relevant plugins. Defaults to None + + Returns: + List[str]: List of generated RAG queries + """ + cache_key = f"{topic}_scene{scene_number}" + cache_dir = os.path.join(self.output_dir, re.sub(r'[^a-z0-9_]+', '_', topic.lower()), f"scene{scene_number}", "rag_cache") + os.makedirs(cache_dir, exist_ok=True) + cache_file = os.path.join(cache_dir, "rag_queries_code.json") + + if os.path.exists(cache_file): + with open(cache_file, 'r') as f: + return json.load(f) + + prompt = get_prompt_rag_query_generation_code( + implementation_plan=implementation_plan, + relevant_plugins=", ".join(relevant_plugins) if relevant_plugins else "No plugins are relevant." + ) + + try: + response = self.helper_model( + _prepare_text_inputs(prompt), + metadata={"generation_name": "rag_query_generation_code", "trace_id": scene_trace_id, "tags": [topic, f"scene{scene_number}"], "session_id": self.session_id} + ) + + # Clean and parse response + json_match = re.search(r'```json(.*)```', response, re.DOTALL) + if not json_match: + print(f"No JSON block found in code RAG queries response: {response[:200]}...") + return [] + response = json_match.group(1) + queries = json.loads(response) + + # Cache the queries + with open(cache_file, 'w') as f: + json.dump(queries, f) + + return queries + except Exception as e: + print(f"Error generating RAG queries: {e}") + return [] + + def _generate_rag_queries_error_fix(self, error: str, code: str, scene_trace_id: str = None, topic: str = None, scene_number: int = None, session_id: str = None) -> List[str]: + """Generate RAG queries for fixing code errors. + + Args: + error (str): Error message to generate queries from + code (str): Code containing the error + scene_trace_id (str, optional): Trace identifier for the scene. Defaults to None + topic (str, optional): Topic name. Defaults to None + scene_number (int, optional): Scene number. Defaults to None + session_id (str, optional): Session identifier. Defaults to None + + Returns: + List[str]: List of generated RAG queries + """ + if self.relevant_plugins is None: + print("Warning: No plugins have been detected yet") + plugins_str = "No plugins are relevant." + else: + plugins_str = ", ".join(self.relevant_plugins) if self.relevant_plugins else "No plugins are relevant." 
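An editorial note: the query-generation methods in this class all repeat the same extract-and-parse dance on the model output. If a refactor is ever wanted, the pattern condenses into one small helper. This is a sketch, not part of the diff, and the name `_parse_json_block` is invented:

```python
import json
import re
from typing import Optional


def _parse_json_block(response: str, label: str) -> Optional[list]:
    """Extract and parse the JSON block fenced in triple backticks.

    Returns None when no block is found or the JSON is malformed,
    mirroring the return-[] fallbacks in the methods above.
    """
    match = re.search(r'```json(.*)```', response, re.DOTALL)
    if not match:
        print(f"No JSON block found in {label} response: {response[:200]}...")
        return None
    try:
        return json.loads(match.group(1))
    except json.JSONDecodeError as e:
        print(f"JSONDecodeError when parsing {label}: {e}")
        return None
```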
+ + cache_key = f"{topic}_scene{scene_number}_error_fix" + cache_dir = os.path.join(self.output_dir, re.sub(r'[^a-z0-9_]+', '_', topic.lower()), f"scene{scene_number}", "rag_cache") + os.makedirs(cache_dir, exist_ok=True) + cache_file = os.path.join(cache_dir, "rag_queries_error_fix.json") + + if os.path.exists(cache_file): + with open(cache_file, 'r') as f: + cached_queries = json.load(f) + print(f"Using cached RAG queries for error fix in {cache_key}") + return cached_queries + + prompt = get_prompt_rag_query_generation_fix_error( + error=error, + code=code, + relevant_plugins=plugins_str + ) + + queries = self.helper_model( + _prepare_text_inputs(prompt), + metadata={"generation_name": "rag-query-generation-fix-error", "trace_id": scene_trace_id, "tags": [topic, f"scene{scene_number}"], "session_id": session_id} + ) + + try: + # retrieve json triple backticks + json_match = re.search(r'```json(.*)```', queries, re.DOTALL) + if not json_match: + print(f"No JSON block found in error fix RAG queries response: {queries[:200]}...") + return [] + queries = json_match.group(1) + queries = json.loads(queries) + except json.JSONDecodeError as e: + print(f"JSONDecodeError when parsing RAG queries for error fix: {e}") + print(f"Response text was: {queries}") + return [] + + # Cache the queries + with open(cache_file, 'w') as f: + json.dump(queries, f) + + return queries \ No newline at end of file diff --git a/src/rag/vector_store.py b/src/rag/vector_store.py new file mode 100644 index 0000000000000000000000000000000000000000..a47db6c96768ff587f73eabaa13c30412f89b991 --- /dev/null +++ b/src/rag/vector_store.py @@ -0,0 +1,465 @@ +import json +import os +import ast +from typing import List, Dict, Tuple, Optional +import uuid +from langchain.schema import Document +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_community.document_loaders import TextLoader +from langchain_community.vectorstores import Chroma +from langchain_text_splitters import Language +from langchain_core.embeddings import Embeddings +import statistics +import tiktoken +from tqdm import tqdm +from langfuse import Langfuse +from langchain_community.embeddings import HuggingFaceEmbeddings +import re + +from mllm_tools.utils import _prepare_text_inputs +from task_generator import get_prompt_detect_plugins + +class CodeAwareTextSplitter: + """Enhanced text splitter that understands code structure.""" + + def __init__(self, chunk_size: int = 1000, chunk_overlap: int = 200): + self.chunk_size = chunk_size + self.chunk_overlap = chunk_overlap + + def split_python_file(self, content: str, metadata: dict) -> List[Document]: + """Split Python files preserving code structure.""" + documents = [] + + try: + tree = ast.parse(content) + + # Extract classes and functions with their docstrings + for node in ast.walk(tree): + if isinstance(node, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)): + # Get the source code segment + start_line = node.lineno + end_line = getattr(node, 'end_lineno', start_line + 20) + + lines = content.split('\n') + code_segment = '\n'.join(lines[start_line-1:end_line]) + + # Extract docstring + docstring = ast.get_docstring(node) or "" + + # Create enhanced content + enhanced_content = f""" +Type: {"Class" if isinstance(node, ast.ClassDef) else "Function"} +Name: {node.name} +Docstring: {docstring} + +Code: +```python +{code_segment} +``` + """.strip() + + # Enhanced metadata + enhanced_metadata = { + **metadata, + 'type': 'class' if isinstance(node, ast.ClassDef) else 
'function', + 'name': node.name, + 'start_line': start_line, + 'end_line': end_line, + 'has_docstring': bool(docstring), + 'docstring': docstring[:200] + "..." if len(docstring) > 200 else docstring + } + + documents.append(Document( + page_content=enhanced_content, + metadata=enhanced_metadata + )) + + # Also create chunks for imports and module-level code + imports_and_constants = self._extract_imports_and_constants(content) + if imports_and_constants: + documents.append(Document( + page_content=f"Module-level imports and constants:\n\n{imports_and_constants}", + metadata={**metadata, 'type': 'module_level', 'name': 'imports_constants'} + )) + + except SyntaxError: + # Fallback to regular text splitting for invalid Python + splitter = RecursiveCharacterTextSplitter.from_language( + language=Language.PYTHON, + chunk_size=self.chunk_size, + chunk_overlap=self.chunk_overlap + ) + documents = splitter.split_documents([Document(page_content=content, metadata=metadata)]) + + return documents + + def split_markdown_file(self, content: str, metadata: dict) -> List[Document]: + """Split Markdown files preserving structure.""" + documents = [] + + # Split by headers while preserving hierarchy + sections = self._split_by_headers(content) + + for section in sections: + # Extract code blocks + code_blocks = self._extract_code_blocks(section['content']) + + # Create document for text content + text_content = self._remove_code_blocks(section['content']) + if text_content.strip(): + enhanced_metadata = { + **metadata, + 'type': 'markdown_section', + 'header': section['header'], + 'level': section['level'], + 'has_code_blocks': len(code_blocks) > 0 + } + + documents.append(Document( + page_content=f"Header: {section['header']}\n\n{text_content}", + metadata=enhanced_metadata + )) + + # Create separate documents for code blocks + for i, code_block in enumerate(code_blocks): + enhanced_metadata = { + **metadata, + 'type': 'code_block', + 'language': code_block['language'], + 'in_section': section['header'], + 'block_index': i + } + + documents.append(Document( + page_content=f"Code example in '{section['header']}':\n\n```{code_block['language']}\n{code_block['code']}\n```", + metadata=enhanced_metadata + )) + + return documents + + def _extract_imports_and_constants(self, content: str) -> str: + """Extract imports and module-level constants.""" + lines = content.split('\n') + relevant_lines = [] + for line in lines: + stripped = line.strip() + if (stripped.startswith('import ') or + stripped.startswith('from ') or + (stripped and not stripped.startswith('def ') and + not stripped.startswith('class ') and + not stripped.startswith('#') and + '=' in stripped and stripped.split('=')[0].strip().isupper())): + relevant_lines.append(line) + + return '\n'.join(relevant_lines) + + def _split_by_headers(self, content: str) -> List[Dict]: + """Split markdown content by headers.""" + sections = [] + lines = content.split('\n') + current_section = {'header': 'Introduction', 'level': 0, 'content': ''} + + for line in lines: + header_match = re.match(r'^(#{1,6})\s+(.+)$', line) + if header_match: + # Save previous section + if current_section['content'].strip(): + sections.append(current_section) + + # Start new section + level = len(header_match.group(1)) + header = header_match.group(2) + current_section = {'header': header, 'level': level, 'content': ''} + else: + current_section['content'] += line + '\n' + + # Add last section + if current_section['content'].strip(): + sections.append(current_section) + + return 
sections + + def _extract_code_blocks(self, content: str) -> List[Dict]: + """Extract code blocks from markdown content.""" + code_blocks = [] + pattern = r'```(\w+)?\n(.*?)\n```' + + for match in re.finditer(pattern, content, re.DOTALL): + language = match.group(1) or 'text' + code = match.group(2) + code_blocks.append({'language': language, 'code': code}) + + return code_blocks + + def _remove_code_blocks(self, content: str) -> str: + """Remove code blocks from content.""" + pattern = r'```\w*\n.*?\n```' + return re.sub(pattern, '', content, flags=re.DOTALL) + +class EnhancedRAGVectorStore: + """Enhanced RAG vector store with improved code understanding.""" + + def __init__(self, + chroma_db_path: str = "chroma_db", + manim_docs_path: str = "rag/manim_docs", + embedding_model: str = "hf:ibm-granite/granite-embedding-30m-english", + trace_id: str = None, + session_id: str = None, + use_langfuse: bool = True, + helper_model = None): + self.chroma_db_path = chroma_db_path + self.manim_docs_path = manim_docs_path + self.embedding_model = embedding_model + self.trace_id = trace_id + self.session_id = session_id + self.use_langfuse = use_langfuse + self.helper_model = helper_model + self.enc = tiktoken.encoding_for_model("gpt-4") + self.plugin_stores = {} + self.code_splitter = CodeAwareTextSplitter() + self.vector_store = self._load_or_create_vector_store() + + def _load_or_create_vector_store(self): + """Enhanced vector store creation with better document processing.""" + print("Creating enhanced vector store with code-aware processing...") + core_path = os.path.join(self.chroma_db_path, "manim_core_enhanced") + + if os.path.exists(core_path): + print("Loading existing enhanced ChromaDB...") + self.core_vector_store = Chroma( + collection_name="manim_core_enhanced", + persist_directory=core_path, + embedding_function=self._get_embedding_function() + ) + else: + print("Creating new enhanced ChromaDB...") + self.core_vector_store = self._create_enhanced_core_store() + + # Process plugins with enhanced splitting + plugin_docs_path = os.path.join(self.manim_docs_path, "plugin_docs") + if os.path.exists(plugin_docs_path): + for plugin_name in os.listdir(plugin_docs_path): + plugin_store_path = os.path.join(self.chroma_db_path, f"manim_plugin_{plugin_name}_enhanced") + if os.path.exists(plugin_store_path): + print(f"Loading existing enhanced plugin store: {plugin_name}") + self.plugin_stores[plugin_name] = Chroma( + collection_name=f"manim_plugin_{plugin_name}_enhanced", + persist_directory=plugin_store_path, + embedding_function=self._get_embedding_function() + ) + else: + print(f"Creating new enhanced plugin store: {plugin_name}") + plugin_path = os.path.join(plugin_docs_path, plugin_name) + if os.path.isdir(plugin_path): + plugin_store = Chroma( + collection_name=f"manim_plugin_{plugin_name}_enhanced", + embedding_function=self._get_embedding_function(), + persist_directory=plugin_store_path + ) + plugin_docs = self._process_documentation_folder_enhanced(plugin_path) + if plugin_docs: + self._add_documents_to_store(plugin_store, plugin_docs, plugin_name) + self.plugin_stores[plugin_name] = plugin_store + + return self.core_vector_store + + def _get_embedding_function(self) -> Embeddings: + """Enhanced embedding function with better model selection.""" + if self.embedding_model.startswith('hf:'): + model_name = self.embedding_model[3:] + print(f"Using HuggingFaceEmbeddings with model: {model_name}") + + # Use better models for code understanding + if 'code' not in model_name.lower(): + 
print("Consider using a code-specific embedding model like 'microsoft/codebert-base'") + + return HuggingFaceEmbeddings( + model_name=model_name, + model_kwargs={'device': 'cpu'}, + encode_kwargs={'normalize_embeddings': True} + ) + else: + raise ValueError("Only HuggingFace embeddings are supported in this configuration.") + + def _create_enhanced_core_store(self): + """Create enhanced core store with better document processing.""" + core_vector_store = Chroma( + collection_name="manim_core_enhanced", + embedding_function=self._get_embedding_function(), + persist_directory=os.path.join(self.chroma_db_path, "manim_core_enhanced") + ) + + core_docs = self._process_documentation_folder_enhanced( + os.path.join(self.manim_docs_path, "manim_core") + ) + if core_docs: + self._add_documents_to_store(core_vector_store, core_docs, "manim_core_enhanced") + + return core_vector_store + + def _process_documentation_folder_enhanced(self, folder_path: str) -> List[Document]: + """Enhanced document processing with code-aware splitting.""" + all_docs = [] + + for root, _, files in os.walk(folder_path): + for file in files: + if file.endswith(('.md', '.py')): + file_path = os.path.join(root, file) + try: + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + + base_metadata = { + 'source': file_path, + 'filename': file, + 'file_type': 'python' if file.endswith('.py') else 'markdown', + 'relative_path': os.path.relpath(file_path, folder_path) + } + + if file.endswith('.py'): + docs = self.code_splitter.split_python_file(content, base_metadata) + else: # .md files + docs = self.code_splitter.split_markdown_file(content, base_metadata) + + # Add source prefix to content + for doc in docs: + doc.page_content = f"Source: {file_path}\nType: {doc.metadata.get('type', 'unknown')}\n\n{doc.page_content}" + + all_docs.extend(docs) + + except Exception as e: + print(f"Error loading file {file_path}: {e}") + + print(f"Processed {len(all_docs)} enhanced document chunks from {folder_path}") + return all_docs + + def _add_documents_to_store(self, vector_store: Chroma, documents: List[Document], store_name: str): + """Enhanced document addition with better batching.""" + print(f"Adding {len(documents)} enhanced documents to {store_name} store") + + # Group documents by type for better organization + doc_types = {} + for doc in documents: + doc_type = doc.metadata.get('type', 'unknown') + if doc_type not in doc_types: + doc_types[doc_type] = [] + doc_types[doc_type].append(doc) + + print(f"Document types distribution: {dict((k, len(v)) for k, v in doc_types.items())}") + + # Calculate token statistics + token_lengths = [len(self.enc.encode(doc.page_content)) for doc in documents] + print(f"Token length statistics for {store_name}: " + f"Min: {min(token_lengths)}, Max: {max(token_lengths)}, " + f"Mean: {sum(token_lengths) / len(token_lengths):.1f}, " + f"Median: {statistics.median(token_lengths):.1f}") + + batch_size = 10 + for i in tqdm(range(0, len(documents), batch_size), desc=f"Processing {store_name} enhanced batches"): + batch_docs = documents[i:i + batch_size] + batch_ids = [str(uuid.uuid4()) for _ in batch_docs] + vector_store.add_documents(documents=batch_docs, ids=batch_ids) + + vector_store.persist() + + def find_relevant_docs(self, queries: List[Dict], k: int = 5, trace_id: str = None, topic: str = None, scene_number: int = None) -> str: + """Find relevant documents - compatibility method that calls the enhanced version.""" + return self.find_relevant_docs_enhanced(queries, k, trace_id, 
topic, scene_number) + + def find_relevant_docs_enhanced(self, queries: List[Dict], k: int = 5, trace_id: str = None, topic: str = None, scene_number: int = None) -> str: + """Enhanced document retrieval with type-aware search.""" + # Separate queries by intent + code_queries = [q for q in queries if any(keyword in q["query"].lower() + for keyword in ["function", "class", "method", "import", "code", "implementation"])] + concept_queries = [q for q in queries if q not in code_queries] + + all_results = [] + + # Search with different strategies for different query types + for query in code_queries: + results = self._search_with_filters( + query["query"], + k=k, + filter_metadata={'type': ['function', 'class', 'code_block']}, + boost_code=True + ) + all_results.extend(results) + + for query in concept_queries: + results = self._search_with_filters( + query["query"], + k=k, + filter_metadata={'type': ['markdown_section', 'module_level']}, + boost_code=False + ) + all_results.extend(results) + + # Remove duplicates and format results + unique_results = self._remove_duplicates(all_results) + return self._format_results(unique_results) + + def _search_with_filters(self, query: str, k: int, filter_metadata: Dict = None, boost_code: bool = False) -> List[Dict]: + """Search with metadata filters and result boosting.""" + # This is a simplified version - in practice, you'd implement proper filtering + core_results = self.core_vector_store.similarity_search_with_relevance_scores( + query=query, k=k, score_threshold=0.3 + ) + + formatted_results = [] + for result in core_results: + doc, score = result + # Boost scores for code-related results if needed + if boost_code and doc.metadata.get('type') in ['function', 'class', 'code_block']: + score *= 1.2 + + formatted_results.append({ + "query": query, + "source": doc.metadata['source'], + "content": doc.page_content, + "score": score, + "type": doc.metadata.get('type', 'unknown'), + "metadata": doc.metadata + }) + + return formatted_results + + def _remove_duplicates(self, results: List[Dict]) -> List[Dict]: + """Remove duplicate results based on content similarity.""" + unique_results = [] + seen_content = set() + + for result in sorted(results, key=lambda x: x['score'], reverse=True): + content_hash = hash(result['content'][:200]) # Hash first 200 chars + if content_hash not in seen_content: + unique_results.append(result) + seen_content.add(content_hash) + + return unique_results[:10] # Return top 10 unique results + + def _format_results(self, results: List[Dict]) -> str: + """Format results with enhanced presentation.""" + if not results: + return "No relevant documentation found." 
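`_remove_duplicates` above keys on `hash(content[:200])`, so two snippets sharing their first 200 characters collapse into one entry. (Python salts `str` hashes per process, which is harmless here because the seen-set only lives for a single call.) In miniature, with fabricated results:

```python
results = [
    {"content": "Circle is a VMobject representing a circle ...", "score": 0.9},
    {"content": "Circle is a VMobject representing a circle ...", "score": 0.7},
    {"content": "Axes builds a 2D coordinate system ...", "score": 0.8},
]

seen, unique = set(), []
for r in sorted(results, key=lambda x: x["score"], reverse=True):
    key = hash(r["content"][:200])  # first 200 chars stand in for the snippet
    if key not in seen:
        seen.add(key)
        unique.append(r)

print([r["score"] for r in unique])  # [0.9, 0.8]; the 0.7 duplicate is gone
```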
+ + formatted = "## Relevant Documentation\n\n" + + # Group by type + by_type = {} + for result in results: + result_type = result['type'] + if result_type not in by_type: + by_type[result_type] = [] + by_type[result_type].append(result) + + for result_type, type_results in by_type.items(): + formatted += f"### {result_type.replace('_', ' ').title()} Documentation\n\n" + + for result in type_results: + formatted += f"**Source:** {result['source']}\n" + formatted += f"**Relevance Score:** {result['score']:.3f}\n" + formatted += f"**Content:**\n```\n{result['content'][:500]}...\n```\n\n" + + return formatted + +# Update the existing RAGVectorStore class alias for backward compatibility +RAGVectorStore = EnhancedRAGVectorStore \ No newline at end of file diff --git a/src/utils/__init__.py b/src/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/utils/__pycache__/__init__.cpython-312.pyc b/src/utils/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..be92c99ce036ca0febeb1b9a2673b962302597b3 Binary files /dev/null and b/src/utils/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/utils/__pycache__/kokoro_voiceover.cpython-312.pyc b/src/utils/__pycache__/kokoro_voiceover.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..553c6697e641cc995627505b0e5b0e392c20e6cb Binary files /dev/null and b/src/utils/__pycache__/kokoro_voiceover.cpython-312.pyc differ diff --git a/src/utils/__pycache__/utils.cpython-312.pyc b/src/utils/__pycache__/utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d326764adc893f5c287fa558b9bed049bc19ba1b Binary files /dev/null and b/src/utils/__pycache__/utils.cpython-312.pyc differ diff --git a/src/utils/allowed_models.json b/src/utils/allowed_models.json new file mode 100644 index 0000000000000000000000000000000000000000..74b235b6a7cb5a336c0093163e50563748724152 --- /dev/null +++ b/src/utils/allowed_models.json @@ -0,0 +1,37 @@ +{ + "allowed_models": [ + "gemini/gemini-1.5-pro-002", + "gemini/gemini-1.5-flash-002", + "github/gpt-4.1", + "gemini/gemini-2.5-flash-preview-04-17", + "gemini/gemini-2.0-flash-001", + "gemini/gemini-2.5-pro-preview-03-25", + "vertex_ai/gemini-1.5-flash-002", + "vertex_ai/gemini-1.5-pro-002", + "vertex_ai/gemini-2.0-flash-001", + "openai/o3-mini", + "gpt-4o", + "azure/gpt-4o", + "azure/gpt-4o-mini", + "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", + "bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0", + "bedrock/anthropic.claude-3-5-haiku-20241022-v1:0", + "bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0", + "openrouter/openai/gpt-4o", + "openrouter/openai/gpt-4o-mini", + "openrouter/openai/gpt-3.5-turbo", + "openrouter/anthropic/claude-3.5-sonnet", + "openrouter/anthropic/claude-3-haiku", + "openrouter/google/gemini-pro-1.5", + "openrouter/deepseek/deepseek-chat", + "openrouter/qwen/qwen-2.5-72b-instruct", + "openrouter/meta-llama/llama-3.1-8b-instruct:free", + "openrouter/microsoft/phi-3-mini-128k-instruct:free" + ], + "embedding_models": [ + "text-embedding-ada-002", + "vertex_ai/text-embedding-005", + "azure/text-embedding-3-large", + "gemini/gemini-embedding-exp-03-07" + ] +} \ No newline at end of file diff --git a/src/utils/kokoro_voiceover.py b/src/utils/kokoro_voiceover.py new file mode 100644 index 0000000000000000000000000000000000000000..4b7ee8675a4eeb2ee840d27e490e4922e3cdf730 --- /dev/null +++ 
b/src/utils/kokoro_voiceover.py @@ -0,0 +1,117 @@ +""" +Copyright (c) 2025 Xposed73 +All rights reserved. +This file is part of the Manim Voiceover project. +""" + +import hashlib +import json +import numpy as np +from pathlib import Path +from manim_voiceover.services.base import SpeechService +from kokoro_onnx import Kokoro +from manim_voiceover.helper import remove_bookmarks, wav2mp3 +from scipy.io.wavfile import write as write_wav +from src.config.config import Config + + +class KokoroService(SpeechService): + """Speech service class for kokoro_self (using text_to_speech via Kokoro ONNX).""" + + def __init__(self, engine=None, + model_path: str = Config.KOKORO_MODEL_PATH, + voices_path: str = Config.KOKORO_VOICES_PATH, + voice: str = Config.KOKORO_DEFAULT_VOICE, + speed: float = Config.KOKORO_DEFAULT_SPEED, + lang: str = Config.KOKORO_DEFAULT_LANG, + **kwargs): + self.kokoro = Kokoro(model_path, voices_path) + self.voice = voice + self.speed = speed + self.lang = lang + + if engine is None: + engine = self.text_to_speech # Default to local function + + self.engine = engine + super().__init__(**kwargs) + + def get_data_hash(self, input_data: dict) -> str: + """ + Generates a hash based on the input data dictionary. + The hash is used to create a unique identifier for the input data. + + Parameters: + input_data (dict): A dictionary of input data (e.g., text, voice, etc.). + + Returns: + str: The generated hash as a string. + """ + # Convert the input data dictionary to a JSON string (sorted for consistency) + data_str = json.dumps(input_data, sort_keys=True) + # Generate a SHA-256 hash of the JSON string + return hashlib.sha256(data_str.encode('utf-8')).hexdigest() + + def text_to_speech(self, text, output_file, voice_name, speed, lang): + """ + Generates speech from text using Kokoro ONNX and saves the audio file. + Normalizes the audio to make it audible. 
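`get_data_hash` above keys the audio cache on a SHA-256 of the canonicalized input dict; `sort_keys=True` is what makes the key independent of dict insertion order. A standalone check of the same idea:

```python
import hashlib
import json


def cache_key(input_data: dict) -> str:
    # sort_keys=True canonicalizes the dict, so key order cannot change the hash
    return hashlib.sha256(
        json.dumps(input_data, sort_keys=True).encode("utf-8")
    ).hexdigest()


k1 = cache_key({"input_text": "hello", "voice": "af"})
k2 = cache_key({"voice": "af", "input_text": "hello"})
assert k1 == k2  # same inputs -> same key -> same cached audio file
```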
+        """
+        # Generate audio samples using Kokoro
+        samples, sample_rate = self.kokoro.create(
+            text, voice=voice_name, speed=speed, lang=lang
+        )
+
+        # Normalize audio to the range [-1, 1]
+        max_val = np.max(np.abs(samples))
+        if max_val > 0:
+            samples = samples / max_val
+
+        # Convert to 16-bit integer PCM format
+        samples = (samples * 32767).astype("int16")
+
+        # Save the normalized audio as a .wav file
+        write_wav(output_file, sample_rate, samples)
+        print(f"Saved at {output_file}")
+
+        return output_file
+
+
+    def generate_from_text(self, text: str, cache_dir: str = None, path: str = None) -> dict:
+        if cache_dir is None:
+            cache_dir = self.cache_dir
+
+        input_data = {"input_text": text, "service": "kokoro_self", "voice": self.voice, "lang": self.lang}
+        cached_result = self.get_cached_result(input_data, cache_dir)
+        if cached_result is not None:
+            return cached_result
+
+        if path is None:
+            audio_path = self.get_data_hash(input_data) + ".mp3"
+        else:
+            audio_path = path
+
+        # Generate .wav file using the text_to_speech function
+        audio_path_wav = str(Path(cache_dir) / audio_path.replace(".mp3", ".wav"))
+        self.engine(
+            text=text,
+            output_file=audio_path_wav,
+            voice_name=self.voice,
+            speed=self.speed,
+            lang=self.lang,
+        )
+
+        # Convert .wav to .mp3
+        mp3_audio_path = str(Path(cache_dir) / audio_path)
+        wav2mp3(audio_path_wav, mp3_audio_path)
+
+        # Remove the intermediate .wav file. Note: remove_bookmarks() only strips
+        # bookmark tags from narration text; it does not delete files, so use
+        # Path.unlink() here instead.
+        Path(audio_path_wav).unlink()
+
+        json_dict = {
+            "input_text": text,
+            "input_data": input_data,
+            "original_audio": audio_path,
+        }
+
+        return json_dict
\ No newline at end of file
diff --git a/src/utils/utils.py b/src/utils/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..88af99f0571f7facc216c7b829afb5fdece83a09
--- /dev/null
+++ b/src/utils/utils.py
@@ -0,0 +1,132 @@
+import json
+import re
+try:
+    from pylatexenc.latexencode import utf8tolatex, UnicodeToLatexEncoder
+except ImportError:
+    print("Warning: pylatexenc is not installed; run `pip install pylatexenc`")
+
+def _print_response(response_type: str, theorem_name: str, content: str, separator: str = "=" * 50) -> None:
+    """Print formatted responses from the video generation process.
+
+    Prints a formatted response with separators and headers for readability.
+
+    Args:
+        response_type (str): Type of response (e.g., 'Scene Plan', 'Implementation Plan')
+        theorem_name (str): Name of the theorem being processed
+        content (str): The content to print
+        separator (str, optional): Separator string for visual distinction. Defaults to 50 equals signs.
+
+    Returns:
+        None
+    """
+    print(f"\n{separator}")
+    print(f"{response_type} for {theorem_name}:")
+    print(f"{separator}\n")
+    print(content)
+    print(f"\n{separator}")
+
+def _extract_code(response_text: str) -> str:
+    """Extract code blocks from a text response.
+
+    Extracts Python code blocks delimited by ```python markers. If the response
+    contains no fenced blocks at all, the full response is returned; fenced
+    non-Python blocks yield an empty string.
+
+    Args:
+        response_text (str): The text response containing code blocks
+
+    Returns:
+        str: The extracted code blocks joined by newlines, or the full response if no fences are present
+    """
+    code = ""
+    code_blocks = re.findall(r'```python\n(.*?)\n```', response_text, re.DOTALL)
+    if code_blocks:
+        code = "\n\n".join(code_blocks)
+    elif "```" not in response_text: # if no code block, return the whole response
+        code = response_text
+    return code
+
+def extract_json(response: str) -> dict:
+    """Extract and parse JSON content from a text response.
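For context, here is how this service would plug into a `manim_voiceover` scene. This is a minimal sketch assuming the library's documented `set_speech_service`/`voiceover` API; the scene itself is invented:

```python
from manim import Circle, Create
from manim_voiceover import VoiceoverScene

from src.utils.kokoro_voiceover import KokoroService


class NarratedCircle(VoiceoverScene):
    def construct(self):
        # Route all narration through the local Kokoro ONNX service above
        self.set_speech_service(KokoroService())

        circle = Circle()
        # tracker.duration stretches the animation to match the narration
        with self.voiceover(text="We draw a circle.") as tracker:
            self.play(Create(circle), run_time=tracker.duration)
```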
+ + Attempts to parse the response as JSON directly, then tries to extract JSON from code blocks + if direct parsing fails. + + Args: + response (str): The text response containing JSON content + + Returns: + dict: The parsed JSON content as a dictionary, or empty list if parsing fails + + Note: + Will attempt to parse content between ```json markers first, then between generic ``` markers + """ + try: + evaluation_json = json.loads(response) + except json.JSONDecodeError: + # If JSON parsing fails, try to extract the content between ```json and ``` + match = re.search(r'```json\n(.*?)\n```', response, re.DOTALL) + if not match: + # If no match for ```json, try to extract content between ``` and ``` + match = re.search(r'```\n(.*?)\n```', response, re.DOTALL) + + if match: + evaluation_content = match.group(1) + evaluation_json = json.loads(evaluation_content) + else: + # return empty list + evaluation_json = [] + print(f"Warning: Failed to extract valid JSON content from {response}") + return evaluation_json + +def _fix_unicode_to_latex(text: str, parse_unicode: bool = True) -> str: + """Convert Unicode symbols to LaTeX source code. + + Converts Unicode subscripts and superscripts to LaTeX format, with optional full Unicode parsing. + + Args: + text (str): The text containing Unicode symbols to convert + parse_unicode (bool, optional): Whether to perform full Unicode to LaTeX conversion. Defaults to True. + + Returns: + str: The text with Unicode symbols converted to LaTeX format + """ + # Map of unicode subscripts to latex format + subscripts = { + "₀": "_0", "₁": "_1", "₂": "_2", "₃": "_3", "₄": "_4", + "₅": "_5", "₆": "_6", "₇": "_7", "₈": "_8", "₉": "_9", + "₊": "_+", "₋": "_-" + } + # Map of unicode superscripts to latex format + superscripts = { + "⁰": "^0", "¹": "^1", "²": "^2", "³": "^3", "⁴": "^4", + "⁵": "^5", "⁶": "^6", "⁷": "^7", "⁸": "^8", "⁹": "^9", + "⁺": "^+", "⁻": "^-" + } + + for unicode_char, latex_format in {**subscripts, **superscripts}.items(): + text = text.replace(unicode_char, latex_format) + + if parse_unicode: + text = utf8tolatex(text) + + return text + +def extract_xml(response: str) -> str: + """Extract XML content from a text response. + + Extracts XML content between ```xml markers. Returns the full response if no XML blocks found. + + Args: + response (str): The text response containing XML content + + Returns: + str: The extracted XML content, or the full response if no XML blocks found + """ + try: + match = re.search(r'```xml\n(.*?)\n```', response, re.DOTALL) + if match: + return match.group(1) + else: + return response + except Exception: + return response diff --git a/src/utils/visual_error_detection.py b/src/utils/visual_error_detection.py new file mode 100644 index 0000000000000000000000000000000000000000..86148405c6c22395d426157e22e3ceb4fb37b268 --- /dev/null +++ b/src/utils/visual_error_detection.py @@ -0,0 +1,336 @@ +""" +Visual Error Detection Utilities for Manim Code Analysis + +This module provides utilities for detecting and analyzing visual errors in Manim animations, +specifically focusing on element overlap, positioning issues, and spatial constraint violations. 
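The subscript and superscript tables in `_fix_unicode_to_latex` handle the common cases before optionally delegating the rest to `pylatexenc`. A quick spot check of just the table pass:

```python
subs_and_supers = {"₁": "_1", "₂": "_2", "²": "^2"}

text = "x₁² + x₂² = r²"
for uni, latex in subs_and_supers.items():
    text = text.replace(uni, latex)

print(text)  # x_1^2 + x_2^2 = r^2
```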
+""" + +import re +import logging +from typing import Dict, List, Tuple, Any, Optional +from pathlib import Path + +logger = logging.getLogger(__name__) + +# Visual error detection patterns +VISUAL_ERROR_PATTERNS = { + 'overlap_keywords': [ + 'overlap', 'overlapping', 'collision', 'colliding', 'obscured', 'hidden', + 'blocked', 'covering', 'covered', 'behind', 'on top of' + ], + 'boundary_keywords': [ + 'out of bounds', 'outside frame', 'clipped', 'cut off', 'beyond edge', + 'outside safe area', 'margin violation', 'boundary violation' + ], + 'spacing_keywords': [ + 'too close', 'insufficient spacing', 'cramped', 'crowded', 'bunched up', + 'spacing violation', 'minimum distance', 'tight spacing' + ], + 'positioning_keywords': [ + 'misaligned', 'mispositioned', 'wrong position', 'incorrect placement', + 'poor arrangement', 'bad layout', 'disorganized' + ] +} + +# Critical visual issues that require immediate fixing +CRITICAL_VISUAL_ISSUES = [ + 'text completely obscured', + 'formula unreadable', + 'important element hidden', + 'content outside frame', + 'major overlap', + 'critical positioning error' +] + +# Safe area and spacing constraints (Manim units) +VISUAL_CONSTRAINTS = { + 'safe_area_margin': 0.5, # Units from frame edge + 'minimum_spacing': 0.3, # Units between elements + 'frame_width': 14.22, # Manim frame width + 'frame_height': 8.0, # Manim frame height + 'center_x': 0.0, # Frame center X + 'center_y': 0.0, # Frame center Y + 'x_bounds': (-7.0, 7.0), # Safe X coordinate range + 'y_bounds': (-4.0, 4.0) # Safe Y coordinate range +} + +class VisualErrorDetector: + """Utility class for detecting and categorizing visual errors in VLM responses.""" + + def __init__(self): + self.error_patterns = VISUAL_ERROR_PATTERNS + self.critical_issues = CRITICAL_VISUAL_ISSUES + self.constraints = VISUAL_CONSTRAINTS + + def detect_error_types(self, analysis_text: str) -> Dict[str, List[str]]: + """ + Detect different types of visual errors from VLM analysis text. 
+ + Args: + analysis_text: Raw text from VLM visual analysis + + Returns: + Dictionary categorizing detected errors by type + """ + errors = { + 'overlap_errors': [], + 'boundary_errors': [], + 'spacing_errors': [], + 'positioning_errors': [], + 'critical_errors': [] + } + + analysis_lower = analysis_text.lower() + + # Check for overlap errors + for keyword in self.error_patterns['overlap_keywords']: + if keyword in analysis_lower: + errors['overlap_errors'].append(self._extract_error_context(analysis_text, keyword)) + + # Check for boundary errors + for keyword in self.error_patterns['boundary_keywords']: + if keyword in analysis_lower: + errors['boundary_errors'].append(self._extract_error_context(analysis_text, keyword)) + + # Check for spacing errors + for keyword in self.error_patterns['spacing_keywords']: + if keyword in analysis_lower: + errors['spacing_errors'].append(self._extract_error_context(analysis_text, keyword)) + + # Check for positioning errors + for keyword in self.error_patterns['positioning_keywords']: + if keyword in analysis_lower: + errors['positioning_errors'].append(self._extract_error_context(analysis_text, keyword)) + + # Check for critical issues + for issue in self.critical_issues: + if issue in analysis_lower: + errors['critical_errors'].append(self._extract_error_context(analysis_text, issue)) + + # Remove empty entries and duplicates + for error_type in errors: + errors[error_type] = list(set([e for e in errors[error_type] if e])) + + return errors + + def _extract_error_context(self, text: str, keyword: str, context_length: int = 100) -> str: + """ + Extract context around a detected error keyword. + + Args: + text: Full analysis text + keyword: Error keyword found + context_length: Characters to include around keyword + + Returns: + Context string around the error keyword + """ + try: + # Find keyword position (case insensitive) + lower_text = text.lower() + keyword_pos = lower_text.find(keyword.lower()) + + if keyword_pos == -1: + return keyword + + # Extract context around keyword + start = max(0, keyword_pos - context_length // 2) + end = min(len(text), keyword_pos + len(keyword) + context_length // 2) + + context = text[start:end].strip() + + # Clean up context + context = re.sub(r'\s+', ' ', context) + + return context + except Exception as e: + logger.warning(f"Error extracting context for keyword '{keyword}': {e}") + return keyword + + def categorize_severity(self, errors: Dict[str, List[str]]) -> Dict[str, str]: + """ + Categorize the severity of detected visual errors. 
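Exercising `detect_error_types` on a made-up VLM reply shows the keyword routing:

```python
from src.utils.visual_error_detection import VisualErrorDetector

detector = VisualErrorDetector()
analysis = (
    "The equation label is overlapping the axis title, and the legend "
    "is cut off at the right edge of the frame."
)
errors = detector.detect_error_types(analysis)

print(len(errors["overlap_errors"]) > 0)   # True, triggered by "overlapping"
print(len(errors["boundary_errors"]) > 0)  # True, triggered by "cut off"
```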
+ + Args: + errors: Dictionary of detected errors by type + + Returns: + Dictionary mapping error types to severity levels + """ + severity_map = {} + + # Critical errors are always high severity + if errors['critical_errors']: + severity_map['critical'] = 'HIGH' + + # Overlap errors can vary in severity + if errors['overlap_errors']: + # Check if any overlap errors mention important elements + important_keywords = ['text', 'formula', 'equation', 'title', 'label'] + has_important_overlap = any( + any(keyword in error.lower() for keyword in important_keywords) + for error in errors['overlap_errors'] + ) + severity_map['overlap'] = 'HIGH' if has_important_overlap else 'MEDIUM' + + # Boundary errors are typically medium to high severity + if errors['boundary_errors']: + severity_map['boundary'] = 'MEDIUM' + + # Spacing errors are usually low to medium severity + if errors['spacing_errors']: + severity_map['spacing'] = 'LOW' + + # Positioning errors vary based on context + if errors['positioning_errors']: + severity_map['positioning'] = 'MEDIUM' + + return severity_map + + def generate_fix_suggestions(self, errors: Dict[str, List[str]]) -> List[str]: + """ + Generate specific code fix suggestions based on detected errors. + + Args: + errors: Dictionary of detected errors by type + + Returns: + List of specific fix suggestions + """ + suggestions = [] + + if errors['overlap_errors']: + suggestions.extend([ + "Use `.next_to()` method to position elements relative to each other with proper spacing", + "Apply `buff` parameter in positioning methods to ensure minimum 0.3 unit spacing", + "Reorganize elements into VGroups for better spatial management", + "Use `bring_to_front()` or `bring_to_back()` to manage z-order layering" + ]) + + if errors['boundary_errors']: + suggestions.extend([ + "Ensure all elements are positioned within safe area bounds (-7 to 7 for X, -4 to 4 for Y)", + "Use `move_to(ORIGIN)` and then apply relative positioning to keep elements centered", + "Check element sizes and scale them down if they extend beyond frame boundaries", + "Apply safe area margins of 0.5 units from frame edges" + ]) + + if errors['spacing_errors']: + suggestions.extend([ + "Use `buff=0.3` or higher in `.next_to()` methods for proper spacing", + "Apply `.shift()` method to adjust element positions for better spacing", + "Consider using `.arrange()` method for VGroups to maintain consistent spacing", + "Verify minimum 0.3 unit spacing between all visual elements" + ]) + + if errors['positioning_errors']: + suggestions.extend([ + "Use relative positioning methods exclusively: `.next_to()`, `.align_to()`, `.shift()`", + "Position elements relative to ORIGIN, other objects, or scene margins", + "Ensure logical flow and visual hierarchy in element arrangement", + "Group related elements using VGroup for coordinated positioning" + ]) + + # Remove duplicates while preserving order + unique_suggestions = [] + for suggestion in suggestions: + if suggestion not in unique_suggestions: + unique_suggestions.append(suggestion) + + return unique_suggestions + + def validate_manim_constraints(self, code: str) -> Dict[str, List[str]]: + """ + Validate Manim code against spatial constraints. 
+ + Args: + code: Manim code to validate + + Returns: + Dictionary of constraint violations found in code + """ + violations = { + 'absolute_coordinates': [], + 'unsafe_positioning': [], + 'missing_spacing': [], + 'out_of_bounds': [] + } + + lines = code.split('\n') + + for i, line in enumerate(lines, 1): + # Check for absolute coordinates (potential issues) + if re.search(r'move_to\s*\(\s*[-+]?\d+\.?\d*\s*,\s*[-+]?\d+\.?\d*', line): + violations['absolute_coordinates'].append(f"Line {i}: {line.strip()}") + + # Check for potentially unsafe positioning + if re.search(r'shift\s*\(\s*[^)]*[5-9]\d*', line): + violations['unsafe_positioning'].append(f"Line {i}: Large shift detected - {line.strip()}") + + # Check for missing buff parameters in next_to calls + if 'next_to' in line and 'buff' not in line: + violations['missing_spacing'].append(f"Line {i}: Missing buff parameter - {line.strip()}") + + # Check for coordinates that might be out of bounds + coord_matches = re.findall(r'[-+]?\d+\.?\d*', line) + for coord in coord_matches: + try: + val = float(coord) + if abs(val) > 10: # Potentially problematic large coordinates + violations['out_of_bounds'].append(f"Line {i}: Large coordinate {val} - {line.strip()}") + except ValueError: + continue + + return violations + + +def create_visual_fix_context( + errors: Dict[str, List[str]], + suggestions: List[str], + constraints: Dict[str, Any] +) -> str: + """ + Create a formatted context string for visual fix operations. + + Args: + errors: Detected visual errors + suggestions: Fix suggestions + constraints: Visual constraints to enforce + + Returns: + Formatted context string for LLM prompt + """ + context_parts = [] + + if any(errors.values()): + context_parts.append("**DETECTED VISUAL ERRORS:**") + + for error_type, error_list in errors.items(): + if error_list: + error_type_formatted = error_type.replace('_', ' ').title() + context_parts.append(f"\n{error_type_formatted}:") + for error in error_list: + context_parts.append(f" - {error}") + + if suggestions: + context_parts.append("\n\n**RECOMMENDED FIXES:**") + for i, suggestion in enumerate(suggestions, 1): + context_parts.append(f"{i}. 
{suggestion}") + + context_parts.append("\n\n**SPATIAL CONSTRAINTS TO ENFORCE:**") + context_parts.append(f"- Safe area margin: {constraints['safe_area_margin']} units from edges") + context_parts.append(f"- Minimum spacing: {constraints['minimum_spacing']} units between elements") + context_parts.append(f"- X coordinate bounds: {constraints['x_bounds']}") + context_parts.append(f"- Y coordinate bounds: {constraints['y_bounds']}") + + return '\n'.join(context_parts) + + +# Export main utilities +__all__ = [ + 'VisualErrorDetector', + 'VISUAL_ERROR_PATTERNS', + 'CRITICAL_VISUAL_ISSUES', + 'VISUAL_CONSTRAINTS', + 'create_visual_fix_context' +] diff --git a/task_generator/__init__.py b/task_generator/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..949046b4903d61bb48137be257f57aa6f2b1546c --- /dev/null +++ b/task_generator/__init__.py @@ -0,0 +1,297 @@ +from .prompts_raw import ( + _prompt_code_generation, + _prompt_fix_error, + _prompt_visual_fix_error, + _prompt_scene_plan, + _prompt_scene_vision_storyboard, + _prompt_scene_technical_implementation, + _prompt_scene_animation_narration, + _prompt_animation_simple, + _prompt_animation_fix_error, + _prompt_animation_rag_query_generation, + _prompt_animation_rag_query_generation_fix_error, + _banned_reasonings, + _prompt_context_learning_scene_plan, + _prompt_context_learning_vision_storyboard, + _prompt_context_learning_technical_implementation, + _prompt_context_learning_animation_narration, + _prompt_context_learning_code, + _prompt_detect_plugins, + _prompt_rag_query_generation_code, + _prompt_rag_query_generation_vision_storyboard, + _prompt_rag_query_generation_technical, + _prompt_rag_query_generation_narration, + _prompt_rag_query_generation_fix_error +) +from typing import Union, List + +def get_prompt_scene_plan(topic: str, description: str) -> str: + """ + Generate a prompt for scene planning based on the given parameters. + + Args: + topic (str): The topic of the video. + description (str): A brief description of the video content. + + Returns: + str: The formatted prompt for scene planning. 
+ """ + prompt = _prompt_scene_plan.format(topic=topic, description=description) + return prompt + +def get_prompt_scene_vision_storyboard(scene_number: int, topic: str, description: str, scene_outline: str, relevant_plugins: List[str]) -> str: + prompt = _prompt_scene_vision_storyboard.format( + scene_number=scene_number, + topic=topic, + description=description, + scene_outline=scene_outline, + relevant_plugins=", ".join(relevant_plugins) + ) + return prompt + +def get_prompt_scene_technical_implementation(scene_number: int, topic: str, description: str, scene_outline: str, scene_vision_storyboard: str, relevant_plugins: List[str], additional_context: Union[str, List[str]] = None) -> str: + prompt = _prompt_scene_technical_implementation.format( + scene_number=scene_number, + topic=topic, + description=description, + scene_outline=scene_outline, + scene_vision_storyboard=scene_vision_storyboard, + relevant_plugins=", ".join(relevant_plugins) + ) + if additional_context is not None: + if isinstance(additional_context, str): + prompt += f"\nAdditional context: {additional_context}" + elif isinstance(additional_context, list): + prompt += f"\nAdditional context: {additional_context[0]}" + if len(additional_context) > 1: + prompt += f"\n" + "\n".join(additional_context[1:]) + return prompt + +def get_prompt_scene_animation_narration(scene_number: int, topic: str, description: str, scene_outline: str, scene_vision_storyboard: str, technical_implementation_plan: str, relevant_plugins: List[str]) -> str: + prompt = _prompt_scene_animation_narration.format( + scene_number=scene_number, + topic=topic, + description=description, + scene_outline=scene_outline, + scene_vision_storyboard=scene_vision_storyboard, + technical_implementation_plan=technical_implementation_plan, + relevant_plugins=", ".join(relevant_plugins) + ) + return prompt + +def get_prompt_code_generation(topic: str, + description: str, + scene_outline: str, + scene_implementation: str, + scene_number: int, + additional_context: Union[str, List[str]] = None) -> str: + """ + Generate a prompt for code generation based on the given video plan and implementation details. + + Args: + topic (str): The topic of the video. + description (str): A brief description of the video content. + scene_outline (str): The scene outline. + scene_implementation (str): The detailed scene implementation. + scene_number (int): The scene number + additional_context (Union[str, List[str]]): Additional context to include in the prompt + Returns: + str: The formatted prompt for code generation. + """ + prompt = _prompt_code_generation.format( + topic=topic, + description=description, + scene_outline=scene_outline, + scene_implementation=scene_implementation, + scene_number=scene_number + ) + if additional_context is not None: + if isinstance(additional_context, str): + prompt += f"\nAdditional context: {additional_context}" + elif isinstance(additional_context, list): + prompt += f"\nAdditional context: {additional_context[0]}" + if len(additional_context) > 1: + prompt += f"\n" + "\n".join(additional_context[1:]) + return prompt + +def get_prompt_fix_error(implementation_plan: str, manim_code: str, error: str, additional_context: Union[str, List[str]] = None) -> str: + """ + Generate a prompt to fix errors in the given manim code. + + Args: + implementation_plan (str): The implementation plan of the scene. + code (str): The manim code with errors. + error (str): The error message encountered. + + Returns: + str: The formatted prompt to fix the code errors. 
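The `additional_context` branching above recurs in several of these builders; if consolidation is ever desired, it reduces to a single helper. A refactoring sketch, with the helper name hypothetical:

```python
from typing import List, Union


def _append_context(prompt: str, additional_context: Union[str, List[str], None]) -> str:
    """Replicates the str/list branching shared by the prompt builders."""
    if additional_context is None:
        return prompt
    if isinstance(additional_context, str):
        return prompt + f"\nAdditional context: {additional_context}"
    if additional_context:  # non-empty list
        prompt += f"\nAdditional context: {additional_context[0]}"
        if len(additional_context) > 1:
            prompt += "\n" + "\n".join(additional_context[1:])
    return prompt
```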
+ """ + prompt = _prompt_fix_error.format( + implementation_plan=implementation_plan, + manim_code=manim_code, + error_message=error + ) + if additional_context is not None: + if isinstance(additional_context, str): + prompt += f"\nAdditional context: {additional_context}" + elif isinstance(additional_context, list) and additional_context: + prompt += f"\nAdditional context: {additional_context[0]}" + if len(additional_context) > 1: + prompt += f"\n" + "\n".join(additional_context[1:]) + return prompt + +def get_prompt_visual_fix_error(implementation: str, generated_code: str) -> str: + prompt = _prompt_visual_fix_error.format( + implementation=implementation, + generated_code=generated_code + ) + return prompt + +def get_banned_reasonings() -> List[str]: + return _banned_reasonings.split("\n") + +def get_prompt_rag_query_generation_vision_storyboard(scene_plan: str, relevant_plugins: str) -> str: + prompt = _prompt_rag_query_generation_vision_storyboard.format( + scene_plan=scene_plan, + relevant_plugins=relevant_plugins + ) + return prompt + +def get_prompt_rag_query_generation_technical(storyboard: str, relevant_plugins: str) -> str: + """For generating RAG queries during storyboard to technical implementation stage""" + prompt = _prompt_rag_query_generation_technical.format( + storyboard=storyboard, + relevant_plugins=relevant_plugins + ) + return prompt + +def get_prompt_rag_query_generation_narration(storyboard: str, relevant_plugins: str) -> str: + """For generating RAG queries during storyboard to narration stage""" + prompt = _prompt_rag_query_generation_narration.format( + storyboard=storyboard, + relevant_plugins=relevant_plugins + ) + return prompt + +def get_prompt_rag_query_generation_code(implementation_plan: str, relevant_plugins: str) -> str: + """For generating RAG queries during technical implementation to code generation stage""" + prompt = _prompt_rag_query_generation_code.format( + implementation_plan=implementation_plan, + relevant_plugins=relevant_plugins + ) + return prompt + +def get_prompt_rag_query_generation_fix_error(error: str, code: str, relevant_plugins: str) -> str: + prompt = _prompt_rag_query_generation_fix_error.format( + error=error, + code=code, + relevant_plugins=relevant_plugins + ) + return prompt + +def get_prompt_context_learning_scene_plan(examples: str) -> str: + prompt = _prompt_context_learning_scene_plan.format( + examples=examples + ) + return prompt + +def get_prompt_context_learning_vision_storyboard(examples: str) -> str: + prompt = _prompt_context_learning_vision_storyboard.format( + examples=examples + ) + return prompt + +def get_prompt_context_learning_technical_implementation(examples: str) -> str: + prompt = _prompt_context_learning_technical_implementation.format( + examples=examples + ) + return prompt + +def get_prompt_context_learning_animation_narration(examples: str) -> str: + prompt = _prompt_context_learning_animation_narration.format( + examples=examples + ) + return prompt + +def get_prompt_context_learning_code(examples: str) -> str: + prompt = _prompt_context_learning_code.format( + examples=examples + ) + return prompt + +def get_prompt_detect_plugins(topic: str, description: str, plugin_descriptions: str) -> str: + """ + Generate a prompt for detecting relevant plugins based on topic and description. 
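Callers of `get_prompt_detect_plugins` are expected to serialize the plugin catalog themselves. A hedged example; the plugin entries and descriptions are invented:

```python
import json

from task_generator import get_prompt_detect_plugins

plugins = [
    {"name": "manim-physics", "description": "Rigid bodies, pendulums, waves"},
    {"name": "manim-chemistry", "description": "Periodic-table and molecule mobjects"},
]

prompt = get_prompt_detect_plugins(
    topic="Simple harmonic motion",
    description="Derive and animate the pendulum equation.",
    plugin_descriptions=json.dumps(plugins),
)
```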
+ + Args: + topic (str): The video topic + description (str): The video description + plugin_descriptions (str): JSON string of available plugin descriptions + + Returns: + str: The formatted prompt for plugin detection + """ + prompt = _prompt_detect_plugins.format( + topic=topic, + description=description, + plugin_descriptions=plugin_descriptions + ) + return prompt + +def get_prompt_animation(topic: str, description: str, additional_context: Union[str, List[str]] = None) -> str: + prompt = _prompt_animation_simple.format( + topic=topic, + description=description + ) + if additional_context is not None: + if isinstance(additional_context, str): + prompt += f"\nAdditional context: {additional_context}" + elif isinstance(additional_context, list) and additional_context: + prompt += f"\nAdditional context: {additional_context[0]}" + if len(additional_context) > 1: + prompt += f"\n" + "\n".join(additional_context[1:]) + return prompt + +def get_prompt_animation_fix_error(text_explanation: str, manim_code: str, error: str, additional_context: Union[str, List[str]] = None) -> str: + """ + Generate a prompt to fix errors in the given manim code. + + Args: + text_explanation (str): The implementation plan of the scene. + code (str): The manim code with errors. + error (str): The error message encountered. + + Returns: + str: The formatted prompt to fix the code errors. + """ + prompt = _prompt_animation_fix_error.format( + text_explanation=text_explanation, + manim_code=manim_code, + error_message=error + ) + if additional_context is not None: + if isinstance(additional_context, str): + prompt += f"\nAdditional context: {additional_context}" + elif isinstance(additional_context, list): + prompt += f"\nAdditional context: {additional_context[0]}" + if len(additional_context) > 1: + prompt += f"\n" + "\n".join(additional_context[1:]) + return prompt + +def get_prompt_animation_rag_query_generation(topic: str, context: str, relevant_plugins: str) -> str: + if context is None: + context = "" + prompt = _prompt_animation_rag_query_generation.format( + topic=topic, + context=context, + relevant_plugins=relevant_plugins + ) + return prompt + +def get_prompt_animation_rag_query_generation_fix_error(text_explanation: str, error: str, code: str) -> str: + prompt = _prompt_animation_rag_query_generation_fix_error.format( + text_explanation=text_explanation, + error=error, + code=code + ) + return prompt \ No newline at end of file diff --git a/task_generator/__pycache__/__init__.cpython-312.pyc b/task_generator/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a7e0a8f1dc5ddbfa24e7ea556b0a15e6b75be05d Binary files /dev/null and b/task_generator/__pycache__/__init__.cpython-312.pyc differ diff --git a/task_generator/parse_prompt.py b/task_generator/parse_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..075ce754894bb73bc60d47c100c951b98c2a7e9b --- /dev/null +++ b/task_generator/parse_prompt.py @@ -0,0 +1,54 @@ +import os +from tqdm import tqdm + + +def call_parse_prompt(): + """ + Find the prompts_raw directory and generate an __init__.py file containing prompt texts. + + Searches for prompts_raw directory in current and parent directories. Once found, + calls create_python_file_with_texts() to generate the __init__.py file. 
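The variable-name derivation in `create_python_file_with_texts` can be traced by hand; for a top-level file the chain below reproduces the `_prompt_*` names imported by `task_generator/__init__.py`. The path is illustrative, and a POSIX `os.sep` is assumed:

```python
import os

folder_path = "/repo/task_generator/prompts_raw"  # illustrative path
file_path = os.path.join(folder_path, "prompt_fix_error.txt")

var_name = "_" + (
    file_path.replace(folder_path, "")
    .replace(os.sep, "_")
    .replace(".txt", "")
    .strip("_")
)
print(var_name)  # _prompt_fix_error
```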
+    """
+    current_file_path = os.path.abspath(__file__)
+    current_folder_path = os.path.dirname(current_file_path)
+    folder_path = os.path.join(current_folder_path, "prompts_raw")
+
+    # If prompts_raw not found in current directory, search parent directories
+    if not os.path.exists(folder_path):
+        parent_dir = current_folder_path
+        while parent_dir != os.path.dirname(parent_dir):  # Stop at root directory
+            parent_dir = os.path.dirname(parent_dir)
+            test_path = os.path.join(parent_dir, "prompts_raw")
+            if os.path.exists(test_path):
+                folder_path = test_path
+                break
+
+    output_file = os.path.join(folder_path, "__init__.py")
+    create_python_file_with_texts(folder_path, output_file)
+
+
+def create_python_file_with_texts(folder_path: str, output_file: str) -> None:
+    """
+    Generate a Python file containing prompt texts from .txt files.
+
+    Args:
+        folder_path (str): Path to directory containing prompt .txt files
+        output_file (str): Path where the generated Python file will be saved
+
+    The function reads all .txt files in the given folder, converts their contents
+    into Python variables, and writes them to the output file. Variable names are
+    derived from file paths with special characters replaced.
+    """
+    with open(output_file, 'w', encoding='utf-8') as out_file:
+        out_file.write("# This file is generated automatically through parse_prompt.py\n\n")
+        # Keep the full path from os.walk so files in subdirectories resolve correctly
+        txt_paths = [os.path.join(root, file) for root, _, files in os.walk(folder_path) for file in files if file.endswith(".txt")]
+        for file_path in tqdm(txt_paths, desc="Processing files"):
+            var_name = "_" + file_path.replace(folder_path, "").replace(os.sep, "_").replace(".txt", "").strip("_")
+            with open(file_path, 'r', encoding='utf-8') as f:
+                # Escape any triple quotes so the generated literal stays valid
+                # (a bare '\"\"\"' replacement target is a no-op in Python source)
+                content = f.read().replace('"""', '\\"\\"\\"')
+            out_file.write(f'{var_name} = """{content}"""\n\n')
+
+
+if __name__ == "__main__":
+    call_parse_prompt()
\ No newline at end of file
diff --git a/task_generator/prompts_raw/__init__.py b/task_generator/prompts_raw/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..f414287c34b41c9491ea7306364a34a1e3340ad5
--- /dev/null
+++ b/task_generator/prompts_raw/__init__.py
@@ -0,0 +1,1885 @@
+# This file is generated automatically through parse_prompt.py
+import sys
+import os
+# Adjusting the path to add the project root (which contains the "src" folder) to the system path.
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../")))
+from manim import *
+from manim import config as global_config
+from manim_voiceover import VoiceoverScene
+from src.utils.kokoro_voiceover import KokoroService # You MUST import like this as this is our custom voiceover service.
+
+_prompt_context_learning_scene_plan = """Here are some example scene plans to help guide your scene planning:
+
+{examples}
+
+Please follow a similar structure while maintaining creativity and relevance to the current topic."""
+
+_prompt_scene_vision_storyboard = """You are an expert in educational video production and Manim animation.
+**Reminder:** Each scene's vision and storyboard plan is entirely self-contained. There is no dependency on any implementation from previous or subsequent scenes. However, the narration will treat all scenes as part of a single, continuous video.
+
+Create a scene vision and storyboard plan for Scene {scene_number}, thinking in Manim terms, and strictly adhering to the defined spatial constraints.
+ +Topic: {topic} +Description: {description} + +Scene Overview: +{scene_outline} + +The following manim plugins are relevant to the scene: +{relevant_plugins} + +**Spatial Constraints (Strictly Enforced):** +* **Safe area margins:** 0.5 units on all sides from the scene edges. *All objects must be positioned within these margins.* +* **Minimum spacing:** 0.3 units between any two Manim objects (measured edge to edge). *Ensure a minimum spacing of 0.3 units to prevent overlaps and maintain visual clarity. This spacing must be maintained between all objects in the scene, including text, shapes, and graphs.* + +**Positioning Requirements:** +1. Safe area margins (0.5 units). +2. Minimum spacing between objects (0.3 units). +3. Relative positioning (`next_to`, `align_to`, `shift`) from `ORIGIN`, margins, or object references. **No absolute coordinates are allowed.** All positioning MUST be relative and clearly specified using reference points and relative positioning methods. +4. Transition buffers (`Wait` times) between sub-scenes and animation steps for visual clarity and pacing. + +**Diagrams/Sketches (Optional but Recommended for Complex Scenes):** +* For complex scenes, consider including a simple diagram or sketch (even text-based) of the intended layout to visually clarify spatial relationships and ensure adherence to spacing and margin constraints. + +**Focus:** +* Focus on clear visual communication of the scene's learning objective through effective use of Manim objects and animations, while strictly adhering to the defined spatial constraints. +* Provide detailed visual descriptions in Manim terms to guide human implementation. +* Prioritize explanation and visualization of the theorem. Do not include any promotional elements or quiz sessions. +* Minimize text usage - rely primarily on visual elements, mathematical notation, and animations to convey concepts. Use text sparingly and only when necessary for clarity. + +**Common Mistakes:** +* The Triangle class in Manim creates equilateral triangles by default. To create a right-angled triangle, use the Polygon class instead. + +**Manim Plugins:** +* Consider using established Manim plugins if they significantly simplify the implementation or offer visual elements not readily available in core Manim. If a plugin is used, clearly indicate this in the storyboard with a note like "**Plugin Suggestion:** Consider using the `manim-plugin-name` plugin for [brief explanation of benefit]." + +You MUST generate the scene vision and storyboard plan for the scene in the following format (from ```xml to ```): + +```xml + +[SCENE_VISION] +1. **Scene Overview**: + - Scene story, key takeaway, video role. *Consider how this scene fits within the overall video narrative.* + - **Visual learning objectives for viewers:** Think about *specific Manim object types* that best represent the learning objective. Example: "Visualize roots as `Dot` objects on an `Axes` graph." Be specific about Manim object classes (e.g., `MathTex`, `Shapes`, `Graphs`, `Axes`, `VGroup`). If a plugin provides a relevant object type, mention it (e.g., "Visualize X using `PluginObject` from `manim-plugin-name`"). + - How Manim visuals & animations support learning? Consider `MathTex`, `Shapes`, `Graphs`, `Axes`, `VGroup`. Focus on spatial arrangement and clarity, ensuring adherence to safe area margins and minimum spacing (0.3 units). Consider using `VGroup` to group related formula components for easier animation and spatial control. 
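+A minimal sketch of this grouping pattern (illustrative only; the formula strings are placeholders): +```python +formula = VGroup( + MathTex(r"a^2 + b^2"), + MathTex(r"= c^2"), +).arrange(DOWN, buff=0.3) # maintain the 0.3 unit minimum spacing between components +formula.move_to(ORIGIN) # relative positioning only, no absolute coordinates +```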
Example: "Use `VGroup` to group related formula components for easier animation and spatial control, ensuring a minimum spacing of 0.3 units between VGroup and other scene elements." If a plugin offers a more efficient way to achieve a visual effect, mention it. + - Key concepts to emphasize visually using visual hierarchy and spatial arrangement in Manim, while respecting safe area margins and minimum spacing (0.3 units). **Use `MathTex` for mathematical expressions and equations. Use `Tex` for general text, titles, labels, and any non-mathematical text. When mixing text with mathematical symbols in `MathTex`, use the `\\text{{}}` command (e.g., `MathTex(r"\\text{{Area}} = \\pi r^2")`)** + +[STORYBOARD] +1. **Visual Flow & Pacing (Manim Animation Sequence)**: + - Describe the sequence of Manim visuals and animations (`Text`, `Circle`, `Arrow`, `Create`, `FadeIn`, `Transform`, etc.). Be specific about animation types and their parameters (e.g., `run_time`). If a plugin provides a specific animation type, mention it (e.g., "Use `PluginAnimation` from `manim-plugin-name`"). + - Key visual moments: composition and arrangement of Manim elements, ensuring all elements are within safe area margins and maintain a minimum 0.3 unit spacing. Example: "`MathTex` formula center (`.move_to(ORIGIN)`) with `Write` animation, ensuring 0.3 unit spacing from scene edges and other elements." + - Visual transitions between ideas using Manim animations (`Transform`, `Shift`, `FadeOutAndShift`, etc.). Specify transition animations and their timings. + - Scene pacing (pauses, action) and Manim animation timing's role. Use `Wait()` for transition buffers and visual clarity. + - **Sub-scene Breakdown**: Divide the scene into logical sub-scenes, each focusing on a specific step in the explanation or visualization. + - For each sub-scene, start with a **Visual Element**: The primary visual component that drives the explanation (e.g., mathematical notation, diagram, graph). If this element comes from a plugin, clearly state this (e.g., "Visual Element: `PluginObject` from `manim-plugin-name`"). + - Detail the **Animation Sequence**: Describe step-by-step the Manim animations and visual elements for each sub-scene. Be specific about: + - **Text Usage Guidelines:** + - **Use `MathTex` *only* for mathematical expressions and equations.** + - **Use `Tex` for all other text, including labels, explanations, and titles.** + - **When mixing text with mathematical symbols in `MathTex`, wrap the text portions in `\\text{{}}`. Example: `MathTex(r"\\text{{Area of circle}} = \\pi r^2")`.** + - Manim object classes (`MathTex`, `Circle`, `Arrow`, `Axes`, `Plot`, `Line`, `VGroup`, etc.), prioritizing mathematical notation and visual elements over text. Include plugin object classes where appropriate. + - Animation types (`Create`, `Write`, `FadeIn`, `Transform`, `FadeOut`, `Circumscribe`, `FocusOn`, etc.) and their parameters (e.g., `run_time`). Include plugin animation types where appropriate. + - Positioning of objects using relative positioning methods (`.next_to()`, `.align_to()`, `.shift()`, `.to_corner()`, `.move_to(ORIGIN)`, etc.) and references to other objects or scene elements. **No absolute coordinates allowed.** + - Color and style specifications (e.g., `color=BLUE`, `stroke_width=2`, `dashed=True`). + - Explicitly mention safe area margins and minimum spacing (0.3 units) for all objects within each sub-scene. 
+ + +```""" + +_prompt_rag_query_generation_storyboard = """You are an expert in generating search queries specifically for **Manim (Community Edition) documentation** (both core Manim and its plugins). Your task is to transform a storyboard plan for a Manim video scene into effective queries that will retrieve relevant information from Manim documentation. The storyboard plan describes the scene's visual elements and narrative flow. + +Here is the storyboard plan: + +{storyboard} + +Based on the storyboard plan, generate multiple human-like queries (maximum 10) for retrieving relevant documentation. Please ensure that the search targets are different so that the RAG can retrieve a diverse set of documents covering various aspects of the implementation. + +**Specifically, ensure that:** +1. At least some queries are focused on retrieving information about **Manim core functionalities**, like general visual elements or animations. Frame these queries using Manim terminology (classes, methods, concepts). +2. If the storyboard suggests using specific visual effects or complex animations that might be plugin-related, include at least 1 query specifically targeting **plugin documentation**. Make sure to mention the plugin name if known or suspected. +3. Queries should be general enough to explore different possibilities within Manim and its plugins based on the storyboard's visual and narrative descriptions, but also specific enough to target Manim documentation effectively. + +The above storyboard might be relevant to these plugins: {relevant_plugins}. +Note that you MUST NOT use the plugins that are not listed above. + +Output the queries in the following format: +```json +[ + {{"query": "content of query 1", "type": "manim_core/{relevant_plugins}"}}, + {{"query": "content of query 2", "type": "manim_core/{relevant_plugins}"}}, + {{"query": "content of query 3", "type": "manim_core/{relevant_plugins}"}}, + {{"query": "content of query 4", "type": "manim_core/{relevant_plugins}"}}, + {{"query": "content of query 5", "type": "manim_core/{relevant_plugins}"}}, + {{"query": "content of query 6", "type": "manim_core/{relevant_plugins}"}}, + {{"query": "content of query 7", "type": "manim_core/{relevant_plugins}"}}, +] +``` """ + +_code_background = """PLEASE DO NOT create additional colored background Rectangles. The default background (black) is enough. +PLEASE DO NOT use BLACK color for any text. +""" + +_prompt_context_learning_vision_storyboard = """Here are some example vision and storyboard plans to help guide your planning: + +{examples} + +Please follow a similar structure while maintaining creativity and relevance to the current scene.""" + +_prompt_context_learning_code = """Here are some example Manim code implementations to help guide your code generation: + +{examples} + +Please follow similar patterns and best practices while implementing the current scene.""" + +_code_limit = """Note that the frame width and height are 14.222222222222221 and 8.0 respectively, and the center of the frame is (0, 0, 0). +This means that, to avoid putting any object out of the frame, you should limit the coordinates of the objects: +keep x within -7.0 and 7.0, and keep y within -4.0 and 4.0. +Place the objects near the center of the frame, without overlapping with each other.""" + +_prompt_animation_rag_query_generation = """You are an expert in Manim (Community Edition) and its plugins.
Your task is to transform a topic for a Manim animation scene into queries that can be used to retrieve relevant documentation from both Manim core and any relevant plugins. + +Your queries should include keywords related to the specific Manim classes, methods, functions, and *concepts* that are likely to be used to implement the scene, including any plugin-specific functionality. Focus on extracting the core concepts, actions, and vocabulary from the *entire* scene plan. Generate queries that are concise and target different aspects of the documentation (class reference, method usage, animation examples, conceptual explanations) across both Manim core and relevant plugins. + +Here is the Topic (and the context): + +{topic}. {context} + +Based on the topic and the context, generate multiple human-like queries (maximum 5-7) for retrieving relevant documentation. Please ensure that the search targets are different so that the RAG can retrieve a diverse set of documents covering various aspects of the implementation. + +**Specifically, ensure that:** +1. At least 1-2 queries are focused on retrieving information about Manim *function usage* in Manim scenes +2. If the topic and the context can be linked to the use of plugin functionality, include at least 1 query specifically targeting plugin documentation +3. Queries should be specific enough to distinguish between core Manim and plugin functionality when relevant + +The above text explanations are relevant to these plugins: {relevant_plugins} + +Output the queries in the following format: +```json +[ + {{"query": "content of query 1", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 2", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 3", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 4", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 5", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 6", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 7", "type": "manim_core/name_of_the_plugin"}}, +] +```""" + +_code_font_size = """If there is title text, font size is highly recommended to be 28. +If there are side labels, font size is highly recommended to be 24. +If there are formulas, font size is highly recommended to be 24. + +However, if the text has more than 10 words, font size should be reduced further and multiple lines should be used.""" + +_prompt_best_practices = """# Best practices for generating educational videos with manim + +1. Specify positions as relative to other objects whenever it makes sense. + * For example, if you want to place a label for a geometric object. +2. Objects should be of a different color from the black background. +3. Keep the text on screen concise. + * On-screen elements should focus on showcasing the concept, examples and visuals. Labels and illustrative text are still encouraged. + * For explanations and observations, prefer narrations over on-screen text. + * You should still show calculations and algorithms in full on screen. + * For examples and practice problems, it is reasonable to show more text, especially key statements. + * Longer text should appear smaller to fit on screen. +4. To control the timing of objects appearing: + * `add` has an instantaneous effect, best used for the initial setup of the scene. + * Animations are best used during narration. + * Make sure the animations make sense.
If an object is already on screen, it makes no sense to fade it in or create it again. +5. Use Tex or MathTex whenever you want to display math, including symbols and formulas. +""" + +_prompt_scene_plan = """You are an expert in educational video production, instructional design, and {topic}. Please design a high-quality video to provide an in-depth explanation of {topic}. + +**Video Overview:** + +Topic: {topic} +Description: {description} + +**Scene Breakdown:** + +Plan individual scenes. For each scene please provide the following: + +* **Scene Title:** Short, descriptive title (2-5 words). +* **Scene Purpose:** Learning objective of this scene. How does it connect to previous scenes? +* **Scene Description:** Detailed description of scene content. +* **Scene Layout:** Describe the spatial layout concept in detail. Consider safe area margins and minimum spacing between objects. + +Please generate the scene plan for the video in the following format: + +```xml + + + Scene Title: [Title] + Scene Purpose: [Learning objective, connection to previous scene] + Scene Description: [Brief content description] + Scene Layout: [Spatial layout concept, consider safe area and spacing] + + + + ... + +... + +``` + +**Spatial Constraints:** +* **Safe area margins:** 0.5 units on all sides from the scene edges. *All objects must be positioned within these margins.* +* **Minimum spacing:** 0.3 units between any two Manim objects (measured edge to edge). *Ensure adequate spacing to prevent overlaps and maintain visual clarity.* + +Requirements: +1. Scenes must build progressively, starting from foundational concepts and advancing to more complex ideas to ensure a logical flow of understanding for the viewer. Each scene should naturally follow from the previous one, creating a cohesive learning narrative. Start with simpler scene layouts and progressively increase complexity in later scenes. +2. The total number of scenes should be between 3 and 7. +3. Learning objectives should be distributed evenly across the scenes. +4. The total video duration must be under 15 minutes. +5. It is essential to use the exact output format, tags, and headers as specified in the prompt. +6. Maintain consistent formatting throughout the entire scene plan. +7. **No External Assets:** Do not import any external files (images, audio, video). *Use only Manim built-in elements and procedural generation.* +8. **Focus on in-depth explanation of the theorem. Do not include any promotional elements (like YouTube channel promotion, subscribe messages, or external resources) or quiz sessions. Detailed example questions are acceptable and encouraged.** + +Note: High-level plan. Detailed scene specifications will be generated later, ensuring adherence to safe area margins and minimum spacing. The spatial constraints defined above will be strictly enforced in subsequent planning stages.""" + +_prompt_rag_query_generation_technical = """You are an expert in generating search queries specifically for **Manim (Community Edition) documentation** (both core Manim and its plugins). Your task is to analyze a storyboard plan and generate effective queries that will retrieve relevant technical documentation about implementation details. + +Here is the storyboard plan: + +{storyboard} + +Based on this storyboard plan, generate multiple human-like queries (maximum 10) for retrieving relevant technical documentation. + +**Specifically, ensure that:** +1.
Queries focus on retrieving information about **core Manim functionality** and implementation details +2. Include queries about **complex animations and effects** described in the storyboard +3. If the storyboard suggests using plugin functionality, include specific queries targeting those plugins' technical documentation + +The above storyboard plan is relevant to these plugins: {relevant_plugins} +Note that you MUST NOT use the plugins that are not listed above. + +You MUST only output the queries in the following JSON format (with json triple backticks): +```json +[ + {{"type": "manim-core", "query": "content of core functionality query"}}, + {{"type": "", "query": "content of plugin-specific query"}}, + {{"type": "manim-core", "query": "content of animation technique query"}} + ... +] +``` """ + +_prompt_rag_query_generation_fix_error = """You are an expert in generating search queries specifically for **Manim (Community Edition) documentation** (both core Manim and its plugins). Your task is to transform a Manim error and its associated code into effective queries that will retrieve relevant information from Manim documentation. + +Here is the error message: +{error} + +Here is the Manim code that caused the error: +{code} + +Based on the error and code, generate multiple human-like queries (maximum 10) for retrieving relevant documentation. Please ensure that the search targets are different so that the RAG can retrieve a diverse set of documents covering various aspects of the implementation. + +**Specifically, ensure that:** +1. At least some queries are focused on retrieving information about **Manim function usage** in scenes. Frame these queries to target function definitions, usage examples, and parameter details within Manim documentation. +2. If the error suggests using plugin functionality, include at least 1 query specifically targeting **plugin documentation**. Clearly mention the plugin name in these queries to focus the search. +3. Queries should be specific enough to distinguish between core Manim and plugin functionality when relevant, and to target the most helpful sections of the documentation (API reference, tutorials, examples). + +The above error and code are relevant to these plugins: {relevant_plugins}. +Note that you MUST NOT use the plugins that are not listed above. + +You MUST only output the queries in the following JSON format (with json triple backticks): +```json +[ + {{"type": "manim-core", "query": "content of function usage query"}}, + {{"type": "", "query": "content of plugin-specific query"}}, + {{"type": "manim-core", "query": "content of API reference query"}} + ... +] +``` """ + +_code_disable = """""" + +_prompt_manim_cheatsheet = """The following are the inheritance diagrams of the Manim library. You can use them as a reference to select which class to use for the animation.
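+ +For example, the Animation diagram below shows that `Create` specializes `ShowPartial` while `Write` specializes `DrawBorderThenFill`; a sketch like this (illustrative only) picks the more suitable subclass for each mobject: +```python +self.play(Create(Circle())) # ShowPartial subclass: progressively draws the stroke +self.play(Write(MathTex(r"x^2"))) # DrawBorderThenFill subclass: suited to Text/MathTex +```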
+ +``` +digraph Animation { + "AddTextLetterByLetter" + "ShowIncreasingSubsets" + "ShowIncreasingSubsets" -> "AddTextLetterByLetter" + "AddTextWordByWord"; + "Succession"; + "Succession" -> "AddTextWordByWord"; + "AnimatedBoundary"; + "VGroup"; + "VGroup" -> "AnimatedBoundary"; + "Animation"; + "AnimationGroup"; + "Animation" -> "AnimationGroup"; + "ApplyComplexFunction"; + "ApplyMethod"; + "ApplyMethod" -> "ApplyComplexFunction"; + "ApplyFunction"; + "Transform"; + "Transform" -> "ApplyFunction"; + "ApplyMatrix"; + "ApplyPointwiseFunction"; + "ApplyPointwiseFunction" -> "ApplyMatrix"; + "ApplyMethod"; + "Transform" -> "ApplyMethod"; + "ApplyPointwiseFunction"; + "ApplyMethod" -> "ApplyPointwiseFunction"; + "ApplyPointwiseFunctionToCenter"; + "ApplyPointwiseFunction" -> "ApplyPointwiseFunctionToCenter"; + "ApplyWave"; + "Homotopy"; + "Homotopy" -> "ApplyWave"; + "Broadcast"; + "LaggedStart"; + "LaggedStart" -> "Broadcast"; + "ChangeDecimalToValue"; + "ChangingDecimal"; + "ChangingDecimal" -> "ChangeDecimalToValue"; + "ChangeSpeed"; + "Animation" -> "ChangeSpeed"; + "ChangingDecimal"; + "Animation" -> "ChangingDecimal"; + "Circumscribe"; + "Succession" -> "Circumscribe"; + "ClockwiseTransform"; + "Transform" -> "ClockwiseTransform"; + "ComplexHomotopy"; + "Homotopy" -> "ComplexHomotopy"; + "CounterclockwiseTransform"; + "Transform" -> "CounterclockwiseTransform"; + "Create"; + "ShowPartial"; + "ShowPartial" -> "Create"; + "CyclicReplace"; + "Transform" -> "CyclicReplace"; + "DrawBorderThenFill"; + "Animation" -> "DrawBorderThenFill"; + "FadeIn"; + "FadeOut"; + "FadeToColor"; + "ApplyMethod" -> "FadeToColor"; + "FadeTransform"; + "Transform" -> "FadeTransform"; + "FadeTransformPieces"; + "FadeTransform" -> "FadeTransformPieces"; + "Flash"; + "AnimationGroup" -> "Flash"; + "FocusOn"; + "Transform" -> "FocusOn"; + "GrowArrow"; + "GrowFromPoint"; + "GrowFromPoint" -> "GrowArrow"; + "GrowFromCenter"; + "GrowFromPoint" -> "GrowFromCenter"; + "GrowFromEdge"; + "GrowFromPoint" -> "GrowFromEdge"; + "GrowFromPoint"; + "Transform" -> "GrowFromPoint"; + "Homotopy"; + "Animation" -> "Homotopy"; + "Indicate"; + "Transform" -> "Indicate"; + "LaggedStart"; + "AnimationGroup" -> "LaggedStart"; + "LaggedStartMap"; + "LaggedStart" -> "LaggedStartMap"; + "MaintainPositionRelativeTo"; + "Animation" -> "MaintainPositionRelativeTo"; + "Mobject"; + "MoveAlongPath"; + "Animation" -> "MoveAlongPath"; + "MoveToTarget"; + "Transform" -> "MoveToTarget"; + "PhaseFlow"; + "Animation" -> "PhaseFlow"; + "RemoveTextLetterByLetter"; + "AddTextLetterByLetter" -> "RemoveTextLetterByLetter"; + "ReplacementTransform"; + "Transform" -> "ReplacementTransform"; + "Restore"; + "ApplyMethod" -> "Restore"; + "Rotate"; + "Transform" -> "Rotate"; + "Rotating"; + "Animation" -> "Rotating"; + "ScaleInPlace"; + "ApplyMethod" -> "ScaleInPlace"; + "ShowIncreasingSubsets"; + "Animation" -> "ShowIncreasingSubsets"; + "ShowPartial"; + "Animation" -> "ShowPartial"; + "ShowPassingFlash"; + "ShowPartial" -> "ShowPassingFlash"; + "ShowPassingFlashWithThinningStrokeWidth"; + "AnimationGroup" -> "ShowPassingFlashWithThinningStrokeWidth"; + "ShowSubmobjectsOneByOne"; + "ShowIncreasingSubsets" -> "ShowSubmobjectsOneByOne"; + "ShrinkToCenter"; + "ScaleInPlace" -> "ShrinkToCenter"; + "SmoothedVectorizedHomotopy"; + "Homotopy" -> "SmoothedVectorizedHomotopy"; + "SpinInFromNothing"; + "GrowFromCenter" -> "SpinInFromNothing"; + "SpiralIn"; + "Animation" -> "SpiralIn"; + "Succession"; + "AnimationGroup" -> "Succession"; + "Swap"; + "CyclicReplace" -> "Swap"; 
+ "TracedPath"; + "VMobject"; + "VMobject" -> "TracedPath"; + "Transform"; + "Animation" -> "Transform"; + "TransformAnimations"; + "Transform" -> "TransformAnimations"; + "TransformFromCopy"; + "Transform" -> "TransformFromCopy"; + "TransformMatchingAbstractBase"; + "AnimationGroup" -> "TransformMatchingAbstractBase"; + "TransformMatchingShapes"; + "TransformMatchingAbstractBase" -> "TransformMatchingShapes"; + "TransformMatchingTex"; + "TransformMatchingAbstractBase" -> "TransformMatchingTex"; + "Uncreate"; + "Create" -> "Uncreate"; + "Unwrite"; + "Write"; + "Write" -> "Unwrite"; + "UpdateFromAlphaFunc"; + "UpdateFromFunc"; + "UpdateFromFunc" -> "UpdateFromAlphaFunc"; + "UpdateFromFunc"; + "Animation" -> "UpdateFromFunc"; + "VGroup"; + "VMobject" -> "VGroup"; + "VMobject"; + "Mobject" -> "VMobject"; + + "Wait"; + "Animation" -> "Wait"; + "Wiggle"; + "Animation" -> "Wiggle"; + "Write"; + "DrawBorderThenFill" -> "Write"; +} +``` + + +``` +digraph Camera { + "BackgroundColoredVMobjectDisplayer" + "Camera" + "MappingCamera" + "Camera" -> "MappingCamera" + "MovingCamera" + "Camera" -> "MovingCamera" + "MultiCamera" + "MovingCamera" -> "MultiCamera" + "OldMultiCamera" + "Camera" -> "OldMultiCamera" + "SplitScreenCamera" + "OldMultiCamera" -> "SplitScreenCamera" + "ThreeDCamera" + "Camera" -> "ThreeDCamera" +} +``` + +``` +digraph MObject { + "AbstractImageMobject" + "Mobject" -> "AbstractImageMobject" + "Angle" + "VMobject" -> "Angle" + "AnnotationDot" + "Dot" -> "AnnotationDot" + "AnnularSector" + "Arc" -> "AnnularSector" + "Annulus" + "Circle" -> "Annulus" + "Arc" + "TipableVMobject" -> "Arc" + "ArcBetweenPoints" + "Arc" -> "ArcBetweenPoints" + "ArcBrace" + "Brace" -> "ArcBrace" + "ArcPolygon" + "VMobject" -> "ArcPolygon" + "ArcPolygonFromArcs" + "VMobject" -> "ArcPolygonFromArcs" + "Arrow" + "Line" -> "Arrow" + "Arrow3D" + "Line3D" -> "Arrow3D" + "ArrowCircleFilledTip" + "ArrowCircleTip" -> "ArrowCircleFilledTip" + "ArrowCircleTip" + "ArrowTip" -> "ArrowCircleTip" + "Circle" -> "ArrowCircleTip" + "ArrowSquareFilledTip" + "ArrowSquareTip" -> "ArrowSquareFilledTip" + "ArrowSquareTip" + "ArrowTip" -> "ArrowSquareTip" + "Square" -> "ArrowSquareTip" + "ArrowTip" + "VMobject" -> "ArrowTip" + "ArrowTriangleFilledTip" + "ArrowTriangleTip" -> "ArrowTriangleFilledTip" + "ArrowTriangleTip" + "ArrowTip" -> "ArrowTriangleTip" + "Triangle" -> "ArrowTriangleTip" + "ArrowVectorField" + "VectorField" -> "ArrowVectorField" + "Axes" + "VGroup" -> "Axes" + "CoordinateSystem" -> "Axes" + "BackgroundRectangle" + "SurroundingRectangle" -> "BackgroundRectangle" + "BarChart" + "Axes" -> "BarChart" + "Brace" + "svg_mobject.VMobjectFromSVGPath" -> "Brace" + "BraceBetweenPoints" + "Brace" -> "BraceBetweenPoints" + "BraceLabel" + "VMobject" -> "BraceLabel" + "BraceText" + "BraceLabel" -> "BraceText" + "BulletedList" + "Tex" -> "BulletedList" + "Circle" + "Arc" -> "Circle" + "Code" + "VGroup" -> "Code" + "ComplexPlane" + "NumberPlane" -> "ComplexPlane" + "ComplexValueTracker" + "ValueTracker" -> "ComplexValueTracker" + "Cone" + "Surface" -> "Cone" + "CoordinateSystem" + "Cross" + "VGroup" -> "Cross" + "Cube" + "VGroup" -> "Cube" + "CubicBezier" + "VMobject" -> "CubicBezier" + "CurvedArrow" + "ArcBetweenPoints" -> "CurvedArrow" + "CurvedDoubleArrow" + "CurvedArrow" -> "CurvedDoubleArrow" + "CurvesAsSubmobjects" + "VGroup" -> "CurvesAsSubmobjects" + "Cutout" + "VMobject" -> "Cutout" + "Cylinder" + "Surface" -> "Cylinder" + "DashedLine" + "Line" -> "DashedLine" + "DashedVMobject" + "VMobject" -> "DashedVMobject" + 
"DecimalMatrix" + "Matrix" -> "DecimalMatrix" + "DecimalNumber" + "VMobject" -> "DecimalNumber" + "DecimalTable" + "Table" -> "DecimalTable" + "DiGraph" + "GenericGraph" -> "DiGraph" + "Difference" + "Dodecahedron" + "Polyhedron" -> "Dodecahedron" + "Dot" + "Circle" -> "Dot" + "Dot3D" + "Sphere" -> "Dot3D" + "DoubleArrow" + "Arrow" -> "DoubleArrow" + "Elbow" + "VMobject" -> "Elbow" + "Ellipse" + "Circle" -> "Ellipse" + "Exclusion" + "FullScreenRectangle" + "ScreenRectangle" -> "FullScreenRectangle" + "FunctionGraph" + "ParametricFunction" -> "FunctionGraph" + "Generic" + "GenericGraph" + "Generic" -> "GenericGraph" + "Graph" + "GenericGraph" -> "Graph" + "Group" + "Mobject" -> "Group" + "Icosahedron" + "Polyhedron" -> "Icosahedron" + "ImageMobject" + "AbstractImageMobject" -> "ImageMobject" + "ImageMobjectFromCamera" + "AbstractImageMobject" -> "ImageMobjectFromCamera" + "ImplicitFunction" + "VMobject" -> "ImplicitFunction" + "Integer" + "DecimalNumber" -> "Integer" + "IntegerMatrix" + "Matrix" -> "IntegerMatrix" + "IntegerTable" + "Table" -> "IntegerTable" + "Intersection" + "LabeledDot" + "Dot" -> "LabeledDot" + "LayoutFunction" + "Protocol" -> "LayoutFunction" + "Line" + "TipableVMobject" -> "Line" + "Line3D" + "Cylinder" -> "Line3D" + "LinearBase" + "LogBase" + "ManimBanner" + "VGroup" -> "ManimBanner" + "MarkupText" + "svg_mobject.SVGMobject" -> "MarkupText" + "MathTable" + "Table" -> "MathTable" + "MathTex" + "SingleStringMathTex" -> "MathTex" + "Matrix" + "VMobject" -> "Matrix" + "Mobject" + "Mobject1D" + "PMobject" -> "Mobject1D" + "Mobject2D" + "PMobject" -> "Mobject2D" + "MobjectMatrix" + "Matrix" -> "MobjectMatrix" + "MobjectTable" + "Table" -> "MobjectTable" + "NumberLine" + "Line" -> "NumberLine" + "NumberPlane" + "Axes" -> "NumberPlane" + "Octahedron" + "Polyhedron" -> "Octahedron" + "PGroup" + "PMobject" -> "PGroup" + "PMobject" + "Mobject" -> "PMobject" + "Paragraph" + "VGroup" -> "Paragraph" + "ParametricFunction" + "VMobject" -> "ParametricFunction" + "Point" + "PMobject" -> "Point" + "PointCloudDot" + "Mobject1D" -> "PointCloudDot" + "PolarPlane" + "Axes" -> "PolarPlane" + "Polygon" + "Polygram" -> "Polygon" + "Polygram" + "VMobject" -> "Polygram" + "Polyhedron" + "VGroup" -> "Polyhedron" + "Prism" + "Cube" -> "Prism" + "Protocol" + "Generic" -> "Protocol" + "Rectangle" + "Polygon" -> "Rectangle" + "RegularPolygon" + "RegularPolygram" -> "RegularPolygon" + "RegularPolygram" + "Polygram" -> "RegularPolygram" + "RightAngle" + "Angle" -> "RightAngle" + "RoundedRectangle" + "Rectangle" -> "RoundedRectangle" + "SVGMobject" + "VMobject" -> "SVGMobject" + "SampleSpace" + "Rectangle" -> "SampleSpace" + "ScreenRectangle" + "Rectangle" -> "ScreenRectangle" + "Sector" + "AnnularSector" -> "Sector" + "SingleStringMathTex" + "svg_mobject.SVGMobject" -> "SingleStringMathTex" + "Sphere" + "Surface" -> "Sphere" + "Square" + "Rectangle" -> "Square" + "Star" + "Polygon" -> "Star" + "StealthTip" + "ArrowTip" -> "StealthTip" + "StreamLines" + "VectorField" -> "StreamLines" + "Surface" + "VGroup" -> "Surface" + "SurroundingRectangle" + "RoundedRectangle" -> "SurroundingRectangle" + "Table" + "VGroup" -> "Table" + "TangentLine" + "Line" -> "TangentLine" + "Tetrahedron" + "Polyhedron" -> "Tetrahedron" + "Tex" + "MathTex" -> "Tex" + "Text" + "svg_mobject.SVGMobject" -> "Text" + "ThreeDAxes" + "Axes" -> "ThreeDAxes" + "ThreeDVMobject" + "VMobject" -> "ThreeDVMobject" + "TipableVMobject" + "VMobject" -> "TipableVMobject" + "Title" + "Tex" -> "Title" + "Torus" + "Surface" -> "Torus" + "Triangle" + 
"RegularPolygon" -> "Triangle" + "Underline" + "Line" -> "Underline" + "Union" + "UnitInterval" + "NumberLine" -> "UnitInterval" + "VDict" + "VMobject" -> "VDict" + "VGroup" + "VMobject" -> "VGroup" + "VMobject" + "Mobject" -> "VMobject" + "VMobjectFromSVGPath" + "VMobject" -> "VMobjectFromSVGPath" + "ValueTracker" + "Mobject" -> "ValueTracker" + "Variable" + "VMobject" -> "Variable" + "Vector" + "Arrow" -> "Vector" + "VectorField" + "VGroup" -> "VectorField" + "VectorizedPoint" + "VMobject" -> "VectorizedPoint" +} +``` + +``` +digraph Scene { + "LinearTransformationScene" + "VectorScene" + "VectorScene" -> "LinearTransformationScene" + "MovingCameraScene" + "Scene" + "Scene" -> "MovingCameraScene" + "RerunSceneHandler" + "Scene" + "SceneFileWriter" + "SpecialThreeDScene" + "ThreeDScene" + "ThreeDScene" -> "SpecialThreeDScene" + "ThreeDScene" + "Scene" -> "ThreeDScene" + "VectorScene" + "Scene" -> "VectorScene" + "ZoomedScene" + "MovingCameraScene" -> "ZoomedScene" +} +```""" + +_prompt_rag_query_generation_vision_storyboard = """You are an expert in generating search queries specifically for **Manim (Community Edition) documentation** (both core Manim and its plugins). Your task is to analyze a scene plan for a Manim animation and generate effective queries that will retrieve relevant documentation about visual elements and scene composition. + +Here is the scene plan: + +{scene_plan} + +Based on this scene plan, generate multiple human-like queries (maximum 10) for retrieving relevant documentation about visual elements and scene composition techniques. + +**Specifically, ensure that:** +1. Queries focus on retrieving information about **visual elements** like shapes, objects, and their properties +2. Include queries about **scene composition techniques** like layout, positioning, and grouping +3. If the scene plan suggests using plugin functionality, include specific queries targeting those plugin's visual capabilities +4. Queries should be high-level, aiming to discover what Manim features can be used, rather than focusing on low-level implementation details. + - For example, instead of "how to set the color of a circle", ask "what visual properties of shapes can I control in Manim?". + +The above scene plan is relevant to these plugins: {relevant_plugins}. +Note that you MUST NOT use the plugins that are not listed above. + +You MUST only output the queries in the following JSON format (with json triple backticks): +```json +[ + {{"type": "manim-core", "query": "content of visual element query"}}, + {{"type": "", "query": "content of plugin-specific query"}}, + {{"type": "manim-core", "query": "content of composition technique query"}} + ... +] +```""" + +_prompt_context_learning_technical_implementation = """Here are some example technical implementation plans to help guide your implementation: + +{examples} + +Please follow a similar structure while maintaining creativity and relevance to the current scene.""" + +_prompt_detect_plugins = """You are a Manim plugin detection system. Your task is to analyze a video topic and description to determine which Manim plugins would be most relevant for the actual animation implementation needs. + +Topic: +{topic} + +Description: +{description} + +Available Plugins: +{plugin_descriptions} + +Instructions: +1. Analyze the topic and description, focusing specifically on what needs to be animated +2. Review each plugin's capabilities and determine if they provide specific tools needed for the animations described +3. 
Only select plugins that provide functionality directly needed for the core animations +4. Consider these criteria for each plugin: + - Does the plugin provide specific tools or components needed for the main visual elements? + - Are the plugin's features necessary for implementing the core animations? + - Would the animation be significantly more difficult to create without this plugin? +5. Exclude plugins that: + - Only relate to the general topic area but don't provide needed animation tools + - Might be "nice to have" but aren't essential for the core visualization + - Could be replaced easily with basic Manim shapes and animations + +Your response must follow the output format below: + +[brief description of your thinking process] + + +```json +["plugin_name1", "plugin_name2"] +``` +""" + +_prompt_scene_animation_narration = """You are an expert in educational video production and Manim animation, skilled in creating engaging and pedagogically effective learning experiences. +**Reminder:** This animation and narration plan is entirely self-contained; there is no dependency on any previous or subsequent scene implementations. However, the narration should flow smoothly as part of a larger, single video. + +Your task is to create a **detailed animation and narration plan for Scene {scene_number}**, ensuring it is not just visually appealing but also serves a clear educational purpose within the overall video topic. + +Remember, the narration should not simply describe what's happening visually, but rather **teach a concept step-by-step**, guiding the viewer to a deeper understanding. Animations should be spatially coherent, contribute to a clear visual flow, and strictly respect safe area margins (0.5 units) and minimum spacing (0.3 units). **Consider the scene number {scene_number} and the overall scene context to ensure smooth transitions and a logical flow within the larger video narrative.** + +Topic: {topic} +Description: {description} + +Scene Overview: +{scene_outline} + +Scene Vision and Storyboard: +{scene_vision_storyboard} + +Technical Implementation Plan: +{technical_implementation_plan} + +The following manim plugins are relevant to the scene: +{relevant_plugins} + +**Spatial Constraints (Strictly Enforced Throughout Animations):** +* **Safe area margins:** 0.5 units. *Maintain objects and VGroups within margins.* +* **Minimum spacing:** 0.3 units. *Ensure minimum spacing between all objects and VGroups.* + +**Animation Timing and Pacing Requirements:** +* Specify `run_time` for all animations. +* Use `Wait()` for transition buffers, specifying durations and **pedagogical purpose**. +* Coordinate animation timings with narration cues for synchronized pedagogical presentation. + +**Visual Flow and Pedagogical Clarity:** +* Ensure animations create a clear and logical visual flow, **optimized for learning and concept understanding.** +* Use animation pacing and transition buffers to visually separate ideas and **enhance pedagogical clarity.** +* Maintain spatial coherence for predictable and understandable animations, strictly adhering to spatial constraints. + +**Diagrams/Sketches (Optional but Highly Recommended for Complex Scenes):** +* For complex animations, include diagrams/sketches to visualize animation flow and object movements. This aids clarity and reduces errors. 
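+ +For instance, the timing and pacing requirements above might look like this in code (an illustrative sketch; `equation` and `diagram` are placeholder mobjects): +```python +self.play(Write(equation), run_time=2) # slow enough for viewers to read +self.wait(1) # pedagogical pause: let the idea sink in +self.play(FadeIn(diagram, shift=UP), run_time=1.5) +```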
+ +Your plan must demonstrate a strong understanding of pedagogical narration and how animations can be used to effectively teach concepts, while strictly adhering to spatial constraints and timing requirements. + +You MUST generate a **detailed and comprehensive** animation and narration plan for **Scene {scene_number}**, in the following format, similar to the example provided (from ```xml to ```): + +```xml + + +[ANIMATION_STRATEGY] +1. **Pedagogical Animation Plan:** Provide a detailed plan for all animations in the scene, explicitly focusing on how each animation contributes to **teaching the core concepts** of this scene. + - **Parent VGroup transitions (if applicable):** + - If VGroups are used, specify transitions (`Shift`, `Transform`, `FadeIn`, `FadeOut`) with `Animation` type, direction, magnitude, target VGroup, and `run_time`. + - **Explain the pedagogical rationale** for each VGroup transition. How does it guide the viewer's attention or contribute to understanding the scene's learning objectives? Ensure spatial coherence and respect for constraints. + - **Element animations within VGroups and for individual Mobjects:** + - Specify animation types (`Create`, `Write`, `FadeIn`, `Transform`, `Circumscribe`, `AnimationGroup`, `Succession`) for elements. + - For each element animation, specify `Animation` type, target object(s), and `run_time`. Detail sequences and timing for `AnimationGroup` or `Succession`. + - **Explain the pedagogical purpose** of each element animation. How does it break down complex information, highlight key details, or improve visual clarity for learning? Ensure spatial coherence and minimum spacing. + - **Coordinate element animations with VGroup transitions:** + - Clearly describe the synchronization between element animations and VGroup transitions (if any). + - Specify relative timing and `run_time` to illustrate coordination. + - **Explain how this animation sequence and coordination creates a pedagogical flow**, guiding the viewer's eye and attention logically through the learning material. + +2. **Scene Flow - Pedagogical Pacing and Clarity:** Detail the overall flow of the scene, emphasizing pedagogical effectiveness. + - **Overall animation sequence, spatial progression for learning:** + - Describe the complete animation sequence, broken down into pedagogical sub-sections (e.g., "Introducing the Problem", "Step-by-step Solution", "Concept Reinforcement"). + - Outline the spatial progression of objects and VGroups, focusing on how it supports the **pedagogical narrative** and concept development. + - Ensure a clear and logical visual flow optimized for learning, respecting spatial constraints. + - **Transition buffers for pedagogical pauses:** + - Specify `Wait()` times between animation sections for visual separation and **learner processing time**. + - For each `Wait()`, specify duration and **explain the pedagogical reason** for this buffer (e.g., "Allow viewers time to process the formula", "Create a pause for reflection before moving to the next concept"). + - **Coordinate animation timing with narration for engagement and comprehension:** + - Describe how animation timings are coordinated with the narration script to **maximize viewer engagement and comprehension**. + - Specify animation cues within the narration script and explain how these cues are synchronized with animations to **reinforce learning points** at the optimal moment. 
+ +[NARRATION] +- **Pedagogical Narration Script:** + - Provide the full narration script for Scene {scene_number}. + - **Embed precise animation timing cues** within the narration script (as described before). + - **The script should be written as if delivered by a knowledgeable and engaging lecturer.** It should: + - **Clearly explain concepts step-by-step.** + - **Use analogies and real-world examples to enhance understanding.** + - **Pose questions to encourage active thinking.** + - **Summarize key points and transitions.** + - **Be detailed and knowledge-rich, not just visually descriptive.** + - **Connect smoothly with the previous and subsequent scenes, acting as a segment within a single, cohesive video.** + - **Avoid repetitive introductions or conclusions.** + - Consider using phrases like "Building on what we saw in the previous part..." or "Let's now move on to..." to create a sense of continuity. + - Reference the scene number when appropriate (e.g., "Now, let's explore..."). + - **Crucially, the narration should seamlessly integrate with the animations to create a cohesive and effective learning experience.** +- **Narration Sync - Pedagogical Alignment:** + - Detail the synchronization strategy between narration and animations, emphasizing **pedagogical alignment**. + - Explain how narration timing is aligned with animation start/end times to **guide viewer attention to key learning elements precisely when they animate.** + - Emphasize how narration cues and animation timings work together to **create a synchronized audiovisual presentation that maximizes learning and retention.** + + +``` +""" + +_code_color_cheatsheet = """MUST include the following color definitions if you use the colors in your code. ONLY USE THE COLORS BELOW. + +WHITE = '#FFFFFF' +RED = '#FF0000' +GREEN = '#00FF00' +BLUE = '#0000FF' +YELLOW = '#FFFF00' +CYAN = '#00FFFF' +MAGENTA = '#FF00FF' +ORANGE = '#FFA500' +PURPLE = '#800080' +PINK = '#FFC0CB' +BROWN = '#A52A2A' +GRAY = '#808080' +TEAL = '#008080' +NAVY = '#000080' +OLIVE = '#808000' +MAROON = '#800000' +LIME = '#00FF00' +AQUA = '#00FFFF' +FUCHSIA = '#FF00FF' +SILVER = '#C0C0C0' +GOLD = '#FFD700'""" + +_prompt_visual_self_reflection = """You are an expert in Manim animations and educational video quality assessment. Your task is to analyze a rendered Manim video and its corresponding audio narration to identify areas for visual and auditory improvement, ensuring alignment with the provided implementation plan and enhancing the video's teaching effectiveness. + +Please analyze the provided Manim video and listen to the accompanying audio narration. Conduct a thorough self-reflection focusing on the following aspects: + +**1. Visual Presentation and Clarity (Automated VLM Analysis & Expert Human-like Judgment):** + +* **Object Overlap:** Does the video exhibit any visual elements (text, shapes, equations, etc.) overlapping in a way that obscures information or makes the animation difficult to understand? If possible, detect regions of significant overlap and highlight them in your reflection. +* **Out-of-Bounds Objects:** Are any objects positioned partially or entirely outside of the visible frame of the video? Identify and report objects that appear to be clipped or outside the frame boundaries.
+* **Incorrect Object Positioning:** Based on your understanding of good visual design and the scene's educational purpose, are objects placed in positions that are illogical, distracting, or misaligned with their intended locations or relationships to other elements as described in the implementation plan? Consider: + * **Logical Flow:** Does the spatial arrangement support the intended visual flow and narrative progression of the scene? + * **Alignment and Balance:** Is the scene visually balanced? Are elements aligned in a way that is aesthetically pleasing and contributes to clarity, or does the layout appear haphazard or unbalanced? + * **Proximity and Grouping:** Are related elements positioned close enough to be visually grouped, and are unrelated elements sufficiently separated to avoid visual clutter? +* **General Visual Clarity & Effectiveness:** Consider broader aspects of visual communication. Are there any other issues that detract from the video's clarity, impact, or overall effectiveness? This could include: + * **Visual Clutter:** Is the scene too busy or visually overwhelming at any point? Are there too many elements on screen simultaneously? + * **Poor Spacing/Layout:** Is the spacing between elements inconsistent or inefficient, making the scene feel cramped or unbalanced? Are margins and padding used effectively? + * **Ineffective Use of Color:** Are color choices distracting, clashing, or not contributing to the animation's message? Are colors used consistently and purposefully to highlight key information? + * **Pacing Issues (Visual):** Is the visual animation too fast or too slow in certain sections, hindering comprehension? Are visual transitions smooth and well-timed? + * **Animation Clarity:** Are the animations themselves clear and helpful in conveying the intended information? Do animations effectively guide the viewer's eye and focus attention? + +**2. Narration Quality:** + +* **Narration Clarity and Pacing:** Is the narration clear, concise, and easy to understand? Is the pacing of the narration appropriate for the visual content and the target audience? Does the narration effectively support the visual explanations? +* **Narration Sync with Visuals:** Does the narration effectively synchronize with the on-screen visuals? Use VLM to analyze the video and identify instances where the narration is misaligned with the animations or visual elements it is describing. Report specific timings of misalignment. + +**3. Alignment with Implementation Plan:** + +* **Visual Fidelity:** Does the rendered video accurately reflect the visual elements and spatial arrangements described in the provided Manim Implementation Plan? Identify any deviations. +* **Animation Fidelity:** Do the animations in the video match the animation methods and sequences outlined in the Implementation Plan? Report any discrepancies. + +Manim Implementation Plan: +{implementation} + +Generated Code: +{generated_code} + +Output Format 1: +If any issues are identified in visual presentation, audio quality, narration, or plan alignment, please provide a detailed reflection on the issues and how to improve the video's visual and auditory quality, narration effectiveness, and code correctness. Then, you must return the updated Python code that directly addresses these issues. The code must be complete and executable. + + +[Detailed reflection on visual, auditory, narration, and plan alignment issues and improvement suggestions. 
Include specific timings for narration/visual sync issues and descriptions of object overlap/out-of-bounds problems if detected by VLM. Be specific about code changes needed for improvement.] + + +[Improved Python Code - Complete and Executable - Directly Addressing Reflection Points] + + +Output Format 2: +If no issues are found and the video and audio are deemed high quality, visually clear, narratively effective, and fully aligned with the implementation plan, please explicitly only return "" as output.""" + +_prompt_teaching_framework = """# Comprehensive Educational Video Content Framework + +## 1. Pre-Production Planning + +### A. Learning Objectives +- **Knowledge Level (Remember & Understand)** + Define specific, measurable learning outcomes that can be clearly assessed and evaluated. These outcomes should be concrete and observable, allowing instructors to verify that learning has occurred. Each outcome should be written using precise language that leaves no ambiguity about what constitutes success. For example, \"After watching this video, learners will be able to define and explain the concept of variables in programming\" provides a clear benchmark for assessment. + + Action verbs are essential tools for crafting effective learning objectives. Choose verbs like define, list, describe, explain, and identify that clearly indicate the expected cognitive processes. These verbs should align with Bloom's Taxonomy to ensure appropriate cognitive engagement. When applicable, ensure all objectives align with relevant curriculum standards to maintain educational consistency and meet institutional requirements. + +- **Comprehension Level (Analyze & Evaluate)** + Develop objectives that emphasize deeper understanding and connections between concepts. These objectives should go beyond simple recall to require analysis and evaluation of the material. Students should be able to make meaningful connections between different aspects of the content and explain their relationships. For example, \"Learners will be able to compare different data types and explain when to use each\" demonstrates this deeper level of understanding. + + Critical thinking elements should be deliberately incorporated into each objective. Create scenarios that challenge students to apply their knowledge in new contexts. These scenarios should require careful analysis and reasoned decision-making to solve problems effectively. Design learning experiences that encourage students to question assumptions and develop analytical skills. + +- **Application Level (Apply & Create)** + Develop practical skills that directly translate to real-world applications and scenarios. These objectives should focus on hands-on experience and tangible outcomes that demonstrate mastery. For example, \"Learners will be able to write a basic program using variables and proper naming conventions\" provides a clear, actionable goal that can be demonstrated through practical work. + + Include hands-on exercises that allow students to practice and refine their skills in a supported environment. These exercises should gradually increase in complexity to build confidence and competence. Provide real-world context by incorporating authentic scenarios and problems that students might encounter in their future careers or daily lives. This connection to reality helps maintain engagement and demonstrates the immediate value of the learning. 
+ +- **Target Audience Analysis** + Conduct thorough demographic research to understand your learners' backgrounds, ages, and educational levels. This analysis should include assessment of prior knowledge and experience with the subject matter. Consider the technical capabilities of your audience, including their access to necessary tools and technologies. + + Evaluate different learning preferences and styles within your target audience. This understanding helps in designing varied content that appeals to visual, auditory, and kinesthetic learners. Consider cultural and linguistic factors that might impact learning effectiveness. Create content that is inclusive and accessible to learners from diverse backgrounds. Account for varying levels of technical proficiency and ensure your content can be accessed across different devices and platforms. + +### B. Content Structure + +- **Hook (5-10% of duration)** + Begin each video with a compelling problem or scenario that immediately captures attention and creates interest. This hook should be relevant to the content while being unexpected or intriguing enough to maintain viewer engagement. Use surprising facts or statistics that challenge common assumptions or demonstrate the importance of the topic. + + Share relevant real-world applications that demonstrate immediate value to the learner. For example, \"What if you could automate your daily tasks with just a few lines of code?\" creates immediate interest by connecting to practical benefits. The hook should create an emotional connection and generate curiosity about the upcoming content. Consider using storytelling elements or real-world problems that your audience can relate to. + +- **Context (10-15%)** + Provide clear explanations of how the content relates to real-world situations and problems. This context should help learners understand why the material is relevant to their lives or career goals. Make explicit connections to previous knowledge and experiences that learners can build upon. + + Address the fundamental question of \"Why should I learn this?\" by demonstrating practical applications and benefits. This explanation should be concrete and specific to your audience's needs and interests. Set clear expectations for learning outcomes so students understand what they will gain from the content. Provide a roadmap for the learning journey ahead, including how this content connects to future topics and skills. + +- **Core Content (60-70%)** + Organize material in a logical progression that builds from fundamental concepts to more complex applications. This progression should be carefully planned to avoid overwhelming learners while maintaining engagement. Include multiple examples that demonstrate concepts from different angles and perspectives. + + Use varied teaching methods to accommodate different learning styles and maintain interest. These methods might include demonstrations, animations, code examples, and interactive elements. Implement frequent knowledge checks throughout the content to ensure understanding and maintain engagement. Break complex topics into manageable chunks that can be easily processed and remembered. + +- **Practice/Application (10-15%)** + Create guided practice opportunities that allow learners to apply new knowledge in a supported environment. These practice sessions should include clear instructions and immediate feedback mechanisms. Design interactive elements that engage learners and require active participation rather than passive viewing. 
+ + Develop problem-solving scenarios that challenge learners to apply concepts in realistic situations. These scenarios should gradually increase in complexity as learners gain confidence. Include opportunities for peer learning and collaboration when possible. Provide scaffolded support that can be gradually removed as learners become more proficient. + +- **Summary (5-10%)** + Conclude each video with a comprehensive recap of key points and main takeaways. This summary should reinforce the most important concepts and their practical applications. Preview upcoming topics to create anticipation and show how current learning connects to future content. + + Provide specific action items that learners can implement immediately to reinforce their learning. These should be concrete, achievable tasks that build confidence and competence. Share additional resources for further learning, including reference materials, practice exercises, and advanced topics. Create clear connections between the current content and future learning objectives. + +## 2. Instructional Design Elements + +### A. Cognitive Load Management + +- **Chunking Strategies** + Break complex content into manageable segments of 3-5 minutes each. These chunks should focus on single concepts or closely related ideas that form a coherent unit. Use clear transitions between segments to maintain flow while allowing for cognitive processing. + + Implement progressive complexity by building from basic concepts to more advanced applications. This progression should be carefully planned to avoid overwhelming learners. Include strategic pauses and processing time between segments to allow for reflection and integration of new information. Use visual and verbal cues to signal transitions between different concepts or levels of complexity. + +- **Visual Organization** + Develop a consistent visual hierarchy that guides learners through the content effectively. This hierarchy should use size, color, and placement to indicate the relative importance of different elements. Implement clean, uncluttered designs that minimize distractions and focus attention on key concepts. + + Apply color coding consistently to help learners identify and remember related concepts. This coding should be intentional and meaningful, not merely decorative. Use white space effectively to create visual breathing room and help separate different concepts. Ensure that visual elements support rather than compete with the learning objectives. + +- **Information Processing** + Carefully limit the introduction of new concepts to 5-7 per video to prevent cognitive overload. This limitation helps ensure that learners can effectively process and retain the information presented. Develop and use mnemonics and memory aids that help learners organize and remember key concepts. + + Provide visual anchors that learners can reference throughout the content. These anchors should help maintain context and show relationships between concepts. Include strategic review points that reinforce previous learning before introducing new material. Create clear connections between new information and existing knowledge to facilitate better retention. + +### B. Engagement Techniques + +- **Storytelling Elements** + Develop a clear narrative flow that carries learners through the content naturally. This narrative should have a beginning, middle, and end that maintains interest and supports learning objectives. Use character-driven examples that learners can relate to and remember. 
+ + Include elements of conflict and resolution to create tension and maintain engagement. These elements should be relevant to the learning objectives and help illustrate key concepts. Maintain an emotional connection through relatable scenarios and authentic problems. Create story arcs that span multiple videos or modules to maintain long-term engagement. + +- **Visual Support** + Create relevant graphics and animations that enhance understanding of key concepts. These visual elements should be purposeful and directly support learning objectives, not merely decorative. Implement a consistent visual style across all content to maintain professionalism and reduce cognitive load. + + Develop clear infographics that break down complex concepts into understandable components. These should use visual hierarchy and design principles effectively. Use motion and animation thoughtfully to direct attention to important elements and demonstrate processes. Ensure all visual elements are accessible and effectively communicate their intended message. + +- **Interactive Components** + Design and embed quiz questions that check understanding at key points in the content. These questions should be strategically placed to maintain engagement and reinforce learning. Include deliberate pause points that encourage reflection and active processing of information. + + Create coding challenges or practical exercises that allow immediate application of concepts. These should be scaffolded appropriately for the learner's skill level. Provide multiple opportunities for feedback, both automated and instructor-guided when possible. Design interactive elements that encourage experimentation and learning from mistakes. + +## 3. Content Delivery Framework + +### A. Teaching Sequence + +1. **Activate** + Begin each learning session by connecting to familiar concepts that students already understand. This activation of prior knowledge creates a foundation for new learning and helps students feel confident. Use carefully chosen analogies and metaphors that bridge the gap between known and new concepts. These comparisons should be relevant to your audience's experience and background. + + Create explicit connections to previous learning modules or related concepts. These connections help students build a coherent mental model of the subject matter. Assess prior knowledge through quick activities or questions that reveal students' current understanding. Use this assessment to adjust your teaching approach and address any misconceptions early in the lesson. + +2. **Present** + Deliver clear, structured explanations of new concepts that build upon activated knowledge. These explanations should use precise language while remaining accessible to your target audience. Employ multiple representation methods, including verbal explanations, visual diagrams, and interactive demonstrations. This variety helps accommodate different learning styles and reinforces understanding. + + Provide step-by-step demonstrations that break complex processes into manageable parts. Each step should be clearly explained and connected to the overall objective. Include real-world examples that illustrate practical applications of the concepts. These examples should be relevant to your audience's interests and career goals. + +3. **Guide** + Develop worked examples that demonstrate expert problem-solving processes and thinking strategies. These examples should include explicit explanations of decision-making and common pitfalls to avoid. 
Share expert thinking processes by \"thinking aloud\" through problem-solving steps. This transparency helps students understand the metacognitive aspects of learning. + + Create scaffolded learning experiences that gradually reduce support as students gain confidence. Begin with highly structured guidance and progressively move toward independent work. Address common misconceptions and errors proactively, explaining why they occur and how to avoid them. Provide clear strategies for troubleshooting and problem-solving. + +4. **Practice** + Design guided exercises that allow students to apply new knowledge with appropriate support. These exercises should be carefully sequenced to build confidence and competence gradually. Include opportunities for independent practice that reinforce learning and build autonomy. Ensure these practice sessions are aligned with learning objectives and provide clear success criteria. + + Create peer learning opportunities that allow students to learn from and teach others. These interactions can reinforce understanding and develop communication skills. Implement immediate feedback mechanisms that help students understand their progress and areas for improvement. This feedback should be specific, constructive, and actionable. + +5. **Apply** + Develop real-world projects that require students to integrate and apply their learning in authentic contexts. These projects should be challenging but achievable, with clear connections to practical applications. Create case studies that illustrate complex scenarios and require critical thinking and problem-solving skills. These studies should reflect realistic situations students might encounter in their careers. + + Design problem-solving scenarios that encourage creative application of knowledge and skills. These scenarios should have multiple possible solutions to encourage innovative thinking. Provide opportunities for creative applications that allow students to extend their learning in personally meaningful ways. Support experimentation and risk-taking in a safe learning environment. + +### B. Presentation Techniques + +- **Transitions** + Implement clear verbal cues that signal shifts between concepts or activities. These cues help students maintain orientation and prepare for new information. Design visual transition elements that support cognitive processing and maintain engagement. These elements should be consistent throughout your content to establish familiar patterns. + + Create concept maps that show relationships between different topics and ideas. These maps help students understand how current learning connects to broader concepts. Use progress indicators that help students track their advancement through the material. These indicators should provide a sense of accomplishment and motivation. + +- **Multiple Representations** + Combine text and graphics effectively to convey information through multiple channels. This combination should be purposeful and coordinated to enhance understanding. Integrate audio and visual elements that complement each other and reinforce key concepts. Ensure these elements work together without creating cognitive overload. + + Develop interactive elements that encourage active engagement with the content. These elements should provide immediate feedback and support learning objectives. Include physical demonstrations when appropriate to illustrate concepts in tangible ways. These demonstrations should be clear, visible, and directly relevant to learning goals. + +## 4. 
Assessment Integration + +### A. Knowledge Verification +- **Formative Assessment** + Implement regular quick checks for understanding throughout the learning process. These checks should be low-stakes and provide immediate feedback to both learner and instructor. Design self-assessment prompts that encourage students to reflect on their own learning progress. These prompts should help students develop metacognitive skills and self-awareness. + + Create opportunities for peer discussion and feedback that deepen understanding through explanation and debate. These discussions should be structured to ensure productive exchanges and learning outcomes. Develop reflection questions that help students connect new learning to existing knowledge and future applications. These questions should promote deep thinking and personal connection to the material. + +- **Summative Assessment** + Design project-based assessments that evaluate comprehensive understanding and practical application. These projects should integrate multiple concepts and skills learned throughout the course. Guide students in developing portfolios that demonstrate their learning journey and achievements. These portfolios should include examples of both process and product. + + Create opportunities for skill demonstration that allow students to show mastery in authentic contexts. These demonstrations should reflect real-world applications and standards. Develop knowledge application assessments that require students to transfer learning to new situations. These assessments should evaluate both understanding and adaptability. + +### B. Learning Reinforcement +- **Review Strategies** + Implement spaced repetition techniques that optimize long-term retention of information. This approach should strategically revisit concepts at increasing intervals. Create concept mapping exercises that help students visualize and understand relationships between ideas. These maps should become increasingly complex as understanding develops. + + Guide students in knowledge synthesis activities that combine multiple concepts into coherent understanding. These activities should help students see the bigger picture and make meaningful connections. Design application scenarios that require students to apply knowledge in new and challenging contexts. These scenarios should build confidence and demonstrate practical relevance. + +## 5. Technical Considerations + +### A. Video Production Elements +- **Duration Guidelines** + Optimize video length to maintain engagement while effectively covering necessary content. The ideal duration of 6-12 minutes balances attention span with comprehensive coverage. Implement concept-based segmentation that breaks longer topics into digestible chunks. This segmentation should follow natural breaking points in the material. + + Consider attention span patterns when planning content structure and pacing. Include variety and interaction to maintain engagement throughout longer sessions. Adapt content length to platform-specific requirements and viewing habits. Consider mobile viewing habits and platform limitations in your planning. + +- **Quality Standards** + Ensure professional audio quality through proper equipment and recording techniques. This includes clear voice recording, minimal background noise, and appropriate volume levels. Maintain consistent lighting that enhances visibility and reduces viewer fatigue. Pay attention to both subject lighting and screen content visibility. 
+ + Create clear visual presentations that effectively communicate key concepts. This includes appropriate font sizes, color contrast, and visual hierarchy. Maintain appropriate pacing that allows for processing time while maintaining engagement. Consider your audience's needs and learning objectives when determining pace. + +### B. Accessibility Features +- **Universal Design** + Create content that accommodates multiple learning modalities and preferences. This includes providing information through visual, auditory, and interactive channels. Ensure screen reader compatibility by following accessibility best practices and standards. This includes proper heading structure and alt text for images. + + Implement appropriate color contrast considerations for all visual elements. This ensures content is accessible to viewers with various visual abilities. Provide alternative text descriptions for all important images and graphics. These descriptions should convey the same information as the visual elements. + +## 6. Follow-up Resources + +### A. Supporting Materials +- **Resource Types** + Develop comprehensive practice exercises that reinforce learning and build confidence. These exercises should range from basic to advanced, accommodating different skill levels. Create well-documented code samples that demonstrate best practices and common patterns. These samples should include comments explaining key concepts and decisions. + + Compile detailed reference guides that support independent learning and problem-solving. These guides should be easily searchable and regularly updated. Design cheat sheets that provide quick access to essential information and common procedures. These should be concise while including all crucial information. + +### B. Implementation Guide +- **Learning Pathways** + Create clear prerequisite maps that show relationships between different topics and skills. This mapping helps students understand learning dependencies and plan their progress. Provide advanced topic suggestions that help motivated learners extend their knowledge. These suggestions should include resources and guidance for self-directed learning. + + Develop skill progression guides that show clear paths from beginner to advanced levels. These guides should include milestones and checkpoints for measuring progress. Suggest project ideas that allow practical application of learned skills. These projects should be scalable to different skill levels and interests.""" + +_prompt_fix_error = """You are an expert Manim developer specializing in debugging and error resolution. Based on the provided implementation plan and Manim code, analyze the error message to provide a comprehensive fix and explanation. + +Implementation Plan of the Scene: +{implementation_plan} + +Manim Code: +```python +{manim_code} +``` + +Error Message: +{error_message} + +Requirements: +1. Provide complete error analysis with specific line numbers where possible. +2. Include exact instructions for every code change. +3. Explain why the error occurred in plain language. +4. If external assets (e.g., images, audio, video) are referenced, remove them. +5. **If voiceover is present in the original code, ensure it remains preserved in the corrected code.** +6. Preserve all original code that is not causing the reported error. Do not remove or alter any intentional elements unnecessarily. +7. Follow best practices for code clarity and the current Manim version. + +You MUST only output the following format (from the error analysis section through the closing code fence).
You MUST NOT come up with any other format like JSON. + + +Error Type: [Syntax/Runtime/Logic/Other] +Error Location: [File/Line number/Component] +Root Cause: [Brief explanation of what caused the error] +Impact: [What functionality is affected] +Solution: +[FIXES_REQUIRED] +- Fix 1: [Description] + - Location: [Where to apply] + - Change: [What to modify] +- Fix 2: [If applicable] +... + + +```python +# Complete corrected and fully implemented Python code +# Include all necessary imports, definitions, and any additional code for the script to run successfully +``` +""" + +_prompt_animation_simple = """Given a topic and its context, you need to explain the topic in text. + +Also generate a Manim script that visually illustrates a key aspect of {topic} without including explanatory text in the animation itself. +Your text can mention the animation, but it should not be the main focus. +Context about the topic {topic}: {description}. + +The animation should focus on: +* Illustrating the most significant part of the theorem or concept – Use geometric figures, graphs, number lines, or any relevant visualization. +* Providing an intuitive example – Instead of proving the theorem, show a concrete example or transformation that visually supports understanding. +* Separately, provide a written explanation of the theorem as text that can be displayed outside the animation. + +Ensure that: + +* The animation is concise. +* The Manim code is compatible with the latest version of Manim Community Edition. +* The visual elements are clear and enhance understanding. + +Please provide only the following output: + +1. A text explanation of the theorem. +2. A complete Manim script that generates the animation. Only give the code. + +Output format: + +(Text Explanation Output) +--- (split by ---) +(Manim Code Output) + +Please do not include any other text or headers in your output. +Only use one --- to split the text explanation and the Manim code.""" + +_prompt_animation_rag_query_generation_fix_error = """You are an expert in Manim (Community Edition) and its plugins. Your task is to transform a complete implementation plan for a Manim animation scene into queries that can be used to retrieve relevant documentation from both Manim core and any relevant plugins. The implementation plan will describe the scene's vision, technical implementation, and animation strategy. + +Here is the Text Explanation (Implementation Plan) as the context: + +{text_explanation} + +The error message will describe a problem encountered while running Manim code. Your queries should include keywords related to the specific Manim classes, methods, functions, and *concepts* that are likely related to the error, including any plugin-specific functionality. Focus on extracting the core concepts, actions, and vocabulary from the error message itself and the code snippet that produced the error. Generate queries that are concise and target different aspects of the documentation (class reference, method usage, animation examples, conceptual explanations) across both Manim core and relevant plugins. + +Here is the error message and the code snippet: + +**Error Message:** +{error} + +**Code Snippet:** +{code} + +Based on the error message and the code snippet, generate multiple human-like queries (maximum 5-7) for retrieving relevant documentation to fix this error. Please ensure that the search targets are different so that the RAG can retrieve a diverse set of documents covering various aspects of the error and its potential solutions.
+ +**Specifically, ensure that:** +1. At least 1-2 queries are focused on retrieving information about Manim *function or class usage* that might be causing the error. +2. If the error message or code suggests the use of plugin functionality, include at least 1 query specifically targeting plugin documentation related to the error. +3. Queries should be specific enough to distinguish between core Manim and plugin functionality when relevant. + +Output the queries in the following format: +[ + {{"query": "content of query 1", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 2", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 3", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 4", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 5", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 6", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 7", "type": "manim_core/name_of_the_plugin"}}, +] """ + +_prompt_animation_fix_error = """You are an expert Manim developer specializing in debugging and error resolution. Analyze the provided code and error message to provide a comprehensive fix and explanation. + + +Text Explanation: +{text_explanation} + +Manim Code Animation to complement the Text Explanation: +```python +{manim_code} +``` + +Error Message from running the code: +{error_message} + + +You MUST only output the following format (make sure to include the ```python and ``` in the code): + + +Error Type: [Syntax/Runtime/Logic/Other] +Error Location: [File/Line number/Component] +Root Cause: [Brief explanation of what caused the error] +Impact: [What functionality is affected] + + + +[FIXES_REQUIRED] +- Fix 1: [Description] + - Location: [Where to apply] + - Change: [What to modify] +- Fix 2: [If applicable] + ... + +[CORRECTED_CODE] +```python +# Complete corrected and fully implemented code, don't be lazy +# Include all necessary imports, definitions, and any additional code for the script to run successfully +``` + + + +Requirements: +1. Provide complete error analysis with specific line numbers where possible. +2. Include exact instructions for every code change. +3. Ensure that the [CORRECTED_CODE] section contains complete, executable Python code (not just code snippets). Do not assume context from the prompt. +4. Explain why the error occurred in plain language. +5. Include verification steps to confirm the error is resolved. +6. Suggest preventive measures for avoiding similar errors in the future. +7. If external assets (e.g., images, audio, video) are referenced, remove them. +8. Preserve all original code that is not causing the reported error. Do not remove or alter any intentional elements unnecessarily. +9. Follow best practices for code clarity and the current Manim version.""" + +_prompt_scene_technical_implementation = """You are an expert in educational video production and Manim (Community Edition), adept at translating pedagogical narration plans into robust and spatially accurate Manim code. +**Reminder:** This technical implementation plan is fully self-contained. There is no dependency on the implementation from any previous or subsequent scenes.
+ +Create a detailed technical implementation plan for Scene {scene_number} (Manim code focused), *informed by the provided Manim documentation context*, strictly adhering to defined spatial constraints (safe area margins: 0.5 units, minimum spacing: 0.3 units), and **addressing potential text bounding box overflow issues**. + +Topic: {topic} +Description: {description} + +Scene Overview: +{scene_outline} + +Scene Vision and Storyboard: +{scene_vision_storyboard} + +The following Manim plugins are relevant to the scene: +{relevant_plugins} + +**Spatial Constraints (Strictly Enforced):** +* **Safe area margins:** 0.5 units on all sides from the scene edges. All objects must be positioned within these margins. +* **Minimum spacing:** 0.3 units between any two Manim objects (measured edge to edge). This prevents overlaps and maintains visual clarity. + +**Positioning Requirements:** +1. All positioning MUST be relative (`next_to`, `align_to`, `shift`) from ORIGIN, safe margins, or other objects. **No absolute coordinates are allowed.** +2. Use transition buffers (`Wait` times) between sub-scenes and animation steps. + +**Diagrams/Sketches (Highly Recommended):** +* Include diagrams/sketches (even text-based) for complex layouts to visualize spatial relationships, improve clarity, and reduce spatial errors. + +**Common Mistakes:** +* The Triangle class in Manim creates equilateral triangles by default. To create a right-angled triangle, use the Polygon class instead. + +**Manim Plugins:** +* You may use established, well-documented Manim plugins if they offer significant advantages in terms of code clarity, efficiency, or functionality not readily available in core Manim. +* **If a plugin is used:** + * Clearly state the plugin name and version (if applicable). + * Provide a brief justification for using the plugin (e.g., "Using `manim-plugin-name` for its advanced graph layout capabilities"). + * Ensure all plugin usage adheres to the plugin's documentation. + * Include a comment in the plan: `### Plugin: <plugin_name> - <brief justification>`. + +**Focus:** +* Creating *pedagogically sound and spatially correct Manim code*. +* Detailed technical descriptions, referencing Manim documentation. +* Strict adherence to spatial constraints and relative positioning. + +You MUST generate the technical implementation plan for the scene in the following format (from ```xml to ```): + +```xml + +0. **Dependencies**: + - **Manim API Version**: Target the latest stable Manim release, using only documented API elements. + - **Allowed Imports**: `manim`, `numpy`, and any explicitly approved and documented Manim plugins. No external assets (e.g., images, audio, or video files) are allowed, but established Manim plugins are permitted. + +1. **Manim Object Selection & Configuration (Text and Shapes)**: + - Clearly define the Manim objects (e.g., `Tex`, `MathTex`, `Circle`, `Line`, etc.) used to construct the scene. Also include any objects provided by used plugins. + - Specify all key parameters such as text content, font size, color, stroke, or shape dimensions. + - **Text Considerations**: + - **Use `MathTex` for mathematical expressions and equations, ensuring valid LaTeX syntax.** For example: `MathTex("x^2 + y^2 = r^2")`. + - **Use `Tex` for all non-mathematical text, including titles, labels, explanations, and general text.** For example: `Tex("This is a circle")`.
+ - **If you need to include regular text *within* a `MathTex` environment (e.g., for explanations alongside a formula), use the `\\text{{}}` command.** For example: `MathTex(r"\\text{{Area of circle}} = \\pi r^2")`. + - **Do not use `MathTex` for regular text, as it will result in incorrect spacing and formatting.** + - **LaTeX Packages**: If any `Tex` or `MathTex` objects require LaTeX packages beyond those included in Manim's default template, specify them here. For example: "Requires: `\\usepackage{{amssymb}}`". Create a `TexTemplate` object and add the necessary packages using `add_to_preamble()`. + - **Font Size Recommendations**: + - If there is title text, font size is highly recommended to be 28. + - If there are side labels or formulas, font size is highly recommended to be 24. + - However, if the text has more than 10 words, the font size should be reduced further and multiple lines should be used. + - Confirm all objects begin within the safe area (0.5 units from all edges) and maintain at least 0.3 units spacing to avoid overlaps. + +2. **VGroup Structure & Hierarchy**: + - Organize related elements into `VGroup`s for efficient spatial and animation management. If a plugin provides a specialized group-like object, consider using it. + - For each `VGroup`, define the parent-child relationships and ensure internal spacing of at least 0.3 units. + - Clearly document the purpose for each grouping (e.g., "formula_group" for mathematical expressions). + +3. **Spatial Positioning Strategy**: + - Mandate the exclusive use of relative positioning methods (`next_to`, `align_to`, `shift`), based on ORIGIN, safe margins, or other objects. + - For every object, specify: + - The reference object (or safe edge) used for positioning. + - The specific method (and direction/aligned edge) along with a `buff` value (minimum 0.3 units). + - Outline the layout in sequential stages, inserting visual checkpoints to verify that every element continues to respect safe margins and spacing. + - Highlight measures to safeguard text bounding boxes, especially for multi-line text. + - Reference the font size recommendations under "Text Considerations" to ensure appropriate sizing and prevent overflow. + +4. **Animation Methods & Object Lifecycle Management**: + - Define clear animation sequences using documented methods such as `Create`, `Write`, `FadeIn`, `Transform`, and corresponding removal animations (`FadeOut`, `Uncreate`). Include animation methods from plugins if they are used. + - For each animation, specify parameters like `run_time`, `lag_ratio`, and the use of `Wait()` for transition buffers. + - Ensure every object's appearance and removal is managed to prevent clutter and maintain scene clarity. + +5. **Code Structure & Reusability**: + - Propose modular functions for creating and animating common objects to promote code reusability. + - Organize the overall code structure into logical sections: dependencies, object definitions, individual layout stages, and the main `construct` method. + - Include inline comments to document the rationale for configuration choices, referencing the Manim Documentation *and the plugin documentation where applicable*. + +***Mandatory Safety Checks***: + - **Safe Area Enforcement**: All objects, including text bounding boxes, must remain within 0.5 unit margins. + - **Minimum Spacing Validation**: Confirm a minimum of 0.3 units spacing between every pair of objects. 
+ - **Transition Buffers**: Use explicit `Wait()` calls to separate animation steps and sub-scenes. + +``` +""" + +_prompt_rag_query_generation_narration = """You are an expert in generating search queries specifically for **Manim (Community Edition) documentation** (both core Manim and its plugins). Your task is to analyze a storyboard and generate effective queries that will retrieve relevant documentation about narration, text animations, and audio-visual synchronization. + +Here is the storyboard: + +{storyboard} + +Based on this storyboard, generate multiple human-like queries (maximum 10) for retrieving relevant documentation about narration and text animation techniques. + +**Specifically, ensure that:** +1. Queries focus on retrieving information about **text animations** and their properties +2. Include queries about **timing and synchronization** techniques +3. If the storyboard suggests using plugin functionality, include specific queries targeting those plugins' narration capabilities + +The above storyboard is relevant to these plugins: {relevant_plugins}. +Note that you MUST NOT use plugins that are not listed above. + +You MUST only output the queries in the following JSON format (with json triple backticks): +```json +[ + {{"type": "manim-core", "query": "content of text animation query"}}, + {{"type": "<plugin_name>", "query": "content of plugin-specific query"}}, + {{"type": "manim-core", "query": "content of timing synchronization query"}} + ... +] +```""" + +_prompt_context_learning_animation_narration = """Here are some example animation and narration plans to help guide your planning: + +{examples} + +Please follow a similar structure while maintaining creativity and relevance to the current scene.""" + +_prompt_scene_implementation = """You are an expert in educational video production and Manim (Community Edition) animation development. Your task is to create a detailed implementation plan for Scene {scene_number}. + + +Topic: {topic} +Description: {description} + + + +Scene Overview: +{scene_outline} + + + + +[SCENE_VISION] +1. **Overall Narrative**: + - Describe the overall story or message of the scene. What is the key takeaway for the viewer? + - How does this scene fit into the larger narrative of the video? + - What is the desired emotional impact on the viewer? + +2. **Learning Objectives**: + - What specific knowledge or skills should the viewer gain from this scene? + - How will the visual elements and animations support these learning objectives? + - What are the key concepts that need to be emphasized? + +[STORYBOARD] +1. **Visual Flow**: + - Describe the sequence of visual elements and animations in the scene. + - Provide a rough sketch or description of the key visual moments. + - How will the scene transition between different ideas or concepts? + - What is the pacing of the scene? Are there moments of pause or rapid action? + +[TECHNICAL_IMPLEMENTATION] +1. **High-Level Components (VGroups)**: + - **Identify the main conceptual sections of the scene.** Think of this like outlining chapters in a story or sections in a presentation. + - **Define the purpose of each high-level component.** What should the viewer learn or understand from each section? + - **Describe how these components relate to each other and the overall scene flow.** How will you transition between these sections to create a cohesive narrative? + - **Provide a brief rationale for your choice of high-level components.** Why did you choose these specific sections? + +2.
**VGroup Hierarchy**: + - **For each high-level component, define a parent VGroup.** This VGroup will act as a container for all elements within that section. + - **Break down each parent VGroup into nested VGroups for sub-components as needed.** Think about logical groupings of elements. + - **Specify the relative positioning of these VGroups within the scene using `next_to()`, `align_to()`, and `shift()` where possible.** How will the parent VGroups be arranged on the screen relative to each other? (e.g., stacked vertically, side-by-side, etc.) Prioritize relative positioning using the following references: + - `ORIGIN`: the center of the scene + - scene margins (e.g., corners, edges) + - other VGroups as references. + - **MUST NOT use absolute coordinates.** + - **Define the scale relationships between different levels of the VGroup hierarchy.** Will sub-VGroups inherit scale from parent VGroups? How will scaling be managed to maintain visual consistency? + - **Provide a brief rationale for your VGroup hierarchy.** Why did you choose this specific structure? + + For each VGroup level (from high-level down to sub-components): + - Name: [Descriptive name for the VGroup, e.g., "TitleSection", "ProblemStatementGroup", "Explanation1Group"] + - Purpose: [What is the purpose of this VGroup? What should the viewer learn or understand from this VGroup?] + - Contents: [List all child VGroups and individual elements (Text, MathTex, Shapes, etc.) that belong to this VGroup.] + - Positioning: + * Reference: [Specify what this VGroup is positioned relative to. Do not use absolute coordinates.] + * Alignment: [How is it aligned relative to the reference? Use `align_to()` with options like `UP`, `DOWN`, `LEFT`, `RIGHT`, `ORIGIN`, etc.] + * Spacing: [Describe any spacing considerations relative to sibling VGroups or elements within the parent. Use `buff` argument in `next_to()` or `arrange()`. Refer to the defined minimum spacing value.] + - Scale: [Specify the scale of this VGroup relative to its parent VGroup. Use relative scaling factors (e.g., 1.0 for same scale, 0.8 for smaller).] + - Rationale: [Explain the reasoning behind the structure and organization of this VGroup. Why did you group these elements together?] + +3. **Element Specification**: + For each individual element (Text, MathTex, Shapes, etc.) within a VGroup: + - Name: [Descriptive name for the element, e.g., "ProblemTitleText", "Equation1", "HighlightCircle"] + - Type: [Manim object type. Examples: Text, MathTex, Circle, Rectangle, Arrow, Line, etc.] + - Parent VGroup: [Specify the VGroup this element belongs to. This establishes the hierarchical relationship.] + - Positioning: + * Reference: [Specify what this element is positioned relative to. Use its parent VGroup, other elements, `ORIGIN`, or scene margins as references. Do not use absolute coordinates.] + * Alignment: [How is it aligned within its parent VGroup? Use `align_to()` or `next_to()` with appropriate directions, e.g. `UP`, `DOWN`, `LEFT`, `RIGHT`, `ORIGIN`, `UL`, `UR`, `DL`, `DR`] + * Spacing: [If applicable, describe spacing relative to other elements using `buff` in `next_to()`. Refer to the defined minimum spacing value.] + - Style Properties: + * Color: [Hex code or named color (e.g., "RED", "BLUE"). Use hex codes for specific colors. e.g., #FF0000 for red] + * Opacity: [Value between 0 and 1. 1 for fully opaque, 0 for fully transparent.] + * Stroke Width: [Specify stroke width using levels: `thin`, `medium`, or `thick`.] + * Font: [Font family name, if applicable.] 
+ * Font Size: [Specify font size using levels: `heading1`, `heading2`, `heading3`, `heading4`, `heading5`, `heading6`, or `body`. Refer to the defined font size levels.] + * Fill Color: [Hex code for fill color, if applicable.] + * ... [Include any other relevant style properties] + - Z-Index: [Integer value for layering order within the VGroup. Higher values are on top.] + - Required Imports: [List specific Manim classes that need to be imported to create this element. e.g., `from manim import Text, Circle`] + +[ANIMATION_STRATEGY] +1. **VGroup Transitions**: + - **Define how parent VGroups will transition onto and off of the scene, and between different sections.** Describe the movement patterns for these high-level groups. Examples: 'Slide in from left', 'Fade in and scale up', 'Move to top of screen'. + - **Specify the timing and coordination of VGroup transitions.** How long will each transition take? Will transitions overlap or be sequential? + - **Describe any transformation sequences applied to VGroups during transitions.** Will VGroups rotate, scale, or change shape during transitions? + +2. **Element Animations**: + - **Define the animations for individual elements within each VGroup.** What animations will bring each element to life? Examples: 'Write in text', 'Draw a circle', 'Highlight an equation', 'Fade in an image'. + - **Group related element animations using Manim's animation grouping features (e.g., `AnimationGroup`, `Succession`).** Explain how these groups will be used to create cohesive animation sequences. + - **Coordinate element animations with parent VGroup movements and transitions.** Ensure element animations are synchronized with the overall scene flow. + - **Specify the timing of element animations relative to VGroup transitions and other element animations.** Create a timeline or sequence of animations. + +3. **Scene Flow**: + - **Describe the overall animation sequence for the entire scene.** Outline the order in which VGroups and elements will be animated. + - **Specify transition buffers or pauses between major sections of the scene.** How much time will be left between animations for the viewer to process information? + - **Consider how the animation timing will coordinate with the narration (if narration is planned).** Animations should complement and reinforce the spoken content. + +[NARRATION] +- **Narration Script:** [Provide the full script for the narration, including timing cues or markers for when specific animations should occur. The script should be clear, detailed, and engaging, and should align with the visual elements and animations.] +- **Narration Sync:** [Describe how the narration should be synchronized with the animations. Specify how timing cues in the narration script will be used to trigger animations. Are there specific points where the narration and animations should be perfectly synchronized? Explain how you will achieve this synchronization.] + +[VIEWER_EXPERIENCE] +1. **Cognitive Load**: + - How will you manage the amount of information presented at any given time? + - Are there any complex concepts that need to be broken down into smaller steps? + - How will you use visual cues to guide the viewer's attention? + +2. **Pacing**: + - Is the pacing of the scene appropriate for the content? + - Are there moments where the viewer needs time to pause and reflect? + - How will you use animation timing to control the pace of the scene? + +3. 
**Accessibility**: + - How will you ensure that the scene is accessible to viewers with different needs? + - Are there any specific considerations for color contrast or text readability? + +[TECHNICAL_CHECKS] +- **VGroup boundary validation:** Ensure all elements are contained within their intended VGroup boundaries and are not overflowing unexpectedly. +- **Hierarchy scale consistency:** Verify that scaling is applied consistently throughout the VGroup hierarchy and that text and elements remain readable at all scales. +- **Animation coordination between levels:** Check that animations at different VGroup levels are coordinated and do not clash or look disjointed. +- **Performance optimization for nested groups:** Consider the performance implications of deeply nested VGroups and optimize structure and animations for smooth playback. +- **Text readability:** Ensure all text elements are legible in terms of size, color contrast, and positioning. +- **Color contrast:** Verify sufficient color contrast between text and background, and between different visual elements for accessibility. +- **Animation smoothness:** Check for any jerky or abrupt animations and refine timing and easing for smoother transitions. + + + +Requirements: +1. All elements must stay within safe area margins +2. Maintain minimum spacing between objects: [value] (This value is defined in the project settings) +3. Use relative positioning when possible, leveraging `next_to()`, `align_to()`, and `shift()`. Only reference positions relative to `ORIGIN`, scene margins, or other object reference points. Do not use absolute coordinates. +4. Include transition buffers between animations +5. Specify z-index for overlapping elements +6. All colors must use hex codes or named colors +7. Define scale relative to base unit +8. No external dependencies +9. Currently, there are no images or other assets available locally or remotely for you to use in the scene. Only include elements that can be generated through Manim. +10. **Do not generate any code in this plan, except for illustrative examples where necessary. This plan is for outlining the scene and should not include any Python code.** +11. **The purpose of this plan is to be a detailed guide for a human to implement the scene in Manim.**""" + +_prompt_visual_fix_error = """You are an expert in Manim animations. Your task is to ensure that the rendered animation frame (image) aligns with the intended teaching content based on the provided implementation plan. + +Instructions: +Evaluate whether the object coordinates and positions in the image match the described plan and educational purpose. +The implementation plan serves as a reference, but your primary goal is to verify that the rendered animation frame supports effective teaching. +For example: +* If the object is supposed to be at the top of the screen, but it is at the bottom, you need to adjust the position. +* If the object is supposed to be at the left side but it is too far to the left, you need to adjust the position. +* If two objects are not supposed to overlap but they do, you need to adjust their positions. + +If adjustments are needed, provide the complete code of the adjusted version. +If the current code is correct, return it as is. + +Manim Implementation Plan: +{implementation} + +Generated Code: +{generated_code} + +Return the complete code of the adjusted version if the code needs to be updated. If the code is correct, only return "" as output.
+""" + +_banned_reasonings = """evaluation cannot +can't assist +cannot assist +can't provide +cannot provide +can't evaluate +cannot evaluate +cannot be evaluated +cannot be rated +cannot be completed +cannot be assessed +cannot be scored +cannot be conducted +unable to evaluate +do not have the capability +do not have the ability +are photographs and not AI-generated +unable to provide the evaluation""" + +_prompt_code_generation = """You are an expert Manim (Community Edition) developer for educational content. Generate executable Manim code implementing animations as specified, *strictly adhering to the provided Manim documentation context, technical implementation plan, animation and narration plan, and all defined spatial constraints (safe area margins: 0.5 units, minimum spacing: 0.3 units)*. + +Think of reusable animation components for a clean, modular, and maintainable library, *prioritizing code structure and best practices as demonstrated in the Manim documentation context*. *Throughout code generation, rigorously validate all spatial positioning and animations against the defined safe area margins and minimum spacing constraints. If any potential constraint violation is detected, generate a comment in the code highlighting the issue for manual review and correction.* + +Input Context: + +Topic: {topic} +Description: {description} + +Scene Outline: +{scene_outline} + +Scene Technical Implementation: +{scene_implementation} + +**Code Generation Guidelines:** + +1. **Scene Class:** Class name `Scene{scene_number}`, where `{scene_number}` is replaced by the scene number (e.g., `Scene1`, `Scene2`). The scene class should at least inherit from `VoiceoverScene`. However, you can add more Manim Scene classes on top of VoiceoverScene for multiple inheritance if needed. +2. **Imports:** Include ALL necessary imports explicitly at the top of the file, based on used Manim classes, functions, colors, and constants. Do not rely on implicit imports. Double-check for required modules, classes, functions, colors, and constants, *ensuring all imports are valid and consistent with the Manim Documentation*. **Include imports for any used Manim plugins.** +3. **Speech Service:** Initialize `KokoroService()`. You MUST import like this: `from src.utils.kokoro_voiceover import KokoroService` as this is our custom voiceover service. +4. **Reusable Animations:** Implement functions for each animation sequence to create modular and reusable code. Structure code into well-defined functions, following function definition patterns from Manim Documentation. +5. **Voiceover:** Use `with self.voiceover(text="...")` for speech synchronization, precisely matching the narration script and animation timings from the Animation and Narration Plan. +6. **Comments:** Add clear and concise comments for complex animations, spatial logic (positioning, arrangements), and object lifecycle management. *Use comments extensively to explain code logic, especially for spatial positioning, animation sequences, and constraint enforcement, mirroring commenting style in Manim Documentation*. **Add comments to explain the purpose and usage of any Manim plugins.** +7. **Error Handling & Constraint Validation:** Implement basic error handling if error handling strategies are suggested or exemplified in the Manim Documentation. **Critically, during code generation, implement explicit checks to validate if each object's position and animation adheres to the safe area margins (0.5 units) and minimum spacing (0.3 units).** +8. 
**Performance:** Follow Manim best practices for efficient code and rendering performance, as recommended in the Manim Documentation. +9. **Manim Plugins:** You are allowed and encouraged to use established, well-documented Manim plugins if they simplify the code, improve efficiency, or provide functionality not readily available in core Manim. + * **If a plugin is used:** + * Include the necessary import statement at the top of the file. + * Add a comment indicating the plugin used and its purpose: `### Plugin: <plugin_name> - <purpose>`. + * Ensure all plugin usage adheres to the plugin's documentation. +10. **No External Assets:** No external files (images, audio, video). *Use only Manim built-in elements and procedural generation, or elements provided by approved Manim plugins. No external assets are allowed*. +11. **No Main Function:** Only scene class. No `if __name__ == "__main__":`. +12. **Spatial Accuracy (Paramount):** Achieve accurate spatial positioning as described in the technical implementation plan, *strictly using relative positioning methods (`next_to`, `align_to`, `shift`, VGroups) and enforcing safe area margins and minimum 0.3 unit spacing, as documented in Manim Documentation Context*. *Spatial accuracy and constraint adherence are the highest priorities in code generation.* +13. **VGroup Structure:** Implement VGroup hierarchy precisely as defined in the Technical Implementation Plan, using documented VGroup methods for object grouping and manipulation. +14. **Spacing & Margins (Strict Enforcement):** Adhere strictly to safe area margins (0.5 units) and minimum spacing (0.3 units) requirements for *all* objects and VGroups throughout the scene and all animations. Prevent overlaps and ensure all objects stay within the safe area. *Rigorously enforce spacing and margin requirements using `buff` parameters, relative positioning, and explicit constraint validation checks during code generation, and validate against safe area guidelines from Manim Documentation Context*. +15. **Background:** Default background (Black) is sufficient. Do not create custom color background Rectangles. +16. **Text Color:** Do not use BLACK color for any text. Use predefined colors (BLUE_C, BLUE_D, GREEN_C, GREEN_D, GREY_A, GREY_B, GREY_C, LIGHTER_GRAY, LIGHT_GRAY, GOLD_C, GOLD_D, PURPLE_C, TEAL_C, TEAL_D, WHITE). +17. **Default Colors:** You MUST use the provided color definitions if you use colors in your code. ONLY USE THE COLORS PREVIOUSLY DEFINED. +18. **Animation Timings and Narration Sync:** Implement animations with precise `run_time` values and synchronize them with the narration script according to the Animation and Narration Plan. Use `Wait()` commands with specified durations for transition buffers. +19. **Don't be lazy on code generation:** Generate full, complete code including all helper functions. Ensure that the output is comprehensive and the code is fully functional, incorporating all necessary helper methods and complete scene implementation details. +20. **LaTeX Package Handling:** If the technical implementation plan specifies the need for additional LaTeX packages: + * Create a `TexTemplate` object. + * Use `myTemplate = TexTemplate()` + * Use `myTemplate.add_to_preamble(r"\\usepackage{{package_name}}")` to add the required package. + * Pass this template to the `Tex` or `MathTex` object: `tex = Tex(..., tex_template=myTemplate)`. (Illustrative sketches of this pattern, and of the constraint checks from guideline 7, follow this list.)
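+ +For illustration only, here is a minimal sketch of the LaTeX package pattern from guideline 20 (the `amssymb` package is a hypothetical stand-in; substitute whatever the technical implementation plan actually requires): + +```python +from manim import MathTex, TexTemplate + +# Hypothetical example: assume the plan requires the amssymb package. +my_template = TexTemplate() +my_template.add_to_preamble(r"\\usepackage{{amssymb}}") + +# Pass the template so the extra package is available when the text is compiled. +symbols = MathTex(r"\\mathbb{{N}} \\subseteq \\mathbb{{R}}", tex_template=my_template) +```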
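+ +And a rough sketch of one possible constraint check in the spirit of guideline 7 (a heuristic bounding-box helper written for this example, not a required or existing API): + +```python +def check_safe_area(mobject, margin=0.5): + # Hypothetical helper: True if the mobject's bounding box stays inside the safe area. + x_max = config.frame_width / 2 - margin # config is available via `from manim import *` + y_max = config.frame_height / 2 - margin + left, right = mobject.get_left()[0], mobject.get_right()[0] + bottom, top = mobject.get_bottom()[1], mobject.get_top()[1] + return left >= -x_max and right <= x_max and bottom >= -y_max and top <= y_max +```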
+ +**Example Code Style and Structure to Emulate:** + +* **Helper Classes:** Utilize helper classes (like `Scene2_Helper`) to encapsulate object creation and scene logic, promoting modularity and reusability. +* **Stage-Based `construct` Method:** Structure the `construct` method into logical stages (e.g., Stage 1, Stage 2, Stage 3) with comments to organize the scene flow. +* **Reusable Object Creation Functions:** Define reusable functions within helper classes for creating specific Manim objects (e.g., `create_axes`, `create_formula_tex`, `create_explanation_text`). +* **Clear Comments and Variable Names:** Use clear, concise comments to explain code sections and logic. Employ descriptive variable names (e.g., `linear_function_formula`, `logistic_plot`) for better readability. +* **Text Elements:** Create text elements using `Tex` or `MathTex` for formulas and explanations, styling them with `color` and `font_size` as needed. +* **Manim Best Practices:** Follow Manim best practices, including using `VoiceoverScene`, `KokoroService`, common Manim objects, animations, relative positioning, and predefined colors. + +You MUST generate the Python code in the following format (from ```python to ```): + +```python +from manim import * +from manim import config as global_config +from manim_voiceover import VoiceoverScene +from src.utils.kokoro_voiceover import KokoroService # You MUST import like this as this is our custom voiceover service. + +# plugins imports, don't change the import statements +from manim_circuit import * +from manim_physics import * +from manim_chemistry import * +from manim_dsa import * +from manim_ml import * + +# Helper Functions/Classes (Implement and use helper classes and functions for improved code reusability and organization) +class Scene{scene_number}_Helper: # Example: class Scene1_Helper: + # Helper class containing utility functions for scene {scene_number}. + def __init__(self, scene): + self.scene = scene + # ... (add any necessary initializations) + + # Reusable object creation functions (Implement object creation functions for modularity and reusability as per plan) + def get_center_of_edges(self, polygon, buff=SMALL_BUFF*3): + # Calculate the center points of each edge in a polygon (Triangle, Square, etc.) with an optional buffer. + # Get the vertices of the polygon + vertices = polygon.get_vertices() + n_vertices = len(vertices) + # Initialize list to store edge centers + coords_vertices = [] + # Calculate center point and normal for each edge + for i in range(n_vertices): + # Get current and next vertex (wrapping around to first vertex) + v1 = vertices[i] + v2 = vertices[(i + 1) % n_vertices] + # Calculate edge center + edge_center = (v1 + v2) / 2 + # Calculate edge vector and normalize + edge_vector = v2 - v1 + edge_length = np.linalg.norm(edge_vector) + normal = np.array([-edge_vector[1], edge_vector[0], 0]) / edge_length + # Add buffer in the normal direction + coords_vertices.append(edge_center + normal * buff) + + return coords_vertices + + def create_formula_tex(self, formula_str, color): + # Example function to create a MathTex formula with a specified color. + # Check if a custom TexTemplate is needed (from the technical plan). + if hasattr(self.scene, 'tex_template'): + formula = MathTex(formula_str, color=color, tex_template=self.scene.tex_template) + else: + formula = MathTex(formula_str, color=color) + return formula + + # ...
(add more helper functions as needed for object creation and scene logic) + + +class Scene{scene_number}(VoiceoverScene, MovingCameraScene): # Note: You can add more Manim Scene classes on top of current templates for multiple inheritance if needed. + # Reminder: This scene class is fully self-contained. There is no dependency on the implementation from previous or subsequent scenes. + def construct(self): + # Initialize speech service + self.set_speech_service(KokoroService()) + + # Instantiate helper class (as per plan) + helper = Scene{scene_number}_Helper(self) # Example: helper = Scene1_Helper(self) + + # Check for LaTeX packages and create TexTemplate if needed. + # This section should be generated based on the technical implementation plan. + # For example, if the plan includes: "Requires: \\usepackage{{amsmath}}" + # Then generate: + # + # my_template = TexTemplate() + # my_template.add_to_preamble(r"\\usepackage{{amsmath}}") + # self.tex_template = my_template + + # --- Stage 1: Scene Setup (adapt stage numbers and descriptions to your scene, following plan) --- + with self.voiceover(text="[Narration for Stage 1 - from Animation and Narration Plan]") as tracker: # Voiceover for Stage 1 + # Object Creation using helper functions (as per plan) + axes = helper.create_axes() # Example: axes = helper.create_axes() + formula = helper.create_formula_tex("...", BLUE_C) # Example: formula = helper.create_formula_tex("...", BLUE_C) + explanation = helper.create_explanation_text("...") # Example: explanation = helper.create_explanation_text("...") + + # Positioning objects (relative positioning, constraint validation - as per plan) + formula.to_corner(UL) # Example positioning + axes.move_to(ORIGIN) # Example positioning + explanation.next_to(axes, RIGHT) # Example positioning + + # Animations for Stage 1 (synced with voiceover - as per plan) + self.play(Write(formula), Write(axes), run_time=tracker.duration) # Example animations + self.wait(0.5) # Transition buffer + + # --- Stage 2: ... (Implement Stage 2, Stage 3, etc. in a similar modular and structured way, following plan) --- + with self.voiceover(text="[Narration for Stage 2 - from Animation and Narration Plan]") as tracker: # Voiceover for Stage 2 + # ... (Object creation, positioning, and animations for Stage 2, using helper functions and constraint validation) + pass # Replace with actual Stage 2 code + + # ... (Implement remaining stages in a similar modular and structured way, following the Animation and Narration Plan and Technical Implementation Plan, and rigorously validating spatial constraints in each stage) + + self.wait(1) # Scene end transition buffer +``` + + +Notes: +The `get_center_of_edges` helper function is particularly useful for: +1. Finding the midpoint of polygon edges for label placement +2. Calculating offset positions for side labels that don't overlap with the polygon +3. Creating consistent label positioning across different polygon sizes and orientations + +Example usage in your scene: +```python +def label_triangle_sides(self, triangle, labels=["a", "b", "c"]): + # Helper function to label triangle sides. 
+ edge_centers = self.helper.get_center_of_edges(triangle) + labeled_sides = VGroup() + for center, label in zip(edge_centers, labels): + tex = MathTex(label).move_to(center) + labeled_sides.add(tex) + return labeled_sides +```""" + +_prompt_rag_query_generation_code = """You are an expert in generating search queries specifically for **Manim (Community Edition) documentation** (both core Manim and its plugins). Your task is to transform a complete implementation plan for a Manim video scene into effective queries that will retrieve relevant information from Manim documentation. The implementation plan describes the scene's vision, storyboard, technical implementation, and animation/narration strategy. + +Here is the complete scene implementation plan: + +{implementation_plan} + +Based on the complete implementation plan, generate multiple human-like queries (maximum 10) for retrieving relevant documentation. Please ensure that the search targets are different so that the RAG can retrieve a diverse set of documents covering various aspects of the implementation. + +**Specifically, ensure that:** +1. At least some queries are focused on retrieving information about **Manim function usage** in scenes. Frame these queries to target function definitions, usage examples, and parameter details within Manim documentation. +2. If the implementation suggests using plugin functionality, include at least 1 query specifically targeting **plugin documentation**. Clearly mention the plugin name in these queries to focus the search. +3. Queries should be specific enough to distinguish between core Manim and plugin functionality when relevant, and to target the most helpful sections of the documentation (API reference, tutorials, examples). + +The above implementation plan is relevant to these plugins: {relevant_plugins}. +Note that you MUST NOT use plugins that are not listed above. + +You MUST only output the queries in the following JSON format (with json triple backticks): +```json +[ + {{"type": "manim-core", "query": "content of function usage query"}}, + {{"type": "<plugin_name>", "query": "content of plugin-specific query"}}, + {{"type": "manim-core", "query": "content of API reference query"}} + ...
+] +```""" + diff --git a/task_generator/prompts_raw/__pycache__/__init__.cpython-312.pyc b/task_generator/prompts_raw/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..db623fdfdcde47c85568e7100a4d1479f35b39e2 --- /dev/null +++ b/task_generator/prompts_raw/__pycache__/__init__.cpython-312.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4e8e3c61296436f102ac597f09dfe31ad67a0820ad9160cb4be90486d090b27 +size 120229 diff --git a/task_generator/prompts_raw/banned_reasonings.txt b/task_generator/prompts_raw/banned_reasonings.txt new file mode 100644 index 0000000000000000000000000000000000000000..c7329204ae6125dac677164cb8a066bdd2676449 --- /dev/null +++ b/task_generator/prompts_raw/banned_reasonings.txt @@ -0,0 +1,18 @@ +evaluation cannot +can't assist +cannot assist +can't provide +cannot provide +can't evaluate +cannot evaluate +cannot be evaluated +cannot be rated +cannot be completed +cannot be assessed +cannot be scored +cannot be conducted +unable to evaluate +do not have the capability +do not have the ability +are photographs and not AI-generated +unable to provide the evaluation \ No newline at end of file diff --git a/task_generator/prompts_raw/code_background.txt b/task_generator/prompts_raw/code_background.txt new file mode 100644 index 0000000000000000000000000000000000000000..2d18f0843f35082777daeba6cbaea8928c5ea277 --- /dev/null +++ b/task_generator/prompts_raw/code_background.txt @@ -0,0 +1,2 @@ +PLEASE DO NOT create any additional colored background Rectangles. The default background (black) is enough. +PLEASE DO NOT use BLACK color for any text. diff --git a/task_generator/prompts_raw/code_color_cheatsheet.txt b/task_generator/prompts_raw/code_color_cheatsheet.txt new file mode 100644 index 0000000000000000000000000000000000000000..cb301dd2d1126df650c8f7acf0432dbf2e387f70 --- /dev/null +++ b/task_generator/prompts_raw/code_color_cheatsheet.txt @@ -0,0 +1,23 @@ +MUST include the following color definitions if you use the colors in your code. ONLY USE THE COLORS BELOW. + +WHITE = '#FFFFFF' +RED = '#FF0000' +GREEN = '#00FF00' +BLUE = '#0000FF' +YELLOW = '#FFFF00' +CYAN = '#00FFFF' +MAGENTA = '#FF00FF' +ORANGE = '#FFA500' +PURPLE = '#800080' +PINK = '#FFC0CB' +BROWN = '#A52A2A' +GRAY = '#808080' +TEAL = '#008080' +NAVY = '#000080' +OLIVE = '#808000' +MAROON = '#800000' +LIME = '#00FF00' +AQUA = '#00FFFF' +FUCHSIA = '#FF00FF' +SILVER = '#C0C0C0' +GOLD = '#FFD700' \ No newline at end of file diff --git a/task_generator/prompts_raw/code_disable.txt b/task_generator/prompts_raw/code_disable.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/task_generator/prompts_raw/code_font_size.txt b/task_generator/prompts_raw/code_font_size.txt new file mode 100644 index 0000000000000000000000000000000000000000..baaa31ee173ae702578059e335850967764dd1a4 --- /dev/null +++ b/task_generator/prompts_raw/code_font_size.txt @@ -0,0 +1,5 @@ +If there is title text, font size is highly recommended to be 28. +If there are side labels, font size is highly recommended to be 24. +If there are formulas, font size is highly recommended to be 24. + +However, if the text has more than 10 words, font size should be reduced further and multiple lines should be used.
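To make the sizing guidance above concrete, here is a minimal sketch of how those recommendations might look in Manim Community Edition code (the scene, text content, and layout are hypothetical, invented purely for illustration):

```python
from manim import *

class FontSizeDemo(Scene):
    # Hypothetical scene illustrating the recommended font sizes above.
    def construct(self):
        title = Text("Pythagorean Theorem", font_size=28)    # title text: 28
        formula = MathTex("a^2 + b^2 = c^2", font_size=24)   # formulas: 24
        # Text longer than ~10 words: shrink further and wrap over multiple lines.
        note = Paragraph(
            "The square of the hypotenuse equals the sum",
            "of the squares of the other two sides.",
            font_size=20,
            alignment="center",
        )
        title.to_edge(UP)
        formula.next_to(title, DOWN, buff=0.5)
        note.next_to(formula, DOWN, buff=0.5)
        self.add(title, formula, note)
```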
\ No newline at end of file diff --git a/task_generator/prompts_raw/code_limit.txt b/task_generator/prompts_raw/code_limit.txt new file mode 100644 index 0000000000000000000000000000000000000000..e1e798ae55374d93accadba57636a2105bc8ac9e --- /dev/null +++ b/task_generator/prompts_raw/code_limit.txt @@ -0,0 +1,4 @@ +Note that the frame width and height are 14.222222222222221 and 8.0 respectively, and the center of the frame is (0, 0, 0). +This means that, to avoid putting any object out of the frame, you should limit the x and y coordinates of the objects. +Limit x to be within -7.0 and 7.0 and limit y to be within -4.0 and 4.0 for all objects. +Place the objects near the center of the frame, without overlapping with each other. \ No newline at end of file diff --git a/task_generator/prompts_raw/prompt_animation_fix_error.txt b/task_generator/prompts_raw/prompt_animation_fix_error.txt new file mode 100644 index 0000000000000000000000000000000000000000..aab84c5add28f9da2c49e0a299187b10a1efae99 --- /dev/null +++ b/task_generator/prompts_raw/prompt_animation_fix_error.txt @@ -0,0 +1,50 @@ +You are an expert Manim developer specializing in debugging and error resolution. Analyze the provided code and error message to provide a comprehensive fix and explanation. + + +Text Explanation: +{text_explanation} + +Manim Code Animation to complement the Text Explanation: +```python +{manim_code} +``` + +Error message encountered when running the code: +{error_message} + +You MUST only output the following format (make sure to include the ```python and ``` in the code): + + +Error Type: [Syntax/Runtime/Logic/Other] +Error Location: [File/Line number/Component] +Root Cause: [Brief explanation of what caused the error] +Impact: [What functionality is affected] + + + +[FIXES_REQUIRED] +- Fix 1: [Description] + - Location: [Where to apply] + - Change: [What to modify] +- Fix 2: [If applicable] + ... + +[CORRECTED_CODE] +```python +# Complete corrected and fully implemented code, don't be lazy +# Include all necessary imports, definitions, and any additional code for the script to run successfully +``` + + + +Requirements: +1. Provide complete error analysis with specific line numbers where possible. +2. Include exact instructions for every code change. +3. Ensure that the [CORRECTED_CODE] section contains complete, executable Python code (not just code snippets). Do not assume context from the prompt. +4. Explain why the error occurred in plain language. +5. Include verification steps to confirm the error is resolved. +6. Suggest preventive measures for avoiding similar errors in the future. +7. If external assets (e.g., images, audio, video) are referenced, remove them. +8. Preserve all original code that is not causing the reported error. Do not remove or alter any intentional elements unnecessarily. +9. Follow best practices for code clarity and the current Manim version. \ No newline at end of file diff --git a/task_generator/prompts_raw/prompt_animation_rag_query_generation.txt b/task_generator/prompts_raw/prompt_animation_rag_query_generation.txt new file mode 100644 index 0000000000000000000000000000000000000000..a7c74360e3cded328900e3e5c38b0f18b0628595 --- /dev/null +++ b/task_generator/prompts_raw/prompt_animation_rag_query_generation.txt @@ -0,0 +1,29 @@ +You are an expert in Manim (Community Edition) and its plugins. Your task is to transform a topic for a Manim animation scene into queries that can be used to retrieve relevant documentation from both Manim core and any relevant plugins.
+ +Your queries should include keywords related to the specific Manim classes, methods, functions, and *concepts* that are likely to be used to implement the scene, including any plugin-specific functionality. Focus on extracting the core concepts, actions, and vocabulary from the *entire* scene plan. Generate queries that are concise and target different aspects of the documentation (class reference, method usage, animation examples, conceptual explanations) across both Manim core and relevant plugins. + +Here is the Topic (and the context): + +{topic}. {context} + +Based on the topic and the context, generate multiple human-like queries (maximum 5-7) for retrieving relevant documentation. Please ensure that the search targets are different so that the RAG can retrieve a diverse set of documents covering various aspects of the implementation. + +**Specifically, ensure that:** +1. At least 1-2 queries are focused on retrieving information about Manim *function usage* in Manim scenes +2. If the topic and the context can be linked to the use of plugin functionality, include at least 1 query specifically targeting plugin documentation +3. Queries should be specific enough to distinguish between core Manim and plugin functionality when relevant + +The above text explanations are relevant to these plugins: {relevant_plugins} + +Output the queries in the following format: +```json +[ + {{"query": "content of query 1", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 2", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 3", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 4", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 5", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 6", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 7", "type": "manim_core/name_of_the_plugin"}} +] +``` \ No newline at end of file diff --git a/task_generator/prompts_raw/prompt_animation_rag_query_generation_fix_error.txt b/task_generator/prompts_raw/prompt_animation_rag_query_generation_fix_error.txt new file mode 100644 index 0000000000000000000000000000000000000000..fe326331e1f29dc569e4e9e8e3064480de485071 --- /dev/null +++ b/task_generator/prompts_raw/prompt_animation_rag_query_generation_fix_error.txt @@ -0,0 +1,33 @@ +You are an expert in Manim (Community Edition) and its plugins. Your task is to transform a complete implementation plan for a Manim animation scene into queries that can be used to retrieve relevant documentation from both Manim core and any relevant plugins. The implementation plan will describe the scene's vision, technical implementation, and animation strategy. + +Here is the Text Explanation (Implementation Plan) as the context: + +{text_explanation} + +The error message will describe a problem encountered while running Manim code. Your queries should include keywords related to the specific Manim classes, methods, functions, and *concepts* that are likely related to the error, including any plugin-specific functionality. Focus on extracting the core concepts, actions, and vocabulary from the error message itself and the code snippet that produced the error. Generate queries that are concise and target different aspects of the documentation (class reference, method usage, animation examples, conceptual explanations) across both Manim core and relevant plugins.
+ +Here is the error message and the code snippet: + +**Error Message:** +{error} + +**Code Snippet:** +{code} + +Based on the error message and the code snippet, generate multiple human-like queries (maximum 5-7) for retrieving relevant documentation to fix this error. Please ensure that the search targets are different so that the RAG can retrieve a diverse set of documents covering various aspects of the error and its potential solutions. + +**Specifically, ensure that:** +1. At least 1-2 queries are focused on retrieving information about Manim *function or class usage* that might be causing the error. +2. If the error message or code suggests the use of plugin functionality, include at least 1 query specifically targeting plugin documentation related to the error. +3. Queries should be specific enough to distinguish between core Manim and plugin functionality when relevant. + +Output the queries in the following format: +[ + {{"query": "content of query 1", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 2", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 3", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 4", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 5", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 6", "type": "manim_core/name_of_the_plugin"}}, + {{"query": "content of query 7", "type": "manim_core/name_of_the_plugin"}} +] \ No newline at end of file diff --git a/task_generator/prompts_raw/prompt_animation_simple.txt b/task_generator/prompts_raw/prompt_animation_simple.txt new file mode 100644 index 0000000000000000000000000000000000000000..90ad63cdcd3a51b254ad7e54ef138797e0822300 --- /dev/null +++ b/task_generator/prompts_raw/prompt_animation_simple.txt @@ -0,0 +1,30 @@ +Given a topic and its context, you need to explain the topic in text. + +Also generate a Manim script that visually illustrates a key aspect of {topic} without including explanatory text in the animation itself. +Your text can mention the animation, but it should not be the main focus. +Context about the topic {topic}: {description}. + +The animation should focus on: +* Illustrating the significant part of the theorem or concept – Use geometric figures, graphs, number lines, or any relevant visualization. +* Providing an intuitive example – Instead of proving the theorem, show a concrete example or transformation that visually supports understanding. +* Separately, provide a written explanation of the theorem as text that can be displayed outside the animation. + +Ensure that: + +* The animation is concise. +* The Manim code is compatible with the latest version of Manim Community Edition. +* The visual elements are clear and enhance understanding. + +Please provide only the following output: + +1. A text explanation of the theorem. +2. A complete Manim script that generates the animation. Only give the code. + +Output format: + +(Text Explanation Output) +--- (split by ---) +(Manim Code Output) + +Please do not include any other text or headers in your output. +Only use one --- to split the text explanation and the Manim code.
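As a rough illustration of the kind of Manim script this prompt expects after the `---` separator, here is a hypothetical, self-contained example (the topic, class name, and visuals are invented for this sketch) that illustrates an idea without any on-screen explanatory text:

```python
from manim import *

class PolygonApproximatesCircle(Scene):
    # Hypothetical example output: visually suggest that regular polygons with
    # more and more sides approach a circle, with no explanatory text on screen.
    def construct(self):
        polygon = RegularPolygon(n=4, color=YELLOW).scale(2)
        self.play(Create(polygon))
        for n in (6, 12, 24):
            # Morph the current polygon into one with more sides.
            self.play(Transform(polygon, RegularPolygon(n=n, color=YELLOW).scale(2)), run_time=1)
        # Finish by revealing the circle the polygons converge to.
        self.play(FadeIn(Circle(radius=2, color=BLUE)), run_time=1)
        self.wait(1)
```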
\ No newline at end of file diff --git a/task_generator/prompts_raw/prompt_best_practices.txt b/task_generator/prompts_raw/prompt_best_practices.txt new file mode 100644 index 0000000000000000000000000000000000000000..da36c51c7ab1f32512de74821b85e45721bb2a6a --- /dev/null +++ b/task_generator/prompts_raw/prompt_best_practices.txt @@ -0,0 +1,16 @@ +# Best practices for generating educational videos with Manim + +1. Specify positions as relative to other objects whenever it makes sense. + * For example, if you want to place a label for a geometric object. +2. Objects should be of a different color from the black background. +3. Keep the text on screen concise. + * On-screen elements should focus on showcasing the concept, examples and visuals. Labels and illustrative text are still encouraged. + * For explanations and observations, prefer narrations over on-screen text. + * You should still show calculations and algorithms in full on screen. + * For examples and practice problems, it is reasonable to show more text, especially key statements. + * Longer text should appear smaller to fit on screen. +4. To control the timing of objects appearing: + * `add` has instantaneous effect, best used for the initial setup of the scene. + * Animations are best used during narration. + * Make sure the animations make sense. If an object is already on screen, it makes no sense to fade it in or create it again. +5. Use Tex or MathTex whenever you want to display math, including symbols and formulas. diff --git a/task_generator/prompts_raw/prompt_code_generation.txt b/task_generator/prompts_raw/prompt_code_generation.txt new file mode 100644 index 0000000000000000000000000000000000000000..096e2b126e8d2aa73f61de2b78d21802aa5d0a43 --- /dev/null +++ b/task_generator/prompts_raw/prompt_code_generation.txt @@ -0,0 +1,175 @@ +You are an expert Manim (Community Edition) developer for educational content. Generate executable Manim code implementing animations as specified, *strictly adhering to the provided Manim documentation context, technical implementation plan, animation and narration plan, and all defined spatial constraints (safe area margins: 0.5 units, minimum spacing: 0.3 units)*. + +Think of reusable animation components for a clean, modular, and maintainable library, *prioritizing code structure and best practices as demonstrated in the Manim documentation context*. *Throughout code generation, rigorously validate all spatial positioning and animations against the defined safe area margins and minimum spacing constraints. If any potential constraint violation is detected, generate a comment in the code highlighting the issue for manual review and correction.* + +Input Context: + +Topic: {topic} +Description: {description} + +Scene Outline: +{scene_outline} + +Scene Technical Implementation: +{scene_implementation} + +**Code Generation Guidelines:** + +1. **Scene Class:** Class name `Scene{scene_number}`, where `{scene_number}` is replaced by the scene number (e.g., `Scene1`, `Scene2`). The scene class should at least inherit from `VoiceoverScene`. However, you can add more Manim Scene classes on top of VoiceoverScene for multiple inheritance if needed. +2. **Imports:** Include ALL necessary imports explicitly at the top of the file, based on used Manim classes, functions, colors, and constants. Do not rely on implicit imports. Double-check for required modules, classes, functions, colors, and constants, *ensuring all imports are valid and consistent with the Manim Documentation*.
**Include imports for any used Manim plugins.** +3. **Speech Service:** Initialize `KokoroService()`. You MUST import like this: `from src.utils.kokoro_voiceover import KokoroService` as this is our custom voiceover service. +4. **Reusable Animations:** Implement functions for each animation sequence to create modular and reusable code. Structure code into well-defined functions, following function definition patterns from Manim Documentation. +5. **Voiceover:** Use `with self.voiceover(text="...")` for speech synchronization, precisely matching the narration script and animation timings from the Animation and Narration Plan. +6. **Comments:** Add clear and concise comments for complex animations, spatial logic (positioning, arrangements), and object lifecycle management. *Use comments extensively to explain code logic, especially for spatial positioning, animation sequences, and constraint enforcement, mirroring commenting style in Manim Documentation*. **Add comments to explain the purpose and usage of any Manim plugins.** +7. **Error Handling & Constraint Validation:** Implement basic error handling if error handling strategies are suggested or exemplified in the Manim Documentation. **Critically, during code generation, implement explicit checks to validate if each object's position and animation adheres to the safe area margins (0.5 units) and minimum spacing (0.3 units).** +8. **Performance:** Follow Manim best practices for efficient code and rendering performance, as recommended in the Manim Documentation. +9. **Manim Plugins:** You are allowed and encouraged to use established, well-documented Manim plugins if they simplify the code, improve efficiency, or provide functionality not readily available in core Manim. + * **If a plugin is used:** + * Include the necessary import statement at the top of the file. + * Add a comment indicating the plugin used and its purpose: `### Plugin: <plugin_name> - <purpose>`. + * Ensure all plugin usage adheres to the plugin's documentation. +10. **No External Assets:** No external files (images, audio, video). *Use only Manim built-in elements and procedural generation, or elements provided by approved Manim plugins. No external assets are allowed*. +11. **No Main Function:** Only scene class. No `if __name__ == "__main__":`. +12. **Spatial Accuracy (Paramount):** Achieve accurate spatial positioning as described in the technical implementation plan, *strictly using relative positioning methods (`next_to`, `align_to`, `shift`, VGroups) and enforcing safe area margins and minimum 0.3 unit spacing, as documented in Manim Documentation Context*. *Spatial accuracy and constraint adherence are the highest priorities in code generation.* +13. **VGroup Structure:** Implement VGroup hierarchy precisely as defined in the Technical Implementation Plan, using documented VGroup methods for object grouping and manipulation. +14. **Spacing & Margins (Strict Enforcement):** Adhere strictly to safe area margins (0.5 units) and minimum spacing (0.3 units) requirements for *all* objects and VGroups throughout the scene and all animations. Prevent overlaps and ensure all objects stay within the safe area. *Rigorously enforce spacing and margin requirements using `buff` parameters, relative positioning, and explicit constraint validation checks during code generation, and validate against safe area guidelines from Manim Documentation Context*. +15. **Background:** Default background (Black) is sufficient. Do not create custom color background Rectangles. +16.
**Text Color:** Do not use BLACK color for any text. Use predefined colors (BLUE_C, BLUE_D, GREEN_C, GREEN_D, GREY_A, GREY_B, GREY_C, LIGHTER_GRAY, LIGHT_GRAY, GOLD_C, GOLD_D, PURPLE_C, TEAL_C, TEAL_D, WHITE). +17. **Default Colors:** You MUST use the provided color definitions if you use colors in your code. ONLY USE THE COLORS PREVIOUSLY DEFINED. +18. **Animation Timings and Narration Sync:** Implement animations with precise `run_time` values and synchronize them with the narration script according to the Animation and Narration Plan. Use `Wait()` commands with specified durations for transition buffers. +19. **Don't be lazy on code generation:** Generate full, complete code including all helper functions. Ensure that the output is comprehensive and the code is fully functional, incorporating all necessary helper methods and complete scene implementation details. +20. **LaTeX Package Handling:** If the technical implementation plan specifies the need for additional LaTeX packages: + * Create a `TexTemplate` object. + * Use `myTemplate = TexTemplate()` + * Use `myTemplate.add_to_preamble(r"\\usepackage{{package_name}}")` to add the required package. + * Pass this template to the `Tex` or `MathTex` object: `tex = Tex(..., tex_template=myTemplate)`. + +**Example Code Style and Structure to Emulate:** + +* **Helper Classes:** Utilize helper classes (like `Scene2_Helper`) to encapsulate object creation and scene logic, promoting modularity and reusability. +* **Stage-Based `construct` Method:** Structure the `construct` method into logical stages (e.g., Stage 1, Stage 2, Stage 3) with comments to organize the scene flow. +* **Reusable Object Creation Functions:** Define reusable functions within helper classes for creating specific Manim objects (e.g., `create_axes`, `create_formula_tex`, `create_explanation_text`). +* **Clear Comments and Variable Names:** Use clear, concise comments to explain code sections and logic. Employ descriptive variable names (e.g., `linear_function_formula`, `logistic_plot`) for better readability. +* **Text Elements:** Create text elements using `Tex` or `MathTex` for formulas and explanations, styling them with `color` and `font_size` as needed. +* **Manim Best Practices:** Follow Manim best practices, including using `VoiceoverScene`, `KokoroService`, common Manim objects, animations, relative positioning, and predefined colors. + +You MUST generate the Python code in the following format: + +```python +from manim import * +from manim import config as global_config +from manim_voiceover import VoiceoverScene +from src.utils.kokoro_voiceover import KokoroService # You MUST import like this as this is our custom voiceover service. + +# plugins imports, don't change the import statements +from manim_circuit import * +from manim_physics import * +from manim_chemistry import * +from manim_dsa import * +from manim_ml import * + +# Helper Functions/Classes (Implement and use helper classes and functions for improved code reusability and organization) +class Scene{scene_number}_Helper: # Example: class Scene1_Helper: + # Helper class containing utility functions for scene {scene_number}. + def __init__(self, scene): + self.scene = scene + # ... (add any necessary initializations) + + # Reusable object creation functions (Implement object creation functions for modularity and reusability as per plan) + def get_center_of_edges(self, polygon, buff=SMALL_BUFF*3): + # Calculate the center points of each edge in a polygon (Triangle, Square, etc.)
with an optional buffer. + # Get the vertices of the polygon + vertices = polygon.get_vertices() + n_vertices = len(vertices) + # Initialize list to store edge centers + coords_vertices = [] + # Calculate center point and normal for each edge + for i in range(n_vertices): + # Get current and next vertex (wrapping around to first vertex) + v1 = vertices[i] + v2 = vertices[(i + 1) % n_vertices] + # Calculate edge center + edge_center = (v1 + v2) / 2 + # Calculate edge vector and normalize + edge_vector = v2 - v1 + edge_length = np.linalg.norm(edge_vector) + normal = np.array([-edge_vector[1], edge_vector[0], 0]) / edge_length + # Add buffer in the normal direction + coords_vertices.append(edge_center + normal * buff) + + return coords_vertices + + def create_formula_tex(self, formula_str, color): + # Example function to create a MathTex formula with a specified color. + # Check if a custom TexTemplate is needed (from the technical plan). + if hasattr(self.scene, 'tex_template'): + formula = MathTex(formula_str, color=color, tex_template=self.scene.tex_template) + else: + formula = MathTex(formula_str, color=color) + return formula + + # ... (add more helper functions as needed for object creation and scene logic) + + +class Scene{scene_number}(VoiceoverScene, MovingCameraScene): # Note: You can add more Manim Scene classes on top of current templates for multiple inheritance if needed. + # Reminder: This scene class is fully self-contained. There is no dependency on the implementation from previous or subsequent scenes. + def construct(self): + # Initialize speech service + self.set_speech_service(KokoroService()) + + # Instantiate helper class (as per plan) + helper = Scene{scene_number}_Helper(self) # Example: helper = Scene1_Helper(self) + + # Check for LaTeX packages and create TexTemplate if needed. + # This section should be generated based on the technical implementation plan. + # For example, if the plan includes: "Requires: \\usepackage{{amsmath}}" + # Then generate: + # + # my_template = TexTemplate() + # my_template.add_to_preamble(r"\\usepackage{{amsmath}}") + # self.tex_template = my_template + + # --- Stage 1: Scene Setup (adapt stage numbers and descriptions to your scene, following plan) --- + with self.voiceover(text="[Narration for Stage 1 - from Animation and Narration Plan]") as tracker: # Voiceover for Stage 1 + # Object Creation using helper functions (as per plan) + axes = helper.create_axes() # Example: axes = helper.create_axes() + formula = helper.create_formula_tex("...", BLUE_C) # Example: formula = helper.create_formula_tex("...", BLUE_C) + explanation = helper.create_explanation_text("...") # Example: explanation = helper.create_explanation_text("...") + + # Positioning objects (relative positioning, constraint validation - as per plan) + formula.to_corner(UL) # Example positioning + axes.move_to(ORIGIN) # Example positioning + explanation.next_to(axes, RIGHT) # Example positioning + + # Animations for Stage 1 (synced with voiceover - as per plan) + self.play(Write(formula), Write(axes), run_time=tracker.duration) # Example animations + self.wait(0.5) # Transition buffer + + # --- Stage 2: ... (Implement Stage 2, Stage 3, etc. in a similar modular and structured way, following plan) --- + with self.voiceover(text="[Narration for Stage 2 - from Animation and Narration Plan]") as tracker: # Voiceover for Stage 2 + # ... 
(Object creation, positioning, and animations for Stage 2, using helper functions and constraint validation) + pass # Replace with actual Stage 2 code + + # ... (Implement remaining stages in a similar modular and structured way, following the Animation and Narration Plan and Technical Implementation Plan, and rigorously validating spatial constraints in each stage) + + self.wait(1) # Scene end transition buffer +``` + + +Notes: +The `get_center_of_edges` helper function is particularly useful for: +1. Finding the midpoint of polygon edges for label placement +2. Calculating offset positions for side labels that don't overlap with the polygon +3. Creating consistent label positioning across different polygon sizes and orientations + +Example usage in your scene: +```python +def label_triangle_sides(self, triangle, labels=["a", "b", "c"]): + # Helper function to label triangle sides. + edge_centers = self.helper.get_center_of_edges(triangle) + labeled_sides = VGroup() + for center, label in zip(edge_centers, labels): + tex = MathTex(label).move_to(center) + labeled_sides.add(tex) + return labeled_sides +``` \ No newline at end of file diff --git a/task_generator/prompts_raw/prompt_context_learning_animation_narration.txt b/task_generator/prompts_raw/prompt_context_learning_animation_narration.txt new file mode 100644 index 0000000000000000000000000000000000000000..3aca08d1d99c18212dde6996344d8db96075712a --- /dev/null +++ b/task_generator/prompts_raw/prompt_context_learning_animation_narration.txt @@ -0,0 +1,5 @@ +Here are some example animation and narration plans to help guide your planning: + +{examples} + +Please follow a similar structure while maintaining creativity and relevance to the current scene. \ No newline at end of file diff --git a/task_generator/prompts_raw/prompt_context_learning_code.txt b/task_generator/prompts_raw/prompt_context_learning_code.txt new file mode 100644 index 0000000000000000000000000000000000000000..5360359bdad8a0e657ba1ab7d907d19bfbe1e8f7 --- /dev/null +++ b/task_generator/prompts_raw/prompt_context_learning_code.txt @@ -0,0 +1,5 @@ +Here are some example Manim code implementations to help guide your code generation: + +{examples} + +Please follow similar patterns and best practices while implementing the current scene. \ No newline at end of file diff --git a/task_generator/prompts_raw/prompt_context_learning_scene_plan.txt b/task_generator/prompts_raw/prompt_context_learning_scene_plan.txt new file mode 100644 index 0000000000000000000000000000000000000000..5c43363872196eda42d1926da2f6c108f00aa7da --- /dev/null +++ b/task_generator/prompts_raw/prompt_context_learning_scene_plan.txt @@ -0,0 +1,5 @@ +Here are some example scene plans to help guide your scene planning: + +{examples} + +Please follow a similar structure while maintaining creativity and relevance to the current topic. \ No newline at end of file diff --git a/task_generator/prompts_raw/prompt_context_learning_technical_implementation.txt b/task_generator/prompts_raw/prompt_context_learning_technical_implementation.txt new file mode 100644 index 0000000000000000000000000000000000000000..61935ff1499d2d94ac8fa69ab50dde8bcbc6ac34 --- /dev/null +++ b/task_generator/prompts_raw/prompt_context_learning_technical_implementation.txt @@ -0,0 +1,5 @@ +Here are some example technical implementation plans to help guide your implementation: + +{examples} + +Please follow a similar structure while maintaining creativity and relevance to the current scene. 
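Each of these context-learning templates exposes a single `{examples}` placeholder, so filling one at runtime reduces to a plain `str.format` call. A minimal sketch of that step (the loading code and example strings are assumptions for illustration, not part of this diff; only the file path appears above):

```python
from pathlib import Path

# Hypothetical sketch: fill a context-learning template with collected examples.
template = Path("task_generator/prompts_raw/prompt_context_learning_code.txt").read_text()
examples = "\n\n".join([
    "# Example 1: a previously rendered Manim scene snippet",
    "# Example 2: another reference implementation",
])
prompt = template.format(examples=examples)
print(prompt)
```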
\ No newline at end of file diff --git a/task_generator/prompts_raw/prompt_context_learning_vision_storyboard.txt b/task_generator/prompts_raw/prompt_context_learning_vision_storyboard.txt new file mode 100644 index 0000000000000000000000000000000000000000..561e826b2643af7440c5225a40164604710980be --- /dev/null +++ b/task_generator/prompts_raw/prompt_context_learning_vision_storyboard.txt @@ -0,0 +1,5 @@ +Here are some example vision and storyboard plans to help guide your planning: + +{examples} + +Please follow a similar structure while maintaining creativity and relevance to the current scene. \ No newline at end of file diff --git a/task_generator/prompts_raw/prompt_detect_plugins.txt b/task_generator/prompts_raw/prompt_detect_plugins.txt new file mode 100644 index 0000000000000000000000000000000000000000..f4517f55551b84b9806143ed5bcc64577a7aeec5 --- /dev/null +++ b/task_generator/prompts_raw/prompt_detect_plugins.txt @@ -0,0 +1,33 @@ +You are a Manim plugin detection system. Your task is to analyze a video topic and description to determine which Manim plugins would be most relevant for the actual animation implementation needs. + +Topic: +{topic} + +Description: +{description} + +Available Plugins: +{plugin_descriptions} + +Instructions: +1. Analyze the topic and description, focusing specifically on what needs to be animated +2. Review each plugin's capabilities and determine if they provide specific tools needed for the animations described +3. Only select plugins that provide functionality directly needed for the core animations +4. Consider these criteria for each plugin: + - Does the plugin provide specific tools or components needed for the main visual elements? + - Are the plugin's features necessary for implementing the core animations? + - Would the animation be significantly more difficult to create without this plugin? +5. Exclude plugins that: + - Only relate to the general topic area but don't provide needed animation tools + - Might be "nice to have" but aren't essential for the core visualization + - Could be replaced easily with basic Manim shapes and animations + +Your response must follow the output format below: + +[brief description of your thinking process] + + +```json +["plugin_name1", "plugin_name2"] +``` + \ No newline at end of file diff --git a/task_generator/prompts_raw/prompt_enhanced_visual_self_reflection.txt b/task_generator/prompts_raw/prompt_enhanced_visual_self_reflection.txt new file mode 100644 index 0000000000000000000000000000000000000000..98c035c67cf85a1dc5e441f1d05ee315fce5c31c --- /dev/null +++ b/task_generator/prompts_raw/prompt_enhanced_visual_self_reflection.txt @@ -0,0 +1,99 @@ +You are an expert in Manim animations and visual error detection using Vision Language Models. Your task is to analyze a rendered Manim video frame or image to detect and fix specific visual errors like element overlap, out-of-bounds objects, and poor positioning. + +**PRIMARY OBJECTIVE:** Use VLM capabilities to perform detailed visual analysis and provide code fixes for visual errors. + +**VISUAL ERROR DETECTION PROTOCOL:** + +**1. 
Element Overlap Detection:** +- **Unintentional Overlap:** Scan for text, mathematical expressions, shapes, or diagrams that overlap in ways that obscure content or reduce readability +- **Text Collision:** Check for text elements (MathTex, Text, Tex) that collide or overlap with other visual elements +- **Shape Interference:** Identify shapes, arrows, or geometric objects that overlap inappropriately +- **Z-Index Issues:** Detect cases where important elements are hidden behind less important ones + +**2. Spatial Boundary Violations:** +- **Out-of-Bounds Objects:** Identify elements that extend beyond the visible frame boundaries +- **Safe Area Violations:** Check if objects violate the 0.5-unit safe area margins from frame edges +- **Minimum Spacing Violations:** Verify that objects maintain at least 0.3 units of spacing between each other + +**3. Positioning Analysis:** +- **Logical Arrangement:** Assess if spatial arrangement supports educational flow and narrative progression +- **Visual Balance:** Evaluate overall composition balance and aesthetic arrangement +- **Proximity Grouping:** Check if related elements are appropriately grouped and unrelated elements are sufficiently separated + +**4. Educational Effectiveness:** +- **Readability:** Ensure all text elements are clearly readable without obstruction +- **Visual Hierarchy:** Verify that important elements are prominently positioned +- **Learning Support:** Confirm that visual arrangement supports the educational objective + +**DETECTION METHODOLOGY:** +1. **Systematic Scanning:** Examine the frame/video systematically from top-left to bottom-right +2. **Element Identification:** Catalog all visible Manim objects and their spatial relationships +3. **Overlap Mapping:** Create a mental map of overlapping regions and assess their appropriateness +4. **Boundary Checking:** Verify all elements are within safe viewing area +5. **Spacing Measurement:** Assess spacing between elements for minimum distance compliance + +**ERROR CLASSIFICATION:** +- **Critical:** Errors that severely impact readability or comprehension (e.g., text completely obscured) +- **Major:** Errors that noticeably reduce visual quality (e.g., partial overlap, out-of-bounds elements) +- **Minor:** Errors that slightly affect visual appeal (e.g., suboptimal spacing, minor misalignment) + +**SOLUTION APPROACH:** +For each detected error, provide specific code modifications using: +- **Relative Positioning:** Use `next_to()`, `align_to()`, `shift()`, `move_to(ORIGIN)` +- **Spacing Controls:** Apply `buff` parameters for proper spacing +- **VGroup Organization:** Reorganize elements into logical VGroups +- **Z-Index Management:** Use `bring_to_front()`, `bring_to_back()` for layering +- **Margin Enforcement:** Ensure compliance with safe area constraints + +**ANALYSIS FRAMEWORK:** + +Current Implementation Plan: +{implementation} + +Current Code: +{code} + +**STEP-BY-STEP ANALYSIS:** + +1. **Visual Inventory:** List all visible Manim objects in the frame +2. **Overlap Detection:** Identify any overlapping elements and assess if intentional +3. **Boundary Check:** Verify all elements are within frame and safe area +4. **Spacing Verification:** Confirm minimum 0.3-unit spacing between elements +5. 
**Educational Assessment:** Evaluate if arrangement supports learning objectives + +**OUTPUT REQUIREMENTS:** + +If visual errors are detected, provide: + + +**Detected Issues:** +- [Specific description of each visual error with object names and positions] +- [Severity classification: Critical/Major/Minor] +- [Impact on educational effectiveness] + +**Recommended Fixes:** +- [Specific positioning adjustments needed] +- [Code modifications to implement fixes] +- [Verification steps to ensure fixes work] + + + +```python +[Complete corrected Python code with all visual fixes applied] +[Use relative positioning methods exclusively] +[Ensure safe area compliance and minimum spacing] +[Maintain all original functionality while fixing visual issues] +``` + + +If no visual errors are detected and the frame meets all quality standards: + + + + +**ADDITIONAL CONSIDERATIONS:** +- Prioritize educational clarity over aesthetic preferences +- Maintain consistency with Manim best practices +- Ensure all fixes preserve the original educational intent +- Test spatial relationships after applying fixes +- Consider animation flow when adjusting static positions diff --git a/task_generator/prompts_raw/prompt_fix_error.txt b/task_generator/prompts_raw/prompt_fix_error.txt new file mode 100644 index 0000000000000000000000000000000000000000..d7033ad92d9924c1130efa4defd398daf9654953 --- /dev/null +++ b/task_generator/prompts_raw/prompt_fix_error.txt @@ -0,0 +1,43 @@ +You are an expert Manim developer specializing in debugging and error resolution. Based on the provided implementation plan and Manim code, analyze the error message to provide a comprehensive fix and explanation. + +Implementation Plan of the Scene: +{implementation_plan} + +Manim Code: +```python +{manim_code} +``` + +Error Message: +{error_message} + +Requirements: +1. Provide complete error analysis with specific line numbers where possible. +2. Include exact instructions for every code change. +3. Explain why the error occurred in plain language. +4. If external assets (e.g., images, audio, video) are referenced, remove them. +5. **If voiceover is present in the original code, ensure it remains preserved in the corrected code.** +6. Preserve all original code that is not causing the reported error. Do not remove or alter any intentional elements unnecessarily. +7. Follow best practices for code clarity and the current Manim version. + +You MUST only output the following format. You MUST NOT come up with any other format like JSON. + + +Error Type: [Syntax/Runtime/Logic/Other] +Error Location: [File/Line number/Component] +Root Cause: [Brief explanation of what caused the error] +Impact: [What functionality is affected] +Solution: +[FIXES_REQUIRED] +- Fix 1: [Description] + - Location: [Where to apply] + - Change: [What to modify] +- Fix 2: [If applicable] +... + +```python +# Complete corrected and fully implemented Python code +# Include all necessary imports, definitions, and any additional code for the script to run successfully +``` + \ No newline at end of file diff --git a/task_generator/prompts_raw/prompt_manim_cheatsheet.txt b/task_generator/prompts_raw/prompt_manim_cheatsheet.txt new file mode 100644 index 0000000000000000000000000000000000000000..6b985c02c3636d4fc13969672aa4a9c56ffaedbd --- /dev/null +++ b/task_generator/prompts_raw/prompt_manim_cheatsheet.txt @@ -0,0 +1,494 @@ +The following are the inheritance diagrams of the Manim library. You can use them as a reference to select which classes to use for the animation.
+ +``` +digraph Animation { + "AddTextLetterByLetter" + "ShowIncreasingSubsets" + "ShowIncreasingSubsets" -> "AddTextLetterByLetter" + "AddTextWordByWord"; + "Succession"; + "Succession" -> "AddTextWordByWord"; + "AnimatedBoundary"; + "VGroup"; + "VGroup" -> "AnimatedBoundary"; + "Animation"; + "AnimationGroup"; + "Animation" -> "AnimationGroup"; + "ApplyComplexFunction"; + "ApplyMethod"; + "ApplyMethod" -> "ApplyComplexFunction"; + "ApplyFunction"; + "Transform"; + "Transform" -> "ApplyFunction"; + "ApplyMatrix"; + "ApplyPointwiseFunction"; + "ApplyPointwiseFunction" -> "ApplyMatrix"; + "ApplyMethod"; + "Transform" -> "ApplyMethod"; + "ApplyPointwiseFunction"; + "ApplyMethod" -> "ApplyPointwiseFunction"; + "ApplyPointwiseFunctionToCenter"; + "ApplyPointwiseFunction" -> "ApplyPointwiseFunctionToCenter"; + "ApplyWave"; + "Homotopy"; + "Homotopy" -> "ApplyWave"; + "Broadcast"; + "LaggedStart"; + "LaggedStart" -> "Broadcast"; + "ChangeDecimalToValue"; + "ChangingDecimal"; + "ChangingDecimal" -> "ChangeDecimalToValue"; + "ChangeSpeed"; + "Animation" -> "ChangeSpeed"; + "ChangingDecimal"; + "Animation" -> "ChangingDecimal"; + "Circumscribe"; + "Succession" -> "Circumscribe"; + "ClockwiseTransform"; + "Transform" -> "ClockwiseTransform"; + "ComplexHomotopy"; + "Homotopy" -> "ComplexHomotopy"; + "CounterclockwiseTransform"; + "Transform" -> "CounterclockwiseTransform"; + "Create"; + "ShowPartial"; + "ShowPartial" -> "Create"; + "CyclicReplace"; + "Transform" -> "CyclicReplace"; + "DrawBorderThenFill"; + "Animation" -> "DrawBorderThenFill"; + "FadeIn"; + "FadeOut"; + "FadeToColor"; + "ApplyMethod" -> "FadeToColor"; + "FadeTransform"; + "Transform" -> "FadeTransform"; + "FadeTransformPieces"; + "FadeTransform" -> "FadeTransformPieces"; + "Flash"; + "AnimationGroup" -> "Flash"; + "FocusOn"; + "Transform" -> "FocusOn"; + "GrowArrow"; + "GrowFromPoint"; + "GrowFromPoint" -> "GrowArrow"; + "GrowFromCenter"; + "GrowFromPoint" -> "GrowFromCenter"; + "GrowFromEdge"; + "GrowFromPoint" -> "GrowFromEdge"; + "GrowFromPoint"; + "Transform" -> "GrowFromPoint"; + "Homotopy"; + "Animation" -> "Homotopy"; + "Indicate"; + "Transform" -> "Indicate"; + "LaggedStart"; + "AnimationGroup" -> "LaggedStart"; + "LaggedStartMap"; + "LaggedStart" -> "LaggedStartMap"; + "MaintainPositionRelativeTo"; + "Animation" -> "MaintainPositionRelativeTo"; + "Mobject"; + "MoveAlongPath"; + "Animation" -> "MoveAlongPath"; + "MoveToTarget"; + "Transform" -> "MoveToTarget"; + "PhaseFlow"; + "Animation" -> "PhaseFlow"; + "RemoveTextLetterByLetter"; + "AddTextLetterByLetter" -> "RemoveTextLetterByLetter"; + "ReplacementTransform"; + "Transform" -> "ReplacementTransform"; + "Restore"; + "ApplyMethod" -> "Restore"; + "Rotate"; + "Transform" -> "Rotate"; + "Rotating"; + "Animation" -> "Rotating"; + "ScaleInPlace"; + "ApplyMethod" -> "ScaleInPlace"; + "ShowIncreasingSubsets"; + "Animation" -> "ShowIncreasingSubsets"; + "ShowPartial"; + "Animation" -> "ShowPartial"; + "ShowPassingFlash"; + "ShowPartial" -> "ShowPassingFlash"; + "ShowPassingFlashWithThinningStrokeWidth"; + "AnimationGroup" -> "ShowPassingFlashWithThinningStrokeWidth"; + "ShowSubmobjectsOneByOne"; + "ShowIncreasingSubsets" -> "ShowSubmobjectsOneByOne"; + "ShrinkToCenter"; + "ScaleInPlace" -> "ShrinkToCenter"; + "SmoothedVectorizedHomotopy"; + "Homotopy" -> "SmoothedVectorizedHomotopy"; + "SpinInFromNothing"; + "GrowFromCenter" -> "SpinInFromNothing"; + "SpiralIn"; + "Animation" -> "SpiralIn"; + "Succession"; + "AnimationGroup" -> "Succession"; + "Swap"; + "CyclicReplace" -> "Swap"; 
+ "TracedPath"; + "VMobject"; + "VMobject" -> "TracedPath"; + "Transform"; + "Animation" -> "Transform"; + "TransformAnimations"; + "Transform" -> "TransformAnimations"; + "TransformFromCopy"; + "Transform" -> "TransformFromCopy"; + "TransformMatchingAbstractBase"; + "AnimationGroup" -> "TransformMatchingAbstractBase"; + "TransformMatchingShapes"; + "TransformMatchingAbstractBase" -> "TransformMatchingShapes"; + "TransformMatchingTex"; + "TransformMatchingAbstractBase" -> "TransformMatchingTex"; + "Uncreate"; + "Create" -> "Uncreate"; + "Unwrite"; + "Write"; + "Write" -> "Unwrite"; + "UpdateFromAlphaFunc"; + "UpdateFromFunc"; + "UpdateFromFunc" -> "UpdateFromAlphaFunc"; + "UpdateFromFunc"; + "Animation" -> "UpdateFromFunc"; + "VGroup"; + "VMobject" -> "VGroup"; + "VMobject"; + "Mobject" -> "VMobject"; + + "Wait"; + "Animation" -> "Wait"; + "Wiggle"; + "Animation" -> "Wiggle"; + "Write"; + "DrawBorderThenFill" -> "Write"; +} +``` + + +``` +digraph Camera { + "BackgroundColoredVMobjectDisplayer" + "Camera" + "MappingCamera" + "Camera" -> "MappingCamera" + "MovingCamera" + "Camera" -> "MovingCamera" + "MultiCamera" + "MovingCamera" -> "MultiCamera" + "OldMultiCamera" + "Camera" -> "OldMultiCamera" + "SplitScreenCamera" + "OldMultiCamera" -> "SplitScreenCamera" + "ThreeDCamera" + "Camera" -> "ThreeDCamera" +} +``` + +``` +digraph MObject { + "AbstractImageMobject" + "Mobject" -> "AbstractImageMobject" + "Angle" + "VMobject" -> "Angle" + "AnnotationDot" + "Dot" -> "AnnotationDot" + "AnnularSector" + "Arc" -> "AnnularSector" + "Annulus" + "Circle" -> "Annulus" + "Arc" + "TipableVMobject" -> "Arc" + "ArcBetweenPoints" + "Arc" -> "ArcBetweenPoints" + "ArcBrace" + "Brace" -> "ArcBrace" + "ArcPolygon" + "VMobject" -> "ArcPolygon" + "ArcPolygonFromArcs" + "VMobject" -> "ArcPolygonFromArcs" + "Arrow" + "Line" -> "Arrow" + "Arrow3D" + "Line3D" -> "Arrow3D" + "ArrowCircleFilledTip" + "ArrowCircleTip" -> "ArrowCircleFilledTip" + "ArrowCircleTip" + "ArrowTip" -> "ArrowCircleTip" + "Circle" -> "ArrowCircleTip" + "ArrowSquareFilledTip" + "ArrowSquareTip" -> "ArrowSquareFilledTip" + "ArrowSquareTip" + "ArrowTip" -> "ArrowSquareTip" + "Square" -> "ArrowSquareTip" + "ArrowTip" + "VMobject" -> "ArrowTip" + "ArrowTriangleFilledTip" + "ArrowTriangleTip" -> "ArrowTriangleFilledTip" + "ArrowTriangleTip" + "ArrowTip" -> "ArrowTriangleTip" + "Triangle" -> "ArrowTriangleTip" + "ArrowVectorField" + "VectorField" -> "ArrowVectorField" + "Axes" + "VGroup" -> "Axes" + "CoordinateSystem" -> "Axes" + "BackgroundRectangle" + "SurroundingRectangle" -> "BackgroundRectangle" + "BarChart" + "Axes" -> "BarChart" + "Brace" + "svg_mobject.VMobjectFromSVGPath" -> "Brace" + "BraceBetweenPoints" + "Brace" -> "BraceBetweenPoints" + "BraceLabel" + "VMobject" -> "BraceLabel" + "BraceText" + "BraceLabel" -> "BraceText" + "BulletedList" + "Tex" -> "BulletedList" + "Circle" + "Arc" -> "Circle" + "Code" + "VGroup" -> "Code" + "ComplexPlane" + "NumberPlane" -> "ComplexPlane" + "ComplexValueTracker" + "ValueTracker" -> "ComplexValueTracker" + "Cone" + "Surface" -> "Cone" + "CoordinateSystem" + "Cross" + "VGroup" -> "Cross" + "Cube" + "VGroup" -> "Cube" + "CubicBezier" + "VMobject" -> "CubicBezier" + "CurvedArrow" + "ArcBetweenPoints" -> "CurvedArrow" + "CurvedDoubleArrow" + "CurvedArrow" -> "CurvedDoubleArrow" + "CurvesAsSubmobjects" + "VGroup" -> "CurvesAsSubmobjects" + "Cutout" + "VMobject" -> "Cutout" + "Cylinder" + "Surface" -> "Cylinder" + "DashedLine" + "Line" -> "DashedLine" + "DashedVMobject" + "VMobject" -> "DashedVMobject" + 
"DecimalMatrix" + "Matrix" -> "DecimalMatrix" + "DecimalNumber" + "VMobject" -> "DecimalNumber" + "DecimalTable" + "Table" -> "DecimalTable" + "DiGraph" + "GenericGraph" -> "DiGraph" + "Difference" + "Dodecahedron" + "Polyhedron" -> "Dodecahedron" + "Dot" + "Circle" -> "Dot" + "Dot3D" + "Sphere" -> "Dot3D" + "DoubleArrow" + "Arrow" -> "DoubleArrow" + "Elbow" + "VMobject" -> "Elbow" + "Ellipse" + "Circle" -> "Ellipse" + "Exclusion" + "FullScreenRectangle" + "ScreenRectangle" -> "FullScreenRectangle" + "FunctionGraph" + "ParametricFunction" -> "FunctionGraph" + "Generic" + "GenericGraph" + "Generic" -> "GenericGraph" + "Graph" + "GenericGraph" -> "Graph" + "Group" + "Mobject" -> "Group" + "Icosahedron" + "Polyhedron" -> "Icosahedron" + "ImageMobject" + "AbstractImageMobject" -> "ImageMobject" + "ImageMobjectFromCamera" + "AbstractImageMobject" -> "ImageMobjectFromCamera" + "ImplicitFunction" + "VMobject" -> "ImplicitFunction" + "Integer" + "DecimalNumber" -> "Integer" + "IntegerMatrix" + "Matrix" -> "IntegerMatrix" + "IntegerTable" + "Table" -> "IntegerTable" + "Intersection" + "LabeledDot" + "Dot" -> "LabeledDot" + "LayoutFunction" + "Protocol" -> "LayoutFunction" + "Line" + "TipableVMobject" -> "Line" + "Line3D" + "Cylinder" -> "Line3D" + "LinearBase" + "LogBase" + "ManimBanner" + "VGroup" -> "ManimBanner" + "MarkupText" + "svg_mobject.SVGMobject" -> "MarkupText" + "MathTable" + "Table" -> "MathTable" + "MathTex" + "SingleStringMathTex" -> "MathTex" + "Matrix" + "VMobject" -> "Matrix" + "Mobject" + "Mobject1D" + "PMobject" -> "Mobject1D" + "Mobject2D" + "PMobject" -> "Mobject2D" + "MobjectMatrix" + "Matrix" -> "MobjectMatrix" + "MobjectTable" + "Table" -> "MobjectTable" + "NumberLine" + "Line" -> "NumberLine" + "NumberPlane" + "Axes" -> "NumberPlane" + "Octahedron" + "Polyhedron" -> "Octahedron" + "PGroup" + "PMobject" -> "PGroup" + "PMobject" + "Mobject" -> "PMobject" + "Paragraph" + "VGroup" -> "Paragraph" + "ParametricFunction" + "VMobject" -> "ParametricFunction" + "Point" + "PMobject" -> "Point" + "PointCloudDot" + "Mobject1D" -> "PointCloudDot" + "PolarPlane" + "Axes" -> "PolarPlane" + "Polygon" + "Polygram" -> "Polygon" + "Polygram" + "VMobject" -> "Polygram" + "Polyhedron" + "VGroup" -> "Polyhedron" + "Prism" + "Cube" -> "Prism" + "Protocol" + "Generic" -> "Protocol" + "Rectangle" + "Polygon" -> "Rectangle" + "RegularPolygon" + "RegularPolygram" -> "RegularPolygon" + "RegularPolygram" + "Polygram" -> "RegularPolygram" + "RightAngle" + "Angle" -> "RightAngle" + "RoundedRectangle" + "Rectangle" -> "RoundedRectangle" + "SVGMobject" + "VMobject" -> "SVGMobject" + "SampleSpace" + "Rectangle" -> "SampleSpace" + "ScreenRectangle" + "Rectangle" -> "ScreenRectangle" + "Sector" + "AnnularSector" -> "Sector" + "SingleStringMathTex" + "svg_mobject.SVGMobject" -> "SingleStringMathTex" + "Sphere" + "Surface" -> "Sphere" + "Square" + "Rectangle" -> "Square" + "Star" + "Polygon" -> "Star" + "StealthTip" + "ArrowTip" -> "StealthTip" + "StreamLines" + "VectorField" -> "StreamLines" + "Surface" + "VGroup" -> "Surface" + "SurroundingRectangle" + "RoundedRectangle" -> "SurroundingRectangle" + "Table" + "VGroup" -> "Table" + "TangentLine" + "Line" -> "TangentLine" + "Tetrahedron" + "Polyhedron" -> "Tetrahedron" + "Tex" + "MathTex" -> "Tex" + "Text" + "svg_mobject.SVGMobject" -> "Text" + "ThreeDAxes" + "Axes" -> "ThreeDAxes" + "ThreeDVMobject" + "VMobject" -> "ThreeDVMobject" + "TipableVMobject" + "VMobject" -> "TipableVMobject" + "Title" + "Tex" -> "Title" + "Torus" + "Surface" -> "Torus" + "Triangle" + 
"RegularPolygon" -> "Triangle" + "Underline" + "Line" -> "Underline" + "Union" + "UnitInterval" + "NumberLine" -> "UnitInterval" + "VDict" + "VMobject" -> "VDict" + "VGroup" + "VMobject" -> "VGroup" + "VMobject" + "Mobject" -> "VMobject" + "VMobjectFromSVGPath" + "VMobject" -> "VMobjectFromSVGPath" + "ValueTracker" + "Mobject" -> "ValueTracker" + "Variable" + "VMobject" -> "Variable" + "Vector" + "Arrow" -> "Vector" + "VectorField" + "VGroup" -> "VectorField" + "VectorizedPoint" + "VMobject" -> "VectorizedPoint" +} +``` + +``` +digraph Scene { + "LinearTransformationScene" + "VectorScene" + "VectorScene" -> "LinearTransformationScene" + "MovingCameraScene" + "Scene" + "Scene" -> "MovingCameraScene" + "RerunSceneHandler" + "Scene" + "SceneFileWriter" + "SpecialThreeDScene" + "ThreeDScene" + "ThreeDScene" -> "SpecialThreeDScene" + "ThreeDScene" + "Scene" -> "ThreeDScene" + "VectorScene" + "Scene" -> "VectorScene" + "ZoomedScene" + "MovingCameraScene" -> "ZoomedScene" +} +``` \ No newline at end of file diff --git a/task_generator/prompts_raw/prompt_rag_query_generation_code.txt b/task_generator/prompts_raw/prompt_rag_query_generation_code.txt new file mode 100644 index 0000000000000000000000000000000000000000..17a30d5d7dcc84febe2d6d919062fdcfcb39c0ad --- /dev/null +++ b/task_generator/prompts_raw/prompt_rag_query_generation_code.txt @@ -0,0 +1,25 @@ +You are an expert in generating search queries specifically for **Manim (Community Edition) documentation** (both core Manim and its plugins). Your task is to transform a complete implementation plan for a Manim video scene into effective queries that will retrieve relevant information from Manim documentation. The implementation plan describes the scene's vision, storyboard, technical implementation, and animation/narration strategy. + +Here is the complete scene implementation plan: + +{implementation_plan} + +Based on the complete implementation plan, generate multiple human-like queries (maximum 10) for retrieving relevant documentation. Please ensure that the search targets are different so that the RAG can retrieve a diverse set of documents covering various aspects of the implementation. + +**Specifically, ensure that:** +1. At least some queries are focused on retrieving information about **Manim function usage** in scenes. Frame these queries to target function definitions, usage examples, and parameter details within Manim documentation. +2. If the implementation suggests using plugin functionality, include at least 1 query specifically targeting **plugin documentation**. Clearly mention the plugin name in these queries to focus the search. +3. Queries should be specific enough to distinguish between core Manim and plugin functionality when relevant, and to target the most helpful sections of the documentation (API reference, tutorials, examples). + +The above implementation plans are relevant to these plugins: {relevant_plugins}. +Note that you MUST NOT use the plugins that are not listed above. + +You MUST only output the queries in the following JSON format (with json triple backticks): +```json +[ + {{"type": "manim-core", "query": "content of function usage query"}}, + {{"type": "", "query": "content of plugin-specific query"}}, + {{"type": "manim-core", "query": "content of API reference query"}} + ... 
+] +``` \ No newline at end of file diff --git a/task_generator/prompts_raw/prompt_rag_query_generation_fix_error.txt b/task_generator/prompts_raw/prompt_rag_query_generation_fix_error.txt new file mode 100644 index 0000000000000000000000000000000000000000..1f9f564730b648adb43e99cfaf184a8b5fbd2049 --- /dev/null +++ b/task_generator/prompts_raw/prompt_rag_query_generation_fix_error.txt @@ -0,0 +1,27 @@ +You are an expert in generating search queries specifically for **Manim (Community Edition) documentation** (both core Manim and its plugins). Your task is to transform a Manim error and its associated code into effective queries that will retrieve relevant information from Manim documentation. + +Here is the error message: +{error} + +Here is the Manim code that caused the error: +{code} + +Based on the error and code, generate multiple human-like queries (maximum 10) for retrieving relevant documentation. Please ensure that the search targets are different so that the RAG can retrieve a diverse set of documents covering various aspects of the implementation. + +**Specifically, ensure that:** +1. At least some queries are focused on retrieving information about **Manim function usage** in scenes. Frame these queries to target function definitions, usage examples, and parameter details within Manim documentation. +2. If the error suggests using plugin functionality, include at least 1 query specifically targeting **plugin documentation**. Clearly mention the plugin name in these queries to focus the search. +3. Queries should be specific enough to distinguish between core Manim and plugin functionality when relevant, and to target the most helpful sections of the documentation (API reference, tutorials, examples). + +The above error and code are relevant to these plugins: {relevant_plugins}. +Note that you MUST NOT use the plugins that are not listed above. + +You MUST only output the queries in the following JSON format (with json triple backticks): +```json +[ + {{"type": "manim-core", "query": "content of function usage query"}}, + {{"type": "", "query": "content of plugin-specific query"}}, + {{"type": "manim-core", "query": "content of API reference query"}} + ... +] +``` \ No newline at end of file diff --git a/task_generator/prompts_raw/prompt_rag_query_generation_narration.txt b/task_generator/prompts_raw/prompt_rag_query_generation_narration.txt new file mode 100644 index 0000000000000000000000000000000000000000..d1bf63c2e7cc45391f77b56ecd787e1612179886 --- /dev/null +++ b/task_generator/prompts_raw/prompt_rag_query_generation_narration.txt @@ -0,0 +1,25 @@ +You are an expert in generating search queries specifically for **Manim (Community Edition) documentation** (both core Manim and its plugins). Your task is to analyze a storyboard and generate effective queries that will retrieve relevant documentation about narration, text animations, and audio-visual synchronization. + +Here is the storyboard: + +{storyboard} + +Based on this storyboard, generate multiple human-like queries (maximum 10) for retrieving relevant documentation about narration and text animation techniques. + +**Specifically, ensure that:** +1. Queries focus on retrieving information about **text animations** and their properties +2. Include queries about **timing and synchronization** techniques +3. If the storyboard suggests using plugin functionality, include specific queries targeting those plugin's narration capabilities + +The above storyboard is relevant to these plugins: {relevant_plugins}. 
diff --git a/task_generator/prompts_raw/prompt_rag_query_generation_storyboard.txt b/task_generator/prompts_raw/prompt_rag_query_generation_storyboard.txt new file mode 100644 index 0000000000000000000000000000000000000000..9f9375076a5f9294dc48fa9321696318db2f739b --- /dev/null +++ b/task_generator/prompts_raw/prompt_rag_query_generation_storyboard.txt @@ -0,0 +1,28 @@ +You are an expert in generating search queries specifically for **Manim (Community Edition) documentation** (both core Manim and its plugins). Your task is to transform a storyboard plan for a Manim video scene into effective queries that will retrieve relevant information from Manim documentation. The storyboard plan describes the scene's visual elements and narrative flow. + +Here is the storyboard plan: + +{storyboard} + +Based on the storyboard plan, generate multiple human-like queries (maximum 10) for retrieving relevant documentation. Please ensure that the search targets are different so that the RAG can retrieve a diverse set of documents covering various aspects of the implementation. + +**Specifically, ensure that:** +1. At least some queries are focused on retrieving information about **Manim core functionalities**, like general visual elements or animations. Frame these queries using Manim terminology (classes, methods, concepts). +2. If the storyboard suggests using specific visual effects or complex animations that might be plugin-related, include at least 1 query specifically targeting **plugin documentation**. Make sure to mention the plugin name if known or suspected. +3. Queries should be general enough to explore different possibilities within Manim and its plugins based on the storyboard's visual and narrative descriptions, but also specific enough to target Manim documentation effectively. + +The above storyboard might be relevant to these plugins: {relevant_plugins}. +Note that you MUST NOT use the plugins that are not listed above. + +You MUST only output the queries in the following JSON format (with json triple backticks): +```json +[ + {{"query": "content of query 1", "type": "manim_core/{relevant_plugins}"}}, + {{"query": "content of query 2", "type": "manim_core/{relevant_plugins}"}}, + {{"query": "content of query 3", "type": "manim_core/{relevant_plugins}"}}, + {{"query": "content of query 4", "type": "manim_core/{relevant_plugins}"}}, + {{"query": "content of query 5", "type": "manim_core/{relevant_plugins}"}}, + {{"query": "content of query 6", "type": "manim_core/{relevant_plugins}"}}, + {{"query": "content of query 7", "type": "manim_core/{relevant_plugins}"}} +] +``` \ No newline at end of file diff --git a/task_generator/prompts_raw/prompt_rag_query_generation_technical.txt b/task_generator/prompts_raw/prompt_rag_query_generation_technical.txt new file mode 100644 index 0000000000000000000000000000000000000000..f793afbf2a6dda1e61af5df43d26c9b8537191e8 --- /dev/null +++ b/task_generator/prompts_raw/prompt_rag_query_generation_technical.txt @@ -0,0 +1,25 @@ +You are an expert in generating search queries specifically for **Manim (Community Edition) documentation** (both core Manim and its plugins).
Your task is to analyze a storyboard plan and generate effective queries that will retrieve relevant technical documentation about implementation details. + +Here is the storyboard plan: + +{storyboard} + +Based on this storyboard plan, generate multiple human-like queries (maximum 10) for retrieving relevant technical documentation. + +**Specifically, ensure that:** +1. Queries focus on retrieving information about **core Manim functionality** and implementation details +2. Include queries about **complex animations and effects** described in the storyboard +3. If the storyboard suggests using plugin functionality, include specific queries targeting those plugins' technical documentation + +The above storyboard plan is relevant to these plugins: {relevant_plugins}. +Note that you MUST NOT use the plugins that are not listed above. + +You MUST only output the queries in the following JSON format (with json triple backticks): +```json +[ + {{"type": "manim-core", "query": "content of core functionality query"}}, + {{"type": "", "query": "content of plugin-specific query"}}, + {{"type": "manim-core", "query": "content of animation technique query"}} + ... +] +``` \ No newline at end of file diff --git a/task_generator/prompts_raw/prompt_rag_query_generation_vision_storyboard.txt b/task_generator/prompts_raw/prompt_rag_query_generation_vision_storyboard.txt new file mode 100644 index 0000000000000000000000000000000000000000..22232cccb467f4b6b1a8c9c282b02c67a76e3cce --- /dev/null +++ b/task_generator/prompts_raw/prompt_rag_query_generation_vision_storyboard.txt @@ -0,0 +1,27 @@ +You are an expert in generating search queries specifically for **Manim (Community Edition) documentation** (both core Manim and its plugins). Your task is to analyze a scene plan for a Manim animation and generate effective queries that will retrieve relevant documentation about visual elements and scene composition. + +Here is the scene plan: + +{scene_plan} + +Based on this scene plan, generate multiple human-like queries (maximum 10) for retrieving relevant documentation about visual elements and scene composition techniques. + +**Specifically, ensure that:** +1. Queries focus on retrieving information about **visual elements** like shapes, objects, and their properties +2. Include queries about **scene composition techniques** like layout, positioning, and grouping +3. If the scene plan suggests using plugin functionality, include specific queries targeting those plugins' visual capabilities +4. Queries should be high-level, aiming to discover what Manim features can be used, rather than focusing on low-level implementation details. + - For example, instead of "how to set the color of a circle", ask "what visual properties of shapes can I control in Manim?". + +The above scene plan is relevant to these plugins: {relevant_plugins}. +Note that you MUST NOT use the plugins that are not listed above. + +You MUST only output the queries in the following JSON format (with json triple backticks): +```json +[ + {{"type": "manim-core", "query": "content of visual element query"}}, + {{"type": "", "query": "content of plugin-specific query"}}, + {{"type": "manim-core", "query": "content of composition technique query"}} + ... +] +``` \ No newline at end of file
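All five RAG query prompts pin the model to a fenced ```json list of objects carrying "type" and "query" keys, so the retrieval layer can recover the queries mechanically. A hedged sketch of that parsing step; the regex, the function name, and the validation rule are assumptions, not the project's actual code:

```python
import json
import re

def extract_queries(response: str) -> list[dict]:
    # Pull the first ```json ... ``` block out of the model's reply.
    match = re.search(r"```json\s*(\[.*?\])\s*```", response, re.DOTALL)
    if match is None:
        raise ValueError("reply contained no fenced JSON query list")
    queries = json.loads(match.group(1))
    # Keep only well-formed entries: a "type" (e.g. "manim-core" or a
    # plugin name) plus the query text itself.
    return [q for q in queries if isinstance(q, dict) and "type" in q and "query" in q]
```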
diff --git a/task_generator/prompts_raw/prompt_scene_animation_narration.txt b/task_generator/prompts_raw/prompt_scene_animation_narration.txt new file mode 100644 index 0000000000000000000000000000000000000000..567b4c406c1675102ba8403fb93841a48ef6286a --- /dev/null +++ b/task_generator/prompts_raw/prompt_scene_animation_narration.txt @@ -0,0 +1,94 @@ +You are an expert in educational video production and Manim animation, skilled in creating engaging and pedagogically effective learning experiences. +**Reminder:** This animation and narration plan is entirely self-contained; there is no dependency on any previous or subsequent scene implementations. However, the narration should flow smoothly as part of a larger, single video. + +Your task is to create a **detailed animation and narration plan for Scene {scene_number}**, ensuring it is not just visually appealing but also serves a clear educational purpose within the overall video topic. + +Remember, the narration should not simply describe what's happening visually, but rather **teach a concept step-by-step**, guiding the viewer to a deeper understanding. Animations should be spatially coherent, contribute to a clear visual flow, and strictly respect safe area margins (0.5 units) and minimum spacing (0.3 units). **Consider the scene number {scene_number} and the overall scene context to ensure smooth transitions and a logical flow within the larger video narrative.** + +Topic: {topic} +Description: {description} + +Scene Overview: +{scene_outline} + +Scene Vision and Storyboard: +{scene_vision_storyboard} + +Technical Implementation Plan: +{technical_implementation_plan} + +The following manim plugins are relevant to the scene: +{relevant_plugins} + +**Spatial Constraints (Strictly Enforced Throughout Animations):** +* **Safe area margins:** 0.5 units. *Maintain objects and VGroups within margins.* +* **Minimum spacing:** 0.3 units. *Ensure minimum spacing between all objects and VGroups.* + +**Animation Timing and Pacing Requirements:** +* Specify `run_time` for all animations. +* Use `Wait()` for transition buffers, specifying durations and **pedagogical purpose**. +* Coordinate animation timings with narration cues for synchronized pedagogical presentation. + +**Visual Flow and Pedagogical Clarity:** +* Ensure animations create a clear and logical visual flow, **optimized for learning and concept understanding.** +* Use animation pacing and transition buffers to visually separate ideas and **enhance pedagogical clarity.** +* Maintain spatial coherence for predictable and understandable animations, strictly adhering to spatial constraints. + +**Diagrams/Sketches (Optional but Highly Recommended for Complex Scenes):** +* For complex animations, include diagrams/sketches to visualize animation flow and object movements. This aids clarity and reduces errors. + +Your plan must demonstrate a strong understanding of pedagogical narration and how animations can be used to effectively teach concepts, while strictly adhering to spatial constraints and timing requirements. + +You MUST generate a **detailed and comprehensive** animation and narration plan for **Scene {scene_number}**, in the following format, similar to the example provided (from ```xml to ```): + +```xml + + +[ANIMATION_STRATEGY] +1. **Pedagogical Animation Plan:** Provide a detailed plan for all animations in the scene, explicitly focusing on how each animation contributes to **teaching the core concepts** of this scene.
+ - **Parent VGroup transitions (if applicable):** + - If VGroups are used, specify transitions (`Shift`, `Transform`, `FadeIn`, `FadeOut`) with `Animation` type, direction, magnitude, target VGroup, and `run_time`. + - **Explain the pedagogical rationale** for each VGroup transition. How does it guide the viewer's attention or contribute to understanding the scene's learning objectives? Ensure spatial coherence and respect for constraints. + - **Element animations within VGroups and for individual Mobjects:** + - Specify animation types (`Create`, `Write`, `FadeIn`, `Transform`, `Circumscribe`, `AnimationGroup`, `Succession`) for elements. + - For each element animation, specify `Animation` type, target object(s), and `run_time`. Detail sequences and timing for `AnimationGroup` or `Succession`. + - **Explain the pedagogical purpose** of each element animation. How does it break down complex information, highlight key details, or improve visual clarity for learning? Ensure spatial coherence and minimum spacing. + - **Coordinate element animations with VGroup transitions:** + - Clearly describe the synchronization between element animations and VGroup transitions (if any). + - Specify relative timing and `run_time` to illustrate coordination. + - **Explain how this animation sequence and coordination creates a pedagogical flow**, guiding the viewer's eye and attention logically through the learning material. + +2. **Scene Flow - Pedagogical Pacing and Clarity:** Detail the overall flow of the scene, emphasizing pedagogical effectiveness. + - **Overall animation sequence, spatial progression for learning:** + - Describe the complete animation sequence, broken down into pedagogical sub-sections (e.g., "Introducing the Problem", "Step-by-step Solution", "Concept Reinforcement"). + - Outline the spatial progression of objects and VGroups, focusing on how it supports the **pedagogical narrative** and concept development. + - Ensure a clear and logical visual flow optimized for learning, respecting spatial constraints. + - **Transition buffers for pedagogical pauses:** + - Specify `Wait()` times between animation sections for visual separation and **learner processing time**. + - For each `Wait()`, specify duration and **explain the pedagogical reason** for this buffer (e.g., "Allow viewers time to process the formula", "Create a pause for reflection before moving to the next concept"). + - **Coordinate animation timing with narration for engagement and comprehension:** + - Describe how animation timings are coordinated with the narration script to **maximize viewer engagement and comprehension**. + - Specify animation cues within the narration script and explain how these cues are synchronized with animations to **reinforce learning points** at the optimal moment. + +[NARRATION] +- **Pedagogical Narration Script:** + - Provide the full narration script for Scene {scene_number}. + - **Embed precise animation timing cues** within the narration script (as described before). + - **The script should be written as if delivered by a knowledgeable and engaging lecturer.** It should: + - **Clearly explain concepts step-by-step.** + - **Use analogies and real-world examples to enhance understanding.** + - **Pose questions to encourage active thinking.** + - **Summarize key points and transitions.** + - **Be detailed and knowledge-rich, not just visually descriptive.** + - **Connect smoothly with the previous and subsequent scenes, acting as a segment within a single, cohesive video.** + - Avoid repetitive introductions or conclusions. + - Consider using phrases like "Building on what we saw in the previous part..." or "Let's now move on to..." to create a sense of continuity. + - Reference the scene number when appropriate (e.g., "Now, let's explore..."). + - **Crucially, the narration should seamlessly integrate with the animations to create a cohesive and effective learning experience.** +- **Narration Sync - Pedagogical Alignment:** + - Detail the synchronization strategy between narration and animations, emphasizing **pedagogical alignment**. + - Explain how narration timing is aligned with animation start/end times to **guide viewer attention to key learning elements precisely when they animate.** + - Emphasize how narration cues and animation timings work together to **create a synchronized audiovisual presentation that maximizes learning and retention.** + + +```
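The plan format above leans on three timing primitives: an explicit `run_time` on every animation, `Wait()` buffers with a stated pedagogical purpose, and sequencing via `AnimationGroup`/`Succession`. A minimal Manim CE sketch of the kind of pacing such a plan describes; the scene content itself is invented for illustration:

```python
from manim import DOWN, UP, FadeIn, MathTex, Scene, Tex, Write

class PacedFormulaScene(Scene):
    def construct(self):
        # Respect the 0.5-unit safe-area margin named in the plan's constraints.
        title = Tex("Area of a circle").to_edge(UP, buff=0.5)
        formula = MathTex(r"A = \pi r^2").next_to(title, DOWN, buff=0.5)

        self.play(Write(title), run_time=1.5)
        self.wait(0.5)   # buffer: let the title register before the formula appears
        self.play(FadeIn(formula), run_time=1.0)
        self.wait(1.0)   # processing pause before the next teaching beat
```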
diff --git a/task_generator/prompts_raw/prompt_scene_implementation.txt b/task_generator/prompts_raw/prompt_scene_implementation.txt new file mode 100644 index 0000000000000000000000000000000000000000..9a6dd3aecc827b426c3dcb2dc03ed95243d295a4 --- /dev/null +++ b/task_generator/prompts_raw/prompt_scene_implementation.txt @@ -0,0 +1,140 @@ +You are an expert in educational video production and Manim (Community Edition) animation development. Your task is to create a detailed implementation plan for Scene {scene_number}. + + +Topic: {topic} +Description: {description} + + + +Scene Overview: +{scene_outline} + + + + +[SCENE_VISION] +1. **Overall Narrative**: + - Describe the overall story or message of the scene. What is the key takeaway for the viewer? + - How does this scene fit into the larger narrative of the video? + - What is the desired emotional impact on the viewer? + +2. **Learning Objectives**: + - What specific knowledge or skills should the viewer gain from this scene? + - How will the visual elements and animations support these learning objectives? + - What are the key concepts that need to be emphasized? + +[STORYBOARD] +1. **Visual Flow**: + - Describe the sequence of visual elements and animations in the scene. + - Provide a rough sketch or description of the key visual moments. + - How will the scene transition between different ideas or concepts? + - What is the pacing of the scene? Are there moments of pause or rapid action? + +[TECHNICAL_IMPLEMENTATION] +1. **High-Level Components (VGroups)**: + - **Identify the main conceptual sections of the scene.** Think of this like outlining chapters in a story or sections in a presentation. + - **Define the purpose of each high-level component.** What should the viewer learn or understand from each section? + - **Describe how these components relate to each other and the overall scene flow.** How will you transition between these sections to create a cohesive narrative? + - **Provide a brief rationale for your choice of high-level components.** Why did you choose these specific sections? + +2. **VGroup Hierarchy**: + - **For each high-level component, define a parent VGroup.** This VGroup will act as a container for all elements within that section. + - **Break down each parent VGroup into nested VGroups for sub-components as needed.** Think about logical groupings of elements. + - **Specify the relative positioning of these VGroups within the scene using `next_to()`, `align_to()`, and `shift()` where possible.** How will the parent VGroups be arranged on the screen relative to each other? (e.g., stacked vertically, side-by-side, etc.)
Prioritize relative positioning using the following references: + - `ORIGIN`: the center of the scene + - scene margins (e.g., corners, edges) + - other VGroups as references. + - **MUST NOT use absolute coordinates.** + - **Define the scale relationships between different levels of the VGroup hierarchy.** Will sub-VGroups inherit scale from parent VGroups? How will scaling be managed to maintain visual consistency? + - **Provide a brief rationale for your VGroup hierarchy.** Why did you choose this specific structure? + + For each VGroup level (from high-level down to sub-components): + - Name: [Descriptive name for the VGroup, e.g., "TitleSection", "ProblemStatementGroup", "Explanation1Group"] + - Purpose: [What is the purpose of this VGroup? What should the viewer learn or understand from this VGroup?] + - Contents: [List all child VGroups and individual elements (Text, MathTex, Shapes, etc.) that belong to this VGroup.] + - Positioning: + * Reference: [Specify what this VGroup is positioned relative to. Do not use absolute coordinates.] + * Alignment: [How is it aligned relative to the reference? Use `align_to()` with options like `UP`, `DOWN`, `LEFT`, `RIGHT`, `ORIGIN`, etc.] + * Spacing: [Describe any spacing considerations relative to sibling VGroups or elements within the parent. Use `buff` argument in `next_to()` or `arrange()`. Refer to the defined minimum spacing value.] + - Scale: [Specify the scale of this VGroup relative to its parent VGroup. Use relative scaling factors (e.g., 1.0 for same scale, 0.8 for smaller).] + - Rationale: [Explain the reasoning behind the structure and organization of this VGroup. Why did you group these elements together?] + +3. **Element Specification**: + For each individual element (Text, MathTex, Shapes, etc.) within a VGroup: + - Name: [Descriptive name for the element, e.g., "ProblemTitleText", "Equation1", "HighlightCircle"] + - Type: [Manim object type. Examples: Text, MathTex, Circle, Rectangle, Arrow, Line, etc.] + - Parent VGroup: [Specify the VGroup this element belongs to. This establishes the hierarchical relationship.] + - Positioning: + * Reference: [Specify what this element is positioned relative to. Use its parent VGroup, other elements, `ORIGIN`, or scene margins as references. Do not use absolute coordinates.] + * Alignment: [How is it aligned within its parent VGroup? Use `align_to()` or `next_to()` with appropriate directions, e.g. `UP`, `DOWN`, `LEFT`, `RIGHT`, `ORIGIN`, `UL`, `UR`, `DL`, `DR`] + * Spacing: [If applicable, describe spacing relative to other elements using `buff` in `next_to()`. Refer to the defined minimum spacing value.] + - Style Properties: + * Color: [Hex code or named color (e.g., "RED", "BLUE"). Use hex codes for specific colors. e.g., #FF0000 for red] + * Opacity: [Value between 0 and 1. 1 for fully opaque, 0 for fully transparent.] + * Stroke Width: [Specify stroke width using levels: `thin`, `medium`, or `thick`.] + * Font: [Font family name, if applicable.] + * Font Size: [Specify font size using levels: `heading1`, `heading2`, `heading3`, `heading4`, `heading5`, `heading6`, or `body`. Refer to the defined font size levels.] + * Fill Color: [Hex code for fill color, if applicable.] + * ... [Include any other relevant style properties] + - Z-Index: [Integer value for layering order within the VGroup. Higher values are on top.] + - Required Imports: [List specific Manim classes that need to be imported to create this element. e.g., `from manim import Text, Circle`] + +[ANIMATION_STRATEGY] +1. 
**VGroup Transitions**: + - **Define how parent VGroups will transition onto and off of the scene, and between different sections.** Describe the movement patterns for these high-level groups. Examples: 'Slide in from left', 'Fade in and scale up', 'Move to top of screen'. + - **Specify the timing and coordination of VGroup transitions.** How long will each transition take? Will transitions overlap or be sequential? + - **Describe any transformation sequences applied to VGroups during transitions.** Will VGroups rotate, scale, or change shape during transitions? + +2. **Element Animations**: + - **Define the animations for individual elements within each VGroup.** What animations will bring each element to life? Examples: 'Write in text', 'Draw a circle', 'Highlight an equation', 'Fade in an image'. + - **Group related element animations using Manim's animation grouping features (e.g., `AnimationGroup`, `Succession`).** Explain how these groups will be used to create cohesive animation sequences. + - **Coordinate element animations with parent VGroup movements and transitions.** Ensure element animations are synchronized with the overall scene flow. + - **Specify the timing of element animations relative to VGroup transitions and other element animations.** Create a timeline or sequence of animations. + +3. **Scene Flow**: + - **Describe the overall animation sequence for the entire scene.** Outline the order in which VGroups and elements will be animated. + - **Specify transition buffers or pauses between major sections of the scene.** How much time will be left between animations for the viewer to process information? + - **Consider how the animation timing will coordinate with the narration (if narration is planned).** Animations should complement and reinforce the spoken content. + +[NARRATION] +- **Narration Script:** [Provide the full script for the narration, including timing cues or markers for when specific animations should occur. The script should be clear, detailed, and engaging, and should align with the visual elements and animations.] +- **Narration Sync:** [Describe how the narration should be synchronized with the animations. Specify how timing cues in the narration script will be used to trigger animations. Are there specific points where the narration and animations should be perfectly synchronized? Explain how you will achieve this synchronization.] + +[VIEWER_EXPERIENCE] +1. **Cognitive Load**: + - How will you manage the amount of information presented at any given time? + - Are there any complex concepts that need to be broken down into smaller steps? + - How will you use visual cues to guide the viewer's attention? + +2. **Pacing**: + - Is the pacing of the scene appropriate for the content? + - Are there moments where the viewer needs time to pause and reflect? + - How will you use animation timing to control the pace of the scene? + +3. **Accessibility**: + - How will you ensure that the scene is accessible to viewers with different needs? + - Are there any specific considerations for color contrast or text readability? + +[TECHNICAL_CHECKS] +- **VGroup boundary validation:** Ensure all elements are contained within their intended VGroup boundaries and are not overflowing unexpectedly. +- **Hierarchy scale consistency:** Verify that scaling is applied consistently throughout the VGroup hierarchy and that text and elements remain readable at all scales. 
+- **Animation coordination between levels:** Check that animations at different VGroup levels are coordinated and do not clash or look disjointed. +- **Performance optimization for nested groups:** Consider the performance implications of deeply nested VGroups and optimize structure and animations for smooth playback. +- **Text readability:** Ensure all text elements are legible in terms of size, color contrast, and positioning. +- **Color contrast:** Verify sufficient color contrast between text and background, and between different visual elements for accessibility. +- **Animation smoothness:** Check for any jerky or abrupt animations and refine timing and easing for smoother transitions. + + + +Requirements: +1. All elements must stay within safe area margins +2. Maintain minimum spacing between objects: [value] (This value is defined in the project settings) +3. Use relative positioning when possible, leveraging `next_to()`, `align_to()`, and `shift()`. Only reference positions relative to `ORIGIN`, scene margins, or other object reference points. Do not use absolute coordinates. +4. Include transition buffers between animations +5. Specify z-index for overlapping elements +6. All colors must use hex codes or named colors +7. Define scale relative to base unit +8. No external dependencies +9. Currently, there are no images or other assets available locally or remotely for you to use in the scene. Only include elements that can be generated through manim. +10. **Do not generate any code in this plan, except for illustrative examples where necessary. This plan is for outlining the scene and should not include any python code.** +11. **The purpose of this plan is to be a detailed guide for a human to implement the scene in manim.** \ No newline at end of file
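The TECHNICAL_CHECKS and Requirements above state the safe-area and spacing rules as review obligations; once a scene exists they can also be spot-checked mechanically. A rough sketch of such a check, assuming Manim CE's `config` frame dimensions; the helper name is illustrative:

```python
from manim import Mobject, config

def within_safe_area(mobj: Mobject, margin: float = 0.5) -> bool:
    # The frame is config.frame_width x config.frame_height, centered on ORIGIN.
    half_w = config.frame_width / 2 - margin
    half_h = config.frame_height / 2 - margin
    return (
        mobj.get_left()[0] >= -half_w
        and mobj.get_right()[0] <= half_w
        and mobj.get_bottom()[1] >= -half_h
        and mobj.get_top()[1] <= half_h
    )
```

A pairwise bounding-box test along the same lines would approximate the 0.3-unit minimum-spacing rule, though true edge-to-edge distance for irregular shapes needs more care than a box comparison.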
diff --git a/task_generator/prompts_raw/prompt_scene_plan.txt b/task_generator/prompts_raw/prompt_scene_plan.txt new file mode 100644 index 0000000000000000000000000000000000000000..45a36e6b0d3f35636eb7eee44f8eb3932a30c867 --- /dev/null +++ b/task_generator/prompts_raw/prompt_scene_plan.txt @@ -0,0 +1,49 @@ +You are an expert in educational video production, instructional design, and {topic}. Please design a high-quality video that provides an in-depth explanation of {topic}. + +**Video Overview:** + +Topic: {topic} +Description: {description} + +**Scene Breakdown:** + +Plan individual scenes. For each scene please provide the following: + +* **Scene Title:** Short, descriptive title (2-5 words). +* **Scene Purpose:** Learning objective of this scene. How does it connect to previous scenes? +* **Scene Description:** Detailed description of scene content. +* **Scene Layout:** Describe the spatial layout concept in detail. Consider safe area margins and minimum spacing between objects. + +Please generate the scene plan for the video in the following format: + +```xml + + + Scene Title: [Title] + Scene Purpose: [Learning objective, connection to previous scene] + Scene Description: [Brief content description] + Scene Layout: [Spatial layout concept, consider safe area and spacing] + + + + ... + +... + +``` + +**Spatial Constraints:** +* **Safe area margins:** 0.5 units on all sides from the scene edges. *All objects must be positioned within these margins.* +* **Minimum spacing:** 0.3 units between any two Manim objects (measured edge to edge). *Ensure adequate spacing to prevent overlaps and maintain visual clarity.* + +Requirements: +1. Scenes must build progressively, starting from foundational concepts and advancing to more complex ideas to ensure a logical flow of understanding for the viewer. Each scene should naturally follow from the previous one, creating a cohesive learning narrative. Start with simpler scene layouts and progressively increase complexity in later scenes. +2. The total number of scenes should be between 3 and 7. +3. Learning objectives should be distributed evenly across the scenes. +4. The total video duration must be under 15 minutes. +5. It is essential to use the exact output format, tags, and headers as specified in the prompt. +6. Maintain consistent formatting throughout the entire scene plan. +7. **No External Assets:** Do not import any external files (images, audio, video). *Use only Manim built-in elements and procedural generation.* +8. **Focus on in-depth explanation of the theorem. Do not include any promotional elements (like YouTube channel promotion, subscribe messages, or external resources) or quiz sessions. Detailed example questions are acceptable and encouraged.** + +Note: High-level plan. Detailed scene specifications will be generated later, ensuring adherence to safe area margins and minimum spacing. The spatial constraints defined above will be strictly enforced in subsequent planning stages. \ No newline at end of file diff --git a/task_generator/prompts_raw/prompt_scene_technical_implementation.txt b/task_generator/prompts_raw/prompt_scene_technical_implementation.txt new file mode 100644 index 0000000000000000000000000000000000000000..5d8aa99e5d7a0e977e3d4e38ab1b5f6e3f704d8f --- /dev/null +++ b/task_generator/prompts_raw/prompt_scene_technical_implementation.txt @@ -0,0 +1,97 @@ +You are an expert in educational video production and Manim (Community Edition), adept at translating pedagogical narration plans into robust and spatially accurate Manim code. +**Reminder:** This technical implementation plan is fully self-contained. There is no dependency on the implementation from any previous or subsequent scenes. + +Create a detailed technical implementation plan for Scene {scene_number} (Manim code focused), *informed by the provided Manim documentation context*, strictly adhering to defined spatial constraints (safe area margins: 0.5 units, minimum spacing: 0.3 units), and **addressing potential text bounding box overflow issues**. + +Topic: {topic} +Description: {description} + +Scene Overview: +{scene_outline} + +Scene Vision and Storyboard: +{scene_vision_storyboard} + +The following manim plugins are relevant to the scene: +{relevant_plugins} + +**Spatial Constraints (Strictly Enforced):** +* **Safe area margins:** 0.5 units on all sides from the scene edges. All objects must be positioned within these margins. +* **Minimum spacing:** 0.3 units between any two Manim objects (measured edge to edge). This prevents overlaps and maintains visual clarity. + +**Positioning Requirements:** +1. All positioning MUST be relative (`next_to`, `align_to`, `shift`) from ORIGIN, safe margins, or other objects. **No absolute coordinates are allowed.** +2. Use transition buffers (`Wait` times) between sub-scenes and animation steps. + +**Diagrams/Sketches (Highly Recommended):** +* Include diagrams/sketches (even text-based) for complex layouts to visualize spatial relationships, improve clarity, and reduce spatial errors. + +**Common Mistakes:** +* The Triangle class in Manim creates equilateral triangles by default. To create a right-angled triangle, use the Polygon class instead.
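+For example, a minimal illustration of the distinction (assuming standard `manim` imports; the vertex coordinates are arbitrary):
+```python
+from manim import ORIGIN, RIGHT, UP, Polygon, Triangle
+
+equilateral = Triangle()  # Triangle() is equilateral by default
+right_triangle = Polygon(ORIGIN, 3 * RIGHT, 3 * RIGHT + 2 * UP)  # right angle at the middle vertex
+```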
+ +**Manim Plugins:** +* You may use established, well-documented Manim plugins if they offer significant advantages in terms of code clarity, efficiency, or functionality not readily available in core Manim. +* **If a plugin is used:** + * Clearly state the plugin name and version (if applicable). + * Provide a brief justification for using the plugin (e.g., "Using `manim-plugin-name` for its advanced graph layout capabilities"). + * Ensure all plugin usage adheres to the plugin's documentation. + * Include a comment in the plan: `### Plugin: [plugin name] - [brief justification]`. + +**Focus:** +* Creating *pedagogically sound and spatially correct Manim code*. +* Detailed technical descriptions, referencing Manim documentation. +* Strict adherence to spatial constraints and relative positioning. + +You MUST generate the technical implementation plan for the scene in the following format (from ```xml to ```): + +```xml + +0. **Dependencies**: + - **Manim API Version**: Target the latest stable Manim release, using only documented API elements. + - **Allowed Imports**: `manim`, `numpy`, and any explicitly approved and documented Manim plugins. No external assets (e.g., images, audio, or video files) are allowed, but established Manim plugins are permitted. + +1. **Manim Object Selection & Configuration (Text and Shapes)**: + - Clearly define the Manim objects (e.g., `Tex`, `MathTex`, `Circle`, `Line`, etc.) used to construct the scene. Also include any objects provided by used plugins. + - Specify all key parameters such as text content, font size, color, stroke, or shape dimensions. + - **Text Considerations**: + - **Use `MathTex` for mathematical expressions and equations, ensuring valid LaTeX syntax.** For example: `MathTex("x^2 + y^2 = r^2")`. + - **Use `Tex` for all non-mathematical text, including titles, labels, explanations, and general text.** For example: `Tex("This is a circle")`. + - **If you need to include regular text *within* a `MathTex` environment (e.g., for explanations alongside a formula), use the `\\text{{}}` command.** For example: `MathTex(r"\\text{{Area of circle}} = \\pi r^2")`. + - **Do not use `MathTex` for regular text, as it will result in incorrect spacing and formatting.** + - **LaTeX Packages**: If any `Tex` or `MathTex` objects require LaTeX packages beyond those included in Manim's default template, specify them here. For example: "Requires: `\\usepackage{{amssymb}}`". Create a `TexTemplate` object and add the necessary packages using `add_to_preamble()`. + - **Font Size Recommendations**: + - For title text, a font size of 28 is highly recommended. + - For side labels or formulas, a font size of 24 is highly recommended. + - However, if the text has more than 10 words, the font size should be reduced further and multiple lines should be used. + - Confirm all objects begin within the safe area (0.5 units from all edges) and maintain at least 0.3 units spacing to avoid overlaps. + +2. **VGroup Structure & Hierarchy**: + - Organize related elements into `VGroup`s for efficient spatial and animation management. If a plugin provides a specialized group-like object, consider using it. + - For each `VGroup`, define the parent-child relationships and ensure internal spacing of at least 0.3 units. + - Clearly document the purpose for each grouping (e.g., "formula_group" for mathematical expressions). + +3.
**Spatial Positioning Strategy**: + - Mandate the exclusive use of relative positioning methods (`next_to`, `align_to`, `shift`), based on ORIGIN, safe margins, or other objects. + - For every object, specify: + - The reference object (or safe edge) used for positioning. + - The specific method (and direction/aligned edge) along with a `buff` value (minimum 0.3 units). + - Outline the layout in sequential stages, inserting visual checkpoints to verify that every element continues to respect safe margins and spacing. + - Highlight measures to safeguard text bounding boxes, especially for multi-line text. + - Reference the font size recommendations under "Text Considerations" to ensure appropriate sizing and prevent overflow. + +4. **Animation Methods & Object Lifecycle Management**: + - Define clear animation sequences using documented methods such as `Create`, `Write`, `FadeIn`, `Transform`, and corresponding removal animations (`FadeOut`, `Uncreate`). Include animation methods from plugins if they are used. + - For each animation, specify parameters like `run_time`, `lag_ratio`, and the use of `Wait()` for transition buffers. + - Ensure every object's appearance and removal are managed to prevent clutter and maintain scene clarity. + +5. **Code Structure & Reusability**: + - Propose modular functions for creating and animating common objects to promote code reusability. + - Organize the overall code structure into logical sections: dependencies, object definitions, individual layout stages, and the main `construct` method. + - Include inline comments to document the rationale for configuration choices, referencing the Manim Documentation *and the plugin documentation where applicable*. + +***Mandatory Safety Checks***: + - **Safe Area Enforcement**: All objects, including text bounding boxes, must remain within 0.5 unit margins. + - **Minimum Spacing Validation**: Confirm a minimum of 0.3 units spacing between every pair of objects. + - **Transition Buffers**: Use explicit `Wait()` calls to separate animation steps and sub-scenes. + +```
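The "LaTeX Packages" item in the template above names `TexTemplate` and `add_to_preamble()`; a minimal sketch of that pattern in Manim CE, where the package and the formula are arbitrary examples:

```python
from manim import MathTex, TexTemplate

template = TexTemplate()
template.add_to_preamble(r"\usepackage{amssymb}")  # package beyond the default preamble

# font_size=24 follows the template's recommendation for formula text.
formula = MathTex(r"x \in \mathbb{R}", tex_template=template, font_size=24)
```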
diff --git a/task_generator/prompts_raw/prompt_scene_vision_storyboard.txt b/task_generator/prompts_raw/prompt_scene_vision_storyboard.txt new file mode 100644 index 0000000000000000000000000000000000000000..f35d1e324bce182e495b53a0c98fe184d74d15a0 --- /dev/null +++ b/task_generator/prompts_raw/prompt_scene_vision_storyboard.txt @@ -0,0 +1,71 @@ +You are an expert in educational video production and Manim animation. +**Reminder:** Each scene's vision and storyboard plan is entirely self-contained. There is no dependency on any implementation from previous or subsequent scenes. However, the narration will treat all scenes as part of a single, continuous video. + +Create a scene vision and storyboard plan for Scene {scene_number}, thinking in Manim terms, and strictly adhering to the defined spatial constraints. + +Topic: {topic} +Description: {description} + +Scene Overview: +{scene_outline} + +The following manim plugins are relevant to the scene: +{relevant_plugins} + +**Spatial Constraints (Strictly Enforced):** +* **Safe area margins:** 0.5 units on all sides from the scene edges. *All objects must be positioned within these margins.* +* **Minimum spacing:** 0.3 units between any two Manim objects (measured edge to edge). *Ensure a minimum spacing of 0.3 units to prevent overlaps and maintain visual clarity. This spacing must be maintained between all objects in the scene, including text, shapes, and graphs.* + +**Positioning Requirements:** +1. Safe area margins (0.5 units). +2. Minimum spacing between objects (0.3 units). +3. Relative positioning (`next_to`, `align_to`, `shift`) from `ORIGIN`, margins, or object references. **No absolute coordinates are allowed.** All positioning MUST be relative and clearly specified using reference points and relative positioning methods. +4. Transition buffers (`Wait` times) between sub-scenes and animation steps for visual clarity and pacing. + +**Diagrams/Sketches (Optional but Recommended for Complex Scenes):** +* For complex scenes, consider including a simple diagram or sketch (even text-based) of the intended layout to visually clarify spatial relationships and ensure adherence to spacing and margin constraints. + +**Focus:** +* Focus on clear visual communication of the scene's learning objective through effective use of Manim objects and animations, while strictly adhering to the defined spatial constraints. +* Provide detailed visual descriptions in Manim terms to guide human implementation. +* Prioritize explanation and visualization of the theorem. Do not include any promotional elements or quiz sessions. +* Minimize text usage - rely primarily on visual elements, mathematical notation, and animations to convey concepts. Use text sparingly and only when necessary for clarity. + +**Common Mistakes:** +* The Triangle class in Manim creates equilateral triangles by default. To create a right-angled triangle, use the Polygon class instead. + +**Manim Plugins:** +* Consider using established Manim plugins if they significantly simplify the implementation or offer visual elements not readily available in core Manim. If a plugin is used, clearly indicate this in the storyboard with a note like "**Plugin Suggestion:** Consider using the `manim-plugin-name` plugin for [brief explanation of benefit]." + +You MUST generate the scene vision and storyboard plan for the scene in the following format (from ```xml to ```): + +```xml + +[SCENE_VISION] +1. **Scene Overview**: + - Scene story, key takeaway, video role. *Consider how this scene fits within the overall video narrative.* + - **Visual learning objectives for viewers:** Think about *specific Manim object types* that best represent the learning objective. Example: "Visualize roots as `Dot` objects on an `Axes` graph." Be specific about Manim object classes (e.g., `MathTex`, `Shapes`, `Graphs`, `Axes`, `VGroup`). If a plugin provides a relevant object type, mention it (e.g., "Visualize X using `PluginObject` from `manim-plugin-name`"). + - How do Manim visuals & animations support learning? Consider `MathTex`, `Shapes`, `Graphs`, `Axes`, `VGroup`. Focus on spatial arrangement and clarity, ensuring adherence to safe area margins and minimum spacing (0.3 units). Consider using `VGroup` to group related formula components for easier animation and spatial control. Example: "Use `VGroup` to group related formula components for easier animation and spatial control, ensuring a minimum spacing of 0.3 units between the VGroup and other scene elements." If a plugin offers a more efficient way to achieve a visual effect, mention it. + - Key concepts to emphasize visually using visual hierarchy and spatial arrangement in Manim, while respecting safe area margins and minimum spacing (0.3 units). **Use `MathTex` for mathematical expressions and equations. Use `Tex` for general text, titles, labels, and any non-mathematical text.
When mixing text with mathematical symbols in `MathTex`, use the `\\text{{}}` command (e.g., `MathTex(r"\\text{{Area}} = \\pi r^2")`)** + +[STORYBOARD] +1. **Visual Flow & Pacing (Manim Animation Sequence)**: + - Describe the sequence of Manim visuals and animations (`Text`, `Circle`, `Arrow`, `Create`, `FadeIn`, `Transform`, etc.). Be specific about animation types and their parameters (e.g., `run_time`). If a plugin provides a specific animation type, mention it (e.g., "Use `PluginAnimation` from `manim-plugin-name`"). + - Key visual moments: composition and arrangement of Manim elements, ensuring all elements are within safe area margins and maintain a minimum 0.3 unit spacing. Example: "`MathTex` formula center (`.move_to(ORIGIN)`) with `Write` animation, ensuring 0.3 unit spacing from scene edges and other elements." + - Visual transitions between ideas using Manim animations (`Transform`, `Shift`, `FadeOutAndShift`, etc.). Specify transition animations and their timings. + - Scene pacing (pauses, action) and Manim animation timing's role. Use `Wait()` for transition buffers and visual clarity. + - **Sub-scene Breakdown**: Divide the scene into logical sub-scenes, each focusing on a specific step in the explanation or visualization. + - For each sub-scene, start with a **Visual Element**: The primary visual component that drives the explanation (e.g., mathematical notation, diagram, graph). If this element comes from a plugin, clearly state this (e.g., "Visual Element: `PluginObject` from `manim-plugin-name`"). + - Detail the **Animation Sequence**: Describe step-by-step the Manim animations and visual elements for each sub-scene. Be specific about: + - **Text Usage Guidelines:** + - **Use `MathTex` *only* for mathematical expressions and equations.** + - **Use `Tex` for all other text, including labels, explanations, and titles.** + - **When mixing text with mathematical symbols in `MathTex`, wrap the text portions in `\\text{{}}`. Example: `MathTex(r"\\text{{Area of circle}} = \\pi r^2")`.** + - Manim object classes (`MathTex`, `Circle`, `Arrow`, `Axes`, `Plot`, `Line`, `VGroup`, etc.), prioritizing mathematical notation and visual elements over text. Include plugin object classes where appropriate. + - Animation types (`Create`, `Write`, `FadeIn`, `Transform`, `FadeOut`, `Circumscribe`, `FocusOn`, etc.) and their parameters (e.g., `run_time`). Include plugin animation types where appropriate. + - Positioning of objects using relative positioning methods (`.next_to()`, `.align_to()`, `.shift()`, `.to_corner()`, `.move_to(ORIGIN)`, etc.) and references to other objects or scene elements. **No absolute coordinates allowed.** + - Color and style specifications (e.g., `color=BLUE`, `stroke_width=2`, `dashed=True`). + - Explicitly mention safe area margins and minimum spacing (0.3 units) for all objects within each sub-scene. + + +``` \ No newline at end of file
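Both storyboard-facing templates above repeat the same text-object contract: `Tex` for prose, `MathTex` for mathematics, and `\text{}` for prose embedded inside math. A minimal sketch using the templates' own example strings:

```python
from manim import MathTex, Tex

title = Tex("This is a circle")                       # non-mathematical text
formula = MathTex(r"x^2 + y^2 = r^2")                 # pure math
mixed = MathTex(r"\text{Area of circle} = \pi r^2")   # prose inside math via \text{}
```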
diff --git a/task_generator/prompts_raw/prompt_teaching_framework.txt b/task_generator/prompts_raw/prompt_teaching_framework.txt new file mode 100644 index 0000000000000000000000000000000000000000..7766c61523d6474f860f79618de49409bfbfb967 --- /dev/null +++ b/task_generator/prompts_raw/prompt_teaching_framework.txt @@ -0,0 +1,180 @@ +# Comprehensive Educational Video Content Framework + +## 1. Pre-Production Planning + +### A. Learning Objectives +- **Knowledge Level (Remember & Understand)** + Define specific, measurable learning outcomes that can be clearly assessed and evaluated. These outcomes should be concrete and observable, allowing instructors to verify that learning has occurred. Each outcome should be written using precise language that leaves no ambiguity about what constitutes success. For example, "After watching this video, learners will be able to define and explain the concept of variables in programming" provides a clear benchmark for assessment. + + Action verbs are essential tools for crafting effective learning objectives. Choose verbs like define, list, describe, explain, and identify that clearly indicate the expected cognitive processes. These verbs should align with Bloom's Taxonomy to ensure appropriate cognitive engagement. When applicable, ensure all objectives align with relevant curriculum standards to maintain educational consistency and meet institutional requirements. + +- **Comprehension Level (Analyze & Evaluate)** + Develop objectives that emphasize deeper understanding and connections between concepts. These objectives should go beyond simple recall to require analysis and evaluation of the material. Students should be able to make meaningful connections between different aspects of the content and explain their relationships. For example, "Learners will be able to compare different data types and explain when to use each" demonstrates this deeper level of understanding. + + Critical thinking elements should be deliberately incorporated into each objective. Create scenarios that challenge students to apply their knowledge in new contexts. These scenarios should require careful analysis and reasoned decision-making to solve problems effectively. Design learning experiences that encourage students to question assumptions and develop analytical skills. + +- **Application Level (Apply & Create)** + Develop practical skills that directly translate to real-world applications and scenarios. These objectives should focus on hands-on experience and tangible outcomes that demonstrate mastery. For example, "Learners will be able to write a basic program using variables and proper naming conventions" provides a clear, actionable goal that can be demonstrated through practical work. + + Include hands-on exercises that allow students to practice and refine their skills in a supported environment. These exercises should gradually increase in complexity to build confidence and competence. Provide real-world context by incorporating authentic scenarios and problems that students might encounter in their future careers or daily lives. This connection to reality helps maintain engagement and demonstrates the immediate value of the learning. + +- **Target Audience Analysis** + Conduct thorough demographic research to understand your learners' backgrounds, ages, and educational levels. This analysis should include assessment of prior knowledge and experience with the subject matter. Consider the technical capabilities of your audience, including their access to necessary tools and technologies. + + Evaluate different learning preferences and styles within your target audience. This understanding helps in designing varied content that appeals to visual, auditory, and kinesthetic learners. Consider cultural and linguistic factors that might impact learning effectiveness. Create content that is inclusive and accessible to learners from diverse backgrounds. Account for varying levels of technical proficiency and ensure your content can be accessed across different devices and platforms. + +### B.
Content Structure + +- **Hook (5-10% of duration)** + Begin each video with a compelling problem or scenario that immediately captures attention and creates interest. This hook should be relevant to the content while being unexpected or intriguing enough to maintain viewer engagement. Use surprising facts or statistics that challenge common assumptions or demonstrate the importance of the topic. + + Share relevant real-world applications that demonstrate immediate value to the learner. For example, "What if you could automate your daily tasks with just a few lines of code?" creates immediate interest by connecting to practical benefits. The hook should create an emotional connection and generate curiosity about the upcoming content. Consider using storytelling elements or real-world problems that your audience can relate to. + +- **Context (10-15%)** + Provide clear explanations of how the content relates to real-world situations and problems. This context should help learners understand why the material is relevant to their lives or career goals. Make explicit connections to previous knowledge and experiences that learners can build upon. + + Address the fundamental question of "Why should I learn this?" by demonstrating practical applications and benefits. This explanation should be concrete and specific to your audience's needs and interests. Set clear expectations for learning outcomes so students understand what they will gain from the content. Provide a roadmap for the learning journey ahead, including how this content connects to future topics and skills. + +- **Core Content (60-70%)** + Organize material in a logical progression that builds from fundamental concepts to more complex applications. This progression should be carefully planned to avoid overwhelming learners while maintaining engagement. Include multiple examples that demonstrate concepts from different angles and perspectives. + + Use varied teaching methods to accommodate different learning styles and maintain interest. These methods might include demonstrations, animations, code examples, and interactive elements. Implement frequent knowledge checks throughout the content to ensure understanding and maintain engagement. Break complex topics into manageable chunks that can be easily processed and remembered. + +- **Practice/Application (10-15%)** + Create guided practice opportunities that allow learners to apply new knowledge in a supported environment. These practice sessions should include clear instructions and immediate feedback mechanisms. Design interactive elements that engage learners and require active participation rather than passive viewing. + + Develop problem-solving scenarios that challenge learners to apply concepts in realistic situations. These scenarios should gradually increase in complexity as learners gain confidence. Include opportunities for peer learning and collaboration when possible. Provide scaffolded support that can be gradually removed as learners become more proficient. + +- **Summary (5-10%)** + Conclude each video with a comprehensive recap of key points and main takeaways. This summary should reinforce the most important concepts and their practical applications. Preview upcoming topics to create anticipation and show how current learning connects to future content. + + Provide specific action items that learners can implement immediately to reinforce their learning. These should be concrete, achievable tasks that build confidence and competence.
Share additional resources for further learning, including reference materials, practice exercises, and advanced topics. Create clear connections between the current content and future learning objectives. + +## 2. Instructional Design Elements + +### A. Cognitive Load Management + +- **Chunking Strategies** + Break complex content into manageable segments of 3-5 minutes each. These chunks should focus on single concepts or closely related ideas that form a coherent unit. Use clear transitions between segments to maintain flow while allowing for cognitive processing. + + Implement progressive complexity by building from basic concepts to more advanced applications. This progression should be carefully planned to avoid overwhelming learners. Include strategic pauses and processing time between segments to allow for reflection and integration of new information. Use visual and verbal cues to signal transitions between different concepts or levels of complexity. + +- **Visual Organization** + Develop a consistent visual hierarchy that guides learners through the content effectively. This hierarchy should use size, color, and placement to indicate the relative importance of different elements. Implement clean, uncluttered designs that minimize distractions and focus attention on key concepts. + + Apply color coding consistently to help learners identify and remember related concepts. This coding should be intentional and meaningful, not merely decorative. Use white space effectively to create visual breathing room and help separate different concepts. Ensure that visual elements support rather than compete with the learning objectives. + +- **Information Processing** + Carefully limit the introduction of new concepts to 5-7 per video to prevent cognitive overload. This limitation helps ensure that learners can effectively process and retain the information presented. Develop and use mnemonics and memory aids that help learners organize and remember key concepts. + + Provide visual anchors that learners can reference throughout the content. These anchors should help maintain context and show relationships between concepts. Include strategic review points that reinforce previous learning before introducing new material. Create clear connections between new information and existing knowledge to facilitate better retention. + +### B. Engagement Techniques + +- **Storytelling Elements** + Develop a clear narrative flow that carries learners through the content naturally. This narrative should have a beginning, middle, and end that maintains interest and supports learning objectives. Use character-driven examples that learners can relate to and remember. + + Include elements of conflict and resolution to create tension and maintain engagement. These elements should be relevant to the learning objectives and help illustrate key concepts. Maintain an emotional connection through relatable scenarios and authentic problems. Create story arcs that span multiple videos or modules to maintain long-term engagement. + +- **Visual Support** + Create relevant graphics and animations that enhance understanding of key concepts. These visual elements should be purposeful and directly support learning objectives, not merely decorative. Implement a consistent visual style across all content to maintain professionalism and reduce cognitive load. + + Develop clear infographics that break down complex concepts into understandable components. These should use visual hierarchy and design principles effectively. 
Use motion and animation thoughtfully to direct attention to important elements and demonstrate processes. Ensure all visual elements are accessible and effectively communicate their intended message. + +- **Interactive Components** + Design and embed quiz questions that check understanding at key points in the content. These questions should be strategically placed to maintain engagement and reinforce learning. Include deliberate pause points that encourage reflection and active processing of information. + + Create coding challenges or practical exercises that allow immediate application of concepts. These should be scaffolded appropriately for the learner's skill level. Provide multiple opportunities for feedback, both automated and instructor-guided when possible. Design interactive elements that encourage experimentation and learning from mistakes. + +## 3. Content Delivery Framework + +### A. Teaching Sequence + +1. **Activate** + Begin each learning session by connecting to familiar concepts that students already understand. This activation of prior knowledge creates a foundation for new learning and helps students feel confident. Use carefully chosen analogies and metaphors that bridge the gap between known and new concepts. These comparisons should be relevant to your audience's experience and background. + + Create explicit connections to previous learning modules or related concepts. These connections help students build a coherent mental model of the subject matter. Assess prior knowledge through quick activities or questions that reveal students' current understanding. Use this assessment to adjust your teaching approach and address any misconceptions early in the lesson. + +2. **Present** + Deliver clear, structured explanations of new concepts that build upon activated knowledge. These explanations should use precise language while remaining accessible to your target audience. Employ multiple representation methods, including verbal explanations, visual diagrams, and interactive demonstrations. This variety helps accommodate different learning styles and reinforces understanding. + + Provide step-by-step demonstrations that break complex processes into manageable parts. Each step should be clearly explained and connected to the overall objective. Include real-world examples that illustrate practical applications of the concepts. These examples should be relevant to your audience's interests and career goals. + +3. **Guide** + Develop worked examples that demonstrate expert problem-solving processes and thinking strategies. These examples should include explicit explanations of decision-making and common pitfalls to avoid. Share expert thinking processes by "thinking aloud" through problem-solving steps. This transparency helps students understand the metacognitive aspects of learning. + + Create scaffolded learning experiences that gradually reduce support as students gain confidence. Begin with highly structured guidance and progressively move toward independent work. Address common misconceptions and errors proactively, explaining why they occur and how to avoid them. Provide clear strategies for troubleshooting and problem-solving. + +4. **Practice** + Design guided exercises that allow students to apply new knowledge with appropriate support. These exercises should be carefully sequenced to build confidence and competence gradually. Include opportunities for independent practice that reinforce learning and build autonomy.
+
+## 3. Content Delivery Framework
+
+### A. Teaching Sequence
+
+1. **Activate**
+   Begin each learning session by connecting to familiar concepts that students already understand. This activation of prior knowledge creates a foundation for new learning and helps students feel confident. Use carefully chosen analogies and metaphors that bridge the gap between known and new concepts. These comparisons should be relevant to your audience's experience and background.
+
+   Create explicit connections to previous learning modules or related concepts. These connections help students build a coherent mental model of the subject matter. Assess prior knowledge through quick activities or questions that reveal students' current understanding. Use this assessment to adjust your teaching approach and address any misconceptions early in the lesson.
+
+2. **Present**
+   Deliver clear, structured explanations of new concepts that build upon activated knowledge. These explanations should use precise language while remaining accessible to your target audience. Employ multiple representation methods, including verbal explanations, visual diagrams, and interactive demonstrations. This variety helps accommodate different learning styles and reinforces understanding.
+
+   Provide step-by-step demonstrations that break complex processes into manageable parts. Each step should be clearly explained and connected to the overall objective. Include real-world examples that illustrate practical applications of the concepts. These examples should be relevant to your audience's interests and career goals.
+
+3. **Guide**
+   Develop worked examples that demonstrate expert problem-solving processes and thinking strategies. These examples should include explicit explanations of decision-making and common pitfalls to avoid. Share expert thinking processes by "thinking aloud" through problem-solving steps. This transparency helps students understand the metacognitive aspects of learning.
+
+   Create scaffolded learning experiences that gradually reduce support as students gain confidence. Begin with highly structured guidance and progressively move toward independent work. Address common misconceptions and errors proactively, explaining why they occur and how to avoid them. Provide clear strategies for troubleshooting and problem-solving.
+
+4. **Practice**
+   Design guided exercises that allow students to apply new knowledge with appropriate support. These exercises should be carefully sequenced to build confidence and competence gradually. Include opportunities for independent practice that reinforce learning and build autonomy. Ensure these practice sessions are aligned with learning objectives and provide clear success criteria.
+
+   Create peer learning opportunities that allow students to learn from and teach others. These interactions can reinforce understanding and develop communication skills. Implement immediate feedback mechanisms that help students understand their progress and areas for improvement. This feedback should be specific, constructive, and actionable.
+
+5. **Apply**
+   Develop real-world projects that require students to integrate and apply their learning in authentic contexts. These projects should be challenging but achievable, with clear connections to practical applications. Create case studies that illustrate complex scenarios and require critical thinking and problem-solving skills. These studies should reflect realistic situations students might encounter in their careers.
+
+   Design problem-solving scenarios that encourage creative application of knowledge and skills. These scenarios should have multiple possible solutions to encourage innovative thinking. Provide opportunities for creative applications that allow students to extend their learning in personally meaningful ways. Support experimentation and risk-taking in a safe learning environment.
+
+### B. Presentation Techniques
+
+- **Transitions**
+  Implement clear verbal cues that signal shifts between concepts or activities. These cues help students maintain orientation and prepare for new information. Design visual transition elements that support cognitive processing and maintain engagement. These elements should be consistent throughout your content to establish familiar patterns.
+
+  Create concept maps that show relationships between different topics and ideas. These maps help students understand how current learning connects to broader concepts. Use progress indicators that help students track their advancement through the material. These indicators should provide a sense of accomplishment and motivation.
+
+- **Multiple Representations**
+  Combine text and graphics effectively to convey information through multiple channels. This combination should be purposeful and coordinated to enhance understanding. Integrate audio and visual elements that complement each other and reinforce key concepts. Ensure these elements work together without creating cognitive overload.
+
+  Develop interactive elements that encourage active engagement with the content. These elements should provide immediate feedback and support learning objectives. Include physical demonstrations when appropriate to illustrate concepts in tangible ways. These demonstrations should be clear, visible, and directly relevant to learning goals.
+
+## 4. Assessment Integration
+
+### A. Knowledge Verification
+- **Formative Assessment**
+  Implement regular quick checks for understanding throughout the learning process. These checks should be low-stakes and provide immediate feedback to both learner and instructor. Design self-assessment prompts that encourage students to reflect on their own learning progress. These prompts should help students develop metacognitive skills and self-awareness.
+
+  Create opportunities for peer discussion and feedback that deepen understanding through explanation and debate. These discussions should be structured to ensure productive exchanges and learning outcomes. Develop reflection questions that help students connect new learning to existing knowledge and future applications. These questions should promote deep thinking and personal connection to the material.
+
+- **Summative Assessment**
+  Design project-based assessments that evaluate comprehensive understanding and practical application. These projects should integrate multiple concepts and skills learned throughout the course. Guide students in developing portfolios that demonstrate their learning journey and achievements. These portfolios should include examples of both process and product.
+
+  Create opportunities for skill demonstration that allow students to show mastery in authentic contexts. These demonstrations should reflect real-world applications and standards. Develop knowledge application assessments that require students to transfer learning to new situations. These assessments should evaluate both understanding and adaptability.
+
+### B. Learning Reinforcement
+- **Review Strategies**
+  Implement spaced repetition techniques that optimize long-term retention of information. This approach should strategically revisit concepts at increasing intervals; a simplified scheduling sketch follows below. Create concept mapping exercises that help students visualize and understand relationships between ideas. These maps should become increasingly complex as understanding develops.
+
+  Guide students in knowledge synthesis activities that combine multiple concepts into coherent understanding. These activities should help students see the bigger picture and make meaningful connections. Design application scenarios that require students to apply knowledge in new and challenging contexts. These scenarios should build confidence and demonstrate practical relevance.
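+
+  To make "increasing intervals" concrete, here is a deliberately simplified, Leitner-style scheduling sketch that doubles the gap after each successful recall; a production system might use a more refined algorithm such as SM-2:
+
+  ```python
+  from datetime import date, timedelta
+
+  def next_review(last_interval_days: int, recalled: bool) -> timedelta:
+      """Double the gap after a successful recall; restart at one day after a miss."""
+      if not recalled:
+          return timedelta(days=1)
+      return timedelta(days=max(1, last_interval_days * 2))
+
+  # A concept recalled successfully at the 4-day mark is next scheduled 8 days
+  # out; a miss would bring it back tomorrow.
+  today = date.today()
+  print(today + next_review(4, recalled=True))
+  print(today + next_review(4, recalled=False))
+  ```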
+
+## 5. Technical Considerations
+
+### A. Video Production Elements
+- **Duration Guidelines**
+  Optimize video length to maintain engagement while effectively covering necessary content. The ideal duration of 6-12 minutes balances attention span with comprehensive coverage. Implement concept-based segmentation that breaks longer topics into digestible chunks. This segmentation should follow natural breaking points in the material.
+
+  Consider attention span patterns when planning content structure and pacing. Include variety and interaction to maintain engagement throughout longer sessions. Adapt content length to platform-specific requirements and viewing habits. Consider mobile viewing habits and platform limitations in your planning.
+
+- **Quality Standards**
+  Ensure professional audio quality through proper equipment and recording techniques. This includes clear voice recording, minimal background noise, and appropriate volume levels. Maintain consistent lighting that enhances visibility and reduces viewer fatigue. Pay attention to both subject lighting and screen content visibility.
+
+  Create clear visual presentations that effectively communicate key concepts. This includes appropriate font sizes, color contrast, and visual hierarchy. Maintain appropriate pacing that allows for processing time while maintaining engagement. Consider your audience's needs and learning objectives when determining pace.
+
+### B. Accessibility Features
+- **Universal Design**
+  Create content that accommodates multiple learning modalities and preferences. This includes providing information through visual, auditory, and interactive channels. Ensure screen reader compatibility by following accessibility best practices and standards. This includes proper heading structure and alt text for images.
+
+  Implement appropriate color contrast considerations for all visual elements (a short contrast-checking sketch follows below). This ensures content is accessible to viewers with various visual abilities. Provide alternative text descriptions for all important images and graphics. These descriptions should convey the same information as the visual elements.
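+
+  The contrast check itself is mechanical and worth automating. The sketch below implements the WCAG 2.1 definitions of relative luminance and contrast ratio (level AA requires at least 4.5:1 for normal text and 3:1 for large text); the function names are our own:
+
+  ```python
+  def relative_luminance(rgb: tuple[int, int, int]) -> float:
+      """WCAG 2.1 relative luminance for an 8-bit sRGB color."""
+      def linearize(channel: int) -> float:
+          c = channel / 255
+          return c / 12.92 if c <= 0.03928 else ((c + 0.055) / 1.055) ** 2.4
+      r, g, b = (linearize(c) for c in rgb)
+      return 0.2126 * r + 0.7152 * g + 0.0722 * b
+
+  def contrast_ratio(fg: tuple[int, int, int], bg: tuple[int, int, int]) -> float:
+      lighter, darker = sorted((relative_luminance(fg), relative_luminance(bg)), reverse=True)
+      return (lighter + 0.05) / (darker + 0.05)
+
+  print(contrast_ratio((255, 255, 255), (0, 0, 0)))  # 21.0, the maximum possible
+  ```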
+
+## 6. Follow-up Resources
+
+### A. Supporting Materials
+- **Resource Types**
+  Develop comprehensive practice exercises that reinforce learning and build confidence. These exercises should range from basic to advanced, accommodating different skill levels. Create well-documented code samples that demonstrate best practices and common patterns. These samples should include comments explaining key concepts and decisions.
+
+  Compile detailed reference guides that support independent learning and problem-solving. These guides should be easily searchable and regularly updated. Design cheat sheets that provide quick access to essential information and common procedures. These should be concise while including all crucial information.
+
+### B. Implementation Guide
+- **Learning Pathways**
+  Create clear prerequisite maps that show relationships between different topics and skills. This mapping helps students understand learning dependencies and plan their progress. Provide advanced topic suggestions that help motivated learners extend their knowledge. These suggestions should include resources and guidance for self-directed learning.
+
+  Develop skill progression guides that show clear paths from beginner to advanced levels. These guides should include milestones and checkpoints for measuring progress. Suggest project ideas that allow practical application of learned skills. These projects should be scalable to different skill levels and interests.
\ No newline at end of file
diff --git a/task_generator/prompts_raw/prompt_visual_fix_error.txt b/task_generator/prompts_raw/prompt_visual_fix_error.txt
new file mode 100644
index 0000000000000000000000000000000000000000..19024df9dca606f1f0b6fad2693cb3c7523a64c8
--- /dev/null
+++ b/task_generator/prompts_raw/prompt_visual_fix_error.txt
@@ -0,0 +1,20 @@
+You are an expert in Manim animations. Your task is to ensure that the rendered animation frame (image) aligns with the intended teaching content based on the provided implementation plan.
+
+Instructions:
+Evaluate whether the object coordinates and positions in the image match the described plan and educational purpose.
+The implementation plan serves as a reference, but your primary goal is to verify that the rendered animation frame supports effective teaching.
+For example:
+* If the object is supposed to be at the top of the screen but appears at the bottom, you need to adjust the position.
+* If the object is supposed to be on the left side but is too far to the left, you need to adjust the position.
+* If two objects are not supposed to overlap but they do, you need to adjust their positions.
+
+If adjustments are needed, provide the complete code of the adjusted version.
+If the current code is correct, return only "" as output.
+
+Manim Implementation Plan:
+{implementation}
+
+Generated Code:
+{generated_code}
+
+Return the complete code of the adjusted version if the code needs to be updated. If the code is correct, only return "" as output.
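
The two placeholders in this template ({implementation} and {generated_code}) look like Python str.format fields. As a minimal, hypothetical sketch of how the template might be consumed (the helper name and loading path are assumptions; the repository may wire this up differently):

```python
from pathlib import Path

def build_visual_fix_prompt(implementation: str, generated_code: str) -> str:
    """Load the raw prompt template and substitute its two placeholders."""
    template = Path("task_generator/prompts_raw/prompt_visual_fix_error.txt").read_text()
    # Safe with str.format because the template contains no other brace characters.
    return template.format(implementation=implementation, generated_code=generated_code)
```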
diff --git a/task_generator/prompts_raw/prompt_visual_self_reflection.txt b/task_generator/prompts_raw/prompt_visual_self_reflection.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3832b3b452da79696b6dc2690af646f8104dc189
--- /dev/null
+++ b/task_generator/prompts_raw/prompt_visual_self_reflection.txt
@@ -0,0 +1,47 @@
+You are an expert in Manim animations and educational video quality assessment. Your task is to analyze a rendered Manim video and its corresponding audio narration to identify areas for visual and auditory improvement, ensuring alignment with the provided implementation plan and enhancing the video's teaching effectiveness.
+
+Please analyze the provided Manim video and listen to the accompanying audio narration. Conduct a thorough self-reflection focusing on the following aspects:
+
+**1. Visual Presentation and Clarity (Automated VLM Analysis & Expert Human-like Judgment):**
+
+* **Object Overlap:** Does the video exhibit any visual elements (text, shapes, equations, etc.) overlapping in a way that obscures information or makes the animation difficult to understand? If possible, detect regions of significant overlap and highlight them in your reflection.
+* **Out-of-Bounds Objects:** Are any objects positioned partially or entirely outside the visible frame of the video? Identify and report objects that appear to be clipped or outside the frame boundaries.
+* **Incorrect Object Positioning:** Based on your understanding of good visual design and the scene's educational purpose, are objects placed in positions that are illogical, distracting, or misaligned with their intended locations or relationships to other elements as described in the implementation plan? Consider:
+    * **Logical Flow:** Does the spatial arrangement support the intended visual flow and narrative progression of the scene?
+    * **Alignment and Balance:** Is the scene visually balanced? Are elements aligned in a way that is aesthetically pleasing and contributes to clarity, or does the layout appear haphazard or unbalanced?
+    * **Proximity and Grouping:** Are related elements positioned close enough to be visually grouped, and are unrelated elements sufficiently separated to avoid visual clutter?
+* **General Visual Clarity & Effectiveness:** Consider broader aspects of visual communication. Are there any other issues that detract from the video's clarity, impact, or overall effectiveness? This could include:
+    * **Visual Clutter:** Is the scene too busy or visually overwhelming at any point? Are there too many elements on screen simultaneously?
+    * **Poor Spacing/Layout:** Is the spacing between elements inconsistent or inefficient, making the scene feel cramped or unbalanced? Are margins and padding used effectively?
+    * **Ineffective Use of Color:** Are color choices distracting, clashing, or not contributing to the animation's message? Are colors used consistently and purposefully to highlight key information?
+    * **Pacing Issues (Visual):** Is the visual animation too fast or too slow in certain sections, hindering comprehension? Are visual transitions smooth and well-timed?
+    * **Animation Clarity:** Are the animations themselves clear and helpful in conveying the intended information? Do animations effectively guide the viewer's eye and focus attention?
+
+**2. Narration Quality:**
+
+* **Narration Clarity and Pacing:** Is the narration clear, concise, and easy to understand? Is the pacing of the narration appropriate for the visual content and the target audience? Does the narration effectively support the visual explanations?
+* **Narration Sync with Visuals:** Does the narration effectively synchronize with the on-screen visuals? Use VLM analysis of the video to identify instances where the narration is misaligned with the animations or visual elements it is describing. Report specific timings of misalignment.
+
+**3. Alignment with Implementation Plan:**
+
+* **Visual Fidelity:** Does the rendered video accurately reflect the visual elements and spatial arrangements described in the provided Manim Implementation Plan? Identify any deviations.
+* **Animation Fidelity:** Do the animations in the video match the animation methods and sequences outlined in the Implementation Plan? Report any discrepancies.
+
+Manim Implementation Plan:
+{implementation}
+
+Generated Code:
+{generated_code}
+
+Output Format 1:
+If any issues are identified in visual presentation, audio quality, narration, or plan alignment, please provide a detailed reflection on the issues and how to improve the video's visual and auditory quality, narration effectiveness, and code correctness. Then, you must return the updated Python code that directly addresses these issues. The code must be complete and executable.
+
+
+[Detailed reflection on visual, auditory, narration, and plan alignment issues and improvement suggestions. Include specific timings for narration/visual sync issues and descriptions of object overlap/out-of-bounds problems if detected by VLM. Be specific about code changes needed for improvement.]
+
+
+[Improved Python Code - Complete and Executable - Directly Addressing Reflection Points]
+
+
+Output Format 2:
+If no issues are found and the video and audio are deemed high quality, visually clear, narratively effective, and fully aligned with the implementation plan, please return only "" as output.
\ No newline at end of file
diff --git a/thumbnails/644fb913-8734-4ec4-96fa-5f1be8a191b2.jpg b/thumbnails/644fb913-8734-4ec4-96fa-5f1be8a191b2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1113f85754b4379abc578c488ac52bf1b8cd7a49
Binary files /dev/null and b/thumbnails/644fb913-8734-4ec4-96fa-5f1be8a191b2.jpg differ
diff --git a/thumbnails/fb610799-54aa-4a07-8ead-2b566374b866.jpg b/thumbnails/fb610799-54aa-4a07-8ead-2b566374b866.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..15313ac78a95fb8b2200de2423aa709057a3a022
Binary files /dev/null and b/thumbnails/fb610799-54aa-4a07-8ead-2b566374b866.jpg differ