thanhkt committed
Commit 9b5ca29 · verified · 1 Parent(s): 8fb7841

Upload 75 files

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .dockerignore +95 -0
  2. .env +26 -0
  3. .env.example +26 -0
  4. .gitattributes +1 -0
  5. src/__init__.py +1 -0
  6. src/__pycache__/__init__.cpython-312.pyc +0 -0
  7. src/config/__init__.py +0 -0
  8. src/config/__pycache__/__init__.cpython-312.pyc +0 -0
  9. src/config/__pycache__/config.cpython-312.pyc +0 -0
  10. src/config/config.py +20 -0
  11. src/core/__init__.py +0 -0
  12. src/core/__pycache__/__init__.cpython-312.pyc +0 -0
  13. src/core/__pycache__/code_generator.cpython-312.pyc +0 -0
  14. src/core/__pycache__/parse_video.cpython-312.pyc +0 -0
  15. src/core/__pycache__/video_planner.cpython-312.pyc +0 -0
  16. src/core/__pycache__/video_renderer.cpython-312.pyc +0 -0
  17. src/core/code_generator.py +1045 -0
  18. src/core/parse_video.py +227 -0
  19. src/core/video_planner.py +670 -0
  20. src/core/video_renderer.py +1048 -0
  21. src/rag/__init__.py +0 -0
  22. src/rag/__pycache__/__init__.cpython-312.pyc +0 -0
  23. src/rag/__pycache__/rag_integration.cpython-312.pyc +0 -0
  24. src/rag/__pycache__/vector_store.cpython-312.pyc +0 -0
  25. src/rag/rag_integration.py +410 -0
  26. src/rag/vector_store.py +465 -0
  27. src/utils/__init__.py +0 -0
  28. src/utils/__pycache__/__init__.cpython-312.pyc +0 -0
  29. src/utils/__pycache__/kokoro_voiceover.cpython-312.pyc +0 -0
  30. src/utils/__pycache__/utils.cpython-312.pyc +0 -0
  31. src/utils/allowed_models.json +37 -0
  32. src/utils/kokoro_voiceover.py +117 -0
  33. src/utils/utils.py +132 -0
  34. src/utils/visual_error_detection.py +336 -0
  35. task_generator/__init__.py +297 -0
  36. task_generator/__pycache__/__init__.cpython-312.pyc +0 -0
  37. task_generator/parse_prompt.py +54 -0
  38. task_generator/prompts_raw/__init__.py +0 -0
  39. task_generator/prompts_raw/__pycache__/__init__.cpython-312.pyc +3 -0
  40. task_generator/prompts_raw/banned_reasonings.txt +18 -0
  41. task_generator/prompts_raw/code_background.txt +2 -0
  42. task_generator/prompts_raw/code_color_cheatsheet.txt +23 -0
  43. task_generator/prompts_raw/code_disable.txt +0 -0
  44. task_generator/prompts_raw/code_font_size.txt +5 -0
  45. task_generator/prompts_raw/code_limit.txt +4 -0
  46. task_generator/prompts_raw/prompt_animation_fix_error.txt +50 -0
  47. task_generator/prompts_raw/prompt_animation_rag_query_generation.txt +29 -0
  48. task_generator/prompts_raw/prompt_animation_rag_query_generation_fix_error.txt +33 -0
  49. task_generator/prompts_raw/prompt_animation_simple.txt +30 -0
  50. task_generator/prompts_raw/prompt_best_practices.txt +16 -0
.dockerignore ADDED
@@ -0,0 +1,95 @@
+ # Git and version control
+ .git
+ .gitignore
+ *.md
+ !README.md
+ 
+ # Python cache and virtual environments
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+ 
+ # Virtual environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+ tea_env/
+ 
+ # IDE and editor files
+ .vscode/
+ .idea/
+ *.swp
+ *.swo
+ *~
+ 
+ # OS generated files
+ .DS_Store
+ .DS_Store?
+ ._*
+ .Spotlight-V100
+ .Trashes
+ ehthumbs.db
+ Thumbs.db
+ 
+ # Output directories (will be created in container)
+ output/
+ *.mp4
+ *.srt
+ *.wav
+ 
+ # Image files (except those needed for the app)
+ thumbnails/
+ *.png
+ *.jpg
+ *.jpeg
+ 
+ # Log files
+ *.log
+ gradio_app.log
+ 
+ # Cache directories
+ .cache/
+ .pytest_cache/
+ 
+ # Jupyter Notebook
+ .ipynb_checkpoints
+ 
+ # Temporary files
+ tmp/
+ temp/
+ *.tmp
+ Miniconda3-*.sh
+ 
+ # Documentation that's not needed in container
+ docs/
+ 
+ # Test files
+ test_*.py
+ 
+ # Models will be downloaded in container, so exclude local ones
+ # But keep the directory structure
+ models/*.onnx
+ models/*.bin
+ ```
.env ADDED
@@ -0,0 +1,26 @@
+ # OpenAI
+ OPENAI_API_KEY=""
+ 
+ # Azure OpenAI
+ AZURE_API_KEY=""
+ AZURE_API_BASE=""
+ AZURE_API_VERSION=""
+ OPENROUTER_API_KEY = "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"
+ # Google Vertex AI
+ VERTEXAI_PROJECT=""
+ VERTEXAI_LOCATION=""
+ GOOGLE_APPLICATION_CREDENTIALS=""
+ GITHUB_API_KEY = "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"
+ GITHUB_TOKEN = "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"
+ OPENAI_API_KEY = "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"
+ # Google Gemini
+ GEMINI_API_KEY="AIzaSyBUCGQ_hDLAHQN-T1ycWBJV8SGfwusfEjg"
+ 
+ ...
+ 
+ # Kokoro TTS Settings
+ KOKORO_MODEL_PATH="models/kokoro-v0_19.onnx"
+ KOKORO_VOICES_PATH="models/voices.bin"
+ KOKORO_DEFAULT_VOICE="af"
+ KOKORO_DEFAULT_SPEED="1.0"
+ KOKORO_DEFAULT_LANG="en-us"
.env.example ADDED
@@ -0,0 +1,26 @@
+ # OpenAI
+ OPENAI_API_KEY=""
+ 
+ # Azure OpenAI
+ AZURE_API_KEY=""
+ AZURE_API_BASE=""
+ AZURE_API_VERSION=""
+ OPENROUTER_API_KEY = ""
+ # Google Vertex AI
+ VERTEXAI_PROJECT=""
+ VERTEXAI_LOCATION=""
+ GOOGLE_APPLICATION_CREDENTIALS=""
+ GITHUB_API_KEY = ""
+ GITHUB_TOKEN = ""
+ OPENAI_API_KEY = ""
+ # Google Gemini
+ GEMINI_API_KEY=""
+ 
+ ...
+ 
+ # Kokoro TTS Settings
+ KOKORO_MODEL_PATH="models/kokoro-v0_19.onnx"
+ KOKORO_VOICES_PATH="models/voices.bin"
+ KOKORO_DEFAULT_VOICE="af"
+ KOKORO_DEFAULT_SPEED="1.0"
+ KOKORO_DEFAULT_LANG="en-us"
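
For reference, a minimal sketch of how these settings are consumed at runtime with python-dotenv (the same mechanism `src/config/config.py` below uses); the fallback values here are illustrative only:

```python
import os

from dotenv import load_dotenv

# Read key=value pairs from .env into the process environment.
load_dotenv()

# Fall back to the documented defaults when a variable is left blank.
voice = os.getenv("KOKORO_DEFAULT_VOICE") or "af"
speed = float(os.getenv("KOKORO_DEFAULT_SPEED") or "1.0")
print(f"Kokoro voice={voice}, speed={speed}")
```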
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ task_generator/prompts_raw/__pycache__/__init__.cpython-312.pyc filter=lfs diff=lfs merge=lfs -text
src/__init__.py ADDED
@@ -0,0 +1 @@
+ # This is essential for the release to work
src/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (147 Bytes).
 
src/config/__init__.py ADDED
File without changes
src/config/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (154 Bytes).
 
src/config/__pycache__/config.cpython-312.pyc ADDED
Binary file (1.22 kB).
 
src/config/config.py ADDED
@@ -0,0 +1,20 @@
+ import os
+ from dotenv import load_dotenv
+ 
+ # Load environment variables from .env file
+ load_dotenv()
+ 
+ class Config:
+     OUTPUT_DIR = "output"
+     THEOREMS_PATH = os.path.join("data", "easy_20.json")
+     CONTEXT_LEARNING_PATH = "data/context_learning"
+     CHROMA_DB_PATH = "data/rag/chroma_db"
+     MANIM_DOCS_PATH = "data/rag/manim_docs"
+     EMBEDDING_MODEL = "hf:ibm-granite/granite-embedding-30m-english"
+ 
+     # Kokoro TTS configurations
+     KOKORO_MODEL_PATH = os.getenv('KOKORO_MODEL_PATH')
+     KOKORO_VOICES_PATH = os.getenv('KOKORO_VOICES_PATH')
+     KOKORO_DEFAULT_VOICE = os.getenv('KOKORO_DEFAULT_VOICE')
+     KOKORO_DEFAULT_SPEED = float(os.getenv('KOKORO_DEFAULT_SPEED', '1.0'))
+     KOKORO_DEFAULT_LANG = os.getenv('KOKORO_DEFAULT_LANG')
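
A minimal usage sketch for the `Config` class above, assuming the repository root is on `sys.path`; nothing is created on disk, and values reflect whatever `.env` provides:

```python
from src.config.config import Config

print(Config.OUTPUT_DIR)            # "output"
print(Config.EMBEDDING_MODEL)       # "hf:ibm-granite/granite-embedding-30m-english"
print(Config.KOKORO_DEFAULT_SPEED)  # float; 1.0 when KOKORO_DEFAULT_SPEED is unset
```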
src/core/__init__.py ADDED
File without changes
src/core/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (152 Bytes).
 
src/core/__pycache__/code_generator.cpython-312.pyc ADDED
Binary file (40.5 kB).
 
src/core/__pycache__/parse_video.cpython-312.pyc ADDED
Binary file (10.5 kB).
 
src/core/__pycache__/video_planner.cpython-312.pyc ADDED
Binary file (30.2 kB).
 
src/core/__pycache__/video_renderer.cpython-312.pyc ADDED
Binary file (51.4 kB).
 
src/core/code_generator.py ADDED
@@ -0,0 +1,1045 @@
+ import os
+ import re
+ import json
+ import logging
+ import glob
+ from pathlib import Path
+ from typing import Union, List, Dict, Optional, Tuple, Any
+ from PIL import Image
+ 
+ from src.utils.utils import extract_json
+ from mllm_tools.utils import _prepare_text_inputs, _extract_code, _prepare_text_image_inputs
+ from mllm_tools.gemini import GeminiWrapper
+ from mllm_tools.vertex_ai import VertexAIWrapper
+ from task_generator import (
+     get_prompt_code_generation,
+     get_prompt_fix_error,
+     get_prompt_visual_fix_error,
+     get_banned_reasonings,
+     get_prompt_rag_query_generation_fix_error,
+     get_prompt_context_learning_code,
+     get_prompt_rag_query_generation_code
+ )
+ from task_generator.prompts_raw import (
+     _code_font_size,
+     _code_disable,
+     _code_limit,
+     _prompt_manim_cheatsheet
+ )
+ from src.rag.vector_store import RAGVectorStore
+ 
+ # Configuration constants
+ DEFAULT_MAX_RETRIES = 10
+ DEFAULT_RAG_K_VALUE = 2
+ CACHE_FILE_ENCODING = 'utf-8'
+ CODE_PATTERN = r"```python(.*)```"
+ JSON_PATTERN = r'```json(.*)```'
+ 
+ # Set up logging
+ logger = logging.getLogger(__name__)
+ 
+ class CodeGenerator:
+     """A class for generating and managing Manim code with improved error handling and maintainability."""
+ 
+     def __init__(
+         self,
+         scene_model: Any,
+         helper_model: Any,
+         output_dir: str = "output",
+         print_response: bool = False,
+         use_rag: bool = False,
+         use_context_learning: bool = False,
+         context_learning_path: str = "data/context_learning",
+         chroma_db_path: str = "rag/chroma_db",
+         manim_docs_path: str = "rag/manim_docs",
+         embedding_model: str = "azure/text-embedding-3-large",
+         use_visual_fix_code: bool = False,
+         use_langfuse: bool = True,
+         session_id: Optional[str] = None
+     ) -> None:
+         """Initialize the CodeGenerator.
+ 
+         Args:
+             scene_model: The model used for scene generation
+             helper_model: The model used for helper tasks
+             output_dir (str, optional): Directory for output files. Defaults to "output".
+             print_response (bool, optional): Whether to print model responses. Defaults to False.
+             use_rag (bool, optional): Whether to use RAG. Defaults to False.
+             use_context_learning (bool, optional): Whether to use context learning. Defaults to False.
+             context_learning_path (str, optional): Path to context learning examples. Defaults to "data/context_learning".
+             chroma_db_path (str, optional): Path to ChromaDB. Defaults to "rag/chroma_db".
+             manim_docs_path (str, optional): Path to Manim docs. Defaults to "rag/manim_docs".
+             embedding_model (str, optional): Name of embedding model. Defaults to "azure/text-embedding-3-large".
+             use_visual_fix_code (bool, optional): Whether to use visual code fixing. Defaults to False.
+             use_langfuse (bool, optional): Whether to use Langfuse logging. Defaults to True.
+             session_id (str, optional): Session identifier. Defaults to None.
+         """
+         self.scene_model = scene_model
+         self.helper_model = helper_model
+         self.output_dir = Path(output_dir)
+         self.print_response = print_response
+         self.use_rag = use_rag
+         self.use_context_learning = use_context_learning
+         self.context_learning_path = Path(context_learning_path)
+         self.manim_docs_path = Path(manim_docs_path)
+         self.use_visual_fix_code = use_visual_fix_code
+         self.session_id = session_id
+ 
+         # Ensure output directory exists
+         self.output_dir.mkdir(parents=True, exist_ok=True)
+ 
+         # Load context examples and banned reasonings
+         self.context_examples = self._load_context_examples() if use_context_learning else None
+         self.banned_reasonings = self._load_banned_reasonings()
+ 
+         # Initialize RAG vector store if enabled
+         self.vector_store = self._initialize_vector_store(
+             chroma_db_path, embedding_model, use_langfuse
+         ) if use_rag else None
+ 
+         logger.info(f"CodeGenerator initialized with RAG: {use_rag}, Context Learning: {use_context_learning}")
+ 
+     def _load_banned_reasonings(self) -> List[str]:
+         """Load banned reasonings with error handling."""
+         try:
+             return get_banned_reasonings()
+         except Exception as e:
+             logger.warning(f"Failed to load banned reasonings: {e}")
+             return []
+ 
+     def _initialize_vector_store(self, chroma_db_path: str, embedding_model: str, use_langfuse: bool) -> Optional[RAGVectorStore]:
+         """Initialize RAG vector store with error handling."""
+         try:
+             return RAGVectorStore(
+                 chroma_db_path=chroma_db_path,
+                 manim_docs_path=str(self.manim_docs_path),
+                 embedding_model=embedding_model,
+                 session_id=self.session_id,
+                 use_langfuse=use_langfuse
+             )
+         except Exception as e:
+             logger.error(f"Failed to initialize RAG vector store: {e}")
+             return None
+ 
+     def _load_context_examples(self) -> Optional[str]:
+         """Load all context learning examples from the specified directory.
+ 
+         Returns:
+             Optional[str]: Formatted context learning examples, or None if no examples found.
+         """
+         if not self.context_learning_path.exists():
+             logger.warning(f"Context learning path does not exist: {self.context_learning_path}")
+             return None
+ 
+         examples = []
+         pattern = str(self.context_learning_path / "**" / "*.py")
+ 
+         try:
+             for example_file in glob.glob(pattern, recursive=True):
+                 example_path = Path(example_file)
+                 try:
+                     with example_path.open('r', encoding=CACHE_FILE_ENCODING) as f:
+                         content = f.read()
+                     examples.append(f"# Example from {example_path.name}\n{content}\n")
+                 except (IOError, UnicodeDecodeError) as e:
+                     logger.warning(f"Failed to read example file {example_file}: {e}")
+                     continue
+ 
+             if examples:
+                 formatted_examples = get_prompt_context_learning_code(
+                     examples="\n".join(examples)
+                 )
+                 logger.info(f"Loaded {len(examples)} context learning examples")
+                 return formatted_examples
+ 
+         except Exception as e:
+             logger.error(f"Error loading context examples: {e}")
+ 
+         return None
+ 
+     def _create_cache_directory(self, topic: str, scene_number: int, cache_type: str = "rag_cache") -> Path:
+         """Create and return cache directory path."""
+         sanitized_topic = re.sub(r'[^a-z0-9_]+', '_', topic.lower())
+         cache_dir = self.output_dir / sanitized_topic / f"scene{scene_number}" / cache_type
+         cache_dir.mkdir(parents=True, exist_ok=True)
+         return cache_dir
+ 
+     def _load_cached_queries(self, cache_file: Path) -> Optional[List[str]]:
+         """Load cached queries from file with error handling."""
+         if not cache_file.exists():
+             return None
+ 
+         try:
+             with cache_file.open('r', encoding=CACHE_FILE_ENCODING) as f:
+                 cached_queries = json.load(f)
+             logger.debug(f"Loaded cached queries from {cache_file}")
+             return cached_queries
+         except (json.JSONDecodeError, IOError) as e:
+             logger.warning(f"Failed to load cached queries from {cache_file}: {e}")
+             return None
+ 
+     def _save_queries_to_cache(self, queries: List[str], cache_file: Path) -> None:
+         """Save queries to cache file with error handling."""
+         try:
+             with cache_file.open('w', encoding=CACHE_FILE_ENCODING) as f:
+                 json.dump(queries, f, indent=2)
+             logger.debug(f"Saved queries to cache: {cache_file}")
+         except (IOError, TypeError) as e:
+             logger.error(f"Failed to save queries to cache {cache_file}: {e}")
+ 
+     def _extract_json_from_response(self, response: str, error_context: str = "") -> List[str]:
+         """Extract and parse JSON from model response with improved error handling."""
+         # Try to extract JSON from code blocks first
+         json_match = re.search(JSON_PATTERN, response, re.DOTALL)
+         if json_match:
+             json_text = json_match.group(1).strip()
+         else:
+             # Fallback: clean the response and try direct parsing
+             json_text = response.replace("```json", "").replace("```", "").strip()
+ 
+         try:
+             return json.loads(json_text)
+         except json.JSONDecodeError as e:
+             logger.error(f"JSONDecodeError when parsing {error_context}: {e}")
+             logger.error(f"Response text was: {response[:500]}...")
+             return []
+ 
+     def _generate_rag_queries_code(
+         self,
+         implementation: str,
+         scene_trace_id: Optional[str] = None,
+         topic: Optional[str] = None,
+         scene_number: Optional[int] = None,
+         session_id: Optional[str] = None,
+         relevant_plugins: List[str] = None
+     ) -> List[str]:
+         """Generate RAG queries from the implementation plan.
+ 
+         Args:
+             implementation: The implementation plan text
+             scene_trace_id: Trace ID for the scene
+             topic: Topic of the scene
+             scene_number: Scene number
+             session_id: Session identifier
+             relevant_plugins: List of relevant plugins
+ 
+         Returns:
+             List of generated RAG queries
+         """
+         if relevant_plugins is None:
+             relevant_plugins = []
+ 
+         if not topic or scene_number is None:
+             logger.warning("Missing topic or scene_number for RAG query generation")
+             return []
+ 
+         # Setup cache
+         cache_dir = self._create_cache_directory(topic, scene_number)
+         cache_file = cache_dir / "rag_queries_code.json"
+ 
+         # Try to load from cache
+         cached_queries = self._load_cached_queries(cache_file)
+         if cached_queries is not None:
+             logger.info(f"Using cached RAG queries for {topic}_scene{scene_number}")
+             return cached_queries
+ 
+         # Generate new queries
+         try:
+             plugins_text = ", ".join(relevant_plugins) if relevant_plugins else "No plugins are relevant."
+             prompt = get_prompt_rag_query_generation_code(implementation, plugins_text)
+ 
+             response = self.helper_model(
+                 _prepare_text_inputs(prompt),
+                 metadata={
+                     "generation_name": "rag_query_generation",
+                     "trace_id": scene_trace_id,
+                     "tags": [topic, f"scene{scene_number}"],
+                     "session_id": session_id
+                 }
+             )
+ 
+             logger.debug(f"RAG queries response: {response[:200]}...")
+             queries = self._extract_json_from_response(response, "RAG queries for code generation")
+ 
+             # Cache the queries
+             if queries:
+                 self._save_queries_to_cache(queries, cache_file)
+ 
+             return queries
+ 
+         except Exception as e:
+             logger.error(f"Error generating RAG queries for code: {e}")
+             return []
+ 
+     def _generate_rag_queries_error_fix(
+         self,
+         error: str,
+         code: str,
+         scene_trace_id: Optional[str] = None,
+         topic: Optional[str] = None,
+         scene_number: Optional[int] = None,
+         session_id: Optional[str] = None,
+         relevant_plugins: List[str] = None
+     ) -> List[str]:
+         """Generate RAG queries for fixing code errors.
+ 
+         Args:
+             error: The error message to fix
+             code: The code containing the error
+             scene_trace_id: Trace ID for the scene
+             topic: Topic of the scene
+             scene_number: Scene number
+             session_id: Session identifier
+             relevant_plugins: List of relevant plugins
+ 
+         Returns:
+             List of generated RAG queries for error fixing
+         """
+         if relevant_plugins is None:
+             relevant_plugins = []
+ 
+         if not topic or scene_number is None:
+             logger.warning("Missing topic or scene_number for RAG error fix query generation")
+             return []
+ 
+         # Setup cache
+         cache_dir = self._create_cache_directory(topic, scene_number)
+         cache_file = cache_dir / "rag_queries_error_fix.json"
+ 
+         # Try to load from cache
+         cached_queries = self._load_cached_queries(cache_file)
+         if cached_queries is not None:
+             logger.info(f"Using cached RAG error fix queries for {topic}_scene{scene_number}")
+             return cached_queries
+ 
+         # Generate new queries for error fix
+         try:
+             plugins_text = ", ".join(relevant_plugins) if relevant_plugins else "No plugins are relevant."
+             prompt = get_prompt_rag_query_generation_fix_error(
+                 error=error,
+                 code=code,
+                 relevant_plugins=plugins_text
+             )
+ 
+             response = self.helper_model(
+                 _prepare_text_inputs(prompt),
+                 metadata={
+                     "generation_name": "rag-query-generation-fix-error",
+                     "trace_id": scene_trace_id,
+                     "tags": [topic, f"scene{scene_number}"],
+                     "session_id": session_id
+                 }
+             )
+ 
+             queries = self._extract_json_from_response(response, "RAG queries for error fix")
+ 
+             # Cache the queries
+             if queries:
+                 self._save_queries_to_cache(queries, cache_file)
+ 
+             return queries
+ 
+         except Exception as e:
+             logger.error(f"Error generating RAG queries for error fix: {e}")
+             return []
+ 
+     def _extract_code_with_retries(
+         self,
+         response_text: str,
+         pattern: str = CODE_PATTERN,
+         generation_name: Optional[str] = None,
+         trace_id: Optional[str] = None,
+         session_id: Optional[str] = None,
+         max_retries: int = DEFAULT_MAX_RETRIES
+     ) -> str:
+         """Extract code from response text with retry logic.
+ 
+         Args:
+             response_text: The text containing code to extract
+             pattern: Regex pattern for extracting code
+             generation_name: Name of generation step
+             trace_id: Trace identifier
+             session_id: Session identifier
+             max_retries: Maximum number of retries
+ 
+         Returns:
+             The extracted code
+ 
+         Raises:
+             ValueError: If code extraction fails after max retries
+         """
+         retry_prompt_template = """
+         Please extract the Python code in the correct format using the pattern: {pattern}.
+         You MUST NOT include any other text or comments.
+         You MUST return the exact same code as in the previous response, NO CONTENT EDITING is allowed.
+         Previous response:
+         {response_text}
+         """
+ 
+         for attempt in range(max_retries):
+             try:
+                 code_match = re.search(pattern, response_text, re.DOTALL)
+                 if code_match:
+                     extracted_code = code_match.group(1).strip()
+                     logger.debug(f"Successfully extracted code on attempt {attempt + 1}")
+                     return extracted_code
+ 
+                 if attempt < max_retries - 1:
+                     logger.warning(f"Attempt {attempt + 1}: Failed to extract code pattern. Retrying...")
+ 
+                     # Regenerate response with a more explicit prompt
+                     retry_prompt = retry_prompt_template.format(
+                         pattern=pattern,
+                         response_text=response_text[:1000]  # Limit response length
+                     )
+ 
+                     response_text = self.scene_model(
+                         _prepare_text_inputs(retry_prompt),
+                         metadata={
+                             "generation_name": f"{generation_name}_format_retry_{attempt + 1}",
+                             "trace_id": trace_id,
+                             "session_id": session_id
+                         }
+                     )
+ 
+             except Exception as e:
+                 logger.error(f"Error during code extraction attempt {attempt + 1}: {e}")
+                 if attempt == max_retries - 1:
+                     break
+ 
+         raise ValueError(f"Failed to extract code pattern after {max_retries} attempts. Pattern: {pattern}")
+ 
+     def _prepare_additional_context(self, additional_context: Union[str, List[str], None]) -> List[str]:
+         """Prepare additional context for code generation."""
+         if additional_context is None:
+             return []
+         elif isinstance(additional_context, str):
+             return [additional_context]
+         return additional_context.copy()
+ 
+     def _retrieve_rag_context(
+         self,
+         rag_queries: List[str],
+         scene_trace_id: Optional[str],
+         topic: str,
+         scene_number: int
+     ) -> Optional[str]:
+         """Retrieve context from RAG vector store."""
+         if not self.vector_store or not rag_queries:
+             return None
+ 
+         try:
+             return self.vector_store.find_relevant_docs(
+                 queries=rag_queries,
+                 k=DEFAULT_RAG_K_VALUE,
+                 trace_id=scene_trace_id,
+                 topic=topic,
+                 scene_number=scene_number
+             )
+         except Exception as e:
+             logger.error(f"Error retrieving RAG context: {e}")
+             return None
+ 
+     def generate_manim_code(
+         self,
+         topic: str,
+         description: str,
+         scene_outline: str,
+         scene_implementation: str,
+         scene_number: int,
+         additional_context: Union[str, List[str], None] = None,
+         scene_trace_id: Optional[str] = None,
+         session_id: Optional[str] = None,
+         rag_queries_cache: Optional[Dict] = None
+     ) -> Tuple[str, str]:
+         """Generate Manim code from video plan.
+ 
+         Args:
+             topic: Topic of the scene
+             description: Description of the scene
+             scene_outline: Outline of the scene
+             scene_implementation: Implementation details
+             scene_number: Scene number
+             additional_context: Additional context
+             scene_trace_id: Trace identifier
+             session_id: Session identifier
+             rag_queries_cache: Cache for RAG queries (deprecated, use file cache)
+ 
+         Returns:
+             Tuple of generated code and response text
+ 
+         Raises:
+             ValueError: If code generation fails
+         """
+         try:
+             # Prepare additional context
+             context_list = self._prepare_additional_context(additional_context)
+ 
+             # Add context learning examples if enabled
+             if self.use_context_learning and self.context_examples:
+                 context_list.append(self.context_examples)
+ 
+             # Add RAG context if enabled
+             if self.use_rag:
+                 rag_queries = self._generate_rag_queries_code(
+                     implementation=scene_implementation,
+                     scene_trace_id=scene_trace_id,
+                     topic=topic,
+                     scene_number=scene_number,
+                     session_id=session_id or self.session_id
+                 )
+ 
+                 rag_context = self._retrieve_rag_context(
+                     rag_queries, scene_trace_id, topic, scene_number
+                 )
+ 
+                 if rag_context:
+                     context_list.append(rag_context)
+ 
+             # Generate prompt
+             prompt = get_prompt_code_generation(
+                 scene_outline=scene_outline,
+                 scene_implementation=scene_implementation,
+                 topic=topic,
+                 description=description,
+                 scene_number=scene_number,
+                 additional_context=context_list if context_list else None
+             )
+ 
+             # Generate code using model
+             response_text = self.scene_model(
+                 _prepare_text_inputs(prompt),
+                 metadata={
+                     "generation_name": "code_generation",
+                     "trace_id": scene_trace_id,
+                     "tags": [topic, f"scene{scene_number}"],
+                     "session_id": session_id or self.session_id
+                 }
+             )
+ 
+             # Extract code with retries
+             code = self._extract_code_with_retries(
+                 response_text,
+                 CODE_PATTERN,
+                 generation_name="code_generation",
+                 trace_id=scene_trace_id,
+                 session_id=session_id or self.session_id
+             )
+ 
+             logger.info(f"Successfully generated code for {topic} scene {scene_number}")
+             return code, response_text
+ 
+         except Exception as e:
+             logger.error(f"Error generating Manim code for {topic} scene {scene_number}: {e}")
+             raise ValueError(f"Code generation failed: {e}") from e
+ 
+     def fix_code_errors(
+         self,
+         implementation_plan: str,
+         code: str,
+         error: str,
+         scene_trace_id: str,
+         topic: str,
+         scene_number: int,
+         session_id: str,
+         rag_queries_cache: Optional[Dict] = None
+     ) -> Tuple[str, str]:
+         """Fix errors in generated Manim code.
+ 
+         Args:
+             implementation_plan: Original implementation plan
+             code: Code containing errors
+             error: Error message to fix
+             scene_trace_id: Trace identifier
+             topic: Topic of the scene
+             scene_number: Scene number
+             session_id: Session identifier
+             rag_queries_cache: Cache for RAG queries (deprecated, use file cache)
+ 
+         Returns:
+             Tuple of fixed code and response text
+ 
+         Raises:
+             ValueError: If code fixing fails
+         """
+         try:
+             # Start with base error fix prompt
+             additional_context = None
+ 
+             # Add RAG context if enabled
+             if self.use_rag:
+                 rag_queries = self._generate_rag_queries_error_fix(
+                     error=error,
+                     code=code,
+                     scene_trace_id=scene_trace_id,
+                     topic=topic,
+                     scene_number=scene_number,
+                     session_id=session_id
+                 )
+ 
+                 rag_context = self._retrieve_rag_context(
+                     rag_queries, scene_trace_id, topic, scene_number
+                 )
+ 
+                 if rag_context:
+                     additional_context = rag_context
+ 
+             # Generate prompt (with or without RAG context)
+             if additional_context:
+                 prompt = get_prompt_fix_error(
+                     implementation_plan=implementation_plan,
+                     manim_code=code,
+                     error=error,
+                     additional_context=additional_context
+                 )
+             else:
+                 prompt = get_prompt_fix_error(
+                     implementation_plan=implementation_plan,
+                     manim_code=code,
+                     error=error
+                 )
+ 
+             # Get fixed code from model
+             response_text = self.scene_model(
+                 _prepare_text_inputs(prompt),
+                 metadata={
+                     "generation_name": "code_fix_error",
+                     "trace_id": scene_trace_id,
+                     "tags": [topic, f"scene{scene_number}"],
+                     "session_id": session_id
+                 }
+             )
+ 
+             # Extract fixed code with retries
+             fixed_code = self._extract_code_with_retries(
+                 response_text,
+                 CODE_PATTERN,
+                 generation_name="code_fix_error",
+                 trace_id=scene_trace_id,
+                 session_id=session_id
+             )
+ 
+             logger.info(f"Successfully fixed code errors for {topic} scene {scene_number}")
+             return fixed_code, response_text
+ 
+         except Exception as e:
+             logger.error(f"Error fixing code for {topic} scene {scene_number}: {e}")
+             raise ValueError(f"Code error fixing failed: {e}") from e
+ 
+     def visual_self_reflection(
+         self,
+         code: str,
+         media_path: Union[str, Image.Image],
+         scene_trace_id: str,
+         topic: str,
+         scene_number: int,
+         session_id: str
+     ) -> Tuple[str, str]:
+         """Use snapshot image or mp4 video to fix code.
+ 
+         Args:
+             code: Code to fix
+             media_path: Path to media file or PIL Image
+             scene_trace_id: Trace identifier
+             topic: Topic of the scene
+             scene_number: Scene number
+             session_id: Session identifier
+ 
+         Returns:
+             Tuple of fixed code and response text
+ 
+         Raises:
+             ValueError: If visual self-reflection fails
+             FileNotFoundError: If media file doesn't exist
+         """
+         try:
+             # Validate media input
+             if isinstance(media_path, str):
+                 media_file = Path(media_path)
+                 if not media_file.exists():
+                     raise FileNotFoundError(f"Media file not found: {media_path}")
+ 
+             # Determine if we're dealing with video or image
+             is_video = isinstance(media_path, str) and media_path.lower().endswith('.mp4')
+ 
+             # Load prompt template
+             prompt_file = Path('task_generator/prompts_raw/prompt_visual_self_reflection.txt')
+             if not prompt_file.exists():
+                 logger.warning(f"Visual self-reflection prompt file not found: {prompt_file}")
+                 # Fallback prompt
+                 prompt_template = """
+                 Analyze the visual output and the provided code. Fix any issues you notice in the code.
+ 
+                 Code:
+                 {code}
+                 """
+             else:
+                 with prompt_file.open('r', encoding=CACHE_FILE_ENCODING) as f:
+                     prompt_template = f.read()
+ 
+             # Format prompt
+             prompt = prompt_template.format(code=code)
+ 
+             # Prepare input based on media type and model capabilities
+             if is_video and isinstance(self.scene_model, (GeminiWrapper, VertexAIWrapper)):
+                 # For video with Gemini models
+                 messages = [
+                     {"type": "text", "content": prompt},
+                     {"type": "video", "content": str(media_path)}
+                 ]
+             else:
+                 # For images or non-Gemini models
+                 if isinstance(media_path, str):
+                     media = Image.open(media_path)
+                 else:
+                     media = media_path
+                 messages = [
+                     {"type": "text", "content": prompt},
+                     {"type": "image", "content": media}
+                 ]
+ 
+             # Get model response
+             response_text = self.scene_model(
+                 messages,
+                 metadata={
+                     "generation_name": "visual_self_reflection",
+                     "trace_id": scene_trace_id,
+                     "tags": [topic, f"scene{scene_number}"],
+                     "session_id": session_id
+                 }
+             )
+ 
+             # Extract code with retries
+             fixed_code = self._extract_code_with_retries(
+                 response_text,
+                 CODE_PATTERN,
+                 generation_name="visual_self_reflection",
+                 trace_id=scene_trace_id,
+                 session_id=session_id
+             )
+ 
+             logger.info(f"Successfully completed visual self-reflection for {topic} scene {scene_number}")
+             return fixed_code, response_text
+ 
+         except Exception as e:
+             logger.error(f"Error in visual self-reflection for {topic} scene {scene_number}: {e}")
+             raise ValueError(f"Visual self-reflection failed: {e}") from e
+ 
+     def enhanced_visual_self_reflection(
+         self,
+         code: str,
+         media_path: Union[str, Image.Image],
+         scene_trace_id: str,
+         topic: str,
+         scene_number: int,
+         session_id: str,
+         implementation_plan: Optional[str] = None
+     ) -> Tuple[str, str]:
+         """Enhanced visual self-reflection using VLM for detailed error detection.
+ 
+         This method specifically focuses on detecting and fixing:
+         - Element overlap and collision
+         - Out-of-bounds positioning
+         - Spatial boundary violations
+         - Poor visual arrangement
+         - Educational effectiveness issues
+ 
+         Args:
+             code: Code to analyze and fix
+             media_path: Path to media file or PIL Image
+             scene_trace_id: Trace identifier
+             topic: Topic of the scene
+             scene_number: Scene number
+             session_id: Session identifier
+             implementation_plan: Optional implementation plan for context
+ 
+         Returns:
+             Tuple of fixed code and response text
+ 
+         Raises:
+             ValueError: If enhanced visual analysis fails
+             FileNotFoundError: If media file doesn't exist
+         """
+         try:
+             # Validate media input
+             if isinstance(media_path, str):
+                 media_file = Path(media_path)
+                 if not media_file.exists():
+                     raise FileNotFoundError(f"Media file not found: {media_path}")
+ 
+             # Determine if we're dealing with video or image
+             is_video = isinstance(media_path, str) and media_path.lower().endswith('.mp4')
+ 
+             # Load enhanced visual analysis prompt
+             enhanced_prompt_file = Path('task_generator/prompts_raw/prompt_enhanced_visual_self_reflection.txt')
+             if enhanced_prompt_file.exists():
+                 with enhanced_prompt_file.open('r', encoding=CACHE_FILE_ENCODING) as f:
+                     prompt_template = f.read()
+             else:
+                 # Fallback to original prompt if enhanced version not found
+                 logger.warning("Enhanced visual self-reflection prompt not found, using fallback")
+                 prompt_template = self._get_fallback_visual_prompt()
+ 
+             # Format prompt with implementation plan and code
+             prompt = prompt_template.format(
+                 implementation=implementation_plan or "No implementation plan provided",
+                 code=code
+             )
+ 
+             # Prepare input based on media type and model capabilities
+             if is_video and isinstance(self.scene_model, (GeminiWrapper, VertexAIWrapper)):
+                 # For video with Gemini/Vertex AI models
+                 messages = [
+                     {"type": "text", "content": prompt},
+                     {"type": "video", "content": str(media_path)}
+                 ]
+             else:
+                 # For images or non-Gemini models
+                 if isinstance(media_path, str):
+                     media = Image.open(media_path)
+                 else:
+                     media = media_path
+                 messages = [
+                     {"type": "text", "content": prompt},
+                     {"type": "image", "content": media}
+                 ]
+ 
+             # Get enhanced VLM analysis response
+             response_text = self.scene_model(
+                 messages,
+                 metadata={
+                     "generation_name": "enhanced_visual_self_reflection",
+                     "trace_id": scene_trace_id,
+                     "tags": [topic, f"scene{scene_number}", "visual_error_detection"],
+                     "session_id": session_id
+                 }
+             )
+ 
+             # Parse response for visual analysis results
+             if "<LGTM>" in response_text or response_text.strip() == "<LGTM>":
+                 logger.info(f"Enhanced visual analysis passed for {topic} scene {scene_number}")
+                 return code, response_text
+ 
+             # Extract improved code if visual issues were found
+             fixed_code = self._extract_visual_fix_code(response_text, scene_trace_id, session_id)
+ 
+             logger.info(f"Enhanced visual self-reflection completed with fixes for {topic} scene {scene_number}")
+             return fixed_code, response_text
+ 
+         except Exception as e:
+             logger.error(f"Error in enhanced visual self-reflection for {topic} scene {scene_number}: {e}")
+             # Fallback to original visual_self_reflection if enhanced version fails
+             logger.info("Falling back to original visual_self_reflection method")
+             return self.visual_self_reflection(
+                 code, media_path, scene_trace_id, topic, scene_number, session_id
+             )
+ 
+     def _extract_visual_fix_code(
+         self,
+         response_text: str,
+         scene_trace_id: Optional[str] = None,
+         session_id: Optional[str] = None
+     ) -> str:
+         """Extract code from enhanced visual analysis response.
+ 
+         Args:
+             response_text: The VLM response containing visual analysis
+             scene_trace_id: Trace identifier
+             session_id: Session identifier
+ 
+         Returns:
+             The extracted and fixed code
+ 
+         Raises:
+             ValueError: If code extraction fails
+         """
+         # Try to extract code from <improved_code> tags first
+         improved_code_pattern = r'<improved_code>\s*```python\s*(.*?)\s*```\s*</improved_code>'
+         code_match = re.search(improved_code_pattern, response_text, re.DOTALL)
+ 
+         if code_match:
+             extracted_code = code_match.group(1).strip()
+             logger.debug("Successfully extracted code from <improved_code> tags")
+             return extracted_code
+ 
+         # Fallback to standard code extraction
+         return self._extract_code_with_retries(
+             response_text,
+             CODE_PATTERN,
+             generation_name="enhanced_visual_fix",
+             trace_id=scene_trace_id,
+             session_id=session_id
+         )
+ 
+     def _get_fallback_visual_prompt(self) -> str:
+         """Get fallback visual analysis prompt if enhanced version is not available."""
+         return """
+         Analyze the visual output and the provided code for the following issues:
+ 
+         1. **Element Overlap:** Check for overlapping text, shapes, or mathematical expressions
+         2. **Out-of-Bounds Objects:** Identify elements outside the visible frame
+         3. **Spacing Issues:** Verify minimum 0.3 unit spacing between elements
+         4. **Safe Area Compliance:** Ensure 0.5 unit margins from frame edges
+         5. **Educational Clarity:** Assess if arrangement supports learning objectives
+ 
+         Implementation Plan: {implementation}
+ 
+         Code to analyze:
+         {code}
+ 
+         If issues are found, provide fixed code. If no issues, return "<LGTM>".
+ 
+         <improved_code>
+         ```python
+         [Fixed code here]
+         ```
+         </improved_code>
+         """
+ 
+     def detect_visual_errors(
+         self,
+         media_path: Union[str, Image.Image],
+         scene_trace_id: Optional[str] = None,
+         topic: Optional[str] = None,
+         scene_number: Optional[int] = None,
+         session_id: Optional[str] = None
+     ) -> Dict[str, Any]:
+         """Detect visual errors using VLM without code modification.
+ 
+         This method provides detailed visual error analysis without attempting to fix code.
+         Useful for validation and quality assessment.
+ 
+         Args:
+             media_path: Path to media file or PIL Image
+             scene_trace_id: Trace identifier
+             topic: Topic of the scene
+             scene_number: Scene number
+             session_id: Session identifier
+ 
+         Returns:
+             Dictionary containing visual error analysis results
+ 
+         Raises:
+             ValueError: If visual error detection fails
+             FileNotFoundError: If media file doesn't exist
+         """
+         try:
+             # Validate media input
+             if isinstance(media_path, str):
+                 media_file = Path(media_path)
+                 if not media_file.exists():
+                     raise FileNotFoundError(f"Media file not found: {media_path}")
+ 
+             # Create analysis prompt
+             analysis_prompt = """
+             You are an expert visual quality analyst. Analyze this Manim-generated frame/video for:
+ 
+             1. **Element Overlap Detection:**
+                - Text overlapping with shapes or other text
+                - Mathematical expressions colliding
+                - Unintentional object occlusion
+ 
+             2. **Spatial Boundary Issues:**
+                - Objects extending beyond frame boundaries
+                - Violations of safe area margins (0.5 units from edges)
+                - Insufficient spacing between elements (minimum 0.3 units)
+ 
+             3. **Visual Quality Assessment:**
+                - Overall composition balance
+                - Readability of text elements
+                - Educational effectiveness of arrangement
+ 
+             Provide your analysis in the following format:
+ 
+             **VISUAL ERROR ANALYSIS:**
+             - Overlap Issues: [List any overlapping elements]
+             - Boundary Violations: [List out-of-bounds elements]
+             - Spacing Problems: [List spacing violations]
+             - Quality Issues: [List other visual problems]
+ 
+             **SEVERITY ASSESSMENT:**
+             - Critical Errors: [Issues that severely impact readability]
+             - Major Errors: [Issues that noticeably reduce quality]
+             - Minor Errors: [Issues that slightly affect visual appeal]
+ 
+             **OVERALL RATING:** [Excellent/Good/Fair/Poor]
+             """
+ 
+             # Determine media type and prepare input
+             is_video = isinstance(media_path, str) and media_path.lower().endswith('.mp4')
+ 
+             if is_video and isinstance(self.scene_model, (GeminiWrapper, VertexAIWrapper)):
+                 messages = [
+                     {"type": "text", "content": analysis_prompt},
+                     {"type": "video", "content": str(media_path)}
+                 ]
+             else:
+                 if isinstance(media_path, str):
+                     media = Image.open(media_path)
+                 else:
+                     media = media_path
+                 messages = [
+                     {"type": "text", "content": analysis_prompt},
+                     {"type": "image", "content": media}
+                 ]
+ 
+             # Get analysis response
+             response_text = self.scene_model(
+                 messages,
+                 metadata={
+                     "generation_name": "visual_error_detection",
+                     "trace_id": scene_trace_id,
+                     "tags": [topic or "unknown", f"scene{scene_number or 0}", "quality_analysis"],
+                     "session_id": session_id or self.session_id
+                 }
+             )
+ 
+             # Parse response into structured results
+             analysis_results = self._parse_visual_analysis(response_text)
+ 
+             logger.info(f"Visual error detection completed for scene {scene_number or 'unknown'}")
+             return analysis_results
+ 
+         except Exception as e:
+             logger.error(f"Error in visual error detection: {e}")
+             raise ValueError(f"Visual error detection failed: {e}") from e
+ 
+     def _parse_visual_analysis(self, response_text: str) -> Dict[str, Any]:
+         """Parse visual analysis response into structured data.
+ 
+         Args:
+             response_text: Raw response from VLM
+ 
+         Returns:
+             Structured analysis results
+         """
+         results = {
+             "overlap_issues": [],
+             "boundary_violations": [],
+             "spacing_problems": [],
+             "quality_issues": [],
+             "critical_errors": [],
+             "major_errors": [],
+             "minor_errors": [],
+             "overall_rating": "Unknown",
+             "raw_analysis": response_text
+         }
+ 
+         try:
+             # Extract different sections using regex patterns
+             overlap_match = re.search(r'Overlap Issues:\s*(.*?)(?=\n-|\n\*\*|$)', response_text, re.DOTALL)
+             if overlap_match:
+                 results["overlap_issues"] = [item.strip() for item in overlap_match.group(1).split('\n') if item.strip()]
+ 
+             boundary_match = re.search(r'Boundary Violations:\s*(.*?)(?=\n-|\n\*\*|$)', response_text, re.DOTALL)
+             if boundary_match:
+                 results["boundary_violations"] = [item.strip() for item in boundary_match.group(1).split('\n') if item.strip()]
+ 
+             rating_match = re.search(r'OVERALL RATING.*?:\s*([A-Za-z]+)', response_text)
+             if rating_match:
+                 results["overall_rating"] = rating_match.group(1)
+ 
+         except Exception as e:
+             logger.warning(f"Error parsing visual analysis: {e}")
+ 
+         return results
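
A minimal driving sketch for `CodeGenerator`. The class only requires that `scene_model`/`helper_model` be callables taking prepared inputs plus a `metadata` kwarg and returning a string, as the calls above show; `stub_model` here is a hypothetical stand-in for a real LLM wrapper, and the repo's own modules (`task_generator`, `mllm_tools`) are assumed importable:

```python
from src.core.code_generator import CodeGenerator

FENCE = "`" * 3  # triple backtick, built up to keep this example readable

def stub_model(inputs, metadata=None):
    # Return a fenced Python block so _extract_code_with_retries can match CODE_PATTERN.
    body = "from manim import *\n\nclass Scene1(Scene):\n    def construct(self):\n        self.play(Write(Text('hi')))"
    return f"{FENCE}python\n{body}\n{FENCE}"

generator = CodeGenerator(scene_model=stub_model, helper_model=stub_model,
                          output_dir="output", use_rag=False)
code, raw_response = generator.generate_manim_code(
    topic="Pythagorean theorem",
    description="A short visual proof",
    scene_outline="Scene 1: draw the triangle",
    scene_implementation="Draw a right triangle and squares on its sides.",
    scene_number=1,
)
print(code.splitlines()[0])  # from manim import *
```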
src/core/parse_video.py ADDED
@@ -0,0 +1,227 @@
+ import os
+ import pysrt
+ from moviepy import VideoFileClip
+ import shutil
+ from PIL import Image, ImageOps
+ import numpy as np
+ import speech_recognition as sr
+ 
+ def get_images_from_video(video_path, fps=0.2):
+     """Extract frames from a video file at specified FPS.
+ 
+     Args:
+         video_path (str): Path to the video file.
+         fps (float, optional): Frames per second to extract. Defaults to 0.2.
+ 
+     Returns:
+         list: List of frames as numpy arrays.
+     """
+     clip = VideoFileClip(video_path)
+     images = clip.iter_frames(fps=fps)
+     return images
+ 
+ def image_with_most_non_black_space(images, output_path, return_type="path"):
+     """Find and save the image with the most non-black space from a list of images.
+ 
+     Args:
+         images (list): List of image file paths, PIL Image objects, or numpy arrays.
+         output_path (str): Path where the output image should be saved.
+         return_type (str, optional): Type of return value - "path" or "image". Defaults to "path".
+ 
+     Returns:
+         Union[str, PIL.Image, None]: Path to saved image, PIL Image object, or None if no valid image found.
+     """
+     max_non_black_area = 0
+     image_with_max_non_black_space = None
+ 
+     for img in images:
+         try:
+             # If img is a path, open the image
+             if isinstance(img, str):
+                 image = Image.open(img)
+             elif isinstance(img, Image.Image):
+                 image = img
+             elif isinstance(img, np.ndarray):
+                 image = Image.fromarray(img)
+             else:
+                 print(f"Unsupported type: {type(img)}. Skipping.")
+                 continue
+ 
+             # Convert to grayscale
+             gray = ImageOps.grayscale(image)
+ 
+             # Convert to numpy array
+             gray_array = np.array(gray)
+ 
+             # Count non-black pixels (threshold to consider near-black as black)
+             non_black_pixels = np.sum(gray_array > 10)  # Threshold 10 to account for slight variations in black
+ 
+             if non_black_pixels > max_non_black_area:
+                 max_non_black_area = non_black_pixels
+                 image_with_max_non_black_space = image
+ 
+         except Exception as e:
+             print(f"Warning: Unable to process image {img}: {e}")
+ 
+     if image_with_max_non_black_space is not None:
+         image_with_max_non_black_space.save(output_path)
+         print(f"Saved image with most non-black space to {output_path}")
+ 
+         if return_type == "path":
+             return output_path
+         else:
+             return image_with_max_non_black_space
+     return image_with_max_non_black_space
+ 
+ def parse_srt_to_text(output_dir, topic_name):
+     """Convert SRT subtitle file to plain text.
+ 
+     Args:
+         output_dir (str): Directory containing the topic folders.
+         topic_name (str): Name of the topic/video.
+     """
+     topic_name = topic_name.replace(" ", "_").lower()
+     srt_path = os.path.join(output_dir, topic_name, f"{topic_name}_combined.srt")
+     txt_path = os.path.join(output_dir, topic_name, f"{topic_name}_combined.txt")
+     subs = pysrt.open(srt_path)
+ 
+     with open(txt_path, 'w') as f:
+         full_text = ""
+         for sub in subs:
+             sub.text = sub.text.replace("...", ".")
+             full_text += sub.text + " "
+         f.write(full_text.strip())
+ 
+ def parse_srt_and_extract_frames(output_dir, topic_name):
+     """Extract frames from video at subtitle timestamps and save with corresponding text.
+ 
+     Args:
+         output_dir (str): Directory containing the topic folders.
+         topic_name (str): Name of the topic/video.
+     """
+     topic_name = topic_name.replace(" ", "_").lower()
+     video_path = os.path.join(output_dir, topic_name, f"{topic_name}_combined.mp4")
+     srt_path = os.path.join(output_dir, topic_name, f"{topic_name}_combined.srt")
+     subs = pysrt.open(srt_path)
+ 
+     # Create extract_images folder if it doesn't exist
+     images_dir = os.path.join(output_dir, topic_name, "extract_images")
+     if os.path.exists(images_dir):
+         shutil.rmtree(images_dir)
+     os.makedirs(images_dir, exist_ok=True)
+ 
+     # Load the video file
+     video = VideoFileClip(video_path)
+ 
+     # Dictionary to store image-text pairs
+     pairs = {}
+ 
+     i = 0
+     while i < len(subs):
+         sub = subs[i]
+         text = sub.text
+         sub_indexes = [sub.index]
+ 
+         # Check if we need to concatenate with next subtitle
+         while i < len(subs) - 1 and not text.strip().endswith('.'):
+             i += 1
+             next_sub = subs[i]
+             text += " " + next_sub.text
+             sub_indexes.append(next_sub.index)
+ 
+         # Get the end time of the last concatenated subtitle
+         end_time = sub.end.to_time()
+         # Convert end time to seconds
+         end_time_seconds = end_time.hour * 3600 + end_time.minute * 60 + end_time.second + end_time.microsecond / 1e6
+ 
+         # Save the frame as an image in extract_images folder
+         frame_path = os.path.join(images_dir, f"{sub.index}.jpg")
+         video.save_frame(frame_path, t=end_time_seconds)
+ 
+         # Save the subtitle text to a txt file
+         text_path = os.path.join(images_dir, f"{sub.index}.txt")
+         with open(text_path, 'w') as f:
+             f.write(text)
+ 
+         # Add pair to dictionary
+         pairs[str(sub.index)] = {
+             "image_path": f"{sub.index}.jpg",
+             "text": text,
+             "text_path": f"{sub.index}.txt",
+             "srt_index": sub_indexes,
+         }
+ 
+         i += 1
+ 
+     # Save pairs to json file
+     import json
+     json_path = os.path.join(images_dir, "pairs.json")
+     with open(json_path, 'w') as f:
+         json.dump(pairs, f, indent=4)
+ 
+     # Close the video file
+     video.close()
+ 
+ def extract_trasnscript(video_path):
+     """Extract transcript from video audio using Google Speech Recognition.
+ 
+     Args:
+         video_path (str): Path to the video file.
+ 
+     Returns:
+         str: Transcribed text from the video audio.
+ 
+     Raises:
+         FileNotFoundError: If video file does not exist.
+     """
+     if not os.path.exists(video_path):
+         raise FileNotFoundError(f"Video file not found: {video_path}")
+ 
+     clip = VideoFileClip(video_path)
+ 
+     # write the video to a temporary audio file
+     audio_path = os.path.join(os.path.dirname(video_path), "audio.wav")
+     clip.audio.write_audiofile(audio_path)
+ 
+     try:
+         # extract the subtitles from the audio file
+         recognizer = sr.Recognizer()
+         with sr.AudioFile(audio_path) as source:
+             audio = recognizer.record(source)
+         return recognizer.recognize_google(audio)
+     finally:
+         # clean up the temporary audio file
+         if os.path.exists(audio_path):
+             os.remove(audio_path)
+ 
+ if __name__ == "__main__":
+     import argparse
+ 
+     def process_all_topics(output_folder):
+         """Process all topic folders in the output directory.
+ 
+         Args:
+             output_folder (str): Directory containing the topic folders.
+         """
+         # Only get immediate subdirectories
+         topics = [d for d in os.listdir(output_folder)
+                   if os.path.isdir(os.path.join(output_folder, d))]
+ 
+         for topic in topics:
+             print(f"\nProcessing topic: {topic}")
+             try:
+                 parse_srt_to_text(output_folder, topic)
+                 parse_srt_and_extract_frames(output_folder, topic)
+             except Exception as e:
+                 print(f"Error processing {topic}: {str(e)}")
+                 continue
+ 
+     # Set up argument parser
+     parser = argparse.ArgumentParser(description='Process video files and extract frames with subtitles')
+     parser.add_argument('--output_dir', type=str, default="output",
+                         help='Directory containing the topic folders')
+ 
+     args = parser.parse_args()
+ 
+     # Process topics using provided output directory
+     process_all_topics(args.output_dir)
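
A minimal sketch of the directory layout these helpers expect; `output/demo_topic` is hypothetical, and `demo_topic_combined.mp4`/`.srt` must already exist there (see the path construction above):

```python
from src.core.parse_video import parse_srt_to_text, parse_srt_and_extract_frames

# "Demo Topic" is normalized to "demo_topic" inside both functions.
parse_srt_to_text("output", "Demo Topic")             # writes output/demo_topic/demo_topic_combined.txt
parse_srt_and_extract_frames("output", "Demo Topic")  # writes extract_images/*.jpg, *.txt, pairs.json
```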
src/core/video_planner.py ADDED
@@ -0,0 +1,670 @@
1
+ import os
2
+ import re
3
+ import json
4
+ import glob
5
+ from typing import List, Optional, Dict, Tuple
6
+ import uuid
7
+ import asyncio
8
+ import time
9
+ from concurrent.futures import ThreadPoolExecutor
10
+ from functools import lru_cache
11
+ import aiofiles
12
+
13
+ from mllm_tools.utils import _prepare_text_inputs
14
+ from src.utils.utils import extract_xml
15
+ from task_generator import (
16
+ get_prompt_scene_plan,
17
+ get_prompt_scene_vision_storyboard,
18
+ get_prompt_scene_technical_implementation,
19
+ get_prompt_scene_animation_narration,
20
+ get_prompt_context_learning_scene_plan,
21
+ get_prompt_context_learning_vision_storyboard,
22
+ get_prompt_context_learning_technical_implementation,
23
+ get_prompt_context_learning_animation_narration,
24
+ get_prompt_context_learning_code
25
+ )
26
+ from src.rag.rag_integration import RAGIntegration
27
+
28
+ class EnhancedVideoPlanner:
29
+ """Enhanced video planner with improved parallelization and performance."""
30
+
31
+ def __init__(self, planner_model, helper_model=None, output_dir="output",
32
+ print_response=False, use_context_learning=False,
33
+ context_learning_path="data/context_learning", use_rag=False,
34
+ session_id=None, chroma_db_path="data/rag/chroma_db",
35
+ manim_docs_path="data/rag/manim_docs",
36
+ embedding_model="text-embedding-ada-002", use_langfuse=True,
37
+ max_scene_concurrency=5, max_step_concurrency=3, enable_caching=True):
38
+
39
+ self.planner_model = planner_model
40
+ self.helper_model = helper_model if helper_model is not None else planner_model
41
+ self.output_dir = output_dir
42
+ self.print_response = print_response
43
+ self.use_context_learning = use_context_learning
44
+ self.context_learning_path = context_learning_path
45
+ self.use_rag = use_rag
46
+ self.session_id = session_id
47
+ self.enable_caching = enable_caching
48
+
49
+ # Enhanced concurrency control
50
+ self.max_scene_concurrency = max_scene_concurrency
51
+ self.max_step_concurrency = max_step_concurrency
52
+ self.scene_semaphore = asyncio.Semaphore(max_scene_concurrency)
53
+ self.step_semaphore = asyncio.Semaphore(max_step_concurrency)
54
+
55
+ # Thread pool for I/O operations
56
+ self.thread_pool = ThreadPoolExecutor(max_workers=4)
57
+
58
+ # Cache for prompts and examples
59
+ self._context_cache = {}
60
+ self._prompt_cache = {}
61
+
62
+ # Initialize context examples with caching
63
+ self._initialize_context_examples()
64
+
65
+ # Initialize RAG with enhanced settings
66
+ self.rag_integration = None
67
+ self.relevant_plugins = []
68
+ if use_rag:
69
+ self.rag_integration = RAGIntegration(
70
+ helper_model=self.helper_model,  # use the resolved helper (falls back to planner_model)
71
+ output_dir=output_dir,
72
+ chroma_db_path=chroma_db_path,
73
+ manim_docs_path=manim_docs_path,
74
+ embedding_model=embedding_model,
75
+ use_langfuse=use_langfuse,
76
+ session_id=session_id
77
+ )
78
+
79
+ def _initialize_context_examples(self):
80
+ """Initialize and cache context examples for faster access."""
81
+ example_types = [
82
+ 'scene_plan', 'scene_vision_storyboard', 'technical_implementation',
83
+ 'scene_animation_narration', 'code'
84
+ ]
85
+
86
+ if self.use_context_learning:
87
+ for example_type in example_types:
88
+ self._context_cache[example_type] = self._load_context_examples(example_type)
89
+ else:
90
+ for example_type in example_types:
91
+ self._context_cache[example_type] = None
92
+
93
+ @lru_cache(maxsize=128)
94
+ def _get_cached_prompt(self, prompt_type: str, *args) -> str:
95
+ """Get cached prompt to avoid regeneration."""
96
+ prompt_generators = {
97
+ 'scene_plan': get_prompt_scene_plan,
98
+ 'scene_vision_storyboard': get_prompt_scene_vision_storyboard,
99
+ 'scene_technical_implementation': get_prompt_scene_technical_implementation,
100
+ 'scene_animation_narration': get_prompt_scene_animation_narration
101
+ }
102
+
103
+ generator = prompt_generators.get(prompt_type)
104
+ if generator:
105
+ return generator(*args)
106
+ return ""
107
+
108
+ async def _async_file_write(self, file_path: str, content: str):
109
+ """Asynchronous file writing for better performance."""
110
+ async with aiofiles.open(file_path, 'w', encoding='utf-8') as f:
111
+ await f.write(content)
112
+
113
+ async def _async_file_read(self, file_path: str) -> str:
114
+ """Asynchronous file reading."""
115
+ try:
116
+ async with aiofiles.open(file_path, 'r', encoding='utf-8') as f:
117
+ return await f.read()
118
+ except FileNotFoundError:
119
+ return None
120
+
121
+ async def _ensure_directories(self, *paths):
122
+ """Asynchronously ensure directories exist."""
123
+ loop = asyncio.get_event_loop()
124
+ for path in paths:
125
+ await loop.run_in_executor(self.thread_pool, lambda p: os.makedirs(p, exist_ok=True), path)
126
+
127
+ def _load_context_examples(self, example_type: str) -> str:
128
+ """Load context learning examples with improved performance."""
129
+ if example_type in self._context_cache:
130
+ return self._context_cache[example_type]
131
+
132
+ examples = []
133
+ file_patterns = {
134
+ 'scene_plan': '*_scene_plan.txt',
135
+ 'scene_vision_storyboard': '*_scene_vision_storyboard.txt',
136
+ 'technical_implementation': '*_technical_implementation.txt',
137
+ 'scene_animation_narration': '*_scene_animation_narration.txt',
138
+ 'code': '*.py'
139
+ }
140
+
141
+ pattern = file_patterns.get(example_type)
142
+ if not pattern:
143
+ return None
144
+
145
+ # Use glob for faster file discovery
146
+ search_pattern = os.path.join(self.context_learning_path, "**", pattern)
147
+ for example_file in glob.glob(search_pattern, recursive=True):
148
+ try:
149
+ with open(example_file, 'r', encoding='utf-8') as f:
150
+ content = f.read()
151
+ examples.append(f"# Example from {os.path.basename(example_file)}\n{content}\n")
152
+ except Exception as e:
153
+ print(f"Warning: Could not load example {example_file}: {e}")
154
+
155
+ if examples:
156
+ formatted_examples = self._format_examples(example_type, examples)
157
+ self._context_cache[example_type] = formatted_examples
158
+ return formatted_examples
159
+ return None
160
+
161
+ def _format_examples(self, example_type: str, examples: List[str]) -> str:
162
+ """Format examples using the appropriate template."""
163
+ templates = {
164
+ 'scene_plan': get_prompt_context_learning_scene_plan,
165
+ 'scene_vision_storyboard': get_prompt_context_learning_vision_storyboard,
166
+ 'technical_implementation': get_prompt_context_learning_technical_implementation,
167
+ 'scene_animation_narration': get_prompt_context_learning_animation_narration,
168
+ 'code': get_prompt_context_learning_code
169
+ }
170
+
171
+ template = templates.get(example_type)
172
+ if template:
173
+ return template(examples="\n".join(examples))
174
+ return None
175
+
176
+ async def generate_scene_outline(self, topic: str, description: str, session_id: str) -> str:
177
+ """Enhanced scene outline generation with async I/O."""
178
+ start_time = time.time()
179
+
180
+ # Detect relevant plugins upfront if RAG is enabled
181
+ if self.use_rag and self.rag_integration:
182
+ plugin_detection_task = asyncio.create_task(
183
+ self._detect_plugins_async(topic, description)
184
+ )
185
+
186
+ # Prepare prompt with cached examples
187
+ prompt = self._get_cached_prompt('scene_plan', topic, description)
188
+
189
+ if self.use_context_learning and self._context_cache.get('scene_plan'):
190
+ prompt += f"\n\nHere are some example scene plans for reference:\n{self._context_cache['scene_plan']}"
191
+
192
+ # Wait for plugin detection if enabled
193
+ if self.use_rag and self.rag_integration:
194
+ self.relevant_plugins = await plugin_detection_task
195
+ print(f"✅ Detected relevant plugins: {self.relevant_plugins}")
196
+
197
+ # Generate plan using planner model
198
+ response_text = self.planner_model(
199
+ _prepare_text_inputs(prompt),
200
+ metadata={
201
+ "generation_name": "scene_outline",
202
+ "tags": [topic, "scene-outline"],
203
+ "session_id": session_id
204
+ }
205
+ )
206
+
207
+ # Extract scene outline with improved error handling
208
+ scene_outline = self._extract_scene_outline_robust(response_text)
209
+
210
+ # Async file operations
211
+ file_prefix = re.sub(r'[^a-z0-9_]+', '_', topic.lower())
212
+ output_dir = os.path.join(self.output_dir, file_prefix)
213
+ await self._ensure_directories(output_dir)
214
+
215
+ file_path = os.path.join(output_dir, f"{file_prefix}_scene_outline.txt")
216
+ await self._async_file_write(file_path, scene_outline)
217
+
218
+ elapsed_time = time.time() - start_time
219
+ print(f"Scene outline generated in {elapsed_time:.2f}s - saved to {file_prefix}_scene_outline.txt")
220
+
221
+ return scene_outline
222
+
223
+ async def _detect_plugins_async(self, topic: str, description: str) -> List[str]:
224
+ """Asynchronously detect relevant plugins."""
225
+ loop = asyncio.get_event_loop()
226
+ return await loop.run_in_executor(
227
+ self.thread_pool,
228
+ lambda: self.rag_integration.detect_relevant_plugins(topic, description) or []
229
+ )
230
+
231
+ async def _generate_scene_step_parallel(self, step_name: str, prompt_func,
232
+ scene_trace_id: str, topic: str,
233
+ scene_number: int, session_id: str,
234
+ output_path: str, *args) -> Tuple[str, str]:
235
+ """Generate a single scene step with async operations."""
236
+ async with self.step_semaphore: # Control step-level concurrency
237
+
238
+ # Check cache first if enabled
239
+ if self.enable_caching:
240
+ cached_content = await self._async_file_read(output_path)
241
+ if cached_content:
242
+ print(f"Using cached {step_name} for scene {scene_number}")
243
+ return cached_content, output_path
244
+
245
+ print(f"🚀 Generating {step_name} for scene {scene_number}")
246
+ start_time = time.time()
247
+
248
+ # Generate prompt
249
+ prompt = prompt_func(*args)
250
+
251
+ # Add context examples if available
252
+ example_type = step_name.replace('_plan', '').replace('scene_', '')
253
+ if self._context_cache.get(example_type):
254
+ prompt += f"\n\nHere are some example {step_name}s:\n{self._context_cache[example_type]}"
255
+
256
+ # Add RAG context if enabled
257
+ if self.use_rag and self.rag_integration:
258
+ rag_queries = await self._generate_rag_queries_async(
259
+ step_name, args, scene_trace_id, topic, scene_number, session_id
260
+ )
261
+
262
+ if rag_queries:
263
+ retrieved_docs = self.rag_integration.get_relevant_docs(
264
+ rag_queries=rag_queries,
265
+ scene_trace_id=scene_trace_id,
266
+ topic=topic,
267
+ scene_number=scene_number
268
+ )
269
+ prompt += f"\n\n{retrieved_docs}"
270
+
271
+ # Generate content
272
+ response = self.planner_model(
273
+ _prepare_text_inputs(prompt),
274
+ metadata={
275
+ "generation_name": step_name,
276
+ "trace_id": scene_trace_id,
277
+ "tags": [topic, f"scene{scene_number}"],
278
+ "session_id": session_id
279
+ }
280
+ )
281
+
282
+ # Extract content using step-specific patterns
283
+ extraction_patterns = {
284
+ 'scene_vision_storyboard': r'(<SCENE_VISION_STORYBOARD_PLAN>.*?</SCENE_VISION_STORYBOARD_PLAN>)',
285
+ 'scene_technical_implementation': r'(<SCENE_TECHNICAL_IMPLEMENTATION_PLAN>.*?</SCENE_TECHNICAL_IMPLEMENTATION_PLAN>)',
286
+ 'scene_animation_narration': r'(<SCENE_ANIMATION_NARRATION_PLAN>.*?</SCENE_ANIMATION_NARRATION_PLAN>)'
287
+ }
288
+
289
+ pattern = extraction_patterns.get(step_name)
290
+ if pattern:
291
+ match = re.search(pattern, response, re.DOTALL)
292
+ content = match.group(1) if match else response
293
+ else:
294
+ content = response
295
+
296
+ # Async file save
297
+ await self._async_file_write(output_path, content)
298
+
299
+ elapsed_time = time.time() - start_time
300
+ print(f"{step_name} for scene {scene_number} completed in {elapsed_time:.2f}s")
301
+
302
+ return content, output_path
303
+
304
+ async def _generate_rag_queries_async(self, step_name: str, args: tuple,
305
+ scene_trace_id: str, topic: str,
306
+ scene_number: int, session_id: str) -> List[Dict]:
307
+ """Generate RAG queries asynchronously based on step type."""
308
+ query_generators = {
309
+ 'scene_vision_storyboard': self.rag_integration._generate_rag_queries_storyboard,
310
+ 'scene_technical_implementation': self.rag_integration._generate_rag_queries_technical,
311
+ 'scene_animation_narration': self.rag_integration._generate_rag_queries_narration
312
+ }
313
+
314
+ generator = query_generators.get(step_name)
315
+ if not generator:
316
+ return []
317
+
318
+ # Map args to appropriate parameters based on step
319
+ if step_name == 'scene_vision_storyboard':
320
+ scene_plan = args[3] if len(args) > 3 else ""
321
+ return generator(
322
+ scene_plan=scene_plan,
323
+ scene_trace_id=scene_trace_id,
324
+ topic=topic,
325
+ scene_number=scene_number,
326
+ session_id=session_id,
327
+ relevant_plugins=self.relevant_plugins
328
+ )
329
+ elif step_name == 'scene_technical_implementation':
330
+ storyboard = args[4] if len(args) > 4 else ""
331
+ return generator(
332
+ storyboard=storyboard,
333
+ scene_trace_id=scene_trace_id,
334
+ topic=topic,
335
+ scene_number=scene_number,
336
+ session_id=session_id,
337
+ relevant_plugins=self.relevant_plugins
338
+ )
339
+ elif step_name == 'scene_animation_narration':
340
+ storyboard = args[4] if len(args) > 4 else ""
341
+ return generator(
342
+ storyboard=storyboard,
343
+ scene_trace_id=scene_trace_id,
344
+ topic=topic,
345
+ scene_number=scene_number,
346
+ session_id=session_id,
347
+ relevant_plugins=self.relevant_plugins
348
+ )
349
+
350
+ return []
351
+
352
+ async def _generate_scene_implementation_single_enhanced(self, topic: str, description: str,
353
+ scene_outline_i: str, scene_number: int,
354
+ file_prefix: str, session_id: str,
355
+ scene_trace_id: str) -> str:
356
+ """Enhanced single scene implementation with parallel steps."""
357
+ start_time = time.time()
358
+ print(f"Starting scene {scene_number} implementation (parallel processing)")
359
+
360
+ # Setup directories
361
+ scene_dir = os.path.join(self.output_dir, file_prefix, f"scene{scene_number}")
362
+ subplan_dir = os.path.join(scene_dir, "subplans")
363
+ await self._ensure_directories(scene_dir, subplan_dir)
364
+
365
+ # Save scene trace ID
366
+ trace_id_file = os.path.join(subplan_dir, "scene_trace_id.txt")
367
+ await self._async_file_write(trace_id_file, scene_trace_id)
368
+
369
+ # Define all steps with their configurations
370
+ steps_config = [
371
+ {
372
+ 'name': 'scene_vision_storyboard',
373
+ 'prompt_func': get_prompt_scene_vision_storyboard,
374
+ 'args': (scene_number, topic, description, scene_outline_i, self.relevant_plugins),
375
+ 'output_path': os.path.join(subplan_dir, f"{file_prefix}_scene{scene_number}_vision_storyboard_plan.txt")
376
+ }
377
+ ]
378
+
379
+ # Execute Step 1: Vision Storyboard (sequential dependency)
380
+ vision_storyboard_content, _ = await self._generate_scene_step_parallel(
381
+ steps_config[0]['name'],
382
+ steps_config[0]['prompt_func'],
383
+ scene_trace_id,
384
+ topic,
385
+ scene_number,
386
+ session_id,
387
+ steps_config[0]['output_path'],
388
+ *steps_config[0]['args']
389
+ )
390
+
391
+ # Prepare Step 2 and 3 for parallel execution (both depend on Step 1)
392
+ remaining_steps = [
393
+ {
394
+ 'name': 'scene_technical_implementation',
395
+ 'prompt_func': get_prompt_scene_technical_implementation,
396
+ 'args': (scene_number, topic, description, scene_outline_i, vision_storyboard_content, self.relevant_plugins),
397
+ 'output_path': os.path.join(subplan_dir, f"{file_prefix}_scene{scene_number}_technical_implementation_plan.txt")
398
+ },
399
+ {
400
+ 'name': 'scene_animation_narration',
401
+ 'prompt_func': get_prompt_scene_animation_narration,
402
+ 'args': (scene_number, topic, description, scene_outline_i, vision_storyboard_content, None, self.relevant_plugins),
403
+ 'output_path': os.path.join(subplan_dir, f"{file_prefix}_scene{scene_number}_animation_narration_plan.txt")
404
+ }
405
+ ]
406
+
407
+ # Execute Steps 2 and 3 in parallel
408
+ parallel_tasks = []
409
+ for step_config in remaining_steps:
410
+ task = asyncio.create_task(
411
+ self._generate_scene_step_parallel(
412
+ step_config['name'],
413
+ step_config['prompt_func'],
414
+ scene_trace_id,
415
+ topic,
416
+ scene_number,
417
+ session_id,
418
+ step_config['output_path'],
419
+ *step_config['args']
420
+ )
421
+ )
422
+ parallel_tasks.append(task)
423
+
424
+ # Wait for parallel tasks to complete
425
+ parallel_results = await asyncio.gather(*parallel_tasks)
426
+ technical_implementation_content = parallel_results[0][0]
427
+ animation_narration_content = parallel_results[1][0]
428
+
429
+ # Update animation narration args with technical implementation and regenerate if needed
430
+ if technical_implementation_content:
431
+ updated_animation_args = (
432
+ scene_number, topic, description, scene_outline_i,
433
+ vision_storyboard_content, technical_implementation_content, self.relevant_plugins
434
+ )
435
+
436
+ animation_narration_content, _ = await self._generate_scene_step_parallel(
437
+ 'scene_animation_narration',
438
+ get_prompt_scene_animation_narration,
439
+ scene_trace_id,
440
+ topic,
441
+ scene_number,
442
+ session_id,
443
+ remaining_steps[1]['output_path'],
444
+ *updated_animation_args
445
+ )
446
+
447
+ # Combine all implementation plans
448
+ implementation_plan = (
449
+ f"{vision_storyboard_content}\n\n"
450
+ f"{technical_implementation_content}\n\n"
451
+ f"{animation_narration_content}\n\n"
452
+ )
453
+
454
+ # Ensure scene directory exists (just to be extra safe)
455
+ scene_dir = os.path.join(self.output_dir, file_prefix, f"scene{scene_number}")
456
+ await self._ensure_directories(scene_dir)
457
+
458
+ # Save combined implementation plan
459
+ combined_plan_path = os.path.join(scene_dir, f"{file_prefix}_scene{scene_number}_implementation_plan.txt")
460
+ combined_content = f"# Scene {scene_number} Implementation Plan\n\n{implementation_plan}"
461
+
462
+ try:
463
+ await self._async_file_write(combined_plan_path, combined_content)
464
+ print(f"✅ Saved implementation plan for scene {scene_number} to: {combined_plan_path}")
465
+ except Exception as e:
466
+ print(f"❌ Error saving implementation plan for scene {scene_number}: {e}")
467
+ raise
468
+
469
+ elapsed_time = time.time() - start_time
470
+ print(f"Scene {scene_number} implementation completed in {elapsed_time:.2f}s")
471
+
472
+ return implementation_plan
473
+
474
+ async def generate_scene_implementation_concurrently_enhanced(self, topic: str, description: str,
475
+ plan: str, session_id: str) -> List[str]:
476
+ """Enhanced concurrent scene implementation with better performance."""
477
+ start_time = time.time()
478
+
479
+ # Extract scene information
480
+ scene_outline = extract_xml(plan)
481
+ scene_number = len(re.findall(r'<SCENE_(\d+)>[^<]', scene_outline))
482
+ file_prefix = re.sub(r'[^a-z0-9_]+', '_', topic.lower())
483
+
484
+ print(f"Starting implementation generation for {scene_number} scenes with max concurrency: {self.max_scene_concurrency}")
485
+
486
+ async def generate_single_scene_implementation(i):
487
+ async with self.scene_semaphore: # Control scene-level concurrency
488
+ scene_regex = r'(<SCENE_{0}>.*?</SCENE_{0}>)'.format(i)
489
+ scene_match = re.search(
490
+ scene_regex,
491
+ scene_outline,
492
+ re.DOTALL
493
+ )
494
+ if not scene_match:
495
+ print(f"❌ Error: Could not find scene {i} in scene outline. Regex pattern: {scene_regex}")
496
+ raise ValueError(f"Scene {i} not found in scene outline")
497
+ scene_outline_i = scene_match.group(1)
498
+ scene_trace_id = str(uuid.uuid4())
499
+
500
+ return await self._generate_scene_implementation_single_enhanced(
501
+ topic, description, scene_outline_i, i, file_prefix, session_id, scene_trace_id
502
+ )
503
+
504
+ # Create tasks for all scenes
505
+ tasks = [generate_single_scene_implementation(i + 1) for i in range(scene_number)]
506
+
507
+ # Execute with progress tracking
508
+ print(f"Executing {len(tasks)} scene implementation tasks...")
509
+ try:
510
+ all_scene_implementation_plans = await asyncio.gather(*tasks, return_exceptions=True)
511
+
512
+ # Handle any exceptions
513
+ successful_plans = []
514
+ error_count = 0
515
+ for i, result in enumerate(all_scene_implementation_plans):
516
+ if isinstance(result, Exception):
517
+ print(f"❌ Error in scene {i+1}: {result}")
518
+ error_message = f"# Scene {i+1} - Error: {result}"
519
+ successful_plans.append(error_message)
520
+
521
+ # Write error to file to maintain file structure even on failure
522
+ scene_dir = os.path.join(self.output_dir, file_prefix, f"scene{i+1}")
523
+ os.makedirs(scene_dir, exist_ok=True)
524
+ error_file_path = os.path.join(scene_dir, f"{file_prefix}_scene{i+1}_implementation_plan.txt")
525
+ try:
526
+ with open(error_file_path, 'w') as f:
527
+ f.write(error_message)
528
+ except Exception as e:
529
+ print(f"❌ Failed to write error file for scene {i+1}: {e}")
530
+
531
+ error_count += 1
532
+ else:
533
+ successful_plans.append(result)
534
+ print(f"✅ Successfully generated implementation plan for scene {i+1}")
535
+
536
+ total_time = time.time() - start_time
537
+ print(f"All scene implementations completed in {total_time:.2f}s")
538
+ print(f" Average time per scene: {total_time/len(tasks):.2f}s")
539
+ print(f" Success rate: {len(tasks) - error_count}/{len(tasks)} scenes ({(len(tasks) - error_count) / len(tasks) * 100:.1f}%)")
540
+
541
+ if error_count > 0:
542
+ print(f"⚠️ Warning: {error_count} scenes had errors during implementation plan generation")
543
+
544
+ except Exception as e:
545
+ print(f"❌ Fatal error during scene implementation tasks: {e}")
546
+ raise
547
+
548
+ return successful_plans
549
+
550
+ async def __aenter__(self):
551
+ """Async context manager entry."""
552
+ return self
553
+
554
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
555
+ """Async context manager exit - cleanup resources."""
556
+ self.thread_pool.shutdown(wait=True)
557
+
558
+ # Legacy method compatibility
559
+ async def generate_scene_implementation_concurrently(self, topic: str, description: str,
560
+ plan: str, session_id: str,
561
+ scene_semaphore=None) -> List[str]:
562
+ """Legacy compatibility method - redirects to enhanced version."""
563
+ if scene_semaphore:
564
+ self.scene_semaphore = scene_semaphore
565
+ return await self.generate_scene_implementation_concurrently_enhanced(
566
+ topic, description, plan, session_id
567
+ )
568
+
569
+ def _extract_scene_outline_robust(self, response_text: str) -> str:
570
+ """
571
+ Robust extraction of scene outline that handles various XML format issues.
572
+
573
+ This method addresses common problems:
574
+ 1. XML wrapped in markdown code blocks
575
+ 2. Missing closing tags
576
+ 3. Malformed XML structure
577
+ 4. Extra text before/after XML
578
+ """
579
+ import re
580
+
581
+ # First try: Look for XML wrapped in markdown code blocks
582
+ markdown_xml_pattern = r'```xml\s*\n(<SCENE_OUTLINE>.*?</SCENE_OUTLINE>)\s*\n```'
583
+ markdown_match = re.search(markdown_xml_pattern, response_text, re.DOTALL)
584
+ if markdown_match:
585
+ xml_content = markdown_match.group(1)
586
+ return self._validate_and_fix_xml(xml_content)
587
+
588
+ # Second try: Look for direct XML tags
589
+ direct_xml_pattern = r'(<SCENE_OUTLINE>.*?</SCENE_OUTLINE>)'
590
+ direct_match = re.search(direct_xml_pattern, response_text, re.DOTALL)
591
+ if direct_match:
592
+ xml_content = direct_match.group(1)
593
+ return self._validate_and_fix_xml(xml_content)
594
+
595
+ # Third try: Look for incomplete XML and attempt to fix
596
+ incomplete_pattern = r'<SCENE_OUTLINE>(.*?)(?:</SCENE_OUTLINE>|$)'
597
+ incomplete_match = re.search(incomplete_pattern, response_text, re.DOTALL)
598
+ if incomplete_match:
599
+ xml_content = incomplete_match.group(1)
600
+ # Add missing closing tag if needed
601
+ full_xml = f"<SCENE_OUTLINE>{xml_content}</SCENE_OUTLINE>"
602
+ return self._validate_and_fix_xml(full_xml)
603
+
604
+ # If no XML structure found, return the entire response but warn
605
+ print("⚠️ Warning: No valid XML structure found in LLM response. Using full response.")
606
+ print("Response preview:", response_text[:200] + "..." if len(response_text) > 200 else response_text)
607
+ return response_text
608
+
609
+ def _validate_and_fix_xml(self, xml_content: str) -> str:
610
+ """
611
+ Validate and fix common XML issues in scene outlines.
612
+ """
613
+ import re
614
+
615
+ # Check for unclosed scene tags
616
+ scene_pattern = r'<SCENE_(\d+)>'
617
+ scene_matches = re.findall(scene_pattern, xml_content)
618
+
619
+ fixed_content = xml_content
620
+
621
+ for scene_num in scene_matches:
622
+ # Check if this scene has a proper closing tag
623
+ open_tag = f"<SCENE_{scene_num}>"
624
+ close_tag = f"</SCENE_{scene_num}>"
625
+
626
+ # Find the position of this scene's opening tag
627
+ open_pos = fixed_content.find(open_tag)
628
+ if open_pos == -1:
629
+ continue
630
+
631
+ # Find the next scene's opening tag (if any)
632
+ next_scene_pattern = f"<SCENE_{int(scene_num) + 1}>"
633
+ next_scene_pos = fixed_content.find(next_scene_pattern, open_pos)
634
+
635
+ # Check if there's a closing tag before the next scene
636
+ close_pos = fixed_content.find(close_tag, open_pos)
637
+
638
+ if close_pos == -1 or (next_scene_pos != -1 and close_pos > next_scene_pos):
639
+ # Missing or misplaced closing tag
640
+ if next_scene_pos != -1:
641
+ # Insert closing tag before next scene
642
+ insert_pos = next_scene_pos
643
+ while insert_pos > 0 and fixed_content[insert_pos - 1] in ' \n\t':
644
+ insert_pos -= 1
645
+ fixed_content = (fixed_content[:insert_pos] +
646
+ f"\n {close_tag}\n\n " +
647
+ fixed_content[insert_pos:])
648
+ else:
649
+ # Insert closing tag at the end
650
+ end_outline_pos = fixed_content.find("</SCENE_OUTLINE>")
651
+ if end_outline_pos != -1:
652
+ fixed_content = (fixed_content[:end_outline_pos] +
653
+ f"\n {close_tag}\n" +
654
+ fixed_content[end_outline_pos:])
655
+ else:
656
+ fixed_content += f"\n {close_tag}"
657
+
658
+ print(f"🔧 Fixed missing closing tag for SCENE_{scene_num}")
659
+
660
+ # Ensure proper SCENE_OUTLINE structure
661
+ if not fixed_content.strip().startswith("<SCENE_OUTLINE>"):
662
+ fixed_content = f"<SCENE_OUTLINE>\n{fixed_content}"
663
+
664
+ if not fixed_content.strip().endswith("</SCENE_OUTLINE>"):
665
+ fixed_content = f"{fixed_content}\n</SCENE_OUTLINE>"
666
+
667
+ return fixed_content
668
+
669
+ # Update class alias for backward compatibility
670
+ VideoPlanner = EnhancedVideoPlanner
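
A minimal end-to-end driving sketch for the planner, assuming `my_model` is any callable model wrapper compatible with the constructor above (names and topic are illustrative):

    import asyncio

    async def main():
        async with EnhancedVideoPlanner(planner_model=my_model,
                                        output_dir="output",
                                        max_scene_concurrency=3) as planner:
            outline = await planner.generate_scene_outline(
                "Pythagorean theorem", "A short visual proof", session_id="demo")
            plans = await planner.generate_scene_implementation_concurrently_enhanced(
                "Pythagorean theorem", "A short visual proof", outline, session_id="demo")
            print(f"{len(plans)} scene plans written under output/pythagorean_theorem/")

    asyncio.run(main())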
src/core/video_renderer.py ADDED
@@ -0,0 +1,1048 @@
1
+ import os
2
+ import re
3
+ import subprocess
4
+ import asyncio
5
+ import concurrent.futures
6
+ from PIL import Image
7
+ from typing import Optional, List, Union, Dict
8
+ import traceback
9
+ import sys
10
+ import time
11
+ import json
12
+ import hashlib
13
+ from pathlib import Path
14
+ import shutil
15
+ import tempfile
+ from fractions import Fraction
16
+
17
+ try:
18
+ import ffmpeg
19
+ except ImportError:
20
+ print("Warning: ffmpeg-python not installed. Video combination features will be limited.")
21
+ ffmpeg = None
22
+
23
+ from src.core.parse_video import (
24
+ get_images_from_video,
25
+ image_with_most_non_black_space
26
+ )
27
+
28
+
29
+ class OptimizedVideoRenderer:
30
+ """Enhanced video renderer with significant performance optimizations."""
31
+
32
+ def __init__(self, output_dir="output", print_response=False, use_visual_fix_code=False,
33
+ max_concurrent_renders=4, enable_caching=True, default_quality="medium",
34
+ use_gpu_acceleration=False, preview_mode=False):
35
+ """Initialize the enhanced VideoRenderer.
36
+
37
+ Args:
38
+ output_dir (str): Directory for output files
39
+ print_response (bool): Whether to print responses
40
+ use_visual_fix_code (bool): Whether to use visual fix code
41
+ max_concurrent_renders (int): Maximum concurrent render processes
42
+ enable_caching (bool): Enable intelligent caching system
43
+ default_quality (str): Default render quality (low/medium/high/preview)
44
+ use_gpu_acceleration (bool): Use GPU acceleration if available
45
+ preview_mode (bool): Enable preview mode for faster development
46
+ """
47
+ self.output_dir = output_dir
48
+ self.print_response = print_response
49
+ self.use_visual_fix_code = use_visual_fix_code
50
+ self.max_concurrent_renders = max_concurrent_renders
51
+ self.enable_caching = enable_caching
52
+ self.default_quality = default_quality
53
+ self.use_gpu_acceleration = use_gpu_acceleration
54
+ self.preview_mode = preview_mode
55
+
56
+ # Performance monitoring
57
+ self.render_stats = {
58
+ 'total_renders': 0,
59
+ 'cache_hits': 0,
60
+ 'total_time': 0,
61
+ 'average_time': 0
62
+ }
63
+
64
+ # Quality presets for faster rendering
65
+ self.quality_presets = {
66
+ 'preview': {'flag': '-ql', 'fps': 15, 'resolution': '480p'},
67
+ 'low': {'flag': '-ql', 'fps': 15, 'resolution': '480p'},
68
+ 'medium': {'flag': '-qm', 'fps': 30, 'resolution': '720p'},
69
+ 'high': {'flag': '-qh', 'fps': 60, 'resolution': '1080p'},
70
+ 'production': {'flag': '-qp', 'fps': 60, 'resolution': '1440p'}
71
+ }
72
+
73
+ # Cache directory for rendered scenes
74
+ self.cache_dir = os.path.join(output_dir, '.render_cache')
75
+ if enable_caching:
76
+ os.makedirs(self.cache_dir, exist_ok=True)
77
+
78
+ # Thread pool for concurrent operations
79
+ self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=max_concurrent_renders)
80
+
81
+ def _get_code_hash(self, code: str) -> str:
82
+ """Generate hash for code to enable caching."""
83
+ return hashlib.md5(code.encode()).hexdigest()
84
+
85
+ def _get_cache_path(self, code_hash: str, quality: str) -> str:
86
+ """Get cache file path for given code hash and quality."""
87
+ return os.path.join(self.cache_dir, f"{code_hash}_{quality}.mp4")
88
+
89
+ def _is_cached(self, code: str, quality: str) -> Optional[str]:
90
+ """Check if rendered video exists in cache."""
91
+ if not self.enable_caching:
92
+ return None
93
+
94
+ code_hash = self._get_code_hash(code)
95
+ cache_path = self._get_cache_path(code_hash, quality)
96
+
97
+ if os.path.exists(cache_path):
98
+ print(f"Cache hit for code hash {code_hash[:8]}...")
99
+ self.render_stats['cache_hits'] += 1
100
+ return cache_path
101
+ return None
102
+
103
+ def _save_to_cache(self, code: str, quality: str, video_path: str):
104
+ """Save rendered video to cache."""
105
+ if not self.enable_caching or not os.path.exists(video_path):
106
+ return
107
+
108
+ code_hash = self._get_code_hash(code)
109
+ cache_path = self._get_cache_path(code_hash, quality)
110
+
111
+ try:
112
+ shutil.copy2(video_path, cache_path)
113
+ print(f"Cached render for hash {code_hash[:8]}...")
114
+ except Exception as e:
115
+ print(f"Warning: Could not cache render: {e}")
116
+
117
+ async def render_scene_optimized(self, code: str, file_prefix: str, curr_scene: int,
118
+ curr_version: int, code_dir: str, media_dir: str,
119
+ quality: str = None, max_retries: int = 3,
120
+ use_visual_fix_code=False, visual_self_reflection_func=None,
121
+ banned_reasonings=None, scene_trace_id=None, topic=None,
122
+ session_id=None, code_generator=None,
123
+ scene_implementation=None, description=None,
124
+ scene_outline=None) -> tuple:
125
+ """Optimized scene rendering with intelligent error handling and code generation fixes."""
126
+
127
+ start_time = time.time()
128
+ quality = quality or self.default_quality
129
+ current_code = code
130
+
131
+ # Check cache first
132
+ cached_video = self._is_cached(current_code, quality)
133
+ if cached_video:
134
+ # Copy cached video to expected location
135
+ expected_path = self._get_expected_video_path(file_prefix, curr_scene, curr_version, media_dir)
136
+ os.makedirs(os.path.dirname(expected_path), exist_ok=True)
137
+ shutil.copy2(cached_video, expected_path)
138
+
139
+ elapsed = time.time() - start_time
140
+ print(f"Scene {curr_scene} rendered from cache in {elapsed:.2f}s")
141
+ return current_code, None
142
+
143
+ # Optimize manim command for speed
144
+ file_path = os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}.py")
145
+
146
+ # Write optimized code file
147
+ await self._write_code_file_async(file_path, current_code)
148
+
149
+ # Build optimized manim command
150
+ manim_cmd = self._build_optimized_command(file_path, media_dir, quality)
151
+
152
+ retries = 0
153
+ while retries < max_retries:
154
+ try:
155
+ print(f"🎬 Rendering scene {curr_scene} (quality: {quality}, attempt: {retries + 1})")
156
+
157
+ # Execute manim with optimizations
158
+ result = await asyncio.to_thread(
159
+ self._run_manim_optimized,
160
+ manim_cmd,
161
+ file_path
162
+ )
163
+
164
+ if result.returncode != 0:
165
+ raise Exception(result.stderr)
166
+
167
+ # Find the rendered video
168
+ video_path = self._find_rendered_video(file_prefix, curr_scene, curr_version, media_dir)
169
+
170
+ # Save to cache
171
+ self._save_to_cache(current_code, quality, video_path)
172
+
173
+ # Visual fix code processing
174
+ if use_visual_fix_code and visual_self_reflection_func and banned_reasonings:
175
+ current_code = await self._process_visual_fix(
176
+ current_code, video_path, file_prefix, curr_scene, curr_version,
177
+ code_dir, visual_self_reflection_func, banned_reasonings,
178
+ scene_trace_id, topic, session_id
179
+ )
180
+
181
+ elapsed = time.time() - start_time
182
+ self.render_stats['total_renders'] += 1
183
+ self.render_stats['total_time'] += elapsed
184
+ self.render_stats['average_time'] = self.render_stats['total_time'] / self.render_stats['total_renders']
185
+
186
+ print(f"Scene {curr_scene} rendered successfully in {elapsed:.2f}s")
187
+ print(f"Average render time: {self.render_stats['average_time']:.2f}s")
188
+
189
+ return current_code, None
190
+
191
+ except Exception as e:
192
+ print(f"Render attempt {retries + 1} failed: {e}")
193
+
194
+ # Save error log
195
+ error_log_path = os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}_error_{retries}.log")
196
+ await self._write_error_log_async(error_log_path, str(e), retries)
197
+
198
+ # Instead of blind retry, try to fix the code if we have a code generator
199
+ if code_generator and scene_implementation and retries < max_retries - 1:
200
+ print(f"🔧 Attempting to fix code using CodeGenerator (attempt {retries + 1})")
201
+ try:
202
+ fixed_code, fix_log = code_generator.fix_code_errors(
203
+ implementation_plan=scene_implementation,
204
+ code=current_code,
205
+ error=str(e),
206
+ scene_trace_id=scene_trace_id,
207
+ topic=topic,
208
+ scene_number=curr_scene,
209
+ session_id=session_id
210
+ )
211
+
212
+ if fixed_code and fixed_code != current_code:
213
+ print(f"✨ Code fix generated, updating for next attempt")
214
+ current_code = fixed_code
215
+ curr_version += 1
216
+
217
+ # Update file path and write fixed code
218
+ file_path = os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}.py")
219
+ await self._write_code_file_async(file_path, current_code)
220
+
221
+ # Update manim command for new file
222
+ manim_cmd = self._build_optimized_command(file_path, media_dir, quality)
223
+
224
+ # Log the fix
225
+ fix_log_path = os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}_fix_log.txt")
226
+ await self._write_error_log_async(fix_log_path, fix_log or "Code fix applied", 0)
227
+ else:
228
+ print(f"⚠️ Code generator returned same or empty code, doing standard retry")
229
+ except Exception as fix_error:
230
+ print(f"❌ Code fix attempt failed: {fix_error}")
231
+ # Fall back to standard retry behavior
232
+
233
+ retries += 1
234
+ if retries < max_retries:
235
+ await asyncio.sleep(1) # Brief delay before retry
236
+ else:
237
+ return current_code, str(e)
238
+
239
+ return current_code, f"Failed after {max_retries} attempts"
240
+
241
+ def _build_optimized_command(self, file_path: str, media_dir: str, quality: str) -> List[str]:
242
+ """Build optimized manim command with performance flags."""
243
+ quality_preset = self.quality_presets.get(quality, self.quality_presets['medium'])
244
+
245
+ cmd = [
246
+ "manim",
247
+ "render",
248
+ quality_preset['flag'], # Quality setting
249
+ file_path,
250
+ "--media_dir", media_dir,
251
+ "--fps", str(quality_preset['fps'])
252
+ ]
253
+
254
+ # Add caching option (only disable if needed)
255
+ if not self.enable_caching:
256
+ cmd.append("--disable_caching")
257
+
258
+ # Add GPU acceleration if available and enabled
259
+ if self.use_gpu_acceleration:
260
+ cmd.extend(["--renderer", "opengl"])
261
+
262
+ # Preview mode optimizations
263
+ if self.preview_mode or quality == 'preview':
264
+ cmd.extend([
265
+ "--save_last_frame", # Only render final frame for quick preview
266
+ "--write_to_movie" # Skip unnecessary file operations
267
+ ])
268
+
269
+ return cmd
270
+
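
For a medium-quality CPU render with caching on, the command assembled above comes out as below (paths are hypothetical):

    renderer = OptimizedVideoRenderer(output_dir="output")
    cmd = renderer._build_optimized_command(
        "output/demo/code/demo_scene1_v0.py", "output/demo/media", "medium")
    print(" ".join(cmd))
    # manim render -qm output/demo/code/demo_scene1_v0.py \
    #   --media_dir output/demo/media --fps 30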
271
+ def _run_manim_optimized(self, cmd: List[str], file_path: str) -> subprocess.CompletedProcess:
272
+ """Run manim command with optimizations."""
273
+ env = os.environ.copy()
274
+
275
+ # Optimize environment for performance
276
+ env.update({
277
+ 'MANIM_DISABLE_CACHING': 'false' if self.enable_caching else 'true',
278
+ 'MANIM_VERBOSITY': 'WARNING', # Reduce log verbosity
279
+ 'OMP_NUM_THREADS': str(os.cpu_count()), # Use all CPU cores
280
+ 'MANIM_RENDERER_TIMEOUT': '300' # 5 minute timeout
281
+ })
282
+
283
+ return subprocess.run(
284
+ cmd,
285
+ capture_output=True,
286
+ text=True,
287
+ env=env,
288
+ timeout=300 # 5 minute timeout
289
+ )
290
+
291
+ async def _write_code_file_async(self, file_path: str, code: str):
292
+ """Asynchronously write code file."""
293
+ os.makedirs(os.path.dirname(file_path), exist_ok=True)
294
+
295
+ # Add optimization hints to the code
296
+ optimized_code = self._optimize_code_for_rendering(code)
297
+
298
+ with open(file_path, 'w', encoding='utf-8') as f:
299
+ f.write(optimized_code)
300
+
301
+ def _optimize_code_for_rendering(self, code: str) -> str:
302
+ """Add optimization hints to Manim code."""
303
+ optimizations = [
304
+ "",
305
+ "# Manim rendering optimizations",
306
+ "from manim import config",
307
+ "config.frame_rate = 30 # Balanced frame rate",
308
+ "config.pixel_height = 720 # Optimized resolution",
309
+ "config.pixel_width = 1280",
310
+ ""
311
+ ]
312
+
313
+ # Find the end of manim imports specifically
314
+ lines = code.split('\n')
315
+ manim_import_end = 0
316
+
317
+ for i, line in enumerate(lines):
318
+ # Look for manim-related imports
319
+ if (line.strip().startswith('from manim') or
320
+ line.strip().startswith('import manim') or
321
+ line.strip().startswith('from manim_')):
322
+ manim_import_end = i + 1
323
+
324
+ # If no manim imports found, look for the end of all imports
325
+ if manim_import_end == 0:
326
+ for i, line in enumerate(lines):
327
+ if (line.strip().startswith(('from ', 'import ')) and
328
+ not line.strip().startswith('#')):
329
+ manim_import_end = i + 1
330
+
331
+ # Insert optimization code after manim imports
332
+ lines[manim_import_end:manim_import_end] = optimizations
333
+
334
+ return '\n'.join(lines)
335
+
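
A quick check of the splice behaviour (with one caveat: the injected `config` lines pin 720p/30fps, which overrides whatever resolution the CLI quality flag requested):

    src = "from manim import *\n\nclass Demo(Scene):\n    def construct(self):\n        pass"
    print(OptimizedVideoRenderer()._optimize_code_for_rendering(src))
    # the config block lands immediately after the `from manim import *` line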
336
+ async def _write_error_log_async(self, file_path: str, error: str, attempt: int):
337
+ """Asynchronously write error log."""
338
+ timestamp = time.strftime('%Y-%m-%d %H:%M:%S')
339
+ log_content = f"[{timestamp}] Attempt {attempt + 1}: {error}\n"
340
+
341
+ with open(file_path, 'a', encoding='utf-8') as f:
342
+ f.write(log_content)
343
+
344
+ def _get_expected_video_path(self, file_prefix: str, scene: int, version: int, media_dir: str) -> str:
345
+ """Get expected path for rendered video."""
346
+ return os.path.join(
347
+ media_dir, "videos", f"{file_prefix}_scene{scene}_v{version}",
348
+ "1080p60", f"{file_prefix}_scene{scene}_v{version}.mp4"
349
+ )
350
+
351
+ def _find_rendered_video(self, file_prefix: str, scene: int, version: int, media_dir: str) -> str:
352
+ """Find the rendered video file."""
353
+ video_dir = os.path.join(media_dir, "videos", f"{file_prefix}_scene{scene}_v{version}")
354
+
355
+ # Look in quality-specific subdirectories
356
+ for quality_dir in ["1080p60", "720p30", "480p15"]:
357
+ search_dir = os.path.join(video_dir, quality_dir)
358
+ if os.path.exists(search_dir):
359
+ for file in os.listdir(search_dir):
360
+ if file.endswith('.mp4'):
361
+ return os.path.join(search_dir, file)
362
+
363
+ raise FileNotFoundError(f"No rendered video found for scene {scene} version {version}")
364
+
365
+ async def _process_visual_fix(self, code: str, video_path: str, file_prefix: str,
366
+ scene: int, version: int, code_dir: str,
367
+ visual_self_reflection_func, banned_reasonings: List[str],
368
+ scene_trace_id: str, topic: str, session_id: str) -> str:
369
+ """Process visual fix code with optimization."""
370
+
371
+ # For Gemini/Vertex AI models, pass the video directly
372
+ if hasattr(self, 'scene_model') and self.scene_model.model_name.startswith(('gemini/', 'vertex_ai/')):
373
+ media_input = video_path
374
+ else:
375
+ # For other models, create optimized snapshot
376
+ media_input = await self._create_optimized_snapshot(topic, scene, version)
377
+
378
+ new_code, log = visual_self_reflection_func(
379
+ code, media_input, scene_trace_id=scene_trace_id,
380
+ topic=topic, scene_number=scene, session_id=session_id
381
+ )
382
+
383
+ # Save visual fix log
384
+ log_path = os.path.join(code_dir, f"{file_prefix}_scene{scene}_v{version}_vfix_log.txt")
385
+ await self._write_error_log_async(log_path, log, 0)
386
+
387
+ # Check for termination markers
388
+ if "<LGTM>" in new_code or any(word in new_code for word in banned_reasonings):
389
+ return code
390
+
391
+ # Save updated code
392
+ new_version = version + 1
393
+ new_code_path = os.path.join(code_dir, f"{file_prefix}_scene{scene}_v{new_version}.py")
394
+ await self._write_code_file_async(new_code_path, new_code)
395
+ print(f"Visual fix code saved to scene{scene}/code/{file_prefix}_scene{scene}_v{new_version}.py")
396
+
397
+ return new_code
398
+
399
+ async def render_multiple_scenes_parallel(self, scene_configs: List[Dict],
400
+ max_concurrent: int = None) -> List[tuple]:
401
+ """Render multiple scenes in parallel with optimized resource management."""
402
+
403
+ max_concurrent = max_concurrent or self.max_concurrent_renders
404
+ print(f"Starting parallel rendering of {len(scene_configs)} scenes (max concurrent: {max_concurrent})")
405
+
406
+ semaphore = asyncio.Semaphore(max_concurrent)
407
+
408
+ async def render_single_scene(config):
409
+ async with semaphore:
410
+ return await self.render_scene_optimized(**config)
411
+
412
+ start_time = time.time()
413
+
414
+ # Execute all renders concurrently
415
+ tasks = [render_single_scene(config) for config in scene_configs]
416
+ results = await asyncio.gather(*tasks, return_exceptions=True)
417
+
418
+ elapsed = time.time() - start_time
419
+ successful = sum(1 for r in results if not isinstance(r, Exception) and r[1] is None)
420
+
421
+ print(f"Parallel rendering completed in {elapsed:.2f}s")
422
+ print(f"Success rate: {successful}/{len(scene_configs)} scenes")
423
+ print(f"Cache hit rate: {self.render_stats['cache_hits']}/{self.render_stats['total_renders']} ({self.render_stats['cache_hits']/max(1,self.render_stats['total_renders'])*100:.1f}%)")
424
+
425
+ return results
426
+
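
Each entry in `scene_configs` is simply the keyword arguments of `render_scene_optimized`; a hypothetical two-scene call (run inside a coroutine):

    configs = [
        dict(code=scene1_code, file_prefix="demo", curr_scene=1, curr_version=0,
             code_dir="output/demo/scene1/code", media_dir="output/demo/media"),
        dict(code=scene2_code, file_prefix="demo", curr_scene=2, curr_version=0,
             code_dir="output/demo/scene2/code", media_dir="output/demo/media"),
    ]
    results = await renderer.render_multiple_scenes_parallel(configs, max_concurrent=2)
    # each result is a (final_code, error) tuple; error is None on success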
427
+ async def _create_optimized_snapshot(self, topic: str, scene_number: int,
428
+ version_number: int) -> Image.Image:
429
+ """Create optimized snapshot with async processing."""
430
+ file_prefix = re.sub(r'[^a-z0-9_]+', '_', topic.lower())
431
+ video_folder_path = os.path.join(
432
+ self.output_dir, file_prefix, "media", "videos",
433
+ f"{file_prefix}_scene{scene_number}_v{version_number}", "1080p60"
434
+ )
435
+
436
+ # Find video file
437
+ video_files = [f for f in os.listdir(video_folder_path) if f.endswith('.mp4')]
438
+ if not video_files:
439
+ raise FileNotFoundError(f"No mp4 files found in {video_folder_path}")
440
+
441
+ video_path = os.path.join(video_folder_path, video_files[0])
442
+
443
+ # Create snapshot asynchronously
444
+ return await asyncio.to_thread(
445
+ lambda: image_with_most_non_black_space(
446
+ get_images_from_video(video_path),
447
+ return_type="image"
448
+ )
449
+ )
450
+
451
+ async def combine_videos_optimized(self, topic: str, use_hardware_acceleration: bool = False) -> str:
452
+ """Optimized video combination with hardware acceleration and parallel processing."""
453
+
454
+ start_time = time.time()
455
+ file_prefix = re.sub(r'[^a-z0-9_]+', '_', topic.lower())
456
+
457
+ print(f"🎬 Starting optimized video combination for topic: {topic}")
458
+ print(f"🖥️ GPU Acceleration: {'Enabled' if use_hardware_acceleration else 'Disabled (CPU only)'}")
459
+
460
+ # Prepare paths
461
+ video_output_dir = os.path.join(self.output_dir, file_prefix)
462
+ output_video_path = os.path.join(video_output_dir, f"{file_prefix}_combined.mp4")
463
+ output_srt_path = os.path.join(video_output_dir, f"{file_prefix}_combined.srt")
464
+
465
+ # Check if already exists
466
+ if os.path.exists(output_video_path):
467
+ print(f"Combined video already exists at {output_video_path}")
468
+ return output_video_path
469
+
470
+ # Get scene information
471
+ scene_videos, scene_subtitles = await self._gather_scene_files_async(file_prefix)
472
+
473
+ if not scene_videos:
474
+ raise ValueError("No scene videos found to combine")
475
+
476
+ print(f"📹 Found {len(scene_videos)} scene videos to combine")
477
+
478
+ try:
479
+ if ffmpeg is None:
480
+ print("⚠️ ffmpeg-python not available, using direct FFmpeg fallback...")
481
+ fallback_output = await self._fallback_video_combination(scene_videos, output_video_path)
482
+ print(f"✅ Direct FFmpeg combination successful: {fallback_output}")
483
+ return fallback_output
484
+
485
+ # Analyze videos in parallel
486
+ print("🔍 Analyzing video properties...")
487
+ analysis_tasks = [
488
+ asyncio.to_thread(self._analyze_video, video)
489
+ for video in scene_videos
490
+ ]
491
+ video_info = await asyncio.gather(*analysis_tasks)
492
+
493
+ has_audio = [info['has_audio'] for info in video_info]
494
+ print(f"🎵 Audio tracks found: {sum(has_audio)}/{len(scene_videos)} videos")
495
+
496
+ # Build optimized ffmpeg command
497
+ if any(has_audio):
498
+ print("🎵 Combining videos with audio tracks...")
499
+ await self._combine_with_audio_optimized(
500
+ scene_videos, video_info, output_video_path, use_hardware_acceleration
501
+ )
502
+ else:
503
+ print("🔇 Combining videos without audio...")
504
+ await self._combine_without_audio_optimized(
505
+ scene_videos, output_video_path, use_hardware_acceleration
506
+ )
507
+
508
+ # Verify the output file was created and is valid
509
+ if not os.path.exists(output_video_path):
510
+ raise FileNotFoundError(f"Output video was not created: {output_video_path}")
511
+
512
+ # Check if the video file is valid
513
+ file_size = os.path.getsize(output_video_path)
514
+ if file_size < 1024: # Less than 1KB is probably invalid
515
+ raise ValueError(f"Output video file seems invalid (size: {file_size} bytes)")
516
+
517
+ print(f"✅ Video file created successfully (size: {file_size / (1024*1024):.2f} MB)")
518
+
519
+ # Combine subtitles if available
520
+ if scene_subtitles:
521
+ print("📝 Combining subtitles...")
522
+ await self._combine_subtitles_async(scene_subtitles, scene_videos, output_srt_path)
523
+
524
+ elapsed = time.time() - start_time
525
+ print(f"🎉 Video combination completed in {elapsed:.2f}s")
526
+ print(f"📁 Output: {output_video_path}")
527
+
528
+ return output_video_path
529
+
530
+ except Exception as e:
531
+ print(f"❌ Error in optimized video combination: {e}")
532
+ print("🔧 Attempting fallback video combination...")
533
+
534
+ # Fallback to simple concatenation
535
+ try:
536
+ fallback_output = await self._fallback_video_combination(scene_videos, output_video_path)
537
+ print(f"✅ Fallback combination successful: {fallback_output}")
538
+ return fallback_output
539
+ except Exception as fallback_error:
540
+ print(f"❌ Fallback combination also failed: {fallback_error}")
541
+ traceback.print_exc()
542
+ raise
543
+
544
+ async def _gather_scene_files_async(self, file_prefix: str) -> tuple:
545
+ """Asynchronously gather scene video and subtitle files."""
546
+ search_path = os.path.join(self.output_dir, file_prefix, "media", "videos")
547
+
548
+ # Get scene count
549
+ scene_outline_path = os.path.join(self.output_dir, file_prefix, f"{file_prefix}_scene_outline.txt")
550
+ with open(scene_outline_path) as f:
551
+ plan = f.read()
552
+
553
+ scene_outline_match = re.search(r'(<SCENE_OUTLINE>.*?</SCENE_OUTLINE>)', plan, re.DOTALL)
554
+ if not scene_outline_match:
555
+ print(f"No scene outline found in plan: {plan[:200]}...")
556
+ return [], []  # keep the (videos, subtitles) tuple shape expected by callers
557
+ scene_outline = scene_outline_match.group(1)
558
+ scene_count = len(re.findall(r'<SCENE_(\d+)>[^<]', scene_outline))
559
+
560
+ # Find scene files in parallel
561
+ tasks = [
562
+ asyncio.to_thread(self._find_scene_files, search_path, file_prefix, scene_num)
563
+ for scene_num in range(1, scene_count + 1)
564
+ ]
565
+
566
+ results = await asyncio.gather(*tasks)
567
+
568
+ scene_videos = []
569
+ scene_subtitles = []
570
+
571
+ for video, subtitle in results:
572
+ if video:
573
+ scene_videos.append(video)
574
+ scene_subtitles.append(subtitle)
575
+
576
+ return scene_videos, scene_subtitles
577
+
578
+ def _find_scene_files(self, search_path: str, file_prefix: str, scene_num: int) -> tuple:
579
+ """Find video and subtitle files for a specific scene."""
580
+ scene_folders = []
581
+ for root, dirs, files in os.walk(search_path):
582
+ for dir in dirs:
583
+ if dir.startswith(f"{file_prefix}_scene{scene_num}"):
584
+ scene_folders.append(os.path.join(root, dir))
585
+
586
+ if not scene_folders:
587
+ return None, None
588
+
589
+ # Get latest version
590
+ scene_folders.sort(key=lambda f: int(f.split("_v")[-1]) if "_v" in f else 0)
591
+ folder = scene_folders[-1]
592
+
593
+ video_file = None
594
+ subtitle_file = None
595
+
596
+ quality_dirs = ["1080p60", "720p30", "480p15"]
597
+ for quality_dir in quality_dirs:
598
+ quality_path = os.path.join(folder, quality_dir)
599
+ if os.path.exists(quality_path):
600
+ for filename in os.listdir(quality_path):
601
+ if filename.endswith('.mp4') and not video_file:
602
+ video_file = os.path.join(quality_path, filename)
603
+ elif filename.endswith('.srt') and not subtitle_file:
604
+ subtitle_file = os.path.join(quality_path, filename)
605
+ break
606
+
607
+ return video_file, subtitle_file
608
+
609
+ def _analyze_video(self, video_path: str) -> Dict:
610
+ """Analyze video properties for optimization."""
611
+ if ffmpeg is None:
612
+ # Fallback analysis using direct FFmpeg probe
613
+ import subprocess
614
+ import json
615
+
616
+ try:
617
+ cmd = [
618
+ 'ffprobe',
619
+ '-v', 'quiet',
620
+ '-print_format', 'json',
621
+ '-show_streams',
622
+ video_path
623
+ ]
624
+
625
+ result = subprocess.run(cmd, capture_output=True, text=True, check=True)
626
+ probe_data = json.loads(result.stdout)
627
+
628
+ video_stream = next(stream for stream in probe_data['streams'] if stream['codec_type'] == 'video')
629
+ audio_streams = [stream for stream in probe_data['streams'] if stream['codec_type'] == 'audio']
630
+
631
+ return {
632
+ 'path': video_path,
633
+ 'duration': float(video_stream.get('duration', 0)),
634
+ 'has_audio': len(audio_streams) > 0,
635
+ 'width': int(video_stream.get('width', 1920)),
636
+ 'height': int(video_stream.get('height', 1080)),
637
+ 'fps': float(Fraction(video_stream.get('avg_frame_rate', '30/1')))  # avoid eval on probe output
638
+ }
639
+ except Exception as e:
640
+ print(f"Warning: Could not analyze video {video_path}: {e}")
641
+ # Return default values
642
+ return {
643
+ 'path': video_path,
644
+ 'duration': 10.0, # Default duration
645
+ 'has_audio': False,
646
+ 'width': 1920,
647
+ 'height': 1080,
648
+ 'fps': 30
649
+ }
650
+
651
+ probe = ffmpeg.probe(video_path)
652
+ video_stream = next(stream for stream in probe['streams'] if stream['codec_type'] == 'video')
653
+ audio_streams = [stream for stream in probe['streams'] if stream['codec_type'] == 'audio']
654
+
655
+ return {
656
+ 'path': video_path,
657
+ 'duration': float(video_stream['duration']),
658
+ 'has_audio': len(audio_streams) > 0,
659
+ 'width': int(video_stream['width']),
660
+ 'height': int(video_stream['height']),
661
+ 'fps': float(Fraction(video_stream['avg_frame_rate']))  # avoid eval on probe output
662
+ }
663
+
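
`ffprobe -print_format json -show_streams` returns a `streams` array whose objects carry the fields read above; a trimmed sample of that shape as a Python dict:

    probe_data = {
        "streams": [
            {"codec_type": "video", "width": 1920, "height": 1080,
             "duration": "12.5", "avg_frame_rate": "30/1"},
            {"codec_type": "audio", "codec_name": "aac"},
        ]
    }
    video = next(s for s in probe_data["streams"] if s["codec_type"] == "video")
    has_audio = any(s["codec_type"] == "audio" for s in probe_data["streams"])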
664
    async def _combine_with_audio_optimized(self, scene_videos: List[str], video_info: List[Dict],
                                            output_path: str, use_hardware_acceleration: bool):
        """Combine videos with audio, using hardware acceleration when available."""
        import ffmpeg

        streams = []
        for video_path, info in zip(scene_videos, video_info):
            input_vid = ffmpeg.input(video_path)

            if info['has_audio']:
                streams.extend([input_vid['v'], input_vid['a']])
            else:
                # Add silent audio so concatenation keeps audio and video in sync
                silent_audio = ffmpeg.input(
                    'anullsrc=channel_layout=stereo:sample_rate=44100',
                    f='lavfi', t=info['duration']
                )['a']
                streams.extend([input_vid['v'], silent_audio])

        # Build optimized encoding options for maximum compatibility
        encode_options = {
            'c:v': 'libx264',          # Use libx264 for maximum compatibility
            'c:a': 'aac',              # AAC audio codec
            'preset': 'medium',        # Balanced preset for good quality/speed
            'crf': '23',               # Good quality/speed balance
            'pix_fmt': 'yuv420p',      # Pixel format for maximum compatibility
            'movflags': '+faststart',  # Enable fast start for web playback
            'r': '30',                 # Set frame rate to 30 fps
            'threads': '0',            # Use all available threads
            'profile:v': 'high',       # H.264 profile for better compatibility
            'level': '4.0'             # H.264 level for broad device support
        }

        # Only use hardware acceleration if explicitly requested and working
        if use_hardware_acceleration:
            try:
                # Test whether NVENC is available with a one-second synthetic encode
                test_cmd = ['ffmpeg', '-f', 'lavfi', '-i', 'testsrc=duration=1:size=320x240:rate=1',
                            '-c:v', 'h264_nvenc', '-f', 'null', '-']
                test_result = subprocess.run(test_cmd, capture_output=True, text=True, timeout=10)

                if test_result.returncode == 0:
                    encode_options.update({
                        'c:v': 'h264_nvenc',
                        'preset': 'fast',  # NVENC preset
                        'profile:v': 'high',
                        'level': '4.0',
                        'rc': 'constqp',   # Constant quality mode
                        'qp': '23'         # Quality parameter
                    })
                    print("✅ Using NVIDIA hardware acceleration")
                else:
                    print("⚠️ NVIDIA hardware acceleration not available, using CPU encoding")
            except Exception as e:
                print(f"⚠️ Hardware acceleration test failed: {e}, using CPU encoding")

        concat = ffmpeg.concat(*streams, v=1, a=1, unsafe=True)

        # Run with progress monitoring
        process = (
            concat
            .output(output_path, **encode_options)
            .overwrite_output()
            .run_async(pipe_stdout=True, pipe_stderr=True)
        )

        await self._monitor_ffmpeg_progress(process, "audio combination")

    async def _combine_without_audio_optimized(self, scene_videos: List[str],
                                               output_path: str, use_hardware_acceleration: bool):
        """Combine videos without audio, using hardware acceleration when available."""
        import ffmpeg

        streams = [ffmpeg.input(video)['v'] for video in scene_videos]

        # Build encoding options for maximum compatibility
        encode_options = {
            'c:v': 'libx264',          # Use libx264 for maximum compatibility
            'preset': 'medium',        # Balanced preset
            'crf': '20',               # Good quality
            'pix_fmt': 'yuv420p',      # Pixel format for maximum compatibility
            'movflags': '+faststart',  # Enable fast start
            'r': '30',                 # Set frame rate to 30 fps
            'threads': '0',            # Use all available threads
            'profile:v': 'high',       # H.264 profile
            'level': '4.0'             # H.264 level
        }

        # Test hardware acceleration availability
        if use_hardware_acceleration:
            try:
                # Test whether NVENC is available
                test_cmd = ['ffmpeg', '-f', 'lavfi', '-i', 'testsrc=duration=1:size=320x240:rate=1',
                            '-c:v', 'h264_nvenc', '-f', 'null', '-']
                test_result = subprocess.run(test_cmd, capture_output=True, text=True, timeout=10)

                if test_result.returncode == 0:
                    encode_options.update({
                        'c:v': 'h264_nvenc',
                        'preset': 'fast',
                        'profile:v': 'high',
                        'level': '4.0',
                        'rc': 'constqp',
                        'qp': '20'
                    })
                    print("✅ Using NVIDIA hardware acceleration for video-only combination")
                else:
                    print("⚠️ NVIDIA hardware acceleration not available, using CPU encoding")
            except Exception as e:
                print(f"⚠️ Hardware acceleration test failed: {e}, using CPU encoding")

        concat = ffmpeg.concat(*streams, v=1, unsafe=True)

        process = (
            concat
            .output(output_path, **encode_options)
            .overwrite_output()
            .run_async(pipe_stdout=True, pipe_stderr=True)
        )

        await self._monitor_ffmpeg_progress(process, "video combination")

    async def _monitor_ffmpeg_progress(self, process, operation_name: str):
        """Monitor FFmpeg progress asynchronously."""
        print(f"Starting {operation_name}...")

        # FFmpeg writes its progress lines to stderr, so read (and keep) them
        # from there; reading stdout would yield nothing and skip the display
        stderr_lines = []
        while True:
            line = await asyncio.to_thread(process.stderr.readline)
            if not line:
                break

            line = line.decode('utf-8')
            stderr_lines.append(line)
            if 'frame=' in line:
                # Extract progress information
                frame_match = re.search(r'frame=\s*(\d+)', line)
                time_match = re.search(r'time=(\d+:\d+:\d+\.\d+)', line)

                if frame_match and time_match:
                    frame = frame_match.group(1)
                    time_str = time_match.group(1)
                    print(f"\r⚡ Processing: frame={frame}, time={time_str}", end='', flush=True)

        await asyncio.to_thread(process.wait)
        print(f"\n{operation_name} completed!")

        if process.returncode != 0:
            raise Exception(f"FFmpeg error: {''.join(stderr_lines)}")

    async def _combine_subtitles_async(self, scene_subtitles: List[str],
                                       scene_videos: List[str], output_path: str):
        """Combine subtitles asynchronously."""

        def combine_subtitles():
            with open(output_path, 'w', encoding='utf-8') as outfile:
                current_time_offset = 0
                subtitle_index = 1

                for srt_file, video_file in zip(scene_subtitles, scene_videos):
                    if srt_file is None:
                        continue

                    with open(srt_file, 'r', encoding='utf-8') as infile:
                        lines = infile.readlines()
                        i = 0
                        while i < len(lines):
                            line = lines[i].strip()
                            if line.isdigit():
                                outfile.write(f"{subtitle_index}\n")
                                subtitle_index += 1
                                i += 1

                                time_line = lines[i].strip()
                                start_time, end_time = time_line.split(' --> ')

                                def adjust_time(time_str, offset):
                                    h, m, s = time_str.replace(',', '.').split(':')
                                    total_seconds = float(h) * 3600 + float(m) * 60 + float(s) + offset
                                    h = int(total_seconds // 3600)
                                    m = int((total_seconds % 3600) // 60)
                                    s = total_seconds % 60
                                    return f"{h:02d}:{m:02d}:{s:06.3f}".replace('.', ',')

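                                # A worked example of the offset arithmetic above
                                # (illustrative values):
                                #   adjust_time("00:00:02,500", 90.0) -> "00:01:32,500"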
                                new_start = adjust_time(start_time, current_time_offset)
                                new_end = adjust_time(end_time, current_time_offset)
                                outfile.write(f"{new_start} --> {new_end}\n")
                                i += 1

                                while i < len(lines) and lines[i].strip():
                                    outfile.write(lines[i])
                                    i += 1
                                outfile.write('\n')
                            else:
                                i += 1

                    # Advance the offset by this scene's length; the container
                    # duration is more reliable than the first stream's metadata
                    import ffmpeg
                    probe = ffmpeg.probe(video_file)
                    current_time_offset += float(probe['format']['duration'])

        await asyncio.to_thread(combine_subtitles)
        print(f"Subtitles combined to {output_path}")

    def get_performance_stats(self) -> Dict:
        """Get current performance statistics."""
        return {
            **self.render_stats,
            'cache_hit_rate': self.render_stats['cache_hits'] / max(1, self.render_stats['total_renders']),
            'cache_enabled': self.enable_caching,
            'concurrent_renders': self.max_concurrent_renders
        }

    def cleanup_cache(self, max_age_days: int = 7):
        """Clean up old cache files."""
        if not self.enable_caching:
            return

        import time
        current_time = time.time()
        max_age_seconds = max_age_days * 24 * 60 * 60

        for file in os.listdir(self.cache_dir):
            file_path = os.path.join(self.cache_dir, file)
            if os.path.getmtime(file_path) < current_time - max_age_seconds:
                os.remove(file_path)
                print(f"Removed old cache file: {file}")

    async def __aenter__(self):
        """Async context manager entry."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit."""
        self.executor.shutdown(wait=True)

    def render_scene(self, code: str, file_prefix: str, curr_scene: int,
                     curr_version: int, code_dir: str, media_dir: str,
                     use_visual_fix_code=False, visual_self_reflection_func=None,
                     banned_reasonings=None, scene_trace_id=None, topic=None,
                     session_id=None, code_generator=None, scene_implementation=None,
                     description=None, scene_outline=None) -> tuple:
        """Legacy render_scene method for backward compatibility."""
        # Run the async method synchronously
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            result = loop.run_until_complete(
                self.render_scene_optimized(
                    code=code,
                    file_prefix=file_prefix,
                    curr_scene=curr_scene,
                    curr_version=curr_version,
                    code_dir=code_dir,
                    media_dir=media_dir,
                    use_visual_fix_code=use_visual_fix_code,
                    visual_self_reflection_func=visual_self_reflection_func,
                    banned_reasonings=banned_reasonings,
                    scene_trace_id=scene_trace_id,
                    topic=topic,
                    session_id=session_id,
                    code_generator=code_generator,
                    scene_implementation=scene_implementation,
                    description=description,
                    scene_outline=scene_outline
                )
            )
            return result
        finally:
            loop.close()

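    # A minimal sketch of the equivalent modern pattern (assuming no event loop
    # is already running in the calling thread):
    #   def render_scene(self, *args, **kwargs):
    #       return asyncio.run(self.render_scene_optimized(*args, **kwargs))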
    def combine_videos(self, topic: str) -> str:
        """Legacy combine_videos method for backward compatibility."""
        # Run the async method synchronously
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            result = loop.run_until_complete(
                self.combine_videos_optimized(topic=topic)
            )
            return result
        finally:
            loop.close()

    async def _fallback_video_combination(self, scene_videos: List[str], output_path: str) -> str:
        """Simple fallback video combination using direct FFmpeg commands."""

        print("🔧 Using fallback video combination method...")

        # Create a temporary file list for the concat demuxer
        temp_dir = tempfile.mkdtemp()
        file_list_path = os.path.join(temp_dir, "file_list.txt")

        try:
            # Write the file list for the concat demuxer
            with open(file_list_path, 'w') as f:
                for video in scene_videos:
                    # Ensure proper path format for the concat demuxer
                    video_path = os.path.abspath(video).replace('\\', '/')
                    f.write(f"file '{video_path}'\n")

            print(f"📝 Created file list: {file_list_path}")
            print(f"🎬 Combining {len(scene_videos)} videos using direct FFmpeg...")

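            # The generated list follows the concat-demuxer format, e.g.
            # (paths illustrative):
            #   file '/abs/path/scene1.mp4'
            #   file '/abs/path/scene2.mp4'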
            # Use a direct FFmpeg command for maximum compatibility
            cmd = [
                'ffmpeg',
                '-f', 'concat',
                '-safe', '0',
                '-i', file_list_path,
                '-c:v', 'libx264',
                '-c:a', 'aac',
                '-preset', 'fast',
                '-crf', '25',
                '-pix_fmt', 'yuv420p',
                '-movflags', '+faststart',
                '-avoid_negative_ts', 'make_zero',
                '-y',  # Overwrite output file
                output_path
            ]

            print(f"🔧 Running command: {' '.join(cmd)}")

            # Run the command
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )

            # Monitor progress
            async def read_stderr():
                stderr_output = []
                while True:
                    line = await process.stderr.readline()
                    if not line:
                        break

                    line_str = line.decode('utf-8').strip()
                    stderr_output.append(line_str)

                    if 'frame=' in line_str:
                        frame_match = re.search(r'frame=\s*(\d+)', line_str)
                        time_match = re.search(r'time=(\d+:\d+:\d+\.\d+)', line_str)

                        if frame_match and time_match:
                            frame = frame_match.group(1)
                            time_str = time_match.group(1)
                            print(f"\r🔧 Fallback processing: frame={frame}, time={time_str}", end='', flush=True)

                return stderr_output

            # Wait for completion
            stderr_task = asyncio.create_task(read_stderr())
            await process.wait()
            stderr_output = await stderr_task

            print("\n🔧 Fallback combination completed!")

            if process.returncode != 0:
                error_msg = '\n'.join(stderr_output)
                print(f"❌ FFmpeg error output:\n{error_msg}")
                raise Exception(f"Direct FFmpeg command failed with return code {process.returncode}")

            # Verify the output
            if not os.path.exists(output_path):
                raise FileNotFoundError(f"Fallback output video was not created: {output_path}")

            file_size = os.path.getsize(output_path)
            if file_size < 1024:
                raise ValueError(f"Fallback output video file seems invalid (size: {file_size} bytes)")

            print(f"✅ Fallback video created successfully (size: {file_size / (1024*1024):.2f} MB)")
            return output_path

        finally:
            # Clean up temporary files
            try:
                if os.path.exists(file_list_path):
                    os.remove(file_list_path)
                os.rmdir(temp_dir)
            except Exception as e:
                print(f"⚠️ Could not clean up temp files: {e}")

# Backward compatibility alias
VideoRenderer = OptimizedVideoRenderer
src/rag/__init__.py ADDED
File without changes
src/rag/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (151 Bytes).
 
src/rag/__pycache__/rag_integration.cpython-312.pyc ADDED
Binary file (22.1 kB).
 
src/rag/__pycache__/vector_store.cpython-312.pyc ADDED
Binary file (23.2 kB).
 
src/rag/rag_integration.py ADDED
@@ -0,0 +1,410 @@
import os
import re
import json
from typing import List, Dict

from mllm_tools.utils import _prepare_text_inputs
from task_generator import (
    get_prompt_rag_query_generation_fix_error,
    get_prompt_detect_plugins,
    get_prompt_rag_query_generation_technical,
    get_prompt_rag_query_generation_vision_storyboard,
    get_prompt_rag_query_generation_narration,
    get_prompt_rag_query_generation_code
)
from src.rag.vector_store import EnhancedRAGVectorStore as RAGVectorStore

class RAGIntegration:
    """Class for integrating RAG (Retrieval Augmented Generation) functionality.

    This class handles RAG integration including plugin detection, query generation,
    and document retrieval.

    Args:
        helper_model: Model used for generating queries and processing text
        output_dir (str): Directory for output files
        chroma_db_path (str): Path to ChromaDB
        manim_docs_path (str): Path to Manim documentation
        embedding_model (str): Name of embedding model to use
        use_langfuse (bool, optional): Whether to use Langfuse logging. Defaults to True
        session_id (str, optional): Session identifier. Defaults to None
    """

    def __init__(self, helper_model, output_dir, chroma_db_path, manim_docs_path, embedding_model, use_langfuse=True, session_id=None):
        self.helper_model = helper_model
        self.output_dir = output_dir
        self.manim_docs_path = manim_docs_path
        self.session_id = session_id
        self.relevant_plugins = None

        self.vector_store = RAGVectorStore(
            chroma_db_path=chroma_db_path,
            manim_docs_path=manim_docs_path,
            embedding_model=embedding_model,
            session_id=self.session_id,
            use_langfuse=use_langfuse,
            helper_model=helper_model
        )

    def set_relevant_plugins(self, plugins: List[str]) -> None:
        """Set the relevant plugins for the current video.

        Args:
            plugins (List[str]): List of plugin names to set as relevant
        """
        self.relevant_plugins = plugins

    def detect_relevant_plugins(self, topic: str, description: str) -> List[str]:
        """Detect which plugins might be relevant based on topic and description.

        Args:
            topic (str): Topic of the video
            description (str): Description of the video content

        Returns:
            List[str]: List of detected relevant plugin names
        """
        # Load plugin descriptions
        plugins = self._load_plugin_descriptions()
        if not plugins:
            return []

        # Get the formatted prompt using the task_generator function
        prompt = get_prompt_detect_plugins(
            topic=topic,
            description=description,
            plugin_descriptions=json.dumps([{'name': p['name'], 'description': p['description']} for p in plugins], indent=2)
        )

        try:
            response = self.helper_model(
                _prepare_text_inputs(prompt),
                metadata={"generation_name": "detect-relevant-plugins", "tags": [topic, "plugin-detection"], "session_id": self.session_id}
            )
            # Clean the response so that only the JSON array is parsed
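            # The model is expected to wrap its answer in a fenced JSON block,
            # e.g. (plugin names illustrative only):
            #   ```json
            #   ["manim-physics", "manim-slides"]
            #   ```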
            json_match = re.search(r'```json(.*)```', response, re.DOTALL)
            if not json_match:
                print(f"No JSON block found in plugin detection response: {response[:200]}...")
                return []
            response = json_match.group(1)
            try:
                relevant_plugins = json.loads(response)
            except json.JSONDecodeError as e:
                print(f"JSONDecodeError when parsing relevant plugins: {e}")
                print(f"Response text was: {response}")
                return []

            print(f"LLM detected relevant plugins: {relevant_plugins}")
            return relevant_plugins
        except Exception as e:
            print(f"Error detecting plugins with LLM: {e}")
            return []

    def _load_plugin_descriptions(self) -> list:
        """Load plugin descriptions from JSON file.

        Returns:
            list: List of plugin descriptions, empty list if loading fails
        """
        try:
            plugin_config_path = os.path.join(
                self.manim_docs_path,
                "plugin_docs",
                "plugins.json"
            )
            if os.path.exists(plugin_config_path):
                with open(plugin_config_path, "r") as f:
                    return json.load(f)
            else:
                print(f"Plugin descriptions file not found at {plugin_config_path}")
                return []
        except Exception as e:
            print(f"Error loading plugin descriptions: {e}")
            return []

    def _generate_rag_queries_storyboard(self, scene_plan: str, scene_trace_id: str = None, topic: str = None, scene_number: int = None, session_id: str = None, relevant_plugins: List[str] = []) -> List[str]:
        """Generate RAG queries from the scene plan to help create the storyboard.

        Args:
            scene_plan (str): Scene plan text to generate queries from
            scene_trace_id (str, optional): Trace identifier for the scene. Defaults to None
            topic (str, optional): Topic name. Defaults to None
            scene_number (int, optional): Scene number. Defaults to None
            session_id (str, optional): Session identifier. Defaults to None
            relevant_plugins (List[str], optional): List of relevant plugins. Defaults to empty list

        Returns:
            List[str]: List of generated RAG queries
        """
        cache_key = f"{topic}_scene{scene_number}_storyboard_rag"
        cache_dir = os.path.join(self.output_dir, re.sub(r'[^a-z0-9_]+', '_', topic.lower()), f"scene{scene_number}", "rag_cache")
        os.makedirs(cache_dir, exist_ok=True)
        cache_file = os.path.join(cache_dir, "rag_queries_storyboard.json")

        if os.path.exists(cache_file):
            with open(cache_file, 'r') as f:
                return json.load(f)

        # Format relevant plugins as a string
        plugins_str = ", ".join(relevant_plugins) if relevant_plugins else "No plugins are relevant."

        # Generate the prompt with only the required arguments
        prompt = get_prompt_rag_query_generation_vision_storyboard(
            scene_plan=scene_plan,
            relevant_plugins=plugins_str
        )
        queries = self.helper_model(
            _prepare_text_inputs(prompt),
            metadata={"generation_name": "rag_query_generation_storyboard", "trace_id": scene_trace_id, "tags": [topic, f"scene{scene_number}"], "session_id": session_id}
        )

        # Retrieve the JSON between triple backticks, handling decode errors
        try:
            json_match = re.search(r'```json(.*)```', queries, re.DOTALL)
            if not json_match:
                print(f"No JSON block found in storyboard RAG queries response: {queries[:200]}...")
                return []
            queries = json.loads(json_match.group(1))
        except json.JSONDecodeError as e:
            print(f"JSONDecodeError when parsing RAG queries for storyboard: {e}")
            print(f"Response text was: {queries}")
            return []  # Return empty list in case of parsing error

        # Cache the queries
        with open(cache_file, 'w') as f:
            json.dump(queries, f)

        return queries

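    # Cache layout sketch (topic/scene values illustrative): the queries above
    # are stored under
    #   {output_dir}/binary_search/scene2/rag_cache/rag_queries_storyboard.json
    # and the same pattern is reused by the technical, narration, code and
    # error-fix caches below.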
    def _generate_rag_queries_technical(self, storyboard: str, scene_trace_id: str = None, topic: str = None, scene_number: int = None, session_id: str = None, relevant_plugins: List[str] = []) -> List[str]:
        """Generate RAG queries from the storyboard to help create the technical implementation.

        Args:
            storyboard (str): Storyboard text to generate queries from
            scene_trace_id (str, optional): Trace identifier for the scene. Defaults to None
            topic (str, optional): Topic name. Defaults to None
            scene_number (int, optional): Scene number. Defaults to None
            session_id (str, optional): Session identifier. Defaults to None
            relevant_plugins (List[str], optional): List of relevant plugins. Defaults to empty list

        Returns:
            List[str]: List of generated RAG queries
        """
        cache_key = f"{topic}_scene{scene_number}_technical_rag"
        cache_dir = os.path.join(self.output_dir, re.sub(r'[^a-z0-9_]+', '_', topic.lower()), f"scene{scene_number}", "rag_cache")
        os.makedirs(cache_dir, exist_ok=True)
        cache_file = os.path.join(cache_dir, "rag_queries_technical.json")

        if os.path.exists(cache_file):
            with open(cache_file, 'r') as f:
                return json.load(f)

        prompt = get_prompt_rag_query_generation_technical(
            storyboard=storyboard,
            relevant_plugins=", ".join(relevant_plugins) if relevant_plugins else "No plugins are relevant."
        )

        queries = self.helper_model(
            _prepare_text_inputs(prompt),
            metadata={"generation_name": "rag_query_generation_technical", "trace_id": scene_trace_id, "tags": [topic, f"scene{scene_number}"], "session_id": session_id}
        )

        # Retrieve the JSON between triple backticks, handling decode errors
        try:
            json_match = re.search(r'```json(.*)```', queries, re.DOTALL)
            if not json_match:
                print(f"No JSON block found in technical RAG queries response: {queries[:200]}...")
                return []
            queries = json.loads(json_match.group(1))
        except json.JSONDecodeError as e:
            print(f"JSONDecodeError when parsing RAG queries for technical implementation: {e}")
            print(f"Response text was: {queries}")
            return []  # Return empty list in case of parsing error

        # Cache the queries
        with open(cache_file, 'w') as f:
            json.dump(queries, f)

        return queries

    def _generate_rag_queries_narration(self, storyboard: str, scene_trace_id: str = None, topic: str = None, scene_number: int = None, session_id: str = None, relevant_plugins: List[str] = []) -> List[str]:
        """Generate RAG queries from the storyboard to help create the narration plan.

        Args:
            storyboard (str): Storyboard text to generate queries from
            scene_trace_id (str, optional): Trace identifier for the scene. Defaults to None
            topic (str, optional): Topic name. Defaults to None
            scene_number (int, optional): Scene number. Defaults to None
            session_id (str, optional): Session identifier. Defaults to None
            relevant_plugins (List[str], optional): List of relevant plugins. Defaults to empty list

        Returns:
            List[str]: List of generated RAG queries
        """
        cache_key = f"{topic}_scene{scene_number}_narration_rag"
        cache_dir = os.path.join(self.output_dir, re.sub(r'[^a-z0-9_]+', '_', topic.lower()), f"scene{scene_number}", "rag_cache")
        os.makedirs(cache_dir, exist_ok=True)
        cache_file = os.path.join(cache_dir, "rag_queries_narration.json")

        if os.path.exists(cache_file):
            with open(cache_file, 'r') as f:
                return json.load(f)

        prompt = get_prompt_rag_query_generation_narration(
            storyboard=storyboard,
            relevant_plugins=", ".join(relevant_plugins) if relevant_plugins else "No plugins are relevant."
        )

        queries = self.helper_model(
            _prepare_text_inputs(prompt),
            metadata={"generation_name": "rag_query_generation_narration", "trace_id": scene_trace_id, "tags": [topic, f"scene{scene_number}"], "session_id": session_id}
        )

        # Retrieve the JSON between triple backticks, handling decode errors
        try:
            json_match = re.search(r'```json(.*)```', queries, re.DOTALL)
            if not json_match:
                print(f"No JSON block found in narration RAG queries response: {queries[:200]}...")
                return []
            queries = json.loads(json_match.group(1))
        except json.JSONDecodeError as e:
            print(f"JSONDecodeError when parsing narration RAG queries: {e}")
            print(f"Response text was: {queries}")
            return []  # Return empty list in case of parsing error

        # Cache the queries
        with open(cache_file, 'w') as f:
            json.dump(queries, f)

        return queries

    def get_relevant_docs(self, rag_queries: List[Dict], scene_trace_id: str, topic: str, scene_number: int) -> List[str]:
        """Get relevant documentation using the vector store.

        Args:
            rag_queries (List[Dict]): List of RAG queries to search for
            scene_trace_id (str): Trace identifier for the scene
            topic (str): Topic name
            scene_number (int): Scene number

        Returns:
            List[str]: List of relevant documentation snippets
        """
        return self.vector_store.find_relevant_docs(
            queries=rag_queries,
            k=2,
            trace_id=scene_trace_id,
            topic=topic,
            scene_number=scene_number
        )

    def _generate_rag_queries_code(self, implementation_plan: str, scene_trace_id: str = None, topic: str = None, scene_number: int = None, relevant_plugins: List[str] = None) -> List[str]:
        """Generate RAG queries from the implementation plan.

        Args:
            implementation_plan (str): Implementation plan text to generate queries from
            scene_trace_id (str, optional): Trace identifier for the scene. Defaults to None
            topic (str, optional): Topic name. Defaults to None
            scene_number (int, optional): Scene number. Defaults to None
            relevant_plugins (List[str], optional): List of relevant plugins. Defaults to None

        Returns:
            List[str]: List of generated RAG queries
        """
        cache_key = f"{topic}_scene{scene_number}"
        cache_dir = os.path.join(self.output_dir, re.sub(r'[^a-z0-9_]+', '_', topic.lower()), f"scene{scene_number}", "rag_cache")
        os.makedirs(cache_dir, exist_ok=True)
        cache_file = os.path.join(cache_dir, "rag_queries_code.json")

        if os.path.exists(cache_file):
            with open(cache_file, 'r') as f:
                return json.load(f)

        prompt = get_prompt_rag_query_generation_code(
            implementation_plan=implementation_plan,
            relevant_plugins=", ".join(relevant_plugins) if relevant_plugins else "No plugins are relevant."
        )

        try:
            response = self.helper_model(
                _prepare_text_inputs(prompt),
                metadata={"generation_name": "rag_query_generation_code", "trace_id": scene_trace_id, "tags": [topic, f"scene{scene_number}"], "session_id": self.session_id}
            )

            # Clean and parse the response
            json_match = re.search(r'```json(.*)```', response, re.DOTALL)
            if not json_match:
                print(f"No JSON block found in code RAG queries response: {response[:200]}...")
                return []
            queries = json.loads(json_match.group(1))

            # Cache the queries
            with open(cache_file, 'w') as f:
                json.dump(queries, f)

            return queries
        except Exception as e:
            print(f"Error generating RAG queries: {e}")
            return []

    def _generate_rag_queries_error_fix(self, error: str, code: str, scene_trace_id: str = None, topic: str = None, scene_number: int = None, session_id: str = None) -> List[str]:
        """Generate RAG queries for fixing code errors.

        Args:
            error (str): Error message to generate queries from
            code (str): Code containing the error
            scene_trace_id (str, optional): Trace identifier for the scene. Defaults to None
            topic (str, optional): Topic name. Defaults to None
            scene_number (int, optional): Scene number. Defaults to None
            session_id (str, optional): Session identifier. Defaults to None

        Returns:
            List[str]: List of generated RAG queries
        """
        if self.relevant_plugins is None:
            print("Warning: No plugins have been detected yet")
            plugins_str = "No plugins are relevant."
        else:
            plugins_str = ", ".join(self.relevant_plugins) if self.relevant_plugins else "No plugins are relevant."

        cache_key = f"{topic}_scene{scene_number}_error_fix"
        cache_dir = os.path.join(self.output_dir, re.sub(r'[^a-z0-9_]+', '_', topic.lower()), f"scene{scene_number}", "rag_cache")
        os.makedirs(cache_dir, exist_ok=True)
        cache_file = os.path.join(cache_dir, "rag_queries_error_fix.json")

        if os.path.exists(cache_file):
            with open(cache_file, 'r') as f:
                cached_queries = json.load(f)
            print(f"Using cached RAG queries for error fix in {cache_key}")
            return cached_queries

        prompt = get_prompt_rag_query_generation_fix_error(
            error=error,
            code=code,
            relevant_plugins=plugins_str
        )

        queries = self.helper_model(
            _prepare_text_inputs(prompt),
            metadata={"generation_name": "rag-query-generation-fix-error", "trace_id": scene_trace_id, "tags": [topic, f"scene{scene_number}"], "session_id": session_id}
        )

        # Retrieve the JSON between triple backticks, handling decode errors
        try:
            json_match = re.search(r'```json(.*)```', queries, re.DOTALL)
            if not json_match:
                print(f"No JSON block found in error fix RAG queries response: {queries[:200]}...")
                return []
            queries = json.loads(json_match.group(1))
        except json.JSONDecodeError as e:
            print(f"JSONDecodeError when parsing RAG queries for error fix: {e}")
            print(f"Response text was: {queries}")
            return []

        # Cache the queries
        with open(cache_file, 'w') as f:
            json.dump(queries, f)

        return queries
src/rag/vector_store.py ADDED
@@ -0,0 +1,465 @@
import json
import os
import ast
from typing import List, Dict, Tuple, Optional
import uuid
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import Language
from langchain_core.embeddings import Embeddings
import statistics
import tiktoken
from tqdm import tqdm
from langfuse import Langfuse
from langchain_community.embeddings import HuggingFaceEmbeddings
import re

from mllm_tools.utils import _prepare_text_inputs
from task_generator import get_prompt_detect_plugins

class CodeAwareTextSplitter:
    """Enhanced text splitter that understands code structure."""

    def __init__(self, chunk_size: int = 1000, chunk_overlap: int = 200):
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap

    def split_python_file(self, content: str, metadata: dict) -> List[Document]:
        """Split Python files while preserving code structure."""
        documents = []

        try:
            tree = ast.parse(content)

            # Extract classes and functions together with their docstrings
            for node in ast.walk(tree):
                if isinstance(node, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)):
                    # Get the source code segment
                    start_line = node.lineno
                    end_line = getattr(node, 'end_lineno', start_line + 20)

                    lines = content.split('\n')
                    code_segment = '\n'.join(lines[start_line-1:end_line])

                    # Extract the docstring
                    docstring = ast.get_docstring(node) or ""

                    # Create enhanced content
                    enhanced_content = f"""
Type: {"Class" if isinstance(node, ast.ClassDef) else "Function"}
Name: {node.name}
Docstring: {docstring}

Code:
```python
{code_segment}
```
""".strip()

                    # Enhanced metadata
                    enhanced_metadata = {
                        **metadata,
                        'type': 'class' if isinstance(node, ast.ClassDef) else 'function',
                        'name': node.name,
                        'start_line': start_line,
                        'end_line': end_line,
                        'has_docstring': bool(docstring),
                        'docstring': docstring[:200] + "..." if len(docstring) > 200 else docstring
                    }

                    documents.append(Document(
                        page_content=enhanced_content,
                        metadata=enhanced_metadata
                    ))

            # Also create chunks for imports and module-level code
            imports_and_constants = self._extract_imports_and_constants(content)
            if imports_and_constants:
                documents.append(Document(
                    page_content=f"Module-level imports and constants:\n\n{imports_and_constants}",
                    metadata={**metadata, 'type': 'module_level', 'name': 'imports_constants'}
                ))

        except SyntaxError:
            # Fall back to regular text splitting for invalid Python
            splitter = RecursiveCharacterTextSplitter.from_language(
                language=Language.PYTHON,
                chunk_size=self.chunk_size,
                chunk_overlap=self.chunk_overlap
            )
            documents = splitter.split_documents([Document(page_content=content, metadata=metadata)])

        return documents

    def split_markdown_file(self, content: str, metadata: dict) -> List[Document]:
        """Split Markdown files while preserving structure."""
        documents = []

        # Split by headers while preserving hierarchy
        sections = self._split_by_headers(content)

        for section in sections:
            # Extract code blocks
            code_blocks = self._extract_code_blocks(section['content'])

            # Create a document for the text content
            text_content = self._remove_code_blocks(section['content'])
            if text_content.strip():
                enhanced_metadata = {
                    **metadata,
                    'type': 'markdown_section',
                    'header': section['header'],
                    'level': section['level'],
                    'has_code_blocks': len(code_blocks) > 0
                }

                documents.append(Document(
                    page_content=f"Header: {section['header']}\n\n{text_content}",
                    metadata=enhanced_metadata
                ))

            # Create separate documents for code blocks
            for i, code_block in enumerate(code_blocks):
                enhanced_metadata = {
                    **metadata,
                    'type': 'code_block',
                    'language': code_block['language'],
                    'in_section': section['header'],
                    'block_index': i
                }

                documents.append(Document(
                    page_content=f"Code example in '{section['header']}':\n\n```{code_block['language']}\n{code_block['code']}\n```",
                    metadata=enhanced_metadata
                ))

        return documents

    def _extract_imports_and_constants(self, content: str) -> str:
        """Extract imports and module-level constants."""
        lines = content.split('\n')
        relevant_lines = []
        for line in lines:
            stripped = line.strip()
            if (stripped.startswith('import ') or
                    stripped.startswith('from ') or
                    (stripped and not stripped.startswith('def ') and
                     not stripped.startswith('class ') and
                     not stripped.startswith('#') and
                     '=' in stripped and stripped.split('=')[0].strip().isupper())):
                relevant_lines.append(line)

        return '\n'.join(relevant_lines)

    def _split_by_headers(self, content: str) -> List[Dict]:
        """Split markdown content by headers."""
        sections = []
        lines = content.split('\n')
        current_section = {'header': 'Introduction', 'level': 0, 'content': ''}

        for line in lines:
            header_match = re.match(r'^(#{1,6})\s+(.+)$', line)
            if header_match:
                # Save the previous section
                if current_section['content'].strip():
                    sections.append(current_section)

                # Start a new section
                level = len(header_match.group(1))
                header = header_match.group(2)
                current_section = {'header': header, 'level': level, 'content': ''}
            else:
                current_section['content'] += line + '\n'

        # Add the last section
        if current_section['content'].strip():
            sections.append(current_section)

        return sections

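    # Illustrative input/output for _split_by_headers:
    #   "# Intro\nsome text\n## Usage\nmore text"
    # yields
    #   [{'header': 'Intro', 'level': 1, 'content': 'some text\n'},
    #    {'header': 'Usage', 'level': 2, 'content': 'more text\n'}]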
    def _extract_code_blocks(self, content: str) -> List[Dict]:
        """Extract code blocks from markdown content."""
        code_blocks = []
        pattern = r'```(\w+)?\n(.*?)\n```'

        for match in re.finditer(pattern, content, re.DOTALL):
            language = match.group(1) or 'text'
            code = match.group(2)
            code_blocks.append({'language': language, 'code': code})

        return code_blocks

    def _remove_code_blocks(self, content: str) -> str:
        """Remove code blocks from content."""
        pattern = r'```\w*\n.*?\n```'
        return re.sub(pattern, '', content, flags=re.DOTALL)

class EnhancedRAGVectorStore:
    """Enhanced RAG vector store with improved code understanding."""

    def __init__(self,
                 chroma_db_path: str = "chroma_db",
                 manim_docs_path: str = "rag/manim_docs",
                 embedding_model: str = "hf:ibm-granite/granite-embedding-30m-english",
                 trace_id: str = None,
                 session_id: str = None,
                 use_langfuse: bool = True,
                 helper_model=None):
        self.chroma_db_path = chroma_db_path
        self.manim_docs_path = manim_docs_path
        self.embedding_model = embedding_model
        self.trace_id = trace_id
        self.session_id = session_id
        self.use_langfuse = use_langfuse
        self.helper_model = helper_model
        self.enc = tiktoken.encoding_for_model("gpt-4")
        self.plugin_stores = {}
        self.code_splitter = CodeAwareTextSplitter()
        self.vector_store = self._load_or_create_vector_store()

    def _load_or_create_vector_store(self):
        """Enhanced vector store creation with better document processing."""
        print("Creating enhanced vector store with code-aware processing...")
        core_path = os.path.join(self.chroma_db_path, "manim_core_enhanced")

        if os.path.exists(core_path):
            print("Loading existing enhanced ChromaDB...")
            self.core_vector_store = Chroma(
                collection_name="manim_core_enhanced",
                persist_directory=core_path,
                embedding_function=self._get_embedding_function()
            )
        else:
            print("Creating new enhanced ChromaDB...")
            self.core_vector_store = self._create_enhanced_core_store()

        # Process plugins with enhanced splitting
        plugin_docs_path = os.path.join(self.manim_docs_path, "plugin_docs")
        if os.path.exists(plugin_docs_path):
            for plugin_name in os.listdir(plugin_docs_path):
                plugin_store_path = os.path.join(self.chroma_db_path, f"manim_plugin_{plugin_name}_enhanced")
                if os.path.exists(plugin_store_path):
                    print(f"Loading existing enhanced plugin store: {plugin_name}")
                    self.plugin_stores[plugin_name] = Chroma(
                        collection_name=f"manim_plugin_{plugin_name}_enhanced",
                        persist_directory=plugin_store_path,
                        embedding_function=self._get_embedding_function()
                    )
                else:
                    print(f"Creating new enhanced plugin store: {plugin_name}")
                    plugin_path = os.path.join(plugin_docs_path, plugin_name)
                    if os.path.isdir(plugin_path):
                        plugin_store = Chroma(
                            collection_name=f"manim_plugin_{plugin_name}_enhanced",
                            embedding_function=self._get_embedding_function(),
                            persist_directory=plugin_store_path
                        )
                        plugin_docs = self._process_documentation_folder_enhanced(plugin_path)
                        if plugin_docs:
                            self._add_documents_to_store(plugin_store, plugin_docs, plugin_name)
                        self.plugin_stores[plugin_name] = plugin_store

        return self.core_vector_store

    def _get_embedding_function(self) -> Embeddings:
        """Enhanced embedding function with better model selection."""
        if self.embedding_model.startswith('hf:'):
            model_name = self.embedding_model[3:]
            print(f"Using HuggingFaceEmbeddings with model: {model_name}")

            # Suggest better models for code understanding
            if 'code' not in model_name.lower():
                print("Consider using a code-specific embedding model like 'microsoft/codebert-base'")

            return HuggingFaceEmbeddings(
                model_name=model_name,
                model_kwargs={'device': 'cpu'},
                encode_kwargs={'normalize_embeddings': True}
            )
        else:
            raise ValueError("Only HuggingFace embeddings are supported in this configuration.")

    def _create_enhanced_core_store(self):
        """Create the enhanced core store with better document processing."""
        core_vector_store = Chroma(
            collection_name="manim_core_enhanced",
            embedding_function=self._get_embedding_function(),
            persist_directory=os.path.join(self.chroma_db_path, "manim_core_enhanced")
        )

        core_docs = self._process_documentation_folder_enhanced(
            os.path.join(self.manim_docs_path, "manim_core")
        )
        if core_docs:
            self._add_documents_to_store(core_vector_store, core_docs, "manim_core_enhanced")

        return core_vector_store

    def _process_documentation_folder_enhanced(self, folder_path: str) -> List[Document]:
        """Enhanced document processing with code-aware splitting."""
        all_docs = []

        for root, _, files in os.walk(folder_path):
            for file in files:
                if file.endswith(('.md', '.py')):
                    file_path = os.path.join(root, file)
                    try:
                        with open(file_path, 'r', encoding='utf-8') as f:
                            content = f.read()

                        base_metadata = {
                            'source': file_path,
                            'filename': file,
                            'file_type': 'python' if file.endswith('.py') else 'markdown',
                            'relative_path': os.path.relpath(file_path, folder_path)
                        }

                        if file.endswith('.py'):
                            docs = self.code_splitter.split_python_file(content, base_metadata)
                        else:  # .md files
                            docs = self.code_splitter.split_markdown_file(content, base_metadata)

                        # Add a source prefix to the content
                        for doc in docs:
                            doc.page_content = f"Source: {file_path}\nType: {doc.metadata.get('type', 'unknown')}\n\n{doc.page_content}"

                        all_docs.extend(docs)

                    except Exception as e:
                        print(f"Error loading file {file_path}: {e}")

        print(f"Processed {len(all_docs)} enhanced document chunks from {folder_path}")
        return all_docs

    def _add_documents_to_store(self, vector_store: Chroma, documents: List[Document], store_name: str):
        """Enhanced document addition with better batching."""
        print(f"Adding {len(documents)} enhanced documents to {store_name} store")

        # Group documents by type for better organization
        doc_types = {}
        for doc in documents:
            doc_type = doc.metadata.get('type', 'unknown')
            if doc_type not in doc_types:
                doc_types[doc_type] = []
            doc_types[doc_type].append(doc)

        print(f"Document types distribution: {dict((k, len(v)) for k, v in doc_types.items())}")

        # Calculate token statistics
        token_lengths = [len(self.enc.encode(doc.page_content)) for doc in documents]
        print(f"Token length statistics for {store_name}: "
              f"Min: {min(token_lengths)}, Max: {max(token_lengths)}, "
              f"Mean: {sum(token_lengths) / len(token_lengths):.1f}, "
              f"Median: {statistics.median(token_lengths):.1f}")

        batch_size = 10
        for i in tqdm(range(0, len(documents), batch_size), desc=f"Processing {store_name} enhanced batches"):
            batch_docs = documents[i:i + batch_size]
            batch_ids = [str(uuid.uuid4()) for _ in batch_docs]
            vector_store.add_documents(documents=batch_docs, ids=batch_ids)

        vector_store.persist()

    def find_relevant_docs(self, queries: List[Dict], k: int = 5, trace_id: str = None, topic: str = None, scene_number: int = None) -> str:
        """Find relevant documents - compatibility method that calls the enhanced version."""
        return self.find_relevant_docs_enhanced(queries, k, trace_id, topic, scene_number)

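    # Each query is a dict with a "query" key, e.g. (queries illustrative):
    #   store.find_relevant_docs([{"query": "How do I animate a Circle?"},
    #                             {"query": "Transform class usage"}], k=2)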
    def find_relevant_docs_enhanced(self, queries: List[Dict], k: int = 5, trace_id: str = None, topic: str = None, scene_number: int = None) -> str:
        """Enhanced document retrieval with type-aware search."""
        # Separate queries by intent
        code_queries = [q for q in queries if any(keyword in q["query"].lower()
                        for keyword in ["function", "class", "method", "import", "code", "implementation"])]
        concept_queries = [q for q in queries if q not in code_queries]

        all_results = []

        # Search with different strategies for different query types
        for query in code_queries:
            results = self._search_with_filters(
                query["query"],
                k=k,
                filter_metadata={'type': ['function', 'class', 'code_block']},
                boost_code=True
            )
            all_results.extend(results)

        for query in concept_queries:
            results = self._search_with_filters(
                query["query"],
                k=k,
                filter_metadata={'type': ['markdown_section', 'module_level']},
                boost_code=False
            )
            all_results.extend(results)

        # Remove duplicates and format the results
        unique_results = self._remove_duplicates(all_results)
        return self._format_results(unique_results)

    def _search_with_filters(self, query: str, k: int, filter_metadata: Dict = None, boost_code: bool = False) -> List[Dict]:
        """Search with metadata filters and result boosting."""
        # This is a simplified version - in practice, filter_metadata would be
        # applied directly in the vector store query
        core_results = self.core_vector_store.similarity_search_with_relevance_scores(
            query=query, k=k, score_threshold=0.3
        )

        formatted_results = []
        for result in core_results:
            doc, score = result
            # Boost scores for code-related results if needed
            if boost_code and doc.metadata.get('type') in ['function', 'class', 'code_block']:
                score *= 1.2

            formatted_results.append({
                "query": query,
                "source": doc.metadata['source'],
                "content": doc.page_content,
                "score": score,
                "type": doc.metadata.get('type', 'unknown'),
                "metadata": doc.metadata
            })

        return formatted_results

    def _remove_duplicates(self, results: List[Dict]) -> List[Dict]:
        """Remove duplicate results based on content similarity."""
        unique_results = []
        seen_content = set()

        for result in sorted(results, key=lambda x: x['score'], reverse=True):
            content_hash = hash(result['content'][:200])  # Hash the first 200 chars
            if content_hash not in seen_content:
                unique_results.append(result)
                seen_content.add(content_hash)

        return unique_results[:10]  # Return the top 10 unique results

    def _format_results(self, results: List[Dict]) -> str:
        """Format results with enhanced presentation."""
        if not results:
            return "No relevant documentation found."

        formatted = "## Relevant Documentation\n\n"

        # Group by type
        by_type = {}
        for result in results:
            result_type = result['type']
            if result_type not in by_type:
                by_type[result_type] = []
            by_type[result_type].append(result)

        for result_type, type_results in by_type.items():
            formatted += f"### {result_type.replace('_', ' ').title()} Documentation\n\n"

            for result in type_results:
                formatted += f"**Source:** {result['source']}\n"
                formatted += f"**Relevance Score:** {result['score']:.3f}\n"
                formatted += f"**Content:**\n```\n{result['content'][:500]}...\n```\n\n"

        return formatted

# Update the existing RAGVectorStore class alias for backward compatibility
RAGVectorStore = EnhancedRAGVectorStore
src/utils/__init__.py ADDED
File without changes
src/utils/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (153 Bytes).
 
src/utils/__pycache__/kokoro_voiceover.cpython-312.pyc ADDED
Binary file (4.68 kB).
 
src/utils/__pycache__/utils.cpython-312.pyc ADDED
Binary file (5.7 kB).
 
src/utils/allowed_models.json ADDED
@@ -0,0 +1,37 @@
{
  "allowed_models": [
    "gemini/gemini-1.5-pro-002",
    "gemini/gemini-1.5-flash-002",
    "github/gpt-4.1",
    "gemini/gemini-2.5-flash-preview-04-17",
    "gemini/gemini-2.0-flash-001",
    "gemini/gemini-2.5-pro-preview-03-25",
    "vertex_ai/gemini-1.5-flash-002",
    "vertex_ai/gemini-1.5-pro-002",
    "vertex_ai/gemini-2.0-flash-001",
    "openai/o3-mini",
    "gpt-4o",
    "azure/gpt-4o",
    "azure/gpt-4o-mini",
    "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
    "bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0",
    "bedrock/anthropic.claude-3-5-haiku-20241022-v1:0",
    "bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
    "openrouter/openai/gpt-4o",
    "openrouter/openai/gpt-4o-mini",
    "openrouter/openai/gpt-3.5-turbo",
    "openrouter/anthropic/claude-3.5-sonnet",
    "openrouter/anthropic/claude-3-haiku",
    "openrouter/google/gemini-pro-1.5",
    "openrouter/deepseek/deepseek-chat",
    "openrouter/qwen/qwen-2.5-72b-instruct",
    "openrouter/meta-llama/llama-3.1-8b-instruct:free",
    "openrouter/microsoft/phi-3-mini-128k-instruct:free"
  ],
  "embedding_models": [
    "text-embedding-ada-002",
    "vertex_ai/text-embedding-005",
    "azure/text-embedding-3-large",
    "gemini/gemini-embedding-exp-03-07"
  ]
}
src/utils/kokoro_voiceover.py ADDED
@@ -0,0 +1,117 @@
"""
Copyright (c) 2025 Xposed73
All rights reserved.
This file is part of the Manim Voiceover project.
"""

import hashlib
import json
import os
import numpy as np
from pathlib import Path
from manim_voiceover.services.base import SpeechService
from kokoro_onnx import Kokoro
from manim_voiceover.helper import wav2mp3
from scipy.io.wavfile import write as write_wav
from src.config.config import Config


class KokoroService(SpeechService):
    """Speech service class for kokoro_self (using text_to_speech via Kokoro ONNX)."""

    def __init__(self, engine=None,
                 model_path: str = Config.KOKORO_MODEL_PATH,
                 voices_path: str = Config.KOKORO_VOICES_PATH,
                 voice: str = Config.KOKORO_DEFAULT_VOICE,
                 speed: float = Config.KOKORO_DEFAULT_SPEED,
                 lang: str = Config.KOKORO_DEFAULT_LANG,
                 **kwargs):
        self.kokoro = Kokoro(model_path, voices_path)
        self.voice = voice
        self.speed = speed
        self.lang = lang

        if engine is None:
            engine = self.text_to_speech  # Default to the local function

        self.engine = engine
        super().__init__(**kwargs)

    def get_data_hash(self, input_data: dict) -> str:
        """
        Generates a hash based on the input data dictionary.
        The hash is used to create a unique identifier for the input data.

        Parameters:
            input_data (dict): A dictionary of input data (e.g., text, voice, etc.).

        Returns:
            str: The generated hash as a string.
        """
        # Convert the input data dictionary to a JSON string (sorted for consistency)
        data_str = json.dumps(input_data, sort_keys=True)
        # Generate a SHA-256 hash of the JSON string
        return hashlib.sha256(data_str.encode('utf-8')).hexdigest()
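    # Illustrative property of the sorted-key hashing above: key order does not
    # change the cache identity, so
    #   get_data_hash({"a": 1, "b": 2}) == get_data_hash({"b": 2, "a": 1})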
    def text_to_speech(self, text, output_file, voice_name, speed, lang):
        """
        Generates speech from text using Kokoro ONNX and saves the audio file.
        Normalizes the audio to make it audible.
        """
        # Generate audio samples using Kokoro
        samples, sample_rate = self.kokoro.create(
            text, voice=voice_name, speed=speed, lang=lang
        )

        # Normalize audio to the range [-1, 1]
        max_val = np.max(np.abs(samples))
        if max_val > 0:
            samples = samples / max_val

        # Convert to 16-bit integer PCM format
        samples = (samples * 32767).astype("int16")

        # Save the normalized audio as a .wav file
        write_wav(output_file, sample_rate, samples)
        print(f"Saved at {output_file}")

        return output_file

    def generate_from_text(self, text: str, cache_dir: str = None, path: str = None) -> dict:
        if cache_dir is None:
            cache_dir = self.cache_dir

        input_data = {"input_text": text, "service": "kokoro_self", "voice": self.voice, "lang": self.lang}
        cached_result = self.get_cached_result(input_data, cache_dir)
        if cached_result is not None:
            return cached_result

        if path is None:
            audio_path = self.get_data_hash(input_data) + ".mp3"
        else:
            audio_path = path

        # Generate the .wav file using the text_to_speech function
        audio_path_wav = str(Path(cache_dir) / audio_path.replace(".mp3", ".wav"))
        self.engine(
            text=text,
            output_file=audio_path_wav,
            voice_name=self.voice,
            speed=self.speed,
            lang=self.lang,
        )

        # Convert .wav to .mp3
        mp3_audio_path = str(Path(cache_dir) / audio_path)
        wav2mp3(audio_path_wav, mp3_audio_path)

        # Remove the original .wav file (manim_voiceover's remove_bookmarks only
        # strips bookmark tags from a string; it does not delete files)
        os.remove(audio_path_wav)

        json_dict = {
            "input_text": text,
            "input_data": input_data,
            "original_audio": audio_path,
        }

        return json_dict
src/utils/utils.py ADDED
@@ -0,0 +1,132 @@
import json
import re
try:
    from pylatexenc.latexencode import utf8tolatex, UnicodeToLatexEncoder
except ImportError:
    print("Warning: Missing pylatexenc, please run `pip install pylatexenc`")

def _print_response(response_type: str, theorem_name: str, content: str, separator: str = "=" * 50) -> None:
    """Print formatted responses from the video generation process.

    Prints a formatted response with separators and headers for readability.

    Args:
        response_type (str): Type of response (e.g., 'Scene Plan', 'Implementation Plan')
        theorem_name (str): Name of the theorem being processed
        content (str): The content to print
        separator (str, optional): Separator string for visual distinction. Defaults to 50 equals signs.

    Returns:
        None
    """
    print(f"\n{separator}")
    print(f"{response_type} for {theorem_name}:")
    print(f"{separator}\n")
    print(content)
    print(f"\n{separator}")

def _extract_code(response_text: str) -> str:
    """Extract code blocks from a text response.

    Extracts Python code blocks delimited by ```python markers. If no code blocks are found,
    returns the entire response text.

    Args:
        response_text (str): The text response containing code blocks

    Returns:
        str: The extracted code blocks joined by newlines, or the full response if no blocks found
    """
    code = ""
    code_blocks = re.findall(r'```python\n(.*?)\n```', response_text, re.DOTALL)
    if code_blocks:
        code = "\n\n".join(code_blocks)
    elif "```" not in response_text:  # if there is no code block, return the whole response
        code = response_text
    return code

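# Illustrative behavior:
#   _extract_code("Here:\n```python\nprint('hi')\n```")  -> "print('hi')"
#   _extract_code("plain answer with no fences")         -> "plain answer with no fences"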
def extract_json(response: str) -> dict:
    """Extract and parse JSON content from a text response.

    Attempts to parse the response as JSON directly, then tries to extract JSON from code blocks
    if direct parsing fails.

    Args:
        response (str): The text response containing JSON content

    Returns:
        dict: The parsed JSON content as a dictionary, or an empty list if parsing fails

    Note:
        Will attempt to parse content between ```json markers first, then between generic ``` markers
    """
    try:
        evaluation_json = json.loads(response)
    except json.JSONDecodeError:
        # If JSON parsing fails, try to extract the content between ```json and ```
        match = re.search(r'```json\n(.*?)\n```', response, re.DOTALL)
        if not match:
            # If there is no match for ```json, try content between generic ``` markers
            match = re.search(r'```\n(.*?)\n```', response, re.DOTALL)

        if match:
            try:
                evaluation_json = json.loads(match.group(1))
            except json.JSONDecodeError:
                # Keep the documented failure contract instead of raising
                evaluation_json = []
                print(f"Warning: Failed to extract valid JSON content from {response}")
        else:
            # Return an empty list, as documented above
            evaluation_json = []
            print(f"Warning: Failed to extract valid JSON content from {response}")
    return evaluation_json

+ def _fix_unicode_to_latex(text: str, parse_unicode: bool = True) -> str:
82
+ """Convert Unicode symbols to LaTeX source code.
83
+
84
+ Converts Unicode subscripts and superscripts to LaTeX format, with optional full Unicode parsing.
85
+
86
+ Args:
87
+ text (str): The text containing Unicode symbols to convert
88
+ parse_unicode (bool, optional): Whether to perform full Unicode to LaTeX conversion. Defaults to True.
89
+
90
+ Returns:
91
+ str: The text with Unicode symbols converted to LaTeX format
92
+ """
93
+ # Map of unicode subscripts to latex format
94
+ subscripts = {
95
+ "₀": "_0", "₁": "_1", "₂": "_2", "₃": "_3", "₄": "_4",
96
+ "₅": "_5", "₆": "_6", "₇": "_7", "₈": "_8", "₉": "_9",
97
+ "₊": "_+", "₋": "_-"
98
+ }
99
+ # Map of unicode superscripts to latex format
100
+ superscripts = {
101
+ "⁰": "^0", "¹": "^1", "²": "^2", "³": "^3", "⁴": "^4",
102
+ "⁵": "^5", "⁶": "^6", "⁷": "^7", "⁸": "^8", "⁹": "^9",
103
+ "⁺": "^+", "⁻": "^-"
104
+ }
105
+
106
+ for unicode_char, latex_format in {**subscripts, **superscripts}.items():
107
+ text = text.replace(unicode_char, latex_format)
108
+
109
+ if parse_unicode:
110
+ text = utf8tolatex(text)
111
+
112
+ return text
113
+
114
+ def extract_xml(response: str) -> str:
115
+ """Extract XML content from a text response.
116
+
117
+ Extracts XML content between ```xml markers. Returns the full response if no XML blocks found.
118
+
119
+ Args:
120
+ response (str): The text response containing XML content
121
+
122
+ Returns:
123
+ str: The extracted XML content, or the full response if no XML blocks found
124
+ """
125
+ try:
126
+ match = re.search(r'```xml\n(.*?)\n```', response, re.DOTALL)
127
+ if match:
128
+ return match.group(1)
129
+ else:
130
+ return response
131
+ except Exception:
132
+ return response
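A quick usage sketch of the extraction helpers above; the sample LLM response string is made up for illustration.

```python
from src.utils.utils import _extract_code, extract_json

# A made-up LLM response containing one Python block and one JSON block.
response = (
    "Here is the scene:\n```python\nprint(\"hello\")\n```\n"
    "Scores:\n```json\n{\"clarity\": 5}\n```"
)

print(_extract_code(response))  # -> print("hello")
print(extract_json(response))   # -> {'clarity': 5}
```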
src/utils/visual_error_detection.py ADDED
@@ -0,0 +1,336 @@
+ """
+ Visual Error Detection Utilities for Manim Code Analysis
+
+ This module provides utilities for detecting and analyzing visual errors in Manim animations,
+ specifically focusing on element overlap, positioning issues, and spatial constraint violations.
+ """
+
+ import re
+ import logging
+ from typing import Dict, List, Tuple, Any, Optional
+ from pathlib import Path
+
+ logger = logging.getLogger(__name__)
+
+ # Visual error detection patterns
+ VISUAL_ERROR_PATTERNS = {
+     'overlap_keywords': [
+         'overlap', 'overlapping', 'collision', 'colliding', 'obscured', 'hidden',
+         'blocked', 'covering', 'covered', 'behind', 'on top of'
+     ],
+     'boundary_keywords': [
+         'out of bounds', 'outside frame', 'clipped', 'cut off', 'beyond edge',
+         'outside safe area', 'margin violation', 'boundary violation'
+     ],
+     'spacing_keywords': [
+         'too close', 'insufficient spacing', 'cramped', 'crowded', 'bunched up',
+         'spacing violation', 'minimum distance', 'tight spacing'
+     ],
+     'positioning_keywords': [
+         'misaligned', 'mispositioned', 'wrong position', 'incorrect placement',
+         'poor arrangement', 'bad layout', 'disorganized'
+     ]
+ }
+
+ # Critical visual issues that require immediate fixing
+ CRITICAL_VISUAL_ISSUES = [
+     'text completely obscured',
+     'formula unreadable',
+     'important element hidden',
+     'content outside frame',
+     'major overlap',
+     'critical positioning error'
+ ]
+
+ # Safe area and spacing constraints (Manim units)
+ VISUAL_CONSTRAINTS = {
+     'safe_area_margin': 0.5,   # Units from frame edge
+     'minimum_spacing': 0.3,    # Units between elements
+     'frame_width': 14.22,      # Manim frame width
+     'frame_height': 8.0,       # Manim frame height
+     'center_x': 0.0,           # Frame center X
+     'center_y': 0.0,           # Frame center Y
+     'x_bounds': (-7.0, 7.0),   # Safe X coordinate range
+     'y_bounds': (-4.0, 4.0)    # Safe Y coordinate range
+ }
+
+ class VisualErrorDetector:
+     """Utility class for detecting and categorizing visual errors in VLM responses."""
+
+     def __init__(self):
+         self.error_patterns = VISUAL_ERROR_PATTERNS
+         self.critical_issues = CRITICAL_VISUAL_ISSUES
+         self.constraints = VISUAL_CONSTRAINTS
+
+     def detect_error_types(self, analysis_text: str) -> Dict[str, List[str]]:
+         """
+         Detect different types of visual errors from VLM analysis text.
+
+         Args:
+             analysis_text: Raw text from VLM visual analysis
+
+         Returns:
+             Dictionary categorizing detected errors by type
+         """
+         errors = {
+             'overlap_errors': [],
+             'boundary_errors': [],
+             'spacing_errors': [],
+             'positioning_errors': [],
+             'critical_errors': []
+         }
+
+         analysis_lower = analysis_text.lower()
+
+         # Check for overlap errors
+         for keyword in self.error_patterns['overlap_keywords']:
+             if keyword in analysis_lower:
+                 errors['overlap_errors'].append(self._extract_error_context(analysis_text, keyword))
+
+         # Check for boundary errors
+         for keyword in self.error_patterns['boundary_keywords']:
+             if keyword in analysis_lower:
+                 errors['boundary_errors'].append(self._extract_error_context(analysis_text, keyword))
+
+         # Check for spacing errors
+         for keyword in self.error_patterns['spacing_keywords']:
+             if keyword in analysis_lower:
+                 errors['spacing_errors'].append(self._extract_error_context(analysis_text, keyword))
+
+         # Check for positioning errors
+         for keyword in self.error_patterns['positioning_keywords']:
+             if keyword in analysis_lower:
+                 errors['positioning_errors'].append(self._extract_error_context(analysis_text, keyword))
+
+         # Check for critical issues
+         for issue in self.critical_issues:
+             if issue in analysis_lower:
+                 errors['critical_errors'].append(self._extract_error_context(analysis_text, issue))
+
+         # Remove empty entries and duplicates
+         for error_type in errors:
+             errors[error_type] = list(set([e for e in errors[error_type] if e]))
+
+         return errors
+
+     def _extract_error_context(self, text: str, keyword: str, context_length: int = 100) -> str:
+         """
+         Extract context around a detected error keyword.
+
+         Args:
+             text: Full analysis text
+             keyword: Error keyword found
+             context_length: Characters to include around keyword
+
+         Returns:
+             Context string around the error keyword
+         """
+         try:
+             # Find keyword position (case insensitive)
+             lower_text = text.lower()
+             keyword_pos = lower_text.find(keyword.lower())
+
+             if keyword_pos == -1:
+                 return keyword
+
+             # Extract context around keyword
+             start = max(0, keyword_pos - context_length // 2)
+             end = min(len(text), keyword_pos + len(keyword) + context_length // 2)
+
+             context = text[start:end].strip()
+
+             # Clean up context
+             context = re.sub(r'\s+', ' ', context)
+
+             return context
+         except Exception as e:
+             logger.warning(f"Error extracting context for keyword '{keyword}': {e}")
+             return keyword
+
+     def categorize_severity(self, errors: Dict[str, List[str]]) -> Dict[str, str]:
+         """
+         Categorize the severity of detected visual errors.
+
+         Args:
+             errors: Dictionary of detected errors by type
+
+         Returns:
+             Dictionary mapping error types to severity levels
+         """
+         severity_map = {}
+
+         # Critical errors are always high severity
+         if errors['critical_errors']:
+             severity_map['critical'] = 'HIGH'
+
+         # Overlap errors can vary in severity
+         if errors['overlap_errors']:
+             # Check if any overlap errors mention important elements
+             important_keywords = ['text', 'formula', 'equation', 'title', 'label']
+             has_important_overlap = any(
+                 any(keyword in error.lower() for keyword in important_keywords)
+                 for error in errors['overlap_errors']
+             )
+             severity_map['overlap'] = 'HIGH' if has_important_overlap else 'MEDIUM'
+
+         # Boundary errors are typically medium to high severity
+         if errors['boundary_errors']:
+             severity_map['boundary'] = 'MEDIUM'
+
+         # Spacing errors are usually low to medium severity
+         if errors['spacing_errors']:
+             severity_map['spacing'] = 'LOW'
+
+         # Positioning errors vary based on context
+         if errors['positioning_errors']:
+             severity_map['positioning'] = 'MEDIUM'
+
+         return severity_map
+
+     def generate_fix_suggestions(self, errors: Dict[str, List[str]]) -> List[str]:
+         """
+         Generate specific code fix suggestions based on detected errors.
+
+         Args:
+             errors: Dictionary of detected errors by type
+
+         Returns:
+             List of specific fix suggestions
+         """
+         suggestions = []
+
+         if errors['overlap_errors']:
+             suggestions.extend([
+                 "Use `.next_to()` method to position elements relative to each other with proper spacing",
+                 "Apply `buff` parameter in positioning methods to ensure minimum 0.3 unit spacing",
+                 "Reorganize elements into VGroups for better spatial management",
+                 "Use `bring_to_front()` or `bring_to_back()` to manage z-order layering"
+             ])
+
+         if errors['boundary_errors']:
+             suggestions.extend([
+                 "Ensure all elements are positioned within safe area bounds (-7 to 7 for X, -4 to 4 for Y)",
+                 "Use `move_to(ORIGIN)` and then apply relative positioning to keep elements centered",
+                 "Check element sizes and scale them down if they extend beyond frame boundaries",
+                 "Apply safe area margins of 0.5 units from frame edges"
+             ])
+
+         if errors['spacing_errors']:
+             suggestions.extend([
+                 "Use `buff=0.3` or higher in `.next_to()` methods for proper spacing",
+                 "Apply `.shift()` method to adjust element positions for better spacing",
+                 "Consider using `.arrange()` method for VGroups to maintain consistent spacing",
+                 "Verify minimum 0.3 unit spacing between all visual elements"
+             ])
+
+         if errors['positioning_errors']:
+             suggestions.extend([
+                 "Use relative positioning methods exclusively: `.next_to()`, `.align_to()`, `.shift()`",
+                 "Position elements relative to ORIGIN, other objects, or scene margins",
+                 "Ensure logical flow and visual hierarchy in element arrangement",
+                 "Group related elements using VGroup for coordinated positioning"
+             ])
+
+         # Remove duplicates while preserving order
+         unique_suggestions = []
+         for suggestion in suggestions:
+             if suggestion not in unique_suggestions:
+                 unique_suggestions.append(suggestion)
+
+         return unique_suggestions
+
+     def validate_manim_constraints(self, code: str) -> Dict[str, List[str]]:
+         """
+         Validate Manim code against spatial constraints.
+
+         Args:
+             code: Manim code to validate
+
+         Returns:
+             Dictionary of constraint violations found in code
+         """
+         violations = {
+             'absolute_coordinates': [],
+             'unsafe_positioning': [],
+             'missing_spacing': [],
+             'out_of_bounds': []
+         }
+
+         lines = code.split('\n')
+
+         for i, line in enumerate(lines, 1):
+             # Check for absolute coordinates (potential issues)
+             if re.search(r'move_to\s*\(\s*[-+]?\d+\.?\d*\s*,\s*[-+]?\d+\.?\d*', line):
+                 violations['absolute_coordinates'].append(f"Line {i}: {line.strip()}")
+
+             # Check for potentially unsafe positioning
+             if re.search(r'shift\s*\(\s*[^)]*[5-9]\d*', line):
+                 violations['unsafe_positioning'].append(f"Line {i}: Large shift detected - {line.strip()}")
+
+             # Check for missing buff parameters in next_to calls
+             if 'next_to' in line and 'buff' not in line:
+                 violations['missing_spacing'].append(f"Line {i}: Missing buff parameter - {line.strip()}")
+
+             # Check for coordinates that might be out of bounds
+             coord_matches = re.findall(r'[-+]?\d+\.?\d*', line)
+             for coord in coord_matches:
+                 try:
+                     val = float(coord)
+                     if abs(val) > 10:  # Potentially problematic large coordinates
+                         violations['out_of_bounds'].append(f"Line {i}: Large coordinate {val} - {line.strip()}")
+                 except ValueError:
+                     continue
+
+         return violations
+
+
+ def create_visual_fix_context(
+     errors: Dict[str, List[str]],
+     suggestions: List[str],
+     constraints: Dict[str, Any]
+ ) -> str:
+     """
+     Create a formatted context string for visual fix operations.
+
+     Args:
+         errors: Detected visual errors
+         suggestions: Fix suggestions
+         constraints: Visual constraints to enforce
+
+     Returns:
+         Formatted context string for LLM prompt
+     """
+     context_parts = []
+
+     if any(errors.values()):
+         context_parts.append("**DETECTED VISUAL ERRORS:**")
+
+         for error_type, error_list in errors.items():
+             if error_list:
+                 error_type_formatted = error_type.replace('_', ' ').title()
+                 context_parts.append(f"\n{error_type_formatted}:")
+                 for error in error_list:
+                     context_parts.append(f"  - {error}")
+
+     if suggestions:
+         context_parts.append("\n\n**RECOMMENDED FIXES:**")
+         for i, suggestion in enumerate(suggestions, 1):
+             context_parts.append(f"{i}. {suggestion}")
+
+     context_parts.append("\n\n**SPATIAL CONSTRAINTS TO ENFORCE:**")
+     context_parts.append(f"- Safe area margin: {constraints['safe_area_margin']} units from edges")
+     context_parts.append(f"- Minimum spacing: {constraints['minimum_spacing']} units between elements")
+     context_parts.append(f"- X coordinate bounds: {constraints['x_bounds']}")
+     context_parts.append(f"- Y coordinate bounds: {constraints['y_bounds']}")
+
+     return '\n'.join(context_parts)
+
+
+ # Export main utilities
+ __all__ = [
+     'VisualErrorDetector',
+     'VISUAL_ERROR_PATTERNS',
+     'CRITICAL_VISUAL_ISSUES',
+     'VISUAL_CONSTRAINTS',
+     'create_visual_fix_context'
+ ]
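To make the API above concrete, here is a short sketch of the full detection pipeline run on a made-up VLM analysis string:

```python
from src.utils.visual_error_detection import (
    VISUAL_CONSTRAINTS,
    VisualErrorDetector,
    create_visual_fix_context,
)

detector = VisualErrorDetector()
analysis = "The title text is overlapping the axis labels and gets cut off at the top edge."
errors = detector.detect_error_types(analysis)       # buckets keyed by error type
severity = detector.categorize_severity(errors)      # e.g. {'overlap': 'HIGH', 'boundary': 'MEDIUM'}
suggestions = detector.generate_fix_suggestions(errors)

# Assemble the context block that would be handed to the fixing LLM.
print(create_visual_fix_context(errors, suggestions, VISUAL_CONSTRAINTS))
```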
task_generator/__init__.py ADDED
@@ -0,0 +1,297 @@
+ from .prompts_raw import (
+     _prompt_code_generation,
+     _prompt_fix_error,
+     _prompt_visual_fix_error,
+     _prompt_scene_plan,
+     _prompt_scene_vision_storyboard,
+     _prompt_scene_technical_implementation,
+     _prompt_scene_animation_narration,
+     _prompt_animation_simple,
+     _prompt_animation_fix_error,
+     _prompt_animation_rag_query_generation,
+     _prompt_animation_rag_query_generation_fix_error,
+     _banned_reasonings,
+     _prompt_context_learning_scene_plan,
+     _prompt_context_learning_vision_storyboard,
+     _prompt_context_learning_technical_implementation,
+     _prompt_context_learning_animation_narration,
+     _prompt_context_learning_code,
+     _prompt_detect_plugins,
+     _prompt_rag_query_generation_code,
+     _prompt_rag_query_generation_vision_storyboard,
+     _prompt_rag_query_generation_technical,
+     _prompt_rag_query_generation_narration,
+     _prompt_rag_query_generation_fix_error
+ )
+ from typing import Union, List
+
+ def get_prompt_scene_plan(topic: str, description: str) -> str:
+     """
+     Generate a prompt for scene planning based on the given parameters.
+
+     Args:
+         topic (str): The topic of the video.
+         description (str): A brief description of the video content.
+
+     Returns:
+         str: The formatted prompt for scene planning.
+     """
+     prompt = _prompt_scene_plan.format(topic=topic, description=description)
+     return prompt
+
+ def get_prompt_scene_vision_storyboard(scene_number: int, topic: str, description: str, scene_outline: str, relevant_plugins: List[str]) -> str:
+     prompt = _prompt_scene_vision_storyboard.format(
+         scene_number=scene_number,
+         topic=topic,
+         description=description,
+         scene_outline=scene_outline,
+         relevant_plugins=", ".join(relevant_plugins)
+     )
+     return prompt
+
+ def get_prompt_scene_technical_implementation(scene_number: int, topic: str, description: str, scene_outline: str, scene_vision_storyboard: str, relevant_plugins: List[str], additional_context: Union[str, List[str]] = None) -> str:
+     prompt = _prompt_scene_technical_implementation.format(
+         scene_number=scene_number,
+         topic=topic,
+         description=description,
+         scene_outline=scene_outline,
+         scene_vision_storyboard=scene_vision_storyboard,
+         relevant_plugins=", ".join(relevant_plugins)
+     )
+     if additional_context is not None:
+         if isinstance(additional_context, str):
+             prompt += f"\nAdditional context: {additional_context}"
+         elif isinstance(additional_context, list) and additional_context:
+             prompt += f"\nAdditional context: {additional_context[0]}"
+             if len(additional_context) > 1:
+                 prompt += "\n" + "\n".join(additional_context[1:])
+     return prompt
+
+ def get_prompt_scene_animation_narration(scene_number: int, topic: str, description: str, scene_outline: str, scene_vision_storyboard: str, technical_implementation_plan: str, relevant_plugins: List[str]) -> str:
+     prompt = _prompt_scene_animation_narration.format(
+         scene_number=scene_number,
+         topic=topic,
+         description=description,
+         scene_outline=scene_outline,
+         scene_vision_storyboard=scene_vision_storyboard,
+         technical_implementation_plan=technical_implementation_plan,
+         relevant_plugins=", ".join(relevant_plugins)
+     )
+     return prompt
+
+ def get_prompt_code_generation(topic: str,
+                                description: str,
+                                scene_outline: str,
+                                scene_implementation: str,
+                                scene_number: int,
+                                additional_context: Union[str, List[str]] = None) -> str:
+     """
+     Generate a prompt for code generation based on the given video plan and implementation details.
+
+     Args:
+         topic (str): The topic of the video.
+         description (str): A brief description of the video content.
+         scene_outline (str): The scene outline.
+         scene_implementation (str): The detailed scene implementation.
+         scene_number (int): The scene number.
+         additional_context (Union[str, List[str]], optional): Additional context to include in the prompt.
+
+     Returns:
+         str: The formatted prompt for code generation.
+     """
+     prompt = _prompt_code_generation.format(
+         topic=topic,
+         description=description,
+         scene_outline=scene_outline,
+         scene_implementation=scene_implementation,
+         scene_number=scene_number
+     )
+     if additional_context is not None:
+         if isinstance(additional_context, str):
+             prompt += f"\nAdditional context: {additional_context}"
+         elif isinstance(additional_context, list) and additional_context:
+             prompt += f"\nAdditional context: {additional_context[0]}"
+             if len(additional_context) > 1:
+                 prompt += "\n" + "\n".join(additional_context[1:])
+     return prompt
+
+ def get_prompt_fix_error(implementation_plan: str, manim_code: str, error: str, additional_context: Union[str, List[str]] = None) -> str:
+     """
+     Generate a prompt to fix errors in the given Manim code.
+
+     Args:
+         implementation_plan (str): The implementation plan of the scene.
+         manim_code (str): The Manim code with errors.
+         error (str): The error message encountered.
+         additional_context (Union[str, List[str]], optional): Additional context to include in the prompt.
+
+     Returns:
+         str: The formatted prompt to fix the code errors.
+     """
+     prompt = _prompt_fix_error.format(
+         implementation_plan=implementation_plan,
+         manim_code=manim_code,
+         error_message=error
+     )
+     if additional_context is not None:
+         if isinstance(additional_context, str):
+             prompt += f"\nAdditional context: {additional_context}"
+         elif isinstance(additional_context, list) and additional_context:
+             prompt += f"\nAdditional context: {additional_context[0]}"
+             if len(additional_context) > 1:
+                 prompt += "\n" + "\n".join(additional_context[1:])
+     return prompt
+
+ def get_prompt_visual_fix_error(implementation: str, generated_code: str) -> str:
+     prompt = _prompt_visual_fix_error.format(
+         implementation=implementation,
+         generated_code=generated_code
+     )
+     return prompt
+
+ def get_banned_reasonings() -> List[str]:
+     return _banned_reasonings.split("\n")
+
+ def get_prompt_rag_query_generation_vision_storyboard(scene_plan: str, relevant_plugins: str) -> str:
+     prompt = _prompt_rag_query_generation_vision_storyboard.format(
+         scene_plan=scene_plan,
+         relevant_plugins=relevant_plugins
+     )
+     return prompt
+
+ def get_prompt_rag_query_generation_technical(storyboard: str, relevant_plugins: str) -> str:
+     """For generating RAG queries during the storyboard to technical implementation stage."""
+     prompt = _prompt_rag_query_generation_technical.format(
+         storyboard=storyboard,
+         relevant_plugins=relevant_plugins
+     )
+     return prompt
+
+ def get_prompt_rag_query_generation_narration(storyboard: str, relevant_plugins: str) -> str:
+     """For generating RAG queries during the storyboard to narration stage."""
+     prompt = _prompt_rag_query_generation_narration.format(
+         storyboard=storyboard,
+         relevant_plugins=relevant_plugins
+     )
+     return prompt
+
+ def get_prompt_rag_query_generation_code(implementation_plan: str, relevant_plugins: str) -> str:
+     """For generating RAG queries during the technical implementation to code generation stage."""
+     prompt = _prompt_rag_query_generation_code.format(
+         implementation_plan=implementation_plan,
+         relevant_plugins=relevant_plugins
+     )
+     return prompt
+
+ def get_prompt_rag_query_generation_fix_error(error: str, code: str, relevant_plugins: str) -> str:
+     prompt = _prompt_rag_query_generation_fix_error.format(
+         error=error,
+         code=code,
+         relevant_plugins=relevant_plugins
+     )
+     return prompt
+
+ def get_prompt_context_learning_scene_plan(examples: str) -> str:
+     prompt = _prompt_context_learning_scene_plan.format(
+         examples=examples
+     )
+     return prompt
+
+ def get_prompt_context_learning_vision_storyboard(examples: str) -> str:
+     prompt = _prompt_context_learning_vision_storyboard.format(
+         examples=examples
+     )
+     return prompt
+
+ def get_prompt_context_learning_technical_implementation(examples: str) -> str:
+     prompt = _prompt_context_learning_technical_implementation.format(
+         examples=examples
+     )
+     return prompt
+
+ def get_prompt_context_learning_animation_narration(examples: str) -> str:
+     prompt = _prompt_context_learning_animation_narration.format(
+         examples=examples
+     )
+     return prompt
+
+ def get_prompt_context_learning_code(examples: str) -> str:
+     prompt = _prompt_context_learning_code.format(
+         examples=examples
+     )
+     return prompt
+
+ def get_prompt_detect_plugins(topic: str, description: str, plugin_descriptions: str) -> str:
+     """
+     Generate a prompt for detecting relevant plugins based on topic and description.
+
+     Args:
+         topic (str): The video topic
+         description (str): The video description
+         plugin_descriptions (str): JSON string of available plugin descriptions
+
+     Returns:
+         str: The formatted prompt for plugin detection
+     """
+     prompt = _prompt_detect_plugins.format(
+         topic=topic,
+         description=description,
+         plugin_descriptions=plugin_descriptions
+     )
+     return prompt
+
+ def get_prompt_animation(topic: str, description: str, additional_context: Union[str, List[str]] = None) -> str:
+     prompt = _prompt_animation_simple.format(
+         topic=topic,
+         description=description
+     )
+     if additional_context is not None:
+         if isinstance(additional_context, str):
+             prompt += f"\nAdditional context: {additional_context}"
+         elif isinstance(additional_context, list) and additional_context:
+             prompt += f"\nAdditional context: {additional_context[0]}"
+             if len(additional_context) > 1:
+                 prompt += "\n" + "\n".join(additional_context[1:])
+     return prompt
+
+ def get_prompt_animation_fix_error(text_explanation: str, manim_code: str, error: str, additional_context: Union[str, List[str]] = None) -> str:
+     """
+     Generate a prompt to fix errors in the given Manim code.
+
+     Args:
+         text_explanation (str): The text explanation that the animation complements.
+         manim_code (str): The Manim code with errors.
+         error (str): The error message encountered.
+         additional_context (Union[str, List[str]], optional): Additional context to include in the prompt.
+
+     Returns:
+         str: The formatted prompt to fix the code errors.
+     """
+     prompt = _prompt_animation_fix_error.format(
+         text_explanation=text_explanation,
+         manim_code=manim_code,
+         error_message=error
+     )
+     if additional_context is not None:
+         if isinstance(additional_context, str):
+             prompt += f"\nAdditional context: {additional_context}"
+         elif isinstance(additional_context, list) and additional_context:
+             prompt += f"\nAdditional context: {additional_context[0]}"
+             if len(additional_context) > 1:
+                 prompt += "\n" + "\n".join(additional_context[1:])
+     return prompt
+
+ def get_prompt_animation_rag_query_generation(topic: str, context: str, relevant_plugins: str) -> str:
+     if context is None:
+         context = ""
+     prompt = _prompt_animation_rag_query_generation.format(
+         topic=topic,
+         context=context,
+         relevant_plugins=relevant_plugins
+     )
+     return prompt
+
+ def get_prompt_animation_rag_query_generation_fix_error(text_explanation: str, error: str, code: str) -> str:
+     prompt = _prompt_animation_rag_query_generation_fix_error.format(
+         text_explanation=text_explanation,
+         error=error,
+         code=code
+     )
+     return prompt
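As a usage sketch, here is one of the builders above invoked with made-up arguments; the plan, code, and error strings are placeholders for illustration only:

```python
from task_generator import get_prompt_fix_error

prompt = get_prompt_fix_error(
    implementation_plan="Scene 1: draw a unit circle and label its radius.",   # placeholder
    manim_code="class Scene1(Scene):\n    def construct(self):\n        ...",  # placeholder
    error="NameError: name 'Circle' is not defined",                           # placeholder
    additional_context=["Target Manim Community v0.18.", "Avoid external assets."],
)
print(prompt[:200])
```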
task_generator/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (12.7 kB).
 
task_generator/parse_prompt.py ADDED
@@ -0,0 +1,54 @@
+ import os
+ from tqdm import tqdm
+
+
+ def call_parse_prompt():
+     """
+     Find the prompts_raw directory and generate an __init__.py file containing prompt texts.
+
+     Searches for the prompts_raw directory in the current and parent directories. Once found,
+     calls create_python_file_with_texts() to generate the __init__.py file.
+     """
+     current_file_path = os.path.abspath(__file__)
+     current_folder_path = os.path.dirname(current_file_path)
+     folder_path = os.path.join(current_folder_path, "prompts_raw")
+
+     # If prompts_raw is not found in the current directory, search parent directories
+     if not os.path.exists(folder_path):
+         parent_dir = current_folder_path
+         while parent_dir != os.path.dirname(parent_dir):  # Stop at the filesystem root
+             parent_dir = os.path.dirname(parent_dir)
+             test_path = os.path.join(parent_dir, "prompts_raw")
+             if os.path.exists(test_path):
+                 folder_path = test_path
+                 break
+
+     output_file = os.path.join(folder_path, "__init__.py")
+     create_python_file_with_texts(folder_path, output_file)
+
+
+ def create_python_file_with_texts(folder_path: str, output_file: str) -> None:
+     """
+     Generate a Python file containing prompt texts from .txt files.
+
+     Args:
+         folder_path (str): Path to the directory containing prompt .txt files
+         output_file (str): Path where the generated Python file will be saved
+
+     The function reads all .txt files under the given folder, converts their contents
+     into Python variables, and writes them to the output file. Variable names are
+     derived from file paths with special characters replaced.
+     """
+     with open(output_file, 'w', encoding='utf-8') as out_file:
+         out_file.write("# This file is generated automatically through parse_prompt.py\n\n")
+         # Keep the full path for each .txt file so nested directories resolve correctly
+         txt_files = [
+             os.path.join(root, file)
+             for root, dirs, files in os.walk(folder_path)
+             for file in files
+             if file.endswith(".txt")
+         ]
+         for file_path in tqdm(txt_files, desc="Processing files"):
+             var_name = "_" + file_path.replace(folder_path, "").replace(os.sep, "_").replace(".txt", "").strip("_")
+             with open(file_path, 'r', encoding='utf-8') as f:
+                 # Escape triple quotes so the generated string literal stays valid
+                 content = f.read().replace('"""', '\\"\\"\\"')
+             out_file.write(f'{var_name} = """{content}"""\n\n')
+
+
+ if __name__ == "__main__":
+     call_parse_prompt()
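The variable-naming convention used by `create_python_file_with_texts` is easiest to see on a concrete path; the file name below is hypothetical:

```python
import os

folder_path = os.path.join("task_generator", "prompts_raw")
file_path = os.path.join(folder_path, "prompt_scene_plan.txt")  # hypothetical file

var_name = "_" + file_path.replace(folder_path, "").replace(os.sep, "_").replace(".txt", "").strip("_")
print(var_name)  # -> _prompt_scene_plan
```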
task_generator/prompts_raw/__init__.py ADDED
The diff for this file is too large to render.
 
task_generator/prompts_raw/__pycache__/__init__.cpython-312.pyc ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d4e8e3c61296436f102ac597f09dfe31ad67a0820ad9160cb4be90486d090b27
+ size 120229
task_generator/prompts_raw/banned_reasonings.txt ADDED
@@ -0,0 +1,18 @@
+ evaluation cannot
+ can't assist
+ cannot assist
+ can't provide
+ cannot provide
+ can't evaluate
+ cannot evaluate
+ cannot be evaluated
+ cannot be rated
+ cannot be completed
+ cannot be assessed
+ cannot be scored
+ cannot be conducted
+ unable to evaluate
+ do not have the capability
+ do not have the ability
+ are photographs and not AI-generated
+ unable to provide the evaluation
task_generator/prompts_raw/code_background.txt ADDED
@@ -0,0 +1,2 @@
+ PLEASE DO NOT create additional colored background Rectangles. The default background (black) is enough.
+ PLEASE DO NOT use the color BLACK for any text.
task_generator/prompts_raw/code_color_cheatsheet.txt ADDED
@@ -0,0 +1,23 @@
+ MUST include the following color definitions if you use the colors in your code. ONLY USE THE COLORS BELOW.
+
+ WHITE = '#FFFFFF'
+ RED = '#FF0000'
+ GREEN = '#00FF00'
+ BLUE = '#0000FF'
+ YELLOW = '#FFFF00'
+ CYAN = '#00FFFF'
+ MAGENTA = '#FF00FF'
+ ORANGE = '#FFA500'
+ PURPLE = '#800080'
+ PINK = '#FFC0CB'
+ BROWN = '#A52A2A'
+ GRAY = '#808080'
+ TEAL = '#008080'
+ NAVY = '#000080'
+ OLIVE = '#808000'
+ MAROON = '#800000'
+ LIME = '#00FF00'
+ AQUA = '#00FFFF'
+ FUCHSIA = '#FF00FF'
+ SILVER = '#C0C0C0'
+ GOLD = '#FFD700'
task_generator/prompts_raw/code_disable.txt ADDED
File without changes
task_generator/prompts_raw/code_font_size.txt ADDED
@@ -0,0 +1,5 @@
+ If there is title text, a font size of 28 is highly recommended.
+ If there are side labels, a font size of 24 is highly recommended.
+ If there are formulas, a font size of 24 is highly recommended.
+
+ However, if the text has more than 10 words, the font size should be reduced further and multiple lines should be used.
task_generator/prompts_raw/code_limit.txt ADDED
@@ -0,0 +1,4 @@
+ Note that the frame width and height are 14.222222222222221 and 8.0 respectively, and the center of the frame is (0, 0, 0).
+ To avoid putting any object out of the frame, limit the x coordinates of objects to within -7.0 and 7.0, and the y coordinates to within -4.0 and 4.0.
+ Place the objects near the center of the frame, without overlapping each other.
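A minimal Manim sketch, assuming Manim Community, of the placement rule this prompt enforces: relative positioning near the origin, inside the x in [-7, 7] and y in [-4, 4] bounds.

```python
from manim import ORIGIN, UP, Scene, Square, Text

class BoundsDemo(Scene):
    def construct(self):
        box = Square().move_to(ORIGIN)  # center of the 14.22 x 8.0 frame
        label = Text("box", font_size=24).next_to(box, UP, buff=0.3)
        # Both mobjects stay well within x in [-7, 7] and y in [-4, 4].
        self.add(box, label)
```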
task_generator/prompts_raw/prompt_animation_fix_error.txt ADDED
@@ -0,0 +1,50 @@
+ You are an expert Manim developer specializing in debugging and error resolution. Analyze the provided code and error message to provide a comprehensive fix and explanation.
+
+ <CONTEXT>
+ Text Explanation:
+ {text_explanation}
+
+ Manim Code Animation to complement the Text Explanation:
+ ```python
+ {manim_code}
+ ```
+
+ Error message encountered when running the code:
+ {error_message}
+ </CONTEXT>
+
+ You MUST only output the following format (make sure to include the ```python and ``` in the code):
+
+ <ERROR_ANALYSIS>
+ Error Type: [Syntax/Runtime/Logic/Other]
+ Error Location: [File/Line number/Component]
+ Root Cause: [Brief explanation of what caused the error]
+ Impact: [What functionality is affected]
+ </ERROR_ANALYSIS>
+
+ <SOLUTION>
+ [FIXES_REQUIRED]
+ - Fix 1: [Description]
+   - Location: [Where to apply]
+   - Change: [What to modify]
+ - Fix 2: [If applicable]
+ ...
+
+ [CORRECTED_CODE]
+ ```python
+ # Complete corrected and fully implemented code, don't be lazy
+ # Include all necessary imports, definitions, and any additional code for the script to run successfully
+ ```
+
+ </SOLUTION>
+
+ Requirements:
+ 1. Provide complete error analysis with specific line numbers where possible.
+ 2. Include exact instructions for every code change.
+ 3. Ensure that the [CORRECTED_CODE] section contains complete, executable Python code (not just code snippets). Do not assume context from the prompt.
+ 4. Explain why the error occurred in plain language.
+ 5. Include verification steps to confirm the error is resolved.
+ 6. Suggest preventive measures for avoiding similar errors in the future.
+ 7. If external assets (e.g., images, audio, video) are referenced, remove them.
+ 8. Preserve all original code that is not causing the reported error. Do not remove or alter any intentional elements unnecessarily.
+ 9. Follow best practices for code clarity and the current Manim version.
task_generator/prompts_raw/prompt_animation_rag_query_generation.txt ADDED
@@ -0,0 +1,29 @@
+ You are an expert in Manim (Community Edition) and its plugins. Your task is to transform a topic for a Manim animation scene into queries that can be used to retrieve relevant documentation from both Manim core and any relevant plugins.
+
+ Your queries should include keywords related to the specific Manim classes, methods, functions, and *concepts* that are likely to be used to implement the scene, including any plugin-specific functionality. Focus on extracting the core concepts, actions, and vocabulary from the *entire* scene plan. Generate queries that are concise and target different aspects of the documentation (class reference, method usage, animation examples, conceptual explanations) across both Manim core and relevant plugins.
+
+ Here is the Topic (and the context):
+
+ {topic}. {context}
+
+ Based on the topic and the context, generate multiple human-like queries (maximum 5-7) for retrieving relevant documentation. Please ensure that the search targets are different so that the RAG can retrieve a diverse set of documents covering various aspects of the implementation.
+
+ **Specifically, ensure that:**
+ 1. At least 1-2 queries are focused on retrieving information about Manim *function usage* in Manim scenes
+ 2. If the topic and the context can be linked to the use of plugin functionality, include at least 1 query specifically targeting plugin documentation
+ 3. Queries should be specific enough to distinguish between core Manim and plugin functionality when relevant
+
+ The above text explanations are relevant to these plugins: {relevant_plugins}
+
+ Output the queries in the following format:
+ ```json
+ [
+     {{"query": "content of query 1", "type": "manim_core/name_of_the_plugin"}},
+     {{"query": "content of query 2", "type": "manim_core/name_of_the_plugin"}},
+     {{"query": "content of query 3", "type": "manim_core/name_of_the_plugin"}},
+     {{"query": "content of query 4", "type": "manim_core/name_of_the_plugin"}},
+     {{"query": "content of query 5", "type": "manim_core/name_of_the_plugin"}},
+     {{"query": "content of query 6", "type": "manim_core/name_of_the_plugin"}},
+     {{"query": "content of query 7", "type": "manim_core/name_of_the_plugin"}},
+ ]
+ ```
task_generator/prompts_raw/prompt_animation_rag_query_generation_fix_error.txt ADDED
@@ -0,0 +1,33 @@
+ You are an expert in Manim (Community Edition) and its plugins. Your task is to transform a complete implementation plan for a Manim animation scene into queries that can be used to retrieve relevant documentation from both Manim core and any relevant plugins. The implementation plan will describe the scene's vision, technical implementation, and animation strategy.
+
+ Here is the Text Explanation (Implementation Plan) as the context:
+
+ {text_explanation}
+
+ The error message will describe a problem encountered while running Manim code. Your queries should include keywords related to the specific Manim classes, methods, functions, and *concepts* that are likely related to the error, including any plugin-specific functionality. Focus on extracting the core concepts, actions, and vocabulary from the error message itself and the code snippet that produced the error. Generate queries that are concise and target different aspects of the documentation (class reference, method usage, animation examples, conceptual explanations) across both Manim core and relevant plugins.
+
+ Here is the error message and the code snippet:
+
+ **Error Message:**
+ {error}
+
+ **Code Snippet:**
+ {code}
+
+ Based on the error message and the code snippet, generate multiple human-like queries (maximum 5-7) for retrieving relevant documentation to fix this error. Please ensure that the search targets are different so that the RAG can retrieve a diverse set of documents covering various aspects of the error and its potential solutions.
+
+ **Specifically, ensure that:**
+ 1. At least 1-2 queries are focused on retrieving information about Manim *function or class usage* that might be causing the error.
+ 2. If the error message or code suggests the use of plugin functionality, include at least 1 query specifically targeting plugin documentation related to the error.
+ 3. Queries should be specific enough to distinguish between core Manim and plugin functionality when relevant.
+
+ Output the queries in the following format:
+ [
+     {{"query": "content of query 1", "type": "manim_core/name_of_the_plugin"}},
+     {{"query": "content of query 2", "type": "manim_core/name_of_the_plugin"}},
+     {{"query": "content of query 3", "type": "manim_core/name_of_the_plugin"}},
+     {{"query": "content of query 4", "type": "manim_core/name_of_the_plugin"}},
+     {{"query": "content of query 5", "type": "manim_core/name_of_the_plugin"}},
+     {{"query": "content of query 6", "type": "manim_core/name_of_the_plugin"}},
+     {{"query": "content of query 7", "type": "manim_core/name_of_the_plugin"}},
+ ]
task_generator/prompts_raw/prompt_animation_simple.txt ADDED
@@ -0,0 +1,30 @@
+ Given a topic and the context, you need to explain the topic in text.
+
+ Also generate a Manim script that visually illustrates a key aspect of {topic} without including explanatory text in the animation itself.
+ Your text can mention the animation, but it should not be the main focus.
+ Context about the topic {topic}: {description}.
+
+ The animation should focus on:
+ * Illustrating the significant part of the theorem or concept - use geometric figures, graphs, number lines, or any relevant visualization.
+ * Providing an intuitive example - instead of proving the theorem, show a concrete example or transformation that visually supports understanding.
+ * Separately, provide a written explanation of the theorem as text that can be displayed outside the animation.
+
+ Ensure that:
+
+ * The animation is concise.
+ * The Manim code is compatible with the latest version of Manim Community.
+ * The visual elements are clear and enhance understanding.
+
+ Please provide only the following output:
+
+ 1. A text explanation of the theorem.
+ 2. A complete Manim script that generates the animation. Only give the code.
+
+ Output format:
+
+ (Text Explanation Output)
+ --- (split by ---)
+ (Manim Code Output)
+
+ Please do not include any other text or headers in your output.
+ Only use one --- to split the text explanation and the Manim code.
task_generator/prompts_raw/prompt_best_practices.txt ADDED
@@ -0,0 +1,16 @@
+ # Best practices for generating educational videos with manim
+
+ 1. Specify positions as relative to other objects whenever it makes sense.
+    * For example, if you want to place a label for a geometric object.
+ 2. Objects should be of a different color from the black background.
+ 3. Keep the text on screen concise.
+    * On-screen elements should focus on showcasing the concept, examples and visuals. Labels and illustrative text are still encouraged.
+    * For explanations and observations, prefer narration over on-screen text.
+    * You should still show calculations and algorithms in full on screen.
+    * For examples and practice problems, it is reasonable to show more text, especially key statements.
+    * Longer text should appear smaller to fit on screen.
+ 4. To control the timing of objects appearing:
+    * `add` has an instantaneous effect, best used for the initial setup of the scene.
+    * Animations are best used during narration.
+    * Make sure the animations make sense. If an object is already on screen, it makes no sense to fade it in or create it again.
+ 5. Use Tex or MathTex whenever you want to display math, including symbols and formulas.
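A minimal sketch, assuming Manim Community, that exercises practices 1, 4, and 5 above: `add` for initial setup, relative positioning for the label, an animation for content revealed during narration, and `MathTex` for the math.

```python
from manim import UP, Axes, Create, MathTex, Scene, Write

class BestPracticesDemo(Scene):
    def construct(self):
        axes = Axes(x_range=[-3, 3], y_range=[0, 9])
        self.add(axes)  # instantaneous: initial scene setup (practice 4)
        formula = MathTex("y = x^2", font_size=36).next_to(axes, UP, buff=0.3)  # relative position (practice 1)
        self.play(Write(formula))            # animated while narration would play (practice 4)
        graph = axes.plot(lambda x: x ** 2)  # the visual carries the concept (practice 3)
        self.play(Create(graph))
```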