Upload 11 files
- .gitignore +89 -0
- LICENSE +21 -0
- README.md +348 -10
- create_embeddings.py +491 -0
- docker-compose.yml +80 -0
- dockerfile +126 -0
- evaluate.py +474 -0
- generate_video.py +990 -0
- gradio_app.log +0 -0
- gradio_app.py +925 -0
- requirements.txt +99 -0
.gitignore
ADDED
@@ -0,0 +1,89 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# Installer logs
debug.log
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# VS Code
.vscode/

# mypy
.mypy_cache/
.dmypy.json

# Pyre type checker
.pyre/

# Output and models

output/
models/

# Environment files
.env
.env.*
*.env

# OS files
.DS_Store
Thumbs.db

# Log files
*.log

# Docker
*.pid
*.sock

# Ignore thumbnails
thumbnails/

# Ignore docker build cache
__docker_build_cache__/

# Ignore Miniconda
/opt/conda/
LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 TIGER Lab

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md
CHANGED
@@ -1,10 +1,348 @@
# TheoremExplainAgent (TEA) 🍵
[](https://arxiv.org/abs/2502.19400)
<a href='https://huggingface.co/papers/2502.19400'><img src='https://img.shields.io/static/v1?label=Paper&message=Huggingface&color=orange'></a>

[**🌐 Homepage**](https://tiger-ai-lab.github.io/TheoremExplainAgent/) | [**📖 arXiv**](https://arxiv.org/abs/2502.19400) | [**🤗 HuggingFace Dataset**](https://huggingface.co/datasets/TIGER-Lab/TheoremExplainBench)

[](https://github.com/TIGER-AI-Lab/TheoremExplainAgent/graphs/contributors)
[](https://github.com/TIGER-AI-Lab/TheoremExplainAgent/blob/main/LICENSE)
[](https://github.com/TIGER-AI-Lab/TheoremExplainAgent)
[](https://hits.seeyoufarm.com)

This repo contains the codebase for our paper [TheoremExplainAgent: Towards Multimodal Explanations for LLM Theorem Understanding](https://arxiv.org/abs/2502.19400).

## Introduction
TheoremExplainAgent is an AI system that generates long-form Manim videos to visually explain theorems, proving its deep understanding while uncovering reasoning flaws that text alone often hides.


https://github.com/user-attachments/assets/17f2f4f2-8f2c-4abc-b377-ac92ebda69f3


## 📰 News
* 2025 Mar 3: Generation code and evaluation code released. Thanks for the wait!
<!--* 2025 Mar 3: Reach 404 stars without code.-->
* 2025 Feb 27: Paper available on [arXiv](https://arxiv.org/abs/2502.19400). Thanks AK for putting our paper on [HF Daily](https://huggingface.co/papers/2502.19400).

## Installation

> **Check the [FAQ section in this README](https://github.com/TIGER-AI-Lab/TheoremExplainAgent?tab=readme-ov-file#-faq) if you encounter any errors. If that doesn't help, create an issue.**<br>

1. Set up the conda environment:
```shell
conda create --name tea python=3.12.8
conda activate tea
pip install -r requirements.txt
```

2. You may also need to install LaTeX and other dependencies for Manim Community. See the [Manim installation docs](https://docs.manim.community/en/stable/installation.html) for more details.
```shell
# You might need these dependencies if you are using Ubuntu Linux:
sudo apt-get install portaudio19-dev
sudo apt-get install libsdl-pango-dev
```

3. Then download the Kokoro model and voices with the following commands to enable the TTS service:

```shell
mkdir -p models && wget -P models https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/kokoro-v0_19.onnx && wget -P models https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/voices.bin
```

4. Create `.env` based on `.env.template`, filling in the environment variables according to the models you choose to use.
See [LiteLLM](https://docs.litellm.ai/docs/providers) for reference.

```shell
touch .env
```
Then open the `.env` file and edit it with whatever text editor you like.

Your `.env` file should look like the following:
```shell
# OpenAI
OPENAI_API_KEY=""

# Azure OpenAI
AZURE_API_KEY=""
AZURE_API_BASE=""
AZURE_API_VERSION=""

# Google Vertex AI
VERTEXAI_PROJECT=""
VERTEXAI_LOCATION=""
GOOGLE_APPLICATION_CREDENTIALS=""

# Google Gemini
GEMINI_API_KEY=""

...

# Kokoro TTS Settings
KOKORO_MODEL_PATH="models/kokoro-v0_19.onnx"
KOKORO_VOICES_PATH="models/voices.bin"
KOKORO_DEFAULT_VOICE="af"
KOKORO_DEFAULT_SPEED="1.0"
KOKORO_DEFAULT_LANG="en-us"
```
Fill in the API keys according to the models you want to use.

5. Configure the Python path. You need to set it for the imports to work; otherwise you may encounter import issues (such as not being able to import `src`).
```shell
export PYTHONPATH=$(pwd):$PYTHONPATH
```

6. (Optional) To set up RAG, see [Generation with RAG](https://github.com/TIGER-AI-Lab/TheoremExplainAgent?tab=readme-ov-file#generation-with-rag).

> **Check the [FAQ section in this README](https://github.com/TIGER-AI-Lab/TheoremExplainAgent?tab=readme-ov-file#-faq) if you encounter any errors. If that doesn't help, create an issue.**<br>

## Generation

### Supported Models
<!--You can customize the allowed models by editing the `src/utils/allowed_models.json` file. This file specifies which `model` and `helper_model` the system is permitted to use.-->
The model naming follows the LiteLLM convention. For details on how models should be named, please refer to the [LiteLLM documentation](https://docs.litellm.ai/docs/providers).

### Generation (Single topic)
```shell
python generate_video.py \
    --model "openai/o3-mini" \
    --helper_model "openai/o3-mini" \
    --output_dir "output/your_exp_name" \
    --topic "your_topic" \
    --context "description of your topic, e.g. 'This is a topic about the properties of a triangle'"
```

Example:
```shell
python generate_video.py \
    --model "openai/o3-mini" \
    --helper_model "openai/o3-mini" \
    --output_dir "output/my_exp_name" \
    --topic "Big O notation" \
    --context "most common type of asymptotic notation in computer science used to measure worst case complexity"
```

### Generation (in batch)
```shell
python generate_video.py \
    --model "openai/o3-mini" \
    --helper_model "openai/o3-mini" \
    --output_dir "output/my_exp_name" \
    --theorems_path data/thb_easy/math.json \
    --max_scene_concurrency 7 \
    --max_topic_concurrency 20
```

### Generation with RAG
Before using RAG, download the RAG documentation from this [Google Drive link](https://drive.google.com/file/d/1Tn6J_JKVefFZRgZbjns93KLBtI9ullRv/view?usp=sharing). After downloading, unzip the file. For example, if you unzip it to `data/rag/manim_docs`, then you should set `--manim_docs_path` to `data/rag/manim_docs`. The vector database will be created the first time you run with RAG.

```shell
python generate_video.py \
    --model "openai/o3-mini" \
    --helper_model "openai/o3-mini" \
    --output_dir "output/with_rag/o3-mini/vtutorbench_easy/math" \
    --topic "Big O notation" \
    --context "most common type of asymptotic notation in computer science used to measure worst case complexity" \
    --use_rag \
    --chroma_db_path "data/rag/chroma_db" \
    --manim_docs_path "data/rag/manim_docs" \
    --embedding_model "vertex_ai/text-embedding-005"
```

We support more options for generation; see below for details:
```shell
usage: generate_video.py [-h]
                         [--model]
                         [--topic TOPIC] [--context CONTEXT]
                         [--helper_model]
                         [--only_gen_vid] [--only_combine] [--peek_existing_videos] [--output_dir OUTPUT_DIR] [--theorems_path THEOREMS_PATH]
                         [--sample_size SAMPLE_SIZE] [--verbose] [--max_retries MAX_RETRIES] [--use_rag] [--use_visual_fix_code]
                         [--chroma_db_path CHROMA_DB_PATH] [--manim_docs_path MANIM_DOCS_PATH]
                         [--embedding_model {azure/text-embedding-3-large,vertex_ai/text-embedding-005}] [--use_context_learning]
                         [--context_learning_path CONTEXT_LEARNING_PATH] [--use_langfuse] [--max_scene_concurrency MAX_SCENE_CONCURRENCY]
                         [--max_topic_concurrency MAX_TOPIC_CONCURRENCY] [--debug_combine_topic DEBUG_COMBINE_TOPIC] [--only_plan] [--check_status]
                         [--only_render] [--scenes SCENES [SCENES ...]]

Generate Manim videos using AI

options:
  -h, --help            show this help message and exit
  --model               Select the AI model to use
  --topic TOPIC         Topic to generate videos for
  --context CONTEXT     Context of the topic
  --helper_model        Select the helper model to use
  --only_gen_vid        Only generate videos to existing plans
  --only_combine        Only combine videos
  --peek_existing_videos, --peek
                        Peek at existing videos
  --output_dir OUTPUT_DIR
                        Output directory
  --theorems_path THEOREMS_PATH
                        Path to theorems json file
  --sample_size SAMPLE_SIZE, --sample SAMPLE_SIZE
                        Number of theorems to sample
  --verbose             Print verbose output
  --max_retries MAX_RETRIES
                        Maximum number of retries for code generation
  --use_rag, --rag      Use Retrieval Augmented Generation
  --use_visual_fix_code, --visual_fix_code
                        Use VLM to fix code with rendered visuals
  --chroma_db_path CHROMA_DB_PATH
                        Path to Chroma DB
  --manim_docs_path MANIM_DOCS_PATH
                        Path to manim docs
  --embedding_model {azure/text-embedding-3-large,vertex_ai/text-embedding-005}
                        Select the embedding model to use
  --use_context_learning
                        Use context learning with example Manim code
  --context_learning_path CONTEXT_LEARNING_PATH
                        Path to context learning examples
  --use_langfuse        Enable Langfuse logging
  --max_scene_concurrency MAX_SCENE_CONCURRENCY
                        Maximum number of scenes to process concurrently
  --max_topic_concurrency MAX_TOPIC_CONCURRENCY
                        Maximum number of topics to process concurrently
  --debug_combine_topic DEBUG_COMBINE_TOPIC
                        Debug combine videos
  --only_plan           Only generate scene outline and implementation plans
  --check_status        Check planning and code status for all theorems
  --only_render         Only render scenes without combining videos
  --scenes SCENES [SCENES ...]
                        Specific scenes to process (if theorems_path is provided)
```

## Evaluation
Note that Gemini and GPT-4o are required for evaluation.

Currently, evaluation requires a video file and a subtitle file (SRT format).

Video evaluation:
```shell
usage: evaluate.py [-h]
                   [--model_text {gemini/gemini-1.5-pro-002,gemini/gemini-1.5-flash-002,gemini/gemini-2.0-flash-001,vertex_ai/gemini-1.5-flash-002,vertex_ai/gemini-1.5-pro-002,vertex_ai/gemini-2.0-flash-001,openai/o3-mini,gpt-4o,azure/gpt-4o,azure/gpt-4o-mini,bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0,bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0,bedrock/anthropic.claude-3-5-haiku-20241022-v1:0,bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0}]
                   [--model_video {gemini/gemini-1.5-pro-002,gemini/gemini-2.0-flash-exp,gemini/gemini-2.0-pro-exp-02-05}]
                   [--model_image {gemini/gemini-1.5-pro-002,gemini/gemini-1.5-flash-002,gemini/gemini-2.0-flash-001,vertex_ai/gemini-1.5-flash-002,vertex_ai/gemini-1.5-pro-002,vertex_ai/gemini-2.0-flash-001,openai/o3-mini,gpt-4o,azure/gpt-4o,azure/gpt-4o-mini,bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0,bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0,bedrock/anthropic.claude-3-5-haiku-20241022-v1:0,bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0}]
                   [--eval_type {text,video,image,all}] --file_path FILE_PATH --output_folder OUTPUT_FOLDER [--retry_limit RETRY_LIMIT] [--combine] [--bulk_evaluate] [--target_fps TARGET_FPS]
                   [--use_parent_folder_as_topic] [--max_workers MAX_WORKERS]

Automatic evaluation of theorem explanation videos with LLMs

options:
  -h, --help            show this help message and exit
  --model_text {gemini/gemini-1.5-pro-002,gemini/gemini-1.5-flash-002,gemini/gemini-2.0-flash-001,vertex_ai/gemini-1.5-flash-002,vertex_ai/gemini-1.5-pro-002,vertex_ai/gemini-2.0-flash-001,openai/o3-mini,gpt-4o,azure/gpt-4o,azure/gpt-4o-mini,bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0,bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0,bedrock/anthropic.claude-3-5-haiku-20241022-v1:0,bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0}
                        Select the AI model to use for text evaluation
  --model_video {gemini/gemini-1.5-pro-002,gemini/gemini-2.0-flash-exp,gemini/gemini-2.0-pro-exp-02-05}
                        Select the AI model to use for video evaluation
  --model_image {gemini/gemini-1.5-pro-002,gemini/gemini-1.5-flash-002,gemini/gemini-2.0-flash-001,vertex_ai/gemini-1.5-flash-002,vertex_ai/gemini-1.5-pro-002,vertex_ai/gemini-2.0-flash-001,openai/o3-mini,gpt-4o,azure/gpt-4o,azure/gpt-4o-mini,bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0,bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0,bedrock/anthropic.claude-3-5-haiku-20241022-v1:0,bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0}
                        Select the AI model to use for image evaluation
  --eval_type {text,video,image,all}
                        Type of evaluation to perform
  --file_path FILE_PATH
                        Path to a file or a theorem folder
  --output_folder OUTPUT_FOLDER
                        Directory to store the evaluation files
  --retry_limit RETRY_LIMIT
                        Number of retry attempts for each inference
  --combine             Combine all results into a single JSON file
  --bulk_evaluate       Evaluate a folder of theorems together
  --target_fps TARGET_FPS
                        Target FPS for video processing. If not set, original video FPS will be used
  --use_parent_folder_as_topic
                        Use parent folder name as topic name for single file evaluation
  --max_workers MAX_WORKERS
                        Maximum number of concurrent workers for parallel processing
```
* For `file_path`, it is recommended to pass a folder containing both an MP4 file and an SRT file; an example invocation is sketched below.

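An illustrative single-run invocation (a sketch only: the Gemini model choice and the paths are placeholders picked from the option lists and the generation example above):
```shell
python evaluate.py \
    --model_text "gemini/gemini-1.5-pro-002" \
    --model_video "gemini/gemini-1.5-pro-002" \
    --model_image "gemini/gemini-1.5-pro-002" \
    --eval_type all \
    --file_path "output/my_exp_name/big_o_notation" \
    --output_folder "output/evaluation" \
    --use_parent_folder_as_topic
```
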
## Misc: Modify the system prompt in TheoremExplainAgent

If you want to modify the system prompt, you need to:

1. Modify the files in the `task_generator/prompts_raw` folder.
2. Run `task_generator/parse_prompt.py` to rebuild the `__init__.py` file.

```shell
cd task_generator
python parse_prompt.py
cd ..
```

## TheoremExplainBench (TEB)

TheoremExplainBench can be found on https://huggingface.co/datasets/TIGER-Lab/TheoremExplainBench.

How to use:
```python
import datasets
dataset = datasets.load_dataset("TIGER-Lab/TheoremExplainBench")
```

Dataset info:
```shell
DatasetDict({
    train: Dataset({
        features: ['uid', 'subject', 'difficulty', 'theorem', 'description', 'subfield'],
        num_rows: 240
    })
})
```

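To peek at a single benchmark entry, a minimal sketch using the fields listed above:
```python
example = dataset["train"][0]
print(example["uid"], example["subject"], example["difficulty"])
print(example["theorem"], "-", example["subfield"])
```
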
## ❓ FAQ

The FAQ should cover the most common errors you could encounter. If you see something new, report it in the issues.

Q: Error `src.utils.kokoro_voiceover import KokoroService # You MUST import like this as this is our custom voiceover service. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ModuleNotFoundError: No module named 'src'`. <br>
A: Please run `export PYTHONPATH=$(pwd):$PYTHONPATH` when you start a new terminal. <br>

Q: Error `Files not found` <br>
A: Check your Manim installation. <br>

Q: Error `latex ...` <br>
A: Check your LaTeX installation. <br>

Q: The output log is not showing a response? <br>
A: It could be an API-related issue. Make sure your `.env` file is properly configured (fill in your API keys), or enable LiteLLM debug mode (a sketch follows below) to diagnose the issue. <br>

Q: Plans / scenes are missing? <br>
A: It could be an API-related issue. Make sure your `.env` file is properly configured (fill in your API keys), or enable LiteLLM debug mode to diagnose the issue. <br>

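A minimal sketch of enabling that debug mode (the exact switch is an assumption that depends on your LiteLLM version; check the LiteLLM docs if it differs):
```python
import os
os.environ["LITELLM_LOG"] = "DEBUG"  # assumption: logging env var supported by recent LiteLLM releases

import litellm
litellm.set_verbose = True  # assumption: legacy verbose flag on older releases
```
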
## 🖊️ Citation

Please kindly cite our paper if you use our code, data, models or results:
```bibtex
@misc{ku2025theoremexplainagentmultimodalexplanationsllm,
      title={TheoremExplainAgent: Towards Multimodal Explanations for LLM Theorem Understanding},
      author={Max Ku and Thomas Chong and Jonathan Leung and Krish Shah and Alvin Yu and Wenhu Chen},
      year={2025},
      eprint={2502.19400},
      archivePrefix={arXiv},
      primaryClass={cs.AI},
      url={https://arxiv.org/abs/2502.19400},
}
```

## 🎫 License

This project is released under the [MIT License](LICENSE).

## ⭐ Star History

[](https://star-history.com/#TIGER-AI-Lab/TheoremExplainAgent&Date)

## 💞 Acknowledgements

We want to thank [Votee AI](https://votee.ai/) for sponsoring API keys to access the closed-source models.

The code is built upon the repositories below; we thank all the contributors for open-sourcing their work.
* [Manim Community](https://www.manim.community/)
* [kokoro-manim-voiceover](https://github.com/xposed73/kokoro-manim-voiceover)
* [manim-physics](https://github.com/Matheart/manim-physics)
* [manim-Chemistry](https://github.com/UnMolDeQuimica/manim-Chemistry)
* [ManimML](https://github.com/helblazer811/ManimML)
* [manim-dsa](https://github.com/F4bbi/manim-dsa)
* [manim-circuit](https://github.com/Mr-FuzzyPenguin/manim-circuit)

## 🚨 Disclaimer

**This work is intended for research purposes only. The authors do not encourage or endorse the use of this codebase for commercial applications. The code is provided "as is" without any warranties, and users assume all responsibility for its use.**

Tested environments: macOS, Linux
create_embeddings.py
ADDED
@@ -0,0 +1,491 @@
#!/usr/bin/env python3
"""
Script to create embeddings and populate the vector store with Manim documentation.
"""

import os
import sys
import time
import json
import logging
from pathlib import Path
from typing import List, Dict, Any

from src.rag.vector_store import EnhancedRAGVectorStore as RAGVectorStore
from src.config.config import Config

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

def download_manim_docs():
    """Download Manim documentation if not available locally."""
    print("📥 Checking for Manim documentation...")

    manim_docs_path = Config.MANIM_DOCS_PATH
    os.makedirs(manim_docs_path, exist_ok=True)

    # Check if we have any documentation files
    doc_files = []
    for ext in ['.py', '.md', '.rst', '.txt']:
        doc_files.extend(list(Path(manim_docs_path).rglob(f'*{ext}')))

    if len(doc_files) == 0:
        print("📝 No documentation files found. Creating sample documentation...")
        create_sample_manim_docs(manim_docs_path)
    else:
        print(f"✅ Found {len(doc_files)} documentation files")

def create_sample_manim_docs(docs_path: str):
    """Create sample Manim documentation for testing."""

    # Create core documentation
    core_docs = {
        "circle.py": '''
"""Circle mobject in Manim.

The Circle class is one of the basic geometric shapes in Manim.
It can be used to create circular objects that can be animated.

Example:
    circle = Circle(radius=1, color=BLUE)
    self.add(circle)
    self.play(Create(circle))

Attributes:
    radius (float): The radius of the circle
    color (str): The color of the circle
    stroke_width (float): The width of the circle's outline
"""

from manim import *

class Circle(Arc):
    \"\"\"A circle mobject.

    Parameters
    ----------
    radius : float
        The radius of the circle.
    color : str
        The color of the circle.
    \"\"\"

    def __init__(self, radius=1, **kwargs):
        super().__init__(start_angle=0, angle=TAU, radius=radius, **kwargs)

    def animate_creation(self):
        \"\"\"Animate the creation of the circle.\"\"\"
        return Create(self)

    def move_to_position(self, position):
        \"\"\"Move circle to a specific position.\"\"\"
        return self.move_to(position)
''',

        "text.py": '''
"""Text mobjects in Manim.

Text and Tex classes for displaying text and mathematical expressions.

Example:
    text = Text("Hello World", font_size=48)
    equation = MathTex(r"E = mc^2")

    self.play(Write(text))
    self.play(Transform(text, equation))
"""

from manim import *

class Text(SVGMobject):
    \"\"\"Text mobject for displaying regular text.

    Parameters
    ----------
    text : str
        The text to display
    font_size : int
        Size of the font
    color : str
        Color of the text
    \"\"\"

    def __init__(self, text, font_size=48, **kwargs):
        self.text = text
        self.font_size = font_size
        super().__init__(**kwargs)

class MathTex(SVGMobject):
    \"\"\"Mathematical text using LaTeX.

    Parameters
    ----------
    tex_string : str
        LaTeX string for mathematical expression
    \"\"\"

    def __init__(self, tex_string, **kwargs):
        self.tex_string = tex_string
        super().__init__(**kwargs)
''',

        "animation.py": '''
"""Animation classes in Manim.

Core animation classes for creating smooth transitions and movements.

Example:
    circle = Circle()

    # Basic animations
    self.play(Create(circle))
    self.play(FadeIn(circle))
    self.play(Transform(circle, square))
"""

from manim import *

class Create(Animation):
    \"\"\"Animation that creates a mobject by drawing it.

    Parameters
    ----------
    mobject : Mobject
        The mobject to create
    run_time : float
        Duration of the animation
    \"\"\"

    def __init__(self, mobject, run_time=1, **kwargs):
        super().__init__(mobject, run_time=run_time, **kwargs)

class Transform(Animation):
    \"\"\"Transform one mobject into another.

    Parameters
    ----------
    mobject : Mobject
        Source mobject
    target_mobject : Mobject
        Target mobject to transform into
    \"\"\"

    def __init__(self, mobject, target_mobject, **kwargs):
        self.target_mobject = target_mobject
        super().__init__(mobject, **kwargs)

class FadeIn(Animation):
    \"\"\"Fade in animation.\"\"\"
    pass

class FadeOut(Animation):
    \"\"\"Fade out animation.\"\"\"
    pass
''',

        "scene.py": '''
"""Scene class - the foundation of Manim animations.

Every Manim animation is built using Scene classes.

Example:
    class MyScene(Scene):
        def construct(self):
            circle = Circle()
            self.play(Create(circle))
            self.wait(1)
"""

from manim import *

class Scene:
    \"\"\"Base class for all Manim scenes.

    The Scene class provides the foundation for creating animations.
    Override the construct() method to define your animation.
    \"\"\"

    def __init__(self):
        self.mobjects = []

    def construct(self):
        \"\"\"Override this method to create your animation.\"\"\"
        pass

    def add(self, *mobjects):
        \"\"\"Add mobjects to the scene.\"\"\"
        for mobject in mobjects:
            self.mobjects.append(mobject)

    def play(self, *animations, run_time=1):
        \"\"\"Play animations.

        Parameters
        ----------
        animations : Animation
            Animations to play
        run_time : float
            Duration to run animations
        \"\"\"
        for animation in animations:
            animation.run_time = run_time

    def wait(self, duration=1):
        \"\"\"Wait for specified duration.\"\"\"
        time.sleep(duration)
''',

        "plotting.py": '''
"""Plotting utilities in Manim.

Classes for creating mathematical plots and graphs.

Example:
    axes = Axes(x_range=[-3, 3], y_range=[-2, 2])
    graph = axes.plot(lambda x: x**2, color=BLUE)

    self.play(Create(axes))
    self.play(Create(graph))
"""

from manim import *

class Axes(VGroup):
    \"\"\"Coordinate axes for plotting.

    Parameters
    ----------
    x_range : list
        Range for x-axis [min, max, step]
    y_range : list
        Range for y-axis [min, max, step]
    \"\"\"

    def __init__(self, x_range=[-1, 1, 1], y_range=[-1, 1, 1], **kwargs):
        self.x_range = x_range
        self.y_range = y_range
        super().__init__(**kwargs)

    def plot(self, function, color=WHITE, **kwargs):
        \"\"\"Plot a mathematical function.

        Parameters
        ----------
        function : callable
            Function to plot
        color : str
            Color of the plot
        \"\"\"
        return ParametricFunction(function, color=color, **kwargs)

    def get_graph(self, function, **kwargs):
        \"\"\"Get graph of function.\"\"\"
        return self.plot(function, **kwargs)

class NumberPlane(Axes):
    \"\"\"A coordinate plane with grid lines.\"\"\"

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
'''
    }

    # Create the core documentation files
    for filename, content in core_docs.items():
        file_path = os.path.join(docs_path, filename)
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content)

    # Create a README file
    readme_content = '''# Manim Documentation

This directory contains documentation for Manim (Mathematical Animation Engine).

## Core Classes

- **Circle**: Create circular shapes and animations
- **Text/MathTex**: Display text and mathematical expressions
- **Scene**: Base class for all animations
- **Animation**: Classes for creating smooth transitions
- **Axes/NumberPlane**: Plotting and coordinate systems

## Basic Usage

```python
from manim import *

class MyScene(Scene):
    def construct(self):
        # Create objects
        circle = Circle(radius=1, color=BLUE)
        text = Text("Hello Manim!")

        # Animate
        self.play(Create(circle))
        self.play(Write(text))
        self.wait(1)
```

For more information, visit: https://docs.manim.community/
'''

    readme_path = os.path.join(docs_path, "README.md")
    with open(readme_path, 'w', encoding='utf-8') as f:
        f.write(readme_content)

    print(f"✅ Created {len(core_docs) + 1} sample documentation files")

def create_vector_store_with_progress():
    """Create and populate the vector store with progress tracking."""

    print("🚀 Initializing Enhanced RAG Vector Store...")
    print(f"📁 ChromaDB Path: {Config.CHROMA_DB_PATH}")
    print(f"📁 Manim Docs Path: {Config.MANIM_DOCS_PATH}")
    print(f"🧠 Embedding Model: {Config.EMBEDDING_MODEL}")
    print(f"📊 Expected Embedding Dimensions: 384")

    # Ensure ChromaDB directory exists
    os.makedirs(Config.CHROMA_DB_PATH, exist_ok=True)

    start_time = time.time()

    try:
        # Create vector store - this will automatically process documents and create embeddings
        vector_store = RAGVectorStore(
            chroma_db_path=Config.CHROMA_DB_PATH,
            manim_docs_path=Config.MANIM_DOCS_PATH,
            embedding_model=Config.EMBEDDING_MODEL,
            session_id="embedding_creation",
            use_langfuse=False
        )

        print("✅ Vector store initialized successfully!")

        # Check document count
        if hasattr(vector_store, 'core_vector_store') and vector_store.core_vector_store:
            try:
                collection = vector_store.core_vector_store._collection
                doc_count = collection.count()
                print(f"📊 Total documents in vector store: {doc_count}")

                if doc_count > 0:
                    # Test the embedding dimensions
                    embedding_function = vector_store._get_embedding_function()
                    test_embedding = embedding_function.embed_query("test query")
                    print(f"🧠 Embedding dimensions: {len(test_embedding)}")

                    # Get sample documents
                    sample_docs = collection.peek(limit=3)
                    print(f"📋 Sample document IDs: {sample_docs.get('ids', [])[:3]}")

                    # Test search functionality
                    print("\n🔍 Testing search functionality...")
                    results = vector_store.core_vector_store.similarity_search_with_relevance_scores(
                        query="How to create a circle?",
                        k=3,
                        score_threshold=0.0
                    )

                    print(f"🎯 Search results: {len(results)} documents found")
                    for i, (doc, score) in enumerate(results):
                        print(f"  [{i+1}] Score: {score:.4f} | Source: {doc.metadata.get('source', 'unknown')}")
                        print(f"      Content: {doc.page_content[:100]}...")
                else:
                    print("⚠️ Warning: No documents found in vector store")

            except Exception as e:
                print(f"❌ Error checking document count: {e}")

        elapsed_time = time.time() - start_time
        print(f"\n⏱️ Vector store creation completed in {elapsed_time:.2f} seconds")

        return vector_store

    except Exception as e:
        print(f"❌ Error creating vector store: {e}")
        import traceback
        traceback.print_exc()
        return None

def verify_embeddings():
    """Verify that embeddings were created successfully."""

    print("\n🔬 Verifying embeddings...")

    try:
        # Load the vector store
        vector_store = RAGVectorStore(
            chroma_db_path=Config.CHROMA_DB_PATH,
            manim_docs_path=Config.MANIM_DOCS_PATH,
            embedding_model=Config.EMBEDDING_MODEL,
            session_id="verification",
            use_langfuse=False
        )

        # Test queries
        test_queries = [
            "How to create a circle in Manim?",
            "Text animation examples",
            "Mathematical plotting with axes",
            "Scene construction basics"
        ]

        print("🎯 Testing search with various queries:")

        for query in test_queries:
            results = vector_store.core_vector_store.similarity_search_with_relevance_scores(
                query=query,
                k=2,
                score_threshold=0.0
            )

            print(f"\nQuery: '{query}'")
            print(f"Results: {len(results)} documents")

            for i, (doc, score) in enumerate(results):
                print(f"  [{i+1}] Score: {score:.4f}")
                print(f"      Source: {doc.metadata.get('source', 'unknown')}")
                print(f"      Content: {doc.page_content[:80]}...")

        print("\n✅ Embedding verification completed successfully!")
        return True

    except Exception as e:
        print(f"❌ Embedding verification failed: {e}")
        return False

def main():
    """Main function to create embeddings."""

    print("🎬 MANIM RAG EMBEDDING CREATION")
    print("="*50)

    # Step 1: Download/create documentation
    download_manim_docs()

    # Step 2: Create vector store and embeddings
    vector_store = create_vector_store_with_progress()

    if vector_store is None:
        print("❌ Failed to create vector store. Exiting.")
        return False

    # Step 3: Verify embeddings
    success = verify_embeddings()

    if success:
        print("\n🎉 SUCCESS! Embeddings created and verified.")
        print(f"📁 ChromaDB location: {Config.CHROMA_DB_PATH}")
        print("🔍 You can now run the RAG system tests.")
        print("\nNext steps:")
        print("  1. Run: python test_rag_system.py")
        print("  2. Run: python debug_retrieval.py")
    else:
        print("\n❌ FAILED! Something went wrong during verification.")

    return success

if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)
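A minimal invocation sketch for this script, assuming the repository layout from the README (so that `src` is importable) and a configured `.env`:
```shell
export PYTHONPATH=$(pwd):$PYTHONPATH
python create_embeddings.py
```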
docker-compose.yml
ADDED
@@ -0,0 +1,80 @@
version: '3.8'

services:
  theoremexplain:
    build:
      context: .
      dockerfile: dockerfile
    container_name: theoremexplain-agent
    ports:
      - "7860:7860"
    volumes:
      # Mount output directory to persist generated videos
      - ./output:/app/output
      # Mount models directory if you want to use local models
      - ./models:/app/models
      # Mount data directory for RAG and datasets
      - ./data:/app/data
    environment:
      # Copy environment variables from host .env file
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - AZURE_API_KEY=${AZURE_API_KEY}
      - AZURE_API_BASE=${AZURE_API_BASE}
      - AZURE_API_VERSION=${AZURE_API_VERSION}
      - VERTEXAI_PROJECT=${VERTEXAI_PROJECT}
      - VERTEXAI_LOCATION=${VERTEXAI_LOCATION}
      - GOOGLE_APPLICATION_CREDENTIALS=${GOOGLE_APPLICATION_CREDENTIALS}
      - GEMINI_API_KEY=${GEMINI_API_KEY}
      # Kokoro TTS settings
      - KOKORO_MODEL_PATH=models/kokoro-v0_19.onnx
      - KOKORO_VOICES_PATH=models/voices.bin
      - KOKORO_DEFAULT_VOICE=af
      - KOKORO_DEFAULT_SPEED=1.0
      - KOKORO_DEFAULT_LANG=en-us
      # Python path
      - PYTHONPATH=/app:$PYTHONPATH
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "conda", "run", "-n", "tea", "python", "-c", "import src; import manim; print('Health check passed')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

  # Optional: Add a service for running batch generation
  theoremexplain-batch:
    build:
      context: .
      dockerfile: dockerfile
    container_name: theoremexplain-batch
    profiles:
      - batch
    volumes:
      - ./output:/app/output
      - ./models:/app/models
      - ./data:/app/data
    environment:
      # Same environment variables as main service
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - AZURE_API_KEY=${AZURE_API_KEY}
      - AZURE_API_BASE=${AZURE_API_BASE}
      - AZURE_API_VERSION=${AZURE_API_VERSION}
      - VERTEXAI_PROJECT=${VERTEXAI_PROJECT}
      - VERTEXAI_LOCATION=${VERTEXAI_LOCATION}
      - GOOGLE_APPLICATION_CREDENTIALS=${GOOGLE_APPLICATION_CREDENTIALS}
      - GEMINI_API_KEY=${GEMINI_API_KEY}
      - KOKORO_MODEL_PATH=models/kokoro-v0_19.onnx
      - KOKORO_VOICES_PATH=models/voices.bin
      - KOKORO_DEFAULT_VOICE=af
      - KOKORO_DEFAULT_SPEED=1.0
      - KOKORO_DEFAULT_LANG=en-us
      - PYTHONPATH=/app:$PYTHONPATH
    command: >
      conda run --no-capture-output -n tea python generate_video.py
      --model "openai/gpt-4o-mini"
      --helper_model "openai/gpt-4o-mini"
      --output_dir "output/batch_generation"
      --theorems_path "data/thb_easy/math.json"
      --max_scene_concurrency 3
      --max_topic_concurrency 5
    restart: "no"  # quoted so YAML reads it as a string, not the boolean false
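A usage sketch for this file (the batch service only starts when its `batch` profile is activated):
```shell
docker compose up --build                                # Gradio service on port 7860
docker compose --profile batch up theoremexplain-batch   # one-off batch generation
```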
dockerfile
ADDED
@@ -0,0 +1,126 @@
# Stage 1: Builder
FROM python:3.12-slim AS builder

WORKDIR /app

# Set environment variables
ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# Install build dependencies in a single layer with version pinning where critical
RUN apt-get update && apt-get install -y --no-install-recommends \
    wget=1.21.* \
    curl \
    gcc \
    bzip2 \
    ca-certificates \
    gnupg \
    git \
    python3-dev \
    build-essential \
    pkg-config \
    portaudio19-dev \
    libsdl-pango-dev \
    libcairo2-dev \
    libpango1.0-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Install TinyTeX with error handling and cleanup
RUN wget -qO- "https://yihui.org/tinytex/install-bin-unix.sh" | sh \
    && ~/.TinyTeX/bin/*/tlmgr path add \
    && ~/.TinyTeX/bin/*/tlmgr install \
        amsmath babel-english cbfonts-fd cm-super count1to ctex \
        doublestroke dvisvgm everysel fontspec frcursive fundus-calligra \
        gnu-freefont jknapltx latex-bin mathastext microtype multitoc \
        physics preview prelim2e ragged2e relsize rsfs setspace \
        standalone tipa wasy wasysym xcolor xetex xkeyval \
    && rm -rf ~/.TinyTeX/texmf-var/web2c/tlmgr.log* \
    && rm -rf ~/.TinyTeX/texmf-var/web2c/tlmgr-commands.log* \
    && find ~/.TinyTeX -name "*.log" -delete \
    && find ~/.TinyTeX -name "*.aux" -delete

# Copy and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir --prefix=/install -r requirements.txt \
    && find /install -name "*.pyc" -delete \
    && find /install -name "__pycache__" -type d -exec rm -rf {} + 2>/dev/null || true

# Download models with checksums and error handling
RUN mkdir -p /models \
    && cd /models \
    && wget --progress=dot:giga -O kokoro-v0_19.onnx \
        "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/kokoro-v0_19.onnx" \
    && wget --progress=dot:giga -O voices.bin \
        "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/voices.bin" \
    && ls -la /models

# Stage 2: Runtime
FROM python:3.12-slim AS runtime

# Create non-root user for security
RUN groupadd -r appuser && useradd -r -g appuser -d /app -s /sbin/nologin appuser

WORKDIR /app

# Set environment variables
ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONUNBUFFERED=1 \
    PYTHONPATH=/app \
    PATH="/root/.TinyTeX/bin/x86_64-linux:$PATH"

# Install runtime dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    portaudio19-dev \
    libasound2-dev \
    libsdl-pango-dev \
    libcairo2-dev \
    libpango1.0-dev \
    sox \
    ffmpeg \
    tini \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Copy TinyTeX from builder stage
COPY --from=builder /root/.TinyTeX /root/.TinyTeX

# Copy Python packages from builder stage
COPY --from=builder /install /usr/local

# Copy models from builder stage
COPY --from=builder /models /app/models

# Copy application files (be more selective to reduce layer size)
COPY --chown=appuser:appuser .env gradio_app.py ./
COPY --chown=appuser:appuser src/ ./src/

# Create output directory with proper permissions
RUN mkdir -p output \
    && chown -R appuser:appuser /app

# Switch to non-root user
USER appuser

# Add labels for better maintainability
LABEL maintainer="[email protected]" \
      version="1.0" \
      description="Multi-stage Docker image for ML application"

# Expose port
EXPOSE 7860

# Use tini as PID 1 for proper signal handling
ENTRYPOINT ["tini", "--"]

# Improved health check with more specific validation
HEALTHCHECK --interval=30s --timeout=15s --start-period=60s --retries=3 \
    CMD python -c "import sys; import src; import manim; \
        import requests; \
        r = requests.get('http://localhost:7860/health', timeout=10); \
        sys.exit(0 if r.status_code == 200 else 1)" || exit 1

# Start the application
CMD ["python", "gradio_app.py"]
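A build-and-run sketch for this image (the tag is a placeholder; `-f dockerfile` is needed because the file uses a lowercase, non-default name):
```shell
docker build -f dockerfile -t theoremexplain .
docker run --rm -p 7860:7860 theoremexplain
```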
evaluate.py
ADDED
@@ -0,0 +1,474 @@
1 |
+
import os
|
2 |
+
import json
|
3 |
+
import argparse
|
4 |
+
import tempfile
|
5 |
+
from typing import Dict, List, Union
|
6 |
+
from datetime import datetime
|
7 |
+
|
8 |
+
from dotenv import load_dotenv
|
9 |
+
from moviepy import VideoFileClip
|
10 |
+
|
11 |
+
from mllm_tools.litellm import LiteLLMWrapper
|
12 |
+
from mllm_tools.gemini import GeminiWrapper
|
13 |
+
from eval_suite.utils import calculate_geometric_mean
|
14 |
+
from eval_suite.text_utils import parse_srt_to_text, fix_transcript, evaluate_text
|
15 |
+
from eval_suite.video_utils import evaluate_video_chunk_new
|
16 |
+
from eval_suite.image_utils import evaluate_sampled_images
|
17 |
+
|
18 |
+
load_dotenv()
|
19 |
+
|
20 |
+
with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), "src", "utils", "allowed_models.json")) as f:
|
21 |
+
ALLOWED_MODELS = json.load(f)["allowed_models"]
|
22 |
+
|
23 |
+
|
24 |
+
def combine_results(output_folder: str, combined_file: str, results: Dict[str, Dict]) -> None:
|
25 |
+
"""
|
26 |
+
Combine all evaluation results into a single file.
|
27 |
+
|
28 |
+
Args:
|
29 |
+
output_folder (str): Directory to store the combined file.
|
30 |
+
combined_file (str): Name of the combined file.
|
31 |
+
results (Dict[str, Dict]): Dictionary of evaluation results with file names as keys.
|
32 |
+
|
33 |
+
Returns:
|
34 |
+
None
|
35 |
+
"""
|
36 |
+
combined_path = os.path.join(output_folder, combined_file)
|
37 |
+
with open(combined_path, 'w') as output_file:
|
38 |
+
json.dump(results, output_file, indent=4)
|
39 |
+
|
40 |
+
|
41 |
+
def save_individual_result(output_folder: str, file_name: str, result: Dict) -> None:
|
42 |
+
"""
|
43 |
+
Save individual evaluation result to a file.
|
44 |
+
|
45 |
+
Args:
|
46 |
+
output_folder (str): Directory to store the evaluation file.
|
47 |
+
file_name (str): Name of the file.
|
48 |
+
result (Dict): Evaluation result.
|
49 |
+
|
50 |
+
Returns:
|
51 |
+
None
|
52 |
+
"""
|
53 |
+
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
|
54 |
+
result_file = f"evaluation_{file_name}_{current_time}.json"
|
55 |
+
os.makedirs(output_folder, exist_ok=True)
|
56 |
+
result_path = os.path.join(output_folder, result_file)
|
57 |
+
with open(result_path, 'w') as output_file:
|
58 |
+
json.dump(result, output_file, indent=4)
|
59 |
+
|
60 |
+
|
61 |
+
def evaluate_text_file(model, transcript_path, retry_limit):
|
62 |
+
"""
|
63 |
+
Evaluate a text file using the provided model.
|
64 |
+
|
65 |
+
Args:
|
66 |
+
model: The model to use for evaluation.
|
67 |
+
transcript_path (str): Path to the transcript file (.srt or .txt).
|
68 |
+
retry_limit (int): Number of retry attempts for evaluation.
|
69 |
+
|
70 |
+
Returns:
|
71 |
+
Dict or None: Evaluation results if successful, None if file format unsupported.
|
72 |
+
"""
|
73 |
+
if not transcript_path.endswith(('.srt', '.txt')):
|
74 |
+
print(f"Skipping {transcript_path}: Unsupported file format for text evaluation.")
|
75 |
+
return None
|
76 |
+
|
77 |
+
if transcript_path.endswith(".srt"):
|
78 |
+
transcript = parse_srt_to_text(transcript_path)
|
79 |
+
elif transcript_path.endswith(".txt"):
|
80 |
+
with open(transcript_path) as f:
|
81 |
+
transcript = f.read().strip()
|
82 |
+
else:
|
83 |
+
raise ValueError("Unrecognized transcript file format.")
|
84 |
+
|
85 |
+
capital_letter_proportion = sum(1 for c in transcript if c.isupper()) / sum(1 for c in transcript if c.isalpha())
|
86 |
+
if capital_letter_proportion < 0.01:
|
87 |
+
transcript = fix_transcript(model, transcript)
|
88 |
+
|
89 |
+
print(f"Performing text evaluation: {os.path.basename(transcript_path)}")
|
90 |
+
result = evaluate_text(model, transcript, retry_limit)
|
91 |
+
return result
|
92 |
+
|
93 |
+
|
94 |
+
def evaluate_video_file(model, video_path, transcript_path, description_path, target_fps=None, output_folder=None):
    """
    Evaluate a video file using the provided model.

    Args:
        model: The model to use for evaluation.
        video_path (str): Path to the video file.
        transcript_path (str): Path to the transcript file.
        description_path (str): Path to the description file.
        target_fps (int, optional): Target frames per second for video processing.
        output_folder (str, optional): Directory to store output files.

    Returns:
        Dict or None: Evaluation results if successful, None if file format unsupported.
    """
    if not video_path.endswith(('.mp4', '.mkv')):
        print(f"Skipping {video_path}: Unsupported file format for video evaluation.")
        return None

    moviepy_temp_dir = os.path.join(output_folder, "moviepy_temp")
    os.makedirs(moviepy_temp_dir, exist_ok=True)

    # Chunking: split the video into equal-length chunks and evaluate each one.
    num_chunks = 10
    with VideoFileClip(video_path) as clip:
        duration = clip.duration
        chunk_duration = duration / num_chunks
        results = []

        # Create a temporary directory in the output_folder
        temp_dir_parent = output_folder or os.getcwd()
        with tempfile.TemporaryDirectory(dir=temp_dir_parent) as temp_dir:
            for i in range(num_chunks):
                start = i * chunk_duration
                end = min(start + chunk_duration, duration)
                chunk = clip.subclipped(start, end)
                chunk_path = os.path.join(temp_dir, f"chunk_{i+1}.mp4")
                # Explicitly set the temp_audiofile path with matching codec
                temp_audiofile = os.path.join(moviepy_temp_dir, f"temp_audio_chunk_{i+1}.m4a")
                chunk.write_videofile(
                    chunk_path,
                    codec="libx264",
                    audio_codec="aac",
                    temp_audiofile=temp_audiofile,
                    audio_bitrate="192k",
                    preset="ultrafast",  # Speed up encoding
                    logger=None
                )
                # Create processed videos folder inside output_folder
                processed_videos_dir = os.path.join(output_folder, "processed_videos")
                os.makedirs(processed_videos_dir, exist_ok=True)
                save_path = os.path.join(processed_videos_dir, f"processed_chunk_{i+1}.mp4")
                result = evaluate_video_chunk_new(
                    model,
                    chunk_path,
                    transcript_path,
                    description_path,
                    target_fps=target_fps,
                    save_processed_video=save_path
                )
                results.append(result)

    # Aggregate per-chunk scores into one score per criterion.
    score_dict = {}
    for key in results[0]["evaluation"].keys():
        score_dict[key] = []
        for result in results:
            score_dict[key].append(result["evaluation"][key]["score"])

    evaluation = {}
    for key, scores in score_dict.items():
        evaluation[key] = {"score": calculate_geometric_mean(scores)}

    result_json = {
        "evaluation": evaluation,
        "video_chunks": results
    }
    return result_json

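# `calculate_geometric_mean` comes from earlier in this file (not shown in this
# diff excerpt). A minimal sketch of the aggregation assumed here:
#
#     import math
#     def calculate_geometric_mean(scores):
#         valid = [s for s in scores if s]  # drop zeros/None defensively
#         if not valid:
#             return 0.0
#         return math.exp(sum(math.log(s) for s in valid) / len(valid))
#
# e.g. chunk scores [4, 4, 1] average to about 2.52, so one weak chunk drags the
# criterion down more than an arithmetic mean (3.0) would.
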
def extract_scores(data: Union[Dict, List]) -> List[int]:
    """
    Extract all score values from a nested dictionary or list structure.

    Args:
        data (Union[Dict, List]): The data structure to extract scores from.

    Returns:
        List[int]: List of extracted score values.
    """
    scores = []
    if isinstance(data, dict):
        for key, value in data.items():
            if "chunks" in key:
                continue
            elif isinstance(value, dict) or isinstance(value, list):
                scores.extend(extract_scores(value))
            elif key == 'score':
                scores.append(value)
    elif isinstance(data, list):
        for item in data:
            scores.extend(extract_scores(item))
    return scores

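# Example (structure illustrative):
#   extract_scores({"evaluation": {"accuracy": {"score": 4}, "visuals": {"score": 3}},
#                   "video_chunks": [{"evaluation": {"accuracy": {"score": 5}}}]})
#   -> [4, 3]   # "video_chunks" contains "chunks", so per-chunk scores are skipped
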
def calculate_overall_score(result: Dict) -> float:
    """
    Calculate the overall score from evaluation results.

    Args:
        result (Dict): Dictionary containing evaluation results.

    Returns:
        float: The calculated overall score.
    """
    scores = extract_scores(result)
    overall_score = calculate_geometric_mean(scores)
    return overall_score

def process_topic_name(topic_name: str) -> str:
    """
    Process a topic name by capitalizing words and handling special characters.

    Args:
        topic_name (str): The topic name to process.

    Returns:
        str: The processed topic name.
    """
    words = topic_name.replace("_s_", "'s_").split("_")
    return " ".join([word.capitalize() for word in words])

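# e.g. process_topic_name("pythagoras_s_theorem") -> "Pythagoras's Theorem"
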
def merge_dicts(dict1: dict, dict2: dict) -> dict:
    """
    Recursively merge two dictionaries.

    Args:
        dict1 (dict): First dictionary.
        dict2 (dict): Second dictionary.

    Returns:
        dict: Merged dictionary.
    """
    merged = dict1.copy()
    for key, value in dict2.items():
        if key in merged and isinstance(merged[key], dict) and isinstance(value, dict):
            merged[key] = merge_dicts(merged[key], value)
        else:
            merged[key] = value
    return merged

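# e.g. merge_dicts({"evaluation": {"text": {"score": 4}}},
#                  {"evaluation": {"video": {"score": 3}}})
# -> {"evaluation": {"text": {"score": 4}, "video": {"score": 3}}}
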
def process_theorem(models, file_path: str, eval_type: str, retry_limit: int,
                    target_fps: int = None, use_parent_folder_as_topic: bool = False,
                    output_folder: str = None) -> tuple[str, dict]:
    """
    Process a theorem file or directory for evaluation.

    Args:
        models: Dictionary of models for different evaluation types.
        file_path (str): Path to the file or directory to evaluate.
        eval_type (str): Type of evaluation to perform.
        retry_limit (int): Number of retry attempts.
        target_fps (int, optional): Target frames per second for video processing.
        use_parent_folder_as_topic (bool, optional): Use parent folder name as topic.
        output_folder (str, optional): Directory to store output files.

    Returns:
        tuple[str, dict]: Tuple of file name and evaluation results.
    """
    ext_map = {
        'text': ('.txt', '.srt'),
        'video': ('.mp4', '.mkv')
    }

    # Handle single file evaluation
    if os.path.isfile(file_path):
        file_ext = os.path.splitext(file_path)[1].lower()
        file_name = os.path.basename(file_path)

        if eval_type == "text" and file_ext in ext_map['text']:
            return file_name, evaluate_text_file(models['text'], file_path, retry_limit)
        elif eval_type == "video" and file_ext in ext_map['video']:
            if use_parent_folder_as_topic:
                topic_name = os.path.basename(os.path.dirname(file_path))
            else:
                topic_name = None
            # Guard against a missing topic: process_topic_name expects a string.
            topic_name = process_topic_name(topic_name) if topic_name else None
            return file_name, evaluate_video_file(models['video'], file_path, None, topic_name, target_fps, output_folder)
        elif eval_type == "image" and file_ext in ext_map['video']:
            if use_parent_folder_as_topic:
                topic_name = os.path.basename(os.path.dirname(file_path))
            else:
                topic_name = None
            topic_name = process_topic_name(topic_name) if topic_name else None
            return file_name, evaluate_sampled_images(models['image'], file_path, topic_name, num_chunks=10, output_folder=output_folder)
        elif eval_type == "all":
            raise ValueError("Evaluation type 'all' is not supported for a single file. Try passing a folder with both a video and a subtitle file.")
        else:
            raise ValueError(f"File type of {file_path} does not match evaluation type {eval_type!r}")

    # Handle directory evaluation
    theorem_dir = file_path
    all_files = os.listdir(theorem_dir)

    # Look for transcript files, prioritizing .srt over .txt if both exist
    transcript_file_candidates = [f for f in all_files if f.endswith(ext_map['text']) and not f.endswith('_scene_outline.txt')]
    srt_files = [f for f in transcript_file_candidates if f.endswith('.srt')]
    txt_files = [f for f in transcript_file_candidates if f.endswith('.txt')]

    transcript_path = None
    if srt_files:
        transcript_path = os.path.join(theorem_dir, srt_files[0])
    elif txt_files:
        transcript_path = os.path.join(theorem_dir, txt_files[0])

    video_file_candidates = [f for f in all_files if f.endswith(ext_map['video'])]
    video_path = os.path.join(theorem_dir, video_file_candidates[0]) if len(video_file_candidates) == 1 else None

    topic_name = os.path.basename(theorem_dir)
    topic_name = process_topic_name(topic_name)

    if not video_path:
        print(f"Skipping {theorem_dir}: No video file found")
        return None, None

    text_result = video_result = image_result = None
    if eval_type == "text" or eval_type == "all":
        if transcript_path is None:
            print(f"Warning: No suitable transcript file found in {theorem_dir}")
        else:
            text_result = evaluate_text_file(models['text'], transcript_path, retry_limit)
    if eval_type == "video" or eval_type == "all":
        assert video_path is not None, f"Expected 1 video file, got {len(video_file_candidates)} for {theorem_dir}"
        video_result = evaluate_video_file(models['video'], video_path, transcript_path, topic_name, target_fps, output_folder)
    if eval_type == "image" or eval_type == "all":
        assert video_path is not None, f"Expected 1 video file, got {len(video_file_candidates)} for {theorem_dir}"
        image_result = evaluate_sampled_images(models['image'], video_path, topic_name, num_chunks=10, output_folder=output_folder)

    if eval_type == "all":
        result = {}
        if text_result:
            result = merge_dicts(result, text_result)
        if video_result:
            result = merge_dicts(result, video_result)
        if image_result:
            result = merge_dicts(result, image_result)
        if result:
            result["evaluation"]["overall_score"] = calculate_overall_score(result)
    else:
        result = text_result if eval_type == "text" else video_result if eval_type == "video" else image_result if eval_type == "image" else None

    file_name = os.path.basename(theorem_dir)
    return file_name, result

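# A theorem directory is expected to contain exactly one video plus an optional
# transcript, e.g. (names illustrative):
#   pythagorean_theorem/
#       pythagorean_theorem_combined.mp4
#       pythagorean_theorem.srt
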
def main():
    """
    Main function to run the evaluation script.

    Parses command line arguments and orchestrates the evaluation process
    for text, video, and image content using specified AI models.
    """
    parser = argparse.ArgumentParser(description='Automatic evaluation of theorem explanation videos with LLMs')
    parser.add_argument('--model_text', type=str,
                        choices=ALLOWED_MODELS,
                        default='azure/gpt-4o',
                        help='Select the AI model to use for text evaluation')
    parser.add_argument('--model_video', type=str,
                        choices=['gemini/gemini-1.5-pro-002',
                                 'gemini/gemini-2.0-flash-exp',
                                 'gemini/gemini-2.0-pro-exp-02-05'],
                        default='gemini/gemini-1.5-pro-002',
                        help='Select the AI model to use for video evaluation')
    parser.add_argument('--model_image', type=str,
                        choices=ALLOWED_MODELS,
                        default='azure/gpt-4o',
                        help='Select the AI model to use for image evaluation')
    parser.add_argument('--eval_type', type=str, choices=['text', 'video', 'image', 'all'], default='all', help='Type of evaluation to perform')
    parser.add_argument('--file_path', type=str, help='Path to a file or a theorem folder', required=True)
    parser.add_argument('--output_folder', type=str, help='Directory to store the evaluation files', required=True)
    parser.add_argument('--retry_limit', type=int, default=3, help='Number of retry attempts for each inference')
    parser.add_argument('--combine', action='store_true', help='Combine all results into a single JSON file')
    parser.add_argument('--bulk_evaluate', action='store_true', help='Evaluate a folder of theorems together', default=False)
    parser.add_argument('--target_fps', type=int, help='Target FPS for video processing. If not set, original video FPS will be used', required=False)
    parser.add_argument('--use_parent_folder_as_topic', action='store_true', help='Use parent folder name as topic name for single file evaluation', default=True)
    parser.add_argument('--max_workers', type=int, default=4, help='Maximum number of concurrent workers for parallel processing')

    args = parser.parse_args()

    # Initialize separate models
    text_model = LiteLLMWrapper(
        model_name=args.model_text,
        temperature=0.0,
    )
    video_model = GeminiWrapper(
        model_name=args.model_video,
        temperature=0.0,
    )
    image_model = LiteLLMWrapper(
        model_name=args.model_image,
        temperature=0.0,
    )

    models = {
        'text': text_model,
        'video': video_model,
        'image': image_model
    }

    theorem_dirs = []
    if args.bulk_evaluate:
        assert os.path.isdir(args.file_path), "File path must be a folder for --bulk_evaluate"
        for root, dirnames, _ in os.walk(args.file_path):
            if not any(f.endswith(".mp4") for f in os.listdir(root)):
                continue

            theorem_dirs.append(root)
    elif os.path.isdir(args.file_path):
        assert any(f.endswith(".mp4") for f in os.listdir(args.file_path)), "The provided folder must contain a video file"

        theorem_dirs.append(args.file_path)

    # Create output directory and its temp subdirectories if they don't exist
    os.makedirs(args.output_folder, exist_ok=True)
    moviepy_temp_dir = os.path.join(args.output_folder, "moviepy_temp")
    os.makedirs(moviepy_temp_dir, exist_ok=True)
    VideoFileClip.DEFAULT_TEMP_DIR = moviepy_temp_dir

    processed_videos_dir = os.path.join(args.output_folder, "processed_videos")
    os.makedirs(processed_videos_dir, exist_ok=True)

    results = {}
    if theorem_dirs:
        for theorem_dir in theorem_dirs:
            file_name, result = process_theorem(
                models,
                theorem_dir,
                args.eval_type,
                args.retry_limit,
                args.target_fps,
                args.use_parent_folder_as_topic,
                args.output_folder
            )

            if result is not None:
                results[file_name] = result

                if not args.combine:
                    save_individual_result(args.output_folder, file_name, result)
    else:
        file_name, result = process_theorem(
            models,
            args.file_path,
            args.eval_type,
            args.retry_limit,
            args.target_fps,
            args.use_parent_folder_as_topic,
            args.output_folder
        )

        if result is not None:
            results[file_name] = result

            if not args.combine:
                save_individual_result(args.output_folder, file_name, result)

    if args.combine:
        if len(results) > 1:
            current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
            combined_file = f"evaluation_{current_time}.json"
            combine_results(args.output_folder, combined_file, results)
            print("Combining results completed.")
        else:
            for file_name, result in results.items():
                save_individual_result(args.output_folder, file_name, result)

    # Remove the moviepy temp dir; it may still hold files if a render failed,
    # in which case rmdir would raise, so ignore that case.
    try:
        os.rmdir(moviepy_temp_dir)
    except OSError:
        pass


if __name__ == "__main__":
    main()
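
# Example invocation (paths illustrative):
#   python evaluate.py --file_path output/pythagorean_theorem \
#       --output_folder eval_output --eval_type all --combine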
generate_video.py
ADDED
@@ -0,0 +1,990 @@
import os
import json
import asyncio
import uuid
from typing import Union, List, Dict, Optional, Protocol
from dataclasses import dataclass
from abc import ABC, abstractmethod
import argparse
import re
from dotenv import load_dotenv

from mllm_tools.litellm import LiteLLMWrapper
from mllm_tools.openrouter import OpenRouterWrapper
from src.core.video_planner import EnhancedVideoPlanner
from src.core.code_generator import CodeGenerator  # Use existing CodeGenerator
from src.core.video_renderer import VideoRenderer  # Use existing VideoRenderer
from src.utils.utils import extract_xml
from src.config.config import Config
from task_generator import get_banned_reasonings
from task_generator.prompts_raw import (_code_font_size, _code_disable, _code_limit, _prompt_manim_cheatsheet)

# Load configuration
load_dotenv(override=True)

# Load allowed models
allowed_models_path = os.path.join(os.path.dirname(__file__), 'src', 'utils', 'allowed_models.json')
with open(allowed_models_path, 'r') as f:
    allowed_models_data = json.load(f)
    allowed_models = allowed_models_data.get("allowed_models", [])

@dataclass
class VideoGenerationConfig:
    """Configuration for video generation pipeline."""
    planner_model: str
    scene_model: Optional[str] = None
    helper_model: Optional[str] = None
    output_dir: str = "output"
    verbose: bool = False
    use_rag: bool = False
    use_context_learning: bool = False
    context_learning_path: str = "data/context_learning"
    chroma_db_path: str = "data/rag/chroma_db"
    manim_docs_path: str = "data/rag/manim_docs"
    embedding_model: str = "hf:ibm-granite/granite-embedding-30m-english"
    use_visual_fix_code: bool = False
    use_langfuse: bool = True
    max_scene_concurrency: int = 5
    max_topic_concurrency: int = 1
    max_retries: int = 5

    # Renderer optimizations
    enable_caching: bool = True
    default_quality: str = "medium"
    use_gpu_acceleration: bool = False
    preview_mode: bool = False
    max_concurrent_renders: int = 4

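# Minimal usage sketch (model name taken from the CLI default further below):
#   config = VideoGenerationConfig(planner_model='gemini/gemini-2.5-flash-preview-04-17',
#                                  output_dir='output', use_rag=True)
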
# Protocols for dependency injection (Interface Segregation Principle)
class ModelProvider(Protocol):
    """Protocol for AI model providers."""
    def __call__(self, prompt: str, **kwargs) -> str: ...

class PlannerInterface(Protocol):
    """Interface for video planners."""
    async def generate_scene_outline(self, topic: str, description: str, session_id: str) -> str: ...
    async def generate_scene_implementation_concurrently_enhanced(
        self, topic: str, description: str, plan: str, session_id: str
    ) -> List[str]: ...

class CodeGeneratorInterface(Protocol):
    """Interface for code generators."""
    def generate_manim_code(self, **kwargs) -> tuple: ...
    def fix_code_errors(self, **kwargs) -> tuple: ...
    def visual_self_reflection(self, **kwargs) -> tuple: ...

class RendererInterface(Protocol):
    """Interface for video renderers."""
    async def render_scene_optimized(self, **kwargs) -> tuple: ...
    async def combine_videos_optimized(self, topic: str, **kwargs) -> str: ...

# Factory for creating components (Factory Pattern)
class ComponentFactory:
    """Factory for creating video generation components."""

    @staticmethod
    def create_model(model_name: str, config: VideoGenerationConfig) -> ModelProvider:
        """Create AI model wrapper."""
        # Use OpenRouter wrapper for OpenRouter models
        if model_name.startswith('openrouter/'):
            return OpenRouterWrapper(
                model_name=model_name,
                temperature=0.7,
                print_cost=True,
                verbose=config.verbose,
                use_langfuse=config.use_langfuse
            )
        else:
            # Use LiteLLM wrapper for other models
            return LiteLLMWrapper(
                model_name=model_name,
                temperature=0.7,
                print_cost=True,
                verbose=config.verbose,
                use_langfuse=config.use_langfuse
            )

    @staticmethod
    def create_planner(planner_model: ModelProvider, helper_model: ModelProvider,
                       config: VideoGenerationConfig, session_id: str) -> PlannerInterface:
        """Create video planner with enhanced capabilities."""
        return EnhancedVideoPlanner(
            planner_model=planner_model,
            helper_model=helper_model,
            output_dir=config.output_dir,
            print_response=config.verbose,
            use_context_learning=config.use_context_learning,
            context_learning_path=config.context_learning_path,
            use_rag=config.use_rag,
            session_id=session_id,
            chroma_db_path=config.chroma_db_path,
            manim_docs_path=config.manim_docs_path,
            embedding_model=config.embedding_model,
            use_langfuse=config.use_langfuse,
            max_scene_concurrency=config.max_scene_concurrency,
            max_step_concurrency=3,
            enable_caching=config.enable_caching
        )

    @staticmethod
    def create_code_generator(scene_model: ModelProvider, helper_model: ModelProvider,
                              config: VideoGenerationConfig, session_id: str) -> CodeGeneratorInterface:
        """Create code generator with existing implementation."""
        return CodeGenerator(  # Use existing CodeGenerator
            scene_model=scene_model,
            helper_model=helper_model,
            output_dir=config.output_dir,
            print_response=config.verbose,
            use_rag=config.use_rag,
            use_context_learning=config.use_context_learning,
            context_learning_path=config.context_learning_path,
            chroma_db_path=config.chroma_db_path,
            manim_docs_path=config.manim_docs_path,
            embedding_model=config.embedding_model,
            use_visual_fix_code=config.use_visual_fix_code,
            use_langfuse=config.use_langfuse,
            session_id=session_id
        )

    @staticmethod
    def create_renderer(config: VideoGenerationConfig) -> RendererInterface:
        """Create video renderer with existing implementation."""
        return VideoRenderer(  # Use existing VideoRenderer
            output_dir=config.output_dir,
            print_response=config.verbose,
            use_visual_fix_code=config.use_visual_fix_code
        )

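# e.g. ComponentFactory.create_model('openrouter/openai/gpt-4o', config) routes to
# OpenRouterWrapper purely on the 'openrouter/' prefix (model name illustrative);
# anything else goes through LiteLLMWrapper.
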
# Enhanced VideoRenderer wrapper to add async methods
class AsyncVideoRendererWrapper:
    """Wrapper to add async functionality to existing VideoRenderer."""

    def __init__(self, renderer: VideoRenderer, config: VideoGenerationConfig):
        self.renderer = renderer
        self.config = config
        self.render_stats = {'cache_hits': 0, 'total_renders': 0}

    async def render_scene_optimized(self, **kwargs) -> tuple:
        """Async wrapper for scene rendering with intelligent error handling."""
        # Extract parameters
        code = kwargs.get('code')
        file_prefix = kwargs.get('file_prefix')
        curr_scene = kwargs.get('curr_scene')
        curr_version = kwargs.get('curr_version', 1)
        code_dir = kwargs.get('code_dir')
        media_dir = kwargs.get('media_dir')
        code_generator = kwargs.get('code_generator')
        scene_implementation = kwargs.get('scene_implementation')
        description = kwargs.get('description')
        scene_outline = kwargs.get('scene_outline')
        scene_trace_id = kwargs.get('scene_trace_id')
        topic = kwargs.get('topic')
        session_id = kwargs.get('session_id')

        # Use existing render_scene method with all parameters
        loop = asyncio.get_event_loop()
        result = await loop.run_in_executor(
            None,
            self.renderer.render_scene,
            code,
            file_prefix,
            curr_scene,
            curr_version,
            code_dir,
            media_dir,
            False,  # use_visual_fix_code
            None,   # visual_self_reflection_func
            None,   # banned_reasonings
            scene_trace_id,
            topic,
            session_id,
            code_generator,
            scene_implementation,
            description,
            scene_outline
        )

        self.render_stats['total_renders'] += 1
        return result

    async def render_multiple_scenes_parallel(self, scene_configs: List[Dict],
                                              max_concurrent: int = None) -> List[tuple]:
        """Render multiple scenes in parallel."""
        max_concurrent = max_concurrent or self.config.max_concurrent_renders
        semaphore = asyncio.Semaphore(max_concurrent)

        async def render_single_scene(config):
            async with semaphore:
                return await self.render_scene_optimized(**config)

        print(f"🚀 Starting parallel rendering of {len(scene_configs)} scenes (max concurrent: {max_concurrent})")

        tasks = [render_single_scene(config) for config in scene_configs]
        results = await asyncio.gather(*tasks, return_exceptions=True)

        successful = sum(1 for r in results if not isinstance(r, Exception))
        print(f"📊 Render results: {successful}/{len(results)} scenes successful")

        return results

    async def combine_videos_optimized(self, topic: str, **kwargs) -> str:
        """Async wrapper for video combination."""
        loop = asyncio.get_event_loop()
        return await loop.run_in_executor(
            None,
            self.renderer.combine_videos,
            topic
        )

# Service classes (Single Responsibility Principle)
class SessionManager:
    """Manages session IDs for video generation."""

    def __init__(self, output_dir: str):
        self.output_dir = output_dir

    def load_or_create_session_id(self) -> str:
        """Load existing session ID or create new one."""
        session_file = os.path.join(self.output_dir, "session_id.txt")

        if os.path.exists(session_file):
            with open(session_file, 'r') as f:
                session_id = f.read().strip()
            print(f"📋 Loaded existing session ID: {session_id}")
            return session_id

        session_id = str(uuid.uuid4())
        os.makedirs(self.output_dir, exist_ok=True)
        with open(session_file, 'w') as f:
            f.write(session_id)
        print(f"🆕 Created new session ID: {session_id}")
        return session_id

    def save_topic_session_id(self, topic: str, session_id: str) -> None:
        """Save session ID for specific topic."""
        file_prefix = re.sub(r'[^a-z0-9_]+', '_', topic.lower())
        topic_dir = os.path.join(self.output_dir, file_prefix)
        os.makedirs(topic_dir, exist_ok=True)

        session_file = os.path.join(topic_dir, "session_id.txt")
        with open(session_file, 'w') as f:
            f.write(session_id)

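# Session IDs persist across runs: one global <output_dir>/session_id.txt plus a
# per-topic copy at <output_dir>/<file_prefix>/session_id.txt.
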
class SceneAnalyzer:
    """Analyzes and manages scene information."""

    def __init__(self, output_dir: str):
        self.output_dir = output_dir

    def load_implementation_plans(self, topic: str) -> Dict[int, Optional[str]]:
        """Load implementation plans for each scene."""
        file_prefix = re.sub(r'[^a-z0-9_]+', '_', topic.lower())
        scene_outline_path = os.path.join(self.output_dir, file_prefix, f"{file_prefix}_scene_outline.txt")

        if not os.path.exists(scene_outline_path):
            return {}

        with open(scene_outline_path, "r") as f:
            scene_outline = f.read()

        scene_outline_content = extract_xml(scene_outline)
        scene_count = len(re.findall(r'<SCENE_(\d+)>[^<]', scene_outline_content))

        implementation_plans = {}
        for i in range(1, scene_count + 1):
            plan_path = os.path.join(
                self.output_dir, file_prefix, f"scene{i}",
                f"{file_prefix}_scene{i}_implementation_plan.txt"
            )
            if os.path.exists(plan_path):
                with open(plan_path, "r") as f:
                    implementation_plans[i] = f.read()
                print(f"📄 Found existing implementation plan for scene {i}")
            else:
                implementation_plans[i] = None
                print(f"❌ Missing implementation plan for scene {i}")

        return implementation_plans

    def analyze_scene_status(self, topic: str) -> Dict:
        """Analyze status of all scenes for a topic."""
        file_prefix = re.sub(r'[^a-z0-9_]+', '_', topic.lower())

        # Check scene outline
        scene_outline_path = os.path.join(self.output_dir, file_prefix, f"{file_prefix}_scene_outline.txt")
        has_scene_outline = os.path.exists(scene_outline_path)

        num_scenes = 0
        if has_scene_outline:
            with open(scene_outline_path, "r") as f:
                scene_outline = f.read()
            scene_outline_content = extract_xml(scene_outline)
            num_scenes = len(re.findall(r'<SCENE_(\d+)>[^<]', scene_outline_content))

        # Analyze each scene
        scene_status = []
        implementation_plans = code_files = rendered_scenes = 0

        for i in range(1, num_scenes + 1):
            scene_dir = os.path.join(self.output_dir, file_prefix, f"scene{i}")

            # Check implementation plan
            plan_path = os.path.join(scene_dir, f"{file_prefix}_scene{i}_implementation_plan.txt")
            has_plan = os.path.exists(plan_path)
            if has_plan:
                implementation_plans += 1

            # Check code files
            code_dir = os.path.join(scene_dir, "code")
            has_code = os.path.exists(code_dir) and any(f.endswith('.py') for f in os.listdir(code_dir))
            if has_code:
                code_files += 1

            # Check rendered videos
            has_render = os.path.exists(os.path.join(scene_dir, "succ_rendered.txt"))
            if has_render:
                rendered_scenes += 1

            scene_status.append({
                'scene_number': i,
                'has_plan': has_plan,
                'has_code': has_code,
                'has_render': has_render
            })

        # Check combined video
        combined_video_path = os.path.join(self.output_dir, file_prefix, f"{file_prefix}_combined.mp4")
        has_combined_video = os.path.exists(combined_video_path)

        return {
            'topic': topic,
            'has_scene_outline': has_scene_outline,
            'total_scenes': num_scenes,
            'implementation_plans': implementation_plans,
            'code_files': code_files,
            'rendered_scenes': rendered_scenes,
            'has_combined_video': has_combined_video,
            'scene_status': scene_status
        }

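# file_prefix sanitization example:
#   re.sub(r'[^a-z0-9_]+', '_', "Pythagorean Theorem".lower())  -> 'pythagorean_theorem'
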
# Scene rendering wrapper for existing render_scene method
class SceneRenderingService:
    """Service for rendering individual scenes with existing VideoRenderer."""

    def __init__(self, renderer: VideoRenderer, code_generator: CodeGenerator,
                 banned_reasonings: List[str], config: VideoGenerationConfig):
        self.renderer = renderer
        self.code_generator = code_generator
        self.banned_reasonings = banned_reasonings
        self.config = config

    async def render_scene_with_code_generation(self, topic: str, description: str,
                                                scene_outline: str, scene_implementation: str,
                                                scene_number: int, file_prefix: str,
                                                code_dir: str, media_dir: str,
                                                scene_trace_id: str, session_id: str) -> tuple:
        """Render a scene with code generation and error handling."""

        print(f"🎬 Processing scene {scene_number} for {topic}")

        try:
            # Step 1: Generate Manim code
            print(f"⚡ Generating code for scene {scene_number}")
            code, _ = self.code_generator.generate_manim_code(
                topic=topic,
                description=description,
                scene_outline=scene_outline,
                scene_implementation=scene_implementation,
                scene_number=scene_number,
                scene_trace_id=scene_trace_id,
                session_id=session_id
            )

            # Step 2: Render with intelligent error handling (single attempt - renderer handles retries and fixes)
            current_version = 1

            print(f"🎞️ Rendering scene {scene_number} with intelligent error handling")

            try:
                # Use existing render_scene method with enhanced error handling
                loop = asyncio.get_event_loop()
                result_code, error = await loop.run_in_executor(
                    None,
                    self.renderer.render_scene,
                    code,
                    file_prefix,
                    scene_number,
                    current_version,
                    code_dir,
                    media_dir,
                    False,  # use_visual_fix_code
                    None,   # visual_self_reflection_func
                    self.banned_reasonings,
                    scene_trace_id,
                    topic,
                    session_id,
                    self.code_generator,   # Pass code generator for intelligent error handling
                    scene_implementation,  # Pass implementation for context
                    description,           # Pass description for context
                    scene_outline          # Pass scene outline for context
                )

                if error is None:
                    # Success - mark as rendered
                    scene_dir = os.path.join(self.config.output_dir, file_prefix, f"scene{scene_number}")
                    success_file = os.path.join(scene_dir, "succ_rendered.txt")
                    with open(success_file, 'w') as f:
                        f.write("Successfully rendered with intelligent error handling")

                    print(f"✅ Scene {scene_number} rendered successfully")
                    return result_code, None
                else:
                    # Error occurred even with intelligent retry/fix attempts
                    print(f"❌ Scene {scene_number} failed after intelligent error handling: {error}")
                    return result_code, error

            except Exception as e:
                print(f"❌ Exception during scene {scene_number} rendering: {e}")
                return code, str(e)

        except Exception as e:
            print(f"❌ Fatal error in scene {scene_number}: {e}")
            return None, str(e)

# Main Video Generator (Open/Closed Principle - extensible via composition)
class EnhancedVideoGenerator:
    """Enhanced video generator following SOLID principles."""

    def __init__(self, config: VideoGenerationConfig):
        self.config = config
        self.session_manager = SessionManager(config.output_dir)
        self.scene_analyzer = SceneAnalyzer(config.output_dir)
        self.banned_reasonings = get_banned_reasonings()

        # Initialize session
        self.session_id = self.session_manager.load_or_create_session_id()

        # Create AI models
        self.planner_model = ComponentFactory.create_model(config.planner_model, config)
        self.scene_model = ComponentFactory.create_model(
            config.scene_model or config.planner_model, config
        )
        self.helper_model = ComponentFactory.create_model(
            config.helper_model or config.planner_model, config
        )

        # Create components using dependency injection
        self.planner = ComponentFactory.create_planner(
            self.planner_model, self.helper_model, config, self.session_id
        )
        self.code_generator = ComponentFactory.create_code_generator(
            self.scene_model, self.helper_model, config, self.session_id
        )

        # Create renderer with async wrapper
        base_renderer = ComponentFactory.create_renderer(config)
        self.renderer = AsyncVideoRendererWrapper(base_renderer, config)

        # Create scene rendering service
        self.scene_service = SceneRenderingService(
            base_renderer, self.code_generator, self.banned_reasonings, config
        )

        # Concurrency control
        self.scene_semaphore = asyncio.Semaphore(config.max_scene_concurrency)

        print("🚀 Enhanced VideoGenerator initialized with:")
        print(f"   Planner: {config.planner_model}")
        print(f"   Scene: {config.scene_model or config.planner_model}")
        print(f"   Helper: {config.helper_model or config.planner_model}")
        print(f"   Max Scene Concurrency: {config.max_scene_concurrency}")
        print(f"   Caching: {'✅' if config.enable_caching else '❌'}")
        print(f"   GPU Acceleration: {'✅' if config.use_gpu_acceleration else '❌'}")

    async def generate_scene_outline(self, topic: str, description: str) -> str:
        """Generate scene outline for topic."""
        print(f"📝 Generating scene outline for: {topic}")
        return await self.planner.generate_scene_outline(topic, description, self.session_id)

    async def generate_video_pipeline(self, topic: str, description: str,
                                      only_plan: bool = False,
                                      specific_scenes: List[int] = None) -> None:
        """Complete video generation pipeline with enhanced performance."""

        print(f"🎬 Starting enhanced video pipeline for: {topic}")
        self.session_manager.save_topic_session_id(topic, self.session_id)

        file_prefix = re.sub(r'[^a-z0-9_]+', '_', topic.lower())

        # Step 1: Load or generate scene outline
        scene_outline = await self._load_or_generate_outline(topic, description, file_prefix)

        # Step 2: Generate implementation plans
        implementation_plans = await self._generate_implementation_plans(
            topic, description, scene_outline, file_prefix, specific_scenes
        )

        if only_plan:
            print(f"📋 Plan-only mode completed for: {topic}")
            return

        # Step 3: Render scenes with optimization
        await self._render_scenes_optimized(
            topic, description, scene_outline, implementation_plans, file_prefix
        )

        # Step 4: Combine videos
        await self._combine_videos_optimized(topic)

        print(f"✅ Enhanced video pipeline completed for: {topic}")

    async def _load_or_generate_outline(self, topic: str, description: str, file_prefix: str) -> str:
        """Load existing outline or generate new one."""
        scene_outline_path = os.path.join(self.config.output_dir, file_prefix, f"{file_prefix}_scene_outline.txt")

        if os.path.exists(scene_outline_path):
            with open(scene_outline_path, "r") as f:
                scene_outline = f.read()
            print(f"📄 Loaded existing scene outline for: {topic}")

            # Detect plugins if RAG is enabled
            if self.config.use_rag and hasattr(self.planner, 'rag_integration'):
                plugins = self.planner.rag_integration.detect_relevant_plugins(topic, description)
                if plugins:
                    self.planner.rag_integration.set_relevant_plugins(plugins)
                    print(f"🔌 Detected relevant plugins: {plugins}")
        else:
            print(f"📝 Generating new scene outline for: {topic}")
            scene_outline = await self.planner.generate_scene_outline(topic, description, self.session_id)

            os.makedirs(os.path.join(self.config.output_dir, file_prefix), exist_ok=True)
            with open(scene_outline_path, "w") as f:
                f.write(scene_outline)

        return scene_outline

    async def _generate_implementation_plans(self, topic: str, description: str,
                                             scene_outline: str, file_prefix: str,
                                             specific_scenes: List[int] = None) -> Dict[int, str]:
        """Generate missing implementation plans."""

        # First, ensure the topic directory exists
        topic_dir = os.path.join(self.config.output_dir, file_prefix)
        os.makedirs(topic_dir, exist_ok=True)

        try:
            implementation_plans_dict = self.scene_analyzer.load_implementation_plans(topic)

            if not implementation_plans_dict:
                print(f"No existing implementation plans found for {topic}. Generating all plans from scratch.")
                scene_outline_content = extract_xml(scene_outline)
                scene_count = len(re.findall(r'<SCENE_(\d+)>[^<]', scene_outline_content))

                if scene_count == 0:
                    print("⚠️ Warning: No scenes found in scene outline. Check the regex pattern and scene outline format.")
                    print(f"Scene outline content: {scene_outline_content[:100]}...")

                print(f"Found {scene_count} scenes in the outline.")
                implementation_plans_dict = {i: None for i in range(1, scene_count + 1)}

            # Find missing scenes
            missing_scenes = [
                scene_num for scene_num, plan in implementation_plans_dict.items()
                if plan is None and (specific_scenes is None or scene_num in specific_scenes)
            ]

            if missing_scenes:
                print(f"📋 Generating implementation plans for scenes: {missing_scenes}")

                # Make sure scene directories exist for missing scenes
                for scene_num in missing_scenes:
                    scene_dir = os.path.join(topic_dir, f"scene{scene_num}")
                    os.makedirs(scene_dir, exist_ok=True)

                # Use enhanced concurrent generation if available
                if hasattr(self.planner, 'generate_scene_implementation_concurrently_enhanced'):
                    try:
                        all_plans = await self.planner.generate_scene_implementation_concurrently_enhanced(
                            topic, description, scene_outline, self.session_id
                        )

                        if not all_plans:
                            print("❌ Error: No implementation plans were returned!")
                            return implementation_plans_dict

                        # Update missing plans
                        updated_count = 0
                        for i, scene_num in enumerate(sorted(missing_scenes)):
                            if i < len(all_plans):
                                plan = all_plans[i]
                                if isinstance(plan, str) and plan.strip():
                                    implementation_plans_dict[scene_num] = plan
                                    updated_count += 1
                                else:
                                    print(f"⚠️ Warning: Empty or invalid plan for scene {scene_num}")

                        print(f"✅ Generated {updated_count}/{len(missing_scenes)} implementation plans")

                    except Exception as e:
                        print(f"❌ Error generating implementation plans: {str(e)}")

                else:
                    # Fallback to sequential generation
                    print("⚠️ Using fallback sequential plan generation")
                    # Implement sequential generation if needed
            else:
                print("✅ All implementation plans already exist.")

            return implementation_plans_dict

        except Exception as e:
            print(f"❌ Fatal error in implementation plan generation: {str(e)}")
            raise

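    # The `<SCENE_(\d+)>` pattern above assumes outlines shaped like (illustrative):
    #   <SCENE_OUTLINE>
    #       <SCENE_1>...</SCENE_1>
    #       <SCENE_2>...</SCENE_2>
    #   </SCENE_OUTLINE>
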
    async def _render_scenes_optimized(self, topic: str, description: str,
                                       scene_outline: str, implementation_plans: Dict[int, str],
                                       file_prefix: str) -> None:
        """Render scenes with enhanced optimization."""

        # Determine which scenes need processing
        scenes_to_process = self._get_scenes_to_process(implementation_plans, file_prefix)

        if not scenes_to_process:
            print(f"✅ No scenes need processing for: {topic}")
            return

        print(f"🎬 Rendering {len(scenes_to_process)} scenes with optimization...")

        # Create render tasks
        render_tasks = []
        for scene_num, implementation_plan in scenes_to_process:
            task = self._create_scene_render_task(
                topic, description, scene_outline, implementation_plan,
                scene_num, file_prefix
            )
            render_tasks.append(task)

        # Execute with concurrency control
        semaphore = asyncio.Semaphore(self.config.max_concurrent_renders)

        async def execute_render_task(task):
            async with semaphore:
                return await task

        # Run all render tasks
        results = await asyncio.gather(
            *[execute_render_task(task) for task in render_tasks],
            return_exceptions=True
        )

        # Process results
        successful_renders = sum(1 for r in results if not isinstance(r, Exception) and r[1] is None)
        print(f"📊 Render results: {successful_renders}/{len(results)} scenes successful")

    def _get_scenes_to_process(self, implementation_plans: Dict[int, str],
                               file_prefix: str) -> List[tuple]:
        """Determine which scenes need processing."""
        scenes_to_process = []

        for scene_num, implementation_plan in implementation_plans.items():
            if implementation_plan is None:
                continue

            scene_dir = os.path.join(self.config.output_dir, file_prefix, f"scene{scene_num}")

            # Check if scene already successfully rendered
            if not os.path.exists(os.path.join(scene_dir, "succ_rendered.txt")):
                scenes_to_process.append((scene_num, implementation_plan))

        return scenes_to_process

    def _create_scene_render_task(self, topic: str, description: str, scene_outline: str,
                                  implementation_plan: str, scene_num: int, file_prefix: str):
        """Create render task for a scene."""

        # Generate or load scene trace ID
        scene_dir = os.path.join(self.config.output_dir, file_prefix, f"scene{scene_num}")
        subplan_dir = os.path.join(scene_dir, "subplans")
        os.makedirs(subplan_dir, exist_ok=True)

        scene_trace_id_path = os.path.join(subplan_dir, "scene_trace_id.txt")
        try:
            with open(scene_trace_id_path, 'r') as f:
                scene_trace_id = f.read().strip()
        except FileNotFoundError:
            scene_trace_id = str(uuid.uuid4())
            with open(scene_trace_id_path, 'w') as f:
                f.write(scene_trace_id)

        # Create directories
        code_dir = os.path.join(scene_dir, "code")
        media_dir = os.path.join(self.config.output_dir, file_prefix, "media")
        os.makedirs(code_dir, exist_ok=True)

        # Return coroutine that will be awaited later
        return self.scene_service.render_scene_with_code_generation(
            topic=topic,
            description=description,
            scene_outline=scene_outline,
            scene_implementation=implementation_plan,
            scene_number=scene_num,
            file_prefix=file_prefix,
            code_dir=code_dir,
            media_dir=media_dir,
            scene_trace_id=scene_trace_id,
            session_id=self.session_id
        )

    async def _combine_videos_optimized(self, topic: str) -> None:
        """Combine videos with hardware acceleration."""
        print(f"🎞️ Combining videos for: {topic}")

        try:
            output_path = await self.renderer.combine_videos_optimized(
                topic, use_hardware_acceleration=self.config.use_gpu_acceleration
            )
            print(f"✅ Combined video saved to: {output_path}")
        except Exception as e:
            print(f"❌ Error combining videos: {e}")

    async def process_multiple_topics(self, topics_data: List[Dict],
                                      only_plan: bool = False,
                                      specific_scenes: List[int] = None) -> None:
        """Process multiple topics concurrently."""

        topic_semaphore = asyncio.Semaphore(self.config.max_topic_concurrency)

        async def process_single_topic(topic_data):
            async with topic_semaphore:
                topic = topic_data['theorem']
                description = topic_data['description']
                print(f"🎯 Processing topic: {topic}")

                try:
                    await self.generate_video_pipeline(
                        topic, description, only_plan=only_plan,
                        specific_scenes=specific_scenes
                    )
                    print(f"✅ Completed topic: {topic}")
                except Exception as e:
                    print(f"❌ Error processing {topic}: {e}")

        tasks = [process_single_topic(topic_data) for topic_data in topics_data]
        await asyncio.gather(*tasks, return_exceptions=True)

    def get_status_summary(self, topics_data: List[Dict]) -> None:
        """Print comprehensive status summary."""
        print("\n📊 Comprehensive Status Summary")
        print("=" * 160)

        all_statuses = [
            self.scene_analyzer.analyze_scene_status(topic_data['theorem'])
            for topic_data in topics_data
        ]

        # Print header
        print(f"{'Topic':<40} {'Outline':<8} {'Total':<8} {'Status (Plan/Code/Render)':<50} {'Combined':<10} {'Missing Components':<40}")
        print("-" * 160)

        # Print each topic status
        for status in all_statuses:
            scene_status_str = ""
            for scene in status['scene_status']:
                scene_str = (
                    ("P" if scene['has_plan'] else "-") +
                    ("C" if scene['has_code'] else "-") +
                    ("R" if scene['has_render'] else "-") + " "
                )
                scene_status_str += scene_str

            # Collect missing components
            missing_components = self._format_missing_components(status['scene_status'])

            print(f"{status['topic'][:37]+'...' if len(status['topic'])>37 else status['topic']:<40} "
                  f"{'✓' if status['has_scene_outline'] else '✗':<8} "
                  f"{status['total_scenes']:<8} "
                  f"{scene_status_str[:47]+'...' if len(scene_status_str)>47 else scene_status_str:<50} "
                  f"{'✓' if status['has_combined_video'] else '✗':<10} "
                  f"{missing_components[:37]+'...' if len(missing_components)>37 else missing_components:<40}")

        # Print summary statistics
        self._print_summary_statistics(all_statuses, len(topics_data))

    def _format_missing_components(self, scene_status: List[Dict]) -> str:
        """Format missing components string."""
        missing_plans = [str(s['scene_number']) for s in scene_status if not s['has_plan']]
        missing_code = [str(s['scene_number']) for s in scene_status if not s['has_code']]
        missing_renders = [str(s['scene_number']) for s in scene_status if not s['has_render']]

        missing_str = []
        if missing_plans:
            missing_str.append(f"P:{','.join(missing_plans)}")
        if missing_code:
            missing_str.append(f"C:{','.join(missing_code)}")
        if missing_renders:
            missing_str.append(f"R:{','.join(missing_renders)}")

        return ' '.join(missing_str)

    def _print_summary_statistics(self, all_statuses: List[Dict], total_topics: int) -> None:
        """Print summary statistics."""
        total_scenes = sum(status['total_scenes'] for status in all_statuses)
        total_plans = sum(status['implementation_plans'] for status in all_statuses)
        total_code = sum(status['code_files'] for status in all_statuses)
        total_renders = sum(status['rendered_scenes'] for status in all_statuses)
        total_combined = sum(1 for status in all_statuses if status['has_combined_video'])

        print("\n📈 Summary Statistics:")
        print(f"   Total topics: {total_topics}")
        print(f"   Total scenes: {total_scenes}")
        print("   Completion rates:")
        print(f"     Plans: {total_plans}/{total_scenes} ({total_plans/max(1,total_scenes)*100:.1f}%)")
        print(f"     Code: {total_code}/{total_scenes} ({total_code/max(1,total_scenes)*100:.1f}%)")
        print(f"     Renders: {total_renders}/{total_scenes} ({total_renders/max(1,total_scenes)*100:.1f}%)")
        print(f"     Combined videos: {total_combined}/{total_topics} ({total_combined/max(1,total_topics)*100:.1f}%)")

# Command-line interface
class VideoGeneratorCLI:
    """Command-line interface for video generation."""

    @staticmethod
    def create_argument_parser() -> argparse.ArgumentParser:
        """Create argument parser with all options."""
        parser = argparse.ArgumentParser(description='Enhanced Manim Video Generator')

        # Model configuration
        parser.add_argument('--model', type=str, choices=allowed_models,
                            default='gemini/gemini-2.5-flash-preview-04-17', help='AI model to use')
        parser.add_argument('--scene_model', type=str, choices=allowed_models,
                            help='Specific model for scene generation')
        parser.add_argument('--helper_model', type=str, choices=allowed_models,
                            help='Helper model for additional tasks')

        # Input/Output
        parser.add_argument('--topic', type=str, help='Single topic to process')
        parser.add_argument('--context', type=str, help='Context for the topic')
        parser.add_argument('--theorems_path', type=str, help='Path to theorems JSON file')
        parser.add_argument('--output_dir', type=str, default=Config.OUTPUT_DIR, help='Output directory')

        # Processing options
        parser.add_argument('--sample_size', type=int, help='Number of theorems to sample')
        parser.add_argument('--scenes', nargs='+', type=int, help='Specific scenes to process')
        parser.add_argument('--max_retries', type=int, default=5, help='Maximum retries for code generation')

        # Mode flags
        parser.add_argument('--only_plan', action='store_true', help='Only generate plans')
        parser.add_argument('--only_render', action='store_true', help='Only render scenes')
        parser.add_argument('--only_combine', action='store_true', help='Only combine videos')
        parser.add_argument('--check_status', action='store_true', help='Check status of all topics')

        # Performance options
        parser.add_argument('--max_scene_concurrency', type=int, default=5, help='Max concurrent scenes')
        parser.add_argument('--max_topic_concurrency', type=int, default=1, help='Max concurrent topics')
        parser.add_argument('--max_concurrent_renders', type=int, default=4, help='Max concurrent renders')
        parser.add_argument('--quality', choices=['preview', 'low', 'medium', 'high', 'production'],
                            default='medium', help='Render quality preset')

        # Feature flags
        parser.add_argument('--verbose', action='store_true', help='Verbose output')
        parser.add_argument('--use_rag', action='store_true', help='Use RAG')
        parser.add_argument('--use_context_learning', action='store_true', help='Use context learning')
        parser.add_argument('--use_visual_fix_code', action='store_true', help='Use visual code fixing')
        parser.add_argument('--use_langfuse', action='store_true', help='Enable Langfuse logging')
        # Note: store_true with default=True means this flag is effectively always on;
        # there is no --no-enable_caching counterpart to switch it off.
        parser.add_argument('--enable_caching', action='store_true', default=True, help='Enable caching')
        parser.add_argument('--use_gpu_acceleration', action='store_true', default=False, help='Use GPU acceleration')
        parser.add_argument('--preview_mode', action='store_true', help='Enable preview mode')

        # Paths
        parser.add_argument('--chroma_db_path', type=str, default=Config.CHROMA_DB_PATH, help='ChromaDB path')
        parser.add_argument('--manim_docs_path', type=str, default=Config.MANIM_DOCS_PATH, help='Manim docs path')
        parser.add_argument('--context_learning_path', type=str, default=Config.CONTEXT_LEARNING_PATH, help='Context learning path')
        parser.add_argument('--embedding_model', type=str, default=Config.EMBEDDING_MODEL, help='Embedding model')

        return parser
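
    # Example invocations (illustrative only; file paths and topics are placeholders):
    #   python generate_video.py --topic "Pythagorean Theorem" --context "high-school geometry"
    #   python generate_video.py --theorems_path data/theorems.json --sample_size 5 --only_plan
    #   python generate_video.py --theorems_path data/theorems.json --check_status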
    @staticmethod
    def create_config_from_args(args) -> VideoGenerationConfig:
        """Create configuration from command-line arguments."""
        return VideoGenerationConfig(
            planner_model=args.model,
            scene_model=args.scene_model,
            helper_model=args.helper_model,
            output_dir=args.output_dir,
            verbose=args.verbose,
            use_rag=args.use_rag,
            use_context_learning=args.use_context_learning,
            context_learning_path=args.context_learning_path,
            chroma_db_path=args.chroma_db_path,
            manim_docs_path=args.manim_docs_path,
            embedding_model=args.embedding_model,
            use_visual_fix_code=args.use_visual_fix_code,
            use_langfuse=args.use_langfuse,
            max_scene_concurrency=args.max_scene_concurrency,
            max_topic_concurrency=args.max_topic_concurrency,
            max_retries=args.max_retries,
            enable_caching=args.enable_caching,
            default_quality=args.quality,
            use_gpu_acceleration=args.use_gpu_acceleration,
            preview_mode=args.preview_mode,
            max_concurrent_renders=args.max_concurrent_renders
        )
async def main():
    """Enhanced main function with improved error handling and performance."""
    parser = VideoGeneratorCLI.create_argument_parser()
    args = parser.parse_args()

    # Create configuration
    config = VideoGeneratorCLI.create_config_from_args(args)

    # Initialize enhanced video generator
    video_generator = EnhancedVideoGenerator(config)

    try:
        if args.theorems_path:
            await handle_multiple_topics(video_generator, args)
        elif args.topic and args.context:
            await handle_single_topic(video_generator, args)
        else:
            print("❌ Please provide either --theorems_path or both --topic and --context")
            return

    except Exception as e:
        print(f"❌ Fatal error: {e}")
        raise
async def handle_multiple_topics(video_generator: EnhancedVideoGenerator, args):
    """Handle processing of multiple topics."""
    with open(args.theorems_path, "r") as f:
        theorems = json.load(f)

    if args.sample_size:
        theorems = theorems[:args.sample_size]

    if args.check_status:
        video_generator.get_status_summary(theorems)
        return

    if args.only_combine:
        for theorem in theorems:
            await video_generator._combine_videos_optimized(theorem['theorem'])
    else:
        await video_generator.process_multiple_topics(
            theorems,
            only_plan=args.only_plan,
            specific_scenes=args.scenes
        )
async def handle_single_topic(video_generator: EnhancedVideoGenerator, args):
    """Handle processing of a single topic."""
    if args.only_combine:
        await video_generator._combine_videos_optimized(args.topic)
    else:
        await video_generator.generate_video_pipeline(
            args.topic,
            args.context,
            only_plan=args.only_plan
        )

if __name__ == "__main__":
    asyncio.run(main())
gradio_app.log
ADDED
The diff for this file is too large to render.
See raw diff
gradio_app.py
ADDED
@@ -0,0 +1,925 @@
import os
import gradio as gr
import asyncio
import json
import uuid
import threading
import time
from datetime import datetime
import logging
import traceback
import re
from typing import Dict, List, Optional

from mllm_tools.litellm import LiteLLMWrapper
from src.config.config import Config
from generate_video import EnhancedVideoGenerator, VideoGenerationConfig, allowed_models

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("gradio_app.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

# Create necessary directories
os.makedirs("thumbnails", exist_ok=True)

# Global dictionary to track job status
job_status = {}

# Model descriptions for better user understanding
MODEL_DESCRIPTIONS = {
    "gemini/gemini-1.5-pro-002": "🧠 Advanced reasoning, excellent for complex mathematical concepts",
    "gemini/gemini-2.5-flash-preview-04-17": "⚡ Fast processing, good for quick prototypes",
    "openai/gpt-4": "🎯 Reliable and consistent, great for educational content",
    "openai/gpt-4o": "🚀 Latest OpenAI model with enhanced capabilities",
    "anthropic/claude-3-5-sonnet-20241022": "📚 Excellent at detailed explanations and structured content",
    "openrouter/openai/gpt-4o": "🌐 GPT-4o via OpenRouter - Powerful and versatile",
    "openrouter/openai/gpt-4o-mini": "🌐 GPT-4o Mini via OpenRouter - Fast and cost-effective",
    "openrouter/anthropic/claude-3.5-sonnet": "🌐 Claude 3.5 Sonnet via OpenRouter - Excellent reasoning",
    "openrouter/anthropic/claude-3-haiku": "🌐 Claude 3 Haiku via OpenRouter - Quick responses",
    "openrouter/google/gemini-pro-1.5": "🌐 Gemini Pro 1.5 via OpenRouter - Google's advanced model",
    "openrouter/deepseek/deepseek-chat": "🌐 DeepSeek Chat via OpenRouter - Advanced conversation",
    "openrouter/qwen/qwen-2.5-72b-instruct": "🌐 Qwen 2.5 72B via OpenRouter - Alibaba's flagship model",
    "openrouter/meta-llama/llama-3.1-8b-instruct:free": "🌐 Llama 3.1 8B via OpenRouter - Free open source model",
    "openrouter/microsoft/phi-3-mini-128k-instruct:free": "🌐 Phi-3 Mini via OpenRouter - Free Microsoft model"
}
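
# Note: job_status is an in-process dict, so job history is lost whenever the app
# restarts; a persistent store (e.g. SQLite) would be needed to keep it across runs.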
def cancel_job(job_id):
    """Cancel a running job.

    Note: this only flags the job as cancelled; the background task itself
    is not interrupted.
    """
    if job_id and job_id in job_status:
        if job_status[job_id]['status'] in ['pending', 'initializing', 'planning', 'running']:
            job_status[job_id]['status'] = 'cancelled'
            job_status[job_id]['message'] = 'Job cancelled by user'
            return f"Job {job_id} has been cancelled"
    return "Job not found or cannot be cancelled"

def delete_job(job_id):
    """Delete a job from history."""
    if job_id and job_id in job_status:
        # Remove output files if they exist
        job = job_status[job_id]
        if job.get('output_file') and os.path.exists(job['output_file']):
            try:
                # Remove the entire output directory for this job
                output_dir = os.path.dirname(job['output_file'])
                import shutil
                shutil.rmtree(output_dir, ignore_errors=True)
            except Exception as e:
                logger.error(f"Error removing output files: {e}")

        # Remove thumbnail
        if job.get('thumbnail') and os.path.exists(job['thumbnail']):
            try:
                os.remove(job['thumbnail'])
            except Exception as e:
                logger.error(f"Error removing thumbnail: {e}")

        # Remove from job status
        del job_status[job_id]
        return f"Job {job_id} deleted successfully"
    return "Job not found"

def get_job_statistics():
    """Get statistics about jobs."""
    total_jobs = len(job_status)
    completed_jobs = sum(1 for job in job_status.values() if job.get('status') == 'completed')
    failed_jobs = sum(1 for job in job_status.values() if job.get('status') == 'failed')
    running_jobs = sum(1 for job in job_status.values() if job.get('status') in ['pending', 'initializing', 'planning', 'running'])

    return {
        'total': total_jobs,
        'completed': completed_jobs,
        'failed': failed_jobs,
        'running': running_jobs
    }
def init_video_generator(params):
    """Initialize the EnhancedVideoGenerator with the given parameters."""
    model_name = params.get('model', 'gemini/gemini-2.5-flash-preview-04-17')
    helper_model_name = params.get('helper_model', model_name)
    verbose = params.get('verbose', True)  # Verbose by default for better debugging
    max_scene_concurrency = params.get('max_scene_concurrency', 1)

    # Create configuration for the enhanced video generator
    config = VideoGenerationConfig(
        planner_model=model_name,
        scene_model=model_name,
        helper_model=helper_model_name,
        output_dir=params.get('output_dir', Config.OUTPUT_DIR),
        verbose=verbose,
        use_rag=params.get('use_rag', False),
        use_context_learning=params.get('use_context_learning', False),
        context_learning_path=params.get('context_learning_path', Config.CONTEXT_LEARNING_PATH),
        chroma_db_path=params.get('chroma_db_path', Config.CHROMA_DB_PATH),
        manim_docs_path=params.get('manim_docs_path', Config.MANIM_DOCS_PATH),
        embedding_model=params.get('embedding_model', Config.EMBEDDING_MODEL),
        use_visual_fix_code=params.get('use_visual_fix_code', True),  # Visual fix code enabled by default
        use_langfuse=params.get('use_langfuse', False),
        max_scene_concurrency=max_scene_concurrency,
        max_retries=params.get('max_retries', 3)
    )

    # Initialize EnhancedVideoGenerator
    video_generator = EnhancedVideoGenerator(config)

    return video_generator
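
# Minimal usage sketch (hypothetical values; any key missing from params falls
# back to the defaults above):
#   generator = init_video_generator({'model': 'openai/gpt-4o', 'use_rag': True})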
async def process_video_generation(job_id, params):
    """Process video generation asynchronously."""
    try:
        # Update job status
        job_status[job_id]['status'] = 'initializing'
        job_status[job_id]['progress'] = 5
        job_status[job_id]['message'] = 'Initializing video generator...'

        # Initialize video generator
        video_generator = init_video_generator(params)

        # Extract video generation parameters
        topic = params.get('topic')
        description = params.get('description')
        max_retries = int(params.get('max_retries', 3))
        only_plan = params.get('only_plan', False)

        # Log job start
        logger.info(f"Starting job {job_id} for topic: {topic}")
        job_status[job_id]['status'] = 'planning'
        job_status[job_id]['progress'] = 10
        job_status[job_id]['message'] = 'Planning video scenes...'

        # Generate video pipeline
        start_time = datetime.now()
        logger.info(f"Running generate_video_pipeline for topic: {topic}")

        # Progress callback shared by the pipeline and the background updater
        def update_progress_callback(progress, message):
            job_status[job_id]['progress'] = progress
            job_status[job_id]['message'] = message
            logger.info(f"Job {job_id} progress: {progress}% - {message}")

        # Start a background task to periodically update progress
        async def progress_update_task():
            stages = [
                (15, 'Creating scene outline...'),
                (25, 'Generating implementation plans...'),
                (35, 'Generating code for scenes...'),
                (45, 'Compiling Manim code...'),
                (60, 'Rendering scenes...'),
                (80, 'Combining videos...'),
                (90, 'Finalizing video...')
            ]

            for progress, message in stages:
                update_progress_callback(progress, message)
                await asyncio.sleep(5)  # Wait between updates

                # Stop updating if the job is complete, failed, or cancelled
                if job_status[job_id]['status'] in ['completed', 'failed', 'cancelled']:
                    break

        # Start progress update task
        progress_task = asyncio.create_task(progress_update_task())

        # Run the main video generation task with detailed logging
        try:
            logger.info(f"Starting video generation pipeline for job {job_id}")
            update_progress_callback(15, 'Starting video generation pipeline...')

            await video_generator.generate_video_pipeline(
                topic=topic,
                description=description,
                only_plan=only_plan
            )

            logger.info(f"Video generation pipeline completed for job {job_id}")
        except Exception as e:
            logger.error(f"Error in video generation pipeline for job {job_id}: {str(e)}")
            logger.error(traceback.format_exc())
            raise

        # Cancel progress update task
        if not progress_task.done():
            progress_task.cancel()

        # Calculate processing time
        end_time = datetime.now()
        processing_time = (end_time - start_time).total_seconds()

        # Get output file path
        file_prefix = topic.lower()
        file_prefix = re.sub(r'[^a-z0-9_]+', '_', file_prefix)
        output_file = os.path.join(
            params.get('output_dir', Config.OUTPUT_DIR),
            file_prefix,
            f"{file_prefix}_combined.mp4"
        )

        # Check if the combined output file actually exists
        if not os.path.exists(output_file):
            alternative_output = None
            # Look for any MP4 files that might have been generated
            scene_dir = os.path.join(params.get('output_dir', Config.OUTPUT_DIR), file_prefix)
            if os.path.exists(scene_dir):
                for root, dirs, files in os.walk(scene_dir):
                    for file in files:
                        if file.endswith('.mp4'):
                            alternative_output = os.path.join(root, file)
                            logger.info(f"Combined video not found, but found alternative: {alternative_output}")
                            break
                    if alternative_output:
                        break

            if alternative_output:
                output_file = alternative_output
            else:
                logger.error(f"No video output file found for job {job_id}")
                raise Exception("No video output was generated. Check Manim execution logs.")

        # Create a thumbnail from the video if it exists
        thumbnail_path = None
        if os.path.exists(output_file):
            thumbnail_path = os.path.join("thumbnails", f"{job_id}.jpg")
            try:
                import subprocess
                # Grab a single frame at the 5-second mark (assumes ffmpeg is on PATH;
                # videos shorter than 5 s produce no frame and hence no thumbnail)
                result = subprocess.run([
                    'ffmpeg', '-i', output_file,
                    '-ss', '00:00:05', '-frames:v', '1',
                    thumbnail_path
                ], capture_output=True, text=True)

                if result.returncode != 0:
                    logger.error(f"Error creating thumbnail: {result.stderr}")
                    thumbnail_path = None
            except Exception as e:
                logger.error(f"Error creating thumbnail: {str(e)}")
                thumbnail_path = None

        # Get scene snapshots
        scene_snapshots = []
        scene_dir = os.path.join(params.get('output_dir', Config.OUTPUT_DIR), file_prefix)
        if os.path.exists(scene_dir):
            for i in range(1, 10):  # Check up to 9 possible scene folders
                scene_snapshot_dir = os.path.join(scene_dir, f"scene{i}")
                if os.path.exists(scene_snapshot_dir):
                    img_files = sorted(f for f in os.listdir(scene_snapshot_dir) if f.endswith('.png'))
                    if img_files:
                        img_path = os.path.join(scene_snapshot_dir, img_files[-1])  # Take the last image
                        scene_snapshots.append(img_path)

        # Update job status to completed
        job_status[job_id].update({
            'status': 'completed',
            'progress': 100,
            'message': 'Video generation completed',
            'output_file': output_file if os.path.exists(output_file) else None,
            'processing_time': processing_time,
            'thumbnail': thumbnail_path,
            'scene_snapshots': scene_snapshots
        })

        logger.info(f"Job {job_id} completed successfully in {processing_time:.2f} seconds")

    except Exception as e:
        # Handle exceptions
        error_msg = str(e)
        stack_trace = traceback.format_exc()
        logger.error(f"Error in job {job_id}: {error_msg}\n{stack_trace}")

        job_status[job_id].update({
            'status': 'failed',
            'error': error_msg,
            'stack_trace': stack_trace,
            'message': f'Error: {error_msg[:100]}...' if len(error_msg) > 100 else f'Error: {error_msg}'
        })
def start_async_job(job_id, params):
    """Start an async job in a separate thread."""
    def run_async():
        asyncio.run(process_video_generation(job_id, params))

    thread = threading.Thread(target=run_async)
    thread.daemon = True
    thread.start()
    return thread
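
# Design note: asyncio.run() gives each job its own event loop inside a daemon
# thread, so long-running generation never blocks Gradio's request handling; daemon
# threads are killed at interpreter exit, so unfinished jobs cannot hang shutdown.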
def submit_job(topic, description, model, helper_model, max_retries, use_rag, use_visual_fix_code, temperature, use_context_learning, verbose, max_scene_concurrency):
    """Submit a new video generation job."""
    # Input validation
    if not topic.strip():
        return "❌ Error: Topic is required", None, gr.update(visible=False)

    if not description.strip():
        return "❌ Error: Description is required", None, gr.update(visible=False)

    if len(topic.strip()) < 3:
        return "❌ Error: Topic must be at least 3 characters long", None, gr.update(visible=False)

    if len(description.strip()) < 10:
        return "❌ Error: Description must be at least 10 characters long", None, gr.update(visible=False)

    try:
        # Generate job ID
        job_id = str(uuid.uuid4())

        # Initialize job status
        job_status[job_id] = {
            'id': job_id,
            'status': 'pending',
            'topic': topic,
            'description': description,
            'model': model,
            'start_time': datetime.now().isoformat(),
            'progress': 0,
            'message': 'Job submitted, waiting to start...'
        }

        # Prepare parameters
        params = {
            'topic': topic,
            'description': description,
            'model': model,
            'helper_model': helper_model,
            'max_retries': max_retries,
            'use_rag': use_rag,
            'use_visual_fix_code': use_visual_fix_code,
            'temperature': temperature,
            'use_context_learning': use_context_learning,
            'verbose': verbose,
            'max_scene_concurrency': max_scene_concurrency,
            'output_dir': Config.OUTPUT_DIR,
        }

        # Start job asynchronously
        start_async_job(job_id, params)

        return f"✅ Job submitted successfully. Job ID: {job_id}", job_id, gr.update(visible=True)

    except Exception as e:
        logger.error(f"Error submitting job: {str(e)}")
        return f"❌ Error: {str(e)}", None, gr.update(visible=False)
def check_job_status(job_id):
    """Check the status of a job."""
    if not job_id or job_id not in job_status:
        return {"status": "not_found", "message": "Job not found"}

    return job_status[job_id]

def get_video_details(job_id):
    """Get details of a completed video job."""
    if not job_id or job_id not in job_status:
        return None, None, None, [], "Job not found"

    job = job_status[job_id]

    if job['status'] != 'completed':
        return None, None, None, [], f"Video not ready. Current status: {job['status']}"

    # Get video path, processing time, thumbnail and scene snapshots
    video_path = job.get('output_file')
    processing_time = job.get('processing_time', 0)
    thumbnail = job.get('thumbnail')
    scene_snapshots = job.get('scene_snapshots', [])

    if not video_path or not os.path.exists(video_path):
        return None, None, None, [], "Video file not found"

    return video_path, processing_time, thumbnail, scene_snapshots, None

def get_job_list():
    """Get a list of all jobs."""
    job_list = []
    for job_id, job in job_status.items():
        job_list.append({
            'id': job_id,
            'topic': job.get('topic', 'Unknown'),
            'status': job.get('status', 'unknown'),
            'start_time': job.get('start_time', ''),
            'progress': job.get('progress', 0),
            'message': job.get('message', '')
        })

    # Sort by start time, most recent first
    job_list.sort(key=lambda x: x.get('start_time', ''), reverse=True)
    return job_list

def format_status_message(job):
    """Format status message for display."""
    if not job:
        return "No job selected"

    status = job.get('status', 'unknown')
    progress = job.get('progress', 0)
    message = job.get('message', '')

    status_emoji = {
        'pending': '⏳',
        'initializing': '🔄',
        'planning': '🧠',
        'running': '⚙️',
        'completed': '✅',
        'failed': '❌',
        'unknown': '❓'
    }.get(status, '❓')

    return f"{status_emoji} Status: {status.title()} ({progress}%)\n{message}"
def update_status_display(job_id):
    """Update the status display for a job."""
    if not job_id:
        return ("No job selected",
                gr.update(value=None),
                gr.update(visible=False),
                gr.update(visible=False),
                gr.update(value=[]),
                gr.update(visible=False),
                gr.update(visible=False))

    job = check_job_status(job_id)
    status_message = format_status_message(job)

    # Check if the job is completed to show the video
    if job.get('status') == 'completed' and job.get('output_file') and os.path.exists(job.get('output_file')):
        video_path = job.get('output_file')
        video_vis = True
        thumbnail = job.get('thumbnail')
        scene_snapshots = job.get('scene_snapshots', [])
        processing_time = job.get('processing_time', 0)

        return (status_message,
                gr.update(value=video_path),
                gr.update(visible=video_vis),
                gr.update(visible=thumbnail is not None, value=thumbnail),
                gr.update(value=scene_snapshots),
                gr.update(visible=True, value=f"⏱️ Processing Time: {processing_time:.2f} seconds"),
                gr.update(visible=job.get('status') in ['pending', 'initializing', 'planning', 'running']))

    return (status_message,
            gr.update(value=None),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(value=[]),
            gr.update(visible=False),
            gr.update(visible=job.get('status') in ['pending', 'initializing', 'planning', 'running']))
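
# Note: update_status_display returns a 7-tuple matching the outputs lists wired up
# below; video_output appears twice in those lists because the function emits two
# separate updates for it (one carrying the value, one toggling visibility).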
# Create Gradio interface
with gr.Blocks(
    title="Theory2Manim Video Generator",
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="slate",
        neutral_hue="slate",
        font=gr.themes.GoogleFont("Inter")
    ),
    css="""
    .main-header {
        text-align: center;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 2rem;
        border-radius: 1rem;
        margin-bottom: 2rem;
    }
    .status-card {
        border: 1px solid #e1e5e9;
        border-radius: 0.5rem;
        padding: 1rem;
        background: #f8f9fa;
    }
    .metric-card {
        border: 1px solid #e1e5e9;
        border-radius: 0.5rem;
        padding: 1rem;
        text-align: center;
        background: white;
    }
    .job-actions {
        gap: 0.5rem;
    }
    """
) as app:

    # Header
    with gr.Row():
        with gr.Column():
            gr.HTML("""
            <div class="main-header">
                <h1>🎬 Theory2Manim Video Generator</h1>
                <p>Transform mathematical and scientific concepts into engaging educational videos</p>
            </div>
            """)

    # Statistics Dashboard
    with gr.Row():
        stats_total = gr.Textbox(label="📊 Total Jobs", interactive=False, scale=1)
        stats_completed = gr.Textbox(label="✅ Completed", interactive=False, scale=1)
        stats_running = gr.Textbox(label="⚙️ Running", interactive=False, scale=1)
        stats_failed = gr.Textbox(label="❌ Failed", interactive=False, scale=1)

    with gr.Tab("🎥 Generate Video"):
        with gr.Row():
            with gr.Column(scale=2):
                with gr.Group():
                    gr.Markdown("### 📝 Content Configuration")
                    topic_input = gr.Textbox(
                        label="📚 Topic",
                        placeholder="e.g., Fourier Transform, Calculus Derivatives, Quantum Mechanics",
                        info="Enter the main topic for your educational video"
                    )
                    description_input = gr.Textbox(
                        label="📋 Detailed Description",
                        placeholder="Provide a comprehensive description of what you want the video to cover, including specific concepts, examples, and target audience level...",
                        lines=6,
                        info="The more detailed your description, the better the AI can generate relevant content"
                    )

            with gr.Column(scale=1):
                with gr.Group():
                    gr.Markdown("### ⚙️ AI Model Settings")
                    model_input = gr.Dropdown(
                        label="🤖 Primary AI Model",
                        choices=list(MODEL_DESCRIPTIONS.keys()),
                        value="gemini/gemini-2.5-flash-preview-04-17",
                        info="Choose the AI model for content generation"
                    )
                    model_description = gr.Markdown(MODEL_DESCRIPTIONS["gemini/gemini-2.5-flash-preview-04-17"])

                    helper_model_input = gr.Dropdown(
                        label="🔧 Helper Model",
                        choices=list(MODEL_DESCRIPTIONS.keys()),
                        value="gemini/gemini-2.5-flash-preview-04-17",
                        info="Model for auxiliary tasks"
                    )

                    temperature_input = gr.Slider(
                        label="🌡️ Creativity (Temperature)",
                        minimum=0.0,
                        maximum=1.0,
                        value=0.7,
                        step=0.1,
                        info="Lower = more focused, Higher = more creative"
                    )

        with gr.Row():
            with gr.Column():
                with gr.Group():
                    gr.Markdown("### 🔧 Advanced Settings")
                    with gr.Row():
                        max_retries_input = gr.Slider(
                            label="🔄 Max Retries",
                            minimum=1,
                            maximum=10,
                            value=3,
                            step=1,
                            info="Number of retry attempts for failed operations"
                        )
                        max_scene_concurrency_input = gr.Slider(
                            label="⚡ Scene Concurrency",
                            minimum=1,
                            maximum=5,
                            value=1,
                            step=1,
                            info="Number of scenes to process simultaneously"
                        )

                    with gr.Row():
                        use_rag_input = gr.Checkbox(
                            label="📚 Use RAG (Retrieval Augmented Generation)",
                            value=False,
                            info="Enhance generation with relevant knowledge retrieval"
                        )
                        use_visual_fix_code_input = gr.Checkbox(
                            label="🎨 Use Visual Code Fixing",
                            value=True,
                            info="Automatically fix visual rendering issues"
                        )
                        use_context_learning_input = gr.Checkbox(
                            label="🧠 Use Context Learning",
                            value=False,
                            info="Learn from previous successful videos"
                        )
                        verbose_input = gr.Checkbox(
                            label="📝 Verbose Logging",
                            value=True,
                            info="Enable detailed logging for debugging"
                        )

        with gr.Row():
            with gr.Column(scale=3):
                submit_btn = gr.Button("🚀 Generate Video", variant="primary", size="lg")
            with gr.Column(scale=1):
                clear_form_btn = gr.Button("🧹 Clear Form", variant="secondary")

        result_text = gr.Textbox(label="📋 Status", interactive=False)
        job_id_output = gr.Textbox(label="Job ID", visible=False)

        with gr.Column(visible=False) as status_container:
            with gr.Group():
                gr.Markdown("### 📊 Job Progress")
                with gr.Row():
                    with gr.Column(scale=3):
                        status_text = gr.Textbox(label="Current Status", interactive=False, elem_classes=["status-card"])
                        processing_time_text = gr.Textbox(label="Processing Information", visible=False, interactive=False)
                    with gr.Column(scale=1):
                        with gr.Group():
                            refresh_btn = gr.Button("🔄 Refresh Status", variant="secondary")
                            cancel_btn = gr.Button("⏹️ Cancel Job", variant="stop", visible=False)

            with gr.Row():
                with gr.Column(scale=2):
                    video_output = gr.Video(
                        label="🎬 Generated Video",
                        interactive=False,
                        visible=False,
                        show_download_button=True
                    )
                    thumbnail_preview = gr.Image(
                        label="🖼️ Video Thumbnail",
                        visible=False,
                        height=200
                    )

                with gr.Column(scale=1):
                    scene_gallery = gr.Gallery(
                        label="🎨 Scene Previews",
                        columns=2,
                        object_fit="contain",
                        height=400,
                        show_download_button=True
                    )
with gr.Tab("📂 Job History & Management"):
|
658 |
+
with gr.Row():
|
659 |
+
with gr.Column(scale=3):
|
660 |
+
refresh_jobs_btn = gr.Button("🔄 Refresh Job List", variant="secondary")
|
661 |
+
with gr.Column(scale=1):
|
662 |
+
clear_completed_btn = gr.Button("🧹 Clear Completed Jobs", variant="secondary")
|
663 |
+
clear_all_btn = gr.Button("🗑️ Clear All Jobs", variant="stop")
|
664 |
+
|
665 |
+
jobs_table = gr.Dataframe(
|
666 |
+
headers=["ID", "Topic", "Status", "Progress (%)", "Start Time", "Message"],
|
667 |
+
datatype=["str", "str", "str", "number", "str", "str"],
|
668 |
+
interactive=False,
|
669 |
+
label="📋 Job History",
|
670 |
+
wrap=True
|
671 |
+
)
|
672 |
+
|
673 |
+
with gr.Row():
|
674 |
+
with gr.Column():
|
675 |
+
select_job_btn = gr.Button("👁️ View Selected Job", variant="primary")
|
676 |
+
selected_job_id = gr.Textbox(label="Selected Job ID", visible=False)
|
677 |
+
with gr.Column():
|
678 |
+
delete_job_btn = gr.Button("🗑️ Delete Selected Job", variant="stop")
|
679 |
+
download_job_btn = gr.Button("💾 Download Job Results", variant="secondary")
|
680 |
+
|
681 |
+
with gr.Tab("ℹ️ Help & Documentation"):
|
682 |
+
gr.Markdown("""
|
683 |
+
## 🎯 How to Use Theory2Manim
|
684 |
+
|
685 |
+
### 📝 Step 1: Content Planning
|
686 |
+
- **Topic**: Enter a clear, specific topic (e.g., "Linear Algebra: Matrix Multiplication")
|
687 |
+
- **Description**: Provide detailed context about what you want covered:
|
688 |
+
- Target audience level (beginner, intermediate, advanced)
|
689 |
+
- Specific concepts to include
|
690 |
+
- Examples or applications to demonstrate
|
691 |
+
- Preferred video length or depth
|
692 |
+
|
693 |
+
### 🤖 Step 2: Model Selection
|
694 |
+
- **Gemini 1.5 Pro**: Best for complex mathematical reasoning
|
695 |
+
- **Gemini 2.0 Flash**: Fastest processing, good for simple topics
|
696 |
+
- **GPT-4**: Reliable and consistent output
|
697 |
+
- **Claude**: Excellent for detailed explanations
|
698 |
+
|
699 |
+
### ⚙️ Step 3: Advanced Settings
|
700 |
+
- **Temperature**: 0.3-0.5 for factual content, 0.7-0.9 for creative explanations
|
701 |
+
- **RAG**: Enable for topics requiring external knowledge
|
702 |
+
- **Visual Code Fixing**: Recommended for better video quality
|
703 |
+
- **Context Learning**: Use previous successful videos as examples
|
704 |
+
|
705 |
+
### 📊 Step 4: Monitor Progress
|
706 |
+
- Check the **Job History** tab to monitor all your video generation tasks
|
707 |
+
- Use **Refresh Status** to get real-time updates
|
708 |
+
- **Cancel** jobs if needed during processing
|
709 |
+
|
710 |
+
### 🎬 Step 5: Review Results
|
711 |
+
- Preview generated videos directly in the interface
|
712 |
+
- View scene breakdowns and thumbnails
|
713 |
+
- Download videos for offline use
|
714 |
+
|
715 |
+
## 💡 Tips for Best Results
|
716 |
+
1. **Be Specific**: Detailed descriptions lead to better videos
|
717 |
+
2. **Start Simple**: Try basic topics first to understand the system
|
718 |
+
3. **Use Examples**: Mention specific examples you want included
|
719 |
+
4. **Set Context**: Specify the educational level and background needed
|
720 |
+
5. **Review Settings**: Adjust temperature and models based on your content type
|
721 |
+
|
722 |
+
## 🔧 Troubleshooting
|
723 |
+
- **Job Stuck**: Try canceling and resubmitting with different settings
|
724 |
+
- **Poor Quality**: Use higher temperature or enable Visual Code Fixing
|
725 |
+
- **Missing Content**: Provide more detailed descriptions
|
726 |
+
- **Errors**: Check the verbose logs in the status messages
|
727 |
+
""")
|
728 |
+
|
729 |
+
    # Event handlers with improved functionality
    def clear_form():
        # Reset every form field to its default (max retries 3, concurrency 1)
        return ("", "", 0.7, False, True, False, True, 3, 1, "Form cleared! Ready for new input.")

    def update_model_description(model):
        return MODEL_DESCRIPTIONS.get(model, "No description available")

    def update_stats():
        stats = get_job_statistics()
        return (f"{stats['total']}",
                f"{stats['completed']}",
                f"{stats['running']}",
                f"{stats['failed']}")

    def clear_completed_jobs():
        completed_jobs = [job_id for job_id, job in job_status.items()
                          if job.get('status') == 'completed']
        for job_id in completed_jobs:
            delete_job(job_id)
        return f"Cleared {len(completed_jobs)} completed jobs"

    def clear_all_jobs():
        count = len(job_status)
        job_status.clear()
        return f"Cleared all {count} jobs"
    # Connect event handlers
    model_input.change(
        fn=update_model_description,
        inputs=[model_input],
        outputs=[model_description]
    )

    clear_form_btn.click(
        fn=clear_form,
        outputs=[topic_input, description_input, temperature_input,
                 use_rag_input, use_visual_fix_code_input, use_context_learning_input,
                 verbose_input, max_retries_input, max_scene_concurrency_input, result_text]
    )

    submit_btn.click(
        fn=submit_job,
        inputs=[
            topic_input, description_input, model_input, helper_model_input, max_retries_input,
            use_rag_input, use_visual_fix_code_input, temperature_input, use_context_learning_input,
            verbose_input, max_scene_concurrency_input
        ],
        outputs=[result_text, job_id_output, status_container]
    ).then(
        fn=update_status_display,
        inputs=[job_id_output],
        outputs=[status_text, video_output, video_output, thumbnail_preview, scene_gallery, processing_time_text, cancel_btn]
    ).then(
        fn=update_stats,
        outputs=[stats_total, stats_completed, stats_running, stats_failed]
    )

    refresh_btn.click(
        fn=update_status_display,
        inputs=[job_id_output],
        outputs=[status_text, video_output, video_output, thumbnail_preview, scene_gallery, processing_time_text, cancel_btn]
    ).then(
        fn=update_stats,
        outputs=[stats_total, stats_completed, stats_running, stats_failed]
    )

    cancel_btn.click(
        fn=cancel_job,
        inputs=[job_id_output],
        outputs=[result_text]
    ).then(
        fn=update_status_display,
        inputs=[job_id_output],
        outputs=[status_text, video_output, video_output, thumbnail_preview, scene_gallery, processing_time_text, cancel_btn]
    )
    # Job history tab functions
    def load_job_list():
        jobs = get_job_list()
        rows = []
        for job in jobs:
            start_time = job.get('start_time', '')
            if start_time:
                try:
                    dt = datetime.fromisoformat(start_time.replace('Z', '+00:00'))
                    formatted_time = dt.strftime('%Y-%m-%d %H:%M:%S')
                except ValueError:
                    formatted_time = start_time
            else:
                formatted_time = 'Unknown'

            rows.append([
                job['id'][:8] + '...',
                job['topic'][:50] + ('...' if len(job['topic']) > 50 else ''),
                job['status'].title(),
                job['progress'],
                formatted_time,
                job['message'][:100] + ('...' if len(job['message']) > 100 else '')
            ])
        return rows

    def select_job(evt: gr.SelectData):
        if not evt:
            return "", "No job selected"

        selected_row = evt.index[0] if hasattr(evt, 'index') and evt.index else 0
        jobs = get_job_list()
        if selected_row < len(jobs):
            return jobs[selected_row]['id'], f"Selected job: {jobs[selected_row]['topic']}"
        return "", "No job selected"

    def delete_selected_job(job_id):
        if job_id:
            result = delete_job(job_id)
            return result, ""
        return "No job selected", ""
    refresh_jobs_btn.click(
        fn=load_job_list,
        outputs=[jobs_table]
    ).then(
        fn=update_stats,
        outputs=[stats_total, stats_completed, stats_running, stats_failed]
    )

    jobs_table.select(
        fn=select_job,
        outputs=[selected_job_id, result_text]
    )

    select_job_btn.click(
        fn=lambda x: gr.update(visible=True) if x else gr.update(visible=False),
        inputs=[selected_job_id],
        outputs=[status_container]
    ).then(
        fn=update_status_display,
        inputs=[selected_job_id],
        outputs=[status_text, video_output, video_output, thumbnail_preview, scene_gallery, processing_time_text, cancel_btn]
    )

    delete_job_btn.click(
        fn=delete_selected_job,
        inputs=[selected_job_id],
        outputs=[result_text, selected_job_id]
    ).then(
        fn=load_job_list,
        outputs=[jobs_table]
    ).then(
        fn=update_stats,
        outputs=[stats_total, stats_completed, stats_running, stats_failed]
    )

    clear_completed_btn.click(
        fn=clear_completed_jobs,
        outputs=[result_text]
    ).then(
        fn=load_job_list,
        outputs=[jobs_table]
    ).then(
        fn=update_stats,
        outputs=[stats_total, stats_completed, stats_running, stats_failed]
    )

    clear_all_btn.click(
        fn=clear_all_jobs,
        outputs=[result_text]
    ).then(
        fn=load_job_list,
        outputs=[jobs_table]
    ).then(
        fn=update_stats,
        outputs=[stats_total, stats_completed, stats_running, stats_failed]
    )

    # Populate the job table and statistics once when the page loads
    app.load(
        fn=load_job_list,
        outputs=[jobs_table]
    ).then(
        fn=update_stats,
        outputs=[stats_total, stats_completed, stats_running, stats_failed]
    )

    # Load on app start
    def on_app_start():
        if not os.path.exists("thumbnails"):
            os.makedirs("thumbnails", exist_ok=True)
        return "🎬 Welcome to Theory2Manim Video Generator! Ready to create amazing educational videos."

    app.load(
        fn=on_app_start,
        outputs=[result_text]
    )

# Launch the app
if __name__ == "__main__":
    app.queue().launch(server_name="127.0.0.1", share=False)
requirements.txt
ADDED
@@ -0,0 +1,99 @@
annotated-types~=0.7.0
azure-cognitiveservices-speech~=1.41.1
cachetools~=5.5.0
certifi~=2024.8.30
charset-normalizer~=3.4.0
click~=8.1.7
cloup~=3.0.5
Cython~=3.0.11
decorator~=5.1.1
glcontext~=3.0.0
google-ai-generativelanguage~=0.6.10
google-api-core~=2.22.0
google-api-python-client~=2.151.0
google-auth~=2.35.0
google-auth-httplib2~=0.2.0
google-generativeai~=0.8.3
googleapis-common-protos~=1.65.0
grpcio~=1.67.1
grpcio-status~=1.67.1
gTTS~=2.5.3
httplib2~=0.22.0
idna~=3.10
isosurfaces~=0.1.2
manim~=0.18.1
manim-voiceover~=0.3.7
ManimPango~=0.6.0  # run `sudo apt-get install libsdl-pango-dev` first if you don't have pangocairo
mapbox_earcut~=1.0.2
markdown-it-py~=3.0.0
mdurl~=0.1.2
moderngl~=5.12.0
multipledispatch~=1.0.0
mutagen~=1.47.0
networkx~=3.4.2
numpy~=2.2.2
pillow
proto-plus~=1.25.0
protobuf~=5.28.3
pyasn1~=0.6.1
pyasn1_modules~=0.4.1
PyAudio~=0.2.14  # requires `brew install portaudio` on macOS
pycairo~=1.27.0
pydantic~=2.9.2
pydantic_core~=2.23.4
pydub~=0.25.1
pyglet~=2.0.18
Pygments~=2.18.0
#pyobjc-core~=10.3.1  # macOS only
#pyobjc-framework-Cocoa~=10.3.1  # macOS only
pyparsing~=3.2.0
pyrr~=0.10.3
python-dotenv~=0.21.1
python-slugify~=8.0.4
requests~=2.32.3
rich~=13.9.3
rsa~=4.9
scipy~=1.14.1
screeninfo~=0.8.1
skia-pathops~=0.8.0.post2
sox~=1.5.0
srt~=3.5.3
svgelements~=1.9.6
text-unidecode~=1.3
tqdm~=4.66.5
typing_extensions~=4.12.2
uritemplate~=4.1.1
urllib3~=2.2.3
watchdog~=5.0.3
inquirer
openai~=1.61.0
tiktoken~=0.8.0
timm
sentencepiece
transformers
sentence-transformers
litellm~=1.60.5
pysrt
moviepy~=2.1.2
yt-dlp
imageio_ffmpeg~=0.5.1
langchain~=0.3.14
langchain_community~=0.3.14
SpeechRecognition~=3.14.1
boto3~=1.36.9
manim-physics~=0.4.0
manim-ml~=0.0.24
manim-chemistry~=0.4.4
manim-dsa~=0.2.0
manim-circuit~=0.0.3
langfuse~=2.58.1
chromadb~=0.6.3
google-cloud-aiplatform~=1.79.0
cairosvg
pylatexenc~=2.10
ffmpeg-python~=0.2.0
kokoro-onnx  # local text-to-speech via ONNX runtime
soundfile~=0.13.1
krippendorff~=0.8.1
statsmodels~=0.14.4
opencv-python~=4.11.0
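
# To install (a fresh virtual environment is recommended):
#   pip install -r requirements.txt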