# multimodal_module / requirements.txt
# Princeaka's picture
# Update requirements.txt
# 028b31f verified
# Python dependencies for this module, one requirement per line, grouped by purpose.
# Every versioned entry below uses only a lower bound (>=) and several entries
# carry no version specifier at all, so pip is free to install the newest
# release available for each package at install time.
# =========================
# Core ML & Hugging Face
# =========================
transformers>=4.30.0
diffusers>=0.19.0
accelerate>=0.20.0
huggingface-hub>=0.13.0
# No version constraint: any release is accepted.
safetensors
# =========================
# Torch (CPU-safe; HF picks correct wheel)
# =========================
# NOTE(review): this file specifies no index URL or CPU/CUDA-specific build
# tag, so which torch wheel gets installed is decided by pip and the hosting
# environment, not by anything written here -- confirm the claim above.
torch>=2.0.0
torchvision>=0.15.0
torchaudio>=2.0.0
# =========================
# Gradio UI & web server
# =========================
gradio>=3.35.0
fastapi>=0.95.0
uvicorn>=0.22.0
python-multipart>=0.0.6
# =========================
# Audio Processing
# =========================
# NOTE(review): pydub presumably needs an external ffmpeg/libav binary for
# non-WAV formats, which this file cannot install -- confirm it is provided
# by the deployment image.
librosa>=0.10.0
soundfile>=0.12.1
SpeechRecognition>=3.10.0
gTTS>=2.3.2
webrtcvad>=2.0.10
pydub>=0.25.1
# =========================
# Image & Video Processing
# =========================
# The headless OpenCV distribution is the one requested here (no GUI variant).
Pillow>=9.5.0
opencv-python-headless>=4.7.0
imageio>=2.31.1
imageio-ffmpeg>=0.4.8
moviepy>=1.0.3
# =========================
# Document Processing
# =========================
# NOTE(review): pdf2image presumably depends on the Poppler command-line
# tools being installed on the system -- confirm they are available.
pymupdf>=1.22.0
python-docx>=0.8.11
pdf2image>=1.16.0
# =========================
# Utilities & NLP helpers
# =========================
# Entries without a specifier (tqdm, langdetect, sentencepiece, tokenizers,
# requests) are unconstrained.
numpy>=1.24.0
tqdm
langdetect
protobuf>=3.20.0
sentencepiece
tokenizers
requests