import argparse
import logging
import random
import uuid
import numpy as np
from transformers import pipeline
from diffusers import DiffusionPipeline, StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
from diffusers.utils import load_image, export_to_video
from transformers import (
    SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan, SpeechT5ForSpeechToSpeech,
    BlipProcessor, BlipForConditionalGeneration, TrOCRProcessor, VisionEncoderDecoderModel,
    ViTImageProcessor, AutoTokenizer, AutoImageProcessor, TimesformerForVideoClassification,
    MaskFormerFeatureExtractor, MaskFormerForInstanceSegmentation, DPTForDepthEstimation, DPTFeatureExtractor
)
from datasets import load_dataset
from PIL import Image
from torchvision import transforms
import torch
import torchaudio
from speechbrain.pretrained import WaveformEnhancement
import joblib
from huggingface_hub import hf_hub_url, cached_download
from controlnet_aux import OpenposeDetector, MLSDdetector, HEDdetector, CannyDetector, MidasDetector
import warnings
import time
from espnet2.bin.tts_inference import Text2Speech
import soundfile as sf
from asteroid.models import BaseModel
import traceback
import os
import yaml

warnings.filterwarnings("ignore")

def setup_logger():
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    handler = logging.StreamHandler()
    handler.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    return logger

logger = setup_logger()

def load_config(config_path):
    with open(config_path, "r") as file:
        return yaml.load(file, Loader=yaml.FullLoader)

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", type=str, default="config.yaml")
    return parser.parse_args()

args = parse_args()

# Ensure the config is always set when not running as the main script
if __name__ != "__main__":
    args.config = "config.gradio.yaml"

config = load_config(args.config)

local_deployment = config["local_deployment"]
if config["inference_mode"] == "huggingface":
    local_deployment = "none"

PROXY = {"https": config["proxy"]} if config["proxy"] else None

start = time.time()
| local_models = "" # Changed to empty string | |

def load_pipes(local_deployment):
    # Instantiate the locally hosted model pipelines; which groups get built
    # depends on the requested deployment level.
    standard_pipes = {}
    other_pipes = {}
    controlnet_sd_pipes = {}
    if local_deployment in ["full"]:
        other_pipes = {
            "damo-vilab/text-to-video-ms-1.7b": {
                "model": DiffusionPipeline.from_pretrained(f"{local_models}damo-vilab/text-to-video-ms-1.7b", torch_dtype=torch.float16, variant="fp16"),
                "device": "cuda:0"
            },
            "JorisCos/DCCRNet_Libri1Mix_enhsingle_16k": {
                "model": BaseModel.from_pretrained("JorisCos/DCCRNet_Libri1Mix_enhsingle_16k"),
                "device": "cuda:0"
            },
            "microsoft/speecht5_vc": {
                "processor": SpeechT5Processor.from_pretrained(f"{local_models}microsoft/speecht5_vc"),
                "model": SpeechT5ForSpeechToSpeech.from_pretrained(f"{local_models}microsoft/speecht5_vc"),
                "vocoder": SpeechT5HifiGan.from_pretrained(f"{local_models}microsoft/speecht5_hifigan"),
                "embeddings_dataset": load_dataset(f"{local_models}Matthijs/cmu-arctic-xvectors", split="validation"),
                "device": "cuda:0"
            },
            "facebook/maskformer-swin-base-coco": {
                "feature_extractor": MaskFormerFeatureExtractor.from_pretrained(f"{local_models}facebook/maskformer-swin-base-coco"),
                "model": MaskFormerForInstanceSegmentation.from_pretrained(f"{local_models}facebook/maskformer-swin-base-coco"),
                "device": "cuda:0"
            },
            "Intel/dpt-hybrid-midas": {
                "model": DPTForDepthEstimation.from_pretrained(f"{local_models}Intel/dpt-hybrid-midas", low_cpu_mem_usage=True),
                "feature_extractor": DPTFeatureExtractor.from_pretrained(f"{local_models}Intel/dpt-hybrid-midas"),
                "device": "cuda:0"
            }