Spaces:

ruslanmv
/

TextToVideo-Flux

Paused

App Files Files Community

ruslanmv committed on Feb 1

Commit

aa17af1

1 Parent(s): 6370c41

Update app.py

Browse files

Files changed (1) hide show

app.py +81 -12

app.py CHANGED Viewed

@@ -34,25 +34,34 @@ from gtts import gTTS
 from pydub import AudioSegment
 import textwrap
 # Initialize FLUX pipeline only if CUDA is available
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
-if device == "cuda":
-    flux_pipe = DiffusionPipeline.from_pretrained(
-        "black-forest-labs/FLUX.1-schnell",
-        torch_dtype=dtype
-    ).to(device)
-else:
-    flux_pipe = None  # Avoid initializing the model when CUDA is unavailable
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 2048
 nltk.download('punkt')
 # Ensure proper multiprocessing start method
-multiprocessing.set_start_method("spawn", force=True)
 # Download necessary NLTK data
 def setup_nltk():
@@ -70,7 +79,7 @@ DESCRIPTION = (
 TITLE = "Video Story Generator with Audio by using FLUX, distilbart, and GTTS."
 # Load Tokenizer and Model for Text Summarization
-def load_text_summarization_model():
     """Load the tokenizer and model for text summarization."""
     print("Loading text summarization model...")
     tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")
@@ -80,6 +89,25 @@ def load_text_summarization_model():
     model.to(device)
     return tokenizer, model, device
 tokenizer, model, device = load_text_summarization_model()
 # Log GPU Memory (optional, for debugging)
@@ -102,8 +130,8 @@ def check_gpu_availability():
 check_gpu_availability()
-@spaces.GPU()
-def generate_image_with_flux(
     text: str,
     seed: int = 42,
     width: int = 1024,
@@ -141,6 +169,45 @@ def generate_image_with_flux(
     print("DEBUG: Image generated successfully.")
     return image
 # --------- End of MinDalle Functions ---------
 # Merge audio files
@@ -384,4 +451,6 @@ with demo:
     )
 # Launch the Gradio app
-demo.launch(debug=True, share=False)

 from pydub import AudioSegment
 import textwrap
 # FLUX pipeline is loaded lazily by get_flux_pipeline(); only dtype/device are set here
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
+def get_flux_pipeline():
+    """Build the FLUX.1-schnell pipeline on demand, or return None without CUDA.
+
+    Loading is deferred to the first call so that importing this module does
+    not initialize CUDA in the main process.
+
+    Returns:
+        The ``DiffusionPipeline`` moved to the CUDA device, or ``None`` when
+        ``torch.cuda.is_available()`` reports no usable GPU.
+    """
+    # Query CUDA directly instead of comparing the module-level `device` to
+    # the string "cuda": `device` is rebound further down to the
+    # `torch.device` object returned by load_text_summarization_model()
+    # ("cuda:0" or "cpu"), so by the time this function runs that string
+    # comparison would never match and the pipeline would never be loaded.
+    if not torch.cuda.is_available():
+        return None
+    pipeline = DiffusionPipeline.from_pretrained(
+        "black-forest-labs/FLUX.1-schnell",
+        torch_dtype=dtype,
+    )
+    return pipeline.to("cuda")
+flux_pipe = None  # Do not load at startup
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 2048
 nltk.download('punkt')
 # Ensure proper multiprocessing start method
+# Force the "spawn" start method for multiprocessing.
+# NOTE(review): with force=True, set_start_method presumably does not raise
+# for an already-set start method, so this guard may be purely defensive —
+# confirm against the multiprocessing documentation.
+try:
+    multiprocessing.set_start_method("spawn", force=True)
+except RuntimeError:
+    pass  # Ignore errors if the start method is already set
 # Download necessary NLTK data
 def setup_nltk():
 TITLE = "Video Story Generator with Audio by using FLUX, distilbart, and GTTS."
 # Load Tokenizer and Model for Text Summarization
+def load_text_summarization_model_v1():
     """Load the tokenizer and model for text summarization."""
     print("Loading text summarization model...")
     tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")
     model.to(device)
     return tokenizer, model, device
+def load_text_summarization_model():
+    """Load the distilbart summarization tokenizer and model and pick a device.
+
+    On Hugging Face Spaces (detected through the ``SPACE_ID`` environment
+    variable) the model stays on the CPU and ``CUDA_VISIBLE_DEVICES`` is
+    blanked before the model is loaded, so that loading does not trigger
+    CUDA initialization.
+
+    Returns:
+        A ``(tokenizer, model, device)`` tuple, where ``device`` is the
+        ``torch.device`` the model was moved to.
+    """
+    print("Loading text summarization model...")
+    on_spaces = "SPACE_ID" in os.environ  # Detect if running in Hugging Face Spaces
+    if on_spaces:
+        # NOTE(review): os.environ is process-wide, so this presumably also
+        # hides the GPU from later CUDA users in this process — confirm that
+        # this is intended.
+        os.environ["CUDA_VISIBLE_DEVICES"] = ""  # Prevent CUDA initialization
+    tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")
+    model = AutoModelForSeq2SeqLM.from_pretrained("sshleifer/distilbart-cnn-12-6")
+    # Test the Spaces flag first so torch.cuda is never queried on Spaces;
+    # the selected device is the same as before in every case.
+    if not on_spaces and torch.cuda.is_available():
+        device = torch.device("cuda:0")
+    else:
+        device = torch.device("cpu")
+    print(f"Using device: {device}")
+    model.to(device)
+    return tokenizer, model, device
 tokenizer, model, device = load_text_summarization_model()
 # Log GPU Memory (optional, for debugging)
 check_gpu_availability()
+#@spaces.GPU()
+def generate_image_with_flux_old(
     text: str,
     seed: int = 42,
     width: int = 1024,
     print("DEBUG: Image generated successfully.")
     return image
+@spaces.GPU()
+def generate_image_with_flux(
+    text: str,
+    seed: int = 42,
+    width: int = 1024,
+    height: int = 1024,
+    num_inference_steps: int = 4,
+    randomize_seed: bool = True
+):
+    """Render one image for ``text`` with the lazily loaded FLUX pipeline.
+
+    Args:
+        text: Prompt passed to the pipeline.
+        seed: Seed for the torch generator; ignored when ``randomize_seed``
+            is true.
+        width: Requested image width.
+        height: Requested image height.
+        num_inference_steps: Number of inference steps passed to the pipeline.
+        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
+
+    Returns:
+        The first entry of the pipeline result's ``images``.
+
+    Raises:
+        RuntimeError: If ``get_flux_pipeline()`` returns ``None``.
+    """
+    global flux_pipe
+    print(f"DEBUG: Generating image with FLUX for text: '{text}'")
+    chosen_seed = random.randint(0, MAX_SEED) if randomize_seed else seed
+    rng = torch.Generator().manual_seed(chosen_seed)
+    # First use: build the pipeline now rather than at import time.
+    if flux_pipe is None:
+        flux_pipe = get_flux_pipeline()
+        if flux_pipe is None:
+            raise RuntimeError("FLUX pipeline is not available. Check CUDA or environment settings.")
+    pipeline_kwargs = {
+        "prompt": text,
+        "width": width,
+        "height": height,
+        "num_inference_steps": num_inference_steps,
+        "generator": rng,
+        "guidance_scale": 0.0,
+    }
+    result = flux_pipe(**pipeline_kwargs)
+    picture = result.images[0]
+    print("DEBUG: Image generated successfully.")
+    return picture
 # --------- End of MinDalle Functions ---------
 # Merge audio files
     )
 # Launch the Gradio app
+#demo.launch(debug=True, share=False)
+# NOTE(review): `share` evaluates to True exactly when SPACE_ID is set, i.e.
+# when running on Hugging Face Spaces; the previous call used share=False.
+# Confirm that requesting a share link on Spaces is intended.
+demo.launch(debug=True, share="SPACE_ID" in os.environ)