Spaces:

malvin-ai
/

light-ai-video-generator

Running on Zero

App Files Files Community

malvin noel committed on May 12

Commit

aef0378

1 Parent(s): 6b9a6b5

change script

Browse files

Files changed (1) hide show

scripts/generate_scripts.py +86 -37

scripts/generate_scripts.py CHANGED Viewed

@@ -1,89 +1,138 @@
 import os
 import re
-import json
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import gradio as gr
-from dotenv import load_dotenv
 import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer
-@spaces.GPU()
-def generate_local(prompt: str, max_new_tokens: int = 350, temperature: float = 0.7) -> str:
-    model_id = "Qwen/Qwen3-0.6B"
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # get the device the model is on
-    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
-    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32, trust_remote_code=True).to(device)
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
     output_ids = model.generate(
         **inputs,
         max_new_tokens=max_new_tokens,
         do_sample=True,
-        temperature=temperature,
         pad_token_id=tokenizer.eos_token_id,
     )
-    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
-def generate_script(prompt: str, word_count: int = 60) -> str:
     system_prompt = (
         "You are an expert YouTube scriptwriter. "
         "Your job is to write the EXACT words that will be spoken aloud in a video. "
-        f"Topic: {prompt.strip()}\n\n"
         "🎯 Output rules:\n"
         f"- Exactly {word_count} words.\n"
         "- Only the spoken words. NO scene descriptions, instructions, or formatting.\n"
-        "- Write in natural, clear, and simple English, as if it's being said by a voiceover artist.\n"
         "- Keep a steady rhythm (about 2 words per second).\n"
         "- Do NOT include any explanations, labels, or headers. Only output the final spoken script.\n\n"
         "Start now:"
     )
-    return generate_local(system_prompt)
-def one_word(query: str) -> str:
-    prompt_final = (
-        "Extract only the unique central theme of the following text in English in JSON format like this: "
-        '{"keyword": "impact"}. Text: ' + query
     )
-    result = generate_local(prompt_final, max_new_tokens=30, temperature=0.4)
     try:
-        keyword_json = json.loads(result)
-        keyword = keyword_json.get("keyword", "")
     except json.JSONDecodeError:
-        matches = re.findall(r'\b[a-zA-Z]{3,}\b', result)
         keyword = matches[0] if matches else ""
     return keyword.lower()
 def generate_title(text: str) -> str:
-    prompt_final = (
         "Generate a unique title for a YouTube Short video that is engaging and informative, "
-        "maximum 100 characters, without emojis, introduction, or explanation. Content:\n" + text
     )
-    return generate_local(prompt_final, max_new_tokens=50, temperature=0.9).strip()
 def generate_description(text: str) -> str:
-    prompt_final = (
         "Write only the YouTube video description in English:\n"
         "1. A compelling opening line.\n"
         "2. A clear summary of the video (max 3 lines).\n"
         "3. End with 3 relevant hashtags.\n"
         "No emojis or introductions. Here is the text:\n" + text
     )
-    return generate_local(prompt_final, max_new_tokens=300, temperature=0.7).strip()
-def generate_tags(text: str) -> list:
-    prompt_final = (
         "List only the important keywords for this YouTube video, separated by commas, "
         "maximum 10 keywords. Context: " + text
     )
-    result = generate_local(prompt_final, max_new_tokens=100, temperature=0.5)
-    return [tag.strip() for tag in result.split(",") if tag.strip()]

+"""Reusable helpers for YouTube‑content generation.
+Optimisations applied:
+• Model + tokenizer are loaded **once** at import‑time, not per call.
+• FP16 + `device_map=\"auto\"` for smaller VRAM + faster inference.
+• `@spaces.GPU()` decorator requests a GPU for the duration of each decorated call (ZeroGPU Spaces).
+• All generation helpers reuse a single `generate_local()` for consistency.
+• Minimal error handling + regex fallback when JSON parsing fails.
+"""
+import json
 import os
 import re
+from typing import List
 import spaces
+import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
+# ─────────────────────────────────────────────────────────────
+# Model initialisation (module level: executed when this file is imported)
+# ─────────────────────────────────────────────────────────────
+# Checkpoint to load; the LLM_ID environment variable overrides the default.
+MODEL_ID = os.getenv("LLM_ID", "Qwen/Qwen3-0.6B")
+DTYPE = torch.float16  # weights are loaded in half precision (fp16)
+# Tokenizer and model are created here as module globals and shared by
+# every helper below instead of being rebuilt on each call.
+print(f"🔄 Loading model {MODEL_ID} …")
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+model = (
+    AutoModelForCausalLM
+    .from_pretrained(
+        MODEL_ID,
+        torch_dtype=DTYPE,
+        device_map="auto",          # device placement is chosen by the loader, not pinned here
+        trust_remote_code=True,
+    )
+    .eval()
+)  # .eval() switches to evaluation mode (dropout off); sampling in generate_local() is still random
+print("✅ Model loaded once.")
+# Device the loaded model reports; generate_local() moves tokenized inputs to it.
+# NOTE(review): this only records where the loader put the model. It does not
+# by itself prevent a CPU placement if no GPU is visible at import time — confirm
+# the behaviour on the target Space hardware.
+DEVICE = model.device
+# ─────────────────────────────────────────────────────────────
+# Core text‑generation helper
+# ─────────────────────────────────────────────────────────────
+@spaces.GPU()  # request a GPU from HF Spaces for the duration of each call
+@torch.inference_mode()  # disables autograd tracking (this does not enable autocast)
+def generate_local(
+    prompt: str,
+    *,
+    max_new_tokens: int = 350,
+    temperature: float = 0.7,
+) -> str:
+    """Generate a completion for `prompt` with the shared module-level model.
+
+    Args:
+        prompt: Raw text fed to the tokenizer as-is (no chat template is applied).
+        max_new_tokens: Upper bound on the number of tokens to generate.
+        temperature: Sampling temperature; coerced to ``float`` before use.
+
+    Returns:
+        Only the newly generated text, with special tokens removed and
+        surrounding whitespace stripped. The prompt is not echoed back.
+    """
+    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
+    # Remember how many tokens belong to the prompt so they can be dropped below.
+    prompt_len = inputs["input_ids"].shape[-1]
     output_ids = model.generate(
         **inputs,
         max_new_tokens=max_new_tokens,
         do_sample=True,
+        temperature=float(temperature),  # accept ints / numeric strings from callers
         pad_token_id=tokenizer.eos_token_id,
     )
+    # For a causal LM, generate() returns prompt tokens followed by the new
+    # tokens; decoding the whole sequence would prepend the prompt to every result.
+    completion_ids = output_ids[0][prompt_len:]
+    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()
+# ─────────────────────────────────────────────────────────────
+# High‑level helpers for YouTube workflow
+# ─────────────────────────────────────────────────────────────
+WORDS_PER_SECOND = 2   # speaking pace quoted in the script prompt; callers may also use it to estimate duration
+def generate_script(topic: str, word_count: int = 60) -> str:
+    """Ask the model for a spoken-word script about `topic`.
+
+    Args:
+        topic: Subject of the video; surrounding whitespace is stripped.
+        word_count: Number of words requested in the prompt. The model is
+            only *asked* for this length; the result is not trimmed or padded.
+
+    Returns:
+        The text produced by `generate_local()` for the script prompt.
+
+    Raises:
+        ValueError: If `word_count` is smaller than 1, which would request
+            an empty script and a zero or negative token budget.
+    """
+    if word_count < 1:
+        raise ValueError("word_count must be a positive integer")
     system_prompt = (
         "You are an expert YouTube scriptwriter. "
         "Your job is to write the EXACT words that will be spoken aloud in a video. "
+        f"Topic: {topic.strip()}\n\n"
         "🎯 Output rules:\n"
         f"- Exactly {word_count} words.\n"
         "- Only the spoken words. NO scene descriptions, instructions, or formatting.\n"
+        "- Write in natural, clear, and simple English, as if it's being said by a voice‑over artist.\n"
+        # Built from WORDS_PER_SECOND so the prompt and the constant cannot drift apart.
+        f"- Keep a steady rhythm (about {WORDS_PER_SECOND} words per second).\n"
         "- Do NOT include any explanations, labels, or headers. Only output the final spoken script.\n\n"
         "Start now:"
     )
+    # Token budget: two tokens per requested word.
+    return generate_local(system_prompt, max_new_tokens=word_count * 2, temperature=0.8)
+def one_word(text: str) -> str:
+    """Extract a single keyword that summarises `text`.
+
+    The model is asked for a JSON object of the form ``{"keyword": "..."}``.
+    The first brace-delimited object in the reply is parsed; if that yields no
+    usable string, the first alphabetic word of three or more letters in the
+    reply is used instead.
+
+    Returns:
+        The keyword in lowercase, or ``""`` when nothing usable was produced.
+    """
+    prompt = (
+        "Extract only the unique central theme of the following text in English "
+        "as JSON: {\"keyword\": \"impact\"}. Text: " + text
+    )
+    result = generate_local(prompt, max_new_tokens=30, temperature=0.4)
+    # The reply may start with an echo of the prompt. Drop it so that neither
+    # the example JSON nor the word "Extract" from the prompt is picked up.
+    echoed = prompt.strip()
+    if result.startswith(echoed):
+        result = result[len(echoed):]
+    keyword = ""
+    # Try JSON first: models often wrap the object in extra text, so look for
+    # the first {...} span instead of parsing the whole reply.
+    json_match = re.search(r"\{[^{}]*\}", result)
+    if json_match:
+        try:
+            parsed = json.loads(json_match.group(0))
+        except json.JSONDecodeError:
+            parsed = None
+        # Guard against valid JSON that is not an object or whose value is not text.
+        if isinstance(parsed, dict) and isinstance(parsed.get("keyword"), str):
+            keyword = parsed["keyword"].strip()
+    if not keyword:
+        # Fallback: first 3+ letter word. In a raw string the word boundary is
+        # a single-backslash \b; a doubled backslash matches a literal "\b".
+        matches = re.findall(r"\b[a-zA-Z]{3,}\b", result)
         keyword = matches[0] if matches else ""
     return keyword.lower()
 def generate_title(text: str) -> str:
+    prompt = (
         "Generate a unique title for a YouTube Short video that is engaging and informative, "
+        "max 100 characters, without emojis, introduction, or explanation. Content:\n" + text
     )
+    return generate_local(prompt, max_new_tokens=50, temperature=0.9)
 def generate_description(text: str) -> str:
+    prompt = (
         "Write only the YouTube video description in English:\n"
         "1. A compelling opening line.\n"
         "2. A clear summary of the video (max 3 lines).\n"
         "3. End with 3 relevant hashtags.\n"
         "No emojis or introductions. Here is the text:\n" + text
     )
+    return generate_local(prompt, max_new_tokens=300, temperature=0.7)
+def generate_tags(text: str) -> List[str]:
+    """Ask the shared model for comma-separated keywords describing `text`.
+
+    The reply is split on commas; each piece is stripped of surrounding
+    whitespace and empty pieces are discarded. The "maximum 10" limit is only
+    stated in the prompt and is not enforced on the returned list.
+    """
+    instructions = (
+        "List only the important keywords for this YouTube video, separated by commas, "
+        "maximum 10 keywords. Context: "
+    )
+    raw = generate_local(instructions + text, max_new_tokens=100, temperature=0.5)
+    tags: List[str] = []
+    for piece in raw.split(","):
+        cleaned = piece.strip()
+        if cleaned:
+            tags.append(cleaned)
+    return tags