# generate_scripts.py
import re
import json

import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer


@spaces.GPU(duration=150)
def generate_local(model, tokenizer, prompt: str, max_new_tokens: int = 350, temperature: float = 0.7) -> str:
    """Run a single sampled generation on the ZeroGPU-allocated device."""
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {k: v.to(model.device) for k, v in inputs.items()}  # match the model's device
    output_ids = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens; otherwise the prompt is echoed
    # back into the result and downstream parsing (e.g. json.loads in
    # one_word) breaks.
    new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)


def generate_script(model, tokenizer, prompt: str, word_count: int = 60) -> str:
    system_prompt = (
        "You are an expert YouTube scriptwriter. "
        "Your job is to write the EXACT words that will be spoken aloud in a video. "
        f"Topic: {prompt.strip()}\n\n"
        "🎯 Output rules:\n"
        f"- Exactly {word_count} words.\n"
        "- Only the spoken words. NO scene descriptions, instructions, or formatting.\n"
        "- Write in natural, clear, and simple English, as if it's being said by a voiceover artist.\n"
        "- Keep a steady rhythm (about 2 words per second).\n"
        "- Do NOT include any explanations, labels, or headers. Only output the final spoken script.\n\n"
        "Start now:"
    )
    return generate_local(model, tokenizer, system_prompt)


def one_word(model, tokenizer, query: str) -> str:
    prompt_final = (
        "Extract only the unique central theme of the following text in English in JSON format like this: "
        '{"keyword": "impact"}. Text: ' + query
    )
    result = generate_local(model, tokenizer, prompt_final, max_new_tokens=30, temperature=0.4)
    try:
        keyword_json = json.loads(result)
        keyword = keyword_json.get("keyword", "")
    except json.JSONDecodeError:
        # Fallback when the model does not return valid JSON: take the first
        # plausible word from the raw output.
        matches = re.findall(r"\b[a-zA-Z]{3,}\b", result)
        keyword = matches[0] if matches else ""
    return keyword.lower()


def generate_title(model, tokenizer, text: str) -> str:
    prompt_final = (
        "Generate a unique title for a YouTube Short video that is engaging and informative, "
        "maximum 100 characters, without emojis, introduction, or explanation. Content:\n" + text
    )
    return generate_local(model, tokenizer, prompt_final, max_new_tokens=50, temperature=0.9).strip()


def generate_description(model, tokenizer, text: str) -> str:
    prompt_final = (
        "Write only the YouTube video description in English:\n"
        "1. A compelling opening line.\n"
        "2. A clear summary of the video (max 3 lines).\n"
        "3. End with 3 relevant hashtags.\n"
        "No emojis or introductions. Here is the text:\n" + text
    )
    return generate_local(model, tokenizer, prompt_final, max_new_tokens=300, temperature=0.7).strip()


def generate_tags(model, tokenizer, text: str) -> list[str]:
    prompt_final = (
        "List only the important keywords for this YouTube video, separated by commas, "
        "maximum 10 keywords. Context: " + text
    )
    result = generate_local(model, tokenizer, prompt_final, max_new_tokens=100, temperature=0.5)
    return [tag.strip() for tag in result.split(",") if tag.strip()]
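

# --- Usage sketch (illustrative, not part of the Space's entry point) ---
# A minimal example of how these helpers could be wired together. The
# checkpoint name below is an assumption for illustration; the Space may load
# a different model in its app code. `device_map="auto"` requires the
# `accelerate` package.
if __name__ == "__main__":
    model_id = "Qwen/Qwen2.5-0.5B-Instruct"  # hypothetical checkpoint
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

    script = generate_script(model, tokenizer, "Why the sky is blue", word_count=60)
    print("Script:", script)
    print("Keyword:", one_word(model, tokenizer, script))
    print("Title:", generate_title(model, tokenizer, script))
    print("Description:", generate_description(model, tokenizer, script))
    print("Tags:", generate_tags(model, tokenizer, script))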