Spaces:

malvin-ai
/

light-ai-video-generator

Running on Zero

light-ai-video-generator / scripts /generate_scripts.py

malvin noel

change script

4b42e2c 2 months ago

3.54 kB

	# generate_scripts.py

	import os
	import re
	import json
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer
	import gradio as gr
	from dotenv import load_dotenv
	import spaces
	from transformers import AutoModelForCausalLM, AutoTokenizer




	@spaces.GPU(duration=150)
	def generate_local(model, tokenizer, prompt: str, max_new_tokens: int = 350, temperature: float = 0.7) -> str:
	    """Sample a completion for ``prompt`` and return only the newly generated text.

	    Args:
	        model: Causal language model exposing ``generate`` and ``device``.
	        tokenizer: Tokenizer paired with ``model``; it is called on the prompt
	            and used to decode the generated ids.
	        prompt: Full text fed to the model.
	        max_new_tokens: Upper bound on the number of tokens to generate.
	        temperature: Sampling temperature forwarded to ``model.generate``.

	    Returns:
	        The decoded continuation with special tokens removed. The prompt itself
	        is not part of the returned string.
	    """
	    inputs = tokenizer(prompt, return_tensors="pt")
	    # Move every input tensor to the device the model lives on.
	    inputs = {k: v.to(model.device) for k, v in inputs.items()}
	    # Number of prompt tokens; needed below to separate prompt from completion.
	    prompt_length = inputs["input_ids"].shape[-1]

	    output_ids = model.generate(
	        **inputs,
	        max_new_tokens=max_new_tokens,
	        do_sample=True,
	        temperature=temperature,
	        pad_token_id=tokenizer.eos_token_id,
	    )
	    # For decoder-only models ``generate`` returns the prompt ids followed by
	    # the new ids. Decoding the whole sequence would echo the instructions back
	    # to every caller, so keep only what comes after the prompt.
	    new_token_ids = output_ids[0][prompt_length:]
	    return tokenizer.decode(new_token_ids, skip_special_tokens=True)



	def generate_script(model, tokenizer, prompt: str, word_count: int = 60) -> str:
	    """Build the scriptwriter prompt for a topic and return the model's output.

	    Args:
	        model: Model object handed through to ``generate_local``.
	        tokenizer: Tokenizer object handed through to ``generate_local``.
	        prompt: Topic of the video; surrounding whitespace is stripped before
	            it is inserted into the instructions.
	        word_count: Number of words the instructions ask the model to write.

	    Returns:
	        Whatever ``generate_local`` returns for the assembled instructions,
	        using its default generation settings.
	    """
	    topic = prompt.strip()
	    # Each piece is one fragment of the final instruction text, in order.
	    instruction_parts = [
	        "You are an expert YouTube scriptwriter. ",
	        "Your job is to write the EXACT words that will be spoken aloud in a video. ",
	        f"Topic: {topic}\n\n",
	        "🎯 Output rules:\n",
	        f"- Exactly {word_count} words.\n",
	        "- Only the spoken words. NO scene descriptions, instructions, or formatting.\n",
	        "- Write in natural, clear, and simple English, as if it's being said by a voiceover artist.\n",
	        "- Keep a steady rhythm (about 2 words per second).\n",
	        "- Do NOT include any explanations, labels, or headers. Only output the final spoken script.\n\n",
	        "Start now:",
	    ]
	    full_prompt = "".join(instruction_parts)
	    return generate_local(model, tokenizer, full_prompt)


	def one_word(model, tokenizer, query: str) -> str:
	    """Ask the model for the central theme of ``query`` and return it lowercased.

	    The model is asked to answer as ``{"keyword": "..."}``. The reply is searched
	    for the first flat JSON object that carries a non-empty string under
	    ``"keyword"``. If none is found, the first alphabetic word of three or more
	    letters is used instead, ignoring the key name ``keyword`` itself.

	    Args:
	        model: Model object handed through to ``generate_local``.
	        tokenizer: Tokenizer object handed through to ``generate_local``.
	        query: Text whose theme should be extracted.

	    Returns:
	        The extracted keyword in lower case, or ``""`` when nothing usable was
	        found in the reply.
	    """
	    prompt_final = (
	        "Extract only the unique central theme of the following text in English in JSON format like this: "
	        '{"keyword": "impact"}. Text: ' + query
	    )
	    result = generate_local(model, tokenizer, prompt_final, max_new_tokens=30, temperature=0.4)

	    # If the reply begins with an echo of the prompt, drop it so the example
	    # object inside the instructions is not mistaken for the answer.
	    if result.startswith(prompt_final):
	        result = result[len(prompt_final):]

	    # Models often wrap the JSON in extra text, so parse each brace-delimited
	    # candidate instead of requiring the whole reply to be valid JSON.
	    for candidate in re.findall(r'\{[^{}]*\}', result):
	        try:
	            parsed = json.loads(candidate)
	        except json.JSONDecodeError:
	            continue
	        if not isinstance(parsed, dict):
	            continue
	        value = parsed.get("keyword")
	        # Guard against null / numeric / nested values before calling str methods.
	        if isinstance(value, str) and value.strip():
	            return value.strip().lower()

	    # Fallback: first plain word in the reply. The key name is skipped because a
	    # malformed '{"keyword": ...' fragment would otherwise always win.
	    for word in re.findall(r'\b[a-zA-Z]{3,}\b', result):
	        if word.lower() != "keyword":
	            return word.lower()
	    return ""


	def generate_title(model, tokenizer, text: str) -> str:
	    """Request a YouTube Short title for ``text`` from the model.

	    Args:
	        model: Model object handed through to ``generate_local``.
	        tokenizer: Tokenizer object handed through to ``generate_local``.
	        text: Video content appended to the title instructions.

	    Returns:
	        The output of ``generate_local`` (up to 50 new tokens, temperature 0.9)
	        with leading and trailing whitespace removed.
	    """
	    instructions = (
	        "Generate a unique title for a YouTube Short video that is engaging and informative, "
	        "maximum 100 characters, without emojis, introduction, or explanation. Content:\n"
	    )
	    raw_title = generate_local(
	        model,
	        tokenizer,
	        instructions + text,
	        max_new_tokens=50,
	        temperature=0.9,
	    )
	    return raw_title.strip()

	def generate_description(model, tokenizer, text: str) -> str:
	    """Request a YouTube video description for ``text`` from the model.

	    Args:
	        model: Model object handed through to ``generate_local``.
	        tokenizer: Tokenizer object handed through to ``generate_local``.
	        text: Video content appended to the description instructions.

	    Returns:
	        The output of ``generate_local`` (up to 300 new tokens, temperature 0.7)
	        with leading and trailing whitespace removed.
	    """
	    # Instruction fragments, concatenated in order ahead of the video text.
	    instruction_parts = (
	        "Write only the YouTube video description in English:\n",
	        "1. A compelling opening line.\n",
	        "2. A clear summary of the video (max 3 lines).\n",
	        "3. End with 3 relevant hashtags.\n",
	        "No emojis or introductions. Here is the text:\n",
	    )
	    request = "".join(instruction_parts) + text
	    raw_description = generate_local(
	        model,
	        tokenizer,
	        request,
	        max_new_tokens=300,
	        temperature=0.7,
	    )
	    return raw_description.strip()

	def generate_tags(model, tokenizer, text: str) -> list:
	    """Request comma-separated keywords for ``text`` and split them into a list.

	    Args:
	        model: Model object handed through to ``generate_local``.
	        tokenizer: Tokenizer object handed through to ``generate_local``.
	        text: Video context appended to the keyword instructions.

	    Returns:
	        The comma-separated pieces of the ``generate_local`` output (up to 100
	        new tokens, temperature 0.5), each stripped of surrounding whitespace,
	        with empty pieces dropped.
	    """
	    instructions = (
	        "List only the important keywords for this YouTube video, separated by commas, "
	        "maximum 10 keywords. Context: "
	    )
	    raw_tags = generate_local(
	        model,
	        tokenizer,
	        instructions + text,
	        max_new_tokens=100,
	        temperature=0.5,
	    )
	    # Trim every comma-separated piece first, then keep the non-empty ones.
	    trimmed = (piece.strip() for piece in raw_tags.split(","))
	    return [piece for piece in trimmed if piece]