Spaces:
Running
on
Zero
Running
on
Zero
File size: 3,207 Bytes
8b05224 339bd9a 8b05224 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
import os
import re
import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
from dotenv import load_dotenv
# Load the model and the tokenizer (runs once, at import time).
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): torch and transformers are already imported at the top of the
# file; the two imports below are redundant repeats and are harmless at runtime.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Hugging Face Hub identifier of the checkpoint used by every generator below.
model_id = "Qwen/Qwen2.5-0.5B"
# NOTE(review): trust_remote_code=True presumably permits running code shipped
# with the model repository; confirm it is actually required for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
# Weights are requested in float32 and then moved to the device selected above.
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32, trust_remote_code=True).to(device)
def generate_local(prompt: str, max_new_tokens: int = 350, temperature: float = 0.7) -> str:
    """Generate a completion for ``prompt`` with the module-level model.

    Args:
        prompt: Text fed to the model as-is (no chat template is applied).
        max_new_tokens: Upper bound on the number of tokens to generate.
        temperature: Sampling temperature forwarded to ``model.generate``.

    Returns:
        Only the newly generated text, with special tokens removed. The
        prompt itself is not part of the returned string.
    """
    # Tokenize on whatever device the model currently lives on.
    target_device = model.device
    inputs = tokenizer(prompt, return_tensors="pt").to(target_device)
    output_ids = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        # Reuse EOS as the padding token for generation.
        pad_token_id=tokenizer.eos_token_id,
    )
    # For a causal LM the generated sequence starts with the prompt tokens.
    # Drop them so callers receive the completion only; otherwise every
    # caller would have to parse its own instructions back out of the result.
    prompt_length = inputs["input_ids"].shape[-1]
    completion_ids = output_ids[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True)
def generate_script(prompt: str, word_count: int = 60) -> str:
    """Ask the model for a short YouTube video script about ``prompt``.

    Args:
        prompt: Topic of the video; surrounding whitespace is stripped.
        word_count: Script length, in words, requested from the model.

    Returns:
        The text returned by ``generate_local`` for the assembled instructions.
    """
    topic = prompt.strip()
    instruction_parts = [
        "You are a professional video scriptwriter. ",
        f"Write a script for a short YouTube video about: {topic}.\n",
        f"The video must be {word_count} words long, engaging, clear, and formatted as plain text.",
    ]
    full_prompt = "".join(instruction_parts)
    return generate_local(full_prompt)
def _extract_keyword(raw_output: str) -> str:
    """Return the lower-cased keyword found in raw model output, or "" if none."""
    # Try the whole output first, then every flat {...} fragment inside it,
    # because the model often surrounds the JSON object with extra text.
    candidates = [raw_output.strip()] + re.findall(r'\{[^{}]*\}', raw_output)
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        # Valid JSON is not necessarily an object (could be a list, number, ...).
        if not isinstance(parsed, dict):
            continue
        value = parsed.get("keyword")
        if isinstance(value, str) and value.strip():
            return value.strip().lower()

    # No usable JSON: fall back to the first alphabetic word of 3+ letters,
    # skipping the literal key name so broken JSON does not yield "keyword".
    for word in re.findall(r'\b[a-zA-Z]{3,}\b', raw_output):
        if word.lower() != "keyword":
            return word.lower()
    return ""


def one_word(query: str) -> str:
    """Extract a single lower-cased keyword summarising ``query``.

    Args:
        query: Text whose central theme should be extracted.

    Returns:
        The keyword in lower case, or an empty string when the model output
        contains neither a usable JSON ``keyword`` field nor a plain word.
    """
    prompt_final = (
        "Extract only the unique central theme of the following text in English in JSON format like this: "
        '{"keyword": "impact"}. Text: ' + query
    )
    result = generate_local(prompt_final, max_new_tokens=30, temperature=0.4)
    # If the generation helper echoes the prompt back, drop it so the example
    # JSON from the instructions is not mistaken for the model's answer.
    if result.startswith(prompt_final):
        result = result[len(prompt_final):]
    return _extract_keyword(result)
def generate_title(text: str) -> str:
    """Ask the model for a YouTube Short title based on ``text``.

    Args:
        text: Video content the title should describe.

    Returns:
        The model output with leading and trailing whitespace removed. The
        100-character limit is only requested in the prompt, not enforced here.
    """
    instructions = (
        "Generate a unique title for a YouTube Short video that is engaging and informative, "
        "maximum 100 characters, without emojis, introduction, or explanation. Content:\n"
    )
    raw_title = generate_local(instructions + text, max_new_tokens=50, temperature=0.9)
    return raw_title.strip()
def generate_description(text: str) -> str:
    """Ask the model for a YouTube video description based on ``text``.

    Args:
        text: Video content the description should summarise.

    Returns:
        The model output with leading and trailing whitespace removed.
    """
    instruction_lines = [
        "Write only the YouTube video description in English:\n",
        "1. A compelling opening line.\n",
        "2. A clear summary of the video (max 3 lines).\n",
        "3. End with 3 relevant hashtags.\n",
        "No emojis or introductions. Here is the text:\n",
    ]
    full_prompt = "".join(instruction_lines) + text
    raw_description = generate_local(full_prompt, max_new_tokens=300, temperature=0.7)
    return raw_description.strip()
def generate_tags(text: str) -> list:
    """Ask the model for comma-separated keywords describing ``text``.

    Args:
        text: Video context the keywords should be drawn from.

    Returns:
        The non-empty, whitespace-trimmed pieces of the model output split on
        commas. The 10-keyword limit is only requested in the prompt.
    """
    instructions = (
        "List only the important keywords for this YouTube video, separated by commas, "
        "maximum 10 keywords. Context: "
    )
    raw_output = generate_local(instructions + text, max_new_tokens=100, temperature=0.5)
    tags = []
    for piece in raw_output.split(","):
        cleaned = piece.strip()
        # Skip empty fragments produced by consecutive or trailing commas.
        if cleaned:
            tags.append(cleaned)
    return tags
|