File size: 3,207 Bytes
8b05224
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339bd9a
8b05224
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import os
import re
import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
from dotenv import load_dotenv


# Load the model and the tokenizer.
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): these two imports repeat the ones at the top of the file.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Identifier handed to both from_pretrained() calls below.
model_id = "Qwen/Qwen2.5-0.5B"

# NOTE(review): trust_remote_code=True presumably lets code shipped with the
# checkpoint run locally — confirm it is actually required for this model.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
# The model is requested in float32 and then moved to the device chosen above.
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32, trust_remote_code=True).to(device)


def generate_local(prompt: str, max_new_tokens: int = 350, temperature: float = 0.7) -> str:
    """Generate a completion for ``prompt`` with the module-level model.

    Args:
        prompt: Text passed to the tokenizer unchanged.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature. A value of zero or less selects
            greedy decoding instead of sampling.

    Returns:
        The newly generated text only, decoded with special tokens removed.
        The prompt itself is not included in the result.
    """
    # Keep the inputs on the same device as the model.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        # Reuse EOS as the padding token for generation.
        "pad_token_id": tokenizer.eos_token_id,
    }
    if temperature > 0:
        generation_kwargs["do_sample"] = True
        generation_kwargs["temperature"] = temperature
    else:
        # Sampling needs a strictly positive temperature; decode greedily.
        generation_kwargs["do_sample"] = False

    output_ids = model.generate(**inputs, **generation_kwargs)

    # The returned sequence starts with the prompt tokens; skip them so that
    # callers receive the completion rather than an echo of their own prompt.
    completion_ids = output_ids[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True)



def generate_script(prompt: str, word_count: int = 60) -> str:
    """Ask the local model for a short YouTube video script.

    Args:
        prompt: Subject of the video; surrounding whitespace is removed
            before it is inserted into the instructions.
        word_count: Script length, in words, requested in the instructions.

    Returns:
        Whatever ``generate_local`` returns for the assembled instructions,
        using its default generation settings.
    """
    topic = prompt.strip()
    instruction_parts = [
        "You are a professional video scriptwriter. ",
        f"Write a script for a short YouTube video about: {topic}.\n",
        f"The video must be {word_count} words long, engaging, clear, and formatted as plain text.",
    ]
    return generate_local("".join(instruction_parts))


def _extract_keyword(reply: str) -> str:
    """Pull the keyword out of a model reply, returning "" when none is found."""
    # Try the whole reply first, then every brace-delimited fragment, because
    # the model may wrap the JSON object in extra text.
    candidates = [reply, *re.findall(r'\{[^{}]*\}', reply)]
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        # Only a JSON object with a non-empty string keyword is usable.
        if isinstance(parsed, dict):
            keyword = parsed.get("keyword")
            if isinstance(keyword, str) and keyword.strip():
                return keyword.strip().lower()

    # No usable JSON: fall back to the first plain word of 3+ letters,
    # skipping the literal field name "keyword".
    for word in re.findall(r'\b[a-zA-Z]{3,}\b', reply):
        if word.lower() != "keyword":
            return word.lower()
    return ""


def one_word(query: str) -> str:
    """Ask the local model for the single central theme of ``query``.

    Args:
        query: Text whose main theme should be extracted.

    Returns:
        The keyword in lowercase, or an empty string when the reply contains
        neither a usable ``{"keyword": ...}`` object nor a plain word.
    """
    prompt_final = (
        "Extract only the unique central theme of the following text in English in JSON format like this: "
        '{"keyword": "impact"}. Text: ' + query
    )
    result = generate_local(prompt_final, max_new_tokens=30, temperature=0.4)
    # If the reply begins with an echo of the prompt, drop it so the example
    # JSON in the instructions is not mistaken for the answer.
    if result.startswith(prompt_final):
        result = result[len(prompt_final):]
    return _extract_keyword(result)


def generate_title(text: str) -> str:
    """Ask the local model for a YouTube Short title describing ``text``.

    Args:
        text: Content the title should describe; appended to the instructions.

    Returns:
        The reply from ``generate_local`` (up to 50 new tokens, temperature
        0.9) with leading and trailing whitespace removed.
    """
    instructions = (
        "Generate a unique title for a YouTube Short video that is engaging and informative, "
        "maximum 100 characters, without emojis, introduction, or explanation. Content:\n"
    )
    full_prompt = instructions + text
    raw_title = generate_local(full_prompt, max_new_tokens=50, temperature=0.9)
    return raw_title.strip()


def generate_description(text: str) -> str:
    """Ask the local model for a YouTube video description of ``text``.

    Args:
        text: Content to describe; placed on the line after the instructions.

    Returns:
        The reply from ``generate_local`` (up to 300 new tokens, temperature
        0.7) with leading and trailing whitespace removed.
    """
    # Each entry is one line of the final prompt; the content comes last.
    prompt_lines = [
        "Write only the YouTube video description in English:",
        "1. A compelling opening line.",
        "2. A clear summary of the video (max 3 lines).",
        "3. End with 3 relevant hashtags.",
        "No emojis or introductions. Here is the text:",
        text,
    ]
    description = generate_local(
        "\n".join(prompt_lines),
        max_new_tokens=300,
        temperature=0.7,
    )
    return description.strip()


def generate_tags(text: str) -> list:
    """Ask the local model for keyword tags describing ``text``.

    Args:
        text: Context the keywords should be drawn from.

    Returns:
        At most 10 non-empty tags in the order the model produced them, with
        surrounding whitespace removed and case-insensitive duplicates dropped.
    """
    max_tags = 10  # Same limit the instructions below ask the model to respect.
    prompt_final = (
        "List only the important keywords for this YouTube video, separated by commas, "
        "maximum 10 keywords. Context: " + text
    )
    result = generate_local(prompt_final, max_new_tokens=100, temperature=0.5)

    tags = []
    seen = set()
    # Split on newlines as well as commas so a one-per-line reply does not
    # leave line breaks embedded inside a tag.
    for piece in re.split(r"[,\n]", result):
        tag = piece.strip()
        key = tag.casefold()
        if not tag or key in seen:
            continue
        seen.add(key)
        tags.append(tag)
        if len(tags) == max_tags:
            break
    return tags