File size: 2,531 Bytes
a90ada2
 
 
3a75e4a
a90ada2
0148b1c
 
fff4dbb
0148b1c
 
 
 
a90ada2
0148b1c
4058b9f
0148b1c
a2a249e
 
4058b9f
a2a249e
4058b9f
259ce62
4058b9f
0148b1c
4058b9f
0148b1c
 
 
 
 
 
 
 
4058b9f
0148b1c
 
4058b9f
 
 
 
 
 
 
 
 
 
 
 
0148b1c
4058b9f
 
 
 
 
a90ada2
0148b1c
 
 
4058b9f
0148b1c
 
78b6d85
4058b9f
 
 
 
 
a90ada2
0148b1c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import json
import re

import gradio as gr
import torch
from transformers import AutoTokenizer, T5ForConditionalGeneration

# Single source of truth for the checkpoint id so the tokenizer and the
# model are always loaded from the same checkpoint.
_CHECKPOINT = "cointegrated/rut5-base-multitask"

# Both objects are loaded once at import time and shared by every request.
model = T5ForConditionalGeneration.from_pretrained(_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT, legacy=False)

def smart_truncate(text, max_len):
    """Shorten *text* to at most *max_len* characters, preferring a word boundary.

    Text that already fits is returned unchanged. Otherwise the text is cut
    at the last space found within the first ``max_len + 1`` characters and
    surrounding whitespace is stripped. If no usable space exists (one long
    word, or only leading whitespace before the limit), the text is hard-cut
    at ``max_len`` so the length limit is always honoured.

    Args:
        text: The string to shorten.
        max_len: Maximum allowed length of the result, in characters.

    Returns:
        A string whose length never exceeds ``max_len`` (empty when
        ``max_len`` is zero or negative and the text does not fit).
    """
    if len(text) <= max_len:
        return text
    if max_len <= 0:
        return ""

    # Look one character past the limit: a space sitting exactly at index
    # max_len means the last word fits entirely and can be kept.
    cut = text.rfind(' ', 0, max_len + 1)

    # rfind returns -1 when there is no space; slicing with -1 would only drop
    # the final character and leave the text far longer than max_len.
    shortened = text[:cut].strip() if cut > 0 else ""
    if not shortened:
        # No word boundary produced any content: fall back to a hard cut.
        shortened = text[:max_len].strip()
    return shortened

# Length limits (in characters) for the generated SEO fields.
_TITLE_MAX_LEN = 60
_DESC_MAX_LEN = 160


def _fallback_meta(description):
    """Build a (title, description) pair straight from the raw product text."""
    clean_text = re.sub(r'\s+', ' ', description).strip()
    title = smart_truncate(f"Аккумулятор Fenix {clean_text}".strip(), _TITLE_MAX_LEN)
    desc = smart_truncate(clean_text, _DESC_MAX_LEN)
    return title, desc


def _parse_meta(generated):
    """Return the JSON object embedded in *generated*, or None if there is none."""
    json_match = re.search(r'\{.*\}', generated, re.DOTALL)
    if json_match is None:
        return None
    try:
        data = json.loads(json_match.group())
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return None
    # Only a JSON object can carry the "title"/"description" keys.
    return data if isinstance(data, dict) else None


def _pick_field(value, max_len, fallback):
    """Return *value* truncated to *max_len*, or *fallback* if it is unusable."""
    if isinstance(value, str) and value.strip():
        return smart_truncate(value.strip(), max_len)
    return fallback


def generate_meta(description):
    """Generate an SEO title and description for a product description.

    The model is prompted to answer with a JSON object holding ``title`` and
    ``description``. When the output contains no parseable JSON object, or a
    field is missing, empty or not a string, that field is derived from the
    input text instead.

    Args:
        description: Free-form product description; ``None`` is treated as
            an empty string.

    Returns:
        A ``(title, description)`` tuple of two separate strings, truncated
        to 60 and 160 characters respectively.
    """
    description = description or ""
    fallback_title, fallback_desc = _fallback_meta(description)

    # Example prompt for the model. The literal braces of the JSON template
    # are doubled so the f-string does not treat them as a replacement field.
    prompt = f"""
Create a title and description for product page.
Product name: Fenix ARB-L18-4000U
Description: {description.strip()}
Output format:
{{"title": "SEO заголовок до 60 символов", "description": "SEO описание до 160 символов"}}
"""

    inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            num_beams=5,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    json_data = _parse_meta(result)
    if json_data is None:
        return fallback_title, fallback_desc

    title = _pick_field(json_data.get("title"), _TITLE_MAX_LEN, fallback_title)
    desc = _pick_field(json_data.get("description"), _DESC_MAX_LEN, fallback_desc)

    return title, desc  # two separate strings, one per Gradio output component


# User interface: one input textbox, a button, and two read-only outputs.
with gr.Blocks() as app:
    gr.Markdown("## Генератор метатегов (контроль длины)")

    # Multi-line input for the product description passed to generate_meta.
    inp = gr.Textbox(label="Описание товара", lines=7)
    btn = gr.Button("Сгенерировать")

    # Output fields are created inside one gr.Row and are not user-editable.
    with gr.Row():
        out_title = gr.Textbox(label="Title (до 60)", interactive=False)
        out_desc = gr.Textbox(label="Description (до 160)", lines=3, interactive=False)

    # generate_meta returns a (title, description) tuple; the two values are
    # mapped onto the two output textboxes in the order listed here.
    btn.click(fn=generate_meta, inputs=inp, outputs=[out_title, out_desc])

# Called at module top level, so the app is launched whenever this file is
# executed or imported.
app.launch()