# Source: Hugging Face Space (status at capture time: sleeping)
from transformers import AutoTokenizer, T5ForConditionalGeneration | |
import torch | |
import gradio as gr | |
import re | |
import json | |
# Hugging Face checkpoint id shared by the tokenizer and the model.
MODEL_NAME = "cointegrated/rut5-base-multitask"

# Both objects are created once at import time and reused by every request.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, legacy=False)
model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME)
def smart_truncate(text, max_len):
    """Shorten *text* to at most *max_len* characters, preferring a word boundary.

    Text that already fits is returned unchanged. Otherwise the text is cut at
    the last space found within the first ``max_len + 1`` characters and
    surrounding whitespace is stripped. If there is no usable space in that
    window (one long word, or only a leading space), the text is hard-cut at
    ``max_len`` characters instead.

    Args:
        text: The string to shorten.
        max_len: Maximum allowed length of the result, in characters.

    Returns:
        A string no longer than ``max_len`` characters.
    """
    if len(text) <= max_len:
        return text

    # Search one character past the limit so that a space sitting exactly at
    # index ``max_len`` still counts as a valid boundary.
    cut = text.rfind(" ", 0, max_len + 1)
    if cut <= 0:
        # No usable boundary: slicing with rfind's -1 would only drop the last
        # character and leave the result over the limit, and a boundary at
        # index 0 would yield an empty string.
        return text[:max_len].strip()
    return text[:cut].strip()
def generate_meta(description):
    """Generate a page title and meta description for a product description.

    The description is whitespace-normalised, embedded in a prompt and passed
    to the module-level ``model``. The first ``{...}`` span in the decoded
    output is parsed as JSON with ``"title"`` and ``"description"`` keys. If
    the output cannot be parsed that way, the result is built directly from
    the input text instead.

    Args:
        description: Free-form product description entered by the user.

    Returns:
        A dict with keys ``"title"`` and ``"description"``; the values are
        passed through ``smart_truncate`` with limits of 60 and 160
        characters respectively.
    """
    # Collapse runs of whitespace; the same text feeds the prompt and fallback.
    clean_text = re.sub(r'\s+', ' ', description or "")

    # Plain string pieces are used for the JSON sample so its braces are not
    # interpreted as f-string replacement fields.
    prompt = (
        "\n"
        "Create a title and description for product page.\n"
        "Product name: Fenix ARB-L18-4000U\n"
        f"Description: {clean_text}\n"
        "Output format:\n"
        '{"title": "...", "description": "..."}\n'
    )

    inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            num_beams=5,
            early_stopping=True,
            no_repeat_ngram_size=2,
        )
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)

    try:
        json_data = json.loads(re.search(r'\{.*\}', result, re.DOTALL).group())
        # Forced post-processing: the title must always mention the brand.
        if "Fenix" not in json_data["title"]:
            json_data["title"] = f"Аккумулятор Fenix {json_data['title']}"
        return {
            "title": smart_truncate(json_data["title"], 60),
            "description": smart_truncate(json_data["description"], 160),
        }
    except (AttributeError, KeyError, TypeError, ValueError):
        # Fallback when the model output has no JSON object (re.search gave
        # None), is invalid JSON, or lacks the expected string fields.
        return {
            "title": smart_truncate(f"Аккумулятор Fenix {clean_text}", 60),
            "description": smart_truncate(clean_text, 160),
        }
# Interface
def _meta_outputs(description):
    """Return generate_meta's result as a (title, description) tuple.

    generate_meta returns a dict keyed by plain strings, while the click
    handler below has two output components and needs one value per component.
    """
    meta = generate_meta(description)
    return meta["title"], meta["description"]


with gr.Blocks() as app:
    gr.Markdown("## Генератор метатегов (контроль длины)")
    inp = gr.Textbox(label="Описание товара", lines=7)
    btn = gr.Button("Сгенерировать")
    with gr.Row():
        out_title = gr.Textbox(label="Title (до 60 символов)", interactive=False)
        out_desc = gr.Textbox(label="Description (до 160 символов)", lines=3, interactive=False)
    # Values are assigned to the outputs in the order listed here.
    btn.click(_meta_outputs, inputs=inp, outputs=[out_title, out_desc])

app.launch()