Spaces:

Loversofdeath
/

lepidus

Sleeping

File size: 2,679 Bytes

235412b
a7aa125
 
404755d
a7aa125
 
9faaee5
a7aa125
 
a73e1ef
a7aa125
 
8c5a7b2
a7aa125
 
 
235412b
a7aa125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2782135
235412b
a7aa125
 
 
a73e1ef
a7aa125
 
84c49b0
a7aa125
 
 
84c49b0
a7aa125
 
84c49b0
4b347f0
a7aa125

import os
import glob
import gradio as gr
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Load the sentence-embedding model
model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')

# Path to the folder with the lore files
LORE_DIR = "./lore"

# Text chunking parameters
CHUNK_SIZE = 500  # chunk length, in characters
CHUNK_OVERLAP = 100  # overlap between consecutive chunks, for smoother transitions

# Load the lore files and split their text into overlapping chunks
def load_lore_chunks(lore_dir=None, chunk_size=None, chunk_overlap=None):
    """Read every ``*.txt`` file in the lore folder and cut it into chunks.

    Each file is read as UTF-8, cleaned (see below) and sliced with a sliding
    window of ``chunk_size`` characters that advances by
    ``chunk_size - chunk_overlap`` characters. Chunks are stripped of
    surrounding whitespace and empty chunks are dropped.

    Args:
        lore_dir: Folder to scan for ``*.txt`` files. Defaults to ``LORE_DIR``.
        chunk_size: Window length in characters. Defaults to ``CHUNK_SIZE``.
        chunk_overlap: Number of characters shared by consecutive windows.
            Defaults to ``CHUNK_OVERLAP``.

    Returns:
        A list of chunk strings, file by file in sorted path order. The list
        is empty when the folder holds no ``*.txt`` files (a notice is printed
        in that case).

    Raises:
        ValueError: If ``chunk_size`` is not positive, ``chunk_overlap`` is
            negative, or the overlap is not smaller than the chunk size (the
            window would never advance).
    """
    if lore_dir is None:
        lore_dir = LORE_DIR
    if chunk_size is None:
        chunk_size = CHUNK_SIZE
    if chunk_overlap is None:
        chunk_overlap = CHUNK_OVERLAP

    step = chunk_size - chunk_overlap
    if chunk_size <= 0 or chunk_overlap < 0 or step <= 0:
        raise ValueError(
            "chunk_size must be positive and chunk_overlap must satisfy "
            f"0 <= chunk_overlap < chunk_size (got {chunk_size}, {chunk_overlap})"
        )

    # Sorted so that chunk order does not depend on directory listing order.
    files = sorted(glob.glob(os.path.join(lore_dir, "*.txt")))
    if not files:
        print(f"В папке {lore_dir} нет файлов.")

    chunks = []
    for file_path in files:
        with open(file_path, "r", encoding="utf-8") as file:
            text = file.read()
        # Clean-up: control characters (newlines and tabs included) and code
        # points above U+FFFF are replaced with a space.
        text = "".join(c if 0x20 <= ord(c) <= 0xFFFF else " " for c in text)
        for start in range(0, len(text), step):
            chunk = text[start:start + chunk_size].strip()
            if chunk:
                chunks.append(chunk)
            # Once a window reaches the end of the text, any further window
            # would only be a suffix of this one - stop to avoid duplicates.
            if start + chunk_size >= len(text):
                break
    return chunks

# Load all chunks and compute their embeddings
print("🚀 Загружаем лор...")
lore_chunks = load_lore_chunks()
if not lore_chunks:
    # NOTE(review): an empty corpus only produces this warning; execution
    # continues and the (empty) chunk list is still passed to the encoder.
    print("⚠️ Внимание: нет данных для поиска.")
lore_embeddings = model.encode(lore_chunks)
print(f"✅ Загружено {len(lore_chunks)} кусков текста.")

# Search for the best answer
def find_best_answer(question, top_k=3):
    """Return the lore chunks most similar to ``question``.

    The question is embedded with the module-level ``model`` and compared to
    ``lore_embeddings`` by cosine similarity; the ``top_k`` highest-scoring
    chunks are returned, best match first.

    Args:
        question: The user's question text.
        top_k: How many chunks to return at most. Defaults to 3.

    Returns:
        The selected chunks joined by blank lines, or a warning message when
        no lore chunks were loaded.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        # A slice of [-0:] would select every chunk, so reject it explicitly.
        raise ValueError("top_k must be at least 1")
    if not lore_chunks:
        # With nothing indexed there is no embedding matrix to compare with;
        # answer with the same warning that is printed at start-up.
        return "⚠️ Внимание: нет данных для поиска."

    question_embedding = model.encode([question])[0]
    similarities = cosine_similarity([question_embedding], lore_embeddings)[0]
    # argsort is ascending: take the last top_k indices and reverse them.
    best_indices = similarities.argsort()[-top_k:][::-1]
    return "\n\n".join(lore_chunks[idx] for idx in best_indices)

# Gradio interface
def _respond(message, history):
    """Chat callback: answer ``message`` from the lore; ``history`` is unused.

    ChatInterface expects the callback to return the reply itself, so only
    the answer text is returned (not a ``(reply, history)`` tuple).
    """
    return find_best_answer(message)


with gr.Blocks() as demo:
    gr.Markdown("## 🧛‍♂️ ЛОР-БОТ: задавай вопросы о мире!")

    chat = gr.ChatInterface(
        fn=_respond,
        examples=[
            ["Какие кланы есть у вампиров?"],
            ["Чем оборотни отличаются от ликантропов?"],
            ["Где находится замок теней?"]
        ],
        title="Лор-бот",
        theme="soft"
    )

# For running locally:
if __name__ == "__main__":
    demo.launch()