Spaces:
Sleeping
Sleeping
import os | |
import gradio as gr | |
from langchain.document_loaders import TextLoader | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.embeddings import HuggingFaceEmbeddings | |
from langchain.vectorstores import FAISS | |
from langchain.chains import RetrievalQA | |
from langchain.llms import HuggingFaceHub | |
# Configuration
# Directory that is scanned for .txt source documents.
DOCS_DIR = "lore"
# Sentence-embedding model used to index the documents.
EMBEDDINGS_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
# Hugging Face Hub repository of the LLM that generates answers.
LLM_REPO = "IlyaGusev/saiga_mistral_7b"
# Hugging Face API token; add it to the Space's Secrets. None when unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
# 1. Load documents, tolerating per-file errors
def load_documents(docs_dir=None):
    """Load every ``.txt`` file of a directory with ``TextLoader``.

    Args:
        docs_dir: Directory to scan. When omitted, the module-level
            ``DOCS_DIR`` is used.

    Returns:
        A list with the documents of every file that loaded successfully.
        The list is empty when the directory holds no ``.txt`` files.

    Raises:
        OSError: If the directory cannot be listed (for example
            ``FileNotFoundError`` when it does not exist).
    """
    if docs_dir is None:
        docs_dir = DOCS_DIR
    docs = []
    # os.listdir() yields entries in arbitrary order; sorting makes the
    # document order the same on every start.
    for filename in sorted(os.listdir(docs_dir)):
        if not filename.endswith(".txt"):
            continue
        try:
            loader = TextLoader(
                os.path.join(docs_dir, filename),
                encoding="utf-8",
            )
            docs.extend(loader.load())
        except Exception as e:
            # Best effort by design: one unreadable file is reported and
            # skipped so the remaining files can still be loaded.
            print(f"Ошибка загрузки {filename}: {str(e)}")
    return docs
# 2. Initialise the embeddings, with a dependency check
def get_embeddings():
    """Create the embedding model named by ``EMBEDDINGS_MODEL``.

    Returns:
        A ``HuggingFaceEmbeddings`` instance for ``EMBEDDINGS_MODEL``.

    Raises:
        ImportError: If constructing the embeddings raised ``ImportError``.
            The error carries a hint about which packages to add to
            requirements.txt and is chained to the original exception.
    """
    try:
        return HuggingFaceEmbeddings(model_name=EMBEDDINGS_MODEL)
    except ImportError as err:
        # Chain explicitly so the traceback still names the package that
        # actually failed to import.
        raise ImportError(
            "Требуемые пакеты не установлены. "
            "Добавьте в requirements.txt:\n"
            "sentence-transformers\n"
            "torch\n"
            "transformers"
        ) from err
# 3. Build the knowledge base
def prepare_knowledge_base():
    """Split the loaded documents into chunks and index them with FAISS.

    Returns:
        A FAISS vector store built from the document chunks.

    Raises:
        ValueError: If no documents were loaded or splitting produced no
            chunks, so there is nothing to index.
    """
    documents = load_documents()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
        separators=["\n\n", "\n", " ", ""],
    )
    splits = text_splitter.split_documents(documents)
    if not splits:
        # Fail early with an actionable message instead of letting the
        # vector store fail on an empty input.
        raise ValueError(
            f"В каталоге '{DOCS_DIR}' нет текстов для индексации (.txt)"
        )
    embeddings = get_embeddings()
    return FAISS.from_documents(splits, embeddings)
# 4. Build the QA chain
def create_qa_chain():
    """Assemble a ``RetrievalQA`` chain over the knowledge base.

    The LLM client is created first, then the knowledge base is built and
    wrapped in a retriever that returns the 2 best-matching chunks.

    Returns:
        A ``RetrievalQA`` chain of type ``"stuff"``.
    """
    generation_params = {
        "temperature": 0.3,
        "max_new_tokens": 200,
    }
    hub_llm = HuggingFaceHub(
        repo_id=LLM_REPO,
        huggingfacehub_api_token=HF_TOKEN,
        model_kwargs=generation_params,
    )
    vector_store = prepare_knowledge_base()
    top_chunks_retriever = vector_store.as_retriever(search_kwargs={"k": 2})
    return RetrievalQA.from_chain_type(
        llm=hub_llm,
        chain_type="stuff",
        retriever=top_chunks_retriever,
    )
# 5. UI handler with error handling
# Lazily built QA chain shared by all requests; None until the first
# successful build.
_QA_CHAIN = None


def _get_qa_chain():
    """Return the shared QA chain, building it on first use."""
    global _QA_CHAIN
    if _QA_CHAIN is None:
        # Assigned only after create_qa_chain() succeeds, so a failed build
        # is retried on the next question.
        _QA_CHAIN = create_qa_chain()
    return _QA_CHAIN


def get_answer(question):
    """Answer a question with the QA chain.

    The chain is built once and reused, instead of reloading the documents
    and rebuilding the index for every question. As a consequence, changes
    to the source documents are picked up only after a restart.

    Args:
        question: The question text entered by the user.

    Returns:
        The answer truncated to 500 characters, or a message starting with
        "⚠️ Ошибка: " when building or running the chain raised.
    """
    try:
        result = _get_qa_chain().run(question)
        return result[:500]  # trim overly long answers
    except Exception as e:
        # UI boundary: show the failure to the user instead of raising.
        return f"⚠️ Ошибка: {str(e)}"
# Application start-up: build the UI, then launch the server
with gr.Blocks(title="📚 Лор-бот") as app:
    gr.Markdown("## 🧛 Вопрос-ответ по лору")
    question = gr.Textbox(
        label="Ваш вопрос",
        placeholder="Какие слабости у вампиров?",
    )
    output = gr.Textbox(label="Ответ", interactive=False)
    btn = gr.Button("Спросить")
    # A click passes the question text to get_answer and writes its return
    # value into the read-only answer box.
    btn.click(fn=get_answer, inputs=question, outputs=output)
# Listen on all interfaces, port 7860.
app.launch(server_name="0.0.0.0", server_port=7860)