Spaces:
Sleeping
Sleeping
File size: 3,222 Bytes
05647e2 8c5a7b2 95f2e49 05647e2 95f2e49 a73e1ef 8c5a7b2 95f2e49 8c5a7b2 95f2e49 8c5a7b2 4b347f0 8c5a7b2 4b347f0 95f2e49 4b347f0 b99265e 95f2e49 8c5a7b2 95f2e49 4b347f0 95f2e49 4b347f0 95f2e49 8c5a7b2 95f2e49 b99265e 8c5a7b2 95f2e49 b99265e a73e1ef 95f2e49 8c5a7b2 95f2e49 a73e1ef 95f2e49 4b347f0 95f2e49 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import os
import gradio as gr
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub
# --- Configuration ---
DOCS_DIR = "lore"  # directory scanned for .txt lore files
EMBEDDINGS_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
LLM_REPO = "IlyaGusev/saiga_mistral_7b"
HF_TOKEN = os.getenv("HF_TOKEN")  # set HF_TOKEN in the Space's Secrets
# 1. Document loading with per-file error handling
def load_documents():
    """Load every UTF-8 ``.txt`` file from DOCS_DIR as LangChain documents.

    A file that fails to load is skipped with a diagnostic message, so one
    corrupt file cannot abort the whole knowledge-base build.

    Returns:
        list: the concatenated documents from all readable ``.txt`` files.
    """
    docs = []
    # sorted() makes load order (and thus chunk/index order) deterministic;
    # os.listdir order is filesystem-dependent.
    for filename in sorted(os.listdir(DOCS_DIR)):
        if not filename.endswith(".txt"):
            continue
        try:
            loader = TextLoader(
                os.path.join(DOCS_DIR, filename),
                encoding="utf-8"
            )
            docs.extend(loader.load())
        except Exception as e:
            # Bug fix: the original message printed the literal "(unknown)"
            # instead of interpolating the name of the file that failed.
            print(f"Ошибка загрузки {filename}: {e}")
    return docs
# 2. Embeddings initialisation with a dependency check
def get_embeddings():
    """Construct the multilingual sentence-transformer embedding model.

    Raises:
        ImportError: with installation instructions when the required
            packages are not available in the environment.
    """
    try:
        return HuggingFaceEmbeddings(model_name=EMBEDDINGS_MODEL)
    except ImportError:
        install_hint = (
            "Требуемые пакеты не установлены. "
            "Добавьте в requirements.txt:\n"
            "sentence-transformers\n"
            "torch\n"
            "transformers"
        )
        raise ImportError(install_hint)
# 3. Knowledge-base preparation
def prepare_knowledge_base():
    """Split the loaded lore documents into overlapping chunks and index them.

    Returns:
        FAISS: a vector store built over 500-character chunks (50 overlap).
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
        separators=["\n\n", "\n", " ", ""],
    )
    chunks = splitter.split_documents(load_documents())
    return FAISS.from_documents(chunks, get_embeddings())
# 4. QA chain construction
def create_qa_chain():
    """Wire the hosted LLM to a retriever over the FAISS index.

    The retriever returns the 2 most similar chunks; the "stuff" chain
    pastes them directly into the prompt.
    """
    retriever = prepare_knowledge_base().as_retriever(
        search_kwargs={"k": 2}
    )
    language_model = HuggingFaceHub(
        repo_id=LLM_REPO,
        huggingfacehub_api_token=HF_TOKEN,
        model_kwargs={"temperature": 0.3, "max_new_tokens": 200},
    )
    return RetrievalQA.from_chain_type(
        llm=language_model,
        chain_type="stuff",
        retriever=retriever,
    )
# 5. UI handler with error reporting
def get_answer(question):
    """Answer *question* through the QA chain.

    Any failure (missing token, network error, model error) is converted
    into a user-visible message instead of propagating to Gradio.
    """
    try:
        chain = create_qa_chain()
        answer = chain.run(question)
    except Exception as e:
        return f"⚠️ Ошибка: {str(e)}"
    return answer[:500]  # cap overly long answers
# Application entry point: a minimal Gradio UI wired to get_answer.
with gr.Blocks(title="📚 Лор-бот") as app:
    gr.Markdown("## 🧛 Вопрос-ответ по лору")
    question = gr.Textbox(label="Ваш вопрос", placeholder="Какие слабости у вампиров?")
    output = gr.Textbox(label="Ответ", interactive=False)
    ask_button = gr.Button("Спросить")
    ask_button.click(get_answer, inputs=question, outputs=output)

# Bind to all interfaces on the port HF Spaces expects.
app.launch(server_name="0.0.0.0", server_port=7860)