|
import gradio as gr |
|
from langchain_community.llms import HuggingFaceHub |
|
from langchain_core.output_parsers import StrOutputParser |
|
from langchain import hub |
|
from langchain_community.document_loaders import PyPDFLoader |
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain_community.vectorstores import Chroma |
|
from langchain_huggingface import HuggingFaceEmbeddings |
|
from huggingface_hub import InferenceClient |
|
from rerankers import Reranker |
|
import os |
|
|
|
# ---------------------------------------------------------------------------
# One-time pipeline setup. Everything below runs when the module is imported:
# the PDF is read, split, embedded and indexed, and the remote generation
# client and the local reranker are created.
# ---------------------------------------------------------------------------

# Path of the source document, relative to the current working directory.
# The "ñ" was previously stored as a mis-decoded character, which points at a
# file name that almost certainly does not exist on disk.
PDF_PATH = "Constitucion_española.pdf"

# Model identifiers used by the pipeline.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
GENERATION_MODEL = "google/flan-t5-base"
RERANKER_MODEL = "answerdotai/answerai-colbert-small-v1"

# Load the PDF into LangChain documents.
loader = PyPDFLoader(PDF_PATH)
documents = loader.load()

# Split the documents into overlapping chunks (size 1000, overlap 100) so
# that each retrieved passage stays small enough to be used as prompt context.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs_split = text_splitter.split_documents(documents)

# Embed the chunks and index them in a Chroma vector store.
# NOTE(review): no persist directory is passed, so the index is presumably
# rebuilt on every start -- confirm this is intended.
embedding_function = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
vectordb = Chroma.from_documents(docs_split, embedding_function)

# Client for the hosted text-generation model. os.getenv returns None when
# HUGGINGFACEHUB_API_TOKEN is unset, in which case None is passed as the token.
client = InferenceClient(GENERATION_MODEL, token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))

# ColBERT reranker used to pick the best passage among the retrieved chunks.
ranker = Reranker(RERANKER_MODEL, model_type='colbert')
|
|
|
def generate_text(context, query):
    """Generate an answer to *query* grounded on *context*.

    Both values are interpolated into a single "Context: ... Question: ..."
    prompt, which is sent to the module-level inference ``client``.

    Args:
        context: Passage to give the model as supporting context.
        query: The user's question.

    Returns:
        Whatever ``client.text_generation`` returns for the prompt.
    """
    prompt = f"Context: {context} Question: {query}"
    return client.text_generation(prompt=prompt)
|
|
|
def test_rag_reranking(query, ranker):
    """Answer *query* via retrieval, reranking and generation.

    Steps:
      1. Retrieve candidate chunks (with scores) from the module-level
         ``vectordb``.
      2. Keep the text of the chunks whose score is below the threshold.
      3. Rerank the kept chunks against the query and take the best one.
      4. Pass that chunk and the query to ``generate_text``.

    Args:
        query: The user's question.
        ranker: A ``rerankers`` ranker exposing ``rank(query=..., docs=...)``.

    Returns:
        The generated answer, or a fixed Spanish fallback message when no
        retrieved chunk passes the score filter.
    """
    # NOTE(review): the scale of the score depends on the vector store's
    # distance metric -- confirm that a cutoff of 7 actually filters anything.
    max_score = 7

    scored_docs = vectordb.similarity_search_with_score(query)

    # Read the chunk text directly from the document instead of going through
    # its JSON serialisation.
    context = [doc.page_content for doc, score in scored_docs if score < max_score]

    if not context:
        return "No se encontró información suficiente para responder."

    # The rerankers API takes the candidates as ``docs`` and has no ``top_k``
    # argument on ``rank``; the best hit is selected on the returned
    # RankedResults, and each Result exposes the passage via ``.text``.
    reranked = ranker.rank(query=query, docs=context)
    best_context = reranked.top_k(1)[0].text

    return generate_text(best_context, query)
|
|
|
def responder_chat(message, history):
    """Chat callback: answer *message* through the RAG pipeline.

    Args:
        message: The latest user message.
        history: Previous conversation turns; accepted but not used here.

    Returns:
        The result of ``test_rag_reranking`` for the message, using the
        module-level ``ranker``.
    """
    return test_rag_reranking(message, ranker)
|
|
|
# Chat UI wired to the RAG callback. The title's accented characters were
# previously stored mis-decoded and would have been shown garbled to users.
demo = gr.ChatInterface(
    fn=responder_chat,
    title="Chatbot sobre la constitución española",
    theme="soft",
)


# Start the web app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|