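# RAG chatbot over "La vuelta al mundo en 80 días" (Around the World in
# Eighty Days): loads the novel from PDF, indexes it in Chroma, reranks
# retrieved chunks with a ColBERT model, and answers through a Gradio chat UI.
#
# Assumed dependencies (the original pins none; this list is a best guess):
#   pip install gradio transformers accelerate langchain-community \
#       langchain-huggingface langchain-text-splitters chromadb pypdf rerankers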
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from rerankers import Reranker

# Load the PDF and split it into chunks
loader = PyPDFLoader("80dias.pdf")
documents = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
splits = splitter.split_documents(documents)
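# Note: RecursiveCharacterTextSplitter measures chunk_size and chunk_overlap
# in characters (via len), not tokens.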

# Create embeddings and build the vector store
embedding_model = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
vectordb = Chroma.from_documents(splits, embedding=embeddings)
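# The index is built in memory; passing persist_directory=... to
# Chroma.from_documents would keep it across runs (optional, not in the original).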

# Initialize the reranker
ranker = Reranker("answerdotai/answerai-colbert-small-v1", model_type="colbert")
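# answerai-colbert-small-v1 is a compact late-interaction (ColBERT) reranker;
# the weights are downloaded from the Hugging Face Hub on first use.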

# Load the language model from Hugging Face
model_id = "tiiuae/falcon-7b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto")
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
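# Note: Falcon-7B needs roughly 15 GB of GPU memory in half precision (rough
# estimate); a smaller instruct model could be swapped in for CPU-only tests.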

# Main RAG function: retrieve, filter, rerank, and generate
def rag_chat(message, history):
    query = message
    results = vectordb.similarity_search_with_score(query)
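    # similarity_search_with_score returns (Document, distance) pairs, where a
    # lower distance means a closer match; the LangChain default fetches k=4.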
    
    # Keep only chunks within a loose distance cutoff
    context = []
    for doc, score in results:
        if score < 7:
            context.append(doc.page_content)

    if not context:
        return "No tengo información suficiente para responder a esa pregunta."

    # Rerank the filtered chunks and keep the single best one
    # (rank() returns a RankedResults object, not a plain list)
    ranking = ranker.rank(query=query, docs=context)
    best_context = ranking.top_k(1)[0].document.text

    # Build the final prompt
    prompt = f"""Responde a la siguiente pregunta utilizando solo el contexto proporcionado:

Contexto:
{best_context}

Pregunta: {query}
Respuesta:"""

    # Generate the answer; the pipeline echoes the prompt, so keep only what follows "Respuesta:"
    output = generator(prompt, max_new_tokens=100, do_sample=False)[0]["generated_text"]
    response = output.split("Respuesta:")[-1].strip()
    return response

# Gradio Chat Interface
iface = gr.ChatInterface(
    fn=rag_chat,
    title="Chat Julio Verne - RAG",
    description="Pregunta lo que quieras sobre *La vuelta al mundo en 80 días* de Julio Verne.",
    chatbot=gr.Chatbot(type="messages"),
    theme="default"
)

iface.launch()
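
# launch() serves the app locally at http://127.0.0.1:7860 by default;
# passing share=True creates a temporary public URL.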