import os
import gradio as gr
from langchain.chains import RetrievalQA
from langchain_pinecone import PineconeVectorStore
from langchain_openai import ChatOpenAI
from langchain_community.llms import HuggingFacePipeline
from langchain_community.embeddings import HuggingFaceEmbeddings
from dotenv import load_dotenv
import torch
from transformers import AutoModelForCausalLM, AutoModelForSequenceClassification, pipeline, AutoTokenizer
from huggingface_hub import login

# Load environment variables
load_dotenv()
login(token=os.getenv('HF_KEY'))

# Initialize Embedding Model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
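# Note: all-MiniLM-L6-v2 emits 384-dimensional vectors, so the Pinecone
# index below must have been created with dimension 384.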

# Pinecone Retriever
api_key = os.getenv('PINCE_CONE_LIGHT')
if api_key is None:
    raise ValueError("Pinecone API key missing. Set the PINCE_CONE_LIGHT environment variable.")

pc = PineconeVectorStore(
    pinecone_api_key=api_key,
    embedding=embedding_model,
    index_name='rag-rubic',
    namespace='vectors_lightmodel',
)
retriever = pc.as_retriever()
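# as_retriever() defaults to similarity search over the top 4 chunks; a
# hypothetical tweak if the chain needs more context per query:
# retriever = pc.as_retriever(search_kwargs={"k": 6})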

# LLM Options
llm_options = {
    "OpenAI": "gpt-4o-mini",
    "Microsoft-Phi": "microsoft/Phi-3.5-mini-instruct",
    "DeepSeek-R1": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    "Intel-tinybert": "Intel/dynamic_tinybert"
}
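# The OpenAI entry is an API model name; the others are Hugging Face hub
# ids that are downloaded locally on first use.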

def load_llm(name, model_name):
    """Loads the selected LLM only when it is actually needed."""
    if name == "OpenAI":
        openai_api_key = os.getenv('OPEN_AI_KEY')
        return ChatOpenAI(model=model_name, openai_api_key=openai_api_key)
    
    if "Phi" in name or "DeepSeek" in name:
        model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)
        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        # Greedy decoding with a small generation budget; max_length and
        # temperature are omitted because they conflict with max_new_tokens
        # and do_sample=False respectively.
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, eos_token_id=tokenizer.eos_token_id,
                        return_full_text=False, do_sample=False, num_return_sequences=1, max_new_tokens=50)
    elif "tinybert" in name:
        # Caveat: dynamic_tinybert is an encoder model, and a feature-extraction
        # pipeline returns embeddings rather than text, so this option is not
        # usable as the LLM of a RetrievalQA chain without extra plumbing.
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        pipe = pipeline("feature-extraction", model=model, tokenizer=tokenizer, truncation=True, padding=True, max_length=512)
    else:
        return None
    
    return HuggingFacePipeline(pipeline=pipe)

# Initialize default LLM; current_llm tracks which option is loaded
current_llm = list(llm_options.keys())[0]
llm = load_llm(current_llm, llm_options[current_llm])

# Create QA Retrieval Chain
qa = RetrievalQA.from_llm(llm=llm, retriever=retriever)
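# RetrievalQA.from_llm wires retriever and LLM into a "stuff"-style chain:
# each query is embedded, the most similar chunks are fetched from Pinecone,
# and the chunks plus the question are packed into a single prompt.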

# Chatbot function
def chatbot(selected_llm, user_input, chat_history):
    global llm, qa, current_llm
    # Reload the model and rebuild the QA chain only when the dropdown
    # selection has actually changed.
    if selected_llm != current_llm:
        llm = load_llm(selected_llm, llm_options[selected_llm])
        qa = RetrievalQA.from_llm(llm=llm, retriever=retriever)
        current_llm = selected_llm
    
    response = qa.invoke({"query": user_input})
    answer = response.get("result", "No response received.")
    # gr.Chatbot renders (user, bot) message pairs, one tuple per exchange
    chat_history.append((f"🧑‍💻 {user_input}", f"🤖 {answer}"))
    return chat_history, ""

# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# πŸ€– RAG-Powered Chatbot")
    llm_selector = gr.Dropdown(choices=list(llm_options.keys()), value=current_llm, label="Choose an LLM")
    chat_history = gr.State([])
    chatbot_ui = gr.Chatbot()
    user_input = gr.Textbox(label="πŸ’¬ Type your message and press Enter:")
    send_button = gr.Button("Send")
    
    send_button.click(chatbot, inputs=[llm_selector, user_input, chat_history], outputs=[chatbot_ui, user_input])
    user_input.submit(chatbot, inputs=[llm_selector, user_input, chat_history], outputs=[chatbot_ui, user_input])

demo.launch()
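# Assumption: when running in a notebook or behind a firewall, a public
# share link can be requested instead via demo.launch(share=True).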