import os
from typing import List, Dict

import pandas as pd
import streamlit as st
from tqdm import tqdm
from transformers import AutoTokenizer
from transformers.agents import Tool, HfApiEngine, ReactJsonAgent
from transformers.agents.llm_engine import MessageRole, get_clean_message_list
from huggingface_hub import InferenceClient
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.vectorstores.utils import DistanceStrategy
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from groq import Groq
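
# Streamlit front end for an agentic RAG assistant: PDFs under ./DATA are chunked,
# embedded, and indexed in FAISS; a ReAct agent queries them through a retriever tool
# and answers via Groq-hosted Llama models.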
# Read API credentials from the environment; never hard-code secrets in source.
token = os.getenv("HF_TOKEN")
groq_api_key = os.getenv("GROQ_API_KEY")
# model_id="mistralai/Mistral-7B-Instruct-v0.3"
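# Load every PDF under the DATA directory into LangChain documents.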
loader = DirectoryLoader('DATA', glob="**/*.pdf", show_progress=True)
docs = loader.load()
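# Chunk the documents with a splitter that measures length in gte-small tokens,
# so chunk sizes line up with the embedding model's tokenizer.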
tokenizer = AutoTokenizer.from_pretrained("thenlper/gte-small")
text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
    tokenizer,
    chunk_size=200,
    chunk_overlap=20,
    add_start_index=True,
    strip_whitespace=True,
    separators=["\n\n", "\n", ".", " ", ""],
)
# Split documents and remove duplicates
docs_processed = []
unique_texts = {}
for doc in tqdm(docs):
    new_docs = text_splitter.split_documents([doc])
    for new_doc in new_docs:
        if new_doc.page_content not in unique_texts:
            unique_texts[new_doc.page_content] = True
            docs_processed.append(new_doc)
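
# Embed the chunks with thenlper/gte-small on CPU; embeddings are left unnormalized.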
model_name = "thenlper/gte-small"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
embedding_model = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)
# Create the vector database
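# Cosine distance keeps similarity search consistent with how gte embeddings are compared.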
vectordb = FAISS.from_documents(
    documents=docs_processed,
    embedding=embedding_model,
    distance_strategy=DistanceStrategy.COSINE,
)
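
# Tool that exposes the FAISS index to the agent: given a query, it returns the 7 most similar chunks.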
class RetrieverTool(Tool):
    name = "retriever"
    description = "Using semantic similarity, retrieves some documents from the knowledge base that have the closest embeddings to the input query."
    inputs = {
        "query": {
            "type": "string",
            "description": "The query to perform. This should be semantically close to your target documents. Use the affirmative form rather than a question.",
        }
    }
    output_type = "string"

    def __init__(self, vectordb, **kwargs):
        super().__init__(**kwargs)
        self.vectordb = vectordb

    def forward(self, query: str) -> str:
        assert isinstance(query, str), "Your search query must be a string"
        docs = self.vectordb.similarity_search(query, k=7)
        return "\nRetrieved documents:\n" + "".join(
            f"===== Document {i} =====\n" + doc.page_content for i, doc in enumerate(docs)
        )
# Create an instance of the RetrieverTool
retriever_tool = RetrieverTool(vectordb)
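
# Note: this ChatGroq model is configured here but not wired into the agent below,
# which talks to Groq directly through the OpenAIEngine wrapper.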
llm = ChatGroq(
    model="llama3-70b-8192",
    temperature=0,
    max_tokens=2048,
)
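
# The agent emits tool-response messages; Groq's OpenAI-compatible API has no such role,
# so they are remapped to the user role before being sent.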
openai_role_conversions = {
    MessageRole.TOOL_RESPONSE: MessageRole.USER,
}
class OpenAIEngine:
    """LLM engine that routes the agent's messages through the Groq chat completions API."""

    def __init__(self, model_name="llama-3.3-70b-versatile"):
        self.model_name = model_name
        self.client = Groq(api_key=groq_api_key)

    def __call__(self, messages, stop_sequences=[]):
        messages = get_clean_message_list(messages, role_conversions=openai_role_conversions)
        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=messages,
            stop=stop_sequences,
            temperature=0.5,
            max_tokens=2048,
        )
        return response.choices[0].message.content
llm_engine = OpenAIEngine()
# Create the agent
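# ReactJsonAgent plans tool calls as JSON actions and stops after at most 4 reasoning iterations.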
agent = ReactJsonAgent(tools=[retriever_tool], llm_engine=llm_engine, max_iterations=4, verbose=2)
# Function to run the agent
def run_agentic_rag(question: str) -> str:
    enhanced_question = f"""Using the information contained in your knowledge base, which you can access with the 'retriever' tool,
give a comprehensive answer to the question below.
Respond only to the question asked; the response should be concise and relevant to the question.
If you cannot find information, do not give up and try calling your retriever again with different arguments!
Make sure to have covered the question completely by calling the retriever tool several times with semantically different queries.
Your queries should not be questions but affirmative-form sentences: e.g. rather than "How do I load a model from the Hub in bf16?", the query should be "load a model from the Hub bf16 weights".

Question:
{question}"""
    return agent.run(enhanced_question)
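
# Example usage (hypothetical question, for illustration only):
#   answer = run_agentic_rag("How do I export a report from Telto?")
#   print(answer)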
# def get_llm_hf_inference(model_id=model_id, max_new_tokens=128, temperature=0.1):
#     """
#     Returns a language model for HuggingFace inference.
#
#     Parameters:
#     - model_id (str): The ID of the HuggingFace model repository.
#     - max_new_tokens (int): The maximum number of new tokens to generate.
#     - temperature (float): The temperature for sampling from the model.
#
#     Returns:
#     - llm (HuggingFaceEndpoint): The language model for HuggingFace inference.
#     """
#     llm = HuggingFaceEndpoint(
#         repo_id=model_id,
#         max_new_tokens=max_new_tokens,
#         temperature=temperature,
#         token=os.getenv("HF_TOKEN"),
#     )
#     return llm
def get_response(chat_history, user_text):
    """
    Generates a response from the agentic RAG pipeline.

    Args:
        chat_history (list): The list of previous chat messages.
        user_text (str): The user's input text.

    Returns:
        tuple: A tuple containing the generated response and the updated chat history.
    """
    # Run the agent once and reuse the answer for both the return value and the history.
    response = run_agentic_rag(user_text)
    chat_history.append({'role': 'user', 'content': user_text})
    chat_history.append({'role': 'assistant', 'content': response})
    return response, chat_history
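
# ----- Streamlit UI -----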
st.set_page_config(page_title="Hi, I am Telto assistant", page_icon="🤗")
st.title("Telto Support")
st.markdown(f"*This is telto assistant. For any guidance on how to use Telto, feel free to ask me.*")
# Initialize session state for avatars
if "avatars" not in st.session_state:
st.session_state.avatars = {'user': None, 'assistant': None}
# Initialize session state for user text input
if 'user_text' not in st.session_state:
st.session_state.user_text = None
if "system_message" not in st.session_state:
st.session_state.system_message = "friendly AI conversing with a human user"
if "starter_message" not in st.session_state:
st.session_state.starter_message = "Hello, there! How can I help you today?"
# Sidebar for settings
with st.sidebar:
    st.header("System Settings")

    # Avatar Selection
    st.markdown("*Select Avatars:*")
    col1, col2 = st.columns(2)
    with col1:
        st.session_state.avatars['assistant'] = st.selectbox(
            "AI Avatar", options=["🤗", "💬", "🤖"], index=0
        )
    with col2:
        st.session_state.avatars['user'] = st.selectbox(
            "User Avatar", options=["👤", "👱♂️", "👨🏾", "👩", "👧🏾"], index=0
        )

    # Reset Chat History
    reset_history = st.button("Reset Chat History")
# Initialize or reset chat history
if "chat_history" not in st.session_state or reset_history:
st.session_state.chat_history = [{"role": "assistant", "content": st.session_state.starter_message}]
# Chat interface
chat_interface = st.container(border=True)
with chat_interface:
    output_container = st.container()
    st.session_state.user_text = st.chat_input(placeholder="Enter your text here.")
# Display chat messages
with output_container:
    # For every message in the history
    for message in st.session_state.chat_history:
        # Skip the system message
        if message['role'] == 'system':
            continue
        # Display the chat message using the correct avatar
        with st.chat_message(message['role'],
                             avatar=st.session_state['avatars'][message['role']]):
            st.markdown(message['content'])

    # When the user enters new text:
    if st.session_state.user_text:
        # Display the user's new message immediately
        with st.chat_message("user",
                             avatar=st.session_state.avatars['user']):
            st.markdown(st.session_state.user_text)

        # Display a spinner while waiting for the response
        with st.chat_message("assistant",
                             avatar=st.session_state.avatars['assistant']):
            with st.spinner("Thinking..."):
                # Run the agentic RAG pipeline with the user text and chat history
                response, st.session_state.chat_history = get_response(
                    user_text=st.session_state.user_text,
                    chat_history=st.session_state.chat_history,
                )
                st.markdown(response)