import os
import logging
from dotenv import load_dotenv
import subprocess
import google.auth
# Force-install Gradio 4.44.1 (must run before `import gradio`)
subprocess.run(["pip", "install", "--upgrade", "gradio==4.44.1"])
import gradio as gr
import threading
import time
import tempfile
from datetime import datetime
from typing import Any
from langchain_core.output_parsers import StrOutputParser
from langchain_text_splitters import TokenTextSplitter
from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline
from langchain.retrievers import ContextualCompressionRetriever
from langchain_core.runnables import RunnableBranch
from langchain_core.callbacks import StdOutCallbackHandler, BaseCallbackHandler
from langchain_google_vertexai import HarmBlockThreshold, HarmCategory
from langchain_neo4j import Neo4jVector, Neo4jGraph, Neo4jChatMessageHistory
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_huggingface import HuggingFaceEmbeddings
# LangChain chat models
from langchain_openai import ChatOpenAI, AzureChatOpenAI
from langchain_google_vertexai import ChatVertexAI
from langchain_anthropic import ChatAnthropic
from langchain_community.chat_models import ChatOllama
from langchain.globals import set_debug
# ElevenLabs and Cloud Translate
from google.cloud import translate
from elevenlabs import ElevenLabs, play
# Load environment variables
load_dotenv()
set_debug(True)
# Google Cloud credential bootstrap: write the JSON key from the environment to a temp file
def get_credentials():
    """Retrieve Google Cloud credentials from the environment variable and write them to a temporary file."""
    creds_json_str = os.getenv("BOB")  # Get JSON credentials stored as a string
    if creds_json_str is None:
        raise ValueError("Environment variable 'BOB' not found. Please set it with the JSON credentials.")
    try:
        # Create a temporary file to store the credentials
        with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".json") as temp:
            temp.write(creds_json_str)  # Write the JSON string to the file
            temp_filename = temp.name  # Get the temporary file's name
        logging.info(f"Temporary credentials file created at: {temp_filename}")
        return temp_filename
    except Exception as e:
        logging.error(f"Error creating temporary credentials file: {e}")
        raise
# Store the temporary file path
temp_credentials_file = get_credentials()
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = temp_credentials_file
os.environ["TOKENIZERS_PARALLELISM"] = "false" | |
project_id = os.getenv("PROJECT_ID") | |
# Neo4j Configuration | |
NEO4J_URI = os.getenv("NEO4J_URI") | |
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME") | |
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD") | |
NEO4J_DATABASE = os.getenv("NEO4J_DATABASE") | |
# Load credentials using google.auth | |
credentials, project_id = google.auth.default() | |
logging.info(f"Loaded credentials for project: {project_id}") | |
embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2") | |
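# all-MiniLM-L6-v2 emits 384-dimensional vectors, so the Neo4j "vector" index used
# below must have been created with 384 dimensions. Quick manual check if in doubt:
#   len(embedding_function.embed_query("test"))  # expected: 384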
### Vector graph search tuning constants
VECTOR_GRAPH_SEARCH_ENTITY_LIMIT = 40
VECTOR_GRAPH_SEARCH_EMBEDDING_MIN_MATCH = 0.3
VECTOR_GRAPH_SEARCH_EMBEDDING_MAX_MATCH = 0.9
VECTOR_GRAPH_SEARCH_ENTITY_LIMIT_MINMAX_CASE = 20
VECTOR_GRAPH_SEARCH_ENTITY_LIMIT_MAX_CASE = 40
# VECTOR_GRAPH_SEARCH_QUERY: hybrid vector + graph retrieval Cypher query
VECTOR_GRAPH_SEARCH_QUERY_PREFIX = """
WITH node as chunk, score
// find the document of the chunk
MATCH (chunk)-[:PART_OF]->(d:Document)
// aggregate chunk-details
WITH d, collect(DISTINCT {chunk: chunk, score: score}) AS chunks, avg(score) as avg_score
// fetch entities
CALL { WITH chunks
UNWIND chunks as chunkScore
WITH chunkScore.chunk as chunk
"""
VECTOR_GRAPH_SEARCH_ENTITY_QUERY = """
    OPTIONAL MATCH (chunk)-[:HAS_ENTITY]->(e)
    WITH e, count(*) AS numChunks
    ORDER BY numChunks DESC
    LIMIT {no_of_entities}
    WITH
    CASE
        WHEN e.embedding IS NULL OR ({embedding_match_min} <= vector.similarity.cosine($embedding, e.embedding) AND vector.similarity.cosine($embedding, e.embedding) <= {embedding_match_max}) THEN
            collect {{
                OPTIONAL MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){{0,1}}(:!Chunk&!Document&!__Community__)
                RETURN path LIMIT {entity_limit_minmax_case}
            }}
        WHEN e.embedding IS NOT NULL AND vector.similarity.cosine($embedding, e.embedding) > {embedding_match_max} THEN
            collect {{
                OPTIONAL MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){{0,2}}(:!Chunk&!Document&!__Community__)
                RETURN path LIMIT {entity_limit_max_case}
            }}
        ELSE
            collect {{
                MATCH path=(e)
                RETURN path
            }}
    END AS paths, e
"""
VECTOR_GRAPH_SEARCH_QUERY_SUFFIX = """
    WITH apoc.coll.toSet(apoc.coll.flatten(collect(DISTINCT paths))) AS paths,
         collect(DISTINCT e) AS entities
    // De-duplicate nodes and relationships across chunks
    RETURN
        collect {
            UNWIND paths AS p
            UNWIND relationships(p) AS r
            RETURN DISTINCT r
        } AS rels,
        collect {
            UNWIND paths AS p
            UNWIND nodes(p) AS n
            RETURN DISTINCT n
        } AS nodes,
        entities
}
// Generate metadata and text components for chunks, nodes, and relationships
WITH d, avg_score,
     [c IN chunks | c.chunk.text] AS texts,
     [c IN chunks | {id: c.chunk.id, score: c.score}] AS chunkdetails,
     [n IN nodes | elementId(n)] AS entityIds,
     [r IN rels | elementId(r)] AS relIds,
     apoc.coll.sort([
         n IN nodes |
         coalesce(apoc.coll.removeAll(labels(n), ['__Entity__'])[0], "") + ":" +
         coalesce(
             n.id,
             n[head([k IN keys(n) WHERE k =~ "(?i)(name|title|id|description)$"])],
             ""
         ) +
         (CASE WHEN n.description IS NOT NULL THEN " (" + n.description + ")" ELSE "" END)
     ]) AS nodeTexts,
     apoc.coll.sort([
         r IN rels |
         coalesce(apoc.coll.removeAll(labels(startNode(r)), ['__Entity__'])[0], "") + ":" +
         coalesce(
             startNode(r).id,
             startNode(r)[head([k IN keys(startNode(r)) WHERE k =~ "(?i)(name|title|id|description)$"])],
             ""
         ) + " " + type(r) + " " +
         coalesce(apoc.coll.removeAll(labels(endNode(r)), ['__Entity__'])[0], "") + ":" +
         coalesce(
             endNode(r).id,
             endNode(r)[head([k IN keys(endNode(r)) WHERE k =~ "(?i)(name|title|id|description)$"])],
             ""
         )
     ]) AS relTexts,
     entities
// Combine texts into response text
WITH d, avg_score, chunkdetails, entityIds, relIds,
     "Text Content:\n" + apoc.text.join(texts, "\n----\n") +
     "\n----\nEntities:\n" + apoc.text.join(nodeTexts, "\n") +
     "\n----\nRelationships:\n" + apoc.text.join(relTexts, "\n") AS text,
     entities
RETURN
    text,
    avg_score AS score,
    {
        length: size(text),
        source: COALESCE(CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName),
        chunkdetails: chunkdetails,
        entities: {
            entityids: entityIds,
            relationshipids: relIds
        }
    } AS metadata
"""
VECTOR_GRAPH_SEARCH_QUERY = VECTOR_GRAPH_SEARCH_QUERY_PREFIX + VECTOR_GRAPH_SEARCH_ENTITY_QUERY.format(
    no_of_entities=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT,
    embedding_match_min=VECTOR_GRAPH_SEARCH_EMBEDDING_MIN_MATCH,
    embedding_match_max=VECTOR_GRAPH_SEARCH_EMBEDDING_MAX_MATCH,
    entity_limit_minmax_case=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT_MINMAX_CASE,
    entity_limit_max_case=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT_MAX_CASE
) + VECTOR_GRAPH_SEARCH_QUERY_SUFFIX
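# VECTOR_GRAPH_SEARCH_ENTITY_QUERY goes through str.format() above, so single-brace
# placeholders like {no_of_entities} are substituted while doubled braces ({{ ... }})
# survive as the literal braces Cypher's collect { ... } subqueries need.
# Lightweight sanity check that the assembly left no unfilled placeholder behind:
assert "{no_of_entities}" not in VECTOR_GRAPH_SEARCH_QUERY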
graph = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE
)
CHAT_TOKEN_CUT_OFF = {
    ("openai_gpt_3.5", "azure_ai_gpt_35", "gemini_1.0_pro", "gemini_1.5_pro", "gemini_1.5_flash", "groq-llama3", "anthropic_claude_3_5_sonnet", "bedrock_claude_3_5_sonnet"): 4,
    ("openai-gpt-4", "diffbot", "azure_ai_gpt_4o", "openai_gpt_4o", "openai_gpt_4o_mini"): 28,
    ("ollama_llama3",): 2,
}
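# format_documents() below looks this up with `model in model_names`, so each tuple
# groups model names that share a document cutoff: e.g. "gemini_1.5_pro" keeps the
# top 4 retrieved documents, while "openai_gpt_4o" keeps up to 28.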
# Prompt template
CHAT_SYSTEM_TEMPLATE = """
You are an AI-powered question-answering agent. Your task is to provide accurate and comprehensive responses to user queries based on the given context, chat history, and available resources.

### Response Guidelines:
1. **Direct Answers**: Provide clear and thorough answers to the user's queries without headers unless requested. Avoid speculative responses.
2. **Utilize History and Context**: Leverage relevant information from previous interactions, the current user input, and the context provided below.
3. **No Greetings in Follow-ups**: Start with a greeting in initial interactions. Avoid greetings in subsequent responses unless there's a significant break or the chat restarts.
4. **Admit Unknowns**: Clearly state if an answer is unknown. Avoid making unsupported statements.
5. **Avoid Hallucination**: Only provide information based on the context provided. Do not invent information.
6. **Response Length**: Keep responses concise and relevant. Aim for clarity and completeness within 4-5 sentences unless more detail is requested.
7. **Tone and Style**: Maintain a professional and informative tone. Be friendly and approachable.
8. **Error Handling**: If a query is ambiguous or unclear, ask for clarification rather than providing a potentially incorrect answer.
9. **Fallback Options**: If the required information is not available in the provided context, provide a polite and helpful response. Example: "I don't have that information right now." or "I'm sorry, but I don't have that information. Is there something else I can help with?"
10. **Context Availability**: If the context is empty, do not provide answers based solely on internal knowledge. Instead, respond appropriately by indicating the lack of information.

**IMPORTANT**: DO NOT ANSWER FROM YOUR OWN KNOWLEDGE BASE; USE ONLY THE CONTEXT BELOW.

### Context:
<context>
{context}
</context>

### Example Responses:
User: Hi
AI Response: 'Hello there! How can I assist you today?'

User: "What is Langchain?"
AI Response: "Langchain is a framework that enables the development of applications powered by large language models, such as chatbots. It simplifies the integration of language models into various applications by providing useful tools and components."

User: "Can you explain how to use memory management in Langchain?"
AI Response: "Langchain's memory management involves utilizing built-in mechanisms to manage conversational context effectively. It ensures that the conversation remains coherent and relevant by maintaining the history of interactions and using it to inform responses."

User: "I need help with PyCaret's classification model."
AI Response: "PyCaret simplifies the process of building and deploying machine learning models. For classification tasks, you can use PyCaret's setup function to prepare your data. After setup, you can compare multiple models to find the best one, and then fine-tune it for better performance."

User: "What can you tell me about the latest realtime trends in AI?"
AI Response: "I don't have that information right now. Is there something else I can help with?"

Note: This system does not generate answers based solely on internal knowledge. It answers from the information provided in the user's current and previous inputs, and from the context.
"""
QUESTION_TRANSFORM_TEMPLATE = "Given the below conversation, generate a search query to look up in order to get information relevant to the conversation. Only respond with the query, nothing else."
prompt_template = ChatPromptTemplate.from_messages([
    ("system", CHAT_SYSTEM_TEMPLATE),
    MessagesPlaceholder(variable_name="messages"),
    ("human", "User question: {input}")
])
class SessionChatHistory:
    history_dict = {}

    @classmethod
    def get_chat_history(cls, session_id):
        """Retrieve or create chat message history for a given session ID."""
        if session_id not in cls.history_dict:
            logging.info(f"Creating new ChatMessageHistory Local for session ID: {session_id}")
            cls.history_dict[session_id] = ChatMessageHistory()
        else:
            logging.info(f"Retrieved existing ChatMessageHistory Local for session ID: {session_id}")
        return cls.history_dict[session_id]
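# Usage sketch (the session ID and message are illustrative): this keeps one
# in-process history per session and backs the read-only fallback used in
# create_neo4j_chat_message_history() below.
#   history = SessionChatHistory.get_chat_history("session-42")
#   history.add_user_message("Who trained Arjuna?")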
class CustomCallback(BaseCallbackHandler):
    def __init__(self):
        self.transformed_question = None

    def on_llm_end(self, response, **kwargs: Any) -> None:
        logging.info("question transformed")
        self.transformed_question = response.generations[0][0].text.strip()
def get_history_by_session_id(session_id):
    try:
        return SessionChatHistory.get_chat_history(session_id)
    except Exception as e:
        logging.error(f"Failed to get history for session ID '{session_id}': {e}")
        raise
# LLM selector supporting OpenAI, Gemini, and Claude
def get_llm(model: str):
    """Retrieve the specified language model based on the model name."""
    model = model.lower().strip()
    env_key = f"LLM_MODEL_CONFIG_{model.replace('-', '_').replace('.', '_')}"  # Replace both dashes and periods
    env_value = os.environ.get(env_key.upper())
    if not env_value:
        err = f"Environment variable '{env_key}' is missing or not defined in the expected format"
        logging.error(err)
        raise Exception(err)
    logging.info("Model: {}".format(env_key))
    try:
        if "gemini" in model:
            model_name = env_value
            credentials, project_id = google.auth.default()
            llm = ChatVertexAI(
                model_name=model_name,
                credentials=credentials,
                project=project_id,
                temperature=0,
                safety_settings={
                    HarmCategory.HARM_CATEGORY_UNSPECIFIED: HarmBlockThreshold.BLOCK_NONE,
                    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
                    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
                    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
                },
            )
        elif "openai" in model:
            model_name, api_key = env_value.split(",")
            llm = ChatOpenAI(api_key=api_key, model=model_name, temperature=0)
        elif "claude" in model or "anthropic" in model:
            model_name, api_key = env_value.split(",")
            llm = ChatAnthropic(api_key=api_key, model=model_name, temperature=0)
        else:
            raise ValueError(f"Unsupported model type for: {model}")
    except Exception as e:
        err = f"Error while creating LLM '{model}': {str(e)}"
        logging.error(err)
        raise Exception(err)
    logging.info(f"Model created - Model Version: {model_name}")
    return llm, model_name
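# Expected env var layout, per the parsing above (values are illustrative
# placeholders, not real model IDs or keys):
#   Gemini entries hold just the model name:
#     LLM_MODEL_CONFIG_GEMINI_1_5_PRO="gemini-1.5-pro"
#   OpenAI and Anthropic entries hold "model_name,api_key":
#     LLM_MODEL_CONFIG_OPENAI_GPT_4O="gpt-4o,sk-..."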
# Module-level lock so concurrent summarization threads don't interleave history writes
history_lock = threading.Lock()

def summarize_and_log(history, stored_messages, llm):
    logging.info("Starting summarization in a separate thread.")
    if not stored_messages:
        logging.info("No messages to summarize.")
        return False
    try:
        start_time = time.time()
        summarization_prompt = ChatPromptTemplate.from_messages(
            [
                MessagesPlaceholder(variable_name="chat_history"),
                (
                    "human",
                    "Summarize the above chat messages into a concise message, focusing on key points and relevant details that could be useful for future conversations. Exclude all introductions and extraneous information."
                ),
            ]
        )
        summarization_chain = summarization_prompt | llm
        summary_message = summarization_chain.invoke({"chat_history": stored_messages})
        with history_lock:
            history.clear()
            history.add_user_message("Our current conversation summary till now")
            history.add_message(summary_message)
        history_summarized_time = time.time() - start_time
        logging.info(f"Chat History summarized in {history_summarized_time:.2f} seconds")
        return True
    except Exception as e:
        logging.error(f"An error occurred while summarizing messages: {e}", exc_info=True)
        return False
def get_total_tokens(ai_response, llm):
    try:
        if isinstance(llm, (ChatOpenAI, AzureChatOpenAI)):
            total_tokens = ai_response.response_metadata.get('token_usage', {}).get('total_tokens', 0)
        elif isinstance(llm, ChatVertexAI):
            total_tokens = ai_response.response_metadata.get('usage_metadata', {}).get('prompt_token_count', 0)
        elif isinstance(llm, ChatAnthropic):
            input_tokens = int(ai_response.response_metadata.get('usage', {}).get('input_tokens', 0))
            output_tokens = int(ai_response.response_metadata.get('usage', {}).get('output_tokens', 0))
            total_tokens = input_tokens + output_tokens
        elif isinstance(llm, ChatOllama):
            total_tokens = ai_response.response_metadata.get("prompt_eval_count", 0)
        else:
            logging.warning(f"Unrecognized language model: {type(llm)}. Returning 0 tokens.")
            total_tokens = 0
    except Exception as e:
        logging.error(f"Error retrieving total tokens: {e}")
        total_tokens = 0
    return total_tokens
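# Caveat: the branches above measure different things. The Vertex AI branch reads
# only 'prompt_token_count' (input tokens) and the Ollama branch only
# 'prompt_eval_count', while the Anthropic branch sums input and output tokens,
# so treat the returned value as an approximate usage figure rather than an exact total.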
def get_sources_and_chunks(sources_used, docs):
    chunkdetails_list = []
    sources_used_set = set(sources_used)
    seen_ids_and_scores = set()
    for doc in docs:
        try:
            source = doc.metadata.get("source")
            chunkdetails = doc.metadata.get("chunkdetails", [])
            if source in sources_used_set:
                for chunkdetail in chunkdetails:
                    chunk_id = chunkdetail.get("id")
                    score = round(chunkdetail.get("score", 0), 4)
                    id_and_score = (chunk_id, score)
                    if id_and_score not in seen_ids_and_scores:
                        seen_ids_and_scores.add(id_and_score)
                        chunkdetails_list.append({**chunkdetail, "score": score})
        except Exception as e:
            logging.error(f"Error processing document: {e}")
    result = {
        'sources': sources_used,
        'chunkdetails': chunkdetails_list,
    }
    return result
def get_rag_chain(llm, system_template=CHAT_SYSTEM_TEMPLATE):
    try:
        question_answering_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", system_template),
                MessagesPlaceholder(variable_name="messages"),
                ("human", "User question: {input}"),
            ]
        )
        question_answering_chain = question_answering_prompt | llm
        return question_answering_chain
    except Exception as e:
        logging.error(f"Error creating RAG chain: {e}")
        raise
def format_documents(documents, model):
    prompt_token_cutoff = 4
    for model_names, value in CHAT_TOKEN_CUT_OFF.items():
        if model in model_names:
            prompt_token_cutoff = value
            break
    sorted_documents = sorted(documents, key=lambda doc: doc.state.get("query_similarity_score", 0), reverse=True)
    sorted_documents = sorted_documents[:prompt_token_cutoff]
    formatted_docs = list()
    sources = set()
    entities = dict()
    global_communities = list()
    for doc in sorted_documents:
        try:
            source = doc.metadata.get('source', "unknown")
            sources.add(source)
            if 'entities' in doc.metadata:
                if 'entityids' in doc.metadata['entities']:
                    entities.setdefault('entityids', set()).update(doc.metadata['entities']['entityids'])
                if 'relationshipids' in doc.metadata['entities']:
                    entities.setdefault('relationshipids', set()).update(doc.metadata['entities']['relationshipids'])
            if 'communitydetails' in doc.metadata:
                existing_ids = {entry['id'] for entry in global_communities}
                new_entries = [entry for entry in doc.metadata["communitydetails"] if entry['id'] not in existing_ids]
                global_communities.extend(new_entries)
            formatted_doc = (
                "Document start\n"
                f"This Document belongs to the source {source}\n"
                f"Content: {doc.page_content}\n"
                "Document end\n"
            )
            formatted_docs.append(formatted_doc)
        except Exception as e:
            logging.error(f"Error formatting document: {e}")
    return "\n\n".join(formatted_docs), sources, entities, global_communities
def process_documents(docs, question, messages, llm, model):
    start_time = time.time()
    try:
        formatted_docs, sources, entitydetails, communities = format_documents(docs, model)
        rag_chain = get_rag_chain(llm=llm)
        ai_response = rag_chain.invoke({
            "messages": messages[:-1],
            "context": formatted_docs,
            "input": question
        })
        result = {'sources': list(), 'nodedetails': dict(), 'entities': dict()}
        node_details = {"chunkdetails": list(), "entitydetails": list(), "communitydetails": list()}
        entities = {'entityids': list(), "relationshipids": list()}
        sources_and_chunks = get_sources_and_chunks(sources, docs)
        result['sources'] = sources_and_chunks['sources']
        node_details["chunkdetails"] = sources_and_chunks["chunkdetails"]
        entities.update(entitydetails)
        result["nodedetails"] = node_details
        result["entities"] = entities
        content = ai_response.content
        total_tokens = get_total_tokens(ai_response, llm)
        predict_time = time.time() - start_time
        logging.info(f"Final response predicted in {predict_time:.2f} seconds")
    except Exception as e:
        logging.error(f"Error processing documents: {e}")
        raise
    return content, result, total_tokens, formatted_docs
def retrieve_documents(doc_retriever, messages):
    start_time = time.time()
    try:
        handler = CustomCallback()
        docs = doc_retriever.invoke({"messages": messages}, {"callbacks": [handler]})
        transformed_question = handler.transformed_question
        if transformed_question:
            logging.info(f"Transformed question: {transformed_question}")
        doc_retrieval_time = time.time() - start_time
        logging.info(f"Documents retrieved in {doc_retrieval_time:.2f} seconds")
    except Exception as e:
        error_message = f"Error retrieving documents: {str(e)}"
        logging.error(error_message)
        docs = None
        transformed_question = None
    return docs, transformed_question
def create_document_retriever_chain(llm, retriever):
    try:
        logging.info("Starting to create document retriever chain")
        query_transform_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", QUESTION_TRANSFORM_TEMPLATE),
                MessagesPlaceholder(variable_name="messages")
            ]
        )
        output_parser = StrOutputParser()
        splitter = TokenTextSplitter(chunk_size=5000, chunk_overlap=100)
        embeddings_filter = EmbeddingsFilter(
            embeddings=embedding_function,
            similarity_threshold=0.10
        )
        pipeline_compressor = DocumentCompressorPipeline(
            transformers=[splitter, embeddings_filter]
        )
        compression_retriever = ContextualCompressionRetriever(
            base_compressor=pipeline_compressor, base_retriever=retriever
        )
        # First turn: pass the raw question; later turns: let the LLM rewrite the
        # conversation into a standalone search query before retrieval
        query_transforming_retriever_chain = RunnableBranch(
            (
                lambda x: len(x.get("messages", [])) == 1,
                (lambda x: x["messages"][-1].content) | compression_retriever,
            ),
            query_transform_prompt | llm | output_parser | compression_retriever,
        ).with_config(run_name="chat_retriever_chain")
        logging.info("Successfully created document retriever chain")
        return query_transforming_retriever_chain
    except Exception as e:
        logging.error(f"Error creating document retriever chain: {e}", exc_info=True)
        raise
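# Behavior sketch of the RunnableBranch above: with a single message the raw
# question text goes straight to the compression retriever; with more history the
# LLM first rewrites the conversation into a standalone search query. For example:
#   docs = doc_retriever.invoke({"messages": [HumanMessage(content="Who is Karna?")]})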
def initialize_neo4j_vector(graph):
    try:
        retrieval_query = VECTOR_GRAPH_SEARCH_QUERY
        index_name = "vector"
        keyword_index = "keyword"
        node_label = "Chunk"
        embedding_node_property = "embedding"
        text_node_properties = ["text"]
        if not retrieval_query or not index_name:
            raise ValueError("Required settings 'retrieval_query' or 'index_name' are missing.")
        if keyword_index:
            neo_db = Neo4jVector.from_existing_graph(
                embedding=embedding_function,
                index_name=index_name,
                retrieval_query=retrieval_query,
                graph=graph,
                search_type="hybrid",
                node_label=node_label,
                embedding_node_property=embedding_node_property,
                text_node_properties=text_node_properties,
                keyword_index_name=keyword_index
            )
            logging.info(f"Successfully retrieved Neo4jVector Fulltext index '{index_name}' and keyword index '{keyword_index}'")
        else:
            neo_db = Neo4jVector.from_existing_graph(
                embedding=embedding_function,
                index_name=index_name,
                retrieval_query=retrieval_query,
                graph=graph,
                node_label=node_label,
                embedding_node_property=embedding_node_property,
                text_node_properties=text_node_properties
            )
            logging.info(f"Successfully retrieved Neo4jVector index '{index_name}'")
    except Exception as e:
        index_name = "vector"
        logging.error(f"Error retrieving Neo4jVector index '{index_name}': {e}")
        raise
    return neo_db
def create_retriever(neo_db, document_names, search_k, score_threshold, ef_ratio):
    if document_names:
        retriever = neo_db.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={
                'k': search_k,
                'effective_search_ratio': ef_ratio,
                'score_threshold': score_threshold,
                'filter': {'fileName': {'$in': document_names}}
            }
        )
        logging.info(f"Successfully created retriever with search_k={search_k}, score_threshold={score_threshold} for documents {document_names}")
    else:
        retriever = neo_db.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={'k': search_k, 'effective_search_ratio': ef_ratio, 'score_threshold': score_threshold}
        )
        logging.info(f"Successfully created retriever with search_k={search_k}, score_threshold={score_threshold}")
    return retriever
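# As configured, the retriever returns at most `search_k` chunks whose similarity
# clears `score_threshold`; `effective_search_ratio` (where the installed
# langchain-neo4j version supports it) widens the candidate pool scanned before
# the top-k cut. With the defaults in get_neo4j_retriever below, that is k=5 at a 0.5 threshold.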
def get_neo4j_retriever(graph, document_names, score_threshold=0.5):
    try:
        neo_db = initialize_neo4j_vector(graph)
        # document_names = list(map(str.strip, json.loads(document_names)))
        search_k = 5
        ef_ratio = int(os.getenv("EFFECTIVE_SEARCH_RATIO", "2")) if os.getenv("EFFECTIVE_SEARCH_RATIO", "2").isdigit() else 2
        retriever = create_retriever(neo_db, document_names, search_k, score_threshold, ef_ratio)
        return retriever
    except Exception as e:
        index_name = "vector"
        logging.error(f"Error retrieving Neo4jVector index {index_name} or creating retriever: {e}")
        raise Exception(f"An error occurred while retrieving the Neo4jVector index or creating the retriever. Please drop and create a new vector index '{index_name}': {e}") from e
def setup_chat(model, graph, document_names):
    start_time = time.time()
    try:
        if model == "diffbot":
            model = os.getenv('DEFAULT_DIFFBOT_CHAT_MODEL')
        llm, model_name = get_llm(model=model)
        logging.info(f"Model called in chat: {model} (version: {model_name})")
        retriever = get_neo4j_retriever(graph=graph, document_names=document_names)
        doc_retriever = create_document_retriever_chain(llm, retriever)
        chat_setup_time = time.time() - start_time
        logging.info(f"Chat setup completed in {chat_setup_time:.2f} seconds")
    except Exception as e:
        logging.error(f"Error during chat setup: {e}", exc_info=True)
        raise
    return llm, doc_retriever, model_name
def create_neo4j_chat_message_history(graph, session_id, write_access=True):
    """
    Creates and returns a Neo4jChatMessageHistory instance, falling back to the
    in-process SessionChatHistory when write access is not granted.
    """
    try:
        if write_access:
            history = Neo4jChatMessageHistory(
                graph=graph,
                session_id=session_id
            )
            return history
        history = get_history_by_session_id(session_id)
        return history
    except Exception as e:
        logging.error(f"Error creating Neo4jChatMessageHistory: {e}")
        raise
# Final response logic
def process_chat_response(messages, history, question, model, graph, document_names):
    try:
        llm, doc_retriever, model_version = setup_chat(model, graph, document_names)
        docs, transformed_question = retrieve_documents(doc_retriever, messages)
        if docs:
            content, result, total_tokens, formatted_docs = process_documents(docs, question, messages, llm, model)
        else:
            content = "I couldn't find any relevant documents to answer your question."
            result = {"sources": list(), "nodedetails": list(), "entities": list()}
            total_tokens = 0
            formatted_docs = ""
        ai_response = AIMessage(content=content)
        messages.append(ai_response)
        summarization_thread = threading.Thread(target=summarize_and_log, args=(history, messages, llm))
        summarization_thread.start()
        logging.info("Summarization thread started.")
        # summarize_and_log(history, messages, llm)
        metric_details = {"question": question, "contexts": formatted_docs, "answer": content}
        return {
            "session_id": "",
            "message": content,
            "info": {
                # "metrics": metrics,
                "sources": result["sources"],
                "model": model_version,
                "nodedetails": result["nodedetails"],
                "total_tokens": total_tokens,
                "response_time": 0,
                "entities": result["entities"],
                "metric_details": metric_details,
            },
            "user": "chatbot"
        }
    except Exception as e:
        logging.exception(f"Error processing chat response at {datetime.now()}: {str(e)}")
        return {
            "session_id": "",
            "message": "Something went wrong",
            "info": {
                "metrics": [],
                "sources": [],
                "nodedetails": [],
                "total_tokens": 0,
                "response_time": 0,
                "error": f"{type(e).__name__}: {str(e)}",
                "entities": [],
                "metric_details": {},
            },
            "user": "chatbot"
        }
def handle_chat(question, history, llm):
    # Check if the LLM model is selected
    if not llm:
        return history + [{"role": "assistant", "content": "Please select your AI Sage (LLM model) to proceed."}]
    # Create or retrieve the chat history from Neo4j
    neo4j_history = create_neo4j_chat_message_history(graph, session_id=1, write_access=True)
    messages = neo4j_history.messages
    # Append the current user question to the history
    messages.append(HumanMessage(content=question))
    history.append({"role": "user", "content": question})
    # Call the process_chat_response function with the updated parameters
    response = process_chat_response(messages, neo4j_history, question, llm, graph, document_names=[])
    # Extract the assistant's response text
    assistant_response = response.get("message", "I couldn't process your request.")
    # Append the assistant's response to the history
    history.append({"role": "assistant", "content": assistant_response})
    # Return the updated history
    return history
# Define your custom CSS
custom_css = """
/* Custom CSS for the chat interface */
.gradio-container {
    /* background: #f0f0f0; */ /* Change background color */
    border: 0;
    border-radius: 15px; /* Add border radius */
}
.primary.svelte-cmf5ev {
    background: linear-gradient(90deg, #9848FC 0%, #DC8855 100%);
    /* background-clip: text; */
    /* -webkit-background-clip: text; */
    /* -webkit-text-fill-color: transparent; */
}
.v-application .secondary {
    background-color: #EEEEEE !important;
}
"""
# Add the "Listen this in Hindi" button logic | |
def listen_in_hindi(response_text): | |
try: | |
if not response_text: | |
raise ValueError("No response text available to translate.") | |
# Step 1: Translate to Hindi | |
project_id = os.getenv("PROJECT_ID") | |
if not project_id: | |
raise ValueError("PROJECT_ID not set in environment.") | |
client = translate.TranslationServiceClient() | |
parent = f"projects/{project_id}/locations/global" | |
response = client.translate_text( | |
parent=parent, | |
contents=[response_text], | |
mime_type="text/plain", | |
source_language_code="en-US", | |
target_language_code="hi", | |
) | |
translated_text = response.translations[0].translated_text | |
# Step 2: Generate audio with ElevenLabs | |
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY") | |
if not ELEVENLABS_API_KEY: | |
raise ValueError("ELEVENLABS_API_KEY not set") | |
elevenlabs_client = ElevenLabs(api_key=ELEVENLABS_API_KEY) | |
audio_generator = elevenlabs_client.text_to_speech.convert( | |
text=translated_text, | |
voice_id="MF4J4IDTRo0AxOO4dpFR", | |
model_id="eleven_multilingual_v2", | |
output_format="mp3_44100_128", | |
) | |
audio_bytes = b"".join(audio_generator) | |
# Step 3: Save to a temp file and return path | |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f: | |
f.write(audio_bytes) | |
audio_path = f.name | |
return audio_path, "Audio in Hindi generated successfully!" | |
except Exception as e: | |
logging.error(f"Error in 'listen_in_hindi': {e}") | |
return None, f"Error: {e}" | |
def get_last_bot_response(chat_history):
    if not chat_history or not isinstance(chat_history, list):
        print("Chat history is empty or invalid.")
        return None
    # Reverse iterate to find the last assistant message
    for msg in reversed(chat_history):
        if isinstance(msg, dict) and msg.get("role") == "assistant":
            print(f"Last assistant response: {msg.get('content')}")
            return msg.get("content")
    print("No assistant response found.")
    return None
with gr.Blocks(css=custom_css, theme="soft") as demo:
    # Title and description
    gr.Markdown(
        """
        # Mahabharata AI Sage
        Step into the epic world of the Mahabharata! Ask questions, explore characters, unravel mysteries,
        and gain insights from the vast knowledge stored in the database. Let the wisdom of AI guide you!
        """
    )
    # Dropdown for LLM selection
    llm_dropdown = gr.Dropdown(
        choices=["openai-gpt-4o", "gemini-2.5-pro-experimental", "gemini-2.0-pro", "gemini-1.5-pro", "gemini-1.5-flash", "claude"],
        label="Select Your AI Sage",
        value=None,
        interactive=True,
        key="llm",
        info="Choose the AI model to guide your journey through the Mahabharata."
    )
    # Textbox for user questions
    question_textbox = gr.Textbox(
        label="Ask Mahabharata AI Sage",
        placeholder="Type your query about the Mahabharata here..."
    )
    # Chat interface
    chatbot = gr.Chatbot(type="messages", height=450, label="Mahabharata Chat")
    # Examples component
    examples = gr.Examples(
        examples=[
            ["Why did the Mahabharata war happen?"],
            ["Who killed Karna, and why?"],
            ["Why did the Pandavas have to go live in the forest for 12 years?"],
            ["Who was the wife of all five Pandavas, and how did that marriage come to be?"],
            ["What was the role of Krishna during the Kurukshetra war? Did he fight?"],
            ["Describe the relationship between Karna and Kunti. How did it affect the war?"],
            ["Who killed Ghatotkacha?"],
            ["Who were the siblings of Karna?"],
            ["Why did Bhishma take a vow of celibacy, and how did that impact the throne of Hastinapur?"],
            ["Who killed Dronacharya and how was he tricked into giving up his weapons?"]
        ],
        inputs=question_textbox,  # Link examples to the textbox component
        label="Example Questions"
    )
    # Submit button
    submit_button = gr.Button("Submit")
    # "Listen to this in Hindi" button
    listen_button = gr.Button("Listen to this in Hindi", interactive=False)
    audio_output = gr.Audio(label="Hindi Audio", type="filepath")
    status_output = gr.Textbox(label="Status")
    # Define the interaction logic
    submit_button.click(
        fn=handle_chat,
        inputs=[question_textbox, chatbot, llm_dropdown],  # Pass the question, chat history, and LLM model
        outputs=chatbot  # Update the chatbot with the new chat history
    ).then(
        fn=lambda: gr.update(interactive=True),
        inputs=None,
        outputs=listen_button
    )
    # Define the interaction logic for the "Listen to this in Hindi" button
    listen_button.click(
        fn=lambda chat_history: listen_in_hindi(get_last_bot_response(chat_history)),
        inputs=chatbot,
        outputs=[audio_output, status_output]
    )
    question_textbox.change(
        fn=lambda: gr.update(interactive=False),
        inputs=None,
        outputs=listen_button
    )
# Launch the interface
if __name__ == "__main__":
    demo.launch()