Spaces:
Runtime error
Runtime error
File size: 4,960 Bytes
3934742 3c49a0e e119804 3934742 4b3c2a2 3934742 2941c27 3934742 e119804 3c49a0e 4b3c2a2 e119804 3c49a0e e119804 3c49a0e 3934742 4b3c2a2 3934742 4b3c2a2 3934742 4b3c2a2 3934742 4b3c2a2 2941c27 4b3c2a2 2941c27 622c776 2941c27 4b3c2a2 2941c27 4b3c2a2 3934742 4b3c2a2 3934742 79b3458 4b3c2a2 79b3458 4b3c2a2 79b3458 3934742 79b3458 4b3c2a2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
import gradio as gr
from llama_index import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader
from llama_index.llms import HuggingFaceInferenceAPI
from llama_index.prompts import ChatPromptTemplate
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index import ServiceContext
from dotenv import load_dotenv
import os
import base64
import tempfile
from pathlib import Path
# Load environment variables (HF_TOKEN is read from the process environment
# or a local .env file).
load_dotenv()

# Hub repository id of the instruction-tuned chat model. The model family is
# "gemma"; "google/gemini-1.1-7b-it" is not a Hugging Face Hub repository.
LLM_MODEL_NAME = "google/gemma-1.1-7b-it"

# Configure the Llama index settings
llm = HuggingFaceInferenceAPI(
    model_name=LLM_MODEL_NAME,
    tokenizer_name=LLM_MODEL_NAME,
    context_window=3000,
    token=os.getenv("HF_TOKEN"),
    max_new_tokens=512,
    generate_kwargs={"temperature": 0.1},
)
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

# Create a service context shared by indexing and querying so both use the
# same LLM and embedding model.
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model
)

# Define the directory for persistent storage and data
PERSIST_DIR = "./db"
DATA_DIR = "data"

# Ensure both the data directory and the index persistence directory exist
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(PERSIST_DIR, exist_ok=True)
def displayPDF(file):
    """Return an HTML ``<iframe>`` that embeds *file* as an inline base64 PDF.

    Args:
        file: A binary file-like object exposing ``read()``. If it also
            supports ``seek()``, it is rewound first so the whole content is
            embedded even when the stream was read earlier.

    Returns:
        An HTML string containing an ``<iframe>`` whose ``src`` is a
        ``data:application/pdf;base64,...`` URI.
    """
    # process_file() reads the upload before calling this function, which
    # leaves the stream at EOF; without rewinding, the payload would be empty.
    try:
        file.seek(0)
    except (AttributeError, OSError):
        # Best effort: non-seekable streams are read from their current position.
        pass
    base64_pdf = base64.b64encode(file.read()).decode('utf-8')
    return f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600" type="application/pdf"></iframe>'
def data_ingestion():
    """Index every document found in ``DATA_DIR`` and persist the index.

    Loads the documents with ``SimpleDirectoryReader``, builds a
    ``VectorStoreIndex`` using the module-level ``service_context``, and
    writes the index's storage context to ``PERSIST_DIR``.
    """
    documents = SimpleDirectoryReader(DATA_DIR).load_data()
    # The index creates its own storage context; no separate one is needed.
    index = VectorStoreIndex.from_documents(documents, service_context=service_context)
    index.storage_context.persist(persist_dir=PERSIST_DIR)
def handle_query(query):
    """Answer *query* against the index persisted in ``PERSIST_DIR``.

    Args:
        query: The user's question as a string.

    Returns:
        The answer text, a hint to upload a PDF when no index has been
        persisted yet, or a fallback message when the query engine's result
        carries no response.
    """
    try:
        storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
        index = load_index_from_storage(storage_context, service_context=service_context)
    except FileNotFoundError:
        # PERSIST_DIR is created empty at start-up, so querying before any
        # upload finds no persisted index files to load.
        return "No document has been indexed yet. Please upload a PDF first."

    chat_text_qa_msgs = [
        (
            "user",
            """You are a Q&A assistant named CHATTO, created by Suriya. You have a specific response programmed for when users specifically ask about your creator, Suriya. The response is: "I was created by Suriya, an enthusiast in Artificial Intelligence. He is dedicated to solving complex problems and delivering innovative solutions. With a strong focus on machine learning, deep learning, Python, generative AI, NLP, and computer vision, Suriya is passionate about pushing the boundaries of AI to explore new possibilities." For all other inquiries, your main goal is to provide answers as accurately as possible, based on the instructions and context you have been given. If a question does not match the provided context or is outside the scope of the document, kindly advise the user to ask questions within the context of the document.
Context:
{context_str}
Question:
{query_str}
"""
        )
    ]
    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
    query_engine = index.as_query_engine(text_qa_template=text_qa_template)
    answer = query_engine.query(query)

    # Accept either an object with a ``response`` attribute or a plain dict.
    if hasattr(answer, 'response'):
        return answer.response
    elif isinstance(answer, dict) and 'response' in answer:
        return answer['response']
    else:
        return "Sorry, I couldn't find an answer."
def process_file(file):
    """Store an uploaded PDF in ``DATA_DIR``, re-index, and build a preview.

    Args:
        file: The uploaded file object, or ``None`` when nothing was
            uploaded. It must expose ``read()`` and ``name``.
            NOTE(review): the exact object type depends on the Gradio
            version in use; confirm it is file-like rather than a path.

    Returns:
        A ``(status_message, preview_html)`` pair. ``preview_html`` is
        ``None`` when nothing was uploaded or processing failed.
    """
    if file is None:
        # Two output components are wired to this callback, so every branch
        # must return a pair.
        return "Please upload a PDF file.", None
    try:
        # file.name may be an absolute path; joining it onto DATA_DIR would
        # discard DATA_DIR entirely, so keep only the final path component.
        dest_path = Path(DATA_DIR) / Path(file.name).name
        dest_path.parent.mkdir(parents=True, exist_ok=True)
        # Write straight to the destination instead of renaming a temp file,
        # which fails when the temp dir is on a different filesystem.
        dest_path.write_bytes(file.read())
        # Process the uploaded PDF
        data_ingestion()
        # The stream was consumed above; rewind it so the preview is not empty.
        if hasattr(file, "seek"):
            file.seek(0)
        return f"PDF '{file.name}' processed successfully. You can now ask questions about its content.", displayPDF(file)
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        return f"An error occurred while processing the file: {str(e)}", None
def chat_function(message, history):
    """Answer *message* and record the exchange in the chat history.

    Args:
        message: The text the user submitted.
        history: The list of ``(user_message, bot_reply)`` pairs so far; it
            is extended in place.

    Returns:
        The updated history twice, once for each output slot this callback
        is wired to.
    """
    reply = handle_query(message)
    turn = (message, reply)
    history.append(turn)
    return history, history
# Build the two-column UI: upload and preview on the left, chat on the right.
with gr.Blocks() as demo:
    gr.Markdown("# (PDF) Information and Inference🗞️")
    gr.Markdown("Retrieval-Augmented Generation")

    with gr.Row():
        # Left column: upload status, PDF preview and the upload button.
        with gr.Column(scale=1):
            file_output = gr.Textbox(label="Upload Status")
            file_display = gr.HTML()
            upload_button = gr.UploadButton("Upload PDF", file_types=[".pdf"])

        # Right column: conversation view, question box and reset button.
        with gr.Column(scale=2):
            chatbot = gr.Chatbot()
            msg = gr.Textbox(label="Ask me anything about the content of the PDF:")
            clear = gr.Button("Clear")

    # Event wiring.
    upload_button.upload(
        fn=process_file,
        inputs=upload_button,
        outputs=[file_output, file_display],
    )
    msg.submit(
        fn=chat_function,
        inputs=[msg, chatbot],
        outputs=[chatbot, chatbot],
    )
    # Returning None resets the chatbot component.
    clear.click(fn=lambda: None, inputs=None, outputs=chatbot, queue=False)

if __name__ == "__main__":
    demo.launch()
|