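"""Gradio app for PDF question answering: upload a PDF, index it with
LlamaIndex, and chat with its contents (RAG over a persisted vector store)."""
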
import gradio as gr
from llama_index.core import (
    StorageContext,
    load_index_from_storage,
    VectorStoreIndex,
    SimpleDirectoryReader,
    ChatPromptTemplate,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
import os
import shutil
from pathlib import Path
import logging

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Define the directory for persistent storage and data
PERSIST_DIR = "./db"
DATA_DIR = "data"

# Ensure data directory exists
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(PERSIST_DIR, exist_ok=True)

# Configure the Llama index settings
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)
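
# NOTE: No LLM is configured here, so llama_index falls back to its default
# (an OpenAI model, which requires OPENAI_API_KEY to be set). To stay on
# Hugging Face models end to end, one option is a local pipeline, e.g.
# (the model name below is only an example):
#
#   from llama_index.llms.huggingface import HuggingFaceLLM
#   Settings.llm = HuggingFaceLLM(model_name="HuggingFaceH4/zephyr-7b-beta")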

def data_ingestion():
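    """Load all documents from DATA_DIR, build a vector index over them,
    and persist it to PERSIST_DIR. Returns True on success, False otherwise."""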
    try:
        documents = SimpleDirectoryReader(DATA_DIR).load_data()
        if not documents:
            logger.warning("No documents loaded from the data directory.")
            return False
        index = VectorStoreIndex.from_documents(documents)
        index.storage_context.persist(persist_dir=PERSIST_DIR)
        return True
    except Exception as e:
        logger.error(f"Error during data ingestion: {str(e)}")
        return False

def handle_query(query):
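    """Answer a question against the persisted index, constraining the
    assistant to the uploaded document via a custom Q&A prompt."""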
    try:
        storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
        index = load_index_from_storage(storage_context)
        chat_text_qa_msgs = [
            (
                "user",
                """You are a Q&A assistant named EazyPeazy. Your main goal is to provide answers as accurately as possible, based on the instructions and context you have been given. If a question does not match the provided context or is outside the scope of the document, kindly advise the user to ask questions within the context of the document.
                Context:
                {context_str}
                Question:
                {query_str}
                """
            )
        ]
        text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
        
        query_engine = index.as_query_engine(text_qa_template=text_qa_template)
        answer = query_engine.query(query)
        
        # query() returns a Response object whose .response attribute holds the text.
        response = getattr(answer, "response", None)
        return response if response else "Sorry, I couldn't find an answer."
    except Exception as e:
        logger.error(f"Error handling query: {str(e)}")
        return "An error occurred while processing your query. Please try again."

def process_file(file):
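    """Copy an uploaded PDF into DATA_DIR and (re)build the index over it."""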
    if file is None:
        return "Please upload a PDF file."
    
    try:
        # Gradio's UploadButton hands the callback the path to a temporary
        # copy of the upload (the default type="filepath"), not a file object,
        # so copy it into DATA_DIR rather than calling .read() on it.
        dest_path = Path(DATA_DIR) / "saved_pdf.pdf"
        dest_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(file, dest_path)
        
        # Process the uploaded PDF
        if data_ingestion():
            return "PDF processed successfully. You can now ask questions about its content."
        else:
            return "Failed to process the PDF. Please try uploading again."
    except Exception as e:
        logger.error(f"Error processing file: {str(e)}")
        return f"An error occurred while processing the file: {str(e)}"

def chat_function(message, history):
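    """Gradio callback: answer the user's message and append the
    (message, response) pair to the chatbot history."""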
    response = handle_query(message)
    history.append((message, response))
    return history

with gr.Blocks() as demo:
    gr.Markdown("# (PDF) Information and Inference🗞️")
    gr.Markdown("Retrieval-Augmented Generation")
    
    with gr.Row():
        with gr.Column(scale=1):
            file_output = gr.Textbox(label="Upload Status")
            upload_button = gr.UploadButton("Upload PDF", file_types=[".pdf"])
        
        with gr.Column(scale=2):
            chatbot = gr.Chatbot()
            msg = gr.Textbox(label="Ask me anything about the content of the PDF:")
            clear = gr.Button("Clear")
    
    upload_button.upload(process_file, upload_button, file_output)
    msg.submit(chat_function, [msg, chatbot], chatbot)
    clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch()