ibrahim313 committed on
Commit
855467f
·
verified ·
1 Parent(s): d451b60

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -113
app.py CHANGED
@@ -1,132 +1,81 @@
1
  import os
2
- import base64
3
- import gc
4
- import tempfile
5
-
6
  import gradio as gr
 
 
7
 
8
  from llama_index.core import Settings
9
- from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
 
10
  from llama_index.llms.cohere import Cohere
11
  from llama_index.embeddings.cohere import CohereEmbedding
12
  from llama_index.postprocessor.cohere_rerank import CohereRerank
13
- from llama_index.core import PromptTemplate
14
-
15
- # Your Cohere API Key
16
- API_KEY = "ziEpsRreaJzBi5HUDap7gMecJWXX69O26Hf71Kxo"
17
-
18
- # Global query engine
19
- query_engine = None
20
 
21
- # Function to reset chat
22
- def reset_chat():
23
- gc.collect()
24
 
25
- # Function to display PDF file
26
- def display_pdf(file):
27
- try:
28
- base64_pdf = base64.b64encode(file.read()).decode("utf-8")
29
- pdf_display = f"""<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600px" type="application/pdf">
30
- </iframe>"""
31
- return pdf_display
32
- except Exception as e:
33
- return f"Error displaying PDF: {e}"
34
 
35
- # Function to process PDF and generate a query engine
36
- def process_pdf(uploaded_file):
37
- global query_engine # Use global to modify the global query_engine variable
38
 
39
- if not uploaded_file:
40
- return None, "No file uploaded. Please upload a PDF file."
 
 
 
41
 
42
- if not uploaded_file.name.lower().endswith(".pdf"):
43
- return None, "Invalid file type. Please upload a PDF file."
44
-
45
- try:
46
- with tempfile.TemporaryDirectory() as temp_dir:
47
- file_path = os.path.join(temp_dir, uploaded_file.name)
48
- with open(file_path, "wb") as f:
49
- f.write(uploaded_file.read())
50
-
51
- # Creating an index over loaded data
52
- loader = SimpleDirectoryReader(
 
 
 
53
  input_dir=temp_dir,
54
  required_exts=[".pdf"],
55
  recursive=True
56
  )
57
- docs = loader.load_data()
58
-
59
- # Setting up LLM & embedding model
60
- llm = Cohere(api_key=API_KEY, model="command")
61
- embed_model = CohereEmbedding(
62
- cohere_api_key=API_KEY,
63
- model_name="embed-english-v3.0",
64
- input_type="search_query",
65
- )
66
-
67
- Settings.embed_model = embed_model
68
- index = VectorStoreIndex.from_documents(docs, show_progress=True)
69
-
70
- # Create a cohere reranker
71
- cohere_rerank = CohereRerank(api_key=API_KEY)
72
-
73
- # Create the query engine
74
- Settings.llm = llm
75
- query_engine = index.as_query_engine(streaming=True, node_postprocessors=[cohere_rerank])
76
-
77
- # Customizing prompt template
78
- qa_prompt_tmpl_str = (
79
- "Context information is below.\n"
80
- "---------------------\n"
81
- "{context_str}\n"
82
- "---------------------\n"
83
- "Given the context information above, I want you to think step by step to answer the query in a crisp manner. "
84
- "If you don't know the answer, say 'I don't know!'.\n"
85
- "Query: {query_str}\n"
86
- "Answer: "
87
- )
88
- qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)
89
-
90
- query_engine.update_prompts(
91
- {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
92
- )
93
-
94
- return query_engine, display_pdf(uploaded_file)
95
- except Exception as e:
96
- return None, f"An error occurred during PDF processing: {e}"
97
-
98
- # Function to handle chat queries
99
- def chat_with_pdf(prompt):
100
- if not query_engine:
101
- return "Please upload and process a PDF file first."
102
 
103
- try:
104
- full_response = ""
105
- streaming_response = query_engine.query(prompt)
106
-
107
- for chunk in streaming_response.response_gen:
108
- full_response += chunk
109
-
110
- return full_response
111
- except Exception as e:
112
- return f"An error occurred during the query process: {e}"
113
-
114
- # Gradio Interface
115
- with gr.Blocks() as demo:
116
- gr.Markdown("# 🔍 Searchable Document Chatbot")
117
- gr.Markdown("Upload your PDF document and start asking questions.")
118
-
119
- pdf_file = gr.File(label="Upload your PDF file", file_types=[".pdf"])
120
- pdf_preview = gr.HTML(label="PDF Preview")
121
-
122
- process_button = gr.Button("Process PDF")
123
 
124
- chat_input = gr.Textbox(label="Ask a question")
125
- chat_output = gr.Textbox(label="Chat Response")
126
 
127
- process_button.click(fn=process_pdf, inputs=pdf_file, outputs=pdf_preview)
128
- chat_input.submit(fn=chat_with_pdf, inputs=chat_input, outputs=chat_output)
129
-
130
- gr.Markdown("Made with ❤️ by Muhammad Ibrahim Qasmi")
131
-
132
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import nest_asyncio
 
 
 
3
  import gradio as gr
4
+ from dotenv import load_dotenv
5
+ from IPython.display import Markdown, display
6
 
7
  from llama_index.core import Settings
8
+ from llama_index.core import VectorStoreIndex, ServiceContext, SimpleDirectoryReader
9
+
10
  from llama_index.llms.cohere import Cohere
11
  from llama_index.embeddings.cohere import CohereEmbedding
12
  from llama_index.postprocessor.cohere_rerank import CohereRerank
 
 
 
 
 
 
 
13
 
14
+ # allows nested access to the event loop
15
+ nest_asyncio.apply()
 
16
 
17
+ # Cohere API key (create one at https://dashboard.cohere.com/api-keys). NOTE: do not commit a real key; prefer loading it from an environment variable.
18
+ API_KEY = 'ziEpsRreaJzBi5HUDap7gMecJWXX69O26Hf71Kxo'
 
 
 
 
 
 
 
19
 
20
+ # setup llm & embedding model
21
+ llm = Cohere(api_key=API_KEY, model="command-r-plus")
 
22
 
23
+ embed_model = CohereEmbedding(
24
+ cohere_api_key=API_KEY,
25
+ model_name="embed-english-v3.0",
26
+ input_type="search_query",
27
+ )
28
 
29
+ # Save the uploaded PDFs to disk, index them, and build a reranked query engine
30
+ def process_pdfs(pdf_files):
31
+ # Create a temporary directory to store the uploaded PDFs
32
+ temp_dir = 'temp_pdf_directory'
33
+ os.makedirs(temp_dir, exist_ok=True)
34
+
35
+ # Save uploaded files to the temporary directory
36
+ for file in pdf_files:
37
+ file_path = os.path.join(temp_dir, file.name)
38
+ with open(file_path, 'wb') as f:
39
+ f.write(file.read())
40
+
41
+ # Load data from the temporary directory
42
+ loader = SimpleDirectoryReader(
43
  input_dir=temp_dir,
44
  required_exts=[".pdf"],
45
  recursive=True
46
  )
47
+ docs = loader.load_data()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
+ # Create an index over loaded data
50
+ Settings.embed_model = embed_model
51
+ index = VectorStoreIndex.from_documents(docs, show_progress=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
+ # Create a cohere reranker
54
+ cohere_rerank = CohereRerank(api_key=API_KEY)
55
 
56
+ # Create the query engine, where we use a cohere reranker on the fetched nodes
57
+ Settings.llm = llm
58
+ query_engine = index.as_query_engine(node_postprocessors=[cohere_rerank])
59
+
60
+ return index, query_engine
61
+
62
+ # Query function
63
+ def query_pdfs(pdf_files, question):
64
+ index, query_engine = process_pdfs(pdf_files)
65
+ response = query_engine.query(question)
66
+ return str(response)
67
+
68
+ # Create Gradio interface
69
+ iface = gr.Interface(
70
+ fn=query_pdfs,
71
+ inputs=[
72
+ gr.inputs.File(label="Upload PDF Files", type="file", multiple=True),
73
+ gr.inputs.Textbox(label="Ask a Question", placeholder="Enter your question here...")
74
+ ],
75
+ outputs="text",
76
+ title="PDF Query System",
77
+ description="Upload PDF files and ask questions to extract information from them."
78
+ )
79
+
80
+ if __name__ == "__main__":
81
+ iface.launch()