saritha committed on
Commit e20e8c6 · verified · 1 Parent(s): 73619fb

Update app.py

Files changed (1)
  1. app.py +48 -49
app.py CHANGED
@@ -1,75 +1,80 @@
 import os
 import gradio as gr
 import asyncio
-from datetime import datetime
 from langchain_core.prompts import PromptTemplate
 from langchain_community.document_loaders import PyPDFLoader
 from langchain_google_genai import ChatGoogleGenerativeAI
 import google.generativeai as genai
-from langchain.chains.question_answering import load_qa_chain  # Import load_qa_chain
+from langchain.chains.question_answering import load_qa_chain
 
-# Initialize an empty list to store chat history and context
-chat_history = []
-context_history = ""
+# Initialize a dictionary to store chat history and context per session
+session_contexts = {}
 
-async def initialize(file_path, question):
-    global context_history
-
+async def initialize(file_path, question, session_id):
     genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
     model = genai.GenerativeModel('gemini-pro')
     model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
 
     # Refined prompt template to encourage precise and concise answers
-    prompt_template = """Answer the question precisely and concisely using the provided context. Avoid any additional commentary or system messages.
+    prompt_template = """You are a helpful assistant. Use the context provided below to answer the question precisely and concisely.
     If the answer is not contained in the context, respond with "answer not available in context".
-
+
     Context:
     {context}
 
+    Conversation History:
+    {history}
+
     Question:
    {question}
 
     Answer:
     """
-    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
-
+    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "history", "question"])
+
+    # Get or initialize the context and history for the current session
+    context_history = session_contexts.get(session_id, {"context": "", "history": ""})
+    combined_context = context_history["context"]
+    conversation_history = context_history["history"]
+
     if os.path.exists(file_path):
         pdf_loader = PyPDFLoader(file_path)
         pages = pdf_loader.load_and_split()
-
+
         # Extract content from each page and store along with page number
         page_contexts = [page.page_content for i, page in enumerate(pages)]
         context = "\n".join(page_contexts[:30])  # Using the first 30 pages for context
-
+
         # Load the question-answering chain
         stuff_chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
-
-        # Combine previous context with the new context
-        combined_context = context_history + "\n" + context
+
+        # Combine previous context and conversation history with the new context
+        full_context = combined_context + "\n" + context
+        full_history = conversation_history + f"\nQ: {question}"
 
         # Get the answer from the model
-        stuff_answer = await stuff_chain.ainvoke({"input_documents": pages, "question": question, "context": combined_context})
+        stuff_answer = await stuff_chain.ainvoke({"input_documents": pages, "question": question, "context": full_context, "history": full_history})
         answer = stuff_answer.get('output_text', '').strip()
-
+
         # Identify key sentences or phrases
         key_phrases = answer.split(". ")  # Split answer into sentences for more precise matching
-
+
         # Score each page based on the presence of key phrases
         page_scores = [0] * len(pages)
         for i, page in enumerate(pages):
             for phrase in key_phrases:
                 if phrase.lower() in page.page_content.lower():
                     page_scores[i] += 1
-
+
         # Determine the top pages based on highest scores
         top_pages_with_scores = sorted(enumerate(page_scores), key=lambda x: x[1], reverse=True)
         top_pages = [i + 1 for i, score in top_pages_with_scores if score > 0][:2]  # Get top 2 pages
-
+
         # Generate links for each top page
         file_name = os.path.basename(file_path)
         page_links = [f"[Page {p}](file://{os.path.abspath(file_path)})" for p in top_pages]
         page_links_str = ', '.join(page_links)
-
+
         if top_pages:
             source_str = f"Top relevant page(s): {page_links_str}"
         else:
@@ -77,20 +82,13 @@ async def initialize(file_path, question):
 
         # Create a clickable link for the document
         source_link = f"[Document: {file_name}](file://{os.path.abspath(file_path)})"
-
-        # Save interaction to chat history
-        timestamp = datetime.now().isoformat()
-        chat_history.append({
-            'timestamp': timestamp,
-            'question': question,
-            'answer': answer,
-            'source': source_str,
-            'document_link': source_link
-        })
-
-        # Update context history
-        context_history += f"\nQ: {question}\nA: {answer}"
-
+
+        # Update session context with the new question and answer
+        session_contexts[session_id] = {
+            "context": full_context,
+            "history": full_history + f"\nA: {answer}"
+        }
+
         return f"Answer: {answer}\n{source_str}\n{source_link}"
     else:
         return "Error: Unable to process the document. Please ensure the PDF file is valid."
@@ -100,36 +98,37 @@ input_file = gr.File(label="Upload PDF File")
 input_question = gr.Textbox(label="Ask about the document")
 output_text = gr.Textbox(label="Answer and Top Pages", lines=10, max_lines=10)
 
-def get_chat_history():
-    history_str = "\n".join([f"Q: {entry['question']}\nA: {entry['answer']}\n{entry['source']}\n{entry['document_link']}\nTimestamp: {entry['timestamp']}\n" for entry in chat_history])
-    return history_str
+def get_chat_history(session_id):
+    if session_id in session_contexts:
+        return session_contexts[session_id]["history"]
+    else:
+        return "No history available for this session."
 
-async def pdf_qa(file, question):
+async def pdf_qa(file, question, session_id):
     if file is None:
         return "Error: No file uploaded. Please upload a PDF document."
 
-    answer = await initialize(file.name, question)
+    answer = await initialize(file.name, question, session_id)
     return answer
 
 # Create Gradio Interfaces
 qa_interface = gr.Interface(
-    fn=pdf_qa,
-    inputs=[input_file, input_question],
+    fn=lambda file, question, session_id: asyncio.run(pdf_qa(file, question, session_id)),
+    inputs=[input_file, input_question, gr.Textbox(label="Session ID", placeholder="Enter a session ID to track your conversation")],
     outputs=output_text,
     title="PDF Question Answering System",
-    description="Upload a PDF file and ask questions about the content."
+    description="Upload a PDF file and ask questions about the content. Provide a session ID to maintain conversation context."
 )
 
 history_interface = gr.Interface(
-    fn=get_chat_history,
-    inputs=[],
+    fn=lambda session_id: get_chat_history(session_id),
+    inputs=gr.Textbox(label="Session ID", placeholder="Enter a session ID to view chat history"),
     outputs=gr.Textbox(label="Chat History", lines=20, max_lines=20),
     title="Chat History",
-    description="View the history of interactions."
+    description="View the history of interactions for a specific session."
 )
 
 # Launch both interfaces
 qa_interface.launch(share=True)
 history_interface.launch(share=True)
 
-
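
The heart of this change is the per-session store: session_contexts maps each session ID to a {"context": str, "history": str} dict, and each turn appends a Q/A pair to the history string. A minimal, self-contained sketch of that round-trip, independent of Gradio and the model; fake_answer is a hypothetical stand-in for the real chain call, and only the session_contexts shape mirrors app.py:

    # Per-session store, same shape as in app.py: id -> {"context", "history"}
    session_contexts = {}

    def fake_answer(question):
        # Hypothetical stand-in for the Gemini/LangChain call in app.py.
        return f"stub answer to: {question}"

    def ask(session_id, question):
        # Fetch (or create) this session's state, answer, then append the turn.
        state = session_contexts.get(session_id, {"context": "", "history": ""})
        answer = fake_answer(question)
        state["history"] += f"\nQ: {question}\nA: {answer}"
        session_contexts[session_id] = state
        return answer

    ask("s1", "What is the title?")
    ask("s1", "Who is the author?")
    ask("s2", "What is the title?")
    print(session_contexts["s1"]["history"])  # two turns, isolated from s2

Because session_contexts is a module-level dict, history survives across calls within one process but is lost on restart and is shared by every user of that process; anything beyond a demo would want a persistent, per-user store.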