Spaces:
Running
Running
File size: 3,787 Bytes
bf6d94a 48c5b47 bf6d94a 48c5b47 bf6d94a 169b01a bf6d94a 48c5b47 bf6d94a 169b01a bf6d94a 48c5b47 a5fb48a 48c5b47 bf6d94a 48c5b47 bf6d94a 169b01a bf6d94a 48c5b47 bf6d94a 169b01a bf6d94a 48c5b47 a5fb48a 48c5b47 bf6d94a 48c5b47 bf6d94a 48c5b47 bf6d94a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
import openai
import gradio as gr
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from PyPDF2 import PdfReader
# Function to load and process the PDF document
def load_pdf(file):
    """Load a PDF into LangChain documents via ``PyPDFLoader``.

    Args:
        file: The uploaded PDF. Accepted forms are a filesystem path
            (``str`` or ``os.PathLike``) or any object that exposes the
            path through a ``.name`` attribute (e.g. a temp-file wrapper).

    Returns:
        The result of ``PyPDFLoader(path).load()``.

    Raises:
        ValueError: If no file was supplied or no path can be derived
            from it.
    """
    import os  # local import: only needed for path normalisation here

    if file is None:
        # Happens when the action is triggered before anything is uploaded;
        # fail with a readable message instead of an AttributeError on None.
        raise ValueError("No PDF file was provided. Please upload a PDF document.")

    if isinstance(file, (str, os.PathLike)):
        # Path-like input: use the full path. (``pathlib.Path.name`` would
        # only be the basename, so it must not go through the branch below.)
        pdf_path = os.fspath(file)
    else:
        pdf_path = getattr(file, "name", None)

    if not pdf_path:
        raise ValueError("Could not determine the path of the uploaded PDF file.")

    loader = PyPDFLoader(pdf_path)
    return loader.load()
# Summarization function using GPT-4
def summarize_pdf(file, openai_api_key):
    """Produce a summary of an uploaded PDF with a retrieval QA chain.

    The PDF is loaded with ``load_pdf``, embedded with ``OpenAIEmbeddings``
    into a FAISS vector store, and a ``RetrievalQA`` "stuff" chain backed by
    the ``gpt-4o`` chat model is asked to summarize the paper.

    NOTE(review): the chain answers from whatever chunks the retriever
    returns for the summary prompt, so the result may not reflect the whole
    document -- confirm this is acceptable for long papers.

    Args:
        file: Uploaded PDF, in any form ``load_pdf`` accepts.
        openai_api_key: OpenAI API key used for both embeddings and chat.

    Returns:
        The chain's response to the fixed summarization prompt.

    Raises:
        ValueError: If ``openai_api_key`` is missing or blank.
    """
    # Validate up front so a missing key fails fast with a clear message,
    # before any PDF parsing or network traffic happens.
    api_key = (openai_api_key or "").strip()
    if not api_key:
        raise ValueError("An OpenAI API key is required.")

    # The key is handed explicitly to each client instead of being written
    # to the process-wide ``openai.api_key``: this app serves several users
    # from one process, and a shared global would let sessions overwrite
    # each other's credentials.
    documents = load_pdf(file)
    embeddings = OpenAIEmbeddings(openai_api_key=api_key)

    # Index the document chunks so the chain can retrieve relevant passages.
    vector_store = FAISS.from_documents(documents, embeddings)

    llm = ChatOpenAI(model="gpt-4o", openai_api_key=api_key)
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_store.as_retriever(),
    )

    return qa_chain.run("Summarize the content of the research paper.")
# Function to handle user queries and provide answers from the document
def query_pdf(file, user_query, openai_api_key):
    """Answer a user's question about an uploaded PDF.

    The PDF is loaded with ``load_pdf``, embedded with ``OpenAIEmbeddings``
    into a FAISS vector store, and a ``RetrievalQA`` "stuff" chain backed by
    the ``gpt-4o`` chat model is run on the question.

    Args:
        file: Uploaded PDF, in any form ``load_pdf`` accepts.
        user_query: The question to ask about the document.
        openai_api_key: OpenAI API key used for both embeddings and chat.

    Returns:
        The chain's response to ``user_query``.

    Raises:
        ValueError: If ``openai_api_key`` or ``user_query`` is missing or
            blank.
    """
    # Cheap input checks first, so bad input never triggers PDF parsing or
    # paid API calls.
    api_key = (openai_api_key or "").strip()
    if not api_key:
        raise ValueError("An OpenAI API key is required.")

    question = (user_query or "").strip()
    if not question:
        raise ValueError("Please enter a question about the document.")

    # The key is handed explicitly to each client instead of being written
    # to the process-wide ``openai.api_key``: this app serves several users
    # from one process, and a shared global would let sessions overwrite
    # each other's credentials.
    documents = load_pdf(file)
    embeddings = OpenAIEmbeddings(openai_api_key=api_key)

    # Index the document chunks so the chain can retrieve relevant passages.
    vector_store = FAISS.from_documents(documents, embeddings)

    llm = ChatOpenAI(model="gpt-4o", openai_api_key=api_key)
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_store.as_retriever(),
    )

    return qa_chain.run(question)
# Define Gradio interface for the summarization
def create_gradio_interface():
    """Build the Gradio Blocks UI for summarizing and querying a PDF.

    The page has a shared API-key field and two tabs:

    * "Summarize PDF": upload a PDF and click the button to run
      ``summarize_pdf``.
    * "Ask Questions": upload a PDF and press Enter in the question box to
      run ``query_pdf``.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown("### ChatPDF and Research Paper Summarizer using GPT-4 and LangChain")

        # API key shared by both tabs; masked because it is a secret.
        with gr.Row():
            openai_api_key_input = gr.Textbox(
                label="Enter OpenAI API Key",
                type="password",
                placeholder="Enter your OpenAI API key here",
            )

        with gr.Tab("Summarize PDF"):
            with gr.Row():
                pdf_file = gr.File(label="Upload PDF Document")
                summarize_btn = gr.Button("Summarize")
                summary_output = gr.Textbox(label="Summary", interactive=False)

            summarize_btn.click(
                summarize_pdf,
                inputs=[pdf_file, openai_api_key_input],
                outputs=summary_output,
            )

        with gr.Tab("Ask Questions"):
            with gr.Row():
                pdf_file_q = gr.File(label="Upload PDF Document")
                user_input = gr.Textbox(label="Enter your question")
                answer_output = gr.Textbox(label="Answer", interactive=False)

            user_input.submit(
                query_pdf,
                inputs=[pdf_file_q, user_input, openai_api_key_input],
                outputs=answer_output,
            )
            # Clear the previous answer as soon as the question text is
            # edited, so a stale answer is never shown next to a new
            # question. This must be a ``change`` listener with a real
            # function: a ``submit`` listener without a function has
            # nothing to run and would never reset the box.
            # queue=False keeps this trivial update from waiting behind
            # long-running queries.
            user_input.change(
                lambda: "",
                inputs=None,
                outputs=answer_output,
                queue=False,
            )

    return demo
# Run Gradio app
if __name__ == "__main__":
    # Build the Blocks app and start serving it when run as a script.
    demo = create_gradio_interface()
    demo.launch(debug=True)