import os

import gradio as gr
from langchain.chains import RetrievalQA
from langchain.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# Directory that DirectoryLoader falls back to when no PDF is uploaded
dir_path = "./docs"

os.makedirs(dir_path, exist_ok=True)
print(f"Docs directory ready at {dir_path}")


def qa_system(pdf_file, openai_key, prompt, chain_type, k):
    os.environ["OPENAI_API_KEY"] = openai_key

    # Load the uploaded PDF. Depending on the Gradio version, gr.File passes
    # either a tempfile-like object (with .name) or a plain filepath string.
    if pdf_file is not None:
        pdf_path = pdf_file.name if hasattr(pdf_file, "name") else pdf_file
        loader = PyPDFLoader(pdf_path)
    else:
        # No upload: fall back to any PDFs already placed in ./docs
        loader = DirectoryLoader(dir_path, glob="**/*.pdf")
    documents = loader.load()

    # Split the documents into chunks for embedding
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)

    # Embed the chunks and index them in an in-memory Chroma vector store
    embeddings = OpenAIEmbeddings()
    db = Chroma.from_documents(texts, embeddings)

    # Retrieve the k most similar chunks for each query
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": int(k)})

    qa = RetrievalQA.from_chain_type(
        llm=OpenAI(), chain_type=chain_type, retriever=retriever, return_source_documents=True
    )

    result = qa({"query": prompt})
    return result["result"], "\n\n".join(doc.page_content for doc in result["source_documents"])

# The gr.inputs / gr.outputs namespaces are deprecated and were removed in newer
# Gradio releases; use the components directly.
input_file = gr.File(label="PDF File")
openai_key = gr.Textbox(label="OpenAI API Key", type="password")
prompt = gr.Textbox(label="Question Prompt")
chain_type = gr.Radio(["stuff", "map_reduce", "refine", "map_rerank"], label="Chain Type")
k = gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Number of Relevant Chunks")

output_text = gr.Textbox(label="Answer")
output_docs = gr.Textbox(label="Relevant Source Text")

gr.Interface(
    fn=qa_system,
    inputs=[input_file, openai_key, prompt, chain_type, k],
    outputs=[output_text, output_docs],
    title="Question Answering with PDF File and OpenAI",
    description="Upload a PDF file, enter your OpenAI API key, type a question prompt, "
                "select a chain type, and choose the number of relevant chunks to use for the answer.",
).launch(debug=True)