# app.py -- copied from a Hugging Face file view.
# Author: Rajut | commit cd8aa60 ("Create app.py", verified) | size: 1.99 kB
from dotenv import load_dotenv
import os
from PyPDF2 import PdfReader
import docx
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain_openai import OpenAI
from langchain.callbacks import get_openai_callback
import gradio as gr
from aiohttp import web
# Load variables from a local .env file (if one exists) into the process
# environment before anything reads the OpenAI credentials.
load_dotenv()

# SECURITY: the API key must never be committed to source control. It is read
# from the environment (or the .env file loaded above). Any key that was
# previously hard-coded here should be treated as leaked and revoked.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it or add it to a .env file."
    )
def read_txt(file_path):
    """Return the full contents of the text file at *file_path*.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the locale/default encoding of the machine running the script.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The file contents as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        return file.read()
def _read_pdf(file_path):
    """Return the text extracted from every page of the PDF at *file_path*."""
    reader = PdfReader(file_path)
    # `or ""` guards against a page for which no text could be extracted.
    return "".join(page.extract_text() or "" for page in reader.pages)


def _read_word(file_path):
    """Return the paragraph text of the .docx file at *file_path*."""
    document = docx.Document(file_path)
    return "\n".join(paragraph.text for paragraph in document.paragraphs)


def read_documents_from_directory(directory):
    """Concatenate the text of every supported document in *directory*.

    Files ending in ``.pdf``, ``.docx`` and ``.txt`` are read; any other entry
    is skipped. The texts are joined with no separator, in the order returned
    by ``os.listdir``.

    Args:
        directory: Path of the directory to scan (not recursive).

    Returns:
        One string with the combined text, or ``""`` when the directory
        contains no supported files.
    """
    parts = []
    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        if filename.endswith(".pdf"):
            parts.append(_read_pdf(file_path))
        elif filename.endswith(".docx"):
            parts.append(_read_word(file_path))
        elif filename.endswith(".txt"):
            parts.append(read_txt(file_path))
    # Join once instead of growing a string with repeated `+=`.
    return "".join(parts)
# Path of the document to index. Defaults to the original Colab location and
# can be overridden with the DOCUMENT_PATH environment variable so the script
# also runs outside that environment.
text_file_path = os.getenv("DOCUMENT_PATH", '/content/lawsofpower.txt')

# NOTE: despite its name, `user_query` holds the full document text that is
# indexed below, not a question from a user.
user_query = read_txt(text_file_path)

# Split the document on newlines into chunks of up to 1000 characters, with a
# 200-character overlap between consecutive chunks.
char_text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)
text_chunks = char_text_splitter.split_text(user_query)

# Embed every chunk and build the FAISS index used for similarity search.
embeddings = OpenAIEmbeddings()
docsearch = FAISS.from_texts(text_chunks, embeddings)

# Question-answering chain; "stuff" is the chain type passed to load_qa_chain.
llm = OpenAI()
chain = load_qa_chain(llm, chain_type="stuff")
async def chatbot_interface(request):
    """Answer the question posted in the ``input_text`` form field.

    The question is matched against the module-level ``docsearch`` index and
    the retrieved documents are passed to the module-level ``chain``.

    Args:
        request: The incoming aiohttp request; its POST form data is read.

    Returns:
        A ``web.Response`` whose body is the chain's answer, or a 400 response
        when ``input_text`` is missing, empty, or not plain text.
    """
    import asyncio

    form = await request.post()
    question = form.get("input_text", "")
    # Reject missing/blank input instead of sending an empty query to the
    # embedding and completion calls.
    if not isinstance(question, str) or not question.strip():
        return web.Response(
            status=400, text="Missing required form field 'input_text'."
        )

    def _answer():
        docs = docsearch.similarity_search(question)
        return chain.run(input_documents=docs, question=question)

    # The search and chain calls are synchronous; run them in the default
    # executor so the event loop keeps serving other requests meanwhile.
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(None, _answer)
    return web.Response(text=response)
# aiohttp application exposing the chatbot as POST /chatbot.
app = web.Application()
app.router.add_post('/chatbot', chatbot_interface)

if __name__ == "__main__":
    # Environment variables are always strings; convert so run_app receives an
    # int port whether or not PORT is set.
    web.run_app(app, port=int(os.getenv("PORT", "8080")))