# NOTE: page-scrape residue (Spaces status banner + line-number gutter) removed.
import os

import gradio as gr
import numpy as np
import spacy
from langchain import OpenAI
from langchain.embeddings import CohereEmbeddings
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from PyPDF2 import PdfReader, PdfWriter
from sklearn.metrics.pairwise import cosine_similarity
# Sentence splitter used by chatbot() to segment the uploaded document.
nlp = spacy.load('en_core_web_md')

# ~200-character, non-overlapping chunks; the second retrieval stage in
# chatbot() runs over these chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=0)

# Fix: COHERE_API_KEY was referenced but never defined anywhere in the file,
# which raised NameError at import time. Read it from the environment instead;
# a missing key now fails at first API use rather than at module load.
embedding = CohereEmbeddings(
    model='embed-multilingual-v3.0',
    cohere_api_key=os.environ.get('COHERE_API_KEY'),
)
def recieve_pdf(filename):
    """Handle a PDF upload: save a processed copy, cache its text, and
    return a one-line LLM summary.

    Parameters
    ----------
    filename : str
        Path of the uploaded PDF (supplied by the gr.File upload event).

    Returns
    -------
    str
        One-line summary produced by `llm` via `summary_prompt`.

    Side effects: writes 'processed_file.pdf' in the working directory and
    stores the full extracted text in the module-level `file` read by
    chatbot().
    """
    reader = PdfReader(filename)
    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)
    with open('processed_file.pdf', 'wb') as f:
        writer.write(f)
    # Fix: the original re-opened and re-parsed the file it had just written;
    # extract directly from the already-parsed reader instead.
    # NOTE(review): extract_text(0) restricts extraction to one orientation —
    # confirm this is intended rather than an accidental positional argument.
    extracted_text = ''.join(page.extract_text(0) for page in reader.pages)
    global file
    file = extracted_text  # cached document text consumed by chatbot()
    summary_prompt_formated = summary_prompt.format(document=extracted_text)
    return llm(summary_prompt_formated)
def chatbot(query, history):
    """Answer `query` from the cached document and extend the chat history.

    Two-stage retrieval over the module-level `file` set by recieve_pdf():
    (1) embed every spaCy sentence of the document and pick the one most
    similar to the query; (2) embed the ~200-char chunks of the document
    and pick the chunk most similar to that sentence. The winning chunk is
    the context handed to the LLM.

    Parameters
    ----------
    query : str
        The user's question from the textbox.
    history : list
        Gradio chat history; mutated in place with (query, response).

    Returns
    -------
    tuple
        ('', history) — the empty string clears the input textbox.
    """
    # Stage 1: find the document sentence most similar to the query.
    embedded_query = embedding.embed_documents([query])
    sentences = [str(sentence) for sentence in nlp(file).sents]
    embedded_sentences = embedding.embed_documents(sentences)
    # Fix: dropped the pointless one-element `similarity_array` list the
    # original wrapped around this score matrix before argmax.
    sentence_scores = cosine_similarity(embedded_query, embedded_sentences)
    most_similar_sentence = sentences[np.argmax(sentence_scores)]

    # Stage 2: find the 200-char chunk most similar to that sentence.
    chunks = text_splitter.split_text(file)
    embedded_chunks = embedding.embed_documents(chunks)
    embedded_sentence = embedding.embed_documents([most_similar_sentence])
    chunk_scores = cosine_similarity(embedded_sentence, embedded_chunks)
    context_chunk = chunks[np.argmax(chunk_scores)]

    prompt_formated = prompt.format(context=context_chunk, query=query)
    response = llm(prompt_formated)  # fix: local was misspelled `repsonse`
    history.append((query, response))
    return '', history
# Prompt used by recieve_pdf() to produce a one-line document summary.
# Fix: corrected grammar/typos in the instruction text sent to the LLM
# ("You an" -> "You are", "provides file" -> "provided file").
summary_template = """You are an article summarizer and have been provided with this file
{document}
Provide a one-line summary of the content of the provided file.
"""
summary_prompt = PromptTemplate(input_variables=['document'], template=summary_template)
# Prompt used by chatbot(): answer strictly from the retrieved context chunk.
# Fix: corrected grammar in the instruction sent to the LLM
# ("Answer to the following question" -> "Answer the following question").
template = """You are a knowledgeable chatbot that gently answers questions.
You know the following context information.
{context}
Answer the following question from a user. Use only information from the previous context. Do not invent or assume stuff.
Question: {query}
Answer:"""
prompt = PromptTemplate(input_variables=['context', 'query'], template=template)

# Completion-style model; temperature 0 for deterministic, grounded answers.
llm = OpenAI(model='gpt-3.5-turbo-instruct', temperature=0)
# Gradio UI: upload a PDF -> recieve_pdf() returns a summary; then chat over
# the cached document text via chatbot().
with gr.Blocks(theme='finlaymacklon/smooth_slate') as demo:
    # Fix: user-facing typo "recieves" -> "receives".
    signal = gr.Markdown('''# Welcome to Chat with Docs
I am an AI that receives a document and can answer questions on the content of the document.''')
    inp = gr.File()
    out = gr.Textbox(label='Summary')
    # Upload event: processes the PDF and shows the one-line summary.
    inp.upload(fn=recieve_pdf, inputs=inp, outputs=out, show_progress=True)
    signal_1 = gr.Markdown('Use the Textbox below to chat. **Ask** questions regarding the pdf you uploaded')
    chat = gr.Chatbot()
    msg = gr.Textbox(info='input your chat')
    with gr.Row():
        submit = gr.Button('Send')
        clear = gr.ClearButton([msg, chat])
    # Both Enter and the Send button route through chatbot(); it returns
    # ('', history) so the textbox clears after each turn.
    msg.submit(chatbot, [msg, chat], [msg, chat])
    submit.click(chatbot, [msg, chat], [msg, chat])
    feedback = gr.Markdown('# [Please use this to provide feedback](https://forms.gle/oNZKx4nL7DmmJ64g8)')
demo.launch()