Hamza011 committed on
Commit
196c8fb
·
1 Parent(s): ee415ff
Files changed (1) hide show
  1. app.py +127 -0
app.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import io
import os

import gradio as gr
import numpy as np
import spacy
from langchain import OpenAI
from langchain.embeddings import CohereEmbeddings
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from PyPDF2 import PdfReader, PdfWriter
from sklearn.metrics.pairwise import cosine_similarity
12
# spaCy pipeline; chatbot() uses it only to split the document into sentences.
nlp = spacy.load('en_core_web_md')

# Splitter used by chatbot() to cut the document into chunks (chunk_size=200,
# no overlap) from which the final answer context is picked.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=0)

# The key was previously referenced without ever being defined, which made the
# module fail with NameError on import. Read it from the environment and fail
# early with an explicit message when it is missing.
COHERE_API_KEY = os.environ.get('COHERE_API_KEY')
if not COHERE_API_KEY:
    raise RuntimeError('The COHERE_API_KEY environment variable must be set.')

# Embedding client shared by both retrieval stages in chatbot().
embedding = CohereEmbeddings(
    model='embed-multilingual-v3.0',
    cohere_api_key=COHERE_API_KEY,
)
16
+
17
+
18
+
19
def recieve_pdf(filename):
    """Extract the text of an uploaded PDF, store it, and return a summary.

    The PDF is copied page by page into a ``PdfWriter``, serialised, and read
    back before text extraction. The extracted text is stored in the
    module-level ``file`` variable so that ``chatbot`` can query it.

    Args:
        filename: Whatever the Gradio ``File`` upload event passes in; it is
            handed to ``PdfReader`` unchanged.

    Returns:
        The LLM output for the summary prompt built from the extracted text.
    """
    # NOTE(review): `file` is a single module-level slot, so it is presumably
    # shared by every session of the app - confirm whether per-session state
    # is needed.
    global file

    source_pdf = PdfReader(filename)
    writer = PdfWriter()
    for page in source_pdf.pages:
        writer.add_page(page)

    # Round-trip through memory instead of a fixed 'processed_file.pdf' in the
    # working directory: a shared on-disk name lets concurrent uploads clobber
    # each other and fails outright when the directory is not writable.
    buffer = io.BytesIO()
    writer.write(buffer)
    buffer.seek(0)

    rewritten_pdf = PdfReader(buffer)
    # The positional 0 is kept exactly as in the original call.
    # NOTE(review): presumably it restricts extraction to text at orientation
    # 0 - verify against the installed PyPDF2 version.
    # `or ''` keeps the join safe if a page yields no text object at all.
    extracted_text = ''.join(
        page.extract_text(0) or '' for page in rewritten_pdf.pages
    )

    file = extracted_text

    # The whole document is interpolated into a single prompt.
    # NOTE(review): very long PDFs may exceed the model's context window -
    # confirm whether truncation or chunked summarisation is required.
    summary_prompt_formated = summary_prompt.format(document=extracted_text)
    return llm(summary_prompt_formated)
40
+
41
+
42
def chatbot(query, history):
    """Answer ``query`` from the uploaded document and record the exchange.

    Retrieval runs in two stages over the text stored in the module-level
    ``file`` variable:

    1. The document is split into sentences with spaCy and the sentence most
       similar to the query (cosine similarity of embeddings) is selected.
    2. The document is split into chunks with ``text_splitter`` and the chunk
       most similar to that sentence becomes the context for the LLM prompt.

    Args:
        query: The user's question.
        history: List of ``(user_message, bot_message)`` tuples; it is
            appended to in place. ``None`` is treated as an empty history.

    Returns:
        A ``('', history)`` tuple. The UI wires the first element to the
        message textbox, so the empty string clears it.
    """
    if history is None:
        history = []

    # `file` only exists after recieve_pdf has run; without this guard a
    # question asked before any upload crashed with NameError.
    try:
        document_text = file
    except NameError:
        document_text = ''

    # Nothing to search (no upload yet, or a PDF with no extractable text):
    # answer with a hint instead of sending empty input to the embedder.
    if not document_text or not document_text.strip():
        history.append(
            (query, 'Please upload a PDF with extractable text before asking questions.')
        )
        return '', history

    # Stage 1: find the single sentence closest to the query.
    sentences = [str(sentence) for sentence in nlp(document_text).sents]
    # Queries go through embed_query (not embed_documents) so the embedding
    # backend can treat them as search queries rather than stored documents.
    query_vector = embedding.embed_query(query)
    sentence_vectors = embedding.embed_documents(sentences)
    sentence_scores = cosine_similarity([query_vector], sentence_vectors)
    best_sentence_index = int(np.argmax(sentence_scores))

    # Stage 2: find the chunk closest to that sentence. The sentence vector
    # computed above is reused instead of embedding the same text again.
    chunks = text_splitter.split_text(document_text)
    chunk_vectors = embedding.embed_documents(chunks)
    chunk_scores = cosine_similarity(
        [sentence_vectors[best_sentence_index]], chunk_vectors
    )
    final_document = chunks[int(np.argmax(chunk_scores))]

    prompt_formated = prompt.format(context=final_document, query=query)
    response = llm(prompt_formated)

    history.append((query, response))
    return '', history
78
+
79
# Completion model shared by the summary and question-answering prompts.
llm = OpenAI(model='gpt-3.5-turbo-instruct', temperature=0)

# Prompt used by recieve_pdf(): the full document text fills {document}.
summary_template = """ You an article summarizer and have been provided with this file

{document}

provide a one line summary of the content of the provides file.

"""
summary_prompt = PromptTemplate(
    template=summary_template,
    input_variables=['document'],
)

# Prompt used by chatbot(): {context} is the retrieved chunk and {query} is
# the user's question.
template = """ You are a knowledgeable chatbot that gently answers questions.

You know the following context information.

{context}

Answer to the following question from a user. Use only information from the previous context. Do not invent or assume stuff.


Question: {query}

Answer:"""
prompt = PromptTemplate(
    template=template,
    input_variables=['context', 'query'],
)
104
+
105
# Gradio UI: a PDF upload that produces a one-line summary, followed by a chat
# panel that answers questions about the uploaded document.
with gr.Blocks(theme='finlaymacklon/smooth_slate') as demo:
    signal = gr.Markdown(
        '# Welcome to Chat with Docs\n'
        'I am an AI that recieves a document and can answer questions on the content of the document.'
    )

    # Uploading a file runs recieve_pdf and shows its result in the textbox.
    inp = gr.File()
    out = gr.Textbox(label='Summary')
    inp.upload(fn=recieve_pdf, inputs=inp, outputs=out, show_progress=True)

    signal_1 = gr.Markdown(
        'Use the Textbox below to chat. **Ask** questions regarding the pdf you uploaded'
    )
    chat = gr.Chatbot()
    msg = gr.Textbox(info='input your chat')

    with gr.Row():
        submit = gr.Button('Send')
        clear = gr.ClearButton([msg, chat])

    # Pressing Enter in the textbox and clicking "Send" run the same handler;
    # its first output ('') clears the textbox, its second updates the chat.
    for register in (msg.submit, submit.click):
        register(chatbot, [msg, chat], [msg, chat])

    feedback = gr.Markdown(
        '# [Please use this to provide feedback](https://forms.gle/oNZKx4nL7DmmJ64g8)'
    )

demo.launch()
127
+