Tj committed on
Commit
ea4ce23
·
1 Parent(s): d843704

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -85
app.py CHANGED
@@ -11,6 +11,8 @@ from sklearn.neighbors import NearestNeighbors
11
  def download_pdf(url, output_path):
12
  urllib.request.urlretrieve(url, output_path)
13
 
 
 
14
 
15
  def preprocess(text):
16
  text = text.replace('\n', ' ')
@@ -89,74 +91,56 @@ class SemanticSearch:
89
  return embeddings
90
 
91
 
92
-
93
- def load_recommender(path, start_page=1):
94
  global recommender
95
- texts = pdf_to_text(path, start_page=start_page)
96
- chunks = text_to_chunks(texts, start_page=start_page)
 
97
  recommender.fit(chunks)
98
- return 'Corpus Loaded.'
99
-
100
- def generate_text(openAI_key,prompt, engine="text-davinci-003"):
101
- openai.api_key = openAI_key
102
- completions = openai.Completion.create(
103
- engine=engine,
104
- prompt=prompt,
105
- max_tokens=512,
106
- n=1,
107
- stop=None,
108
- temperature=0.7,
109
- )
110
- message = completions.choices[0].text
111
- return message
112
-
113
- def generate_answer(question,openAI_key):
114
- topn_chunks = recommender(question)
115
- prompt = ""
116
- prompt += 'search results:\n\n'
117
- for c in topn_chunks:
118
- prompt += c + '\n\n'
119
-
120
- prompt += "Instructions: Compose a comprehensive reply to the query using the search results given. "\
121
- "Cite each reference using [ Page Number] notation (every result has this number at the beginning). "\
122
- "Citation should be done at the end of each sentence. If the search results mention multiple subjects "\
123
- "with the same name, create separate answers for each. Only include information found in the results and "\
124
- "don't add any additional information. Make sure the answer is correct and don't output false content. "\
125
- "If the text does not relate to the query, simply state 'Text Not Found in PDF'. Ignore outlier "\
126
- "search results which has nothing to do with the question. Only answer what is asked. The "\
127
- "answer should be short and concise. Answer step-by-step. \n\nQuery: {question}\nAnswer: "
128
-
129
- prompt += f"Query: {question}\nAnswer:"
130
- answer = generate_text(openAI_key, prompt,"text-davinci-003")
131
- return answer
132
-
133
-
134
- def question_answer(url, file, question,openAI_key):
135
- if openAI_key.strip()=='':
136
- return '[ERROR]: Please enter you Open AI Key. Get your key here : https://platform.openai.com/account/api-keys'
137
- if url.strip() == '' and file == None:
138
- return '[ERROR]: Both URL and PDF is empty. Provide atleast one.'
139
-
140
- if url.strip() != '' and file != None:
141
- return '[ERROR]: Both URL and PDF is provided. Please provide only one (eiter URL or PDF).'
142
-
143
- if url.strip() != '':
144
- glob_url = url
145
- download_pdf(glob_url, 'corpus.pdf')
146
- load_recommender('corpus.pdf')
147
-
148
- else:
149
- old_file_name = file.name
150
- file_name = file.name
151
- file_name = file_name[:-12] + file_name[-4:]
152
- os.rename(old_file_name, file_name)
153
- load_recommender(file_name)
154
-
155
- if question.strip() == '':
156
- return '[ERROR]: Question field is empty'
157
-
158
- return generate_answer(question,openAI_key)
159
-
160
 
161
  recommender = SemanticSearch()
162
 
@@ -165,24 +149,22 @@ description = """ PDF GPT allows you to chat with your PDF file using Universal
165
 
166
  with gr.Blocks() as demo:
167
 
168
- gr.Markdown(f'<center><h1>{title}</h1></center>')
169
- gr.Markdown(description)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
- with gr.Row():
172
-
173
- with gr.Group():
174
- gr.Markdown(f'<p style="text-align:center">Get your Open AI API key <a href="https://platform.openai.com/account/api-keys">here</a></p>')
175
- openAI_key=gr.Textbox(label='Enter your OpenAI API key here')
176
- url = gr.Textbox(label='Enter PDF URL here')
177
- gr.Markdown("<center><h4>OR<h4></center>")
178
- file = gr.File(label='Upload your PDF/ Research Paper / Book here', file_types=['.pdf'])
179
- question = gr.Textbox(label='Enter your question here')
180
- btn = gr.Button(value='Submit')
181
- btn.style(full_width=True)
182
-
183
- with gr.Group():
184
- answer = gr.Textbox(label='The answer to your question is :')
185
-
186
- btn.click(question_answer, inputs=[url, file, question,openAI_key], outputs=[answer])
187
- #openai.api_key = os.getenv('Your_Key_Here')
188
  demo.launch()
 
11
  def download_pdf(url, output_path):
12
  urllib.request.urlretrieve(url, output_path)
13
 
14
+ PDF_URL = 'https://www.westlondon.nhs.uk/download_file/view/1459/615'
15
+ OPENAI_API_KEY = '<REDACTED_OPENAI_API_KEY>'
16
 
17
  def preprocess(text):
18
  text = text.replace('\n', ' ')
 
91
  return embeddings
92
 
93
 
94
+ def load_recommender():
 
95
  global recommender
96
+ download_pdf(PDF_URL, 'corpus.pdf')
97
+ texts = pdf_to_text('corpus.pdf', start_page=1)
98
+ chunks = text_to_chunks(texts, start_page=1)
99
  recommender.fit(chunks)
100
+ return '
101
+ def generate_text(prompt, engine="text-davinci-003"):
102
+ openai.api_key = OPENAI_API_KEY
103
+ completions = openai.Completion.create(
104
+ engine=engine,
105
+ prompt=prompt,
106
+ max_tokens=512,
107
+ n=1,
108
+ stop=None,
109
+ temperature=0.7,
110
+ )
111
+ message = completions.choices[0].text
112
+ return message
113
+
114
+ def generate_answer(question):
115
+ topn_chunks = recommender(question)
116
+ prompt = ""
117
+ prompt += 'search results:\n\n'
118
+ for c in topn_chunks:
119
+ prompt += c + '\n\n'
120
+
121
+ swift
122
+
123
+ prompt += "Instructions: Compose a comprehensive reply to the query using the search results given. "\
124
+ "Cite each reference using [ Page Number] notation (every result has this number at the beginning). "\
125
+ "Citation should be done at the end of each sentence. If the search results mention multiple subjects "\
126
+ "with the same name, create separate answers for each. Only include information found in the results and "\
127
+ "don't add any additional information. Make sure the answer is correct and don't output false content. "\
128
+ "If the text does not relate to the query, simply state 'Text Not Found in PDF'. Ignore outlier "\
129
+ "search results which has nothing to do with the question. Only answer what is asked. The "\
130
+ "answer should be short and concise. Answer step-by-step. \n\nQuery: {question}\nAnswer: "
131
+
132
+ prompt += f"Query: {question}\nAnswer:"
133
+ answer = generate_text(prompt, "text-davinci-003")
134
+ return answer
135
+
136
+ def question_answer(question):
137
+ if OPENAI_API_KEY.strip()=='':
138
+ return '[ERROR]: Please enter your OpenAI API Key. Get your key here : https://platform.openai.com/account/api-keys'
139
+ load_recommender()
140
+ if question.strip() == '':
141
+ return '[ERROR]: Question field is empty'
142
+
143
+ return generate_answer(question)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
  recommender = SemanticSearch()
146
 
 
149
 
150
  with gr.Blocks() as demo:
151
 
152
+ scss
153
+
154
+ gr.Markdown(f'<center><h1>{title}</h1></center>')
155
+ gr.Markdown(description)
156
+
157
+ with gr.Row():
158
+
159
+ with gr.Group():
160
+ openAI_key=gr.Textbox(label='OpenAI API key', default=OPENAI_API_KEY)
161
+ question = gr.Textbox(label='Enter your question here')
162
+ btn = gr.Button(value='Submit')
163
+ btn.style(full_width=True)
164
+
165
+ with gr.Group():
166
+ answer = gr.Textbox(label='The answer to your question is :')
167
+
168
+ btn.click(question_answer, inputs=[question], outputs=[answer])
169
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  demo.launch()