tonic committed on
Commit
580dec9
·
1 Parent(s): 22e8191

Update app.py

Browse files
Files changed (1) hide show
  1. backend/app.py +43 -11
backend/app.py CHANGED
@@ -118,19 +118,30 @@ vectorstore.embedding = CohereEmbeddings(model="embed-multilingual-v2.0", cohere
118
  # Initialize Cohere client
119
  co = cohere.Client(api_key=cohere_api_key)
120
 
121
- def embed_pdf(file, filename, collection_name):
122
- # Check if the input is a filepath (str) or binary (bytes)
123
- if isinstance(file, str): # filepath
124
- file_path = file
125
- with open(file_path, 'rb') as f:
126
- file_content = f.read()
127
- elif isinstance(file, bytes): # binary
 
 
 
 
 
 
 
 
 
 
128
  file_content = file
129
  file_path = os.path.join('./', filename)
130
  with open(file_path, 'wb') as f:
131
  f.write(file_content)
132
  else:
133
- return {"error": "Invalid file format"}
 
134
 
135
  # Checking filetype for document parsing
136
  mime_type = mimetypes.guess_type(file_path)[0]
@@ -206,15 +217,36 @@ def retrieve_info(query):
206
 
207
  return final_response.choices[0].text
208
 
209
- def combined_interface(query, file, filename, collection_name):
210
  if query:
211
  article_info = retrieve_info(query)
212
  return article_info
213
- elif file is not None and filename and collection_name:
214
- return embed_pdf(file, filename, collection_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  else:
216
  return "Please enter a query or upload a PDF file and specify a collection name."
217
 
 
218
  iface = gr.Interface(
219
  fn=combined_interface,
220
  inputs=[
 
118
  # Initialize Cohere client
119
  co = cohere.Client(api_key=cohere_api_key)
120
 
121
+ def embed_pdf(file, filename, collection_name, file_type):
122
+ # Check the file type and handle accordingly
123
+ if file_type == "URL":
124
+ # Download the file from the URL
125
+ try:
126
+ context = ssl._create_unverified_context()
127
+ with urllib.request.urlopen(file, context=context) as response, open(filename, 'wb') as out_file:
128
+ data = response.read()
129
+ out_file.write(data)
130
+ file_path = filename
131
+ except Exception as e:
132
+ return {"error": f"Error downloading file from URL: {e}"}
133
+ elif file_type == "Binary":
134
+ # Handle binary file
135
+ if isinstance(file, str):
136
+ # Convert string to bytes if necessary
137
+ file = file.encode()
138
  file_content = file
139
  file_path = os.path.join('./', filename)
140
  with open(file_path, 'wb') as f:
141
  f.write(file_content)
142
  else:
143
+ return {"error": "Invalid file type"}
144
+
145
 
146
  # Checking filetype for document parsing
147
  mime_type = mimetypes.guess_type(file_path)[0]
 
217
 
218
  return final_response.choices[0].text
219
 
220
def combined_interface(query, file, collection_name):
    """Route a UI request to retrieval or to document embedding.

    A truthy ``query`` takes priority and is answered via ``retrieve_info``.
    Otherwise, when both ``file`` and ``collection_name`` are supplied, the
    upload is forwarded to ``embed_pdf``.

    Args:
        query: Question text; any truthy value triggers retrieval.
        file: Uploaded document, indexed as ``file[0]`` (content or URL) and
            ``file[1]`` (filename).
            NOTE(review): assumes the UI component delivers such a pair --
            confirm against the input component actually used.
        collection_name: Name of the target collection, passed through to
            ``embed_pdf``.

    Returns:
        The value returned by ``retrieve_info`` or ``embed_pdf``, or a prompt
        string when neither a query nor a complete upload was provided.
    """
    if query:
        return retrieve_info(query)

    if file is None or not collection_name:
        return "Please enter a query or upload a PDF file and specify a collection name."

    filename = file[1]
    file_content = file[0]

    # A string starting with "http" is treated as a link to download;
    # anything else is treated as the raw document content.
    is_url = isinstance(file_content, str) and file_content.startswith("http")
    file_type = "URL" if is_url else "Binary"

    # embed_pdf takes (file, filename, collection_name, file_type) and does
    # its own download / write-to-disk step, so the content is handed over
    # directly. The previous version staged a temp file and then called
    # embed_pdf with only two arguments, which raised TypeError (and
    # NameError in the URL case, where no temp file was ever created).
    return embed_pdf(file_content, filename, collection_name, file_type)
248
 
249
+
250
  iface = gr.Interface(
251
  fn=combined_interface,
252
  inputs=[