Tonic committed on
Commit
a13c2af
·
1 Parent(s): 4d5f35d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -5
app.py CHANGED
@@ -132,6 +132,9 @@ def embed_pdf(file, filename, collection_name, file_type):
132
  return {"error": f"Error downloading file from URL: {e}"}
133
  elif file_type == "Binary":
134
  # Handle binary file
 
 
 
135
  file_content = file
136
  file_path = os.path.join('./', filename)
137
  with open(file_path, 'wb') as f:
@@ -139,6 +142,7 @@ def embed_pdf(file, filename, collection_name, file_type):
139
  else:
140
  return {"error": "Invalid file type"}
141
 
 
142
  # Checking filetype for document parsing
143
  mime_type = mimetypes.guess_type(file_path)[0]
144
  loader = UnstructuredFileLoader(file_path)
@@ -218,20 +222,31 @@ def combined_interface(query, file, collection_name):
218
  article_info = retrieve_info(query)
219
  return article_info
220
  elif file is not None and collection_name:
221
- # Extract filename and content from the file object
222
- filename = file[1] # file[1] contains the filename
223
- file_content = file[0] # file[0] contains the file content
224
 
225
- # Determine if the file is a binary or a URL
226
  if isinstance(file_content, str) and file_content.startswith("http"):
227
  file_type = "URL"
 
228
  else:
229
  file_type = "Binary"
 
 
 
 
 
 
 
230
 
231
- return embed_pdf(file_content, filename, collection_name, file_type)
 
 
 
232
  else:
233
  return "Please enter a query or upload a PDF file and specify a collection name."
234
 
 
235
  iface = gr.Interface(
236
  fn=combined_interface,
237
  inputs=[
 
132
  return {"error": f"Error downloading file from URL: {e}"}
133
  elif file_type == "Binary":
134
  # Handle binary file
135
+ if isinstance(file, str):
136
+ # Convert string to bytes if necessary
137
+ file = file.encode()
138
  file_content = file
139
  file_path = os.path.join('./', filename)
140
  with open(file_path, 'wb') as f:
 
142
  else:
143
  return {"error": "Invalid file type"}
144
 
145
+
146
  # Checking filetype for document parsing
147
  mime_type = mimetypes.guess_type(file_path)[0]
148
  loader = UnstructuredFileLoader(file_path)
 
222
  article_info = retrieve_info(query)
223
  return article_info
224
  elif file is not None and collection_name:
225
+ filename = file[1] # Extract filename
226
+ file_content = file[0] # Extract file content
 
227
 
228
+ # Check if file_content is a URL or binary data
229
  if isinstance(file_content, str) and file_content.startswith("http"):
230
  file_type = "URL"
231
+ # Handle URL case (if needed)
232
  else:
233
  file_type = "Binary"
234
+ # Write binary data to a temporary file
235
+ with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(filename)[1]) as temp_file:
236
+ temp_file.write(file_content)
237
+ temp_filepath = temp_file.name
238
+
239
+ # Pass the file path to embed_pdf
240
+ result = embed_pdf(temp_filepath, collection_name)
241
 
242
+ # Clean up the temporary file
243
+ os.remove(temp_filepath)
244
+
245
+ return result
246
  else:
247
  return "Please enter a query or upload a PDF file and specify a collection name."
248
 
249
+
250
  iface = gr.Interface(
251
  fn=combined_interface,
252
  inputs=[