Tonic committed on
Commit
4d5f35d
·
1 Parent(s): a7d5408

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -11
app.py CHANGED
@@ -118,19 +118,26 @@ vectorstore.embedding = CohereEmbeddings(model="embed-multilingual-v2.0", cohere
118
  # Initialize Cohere client
119
  co = cohere.Client(api_key=cohere_api_key)
120
 
121
- def embed_pdf(file, filename, collection_name):
122
- # Check if the input is a filepath (str) or binary (bytes)
123
- if isinstance(file, str): # filepath
124
- file_path = file
125
- with open(file_path, 'rb') as f:
126
- file_content = f.read()
127
- elif isinstance(file, bytes): # binary
 
 
 
 
 
 
 
128
  file_content = file
129
  file_path = os.path.join('./', filename)
130
  with open(file_path, 'wb') as f:
131
  f.write(file_content)
132
  else:
133
- return {"error": "Invalid file format"}
134
 
135
  # Checking filetype for document parsing
136
  mime_type = mimetypes.guess_type(file_path)[0]
@@ -206,12 +213,22 @@ def retrieve_info(query):
206
 
207
  return final_response.choices[0].text
208
 
209
- def combined_interface(query, file, filename, collection_name):
210
  if query:
211
  article_info = retrieve_info(query)
212
  return article_info
213
- elif file is not None and filename and collection_name:
214
- return embed_pdf(file, filename, collection_name)
 
 
 
 
 
 
 
 
 
 
215
  else:
216
  return "Please enter a query or upload a PDF file and specify a collection name."
217
 
 
118
  # Initialize Cohere client
119
  co = cohere.Client(api_key=cohere_api_key)
120
 
121
+ def embed_pdf(file, filename, collection_name, file_type):
122
+ # Check the file type and handle accordingly
123
+ if file_type == "URL":
124
+ # Download the file from the URL
125
+ try:
126
+ context = ssl._create_unverified_context()
127
+ with urllib.request.urlopen(file, context=context) as response, open(filename, 'wb') as out_file:
128
+ data = response.read()
129
+ out_file.write(data)
130
+ file_path = filename
131
+ except Exception as e:
132
+ return {"error": f"Error downloading file from URL: {e}"}
133
+ elif file_type == "Binary":
134
+ # Handle binary file
135
  file_content = file
136
  file_path = os.path.join('./', filename)
137
  with open(file_path, 'wb') as f:
138
  f.write(file_content)
139
  else:
140
+ return {"error": "Invalid file type"}
141
 
142
  # Checking filetype for document parsing
143
  mime_type = mimetypes.guess_type(file_path)[0]
 
213
 
214
  return final_response.choices[0].text
215
 
216
+ def combined_interface(query, file, collection_name):
217
  if query:
218
  article_info = retrieve_info(query)
219
  return article_info
220
+ elif file is not None and collection_name:
221
+ # Extract filename and content from the file object
222
+ filename = file[1] # file[1] contains the filename
223
+ file_content = file[0] # file[0] contains the file content
224
+
225
+ # Determine if the file is a binary or a URL
226
+ if isinstance(file_content, str) and file_content.startswith("http"):
227
+ file_type = "URL"
228
+ else:
229
+ file_type = "Binary"
230
+
231
+ return embed_pdf(file_content, filename, collection_name, file_type)
232
  else:
233
  return "Please enter a query or upload a PDF file and specify a collection name."
234