tonic committed on
Commit
22e8191
·
1 Parent(s): 75e51f8

Update app.py

Browse files
Files changed (1) hide show
  1. backend/app.py +20 -20
backend/app.py CHANGED
@@ -118,20 +118,19 @@ vectorstore.embedding = CohereEmbeddings(model="embed-multilingual-v2.0", cohere
118
  # Initialize Cohere client
119
  co = cohere.Client(api_key=cohere_api_key)
120
 
121
- def embed_pdf(file, collection_name):
122
- # Save the uploaded file
123
- filename = file.name
124
- file_path = os.path.join('./', filename)
125
-
126
- # Check if the file object has 'read' method
127
- if hasattr(file, 'read'):
128
- file_content = file.read()
 
 
 
129
  else:
130
- # Handle the case where 'read' method is not available
131
- file_content = file
132
-
133
- with open(file_path, 'wb') as f:
134
- f.write(file_content)
135
 
136
  # Checking filetype for document parsing
137
  mime_type = mimetypes.guess_type(file_path)[0]
@@ -148,9 +147,11 @@ def embed_pdf(file, collection_name):
148
  }
149
  client.data_object.create(data_object=weaviate_document, class_name=collection_name)
150
 
151
- os.remove(file_path)
 
 
152
  return {"message": f"Documents embedded in Weaviate collection '{collection_name}'"}
153
-
154
  def retrieve_info(query):
155
  llm = OpenAI(temperature=0, openai_api_key=openai_api_key)
156
  qa = RetrievalQA.from_chain_type(llm, retriever=vectorstore.as_retriever())
@@ -205,15 +206,14 @@ def retrieve_info(query):
205
 
206
  return final_response.choices[0].text
207
 
208
- def combined_interface(query, file, collection_name):
209
  if query:
210
  article_info = retrieve_info(query)
211
  return article_info
212
- elif file is not None and collection_name:
213
- return embed_pdf(file, collection_name)
214
  else:
215
- return "Please enter a query or upload a PDF file."
216
-
217
 
218
  iface = gr.Interface(
219
  fn=combined_interface,
 
118
  # Initialize Cohere client
119
  co = cohere.Client(api_key=cohere_api_key)
120
 
121
+ def embed_pdf(file, filename, collection_name):
122
+ # Check if the input is a filepath (str) or binary (bytes)
123
+ if isinstance(file, str): # filepath
124
+ file_path = file
125
+ with open(file_path, 'rb') as f:
126
+ file_content = f.read()
127
+ elif isinstance(file, bytes): # binary
128
+ file_content = file
129
+ file_path = os.path.join('./', filename)
130
+ with open(file_path, 'wb') as f:
131
+ f.write(file_content)
132
  else:
133
+ return {"error": "Invalid file format"}
 
 
 
 
134
 
135
  # Checking filetype for document parsing
136
  mime_type = mimetypes.guess_type(file_path)[0]
 
147
  }
148
  client.data_object.create(data_object=weaviate_document, class_name=collection_name)
149
 
150
+ # Clean up if a temporary file was created
151
+ if isinstance(file, bytes):
152
+ os.remove(file_path)
153
  return {"message": f"Documents embedded in Weaviate collection '{collection_name}'"}
154
+
155
  def retrieve_info(query):
156
  llm = OpenAI(temperature=0, openai_api_key=openai_api_key)
157
  qa = RetrievalQA.from_chain_type(llm, retriever=vectorstore.as_retriever())
 
206
 
207
  return final_response.choices[0].text
208
 
209
+ def combined_interface(query, file, filename, collection_name):
210
  if query:
211
  article_info = retrieve_info(query)
212
  return article_info
213
+ elif file is not None and filename and collection_name:
214
+ return embed_pdf(file, filename, collection_name)
215
  else:
216
+ return "Please enter a query or upload a PDF file and specify a collection name."
 
217
 
218
  iface = gr.Interface(
219
  fn=combined_interface,