Redmind committed on
Commit
35aaa48
·
verified ·
1 Parent(s): 983a293

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +106 -0
app.py CHANGED
@@ -206,3 +206,109 @@ def retrieval(query: str):
206
  return {"results": "No relevant match found in ChromaDB."}
207
  except Exception as e:
208
  return {"error": str(e)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  return {"results": "No relevant match found in ChromaDB."}
207
  except Exception as e:
208
  return {"error": str(e)}
209
+
210
+ import pandas as pd
211
+ from io import StringIO
212
+ import os
213
+ import base64
214
@app.get("/save_file_dify")
def save_file_dify(csv_data: str):
    """Normalize ragged CSV text, upload it to Dify, and return a download link.

    Rows that have fewer fields than the widest row are padded with empty
    fields so the text forms a rectangular table before it is parsed with
    ``pandas.read_csv`` (default options).

    Args:
        csv_data: Raw CSV text.

    Returns:
        Whatever ``get_download_link_dify`` returns for the parsed DataFrame,
        or ``{"error": ...}`` when ``csv_data`` contains no rows.
    """
    import csv

    # Parse with the csv module instead of counting commas, so commas inside
    # quoted fields are not mistaken for column separators. newline="" lets
    # the reader handle "\n", "\r\n" and embedded newlines itself.
    rows = [
        row
        for row in csv.reader(StringIO(csv_data, newline=""))
        # Keep a row only if it has several fields or one non-blank field;
        # blank/whitespace-only lines would otherwise be padded into
        # all-empty rows.
        if len(row) > 1 or (row and row[0].strip())
    ]
    if not rows:
        # max() below would raise on an empty sequence; report it instead.
        return {"error": "csv_data contains no rows."}

    max_cols = max(len(row) for row in rows)

    # Re-serialize with every row padded to the same width.
    normalized = StringIO()
    writer = csv.writer(normalized, lineterminator="\n")
    for row in rows:
        writer.writerow(row + [""] * (max_cols - len(row)))
    normalized.seek(0)

    df = pd.read_csv(normalized)

    # Save in the Dify dataset and return the download link.
    return get_download_link_dify(df)
237
+
238
+
239
def get_download_link_dify(df):
    """Upload ``df`` to a Dify dataset as an Excel file and return its URL.

    The DataFrame is serialized with ``dataframe_to_binary``, posted to the
    dataset's ``document/create-by-file`` endpoint, and the resulting
    document's ``upload-file`` endpoint is then queried for ``download_url``.

    Args:
        df: DataFrame to upload (passed straight to ``dataframe_to_binary``).

    Returns:
        The ``download_url`` reported by the API with every ``"download/"``
        substring removed.

    Raises:
        requests.HTTPError: If either API call returns an error status.
        RuntimeError: If the API response carries no ``download_url``.
    """
    import json
    import logging
    import os

    import requests

    logger = logging.getLogger(__name__)

    # API configuration. Environment variables take precedence; the literals
    # are kept only as backward-compatible fallbacks.
    # NOTE(review): an API key is committed in source here -- it should be
    # rotated and supplied via the environment only.
    base_url = os.environ.get(
        "DIFY_BASE_URL", "http://redmindgpt.redmindtechnologies.com:81/v1"
    )
    dataset_id = os.environ.get(
        "DIFY_DATASET_ID", "084ae979-d101-414b-8854-9bbf5d3a442e"
    )
    api_key = os.environ.get("DIFY_API_KEY", "dataset-feqz5KrqHkFRdWbh2DInt58L")
    # requests waits indefinitely by default; bound each call (seconds).
    timeout = 60

    headers = {"Authorization": f"Bearer {api_key}"}

    # Indexing/processing options, sent as a JSON string in the "data" form
    # field alongside the file part.
    process_config = {
        "indexing_technique": "high_quality",
        "process_rule": {
            "rules": {
                "pre_processing_rules": [
                    {"id": "remove_extra_spaces", "enabled": True},
                    {"id": "remove_urls_emails", "enabled": True},
                ],
                "segmentation": {
                    "separator": "###",
                    "max_tokens": 500,
                },
            },
            "mode": "custom",
        },
    }
    data_payload = {"data": json.dumps(process_config)}

    # Convert the DataFrame to an in-memory Excel file for the multipart body.
    files = {
        "file": (
            "output.xlsx",
            dataframe_to_binary(df),
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )
    }

    create_url = f"{base_url}/datasets/{dataset_id}/document/create-by-file"
    logger.debug("POST %s", create_url)
    response = requests.post(
        create_url,
        headers=headers,
        data=data_payload,
        files=files,
        timeout=timeout,
    )
    logger.debug("create-by-file response: %s", response)
    # Fail loudly on HTTP errors instead of a confusing KeyError below.
    response.raise_for_status()
    document_id = response.json()["document"]["id"]

    # Look up the stored file's download URL.
    file_url = f"{base_url}/datasets/{dataset_id}/documents/{document_id}/upload-file"
    response = requests.get(file_url, headers=headers, timeout=timeout)
    logger.debug("upload-file response: %s", response)
    response.raise_for_status()

    download_url = response.json().get("download_url")
    if not download_url:
        raise RuntimeError(
            f"Dify returned no download_url for document {document_id}"
        )

    # NOTE(review): the "download/" segment is stripped as in the original
    # code; presumably this deployment serves the file at the shorter path --
    # confirm.
    return download_url.replace("download/", "")
301
+
302
def dataframe_to_binary(df):
    """Serialize ``df`` to an in-memory Excel workbook.

    Args:
        df: Object exposing ``to_excel`` (a pandas DataFrame in this file).

    Returns:
        A ``BytesIO`` holding the workbook, rewound to offset 0 so it can be
        read from the start.
    """
    from io import BytesIO

    excel_stream = BytesIO()
    # Write without the index column, using the openpyxl engine.
    df.to_excel(excel_stream, index=False, engine="openpyxl")
    # Rewind so consumers read from the first byte.
    excel_stream.seek(0)
    return excel_stream
314
+