WebashalarForML committed on
Commit
a34d477
·
verified ·
1 Parent(s): 38daf2d

Update utils/mistral.py

Browse files
Files changed (1) hide show
  1. utils/mistral.py +20 -0
utils/mistral.py CHANGED
@@ -20,6 +20,7 @@ if not HFT:
20
  client = InferenceClient(model="mistralai/Mistral-Nemo-Instruct-2407", token=HFT)
21
 
22
  # Function to clean model output
 
23
  def Data_Cleaner(text):
24
  pattern = r".*?format:"
25
  result = re.split(pattern, text, maxsplit=1)
@@ -36,6 +37,25 @@ def Data_Cleaner(text):
36
  except json.JSONDecodeError:
37
  logging.error("Data cleaning led to invalid JSON")
38
  return text # Return the original text if cleaning goes wrong
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
 
41
  # Function to call Mistral and process output
 
20
  client = InferenceClient(model="mistralai/Mistral-Nemo-Instruct-2407", token=HFT)
21
 
22
  # Function to clean model output
23
+ '''
24
  def Data_Cleaner(text):
25
  pattern = r".*?format:"
26
  result = re.split(pattern, text, maxsplit=1)
 
37
  except json.JSONDecodeError:
38
  logging.error("Data cleaning led to invalid JSON")
39
  return text # Return the original text if cleaning goes wrong
40
+ '''
41
def Data_Cleaner(text):
    """Extract and parse the JSON object embedded in a fenced code block.

    Searches ``text`` for the first Markdown code fence whose content is a
    JSON object (``{...}``). The fence may be tagged ``json`` in any letter
    case, or carry no tag at all.

    Args:
        text: Raw text to search, expected to be a ``str``.

    Returns:
        The parsed JSON object (a ``dict``) when a fenced object is found and
        decodes successfully. Otherwise ``text`` is returned unchanged, so
        callers must be prepared for either a ``dict`` or the original input.
    """
    # Non-string input (e.g. None) cannot be searched; treat it like any other
    # "nothing to extract" case instead of raising TypeError from re.search.
    if not isinstance(text, str):
        logging.error("JSON block not found in the text")
        return text

    # Non-greedy body: the match can only end at a "}" that is directly
    # followed (ignoring whitespace) by the closing fence, so nested objects
    # are captured whole. DOTALL lets "." span the newlines inside the block.
    pattern = r"```(?:json)?\s*(\{.*?\})\s*```"
    match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
    if match is None:
        logging.error("JSON block not found in the text")
        return text  # Return the original text if no match is found

    json_str = match.group(1).strip()

    # Validate by parsing; on failure fall back to the untouched input.
    try:
        return json.loads(json_str)
    except json.JSONDecodeError:
        logging.error("Extracted text is not valid JSON")
        return text  # Return the original text if JSON decoding fails
59
 
60
 
61
  # Function to call Mistral and process output