Update utils/mistral.py

utils/mistral.py +16 -5
CHANGED
@@ -4,7 +4,6 @@ import json
 import logging
 from huggingface_hub import InferenceClient
 from huggingface_hub.utils._errors import BadRequestError
-#from huggingface_hub import BadRequestError
 from dotenv import load_dotenv
 from utils.fileTotext import extract_text_based_on_format
 import re
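A note on the import this hunk keeps: huggingface_hub.utils._errors is a private module, so the import can break between huggingface_hub releases. As far as I know the same exception class is also re-exported from the public huggingface_hub.utils namespace, so a more version-tolerant spelling (an assumption about the installed version, not something this commit does) would be:

from huggingface_hub.utils import BadRequestError  # public re-export, where available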
@@ -24,11 +23,19 @@ def Data_Cleaner(text):
     pattern = r".*?format:"
     result = re.split(pattern, text, maxsplit=1)
     if len(result) > 1:
+        # Handle edge cases where JSON might not be properly formatted after 'format:'
         text_after_format = result[1].strip().strip('`').strip('json')
     else:
         text_after_format = text.strip().strip('`').strip('json')
-
-
+
+    # Try to ensure valid JSON is returned
+    try:
+        json.loads(text_after_format) # Check if it's valid JSON
+        return text_after_format
+    except json.JSONDecodeError:
+        logging.error("Data cleaning led to invalid JSON")
+        return text # Return the original text if cleaning goes wrong
+
 
 # Function to call Mistral and process output
 def Model_ProfessionalDetails_Output(resume, client):
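The substance of this hunk: Data_Cleaner now validates its own output before returning it. Below is a minimal standalone sketch of the same idea; the sample response string is invented for illustration, and only the cleaning steps mirror the diff.

import json
import logging
import re

raw = 'Here is the data in the requested format: ```json {"name": "Jane Doe"} ```'

# Same cleaning steps as Data_Cleaner: split on "format:", then peel
# backticks and a leading "json" tag off the remainder.
parts = re.split(r".*?format:", raw, maxsplit=1)
if len(parts) > 1:
    cleaned = parts[1].strip().strip('`').strip('json')
else:
    cleaned = raw.strip().strip('`').strip('json')

# The new guard: only hand back the cleaned text if it parses as JSON.
try:
    json.loads(cleaned)
    print("valid JSON:", cleaned)
except json.JSONDecodeError:
    logging.error("Data cleaning led to invalid JSON")
    print("falling back to the raw text")

One caveat worth flagging: str.strip('json') removes any of the characters j/s/o/n from both ends rather than the literal prefix "json", so it can silently eat trailing letters from an unfenced response; it happens to be harmless for fenced JSON like the sample above.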
@@ -63,7 +70,7 @@ def Model_ProfessionalDetails_Output(resume, client):
 
 
     response = ""
-    for message in client.chat_completion(messages=[system_role, user_prompt], max_tokens=
+    for message in client.chat_completion(messages=[system_role, user_prompt], max_tokens=4096, stream=True, temperature=0.35):
         response += message.choices[0].delta.content
 
     try:
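For context on the rewritten call: with stream=True, InferenceClient.chat_completion yields chunks whose text arrives in choices[0].delta.content. A self-contained sketch of the pattern follows; the model id, the HF_TOKEN variable, and the messages are placeholders rather than values taken from this repo.

import os
from huggingface_hub import InferenceClient

client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2", token=os.getenv("HF_TOKEN"))

messages = [
    {"role": "system", "content": "You extract structured data from resumes."},
    {"role": "user", "content": "Extract the name from: Jane Doe, Software Engineer."},
]

response = ""
for chunk in client.chat_completion(messages=messages, max_tokens=256, stream=True, temperature=0.35):
    delta = chunk.choices[0].delta.content
    if delta is not None:  # some stream chunks carry no text
        response += delta

print(response)

Note the None check: the loop in the diff concatenates delta.content directly, which can raise a TypeError on chunks that carry no text (the role-announcing or final chunk in some responses), so the same guard may be worth adding here too.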
@@ -301,10 +308,11 @@ def process_resume_data(file_path):
     try:
         # Extract personal details using Mistral
         per_data = Model_PersonalDetails_Output(resume_text, client)
+        print(per_data)
 
         # Extract professional details using Mistral
         pro_data = Model_ProfessionalDetails_Output(resume_text, client)
-
+        print(pro_data)
         # Check if per_data and pro_data have been populated correctly
         if not per_data:
             logging.warning("Mistral personal data extraction failed.")
@@ -359,6 +367,7 @@ def process_resume_data(file_path):
     # If Mistral produces valid output, return it
     if per_data or pro_data:
         logging.info("Successfully extracted data using Mistral.")
+        print(result)
         print("---------Mistral-------")
         return result
     else:
@@ -376,3 +385,5 @@ def process_resume_data(file_path):
         logging.warning("Mistral failed, switching to SpaCy.")
         print("---------SpaCy-------")
         return Parser_from_model(file_path)
+
+
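Stepping back, the hunks in process_resume_data all serve one control flow: try Mistral first, and if it yields nothing usable, hand the file to the SpaCy-based Parser_from_model. A generic sketch of that wiring, illustrative only (the real function adds per-field checks between the two steps):

import logging

def parse_with_fallback(file_path, primary, fallback):
    # Try the primary (LLM-based) parser; use the secondary (SpaCy-based)
    # parser if it raises or returns nothing.
    try:
        result = primary(file_path)
        if result:
            logging.info("Successfully extracted data using the primary parser.")
            return result
    except Exception:
        logging.exception("Primary parser raised; falling back.")
    logging.warning("Primary parser failed, switching to fallback.")
    return fallback(file_path)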