Update utils/mistral.py

utils/mistral.py +16 -5
CHANGED
@@ -4,7 +4,6 @@ import json
 import logging
 from huggingface_hub import InferenceClient
 from huggingface_hub.utils._errors import BadRequestError
-#from huggingface_hub import BadRequestError
 from dotenv import load_dotenv
 from utils.fileTotext import extract_text_based_on_format
 import re
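A note on the import this hunk keeps: huggingface_hub.utils._errors is a private module, so the import can break between huggingface_hub releases. As far as I know the same exception class is also re-exported from the public huggingface_hub.utils namespace, so a more version-tolerant spelling (an assumption about the installed version, not something this commit does) would be:

from huggingface_hub.utils import BadRequestError  # public re-export, where available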
@@ -24,11 +23,19 @@ def Data_Cleaner(text):
     pattern = r".*?format:"
     result = re.split(pattern, text, maxsplit=1)
     if len(result) > 1:
+        # Handle edge cases where JSON might not be properly formatted after 'format:'
         text_after_format = result[1].strip().strip('`').strip('json')
     else:
         text_after_format = text.strip().strip('`').strip('json')
-
-
+
+    # Try to ensure valid JSON is returned
+    try:
+        json.loads(text_after_format) # Check if it's valid JSON
+        return text_after_format
+    except json.JSONDecodeError:
+        logging.error("Data cleaning led to invalid JSON")
+        return text # Return the original text if cleaning goes wrong
+
 
 # Function to call Mistral and process output
 def Model_ProfessionalDetails_Output(resume, client):
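The substance of this hunk: Data_Cleaner now validates its own output before returning it. Below is a minimal standalone sketch of the same idea; the sample response string is invented for illustration, and only the cleaning steps mirror the diff.

import json
import logging
import re

raw = 'Here is the data in the requested format: ```json {"name": "Jane Doe"} ```'

# Same cleaning steps as Data_Cleaner: split on "format:", then peel
# backticks and a leading "json" tag off the remainder.
parts = re.split(r".*?format:", raw, maxsplit=1)
if len(parts) > 1:
    cleaned = parts[1].strip().strip('`').strip('json')
else:
    cleaned = raw.strip().strip('`').strip('json')

# The new guard: only hand back the cleaned text if it parses as JSON.
try:
    json.loads(cleaned)
    print("valid JSON:", cleaned)
except json.JSONDecodeError:
    logging.error("Data cleaning led to invalid JSON")
    print("falling back to the raw text")

One caveat worth flagging: str.strip('json') removes any of the characters j/s/o/n from both ends rather than the literal prefix "json", so it can silently eat trailing letters from an unfenced response; it happens to be harmless for fenced JSON like the sample above.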
@@ -63,7 +70,7 @@ def Model_ProfessionalDetails_Output(resume, client):
 
 
     response = ""
-    for message in client.chat_completion(messages=[system_role, user_prompt], max_tokens=
+    for message in client.chat_completion(messages=[system_role, user_prompt], max_tokens=4096, stream=True, temperature=0.35):
         response += message.choices[0].delta.content
 
     try:
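For context on the rewritten call: with stream=True, InferenceClient.chat_completion yields chunks whose text arrives in choices[0].delta.content. A self-contained sketch of the pattern follows; the model id, the HF_TOKEN variable, and the messages are placeholders rather than values taken from this repo.

import os
from huggingface_hub import InferenceClient

client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2", token=os.getenv("HF_TOKEN"))

messages = [
    {"role": "system", "content": "You extract structured data from resumes."},
    {"role": "user", "content": "Extract the name from: Jane Doe, Software Engineer."},
]

response = ""
for chunk in client.chat_completion(messages=messages, max_tokens=256, stream=True, temperature=0.35):
    delta = chunk.choices[0].delta.content
    if delta is not None:  # some stream chunks carry no text
        response += delta

print(response)

Note the None check: the loop in the diff concatenates delta.content directly, which can raise a TypeError on chunks that carry no text (the role-announcing or final chunk in some responses), so the same guard may be worth adding here too.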
@@ -301,10 +308,11 @@ def process_resume_data(file_path):
     try:
         # Extract personal details using Mistral
         per_data = Model_PersonalDetails_Output(resume_text, client)
+        print(per_data)
 
         # Extract professional details using Mistral
         pro_data = Model_ProfessionalDetails_Output(resume_text, client)
-
+        print(pro_data)
         # Check if per_data and pro_data have been populated correctly
         if not per_data:
             logging.warning("Mistral personal data extraction failed.")
@@ -359,6 +367,7 @@ def process_resume_data(file_path):
     # If Mistral produces valid output, return it
     if per_data or pro_data:
         logging.info("Successfully extracted data using Mistral.")
+        print(result)
         print("---------Mistral-------")
         return result
     else:
@@ -376,3 +385,5 @@ def process_resume_data(file_path):
         logging.warning("Mistral failed, switching to SpaCy.")
         print("---------SpaCy-------")
         return Parser_from_model(file_path)
+
+
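Stepping back, the hunks in process_resume_data all serve one control flow: try Mistral first, and if it yields nothing usable, hand the file to the SpaCy-based Parser_from_model. A generic sketch of that wiring, illustrative only (the real function adds per-field checks between the two steps):

import logging

def parse_with_fallback(file_path, primary, fallback):
    # Try the primary (LLM-based) parser; use the secondary (SpaCy-based)
    # parser if it raises or returns nothing.
    try:
        result = primary(file_path)
        if result:
            logging.info("Successfully extracted data using the primary parser.")
            return result
    except Exception:
        logging.exception("Primary parser raised; falling back.")
    logging.warning("Primary parser failed, switching to fallback.")
    return fallback(file_path)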