Christopher Román Jaimes
committed on
Commit
·
7f254c6
1
Parent(s):
2c6872f
feat: add bert model to predict house levels.
Browse files
app.py
CHANGED
@@ -10,11 +10,17 @@ import pandas as pd
|
|
10 |
import gradio as gr
|
11 |
# GLiNER Model
|
12 |
from gliner import GLiNER
|
|
|
|
|
13 |
|
14 |
-
# Load Model
|
15 |
model = GLiNER.from_pretrained("chris32/gliner_multi_pii_real_state-v2")
|
16 |
model.eval()
|
17 |
|
|
|
|
|
|
|
|
|
18 |
# Global Variables: For Post Cleaning Inferences
|
19 |
YEAR_OF_REMODELING_LIMIT = 100
|
20 |
CURRENT_YEAR = int(datetime.date.today().year)
|
@@ -189,6 +195,40 @@ threshols_dict = {
|
|
189 |
'NOMBRE_DESARROLLO': 0.9,
|
190 |
}
|
191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
def generate_answer(text):
|
193 |
labels = [
|
194 |
'SUPERFICIE_JARDIN',
|
@@ -216,7 +256,16 @@ def generate_answer(text):
|
|
216 |
entity_prediction_cleaned = clean_prediction(entities_formatted, feature_name, threshols_dict, clean_functions_dict)
|
217 |
if isinstance(entity_prediction_cleaned, str) or isinstance(entity_prediction_cleaned, int):
|
218 |
entities_cleaned[feature_name] = entity_prediction_cleaned
|
219 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
220 |
result_json = json.dumps(entities_cleaned, indent = 4, ensure_ascii = False)
|
221 |
|
222 |
return result_json + "\n \n" + json.dumps(entities_formatted, indent = 4, ensure_ascii = False)
|
|
|
10 |
import gradio as gr
|
11 |
# GLiNER Model
|
12 |
from gliner import GLiNER
|
13 |
# Transformers: text-classification pipeline for the house-levels model.
from transformers import pipeline

# Load GLiNER Model (entity extraction for real-estate listings).
model = GLiNER.from_pretrained("chris32/gliner_multi_pii_real_state-v2")
model.eval()

# BERT Model: fine-tuned DistilBERT classifier, run on CPU.
model_name = "chris32/distilbert-base-spanish-uncased-finetuned-text-intelligence"
pipe = pipeline(model = model_name, device = "cpu")

# Global Variables: For Post Cleaning Inferences
YEAR_OF_REMODELING_LIMIT = 100
CURRENT_YEAR = int(datetime.date.today().year)
|
|
|
195 |
'NOMBRE_DESARROLLO': 0.9,
|
196 |
}
|
197 |
|
198 |
# Map the classifier's raw labels to a house-level count.
# LABEL_0 carries no usable level, hence None.
label_names_dict = dict(
    LABEL_0 = None,
    LABEL_1 = 1,
    LABEL_2 = 2,
    LABEL_3 = 3,
)

# Minimum confidence the BERT classifier must reach before its
# prediction is accepted downstream.
BERT_SCORE_LIMIT = 0.98
|
205 |
+
|
206 |
+
def extract_max_label_score(probabilities):
    """Return the (label, score) pair of the highest-scoring entry.

    `probabilities` is a non-empty list of dicts, each with a 'label'
    and a 'score' key (the per-class output of the HF pipeline).
    """
    top = max(probabilities, key=lambda entry: entry['score'])
    return top['label'], top['score']
|
214 |
+
|
215 |
+
def clean_prediction_bert(label, score):
    """Translate a raw BERT label into its numeric value.

    Returns None when the score does not exceed BERT_SCORE_LIMIT or
    when the label is unknown; otherwise the mapped value from
    label_names_dict (which itself may be None for LABEL_0).
    """
    if score <= BERT_SCORE_LIMIT:
        return None
    return label_names_dict.get(label)
|
221 |
+
|
222 |
# BERT Inference Config — passed as **kwargs to the pipeline call.
# NOTE(review): `return_all_scores` is deprecated in recent transformers
# releases in favor of `top_k=None` — confirm the installed version.
pipe_config = dict(
    batch_size = 8,
    truncation = True,
    max_length = 250,
    add_special_tokens = True,
    return_all_scores = True,
    padding = True,
)
|
231 |
+
|
232 |
def generate_answer(text):
|
233 |
labels = [
|
234 |
'SUPERFICIE_JARDIN',
|
|
|
256 |
entity_prediction_cleaned = clean_prediction(entities_formatted, feature_name, threshols_dict, clean_functions_dict)
|
257 |
if isinstance(entity_prediction_cleaned, str) or isinstance(entity_prediction_cleaned, int):
|
258 |
entities_cleaned[feature_name] = entity_prediction_cleaned
|
259 |
+
|
260 |
+
# BERT Inference
|
261 |
+
predictions = pipe([text], **pipe_config)
|
262 |
+
|
263 |
+
# Format Prediction
|
264 |
+
label, score = extract_max_label_score(predictions[0])
|
265 |
+
prediction_cleaned = clean_prediction_bert(label, score)
|
266 |
+
if isinstance(prediction_cleaned, int):
|
267 |
+
entities_cleaned["NIVELES_CASA"] = prediction_cleaned
|
268 |
+
|
269 |
result_json = json.dumps(entities_cleaned, indent = 4, ensure_ascii = False)
|
270 |
|
271 |
return result_json + "\n \n" + json.dumps(entities_formatted, indent = 4, ensure_ascii = False)
|