Spaces:
Sleeping
Sleeping
Upload semantic_analysis.py
Browse files
modules/text_analysis/semantic_analysis.py
CHANGED
|
@@ -25,7 +25,7 @@ from .stopwords import (
|
|
| 25 |
get_stopwords_for_spacy
|
| 26 |
)
|
| 27 |
|
| 28 |
-
|
| 29 |
# Define colors for grammatical categories
|
| 30 |
POS_COLORS = {
|
| 31 |
'ADJ': '#FFA07A', 'ADP': '#98FB98', 'ADV': '#87CEFA', 'AUX': '#DDA0DD',
|
|
@@ -34,7 +34,6 @@ POS_COLORS = {
|
|
| 34 |
'SCONJ': '#DEB887', 'SYM': '#7B68EE', 'VERB': '#FF69B4', 'X': '#A9A9A9',
|
| 35 |
}
|
| 36 |
|
| 37 |
-
###########################################################
|
| 38 |
POS_TRANSLATIONS = {
|
| 39 |
'es': {
|
| 40 |
'ADJ': 'Adjetivo', 'ADP': 'Preposición', 'ADV': 'Adverbio', 'AUX': 'Auxiliar',
|
|
@@ -56,17 +55,9 @@ POS_TRANSLATIONS = {
|
|
| 56 |
'NOUN': 'Nom', 'NUM': 'Nombre', 'PART': 'Particule', 'PRON': 'Pronom',
|
| 57 |
'PROPN': 'Nom Propre', 'SCONJ': 'Conjonction de Subordination', 'SYM': 'Symbole',
|
| 58 |
'VERB': 'Verbe', 'X': 'Autre',
|
| 59 |
-
},
|
| 60 |
-
'pt': {
|
| 61 |
-
'ADJ': 'Adjetivo', 'ADP': 'Preposição', 'ADV': 'Advérbio', 'AUX': 'Auxiliar',
|
| 62 |
-
'CCONJ': 'Conjunção Coordenativa', 'DET': 'Determinante', 'INTJ': 'Interjeição',
|
| 63 |
-
'NOUN': 'Substantivo', 'NUM': 'Número', 'PART': 'Partícula', 'PRON': 'Pronome',
|
| 64 |
-
'PROPN': 'Nome Próprio', 'SCONJ': 'Conjunção Subordinativa', 'SYM': 'Símbolo',
|
| 65 |
-
'VERB': 'Verbo', 'X': 'Outro',
|
| 66 |
}
|
| 67 |
}
|
| 68 |
|
| 69 |
-
###########################################################
|
| 70 |
ENTITY_LABELS = {
|
| 71 |
'es': {
|
| 72 |
"Personas": "lightblue",
|
|
@@ -88,17 +79,9 @@ ENTITY_LABELS = {
|
|
| 88 |
"Inventions": "lightgreen",
|
| 89 |
"Dates": "lightyellow",
|
| 90 |
"Concepts": "lightpink"
|
| 91 |
-
},
|
| 92 |
-
'pt': {
|
| 93 |
-
"Pessoas": "lightblue", # Personas/People
|
| 94 |
-
"Lugares": "lightcoral", # Lugares/Places
|
| 95 |
-
"Invenções": "lightgreen", # Inventos/Inventions
|
| 96 |
-
"Datas": "lightyellow", # Fechas/Dates
|
| 97 |
-
"Conceitos": "lightpink" # Conceptos/Concepts
|
| 98 |
}
|
| 99 |
}
|
| 100 |
|
| 101 |
-
###########################################################
|
| 102 |
def fig_to_bytes(fig):
|
| 103 |
"""Convierte una figura de matplotlib a bytes."""
|
| 104 |
try:
|
|
@@ -111,7 +94,7 @@ def fig_to_bytes(fig):
|
|
| 111 |
return None
|
| 112 |
|
| 113 |
###########################################################
|
| 114 |
-
def perform_semantic_analysis(text, nlp, lang_code, semantic_t):
|
| 115 |
"""
|
| 116 |
Realiza el análisis semántico completo del texto.
|
| 117 |
"""
|
|
@@ -160,7 +143,7 @@ def perform_semantic_analysis(text, nlp, lang_code, semantic_t):
|
|
| 160 |
# Visualizar grafo
|
| 161 |
logger.info("Visualizando grafo...")
|
| 162 |
plt.clf() # Limpiar figura actual
|
| 163 |
-
concept_graph_fig = visualize_concept_graph(concept_graph, lang_code, semantic_t)
|
| 164 |
|
| 165 |
# Convertir a bytes
|
| 166 |
logger.info("Convirtiendo grafo a bytes...")
|
|
@@ -300,7 +283,7 @@ def create_concept_graph(doc, key_concepts):
|
|
| 300 |
|
| 301 |
###############################################################################
|
| 302 |
|
| 303 |
-
def visualize_concept_graph(G, lang_code, semantic_t):
|
| 304 |
"""
|
| 305 |
Visualiza el grafo de conceptos con layout consistente.
|
| 306 |
Args:
|
|
@@ -385,22 +368,26 @@ def visualize_concept_graph(G, lang_code, semantic_t):
|
|
| 385 |
),
|
| 386 |
ax=ax
|
| 387 |
)
|
| 388 |
-
#################################################################
|
| 389 |
-
# Usar semantic_t para obtener las traducciones
|
| 390 |
-
plt.title(semantic_t.get('concept_network', 'Relaciones entre los conceptos clave'), pad=20, fontsize=14)
|
| 391 |
|
| 392 |
-
#
|
| 393 |
-
sm = plt.cm.ScalarMappable(
|
|
|
|
|
|
|
|
|
|
| 394 |
sm.set_array([])
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
ax.set_axis_off()
|
|
|
|
|
|
|
| 399 |
plt.tight_layout()
|
|
|
|
| 400 |
return fig
|
|
|
|
| 401 |
except Exception as e:
|
| 402 |
logger.error(f"Error en visualize_concept_graph: {str(e)}")
|
| 403 |
-
return plt.figure()
|
| 404 |
|
| 405 |
########################################################################
|
| 406 |
def create_entity_graph(entities):
|
|
|
|
| 25 |
get_stopwords_for_spacy
|
| 26 |
)
|
| 27 |
|
| 28 |
+
|
| 29 |
# Define colors for grammatical categories
|
| 30 |
POS_COLORS = {
|
| 31 |
'ADJ': '#FFA07A', 'ADP': '#98FB98', 'ADV': '#87CEFA', 'AUX': '#DDA0DD',
|
|
|
|
| 34 |
'SCONJ': '#DEB887', 'SYM': '#7B68EE', 'VERB': '#FF69B4', 'X': '#A9A9A9',
|
| 35 |
}
|
| 36 |
|
|
|
|
| 37 |
POS_TRANSLATIONS = {
|
| 38 |
'es': {
|
| 39 |
'ADJ': 'Adjetivo', 'ADP': 'Preposición', 'ADV': 'Adverbio', 'AUX': 'Auxiliar',
|
|
|
|
| 55 |
'NOUN': 'Nom', 'NUM': 'Nombre', 'PART': 'Particule', 'PRON': 'Pronom',
|
| 56 |
'PROPN': 'Nom Propre', 'SCONJ': 'Conjonction de Subordination', 'SYM': 'Symbole',
|
| 57 |
'VERB': 'Verbe', 'X': 'Autre',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
}
|
| 59 |
}
|
| 60 |
|
|
|
|
| 61 |
ENTITY_LABELS = {
|
| 62 |
'es': {
|
| 63 |
"Personas": "lightblue",
|
|
|
|
| 79 |
"Inventions": "lightgreen",
|
| 80 |
"Dates": "lightyellow",
|
| 81 |
"Concepts": "lightpink"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
}
|
| 83 |
}
|
| 84 |
|
|
|
|
| 85 |
def fig_to_bytes(fig):
|
| 86 |
"""Convierte una figura de matplotlib a bytes."""
|
| 87 |
try:
|
|
|
|
| 94 |
return None
|
| 95 |
|
| 96 |
###########################################################
|
| 97 |
+
def perform_semantic_analysis(text, nlp, lang_code):
|
| 98 |
"""
|
| 99 |
Realiza el análisis semántico completo del texto.
|
| 100 |
"""
|
|
|
|
| 143 |
# Visualizar grafo
|
| 144 |
logger.info("Visualizando grafo...")
|
| 145 |
plt.clf() # Limpiar figura actual
|
| 146 |
+
concept_graph_fig = visualize_concept_graph(concept_graph, lang_code)
|
| 147 |
|
| 148 |
# Convertir a bytes
|
| 149 |
logger.info("Convirtiendo grafo a bytes...")
|
|
|
|
| 283 |
|
| 284 |
###############################################################################
|
| 285 |
|
| 286 |
+
def visualize_concept_graph(G, lang_code):
|
| 287 |
"""
|
| 288 |
Visualiza el grafo de conceptos con layout consistente.
|
| 289 |
Args:
|
|
|
|
| 368 |
),
|
| 369 |
ax=ax
|
| 370 |
)
|
|
|
|
|
|
|
|
|
|
| 371 |
|
| 372 |
+
# Añadir leyenda de centralidad
|
| 373 |
+
sm = plt.cm.ScalarMappable(
|
| 374 |
+
cmap=plt.cm.viridis,
|
| 375 |
+
norm=plt.Normalize(vmin=0, vmax=1)
|
| 376 |
+
)
|
| 377 |
sm.set_array([])
|
| 378 |
+
plt.colorbar(sm, ax=ax, label='Centralidad del concepto')
|
| 379 |
+
|
| 380 |
+
plt.title("Red de conceptos relacionados", pad=20, fontsize=14)
|
| 381 |
ax.set_axis_off()
|
| 382 |
+
|
| 383 |
+
# Ajustar el layout para que la barra de color no se superponga
|
| 384 |
plt.tight_layout()
|
| 385 |
+
|
| 386 |
return fig
|
| 387 |
+
|
| 388 |
except Exception as e:
|
| 389 |
logger.error(f"Error en visualize_concept_graph: {str(e)}")
|
| 390 |
+
return plt.figure() # Retornar figura vacía en caso de error
|
| 391 |
|
| 392 |
########################################################################
|
| 393 |
def create_entity_graph(entities):
|