"""
Funzioni utility e gestione stato sessione.
"""
import streamlit as st
import json
import pandas as pd
from datetime import datetime
from anonymizer import NERAnonimizer
from ai_processor import AzureProcessor, RAGChatbot, CrewAIManager

def init_session_state():
    """Initialize the session state."""
    if 'anonymizer' not in st.session_state:
        st.session_state.anonymizer = NERAnonimizer()
    if 'processor' not in st.session_state:
        st.session_state.processor = AzureProcessor()
    if 'rag_chatbot' not in st.session_state:
        st.session_state.rag_chatbot = RAGChatbot()
    if 'crewai_manager' not in st.session_state:
        st.session_state.crewai_manager = CrewAIManager(st.session_state.rag_chatbot)
    if 'uploaded_files' not in st.session_state:
        st.session_state.uploaded_files = {}
    if 'anonymized_docs' not in st.session_state:
        st.session_state.anonymized_docs = {}
    if 'processed_docs' not in st.session_state:
        st.session_state.processed_docs = {}
    if 'chat_history' not in st.session_state:
        st.session_state.chat_history = []
    if 'crewai_history' not in st.session_state:
        st.session_state.crewai_history = []
    if 'vector_store_built' not in st.session_state:
        st.session_state.vector_store_built = False
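
# Usage sketch (hypothetical, assuming this file is imported as `utils` from the
# Streamlit entry point): call init_session_state() at the top of every rerun so
# later st.session_state accesses never hit a missing key.
#
#     import streamlit as st
#     from utils import init_session_state
#
#     st.set_page_config(page_title="Document pipeline")  # title is an assumption
#     init_session_state()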

def validate_file_upload(uploaded_file) -> bool:
    """Validate an uploaded file."""
    if not uploaded_file:
        return False

    # Check the extension
    if not uploaded_file.name.endswith('.txt'):
        st.error("Only .txt files are supported")
        return False

    # Check the size (max 10MB)
    if uploaded_file.size > 10 * 1024 * 1024:
        st.error("File too large (max 10MB)")
        return False

    return True

def process_uploaded_files(uploaded_files):
    """Store the uploaded files in session state."""
    new_files_uploaded = False

    for file in uploaded_files:
        if validate_file_upload(file) and file.name not in st.session_state.uploaded_files:
            try:
                content = file.read().decode('utf-8')
                st.session_state.uploaded_files[file.name] = {
                    'content': content,
                    'size': len(content)
                }
                new_files_uploaded = True
            except Exception as e:
                st.error(f"Error reading file {file.name}: {e}")

    if new_files_uploaded:
        # Reset downstream state whenever new files are loaded
        st.session_state.anonymized_docs = {}
        st.session_state.processed_docs = {}
        st.session_state.vector_store_built = False
        st.session_state.chat_history = []
        st.session_state.crewai_history = []
        return True

    return False
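
# Wiring sketch (hypothetical UI code, not part of this module): the two
# functions above pair naturally with st.file_uploader on an upload page.
#
#     uploaded = st.file_uploader(
#         "Upload .txt documents", type=["txt"], accept_multiple_files=True
#     )
#     if uploaded and process_uploaded_files(uploaded):
#         st.info("New files loaded; downstream results were reset")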

def run_anonymization():
    """Run anonymization on all uploaded files."""
    if not st.session_state.uploaded_files:
        st.warning("No files uploaded")
        return

    progress_bar = st.progress(0)
    total_files = len(st.session_state.uploaded_files)

    for i, (filename, file_data) in enumerate(st.session_state.uploaded_files.items()):
        progress_bar.progress((i + 1) / total_files, f"Processing {filename}...")

        # Anonymize
        anonymized_text, entities = st.session_state.anonymizer.anonymize(file_data['content'])

        st.session_state.anonymized_docs[filename] = {
            'original': file_data['content'],
            'anonymized': anonymized_text,
            'entities': entities,
            'confirmed': False
        }

    progress_bar.empty()
    st.success("✅ Anonymization complete!")
    st.session_state.vector_store_built = False
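
# Review sketch (hypothetical UI code): run_ai_analysis() and
# build_rag_knowledge_base() only consider documents whose 'confirmed' flag is
# True, so a review page is expected to flip that flag, for example:
#
#     for name, doc in st.session_state.anonymized_docs.items():
#         with st.expander(name):
#             st.text_area("Anonymized text", doc['anonymized'], key=f"txt_{name}")
#             doc['confirmed'] = st.checkbox("Confirm", value=doc['confirmed'], key=f"ok_{name}")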

def run_ai_analysis():
    """Run the AI analysis on the confirmed documents."""
    confirmed_docs = {k: v for k, v in st.session_state.anonymized_docs.items()
                      if v.get('confirmed', False)}

    if not confirmed_docs:
        st.warning("No confirmed documents")
        return

    progress_bar = st.progress(0)

    for i, (filename, doc_data) in enumerate(confirmed_docs.items()):
        progress_bar.progress((i + 1) / len(confirmed_docs), f"Analyzing {filename}...")

        # Azure analysis
        analysis = st.session_state.processor.process_document(doc_data['anonymized'])

        st.session_state.processed_docs[filename] = {
            'anonymized_text': doc_data['anonymized'],
            'entities_count': len(doc_data['entities']),
            'analysis': analysis,
            'entities': doc_data['entities']
        }

    progress_bar.empty()
    st.success("✅ Analysis complete!")
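
# Presentation sketch (hypothetical): a results page would iterate over the
# structure stored above; 'analysis' is whatever AzureProcessor.process_document()
# returns, so it is rendered generically here.
#
#     for name, doc in st.session_state.processed_docs.items():
#         with st.expander(f"{name} ({doc['entities_count']} entities)"):
#             st.write(doc['analysis'])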

def build_rag_knowledge_base():
    """Build the RAG knowledge base from the confirmed documents."""
    confirmed_docs = {k: v for k, v in st.session_state.anonymized_docs.items()
                      if v.get('confirmed', False)}

    if not confirmed_docs:
        st.warning("No confirmed documents for RAG")
        return False

    if not st.session_state.vector_store_built:
        with st.spinner("Building knowledge base..."):
            st.session_state.rag_chatbot.build_vector_store(confirmed_docs)
            st.session_state.vector_store_built = True

    return True
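
# Chat-page sketch (hypothetical; the answer method on RAGChatbot is an
# assumption, this module only shows build_vector_store()):
#
#     if build_rag_knowledge_base():
#         if question := st.chat_input("Ask about the confirmed documents"):
#             add_chat_message("user", question)
#             answer = st.session_state.rag_chatbot.ask(question)  # assumed API
#             add_chat_message("assistant", answer)
#         for msg in st.session_state.chat_history:
#             with st.chat_message(msg["role"]):
#                 st.write(msg["content"])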

def export_results_json(results: dict, filename_prefix: str) -> str:
    """Serialize results to a JSON string."""
    # Note: filename_prefix is currently not used inside this function.
    export_data = {
        **results,
        'metadata': {
            'exported_at': datetime.now().isoformat(),
            'total_items': len(results) if isinstance(results, dict) else 1
        }
    }
    return json.dumps(export_data, indent=2, ensure_ascii=False, default=str)
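
# Export sketch (hypothetical): the JSON string returned above can be handed
# straight to st.download_button.
#
#     payload = export_results_json(st.session_state.processed_docs, "analysis")
#     st.download_button(
#         "Download results (JSON)",
#         data=payload,
#         file_name="analysis_results.json",
#         mime="application/json",
#     )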

def get_confirmed_docs_count() -> int:
    """Return the number of confirmed documents."""
    if 'anonymized_docs' not in st.session_state:
        return 0
    return sum(1 for doc in st.session_state.anonymized_docs.values()
               if doc.get('confirmed', False))

def reset_document_state(filename: str):
    """Reset the state of a specific document by re-running anonymization."""
    if filename in st.session_state.uploaded_files:
        original_data = st.session_state.uploaded_files[filename]
        anonymized_text, entities = st.session_state.anonymizer.anonymize(original_data['content'])

        st.session_state.anonymized_docs[filename] = {
            'original': original_data['content'],
            'anonymized': anonymized_text,
            'entities': entities,
            'confirmed': False
        }
        st.session_state.vector_store_built = False

def add_chat_message(role: str, content: str):
    """Append a message to the chat history."""
    st.session_state.chat_history.append({
        "role": role,
        "content": content
    })

def add_crewai_result(query: str, analysis_type: str, result: str, agents_used=None):
    """Append a CrewAI result to the history."""
    analysis_result = {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "query": query,
        "analysis_type": analysis_type,
        "result": result,
        "agents_used": agents_used if agents_used else "auto"
    }
    st.session_state.crewai_history.append(analysis_result)

def clear_chat_history():
    """Clear the chat history."""
    st.session_state.chat_history = []


def clear_crewai_history():
    """Clear the CrewAI history."""
    st.session_state.crewai_history = []

def get_system_stats() -> dict:
    """Return system statistics."""
    return {
        'uploaded_files': len(st.session_state.get('uploaded_files', {})),
        'anonymized_docs': len(st.session_state.get('anonymized_docs', {})),
        'confirmed_docs': get_confirmed_docs_count(),
        'processed_docs': len(st.session_state.get('processed_docs', {})),
        'chat_messages': len(st.session_state.get('chat_history', [])),
        'crewai_analyses': len(st.session_state.get('crewai_history', [])),
        'vector_store_ready': st.session_state.get('vector_store_built', False)
    }
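
# Status-panel sketch (hypothetical): get_system_stats() maps directly onto a
# compact sidebar summary.
#
#     stats = get_system_stats()
#     st.sidebar.metric("Uploaded files", stats['uploaded_files'])
#     st.sidebar.metric("Confirmed documents", stats['confirmed_docs'])
#     st.sidebar.metric("CrewAI analyses", stats['crewai_analyses'])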