""" Funzioni utility e gestione stato sessione. """ import streamlit as st import json import pandas as pd from datetime import datetime from anonymizer import NERAnonimizer from ai_processor import AzureProcessor, RAGChatbot, CrewAIManager def init_session_state(): """Inizializza stato sessione""" if 'anonymizer' not in st.session_state: st.session_state.anonymizer = NERAnonimizer() if 'processor' not in st.session_state: st.session_state.processor = AzureProcessor() if 'rag_chatbot' not in st.session_state: st.session_state.rag_chatbot = RAGChatbot() if 'crewai_manager' not in st.session_state: st.session_state.crewai_manager = CrewAIManager(st.session_state.rag_chatbot) if 'uploaded_files' not in st.session_state: st.session_state.uploaded_files = {} if 'anonymized_docs' not in st.session_state: st.session_state.anonymized_docs = {} if 'processed_docs' not in st.session_state: st.session_state.processed_docs = {} if 'chat_history' not in st.session_state: st.session_state.chat_history = [] if 'crewai_history' not in st.session_state: st.session_state.crewai_history = [] if 'vector_store_built' not in st.session_state: st.session_state.vector_store_built = False def validate_file_upload(uploaded_file) -> bool: """Valida file caricato""" if not uploaded_file: return False # Controlla estensione if not uploaded_file.name.endswith('.txt'): st.error("Solo file .txt sono supportati") return False # Controlla dimensione (max 10MB) if uploaded_file.size > 10 * 1024 * 1024: st.error("File troppo grande (max 10MB)") return False return True def process_uploaded_files(uploaded_files): """Processa file caricati""" new_files_uploaded = False for file in uploaded_files: if validate_file_upload(file) and file.name not in st.session_state.uploaded_files: try: content = file.read().decode('utf-8') st.session_state.uploaded_files[file.name] = { 'content': content, 'size': len(content) } new_files_uploaded = True except Exception as e: st.error(f"Errore lettura file {file.name}: {e}") if new_files_uploaded: # Reset stato quando si caricano nuovi file st.session_state.anonymized_docs = {} st.session_state.processed_docs = {} st.session_state.vector_store_built = False st.session_state.chat_history = [] st.session_state.crewai_history = [] return True return False def run_anonymization(): """Esegue anonimizzazione su tutti i file""" if not st.session_state.uploaded_files: st.warning("Nessun file caricato") return progress_bar = st.progress(0) total_files = len(st.session_state.uploaded_files) for i, (filename, file_data) in enumerate(st.session_state.uploaded_files.items()): progress_bar.progress((i + 1) / total_files, f"Processando {filename}...") # Anonimizza anonymized_text, entities = st.session_state.anonymizer.anonymize(file_data['content']) st.session_state.anonymized_docs[filename] = { 'original': file_data['content'], 'anonymized': anonymized_text, 'entities': entities, 'confirmed': False } progress_bar.empty() st.success("✅ Anonimizzazione completata!") st.session_state.vector_store_built = False def run_ai_analysis(): """Esegue analisi AI sui documenti confermati""" confirmed_docs = {k: v for k, v in st.session_state.anonymized_docs.items() if v.get('confirmed', False)} if not confirmed_docs: st.warning("Nessun documento confermato") return progress_bar = st.progress(0) for i, (filename, doc_data) in enumerate(confirmed_docs.items()): progress_bar.progress((i + 1) / len(confirmed_docs), f"Analizzando {filename}...") # Analisi Azure analysis = st.session_state.processor.process_document(doc_data['anonymized']) st.session_state.processed_docs[filename] = { 'anonymized_text': doc_data['anonymized'], 'entities_count': len(doc_data['entities']), 'analysis': analysis, 'entities': doc_data['entities'] } progress_bar.empty() st.success("✅ Analisi completata!") def build_rag_knowledge_base(): """Costruisce knowledge base RAG""" confirmed_docs = {k: v for k, v in st.session_state.anonymized_docs.items() if v.get('confirmed', False)} if not confirmed_docs: st.warning("Nessun documento confermato per RAG") return False if not st.session_state.vector_store_built: with st.spinner("Costruendo knowledge base..."): st.session_state.rag_chatbot.build_vector_store(confirmed_docs) st.session_state.vector_store_built = True return True return True def export_results_json(results: dict, filename_prefix: str) -> str: """Esporta risultati in JSON""" export_data = { **results, 'metadata': { 'exported_at': datetime.now().isoformat(), 'total_items': len(results) if isinstance(results, dict) else 1 } } return json.dumps(export_data, indent=2, ensure_ascii=False, default=str) def get_confirmed_docs_count() -> int: """Ritorna numero documenti confermati""" if 'anonymized_docs' not in st.session_state: return 0 return sum(1 for doc in st.session_state.anonymized_docs.values() if doc.get('confirmed', False)) def reset_document_state(filename: str): """Reset stato documento specifico""" if filename in st.session_state.uploaded_files: original_data = st.session_state.uploaded_files[filename] anonymized_text, entities = st.session_state.anonymizer.anonymize(original_data['content']) st.session_state.anonymized_docs[filename] = { 'original': original_data['content'], 'anonymized': anonymized_text, 'entities': entities, 'confirmed': False } st.session_state.vector_store_built = False def add_chat_message(role: str, content: str): """Aggiunge messaggio alla chat history""" st.session_state.chat_history.append({ "role": role, "content": content }) def add_crewai_result(query: str, analysis_type: str, result: str, agents_used=None): """Aggiunge risultato CrewAI alla history""" analysis_result = { "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "query": query, "analysis_type": analysis_type, "result": result, "agents_used": agents_used if agents_used else "auto" } st.session_state.crewai_history.append(analysis_result) def clear_chat_history(): """Pulisce cronologia chat""" st.session_state.chat_history = [] def clear_crewai_history(): """Pulisce cronologia CrewAI""" st.session_state.crewai_history = [] def get_system_stats() -> dict: """Ritorna statistiche sistema""" return { 'uploaded_files': len(st.session_state.get('uploaded_files', {})), 'anonymized_docs': len(st.session_state.get('anonymized_docs', {})), 'confirmed_docs': get_confirmed_docs_count(), 'processed_docs': len(st.session_state.get('processed_docs', {})), 'chat_messages': len(st.session_state.get('chat_history', [])), 'crewai_analyses': len(st.session_state.get('crewai_history', [])), 'vector_store_ready': st.session_state.get('vector_store_built', False) }