File size: 8,218 Bytes
9c8c4f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
"""

Funzioni utility e gestione stato sessione.

"""

import streamlit as st
import json
import pandas as pd
from datetime import datetime
from anonymizer import NERAnonimizer
from ai_processor import AzureProcessor, RAGChatbot, CrewAIManager

def init_session_state():
    """Create every session-state entry the app relies on, if missing.

    Entries already present in ``st.session_state`` are left untouched, so
    calling this more than once does not replace existing values.
    """
    state = st.session_state

    # Order matters: the 'crewai_manager' factory reads the 'rag_chatbot'
    # entry, so the chatbot has to be created before the manager.
    default_factories = (
        ('anonymizer', NERAnonimizer),
        ('processor', AzureProcessor),
        ('rag_chatbot', RAGChatbot),
        ('crewai_manager', lambda: CrewAIManager(state.rag_chatbot)),
        ('uploaded_files', dict),
        ('anonymized_docs', dict),
        ('processed_docs', dict),
        ('chat_history', list),
        ('crewai_history', list),
        ('vector_store_built', bool),  # bool() -> False
    )

    for key, make_default in default_factories:
        if key not in state:
            setattr(state, key, make_default())

def validate_file_upload(uploaded_file) -> bool:
    """Check that an uploaded file is an acceptable ``.txt`` document.

    Args:
        uploaded_file: Object exposing ``name`` and ``size`` attributes
            (presumably a Streamlit ``UploadedFile`` - confirm against the
            caller). A falsy value is treated as "nothing uploaded".

    Returns:
        True when the file name ends with ``.txt`` (compared
        case-insensitively) and ``size`` does not exceed 10 MB, False
        otherwise. A rejection caused by the extension or the size is also
        reported to the user through ``st.error``.
    """
    if not uploaded_file:
        return False

    # Compare the extension case-insensitively so that names such as
    # "REPORT.TXT" are not rejected.
    if not uploaded_file.name.lower().endswith('.txt'):
        st.error("Solo file .txt sono supportati")
        return False

    # Size limit: 10 MB.
    max_size_bytes = 10 * 1024 * 1024
    if uploaded_file.size > max_size_bytes:
        st.error("File troppo grande (max 10MB)")
        return False

    return True

def process_uploaded_files(uploaded_files):
    """Store newly uploaded files in the session and reset derived state.

    Every file that passes ``validate_file_upload`` and whose name is not
    already a key of ``st.session_state.uploaded_files`` is read, decoded as
    UTF-8 and stored there as ``{'content': <text>, 'size': <len(text)>}``.
    Note that ``'size'`` is the length of the decoded text, not the byte
    size of the upload.

    Args:
        uploaded_files: Iterable of uploaded file objects. ``None`` or an
            empty collection means nothing was uploaded.

    Returns:
        True when at least one new file was stored; in that case the
        anonymized/processed documents, both histories and the vector-store
        flag are reset. False otherwise.
    """
    # Nothing uploaded: avoid iterating over None.
    if not uploaded_files:
        return False

    added_any = False

    for file in uploaded_files:
        if not validate_file_upload(file):
            continue
        if file.name in st.session_state.uploaded_files:
            continue

        # Best effort per file: a read/decode failure is reported to the
        # user and the remaining files are still processed.
        try:
            content = file.read().decode('utf-8')
        except Exception as e:
            st.error(f"Errore lettura file {file.name}: {e}")
            continue

        st.session_state.uploaded_files[file.name] = {
            'content': content,
            'size': len(content)
        }
        added_any = True

    if not added_any:
        return False

    # New input invalidates everything derived from the previous file set.
    st.session_state.anonymized_docs = {}
    st.session_state.processed_docs = {}
    st.session_state.vector_store_built = False
    st.session_state.chat_history = []
    st.session_state.crewai_history = []
    return True

def run_anonymization():
    """Anonymize every uploaded file and store the result in the session.

    For each entry of ``st.session_state.uploaded_files`` the text is passed
    to ``st.session_state.anonymizer.anonymize``; the returned anonymized
    text and entities are saved in ``st.session_state.anonymized_docs``
    together with the original text and a ``'confirmed'`` flag set to False.
    When no file is uploaded a warning is shown and nothing else happens.

    On completion the vector-store flag is cleared, because the anonymized
    documents it would have been built from have just been replaced.
    """
    if not st.session_state.uploaded_files:
        st.warning("Nessun file caricato")
        return

    progress_bar = st.progress(0)
    total_files = len(st.session_state.uploaded_files)

    try:
        for done, (filename, file_data) in enumerate(
                st.session_state.uploaded_files.items(), start=1):
            progress_bar.progress(done / total_files, f"Processando {filename}...")

            anonymized_text, entities = st.session_state.anonymizer.anonymize(
                file_data['content'])

            st.session_state.anonymized_docs[filename] = {
                'original': file_data['content'],
                'anonymized': anonymized_text,
                'entities': entities,
                'confirmed': False
            }
    finally:
        # Remove the progress bar even if anonymization raises, so a stale
        # widget is not left on the page.
        progress_bar.empty()

    # The emoji was previously stored as mis-encoded characters.
    st.success("✅ Anonimizzazione completata!")
    st.session_state.vector_store_built = False

def run_ai_analysis():
    """Run the AI analysis on every confirmed anonymized document.

    Entries of ``st.session_state.anonymized_docs`` whose ``'confirmed'``
    flag is truthy are passed, one by one, to
    ``st.session_state.processor.process_document``. Each result is stored
    in ``st.session_state.processed_docs`` under the same filename, together
    with the anonymized text, the entities and their count. When no document
    is confirmed a warning is shown and nothing else happens.
    """
    confirmed_docs = {
        name: doc
        for name, doc in st.session_state.anonymized_docs.items()
        if doc.get('confirmed', False)
    }

    if not confirmed_docs:
        st.warning("Nessun documento confermato")
        return

    progress_bar = st.progress(0)
    total_docs = len(confirmed_docs)

    try:
        for done, (filename, doc_data) in enumerate(confirmed_docs.items(), start=1):
            progress_bar.progress(done / total_docs, f"Analizzando {filename}...")

            analysis = st.session_state.processor.process_document(
                doc_data['anonymized'])

            st.session_state.processed_docs[filename] = {
                'anonymized_text': doc_data['anonymized'],
                'entities_count': len(doc_data['entities']),
                'analysis': analysis,
                'entities': doc_data['entities']
            }
    finally:
        # Remove the progress bar even if the analysis raises, so a stale
        # widget is not left on the page.
        progress_bar.empty()

    # The emoji was previously stored as mis-encoded characters.
    st.success("✅ Analisi completata!")

def build_rag_knowledge_base():
    """Make sure the RAG vector store exists for the confirmed documents.

    Returns:
        False (after showing a warning) when no anonymized document is
        confirmed. True otherwise; the vector store is built through
        ``st.session_state.rag_chatbot.build_vector_store`` only when the
        ``vector_store_built`` flag is not already set.
    """
    confirmed = {}
    for name, doc in st.session_state.anonymized_docs.items():
        if doc.get('confirmed', False):
            confirmed[name] = doc

    if len(confirmed) == 0:
        st.warning("Nessun documento confermato per RAG")
        return False

    # Already built for the current document set: nothing to do.
    if st.session_state.vector_store_built:
        return True

    with st.spinner("Costruendo knowledge base..."):
        st.session_state.rag_chatbot.build_vector_store(confirmed)
        st.session_state.vector_store_built = True

    return True

def export_results_json(results: dict, filename_prefix: str) -> str:
    """Serialize results to a JSON string with an added ``metadata`` entry.

    Args:
        results: Data to export. A mapping is copied key by key into the
            top level of the output. Any other value (for example a list)
            is placed under a ``'results'`` key instead of raising
            ``TypeError``.
        filename_prefix: Currently unused by this function; kept so existing
            callers keep working.

    Returns:
        A JSON document (2-space indent, non-ASCII characters kept as-is)
        containing the exported data plus ``'metadata'`` with the export
        timestamp and ``'total_items'`` (``len(results)`` for a dict, 1
        otherwise). A ``'metadata'`` key already present in ``results`` is
        overwritten. Values that JSON cannot encode are converted with
        ``str``.
    """
    from collections.abc import Mapping

    if isinstance(results, Mapping):
        export_data = dict(results)
    else:
        # Non-mapping payloads cannot be spread into the top level.
        export_data = {'results': results}

    export_data['metadata'] = {
        'exported_at': datetime.now().isoformat(),
        'total_items': len(results) if isinstance(results, dict) else 1
    }

    return json.dumps(export_data, indent=2, ensure_ascii=False, default=str)

def get_confirmed_docs_count() -> int:
    """Count the anonymized documents whose ``'confirmed'`` flag is truthy.

    Returns 0 when ``anonymized_docs`` is not yet in the session state.
    """
    if 'anonymized_docs' in st.session_state:
        documents = st.session_state.anonymized_docs.values()
        confirmed = [doc for doc in documents if doc.get('confirmed', False)]
        return len(confirmed)

    return 0

def reset_document_state(filename: str):
    """Re-run anonymization for one uploaded file and mark it unconfirmed.

    Does nothing when ``filename`` is not among the uploaded files.
    Otherwise the stored entry in ``anonymized_docs`` is replaced with a
    fresh anonymization of the original text and the vector-store flag is
    cleared.
    """
    state = st.session_state

    if filename not in state.uploaded_files:
        return

    source_text = state.uploaded_files[filename]['content']
    anonymized_text, entities = state.anonymizer.anonymize(source_text)

    state.anonymized_docs[filename] = dict(
        original=source_text,
        anonymized=anonymized_text,
        entities=entities,
        confirmed=False,
    )
    state.vector_store_built = False

def add_chat_message(role: str, content: str):
    """Append a ``{"role", "content"}`` entry to the session chat history."""
    message = dict(role=role, content=content)
    st.session_state.chat_history.append(message)

def add_crewai_result(query: str, analysis_type: str, result: str, agents_used=None):
    """Append a timestamped CrewAI analysis record to the session history.

    ``agents_used`` is stored as given when truthy; otherwise the string
    ``"auto"`` is stored in its place.
    """
    recorded_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    record = {}
    record["timestamp"] = recorded_at
    record["query"] = query
    record["analysis_type"] = analysis_type
    record["result"] = result
    record["agents_used"] = agents_used or "auto"

    st.session_state.crewai_history.append(record)

def clear_chat_history():
    """Replace the session chat history with a new empty list."""
    fresh_history = list()
    st.session_state.chat_history = fresh_history

def clear_crewai_history():
    """Replace the session CrewAI history with a new empty list."""
    fresh_history = list()
    st.session_state.crewai_history = fresh_history

def get_system_stats() -> dict:
    """Summarize the current session as a dict of counters and one flag.

    Missing session entries count as empty (or False for the vector-store
    flag), so the function does not require the state to be initialized.
    """
    state = st.session_state

    stats = {}
    stats['uploaded_files'] = len(state.get('uploaded_files', {}))
    stats['anonymized_docs'] = len(state.get('anonymized_docs', {}))
    stats['confirmed_docs'] = get_confirmed_docs_count()
    stats['processed_docs'] = len(state.get('processed_docs', {}))
    stats['chat_messages'] = len(state.get('chat_history', []))
    stats['crewai_analyses'] = len(state.get('crewai_history', []))
    stats['vector_store_ready'] = state.get('vector_store_built', False)
    return stats