AlessandroAlfieri committed on
Commit
9c8c4f7
·
verified ·
1 Parent(s): d939ca3

creazione dell'app

Browse files
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
src/__init__.py ADDED
File without changes
src/__pycache__/ai_processor.cpython-313.pyc ADDED
Binary file (17.7 kB). View file
 
src/__pycache__/anonymizer.cpython-313.pyc ADDED
Binary file (5.09 kB). View file
 
src/__pycache__/config.cpython-313.pyc ADDED
Binary file (1.69 kB). View file
 
src/__pycache__/ui_components.cpython-313.pyc ADDED
Binary file (13.7 kB). View file
 
src/__pycache__/utils.cpython-313.pyc ADDED
Binary file (12.2 kB). View file
 
src/ai_processor.py ADDED
@@ -0,0 +1,434 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tutti i componenti AI: Azure, RAG e CrewAI.
3
+ """
4
+
5
+ import re
6
+ from typing import Dict, List
7
+ import streamlit as st
8
+ from openai import AzureOpenAI
9
+
10
+ # LangChain imports
11
+ from langchain_text_splitters import CharacterTextSplitter
12
+ from langchain_openai import AzureOpenAIEmbeddings, AzureChatOpenAI
13
+ from langchain_community.vectorstores import FAISS
14
+ from langchain.chains import RetrievalQA
15
+ from langchain_core.prompts import PromptTemplate
16
+
17
+ # CrewAI imports
18
+ from crewai import Agent, Task, Crew
19
+ from crewai.llm import LLM
20
+
21
+ from config import Config
22
+
23
class AzureProcessor:
    """Processore Azure OpenAI.

    Thin wrapper around the Azure OpenAI chat-completions client used to
    analyze a single anonymized document.
    """

    def __init__(self):
        # Client is created eagerly; stays None when credentials are missing
        # or client construction fails.
        self.client = None
        self.setup_client()

    def setup_client(self):
        """Setup client Azure"""
        if not (Config.AZURE_API_KEY and Config.AZURE_ENDPOINT):
            st.warning("Credenziali Azure OpenAI non trovate.")
            return
        try:
            self.client = AzureOpenAI(
                api_key=Config.AZURE_API_KEY,
                api_version=Config.AZURE_API_VERSION,
                azure_endpoint=Config.AZURE_ENDPOINT,
            )
        except Exception as exc:
            st.error(f"Errore Azure OpenAI: {exc}")
            self.client = None

    def process_document(self, anonymized_text: str) -> str:
        """Processa documento con AI.

        Returns the model's analysis text, or an error string when the
        client is unconfigured or the request fails.
        """
        if not self.client:
            return "Azure OpenAI non configurato."

        system_prompt = (
            "Analizza il documento anonimizzato e fornisci:\n"
            "1. Tipo di documento\n"
            "2. Riepilogo (max 5 righe)\n"
            "3. Analisi semantica (temi, sentiment)\n"
            "4. Risposta suggerita se è comunicazione cliente\n"
            "Usa solo i contenuti del documento fornito."
        )
        user_prompt = f"Analizza questo documento:\n\n{anonymized_text}"

        try:
            # Single chat completion against the configured deployment.
            completion = self.client.chat.completions.create(
                model=Config.DEPLOYMENT_NAME,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
                max_tokens=800,
                temperature=0.7,
            )
            return completion.choices[0].message.content
        except Exception as exc:
            return f"Errore analisi AI: {exc}"
80
+
81
class RAGChatbot:
    """Chatbot RAG con LangChain.

    Wires Azure OpenAI embeddings and chat model into a FAISS-backed
    RetrievalQA chain built from confirmed anonymized documents.
    """

    def __init__(self):
        self.vector_store = None  # FAISS index over document chunks
        self.qa_chain = None      # RetrievalQA chain (built by build_vector_store)
        self.embeddings = None
        self.llm = None
        self.setup_langchain_components()

    def setup_langchain_components(self):
        """Setup componenti LangChain (embeddings + chat LLM).

        Leaves both components as None (after a Streamlit message) when
        Azure credentials are incomplete or initialization fails.
        """
        if not (Config.AZURE_API_KEY and Config.AZURE_ENDPOINT and
                Config.AZURE_EMBEDDING_API_KEY and Config.AZURE_EMBEDDING_ENDPOINT):
            st.warning("Credenziali Azure incomplete. RAG non disponibile.")
            return

        try:
            # Embeddings
            self.embeddings = AzureOpenAIEmbeddings(
                model=Config.AZURE_EMBEDDING_DEPLOYMENT_NAME,
                api_version=Config.AZURE_API_VERSION,
                azure_endpoint=Config.AZURE_EMBEDDING_ENDPOINT,
                api_key=Config.AZURE_EMBEDDING_API_KEY,
                chunk_size=16
            )

            # LLM
            self.llm = AzureChatOpenAI(
                deployment_name=Config.DEPLOYMENT_NAME,
                azure_endpoint=Config.AZURE_ENDPOINT,
                api_key=Config.AZURE_API_KEY,
                api_version=Config.AZURE_API_VERSION,
                temperature=0.2
            )
        except Exception as e:
            st.error(f"Errore setup LangChain: {e}")
            self.embeddings = None
            self.llm = None

    def build_vector_store(self, anonymized_docs: Dict[str, Dict]):
        """Costruisce vector store FAISS and the RetrievalQA chain.

        Fix: each document chunk is now labelled with its real source file
        name — the label was previously the literal "(unknown)", leaving the
        loop's ``filename`` unused and making source attribution in
        answer_question impossible.
        """
        if not self.embeddings or not self.llm:
            st.error("Componenti LangChain non configurati.")
            return

        # Collect confirmed documents, tagging each with its file name so
        # answer_question can trace sources back.
        all_texts = []
        for filename, doc_data in anonymized_docs.items():
            if doc_data.get('confirmed', False):
                all_texts.append(f"Documento ({filename}):\n{doc_data['anonymized']}")

        if not all_texts:
            st.warning("Nessun documento confermato per RAG.")
            return

        with st.spinner("Creando vector store..."):
            # Chunking
            combined_text = "\n\n".join(all_texts)
            text_splitter = CharacterTextSplitter(
                separator="\n\n",
                chunk_size=1000,
                chunk_overlap=200,
                length_function=len,
            )
            texts = text_splitter.split_text(combined_text)

            # Create the FAISS index
            self.vector_store = FAISS.from_texts(texts, self.embeddings)
            st.success(f"Vector store con {len(texts)} chunks creato.")

            # Setup QA chain
            qa_prompt = """Usa il contesto per rispondere alla domanda.
Se non sai la risposta, dillo chiaramente.

{context}

Domanda: {question}
Risposta:"""

            QA_PROMPT = PromptTemplate.from_template(qa_prompt)

            self.qa_chain = RetrievalQA.from_chain_type(
                llm=self.llm,
                chain_type="stuff",
                retriever=self.vector_store.as_retriever(),
                return_source_documents=True,
                chain_type_kwargs={"prompt": QA_PROMPT}
            )

    def answer_question(self, query: str) -> str:
        """Risponde usando RAG, appending the source files when available."""
        if not self.qa_chain:
            return "RAG non pronto. Costruisci prima il knowledge base."

        try:
            result = self.qa_chain.invoke({"query": query})
            answer = result["result"]

            # Append sources when available
            source_docs = result.get("source_documents", [])
            if source_docs:
                answer += "\n\n**Fonti:**\n"
                for doc in source_docs:
                    # Fix: parentheses are escaped so the group captures the
                    # bare file name from the "Documento (<name>):" label
                    # written by build_vector_store.
                    match = re.search(r"Documento \((.*?)\):\n", doc.page_content)
                    source_info = f" (da {match.group(1)})" if match else ""
                    answer += f"- ...{doc.page_content[-100:]}{source_info}\n"

            return answer
        except Exception as e:
            return f"Errore RAG: {e}"

    def get_relevant_context(self, query: str, max_docs: int = 3) -> str:
        """Return the top ``max_docs`` chunks relevant to ``query``, joined.

        Returns "" when the vector store has not been built yet.
        """
        if not self.vector_store:
            return ""

        try:
            docs = self.vector_store.similarity_search(query, k=max_docs)
            return "\n\n".join(doc.page_content for doc in docs)
        except Exception as e:
            return f"Errore contesto: {e}"
204
+
205
class CrewAIManager:
    """Manager for the CrewAI multi-agent analyses.

    Holds four role-specialised agents backed by the Azure OpenAI
    deployment and builds ad-hoc Crews per request, pulling document
    context from the RAG chatbot.
    """

    def __init__(self, rag_chatbot: RAGChatbot):
        # RAG chatbot used to retrieve relevant document context for tasks.
        self.rag_chatbot = rag_chatbot
        self.agents = None  # dict role-key -> Agent, or None when unconfigured
        self.llm = None
        self.setup_crew()

    def setup_crew(self):
        """Create the CrewAI LLM wrapper and the four agents.

        Shows a Streamlit message and leaves ``self.agents`` as None when
        the Azure API key is missing or agent construction fails.
        """
        if not Config.AZURE_API_KEY:
            st.warning("Azure non disponibile per CrewAI")
            return

        try:
            # LLM for CrewAI (LiteLLM-style "azure/<deployment>" model id).
            self.llm = LLM(
                model=f"azure/{Config.DEPLOYMENT_NAME}",
                api_key=Config.AZURE_API_KEY,
                base_url=Config.AZURE_ENDPOINT,
                api_version=Config.AZURE_API_VERSION
            )

            # Agents
            document_analyst = Agent(
                role="Document Analyst",
                goal="Analizzare documenti anonimizzati e fornire insights",
                backstory="Esperto analista documenti con focus su privacy e compliance. "
                          "Lavori solo con documenti anonimizzati per proteggere i dati.",
                llm=self.llm,
                verbose=True,
                allow_delegation=False,
                max_iter=3
            )

            rag_specialist = Agent(
                role="RAG Specialist",
                goal="Rispondere a domande usando il sistema RAG",
                backstory="Esperto in Information Retrieval e RAG systems. "
                          "Specializzato nel recupero di informazioni da documenti anonimizzati.",
                llm=self.llm,
                verbose=True,
                allow_delegation=False,
                max_iter=3
            )

            sentiment_analyst = Agent(
                role="Sentiment Analyst",
                goal="Analizzare sentiment e emozioni nei documenti",
                backstory="Esperto in sentiment analysis e behavioral analytics. "
                          "Identifichi emozioni, trend e segnali nei documenti.",
                llm=self.llm,
                verbose=True,
                allow_delegation=False,
                max_iter=3
            )

            # The coordinator is the only agent allowed to delegate, and it
            # gets a larger iteration budget.
            strategy_coordinator = Agent(
                role="Strategy Coordinator",
                goal="Coordinare analisi e fornire raccomandazioni strategiche",
                backstory="Senior consultant con background in strategic management. "
                          "Traduci insights tecnici in raccomandazioni business concrete.",
                llm=self.llm,
                verbose=True,
                allow_delegation=True,
                max_iter=4
            )

            self.agents = {
                'document_analyst': document_analyst,
                'rag_specialist': rag_specialist,
                'sentiment_analyst': sentiment_analyst,
                'strategy_coordinator': strategy_coordinator
            }

            st.success("✅ Agenti CrewAI configurati")

        except Exception as e:
            st.error(f"Errore setup CrewAI: {e}")
            self.agents = None

    def create_analysis_task(self, query: str, analysis_type: str = "comprehensive") -> str:
        """Run a preset analysis and return its result as a string.

        ``analysis_type`` selects which specialist tasks are created
        ("comprehensive" enables all of them); a final coordination task is
        always appended. Errors are returned as message strings, not raised.
        """
        if not self.agents:
            return "CrewAI non configurato"

        try:
            # Pull relevant context from the RAG vector store.
            context = self.rag_chatbot.get_relevant_context(query, max_docs=5)

            tasks = []

            if analysis_type in ["comprehensive", "document"]:
                # Document-analysis task
                doc_task = Task(
                    description=f"""
                    Analizza documenti per: {query}

                    CONTESTO: {context}

                    Fornisci:
                    - Tipo e classificazione documenti
                    - Temi e argomenti principali
                    - Elementi rilevanti business
                    - Note compliance
                    """,
                    expected_output="Analisi strutturata con classificazione e insights",
                    agent=self.agents['document_analyst']
                )
                tasks.append(doc_task)

            if analysis_type in ["comprehensive", "sentiment"]:
                # Sentiment task
                sentiment_task = Task(
                    description=f"""
                    Analizza sentiment per: {query}

                    CONTESTO: {context}

                    Valuta:
                    - Sentiment generale (scala 1-10)
                    - Emozioni prevalenti
                    - Trend comunicazioni
                    - Segnali rischio/opportunità
                    """,
                    expected_output="Analisi sentiment con valutazioni quantitative",
                    agent=self.agents['sentiment_analyst']
                )
                tasks.append(sentiment_task)

            if analysis_type in ["comprehensive", "rag"]:
                # RAG task
                rag_task = Task(
                    description=f"""
                    Rispondi usando RAG: {query}

                    CONTESTO: {context}

                    Includi:
                    - Risposta diretta
                    - Evidenze documenti
                    - Correlazioni trovate
                    - Informazioni mancanti
                    - Suggerimenti approfondimento
                    """,
                    expected_output="Risposta RAG con evidenze",
                    agent=self.agents['rag_specialist']
                )
                tasks.append(rag_task)

            # Coordination task (always included)
            coord_task = Task(
                description=f"""
                Sintetizza risultati per: {query}

                Crea sintesi con:
                - Executive Summary (3 punti)
                - Insights strategici
                - Raccomandazioni prioritarie
                - Next steps concreti
                - Valutazione rischi

                Output executive-ready e actionable.
                """,
                expected_output="Sintesi strategica con raccomandazioni",
                agent=self.agents['strategy_coordinator']
            )
            tasks.append(coord_task)

            # Build the crew. NOTE(review): all four agents are always passed
            # even when only a subset has tasks — confirm this is intended.
            crew = Crew(
                agents=list(self.agents.values()),
                tasks=tasks,
                verbose=True
            )

            with st.spinner(f"Eseguendo analisi {analysis_type}..."):
                result = crew.kickoff()

            return str(result)

        except Exception as e:
            return f"Errore CrewAI: {e}"

    def create_custom_task(self, query: str, selected_agents: List[str], custom_instructions: str = "") -> str:
        """Run one task per selected agent key, with optional custom instructions.

        Unknown agent keys are skipped; returns an error string when none of
        the requested agents exist or CrewAI is unconfigured.
        """
        if not self.agents:
            return "CrewAI non configurato"

        try:
            context = self.rag_chatbot.get_relevant_context(query, max_docs=5)

            tasks = []
            agents_to_use = []

            for agent_key in selected_agents:
                if agent_key in self.agents:
                    agents_to_use.append(self.agents[agent_key])

                    task = Task(
                        description=f"""
                        {custom_instructions if custom_instructions else f'Analizza secondo il ruolo di {agent_key}'}

                        QUERY: {query}
                        CONTESTO: {context}

                        Fornisci analisi specializzata secondo il tuo ruolo.
                        """,
                        expected_output=f"Analisi specializzata da {agent_key}",
                        agent=self.agents[agent_key]
                    )
                    tasks.append(task)

            if not tasks:
                return "Nessun agente valido selezionato"

            crew = Crew(
                agents=agents_to_use,
                tasks=tasks,
                verbose=True
            )

            with st.spinner(f"Eseguendo task con {len(agents_to_use)} agenti..."):
                result = crew.kickoff()

            return str(result)

        except Exception as e:
            return f"Errore task personalizzato: {e}"
src/anonymizer.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Sistema di anonimizzazione con NER e regex.
3
+ """
4
+
5
+ import re
6
+ from typing import Dict, Tuple
7
+ from transformers import pipeline
8
+ import streamlit as st
9
+ from config import Config, REGEX_PATTERNS
10
+
11
class NERAnonimizer:
    """Anonymizer combining regex masking with transformer-based NER."""

    def __init__(self):
        # Regex patterns for structured PII (IBAN, email, fiscal code, ...).
        self.regex_patterns = REGEX_PATTERNS
        # The NER pipeline is loaded lazily on first access (heavy model).
        self._ner_pipe = None

    @property
    def ner_pipe(self):
        """Lazily load and cache the HuggingFace NER pipeline.

        Returns None (after showing a Streamlit error) when loading fails;
        failed loads are retried on the next access since the cache stays None.
        """
        if self._ner_pipe is None:
            with st.spinner("Caricamento modello NER..."):
                try:
                    self._ner_pipe = pipeline(
                        "ner",
                        model=Config.NER_MODEL,
                        aggregation_strategy="simple"
                    )
                except Exception as e:
                    st.error(f"Errore caricamento NER: {e}")
                    return None
        return self._ner_pipe

    def mask_with_regex(self, text: str) -> Tuple[str, Dict]:
        """Mask regex-detected entities.

        Returns (masked_text, mapping placeholder -> original value).
        """
        masked_text = text
        found_entities = {}

        # Apply patterns with longer pattern strings first.
        sorted_patterns = sorted(
            self.regex_patterns.items(),
            key=lambda item: len(item[1]),
            reverse=True
        )

        for label, pattern in sorted_patterns:
            matches = list(re.finditer(pattern, masked_text, flags=re.IGNORECASE))
            # Replace right-to-left so earlier match offsets stay valid.
            for match in reversed(matches):
                original = match.group()
                # Skip spans that are already a [PLACEHOLDER] from a prior pass.
                if original.startswith('[') and original.endswith(']'):
                    continue

                # Placeholder index counts all entities found so far.
                placeholder = f"[{label}_{len(found_entities)}]"
                found_entities[placeholder] = original
                masked_text = masked_text[:match.start()] + placeholder + masked_text[match.end():]

        return masked_text, found_entities

    def mask_with_ner(self, text: str) -> Tuple[str, Dict]:
        """Mask NER-detected entities.

        Returns (masked_text, mapping placeholder -> original value);
        returns the input unchanged when the pipeline is unavailable or fails.
        """
        if not self.ner_pipe:
            return text, {}

        try:
            entities = self.ner_pipe(text)
            entity_map = {}

            # Process entities right-to-left so the start/end offsets reported
            # by the pipeline stay valid while the text is rewritten in place.
            sorted_entities = sorted(entities, key=lambda x: x['start'], reverse=True)

            for ent in sorted_entities:
                if ent['score'] > 0.5:  # confidence threshold
                    label = ent['entity_group']
                    original_text = text[ent['start']:ent['end']]

                    # Skip spans that are already placeholders.
                    if original_text.startswith('[') and original_text.endswith(']'):
                        continue

                    placeholder = f"[{label}_{len(entity_map)}]"
                    entity_map[placeholder] = original_text

                    text = text[:ent['start']] + placeholder + text[ent['end']:]

            return text, entity_map

        except Exception as e:
            st.error(f"Errore NER: {e}")
            return text, {}

    def anonymize(self, text: str) -> Tuple[str, Dict]:
        """Full anonymization pipeline: regex first, then NER.

        Returns (anonymized_text, mapping placeholder -> original value).
        Empty/blank input is returned unchanged with an empty mapping.
        """
        if not text or not text.strip():
            return text, {}

        # Regex first (structured PII), then NER (names, orgs, places);
        # placeholders from the regex pass are skipped by the NER pass.
        masked_text, regex_entities = self.mask_with_regex(text)
        final_text, ner_entities = self.mask_with_ner(masked_text)

        # Merge both entity maps (NER entries win on a key collision).
        all_entities = {**regex_entities, **ner_entities}

        return final_text, all_entities
src/config.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configurazioni per il sistema di anonimizzazione documenti.
3
+ """
4
+
5
+ import os
6
+ from dotenv import load_dotenv
7
+
8
+ # Carica variabili d'ambiente
9
+ load_dotenv()
10
+
11
class Config:
    """System configuration: model names and Azure OpenAI settings."""

    # NER model used by the anonymizer (multilingual BERT, HRL languages).
    NER_MODEL = "Davlan/bert-base-multilingual-cased-ner-hrl"

    # Azure OpenAI chat credentials, read from the environment.
    AZURE_ENDPOINT = os.getenv("AZURE_ENDPOINT")
    AZURE_API_KEY = os.getenv("AZURE_API_KEY")
    # Embedding credentials fall back to the chat credentials when the
    # dedicated *_EMB variables are not set.
    AZURE_EMBEDDING_ENDPOINT = os.getenv("AZURE_ENDPOINT_EMB", os.getenv("AZURE_ENDPOINT"))
    AZURE_EMBEDDING_API_KEY = os.getenv("AZURE_API_KEY_EMB", os.getenv("AZURE_API_KEY"))
    AZURE_API_VERSION = "2024-02-01"
    DEPLOYMENT_NAME = "gpt-4o"
    AZURE_EMBEDDING_DEPLOYMENT_NAME = "text-embedding-ada-002"

# Regex patterns for sensitive entities. Module-level: the anonymizer
# imports this directly via `from config import Config, REGEX_PATTERNS`.
REGEX_PATTERNS = {
    "IBAN": r'\bIT\d{2}(?: ?[A-Z0-9]){11,30}\b',   # Italian IBANs
    "EMAIL": r'\b[\w\.-]+@[\w\.-]+\.\w{2,}\b',
    "CF": r'\b[A-Z]{6}[0-9]{2}[A-Z][0-9]{2}[A-Z][0-9]{3}[A-Z]\b',  # codice fiscale
    "CARD": r'\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b',         # payment cards
    "PHONE": r'\b\+?[0-9\s\-\(\)]{8,15}\b'  # NOTE(review): very broad — may match plain digit runs
}

# Mirror the Azure key into OPENAI_API_KEY for libraries that expect it.
if Config.AZURE_API_KEY:
    os.environ["OPENAI_API_KEY"] = Config.AZURE_API_KEY
src/main.py ADDED
@@ -0,0 +1,361 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ App principale Streamlit per l'anonimizzazione documenti.
3
+ """
4
+
5
+ import streamlit as st
6
+ import json
7
+ import pandas as pd
8
+ from ui_components import (
9
+ setup_page_config, display_sidebar, display_entity_editor,
10
+ display_file_preview, display_analysis_results, display_crewai_result,
11
+ display_progress_metrics, display_examples_section, create_download_button
12
+ )
13
+ from utils import (
14
+ init_session_state, process_uploaded_files, run_anonymization,
15
+ run_ai_analysis, build_rag_knowledge_base, export_results_json,
16
+ get_confirmed_docs_count, reset_document_state, add_chat_message,
17
+ add_crewai_result, clear_crewai_history
18
+ )
19
+
20
def main():
    """Streamlit entry point: page setup, sidebar, and the five workflow tabs."""
    # Setup
    setup_page_config()
    init_session_state()

    # Header
    st.title("🔒 Anonimizzatore Documenti con NER, RAG e CrewAI")
    st.markdown("---")

    # Sidebar
    display_sidebar()

    # Tabs are paired with their renderers in order.
    tab_labels = [
        "📤 Upload",
        "🔍 Anonimizzazione",
        "📊 Analisi",
        "💬 Chatbot RAG",
        "🤖 CrewAI",
    ]
    renderers = [upload_tab, anonymization_tab, analysis_tab, rag_tab, crewai_tab]

    for tab, render in zip(st.tabs(tab_labels), renderers):
        with tab:
            render()
62
+
63
def upload_tab():
    """Upload tab: .txt file uploader plus a preview of the loaded files."""
    st.header("📤 Carica Documenti")

    uploaded_files = st.file_uploader(
        "Carica uno o più file .txt",
        type=['txt'],
        accept_multiple_files=True,
        help="Seleziona i file di testo da anonimizzare"
    )

    if not uploaded_files:
        return

    if process_uploaded_files(uploaded_files):
        st.success(f"Caricati {len(uploaded_files)} file")
        st.rerun()
    else:
        st.info("Nessun nuovo file caricato.")

    # Preview of files held in session state.
    st.subheader("📄 File caricati")
    for filename, file_data in st.session_state.uploaded_files.items():
        display_file_preview(filename, file_data['content'])
85
+
86
def anonymization_tab():
    """Anonymization tab: run masking and review/confirm each document.

    Fix: the widget keys and labels were the literal string "(unknown)"
    (lost f-string placeholders), so every document in the loop produced
    identical keys — Streamlit raises DuplicateWidgetID with more than one
    document. Keys/labels are now parameterized on the file name.
    """
    st.header("🔍 Anonimizzazione e Revisione")

    if not st.session_state.uploaded_files:
        st.warning("⚠️ Carica prima alcuni documenti nella tab 'Upload'")
        return

    # Anonymization trigger
    if st.button("🚀 Avvia Anonimizzazione", type="primary"):
        run_anonymization()
        st.rerun()

    # Review the anonymized documents
    if st.session_state.anonymized_docs:
        st.subheader("📝 Revisiona Documenti Anonimizzati")

        for filename, doc_data in st.session_state.anonymized_docs.items():
            with st.expander(
                f"📄 {filename} {'✅' if doc_data['confirmed'] else '⏳'}",
                expanded=not doc_data['confirmed']
            ):
                col1, col2 = st.columns(2)

                # Original text (read-only preview, truncated to 300 chars)
                with col1:
                    st.write("**Testo Originale:**")
                    preview = doc_data['original'][:300]
                    if len(doc_data['original']) > 300:
                        preview += "..."

                    st.text_area(
                        "Originale",
                        value=preview,
                        height=200,
                        disabled=True,
                        key=f"orig_{filename}",
                        label_visibility="collapsed"
                    )

                # Anonymized text (editable)
                with col2:
                    st.write("**Testo Anonimizzato:**")
                    edited_text = st.text_area(
                        "Anonimizzato (modificabile)",
                        value=doc_data['anonymized'],
                        height=200,
                        key=f"anon_{filename}",
                        label_visibility="collapsed"
                    )

                    # Persist manual edits back into session state.
                    if edited_text != doc_data['anonymized']:
                        st.session_state.anonymized_docs[filename]['anonymized'] = edited_text

                # Entity editor (works on a copy of the entity map)
                updated_entities = display_entity_editor(dict(doc_data['entities']), filename)

                # Action buttons
                col_confirm, col_reset = st.columns(2)

                with col_confirm:
                    if st.button(f"✅ Conferma {filename}", key=f"confirm_{filename}"):
                        st.session_state.anonymized_docs[filename]['confirmed'] = True
                        st.session_state.anonymized_docs[filename]['entities'] = updated_entities
                        st.success(f"✅ {filename} confermato!")
                        # Confirming a doc invalidates the RAG vector store.
                        st.session_state.vector_store_built = False
                        st.rerun()

                with col_reset:
                    if st.button(f"🔄 Reset {filename}", key=f"reset_{filename}"):
                        reset_document_state(filename)
                        st.rerun()

    # Progress statistics
    display_progress_metrics()
163
+
164
def analysis_tab():
    """Analysis tab: run AI analysis on confirmed docs and offer JSON downloads.

    Fix: download file names, button labels, and widget keys were the literal
    "(unknown)" (lost f-string placeholders), producing duplicate Streamlit
    keys and indistinguishable downloads; they are now per-file.
    """
    st.header("📊 Analisi AI")

    confirmed_docs = {k: v for k, v in st.session_state.anonymized_docs.items()
                      if v.get('confirmed', False)}

    if not confirmed_docs:
        st.warning("⚠️ Conferma prima alcuni documenti anonimizzati")
        return

    st.write(f"Documenti confermati pronti: **{len(confirmed_docs)}**")

    if st.button("🤖 Avvia Analisi AI", type="primary"):
        run_ai_analysis()

    # Results
    if st.session_state.processed_docs:
        st.subheader("📋 Risultati Analisi")

        for filename, result in st.session_state.processed_docs.items():
            display_analysis_results(filename, result)

            # JSON download for this document's analysis
            result_json = export_results_json({
                'filename': filename,
                'anonymized_text': result['anonymized_text'],
                'analysis': result['analysis'],
                'entities': result['entities'],
                'entities_count': result['entities_count']
            }, f"analisi_{filename}")

            create_download_button(
                result_json,
                f"analisi_{filename}.json",
                f"💾 Scarica {filename}",
                f"download_{filename}"
            )
202
+
203
def rag_tab():
    """RAG tab: chat interface over the confirmed, anonymized documents."""
    st.header("💬 Chatta con i Documenti")

    confirmed_docs = {
        name: doc
        for name, doc in st.session_state.anonymized_docs.items()
        if doc.get('confirmed', False)
    }

    if not confirmed_docs:
        st.warning("⚠️ Carica e conferma documenti per abilitare il chatbot")
        return

    # The knowledge base must exist before chatting.
    if not build_rag_knowledge_base():
        st.error("Impossibile costruire knowledge base. Verifica configurazione Azure.")
        return

    st.info(f"Chatbot pronto per {len(confirmed_docs)} documenti")

    # Replay the chat history.
    for entry in st.session_state.chat_history:
        with st.chat_message(entry["role"]):
            st.markdown(entry["content"])

    # Handle a new user question, if any.
    user_question = st.chat_input("Fai una domanda sui documenti...")
    if not user_question:
        return

    add_chat_message("user", user_question)
    with st.chat_message("user"):
        st.markdown(user_question)

    with st.chat_message("assistant"):
        with st.spinner("Generando risposta..."):
            reply = st.session_state.rag_chatbot.answer_question(user_question)
            st.markdown(reply)

    add_chat_message("assistant", reply)
240
+
241
def crewai_tab():
    """CrewAI tab: configure and run multi-agent analyses over confirmed docs."""
    st.header("🤖 Analisi Multi-Agente CrewAI")

    confirmed_docs = {k: v for k, v in st.session_state.anonymized_docs.items()
                      if v.get('confirmed', False)}

    if not confirmed_docs:
        st.warning("⚠️ Conferma documenti per abilitare CrewAI")
        return

    if not st.session_state.crewai_manager.agents:
        st.error("❌ CrewAI non configurato. Verifica Azure OpenAI.")
        return

    # Make sure the RAG knowledge base exists before agents query it.
    build_rag_knowledge_base()

    st.success(f"🎯 CrewAI pronto per {len(confirmed_docs)} documenti")

    # Analysis configuration
    st.subheader("⚙️ Configurazione Analisi")

    col1, col2 = st.columns(2)

    with col1:
        analysis_type = st.selectbox(
            "Tipo di Analisi",
            options=["comprehensive", "document", "sentiment", "rag", "custom"],
            format_func=lambda x: {
                "comprehensive": "🔍 Analisi Comprensiva",
                "document": "📄 Analisi Documentale",
                "sentiment": "😊 Sentiment Analysis",
                "rag": "🔍 Query RAG Avanzata",
                "custom": "⚙️ Personalizzata"
            }[x]
        )

    with col2:
        # Agent picker only applies to the "custom" analysis type.
        if analysis_type == "custom":
            selected_agents = st.multiselect(
                "Agenti da utilizzare",
                options=list(st.session_state.crewai_manager.agents.keys()),
                default=["strategy_coordinator"],
                format_func=lambda x: {
                    "document_analyst": "📄 Document Analyst",
                    "rag_specialist": "🔍 RAG Specialist",
                    "strategy_coordinator": "🎯 Strategy Coordinator",
                    "sentiment_analyst": "😊 Sentiment Analyst"
                }.get(x, x)
            )
        else:
            selected_agents = []

    # Query input
    st.subheader("❓ Query per l'Analisi")
    query_input = st.text_area(
        "Inserisci la tua domanda:",
        placeholder="Es: Analizza i temi principali e identifica rischi operativi...",
        height=100
    )

    # Custom instructions (custom mode only)
    if analysis_type == "custom":
        custom_instructions = st.text_area(
            "Istruzioni Personalizzate:",
            placeholder="Istruzioni specifiche per gli agenti...",
            height=80
        )
    else:
        custom_instructions = ""

    # Action buttons
    col_analyze, col_clear = st.columns(2)

    with col_analyze:
        # Button stays disabled until the query is non-blank.
        if st.button("🚀 Avvia Analisi CrewAI", type="primary", disabled=not query_input.strip()):
            if analysis_type == "custom" and not selected_agents:
                st.error("Seleziona almeno un agente")
            else:
                # Run the analysis with the matching manager entry point.
                if analysis_type == "custom":
                    result = st.session_state.crewai_manager.create_custom_task(
                        query_input, selected_agents, custom_instructions
                    )
                else:
                    result = st.session_state.crewai_manager.create_analysis_task(
                        query_input, analysis_type
                    )

                # Persist the result into the history.
                add_crewai_result(query_input, analysis_type, result, selected_agents)
                st.success("✅ Analisi CrewAI completata!")

    with col_clear:
        if st.button("🗑️ Pulisci Cronologia"):
            clear_crewai_history()
            st.success("Cronologia pulita!")
            st.rerun()

    # Results, newest first
    if st.session_state.crewai_history:
        st.subheader("📋 Risultati Analisi CrewAI")

        for i, analysis in enumerate(reversed(st.session_state.crewai_history)):
            display_crewai_result(analysis, len(st.session_state.crewai_history) - i)

            # Per-result JSON download (timestamp sanitized for file names)
            result_json = export_results_json(analysis, f"crewai_analysis_{i}")
            create_download_button(
                result_json,
                f"crewai_analysis_{analysis['timestamp'].replace(':', '-').replace(' ', '_')}.json",
                "💾 Scarica Risultato",
                f"download_crewai_{i}"
            )

    # Examples
    display_examples_section()
359
+
360
# Script entry point.
if __name__ == "__main__":
    main()
src/ui_components.py ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Componenti UI riutilizzabili per Streamlit.
3
+ """
4
+
5
+ import streamlit as st
6
+ import pandas as pd
7
+ from typing import Dict
8
+ from config import Config
9
+
10
def setup_page_config():
    """Apply the global Streamlit page configuration (title, icon, layout)."""
    page_options = {
        "page_title": "Anonimizzatore Documenti",
        "page_icon": "🔒",
        "layout": "wide",
    }
    st.set_page_config(**page_options)
17
+
18
def display_sidebar():
    """Render the configuration sidebar.

    Shows Azure OpenAI connection status, per-session document statistics,
    knowledge-base readiness, and a button that wipes the session state.
    """
    with st.sidebar:
        st.header("⚙️ Configurazione")

        # Azure OpenAI status: both the API key and the endpoint must be set.
        if Config.AZURE_API_KEY and Config.AZURE_ENDPOINT:
            st.success("✅ Azure OpenAI configurato")
            st.info(f"Chat Model: {Config.DEPLOYMENT_NAME}")
            st.info(f"Embedding Model: {Config.AZURE_EMBEDDING_DEPLOYMENT_NAME}")
        else:
            # Not configured: show which environment variables are expected.
            st.error("❌ Azure OpenAI non configurato")
            st.write("Configura le variabili d'ambiente:")
            st.code("""
AZURE_ENDPOINT=your_endpoint
AZURE_API_KEY=your_api_key
AZURE_ENDPOINT_EMB=your_embedding_endpoint
AZURE_API_KEY_EMB=your_embedding_api_key
""")

        st.markdown("---")

        # Document statistics (only once at least one file has been uploaded).
        if 'uploaded_files' in st.session_state and st.session_state.uploaded_files:
            st.subheader("📊 Statistiche")
            uploaded_count = len(st.session_state.uploaded_files)
            anonymized_count = len(st.session_state.get('anonymized_docs', {}))
            confirmed_count = sum(1 for doc in st.session_state.get('anonymized_docs', {}).values()
                                  if doc.get('confirmed', False))

            st.metric("File caricati", uploaded_count)
            st.metric("Anonimizzati", anonymized_count)
            st.metric("Confermati", confirmed_count)

            # Knowledge-base readiness is only meaningful once something
            # has been confirmed.
            if confirmed_count > 0:
                if st.session_state.get('vector_store_built', False):
                    st.success("✅ Knowledge Base pronto")
                else:
                    st.info("🔄 Knowledge Base da costruire")

            st.markdown("---")

        # Full session reset: drop every key, then rerun the script.
        if st.button("🔄 Reset sessione"):
            for key in list(st.session_state.keys()):
                del st.session_state[key]
            st.rerun()
65
+
66
def display_entity_editor(entities: Dict, doc_key: str):
    """Interactive editor for the entities detected in one document.

    Renders one row per placeholder with an editable original value and a
    delete button. Deletions are persisted to session state and trigger a
    Streamlit rerun.

    Args:
        entities: Mapping placeholder -> original value for *doc_key*.
        doc_key: Key of the document in st.session_state.anonymized_docs.

    Returns:
        The (possibly edited) placeholder -> value mapping.
    """
    if not entities:
        st.info("Nessuna entità sensibile rilevata.")
        return entities

    st.subheader("🔍 Entità rilevate")
    st.write("Verifica e modifica le entità sensibili:")

    current_entities_list = list(entities.items())
    updated_entities_dict = {}
    deleted_placeholders = set()

    for i, (placeholder, original_value) in enumerate(current_entities_list):
        col1, col2, col3 = st.columns([2, 3, 1])

        with col1:
            st.write(f"**{placeholder}**")

        with col2:
            # The index is part of the widget key to keep keys unique even
            # if the same placeholder text appeared twice.
            new_value = st.text_input(
                "Valore originale",
                value=original_value,
                key=f"{doc_key}_{placeholder}_value_{i}"
            )
            updated_entities_dict[placeholder] = new_value

        with col3:
            if st.button("🗑️", key=f"{doc_key}_{placeholder}_delete_{i}", help="Rimuovi"):
                deleted_placeholders.add(placeholder)

    # Apply deletions: persist surviving entities and re-anonymize the text.
    if deleted_placeholders:
        final_entities = {k: v for k, v in updated_entities_dict.items()
                          if k not in deleted_placeholders}
        st.session_state.anonymized_docs[doc_key]['entities'] = final_entities

        # NOTE(review): re-running anonymize() on the ORIGINAL text rebuilds
        # detection from scratch, so a just-deleted entity may be re-detected
        # by the NER model — confirm this is the intended behavior.
        from anonymizer import NERAnonimizer
        anonymizer = NERAnonimizer()
        st.session_state.anonymized_docs[doc_key]['anonymized'], _ = anonymizer.anonymize(
            st.session_state.anonymized_docs[doc_key]['original']
        )
        # The stored vector index no longer matches the documents.
        st.session_state.vector_store_built = False
        st.rerun()

    return updated_entities_dict
113
+
114
def display_file_preview(filename: str, content: str, max_chars: int = 500):
    """Render a collapsible preview of an uploaded file's content.

    Args:
        filename: File name, shown in the expander label and used to build
            a unique widget key.
        content: Full text content of the file.
        max_chars: Maximum number of characters shown in the preview.
    """
    # BUG FIX: the expander label and the widget key contained a literal
    # placeholder instead of interpolating `filename`, which left the
    # parameter unused and made every preview share one widget key
    # (Streamlit raises DuplicateWidgetID for non-unique keys).
    with st.expander(f"📄 {filename} ({len(content)} caratteri)"):
        preview_text = content[:max_chars]
        if len(content) > max_chars:
            preview_text += "..."

        st.text_area(
            "Contenuto",
            value=preview_text,
            height=150,
            disabled=True,
            key=f"preview_{filename}",
            label_visibility="collapsed"
        )
129
+
130
def display_analysis_results(filename: str, result: Dict):
    """Render the AI analysis results for one processed document.

    Args:
        filename: Document name (used in the expander label and widget key).
        result: Dict with keys 'anonymized_text', 'entities_count',
            'analysis' and 'entities' as produced by the processing step.
    """
    # BUG FIX: the expander label and text_area key contained a literal
    # placeholder instead of interpolating `filename`; the non-unique key
    # breaks rendering when more than one document is displayed.
    with st.expander(f"📊 Analisi: {filename}"):
        # Headline metrics
        col1, col2, col3 = st.columns(3)
        col1.metric("Caratteri testo", len(result['anonymized_text']))
        col2.metric("Entità trovate", result['entities_count'])
        col3.metric("Stato", "✅ Completato")

        # Anonymized text as produced by the NER step.
        st.subheader("📄 Testo Anonimizzato")
        st.text_area(
            "Testo processato",
            value=result['anonymized_text'],
            height=150,
            disabled=True,
            key=f"analysis_text_{filename}"
        )

        # Free-form analysis produced by the Azure model.
        st.subheader("🤖 Analisi AI")
        st.markdown(result['analysis'])

        # Table of anonymized entities, if any were detected.
        if result['entities']:
            st.subheader("🔍 Entità Anonimizzate")
            entities_df = pd.DataFrame([
                {
                    'Placeholder': k,
                    'Valore Originale': v,
                    # Placeholders look like "[TYPE_n]"; keep the TYPE part.
                    'Tipo': k.split('_')[0].replace('[', '')
                }
                for k, v in result['entities'].items()
            ])
            st.dataframe(entities_df, use_container_width=True)
165
+
166
def display_crewai_result(analysis: Dict, index: int):
    """Render one stored CrewAI analysis inside a collapsible panel."""
    header = (
        f"🤖 Analisi {index}: {analysis['analysis_type'].upper()}"
        f" - {analysis['timestamp']}"
    )
    with st.expander(header):
        # Header metrics: analysis type, timestamp, agents involved.
        info_cols = st.columns(3)
        info_cols[0].metric("Tipo Analisi", analysis['analysis_type'].capitalize())
        info_cols[1].metric("Timestamp", analysis['timestamp'])

        agents_used = analysis.get('agents_used', 'auto')
        if agents_used == 'auto':
            agent_label = "Automatico"
        elif isinstance(agents_used, list):
            agent_label = f"{len(agents_used)} agenti"
        else:
            agent_label = str(agents_used)
        info_cols[2].metric("Agenti", agent_label)

        # Original question followed by the crew's answer.
        st.subheader("❓ Query Originale")
        st.info(analysis['query'])

        st.subheader("🎯 Risultato Analisi")
        st.markdown(analysis['result'])
196
+
197
def display_progress_metrics():
    """Show how many anonymized documents have been confirmed so far."""
    if 'anonymized_docs' not in st.session_state:
        return

    docs = st.session_state.anonymized_docs
    total_count = len(docs)
    if total_count == 0:
        return

    confirmed_count = len([d for d in docs.values() if d.get('confirmed', False)])
    st.metric(
        "Progresso Conferme",
        f"{confirmed_count}/{total_count}",
        delta=f"{(confirmed_count / total_count) * 100:.1f}%"
    )
210
+
211
def display_examples_section():
    """Render a collapsible cheat-sheet of example CrewAI queries.

    Pure presentation: the body is a single static markdown block grouped
    by analysis type.
    """
    with st.expander("💡 Esempi di Query per CrewAI"):
        st.markdown("""
        **Analisi Comprensiva:**
        - "Fornisci un'analisi completa dei documenti identificando rischi, opportunità e raccomandazioni strategiche"
        - "Analizza la comunicazione aziendale e suggerisci miglioramenti nella gestione clienti"

        **Analisi Documentale:**
        - "Classifica i documenti per tipologia e identifica pattern ricorrenti"
        - "Analizza la struttura e organizzazione delle informazioni nei documenti"

        **Sentiment Analysis:**
        - "Valuta il sentiment generale nelle comunicazioni e identifica aree di miglioramento"
        - "Analizza le emozioni e i trend nei feedback dei clienti"

        **Query RAG Avanzata:**
        - "Trova tutte le menzioni di problemi operativi e le relative soluzioni proposte"
        - "Estrai informazioni su scadenze, deadline e milestone importanti"

        **Personalizzata:**
        - Combina agenti specifici per analisi mirate alle tue esigenze
        """)
234
+
235
def create_download_button(data: str, filename: str, label: str, key: str):
    """Render a download button serving *data* as a JSON attachment."""
    button_kwargs = {
        "label": label,
        "data": data,
        "file_name": filename,
        "mime": "application/json",
        "key": key,
    }
    st.download_button(**button_kwargs)
src/utils.py ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Funzioni utility e gestione stato sessione.
3
+ """
4
+
5
+ import streamlit as st
6
+ import json
7
+ import pandas as pd
8
+ from datetime import datetime
9
+ from anonymizer import NERAnonimizer
10
+ from ai_processor import AzureProcessor, RAGChatbot, CrewAIManager
11
+
12
def init_session_state():
    """Populate st.session_state with every object the app needs, once.

    Existing entries are left untouched, so repeated calls (one per
    Streamlit rerun) are cheap no-ops.
    """
    # Factories are invoked lazily, so heavy objects are only built when
    # their key is missing. Order matters: the CrewAI manager reuses the
    # RAG chatbot created just before it.
    defaults = [
        ('anonymizer', NERAnonimizer),
        ('processor', AzureProcessor),
        ('rag_chatbot', RAGChatbot),
        ('crewai_manager', lambda: CrewAIManager(st.session_state.rag_chatbot)),
        ('uploaded_files', dict),
        ('anonymized_docs', dict),
        ('processed_docs', dict),
        ('chat_history', list),
        ('crewai_history', list),
        ('vector_store_built', lambda: False),
    ]
    for key, factory in defaults:
        if key not in st.session_state:
            st.session_state[key] = factory()
43
+
44
def validate_file_upload(uploaded_file) -> bool:
    """Check that an uploaded file is a .txt no larger than 10 MB.

    Emits a Streamlit error message for the first failed check and returns
    False; returns True only when every check passes.
    """
    if not uploaded_file:
        return False

    max_bytes = 10 * 1024 * 1024  # 10 MB upload cap
    checks = (
        (uploaded_file.name.endswith('.txt'), "Solo file .txt sono supportati"),
        (uploaded_file.size <= max_bytes, "File troppo grande (max 10MB)"),
    )
    for passed, message in checks:
        if not passed:
            st.error(message)
            return False
    return True
60
+
61
def process_uploaded_files(uploaded_files):
    """Read newly uploaded files into session state.

    Returns True when at least one new file was stored (which also resets
    all downstream state), False otherwise.
    """
    added_any = False

    for uploaded in uploaded_files:
        # Validation runs first (it may emit an error message); files we
        # already hold are skipped silently.
        if not validate_file_upload(uploaded) or uploaded.name in st.session_state.uploaded_files:
            continue
        try:
            text = uploaded.read().decode('utf-8')
            st.session_state.uploaded_files[uploaded.name] = {
                'content': text,
                'size': len(text),
            }
            added_any = True
        except Exception as exc:
            st.error(f"Errore lettura file {uploaded.name}: {exc}")

    if not added_any:
        return False

    # New uploads invalidate everything derived from the previous batch.
    st.session_state.anonymized_docs = {}
    st.session_state.processed_docs = {}
    st.session_state.vector_store_built = False
    st.session_state.chat_history = []
    st.session_state.crewai_history = []
    return True
87
+
88
def run_anonymization():
    """Anonymize every uploaded file and store the results in session state.

    Each entry in st.session_state.anonymized_docs records the original
    text, the anonymized text, the detected entities and a 'confirmed'
    flag (reset to False).
    """
    if not st.session_state.uploaded_files:
        st.warning("Nessun file caricato")
        return

    progress_bar = st.progress(0)
    total_files = len(st.session_state.uploaded_files)

    for i, (filename, file_data) in enumerate(st.session_state.uploaded_files.items()):
        # BUG FIX: the progress label contained a literal placeholder
        # instead of interpolating the current file name.
        progress_bar.progress((i + 1) / total_files, f"Processando {filename}...")

        anonymized_text, entities = st.session_state.anonymizer.anonymize(file_data['content'])

        st.session_state.anonymized_docs[filename] = {
            'original': file_data['content'],
            'anonymized': anonymized_text,
            'entities': entities,
            'confirmed': False
        }

    progress_bar.empty()
    st.success("✅ Anonimizzazione completata!")
    # A fresh anonymization invalidates the previously built vector store.
    st.session_state.vector_store_built = False
113
+
114
def run_ai_analysis():
    """Run the Azure analysis over every confirmed document.

    Results are stored in st.session_state.processed_docs keyed by
    filename; unconfirmed documents are skipped.
    """
    confirmed_docs = {k: v for k, v in st.session_state.anonymized_docs.items()
                      if v.get('confirmed', False)}

    if not confirmed_docs:
        st.warning("Nessun documento confermato")
        return

    progress_bar = st.progress(0)

    for i, (filename, doc_data) in enumerate(confirmed_docs.items()):
        # BUG FIX: the progress label contained a literal placeholder
        # instead of interpolating the current file name.
        progress_bar.progress((i + 1) / len(confirmed_docs), f"Analizzando {filename}...")

        # The Azure model only ever sees the anonymized text.
        analysis = st.session_state.processor.process_document(doc_data['anonymized'])

        st.session_state.processed_docs[filename] = {
            'anonymized_text': doc_data['anonymized'],
            'entities_count': len(doc_data['entities']),
            'analysis': analysis,
            'entities': doc_data['entities']
        }

    progress_bar.empty()
    st.success("✅ Analisi completata!")
140
+
141
def build_rag_knowledge_base():
    """Ensure the RAG vector store covers all confirmed documents.

    Returns False (with a warning) when no document has been confirmed;
    otherwise returns True, building the store first if it is stale.
    """
    confirmed = {
        name: doc
        for name, doc in st.session_state.anonymized_docs.items()
        if doc.get('confirmed', False)
    }
    if not confirmed:
        st.warning("Nessun documento confermato per RAG")
        return False

    # Already built and still valid: nothing to do.
    if st.session_state.vector_store_built:
        return True

    with st.spinner("Costruendo knowledge base..."):
        st.session_state.rag_chatbot.build_vector_store(confirmed)
        st.session_state.vector_store_built = True
    return True
157
+
158
def export_results_json(results: dict, filename_prefix: str) -> str:
    """Serialize *results* to pretty-printed JSON with an export stamp.

    A 'metadata' key (export timestamp + item count) is added to a shallow
    copy of *results*. Note: *filename_prefix* is accepted for interface
    compatibility but is not embedded in the payload.
    """
    item_count = len(results) if isinstance(results, dict) else 1
    payload = {**results}
    # Any pre-existing 'metadata' key is intentionally overwritten.
    payload['metadata'] = {
        'exported_at': datetime.now().isoformat(),
        'total_items': item_count,
    }
    # default=str keeps non-JSON-native values (e.g. datetimes) exportable.
    return json.dumps(payload, indent=2, ensure_ascii=False, default=str)
169
+
170
def get_confirmed_docs_count() -> int:
    """Return how many anonymized documents the user has confirmed."""
    if 'anonymized_docs' not in st.session_state:
        return 0
    docs = st.session_state.anonymized_docs.values()
    return len([doc for doc in docs if doc.get('confirmed', False)])
177
+
178
def reset_document_state(filename: str):
    """Re-run anonymization for one file, dropping its confirmation."""
    if filename not in st.session_state.uploaded_files:
        return

    source_text = st.session_state.uploaded_files[filename]['content']
    redacted, found_entities = st.session_state.anonymizer.anonymize(source_text)

    st.session_state.anonymized_docs[filename] = {
        'original': source_text,
        'anonymized': redacted,
        'entities': found_entities,
        'confirmed': False,
    }
    # The knowledge base no longer matches the documents; force a rebuild.
    st.session_state.vector_store_built = False
191
+
192
def add_chat_message(role: str, content: str):
    """Append one message (role + content) to the chat transcript."""
    entry = {"role": role, "content": content}
    st.session_state.chat_history.append(entry)
198
+
199
def add_crewai_result(query: str, analysis_type: str, result: str, agents_used=None):
    """Record a finished CrewAI run in the session history."""
    st.session_state.crewai_history.append({
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "query": query,
        "analysis_type": analysis_type,
        "result": result,
        # Falsy values (None, empty list) are normalized to "auto".
        "agents_used": agents_used or "auto",
    })
210
+
211
def clear_chat_history():
    """Drop every message from the RAG chat transcript."""
    st.session_state['chat_history'] = []
214
+
215
def clear_crewai_history():
    """Drop every stored CrewAI analysis result."""
    st.session_state['crewai_history'] = []
218
+
219
def get_system_stats() -> dict:
    """Snapshot the main counters of the current session."""
    state = st.session_state
    return {
        'uploaded_files': len(state.get('uploaded_files', {})),
        'anonymized_docs': len(state.get('anonymized_docs', {})),
        'confirmed_docs': get_confirmed_docs_count(),
        'processed_docs': len(state.get('processed_docs', {})),
        'chat_messages': len(state.get('chat_history', [])),
        'crewai_analyses': len(state.get('crewai_history', [])),
        'vector_store_ready': state.get('vector_store_built', False),
    }