Spaces:
Sleeping
Sleeping
creazione dell'app
Browse files- requirements.txt +0 -0
- src/__init__.py +0 -0
- src/__pycache__/ai_processor.cpython-313.pyc +0 -0
- src/__pycache__/anonymizer.cpython-313.pyc +0 -0
- src/__pycache__/config.cpython-313.pyc +0 -0
- src/__pycache__/ui_components.cpython-313.pyc +0 -0
- src/__pycache__/utils.cpython-313.pyc +0 -0
- src/ai_processor.py +434 -0
- src/anonymizer.py +101 -0
- src/config.py +37 -0
- src/main.py +361 -0
- src/ui_components.py +243 -0
- src/utils.py +229 -0
requirements.txt
CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
|
|
src/__init__.py
ADDED
File without changes
|
src/__pycache__/ai_processor.cpython-313.pyc
ADDED
Binary file (17.7 kB). View file
|
|
src/__pycache__/anonymizer.cpython-313.pyc
ADDED
Binary file (5.09 kB). View file
|
|
src/__pycache__/config.cpython-313.pyc
ADDED
Binary file (1.69 kB). View file
|
|
src/__pycache__/ui_components.cpython-313.pyc
ADDED
Binary file (13.7 kB). View file
|
|
src/__pycache__/utils.cpython-313.pyc
ADDED
Binary file (12.2 kB). View file
|
|
src/ai_processor.py
ADDED
@@ -0,0 +1,434 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Tutti i componenti AI: Azure, RAG e CrewAI.
|
3 |
+
"""
|
4 |
+
|
5 |
+
import re
|
6 |
+
from typing import Dict, List
|
7 |
+
import streamlit as st
|
8 |
+
from openai import AzureOpenAI
|
9 |
+
|
10 |
+
# LangChain imports
|
11 |
+
from langchain_text_splitters import CharacterTextSplitter
|
12 |
+
from langchain_openai import AzureOpenAIEmbeddings, AzureChatOpenAI
|
13 |
+
from langchain_community.vectorstores import FAISS
|
14 |
+
from langchain.chains import RetrievalQA
|
15 |
+
from langchain_core.prompts import PromptTemplate
|
16 |
+
|
17 |
+
# CrewAI imports
|
18 |
+
from crewai import Agent, Task, Crew
|
19 |
+
from crewai.llm import LLM
|
20 |
+
|
21 |
+
from config import Config
|
22 |
+
|
23 |
+
class AzureProcessor:
    """Thin wrapper around the Azure OpenAI chat-completions client."""

    def __init__(self):
        self.client = None
        self.setup_client()

    def setup_client(self):
        """Create the Azure OpenAI client when credentials are configured.

        On missing credentials or construction failure, leaves ``self.client``
        as None and surfaces a Streamlit warning/error.
        """
        if not (Config.AZURE_API_KEY and Config.AZURE_ENDPOINT):
            st.warning("Credenziali Azure OpenAI non trovate.")
            return
        try:
            self.client = AzureOpenAI(
                api_key=Config.AZURE_API_KEY,
                api_version=Config.AZURE_API_VERSION,
                azure_endpoint=Config.AZURE_ENDPOINT,
            )
        except Exception as e:
            st.error(f"Errore Azure OpenAI: {e}")
            self.client = None

    def process_document(self, anonymized_text: str) -> str:
        """Run the standard analysis prompt over an anonymized document.

        Args:
            anonymized_text: document text with sensitive entities masked.

        Returns:
            The model's analysis, or an Italian error string when the client
            is not configured or the API call fails.
        """
        if not self.client:
            return "Azure OpenAI non configurato."

        system_prompt = (
            "Analizza il documento anonimizzato e fornisci:\n"
            "1. Tipo di documento\n"
            "2. Riepilogo (max 5 righe)\n"
            "3. Analisi semantica (temi, sentiment)\n"
            "4. Risposta suggerita se è comunicazione cliente\n"
            "Usa solo i contenuti del documento fornito."
        )

        try:
            response = self.client.chat.completions.create(
                model=Config.DEPLOYMENT_NAME,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {
                        "role": "user",
                        "content": f"Analizza questo documento:\n\n{anonymized_text}",
                    },
                ],
                max_tokens=800,
                temperature=0.7,
            )
            return response.choices[0].message.content
        except Exception as e:
            return f"Errore analisi AI: {e}"
|
80 |
+
|
81 |
+
class RAGChatbot:
    """RAG chatbot built on LangChain + FAISS over anonymized documents.

    Fix: the per-document source tag was hard-coded to "Documento (unknown):",
    so the filename-recovery regex in ``answer_question`` could never report a
    real source; the tag now interpolates the actual filename.
    """

    def __init__(self):
        self.vector_store = None   # FAISS index, built on demand
        self.qa_chain = None       # RetrievalQA chain, built with the index
        self.embeddings = None
        self.llm = None
        self.setup_langchain_components()

    def setup_langchain_components(self):
        """Create the embedding model and chat LLM from Azure credentials.

        Leaves both as None (with a Streamlit warning/error) when credentials
        are incomplete or construction fails.
        """
        if not (Config.AZURE_API_KEY and Config.AZURE_ENDPOINT and
                Config.AZURE_EMBEDDING_API_KEY and Config.AZURE_EMBEDDING_ENDPOINT):
            st.warning("Credenziali Azure incomplete. RAG non disponibile.")
            return

        try:
            # Embedding model (separate deployment/endpoint from the chat LLM)
            self.embeddings = AzureOpenAIEmbeddings(
                model=Config.AZURE_EMBEDDING_DEPLOYMENT_NAME,
                api_version=Config.AZURE_API_VERSION,
                azure_endpoint=Config.AZURE_EMBEDDING_ENDPOINT,
                api_key=Config.AZURE_EMBEDDING_API_KEY,
                chunk_size=16
            )

            # Chat model used by the QA chain; low temperature for grounded answers
            self.llm = AzureChatOpenAI(
                deployment_name=Config.DEPLOYMENT_NAME,
                azure_endpoint=Config.AZURE_ENDPOINT,
                api_key=Config.AZURE_API_KEY,
                api_version=Config.AZURE_API_VERSION,
                temperature=0.2
            )
        except Exception as e:
            st.error(f"Errore setup LangChain: {e}")
            self.embeddings = None
            self.llm = None

    def build_vector_store(self, anonymized_docs: Dict[str, Dict]):
        """Build the FAISS vector store and QA chain from confirmed documents.

        Args:
            anonymized_docs: mapping filename -> doc dict; only entries whose
                ``confirmed`` flag is True are indexed.
        """
        if not self.embeddings or not self.llm:
            st.error("Componenti LangChain non configurati.")
            return

        # Tag each document with its filename so answer_question() can recover
        # the source via the "Documento <name>:" prefix.
        all_texts = []
        for filename, doc_data in anonymized_docs.items():
            if doc_data.get('confirmed', False):
                all_texts.append(f"Documento {filename}:\n{doc_data['anonymized']}")

        if not all_texts:
            st.warning("Nessun documento confermato per RAG.")
            return

        with st.spinner("Creando vector store..."):
            # Chunking: split on blank lines with overlap for context continuity
            combined_text = "\n\n".join(all_texts)
            text_splitter = CharacterTextSplitter(
                separator="\n\n",
                chunk_size=1000,
                chunk_overlap=200,
                length_function=len,
            )
            texts = text_splitter.split_text(combined_text)

            # FAISS index over the chunks
            self.vector_store = FAISS.from_texts(texts, self.embeddings)
            st.success(f"Vector store con {len(texts)} chunks creato.")

            # QA chain: "stuff" all retrieved chunks into a single prompt
            qa_prompt = """Usa il contesto per rispondere alla domanda.
Se non sai la risposta, dillo chiaramente.

{context}

Domanda: {question}
Risposta:"""

            QA_PROMPT = PromptTemplate.from_template(qa_prompt)

            self.qa_chain = RetrievalQA.from_chain_type(
                llm=self.llm,
                chain_type="stuff",
                retriever=self.vector_store.as_retriever(),
                return_source_documents=True,
                chain_type_kwargs={"prompt": QA_PROMPT}
            )

    def answer_question(self, query: str) -> str:
        """Answer a question via the QA chain, appending source snippets.

        Returns an Italian error string when the chain is not ready or the
        call fails.
        """
        if not self.qa_chain:
            return "RAG non pronto. Costruisci prima il knowledge base."

        try:
            result = self.qa_chain.invoke({"query": query})
            answer = result["result"]

            # Append the tail of each source chunk, with the recovered filename
            source_docs = result.get("source_documents", [])
            if source_docs:
                answer += "\n\n**Fonti:**\n"
                for doc in source_docs:
                    # Recover the filename tag injected by build_vector_store()
                    match = re.search(r"Documento (.*?):\n", doc.page_content)
                    source_info = f" (da {match.group(1)})" if match else ""
                    answer += f"- ...{doc.page_content[-100:]}{source_info}\n"

            return answer
        except Exception as e:
            return f"Errore RAG: {e}"

    def get_relevant_context(self, query: str, max_docs: int = 3) -> str:
        """Return the concatenated top-``max_docs`` chunks relevant to ``query``.

        Returns "" when no vector store exists; an error string on failure.
        """
        if not self.vector_store:
            return ""

        try:
            docs = self.vector_store.similarity_search(query, k=max_docs)
            context = "\n\n".join([doc.page_content for doc in docs])
            return context
        except Exception as e:
            return f"Errore contesto: {e}"
|
204 |
+
|
205 |
+
class CrewAIManager:
    """Builds and runs CrewAI multi-agent crews over RAG-retrieved context."""

    def __init__(self, rag_chatbot: RAGChatbot):
        # Shared RAG chatbot, used to fetch relevant context for every task.
        self.rag_chatbot = rag_chatbot
        self.agents = None  # dict of role-key -> Agent once setup succeeds
        self.llm = None
        self.setup_crew()

    def setup_crew(self):
        """Create the LLM and the four specialised agents; fills self.agents.

        On missing credentials or failure, self.agents stays None so callers
        can detect that CrewAI is unavailable.
        """
        if not Config.AZURE_API_KEY:
            st.warning("Azure non disponibile per CrewAI")
            return

        try:
            # LLM for CrewAI (LiteLLM-style "azure/<deployment>" model id)
            self.llm = LLM(
                model=f"azure/{Config.DEPLOYMENT_NAME}",
                api_key=Config.AZURE_API_KEY,
                base_url=Config.AZURE_ENDPOINT,
                api_version=Config.AZURE_API_VERSION
            )

            # Agents: all share the same LLM; only the coordinator may delegate.
            document_analyst = Agent(
                role="Document Analyst",
                goal="Analizzare documenti anonimizzati e fornire insights",
                backstory="Esperto analista documenti con focus su privacy e compliance. "
                          "Lavori solo con documenti anonimizzati per proteggere i dati.",
                llm=self.llm,
                verbose=True,
                allow_delegation=False,
                max_iter=3
            )

            rag_specialist = Agent(
                role="RAG Specialist",
                goal="Rispondere a domande usando il sistema RAG",
                backstory="Esperto in Information Retrieval e RAG systems. "
                          "Specializzato nel recupero di informazioni da documenti anonimizzati.",
                llm=self.llm,
                verbose=True,
                allow_delegation=False,
                max_iter=3
            )

            sentiment_analyst = Agent(
                role="Sentiment Analyst",
                goal="Analizzare sentiment e emozioni nei documenti",
                backstory="Esperto in sentiment analysis e behavioral analytics. "
                          "Identifichi emozioni, trend e segnali nei documenti.",
                llm=self.llm,
                verbose=True,
                allow_delegation=False,
                max_iter=3
            )

            strategy_coordinator = Agent(
                role="Strategy Coordinator",
                goal="Coordinare analisi e fornire raccomandazioni strategiche",
                backstory="Senior consultant con background in strategic management. "
                          "Traduci insights tecnici in raccomandazioni business concrete.",
                llm=self.llm,
                verbose=True,
                allow_delegation=True,  # coordinator may delegate to the others
                max_iter=4
            )

            self.agents = {
                'document_analyst': document_analyst,
                'rag_specialist': rag_specialist,
                'sentiment_analyst': sentiment_analyst,
                'strategy_coordinator': strategy_coordinator
            }

            st.success("✅ Agenti CrewAI configurati")

        except Exception as e:
            st.error(f"Errore setup CrewAI: {e}")
            self.agents = None

    def create_analysis_task(self, query: str, analysis_type: str = "comprehensive") -> str:
        """Run a crew for ``query`` and return the result as a string.

        Args:
            query: the user question driving the analysis.
            analysis_type: one of "comprehensive", "document", "sentiment",
                "rag"; "comprehensive" enables all specialised tasks. The
                coordination task is always appended last.

        Returns:
            The crew result as a string, or an Italian error message.
        """
        if not self.agents:
            return "CrewAI non configurato"

        try:
            # Fetch relevant context from the RAG index, shared by all tasks
            context = self.rag_chatbot.get_relevant_context(query, max_docs=5)

            tasks = []

            if analysis_type in ["comprehensive", "document"]:
                # Document-analysis task
                doc_task = Task(
                    description=f"""
                    Analizza documenti per: {query}

                    CONTESTO: {context}

                    Fornisci:
                    - Tipo e classificazione documenti
                    - Temi e argomenti principali
                    - Elementi rilevanti business
                    - Note compliance
                    """,
                    expected_output="Analisi strutturata con classificazione e insights",
                    agent=self.agents['document_analyst']
                )
                tasks.append(doc_task)

            if analysis_type in ["comprehensive", "sentiment"]:
                # Sentiment-analysis task
                sentiment_task = Task(
                    description=f"""
                    Analizza sentiment per: {query}

                    CONTESTO: {context}

                    Valuta:
                    - Sentiment generale (scala 1-10)
                    - Emozioni prevalenti
                    - Trend comunicazioni
                    - Segnali rischio/opportunità
                    """,
                    expected_output="Analisi sentiment con valutazioni quantitative",
                    agent=self.agents['sentiment_analyst']
                )
                tasks.append(sentiment_task)

            if analysis_type in ["comprehensive", "rag"]:
                # RAG question-answering task
                rag_task = Task(
                    description=f"""
                    Rispondi usando RAG: {query}

                    CONTESTO: {context}

                    Includi:
                    - Risposta diretta
                    - Evidenze documenti
                    - Correlazioni trovate
                    - Informazioni mancanti
                    - Suggerimenti approfondimento
                    """,
                    expected_output="Risposta RAG con evidenze",
                    agent=self.agents['rag_specialist']
                )
                tasks.append(rag_task)

            # Coordination/synthesis task (always included, runs last)
            coord_task = Task(
                description=f"""
                Sintetizza risultati per: {query}

                Crea sintesi con:
                - Executive Summary (3 punti)
                - Insights strategici
                - Raccomandazioni prioritarie
                - Next steps concreti
                - Valutazione rischi

                Output executive-ready e actionable.
                """,
                expected_output="Sintesi strategica con raccomandazioni",
                agent=self.agents['strategy_coordinator']
            )
            tasks.append(coord_task)

            # NOTE(review): all agents are passed even when their task was not
            # selected — confirm this is intentional (delegation targets).
            crew = Crew(
                agents=list(self.agents.values()),
                tasks=tasks,
                verbose=True
            )

            with st.spinner(f"Eseguendo analisi {analysis_type}..."):
                result = crew.kickoff()

            return str(result)

        except Exception as e:
            return f"Errore CrewAI: {e}"

    def create_custom_task(self, query: str, selected_agents: List[str], custom_instructions: str = "") -> str:
        """Run a custom crew restricted to ``selected_agents``.

        Args:
            query: the user question.
            selected_agents: keys into self.agents; unknown keys are skipped.
            custom_instructions: optional override for the task description.

        Returns:
            The crew result as a string, or an Italian error message.
        """
        if not self.agents:
            return "CrewAI non configurato"

        try:
            context = self.rag_chatbot.get_relevant_context(query, max_docs=5)

            tasks = []
            agents_to_use = []

            # One task per valid selected agent; invalid keys are ignored.
            for agent_key in selected_agents:
                if agent_key in self.agents:
                    agents_to_use.append(self.agents[agent_key])

                    task = Task(
                        description=f"""
                        {custom_instructions if custom_instructions else f'Analizza secondo il ruolo di {agent_key}'}

                        QUERY: {query}
                        CONTESTO: {context}

                        Fornisci analisi specializzata secondo il tuo ruolo.
                        """,
                        expected_output=f"Analisi specializzata da {agent_key}",
                        agent=self.agents[agent_key]
                    )
                    tasks.append(task)

            if not tasks:
                return "Nessun agente valido selezionato"

            crew = Crew(
                agents=agents_to_use,
                tasks=tasks,
                verbose=True
            )

            with st.spinner(f"Eseguendo task con {len(agents_to_use)} agenti..."):
                result = crew.kickoff()

            return str(result)

        except Exception as e:
            return f"Errore task personalizzato: {e}"
|
src/anonymizer.py
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Sistema di anonimizzazione con NER e regex.
|
3 |
+
"""
|
4 |
+
|
5 |
+
import re
|
6 |
+
from typing import Dict, Tuple
|
7 |
+
from transformers import pipeline
|
8 |
+
import streamlit as st
|
9 |
+
from config import Config, REGEX_PATTERNS
|
10 |
+
|
11 |
+
class NERAnonimizer:
    """Anonymizer combining regex masking with transformer-based NER."""

    def __init__(self):
        # Regex patterns for structured PII (IBAN, email, ...) from config.
        self.regex_patterns = REGEX_PATTERNS
        # The NER pipeline is created lazily on first access (model load is slow).
        self._ner_pipe = None

    @property
    def ner_pipe(self):
        """Lazily load the HuggingFace NER pipeline; None on load failure."""
        if self._ner_pipe is None:
            with st.spinner("Caricamento modello NER..."):
                try:
                    # "simple" aggregation merges word-piece tokens into
                    # whole-entity spans with start/end offsets.
                    self._ner_pipe = pipeline(
                        "ner",
                        model=Config.NER_MODEL,
                        aggregation_strategy="simple"
                    )
                except Exception as e:
                    st.error(f"Errore caricamento NER: {e}")
                    return None
        return self._ner_pipe

    def mask_with_regex(self, text: str) -> Tuple[str, Dict]:
        """Mask regex-matched entities.

        Returns:
            (masked_text, mapping of placeholder -> original substring).
        """
        masked_text = text
        found_entities = {}

        # Longer patterns first so broader matches win over their substrings.
        sorted_patterns = sorted(
            self.regex_patterns.items(),
            key=lambda item: len(item[1]),
            reverse=True
        )

        for label, pattern in sorted_patterns:
            matches = list(re.finditer(pattern, masked_text, flags=re.IGNORECASE))
            # Replace right-to-left so earlier match offsets stay valid even
            # though placeholders differ in length from the originals.
            for match in reversed(matches):
                original = match.group()
                # Skip text that already looks like an inserted placeholder.
                if original.startswith('[') and original.endswith(']'):
                    continue

                # Counter is shared across patterns, keeping placeholders unique.
                placeholder = f"[{label}_{len(found_entities)}]"
                found_entities[placeholder] = original
                masked_text = masked_text[:match.start()] + placeholder + masked_text[match.end():]

        return masked_text, found_entities

    def mask_with_ner(self, text: str) -> Tuple[str, Dict]:
        """Mask entities detected by the NER model (score > 0.5).

        Returns:
            (masked_text, mapping of placeholder -> original substring);
            returns the input unchanged when the pipeline is unavailable
            or inference fails.
        """
        if not self.ner_pipe:
            return text, {}

        try:
            entities = self.ner_pipe(text)
            entity_map = {}

            # Replace from the end of the string backwards so that the
            # start/end offsets of the remaining entities stay valid.
            sorted_entities = sorted(entities, key=lambda x: x['start'], reverse=True)

            for ent in sorted_entities:
                if ent['score'] > 0.5:
                    label = ent['entity_group']
                    original_text = text[ent['start']:ent['end']]

                    # Skip spans already masked (e.g. by the regex pass).
                    if original_text.startswith('[') and original_text.endswith(']'):
                        continue

                    placeholder = f"[{label}_{len(entity_map)}]"
                    entity_map[placeholder] = original_text

                    text = text[:ent['start']] + placeholder + text[ent['end']:]

            return text, entity_map

        except Exception as e:
            st.error(f"Errore NER: {e}")
            return text, {}

    def anonymize(self, text: str) -> Tuple[str, Dict]:
        """Full anonymization pipeline: regex pass, then NER pass.

        Returns:
            (anonymized_text, combined placeholder -> original mapping).
        """
        if not text or not text.strip():
            return text, {}

        # Regex first (deterministic structured PII), then NER on the result.
        masked_text, regex_entities = self.mask_with_regex(text)
        final_text, ner_entities = self.mask_with_ner(masked_text)

        # Merge entity maps; placeholder keys do not collide in practice
        # because labels differ, but NER entries would win on a tie.
        all_entities = {**regex_entities, **ner_entities}

        return final_text, all_entities
|
src/config.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Configurazioni per il sistema di anonimizzazione documenti.
|
3 |
+
"""
|
4 |
+
|
5 |
+
import os
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
|
8 |
+
# Carica variabili d'ambiente
|
9 |
+
load_dotenv()
|
10 |
+
|
11 |
+
class Config:
    """Central configuration: model names and Azure OpenAI settings."""

    # NER model used by the anonymizer (multilingual BERT fine-tuned for NER)
    NER_MODEL = "Davlan/bert-base-multilingual-cased-ner-hrl"

    # Azure OpenAI chat credentials (read from environment / .env)
    AZURE_ENDPOINT = os.getenv("AZURE_ENDPOINT")
    AZURE_API_KEY = os.getenv("AZURE_API_KEY")
    # Embedding endpoint/key fall back to the chat credentials when the
    # dedicated *_EMB variables are not set.
    AZURE_EMBEDDING_ENDPOINT = os.getenv("AZURE_ENDPOINT_EMB", os.getenv("AZURE_ENDPOINT"))
    AZURE_EMBEDDING_API_KEY = os.getenv("AZURE_API_KEY_EMB", os.getenv("AZURE_API_KEY"))
    AZURE_API_VERSION = "2024-02-01"
    DEPLOYMENT_NAME = "gpt-4o"
    AZURE_EMBEDDING_DEPLOYMENT_NAME = "text-embedding-ada-002"

# Regex patterns for sensitive entities, keyed by placeholder label.
REGEX_PATTERNS = {
    "IBAN": r'\bIT\d{2}(?: ?[A-Z0-9]){11,30}\b',
    "EMAIL": r'\b[\w\.-]+@[\w\.-]+\.\w{2,}\b',
    # Italian codice fiscale (16-char tax code)
    "CF": r'\b[A-Z]{6}[0-9]{2}[A-Z][0-9]{2}[A-Z][0-9]{3}[A-Z]\b',
    "CARD": r'\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b',
    # NOTE(review): very permissive — can match dates/number runs; confirm intent
    "PHONE": r'\b\+?[0-9\s\-\(\)]{8,15}\b'
}

# Mirror the Azure key into OPENAI_API_KEY for libraries that read it directly.
if Config.AZURE_API_KEY:
    os.environ["OPENAI_API_KEY"] = Config.AZURE_API_KEY
|
src/main.py
ADDED
@@ -0,0 +1,361 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
App principale Streamlit per l'anonimizzazione documenti.
|
3 |
+
"""
|
4 |
+
|
5 |
+
import streamlit as st
|
6 |
+
import json
|
7 |
+
import pandas as pd
|
8 |
+
from ui_components import (
|
9 |
+
setup_page_config, display_sidebar, display_entity_editor,
|
10 |
+
display_file_preview, display_analysis_results, display_crewai_result,
|
11 |
+
display_progress_metrics, display_examples_section, create_download_button
|
12 |
+
)
|
13 |
+
from utils import (
|
14 |
+
init_session_state, process_uploaded_files, run_anonymization,
|
15 |
+
run_ai_analysis, build_rag_knowledge_base, export_results_json,
|
16 |
+
get_confirmed_docs_count, reset_document_state, add_chat_message,
|
17 |
+
add_crewai_result, clear_crewai_history
|
18 |
+
)
|
19 |
+
|
20 |
+
def main():
    """App entry point: page setup, header, sidebar and the five tabs."""

    # Page configuration and per-session state
    setup_page_config()
    init_session_state()

    # Header
    st.title("🔒 Anonimizzatore Documenti con NER, RAG e CrewAI")
    st.markdown("---")

    # Sidebar
    display_sidebar()

    # Each tab is rendered by its dedicated function, in order.
    tab_labels = [
        "📤 Upload",
        "🔍 Anonimizzazione",
        "📊 Analisi",
        "💬 Chatbot RAG",
        "🤖 CrewAI",
    ]
    renderers = (upload_tab, anonymization_tab, analysis_tab, rag_tab, crewai_tab)

    for tab, render in zip(st.tabs(tab_labels), renderers):
        with tab:
            render()
+
|
63 |
+
def upload_tab():
    """Upload tab: accept .txt files and preview what the session holds."""
    st.header("📤 Carica Documenti")

    files = st.file_uploader(
        "Carica uno o più file .txt",
        type=['txt'],
        accept_multiple_files=True,
        help="Seleziona i file di testo da anonimizzare",
    )

    if not files:
        return

    # Register new files; rerun so the rest of the UI sees them immediately.
    if process_uploaded_files(files):
        st.success(f"Caricati {len(files)} file")
        st.rerun()
    else:
        st.info("Nessun nuovo file caricato.")

    # Preview every file currently held in the session
    st.subheader("📄 File caricati")
    for name, data in st.session_state.uploaded_files.items():
        display_file_preview(name, data['content'])
+
|
86 |
+
def anonymization_tab():
    """Anonymization tab: run the pipeline, then review/edit each document.

    Fix: widget keys and labels were hard-coded with the literal "(unknown)"
    instead of the filename, so with more than one document every text_area
    and button shared the same key and Streamlit raises DuplicateWidgetID.
    All keys/labels now interpolate ``filename``, which is unique per loop
    iteration.
    """
    st.header("🔍 Anonimizzazione e Revisione")

    if not st.session_state.uploaded_files:
        st.warning("⚠️ Carica prima alcuni documenti nella tab 'Upload'")
        return

    # Run the anonymization pipeline on demand
    if st.button("🚀 Avvia Anonimizzazione", type="primary"):
        run_anonymization()
        st.rerun()

    # Review UI for anonymized documents
    if st.session_state.anonymized_docs:
        st.subheader("📝 Revisiona Documenti Anonimizzati")

        for filename, doc_data in st.session_state.anonymized_docs.items():
            with st.expander(
                f"📄 {filename} {'✅' if doc_data['confirmed'] else '⏳'}",
                expanded=not doc_data['confirmed']
            ):

                col1, col2 = st.columns(2)

                # Original text: read-only preview, truncated to 300 chars
                with col1:
                    st.write("**Testo Originale:**")
                    preview = doc_data['original'][:300]
                    if len(doc_data['original']) > 300:
                        preview += "..."

                    st.text_area(
                        "Originale",
                        value=preview,
                        height=200,
                        disabled=True,
                        key=f"orig_{filename}",
                        label_visibility="collapsed"
                    )

                # Anonymized text: editable by the reviewer
                with col2:
                    st.write("**Testo Anonimizzato:**")
                    edited_text = st.text_area(
                        "Anonimizzato (modificabile)",
                        value=doc_data['anonymized'],
                        height=200,
                        key=f"anon_{filename}",
                        label_visibility="collapsed"
                    )

                # Persist manual edits back into session state
                if edited_text != doc_data['anonymized']:
                    st.session_state.anonymized_docs[filename]['anonymized'] = edited_text

                # Entity editor works on a copy; edits land only on confirm
                updated_entities = display_entity_editor(dict(doc_data['entities']), filename)

                # Action buttons
                col_confirm, col_reset = st.columns(2)

                with col_confirm:
                    if st.button(f"✅ Conferma {filename}", key=f"confirm_{filename}"):
                        st.session_state.anonymized_docs[filename]['confirmed'] = True
                        st.session_state.anonymized_docs[filename]['entities'] = updated_entities
                        st.success(f"✅ {filename} confermato!")
                        # Corpus changed: force the RAG index to be rebuilt
                        st.session_state.vector_store_built = False
                        st.rerun()

                with col_reset:
                    if st.button(f"🔄 Reset {filename}", key=f"reset_{filename}"):
                        reset_document_state(filename)
                        st.rerun()

        # Progress metrics
        display_progress_metrics()
|
164 |
+
def analysis_tab():
    """Analysis tab: run the Azure AI analysis over confirmed documents.

    Fix: the export name, download filename, button label and widget key were
    hard-coded with the literal "(unknown)" instead of the filename, so every
    result in the loop produced the same download key (Streamlit
    DuplicateWidgetID) and the same file name. They now interpolate
    ``filename``.
    """
    st.header("📊 Analisi AI")

    confirmed_docs = {k: v for k, v in st.session_state.anonymized_docs.items()
                      if v.get('confirmed', False)}

    if not confirmed_docs:
        st.warning("⚠️ Conferma prima alcuni documenti anonimizzati")
        return

    st.write(f"Documenti confermati pronti: **{len(confirmed_docs)}**")

    if st.button("🤖 Avvia Analisi AI", type="primary"):
        run_ai_analysis()

    # Show results, one expander + download per document
    if st.session_state.processed_docs:
        st.subheader("📋 Risultati Analisi")

        for filename, result in st.session_state.processed_docs.items():
            display_analysis_results(filename, result)

            # JSON export of this document's analysis
            result_json = export_results_json({
                'filename': filename,
                'anonymized_text': result['anonymized_text'],
                'analysis': result['analysis'],
                'entities': result['entities'],
                'entities_count': result['entities_count']
            }, f"analisi_{filename}")

            create_download_button(
                result_json,
                f"analisi_{filename}.json",
                f"💾 Scarica {filename}",
                f"download_{filename}"
            )
|
203 |
+
def rag_tab():
    """RAG tab: chat interface over the confirmed, anonymized documents."""
    st.header("💬 Chatta con i Documenti")

    confirmed = {name: doc for name, doc in st.session_state.anonymized_docs.items()
                 if doc.get('confirmed', False)}

    if not confirmed:
        st.warning("⚠️ Carica e conferma documenti per abilitare il chatbot")
        return

    # The knowledge base must exist before any question can be answered.
    if not build_rag_knowledge_base():
        st.error("Impossibile costruire knowledge base. Verifica configurazione Azure.")
        return

    st.info(f"Chatbot pronto per {len(confirmed)} documenti")

    # Replay previous turns from session history
    for message in st.session_state.chat_history:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Handle a new user question
    prompt = st.chat_input("Fai una domanda sui documenti...")
    if prompt:
        # Record and echo the user's message
        add_chat_message("user", prompt)
        with st.chat_message("user"):
            st.markdown(prompt)

        # Generate and display the assistant's answer
        with st.chat_message("assistant"):
            with st.spinner("Generando risposta..."):
                response = st.session_state.rag_chatbot.answer_question(prompt)
                st.markdown(response)

        # Record the answer
        add_chat_message("assistant", response)
|
241 |
+
def crewai_tab():
    """Render the CrewAI multi-agent analysis tab.

    Requires at least one confirmed document and a configured CrewAI manager.
    The user picks an analysis type (optionally custom agents + instructions),
    runs the crew on a free-text query, and can download past results as JSON.
    """
    st.header("🤖 Analisi Multi-Agente CrewAI")

    confirmed_docs = {k: v for k, v in st.session_state.anonymized_docs.items()
                      if v.get('confirmed', False)}

    if not confirmed_docs:
        st.warning("⚠️ Conferma documenti per abilitare CrewAI")
        return

    if not st.session_state.crewai_manager.agents:
        st.error("❌ CrewAI non configurato. Verifica Azure OpenAI.")
        return

    # Make sure the RAG knowledge base exists before the agents query it.
    build_rag_knowledge_base()

    st.success(f"🎯 CrewAI pronto per {len(confirmed_docs)} documenti")

    # Analysis configuration
    st.subheader("⚙️ Configurazione Analisi")

    col1, col2 = st.columns(2)

    with col1:
        analysis_type = st.selectbox(
            "Tipo di Analisi",
            options=["comprehensive", "document", "sentiment", "rag", "custom"],
            format_func=lambda x: {
                "comprehensive": "🔍 Analisi Comprensiva",
                "document": "📄 Analisi Documentale",
                "sentiment": "😊 Sentiment Analysis",
                "rag": "🔍 Query RAG Avanzata",
                "custom": "⚙️ Personalizzata"
            }[x]
        )

    with col2:
        # The agent picker only applies to the "custom" analysis type.
        if analysis_type == "custom":
            selected_agents = st.multiselect(
                "Agenti da utilizzare",
                options=list(st.session_state.crewai_manager.agents.keys()),
                default=["strategy_coordinator"],
                format_func=lambda x: {
                    "document_analyst": "📄 Document Analyst",
                    "rag_specialist": "🔍 RAG Specialist",
                    "strategy_coordinator": "🎯 Strategy Coordinator",
                    "sentiment_analyst": "😊 Sentiment Analyst"
                }.get(x, x)
            )
        else:
            selected_agents = []

    # Query input
    st.subheader("❓ Query per l'Analisi")
    query_input = st.text_area(
        "Inserisci la tua domanda:",
        placeholder="Es: Analizza i temi principali e identifica rischi operativi...",
        height=100
    )

    # Free-form instructions, custom mode only.
    if analysis_type == "custom":
        custom_instructions = st.text_area(
            "Istruzioni Personalizzate:",
            placeholder="Istruzioni specifiche per gli agenti...",
            height=80
        )
    else:
        custom_instructions = ""

    # Action buttons
    col_analyze, col_clear = st.columns(2)

    with col_analyze:
        # Disabled until the query contains non-whitespace text.
        if st.button("🚀 Avvia Analisi CrewAI", type="primary", disabled=not query_input.strip()):
            if analysis_type == "custom" and not selected_agents:
                st.error("Seleziona almeno un agente")
            else:
                # Dispatch to a custom or predefined crew task.
                if analysis_type == "custom":
                    result = st.session_state.crewai_manager.create_custom_task(
                        query_input, selected_agents, custom_instructions
                    )
                else:
                    result = st.session_state.crewai_manager.create_analysis_task(
                        query_input, analysis_type
                    )

                # Persist the result in the session history.
                add_crewai_result(query_input, analysis_type, result, selected_agents)
                st.success("✅ Analisi CrewAI completata!")

    with col_clear:
        if st.button("🗑️ Pulisci Cronologia"):
            clear_crewai_history()
            st.success("Cronologia pulita!")
            st.rerun()

    # Past results, newest first.
    if st.session_state.crewai_history:
        st.subheader("📋 Risultati Analisi CrewAI")

        for i, analysis in enumerate(reversed(st.session_state.crewai_history)):
            display_crewai_result(analysis, len(st.session_state.crewai_history) - i)

            # Per-result JSON download (timestamp sanitized for the filename).
            result_json = export_results_json(analysis, f"crewai_analysis_{i}")
            create_download_button(
                result_json,
                f"crewai_analysis_{analysis['timestamp'].replace(':', '-').replace(' ', '_')}.json",
                "💾 Scarica Risultato",
                f"download_crewai_{i}"
            )

    # Example queries
    display_examples_section()
# Script entry point: launch the Streamlit application.
if __name__ == "__main__":
    main()
src/ui_components.py
ADDED
@@ -0,0 +1,243 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Componenti UI riutilizzabili per Streamlit.
|
3 |
+
"""
|
4 |
+
|
5 |
+
import streamlit as st
|
6 |
+
import pandas as pd
|
7 |
+
from typing import Dict
|
8 |
+
from config import Config
|
9 |
+
|
10 |
+
def setup_page_config():
    """Apply the global Streamlit page settings (title, icon, wide layout)."""
    page_options = {
        "page_title": "Anonimizzatore Documenti",
        "page_icon": "🔒",
        "layout": "wide",
    }
    st.set_page_config(**page_options)
def display_sidebar():
    """Render the sidebar: Azure status, document statistics, session reset."""
    with st.sidebar:
        st.header("⚙️ Configurazione")

        # Azure OpenAI status: both key and endpoint must be set in Config.
        if Config.AZURE_API_KEY and Config.AZURE_ENDPOINT:
            st.success("✅ Azure OpenAI configurato")
            st.info(f"Chat Model: {Config.DEPLOYMENT_NAME}")
            st.info(f"Embedding Model: {Config.AZURE_EMBEDDING_DEPLOYMENT_NAME}")
        else:
            st.error("❌ Azure OpenAI non configurato")
            st.write("Configura le variabili d'ambiente:")
            st.code("""
            AZURE_ENDPOINT=your_endpoint
            AZURE_API_KEY=your_api_key
            AZURE_ENDPOINT_EMB=your_embedding_endpoint
            AZURE_API_KEY_EMB=your_embedding_api_key
            """)

        st.markdown("---")

        # Document statistics (shown only when something has been uploaded).
        if 'uploaded_files' in st.session_state and st.session_state.uploaded_files:
            st.subheader("📊 Statistiche")
            uploaded_count = len(st.session_state.uploaded_files)
            anonymized_count = len(st.session_state.get('anonymized_docs', {}))
            confirmed_count = sum(1 for doc in st.session_state.get('anonymized_docs', {}).values()
                                  if doc.get('confirmed', False))

            st.metric("File caricati", uploaded_count)
            st.metric("Anonimizzati", anonymized_count)
            st.metric("Confermati", confirmed_count)

            # Knowledge-base readiness indicator.
            if confirmed_count > 0:
                if st.session_state.get('vector_store_built', False):
                    st.success("✅ Knowledge Base pronto")
                else:
                    st.info("🔄 Knowledge Base da costruire")

            st.markdown("---")

        # Full session reset: drop every session-state key and rerun the app.
        if st.button("🔄 Reset sessione"):
            for key in list(st.session_state.keys()):
                del st.session_state[key]
            st.rerun()
def display_entity_editor(entities: Dict, doc_key: str):
    """Editable list of detected entities for the document identified by *doc_key*.

    Each entity row shows the placeholder, an editable original value, and a
    delete button. Deletions update session state, re-run anonymization and
    trigger a rerun. Returns the (possibly edited) placeholder→value mapping.
    """
    if not entities:
        st.info("Nessuna entità sensibile rilevata.")
        return entities

    st.subheader("🔍 Entità rilevate")
    st.write("Verifica e modifica le entità sensibili:")

    current_entities_list = list(entities.items())
    updated_entities_dict = {}
    deleted_placeholders = set()

    for i, (placeholder, original_value) in enumerate(current_entities_list):
        col1, col2, col3 = st.columns([2, 3, 1])

        with col1:
            st.write(f"**{placeholder}**")

        with col2:
            # Widget key includes doc, placeholder and index to stay unique.
            new_value = st.text_input(
                "Valore originale",
                value=original_value,
                key=f"{doc_key}_{placeholder}_value_{i}"
            )
            updated_entities_dict[placeholder] = new_value

        with col3:
            if st.button("🗑️", key=f"{doc_key}_{placeholder}_delete_{i}", help="Rimuovi"):
                deleted_placeholders.add(placeholder)

    # Handle deletions: drop removed placeholders and refresh the document.
    if deleted_placeholders:
        final_entities = {k: v for k, v in updated_entities_dict.items()
                          if k not in deleted_placeholders}
        st.session_state.anonymized_docs[doc_key]['entities'] = final_entities

        # NOTE(review): this re-anonymizes the ORIGINAL text with a fresh
        # NERAnonimizer, which appears to ignore `final_entities` — confirm
        # that re-detecting from scratch is the intended behavior here.
        from anonymizer import NERAnonimizer
        anonymizer = NERAnonimizer()
        st.session_state.anonymized_docs[doc_key]['anonymized'], _ = anonymizer.anonymize(
            st.session_state.anonymized_docs[doc_key]['original']
        )
        # Any text change invalidates the RAG vector store.
        st.session_state.vector_store_built = False
        st.rerun()

    return updated_entities_dict
def display_file_preview(filename: str, content: str, max_chars: int = 500):
    """Show a collapsed preview of an uploaded file.

    Args:
        filename: Name of the uploaded file (used in the label and widget key).
        content: Full text content of the file.
        max_chars: Maximum number of characters shown in the preview.

    FIX: interpolate *filename* in the expander title and the text_area key.
    A constant key raises Streamlit's DuplicateWidgetID error as soon as a
    second file is previewed, and the label never showed which file it was.
    """
    with st.expander(f"📄 {filename} ({len(content)} caratteri)"):
        preview_text = content[:max_chars]
        if len(content) > max_chars:
            preview_text += "..."  # signal truncation to the user

        st.text_area(
            "Contenuto",
            value=preview_text,
            height=150,
            disabled=True,
            key=f"preview_{filename}",
            label_visibility="collapsed"
        )
def display_analysis_results(filename: str, result: Dict):
    """Render the AI analysis of one processed document inside an expander.

    Args:
        filename: Name of the analyzed file (used in the title and widget key).
        result: Dict with keys 'anonymized_text', 'analysis', 'entities',
            'entities_count' as produced by run_ai_analysis().

    FIX: interpolate *filename* in the expander title and the text_area key —
    a constant key collides (DuplicateWidgetID) with more than one document.
    """
    with st.expander(f"📊 Analisi: {filename}"):
        # Summary metrics
        col1, col2, col3 = st.columns(3)
        col1.metric("Caratteri testo", len(result['anonymized_text']))
        col2.metric("Entità trovate", result['entities_count'])
        col3.metric("Stato", "✅ Completato")

        # Anonymized text
        st.subheader("📄 Testo Anonimizzato")
        st.text_area(
            "Testo processato",
            value=result['anonymized_text'],
            height=150,
            disabled=True,
            key=f"analysis_text_{filename}"
        )

        # AI analysis (markdown produced by the Azure processor)
        st.subheader("🤖 Analisi AI")
        st.markdown(result['analysis'])

        # Entity table: derive the type from the placeholder, e.g. "[PER_1]" -> "PER".
        if result['entities']:
            st.subheader("🔍 Entità Anonimizzate")
            entities_df = pd.DataFrame([
                {
                    'Placeholder': k,
                    'Valore Originale': v,
                    'Tipo': k.split('_')[0].replace('[', '')
                }
                for k, v in result['entities'].items()
            ])
            st.dataframe(entities_df, use_container_width=True)
def display_crewai_result(analysis: Dict, index: int):
    """Render a single CrewAI analysis record inside a collapsible expander."""
    header = f"🤖 Analisi {index}: {analysis['analysis_type'].upper()} - {analysis['timestamp']}"
    with st.expander(header):
        # Header metrics: type, timestamp and agent summary.
        type_col, time_col, agents_col = st.columns(3)

        with type_col:
            st.metric("Tipo Analisi", analysis['analysis_type'].capitalize())

        with time_col:
            st.metric("Timestamp", analysis['timestamp'])

        with agents_col:
            agents_used = analysis.get('agents_used', 'auto')
            # A list is summarized by its length; 'auto' means auto-selected.
            if isinstance(agents_used, list):
                agent_count = f"{len(agents_used)} agenti"
            elif agents_used == 'auto':
                agent_count = "Automatico"
            else:
                agent_count = str(agents_used)
            st.metric("Agenti", agent_count)

        # Original query and the crew's answer.
        st.subheader("❓ Query Originale")
        st.info(analysis['query'])

        st.subheader("🎯 Risultato Analisi")
        st.markdown(analysis['result'])
def display_progress_metrics():
    """Show a confirmed/total progress metric for the anonymized documents."""
    if 'anonymized_docs' not in st.session_state:
        return

    docs = st.session_state.anonymized_docs
    total_count = len(docs)
    if total_count == 0:
        return

    # Booleans sum as 0/1, so this counts confirmed documents.
    confirmed_count = sum(doc.get('confirmed', False) for doc in docs.values())

    st.metric(
        "Progresso Conferme",
        f"{confirmed_count}/{total_count}",
        delta=f"{(confirmed_count/total_count)*100:.1f}%"
    )
def display_examples_section():
    """Show example CrewAI queries (Italian UI text) in a collapsed expander."""
    with st.expander("💡 Esempi di Query per CrewAI"):
        st.markdown("""
**Analisi Comprensiva:**
- "Fornisci un'analisi completa dei documenti identificando rischi, opportunità e raccomandazioni strategiche"
- "Analizza la comunicazione aziendale e suggerisci miglioramenti nella gestione clienti"

**Analisi Documentale:**
- "Classifica i documenti per tipologia e identifica pattern ricorrenti"
- "Analizza la struttura e organizzazione delle informazioni nei documenti"

**Sentiment Analysis:**
- "Valuta il sentiment generale nelle comunicazioni e identifica aree di miglioramento"
- "Analizza le emozioni e i trend nei feedback dei clienti"

**Query RAG Avanzata:**
- "Trova tutte le menzioni di problemi operativi e le relative soluzioni proposte"
- "Estrai informazioni su scadenze, deadline e milestone importanti"

**Personalizzata:**
- Combina agenti specifici per analisi mirate alle tue esigenze
        """)
def create_download_button(data: str, filename: str, label: str, key: str):
    """Render a Streamlit download button that serves *data* as a JSON file.

    Args:
        data: JSON string to download.
        filename: Suggested file name for the browser.
        label: Button caption.
        key: Unique Streamlit widget key.
    """
    st.download_button(
        label=label,
        data=data,
        file_name=filename,
        key=key,
        mime="application/json",
    )
src/utils.py
ADDED
@@ -0,0 +1,229 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Funzioni utility e gestione stato sessione.
|
3 |
+
"""
|
4 |
+
|
5 |
+
import streamlit as st
|
6 |
+
import json
|
7 |
+
import pandas as pd
|
8 |
+
from datetime import datetime
|
9 |
+
from anonymizer import NERAnonimizer
|
10 |
+
from ai_processor import AzureProcessor, RAGChatbot, CrewAIManager
|
11 |
+
|
12 |
+
def init_session_state():
    """Ensure every session-state key the app relies on exists.

    Missing keys are created from a factory; existing ones are left untouched.
    The order matters: the CrewAI manager is built from the RAG chatbot, so
    'rag_chatbot' must be initialized before 'crewai_manager'.
    """
    defaults = (
        ('anonymizer', NERAnonimizer),
        ('processor', AzureProcessor),
        ('rag_chatbot', RAGChatbot),
        ('crewai_manager', lambda: CrewAIManager(st.session_state.rag_chatbot)),
        ('uploaded_files', dict),
        ('anonymized_docs', dict),
        ('processed_docs', dict),
        ('chat_history', list),
        ('crewai_history', list),
        ('vector_store_built', lambda: False),
    )

    for key, factory in defaults:
        if key not in st.session_state:
            st.session_state[key] = factory()
def validate_file_upload(uploaded_file) -> bool:
    """Return True when *uploaded_file* is acceptable (.txt, at most 10 MB).

    Each failed check reports an error to the UI and rejects the file.
    Checks are lazy so the size is only read when the extension is valid.
    """
    if not uploaded_file:
        return False

    checks = (
        (lambda: uploaded_file.name.endswith('.txt'),
         "Solo file .txt sono supportati"),
        (lambda: uploaded_file.size <= 10 * 1024 * 1024,
         "File troppo grande (max 10MB)"),
    )

    for passes, error_message in checks:
        if not passes():
            st.error(error_message)
            return False

    return True
def process_uploaded_files(uploaded_files):
    """Read newly uploaded .txt files into session state.

    Returns True when at least one new file was stored — which also resets
    every piece of state derived from the previous document set — otherwise
    False.
    """
    stored_new_file = False

    for file in uploaded_files:
        # Validation first (it reports errors to the UI), then dedup by name.
        if not validate_file_upload(file):
            continue
        if file.name in st.session_state.uploaded_files:
            continue
        try:
            text = file.read().decode('utf-8')
            st.session_state.uploaded_files[file.name] = {
                'content': text,
                'size': len(text)
            }
            stored_new_file = True
        except Exception as e:
            st.error(f"Errore lettura file {file.name}: {e}")

    if not stored_new_file:
        return False

    # New uploads invalidate everything derived from the old documents.
    st.session_state.anonymized_docs = {}
    st.session_state.processed_docs = {}
    st.session_state.vector_store_built = False
    st.session_state.chat_history = []
    st.session_state.crewai_history = []
    return True
def run_anonymization():
    """Anonymize every uploaded file and store the results in session state.

    Each entry in st.session_state.anonymized_docs keeps the original text,
    the anonymized text, the detected entities, and a 'confirmed' flag the
    user must set manually before downstream processing.

    FIX: the progress label now interpolates the current filename (it was a
    literal placeholder, so the user could not see which file was in flight).
    """
    if not st.session_state.uploaded_files:
        st.warning("Nessun file caricato")
        return

    progress_bar = st.progress(0)
    total_files = len(st.session_state.uploaded_files)

    for i, (filename, file_data) in enumerate(st.session_state.uploaded_files.items()):
        progress_bar.progress((i + 1) / total_files, f"Processando {filename}...")

        # NER-based anonymization of the raw text.
        anonymized_text, entities = st.session_state.anonymizer.anonymize(file_data['content'])

        st.session_state.anonymized_docs[filename] = {
            'original': file_data['content'],
            'anonymized': anonymized_text,
            'entities': entities,
            'confirmed': False  # requires explicit user confirmation
        }

    progress_bar.empty()
    st.success("✅ Anonimizzazione completata!")
    # Fresh anonymization invalidates the RAG vector store.
    st.session_state.vector_store_built = False
def run_ai_analysis():
    """Run the Azure AI analysis over every confirmed document.

    Results are stored in st.session_state.processed_docs keyed by filename;
    only the anonymized text — never the original — is sent to the processor.

    FIX: the progress label now interpolates the current filename (it was a
    literal placeholder, so the user could not see which file was in flight).
    """
    confirmed_docs = {k: v for k, v in st.session_state.anonymized_docs.items()
                      if v.get('confirmed', False)}

    if not confirmed_docs:
        st.warning("Nessun documento confermato")
        return

    progress_bar = st.progress(0)

    for i, (filename, doc_data) in enumerate(confirmed_docs.items()):
        progress_bar.progress((i + 1) / len(confirmed_docs), f"Analizzando {filename}...")

        # Azure analysis on the anonymized text only.
        analysis = st.session_state.processor.process_document(doc_data['anonymized'])

        st.session_state.processed_docs[filename] = {
            'anonymized_text': doc_data['anonymized'],
            'entities_count': len(doc_data['entities']),
            'analysis': analysis,
            'entities': doc_data['entities']
        }

    progress_bar.empty()
    st.success("✅ Analisi completata!")
def build_rag_knowledge_base():
    """Build the RAG vector store from confirmed documents (idempotent).

    Returns False when no confirmed documents exist; True once the knowledge
    base is available (building it on first call, reusing it afterwards).
    """
    confirmed_docs = {name: doc for name, doc in st.session_state.anonymized_docs.items()
                      if doc.get('confirmed', False)}

    if not confirmed_docs:
        st.warning("Nessun documento confermato per RAG")
        return False

    # Already built: nothing to do.
    if st.session_state.vector_store_built:
        return True

    with st.spinner("Costruendo knowledge base..."):
        st.session_state.rag_chatbot.build_vector_store(confirmed_docs)
        st.session_state.vector_store_built = True
    return True
def export_results_json(results: dict, filename_prefix: str) -> str:
    """Serialize *results* plus an export-metadata section to a JSON string.

    Args:
        results: Payload to export; its keys appear at the top level.
        filename_prefix: Accepted for API compatibility but not used in the
            serialized output (callers build the file name themselves).

    Returns:
        Pretty-printed UTF-8 JSON; non-serializable values fall back to str().
    """
    metadata = {
        'exported_at': datetime.now().isoformat(),
        'total_items': len(results) if isinstance(results, dict) else 1,
    }
    payload = dict(results)
    payload['metadata'] = metadata
    return json.dumps(payload, indent=2, ensure_ascii=False, default=str)
def get_confirmed_docs_count() -> int:
    """Return how many anonymized documents the user has confirmed."""
    if 'anonymized_docs' not in st.session_state:
        return 0

    documents = st.session_state.anonymized_docs.values()
    # Booleans sum as 0/1, so this is a count of confirmed documents.
    return sum(doc.get('confirmed', False) for doc in documents)
def reset_document_state(filename: str):
    """Re-anonymize *filename* from its original upload, clearing confirmation.

    No-op when the file is not among the uploaded files.
    """
    uploads = st.session_state.uploaded_files
    if filename not in uploads:
        return

    original_text = uploads[filename]['content']
    anonymized_text, entities = st.session_state.anonymizer.anonymize(original_text)

    st.session_state.anonymized_docs[filename] = {
        'original': original_text,
        'anonymized': anonymized_text,
        'entities': entities,
        'confirmed': False,
    }
    # Any change to the documents invalidates the RAG vector store.
    st.session_state.vector_store_built = False
def add_chat_message(role: str, content: str):
    """Append one message ('user' or 'assistant') to the session chat history."""
    message = {"role": role, "content": content}
    st.session_state.chat_history.append(message)
def add_crewai_result(query: str, analysis_type: str, result: str, agents_used=None):
    """Append a timestamped CrewAI analysis record to the session history.

    *agents_used* falls back to the string "auto" when empty/None, meaning the
    agents were selected automatically.
    """
    record = {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "query": query,
        "analysis_type": analysis_type,
        "result": result,
        "agents_used": agents_used if agents_used else "auto",
    }
    st.session_state.crewai_history.append(record)
def clear_chat_history():
    """Reset the session chat history to an empty list."""
    st.session_state["chat_history"] = []
def clear_crewai_history():
    """Reset the session CrewAI analysis history to an empty list."""
    st.session_state["crewai_history"] = []
def get_system_stats() -> dict:
    """Return a snapshot of session counters for diagnostics/reporting."""
    state = st.session_state
    # Every lookup uses .get with an empty default so this is safe to call
    # before init_session_state() has populated the keys.
    return {
        'uploaded_files': len(state.get('uploaded_files', {})),
        'anonymized_docs': len(state.get('anonymized_docs', {})),
        'confirmed_docs': get_confirmed_docs_count(),
        'processed_docs': len(state.get('processed_docs', {})),
        'chat_messages': len(state.get('chat_history', [])),
        'crewai_analyses': len(state.get('crewai_history', [])),
        'vector_store_ready': state.get('vector_store_built', False),
    }