import streamlit as st
import pandas as pd
import time
import matplotlib.pyplot as plt
from openpyxl.utils.dataframe import dataframe_to_rows
import io
from rapidfuzz import fuzz
import os
from openpyxl import load_workbook
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from transformers import pipeline
from io import StringIO, BytesIO
import sys
import contextlib
from langchain_openai import ChatOpenAI # Updated import
import pdfkit
from jinja2 import Template
def translate_reasoning_to_russian(llm, text):
    """Ask the LLM for a formal Russian translation of an English explanation.

    Args:
        llm: Runnable chat model that can be piped after a ``PromptTemplate``.
        text: English text substituted into the prompt's ``{text}`` slot.

    Returns:
        The stripped model output: ``response.content`` when the response has
        that attribute, the response itself when it is a ``str``, otherwise
        ``str(response)``.
    """
    translation_prompt = PromptTemplate(
        input_variables=["text"],
        template="""
Translate this English explanation to Russian, maintaining a formal business style:
"{text}"
Your response should contain only the Russian translation.
""",
    )
    translation_chain = translation_prompt | llm | RunnablePassthrough()
    reply = translation_chain.invoke({"text": text})

    # Normalise the reply to plain text whatever type the model returned.
    if hasattr(reply, 'content'):
        raw_text = reply.content
    elif isinstance(reply, str):
        raw_text = reply
    else:
        raw_text = str(reply)
    return raw_text.strip()
def create_download_section(excel_data, pdf_data):
    """Render the download area for the generated Excel report.

    Args:
        excel_data: Payload handed to ``st.download_button``; when ``None`` an
            error message is shown instead of the button.
        pdf_data: Accepted for interface compatibility; not used here.
    """
    st.markdown("""
""", unsafe_allow_html=True)

    # Two columns are created; only the first one receives content.
    left_column, _right_column = st.columns(2)
    with left_column:
        if excel_data is None:
            st.error("ΠΡΠΈΠ±ΠΊΠ° ΠΏΡΠΈ ΡΠΎΠ·Π΄Π°Π½ΠΈΠΈ Excel ΡΠ°ΠΉΠ»Π°")
        else:
            st.download_button(
                label="π Π‘ΠΊΠ°ΡΠ°ΡΡ Excel ΠΎΡΡΠ΅Ρ",
                data=excel_data,
                file_name="ΡΠ΅Π·ΡΠ»ΡΡΠ°Ρ_Π°Π½Π°Π»ΠΈΠ·Π°.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                key="excel_download",
            )
def display_sentiment_results(row, sentiment, impact=None, reasoning=None):
    """Show one news item's sentiment result in the Streamlit page.

    Args:
        row: Mapping-like record holding the entity and headline columns.
        sentiment: Sentiment label; "Negative" and "Positive" are rendered as
            markdown blocks, anything else as plain ``st.write`` lines.
        impact: Optional impact text, shown only for negative items.
        reasoning: Optional reasoning text, shown only for negative items.
    """
    if sentiment == "Negative":
        # The optional lines are built outside the f-string: the source had
        # string literals with raw line breaks nested inside the f-string
        # expression, which is not valid Python. The break is kept as "\n".
        impact_line = "ΠΡΡΠ΅ΠΊΡ: " + impact + "\n" if impact else ""
        reasoning_line = "ΠΠ±ΠΎΡΠ½ΠΎΠ²Π°Π½ΠΈΠ΅: " + reasoning + "\n" if reasoning else ""
        st.markdown(f"""
ΠΠ±ΡΠ΅ΠΊΡ: {row['ΠΠ±ΡΠ΅ΠΊΡ']}
ΠΠΎΠ²ΠΎΡΡΡ: {row['ΠΠ°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ']}
Π’ΠΎΠ½Π°Π»ΡΠ½ΠΎΡΡΡ: {sentiment}
{impact_line}
{reasoning_line}
""", unsafe_allow_html=True)
    elif sentiment == "Positive":
        st.markdown(f"""
ΠΠ±ΡΠ΅ΠΊΡ: {row['ΠΠ±ΡΠ΅ΠΊΡ']}
ΠΠΎΠ²ΠΎΡΡΡ: {row['ΠΠ°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ']}
Π’ΠΎΠ½Π°Π»ΡΠ½ΠΎΡΡΡ: {sentiment}
""", unsafe_allow_html=True)
    else:
        st.write(f"ΠΠ±ΡΠ΅ΠΊΡ: {row['ΠΠ±ΡΠ΅ΠΊΡ']}")
        st.write(f"ΠΠΎΠ²ΠΎΡΡΡ: {row['ΠΠ°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ']}")
        st.write(f"Π’ΠΎΠ½Π°Π»ΡΠ½ΠΎΡΡΡ: {sentiment}")
    # NOTE(review): separator assumed to follow every result (function level);
    # the source indentation was lost, so confirm it was not else-only.
    st.write("---")
# Initialize sentiment analyzers.
# Streamlit re-executes the script on every interaction, so the models are
# loaded through st.cache_resource to avoid rebuilding them on each rerun.
@st.cache_resource
def _load_sentiment_pipeline(model_name):
    """Build (once per model name) a Hugging Face sentiment-analysis pipeline."""
    return pipeline("sentiment-analysis", model=model_name)


finbert = _load_sentiment_pipeline("ProsusAI/finbert")
roberta = _load_sentiment_pipeline("cardiffnlp/twitter-roberta-base-sentiment")
finbert_tone = _load_sentiment_pipeline("yiyanghkust/finbert-tone")
def get_mapped_sentiment(result):
    """Map a classifier result to "Positive", "Negative" or "Neutral".

    Args:
        result: Mapping with a ``'label'`` string; compared case-insensitively.

    Returns:
        "Positive" or "Negative" for the recognised label spellings, otherwise
        "Neutral".
    """
    positive_labels = {"positive", "label_2", "pos", "pos_label"}
    negative_labels = {"negative", "label_0", "neg", "neg_label"}

    normalized = result['label'].lower()
    if normalized in negative_labels:
        return "Negative"
    if normalized in positive_labels:
        return "Positive"
    return "Neutral"
def analyze_sentiment(text):
    """Classify ``text`` by majority vote of the three sentiment models.

    Each of ``finbert``, ``roberta`` and ``finbert_tone`` is run on the text
    (truncated to 512 tokens) and its top label is mapped with
    ``get_mapped_sentiment``.

    The file previously defined this function twice; the earlier
    "any model says negative" variant was shadowed by the later definition
    and never ran, so only the effective majority rule is kept.

    Args:
        text: Text to classify.

    Returns:
        The label at least two models agree on, or "Neutral" when all three
        disagree.
    """
    votes = [
        get_mapped_sentiment(model(text, truncation=True, max_length=512)[0])
        for model in (finbert, roberta, finbert_tone)
    ]
    # With three votes at most one label can reach two, so order is irrelevant.
    for label in ("Negative", "Positive", "Neutral"):
        if votes.count(label) >= 2:
            return label
    return "Neutral"
def detect_events(llm, text, entity):
    """Ask the LLM whether a news item reports one of the tracked event types.

    Args:
        llm: Runnable chat model piped after the prompt template.
        text: News text substituted into the prompt's ``{text}`` slot.
        entity: Company name substituted into the prompt's ``{entity}`` slot.

    Returns:
        ``(event_type, summary)``. When the reply does not contain both
        markers, or parsing fails, the defaults (the "no event" label and an
        empty summary) are returned.
    """
    template = """
ΠΡΠΎΠ°Π½Π°Π»ΠΈΠ·ΠΈΡΡΠΉΡΠ΅ ΡΠ»Π΅Π΄ΡΡΡΡΡ Π½ΠΎΠ²ΠΎΡΡΡ ΠΎ ΠΊΠΎΠΌΠΏΠ°Π½ΠΈΠΈ "{entity}" ΠΈ ΠΎΠΏΡΠ΅Π΄Π΅Π»ΠΈΡΠ΅ Π½Π°Π»ΠΈΡΠΈΠ΅ ΡΠ»Π΅Π΄ΡΡΡΠΈΡ
ΡΠΎΠ±ΡΡΠΈΠΉ:
1. ΠΡΠ±Π»ΠΈΠΊΠ°ΡΠΈΡ ΠΎΡΡΠ΅ΡΠ½ΠΎΡΡΠΈ ΠΈ ΠΊΠ»ΡΡΠ΅Π²ΡΠ΅ ΠΏΠΎΠΊΠ°Π·Π°ΡΠ΅Π»ΠΈ (Π²ΡΡΡΡΠΊΠ°, ΠΏΡΠΈΠ±ΡΠ»Ρ, EBITDA)
2. Π‘ΠΎΠ±ΡΡΠΈΡ Π½Π° ΡΡΠ½ΠΊΠ΅ ΡΠ΅Π½Π½ΡΡ
Π±ΡΠΌΠ°Π³ (ΠΏΠΎΠ³Π°ΡΠ΅Π½ΠΈΠ΅ ΠΎΠ±Π»ΠΈΠ³Π°ΡΠΈΠΉ, Π²ΡΠΏΠ»Π°ΡΠ°/Π½Π΅Π²ΡΠΏΠ»Π°ΡΠ° ΠΊΡΠΏΠΎΠ½Π°, Π΄Π΅ΡΠΎΠ»Ρ, ΡΠ΅ΡΡΡΡΠΊΡΡΡΠΈΠ·Π°ΡΠΈΡ)
3. Π‘ΡΠ΄Π΅Π±Π½ΡΠ΅ ΠΈΡΠΊΠΈ ΠΈΠ»ΠΈ ΡΡΠΈΠ΄ΠΈΡΠ΅ΡΠΊΠΈΠ΅ Π΄Π΅ΠΉΡΡΠ²ΠΈΡ ΠΏΡΠΎΡΠΈΠ² ΠΊΠΎΠΌΠΏΠ°Π½ΠΈΠΈ, Π°ΠΊΡΠΈΠΎΠ½Π΅ΡΠΎΠ², ΠΌΠ΅Π½Π΅Π΄ΠΆΠ΅ΡΠΎΠ²
ΠΠΎΠ²ΠΎΡΡΡ: {text}
ΠΡΠ²Π΅ΡΡΡΠ΅ Π² ΡΠ»Π΅Π΄ΡΡΡΠ΅ΠΌ ΡΠΎΡΠΌΠ°ΡΠ΅:
Π’ΠΈΠΏ: ["ΠΡΡΠ΅ΡΠ½ΠΎΡΡΡ" ΠΈΠ»ΠΈ "Π Π¦Π" ΠΈΠ»ΠΈ "Π‘ΡΠ΄" ΠΈΠ»ΠΈ "ΠΠ΅Ρ"]
ΠΡΠ°ΡΠΊΠΎΠ΅ ΠΎΠΏΠΈΡΠ°Π½ΠΈΠ΅: [ΠΊΡΠ°ΡΠΊΠΎΠ΅ ΠΎΠΏΠΈΡΠ°Π½ΠΈΠ΅ ΡΠΎΠ±ΡΡΠΈΡ Π½Π° ΡΡΡΡΠΊΠΎΠΌ ΡΠ·ΡΠΊΠ΅, Π½Π΅ Π±ΠΎΠ»Π΅Π΅ 2 ΠΏΡΠ΅Π΄Π»ΠΎΠΆΠ΅Π½ΠΈΠΉ]
"""
    type_marker = "Π’ΠΈΠΏ:"
    summary_marker = "ΠΡΠ°ΡΠΊΠΎΠ΅ ΠΎΠΏΠΈΡΠ°Π½ΠΈΠ΅:"

    prompt = PromptTemplate(template=template, input_variables=["entity", "text"])
    chain = prompt | llm
    response = chain.invoke({"entity": entity, "text": text})

    event_type = "ΠΠ΅Ρ"
    summary = ""
    try:
        response_text = response.content if hasattr(response, 'content') else str(response)
        if type_marker in response_text and summary_marker in response_text:
            # Split on the first summary marker only: an unbounded split made
            # the two-way unpack raise whenever the marker appeared again in
            # the summary, discarding an otherwise usable answer.
            type_part, summary_part = response_text.split(summary_marker, 1)
            if type_marker in type_part:
                raw_type = type_part.split(type_marker)[1]
                # The prompt shows the type in brackets and quotes; models
                # often echo them, which would break later equality checks.
                event_type = raw_type.strip().strip('[]"\'').strip()
                summary = summary_part.strip()
    except Exception as e:
        st.warning(f"ΠΡΠΈΠ±ΠΊΠ° ΠΏΡΠΈ Π°Π½Π°Π»ΠΈΠ·Π΅ ΡΠΎΠ±ΡΡΠΈΠΉ: {str(e)}")
    return event_type, summary
def fuzzy_deduplicate(df, column, threshold=50):
    """Drop rows whose text in ``column`` is a near-duplicate of an earlier row.

    Rows are scanned in order. A row is kept when its value is missing, or
    when its ``fuzz.ratio`` against every previously kept text is below
    ``threshold``.

    Args:
        df: DataFrame to filter.
        column: Name of the text column to compare.
        threshold: Similarity score at or above which a row counts as a
            duplicate.

    Returns:
        The rows of ``df`` that were kept, selected by position.
    """
    kept_positions = []
    unique_texts = []
    for position, value in enumerate(df[column]):
        if not pd.isna(value):
            candidate = str(value)
            is_duplicate = any(
                fuzz.ratio(candidate, known) >= threshold for known in unique_texts
            )
            if is_duplicate:
                continue
            unique_texts.append(candidate)
        # Missing values are always kept and never join the comparison pool.
        kept_positions.append(position)
    return df.iloc[kept_positions]
def translate_text(llm, text):
    """Translate Russian text to English through the chat model.

    Args:
        llm: Object exposing ``invoke(messages)`` with OpenAI-style messages.
        text: Text to translate.

    Returns:
        The stripped model reply (``.content`` when present, the string itself
        when the reply is a ``str``, otherwise ``str(reply)``). If anything
        raises, the error is shown via ``st.error`` and the original ``text``
        is returned unchanged.
    """
    try:
        # All models use the OpenAI-compatible message format.
        system_message = {
            "role": "system",
            "content": "You are a translator. Translate the given Russian text to English accurately and concisely.",
        }
        user_message = {
            "role": "user",
            "content": f"Translate this Russian text to English: {text}",
        }
        reply = llm.invoke([system_message, user_message])

        if hasattr(reply, 'content'):
            translated = reply.content
        elif isinstance(reply, str):
            translated = reply
        else:
            translated = str(reply)
        return translated.strip()
    except Exception as e:
        st.error(f"Translation error: {str(e)}")
        return text
def _require_secret(secret_name, provider):
    """Return ``st.secrets[secret_name]``; show an error and stop if it is absent."""
    if secret_name not in st.secrets:
        st.error(
            f"{provider} API key not found in secrets. "
            f"Please add it with the key '{secret_name}'."
        )
        st.stop()
    return st.secrets[secret_name]


def init_langchain_llm(model_choice):
    """Create the ``ChatOpenAI`` client for the model chosen in the UI.

    Every backend is reached through an OpenAI-compatible endpoint.

    Args:
        model_choice: "Groq (llama-3.1-70b)", "ChatGPT-4-mini", or any other
            value, which selects the Qwen backend.

    Returns:
        A configured ``ChatOpenAI`` instance with temperature 0. If a key is
        missing or construction fails, an error is shown and the Streamlit
        script is stopped.
    """
    try:
        if model_choice == "Groq (llama-3.1-70b)":
            return ChatOpenAI(
                base_url="https://api.groq.com/openai/v1",
                model="llama-3.1-70b-versatile",
                openai_api_key=_require_secret('groq_key', "Groq"),
                temperature=0.0
            )
        elif model_choice == "ChatGPT-4-mini":
            # NOTE(review): the UI label says "4-mini" but the model requested
            # is "gpt-4" - confirm which one is intended.
            return ChatOpenAI(
                model="gpt-4",
                openai_api_key=_require_secret('openai_key', "OpenAI"),
                temperature=0.0
            )
        else:  # Qwen API
            # The message now names 'ali_key', the secret actually read (it
            # previously told users to add 'dashscope_api_key'). ChatOpenAI
            # needs DashScope's OpenAI-compatible endpoint; ".../api/v1" is
            # the native DashScope API.
            return ChatOpenAI(
                base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
                model="qwen-max",
                openai_api_key=_require_secret('ali_key', "DashScope"),
                temperature=0.0
            )
    except Exception as e:
        st.error(f"Error initializing the LLM: {str(e)}")
        st.stop()
def estimate_impact(llm, news_text, entity):
    """Ask the LLM to estimate the monetary impact of a news item on an entity.

    Args:
        llm: Runnable chat model piped after the prompt template.
        news_text: News text substituted into the prompt's ``{news}`` slot.
        entity: Entity name substituted into the prompt's ``{entity}`` slot.

    Returns:
        ``(impact, reasoning)`` parsed from the "Impact:" / "Reasoning:"
        sections of the reply; the default placeholder strings are returned
        for whatever could not be parsed.
    """
    template = """
Analyze the following news piece about the entity "{entity}" and estimate its monetary impact in Russian rubles for this entity in the next 6 months.
If precise monetary estimate is not possible, categorize the impact as one of the following:
1. "ΠΠ½Π°ΡΠΈΡΠ΅Π»ΡΠ½ΡΠΉ ΡΠΈΡΠΊ ΡΠ±ΡΡΠΊΠΎΠ²"
2. "Π£ΠΌΠ΅ΡΠ΅Π½Π½ΡΠΉ ΡΠΈΡΠΊ ΡΠ±ΡΡΠΊΠΎΠ²"
3. "ΠΠ΅Π·Π½Π°ΡΠΈΡΠ΅Π»ΡΠ½ΡΠΉ ΡΠΈΡΠΊ ΡΠ±ΡΡΠΊΠΎΠ²"
4. "ΠΠ΅ΡΠΎΡΡΠ½ΠΎΡΡΡ ΠΏΡΠΈΠ±ΡΠ»ΠΈ"
5. "ΠΠ΅ΠΎΠΏΡΠ΅Π΄Π΅Π»Π΅Π½Π½ΡΠΉ ΡΡΡΠ΅ΠΊΡ"
Provide brief reasoning (maximum 100 words).
News: {news}
Your response should be in the following format:
Impact: [Your estimate or category]
Reasoning: [Your reasoning]
"""
    prompt = PromptTemplate(template=template, input_variables=["entity", "news"])
    chain = prompt | llm
    response = chain.invoke({"entity": entity, "news": news_text})

    impact = "ΠΠ΅ΠΎΠΏΡΠ΅Π΄Π΅Π»Π΅Π½Π½ΡΠΉ ΡΡΡΠ΅ΠΊΡ"
    reasoning = "ΠΠ΅ ΡΠ΄Π°Π»ΠΎΡΡ ΠΏΠΎΠ»ΡΡΠΈΡΡ ΠΎΠ±ΠΎΡΠ½ΠΎΠ²Π°Π½ΠΈΠ΅"

    # Extract content from response
    response_text = response.content if hasattr(response, 'content') else str(response)
    try:
        if "Impact:" in response_text and "Reasoning:" in response_text:
            # Split on the first "Reasoning:" only: an unbounded split made
            # the two-way unpack raise when the word recurred in the reasoning
            # text, throwing away a valid answer.
            impact_part, reasoning_part = response_text.split("Reasoning:", 1)
            if "Impact:" in impact_part:
                impact = impact_part.split("Impact:")[1].strip()
            reasoning = reasoning_part.strip()
    except Exception as e:
        st.error(f"Error parsing LLM response: {str(e)}")
    return impact, reasoning
def format_elapsed_time(seconds):
    """Format a duration as space-separated "<number> <unit>" parts.

    The value is truncated to whole seconds and split into hours, minutes and
    seconds. Zero-valued parts are omitted, except that a seconds part is
    always produced when nothing else would be. Each unit word gets a suffix
    chosen from the count (1, 2-4, or anything else).

    Args:
        seconds: Duration in seconds (int or float).

    Returns:
        The formatted string.
    """
    whole_seconds = int(seconds)
    hrs, rest = divmod(whole_seconds, 3600)
    mins, secs = divmod(rest, 60)

    chunks = []
    if hrs > 0:
        hour_ending = '' if hrs == 1 else 'ΠΎΠ²'
        chunks.append(f"{hrs} ΡΠ°Ρ{hour_ending}")
    if mins > 0:
        if mins == 1:
            minute_ending = ''
        elif 2 <= mins <= 4:
            minute_ending = 'Ρ'
        else:
            minute_ending = ''
        chunks.append(f"{mins} ΠΌΠΈΠ½ΡΡ{minute_ending}")
    if secs > 0 or not chunks:
        if secs == 1:
            second_ending = 'Π°'
        elif 2 <= secs <= 4:
            second_ending = 'Ρ'
        else:
            second_ending = ''
        chunks.append(f"{secs} ΡΠ΅ΠΊΡΠ½Π΄{second_ending}")
    return " ".join(chunks)
def generate_sentiment_visualization(df):
    """Build a horizontal bar chart of mention counts per entity.

    Negative rows are counted per entity; when there are none, a warning is
    shown and all rows are counted instead.

    Args:
        df: DataFrame with a 'Sentiment' column and the entity column.

    Returns:
        The matplotlib figure, or ``None`` when there is nothing to plot.
    """
    # Several literals below were split by a raw line break in the source
    # (invalid inside a quoted string); the break is written as "\n".
    negative_df = df[df['Sentiment'] == 'Negative']
    if negative_df.empty:
        st.warning(
            "ΠΠ΅ ΠΎΠ±Π½Π°ΡΡΠΆΠ΅Π½ΠΎ Π½Π΅Π³Π°ΡΠΈΠ²Π½ΡΡ\n"
            "ΡΠΏΠΎΠΌΠΈΠ½Π°Π½ΠΈΠΉ. ΠΡΠΎΠ±ΡΠ°ΠΆΠ°Π΅ΠΌ ΠΎΠ±ΡΡΡ ΡΡΠ°ΡΠΈΡΡΠΈΠΊΡ ΠΏΠΎ ΠΎΠ±ΡΠ΅ΠΊΡΠ°ΠΌ."
        )
        entity_counts = df['ΠΠ±ΡΠ΅ΠΊΡ'].value_counts()
    else:
        entity_counts = negative_df['ΠΠ±ΡΠ΅ΠΊΡ'].value_counts()

    if len(entity_counts) == 0:
        st.warning(
            "ΠΠ΅Ρ Π΄Π°Π½Π½ΡΡ\n"
            "Π΄Π»Ρ Π²ΠΈΠ·ΡΠ°Π»ΠΈΠ·Π°ΡΠΈΠΈ."
        )
        return None

    # Figure height grows with the number of entities, with a floor of 6.
    fig, ax = plt.subplots(figsize=(12, max(6, len(entity_counts) * 0.5)))
    entity_counts.plot(kind='barh', ax=ax)
    ax.set_title(
        'ΠΠΎΠ»ΠΈΡΠ΅ΡΡΠ²ΠΎ Π½Π΅Π³Π°ΡΠΈΠ²Π½ΡΡ\n'
        'ΡΠΏΠΎΠΌΠΈΠ½Π°Π½ΠΈΠΉ ΠΏΠΎ ΠΎΠ±ΡΠ΅ΠΊΡΠ°ΠΌ'
    )
    ax.set_xlabel('ΠΠΎΠ»ΠΈΡΠ΅ΡΡΠ²ΠΎ ΡΠΏΠΎΠΌΠΈΠ½Π°Π½ΠΈΠΉ')
    plt.tight_layout()
    return fig
def process_file(uploaded_file, model_choice):
    """Read the uploaded workbook, deduplicate news and analyse every item.

    Per row: translate the excerpt, classify sentiment, detect events and,
    for negative items, estimate impact. Results are written to the
    'Translated', 'Sentiment', 'Impact', 'Reasoning', 'Event_Type' and
    'Event_Summary' columns.

    Args:
        uploaded_file: Excel file (path or file-like) with the publications
            sheet.
        model_choice: UI model label passed to ``init_langchain_llm``.

    Returns:
        The processed DataFrame. If required columns are missing, the frame is
        returned as read (without analysis columns). If reading fails,
        ``None`` is returned; if a later step fails, the frame in its current
        state is returned. Errors are reported through Streamlit.
    """
    df = None
    try:
        df = pd.read_excel(uploaded_file, sheet_name='ΠΡΠ±Π»ΠΈΠΊΠ°ΡΠΈΠΈ')

        # Validate required columns before initialising the LLM, so a bad file
        # is reported without needing API keys.
        required_columns = ['ΠΠ±ΡΠ΅ΠΊΡ', 'ΠΠ°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ', 'ΠΡΠ΄Π΅ΡΠΆΠΊΠΈ ΠΈΠ· ΡΠ΅ΠΊΡΡΠ°']
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            st.error(f"Error: The following required columns are missing: {', '.join(missing_columns)}")
            return df

        llm = init_langchain_llm(model_choice)

        # Deduplication (per entity, on the excerpt text)
        original_news_count = len(df)
        df = df.groupby('ΠΠ±ΡΠ΅ΠΊΡ', group_keys=False).apply(
            lambda x: fuzzy_deduplicate(x, 'ΠΡΠ΄Π΅ΡΠΆΠΊΠΈ ΠΈΠ· ΡΠ΅ΠΊΡΡΠ°', 65)
        ).reset_index(drop=True)
        remaining_news_count = len(df)
        duplicates_removed = original_news_count - remaining_news_count
        # The source literal was split by raw line breaks (invalid inside a
        # quoted string); the breaks are written as "\n".
        st.write(
            f"ΠΠ· {original_news_count} Π½ΠΎΠ²ΠΎΡΡΠ½ΡΡ\n"
            f"ΡΠΎΠΎΠ±ΡΠ΅Π½ΠΈΠΉ ΡΠ΄Π°Π»Π΅Π½Ρ {duplicates_removed} Π΄ΡΠ±Π»ΠΈΡΡΡΡΠΈΡ\n"
            f". ΠΡΡΠ°Π»ΠΎΡΡ {remaining_news_count}."
        )

        # Initialize progress tracking
        progress_bar = st.progress(0)
        status_text = st.empty()

        # Initialize new columns
        for new_column in ('Translated', 'Sentiment', 'Impact', 'Reasoning',
                           'Event_Type', 'Event_Summary'):
            df[new_column] = ''

        # Process each news item; the index is 0..n-1 after reset_index.
        total_items = len(df)
        for index, row in df.iterrows():
            try:
                # Translate and analyze sentiment
                translated_text = translate_text(llm, row['ΠΡΠ΄Π΅ΡΠΆΠΊΠΈ ΠΈΠ· ΡΠ΅ΠΊΡΡΠ°'])
                df.at[index, 'Translated'] = translated_text
                sentiment = analyze_sentiment(translated_text)
                df.at[index, 'Sentiment'] = sentiment

                # Detect events
                event_type, event_summary = detect_events(llm, row['ΠΡΠ΄Π΅ΡΠΆΠΊΠΈ ΠΈΠ· ΡΠ΅ΠΊΡΡΠ°'], row['ΠΠ±ΡΠ΅ΠΊΡ'])
                df.at[index, 'Event_Type'] = event_type
                df.at[index, 'Event_Summary'] = event_summary

                if sentiment == "Negative":
                    impact, reasoning = estimate_impact(llm, translated_text, row['ΠΠ±ΡΠ΅ΠΊΡ'])
                    df.at[index, 'Impact'] = impact
                    df.at[index, 'Reasoning'] = reasoning
            except Exception as e:
                st.warning(f"ΠΡΠΈΠ±ΠΊΠ° ΠΏΡΠΈ ΠΎΠ±ΡΠ°Π±ΠΎΡΠΊΠ΅ Π½ΠΎΠ²ΠΎΡΡΠΈ {index + 1}: {str(e)}")

            # Update progress even when the row failed, so the bar still
            # reaches 100% at the end of the run.
            progress_bar.progress((index + 1) / total_items)
            status_text.text(f"ΠΡΠΎΠ°Π½Π°Π»ΠΈΠ·ΠΈΡΠΎΠ²Π°Π½ΠΎ {index + 1} ΠΈΠ· {total_items} Π½ΠΎΠ²ΠΎΡΡΠ΅ΠΉ")
        return df
    except Exception as e:
        st.error(f"β ΠΡΠΈΠ±ΠΊΠ° ΠΏΡΠΈ ΠΎΠ±ΡΠ°Π±ΠΎΡΠΊΠ΅ ΡΠ°ΠΉΠ»Π°: {str(e)}")
        return df
def create_analysis_data(df):
    """Collect the negative news items into a six-column analysis table.

    Args:
        df: Processed DataFrame with 'Sentiment', 'Impact', 'Reasoning' and
            the entity, headline and excerpt columns.

    Returns:
        A new DataFrame with one row per item whose 'Sentiment' is
        'Negative': entity, headline, a fixed risk marker, impact, reasoning
        and the excerpt text.
    """
    output_columns = [
        'ΠΠ±ΡΠ΅ΠΊΡ',
        'ΠΠ°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ',
        'ΠΡΠΈΠ·Π½Π°ΠΊ',
        'ΠΡΠ΅Π½ΠΊΠ° Π²Π»ΠΈΡΠ½ΠΈΡ',
        'ΠΠ±ΠΎΡΠ½ΠΎΠ²Π°Π½ΠΈΠ΅',
        'Π’Π΅ΠΊΡΡ ΡΠΎΠΎΠ±ΡΠ΅Π½ΠΈΡ'
    ]
    negative_records = [
        [
            item['ΠΠ±ΡΠ΅ΠΊΡ'],
            item['ΠΠ°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ'],
            'Π ΠΠ‘Π Π£ΠΠ«Π’ΠΠ',
            item['Impact'],
            item['Reasoning'],
            item['ΠΡΠ΄Π΅ΡΠΆΠΊΠΈ ΠΈΠ· ΡΠ΅ΠΊΡΡΠ°'],
        ]
        for _, item in df.iterrows()
        if item['Sentiment'] == 'Negative'
    ]
    return pd.DataFrame(negative_records, columns=output_columns)
def create_output_file(df, uploaded_file, llm):
    """Fill the ``sample_file.xlsx`` template with the analysis results.

    Sheets written: event monitoring, per-entity summary, significant
    (negative/positive) items, a copy of the uploaded publications sheet, the
    negative-item analysis (with reasoning translated to Russian through
    ``llm``) and a technical appendix.

    Any exception while filling the sheets is reported with ``st.warning``
    and whatever was written so far is still saved (best effort).

    Args:
        df: Processed DataFrame produced by the analysis step.
        uploaded_file: The originally uploaded Excel file, re-read to copy the
            publications sheet.
        llm: Chat model used to translate the reasoning text.

    Returns:
        ``io.BytesIO`` positioned at 0 containing the saved workbook.
    """
    # The source literal for this sheet name was split by a raw line break
    # (invalid inside a quoted string); the break is written as "\n".
    tech_sheet_name = 'Π’Π΅Ρ\n.ΠΏΡΠΈΠ»ΠΎΠΆΠ΅Π½ΠΈΠ΅'

    wb = load_workbook("sample_file.xlsx")
    try:
        # Update the monitoring sheet with detected events
        ws = wb['ΠΠΎΠ½ΠΈΡΠΎΡΠΈΠ½Π³']
        row_idx = 4
        for _, row in df.iterrows():
            if row['Event_Type'] != 'ΠΠ΅Ρ':
                ws.cell(row=row_idx, column=5, value=row['ΠΠ±ΡΠ΅ΠΊΡ'])  # Column E
                ws.cell(row=row_idx, column=6, value=row['ΠΠ°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ'])  # Column F
                ws.cell(row=row_idx, column=7, value=row['Event_Type'])  # Column G
                ws.cell(row=row_idx, column=8, value=row['Event_Summary'])  # Column H
                ws.cell(row=row_idx, column=9, value=row['ΠΡΠ΄Π΅ΡΠΆΠΊΠΈ ΠΈΠ· ΡΠ΅ΠΊΡΡΠ°'])  # Column I
                row_idx += 1

        # Per-entity counts, sorted by number of negative publications.
        # Counts are reindexed onto the full entity index with fill_value=0:
        # previously fillna(0) ran before alignment, so entities without
        # negative/positive items ended up with NaN counts in the workbook.
        totals = df.groupby('ΠΠ±ΡΠ΅ΠΊΡ').size()
        negatives = (
            df[df['Sentiment'] == 'Negative']
            .groupby('ΠΠ±ΡΠ΅ΠΊΡ').size()
            .reindex(totals.index, fill_value=0)
        )
        positives = (
            df[df['Sentiment'] == 'Positive']
            .groupby('ΠΠ±ΡΠ΅ΠΊΡ').size()
            .reindex(totals.index, fill_value=0)
        )
        entity_stats = pd.DataFrame({
            'ΠΡΠ΅Π³ΠΎ': totals,
            'ΠΠ΅Π³Π°ΡΠΈΠ²Π½ΡΠ΅': negatives,
            'ΠΠΎΠ·ΠΈΡΠΈΠ²Π½ΡΠ΅': positives,
        }).sort_values('ΠΠ΅Π³Π°ΡΠΈΠ²Π½ΡΠ΅', ascending=False)

        # Impact shown per entity: the impact of its first negative item, or
        # the "undetermined" placeholder when it has none.
        entity_impacts = {}
        for entity in df['ΠΠ±ΡΠ΅ΠΊΡ'].unique():
            entity_df = df[df['ΠΠ±ΡΠ΅ΠΊΡ'] == entity]
            negative_impacts = entity_df[entity_df['Sentiment'] == 'Negative']['Impact']
            entity_impacts[entity] = negative_impacts.iloc[0] if len(negative_impacts) > 0 else 'ΠΠ΅ΠΎΠΏΡΠ΅Π΄Π΅Π»Π΅Π½Π½ΡΠΉ ΡΡΡΠ΅ΠΊΡ'

        # Update the summary sheet
        ws = wb['Π‘Π²ΠΎΠ΄ΠΊΠ°']
        for idx, (entity, stats) in enumerate(entity_stats.iterrows(), start=4):
            ws.cell(row=idx, column=5, value=entity)  # Column E
            ws.cell(row=idx, column=6, value=int(stats['ΠΡΠ΅Π³ΠΎ']))  # Column F
            ws.cell(row=idx, column=7, value=int(stats['ΠΠ΅Π³Π°ΡΠΈΠ²Π½ΡΠ΅']))  # Column G
            ws.cell(row=idx, column=8, value=int(stats['ΠΠΎΠ·ΠΈΡΠΈΠ²Π½ΡΠ΅']))  # Column H
            ws.cell(row=idx, column=9, value=entity_impacts[entity])  # Column I

        # Update the significant-items sheet
        ws = wb['ΠΠ½Π°ΡΠΈΠΌΡΠ΅']
        row_idx = 3
        for _, row in df.iterrows():
            if row['Sentiment'] in ['Negative', 'Positive']:
                ws.cell(row=row_idx, column=3, value=row['ΠΠ±ΡΠ΅ΠΊΡ'])  # Column C
                ws.cell(row=row_idx, column=4, value='ΡΠ΅Π»Π΅Π²Π°Π½ΡΠ½ΠΎ')  # Column D
                ws.cell(row=row_idx, column=5, value=row['Sentiment'])  # Column E
                ws.cell(row=row_idx, column=6, value=row['Impact'])  # Column F
                ws.cell(row=row_idx, column=7, value=row['ΠΠ°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ'])  # Column G
                ws.cell(row=row_idx, column=8, value=row['ΠΡΠ΄Π΅ΡΠΆΠΊΠΈ ΠΈΠ· ΡΠ΅ΠΊΡΡΠ°'])  # Column H
                row_idx += 1

        # Copy the publications sheet from the uploaded file
        original_df = pd.read_excel(uploaded_file, sheet_name='ΠΡΠ±Π»ΠΈΠΊΠ°ΡΠΈΠΈ')
        ws = wb['ΠΡΠ±Π»ΠΈΠΊΠ°ΡΠΈΠΈ']
        for r_idx, values in enumerate(dataframe_to_rows(original_df, index=False, header=True), start=1):
            for c_idx, value in enumerate(values, start=1):
                ws.cell(row=r_idx, column=c_idx, value=value)

        # Update the analysis sheet (negative items only)
        ws = wb['ΠΠ½Π°Π»ΠΈΠ·']
        row_idx = 4
        for _, row in df[df['Sentiment'] == 'Negative'].iterrows():
            ws.cell(row=row_idx, column=5, value=row['ΠΠ±ΡΠ΅ΠΊΡ'])  # Column E
            ws.cell(row=row_idx, column=6, value=row['ΠΠ°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ'])  # Column F
            ws.cell(row=row_idx, column=7, value="Π ΠΈΡΠΊ ΡΠ±ΡΡΠΊΠ°")  # Column G
            # Translate reasoning if it exists
            if pd.notna(row['Reasoning']):
                translated_reasoning = translate_reasoning_to_russian(llm, row['Reasoning'])
                ws.cell(row=row_idx, column=8, value=translated_reasoning)  # Column H
            ws.cell(row=row_idx, column=9, value=row['ΠΡΠ΄Π΅ΡΠΆΠΊΠΈ ΠΈΠ· ΡΠ΅ΠΊΡΡΠ°'])  # Column I
            row_idx += 1

        # Update the technical appendix sheet, creating it when absent
        tech_df = df[['ΠΠ±ΡΠ΅ΠΊΡ', 'ΠΠ°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ', 'ΠΡΠ΄Π΅ΡΠΆΠΊΠΈ ΠΈΠ· ΡΠ΅ΠΊΡΡΠ°', 'Translated', 'Sentiment', 'Impact', 'Reasoning']]
        if tech_sheet_name not in wb.sheetnames:
            wb.create_sheet(tech_sheet_name)
        ws = wb[tech_sheet_name]
        for r_idx, values in enumerate(dataframe_to_rows(tech_df, index=False, header=True), start=1):
            for c_idx, value in enumerate(values, start=1):
                ws.cell(row=r_idx, column=c_idx, value=value)
    except Exception as e:
        st.warning(
            f"ΠΡΠΈΠ±ΠΊΠ° ΠΏΡΠΈ ΡΠΎΠ·Π΄Π°Π½ΠΈΠΈ Π²ΡΡ\n"
            f"ΠΎΠ΄Π½ΠΎΠ³ΠΎ ΡΠ°ΠΉΠ»Π°: {str(e)}"
        )

    output = io.BytesIO()
    wb.save(output)
    output.seek(0)
    return output
def main():
    """Streamlit entry point: sidebar, file upload, analysis and download.

    When a file is uploaded and no result is stored in the session yet, the
    file is processed, previews are shown, the output workbook is built and a
    download button is offered.
    """
    # NOTE(review): the source indentation was lost; which widgets sit inside
    # the sidebar is inferred - confirm against the deployed layout.
    with st.sidebar:
        st.title("::: AI-Π°Π½Π°Π»ΠΈΠ· ΠΌΠΎΠ½ΠΈΡΠΎΡΠΈΠ½Π³Π° Π½ΠΎΠ²ΠΎΡΡΠ΅ΠΉ (v.3.30):::")
        st.subheader("ΠΏΠΎ ΠΌΠ°ΡΠ΅ΡΠΈΠ°Π»Π°ΠΌ Π‘ΠΠΠ-ΠΠΠ’ΠΠ Π€ΠΠΠ‘ ")
        model_choice = st.radio(
            "ΠΡΠ±Π΅ΡΠΈΡΠ΅ ΠΌΠΎΠ΄Π΅Π»Ρ Π΄Π»Ρ Π°Π½Π°Π»ΠΈΠ·Π°:",
            ["Groq (llama-3.1-70b)", "ChatGPT-4-mini", "Qwen-Max"],
            key="model_selector"
        )
        st.markdown(
            """
ΠΡΠΏΠΎΠ»ΡΠ·ΠΎΠ²Π°Π½Ρ ΡΠ΅Ρ
Π½ΠΎΠ»ΠΎΠ³ΠΈΠΈ:
- ΠΠ½Π°Π»ΠΈΠ· Π΅ΡΡΠ΅ΡΡΠ²Π΅Π½Π½ΠΎΠ³ΠΎ ΡΠ·ΡΠΊΠ° Ρ ΠΏΠΎΠΌΠΎΡΡΡ ΠΏΡΠ΅Π΄ΡΡΠ΅Π½ΠΈΡΠΎΠ²Π°Π½Π½ΡΡ
Π½Π΅ΠΉΡΠΎΡΠ΅ΡΠ΅ΠΉ **BERT**,
- ΠΠΎΠΏΠΎΠ»Π½ΠΈΡΠ΅Π»ΡΠ½Π°Ρ ΠΎΠ±ΡΠ°Π±ΠΎΡΠΊΠ° ΠΏΡΠΈ ΠΏΠΎΠΌΠΎΡΠΈ Π±ΠΎΠ»ΡΡΠΈΡ
ΡΠ·ΡΠΊΠΎΠ²ΡΡ
ΠΌΠΎΠ΄Π΅Π»Π΅ΠΉ (**LLM**),
- ΠΎΠ±ΡΠ΅Π΄ΠΈΠ½Π΅Π½Π½ΡΠ΅ ΠΏΡΠΈ ΠΏΠΎΠΌΠΎΡΠΈ ΡΡΠ΅ΠΉΠΌΠ²ΠΎΡΠΊΠ° **LangChain**.
""",
            unsafe_allow_html=True)
        # Model selection is now handled in init_langchain_llm()
        with st.expander("βΉοΈ ΠΠ½ΡΡΡΡΠΊΡΠΈΡ"):
            st.markdown("""
1. ΠΡΠ±Π΅ΡΠΈΡΠ΅ ΠΌΠΎΠ΄Π΅Π»Ρ Π΄Π»Ρ Π°Π½Π°Π»ΠΈΠ·Π°
2. ΠΠ°Π³ΡΡΠ·ΠΈΡΠ΅ Excel ΡΠ°ΠΉΠ» Ρ Π½ΠΎΠ²ΠΎΡΡΡΠΌΠΈ
3. ΠΠΎΠΆΠ΄ΠΈΡΠ΅ΡΡ Π·Π°Π²Π΅ΡΡΠ΅Π½ΠΈΡ Π°Π½Π°Π»ΠΈΠ·Π°
4. Π‘ΠΊΠ°ΡΠ°ΠΉΡΠ΅ ΡΠ΅Π·ΡΠ»ΡΡΠ°ΡΡ Π°Π½Π°Π»ΠΈΠ·Π° Π² ΡΠΎΡΠΌΠ°ΡΠ΅ Excel
""", unsafe_allow_html=True)
        st.markdown(
            """
denis.pokrovsky.npff
""",
            unsafe_allow_html=True
        )

    st.title("ΠΠ½Π°Π»ΠΈΠ· ΠΌΠΎΠ½ΠΈΡΠΎΡΠΈΠ½Π³Π° Π½ΠΎΠ²ΠΎΡΡΠ΅ΠΉ")

    if 'processed_df' not in st.session_state:
        st.session_state.processed_df = None

    # Single file uploader with unique key
    uploaded_file = st.sidebar.file_uploader("ΠΡΠ±ΠΈΡΠ°ΠΉΡΠ΅ Excel-ΡΠ°ΠΉΠ»", type="xlsx", key="unique_file_uploader")

    if uploaded_file is not None and st.session_state.processed_df is None:
        start_time = time.time()
        # Initialize LLM with selected model
        llm = init_langchain_llm(model_choice)

        result_df = process_file(uploaded_file, model_choice)
        # process_file reports failures itself and may return None or a frame
        # without the analysis columns; indexing such a result used to crash
        # here. Nothing is stored in that case, so the user can retry.
        if result_df is None or 'Sentiment' not in result_df.columns:
            return
        st.session_state.processed_df = result_df

        # Literals below that were split by a raw line break in the source
        # carry the break as an explicit "\n".
        st.subheader("ΠΡΠ΅Π΄ΠΏΡΠΎΡΠΌΠΎΡΡ Π΄Π°Π½Π½ΡΡ\n")
        preview_df = result_df[['ΠΠ±ΡΠ΅ΠΊΡ', 'ΠΠ°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ', 'Sentiment', 'Impact']].head()
        st.dataframe(preview_df)

        # Add preview of Monitoring results
        st.subheader("ΠΡΠ΅Π΄ΠΏΡΠΎΡΠΌΠΎΡΡ ΠΌΠΎΠ½ΠΈΡΠΎΡΠΈΠ½Π³Π° ΡΠΎΠ±ΡΡΠΈΠΉ ΠΈ ΡΠΈΡΠΊ-ΡΠ°ΠΊΡΠΎΡΠΎΠ² ΡΠΌΠΈΡΠ΅Π½ΡΠΎΠ²")
        has_event = (result_df['Event_Type'] != 'ΠΠ΅Ρ') & (result_df['Event_Type'].notna())
        monitoring_df = result_df[has_event][['ΠΠ±ΡΠ΅ΠΊΡ', 'ΠΠ°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ', 'Event_Type', 'Event_Summary']].head()
        if len(monitoring_df) > 0:
            st.dataframe(monitoring_df)
        else:
            st.info(
                "ΠΠ΅ ΠΎΠ±Π½Π°ΡΡΠΆΠ΅Π½ΠΎ Π·Π½Π°ΡΠΈΠΌΡΡ\n"
                "ΡΠΎΠ±ΡΡΠΈΠΉ Π΄Π»Ρ ΠΌΠΎΠ½ΠΈΡΠΎΡΠΈΠ½Π³Π°"
            )

        analysis_df = create_analysis_data(result_df)
        st.subheader("ΠΠ½Π°Π»ΠΈΠ·")
        st.dataframe(analysis_df)

        output = create_output_file(result_df, uploaded_file, llm)

        elapsed_time = time.time() - start_time
        formatted_time = format_elapsed_time(elapsed_time)
        st.success(f"ΠΠ±ΡΠ°Π±ΠΎΡΠΊΠ° ΠΈ Π°Π½Π°Π»ΠΈΠ· Π·Π°Π²Π΅ΡΡΠ΅Π½Ρ Π·Π° {formatted_time}.")

        # NOTE(review): on the rerun triggered by clicking the button this
        # branch is skipped (a result is already stored), so the previews and
        # the button are not rendered again - confirm this is acceptable.
        st.download_button(
            label="Π‘ΠΊΠ°ΡΠ°ΡΡ ΡΠ΅Π·ΡΠ»ΡΡΠ°Ρ Π°Π½Π°Π»ΠΈΠ·Π°",
            data=output,
            file_name="ΡΠ΅Π·ΡΠ»ΡΡΠ°Ρ_Π°Π½Π°Π»ΠΈΠ·Π°.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        )


if __name__ == "__main__":
    main()