import streamlit as st import pandas as pd import time import matplotlib.pyplot as plt from openpyxl.utils.dataframe import dataframe_to_rows import io from rapidfuzz import fuzz import os from openpyxl import load_workbook from langchain.prompts import PromptTemplate from langchain_core.runnables import RunnablePassthrough from transformers import pipeline from io import StringIO, BytesIO import sys import contextlib from langchain_openai import ChatOpenAI # Updated import import pdfkit from jinja2 import Template def translate_reasoning_to_russian(llm, text): template = """ Translate this English explanation to Russian, maintaining a formal business style: "{text}" Your response should contain only the Russian translation. """ prompt = PromptTemplate(template=template, input_variables=["text"]) chain = prompt | llm | RunnablePassthrough() response = chain.invoke({"text": text}) # Handle different response types if hasattr(response, 'content'): return response.content.strip() elif isinstance(response, str): return response.strip() else: return str(response).strip() def create_download_section(excel_data, pdf_data): st.markdown("""
πŸ“₯ Π Π΅Π·ΡƒΠ»ΡŒΡ‚Π°Ρ‚Ρ‹ Π°Π½Π°Π»ΠΈΠ·Π° доступны для скачивания:
""", unsafe_allow_html=True) col1, col2 = st.columns(2) with col1: if excel_data is not None: st.download_button( label="πŸ“Š Π‘ΠΊΠ°Ρ‡Π°Ρ‚ΡŒ Excel ΠΎΡ‚Ρ‡Π΅Ρ‚", data=excel_data, file_name="Ρ€Π΅Π·ΡƒΠ»ΡŒΡ‚Π°Ρ‚_Π°Π½Π°Π»ΠΈΠ·Π°.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", key="excel_download" ) else: st.error("Ошибка ΠΏΡ€ΠΈ создании Excel Ρ„Π°ΠΉΠ»Π°") def display_sentiment_results(row, sentiment, impact=None, reasoning=None): if sentiment == "Negative": st.markdown(f"""
ΠžΠ±ΡŠΠ΅ΠΊΡ‚: {row['ΠžΠ±ΡŠΠ΅ΠΊΡ‚']}
ΠΠΎΠ²ΠΎΡΡ‚ΡŒ: {row['Π—Π°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ']}
Π’ΠΎΠ½Π°Π»ΡŒΠ½ΠΎΡΡ‚ΡŒ: {sentiment}
{"Π­Ρ„Ρ„Π΅ΠΊΡ‚: " + impact + "
" if impact else ""} {"ОбоснованиС: " + reasoning + "
" if reasoning else ""}
""", unsafe_allow_html=True) elif sentiment == "Positive": st.markdown(f"""
ΠžΠ±ΡŠΠ΅ΠΊΡ‚: {row['ΠžΠ±ΡŠΠ΅ΠΊΡ‚']}
ΠΠΎΠ²ΠΎΡΡ‚ΡŒ: {row['Π—Π°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ']}
Π’ΠΎΠ½Π°Π»ΡŒΠ½ΠΎΡΡ‚ΡŒ: {sentiment}
""", unsafe_allow_html=True) else: st.write(f"ΠžΠ±ΡŠΠ΅ΠΊΡ‚: {row['ΠžΠ±ΡŠΠ΅ΠΊΡ‚']}") st.write(f"ΠΠΎΠ²ΠΎΡΡ‚ΡŒ: {row['Π—Π°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ']}") st.write(f"Π’ΠΎΠ½Π°Π»ΡŒΠ½ΠΎΡΡ‚ΡŒ: {sentiment}") st.write("---") # Initialize sentiment analyzers finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert") roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment") finbert_tone = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone") def get_mapped_sentiment(result): label = result['label'].lower() if label in ["positive", "label_2", "pos", "pos_label"]: return "Positive" elif label in ["negative", "label_0", "neg", "neg_label"]: return "Negative" return "Neutral" def analyze_sentiment(text): finbert_result = get_mapped_sentiment(finbert(text, truncation=True, max_length=512)[0]) roberta_result = get_mapped_sentiment(roberta(text, truncation=True, max_length=512)[0]) finbert_tone_result = get_mapped_sentiment(finbert_tone(text, truncation=True, max_length=512)[0]) # Consider sentiment negative if any model says it's negative if any(result == "Negative" for result in [finbert_result, roberta_result, finbert_tone_result]): return "Negative" elif all(result == "Positive" for result in [finbert_result, roberta_result, finbert_tone_result]): return "Positive" return "Neutral" def analyze_sentiment(text): finbert_result = get_mapped_sentiment(finbert(text, truncation=True, max_length=512)[0]) roberta_result = get_mapped_sentiment(roberta(text, truncation=True, max_length=512)[0]) finbert_tone_result = get_mapped_sentiment(finbert_tone(text, truncation=True, max_length=512)[0]) # Count occurrences of each sentiment sentiments = [finbert_result, roberta_result, finbert_tone_result] sentiment_counts = {s: sentiments.count(s) for s in set(sentiments)} # Return sentiment if at least two models agree, otherwise return Neutral for sentiment, count in sentiment_counts.items(): if count >= 2: return sentiment return "Neutral" def detect_events(llm, text, entity): template = """ ΠŸΡ€ΠΎΠ°Π½Π°Π»ΠΈΠ·ΠΈΡ€ΡƒΠΉΡ‚Π΅ ΡΠ»Π΅Π΄ΡƒΡŽΡ‰ΡƒΡŽ Π½ΠΎΠ²ΠΎΡΡ‚ΡŒ ΠΎ ΠΊΠΎΠΌΠΏΠ°Π½ΠΈΠΈ "{entity}" ΠΈ ΠΎΠΏΡ€Π΅Π΄Π΅Π»ΠΈΡ‚Π΅ Π½Π°Π»ΠΈΡ‡ΠΈΠ΅ ΡΠ»Π΅Π΄ΡƒΡŽΡ‰ΠΈΡ… событий: 1. ΠŸΡƒΠ±Π»ΠΈΠΊΠ°Ρ†ΠΈΡ отчСтности ΠΈ ΠΊΠ»ΡŽΡ‡Π΅Π²Ρ‹Π΅ ΠΏΠΎΠΊΠ°Π·Π°Ρ‚Π΅Π»ΠΈ (Π²Ρ‹Ρ€ΡƒΡ‡ΠΊΠ°, ΠΏΡ€ΠΈΠ±Ρ‹Π»ΡŒ, EBITDA) 2. Бобытия Π½Π° Ρ€Ρ‹Π½ΠΊΠ΅ Ρ†Π΅Π½Π½Ρ‹Ρ… Π±ΡƒΠΌΠ°Π³ (погашСниС ΠΎΠ±Π»ΠΈΠ³Π°Ρ†ΠΈΠΉ, Π²Ρ‹ΠΏΠ»Π°Ρ‚Π°/Π½Π΅Π²Ρ‹ΠΏΠ»Π°Ρ‚Π° ΠΊΡƒΠΏΠΎΠ½Π°, Π΄Π΅Ρ„ΠΎΠ»Ρ‚, рСструктуризация) 3. Π‘ΡƒΠ΄Π΅Π±Π½Ρ‹Π΅ иски ΠΈΠ»ΠΈ ΡŽΡ€ΠΈΠ΄ΠΈΡ‡Π΅ΡΠΊΠΈΠ΅ дСйствия ΠΏΡ€ΠΎΡ‚ΠΈΠ² ΠΊΠΎΠΌΠΏΠ°Π½ΠΈΠΈ, Π°ΠΊΡ†ΠΈΠΎΠ½Π΅Ρ€ΠΎΠ², ΠΌΠ΅Π½Π΅Π΄ΠΆΠ΅Ρ€ΠΎΠ² ΠΠΎΠ²ΠΎΡΡ‚ΡŒ: {text} ΠžΡ‚Π²Π΅Ρ‚ΡŒΡ‚Π΅ Π² ΡΠ»Π΅Π΄ΡƒΡŽΡ‰Π΅ΠΌ Ρ„ΠΎΡ€ΠΌΠ°Ρ‚Π΅: Π’ΠΈΠΏ: ["ΠžΡ‚Ρ‡Π΅Ρ‚Π½ΠΎΡΡ‚ΡŒ" ΠΈΠ»ΠΈ "Π Π¦Π‘" ΠΈΠ»ΠΈ "Π‘ΡƒΠ΄" ΠΈΠ»ΠΈ "НСт"] ΠšΡ€Π°Ρ‚ΠΊΠΎΠ΅ описаниС: [ΠΊΡ€Π°Ρ‚ΠΊΠΎΠ΅ описаниС события Π½Π° русском языкС, Π½Π΅ Π±ΠΎΠ»Π΅Π΅ 2 ΠΏΡ€Π΅Π΄Π»ΠΎΠΆΠ΅Π½ΠΈΠΉ] """ prompt = PromptTemplate(template=template, input_variables=["entity", "text"]) chain = prompt | llm response = chain.invoke({"entity": entity, "text": text}) event_type = "НСт" summary = "" try: response_text = response.content if hasattr(response, 'content') else str(response) if "Π’ΠΈΠΏ:" in response_text and "ΠšΡ€Π°Ρ‚ΠΊΠΎΠ΅ описаниС:" in response_text: type_part, summary_part = response_text.split("ΠšΡ€Π°Ρ‚ΠΊΠΎΠ΅ описаниС:") event_type = type_part.split("Π’ΠΈΠΏ:")[1].strip() summary = summary_part.strip() except Exception as e: st.warning(f"Ошибка ΠΏΡ€ΠΈ Π°Π½Π°Π»ΠΈΠ·Π΅ событий: {str(e)}") return event_type, summary def fuzzy_deduplicate(df, column, threshold=50): seen_texts = [] indices_to_keep = [] for i, text in enumerate(df[column]): if pd.isna(text): indices_to_keep.append(i) continue text = str(text) if not seen_texts or all(fuzz.ratio(text, seen) < threshold for seen in seen_texts): seen_texts.append(text) indices_to_keep.append(i) return df.iloc[indices_to_keep] def translate_text(llm, text): try: # All models now use OpenAI-compatible API format messages = [ {"role": "system", "content": "You are a translator. Translate the given Russian text to English accurately and concisely."}, {"role": "user", "content": f"Translate this Russian text to English: {text}"} ] response = llm.invoke(messages) if hasattr(response, 'content'): return response.content.strip() elif isinstance(response, str): return response.strip() else: return str(response).strip() except Exception as e: st.error(f"Translation error: {str(e)}") return text def init_langchain_llm(model_choice): try: if model_choice == "Groq (llama-3.1-70b)": if 'groq_key' not in st.secrets: st.error("Groq API key not found in secrets. Please add it with the key 'groq_key'.") st.stop() return ChatOpenAI( base_url="https://api.groq.com/openai/v1", model="llama-3.1-70b-versatile", openai_api_key=st.secrets['groq_key'], temperature=0.0 ) elif model_choice == "ChatGPT-4-mini": if 'openai_key' not in st.secrets: st.error("OpenAI API key not found in secrets. Please add it with the key 'openai_key'.") st.stop() return ChatOpenAI( model="gpt-4", openai_api_key=st.secrets['openai_key'], temperature=0.0 ) else: # Qwen API if 'ali_key' not in st.secrets: st.error("DashScope API key not found in secrets. Please add it with the key 'dashscope_api_key'.") st.stop() # Using Qwen's API through DashScope return ChatOpenAI( base_url="https://dashscope.aliyuncs.com/api/v1", model="qwen-max", openai_api_key=st.secrets['ali_key'], temperature=0.0 ) except Exception as e: st.error(f"Error initializing the LLM: {str(e)}") st.stop() def estimate_impact(llm, news_text, entity): template = """ Analyze the following news piece about the entity "{entity}" and estimate its monetary impact in Russian rubles for this entity in the next 6 months. If precise monetary estimate is not possible, categorize the impact as one of the following: 1. "Π—Π½Π°Ρ‡ΠΈΡ‚Π΅Π»ΡŒΠ½Ρ‹ΠΉ риск ΡƒΠ±Ρ‹Ρ‚ΠΊΠΎΠ²" 2. "Π£ΠΌΠ΅Ρ€Π΅Π½Π½Ρ‹ΠΉ риск ΡƒΠ±Ρ‹Ρ‚ΠΊΠΎΠ²" 3. "ΠΠ΅Π·Π½Π°Ρ‡ΠΈΡ‚Π΅Π»ΡŒΠ½Ρ‹ΠΉ риск ΡƒΠ±Ρ‹Ρ‚ΠΊΠΎΠ²" 4. "Π’Π΅Ρ€ΠΎΡΡ‚Π½ΠΎΡΡ‚ΡŒ ΠΏΡ€ΠΈΠ±Ρ‹Π»ΠΈ" 5. "НСопрСдСлСнный эффСкт" Provide brief reasoning (maximum 100 words). News: {news} Your response should be in the following format: Impact: [Your estimate or category] Reasoning: [Your reasoning] """ prompt = PromptTemplate(template=template, input_variables=["entity", "news"]) chain = prompt | llm response = chain.invoke({"entity": entity, "news": news_text}) impact = "НСопрСдСлСнный эффСкт" reasoning = "НС ΡƒΠ΄Π°Π»ΠΎΡΡŒ ΠΏΠΎΠ»ΡƒΡ‡ΠΈΡ‚ΡŒ обоснованиС" # Extract content from response response_text = response.content if hasattr(response, 'content') else str(response) try: if "Impact:" in response_text and "Reasoning:" in response_text: impact_part, reasoning_part = response_text.split("Reasoning:") impact = impact_part.split("Impact:")[1].strip() reasoning = reasoning_part.strip() except Exception as e: st.error(f"Error parsing LLM response: {str(e)}") return impact, reasoning def format_elapsed_time(seconds): hours, remainder = divmod(int(seconds), 3600) minutes, seconds = divmod(remainder, 60) time_parts = [] if hours > 0: time_parts.append(f"{hours} час{'ΠΎΠ²' if hours != 1 else ''}") if minutes > 0: time_parts.append(f"{minutes} ΠΌΠΈΠ½ΡƒΡ‚{'' if minutes == 1 else 'Ρ‹' if 2 <= minutes <= 4 else ''}") if seconds > 0 or not time_parts: time_parts.append(f"{seconds} сСкунд{'Π°' if seconds == 1 else 'Ρ‹' if 2 <= seconds <= 4 else ''}") return " ".join(time_parts) def generate_sentiment_visualization(df): negative_df = df[df['Sentiment'] == 'Negative'] if negative_df.empty: st.warning("НС ΠΎΠ±Π½Π°Ρ€ΡƒΠΆΠ΅Π½ΠΎ Π½Π΅Π³Π°Ρ‚ΠΈΠ²Π½Ρ‹Ρ… ΡƒΠΏΠΎΠΌΠΈΠ½Π°Π½ΠΈΠΉ. ΠžΡ‚ΠΎΠ±Ρ€Π°ΠΆΠ°Π΅ΠΌ ΠΎΠ±Ρ‰ΡƒΡŽ статистику ΠΏΠΎ ΠΎΠ±ΡŠΠ΅ΠΊΡ‚Π°ΠΌ.") entity_counts = df['ΠžΠ±ΡŠΠ΅ΠΊΡ‚'].value_counts() else: entity_counts = negative_df['ΠžΠ±ΡŠΠ΅ΠΊΡ‚'].value_counts() if len(entity_counts) == 0: st.warning("НСт Π΄Π°Π½Π½Ρ‹Ρ… для Π²ΠΈΠ·ΡƒΠ°Π»ΠΈΠ·Π°Ρ†ΠΈΠΈ.") return None fig, ax = plt.subplots(figsize=(12, max(6, len(entity_counts) * 0.5))) entity_counts.plot(kind='barh', ax=ax) ax.set_title('ΠšΠΎΠ»ΠΈΡ‡Π΅ΡΡ‚Π²ΠΎ Π½Π΅Π³Π°Ρ‚ΠΈΠ²Π½Ρ‹Ρ… ΡƒΠΏΠΎΠΌΠΈΠ½Π°Π½ΠΈΠΉ ΠΏΠΎ ΠΎΠ±ΡŠΠ΅ΠΊΡ‚Π°ΠΌ') ax.set_xlabel('ΠšΠΎΠ»ΠΈΡ‡Π΅ΡΡ‚Π²ΠΎ ΡƒΠΏΠΎΠΌΠΈΠ½Π°Π½ΠΈΠΉ') plt.tight_layout() return fig def process_file(uploaded_file, model_choice): df = None try: df = pd.read_excel(uploaded_file, sheet_name='ΠŸΡƒΠ±Π»ΠΈΠΊΠ°Ρ†ΠΈΠΈ') llm = init_langchain_llm(model_choice) # Validate required columns required_columns = ['ΠžΠ±ΡŠΠ΅ΠΊΡ‚', 'Π—Π°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ', 'Π’Ρ‹Π΄Π΅Ρ€ΠΆΠΊΠΈ ΠΈΠ· тСкста'] missing_columns = [col for col in required_columns if col not in df.columns] if missing_columns: st.error(f"Error: The following required columns are missing: {', '.join(missing_columns)}") return df if df is not None else None # Deduplication original_news_count = len(df) df = df.groupby('ΠžΠ±ΡŠΠ΅ΠΊΡ‚', group_keys=False).apply( lambda x: fuzzy_deduplicate(x, 'Π’Ρ‹Π΄Π΅Ρ€ΠΆΠΊΠΈ ΠΈΠ· тСкста', 65) ).reset_index(drop=True) remaining_news_count = len(df) duplicates_removed = original_news_count - remaining_news_count st.write(f"Из {original_news_count} новостных сообщСний ΡƒΠ΄Π°Π»Π΅Π½Ρ‹ {duplicates_removed} Π΄ΡƒΠ±Π»ΠΈΡ€ΡƒΡŽΡ‰ΠΈΡ…. ΠžΡΡ‚Π°Π»ΠΎΡΡŒ {remaining_news_count}.") # Initialize progress tracking progress_bar = st.progress(0) status_text = st.empty() # Initialize new columns df['Translated'] = '' df['Sentiment'] = '' df['Impact'] = '' df['Reasoning'] = '' df['Event_Type'] = '' df['Event_Summary'] = '' # Process each news item for index, row in df.iterrows(): try: # Translate and analyze sentiment translated_text = translate_text(llm, row['Π’Ρ‹Π΄Π΅Ρ€ΠΆΠΊΠΈ ΠΈΠ· тСкста']) df.at[index, 'Translated'] = translated_text sentiment = analyze_sentiment(translated_text) df.at[index, 'Sentiment'] = sentiment # Detect events event_type, event_summary = detect_events(llm, row['Π’Ρ‹Π΄Π΅Ρ€ΠΆΠΊΠΈ ΠΈΠ· тСкста'], row['ΠžΠ±ΡŠΠ΅ΠΊΡ‚']) df.at[index, 'Event_Type'] = event_type df.at[index, 'Event_Summary'] = event_summary if sentiment == "Negative": impact, reasoning = estimate_impact(llm, translated_text, row['ΠžΠ±ΡŠΠ΅ΠΊΡ‚']) df.at[index, 'Impact'] = impact df.at[index, 'Reasoning'] = reasoning # Update progress progress = (index + 1) / len(df) progress_bar.progress(progress) status_text.text(f"ΠŸΡ€ΠΎΠ°Π½Π°Π»ΠΈΠ·ΠΈΡ€ΠΎΠ²Π°Π½ΠΎ {index + 1} ΠΈΠ· {len(df)} новостСй") except Exception as e: st.warning(f"Ошибка ΠΏΡ€ΠΈ ΠΎΠ±Ρ€Π°Π±ΠΎΡ‚ΠΊΠ΅ новости {index + 1}: {str(e)}") continue return df except Exception as e: st.error(f"❌ Ошибка ΠΏΡ€ΠΈ ΠΎΠ±Ρ€Π°Π±ΠΎΡ‚ΠΊΠ΅ Ρ„Π°ΠΉΠ»Π°: {str(e)}") return df if df is not None else None def create_analysis_data(df): analysis_data = [] for _, row in df.iterrows(): if row['Sentiment'] == 'Negative': analysis_data.append([ row['ΠžΠ±ΡŠΠ΅ΠΊΡ‚'], row['Π—Π°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ'], 'РИБК Π£Π‘Π«Π’ΠšΠ', row['Impact'], row['Reasoning'], row['Π’Ρ‹Π΄Π΅Ρ€ΠΆΠΊΠΈ ΠΈΠ· тСкста'] ]) return pd.DataFrame(analysis_data, columns=[ 'ΠžΠ±ΡŠΠ΅ΠΊΡ‚', 'Π—Π°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ', 'ΠŸΡ€ΠΈΠ·Π½Π°ΠΊ', 'ΠžΡ†Π΅Π½ΠΊΠ° влияния', 'ОбоснованиС', 'ВСкст сообщСния' ]) def create_output_file(df, uploaded_file, llm): wb = load_workbook("sample_file.xlsx") try: # Update 'ΠœΠΎΠ½ΠΈΡ‚ΠΎΡ€ΠΈΠ½Π³' sheet with events ws = wb['ΠœΠΎΠ½ΠΈΡ‚ΠΎΡ€ΠΈΠ½Π³'] row_idx = 4 for _, row in df.iterrows(): if row['Event_Type'] != 'НСт': ws.cell(row=row_idx, column=5, value=row['ΠžΠ±ΡŠΠ΅ΠΊΡ‚']) # Column E ws.cell(row=row_idx, column=6, value=row['Π—Π°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ']) # Column F ws.cell(row=row_idx, column=7, value=row['Event_Type']) # Column G ws.cell(row=row_idx, column=8, value=row['Event_Summary']) # Column H ws.cell(row=row_idx, column=9, value=row['Π’Ρ‹Π΄Π΅Ρ€ΠΆΠΊΠΈ ΠΈΠ· тСкста']) # Column I row_idx += 1 # Sort entities by number of negative publications entity_stats = pd.DataFrame({ 'ΠžΠ±ΡŠΠ΅ΠΊΡ‚': df['ΠžΠ±ΡŠΠ΅ΠΊΡ‚'].unique(), 'ВсСго': df.groupby('ΠžΠ±ΡŠΠ΅ΠΊΡ‚').size(), 'НСгативныС': df[df['Sentiment'] == 'Negative'].groupby('ΠžΠ±ΡŠΠ΅ΠΊΡ‚').size().fillna(0).astype(int), 'ΠŸΠΎΠ·ΠΈΡ‚ΠΈΠ²Π½Ρ‹Π΅': df[df['Sentiment'] == 'Positive'].groupby('ΠžΠ±ΡŠΠ΅ΠΊΡ‚').size().fillna(0).astype(int) }).sort_values('НСгативныС', ascending=False) # Calculate most negative impact for each entity entity_impacts = {} for entity in df['ΠžΠ±ΡŠΠ΅ΠΊΡ‚'].unique(): entity_df = df[df['ΠžΠ±ΡŠΠ΅ΠΊΡ‚'] == entity] negative_impacts = entity_df[entity_df['Sentiment'] == 'Negative']['Impact'] entity_impacts[entity] = negative_impacts.iloc[0] if len(negative_impacts) > 0 else 'НСопрСдСлСнный эффСкт' # Update 'Π‘Π²ΠΎΠ΄ΠΊΠ°' sheet ws = wb['Π‘Π²ΠΎΠ΄ΠΊΠ°'] for idx, (entity, row) in enumerate(entity_stats.iterrows(), start=4): ws.cell(row=idx, column=5, value=entity) # Column E ws.cell(row=idx, column=6, value=row['ВсСго']) # Column F ws.cell(row=idx, column=7, value=row['НСгативныС']) # Column G ws.cell(row=idx, column=8, value=row['ΠŸΠΎΠ·ΠΈΡ‚ΠΈΠ²Π½Ρ‹Π΅']) # Column H ws.cell(row=idx, column=9, value=entity_impacts[entity]) # Column I # Update 'Π—Π½Π°Ρ‡ΠΈΠΌΡ‹Π΅' sheet ws = wb['Π—Π½Π°Ρ‡ΠΈΠΌΡ‹Π΅'] row_idx = 3 for _, row in df.iterrows(): if row['Sentiment'] in ['Negative', 'Positive']: ws.cell(row=row_idx, column=3, value=row['ΠžΠ±ΡŠΠ΅ΠΊΡ‚']) # Column C ws.cell(row=row_idx, column=4, value='Ρ€Π΅Π»Π΅Π²Π°Π½Ρ‚Π½ΠΎ') # Column D ws.cell(row=row_idx, column=5, value=row['Sentiment']) # Column E ws.cell(row=row_idx, column=6, value=row['Impact']) # Column F ws.cell(row=row_idx, column=7, value=row['Π—Π°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ']) # Column G ws.cell(row=row_idx, column=8, value=row['Π’Ρ‹Π΄Π΅Ρ€ΠΆΠΊΠΈ ΠΈΠ· тСкста']) # Column H row_idx += 1 # Copy 'ΠŸΡƒΠ±Π»ΠΈΠΊΠ°Ρ†ΠΈΠΈ' sheet original_df = pd.read_excel(uploaded_file, sheet_name='ΠŸΡƒΠ±Π»ΠΈΠΊΠ°Ρ†ΠΈΠΈ') ws = wb['ΠŸΡƒΠ±Π»ΠΈΠΊΠ°Ρ†ΠΈΠΈ'] for r_idx, row in enumerate(dataframe_to_rows(original_df, index=False, header=True), start=1): for c_idx, value in enumerate(row, start=1): ws.cell(row=r_idx, column=c_idx, value=value) # Update 'Анализ' sheet ws = wb['Анализ'] row_idx = 4 for _, row in df[df['Sentiment'] == 'Negative'].iterrows(): ws.cell(row=row_idx, column=5, value=row['ΠžΠ±ΡŠΠ΅ΠΊΡ‚']) # Column E ws.cell(row=row_idx, column=6, value=row['Π—Π°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ']) # Column F ws.cell(row=row_idx, column=7, value="Риск ΡƒΠ±Ρ‹Ρ‚ΠΊΠ°") # Column G # Translate reasoning if it exists if pd.notna(row['Reasoning']): translated_reasoning = translate_reasoning_to_russian(llm, row['Reasoning']) ws.cell(row=row_idx, column=8, value=translated_reasoning) # Column H ws.cell(row=row_idx, column=9, value=row['Π’Ρ‹Π΄Π΅Ρ€ΠΆΠΊΠΈ ΠΈΠ· тСкста']) # Column I row_idx += 1 # Update 'Π’Π΅Ρ….ΠΏΡ€ΠΈΠ»ΠΎΠΆΠ΅Π½ΠΈΠ΅' sheet tech_df = df[['ΠžΠ±ΡŠΠ΅ΠΊΡ‚', 'Π—Π°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ', 'Π’Ρ‹Π΄Π΅Ρ€ΠΆΠΊΠΈ ΠΈΠ· тСкста', 'Translated', 'Sentiment', 'Impact', 'Reasoning']] if 'Π’Π΅Ρ….ΠΏΡ€ΠΈΠ»ΠΎΠΆΠ΅Π½ΠΈΠ΅' not in wb.sheetnames: wb.create_sheet('Π’Π΅Ρ….ΠΏΡ€ΠΈΠ»ΠΎΠΆΠ΅Π½ΠΈΠ΅') ws = wb['Π’Π΅Ρ….ΠΏΡ€ΠΈΠ»ΠΎΠΆΠ΅Π½ΠΈΠ΅'] for r_idx, row in enumerate(dataframe_to_rows(tech_df, index=False, header=True), start=1): for c_idx, value in enumerate(row, start=1): ws.cell(row=r_idx, column=c_idx, value=value) except Exception as e: st.warning(f"Ошибка ΠΏΡ€ΠΈ создании Π²Ρ‹Ρ…ΠΎΠ΄Π½ΠΎΠ³ΠΎ Ρ„Π°ΠΉΠ»Π°: {str(e)}") output = io.BytesIO() wb.save(output) output.seek(0) return output def main(): with st.sidebar: st.title("::: AI-Π°Π½Π°Π»ΠΈΠ· ΠΌΠΎΠ½ΠΈΡ‚ΠΎΡ€ΠΈΠ½Π³Π° новостСй (v.3.30):::") st.subheader("ΠΏΠΎ ΠΌΠ°Ρ‚Π΅Ρ€ΠΈΠ°Π»Π°ΠΌ БКАН-Π˜ΠΠ’Π•Π Π€ΠΠšΠ‘ ") model_choice = st.radio( "Π’Ρ‹Π±Π΅Ρ€ΠΈΡ‚Π΅ модСль для Π°Π½Π°Π»ΠΈΠ·Π°:", ["Groq (llama-3.1-70b)", "ChatGPT-4-mini", "Qwen-Max"], key="model_selector" ) st.markdown( """ Π˜ΡΠΏΠΎΠ»ΡŒΠ·ΠΎΠ²Π°Π½Ρ‹ Ρ‚Π΅Ρ…Π½ΠΎΠ»ΠΎΠ³ΠΈΠΈ: - Анализ СстСствСнного языка с ΠΏΠΎΠΌΠΎΡ‰ΡŒΡŽ ΠΏΡ€Π΅Π΄Ρ‚Ρ€Π΅Π½ΠΈΡ€ΠΎΠ²Π°Π½Π½Ρ‹Ρ… нСйросСтСй **BERT**,
- Π”ΠΎΠΏΠΎΠ»Π½ΠΈΡ‚Π΅Π»ΡŒΠ½Π°Ρ ΠΎΠ±Ρ€Π°Π±ΠΎΡ‚ΠΊΠ° ΠΏΡ€ΠΈ ΠΏΠΎΠΌΠΎΡ‰ΠΈ Π±ΠΎΠ»ΡŒΡˆΠΈΡ… языковых ΠΌΠΎΠ΄Π΅Π»Π΅ΠΉ (**LLM**),
- ΠΎΠ±ΡŠΠ΅Π΄ΠΈΠ½Π΅Π½Π½Ρ‹Π΅ ΠΏΡ€ΠΈ ΠΏΠΎΠΌΠΎΡ‰ΠΈ Ρ„Ρ€Π΅ΠΉΠΌΠ²ΠΎΡ€ΠΊΠ° **LangChain**.
""", unsafe_allow_html=True) # Model selection is now handled in init_langchain_llm() with st.expander("ℹ️ Π˜Π½ΡΡ‚Ρ€ΡƒΠΊΡ†ΠΈΡ"): st.markdown(""" 1. Π’Ρ‹Π±Π΅Ρ€ΠΈΡ‚Π΅ модСль для Π°Π½Π°Π»ΠΈΠ·Π° 2. Π—Π°Π³Ρ€ΡƒΠ·ΠΈΡ‚Π΅ Excel Ρ„Π°ΠΉΠ» с новостями
3. Π”ΠΎΠΆΠ΄ΠΈΡ‚Π΅ΡΡŒ Π·Π°Π²Π΅Ρ€ΡˆΠ΅Π½ΠΈΡ Π°Π½Π°Π»ΠΈΠ·Π°
4. Π‘ΠΊΠ°Ρ‡Π°ΠΉΡ‚Π΅ Ρ€Π΅Π·ΡƒΠ»ΡŒΡ‚Π°Ρ‚Ρ‹ Π°Π½Π°Π»ΠΈΠ·Π° Π² Ρ„ΠΎΡ€ΠΌΠ°Ρ‚Π΅ Excel
""", unsafe_allow_html=True) st.markdown( """
denis.pokrovsky.npff
""", unsafe_allow_html=True ) st.title("Анализ ΠΌΠΎΠ½ΠΈΡ‚ΠΎΡ€ΠΈΠ½Π³Π° новостСй") if 'processed_df' not in st.session_state: st.session_state.processed_df = None # Single file uploader with unique key uploaded_file = st.sidebar.file_uploader("Π’Ρ‹Π±ΠΈΡ€Π°ΠΉΡ‚Π΅ Excel-Ρ„Π°ΠΉΠ»", type="xlsx", key="unique_file_uploader") if uploaded_file is not None and st.session_state.processed_df is None: start_time = time.time() # Initialize LLM with selected model llm = init_langchain_llm(model_choice) st.session_state.processed_df = process_file(uploaded_file, model_choice) st.subheader("ΠŸΡ€Π΅Π΄ΠΏΡ€ΠΎΡΠΌΠΎΡ‚Ρ€ Π΄Π°Π½Π½Ρ‹Ρ…") preview_df = st.session_state.processed_df[['ΠžΠ±ΡŠΠ΅ΠΊΡ‚', 'Π—Π°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ', 'Sentiment', 'Impact']].head() st.dataframe(preview_df) # Add preview of Monitoring results st.subheader("ΠŸΡ€Π΅Π΄ΠΏΡ€ΠΎΡΠΌΠΎΡ‚Ρ€ ΠΌΠΎΠ½ΠΈΡ‚ΠΎΡ€ΠΈΠ½Π³Π° событий ΠΈ риск-Ρ„Π°ΠΊΡ‚ΠΎΡ€ΠΎΠ² эмитСнтов") monitoring_df = st.session_state.processed_df[ (st.session_state.processed_df['Event_Type'] != 'НСт') & (st.session_state.processed_df['Event_Type'].notna()) ][['ΠžΠ±ΡŠΠ΅ΠΊΡ‚', 'Π—Π°Π³ΠΎΠ»ΠΎΠ²ΠΎΠΊ', 'Event_Type', 'Event_Summary']].head() if len(monitoring_df) > 0: st.dataframe(monitoring_df) else: st.info("НС ΠΎΠ±Π½Π°Ρ€ΡƒΠΆΠ΅Π½ΠΎ Π·Π½Π°Ρ‡ΠΈΠΌΡ‹Ρ… событий для ΠΌΠΎΠ½ΠΈΡ‚ΠΎΡ€ΠΈΠ½Π³Π°") analysis_df = create_analysis_data(st.session_state.processed_df) st.subheader("Анализ") st.dataframe(analysis_df) output = create_output_file(st.session_state.processed_df, uploaded_file, llm) end_time = time.time() elapsed_time = end_time - start_time formatted_time = format_elapsed_time(elapsed_time) st.success(f"ΠžΠ±Ρ€Π°Π±ΠΎΡ‚ΠΊΠ° ΠΈ Π°Π½Π°Π»ΠΈΠ· Π·Π°Π²Π΅Ρ€ΡˆΠ΅Π½Ρ‹ Π·Π° {formatted_time}.") st.download_button( label="Π‘ΠΊΠ°Ρ‡Π°Ρ‚ΡŒ Ρ€Π΅Π·ΡƒΠ»ΡŒΡ‚Π°Ρ‚ Π°Π½Π°Π»ΠΈΠ·Π°", data=output, file_name="Ρ€Π΅Π·ΡƒΠ»ΡŒΡ‚Π°Ρ‚_Π°Π½Π°Π»ΠΈΠ·Π°.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ) if __name__ == "__main__": main()