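"""
Streamlit app for AI-assisted news monitoring (СКАН-ИНТЕРФАКС exports).

Pipeline implemented below: read the 'Публикации' sheet of an uploaded Excel file,
fuzzy-deduplicate excerpts per entity, translate Russian text to English with the
Helsinki-NLP opus-mt-ru-en model, score sentiment with an ensemble of three BERT-based
models, detect events and estimate monetary impact with an LLM via LangChain, and
write the results back into an Excel template for download.
"""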
import streamlit as st
import pandas as pd
import time
import matplotlib.pyplot as plt
from openpyxl.utils.dataframe import dataframe_to_rows
import io
from rapidfuzz import fuzz
import os
from openpyxl import load_workbook
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from transformers import pipeline
from io import StringIO, BytesIO
import sys
import contextlib
from langchain_openai import ChatOpenAI
import pdfkit
from jinja2 import Template
from tenacity import retry, stop_after_attempt, wait_exponential
from typing import Optional
from deep_translator import GoogleTranslator
from googletrans import Translator as LegacyTranslator
class TranslationSystem:
    def __init__(self, batch_size=5):
        """
        Initialize the translation system using the Helsinki-NLP model.
        """
        try:
            self.translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ru-en")  # ru-en: Russian to English
            self.batch_size = batch_size
        except Exception as e:
            st.error(f"Error initializing Helsinki NLP translator: {str(e)}")
            raise

    def translate_text(self, text):
        """
        Translate a single text with the Helsinki-NLP model, chunking long inputs.
        """
        if pd.isna(text) or not isinstance(text, str) or not text.strip():
            return text

        text = str(text).strip()
        if not text:
            return text

        try:
            # The Helsinki-NLP model has a maximum input length of ~512 tokens
            max_chunk_size = 512
            if len(text.split()) <= max_chunk_size:
                # Direct translation for short texts
                result = self.translator(text, max_length=512)
                return result[0]['translation_text']

            # Split long text into chunks by sentences
            chunks = self._split_into_chunks(text, max_chunk_size)
            translated_chunks = []
            for chunk in chunks:
                result = self.translator(chunk, max_length=512)
                translated_chunks.append(result[0]['translation_text'])
                time.sleep(0.1)  # Small delay between chunks
            return ' '.join(translated_chunks)

        except Exception as e:
            st.warning(f"Translation error: {str(e)}. Using original text.")
            return text
    def _split_into_chunks(self, text, max_length):
        """
        Split text into chunks by sentences, respecting the maximum length.
        """
        # Simple sentence splitting by common punctuation
        sentences = [s.strip() for s in text.replace('!', '.').replace('?', '.').split('.') if s.strip()]

        chunks = []
        current_chunk = []
        current_length = 0
        for sentence in sentences:
            sentence_length = len(sentence.split())
            if current_length + sentence_length > max_length:
                if current_chunk:
                    chunks.append(' '.join(current_chunk))
                current_chunk = [sentence]
                current_length = sentence_length
            else:
                current_chunk.append(sentence)
                current_length += sentence_length

        if current_chunk:
            chunks.append(' '.join(current_chunk))
        return chunks
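# Illustrative usage sketch (not executed by the app): assuming the opus-mt-ru-en
# weights can be loaded, a single Russian excerpt would be translated like this:
#
#     translator = TranslationSystem(batch_size=5)
#     english = translator.translate_text("Компания опубликовала отчетность за квартал.")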
def process_file(uploaded_file, model_choice, translation_method=None):
    df = None
    try:
        df = pd.read_excel(uploaded_file, sheet_name='Публикации')
        llm = init_langchain_llm(model_choice)
        translator = TranslationSystem(batch_size=5)

        # Initialize all result columns first
        df['Translated'] = ''
        df['Sentiment'] = ''
        df['Impact'] = ''
        df['Reasoning'] = ''
        df['Event_Type'] = ''
        df['Event_Summary'] = ''

        # Validate required columns
        required_columns = ['Объект', 'Заголовок', 'Выдержки из текста']
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            st.error(f"Error: The following required columns are missing: {', '.join(missing_columns)}")
            return None

        # Deduplication
        original_news_count = len(df)
        df = df.groupby('Объект', group_keys=False).apply(
            lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65)
        ).reset_index(drop=True)
        remaining_news_count = len(df)
        duplicates_removed = original_news_count - remaining_news_count
        st.write(f"Из {original_news_count} новостных сообщений удалены {duplicates_removed} дублирующих. Осталось {remaining_news_count}.")

        # Initialize progress tracking
        progress_bar = st.progress(0)
        status_text = st.empty()

        # Process in batches
        batch_size = 5
        for i in range(0, len(df), batch_size):
            batch_df = df.iloc[i:i + batch_size]
            for idx, row in batch_df.iterrows():
                try:
                    # Translation with Helsinki NLP
                    translated_text = translator.translate_text(row['Выдержки из текста'])
                    df.at[idx, 'Translated'] = translated_text

                    # Sentiment analysis
                    sentiment = analyze_sentiment(translated_text)
                    df.at[idx, 'Sentiment'] = sentiment

                    # Event detection
                    event_type, event_summary = detect_events(
                        llm,
                        row['Выдержки из текста'],
                        row['Объект']
                    )
                    df.at[idx, 'Event_Type'] = event_type
                    df.at[idx, 'Event_Summary'] = event_summary

                    if sentiment == "Negative":
                        impact, reasoning = estimate_impact(
                            llm,
                            translated_text,
                            row['Объект']
                        )
                        df.at[idx, 'Impact'] = impact
                        df.at[idx, 'Reasoning'] = reasoning

                    # Update progress
                    progress = (idx + 1) / len(df)
                    progress_bar.progress(progress)
                    status_text.text(f"Проанализировано {idx + 1} из {len(df)} новостей")

                except Exception as e:
                    if 'rate limit' in str(e).lower():
                        wait_time = 240  # wait 4 minutes on a rate limit
                        st.warning(f"Rate limit reached. Waiting {wait_time} seconds...")
                        time.sleep(wait_time)
                        continue
                    st.warning(f"Ошибка при обработке новости {idx + 1}: {str(e)}")
                    continue

                # Small delay between items
                time.sleep(0.5)

            # Delay between batches
            time.sleep(2)

        return df

    except Exception as e:
        st.error(f"❌ Ошибка при обработке файла: {str(e)}")
        return df if df is not None else None
def translate_reasoning_to_russian(llm, text):
    """Translate an English impact explanation into formal Russian via the LLM."""
    template = """
    Translate this English explanation to Russian, maintaining a formal business style:
    "{text}"

    Your response should contain only the Russian translation.
    """
    prompt = PromptTemplate(template=template, input_variables=["text"])
    chain = prompt | llm | RunnablePassthrough()
    response = chain.invoke({"text": text})

    # Handle different response types
    if hasattr(response, 'content'):
        return response.content.strip()
    elif isinstance(response, str):
        return response.strip()
    else:
        return str(response).strip()
def create_download_section(excel_data, pdf_data):
    st.markdown("""
        <div class="download-container">
            <div class="download-header">📥 Результаты анализа доступны для скачивания:</div>
        </div>
    """, unsafe_allow_html=True)

    col1, col2 = st.columns(2)
    with col1:
        if excel_data is not None:
            st.download_button(
                label="📊 Скачать Excel отчет",
                data=excel_data,
                file_name="результат_анализа.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                key="excel_download"
            )
        else:
            st.error("Ошибка при создании Excel файла")
def display_sentiment_results(row, sentiment, impact=None, reasoning=None):
    if sentiment == "Negative":
        st.markdown(f"""
            <div style='color: red; font-weight: bold;'>
            Объект: {row['Объект']}<br>
            Новость: {row['Заголовок']}<br>
            Тональность: {sentiment}<br>
            {"Эффект: " + impact + "<br>" if impact else ""}
            {"Обоснование: " + reasoning + "<br>" if reasoning else ""}
            </div>
        """, unsafe_allow_html=True)
    elif sentiment == "Positive":
        st.markdown(f"""
            <div style='color: green; font-weight: bold;'>
            Объект: {row['Объект']}<br>
            Новость: {row['Заголовок']}<br>
            Тональность: {sentiment}<br>
            </div>
        """, unsafe_allow_html=True)
    else:
        st.write(f"Объект: {row['Объект']}")
        st.write(f"Новость: {row['Заголовок']}")
        st.write(f"Тональность: {sentiment}")
    st.write("---")
# Initialize sentiment analyzers
finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert")
roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
finbert_tone = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone")


def get_mapped_sentiment(result):
    label = result['label'].lower()
    if label in ["positive", "label_2", "pos", "pos_label"]:
        return "Positive"
    elif label in ["negative", "label_0", "neg", "neg_label"]:
        return "Negative"
    return "Neutral"
def analyze_sentiment(text):
    """Ensemble sentiment: return the label that at least two of the three models agree on."""
    finbert_result = get_mapped_sentiment(finbert(text, truncation=True, max_length=512)[0])
    roberta_result = get_mapped_sentiment(roberta(text, truncation=True, max_length=512)[0])
    finbert_tone_result = get_mapped_sentiment(finbert_tone(text, truncation=True, max_length=512)[0])

    # Count occurrences of each sentiment
    sentiments = [finbert_result, roberta_result, finbert_tone_result]
    sentiment_counts = {s: sentiments.count(s) for s in set(sentiments)}

    # Return the sentiment if at least two models agree, otherwise Neutral
    for sentiment, count in sentiment_counts.items():
        if count >= 2:
            return sentiment
    return "Neutral"
def detect_events(llm, text, entity):
    """Ask the LLM whether the news item contains a reportable event for the entity."""
    template = """
    Проанализируйте следующую новость о компании "{entity}" и определите наличие следующих событий:
    1. Публикация отчетности и ключевые показатели (выручка, прибыль, EBITDA)
    2. События на рынке ценных бумаг (погашение облигаций, выплата/невыплата купона, дефолт, реструктуризация)
    3. Судебные иски или юридические действия против компании, акционеров, менеджеров

    Новость: {text}

    Ответьте в следующем формате:
    Тип: ["Отчетность" или "РЦБ" или "Суд" или "Нет"]
    Краткое описание: [краткое описание события на русском языке, не более 2 предложений]
    """
    prompt = PromptTemplate(template=template, input_variables=["entity", "text"])
    chain = prompt | llm
    response = chain.invoke({"entity": entity, "text": text})

    event_type = "Нет"
    summary = ""

    try:
        response_text = response.content if hasattr(response, 'content') else str(response)
        if "Тип:" in response_text and "Краткое описание:" in response_text:
            type_part, summary_part = response_text.split("Краткое описание:")
            event_type = type_part.split("Тип:")[1].strip()
            summary = summary_part.strip()
    except Exception as e:
        st.warning(f"Ошибка при анализе событий: {str(e)}")

    return event_type, summary
def fuzzy_deduplicate(df, column, threshold=50):
    """Drop rows whose text is a near-duplicate (fuzzy ratio >= threshold) of an earlier row."""
    seen_texts = []
    indices_to_keep = []
    for i, text in enumerate(df[column]):
        if pd.isna(text):
            indices_to_keep.append(i)
            continue
        text = str(text)
        if not seen_texts or all(fuzz.ratio(text, seen) < threshold for seen in seen_texts):
            seen_texts.append(text)
            indices_to_keep.append(i)
    return df.iloc[indices_to_keep]
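# Illustrative example: process_file() calls this with threshold=65, so two excerpts whose
# fuzz.ratio(...) is 65 or higher are treated as duplicates and only the first one is kept.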
def init_langchain_llm(model_choice):
    try:
        if model_choice == "Groq (llama-3.1-70b)":
            if 'groq_key' not in st.secrets:
                st.error("Groq API key not found in secrets. Please add it with the key 'groq_key'.")
                st.stop()
            return ChatOpenAI(
                base_url="https://api.groq.com/openai/v1",
                model="llama-3.1-70b-versatile",
                openai_api_key=st.secrets['groq_key'],
                temperature=0.0
            )
        elif model_choice == "ChatGPT-4-mini":
            if 'openai_key' not in st.secrets:
                st.error("OpenAI API key not found in secrets. Please add it with the key 'openai_key'.")
                st.stop()
            return ChatOpenAI(
                model="gpt-4o-mini",  # matches the "ChatGPT-4-mini" menu option
                openai_api_key=st.secrets['openai_key'],
                temperature=0.0
            )
        else:  # Qwen-Max via DashScope
            if 'ali_key' not in st.secrets:
                st.error("DashScope API key not found in secrets. Please add it with the key 'ali_key'.")
                st.stop()
            # Using Qwen's API through DashScope's OpenAI-compatible endpoint
            return ChatOpenAI(
                base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
                model="qwen-max",
                openai_api_key=st.secrets['ali_key'],
                temperature=0.0
            )
    except Exception as e:
        st.error(f"Error initializing the LLM: {str(e)}")
        st.stop()
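# The checks above expect the API keys in .streamlit/secrets.toml, for example (placeholder values):
#
#     groq_key = "..."
#     openai_key = "..."
#     ali_key = "..."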
def estimate_impact(llm, news_text, entity):
    template = """
    Analyze the following news piece about the entity "{entity}" and estimate its monetary impact in Russian rubles for this entity in the next 6 months.

    If a precise monetary estimate is not possible, categorize the impact as one of the following:
    1. "Значительный риск убытков"
    2. "Умеренный риск убытков"
    3. "Незначительный риск убытков"
    4. "Вероятность прибыли"
    5. "Неопределенный эффект"

    Provide brief reasoning (maximum 100 words).

    News: {news}

    Your response should be in the following format:
    Impact: [Your estimate or category]
    Reasoning: [Your reasoning]
    """
    prompt = PromptTemplate(template=template, input_variables=["entity", "news"])
    chain = prompt | llm
    response = chain.invoke({"entity": entity, "news": news_text})

    impact = "Неопределенный эффект"
    reasoning = "Не удалось получить обоснование"

    # Extract content from the response
    response_text = response.content if hasattr(response, 'content') else str(response)

    try:
        if "Impact:" in response_text and "Reasoning:" in response_text:
            impact_part, reasoning_part = response_text.split("Reasoning:")
            impact = impact_part.split("Impact:")[1].strip()
            reasoning = reasoning_part.strip()
    except Exception as e:
        st.error(f"Error parsing LLM response: {str(e)}")

    return impact, reasoning
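# Illustrative example of a response the parser above accepts:
#
#     Impact: Умеренный риск убытков
#     Reasoning: The lawsuit is unlikely to materially affect revenue over the next six months.
#
# Text after "Impact:" and "Reasoning:" is captured; any other shape leaves the defaults in place.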
def format_elapsed_time(seconds):
    """Format a duration in seconds as a human-readable Russian string."""
    hours, remainder = divmod(int(seconds), 3600)
    minutes, seconds = divmod(remainder, 60)

    time_parts = []
    if hours > 0:
        time_parts.append(f"{hours} час{'' if hours == 1 else 'а' if 2 <= hours <= 4 else 'ов'}")
    if minutes > 0:
        time_parts.append(f"{minutes} минут{'а' if minutes == 1 else 'ы' if 2 <= minutes <= 4 else ''}")
    if seconds > 0 or not time_parts:
        time_parts.append(f"{seconds} секунд{'а' if seconds == 1 else 'ы' if 2 <= seconds <= 4 else ''}")
    return " ".join(time_parts)
def generate_sentiment_visualization(df):
    negative_df = df[df['Sentiment'] == 'Negative']

    if negative_df.empty:
        st.warning("Не обнаружено негативных упоминаний. Отображаем общую статистику по объектам.")
        entity_counts = df['Объект'].value_counts()
    else:
        entity_counts = negative_df['Объект'].value_counts()

    if len(entity_counts) == 0:
        st.warning("Нет данных для визуализации.")
        return None

    fig, ax = plt.subplots(figsize=(12, max(6, len(entity_counts) * 0.5)))
    entity_counts.plot(kind='barh', ax=ax)
    ax.set_title('Количество негативных упоминаний по объектам')
    ax.set_xlabel('Количество упоминаний')
    plt.tight_layout()
    return fig
def create_analysis_data(df):
    analysis_data = []
    for _, row in df.iterrows():
        if row['Sentiment'] == 'Negative':
            analysis_data.append([
                row['Объект'],
                row['Заголовок'],
                'РИСК УБЫТКА',
                row['Impact'],
                row['Reasoning'],
                row['Выдержки из текста']
            ])
    return pd.DataFrame(analysis_data, columns=[
        'Объект',
        'Заголовок',
        'Признак',
        'Оценка влияния',
        'Обоснование',
        'Текст сообщения'
    ])
def create_output_file(df, uploaded_file, llm):
    wb = load_workbook("sample_file.xlsx")
    try:
        # Update the 'Мониторинг' sheet with detected events
        ws = wb['Мониторинг']
        row_idx = 4
        for _, row in df.iterrows():
            if row['Event_Type'] != 'Нет':
                ws.cell(row=row_idx, column=5, value=row['Объект'])             # Column E
                ws.cell(row=row_idx, column=6, value=row['Заголовок'])           # Column F
                ws.cell(row=row_idx, column=7, value=row['Event_Type'])          # Column G
                ws.cell(row=row_idx, column=8, value=row['Event_Summary'])       # Column H
                ws.cell(row=row_idx, column=9, value=row['Выдержки из текста'])  # Column I
                row_idx += 1
        # Sort entities by number of negative publications
        # (align per-entity counts on a shared index so the columns stay in sync)
        entity_counts = df.groupby('Объект').size()
        negative_counts = df[df['Sentiment'] == 'Negative'].groupby('Объект').size()
        positive_counts = df[df['Sentiment'] == 'Positive'].groupby('Объект').size()
        entity_stats = pd.DataFrame({
            'Всего': entity_counts,
            'Негативные': negative_counts.reindex(entity_counts.index, fill_value=0).astype(int),
            'Позитивные': positive_counts.reindex(entity_counts.index, fill_value=0).astype(int)
        }).sort_values('Негативные', ascending=False)
        # Calculate the most negative impact for each entity
        entity_impacts = {}
        for entity in df['Объект'].unique():
            entity_df = df[df['Объект'] == entity]
            negative_impacts = entity_df[entity_df['Sentiment'] == 'Negative']['Impact']
            entity_impacts[entity] = negative_impacts.iloc[0] if len(negative_impacts) > 0 else 'Неопределенный эффект'

        # Update the 'Сводка' sheet
        ws = wb['Сводка']
        for idx, (entity, row) in enumerate(entity_stats.iterrows(), start=4):
            ws.cell(row=idx, column=5, value=entity)                  # Column E
            ws.cell(row=idx, column=6, value=row['Всего'])            # Column F
            ws.cell(row=idx, column=7, value=row['Негативные'])       # Column G
            ws.cell(row=idx, column=8, value=row['Позитивные'])       # Column H
            ws.cell(row=idx, column=9, value=entity_impacts[entity])  # Column I

        # Update the 'Значимые' sheet
        ws = wb['Значимые']
        row_idx = 3
        for _, row in df.iterrows():
            if row['Sentiment'] in ['Negative', 'Positive']:
                ws.cell(row=row_idx, column=3, value=row['Объект'])              # Column C
                ws.cell(row=row_idx, column=4, value='релевантно')               # Column D
                ws.cell(row=row_idx, column=5, value=row['Sentiment'])           # Column E
                ws.cell(row=row_idx, column=6, value=row['Impact'])              # Column F
                ws.cell(row=row_idx, column=7, value=row['Заголовок'])           # Column G
                ws.cell(row=row_idx, column=8, value=row['Выдержки из текста'])  # Column H
                row_idx += 1

        # Copy the 'Публикации' sheet from the uploaded file
        original_df = pd.read_excel(uploaded_file, sheet_name='Публикации')
        ws = wb['Публикации']
        for r_idx, row in enumerate(dataframe_to_rows(original_df, index=False, header=True), start=1):
            for c_idx, value in enumerate(row, start=1):
                ws.cell(row=r_idx, column=c_idx, value=value)

        # Update the 'Анализ' sheet
        ws = wb['Анализ']
        row_idx = 4
        for _, row in df[df['Sentiment'] == 'Negative'].iterrows():
            ws.cell(row=row_idx, column=5, value=row['Объект'])      # Column E
            ws.cell(row=row_idx, column=6, value=row['Заголовок'])   # Column F
            ws.cell(row=row_idx, column=7, value="Риск убытка")      # Column G
            # Translate the reasoning if it exists
            if pd.notna(row['Reasoning']):
                translated_reasoning = translate_reasoning_to_russian(llm, row['Reasoning'])
                ws.cell(row=row_idx, column=8, value=translated_reasoning)   # Column H
            ws.cell(row=row_idx, column=9, value=row['Выдержки из текста'])  # Column I
            row_idx += 1

        # Update the 'Тех.приложение' sheet
        tech_df = df[['Объект', 'Заголовок', 'Выдержки из текста', 'Translated', 'Sentiment', 'Impact', 'Reasoning']]
        if 'Тех.приложение' not in wb.sheetnames:
            wb.create_sheet('Тех.приложение')
        ws = wb['Тех.приложение']
        for r_idx, row in enumerate(dataframe_to_rows(tech_df, index=False, header=True), start=1):
            for c_idx, value in enumerate(row, start=1):
                ws.cell(row=r_idx, column=c_idx, value=value)

    except Exception as e:
        st.warning(f"Ошибка при создании выходного файла: {str(e)}")

    output = io.BytesIO()
    wb.save(output)
    output.seek(0)
    return output
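# Note: create_output_file() fills a pre-existing template "sample_file.xlsx"; the file must be
# present in the working directory and contain the sheets 'Мониторинг', 'Сводка', 'Значимые',
# 'Публикации' and 'Анализ' ('Тех.приложение' is created on the fly if missing).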
def main():
    with st.sidebar:
        st.title("::: AI-анализ мониторинга новостей (v.3.42) :::")
        st.subheader("по материалам СКАН-ИНТЕРФАКС")

        model_choice = st.radio(
            "Выберите модель для анализа:",
            ["Groq (llama-3.1-70b)", "ChatGPT-4-mini", "Qwen-Max"],
            key="model_selector"
        )

        st.markdown(
            """
            Использованы технологии:
            - Анализ естественного языка с помощью предтренированных нейросетей **BERT**,<br/>
            - Дополнительная обработка при помощи больших языковых моделей (**LLM**),<br/>
            - объединенные при помощи фреймворка **LangChain**.<br/>
            """,
            unsafe_allow_html=True)

        with st.expander("ℹ️ Инструкция"):
            st.markdown("""
            1. Выберите модель для анализа
            2. Выберите метод перевода
            3. Загрузите Excel файл с новостями
            4. Дождитесь завершения анализа
            5. Скачайте результаты анализа в формате Excel
            """, unsafe_allow_html=True)
    st.markdown(
        """
        <style>
        .signature {
            position: fixed;
            right: 12px;
            top: 12px;
            font-size: 14px;
            color: #FF0000;
            opacity: 0.9;
            z-index: 999;
        }
        </style>
        <div class="signature">denis.pokrovsky.npff</div>
        """,
        unsafe_allow_html=True
    )
| st.title("Анализ мониторинга новостей") | |
| if 'processed_df' not in st.session_state: | |
| st.session_state.processed_df = None | |
| # Single file uploader with unique key | |
| uploaded_file = st.sidebar.file_uploader("Выбирайте Excel-файл", type="xlsx", key="unique_file_uploader") | |
| if uploaded_file is not None and st.session_state.processed_df is None: | |
| st.session_state.processed_df = process_file( | |
| uploaded_file, | |
| model_choice, | |
| translation_method = 'auto' # This parameter won't affect the translation method but keeps the interface consistent | |
| ) | |
| st.subheader("Предпросмотр данных") | |
| preview_df = st.session_state.processed_df[['Объект', 'Заголовок', 'Sentiment', 'Impact']].head() | |
| st.dataframe(preview_df) | |
| # Add preview of Monitoring results | |
| st.subheader("Предпросмотр мониторинга событий и риск-факторов эмитентов") | |
| monitoring_df = st.session_state.processed_df[ | |
| (st.session_state.processed_df['Event_Type'] != 'Нет') & | |
| (st.session_state.processed_df['Event_Type'].notna()) | |
| ][['Объект', 'Заголовок', 'Event_Type', 'Event_Summary']].head() | |
| if len(monitoring_df) > 0: | |
| st.dataframe(monitoring_df) | |
| else: | |
| st.info("Не обнаружено значимых событий для мониторинга") | |
| analysis_df = create_analysis_data(st.session_state.processed_df) | |
| st.subheader("Анализ") | |
| st.dataframe(analysis_df) | |
| output = create_output_file(st.session_state.processed_df, uploaded_file, llm) | |
| end_time = time.time() | |
| elapsed_time = end_time - start_time | |
| formatted_time = format_elapsed_time(elapsed_time) | |
| st.success(f"Обработка и анализ завершены за {formatted_time}.") | |
| st.download_button( | |
| label="Скачать результат анализа", | |
| data=output, | |
| file_name="результат_анализа.xlsx", | |
| mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" | |
| ) | |
if __name__ == "__main__":
    main()