Spaces:

sigridveronica
/

ai-news-analyzer

Runtime error

App Files Files Community

Sigrid De los Santos committed on Jul 26

Commit

0858d17

1 Parent(s): be852d2

debugging

Browse files

Files changed (2) hide show

app.py +9 -28
src/main.py +33 -90

app.py CHANGED Viewed

@@ -4,24 +4,23 @@ import tempfile
 import streamlit as st
 import pandas as pd
-# Add 'src' to Python path so we can import main.py
 sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
 from main import run_pipeline
 st.set_page_config(page_title="📰 AI News Analyzer", layout="wide")
 st.title("🧠 AI-Powered Investing News Analyzer")
-# === API Key Input ===
 st.subheader("🔐 API Keys")
 openai_api_key = st.text_input("OpenAI API Key", type="password").strip()
 tavily_api_key = st.text_input("Tavily API Key", type="password").strip()
-# === Topic Input ===
 st.subheader("📈 Topics of Interest")
 topics_data = []
 with st.form("topics_form"):
     topic_count = st.number_input("How many topics?", min_value=1, max_value=10, value=1, step=1)
     for i in range(topic_count):
         col1, col2 = st.columns(2)
         with col1:
@@ -29,23 +28,20 @@ with st.form("topics_form"):
         with col2:
             days = st.number_input(f"Timespan (days)", min_value=1, max_value=30, value=7, key=f"days_{i}")
         topics_data.append({"topic": topic, "timespan_days": days})
     submitted = st.form_submit_button("Run Analysis")
-# === Tabs Setup ===
 tab_report, tab_articles, tab_insights = st.tabs(["📝 Report", "📋 Articles", "📊 Insights"])
 articles_df = pd.DataFrame()
 insights_df = pd.DataFrame()
 html_paths = []
-# === Submission logic ===
 if submitted:
     if not openai_api_key or not tavily_api_key or not all([td['topic'] for td in topics_data]):
         st.warning("Please fill in all fields.")
     else:
         os.environ["OPENAI_API_KEY"] = openai_api_key
         os.environ["TAVILY_API_KEY"] = tavily_api_key
         df = pd.DataFrame(topics_data)
         with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp_csv:
             df.to_csv(tmp_csv.name, index=False)
@@ -61,12 +57,10 @@ if submitted:
         try:
             spinner_box.markdown("⏳ Running analysis pipeline...")
             html_paths, articles_df, insights_df = run_pipeline(csv_path, tavily_api_key, progress_callback=log)
             spinner_box.success("✅ Analysis complete!")
-            # --- Report Tab ---
             with tab_report:
                 if html_paths:
                     for path in html_paths:
@@ -76,31 +70,18 @@ if submitted:
                 else:
                     st.error("❌ No reports were generated.")
-            # --- Articles Tab ---
             with tab_articles:
-                st.subheader("📋 Articles Table")
                 if not articles_df.empty:
-                    st.dataframe(articles_df, use_container_width=True)
-                    st.download_button(
-                        label="⬇️ Download Articles CSV",
-                        data=articles_df.to_csv(index=False).encode("utf-8"),
-                        file_name="articles.csv",
-                        mime="text/csv"
-                    )
                 else:
                     st.info("No articles available.")
-            # --- Insights Tab ---
             with tab_insights:
-                st.subheader("📊 Investment Insights")
                 if not insights_df.empty:
                     st.dataframe(insights_df, use_container_width=True)
-                    st.download_button(
-                        label="⬇️ Download Insights CSV",
-                        data=insights_df.to_csv(index=False).encode("utf-8"),
-                        file_name="insights.csv",
-                        mime="text/csv"
-                    )
                 else:
                     st.info("No insights available.")

 import streamlit as st
 import pandas as pd
+# Add 'src' to Python path
 sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
 from main import run_pipeline
 st.set_page_config(page_title="📰 AI News Analyzer", layout="wide")
 st.title("🧠 AI-Powered Investing News Analyzer")
+# --- API Keys ---
 st.subheader("🔐 API Keys")
 openai_api_key = st.text_input("OpenAI API Key", type="password").strip()
 tavily_api_key = st.text_input("Tavily API Key", type="password").strip()
+# --- Topics ---
 st.subheader("📈 Topics of Interest")
 topics_data = []
 with st.form("topics_form"):
     topic_count = st.number_input("How many topics?", min_value=1, max_value=10, value=1, step=1)
     for i in range(topic_count):
         col1, col2 = st.columns(2)
         with col1:
         with col2:
             days = st.number_input(f"Timespan (days)", min_value=1, max_value=30, value=7, key=f"days_{i}")
         topics_data.append({"topic": topic, "timespan_days": days})
     submitted = st.form_submit_button("Run Analysis")
+# --- Tabs ---
 tab_report, tab_articles, tab_insights = st.tabs(["📝 Report", "📋 Articles", "📊 Insights"])
 articles_df = pd.DataFrame()
 insights_df = pd.DataFrame()
 html_paths = []
 if submitted:
     if not openai_api_key or not tavily_api_key or not all([td['topic'] for td in topics_data]):
         st.warning("Please fill in all fields.")
     else:
         os.environ["OPENAI_API_KEY"] = openai_api_key
         os.environ["TAVILY_API_KEY"] = tavily_api_key
         df = pd.DataFrame(topics_data)
         with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp_csv:
             df.to_csv(tmp_csv.name, index=False)
         try:
             spinner_box.markdown("⏳ Running analysis pipeline...")
             html_paths, articles_df, insights_df = run_pipeline(csv_path, tavily_api_key, progress_callback=log)
             spinner_box.success("✅ Analysis complete!")
+            # Report Tab
             with tab_report:
                 if html_paths:
                     for path in html_paths:
                 else:
                     st.error("❌ No reports were generated.")
+            # Articles Tab
             with tab_articles:
                 if not articles_df.empty:
+                    st.dataframe(articles_df[["Title", "URL", "Summary", "Priority", "Date"]],
+                                 use_container_width=True)
                 else:
                     st.info("No articles available.")
+            # Insights Tab
             with tab_insights:
                 if not insights_df.empty:
                     st.dataframe(insights_df, use_container_width=True)
                 else:
                     st.info("No insights available.")

src/main.py CHANGED Viewed

@@ -9,27 +9,14 @@ from fin_interpreter import analyze_article
 BASE_DIR = os.path.dirname(os.path.dirname(__file__))
 DATA_DIR = os.path.join(BASE_DIR, "data")
 HTML_DIR = os.path.join(BASE_DIR, "html")
-CSV_PATH = os.path.join(BASE_DIR, "investing_topics.csv")
 os.makedirs(DATA_DIR, exist_ok=True)
 os.makedirs(HTML_DIR, exist_ok=True)
 load_dotenv()
-def build_metrics_box(topic, num_articles):
-    now = datetime.now().strftime("%Y-%m-%d %H:%M")
-    return f"""
-> Topic: `{topic}`
-> Articles Collected: `{num_articles}`
-> Generated: `{now}`
->
-"""
 def derive_priority(sentiment, confidence):
-    """Basic logic to derive priority for articles."""
-    if sentiment == "Positive" and confidence > 0.75:
         return "High"
     elif sentiment == "Negative" and confidence > 0.6:
         return "High"
@@ -37,141 +24,97 @@ def derive_priority(sentiment, confidence):
         return "Medium"
     return "Low"
-def derive_signal(sentiment, confidence):
-    """Basic investment signal logic."""
-    if sentiment == "Positive" and confidence > 0.7:
-        return "Buy"
-    elif sentiment == "Negative":
-        return "Avoid"
-    else:
-        return "Watch"
 def run_value_investing_analysis(csv_path, progress_callback=None):
     current_df = pd.read_csv(csv_path)
-    new_md_files = []
     all_articles = []
     for _, row in current_df.iterrows():
         topic = row.get("topic")
         timespan = row.get("timespan_days", 7)
-        msg = f"🔍 Processing: {topic} ({timespan} days)"
-        print(msg)
         if progress_callback:
-            progress_callback(msg)
         news = fetch_deep_news(topic, timespan)
         if not news:
-            warning = f"⚠️ No news found for: {topic}"
-            if progress_callback:
-                progress_callback(warning)
             continue
-        # Process each article
         for article in news:
             summary = article.get("summary", "")
             title = article.get("title", "Untitled")
             url = article.get("url", "")
             date = article.get("date", datetime.now().strftime("%Y-%m-%d"))
-            company = article.get("company", topic)
             try:
-                res = analyze_article(summary)
-                if isinstance(res, dict):
-                    sentiment = res.get("sentiment", "Neutral")
-                    confidence = float(res.get("confidence", 0.0))
-                    signal = res.get("signal", "Watch")
-                else:
-                    sentiment, confidence, signal = res[0], float(res[1]), res[2]
-            except Exception:
-                sentiment, confidence, signal = "Neutral", 0.0, "Watch"
             priority = derive_priority(sentiment, confidence)
-            if signal == "None":
-                signal = derive_signal(sentiment, confidence)
             all_articles.append({
                 "Title": title,
                 "URL": url,
                 "Summary": summary,
                 "Priority": priority,
                 "Date": date,
-                "Company": company,
                 "Sentiment": sentiment,
-                "Confidence": round(confidence, 2),
-                "Signal": signal
             })
-        # Generate markdown report
         report_body = generate_value_investor_report(topic, news)
-        metrics_md = build_metrics_box(topic, len(news))
-        full_md = metrics_md + report_body
         filename = f"{topic.replace(' ', '_').lower()}_{datetime.now().strftime('%Y-%m-%d')}.md"
         filepath = os.path.join(DATA_DIR, filename)
-        counter = 1
-        while os.path.exists(filepath):
-            filename = f"{topic.replace(' ', '_').lower()}_{datetime.now().strftime('%Y-%m-%d')}_{counter}.md"
-            filepath = os.path.join(DATA_DIR, filename)
-            counter += 1
         with open(filepath, "w", encoding="utf-8") as f:
-            f.write(full_md)
-        new_md_files.append(filepath)
-    return new_md_files, all_articles
-def build_company_insights(articles_df):
-    if articles_df.empty:
         return pd.DataFrame()
     insights = []
-    for company, group in articles_df.groupby("Company"):
         mentions = len(group)
         dominant_sentiment = group["Sentiment"].mode()[0] if not group["Sentiment"].mode().empty else "Neutral"
-        dominant_signal = group["Signal"].mode()[0] if not group["Signal"].mode().empty else "Watch"
         avg_confidence = round(group["Confidence"].mean(), 2)
-        risk_level = "High" if (dominant_sentiment == "Negative" and avg_confidence > 0.5) else "Low"
-        if dominant_sentiment == "Neutral":
-            risk_level = "Medium"
         highlights = " | ".join(group["Summary"].head(2).tolist())
         insights.append({
             "Company": company,
             "Mentions": mentions,
             "Sentiment": dominant_sentiment,
-            "Signal": dominant_signal,
-            "Risk": risk_level,
             "Confidence": avg_confidence,
             "Highlights": highlights
         })
     return pd.DataFrame(insights)
 def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
     os.environ["TAVILY_API_KEY"] = tavily_api_key
-    new_md_files, all_articles = run_value_investing_analysis(csv_path, progress_callback)
-    new_html_paths = []
-    for md_path in new_md_files:
-        convert_md_to_html(md_path, HTML_DIR)
-        html_path = os.path.join(HTML_DIR, os.path.basename(md_path).replace(".md", ".html"))
-        new_html_paths.append(html_path)
     articles_df = pd.DataFrame(all_articles)
-    insights_df = build_company_insights(articles_df)
-    return new_html_paths, articles_df, insights_df
-if __name__ == "__main__":
-    md_files, all_articles = run_value_investing_analysis(CSV_PATH)
-    for md in md_files:
-        convert_md_to_html(md, HTML_DIR)
-    print(f"🌐 All reports converted to HTML at: {HTML_DIR}")
 # import os

 BASE_DIR = os.path.dirname(os.path.dirname(__file__))
 DATA_DIR = os.path.join(BASE_DIR, "data")
 HTML_DIR = os.path.join(BASE_DIR, "html")
 os.makedirs(DATA_DIR, exist_ok=True)
 os.makedirs(HTML_DIR, exist_ok=True)
 load_dotenv()
 def derive_priority(sentiment, confidence):
+    if sentiment == "Positive" and confidence > 0.7:
         return "High"
     elif sentiment == "Negative" and confidence > 0.6:
         return "High"
         return "Medium"
     return "Low"
 def run_value_investing_analysis(csv_path, progress_callback=None):
     """Fetch, score and report on every topic listed in a CSV file.

     For each row of ``csv_path`` the ``topic`` value is passed to
     ``fetch_deep_news`` together with ``timespan_days`` (7 when the row has
     no such label).  Every returned article summary is scored with
     ``analyze_article`` and a markdown report for the topic is written
     into ``DATA_DIR``.

     Args:
         csv_path: Path of the CSV file to read with ``pandas.read_csv``.
         progress_callback: Optional callable receiving one status string
             per processed topic.

     Returns:
         A ``(all_articles, company_data)`` tuple of lists of dicts.
         ``all_articles`` rows carry Title/URL/Summary/Priority/Date;
         ``company_data`` rows carry Company/Sentiment/Confidence/Summary.
     """
     current_df = pd.read_csv(csv_path)
     all_articles = []
     company_data = []
     for _, row in current_df.iterrows():
         raw_topic = row.get("topic")
         # pandas yields NaN for blank cells; there is nothing to search for
         # and NaN has no str methods, so skip the row instead of crashing.
         if pd.isna(raw_topic) or not str(raw_topic).strip():
             continue
         topic = str(raw_topic)
         timespan = row.get("timespan_days", 7)
         if progress_callback:
             progress_callback(f"🔍 Processing: {topic} ({timespan} days)")
         news = fetch_deep_news(topic, timespan)
         if not news:
             continue
         for article in news:
             summary = article.get("summary", "")
             title = article.get("title", "Untitled")
             url = article.get("url", "")
             date = article.get("date", datetime.now().strftime("%Y-%m-%d"))
             try:
                 result = analyze_article(summary)
                 sentiment = result.get("sentiment", "Neutral")
                 confidence = float(result.get("confidence", 0.0))
             except Exception as e:
                 # Best effort: one failed analysis must not abort the run.
                 print(f"[FinBERT ERROR] {e}")
                 sentiment, confidence = "Neutral", 0.0
             priority = derive_priority(sentiment, confidence)
             # Add to articles_df
             all_articles.append({
                 "Title": title,
                 "URL": url,
                 "Summary": summary,
                 "Priority": priority,
                 "Date": date,
             })
             # Collect company-level data for insights
             company_data.append({
                 "Company": topic,  # For now, use topic as company proxy
                 "Sentiment": sentiment,
                 "Confidence": confidence,
                 "Summary": summary,
             })
         # Save markdown report
         report_body = generate_value_investor_report(topic, news)
         # The topic is free text typed by the user: map anything that is not
         # alphanumeric, '-' or '_' to '_' so path separators or '..' cannot
         # break open() or escape DATA_DIR.  Plain "words and spaces" topics
         # produce exactly the same name as before.
         safe_stem = "".join(
             ch if ch.isalnum() or ch in "-_" else "_" for ch in topic.lower()
         )
         filename = f"{safe_stem}_{datetime.now().strftime('%Y-%m-%d')}.md"
         filepath = os.path.join(DATA_DIR, filename)
         with open(filepath, "w", encoding="utf-8") as f:
             f.write(report_body)
     return all_articles, company_data
+def build_company_insights(company_data):
+    if not company_data:
         return pd.DataFrame()
+    df = pd.DataFrame(company_data)
     insights = []
+    for company, group in df.groupby("Company"):
         mentions = len(group)
         dominant_sentiment = group["Sentiment"].mode()[0] if not group["Sentiment"].mode().empty else "Neutral"
         avg_confidence = round(group["Confidence"].mean(), 2)
         highlights = " | ".join(group["Summary"].head(2).tolist())
         insights.append({
             "Company": company,
             "Mentions": mentions,
             "Sentiment": dominant_sentiment,
             "Confidence": avg_confidence,
             "Highlights": highlights
         })
     return pd.DataFrame(insights)
 def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
     """Run the analysis and convert this run's markdown reports to HTML.

     Args:
         csv_path: CSV file handed to ``run_value_investing_analysis``.
         tavily_api_key: Value exported as ``TAVILY_API_KEY`` before the run.
         progress_callback: Optional callable forwarded to the analysis.

     Returns:
         ``(html_paths, articles_df, insights_df)`` where ``html_paths``
         lists the HTML files for reports written during this call.
     """
     os.environ["TAVILY_API_KEY"] = tavily_api_key

     def _report_mtimes():
         # Map every markdown report in DATA_DIR to its modification time.
         return {
             name: os.stat(os.path.join(DATA_DIR, name)).st_mtime_ns
             for name in os.listdir(DATA_DIR)
             if name.endswith(".md")
         }

     # DATA_DIR accumulates reports across runs.  Snapshot it first so only
     # files created or rewritten by this run are converted and returned;
     # listing the whole directory would resurface stale reports.
     before = _report_mtimes()
     all_articles, company_data = run_value_investing_analysis(csv_path, progress_callback)
     after = _report_mtimes()
     # Convert markdown to HTML (sorted: os.listdir order is unspecified)
     html_paths = []
     for md_file in sorted(after):
         if before.get(md_file) == after[md_file]:
             continue  # untouched by this run
         convert_md_to_html(os.path.join(DATA_DIR, md_file), HTML_DIR)
         # NOTE(review): assumes convert_md_to_html names its output by
         # swapping ".md" for ".html", as the original code did - confirm.
         html_paths.append(os.path.join(HTML_DIR, md_file.replace(".md", ".html")))
     articles_df = pd.DataFrame(all_articles)
     insights_df = build_company_insights(company_data)
     return html_paths, articles_df, insights_df
 # import os