# Hugging Face Space: sigridveronica/ai-news-analyzer (status: Running; file size: 9,039 bytes)

import os
import pandas as pd
from datetime import datetime
from dotenv import load_dotenv
import traceback

from md_html import convert_single_md_to_html as convert_md_to_html
from news_analysis import fetch_deep_news, generate_value_investor_report
from csv_utils import detect_changes
from fin_interpreter import analyze_article

# Project root: the parent of the directory that contains this file.
BASE_DIR = os.path.dirname(os.path.dirname(__file__))
# Generated Markdown reports are written here.
DATA_DIR = os.path.join(BASE_DIR, "data")
# HTML conversions of the Markdown reports are written here.
HTML_DIR = os.path.join(BASE_DIR, "html")
# Default topics CSV used when this module is run as a script.
CSV_PATH = os.path.join(BASE_DIR, "investing_topics.csv")

# Import-time side effect: make sure both output directories exist.
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(HTML_DIR, exist_ok=True)

# Import-time side effect: load environment variables via python-dotenv.
load_dotenv()


def build_metrics_box(topic, num_articles):
    """Build the Markdown blockquote header that precedes a report body.

    Args:
        topic: Topic name shown in the header.
        num_articles: Number of articles collected for the topic.

    Returns:
        A string that starts with a newline, lists the topic, the article
        count and the current local time (minute precision) as blockquote
        lines, and ends with an empty blockquote line plus a newline.
    """
    generated_at = datetime.now().strftime("%Y-%m-%d %H:%M")
    box_lines = [
        "",  # leading newline
        f"> Topic: `{topic}`",
        f"> Articles Collected: `{num_articles}`",
        f"> Generated: `{generated_at}`",
        ">",
        "",  # trailing newline
    ]
    return "\n".join(box_lines)


def _topic_file_slug(topic):
    """Return a filename-safe slug for ``topic``.

    Spaces become underscores and the text is lowercased (the historical
    naming scheme). Path separators and other characters that are invalid in
    file names on common platforms are additionally replaced with ``_`` so a
    topic such as ``"AI/ML"`` cannot escape or break the output directory.
    """
    unsafe = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})
    return str(topic).replace(" ", "_").lower().translate(unsafe)


def _unique_report_path(topic):
    """Return a not-yet-existing ``.md`` path in DATA_DIR for today's report."""
    # Compute the date stamp once so every candidate name uses the same day.
    base_name = f"{_topic_file_slug(topic)}_{datetime.now().strftime('%Y-%m-%d')}"
    candidate = os.path.join(DATA_DIR, base_name + ".md")
    counter = 1
    while os.path.exists(candidate):
        candidate = os.path.join(DATA_DIR, f"{base_name}_{counter}.md")
        counter += 1
    return candidate


def _score_article(article):
    """Return ``(sentiment, confidence, signal)`` for one article, best-effort.

    ``analyze_article`` may return either a dict or an indexable sequence.
    Any failure is reported on stdout and replaced by placeholder values so
    one bad article does not abort the whole run.
    """
    try:
        res = analyze_article(article.get("summary", ""))
        if isinstance(res, dict):
            return res.get("sentiment"), res.get("confidence"), res.get("signal")
        return res[0], res[1], res[2]
    except Exception as e:  # deliberate best-effort boundary
        print(f"Error analyzing article: {e}")
        return "Unknown", 0.0, "None"


def run_value_investing_analysis(csv_path, progress_callback=None):
    """
    Run the news analysis for every new or changed topic in the CSV.

    The CSV is compared against the snapshot ``investing_topics_prev.csv`` in
    BASE_DIR (when it exists) and only the rows reported by ``detect_changes``
    are processed. For each processed topic the news is fetched, every article
    is scored, and a Markdown report is written to DATA_DIR. After the loop
    the current CSV is saved as the new snapshot.

    Args:
        csv_path: Path to the topics CSV. Rows are read through the ``topic``
            and ``timespan_days`` columns; a missing or blank timespan falls
            back to 7.
        progress_callback: Optional callable receiving human-readable status
            strings.

    Returns:
        md_files (list of md file paths)
        all_articles (list of article dicts)
        Both lists are empty when no changes are detected.
    """
    current_df = pd.read_csv(csv_path)
    prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv")

    if os.path.exists(prev_path):
        previous_df = pd.read_csv(prev_path)
        changed_df = detect_changes(current_df, previous_df)
        if changed_df.empty:
            if progress_callback:
                progress_callback("✅ No changes detected. Skipping processing.")
            return [], []
    else:
        # First run: there is no snapshot, so every row counts as changed.
        changed_df = current_df

    new_md_files = []
    all_articles = []

    for _, row in changed_df.iterrows():
        topic = row.get("topic")
        # A blank CSV cell arrives as NaN; it cannot be searched or used as a
        # file name, so skip the row instead of crashing later.
        if topic is None or pd.isna(topic) or not str(topic).strip():
            warning = "⚠️ Skipping row with missing topic"
            print(warning)
            if progress_callback:
                progress_callback(warning)
            continue
        topic = str(topic)

        timespan = row.get("timespan_days", 7)
        # The .get default only covers a missing column, not an empty cell.
        if pd.isna(timespan):
            timespan = 7

        msg = f"🔍 Processing: {topic} ({timespan} days)"
        print(msg)
        if progress_callback:
            progress_callback(msg)

        news = fetch_deep_news(topic, timespan)
        if not news:
            warning = f"⚠️ No news found for: {topic}"
            print(warning)
            if progress_callback:
                progress_callback(warning)
            continue

        # Add articles to all_articles
        for article in news:
            sentiment, confidence, signal = _score_article(article)
            all_articles.append({
                "Title": article.get("title"),
                "URL": article.get("url"),
                "Summary": article.get("summary"),
                "Priority": article.get("priority", "Low"),
                "Date": article.get("date"),
                "Company": article.get("company", topic),
                "Sentiment": sentiment,
                "Confidence": confidence,
                "Signal": signal,
            })

        # Generate report
        report_body = generate_value_investor_report(topic, news)
        full_md = build_metrics_box(topic, len(news)) + report_body

        filepath = _unique_report_path(topic)
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(full_md)

        new_md_files.append(filepath)

    if progress_callback:
        progress_callback(f"✅ Markdown saved to: {DATA_DIR}")
    # Persist the snapshot so the next run only processes changed rows.
    current_df.to_csv(prev_path, index=False)
    return new_md_files, all_articles


def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
    """Run the full pipeline: analysis, HTML conversion and company insights.

    Args:
        csv_path: Path to the topics CSV passed to the analysis step.
        tavily_api_key: Value stored in the ``TAVILY_API_KEY`` environment
            variable before the analysis runs.
        progress_callback: Optional callable forwarded to the analysis step.

    Returns:
        A tuple ``(new_html_paths, articles_df, insights_df)``: the expected
        HTML file paths, a DataFrame of all collected articles, and the
        per-company summary built from it.
    """
    os.environ["TAVILY_API_KEY"] = tavily_api_key

    md_reports, article_rows = run_value_investing_analysis(csv_path, progress_callback)

    new_html_paths = []
    for report_path in md_reports:
        convert_md_to_html(report_path, HTML_DIR)
        # NOTE(review): assumes the converter writes "<name>.html" into
        # HTML_DIR; the path is derived here, not returned by it - confirm.
        html_name = os.path.basename(report_path).replace(".md", ".html")
        new_html_paths.append(os.path.join(HTML_DIR, html_name))

    articles_df = pd.DataFrame(article_rows)
    return new_html_paths, articles_df, build_company_insights(articles_df)


def build_company_insights(articles_df):
    """Summarize the collected articles per company.

    Args:
        articles_df: DataFrame with at least the ``Company``, ``Title``,
            ``Sentiment`` and ``Signal`` columns.

    Returns:
        An empty DataFrame when ``articles_df`` is empty. Otherwise one row
        per company with ``Mentions`` (count of titles), the most frequent
        ``Sentiment`` (``"Neutral"`` if none) and the most frequent
        ``Signal`` (``"Watch"`` if none).
    """
    if articles_df.empty:
        return pd.DataFrame()

    def most_frequent_or(fallback):
        # Build an aggregator returning the first mode, or the fallback when
        # the group has no mode at all (e.g. every value is missing).
        def pick(values):
            modes = values.mode()
            return fallback if modes.empty else modes[0]
        return pick

    aggregations = {
        "Title": "count",
        "Sentiment": most_frequent_or("Neutral"),
        "Signal": most_frequent_or("Watch"),
    }
    per_company = articles_df.groupby("Company").agg(aggregations)
    per_company = per_company.reset_index()
    return per_company.rename(columns={"Title": "Mentions"})


if __name__ == "__main__":
    # Script entry point: analyze the default topics CSV, then convert every
    # freshly written Markdown report to HTML. The article list is not needed.
    generated_reports = run_value_investing_analysis(CSV_PATH)[0]
    for report_path in generated_reports:
        convert_md_to_html(report_path, HTML_DIR)
    print(f"🌐 All reports converted to HTML at: {HTML_DIR}")

#import os
# import sys
# from datetime import datetime
# from dotenv import load_dotenv
# import pandas as pd

# from md_html import convert_single_md_to_html as convert_md_to_html
# from news_analysis import fetch_deep_news, generate_value_investor_report
# from csv_utils import detect_changes

# # === Setup Paths ===
# BASE_DIR = os.path.dirname(os.path.dirname(__file__))
# DATA_DIR = os.path.join(BASE_DIR, "data")
# HTML_DIR = os.path.join(BASE_DIR, "html")
# CSV_PATH = os.path.join(BASE_DIR, "investing_topics.csv")

# os.makedirs(DATA_DIR, exist_ok=True)
# os.makedirs(HTML_DIR, exist_ok=True)

# # === Load .env ===
# load_dotenv()

# def build_metrics_box(topic, num_articles):
#     now = datetime.now().strftime("%Y-%m-%d %H:%M")
#     return f"""
# > Topic: `{topic}`
# > Articles Collected: `{num_articles}`
# > Generated: `{now}`
# >
# """

# def run_value_investing_analysis(csv_path, progress_callback=None):
#     current_df = pd.read_csv(csv_path)
#     prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv")
    
#     if os.path.exists(prev_path):
#         previous_df = pd.read_csv(prev_path)
#         changed_df = detect_changes(current_df, previous_df)
#         if changed_df.empty:
#             if progress_callback:
#                 progress_callback("✅ No changes detected. Skipping processing.")
#             return []
#     else:
#         changed_df = current_df

#     new_md_files = []

#     for _, row in changed_df.iterrows():
#         topic = row.get("topic")
#         timespan = row.get("timespan_days", 7)
#         msg = f"🔍 Processing: {topic} ({timespan} days)"
#         print(msg)
#         if progress_callback:
#             progress_callback(msg)

#         news = fetch_deep_news(topic, timespan)
#         if not news:
#             warning = f"⚠️ No news found for: {topic}"
#             print(warning)
#             if progress_callback:
#                 progress_callback(warning)
#             continue

#         report_body = generate_value_investor_report(topic, news)
#         image_url = "https://via.placeholder.com/1281x721?text=No+Image+Available"
#         image_credit = "Image placeholder"

#         metrics_md = build_metrics_box(topic, len(news))
#         full_md = metrics_md + report_body

#         base_filename = f"{topic.replace(' ', '_').lower()}_{datetime.now().strftime('%Y-%m-%d')}"
#         filename = base_filename + ".md"
#         filepath = os.path.join(DATA_DIR, filename)

#         counter = 1
#         while os.path.exists(filepath):
#             filename = f"{base_filename}_{counter}.md"
#             filepath = os.path.join(DATA_DIR, filename)
#             counter += 1

#         with open(filepath, "w", encoding="utf-8") as f:
#             f.write(full_md)

#         new_md_files.append(filepath)

#     if progress_callback:
#         progress_callback(f"✅ Markdown saved to: {DATA_DIR}")
#     current_df.to_csv(prev_path, index=False)
#     return new_md_files

# def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
#     os.environ["TAVILY_API_KEY"] = tavily_api_key

#     new_md_files = run_value_investing_analysis(csv_path, progress_callback)
#     new_html_paths = []

#     for md_path in new_md_files:
#         convert_md_to_html(md_path, HTML_DIR)
#         html_path = os.path.join(HTML_DIR, os.path.basename(md_path).replace(".md", ".html"))
#         new_html_paths.append(html_path)

#     return new_html_paths

# if __name__ == "__main__":
#     md_files = run_value_investing_analysis(CSV_PATH)
#     for md in md_files:
#         convert_md_to_html(md, HTML_DIR)
#     print(f"🌐 All reports converted to HTML at: {HTML_DIR}")