Spaces:
Runtime error
Runtime error
Sigrid De los Santos
committed on
Commit
·
be852d2
1
Parent(s):
e5c82b0
debugging
Browse files
- app.py +47 -63
- src/main.py +145 -79
app.py
CHANGED
|
@@ -62,7 +62,6 @@ if submitted:
|
|
| 62 |
try:
|
| 63 |
spinner_box.markdown("β³ Running analysis pipeline...")
|
| 64 |
|
| 65 |
-
# Run the full pipeline
|
| 66 |
html_paths, articles_df, insights_df = run_pipeline(csv_path, tavily_api_key, progress_callback=log)
|
| 67 |
|
| 68 |
spinner_box.success("β
Analysis complete!")
|
|
@@ -79,15 +78,29 @@ if submitted:
|
|
| 79 |
|
| 80 |
# --- Articles Tab ---
|
| 81 |
with tab_articles:
|
|
|
|
| 82 |
if not articles_df.empty:
|
| 83 |
st.dataframe(articles_df, use_container_width=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
else:
|
| 85 |
st.info("No articles available.")
|
| 86 |
|
| 87 |
# --- Insights Tab ---
|
| 88 |
with tab_insights:
|
|
|
|
| 89 |
if not insights_df.empty:
|
| 90 |
st.dataframe(insights_df, use_container_width=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
else:
|
| 92 |
st.info("No insights available.")
|
| 93 |
|
|
@@ -95,16 +108,11 @@ if submitted:
|
|
| 95 |
spinner_box.error("β Failed.")
|
| 96 |
log_box.error(f"β Error: {e}")
|
| 97 |
|
| 98 |
-
|
| 99 |
# import os
|
| 100 |
# import sys
|
| 101 |
# import tempfile
|
| 102 |
-
# import time
|
| 103 |
-
# import itertools
|
| 104 |
# import streamlit as st
|
| 105 |
# import pandas as pd
|
| 106 |
-
# from threading import Thread
|
| 107 |
-
# from io import StringIO
|
| 108 |
|
| 109 |
# # Add 'src' to Python path so we can import main.py
|
| 110 |
# sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
|
|
@@ -121,7 +129,6 @@ if submitted:
|
|
| 121 |
# # === Topic Input ===
|
| 122 |
# st.subheader("π Topics of Interest")
|
| 123 |
# topics_data = []
|
| 124 |
-
|
| 125 |
# with st.form("topics_form"):
|
| 126 |
# topic_count = st.number_input("How many topics?", min_value=1, max_value=10, value=1, step=1)
|
| 127 |
|
|
@@ -135,6 +142,12 @@ if submitted:
|
|
| 135 |
|
| 136 |
# submitted = st.form_submit_button("Run Analysis")
|
| 137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
# # === Submission logic ===
|
| 139 |
# if submitted:
|
| 140 |
# if not openai_api_key or not tavily_api_key or not all([td['topic'] for td in topics_data]):
|
|
@@ -148,77 +161,48 @@ if submitted:
|
|
| 148 |
# df.to_csv(tmp_csv.name, index=False)
|
| 149 |
# csv_path = tmp_csv.name
|
| 150 |
|
| 151 |
-
#
|
| 152 |
-
#
|
| 153 |
-
# log_box = st.empty() # For logs
|
| 154 |
# logs = []
|
| 155 |
-
# rotating = True
|
| 156 |
|
| 157 |
# def log(msg):
|
| 158 |
# logs.append(msg)
|
| 159 |
# log_box.code("\n".join(logs))
|
| 160 |
|
| 161 |
-
# # === Rotating UI Messages ===
|
| 162 |
-
# def rotating_messages():
|
| 163 |
-
# messages = itertools.cycle([
|
| 164 |
-
# "π Searching financial news...",
|
| 165 |
-
# "π§ Running language models...",
|
| 166 |
-
# "π Analyzing investor sentiment...",
|
| 167 |
-
# "π Summarizing key takeaways...",
|
| 168 |
-
# "πΉ Building markdown reports..."
|
| 169 |
-
# ])
|
| 170 |
-
# while rotating:
|
| 171 |
-
# spinner_box.markdown(f"β³ {next(messages)}")
|
| 172 |
-
# time.sleep(1.5)
|
| 173 |
-
|
| 174 |
-
# rotator_thread = Thread(target=rotating_messages)
|
| 175 |
-
# rotator_thread.start()
|
| 176 |
-
|
| 177 |
# try:
|
| 178 |
-
#
|
| 179 |
-
# import openai
|
| 180 |
-
# openai.OpenAI(api_key=openai_api_key).models.list()
|
| 181 |
-
# log("β
OpenAI API key is valid.")
|
| 182 |
-
|
| 183 |
-
# import requests
|
| 184 |
-
# tavily_test = requests.post(
|
| 185 |
-
# "https://api.tavily.com/search",
|
| 186 |
-
# headers={"Authorization": f"Bearer {tavily_api_key}"},
|
| 187 |
-
# json={"query": "test", "days": 1, "max_results": 1}
|
| 188 |
-
# )
|
| 189 |
-
# if tavily_test.status_code == 200:
|
| 190 |
-
# log("β
Tavily API key is valid.")
|
| 191 |
-
# else:
|
| 192 |
-
# raise ValueError(f"Tavily error: {tavily_test.status_code} - {tavily_test.text}")
|
| 193 |
|
| 194 |
# # Run the full pipeline
|
| 195 |
-
#
|
| 196 |
-
# output_path = run_pipeline(csv_path, tavily_api_key, progress_callback=log)
|
| 197 |
|
| 198 |
-
# rotating = False
|
| 199 |
-
# rotator_thread.join()
|
| 200 |
# spinner_box.success("β
Analysis complete!")
|
| 201 |
|
| 202 |
-
#
|
| 203 |
-
#
|
| 204 |
-
#
|
| 205 |
-
#
|
| 206 |
-
#
|
| 207 |
-
#
|
| 208 |
-
|
| 209 |
-
# st.download_button(
|
| 210 |
-
# label=f"π₯ Download {filename}",
|
| 211 |
-
# data=html_content,
|
| 212 |
-
# file_name=filename,
|
| 213 |
-
# mime="text/html"
|
| 214 |
-
# )
|
| 215 |
# st.components.v1.html(html_content, height=600, scrolling=True)
|
| 216 |
-
#
|
| 217 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
|
| 219 |
# except Exception as e:
|
| 220 |
-
# rotating = False
|
| 221 |
-
# rotator_thread.join()
|
| 222 |
# spinner_box.error("β Failed.")
|
| 223 |
# log_box.error(f"β Error: {e}")
|
| 224 |
|
|
|
|
|
|
| 62 |
try:
|
| 63 |
spinner_box.markdown("β³ Running analysis pipeline...")
|
| 64 |
|
|
|
|
| 65 |
html_paths, articles_df, insights_df = run_pipeline(csv_path, tavily_api_key, progress_callback=log)
|
| 66 |
|
| 67 |
spinner_box.success("β
Analysis complete!")
|
|
|
|
| 78 |
|
| 79 |
# --- Articles Tab ---
|
| 80 |
with tab_articles:
|
| 81 |
+
st.subheader("π Articles Table")
|
| 82 |
if not articles_df.empty:
|
| 83 |
st.dataframe(articles_df, use_container_width=True)
|
| 84 |
+
st.download_button(
|
| 85 |
+
label="β¬οΈ Download Articles CSV",
|
| 86 |
+
data=articles_df.to_csv(index=False).encode("utf-8"),
|
| 87 |
+
file_name="articles.csv",
|
| 88 |
+
mime="text/csv"
|
| 89 |
+
)
|
| 90 |
else:
|
| 91 |
st.info("No articles available.")
|
| 92 |
|
| 93 |
# --- Insights Tab ---
|
| 94 |
with tab_insights:
|
| 95 |
+
st.subheader("π Investment Insights")
|
| 96 |
if not insights_df.empty:
|
| 97 |
st.dataframe(insights_df, use_container_width=True)
|
| 98 |
+
st.download_button(
|
| 99 |
+
label="β¬οΈ Download Insights CSV",
|
| 100 |
+
data=insights_df.to_csv(index=False).encode("utf-8"),
|
| 101 |
+
file_name="insights.csv",
|
| 102 |
+
mime="text/csv"
|
| 103 |
+
)
|
| 104 |
else:
|
| 105 |
st.info("No insights available.")
|
| 106 |
|
|
|
|
| 108 |
spinner_box.error("β Failed.")
|
| 109 |
log_box.error(f"β Error: {e}")
|
| 110 |
|
|
|
|
| 111 |
# import os
|
| 112 |
# import sys
|
| 113 |
# import tempfile
|
|
|
|
|
|
|
| 114 |
# import streamlit as st
|
| 115 |
# import pandas as pd
|
|
|
|
|
|
|
| 116 |
|
| 117 |
# # Add 'src' to Python path so we can import main.py
|
| 118 |
# sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
|
|
|
|
| 129 |
# # === Topic Input ===
|
| 130 |
# st.subheader("π Topics of Interest")
|
| 131 |
# topics_data = []
|
|
|
|
| 132 |
# with st.form("topics_form"):
|
| 133 |
# topic_count = st.number_input("How many topics?", min_value=1, max_value=10, value=1, step=1)
|
| 134 |
|
|
|
|
| 142 |
|
| 143 |
# submitted = st.form_submit_button("Run Analysis")
|
| 144 |
|
| 145 |
+
# # === Tabs Setup ===
|
| 146 |
+
# tab_report, tab_articles, tab_insights = st.tabs(["π Report", "π Articles", "π Insights"])
|
| 147 |
+
# articles_df = pd.DataFrame()
|
| 148 |
+
# insights_df = pd.DataFrame()
|
| 149 |
+
# html_paths = []
|
| 150 |
+
|
| 151 |
# # === Submission logic ===
|
| 152 |
# if submitted:
|
| 153 |
# if not openai_api_key or not tavily_api_key or not all([td['topic'] for td in topics_data]):
|
|
|
|
| 161 |
# df.to_csv(tmp_csv.name, index=False)
|
| 162 |
# csv_path = tmp_csv.name
|
| 163 |
|
| 164 |
+
# spinner_box = st.empty()
|
| 165 |
+
# log_box = st.empty()
|
|
|
|
| 166 |
# logs = []
|
|
|
|
| 167 |
|
| 168 |
# def log(msg):
|
| 169 |
# logs.append(msg)
|
| 170 |
# log_box.code("\n".join(logs))
|
| 171 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
# try:
|
| 173 |
+
# spinner_box.markdown("β³ Running analysis pipeline...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
|
| 175 |
# # Run the full pipeline
|
| 176 |
+
# html_paths, articles_df, insights_df = run_pipeline(csv_path, tavily_api_key, progress_callback=log)
|
|
|
|
| 177 |
|
|
|
|
|
|
|
| 178 |
# spinner_box.success("β
Analysis complete!")
|
| 179 |
|
| 180 |
+
# # --- Report Tab ---
|
| 181 |
+
# with tab_report:
|
| 182 |
+
# if html_paths:
|
| 183 |
+
# for path in html_paths:
|
| 184 |
+
# with open(path, 'r', encoding='utf-8') as f:
|
| 185 |
+
# html_content = f.read()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
# st.components.v1.html(html_content, height=600, scrolling=True)
|
| 187 |
+
# else:
|
| 188 |
+
# st.error("β No reports were generated.")
|
| 189 |
+
|
| 190 |
+
# # --- Articles Tab ---
|
| 191 |
+
# with tab_articles:
|
| 192 |
+
# if not articles_df.empty:
|
| 193 |
+
# st.dataframe(articles_df, use_container_width=True)
|
| 194 |
+
# else:
|
| 195 |
+
# st.info("No articles available.")
|
| 196 |
+
|
| 197 |
+
# # --- Insights Tab ---
|
| 198 |
+
# with tab_insights:
|
| 199 |
+
# if not insights_df.empty:
|
| 200 |
+
# st.dataframe(insights_df, use_container_width=True)
|
| 201 |
+
# else:
|
| 202 |
+
# st.info("No insights available.")
|
| 203 |
|
| 204 |
# except Exception as e:
|
|
|
|
|
|
|
| 205 |
# spinner_box.error("β Failed.")
|
| 206 |
# log_box.error(f"β Error: {e}")
|
| 207 |
|
| 208 |
+
|
src/main.py
CHANGED
|
@@ -2,11 +2,8 @@ import os
|
|
| 2 |
import pandas as pd
|
| 3 |
from datetime import datetime
|
| 4 |
from dotenv import load_dotenv
|
| 5 |
-
import traceback
|
| 6 |
-
|
| 7 |
from md_html import convert_single_md_to_html as convert_md_to_html
|
| 8 |
from news_analysis import fetch_deep_news, generate_value_investor_report
|
| 9 |
-
from csv_utils import detect_changes
|
| 10 |
from fin_interpreter import analyze_article
|
| 11 |
|
| 12 |
BASE_DIR = os.path.dirname(os.path.dirname(__file__))
|
|
@@ -30,30 +27,33 @@ def build_metrics_box(topic, num_articles):
|
|
| 30 |
"""
|
| 31 |
|
| 32 |
|
| 33 |
-
def
|
| 34 |
-
"""
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
else:
|
| 51 |
-
|
| 52 |
|
|
|
|
|
|
|
|
|
|
| 53 |
new_md_files = []
|
| 54 |
all_articles = []
|
| 55 |
|
| 56 |
-
for _, row in
|
| 57 |
topic = row.get("topic")
|
| 58 |
timespan = row.get("timespan_days", 7)
|
| 59 |
msg = f"π Processing: {topic} ({timespan} days)"
|
|
@@ -64,38 +64,46 @@ def run_value_investing_analysis(csv_path, progress_callback=None):
|
|
| 64 |
news = fetch_deep_news(topic, timespan)
|
| 65 |
if not news:
|
| 66 |
warning = f"β οΈ No news found for: {topic}"
|
| 67 |
-
print(warning)
|
| 68 |
if progress_callback:
|
| 69 |
progress_callback(warning)
|
| 70 |
continue
|
| 71 |
|
| 72 |
-
#
|
| 73 |
for article in news:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
try:
|
| 75 |
-
res = analyze_article(
|
| 76 |
if isinstance(res, dict):
|
| 77 |
-
sentiment = res.get("sentiment")
|
| 78 |
-
confidence = res.get("confidence")
|
| 79 |
-
signal = res.get("signal")
|
| 80 |
else:
|
| 81 |
-
sentiment, confidence, signal = res[0], res[1], res[2]
|
| 82 |
-
except Exception
|
| 83 |
-
sentiment, confidence, signal = "
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
all_articles.append({
|
| 87 |
-
"Title":
|
| 88 |
-
"URL":
|
| 89 |
-
"Summary":
|
| 90 |
-
"Priority":
|
| 91 |
-
"Date":
|
| 92 |
-
"Company":
|
| 93 |
"Sentiment": sentiment,
|
| 94 |
-
"Confidence": confidence,
|
| 95 |
"Signal": signal
|
| 96 |
})
|
| 97 |
|
| 98 |
-
# Generate report
|
| 99 |
report_body = generate_value_investor_report(topic, news)
|
| 100 |
metrics_md = build_metrics_box(topic, len(news))
|
| 101 |
full_md = metrics_md + report_body
|
|
@@ -113,12 +121,37 @@ def run_value_investing_analysis(csv_path, progress_callback=None):
|
|
| 113 |
|
| 114 |
new_md_files.append(filepath)
|
| 115 |
|
| 116 |
-
if progress_callback:
|
| 117 |
-
progress_callback(f"β
Markdown saved to: {DATA_DIR}")
|
| 118 |
-
current_df.to_csv(prev_path, index=False)
|
| 119 |
return new_md_files, all_articles
|
| 120 |
|
| 121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
|
| 123 |
os.environ["TAVILY_API_KEY"] = tavily_api_key
|
| 124 |
|
|
@@ -134,39 +167,24 @@ def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
|
|
| 134 |
return new_html_paths, articles_df, insights_df
|
| 135 |
|
| 136 |
|
| 137 |
-
def build_company_insights(articles_df):
|
| 138 |
-
if articles_df.empty:
|
| 139 |
-
return pd.DataFrame()
|
| 140 |
-
grouped = (
|
| 141 |
-
articles_df.groupby("Company")
|
| 142 |
-
.agg({
|
| 143 |
-
"Title": "count",
|
| 144 |
-
"Sentiment": lambda x: x.mode()[0] if not x.mode().empty else "Neutral",
|
| 145 |
-
"Signal": lambda x: x.mode()[0] if not x.mode().empty else "Watch"
|
| 146 |
-
})
|
| 147 |
-
.reset_index()
|
| 148 |
-
.rename(columns={"Title": "Mentions"})
|
| 149 |
-
)
|
| 150 |
-
return grouped
|
| 151 |
-
|
| 152 |
-
|
| 153 |
if __name__ == "__main__":
|
| 154 |
-
md_files,
|
| 155 |
for md in md_files:
|
| 156 |
convert_md_to_html(md, HTML_DIR)
|
| 157 |
print(f"π All reports converted to HTML at: {HTML_DIR}")
|
| 158 |
|
| 159 |
-
|
| 160 |
-
# import
|
|
|
|
| 161 |
# from datetime import datetime
|
| 162 |
# from dotenv import load_dotenv
|
| 163 |
-
# import
|
| 164 |
|
| 165 |
# from md_html import convert_single_md_to_html as convert_md_to_html
|
| 166 |
# from news_analysis import fetch_deep_news, generate_value_investor_report
|
| 167 |
# from csv_utils import detect_changes
|
|
|
|
| 168 |
|
| 169 |
-
# # === Setup Paths ===
|
| 170 |
# BASE_DIR = os.path.dirname(os.path.dirname(__file__))
|
| 171 |
# DATA_DIR = os.path.join(BASE_DIR, "data")
|
| 172 |
# HTML_DIR = os.path.join(BASE_DIR, "html")
|
|
@@ -175,9 +193,9 @@ if __name__ == "__main__":
|
|
| 175 |
# os.makedirs(DATA_DIR, exist_ok=True)
|
| 176 |
# os.makedirs(HTML_DIR, exist_ok=True)
|
| 177 |
|
| 178 |
-
# # === Load .env ===
|
| 179 |
# load_dotenv()
|
| 180 |
|
|
|
|
| 181 |
# def build_metrics_box(topic, num_articles):
|
| 182 |
# now = datetime.now().strftime("%Y-%m-%d %H:%M")
|
| 183 |
# return f"""
|
|
@@ -187,21 +205,29 @@ if __name__ == "__main__":
|
|
| 187 |
# >
|
| 188 |
# """
|
| 189 |
|
|
|
|
| 190 |
# def run_value_investing_analysis(csv_path, progress_callback=None):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
# current_df = pd.read_csv(csv_path)
|
| 192 |
# prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv")
|
| 193 |
-
|
| 194 |
# if os.path.exists(prev_path):
|
| 195 |
# previous_df = pd.read_csv(prev_path)
|
| 196 |
# changed_df = detect_changes(current_df, previous_df)
|
| 197 |
# if changed_df.empty:
|
| 198 |
# if progress_callback:
|
| 199 |
# progress_callback("β
No changes detected. Skipping processing.")
|
| 200 |
-
# return []
|
| 201 |
# else:
|
| 202 |
# changed_df = current_df
|
| 203 |
|
| 204 |
# new_md_files = []
|
|
|
|
| 205 |
|
| 206 |
# for _, row in changed_df.iterrows():
|
| 207 |
# topic = row.get("topic")
|
|
@@ -219,20 +245,42 @@ if __name__ == "__main__":
|
|
| 219 |
# progress_callback(warning)
|
| 220 |
# continue
|
| 221 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
# report_body = generate_value_investor_report(topic, news)
|
| 223 |
-
# image_url = "https://via.placeholder.com/1281x721?text=No+Image+Available"
|
| 224 |
-
# image_credit = "Image placeholder"
|
| 225 |
-
|
| 226 |
# metrics_md = build_metrics_box(topic, len(news))
|
| 227 |
# full_md = metrics_md + report_body
|
| 228 |
|
| 229 |
-
#
|
| 230 |
-
# filename = base_filename + ".md"
|
| 231 |
# filepath = os.path.join(DATA_DIR, filename)
|
| 232 |
-
|
| 233 |
# counter = 1
|
| 234 |
# while os.path.exists(filepath):
|
| 235 |
-
# filename = f"{
|
| 236 |
# filepath = os.path.join(DATA_DIR, filename)
|
| 237 |
# counter += 1
|
| 238 |
|
|
@@ -244,25 +292,43 @@ if __name__ == "__main__":
|
|
| 244 |
# if progress_callback:
|
| 245 |
# progress_callback(f"β
Markdown saved to: {DATA_DIR}")
|
| 246 |
# current_df.to_csv(prev_path, index=False)
|
| 247 |
-
# return new_md_files
|
|
|
|
| 248 |
|
| 249 |
# def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
|
| 250 |
# os.environ["TAVILY_API_KEY"] = tavily_api_key
|
| 251 |
|
| 252 |
-
# new_md_files = run_value_investing_analysis(csv_path, progress_callback)
|
| 253 |
# new_html_paths = []
|
| 254 |
-
|
| 255 |
# for md_path in new_md_files:
|
| 256 |
# convert_md_to_html(md_path, HTML_DIR)
|
| 257 |
# html_path = os.path.join(HTML_DIR, os.path.basename(md_path).replace(".md", ".html"))
|
| 258 |
# new_html_paths.append(html_path)
|
| 259 |
|
| 260 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
|
| 262 |
# if __name__ == "__main__":
|
| 263 |
-
# md_files = run_value_investing_analysis(CSV_PATH)
|
| 264 |
# for md in md_files:
|
| 265 |
# convert_md_to_html(md, HTML_DIR)
|
| 266 |
# print(f"π All reports converted to HTML at: {HTML_DIR}")
|
| 267 |
|
| 268 |
-
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
from datetime import datetime
|
| 4 |
from dotenv import load_dotenv
|
|
|
|
|
|
|
| 5 |
from md_html import convert_single_md_to_html as convert_md_to_html
|
| 6 |
from news_analysis import fetch_deep_news, generate_value_investor_report
|
|
|
|
| 7 |
from fin_interpreter import analyze_article
|
| 8 |
|
| 9 |
BASE_DIR = os.path.dirname(os.path.dirname(__file__))
|
|
|
|
| 27 |
"""
|
| 28 |
|
| 29 |
|
| 30 |
+
def derive_priority(sentiment, confidence):
|
| 31 |
+
"""Basic logic to derive priority for articles."""
|
| 32 |
+
if sentiment == "Positive" and confidence > 0.75:
|
| 33 |
+
return "High"
|
| 34 |
+
elif sentiment == "Negative" and confidence > 0.6:
|
| 35 |
+
return "High"
|
| 36 |
+
elif confidence > 0.5:
|
| 37 |
+
return "Medium"
|
| 38 |
+
return "Low"
|
| 39 |
|
| 40 |
+
|
| 41 |
+
def derive_signal(sentiment, confidence):
|
| 42 |
+
"""Basic investment signal logic."""
|
| 43 |
+
if sentiment == "Positive" and confidence > 0.7:
|
| 44 |
+
return "Buy"
|
| 45 |
+
elif sentiment == "Negative":
|
| 46 |
+
return "Avoid"
|
| 47 |
else:
|
| 48 |
+
return "Watch"
|
| 49 |
|
| 50 |
+
|
| 51 |
+
def run_value_investing_analysis(csv_path, progress_callback=None):
|
| 52 |
+
current_df = pd.read_csv(csv_path)
|
| 53 |
new_md_files = []
|
| 54 |
all_articles = []
|
| 55 |
|
| 56 |
+
for _, row in current_df.iterrows():
|
| 57 |
topic = row.get("topic")
|
| 58 |
timespan = row.get("timespan_days", 7)
|
| 59 |
msg = f"π Processing: {topic} ({timespan} days)"
|
|
|
|
| 64 |
news = fetch_deep_news(topic, timespan)
|
| 65 |
if not news:
|
| 66 |
warning = f"β οΈ No news found for: {topic}"
|
|
|
|
| 67 |
if progress_callback:
|
| 68 |
progress_callback(warning)
|
| 69 |
continue
|
| 70 |
|
| 71 |
+
# Process each article
|
| 72 |
for article in news:
|
| 73 |
+
summary = article.get("summary", "")
|
| 74 |
+
title = article.get("title", "Untitled")
|
| 75 |
+
url = article.get("url", "")
|
| 76 |
+
date = article.get("date", datetime.now().strftime("%Y-%m-%d"))
|
| 77 |
+
company = article.get("company", topic)
|
| 78 |
+
|
| 79 |
try:
|
| 80 |
+
res = analyze_article(summary)
|
| 81 |
if isinstance(res, dict):
|
| 82 |
+
sentiment = res.get("sentiment", "Neutral")
|
| 83 |
+
confidence = float(res.get("confidence", 0.0))
|
| 84 |
+
signal = res.get("signal", "Watch")
|
| 85 |
else:
|
| 86 |
+
sentiment, confidence, signal = res[0], float(res[1]), res[2]
|
| 87 |
+
except Exception:
|
| 88 |
+
sentiment, confidence, signal = "Neutral", 0.0, "Watch"
|
| 89 |
+
|
| 90 |
+
priority = derive_priority(sentiment, confidence)
|
| 91 |
+
if signal == "None":
|
| 92 |
+
signal = derive_signal(sentiment, confidence)
|
| 93 |
|
| 94 |
all_articles.append({
|
| 95 |
+
"Title": title,
|
| 96 |
+
"URL": url,
|
| 97 |
+
"Summary": summary,
|
| 98 |
+
"Priority": priority,
|
| 99 |
+
"Date": date,
|
| 100 |
+
"Company": company,
|
| 101 |
"Sentiment": sentiment,
|
| 102 |
+
"Confidence": round(confidence, 2),
|
| 103 |
"Signal": signal
|
| 104 |
})
|
| 105 |
|
| 106 |
+
# Generate markdown report
|
| 107 |
report_body = generate_value_investor_report(topic, news)
|
| 108 |
metrics_md = build_metrics_box(topic, len(news))
|
| 109 |
full_md = metrics_md + report_body
|
|
|
|
| 121 |
|
| 122 |
new_md_files.append(filepath)
|
| 123 |
|
|
|
|
|
|
|
|
|
|
| 124 |
return new_md_files, all_articles
|
| 125 |
|
| 126 |
|
| 127 |
+
def build_company_insights(articles_df):
|
| 128 |
+
if articles_df.empty:
|
| 129 |
+
return pd.DataFrame()
|
| 130 |
+
|
| 131 |
+
insights = []
|
| 132 |
+
for company, group in articles_df.groupby("Company"):
|
| 133 |
+
mentions = len(group)
|
| 134 |
+
dominant_sentiment = group["Sentiment"].mode()[0] if not group["Sentiment"].mode().empty else "Neutral"
|
| 135 |
+
dominant_signal = group["Signal"].mode()[0] if not group["Signal"].mode().empty else "Watch"
|
| 136 |
+
avg_confidence = round(group["Confidence"].mean(), 2)
|
| 137 |
+
risk_level = "High" if (dominant_sentiment == "Negative" and avg_confidence > 0.5) else "Low"
|
| 138 |
+
if dominant_sentiment == "Neutral":
|
| 139 |
+
risk_level = "Medium"
|
| 140 |
+
|
| 141 |
+
highlights = " | ".join(group["Summary"].head(2).tolist())
|
| 142 |
+
insights.append({
|
| 143 |
+
"Company": company,
|
| 144 |
+
"Mentions": mentions,
|
| 145 |
+
"Sentiment": dominant_sentiment,
|
| 146 |
+
"Signal": dominant_signal,
|
| 147 |
+
"Risk": risk_level,
|
| 148 |
+
"Confidence": avg_confidence,
|
| 149 |
+
"Highlights": highlights
|
| 150 |
+
})
|
| 151 |
+
|
| 152 |
+
return pd.DataFrame(insights)
|
| 153 |
+
|
| 154 |
+
|
| 155 |
def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
|
| 156 |
os.environ["TAVILY_API_KEY"] = tavily_api_key
|
| 157 |
|
|
|
|
| 167 |
return new_html_paths, articles_df, insights_df
|
| 168 |
|
| 169 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
if __name__ == "__main__":
|
| 171 |
+
md_files, all_articles = run_value_investing_analysis(CSV_PATH)
|
| 172 |
for md in md_files:
|
| 173 |
convert_md_to_html(md, HTML_DIR)
|
| 174 |
print(f"π All reports converted to HTML at: {HTML_DIR}")
|
| 175 |
|
| 176 |
+
|
| 177 |
+
# import os
|
| 178 |
+
# import pandas as pd
|
| 179 |
# from datetime import datetime
|
| 180 |
# from dotenv import load_dotenv
|
| 181 |
+
# import traceback
|
| 182 |
|
| 183 |
# from md_html import convert_single_md_to_html as convert_md_to_html
|
| 184 |
# from news_analysis import fetch_deep_news, generate_value_investor_report
|
| 185 |
# from csv_utils import detect_changes
|
| 186 |
+
# from fin_interpreter import analyze_article
|
| 187 |
|
|
|
|
| 188 |
# BASE_DIR = os.path.dirname(os.path.dirname(__file__))
|
| 189 |
# DATA_DIR = os.path.join(BASE_DIR, "data")
|
| 190 |
# HTML_DIR = os.path.join(BASE_DIR, "html")
|
|
|
|
| 193 |
# os.makedirs(DATA_DIR, exist_ok=True)
|
| 194 |
# os.makedirs(HTML_DIR, exist_ok=True)
|
| 195 |
|
|
|
|
| 196 |
# load_dotenv()
|
| 197 |
|
| 198 |
+
|
| 199 |
# def build_metrics_box(topic, num_articles):
|
| 200 |
# now = datetime.now().strftime("%Y-%m-%d %H:%M")
|
| 201 |
# return f"""
|
|
|
|
| 205 |
# >
|
| 206 |
# """
|
| 207 |
|
| 208 |
+
|
| 209 |
# def run_value_investing_analysis(csv_path, progress_callback=None):
|
| 210 |
+
# """
|
| 211 |
+
# Runs the analysis for all topics in the CSV.
|
| 212 |
+
# Returns:
|
| 213 |
+
# md_files (list of md file paths)
|
| 214 |
+
# all_articles (list of article dicts)
|
| 215 |
+
# """
|
| 216 |
# current_df = pd.read_csv(csv_path)
|
| 217 |
# prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv")
|
| 218 |
+
|
| 219 |
# if os.path.exists(prev_path):
|
| 220 |
# previous_df = pd.read_csv(prev_path)
|
| 221 |
# changed_df = detect_changes(current_df, previous_df)
|
| 222 |
# if changed_df.empty:
|
| 223 |
# if progress_callback:
|
| 224 |
# progress_callback("β
No changes detected. Skipping processing.")
|
| 225 |
+
# return [], []
|
| 226 |
# else:
|
| 227 |
# changed_df = current_df
|
| 228 |
|
| 229 |
# new_md_files = []
|
| 230 |
+
# all_articles = []
|
| 231 |
|
| 232 |
# for _, row in changed_df.iterrows():
|
| 233 |
# topic = row.get("topic")
|
|
|
|
| 245 |
# progress_callback(warning)
|
| 246 |
# continue
|
| 247 |
|
| 248 |
+
# # Add articles to all_articles
|
| 249 |
+
# for article in news:
|
| 250 |
+
# try:
|
| 251 |
+
# res = analyze_article(article.get("summary", ""))
|
| 252 |
+
# if isinstance(res, dict):
|
| 253 |
+
# sentiment = res.get("sentiment")
|
| 254 |
+
# confidence = res.get("confidence")
|
| 255 |
+
# signal = res.get("signal")
|
| 256 |
+
# else:
|
| 257 |
+
# sentiment, confidence, signal = res[0], res[1], res[2]
|
| 258 |
+
# except Exception as e:
|
| 259 |
+
# sentiment, confidence, signal = "Unknown", 0.0, "None"
|
| 260 |
+
# print(f"Error analyzing article: {e}")
|
| 261 |
+
|
| 262 |
+
# all_articles.append({
|
| 263 |
+
# "Title": article.get("title"),
|
| 264 |
+
# "URL": article.get("url"),
|
| 265 |
+
# "Summary": article.get("summary"),
|
| 266 |
+
# "Priority": article.get("priority", "Low"),
|
| 267 |
+
# "Date": article.get("date"),
|
| 268 |
+
# "Company": article.get("company", topic),
|
| 269 |
+
# "Sentiment": sentiment,
|
| 270 |
+
# "Confidence": confidence,
|
| 271 |
+
# "Signal": signal
|
| 272 |
+
# })
|
| 273 |
+
|
| 274 |
+
# # Generate report
|
| 275 |
# report_body = generate_value_investor_report(topic, news)
|
|
|
|
|
|
|
|
|
|
| 276 |
# metrics_md = build_metrics_box(topic, len(news))
|
| 277 |
# full_md = metrics_md + report_body
|
| 278 |
|
| 279 |
+
# filename = f"{topic.replace(' ', '_').lower()}_{datetime.now().strftime('%Y-%m-%d')}.md"
|
|
|
|
| 280 |
# filepath = os.path.join(DATA_DIR, filename)
|
|
|
|
| 281 |
# counter = 1
|
| 282 |
# while os.path.exists(filepath):
|
| 283 |
+
# filename = f"{topic.replace(' ', '_').lower()}_{datetime.now().strftime('%Y-%m-%d')}_{counter}.md"
|
| 284 |
# filepath = os.path.join(DATA_DIR, filename)
|
| 285 |
# counter += 1
|
| 286 |
|
|
|
|
| 292 |
# if progress_callback:
|
| 293 |
# progress_callback(f"β
Markdown saved to: {DATA_DIR}")
|
| 294 |
# current_df.to_csv(prev_path, index=False)
|
| 295 |
+
# return new_md_files, all_articles
|
| 296 |
+
|
| 297 |
|
| 298 |
# def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
|
| 299 |
# os.environ["TAVILY_API_KEY"] = tavily_api_key
|
| 300 |
|
| 301 |
+
# new_md_files, all_articles = run_value_investing_analysis(csv_path, progress_callback)
|
| 302 |
# new_html_paths = []
|
|
|
|
| 303 |
# for md_path in new_md_files:
|
| 304 |
# convert_md_to_html(md_path, HTML_DIR)
|
| 305 |
# html_path = os.path.join(HTML_DIR, os.path.basename(md_path).replace(".md", ".html"))
|
| 306 |
# new_html_paths.append(html_path)
|
| 307 |
|
| 308 |
+
# articles_df = pd.DataFrame(all_articles)
|
| 309 |
+
# insights_df = build_company_insights(articles_df)
|
| 310 |
+
# return new_html_paths, articles_df, insights_df
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
# def build_company_insights(articles_df):
|
| 314 |
+
# if articles_df.empty:
|
| 315 |
+
# return pd.DataFrame()
|
| 316 |
+
# grouped = (
|
| 317 |
+
# articles_df.groupby("Company")
|
| 318 |
+
# .agg({
|
| 319 |
+
# "Title": "count",
|
| 320 |
+
# "Sentiment": lambda x: x.mode()[0] if not x.mode().empty else "Neutral",
|
| 321 |
+
# "Signal": lambda x: x.mode()[0] if not x.mode().empty else "Watch"
|
| 322 |
+
# })
|
| 323 |
+
# .reset_index()
|
| 324 |
+
# .rename(columns={"Title": "Mentions"})
|
| 325 |
+
# )
|
| 326 |
+
# return grouped
|
| 327 |
+
|
| 328 |
|
| 329 |
# if __name__ == "__main__":
|
| 330 |
+
# md_files, _ = run_value_investing_analysis(CSV_PATH)
|
| 331 |
# for md in md_files:
|
| 332 |
# convert_md_to_html(md, HTML_DIR)
|
| 333 |
# print(f"π All reports converted to HTML at: {HTML_DIR}")
|
| 334 |
|
|
|