angxt commited on
Commit
6356568
·
verified ·
1 Parent(s): 7324412

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -401
app.py DELETED
@@ -1,401 +0,0 @@
1
- # app_final.py (final debugged version)
2
- import streamlit as st
3
- import requests
4
- import yfinance as yf
5
- import pandas as pd
6
- import numpy as np
7
- import os
8
- from datetime import datetime, timedelta
9
- import joblib
10
- import re
11
- import time
12
- import cloudpickle
13
-
14
# ---------------------------- CONFIG ----------------------------
# Hugging Face token comes from Streamlit secrets; it is never in source.
HF_API_TOKEN = st.secrets["HF_API_TOKEN"]
# SECURITY NOTE(review): the two keys below are hard-coded in the source.
# They should be moved into st.secrets (like HF_API_TOKEN above) and
# rotated, since this file is visible in the repository.
CRYPTO_NEWS_API_KEY = "of9jvyylshwcddtw0qsv16zswpi8k39lbr67qm97"
FRED_API_KEY = "4c3fd5be0b1f052f5d1d0080261277b1"

# Hosted inference endpoint for the ProsusAI/finbert sentiment model.
FINBERT_API = "https://api-inference.huggingface.co/models/ProsusAI/finbert"
HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}

# Yahoo Finance symbols for the price / macro series used as model features.
TICKERS = {
    "bitcoin": "BTC-USD",
    "gold": "GC=F",
    "sp500": "^GSPC",
    "dxy": "DX-Y.NYB"
}

# FRED series ids: federal funds rate and CPI (used as the inflation proxy).
FRED_CODES = {
    "interest_rate": "FEDFUNDS",
    "inflation": "CPIAUCSL"
}
33
# Load model using cloudpickle
# NOTE(review): unpickling executes arbitrary code — only safe because these
# artifacts ship with the app; never load untrusted pickle files this way.
with open("histgb_pca_model_clean.pkl", "rb") as f:
    model = cloudpickle.load(f)

# Pre-fitted PCA and feature scaler. The scaler also carries the expected
# feature-name order (scaler.feature_names_in_) used to align inputs later.
pca = joblib.load("pca.pkl")
scaler = joblib.load("scaler.pkl")
40
- # ---------------------------- FUNCTIONS ----------------------------
41
def fetch_news(source):
    """Fetch up to 10 recent general-category article summaries.

    Parameters
    ----------
    source : str
        Source name understood by cryptonews-api (e.g. "CryptoNews").

    Returns
    -------
    list[str]
        One short summary per article; may be empty.

    Raises
    ------
    requests.HTTPError
        If the API responds with a non-2xx status (bad token, quota, ...).
    """
    url = "https://cryptonews-api.com/api/v1/category"  # constant; no f-string needed
    params = {
        "section": "general",
        "items": 10,
        "page": 1,
        "source": source,
        "token": CRYPTO_NEWS_API_KEY
    }
    # Timeout keeps the Streamlit app from hanging on a dead endpoint;
    # raise_for_status surfaces API errors instead of silently yielding [].
    r = requests.get(url, params=params, timeout=30)
    r.raise_for_status()
    articles = r.json().get("data", [])
    texts = []
    for art in articles:
        # Prefer the API's summary field; fall back to the first sentence
        # of the full content.
        summary = art.get("text") or art.get("content", "").split(".")[0]
        texts.append(summary)
    return texts
57
-
58
def call_finbert(news_list):
    """Score up to five news texts with the hosted FinBERT model.

    Non-string or blank entries are scored as all zeros without calling
    the API. Each article gets up to 5 retry attempts on request errors.

    Parameters
    ----------
    news_list : list
        Article texts (any excess beyond 5 is ignored).

    Returns
    -------
    pd.DataFrame
        Exactly one row per considered article with columns
        "positive", "neutral", "negative".
    """
    zero_row = {"positive": 0.0, "neutral": 0.0, "negative": 0.0}
    results_df = []
    news_list = news_list[:5]  # cap API cost / latency
    for idx, news in enumerate(news_list):
        if not isinstance(news, str) or not news.strip():
            results_df.append(dict(zero_row))
            continue
        payload = {"inputs": news}
        for attempt in range(5):
            try:
                response = requests.post(FINBERT_API, headers=HEADERS, json=payload, timeout=30)
                response.raise_for_status()
                output = response.json()

                # Get raw scores
                scores_raw = {item["label"].lower(): item["score"] for item in output[0]}

                # Ensure fixed column order
                aligned_scores = {
                    "positive": scores_raw.get("positive", 0.0),
                    "neutral": scores_raw.get("neutral", 0.0),
                    "negative": scores_raw.get("negative", 0.0)
                }

                results_df.append(aligned_scores)
                break
            except requests.exceptions.RequestException as e:
                st.warning(f"⚠️ FinBERT error on article {idx+1}, attempt {attempt+1}/5: {e}")
                time.sleep(2)
            except Exception as ex:
                st.warning(f"❌ Failed to analyze article {idx+1}: {ex}")
                results_df.append(dict(zero_row))
                break
        else:
            # BUG FIX: previously, exhausting all 5 request retries appended
            # nothing for this article, so the returned frame had fewer rows
            # than articles and downstream aggregation was silently skewed.
            # Record a neutral zero row instead.
            results_df.append(dict(zero_row))
    return pd.DataFrame(results_df)
92
-
93
def aggregate_sentiments(sentiment_df):
    """Aggregate per-article FinBERT scores into summary features.

    Each column is min-max scaled across the articles; extreme (> 0.75)
    positive/negative scores are boosted 1.5x and clipped back to [0, 1];
    the boosted frame is then averaged. Also counts, per column, how many
    scaled scores exceed 0.75.

    Parameters
    ----------
    sentiment_df : pd.DataFrame
        Columns "positive", "neutral", "negative", one row per article.

    Returns
    -------
    tuple[dict, dict]
        (column -> weighted mean score, column -> extreme-score count)
    """
    # Min-max scale every column; the epsilon keeps a constant column from
    # dividing by zero (such a column scales to all zeros instead).
    normalized = sentiment_df.copy()
    for name in normalized.columns:
        lo = normalized[name].min()
        hi = normalized[name].max()
        normalized[name] = (normalized[name] - lo) / (hi - lo + 1e-8)

    boosted = normalized.copy()
    for name in ("positive", "negative"):
        # Amplify strongly polarised scores, then cap back into [0, 1].
        amplified = np.where(normalized[name] > 0.75, normalized[name] * 1.5, normalized[name])
        boosted[name] = np.clip(amplified, 0, 1)
    # Neutral is passed through unboosted.
    boosted["neutral"] = normalized["neutral"]

    extreme_counts = (normalized > 0.75).sum().to_dict()
    return boosted.mean().to_dict(), extreme_counts
103
-
104
def fetch_yahoo_data(ticker, date, _max_lookback=10):
    """Fetch one day of OHLC(V) data for *ticker* from Yahoo Finance.

    Walks backwards one day at a time when the requested date has no
    trading data (weekend / holiday), up to *_max_lookback* days.

    Parameters
    ----------
    ticker : str
        Yahoo Finance symbol.
    date : datetime-like
        Day to fetch (history window is [date, date + 1 day)).
    _max_lookback : int, optional
        Bound on the backwards search; keeps a bad ticker or an API
        outage from recursing forever.

    Returns
    -------
    dict
        open/high/low/close rounded to 2 dp, volume (None for the DXY
        ticker), and intraday change_pct.

    Raises
    ------
    ValueError
        If no trading data is found within the lookback window.
    """
    data = yf.Ticker(ticker).history(start=date, end=date + timedelta(days=1))
    if not data.empty:
        return {
            "open": round(data["Open"].iloc[0], 2),
            "high": round(data["High"].iloc[0], 2),
            "low": round(data["Low"].iloc[0], 2),
            "close": round(data["Close"].iloc[0], 2),
            # DXY has no meaningful volume on Yahoo, so it is reported as None.
            "volume": int(data["Volume"].iloc[0]) if ticker != TICKERS["dxy"] else None,
            "change_pct": round(((data["Close"].iloc[0] - data["Open"].iloc[0]) / data["Open"].iloc[0]) * 100, 2)
        }
    # BUG FIX: the original recursed unconditionally, so a symbol that never
    # returns data walked back day-by-day until RecursionError. Bound it.
    if _max_lookback <= 0:
        raise ValueError(f"No trading data found for {ticker} near {date}")
    st.warning(f"⚠️ No trading data for {ticker} on {date.strftime('%Y-%m-%d')}, using previous available data.")
    return fetch_yahoo_data(ticker, date - timedelta(days=1), _max_lookback - 1)
118
-
119
def fetch_fred(code, month, _max_lookback=12):
    """Fetch the first observation of a FRED series at or after month-01.

    Falls back one month at a time (up to *_max_lookback* months) when the
    requested month has no published observation yet.

    Parameters
    ----------
    code : str
        FRED series id (e.g. "FEDFUNDS").
    month : str
        Month in "YYYY-MM" form.
    _max_lookback : int, optional
        Bound on the backwards search to avoid unbounded recursion.

    Returns
    -------
    float

    Raises
    ------
    ValueError
        If no numeric observation is found within the lookback window.
    """
    url = "https://api.stlouisfed.org/fred/series/observations"  # constant; no f-string needed
    params = {
        "series_id": code,
        "observation_start": f"{month}-01",
        "api_key": FRED_API_KEY,
        "file_type": "json"
    }
    # Timeout keeps the UI responsive if FRED is slow/unreachable.
    res = requests.get(url, params=params, timeout=30).json()
    try:
        return float(res["observations"][0]["value"])
    # BUG FIX: was a bare `except:`, which also swallowed KeyboardInterrupt
    # and masked programming errors. Catch only what the fallback is for:
    # missing observations (KeyError/IndexError) or FRED's "." placeholder
    # for not-yet-published values (ValueError from float()).
    except (KeyError, IndexError, ValueError):
        if _max_lookback <= 0:
            raise ValueError(f"No FRED data found for {code} within lookback window")
        # Approximate "previous month" by stepping back 30 days.
        prev_month = (datetime.strptime(month, "%Y-%m") - timedelta(days=30)).strftime("%Y-%m")
        return fetch_fred(code, prev_month, _max_lookback - 1)
133
-
134
def make_prediction(input_data):
    """Run the scaler -> PCA -> classifier pipeline on one feature vector.

    Parameters
    ----------
    input_data : sequence of float
        Feature values, one per entry of scaler.feature_names_in_, in
        that exact order.

    Returns
    -------
    tuple[str, float]
        ("Increase" or "Decrease", probability rounded to 4 dp).

    Raises
    ------
    ValueError
        If input_data does not supply exactly one value per feature.
    """
    feature_names = list(scaler.feature_names_in_)

    # SAFETY CHECK
    if len(feature_names) != len(input_data):
        raise ValueError(f"❌ Input length mismatch! Got {len(input_data)}, expected {len(feature_names)}")

    # Build a one-row frame whose columns follow the scaler's training order.
    row = dict(zip(feature_names, input_data))
    frame = pd.DataFrame([row])[feature_names]

    # DEBUG VIEW
    st.write("📄 Aligned Input DataFrame:")
    st.dataframe(frame)

    # Transform through the fitted pipeline and take P(class = 1).
    reduced = pca.transform(scaler.transform(frame))
    increase_prob = model.predict_proba(reduced)[0][1]
    # 0.62 decision threshold — presumably tuned offline; confirm with the
    # model training notes before changing.
    label = "Increase" if increase_prob >= 0.62 else "Decrease"
    return label, round(increase_prob, 4)
155
-
156
-
157
- import gspread
158
- from oauth2client.service_account import ServiceAccountCredentials
159
-
160
def log_prediction(record):
    """Append one prediction record as a row to the Google Sheet log.

    Best-effort by design: any failure (missing creds.json, sheet not
    found, network error) is surfaced via st.warning rather than raised,
    so a logging outage never blocks the prediction UI.

    Parameters
    ----------
    record : dict
        Column -> value mapping. Only the values are written, in the
        dict's insertion order, so the order must match the sheet's
        columns.
    """
    try:
        # OAuth scopes for Sheets + Drive access.
        scope = ["https://spreadsheets.google.com/feeds",
                 "https://www.googleapis.com/auth/drive"]

        # Service-account credentials are read from a local creds.json file.
        creds = ServiceAccountCredentials.from_json_keyfile_name("creds.json", scope)
        client = gspread.authorize(creds)

        sheet = client.open("BTC Predictions Log").sheet1  # Must match your actual Google Sheet name
        sheet.append_row(list(record.values()))
        st.success("✅ Logged to Google Sheet successfully.")
    except Exception as e:
        st.warning(f"⚠️ Logging to Google Sheets failed: {e}")
173
-
174
# ---------------------------- STREAMLIT UI ----------------------------
st.set_page_config(page_title="Next Day Bitcoin Price Movement", layout="wide")
st.title("🔮 Next Day Bitcoin Price Movement Predictor")

# Default to yesterday so a full trading day of data exists for the date.
date = st.date_input("Select a date", datetime.today() - timedelta(days=1))
month = date.strftime("%Y-%m")

# Two-step flow flag: news must be fetched (and optionally edited by the
# user) before the prediction step is shown.
if "news_loaded" not in st.session_state:
    st.session_state.news_loaded = False

# Per-run working state (rebuilt on every Streamlit rerun).
sentiment_features = []        # 6 features per news source, flattened in source order
aggregated_display = {}        # NOTE(review): never written below — appears unused
news_by_source = {"CryptoNews": [], "CryptoPotato": []}
edited_news_by_source = {}     # user-edited article lists, keyed by source name
188
-
189
# ------------------------------------
# STEP 1: FETCH NEWS + ENABLE EDITING
# ------------------------------------
if not st.session_state.news_loaded:
    if st.button("📥 Fetch News"):
        for src in ["CryptoNews", "CryptoPotato"]:
            try:
                news = fetch_news(src)
                news_by_source[src] = news
                st.session_state[src] = "\n\n".join(news)  # store for text_area default
            except Exception as e:
                # Best-effort per source: a failed feed just leaves an empty
                # editor box for that source.
                st.warning(f"⚠️ Could not fetch {src}: {e}")
                st.session_state[src] = ""
        st.session_state.news_loaded = True
        # Rerun immediately so the editing UI (step 2) renders.
        st.rerun()
204
-
205
# ------------------------------------
# STEP 2: SHOW TEXT BOXES + RUN PREDICTION
# ------------------------------------
if st.session_state.news_loaded:
    st.subheader("📝 Edit News Articles")
    # One editable text box per source; blank-line-separated paragraphs
    # become individual articles.
    for src in ["CryptoNews", "CryptoPotato"]:
        default_text = st.session_state.get(src, "")
        user_input = st.text_area(f"{src} Articles (5 max, one per paragraph)", default_text, height=300)
        edited_news_by_source[src] = [para.strip() for para in user_input.split("\n\n") if para.strip()]

    if st.button("🔮 Make Prediction"):
        # --- Sentiment features: 6 values per source (3 weighted means +
        # 3 extreme-score fractions), appended in fixed source order.
        for src in ["CryptoNews", "CryptoPotato"]:
            try:
                news_by_source[src] = edited_news_by_source[src]
                scores_df = call_finbert(news_by_source[src])
                st.write(f"📊 FinBERT Scores for {src}:", scores_df)

                weighted_avg, extreme_count = aggregate_sentiments(scores_df)
                total_articles = len(scores_df)
                # NOTE(review): total_articles == 0 raises ZeroDivisionError
                # below, which the except turns into six 0.0 features.

                pct_scores = {
                    "positive_pct": extreme_count.get("positive", 0) / total_articles,
                    "neutral_pct": extreme_count.get("neutral", 0) / total_articles,
                    "negative_pct": extreme_count.get("negative", 0) / total_articles
                }

                sentiment_features.extend([
                    weighted_avg["positive"],
                    weighted_avg["neutral"],
                    weighted_avg["negative"],
                    pct_scores["positive_pct"],
                    pct_scores["neutral_pct"],
                    pct_scores["negative_pct"]
                ])
            except Exception as e:
                # A failed source contributes neutral (zero) features so the
                # overall vector still ends up 12 long.
                st.warning(f"⚠️ Failed for {src}: {e}")
                sentiment_features.extend([0.0] * 6)
                news_by_source[src] = []

        st.markdown("**Aggregated Sentiment**")
        st.write("🔎 News by Source:", news_by_source)
        # Indexes 0-5 are CryptoNews, 6-11 CryptoPotato (loop order above).
        sentiment_feature_labels = {
            "cryptonews_positive_weighted": sentiment_features[0],
            "cryptonews_neutral_weighted": sentiment_features[1],
            "cryptonews_negative_weighted": sentiment_features[2],
            "cryptonews_positive_pct": sentiment_features[3],
            "cryptonews_neutral_pct": sentiment_features[4],
            "cryptonews_negative_pct": sentiment_features[5],
            "cryptopotato_positive_weighted": sentiment_features[6],
            "cryptopotato_neutral_weighted": sentiment_features[7],
            "cryptopotato_negative_weighted": sentiment_features[8],
            "cryptopotato_positive_pct": sentiment_features[9],
            "cryptopotato_neutral_pct": sentiment_features[10],
            "cryptopotato_negative_pct": sentiment_features[11],
        }
        st.markdown("### 🧠 Sentiment Features by Source")
        st.json(sentiment_feature_labels)

        # Average across both sources
        if len(sentiment_features) == 12:
            aggregated_sentiments = [
                (sentiment_features[0] + sentiment_features[6]) / 2,
                (sentiment_features[1] + sentiment_features[7]) / 2,
                (sentiment_features[2] + sentiment_features[8]) / 2,
                (sentiment_features[3] + sentiment_features[9]) / 2,
                (sentiment_features[4] + sentiment_features[10]) / 2,
                (sentiment_features[5] + sentiment_features[11]) / 2
            ]
        elif len(sentiment_features) == 6:
            # NOTE(review): both branches of the loop above always extend by
            # 6 per source, so this single-source case looks unreachable.
            aggregated_sentiments = sentiment_features
        else:
            st.warning("⚠️ Sentiment features incomplete. Defaulting to 0s.")
            aggregated_sentiments = [0.0] * 6

        # Fetch BTC + macro data
        st.subheader("📈 Bitcoin Price Data")
        # NOTE(review): unlike the macro loop below, this call is not wrapped
        # in try/except, so a BTC fetch failure aborts the whole run.
        btc = fetch_yahoo_data(TICKERS["bitcoin"], date)
        st.json(btc)

        st.subheader("📊 Macroeconomic Indicators")
        macro = {}
        for k, t in TICKERS.items():
            if k != "bitcoin":
                try:
                    macro[k] = fetch_yahoo_data(t, date)
                except Exception as e:
                    # Zero-filled fallback keeps the feature dict complete.
                    st.warning(f"⚠️ Failed to fetch {k.upper()} data: {e}")
                    macro[k] = {"open": 0, "high": 0, "low": 0, "close": 0, "volume": 0, "change_pct": 0}
        st.json(macro)

        st.subheader("🏩 Fed Indicators")
        fed = {
            "interest_rate": fetch_fred(FRED_CODES["interest_rate"], month),
            "inflation": fetch_fred(FRED_CODES["inflation"], month)
        }
        st.json(fed)

        # ========== BUILD FINAL INPUT DICT SAFELY ==========
        # Keys must exactly match scaler.feature_names_in_; .get(..., 0)
        # shields against a field missing from a failed macro fetch.
        final_input_dict = {
            "S&P_500_Open": macro["sp500"].get("open", 0),
            "S&P_500_High": macro["sp500"].get("high", 0),
            "S&P_500_Low": macro["sp500"].get("low", 0),
            "S&P_500_Close": macro["sp500"].get("close", 0),
            "S&P_500_Volume": macro["sp500"].get("volume", 0),
            "S&P_500_%_Change": macro["sp500"].get("change_pct", 0),

            "Gold_Prices_Open": macro["gold"].get("open", 0),
            "Gold_Prices_High": macro["gold"].get("high", 0),
            "Gold_Prices_Low": macro["gold"].get("low", 0),
            "Gold_Prices_Close": macro["gold"].get("close", 0),
            "Gold_Prices_Volume": macro["gold"].get("volume", 0),
            "Gold_Prices_%_Change": macro["gold"].get("change_pct", 0),

            # DXY intentionally has no volume feature (Yahoo reports none).
            "US_Dollar_Index_DXY_Open": macro["dxy"].get("open", 0),
            "US_Dollar_Index_DXY_High": macro["dxy"].get("high", 0),
            "US_Dollar_Index_DXY_Low": macro["dxy"].get("low", 0),
            "US_Dollar_Index_DXY_Close": macro["dxy"].get("close", 0),
            "US_Dollar_Index_DXY_%_Change": macro["dxy"].get("change_pct", 0),

            "Federal_Reserve_Interest_Rates_FEDFUNDS": fed.get("interest_rate", 0),
            # NOTE(review): key says CPIAUCNS but FRED_CODES fetches CPIAUCSL
            # (seasonally adjusted) — confirm which series the model trained on.
            "Inflation_CPIAUCNS": fed.get("inflation", 0),

            "Open": btc.get("open", 0),
            "High": btc.get("high", 0),
            "Low": btc.get("low", 0),
            "Close": btc.get("close", 0),
            "Volume": btc.get("volume", 0),
            "Change %": btc.get("change_pct", 0),

            "positive_weighted": aggregated_sentiments[0],
            "neutral_weighted": aggregated_sentiments[1],
            "negative_weighted": aggregated_sentiments[2],
            "negative_pct": aggregated_sentiments[5],
            "neutral_pct": aggregated_sentiments[4],
            "positive_pct": aggregated_sentiments[3],
        }

        # ========== PREPARE & PREDICT ==========
        expected_cols = list(scaler.feature_names_in_)
        final_input = [final_input_dict[col] for col in expected_cols]

        if any(pd.isna(x) for x in final_input):
            st.error("❌ Missing or invalid input data. Please check news, market, or macro feeds.")
        else:
            # Prepare aligned input
            input_df = pd.DataFrame([final_input_dict])[expected_cols]
            x_scaled = scaler.transform(input_df)
            x_pca = pca.transform(x_scaled)

            # Model prediction (same 0.62 threshold as make_prediction above,
            # which this inline pipeline duplicates).
            proba = model.predict_proba(x_pca)[0][1]
            prediction = "Increase" if proba >= 0.62 else "Decrease"

            # PCA features table
            pca_df = pd.DataFrame(x_pca, columns=[f"PC{i+1}" for i in range(x_pca.shape[1])])
            st.markdown("### 🧬 PCA-Transformed Features")
            st.dataframe(pca_df.style.format("{:.4f}"))

            # Prediction display
            st.subheader("🔮 Prediction")
            if prediction == "Decrease":
                # Red-tinted custom banner for a "Decrease" call.
                st.markdown(
                    f"<div style='background-color:#fbeaea;color:#9e1c1c;padding:10px;border-radius:8px;'>"
                    f"<b>Next Day BTC Price:</b> {prediction} (Prob: {proba:.2f})</div>",
                    unsafe_allow_html=True
                )
            else:
                st.success(f"Next Day BTC Price: **{prediction}** (Prob: {proba:.2f})")

            # Log prediction
            log = {
                "fetch_date": datetime.today().strftime("%Y-%m-%d"),
                "btc_open": btc["open"],
                "btc_close": btc["close"],
                "sent_pos": aggregated_sentiments[0],
                "sent_neu": aggregated_sentiments[1],
                "sent_neg": aggregated_sentiments[2],
                "sent_pos_pct": aggregated_sentiments[3],
                "sent_neu_pct": aggregated_sentiments[4],
                "sent_neg_pct": aggregated_sentiments[5],
                "macro_gold": macro["gold"]["close"],
                "macro_sp500": macro["sp500"]["close"],
                "macro_dxy": macro["dxy"]["close"],
                "interest_rate": fed["interest_rate"],
                "inflation": fed["inflation"],
                "prediction": prediction,
                "prob": proba,
                "news_cryptonews": " || ".join(news_by_source["CryptoNews"]),
                "news_cryptopotato": " || ".join(news_by_source["CryptoPotato"])
            }

            log_prediction(log)
            # NOTE(review): message says CSV but log_prediction writes to
            # Google Sheets — this success text looks stale.
            st.success("✅ Logged to predictions_log.csv")