S6six committed
Commit 9719f08 · 0 Parent(s)

Initial commit of stock sentiment analysis project
.gitignore ADDED
@@ -0,0 +1,21 @@
+ # Environment variables
+ .env
+
+ # Jupyter Notebook checkpoints
+ .ipynb_checkpoints/
+
+ # Python cache
+ __pycache__/
+ *.pyc
+ *.pyo
+ *.pyd
+
+ # Data files (optional, depending on whether you commit data)
+ data/raw/*
+ data/processed/*
+ !.gitkeep
+
+ # Virtual environment
+ venv/
+ env/
+ .venv/
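The ignored .env file is where src/data_fetcher.py (below) expects its API keys, read via load_api_keys. A minimal sketch with placeholder values (the key names come from that module; the values here are hypothetical):

# Required for news fetching via NewsAPI
NEWS_API_KEY=your_newsapi_key_here
# Optional; only the Alpha Vantage placeholder reads this
ALPHA_VANTAGE_KEY=your_alpha_vantage_key_here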
README.md ADDED
File without changes
data/processed ADDED
File without changes
data/raw ADDED
File without changes
notebooks/financial_sentiment_analysis.ipynb ADDED
@@ -0,0 +1,358 @@
+ {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "8ccfe024",
+ "metadata": {},
+ "source": [
+ "# Stock Sentiment Analysis\n",
+ "\n",
+ "This notebook performs sentiment analysis on news articles related to specific stocks and correlates it with stock price movements."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "784f2635",
+ "metadata": {},
+ "source": [
+ "## 1. Setup and Imports\n",
+ "\n",
+ "Import necessary libraries and modules from our `src` directory."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "3038c1d8",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Setup complete.\n"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "import sys\n",
+ "import os\n",
+ "from datetime import datetime, timedelta\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "# Add src directory to path to import modules\n",
+ "module_path = os.path.abspath(os.path.join('..'))\n",
+ "if module_path not in sys.path:\n",
+ "    sys.path.append(module_path)\n",
+ "\n",
+ "from src.data_fetcher import get_stock_data, get_news_articles\n",
+ "\n",
+ "# Configure pandas display options\n",
+ "pd.set_option('display.max_rows', 100)\n",
+ "pd.set_option('display.max_columns', 50)\n",
+ "pd.set_option('display.width', 1000)\n",
+ "\n",
+ "print(\"Setup complete.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4ed65790",
+ "metadata": {},
+ "source": [
+ "## 2. Define Parameters\n",
+ "\n",
+ "Set the stock ticker and date range for analysis."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "d0bb6ca4",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Ticker: AAPL\n",
+ "Start Date: 2025-03-31\n",
+ "End Date: 2025-04-30\n"
+ ]
+ }
+ ],
+ "source": [
+ "TICKER = 'AAPL'  # Example: Apple Inc.\n",
+ "END_DATE = datetime.now().strftime('%Y-%m-%d')\n",
+ "# Fetch data for the last 30 days (adjust as needed)\n",
+ "# Note: NewsAPI free tier limits searches to the past month\n",
+ "START_DATE = (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d')\n",
+ "\n",
+ "print(f\"Ticker: {TICKER}\")\n",
+ "print(f\"Start Date: {START_DATE}\")\n",
+ "print(f\"End Date: {END_DATE}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "902753f9",
+ "metadata": {},
+ "source": [
+ "## 3. Fetch Data\n",
+ "\n",
+ "Use the functions from `data_fetcher.py` to get stock prices and news articles."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "0d28dcf3",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fetching stock data...\n",
+ "Successfully fetched 21 days of stock data.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>Date</th>\n",
+ " <th>Open</th>\n",
+ " <th>High</th>\n",
+ " <th>Low</th>\n",
+ " <th>Close</th>\n",
+ " <th>Volume</th>\n",
+ " <th>Dividends</th>\n",
+ " <th>Stock Splits</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>2025-03-31</td>\n",
+ " <td>217.009995</td>\n",
+ " <td>225.619995</td>\n",
+ " <td>216.229996</td>\n",
+ " <td>222.130005</td>\n",
+ " <td>65299300</td>\n",
+ " <td>0.0</td>\n",
+ " <td>0.0</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>2025-04-01</td>\n",
+ " <td>219.809998</td>\n",
+ " <td>223.679993</td>\n",
+ " <td>218.899994</td>\n",
+ " <td>223.190002</td>\n",
+ " <td>36412700</td>\n",
+ " <td>0.0</td>\n",
+ " <td>0.0</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>2025-04-02</td>\n",
+ " <td>221.320007</td>\n",
+ " <td>225.190002</td>\n",
+ " <td>221.020004</td>\n",
+ " <td>223.889999</td>\n",
+ " <td>35905900</td>\n",
+ " <td>0.0</td>\n",
+ " <td>0.0</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>2025-04-03</td>\n",
+ " <td>205.539993</td>\n",
+ " <td>207.490005</td>\n",
+ " <td>201.250000</td>\n",
+ " <td>203.190002</td>\n",
+ " <td>103419000</td>\n",
+ " <td>0.0</td>\n",
+ " <td>0.0</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>2025-04-04</td>\n",
+ " <td>193.889999</td>\n",
+ " <td>199.880005</td>\n",
+ " <td>187.339996</td>\n",
+ " <td>188.380005</td>\n",
+ " <td>125910900</td>\n",
+ " <td>0.0</td>\n",
+ " <td>0.0</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " Date Open High Low Close Volume Dividends Stock Splits\n",
+ "0 2025-03-31 217.009995 225.619995 216.229996 222.130005 65299300 0.0 0.0\n",
+ "1 2025-04-01 219.809998 223.679993 218.899994 223.190002 36412700 0.0 0.0\n",
+ "2 2025-04-02 221.320007 225.190002 221.020004 223.889999 35905900 0.0 0.0\n",
+ "3 2025-04-03 205.539993 207.490005 201.250000 203.190002 103419000 0.0 0.0\n",
+ "4 2025-04-04 193.889999 199.880005 187.339996 188.380005 125910900 0.0 0.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Fetch Stock Data\n",
+ "print(\"Fetching stock data...\")\n",
+ "stock_df = get_stock_data(TICKER, START_DATE, END_DATE)\n",
+ "\n",
+ "if stock_df is not None:\n",
+ "    print(f\"Successfully fetched {len(stock_df)} days of stock data.\")\n",
+ "    display(stock_df.head())\n",
+ "else:\n",
+ "    print(\"Failed to fetch stock data.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "45b2014d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fetching news articles...\n",
+ "Found 853 articles for 'AAPL'\n"
+ ]
+ },
+ {
+ "ename": "AttributeError",
+ "evalue": "'list' object has no attribute 'empty'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[1;32mIn[20], line 4\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFetching news articles...\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 3\u001b[0m news_df \u001b[38;5;241m=\u001b[39m get_news_articles(TICKER, START_DATE, END_DATE)\n\u001b[1;32m----> 4\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m news_df \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[43mnews_df\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mempty\u001b[49m:\n\u001b[0;32m 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSuccessfully fetched \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(news_df)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m news articles.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 6\u001b[0m display(news_df\u001b[38;5;241m.\u001b[39mhead())\n",
+ "\u001b[1;31mAttributeError\u001b[0m: 'list' object has no attribute 'empty'"
+ ]
+ }
+ ],
+ "source": [
+ "# Fetch News Articles\n",
+ "print(\"Fetching news articles...\")\n",
+ "articles_list = get_news_articles(TICKER, START_DATE, END_DATE)\n",
+ "\n",
+ "# Convert the list of articles to a DataFrame\n",
+ "if articles_list is not None:\n",
+ "    news_df = pd.DataFrame(articles_list)\n",
+ "    # Convert publishedAt to datetime and extract date\n",
+ "    if 'publishedAt' in news_df.columns:\n",
+ "        news_df['publishedAt'] = pd.to_datetime(news_df['publishedAt'])\n",
+ "        news_df['date'] = news_df['publishedAt'].dt.date\n",
+ "    else:\n",
+ "        news_df['date'] = None  # Handle case where publishedAt might be missing\n",
+ "else:\n",
+ "    news_df = pd.DataFrame()  # Create an empty DataFrame if fetching failed\n",
+ "\n",
+ "# Now check the DataFrame\n",
+ "if not news_df.empty:\n",
+ "    print(f\"Successfully fetched and converted {len(news_df)} news articles to DataFrame.\")\n",
+ "    display(news_df[['date', 'title', 'description', 'source']].head())  # Display relevant columns\n",
+ "else:\n",
+ "    print(\"No news articles found or failed to create DataFrame.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "060f293c",
+ "metadata": {},
+ "source": [
+ "## 4. Sentiment Analysis\n",
+ "\n",
+ "Apply sentiment analysis to the fetched news articles."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "23508f73",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Skipping sentiment analysis as no news articles were successfully fetched or the DataFrame is empty.\n"
+ ]
+ }
+ ],
+ "source": [
+ "from src.sentiment_analyzer import analyze_sentiment\n",
+ "# Check if news_df exists and is not empty\n",
+ "if 'news_df' in locals() and not news_df.empty:\n",
+ "    print(f\"Performing sentiment analysis on {len(news_df)} articles...\")\n",
+ "    # Combine title and description for better context (handle None values)\n",
+ "    news_df['text_to_analyze'] = news_df['title'].fillna('') + \". \" + news_df['description'].fillna('')\n",
+ "    # Apply the sentiment analysis function\n",
+ "    # This might take a while depending on the number of articles and your hardware\n",
+ "    sentiment_results = news_df['text_to_analyze'].apply(lambda x: analyze_sentiment(x) if pd.notna(x) else (None, None, None))\n",
+ "    # Unpack results into separate columns\n",
+ "    news_df['sentiment_label'] = sentiment_results.apply(lambda x: x[0])\n",
+ "    news_df['sentiment_score'] = sentiment_results.apply(lambda x: x[1])\n",
+ "    news_df['sentiment_scores_all'] = sentiment_results.apply(lambda x: x[2])\n",
+ "    # Display the results\n",
+ "    print(\"Sentiment analysis complete.\")\n",
+ "    display(news_df[['date', 'title', 'sentiment_label', 'sentiment_score']].head())\n",
+ "    # Display value counts for sentiment labels\n",
+ "    print(\"\\nSentiment Label Distribution:\")\n",
+ "    print(news_df['sentiment_label'].value_counts())\n",
+ "else:\n",
+ "    print(\"Skipping sentiment analysis as no news articles were successfully fetched or the DataFrame is empty.\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+ }
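The notebook's introduction promises to correlate sentiment with price movements, but the committed cells stop after sentiment scoring. A minimal sketch of that last step, assuming the stock_df and news_df built above (the signed_sentiment column is introduced here for illustration and is not part of this commit):

# Turn (label, probability) into a signed score: +p for positive, -p for negative, 0 for neutral
sign = news_df['sentiment_label'].map({'positive': 1, 'negative': -1, 'neutral': 0})
news_df['signed_sentiment'] = sign * news_df['sentiment_score']

# Average article-level sentiment into one value per calendar day
daily_sentiment = news_df.groupby('date')['signed_sentiment'].mean().reset_index()

# stock_df['Date'] already holds plain dates, so the merge keys line up with news_df['date']
merged = stock_df.merge(daily_sentiment, left_on='Date', right_on='date', how='left')

# Correlate same-day average sentiment with close-to-close returns
merged['return'] = merged['Close'].pct_change()
print(merged[['signed_sentiment', 'return']].corr())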
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ pandas
+ yfinance
+ newsapi-python
+ jupyter
+ torch
+ transformers
+ scikit-learn
+ matplotlib
+ nltk
+ python-dotenv
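With a virtual environment active, the whole stack installs in one step:

pip install -r requirements.txt

torch and transformers are the heavyweight entries; the FinBERT weights that src/sentiment_analyzer.py loads are downloaded separately on first run.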
src/data_fetcher.py ADDED
@@ -0,0 +1,136 @@
+ import yfinance as yf
+ import pandas as pd
+ from newsapi import NewsApiClient
+ import os
+ from dotenv import load_dotenv
+ from datetime import datetime, timedelta
+
+ def load_api_keys():
+     """Loads API keys from the .env file."""
+     load_dotenv()
+     news_api_key = os.getenv("NEWS_API_KEY")
+     alpha_vantage_key = os.getenv("ALPHA_VANTAGE_KEY")  # Add ALPHA_VANTAGE_KEY=YOUR_KEY to .env if using
+     if not news_api_key:
+         print("Warning: NEWS_API_KEY not found in .env file.")
+     # Add a similar check for alpha_vantage_key if you plan to use it
+     return news_api_key, alpha_vantage_key
+
+ def get_stock_data(ticker, start_date, end_date):
+     """
+     Fetches historical stock data for a given ticker symbol.
+
+     Args:
+         ticker (str): The stock ticker symbol (e.g., 'AAPL').
+         start_date (str): Start date in 'YYYY-MM-DD' format.
+         end_date (str): End date in 'YYYY-MM-DD' format.
+
+     Returns:
+         pandas.DataFrame: DataFrame containing historical stock data, or None if an error occurs.
+     """
+     try:
+         stock = yf.Ticker(ticker)
+         hist = stock.history(start=start_date, end=end_date)
+         if hist.empty:
+             print(f"No data found for {ticker} between {start_date} and {end_date}.")
+             return None
+         hist.reset_index(inplace=True)  # Make Date a column
+         hist['Date'] = pd.to_datetime(hist['Date']).dt.date  # Keep only the date part
+         return hist
+     except Exception as e:
+         print(f"Error fetching stock data for {ticker}: {e}")
+         return None
+
+ def get_news_articles(query, from_date, to_date, language='en', sort_by='relevancy', page_size=100):
+     """
+     Fetches news articles related to a query within a date range using NewsAPI.
+
+     Args:
+         query (str): The search query (e.g., 'Apple stock').
+         from_date (str): Start date in 'YYYY-MM-DD' format.
+         to_date (str): End date in 'YYYY-MM-DD' format.
+         language (str): Language of the articles (default: 'en').
+         sort_by (str): Sorting criteria (default: 'relevancy'). Options: 'relevancy', 'popularity', 'publishedAt'.
+         page_size (int): Number of results per page (max 100 for the developer plan).
+
+     Returns:
+         list: A list of dictionaries, one per article, or None if an error occurs.
+               Returns an empty list if no articles are found.
+     """
+     print(f"Attempting to fetch news with query: '{query}'")
+     print(f"Date range: {from_date} to {to_date}")
+     news_api_key, _ = load_api_keys()
+     if not news_api_key:
+         print("Error: NewsAPI key not available. Cannot fetch news.")
+         return None
+
+     try:
+         newsapi = NewsApiClient(api_key=news_api_key)
+         # NewsAPI free tier only allows searching articles up to one month old.
+         # Ensure from_date is not too far in the past if using the free tier.
+         one_month_ago = (datetime.now() - timedelta(days=29)).strftime('%Y-%m-%d')  # Use 29 days to be safe
+         print(f"One month ago date limit (approx): {one_month_ago}")
+         if from_date < one_month_ago:
+             print(f"Warning: NewsAPI free tier limits searches to the past month. Adjusting from_date from {from_date} to {one_month_ago}")
+             from_date = one_month_ago
+
+         print(f"Calling NewsAPI with: q='{query}', from='{from_date}', to='{to_date}', page_size={page_size}")
+         all_articles = newsapi.get_everything(q=query,
+                                               from_param=from_date,
+                                               to=to_date,
+                                               language=language,
+                                               sort_by=sort_by,
+                                               page_size=page_size)  # Max 100 for the free tier
+
+         print(f"NewsAPI response status: {all_articles.get('status')}")
+         if all_articles['status'] == 'ok':
+             total_results = all_articles['totalResults']
+             print(f"Found {total_results} articles for '{query}'")
+             if total_results == 0:
+                 print("Warning: NewsAPI returned 0 articles for this query and date range.")
+             return all_articles['articles']
+         else:
+             error_code = all_articles.get('code')
+             error_message = all_articles.get('message')
+             print(f"Error fetching news from NewsAPI. Code: {error_code}, Message: {error_message}")
+             return None
+     except Exception as e:
+         print(f"Exception occurred while connecting to NewsAPI: {e}")
+         return None
+
+ # Placeholder for Alpha Vantage data fetching
+ def get_alpha_vantage_data(symbol):
+     """Placeholder function to fetch data using Alpha Vantage."""
+     _, alpha_vantage_key = load_api_keys()
+     if not alpha_vantage_key:
+         print("Alpha Vantage API key not found in .env file.")
+         return None
+     print(f"Fetching data for {symbol} using Alpha Vantage (implementation pending)...")
+     # Add Alpha Vantage API call logic here
+     return None
+
+ if __name__ == '__main__':
+     # Example usage (for testing the module directly)
+     ticker = 'AAPL'
+     end_date = datetime.now().strftime('%Y-%m-%d')
+     start_date = (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d')  # Look back 30 days
+
+     print(f"--- Testing Stock Data Fetching ({ticker}) ---")
+     stock_data = get_stock_data(ticker, start_date, end_date)
+     if stock_data is not None:
+         print(f"Successfully fetched {len(stock_data)} rows of stock data.")
+         print(stock_data.head())
+     else:
+         print("Failed to fetch stock data.")
+
+     print(f"\n--- Testing News Article Fetching ({ticker}) ---")
+     news_query = f"{ticker} stock"
+     articles = get_news_articles(news_query, start_date, end_date)
+     if articles is not None:
+         print(f"Successfully fetched {len(articles)} articles.")
+         if articles:
+             print("First article title:", articles[0]['title'])
+     else:
+         print("Failed to fetch news articles.")
+
+     # print("\n--- Testing Alpha Vantage (Placeholder) ---")
+     # get_alpha_vantage_data(ticker)
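Because the NewsAPI free tier is rate-limited and only reaches a month back, repeated notebook runs can burn through the quota. One plausible companion to this module is a caching wrapper that writes into the data/raw directory the .gitignore reserves; fetch_cached below is a hypothetical helper, not part of this commit:

import os
import pandas as pd
from src.data_fetcher import get_stock_data, get_news_articles

def fetch_cached(ticker, start, end, cache_dir='data/raw'):
    """Fetch prices and news once per (ticker, range), then reuse the cached CSVs."""
    os.makedirs(cache_dir, exist_ok=True)
    prices_path = os.path.join(cache_dir, f'{ticker}_{start}_{end}_prices.csv')
    news_path = os.path.join(cache_dir, f'{ticker}_{start}_{end}_news.csv')
    if os.path.exists(prices_path) and os.path.exists(news_path):
        return pd.read_csv(prices_path), pd.read_csv(news_path)
    prices = get_stock_data(ticker, start, end)
    news = pd.DataFrame(get_news_articles(ticker, start, end) or [])  # None becomes an empty frame
    if prices is not None:
        prices.to_csv(prices_path, index=False)
    news.to_csv(news_path, index=False)
    return prices, news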
src/sentiment_analyzer.py ADDED
@@ -0,0 +1,70 @@
+ import torch
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+ import pandas as pd
+ import numpy as np
+
+ # Load the FinBERT model and tokenizer
+ # This might download the model files the first time it's run
+ tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
+ model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
+
+ def analyze_sentiment(text):
+     """
+     Analyzes the sentiment of a given text using the FinBERT model.
+
+     Args:
+         text (str): The input text (e.g., news headline or description).
+
+     Returns:
+         tuple: A tuple containing:
+             - sentiment_label (str): 'positive', 'negative', or 'neutral'.
+             - sentiment_score (float): The probability score of the predicted sentiment.
+             - scores (dict): Dictionary containing probabilities for all labels ('positive', 'negative', 'neutral').
+         Returns (None, None, None) if the input is invalid or an error occurs.
+     """
+     if not isinstance(text, str) or not text.strip():
+         return None, None, None  # Return None for empty or invalid input
+
+     try:
+         inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512, padding=True)
+         with torch.no_grad():  # Disable gradient calculation for inference
+             outputs = model(**inputs)
+
+         # Get probabilities using softmax
+         probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
+         scores = probabilities[0].numpy()  # Get scores for the first (and only) input
+
+         # Get the predicted sentiment label index
+         predicted_class_id = np.argmax(scores)
+
+         # Map index to label based on model config
+         sentiment_label = model.config.id2label[predicted_class_id]
+         sentiment_score = scores[predicted_class_id]
+
+         all_scores = {model.config.id2label[i]: scores[i] for i in range(len(scores))}
+
+         return sentiment_label, float(sentiment_score), {k: float(v) for k, v in all_scores.items()}
+
+     except Exception as e:
+         print(f"Error during sentiment analysis for text: '{text[:50]}...': {e}")
+         return None, None, None
+
+ # Example usage (for testing the module directly)
+ if __name__ == '__main__':
+     test_texts = [
+         "Stocks rallied on positive economic news.",
+         "The company reported a significant drop in profits.",
+         "Market remains flat amid uncertainty.",
+         "",    # Empty string test
+         None   # None test
+     ]
+
+     print("--- Testing Sentiment Analysis ---")
+     for t in test_texts:
+         label, score, all_scores_dict = analyze_sentiment(t)
+         if label:
+             print(f"Text: '{t}'")
+             print(f"  Sentiment: {label} (Score: {score:.4f})")
+             print(f"  All Scores: {all_scores_dict}")
+         else:
+             print(f"Text: '{t}' -> Invalid input or error during analysis.")
src/utils.py ADDED
File without changes