Spaces:

Knight-coderr
/

StockAnalysis

Sleeping

App Files Files Community

StockAnalysis / app.py

Knight-coderr

Update app.py

0251baa verified 4 months ago

raw

history blame

7.79 kB

	import streamlit as st
	import pandas as pd
	import yfinance as yf
	from textblob import TextBlob
	import joblib
	import matplotlib.pyplot as plt
	import datetime

	# Function to load stock data using yfinance
	@st.cache_data(ttl=86400)
	def load_yfinance_data():
	    """Download roughly one year of price history for a fixed ticker list.

	    The per-ticker frames returned by yfinance are stacked into one long
	    DataFrame with the index reset to a column and an added 'Stock Name'
	    column identifying the ticker of each row.

	    Tickers that are missing from the download, or whose rows are entirely
	    NaN (e.g. a failed request), are skipped so they cannot inject empty
	    rows into the downstream rolling-window features.

	    Returns:
	        pandas.DataFrame: price rows for every ticker that returned data.

	    Raises:
	        RuntimeError: if no ticker returned any usable rows.
	    """
	    # List of stock tickers
	    tickers = ['TSLA', 'MSFT', 'PG', 'META', 'AMZN', 'GOOG', 'AMD', 'AAPL', 'NFLX', 'TSM',
	               'KO', 'F', 'COST', 'DIS', 'VZ', 'CRM', 'INTC', 'BA', 'BX', 'NOC', 'PYPL', 'ENPH', 'NIO', 'ZS', 'XPEV']

	    # Take a single clock reading so start and end are derived from the
	    # same instant (the original called now() and today() separately).
	    now = datetime.datetime.now()
	    start_date = (now - datetime.timedelta(days=365)).strftime('%Y-%m-%d')
	    end_date = now.strftime('%Y-%m-%d')

	    # Download the stock data using yfinance, one column group per ticker
	    data = yf.download(tickers, start=start_date, end=end_date, group_by='ticker')

	    # Top-level column labels are the tickers that actually came back
	    available = set(data.columns.get_level_values(0))

	    all_data = []
	    for ticker in tickers:
	        if ticker not in available:
	            continue
	        # Rows where every price field is NaN carry no information
	        df = data[ticker].dropna(how='all')
	        if df.empty:
	            continue
	        df = df.reset_index()
	        df['Stock Name'] = ticker
	        all_data.append(df)

	    if not all_data:
	        raise RuntimeError("yfinance returned no price data for any requested ticker")

	    # Concatenate all the data into a single DataFrame
	    return pd.concat(all_data, ignore_index=True)

	# Fetch the (cached) price history for all tracked tickers
	data = load_yfinance_data()

	# Read the tweet corpus used for sentiment scoring
	tweets_data = pd.read_csv('data/stock_tweets.csv')

	# Parse the tweet timestamps, then keep only the calendar date of each one
	tweet_timestamps = pd.to_datetime(tweets_data['Date'])
	tweets_data['Date'] = tweet_timestamps.dt.date
	# Perform sentiment analysis on tweets
	def get_sentiment(tweet):
	    """Return the TextBlob polarity score of a tweet.

	    Args:
	        tweet: The tweet text. Non-string values (for example the NaN that
	            pandas produces for an empty CSV cell) are accepted.

	    Returns:
	        The value of ``TextBlob(tweet).sentiment.polarity`` for string
	        input, or 0.0 for anything that is not a string. 0.0 is the same
	        neutral value this script uses when filling missing sentiment.
	    """
	    # TextBlob raises TypeError on non-string input, which would abort the
	    # whole .apply() over the tweet column because of one bad cell.
	    if not isinstance(tweet, str):
	        return 0.0
	    return TextBlob(tweet).sentiment.polarity

	# Score every tweet
	tweets_data['Sentiment'] = tweets_data['Tweet'].apply(get_sentiment)

	# Average the numeric columns per (date, stock) pair
	daily_sentiment = (
	    tweets_data
	    .groupby(['Date', 'Stock Name'])
	    .mean(numeric_only=True)
	    .reset_index()
	)

	# Turn the plain date objects back into datetime64[ns] before merging
	daily_sentiment['Date'] = pd.to_datetime(daily_sentiment['Date'])

	# Left-join sentiment onto prices so every price row is kept
	merged_data = data.merge(daily_sentiment, how='left', on=['Date', 'Stock Name'])

	# Price rows without any tweets get a neutral sentiment of 0
	merged_data['Sentiment'] = merged_data['Sentiment'].fillna(value=0)

	# Chronological order is required by the shift/rolling features below
	merged_data = merged_data.sort_values(by='Date')

	# Per-stock views reused by the feature calculations
	close_by_stock = merged_data.groupby('Stock Name')['Close']
	sentiment_by_stock = merged_data.groupby('Stock Name')['Sentiment']

	# Lagged features: previous row's close and sentiment within each stock
	merged_data['Prev_Close'] = close_by_stock.shift(1)
	merged_data['Prev_Sentiment'] = sentiment_by_stock.shift(1)

	# 7- and 14-row moving averages of the close
	merged_data['MA7'] = close_by_stock.transform(lambda closes: closes.rolling(window=7).mean())
	merged_data['MA14'] = close_by_stock.transform(lambda closes: closes.rolling(window=14).mean())

	# Change in close relative to the previous row of the same stock
	merged_data['Daily_Change'] = merged_data['Close'].sub(merged_data['Prev_Close'])

	# Volatility: 7-row rolling standard deviation of the close
	merged_data['Volatility'] = close_by_stock.transform(lambda closes: closes.rolling(window=7).std())

	# Discard every row that still contains a missing value
	merged_data = merged_data.dropna()

	# Load the best model
	model_filename = 'model/best_model.pkl'
	model = joblib.load(model_filename)

	# Streamlit application layout
	st.title("Stock Price Prediction Using Sentiment Analysis")

	# User input for stock data
	st.header("Input Stock Data")
	stock_names = merged_data['Stock Name'].unique()
	selected_stock = st.selectbox("Select Stock Name", stock_names)
	days_to_predict = st.number_input("Number of Days to Predict", min_value=1, max_value=30, value=10)

	# Last row of the selected stock seeds the forecast: its Close and Sentiment
	# become the "previous" values for the first predicted day.
	latest_data = merged_data[merged_data['Stock Name'] == selected_stock].iloc[-1]
	prev_close = latest_data['Close']
	prev_sentiment = latest_data['Sentiment']
	ma7 = latest_data['MA7']
	ma14 = latest_data['MA14']
	daily_change = latest_data['Daily_Change']
	volatility = latest_data['Volatility']

	# Display the latest stock data in a table
	latest_data_df = pd.DataFrame({
	    'Metric': ['Previous Close Price', 'Previous Sentiment', '7-day Moving Average', '14-day Moving Average', 'Daily Change', 'Volatility'],
	    'Value': [prev_close, prev_sentiment, ma7, ma14, daily_change, volatility]
	})

	st.write("Latest Stock Data:")
	st.write(latest_data_df)

	st.write("Use the inputs above to predict the next days close prices of the stock.")
	if st.button("Predict"):
	    predictions = []
	    latest_date = datetime.datetime.now()

	    # Recursive forecast: each prediction is fed back in as the next
	    # step's previous close. Sentiment and volatility are held at their
	    # latest observed values for the whole horizon.
	    for _ in range(days_to_predict):
	        X_future = pd.DataFrame({
	            'Prev_Close': [prev_close],
	            'Prev_Sentiment': [prev_sentiment],
	            'MA7': [ma7],
	            'MA14': [ma14],
	            'Daily_Change': [daily_change],
	            'Volatility': [volatility]
	        })

	        next_day_prediction = model.predict(X_future)[0]
	        predictions.append(next_day_prediction)

	        # Update features for next prediction.
	        # The change must be taken against the OLD prev_close; computing it
	        # after the reassignment made Daily_Change always 0.
	        daily_change = next_day_prediction - prev_close
	        prev_close = next_day_prediction
	        ma7 = (ma7 * 6 + next_day_prediction) / 7  # Simplified rolling calculation
	        ma14 = (ma14 * 13 + next_day_prediction) / 14  # Simplified rolling calculation

	    # Prepare prediction data for display
	    prediction_dates = pd.date_range(start=latest_date + pd.Timedelta(days=1), periods=days_to_predict)
	    prediction_df = pd.DataFrame({
	        'Date': prediction_dates,
	        'Predicted Close Price': predictions
	    })

	    st.subheader("Predicted Prices")
	    st.dataframe(prediction_df)

	    # Plot predictions using Plotly
	    import plotly.express as px
	    fig = px.line(prediction_df, x='Date', y='Predicted Close Price',
	                  markers=True, title=f"{selected_stock} Predicted Close Prices")
	    st.plotly_chart(fig, use_container_width=True)

	# ----------------------------------------
	# Enhanced Visualizations
	# Import here so this section does not depend on the "Predict" branch
	# (the only other place plotly is imported) having executed in this run.
	import plotly.express as px

	st.header(" Enhanced Stock Analysis")

	# Use the merged feature frame: the raw price frame `data` has no
	# Sentiment, Volatility, MA7, MA14 or Daily_Change columns, so every chart
	# below would fail on it.
	stock_history = merged_data[merged_data['Stock Name'] == selected_stock]

	# Date filter slider. Bounds are converted from pandas Timestamps to plain
	# datetime objects, the type st.slider documents for date-time ranges.
	min_date = pd.Timestamp(stock_history['Date'].min()).to_pydatetime()
	max_date = pd.Timestamp(stock_history['Date'].max()).to_pydatetime()
	date_range = st.slider("Select Date Range for Visualizations",
	                       min_value=min_date, max_value=max_date,
	                       value=(min_date, max_date))
	range_start, range_end = date_range
	in_range = (stock_history['Date'] >= range_start) & (stock_history['Date'] <= range_end)
	filtered_data = stock_history[in_range]

	with st.expander(" Price vs Sentiment Trend"):
	    fig1 = px.line(filtered_data, x='Date', y=['Close', 'Sentiment'],
	                   labels={'value': 'Price / Sentiment', 'variable': 'Metric'},
	                   title=f"{selected_stock} - Close Price & Sentiment")
	    st.plotly_chart(fig1, use_container_width=True)

	with st.expander(" Volatility Over Time"):
	    fig2 = px.line(filtered_data, x='Date', y='Volatility',
	                   title=f"{selected_stock} - 7-Day Rolling Volatility")
	    st.plotly_chart(fig2, use_container_width=True)

	with st.expander(" Moving Averages (MA7 vs MA14)"):
	    fig3 = px.line(filtered_data, x='Date',
	                   y=['MA7', 'MA14'],
	                   labels={'value': 'Price', 'variable': 'Moving Average'},
	                   title=f"{selected_stock} - Moving Averages")
	    st.plotly_chart(fig3, use_container_width=True)

	with st.expander(" Daily Price Change"):
	    fig4 = px.line(filtered_data, x='Date', y='Daily_Change',
	                   title=f"{selected_stock} - Daily Price Change")
	    st.plotly_chart(fig4, use_container_width=True)

	with st.expander(" Sentiment Distribution"):
	    fig5 = px.histogram(filtered_data, x='Sentiment', nbins=30,
	                        title=f"{selected_stock} - Sentiment Score Distribution")
	    st.plotly_chart(fig5, use_container_width=True)