# Falcao Zane Vijay
# deploy #1
# 8e0b458
# raw
# history blame
# 17.9 kB
import streamlit as st
import pandas as pd
import numpy as np
import yfinance as yf
import pickle
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime, timedelta
import warnings
from curl_cffi import requests
# HTTP session created with impersonate="chrome"; it is the session handed to
# yf.download() in load_stock_data.
# NOTE(review): presumably needed so Yahoo Finance accepts the requests — confirm.
session = requests.Session(impersonate="chrome")
# Suppress every Python warning for the whole app process.
warnings.filterwarnings('ignore')
# Page-level settings
st.set_page_config(page_title="Stock Price Prediction App", page_icon="📈", layout="wide")

# Headline and one-line description of what the app predicts
st.title("📈 Stock Price Prediction App")
st.markdown(
    "This app uses a trained Logistic Regression model to predict whether a stock will go **UP** ⬆️ or **DOWN** ⬇️ the next day."
)

# The sidebar hosts every user-controlled input
st.sidebar.header("🔧 Configuration")
# Tickers offered in the sidebar, written without the ".NS" suffix that is
# appended below. Order matters: it is the order shown in the select box.
_NSE_TICKERS = (
    'ADANIENT', 'ADANIPORTS', 'APOLLOHOSP', 'ASIANPAINT', 'AXISBANK',
    'BAJAJ-AUTO', 'BAJFINANCE', 'BAJAJFINSV', 'BEL', 'BHARTIARTL',
    'CIPLA', 'COALINDIA', 'DRREDDY', 'EICHERMOT', 'GRASIM',
    'HCLTECH', 'HDFCBANK', 'HDFCLIFE', 'HEROMOTOCO', 'HINDALCO',
    'HINDUNILVR', 'ICICIBANK', 'INDUSINDBK', 'INFY', 'ITC',
    'JIOFIN', 'JSWSTEEL', 'KOTAKBANK', 'LT', 'M&M',
    'MARUTI', 'NESTLEIND', 'NTPC', 'ONGC', 'POWERGRID',
    'RELIANCE', 'SBILIFE', 'SHRIRAMFIN', 'SBIN', 'SUNPHARMA',
    'TATACONSUM', 'TCS', 'TATAMOTORS', 'TATASTEEL', 'TECHM',
    'TITAN', 'TRENT', 'ULTRACEMCO', 'WIPRO', 'ETERNAL',
)
STOCK_SYMBOLS = [f'{ticker}.NS' for ticker in _NSE_TICKERS]
# User inputs
DEFAULT_STOCK = 'RELIANCE.NS'
selected_stock = st.sidebar.selectbox(
    "Select Stock Symbol",
    STOCK_SYMBOLS,
    # Resolve the default by name so that editing STOCK_SYMBOLS cannot silently
    # shift it to another ticker; fall back to the first entry if it is removed.
    index=STOCK_SYMBOLS.index(DEFAULT_STOCK) if DEFAULT_STOCK in STOCK_SYMBOLS else 0,
)
start_date = st.sidebar.date_input("Start Date", value=datetime(2020, 1, 1))
end_date = st.sidebar.date_input("End Date", value=datetime.now())
if start_date >= end_date:
    # An inverted or empty range would otherwise surface later only as a
    # generic "unable to load" message.
    st.sidebar.error("Start Date must be earlier than End Date.")
# NOTE(review): the button's return value is not read anywhere in the visible
# code, so the analysis runs on every rerun regardless of clicks.
prediction_mode = st.sidebar.button("Start Analysis")
# Indicator look-back windows; process_stock_data reads these module-level values.
rsi_period = st.sidebar.slider("RSI Period", min_value=5, max_value=30, value=14, step=1)
short_period = st.sidebar.slider("Short-term", min_value=5, max_value=50, value=20, step=1)
long_period = st.sidebar.slider("Long-term", min_value=50, max_value=200, value=50, step=1)
# Helper functions (same as in your original code)
def SMA(series, period):
    """Return the simple moving average of ``series`` over ``period`` rows.

    Rows that do not yet have a full window behind them come back as NaN.
    """
    window = series.rolling(window=period)
    return window.mean()
def EMA(series, period):
    """Return the exponential moving average of ``series`` with span ``period``.

    Uses the recursive (``adjust=False``) form, so the first output equals the
    first input value.
    """
    weighted = series.ewm(span=period, adjust=False)
    return weighted.mean()
def MACD(series, fast=12, slow=26, signal=9):
    """Compute the MACD line, its signal line and the histogram.

    Args:
        series: Price series to analyse.
        fast: Span of the faster EMA.
        slow: Span of the slower EMA.
        signal: Span of the EMA applied to the MACD line.

    Returns:
        Tuple ``(macd, macd_signal, macd_hist)`` where ``macd`` is the fast EMA
        minus the slow EMA and ``macd_hist`` is ``macd - macd_signal``.
    """
    macd_line = EMA(series, fast) - EMA(series, slow)
    signal_line = EMA(macd_line, signal)
    histogram = macd_line - signal_line
    return macd_line, signal_line, histogram
def RSI(series, period=14):
    """Return the Relative Strength Index of ``series``.

    Gains and losses are smoothed with an exponentially weighted mean using
    ``alpha = 1 / period``; no value is produced until ``period`` observations
    are available.
    """
    change = series.diff()
    # Keep only the moves in one direction; everything else (including the
    # leading NaN from diff) becomes 0.
    upward = change.where(change > 0, 0)
    downward = -change.where(change < 0, 0)
    smoothing = {'alpha': 1 / period, 'min_periods': period}
    avg_gain = upward.ewm(**smoothing).mean()
    avg_loss = downward.ewm(**smoothing).mean()
    relative_strength = avg_gain / avg_loss
    return 100 - (100 / (1 + relative_strength))
def create_volatility_features(df):
    """Add rolling-volatility columns to ``df`` in place and return it.

    Creates ``return_1d`` from ``Close`` when it is absent, then the rolling
    standard deviation of that return over 5/10/20/30 rows, the 5- and 10-row
    volatility relative to the 20-row one, and the percentile rank of the
    5-row volatility within 20- and 50-row windows.
    """
    if 'return_1d' not in df.columns:
        df['return_1d'] = df['Close'].pct_change()
    daily_return = df['return_1d']

    for window in (5, 10, 20, 30):
        df[f'volatility_{window}d'] = daily_return.rolling(window).std()

    baseline = df['volatility_20d']
    for window in (5, 10):
        df[f'vol_ratio_{window}_20'] = df[f'volatility_{window}d'] / baseline

    short_term = df['volatility_5d']
    for lookback in (20, 50):
        df[f'vol_rank_{lookback}'] = short_term.rolling(lookback).rank(pct=True)
    return df
def create_enhanced_lag_features(df):
    """Add lagged copies of existing columns to ``df`` in place and return it.

    ``return_1d`` must already exist and is lagged by 1, 2, 3, 5 and 10 rows.
    ``RSI14`` and ``MACD`` are lagged by 1-3 rows, and ``volume_ratio_20`` by
    1-2 rows, but only when those columns are present.
    """
    daily_return = df['return_1d']
    for lag in (1, 2, 3, 5, 10):
        df[f'return_lag_{lag}'] = daily_return.shift(lag)

    # Optional indicators, listed in the order their lag columns are emitted.
    optional_sources = [
        (prefix, column)
        for prefix, column in (('rsi', 'RSI14'), ('macd', 'MACD'))
        if column in df.columns
    ]
    for lag in (1, 2, 3):
        for prefix, column in optional_sources:
            df[f'{prefix}_lag_{lag}'] = df[column].shift(lag)

    if 'volume_ratio_20' in df.columns:
        volume_ratio = df['volume_ratio_20']
        for lag in (1, 2):
            df[f'volume_ratio_lag_{lag}'] = volume_ratio.shift(lag)
    return df
def create_volume_features(df):
    """Add volume-derived columns to ``df`` in place and return it.

    Produces 10/20/50-row volume averages, the current volume relative to each
    average, ``Close * Volume`` with its 5-row mean, and the volume relative to
    its value five rows earlier.
    """
    volume = df['Volume']
    windows = (10, 20, 50)

    # All averages first, then all ratios, to keep the column order stable.
    for window in windows:
        df[f'volume_sma_{window}'] = volume.rolling(window).mean()
    for window in windows:
        df[f'volume_ratio_{window}'] = volume / df[f'volume_sma_{window}']

    df['price_volume'] = df['Close'] * volume
    df['pv_sma_5'] = df['price_volume'].rolling(5).mean()
    df['volume_momentum_5'] = volume / volume.shift(5)
    return df
def create_momentum_features(df):
    """Add price momentum and rate-of-change columns to ``df`` and return it.

    ``momentum_{n}d`` is ``Close / Close.shift(n) - 1`` for n in 3, 5, 10, 20;
    ``roc_{n}d`` is the change since n rows ago divided by that earlier close,
    for n in 5 and 10. ``df`` is modified in place.
    """
    close = df['Close']
    for span in (3, 5, 10, 20):
        df[f'momentum_{span}d'] = close / close.shift(span) - 1
    for span in (5, 10):
        earlier = close.shift(span)
        df[f'roc_{span}d'] = (close - earlier) / earlier
    return df
def create_position_features(df):
    """Add range-position columns to ``df`` in place and return it.

    For 10/20/50-row windows this stores the rolling high, the rolling low and
    where ``Close`` sits between them (0 = at the low, 1 = at the high). When
    an ``SMA20`` column exists it also stores bands two 20-row standard
    deviations either side of it and the position of ``Close`` between them.
    """
    close = df['Close']
    for window in (10, 20, 50):
        high_col = f'high_{window}d'
        low_col = f'low_{window}d'
        df[high_col] = df['High'].rolling(window).max()
        df[low_col] = df['Low'].rolling(window).min()
        trading_range = df[high_col] - df[low_col]
        df[f'price_position_{window}'] = (close - df[low_col]) / trading_range

    if 'SMA20' in df.columns:
        band_offset = close.rolling(20).std() * 2
        df['bb_upper'] = df['SMA20'] + band_offset
        df['bb_lower'] = df['SMA20'] - band_offset
        band_width = df['bb_upper'] - df['bb_lower']
        df['bb_position'] = (close - df['bb_lower']) / band_width
    return df
def process_stock_data(df):
    """Return a copy of ``df`` with every indicator, feature and label column added.

    The look-back windows come from the module-level sidebar values
    ``short_period``, ``long_period`` and ``rsi_period``; the column names
    (``SMA20``, ``SMA50``, ``RSI14``, ...) stay fixed whatever those values are.
    The input frame is not modified.
    """
    df = df.copy()
    close = df['Close']

    # Moving averages: the "20" columns use the short window, the "50" columns the long one.
    for prefix, smoother in (('SMA', SMA), ('EMA', EMA)):
        df[f'{prefix}20'] = smoother(close, short_period)
        df[f'{prefix}50'] = smoother(close, long_period)

    # Momentum oscillators.
    df['RSI14'] = RSI(close, rsi_period)
    df['RSI20'] = RSI(close, rsi_period + 6)  # second RSI with a window 6 rows longer
    macd_line, macd_signal, macd_hist = MACD(close)
    df['MACD'] = macd_line
    df['MACD_signal'] = macd_signal
    df['MACD_hist'] = macd_hist

    # Feature builders run in this exact order. The lag builder runs before the
    # volume builder, so 'volume_ratio_20' does not exist yet and no
    # volume_ratio_lag_* columns are produced.
    feature_builders = (
        create_volatility_features,
        create_enhanced_lag_features,
        create_volume_features,
        create_momentum_features,
        create_position_features,
    )
    for build in feature_builders:
        df = build(df)

    # Binary flags derived from the indicators above.
    df['SMA_crossover'] = (df['SMA20'] > df['SMA50']).astype(int)
    df['RSI_oversold'] = (df['RSI14'] < 30).astype(int)

    # Label: 1 when the next row closes higher. The final row has no next row,
    # so its next_close is NaN and its target evaluates to 0.
    df['next_close'] = df['Close'].shift(-1)
    df['target'] = (df['next_close'] > df['Close']).astype(int)
    return df
def _flatten_price_columns(data):
    """Reduce MultiIndex columns to their first level, leaving flat columns untouched."""
    # Guarding on MultiIndex matters: indexing a plain string label with [0]
    # would truncate 'Close' to 'C'.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = [col[0] for col in data.columns]
    return data


@st.cache_data
def _download_prices(symbol, start_date, end_date):
    """Download and flatten price history; exceptions propagate to the caller."""
    data = yf.download(symbol, start=start_date, end=end_date, session=session)
    return _flatten_price_columns(data)


def load_stock_data(symbol, start_date, end_date):
    """Load stock data from Yahoo Finance.

    Args:
        symbol: Ticker passed to ``yf.download``.
        start_date: Start of the requested range.
        end_date: End of the requested range.

    Returns:
        The downloaded frame with flat column names, or ``None`` when the
        download raised (an error is shown in the app in that case).
    """
    # Only the successful download is cached. Catching the exception out here,
    # rather than inside the cached function, keeps a transient failure from
    # being stored as a cached ``None`` for these arguments.
    try:
        return _download_prices(symbol, start_date, end_date)
    except Exception as e:  # UI boundary: report any download/parsing failure
        st.error(f"Error loading data: {e}")
        return None
# Columns pulled from the processed frame, in this order, to build the model's
# input vector. One group per family of features.
FEATURES = [
    # Raw price/volume and moving averages
    'Close', 'Volume', 'SMA20', 'SMA50', 'EMA20', 'EMA50',
    # Oscillators and derived flags
    'RSI14', 'MACD', 'MACD_signal', 'MACD_hist', 'SMA_crossover', 'RSI_oversold',
    # Return and volatility
    'return_1d',
    'volatility_5d', 'volatility_10d', 'volatility_20d', 'volatility_30d',
    'vol_ratio_5_20', 'vol_ratio_10_20', 'vol_rank_20', 'vol_rank_50',
    # Lagged returns and indicators
    'return_lag_1', 'return_lag_2', 'return_lag_3', 'return_lag_5', 'return_lag_10',
    'rsi_lag_1', 'macd_lag_1', 'rsi_lag_2', 'macd_lag_2', 'rsi_lag_3', 'macd_lag_3',
    # Volume
    'volume_sma_10', 'volume_sma_20', 'volume_sma_50',
    'volume_ratio_10', 'volume_ratio_20', 'volume_ratio_50',
    'price_volume', 'pv_sma_5', 'volume_momentum_5',
    # Momentum / rate of change
    'momentum_3d', 'momentum_5d', 'momentum_10d', 'momentum_20d', 'roc_5d', 'roc_10d',
    # Position inside recent ranges and bands
    'high_10d', 'low_10d', 'price_position_10',
    'high_20d', 'low_20d', 'price_position_20',
    'high_50d', 'low_50d', 'price_position_50',
    'bb_upper', 'bb_lower', 'bb_position',
    # NOTE(review): 'target' is the next-day label and is part of the vector
    # handed to the scaler/model, making 60 entries in total. Confirm against
    # the training pipeline whether the label is really meant to be an input.
    'target',
]
# Main app logic
MODEL_PATH = 'logistic_regression_model.pkl'
SCALER_PATH = 'scaler.pkl'


def _load_pickle(path):
    """Unpickle one artifact from ``path`` and close the file afterwards."""
    # NOTE: unpickling can execute arbitrary code; these files must only ever
    # be artifacts shipped with the app, never user-supplied.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def _line_figure(frame, traces, title, yaxis_title, height):
    """Build a line chart with one trace per ``(column, name, line)`` entry."""
    fig = go.Figure()
    for column, name, line in traces:
        fig.add_trace(go.Scatter(
            x=frame.index,
            y=frame[column],
            mode='lines',
            name=name,
            line=line
        ))
    fig.update_layout(
        title=title,
        xaxis_title="Date",
        yaxis_title=yaxis_title,
        height=height
    )
    return fig


st.header(f"📊 Latest Data Prediction for {selected_stock}")
with st.spinner("Loading stock data..."):
    stock_data = load_stock_data(selected_stock, start_date, end_date)

if stock_data is not None and not stock_data.empty:
    # Process the data
    processed_data = process_stock_data(stock_data)
    # NOTE(review): process_stock_data leaves next_close as NaN on the final
    # row, so dropna() also removes the most recent downloaded session. The
    # "latest" row used below is therefore the session before it, and FEATURES
    # includes its already-known 'target'. Confirm how the pickled scaler/model
    # were fitted before changing which row (and which columns) are fed in.
    processed_data = processed_data.dropna()

    if len(processed_data) > 0:
        # Latest fully-populated row drives both the metrics and the prediction
        latest_data = processed_data.iloc[-1]

        # Display current stock info
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("Current Price", f"₹{latest_data['Close']:.2f}")
        with col2:
            if len(processed_data) > 1:
                previous_close = processed_data.iloc[-2]['Close']
                daily_change = ((latest_data['Close'] - previous_close) / previous_close) * 100
                st.metric("Daily Change", f"{daily_change:.2f}%")
            else:
                # A single usable row has no previous close to compare against.
                st.metric("Daily Change", "N/A")
        with col3:
            st.metric("Volume", f"{latest_data['Volume']:,.0f}")
        with col4:
            st.metric("RSI14", f"{latest_data['RSI14']:.2f}")

        # Load the trained model and the scaler it was fitted with. A missing or
        # unreadable artifact is reported instead of crashing, so the charts
        # below still render.
        try:
            model = _load_pickle(MODEL_PATH)
            scaler = _load_pickle(SCALER_PATH)
        except (OSError, pickle.UnpicklingError) as exc:
            model = scaler = None
            st.error(f"Unable to load the trained model: {exc}")

        if model is not None:
            # Create feature vector (one row, columns in FEATURES order)
            feature_vector = latest_data[FEATURES].values.reshape(1, -1)
            # Scale the features
            feature_vector_scaled = scaler.transform(feature_vector)
            # Make prediction; the reported confidence is the larger class probability
            prediction = model.predict(feature_vector_scaled)[0]
            probability = model.predict_proba(feature_vector_scaled)[0].max()

            # Display prediction
            st.header("🔮 Prediction")
            col1, col2 = st.columns(2)
            with col1:
                if prediction == 1:
                    st.success("📈 **PREDICTION: UP**")
                    st.write(f"The model predicts the stock will go **UP** tomorrow with {probability:.1%} confidence.")
                else:
                    st.error("📉 **PREDICTION: DOWN**")
                    st.write(f"The model predicts the stock will go **DOWN** tomorrow with {probability:.1%} confidence.")
            with col2:
                # Confidence gauge
                fig_gauge = go.Figure(go.Indicator(
                    mode="gauge+number",
                    value=probability * 100,
                    domain={'x': [0, 1], 'y': [0, 1]},
                    title={'text': "Confidence %"},
                    gauge={
                        'axis': {'range': [None, 100]},
                        'bar': {'color': "darkgreen" if prediction == 1 else "darkred"},
                        'steps': [
                            {'range': [0, 50], 'color': "lightgray"},
                            {'range': [50, 80], 'color': "yellow"},
                            {'range': [80, 100], 'color': "lightgreen"}
                        ],
                        'threshold': {
                            'line': {'color': "red", 'width': 4},
                            'thickness': 0.75,
                            'value': 90
                        }
                    }
                ))
                fig_gauge.update_layout(height=300)
                st.plotly_chart(fig_gauge, use_container_width=True)

        # Technical indicators chart
        st.header("📈 Technical Analysis")
        last_60 = processed_data.iloc[-60:]
        last_30 = processed_data.iloc[-30:]

        # Price and Simple moving averages
        fig_price = _line_figure(
            last_60,
            [
                ('Close', 'Close Price', dict(color='blue', width=2)),
                ('SMA20', 'SMA20', dict(color='orange', width=1)),
                ('SMA50', 'SMA50', dict(color='red', width=1)),
            ],
            title=f"{selected_stock} - Price and Simple Moving Averages (Last 60 Days)",
            yaxis_title="Price (₹)",
            height=400,
        )
        st.plotly_chart(fig_price, use_container_width=True)

        # Price and Exponential moving averages. The chart plots the last 30
        # rows, so the title says 30 (it previously claimed 60).
        fig_price = _line_figure(
            last_30,
            [
                ('Close', 'Close Price', dict(color='blue', width=2)),
                ('EMA20', 'EMA20', dict(color='orange', width=1)),
                ('EMA50', 'EMA50', dict(color='red', width=1)),
            ],
            title=f"{selected_stock} - Price and Exponential Moving Averages (Last 30 Days)",
            yaxis_title="Price (₹)",
            height=400,
        )
        st.plotly_chart(fig_price, use_container_width=True)

        col1, col2 = st.columns(2)
        with col1:
            # RSI chart; the title reflects the window chosen on the sidebar slider
            fig_rsi = _line_figure(
                last_30,
                [('RSI14', 'RSI14', dict(color='purple'))],
                title=f"RSI ({rsi_period}-day)",
                yaxis_title="RSI",
                height=300,
            )
            fig_rsi.add_hline(y=70, line_dash="dash", line_color="red", annotation_text="Overbought")
            fig_rsi.add_hline(y=30, line_dash="dash", line_color="green", annotation_text="Oversold")
            st.plotly_chart(fig_rsi, use_container_width=True)
        with col2:
            # MACD chart
            fig_macd = _line_figure(
                last_30,
                [
                    ('MACD', 'MACD', dict(color='blue')),
                    ('MACD_signal', 'Signal', dict(color='red')),
                ],
                title="MACD",
                yaxis_title="MACD",
                height=300,
            )
            st.plotly_chart(fig_macd, use_container_width=True)

        # Feature importance (hard-coded mock values for demo; not read from the model)
        st.header("🎯 Key Factors")
        st.write("Most important features affecting the prediction:")
        mock_features = ['RSI14', 'return_lag_1', 'volatility_5d', 'MACD', 'volume_ratio_20']
        mock_importance = [0.15, 0.12, 0.10, 0.08, 0.07]
        fig_importance = px.bar(
            x=mock_importance,
            y=mock_features,
            orientation='h',
            title="Feature Importance"
        )
        fig_importance.update_layout(height=300)
        st.plotly_chart(fig_importance, use_container_width=True)
    else:
        st.error("Not enough data to make a prediction. Please try a different stock or date range.")
else:
    st.error("Unable to load stock data. Please check the symbol and try again.")
# Sidebar information
st.sidebar.markdown("---")
st.sidebar.header("ℹ️ About")
# Blank lines around the list matter in Markdown: without one before the
# disclaimer it is rendered as a continuation of the last bullet.
st.sidebar.write("""
This app uses a Logistic Regression model trained on:

- **50 Indian stocks** from NSE
- **59 technical features** including RSI, MACD, moving averages, volatility measures, and lag features
- **Historical data** for pattern recognition

**Disclaimer**: This is for educational purposes only. Always do your own research before making investment decisions.
""")
st.sidebar.markdown("---")
# Hard-coded evaluation figures shown as a bulleted list
st.sidebar.write("**Model Performance:**")
st.sidebar.write("• Accuracy: 55%")
st.sidebar.write("• F1 Score: 0.4839")
st.sidebar.write("• AUC: 0.5370")
st.sidebar.write("• Average Precision (AP): 0.5300")
# Footer
st.markdown("---")
st.markdown("**⚠️ Disclaimer**: This prediction model is for research purposes only. Stock market investments are subject to market risks. Please consult with a financial advisor before making investment decisions.")