import streamlit as st
import pandas as pd
import requests
from datetime import datetime, timedelta
import time
import subprocess
from dotenv import load_dotenv
import os
import torch
from torch.utils.data import DataLoader
from transformers import BertTokenizer, BertForSequenceClassification
import matplotlib.pyplot as plt
import altair as alt

# Load environment variables from .env file
load_dotenv()
AppID = os.getenv('APP_ID')
APIKey = os.getenv('API_KEY')
PolygonAPIKey = os.getenv('POLYGON_API_KEY')

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

st.set_page_config(
    page_title="Stock News and Data Analysis",
    page_icon="📈",
    initial_sidebar_state="expanded",
)


class CustomDataset(torch.utils.data.Dataset):
    """Wraps a DataFrame so each row's keywords are tokenized for BERT."""

    def __init__(self, features, tokenizer, max_length=512):
        self.features = features
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        feature = self.features.iloc[idx]
        text = f"{feature['Keywords']}"
        inputs = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        return inputs


def get_auth_header(app_id, api_key):
    """Build the Aylien News API authentication headers."""
    return {
        'X-Application-Id': app_id,
        'X-Application-Key': api_key
    }


def fetch_stories_for_date_range(ticker, headers, start_date, end_date):
    """Fetch all English stories for a ticker, following cursor pagination."""
    all_stories = []
    params = {
        'entities.stock_tickers': ticker,
        'published_at.start': start_date.strftime('%Y-%m-%dT%H:%M:%SZ'),
        'published_at.end': end_date.strftime('%Y-%m-%dT%H:%M:%SZ'),
        'language': 'en',
        'per_page': 100,
        'sort_by': 'published_at',
        'sort_direction': 'desc'
    }
    while True:
        time.sleep(1)  # Throttle requests to respect the API rate limit
        response = requests.get('https://api.aylien.com/news/stories',
                                headers=headers, params=params)
        if response.status_code != 200:
            break
        data = response.json()
        stories = data.get('stories', [])
        if not stories:
            break
        all_stories.extend(stories)
        if 'next' in data.get('links', {}):
            params['cursor'] = data['links']['next']
        else:
            break
    return all_stories


def get_stock_data(api_key, symbol, start_date, end_date):
    """Fetch daily OHLCV aggregates for a symbol from the Polygon API."""
    time.sleep(1)
    base_url = (f"https://api.polygon.io/v2/aggs/ticker/{symbol}/range/1/day/"
                f"{start_date}/{end_date}?apiKey={api_key}")
    response = requests.get(base_url)
    if response.status_code == 200:
        data = response.json()
        return data.get('results', [])
    return []


def predict_stock_price(model, data_loader):
    """Run the model over a DataLoader and collect raw logits as predictions."""
    model.eval()
    predictions = []
    with torch.no_grad():
        for batch in data_loader:
            # Drop the extra dimension added by the default collate_fn
            inputs = {key: val.squeeze(1).to(model.device) for key, val in batch.items()}
            outputs = model(**inputs)
            logits = outputs.logits
            predictions.extend(logits.cpu().numpy())
    return predictions


# Main app interface
st.title("Stock News and Data Analysis")

tickers = ['AAPL', 'AMZN', 'TSLA', 'MSFT', 'AMD', 'BA', 'GOOGL', 'NVDA']
tab1, tab2 = st.tabs(["News and Stock Data", "Predictive Stock Price"])

with tab1:
    selected_ticker = st.selectbox('Select a stock symbol:', tickers)
    start_date = st.date_input("Start date", datetime.now() - timedelta(days=30))
    end_date = st.date_input("End date", datetime.now())

    # Fetch Stock Data button and functionality
    if st.button('Fetch Stock Data'):
        stock_data = get_stock_data(PolygonAPIKey, selected_ticker, start_date, end_date)
        if stock_data:
            stock_df = pd.DataFrame(stock_data)
            stock_df['date'] = pd.to_datetime(stock_df['t'], unit='ms').dt.date
            stock_df.rename(columns={'v': 'Volume', 'o': 'Open', 'c': 'Close',
                                     'h': 'High', 'l': 'Low'}, inplace=True)
            st.subheader(f"Stock Data for {selected_ticker}")
            st.dataframe(stock_df.style.format(subset=['Open', 'Close', 'High', 'Low'],
                                               formatter="{:.2f}"))

            # Enhanced, interactive price chart
            st.write(f"Stock Price Chart for {selected_ticker}")
            line_chart = alt.Chart(stock_df).mark_line().encode(
                x=alt.X('date:T', title='Date'),
                y=alt.Y('Close:Q', title='Close Price'),
                tooltip=['date', 'Open', 'High', 'Low', 'Close', 'Volume']
            ).interactive().properties(
                width=800,
                height=400
            )
            st.altair_chart(line_chart, use_container_width=True)
        else:
            st.error('Failed to fetch stock data. Please check the ticker or try again later.')

    # Initialize session state variables
    if 'story_index' not in st.session_state:
        st.session_state.story_index = 0  # Index to keep track of displayed stories
    if 'fetched_stories' not in st.session_state:
        st.session_state.fetched_stories = []

    with st.expander("News Stories", expanded=True):
        headers = get_auth_header(AppID, APIKey)

        # Fetch stories only if we haven't already, or if the "Fetch News Stories" button is pressed
        if st.button('Fetch News Stories') or not st.session_state.fetched_stories:
            st.session_state.fetched_stories = fetch_stories_for_date_range(
                selected_ticker, headers, start_date, end_date)
            st.session_state.story_index = 0  # Reset story index

        if st.session_state.fetched_stories:
            displayed_stories = st.session_state.fetched_stories[
                st.session_state.story_index:st.session_state.story_index + 5]
            for story in displayed_stories:
                st.markdown(f"**Title:** {story.get('title')}")
                st.markdown(f"**Summary:** {story.get('body')}")
                sentiment = story.get('sentiment', {}).get('polarity', 'neutral')
                sentiment_icon = ("🔴" if sentiment == "negative"
                                  else "🟢" if sentiment == "positive" else "🟡")
                st.markdown(f"**Sentiment:** {sentiment_icon} {sentiment.capitalize()}")
                st.markdown(f"**Source:** {story.get('source', {}).get('name')}")
                st.markdown(f"**Published At:** {story.get('published_at')}")
                st.markdown("---")

            # Load More Stories button
            if st.button('Load More Stories'):
                # Only advance if there are more stories to show
                if st.session_state.story_index + 5 < len(st.session_state.fetched_stories):
                    st.session_state.story_index += 5
                    st.rerun()
                else:
                    st.warning("No more stories to load.")
        else:
            st.error('No stories fetched. Please check the ticker or try a different date range.')
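
# Note: each CSV under CurrentDatabase/ is assumed to provide at least the
# columns consumed in tab2 below ('Publication Date', 'Sentiment Polarity',
# 'Sentiment Confidence', 'Keywords', 'stock_date', 'percentage_change'),
# with 'Sentiment Polarity' stored as the strings neutral/positive/negative.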
with tab2:
    # Per-ticker paths to the cached news/stock CSV and the fine-tuned model
    stock_mapping = {
        "AAPL": {"csv_path": "CurrentDatabase/AAPL_db.csv", "model_path": "TrainedModels/saved_model_AAPL/"},
        "AMD": {"csv_path": "CurrentDatabase/AMD_db.csv", "model_path": "TrainedModels/saved_model_AMD/"},
        "GOOGL": {"csv_path": "CurrentDatabase/GOOGL_db.csv", "model_path": "TrainedModels/saved_model_GOOGL/"},
        "MSFT": {"csv_path": "CurrentDatabase/MSFT_db.csv", "model_path": "TrainedModels/saved_model_MSFT/"},
        "NVDA": {"csv_path": "CurrentDatabase/NVDA_db.csv", "model_path": "TrainedModels/saved_model_NVDA/"},
        "TSLA": {"csv_path": "CurrentDatabase/TSLA_db.csv", "model_path": "TrainedModels/saved_model_TSLA/"},
        "AMZN": {"csv_path": "CurrentDatabase/AMZN_db.csv", "model_path": "TrainedModels/saved_model_AMZN/"},
        "BA": {"csv_path": "CurrentDatabase/BA_db.csv", "model_path": "TrainedModels/saved_model_BA/"}
    }

    # Select stock symbol from dropdown
    selected_stock = st.selectbox("Select a stock symbol:", list(stock_mapping.keys()))

    # Load the new data
    new_data = pd.read_csv(stock_mapping[selected_stock]["csv_path"])

    # Convert 'Sentiment Polarity' to a numerical representation
    new_data['Sentiment Polarity'] = new_data['Sentiment Polarity'].map(
        {'neutral': 0, 'positive': 1, 'negative': -1})

    # Convert 'Publication Date' and 'stock_date' to datetime objects
    new_data['Publication Date'] = pd.to_datetime(new_data['Publication Date'])
    new_data['stock_date'] = pd.to_datetime(new_data['stock_date'])

    # Keep only the required columns
    new_data = new_data[['Publication Date', 'Sentiment Polarity', 'Sentiment Confidence',
                         'Keywords', 'stock_date', 'percentage_change']]

    # Initialize the tokenizer and the fine-tuned model for the selected stock
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertForSequenceClassification.from_pretrained(stock_mapping[selected_stock]["model_path"])

    # Move model to GPU if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Dataset variant that serializes all row features into the input text
    # (shadows the keywords-only CustomDataset defined above)
    class CustomDataset(torch.utils.data.Dataset):
        def __init__(self, features, tokenizer, max_length=512):
            self.features = features
            self.tokenizer = tokenizer
            self.max_length = max_length

        def __len__(self):
            return len(self.features)

        def __getitem__(self, idx):
            feature = self.features.iloc[idx]
            text = (f"Publication Date: {feature['Publication Date']}, "
                    f"Sentiment Polarity: {feature['Sentiment Polarity']}, "
                    f"Sentiment Confidence: {feature['Sentiment Confidence']}, "
                    f"Keywords: {feature['Keywords']}, "
                    f"Stock Date: {feature['stock_date']}")
            inputs = self.tokenizer.encode_plus(
                text,
                add_special_tokens=True,
                max_length=self.max_length,
                padding='max_length',
                return_token_type_ids=False,
                truncation=True,
                return_attention_mask=True,
                return_tensors='pt'
            )
            return inputs

    # Create DataLoader for the new data
    new_dataset = CustomDataset(new_data, tokenizer)
    new_dataloader = DataLoader(new_dataset, batch_size=32)

    # Predictions over the historical rows
    predictions = []
    model.eval()
    with torch.no_grad():
        for batch_inputs in new_dataloader:
            # Drop the extra dimension added by the default collate_fn
            batch_inputs = {key: val.squeeze(1).to(device) for key, val in batch_inputs.items()}
            outputs = model(**batch_inputs)
            logits = outputs.logits
            predictions.extend(logits.flatten().cpu().numpy())

    # Raw logits are treated as percentage-change predictions;
    # adjust if your model is trained to predict something else
    predicted_percentage_change = predictions

    # Get actual percentage change from the CSV file
    actual_percentage_change = new_data['percentage_change'].values

    # Prediction for tomorrow, using placeholder sentiment features
    tomorrow_date = datetime.now() + timedelta(days=1)
    with torch.no_grad():
        text = (f"Publication Date: {tomorrow_date}, Sentiment Polarity: 0, "
                f"Sentiment Confidence: 0, Keywords: None, Stock Date: None")
        inputs = tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=512,
            padding='max_length',
            return_token_type_ids=False,
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        inputs = {key: val.to(device) for key, val in inputs.items()}
        outputs = model(**inputs)
        logits = outputs.logits
        tomorrow_prediction = logits.item()
    # Function to run the CurrentDB.py update script
    def run_currentdb_script():
        try:
            # Show the message first so it is visible while the script runs
            st.write("Please wait a moment, updating the current database...")
            subprocess.run(["python", "CurrentDB.py"])
        except Exception as e:
            st.error(f"An error occurred while running CurrentDB.py: {e}")

    # Button to refresh the local database
    if st.button("Fetch Latest Data"):
        run_currentdb_script()

    # Plotting
    fig, ax = plt.subplots(figsize=(12, 8))

    # Plot actual vs predicted percentage change
    ax.plot(new_data['stock_date'], actual_percentage_change,
            label='Actual Percentage Change', marker='o', linestyle='-')

    # Plot predicted percentage change if available
    if predicted_percentage_change:
        ax.plot(new_data['stock_date'], predicted_percentage_change,
                label='Predicted Percentage Change', marker='x', linestyle='--')

    # Plot tomorrow's prediction as a single point
    ax.plot(tomorrow_date, tomorrow_prediction, label='Tomorrow Prediction',
            marker='*', linestyle='--')

    # Draw a dotted green line from the last predicted value to tomorrow's prediction
    if predicted_percentage_change:
        last_predicted_date = new_data['stock_date'].iloc[-1]
        last_predicted_change = predicted_percentage_change[-1]
        ax.plot([last_predicted_date, tomorrow_date],
                [last_predicted_change, tomorrow_prediction], 'g--')

    # Formatting
    ax.set_xlabel('Date')
    ax.set_ylabel('Percentage Change')
    ax.set_title('Comparison of Actual vs Predicted Percentage Change')
    ax.legend()
    ax.grid(True)
    plt.xticks(rotation=45)

    # Render the comparison chart in the app
    st.title('Comparison of Actual vs Predicted Percentage Change')
    st.pyplot(fig)
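
# Usage note (a sketch; the actual filename may differ): assuming this script
# is saved as app.py, launch it with
#   streamlit run app.py
# A .env file in the working directory should define APP_ID, API_KEY, and
# POLYGON_API_KEY, matching the os.getenv calls at the top of this file.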