File size: 9,743 Bytes

318e02f

{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Z6OeRBuqH7cJ"
      },
      "outputs": [],
      "source": [
        "#@title Step 1: Installing and Importing Necessary Libraries\n",
        "# We are installing the necessary libraries in the Google Colab environment.\n",
        "# yfinance: To fetch financial data from Yahoo Finance.\n",
        "# tensorflow: To build and train the neural network.\n",
        "# scikit-learn: For data preprocessing (normalization).\n",
        "!pip install yfinance tensorflow scikit-learn pandas matplotlib -q\n",
        "\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "import matplotlib.pyplot as plt\n",
        "import yfinance as yf\n",
        "from sklearn.preprocessing import MinMaxScaler\n",
        "from tensorflow.keras.models import Sequential\n",
        "from tensorflow.keras.layers import LSTM, Dense, Dropout\n",
        "import datetime\n",
        "\n",
        "print(\"Libraries have been successfully installed and imported!\")\n",
        "\n",
        "\n",
        "#@title Step 2: Fetching and Visualizing Bitcoin Data\n",
        "# Fetch the daily BTC-USD (Bitcoin/US Dollar) history from Yahoo Finance.\n",
        "start_date = '2019-01-01'\n",
        "# End the request at today's date.\n",
        "end_date = datetime.date.today().strftime(\"%Y-%m-%d\")\n",
        "\n",
        "try:\n",
        "    btc_data = yf.download('BTC-USD', start=start_date, end=end_date)\n",
        "except Exception as e:\n",
        "    # Report the problem, then re-raise: every later step reads btc_data, so\n",
        "    # carrying on would only fail further down with a confusing NameError.\n",
        "    print(f\"An error occurred while fetching data: {e}\")\n",
        "    raise\n",
        "\n",
        "# NOTE(review): yfinance appears to signal some failed downloads by returning an\n",
        "# empty frame instead of raising -- stop here rather than train on nothing.\n",
        "if btc_data is None or btc_data.empty:\n",
        "    raise RuntimeError(f\"No BTC-USD data returned for {start_date} to {end_date}.\")\n",
        "\n",
        "# Some yfinance versions return two-level (field, ticker) columns even for a\n",
        "# single ticker. Keep only the field level so btc_data['Close'] is a Series.\n",
        "if isinstance(btc_data.columns, pd.MultiIndex):\n",
        "    btc_data.columns = btc_data.columns.get_level_values(0)\n",
        "\n",
        "print(f\"Bitcoin data between {start_date} and {end_date} has been fetched.\")\n",
        "print(\"First 5 rows of the dataset:\")\n",
        "print(btc_data.head())\n",
        "\n",
        "# The chart is illustrative only, so a plotting problem is reported but does\n",
        "# not stop the notebook.\n",
        "try:\n",
        "    # Select the style before the figure is created so it applies to all of it.\n",
        "    plt.style.use('seaborn-v0_8-darkgrid')\n",
        "    plt.figure(figsize=(14, 7))\n",
        "    plt.plot(btc_data['Close'], color='orange')\n",
        "    plt.title('Bitcoin Closing Prices (BTC-USD)', fontsize=16)\n",
        "    plt.xlabel('Date', fontsize=12)\n",
        "    plt.ylabel('Price (USD)', fontsize=12)\n",
        "    plt.legend(['Closing Price'])\n",
        "    plt.show()\n",
        "except Exception as e:\n",
        "    print(f\"Could not draw the closing-price chart: {e}\")\n",
        "\n",
        "\n",
        "#@title Step 3: Data Preprocessing\n",
        "# Turn the closing prices into scaled look-back windows for training.\n",
        "\n",
        "# Only the 'Close' column is used, as a single-column 2D array.\n",
        "close_data = btc_data['Close'].values.reshape(-1, 1)\n",
        "\n",
        "# Chronological split: the first 80% of the rows train the model, the rest test it.\n",
        "training_data_len = int(np.ceil(len(close_data) * 0.8))\n",
        "\n",
        "# The model predicts the next day's price from the previous 60 days' prices.\n",
        "prediction_days = 60\n",
        "if training_data_len <= prediction_days:\n",
        "    raise ValueError(\n",
        "        f\"Need more than {prediction_days} training rows to build a window, \"\n",
        "        f\"got {training_data_len}.\"\n",
        "    )\n",
        "\n",
        "# Scale to the 0-1 range. The scaler is fitted on the training rows only, so\n",
        "# the min/max of the test period cannot leak into training; test-period values\n",
        "# may therefore fall outside 0-1 after the transform.\n",
        "scaler = MinMaxScaler(feature_range=(0, 1))\n",
        "scaler.fit(close_data[:training_data_len])\n",
        "scaled_data = scaler.transform(close_data)\n",
        "\n",
        "# Scaled training rows.\n",
        "train_data = scaled_data[:training_data_len, :]\n",
        "\n",
        "# Each sample is the `prediction_days` values before day i; its target is day i.\n",
        "x_train = []\n",
        "y_train = []\n",
        "\n",
        "for i in range(prediction_days, len(train_data)):\n",
        "    x_train.append(train_data[i-prediction_days:i, 0])\n",
        "    y_train.append(train_data[i, 0])\n",
        "\n",
        "# Converting the lists to numpy arrays.\n",
        "x_train, y_train = np.array(x_train), np.array(y_train)\n",
        "\n",
        "# Reshaping the data into a 3D format suitable for the LSTM model: [number of samples, time steps, number of features]\n",
        "x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))\n",
        "print(f\"Training data prepared. x_train shape: {x_train.shape}\")\n",
        "\n",
        "\n",
        "#@title Step 4: Building the LSTM Model\n",
        "# Keras Sequential network: two stacked LSTM layers, each followed by dropout,\n",
        "# ending in a single-unit Dense layer that outputs the predicted (scaled) price.\n",
        "\n",
        "# Number of time steps in every training window.\n",
        "window_length = x_train.shape[1]\n",
        "\n",
        "model = Sequential([\n",
        "    # First LSTM returns the full sequence because another LSTM consumes it.\n",
        "    LSTM(units=50, return_sequences=True, input_shape=(window_length, 1)),\n",
        "    # Drop 20% of the activations during training to limit overfitting.\n",
        "    Dropout(0.2),\n",
        "    # Second LSTM returns only its final output.\n",
        "    LSTM(units=50, return_sequences=False),\n",
        "    Dropout(0.2),\n",
        "    # One output unit: a single value (the price) is predicted.\n",
        "    Dense(units=1),\n",
        "])\n",
        "\n",
        "# Adam optimizer with mean squared error as the loss.\n",
        "model.compile(loss='mean_squared_error', optimizer='adam')\n",
        "\n",
        "# Print the layer-by-layer architecture.\n",
        "model.summary()\n",
        "\n",
        "\n",
        "#@title Step 5: Training the Model\n",
        "# Fit the network on the prepared training windows.\n",
        "# epochs: how many full passes are made over the training set.\n",
        "# batch_size: how many samples are processed per gradient update.\n",
        "fit_options = {'epochs': 25, 'batch_size': 32}\n",
        "\n",
        "print(\"Starting model training...\")\n",
        "history = model.fit(x_train, y_train, **fit_options)\n",
        "print(\"Model training completed!\")\n",
        "\n",
        "\n",
        "#@title Step 6: Testing the Model and Evaluating Results\n",
        "# Start the test slice `prediction_days` rows before the split so the first\n",
        "# test day already has a complete look-back window.\n",
        "test_data = scaled_data[training_data_len - prediction_days:, :]\n",
        "\n",
        "# Targets stay in the original (unscaled) units.\n",
        "y_test = close_data[training_data_len:, :]\n",
        "\n",
        "# One look-back window per test day.\n",
        "x_test = np.array([\n",
        "    test_data[stop - prediction_days:stop, 0]\n",
        "    for stop in range(prediction_days, len(test_data))\n",
        "])\n",
        "# LSTM input layout: [number of samples, time steps, number of features].\n",
        "x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)\n",
        "\n",
        "# Predict in scaled space, then map the result back to USD.\n",
        "predictions = scaler.inverse_transform(model.predict(x_test))\n",
        "\n",
        "# Root Mean Squared Error between predicted and actual closing prices.\n",
        "squared_errors = (predictions - y_test) ** 2\n",
        "rmse = np.sqrt(squared_errors.mean())\n",
        "print(f'\\nModel Error Rate on Test Data (RMSE): {rmse:.2f} USD')\n",
        "\n",
        "# Split the original frame the same way for plotting; the copy keeps the new\n",
        "# column from being written into a slice of btc_data.\n",
        "train = btc_data.iloc[:training_data_len]\n",
        "valid = btc_data.iloc[training_data_len:].copy()\n",
        "valid.loc[:, 'Predictions'] = predictions\n",
        "\n",
        "# Training history, actual test prices and predicted test prices on one chart.\n",
        "plt.figure(figsize=(16, 8))\n",
        "plt.plot(train['Close'], color='blue', alpha=0.6)\n",
        "plt.plot(valid['Close'], color='green')\n",
        "plt.plot(valid['Predictions'], color='red', linestyle='--')\n",
        "plt.title('Model Predictions vs Actual Prices', fontsize=16)\n",
        "plt.xlabel('Date', fontsize=12)\n",
        "plt.ylabel('Closing Price (USD)', fontsize=12)\n",
        "plt.legend(['Training Data', 'Actual Price', 'Predicted Price'], loc='upper left')\n",
        "plt.show()\n",
        "\n",
        "# Side-by-side table for the 15 most recent test days.\n",
        "print(\"\\nLast 15 Days of Actual and Predicted Prices:\")\n",
        "print(valid[['Close', 'Predictions']].tail(15))\n",
        "\n",
        "\n",
        "#@title Step 7: Using the Model to Predict the Future\n",
        "\n",
        "# The most recent `prediction_days` scaled closes form the single input window,\n",
        "# shaped [1 sample, prediction_days time steps, 1 feature].\n",
        "last_60_days = scaled_data[-prediction_days:]\n",
        "X_predict = np.reshape(last_60_days, (1, prediction_days, 1))\n",
        "\n",
        "# Predict in scaled space, then convert back to USD.\n",
        "predicted_price_scaled = model.predict(X_predict)\n",
        "predicted_price = scaler.inverse_transform(predicted_price_scaled)\n",
        "\n",
        "# Latest observed close. Reading it from close_data yields a plain scalar\n",
        "# whatever column layout btc_data has.\n",
        "last_index = btc_data.index[-1]\n",
        "last_actual_price = float(close_data[-1, 0])\n",
        "\n",
        "# The input window ends at the last downloaded row, so the forecast is for the\n",
        "# day right after that row -- which is not necessarily tomorrow's calendar date.\n",
        "prediction_date = last_index + datetime.timedelta(days=1)\n",
        "\n",
        "# Print results\n",
        "print(\"\\n\" + \"=\"*50)\n",
        "print(\"FUTURE PREDICTION\")\n",
        "print(\"=\"*50)\n",
        "print(f\"Last closing price({last_index.strftime('%Y-%m-%d')}): {last_actual_price:.2f} USD\")\n",
        "print(f\"The model {prediction_date.strftime('%Y-%m-%d')} Bitcoin price prediction for: {float(predicted_price[0, 0]):.2f} USD\")\n",
        "print(\"=\"*50)\n",
        "print(\"\\nWARNING: This model is for educational purposes only and does not constitute financial advice.\")\n",
        "\n",
        "\n"
      ]
    }
  ]
}