File size: 9,743 Bytes
318e02f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Z6OeRBuqH7cJ"
      },
      "outputs": [],
      "source": [
        "#@title Step 1: Installing and Importing Necessary Libraries\n",
        "# We are installing the necessary libraries in the Google Colab environment.\n",
        "# yfinance: To fetch financial data from Yahoo Finance.\n",
        "# tensorflow: To build and train the neural network.\n",
        "# scikit-learn: For data preprocessing (normalization).\n",
        "!pip install yfinance tensorflow scikit-learn pandas matplotlib -q\n",
        "\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "import matplotlib.pyplot as plt\n",
        "import yfinance as yf\n",
        "from sklearn.preprocessing import MinMaxScaler\n",
        "from tensorflow.keras.models import Sequential\n",
        "from tensorflow.keras.layers import LSTM, Dense, Dropout\n",
        "import datetime\n",
        "\n",
        "print(\"Libraries have been successfully installed and imported!\")\n",
        "\n",
        "\n",
        "#@title Step 2: Fetching and Visualizing Bitcoin Data\n",
        "# Fetch daily BTC-USD (Bitcoin/US Dollar) prices from Yahoo Finance, starting in 2019.\n",
        "start_date = '2019-01-01'\n",
        "# The end date is today's date. yfinance documents `end` as exclusive, so the\n",
        "# last row returned is expected to be the most recent day before today.\n",
        "end_date = datetime.date.today().strftime(\"%Y-%m-%d\")\n",
        "\n",
        "try:\n",
        "    btc_data = yf.download('BTC-USD', start=start_date, end=end_date)\n",
        "except Exception as e:\n",
        "    # Every later step needs `btc_data`, so stop here with a clear error instead of\n",
        "    # printing a message and failing further down with a NameError.\n",
        "    raise RuntimeError(f\"An error occurred while fetching data: {e}\") from e\n",
        "\n",
        "# yfinance may report a failed download by returning an empty frame rather than\n",
        "# raising, so an empty result is treated as an error as well.\n",
        "if btc_data.empty:\n",
        "    raise RuntimeError(\n",
        "        f\"No BTC-USD data was returned for {start_date} to {end_date}; \"\n",
        "        \"check the network connection and try again.\"\n",
        "    )\n",
        "\n",
        "# Some yfinance versions return (field, ticker) MultiIndex columns even for a single\n",
        "# ticker. Keep only the field level so btc_data['Close'] is a plain Series.\n",
        "if isinstance(btc_data.columns, pd.MultiIndex):\n",
        "    btc_data.columns = btc_data.columns.get_level_values(0)\n",
        "\n",
        "print(f\"Bitcoin data between {start_date} and {end_date} has been fetched.\")\n",
        "print(\"First 5 rows of the dataset:\")\n",
        "print(btc_data.head())\n",
        "\n",
        "# Plot the closing prices. The style is selected before the figure is created so\n",
        "# that it applies to the figure as well as to the axes.\n",
        "plt.style.use('seaborn-v0_8-darkgrid')\n",
        "plt.figure(figsize=(14, 7))\n",
        "plt.plot(btc_data['Close'], color='orange')\n",
        "plt.title('Bitcoin Closing Prices (BTC-USD)', fontsize=16)\n",
        "plt.xlabel('Date', fontsize=12)\n",
        "plt.ylabel('Price (USD)', fontsize=12)\n",
        "plt.legend(['Closing Price'])\n",
        "plt.show()\n",
        "\n",
        "\n",
        "#@title Step 3: Data Preprocessing\n",
        "# Prepare the closing prices for training.\n",
        "\n",
        "# Only the 'Close' column is used, reshaped to a single-feature column (n_days, 1).\n",
        "close_data = btc_data['Close'].values.reshape(-1, 1)\n",
        "\n",
        "# Chronological split: the first 80% of days for training, the remaining 20% for testing.\n",
        "training_data_len = int(np.ceil(len(close_data) * 0.8))\n",
        "\n",
        "# The model predicts the next day's price by looking at the past 60 days' prices.\n",
        "prediction_days = 60\n",
        "if training_data_len <= prediction_days:\n",
        "    raise ValueError(\n",
        "        f\"Need more than {prediction_days} training days to build a window, \"\n",
        "        f\"got {training_data_len}.\"\n",
        "    )\n",
        "\n",
        "# Scale prices towards the 0-1 range; neural networks train better on small inputs.\n",
        "# The scaler is fitted on the training rows only, so the test period's min/max do not\n",
        "# leak into training. Test values may therefore fall outside [0, 1].\n",
        "scaler = MinMaxScaler(feature_range=(0, 1))\n",
        "scaler.fit(close_data[:training_data_len])\n",
        "scaled_data = scaler.transform(close_data)\n",
        "\n",
        "# Training portion of the scaled series.\n",
        "train_data = scaled_data[:training_data_len, :]\n",
        "\n",
        "# Each sample is a window of `prediction_days` consecutive prices; its target is the\n",
        "# price on the day immediately after the window.\n",
        "x_train = np.array([\n",
        "    train_data[end - prediction_days:end, 0]\n",
        "    for end in range(prediction_days, len(train_data))\n",
        "])\n",
        "y_train = train_data[prediction_days:, 0].copy()\n",
        "\n",
        "# Reshape to the 3D layout the LSTM expects: [number of samples, time steps, number of features].\n",
        "x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))\n",
        "print(f\"Training data prepared. x_train shape: {x_train.shape}\")\n",
        "\n",
        "\n",
        "#@title Step 4: Building the LSTM Model\n",
        "# Define the network with Keras: two stacked LSTM layers, each followed by dropout,\n",
        "# and a single-unit dense output.\n",
        "\n",
        "model = Sequential([\n",
        "    # First LSTM (50 units) returns the full sequence so the second LSTM can consume it.\n",
        "    LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)),\n",
        "    # Dropout switches off 20% of the activations during training to limit overfitting.\n",
        "    Dropout(0.2),\n",
        "    # Second LSTM (50 units) returns only its final output.\n",
        "    LSTM(units=50, return_sequences=False),\n",
        "    Dropout(0.2),\n",
        "    # One output unit: the single predicted (scaled) price.\n",
        "    Dense(units=1),\n",
        "])\n",
        "\n",
        "# Adam optimizer with mean squared error as the loss.\n",
        "model.compile(optimizer='adam', loss='mean_squared_error')\n",
        "\n",
        "# Print the layer-by-layer architecture.\n",
        "model.summary()\n",
        "\n",
        "\n",
        "#@title Step 5: Training the Model\n",
        "# Fit the network on the prepared training windows.\n",
        "#   epochs     - number of full passes over the training set.\n",
        "#   batch_size - number of samples processed per gradient update.\n",
        "print(\"Starting model training...\")\n",
        "history = model.fit(\n",
        "    x=x_train,\n",
        "    y=y_train,\n",
        "    epochs=25,\n",
        "    batch_size=32,\n",
        ")\n",
        "print(\"Model training completed!\")\n",
        "\n",
        "\n",
        "#@title Step 6: Testing the Model and Evaluating Results\n",
        "# The test slice starts `prediction_days` rows before the split point so that the\n",
        "# first test day already has a full look-back window.\n",
        "test_data = scaled_data[training_data_len - prediction_days:, :]\n",
        "\n",
        "# Targets are taken from the original (unscaled) prices.\n",
        "y_test = close_data[training_data_len:, :]\n",
        "\n",
        "# One look-back window per test day, reshaped to [samples, time steps, features].\n",
        "x_test = np.array([\n",
        "    test_data[end - prediction_days:end, 0]\n",
        "    for end in range(prediction_days, len(test_data))\n",
        "])\n",
        "x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)\n",
        "\n",
        "# Predict on the test windows, then map the outputs back from the scaled range to USD.\n",
        "predictions = scaler.inverse_transform(model.predict(x_test))\n",
        "\n",
        "# RMSE (Root Mean Squared Error) between predicted and actual prices.\n",
        "rmse = np.sqrt(np.mean(np.square(predictions - y_test)))\n",
        "print(f'\\nModel Error Rate on Test Data (RMSE): {rmse:.2f} USD')\n",
        "\n",
        "# Split the original frame at the same point for plotting; the validation part is\n",
        "# copied so that adding a column does not trigger SettingWithCopyWarning.\n",
        "train = btc_data.iloc[:training_data_len]\n",
        "valid = btc_data.iloc[training_data_len:].copy()\n",
        "valid.loc[:, 'Predictions'] = predictions\n",
        "\n",
        "# Training history, actual test prices and predicted test prices on one chart.\n",
        "plt.figure(figsize=(16, 8))\n",
        "plt.plot(train['Close'], color='blue', alpha=0.6)\n",
        "plt.plot(valid['Close'], color='green')\n",
        "plt.plot(valid['Predictions'], color='red', linestyle='--')\n",
        "plt.title('Model Predictions vs Actual Prices', fontsize=16)\n",
        "plt.xlabel('Date', fontsize=12)\n",
        "plt.ylabel('Closing Price (USD)', fontsize=12)\n",
        "plt.legend(['Training Data', 'Actual Price', 'Predicted Price'], loc='upper left')\n",
        "plt.show()\n",
        "\n",
        "# Side-by-side actual and predicted values for the last 15 test days.\n",
        "print(\"\\nLast 15 Days of Actual and Predicted Prices:\")\n",
        "print(valid[['Close', 'Predictions']].tail(15))\n",
        "\n",
        "\n",
        "#@title Step 7: Using the Model to Predict the Future\n",
        "\n",
        "# The most recent `prediction_days` scaled closes form one input window of shape\n",
        "# (1, prediction_days, 1).\n",
        "last_60_days = scaled_data[-prediction_days:]\n",
        "X_predict = np.reshape(last_60_days, (1, prediction_days, 1))\n",
        "\n",
        "# Predict the next close and convert it back from the scaled range to USD.\n",
        "predicted_price_scaled = model.predict(X_predict)\n",
        "predicted_price = scaler.inverse_transform(predicted_price_scaled)\n",
        "\n",
        "# The input window ends at the last downloaded row, so the forecast is for the day\n",
        "# after that row. That is not necessarily tomorrow: the download's end date is\n",
        "# exclusive in yfinance, so the data normally stops before today. The forecast date\n",
        "# is therefore derived from the data rather than from the wall clock.\n",
        "last_row = btc_data.tail(1)\n",
        "last_index = last_row.index[0]\n",
        "prediction_date = last_index + datetime.timedelta(days=1)\n",
        "\n",
        "# Read the last close from the plain (n, 1) array built in Step 3. This yields a\n",
        "# scalar whether btc_data has flat or MultiIndex columns, avoiding float() on a Series.\n",
        "last_actual_price = float(close_data[-1, 0])\n",
        "\n",
        "# Print results\n",
        "print(\"\\n\" + \"=\"*50)\n",
        "print(\"FUTURE PREDICTION\")\n",
        "print(\"=\"*50)\n",
        "print(f\"Last closing price({last_index.strftime('%Y-%m-%d')}): {last_actual_price:.2f} USD\")\n",
        "print(f\"The model {prediction_date.strftime('%Y-%m-%d')} Bitcoin price prediction for: {float(predicted_price[0][0]):.2f} USD\")\n",
        "print(\"=\"*50)\n",
        "print(\"\\nWARNING: This model is for educational purposes only and does not constitute financial advice.\")\n",
        "\n",
        "\n"
      ]
    }
  ]
}