File size: 9,743 Bytes
318e02f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 |
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Z6OeRBuqH7cJ"
},
"outputs": [],
"source": [
"#@title Step 1: Installing and Importing Necessary Libraries\n",
"# We are installing the necessary libraries in the Google Colab environment.\n",
"# yfinance: To fetch financial data from Yahoo Finance.\n",
"# tensorflow: To build and train the neural network.\n",
"# scikit-learn: For data preprocessing (normalization).\n",
"!pip install yfinance tensorflow scikit-learn pandas matplotlib -q\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import yfinance as yf\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import LSTM, Dense, Dropout\n",
"import datetime\n",
"\n",
"print(\"Libraries have been successfully installed and imported!\")\n",
"\n",
"\n",
"#@title Step 2: Fetching and Visualizing Bitcoin Data\n",
"# Let's fetch the BTC-USD (Bitcoin/US Dollar) data for the last few years.\n",
"start_date = '2019-01-01'\n",
"# We set the end date to today's date.\n",
"end_date = datetime.date.today().strftime(\"%Y-%m-%d\")\n",
"\n",
"try:\n",
" btc_data = yf.download('BTC-USD', start=start_date, end=end_date)\n",
" print(f\"Bitcoin data between {start_date} and {end_date} has been fetched.\")\n",
" print(\"First 5 rows of the dataset:\")\n",
" print(btc_data.head())\n",
"\n",
" # Let's plot the 'Close' prices of the dataset in a graph.\n",
" plt.figure(figsize=(14, 7))\n",
" plt.style.use('seaborn-v0_8-darkgrid')\n",
" plt.plot(btc_data['Close'], color='orange')\n",
" plt.title('Bitcoin Closing Prices (BTC-USD)', fontsize=16)\n",
" plt.xlabel('Date', fontsize=12)\n",
" plt.ylabel('Price (USD)', fontsize=12)\n",
" plt.legend(['Closing Price'])\n",
" plt.show()\n",
"\n",
"except Exception as e:\n",
" print(f\"An error occurred while fetching data: {e}\")\n",
"\n",
"\n",
"#@title Step 3: Data Preprocessing\n",
"# We are preparing the data to train our model.\n",
"\n",
"# We will only use the 'Close' column.\n",
"close_data = btc_data['Close'].values.reshape(-1, 1)\n",
"\n",
"# We are scaling the data between 0 and 1 (Normalization).\n",
"# Neural networks work more efficiently with data in this range.\n",
"scaler = MinMaxScaler(feature_range=(0, 1))\n",
"scaled_data = scaler.fit_transform(close_data)\n",
"\n",
"# We are splitting the dataset: 80% for training, 20% for testing.\n",
"training_data_len = int(np.ceil(len(scaled_data) * 0.8))\n",
"\n",
"# Let's create the training data.\n",
"train_data = scaled_data[0:int(training_data_len), :]\n",
"\n",
"# Let's prepare the x_train and y_train sets for training.\n",
"# The model will predict the next day's price by looking at the past 60 days' prices.\n",
"prediction_days = 60\n",
"x_train = []\n",
"y_train = []\n",
"\n",
"for i in range(prediction_days, len(train_data)):\n",
" x_train.append(train_data[i-prediction_days:i, 0])\n",
" y_train.append(train_data[i, 0])\n",
"\n",
"# Converting the lists to numpy arrays.\n",
"x_train, y_train = np.array(x_train), np.array(y_train)\n",
"\n",
"# Reshaping the data into a 3D format suitable for the LSTM model: [number of samples, time steps, number of features]\n",
"x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))\n",
"print(f\"Training data prepared. x_train shape: {x_train.shape}\")\n",
"\n",
"\n",
"#@title Step 4: Building the LSTM Model\n",
"# We are designing our neural network model using Keras.\n",
"\n",
"model = Sequential()\n",
"\n",
"# Layer 1: LSTM layer with 50 neurons. `return_sequences=True` because we will send data to the next LSTM layer.\n",
"model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)))\n",
"model.add(Dropout(0.2)) # We are deactivating 20% of the neurons to prevent overfitting.\n",
"\n",
"# Layer 2: LSTM layer with 50 neurons.\n",
"model.add(LSTM(units=50, return_sequences=False))\n",
"model.add(Dropout(0.2))\n",
"\n",
"# Output Layer: Consists of 1 neuron as we will predict a single value (the price).\n",
"model.add(Dense(units=1))\n",
"\n",
"# Compiling the model. 'adam' is a popular optimizer. 'mean_squared_error' is the loss function.\n",
"model.compile(optimizer='adam', loss='mean_squared_error')\n",
"\n",
"# Let's see the model's architecture.\n",
"model.summary()\n",
"\n",
"\n",
"#@title Step 5: Training the Model\n",
"# We are training the model with the prepared data.\n",
"# epochs: The number of times the model will process the entire dataset.\n",
"# batch_size: The number of data samples the model will see in each iteration.\n",
"print(\"Starting model training...\")\n",
"history = model.fit(x_train, y_train, batch_size=32, epochs=25)\n",
"print(\"Model training completed!\")\n",
"\n",
"\n",
"#@title Step 6: Testing the Model and Evaluating Results\n",
"# Let's create the test data.\n",
"test_data = scaled_data[training_data_len - prediction_days:, :]\n",
"\n",
"# Let's prepare the x_test and y_test sets.\n",
"x_test = []\n",
"y_test = close_data[training_data_len:, :] # y_test is the original (unscaled) data.\n",
"\n",
"for i in range(prediction_days, len(test_data)):\n",
" x_test.append(test_data[i-prediction_days:i, 0])\n",
"\n",
"x_test = np.array(x_test)\n",
"x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))\n",
"\n",
"# Let's make predictions on the test data with the model.\n",
"predictions = model.predict(x_test)\n",
"\n",
"# Let's scale the predictions back to the original price (from 0-1 range to USD).\n",
"predictions = scaler.inverse_transform(predictions)\n",
"\n",
"# Let's calculate RMSE (Root Mean Squared Error) to measure the model's performance.\n",
"rmse = np.sqrt(np.mean(((predictions - y_test) ** 2)))\n",
"print(f'\\nModel Error Rate on Test Data (RMSE): {rmse:.2f} USD')\n",
"\n",
"# Let's show the actual and predicted prices on the same graph.\n",
"train = btc_data[:training_data_len]\n",
"valid = btc_data[training_data_len:].copy() # Using .copy() to avoid SettingWithCopyWarning.\n",
"valid.loc[:, 'Predictions'] = predictions\n",
"\n",
"plt.figure(figsize=(16, 8))\n",
"plt.title('Model Predictions vs Actual Prices', fontsize=16)\n",
"plt.xlabel('Date', fontsize=12)\n",
"plt.ylabel('Closing Price (USD)', fontsize=12)\n",
"plt.plot(train['Close'], color='blue', alpha=0.6)\n",
"plt.plot(valid['Close'], color='green')\n",
"plt.plot(valid['Predictions'], color='red', linestyle='--')\n",
"plt.legend(['Training Data', 'Actual Price', 'Predicted Price'], loc='upper left')\n",
"plt.show()\n",
"\n",
"# Let's take a closer look at the last 15 days of predictions.\n",
"print(\"\\nLast 15 Days of Actual and Predicted Prices:\")\n",
"print(valid[['Close', 'Predictions']].tail(15))\n",
"\n",
"\n",
"#@title Step 7: Using the Model to Predict the Future\n",
"\n",
"# Get the last 60 days of data\n",
"last_60_days = scaled_data[-prediction_days:]\n",
"X_predict = np.reshape(last_60_days, (1, prediction_days, 1))\n",
"\n",
"# Make a guess\n",
"predicted_price_scaled = model.predict(X_predict)\n",
"predicted_price = scaler.inverse_transform(predicted_price_scaled)\n",
"\n",
"# Date information\n",
"tomorrow = datetime.date.today() + datetime.timedelta(days=1)\n",
"\n",
"# Convert with float() to avoid errors\n",
"last_row = btc_data.tail(1)\n",
"last_index = last_row.index[0]\n",
"last_actual_price = float(last_row['Close'].iloc[0])\n",
"\n",
"# Print results\n",
"print(\"\\n\" + \"=\"*50)\n",
"print(\"FUTURE PREDICTION\")\n",
"print(\"=\"*50)\n",
"print(f\"Last closing price({last_index.strftime('%Y-%m-%d')}): {last_actual_price:.2f} USD\")\n",
"print(f\"The model {tomorrow.strftime('%Y-%m-%d')} Bitcoin price prediction for: {float(predicted_price[0][0]):.2f} USD\")\n",
"print(\"=\"*50)\n",
"print(\"\\nWARNING: This model is for educational purposes only and does not constitute financial advice.\")\n",
"\n",
"\n"
]
}
]
} |