"""Train a random-forest classifier on technical-analysis features of a stock.

The script downloads daily OHLCV data with yfinance, derives the full set of
``ta`` indicators, labels each day by whether the next day's close is higher,
and reports hold-out accuracy on the most recent 20% of the sample.
"""

import pandas as pd
import yfinance as yf
from ta import add_all_ta_features
from ta.utils import dropna
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load historical stock data using yfinance
symbol = 'AAPL'
start_date = '2021-01-01'
end_date = '2022-01-01'
stock_data = yf.download(symbol, start=start_date, end=end_date)

# Some yfinance versions return (field, ticker) MultiIndex columns even for a
# single symbol; keep only the field level so "Open"/"Close"/... resolve.
if isinstance(stock_data.columns, pd.MultiIndex):
    stock_data.columns = stock_data.columns.get_level_values(0)

if stock_data.empty:
    raise SystemExit(
        f"No price data returned for {symbol} between {start_date} and {end_date}"
    )

# Clean NaN values in the raw price data
stock_data = dropna(stock_data)

# Add all ta features. fillna=True lets the library fill indicator warm-up
# gaps; without it almost every row carries at least one NaN and a later
# row-wise dropna would discard nearly the whole sample.
stock_data = add_all_ta_features(
    stock_data,
    open="Open",
    high="High",
    low="Low",
    close="Close",
    volume="Volume",
    fillna=True,
)

# Define target variable (1 when the next close is higher, 0 otherwise).
# NOTE: despite the column name this is a next-day-up label, not a real
# double-top pattern detector; the name is kept for compatibility.
next_close = stock_data['Close'].shift(-1)
stock_data['DoubleTop'] = (next_close > stock_data['Close']).astype(int)

# The last row has no next-day close, so its label is meaningless; a NaN
# comparison evaluates to False rather than NaN, so drop the row explicitly.
stock_data = stock_data.iloc[:-1]

# Features and target
X = stock_data.drop(['DoubleTop'], axis=1)
y = stock_data['DoubleTop']

# Split into train and test sets. shuffle=False keeps chronological order so
# the model is never trained on days that come after the ones it is tested on.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# Train a simple RandomForestClassifier (you may need a more sophisticated model)
clf = RandomForestClassifier(random_state=42)  # seeded for reproducible runs
clf.fit(X_train, y_train)

# Predictions
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy}")