import pandas as pd
import yfinance as yf
from ta import add_all_ta_features
from ta.utils import dropna
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Download one year of daily OHLCV bars, derive technical-analysis features,
# and train a classifier that predicts whether the next session closes higher.
symbol = 'AAPL'
start_date = '2021-01-01'
end_date = '2022-01-01'
stock_data = yf.download(symbol, start=start_date, end=end_date)

# Fail early with a clear message: an empty frame (e.g. a failed download)
# would otherwise surface as an obscure error further down.
if stock_data.empty:
    raise RuntimeError(f"No price data downloaded for {symbol}")

# Some yfinance versions return (field, ticker) MultiIndex columns even for a
# single symbol; keep only the field level so "Close", "Volume", ... resolve.
if isinstance(stock_data.columns, pd.MultiIndex):
    stock_data.columns = stock_data.columns.get_level_values(0)

# Clean NaN values in the raw price/volume data
stock_data = dropna(stock_data)

# Add all ta features. fillna=True makes the library fill the gaps left by
# indicator warm-up windows, so no row-dropping pass is needed afterwards
# (dropping every row with a NaN indicator can leave little or no data).
stock_data = add_all_ta_features(
    stock_data, open="Open", high="High", low="Low", close="Close",
    volume="Volume", fillna=True)

# Define target variable: 1 if the next session's close is above today's,
# 0 otherwise.
# NOTE: the column keeps its historical name 'DoubleTop', but it is a plain
# next-day-up label, not a double-top pattern detector.
next_close = stock_data['Close'].shift(-1)
stock_data['DoubleTop'] = (next_close > stock_data['Close']).astype(int)

# The final row has no following session, so its label is undefined (the
# comparison against NaN silently yields 0); drop it instead of training on it.
stock_data = stock_data.iloc[:-1]

# Features and target
X = stock_data.drop(['DoubleTop'], axis=1)
y = stock_data['DoubleTop']

# Split chronologically (no shuffling): the test set is the most recent 20%
# of sessions, so the model is never evaluated on days that precede its
# training data.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False)

# Train a simple RandomForestClassifier (you may need a more sophisticated
# model); the fixed seed makes runs reproducible.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predictions
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy}")