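"""Online training of a trading policy on EUR/USD 30-minute candlesticks.

The script streams the training split bar by bar, updates the Policy network
after every step with the custom myOptimizer from utils.py, and logs the
running equity curve to TensorBoard. An evaluation pass on the test split is
kept at the bottom of the file but is currently commented out.
"""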
from env import Environment
from policy import Policy
from utils import myOptimizer

import pandas as pd
import numpy as np
import torch
from collections import OrderedDict

import matplotlib.pyplot as plt

from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter

if __name__ == "__main__":
    writer = SummaryWriter('runs/new_data_ex_7')

    # data = pd.read_csv('./data/EURUSD_Candlestick_1_M_BID_01.01.2021-04.02.2023.csv')
    data = pd.read_csv('./data/EURUSD_Candlestick_30_M_BID_01.01.2021-04.02.2023.csv')
    # data['Local time'] = pd.to_datetime(data['Local time'])
    data = data.set_index('Local time')
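    # The index is left as the raw 'Local time' strings (pd.to_datetime is
    # commented out above), so the train/test split below slices on exact
    # string labels rather than on datetimes.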
    print(data.index.min(), data.index.max())

    # date_split = '19.09.2022 17:55:00.000 GMT-0500'
    # date_split = '25.08.2022 04:30:00.000 GMT-0500' # 30 min

    date_split = '03.02.2023 15:30:00.000 GMT-0600' # 30 min

    train = data[:date_split]
    test = data[date_split:]


    # Hyper-parameters of the online learning setup
    learning_rate = 0.001
    first_momentum = 0.0
    second_momentum = 0.0001
    transaction_cost = 0.0001
    adaptation_rate = 0.01
    state_size = 15
    equity = 1.0

    agent = Policy(input_channels=state_size)
    optimizer = myOptimizer(learning_rate, first_momentum, second_momentum, adaptation_rate, transaction_cost)
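    # Note: myOptimizer is the project-specific optimizer from utils.py. Judging
    # from its use below, step() returns a parameter delta computed from the
    # current gradient, the reward, the newest observation value and the previous
    # step's gradient, while after_step() updates its internal state per reward.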

    

    # Seed the environment with the first (state_size - 1) close-to-close price changes.
    history = []
    for i in range(1, state_size):
        c = train['Close'].iloc[i] - train['Close'].iloc[i - 1]
        history.append(c)

    env = Environment(train, history=history, state_size=state_size)
    observation = env.reset()
    

    # Per-parameter store of the previous step's gradients (needed by the custom
    # optimizer) and a buffer of updated weights to load back after each step.
    model_gradients_history = dict()
    checkpoint = OrderedDict()

    for name, param in agent.named_parameters():
        model_gradients_history.update({name: torch.zeros_like(param)})

    for i in tqdm(range(state_size, len(train))):
        observation = torch.as_tensor(observation).float()
        action = agent(observation)
        observation, reward, _ = env.step(action.detach().cpu().numpy())

        # Zero the accumulated gradients so param.grad holds only the gradient
        # of the current step, then back-propagate through the policy output.
        agent.zero_grad()
        action.backward()

        # Apply the custom update per parameter: the new value depends on the
        # current gradient, the reward, the last element of the new observation
        # and the previous step's gradient.
        for name, param in agent.named_parameters():
            grad_n = param.grad
            updated = param + optimizer.step(grad_n, reward, observation[-1], model_gradients_history[name])
            checkpoint[name] = updated
            model_gradients_history.update({name: grad_n})

        # Start accumulating equity only after a burn-in of 10,000 bars;
        # before that the equity curve is logged flat at 1.0.
        if i > 10000:
            equity += env.profit
            writer.add_scalar('equity', equity, i)
        else:
            writer.add_scalar('equity', 1.0, i)

        optimizer.after_step(reward)
        agent.load_state_dict(checkpoint)
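
    # Optionally persist the trained weights before evaluation; the file name
    # below is illustrative and not part of the original script.
    # torch.save(agent.state_dict(), 'policy_final.pt')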

    ###########
    # Evaluation pass on the held-out test split (kept for reference,
    # currently commented out).
    ###########

    # history = []
    # for i in range(1, state_size):
    #     c = test.iloc[i, :]['Close'] - test.iloc[i-1, :]['Close']
    #     history.append(c)

    # env = Environment(test, history=history, state_size=state_size)
    # observation = env.reset()
    

    # model_gradients_history = dict()
    # checkpoint = OrderedDict()

    # for name, param in agent.named_parameters():
    #     model_gradients_history.update({name: torch.zeros_like(param)})

    # for _ in tqdm(range(state_size, len(test))):
    #     observation = torch.as_tensor(observation).float()
    #     action = agent(observation)
    #     observation, reward, _ = env.step(action.data.numpy())

    #     action.backward()

    #     for name, param in agent.named_parameters():
    #         grad_n = param.grad
    #         param = param + optimizer.step(grad_n, reward, observation[-1], model_gradients_history[name])
    #         checkpoint[name] = param
    #         model_gradients_history.update({name: grad_n})

    #     optimizer.after_step(reward)
    #     agent.load_state_dict(checkpoint)

    print(env.profits)
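
    # Make sure all pending TensorBoard events are flushed to disk.
    writer.close()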