This post introduces a simple LSTM neural-network strategy for the Chinese A-share market and walks through its implementation. The code uses RQAlpha's trading API.

Performance

Backtest Interval: 20130101 - 20170101 Initial Capital: 1,000,000 Annualized Return (Strategy): 68.444% Annualized Return (Benchmark): 9.205% Sharpe Ratio: 3.4771 Maximum Drawdown: -13.396%

Intuition: Small Cap

It is quite intuitive that small-cap companies generally have a higher probability of going up. Also, due to market making in the A-share market, small caps usually show more stable patterns. These two observations hold in both bear and bull markets and are rarely violated.

Tool: LSTM Neural Network

LSTM (long short-term memory) is a type of recurrent neural network (RNN). Its strength is in efficiently processing text and time series of varying length. For more detailed information, check this out.

Codes

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
'''
author: Allen Frostline
update: 2017-01-16
'''

# some important libraries
import pandas as pd
import numpy as np
from datetime import timedelta
from pybrain.datasets import SequentialDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.networks import Network
from pybrain.structure.modules import ReluLayer, LSTMLayer
from pybrain.supervised import RPropMinusTrainer

# train on trainX and trainY, return a net and its score
def train(context, trainX, trainY, epochs_per_cycle=5, cycles=5):
    """Fit a small LSTM network on one sequence and return it with its error.

    Parameters
    ----------
    context
        Not used here; kept so the helper shares the strategy's calling style.
    trainX
        Iterable of 4-element feature rows, one per time step.
    trainY
        Iterable of 1-element targets, aligned with ``trainX``.
    epochs_per_cycle
        Number of epochs passed to each ``trainEpochs`` call (default 5).
    cycles
        Number of ``trainEpochs`` calls (default 5).

    Returns
    -------
    tuple
        ``(net, error)`` where ``error`` is whatever ``trainer.testOnData()``
        reports on the training set (the caller treats it as an MSE).
    """
    ds = SequentialDataSet(4, 1)
    for dataX, dataY in zip(trainX, trainY):
        ds.addSample(dataX, dataY)
    # Layer sizes 4 -> 1 -> 1 with an LSTM hidden layer.
    net = buildNetwork(4, 1, 1, hiddenclass=LSTMLayer, outputbias=False, recurrent=True)
    trainer = RPropMinusTrainer(net, dataset=ds)
    for _ in range(cycles):
        trainer.trainEpochs(epochs_per_cycle)
    return net, trainer.testOnData()

# update data
def load(context, ticker):
    """Fetch recent daily bars for ``ticker`` and return them standardised.

    Requests 180 daily values of close, high, low and volume through
    ``history_bars``. The per-field mean and standard deviation are appended
    to ``context.position_ratio`` and ``context.shape_ratio`` respectively,
    and each row of the returned matrix is rescaled to (value - mean) / std.

    Returns
    -------
    numpy.matrix
        Four rows in the order close, high, low, volume; one column per bar.
    """
    fields = ('close', 'high', 'low', 'volume')
    series = [history_bars(ticker, 180, '1d', field) for field in fields]
    data = np.matrix(series)

    # Remember the scaling so later prices can be normalised the same way.
    means = np.array([values.mean() for values in series])
    context.position_ratio.append(means)
    stds = np.array([values.std() for values in series])
    context.shape_ratio.append(stds)

    for row in range(len(fields)):
        data[row, :] = (data[row, :] - means[row]) / stds[row]
    return data


# blacklist is a necessity
def filter_blacklist(context, stock_list):
    """Return the tickers of ``stock_list`` that are not blacklisted.

    Order and duplicates of ``stock_list`` are preserved and
    ``context.blacklist`` is left untouched.
    """
    # Build the lookup once instead of scanning the list for every ticker.
    banned = set(context.blacklist)
    return [ticker for ticker in stock_list if ticker not in banned]

def filter_stlist(stock_list):
    """Return ``stock_list`` without the tickers flagged by ``is_st_stock``.

    The original order is preserved.
    """
    kept = []
    for ticker in stock_list:
        if is_st_stock(ticker):
            continue
        kept.append(ticker)
    return kept

# use data of past 6 months to train the model and apply it for the next 3 months
def modelize(context, bar_dict):
    """Pick the smallest-cap tickers and train one network per ticker.

    Only every third call does any work; the other calls just advance
    ``context.every_3_months`` and return 0.

    On a working call the per-model state on ``context`` is reset, up to
    ``context.num`` tickers are selected (ascending market cap, with
    ST-flagged and blacklisted tickers removed) and a network is trained
    for each one.

    If training raises or yields a NaN error, the ticker is appended to
    ``context.blacklist``, ``context.mflag`` is set to 0 and the function
    returns 0 without advancing the counter, so the next call starts over
    with a fresh selection.

    Parameters
    ----------
    context
        Strategy state object; read and mutated as described above.
    bar_dict
        Not used here; present because the scheduler passes it.

    Returns
    -------
    int or None
        0 on a skipped or aborted run, ``None`` after a successful run.
    """
    if context.every_3_months % 3 != 0:
        context.every_3_months += 1
        return 0

    print('-'*65)
    print('------'+'{:-^59}'.format('modelizing'))

    # Start from a clean slate; load() appends to the two ratio lists.
    context.position_ratio = []
    context.shape_ratio = []
    context.data = []
    context.net = []
    context.list = []

    # Over-fetch (5x) so that enough tickers remain after filtering.
    templist = list(get_fundamentals(query(fundamentals.eod_derivative_indicator.market_cap)
                                     .order_by(fundamentals.eod_derivative_indicator.market_cap.asc())
                                     .limit(context.num*5)).columns)
    context.list = filter_blacklist(context, filter_stlist(templist))[:context.num]

    names = []
    scores = []
    for ticker in context.list:
        names.append('{:<11}'.format(ticker))
        data = load(context, ticker)
        # Inputs: every row at step t; target: row 0 at step t+1.
        trainX = data[:, :-1].T
        trainY = data[0, 1:].T
        try:
            net, mse = train(context, trainX, trainY)
        except Exception:
            # Training failed for this ticker: ban it and let the caller
            # retry. Unlike a bare ``except`` this no longer swallows
            # KeyboardInterrupt / SystemExit.
            context.blacklist.append(ticker)
            context.mflag = 0
            return 0
        context.data.append(data)
        context.net.append(net)
        scores.append('{:<11}'.format(str(mse)[:6]))
        if np.isnan(mse):
            # A NaN score means the model is unusable; same recovery path.
            context.blacklist.append(ticker)
            context.mflag = 0
            return 0

    context.pct = [0] * context.num
    print('------'+'{:-^59}'.format('finished'))
    print('-'*65)
    print(' nm | '+' '.join(names))
    print('mse | '+' '.join(scores))

    context.mflag = 1  # flag that we've already modelized at least once
    context.tflag = 0
    context.every_3_months += 1
    context.mv = dict(zip(context.list, [0]*len(context.list)))

def mkt_panic():
    """Return 1 if index '000001.XSHG' signals a sharp sell-off, else 0.

    Looks at the last three daily closes. The alert fires when the latest
    close is below 97% of the previous one on two consecutive days, or when
    the latest close alone is below 95% of the previous one. A banner is
    printed whenever the alert fires.
    """
    index_close = history_bars('000001.XSHG', 3, '1d', 'close')
    latest_move = index_close[-1] / index_close[-2]
    two_day_slide = latest_move < 0.97 and index_close[-2] / index_close[-3] < 0.97
    if not (two_day_slide or latest_move < 0.95):
        return 0
    print('!!!!!!'+'{:!^59}'.format('panic'))
    return 1

# set alert range [a,b]
def trade(context, bar_dict):
    """Daily routine: predict each modelled ticker and rebalance the portfolio.

    Steps, in order:
      1. Call ``modelize`` until ``context.mflag`` is set.
      2. Close positions in tickers that are not in ``context.list``.
      3. Normalise the latest bar of each ticker with the stored mean/std
         and feed it to that ticker's network to predict the next close.
      4. Derive two return signals: ``r`` (predicted close vs. the latest
         1-minute close) and ``temp_r`` (predicted close vs. the previous
         prediction). Both are computed on de-normalised prices.
      5. Track each holding's peak market value and react to a >50% drop.
      6. Adjust the target weights in ``context.pct`` from the signals and
         the market-panic flag, print a summary, plot the invested share
         and submit ``order_target_percent`` for every modelled ticker.

    Parameters
    ----------
    context
        Strategy state object; read and mutated throughout.
    bar_dict
        Only forwarded to ``modelize``.
    """
    # modelize() clears mflag when a training run is aborted, so retry until
    # a complete set of models exists.
    while context.mflag == 0:
        modelize(context, bar_dict)

    # Drop holdings that are no longer part of the modelled universe.
    trash_bin = [ticker for ticker in context.portfolio.positions if ticker not in context.list]
    for ticker in trash_bin:
        order_target_percent(ticker, 0)

    actual_close = []
    actual_high = []
    actual_low = []
    actual_vol = []
    actual_open = []
    actual_data = []
    predict_close = []

    for i in range(context.num):
        ticker = context.list[i]
        mean = context.position_ratio[i]
        std = context.shape_ratio[i]
        actual_close.append((history_bars(ticker, 1, '1d', 'close')[0] - mean[0]) / std[0])
        actual_high.append((history_bars(ticker, 1, '1d', 'high')[0] - mean[1]) / std[1])
        actual_low.append((history_bars(ticker, 1, '1d', 'low')[0] - mean[2]) / std[2])
        actual_vol.append((history_bars(ticker, 1, '1d', 'volume')[0] - mean[3]) / std[3])
        # Latest 1-minute close, scaled with the close statistics, is used
        # as the "open" reference price.
        actual_open.append((history_bars(ticker, 1, '1m', 'close')[0] - mean[0]) / std[0])
        actual_data.append([actual_close[i], actual_high[i], actual_low[i], actual_vol[i]])
        predict_close.append(context.net[i].activate(actual_data[i])[0])

    # First run after (re)modelling: no previous prediction to compare with,
    # so temp_r below evaluates to zero.
    if context.tflag == 0:
        context.temp_pc = predict_close

    # Expected return of the predicted close relative to the reference price.
    r = [
        float((pc*shape_ratio[0]+position_ratio[0]) / (ao*shape_ratio[0]+position_ratio[0]) - 1)
        for pc, ao, shape_ratio, position_ratio
        in zip(predict_close, actual_open, context.shape_ratio, context.position_ratio)
    ]

    # Change of the prediction relative to the previous prediction.
    temp_r = [
        float((pc*shape_ratio[0]+position_ratio[0]) / (tpc*shape_ratio[0]+position_ratio[0]) - 1)
        for pc, tpc, shape_ratio, position_ratio
        in zip(predict_close, context.temp_pc, context.shape_ratio, context.position_ratio)
    ]

    # stop loss trick
    for stock in context.portfolio.positions:
        try:
            mv = context.portfolio.positions[stock].market_value  # Cap
            if mv > context.mv[stock]:
                context.mv[stock] = mv  # History Price Max
            md = 1 - mv/context.mv[stock]  # Max DD
            if md > .5:
                # NOTE(review): a positive amount presumably buys more of
                # the stock; if this is meant to cut the position the sign
                # may need to be negative -- confirm against the RQAlpha
                # order_value documentation.
                order_value(stock, mv/2)
        except Exception:
            # Best effort: e.g. a holding missing from context.mv or a zero
            # peak value must not abort the daily run. Unlike a bare
            # ``except`` this lets KeyboardInterrupt / SystemExit through.
            pass

    # The essence of this strategy
    hybrid_r = [max(ri, temp_ri, ri+temp_ri) for ri, temp_ri in zip(r, temp_r)]
    bad_hybrid_signal = sum([x <= 0 for x in hybrid_r])
    a, b = 0.02, -0.02  # buy above a, trim below b
    panic = mkt_panic()
    for i in range(context.num):
        if panic or 0 < context.post_panic < 22 * context.num:
            context.pct[i] = 0
            # The counter restarts at 1 on a panic day and otherwise grows by
            # one per ticker, hence the ``22 * context.num`` window above.
            context.post_panic = (1 - panic) * (context.post_panic + 1) + panic
        elif hybrid_r[i] > a:
            context.pct[i] = min(context.pct[i] + .5/context.num, 2/context.num)
            context.post_panic = 0
        elif hybrid_r[i] < b or bad_hybrid_signal > 3*context.num//5:
            context.pct[i] = max(context.pct[i] - .5/context.num, 0)
            context.post_panic = 0

    if context.tflag == 1:
        print(' ac | '+' '.join(['{:<11}'.format(str(ac)[:6]) for ac in actual_close]))
    print('-'*65)
    print(' ao | '+' '.join(['{:<11}'.format(str(ao)[:6]) for ao in actual_open]))
    print(' pc | '+' '.join(['{:<11}'.format(str(pc)[:6]) for pc in predict_close]))
    print(' r | '+' '.join(['{:<11}'.format(str(ri)[:6]) for ri in hybrid_r]))

    # Share of total portfolio value currently held in positions.
    pct = sum([context.portfolio.positions[ticker].market_value for ticker in context.portfolio.positions])/(context.portfolio.market_value+context.portfolio.cash)
    # Scale the targets down only when they add up to more than 100%.
    tot_pct = max(sum(context.pct), 1)
    context.pct = [x/tot_pct for x in context.pct]
    print(' % | '+' '.join(['{:<11}'.format(str(p)[:6]) for p in context.pct]))
    plot('total position', pct * 100)
    for i in range(context.num):
        order_target_percent(context.list[i], context.pct[i])

    context.tflag = 1
    context.temp_pc = predict_close


def init(context):
    """Set up the strategy state on ``context`` and register the schedules.

    ``modelize`` is registered with ``scheduler.run_monthly`` (argument 1)
    and ``trade`` with ``scheduler.run_daily``.
    """
    context.temp_pc = []          # predictions from the previous trade() run
    context.every_3_months = 0    # call counter used by modelize() to gate retraining
    context.tflag = 0             # 0 until trade() has run since the last modelling
    context.mflag = 0             # 0 until modelize() has completed successfully
    # Keyed by ticker everywhere else (context.mv[stock]), so start with a
    # dict rather than a list.
    context.mv = {}
    context.position_ratio = []   # per-ticker feature means
    context.shape_ratio = []      # per-ticker feature standard deviations
    context.num = 20              # number of tickers to model and trade
    context.list = []             # currently modelled tickers
    context.pct = [0] * context.num  # target weight per modelled ticker
    context.net = []              # one trained network per modelled ticker
    context.data = []             # normalised training data per modelled ticker
    context.post_panic = 0        # counter driving the post-panic cool-down in trade()
    context.blacklist = [
        '000004.XSHE', '000546.XSHE',
        '000594.XSHE', '002352.XSHE',
        '300176.XSHE', '300260.XSHE',
        '300372.XSHE', '600137.XSHG',
        '600306.XSHG', '600656.XSHG',
    ]
    scheduler.run_monthly(modelize, 1)
    scheduler.run_daily(trade)

# do nothing before 9:00
def before_trading(context):
    """Pre-market hook; this strategy deliberately does nothing here."""
    return None

# do not trigger minute bar activities
def handle_bar(context, bar_dict):
    """Per-bar hook; left empty so that bar events trigger no activity."""
    return None

References: