Strategy: LSTM Neural Network + Small Cap
2017-01-17
This post introduces a simple LSTM neural network strategy for the Chinese A-share market, together with its implementation in code. Here we use RqAlpha’s trading API.
Performance
Info | Result |
---|---|
Backtest Interval | 20130101 - 20170101 |
Initial Capital | 1,000,000 |
Annualized Return (Strategy) | 68.444% |
Annualized Return (Benchmark) | 9.205% |
Sharpe Ratio | 3.4771 |
Maximum Drawdown | -13.396% |
Intuition: Small Cap
It is quite intuitive that small-cap companies generally have a higher probability of going up. Also, due to market making in the A-share market, small caps usually show more stable patterns. These two facts hold from bear markets to bull markets and are rarely violated.
Tool: LSTM Neural Network
LSTM is an RNN network model. Its strength is in efficiently processing text and time series of varying length. For more detailed information, check this out.
Codes
'''
author: Allen Frostline
update: 2017-01-16
'''
# some important libraries
import pandas as pd
import numpy as np
from datetime import timedelta
from pybrain.datasets import SequentialDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.networks import Network
from pybrain.structure.modules import ReluLayer, LSTMLayer
from pybrain.supervised import RPropMinusTrainer
# train on trainX and trainY, return a net and its score
def train(context, trainX, trainY):
    '''Fit a small recurrent LSTM network on one ticker's sequence.

    Each (trainX[k], trainY[k]) pair is added as one sample of a
    SequentialDataSet with 4 inputs and 1 target. The network is built with
    layer sizes (4, 1, 1) using LSTMLayer as the hidden class, and is trained
    with RPropMinusTrainer for 5 cycles of 5 epochs each.

    `context` is accepted for signature symmetry with the other helpers but
    is not read here.

    Returns a tuple (net, score) where score is whatever
    trainer.testOnData() reports (the caller treats it as an MSE).
    '''
    epochs_per_cycle = 5
    cycles = 5

    dataset = SequentialDataSet(4, 1)
    for features, target in zip(trainX, trainY):
        dataset.addSample(features, target)

    network = buildNetwork(4, 1, 1, hiddenclass=LSTMLayer, outputbias=False, recurrent=True)
    rprop = RPropMinusTrainer(network, dataset=dataset)
    for _ in range(cycles):
        rprop.trainEpochs(epochs_per_cycle)

    return network, rprop.testOnData()
# update data
def load(context, ticker):
    '''Fetch 180 daily bars for `ticker` and return them z-score normalised.

    The close, high, low and volume series are stacked (in that order) as the
    rows of a numpy matrix. The per-series mean and standard deviation are
    appended to context.position_ratio and context.shape_ratio respectively,
    so the same scaling can be re-applied or undone later.

    Returns the normalised matrix (one row per field).
    '''
    fields = ('close', 'high', 'low', 'volume')
    series = [history_bars(ticker, 180, '1d', field) for field in fields]
    data = np.matrix(series)

    # Remember the scaling parameters of this ticker (one entry per field).
    context.position_ratio.append(np.array([column.mean() for column in series]))
    context.shape_ratio.append(np.array([column.std() for column in series]))
    center = context.position_ratio[-1]
    spread = context.shape_ratio[-1]

    # Normalise row by row, writing back into the matrix in place.
    for row in range(len(fields)):
        data[row, :] = (data[row, :] - center[row]) / spread[row]
    return data
# blacklist is a necessity
def filter_blacklist(context, stock_list):
    '''Return the tickers of `stock_list` that are not in context.blacklist.

    The order (and any duplicates) of `stock_list` is preserved.
    '''
    # Build the lookup set once so every membership test is O(1) instead of
    # rescanning the blacklist for each candidate ticker.
    banned = set(context.blacklist)
    return [ticker for ticker in stock_list if ticker not in banned]
def filter_stlist(stock_list):
    '''Drop every ticker for which is_st_stock() is truthy, keeping the original order.'''
    kept = []
    for symbol in stock_list:
        if is_st_stock(symbol):
            continue
        kept.append(symbol)
    return kept
# use data of past 6 months to train the model and apply it for the next 3 months
def modelize(context, bar_dict):
    '''Rebuild the stock universe and train one network per selected ticker.

    The function is a no-op (apart from advancing context.every_3_months) on
    calls where that counter is not a multiple of 3. Otherwise it:

    1. resets the per-ticker state lists on `context`;
    2. queries the context.num*5 smallest market caps, removes ST and
       blacklisted tickers and keeps at most context.num of them;
    3. loads and trains a network for each ticker.

    If training raises, or the reported score is NaN, the ticker is appended
    to context.blacklist, context.mflag is set to 0 and the function returns
    0 immediately, leaving the counter untouched so that the next call starts
    over with the offending ticker excluded.

    On success context.mflag is set to 1, context.tflag to 0, the target
    weights and per-ticker peak market values are reset, and the counter is
    advanced. `bar_dict` is unused.

    Returns 0 on the early exits described above, None on success.
    '''
    if context.every_3_months % 3 != 0:
        context.every_3_months += 1
        return 0

    print('-'*65)
    print('------'+'{:-^59}'.format('modelizing'))
    context.position_ratio = []
    context.shape_ratio = []
    context.data = []
    context.net = []
    context.list = []
    templist = list(get_fundamentals(query(fundamentals.eod_derivative_indicator.market_cap)
                                     .order_by(fundamentals.eod_derivative_indicator.market_cap.asc())
                                     .limit(context.num*5)).columns)
    context.list = filter_blacklist(context, filter_stlist(templist))[:context.num]

    names = []
    scores = []
    for ticker in context.list:
        names.append('{:<11}'.format(ticker))
        data = load(context, ticker)
        # Inputs are all four features at day t; the target is the close at day t+1.
        trainX = data[:, :-1].T
        trainY = data[0, 1:].T
        try:
            net, mse = train(context, trainX, trainY)
        except Exception:
            # Any training failure disqualifies the ticker. `Exception` (rather
            # than a bare except) lets KeyboardInterrupt/SystemExit propagate
            # instead of being mistaken for a bad ticker.
            context.blacklist.append(ticker)
            context.mflag = 0
            return 0
        context.data.append(data)
        context.net.append(net)
        scores.append('{:<11}'.format(str(mse)[:6]))
        if np.isnan(mse):
            # A NaN score means the network is unusable: blacklist and abort.
            context.blacklist.append(ticker)
            context.mflag = 0
            return 0

    context.pct = [0] * context.num
    print('------'+'{:-^59}'.format('finished'))
    print('-'*65)
    print(' nm | '+' '.join(names))
    print('mse | '+' '.join(scores))
    context.mflag = 1  # flag that we've already modelized at least once
    context.tflag = 0
    context.every_3_months += 1
    # Running peak market value per ticker, used by the stop-loss check in trade().
    context.mv = dict(zip(context.list, [0]*len(context.list)))
def mkt_panic():
    '''Return 1 (and print a banner) when the index '000001.XSHG' looks panicked, else 0.

    Based on the last three daily closes, panic means either two consecutive
    daily close ratios below 0.97, or a latest daily close ratio below 0.95.
    '''
    closes = history_bars('000001.XSHG', 3, '1d', 'close')
    last_move = closes[-1] / closes[-2]
    two_day_slide = last_move < 0.97 and closes[-2] / closes[-3] < 0.97
    if not (two_day_slide or last_move < 0.95):
        return 0
    print('!!!!!!'+'{:!^59}'.format('panic'))
    return 1
# daily trading routine; the signal thresholds a (buy) and b (sell) are set inside
def trade(context, bar_dict):
    '''Daily routine: refresh predictions, update target weights and place orders.

    Steps, in order:

    1. Call modelize() until context.mflag is non-zero.
    2. Close every position whose ticker is not in context.list.
    3. For each modelled ticker, normalise the latest daily close/high/low/
       volume with the stored scaling parameters and feed them to its network
       to get a predicted (normalised) close. The latest 1-minute close is
       used as the reference price (named "open" in the log output).
    4. Derive two expected returns per ticker: prediction vs. reference price
       (r) and prediction vs. the previous stored prediction (temp_r); the
       trading signal is max(r, temp_r, r + temp_r).
    5. Run the stop-loss check on open positions.
    6. Update context.pct: zero during (and for a while after) a market
       panic, +0.5/num up to 2/num on a signal above `a`, -0.5/num down to 0
       on a signal below `b` or when more than 3*num//5 signals are <= 0.
    7. Scale the weights down so they sum to at most 1, log, plot and send
       order_target_percent for every modelled ticker.

    `bar_dict` is only forwarded to modelize().
    '''
    # modelize() blacklists the failing ticker on each unsuccessful attempt,
    # so repeated calls eventually work through a different universe.
    while context.mflag == 0:
        modelize(context, bar_dict)

    trash_bin = [ticker for ticker in context.portfolio.positions if ticker not in context.list]
    for ticker in trash_bin:
        order_target_percent(ticker, 0)

    actual_close = []
    actual_open = []
    predict_close = []
    # Iterate over the per-ticker lists themselves rather than
    # range(context.num): modelize() may select fewer than context.num
    # tickers, in which case indexing up to context.num would raise IndexError.
    for ticker, position_ratio, shape_ratio, net in zip(
            context.list, context.position_ratio, context.shape_ratio, context.net):
        close = (history_bars(ticker, 1, '1d', 'close')[0] - position_ratio[0]) / shape_ratio[0]
        high = (history_bars(ticker, 1, '1d', 'high')[0] - position_ratio[1]) / shape_ratio[1]
        low = (history_bars(ticker, 1, '1d', 'low')[0] - position_ratio[2]) / shape_ratio[2]
        volume = (history_bars(ticker, 1, '1d', 'volume')[0] - position_ratio[3]) / shape_ratio[3]
        # Latest 1-minute close, scaled like the daily close.
        latest = (history_bars(ticker, 1, '1m', 'close')[0] - position_ratio[0]) / shape_ratio[0]
        actual_close.append(close)
        actual_open.append(latest)
        predict_close.append(net.activate([close, high, low, volume])[0])

    # Right after a (re)modelization there is no previous prediction yet.
    if context.tflag == 0:
        context.temp_pc = predict_close

    def expected_return(target, base, shape_ratio, position_ratio):
        # Undo the z-score on both prices before taking their ratio.
        return float((target*shape_ratio[0]+position_ratio[0]) / (base*shape_ratio[0]+position_ratio[0]) - 1)

    scalers = list(zip(context.shape_ratio, context.position_ratio))
    r = [expected_return(pc, ao, sr, pr)
         for pc, ao, (sr, pr) in zip(predict_close, actual_open, scalers)]
    temp_r = [expected_return(pc, tpc, sr, pr)
              for pc, tpc, (sr, pr) in zip(predict_close, context.temp_pc, scalers)]

    # stop loss trick
    for stock in context.portfolio.positions:
        try:
            mv = context.portfolio.positions[stock].market_value  # current market value
            if mv > context.mv[stock]:
                context.mv[stock] = mv  # running peak market value
            md = 1 - mv/context.mv[stock]  # drawdown from the peak
            # NOTE(review): a positive order_value amount looks like a buy of
            # mv/2 rather than a sale - confirm this is the intended
            # "stop loss" behaviour.
            if md > .5:
                order_value(stock, mv/2)
        except Exception:
            # Best effort: positions that are not tracked in context.mv
            # (e.g. ones being liquidated above) or that have a zero peak are
            # skipped. KeyboardInterrupt/SystemExit are no longer swallowed.
            pass

    # The essence of this strategy
    hybrid_r = [max(ri, temp_ri, ri+temp_ri) for ri, temp_ri in zip(r, temp_r)]
    bad_hybrid_signal = sum(x <= 0 for x in hybrid_r)
    a, b = 0.02, -0.02
    panic = mkt_panic()
    for i, signal in enumerate(hybrid_r):
        if panic or 0 < context.post_panic < 22 * context.num:
            context.pct[i] = 0
            # Restart the counter at 1 on a panic day, otherwise advance it
            # (once per ticker, hence the 22 * context.num bound above).
            context.post_panic = (1 - panic) * (context.post_panic + 1) + panic
        elif signal > a:
            context.pct[i] = min(context.pct[i] + .5/context.num, 2/context.num)
            context.post_panic = 0
        elif signal < b or bad_hybrid_signal > 3*context.num//5:
            context.pct[i] = max(context.pct[i] - .5/context.num, 0)
            context.post_panic = 0

    if context.tflag == 1:
        print(' ac | '+' '.join(['{:<11}'.format(str(ac)[:6]) for ac in actual_close]))
    print('-'*65)
    print(' ao | '+' '.join(['{:<11}'.format(str(ao)[:6]) for ao in actual_open]))
    print(' pc | '+' '.join(['{:<11}'.format(str(pc)[:6]) for pc in predict_close]))
    print('  r | '+' '.join(['{:<11}'.format(str(ri)[:6]) for ri in hybrid_r]))

    # Fraction of total equity currently invested (for the plot only).
    pct = sum(context.portfolio.positions[ticker].market_value
              for ticker in context.portfolio.positions)/(context.portfolio.market_value+context.portfolio.cash)
    # Scale down only when the targets sum to more than 100%.
    tot_pct = max(sum(context.pct), 1)
    context.pct = [weight/tot_pct for weight in context.pct]
    print('  % | '+' '.join(['{:<11}'.format(str(p)[:6]) for p in context.pct]))
    plot('total position', pct * 100)
    for ticker, weight in zip(context.list, context.pct):
        order_target_percent(ticker, weight)

    context.tflag = 1
    context.temp_pc = predict_close
def init(context):
    '''Initialise all strategy state on `context` and register the scheduled jobs.

    modelize is scheduled monthly (argument 1) and trade daily.
    '''
    context.temp_pc = []          # previous predictions, compared against in trade()
    context.every_3_months = 0    # call counter; modelize() retrains when it is a multiple of 3
    context.tflag = 0             # 1 once trade() has run since the last modelization
    context.mflag = 0             # 1 once modelize() has completed successfully
    # Peak market value per ticker. It is keyed by ticker everywhere else
    # (modelize() rebuilds it as a dict, trade() indexes it by ticker), so it
    # starts as an empty dict rather than a list.
    context.mv = {}
    context.position_ratio = []   # per-ticker feature means
    context.shape_ratio = []      # per-ticker feature standard deviations
    context.num = 20              # number of tickers to model and hold
    context.list = []             # currently modelled tickers
    context.pct = [0] * context.num  # target portfolio weight per ticker
    context.net = []              # trained networks, aligned with context.list
    context.data = []             # normalised training data, aligned with context.list
    context.post_panic = 0        # counter used by trade() to stay flat after a panic
    context.blacklist = [
        '000004.XSHE', '000546.XSHE',
        '000594.XSHE', '002352.XSHE',
        '300176.XSHE', '300260.XSHE',
        '300372.XSHE', '600137.XSHG',
        '600306.XSHG', '600656.XSHG',
    ]
    scheduler.run_monthly(modelize, 1)
    scheduler.run_daily(trade)
# do nothing before 9:00
def before_trading(context):
    '''Pre-market hook; intentionally does nothing.'''
# do not trigger minute bar activities
def handle_bar(context, bar_dict):
    '''Per-bar hook; intentionally does nothing (all work is done by the scheduled jobs).'''