allenfrostline

Strategy: LSTM Neural Network + Small Cap


2017-01-17

This post introduces a simple LSTM neural network strategy for the Chinese A-share market, along with its implementation in code. The code uses RQAlpha’s trading API.

Performance

Info Result
Backtest Interval 20130101 - 20170101
Initial Capital 1,000,000
Annualized Return (Strategy) 68.444%
Annualized Return (Benchmark) 9.205%
Sharpe Ratio 3.4771
Maximum Drawdown -13.396%

Intuition: Small Cap

It is quite intuitive that small-cap companies generally have a higher probability of going up. Also, due to market making in the A-share market, small caps usually show more stable patterns. These two facts hold from bear to bull markets and are rarely violated.

Tool: LSTM Neural Network

LSTM is a type of RNN (recurrent neural network) model. Its strength lies in efficiently processing text and time series of varying length. For more detailed information, check this out.

Codes

'''
author: Allen Frostline
update: 2017-01-16
'''

# some important libraries
import pandas as pd
import numpy as np
from datetime import timedelta
from pybrain.datasets import SequentialDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.networks import Network
from pybrain.structure.modules import ReluLayer, LSTMLayer
from pybrain.supervised import RPropMinusTrainer

# train on trainX and trainY, return a net and its score
def train(context, trainX, trainY):
    """Fit a small recurrent LSTM network on one sequence and score it.

    Args:
        context: strategy context. Accepted for a uniform signature; this
            function does not read or write it.
        trainX: iterable of input samples (4 values each), in time order.
        trainY: iterable of target samples (1 value each), aligned with
            ``trainX``.

    Returns:
        A ``(net, score)`` tuple: the trained network and whatever
        ``trainer.testOnData()`` reports on the training sequence (the
        caller treats this value as an MSE).
    """
    epochs_per_cycle, cycles = 5, 5

    # Feed the samples one at a time so the data set keeps their order.
    sequence = SequentialDataSet(4, 1)
    for features, target in zip(trainX, trainY):
        sequence.addSample(features, target)

    # Layer sizes 4 -> 1 -> 1, with an LSTM layer as the hidden class.
    network = buildNetwork(4, 1, 1, hiddenclass=LSTMLayer, outputbias=False, recurrent=True)
    fitter = RPropMinusTrainer(network, dataset=sequence)

    remaining = cycles
    while remaining > 0:
        fitter.trainEpochs(epochs_per_cycle)
        remaining -= 1

    return network, fitter.testOnData()

# update data
def load(context, ticker):
    """Fetch 180 daily bars for ``ticker`` and return them z-score normalised.

    The per-field mean and standard deviation are appended to
    ``context.position_ratio`` and ``context.shape_ratio`` respectively, so
    that later code can normalise new observations (and de-normalise
    predictions) with the same statistics.

    Args:
        context: strategy context holding the ``position_ratio`` and
            ``shape_ratio`` lists.
        ticker: order book id passed straight to ``history_bars``.

    Returns:
        A 4-row ``np.matrix`` whose rows are the normalised close, high,
        low and volume series, in that order.
    """
    fields = ('close', 'high', 'low', 'volume')
    series = [history_bars(ticker, 180, '1d', field) for field in fields]
    data = np.matrix(series)

    # Location (mean) of each field, recorded before the scale statistics.
    centers = np.array([column.mean() for column in series])
    context.position_ratio.append(centers)

    # Scale (standard deviation) of each field.
    spreads = np.array([column.std() for column in series])
    context.shape_ratio.append(spreads)

    # Normalise row by row, writing back into the matrix in place.
    for row, (center, spread) in enumerate(zip(centers, spreads)):
        data[row, :] = (data[row, :] - center) / spread
    return data


# blacklist is a necessity
def filter_blacklist(context, stock_list):
    """Return the tickers of ``stock_list`` that are not blacklisted.

    Args:
        context: strategy context exposing a ``blacklist`` collection.
        stock_list: iterable of tickers to screen.

    Returns:
        A new list with the surviving tickers in their original order.
    """
    kept = []
    for ticker in stock_list:
        if ticker in context.blacklist:
            continue
        kept.append(ticker)
    return kept
    
def filter_stlist(stock_list):
    """Return the tickers of ``stock_list`` for which ``is_st_stock`` is false.

    Args:
        stock_list: iterable of tickers to screen.

    Returns:
        A new list with the surviving tickers in their original order.
    """
    return list(filter(lambda ticker: not is_st_stock(ticker), stock_list))

# use data of past 6 months to train the model and apply it for the next 3 months
def modelize(context, bar_dict):
    """Re-select the small-cap universe and train one LSTM per ticker.

    A full pass only happens when ``context.every_3_months`` is a multiple
    of 3; on any other call the counter is bumped and the function returns
    immediately. A full pass:

    1. resets the per-ticker state lists on ``context``;
    2. queries the ``context.num * 5`` smallest market caps, drops ST and
       blacklisted tickers, and keeps the first ``context.num``;
    3. loads and normalises each ticker's history and trains a network on
       "all four features at t -> close at t+1";
    4. on success sets ``context.mflag = 1``, clears ``context.tflag``,
       resets ``context.pct`` / ``context.mv`` and bumps the counter.

    If training raises, or the reported score is NaN, the ticker is added to
    ``context.blacklist``, ``context.mflag`` is set to 0 and the pass is
    abandoned *without* bumping the counter, so the next call retries with
    the offending ticker excluded.

    Args:
        context: strategy context carrying all persistent state.
        bar_dict: current bar data supplied by the scheduler; unused here.

    Returns:
        0 when the call was skipped or the pass was abandoned; None after a
        successful pass.
    """
    if context.every_3_months % 3 != 0:
        context.every_3_months += 1
        return 0
    print('-'*65)
    print('------'+'{:-^59}'.format('modelizing'))
    context.position_ratio = []
    context.shape_ratio    = []
    context.data  = []
    context.net   = []
    context.list  = []
    # Over-fetch by a factor of 5 so that enough candidates survive the
    # ST / blacklist filters below.
    templist = list(get_fundamentals(query(fundamentals.eod_derivative_indicator.market_cap)
                                    .order_by(fundamentals.eod_derivative_indicator.market_cap.asc())
                                    .limit(context.num*5)).columns)
    context.list = filter_blacklist(context, filter_stlist(templist))[:context.num]
    names  = []
    scores = []
    for ticker in context.list:
        names.append('{:<11}'.format(ticker))
        data = load(context, ticker)
        # Inputs are every column but the last; targets are the close row
        # shifted one step ahead.
        trainX = data[:,:-1].T
        trainY = data[0,1:].T
        try:
            net, mse = train(context, trainX, trainY)
        except Exception:
            # Only ordinary errors mark the ticker as untrainable.
            # KeyboardInterrupt / SystemExit must propagate: the caller
            # retries in a loop, so swallowing them would make the run
            # impossible to interrupt.
            context.blacklist.append(ticker)
            context.mflag = 0
            return 0
        context.data.append(data)
        context.net.append(net)
        scores.append('{:<11}'.format(str(mse)[:6]))
        if np.isnan(mse):
            # A NaN score means the fit diverged; treat it like a failure.
            context.blacklist.append(ticker)
            context.mflag = 0
            return 0
    context.pct = [0] * context.num
    print('------'+'{:-^59}'.format('finished'))
    print('-'*65)
    print(' nm | '+' '.join(names))
    print('mse | '+' '.join(scores))

    context.mflag = 1  # flag that we've already modelized at least once
    context.tflag = 0
    context.every_3_months += 1
    # Per-ticker running maximum of position market value, used by the
    # stop-loss bookkeeping in trade().
    context.mv = dict(zip(context.list, [0]*len(context.list)))

def mkt_panic():
    """Report whether the index '000001.XSHG' has just dropped sharply.

    Looks at the last three daily closes. Panic is declared when the index
    closed below 97% of the previous close on two consecutive days, or
    below 95% of the previous close on the latest day alone.

    Returns:
        1 (after printing a banner) when panic is detected, otherwise 0.
    """
    closes = history_bars('000001.XSHG', 3, '1d', 'close')
    latest_ratio = closes[-1] / closes[-2]

    two_day_slide = latest_ratio < 0.97 and closes[-2] / closes[-3] < 0.97
    if not (two_day_slide or latest_ratio < 0.95):
        return 0

    print('!!!!!!'+'{:!^59}'.format('panic'))
    return 1

# set alert range [a,b]
def trade(context,bar_dict):
    """Daily step: predict each selected stock's close and rebalance.

    In order, the function:

    1. makes sure models exist, re-running ``modelize`` until
       ``context.mflag`` is set;
    2. liquidates every held ticker that is not in ``context.list``;
    3. normalises the latest daily close/high/low/volume of each selected
       ticker with the statistics stored by ``load`` and activates the
       ticker's network to get a predicted (normalised) close;
    4. turns the prediction into two return signals: one against the latest
       1-minute close, one against the previous call's prediction;
    5. updates the per-ticker peak market value and runs the stop-loss rule;
    6. moves each target weight up or down in steps of ``0.5 / num``
       (or to zero during and after a market panic), rescales the weights
       if they sum to more than 1, then logs, plots and places the orders.

    Args:
        context: strategy context carrying all persistent state.
        bar_dict: current bar data; only forwarded to ``modelize``.
    """
    # Each failed modelling pass blacklists a ticker, so retrying works on a
    # different candidate list every time.
    while context.mflag == 0:
        modelize(context, bar_dict)

    # Sell everything that dropped out of the selected universe.
    trash_bin = [ticker for ticker in context.portfolio.positions if ticker not in context.list]
    for ticker in trash_bin:
        order_target_percent(ticker, 0)

    actual_close  = []
    actual_high   = []
    actual_low    = []
    actual_vol    = []
    actual_open   = []
    actual_data   = []
    predict_close = []

    for i in range(context.num):
        # Normalise the newest observations with the mean / std recorded at
        # training time (index 0..3 = close, high, low, volume).
        actual_close.append((history_bars(context.list[i], 1,'1d','close')[0] - context.position_ratio[i][0]) / context.shape_ratio[i][0])
        actual_high.append((history_bars(context.list[i], 1,'1d','high')[0]   - context.position_ratio[i][1]) / context.shape_ratio[i][1])
        actual_low.append((history_bars(context.list[i], 1,'1d','low')[0]     - context.position_ratio[i][2]) / context.shape_ratio[i][2])
        actual_vol.append((history_bars(context.list[i], 1,'1d','volume')[0]  - context.position_ratio[i][3]) / context.shape_ratio[i][3])
        # The latest 1-minute close stands in for the "open" price and is
        # scaled with the close statistics.
        actual_open.append((history_bars(context.list[i], 1,'1m','close')[0]  - context.position_ratio[i][0]) / context.shape_ratio[i][0])
        actual_data.append([actual_close[i],actual_high[i],actual_low[i],actual_vol[i]])
        predict_close.append(context.net[i].activate(actual_data[i])[0])

    # First call after (re)modelling: there is no previous prediction yet,
    # so compare today's prediction with itself (temp_r becomes 0).
    if context.tflag == 0:
        context.temp_pc = predict_close

    # Predicted close vs. the "open" proxy, both de-normalised to prices.
    r = [
        float((pc*shape_ratio[0]+position_ratio[0]) / (ao*shape_ratio[0]+position_ratio[0]) - 1)
        for pc, ao, shape_ratio, position_ratio
        in zip(predict_close, actual_open, context.shape_ratio, context.position_ratio)
    ]

    # Predicted close vs. the previous call's predicted close.
    temp_r = [
        float((pc*shape_ratio[0]+position_ratio[0]) / (tpc*shape_ratio[0]+position_ratio[0]) - 1)
        for pc, tpc, shape_ratio, position_ratio
        in zip(predict_close, context.temp_pc, context.shape_ratio, context.position_ratio)
    ]

    # stop loss trick
    for stock in context.portfolio.positions:
        try:
            mv = context.portfolio.positions[stock].market_value # Cap
            if mv > context.mv[stock]:
                context.mv[stock] = mv                           # History Price Max
            md = 1 - mv/context.mv[stock]                        # Max DD
            # NOTE(review): a positive order_value amount is presumably a
            # buy, which would add to a position that has lost half its
            # peak value rather than cut it - confirm the intended sign.
            if md > .5:
                order_value(stock, mv/2)
        except Exception:
            # Best effort: positions without a tracked peak (KeyError) or
            # with a zero peak (ZeroDivisionError) are skipped. Only
            # ordinary exceptions are swallowed so the run stays
            # interruptible.
            pass

    # The essence of this strategy
    hybrid_r = [max(ri,temp_ri,ri+temp_ri) for ri, temp_ri in zip(r,temp_r)]
    bad_hybrid_signal = sum([x <= 0 for x in hybrid_r])
    a, b = 0.02, -0.02
    panic = mkt_panic()
    for i in range(context.num):
        if panic or 0 < context.post_panic < 22 * context.num:
            # Flat during a panic and while the cool-down counter runs. The
            # counter restarts at 1 on panic and otherwise advances once per
            # stock, hence the threshold scaled by context.num.
            context.pct[i] = 0
            context.post_panic = (1 - panic) * (context.post_panic + 1) + panic
        elif hybrid_r[i] > a:
            # Bullish signal: add half a slot, capped at two slots.
            context.pct[i] = min(context.pct[i] + .5/context.num, 2/context.num)
            context.post_panic = 0
        elif hybrid_r[i] < b or bad_hybrid_signal > 3*context.num//5:
            # Bearish signal, or too many non-positive signals overall:
            # remove half a slot, floored at zero.
            context.pct[i] = max(context.pct[i] - .5/context.num, 0)
            context.post_panic = 0

    if context.tflag == 1:
        print(' ac | '+' '.join(['{:<11}'.format(str(ac)[:6]) for ac in actual_close]))
    print('-'*65)
    print(' ao | '+' '.join(['{:<11}'.format(str(ao)[:6]) for ao in actual_open]))
    print(' pc | '+' '.join(['{:<11}'.format(str(pc)[:6]) for pc in predict_close]))
    print('  r | '+' '.join(['{:<11}'.format(str(ri)[:6]) for ri in hybrid_r]))
    # Fraction of total portfolio value currently invested, for the plot.
    pct = sum([context.portfolio.positions[ticker].market_value for ticker in context.portfolio.positions])/(context.portfolio.market_value+context.portfolio.cash)
    # Scale the weights down only when they would exceed 100% in total.
    tot_pct = max(sum(context.pct), 1)
    context.pct = list(map(lambda x: x/tot_pct, context.pct))
    print('  % | '+' '.join(['{:<11}'.format(str(p)[:6]) for p in context.pct]))
    plot('total position', pct * 100)
    for i in range(context.num):
        order_target_percent(context.list[i], context.pct[i])
    context.tflag = 1
    context.temp_pc = predict_close
    

def init(context):
    """Initialise all strategy state on ``context`` and register the jobs.

    Args:
        context: strategy context object that carries state between calls.
    """
    context.temp_pc        = []   # predictions from the previous trade() call
    context.every_3_months = 0    # modelize() call counter; a full pass runs when it is a multiple of 3
    context.tflag          = 0    # 1 once trade() has run since the last modelling pass
    context.mflag          = 0    # 1 once a modelling pass has completed successfully
    # Per-ticker peak market value. modelize() and trade() both use it as a
    # mapping keyed by ticker, so it starts as an empty dict, not a list.
    context.mv             = {}
    context.position_ratio = []   # per-ticker feature means recorded by load()
    context.shape_ratio    = []   # per-ticker feature standard deviations recorded by load()
    context.num            = 20   # number of tickers to model and trade
    context.list           = []   # currently selected tickers
    context.pct            = [0] * context.num  # target portfolio weight per selected ticker
    context.net            = []   # trained network per selected ticker
    context.data           = []   # normalised training data per selected ticker
    context.post_panic     = 0    # cool-down counter maintained by trade() after a market panic
    context.blacklist      = [
                              '000004.XSHE','000546.XSHE',
                              '000594.XSHE','002352.XSHE',
                              '300176.XSHE','300260.XSHE',
                              '300372.XSHE','600137.XSHG',
                              '600306.XSHG','600656.XSHG',
                             ]
    scheduler.run_monthly(modelize,1)
    scheduler.run_daily(trade)

# do nothing before 9:00
def before_trading(context):
    """Pre-market hook; this strategy deliberately does nothing here."""
    return None

# do not trigger minute bar activities
def handle_bar(context, bar_dict):
    """Per-bar hook; left empty because all work is done by scheduled jobs."""
    return None

References: