适度冒险因子

由于本因子需要用到分钟级别的量价数据,而全市场的分钟数据量太大,难以下载与保存。因此,我只选取了其中299只股票2021年的数据进行简单的实现。

数据准备

1
2
3
4
5
6
7
8
9
import pandas as pd
import json
import os
import zipfile
import numpy as np
from tqdm import tqdm
from mytools import backtest
import warnings
warnings.filterwarnings("ignore")
1
2
# Load the list of stock codes that make up the research universe.
# JSON is UTF-8 by specification, so pin the encoding instead of relying on
# the platform default (which is not UTF-8 on every system).
with open("../data/stock_pool.json", 'r', encoding="utf-8") as f:
    stock_pool = json.load(f)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Columns of the minute-bar frames used in this notebook:
#   close:        closing price of the minute bar
#   volume:       traded volume of the minute bar
#   stock_code:   stock code
#   date:         trading date
#   hour:         hour of the bar
#   minute:       minute of the bar
#   rtn:          minute-over-minute return (added by dataloader)
#   volume_delta: minute-over-minute change in volume (added by find_surge_time)

def dataloader(stock_code):
    """Load one stock's minute bars from the zip archive and add returns.

    Args:
        stock_code: Stock code; the member ``mins/{stock_code}.csv`` is read
            from ``../data/mins.zip``.

    Returns:
        pandas.DataFrame: The CSV contents plus an ``rtn`` column holding the
        within-day simple return of ``close`` (NaN on the first bar of each
        date), with ``date`` converted to datetime.
    """
    with zipfile.ZipFile("../data/mins.zip", 'r') as zfile:
        # Close the member handle as well as the archive.
        with zfile.open(f'mins/{stock_code}.csv') as f:
            df = pd.read_csv(f)
    # groupby.shift keeps the original row index, so the result lines up with
    # df even when rows are not sorted by date or only one date is present
    # (groupby.apply + reset_index assigned values positionally instead).
    prev_close = df.groupby('date')['close'].shift(1)
    df['rtn'] = (df['close'] - prev_close) / prev_close
    df['date'] = pd.to_datetime(df['date'])
    return df

# Smoke-test the loader on a single stock and preview the first rows.
df = dataloader("000001.SZ")
df.head()
closevolumestock_codedatehourminutertn
02853.10133887508000001.SZ2021-01-04931NaN
12847.05001843936000001.SZ2021-01-04932-0.002121
22850.07571673800000001.SZ2021-01-049330.001063
32824.35842422714000001.SZ2021-01-04934-0.009023
42813.76902531900000001.SZ2021-01-04935-0.003749
1
2
3
4
5
6
7
8
9
10
11
12
13
# Columns of the daily bar file:
#   stock_code: stock code
#   date:       trading date
#   open:       opening price
#   high:       highest price
#   low:        lowest price
#   close:      closing price
#   pre_close:  previous closing price
#   volume:     traded volume

daily_stock_data = pd.read_csv("../data/daily_stock_data.csv")
daily_stock_data = daily_stock_data.assign(
    date=pd.to_datetime(daily_stock_data['date']),
)
# Daily simple return relative to the previous close.
daily_stock_data['rtn'] = (
    daily_stock_data['close']
    .sub(daily_stock_data['pre_close'])
    .div(daily_stock_data['pre_close'])
)
daily_stock_data.head()
stock_codedateopenhighlowclosepre_closevolumertn
0000001.SZ2021-12-3116.8616.9016.4016.4816.821750760.89-0.020214
1000001.SZ2021-12-3016.7616.9516.7216.8216.75796663.600.004179
2000001.SZ2021-12-2917.1617.1616.7016.7517.171469373.98-0.024461
3000001.SZ2021-12-2817.2217.3317.0917.1717.221126638.91-0.002904
4000001.SZ2021-12-2717.3317.3517.1617.2217.31731118.99-0.005199

计算预测收益

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Pivot the daily bars into one (date x stock_code) matrix per field.
dsd = {}
for key in tqdm(['high', 'open', 'low', 'close', 'volume']):
    dsd[key] = pd.pivot(daily_stock_data, index='date', columns='stock_code', values=key)

# Next-day close-to-close return: the quantity the factor is meant to predict.
dsd['pred_rtn'] = (dsd['close'].shift(-1) - dsd['close']) / dsd['close']

# Remember which returns are missing so the overrides below never turn a
# missing value into 0.
pred_rtn_na = dsd['pred_rtn'].isna()

# A stock that does not trade tomorrow (zero volume) can only earn 0.
vol0 = dsd['volume'].shift(-1) == 0
dsd['pred_rtn'] = dsd['pred_rtn'].mask(vol0 & (~pred_rtn_na), 0)

# A stock that is locked at limit-up all day tomorrow cannot be bought, so it
# can only earn 0.
yz = dsd['high'].shift(-1) == dsd['low'].shift(-1)  # one-price day: high == low
# Limit-up means tomorrow's close is ABOVE today's close. The previous
# expression negated this comparison and therefore selected one-price days
# that did not rise, contradicting the stated intent.
zt = dsd['close'].shift(-1) > dsd['close']
dsd['pred_rtn'] = dsd['pred_rtn'].mask(yz & zt & (~pred_rtn_na), 0)

# Back to long format: one row per (date, stock_code).
pred_rtn = dsd['pred_rtn'].stack().reset_index().rename(columns={0: 'pred_rtn'})

因子计算

因子计算思路:

计算分钟频率交易量的变化: $\Delta volume_t = volume_t - volume_{t-1}$

由此得到每日放量的“激增时刻”: $t_s = \mathbb{1}\left[\Delta volume_t > \overline{\Delta volume} + \sigma(\Delta volume)\right]$,即当某分钟的交易量增量超过当日增量的均值加一倍标准差时记为1,否则记为0。

分别计算激增时刻后五分钟收益率的平均值与标准差,作为这个激增时刻所引起的市场反应的“耀眼收益率 $r_s$”与“耀眼波动率 $\sigma_s$”。

分别计算 $t$ 日所有激增时刻的“耀眼收益率 $r_s$”与“耀眼波动率 $\sigma_s$”的均值,作为“日耀眼收益率 $r_s^t$”与“日耀眼波动率 $\sigma_s^t$”。

分别计算两个日度指标在截面上的均值作为当日的“适度水平”,再计算每只股票的日度指标与适度水平之差的绝对值作为差距;取该差距在过去20个交易日内的均值作为“月均耀眼指标”,标准差作为“月稳耀眼指标”。

最后:

$$
\begin{aligned}
\text{月耀眼收益率} &= \text{月均耀眼收益率} + \text{月稳耀眼收益率} \\
\text{月耀眼波动率} &= \text{月均耀眼波动率} + \text{月稳耀眼波动率}
\end{aligned}
$$

将月耀眼收益率与月耀眼波动率等权相加,即得到适度冒险因子。

激增时刻

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
def find_surge_time(stk_data):
    """Flag the intraday "surge" minutes of every stock-day.

    A minute is a surge when its change in volume versus the previous minute
    of the same stock and date exceeds that stock-day's mean + one standard
    deviation of volume changes.

    Args:
        stk_data: Minute bars with at least ``stock_code``, ``date`` and
            ``volume`` columns, ordered by time within each stock-day. The
            input frame is not modified.

    Returns:
        pandas.DataFrame: A new frame with the input rows (same order and
        index) plus three columns: ``volume_delta`` (NaN on the first bar of
        each stock-day), ``up_bound`` (the stock-day threshold) and ``surge``
        (1 for surge minutes, otherwise 0).
    """
    keys = ['stock_code', 'date']

    # groupby.diff is index-aligned, so each delta lands on its own row no
    # matter how the rows are ordered across stocks and dates. The previous
    # groupby.apply + reset_index(drop=True) assigned values positionally in
    # sorted-group order and misaligned them on unsorted input.
    volume_delta = stk_data.groupby(keys)['volume'].diff()
    # assign() returns a new frame, so the caller's data is left untouched.
    stk_data = stk_data.assign(volume_delta=volume_delta)

    # Broadcast the per stock-day threshold back to every minute row.
    by_day = stk_data.groupby(keys)['volume_delta']
    stk_data['up_bound'] = by_day.transform('mean') + by_day.transform('std')

    # Comparisons against NaN are False, so the first bar of a day is never
    # a surge.
    stk_data['surge'] = (stk_data['volume_delta'] > stk_data['up_bound']).astype('int64')
    return stk_data
1
2
3
4
5
6
# Load the minute bars of every stock in the pool, stack them into a single
# frame with a fresh 0..n-1 index, then flag the surge minutes.
ls = [dataloader(stock_code) for stock_code in tqdm(stock_pool)]
stk_data = find_surge_time(pd.concat(ls, ignore_index=True))

因子计算

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
def calculate_moderate_risk_factor(stk_data0, horizon=5, lookback=20):
    """Compute the "moderate risk" factor from surge-flagged minute bars.

    For every surge minute the mean and standard deviation of the next
    ``horizon`` minute returns are taken. Each is averaged per stock-day,
    compared with the cross-sectional mean of that day, and the absolute
    distance is summarised over a rolling ``lookback``-day window as
    mean + standard deviation. The factor is the sum of the two summaries.

    Args:
        stk_data0: Minute bars with ``stock_code``, ``date``, ``rtn`` and
            ``surge`` columns, ordered by time within each stock-day. The
            input frame is not modified.
        horizon: Number of minutes after a surge used to measure the
            reaction. Defaults to 5.
        lookback: Number of daily observations in the rolling window.
            Defaults to 20.

    Returns:
        pandas.DataFrame: Columns ``stock_code``, ``date`` and ``factor``;
        ``factor`` is NaN until ``lookback`` valid days are available.
    """
    def monthly_excellent_factor(stk_data, aspect):
        """Rolling mean + std of one indicator's distance from the market level."""
        # Keep the indicator at surge minutes only. The previous chained
        # assignment wrote into a temporary copy, so nothing was masked and
        # every minute of the day entered the daily average.
        surge_only = stk_data[aspect].where(stk_data['surge'] != 0)
        fac_ex = (
            surge_only
            .groupby([stk_data['stock_code'], stk_data['date']])
            .mean()
            .rename('excellent')
            .reset_index()
        )

        # The cross-sectional mean of the day is the "moderate" level; measure
        # each stock's absolute distance from it.
        market_level = fac_ex.groupby('date')['excellent'].transform('mean')
        fac_ex['moderate'] = (fac_ex['excellent'] - market_level).abs()
        fac_ex = fac_ex.set_index('date')

        # Rolling mean and rolling std of the distance, per stock.
        rolling = fac_ex.groupby('stock_code')['moderate'].rolling(lookback)
        return (rolling.mean() + rolling.std()).to_frame('factor')

    # Copy only the columns that are needed; the minute data is large.
    stk_data = stk_data0[['stock_code', 'date', 'rtn', 'surge']].copy()

    # Mean / std of the next `horizon` minute returns, within each stock-day.
    # transform() guarantees the result is aligned with the original rows.
    intraday_rtn = stk_data.groupby(['stock_code', 'date'])['rtn']
    fwd_mean = intraday_rtn.transform(lambda x: x.rolling(horizon).mean().shift(-horizon))
    fwd_std = intraday_rtn.transform(lambda x: x.rolling(horizon).std().shift(-horizon))
    stk_data['surge_rtn_mean'] = fwd_mean
    stk_data['surge_rtn_std'] = fwd_std

    # Equal-weight sum of the return-based and volatility-based components.
    fac_ex_ret = monthly_excellent_factor(stk_data, 'surge_rtn_mean')
    fac_ex_vol = monthly_excellent_factor(stk_data, 'surge_rtn_std')
    factor = (fac_ex_ret['factor'] + fac_ex_vol['factor']).reset_index()
    return factor

因子数据处理

1
2
3
4
5
6
7
8
# Compute the factor and drop the warm-up rows that have no value yet.
factor = calculate_moderate_risk_factor(stk_data).dropna()

# Attach the next-day return, keep only rows where it is known, and rename
# the columns for the backtest helpers.
factor = factor.merge(pred_rtn, how='left', on=['date', 'stock_code'])
factor = factor.dropna(subset=['pred_rtn'])
factor = factor.rename(columns={'factor': "moderate_risk_factor", 'date': "close_date"})

# Winsorize the factor values with the project helper.
factor = backtest.winsorize_factor(factor, 'moderate_risk_factor')
factor.head()
stock_codeclose_datemoderate_risk_factorpred_rtn
5651000001.SZ2021-01-290.0005510.063231
5652000002.SZ2021-01-290.0011120.010076
5653000004.SZ2021-01-290.000724-0.055281
5654000005.SZ2021-01-290.001220-0.093458
5655000006.SZ2021-01-290.0006340.014403

因子检测

1
2
3
# Run the project's Fama-MacBeth helper on the factor and show the returned
# dict as a one-row table.
# NOTE(review): the helper's exact regression setup is not visible here.
res_dict = backtest.fama_macbeth(factor, 'moderate_risk_factor')
fama_macbeth_res = pd.DataFrame([res_dict])
fama_macbeth_res
fac_nametppos_countneg_count
0moderate_risk_factor0.1896310.849773113109
1
# Grouped backtest via the project helper; it returns per-group returns and
# cumulative returns (presumably also drawing the chart shown below).
group_rtns, group_cum_rtns = backtest.group_return_analysis(factor, 'moderate_risk_factor')

适度冒险因子策略实现_22_1

整体来看该因子是一个正向因子,从我选择的回测期来看,这并不是一个有效的因子。

通过Fama-MacBeth检验,其带来的收益几乎为0,而且并不显著。

对因子进行分组回测可以看到,收益两头高中间低,可以进行进一步的优化。

但是由于回测时间太短,而且只在约300只股票中测试,无法判定因子的真实效果,结果可能只是受到市场风格的影响,可以在更长的时间、更大的票池上测试。

1
# Strategy backtest via the project helper; returns the strategy return
# series and a table of evaluation metrics.
# NOTE(review): judging by its name the helper rebalances weekly into a fixed
# number of stocks - confirm against mytools.backtest.
rtn, evaluate_result = backtest.backtest_1week_nstock(factor, 'moderate_risk_factor')

适度冒险因子策略实现_24_1

1
# Display the evaluation metrics returned by the backtest.
evaluate_result
sharpe_ratiomax_drawdownmax_drawdown_startmax_drawdown_endsortino_ratioannual_returnannual_volatilitysection
01.9304540.1314832021-09-102021-11-042.7779970.3887840.178471Sum
11.9304540.1314832021-09-102021-11-042.7779970.3887840.1784712021

从策略指标来看,效果其实还可以,夏普比接近2,回撤也较小。整体收益比300只股票的均值大一些。

1
2
3
# Benchmark: equal-weighted mean daily return of all stocks in the daily file,
# aligned to the strategy returns by index and compounded.
market_rtn = (
    daily_stock_data.groupby('date')['rtn']
    .mean()
    .rename('market_rtn')
    .to_frame()
)
rtn = rtn.merge(market_rtn, how="left", left_index=True, right_index=True)
rtn['market_cum_rtn'] = rtn['market_rtn'].add(1).cumprod()
1
# Plot the strategy's cumulative return against the equal-weighted benchmark.
rtn[['cum_rtn', 'market_cum_rtn']].plot()

适度冒险因子策略实现_27_1