重写qlib的alpha158因子表达式,年化40%(代码+数据共享)

今天继续做因子分析,今天不是分析一个因子,而是一组因子——qlib的alpha158这158个因子,基于沪深300股票池的IC分析。

与qlib里的函数大同小异,我们的表达式更简洁一些,不需要qlib里的$;qlib的rank是时序rank,所以我改成了ts_rank,然后统一用小写字母表示函数。

图片

class Alpha(AlphaBase):
    """Factor-expression set modeled on qlib's Alpha158 feature config.

    Expressions are plain strings for the project's expression engine:
    no ``$`` prefix on price/volume fields, and the time-series rank is
    spelled ``ts_rank``.
    """

    def __init__(self):
        pass

    @staticmethod
    def parse_config_to_fields():
        """Build the factor expressions and their names.

        Returns:
            A ``(fields, names)`` tuple of two equally long lists:
            ``fields[i]`` is the expression string and ``names[i]`` is the
            column name for that expression.
        """
        # NOTE(review): the list below was carried over from the original
        # snippet; presumably a previously selected factor subset. The RESI*
        # names in it are not generated by this method -- confirm before use.
        # ['CORD30', 'STD30', 'CORR5', 'RESI10', 'CORD60', 'STD5', 'LOW0',
        #  'WVMA30', 'RESI5', 'ROC5', 'KSFT', 'STD20', 'RSV5', 'STD60', 'KLEN']
        fields = []
        names = []

        # =========== kbar ==========
        # The 1e-12 term keeps the denominator non-zero when high == low.
        fields += [
            "(close-open)/open",
            "(high-low)/open",
            "(close-open)/(high-low+1e-12)",
            "(high-greater(open, close))/open",
            "(high-greater(open, close))/(high-low+1e-12)",
            "(less(open, close)-low)/open",
            "(less(open, close)-low)/(high-low+1e-12)",
            "(2*close-high-low)/open",
            "(2*close-high-low)/(high-low+1e-12)",
        ]
        names += [
            "KMID",
            "KLEN",
            "KMID2",
            "KUP",
            "KUP2",
            "KLOW",
            "KLOW2",
            "KSFT",
            "KSFT2",
        ]

        # =========== price ==========
        feature = ["OPEN", "HIGH", "LOW", "CLOSE"]
        windows = range(5)
        for field in feature:
            field = field.lower()
            # d == 0 is the current bar, so no shift() wrapper is emitted.
            fields += [
                "shift(%s, %d)/close" % (field, d) if d != 0 else "%s/close" % field
                for d in windows
            ]
            names += [field.upper() + str(d) for d in windows]

        # =========== volume ==========
        fields += [
            "shift(volume, %d)/(volume+1e-12)" % d if d != 0 else "volume/(volume+1e-12)"
            for d in windows
        ]
        names += ["VOLUME" + str(d) for d in windows]

        # =========== rolling ==========
        windows = [5, 10, 20, 30, 60]

        fields += ["shift(close, %d)/close" % d for d in windows]
        names += ["ROC%d" % d for d in windows]

        fields += ["mean(close, %d)/close" % d for d in windows]
        names += ["MA%d" % d for d in windows]

        fields += ["std(close, %d)/close" % d for d in windows]
        names += ["STD%d" % d for d in windows]

        fields += ["slope(close, %d)/close" % d for d in windows]
        names += ["BETA%d" % d for d in windows]

        fields += ["max(high, %d)/close" % d for d in windows]
        names += ["MAX%d" % d for d in windows]

        fields += ["min(low, %d)/close" % d for d in windows]
        names += ["MIN%d" % d for d in windows]

        fields += ["quantile(close, %d, 0.8)/close" % d for d in windows]
        names += ["QTLU%d" % d for d in windows]

        fields += ["quantile(close, %d, 0.2)/close" % d for d in windows]
        names += ["QTLD%d" % d for d in windows]

        fields += ["ts_rank(close, %d)" % d for d in windows]
        names += ["RANK%d" % d for d in windows]

        fields += [
            "(close-min(low, %d))/(max(high, %d)-min(low, %d)+1e-12)" % (d, d, d)
            for d in windows
        ]
        names += ["RSV%d" % d for d in windows]

        fields += ["idxmax(high, %d)/%d" % (d, d) for d in windows]
        names += ["IMAX%d" % d for d in windows]

        fields += ["idxmin(low, %d)/%d" % (d, d) for d in windows]
        names += ["IMIN%d" % d for d in windows]

        fields += ["(idxmax(high, %d)-idxmin(low, %d))/%d" % (d, d, d) for d in windows]
        names += ["IMXD%d" % d for d in windows]

        fields += ["corr(close, log(volume+1), %d)" % d for d in windows]
        names += ["CORR%d" % d for d in windows]

        fields += [
            "corr(close/shift(close,1), log(volume/shift(volume, 1)+1), %d)" % d
            for d in windows
        ]
        names += ["CORD%d" % d for d in windows]

        fields += ["mean(close>shift(close, 1), %d)" % d for d in windows]
        names += ["CNTP%d" % d for d in windows]

        fields += ["mean(close<shift(close, 1), %d)" % d for d in windows]
        names += ["CNTN%d" % d for d in windows]

        fields += [
            "mean(close>shift(close, 1), %d)-mean(close<shift(close, 1), %d)" % (d, d)
            for d in windows
        ]
        names += ["CNTD%d" % d for d in windows]

        # NOTE(review): `Abs` is capitalized while every other function name
        # is lowercase; kept exactly as in the original because the engine's
        # function table is not visible here -- confirm the expected spelling.
        fields += [
            "sum(greater(close-shift(close, 1), 0), %d)/(sum(Abs(close-shift(close, 1)), %d)+1e-12)"
            % (d, d)
            for d in windows
        ]
        names += ["SUMP%d" % d for d in windows]

        fields += [
            "sum(greater(shift(close, 1)-close, 0), %d)/(sum(Abs(close-shift(close, 1)), %d)+1e-12)"
            % (d, d)
            for d in windows
        ]
        names += ["SUMN%d" % d for d in windows]

        fields += [
            "(sum(greater(close-shift(close, 1), 0), %d)-sum(greater(shift(close, 1)-close, 0), %d))"
            "/(sum(Abs(close-shift(close, 1)), %d)+1e-12)" % (d, d, d)
            for d in windows
        ]
        names += ["SUMD%d" % d for d in windows]

        fields += ["mean(volume, %d)/(volume+1e-12)" % d for d in windows]
        names += ["VMA%d" % d for d in windows]

        fields += ["std(volume, %d)/(volume+1e-12)" % d for d in windows]
        names += ["VSTD%d" % d for d in windows]

        fields += [
            "std(Abs(close/shift(close, 1)-1)*volume, %d)/(mean(Abs(close/shift(close, 1)-1)*volume, %d)+1e-12)"
            % (d, d)
            for d in windows
        ]
        names += ["WVMA%d" % d for d in windows]

        fields += [
            "sum(greater(volume-shift(volume, 1), 0), %d)/(sum(Abs(volume-shift(volume, 1)), %d)+1e-12)"
            % (d, d)
            for d in windows
        ]
        names += ["VSUMP%d" % d for d in windows]

        fields += [
            "sum(greater(shift(volume, 1)-volume, 0), %d)/(sum(Abs(volume-shift(volume, 1)), %d)+1e-12)"
            % (d, d)
            for d in windows
        ]
        names += ["VSUMN%d" % d for d in windows]

        fields += [
            "(sum(greater(volume-shift(volume, 1), 0), %d)-sum(greater(shift(volume, 1)-volume, 0), %d))"
            "/(sum(Abs(volume-shift(volume, 1)), %d)+1e-12)" % (d, d, d)
            for d in windows
        ]
        names += ["VSUMD%d" % d for d in windows]

        # Extra 20-day rate of change appended after the rolling groups.
        fields += ['close/shift(close,20)-1']
        names += ['roc_20']

        return fields, names

我们的单因子ic分析及回测框架:

以如下价量因子为例:

# Example factor: the CORD expression from the Alpha class above with a
# window of 60, multiplied by -1.
factor_expr = '-1*(corr(close/shift(close,1), log(volume/shift(volume, 1)+1), 60))'
factor_name = 'factor'

import pandas as pd

# Put yesterday's data package under ailabx/data/hist_hs300_20230813;
# it is read directly through duckdb.
from engine.alpha.ic_analysis import calc_ic
from engine.datafeed.dataloader import Duckdbloader
from engine.env import Env
# NOTE(review): wildcard imports are kept because the defining module of
# SelectTopK / WeightEqually is not visible here; prefer explicit imports.
from engine.algo.algo_weights import *
from engine.algo.algos import *

# Expressions tried earlier, kept for reference only:
#   alpha006: "-1 * correlation(open, volume, 10)"
#   alpha015: '-1 * sum(rank(correlation(rank(high), rank(volume), 3)), 3)'
factor_expr = '-1*(corr(close/shift(close,1), log(volume/shift(volume, 1)+1), 60))'
factor_name = 'factor'

loader = Duckdbloader(
    symbols=None,
    columns=['close', 'open', 'high', 'low', 'volume'],
    start_date="20100101",
)

# Column 0 is the factor itself, column 1 the same-day return.
fields = [factor_expr, "close/shift(close,1)-1"]
names = [factor_name, 'return_0']

# One forward-return column per horizon; the negative shift looks d bars ahead.
days = [1, 5, 10, 20]
for d in days:
    fields.append("shift(close,-{})/close-1".format(d))
    names.append('return_{}'.format(d))

df = loader.load(fields=fields, names=names)
df.dropna(inplace=True)
print(df)


def ic_analisys():
    """Print the mean IC per horizon in ``days`` and their average.

    For each horizon, ``calc_ic`` is called with the factor column as
    ``pred`` and the matching ``return_<d>`` column as ``label``.
    """
    ic_mean = 0.0
    for d in days:
        print(d)
        ric = calc_ic(pred=df[factor_name], label=df['return_{}'.format(d)])
        mean = ric.mean()
        print(mean)
        ic_mean += mean
    print(ic_mean / len(days))


def backtest():
    """Run a backtest on ``df``: pick the top-1 name by factor, equal weight."""
    e = Env(df)
    e.set_algos([
        # Alternatives that were disabled in the original script:
        #   RunDays(5)
        #   SelectBySignal(buy_rules=['ind(roc_20)>0.02'],
        #                  sell_rules=['ind(roc_20)<-0.02'])
        SelectTopK(K=1, order_by=factor_name),
        WeightEqually(),
    ])
    e.backtest_loop()
    e.show_results()


if __name__ == '__main__':
    ic_analisys()
    backtest()

图片

发布者:股市刺客,转载请注明出处:https://www.95sca.cn/archives/103980
站内所有文章皆来自网络转载或读者投稿,请勿用于商业用途。如有侵权、不妥之处,请联系站长并出示版权证明以便删除。敬请谅解!

(0)
股市刺客的头像股市刺客
上一篇 2024 年 7 月 29 日
下一篇 2024 年 7 月 29 日

相关推荐

发表回复

您的电子邮箱地址不会被公开。 必填项已用 * 标注