重写qlib的alpha158因子表达式，年化40%（代码+数据共享）

今天继续做因子分析，今天不是分析一个因子，而是一组因子——qlib的alpha158这158个因子，基于沪深300股票池的IC分析。

与qlib里的函数大同小异，我们的表达式更简洁一些，不需要qlib里的$；qlib的rank是时序rank，所以我改成了ts_rank，然后统一用小写字母表示函数。

class Alpha(AlphaBase):
    """Alpha158-style factor set written in the local expression syntax.

    Compared with the qlib originals, the expressions drop the ``$`` prefix
    on column names and use ``ts_rank`` for the rolling rank.
    """

    def __init__(self):
        # Intentionally empty; note that AlphaBase.__init__ is not invoked.
        pass

    @staticmethod
    def parse_config_to_fields():
        """Build the factor expression strings and their matching names.

        Returns:
            tuple[list[str], list[str]]: ``(fields, names)`` -- two lists of
            equal length where ``fields[i]`` is the expression string for the
            factor called ``names[i]``.
        """
        # NOTE(review): name list carried over from the original source; its
        # purpose is not evident here (RESI* factors are not generated below).
        # ['CORD30', 'STD30', 'CORR5', 'RESI10', 'CORD60', 'STD5', 'LOW0',
        # 'WVMA30', 'RESI5', 'ROC5', 'KSFT', 'STD20', 'RSV5', 'STD60', 'KLEN']
        fields = []
        names = []

        # ---- kbar: single-bar shape features ----
        fields += [
            "(close-open)/open",
            "(high-low)/open",
            "(close-open)/(high-low+1e-12)",
            "(high-greater(open, close))/open",
            "(high-greater(open, close))/(high-low+1e-12)",
            "(less(open, close)-low)/open",
            "(less(open, close)-low)/(high-low+1e-12)",
            "(2*close-high-low)/open",
            "(2*close-high-low)/(high-low+1e-12)",
        ]
        names += [
            "KMID",
            "KLEN",
            "KMID2",
            "KUP",
            "KUP2",
            "KLOW",
            "KLOW2",
            "KSFT",
            "KSFT2",
        ]

        # ---- price: each price column shifted by 0..4, divided by close ----
        feature = ["OPEN", "HIGH", "LOW", "CLOSE"]
        windows = range(5)
        for feat in feature:
            col = feat.lower()
            # d == 0 uses the bare column instead of a zero shift.
            fields += [
                "shift(%s, %d)/close" % (col, d) if d != 0 else "%s/close" % col
                for d in windows
            ]
            names += [col.upper() + str(d) for d in windows]

        # ---- volume: shifted volume divided by current volume ----
        fields += [
            "shift(volume, %d)/(volume+1e-12)" % d if d != 0 else "volume/(volume+1e-12)"
            for d in windows
        ]
        names += ["VOLUME" + str(d) for d in windows]

        # ---- rolling: one expression per window length ----
        windows = [5, 10, 20, 30, 60]

        fields += ["shift(close, %d)/close" % d for d in windows]
        names += ["ROC%d" % d for d in windows]

        fields += ["mean(close, %d)/close" % d for d in windows]
        names += ["MA%d" % d for d in windows]

        fields += ["std(close, %d)/close" % d for d in windows]
        names += ["STD%d" % d for d in windows]

        fields += ["slope(close, %d)/close" % d for d in windows]
        names += ["BETA%d" % d for d in windows]

        fields += ["max(high, %d)/close" % d for d in windows]
        names += ["MAX%d" % d for d in windows]

        fields += ["min(low, %d)/close" % d for d in windows]
        names += ["MIN%d" % d for d in windows]

        fields += ["quantile(close, %d, 0.8)/close" % d for d in windows]
        names += ["QTLU%d" % d for d in windows]

        fields += ["quantile(close, %d, 0.2)/close" % d for d in windows]
        names += ["QTLD%d" % d for d in windows]

        fields += ["ts_rank(close, %d)" % d for d in windows]
        names += ["RANK%d" % d for d in windows]

        fields += [
            "(close-min(low, %d))/(max(high, %d)-min(low, %d)+1e-12)" % (d, d, d)
            for d in windows
        ]
        names += ["RSV%d" % d for d in windows]

        fields += ["idxmax(high, %d)/%d" % (d, d) for d in windows]
        names += ["IMAX%d" % d for d in windows]

        fields += ["idxmin(low, %d)/%d" % (d, d) for d in windows]
        names += ["IMIN%d" % d for d in windows]

        fields += ["(idxmax(high, %d)-idxmin(low, %d))/%d" % (d, d, d) for d in windows]
        names += ["IMXD%d" % d for d in windows]

        fields += ["corr(close, log(volume+1), %d)" % d for d in windows]
        names += ["CORR%d" % d for d in windows]

        fields += [
            "corr(close/shift(close,1), log(volume/shift(volume, 1)+1), %d)" % d
            for d in windows
        ]
        names += ["CORD%d" % d for d in windows]

        fields += ["mean(close>shift(close, 1), %d)" % d for d in windows]
        names += ["CNTP%d" % d for d in windows]

        fields += ["mean(close<shift(close, 1), %d)" % d for d in windows]
        names += ["CNTN%d" % d for d in windows]

        fields += [
            "mean(close>shift(close, 1), %d)-mean(close<shift(close, 1), %d)" % (d, d)
            for d in windows
        ]
        names += ["CNTD%d" % d for d in windows]

        # NOTE(review): the expressions below spell `Abs` with a capital A
        # while every other function is lowercase; the strings are kept as-is.
        # Confirm the expression engine registers `Abs` under that name.
        fields += [
            "sum(greater(close-shift(close, 1), 0), %d)/(sum(Abs(close-shift(close, 1)), %d)+1e-12)" % (d, d)
            for d in windows
        ]
        names += ["SUMP%d" % d for d in windows]

        fields += [
            "sum(greater(shift(close, 1)-close, 0), %d)/(sum(Abs(close-shift(close, 1)), %d)+1e-12)" % (d, d)
            for d in windows
        ]
        names += ["SUMN%d" % d for d in windows]

        fields += [
            "(sum(greater(close-shift(close, 1), 0), %d)-sum(greater(shift(close, 1)-close, 0), %d))"
            "/(sum(Abs(close-shift(close, 1)), %d)+1e-12)" % (d, d, d)
            for d in windows
        ]
        names += ["SUMD%d" % d for d in windows]

        fields += ["mean(volume, %d)/(volume+1e-12)" % d for d in windows]
        names += ["VMA%d" % d for d in windows]

        fields += ["std(volume, %d)/(volume+1e-12)" % d for d in windows]
        names += ["VSTD%d" % d for d in windows]

        fields += [
            "std(Abs(close/shift(close, 1)-1)*volume, %d)/(mean(Abs(close/shift(close, 1)-1)*volume, %d)+1e-12)"
            % (d, d)
            for d in windows
        ]
        names += ["WVMA%d" % d for d in windows]

        fields += [
            "sum(greater(volume-shift(volume, 1), 0), %d)/(sum(Abs(volume-shift(volume, 1)), %d)+1e-12)"
            % (d, d)
            for d in windows
        ]
        names += ["VSUMP%d" % d for d in windows]

        fields += [
            "sum(greater(shift(volume, 1)-volume, 0), %d)/(sum(Abs(volume-shift(volume, 1)), %d)+1e-12)"
            % (d, d)
            for d in windows
        ]
        names += ["VSUMN%d" % d for d in windows]

        fields += [
            "(sum(greater(volume-shift(volume, 1), 0), %d)-sum(greater(shift(volume, 1)-volume, 0), %d))"
            "/(sum(Abs(volume-shift(volume, 1)), %d)+1e-12)" % (d, d, d)
            for d in windows
        ]
        names += ["VSUMD%d" % d for d in windows]

        # Extra 20-bar rate-of-change column appended after the rolling set.
        fields += ['close/shift(close,20)-1']
        names += ['roc_20']

        return fields, names

我们的单因子ic分析及回测框架：

以如下价量因子为例：

# Example factor: the CORD template (corr of close/shift(close,1) against
# log(volume/shift(volume, 1)+1)) with a window argument of 60, multiplied by -1.
factor_name = "factor"
factor_expr = "-1*(corr(close/shift(close,1), log(volume/shift(volume, 1)+1), 60))"

import pandas as pd

# Put yesterday's data package under ailabx/data/hist_hs300_20230813;
# the data is accessed directly through duckdb.
from engine.alpha.ic_analysis import calc_ic
from engine.datafeed.dataloader import Duckdbloader
from engine.env import Env
# NOTE(review): SelectTopK / WeightEqually used below are presumably provided
# by these wildcard imports -- confirm which module exports each.
from engine.algo.algo_weights import *
from engine.algo.algos import *

# Alternative factors tried earlier (kept for reference, not active):
# factor_expr = "-1 * correlation(open, volume, 10)"
# factor_name = 'alpha006'
# factor_expr = '-1 * sum(rank(correlation(rank(high), rank(volume), 3)), 3)'
# factor_name = 'alpha015'
factor_expr = '-1*(corr(close/shift(close,1), log(volume/shift(volume, 1)+1), 60))'
factor_name = 'factor'

loader = Duckdbloader(
    symbols=None,
    columns=['close', 'open', 'high', 'low', 'volume'],
    start_date="20100101",
)

# Column 0 is the factor itself, column 1 the same-bar return; one extra
# column per horizon in `days` holds shift(close,-d)/close-1.
fields = [factor_expr, "close/shift(close,1)-1"]
names = [factor_name, 'return_0']
days = [1, 5, 10, 20]
for d in days:
    fields.append("shift(close,-{})/close-1".format(d))
    names.append('return_{}'.format(d))

df = loader.load(fields=fields, names=names)
# Drop rows where any requested expression evaluated to NaN.
df.dropna(inplace=True)
print(df)


def ic_analisys():
    """Print the mean IC of the factor for each horizon, then their average.

    Reads the module-level ``df``, ``days`` and ``factor_name``. For every
    horizon ``d`` in ``days`` it calls ``calc_ic`` with the factor column as
    ``pred`` and ``return_<d>`` as ``label``, prints ``d`` and the mean of
    the result, and finally prints the average of those means.
    """
    ic_mean = 0.0
    for d in days:
        print(d)
        ric = calc_ic(pred=df[factor_name], label=df['return_{}'.format(d)])
        mean = ric.mean()
        print(mean)
        ic_mean += mean
    print(ic_mean / len(days))


def backtest():
    """Run a backtest on ``df`` selecting the top-1 entry by the factor.

    Builds an ``Env`` over the module-level ``df``, installs
    ``SelectTopK(K=1, order_by=factor_name)`` followed by ``WeightEqually()``,
    then runs the backtest loop and shows the results.
    """
    e = Env(df)
    e.set_algos([
        SelectTopK(K=1, order_by=factor_name),
        WeightEqually(),
    ])
    e.backtest_loop()
    e.show_results()


if __name__ == '__main__':
    ic_analisys()
    backtest()