LightGBM的特征筛选:选择效果最强的因子集

今天继续补充因子,之前alpha158的因子没有补全,今天把它完成。

而且顺带把qlib里的表达式的函数基本都复制了。说实话,我们的表达式比qlib简洁,当然这里得感谢qlib的思路,这就是开源的魅力。

图片

class Alpha(AlphaBase):
    """Generator of OHLCV factor expressions and their names.

    The accompanying article describes this as a replica of qlib's Alpha158
    factor set, written with lowercase expression functions such as
    ``shift``, ``mean`` and ``std``.
    """

    def __init__(self):
        pass

    @staticmethod
    def parse_config_to_fields():
        """Build the factor expression strings and their matching names.

        Returns:
            tuple: ``(fields, names)``, two parallel lists of equal length.
            ``fields[i]`` is an expression string and ``names[i]`` is the
            name of that factor.
        """
        # Factor names recorded by the original author:
        # ['CORD30', 'STD30', 'CORR5', 'RESI10', 'CORD60', 'STD5', 'LOW0',
        #  'WVMA30', 'RESI5', 'ROC5', 'KSFT', 'STD20', 'RSV5', 'STD60', 'KLEN']
        # NOTE(review): presumably a previously selected feature subset; the
        # RESI* names in it are not generated by this method -- confirm.

        # ---- k-bar shape factors, kept as (name, expression) pairs ----
        kbar = [
            ("KMID", "(close-open)/open"),
            ("KLEN", "(high-low)/open"),
            ("KMID2", "(close-open)/(high-low+1e-12)"),
            ("KUP", "(high-greater(open, close))/open"),
            ("KUP2", "(high-greater(open, close))/(high-low+1e-12)"),
            ("KLOW", "(less(open, close)-low)/open"),
            ("KLOW2", "(less(open, close)-low)/(high-low+1e-12)"),
            ("KSFT", "(2*close-high-low)/open"),
            ("KSFT2", "(2*close-high-low)/(high-low+1e-12)"),
        ]
        fields = [expression for _, expression in kbar]
        names = [label for label, _ in kbar]

        # ---- lagged prices relative to the current close ----
        lags = range(5)
        for column in ["OPEN", "HIGH", "LOW", "CLOSE"]:
            lowered = column.lower()
            for lag in lags:
                if lag == 0:
                    # Lag 0 is the unshifted series itself.
                    fields.append("%s/close" % lowered)
                else:
                    fields.append("shift(%s, %d)/close" % (lowered, lag))
                names.append(lowered.upper() + str(lag))

        # ---- lagged volume relative to the current volume ----
        for lag in lags:
            if lag == 0:
                fields.append("volume/(volume+1e-12)")
            else:
                fields.append("shift(volume, %d)/(volume+1e-12)" % lag)
            names.append("VOLUME" + str(lag))

        # ---- rolling-window factors ----
        # Each entry is (name template, expression template). Every "%d" in
        # an expression template is filled with the same window length.
        rolling = [
            ("ROC%d", "shift(close, %d)/close"),
            ("MA%d", "mean(close, %d)/close"),
            ("STD%d", "std(close, %d)/close"),
            ("BETA%d", "slope(close, %d)/close"),
            ("MAX%d", "max(high, %d)/close"),
            ("MIN%d", "min(low, %d)/close"),
            ("QTLU%d", "quantile(close, %d, 0.8)/close"),
            ("QTLD%d", "quantile(close, %d, 0.2)/close"),
            ("RANK%d", "rank(close, %d)"),
            ("RSV%d", "(close-min(low, %d))/(max(high, %d)-min(low, %d)+1e-12)"),
            ("IMAX%d", "idxmax(high, %d)/%d"),
            ("IMIN%d", "idxmin(low, %d)/%d"),
            ("IMXD%d", "(idxmax(high, %d)-idxmin(low, %d))/%d"),
            ("CORR%d", "corr(close, log(volume+1), %d)"),
            ("CORD%d", "corr(close/shift(close,1), log(volume/shift(volume, 1)+1), %d)"),
            ("CNTP%d", "mean(close>shift(close, 1), %d)"),
            ("CNTN%d", "mean(close<shift(close, 1), %d)"),
            ("CNTD%d", "mean(close>shift(close, 1), %d)-mean(close<shift(close, 1), %d)"),
            (
                "SUMP%d",
                "sum(greater(close-shift(close, 1), 0), %d)/(sum(Abs(close-shift(close, 1)), %d)+1e-12)",
            ),
            (
                "SUMN%d",
                "sum(greater(shift(close, 1)-close, 0), %d)/(sum(Abs(close-shift(close, 1)), %d)+1e-12)",
            ),
            (
                "SUMD%d",
                "(sum(greater(close-shift(close, 1), 0), %d)-sum(greater(shift(close, 1)-close, 0), %d))"
                "/(sum(Abs(close-shift(close, 1)), %d)+1e-12)",
            ),
            ("VMA%d", "mean(volume, %d)/(volume+1e-12)"),
            ("VSTD%d", "std(volume, %d)/(volume+1e-12)"),
            (
                "WVMA%d",
                "std(Abs(close/shift(close, 1)-1)*volume, %d)/(mean(Abs(close/shift(close, 1)-1)*volume, %d)+1e-12)",
            ),
            (
                "VSUMP%d",
                "sum(greater(volume-shift(volume, 1), 0), %d)/(sum(Abs(volume-shift(volume, 1)), %d)+1e-12)",
            ),
            (
                "VSUMN%d",
                "sum(greater(shift(volume, 1)-volume, 0), %d)/(sum(Abs(volume-shift(volume, 1)), %d)+1e-12)",
            ),
            (
                "VSUMD%d",
                "(sum(greater(volume-shift(volume, 1), 0), %d)-sum(greater(shift(volume, 1)-volume, 0), %d))"
                "/(sum(Abs(volume-shift(volume, 1)), %d)+1e-12)",
            ),
        ]
        spans = [5, 10, 20, 30, 60]
        for name_template, expr_template in rolling:
            # Count the placeholders once per template so the window length
            # can be repeated for each of them.
            slots = expr_template.count("%d")
            for span in spans:
                fields.append(expr_template % ((span,) * slots))
                names.append(name_template % span)

        # ---- extra 20-period rate of change appended after the rolling set ----
        fields.append('close/shift(close,20)-1')
        names.append('roc_20')

        return fields, names

这就是Alpha158的复刻。

可以说,把OHLCV的数据用到“极致”了,有K线关系,有不同周期,还有非线性的关系。机器学习模型的好处就是:不管因子怎么构造,反正都是因子,都可以直接作为特征交给模型。

当然,因子并不意味着越多越好,或者越复杂越好。

好在树模型天然可以筛选特征。

 

发布者:股市刺客,转载请注明出处:https://www.95sca.cn/archives/104113
站内所有文章皆来自网络转载或读者投稿,请勿用于商业用途。如有侵权、不妥之处,请联系站长并出示版权证明以便删除。敬请谅解!

(0)
股市刺客的头像股市刺客
上一篇 2024 年 7 月 29 日
下一篇 2024 年 7 月 29 日

相关推荐

发表回复

您的电子邮箱地址不会被公开。 必填项已用 * 标注