今天继续补充因子,之前alpha158的因子没有补全,今天把它完成。
而且顺带把qlib里的表达式的函数基本都复制了。说实话,我们的表达式比qlib简洁,当然这里得感谢qlib的思路,这就是开源的魅力。
class Alpha(AlphaBase):
    """Factor set that builds Alpha158-style expressions from OHLCV columns."""

    def __init__(self):
        # Intentionally empty: no instance state is set up and the base
        # initializer is not invoked.
        pass

    @staticmethod
    def parse_config_to_fields():
        """Build the factor expression strings and their matching names.

        Returns:
            A ``(fields, names)`` tuple of two equally long lists, where
            ``fields[i]`` is the expression text for the factor called
            ``names[i]``.

        The output is assembled in four stages: candlestick-shape factors,
        lagged price/volume ratios over lags 0-4, rolling-window factors over
        the windows 5/10/20/30/60, and one trailing 20-bar rate of change.
        """
        # NOTE(review): an earlier comment in this method listed sample names
        # including RESI5/RESI10; no RESI* (or RSQR*) factor is emitted here.

        # --- candlestick shape: (name, expression) ---
        candle_shape = (
            ("KMID", "(close-open)/open"),
            ("KLEN", "(high-low)/open"),
            ("KMID2", "(close-open)/(high-low+1e-12)"),
            ("KUP", "(high-greater(open, close))/open"),
            ("KUP2", "(high-greater(open, close))/(high-low+1e-12)"),
            ("KLOW", "(less(open, close)-low)/open"),
            ("KLOW2", "(less(open, close)-low)/(high-low+1e-12)"),
            ("KSFT", "(2*close-high-low)/open"),
            ("KSFT2", "(2*close-high-low)/(high-low+1e-12)"),
        )
        fields = [expression for _, expression in candle_shape]
        names = [label for label, _ in candle_shape]

        # --- lagged prices, each divided by the current close ---
        lags = range(5)
        for column in ("open", "high", "low", "close"):
            for lag in lags:
                if lag == 0:
                    # Lag 0 needs no shift() wrapper.
                    fields.append(f"{column}/close")
                else:
                    fields.append(f"shift({column}, {lag})/close")
                names.append(f"{column.upper()}{lag}")

        # --- lagged volume, divided by the current volume ---
        for lag in lags:
            if lag == 0:
                fields.append("volume/(volume+1e-12)")
            else:
                fields.append(f"shift(volume, {lag})/(volume+1e-12)")
            names.append(f"VOLUME{lag}")

        # --- rolling-window factors: (name prefix, template) ---
        # "{d}" is replaced by the window length; order here is output order.
        rolling_templates = (
            ("ROC", "shift(close, {d})/close"),
            ("MA", "mean(close, {d})/close"),
            ("STD", "std(close, {d})/close"),
            ("BETA", "slope(close, {d})/close"),
            ("MAX", "max(high, {d})/close"),
            ("MIN", "min(low, {d})/close"),
            ("QTLU", "quantile(close, {d}, 0.8)/close"),
            ("QTLD", "quantile(close, {d}, 0.2)/close"),
            ("RANK", "rank(close, {d})"),
            ("RSV", "(close-min(low, {d}))/(max(high, {d})-min(low, {d})+1e-12)"),
            ("IMAX", "idxmax(high, {d})/{d}"),
            ("IMIN", "idxmin(low, {d})/{d}"),
            ("IMXD", "(idxmax(high, {d})-idxmin(low, {d}))/{d}"),
            ("CORR", "corr(close, log(volume+1), {d})"),
            (
                "CORD",
                "corr(close/shift(close,1), log(volume/shift(volume, 1)+1), {d})",
            ),
            ("CNTP", "mean(close>shift(close, 1), {d})"),
            ("CNTN", "mean(close<shift(close, 1), {d})"),
            (
                "CNTD",
                "mean(close>shift(close, 1), {d})-mean(close<shift(close, 1), {d})",
            ),
            (
                "SUMP",
                "sum(greater(close-shift(close, 1), 0), {d})"
                "/(sum(Abs(close-shift(close, 1)), {d})+1e-12)",
            ),
            (
                "SUMN",
                "sum(greater(shift(close, 1)-close, 0), {d})"
                "/(sum(Abs(close-shift(close, 1)), {d})+1e-12)",
            ),
            (
                "SUMD",
                "(sum(greater(close-shift(close, 1), 0), {d})"
                "-sum(greater(shift(close, 1)-close, 0), {d}))"
                "/(sum(Abs(close-shift(close, 1)), {d})+1e-12)",
            ),
            ("VMA", "mean(volume, {d})/(volume+1e-12)"),
            ("VSTD", "std(volume, {d})/(volume+1e-12)"),
            (
                "WVMA",
                "std(Abs(close/shift(close, 1)-1)*volume, {d})"
                "/(mean(Abs(close/shift(close, 1)-1)*volume, {d})+1e-12)",
            ),
            (
                "VSUMP",
                "sum(greater(volume-shift(volume, 1), 0), {d})"
                "/(sum(Abs(volume-shift(volume, 1)), {d})+1e-12)",
            ),
            (
                "VSUMN",
                "sum(greater(shift(volume, 1)-volume, 0), {d})"
                "/(sum(Abs(volume-shift(volume, 1)), {d})+1e-12)",
            ),
            (
                "VSUMD",
                "(sum(greater(volume-shift(volume, 1), 0), {d})"
                "-sum(greater(shift(volume, 1)-volume, 0), {d}))"
                "/(sum(Abs(volume-shift(volume, 1)), {d})+1e-12)",
            ),
        )
        spans = (5, 10, 20, 30, 60)
        for prefix, template in rolling_templates:
            for span in spans:
                fields.append(template.format(d=span))
                names.append(f"{prefix}{span}")

        # --- trailing 20-bar rate of change ---
        fields.append("close/shift(close,20)-1")
        names.append("roc_20")

        return fields, names
这就是Alpha158的复刻。
可以说,把OHLCV的数据用到“极致”了,有K线关系,有不同周期,还有非线性的关系。机器学习模型的好处在于,这些对它来说都只是因子,可以统一作为特征输入。
当然,因子并不意味着越多越好,或者越复杂越好。
好在树模型天然可以筛选特征。
发布者:股市刺客,转载请注明出处:https://www.95sca.cn/archives/104113
站内所有文章皆来自网络转载或读者投稿,请勿用于商业用途。如有侵权、不妥之处,请联系站长并出示版权证明以便删除。敬请谅解!