DeepSeek + QLIB 实现秒级股票高频量化交易系统(注:本文示例代码实际为日频策略)

下面是一个基于QLIB框架构建的股票量化交易系统的完整示例,包含数据处理、模型训练和回测三个主要模块:

from qlib.constant import REG_CN
from qlib.utils import init_instance_by_config
from qlib.workflow import R
from qlib.workflow.record import SigRecord
from qlib.contrib.data.handler import Alpha158
from qlib.contrib.evaluate import backtest_daily
from qlib.contrib.strategy import TopkDropoutStrategy

# 1. Initialize the QLIB environment
def init_qlib():
    """Initialize qlib for the China market using the local data bundle.

    The data set under ``~/.qlib/qlib_data/cn_data`` must be downloaded
    before this function is called.
    """
    import qlib

    data_dir = "~/.qlib/qlib_data/cn_data"
    qlib.init(region=REG_CN, provider_uri=data_dir)

# 2. Data processing module
def data_processing():
    """Build the Alpha158 dataset with train/valid/test segments.

    Returns:
        The ``DatasetH`` instance created by ``init_instance_by_config``.
    """
    # Processors applied to the features. They are registered as
    # ``infer_processors`` so that the data used for prediction is
    # normalized the same way as the data used for training.
    feature_processors = [
        {"class": "RobustZScoreNorm", "kwargs": {"fields_group": "feature", "clip_outlier": True}},
        {"class": "Fillna", "kwargs": {"fields_group": "feature"}},
    ]

    # Processors applied to the label; labels are only needed when learning.
    # NOTE(review): ``DropnaLabel`` is the more common choice for labels;
    # ``Fillna`` is kept to preserve the original intent - confirm.
    label_processors = [
        {"class": "RobustZScoreNorm", "kwargs": {"fields_group": "label"}},
        {"class": "Fillna", "kwargs": {"fields_group": "label"}},
    ]

    # The handler spec must carry ``class``/``module_path`` at its top level,
    # with the time range, fit window and universe passed as handler kwargs.
    handler_config = {
        "class": "Alpha158",
        "module_path": "qlib.contrib.data.handler",
        "kwargs": {
            "start_time": "2010-01-01",
            "end_time": "2020-12-31",
            "fit_start_time": "2010-01-01",
            "fit_end_time": "2015-12-31",
            "instruments": "csi300",
            "infer_processors": feature_processors,
            "learn_processors": label_processors,
            # (expressions, column names): return from T+1 close to T+2 close.
            "label": (["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"]),
        },
    }

    # Dataset definition with chronological train/valid/test segments.
    dataset_config = {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            "handler": handler_config,
            "segments": {
                "train": ("2010-01-01", "2015-12-31"),
                "valid": ("2016-01-01", "2017-12-31"),
                "test": ("2018-01-01", "2020-12-31"),
            },
        },
    }

    return init_instance_by_config(dataset_config)

# 3. Model construction and training
def model_training(dataset):
    """Train a LightGBM model on ``dataset`` inside a qlib experiment.

    The model hyper-parameters are logged to the ``stock_prediction``
    experiment and the model's prediction signal is stored through qlib's
    ``SignalRecord`` template.

    Args:
        dataset: The dataset returned by ``data_processing``.

    Returns:
        The fitted model instance.
    """
    # The signal record template lives in ``qlib.workflow.record_temp``.
    from qlib.workflow.record_temp import SignalRecord

    # LightGBM model configuration
    model_config = {
        "class": "LGBModel",
        "module_path": "qlib.contrib.model.gbdt",
        "kwargs": {
            "loss": "mse",
            "colsample_bytree": 0.8879,
            "learning_rate": 0.0421,
            "subsample": 0.8789,
            "lambda_l1": 205.6999,
            "lambda_l2": 580.9768,
            "max_depth": 8,
            "num_leaves": 210,
            "num_threads": 20,
            "early_stopping_rounds": 100,
            "verbose": -1,
        },
    }

    model = init_instance_by_config(model_config)

    # Train the model and record the run
    with R.start(experiment_name="stock_prediction"):
        R.log_params(**model_config["kwargs"])
        model.fit(dataset)

        # Generate the predictions and save them with the active recorder
        recorder = R.get_recorder()
        SignalRecord(model, dataset, recorder).generate()

    return model

# 4. Backtest module
def compute_performance_metrics(report, periods_per_year=252):
    """Summarize a daily backtest report into headline performance figures.

    Args:
        report: DataFrame with one row per trading day and the columns
            ``return`` (portfolio return before costs), ``cost`` (cost rate)
            and ``bench`` (benchmark return).
        periods_per_year: Number of trading days used for annualization.

    Returns:
        dict with the keys ``total_return``, ``annualized_return``,
        ``max_drawdown`` (zero or negative) and ``information_ratio``
        (NaN when it cannot be computed).
    """
    if len(report) == 0:
        return {
            "total_return": 0.0,
            "annualized_return": 0.0,
            "max_drawdown": 0.0,
            "information_ratio": float("nan"),
        }

    net_return = report["return"] - report["cost"]
    excess_return = net_return - report["bench"]

    # Compounded value of one unit of capital after each day.
    wealth = (1 + net_return).cumprod()
    growth = float(wealth.iloc[-1])

    if growth > 0:
        annualized_return = growth ** (periods_per_year / len(net_return)) - 1
    else:
        # The whole capital was lost; a fractional power is undefined here.
        annualized_return = -1.0

    # The running peak starts at the initial capital (1.0) so that a loss on
    # the very first day counts as a drawdown.
    running_peak = wealth.cummax().clip(lower=1.0)
    max_drawdown = float((wealth / running_peak - 1).min())

    excess_std = float(excess_return.std())
    if excess_std > 0:  # False for both zero and NaN
        information_ratio = float(excess_return.mean()) / excess_std * periods_per_year ** 0.5
    else:
        information_ratio = float("nan")

    return {
        "total_return": growth - 1,
        "annualized_return": annualized_return,
        "max_drawdown": max_drawdown,
        "information_ratio": information_ratio,
    }


def portfolio_backtest(signal=None):
    """Run a daily top-k dropout backtest and return its performance metrics.

    Args:
        signal: Prediction signal for the strategy, e.g. the result of
            ``model.predict(dataset)`` or a ``(model, dataset)`` pair.

    Returns:
        dict with the keys ``total_return``, ``annualized_return``,
        ``max_drawdown`` and ``information_ratio``.

    Raises:
        ValueError: If no signal is given.
    """
    if signal is None:
        raise ValueError(
            "portfolio_backtest() needs a prediction signal, e.g. "
            "signal=model.predict(dataset) or signal=(model, dataset)"
        )

    strategy_kwargs = {
        "topk": 50,
        "n_drop": 5,
        "signal": signal,
    }

    backtest_config = {
        "start_time": "2018-01-01",
        "end_time": "2020-12-31",
        "account": 100000000,
        "benchmark": "SH000300",
        "exchange_kwargs": {
            "freq": "day",
            "limit_threshold": 0.095,
            "deal_price": "close",
            "open_cost": 0.0005,
            "close_cost": 0.0015,
            "min_cost": 5,
        },
    }

    # Run the backtest. The exchange settings are passed as one dict, and
    # the call returns the daily report together with the positions.
    strategy_obj = TopkDropoutStrategy(**strategy_kwargs)
    report, _positions = backtest_daily(
        start_time=backtest_config["start_time"],
        end_time=backtest_config["end_time"],
        strategy=strategy_obj,
        account=backtest_config["account"],
        benchmark=backtest_config["benchmark"],
        exchange_kwargs=backtest_config["exchange_kwargs"],
    )

    return compute_performance_metrics(report)


# Main program
if __name__ == "__main__":
    # Initialize the environment
    init_qlib()

    # Data processing
    print("Processing data...")
    dataset = data_processing()

    # Model training
    print("Training model...")
    model = model_training(dataset)

    # Backtest on the model's predictions
    print("Running backtest...")
    result = portfolio_backtest(signal=model.predict(dataset))

    # Print the backtest results
    print("Backtest Results:")
    print(f"Total Return: {result['total_return']*100:.2f}%")
    print(f"Annualized Return: {result['annualized_return']*100:.2f}%")
    print(f"Max Drawdown: {result['max_drawdown']*100:.2f}%")
    print(f"Information Ratio: {result['information_ratio']:.2f}")

系统主要包含以下核心模块:

  1. 数据处理模块
  • 使用Alpha158特征集(包含158个技术指标)
  • 数据预处理包含标准化和缺失值处理
  • 划分训练集/验证集/测试集
  • 标签定义为 T+1 日收盘至 T+2 日收盘的单日收益率(Ref($close, -2)/Ref($close, -1) - 1)
  2. 模型训练模块
  • 使用LightGBM梯度提升树模型
  • 树模型在分裂时会隐式利用特征重要性(示例代码未包含显式的特征选择步骤)
  • 早停机制防止过拟合
  • 自动记录实验参数和结果
  3. 回测模块
  • TopK选股策略(选取预测收益最高的50只股票)
  • 动态调仓机制(每日调整持仓)
  • 真实交易成本建模(佣金+印花税)
  • 风险控制参数(涨跌停限制)
  4. 风险指标计算
  • 总收益率
  • 年化收益率
  • 最大回撤
  • 信息比率

数据准备:需要提前下载QLIB的A股数据集(约30GB)

python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn
  1. 特征工程:可根据需求替换Alpha158为其他特征集(如Alpha360)
  2. 模型优化:可使用QLIB内置的超参优化功能
from qlib.contrib.report.analysis import HPOReport

发布者:股市刺客,转载请注明出处:https://www.95sca.cn/archives/949241
站内所有文章皆来自网络转载或读者投稿,请勿用于商业用途。如有侵权、不妥之处,请联系站长并出示版权证明以便删除。敬请谅解!

(0)
股市刺客的头像股市刺客
上一篇 19分钟前
下一篇 17分钟前

相关推荐

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注