下面是一个基于QLIB框架构建的股票量化交易系统的完整示例,包含数据处理、模型训练和回测三个主要模块:
from qlib.constant import REG_CN
from qlib.utils import init_instance_by_config
from qlib.workflow import R
from qlib.workflow.record import SigRecord
from qlib.contrib.data.handler import Alpha158
from qlib.contrib.evaluate import backtest_daily
from qlib.contrib.strategy import TopkDropoutStrategy
# 1. 初始化QLIB环境
def init_qlib():
    """Initialise the qlib runtime against the local China-market data bundle.

    The data directory is only referenced here, not created, so the dataset
    has to be downloaded to that location before this function is called.
    """
    import qlib

    # Location of the pre-downloaded dataset.
    qlib.init(provider_uri="~/.qlib/qlib_data/cn_data", region=REG_CN)
# 2. 数据处理模块
def data_processing():
    """Build the Alpha158 dataset with train/valid/test segments.

    The handler covers 2010-01-01 to 2020-12-31 on the ``csi300`` universe and
    fits its normalisation statistics on the training window only
    (2010-01-01 to 2015-12-31).

    Returns:
        The dataset object produced by ``init_instance_by_config`` for the
        ``DatasetH`` configuration below.
    """
    # The handler entry of a DatasetH config must itself be an instantiable
    # spec (``class`` / ``module_path`` / ``kwargs``); the time range, universe
    # and processors are constructor arguments of the handler.
    handler_config = {
        "class": "Alpha158",
        "module_path": "qlib.contrib.data.handler",
        "kwargs": {
            "start_time": "2010-01-01",
            "end_time": "2020-12-31",
            "fit_start_time": "2010-01-01",
            "fit_end_time": "2015-12-31",
            "instruments": "csi300",
            # Feature processors belong in ``infer_processors`` so that the
            # data used for prediction is normalised the same way as the data
            # used for training (learn processors run on top of these).
            "infer_processors": [
                {"class": "RobustZScoreNorm", "kwargs": {"fields_group": "feature", "clip_outlier": True}},
                {"class": "Fillna", "kwargs": {"fields_group": "feature"}},
            ],
            # Label processing is training-only; Alpha158 has no
            # ``label_processor`` argument, so the label steps are expressed
            # as learn processors restricted to the "label" column group.
            "learn_processors": [
                {"class": "RobustZScoreNorm", "kwargs": {"fields_group": "label"}},
                {"class": "Fillna", "kwargs": {"fields_group": "label"}},
            ],
            # Return from the T+1 close to the T+2 close.
            "label": ["Ref($close, -2)/Ref($close, -1) - 1"],
        },
    }

    # Dataset definition: chronological split, no overlap between segments.
    dataset_config = {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            "handler": handler_config,
            "segments": {
                "train": ("2010-01-01", "2015-12-31"),
                "valid": ("2016-01-01", "2017-12-31"),
                "test": ("2018-01-01", "2020-12-31"),
            },
        },
    }
    return init_instance_by_config(dataset_config)
# 3. 模型构建与训练
def model_training(dataset):
    """Train a LightGBM model on ``dataset`` and record the run with qlib's recorder.

    The model hyper-parameters are logged to the ``stock_prediction``
    experiment, the model is fitted, and its predictions are stored as the
    ``pred.pkl`` artifact of the same run.

    Args:
        dataset: The qlib dataset to fit on and predict from.

    Returns:
        The fitted model instance.
    """
    # LightGBM configuration.
    model_config = {
        "class": "LGBModel",
        "module_path": "qlib.contrib.model.gbdt",
        "kwargs": {
            "loss": "mse",
            "colsample_bytree": 0.8879,
            "learning_rate": 0.0421,
            "subsample": 0.8789,
            "lambda_l1": 205.6999,
            "lambda_l2": 580.9768,
            "max_depth": 8,
            "num_leaves": 210,
            "num_threads": 20,
            "early_stopping_rounds": 100,
            "verbose": -1,
        },
    }
    model = init_instance_by_config(model_config)

    # Everything inside the context manager is attached to one recorder run.
    with R.start(experiment_name="stock_prediction"):
        R.log_params(**model_config["kwargs"])
        model.fit(dataset)

        # Persist the predictions as a run artifact.
        # NOTE(review): this uses R.save_objects instead of the module-level
        # ``SigRecord`` import; qlib's record templates live in
        # ``qlib.workflow.record_temp`` (e.g. ``SignalRecord``) and take
        # (model, dataset, recorder), so the earlier call form could not work.
        pred = model.predict(dataset)
        R.save_objects(**{"pred.pkl": pred})
    return model
# 4. 回测模块
def _summarize_backtest(report, positions, initial_account):
    """Condense the daily backtest report into the headline metrics the script prints."""
    from qlib.contrib.evaluate import risk_analysis

    # ``return`` is the daily portfolio return before costs; ``cost`` is the
    # daily trading cost rate and ``bench`` the benchmark return.
    net_return = report["return"] - report["cost"]
    excess_return = net_return - report["bench"]
    net_risk = risk_analysis(net_return)["risk"]
    excess_risk = risk_analysis(excess_return)["risk"]

    return {
        # Growth of the simulated account value over the whole window.
        "total_return": report["account"].iloc[-1] / initial_account - 1,
        "annualized_return": net_risk["annualized_return"],
        "max_drawdown": net_risk["max_drawdown"],
        # Information ratio is measured on the return in excess of the benchmark.
        "information_ratio": excess_risk["information_ratio"],
        # Raw outputs are kept so callers can run further analysis.
        "report": report,
        "positions": positions,
    }


def portfolio_backtest(signal="pred"):
    """Backtest a top-k dropout strategy over 2018-01-01 to 2020-12-31.

    Args:
        signal: Prediction signal handed to ``TopkDropoutStrategy``, e.g. the
            scores returned by ``model.predict(dataset)``. The default keeps
            the script's original placeholder value ``"pred"``.
            NOTE(review): a bare string is presumably resolved by qlib as a
            path to a pickled signal; callers should pass real predictions.

    Returns:
        A dict with ``total_return``, ``annualized_return``, ``max_drawdown``
        and ``information_ratio``, plus the raw ``report`` and ``positions``
        returned by ``backtest_daily``.
    """
    strategy_config = {
        "class": "TopkDropoutStrategy",
        "module_path": "qlib.contrib.strategy",
        "kwargs": {
            "topk": 50,
            "n_drop": 5,
            "signal": signal,
        },
    }
    backtest_config = {
        "start_time": "2018-01-01",
        "end_time": "2020-12-31",
        "account": 100000000,
        "benchmark": "SH000300",
        "exchange_kwargs": {
            "freq": "day",
            "limit_threshold": 0.095,
            "deal_price": "close",
            "open_cost": 0.0005,
            "close_cost": 0.0015,
            "min_cost": 5,
        },
    }

    # Run the backtest. ``backtest_daily`` requires the time window explicitly
    # and takes the exchange settings as one ``exchange_kwargs`` dict; it
    # returns the daily report together with the position history.
    strategy_obj = TopkDropoutStrategy(**strategy_config["kwargs"])
    report, positions = backtest_daily(
        start_time=backtest_config["start_time"],
        end_time=backtest_config["end_time"],
        strategy=strategy_obj,
        account=backtest_config["account"],
        benchmark=backtest_config["benchmark"],
        exchange_kwargs=backtest_config["exchange_kwargs"],
    )
    return _summarize_backtest(report, positions, backtest_config["account"])


# Entry point
if __name__ == "__main__":
    # Initialise the environment.
    init_qlib()

    # Data processing.
    print("Processing data...")
    dataset = data_processing()

    # Model training.
    print("Training model...")
    model = model_training(dataset)

    # Backtest, driven by the trained model's predictions rather than a
    # placeholder string.
    print("Running backtest...")
    pred_score = model.predict(dataset)
    result = portfolio_backtest(signal=pred_score)

    # Report the backtest metrics.
    print("Backtest Results:")
    print(f"Total Return: {result['total_return']*100:.2f}%")
    print(f"Annualized Return: {result['annualized_return']*100:.2f}%")
    print(f"Max Drawdown: {result['max_drawdown']*100:.2f}%")
    print(f"Information Ratio: {result['information_ratio']:.2f}")
系统主要包含以下核心模块:
- 数据处理模块:
- 使用Alpha158特征集(包含158个技术指标)
- 数据预处理包含标准化和缺失值处理
- 划分训练集/验证集/测试集
- 标签定义为 T+1 日收盘价到 T+2 日收盘价的收益率(即 Ref($close, -2)/Ref($close, -1) - 1,是单日收益,而非未来两日的累计收益)
- 模型训练模块:
- 使用LightGBM梯度提升树模型
- LightGBM 在建树过程中按分裂增益隐式地侧重重要特征(示例代码本身未做显式的特征选择)
- 早停机制防止过拟合
- 自动记录实验参数和结果
- 回测模块:
- TopK选股策略(选取预测收益最高的50只股票)
- 动态调仓机制(每日调整持仓)
- 真实交易成本建模(佣金+印花税)
- 风险控制参数(涨跌停限制)
- 风险指标计算:
- 总收益率
- 年化收益率
- 最大回撤
- 信息比率
- 数据准备:需要提前下载QLIB的A股数据集(约30GB)
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn
- 特征工程:可根据需求替换Alpha158为其他特征集(如Alpha360)
- 模型优化:可使用QLIB内置的超参优化功能
from qlib.contrib.report.analysis import HPOReport
发布者:股市刺客,转载请注明出处:https://www.95sca.cn/archives/949241
站内所有文章皆来自网络转载或读者投稿,请勿用于商业用途。如有侵权、不妥之处,请联系站长并出示版权证明以便删除。敬请谅解!