"""KNN up/down classification backtest on Baostock daily K-line data.

The script downloads daily OHLC bars for one stock, derives two price-spread
features, trains a K-nearest-neighbours classifier to predict whether the next
close is higher than the current close, and plots the cumulative log return of
following that signal against the buy-and-hold return of the same stock.
"""
import baostock as bs
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt

# Configure Matplotlib fonts so Chinese labels render correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']  # or 'Microsoft YaHei'
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

# Log in to Baostock and stop early if the session could not be opened.
lg = bs.login()
print(f"登录状态:{lg.error_msg}")
if lg.error_code != '0':
    raise RuntimeError(f"Baostock login failed: {lg.error_msg}")


def get_stock_data(ts_code, start_date, end_date):
    """Download daily, unadjusted OHLC bars for one security from Baostock.

    A Baostock session must already be open; this function neither logs in
    nor logs out, so it can be called repeatedly within one session.

    Args:
        ts_code: Baostock security code, e.g. ``'sh.601318'``.
        start_date: First date to request, ``'YYYY-MM-DD'``.
        end_date: Last date to request, ``'YYYY-MM-DD'``.

    Returns:
        A DataFrame indexed by ``date`` (datetime) with float columns
        ``open``, ``high``, ``low`` and ``close``.

    Raises:
        RuntimeError: If Baostock reports an error for the query or while
            the result rows are being read.
    """
    fields = ["date", "open", "high", "low", "close"]
    rs = bs.query_history_k_data_plus(
        ts_code,
        ",".join(fields),
        start_date=start_date,
        end_date=end_date,
        frequency="d",  # daily bars
        adjustflag="3",  # no price adjustment
    )
    if rs.error_code != '0':
        raise RuntimeError(
            f"Baostock query failed for {ts_code}: {rs.error_msg}"
        )

    data_list = []
    # `and` (not bitwise `&`) so rs.next() is only called while the result
    # set is still healthy.
    while rs.error_code == '0' and rs.next():
        data_list.append(rs.get_row_data())
    if rs.error_code != '0':
        raise RuntimeError(
            f"Baostock reported an error while reading rows for {ts_code}: "
            f"{rs.error_msg}"
        )

    df = pd.DataFrame(data_list, columns=fields)
    df['date'] = pd.to_datetime(df['date'])
    df.set_index('date', inplace=True)
    # Baostock returns every field as a string; convert the prices to float.
    price_cols = ['open', 'high', 'low', 'close']
    df[price_cols] = df[price_cols].astype(float)
    return df


def feature_engineering(df):
    """Add feature and label columns to ``df`` and return them as ``(X, y)``.

    ``df`` is modified in place: the columns ``open-close``, ``high-low`` and
    ``target`` are added, and rows that cannot be labelled (no following
    close, i.e. the last row) or that contain NaN are removed. The caller's
    frame therefore stays row-aligned with the returned ``X`` and ``y``.

    Args:
        df: DataFrame with float columns ``open``, ``high``, ``low``,
            ``close``.

    Returns:
        ``X``: DataFrame with the columns ``open-close`` and ``high-low``.
        ``y``: Series ``target``; 1 when the next close is higher than the
        current close, otherwise -1.
    """
    df['open-close'] = df['open'] - df['close']
    df['high-low'] = df['high'] - df['low']

    next_close = df['close'].shift(-1)
    df['target'] = np.where(next_close > df['close'], 1, -1)
    # A NaN next close compares as False and would silently be labelled -1;
    # drop those rows instead of training on a made-up label.
    df.drop(index=df.index[next_close.isna()], inplace=True)
    df.dropna(inplace=True)

    X = df[['open-close', 'high-low']]
    y = df['target']
    return X, y


# Fetch data for stock 601318.
ts_code = 'sh.601318'  # Ping An Insurance of China (ICBC would be sh.601398)
start_date = '2020-01-01'
end_date = '2024-12-31'
try:
    df = get_stock_data(ts_code, start_date, end_date)
finally:
    # Always close the session, even when the download fails.
    bs.logout()

if df.empty:
    raise RuntimeError(
        f"No data returned for {ts_code} between {start_date} and {end_date}"
    )

# Feature engineering (also trims df so it stays aligned with X and y).
X, y = feature_engineering(df)

# Split into training and test sets by date.
split_date = '2023-01-01'
split_date_ts = pd.to_datetime(split_date)

# Boolean mask: rows strictly before the split date form the training set.
train_mask = df.index < split_date_ts
X_train, X_test = X[train_mask], X[~train_mask]
y_train, y_test = y[train_mask], y[~train_mask]

# Train the KNN model.
knn_clf = KNeighborsClassifier(n_neighbors=5)
knn_clf.fit(X_train, y_train)

# Predict over the whole period. NOTE: rows before split_date were used for
# training, so the signal there is in-sample; only the part after split_date
# is an out-of-sample result.
df['predict_signal'] = knn_clf.predict(X)

# Daily log return, and the return of trading on the previous day's signal
# (the signal for day t is only known after the close of day t).
df['return'] = np.log(df['close'] / df['close'].shift(1))
df['strategy_return'] = df['return'] * df['predict_signal'].shift(1)

# Cumulative log returns scaled to percent; the benchmark is simply holding
# the same stock.
cum_return = df['return'].cumsum() * 100
cum_strategy_return = df['strategy_return'].cumsum() * 100

# Plot both curves and mark where the out-of-sample period begins.
plt.figure(figsize=(16, 8))
plt.plot(cum_return.index, cum_return, label='基准收益')
plt.plot(cum_strategy_return.index, cum_strategy_return, label='策略收益')
plt.axvline(split_date_ts, color='gray', linestyle='--', label='训练/测试分割点')
plt.legend()
plt.title('KNN 策略收益与基准收益对比')
plt.xlabel('日期')
plt.ylabel('累计收益 (%)')
plt.show()
【结果分析】从图中可以看到,蓝线部分是该股票的累计基准收益,橙线部分是使用算法进行交易的累计收益。
虽然这里使用的KNN分类模型的准确率并不高,但是使用该模型进行涨跌预测后,进行交易的收益还是高于该股票的基准收益的。需要注意的是,图中的策略收益曲线包含了训练区间(2023年之前)的样本内预测,模型的真实效果应以2023年之后的样本外区间为准。
如果我们通过补充因子(或者说数据集的特征)的方法来进一步提高模型的准确率的话,则算法交易带来的收益还会显著提高。
最后,跟着我运行的朋友有没有发现,获取数据有些麻烦?所以,后续我就用量化软件演示了哦。
Copyright © 2024-2025 成都宁时科技有限公司 版权所有