- 使用前请确保验证集存在(第五届全国工业互联网数据创新应用大赛 机组数据驱动的风电场短期风况预测 【1】【验证集制作】)
- 验证集和测试集的文件夹组织形式完全一致,你通过测试集生成答案的方法对验证集也有效,不过,请把验证集生成的答案文件名修改为answer_train.csv
- 部分变量名来源(采用谷歌翻译,省略“风”)
- 风场: field
- 风机: machine
- 时段: period
- 时刻: time
- 风速: speed
- 风向: direction
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
读取验证集答案和预测答案
# Load both answer tables: the validation-set ground truth first, then the
# predictions that are going to be scored against it.
df_true, df_pred = [
    pd.read_csv(csv_name, encoding='utf-8')
    for csv_name in ('answer_val.csv', 'answer_train.csv')
]
预测答案标准化
缺失值用0代替
# Build a prediction frame that has exactly the same rows, in the same order,
# as the ground truth. Every (field, machine, period) group found in df_pred
# is copied over by position; whatever is missing ends up as 0.
field_1, field_2 = '风场1', '风场2'
# Turbine labels per field: x26..x50 for field 1, x25..x49 for field 2.
machine_1 = [f'x{i}' for i in range(26, 50 + 1)]
machine_2 = [f'x{i}' for i in range(25, 49 + 1)]
# Four seasons times 20 numbered periods, e.g. '春_01' ... '冬_20'.
period = [f'{s}_{str(i).zfill(2)}' for s in ['春', '夏', '秋', '冬'] for i in range(1, 20 + 1)]

tmp_df = df_true.copy()
# Blank out the two target columns. np.nan (rather than None) is used so the
# columns stay floating point instead of risking a fall back to object dtype.
tmp_df.loc[:, ['风速', '风向']] = np.nan

for field_train in [field_1, field_2]:
    for machine_train in tqdm(machine_1 if field_train == field_1 else machine_2):
        # These masks do not depend on the period, so they are computed once
        # per turbine instead of once per (turbine, period) pair. Only the
        # speed/direction columns are written below, so they stay valid.
        tmp_machine = (tmp_df['风场'] == field_train) & (tmp_df['风机'] == machine_train)
        pred_machine = (df_pred['风场'] == field_train) & (df_pred['风机'] == machine_train)
        for period_train in period:
            tmp_index = tmp_machine & (tmp_df['时段'] == period_train)
            df_pred_index = pred_machine & (df_pred['时段'] == period_train)
            if np.any(df_pred_index):
                # Positional copy (.values drops the index): the n-th predicted
                # row of the group fills the n-th ground-truth row of the group.
                tmp_df.loc[tmp_index, ['风速', '风向']] = df_pred.loc[df_pred_index, ['风速', '风向']].values

# Groups that were absent from the prediction file are scored as 0.
df_pred = tmp_df.fillna(0)
计算最终得分R
【风况预测评分规则-python实现】第五届全国工业互联网数据创新应用大赛-机组数据驱动的风电场短期风况预测
直接使用我这篇博客的实现方式2
# %%time  -- Jupyter cell magic, kept as a comment: it is only meaningful as
# the first line of a notebook cell and is not valid Python syntax.

# Data layout the reshapes below rely on.
N_MACHINES = 25  # turbines per field
N_PERIODS = 80   # periods per turbine
N_STEPS = 20     # time steps per period


def w(k):
    """Return the weight of the k-th (1-based) time step of a period."""
    return 0.06 if k <= 10 else 0.04


# Compute the weight vector once and reuse it for every term.
STEP_WEIGHTS = np.array([w(k) for k in range(1, N_STEPS + 1)])


def _field_error(pred, true, threshold):
    """Return the summed prediction error of one wind field.

    Args:
        pred: Rows of the prediction frame belonging to a single field.
        true: Rows of the ground-truth frame belonging to the same field,
            in the same row order as ``pred``.
        threshold: Wind-speed threshold used to build the direction-error
            weights; it differs between the two fields.

    Returns:
        The per-period errors (each averaged over the turbines) summed over
        all periods of the field.

    Raises:
        ValueError: If either frame does not hold exactly
            ``N_MACHINES * N_PERIODS * N_STEPS`` rows.
    """
    expected_rows = N_MACHINES * N_PERIODS * N_STEPS
    if len(pred) != expected_rows or len(true) != expected_rows:
        raise ValueError(
            f'expected {expected_rows} rows per field, '
            f'got {len(pred)} predicted and {len(true)} true rows'
        )

    # One row per (turbine, period) pair, one column per time step.
    # Assumes rows are ordered turbine -> period -> time step -- TODO confirm.
    # The explicit float conversion keeps the maths vectorised even if the
    # columns arrive as object dtype.
    shape = (N_MACHINES * N_PERIODS, N_STEPS)
    v_pred = pred['风速'].to_numpy(dtype=float).reshape(shape)
    v_true = true['风速'].to_numpy(dtype=float).reshape(shape)
    d_pred = pred['风向'].to_numpy(dtype=float).reshape(shape)
    d_true = true['风向'].to_numpy(dtype=float).reshape(shape)

    # m: weighted absolute wind-speed error of every (turbine, period) pair.
    m = np.sum(np.abs(v_pred - v_true) * STEP_WEIGHTS, -1)

    # Direction-error weights: a time step only counts where the mask holds.
    # NOTE(review): the mask keeps steps whose TRUE speed is <= threshold;
    # confirm the comparison direction against the official scoring rule.
    a = (v_true <= threshold) * STEP_WEIGHTS
    # Wrap-around difference modulo 1 (the shorter way round); presumably the
    # direction is normalised to [0, 1) -- confirm.
    e = np.minimum((d_pred - d_true) % 1, (d_true - d_pred) % 1)
    n = np.sum(a * e, -1)

    # Rows are turbines and columns are periods after this reshape; summing
    # over axis 0 and dividing by the turbine count gives the per-period mean
    # over turbines, which is then summed over the periods.
    per_period = (0.7 * m + 0.3 * n).reshape(N_MACHINES, N_PERIODS).sum(axis=0) / N_MACHINES
    return np.sum(per_period)


# Select each field by its label rather than by order of appearance, so that
# a threshold can never be paired with the wrong field.
E1 = _field_error(
    df_pred[df_pred['风场'] == '风场1'],
    df_true[df_true['风场'] == '风场1'],
    threshold=0.15,
)
E2 = _field_error(
    df_pred[df_pred['风场'] == '风场2'],
    df_true[df_true['风场'] == '风场2'],
    threshold=0.086,
)

# Final score: 100 when both field errors are zero, lower as they grow.
R = 100 / (1 + E1 + E2)
print(R)



